diff --git a/submodules/ImageCompression/Sources/ImageCompression.swift b/submodules/ImageCompression/Sources/ImageCompression.swift
index 7fe7b191d5..ee84db8bb3 100644
--- a/submodules/ImageCompression/Sources/ImageCompression.swift
+++ b/submodules/ImageCompression/Sources/ImageCompression.swift
@@ -44,6 +44,25 @@ public func compressImageToJPEG(_ image: UIImage, quality: Float) -> Data? {
     return data as Data
 }
 
+/// Encodes `image` as JPEG XL by forwarding to `compressJPEGXLData`.
+///
+/// - Parameters:
+///   - image: The image to encode.
+///   - quality: Encoder quality, handed to the encoder as `Int32`. A value outside the
+///     `Int32` range is clamped to the nearest bound instead of trapping at the conversion.
+/// - Returns: The encoded bytes, or `nil` when `compressJPEGXLData` returns `nil`.
+public func compressImageToJPEGXL(_ image: UIImage, quality: Int) -> Data? {
+    return compressJPEGXLData(image, Int32(clamping: quality))
+}
+
+/// Decodes JPEG XL `data` by forwarding to `decompressJPEGXLData`.
+///
+/// - Parameter data: The encoded JPEG XL bytes.
+/// - Returns: The decoded image, or `nil` when `decompressJPEGXLData` returns `nil`.
+public func decompressImageFromJPEGXL(data: Data) -> UIImage? {
+    return decompressJPEGXLData(data)
+}
+
 @available(iOSApplicationExtension 11.0, iOS 11.0, *)
 public func compressImage(_ image: UIImage, quality: Float) -> Data? {
     let data = NSMutableData()
diff --git a/submodules/LegacyMediaPickerUI/Sources/LegacyMediaPickers.swift b/submodules/LegacyMediaPickerUI/Sources/LegacyMediaPickers.swift
index 73ba5c27cc..9a698f5f21 100644
--- a/submodules/LegacyMediaPickerUI/Sources/LegacyMediaPickers.swift
+++ b/submodules/LegacyMediaPickerUI/Sources/LegacyMediaPickers.swift
@@ -495,50 +495,128 @@ public func legacyAssetPickerEnqueueMessages(context: AccountContext, account: A
                                         }
                                     }
                                 case let .asset(asset):
-                                    var randomId: Int64 = 0
-                                    arc4random_buf(&randomId, 8)
-                                    let size = CGSize(width: CGFloat(asset.pixelWidth), height: CGFloat(asset.pixelHeight))
-                                    let scaledSize = size.aspectFittedOrSmaller(CGSize(width: 1280.0, height: 1280.0))
-                                    let resource = PhotoLibraryMediaResource(localIdentifier: asset.localIdentifier, uniqueId: Int64.random(in: Int64.min ... Int64.max))
-                                    representations.append(TelegramMediaImageRepresentation(dimensions: PixelDimensions(scaledSize), resource: resource, progressiveSizes: [], immediateThumbnailData: nil, hasVideo: false, isPersonal: false))
-                                    
-                                    let media = TelegramMediaImage(imageId: MediaId(namespace: Namespaces.Media.LocalImage, id: randomId), representations: representations, immediateThumbnailData: nil, reference: nil, partialReference: nil, flags: [])
-                                    var attributes: [MessageAttribute] = []
-                                    if let timer = item.timer, timer > 0 && timer <= 60 {
-                                        attributes.append(AutoremoveTimeoutMessageAttribute(timeout: Int32(timer), countdownBeginTime: nil))
-                                    }
-                                    if let spoiler = item.spoiler, spoiler {
-                                        attributes.append(MediaSpoilerMessageAttribute())
-                                    }
-                                
-                                    let text = trimChatInputText(convertMarkdownToAttributes(caption ?? NSAttributedString()))
-                                    let entities = generateTextEntities(text.string, enabledTypes: .all, currentEntities: generateChatInputTextEntities(text))
-                                    if !entities.isEmpty {
-                                        attributes.append(TextEntitiesMessageAttribute(entities: entities))
-                                    }
-                                
-                                    var bubbleUpEmojiOrStickersetsById: [Int64: ItemCollectionId] = [:]
-                                    text.enumerateAttribute(ChatTextInputAttributes.customEmoji, in: NSRange(location: 0, length: text.length), using: { value, _, _ in
-                                        if let value = value as? ChatTextInputTextCustomEmojiAttribute {
-                                            if let file = value.file {
-                                                if let packId = value.interactivelySelectedFromPackId {
-                                                    bubbleUpEmojiOrStickersetsById[file.fileId.id] = packId
+                                    if context.sharedContext.immediateExperimentalUISettings.storiesJpegExperiment {
+                                        let sizes: [Int32] = [2048, 1280]
+                                        let formats: [MediaImageFormat] = [.jxl, .jpeg]
+                                        let qualities: [Int32: [Int32]] = [
+                                            MediaImageFormat.jxl.rawValue: [
+                                                50,
+                                                75
+                                            ],
+                                            MediaImageFormat.jpeg.rawValue: [
+                                                75
+                                            ]
+                                        ]
+                                        for sizeSide in sizes {
+                                            for format in formats {
+                                                for quality in qualities[format.rawValue]! {
+                                                    var randomId: Int64 = 0
+                                                    arc4random_buf(&randomId, 8)
+                                                    let resource = PhotoLibraryMediaResource(
+                                                        localIdentifier: asset.localIdentifier,
+                                                        uniqueId: Int64.random(in: Int64.min ... Int64.max),
+                                                        width: sizeSide,
+                                                        height: sizeSide,
+                                                        format: format,
+                                                        quality: quality
+                                                    )
+                                                    
+                                                    let size = CGSize(width: CGFloat(asset.pixelWidth), height: CGFloat(asset.pixelHeight))
+                                                    let scaledSize = size.aspectFittedOrSmaller(CGSize(width: CGFloat(sizeSide), height: CGFloat(sizeSide)))
+                                                    
+                                                    let media: Media
+                                                    media = TelegramMediaFile(fileId: MediaId(namespace: Namespaces.Media.LocalFile, id: Int64.random(in: Int64.min ... Int64.max)), partialReference: nil, resource: resource, previewRepresentations: [], videoThumbnails: [], immediateThumbnailData: nil, mimeType: format == .jxl ? "image/jxl" : "image/jpeg", size: nil, attributes: [
+                                                        .FileName(fileName: format == .jxl ? "image\(sizeSide)-q\(quality).jxl" : "image\(sizeSide)-q\(quality).jpg"),
+                                                        .ImageSize(size: PixelDimensions(scaledSize))
+                                                    ])
+                                                    
+                                                    var attributes: [MessageAttribute] = []
+                                                    if let timer = item.timer, timer > 0 && timer <= 60 {
+                                                        attributes.append(AutoremoveTimeoutMessageAttribute(timeout: Int32(timer), countdownBeginTime: nil))
+                                                    }
+                                                    if let spoiler = item.spoiler, spoiler {
+                                                        attributes.append(MediaSpoilerMessageAttribute())
+                                                    }
+                                                    
+                                                    let text = trimChatInputText(convertMarkdownToAttributes(caption ?? NSAttributedString()))
+                                                    let entities = generateTextEntities(text.string, enabledTypes: .all, currentEntities: generateChatInputTextEntities(text))
+                                                    if !entities.isEmpty {
+                                                        attributes.append(TextEntitiesMessageAttribute(entities: entities))
+                                                    }
+                                                    
+                                                    var bubbleUpEmojiOrStickersetsById: [Int64: ItemCollectionId] = [:]
+                                                    text.enumerateAttribute(ChatTextInputAttributes.customEmoji, in: NSRange(location: 0, length: text.length), using: { value, _, _ in
+                                                        if let value = value as? ChatTextInputTextCustomEmojiAttribute {
+                                                            if let file = value.file {
+                                                                if let packId = value.interactivelySelectedFromPackId {
+                                                                    bubbleUpEmojiOrStickersetsById[file.fileId.id] = packId
+                                                                }
+                                                            }
+                                                        }
+                                                    })
+                                                    var bubbleUpEmojiOrStickersets: [ItemCollectionId] = []
+                                                    for entity in entities {
+                                                        if case let .CustomEmoji(_, fileId) = entity.type {
+                                                            if let packId = bubbleUpEmojiOrStickersetsById[fileId] {
+                                                                if !bubbleUpEmojiOrStickersets.contains(packId) {
+                                                                    bubbleUpEmojiOrStickersets.append(packId)
+                                                                }
+                                                            }
+                                                        }
+                                                    }
+                                                    
+                                                    messages.append(LegacyAssetPickerEnqueueMessage(message: .message(text: text.string, attributes: attributes, inlineStickers: [:], mediaReference: .standalone(media: media), replyToMessageId: nil, replyToStoryId: nil, localGroupingKey: item.groupedId, correlationId: nil, bubbleUpEmojiOrStickersets: bubbleUpEmojiOrStickersets), uniqueId: item.uniqueId, isFile: false))
                                                 }
                                             }
                                         }
-                                    })
-                                    var bubbleUpEmojiOrStickersets: [ItemCollectionId] = []
-                                    for entity in entities {
-                                        if case let .CustomEmoji(_, fileId) = entity.type {
-                                            if let packId = bubbleUpEmojiOrStickersetsById[fileId] {
-                                                if !bubbleUpEmojiOrStickersets.contains(packId) {
-                                                    bubbleUpEmojiOrStickersets.append(packId)
+                                    } else {
+                                        var randomId: Int64 = 0
+                                        arc4random_buf(&randomId, 8)
+                                        let size = CGSize(width: CGFloat(asset.pixelWidth), height: CGFloat(asset.pixelHeight))
+                                        let scaledSize = size.aspectFittedOrSmaller(CGSize(width: 1280.0, height: 1280.0))
+                                        let resource = PhotoLibraryMediaResource(localIdentifier: asset.localIdentifier, uniqueId: Int64.random(in: Int64.min ... Int64.max))
+                                    
+                                        let media: Media
+                                        representations.append(TelegramMediaImageRepresentation(dimensions: PixelDimensions(scaledSize), resource: resource, progressiveSizes: [], immediateThumbnailData: nil, hasVideo: false, isPersonal: false))
+                                        media = TelegramMediaImage(imageId: MediaId(namespace: Namespaces.Media.LocalImage, id: randomId), representations: representations, immediateThumbnailData: nil, reference: nil, partialReference: nil, flags: [])
+                                    
+                                        var attributes: [MessageAttribute] = []
+                                        if let timer = item.timer, timer > 0 && timer <= 60 {
+                                            attributes.append(AutoremoveTimeoutMessageAttribute(timeout: Int32(timer), countdownBeginTime: nil))
+                                        }
+                                        if let spoiler = item.spoiler, spoiler {
+                                            attributes.append(MediaSpoilerMessageAttribute())
+                                        }
+                                    
+                                        let text = trimChatInputText(convertMarkdownToAttributes(caption ?? NSAttributedString()))
+                                        let entities = generateTextEntities(text.string, enabledTypes: .all, currentEntities: generateChatInputTextEntities(text))
+                                        if !entities.isEmpty {
+                                            attributes.append(TextEntitiesMessageAttribute(entities: entities))
+                                        }
+                                    
+                                        var bubbleUpEmojiOrStickersetsById: [Int64: ItemCollectionId] = [:]
+                                        text.enumerateAttribute(ChatTextInputAttributes.customEmoji, in: NSRange(location: 0, length: text.length), using: { value, _, _ in
+                                            if let value = value as? ChatTextInputTextCustomEmojiAttribute {
+                                                if let file = value.file {
+                                                    if let packId = value.interactivelySelectedFromPackId {
+                                                        bubbleUpEmojiOrStickersetsById[file.fileId.id] = packId
+                                                    }
+                                                }
+                                            }
+                                        })
+                                        var bubbleUpEmojiOrStickersets: [ItemCollectionId] = []
+                                        for entity in entities {
+                                            if case let .CustomEmoji(_, fileId) = entity.type {
+                                                if let packId = bubbleUpEmojiOrStickersetsById[fileId] {
+                                                    if !bubbleUpEmojiOrStickersets.contains(packId) {
+                                                        bubbleUpEmojiOrStickersets.append(packId)
+                                                    }
                                                 }
                                             }
                                         }
+                                        
+                                        messages.append(LegacyAssetPickerEnqueueMessage(message: .message(text: text.string, attributes: attributes, inlineStickers: [:], mediaReference: .standalone(media: media), replyToMessageId: nil, replyToStoryId: nil, localGroupingKey: item.groupedId, correlationId: nil, bubbleUpEmojiOrStickersets: bubbleUpEmojiOrStickersets), uniqueId: item.uniqueId, isFile: false))
                                     }
-                                    
-                                    messages.append(LegacyAssetPickerEnqueueMessage(message: .message(text: text.string, attributes: attributes, inlineStickers: [:], mediaReference: .standalone(media: media), replyToMessageId: nil, replyToStoryId: nil, localGroupingKey: item.groupedId, correlationId: nil, bubbleUpEmojiOrStickersets: bubbleUpEmojiOrStickersets), uniqueId: item.uniqueId, isFile: false))
                                 case .tempFile:
                                     break
                             }
diff --git a/submodules/LocalMediaResources/Sources/FetchPhotoLibraryImageResource.swift b/submodules/LocalMediaResources/Sources/FetchPhotoLibraryImageResource.swift
index f2b7aa5631..88c2bcca61 100644
--- a/submodules/LocalMediaResources/Sources/FetchPhotoLibraryImageResource.swift
+++ b/submodules/LocalMediaResources/Sources/FetchPhotoLibraryImageResource.swift
@@ -84,7 +84,7 @@ extension UIImage.Orientation {
 
 private let fetchPhotoWorkers = ThreadPool(threadCount: 3, threadPriority: 0.2)
 
-public func fetchPhotoLibraryResource(localIdentifier: String) -> Signal<MediaResourceDataFetchResult, MediaResourceDataFetchError> {
+public func fetchPhotoLibraryResource(localIdentifier: String, width: Int32?, height: Int32?, format: MediaImageFormat?, quality: Int32?) -> Signal<MediaResourceDataFetchResult, MediaResourceDataFetchError> {
     return Signal { subscriber in
         let queue = ThreadPoolQueue(threadPool: fetchPhotoWorkers)
         
@@ -97,7 +97,12 @@ public func fetchPhotoLibraryResource(localIdentifier: String) -> Signal<MediaRe
             option.isNetworkAccessAllowed = true
             option.isSynchronous = false
             
-            let size = CGSize(width: 1280.0, height: 1280.0)
+            let size: CGSize
+            if let width, let height {
+                size = CGSize(width: CGFloat(width), height: CGFloat(height))
+            } else {
+                size = CGSize(width: 1280.0, height: 1280.0)
+            }
             
             queue.addTask(ThreadPoolTask({ _ in
                 let startTime = CACurrentMediaTime()
@@ -127,14 +132,27 @@ public func fetchPhotoLibraryResource(localIdentifier: String) -> Signal<MediaRe
                                 print("scaled completion \((CACurrentMediaTime() - startTime) * 1000.0) ms")
 #endif
                                 
-                                if let scaledImage = scaledImage, let data = compressImageToJPEG(scaledImage, quality: 0.6) {
-#if DEBUG
-                                    print("compression completion \((CACurrentMediaTime() - startTime) * 1000.0) ms")
-#endif
-                                    subscriber.putNext(.dataPart(resourceOffset: 0, data: data, range: 0 ..< Int64(data.count), complete: true))
-                                    subscriber.putCompletion()
-                                } else {
-                                    subscriber.putCompletion()
+                                switch format {
+                                case .none, .jpeg:
+                                    if let scaledImage = scaledImage, let data = compressImageToJPEG(scaledImage, quality: 0.6) {
+    #if DEBUG
+                                        print("compression completion \((CACurrentMediaTime() - startTime) * 1000.0) ms")
+    #endif
+                                        subscriber.putNext(.dataPart(resourceOffset: 0, data: data, range: 0 ..< Int64(data.count), complete: true))
+                                        subscriber.putCompletion()
+                                    } else {
+                                        subscriber.putCompletion()
+                                    }
+                                case .jxl:
+                                    if let scaledImage = scaledImage, let data = compressImageToJPEGXL(scaledImage, quality: Int(quality ?? 75)) {
+    #if DEBUG
+                                        print("jpegxl compression completion \((CACurrentMediaTime() - startTime) * 1000.0) ms")
+    #endif
+                                        subscriber.putNext(.dataPart(resourceOffset: 0, data: data, range: 0 ..< Int64(data.count), complete: true))
+                                        subscriber.putCompletion()
+                                    } else {
+                                        subscriber.putCompletion()
+                                    }
                                 }
                                 semaphore.signal()
                             }
diff --git a/submodules/LocalMediaResources/Sources/MediaResources.swift b/submodules/LocalMediaResources/Sources/MediaResources.swift
index 0306be276e..a6f87f3f19 100644
--- a/submodules/LocalMediaResources/Sources/MediaResources.swift
+++ b/submodules/LocalMediaResources/Sources/MediaResources.swift
@@ -220,6 +220,11 @@ public struct PhotoLibraryMediaResourceId {
     }
 }
 
+public enum MediaImageFormat: Int32 { // raw values are persisted by PhotoLibraryMediaResource under key "f"
+    case jpeg = 0 // spelled out so reordering or inserting cases cannot change already-stored values
+    case jxl = 1
+}
+
 public class PhotoLibraryMediaResource: TelegramMediaResource {
     public var size: Int64? {
         return nil
@@ -227,20 +232,52 @@ public class PhotoLibraryMediaResource: TelegramMediaResource {
     
     public let localIdentifier: String
     public let uniqueId: Int64
+    public let width: Int32?
+    public let height: Int32?
+    public let format: MediaImageFormat?
+    public let quality: Int32?
     
-    public init(localIdentifier: String, uniqueId: Int64) {
+    public init(localIdentifier: String, uniqueId: Int64, width: Int32? = nil, height: Int32? = nil, format: MediaImageFormat? = nil, quality: Int32? = nil) {
         self.localIdentifier = localIdentifier
         self.uniqueId = uniqueId
+        self.width = width
+        self.height = height
+        self.format = format
+        self.quality = quality
     }
     
     public required init(decoder: PostboxDecoder) {
         self.localIdentifier = decoder.decodeStringForKey("i", orElse: "")
         self.uniqueId = decoder.decodeInt64ForKey("uid", orElse: 0)
+        self.width = decoder.decodeOptionalInt32ForKey("w") // keys "w", "h", "f", "q" mirror the ones written in encode(_:)
+        self.height = decoder.decodeOptionalInt32ForKey("h")
+        self.format = decoder.decodeOptionalInt32ForKey("f").flatMap(MediaImageFormat.init(rawValue:)) // a stored value matching no MediaImageFormat case yields nil
+        self.quality = decoder.decodeOptionalInt32ForKey("q")
     }
     
     public func encode(_ encoder: PostboxEncoder) {
         encoder.encodeString(self.localIdentifier, forKey: "i")
         encoder.encodeInt64(self.uniqueId, forKey: "uid")
+        if let width = self.width { // each optional field is written as an Int32 when set, otherwise as an explicit nil under the same key
+            encoder.encodeInt32(width, forKey: "w")
+        } else {
+            encoder.encodeNil(forKey: "w")
+        }
+        if let height = self.height {
+            encoder.encodeInt32(height, forKey: "h")
+        } else {
+            encoder.encodeNil(forKey: "h")
+        }
+        if let format = self.format {
+            encoder.encodeInt32(format.rawValue, forKey: "f") // stored as the enum's Int32 raw value; init(decoder:) maps it back via init(rawValue:)
+        } else {
+            encoder.encodeNil(forKey: "f")
+        }
+        if let quality = self.quality {
+            encoder.encodeInt32(quality, forKey: "q")
+        } else {
+            encoder.encodeNil(forKey: "q")
+        }
     }
     
     public var id: MediaResourceId {
@@ -249,7 +286,25 @@ public class PhotoLibraryMediaResource: TelegramMediaResource {
     
     public func isEqual(to: MediaResource) -> Bool {
         if let to = to as? PhotoLibraryMediaResource {
-            return self.localIdentifier == to.localIdentifier && self.uniqueId == to.uniqueId
+            if self.localIdentifier != to.localIdentifier {
+                return false
+            }
+            if self.uniqueId != to.uniqueId {
+                return false
+            }
+            if self.width != to.width {
+                return false
+            }
+            if self.height != to.height {
+                return false
+            }
+            if self.format != to.format {
+                return false
+            }
+            if self.quality != to.quality {
+                return false
+            }
+            return true
         } else {
             return false
         }
diff --git a/submodules/MozjpegBinding/BUILD b/submodules/MozjpegBinding/BUILD
index 55ef3065ee..15083f04d7 100644
--- a/submodules/MozjpegBinding/BUILD
+++ b/submodules/MozjpegBinding/BUILD
@@ -5,6 +5,7 @@ objc_library(
     enable_modules = True,
     srcs = glob([
         "Sources/**/*.m",
+        "Sources/**/*.mm",
         "Sources/**/*.h",
     ]),
     hdrs = glob([
@@ -15,6 +16,7 @@ objc_library(
     ],
     deps = [
         "//third-party/mozjpeg:mozjpeg",
+        "//third-party/libjxl:jxl",
     ],
     visibility = [
         "//visibility:public",
diff --git a/submodules/MozjpegBinding/Public/MozjpegBinding/MozjpegBinding.h b/submodules/MozjpegBinding/Public/MozjpegBinding/MozjpegBinding.h
index 894c5238e5..27ec64c926 100644
--- a/submodules/MozjpegBinding/Public/MozjpegBinding/MozjpegBinding.h
+++ b/submodules/MozjpegBinding/Public/MozjpegBinding/MozjpegBinding.h
@@ -1,6 +1,17 @@
 #import <UIKit/UIKit.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 NSData * _Nullable compressJPEGData(UIImage * _Nonnull sourceImage);
 NSArray<NSNumber *> * _Nonnull extractJPEGDataScans(NSData * _Nonnull data);
 NSData * _Nullable compressMiniThumbnail(UIImage * _Nonnull image, CGSize size);
 UIImage * _Nullable decompressImage(NSData * _Nonnull sourceData);
+
+NSData * _Nullable compressJPEGXLData(UIImage * _Nonnull sourceImage, int quality);
+UIImage * _Nullable decompressJPEGXLData(NSData * _Nonnull data);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/submodules/MozjpegBinding/Sources/MozjpegBinding.m b/submodules/MozjpegBinding/Sources/MozjpegBinding.m
deleted file mode 100644
index a71a5f5c87..0000000000
--- a/submodules/MozjpegBinding/Sources/MozjpegBinding.m
+++ /dev/null
@@ -1,335 +0,0 @@
-#import <MozjpegBinding/MozjpegBinding.h>
-
-#import <mozjpeg/turbojpeg.h>
-#import <mozjpeg/jpeglib.h>
-#import <Accelerate/Accelerate.h>
-
-static NSData *getHeaderPattern() {
-    static NSData *value = nil;
-    static dispatch_once_t onceToken;
-    dispatch_once(&onceToken, ^{
-        value = [[NSData alloc] initWithBase64EncodedString:@"/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDACgcHiMeGSgjISMtKygwPGRBPDc3PHtYXUlkkYCZlo+AjIqgtObDoKrarYqMyP/L2u71////m8H////6/+b9//j/2wBDASstLTw1PHZBQXb4pYyl+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj/wAARCAAAAAADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwA=" options:0];
-    });
-    return value;
-}
-
-static NSData *getFooterPattern() {
-    static NSData *value = nil;
-    static dispatch_once_t onceToken;
-    dispatch_once(&onceToken, ^{
-        value = [[NSData alloc] initWithBase64EncodedString:@"/9k=" options:0];
-    });
-    return value;
-}
-
-NSArray<NSNumber *> * _Nonnull extractJPEGDataScans(NSData * _Nonnull data) {
-    NSMutableArray<NSNumber *> *result = [[NSMutableArray alloc] init];
-    
-    const uint8_t *dataBytes = data.bytes;
-    int offset = 0;
-    while (offset < data.length) {
-        bool found = false;
-        for (int i = offset + 2; i < data.length - 1; i++) {
-            if (dataBytes[i] == 0xffU && dataBytes[i + 1] == 0xdaU) {
-                if (offset != 0) {
-                    [result addObject:@(i)];
-                }
-                offset = i;
-                found = true;
-            }
-        }
-        if (!found) {
-            break;
-        }
-    }
-    
-#if DEBUG
-    static NSString *sessionPrefix = nil;
-    static dispatch_once_t onceToken;
-    dispatch_once(&onceToken, ^{
-        sessionPrefix = [NSString stringWithFormat:@"%u", arc4random()];
-    });
-    
-    NSString *randomId = [NSString stringWithFormat:@"%u", arc4random()];
-    NSString *dirPath = [[NSTemporaryDirectory() stringByAppendingPathComponent:sessionPrefix] stringByAppendingPathComponent:randomId];
-    [[NSFileManager defaultManager] createDirectoryAtPath:dirPath withIntermediateDirectories:true attributes:nil error:nil];
-    for (int i = 0; i < result.count + 1; i++) {
-        NSString *filePath = [dirPath stringByAppendingPathComponent:[NSString stringWithFormat:@"%d.jpg", i]];
-        if (i == result.count) {
-            [data writeToFile:filePath atomically:true];
-        } else {
-            [[data subdataWithRange:NSMakeRange(0, [result[i] intValue])] writeToFile:filePath atomically:true];
-        }
-    }
-    NSLog(@"Path: %@", dirPath);
-#endif
-    
-    return result;
-}
-
-NSData * _Nullable compressJPEGData(UIImage * _Nonnull sourceImage) {
-    int width = (int)(sourceImage.size.width * sourceImage.scale);
-    int height = (int)(sourceImage.size.height * sourceImage.scale);
-    
-    int targetBytesPerRow = ((4 * (int)width) + 31) & (~31);
-    uint8_t *targetMemory = malloc((int)(targetBytesPerRow * height));
-    
-    CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
-    CGBitmapInfo bitmapInfo = kCGImageAlphaNoneSkipFirst | kCGBitmapByteOrder32Host;
-    
-    CGContextRef targetContext = CGBitmapContextCreate(targetMemory, width, height, 8, targetBytesPerRow, colorSpace, bitmapInfo);
-    
-    UIGraphicsPushContext(targetContext);
-    
-    CGColorSpaceRelease(colorSpace);
-    
-    CGContextDrawImage(targetContext, CGRectMake(0, 0, width, height), sourceImage.CGImage);
-    
-    UIGraphicsPopContext();
-    
-    int bufferBytesPerRow = ((3 * (int)width) + 31) & (~31);
-    uint8_t *buffer = malloc(bufferBytesPerRow * height);
-    
-    for (int y = 0; y < height; y++) {
-        for (int x = 0; x < width; x++) {
-            uint32_t *color = ((uint32_t *)&targetMemory[y * targetBytesPerRow + x * 4]);
-            
-            uint32_t r = ((*color >> 16) & 0xff);
-            uint32_t g = ((*color >> 8) & 0xff);
-            uint32_t b = (*color & 0xff);
-            
-            buffer[y * bufferBytesPerRow + x * 3 + 0] = r;
-            buffer[y * bufferBytesPerRow + x * 3 + 1] = g;
-            buffer[y * bufferBytesPerRow + x * 3 + 2] = b;
-        }
-    }
-    
-    CGContextRelease(targetContext);
-    
-    free(targetMemory);
-    
-    struct jpeg_compress_struct cinfo;
-    struct jpeg_error_mgr jerr;
-    cinfo.err = jpeg_std_error(&jerr);
-    jpeg_create_compress(&cinfo);
-    
-    uint8_t *outBuffer = NULL;
-    unsigned long outSize = 0;
-    jpeg_mem_dest(&cinfo, &outBuffer, &outSize);
-    
-    cinfo.image_width = (uint32_t)width;
-    cinfo.image_height = (uint32_t)height;
-    cinfo.input_components = 3;
-    cinfo.in_color_space = JCS_RGB;
-    jpeg_c_set_int_param(&cinfo, JINT_COMPRESS_PROFILE, JCP_FASTEST);
-    jpeg_set_defaults(&cinfo);
-    cinfo.arith_code = FALSE;
-    cinfo.dct_method = JDCT_ISLOW;
-    cinfo.optimize_coding = TRUE;
-    jpeg_set_quality(&cinfo, 72, 1);
-    jpeg_simple_progression(&cinfo);
-    jpeg_start_compress(&cinfo, 1);
-    
-    JSAMPROW rowPointer[1];
-    while (cinfo.next_scanline < cinfo.image_height) {
-        rowPointer[0] = (JSAMPROW)(buffer + cinfo.next_scanline * bufferBytesPerRow);
-        jpeg_write_scanlines(&cinfo, rowPointer, 1);
-    }
-    
-    jpeg_finish_compress(&cinfo);
-    
-    NSData *result = [[NSData alloc] initWithBytes:outBuffer length:outSize];
-    
-    jpeg_destroy_compress(&cinfo);
-    
-    free(buffer);
-    
-    return result;
-}
-
-NSData * _Nullable compressMiniThumbnail(UIImage * _Nonnull image, CGSize size) {
-    CGSize fittedSize = image.size;
-    if (fittedSize.width > size.width) {
-        fittedSize = CGSizeMake(size.width, (int)((fittedSize.height * size.width / MAX(fittedSize.width, 1.0f))));
-    }
-    if (fittedSize.height > size.height) {
-        fittedSize = CGSizeMake((int)((fittedSize.width * size.height / MAX(fittedSize.height, 1.0f))), size.height);
-    }
-    
-    int width = (int)fittedSize.width;
-    int height = (int)fittedSize.height;
-    
-    int targetBytesPerRow = ((4 * (int)width) + 31) & (~31);
-    uint8_t *targetMemory = malloc((int)(targetBytesPerRow * height));
-    
-    CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
-    CGBitmapInfo bitmapInfo = kCGImageAlphaNoneSkipFirst | kCGBitmapByteOrder32Host;
-    
-    CGContextRef targetContext = CGBitmapContextCreate(targetMemory, width, height, 8, targetBytesPerRow, colorSpace, bitmapInfo);
-    
-    UIGraphicsPushContext(targetContext);
-    
-    CGColorSpaceRelease(colorSpace);
-    
-    CGContextDrawImage(targetContext, CGRectMake(0, 0, width, height), image.CGImage);
-    
-    UIGraphicsPopContext();
-    
-    int bufferBytesPerRow = ((3 * (int)width) + 31) & (~31);
-    uint8_t *buffer = malloc(bufferBytesPerRow * height);
-    
-    for (int y = 0; y < height; y++) {
-        for (int x = 0; x < width; x++) {
-            uint32_t *color = ((uint32_t *)&targetMemory[y * targetBytesPerRow + x * 4]);
-            
-            uint32_t r = ((*color >> 16) & 0xff);
-            uint32_t g = ((*color >> 8) & 0xff);
-            uint32_t b = (*color & 0xff);
-            
-            buffer[y * bufferBytesPerRow + x * 3 + 0] = r;
-            buffer[y * bufferBytesPerRow + x * 3 + 1] = g;
-            buffer[y * bufferBytesPerRow + x * 3 + 2] = b;
-        }
-    }
-    
-    CGContextRelease(targetContext);
-    
-    free(targetMemory);
-    
-    struct jpeg_compress_struct cinfo;
-    struct jpeg_error_mgr jerr;
-    cinfo.err = jpeg_std_error(&jerr);
-    jpeg_create_compress(&cinfo);
-    
-    uint8_t *outBuffer = NULL;
-    unsigned long outSize = 0;
-    jpeg_mem_dest(&cinfo, &outBuffer, &outSize);
-    
-    cinfo.image_width = (uint32_t)width;
-    cinfo.image_height = (uint32_t)height;
-    cinfo.input_components = 3;
-    cinfo.in_color_space = JCS_RGB;
-    jpeg_c_set_int_param(&cinfo, JINT_COMPRESS_PROFILE, JCP_FASTEST);
-    jpeg_set_defaults(&cinfo);
-    cinfo.arith_code = FALSE;
-    cinfo.dct_method = JDCT_ISLOW;
-    cinfo.optimize_coding = FALSE;
-    jpeg_set_quality(&cinfo, 20, 1);
-    jpeg_start_compress(&cinfo, 1);
-    
-    JSAMPROW rowPointer[1];
-    while (cinfo.next_scanline < cinfo.image_height) {
-        rowPointer[0] = (JSAMPROW)(buffer + cinfo.next_scanline * bufferBytesPerRow);
-        jpeg_write_scanlines(&cinfo, rowPointer, 1);
-    }
-    
-    jpeg_finish_compress(&cinfo);
-    
-    NSMutableData *serializedData = nil;
-    
-    NSData *headerPattern = getHeaderPattern();
-    NSData *footerPattern = getFooterPattern();
-    if (outBuffer[164] == height && outBuffer[166] == width && headerPattern != nil && footerPattern != nil) {
-        outBuffer[164] = 0;
-        outBuffer[166] = 0;
-        
-        if (memcmp(headerPattern.bytes, outBuffer, headerPattern.length) == 0) {
-            if (memcmp(footerPattern.bytes, outBuffer + outSize - footerPattern.length, footerPattern.length) == 0) {
-                serializedData = [[NSMutableData alloc] init];
-                uint8_t version = 1;
-                [serializedData appendBytes:&version length:1];
-                uint8_t outWidth = (uint8_t)width;
-                uint8_t outHeight = (uint8_t)height;
-                [serializedData appendBytes:&outHeight length:1];
-                [serializedData appendBytes:&outWidth length:1];
-                unsigned long contentSize = outSize - headerPattern.length - footerPattern.length;
-                [serializedData appendBytes:outBuffer + headerPattern.length length:contentSize];
-            }
-        }
-    }
-    
-    jpeg_destroy_compress(&cinfo);
-    
-    free(buffer);
-    
-    return serializedData;
-}
-
-UIImage * _Nullable decompressImage(NSData * _Nonnull sourceData) {
-    long unsigned int jpegSize = sourceData.length;
-    unsigned char *_compressedImage = (unsigned char *)sourceData.bytes;
-
-    int jpegSubsamp, width, height;
-
-    tjhandle _jpegDecompressor = tjInitDecompress();
-
-    if (tjDecompressHeader2(_jpegDecompressor, _compressedImage, jpegSize, &width, &height, &jpegSubsamp) != 0) {
-        return nil;
-    }
-
-    int sourceBytesPerRow = (3 * width + 31) & ~0x1F;
-    int targetBytesPerRow = (4 * width + 31) & ~0x1F;
-
-    unsigned char *buffer = malloc(sourceBytesPerRow * height);
-
-    tjDecompress2(_jpegDecompressor, _compressedImage, jpegSize, buffer, width, sourceBytesPerRow, height, TJPF_RGB, TJFLAG_FASTDCT | TJFLAG_FASTUPSAMPLE);
-
-    tjDestroy(_jpegDecompressor);
-
-    vImage_Buffer source;
-    source.width = width;
-    source.height = height;
-    source.rowBytes = sourceBytesPerRow;
-    source.data = buffer;
-
-    vImage_Buffer target;
-    target.width = width;
-    target.height = height;
-    target.rowBytes = targetBytesPerRow;
-
-    unsigned char *targetBuffer = malloc(targetBytesPerRow * height);
-    target.data = targetBuffer;
-
-    vImageConvert_RGB888toARGB8888(&source, nil, 0xff, &target, false, kvImageDoNotTile);
-
-    free(buffer);
-
-    vImage_Buffer permuteTarget;
-    permuteTarget.width = width;
-    permuteTarget.height = height;
-    permuteTarget.rowBytes = targetBytesPerRow;
-
-    unsigned char *permuteTargetBuffer = malloc(targetBytesPerRow * height);
-    permuteTarget.data = permuteTargetBuffer;
-
-    const uint8_t permuteMap[4] = {3,2,1,0};
-    vImagePermuteChannels_ARGB8888(&target, &permuteTarget, permuteMap, kvImageDoNotTile);
-
-    free(targetBuffer);
-
-    NSData *resultData = [[NSData alloc] initWithBytesNoCopy:permuteTargetBuffer length:targetBytesPerRow * height deallocator:^(void * _Nonnull bytes, __unused NSUInteger length) {
-        free(bytes);
-    }];
-
-    CGDataProviderRef dataProvider = CGDataProviderCreateWithCFData((__bridge CFDataRef)resultData);
-
-    static CGColorSpaceRef imageColorSpace;
-    static CGBitmapInfo bitmapInfo;
-    static dispatch_once_t onceToken;
-    dispatch_once(&onceToken, ^{
-        UIGraphicsBeginImageContextWithOptions(CGSizeMake(1, 1), YES, 0);
-        UIImage *refImage = UIGraphicsGetImageFromCurrentImageContext();
-        imageColorSpace = CGColorSpaceRetain(CGImageGetColorSpace(refImage.CGImage));
-        bitmapInfo = CGImageGetBitmapInfo(refImage.CGImage);
-        UIGraphicsEndImageContext();
-    });
-
-    CGImageRef cgImg = CGImageCreate(width, height, 8, 32, targetBytesPerRow, imageColorSpace, bitmapInfo, dataProvider, NULL, true, kCGRenderingIntentDefault);
-
-    CGDataProviderRelease(dataProvider);
-
-    UIImage *resultImage = [[UIImage alloc] initWithCGImage:cgImg];
-    CGImageRelease(cgImg);
-
-    return resultImage;
-}
diff --git a/submodules/MozjpegBinding/Sources/MozjpegBinding.mm b/submodules/MozjpegBinding/Sources/MozjpegBinding.mm
new file mode 100644
index 0000000000..8bf4263244
--- /dev/null
+++ b/submodules/MozjpegBinding/Sources/MozjpegBinding.mm
@@ -0,0 +1,780 @@
+#import <MozjpegBinding/MozjpegBinding.h>
+
+#define USE_JPEGLI false
+
+#import <mozjpeg/turbojpeg.h>
+#import <mozjpeg/jpeglib.h>
+
+#import <Accelerate/Accelerate.h>
+
+#include <jxl/encode.h>
+#include <jxl/encode_cxx.h>
+#include <jxl/decode.h>
+#include <jxl/decode_cxx.h>
+//#include <jxl/thread_parallel_runner.h>
+//#include <jxl/thread_parallel_runner_cxx.h>
+
+#include <limits.h>
+#include <string.h>
+#include <sstream>
+#include <string>
+#include <vector>
+
+// Maps a quality value to a libjxl distance: 0 -> 1.0, >= 30 -> linear (quality capped at 100), else exponential.
+static inline float JXLGetDistance(int32_t quality) {
+    if (quality == 0) {
+        return 1.0f;
+    }
+    const bool isLowQuality = quality < 30;
+    return isLowQuality ? (6.24f + (float)pow(2.5f, (30.0 - quality) / 5.0) / 6.25f)
+                        : (0.1f + (float)(100 - MIN(100, quality)) * 0.09f);
+}
+
+// Encodes `sourceImage` as a lossy JPEG XL bitstream.
+// `quality` is mapped to a libjxl distance by JXLGetDistance; returns nil when setup or encoding fails.
+NSData * _Nullable compressJPEGXLData(UIImage * _Nonnull sourceImage, int quality) {
+    int width = (int)(sourceImage.size.width * sourceImage.scale);
+    int height = (int)(sourceImage.size.height * sourceImage.scale);
+    if (width <= 0 || height <= 0) {
+        return nil;
+    }
+    
+    int targetBytesPerRow = ((4 * (int)width) + 31) & (~31);
+    uint8_t *targetMemory = (uint8_t *)malloc((int)(targetBytesPerRow * height));
+    if (targetMemory == NULL) {
+        return nil;
+    }
+    
+    CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
+    CGBitmapInfo bitmapInfo = kCGImageAlphaNoneSkipFirst | kCGBitmapByteOrder32Host;
+    CGContextRef targetContext = CGBitmapContextCreate(targetMemory, width, height, 8, targetBytesPerRow, colorSpace, bitmapInfo);
+    CGColorSpaceRelease(colorSpace);
+    if (targetContext == NULL) {
+        free(targetMemory);
+        return nil;
+    }
+    
+    UIGraphicsPushContext(targetContext);
+    CGContextDrawImage(targetContext, CGRectMake(0, 0, width, height), sourceImage.CGImage);
+    UIGraphicsPopContext();
+    
+    // Repack the host-order xRGB words into R, G, B byte triplets; rows stay padded to a multiple of 32 bytes.
+    int bufferBytesPerRow = ((3 * (int)width) + 31) & (~31);
+    int bufferSize = bufferBytesPerRow * height;
+    uint8_t *buffer = (uint8_t *)malloc(bufferSize);
+    if (buffer == NULL) {
+        CGContextRelease(targetContext);
+        free(targetMemory);
+        return nil;
+    }
+    
+    for (int y = 0; y < height; y++) {
+        for (int x = 0; x < width; x++) {
+            uint32_t *color = ((uint32_t *)&targetMemory[y * targetBytesPerRow + x * 4]);
+            uint32_t r = ((*color >> 16) & 0xff);
+            uint32_t g = ((*color >> 8) & 0xff);
+            uint32_t b = (*color & 0xff);
+            buffer[y * bufferBytesPerRow + x * 3 + 0] = r;
+            buffer[y * bufferBytesPerRow + x * 3 + 1] = g;
+            buffer[y * bufferBytesPerRow + x * 3 + 2] = b;
+        }
+    }
+    
+    CGContextRelease(targetContext);
+    free(targetMemory);
+    
+    auto enc = JxlEncoderMake(nullptr);
+    // The scanline alignment must equal the 32-byte padding used for bufferBytesPerRow; with any other
+    // value libjxl derives a different row stride and reads every row after the first at the wrong offset.
+    JxlPixelFormat pixel_format = {3, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 32};
+
+    JxlBasicInfo basic_info;
+    JxlEncoderInitBasicInfo(&basic_info);
+    basic_info.xsize = width;
+    basic_info.ysize = height;
+    // The source samples are 8-bit unsigned integers, not 32-bit floats.
+    basic_info.bits_per_sample = 8;
+    basic_info.exponent_bits_per_sample = 0;
+    basic_info.uses_original_profile = JXL_FALSE;
+    if (JXL_ENC_SUCCESS != JxlEncoderSetBasicInfo(enc.get(), &basic_info)) {
+        free(buffer);
+        return nil;
+    }
+
+    JxlColorEncoding color_encoding = {};
+    JxlColorEncodingSetToSRGB(&color_encoding,
+                              /*is_gray=*/pixel_format.num_channels < 3);
+    if (JXL_ENC_SUCCESS != JxlEncoderSetColorEncoding(enc.get(), &color_encoding)) {
+        free(buffer);
+        return nil;
+    }
+
+    JxlEncoderFrameSettings* frame_settings =
+        JxlEncoderFrameSettingsCreate(enc.get(), nullptr);
+
+    JxlEncoderSetFrameDistance(frame_settings, JXLGetDistance(quality));
+    JxlEncoderFrameSettingsSetOption(frame_settings, JXL_ENC_FRAME_SETTING_EFFORT, 8);
+    if (JXL_ENC_SUCCESS != JxlEncoderAddImageFrame(frame_settings, &pixel_format, buffer, bufferSize)) {
+        free(buffer);
+        return nil;
+    }
+    JxlEncoderCloseInput(enc.get());
+
+    // Drain the encoder, doubling the output buffer every time it reports that it needs more room.
+    NSMutableData *result = [[NSMutableData alloc] initWithLength:64];
+    uint8_t *next_out = (uint8_t *)result.mutableBytes;
+    size_t avail_out = result.length;
+    JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+    while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+        process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out);
+        if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+            size_t offset = next_out - ((uint8_t *)result.mutableBytes);
+            [result setLength:result.length * 2];
+            next_out = ((uint8_t *)result.mutableBytes) + offset;
+            avail_out = result.length - offset;
+        }
+    }
+    free(buffer);
+    if (JXL_ENC_SUCCESS != process_result) {
+        return nil;
+    }
+    [result setLength:next_out - ((uint8_t *)result.mutableBytes)];
+    return result;
+}
+
+// Decodes a JPEG XL bitstream into an opaque UIImage with 8 bits per channel.
+// Returns nil when the data is truncated or malformed, or when any decoder call fails.
+UIImage * _Nullable decompressJPEGXLData(NSData * _Nonnull data) {
+    auto dec = JxlDecoderMake(nullptr);
+    if (JXL_DEC_SUCCESS != JxlDecoderSubscribeEvents(dec.get(), JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING | JXL_DEC_FULL_IMAGE)) {
+        return nil;
+    }
+    
+    JxlBasicInfo info;
+    // Request interleaved 8-bit RGBA with tightly packed rows (no scanline alignment).
+    JxlPixelFormat format = {4, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0};
+    
+    // The whole file is handed over at once, so a later request for more input means it is truncated.
+    if (JXL_DEC_SUCCESS != JxlDecoderSetInput(dec.get(), (uint8_t const *)data.bytes, data.length)) {
+        return nil;
+    }
+    JxlDecoderCloseInput(dec.get());
+    
+    int xsize = 0;
+    int ysize = 0;
+    std::vector<uint8_t> icc_profile;
+    std::vector<uint8_t> pixels;
+    
+    while (true) {
+        JxlDecoderStatus status = JxlDecoderProcessInput(dec.get());
+        
+        if (status == JXL_DEC_ERROR) {
+            return nil;
+        } else if (status == JXL_DEC_NEED_MORE_INPUT) {
+            return nil;
+        } else if (status == JXL_DEC_BASIC_INFO) {
+            if (JXL_DEC_SUCCESS != JxlDecoderGetBasicInfo(dec.get(), &info)) {
+                return nil;
+            }
+            // Reject empty images and sizes whose RGBA byte count would not fit in an int.
+            if (info.xsize == 0 || info.ysize == 0 || info.xsize > (uint32_t)INT_MAX / 4 / info.ysize) {
+                return nil;
+            }
+            xsize = (int)info.xsize;
+            ysize = (int)info.ysize;
+        } else if (status == JXL_DEC_COLOR_ENCODING) {
+            // Get the ICC color profile of the pixel data
+            size_t icc_size;
+            if (JXL_DEC_SUCCESS != JxlDecoderGetICCProfileSize(dec.get(), JXL_COLOR_PROFILE_TARGET_DATA, &icc_size)) {
+                fprintf(stderr, "JxlDecoderGetICCProfileSize failed\n");
+                return nil;
+            }
+            icc_profile.resize(icc_size);
+            if (JXL_DEC_SUCCESS != JxlDecoderGetColorAsICCProfile(dec.get(), JXL_COLOR_PROFILE_TARGET_DATA, icc_profile.data(), icc_profile.size())) {
+                return nil;
+            }
+        } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+            size_t buffer_size;
+            if (JXL_DEC_SUCCESS != JxlDecoderImageOutBufferSize(dec.get(), &format, &buffer_size)) {
+                return nil;
+            }
+            // One byte per channel: the decoder must ask for exactly width * height * 4 bytes.
+            if (buffer_size != (size_t)xsize * (size_t)ysize * 4) {
+                return nil;
+            }
+            pixels.resize(buffer_size);
+            if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutBuffer(dec.get(), &format, pixels.data(), pixels.size())) {
+                return nil;
+            }
+        } else if (status == JXL_DEC_FULL_IMAGE) {
+            // Nothing to do. Do not yet return. If the image is an animation, more
+            // full frames may be decoded. This example only keeps the last one.
+        } else if (status == JXL_DEC_SUCCESS) {
+            // All decoding successfully finished.
+            // It's not required to call JxlDecoderReleaseInput(dec.get()) here since
+            // the decoder will be destroyed.
+            if (pixels.empty()) {
+                return nil;
+            }
+            
+            int targetBytesPerRow = xsize * 4;
+            NSData *resultData = [[NSData alloc] initWithBytes:pixels.data() length:pixels.size()];
+            CGDataProviderRef dataProvider = CGDataProviderCreateWithCFData((__bridge CFDataRef)resultData);
+
+            // Reuse the color space UIKit gives its own opaque bitmap contexts.
+            static CGColorSpaceRef imageColorSpace;
+            static dispatch_once_t onceToken;
+            dispatch_once(&onceToken, ^{
+                UIGraphicsBeginImageContextWithOptions(CGSizeMake(1, 1), YES, 0);
+                UIImage *refImage = UIGraphicsGetImageFromCurrentImageContext();
+                imageColorSpace = CGColorSpaceRetain(CGImageGetColorSpace(refImage.CGImage));
+                UIGraphicsEndImageContext();
+            });
+
+            // libjxl writes bytes in R, G, B, A order, so the layout is stated explicitly instead of
+            // borrowing the reference image's bitmap info, which may describe a different channel order.
+            CGBitmapInfo bitmapInfo = kCGImageAlphaNoneSkipLast | kCGBitmapByteOrder32Big;
+            CGImageRef cgImg = CGImageCreate(xsize, ysize, 8, 32, targetBytesPerRow, imageColorSpace, bitmapInfo, dataProvider, NULL, true, kCGRenderingIntentDefault);
+            CGDataProviderRelease(dataProvider);
+            if (cgImg == NULL) {
+                return nil;
+            }
+
+            UIImage *resultImage = [[UIImage alloc] initWithCGImage:cgImg];
+            CGImageRelease(cgImg);
+
+            return resultImage;
+        } else {
+            return nil;
+        }
+    }
+    return nil;
+}
+
+// Returns the fixed header byte pattern (it begins with the JPEG start-of-image marker).
+// The base64 literal is decoded only once and the resulting NSData is reused afterwards.
+static NSData *getHeaderPattern() {
+    static NSData *pattern = nil;
+    static dispatch_once_t token;
+    dispatch_once(&token, ^{ pattern = [[NSData alloc] initWithBase64EncodedString:@"/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDACgcHiMeGSgjISMtKygwPGRBPDc3PHtYXUlkkYCZlo+AjIqgtObDoKrarYqMyP/L2u71////m8H////6/+b9//j/2wBDASstLTw1PHZBQXb4pYyl+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj/wAARCAAAAAADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwA=" options:0]; });
+    return pattern;
+}
+
+// Returns the two-byte footer pattern: base64 "/9k=" decodes to FF D9, the JPEG end-of-image marker.
+// The NSData is created on first use and reused afterwards.
+static NSData *getFooterPattern() {
+    static NSData *pattern = nil;
+    static dispatch_once_t token;
+    dispatch_once(&token, ^{ pattern = [[NSData alloc] initWithBase64EncodedString:@"/9k=" options:0]; });
+    return pattern;
+}
+
+// Returns the byte offsets of every FF DA (JPEG start-of-scan) marker in `data` except the first one found.
+NSArray<NSNumber *> * _Nonnull extractJPEGDataScans(NSData * _Nonnull data) {
+    NSMutableArray<NSNumber *> *result = [[NSMutableArray alloc] init];
+    const uint8_t *dataBytes = (const uint8_t *)data.bytes;
+    int offset = 0;
+    // While `offset` is still 0 a match only moves `offset`; every later match is also appended to `result`.
+    while (offset < data.length) {
+        bool found = false;
+        for (int i = offset + 2; i < data.length - 1; i++) {
+            if (dataBytes[i] == 0xffU && dataBytes[i + 1] == 0xdaU) {
+                if (offset != 0) {
+                    [result addObject:@(i)];
+                }
+                offset = i;
+                found = true;
+            }
+        }
+        if (!found) {
+            break;
+        }
+    }
+    // Debug builds write each prefix of `data` that ends at a recorded offset, plus the full data, to a temp directory.
+#if DEBUG
+    static NSString *sessionPrefix = nil;
+    static dispatch_once_t onceToken;
+    dispatch_once(&onceToken, ^{
+        sessionPrefix = [NSString stringWithFormat:@"%u", arc4random()];
+    });
+    NSString *randomId = [NSString stringWithFormat:@"%u", arc4random()];
+    NSString *dirPath = [[NSTemporaryDirectory() stringByAppendingPathComponent:sessionPrefix] stringByAppendingPathComponent:randomId];
+    [[NSFileManager defaultManager] createDirectoryAtPath:dirPath withIntermediateDirectories:true attributes:nil error:nil];
+    for (int i = 0; i < result.count + 1; i++) {
+        NSString *filePath = [dirPath stringByAppendingPathComponent:[NSString stringWithFormat:@"%d.jpg", i]];
+        if (i == result.count) {
+            [data writeToFile:filePath atomically:true];
+        } else {
+            [[data subdataWithRange:NSMakeRange(0, [result[i] intValue])] writeToFile:filePath atomically:true];
+        }
+    }
+    NSLog(@"Path: %@", dirPath);
+#endif
+    
+    return result;
+}
+
+#if USE_JPEGLI
+// USE_JPEGLI build of compressJPEGData: progressive JPEG at quality 72, with the JINT_COMPRESS_PROFILE call left disabled.
+NSData * _Nullable compressJPEGData(UIImage * _Nonnull sourceImage) {
+    int width = (int)(sourceImage.size.width * sourceImage.scale);
+    int height = (int)(sourceImage.size.height * sourceImage.scale);
+    int targetBytesPerRow = ((4 * (int)width) + 31) & (~31);
+    uint8_t *targetMemory = (uint8_t *)malloc((int)(targetBytesPerRow * height));
+    
+    CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
+    CGBitmapInfo bitmapInfo = kCGImageAlphaNoneSkipFirst | kCGBitmapByteOrder32Host;
+    
+    CGContextRef targetContext = CGBitmapContextCreate(targetMemory, width, height, 8, targetBytesPerRow, colorSpace, bitmapInfo);
+    
+    UIGraphicsPushContext(targetContext);
+    
+    CGColorSpaceRelease(colorSpace);
+    
+    CGContextDrawImage(targetContext, CGRectMake(0, 0, width, height), sourceImage.CGImage);
+    
+    UIGraphicsPopContext();
+    // Repack the host-order xRGB words into R, G, B byte triplets for the JCS_RGB input declared below.
+    int bufferBytesPerRow = ((3 * (int)width) + 31) & (~31);
+    uint8_t *buffer = (uint8_t *)malloc(bufferBytesPerRow * height);
+    
+    for (int y = 0; y < height; y++) {
+        for (int x = 0; x < width; x++) {
+            uint32_t *color = ((uint32_t *)&targetMemory[y * targetBytesPerRow + x * 4]);
+            
+            uint32_t r = ((*color >> 16) & 0xff);
+            uint32_t g = ((*color >> 8) & 0xff);
+            uint32_t b = (*color & 0xff);
+            
+            buffer[y * bufferBytesPerRow + x * 3 + 0] = r;
+            buffer[y * bufferBytesPerRow + x * 3 + 1] = g;
+            buffer[y * bufferBytesPerRow + x * 3 + 2] = b;
+        }
+    }
+    
+    CGContextRelease(targetContext);
+    
+    free(targetMemory);
+    
+    struct jpeg_compress_struct cinfo;
+    struct jpeg_error_mgr jerr;
+    cinfo.err = jpeg_std_error(&jerr);
+    jpeg_create_compress(&cinfo);
+    
+    uint8_t *outBuffer = NULL;
+    unsigned long outSize = 0;
+    jpeg_mem_dest(&cinfo, &outBuffer, &outSize);
+    
+    cinfo.image_width = (uint32_t)width;
+    cinfo.image_height = (uint32_t)height;
+    cinfo.input_components = 3;
+    cinfo.in_color_space = JCS_RGB;
+    //jpeg_c_set_int_param(&cinfo, JINT_COMPRESS_PROFILE, JCP_FASTEST);
+    jpeg_set_defaults(&cinfo);
+    cinfo.arith_code = FALSE;
+    cinfo.dct_method = JDCT_ISLOW;
+    cinfo.optimize_coding = TRUE;
+    jpeg_set_quality(&cinfo, 72, 1);
+    jpeg_simple_progression(&cinfo);
+    jpeg_start_compress(&cinfo, 1);
+    
+    JSAMPROW rowPointer[1];
+    while (cinfo.next_scanline < cinfo.image_height) {
+        rowPointer[0] = (JSAMPROW)(buffer + cinfo.next_scanline * bufferBytesPerRow);
+        jpeg_write_scanlines(&cinfo, rowPointer, 1);
+    }
+    
+    jpeg_finish_compress(&cinfo);
+    // NSData copies the encoded bytes, so the buffer that jpeg_mem_dest allocated must still be freed below.
+    NSData *result = [[NSData alloc] initWithBytes:outBuffer length:outSize];
+    
+    jpeg_destroy_compress(&cinfo);
+    free(outBuffer);
+    free(buffer);
+    
+    return result;
+}
+#else
+// Encodes `sourceImage` as a progressive JPEG (quality 72, optimized Huffman coding) and returns the encoded bytes.
+NSData * _Nullable compressJPEGData(UIImage * _Nonnull sourceImage) {
+    int width = (int)(sourceImage.size.width * sourceImage.scale);
+    int height = (int)(sourceImage.size.height * sourceImage.scale);
+    int targetBytesPerRow = ((4 * (int)width) + 31) & (~31);
+    uint8_t *targetMemory = (uint8_t *)malloc((int)(targetBytesPerRow * height));
+    
+    CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
+    CGBitmapInfo bitmapInfo = kCGImageAlphaNoneSkipFirst | kCGBitmapByteOrder32Host;
+    
+    CGContextRef targetContext = CGBitmapContextCreate(targetMemory, width, height, 8, targetBytesPerRow, colorSpace, bitmapInfo);
+    
+    UIGraphicsPushContext(targetContext);
+    
+    CGColorSpaceRelease(colorSpace);
+    
+    CGContextDrawImage(targetContext, CGRectMake(0, 0, width, height), sourceImage.CGImage);
+    
+    UIGraphicsPopContext();
+    // Repack the host-order xRGB words into R, G, B byte triplets for the JCS_RGB input declared below.
+    int bufferBytesPerRow = ((3 * (int)width) + 31) & (~31);
+    uint8_t *buffer = (uint8_t *)malloc(bufferBytesPerRow * height);
+    
+    for (int y = 0; y < height; y++) {
+        for (int x = 0; x < width; x++) {
+            uint32_t *color = ((uint32_t *)&targetMemory[y * targetBytesPerRow + x * 4]);
+            
+            uint32_t r = ((*color >> 16) & 0xff);
+            uint32_t g = ((*color >> 8) & 0xff);
+            uint32_t b = (*color & 0xff);
+            
+            buffer[y * bufferBytesPerRow + x * 3 + 0] = r;
+            buffer[y * bufferBytesPerRow + x * 3 + 1] = g;
+            buffer[y * bufferBytesPerRow + x * 3 + 2] = b;
+        }
+    }
+    
+    CGContextRelease(targetContext);
+    
+    free(targetMemory);
+    
+    struct jpeg_compress_struct cinfo;
+    struct jpeg_error_mgr jerr;
+    cinfo.err = jpeg_std_error(&jerr);
+    jpeg_create_compress(&cinfo);
+    
+    uint8_t *outBuffer = NULL;
+    unsigned long outSize = 0;
+    jpeg_mem_dest(&cinfo, &outBuffer, &outSize);
+    
+    cinfo.image_width = (uint32_t)width;
+    cinfo.image_height = (uint32_t)height;
+    cinfo.input_components = 3;
+    cinfo.in_color_space = JCS_RGB;
+    jpeg_c_set_int_param(&cinfo, JINT_COMPRESS_PROFILE, JCP_FASTEST);
+    jpeg_set_defaults(&cinfo);
+    cinfo.arith_code = FALSE;
+    cinfo.dct_method = JDCT_ISLOW;
+    cinfo.optimize_coding = TRUE;
+    jpeg_set_quality(&cinfo, 72, 1);
+    jpeg_simple_progression(&cinfo);
+    jpeg_start_compress(&cinfo, 1);
+    
+    JSAMPROW rowPointer[1];
+    while (cinfo.next_scanline < cinfo.image_height) {
+        rowPointer[0] = (JSAMPROW)(buffer + cinfo.next_scanline * bufferBytesPerRow);
+        jpeg_write_scanlines(&cinfo, rowPointer, 1);
+    }
+    
+    jpeg_finish_compress(&cinfo);
+    // NSData copies the encoded bytes, so the buffer that jpeg_mem_dest allocated must still be freed below.
+    NSData *result = [[NSData alloc] initWithBytes:outBuffer length:outSize];
+    
+    jpeg_destroy_compress(&cinfo);
+    free(outBuffer);
+    free(buffer);
+    
+    return result;
+}
+#endif
+
+#if USE_JPEGLI
+/// Produces the compact "mini thumbnail" serialization of `image`.
+///
+/// The image is shrunk to fit inside `size` (it is never enlarged), drawn into an
+/// opaque 32-bit bitmap, repacked as interleaved RGB and JPEG-encoded at quality 20.
+/// When the encoded stream begins with the shared header pattern and ends with the
+/// shared footer pattern, those fixed parts are dropped and the result is laid out
+/// as: version byte (1), height byte, width byte, remaining JPEG payload.
+///
+/// @param image Source image.
+/// @param size  Maximum thumbnail dimensions.
+/// @return The serialized thumbnail, or nil when the image is empty, memory cannot
+///         be allocated, or the encoded stream does not match the expected patterns.
+NSData * _Nullable compressMiniThumbnail(UIImage * _Nonnull image, CGSize size) {
+    // Shrink to fit inside `size`, first by width and then by height.
+    CGSize fittedSize = image.size;
+    if (fittedSize.width > size.width) {
+        fittedSize = CGSizeMake(size.width, (int)((fittedSize.height * size.width / MAX(fittedSize.width, 1.0f))));
+    }
+    if (fittedSize.height > size.height) {
+        fittedSize = CGSizeMake((int)((fittedSize.width * size.height / MAX(fittedSize.height, 1.0f))), size.height);
+    }
+
+    int width = (int)fittedSize.width;
+    int height = (int)fittedSize.height;
+    if (width <= 0 || height <= 0) {
+        // Nothing to draw or encode.
+        return nil;
+    }
+
+    // Rows are padded to a multiple of 32 bytes. calloc zero-fills the bitmap so
+    // pixels the draw call leaves untouched are never read uninitialized.
+    int targetBytesPerRow = ((4 * width) + 31) & (~31);
+    uint8_t *targetMemory = calloc((size_t)height, (size_t)targetBytesPerRow);
+    if (targetMemory == NULL) {
+        return nil;
+    }
+
+    CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
+    CGBitmapInfo bitmapInfo = kCGImageAlphaNoneSkipFirst | kCGBitmapByteOrder32Host;
+    CGContextRef targetContext = CGBitmapContextCreate(targetMemory, width, height, 8, targetBytesPerRow, colorSpace, bitmapInfo);
+    CGColorSpaceRelease(colorSpace);
+    if (targetContext == NULL) {
+        free(targetMemory);
+        return nil;
+    }
+
+    UIGraphicsPushContext(targetContext);
+    CGContextDrawImage(targetContext, CGRectMake(0, 0, width, height), image.CGImage);
+    UIGraphicsPopContext();
+
+    int bufferBytesPerRow = ((3 * width) + 31) & (~31);
+    uint8_t *buffer = malloc((size_t)bufferBytesPerRow * (size_t)height);
+    if (buffer == NULL) {
+        CGContextRelease(targetContext);
+        free(targetMemory);
+        return nil;
+    }
+
+    // Repack each 32-bit pixel into three bytes (R, G, B) for the encoder.
+    for (int y = 0; y < height; y++) {
+        for (int x = 0; x < width; x++) {
+            uint32_t *color = ((uint32_t *)&targetMemory[y * targetBytesPerRow + x * 4]);
+
+            uint32_t r = ((*color >> 16) & 0xff);
+            uint32_t g = ((*color >> 8) & 0xff);
+            uint32_t b = (*color & 0xff);
+
+            buffer[y * bufferBytesPerRow + x * 3 + 0] = r;
+            buffer[y * bufferBytesPerRow + x * 3 + 1] = g;
+            buffer[y * bufferBytesPerRow + x * 3 + 2] = b;
+        }
+    }
+
+    CGContextRelease(targetContext);
+    free(targetMemory);
+
+    struct jpeg_compress_struct cinfo;
+    struct jpeg_error_mgr jerr;
+    cinfo.err = jpeg_std_error(&jerr);
+    jpeg_create_compress(&cinfo);
+
+    uint8_t *outBuffer = NULL;
+    unsigned long outSize = 0;
+    jpeg_mem_dest(&cinfo, &outBuffer, &outSize);
+
+    cinfo.image_width = (uint32_t)width;
+    cinfo.image_height = (uint32_t)height;
+    cinfo.input_components = 3;
+    cinfo.in_color_space = JCS_RGB;
+    //jpeg_c_set_int_param(&cinfo, JINT_COMPRESS_PROFILE, JCP_FASTEST);
+    jpeg_set_defaults(&cinfo);
+    cinfo.arith_code = FALSE;
+    cinfo.dct_method = JDCT_ISLOW;
+    cinfo.optimize_coding = FALSE;
+    jpeg_set_quality(&cinfo, 20, 1);
+    jpeg_start_compress(&cinfo, 1);
+
+    JSAMPROW rowPointer[1];
+    while (cinfo.next_scanline < cinfo.image_height) {
+        rowPointer[0] = (JSAMPROW)(buffer + cinfo.next_scanline * bufferBytesPerRow);
+        jpeg_write_scanlines(&cinfo, rowPointer, 1);
+    }
+
+    jpeg_finish_compress(&cinfo);
+
+    NSMutableData *serializedData = nil;
+
+    NSData *headerPattern = getHeaderPattern();
+    NSData *footerPattern = getFooterPattern();
+    // Only inspect the stream when it is long enough for every offset read below.
+    BOOL hasExpectedLength = outBuffer != NULL && headerPattern != nil && footerPattern != nil && outSize > 166 && outSize >= headerPattern.length + footerPattern.length;
+    if (hasExpectedLength && outBuffer[164] == height && outBuffer[166] == width) {
+        // Offsets 164 and 166 are expected to carry the height and width; they are
+        // cleared before the comparison (presumably the pattern stores zeros there).
+        outBuffer[164] = 0;
+        outBuffer[166] = 0;
+
+        if (memcmp(headerPattern.bytes, outBuffer, headerPattern.length) == 0) {
+            if (memcmp(footerPattern.bytes, outBuffer + outSize - footerPattern.length, footerPattern.length) == 0) {
+                serializedData = [[NSMutableData alloc] init];
+                uint8_t version = 1;
+                [serializedData appendBytes:&version length:1];
+                uint8_t outWidth = (uint8_t)width;
+                uint8_t outHeight = (uint8_t)height;
+                [serializedData appendBytes:&outHeight length:1];
+                [serializedData appendBytes:&outWidth length:1];
+                unsigned long contentSize = outSize - headerPattern.length - footerPattern.length;
+                [serializedData appendBytes:outBuffer + headerPattern.length length:contentSize];
+            }
+        }
+    }
+
+    jpeg_destroy_compress(&cinfo);
+
+    // The memory destination leaves ownership of the output buffer with the caller.
+    free(outBuffer);
+    free(buffer);
+
+    return serializedData;
+}
+#else
+/// Produces the compact "mini thumbnail" serialization of `image`.
+///
+/// The image is shrunk to fit inside `size` (it is never enlarged), drawn into an
+/// opaque 32-bit bitmap, repacked as interleaved RGB and JPEG-encoded at quality 20
+/// using the fastest compression profile. When the encoded stream begins with the
+/// shared header pattern and ends with the shared footer pattern, those fixed parts
+/// are dropped and the result is laid out as: version byte (1), height byte,
+/// width byte, remaining JPEG payload.
+///
+/// @param image Source image.
+/// @param size  Maximum thumbnail dimensions.
+/// @return The serialized thumbnail, or nil when the image is empty, memory cannot
+///         be allocated, or the encoded stream does not match the expected patterns.
+NSData * _Nullable compressMiniThumbnail(UIImage * _Nonnull image, CGSize size) {
+    // Shrink to fit inside `size`, first by width and then by height.
+    CGSize fittedSize = image.size;
+    if (fittedSize.width > size.width) {
+        fittedSize = CGSizeMake(size.width, (int)((fittedSize.height * size.width / MAX(fittedSize.width, 1.0f))));
+    }
+    if (fittedSize.height > size.height) {
+        fittedSize = CGSizeMake((int)((fittedSize.width * size.height / MAX(fittedSize.height, 1.0f))), size.height);
+    }
+
+    int width = (int)fittedSize.width;
+    int height = (int)fittedSize.height;
+    if (width <= 0 || height <= 0) {
+        // Nothing to draw or encode.
+        return nil;
+    }
+
+    // Rows are padded to a multiple of 32 bytes. calloc zero-fills the bitmap so
+    // pixels the draw call leaves untouched are never read uninitialized.
+    int targetBytesPerRow = ((4 * width) + 31) & (~31);
+    uint8_t *targetMemory = (uint8_t *)calloc((size_t)height, (size_t)targetBytesPerRow);
+    if (targetMemory == NULL) {
+        return nil;
+    }
+
+    CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
+    CGBitmapInfo bitmapInfo = kCGImageAlphaNoneSkipFirst | kCGBitmapByteOrder32Host;
+    CGContextRef targetContext = CGBitmapContextCreate(targetMemory, width, height, 8, targetBytesPerRow, colorSpace, bitmapInfo);
+    CGColorSpaceRelease(colorSpace);
+    if (targetContext == NULL) {
+        free(targetMemory);
+        return nil;
+    }
+
+    UIGraphicsPushContext(targetContext);
+    CGContextDrawImage(targetContext, CGRectMake(0, 0, width, height), image.CGImage);
+    UIGraphicsPopContext();
+
+    int bufferBytesPerRow = ((3 * width) + 31) & (~31);
+    uint8_t *buffer = (uint8_t *)malloc((size_t)bufferBytesPerRow * (size_t)height);
+    if (buffer == NULL) {
+        CGContextRelease(targetContext);
+        free(targetMemory);
+        return nil;
+    }
+
+    // Repack each 32-bit pixel into three bytes (R, G, B) for the encoder.
+    for (int y = 0; y < height; y++) {
+        for (int x = 0; x < width; x++) {
+            uint32_t *color = ((uint32_t *)&targetMemory[y * targetBytesPerRow + x * 4]);
+
+            uint32_t r = ((*color >> 16) & 0xff);
+            uint32_t g = ((*color >> 8) & 0xff);
+            uint32_t b = (*color & 0xff);
+
+            buffer[y * bufferBytesPerRow + x * 3 + 0] = r;
+            buffer[y * bufferBytesPerRow + x * 3 + 1] = g;
+            buffer[y * bufferBytesPerRow + x * 3 + 2] = b;
+        }
+    }
+
+    CGContextRelease(targetContext);
+    free(targetMemory);
+
+    struct jpeg_compress_struct cinfo;
+    struct jpeg_error_mgr jerr;
+    cinfo.err = jpeg_std_error(&jerr);
+    jpeg_create_compress(&cinfo);
+
+    uint8_t *outBuffer = NULL;
+    unsigned long outSize = 0;
+    jpeg_mem_dest(&cinfo, &outBuffer, &outSize);
+
+    cinfo.image_width = (uint32_t)width;
+    cinfo.image_height = (uint32_t)height;
+    cinfo.input_components = 3;
+    cinfo.in_color_space = JCS_RGB;
+    jpeg_c_set_int_param(&cinfo, JINT_COMPRESS_PROFILE, JCP_FASTEST);
+    jpeg_set_defaults(&cinfo);
+    cinfo.arith_code = FALSE;
+    cinfo.dct_method = JDCT_ISLOW;
+    cinfo.optimize_coding = FALSE;
+    jpeg_set_quality(&cinfo, 20, 1);
+    jpeg_start_compress(&cinfo, 1);
+
+    JSAMPROW rowPointer[1];
+    while (cinfo.next_scanline < cinfo.image_height) {
+        rowPointer[0] = (JSAMPROW)(buffer + cinfo.next_scanline * bufferBytesPerRow);
+        jpeg_write_scanlines(&cinfo, rowPointer, 1);
+    }
+
+    jpeg_finish_compress(&cinfo);
+
+    NSMutableData *serializedData = nil;
+
+    NSData *headerPattern = getHeaderPattern();
+    NSData *footerPattern = getFooterPattern();
+    // Only inspect the stream when it is long enough for every offset read below.
+    BOOL hasExpectedLength = outBuffer != NULL && headerPattern != nil && footerPattern != nil && outSize > 166 && outSize >= headerPattern.length + footerPattern.length;
+    if (hasExpectedLength && outBuffer[164] == height && outBuffer[166] == width) {
+        // Offsets 164 and 166 are expected to carry the height and width; they are
+        // cleared before the comparison (presumably the pattern stores zeros there).
+        outBuffer[164] = 0;
+        outBuffer[166] = 0;
+
+        if (memcmp(headerPattern.bytes, outBuffer, headerPattern.length) == 0) {
+            if (memcmp(footerPattern.bytes, outBuffer + outSize - footerPattern.length, footerPattern.length) == 0) {
+                serializedData = [[NSMutableData alloc] init];
+                uint8_t version = 1;
+                [serializedData appendBytes:&version length:1];
+                uint8_t outWidth = (uint8_t)width;
+                uint8_t outHeight = (uint8_t)height;
+                [serializedData appendBytes:&outHeight length:1];
+                [serializedData appendBytes:&outWidth length:1];
+                unsigned long contentSize = outSize - headerPattern.length - footerPattern.length;
+                [serializedData appendBytes:outBuffer + headerPattern.length length:contentSize];
+            }
+        }
+    }
+
+    jpeg_destroy_compress(&cinfo);
+
+    // The memory destination leaves ownership of the output buffer with the caller.
+    free(outBuffer);
+    free(buffer);
+
+    return serializedData;
+}
+#endif
+
+#if USE_JPEGLI
+/// Decodes image data by handing it to `+[UIImage imageWithData:]`.
+///
+/// @param sourceData Encoded image bytes.
+/// @return Whatever `imageWithData:` produces for the given bytes (may be nil).
+UIImage * _Nullable decompressImage(NSData * _Nonnull sourceData) {
+    UIImage *decodedImage = [UIImage imageWithData:sourceData];
+    return decodedImage;
+}
+#else
+/// Decodes JPEG data into a UIImage using TurboJPEG and vImage.
+///
+/// The stream is decoded to packed RGB, expanded to four channels with an opaque
+/// alpha byte, and the channel order is reversed before the pixels are wrapped in
+/// a CGImage that uses the color space and bitmap layout of a UIKit image context.
+///
+/// @param sourceData JPEG-encoded bytes.
+/// @return The decoded image, or nil when the header cannot be parsed, the
+///         dimensions are not positive, or memory cannot be allocated.
+UIImage * _Nullable decompressImage(NSData * _Nonnull sourceData) {
+    long unsigned int jpegSize = sourceData.length;
+    unsigned char *_compressedImage = (unsigned char *)sourceData.bytes;
+
+    int jpegSubsamp = 0, width = 0, height = 0;
+
+    tjhandle _jpegDecompressor = tjInitDecompress();
+    if (_jpegDecompressor == NULL) {
+        return nil;
+    }
+
+    if (tjDecompressHeader2(_jpegDecompressor, _compressedImage, jpegSize, &width, &height, &jpegSubsamp) != 0 || width <= 0 || height <= 0) {
+        // Release the handle on the failure path as well.
+        tjDestroy(_jpegDecompressor);
+        return nil;
+    }
+
+    // Rows are padded to a multiple of 32 bytes; totals are computed in size_t so
+    // large images cannot overflow int arithmetic.
+    int sourceBytesPerRow = (3 * width + 31) & ~0x1F;
+    int targetBytesPerRow = (4 * width + 31) & ~0x1F;
+    size_t sourceLength = (size_t)sourceBytesPerRow * (size_t)height;
+    size_t targetLength = (size_t)targetBytesPerRow * (size_t)height;
+
+    // Zero-filled so that a failed or partial decode never exposes uninitialized memory.
+    unsigned char *buffer = (uint8_t *)calloc(1, sourceLength);
+    if (buffer == NULL) {
+        tjDestroy(_jpegDecompressor);
+        return nil;
+    }
+
+    // The return value is deliberately ignored (as before): a non-zero result may
+    // only signal a warning while the pixel data is still usable.
+    tjDecompress2(_jpegDecompressor, _compressedImage, jpegSize, buffer, width, sourceBytesPerRow, height, TJPF_RGB, TJFLAG_FASTDCT | TJFLAG_FASTUPSAMPLE);
+
+    tjDestroy(_jpegDecompressor);
+
+    vImage_Buffer source;
+    source.width = width;
+    source.height = height;
+    source.rowBytes = sourceBytesPerRow;
+    source.data = buffer;
+
+    vImage_Buffer target;
+    target.width = width;
+    target.height = height;
+    target.rowBytes = targetBytesPerRow;
+
+    unsigned char *targetBuffer = (uint8_t *)malloc(targetLength);
+    if (targetBuffer == NULL) {
+        free(buffer);
+        return nil;
+    }
+    target.data = targetBuffer;
+
+    // RGB -> ARGB with a constant 0xff alpha channel.
+    vImageConvert_RGB888toARGB8888(&source, nil, 0xff, &target, false, kvImageDoNotTile);
+
+    free(buffer);
+
+    vImage_Buffer permuteTarget;
+    permuteTarget.width = width;
+    permuteTarget.height = height;
+    permuteTarget.rowBytes = targetBytesPerRow;
+
+    unsigned char *permuteTargetBuffer = (uint8_t *)malloc(targetLength);
+    if (permuteTargetBuffer == NULL) {
+        free(targetBuffer);
+        return nil;
+    }
+    permuteTarget.data = permuteTargetBuffer;
+
+    // Reverse the channel order of every pixel.
+    const uint8_t permuteMap[4] = {3,2,1,0};
+    vImagePermuteChannels_ARGB8888(&target, &permuteTarget, permuteMap, kvImageDoNotTile);
+
+    free(targetBuffer);
+
+    // The NSData takes ownership of the pixel buffer and frees it when released.
+    NSData *resultData = [[NSData alloc] initWithBytesNoCopy:permuteTargetBuffer length:targetLength deallocator:^(void * _Nonnull bytes, __unused NSUInteger length) {
+        free(bytes);
+    }];
+
+    CGDataProviderRef dataProvider = CGDataProviderCreateWithCFData((__bridge CFDataRef)resultData);
+
+    // Capture the color space and bitmap info of a UIKit image context once.
+    static CGColorSpaceRef imageColorSpace;
+    static CGBitmapInfo bitmapInfo;
+    static dispatch_once_t onceToken;
+    dispatch_once(&onceToken, ^{
+        UIGraphicsBeginImageContextWithOptions(CGSizeMake(1, 1), YES, 0);
+        UIImage *refImage = UIGraphicsGetImageFromCurrentImageContext();
+        imageColorSpace = CGColorSpaceRetain(CGImageGetColorSpace(refImage.CGImage));
+        bitmapInfo = CGImageGetBitmapInfo(refImage.CGImage);
+        UIGraphicsEndImageContext();
+    });
+
+    CGImageRef cgImg = CGImageCreate(width, height, 8, 32, targetBytesPerRow, imageColorSpace, bitmapInfo, dataProvider, NULL, true, kCGRenderingIntentDefault);
+
+    CGDataProviderRelease(dataProvider);
+
+    if (cgImg == NULL) {
+        return nil;
+    }
+
+    UIImage *resultImage = [[UIImage alloc] initWithCGImage:cgImg];
+    CGImageRelease(cgImg);
+
+    return resultImage;
+}
+#endif
diff --git a/submodules/TelegramUIPreferences/Sources/ExperimentalUISettings.swift b/submodules/TelegramUIPreferences/Sources/ExperimentalUISettings.swift
index 54a2987973..306c41662c 100644
--- a/submodules/TelegramUIPreferences/Sources/ExperimentalUISettings.swift
+++ b/submodules/TelegramUIPreferences/Sources/ExperimentalUISettings.swift
@@ -51,6 +51,7 @@ public struct ExperimentalUISettings: Codable, Equatable {
     public var disableBackgroundAnimation: Bool
     public var logLanguageRecognition: Bool
     public var storiesExperiment: Bool
+    public var storiesJpegExperiment: Bool
     
     public static var defaultSettings: ExperimentalUISettings {
         return ExperimentalUISettings(
@@ -79,7 +80,8 @@ public struct ExperimentalUISettings: Codable, Equatable {
             disableImageContentAnalysis: false,
             disableBackgroundAnimation: false,
             logLanguageRecognition: false,
-            storiesExperiment: false
+            storiesExperiment: false,
+            storiesJpegExperiment: false
         )
     }
     
@@ -109,7 +111,8 @@ public struct ExperimentalUISettings: Codable, Equatable {
         disableImageContentAnalysis: Bool,
         disableBackgroundAnimation: Bool,
         logLanguageRecognition: Bool,
-        storiesExperiment: Bool
+        storiesExperiment: Bool,
+        storiesJpegExperiment: Bool
     ) {
         self.keepChatNavigationStack = keepChatNavigationStack
         self.skipReadHistory = skipReadHistory
@@ -137,6 +140,7 @@ public struct ExperimentalUISettings: Codable, Equatable {
         self.disableBackgroundAnimation = disableBackgroundAnimation
         self.logLanguageRecognition = logLanguageRecognition
         self.storiesExperiment = storiesExperiment
+        self.storiesJpegExperiment = storiesJpegExperiment
     }
     
     public init(from decoder: Decoder) throws {
@@ -168,6 +172,7 @@ public struct ExperimentalUISettings: Codable, Equatable {
         self.disableBackgroundAnimation = try container.decodeIfPresent(Bool.self, forKey: "disableBackgroundAnimation") ?? false
         self.logLanguageRecognition = try container.decodeIfPresent(Bool.self, forKey: "logLanguageRecognition") ?? false
         self.storiesExperiment = try container.decodeIfPresent(Bool.self, forKey: "storiesExperiment") ?? false
+        self.storiesJpegExperiment = try container.decodeIfPresent(Bool.self, forKey: "storiesJpegExperiment") ?? false
     }
     
     public func encode(to encoder: Encoder) throws {
@@ -199,6 +204,7 @@ public struct ExperimentalUISettings: Codable, Equatable {
         try container.encode(self.disableBackgroundAnimation, forKey: "disableBackgroundAnimation")
         try container.encode(self.logLanguageRecognition, forKey: "logLanguageRecognition")
         try container.encode(self.storiesExperiment, forKey: "storiesExperiment")
+        try container.encode(self.storiesJpegExperiment, forKey: "storiesJpegExperiment")
     }
 }
 
diff --git a/third-party/libjxl/BUILD b/third-party/libjxl/BUILD
new file mode 100644
index 0000000000..ae93a6b1a0
--- /dev/null
+++ b/third-party/libjxl/BUILD
@@ -0,0 +1,129 @@
+
+# Public headers that the :libjxl_build genrule copies from
+# "$BUILD_DIR/build/lib/include/" into "Public/".
+headers = [
+    "jxl/codestream_header.h",
+    "jxl/cms_interface.h",
+    "jxl/color_encoding.h",
+    "jxl/decode_cxx.h",
+    "jxl/decode.h",
+    "jxl/encode_cxx.h",
+    "jxl/encode.h",
+    "jxl/jxl_export.h",
+    "jxl/jxl_threads_export.h",
+    "jxl/memory_manager.h",
+    "jxl/parallel_runner.h",
+    "jxl/stats.h",
+    "jxl/types.h",
+    "jxl/version.h",
+]
+
+# Library names without the "lib" prefix; the genrule copies "lib<name>.a"
+# from "$BUILD_DIR/build/lib/".
+libs = [
+    "jxl",
+]
+
+
+# Archive names (without ".a") copied from "$BUILD_DIR/build/third_party/brotli/".
+brotli_libs = [
+    "libbrotlicommon",
+    "libbrotlidec",
+    "libbrotlienc",
+]
+
+# Archive names (without ".a") copied from "$BUILD_DIR/build/third_party/highway/".
+highway_libs = [
+    "libhwy"
+]
+
+# Every file under "libjxl/", used as an input of the :libjxl_build genrule.
+filegroup(
+    name = "libjxl_sources",
+    srcs = glob(["libjxl/**/*"]),
+)
+
+# Builds libjxl with CMake for the current $(TARGET_CPU) by running
+# build-libjxl-bazel.sh on a private copy of the sources, then copies the
+# resulting headers and static archives into the declared outputs.
+#
+# NOTE(review): "ios_armv7" is mapped to "armv7", but build-libjxl-bazel.sh only
+# accepts arm64, sim_arm64 and x86_64 -- confirm whether armv7 is still needed.
+genrule(
+    name = "libjxl_build",
+    srcs = [
+        "build-libjxl-bazel.sh",
+        ":libjxl_sources",
+        "@cmake_tar_gz//file",
+    ],
+    cmd_bash =
+    """
+    set -ex
+
+    if [ "$(TARGET_CPU)" == "ios_armv7" ]; then
+        BUILD_ARCH="armv7"
+    elif [ "$(TARGET_CPU)" == "ios_arm64" ]; then
+        BUILD_ARCH="arm64"
+    elif [ "$(TARGET_CPU)" == "ios_sim_arm64" ]; then
+        BUILD_ARCH="sim_arm64"
+    elif [ "$(TARGET_CPU)" == "ios_x86_64" ]; then
+        BUILD_ARCH="x86_64"
+    else
+        echo "Unsupported architecture $(TARGET_CPU)"
+        exit 1
+    fi
+
+    BUILD_DIR="$(RULEDIR)/build_$${BUILD_ARCH}"
+    rm -rf "$$BUILD_DIR"
+    mkdir -p "$$BUILD_DIR"
+
+    CMAKE_DIR="$$(pwd)/$$BUILD_DIR/cmake"
+    rm -rf "$$CMAKE_DIR"
+    mkdir -p "$$CMAKE_DIR"
+    tar -xzf "$(location @cmake_tar_gz//file)" -C "$$CMAKE_DIR"
+
+    cp $(location :build-libjxl-bazel.sh) "$$BUILD_DIR/"
+
+    SOURCE_PATH="third-party/libjxl/libjxl"
+
+    cp -R "$$SOURCE_PATH" "$$BUILD_DIR/"
+
+    mkdir -p "$$BUILD_DIR/Public/jxl"
+
+    PATH="$$PATH:$$CMAKE_DIR/cmake-3.23.1-macos-universal/CMake.app/Contents/bin" sh $$BUILD_DIR/build-libjxl-bazel.sh $$BUILD_ARCH "$$BUILD_DIR/libjxl" "$$BUILD_DIR"
+    """ +
+    # One "cp" per declared output: headers first, then the static archives.
+    "\n".join([
+        "cp -f \"$$BUILD_DIR/build/lib/include/{}\" \"$(location Public/{})\"".format(header, header) for header in headers
+    ]) +
+    "\n" +
+    "\n".join([
+        "cp -f \"$$BUILD_DIR/build/lib/lib{}.a\" \"$(location Public/jxl/lib/lib{}.a)\"".format(lib, lib) for lib in libs
+    ]) +
+    "\n" +
+    "\n".join([
+        "cp -f \"$$BUILD_DIR/build/third_party/brotli/{}.a\" \"$(location Public/jxl/lib/{}.a)\"".format(lib, lib) for lib in brotli_libs
+    ]) +
+    "\n" +
+    "\n".join([
+        "cp -f \"$$BUILD_DIR/build/third_party/highway/{}.a\" \"$(location Public/jxl/lib/{}.a)\"".format(lib, lib) for lib in highway_libs
+    ]),
+    outs = ["Public/" + x for x in headers] +
+    ["Public/jxl/lib/lib{}.a".format(x) for x in libs] +
+    ["Public/jxl/lib/{}.a".format(x) for x in brotli_libs] +
+    ["Public/jxl/lib/{}.a".format(x) for x in highway_libs],
+    visibility = [
+        "//visibility:public",
+    ]
+)
+
+# Wraps the static archives emitted by :libjxl_build (libjxl, brotli, highway).
+cc_library(
+    name = "jxl_lib",
+    srcs = [
+        ":Public/jxl/lib/{}.a".format(archive)
+        for archive in ["lib" + x for x in libs] + brotli_libs + highway_libs
+    ],
+)
+
+# Exposes the copied libjxl headers as the "jxl" module and links :jxl_lib.
+objc_library(
+    name = "jxl",
+    module_name = "jxl",
+    enable_modules = True,
+    hdrs = [":Public/{}".format(header) for header in headers],
+    includes = ["Public", "Public/jxl"],
+    deps = [":jxl_lib"],
+    visibility = ["//visibility:public"],
+)
diff --git a/third-party/libjxl/build-libjxl-bazel.sh b/third-party/libjxl/build-libjxl-bazel.sh
new file mode 100755
index 0000000000..42188d8120
--- /dev/null
+++ b/third-party/libjxl/build-libjxl-bazel.sh
@@ -0,0 +1,65 @@
+#! /bin/sh
+
+set -e
+
+ARCH="$1"
+
+SOURCE_DIR="$2"
+BUILD_DIR=$(echo "$(cd "$(dirname "$3")"; pwd -P)/$(basename "$3")")
+
+RSSS="9"
+
+CMAKE_OPTIONS="-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF -DJPEGXL_ENABLE_BENCHMARK=0 -DJPEGXL_ENABLE_FUZZERS=0 -DJPEGXL_ENABLE_TOOLS=0 -DJPEGXL_ENABLE_JPEGLI=0 -DJPEGXL_ENABLE_DOXYGEN=0 -DJPEGXL_ENABLE_MANPAGES=0 -DJPEGXL_ENABLE_BENCHMARK=0 -DJPEGXL_ENABLE_EXAMPLES=0 -DJPEGXL_BUNDLE_LIBPNG=0 -DJPEGXL_ENABLE_JNI=0 -DJPEGXL_ENABLE_SJPEG=0 -DJPEGXL_ENABLE_OPENEXR=0 -DJPEGXL_ENABLE_TRANSCODE_JPEG=0 -DJPEGXL_STATIC=1 -DJPEGXL_ENABLE_BOXES=0"
+
+if [ "$ARCH" = "arm64" ]; then
+  IOS_PLATFORMDIR="$(xcode-select -p)/Platforms/iPhoneOS.platform"
+  IOS_SYSROOT=($IOS_PLATFORMDIR/Developer/SDKs/iPhoneOS*.sdk)
+  export CFLAGS="-Wall -arch arm64 -miphoneos-version-min=11.0 -funwind-tables"
+
+  cd "$BUILD_DIR"
+  mkdir build
+  cd build
+
+  touch toolchain.cmake
+  echo "set(CMAKE_SYSTEM_NAME Darwin)" >> toolchain.cmake
+  echo "set(CMAKE_SYSTEM_PROCESSOR aarch64)" >> toolchain.cmake
+  echo "set(CMAKE_C_COMPILER $(xcode-select -p)/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang)" >> toolchain.cmake
+
+  cmake -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake -DCMAKE_OSX_SYSROOT=${IOS_SYSROOT[0]} $CMAKE_OPTIONS ../libjxl
+  make
+elif [ "$ARCH" = "sim_arm64" ]; then
+  IOS_PLATFORMDIR="$(xcode-select -p)/Platforms/iPhoneSimulator.platform"
+  IOS_SYSROOT=($IOS_PLATFORMDIR/Developer/SDKs/iPhoneSimulator*.sdk)
+  export CFLAGS="-Wall -arch arm64 --target=arm64-apple-ios11.0-simulator -miphonesimulator-version-min=11.0 -funwind-tables"
+
+  cd "$BUILD_DIR"
+  mkdir build
+  cd build
+
+  touch toolchain.cmake
+  echo "set(CMAKE_SYSTEM_NAME Darwin)" >> toolchain.cmake
+  echo "set(CMAKE_SYSTEM_PROCESSOR aarch64)" >> toolchain.cmake
+  echo "set(CMAKE_C_COMPILER $(xcode-select -p)/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang)" >> toolchain.cmake
+
+  cmake -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake -DCMAKE_OSX_SYSROOT=${IOS_SYSROOT[0]} $CMAKE_OPTIONS ../libjxl
+  make
+elif [ "$ARCH" = "x86_64" ]; then
+  IOS_PLATFORMDIR="$(xcode-select -p)/Platforms/iPhoneSimulator.platform"
+  IOS_SYSROOT=($IOS_PLATFORMDIR/Developer/SDKs/iPhoneSimulator*.sdk)
+  export CFLAGS="-Wall -arch x86_64 -miphoneos-version-min=11.0 -funwind-tables"
+
+  cd "$BUILD_DIR"
+  mkdir build
+  cd build
+
+  touch toolchain.cmake
+  echo "set(CMAKE_SYSTEM_NAME Darwin)" >> toolchain.cmake
+  echo "set(CMAKE_SYSTEM_PROCESSOR AMD64)" >> toolchain.cmake
+  echo "set(CMAKE_C_COMPILER $(xcode-select -p)/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang)" >> toolchain.cmake
+
+  cmake -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake -DCMAKE_OSX_SYSROOT=${IOS_SYSROOT[0]} $CMAKE_OPTIONS ../libjxl
+  make
+else
+  echo "Unsupported architecture $ARCH"
+  exit 1
+fi
diff --git a/third-party/libjxl/libjxl/AUTHORS b/third-party/libjxl/libjxl/AUTHORS
new file mode 100644
index 0000000000..ab9dabe3af
--- /dev/null
+++ b/third-party/libjxl/libjxl/AUTHORS
@@ -0,0 +1,73 @@
+# List of the project authors for copyright purposes. When contributing to the
+# project add your name or your organization's name to this list. See
+# CONTRIBUTING.md for details.
+#
+# For organizations:
+#   Organization <email pattern: *@domain>
+#
+# For individuals:
+#   Name <email address>
+#
+# Please keep each list sorted. If you wish to change your email address please
+# send a pull request.
+
+# Organizations:
+Cloudinary Ltd. <*@cloudinary.com>
+Google LLC <*@google.com>
+
+# Individuals:
+a-shvedov
+Alex Xu (Hello71) <alex_y_xu@yahoo.ca>
+Alexander Sago <cagelight@gmail.com>
+Alistair Barrow
+Andrius Lukas Narbutas <andrius4669@gmail.com>
+Aous Naman <aous@unsw.edu.au>
+Artem Selishchev
+Biswapriyo Nath <nathbappai@gmail.com>
+CanadianBaconBoi <beamconnor@gmail.com>
+Damiano Albani <damiano.albani@gmail.com>
+Daniel Novomeský <dnovomesky@gmail.com>
+David Burnett <vargolsoft@gmail.com>
+dependabot[bot]
+Diego Pino <dpino@igalia.com>
+Dirk Lemstra <dirk@lemstra.org>
+Don Olmstead <don.j.olmstead@gmail.com>
+Dong Xu <xdong181@gmail.com>
+Even Rouault <even.rouault@spatialys.com>
+Fred Brennan <copypaste@kittens.ph>
+gi-man
+Gilles Devillers (GilDev) <gildev@gmail.com>
+Heiko Becker <heirecka@exherbo.org>
+Jim Robinson <jimbo2150@gmail.com>
+Jon Sneyers <jon@cloudinary.com>
+Jonathan Brown (Jonnyawsom3) <jonathanbr30@gmail.com>
+Joshua Root <jmr@macports.org>
+Kai Hollberg <Schweinepriester@users.noreply.github.com>
+Kleis Auke Wolthuizen <github@kleisauke.nl>
+L. E. Segovia
+Leo Izen <leo.izen@gmail.com>
+Lovell Fuller
+Maarten DB <anonymous.maarten@gmail.com>
+Marcin Konicki <ahwayakchih@gmail.com>
+Martin Strunz
+Mathieu Malaterre <mathieu.malaterre@gmail.com>
+Mikk Leini <mikk.leini@krakul.eu>
+Misaki Kasumi <misakikasumi@outlook.com>
+Moonchild Straver <moonchild@palemoon.org>
+Nicholas Hayes <0xC0000054@users.noreply.github.com>
+Nigel Tao <nigeltao@golang.org>
+Petr Diblík
+Pieter Wuille
+roland-rollo
+Samuel Leong <wvvwvvvvwvvw@gmail.com>
+Sandro <sandro.jaeckel@gmail.com>
+Sergey Fedorov <vital.had@gmail.com>
+Stephan T. Lavavej <stl@nuwen.net>
+Sylvestre Ledru <sylvestre@debian.org>
+Thomas Bonfort <thomas.bonfort@airbus.com>
+tmkk <tmkkmac@gmail.com>
+Vincent Torri <vincent.torri@gmail.com>
+xiota
+Yonatan Nebenzhal <yonatan.nebenzhl@gmail.com>
+Ziemowit Zabawa <ziemek.zabawa@outlook.com>
+源文雨 <41315874+fumiama@users.noreply.github.com>
diff --git a/third-party/libjxl/libjxl/BUILD.bazel b/third-party/libjxl/libjxl/BUILD.bazel
new file mode 100644
index 0000000000..0b81fc7b8a
--- /dev/null
+++ b/third-party/libjxl/libjxl/BUILD.bazel
@@ -0,0 +1,22 @@
+package(default_visibility = ["//:__subpackages__"])
+
+# Test fixtures, selected by file extension (each pattern listed once).
+filegroup(
+    name = "testdata",
+    srcs = glob([
+        "testdata/**/*.icc",
+        "testdata/**/*.pam",
+        "testdata/**/*.pfm",
+        "testdata/**/*.pgm",
+        "testdata/**/*.pnm",
+        "testdata/**/*.ppm",
+        "testdata/**/*.png",
+        "testdata/**/*.jpg",
+        "testdata/**/*.jxl",
+        "testdata/**/*.gif",
+        "testdata/**/*.y4m",
+        "testdata/position_encoding/*.txt",
+    ]),
+)
diff --git a/third-party/libjxl/libjxl/BUILDING.md b/third-party/libjxl/libjxl/BUILDING.md
new file mode 100644
index 0000000000..7e9bc2aad3
--- /dev/null
+++ b/third-party/libjxl/libjxl/BUILDING.md
@@ -0,0 +1,85 @@
+# Compilation
+
+For more details and other workflows see the "Advanced guide" below.
+
+## Checking out the code
+
+```bash
+git clone https://github.com/libjxl/libjxl.git --recursive --shallow-submodules
+```
+
+This repository uses git submodules to handle some third party dependencies
+under `third_party`, that's why it is important to pass `--recursive`. If you
+didn't check out with `--recursive`, or any submodule has changed, run:
+
+```bash
+git submodule update --init --recursive --depth 1 --recommend-shallow
+```
+
+The `--shallow-submodules` and `--depth 1 --recommend-shallow` options create
+shallow clones, which download only the requested commits; that is all that is
+needed to build `libjxl`. Should full clones be necessary, you can always run:
+
+```bash
+git submodule foreach git fetch --unshallow
+git submodule update --init --recursive
+```
+
+which pulls the rest of the commits in the submodules.
+
+Important: If you downloaded a zip file or tarball from the web interface you
+won't get the needed submodules and the code will not compile. You can download
+these external dependencies from source running `./deps.sh`. The git workflow
+described above is recommended instead.
+
+## Installing dependencies
+
+To install the dependencies required for compiling the code on a Debian/Ubuntu
+based distribution, run:
+
+```bash
+sudo apt install cmake pkg-config libbrotli-dev
+```
+
+To install the optional dependencies for supporting other formats in the
+`cjxl`/`djxl` tools on a Debian/Ubuntu based distribution, run:
+
+```bash
+sudo apt install libgif-dev libjpeg-dev libopenexr-dev libpng-dev libwebp-dev
+```
+
+We recommend using a recent Clang compiler (version 7 or newer). To do so,
+install clang and set the `CC` and `CXX` variables:
+
+```bash
+sudo apt install clang
+export CC=clang CXX=clang++
+```
+
+## Building
+
+```bash
+cd libjxl
+mkdir build
+cd build
+cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF ..
+cmake --build . -- -j$(nproc)
+```
+
+The encoder/decoder tools will be available in the `build/tools` directory.
+
+## <a name="installing"></a> Installing
+
+```bash
+sudo cmake --install .
+```
+
+
+## Building JPEG XL for developers
+
+For experienced developers, we provide build instructions for several other environments:
+
+*   [Building on Debian](doc/developing_in_debian.md)
+*   Building on Windows with [vcpkg](doc/developing_in_windows_vcpkg.md) (Visual Studio 2019)
+*   Building on Windows with [MSYS2](doc/developing_in_windows_msys.md)
+*   [Cross Compiling for Windows with Crossroad](doc/developing_with_crossroad.md)
diff --git a/third-party/libjxl/libjxl/BUILDING_Haiku.md b/third-party/libjxl/libjxl/BUILDING_Haiku.md
new file mode 100644
index 0000000000..1ffca1453c
--- /dev/null
+++ b/third-party/libjxl/libjxl/BUILDING_Haiku.md
@@ -0,0 +1,20 @@
+## Disclaimer
+
+Haiku builds are not officially supported, i.e. the build might not work at all,
+some tests may fail and some sub-projects are excluded from build.
+
+This manual outlines Haiku-specific setup. For general building and testing
+instructions see "[BUILDING](BUILDING.md)" and
+"[Building and Testing changes](doc/building_and_testing.md)".
+
+## Dependencies
+
+```shell
+pkgman install llvm9_clang ninja cmake doxygen libjpeg_turbo_devel giflib_devel
+```
+
+## Building
+
+```shell
+TEST_STACK_LIMIT=none CMAKE_FLAGS="-I/boot/system/develop/tools/lib/gcc/x86_64-unknown-haiku/8.3.0/include/c++ -I/boot/system/develop/tools/lib/gcc/x86_64-unknown-haiku/8.3.0/include/c++/x86_64-unknown-haiku" CMAKE_SHARED_LINKER_FLAGS="-shared -Xlinker -soname=libjpegxl.so -lpthread" ./ci.sh opt
+```
diff --git a/third-party/libjxl/libjxl/BUILDING_OSX.md b/third-party/libjxl/libjxl/BUILDING_OSX.md
new file mode 100644
index 0000000000..b5f5e34db7
--- /dev/null
+++ b/third-party/libjxl/libjxl/BUILDING_OSX.md
@@ -0,0 +1,41 @@
+## Disclaimer
+
+OSX builds have "best effort" support, i.e. the build might not work at all, some
+tests may fail and some sub-projects are excluded from the build.
+
+This manual outlines OSX specific setup. For general building and testing
+instructions see "[BUILDING](BUILDING.md)" and
+"[Building and Testing changes](doc/building_and_testing.md)".
+
+[Homebrew](https://brew.sh/) is a popular package manager. The JPEG XL library and
+binaries can be installed using it:
+
+```bash
+brew install jpeg-xl
+```
+
+## Dependencies
+
+Make sure that `brew doctor` does not report serious problems and that an
+up-to-date version of Xcode is installed.
+
+Installing (actually, building) `clang` might take a couple hours.
+
+```bash
+brew install llvm
+```
+
+```bash
+brew install coreutils cmake giflib jpeg-turbo libpng ninja zlib
+```
+
+Before building the project check that `which clang` is
+`/usr/local/opt/llvm/bin/clang`, not the one provided by XCode. If not, update
+`PATH` environment variable.
+
+Also, setting `CMAKE_PREFIX_PATH` might be necessary to resolve include paths
+correctly, e.g.:
+
+```bash
+export CMAKE_PREFIX_PATH=`brew --prefix giflib`:`brew --prefix jpeg-turbo`:`brew --prefix libpng`:`brew --prefix zlib`
+```
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/CHANGELOG.md b/third-party/libjxl/libjxl/CHANGELOG.md
new file mode 100644
index 0000000000..c1235da14e
--- /dev/null
+++ b/third-party/libjxl/libjxl/CHANGELOG.md
@@ -0,0 +1,320 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## Unreleased
+
+### Added
+ - encoder API: add `JxlEncoderSetExtraChannelDistance` to adjust the quality
+   of extra channels (like alpha) separately.
+ - encoder API: new API functions for streaming encoding:
+  - `JxlEncoderSetOutputCallback`,
+  - `JxlEncoderChunkedImageFrameStart`,
+  - `JxlEncoderChunkedImageFrameAddPart` and new
+  - `JXL_ENC_FRAME_SETTING_BUFFERING` enum value.
+ - encoder API: new options for more fine-grained control over metadata
+   preservation when using `JxlEncoderAddJPEGFrame`:
+  - `JXL_ENC_FRAME_SETTING_JPEG_KEEP_EXIF`
+  - `JXL_ENC_FRAME_SETTING_JPEG_KEEP_XMP`
+  - `JXL_ENC_FRAME_SETTING_JPEG_KEEP_JUMBF`
+ - encoder API: new function `JxlEncoderSetUpsamplingMode` to change the upsampling
+   method, e.g. to use nearest-neighbor upsampling for pixel art
+ - cjxl can now be used to explicitly add/update/strip Exif/XMP/JUMBF metadata using
+   the decoder-hints syntax, e.g. `cjxl input.ppm -x exif=input.exif output.jxl`
+ - djxl can now be used to extract Exif/XMP/JUMBF metadata
+
+### Removed
+ - API: the Butteraugli API (`jxl/butteraugli.h`) was removed.
+ - encoder and decoder API: all deprecated functions were removed:
+   `JxlDecoderDefaultPixelFormat`, `JxlEncoderOptionsSetLossless`,
+   `JxlEncoderOptionsSetEffort`, `JxlEncoderOptionsSetDecodingSpeed`,
+   `JxlEncoderOptionsSetDistance`, `JxlEncoderOptionsCreate`, as well as
+   the deprecated enumerator values `JXL_DEC_EXTENSIONS`, `JXL_ENC_NOT_SUPPORTED`,
+   `JXL_TYPE_BOOLEAN`, `JXL_TYPE_UINT32`, and deprecated type `JxlEncoderOptions`.
+ - decoder API: the signature of `JxlDecoderGetColorAsEncodedProfile`,
+   `JxlDecoderGetICCProfileSize`, and `JxlDecoderGetColorAsICCProfile`
+   changed: a deprecated unused argument was removed.
+
+### Changed
+ - changed the name of the cjxl flag `photon_noise` to `photon_noise_iso`
+
+## [0.8.0] - 2023-01-18
+
+### Added
+ - decoder API: new function `JxlDecoderSetImageBitDepth` to set the bit depth
+   of the output buffer.
+ - decoder API proposal: add `JxlDecoderSetOutputColorProfile` and
+   `JxlDecoderSetCms` to enable decoding to desired colorspace; NB: not
+   implemented yet.
+ - encoder API: new function `JxlEncoderSetFrameBitDepth` to set the bit depth
+   of the input buffer.
+ - encoder API: add an effort 10 option for lossless compression; using this
+   setting requires calling `JxlEncoderAllowExpertOptions`.
+ - encoder API: new `JXL_ENC_FRAME_SETTING_JPEG_COMPRESS_BOXES` enum value to
+   allow explicit control of metadata compression
+
+### Removed
+ - common API: removed `JxlIntrinsicSizeHeader`
+ - decoder API: removed deprecated `JXL_DEC_NEED_DC_OUT_BUFFER` and
+   `JXL_DEC_DC_IMAGE` events, `JxlDecoderDCOutBufferSize` and
+   `JxlDecoderSetDCOutBuffer` functions
+
+### Changed / clarified
+ - encoder API: `JxlEncoderProcessOutput` requires at least 32 bytes of output
+   space to proceed and guarantees that at least one byte will be written
+
+## [0.7] - 2022-07-21
+
+### Added
+ - Export version information in headers.
+ - decoder API: Ability to decode the content of metadata boxes:
+   `JXL_DEC_BOX`, `JXL_DEC_BOX_NEED_MORE_OUTPUT`, `JxlDecoderSetBoxBuffer`,
+   `JxlDecoderGetBoxType`, `JxlDecoderGetBoxSizeRaw` and
+   `JxlDecoderSetDecompressBoxes`.
+ - decoder API: ability to mark that the input is finished: `JxlDecoderCloseInput`.
+ - decoder API: ability to request updates on different progressive events using
+   `JxlDecoderSetProgressiveDetail`; currently supported events are
+   `kDC`, `kLastPasses` and `kPasses`.
+ - decoder API: ability to specify desired intensity target using
+   `JxlDecoderSetDesiredIntensityTarget`
+ - decoder API: new function `JxlDecoderSetCoalesced` to allow decoding
+   non-coalesced (unblended) frames, e.g. layers of a composite still image
+   or the cropped frames of a recompressed GIF/APNG.
+ - decoder API: new function `JxlDecoderSetUnpremultiplyAlpha` to set
+   preference for getting an associated alpha channel with premultiplied or
+   unpremultiplied colors.
+ - decoder API: field added to `JxlFrameHeader`: a `JxlLayerInfo` struct
+   that contains crop dimensions and offsets and blending information for
+   the non-coalesced case.
+ - decoder API: new function `JxlDecoderGetExtraChannelBlendInfo` to get
+   the blending information for extra channels in the non-coalesced case.
+ - decoder API: new function `JxlDecoderSetMultithreadedImageOutCallback`,
+   allowing output callbacks to receive more information about the number of
+   threads on which they are running.
+ - decoder API: new function `JxlDecoderSkipCurrentFrame` to skip processing
+   the current frame after a progressive detail is reached.
+ - decoder API: new function `JxlDecoderGetIntendedDownsamplingRatio` to get
+   the intended downsampling ratio of progressive steps, based on the
+   information in the frame header.
+ - decoder API: new function `JxlDecoderSetRenderSpotcolors` to allow disabling
+   rendering of spot colors.
+ - decoder/encoder API: add two fields to `JXLBasicInfo`: `intrinsic_xsize`
+   and `intrinsic_ysize` to signal the intrinsic size.
+ - encoder API: ability to add metadata boxes, added new functions
+   `JxlEncoderAddBox`, `JxlEncoderUseBoxes`, `JxlEncoderCloseBoxes` and
+   `JxlEncoderCloseFrames`.
+ - encoder API: added ability to set several encoder options / extra fields to
+   frames using `JxlEncoderSetFrameName`, `JxlEncoderFrameSettingsSetOption`,
+   `JxlEncoderFrameSettingsSetFloatOption`.
+ - encoder API: added ability to check the required codestream compatibility
+   level and to force a specific level using
+   `JxlEncoderGetRequiredCodestreamLevel` and `JxlEncoderSetCodestreamLevel`.
+ - encoder API: added ability to force emitting box-based container format
+   using `JxlEncoderUseContainer`.
+ - encoder API: added ability to store JPEG metadata for lossless reconstruction
+   using `JxlEncoderStoreJPEGMetadata`
+ - encoder API: new functions `JxlEncoderSetFrameHeader` and
+   `JxlEncoderSetExtraChannelBlendInfo` to set animation
+   and blending parameters of the frame, and `JxlEncoderInitFrameHeader` and
+   `JxlEncoderInitBlendInfo` to initialize the structs to set.
+ - encoder API: ability to encode arbitrary extra channels:
+  `JxlEncoderInitExtraChannelInfo`, `JxlEncoderSetExtraChannelInfo`,
+  `JxlEncoderSetExtraChannelName` and `JxlEncoderSetExtraChannelBuffer`.
+ - encoder API: ability to plug custom CMS implementation using
+   `JxlEncoderSetCms(JxlEncoder* enc, JxlCmsInterface cms)`
+ - encoder API: added `JxlEncoderGetError` to retrieve last encoder error.
+
+### Changed
+- decoder API: using `JxlDecoderCloseInput` at the end of all input is required
+  when using JXL_DEC_BOX, and is now also encouraged in other cases, but not
+  required in those other cases for backwards compatibility.
+- encoder API: `JxlEncoderCloseInput` now closes both frames and boxes input.
+- CLI: `cjxl` and `djxl` have been reimplemented on the base of public decoder
+  and encoder API; dropped dependency on `gflags` for argument parsing.
+
+### Deprecated
+- decoder API: `JXL_DEC_EXTENSIONS` event: use `JXL_DEC_BASIC_INFO`
+- decoder / encoder API: pixel types `JXL_TYPE_BOOLEAN` and `JXL_TYPE_UINT32`:
+  consider using `JXL_TYPE_UINT8` and `JXL_TYPE_FLOAT` correspondingly.
+- decoder API: pixel format parameter for `JxlDecoderGetColorAsEncodedProfile`
+  and `JxlDecoderGetICCProfileSize`: pass `NULL`.
+- decoder API: `JxlDecoderDefaultPixelFormat`
+- encoder API: `JxlEncoderOptions`: use `JxlEncoderFrameSettings` instead.
+- encoder API: `JxlEncoderOptionsCreate`: use `JxlEncoderFrameSettingsCreate`
+  instead.
+- encoder API: `JxlEncoderOptionsSetDistance`: use `JxlEncoderSetFrameDistance`
+  instead.
+- encoder API: `JxlEncoderOptionsSetLossless`: use `JxlEncoderSetFrameLossless`
+  instead.
+- encoder API: `JxlEncoderOptionsSetEffort`: use
+  `JxlEncoderFrameSettingsSetOption(frame_settings, JXL_ENC_FRAME_SETTING_EFFORT, effort)`
+  instead.
+- encoder API: `JxlEncoderOptionsSetDecodingSpeed`: use
+  `JxlEncoderFrameSettingsSetOption(frame_settings, JXL_ENC_FRAME_SETTING_DECODING_SPEED, tier)`
+  instead.
+- encoder API: deprecated `JXL_ENC_NOT_SUPPORTED`, the encoder returns
+  `JXL_ENC_ERROR` instead and there is no need to handle
+  `JXL_ENC_NOT_SUPPORTED`.
+
+## [0.6.1] - 2021-10-29
+### Changed
+ - Security: Fix OOB read in splines rendering (#735 -
+   [CVE-2021-22563](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-22563))
+ - Security: Fix OOB copy (read/write) in out-of-order/multi-threaded decoding
+   (#708 - [CVE-2021-22564](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-22564))
+ - Fix segfault in `djxl` tool with `--allow_partial_files` flag (#781).
+ - Fix border in extra channels when using upsampling (#796)
+
+## [0.6] - 2021-10-04
+### Added
+ - API: New functions to decode extra channels:
+   `JxlDecoderExtraChannelBufferSize` and `JxlDecoderSetExtraChannelBuffer`.
+ - API: New function `JxlEncoderInitBasicInfo` to initialize `JxlBasicInfo`
+   (only needed when encoding). NOTE: it is now required to call this function
+   when using the encoder. Padding was added to the struct for forward
+   compatibility.
+ - API: Support for encoding oriented images.
+ - API: FLOAT16 support in the encoder API.
+ - Rewrite of the GDK pixbuf loader plugin. Added proper color management and
+   animation support.
+ - Rewrite of GIMP plugin. Added compression parameters dialog and switched to
+   using the public C API.
+ - Debian packages for GDK pixbuf loader (`libjxl-gdk-pixbuf`) and GIMP
+   (`libjxl-gimp-plugin`) plugins.
+ - `cjxl`/`djxl` support for `stdin` and `stdout`.
+
+### Changed
+ - API: Renamed the field `alpha_associated` in `JxlExtraChannelInfo` to
+   `alpha_premultiplied`, to match the corresponding name in `JxlBasicInfo`.
+ - Improved the 2x2 downscaling method in the encoder for the optional color
+   channel resampling for low bit rates.
+ - Fixed: the combination of floating point original data, XYB color encoding,
+   and Modular mode was broken (in both encoder and decoder). It now works.
+   NOTE: this can cause the current encoder to write jxl bitstreams that do
+   not decode with the old decoder. In particular this will happen when using
+   cjxl with PFM, EXR, or floating point PSD input, and a combination of XYB
+   and modular mode is used (which caused an encoder error before), e.g.
+   using options like `-m -q 80` (lossy modular), `-d 4.5` or `--progressive_dc=1`
+   (modular DC frame), or default lossy encoding on an image where patches
+   end up being used. There is no problem when using cjxl with PNG, JPEG, GIF,
+   APNG, PPM, PGM, PGX, or integer (8-bit or 16-bit) PSD input.
+ - `libjxl` static library now bundles skcms, fixing static linking in
+   downstream projects when skcms is used.
+ - Spline rendering performance improvements.
+ - Butteraugli changes for less visual masking.
+
+## [0.5] - 2021-08-02
+### Added
+ - API: New function to decode the image using a callback outputting a part of a
+   row per call.
+ - API: 16-bit float output support.
+ - API: `JxlDecoderRewind` and `JxlDecoderSkipFrames` functions to skip more
+   efficiently to earlier animation frames.
+ - API: `JxlDecoderSetPreferredColorProfile` function to choose color profile in
+   certain circumstances.
+ - encoder: Adding `center_x` and `center_y` flags for more control of the tile
+   order.
+ - New encoder speeds `lightning` (1) and `thunder` (2).
+
+### Changed
+ - Re-licensed the project under a BSD 3-Clause license. See the
+   [LICENSE](LICENSE) and [PATENTS](PATENTS) files for details.
+ - Full JPEG XL part 1 specification support: Implemented all the spec required
+   to decode files to pixels, including cases that are not used by the encoder
+   yet. Part 2 of the spec (container format) is final but not fully implemented
+   here.
+ - Butteraugli metric improvements. Exact numbers are different from previous
+   versions.
+ - Memory reductions during decoding.
+ - Reduce the size of the jxl_dec library by removing dependencies.
+ - A few encoding speedups.
+ - Clarify the security policy.
+ - Significant encoding improvements (~5 %) and less ringing.
+ - Butteraugli metric changed to have somewhat less masking.
+ - `cjxl` flag `--speed` is deprecated and replaced by the `--effort` synonym.
+
+### Removed
+- API for returning a downsampled DC was deprecated
+  (`JxlDecoderDCOutBufferSize` and `JxlDecoderSetDCOutBuffer`) and will be
+  removed in the next release.
+
+## [0.3.7] - 2021-03-29
+### Changed
+ - Fix a rounding issue in 8-bit decoding.
+
+## [0.3.6] - 2021-03-25
+### Changed
+ - Fix a bug that could result in the generation of invalid codestreams as
+   well as failure to decode valid streams.
+
+## [0.3.5] - 2021-03-23
+### Added
+ - New encode-time options for faster decoding at the cost of quality.
+ - Man pages for cjxl and djxl.
+
+### Changed
+ - Memory usage improvements.
+ - Faster decoding to 8-bit output with the C API.
+ - GIMP plugin: avoid the sRGB conversion dialog for sRGB images, do not show
+   a console window on Windows.
+ - Various bug fixes.
+
+## [0.3.4] - 2021-03-16
+### Changed
+ - Improved box parsing.
+ - Improved metadata handling.
+ - Performance and memory usage improvements.
+
+## [0.3.3] - 2021-03-05
+### Changed
+ - Performance improvements for small images.
+ - Add a (flag-protected) non-high-precision mode with better speed.
+ - Significantly speed up the PQ EOTF.
+ - Allow optional HDR tone mapping in djxl (--tone_map, --display_nits).
+ - Change the behavior of djxl -j to make it consistent with cjxl (#153).
+ - Improve image quality.
+ - Improve EXIF handling.
+
+## [0.3.2] - 2021-02-12
+### Changed
+ - Fix embedded ICC encoding regression
+   [#149](https://gitlab.com/wg1/jpeg-xl/-/issues/149).
+
+## [0.3.1] - 2021-02-10
+### Changed
+ - New experimental Butteraugli API (`jxl/butteraugli.h`).
+ - Encoder improvements to low quality settings.
+ - Bug fixes, including fuzzer-found potential security bug fixes.
+ - Fixed `-q 100` and `-d 0` not triggering lossless modes.
+
+## [0.3] - 2021-01-29
+### Changed
+ - Minor change to the Decoder C API to accommodate future work for other ways
+   to provide input.
+ - Future decoder C API changes will be backwards compatible.
+ - Lots of bug fixes since the previous version.
+
+## [0.2] - 2020-12-24
+### Added
+ - JPEG XL bitstream format is frozen. Files encoded with 0.2 will be supported
+   by future versions.
+
+### Changed
+ - Files encoded with previous versions are not supported.
+
+## [0.1.1] - 2020-12-01
+
+## [0.1] - 2020-11-14
+### Added
+ - Initial release of an encoder (`cjxl`) and decoder (`djxl`) that work
+   together as well as a benchmark tool for comparison with other codecs
+   (`benchmark_xl`).
+ - Note: JPEG XL format is in the final stages of standardization, minor changes
+   to the codestream format are still possible but we are not expecting any
+   changes beyond what is required by bug fixing.
+ - API: new decoder API in C, check the `examples/` directory for its example
+   usage. The C API is a work in progress and likely to change both in API and
+   ABI in future releases.
diff --git a/third-party/libjxl/libjxl/CMakeLists.txt b/third-party/libjxl/libjxl/CMakeLists.txt
new file mode 100644
index 0000000000..89c274c9ab
--- /dev/null
+++ b/third-party/libjxl/libjxl/CMakeLists.txt
@@ -0,0 +1,527 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Ubuntu bionic ships with cmake 3.10.
+cmake_minimum_required(VERSION 3.10)
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
+
+# Honor VISIBILITY_INLINES_HIDDEN on all types of targets.
+if(POLICY CMP0063)
+  cmake_policy(SET CMP0063 NEW)
+endif()
+# Pass CMAKE_EXE_LINKER_FLAGS to CC and CXX compilers when testing if they work.
+if(POLICY CMP0065)
+  cmake_policy(SET CMP0065 NEW)
+endif()
+
+# Set PIE flags for POSITION_INDEPENDENT_CODE targets, added in 3.14.
+if(POLICY CMP0083)
+  cmake_policy(SET CMP0083 NEW)
+endif()
+
+project(LIBJXL LANGUAGES C CXX)
+
+include(CheckCXXSourceCompiles)
+check_cxx_source_compiles(
+   "int main() {
+      #if !defined(__EMSCRIPTEN__)
+      static_assert(false, \"__EMSCRIPTEN__ is not defined\");
+      #endif
+      return 0;
+    }"
+  JPEGXL_EMSCRIPTEN
+)
+
+message(STATUS "CMAKE_SYSTEM_PROCESSOR is ${CMAKE_SYSTEM_PROCESSOR}")
+include(CheckCXXCompilerFlag)
+check_cxx_compiler_flag("-fsanitize=fuzzer-no-link" CXX_FUZZERS_SUPPORTED)
+check_cxx_compiler_flag("-Xclang -mconstructor-aliases" CXX_CONSTRUCTOR_ALIASES_SUPPORTED)
+check_cxx_compiler_flag("-fmacro-prefix-map=OLD=NEW" CXX_MACRO_PREFIX_MAP)
+check_cxx_compiler_flag("-fno-rtti" CXX_NO_RTTI_SUPPORTED)
+
+# Enable PIE binaries by default if supported.
+include(CheckPIESupported OPTIONAL RESULT_VARIABLE CHECK_PIE_SUPPORTED)
+if(CHECK_PIE_SUPPORTED)
+  check_pie_supported(LANGUAGES CXX)
+  if(CMAKE_CXX_LINK_PIE_SUPPORTED)
+    set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
+  endif()
+endif()
+
+if(PROVISION_DEPENDENCIES)
+  # Run script to provision dependencies.
+  find_program (BASH_PROGRAM bash)
+  if(BASH_PROGRAM)
+    execute_process(
+      COMMAND ${BASH_PROGRAM} ${CMAKE_CURRENT_SOURCE_DIR}/deps.sh
+      RESULT_VARIABLE PROVISION_DEPENDENCIES_RESULT)
+  endif()
+  if(NOT PROVISION_DEPENDENCIES_RESULT EQUAL "0")
+    message(FATAL_ERROR "${CMAKE_CURRENT_SOURCE_DIR}/deps.sh failed with ${PROVISION_DEPENDENCIES_RESULT}")
+  endif()
+endif()
+
+### Project build options:
+if(CXX_FUZZERS_SUPPORTED)
+  # Enabled by default except on arm64, Windows and Apple builds.
+  set(ENABLE_FUZZERS_DEFAULT true)
+endif()
+find_package(PkgConfig)
+if(NOT APPLE AND NOT WIN32 AND NOT HAIKU AND CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
+  pkg_check_modules(TCMallocMinimalVersionCheck QUIET IMPORTED_TARGET
+      libtcmalloc_minimal)
+  if(TCMallocMinimalVersionCheck_FOUND AND
+     NOT TCMallocMinimalVersionCheck_VERSION VERSION_EQUAL 2.8.0)
+    # Enabled by default except on Windows and Apple builds for
+    # tcmalloc != 2.8.0. tcmalloc 2.8.1 already has a fix for this issue.
+    set(ENABLE_TCMALLOC_DEFAULT true)
+  else()
+    message(STATUS
+        "tcmalloc version ${TCMallocMinimalVersionCheck_VERSION} -- "
+        "tcmalloc 2.8.0 disabled due to "
+        "https://github.com/gperftools/gperftools/issues/1204")
+  endif()
+endif()
+
+check_cxx_source_compiles(
+   "int main() {
+      #if !defined(HWY_DISABLED_TARGETS)
+      static_assert(false, \"HWY_DISABLED_TARGETS is not defined\");
+      #endif
+      return 0;
+    }"
+  JXL_HWY_DISABLED_TARGETS_FORCED
+)
+
+set(WARNINGS_AS_ERRORS_DEFAULT false)
+
+if((SANITIZER STREQUAL "msan") OR JPEGXL_EMSCRIPTEN)
+  set(BUNDLE_LIBPNG_DEFAULT YES)
+else()
+  set(BUNDLE_LIBPNG_DEFAULT NO)
+endif()
+
+# Standard cmake naming for building shared libraries.
+get_property(SHARED_LIBS_SUPPORTED GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS)
+option(BUILD_SHARED_LIBS "Build shared libraries instead of static ones" ${SHARED_LIBS_SUPPORTED})
+
+set(JPEGXL_ENABLE_FUZZERS ${ENABLE_FUZZERS_DEFAULT} CACHE BOOL
+    "Build JPEGXL fuzzer targets.")
+set(JPEGXL_ENABLE_DEVTOOLS false CACHE BOOL
+    "Build JPEGXL developer tools.")
+set(JPEGXL_ENABLE_TOOLS true CACHE BOOL
+    "Build JPEGXL user tools: cjxl and djxl.")
+set(JPEGXL_ENABLE_JPEGLI true CACHE BOOL
+    "Build jpegli library.")
+set(JPEGXL_ENABLE_JPEGLI_LIBJPEG true CACHE BOOL
+    "Build libjpeg.so shared library based on jpegli.")
+set(JPEGXL_INSTALL_JPEGLI_LIBJPEG false CACHE BOOL
+    "Install jpegli version of libjpeg.so system-wide.")
+set(JPEGLI_LIBJPEG_LIBRARY_VERSION "62.3.0" CACHE STRING
+    "Library version of the libjpeg.so shared library that we build.")
+set(JPEGLI_LIBJPEG_LIBRARY_SOVERSION "62" CACHE STRING
+    "Library so-version of the libjpeg.so shared library that we build.")
+set(JPEGXL_ENABLE_DOXYGEN true CACHE BOOL
+    "Generate C API documentation using Doxygen.")
+set(JPEGXL_ENABLE_MANPAGES true CACHE BOOL
+    "Build and install man pages for the command-line tools.")
+set(JPEGXL_ENABLE_BENCHMARK true CACHE BOOL
+    "Build JPEGXL benchmark tools.")
+set(JPEGXL_ENABLE_EXAMPLES true CACHE BOOL
+    "Build JPEGXL library usage examples.")
+set(JPEGXL_BUNDLE_LIBPNG ${BUNDLE_LIBPNG_DEFAULT} CACHE BOOL
+    "Build libpng from source and link it statically.")
+set(JPEGXL_ENABLE_JNI true CACHE BOOL
+    "Build JPEGXL JNI Java wrapper, if Java dependencies are installed.")
+set(JPEGXL_ENABLE_SJPEG true CACHE BOOL
+    "Build JPEGXL with support for encoding with sjpeg.")
+set(JPEGXL_ENABLE_OPENEXR true CACHE BOOL
+    "Build JPEGXL with support for OpenEXR if available.")
+set(JPEGXL_ENABLE_SKCMS true CACHE BOOL
+    "Build with skcms instead of lcms2.")
+set(JPEGXL_BUNDLE_SKCMS true CACHE BOOL
+    "When building with skcms, bundle it into libjxl.a.")
+set(JPEGXL_ENABLE_VIEWERS false CACHE BOOL
+    "Build JPEGXL viewer tools for evaluation.")
+set(JPEGXL_ENABLE_TCMALLOC ${ENABLE_TCMALLOC_DEFAULT} CACHE BOOL
+    "Build JPEGXL using gperftools (tcmalloc) allocator.")
+set(JPEGXL_ENABLE_PLUGINS false CACHE BOOL
+    "Build third-party plugins to support JPEG XL in other applications.")
+set(JPEGXL_ENABLE_COVERAGE false CACHE BOOL
+    "Enable code coverage tracking for libjxl. This also enables debug and disables optimizations.")
+set(JPEGXL_ENABLE_SIZELESS_VECTORS false CACHE BOOL
+    "Builds in support for SVE/RVV vectorization")
+set(JPEGXL_ENABLE_TRANSCODE_JPEG true CACHE BOOL
+    "Builds in support for decoding transcoded JXL files back to JPEG,\
+ disabling it makes the decoder reject JXL_DEC_JPEG_RECONSTRUCTION events,\
+ (default enabled)")
+set(JPEGXL_ENABLE_BOXES true CACHE BOOL
+    "Builds in support for decoding boxes in JXL files,\
+ disabling it makes the decoder reject JXL_DEC_BOX events,\
+ (default enabled)")
+set(JPEGXL_STATIC false CACHE BOOL
+    "Build tools as static binaries.")
+set(JPEGXL_WARNINGS_AS_ERRORS ${WARNINGS_AS_ERRORS_DEFAULT} CACHE BOOL
+    "Treat warnings as errors during compilation.")
+set(JPEGXL_DEP_LICENSE_DIR "" CACHE STRING
+    "Directory where to search for system dependencies \"copyright\" files.")
+set(JPEGXL_FORCE_NEON false CACHE BOOL
+    "Set flags to enable NEON in arm if not enabled by your toolchain.")
+set(JPEGXL_TEST_TOOLS false CACHE BOOL
+    "Run scripts that test the encoding / decoding tools.")
+set(JPEGXL_ENABLE_AVX512 false CACHE BOOL
+    "Build with AVX512 support (faster on CPUs that support it, but larger binary size).")
+set(JPEGXL_ENABLE_AVX512_ZEN4 false CACHE BOOL
+    "Build with Zen4-optimized AVX512 support (faster on CPUs that support it, but larger binary size).")
+
+# Force system dependencies.
+set(JPEGXL_FORCE_SYSTEM_BROTLI false CACHE BOOL
+    "Force using system installed brotli instead of third_party/brotli source.")
+set(JPEGXL_FORCE_SYSTEM_GTEST false CACHE BOOL
+    "Force using system installed googletest (gtest/gmock) instead of third_party/googletest source.")
+set(JPEGXL_FORCE_SYSTEM_LCMS2 false CACHE BOOL
+    "Force using system installed lcms2 instead of third_party/lcms source.")
+set(JPEGXL_FORCE_SYSTEM_HWY false CACHE BOOL
+    "Force using system installed highway (libhwy-dev) instead of third_party/highway source.")
+
+# Check minimum compiler versions. Older compilers are not supported and fail
+# with hard to understand errors.
+if (NOT CMAKE_C_COMPILER_ID STREQUAL CMAKE_CXX_COMPILER_ID)
+  message(FATAL_ERROR "Different C/C++ compilers set: "
+          "${CMAKE_C_COMPILER_ID} vs ${CMAKE_CXX_COMPILER_ID}")
+endif()
+if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+  # Android NDK's toolchain.cmake fakes the clang version in
+  # CMAKE_CXX_COMPILER_VERSION with an incorrect number, so ignore this.
+  if (NOT CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION MATCHES "clang"
+      AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5)
+    message(FATAL_ERROR
+      "Minimum Clang version required is Clang 5, please update.")
+  endif()
+elseif (CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+  if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7)
+    message(FATAL_ERROR
+      "Minimum GCC version required is 7, please update.")
+  endif()
+endif()
+
+message(STATUS
+    "Compiled IDs C:${CMAKE_C_COMPILER_ID}, C++:${CMAKE_CXX_COMPILER_ID}")
+
+# Always disable SSSE3 since it is rare to have SSSE3 but not SSE4
+set(HWY_DISABLED_TARGETS "HWY_SSSE3")
+if (NOT JPEGXL_ENABLE_AVX512)
+  message(STATUS "Disabled AVX512 (set JPEGXL_ENABLE_AVX512 to enable it)")
+  set(HWY_DISABLED_TARGETS "${HWY_DISABLED_TARGETS}|HWY_AVX3")
+  add_definitions(-DFJXL_ENABLE_AVX512=0)
+endif()
+if (NOT JPEGXL_ENABLE_AVX512_ZEN4)
+  message(STATUS "Disabled AVX512_ZEN4 (set JPEGXL_ENABLE_AVX512_ZEN4 to enable it)")
+  set(HWY_DISABLED_TARGETS "${HWY_DISABLED_TARGETS}|HWY_AVX3_ZEN4")
+endif()
+
+
+
+# CMAKE_EXPORT_COMPILE_COMMANDS is used to generate the compilation database
+# used by clang-tidy.
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+if(JPEGXL_STATIC)
+  set(BUILD_SHARED_LIBS 0)
+  # Clang developers say that in case to use "static" we have to build stdlib
+  # ourselves; for real use case we don't care about stdlib, as it is "granted",
+  # so just linking all other libraries is fine.
+  if (NOT MSVC AND NOT APPLE)
+    set(CMAKE_FIND_LIBRARY_SUFFIXES .a)
+    set(CMAKE_EXE_LINKER_FLAGS
+        "${CMAKE_EXE_LINKER_FLAGS} -static -static-libgcc -static-libstdc++")
+  endif()
+endif()  # JPEGXL_STATIC
+
+# Threads
+set(THREADS_PREFER_PTHREAD_FLAG YES)
+find_package(Threads REQUIRED)
+
+# These settings are important to drive check_cxx_source_compiles
+# See CMP0067 (min cmake version is 3.10 anyway)
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_EXTENSIONS OFF)
+set(CMAKE_CXX_STANDARD_REQUIRED YES)
+
+# Atomics
+find_package(Atomics REQUIRED)
+
+if(JPEGXL_STATIC)
+  if (MINGW)
+    # In MINGW libstdc++ uses pthreads directly. When building statically a
+    # program (regardless of whether the source code uses pthread or not) the
+    # toolchain will add stdc++ and pthread to the linking step but stdc++ will
+    # be linked statically while pthread will be linked dynamically.
+    # To avoid this and have pthread statically linked we need to pass it in
+    # the command line with "-Wl,-Bstatic -lpthread -Wl,-Bdynamic" but the
+    # linker will discard it if not used by anything else up to that point in
+    # the linker command line. If the program or any dependency don't use
+    # pthread directly -lpthread is discarded and libstdc++ (added by the
+    # toolchain later) will then use the dynamic version. For this we also need
+    # to pass -lstdc++ explicitly before -lpthread. For pure C programs -lstdc++
+    # will be discarded anyway.
+    # This adds these flags as dependencies for *all* targets. Adding this to
+    # CMAKE_EXE_LINKER_FLAGS instead would cause them to be included before any
+    # object files and therefore discarded. This should be set in the
+    # INTERFACE_LINK_LIBRARIES of Threads::Threads but some third_party targets
+    # don't depend on it.
+    link_libraries(-Wl,-Bstatic -lstdc++ -lpthread -Wl,-Bdynamic)
+  elseif(CMAKE_USE_PTHREADS_INIT)
+    # "whole-archive" is not supported on OSX.
+    if (NOT APPLE)
+      # Set pthreads as a whole-archive, otherwise weak symbols in the static
+      # libraries will discard pthreads symbols leading to segmentation fault at
+      # runtime.
+      message(STATUS "Using -lpthread as --whole-archive")
+      set_target_properties(Threads::Threads PROPERTIES
+        INTERFACE_LINK_LIBRARIES
+            "-Wl,--whole-archive;-lpthread;-Wl,--no-whole-archive")
+    endif()
+  endif()
+endif()  # JPEGXL_STATIC
+
+if (JPEGXL_EMSCRIPTEN)
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
+endif()
+
+if (CXX_MACRO_PREFIX_MAP)
+  add_compile_options(-fmacro-prefix-map=${CMAKE_CURRENT_SOURCE_DIR}=.)
+endif()
+
+if (CXX_NO_RTTI_SUPPORTED)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
+endif()
+
+# Internal flags for coverage builds:
+set(JPEGXL_COVERAGE_FLAGS)
+set(JPEGXL_COVERAGE_LINK_FLAGS)
+
+if (MSVC)
+  # TODO(janwas): add flags
+  add_definitions(-D_CRT_SECURE_NO_WARNINGS)
+else ()
+  # Global compiler flags for all targets here and in subdirectories.
+  add_definitions(
+    # Avoid changing the binary based on the current time and date.
+    -D__DATE__="redacted"
+    -D__TIMESTAMP__="redacted"
+    -D__TIME__="redacted"
+  )
+
+  # TODO(eustas): JXL currently compiles, but does not pass tests...
+  if (NOT JXL_HWY_DISABLED_TARGETS_FORCED)
+    if (NOT JPEGXL_ENABLE_SIZELESS_VECTORS)
+      set(HWY_DISABLED_TARGETS "${HWY_DISABLED_TARGETS}|HWY_SVE|HWY_SVE2|HWY_SVE_256|HWY_SVE2_128|HWY_RVV")
+    endif()
+    add_definitions(-DHWY_DISABLED_TARGETS=\(${HWY_DISABLED_TARGETS}\))
+  endif()
+
+  # In CMake before 3.12 it is problematic to pass repeated flags like -Xclang.
+  # For this reason we place them in CMAKE_CXX_FLAGS instead.
+  # See https://gitlab.kitware.com/cmake/cmake/issues/15826
+
+  # Machine flags.
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funwind-tables")
+  if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -mrelax-all")
+  endif()
+  if (CXX_CONSTRUCTOR_ALIASES_SUPPORTED)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -mconstructor-aliases")
+  endif()
+
+  if(WIN32)
+    # Not supported by clang-cl, but frame pointers are default on Windows
+  else()
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer")
+  endif()
+
+  # CPU flags - remove once we have NEON dynamic dispatch
+
+  # TODO(janwas): this also matches M1, but only ARMv7 is intended/needed.
+  if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
+    if(JPEGXL_FORCE_NEON)
+      # GCC requires these flags, otherwise __ARM_NEON is undefined.
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
+        -mfpu=neon-vfpv4 -mfloat-abi=hard")
+    endif()
+  endif()
+
+  # Force build with optimizations in release mode.
+  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2")
+
+  add_compile_options(
+    # Ignore this to allow redefining __DATE__ and others.
+    -Wno-builtin-macro-redefined
+
+    # Global warning settings.
+    -Wall
+  )
+
+  if (JPEGXL_WARNINGS_AS_ERRORS)
+    add_compile_options(-Werror)
+  endif ()
+
+  if(JPEGXL_ENABLE_COVERAGE)
+    set(JPEGXL_COVERAGE_FLAGS
+        -g -O0 -fprofile-arcs -ftest-coverage
+        -DJXL_ENABLE_ASSERT=0 -DJXL_ENABLE_CHECK=0
+    )
+    set(JPEGXL_COVERAGE_LINK_FLAGS
+        --coverage
+    )
+  endif()  # JPEGXL_ENABLE_COVERAGE
+endif ()  # !MSVC
+
+include(GNUInstallDirs)
+
+# Separately build/configure testing frameworks and other third_party libraries
+# to allow disabling tests in those libraries.
+include(third_party/testing.cmake)
+add_subdirectory(third_party)
+# Copy the JXL license file to the output build directory.
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/LICENSE"
+               ${PROJECT_BINARY_DIR}/LICENSE.jpeg-xl COPYONLY)
+
+# Enable tests regardless of where they are defined.
+enable_testing()
+include(CTest)
+# Specify default location of `testdata`:
+if(NOT DEFINED JPEGXL_TEST_DATA_PATH)
+  set(JPEGXL_TEST_DATA_PATH "${PROJECT_SOURCE_DIR}/testdata")
+endif()
+
+# Libraries.
+add_subdirectory(lib)
+
+if(BUILD_TESTING)
+  # Script to run tests over the source code in bash.
+  find_program (BASH_PROGRAM bash)
+  if(BASH_PROGRAM)
+    add_test(
+      NAME bash_test
+      COMMAND ${BASH_PROGRAM} ${CMAKE_CURRENT_SOURCE_DIR}/bash_test.sh)
+  endif()
+endif() # BUILD_TESTING
+
+# Documentation: a Doxygen "doc" target plus, when sphinx-build is found, an "rtd-html" target.
+if(JPEGXL_ENABLE_DOXYGEN)
+  find_package(Doxygen)
+  if(DOXYGEN_FOUND)
+    set(DOXYGEN_GENERATE_HTML "YES")
+    set(DOXYGEN_GENERATE_XML "YES")
+    set(DOXYGEN_STRIP_FROM_PATH "${CMAKE_CURRENT_SOURCE_DIR}/lib/include")
+    set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "README.md")
+    if(JPEGXL_WARNINGS_AS_ERRORS)
+      set(DOXYGEN_WARN_AS_ERROR "YES")
+    endif()
+    set(DOXYGEN_QUIET "YES")
+    doxygen_add_docs(doc
+      "${CMAKE_CURRENT_SOURCE_DIR}/lib/include"
+      "${CMAKE_CURRENT_SOURCE_DIR}/doc/api.txt"
+      WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+      COMMENT "Generating C API documentation")
+
+    # Add sphinx doc build step for readthedocs.io (requires doxygen too).
+    find_program(SPHINX_BUILD_PROGRAM sphinx-build)
+    if(SPHINX_BUILD_PROGRAM)
+      add_custom_command(
+        OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/rtd/nonexistent"
+        COMMENT "Generating readthedocs.io output on ${CMAKE_CURRENT_BINARY_DIR}/rtd"
+        COMMAND ${SPHINX_BUILD_PROGRAM} -q -W -b html -j auto
+          ${CMAKE_CURRENT_SOURCE_DIR}/doc/sphinx
+          ${CMAKE_CURRENT_BINARY_DIR}/rtd
+        DEPENDS doc
+      )
+      # This command runs the documentation generation every time since the output
+      # target file doesn't exist.
+      add_custom_target(rtd-html
+        DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/rtd/nonexistent
+      )
+    else() # SPHINX_BUILD_PROGRAM
+      message(WARNING "sphinx-build not found, skipping rtd documentation")
+    endif() # SPHINX_BUILD_PROGRAM
+
+  else()
+    # Create a "doc" target for compatibility since "doc" is not otherwise added to
+    # the build when doxygen is not installed. Building it runs `false`, so it fails.
+    add_custom_target(doc false
+      COMMENT "Error: Can't generate doc since Doxygen not installed.")
+  endif() # DOXYGEN_FOUND
+endif() # JPEGXL_ENABLE_DOXYGEN
+
+if(JPEGXL_ENABLE_MANPAGES)  # Build and install the cjxl/djxl man pages using the a2x program.
+  find_program(ASCIIDOC a2x)
+  if(ASCIIDOC)
+    file(STRINGS "${ASCIIDOC}" ASCIIDOC_SHEBANG LIMIT_COUNT 1)  # Read only the first string of a2x (expected: its shebang).
+    if(ASCIIDOC_SHEBANG MATCHES "/sh|/bash" OR MINGW)
+      set(ASCIIDOC_PY_FOUND ON)
+      # Run the program directly and set ASCIIDOC as empty.
+      set(ASCIIDOC_PY "${ASCIIDOC}")
+      set(ASCIIDOC "")
+    elseif(ASCIIDOC_SHEBANG MATCHES "python2")
+      find_package(Python2 COMPONENTS Interpreter)
+      set(ASCIIDOC_PY_FOUND "${Python2_Interpreter_FOUND}")
+      set(ASCIIDOC_PY Python2::Interpreter)
+    elseif(ASCIIDOC_SHEBANG MATCHES "python3")
+      find_package(Python3 COMPONENTS Interpreter)
+      set(ASCIIDOC_PY_FOUND "${Python3_Interpreter_FOUND}")
+      set(ASCIIDOC_PY Python3::Interpreter)
+    else()  # Unrecognized shebang: try find_package(Python), then a plain `python` program.
+      find_package(Python COMPONENTS Interpreter QUIET)
+      if(NOT Python_Interpreter_FOUND)
+        find_program(ASCIIDOC_PY python)
+        if(ASCIIDOC_PY)
+          set(ASCIIDOC_PY_FOUND ON)
+        endif()
+      else()
+        set(ASCIIDOC_PY_FOUND "${Python_Interpreter_FOUND}")
+        set(ASCIIDOC_PY Python::Interpreter)
+      endif()
+    endif()
+
+    if (ASCIIDOC_PY_FOUND)
+      set(MANPAGE_FILES "")
+      set(MANPAGES "")
+      foreach(PAGE IN ITEMS cjxl djxl)
+        # Invoking the Python interpreter ourselves instead of running the a2x binary
+        # directly is necessary on MSYS2, otherwise it is run through cmd.exe which
+        # does not recognize it.
+        add_custom_command(
+          OUTPUT "${PAGE}.1"
+          COMMAND "${ASCIIDOC_PY}"
+          ARGS ${ASCIIDOC}
+            --format manpage --destination-dir="${CMAKE_CURRENT_BINARY_DIR}"
+            "${CMAKE_CURRENT_SOURCE_DIR}/doc/man/${PAGE}.txt"
+          MAIN_DEPENDENCY "${CMAKE_CURRENT_SOURCE_DIR}/doc/man/${PAGE}.txt")
+        list(APPEND MANPAGE_FILES "${CMAKE_CURRENT_BINARY_DIR}/${PAGE}.1")
+        list(APPEND MANPAGES "${PAGE}.1")
+      endforeach()
+      add_custom_target(manpages ALL DEPENDS ${MANPAGES})
+      install(FILES ${MANPAGE_FILES} DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
+    endif()  # ASCIIDOC_PY_FOUND
+  else()
+    message(WARNING "asciidoc was not found, the man pages will not be installed.")
+  endif()  # ASCIIDOC
+endif()  # JPEGXL_ENABLE_MANPAGES
+
+# Example usage code (only when JPEGXL_ENABLE_EXAMPLES is set).
+if (JPEGXL_ENABLE_EXAMPLES)
+  include(examples/examples.cmake)
+endif ()
+
+# Plugins for third-party software (only when JPEGXL_ENABLE_PLUGINS is set).
+if (JPEGXL_ENABLE_PLUGINS)
+  add_subdirectory(plugins)
+endif ()
+
+# Binary tools (always added; there is no JPEGXL_ENABLE_* guard at this level).
+add_subdirectory(tools)
diff --git a/third-party/libjxl/libjxl/CODE_OF_CONDUCT.md b/third-party/libjxl/libjxl/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000000..b2d81a3214
--- /dev/null
+++ b/third-party/libjxl/libjxl/CODE_OF_CONDUCT.md
@@ -0,0 +1,93 @@
+# Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to making participation in our project and
+our community a harassment-free experience for everyone, regardless of age, body
+size, disability, ethnicity, gender identity and expression, level of
+experience, education, socio-economic status, nationality, personal appearance,
+race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment
+include:
+
+*   Using welcoming and inclusive language
+*   Being respectful of differing viewpoints and experiences
+*   Gracefully accepting constructive criticism
+*   Focusing on what is best for the community
+*   Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+*   The use of sexualized language or imagery and unwelcome sexual attention or
+    advances
+*   Trolling, insulting/derogatory comments, and personal or political attacks
+*   Public or private harassment
+*   Publishing others' private information, such as a physical or electronic
+    address, without explicit permission
+*   Other conduct which could reasonably be considered inappropriate in a
+    professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or reject
+comments, commits, code, wiki edits, issues, and other contributions that are
+not aligned to this Code of Conduct, or to ban temporarily or permanently any
+contributor for other behaviors that they deem inappropriate, threatening,
+offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies both within project spaces and in public spaces
+when an individual is representing the project or its community. Examples of
+representing a project or community include using an official project e-mail
+address, posting via an official social media account, or acting as an appointed
+representative at an online or offline event. Representation of a project may be
+further defined and clarified by project maintainers.
+
+This Code of Conduct also applies outside the project spaces when the Project
+Steward has a reasonable belief that an individual's behavior may have a
+negative impact on the project or its community.
+
+## Conflict Resolution
+
+We do not believe that all conflict is bad; healthy debate and disagreement
+often yield positive results. However, it is never okay to be disrespectful or
+to engage in behavior that violates the project’s code of conduct.
+
+If you see someone violating the code of conduct, you are encouraged to address
+the behavior directly with those involved. Many issues can be resolved quickly
+and easily, and this gives people more control over the outcome of their
+dispute. If you are unable to resolve the matter for any reason, or if the
+behavior is threatening or harassing, report it. We are dedicated to providing
+an environment where participants feel welcome and safe.
+
+Reports should be directed to Jyrki Alakuijala <jyrki@google.com>, the
+Project Steward(s) for JPEG XL. It is the Project Steward’s duty to
+receive and address reported violations of the code of conduct. They will then
+work with a committee consisting of representatives from the Open Source
+Programs Office and the Google Open Source Strategy team. If for any reason you
+are uncomfortable reaching out to the Project Steward, please email
+opensource@google.com.
+
+We will investigate every complaint, but you may not receive a direct response.
+We will use our discretion in determining when and how to follow up on reported
+incidents, which may range from not taking action to permanent expulsion from
+the project and project-sponsored spaces. We will notify the accused of the
+report and provide them an opportunity to discuss it before any action is taken.
+The identity of the reporter will be omitted from the details of the report
+supplied to the accused. In potentially harmful situations, such as ongoing
+harassment or threats to anyone's safety, we may take action without notice.
+
+## Attribution
+
+This Code of Conduct is adapted from the Contributor Covenant, version 1.4,
+available at
+https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
diff --git a/third-party/libjxl/libjxl/CONTRIBUTING.md b/third-party/libjxl/libjxl/CONTRIBUTING.md
new file mode 100644
index 0000000000..cb6459797c
--- /dev/null
+++ b/third-party/libjxl/libjxl/CONTRIBUTING.md
@@ -0,0 +1,132 @@
+# Contributing to libjxl
+
+## Contributing with bug reports
+
+For security-related issues please see [SECURITY.md](SECURITY.md).
+
+We welcome suggestions, feature requests and bug reports. Before opening a new
+issue, please check whether there is already an existing one at the following
+link:
+
+ *  https://github.com/libjxl/libjxl/issues
+
+## Contributing with patches and Pull Requests
+
+We'd love to accept your contributions to the JPEG XL Project. Please read
+through this section before sending a Pull Request.
+
+### Contributor License Agreements
+
+Our project is open source under the terms outlined in the [LICENSE](LICENSE)
+and [PATENTS](PATENTS) files. Before we can accept your contributions, even for
+small changes, there are just a few small guidelines you need to follow:
+
+Please fill out either the individual or corporate Contributor License Agreement
+(CLA) with Google. JPEG XL Project is an effort by multiple individuals and
+companies, including the initial contributors Cloudinary and Google, but Google
+is the legal entity in charge of receiving these CLA and relicensing this
+software:
+
+  * If you are an individual writing original source code and you're sure you
+  own the intellectual property, then you'll need to sign an [individual
+  CLA](https://code.google.com/legal/individual-cla-v1.0.html).
+
+  * If you work for a company that wants to allow you to contribute your work,
+  then you'll need to sign a [corporate
+  CLA](https://code.google.com/legal/corporate-cla-v1.0.html).
+
+Follow either of the two links above to access the appropriate CLA and
+instructions for how to sign and return it. Once we receive it, we'll be able
+to accept your pull requests.
+
+***NOTE***: Only original source code from you and other people that have signed
+the CLA can be accepted into the main repository.
+
+### License
+
+Contributions are licensed under the project's [LICENSE](LICENSE). Each new
+file must include the following header when possible, with comment style adapted
+to the language as needed:
+
+```
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+```
+
+### Code Reviews
+
+All submissions, including submissions by project members, require review. We
+use GitHub pull requests for this purpose. Consult
+[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
+information on using pull requests.
+
+### Contribution philosophy
+
+  * Prefer small changes, even if they don't implement a complete feature. Small
+  changes are easier to review and can be submitted faster. Think about what's
+  the smallest unit you can send that makes sense to review and submit in
+  isolation. For example, new modules that are not yet used by the tools but
+  have their own unittests are ok. If you have unrelated changes that
+  you discovered while working on something else, please send them in a
+  different Pull Request. If you are refactoring code and changing
+  functionality try to send the refactor first without any change in
+  functionality. Reviewers may ask you to split a Pull Request and it is
+  easier to create a smaller change from the beginning.
+
+  * Describe your commits. Add a meaningful description to your commit message, explain what you are changing if it is not trivially obvious, but more importantly explain *why* you are making those changes. For example "Fix
+  build" is not a good commit message; describe which build and, if it makes sense,
+  why this fixes it or why it was failing without this. It is very likely
+  that people far in the future without any context you have right now will be
+  looking at your commit trying to figure out why the change was introduced. If
+  related to an issue in this or another repository include a link to it.
+
+  * Code Style: We follow the [Google C++ Coding
+  Style](https://google.github.io/styleguide/cppguide.html). A
+  [clang-format](https://clang.llvm.org/docs/ClangFormat.html) configuration
+  file is available to automatically format your code, you can invoke it with
+  the `./ci.sh lint` helper tool.
+
+  * Testing: Test your change and explain in the commit message *how* your
+  commit was tested. For example adding unittests or in some cases just testing
+  with the existing ones is enough. In any case, mention what testing was
+  performed so reviewers can evaluate whether that's enough testing. In many
+  cases, testing that the Continuous Integration workflow passes is enough.
+
+  * Make one commit per Pull Request / review, unless there's a good reason not
+  to. If you have multiple changes send multiple Pull Requests and each one can
+  have its own review.
+
+  * When addressing comments from reviewers prefer to squash or fixup your
+  edits and force-push your commit. When merging changes into the repository we
+  don't want to include the history of code review back and forth changes or
+  typos. Reviewers can click on the "force-pushed" automatic comment on a Pull
+  Request to see the changes between versions. We use "Rebase and merge" policy
+  to keep a linear git history which is easier to reason about.
+
+  * Your change must pass the build and test workflows. There's a `ci.sh` script
+  to help building and testing these configurations. See [building and
+  testing](doc/building_and_testing.md) for more details.
+
+### Contributing checklist.
+
+  * Sign the CLA (only needed once per user, see above).
+
+  * AUTHORS: If this is your first contribution, add your name or your
+  company name to the [AUTHORS](AUTHORS) file for copyright tracking purposes.
+
+  * Style guide. Check `./ci.sh lint`.
+
+  * Meaningful commit description: What and *why*, links to issues, testing
+  procedure.
+
+  * Squashed multiple edits into a single commit.
+
+  * Upload your changes to your fork and [create a Pull
+  Request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request).
+
+# Community Guidelines
+
+This project follows [Google's Open Source Community
+Guidelines](https://opensource.google.com/conduct/).
diff --git a/third-party/libjxl/libjxl/CONTRIBUTORS b/third-party/libjxl/libjxl/CONTRIBUTORS
new file mode 100644
index 0000000000..848096f921
--- /dev/null
+++ b/third-party/libjxl/libjxl/CONTRIBUTORS
@@ -0,0 +1,23 @@
+# This file lists individuals who made significant contributions to the JPEG XL
+# code base, such as design, adding features, performing experiments, ...
+# Small changes such as a small bugfix or fixing spelling errors are not
+# included. If you'd like to be included in this file thanks to a significant
+# contribution, feel free to send a pull request changing this file.
+Alex Deymo
+Alexander Rhatushnyak
+Evgenii Kliuchnikov
+Iulia-Maria Comșa
+Jan Wassenberg
+Jon Sneyers
+Jyrki Alakuijala
+Krzysztof Potempa
+Lode Vandevenne
+Luca Versari
+Martin Bruse
+Moritz Firsching
+Renata Khasanova
+Robert Obryk
+Sami Boukortt
+Sebastian Gomez-Gonzalez
+Thomas Fischbacher
+Zoltan Szabadka
diff --git a/third-party/libjxl/libjxl/LICENSE b/third-party/libjxl/libjxl/LICENSE
new file mode 100644
index 0000000000..c66034b105
--- /dev/null
+++ b/third-party/libjxl/libjxl/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) the JPEG XL Project Authors.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/third-party/libjxl/libjxl/PATENTS b/third-party/libjxl/libjxl/PATENTS
new file mode 100644
index 0000000000..c95b8f4105
--- /dev/null
+++ b/third-party/libjxl/libjxl/PATENTS
@@ -0,0 +1,22 @@
+Additional IP Rights Grant (Patents)
+
+"This implementation" means the copyrightable works distributed by
+Google as part of the JPEG XL project.
+
+Google hereby grants to You a perpetual, worldwide, non-exclusive,
+no-charge, royalty-free, irrevocable (except as stated in this section)
+patent license to make, have made, use, offer to sell, sell, import,
+transfer and otherwise run, modify and propagate the contents of this
+implementation of JPEG XL, where such license applies only to those patent
+claims, both currently owned or controlled by Google and acquired in
+the future, licensable by Google that are necessarily infringed by this
+implementation of JPEG XL.  This grant does not include claims that would be
+infringed only as a consequence of further modification of this
+implementation.  If you or your agent or exclusive licensee institute or
+order or agree to the institution of patent litigation against any
+entity (including a cross-claim or counterclaim in a lawsuit) alleging
+that this implementation of JPEG XL or any code incorporated within this
+implementation of JPEG XL constitutes direct or contributory patent
+infringement, or inducement of patent infringement, then any patent
+rights granted to you under this License for this implementation of JPEG XL
+shall terminate as of the date such litigation is filed.
diff --git a/third-party/libjxl/libjxl/README.md b/third-party/libjxl/libjxl/README.md
new file mode 100644
index 0000000000..1e9a9adbd1
--- /dev/null
+++ b/third-party/libjxl/libjxl/README.md
@@ -0,0 +1,133 @@
+# JPEG XL reference implementation
+
+[![Build/Test](https://github.com/libjxl/libjxl/actions/workflows/build_test.yml/badge.svg)](
+https://github.com/libjxl/libjxl/actions/workflows/build_test.yml)
+[![Build/Test Cross](https://github.com/libjxl/libjxl/actions/workflows/build_test_cross.yml/badge.svg)](
+https://github.com/libjxl/libjxl/actions/workflows/build_test_cross.yml)
+[![Conformance](https://github.com/libjxl/libjxl/actions/workflows/conformance.yml/badge.svg)](
+https://github.com/libjxl/libjxl/actions/workflows/conformance.yml)
+[![CIFuzz](https://github.com/libjxl/libjxl/actions/workflows/fuzz.yml/badge.svg)](
+https://github.com/libjxl/libjxl/actions/workflows/fuzz.yml)
+[![Releases](https://github.com/libjxl/libjxl/actions/workflows/release.yaml/badge.svg)](
+https://github.com/libjxl/libjxl/actions/workflows/release.yaml)
+[![Doc](https://readthedocs.org/projects/libjxl/badge/?version=latest)](
+https://libjxl.readthedocs.io/en/latest/?badge=latest)
+[![codecov](https://codecov.io/gh/libjxl/libjxl/branch/main/graph/badge.svg)](
+https://codecov.io/gh/libjxl/libjxl)
+
+<img src="doc/jxl.svg" width="100" align="right" alt="JXL logo">
+
+This repository contains a reference implementation of JPEG XL (encoder and
+decoder), called `libjxl`. This software library is
+[used by many applications that support JPEG XL](doc/software_support.md).
+
+JPEG XL was standardized in 2022 as [ISO/IEC 18181](https://jpeg.org/jpegxl/workplan.html).
+The [core codestream](doc/format_overview.md#codestream-features) is specified in 18181-1,
+the [file format](doc/format_overview.md#file-format-features) in 18181-2.
+[Decoder conformance](https://github.com/libjxl/conformance) is defined in 18181-3,
+and 18181-4 is the [reference software](https://github.com/libjxl/libjxl).
+
+The library API, command line options, and tools in this repository are subject
+to change, however files encoded with `cjxl` conform to the JPEG XL specification
+and can be decoded with current and future `djxl` decoders or the `libjxl` decoding library.
+
+## Installation
+
+In most Linux distributions, installing `libjxl` is just a matter of using the package management system.
+For example in Debian-based distributions: `apt install libjxl-tools` will install `cjxl` and `djxl`
+and other tools like `benchmark_xl` are available in the package `libjxl-devtools`.
+On macOS, you can use [Homebrew](https://brew.sh/): `brew install jpeg-xl`.
+
+[![libjxl packaging status](https://repology.org/badge/vertical-allrepos/libjxl.svg?exclude_unsupported=1&columns=3&exclude_sources=modules,site&header=libjxl%20packaging%20status)](https://repology.org/project/libjxl/versions)
+
+From the [releases page](https://github.com/libjxl/libjxl/releases/) the following can be downloaded:
+ - Windows binaries 
+ - Debian and Ubuntu .deb packages 
+
+Of course you can also [build libjxl from sources](BUILDING.md).
+
+
+## Usage
+
+To encode a source image to JPEG XL with default settings:
+
+```bash
+cjxl input.png output.jxl
+```
+
+The desired visual fidelity can be selected using the `--distance` parameter
+(in units of just-noticeable difference, where 0 is lossless and the most useful lossy range is 0.5 .. 3.0),
+or using `--quality` (on a scale from 0 to 100, roughly matching libjpeg).
+The [encode effort](doc/encode_effort.md) can be selected using the `--effort` parameter.
+
+For more settings run `cjxl --help` or for a full list of options
+run `cjxl -v -v --help`.
+
+To decode a JPEG XL file run:
+
+```bash
+djxl input.jxl output.png
+```
+
+When possible `cjxl`/`djxl` are able to read/write the following
+image formats: .exr, .gif, .jpeg/.jpg, .pfm, .pgm/.ppm, .pgx, .png.
+Specifically for JPEG files, the default `cjxl` behavior is to apply lossless
+recompression and the default `djxl` behavior is to reconstruct the original
+JPEG file (when the extension of the output file is .jpg).
+
+### Benchmarking
+
+For speed benchmarks on single images in single or multi-threaded decoding
+`djxl` can print decoding speed information. See `djxl --help` for details
+on the decoding options and note that the output image is optional for
+benchmarking purposes.
+
+For more comprehensive benchmarking options, see the
+[benchmarking guide](doc/benchmarking.md).
+
+### Library API
+
+Besides the `libjxl` library [API documentation](https://libjxl.readthedocs.io/en/latest/),
+there are [example applications](examples/) and [plugins](plugins/) that can be used as a reference or
+starting point for developers who wish to integrate `libjxl` in their project.
+
+
+## License
+
+This software is available under a 3-clause BSD license which can be found in
+the [LICENSE](LICENSE) file, with an "Additional IP Rights Grant" as outlined in
+the [PATENTS](PATENTS) file.
+
+Please note that the PATENTS file only mentions Google since Google is the legal
+entity receiving the Contributor License Agreements (CLA) from all contributors
+to the JPEG XL Project, including the initial main contributors to the JPEG XL
+format: Cloudinary and Google.
+
+## Additional documentation
+
+### Codec description
+
+*   [JPEG XL Format Overview](doc/format_overview.md)
+*   [Introductory paper](https://www.spiedigitallibrary.org/proceedings/Download?fullDOI=10.1117%2F12.2529237) (open-access)
+*   [XL Overview](doc/xl_overview.md) - a brief introduction to the source code modules
+*   [JPEG XL white paper](https://ds.jpeg.org/whitepapers/jpeg-xl-whitepaper.pdf)
+*   [JPEG XL official website](https://jpeg.org/jpegxl)
+*   [JPEG XL community website](https://jpegxl.info)
+
+### Development process
+
+*   [More information on testing/build options](doc/building_and_testing.md)
+*   [Git guide for JPEG XL](doc/developing_in_github.md) - for developers
+*   [Fuzzing](doc/fuzzing.md) - for developers
+*   [Building Web Assembly artifacts](doc/building_wasm.md)
+*   [Test coverage on Codecov.io](https://app.codecov.io/gh/libjxl/libjxl) - for
+    developers
+*   [libjxl documentation on readthedocs.io](https://libjxl.readthedocs.io/)
+
+### Contact
+
+If you encounter a bug or other issue with the software, please open an Issue here.
+
+There is a [subreddit about JPEG XL](https://www.reddit.com/r/jpegxl/), and
+informal chatting with developers and early adopters of `libjxl` can be done on the
+[JPEG XL Discord server](https://discord.gg/DqkQgDRTFu).
diff --git a/third-party/libjxl/libjxl/SECURITY.md b/third-party/libjxl/libjxl/SECURITY.md
new file mode 100644
index 0000000000..d03012a63a
--- /dev/null
+++ b/third-party/libjxl/libjxl/SECURITY.md
@@ -0,0 +1,73 @@
+# Security and Vulnerability Policy for libjxl
+
+## TL;DR:
+
+CPE prefix: `cpe:2.3:a:libjxl_project:libjxl`
+
+To report a security issue, please email libjxl-security@google.com.
+
+Include in your email a description of the issue, the steps you took to create
+the issue, affected versions, and if known, mitigations for the issue. Our
+vulnerability management team will acknowledge receiving your email within 3
+working days.
+
+This project follows a 90 day disclosure timeline.
+
+For all other bugs, where there are no security implications about disclosing
+the unpatched bug, open a [new issue](https://github.com/libjxl/libjxl/issues)
+checking first for existing similar issues. If in doubt about the security
+impact of a bug you discovered, email first.
+
+## Policy overview
+
+libjxl's Security Policy is based on the [Google Open Source program
+guidelines](https://github.com/google/oss-vulnerability-guide) for coordinated
+vulnerability disclosure.
+
+Early versions of `libjxl` had a different security policy that didn't provide
+security and vulnerability disclosure support. Versions up to and including
+0.3.7 are not covered and won't receive any security advisory.
+
+Only released versions, starting from version 0.5, are covered by this policy.
+Development branches, arbitrary commits from `main` branch or even releases with
+backported features externally patched on top are not covered. Only those
+versions with a release tag in `libjxl`'s repository are covered, starting from
+version 0.5.
+
+## What's a "Security bug"
+
+A security bug is a bug that can potentially be exploited to let an attacker
+gain unauthorized access or privileges such as disclosing information or
+arbitrary code execution. Not all fuzzer-found bugs and not all assert()
+failures are considered security bugs in libjxl. For a detailed explanation and
+examples see our [Security Vulnerabilities Playbook](doc/vuln_playbook.md).
+
+## What to expect
+
+To report a security issue, please email libjxl-security@google.com with all the
+details about the bug you encountered.
+
+ * Include a description of the issue, steps to reproduce, etc. Compiler
+   versions, flags, exact version used and even CPU are often relevant given our
+   usage of SIMD and run-time dispatch of SIMD instructions.
+
+ * A member of our security team will reply to you within 3 business days. Note
+   that business days are different in different countries.
+
+ * We will evaluate the issue and we may require more input from your side to
+   reproduce it.
+
+ * If the issue fits in the description of a security bug, we will issue a
+   CVE, publish a fix and make a new minor or patch release with it. There is
+   a maximum 90-day disclosure timeline; we ask you not to publish the
+   details before the 90-day deadline or the release date (whichever comes
+   first).
+
+ * In the case that we publish a CVE we will credit the external researcher who
+   reported the issue. When reporting security issues please let us know if you
+   need to include specific information while doing so, like for example a
+   company affiliation.
+
+Our security team follows the [Security Vulnerabilities
+Playbook](doc/vuln_playbook.md). For more details about the process and policies
+please take a look at it.
diff --git a/third-party/libjxl/libjxl/WORKSPACE b/third-party/libjxl/libjxl/WORKSPACE
new file mode 100644
index 0000000000..ba493442ae
--- /dev/null
+++ b/third-party/libjxl/libjxl/WORKSPACE
@@ -0,0 +1,768 @@
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository", "new_git_repository")
+
+# Bazel Skylib 1.3.0 release tarball (two download URLs, pinned by sha256).
+# The embedded BUILD snippets further down load its expand_template rule.
+http_archive(
+    name = "bazel_skylib",
+    sha256 = "74d544d96f4a5bb630d465ca8bbcfe231e3594e5aae57e1edbf17a6eb3ca2506",
+    urls = [
+        "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.3.0/bazel-skylib-1.3.0.tar.gz",
+        "https://github.com/bazelbuild/bazel-skylib/releases/download/1.3.0/bazel-skylib-1.3.0.tar.gz",
+    ],
+)
+
+load("@bazel_skylib//:workspace.bzl", "bazel_skylib_workspace")
+
+# Invoke the setup macro exported by Skylib's workspace.bzl.
+bazel_skylib_workspace()
+
+# Dependencies resolved from the local third_party/ directory instead of being
+# downloaded.
+local_repository(
+    name = "highway",
+    path = "third_party/highway",
+)
+
+local_repository(
+    name = "brotli",
+    path = "third_party/brotli",
+)
+
+# googletest is also local, but its BUILD file is named explicitly.
+new_local_repository(
+    name = "googletest",
+    build_file = "third_party/googletest/BUILD.bazel",
+    path = "third_party/googletest",
+)
+
+# skcms from third_party/skcms, exposed as a single public cc_library through
+# the inline BUILD content below.
+new_local_repository(
+    name = "skcms",
+    build_file_content = """
+cc_library(
+    name = "skcms",
+    srcs = [
+        "skcms.cc",
+        "skcms_internal.h",
+        "src/Transform_inl.h",
+    ],
+    hdrs = ["skcms.h"],
+    visibility = ["//visibility:public"],
+)
+    """,
+    path = "third_party/skcms",
+)
+
+# zlib cloned from GitHub at tag v1.2.13 and built as one public cc_library
+# from the inline BUILD content below.
+# NOTE(review): the checkout is pinned by tag only (no commit hash) — confirm
+# this is acceptable for reproducible builds.
+new_git_repository(
+    name = "zlib",
+    build_file_content = """
+cc_library(
+    name = "zlib",
+    defines = ["HAVE_UNISTD_H"],
+    srcs = [
+        "adler32.c",
+        "compress.c",
+        "crc32.c",
+        "crc32.h",
+        "deflate.c",
+        "deflate.h",
+        "gzclose.c",
+        "gzguts.h",
+        "gzlib.c",
+        "gzread.c",
+        "gzwrite.c",
+        "infback.c",
+        "inffast.c",
+        "inffast.h",
+        "inffixed.h",
+        "inflate.c",
+        "inflate.h",
+        "inftrees.c",
+        "inftrees.h",
+        "trees.c",
+        "trees.h",
+        "uncompr.c",
+        "zconf.h",
+        "zutil.c",
+        "zutil.h",
+    ],
+    hdrs = ["zlib.h"],
+    includes = ["."],
+    visibility = ["//visibility:public"],
+)
+    """,
+    remote = "https://github.com/madler/zlib",
+    tag = "v1.2.13",
+)
+
+# libpng from third_party/libpng. The inline BUILD content produces
+# pnglibconf.h by copying scripts/pnglibconf.h.prebuilt and builds a public
+# cc_library that depends on @zlib.
+new_local_repository(
+    name = "png",
+    build_file_content = """
+genrule(
+    name = "pnglibconf",
+    srcs = ["scripts/pnglibconf.h.prebuilt"],
+    outs = ["pnglibconf.h"],
+    cmd = "cp -f $< $@",
+)
+cc_library(
+    name = "png",
+    srcs = [
+        "png.c",
+        "pngconf.h",
+        "pngdebug.h",
+        "pngerror.c",
+        "pngget.c",
+        "pnginfo.h",
+        ":pnglibconf",
+        "pngmem.c",
+        "pngpread.c",
+        "pngpriv.h",
+        "pngread.c",
+        "pngrio.c",
+        "pngrtran.c",
+        "pngrutil.c",
+        "pngset.c",
+        "pngstruct.h",
+        "pngtrans.c",
+        "pngwio.c",
+        "pngwrite.c",
+        "pngwtran.c",
+        "pngwutil.c",
+    ],
+    hdrs = ["png.h"],
+    includes = ["."],
+    linkopts = ["-lm"],
+    visibility = ["//visibility:public"],
+    deps = ["@zlib//:zlib"],
+)
+    """,
+    path = "third_party/libpng",
+)
+
+# libjpeg-turbo cloned from GitHub at tag 2.1.91.
+# The inline BUILD content generates jconfig.h, jconfigint.h and jversion.h
+# from their ".in" templates using the SUBSTITUTIONS table, then defines three
+# libraries: jpeg16 and jpeg12 (built with BITS_IN_JSAMPLE=16 / 12) and jpeg,
+# which depends on both.
+new_git_repository(
+    name = "libjpeg_turbo",
+    build_file_content = """
+load("@bazel_skylib//rules:expand_template.bzl", "expand_template")
+SUBSTITUTIONS = {
+    "@BUILD@" : "20230208",
+    "@CMAKE_PROJECT_NAME@" : "libjpeg-turbo",
+    "@COPYRIGHT_YEAR@" : "2023",
+    "@INLINE@" : "__inline__",
+    "@JPEG_LIB_VERSION@" : "62",
+    "@LIBJPEG_TURBO_VERSION_NUMBER@" : "2001091",
+    "@SIZE_T@" : "8",
+    "@THREAD_LOCAL@" : "__thread",
+    "@VERSION@" : "2.1.91",
+}
+YES_DEFINES = [
+    "C_ARITH_CODING_SUPPORTED", "D_ARITH_CODING_SUPPORTED",
+    "HAVE_BUILTIN_CTZL", "MEM_SRCDST_SUPPORTED"
+]
+NO_DEFINES = [
+    "WITH_SIMD", "RIGHT_SHIFT_IS_UNSIGNED", "HAVE_INTRIN_H"
+]
+SUBSTITUTIONS.update({
+    "#cmakedefine " + key : "#define " + key for key in YES_DEFINES
+})
+SUBSTITUTIONS.update({
+    "#cmakedefine " + key : "// #define " + key for key in NO_DEFINES
+})
+[
+    expand_template(
+        name = "expand_" + src,
+        template = src + ".in",
+        out = src,
+        substitutions = SUBSTITUTIONS,
+        visibility = ["//visibility:public"],
+    ) for src in ["jconfig.h", "jconfigint.h", "jversion.h"]
+]
+JPEG16_SOURCES = [
+    "jccolor.c",
+    "jcdiffct.c",
+    "jclossls.c",
+    "jcmainct.c",
+    "jcprepct.c",
+    "jcsample.c",
+    "jdcolor.c",
+    "jddiffct.c",
+    "jdlossls.c",
+    "jdmainct.c",
+    "jdmerge.c",
+    "jdpostct.c",
+    "jdsample.c",
+    "jquant1.c",
+    "jquant2.c",
+    "jutils.c",
+]
+JPEG12_SOURCES = JPEG16_SOURCES + [
+    "jccoefct.c",
+    "jcdctmgr.c",
+    "jdcoefct.c",
+    "jddctmgr.c",
+    "jfdctfst.c",
+    "jfdctint.c",
+    "jidctflt.c",
+    "jidctfst.c",
+    "jidctint.c",
+    "jidctred.c",
+]
+JPEG_SOURCES = JPEG12_SOURCES + [
+    "jaricom.c",
+    "jcapimin.c",
+    "jcapistd.c",
+    "jcarith.c",
+    "jchuff.c",
+    "jcicc.c",
+    "jcinit.c",
+    "jclhuff.c",
+    "jcmarker.c",
+    "jcmaster.c",
+    "jcomapi.c",
+    "jcparam.c",
+    "jcphuff.c",
+    "jdapimin.c",
+    "jdapistd.c",
+    "jdarith.c",
+    "jdatadst.c",
+    "jdatasrc.c",
+    "jdhuff.c",
+    "jdicc.c",
+    "jdinput.c",
+    "jdlhuff.c",
+    "jdmarker.c",
+    "jdmaster.c",
+    "jdphuff.c",
+    "jdtrans.c",
+    "jerror.c",
+    "jfdctflt.c",
+    "jmemmgr.c",
+    "jmemnobs.c",
+]
+JPEG_HEADERS = [
+    "jccolext.c",
+    "jchuff.h",
+    "jcmaster.h",
+    "jconfig.h",
+    "jconfigint.h",
+    "jdcoefct.h",
+    "jdcol565.c",
+    "jdcolext.c",
+    "jdct.h",
+    "jdhuff.h",
+    "jdmainct.h",
+    "jdmaster.h",
+    "jdmerge.h",
+    "jdmrg565.c",
+    "jdmrgext.c",
+    "jdsample.h",
+    "jerror.h",
+    "jinclude.h",
+    "jlossls.h",
+    "jmemsys.h",
+    "jmorecfg.h",
+    "jpeg_nbits_table.h",
+    "jpegapicomp.h",
+    "jpegint.h",
+    "jpeglib.h",
+    "jsamplecomp.h",
+    "jsimd.h",
+    "jsimddct.h",
+    "jstdhuff.c",
+    "jversion.h",
+]
+cc_library(
+    name = "jpeg16",
+    srcs = JPEG16_SOURCES,
+    hdrs = JPEG_HEADERS,
+    local_defines = ["BITS_IN_JSAMPLE=16"],
+    visibility = ["//visibility:public"],
+)
+cc_library(
+    name = "jpeg12",
+    srcs = JPEG12_SOURCES,
+    hdrs = JPEG_HEADERS,
+    local_defines = ["BITS_IN_JSAMPLE=12"],
+    visibility = ["//visibility:public"],
+)
+cc_library(
+    name = "jpeg",
+    srcs = JPEG_SOURCES,
+    hdrs = JPEG_HEADERS,
+    deps = [":jpeg16", ":jpeg12"],
+    includes = ["."],
+    visibility = ["//visibility:public"],
+)
+exports_files([
+    "jmorecfg.h",
+    "jpeglib.h",
+])
+    """,
+    remote = "https://github.com/libjpeg-turbo/libjpeg-turbo.git",
+    tag = "2.1.91",
+)
+
+# giflib 5.2.1 release tarball, pinned by sha256 and built as one public
+# cc_library from the inline BUILD content below.
+# NOTE(review): only a single SourceForge mirror URL is listed; consider
+# whether a fallback URL is needed.
+http_archive(
+    name = "gif",
+    build_file_content = """
+cc_library(
+    name = "gif",
+    srcs = [
+        "dgif_lib.c", "egif_lib.c", "gifalloc.c", "gif_err.c", "gif_font.c",
+        "gif_hash.c", "openbsd-reallocarray.c", "gif_hash.h",
+        "gif_lib_private.h"
+    ],
+    hdrs = ["gif_lib.h"],
+    includes = ["."],
+    visibility = ["//visibility:public"],
+)
+    """,
+    sha256 = "31da5562f44c5f15d63340a09a4fd62b48c45620cd302f77a6d9acf0077879bd",
+    strip_prefix = "giflib-5.2.1",
+    url = "https://netcologne.dl.sourceforge.net/project/giflib/giflib-5.2.1.tar.gz",
+)
+
+# Imath cloned from GitHub. The inline BUILD content generates
+# src/Imath/ImathConfig.h from config/ImathConfig.h.in using the SUBSTITUTIONS
+# table and builds a public "Imath" cc_library.
+# NOTE(review): the substitutions report version 3.1.4 while the checkout
+# below is tag v3.1.5 — confirm whether the mismatch is intentional.
+new_git_repository(
+    name = "imath",
+    build_file_content = """
+load("@bazel_skylib//rules:expand_template.bzl", "expand_template")
+SUBSTITUTIONS = {
+    "@IMATH_INTERNAL_NAMESPACE@": "Imath_3_1",
+    "@IMATH_LIB_VERSION@": "3.1.4",
+    "@IMATH_NAMESPACE_CUSTOM@": "0",
+    "@IMATH_NAMESPACE@": "Imath",
+    "@IMATH_PACKAGE_NAME@": "Imath 3.1.4",
+    "@IMATH_VERSION_MAJOR@": "3",
+    "@IMATH_VERSION_MINOR@": "1",
+    "@IMATH_VERSION_PATCH@": "4",
+    "@IMATH_VERSION@": "3.1.4",
+}
+YES_DEFINES = [
+    "IMATH_HALF_USE_LOOKUP_TABLE", "IMATH_ENABLE_API_VISIBILITY",
+]
+NO_DEFINES = [
+    "IMATH_HAVE_LARGE_STACK",
+]
+ONE_DEFINES = [
+    "IMATH_USE_NOEXCEPT",
+]
+SUBSTITUTIONS.update({
+    "#cmakedefine " + key : "#define " + key for key in YES_DEFINES
+})
+SUBSTITUTIONS.update({
+    "#cmakedefine " + key : "// #define " + key for key in NO_DEFINES
+})
+SUBSTITUTIONS.update({
+    "#cmakedefine01 " + key : "#define " + key + " 1" for key in ONE_DEFINES
+})
+expand_template(
+    name = "expand_ImathConfig",
+    template = "config/ImathConfig.h.in",
+    out = "src/Imath/ImathConfig.h",
+    substitutions = SUBSTITUTIONS,
+)
+cc_library(
+    name = "Imath",
+    srcs = [
+        "src/Imath/ImathColorAlgo.cpp",
+        ":src/Imath/ImathConfig.h",
+        "src/Imath/ImathFun.cpp",
+        "src/Imath/ImathMatrixAlgo.cpp",
+        "src/Imath/ImathRandom.cpp",
+        "src/Imath/half.cpp",
+        "src/Imath/toFloat.h",
+    ],
+    hdrs = [
+        "src/Imath/ImathBox.h",
+        "src/Imath/ImathBoxAlgo.h",
+        "src/Imath/ImathColor.h",
+        "src/Imath/ImathColorAlgo.h",
+        "src/Imath/ImathEuler.h",
+        "src/Imath/ImathExport.h",
+        "src/Imath/ImathForward.h",
+        "src/Imath/ImathFrame.h",
+        "src/Imath/ImathFrustum.h",
+        "src/Imath/ImathFrustumTest.h",
+        "src/Imath/ImathFun.h",
+        "src/Imath/ImathGL.h",
+        "src/Imath/ImathGLU.h",
+        "src/Imath/ImathInt64.h",
+        "src/Imath/ImathInterval.h",
+        "src/Imath/ImathLine.h",
+        "src/Imath/ImathLineAlgo.h",
+        "src/Imath/ImathMath.h",
+        "src/Imath/ImathMatrix.h",
+        "src/Imath/ImathMatrixAlgo.h",
+        "src/Imath/ImathNamespace.h",
+        "src/Imath/ImathPlane.h",
+        "src/Imath/ImathPlatform.h",
+        "src/Imath/ImathQuat.h",
+        "src/Imath/ImathRandom.h",
+        "src/Imath/ImathRoots.h",
+        "src/Imath/ImathShear.h",
+        "src/Imath/ImathSphere.h",
+        "src/Imath/ImathTypeTraits.h",
+        "src/Imath/ImathVec.h",
+        "src/Imath/ImathVecAlgo.h",
+        "src/Imath/half.h",
+        "src/Imath/halfFunction.h",
+        "src/Imath/halfLimits.h",
+    ],
+    includes = ["src/Imath"],
+    visibility = ["//visibility:public"],
+)
+""",
+    remote = "https://github.com/AcademySoftwareFoundation/imath",
+    tag = "v3.1.5",
+)
+
+# OpenEXR cloned from GitHub. The inline BUILD content generates the Iex,
+# IlmThread and OpenEXR config headers from the cmake/*.h.in templates using
+# the SUBSTITUTIONS table, then defines the Iex, IlmThread and OpenEXR
+# libraries; only OpenEXR is given public visibility.
+# NOTE(review): the substitutions report version 3.0.4 (and *_3_0 internal
+# namespaces) while the checkout below is tag v3.1.5 — confirm whether the
+# mismatch is intentional.
+new_git_repository(
+    name = "openexr",
+    build_file_content = """
+load("@bazel_skylib//rules:expand_template.bzl", "expand_template")
+SUBSTITUTIONS = {
+    "@IEX_INTERNAL_NAMESPACE@": "Iex_3_0",
+    "@IEX_NAMESPACE_CUSTOM@": "0",
+    "@IEX_NAMESPACE@": "Iex",
+    "@ILMTHREAD_INTERNAL_NAMESPACE@": "IlmThread_3_0",
+    "@ILMTHREAD_NAMESPACE_CUSTOM@": "0",
+    "@ILMTHREAD_NAMESPACE@": "IlmThread",
+    "@OPENEXR_IMF_NAMESPACE@": "Imf",
+    "@OPENEXR_INTERNAL_IMF_NAMESPACE@": "Imf_3_0",
+    "@OPENEXR_LIB_VERSION@": "3.0.4",
+    "@OPENEXR_NAMESPACE_CUSTOM@": "0",
+    "@OPENEXR_PACKAGE_NAME@": "OpenEXR 3.0.4",
+    "@OPENEXR_VERSION_EXTRA@": "",
+    "@OPENEXR_VERSION_MAJOR@": "3",
+    "@OPENEXR_VERSION_MINOR@": "0",
+    "@OPENEXR_VERSION_PATCH@": "4",
+    "@OPENEXR_VERSION@": "3.0.4",
+}
+YES_DEFINES = [
+    "OPENEXR_ENABLE_API_VISIBILITY", "OPENEXR_IMF_HAVE_COMPLETE_IOMANIP",
+    "OPENEXR_HAVE_LARGE_STACK",
+]
+NO_DEFINES = [
+    "HAVE_UCONTEXT_H", "IEX_HAVE_CONTROL_REGISTER_SUPPORT",
+    "IEX_HAVE_SIGCONTEXT_CONTROL_REGISTER_SUPPORT", "OPENEXR_IMF_HAVE_DARWIN",
+    "OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX", "OPENEXR_IMF_HAVE_LINUX_PROCFS",
+    "OPENEXR_IMF_HAVE_SYSCONF_NPROCESSORS_ONLN",
+]
+ONE_DEFINES = [
+    "ILMTHREAD_THREADING_ENABLED",
+]
+ZERO_DEFINES = [
+    "ILMTHREAD_HAVE_POSIX_SEMAPHORES",
+]
+SUBSTITUTIONS.update({
+    "#cmakedefine " + key : "#define " + key for key in YES_DEFINES
+})
+SUBSTITUTIONS.update({
+    "#cmakedefine " + key : "// #define " + key for key in NO_DEFINES
+})
+SUBSTITUTIONS.update({
+    "#cmakedefine01 " + key : "#define " + key + " 1" for key in ONE_DEFINES
+})
+SUBSTITUTIONS.update({
+    "#cmakedefine01 " + key : "#define " + key + " 0" for key in ZERO_DEFINES
+})
+[
+    expand_template(
+        name = "expand_" + item,
+        template = "cmake/" + item + ".h.in",
+        out = "src/lib/Iex/" + item + ".h",
+        substitutions = SUBSTITUTIONS,
+    ) for item in ["IexConfig", "IexConfigInternal"]
+]
+[
+expand_template(
+        name = "expand_" + item,
+        template = "cmake/" + item + ".h.in",
+        out = "src/lib/IlmThread/" + item + ".h",
+        substitutions = SUBSTITUTIONS,
+    ) for item in ["IlmThreadConfig"]
+]
+[
+expand_template(
+        name = "expand_" + item,
+        template = "cmake/" + item + ".h.in",
+        out = "src/lib/OpenEXR/" + item + ".h",
+        substitutions = SUBSTITUTIONS,
+    ) for item in ["OpenEXRConfig", "OpenEXRConfigInternal"]
+]
+cc_library(
+    name = "Iex",
+    srcs = [
+        "src/lib/Iex/IexBaseExc.cpp",
+        "src/lib/Iex/IexMathFloatExc.cpp",
+        "src/lib/Iex/IexMathFpu.cpp",
+        "src/lib/Iex/IexThrowErrnoExc.cpp",
+    ],
+    hdrs = [
+        "src/lib/Iex/Iex.h",
+        "src/lib/Iex/IexBaseExc.h",
+        ":src/lib/Iex/IexConfig.h",
+        ":src/lib/Iex/IexConfigInternal.h",
+        "src/lib/Iex/IexErrnoExc.h",
+        "src/lib/Iex/IexExport.h",
+        "src/lib/Iex/IexForward.h",
+        "src/lib/Iex/IexMacros.h",
+        "src/lib/Iex/IexMathExc.h",
+        "src/lib/Iex/IexMathFloatExc.h",
+        "src/lib/Iex/IexMathFpu.h",
+        "src/lib/Iex/IexMathIeeeExc.h",
+        "src/lib/Iex/IexNamespace.h",
+        "src/lib/Iex/IexThrowErrnoExc.h",
+        ":src/lib/OpenEXR/OpenEXRConfig.h",
+    ],
+    includes = [
+        "src/lib/Iex",
+        "src/lib/OpenEXR",
+    ],
+)
+
+cc_library(
+    name = "IlmThread",
+    srcs = [
+        "src/lib/IlmThread/IlmThread.cpp",
+        "src/lib/IlmThread/IlmThreadPool.cpp",
+        "src/lib/IlmThread/IlmThreadSemaphore.cpp",
+        "src/lib/IlmThread/IlmThreadSemaphoreOSX.cpp",
+        "src/lib/IlmThread/IlmThreadSemaphorePosix.cpp",
+        "src/lib/IlmThread/IlmThreadSemaphorePosixCompat.cpp",
+        "src/lib/IlmThread/IlmThreadSemaphoreWin32.cpp",
+    ],
+    hdrs = [
+        "src/lib/IlmThread/IlmThread.h",
+        ":src/lib/IlmThread/IlmThreadConfig.h",
+        "src/lib/IlmThread/IlmThreadExport.h",
+        "src/lib/IlmThread/IlmThreadForward.h",
+        "src/lib/IlmThread/IlmThreadMutex.h",
+        "src/lib/IlmThread/IlmThreadNamespace.h",
+        "src/lib/IlmThread/IlmThreadPool.h",
+        "src/lib/IlmThread/IlmThreadSemaphore.h",
+    ],
+    includes = ["src/lib/IlmThread"],
+    deps = [":Iex"],
+)
+cc_library(
+    name = "OpenEXR",
+    srcs = [
+        "src/lib/OpenEXR/ImfAcesFile.cpp",
+        "src/lib/OpenEXR/ImfAttribute.cpp",
+        "src/lib/OpenEXR/ImfB44Compressor.cpp",
+        "src/lib/OpenEXR/ImfBoxAttribute.cpp",
+        "src/lib/OpenEXR/ImfCRgbaFile.cpp",
+        "src/lib/OpenEXR/ImfChannelList.cpp",
+        "src/lib/OpenEXR/ImfChannelListAttribute.cpp",
+        "src/lib/OpenEXR/ImfChromaticities.cpp",
+        "src/lib/OpenEXR/ImfChromaticitiesAttribute.cpp",
+        "src/lib/OpenEXR/ImfCompositeDeepScanLine.cpp",
+        "src/lib/OpenEXR/ImfCompressionAttribute.cpp",
+        "src/lib/OpenEXR/ImfCompressor.cpp",
+        "src/lib/OpenEXR/ImfConvert.cpp",
+        "src/lib/OpenEXR/ImfDeepCompositing.cpp",
+        "src/lib/OpenEXR/ImfDeepFrameBuffer.cpp",
+        "src/lib/OpenEXR/ImfDeepImageStateAttribute.cpp",
+        "src/lib/OpenEXR/ImfDeepScanLineInputFile.cpp",
+        "src/lib/OpenEXR/ImfDeepScanLineInputPart.cpp",
+        "src/lib/OpenEXR/ImfDeepScanLineOutputFile.cpp",
+        "src/lib/OpenEXR/ImfDeepScanLineOutputPart.cpp",
+        "src/lib/OpenEXR/ImfDeepTiledInputFile.cpp",
+        "src/lib/OpenEXR/ImfDeepTiledInputPart.cpp",
+        "src/lib/OpenEXR/ImfDeepTiledOutputFile.cpp",
+        "src/lib/OpenEXR/ImfDeepTiledOutputPart.cpp",
+        "src/lib/OpenEXR/ImfDoubleAttribute.cpp",
+        "src/lib/OpenEXR/ImfDwaCompressor.cpp",
+        "src/lib/OpenEXR/ImfEnvmap.cpp",
+        "src/lib/OpenEXR/ImfEnvmapAttribute.cpp",
+        "src/lib/OpenEXR/ImfFastHuf.cpp",
+        "src/lib/OpenEXR/ImfFloatAttribute.cpp",
+        "src/lib/OpenEXR/ImfFloatVectorAttribute.cpp",
+        "src/lib/OpenEXR/ImfFrameBuffer.cpp",
+        "src/lib/OpenEXR/ImfFramesPerSecond.cpp",
+        "src/lib/OpenEXR/ImfGenericInputFile.cpp",
+        "src/lib/OpenEXR/ImfGenericOutputFile.cpp",
+        "src/lib/OpenEXR/ImfHeader.cpp",
+        "src/lib/OpenEXR/ImfHuf.cpp",
+        "src/lib/OpenEXR/ImfIDManifest.cpp",
+        "src/lib/OpenEXR/ImfIDManifestAttribute.cpp",
+        "src/lib/OpenEXR/ImfIO.cpp",
+        "src/lib/OpenEXR/ImfInputFile.cpp",
+        "src/lib/OpenEXR/ImfInputPart.cpp",
+        "src/lib/OpenEXR/ImfInputPartData.cpp",
+        "src/lib/OpenEXR/ImfIntAttribute.cpp",
+        "src/lib/OpenEXR/ImfKeyCode.cpp",
+        "src/lib/OpenEXR/ImfKeyCodeAttribute.cpp",
+        "src/lib/OpenEXR/ImfLineOrderAttribute.cpp",
+        "src/lib/OpenEXR/ImfLut.cpp",
+        "src/lib/OpenEXR/ImfMatrixAttribute.cpp",
+        "src/lib/OpenEXR/ImfMisc.cpp",
+        "src/lib/OpenEXR/ImfMultiPartInputFile.cpp",
+        "src/lib/OpenEXR/ImfMultiPartOutputFile.cpp",
+        "src/lib/OpenEXR/ImfMultiView.cpp",
+        "src/lib/OpenEXR/ImfOpaqueAttribute.cpp",
+        "src/lib/OpenEXR/ImfOutputFile.cpp",
+        "src/lib/OpenEXR/ImfOutputPart.cpp",
+        "src/lib/OpenEXR/ImfOutputPartData.cpp",
+        "src/lib/OpenEXR/ImfPartType.cpp",
+        "src/lib/OpenEXR/ImfPizCompressor.cpp",
+        "src/lib/OpenEXR/ImfPreviewImage.cpp",
+        "src/lib/OpenEXR/ImfPreviewImageAttribute.cpp",
+        "src/lib/OpenEXR/ImfPxr24Compressor.cpp",
+        "src/lib/OpenEXR/ImfRational.cpp",
+        "src/lib/OpenEXR/ImfRationalAttribute.cpp",
+        "src/lib/OpenEXR/ImfRgbaFile.cpp",
+        "src/lib/OpenEXR/ImfRgbaYca.cpp",
+        "src/lib/OpenEXR/ImfRle.cpp",
+        "src/lib/OpenEXR/ImfRleCompressor.cpp",
+        "src/lib/OpenEXR/ImfScanLineInputFile.cpp",
+        "src/lib/OpenEXR/ImfStandardAttributes.cpp",
+        "src/lib/OpenEXR/ImfStdIO.cpp",
+        "src/lib/OpenEXR/ImfStringAttribute.cpp",
+        "src/lib/OpenEXR/ImfStringVectorAttribute.cpp",
+        "src/lib/OpenEXR/ImfSystemSpecific.cpp",
+        "src/lib/OpenEXR/ImfTestFile.cpp",
+        "src/lib/OpenEXR/ImfThreading.cpp",
+        "src/lib/OpenEXR/ImfTileDescriptionAttribute.cpp",
+        "src/lib/OpenEXR/ImfTileOffsets.cpp",
+        "src/lib/OpenEXR/ImfTiledInputFile.cpp",
+        "src/lib/OpenEXR/ImfTiledInputPart.cpp",
+        "src/lib/OpenEXR/ImfTiledMisc.cpp",
+        "src/lib/OpenEXR/ImfTiledOutputFile.cpp",
+        "src/lib/OpenEXR/ImfTiledOutputPart.cpp",
+        "src/lib/OpenEXR/ImfTiledRgbaFile.cpp",
+        "src/lib/OpenEXR/ImfTimeCode.cpp",
+        "src/lib/OpenEXR/ImfTimeCodeAttribute.cpp",
+        "src/lib/OpenEXR/ImfVecAttribute.cpp",
+        "src/lib/OpenEXR/ImfVersion.cpp",
+        "src/lib/OpenEXR/ImfWav.cpp",
+        "src/lib/OpenEXR/ImfZip.cpp",
+        "src/lib/OpenEXR/ImfZipCompressor.cpp",
+        "src/lib/OpenEXR/b44ExpLogTable.h",
+        "src/lib/OpenEXR/dwaLookups.h",
+    ],
+    hdrs = [
+        ":src/lib/Iex/IexConfig.h",
+        ":src/lib/Iex/IexConfigInternal.h",
+        ":src/lib/IlmThread/IlmThreadConfig.h",
+        "src/lib/OpenEXR/ImfAcesFile.h",
+        "src/lib/OpenEXR/ImfArray.h",
+        "src/lib/OpenEXR/ImfAttribute.h",
+        "src/lib/OpenEXR/ImfAutoArray.h",
+        "src/lib/OpenEXR/ImfB44Compressor.h",
+        "src/lib/OpenEXR/ImfBoxAttribute.h",
+        "src/lib/OpenEXR/ImfCRgbaFile.h",
+        "src/lib/OpenEXR/ImfChannelList.h",
+        "src/lib/OpenEXR/ImfChannelListAttribute.h",
+        "src/lib/OpenEXR/ImfCheckedArithmetic.h",
+        "src/lib/OpenEXR/ImfChromaticities.h",
+        "src/lib/OpenEXR/ImfChromaticitiesAttribute.h",
+        "src/lib/OpenEXR/ImfCompositeDeepScanLine.h",
+        "src/lib/OpenEXR/ImfCompression.h",
+        "src/lib/OpenEXR/ImfCompressionAttribute.h",
+        "src/lib/OpenEXR/ImfCompressor.h",
+        "src/lib/OpenEXR/ImfConvert.h",
+        "src/lib/OpenEXR/ImfDeepCompositing.h",
+        "src/lib/OpenEXR/ImfDeepFrameBuffer.h",
+        "src/lib/OpenEXR/ImfDeepImageState.h",
+        "src/lib/OpenEXR/ImfDeepImageStateAttribute.h",
+        "src/lib/OpenEXR/ImfDeepScanLineInputFile.h",
+        "src/lib/OpenEXR/ImfDeepScanLineInputPart.h",
+        "src/lib/OpenEXR/ImfDeepScanLineOutputFile.h",
+        "src/lib/OpenEXR/ImfDeepScanLineOutputPart.h",
+        "src/lib/OpenEXR/ImfDeepTiledInputFile.h",
+        "src/lib/OpenEXR/ImfDeepTiledInputPart.h",
+        "src/lib/OpenEXR/ImfDeepTiledOutputFile.h",
+        "src/lib/OpenEXR/ImfDeepTiledOutputPart.h",
+        "src/lib/OpenEXR/ImfDoubleAttribute.h",
+        "src/lib/OpenEXR/ImfDwaCompressor.h",
+        "src/lib/OpenEXR/ImfDwaCompressorSimd.h",
+        "src/lib/OpenEXR/ImfEnvmap.h",
+        "src/lib/OpenEXR/ImfEnvmapAttribute.h",
+        "src/lib/OpenEXR/ImfExport.h",
+        "src/lib/OpenEXR/ImfFastHuf.h",
+        "src/lib/OpenEXR/ImfFloatAttribute.h",
+        "src/lib/OpenEXR/ImfFloatVectorAttribute.h",
+        "src/lib/OpenEXR/ImfForward.h",
+        "src/lib/OpenEXR/ImfFrameBuffer.h",
+        "src/lib/OpenEXR/ImfFramesPerSecond.h",
+        "src/lib/OpenEXR/ImfGenericInputFile.h",
+        "src/lib/OpenEXR/ImfGenericOutputFile.h",
+        "src/lib/OpenEXR/ImfHeader.h",
+        "src/lib/OpenEXR/ImfHuf.h",
+        "src/lib/OpenEXR/ImfIDManifest.h",
+        "src/lib/OpenEXR/ImfIDManifestAttribute.h",
+        "src/lib/OpenEXR/ImfIO.h",
+        "src/lib/OpenEXR/ImfInputFile.h",
+        "src/lib/OpenEXR/ImfInputPart.h",
+        "src/lib/OpenEXR/ImfInputPartData.h",
+        "src/lib/OpenEXR/ImfInputStreamMutex.h",
+        "src/lib/OpenEXR/ImfInt64.h",
+        "src/lib/OpenEXR/ImfIntAttribute.h",
+        "src/lib/OpenEXR/ImfKeyCode.h",
+        "src/lib/OpenEXR/ImfKeyCodeAttribute.h",
+        "src/lib/OpenEXR/ImfLineOrder.h",
+        "src/lib/OpenEXR/ImfLineOrderAttribute.h",
+        "src/lib/OpenEXR/ImfLut.h",
+        "src/lib/OpenEXR/ImfMatrixAttribute.h",
+        "src/lib/OpenEXR/ImfMisc.h",
+        "src/lib/OpenEXR/ImfMultiPartInputFile.h",
+        "src/lib/OpenEXR/ImfMultiPartOutputFile.h",
+        "src/lib/OpenEXR/ImfMultiView.h",
+        "src/lib/OpenEXR/ImfName.h",
+        "src/lib/OpenEXR/ImfNamespace.h",
+        "src/lib/OpenEXR/ImfOpaqueAttribute.h",
+        "src/lib/OpenEXR/ImfOptimizedPixelReading.h",
+        "src/lib/OpenEXR/ImfOutputFile.h",
+        "src/lib/OpenEXR/ImfOutputPart.h",
+        "src/lib/OpenEXR/ImfOutputPartData.h",
+        "src/lib/OpenEXR/ImfOutputStreamMutex.h",
+        "src/lib/OpenEXR/ImfPartHelper.h",
+        "src/lib/OpenEXR/ImfPartType.h",
+        "src/lib/OpenEXR/ImfPixelType.h",
+        "src/lib/OpenEXR/ImfPizCompressor.h",
+        "src/lib/OpenEXR/ImfPreviewImage.h",
+        "src/lib/OpenEXR/ImfPreviewImageAttribute.h",
+        "src/lib/OpenEXR/ImfPxr24Compressor.h",
+        "src/lib/OpenEXR/ImfRational.h",
+        "src/lib/OpenEXR/ImfRationalAttribute.h",
+        "src/lib/OpenEXR/ImfRgba.h",
+        "src/lib/OpenEXR/ImfRgbaFile.h",
+        "src/lib/OpenEXR/ImfRgbaYca.h",
+        "src/lib/OpenEXR/ImfRle.h",
+        "src/lib/OpenEXR/ImfRleCompressor.h",
+        "src/lib/OpenEXR/ImfScanLineInputFile.h",
+        "src/lib/OpenEXR/ImfSimd.h",
+        "src/lib/OpenEXR/ImfStandardAttributes.h",
+        "src/lib/OpenEXR/ImfStdIO.h",
+        "src/lib/OpenEXR/ImfStringAttribute.h",
+        "src/lib/OpenEXR/ImfStringVectorAttribute.h",
+        "src/lib/OpenEXR/ImfSystemSpecific.h",
+        "src/lib/OpenEXR/ImfTestFile.h",
+        "src/lib/OpenEXR/ImfThreading.h",
+        "src/lib/OpenEXR/ImfTileDescription.h",
+        "src/lib/OpenEXR/ImfTileDescriptionAttribute.h",
+        "src/lib/OpenEXR/ImfTileOffsets.h",
+        "src/lib/OpenEXR/ImfTiledInputFile.h",
+        "src/lib/OpenEXR/ImfTiledInputPart.h",
+        "src/lib/OpenEXR/ImfTiledMisc.h",
+        "src/lib/OpenEXR/ImfTiledOutputFile.h",
+        "src/lib/OpenEXR/ImfTiledOutputPart.h",
+        "src/lib/OpenEXR/ImfTiledRgbaFile.h",
+        "src/lib/OpenEXR/ImfTimeCode.h",
+        "src/lib/OpenEXR/ImfTimeCodeAttribute.h",
+        "src/lib/OpenEXR/ImfVecAttribute.h",
+        "src/lib/OpenEXR/ImfVersion.h",
+        "src/lib/OpenEXR/ImfWav.h",
+        "src/lib/OpenEXR/ImfXdr.h",
+        "src/lib/OpenEXR/ImfZip.h",
+        "src/lib/OpenEXR/ImfZipCompressor.h",
+        ":src/lib/OpenEXR/OpenEXRConfig.h",
+        ":src/lib/OpenEXR/OpenEXRConfigInternal.h",
+    ],
+    includes = ["src/lib/OpenEXR"],
+    deps = [
+        ":IlmThread",
+        "@imath//:Imath",
+        "@zlib//:zlib",
+    ],
+    visibility = ["//visibility:public"],
+)
+""",
+    remote = "https://github.com/AcademySoftwareFoundation/openexr",
+    tag = "v3.1.5",
+)
diff --git a/third-party/libjxl/libjxl/bash_test.sh b/third-party/libjxl/libjxl/bash_test.sh
new file mode 100755
index 0000000000..9a8665c55e
--- /dev/null
+++ b/third-party/libjxl/libjxl/bash_test.sh
@@ -0,0 +1,317 @@
+#!/bin/bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Tests implemented in bash. These typically will run checks about the source
+# code rather than the compiled one.
+
+MYDIR=$(dirname $(realpath "$0"))
+
+set -u
+
+test_includes() {
+  local ret=0
+  local f
+  for f in $(git ls-files | grep -E '(\.cc|\.cpp|\.h)$'); do
+    if [ ! -e "$f" ]; then
+      continue
+    fi
+    # Check that the full paths to the public headers are not used, since users
+    # of the library will include the library as: #include "jxl/foobar.h".
+    if grep -i -H -n -E '#include\s*[<"]lib/include/jxl' "$f" >&2; then
+      echo "Don't add \"include/\" to the include path of public headers." >&2
+      ret=1
+    fi
+
+    if [[ "${f#third_party/}" == "$f" ]]; then
+      # $f is not in third_party/
+
+      # Check that local files don't use the full path to third_party/
+      # directory since the installed versions will not have that path.
+      # Add an exception for third_party/dirent.h.
+      if grep -v -F 'third_party/dirent.h' "$f" | \
+          grep -i -H -n -E '#include\s*[<"]third_party/' >&2 &&
+          [[ $ret -eq 0 ]]; then
+        cat >&2 <<EOF
+$f: Don't add third_party/ to the include path of third_party projects. This \
+makes it harder to use installed system libraries instead of the third_party/ \
+ones.
+EOF
+        ret=1
+      fi
+    fi
+
+  done
+  return ${ret}
+}
+
+# Returns 1 if a tracked file under lib/include/ has a counterpart at the same
+# relative path directly under lib/.
+test_include_collision() {
+  local status=0
+  local header
+  for header in $(git ls-files | grep -E '^lib/include/'); do
+    # Tracked files that are missing from the working tree are skipped.
+    [[ -e "${header}" ]] || continue
+    # Path of the header relative to lib/include/.
+    local rel=${header#lib/include/}
+    if [[ -e "lib/${rel}" ]]; then
+      echo "${header}: Name collision, both ${header} and lib/${rel} exist." >&2
+      status=1
+    fi
+  done
+  return ${status}
+}
+
+# Returns 1 if a tracked source/script file outside third_party/ lacks the
+# project copyright line or the BSD license line within its first 10 lines.
+test_copyright() {
+  local status=0
+  local f
+  local pattern='(Dockerfile.*|\.c|\.cc|\.cpp|\.gni|\.h|\.java|\.sh|\.m|\.py|\.ui|\.yml)$'
+  for f in $(git ls-files | grep -E "${pattern}"); do
+    # Tracked files that are missing from the working tree are skipped.
+    [[ -e "$f" ]] || continue
+    # Files under third_party/ are not checked.
+    [[ "${f#third_party/}" == "$f" ]] || continue
+
+    if ! head -n 10 "$f" |
+        grep -F 'Copyright (c) the JPEG XL Project Authors.' >/dev/null; then
+      echo "$f: Missing Copyright blob near the top of the file." >&2
+      status=1
+    fi
+    if ! head -n 10 "$f" |
+        grep -F 'Use of this source code is governed by a BSD-style' \
+          >/dev/null; then
+      echo "$f: Missing License blob near the top of the file." >&2
+      status=1
+    fi
+  done
+  return ${status}
+}
+
+# Check that we don't use "%zu" or "%zd" in format string for size_t.
+test_printf_size_t() {
+  local ret=0
+  if grep -n -E '%[0-9]*z[udx]' \
+      $(git ls-files | grep -E '(\.c|\.cc|\.cpp|\.h)$'); then
+    echo "Don't use '%zu' or '%zd' in a format string, instead use " \
+      "'%\" PRIuS \"' or '%\" PRIdS \"'." >&2
+    ret=1
+  fi
+
+  if grep -n -E 'gtest\.h' \
+      $(git ls-files | grep -E '(\.c|\.cc|\.cpp|\.h)$' | grep -v -F /testing.h); then
+    echo "Don't include gtest directly, instead include 'testing.h'. " >&2
+    ret=1
+  fi
+
+  if grep -n -E 'gmock\.h' \
+      $(git ls-files | grep -E '(\.c|\.cc|\.cpp|\.h)$' | grep -v -F /testing.h); then
+    echo "Don't include gmock directly, instead include 'testing.h'. " >&2
+    ret=1
+  fi
+
+  local f
+  for f in $(git ls-files | grep -E "\.cc$" | xargs grep 'PRI[udx]S' |
+      cut -f 1 -d : | uniq); do
+    if [ ! -e "$f" ]; then
+      continue
+    fi
+    if ! grep -F printf_macros.h "$f" >/dev/null; then
+      echo "$f: Add lib/jxl/base/printf_macros.h for PRI.S, or use other " \
+        "types for code outside lib/jxl library." >&2
+      ret=1
+    fi
+  done
+
+  for f in $(git ls-files | grep -E "\.h$" | grep -v -E '(printf_macros\.h|testing\.h)' |
+      xargs grep -n 'PRI[udx]S'); do
+    # Having PRIuS / PRIdS in a header file means that printf_macros.h may
+    # be included before a system header, in particular before gtest headers.
+    # those may re-define PRIuS unconditionally causing a compile error.
+    echo "$f: Don't use PRI.S in header files. Sorry."
+    ret=1
+  done
+
+  return ${ret}
+}
+
+# Check that "dec_" code doesn't depend on "enc_" headers.
+test_dec_enc_deps() {
+  local ret=0
+  local f
+  for f in $(git ls-files | grep -E '/dec_'); do
+    if [ ! -e "$f" ]; then
+      continue
+    fi
+    if [[ "${f#third_party/}" == "$f" ]]; then
+      # $f is not in third_party/
+      if grep -n -H -E "#include.*/enc_" "$f" >&2; then
+        echo "$f: Don't include \"enc_*\" files from \"dec_*\" files." >&2
+        ret=1
+      fi
+    fi
+  done
+  return ${ret}
+}
+
+# Check for git merge conflict markers.
+test_merge_conflict() {
+  local ret=0
+  TEXT_FILES='(\.cc|\.cpp|\.h|\.sh|\.m|\.py|\.md|\.txt|\.cmake)$'
+  for f in $(git ls-files | grep -E "${TEXT_FILES}"); do
+    if [ ! -e "$f" ]; then
+      continue
+    fi
+    if grep -E '^<<<<<<< ' "$f"; then
+      echo "$f: Found git merge conflict marker. Please resolve." >&2
+      ret=1
+    fi
+  done
+  return ${ret}
+}
+
+# Check that the library and the package have the same version. This prevents
+# accidentally having them out of sync.
+get_version() {
+  # Prints the value of the CMake variable named by $1, taken from the first
+  # "set(<name> <value>)" line in lib/CMakeLists.txt. Prints nothing and
+  # returns 1 when no such line is found.
+  local varname=$1
+  local line
+  line=$(grep -F "set(${varname} " lib/CMakeLists.txt | head -n 1)
+  # This script does not run under "set -e", so a bare test would be silently
+  # ignored; bail out explicitly instead.
+  [[ -n "${line}" ]] || return 1
+  line="${line#set(${varname} }"
+  line="${line%)}"
+  echo "${line}"
+}
+
+# Compares the library version from lib/CMakeLists.txt with the package
+# version on the first line of debian/changelog. Returns 1 if either cannot be
+# parsed or if the package version does not start with the library version.
+test_version() {
+  local major=$(get_version JPEGXL_MAJOR_VERSION)
+  local minor=$(get_version JPEGXL_MINOR_VERSION)
+  local patch=$(get_version JPEGXL_PATCH_VERSION)
+  # Every component must be present. Testing only the concatenation would let
+  # a single missing component through, and the resulting truncated version
+  # could still prefix-match the package version below.
+  if [[ -z "${major}" || -z "${minor}" || -z "${patch}" ]]; then
+    echo "Couldn't parse version from CMakeLists.txt" >&2
+    return 1
+  fi
+  local pkg_version=$(head -n 1 debian/changelog)
+  # Get only the part between the first "jpeg-xl (" and the following ")".
+  pkg_version="${pkg_version#jpeg-xl (}"
+  pkg_version="${pkg_version%%)*}"
+  if [[ -z "${pkg_version}" ]]; then
+    echo "Couldn't parse version from debian package" >&2
+    return 1
+  fi
+
+  local lib_version="${major}.${minor}.${patch}"
+  # A trailing ".0" patch component is dropped before comparing.
+  lib_version="${lib_version%.0}"
+  if [[ "${pkg_version}" != "${lib_version}"* ]]; then
+    echo "Debian package version (${pkg_version}) doesn't match library" \
+      "version (${lib_version})." >&2
+    return 1
+  fi
+  return 0
+}
+
+# Check that the SHA versions in deps.sh matches the git submodules.
+# For every "[submodule ...]" entry in .gitmodules that has a matching
+# "<UPPERCASED_PATH>=" line in deps.sh, the quoted SHA from deps.sh must equal
+# the SHA recorded for that path in HEAD. Returns 1 on the first mismatch.
+test_deps_version() {
+  local line
+  while IFS= read -r line; do
+    if [[ "${line:0:10}" != "[submodule" ]]; then
+      continue
+    fi
+    line="${line#[submodule \"}"
+    line="${line%\"]}"
+    # Variable name used in deps.sh: upper-cased path with the first "/"
+    # replaced by "_".
+    local varname=$(tr '[:lower:]' '[:upper:]' <<< "${line}")
+    varname="${varname/\//_}"
+    if ! grep -F "${varname}=" deps.sh >/dev/null; then
+      # Ignoring submodule not in deps.sh
+      continue
+    fi
+    local deps_sha=$(grep -F "${varname}=" deps.sh | cut -f 2 -d '"')
+    # The script does not run under "set -e", so the emptiness check has to
+    # fail explicitly to have any effect.
+    if [[ -z "${deps_sha}" ]]; then
+      echo "deps.sh: Couldn't parse the SHA for ${varname}." >&2
+      return 1
+    fi
+    local git_sha=$(git ls-tree -r HEAD "${line}" | cut -f 1 | cut -f 3 -d ' ')
+    if [[ "${deps_sha}" != "${git_sha}" ]]; then
+      cat >&2 <<EOF
+deps.sh: SHA for project ${line} is at ${deps_sha} but the git submodule is at
+${git_sha}. Please update deps.sh
+
+If you did not intend to change the submodule's SHA value, it is possible that
+you accidentally included this change in your commit after a rebase or checkout
+without running "git submodule update --init". To revert the submodule change
+run from the top checkout directory:
+
+  git -C ${line} checkout ${deps_sha}
+  git commit --amend ${line}
+
+EOF
+      return 1
+    fi
+  done < .gitmodules
+}
+
+# Make sure that all the Fields objects are fuzzed directly.
+test_fuzz_fields() {
+  local ret=0
+  # List all the classes of the form "ClassName : public Fields".
+  # This doesn't catch class names that are too long to fit.
+  local field_classes=$( git ls-files |
+    grep -E '\.(cc|h)' | grep -v 'test\.cc$' |
+    xargs grep -h -o -E '\b[^ ]+ : public Fields' | cut -f 1 -d ' ')
+  local classname
+  for classname in ${field_classes}; do
+    if [ ! -e "$classname" ]; then
+      continue
+    fi
+    if ! grep -E "\\b${classname}\\b" tools/fields_fuzzer.cc >/dev/null; then
+      cat >&2 <<EOF
+tools/fields_fuzzer.cc: Class ${classname} not found in the fields_fuzzer.
+EOF
+      ret=1
+    fi
+  done
+  return $ret
+}
+
+# Test that we don't use %n in C++ code to avoid using it in printf and scanf.
+# This test is not very precise but in cases where "module n" is needed we would
+# normally have "% n" instead of "%n". Using %n is not allowed in Android 10+.
+test_percent_n() {
+  local ret=0
+  local f
+  for f in $(git ls-files | grep -E '(\.cc|\.cpp|\.h)$'); do
+    if [ ! -e "$f" ]; then
+      continue
+    fi
+    if grep -i -H -n -E '%h*n' "$f" >&2; then
+      echo "Don't use \"%n\"." >&2
+      ret=1
+    fi
+  done
+  return ${ret}
+}
+
+main() {
+  local ret=0
+  cd "${MYDIR}"
+
+  if ! git rev-parse >/dev/null 2>/dev/null; then
+    echo "Not a git checkout, skipping bash_test"
+    return 0
+  fi
+
+  IFS=$'\n'
+  for f in $(declare -F); do
+    local test_name=$(echo "$f" | cut -f 3 -d ' ')
+    # Runs all the local bash functions that start with "test_".
+    if [[ "${test_name}" == test_* ]]; then
+      echo "Test ${test_name}: Start"
+      if ${test_name}; then
+        echo "Test ${test_name}: PASS"
+      else
+        echo "Test ${test_name}: FAIL"
+        ret=1
+      fi
+    fi
+  done
+  return ${ret}
+}
+
+main "$@"
diff --git a/third-party/libjxl/libjxl/ci.sh b/third-party/libjxl/libjxl/ci.sh
new file mode 100755
index 0000000000..57e26d7340
--- /dev/null
+++ b/third-party/libjxl/libjxl/ci.sh
@@ -0,0 +1,1552 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Continuous integration helper module. This module is meant to be called from
+# the .gitlab-ci.yml file during the continuous integration build, as well as
+# from the command line for developers.
+
+set -eu
+
+OS=$(uname -s)
+# Quote the inner substitution so a checkout path with spaces is kept intact.
+MYDIR=$(dirname "$(realpath "$0")")
+
+### Environment parameters (every default below can be overridden by the caller):
+TEST_STACK_LIMIT="${TEST_STACK_LIMIT:-256}"  # value for "ulimit -s" in test runs; "none" skips it
+CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-RelWithDebInfo}
+CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH:-}
+CMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER:-}
+CMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER:-}
+CMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM:-}
+SKIP_BUILD="${SKIP_BUILD:-0}"  # 1: cmake_build_and_test returns without building or testing
+SKIP_TEST="${SKIP_TEST:-0}"  # 1: build only, ctest is not run
+TARGETS="${TARGETS:-all doc}"  # build targets handed to "cmake --build"
+TEST_SELECTOR="${TEST_SELECTOR:-}"  # extra arguments inserted in the ctest command line
+BUILD_TARGET="${BUILD_TARGET:-}"  # target triplet; empty: no cross-compilation settings are added
+ENABLE_WASM_SIMD="${ENABLE_WASM_SIMD:-0}"
+if [[ -n "${BUILD_TARGET}" ]]; then  # default build dir is suffixed with the triplet's first field
+  BUILD_DIR="${BUILD_DIR:-${MYDIR}/build-${BUILD_TARGET%%-*}}"
+else
+  BUILD_DIR="${BUILD_DIR:-${MYDIR}/build}"
+fi
+# Whether we should post a message in the MR when the build fails.
+POST_MESSAGE_ON_ERROR="${POST_MESSAGE_ON_ERROR:-1}"
+
+# Set default compilers to clang if not already set
+export CC=${CC:-clang}
+export CXX=${CXX:-clang++}
+
+# Time limit for the "fuzz" command in seconds (0 means no limit).
+FUZZER_MAX_TIME="${FUZZER_MAX_TIME:-0}"
+
+SANITIZER="none"  # overwritten by cmd_asan, cmd_tsan and cmd_msan before configuring
+
+
+if [[ "${BUILD_TARGET%%-*}" == "x86_64" ||
+    "${BUILD_TARGET%%-*}" == "i686" ]]; then
+  # Default to building all targets, even if compiler baseline is SSE4
+  HWY_BASELINE_TARGETS=${HWY_BASELINE_TARGETS:-HWY_EMU128}
+else
+  HWY_BASELINE_TARGETS=${HWY_BASELINE_TARGETS:-}
+fi
+
+# Convenience flag to pass both CMAKE_C_FLAGS and CMAKE_CXX_FLAGS
+CMAKE_FLAGS=${CMAKE_FLAGS:-}
+CMAKE_C_FLAGS="${CMAKE_C_FLAGS:-} ${CMAKE_FLAGS}"
+CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS:-} ${CMAKE_FLAGS}"
+
+CMAKE_CROSSCOMPILING_EMULATOR=${CMAKE_CROSSCOMPILING_EMULATOR:-}
+CMAKE_EXE_LINKER_FLAGS=${CMAKE_EXE_LINKER_FLAGS:-}
+CMAKE_FIND_ROOT_PATH=${CMAKE_FIND_ROOT_PATH:-}
+CMAKE_MODULE_LINKER_FLAGS=${CMAKE_MODULE_LINKER_FLAGS:-}
+CMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS:-}
+CMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE:-}
+
+if [[ "${ENABLE_WASM_SIMD}" -ne "0" ]]; then  # any non-zero value adds -msimd128
+  CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -msimd128"
+  CMAKE_C_FLAGS="${CMAKE_C_FLAGS} -msimd128"
+  CMAKE_EXE_LINKER_FLAGS="${CMAKE_EXE_LINKER_FLAGS} -msimd128"
+fi
+
+if [[ "${ENABLE_WASM_SIMD}" -eq "2" ]]; then  # 2 additionally defines HWY_WANT_WASM2
+  CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -DHWY_WANT_WASM2"
+  CMAKE_C_FLAGS="${CMAKE_C_FLAGS} -DHWY_WANT_WASM2"
+fi
+
+if [[ ! -z "${HWY_BASELINE_TARGETS}" ]]; then  # only the C++ flags get the define
+  CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -DHWY_BASELINE_TARGETS=${HWY_BASELINE_TARGETS}"
+fi
+
+# Version inferred from the CI variables.
+CI_COMMIT_SHA=${CI_COMMIT_SHA:-${GITHUB_SHA:-}}
+JPEGXL_VERSION=${JPEGXL_VERSION:-${CI_COMMIT_SHA:0:8}}  # defaults to the first 8 chars of the SHA
+
+# Benchmark parameters
+STORE_IMAGES=${STORE_IMAGES:-1}  # 1: run_benchmark also saves compressed/decompressed images
+BENCHMARK_CORPORA="${MYDIR}/third_party/corpora"  # download cache for the benchmark corpora
+
+# UBSan checks appended to the compiler flags by cmd_asan, cmd_tsan and cmd_msan.
+UBSAN_FLAGS=(
+  -fsanitize=alignment
+  -fsanitize=bool
+  -fsanitize=bounds
+  -fsanitize=builtin
+  -fsanitize=enum
+  -fsanitize=float-cast-overflow
+  -fsanitize=float-divide-by-zero
+  -fsanitize=integer-divide-by-zero
+  -fsanitize=null
+  -fsanitize=object-size
+  -fsanitize=pointer-overflow
+  -fsanitize=return
+  -fsanitize=returns-nonnull-attribute
+  -fsanitize=shift-base
+  -fsanitize=shift-exponent
+  -fsanitize=unreachable
+  -fsanitize=vla-bound
+
+  -fno-sanitize-recover=undefined
+  # Brunsli uses unaligned accesses to uint32_t, so alignment is just a warning.
+  -fsanitize-recover=alignment
+)
+# -fsanitize=function doesn't work on aarch64 and arm.
+if [[ "${BUILD_TARGET%%-*}" != "aarch64" &&
+    "${BUILD_TARGET%%-*}" != "arm" ]]; then
+  UBSAN_FLAGS+=(
+    -fsanitize=function
+  )
+fi
+if [[ "${BUILD_TARGET%%-*}" != "arm" ]]; then  # left out when the target's first field is "arm"
+  UBSAN_FLAGS+=(
+    -fsanitize=signed-integer-overflow
+  )
+fi
+
+CLANG_TIDY_BIN=$(which clang-tidy-6.0 clang-tidy-7 clang-tidy-8 clang-tidy | head -n 1)  # first hit; empty if none
+# Default to "cat" if "colordiff" is not installed or if stdout is not a tty.
+if [[ -t 1 ]]; then
+  COLORDIFF_BIN=$(which colordiff cat | head -n 1)
+else
+  COLORDIFF_BIN="cat"
+fi
+FIND_BIN=$(which gfind find | head -n 1)  # "gfind" is preferred over "find" when both exist
+# "false" will disable wine64 when not installed. This won't allow
+# cross-compiling.
+WINE_BIN=$(which wine64 false | head -n 1)
+
+CLANG_VERSION="${CLANG_VERSION:-}"
+# Detect the clang version suffix and store it in CLANG_VERSION. For example,
+# "6.0" for clang 6 or "7" for clang 7.
+detect_clang_version() {
+  if [[ -n "${CLANG_VERSION}" ]]; then
+    return 0
+  fi
+  local clang_version=$("${CC:-clang}" --version | head -n1)
+  clang_version=${clang_version#"Debian "}
+  clang_version=${clang_version#"Ubuntu "}
+  local llvm_tag
+  case "${clang_version}" in
+    "clang version 6."*)
+      CLANG_VERSION="6.0"
+      ;;
+    "clang version "*)
+      # Any other clang version uses just the major version number.
+      local suffix="${clang_version#clang version }"
+      CLANG_VERSION="${suffix%%.*}"
+      ;;
+    "emcc"*)
+      # We can't use asan or msan in the emcc case.
+      ;;
+    *)
+      echo "Unknown clang version: ${clang_version}" >&2
+      return 1
+  esac
+}
+
+# Temporary files cleanup hooks.
+CLEANUP_FILES=()
+cleanup() {
+  if [[ ${#CLEANUP_FILES[@]} -ne 0 ]]; then
+    rm -fr "${CLEANUP_FILES[@]}"
+  fi
+}
+
+# Exit hook installed with "trap": $1 is the exit status the script ended with.
+on_exit() {
+  local retcode="$1"
+  # Always cleanup the CLEANUP_FILES.
+  cleanup
+
+  # Post a message in the MR unless POST_MESSAGE_ON_ERROR is "0", but only if
+  # the run failed on master in CI and we are not running from a MR pipeline.
+  if [[ ${retcode} -ne 0 && -n "${CI_BUILD_NAME:-}" &&
+        "${POST_MESSAGE_ON_ERROR}" != "0" && -z "${CI_MERGE_REQUEST_ID:-}" &&
+        "${CI_BUILD_REF_NAME:-}" = "master" ]]; then
+    load_mr_vars_from_commit
+    { set +xeu; } 2>/dev/null
+    local message="**Run ${CI_BUILD_NAME} @ ${CI_COMMIT_SHORT_SHA} failed.**
+
+Check the output of the job at ${CI_JOB_URL:-} to see if this was your problem.
+If it was, please rollback this change or fix the problem ASAP, broken builds
+slow down development. Check if the error already existed in the previous build
+as well.
+
+Pipeline: ${CI_PIPELINE_URL}
+
+Previous build commit: ${CI_COMMIT_BEFORE_SHA}
+"
+    cmd_post_mr_comment "${message}"
+  fi
+}
+
+trap 'retcode=$?; { set +x; } 2>/dev/null; on_exit ${retcode}' INT TERM EXIT
+
+
+# These variables are populated when calling merge_request_commits().
+
+# The current hash at the top of the current branch or merge request branch (if
+# running from a merge request pipeline).
+MR_HEAD_SHA=""
+# The common ancestor between the current commit and the tracked branch, such
+# as master.
+MR_ANCESTOR_SHA=""
+
+# Populate MR_HEAD_SHA and MR_ANCESTOR_SHA.
+merge_request_commits() {
+  { set +x; } 2>/dev/null
+  # GITHUB_SHA is the current reference being built in GitHub Actions.
+  if [[ -n "${GITHUB_SHA:-}" ]]; then
+    # GitHub normally checks out a merge commit on a shallow repository. Fetch
+    # 10 more commits so changes on the Pull Request can be diffed if needed
+    # (a PR normally has 1 commit). FETCH_HEAD is resolved with -C as well so
+    # it is read from the repository the fetch ran in, whatever the cwd is.
+    git -C "${MYDIR}" fetch -q origin "${GITHUB_SHA}" --depth 10
+    MR_HEAD_SHA="$(git -C "${MYDIR}" rev-parse "FETCH_HEAD^2" 2>/dev/null ||
+                   echo "${GITHUB_SHA}")"
+  else
+    # CI_BUILD_REF is the reference currently being built in the CI workflow.
+    MR_HEAD_SHA=$(git -C "${MYDIR}" rev-parse -q "${CI_BUILD_REF:-HEAD}")
+  fi
+
+  if [[ -n "${CI_MERGE_REQUEST_IID:-}" ]]; then
+    # Merge request pipeline in CI. In this case the upstream is called "origin"
+    # but it refers to the forked project that's the source of the merge
+    # request. We need to get the target of the merge request, for which we need
+    # to query that repository using our CI_JOB_TOKEN.
+    echo "machine gitlab.com login gitlab-ci-token password ${CI_JOB_TOKEN}" \
+      >> "${HOME}/.netrc"
+    git -C "${MYDIR}" fetch "${CI_MERGE_REQUEST_PROJECT_URL}" \
+      "${CI_MERGE_REQUEST_TARGET_BRANCH_NAME}"
+    MR_ANCESTOR_SHA=$(git -C "${MYDIR}" rev-parse -q FETCH_HEAD)
+  elif [[ -n "${GITHUB_BASE_REF:-}" ]]; then
+    # Pull request workflow in GitHub Actions. GitHub checkout action uses
+    # "origin" as the remote for the git checkout.
+    git -C "${MYDIR}" fetch -q origin "${GITHUB_BASE_REF}"
+    MR_ANCESTOR_SHA=$(git -C "${MYDIR}" rev-parse -q FETCH_HEAD)
+  else
+    # We are in a local branch, not a merge request.
+    MR_ANCESTOR_SHA=$(git -C "${MYDIR}" rev-parse -q HEAD@{upstream} || true)
+  fi
+
+  if [[ -z "${MR_ANCESTOR_SHA}" ]]; then
+    echo "Warning, not tracking any branch, using the last commit in HEAD.">&2
+    # This prints the return value with just HEAD.
+    MR_ANCESTOR_SHA=$(git -C "${MYDIR}" rev-parse -q "${MR_HEAD_SHA}^")
+  else
+    # GitHub runs the pipeline on a merge commit, no need to look for the common
+    # ancestor in that case.
+    if [[ -z "${GITHUB_BASE_REF:-}" ]]; then
+      MR_ANCESTOR_SHA=$(git -C "${MYDIR}" merge-base \
+        "${MR_ANCESTOR_SHA}" "${MR_HEAD_SHA}")
+    fi
+  fi
+  set -x
+}
+
+# Load the MR iid from the landed commit message when running not from a
+# merge request workflow. This is useful to post back results at the merge
+# request when running pipelines from master.
+load_mr_vars_from_commit() {
+  { set +x; } 2>/dev/null
+  if [[ -z "${CI_MERGE_REQUEST_IID:-}" ]]; then
+    # First line of the HEAD commit message linking to a merge request of this
+    # project, e.g.: Part-of: <https://gitlab.com/wg1/jpeg-xlm/merge_requests/123456>
+    local mr_line=$(git rev-list --format=%B --max-count=1 HEAD |
+      grep -F "${CI_PROJECT_URL}" | grep -F "/merge_requests" | head -n 1)
+    if [[ -n "${mr_line}" ]]; then
+      # Keep only the number that follows "merge_requests/".
+      CI_MERGE_REQUEST_IID=$(echo "${mr_line}" |
+        sed -E 's,^.*merge_requests/([0-9]+)>.*$,\1,')
+      CI_MERGE_REQUEST_PROJECT_ID=${CI_PROJECT_ID}
+    fi
+  fi
+  set -x
+}
+
+# Posts a comment to the current merge request.
+cmd_post_mr_comment() {
+  { set +x; } 2>/dev/null
+  local body="$1"
+  # Posting requires both the bot token and a known merge request id;
+  # without them nothing is sent.
+  if [[ -n "${BOT_TOKEN:-}" && -n "${CI_MERGE_REQUEST_IID:-}" ]]; then
+    local project_api="${CI_API_V4_URL}/projects/${CI_MERGE_REQUEST_PROJECT_ID}"
+    curl -X POST -g -H "PRIVATE-TOKEN: ${BOT_TOKEN}" \
+      --data-urlencode "body=${body}" --output /dev/null \
+      "${project_api}/merge_requests/${CI_MERGE_REQUEST_IID}/notes"
+  fi
+  set -x
+}
+
+# Set up and export the environment variables needed by the child processes.
+export_env() {
+  if [[ "${BUILD_TARGET}" == *mingw32 ]]; then
+    # Wine needs to know the paths to the mingw dlls. These should be
+    # separated by ';', so the compiler's ':'-separated library dirs are converted.
+    WINEPATH=$("${CC:-clang}" -print-search-dirs --target="${BUILD_TARGET}" \
+      | grep -F 'libraries: =' | cut -f 2- -d '=' | tr ':' ';')
+    # We also need our own libraries in the wine path.
+    local real_build_dir=$(realpath "${BUILD_DIR}")
+    # Some library .dll dependencies are installed in /bin:
+    export WINEPATH="${WINEPATH};${real_build_dir};${real_build_dir}/third_party/brotli;/usr/${BUILD_TARGET}/bin"
+    # Wine gets its own prefix directory inside the build directory.
+    local prefix="${BUILD_DIR}/wineprefix"
+    mkdir -p "${prefix}"
+    export WINEPREFIX=$(realpath "${prefix}")
+  fi
+  # Sanitizers need these variables to print and properly format the stack
+  # traces. A compiler that cannot report the symbolizer leaves them unset.
+  LLVM_SYMBOLIZER=$("${CC:-clang}" -print-prog-name=llvm-symbolizer || true)
+  if [[ -n "${LLVM_SYMBOLIZER}" ]]; then
+    export ASAN_SYMBOLIZER_PATH="${LLVM_SYMBOLIZER}"
+    export MSAN_SYMBOLIZER_PATH="${LLVM_SYMBOLIZER}"
+    export UBSAN_SYMBOLIZER_PATH="${LLVM_SYMBOLIZER}"
+  fi
+}
+
+cmake_configure() {  # Runs the cmake configure step; "$@" is appended after the generated arguments.
+  export_env
+
+  if [[ "${STACK_SIZE:-0}" == 1 ]]; then
+    # Dump the stack size of each function in the .stack_sizes section for
+    # analysis.
+    CMAKE_C_FLAGS+=" -fstack-size-section"
+    CMAKE_CXX_FLAGS+=" -fstack-size-section"
+  fi
+
+  local args=(
+    -B"${BUILD_DIR}" -H"${MYDIR}"
+    -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}"
+    -G Ninja
+    -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}"
+    -DCMAKE_C_FLAGS="${CMAKE_C_FLAGS}"
+    -DCMAKE_EXE_LINKER_FLAGS="${CMAKE_EXE_LINKER_FLAGS}"
+    -DCMAKE_MODULE_LINKER_FLAGS="${CMAKE_MODULE_LINKER_FLAGS}"
+    -DCMAKE_SHARED_LINKER_FLAGS="${CMAKE_SHARED_LINKER_FLAGS}"
+    -DJPEGXL_VERSION="${JPEGXL_VERSION}"
+    -DSANITIZER="${SANITIZER}"
+    # These are not enabled by default in cmake.
+    -DJPEGXL_ENABLE_VIEWERS=ON
+    -DJPEGXL_ENABLE_PLUGINS=ON
+    -DJPEGXL_ENABLE_DEVTOOLS=ON
+    # We always use libfuzzer in the ci.sh wrapper.
+    -DJPEGXL_FUZZER_LINK_FLAGS="-fsanitize=fuzzer"
+  )
+  if [[ "${BUILD_TARGET}" != *mingw32 ]]; then  # warnings are errors everywhere except mingw32
+    args+=(
+      -DJPEGXL_WARNINGS_AS_ERRORS=ON
+    )
+  fi
+  if [[ -n "${BUILD_TARGET}" ]]; then  # everything in this branch is cross-compilation setup
+    local system_name="Linux"
+    if [[ "${BUILD_TARGET}" == *mingw32 ]]; then
+      # When cross-compiling with mingw the target must be set to Windows and
+      # run programs with wine.
+      system_name="Windows"
+      args+=(
+        -DCMAKE_CROSSCOMPILING_EMULATOR="${WINE_BIN}"
+        # Normally CMake automatically defines MINGW=1 when building with the
+        # mingw compiler (x86_64-w64-mingw32-gcc) but we are normally compiling
+        # with clang.
+        -DMINGW=1
+      )
+    fi
+    # EMSCRIPTEN toolchain sets the right values itself
+    if [[ "${BUILD_TARGET}" != wasm* ]]; then
+      # If set, BUILD_TARGET must be the target triplet such as
+      # x86_64-unknown-linux-gnu.
+      args+=(
+        -DCMAKE_C_COMPILER_TARGET="${BUILD_TARGET}"
+        -DCMAKE_CXX_COMPILER_TARGET="${BUILD_TARGET}"
+        # Only the first element of the target triplet.
+        -DCMAKE_SYSTEM_PROCESSOR="${BUILD_TARGET%%-*}"
+        -DCMAKE_SYSTEM_NAME="${system_name}"
+        -DCMAKE_TOOLCHAIN_FILE="${CMAKE_TOOLCHAIN_FILE}"
+      )
+    else
+      args+=(
+        # sjpeg confuses WASM SIMD with SSE.
+        -DSJPEG_ENABLE_SIMD=OFF
+        # Building shared libs is not very useful for WASM.
+        -DBUILD_SHARED_LIBS=OFF
+      )
+    fi
+    args+=(
+      # These are needed to make googletest work when cross-compiling.
+      -DCMAKE_CROSSCOMPILING=1
+      -DHAVE_STD_REGEX=0
+      -DHAVE_POSIX_REGEX=0
+      -DHAVE_GNU_POSIX_REGEX=0
+      -DHAVE_STEADY_CLOCK=0
+      -DHAVE_THREAD_SAFETY_ATTRIBUTES=0
+    )
+    if [[ -z "${CMAKE_FIND_ROOT_PATH}" ]]; then
+      # find_package() will look in this prefix for libraries.
+      CMAKE_FIND_ROOT_PATH="/usr/${BUILD_TARGET}"
+    fi
+    if [[ -z "${CMAKE_PREFIX_PATH}" ]]; then
+      CMAKE_PREFIX_PATH="/usr/${BUILD_TARGET}"
+    fi
+    # Use pkg-config for the target. If there's no pkg-config available for the
+    # target we can set the PKG_CONFIG_PATH to the appropriate path in most
+    # linux distributions.
+    local pkg_config=$(which "${BUILD_TARGET}-pkg-config" || true)
+    if [[ -z "${pkg_config}" ]]; then
+      pkg_config=$(which pkg-config)
+      export PKG_CONFIG_LIBDIR="/usr/${BUILD_TARGET}/lib/pkgconfig"
+    fi
+    if [[ -n "${pkg_config}" ]]; then
+      args+=(-DPKG_CONFIG_EXECUTABLE="${pkg_config}")
+    fi
+  fi
+  if [[ -n "${CMAKE_CROSSCOMPILING_EMULATOR}" ]]; then  # the optional settings below are only passed when non-empty
+    args+=(
+      -DCMAKE_CROSSCOMPILING_EMULATOR="${CMAKE_CROSSCOMPILING_EMULATOR}"
+    )
+  fi
+  if [[ -n "${CMAKE_FIND_ROOT_PATH}" ]]; then
+    args+=(
+      -DCMAKE_FIND_ROOT_PATH="${CMAKE_FIND_ROOT_PATH}"
+    )
+  fi
+  if [[ -n "${CMAKE_PREFIX_PATH}" ]]; then
+    args+=(
+      -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}"
+    )
+  fi
+  if [[ -n "${CMAKE_C_COMPILER_LAUNCHER}" ]]; then
+    args+=(
+      -DCMAKE_C_COMPILER_LAUNCHER="${CMAKE_C_COMPILER_LAUNCHER}"
+    )
+  fi
+  if [[ -n "${CMAKE_CXX_COMPILER_LAUNCHER}" ]]; then
+    args+=(
+      -DCMAKE_CXX_COMPILER_LAUNCHER="${CMAKE_CXX_COMPILER_LAUNCHER}"
+    )
+  fi
+  if [[ -n "${CMAKE_MAKE_PROGRAM}" ]]; then
+    args+=(
+      -DCMAKE_MAKE_PROGRAM="${CMAKE_MAKE_PROGRAM}"
+    )
+  fi
+  if [[ "${BUILD_TARGET}" == wasm* ]]; then  # wasm builds go through the emcmake wrapper
+    emcmake cmake "${args[@]}" "$@"
+  else
+    cmake "${args[@]}" "$@"
+  fi
+}
+
+cmake_build_and_test() {  # Builds ${TARGETS}, optionally packs the tests, then runs ctest.
+  if [[ "${SKIP_BUILD}" -eq "1" ]]; then
+      return 0
+  fi
+  # gtest_discover_tests() runs the test binaries to discover the list of tests
+  # at build time, which fails under qemu.
+  ASAN_OPTIONS=detect_leaks=0 cmake --build "${BUILD_DIR}" -- $TARGETS
+  # Pack test binaries if requested (PACK_TEST=1).
+  if [[ "${PACK_TEST:-}" == "1" ]]; then
+    (cd "${BUILD_DIR}"
+     ${FIND_BIN} -name '*.cmake' -a '!' -path '*CMakeFiles*'
+     # gtest / gmock / gtest_main shared libs
+     ${FIND_BIN} lib/ -name 'libg*.so*'
+     ${FIND_BIN} -type d -name tests -a '!' -path '*CMakeFiles*'
+    ) | tar -C "${BUILD_DIR}" -cf "${BUILD_DIR}/tests.tar.xz" -T - \
+      --use-compress-program="xz --threads=$(nproc --all || echo 1) -6"
+    du -h "${BUILD_DIR}/tests.tar.xz"
+    # Pack coverage data if also available. The sentinel file is always listed.
+    touch "${BUILD_DIR}/gcno.sentinel"
+    (cd "${BUILD_DIR}"; echo gcno.sentinel; ${FIND_BIN} -name '*gcno') | \
+      tar -C "${BUILD_DIR}" -cvf "${BUILD_DIR}/gcno.tar.xz" -T - \
+        --use-compress-program="xz --threads=$(nproc --all || echo 1) -6"
+  fi
+
+  if [[ "${SKIP_TEST}" -ne "1" ]]; then
+    (cd "${BUILD_DIR}"
+     export UBSAN_OPTIONS=print_stacktrace=1
+     [[ "${TEST_STACK_LIMIT}" == "none" ]] || ulimit -s "${TEST_STACK_LIMIT}"
+     ctest -j $(nproc --all || echo 1) ${TEST_SELECTOR} --output-on-failure)
+  fi
+}
+
+# Configure the build to strip unused functions. This considerably reduces the
+# output size, specially for tests which only use a small part of the whole
+# library.
+strip_dead_code() {
+  # Emscripten does tree shaking without any extra flags.
+  if [[ "${BUILD_TARGET}" == wasm* ]]; then
+    return 0
+  fi
+  # -ffunction-sections, -fdata-sections and -Wl,--gc-sections effectively
+  # discard all unreachable code, reducing the code size. For this to work, we
+  # need to also pass --no-export-dynamic to prevent it from exporting all the
+  # internal symbols (like functions) making them all reachable and thus not a
+  # candidate for removal.
+  CMAKE_CXX_FLAGS+=" -ffunction-sections -fdata-sections"
+  CMAKE_C_FLAGS+=" -ffunction-sections -fdata-sections"
+  if [[ "${OS}" == "Darwin" ]]; then
+    CMAKE_EXE_LINKER_FLAGS+=" -dead_strip"
+    CMAKE_SHARED_LINKER_FLAGS+=" -dead_strip"
+  else
+    CMAKE_EXE_LINKER_FLAGS+=" -Wl,--gc-sections -Wl,--no-export-dynamic"
+    CMAKE_SHARED_LINKER_FLAGS+=" -Wl,--gc-sections -Wl,--no-export-dynamic"
+  fi
+}
+
+### Externally visible commands
+
+cmd_debug() {  # Configure (forwarding "$@" to cmake), build and test with CMAKE_BUILD_TYPE=Debug.
+  CMAKE_BUILD_TYPE="Debug"
+  cmake_configure "$@"
+  cmake_build_and_test
+}
+
+cmd_release() {  # Release build with the dead-code stripping flags; "$@" is forwarded to cmake.
+  CMAKE_BUILD_TYPE="Release"
+  strip_dead_code  # must run before cmake_configure, which reads the CMAKE_*_FLAGS it extends
+  cmake_configure "$@"
+  cmake_build_and_test
+}
+
+cmd_opt() {  # RelWithDebInfo build with the JXL_DEBUG_* defines; "$@" is forwarded to cmake.
+  CMAKE_BUILD_TYPE="RelWithDebInfo"
+  CMAKE_CXX_FLAGS+=" -DJXL_DEBUG_WARNING -DJXL_DEBUG_ON_ERROR"  # C++ flags only
+  cmake_configure "$@"
+  cmake_build_and_test
+}
+
+cmd_coverage() {
+  # -O0 prohibits stack space reuse -> causes stack-overflow on dozens of tests,
+  # so the "ulimit -s" restriction normally applied to test runs is disabled.
+  TEST_STACK_LIMIT="none"
+  cmd_release -DJPEGXL_ENABLE_COVERAGE=ON "$@"
+
+  if [[ "${SKIP_TEST}" -ne "1" ]]; then
+    # If we didn't run the test we also don't print a coverage report.
+    cmd_coverage_report
+  fi
+}
+
+cmd_coverage_report() {  # Writes coverage.html, coverage.txt and coverage.xml in the build dir.
+  LLVM_COV=$("${CC:-clang}" -print-prog-name=llvm-cov)
+  local real_build_dir=$(realpath "${BUILD_DIR}")
+  local gcovr_args=(
+    -r "${real_build_dir}"
+    --gcov-executable "${LLVM_COV} gcov"
+    # Only print coverage information for the libjxl directories. The rest
+    # is not part of the code under test.
+    --filter '.*jxl/.*'
+    --exclude '.*_gbench.cc'
+    --exclude '.*_test.cc'
+    --exclude '.*_testonly..*'
+    --exclude '.*_debug.*'
+    --exclude '.*test_utils..*'
+    --object-directory "${real_build_dir}"
+  )
+
+  (
+   cd "${real_build_dir}"
+    gcovr "${gcovr_args[@]}" --html --html-details \
+      --output="${real_build_dir}/coverage.html"
+    gcovr "${gcovr_args[@]}" --print-summary |
+      tee "${real_build_dir}/coverage.txt"
+    gcovr "${gcovr_args[@]}" --xml --output="${real_build_dir}/coverage.xml"
+  )
+}
+
+cmd_test() {
+  export_env
+  # Unpack the tests and the coverage notes if needed. gcno.sentinel is a plain
+  # file created with "touch" when packing, so it is probed with -e, not -d.
+  if [[ -e "${BUILD_DIR}/tests.tar.xz" && ! -d "${BUILD_DIR}/tests" ]]; then
+    tar -C "${BUILD_DIR}" -Jxvf "${BUILD_DIR}/tests.tar.xz"
+  fi
+  if [[ -e "${BUILD_DIR}/gcno.tar.xz" && ! -e "${BUILD_DIR}/gcno.sentinel" ]]; then
+    tar -C "${BUILD_DIR}" -Jxvf "${BUILD_DIR}/gcno.tar.xz"
+  fi
+  (cd "${BUILD_DIR}"; export UBSAN_OPTIONS=print_stacktrace=1
+   [[ "${TEST_STACK_LIMIT}" == "none" ]] || ulimit -s "${TEST_STACK_LIMIT}"
+   ctest -j $(nproc --all || echo 1) ${TEST_SELECTOR} --output-on-failure "$@")
+}
+
+cmd_gbench() {
+  export_env
+  (cd "${BUILD_DIR}"
+   export UBSAN_OPTIONS=print_stacktrace=1
+   lib/jxl_gbench \
+     --benchmark_counters_tabular=true \
+     --benchmark_out_format=json \
+     --benchmark_out=gbench.json "$@"
+  )
+}
+
+cmd_asanfuzz() {  # asan build with the fuzzers enabled; "$@" is forwarded to cmd_asan.
+  local fuzz_flags=" -fsanitize=fuzzer-no-link -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1"
+  CMAKE_CXX_FLAGS+="${fuzz_flags}"; CMAKE_C_FLAGS+="${fuzz_flags}"
+  cmd_asan -DJPEGXL_ENABLE_FUZZERS=ON "$@"
+}
+
+cmd_msanfuzz() {
+  # The msan libc++ is installed before the fuzzer flags are appended, because
+  # cmd_msan_install builds it with the current CMAKE_*_FLAGS.
+  detect_clang_version
+  local prefix="${HOME}/.msan/${CLANG_VERSION}"
+  # (Re)install when the prefix is missing or when it still contains libc++abi.
+  if [[ ! -d "${prefix}" || -e "${prefix}/lib/libc++abi.a" ]]; then
+    cmd_msan_install
+  fi
+  local fuzz_flags=" -fsanitize=fuzzer-no-link -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1"
+  CMAKE_CXX_FLAGS+="${fuzz_flags}"
+  CMAKE_C_FLAGS+="${fuzz_flags}"
+  cmd_msan -DJPEGXL_ENABLE_FUZZERS=ON "$@"
+}
+
+cmd_asan() {  # AddressSanitizer + UBSan build and test; "$@" is forwarded to cmake.
+  SANITIZER="asan"
+  CMAKE_C_FLAGS+=" -DJXL_ENABLE_ASSERT=1 -g -DADDRESS_SANITIZER \
+    -fsanitize=address ${UBSAN_FLAGS[@]}"
+  CMAKE_CXX_FLAGS+=" -DJXL_ENABLE_ASSERT=1 -g -DADDRESS_SANITIZER \
+    -fsanitize=address ${UBSAN_FLAGS[@]}"
+  strip_dead_code
+  cmake_configure "$@" -DJPEGXL_ENABLE_TCMALLOC=OFF  # tcmalloc is switched off for this build
+  cmake_build_and_test
+}
+
+cmd_tsan() {  # ThreadSanitizer + UBSan build and test; "$@" is forwarded to cmake.
+  SANITIZER="tsan"
+  local tsan_args=(
+    -DJXL_ENABLE_ASSERT=1
+    -g
+    -DTHREAD_SANITIZER
+    ${UBSAN_FLAGS[@]}
+    -fsanitize=thread
+  )
+  CMAKE_C_FLAGS+=" ${tsan_args[@]}"
+  CMAKE_CXX_FLAGS+=" ${tsan_args[@]}"
+
+  CMAKE_BUILD_TYPE="RelWithDebInfo"
+  cmake_configure "$@" -DJPEGXL_ENABLE_TCMALLOC=OFF  # tcmalloc is switched off for this build
+  cmake_build_and_test
+}
+
+cmd_msan() {  # MemorySanitizer + UBSan build and test; "$@" is forwarded to cmake.
+  SANITIZER="msan"
+  detect_clang_version
+  local msan_prefix="${HOME}/.msan/${CLANG_VERSION}"
+  if [[ ! -d "${msan_prefix}" || -e "${msan_prefix}/lib/libc++abi.a" ]]; then
+    # Install msan libraries for this version if needed or if an older version
+    # with libc++abi was installed.
+    cmd_msan_install
+  fi
+
+  local msan_c_flags=(
+    -fsanitize=memory
+    -fno-omit-frame-pointer
+    -fsanitize-memory-track-origins
+
+    -DJXL_ENABLE_ASSERT=1
+    -g
+    -DMEMORY_SANITIZER
+
+    # Force gtest to not use the cxxabi.
+    -DGTEST_HAS_CXXABI_H_=0
+  )
+  local msan_cxx_flags=(
+    "${msan_c_flags[@]}"
+
+    # Some C++ sources don't use the std at all, so the -stdlib=libc++ is unused
+    # in those cases. Ignore the warning.
+    -Wno-unused-command-line-argument
+    -stdlib=libc++
+
+    # We include the libc++ from the msan directory instead, so we don't want
+    # the std includes.
+    -nostdinc++
+    -cxx-isystem"${msan_prefix}/include/c++/v1"
+  )
+
+  local msan_linker_flags=(
+    -L"${msan_prefix}"/lib
+    -Wl,-rpath -Wl,"${msan_prefix}"/lib/
+  )
+
+  CMAKE_C_FLAGS+=" ${msan_c_flags[@]} ${UBSAN_FLAGS[@]}"
+  CMAKE_CXX_FLAGS+=" ${msan_cxx_flags[@]} ${UBSAN_FLAGS[@]}"
+  CMAKE_EXE_LINKER_FLAGS+=" ${msan_linker_flags[@]}"
+  CMAKE_MODULE_LINKER_FLAGS+=" ${msan_linker_flags[@]}"
+  CMAKE_SHARED_LINKER_FLAGS+=" ${msan_linker_flags[@]}"
+  strip_dead_code
+  cmake_configure "$@" \
+    -DCMAKE_CROSSCOMPILING=1 -DRUN_HAVE_STD_REGEX=0 -DRUN_HAVE_POSIX_REGEX=0 \
+    -DJPEGXL_ENABLE_TCMALLOC=OFF -DJPEGXL_WARNINGS_AS_ERRORS=OFF \
+    -DCMAKE_REQUIRED_LINK_OPTIONS="${msan_linker_flags[@]}"  # NOTE(review): expands to one word per array element - confirm intended
+  cmake_build_and_test
+}
+
+# Build the libc++ of the detected clang release with MemorySanitizer and
+# install it in "${HOME}/.msan/${CLANG_VERSION}", replacing any previous copy.
+cmd_msan_install() {
+  local tmpdir=$(mktemp -d)
+  CLEANUP_FILES+=("${tmpdir}")
+  # Detect the llvm to install:
+  export CC="${CC:-clang}"
+  export CXX="${CXX:-clang++}"
+  detect_clang_version
+  # Allow overriding the LLVM checkout (LLVM_ROOT); otherwise download a release.
+  local llvm_root="${LLVM_ROOT:-}"
+  if [ -z "${llvm_root}" ]; then
+    local llvm_tag="llvmorg-${CLANG_VERSION}.0.0"
+    case "${CLANG_VERSION}" in
+      "6.0")
+        llvm_tag="llvmorg-6.0.1"
+        ;;
+      "7")
+        llvm_tag="llvmorg-7.0.1"
+        ;;
+    esac
+    local llvm_targz="${tmpdir}/${llvm_tag}.tar.gz"
+    curl -L --show-error -o "${llvm_targz}" \
+      "https://github.com/llvm/llvm-project/archive/${llvm_tag}.tar.gz"
+    tar -C "${tmpdir}" -zxf "${llvm_targz}"
+    llvm_root="${tmpdir}/llvm-project-${llvm_tag}"
+  fi
+
+  local msan_prefix="${HOME}/.msan/${CLANG_VERSION}"
+  rm -rf "${msan_prefix}"
+  # Extra cmake arguments per project, left unquoted below so they word-split.
+  declare -A CMAKE_EXTRAS
+  CMAKE_EXTRAS[libcxx]="\
+    -DLIBCXX_CXX_ABI=libstdc++ \
+    -DLIBCXX_INSTALL_EXPERIMENTAL_LIBRARY=ON"
+
+  for project in libcxx; do
+    local proj_build="${tmpdir}/build-${project}"
+    local proj_dir="${llvm_root}/${project}"
+    mkdir -p "${proj_build}"
+    cmake -B"${proj_build}" -H"${proj_dir}" \
+      -G Ninja \
+      -DCMAKE_BUILD_TYPE=Release \
+      -DLLVM_USE_SANITIZER=Memory \
+      -DLLVM_PATH="${llvm_root}/llvm" \
+      -DLLVM_CONFIG_PATH="$(which llvm-config llvm-config-7 llvm-config-6.0 | \
+                            head -n1)" \
+      -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}" \
+      -DCMAKE_C_FLAGS="${CMAKE_C_FLAGS}" \
+      -DCMAKE_EXE_LINKER_FLAGS="${CMAKE_EXE_LINKER_FLAGS}" \
+      -DCMAKE_SHARED_LINKER_FLAGS="${CMAKE_SHARED_LINKER_FLAGS}" \
+      -DCMAKE_INSTALL_PREFIX="${msan_prefix}" \
+      ${CMAKE_EXTRAS[${project}]}
+    cmake --build "${proj_build}"
+    ninja -C "${proj_build}" install
+  done
+}
+
+# Internal build step shared by the cmd_ossfuzz_* commands: $1 is the sanitizer, the rest goes in JPEGXL_EXTRA_ARGS.
+_cmd_ossfuzz() {
+  local sanitizer="$1"
+  shift
+  mkdir -p "${BUILD_DIR}"
+  local real_build_dir=$(realpath "${BUILD_DIR}")
+
+  # oss-fuzz defines three directories:
+  # * /work, with the working directory to do re-builds
+  # * /src, with the source code to build
+  # * /out, with the output directory where to copy over the built files.
+  # We use $BUILD_DIR as the /work and the script directory as the /src. The
+  # /out directory is ignored as developers are used to look for the fuzzers in
+  # $BUILD_DIR/tools/ directly.
+
+  if [[ "${sanitizer}" = "memory" && ! -d "${BUILD_DIR}/msan" ]]; then  # copy the msan libs once
+    sudo docker run --rm -i \
+      --user $(id -u):$(id -g) \
+      -v "${real_build_dir}":/work \
+      gcr.io/oss-fuzz-base/msan-libs-builder \
+      bash -c "cp -r /msan /work"
+  fi
+
+  # Args passed to ninja. These will be evaluated as a string separated by
+  # spaces.
+  local jpegxl_extra_args="$@"
+
+  sudo docker run --rm -i \
+    -e JPEGXL_UID=$(id -u) \
+    -e JPEGXL_GID=$(id -g) \
+    -e FUZZING_ENGINE="${FUZZING_ENGINE:-libfuzzer}" \
+    -e SANITIZER="${sanitizer}" \
+    -e ARCHITECTURE=x86_64 \
+    -e FUZZING_LANGUAGE=c++ \
+    -e MSAN_LIBS_PATH="/work/msan" \
+    -e JPEGXL_EXTRA_ARGS="${jpegxl_extra_args}" \
+    -v "${MYDIR}":/src/libjxl \
+    -v "${MYDIR}/tools/scripts/ossfuzz-build.sh":/src/build.sh \
+    -v "${real_build_dir}":/work \
+    gcr.io/oss-fuzz/libjxl
+}
+
+cmd_ossfuzz_asan() {  # oss-fuzz docker build with SANITIZER=address
+  _cmd_ossfuzz address "$@"
+}
+cmd_ossfuzz_msan() {  # oss-fuzz docker build with SANITIZER=memory
+  _cmd_ossfuzz memory "$@"
+}
+cmd_ossfuzz_ubsan() {  # oss-fuzz docker build with SANITIZER=undefined
+  _cmd_ossfuzz undefined "$@"
+}
+
+cmd_ossfuzz_ninja() {  # Runs "ninja -C /work $@" inside the oss-fuzz libjxl docker image.
+  [[ -e "${BUILD_DIR}/build.ninja" ]]  # with "set -e" the script stops here if there is no build yet
+  local real_build_dir=$(realpath "${BUILD_DIR}")
+
+  if [[ -e "${BUILD_DIR}/msan" ]]; then
+    echo "ossfuzz_ninja doesn't work with msan builds. Use ossfuzz_msan." >&2
+    exit 1
+  fi
+
+  sudo docker run --rm -i \
+    --user $(id -u):$(id -g) \
+    -v "${MYDIR}":/src/libjxl \
+    -v "${real_build_dir}":/work \
+    gcr.io/oss-fuzz/libjxl \
+    ninja -C /work "$@"
+}
+
+cmd_fast_benchmark() {
+  local small_corpus_tar="${BENCHMARK_CORPORA}/jyrki-full.tar"
+  mkdir -p "${BENCHMARK_CORPORA}"
+  # -z only downloads when the remote copy is newer than the cached one;
+  # --fail keeps an HTTP error page from overwriting the cached corpus.
+  curl --fail --show-error -o "${small_corpus_tar}" -z "${small_corpus_tar}" \
+    "https://storage.googleapis.com/artifacts.jpegxl.appspot.com/corpora/jyrki-full.tar"
+  local tmpdir=$(mktemp -d)
+  CLEANUP_FILES+=("${tmpdir}")  # removed by cleanup() when the script exits
+  tar -xf "${small_corpus_tar}" -C "${tmpdir}"
+  run_benchmark "${tmpdir}" 1048576  # second argument: memory per thread, in kB
+}
+
+cmd_benchmark() {
+  local nikon_corpus_tar="${BENCHMARK_CORPORA}/nikon-subset.tar"
+  mkdir -p "${BENCHMARK_CORPORA}"
+  curl --fail --show-error -o "${nikon_corpus_tar}" -z "${nikon_corpus_tar}" \
+    "https://storage.googleapis.com/artifacts.jpegxl.appspot.com/corpora/nikon-subset.tar"
+  # --fail above keeps an HTTP error page from overwriting the cached corpus.
+  local tmpdir=$(mktemp -d)
+  CLEANUP_FILES+=("${tmpdir}")
+  tar -xvf "${nikon_corpus_tar}" -C "${tmpdir}"
+
+  local sem_id="jpegxl_benchmark-$$"
+  local nprocs=$(nproc --all || echo 1)
+  local images=()
+  local filename
+  while IFS= read -r filename; do
+    # This removes the './'
+    filename="${filename:2}"
+    local mode png_filename
+    if [[ "${filename:0:4}" == "srgb" ]]; then
+      mode="RGB_D65_SRG_Rel_SRG"
+    elif [[ "${filename:0:5}" == "adobe" ]]; then
+      mode="RGB_D65_Ado_Rel_Ado"
+    else
+      echo "Unknown image colorspace: ${filename}" >&2
+      exit 1
+    fi
+    png_filename="${filename%.ppm}.png"
+    png_filename=$(echo "${png_filename}" | tr '/' '_')
+    sem --bg --id "${sem_id}" -j"${nprocs}" -- \
+      "${BUILD_DIR}/tools/decode_and_encode" \
+        "${tmpdir}/${filename}" "${mode}" "${tmpdir}/${png_filename}"
+    images+=( "${png_filename}" )
+  done < <(cd "${tmpdir}"; ${FIND_BIN} . -name '*.ppm' -type f)
+  sem --id "${sem_id}" --wait
+
+  # We need about 10 GiB per thread on these images.
+  run_benchmark "${tmpdir}" 10485760
+}
+
+get_mem_available() {
+  if [[ "${OS}" == "Darwin" ]]; then
+    echo $(vm_stat | grep -F 'Pages free:' | awk '{print $3 * 4}')
+  else
+    echo $(grep -F MemAvailable: /proc/meminfo | awk '{print $2}')
+  fi
+}
+
+run_benchmark() {  # $1: directory with the *.png inputs; $2: memory per thread in kB (default 10485760)
+  local src_img_dir="$1"
+  local mem_per_thread="${2:-10485760}"
+
+  local output_dir="${BUILD_DIR}/benchmark_results"
+  mkdir -p "${output_dir}"
+
+  # The memory available at the beginning of the benchmark run in kB. The number
+  # of threads depends on the available memory and the passed memory per thread,
+  # after setting aside a constant 1048576 kB (1 GiB, not 2) of memory.
+  local mem_available="$(get_mem_available)"
+  # Check that we actually have a MemAvailable value.
+  [[ -n "${mem_available}" ]]
+  local num_threads=$(( (${mem_available} - 1048576) / ${mem_per_thread} ))
+  if [[ ${num_threads} -le 0 ]]; then  # always use at least one thread
+    num_threads=1
+  fi
+
+  local benchmark_args=(
+    --input "${src_img_dir}/*.png"
+    --codec=jpeg:yuv420:q85,webp:q80,jxl:d1:6,jxl:d1:6:downsampling=8,jxl:d5:6,jxl:d5:6:downsampling=8,jxl:m:d0:2,jxl:m:d0:3,jxl:m:d2:2
+    --output_dir "${output_dir}"
+    --show_progress
+    --num_threads="${num_threads}"
+  )
+  if [[ "${STORE_IMAGES}" == "1" ]]; then
+    benchmark_args+=(--save_decompressed --save_compressed)
+  fi
+  (
+    [[ "${TEST_STACK_LIMIT}" == "none" ]] || ulimit -s "${TEST_STACK_LIMIT}"
+    "${BUILD_DIR}/tools/benchmark_xl" "${benchmark_args[@]}" | \
+       tee "${output_dir}/results.txt"
+
+    # Propagate the exit status of benchmark_xl itself rather than the one of tee.
+    return ${PIPESTATUS[0]}
+  )
+
+  if [[ -n "${CI_BUILD_NAME:-}" ]]; then  # in CI the results are also posted to the merge request
+    { set +x; } 2>/dev/null
+    local message="Results for ${CI_BUILD_NAME} @ ${CI_COMMIT_SHORT_SHA} (job ${CI_JOB_URL:-}):
+
+$(cat "${output_dir}/results.txt")
+"
+    cmd_post_mr_comment "${message}"
+    set -x
+  fi
+}
+
+# Helper function to wait for the CPU temperature to cool down on ARM.
+wait_for_temp() {
+  { set +x; } 2>/dev/null
+  local temp_limit=${1:-38000}
+  if [[ -z "${THERMAL_FILE:-}" ]]; then
+    echo "Must define the THERMAL_FILE with the thermal_zoneX/temp file" \
+      "to read the temperature from. This is normally set in the runner." >&2
+    exit 1
+  fi
+  local org_temp=$(cat "${THERMAL_FILE}")
+  if [[ "${org_temp}" -ge "${temp_limit}" ]]; then
+    echo -n "Waiting for temp to get down from ${org_temp}... "
+  fi
+  local temp="${org_temp}"
+  local secs=0
+  while [[ "${temp}" -ge "${temp_limit}" ]]; do
+    sleep 1
+    temp=$(cat "${THERMAL_FILE}")
+    echo -n "${temp} "
+    secs=$((secs + 1))
+    if [[ ${secs} -ge 5 ]]; then
+      break
+    fi
+  done
+  if [[ "${org_temp}" -ge "${temp_limit}" ]]; then
+    echo "Done, temp=${temp}"
+  fi
+  set -x
+}
+
+# Helper function that writes the given CPU list into the cpuset of the
+# current process. Does nothing when SKIP_CPUSET=1.
+cmd_cpuset() {
+  if [[ "${SKIP_CPUSET:-}" == "1" ]]; then
+    return 0
+  fi
+  local newset="$1"
+  local cpuset_dir="/dev/cpuset$(cat /proc/self/cpuset)"
+  # Check that the directory exists:
+  [[ -d "${cpuset_dir}" ]]
+  # Prefer the "cpuset.cpus" file name and fall back to plain "cpus".
+  local cpus_file="${cpuset_dir}/cpus"
+  if [[ -e "${cpuset_dir}/cpuset.cpus" ]]; then
+    cpus_file="${cpuset_dir}/cpuset.cpus"
+  fi
+  echo "${newset}" >"${cpus_file}"
+}
+
+# Return the encoding/decoding speed from the Stats output.
+_speed_from_output() {
+  local speed="$1"
+  local unit="${2:-MP/s}"
+  if [[ "${speed}" == *"${unit}"* ]]; then
+    speed="${speed%% ${unit}*}"
+    speed="${speed##* }"
+    echo "${speed}"
+  fi
+}
+
+
+# Run benchmarks on ARM for the big and little CPUs.
+#
+# Every source image is encoded with cjxl once per set of flags and per CPU
+# configuration, then decoded with djxl. One tab-separated row per run is
+# appended to ${BUILD_DIR}/benchmark_results/runs.txt, and the table is posted
+# through cmd_post_mr_comment when CI_BUILD_NAME is set.
+#
+# Environment:
+#   SKIP_CPUSET=1      Use a single placeholder CPU configuration.
+#   RUNNER_CPU_LITTLE, RUNNER_CPU_BIG, RUNNER_CPU_ALL
+#                      CPU lists passed to cmd_cpuset otherwise.
+cmd_arm_benchmark() {
+  # Flags used for cjxl encoder with .png inputs
+  local jxl_png_benchmarks=(
+    # Lossy options:
+    "--epf=0 --distance=1.0 --speed=cheetah"
+    "--epf=2 --distance=1.0 --speed=cheetah"
+    "--epf=0 --distance=8.0 --speed=cheetah"
+    "--epf=1 --distance=8.0 --speed=cheetah"
+    "--epf=2 --distance=8.0 --speed=cheetah"
+    "--epf=3 --distance=8.0 --speed=cheetah"
+    "--modular -Q 90"
+    "--modular -Q 50"
+    # Lossless options:
+    "--modular"
+    "--modular -E 0 -I 0"
+    "--modular -P 5"
+    "--modular --responsive=1"
+    # Near-lossless options:
+    "--epf=0 --distance=0.3 --speed=fast"
+    "--modular -Q 97"
+  )
+
+  # Flags used for cjxl encoder with .jpg inputs. These should do lossless
+  # JPEG recompression (of pixels or full jpeg).
+  local jxl_jpeg_benchmarks=(
+    "--num_reps=3"
+  )
+
+  local images=(
+    "testdata/jxl/flower/flower.png"
+  )
+
+  local jpg_images=(
+    "testdata/jxl/flower/flower.png.im_q85_420.jpg"
+  )
+
+  if [[ "${SKIP_CPUSET:-}" == "1" ]]; then
+    # Use a single cpu config in this case.
+    local cpu_confs=("?")
+  else
+    # Otherwise the CPU config comes from the environment:
+    local cpu_confs=(
+      "${RUNNER_CPU_LITTLE}"
+      "${RUNNER_CPU_BIG}"
+      # The CPU description is something like 3-7, so these configurations only
+      # take the first CPU of the group.
+      "${RUNNER_CPU_LITTLE%%-*}"
+      "${RUNNER_CPU_BIG%%-*}"
+    )
+    # Check that RUNNER_CPU_ALL is defined. In the SKIP_CPUSET=1 case this will
+    # be ignored but still evaluated when calling cmd_cpuset.
+    [[ -n "${RUNNER_CPU_ALL}" ]]
+  fi
+
+  # Generate extra JPEG inputs from the .png images at several qualities.
+  local jpg_dirname="third_party/corpora/jpeg"
+  mkdir -p "${jpg_dirname}"
+  local jpg_qualities=( 50 80 95 )
+  for src_img in "${images[@]}"; do
+    for q in "${jpg_qualities[@]}"; do
+      local jpeg_name="${jpg_dirname}/"$(basename "${src_img}" .png)"-q${q}.jpg"
+      convert -sampling-factor 1x1 -quality "${q}" \
+        "${src_img}" "${jpeg_name}"
+      jpg_images+=("${jpeg_name}")
+    done
+  done
+
+  local output_dir="${BUILD_DIR}/benchmark_results"
+  mkdir -p "${output_dir}"
+  local runs_file="${output_dir}/runs.txt"
+
+  # Write the table header only once; later runs append to the same file.
+  if [[ ! -e "${runs_file}" ]]; then
+    echo -e "binary\tflags\tsrc_img\tsrc size\tsrc pixels\tcpuset\tenc size (B)\tenc speed (MP/s)\tdec speed (MP/s)\tJPG dec speed (MP/s)\tJPG dec speed (MB/s)" |
+      tee -a "${runs_file}"
+  fi
+
+  mkdir -p "${BUILD_DIR}/arm_benchmark"
+  local flags
+  local src_img
+  for src_img in "${jpg_images[@]}" "${images[@]}"; do
+    local src_img_hash=$(sha1sum "${src_img}" | cut -f 1 -d ' ')
+    local enc_binaries=("${BUILD_DIR}/tools/cjxl")
+    local src_ext="${src_img##*.}"
+    for enc_binary in "${enc_binaries[@]}"; do
+      local enc_binary_base=$(basename "${enc_binary}")
+
+      # Select the list of flags to use for the current encoder/image pair.
+      local img_benchmarks
+      if [[ "${src_ext}" == "jpg" ]]; then
+        img_benchmarks=("${jxl_jpeg_benchmarks[@]}")
+      else
+        img_benchmarks=("${jxl_png_benchmarks[@]}")
+      fi
+
+      for flags in "${img_benchmarks[@]}"; do
+        # Encoding step.
+        # The output name is a hash of encoder, flags, image path and image
+        # contents.
+        local enc_file_hash="${enc_binary_base} || $flags || ${src_img} || ${src_img_hash}"
+        enc_file_hash=$(echo "${enc_file_hash}" | sha1sum | cut -f 1 -d ' ')
+        local enc_file="${BUILD_DIR}/arm_benchmark/${enc_file_hash}.jxl"
+
+        for cpu_conf in "${cpu_confs[@]}"; do
+          cmd_cpuset "${cpu_conf}"
+          # nproc returns the number of active CPUs, which is given by the cpuset
+          # mask.
+          local num_threads="$(nproc)"
+
+          echo "Encoding with: ${enc_binary_base} img=${src_img} cpus=${cpu_conf} enc_flags=${flags}"
+          local enc_output
+          if [[ "${flags}" == *"modular"* ]]; then
+            # We don't benchmark encoding speed in this case.
+            # The file is encoded once, on all CPUs, and reused afterwards.
+            if [[ ! -f "${enc_file}" ]]; then
+              cmd_cpuset "${RUNNER_CPU_ALL:-}"
+              "${enc_binary}" ${flags} "${src_img}" "${enc_file}.tmp"
+              mv "${enc_file}.tmp" "${enc_file}"
+              cmd_cpuset "${cpu_conf}"
+            fi
+            enc_output=" ?? MP/s"
+          else
+            wait_for_temp
+            enc_output=$("${enc_binary}" ${flags} "${src_img}" "${enc_file}.tmp" \
+              2>&1 | tee /dev/stderr | grep -F "MP/s [")
+            mv "${enc_file}.tmp" "${enc_file}"
+          fi
+          local enc_speed=$(_speed_from_output "${enc_output}")
+          local enc_size=$(stat -c "%s" "${enc_file}")
+
+          echo "Decoding with: img=${src_img} cpus=${cpu_conf} enc_flags=${flags}"
+
+          local dec_output
+          wait_for_temp
+          dec_output=$("${BUILD_DIR}/tools/djxl" "${enc_file}" \
+            --num_reps=5 --num_threads="${num_threads}" 2>&1 | tee /dev/stderr |
+            grep -E "M[BP]/s \[")
+          # The image size is the text before the first comma of the stats
+          # line; its first and third space-separated fields are multiplied to
+          # get the pixel count.
+          local img_size=$(echo "${dec_output}" | cut -f 1 -d ',')
+          local img_size_x=$(echo "${img_size}" | cut -f 1 -d ' ')
+          local img_size_y=$(echo "${img_size}" | cut -f 3 -d ' ')
+          local img_size_px=$(( ${img_size_x} * ${img_size_y} ))
+          local dec_speed=$(_speed_from_output "${dec_output}")
+
+          # For JPEG lossless recompression modes (where the original is a JPEG)
+          # decode to JPG as well.
+          local jpeg_dec_mps_speed=""
+          local jpeg_dec_mbs_speed=""
+          if [[ "${src_ext}" == "jpg" ]]; then
+            wait_for_temp
+            local dec_file="${BUILD_DIR}/arm_benchmark/${enc_file_hash}.jpg"
+            dec_output=$("${BUILD_DIR}/tools/djxl" "${enc_file}" \
+              "${dec_file}" --num_reps=5 --num_threads="${num_threads}" 2>&1 | \
+                tee /dev/stderr | grep -E "M[BP]/s \[")
+            local jpeg_dec_mps_speed=$(_speed_from_output "${dec_output}")
+            local jpeg_dec_mbs_speed=$(_speed_from_output "${dec_output}" MB/s)
+            if ! cmp --quiet "${src_img}" "${dec_file}"; then
+              # Add a star at the end to signal that the files are different.
+              jpeg_dec_mbs_speed+="*"
+            fi
+          fi
+
+          # Record entry in a tab-separated file.
+          local src_img_base=$(basename "${src_img}")
+          echo -e "${enc_binary_base}\t${flags}\t${src_img_base}\t${img_size}\t${img_size_px}\t${cpu_conf}\t${enc_size}\t${enc_speed}\t${dec_speed}\t${jpeg_dec_mps_speed}\t${jpeg_dec_mbs_speed}" |
+            tee -a "${runs_file}"
+        done
+      done
+    done
+  done
+  # Restore the full CPU set before reporting.
+  cmd_cpuset "${RUNNER_CPU_ALL:-}"
+  cat "${runs_file}"
+
+  if [[ -n "${CI_BUILD_NAME:-}" ]]; then
+    load_mr_vars_from_commit
+    { set +x; } 2>/dev/null
+    local message="Results for ${CI_BUILD_NAME} @ ${CI_COMMIT_SHORT_SHA} (job ${CI_JOB_URL:-}):
+
+\`\`\`
+$(column -t -s "	" "${runs_file}")
+\`\`\`
+"
+    cmd_post_mr_comment "${message}"
+    set -x
+  fi
+}
+
+# Generate a corpus and run the fuzzer on that corpus.
+cmd_fuzz() {
+  local corpus_dir=$(realpath "${BUILD_DIR}/fuzzer_corpus")
+  local fuzzer_crash_dir=$(realpath "${BUILD_DIR}/fuzzer_crash")
+  mkdir -p "${corpus_dir}" "${fuzzer_crash_dir}"
+  # Generate step.
+  "${BUILD_DIR}/tools/fuzzer_corpus" "${corpus_dir}"
+  # Run step:
+  local nprocs=$(nproc --all || echo 1)
+  (
+   cd "${BUILD_DIR}"
+   "tools/djxl_fuzzer" "${fuzzer_crash_dir}" "${corpus_dir}" \
+     -max_total_time="${FUZZER_MAX_TIME}" -jobs=${nprocs} \
+     -artifact_prefix="${fuzzer_crash_dir}/"
+  )
+}
+
+# Runs the linters (clang-format, build_cleaner, buildifier) on the pending CLs.
+#
+# Arguments:
+#   $1: optional space-separated list of clang-format versions to try, newest
+#       first. The plain "clang-format" binary is always tried last.
+#
+# Returns non-zero (or exits 1) when any linter reports findings. The check
+# passes as soon as one installed clang-format version reports no changes
+# outside of third_party. If LINT_OUTPUT is set, the clang-format patch is
+# copied there.
+cmd_lint() {
+  merge_request_commits
+  { set +x; } 2>/dev/null
+  local versions=(${1:-16 15 14 13 12 11 10 9 8 7 6.0})
+  local clang_format_bins=("${versions[@]/#/clang-format-}" clang-format)
+  local tmpdir=$(mktemp -d)
+  CLEANUP_FILES+=("${tmpdir}")
+
+  local ret=0
+  local build_patch="${tmpdir}/build_cleaner.patch"
+  if ! "${MYDIR}/tools/scripts/build_cleaner.py" >"${build_patch}"; then
+    ret=1
+    echo "build_cleaner.py findings:" >&2
+    "${COLORDIFF_BIN}" <"${build_patch}"
+    echo "Run \`tools/scripts/build_cleaner.py --update\` to apply them" >&2
+  fi
+
+  # It is ok, if buildifier is not installed.
+  if which buildifier >/dev/null; then
+    local buildifier_patch="${tmpdir}/buildifier.patch"
+    # MYDIR is quoted so a checkout path with spaces works, and the dot of
+    # ".bzl" is escaped so only real .bzl files are matched.
+    # NOTE(review): the listed paths are relative to MYDIR, so buildifier
+    # presumably has to run from that directory - confirm.
+    local bazel_files=$(git -C "${MYDIR}" ls-files | grep -E "/BUILD$|WORKSPACE|\.bzl$")
+    set -x
+    buildifier -d ${bazel_files} >"${buildifier_patch}"|| true
+    { set +x; } 2>/dev/null
+    if [ -s "${buildifier_patch}" ]; then
+      ret=1
+      echo 'buildifier have found some problems in Bazel build files:' >&2
+      "${COLORDIFF_BIN}" <"${buildifier_patch}"
+      echo 'To fix them run (from the base directory):' >&2
+      echo '  buildifier `git ls-files | grep -E "/BUILD$|WORKSPACE|\.bzl$"`' >&2
+    fi
+  fi
+
+  local installed=()
+  local clang_patch
+  local clang_format
+  for clang_format in "${clang_format_bins[@]}"; do
+    if ! which "${clang_format}" >/dev/null; then
+      continue
+    fi
+    installed+=("${clang_format}")
+    local tmppatch="${tmpdir}/${clang_format}.patch"
+    # We include in this linter all the changes including the uncommitted changes
+    # to avoid printing changes already applied.
+    set -x
+    # Ignoring the error that git-clang-format outputs.
+    git -C "${MYDIR}" "${clang_format}" --binary "${clang_format}" \
+      --style=file --diff "${MR_ANCESTOR_SHA}" -- >"${tmppatch}" || true
+    { set +x; } 2>/dev/null
+    # Only findings outside of third_party count as failures.
+    if grep -E '^--- ' "${tmppatch}" | grep -v 'a/third_party' >/dev/null; then
+      if [[ -n "${LINT_OUTPUT:-}" ]]; then
+        cp "${tmppatch}" "${LINT_OUTPUT}"
+      fi
+      clang_patch="${tmppatch}"
+    else
+      echo "clang-format check OK" >&2
+      return ${ret}
+    fi
+  done
+
+  if [[ ${#installed[@]} -eq 0 ]]; then
+    echo "You must install clang-format for \"git clang-format\"" >&2
+    exit 1
+  fi
+
+  # clang-format is installed but found problems.
+  echo "clang-format findings:" >&2
+  "${COLORDIFF_BIN}" < "${clang_patch}"
+
+  echo "clang-format found issues in your patches from ${MR_ANCESTOR_SHA}" \
+    "to the current patch. Run \`./ci.sh lint | patch -p1\` from the base" \
+    "directory to apply them." >&2
+  exit 1
+}
+
+# Runs clang-tidy on the pending CLs. If the "all" argument is passed it runs
+# clang-tidy over all the source files instead.
+#
+# Any remaining arguments are forwarded to clang-tidy. The full output is
+# written to ${BUILD_DIR}/clang-tidy.txt and a per-check summary is printed to
+# stderr. Returns 1 when the parallel clang-tidy run fails.
+cmd_tidy() {
+  local what="${1:-}"
+
+  if [[ -z "${CLANG_TIDY_BIN}" ]]; then
+    echo "ERROR: You must install clang-tidy-7 or newer to use ci.sh tidy" >&2
+    exit 1
+  fi
+
+  # git sub-command (and arguments) that lists the files to analyze.
+  local git_args=()
+  if [[ "${what}" == "all" ]]; then
+    git_args=(ls-files)
+    # Drop "all" so that "$@" only holds the extra clang-tidy arguments.
+    shift
+  else
+    merge_request_commits
+    git_args=(
+        diff-tree --no-commit-id --name-only -r "${MR_ANCESTOR_SHA}"
+        "${MR_HEAD_SHA}"
+    )
+  fi
+
+  # Clang-tidy needs the compilation database generated by cmake.
+  if [[ ! -e "${BUILD_DIR}/compile_commands.json" ]]; then
+    # Generate the build options in debug mode, since we need the debug asserts
+    # enabled for the clang-tidy analyzer to use them.
+    CMAKE_BUILD_TYPE="Debug"
+    cmake_configure
+    # Build the autogen targets to generate the .h files from the .ui files.
+    local autogen_targets=(
+        $(ninja -C "${BUILD_DIR}" -t targets | grep -F _autogen: |
+          cut -f 1 -d :)
+    )
+    if [[ ${#autogen_targets[@]} != 0 ]]; then
+      ninja -C "${BUILD_DIR}" "${autogen_targets[@]}"
+    fi
+  fi
+
+  cd "${MYDIR}"
+  local nprocs=$(nproc --all || echo 1)
+  local ret=0
+  # Run one clang-tidy per .cc/.cpp file in parallel, keeping the output in
+  # input order.
+  if ! parallel -j"${nprocs}" --keep-order -- \
+      "${CLANG_TIDY_BIN}" -p "${BUILD_DIR}" -format-style=file -quiet "$@" {} \
+      < <(git "${git_args[@]}" | grep -E '(\.cc|\.cpp)$') \
+      >"${BUILD_DIR}/clang-tidy.txt"; then
+    ret=1
+  fi
+  { set +x; } 2>/dev/null
+  # Count how many times each bracketed check name appears in the output.
+  echo "Findings statistics:" >&2
+  grep -E ' \[[A-Za-z\.,\-]+\]' -o "${BUILD_DIR}/clang-tidy.txt" | sort \
+    | uniq -c >&2
+
+  if [[ $ret -ne 0 ]]; then
+    cat >&2 <<EOF
+Errors found, see ${BUILD_DIR}/clang-tidy.txt for details.
+To automatically fix them, run:
+
+  SKIP_TEST=1 ./ci.sh debug
+  ${CLANG_TIDY_BIN} -p ${BUILD_DIR} -fix -format-style=file -quiet $@ \$(git ${git_args[@]} | grep -E '(\.cc|\.cpp)\$')
+EOF
+  fi
+
+  return ${ret}
+}
+
+# Print stats about all the packages built in ${BUILD_DIR}/debs/.
+#
+# For every *.deb file directly inside that directory, prints the output of
+# `dpkg --info` followed by `dpkg --contents`.
+cmd_debian_stats() {
+  { set +x; } 2>/dev/null
+  local debsdir="${BUILD_DIR}/debs"
+  local f
+  # File names are read NUL-delimited and always passed quoted, so paths
+  # containing whitespace reach dpkg as a single argument.
+  while IFS='' read -r -d '' f; do
+    echo "====================================================================="
+    echo "Package $f:"
+    dpkg --info "$f"
+    dpkg --contents "$f"
+  done < <(find "${debsdir}" -maxdepth 1 -mindepth 1 -type f \
+           -name '*.deb' -print0)
+}
+
+# Build a Debian source package with debuild in a directory outside the
+# source tree.
+#
+# Arguments:
+#   $1: directory holding the package sources.
+#   $2: source package name; the build runs in ${BUILD_DIR}/debs/$2.
+build_debian_pkg() {
+  local srcdir="$1"
+  local srcpkg="$2"
+
+  local debsdir="${BUILD_DIR}/debs"
+  local builddir="${debsdir}/${srcpkg}"
+
+  # debuild doesn't have an easy way to build out of tree, so we mirror the
+  # first level of the source directory with symlinks.
+  mkdir -p "${builddir}"
+  # Entries are read NUL-delimited so names with whitespace are not split, and
+  # the loop variable is local so it does not leak into the caller.
+  local f
+  while IFS='' read -r -d '' f; do
+    if [[ ! -L "${builddir}/$f" ]]; then
+      rm -f "${builddir}/$f"
+      ln -s "${srcdir}/$f" "${builddir}/$f"
+    fi
+  done < <(find "${srcdir}" -mindepth 1 -maxdepth 1 -printf '%P\0')
+  (
+    cd "${builddir}"
+    debuild -b -uc -us
+  )
+}
+
+# Build the Debian packages of the given source package.
+#
+# Arguments:
+#   $1: source package name, either "jpeg-xl" or "highway".
+#
+# Exits with status 1 when the name is missing or unknown.
+cmd_debian_build() {
+  local srcpkg="${1:-}"
+
+  case "${srcpkg}" in
+    jpeg-xl)
+      build_debian_pkg "${MYDIR}" "jpeg-xl"
+      ;;
+    highway)
+      build_debian_pkg "${MYDIR}/third_party/highway" "highway"
+      ;;
+    *)
+      echo "ERROR: Must pass a valid source package name to build." >&2
+      # Fail explicitly; otherwise the command would report success after
+      # printing the error.
+      exit 1
+      ;;
+  esac
+}
+
+get_version() {
+  local varname=$1
+  local line=$(grep -F "set(${varname} " lib/CMakeLists.txt | head -n 1)
+  [[ -n "${line}" ]]
+  line="${line#set(${varname} }"
+  line="${line%)}"
+  echo "${line}"
+}
+
+# Bump the JPEG XL version in lib/CMakeLists.txt, the conformance workflow and
+# the Debian changelog.
+#
+# Arguments:
+#   $1: optional new version as MAJOR.MINOR[.PATCH]. When omitted, the minor
+#       version read from lib/CMakeLists.txt is incremented and the patch
+#       version is reset to 0.
+#
+# Requires `dch`; exits with status 1 when it is not installed.
+cmd_bump_version() {
+  local newver="${1:-}"
+
+  if ! which dch >/dev/null; then
+    # printf interprets the \n escapes; a plain `echo` prints them literally.
+    printf 'Missing dch\nTo install it run:\n  sudo apt install devscripts\n' >&2
+    exit 1
+  fi
+
+  if [[ -z "${newver}" ]]; then
+    local major=$(get_version JPEGXL_MAJOR_VERSION)
+    local minor=$(get_version JPEGXL_MINOR_VERSION)
+    local patch=0
+    minor=$(( ${minor}  + 1))
+  else
+    # Split MAJOR.MINOR[.PATCH]; a missing patch component defaults to 0.
+    local major="${newver%%.*}"
+    newver="${newver#*.}"
+    local minor="${newver%%.*}"
+    newver="${newver#${minor}}"
+    local patch="${newver#.}"
+    if [[ -z "${patch}" ]]; then
+      patch=0
+    fi
+  fi
+
+  newver="${major}.${minor}.${patch}"
+
+  echo "Bumping version to ${newver} (${major}.${minor}.${patch})"
+  sed -E \
+    -e "s/(set\\(JPEGXL_MAJOR_VERSION) [0-9]+\\)/\\1 ${major})/" \
+    -e "s/(set\\(JPEGXL_MINOR_VERSION) [0-9]+\\)/\\1 ${minor})/" \
+    -e "s/(set\\(JPEGXL_PATCH_VERSION) [0-9]+\\)/\\1 ${patch})/" \
+    -i lib/CMakeLists.txt
+  # The separating space stays outside of the captured group so exactly one
+  # space is written back; a doubled space would stop the pattern from
+  # matching on the next bump.
+  sed -E \
+    -e "s/(LIBJXL_VERSION:) [0-9\\.]+/\\1 ${major}.${minor}.${patch}/" \
+    -e "s/(LIBJXL_ABI_VERSION:) [0-9\\.]+/\\1 ${major}.${minor}/" \
+    -i .github/workflows/conformance.yml
+
+  # Update lib.gni
+  tools/scripts/build_cleaner.py --update
+
+  # Mark the previous version as "unstable".
+  DEBCHANGE_RELEASE_HEURISTIC=log dch -M --distribution unstable --release ''
+  DEBCHANGE_RELEASE_HEURISTIC=log dch -M \
+    --newversion "${newver}" \
+    "Bump JPEG XL version to ${newver}."
+}
+
+# Check that the AUTHORS file contains the email of the committer.
+#
+# Runs tools/scripts/check_author.py with the author email and name of every
+# commit in the range ${MR_ANCESTOR_SHA}..${MR_HEAD_SHA}.
+cmd_authors() {
+  merge_request_commits
+  local emails
+  local names
+  # Both logs walk the same commit range, so index i refers to the same commit
+  # in both arrays.
+  readarray -t emails < <(git log --format='%ae' "${MR_ANCESTOR_SHA}..${MR_HEAD_SHA}")
+  readarray -t names < <(git log --format='%an' "${MR_ANCESTOR_SHA}..${MR_HEAD_SHA}")
+  # Keep the loop index local so it does not leak into the global scope.
+  local i
+  for i in "${!names[@]}"; do
+    echo "Checking name '${names[$i]}' with email '${emails[$i]}' ..."
+    "${MYDIR}"/tools/scripts/check_author.py "${emails[$i]}" "${names[$i]}"
+  done
+}
+
+# Entry point: dispatch to the cmd_<CMD> function named by the first argument,
+# forwarding the remaining arguments to it. Without a command, print the usage
+# text to stderr and exit with status 1.
+main() {
+  local cmd="${1:-}"
+  if [[ -z "${cmd}" ]]; then
+    # The heredoc is unquoted so that $0 expands; any literal dollar sign must
+    # therefore be escaped.
+    cat >&2 <<EOF
+Use: $0 CMD
+
+Where cmd is one of:
+ opt       Build and test a Release with symbols build.
+ debug     Build and test a Debug build (NDEBUG is not defined).
+ release   Build and test a stripped Release binary without debug information.
+ asan      Build and test an ASan (AddressSanitizer) build.
+ msan      Build and test an MSan (MemorySanitizer) build. Needs to have msan
+           c++ libs installed with msan_install first.
+ tsan      Build and test a TSan (ThreadSanitizer) build.
+ asanfuzz  Build and test an ASan (AddressSanitizer) build for fuzzing.
+ msanfuzz  Build and test an MSan (MemorySanitizer) build for fuzzing.
+ test      Run the tests built by opt, debug, release, asan or msan. Useful when
+           building with SKIP_TEST=1.
+ gbench    Run the Google benchmark tests.
+ fuzz      Generate the fuzzer corpus and run the fuzzer on it. Useful after
+           building with asan or msan.
+ benchmark Run the benchmark over the default corpus.
+ fast_benchmark Run the benchmark over the small corpus.
+
+ coverage  Build and run tests with coverage support. Runs coverage_report as
+           well.
+ coverage_report Generate HTML, XML and text coverage report after a coverage
+           run.
+
+ lint      Run the linter checks on the current commit or merge request.
+ tidy      Run clang-tidy on the current commit or merge request.
+ authors   Check that the last commit's author is listed in the AUTHORS file.
+
+ msan_install Install the libc++ libraries required to build in msan mode. This
+              needs to be done once.
+
+ debian_build <srcpkg> Build the given source package.
+ debian_stats  Print stats about the built packages.
+
+oss-fuzz commands:
+ ossfuzz_asan   Build the local source inside oss-fuzz docker with asan.
+ ossfuzz_msan   Build the local source inside oss-fuzz docker with msan.
+ ossfuzz_ubsan  Build the local source inside oss-fuzz docker with ubsan.
+ ossfuzz_ninja  Run ninja on the BUILD_DIR inside the oss-fuzz docker. Extra
+                parameters are passed to ninja, for example "djxl_fuzzer" will
+                only build that ninja target. Use for faster build iteration
+                after one of the ossfuzz_*san commands.
+
+You can pass some optional environment variables as well:
+ - BUILD_DIR: The output build directory (by default "\$repo/build")
+ - BUILD_TARGET: The target triplet used when cross-compiling.
+ - CMAKE_FLAGS: Convenience flag to pass both CMAKE_C_FLAGS and CMAKE_CXX_FLAGS.
+ - CMAKE_PREFIX_PATH: Installation prefixes to be searched by the find_package.
+ - ENABLE_WASM_SIMD=1: enable experimental SIMD in WASM build (only).
+ - FUZZER_MAX_TIME: "fuzz" command fuzzer running timeout in seconds.
+ - LINT_OUTPUT: Path to the output patch from the "lint" command.
+ - SKIP_CPUSET=1: Skip modifying the cpuset in the arm_benchmark.
+ - SKIP_BUILD=1: Skip the build stage, cmake configure only.
+ - SKIP_TEST=1: Skip the test stage.
+ - STORE_IMAGES=0: Makes the benchmark discard the computed images.
+ - TEST_STACK_LIMIT: Stack size limit (ulimit -s) during tests, in KiB.
+ - TEST_SELECTOR: pass additional arguments to ctest, e.g. "-R .Resample.".
+ - STACK_SIZE=1: Generate binaries with the .stack_sizes sections.
+
+These optional environment variables are forwarded to the cmake call as
+parameters:
+ - CMAKE_BUILD_TYPE
+ - CMAKE_C_FLAGS
+ - CMAKE_CXX_FLAGS
+ - CMAKE_C_COMPILER_LAUNCHER
+ - CMAKE_CXX_COMPILER_LAUNCHER
+ - CMAKE_CROSSCOMPILING_EMULATOR
+ - CMAKE_FIND_ROOT_PATH
+ - CMAKE_EXE_LINKER_FLAGS
+ - CMAKE_MAKE_PROGRAM
+ - CMAKE_MODULE_LINKER_FLAGS
+ - CMAKE_SHARED_LINKER_FLAGS
+ - CMAKE_TOOLCHAIN_FILE
+
+Example:
+  BUILD_DIR=/tmp/build $0 opt
+EOF
+    exit 1
+  fi
+
+  cmd="cmd_${cmd}"
+  shift
+  # Trace the executed commands from here on.
+  set -x
+  "${cmd}" "$@"
+}
+
+main "$@"
diff --git a/third-party/libjxl/libjxl/cmake/FindAtomics.cmake b/third-party/libjxl/libjxl/cmake/FindAtomics.cmake
new file mode 100644
index 0000000000..9a6cdc39ec
--- /dev/null
+++ b/third-party/libjxl/libjxl/cmake/FindAtomics.cmake
@@ -0,0 +1,53 @@
+# Original issue:
+# * https://gitlab.kitware.com/cmake/cmake/-/issues/23021#note_1098733
+#
+# For reference:
+# * https://gcc.gnu.org/wiki/Atomic/GCCMM
+#
+# riscv64 specific:
+# * https://lists.debian.org/debian-riscv/2022/01/msg00009.html
+#
+# ATOMICS_FOUND        - system has c++ atomics
+# ATOMICS_LIBRARIES    - libraries needed to use c++ atomics
+
+include(CheckCXXSourceCompiles)
+
+# RISC-V only has 32-bit and 64-bit atomic instructions. GCC is supposed
+# to convert smaller atomics to those larger ones via masking and
+# shifting like LLVM, but it's a known bug that it does not. This means
+# anything that wants to use atomics on 1-byte or 2-byte types needs
+# -latomic, but not 4-byte or 8-byte (though it does no harm).
+set(atomic_code
+    "
+     #include <atomic>
+     #include <cstdint>
+     std::atomic<uint8_t> n8 (0); // riscv64
+     std::atomic<uint64_t> n64 (0); // armel, mipsel, powerpc
+     int main() {
+       ++n8;
+       ++n64;
+       return 0;
+     }")
+
+check_cxx_source_compiles("${atomic_code}" ATOMICS_LOCK_FREE_INSTRUCTIONS)
+
+if(ATOMICS_LOCK_FREE_INSTRUCTIONS)
+  set(ATOMICS_FOUND TRUE)
+  set(ATOMICS_LIBRARIES)
+else()
+  list(APPEND CMAKE_REQUIRED_LIBRARIES "-latomic")  # keep the caller's entries
+  check_cxx_source_compiles("${atomic_code}" ATOMICS_IN_LIBRARY)
+  list(REMOVE_AT CMAKE_REQUIRED_LIBRARIES -1)  # drop only the entry added above
+  if(ATOMICS_IN_LIBRARY)
+    set(ATOMICS_LIBRARY atomic)
+    include(FindPackageHandleStandardArgs)
+    find_package_handle_standard_args(Atomics DEFAULT_MSG ATOMICS_LIBRARY)
+    set(ATOMICS_LIBRARIES ${ATOMICS_LIBRARY})
+    unset(ATOMICS_LIBRARY)
+  else()
+    if(Atomics_FIND_REQUIRED)
+      message(FATAL_ERROR "Neither lock free instructions nor -latomic found.")
+    endif()
+  endif()
+endif()
+unset(atomic_code)
diff --git a/third-party/libjxl/libjxl/cmake/FindBrotli.cmake b/third-party/libjxl/libjxl/cmake/FindBrotli.cmake
new file mode 100644
index 0000000000..9fb78e47d8
--- /dev/null
+++ b/third-party/libjxl/libjxl/cmake/FindBrotli.cmake
@@ -0,0 +1,75 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+set(brlibs brotlicommon brotlienc brotlidec)
+
+find_package(PkgConfig QUIET)
+if (PkgConfig_FOUND)
+  foreach(brlib IN ITEMS ${brlibs})
+    string(TOUPPER "${brlib}" BRPREFIX)
+    pkg_check_modules("PC_${BRPREFIX}" lib${brlib})
+  endforeach()
+  set(Brotli_VERSION ${PC_BROTLICOMMON_VERSION})  # consumed by VERSION_VAR below
+endif()
+
+find_path(BROTLI_INCLUDE_DIR
+  NAMES brotli/decode.h
+  HINTS ${PC_BROTLICOMMON_INCLUDEDIR} ${PC_BROTLICOMMON_INCLUDE_DIRS}
+)
+
+foreach(brlib IN ITEMS ${brlibs})
+  string(TOUPPER "${brlib}" BRPREFIX)
+  find_library(${BRPREFIX}_LIBRARY
+    NAMES ${${BRPREFIX}_NAMES} ${brlib}
+    HINTS ${PC_${BRPREFIX}_LIBDIR} ${PC_${BRPREFIX}_LIBRARY_DIRS}
+  )
+
+  if (${BRPREFIX}_LIBRARY AND NOT TARGET ${brlib})
+    add_library(${brlib} INTERFACE IMPORTED GLOBAL)  # one imported target per library
+    if(CMAKE_VERSION VERSION_LESS "3.13.5")
+      set_property(TARGET ${brlib} PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${BROTLI_INCLUDE_DIR})
+      target_link_libraries(${brlib} INTERFACE ${${BRPREFIX}_LIBRARY})
+      set_property(TARGET ${brlib} PROPERTY INTERFACE_COMPILE_OPTIONS ${PC_${BRPREFIX}_CFLAGS_OTHER})
+    else()
+      target_include_directories(${brlib}
+        INTERFACE ${BROTLI_INCLUDE_DIR})
+      target_link_libraries(${brlib}
+        INTERFACE ${${BRPREFIX}_LIBRARY})
+      target_link_options(${brlib}
+        INTERFACE ${PC_${BRPREFIX}_LDFLAGS_OTHER})
+      target_compile_options(${brlib}
+        INTERFACE ${PC_${BRPREFIX}_CFLAGS_OTHER})
+    endif()
+  endif()
+endforeach()
+
+if (BROTLICOMMON_FOUND AND BROTLIENC_FOUND AND BROTLIDEC_FOUND)
+  set(Brotli_FOUND ON)
+else ()
+  set(Brotli_FOUND OFF)
+endif()
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(Brotli
+  FOUND_VAR Brotli_FOUND
+  REQUIRED_VARS
+    BROTLI_INCLUDE_DIR
+    BROTLICOMMON_LIBRARY
+    BROTLIENC_LIBRARY
+    BROTLIDEC_LIBRARY
+  VERSION_VAR Brotli_VERSION
+)
+
+mark_as_advanced(
+  BROTLI_INCLUDE_DIR
+  BROTLICOMMON_LIBRARY
+  BROTLIENC_LIBRARY
+  BROTLIDEC_LIBRARY
+)
+
+if (Brotli_FOUND)
+  set(Brotli_LIBRARIES ${BROTLICOMMON_LIBRARY} ${BROTLIENC_LIBRARY} ${BROTLIDEC_LIBRARY})
+  set(Brotli_INCLUDE_DIRS ${BROTLI_INCLUDE_DIR})
+endif()
diff --git a/third-party/libjxl/libjxl/cmake/FindHWY.cmake b/third-party/libjxl/libjxl/cmake/FindHWY.cmake
new file mode 100644
index 0000000000..c1deb9b851
--- /dev/null
+++ b/third-party/libjxl/libjxl/cmake/FindHWY.cmake
@@ -0,0 +1,66 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+find_package(PkgConfig QUIET)
+if (PkgConfig_FOUND)
+  pkg_check_modules(PC_HWY QUIET libhwy)
+  set(HWY_VERSION ${PC_HWY_VERSION})
+endif ()
+
+find_path(HWY_INCLUDE_DIR
+  NAMES hwy/highway.h
+  HINTS ${PC_HWY_INCLUDEDIR} ${PC_HWY_INCLUDE_DIRS}
+)
+
+find_library(HWY_LIBRARY
+  NAMES ${HWY_NAMES} hwy
+  HINTS ${PC_HWY_LIBDIR} ${PC_HWY_LIBRARY_DIRS}
+)
+
+if (HWY_INCLUDE_DIR AND NOT HWY_VERSION)  # no pkg-config version: parse the header
+  if (EXISTS "${HWY_INCLUDE_DIR}/hwy/highway.h")
+    file(READ "${HWY_INCLUDE_DIR}/hwy/highway.h" HWY_VERSION_CONTENT)
+
+    string(REGEX MATCH "#define +HWY_MAJOR +([0-9]+)" _dummy "${HWY_VERSION_CONTENT}")
+    set(HWY_VERSION_MAJOR "${CMAKE_MATCH_1}")
+
+    string(REGEX MATCH "#define +HWY_MINOR +([0-9]+)" _dummy "${HWY_VERSION_CONTENT}")
+    set(HWY_VERSION_MINOR "${CMAKE_MATCH_1}")
+
+    string(REGEX MATCH "#define +HWY_PATCH +([0-9]+)" _dummy "${HWY_VERSION_CONTENT}")
+    set(HWY_VERSION_PATCH "${CMAKE_MATCH_1}")
+
+    set(HWY_VERSION "${HWY_VERSION_MAJOR}.${HWY_VERSION_MINOR}.${HWY_VERSION_PATCH}")
+  endif ()
+endif ()
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(HWY
+  FOUND_VAR HWY_FOUND
+  REQUIRED_VARS HWY_LIBRARY HWY_INCLUDE_DIR
+  VERSION_VAR HWY_VERSION
+)
+
+if (HWY_LIBRARY AND NOT TARGET hwy)
+  add_library(hwy INTERFACE IMPORTED GLOBAL)
+
+  if(CMAKE_VERSION VERSION_LESS "3.13.5")
+    set_property(TARGET hwy PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${HWY_INCLUDE_DIR})
+    target_link_libraries(hwy INTERFACE ${HWY_LIBRARY})
+    set_property(TARGET hwy PROPERTY INTERFACE_COMPILE_OPTIONS ${PC_HWY_CFLAGS_OTHER})
+  else()
+    target_include_directories(hwy INTERFACE ${HWY_INCLUDE_DIR})
+    target_link_libraries(hwy INTERFACE ${HWY_LIBRARY})
+    target_link_options(hwy INTERFACE ${PC_HWY_LDFLAGS_OTHER})
+    target_compile_options(hwy INTERFACE ${PC_HWY_CFLAGS_OTHER})
+  endif()
+endif()
+
+mark_as_advanced(HWY_INCLUDE_DIR HWY_LIBRARY)
+
+if (HWY_FOUND)
+    set(HWY_LIBRARIES ${HWY_LIBRARY})
+    set(HWY_INCLUDE_DIRS ${HWY_INCLUDE_DIR})
+endif ()
diff --git a/third-party/libjxl/libjxl/cmake/FindLCMS2.cmake b/third-party/libjxl/libjxl/cmake/FindLCMS2.cmake
new file mode 100644
index 0000000000..0a7b54eb96
--- /dev/null
+++ b/third-party/libjxl/libjxl/cmake/FindLCMS2.cmake
@@ -0,0 +1,59 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+find_package(PkgConfig QUIET)
+if (PkgConfig_FOUND)
+  pkg_check_modules(PC_LCMS2 QUIET lcms2)  # the pkg-config module is named "lcms2"
+  set(LCMS2_VERSION ${PC_LCMS2_VERSION})
+endif ()
+
+find_path(LCMS2_INCLUDE_DIR
+  NAMES lcms2.h
+  HINTS ${PC_LCMS2_INCLUDEDIR} ${PC_LCMS2_INCLUDE_DIRS}
+)
+
+find_library(LCMS2_LIBRARY
+  NAMES ${LCMS2_NAMES} lcms2 liblcms2 lcms-2 liblcms-2
+  HINTS ${PC_LCMS2_LIBDIR} ${PC_LCMS2_LIBRARY_DIRS}
+)
+
+if (LCMS2_INCLUDE_DIR AND NOT LCMS2_VERSION)  # no pkg-config version: parse the header
+    file(READ "${LCMS2_INCLUDE_DIR}/lcms2.h" LCMS2_VERSION_CONTENT)
+    string(REGEX MATCH "#define[ \t]+LCMS_VERSION[ \t]+([0-9]+)[ \t]*\n" LCMS2_VERSION_MATCH "${LCMS2_VERSION_CONTENT}")
+    if (LCMS2_VERSION_MATCH)
+        string(SUBSTRING ${CMAKE_MATCH_1} 0 1 LCMS2_VERSION_MAJOR)
+        string(SUBSTRING ${CMAKE_MATCH_1} 1 2 LCMS2_VERSION_MINOR)
+        set(LCMS2_VERSION "${LCMS2_VERSION_MAJOR}.${LCMS2_VERSION_MINOR}")
+    endif ()
+endif ()
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(LCMS2
+  FOUND_VAR LCMS2_FOUND
+  REQUIRED_VARS LCMS2_LIBRARY LCMS2_INCLUDE_DIR
+  VERSION_VAR LCMS2_VERSION
+)
+
+if (LCMS2_LIBRARY AND NOT TARGET lcms2)
+  add_library(lcms2 INTERFACE IMPORTED GLOBAL)
+
+  if(CMAKE_VERSION VERSION_LESS "3.13.5")
+    set_property(TARGET lcms2 PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${LCMS2_INCLUDE_DIR})
+    target_link_libraries(lcms2 INTERFACE ${LCMS2_LIBRARY})
+    set_property(TARGET lcms2 PROPERTY INTERFACE_COMPILE_OPTIONS ${PC_LCMS2_CFLAGS_OTHER})
+  else()
+    target_include_directories(lcms2 INTERFACE ${LCMS2_INCLUDE_DIR})
+    target_link_libraries(lcms2 INTERFACE ${LCMS2_LIBRARY})
+    target_link_options(lcms2 INTERFACE ${PC_LCMS2_LDFLAGS_OTHER})
+    target_compile_options(lcms2 INTERFACE ${PC_LCMS2_CFLAGS_OTHER})
+  endif()
+endif()
+
+mark_as_advanced(LCMS2_INCLUDE_DIR LCMS2_LIBRARY)
+
+if (LCMS2_FOUND)
+    set(LCMS2_LIBRARIES ${LCMS2_LIBRARY})
+    set(LCMS2_INCLUDE_DIRS ${LCMS2_INCLUDE_DIR})
+endif ()
diff --git a/third-party/libjxl/libjxl/debian/changelog b/third-party/libjxl/libjxl/debian/changelog
new file mode 100644
index 0000000000..6fbaddf68a
--- /dev/null
+++ b/third-party/libjxl/libjxl/debian/changelog
@@ -0,0 +1,95 @@
+jpeg-xl (0.9.0) UNRELEASED; urgency=medium
+
+  * Bump JPEG XL version to 0.9.0.
+
+ -- JPEG XL Maintainers <jpegxl@google.com>  Wed, 11 Jan 2023 16:12:35 +0000
+
+jpeg-xl (0.8) unstable; urgency=medium
+
+  * Bump JPEG XL version to 0.8.
+
+ -- JPEG XL Maintainers <jpegxl@google.com>  Wed, 11 Jan 2023 16:12:34 +0000
+
+jpeg-xl (0.7) unstable; urgency=medium
+
+  * Bump JPEG XL version to 0.7.
+
+ -- JPEG XL Maintainers <jpegxl@google.com>  Mon, 08 Aug 2022 14:43:58 +0000
+
+jpeg-xl (0.6) unstable; urgency=medium
+
+  * Bump JPEG XL version to 0.6.
+
+ -- JPEG XL Maintainers <jpegxl@google.com>  Fri, 10 Sep 2021 16:08:17 +0200
+
+jpeg-xl (0.5.0) unstable; urgency=medium
+
+  * Bump JPEG XL version to 0.5.0.
+
+ -- JPEG XL Maintainers <jpegxl@google.com>  Thu, 12 Aug 2021 23:49:40 +0200
+
+jpeg-xl (0.3.7) UNRELEASED; urgency=medium
+
+  * Bump JPEG XL version to 0.3.7.
+
+ -- Sami Boukortt <sboukortt@google.com>  Mon, 29 Mar 2021 12:14:20 +0200
+
+jpeg-xl (0.3.6) UNRELEASED; urgency=medium
+
+  * Bump JPEG XL version to 0.3.6.
+
+ -- Sami Boukortt <sboukortt@google.com>  Thu, 25 Mar 2021 17:40:58 +0100
+
+jpeg-xl (0.3.5) UNRELEASED; urgency=medium
+
+  * Bump JPEG XL version to 0.3.5.
+
+ -- Sami Boukortt <sboukortt@google.com>  Tue, 23 Mar 2021 15:20:44 +0100
+
+jpeg-xl (0.3.4) UNRELEASED; urgency=medium
+
+  * Bump JPEG XL version to 0.3.4.
+
+ -- Sami Boukortt <sboukortt@google.com>  Tue, 16 Mar 2021 12:13:59 +0100
+
+jpeg-xl (0.3.3) UNRELEASED; urgency=medium
+
+  * Bump JPEG XL version to 0.3.3.
+
+ -- Sami Boukortt <sboukortt@google.com>  Fri, 5 Mar 2021 19:15:26 +0100
+
+jpeg-xl (0.3.2) UNRELEASED; urgency=medium
+
+  * Bump JPEG XL version to 0.3.2.
+
+ -- Alex Deymo <deymo@google.com>  Fri, 12 Feb 2021 21:00:12 +0100
+
+jpeg-xl (0.3.1) UNRELEASED; urgency=medium
+
+  * Bump JPEG XL version to 0.3.1.
+
+ -- Alex Deymo <deymo@google.com>  Tue, 09 Feb 2021 09:48:43 +0100
+
+jpeg-xl (0.3) UNRELEASED; urgency=medium
+
+  * Bump JPEG XL version to 0.3.
+
+ -- Alex Deymo <deymo@google.com>  Wed, 27 Jan 2021 22:36:32 +0100
+
+jpeg-xl (0.2) UNRELEASED; urgency=medium
+
+  * Bump JPEG XL version to 0.2.
+
+ -- Alex Deymo <deymo@google.com>  Wed, 23 Nov 2020 20:42:10 +0100
+
+jpeg-xl (0.1) UNRELEASED; urgency=medium
+
+  * JPEG XL format release candidate.
+
+ -- Alex Deymo <deymo@google.com>  Fri, 13 Nov 2020 17:42:24 +0100
+
+jpeg-xl (0.0.2-1) UNRELEASED; urgency=medium
+
+  * Initial debian package.
+
+ -- Alex Deymo <deymo@google.com>  Tue, 27 Oct 2020 15:27:59 +0100
diff --git a/third-party/libjxl/libjxl/debian/compat b/third-party/libjxl/libjxl/debian/compat
new file mode 100644
index 0000000000..f599e28b8a
--- /dev/null
+++ b/third-party/libjxl/libjxl/debian/compat
@@ -0,0 +1 @@
+10
diff --git a/third-party/libjxl/libjxl/debian/control b/third-party/libjxl/libjxl/debian/control
new file mode 100644
index 0000000000..f5dc5ce0cc
--- /dev/null
+++ b/third-party/libjxl/libjxl/debian/control
@@ -0,0 +1,88 @@
+Source: jpeg-xl
+Maintainer: JPEG XL Maintainers <jpegxl@google.com>
+Section: misc
+Priority: optional
+Standards-Version: 3.9.8
+Build-Depends:
+ asciidoc,
+ cmake,
+ debhelper (>= 9),
+ libbrotli-dev,
+ libgdk-pixbuf-2.0-dev | libgdk-pixbuf2.0-dev,
+ libgif-dev,
+ libgimp2.0-dev,
+ libgmock-dev,
+ libgoogle-perftools-dev,
+ libgtest-dev,
+ libhwy-dev (>= 1.0.0),
+ libjpeg-dev,
+ libopenexr-dev,
+ libpng-dev,
+ libwebp-dev,
+ pkg-config,
+ xdg-utils,
+ xmlto,
+Homepage: https://github.com/libjxl/libjxl
+Rules-Requires-Root: no
+
+Package: jxl
+Architecture: any
+Section: utils
+Depends: ${misc:Depends}, ${shlibs:Depends}
+Description: JPEG XL Image Coding System - "JXL" (command line utility)
+ The JPEG XL Image Coding System (ISO/IEC 18181) is a lossy and
+ lossless image compression format. It has a rich feature set and is
+ particularly optimized for responsive web environments, so that
+ content renders well on a wide range of devices. Moreover, it includes
+ several features that help transition from the legacy JPEG format.
+ .
+ This package installs the command line utilities.
+
+Package: libjxl-dev
+Architecture: any
+Section: libdevel
+Depends: libjxl (= ${binary:Version}), ${misc:Depends},
+ libhwy-dev,
+Description: JPEG XL Image Coding System - "JXL" (development files)
+ The JPEG XL Image Coding System (ISO/IEC 18181) is a lossy and
+ lossless image compression format. It has a rich feature set and is
+ particularly optimized for responsive web environments, so that
+ content renders well on a wide range of devices. Moreover, it includes
+ several features that help transition from the legacy JPEG format.
+ .
+ This package installs development files.
+
+Package: libjxl
+Architecture: any
+Multi-Arch: same
+Section: libs
+Depends: ${shlibs:Depends}, ${misc:Depends}
+Pre-Depends: ${misc:Pre-Depends}
+Description: JPEG XL Image Coding System - "JXL" (shared libraries)
+ The JPEG XL Image Coding System (ISO/IEC 18181) is a lossy and
+ lossless image compression format. It has a rich feature set and is
+ particularly optimized for responsive web environments, so that
+ content renders well on a wide range of devices. Moreover, it includes
+ several features that help transition from the legacy JPEG format.
+ .
+ This package installs shared libraries.
+
+Package: libjxl-gdk-pixbuf
+Architecture: any
+Multi-Arch: same
+Section: libs
+Depends: ${shlibs:Depends}, ${misc:Depends}
+Pre-Depends: ${misc:Pre-Depends}
+Description: JPEG XL Plugin for gdk-pixbuf
+ This package installs the required files for reading JPEG XL files in
+ GTK applications.
+
+Package: libjxl-gimp-plugin
+Architecture: any
+Multi-Arch: same
+Section: graphics
+Depends: ${shlibs:Depends}, ${misc:Depends}
+Pre-Depends: ${misc:Pre-Depends}
+Enhances: gimp
+Description: JPEG XL Import and Export Plugin for GIMP
+ This is a plugin for GIMP version 2.10.x to import and export JPEG XL images.
diff --git a/third-party/libjxl/libjxl/debian/copyright b/third-party/libjxl/libjxl/debian/copyright
new file mode 100644
index 0000000000..7786a8775b
--- /dev/null
+++ b/third-party/libjxl/libjxl/debian/copyright
@@ -0,0 +1,199 @@
+Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: jpeg-xl
+
+Files: *
+Copyright: 2020 the JPEG XL Project
+License: BSD-3-clause
+
+Files: third_party/libjpeg-turbo/*
+Copyright: (C)2009-2023 D. R. Commander. All Rights Reserved.
+ (C)2015 Viktor Szathmáry. All Rights Reserved.
+License: BSD-3-clause
+
+Files: third_party/sjpeg/*
+Copyright: 2017 Google, Inc
+License: Apache-2.0
+
+Files: third_party/skcms/*
+Copyright: 2018 Google Inc.
+License: BSD-3-clause
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ .
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Google Inc. nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+ .
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Files: testdata/external/pngsuite/*
+Copyright: Willem van Schaik, 1996, 2011
+License: PngSuite License
+ See http://www.schaik.com/pngsuite/ for details.
+ .
+ Permission to use, copy, modify and distribute these images for any
+ purpose and without fee is hereby granted.
+
+Files: testdata/external/raw.pixls/*
+Copyright: their respective owners listed in https://raw.pixls.us/
+License: CC0-1.0
+
+Files: testdata/external/wesaturate/*
+Copyright: their respective owners listed in https://www.wesaturate.com/
+License: CC0-1.0
+
+Files: testdata/external/wide-gamut-tests/
+Copyright: github.com/codelogic/wide-gamut-tests authors.
+License: Apache-2.0
+
+License: Apache-2.0
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ .
+      http://www.apache.org/licenses/LICENSE-2.0
+ .
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ .
+ On Debian systems, the complete text of the Apache License, Version 2
+ can be found in "/usr/share/common-licenses/Apache-2.0".
+
+License: CC0-1.0
+ Creative Commons Zero v1.0 Universal
+ .
+ CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL
+ SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT
+ RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS"
+ BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS
+ DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS
+ LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE
+ INFORMATION OR WORKS PROVIDED HEREUNDER.
+ .
+ Statement of Purpose
+ .
+ The laws of most jurisdictions throughout the world automatically confer
+ exclusive Copyright and Related Rights (defined below) upon the creator and
+ subsequent owner(s) (each and all, an "owner") of an original work of
+ authorship and/or a database (each, a "Work").
+ .
+ Certain owners wish to permanently relinquish those rights to a Work for the
+ purpose of contributing to a commons of creative, cultural and scientific
+ works ("Commons") that the public can reliably and without fear of later
+ claims of infringement build upon, modify, incorporate in other works, reuse
+ and redistribute as freely as possible in any form whatsoever and for any
+ purposes, including without limitation commercial purposes. These owners may
+ contribute to the Commons to promote the ideal of a free culture and the
+ further production of creative, cultural and scientific works, or to gain
+ reputation or greater distribution for their Work in part through the use
+ and efforts of others.
+ .
+ For these and/or other purposes and motivations, and without any expectation
+ of additional consideration or compensation, the person associating CC0 with
+ a Work (the "Affirmer"), to the extent that he or she is an owner of
+ Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to
+ the Work and publicly distribute the Work under its terms, with knowledge of
+ his or her Copyright and Related Rights in the Work and the meaning and
+ intended legal effect of CC0 on those rights.
+ .
+ 1. Copyright and Related Rights. A Work made available under CC0 may be
+ protected by copyright and related or neighboring rights ("Copyright and
+ Related Rights"). Copyright and Related Rights include, but are not limited
+ to, the following:
+   i. the right to reproduce, adapt, distribute, perform, display,
+ communicate, and translate a Work;
+   ii. moral rights retained by the original author(s) and/or performer(s);
+   iii. publicity and privacy rights pertaining to a person's image or
+ likeness depicted in a Work;
+   iv. rights protecting against unfair competition in regards to a Work,
+ subject to the limitations in paragraph 4(a), below;
+   v. rights protecting the extraction, dissemination, use and reuse of data
+ in a Work;
+   vi. database rights (such as those arising under Directive 96/9/EC of the
+ European Parliament and of the Council of 11 March 1996 on the legal
+ protection of databases, and under any national implementation thereof,
+ including any amended or successor version of such directive); and
+   vii. other similar, equivalent or corresponding rights throughout the
+ world based on applicable law or treaty, and any national implementations
+ thereof.
+ .
+ 2. Waiver. To the greatest extent permitted by, but not in contravention of,
+ applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
+ unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
+ and Related Rights and associated claims and causes of action, whether now
+ known or unknown (including existing as well as future claims and causes of
+ action), in the Work (i) in all territories worldwide, (ii) for the maximum
+ duration provided by applicable law or treaty (including future time
+ extensions), (iii) in any current or future medium and for any number of
+ copies, and (iv) for any purpose whatsoever, including without limitation
+ commercial, advertising or promotional purposes (the "Waiver"). Affirmer
+ makes the Waiver for the benefit of each member of the public at large and
+ to the detriment of Affirmer's heirs and successors, fully intending that
+ such Waiver shall not be subject to revocation, rescission, cancellation,
+ termination, or any other legal or equitable action to disrupt the quiet
+ enjoyment of the Work by the public as contemplated by Affirmer's express
+ Statement of Purpose.
+ .
+ 3. Public License Fallback. Should any part of the Waiver for any reason be
+ judged legally invalid or ineffective under applicable law, then the Waiver
+ shall be preserved to the maximum extent permitted taking into account
+ Affirmer's express Statement of Purpose. In addition, to the extent the
+ Waiver is so judged Affirmer hereby grants to each affected person a
+ royalty-free, non transferable, non sublicensable, non exclusive,
+ irrevocable and unconditional license to exercise Affirmer's Copyright and
+ Related Rights in the Work (i) in all territories worldwide, (ii) for the
+ maximum duration provided by applicable law or treaty (including future time
+ extensions), (iii) in any current or future medium and for any number of
+ copies, and (iv) for any purpose whatsoever, including without limitation
+ commercial, advertising or promotional purposes (the "License"). The License
+ shall be deemed effective as of the date CC0 was applied by Affirmer to the
+ Work. Should any part of the License for any reason be judged legally
+ invalid or ineffective under applicable law, such partial invalidity or
+ ineffectiveness shall not invalidate the remainder of the License, and in
+ such case Affirmer hereby affirms that he or she will not (i) exercise any
+ of his or her remaining Copyright and Related Rights in the Work or (ii)
+ assert any associated claims and causes of action with respect to the Work,
+ in either case contrary to Affirmer's express Statement of Purpose.
+ .
+ 4. Limitations and Disclaimers.
+   a. No trademark or patent rights held by Affirmer are waived, abandoned,
+ surrendered, licensed or otherwise affected by this document.
+   b. Affirmer offers the Work as-is and makes no representations or
+ warranties of any kind concerning the Work, express, implied, statutory or
+ otherwise, including without limitation warranties of title,
+ merchantability, fitness for a particular purpose, non infringement, or the
+ absence of latent or other defects, accuracy, or the present or absence of
+ errors, whether or not discoverable, all to the greatest extent permissible
+ under applicable law.
+   c. Affirmer disclaims responsibility for clearing rights of other persons
+ that may apply to the Work or any use thereof, including without limitation
+ any person's Copyright and Related Rights in the Work. Further, Affirmer
+ disclaims responsibility for obtaining any necessary consents, permissions
+ or other rights required for any use of the Work.
+   d. Affirmer understands and acknowledges that Creative Commons is not a
+ party to this document and has no duty or obligation with respect to this
+ CC0 or use of the Work.
+ .
+ For more information, please see:
+ <http://creativecommons.org/publicdomain/zero/1.0/>
+
diff --git a/third-party/libjxl/libjxl/debian/jxl.install b/third-party/libjxl/libjxl/debian/jxl.install
new file mode 100644
index 0000000000..c3bae3ed10
--- /dev/null
+++ b/third-party/libjxl/libjxl/debian/jxl.install
@@ -0,0 +1,3 @@
+usr/bin/*
+usr/share/man/man1/cjxl.1
+usr/share/man/man1/djxl.1
diff --git a/third-party/libjxl/libjxl/debian/libjxl-dev.install b/third-party/libjxl/libjxl/debian/libjxl-dev.install
new file mode 100644
index 0000000000..b735ec2c26
--- /dev/null
+++ b/third-party/libjxl/libjxl/debian/libjxl-dev.install
@@ -0,0 +1,4 @@
+usr/include/jxl/*.h
+usr/lib/*/*.a
+usr/lib/*/*.so
+usr/lib/*/pkgconfig/*.pc
diff --git a/third-party/libjxl/libjxl/debian/libjxl-gdk-pixbuf.install b/third-party/libjxl/libjxl/debian/libjxl-gdk-pixbuf.install
new file mode 100644
index 0000000000..12d2ab250f
--- /dev/null
+++ b/third-party/libjxl/libjxl/debian/libjxl-gdk-pixbuf.install
@@ -0,0 +1,3 @@
+usr/lib/*/gdk-pixbuf-*/*/loaders/*
+usr/share/mime/packages/image-jxl.xml
+usr/share/thumbnailers/jxl.thumbnailer
diff --git a/third-party/libjxl/libjxl/debian/libjxl-gimp-plugin.install b/third-party/libjxl/libjxl/debian/libjxl-gimp-plugin.install
new file mode 100644
index 0000000000..353431dba3
--- /dev/null
+++ b/third-party/libjxl/libjxl/debian/libjxl-gimp-plugin.install
@@ -0,0 +1 @@
+usr/lib/gimp
diff --git a/third-party/libjxl/libjxl/debian/libjxl.install b/third-party/libjxl/libjxl/debian/libjxl.install
new file mode 100644
index 0000000000..cd157a7a5c
--- /dev/null
+++ b/third-party/libjxl/libjxl/debian/libjxl.install
@@ -0,0 +1 @@
+usr/lib/*/libjxl*.so.*
diff --git a/third-party/libjxl/libjxl/debian/rules b/third-party/libjxl/libjxl/debian/rules
new file mode 100755
index 0000000000..6259dbfc61
--- /dev/null
+++ b/third-party/libjxl/libjxl/debian/rules
@@ -0,0 +1,21 @@
+#!/usr/bin/make -f
+
+include /usr/share/dpkg/pkg-info.mk
+
+# Catch-all target: every requested target is handed to dh, which is told to
+# use the cmake build system.
+%:
+	dh $@ --buildsystem=cmake
+
+# Replaces the default configure step so that the package version and the
+# feature / system-dependency switches below are passed through to cmake.
+override_dh_auto_configure:
+	# TODO(deymo): Remove the DCMAKE_BUILD_TYPE once builds without NDEBUG
+	# are as useful as Release builds.
+        # TODO(szabadka) Re-enable jpegli after tests are fixed on Ubuntu 20.04,
+        # and debian:buster
+	dh_auto_configure -- \
+	  -DJPEGXL_VERSION=$(DEB_VERSION) \
+	  -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+	  -DJPEGXL_FORCE_SYSTEM_GTEST=ON \
+	  -DJPEGXL_FORCE_SYSTEM_BROTLI=ON \
+	  -DJPEGXL_FORCE_SYSTEM_HWY=ON \
+	  -DJPEGXL_ENABLE_JPEGLI=OFF \
+	  -DJPEGXL_ENABLE_JPEGLI_LIBJPEG=OFF \
+	  -DJPEGXL_ENABLE_PLUGINS=ON
diff --git a/third-party/libjxl/libjxl/debian/source/format b/third-party/libjxl/libjxl/debian/source/format
new file mode 100644
index 0000000000..163aaf8d82
--- /dev/null
+++ b/third-party/libjxl/libjxl/debian/source/format
@@ -0,0 +1 @@
+3.0 (quilt)
diff --git a/third-party/libjxl/libjxl/deps.sh b/third-party/libjxl/libjxl/deps.sh
new file mode 100755
index 0000000000..cfca027fcb
--- /dev/null
+++ b/third-party/libjxl/libjxl/deps.sh
@@ -0,0 +1,93 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# This file downloads the dependencies needed to build JPEG XL into third_party.
+# These dependencies are normally pulled in as git submodules.
+
+set -eu
+
+# Absolute directory of this script. Both command substitutions are quoted so a
+# checkout path containing spaces is not word-split before reaching dirname.
+MYDIR=$(dirname "$(realpath "$0")")
+
+# Git revisions we use for the given submodules. Update these whenever you
+# update a git submodule. Each variable is looked up by download_github using
+# the upper-cased dependency path with "/" and "-" turned into "_".
+THIRD_PARTY_BROTLI="36533a866ed1ca4b75cf049f4521e4ec5fe24727"
+THIRD_PARTY_HIGHWAY="591ad359a5aa6c320951ebd35f839604c87abe6c"
+THIRD_PARTY_SKCMS="b25b07b4b07990811de121c0356155b2ba0f4318"
+THIRD_PARTY_SJPEG="e5ab13008bb214deb66d5f3e17ca2f8dbff150bf"
+THIRD_PARTY_ZLIB="cacf7f1d4e3d44d871b605da3b647f07d718623f"
+THIRD_PARTY_LIBPNG="a40189cf881e9f0db80511c382292a5604c3c3d1"
+THIRD_PARTY_LIBJPEG_TURBO="8ecba3647edb6dd940463fedf38ca33a8e2a73d1" # 2.1.5.1
+
+# Download the target revision from GitHub.
+download_github() {
+  local path="$1"
+  local project="$2"
+
+  local varname=`echo "$path" | tr '[:lower:]' '[:upper:]'`
+  varname="${varname/[\/-]/_}"
+  local sha
+  eval "sha=\${${varname}}"
+
+  local down_dir="${MYDIR}/downloads"
+  local local_fn="${down_dir}/${sha}.tar.gz"
+  if [[ -e "${local_fn}" && -d "${MYDIR}/${path}" ]]; then
+    echo "${path} already up to date." >&2
+    return 0
+  fi
+
+  local url
+  local strip_components=0
+  if [[ "${project:0:4}" == "http" ]]; then
+    # "project" is a googlesource.com base url.
+    url="${project}${sha}.tar.gz"
+  else
+    # GitHub files have a top-level directory
+    strip_components=1
+    url="https://github.com/${project}/tarball/${sha}"
+  fi
+
+  echo "Downloading ${path} version ${sha}..." >&2
+  mkdir -p "${down_dir}"
+  curl -L --show-error -o "${local_fn}.tmp" "${url}"
+  mkdir -p "${MYDIR}/${path}"
+  tar -zxf "${local_fn}.tmp" -C "${MYDIR}/${path}" \
+    --strip-components="${strip_components}"
+  mv "${local_fn}.tmp" "${local_fn}"
+}
+
+is_git_repository() {
+    local dir="$1"
+    local toplevel=$(git rev-parse --show-toplevel)
+
+    [[ "${dir}" == "${toplevel}" ]]
+}
+
+
+# Entry point: fetch the third-party sources either as pinned tarballs or, when
+# MYDIR is a git checkout, through git submodules.
+main() {
+  if ! is_git_repository "${MYDIR}"; then
+    # Sources downloaded from a tarball. Each entry is "<path> <project>".
+    local entry
+    for entry in \
+      "third_party/brotli google/brotli" \
+      "third_party/highway google/highway" \
+      "third_party/sjpeg webmproject/sjpeg" \
+      "third_party/skcms https://skia.googlesource.com/skcms/+archive/" \
+      "third_party/zlib madler/zlib" \
+      "third_party/libpng glennrp/libpng" \
+      "third_party/libjpeg-turbo libjpeg-turbo/libjpeg-turbo"
+    do
+      # Text before the first space is the path, text after it is the project.
+      download_github "${entry%% *}" "${entry#* }"
+    done
+    echo "Done."
+    return 0
+  fi
+
+  cat >&2 <<EOF
+Current directory is a git repository, downloading dependencies via git:
+
+  git submodule update --init --recursive
+
+EOF
+  git -C "${MYDIR}" submodule update --init --recursive --depth 1 --recommend-shallow
+  return 0
+}
+
+main "$@"
diff --git a/third-party/libjxl/libjxl/doc/api.txt b/third-party/libjxl/libjxl/doc/api.txt
new file mode 100644
index 0000000000..ed3f939acb
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/api.txt
@@ -0,0 +1,29 @@
+/* This document is meant for Doxygen use only. If you are looking for the API
+ * documentation generate it with `./ci.sh release` and look under the
+ * build/html directory.
+ *
+ * This file documents all the groups and defines the order in which they appear
+ * in Doxygen. Define the @defgroup commands here and use @addtogroup anywhere
+ * else.
+ */
+
+/**
+@defgroup libjxl JPEG XL library (libjxl)
+@brief The main JPEG XL decoder / encoder library.
+
+@addtogroup libjxl
+@{
+
+@defgroup libjxl_decoder JPEG XL Decoder
+
+@defgroup libjxl_encoder JPEG XL Encoder
+
+@defgroup libjxl_common JPEG XL common definitions
+
+@defgroup libjxl_butteraugli Butteraugli metric
+
+@}
+
+@defgroup libjxl_threads JPEG XL Multi-thread library (libjxl_threads)
+@brief Additional multi-threaded implementations for the parallel runner.
+*/
diff --git a/third-party/libjxl/libjxl/doc/benchmarking.md b/third-party/libjxl/libjxl/doc/benchmarking.md
new file mode 100644
index 0000000000..bac3200b08
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/benchmarking.md
@@ -0,0 +1,82 @@
+# Benchmarking
+
+For speed benchmarks on single images in single or multi-threaded decoding
+`djxl` can print decoding speed information. See `djxl --help` for details
+on the decoding options and note that the output image is optional for
+benchmarking purposes.
+
+For a more comprehensive comparison of compression density between multiple
+options, the tool `benchmark_xl` can be used (see below).
+
+## Benchmarking with benchmark_xl
+
+We recommend `build/tools/benchmark_xl` as a convenient method for reading
+images or image sequences, encoding them using various codecs (jpeg jxl png
+webp), decoding the result, and computing objective quality metrics. An example
+invocation is:
+
+```bash
+build/tools/benchmark_xl --input "/path/*.png" --codec jxl:wombat:d1,jxl:cheetah:d2
+```
+
+Multiple comma-separated codecs are allowed. The characters after : are
+parameters for the codec, separated by colons, in this case specifying maximum
+target psychovisual distances of 1 and 2 (higher implies lower quality) and
+the encoder effort (see below). Other common parameters are `r0.5` (target
+bitrate 0.5 bits per pixel) and `q92` (quality 92, on a scale of 0-100, where
+higher is better). The `jxl` codec supports the following additional parameters:
+
+Speed: `lightning`, `thunder`, `falcon`, `cheetah`, `hare`, `wombat`, `squirrel`,
+`kitten`, `tortoise` control the encoder effort in ascending order. This also
+affects memory usage: using lower effort will typically reduce memory consumption
+during encoding.
+
+*   `lightning` and `thunder` are fast modes useful for lossless mode (modular).
+*   `falcon` disables all of the following tools.
+*   `cheetah` enables coefficient reordering, context clustering, and heuristics
+    for selecting DCT sizes and quantization steps.
+*   `hare` enables Gaborish filtering, chroma from luma, and an initial estimate
+    of quantization steps.
+*   `wombat` enables error diffusion quantization and full DCT size selection
+    heuristics.
+*   `squirrel` (default) enables dots, patches, and spline detection, and full
+    context clustering.
+*   `kitten` optimizes the adaptive quantization for a psychovisual metric.
+*   `tortoise` enables a more thorough adaptive quantization search.
+
+Mode: JPEG XL has two modes. The default is Var-DCT mode, which is suitable for
+lossy compression. The other mode is Modular mode, which is suitable for lossless
+compression. Modular mode can also do lossy compression (e.g. `jxl:m:q50`).
+
+*   `m` activates modular mode.
+
+Other arguments to benchmark_xl include:
+
+*   `--save_compressed`: save codestreams to `output_dir`.
+*   `--save_decompressed`: save decompressed outputs to `output_dir`.
+*   `--output_extension`: selects the format used to output decoded images.
+*   `--num_threads`: number of codec instances that will independently
+    encode/decode images, or 0.
+*   `--inner_threads`: how many threads each instance should use for parallel
+    encoding/decoding, or 0.
+*   `--encode_reps`/`--decode_reps`: how many times to repeat encoding/decoding
+    each image, for more consistent measurements (we recommend 10).
+
+The benchmark output begins with a header:
+
+```
+Compr              Input    Compr            Compr       Compr  Decomp  Butteraugli
+Method            Pixels     Size              BPP   #    MP/s    MP/s     Distance    Error p norm           BPP*pnorm   Errors
+```
+
+`ComprMethod` lists each comma-separated codec. `InputPixels` is the number
+of pixels in the input image. `ComprSize` is the codestream size in bytes and
+`ComprBPP` the bitrate. `Compr MP/s` and `Decomp MP/s` are the
+compress/decompress throughput, in units of Megapixels/second.
+`Butteraugli Distance` indicates the maximum psychovisual error in the decoded
+image (larger is worse). `Error p norm` is a similar summary of the psychovisual
+error, but closer to an average, giving less weight to small low-quality
+regions. `BPP*pnorm` is the product of `ComprBPP` and `Error p norm`, which is a
+figure of merit for the codec (lower is better). `Errors` is nonzero if errors
+occurred while loading or encoding/decoding the image.
+
diff --git a/third-party/libjxl/libjxl/doc/building_and_testing.md b/third-party/libjxl/libjxl/doc/building_and_testing.md
new file mode 100644
index 0000000000..b19cf83b9c
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/building_and_testing.md
@@ -0,0 +1,166 @@
+# Building and Testing
+
+This file describes the building and testing facilities provided by the `ci.sh`
+script. It assumes you already have the build environment set up.
+
+## Basic building
+
+To build the JPEG XL software and run its unit tests, run:
+
+```bash
+./ci.sh release
+```
+
+## Testing
+
+`./ci.sh` build commands including `release`, `opt`, etc. will also run tests.
+You can set the environment variable `SKIP_TEST=1` to skip this.
+
+It is possible to manually run all the tests in parallel in all your CPUs with
+the command:
+
+```bash
+./ci.sh test
+```
+
+It is also possible for faster iteration to run a specific test binary directly.
+Tests are run with the `ctest` command and arguments passed to `ci.sh test` are
+forwarded to `ctest` with the appropriate environment variables set. For
+example, to list all the available tests you can run:
+
+```bash
+./ci.sh test -N
+```
+
+To run a specific test from the list or actually a set of tests matching a
+regular expression you can use `ctest`'s parameter `-R`:
+
+```bash
+./ci.sh test -R ^MyPrefixTe
+```
+
+That command would run any test whose name starts with `MyPrefixTe`. For
+more options run `ctest --help`, for example, you can pass `-j1` if you want
+to run only one test at a time instead of our default of multiple tests in
+parallel.
+
+## Other commands
+
+Running `./ci.sh` with no parameters shows a list of available commands. For
+example, you can run `opt` for optimized developer builds with symbols or
+`debug` for debug builds which do not have NDEBUG defined and therefore include
+more runtime debug information.
+
+### Cross-compiling
+
+To compile the code for an architecture different than the one you are running
+you can pass a
+[toolchain file](https://cmake.org/cmake/help/latest/manual/cmake-toolchains.7.html)
+to cmake if you have one for your target, or you can use the `BUILD_TARGET`
+environment variable in `./ci.sh`. For some targets, such as the Windows targets,
+`ci.sh` sets up extra environment variables that are needed for testing.
+
+This assumes that you already have a cross-compiling environment set up and the
+library dependencies are already installed for the target architecture as well.
+
+For example, to compile for the `aarch64-linux-gnu` target triplet you can run:
+
+```bash
+BUILD_TARGET=aarch64-linux-gnu ./ci.sh release
+```
+
+Whenever using a `BUILD_TARGET` or even a custom `BUILD_DIR` these variables
+must be set for **every call** to `ci.sh` even calls to `ci.sh test`, for which
+we recommend exporting them in your shell session, for example:
+
+```bash
+export BUILD_TARGET=x86_64-w64-mingw32 BUILD_DIR=build-foobar
+```
+
+### Format checks (lint)
+
+```bash
+./ci.sh lint
+```
+
+Linter checks will verify that the format of your patch conforms to the project
+style. For this, we run clang-format only on the lines that were changed by
+your commits.
+
+If your local git branch is tracking `origin/master` and you landed a few
+commits in your branch, running this lint command will check all the changes
+made from the common ancestor with `origin/master` to the latest changes,
+including uncommitted changes. The output of the program will show the patch
+that should be applied to fix your commits. You can apply these changes with the
+following command from the base directory of the git checkout:
+
+```bash
+./ci.sh lint | patch -p1
+```
+
+### Programming errors (tidy)
+
+```bash
+./ci.sh tidy
+```
+
+clang-tidy is a tool to check common programming errors in C++, and other valid
+C++ constructions that are discouraged by the style guide or otherwise dangerous
+and may constitute a bug.
+
+To run clang-tidy on the files changed by your changes you can run `./ci.sh
+tidy`. Note that this will report all the problems encountered in any file that
+was modified by one of your commits, not just on the lines that your commits
+modified.
+
+
+### Address Sanitizer (asan)
+
+```bash
+./ci.sh asan
+```
+
+ASan builds allow checking for invalid address usages, such as use-after-free.
+To perform these checks, as well as other undefined behavior checks we only need
+to build and run the unittests with ASan enabled which can be easily achieved
+with the command above. If you want to have the ASan build files separated from
+your regular `build/` directory to quickly switch between asan and regular
+builds, you can pass the build directory target as follows:
+
+```bash
+BUILD_DIR=build-asan ./ci.sh asan
+```
+
+### Memory Sanitizer (msan)
+
+MSan allows checking for invalid memory accesses at runtime, such as using an
+uninitialized value which likely means that there is a bug. To run these checks,
+a specially compiled version of the project and tests is needed.
+
+For building with MSan, you need to build a version of libc++ with
+`-fsanitize=memory` so we can link against it from the MSan build. Also, having
+an `llvm-symbolizer` installed is very helpful to obtain stack traces that
+include the symbols (functions and line numbers). To install `llvm-symbolizer`
+on a Debian-based system run:
+
+```bash
+sudo apt install llvm # or llvm-7, etc for a specific version.
+```
+
+To install a version of libc++ compiled with `-fsanitize=memory` you can use the
+`./ci.sh msan_install` command helper. This will download, compile and install
+libc++ and libc++abi in the `${HOME}/.msan` directory to be used later.
+
+After this is set up, you can build the project using the following command:
+
+```bash
+./ci.sh msan
+```
+
+This command by default uses the `build` directory to store the cmake and object
+files. If you want to have a separate build directory configured with msan you
+can for example call:
+
+```bash
+BUILD_DIR=build-msan ./ci.sh msan
+```
diff --git a/third-party/libjxl/libjxl/doc/building_wasm.md b/third-party/libjxl/libjxl/doc/building_wasm.md
new file mode 100644
index 0000000000..8d15bc432f
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/building_wasm.md
@@ -0,0 +1,62 @@
+# Building WASM artifacts
+
+This file describes the building and testing of JPEG XL
+[Web Assembly](https://webassembly.org/) bundles and wrappers.
+
+These instructions assume an up-to-date Debian/Ubuntu system.
+
+For the sake of simplicity, it is assumed that the following environment
+variables are set:
+
+ * `OPT` - path to the directory containing additional software;
+   the `emsdk` directory with the Emscripten SDK should reside there.
+
+## Requirements
+
+[CMake](https://cmake.org/) is used as a build system. To install it, follow
+[Debian build instructions](developing_in_debian.md).
+
+[Emscripten SDK](https://emscripten.org/) is required for building
+WebAssembly artifacts. To install it, follow the
+[Download and Install](https://emscripten.org/docs/getting_started/downloads.html)
+guide:
+
+```bash
+cd $OPT
+
+# Get the emsdk repo.
+git clone https://github.com/emscripten-core/emsdk.git
+
+# Enter that directory.
+cd emsdk
+
+# Download and install the latest SDK tools.
+./emsdk install latest
+
+# Make the "latest" SDK "active" for the current user. (writes ~/.emscripten file)
+./emsdk activate latest
+```
+
+## Building and testing the project
+
+```bash
+# Setup EMSDK and other environment variables. In practice EMSDK is set to be
+# $OPT/emsdk.
+source $OPT/emsdk/emsdk_env.sh
+
+# This should set the $EMSDK variable.
+# If your node version is <16.4.0, you might need to update to a newer version or override
+# the node binary with a version which supports SIMD:
+echo "NODE_JS='/path/to/node_binary'" >> $EMSDK/.emscripten
+
+# Assuming you are in the root level of the cloned libjxl repo,
+# either build with regular WASM:
+BUILD_TARGET=wasm32 emconfigure ./ci.sh release
+# or with SIMD WASM:
+BUILD_TARGET=wasm32 ENABLE_WASM_SIMD=1 emconfigure ./ci.sh release
+```
+
+## Example site
+
+Once you have built the wasm binary, you can give it a try by building a site
+that decodes jxl images, see [wasm_demo](../tools/wasm_demo/README.md).
diff --git a/third-party/libjxl/libjxl/doc/color_management.md b/third-party/libjxl/libjxl/doc/color_management.md
new file mode 100644
index 0000000000..88a7b60afa
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/color_management.md
@@ -0,0 +1,68 @@
+# Color Management
+
+[TOC]
+
+<!--*
+# Document freshness: For more information, see go/fresh-source.
+freshness: { owner: 'sboukortt' reviewed: '2022-09-27' }
+*-->
+
+## Why
+
+The vast majority of web images are still sRGB. However, wide-gamut material is
+increasingly being produced (photography, cinema, 4K). Screens covering most of
+the Adobe RGB gamut are readily available and some also cover most of DCI P3
+(iPhone, Pixel2) or even BT.2020.
+
+Currently, after a camera records a very saturated red pixel, most raw
+processors would clip it to the rather small sRGB gamut before saving as JPEG.
+In keeping with our high-quality goal, we prevent such loss by allowing wider
+input color spaces.
+
+## Which color space
+
+Even wide gamuts could be expressed relative to the sRGB primaries, but the
+resulting coordinates may be outside the valid 0..1 range. Surprisingly, such
+'unbounded' coordinates can be passed through color transforms provided the
+transfer functions are expressed as parametric functions (not lookup tables).
+However, most image file formats (including PNG and PNM) lack min/max metadata
+and thus do not support unbounded coordinates.
+
+Instead, we need a larger working gamut to ensure most pixel coordinates are
+within bounds and thus not clipped. However, larger gamuts result in lower
+precision/resolution when using <= 16 bit encodings (as opposed to 32-bit float
+in PFM). BT.2100 or P3 DCI appear to be good compromises.
+
+## CMS library
+
+Transforms with unbounded pixels are desirable because they reduce round-trip
+error in tests. This requires parametric curves, which are only supported for
+the common sRGB case in ICC v4 profiles. ArgyllCMS does not support v4. The
+other popular open-source CMS is LittleCMS. It is also used by color-managed
+editors (Krita/darktable), which increases the chances of interoperability.
+However, LCMS has race conditions and overflow issues that prevent fuzzing. We
+will later switch to the newer skcms. Note that this library does not intend to
+support multiProcessElements, so HDR transfer functions cannot be represented
+accurately. Thus in the long term, we will probably migrate away from ICC
+profiles entirely.
+
+## Which viewer
+
+On Linux, Krita and darktable support loading our PNG output images and their
+ICC profile.
+
+## How to compress/decompress
+
+### Embedded ICC profile
+
+-   Create an 8-bit or 16-bit PNG with an iCCP chunk, e.g. using darktable.
+-   Pass it to `cjxl`, then `djxl` with no special arguments. The decoded output
+    will have the same bit depth (can override with `--output_bit_depth`) and
+    color space.
+
+### Images without metadata (e.g. HDR)
+
+-   Create a PGM/PPM/PFM file in a known color space.
+-   Invoke `cjxl` with `-x color_space=RGB_D65_202_Rel_Lin` (linear 2020). For
+    details/possible values, see color_encoding.cc `Description`.
+-   Invoke `djxl` as above with no special arguments.
diff --git a/third-party/libjxl/libjxl/doc/developing_in_debian.md b/third-party/libjxl/libjxl/doc/developing_in_debian.md
new file mode 100644
index 0000000000..5b2bbd335c
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/developing_in_debian.md
@@ -0,0 +1,56 @@
+# Developing in Debian
+
+These instructions assume an up-to-date Debian/Ubuntu system.
+For other platforms, please instead use the following:
+
+* [Cross Compiling for Windows with Crossroad](developing_with_crossroad.md).
+
+## Minimum build dependencies
+
+Apart from the dependencies in `third_party`, some of the tools use external
+dependencies that need to be installed on your system first:
+
+```bash
+sudo apt install cmake clang doxygen g++ extra-cmake-modules \
+  libgif-dev libjpeg-dev ninja-build libgoogle-perftools-dev
+```
+
+Make sure your default `clang` compiler is at least version 6 by running
+
+```bash
+clang --version
+```
+
+If it still shows an old version despite having, for example, `clang-7` installed, you need
+to update the default `clang` compiler. On Debian-based systems run:
+
+```bash
+sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-7 100
+sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-7 100
+```
+
+Optionally, to compile some of the extra tool support and tests you can install
+the following packages:
+
+```bash
+sudo apt install qt6-base-dev libwebp-dev libgimp2.0-dev libopenexr-dev \
+  libgtest-dev libgmock-dev libbenchmark-dev libbenchmark-tools
+```
+
+For the lint/coverage commands, you will also need additional packages:
+
+```bash
+sudo apt install clang-format clang-tidy curl parallel gcovr
+```
+
+## Building
+
+The `libjxl` project uses CMake to build. We provide a script that simplifies the
+invocation. To build and test the project, run
+
+```bash
+./ci.sh opt
+```
+
+This writes binaries to `build/tools` and runs unit tests. More information
+on [build modes and testing](building_and_testing.md) is available.
diff --git a/third-party/libjxl/libjxl/doc/developing_in_github.md b/third-party/libjxl/libjxl/doc/developing_in_github.md
new file mode 100644
index 0000000000..ecda64fc85
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/developing_in_github.md
@@ -0,0 +1,357 @@
+# Developing in GitHub
+
+This document describes the development steps related to handling the git
+repository.
+
+If you are new to GitHub, there's a nice [quickstart
+guide](https://docs.github.com/en/github/getting-started-with-github/quickstart)
+on GitHub explaining the basics.
+
+## Initial setup
+
+You need to perform this set up at least once if you haven't used GitHub before.
+Read through the quickstart guide [Set up
+Git](https://docs.github.com/en/github/getting-started-with-github/set-up-git)
+page to get your git up and running. You will need to Fork a repository next.
+After that "Life of a Pull Request" describes the common everyday workflows.
+
+### Configure your SSH access
+
+The easiest way to configure access to your GitHub repository is to use SSH
+keys. For that you need an SSH private and public key, ideally a strong one. You
+can use different keys for different sites if you want. In this example, we will
+create one for using in GitHub only.
+
+Create the `~/.ssh/id_rsa_github` file executing the following. (Here and
+elsewhere, {{X}} are placeholders for your email/username)
+
+```bash
+ssh-keygen -t rsa -b 4096 -C "{{EMAIL}}" -f ~/.ssh/id_rsa_github
+```
+
+Go to your [SSH and GPG keys](https://github.com/settings/keys) settings and
+paste the contents of your *public key* (the one ending in `.pub`), that would
+be the output of this command:
+
+```bash
+cat ~/.ssh/id_rsa_github.pub
+```
+
+To use a specific key when SSHing to the github.com domain, you can add this
+snippet of config to your .ssh/config file executing the following.
+
+```bash
+cat >> ~/.ssh/config <<EOF
+
+Host github.com
+  Hostname github.com
+  IdentityFile ~/.ssh/id_rsa_github
+  IdentitiesOnly yes
+EOF
+```
+
+The `IdentitiesOnly yes` part forces SSH to use only the provided IdentityFile when
+talking to GitHub.
+
+### Fork your private copy
+
+The JPEG XL code is located in [this repo](https://github.com/libjxl/libjxl).
+
+The normal developer workflow in GitHub involves creating your own fork of a
+repository and uploading your own changes there. From your own copy you can
+request merges *to* the upstream repository directly, there's no need to create
+a branch in the upstream repository.
+
+[Fork the
+repository](https://docs.github.com/en/github/getting-started-with-github/fork-a-repo)
+in GitHub to create your own copy of the repository in GitHub. You can then
+propose to include changes in the main repository via a Pull Request.
+
+Once you are done you should have your repository at
+
+ https://<!-- not a link -->github.com<!-- not a link -->/*{{USERNAME}}*/libjxl
+
+where {{USERNAME}} denotes your GitHub username.
+
+### Checkout the JPEG XL code from GitHub
+
+To get the source code on your computer you need to "clone" it. There are two
+repositories at play here, the upstream repository (`libjxl/libjxl`) and your
+fork (`{{USERNAME}}/libjxl`). You will be normally fetching new changes from
+the upstream repository and push changes to your fork. Getting your changes from
+your fork to the upstream repository is done through the Web interface, via Pull
+Requests.
+
+The [Fork a
+repo](https://docs.github.com/en/github/getting-started-with-github/fork-a-repo)
+goes in great detail, but uses the git remote names `upstream` for the shared
+upstream repository and `origin` for your work. This guide proposes an
+alternative naming scheme, used in the examples below.
+
+In this guide `origin` is the upstream shared repository and `myfork` is your
+fork. You can use any other name for your fork if you want. Use the following
+commands to set things up, replacing `{{USERNAME}}` with your GitHub username:
+
+```bash
+git clone https://github.com/libjxl/libjxl --recursive
+cd libjxl
+git remote set-url --push origin git@github.com:{{USERNAME}}/libjxl.git
+git remote add myfork git@github.com:{{USERNAME}}/libjxl.git
+git remote -vv
+```
+
+These commands did three things:
+
+ * Created the repository with `origin` as the upstream remote,
+ * Changed the "push" URL to point to your fork, and
+ * Created a new remote pointing to your fork.
+
+The last step is optional. Since the "fetch" URL of `origin` points to the
+shared repository and the "push" URL points to your fork, fetching from `origin`
+always gets the latest changes from the upstream repository regardless of the
+contents of your fork.
+
+Having a second remote called `myfork` is only useful if you need to download
+pending changes from your fork from a different computer. For example, if you
+work on multiple computers, each one with this setup, you can push to your
+fork from one, and then fetch from `myfork` from another computer to get those.
+
+# Life of a Pull Request
+
+The general [GitHub flow
+guide](https://docs.github.com/en/github/getting-started-with-github/github-flow)
+applies to sending Pull Requests to this project.
+
+All the commands here assume you are in a git checkout as set up here.
+
+### Sync to the latest version
+
+```bash
+git fetch origin
+```
+
+The last upstream version is now on `origin/main` and none of your local
+branches have been modified by this command.
+
+### Start a new branch
+
+To start a new change you need a local branch. Each branch will represent a list
+of individual commits which can then be requested to be merged as a single merge
+request. So in general one branch is one code review, but each branch can have
+multiple individual commits in it.
+
+```bash
+git checkout origin/main -b mybranch
+```
+
+This will create a new branch `mybranch` tracking `origin/main`. A branch can
+track any remote or local branch, which is used by some tools. Running `git
+branch -vv` will show all the branches you have, what they are tracking and
+how many commits are ahead or behind. If you create a branch without tracking
+any other, you can add or change the tracking branch of the current branch
+running `git branch --set-upstream-to=...`.
+
+### Add changes to your branch
+
+Follow any of the many online tutorials, for example
+[The basics](https://git-scm.com/book/en/v2/Git-Basics-Getting-a-Git-Repository)
+chapter from the https://git-scm.com/doc website is a good starting guide.
+Create, change or delete files and do a git commit with a message.
+
+The commit message is required. A commit message should follow the 50/72 rule:
+
+*   First line is 50 characters or less.
+*   Then a blank line.
+*   Remaining text should be wrapped at 72 characters.
+
+The first line should identify your commit, since that's what most tools will
+show to the user. First lines like "Some fixes" are not useful. Explain what the
+commit contains and why.
+
+We follow the [Google C++ Coding
+Style](https://google.github.io/styleguide/cppguide.html). A
+[clang-format](https://clang.llvm.org/docs/ClangFormat.html) configuration
+file is available to automatically format your code, you can invoke it with
+the `./ci.sh lint` helper tool.
+
+Read the [CONTRIBUTING.md](../CONTRIBUTING.md) file for more information about
+contributing to libjxl.
+
+### Upload your changes for review
+
+The first step is a local review of your changes to see what you will be sending
+for review. `gitg` is a nice Gtk UI for reviewing your local changes, or `tig`
+for similar ncurses console-based interface. Otherwise, from the terminal you
+can run:
+
+```bash
+git branch -vv
+```
+
+To show the current status of your local branches. In particular, since your
+branch is tracking origin/main (as seen in the output) git will tell you that
+you are one commit ahead of the tracking branch.
+
+```
+* mybranch       e74ae1a [origin/main: ahead 1] Improved decoding speed by 40%
+```
+
+It is a good idea before uploading to sync again with upstream (`git fetch
+origin`) and then run `git branch -vv` to check whether there are new changes
+upstream. If that is the case, you will see a "behind" flag in the output:
+
+```
+* mybranch       e74ae1a [origin/main: ahead 1, behind 2] Improved decoding speed by 40%
+```
+
+To sync your changes on top of the latest changes in upstream you need to
+rebase:
+
+```bash
+git rebase
+```
+
+This will by default rebase your current branch changes on top of the tracking
+branch. In this case, this will try to apply the current commit on top of the
+latest origin/main (which has 2 more commits than the ones we have in our
+branch) and your branch will now include that. There could be conflicts that you
+have to deal with. A shortcut to do both fetch and rebase is to run `git pull
+-r`, where the `-r` stands for "rebase" and will rebase the local commits on top
+of the remote ones.
+
+Before uploading a patch, make sure your patch conforms to the
+[contributing guidelines](../CONTRIBUTING.md) and it
+[builds and passes tests](building_and_testing.md).
+
+Once you are ready to send your branch for review, upload it to *your* fork:
+
+```bash
+git push origin mybranch
+```
+
+This will push your local branch "mybranch" to a remote in your fork called
+"mybranch". The name can be anything, but keep in mind that it is public. A link
+to the URL to create a merge request will be displayed.
+
+```
+Enumerating objects: 627, done.
+Counting objects: 100% (627/627), done.
+Delta compression using up to 56 threads
+Compressing objects: 100% (388/388), done.
+Writing objects: 100% (389/389), 10.71 MiB | 8.34 MiB/s, done.
+Total 389 (delta 236), reused 0 (delta 0)
+remote:
+remote: Create a pull request for 'mybranch' on GitHub by visiting:
+remote:      https://github.com/{{USERNAME}}/libjxl/pull/new/mybranch
+remote:
+To github.com:{{USERNAME}}/libjxl.git
+ * [new branch]      mybranch -> mybranch
+```
+
+### Updating submodules
+
+The repository uses submodules for external library dependencies in
+third_party. Each submodule points to a particular external commit of the
+external repository by the hash code of that external commit. Just like
+regular source code files, this hash code is part of the current branch and
+jpeg xl commit you have checked out.
+
+When changing branches or when doing `git rebase`, git will unfortunately
+*not* automatically set those hashes to the ones of the branch or jpeg xl
+commit you changed to nor set the source files of the third_party submodules
+to the new state. That is, even though git will have updated the jpeg xl
+source code files on your disk to the new ones, it will leave the submodule
+hashes and the files in third_party in your workspace to the ones they were
+before you changed branches. This will show up in a git diff because this
+is seen as a change compared to the branch you switched to. The git diff shows
+the difference in hash codes (as if you are changing to the old ones), it does
+not show changes in files inside the third_party directory.
+
+This mismatch can cause at least two problems:
+
+*) the jpeg xl codebase may not compile due to third_party library version
+mismatch if e.g. API changed or a submodule was added/removed.
+
+*) when using `commit -a` your commit, which may be a technical change
+unrelated to submodule changes, will unintentionally contain a change to the
+submodules hash code, which is undesired unless you actually want to change
+the version of third_party libraries.
+
+To resolve this, the submodules must be updated manually with
+the following command after those actions (at least when the submodules
+changed):
+
+```
+git submodule update --init --recursive
+```
+
+Here, the init flag ensures new modules get added when necessary and the
+recursive flag is required for the submodules depending on other submodules.
+
+If you checkout a different branch, you can spot that submodules changed
+when it shows a message similar to this:
+
+```
+M       third_party/brotli
+M       third_party/lcms
+```
+
+If you do a rebase you may end up in a harder to solve situation, where
+`git submodule update --init --recursive` itself fails with errors such as:
+
+```
+Unable to checkout '35ef5c554d888bef217d449346067de05e269b30' in submodule path 'third_party/brotli'
+```
+
+In that case, you can use the force flag:
+
+```
+git submodule update --init --recursive --force
+```
+
+### Iterating changes in your merge request
+
+To address reviewer changes you need to amend the local changes in your branch
+first. Make the changes you need in your commit locally by running `git commit
+--amend file1 file2 file3 ...` or `git commit --amend -a` to amend all the
+changes from all the staged files.
+
+Once you have the new version of the "mybranch" branch to re-upload, you need to
+force push it to the same branch in your fork. Since you are pushing a different
+version of the same commit (as opposed to another commit on top of the existing
+ones), you need to force the operation to replace the old version.
+
+```bash
+git push origin mybranch --force
+```
+
+The merge request should now be updated with the new changes.
+
+### Merging your changes
+
+We use "rebase" as a merge policy, which means that there are no "merge" commits
+(commits with more than one parent) but instead only a linear history of
+changes.
+
+It is possible that other changes were added to the main branch since the last
+time you rebased your changes. These changes could create a conflict with your
+Pull Request, if so you need to `git fetch`, `git rebase` and push again your
+changes which need to go through the continuous integration workflow again to
+verify that all the tests pass again after including the latest changes.
+
+### Trying locally a pending Pull Request
+
+If you want to review in your computer a pending pull request proposed by
+another user you can fetch the merge request commit with the following command,
+replacing `NNNN` with the pull request number:
+
+```bash
+git fetch origin refs/pull/NNNN/head
+git checkout FETCH_HEAD
+```
+
+The first command will add to your local git repository the remote commit for
+the pending pull request and store a temporary reference called `FETCH_HEAD`.
+The second command then checks out that reference. From this point you can
+review the files in your computer, create a local branch for this FETCH_HEAD or
+build on top of it.
diff --git a/third-party/libjxl/libjxl/doc/developing_in_windows_msys.md b/third-party/libjxl/libjxl/doc/developing_in_windows_msys.md
new file mode 100644
index 0000000000..3e86d5dd86
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/developing_in_windows_msys.md
@@ -0,0 +1,168 @@
+# Developing for Windows with MSYS2
+
+[MSYS2](https://www.msys2.org/) ("minimal system 2") is a software distribution and a development platform based on MinGW and Cygwin.  It provides a  Unix-like environment to build code on Windows.  These instructions were written with a 64-bit instance of Windows 10 running on a VM.  They may also work on native instances of Windows and other versions of Windows.
+
+## Build Environments
+
+MSYS2 provides multiple development [environments](https://www.msys2.org/docs/environments/).  By convention, they are referred to in uppercase.  They target slightly different platforms, runtime libraries, and compiler toolchains.  For example, to build for 32-bit Windows, use the MINGW32 environment.  For interoperability with Visual Studio projects, use the UCRT64 environment.
+
+Since all of the build environments are built on top of the MSYS environment, **all updates and package installation must be done from within the MSYS environment**.  After making any package changes, `exit` all MSYS2 terminals and restart the desired build-environment.  This reminder is repeated multiple times throughout this guide.
+
+* **MINGW32:**  To compile for 32-bit Windows (on 64-bit Windows), use packages from the `mingw32` group.  Package names are prefixed with `mingw-w64-i686`.  The naming scheme may be different on the 32-bit version of MSYS2.
+
+* **MINGW64:**  This is the primary environment for building for 64-bit Windows.  It uses the older MSVCRT runtime, which is widely available across Windows systems.  Package names are prefixed with `mingw-w64-x86_64`.
+
+* **UCRT64:**  The Universal C Runtime (UCRT) is used by recent versions of Microsoft Visual Studio.  It ships by default with Windows 10.  For older versions of Windows, it must be provided with the application or installed by the user.  Package names are prefixed with `mingw-w64-ucrt-x86_64`.
+
+* **CLANG64:** Unfortunately, the `gimp` packages are not available for the CLANG64 environment.  However, `libjxl` will otherwise build in this environment if the appropriate packages are installed.  Packages are prefixed with `mingw-w64-clang-x86_64`.
+
+## Install and Upgrade MSYS2
+
+Download MSYS2 from the homepage.  Install at a location without any spaces on a drive with ample free space.  After installing the packages used in this guide, MSYS2 used about 15GB of space.
+
+Toward the end of installation, select the option to run MSYS2 now.  A command-line window will open.  Run the following command, and answer the prompts to update the repository and close the terminal.
+
+```bash
+pacman -Syu
+```
+
+Now restart the MSYS environment and run the following command to complete updates:
+
+```bash
+pacman -Su
+```
+
+## Package Management
+
+Packages are organized in groups, which share the build environment name, but in lower case.  Then they have name prefixes that indicate which group they belong to.  Consider this package search: `pacman -Ss cmake`
+
+```
+mingw32/mingw-w64-i686-cmake
+mingw64/mingw-w64-x86_64-cmake
+ucrt64/mingw-w64-ucrt-x86_64-cmake
+clang64/mingw-w64-clang-x86_64-cmake
+msys/cmake
+```
+
+We can see the organization `group/prefix-name`.  When installing packages, the group name is optional.
+
+```bash
+pacman -S mingw-w64-x86_64-cmake
+```
+ 
+For tools that need to be aware of the compiler to function, install the package that corresponds with the specific build-environment you plan to use.  For `cmake`, install the `mingw64` version.  The generic `msys/cmake` will not function correctly because it will not find the compiler.  For other tools, the generic `msys` version is adequate, like `msys/git`.
+
+To remove packages, use:
+
+```bash
+pacman -Rsc [package-name]
+```
+
+## Worst-Case Scenario...
+
+If package management is done within a build environment other than MSYS, the environment structure will be disrupted and compilation will likely fail.  If this happens, it may be necessary to reinstall MSYS2.
+
+1. Rename the `msys64` folder to `msys64.bak`.
+
+2. Use the installer to reinstall MSYS2 to `msys64`.
+
+3. Copy packages from `msys64.bak/var/cache/pacman/pkg/` to the new installation to save download time and bandwidth.
+
+4. Use `pacman` from within the MSYS environment to install and update packages.
+
+5. After successfully building a project, it is safe to delete `msys64.bak`
+
+## The MINGW64 Environment
+
+Next set up the MINGW64 environment.  The following commands should be run within the MSYS environment.  `pacman -S` is used to install packages.  The `--needed` argument prevents packages from being reinstalled.
+
+```bash
+pacman -S --needed base-devel mingw-w64-x86_64-toolchain
+pacman -S git mingw-w64-x86_64-cmake mingw-w64-x86_64-ninja \
+    mingw-w64-x86_64-gtest mingw-w64-x86_64-giflib \
+    mingw-w64-x86_64-libpng mingw-w64-x86_64-libjpeg-turbo 
+```
+
+## Build `libjxl`
+
+Download the source from the libjxl [releases](https://github.com/libjxl/libjxl/releases) page.  Alternatively, you may obtain the latest development version with `git`.  Run `./deps.sh` to ensure additional third-party dependencies are downloaded.
+
+Start the MINGW64 environment, create a build directory within the source directory, and configure with `cmake`.
+
+```bash
+mkdir build
+cd build
+cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+   -DBUILD_TESTING=OFF -DBUILD_SHARED_LIBS=OFF \
+   -DJPEGXL_ENABLE_BENCHMARK=OFF -DJPEGXL_ENABLE_PLUGINS=ON \
+   -DJPEGXL_ENABLE_MANPAGES=OFF -DJPEGXL_FORCE_SYSTEM_BROTLI=ON \
+   -DJPEGXL_FORCE_SYSTEM_GTEST=ON ..
+```
+
+Check the output to see if any dependencies were missed and need to be installed.  Adding `-G Ninja` may be helpful, but on my computer, Ninja was selected by default.  Remember that package changes must be done from the MSYS environment.  Then exit all MSYS2 terminals and restart the build environment.
+
+If all went well, you may now run `cmake` to build `libjxl`:
+
+```bash
+cmake --build .
+```
+
+Do not be alarmed by the compiler warnings.  They are caused by differences between gcc/g++ and clang.  The build should complete successfully.  Then `cjxl`, `djxl`, `jxlinfo`, and others can be run from within the build environment.  Moving them into the native Windows environment requires resolving `dll` issues that are beyond the scope of this document.
+
+## The `clang` Compiler
+
+To use the `clang` compiler, install the packages that correspond with the environment you wish to use.  Remember to make package changes from within the MSYS environment.
+
+```
+mingw-w64-i686-clang
+mingw-w64-i686-clang-tools-extra
+mingw-w64-i686-clang-compiler-rt
+
+mingw-w64-x86_64-clang
+mingw-w64-x86_64-clang-tools-extra
+mingw-w64-x86_64-clang-compiler-rt
+
+mingw-w64-ucrt-x86_64-clang
+mingw-w64-ucrt-x86_64-clang-tools-extra
+mingw-w64-ucrt-x86_64-clang-compiler-rt
+```
+
+After the `clang` compiler is installed, `libjxl` can be built with the `./ci.sh` script.
+
+```bash
+./ci.sh opt -DBUILD_TESTING=OFF -DBUILD_SHARED_LIBS=OFF \
+    -DJPEGXL_ENABLE_BENCHMARK=OFF -DJPEGXL_ENABLE_MANPAGES=OFF \
+    -DJPEGXL_FORCE_SYSTEM_BROTLI=ON -DJPEGXL_FORCE_SYSTEM_GTEST=ON
+```
+
+On my computer, `doxygen` packages needed to be installed to proceed with building.  Use `pacman -Ss doxygen` to find the packages to install.
+
+## The GIMP Plugin
+
+To build the GIMP plugin, install the relevant `gimp` package.  This will also install dependencies.  Again, perform package management tasks from only the MSYS environment.  Then restart the build environment.
+
+```bash
+pacman -S mingw-w64-i686-gimp
+pacman -S mingw-w64-x86_64-gimp
+pacman -S mingw-w64-ucrt-x86_64-gimp
+```
+
+If `clang` is installed, you can use the `./ci.sh` script to build.  Otherwise, navigate to the build directory to reconfigure and build with `cmake`.
+
+```bash
+cd build
+rm -r CM*
+cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+   -DBUILD_TESTING=OFF -DBUILD_SHARED_LIBS=OFF \
+   -DJPEGXL_ENABLE_BENCHMARK=OFF -DJPEGXL_ENABLE_MANPAGES=OFF \
+   -DJPEGXL_ENABLE_PLUGINS=ON -DJPEGXL_FORCE_SYSTEM_BROTLI=ON \
+   -DJPEGXL_FORCE_SYSTEM_GTEST=ON ..
+```
+
+The plugin is built statically, so there should be no need to install `dll` files.  To try out the plugin:
+
+1. [Download](https://www.gimp.org/downloads/) and install the stable version of GIMP (currently 2.10.24).
+
+2. Create a new folder: `C:\Program Files\GIMP 2\lib\gimp\2.0\plug-ins\file-jxl`
+
+3. Copy `build/plugins/gimp/file-jxl.exe` to the new folder.
diff --git a/third-party/libjxl/libjxl/doc/developing_in_windows_vcpkg.md b/third-party/libjxl/libjxl/doc/developing_in_windows_vcpkg.md
new file mode 100644
index 0000000000..a897be29a3
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/developing_in_windows_vcpkg.md
@@ -0,0 +1,90 @@
+# Developing on Windows with Visual Studio 2019
+
+These instructions assume an up-to-date Windows 10 (e.g. build 19041.928) with
+**Microsoft Visual Studio 2019** (e.g. Version 16.9.0 Preview 4.0) installed. If
+unavailable, please use another build environment:
+
+* [MSYS2 on Windows](developing_in_windows_msys.md)
+* [Crossroad on Linux](developing_with_crossroad.md) (cross compilation for Windows)
+
+## Minimum build dependencies
+
+Apart from the dependencies in third_party, some of the tools use external
+dependencies that need to be installed in your system first.
+
+Please install [vcpkg](https://vcpkg.readthedocs.io/en/latest/examples/installing-and-using-packages/)
+(tested with version 2019.07.18), and use it to install the following libraries:
+
+```
+vcpkg install gtest:x64-windows
+vcpkg install giflib:x64-windows
+vcpkg install libjpeg-turbo:x64-windows
+vcpkg install libpng:x64-windows
+vcpkg install zlib:x64-windows
+```
+
+## Building
+
+From Visual Studio, open the CMakeLists.txt in the JPEG XL root directory.
+Right-click the CMakeLists.txt entry in the Folder View of the Solution
+Explorer. In the context menu, select CMake Settings. Click on the green plus
+to add an x64-Clang configuration and the red minus to remove any non-Clang
+configuration (the MSVC compiler is currently not supported). Click on the blue
+hyperlink marked "CMakeSettings.json" and an editor will open. Insert the
+following text after replacing $VCPKG with the directory where you installed
+vcpkg above.
+
+```
+{
+  "configurations": [
+    {
+      "name": "x64-Clang-Release",
+      "generator": "Ninja",
+      "configurationType": "MinSizeRel",
+      "buildRoot": "${projectDir}\\out\\build\\${name}",
+      "installRoot": "${projectDir}\\out\\install\\${name}",
+      "cmakeCommandArgs": "-DCMAKE_TOOLCHAIN_FILE=$VCPKG/scripts/buildsystems/vcpkg.cmake",
+      "buildCommandArgs": "-v",
+      "ctestCommandArgs": "",
+      "inheritEnvironments": [ "clang_cl_x64" ],
+      "variables": [
+        {
+          "name": "VCPKG_TARGET_TRIPLET",
+          "value": "x64-windows",
+          "type": "STRING"
+        },
+        {
+          "name": "JPEGXL_ENABLE_TCMALLOC",
+          "value": "False",
+          "type": "BOOL"
+        },
+        {
+          "name": "BUILD_GMOCK",
+          "value": "True",
+          "type": "BOOL"
+        },
+        {
+          "name": "gtest_force_shared_crt",
+          "value": "True",
+          "type": "BOOL"
+        },
+        {
+          "name": "JPEGXL_ENABLE_FUZZERS",
+          "value": "False",
+          "type": "BOOL"
+        },
+        {
+          "name": "JPEGXL_ENABLE_VIEWERS",
+          "value": "False",
+          "type": "BOOL"
+        }
+      ]
+    }
+  ]
+}
+```
+
+The project is now ready for use. To build, simply press F7 (or choose
+Build All from the Build menu). This writes binaries to
+`out/build/x64-Clang-Release/tools`. The main [README.md](../README.md) explains
+how to use the encoder/decoder and benchmark binaries.
diff --git a/third-party/libjxl/libjxl/doc/developing_with_crossroad.md b/third-party/libjxl/libjxl/doc/developing_with_crossroad.md
new file mode 100644
index 0000000000..e7c2f23f99
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/developing_with_crossroad.md
@@ -0,0 +1,116 @@
+# Cross Compiling for Windows with Crossroad
+
+[Crossroad](https://pypi.org/project/crossroad/) is a tool to set up cross-compilation environments on GNU/Linux distributions.  These instructions assume a Debian/Ubuntu system.  However, they can likely be adapted to other Linux environments.  Since Ubuntu can be run on Windows through WSL, these instructions may be useful for developing directly on Windows.
+
+## Install Crossroad
+
+Crossroad requires tools included with `python3-docutils` and `mingw-w64`.  They may be installed using:
+
+```bash
+sudo aptitude install python3-docutils mingw-w64
+```
+
+The `zstandard` python package is also required, but is not available in the repositories.  It may be installed using `pip`.
+
+```bash
+pip3 install zstandard
+```
+
+After the dependencies are installed, crossroad itself may be installed with `pip`.
+
+```bash
+pip3 install crossroad
+```
+
+If there are errors while running crossroad, it may need to be downloaded and installed directly using `setup.py`.  Instructions are on the crossroad homepage.
+
+## Update Debian Alternatives
+
+Since `libjxl` uses C++ features that require posix threads, the symlinks used by the Debian alternative system need to be updated:
+
+```bash
+sudo update-alternatives --config x86_64-w64-mingw32-g++
+```
+
+Select the option that indicates `posix` usage.  Repeat for `gcc` and `i686`:
+
+```bash
+sudo update-alternatives --config x86_64-w64-mingw32-gcc
+sudo update-alternatives --config i686-w64-mingw32-gcc
+sudo update-alternatives --config i686-w64-mingw32-g++
+```
+
+## Create a New Crossroad Project
+
+Crossroad supports the following platforms:
+
+```
+native               Native platform (x86_64 GNU/Linux)
+android-x86          Generic Android/Bionic on x86
+android-mips64       Generic Android/Bionic on MIPS64
+android-x86-64       Generic Android/Bionic on x86-64
+w64                  Windows 64-bit
+w32                  Windows 32-bit
+android-arm64        Generic Android/Bionic on ARM64
+android-mips         Generic Android/Bionic on MIPS
+android-arm          Generic Android/Bionic on ARM
+```
+
+To begin cross compiling for Windows, a new project needs to be created:
+
+```bash
+crossroad w64 [project-name]
+```
+
+## Install Dependencies
+
+Since the `gimp` development package is required to build the GIMP plugin and also includes most of the packages required by `libjxl`, install it first.
+
+```bash
+crossroad install gimp
+```
+
+`gtest` and `brotli` are also required.
+
+```bash
+crossroad install gtest brotli
+```
+
+If any packages are later found to be missing, you may search for them using:
+
+```bash
+crossroad search [...]
+```
+
+## Build `libjxl`
+
+Download the source from the libjxl [releases](https://github.com/libjxl/libjxl/releases) page.  Alternatively, you may obtain the latest development version with `git`.  Run `./deps.sh` to ensure additional third-party dependencies are downloaded.  Unfortunately, the script `./ci.sh` does not work with Crossroad, so `cmake` will need to be called directly.
+
+Create a build directory within the source directory.  If you haven't already, start your crossroad project and run `cmake`:
+
+```bash
+mkdir build
+cd build
+crossroad w64 libjxl
+crossroad cmake -DCMAKE_BUILD_TYPE=Release \
+   -DBUILD_TESTING=OFF -DBUILD_SHARED_LIBS=OFF \
+   -DJPEGXL_ENABLE_BENCHMARK=OFF -DJPEGXL_ENABLE_MANPAGES=OFF \
+   -DJPEGXL_ENABLE_PLUGINS=ON -DJPEGXL_FORCE_SYSTEM_BROTLI=ON \
+   -DJPEGXL_FORCE_SYSTEM_GTEST=ON ..
+```
+
+Check the output to see if any dependencies were missed and need to be installed.  If all went well, you may now run `cmake` to build `libjxl`:
+
+```bash
+cmake --build .
+```
+
+## Try out the GIMP Plugin
+
+The plugin is built statically, so there should be no need to install `dll` files.  To try out the plugin:
+
+1. [Download](https://www.gimp.org/downloads/) and install the stable version of GIMP (currently 2.10.24).
+
+2. Create a new folder: `C:\Program Files\GIMP 2\lib\gimp\2.0\plug-ins\file-jxl`
+
+3. Copy `build/plugins/gimp/file-jxl.exe` to the new folder. 
diff --git a/third-party/libjxl/libjxl/doc/encode_effort.md b/third-party/libjxl/libjxl/doc/encode_effort.md
new file mode 100644
index 0000000000..221b2bf649
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/encode_effort.md
@@ -0,0 +1,32 @@
+# Encode effort settings
+
+Various trade-offs between encode speed and compression performance can be selected in libjxl. In `cjxl`, this is done via the `--effort` (`-e`) option.
+Higher effort means slower encoding; generally the higher the effort, the more coding tools are used, computationally more expensive heuristics are used,
+and more exhaustive search is performed. 
+Generally efforts range between `1` and `9`, but there is also `e10` if you pass the flag `--allow_expert_options` (in combination with "lossless", i.e. `-d 0`). It is considered an expert option because it can be extremely slow.
+
+
+For lossy compression, higher effort results in better visual quality at a given filesize, and also better
+encoder consistency, i.e. less image-dependent variation in the actual visual quality that is achieved. This means that for lossy compression,
+higher effort does not necessarily mean smaller filesizes for every image — some images may be somewhat lower quality than desired when using
+lower effort heuristics, and to improve consistency, higher effort heuristics may decide to use more bytes for them.
+
+For lossless compression, higher effort should result in smaller filesizes, although this is not guaranteed;
+in particular, e2 can be better than e3 for non-photographic images, and e3 can be better than e4 for photographic images.
+
+The following table describes what the various effort settings do:
+
+|Effort | Modular (lossless) | VarDCT (lossy) |
+|-------|--------------------|----------------|
+| e1 | fast-lossless, fixed YCoCg RCT, fixed ClampedGradient predictor, simple palette detection, no MA tree (one context for everything), Huffman, simple rle-only lz77 | |
+| e2 | global channel palette, fixed MA tree (context based on Gradient-error), ANS, otherwise same as e1 | |
+| e3 | same as e2 but fixed Weighted predictor and fixed MA tree with context based on WP-error | only 8x8, basically XYB jpeg with ANS |
+| e4 | try both ClampedGradient and Weighted predictor, learned MA tree, global palette | simple variable blocks heuristics, adaptive quantization, coefficient reordering |
+| e5 | e4 + patches, local palette / local channel palette, different local RCTs | e4 + gabor-like transform, chroma from luma |
+| e6 | e5 + more RCTs and MA tree properties | e5 + error diffusion, full variable blocks heuristics |
+| e7 | e6 + more RCTs and MA tree properties | e6 + patches (including dots) |
+| e8 | e7 + more RCTs, MA tree properties and Weighted predictor parameters | e7 + Butteraugli iterations for adaptive quantization |
+| e9 | e8 + more RCTs, MA tree properties and Weighted predictor parameters, try all predictors | e8 + more Butteraugli iterations |
+| e10 | e9 + previous-channel MA tree properties, different group dimensions, exhaustively try various e9 options | |
+
+For the entropy coding (context clustering, lz77 search, hybriduint configuration): slower/more exhaustive search as effort goes up.
diff --git a/third-party/libjxl/libjxl/doc/format_overview.md b/third-party/libjxl/libjxl/doc/format_overview.md
new file mode 100644
index 0000000000..4614df5509
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/format_overview.md
@@ -0,0 +1,284 @@
+# JPEG XL Format Overview
+
+This document gives an overview of the JPEG XL file format and codestream,
+its features, and the underlying design rationale.
+The aim of this document is to provide general insight into the
+format capabilities and design, thus helping developers
+better understand how to use the `libjxl` API.
+
+## Codestream and File Format
+
+The JPEG XL format is defined in ISO/IEC 18181. This standard consists of
+four parts:
+
+*   18181-1: Core codestream
+*   18181-2: File format
+*   18181-3: Conformance testing
+*   18181-4: Reference implementation
+
+### Core codestream
+
+The core codestream contains all the data necessary to decode and display
+still image or animation data. This includes basic metadata like image dimensions,
+the pixel data itself, colorspace information, orientation, upsampling, etc.
+
+### File format
+
+The JPEG XL file format can take two forms:
+
+*   A 'naked' codestream. In this case, only the image/animation data itself is
+stored, and no additional metadata can be included. Such a file starts with the
+bytes `0xFF0A` (the JPEG marker for "start of JPEG XL codestream").
+*   An ISOBMFF-based container. This is a box-based container that includes a
+JPEG XL codestream box (`jxlc`), and can optionally include other boxes with
+additional information, such as Exif metadata. In this case, the file starts with
+the bytes `0x0000000C 4A584C20 0D0A870A`.
+
+### Conformance testing
+
+This part of the standard defines precision bounds and test cases for conforming
+decoders, to verify that they implement all coding tools correctly and accurately.
+
+### Reference implementation
+
+The `libjxl` software is the reference implementation of JPEG XL.
+
+
+## Metadata versus Image Data
+
+JPEG XL makes a clear separation between metadata and image data.
+Everything that is needed to correctly display an image is
+considered to be image data, and is part of the core codestream. This includes
+elements that have traditionally been considered 'metadata', such as ICC profiles
+and Exif orientation. The goal is to reduce the ambiguity and potential for
+incorrect implementations that can be caused by having a 'black box' codestream
+that only contains numerical pixel data, requiring applications to figure out how
+to correctly interpret the data (i.e. apply color transforms, upsampling,
+orientation, blending, cropping, etc.). By including this functionality in the
+codestream itself, the decoder can provide output in a normalized way
+(e.g. in RGBA, orientation already applied, frames blended and coalesced),
+simplifying things and making it less error-prone for applications.
+
+The remaining metadata, e.g. Exif or XMP, can be stored in the container format,
+but it does not influence image rendering. In the case of Exif orientation,
+this field has to be ignored by applications, since the orientation in the
+codestream always takes precedence (and will already have been applied
+transparently by the decoder). This means that stripping metadata can be done
+without affecting the displayed image.
+
+
+## Codestream Features
+
+### Color Management
+
+In JPEG XL, images always have a fully defined colorspace, i.e. it is always
+unambiguous how to interpret the pixel values. There are two options:
+
+*   Pixel data is in a specified (non-XYB) colorspace, and the decoder will produce
+a pixel buffer in this colorspace plus an ICC profile that describes that
+colorspace. Mathematically lossless encoding can only use this option.
+*   Pixel data is in the XYB colorspace, which is an absolute colorspace.
+In this case, the decoder can produce a pixel buffer directly in a desired
+display space like sRGB, Display-P3 or Rec.2100 PQ.
+
+The image header always contains a colorspace; however, its meaning depends on
+which of the above two options was used:
+
+*   In the first case (non-XYB), the signaled colorspace defines the
+interpretation of the pixel data.
+*   In the second case (XYB), the signaled colorspace is merely a _suggestion_
+of a target colorspace to represent the image in, i.e. it is the colorspace
+the original image was in, that has a sufficiently wide gamut and a
+suitable transfer curve to represent the image data with high fidelity
+using a limited bit depth representation.
+
+Colorspaces can be signaled in two ways in JPEG XL:
+
+*    CICP-style Enum values: This is a very compact representation that
+covers most or all of the common colorspaces. The decoder can convert
+XYB to any of these colorspaces without requiring an external color management
+library.
+*    ICC profiles: Arbitrary ICC profiles can also be used, including
+CMYK ones. The ICC profile data gets compressed. In this case, external
+color management software (e.g. lcms2 or skcms) has to be used for color
+conversions.
+
+### Frames
+
+A JPEG XL codestream contains one or more frames. In the case of animation,
+these frames have a duration and can be looped (infinitely or a number of times).
+Zero-duration frames are possible and represent different layers of the image.
+
+Frames can have a blendmode (Replace, Add, Alpha-blend, Multiply, etc.) and
+they can use any previous frame as a base.
+They can be smaller than the image canvas, in which case the pixels outside the
+crop are copied from the base frame. They can be positioned at an arbitrary
+offset from the image canvas; this offset can also be negative and frames can
+also be larger than the image canvas, in which case parts of the frame will
+be invisible and only the intersection with the image canvas will be shown.
+
+By default, the decoder will blend and coalesce frames, producing only a single
+output frame when there are subsequent zero-duration frames, and all output frames
+are of the same size (the size of the image canvas) and have either no duration
+(in case of a still image) or a non-zero duration (in case of animation).
+
+### Pixel Data
+
+Every frame contains pixel data encoded in one of two modes:
+
+*   VarDCT mode: In this mode, variable-sized DCT transforms are applied
+and the image data is encoded in the form of DCT coefficients. This mode is
+always lossy, but it can also be used to losslessly represent an existing
+(already lossy) JPEG image, in which case only the DCT8x8 is used.
+*   Modular mode: In this mode, only integer arithmetic is used, which
+enables lossless compression. However, this mode can also be used for lossy
+compression. Multiple transformations can be used to improve compression or to
+obtain other desirable effects: reversible color transforms (RCTs),
+(delta) palette transforms, and a modified non-linear Haar transform
+called Squeeze, which facilitates (but does not require) lossy compression
+and enables progressive decoding.
+
+Internally, the VarDCT mode uses Modular sub-bitstreams to encode
+various auxiliary images, such as the "LF image" (a 1:8 downscaled version
+of the image that contains the DC coefficients of DCT8x8 and low-frequency
+coefficients of the larger DCT transforms), extra channels besides the
+three color channels (e.g. alpha), and weights for adaptive quantization.
+
+In addition, both modes can separately encode additional 'image features' that
+are rendered on top of the decoded image:
+
+*   Patches: rectangles from a previously decoded frame (which can be a
+'hidden' frame that is not displayed but only stored to be referenced later)
+can be blended using one of the blendmodes on top of the current frame.
+This allows the encoder to identify repeating patterns (such as letters of
+text) and encode them only once, using patches to insert the pattern in
+multiple spots. These patterns are encoded in a previous frame, making
+it possible to add Modular-encoded pixels to a VarDCT-encoded frame or
+vice versa.
+*   Splines: centripetal Catmull-Rom splines can be encoded, with a color
+and a thickness that can vary along the arclength of the curve.
+Although the current encoder does not use this bitstream feature yet, we
+anticipate that it can be useful to complement DCT-encoded data, since
+thin lines are hard to represent faithfully using the DCT.
+*   Noise: luma-modulated synthetic noise can be added to an image, e.g.
+to emulate photon noise, in a way that avoids poor compression due to
+high frequency DCT coefficients.
+
+Finally, both modes can also optionally apply two filtering methods to
+the decoded image, which both have the goal of reducing block artifacts
+and ringing:
+
+*   Gabor-like transform ('Gaborish'): a small (3x3) blur that gets
+applied across block and group boundaries, reducing blockiness. The
+encoder applies the inverse sharpening transform before encoding,
+effectively getting the benefits of lapped transforms without the
+disadvantages.
+*   Edge-preserving filter ('EPF'): similar to a bilateral filter,
+this smoothing filter avoids blurring edges while reducing ringing.
+The strength of this filter is signaled and can locally be adapted.
+
+### Groups
+
+In both modes (Modular and VarDCT), the frame data is signaled as
+a sequence of groups. These groups can be decoded independently,
+and the frame header contains a table of contents (TOC) with bitstream
+offsets for the start of each group. This enables parallel decoding,
+and also partial decoding of a region of interest or a progressive preview.
+
+In VarDCT mode, all groups have dimensions 256x256 (or smaller at the
+right and bottom borders). First the LF image is encoded, also in
+256x256 groups (corresponding to 2048x2048 pixels, since this data
+corresponds to the 1:8 image). This means there is always a basic
+progressive preview available in VarDCT mode.
+Optionally, the LF image can be encoded separately in a (hidden)
+LF frame, which can itself recursively be encoded in VarDCT mode
+and have its own LF frame. This makes it possible to represent huge
+images while still having an overall preview that can be efficiently
+decoded.
+Then the HF groups are encoded, corresponding to the remaining AC
+coefficients. The HF groups can be encoded in multiple passes for
+more progressive refinement steps; the coefficients of all passes
+are added. Unlike JPEG progressive scan scripts, JPEG XL allows
+signaling any amount of detail in any part of the image in any pass.
+
+In Modular mode, groups can have dimensions 128x128, 256x256, 512x512
+or 1024x1024. If the Squeeze transform was used, the data will
+be split in three parts: the Global groups (the top of the Laplacian
+pyramid that fits in a single group), the LF groups (the middle part
+of the Laplacian pyramid that corresponds to the data needed to
+reconstruct the 1:8 image) and the HF groups (the base of the Laplacian
+pyramid), where the HF groups are again possibly encoded in multiple
+passes (up to three: one for the 1:4 image, one for the 1:2 image,
+and one for the 1:1 image).
+
+In case of a VarDCT image with extra channels (e.g. alpha), the
+VarDCT groups and the Modular groups are interleaved in order to
+allow progressive previews of all the channels.
+
+The default group order is to encode the LF and HF groups in
+scanline order (top to bottom, left to right), but this order
+can be permuted arbitrarily. This allows, for example, a center-first
+ordering or a saliency-based ordering, causing the bitstream
+to prioritize progressive refinements in a different way.
+
+
+## File Format Features
+
+Besides the image data itself (stored in the `jxlc` codestream box),
+the optional container format allows storing additional information.
+
+## Metadata
+
+Three types of metadata can be included in a JPEG XL container:
+
+*   Exif (`Exif`)
+*   XMP (`xml `)
+*   JUMBF (`jumb`)
+
+This metadata can contain information about the image, such as copyright
+notices, GPS coordinates, camera settings, etc.
+If it contains rendering-impacting information (such as Exif orientation),
+the information in the codestream takes precedence.
+
+## Compressed Metadata
+
+The container allows the above metadata to be stored either uncompressed
+(e.g. plaintext XML in the case of XMP) or Brotli-compressed.
+In the latter case, the box type is `brob` (Brotli-compressed Box) and
+the first four bytes of the box contents define the actual box type
+(e.g. `xml `) it represents.
+
+## JPEG Bitstream Reconstruction Data
+
+JPEG XL can losslessly recompress existing JPEG files.
+The general design philosophy still applies in this case:
+all the image data is stored in the codestream box, including the DCT
+coefficients of the original JPEG image and possibly an ICC profile or
+Exif orientation.
+
+In order to allow bit-identical reconstruction of the original JPEG file
+(not just the image but the actual file), additional information is needed,
+since the same image data can be encoded in multiple ways as a JPEG file.
+The `jbrd` box (JPEG Bitstream Reconstruction Data) contains this information.
+Typically it is relatively small. Using the image data from the codestream,
+the JPEG bitstream reconstruction data, and possibly other metadata boxes
+that were present in the JPEG file (Exif/XMP/JUMBF), the exact original
+JPEG file can be reconstructed.
+
+This box is not needed to display a recompressed JPEG image; it is only
+needed to reconstruct the original JPEG file.
+
+## Frame Index
+
+The container can optionally store a `jxli` box, which contains an index
+of offsets to keyframes of a JPEG XL animation. It is not needed to display
+the animation, but it does facilitate efficient seeking.
+
+## Partial Codestream
+
+The codestream can optionally be split into multiple `jxlp` boxes;
+conceptually, this is equivalent to a single `jxlc` box that contains the
+concatenation of all partial codestream boxes.
+This makes it possible to create a file that starts with
+the data needed for a progressive preview of the image, followed by
+metadata, followed by the remaining image data.
diff --git a/third-party/libjxl/libjxl/doc/fuzzing.md b/third-party/libjxl/libjxl/doc/fuzzing.md
new file mode 100644
index 0000000000..af926596f2
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/fuzzing.md
@@ -0,0 +1,184 @@
+# Fuzzing
+
+Fuzzing is a technique to find potential bugs by providing randomly generated
+invalid inputs. To detect potential bugs such as programming errors we use
+fuzzing in combination with ASan (Address Sanitizer), MSan (Memory Sanitizer),
+UBSan (Undefined Behavior Sanitizer) and asserts in the code. An invalid input
+will likely produce a decoding error (some API function returning error), which
+is absolutely not a problem, but what it should not do is access memory out of
+bounds, use uninitialized memory or hit a false assert condition.
+
+## Automated Fuzzing with oss-fuzz
+
+libjxl fuzzing is integrated into [oss-fuzz](https://github.com/google/oss-fuzz)
+as the project `libjxl`. oss-fuzz regularly runs the fuzzers on the `main`
+branch and reports bugs into their bug tracker which remains private until the
+bugs are fixed in main.
+
+## Fuzzer targets
+
+There are several fuzzer executable targets defined in the `tools/` directory
+to fuzz different parts of the code. The main one is `djxl_fuzzer`, which uses
+the public C decoder API to attempt to decode an image. The fuzzer input is not
+directly the .jxl file, the last few bytes of the fuzzer input are used to
+decide *how* the API will be used (if preview is requested, the pixel format
+requested, if the .jxl input data is provided altogether, etc) and the rest of
+the fuzzer input is provided as the .jxl file to the decoder. Some bugs might
+reproduce only if the .jxl input is decoded in a certain way.
+
+The remaining fuzzer targets execute a specific portion of the codec that might
+be easier to fuzz independently from the whole codec.
+
+## Reproducing fuzzer bugs
+
+A fuzzer target, like `djxl_fuzzer` accepts as a parameter one or more files
+that will be used as inputs. This runs the fuzzer program in test-only mode
+where no new inputs are generated and only the provided files are tested. This
+is the easiest way to reproduce a bug found by the fuzzer using the generated
+test case from the bug report.
+
+oss-fuzz uses a specific compiler version and flags, and it is built using
+Docker. Different compiler versions will have different support for detecting
+certain actions as errors, so we want to reproduce the build from oss-fuzz as
+closely as possible. To reproduce the build as generated by oss-fuzz there are a
+few helper commands in `ci.sh` as explained below.
+
+### Generate the gcr.io/oss-fuzz/libjxl image
+
+First you need the ossfuzz libjxl builder image. This is the base oss-fuzz
+builder image with a few dependencies installed. To generate it you need to
+check out the oss-fuzz project and build it:
+
+```bash
+git clone https://github.com/google/oss-fuzz.git ~/oss-fuzz
+cd ~/oss-fuzz
+sudo infra/helper.py build_image libjxl
+```
+
+This will create the `gcr.io/oss-fuzz/libjxl` docker image. You can check if it
+was created by verifying that it is listed in the output of the
+`sudo docker image ls` command.
+
+### Build the fuzzer targets with oss-fuzz
+
+To build the fuzzer targets from the current libjxl source checkout, use the
+`./ci.sh ossfuzz_msan` command for MSan, `./ci.sh ossfuzz_asan` command for ASan
+or `./ci.sh ossfuzz_ubsan` command for UBSan. All the `JXL_ASSERT` and
+`JXL_DASSERT` calls are enabled in all the three modes. These ci.sh helpers will
+reproduce the oss-fuzz docker call to build libjxl mounting the current source
+directory into the Docker container. Ideally you will run this command in a
+different build directory separated from your regular builds.
+
+For example, for MSan builds run:
+
+```bash
+BUILD_DIR=build-fuzzmsan ./ci.sh ossfuzz_msan
+```
+
+After this, the fuzzer program will be generated in the build directory like
+for other build modes: `build-fuzzmsan/tools/djxl_fuzzer`.
+
+### Iterating changes with oss-fuzz builds
+
+After modifying the source code to fix the fuzzer-found bug, or to include more
+debug information, you can rebuild only a specific fuzzer target to save on
+rebuilding time and immediately run the test case again. For example, for
+rebuilding and testing only `djxl_fuzzer` in MSan mode we can run:
+
+```bash
+BUILD_DIR=build-fuzzmsan ./ci.sh ossfuzz_msan djxl_fuzzer && build-fuzzmsan/tools/djxl_fuzzer path/to/testcase.bin
+```
+
+When MSan and ASan fuzzers fail they will print a stack trace at the point where
+the error occurred, and some related information. To make these stack
+traces useful we need to convert the addresses to function names and source file
+names and lines, which is done with the "symbolizer". For UBSan to print a stack
+trace we need to set the `UBSAN_OPTIONS` environment variable when running the
+fuzzer.
+
+Set the following environment variables when testing the fuzzer binaries. Here
+`clang` should match the compiler version used by the container, you can pass a
+different compiler version in the following example by first installing the
+clang package for that version outside the container and using `clang-NN`
+(for example `clang-11`) instead of `clang` in the following commands:
+
+```bash
+symbolizer=$($(realpath $(which clang)) -print-prog-name=llvm-symbolizer)
+export MSAN_SYMBOLIZER_PATH="${symbolizer}"
+export UBSAN_SYMBOLIZER_PATH="${symbolizer}"
+export ASAN_SYMBOLIZER_PATH="${symbolizer}"
+export ASAN_OPTIONS=detect_leaks=1
+export UBSAN_OPTIONS=print_stacktrace=1
+```
+
+Note: The symbolizer binary must be a program called `llvm-symbolizer`, any
+other file name will fail. There are normally symlinks already installed with
+the right name which the `-print-prog-name` would print.
+
+## Running the fuzzers locally
+
+Running the fuzzer targets in fuzzing mode can be achieved by running them with
+no parameters, or better with a parameter with the path to a *directory*
+containing a seed of files to use as a starting point. Note that passing a
+directory is considered a corpus to use for fuzzing while passing a file is
+considered an input to evaluate. Multi-process fuzzing is also supported. For
+details about all the fuzzing options run:
+
+```bash
+build-fuzzmsan/tools/djxl_fuzzer -help=1
+```
+
+## Writing fuzzer-friendly code
+
+Fuzzing by itself can't find programming bugs unless an input makes the program
+perform an invalid operation (read/write out of bounds, perform an undefined
+behavior operation, etc). You can help the fuzzer find invalid situations by
+adding asserts:
+
+ * `JXL_ASSERT()` is enabled in Release mode by default. It can be disabled
+   with `-DJXL_ENABLE_ASSERT=0` but the intention is that it will run for all
+   the users in released code. If performance of the check is not an issue (like
+   checks done once per image, once per channel, once per group, etc) a
+   JXL_ASSERT is appropriate. A failed assert is preferable to an out of bounds
+   write.
+
+ * `JXL_DASSERT()` is only enabled in Debug builds, which includes all the ASan,
+   MSan and UBSan builds. Performance of these checks is not an issue if kept
+   within reasonable limits (automated msan/asan test should finish within 1
+   hour for example). Fuzzing is more effective when the given input runs
+   faster, so keep that in mind when adding a complex DASSERT that runs multiple
+   times per output pixel.
+
+ * For MSan builds it is also possible to specify that certain values must be
+   initialized. This is automatic for values that are used to make decisions
+   (like when used in an `if` statement or in the ternary operator condition)
+   but those checks can be made explicit for image data using the
+   `JXL_CHECK_IMAGE_INITIALIZED(image, rect)` macro. This helps document and
+   check (only in MSan builds) that a given portion of the image is expected to
+   be initialized, allowing to catch errors earlier in the process.
+
+## Dealing with use-of-uninitialized memory
+
+In MSan builds it is considered an error to *use* uninitialized memory. Using
+the memory normally requires something like a decision / branch based on the
+uninitialized value, just running `memcpy()` or simple arithmetic over
+uninitialized memory is not a problem. Notably, computing `DemoteTo()`,
+`NearestInt()` or similar expressions that create a branch based on the value of
+the uninitialized memory will trigger an MSan error.
+
+In libjxl we often run vectorized operations over a series of values, rounding
+up to the next multiple of a vector size, thus operating over uninitialized
+values past the end of the requested region. These values are part of the image
+padding but are not initialized. This behavior would not create an MSan error
+unless the processing includes operations like `NearestInt()`. For such cases
+the preferred solution is to use `msan::UnpoisonMemory` over the portion of
+memory of the last SIMD vector before processing, and then running
+`msan::PoisonMemory` over the corresponding value in the output side. A note
+including why this is safe to do must be added, for example if the processing
+doesn't involve any cross-lane computation.
+
+Initializing padding memory in MSan builds is discouraged because it may hide
+bugs in functions that weren't supposed to read from the padding. Initializing
+padding memory in all builds, including Release builds, would mitigate the
+MSan potential security issue but it would hide the logic bug for a longer time
+and potentially incur a performance hit.
diff --git a/third-party/libjxl/libjxl/doc/jxl.svg b/third-party/libjxl/libjxl/doc/jxl.svg
new file mode 100644
index 0000000000..a80778b0b7
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/jxl.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="891.6" height="836.9" viewBox="0 0 891.6 836.9" overflow="visible"><style>.st0{fill:#5fb4b1}</style><path class="st0" d="M495.7 420.6C533 348.3 570.2 276 607.4 203.7H503.8c-24 46.6-48.1 93.3-72.1 139.9-38.8-46.6-77.6-93.3-116.4-139.9H211.7L392 420.6c-36.5 70.8-73 141.7-109.5 212.5h103.6c23.3-45.2 46.6-90.4 69.8-135.5 37.6 45.2 75.1 90.4 112.7 135.5h103.6c-58.7-70.8-117.6-141.6-176.5-212.5zM153 625.6l.3 2.3.7 2.6c3.8 15.1 8.9 59.5-12 86.3-6.2 8-14.8 14.5-25.6 19.3L53.9 836.9c36.9 0 69.4-5.8 96.5-17.4 25.9-11 47.2-27.2 63.2-48.1 22.2-28.9 33.9-66.6 33.8-109.1 0-24.8-4-44.6-5.7-52L200.8 337h.1v-90.2H0V337h109.8L153 625.6zM738.5 211.2l-.3-2.3-.7-2.6c-3.8-15.1-8.9-59.5 12-86.3 6.2-8 14.8-14.5 25.6-19.3L837.6 0c-36.9 0-69.4 5.8-96.5 17.4-25.9 11-47.2 27.2-63.2 48.1-22.2 28.9-33.9 66.6-33.8 109.1 0 24.8 4 44.6 5.7 52l40.9 273.3h-.1v90.2h200.9v-90.2H781.7l-43.2-288.7z"/><path class="st0" d="M153 625.6l.3 2.3.7 2.6c3.8 15.1 8.9 59.5-12 86.3-6.2 8-14.8 14.5-25.6 19.3L53.9 836.9c36.9 0 69.4-5.8 96.5-17.4 25.9-11 47.2-27.2 63.2-48.1 22.2-28.9 33.9-66.6 33.8-109.1 0-24.8-4-44.6-5.7-52L200.8 337h.1v-90.2H0V337h109.8L153 625.6z"/></svg>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/doc/man/cjxl.txt b/third-party/libjxl/libjxl/doc/man/cjxl.txt
new file mode 100644
index 0000000000..261742a689
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/man/cjxl.txt
@@ -0,0 +1,102 @@
+cjxl(1)
+=======
+:doctype: manpage
+
+Name
+----
+
+cjxl - compress images to JPEG XL
+
+Synopsis
+--------
+
+*cjxl* ['options'...] 'input' ['output.jxl']
+
+Description
+-----------
+
+`cjxl` compresses an image or animation to the JPEG XL format. It is intended to
+spare users the trouble of determining a set of optimal parameters for each
+individual image. Instead, for a given target quality, it should provide
+consistent visual results across various kinds of images. The defaults have been
+chosen to be sensible, so that the following commands should give satisfactory
+results in most cases:
+
+----
+cjxl input.png output.jxl
+cjxl input.jpg output.jxl
+cjxl input.gif output.jxl
+----
+
+Options
+-------
+
+-h::
+--help::
+    Displays the options that `cjxl` supports. On its own, it will only show
+    basic options. It can be combined with `-v` or `-v -v` to show increasingly
+    advanced options as well.
+
+-v::
+--verbose::
+    Increases verbosity. Can be repeated to increase it further, and also
+    applies to `--help`.
+
+-d 'distance'::
+--distance='distance'::
+    The preferred way to specify quality. It is specified in multiples of a
+    just-noticeable difference. That is, `-d 0` is mathematically lossless,
+    `-d 1` should be visually lossless, and higher distances yield denser and
+    denser files with lower and lower fidelity. Lossy sources such as JPEG and
+    GIF files are compressed losslessly by default, and in the case of JPEG
+    files specifically, the original JPEG can then be reconstructed bit-for-bit.
+    For lossless sources, `-d 1` is the default.
+
+-q 'quality'::
+--quality='quality'::
+    Alternative way to indicate the desired quality. 100 is lossless and lower
+    values yield smaller files. There is no lower bound to this quality
+    parameter, but positive values should approximately match the quality
+    setting of libjpeg.
+
+-e 'effort'::
+--effort='effort'::
+    Controls the amount of effort that goes into producing an ``optimal'' file
+    in terms of quality/size. That is to say, all other parameters being equal,
+    a higher effort should yield a file that is at least as dense and possibly
+    denser, and with at least as high and possibly higher quality.
++
+Recognized effort settings, from fastest to slowest, are:
++
+- 1 or ``lightning''
+- 2 or ``thunder''
+- 3 or ``falcon''
+- 4 or ``cheetah''
+- 5 or ``hare''
+- 6 or ``wombat''
+- 7 or ``squirrel'' (default)
+- 8 or ``kitten''
+- 9 or ``tortoise''
+
+Examples
+--------
+
+----
+# Compress a PNG file to a high-quality JPEG XL version.
+$ cjxl input.png output.jxl
+
+# Compress it at a slightly lower quality, appropriate for web use.
+$ cjxl -d 2 input.png output.jxl
+
+# Compress it losslessly. These are equivalent.
+$ cjxl -d 0   input.png lossless.jxl
+$ cjxl -q 100 input.png lossless.jxl
+
+# Compress a JPEG file losslessly.
+$ cjxl input.jpeg lossless-jpeg.jxl
+----
+
+See also
+--------
+
+*djxl*(1)
diff --git a/third-party/libjxl/libjxl/doc/man/djxl.txt b/third-party/libjxl/libjxl/doc/man/djxl.txt
new file mode 100644
index 0000000000..bd57b4420e
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/man/djxl.txt
@@ -0,0 +1,61 @@
+djxl(1)
+=======
+:doctype: manpage
+
+Name
+----
+
+djxl - decompress JPEG XL images
+
+Synopsis
+--------
+
+*djxl* ['options'...] 'input.jxl' ['output']
+
+Description
+-----------
+
+`djxl` decompresses a JPEG XL image or animation. The output format is determined
+by the extension of the output file, which can be `.png`, `.jpg`, `.ppm`, `.pfm`.
+If the JPEG XL input file contains an animation, multiple output files will be
+produced, with names of the form "'output'-*framenumber*.ext".
+
+
+Options
+-------
+
+-h::
+--help::
+    Displays the options that `djxl` supports.
+
+-j::
+--pixels_to_jpeg::
+    By default, if the input JPEG XL contains a recompressed JPEG file,
+    djxl reconstructs the exact original JPEG file if the output file has the
+    `.jpg` (or `.jpeg`) filename extension.
+    This flag causes the decoder to instead decode the image to pixels and
+    encode a new (lossy) JPEG in this case.
+
+
+-q 'quality'::
+--jpeg_quality='quality'::
+    When decoding to `.jpg`, use this output quality. This option implicitly
+    enables the --pixels_to_jpeg option.
+
+
+Examples
+--------
+
+----
+# Decompress a JPEG XL file to PNG
+$ djxl input.jxl output.png
+
+# Reconstruct a losslessly-recompressed JPEG file
+$ djxl lossless-jpeg.jxl reconstructed.jpeg
+----
+
+
+See also
+--------
+
+*cjxl*(1)
diff --git a/third-party/libjxl/libjxl/doc/release.md b/third-party/libjxl/libjxl/doc/release.md
new file mode 100644
index 0000000000..5fb042eb7a
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/release.md
@@ -0,0 +1,314 @@
+# libjxl release process
+
+This guide documents the release process for the libjxl project.
+
+libjxl follows the [semantic versioning](https://semver.org/spec/v2.0.0.html)
+specification for released versions. Releases are distributed as tags in the git
+repository with the semantic version prefixed by the letter "v". For example,
+release version "0.3.7" will have a git tag "v0.3.7".
+
+The public API is explicitly defined as C headers in the `lib/include`
+directory, normally installed in your include path. All other headers are
+internal API and are not covered by the versioning rules.
+
+## Development and release workflow
+
+New code development is performed on the `main` branch of the git repository.
+Pre-submit checks enforce minimum build and test requirements for new patches
+that balance impact and test latency, but not all checks are performed before
+pull requests are merged. Several slower checks only run *after* the code has
+been merged to `main`, resulting in some errors being detected hours after the
+code is merged or even days after in the case of fuzzer-detected bugs.
+
+Release tags are cut from *release branches*. Each MAJOR.MINOR version has its
+own release branch, for example releases `0.7.0`, `0.7.1`, `0.7.2`, ... would
+have tags `v0.7.0`, `v0.7.1`, `v0.7.2`, ... on commits from the `v0.7.x` branch.
+`v0.7.x` is a branch name, not a tag name, and doesn't represent a released
+version since semantic versioning requires that the PATCH is a non-negative
+number. Release tags don't each have their own release branch; all releases
+from the same MAJOR.MINOR version will share the same branch. The first commit
+after the branch-off points between the main branch and the release branch
+should be tagged with the suffix `-snapshot` and the name of the next
+MAJOR.MINOR version, in order to get meaningful output for `git describe`.
+
+The main purpose of the release branch is to stabilize the code before a
+release. This involves including fixes to existing bugs but **not** including
+new features. New features often come with new bugs which take time to fix, so
+having a release branch allows us to cherry-pick *bug fixes* from the `main`
+branch into the release branch without including the new *features* from `main`.
+For this reason it is important to make small commits in `main` and separate bug
+fixes from new features.
+
+After the initial minor release (`MAJOR.MINOR.PATCH`, for example `0.5.0`) the
+release branch is used to continue to cherry-pick fixes to be included in a
+patch release, for example a version `0.5.1` release. Patch fixes are only meant
+to fix security bugs or other critical bugs that can't wait until the next major
+or minor release.
+
+Release branches *may* continue to be maintained even after the next minor or
+major version has been released to support users that can't update to a newer
+minor release. In that case, the same process applies to all the maintained
+release branches.
+
+A release branch with specific cherry-picks from `main` means that the release
+code is actually a version of the code that never existed in the `main` branch,
+so it needs to be tested independently. Pre-submit and post-submit tests run on
+release branches (branches matching `v*.*.x`) but extra manual checks should be
+performed before a release, especially if multiple bug fixes interact with each
+other. Take this into account when selecting which commits to include in a
+release. The objective is to have a stable version that can be used without
+problems for months. Having the latest improvements at the time the release tag
+is created is a non-goal.
+
+## Creating a release branch
+
+A new release branch is needed before creating a new major or minor release,
+that is, a new release where the MAJOR or MINOR numbers are increased. Patch
+releases, where only the PATCH number is increased, reuse the branch from the
+previous release of the same MAJOR and MINOR numbers.
+
+The following instructions assume that you followed the recommended [libjxl git
+setup](developing_in_github.md) where `origin` points to the upstream
+libjxl/libjxl project, otherwise use the name of your upstream remote repository
+instead of `origin`.
+
+The release branch is normally created from the latest work in `main` at the
+time the branch is created, but it is possible to create the branch from an
+older commit if the current `main` is particularly unstable or includes commits
+that were not intended to be included in the release. The following example
+creates the branch `v0.5.x` from the latest commit in main (`origin/main`), if a
+different commit is to be used then replace `origin/main` with the SHA of that
+commit. Change the `v0.5.x` branch name to the one you are creating.
+
+```bash
+git fetch origin main
+git push git@github.com:libjxl/libjxl.git origin/main:refs/heads/v0.5.x
+```
+
+Here we use the SSH URL explicitly since you are pushing to the `libjxl/libjxl`
+project directly to a branch there. If you followed the guide `origin` will have
+the HTTPS URL which wouldn't normally let you push since you wouldn't be
+authenticated. The `v*.*.x` branches are [GitHub protected
+branches](https://docs.github.com/en/github/administering-a-repository/defining-the-mergeability-of-pull-requests/about-protected-branches)
+in our repository, however you can push to a protected branch when *creating* it
+but you can't directly push to it after it is created. To include more changes
+in the release branch see the "Cherry-pick fixes to a release" section below.
+
+## Creating a merge label
+
+We use GitHub labels in Pull Requests to keep track of the changes that should
+be merged into a given release branch. For this purpose create a new label for
+each new MAJOR.MINOR release branch called `merge-MAJOR.MINOR`, for example,
+`merge-0.5`.
+
+In the [edit labels](https://github.com/libjxl/libjxl/issues/labels) page, click
+on "New label" and create the label. Pick your favorite color.
+
+Labels are a GitHub-only concept and are not represented in git. You can add the
+label to a Pull Request even after it was merged, whenever it is decided that
+the Pull Request should be included in the given release branch. Adding the
+label doesn't automatically merge it to the release branch.
+
+## Update the versioning number
+
+The version number (as returned by `JxlDecoderVersion`) in the source code in
+`main` must match the semantic versioning of a release. After the release
+branch is created the code in `main` will only be included in the next major
+or minor release. Right after creating a release branch, update the version on
+`main` to target the next release. Artifacts from `main` should include the new
+(unreleased) version, so it is important to update it. For example, after the
+`v0.5.x` branch is created from main, update the version on `main` to `0.6.0`.
+
+To help update it, run this helper command (in a Debian-based system):
+
+```bash
+./ci.sh bump_version 0.6.0
+```
+
+This will update the version in the following files:
+
+ * `lib/CMakeLists.txt`
+ * `lib/lib.gni`, automatically updated with
+   `tools/scripts/build_cleaner.py --update`.
+ * `debian/changelog` to create the Debian package release with the new version.
+   Debian changelog shouldn't repeat the library changelog, instead it should
+   include changes to the packaging scripts.
+ * `.github/workflows/conformance.yml`
+
+If there were incompatible API/ABI changes, make sure to also adapt the
+corresponding section in
+[CMakeLists.txt](https://github.com/libjxl/libjxl/blob/main/lib/CMakeLists.txt#L12).
+
+## Cherry-pick fixes to a release
+
+After a Pull Request that should be included in a release branch has been merged
+to `main` it can be cherry-picked to the release branch. Before cherry-picking a
+change to a release branch it is important to check that it doesn't introduce
+more problems, in particular it should run for some time in `main` to make sure
+post-submit tests and the fuzzers run on it. Waiting for a day is a good idea.
+
+Most of the testing is done on the `main` branch, so be careful with what
+commits are cherry-picked to a branch. Refactoring code is often not a good
+candidate to cherry-pick.
+
+To cherry-pick a single commit to a release branch (in this example to `v0.5.x`)
+you can run:
+
+```bash
+git fetch origin
+git checkout origin/v0.5.x -b merge_to_release
+git cherry-pick -x SHA_OF_MAIN_COMMIT
+# -x will annotate the cherry-pick with the original SHA_OF_MAIN_COMMIT value.
+# If not already mentioned in the original commit, add the original PR number to
+# the commit, for example add "(cherry picked from PR #NNNN)".
+git commit --amend
+```
+
+The `SHA_OF_MAIN_COMMIT` is the hash of the commit as it landed in main. Use
+`git log origin/main` to list the recent main commits and their hashes.
+
+Making sure that the commit message on the cherry-picked commit contains a
+reference to the original pull request (like `#NNNN`) is important. It creates
+an automatic comment in the original pull request notifying that it was
+mentioned in another commit, helping keep track of the merged pull requests. If
+the original commit was merged with the "Squash and merge" policy it will
+automatically contain the pull request number on the first line, if this is not
+the case you can amend the commit message of the cherry-pick to include a
+reference.
+
+Multiple commits can be cherry-picked and tested at once to save time. Continue
+running `git cherry-pick` and `git commit --amend` multiple times for all the
+commits you need to cherry-pick, ideally in the same order they were merged on
+the `main` branch. At the end you will have a local branch with multiple commits
+on top of the release branch.
+
+To update the version number, for example from v0.8.0 to v0.8.1 run this helper
+command (in a Debian-based system):
+
+```bash
+./ci.sh bump_version 0.8.1
+```
+
+as described above and commit the changes.
+
+Finally, upload your changes to *your fork* like normal, except that when
+creating a pull request select the desired release branch as a target:
+
+```bash
+git push myfork merge_to_release
+```
+
+If you used the [guide](developing_in_github.md) `myfork` would be `origin` in
+that example. Click on the URL displayed, which will be something like
+
+  `https://github.com/mygithubusername/libjxl/pull/new/merge_to_release`
+
+In the "Open a pull request" page, change the drop-down base branch from
+"base: main" (the default) to the release branch you are targeting.
+
+The pull request approval and pre-submit rules apply as with normal pull
+requests to the `main` branch.
+
+**Important:** When merging multiple cherry-picks use the "Rebase and merge" policy,
+not the squash one since otherwise you would discard the individual commit
+message references from the git history in the release branch.
+
+## Publishing a release
+
+Once a release tag is created it must not be modified, so you need to prepare
+the changes before creating the release. Make sure you checked the following:
+
+ * The semantic version number in the release branch (see `lib/CMakeLists.txt`)
+   matches the number you intend to release, all three MAJOR, MINOR and PATCH
+   should match. Otherwise send a pull request to the release branch to
+   update them.
+
+ * The GitHub Actions checks pass on the release branch. Look for the green
+   tick next to the last commit on the release branch. This should be visible
+   on the branch page, for example: https://github.com/libjxl/libjxl/tree/v0.5.x
+
+ * There are no open fuzzer-found bugs for the release branch. The most effective
+   way is to [run the fuzzer](fuzzing.md) on the release branch for a while. You
+   can seed the fuzzer with corpus generated by oss-fuzz by [downloading
+   it](https://google.github.io/oss-fuzz/advanced-topics/corpora/#downloading-the-corpus),
+   for example `djxl_fuzzer` with libFuzzer will use:
+   gs://libjxl-corpus.clusterfuzz-external.appspot.com/libFuzzer/libjxl_djxl_fuzzer
+
+ * Manually check that images encode/decode ok.
+
+ * Manually check that downstream projects compile with our code. Sometimes
+   bugs in build scripts are only detected when other projects try to use our
+   library. For example, test compiling
+   [imagemagick](https://github.com/ImageMagick/ImageMagick) and Chrome.
+
+A [GitHub
+"release"](https://docs.github.com/en/github/administering-a-repository/releasing-projects-on-github/about-releases)
+consists of two different concepts:
+
+ * a git "tag": this is a name (`v` plus the semantic version number) with a
+   commit hash associated, defined in the git repository. Most external projects
+   will use git tags or HTTP URLs to these tags to fetch the code.
+
+ * a GitHub "release": this is a GitHub-only concept and is not represented in
+   git other than by having a git tag associated with the release. A GitHub
+   release has a given source code commit SHA associated (through the tag) but
+   it *also* contains release notes and optional binary files attached to the
+   release.
+
+Releases from the older GitLab repository only have a git tag in GitHub, while
+newer releases have both a git tag and a release entry in GitHub.
+
+To publish a release open the [New Release
+page](https://github.com/libjxl/libjxl/releases/new) and follow these
+instructions:
+
+ * Set the "Tag version" as "v" plus the semantic version number.
+
+ * Select the "Target" as your release branch. For example for a "v0.7.1"
+   release tag you should use the "v0.7.x" branch.
+
+ * Use the version number as the release title.
+
+ * Copy-paste the relevant section of the [CHANGELOG.md](../CHANGELOG.md) into
+   the release notes. Add any other information pertaining to the release
+   itself that is not included in the CHANGELOG.md, although prefer to include
+   it in the CHANGELOG.md file. You can switch to the Preview tab to see the
+   results.
+
+ * Finally click "Publish release" and go celebrate with the team. 🎉
+
+ * Make sure to manually push the commit of the release also to https://gitlab.com/wg1/jpeg-xl.
+
+### How to build downstream projects
+
+```bash
+docker run -it debian:bullseye /bin/bash
+
+apt update
+apt install -y clang cmake git libbrotli-dev nasm pkg-config ninja-build
+export CC=clang
+export CXX=clang++
+
+git clone --recurse-submodules --depth 1 -b v0.7.x \
+  https://github.com/libjxl/libjxl.git
+git clone --recurse-submodules --depth 1 \
+  https://github.com/ImageMagick/ImageMagick.git
+git clone --recurse-submodules --depth 1 \
+  https://github.com/FFmpeg/FFmpeg.git
+
+cd ~/libjxl
+git checkout v0.7.x
+cmake -B build -G Ninja .
+cmake --build build
+cmake --install build
+
+cd ~/ImageMagick
+./configure --with-jxl=yes
+# check for "JPEG XL --with-jxl=yes yes"
+make -j 80
+
+cd ~/FFmpeg
+./configure --enable-libjxl
+# check for libjxl decoder/encoder support
+make -j 80
+```
diff --git a/third-party/libjxl/libjxl/doc/software_support.md b/third-party/libjxl/libjxl/doc/software_support.md
new file mode 100644
index 0000000000..62e2a27c0a
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/software_support.md
@@ -0,0 +1,75 @@
+# JPEG XL software support
+
+This document attempts to keep track of software that is using libjxl to support JPEG XL.
+This list serves several purposes:
+
+- thank/acknowledge other projects for integrating jxl support
+- point end-users to software that can read/write jxl
+- keep track of the adoption status of jxl
+- in case of a (security) bug in libjxl, it's easier to see who might be affected and check if they are updated (in case they use static linking)
+
+Please add missing software to this list.
+
+## Browsers
+
+- Chromium: behind a flag from version 91 to 109, [tracking bug](https://bugs.chromium.org/p/chromium/issues/detail?id=1178058)
+- Firefox: behind a flag since version 90, [tracking bug](https://bugzilla.mozilla.org/show_bug.cgi?id=1539075)
+- Safari: supported since version 17 beta [release notes](https://developer.apple.com/documentation/safari-release-notes/safari-17-release-notes), [tracking bug](https://bugs.webkit.org/show_bug.cgi?id=208235)
+- Edge: behind a flag since version 91, start with `.\msedge.exe --enable-features=JXL`
+- Opera: behind a flag since version 77.
+- Basilisk: supported since version v2023.01.07, [release notes](https://www.basilisk-browser.org/releasenotes.shtml)
+- Pale Moon: supported since version 31.4.0, [release notes](https://www.palemoon.org/releasenotes-archived.shtml#v31.4.0)
+- Waterfox: [enabled by default](https://github.com/WaterfoxCo/Waterfox/pull/2936)
+
+For all browsers, and to track browser progress, see [Can I Use](https://caniuse.com/jpegxl).
+
+## Image libraries
+
+- [ImageMagick](https://imagemagick.org/): supported since 7.0.10-54
+- [libvips](https://libvips.github.io/libvips/): supported since 8.11
+- [Imlib2](https://github.com/alistair7/imlib2-jxl)
+- [FFmpeg](https://github.com/FFmpeg/FFmpeg/search?q=jpeg-xl&type=commits)
+- [GDAL](https://gdal.org/drivers/raster/jpegxl.html): supported since 3.4.0 as a TIFF codec, and 3.6.0 as standalone format
+- [GraphicsMagick](http://www.graphicsmagick.org/NEWS.html#march-26-2022): supported since 1.3.38
+
+## OS-level support / UI frameworks / file browser plugins
+
+- Qt / KDE: [plugin available](https://github.com/novomesk/qt-jpegxl-image-plugin)
+- GDK-pixbuf: plugin available in libjxl repo
+- [gThumb](https://ubuntuhandbook.org/index.php/2021/04/gthumb-3-11-3-adds-jpeg-xl-support/)
+- [MacOS viewer/QuickLook plugin](https://github.com/yllan/JXLook)
+- [Windows Imaging Component](https://github.com/mirillis/jpegxl-wic)
+- [Windows thumbnail handler](https://github.com/saschanaz/jxl-winthumb)
+- [OpenMandriva Lx (since 4.3 RC)](https://www.openmandriva.org/en/news/article/openmandriva-lx-4-3-rc-available-for-testing)
+- [KaOS (since 2021.06)](https://news.itsfoss.com/kaos-2021-06-release/)
+- [EFL (since 1.27, no external plugin needed)](https://www.enlightenment.org)
+
+## Image editors
+
+- [Adobe Camera Raw (since version 15)](https://helpx.adobe.com/camera-raw/using/hdr-output.html)
+- [Affinity (since V2)](https://affinity.serif.com/en-gb/whats-new/)
+- [darktable (since 4.2)](https://github.com/darktable-org/darktable/releases/tag/release-4.2.0)
+- [GIMP (since 2.99.8)](https://www.gimp.org/news/2021/10/20/gimp-2-99-8-released/); plugin for older versions available in libjxl repo
+- [Graphic Converter (since 11.5)](https://www.lemkesoft.de/en/products/graphicconverter/)
+- [Krita](https://invent.kde.org/graphics/krita/-/commit/13e5d2e5b9f0eac5c8064b7767f0b62264a0797b)
+- [Paint.NET](https://www.getpaint.net/index.html); supported since 4.3.12 - requires a [plugin](https://github.com/0xC0000054/pdn-jpegxl) to be downloaded and installed.
+- Photoshop: no plugin available yet, no official support yet
+
+## Image viewers
+
+- [XnView](https://www.xnview.com/en/)
+- [ImageGlass](https://imageglass.org/)
+- [IrfanView](https://www.irfanview.com/); supported since 4.59 - requires a [plugin](https://www.irfanview.com/plugins.htm) to be downloaded and enabled.
+- [Tachiyomi](https://github.com/tachiyomiorg/tachiyomi/releases/tag/v0.12.1)
+- Any viewer based on Qt, KDE, GDK-pixbuf, EFL, ImageMagick, libvips or imlib2 (see above)
+  - Qt viewers: gwenview, digiKam, KolourPaint, KPhotoAlbum, LXImage-Qt, qimgv, qView, nomacs, VookiImageViewer, PhotoQt
+  - GTK viewers: Eye of Gnome (eog), gThumb, Geeqie
+  - EFL viewers: entice, ephoto
+- [Swayimg](https://github.com/artemsen/swayimg)
+
+## Online tools
+
+- [Squoosh](https://squoosh.app/)
+- [Cloudinary](https://cloudinary.com/blog/cloudinary_supports_jpeg_xl)
+- [MConverter](https://mconverter.eu/)
+- [jpegxl.io](https://jpegxl.io/)
diff --git a/third-party/libjxl/libjxl/doc/sphinx/api.rst b/third-party/libjxl/libjxl/doc/sphinx/api.rst
new file mode 100644
index 0000000000..56fca09e25
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/sphinx/api.rst
@@ -0,0 +1,15 @@
+API reference
+=============
+
+``libjxl`` exposes a C API for encoding and decoding JPEG XL files with some
+C++ header-only helpers for C++ users.
+
+.. toctree::
+   :caption: API REFERENCE
+   :maxdepth: 2
+
+   api_decoder
+   api_encoder
+   api_common
+   api_butteraugli
+   api_threads
diff --git a/third-party/libjxl/libjxl/doc/sphinx/api_butteraugli.rst b/third-party/libjxl/libjxl/doc/sphinx/api_butteraugli.rst
new file mode 100644
index 0000000000..4aae44a991
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/sphinx/api_butteraugli.rst
@@ -0,0 +1,6 @@
+Butteraugli API - ``jxl/butteraugli.h``
+=======================================
+
+.. doxygengroup:: libjxl_butteraugli
+   :members:
+   :private-members:
diff --git a/third-party/libjxl/libjxl/doc/sphinx/api_common.rst b/third-party/libjxl/libjxl/doc/sphinx/api_common.rst
new file mode 100644
index 0000000000..7114b51cd5
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/sphinx/api_common.rst
@@ -0,0 +1,6 @@
+Common API concepts
+===================
+
+.. doxygengroup:: libjxl_common
+   :members:
+   :private-members:
diff --git a/third-party/libjxl/libjxl/doc/sphinx/api_decoder.rst b/third-party/libjxl/libjxl/doc/sphinx/api_decoder.rst
new file mode 100644
index 0000000000..3f8db228d4
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/sphinx/api_decoder.rst
@@ -0,0 +1,6 @@
+Decoder API - ``jxl/decode.h``
+==============================
+
+.. doxygengroup:: libjxl_decoder
+   :members:
+   :private-members:
diff --git a/third-party/libjxl/libjxl/doc/sphinx/api_encoder.rst b/third-party/libjxl/libjxl/doc/sphinx/api_encoder.rst
new file mode 100644
index 0000000000..0c76cc8891
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/sphinx/api_encoder.rst
@@ -0,0 +1,6 @@
+Encoder API - ``jxl/encode.h``
+==============================
+
+.. doxygengroup:: libjxl_encoder
+   :members:
+   :private-members:
diff --git a/third-party/libjxl/libjxl/doc/sphinx/api_threads.rst b/third-party/libjxl/libjxl/doc/sphinx/api_threads.rst
new file mode 100644
index 0000000000..78dba657df
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/sphinx/api_threads.rst
@@ -0,0 +1,6 @@
+Multi-threaded Encoder/Decoder
+==============================
+
+.. doxygengroup:: libjxl_threads
+   :members:
+   :private-members:
diff --git a/third-party/libjxl/libjxl/doc/sphinx/conf.py b/third-party/libjxl/libjxl/doc/sphinx/conf.py
new file mode 100644
index 0000000000..1591aefc70
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/sphinx/conf.py
@@ -0,0 +1,110 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Configuration file for the Sphinx documentation builder.
+#
+# See https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+import os
+import re
+import subprocess
+
+def GetVersion():
+    """Returns 'MAJOR.MINOR.PATCH' as parsed from ../../lib/CMakeLists.txt."""
+    with open(os.path.join(
+            os.path.dirname(__file__), '../../lib/CMakeLists.txt'), 'r') as f:
+        cmakevars = {}
+        for line in f:
+            m = re.match(r'set\(JPEGXL_([A-Z]+)_VERSION ([^\)]+)\)', line)
+            if m:
+                cmakevars[m.group(1)] = m.group(2)
+    return '%s.%s.%s' % (cmakevars['MAJOR'], cmakevars['MINOR'], cmakevars['PATCH'])
+
+def ConfigProject(app, config):
+    # Register the doxygen XML directory with breathe: the "xml" directory next
+    # to the sphinx output directory. By default, Doxygen writes its XML files
+    # to an "xml" sub-directory of the OUTPUT_DIRECTORY.
+    build_dir = os.path.dirname(app.outdir)
+    xml_dir = os.path.join(build_dir, 'xml')
+    config.breathe_projects['libjxl'] = xml_dir
+
+    # The Read the Docs build environment doesn't run our cmake script, so we
+    # need to run doxygen manually here (only when READTHEDOCS is 'True').
+    if os.environ.get('READTHEDOCS', None) != 'True':
+        return
+    root_dir = os.path.realpath(os.path.join(app.srcdir, '../../'))
+    doxyfile = os.path.join(build_dir, 'Doxyfile-rtd.doc')
+    with open(doxyfile, 'w') as f:
+        f.write(f"""
+FILE_PATTERNS          = *.c *.h
+GENERATE_HTML          = NO
+GENERATE_LATEX         = NO
+GENERATE_XML           = YES
+INPUT                  = lib/include doc/api.txt
+OUTPUT_DIRECTORY       = {build_dir}
+PROJECT_NAME           = LIBJXL
+QUIET                  = YES
+RECURSIVE              = YES
+STRIP_FROM_PATH        = lib/include
+WARN_AS_ERROR          = YES
+""")
+    subprocess.check_call(['doxygen', doxyfile], cwd=root_dir)
+
+def setup(app):
+    # Run ConfigProject on "config-inited"; it generates doxygen XML on Read the Docs.
+    app.connect("config-inited", ConfigProject)
+
+### Project information
+
+project = 'libjxl'
+project_copyright = 'JPEG XL Project Authors'
+author = 'JPEG XL Project Authors'
+version = GetVersion()
+
+### General configuration
+
+extensions = [
+    # For integration with doxygen documentation.
+    'breathe',
+    # sphinx readthedocs theme.
+    'sphinx_rtd_theme',
+    # Do we use it?
+    'sphinx.ext.graphviz',
+]
+
+breathe_default_project = 'libjxl'
+breathe_projects = {}
+
+
+# The API is C except for *cxx.h files. NOTE(review): 'h' maps to 'cpp' below; confirm this is intended.
+breathe_domain_by_extension = {'h': 'cpp'}
+breathe_domain_by_file_pattern = {
+    '*cxx.h': 'cpp',
+}
+breathe_implementation_filename_extensions = ['.cc']
+
+# These are defined at build time by cmake.
+c_id_attributes = [
+    'JXL_EXPORT',
+    'JXL_DEPRECATED',
+    'JXL_THREADS_EXPORT',
+]
+cpp_id_attributes = c_id_attributes
+
+
+breathe_projects_source = {
+    'libjxl' : ('../../', [
+        'doc/api.txt',
+        'lib/include/jxl',
+    ])
+}
+
+# Recognized suffixes.
+source_suffix = ['.rst', '.md']
+
+### Options for HTML output
+
+# Use the readthedocs.io theme when generating the HTML output.
+html_theme = 'sphinx_rtd_theme'
diff --git a/third-party/libjxl/libjxl/doc/sphinx/index.rst b/third-party/libjxl/libjxl/doc/sphinx/index.rst
new file mode 100644
index 0000000000..9a57074b0b
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/sphinx/index.rst
@@ -0,0 +1,18 @@
+.. libjxl sphinx documentation entrypoint
+
+JPEG XL image format reference implementation
+=============================================
+
+.. toctree::
+   :maxdepth: 3
+   :caption: Contents:
+
+   api
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`search`
+
diff --git a/third-party/libjxl/libjxl/doc/sphinx/requirements.txt b/third-party/libjxl/libjxl/doc/sphinx/requirements.txt
new file mode 100644
index 0000000000..28179eafa2
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/sphinx/requirements.txt
@@ -0,0 +1,3 @@
+breathe
+sphinx
+sphinx-rtd-theme
diff --git a/third-party/libjxl/libjxl/doc/vuln_playbook.md b/third-party/libjxl/libjxl/doc/vuln_playbook.md
new file mode 100644
index 0000000000..1326d70a9e
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/vuln_playbook.md
@@ -0,0 +1,245 @@
+# Security Vulnerabilities Playbook
+
+## Reporting security bugs
+
+Report security bugs by emailing libjxl-security@google.com.
+
+Don't open a GitHub issue, don't discuss it in public forums like Discord and
+don't send a Pull Request if you think you have found a security bug.
+
+## Overview
+
+This document outlines the guidelines followed by the project when handling
+security bugs, their fixes, disclosure and coordination with security
+researchers. For more context about this guide, read the [coordinated
+vulnerability disclosure
+guidelines](https://github.com/google/oss-vulnerability-guide/blob/main/guide.md)
+from Google Open Source Programs Office.
+
+The main target audience of this guide is the coordinator from the libjxl
+Vulnerability Management Team (VMT) handling the requests, however it is useful
+for other people to understand what to expect from this process.
+
+Members of the VMT monitor the reports received by email and will coordinate
+for these to be addressed. This doesn't mean that said member would fix the bug,
+but their responsibility is to make sure it is handled properly according to
+this guide.
+
+## Life of security bug
+
+The Coordinator from VMT will make sure that the following steps are taken.
+
+1. Acknowledge the bug report.
+
+Our policy mandates a maximum of **3 business days** to respond to bug reports
+in the given email, but you should respond as soon as possible and keep a fluid
+communication with the reporter, who has spent some time looking at the issue.
+
+2. Determine if the bug is a security bug covered by our policy.
+
+Not all bugs are security bugs, and not all security bugs are covered by this
+vulnerability disclosure policy. See the [What's a Security bug] section below.
+
+3. Determine the affected versions.
+
+Often new bugs on stable projects are found on new features or because of those
+new features, so only the most recent versions are affected. It is important to
+determine both what older versions are affected, so users running those older
+versions can patch or update the software, and also what older versions are
+*not* affected. It is possible that stable distributions ship older versions
+that didn't contain the bug and therefore don't need to patch the code. Often
+maintainers of package distributions need to patch older versions instead of
+updating due to incompatibilities with newer ones and they need to understand
+what's the vulnerable code.
+
+Security bugs that have already been fixed in `main` or in already released code
+but not disclosed as a vulnerability, for example if fixed as a result of a
+refactor, should be treated like any other security bug in this policy and
+disclosed indicating the range of older affected versions (except for versions
+before 0.5, see below). In such a case a new release would likely not be needed
+if one already exists, but stable distributions may still be using those
+versions and need to be aware of the issue and fix.
+
+If no released version is affected by the bug, for example because it was only
+introduced in the `main` branch but not yet released, then no vulnerability
+disclosure is needed.
+
+Note: Versions before 0.5 are not covered by the security policy. Those versions
+have multiple security issues and should not be used anyway.
+
+4. Communicate with the reporter
+
+Communicate the decision to the reporter.
+
+If the bug was not considered a security bug or not covered by this policy,
+explain why and direct the reporter to open a public [issue in
+GitHub](https://github.com/libjxl/libjxl/issues) or open one on their behalf.
+You don't need to follow the rest of the guide in this case.
+
+If the bug *is* a covered security bug then follow the rest of this guide.
+
+Ask the reporter how they want to be credited in the disclosure: name and
+company affiliation if any. Security researchers often value this recognition
+and it helps them dedicate their time to finding security bugs in our project.
+
+There's no bug bounty (monetary compensation for security bugs) available for
+libjxl.
+
+5. Create a Security Advisory draft in GitHub
+
+At this point it was established that the bug is a security issue that requires
+a vulnerability disclosure. Start by creating a Security Advisory draft in the
+[Security Advisories](https://github.com/libjxl/libjxl/security/advisories) page
+in GitHub.
+
+Add a short description of the bug explaining what's the issue and what's the
+impact of the issue. Being 'hard' or 'complex' to exploit is not a reason to
+discard the potential impact. You can update this description later, save it as
+a draft in GitHub.
+
+Add the reporter to the security advisory draft if they have a GitHub account,
+and add the project members that will be working on a fix for the bug.
+
+Establish the severity of the issue according to the impact and tag the
+appropriate Common Weakness Enumeration (CWE) values. This helps classify the
+security issues according to their nature.
+
+6. Work on a fix in a private branch
+
+Coordinators can work on the fix themselves, use a proposed fix from the
+reporter if there is one, or work with other project members to create one.
+
+Work on a fix for the bug in *private*. Don't publish a Pull Request with the
+fix like you normally do, and don't upload the fix to your libjxl fork. If you
+ask another project member to work on it, explain to them that they should
+follow this guide.
+
+7. Request a CVE number
+
+The Common Vulnerabilities and Exposures (CVE) is the system used to disclose
+vulnerabilities in software. A CVE number, like CVE-2021-NNNNNN, is a unique
+identifier for a given vulnerability. These numbers are assigned by a CVE
+Numbering Authority (CNA) with scope on the given project that has the
+vulnerability. For libjxl, we use Google's Generic CNA.
+
+For VMT coordinators at Google, file a bug at
+[go/cve-request](https://goto.google.com/cve-request) to request a CVE. See
+go/vcp-cna for context.
+
+When requesting the CVE include:
+
+ * A description of the problem (example: bug when parsing this field)
+ * A description of the impact of the bug (example: OOB read, remote code
+   execution, etc)
+ * The proposed CWE id(s) determined earlier.
+ * List of affected versions.
+ * Reporter of the bug and their preferred name/company to include in the
+   disclosure.
+ * Links to the issues/fixes (if already public), these can be added later, even
+   after the CVE is public.
+ * The CPE prefix of the affected project (`cpe:2.3:a:libjxl_project:libjxl`)
+
+When in doubt, you can discuss these with the security team while requesting it.
+
+8. File a Security bug in Chromium (if affected).
+
+The libjxl project is in charge of updating and maintaining Chromium's libjxl
+integration code, this includes updating the libjxl library when needed. While
+the regular CVE disclosure process will eventually create a bug to update
+Chromium, filing one at this stage speeds up the process.
+
+Open [go/crbug](https://goto.google.com/crbug), select the "Security Bug"
+template and complete the details. This bug will be used to keep track of what
+versions of Chromium need backporting. The new bug in Chromium will not be
+public initially, but will be made public some time after the issue is fixed.
+
+9. Test the fixes on the intended releases
+
+When disclosing a vulnerability normally two ways to fix it are offered:
+
+ * A patch or set of patches that fix the issue on `main` branch, and
+ * A new release that contains the security fix for the user to update to.
+
+New releases that fix the vulnerability should be PATCH releases, that is, a
+previous release (like 1.2.3) plus the patches that fix the vulnerability,
+becoming a new version (like 1.2.4). See the [release process](release.md) for
+details. At least the latest MINOR release branch should have a PATCH release
+with the fix, however it might make sense to also backport the fix to older
+minor branch releases, depending on long-term support schedule for certain
+releases. For example, if many users are still using a particular older version
+of the library and updating to a new version requires significant changes (due
+to a redesigned API or new unavailable dependencies) it is helpful to provide a
+PATCH release there too.
+
+In either case, make sure that you test the fix in all the branches that you
+intend to release it to.
+
+The Continuous Integration pipelines don't work on the private forks created by
+the Security Advisory, so manual testing of the fix is needed there before
+making it public. Don't upload it to your public fork for testing.
+
+10. Coordinate a date for release of the vulnerability disclosure.
+
+Agree with the reporter and security folks from the CNA on a release date. There
+is a maximum 90-day disclosure timeline from the day the bug was reported.
+
+On the disclosure date publish the fixes and tag the new PATCH release with the
+fix. You can prepare private drafts of the release for review beforehand to
+reduce the workload.
+
+Update Chromium to the new release version (if affected) and work with Chrome
+engineers on the required backports.
+
+## What's a Security bug
+
+A security bug is a bug that can potentially be exploited to let an attacker
+gain unauthorized access or privileges. For example, gaining code execution in
+libjxl decoder by decoding a malicious .jxl file is a security bug, but hitting
+a `JXL_ASSERT()` is not necessarily one.
+
+The supported use cases to consider in the context of security bugs that require
+a vulnerability disclosure are "release" builds. The disclosure is intended for
+users of the project, to let them know that there is a security issue and that
+they should update or patch it.
+
+Unreleased versions are not relevant in this context. A bug introduced in the
+`main` branch that is not yet in any release is not covered by this guide even
+if the bug allows a remote code execution. CVEs should have a non-empty list of
+affected released versions.
+
+"Developer only" code is also not covered by this policy. In particular, tools
+that are not installed by the build, or not installed when packaging `libjxl`
+are not covered. For example, a bug in `tone_map` would not affect users since
+it is a developer-only tool. The rationale behind this is that users of the
+released software will not have the developer code. This developer code is in
+the same libjxl repository for convenience.
+
+When considering the impact of a bug, "release" mode should be assumed. In
+release mode `JXL_ASSERT()` and `JXL_CHECK()` are enabled, but `JXL_DASSERT()`
+are not. This means that if a `JXL_DASSERT()` protects an out-of-bounds (OOB)
+write, then the impact of a bug hitting the `JXL_DASSERT()` is at least an
+OOB write. On the other hand, if a bug ends up hitting a `JXL_CHECK()` instead
+of continuing, the only impact is the process abort instead of whatever else is
+possible after the `JXL_CHECK()`.
+
+Asserts in `libjxl` *tools* cause the tool process to abort, but don't affect
+the caller. Either crashing or returning an error (non-zero exit code) would
+have the same effect, so `JXL_ASSERT()` failures in the tools have no security
+or functional impact.
+
+Asserts in `libjxl` libraries, meant to be linked into other processes, cause
+the caller process to abort, potentially causing a Denial of Service, however,
+Denial of Service issues are *not* considered security bugs by this policy.
+These are still issues and should be fixed, but they are not security issues.
+
+Out-of-bounds (OOB) reads in process memory are considered security
+vulnerabilities. OOB reads may allow an attacker to read other buffers from the
+same process that it shouldn't have access to, even a small OOB read can
+allow the attacker to read an address in the stack or in the heap, defeating
+address space randomization techniques. In combination with other bugs these
+can enable or simplify attacks on the process using libjxl. OOB reads don't need
+to cause a segmentation fault to be a problem; leaking process information in
+decoded RGB pixels could be used as part of an exploit in some scenarios.
+
+OOB writes and remote code execution (RCE) are security bugs of at least high
+security impact.
diff --git a/third-party/libjxl/libjxl/doc/xl_overview.md b/third-party/libjxl/libjxl/doc/xl_overview.md
new file mode 100644
index 0000000000..b1c0f913b4
--- /dev/null
+++ b/third-party/libjxl/libjxl/doc/xl_overview.md
@@ -0,0 +1,181 @@
+# XL Overview
+
+## Requirements
+
+JPEG XL was designed for two main requirements:
+
+*   high quality: visually lossless at reasonable bitrates;
+*   decoding speed: multithreaded decoding should be able to reach around
+    400 Megapixel/s on large images.
+
+These goals apply to various types of images, including HDR content, whose
+support is made possible by full-precision (float32) computations and extensive
+support of color spaces and transfer functions.
+
+High performance is achieved by designing the format with careful consideration
+of memory bandwidth usage and ease of SIMD/GPU implementation.
+
+The full requirements for JPEG XL are listed in document wg1m82079.
+
+## General architecture
+
+The architecture follows the traditional block transform model with improvements
+in the individual components. For a quick overview, we sketch a "block diagram"
+of the lossy format decoder in the form of module names in **bold** followed by
+a brief description. Note that post-processing modules in [brackets] are
+optional - they are unnecessary or even counterproductive at very high quality
+settings.
+
+**Header**: decode metadata (e.g. image dimensions) from compressed fields
+(smaller than Exp-Golomb thanks to per-field encodings). The compression and
+small number of required fields enables very compact headers - much smaller than
+JFIF and HEVC. The container supports multiple images (e.g. animations/bursts)
+and passes (progressive).
+
+**Bitstream**: decode transform coefficient residuals using rANS-encoded
+`<#bits,bits>` symbols
+
+**Dequantize**: from adaptive quant map side information, plus chroma from luma
+
+**DC prediction**: expand DC residuals using adaptive (history-based) predictors
+
+**Chroma from luma**: restore predicted X and B from Y
+
+**IDCT:** 2x2..32x32, floating-point
+
+**[Gaborish]**: additional deblocking convolution with 3x3 kernel
+
+**[Edge preserving filter]**: nonlinear adaptive smoothing controlled by side
+information
+
+**[Noise injection]**: add perceptually pleasing noise according to a per-image
+noise model
+
+**Color space conversion**: from perceptual opsin XYB to linear RGB
+
+**[Converting to other color spaces via ICC]**
+
+The encoder is basically the reverse:
+
+**Color space conversion**: from linear RGB to perceptual opsin XYB
+
+**[Noise estimation]**: compute a noise model for the image
+
+**[Gaborish]**: sharpening to counteract the blurring on the decoder side
+
+**DCT**: transform sizes communicated via per-block side information
+
+**Chroma from luma**: find the best multipliers of Y for X and B channels of
+entire image
+
+**Adaptive quantization**: iterative search for quant map that yields the best
+perceived restoration
+
+**Quantize**: store 16-bit prediction residuals
+
+**DC prediction**: store residuals (prediction happens in quantized space)
+
+**Entropy coding**: rANS and context modeling with clustering
+
+
+# File Structure
+
+A codestream begins with a `FileHeader` followed by one or more "passes"
+(= scans: e.g. DC or AC_LF) which are then added together (summing the
+respective color components in Opsin space) to form the final image. There is no
+limit to the number of passes, so an encoder could choose to send salient parts
+first, followed by arbitrary decompositions of the final image (in terms of
+resolution, bit depth, quality or spatial location).
+
+Each pass contains groups of AC and DC data. A group is a subset of pixels that
+can be decoded in parallel. DC groups contain 256x256 DCs (from 2048x2048 input
+pixels), AC groups cover 256x256 input pixels.
+
+Each pass starts with a table of contents (sizes of each of their DC+AC
+groups), which enables parallel decoding and/or the decoding of a subset.
+However, there is no higher-level TOC of passes, as that would prevent
+appending additional images and could be too constraining for the encoder.
+
+
+## Lossless
+
+JPEG XL supports tools for lossless coding designed by Alexander Rhatushnyak and
+Jon Sneyers. They are about 60-75% of the size of PNG, and smaller than WebP
+lossless for photos.
+
+An adaptive predictor computes 4 predictions from the NW, N, NE and W pixels and
+combines them with weights based on previous errors. The error value is encoded
+in a bucket chosen based on a heuristic max error. The result is entropy-coded
+using the ANS encoder.
+
+## Current Reference Implementation
+
+### Conventions
+
+The software is written in C++ and built using CMake 3.6 or later.
+
+Error handling is done by having functions return values of type `jxl::Status`
+(a thin wrapper around bool which checks that it is not ignored). A convenience
+macro named `JXL_RETURN_IF_ERROR` makes this more convenient by automatically
+forwarding errors, and another macro named `JXL_FAILURE` exits with an error
+message if reached, with no effect in optimized builds.
+
+To diagnose the cause of encoder/decoder failures (which often only result in a
+generic "decode failed" message), build using the following command:
+
+```bash
+CMAKE_FLAGS="-DJXL_CRASH_ON_ERROR" ./ci.sh opt
+```
+
+In such builds, the first JXL_FAILURE will print a message identifying where the
+problem is and the program will exit immediately afterwards.
+
+### Architecture
+
+Getting back to the earlier block diagram:
+
+**Header** handling is implemented in `headers.h` and `field*`.
+
+**Bitstream**: `entropy_coder.h`, `dec_ans_*`.
+
+**(De)quantize**: `quantizer.h`.
+
+**DC prediction**: `predictor.h`.
+
+**Chroma from luma**: `chroma_from_luma.h`
+
+**(I)DCT**: `dct*.h`. Instead of operating directly on blocks of memory, the
+functions operate on thin wrappers which can handle blocks spread across
+multiple image lines.
+
+**DCT size selection**: `ac_strategy.cc`
+
+**[Gaborish]**: `enc_gaborish.h`.
+
+**[Edge preserving filter]**: `epf.h`
+
+**[Noise injection]**: `noise*` (currently disabled)
+
+**Color space conversion**: `color_*`, `dec_xyb.h`.
+
+## Decoder overview
+
+After decoding headers, the decoder begins processing frames (`dec_frame.cc`).
+
+For each pass, it will read the DC group table of contents (TOC) and start
+decoding, dequantizing and restoring color correlation of each DC group
+(covering 2048x2048 pixels in the input image) in parallel
+(`compressed_dc.cc`). The DC is split into parts corresponding to each AC group
+(with 1px of extra border); the AC group TOC is read and each AC group (256x256
+pixels) is processed in parallel (`dec_group.cc`).
+
+In each AC group, the decoder reads per-block side information indicating the
+kind of DCT transform; this is followed by the quantization field. Then, AC
+coefficients are read, dequantized and have color correlation restored on a
+tile per tile basis for better locality.
+
+After all the groups are read, postprocessing is applied: Gaborish smoothing
+and edge preserving filter, to reduce blocking and other artifacts.
+
+Finally, the image is converted back from the XYB color space
+(`dec_xyb.cc`) and saved to the output image (`codec_*.cc`).
diff --git a/third-party/libjxl/libjxl/examples/CMakeLists.txt b/third-party/libjxl/libjxl/examples/CMakeLists.txt
new file mode 100644
index 0000000000..88dc27c49f
--- /dev/null
+++ b/third-party/libjxl/libjxl/examples/CMakeLists.txt
@@ -0,0 +1,56 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Example project using libjxl.
+
+cmake_minimum_required(VERSION 3.10)
+
+project(SAMPLE_LIBJXL LANGUAGES C CXX)
+
+# Use pkg-config to find libjxl; fail early if pkg-config itself is missing.
+find_package(PkgConfig REQUIRED)
+pkg_check_modules(Jxl REQUIRED IMPORTED_TARGET libjxl)
+pkg_check_modules(JxlThreads REQUIRED IMPORTED_TARGET libjxl_threads)
+
+# Build the example encoder/decoder binaries using the default shared libraries
+# installed.
+add_executable(decode_oneshot decode_oneshot.cc)
+target_link_libraries(decode_oneshot PkgConfig::Jxl PkgConfig::JxlThreads)
+
+add_executable(decode_progressive decode_progressive.cc)
+target_link_libraries(decode_progressive PkgConfig::Jxl PkgConfig::JxlThreads)
+
+add_executable(encode_oneshot encode_oneshot.cc)
+target_link_libraries(encode_oneshot PkgConfig::Jxl PkgConfig::JxlThreads)
+
+
+# Building a static binary with the static libjxl dependencies. How to load
+# static library configs from pkg-config and how to build static binaries
+# depends on the platform, and building static binaries in general has problems.
+# If you don't need static binaries you can remove this section.
+# Note: pkg_check_modules() exports the include paths as
+# <prefix>_STATIC_INCLUDE_DIRS (plural); the singular form is never set.
+add_library(StaticJxl INTERFACE IMPORTED GLOBAL)
+set_target_properties(StaticJxl PROPERTIES
+    INTERFACE_INCLUDE_DIRECTORIES "${Jxl_STATIC_INCLUDE_DIRS}"
+    INTERFACE_COMPILE_OPTIONS "${Jxl_STATIC_CFLAGS_OTHER}"
+    INTERFACE_LINK_LIBRARIES "${Jxl_STATIC_LDFLAGS}"
+)
+add_library(StaticJxlThreads INTERFACE IMPORTED GLOBAL)
+set_target_properties(StaticJxlThreads PROPERTIES
+    INTERFACE_INCLUDE_DIRECTORIES "${JxlThreads_STATIC_INCLUDE_DIRS}"
+    INTERFACE_COMPILE_OPTIONS "${JxlThreads_STATIC_CFLAGS_OTHER}"
+    # libgcc uses weak symbols for pthread which means that -lpthread is not
+    # linked when compiling a static binary. This is a platform-specific fix for
+    # that.
+    INTERFACE_LINK_LIBRARIES
+      "${JxlThreads_STATIC_LDFLAGS} -Wl,--whole-archive -lpthread -Wl,--no-whole-archive"
+)
+
+add_executable(decode_oneshot_static decode_oneshot.cc)
+target_link_libraries(decode_oneshot_static -static StaticJxl StaticJxlThreads)
+
+add_executable(encode_oneshot_static encode_oneshot.cc)
+target_link_libraries(encode_oneshot_static -static StaticJxl StaticJxlThreads)
diff --git a/third-party/libjxl/libjxl/examples/decode_exif_metadata.cc b/third-party/libjxl/libjxl/examples/decode_exif_metadata.cc
new file mode 100644
index 0000000000..97b0e52703
--- /dev/null
+++ b/third-party/libjxl/libjxl/examples/decode_exif_metadata.cc
@@ -0,0 +1,172 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This C++ example extracts the Exif metadata of a JPEG XL image in one shot
+// (all input bytes available at once). The example writes the contents of the
+// Exif box to a file on disk.
+
+#include <jxl/decode.h>
+#include <jxl/decode_cxx.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <vector>
+
+// Extracts the first Exif box of the JPEG XL file in `jxl` (`size` bytes) into
+// `*exif`, which must be empty on entry. Returns false on a decoder error and
+// true otherwise; `*exif` stays empty when the file contains no Exif box.
+bool DecodeJpegXlExif(const uint8_t* jxl, size_t size,
+                      std::vector<uint8_t>* exif) {
+  auto dec = JxlDecoderMake(nullptr);
+  // We're only interested in the Exif boxes in this example, so don't
+  // subscribe to events related to pixel data.
+  if (JXL_DEC_SUCCESS != JxlDecoderSubscribeEvents(dec.get(), JXL_DEC_BOX)) {
+    fprintf(stderr, "JxlDecoderSubscribeEvents failed\n");
+    return false;
+  }
+  bool support_decompression = true;
+  if (JXL_DEC_SUCCESS != JxlDecoderSetDecompressBoxes(dec.get(), JXL_TRUE)) {
+    fprintf(stderr, "NOTE: decompressing brob boxes not supported with the "
+                    "currently used jxl library.\n");
+    support_decompression = false;
+  }
+
+  JxlDecoderSetInput(dec.get(), jxl, size);
+  JxlDecoderCloseInput(dec.get());
+
+  const constexpr size_t kChunkSize = 65536;
+  size_t output_pos = 0;  // Number of Exif bytes already stored in `*exif`.
+  for (;;) {
+    JxlDecoderStatus status = JxlDecoderProcessInput(dec.get());
+    if (status == JXL_DEC_ERROR) {
+      fprintf(stderr, "Decoder error\n");
+      return false;
+    } else if (status == JXL_DEC_NEED_MORE_INPUT) {
+      fprintf(stderr, "Error, already provided all input\n");
+      return false;
+    } else if (status == JXL_DEC_BOX) {
+      if (!exif->empty()) {
+        size_t remaining = JxlDecoderReleaseBoxBuffer(dec.get());
+        exif->resize(exif->size() - remaining);
+        // No need to wait for JXL_DEC_SUCCESS or decode other boxes.
+        return true;
+      }
+      JxlBoxType type;
+      if (JXL_DEC_SUCCESS !=
+          JxlDecoderGetBoxType(dec.get(), type, support_decompression)) {
+        fprintf(stderr, "Error, failed to get box type\n");
+        return false;
+      }
+      if (!memcmp(type, "Exif", 4)) {
+        exif->resize(kChunkSize);
+        JxlDecoderSetBoxBuffer(dec.get(), exif->data(), exif->size());
+      }
+    } else if (status == JXL_DEC_BOX_NEED_MORE_OUTPUT) {
+      // Written data ends `remaining` bytes short of the released buffer's end.
+      size_t remaining = JxlDecoderReleaseBoxBuffer(dec.get());
+      output_pos = exif->size() - remaining;
+      exif->resize(exif->size() + kChunkSize);
+      JxlDecoderSetBoxBuffer(dec.get(), exif->data() + output_pos,
+                             exif->size() - output_pos);
+    } else if (status == JXL_DEC_SUCCESS) {
+      if (!exif->empty()) {
+        size_t remaining = JxlDecoderReleaseBoxBuffer(dec.get());
+        exif->resize(exif->size() - remaining);
+      }
+      return true;
+    } else {
+      fprintf(stderr, "Unknown decoder status\n");
+      return false;
+    }
+  }
+}
+
+// Reads the whole file `filename` into `*out`. Returns true only when the file
+// could be opened, measured, read completely and closed without error.
+bool LoadFile(const char* filename, std::vector<uint8_t>* out) {
+  FILE* file = fopen(filename, "rb");
+  if (!file) {
+    return false;
+  }
+
+  // Measure the file by seeking to its end; `length` stays negative on failure.
+  long length = -1;
+  if (fseek(file, 0, SEEK_END) == 0) {
+    length = ftell(file);
+  }
+
+  // A negative or LONG_MAX length means an invalid file or a directory.
+  const bool length_ok = length >= 0 && length < LONG_MAX;
+  if (!length_ok || fseek(file, 0, SEEK_SET) != 0) {
+    fclose(file);
+    return false;
+  }
+
+  // Size the buffer up front so that a single fread can fill it.
+  out->resize(length);
+  const size_t bytes_read = fread(out->data(), 1, length, file);
+
+  // A failing fclose fails the load regardless of how much was read.
+  if (fclose(file) != 0) {
+    return false;
+  }
+  return bytes_read == static_cast<size_t>(length);
+}
+
+// Writes `size` bytes from `data` to `filename`; returns false on any failure.
+bool WriteFile(const char* filename, const uint8_t* data, size_t size) {
+  FILE* file = fopen(filename, "wb");
+  if (!file) {
+    fprintf(stderr, "Could not open %s for writing\n", filename);
+    return false;
+  }
+  // A short write (e.g. disk full) and a failed flush on close are both errors.
+  const bool written = fwrite(data, 1, size, file) == size;
+  const bool closed = fclose(file) == 0;
+  return written && closed;
+}
+
+// Entry point: extracts the Exif data of the JPEG XL file argv[1] into the file
+// argv[2]. Returns 0 on success or when no Exif data exists, 1 on any error.
+int main(int argc, char* argv[]) {
+  if (argc != 3) {
+    fprintf(stderr, "Usage: %s <jxl> <exif>\n"
+                    "Where:\n"
+                    "  jxl = input JPEG XL image filename\n"
+                    "  exif = output exif filename\n"
+                    "Output files will be overwritten.\n", argv[0]);
+    return 1;
+  }
+  const char* const input_path = argv[1];
+  const char* const output_path = argv[2];
+
+  std::vector<uint8_t> jxl_bytes;
+  if (!LoadFile(input_path, &jxl_bytes)) {
+    fprintf(stderr, "couldn't load %s\n", input_path);
+    return 1;
+  }
+
+  std::vector<uint8_t> exif_bytes;
+  if (!DecodeJpegXlExif(jxl_bytes.data(), jxl_bytes.size(), &exif_bytes)) {
+    fprintf(stderr, "Error while decoding the jxl file\n");
+    return 1;
+  }
+  if (exif_bytes.empty()) {
+    printf("No exif data present in this image\n");
+    return 0;
+  }
+
+  // TODO(lode): the exif box data contains the 4-byte TIFF header at the
+  // beginning, check whether this is desired to be part of the output, or
+  // should be removed.
+  if (!WriteFile(output_path, exif_bytes.data(), exif_bytes.size())) {
+    fprintf(stderr, "Error while writing the exif file\n");
+    return 1;
+  }
+  printf("Successfully wrote %s\n", output_path);
+  return 0;
+}
diff --git a/third-party/libjxl/libjxl/examples/decode_oneshot.cc b/third-party/libjxl/libjxl/examples/decode_oneshot.cc
new file mode 100644
index 0000000000..07720954f3
--- /dev/null
+++ b/third-party/libjxl/libjxl/examples/decode_oneshot.cc
@@ -0,0 +1,250 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This C++ example decodes a JPEG XL image in one shot (all input bytes
+// available at once). The example outputs the pixels and color information to a
+// floating point image and an ICC profile on disk.
+
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS
+#endif
+
+#include <inttypes.h>
+#include <jxl/decode.h>
+#include <jxl/decode_cxx.h>
+#include <jxl/resizable_parallel_runner.h>
+#include <jxl/resizable_parallel_runner_cxx.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <vector>
+
+/** Decodes a complete JPEG XL stream into float pixels plus an ICC profile.
+ *
+ * The pixels are written as interleaved RGBA (four floats per pixel), row by
+ * row from top to bottom. Values nominally lie in 0..1 but can exceed that
+ * range for HDR or wide-gamut content. The returned ICC profile describes the
+ * color format of the pixel data.
+ *
+ * @param jxl pointer to the encoded bytes, all of which must be available
+ * @param size number of bytes at `jxl`
+ * @param pixels receives xsize * ysize * 4 floats
+ * @param xsize receives the image width
+ * @param ysize receives the image height
+ * @param icc_profile receives the ICC profile bytes
+ * @return true once the decoder reports JXL_DEC_SUCCESS, false on any error
+ */
+bool DecodeJpegXlOneShot(const uint8_t* jxl, size_t size,
+                         std::vector<float>* pixels, size_t* xsize,
+                         size_t* ysize, std::vector<uint8_t>* icc_profile) {
+  // Multi-threaded parallel runner.
+  auto runner = JxlResizableParallelRunnerMake(nullptr);
+  auto dec = JxlDecoderMake(nullptr);
+
+  const int wanted_events =
+      JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING | JXL_DEC_FULL_IMAGE;
+  if (JxlDecoderSubscribeEvents(dec.get(), wanted_events) != JXL_DEC_SUCCESS) {
+    fprintf(stderr, "JxlDecoderSubscribeEvents failed\n");
+    return false;
+  }
+
+  if (JxlDecoderSetParallelRunner(dec.get(), JxlResizableParallelRunner,
+                                  runner.get()) != JXL_DEC_SUCCESS) {
+    fprintf(stderr, "JxlDecoderSetParallelRunner failed\n");
+    return false;
+  }
+
+  JxlBasicInfo info;
+  // Four channels of float samples in native byte order.
+  JxlPixelFormat format = {4, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0};
+
+  // Hand over the whole stream at once and tell the decoder no more follows.
+  JxlDecoderSetInput(dec.get(), jxl, size);
+  JxlDecoderCloseInput(dec.get());
+
+  // Keep pumping the decoder and react to each event it reports.
+  for (;;) {
+    const JxlDecoderStatus status = JxlDecoderProcessInput(dec.get());
+    switch (status) {
+      case JXL_DEC_ERROR:
+        fprintf(stderr, "Decoder error\n");
+        return false;
+
+      case JXL_DEC_NEED_MORE_INPUT:
+        fprintf(stderr, "Error, already provided all input\n");
+        return false;
+
+      case JXL_DEC_BASIC_INFO:
+        if (JxlDecoderGetBasicInfo(dec.get(), &info) != JXL_DEC_SUCCESS) {
+          fprintf(stderr, "JxlDecoderGetBasicInfo failed\n");
+          return false;
+        }
+        *xsize = info.xsize;
+        *ysize = info.ysize;
+        // Size the thread pool according to the image dimensions.
+        JxlResizableParallelRunnerSetThreads(
+            runner.get(),
+            JxlResizableParallelRunnerSuggestThreads(info.xsize, info.ysize));
+        break;
+
+      case JXL_DEC_COLOR_ENCODING: {
+        // Get the ICC color profile of the pixel data
+        size_t profile_bytes;
+        if (JxlDecoderGetICCProfileSize(dec.get(),
+                                        JXL_COLOR_PROFILE_TARGET_DATA,
+                                        &profile_bytes) != JXL_DEC_SUCCESS) {
+          fprintf(stderr, "JxlDecoderGetICCProfileSize failed\n");
+          return false;
+        }
+        icc_profile->resize(profile_bytes);
+        if (JxlDecoderGetColorAsICCProfile(
+                dec.get(), JXL_COLOR_PROFILE_TARGET_DATA, icc_profile->data(),
+                icc_profile->size()) != JXL_DEC_SUCCESS) {
+          fprintf(stderr, "JxlDecoderGetColorAsICCProfile failed\n");
+          return false;
+        }
+        break;
+      }
+
+      case JXL_DEC_NEED_IMAGE_OUT_BUFFER: {
+        size_t required_bytes;
+        if (JxlDecoderImageOutBufferSize(dec.get(), &format,
+                                         &required_bytes) != JXL_DEC_SUCCESS) {
+          fprintf(stderr, "JxlDecoderImageOutBufferSize failed\n");
+          return false;
+        }
+        // 16 bytes per pixel: four channels of four-byte floats.
+        const size_t pixel_count = *xsize * *ysize;
+        const size_t expected_bytes = pixel_count * 16;
+        if (required_bytes != expected_bytes) {
+          fprintf(stderr, "Invalid out buffer size %" PRIu64 " %" PRIu64 "\n",
+                  static_cast<uint64_t>(required_bytes),
+                  static_cast<uint64_t>(expected_bytes));
+          return false;
+        }
+        pixels->resize(pixel_count * 4);
+        void* destination = (void*)pixels->data();
+        const size_t destination_bytes = pixels->size() * sizeof(float);
+        if (JxlDecoderSetImageOutBuffer(dec.get(), &format, destination,
+                                        destination_bytes) !=
+            JXL_DEC_SUCCESS) {
+          fprintf(stderr, "JxlDecoderSetImageOutBuffer failed\n");
+          return false;
+        }
+        break;
+      }
+
+      case JXL_DEC_FULL_IMAGE:
+        // Nothing to do. Do not yet return. If the image is an animation,
+        // more full frames may be decoded. This example only keeps the last
+        // one.
+        break;
+
+      case JXL_DEC_SUCCESS:
+        // All decoding successfully finished.
+        // It's not required to call JxlDecoderReleaseInput(dec.get()) here
+        // since the decoder will be destroyed.
+        return true;
+
+      default:
+        fprintf(stderr, "Unknown decoder status\n");
+        return false;
+    }
+  }
+}
+
+/** Writes to .pfm file (Portable FloatMap). Gimp, tev viewer and ImageMagick
+ * support viewing this format.
+ * The input pixels are given as 32-bit floating point with 4-channel RGBA.
+ * The alpha channel will not be written since .pfm does not support it.
+ *
+ * @param filename output path; an existing file is overwritten
+ * @param pixels xsize * ysize * 4 floats, rows ordered top to bottom
+ * @param xsize image width in pixels
+ * @param ysize image height in pixels
+ * @return true only if the header and every pixel were written and the file
+ * was closed without error
+ */
+bool WritePFM(const char* filename, const float* pixels, size_t xsize,
+              size_t ysize) {
+  FILE* file = fopen(filename, "wb");
+  if (!file) {
+    fprintf(stderr, "Could not open %s for writing\n", filename);
+    return false;
+  }
+  // Detect host byte order; the sign of the PFM scale field encodes it.
+  uint32_t endian_test = 1;
+  uint8_t little_endian[4];
+  memcpy(little_endian, &endian_test, 4);
+
+  // Print the dimensions at full width instead of truncating them to int.
+  bool ok = fprintf(file, "PF\n%" PRIu64 " %" PRIu64 "\n%s\n",
+                    static_cast<uint64_t>(xsize), static_cast<uint64_t>(ysize),
+                    little_endian[0] ? "-1.0" : "1.0") >= 0;
+
+  // Rows are emitted last-to-first; stop at the first failed write.
+  for (size_t y = ysize; ok && y-- > 0;) {
+    for (size_t x = 0; ok && x < xsize; x++) {
+      // R, G and B are adjacent in the RGBA input; alpha is skipped.
+      const float* rgb = &pixels[(y * xsize + x) * 4];
+      ok = fwrite(rgb, sizeof(float), 3, file) == 3;
+    }
+  }
+
+  // Close unconditionally so the handle is released even after a failure.
+  const bool closed = fclose(file) == 0;
+  return ok && closed;
+}
+
+// Reads the entire contents of `filename` into `*out`.
+// Returns false if the file cannot be opened, measured, fully read or closed.
+bool LoadFile(const char* filename, std::vector<uint8_t>* out) {
+  FILE* input = fopen(filename, "rb");
+  if (input == nullptr) {
+    return false;
+  }
+
+  // Measure the file by seeking to its end, then rewind. A negative ftell()
+  // result or LONG_MAX is treated as an invalid file or a directory.
+  long length = -1;
+  const bool measured = fseek(input, 0, SEEK_END) == 0 &&
+                        (length = ftell(input)) >= 0 && length < LONG_MAX &&
+                        fseek(input, 0, SEEK_SET) == 0;
+  if (!measured) {
+    fclose(input);
+    return false;
+  }
+
+  out->resize(length);
+  const size_t bytes_read = fread(out->data(), 1, length, input);
+  const bool closed = fclose(input) == 0;
+  return closed && bytes_read == static_cast<size_t>(length);
+}
+
+/**
+ * Writes `size` bytes starting at `data` to `filename`, replacing any existing
+ * file.
+ *
+ * @return true only if the file could be opened, every byte was written and
+ * the stream was closed without error.
+ */
+bool WriteFile(const char* filename, const uint8_t* data, size_t size) {
+  FILE* file = fopen(filename, "wb");
+  if (!file) {
+    fprintf(stderr, "Could not open %s for writing\n", filename);
+    return false;
+  }
+  // A short write (disk full, I/O error) must not be reported as success.
+  const bool wrote_all = fwrite(data, 1, size, file) == size;
+  // Close unconditionally so the handle is released even after a short write.
+  const bool closed = fclose(file) == 0;
+  return wrote_all && closed;
+}
+
+// Command-line driver: decodes the JPEG XL file named by argv[1] in one shot,
+// then writes the pixels as a PFM image (argv[2]) and the ICC profile
+// (argv[3]). Returns 0 on success and 1 on any failure.
+int main(int argc, char* argv[]) {
+  if (argc != 4) {
+    fprintf(stderr,
+            "Usage: %s <jxl> <pfm> <icc>\n"
+            "Where:\n"
+            "  jxl = input JPEG XL image filename\n"
+            "  pfm = output Portable FloatMap image filename\n"
+            "  icc = output ICC color profile filename\n"
+            "Output files will be overwritten.\n",
+            argv[0]);
+    return 1;
+  }
+
+  const char* input_path = argv[1];
+  const char* pfm_path = argv[2];
+  const char* icc_path = argv[3];
+
+  std::vector<uint8_t> encoded;
+  if (!LoadFile(input_path, &encoded)) {
+    fprintf(stderr, "couldn't load %s\n", input_path);
+    return 1;
+  }
+
+  std::vector<float> rgba;
+  std::vector<uint8_t> icc;
+  size_t width = 0;
+  size_t height = 0;
+  const bool decoded = DecodeJpegXlOneShot(encoded.data(), encoded.size(),
+                                           &rgba, &width, &height, &icc);
+  if (!decoded) {
+    fprintf(stderr, "Error while decoding the jxl file\n");
+    return 1;
+  }
+
+  // The image is written first; the profile is only attempted if that worked.
+  if (!WritePFM(pfm_path, rgba.data(), width, height)) {
+    fprintf(stderr, "Error while writing the PFM image file\n");
+    return 1;
+  }
+  if (!WriteFile(icc_path, icc.data(), icc.size())) {
+    fprintf(stderr, "Error while writing the ICC profile file\n");
+    return 1;
+  }
+
+  printf("Successfully wrote %s and %s\n", pfm_path, icc_path);
+  return 0;
+}
diff --git a/third-party/libjxl/libjxl/examples/decode_progressive.cc b/third-party/libjxl/libjxl/examples/decode_progressive.cc
new file mode 100644
index 0000000000..a094cbeb4f
--- /dev/null
+++ b/third-party/libjxl/libjxl/examples/decode_progressive.cc
@@ -0,0 +1,241 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This C++ example decodes a JPEG XL image progressively (input bytes are
+// passed in chunks). The example outputs the intermediate steps to PAM files.
+
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS
+#endif
+
+#include <inttypes.h>
+#include <jxl/decode.h>
+#include <jxl/decode_cxx.h>
+#include <jxl/resizable_parallel_runner.h>
+#include <jxl/resizable_parallel_runner_cxx.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <vector>
+
+/** Writes 8-bit RGBA pixels to a PAM ("P7") file.
+ *
+ * @param filename output path; an existing file is overwritten
+ * @param buffer w * h * 4 bytes of pixel data, written verbatim after the
+ * header
+ * @param w image width in pixels
+ * @param h image height in pixels
+ * @return true only if the header and all pixel bytes were written and the
+ * file was closed without error
+ */
+bool WritePAM(const char* filename, const uint8_t* buffer, size_t w, size_t h) {
+  FILE* fp = fopen(filename, "wb");
+  if (!fp) {
+    fprintf(stderr, "Could not open %s for writing\n", filename);
+    return false;
+  }
+  // A failed header write makes the file unusable, so it counts as an error.
+  const bool header_ok =
+      fprintf(fp,
+              "P7\nWIDTH %" PRIu64 "\nHEIGHT %" PRIu64
+              "\nDEPTH 4\nMAXVAL 255\nTUPLTYPE "
+              "RGB_ALPHA\nENDHDR\n",
+              static_cast<uint64_t>(w), static_cast<uint64_t>(h)) >= 0;
+  const size_t num_bytes = w * h * 4;
+  const bool body_ok =
+      header_ok && fwrite(buffer, 1, num_bytes, fp) == num_bytes;
+  // Close unconditionally so the handle is released even after a failure.
+  const bool closed = fclose(fp) == 0;
+  return body_ok && closed;
+}
+
+/** Decodes JPEG XL image to 8-bit integer RGBA pixels and an ICC Profile, in a
+ * progressive way, saving the intermediate steps.
+ *
+ * The input is handed to the decoder in pieces of at most `chunksize` bytes.
+ * Whenever the decoder asks for more input, finishes a frame or finishes the
+ * stream, the current pixels are written to "<filename>-<bytes seen>.pam".
+ *
+ * @param jxl pointer to the encoded bytes
+ * @param size number of bytes available at `jxl`
+ * @param filename prefix of the output file names
+ * @param chunksize number of bytes offered to the decoder per step; values
+ * larger than `size` are clamped to `size`
+ * @return true when the whole input was consumed without a decoder error
+ */
+bool DecodeJpegXlProgressive(const uint8_t* jxl, size_t size,
+                             const char* filename, size_t chunksize) {
+  std::vector<uint8_t> pixels;
+  std::vector<uint8_t> icc_profile;
+  size_t xsize = 0, ysize = 0;
+
+  // Multi-threaded parallel runner.
+  auto runner = JxlResizableParallelRunnerMake(nullptr);
+
+  auto dec = JxlDecoderMake(nullptr);
+  if (JXL_DEC_SUCCESS !=
+      JxlDecoderSubscribeEvents(dec.get(), JXL_DEC_BASIC_INFO |
+                                               JXL_DEC_COLOR_ENCODING |
+                                               JXL_DEC_FULL_IMAGE)) {
+    fprintf(stderr, "JxlDecoderSubscribeEvents failed\n");
+    return false;
+  }
+
+  if (JXL_DEC_SUCCESS != JxlDecoderSetParallelRunner(dec.get(),
+                                                     JxlResizableParallelRunner,
+                                                     runner.get())) {
+    fprintf(stderr, "JxlDecoderSetParallelRunner failed\n");
+    return false;
+  }
+
+  JxlBasicInfo info;
+  JxlPixelFormat format = {4, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0};
+
+  // Never offer the decoder more bytes than the caller supplied: a chunk size
+  // above the input size would let it read past the end of `jxl` and would
+  // make the `size - seen` bookkeeping below underflow.
+  if (chunksize > size) {
+    chunksize = size;
+  }
+
+  // `seen` counts bytes the decoder has consumed; `remaining` is the size of
+  // the chunk it currently holds.
+  size_t seen = 0;
+  JxlDecoderSetInput(dec.get(), jxl, chunksize);
+  size_t remaining = chunksize;
+
+  for (;;) {
+    JxlDecoderStatus status = JxlDecoderProcessInput(dec.get());
+
+    if (status == JXL_DEC_ERROR) {
+      fprintf(stderr, "Decoder error\n");
+      return false;
+    } else if (status == JXL_DEC_NEED_MORE_INPUT || status == JXL_DEC_SUCCESS ||
+               status == JXL_DEC_FULL_IMAGE) {
+      // Take the input back; the return value is the number of bytes of the
+      // current chunk the decoder has not processed yet.
+      seen += remaining - JxlDecoderReleaseInput(dec.get());
+      printf("Flushing after %" PRIu64 " bytes\n", static_cast<uint64_t>(seen));
+      // Only a partially decoded image needs an explicit flush before it can
+      // be saved.
+      if (status == JXL_DEC_NEED_MORE_INPUT &&
+          JXL_DEC_SUCCESS != JxlDecoderFlushImage(dec.get())) {
+        printf("flush error (no preview yet)\n");
+      } else {
+        char fname[1024];
+        if (snprintf(fname, 1024, "%s-%" PRIu64 ".pam", filename,
+                     static_cast<uint64_t>(seen)) >= 1024) {
+          fprintf(stderr, "Filename too long\n");
+          return false;
+        }
+        // A failed intermediate write is reported but does not stop decoding.
+        if (!WritePAM(fname, pixels.data(), xsize, ysize)) {
+          fprintf(stderr, "Error writing progressive output\n");
+        }
+      }
+      // Offer the next chunk, starting at the first unconsumed byte.
+      remaining = size - seen;
+      if (remaining > chunksize) remaining = chunksize;
+      if (remaining == 0) {
+        if (status == JXL_DEC_NEED_MORE_INPUT) {
+          fprintf(stderr, "Error, already provided all input\n");
+          return false;
+        } else {
+          return true;
+        }
+      }
+      JxlDecoderSetInput(dec.get(), jxl + seen, remaining);
+    } else if (status == JXL_DEC_BASIC_INFO) {
+      if (JXL_DEC_SUCCESS != JxlDecoderGetBasicInfo(dec.get(), &info)) {
+        fprintf(stderr, "JxlDecoderGetBasicInfo failed\n");
+        return false;
+      }
+      xsize = info.xsize;
+      ysize = info.ysize;
+      JxlResizableParallelRunnerSetThreads(
+          runner.get(),
+          JxlResizableParallelRunnerSuggestThreads(info.xsize, info.ysize));
+    } else if (status == JXL_DEC_COLOR_ENCODING) {
+      // Get the ICC color profile of the pixel data
+      size_t icc_size;
+      if (JXL_DEC_SUCCESS !=
+          JxlDecoderGetICCProfileSize(
+              dec.get(), JXL_COLOR_PROFILE_TARGET_ORIGINAL, &icc_size)) {
+        fprintf(stderr, "JxlDecoderGetICCProfileSize failed\n");
+        return false;
+      }
+      icc_profile.resize(icc_size);
+      if (JXL_DEC_SUCCESS != JxlDecoderGetColorAsICCProfile(
+                                 dec.get(), JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+                                 icc_profile.data(), icc_profile.size())) {
+        fprintf(stderr, "JxlDecoderGetColorAsICCProfile failed\n");
+        return false;
+      }
+    } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+      size_t buffer_size;
+      if (JXL_DEC_SUCCESS !=
+          JxlDecoderImageOutBufferSize(dec.get(), &format, &buffer_size)) {
+        fprintf(stderr, "JxlDecoderImageOutBufferSize failed\n");
+        return false;
+      }
+      // Four bytes per pixel: 8-bit RGBA.
+      if (buffer_size != xsize * ysize * 4) {
+        fprintf(stderr, "Invalid out buffer size %" PRIu64 " != %" PRIu64 "\n",
+                static_cast<uint64_t>(buffer_size),
+                static_cast<uint64_t>(xsize * ysize * 4));
+        return false;
+      }
+      pixels.resize(xsize * ysize * 4);
+      if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutBuffer(dec.get(), &format,
+                                                         pixels.data(),
+                                                         pixels.size())) {
+        fprintf(stderr, "JxlDecoderSetImageOutBuffer failed\n");
+        return false;
+      }
+    } else {
+      fprintf(stderr, "Unknown decoder status\n");
+      return false;
+    }
+  }
+}
+
+// Reads the entire contents of `filename` into `*out`.
+// Returns false if the file cannot be opened, measured, fully read or closed.
+bool LoadFile(const char* filename, std::vector<uint8_t>* out) {
+  FILE* input = fopen(filename, "rb");
+  if (input == nullptr) {
+    return false;
+  }
+
+  // Measure the file by seeking to its end, then rewind. A negative ftell()
+  // result or LONG_MAX is treated as an invalid file or a directory.
+  long length = -1;
+  const bool measured = fseek(input, 0, SEEK_END) == 0 &&
+                        (length = ftell(input)) >= 0 && length < LONG_MAX &&
+                        fseek(input, 0, SEEK_SET) == 0;
+  if (!measured) {
+    fclose(input);
+    return false;
+  }
+
+  out->resize(length);
+  const size_t bytes_read = fread(out->data(), 1, length, input);
+  const bool closed = fclose(input) == 0;
+  return closed && bytes_read == static_cast<size_t>(length);
+}
+
+// Command-line driver: progressively decodes the JPEG XL file named by
+// argv[1], writing intermediate PAM images whose names start with argv[2].
+// The optional argv[3] sets the chunk size in bytes (at least 100); without
+// it the whole file is passed as a single chunk. Returns 0 on success, 1 on
+// any failure.
+int main(int argc, char* argv[]) {
+  if (argc < 3) {
+    fprintf(
+        stderr,
+        "Usage: %s <jxl> <basename> [chunksize]\n"
+        "Where:\n"
+        "  jxl = input JPEG XL image filename\n"
+        "  basename = prefix of output filenames\n"
+        "  chunksize = loads chunksize bytes at a time and writes\n"
+        "              intermediate results to basename-[bytes loaded].pam\n"
+        "Output files will be overwritten.\n",
+        argv[0]);
+    return 1;
+  }
+
+  const char* input_path = argv[1];
+  const char* output_prefix = argv[2];
+
+  std::vector<uint8_t> encoded;
+  if (!LoadFile(input_path, &encoded)) {
+    fprintf(stderr, "couldn't load %s\n", input_path);
+    return 1;
+  }
+
+  // Default: one chunk covering the whole file.
+  size_t chunk_bytes = encoded.size();
+  const bool has_chunk_argument = argc > 3;
+  if (has_chunk_argument) {
+    const long requested = atol(argv[3]);
+    if (requested < 100) {
+      fprintf(stderr, "Chunk size is too low, try at least 100 bytes\n");
+      return 1;
+    }
+    chunk_bytes = requested;
+  }
+
+  const bool decoded = DecodeJpegXlProgressive(encoded.data(), encoded.size(),
+                                               output_prefix, chunk_bytes);
+  if (!decoded) {
+    fprintf(stderr, "Error while decoding the jxl file\n");
+    return 1;
+  }
+  return 0;
+}
diff --git a/third-party/libjxl/libjxl/examples/encode_oneshot.cc b/third-party/libjxl/libjxl/examples/encode_oneshot.cc
new file mode 100644
index 0000000000..49b360ce3b
--- /dev/null
+++ b/third-party/libjxl/libjxl/examples/encode_oneshot.cc
@@ -0,0 +1,276 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This example encodes a file containing a floating point image to another
+// file containing JPEG XL image with a single frame.
+
+#include <jxl/encode.h>
+#include <jxl/encode_cxx.h>
+#include <jxl/thread_parallel_runner.h>
+#include <jxl/thread_parallel_runner_cxx.h>
+#include <limits.h>
+#include <string.h>
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+/**
+ * Reads from .pfm file (Portable FloatMap)
+ *
+ * Only 3-channel ("PF") files whose byte order matches the host are accepted.
+ * Rows are stored in the output in the reverse of their order in the file.
+ *
+ * @param filename name of the file to read
+ * @param pixels vector to fill with loaded pixels as 32-bit floating point with
+ * 3-channel RGB
+ * @param xsize set to width of loaded image
+ * @param ysize set to height of loaded image
+ * @return true if the file was read and parsed successfully, false if it could
+ * not be read or is not a well-formed PFM file of the supported kind
+ */
+bool ReadPFM(const char* filename, std::vector<float>* pixels, uint32_t* xsize,
+             uint32_t* ysize) {
+  FILE* file = fopen(filename, "rb");
+  if (!file) {
+    fprintf(stderr, "Could not open %s for reading.\n", filename);
+    return false;
+  }
+  // Detect host byte order for the endianness comparison further down.
+  uint32_t endian_test = 1;
+  uint8_t little_endian[4];
+  memcpy(little_endian, &endian_test, 4);
+
+  if (fseek(file, 0, SEEK_END) != 0) {
+    fclose(file);
+    return false;
+  }
+
+  long size = ftell(file);
+  // Avoid invalid file or directory.
+  if (size >= LONG_MAX || size < 0) {
+    fclose(file);
+    return false;
+  }
+
+  if (fseek(file, 0, SEEK_SET) != 0) {
+    fclose(file);
+    return false;
+  }
+
+  std::vector<char> data;
+  data.resize(size);
+
+  const size_t readsize = fread(data.data(), 1, size, file);
+  // Close before looking at the read result so that a short read does not
+  // leak the file handle.
+  const bool closed = fclose(file) == 0;
+  if (readsize != static_cast<size_t>(size) || !closed) {
+    return false;
+  }
+
+  std::stringstream datastream;
+  std::string datastream_content(data.data(), data.size());
+  datastream.str(datastream_content);
+
+  std::string pf_token;
+  getline(datastream, pf_token, '\n');
+  if (pf_token != "PF") {
+    fprintf(stderr,
+            "%s doesn't seem to be a 3 channel Portable FloatMap file (missing "
+            "'PF\\n' "
+            "bytes).\n",
+            filename);
+    return false;
+  }
+
+  // Parses one header dimension without throwing: rejects text that is not a
+  // number, negative values and values that do not fit in an int.
+  auto parse_dimension = [](const std::string& token, uint32_t* value) {
+    std::istringstream in(token);
+    long long parsed = -1;
+    if (!(in >> parsed) || parsed < 0 || parsed > INT_MAX) {
+      return false;
+    }
+    *value = static_cast<uint32_t>(parsed);
+    return true;
+  };
+
+  std::string xsize_token;
+  getline(datastream, xsize_token, ' ');
+  std::string ysize_token;
+  getline(datastream, ysize_token, '\n');
+  if (!parse_dimension(xsize_token, xsize) ||
+      !parse_dimension(ysize_token, ysize)) {
+    fprintf(stderr,
+            "%s doesn't seem to be a Portable FloatMap file (invalid image "
+            "dimensions).\n",
+            filename);
+    return false;
+  }
+
+  std::string endianness_token;
+  getline(datastream, endianness_token, '\n');
+  bool input_little_endian;
+  if (endianness_token == "1.0") {
+    input_little_endian = false;
+  } else if (endianness_token == "-1.0") {
+    input_little_endian = true;
+  } else {
+    fprintf(stderr,
+            "%s doesn't seem to be a Portable FloatMap file (endianness token "
+            "isn't '1.0' or '-1.0').\n",
+            filename);
+    return false;
+  }
+
+  // Header length: each token plus the one separator character following it.
+  size_t offset = pf_token.size() + 1 + xsize_token.size() + 1 +
+                  ysize_token.size() + 1 + endianness_token.size() + 1;
+
+  // Validate the payload size by division rather than multiplication so that
+  // huge dimensions cannot wrap around and slip past the check. `offset` can
+  // exceed the file size when the header's final newline is missing.
+  const size_t bytes_per_pixel = 3 * sizeof(float);
+  const uint64_t pixel_count = static_cast<uint64_t>(*xsize) * *ysize;
+  const bool size_matches =
+      offset <= data.size() &&
+      (data.size() - offset) % bytes_per_pixel == 0 &&
+      (data.size() - offset) / bytes_per_pixel == pixel_count;
+  if (!size_matches) {
+    fprintf(stderr,
+            "%s doesn't seem to be a Portable FloatMap file (pixel data bytes "
+            "are %llu, but expected %u * %u * 3 * 4 + %llu).\n",
+            filename, static_cast<unsigned long long>(data.size()),
+            static_cast<unsigned>(*ysize), static_cast<unsigned>(*xsize),
+            static_cast<unsigned long long>(offset));
+    return false;
+  }
+
+  if (!!little_endian[0] != input_little_endian) {
+    fprintf(stderr,
+            "%s has a different endianness than we do, conversion is not "
+            "supported.\n",
+            filename);
+    return false;
+  }
+
+  const size_t width = *xsize;
+  const size_t height = *ysize;
+  pixels->resize(width * height * 3);
+
+  // Copy whole rows: the first row in the file becomes the last output row.
+  const size_t row_bytes = width * bytes_per_pixel;
+  if (row_bytes != 0) {
+    for (size_t file_row = 0; file_row < height; file_row++) {
+      const size_t y = height - 1 - file_row;
+      memcpy(pixels->data() + y * width * 3,
+             data.data() + offset + file_row * row_bytes, row_bytes);
+    }
+  }
+
+  return true;
+}
+
+/**
+ * Encodes one frame of float RGB pixels as a JPEG XL stream.
+ *
+ * @param pixels interleaved 3-channel float samples of the input image
+ * @param xsize image width in pixels
+ * @param ysize image height in pixels
+ * @param compressed receives the encoded bytes, trimmed to their exact length
+ * @return true if every encoder call succeeded, false otherwise
+ */
+bool EncodeJxlOneshot(const std::vector<float>& pixels, const uint32_t xsize,
+                      const uint32_t ysize, std::vector<uint8_t>* compressed) {
+  auto enc = JxlEncoderMake(/*memory_manager=*/nullptr);
+  auto runner = JxlThreadParallelRunnerMake(
+      /*memory_manager=*/nullptr,
+      JxlThreadParallelRunnerDefaultNumWorkerThreads());
+  if (JxlEncoderSetParallelRunner(enc.get(), JxlThreadParallelRunner,
+                                  runner.get()) != JXL_ENC_SUCCESS) {
+    fprintf(stderr, "JxlEncoderSetParallelRunner failed\n");
+    return false;
+  }
+
+  // Three channels of float samples in native byte order.
+  JxlPixelFormat pixel_format = {3, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0};
+
+  // Describe the image: dimensions and 32-bit samples with 8 exponent bits.
+  JxlBasicInfo basic_info;
+  JxlEncoderInitBasicInfo(&basic_info);
+  basic_info.xsize = xsize;
+  basic_info.ysize = ysize;
+  basic_info.bits_per_sample = 32;
+  basic_info.exponent_bits_per_sample = 8;
+  basic_info.uses_original_profile = JXL_FALSE;
+  if (JxlEncoderSetBasicInfo(enc.get(), &basic_info) != JXL_ENC_SUCCESS) {
+    fprintf(stderr, "JxlEncoderSetBasicInfo failed\n");
+    return false;
+  }
+
+  JxlColorEncoding color_encoding = {};
+  const bool is_gray = pixel_format.num_channels < 3;
+  JxlColorEncodingSetToSRGB(&color_encoding, is_gray);
+  if (JxlEncoderSetColorEncoding(enc.get(), &color_encoding) !=
+      JXL_ENC_SUCCESS) {
+    fprintf(stderr, "JxlEncoderSetColorEncoding failed\n");
+    return false;
+  }
+
+  // Queue the single frame, then signal that no further input follows.
+  JxlEncoderFrameSettings* frame_settings =
+      JxlEncoderFrameSettingsCreate(enc.get(), nullptr);
+  const size_t input_bytes = sizeof(float) * pixels.size();
+  if (JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+                              (void*)pixels.data(),
+                              input_bytes) != JXL_ENC_SUCCESS) {
+    fprintf(stderr, "JxlEncoderAddImageFrame failed\n");
+    return false;
+  }
+  JxlEncoderCloseInput(enc.get());
+
+  // Drain the encoder into a buffer that starts at 64 bytes and doubles each
+  // time the encoder reports that it needs more room.
+  compressed->resize(64);
+  uint8_t* cursor = compressed->data();
+  size_t room = compressed->size();
+  JxlEncoderStatus status;
+  for (;;) {
+    status = JxlEncoderProcessOutput(enc.get(), &cursor, &room);
+    if (status != JXL_ENC_NEED_MORE_OUTPUT) {
+      break;
+    }
+    // resize() may move the storage, so re-derive the cursor from its offset.
+    const size_t used = cursor - compressed->data();
+    compressed->resize(compressed->size() * 2);
+    cursor = compressed->data() + used;
+    room = compressed->size() - used;
+  }
+  // Drop the unused tail of the buffer.
+  compressed->resize(cursor - compressed->data());
+  if (status != JXL_ENC_SUCCESS) {
+    fprintf(stderr, "JxlEncoderProcessOutput failed\n");
+    return false;
+  }
+
+  return true;
+}
+
+/**
+ * Stores `bytes` in the file `filename`, replacing any existing file.
+ * Prints a diagnostic to stderr and returns false if the file cannot be
+ * opened, fully written or closed.
+ */
+bool WriteFile(const std::vector<uint8_t>& bytes, const char* filename) {
+  FILE* sink = fopen(filename, "wb");
+  if (sink == nullptr) {
+    fprintf(stderr, "Could not open %s for writing\n", filename);
+    return false;
+  }
+
+  const size_t expected = bytes.size();
+  const size_t written = fwrite(bytes.data(), sizeof(uint8_t), expected, sink);
+  // The stream is closed on every path; a short write takes precedence over a
+  // close failure when choosing which problem to report.
+  const bool closed = fclose(sink) == 0;
+
+  if (written != expected) {
+    fprintf(stderr, "Could not write bytes to %s\n", filename);
+    return false;
+  }
+  if (!closed) {
+    fprintf(stderr, "Could not close %s\n", filename);
+    return false;
+  }
+  return true;
+}
+
+// Command-line driver: reads the PFM image named by argv[1], encodes it as
+// JPEG XL and writes the result to argv[2]. Exit codes: 0 success, 1 bad
+// usage, 2 input could not be loaded, 3 encoding failed, 4 output could not
+// be written.
+int main(int argc, char* argv[]) {
+  if (argc != 3) {
+    fprintf(stderr,
+            "Usage: %s <pfm> <jxl>\n"
+            "Where:\n"
+            "  pfm = input Portable FloatMap image filename\n"
+            "  jxl = output JPEG XL image filename\n"
+            "Output files will be overwritten.\n",
+            argv[0]);
+    return 1;
+  }
+
+  const char* input_path = argv[1];
+  const char* output_path = argv[2];
+
+  std::vector<float> rgb;
+  uint32_t width;
+  uint32_t height;
+  const bool loaded = ReadPFM(input_path, &rgb, &width, &height);
+  if (!loaded) {
+    fprintf(stderr, "Couldn't load %s\n", input_path);
+    return 2;
+  }
+
+  std::vector<uint8_t> encoded;
+  const bool encoded_ok = EncodeJxlOneshot(rgb, width, height, &encoded);
+  if (!encoded_ok) {
+    fprintf(stderr, "Couldn't encode jxl\n");
+    return 3;
+  }
+
+  const bool written = WriteFile(encoded, output_path);
+  if (!written) {
+    fprintf(stderr, "Couldn't write jxl file\n");
+    return 4;
+  }
+
+  return 0;
+}
diff --git a/third-party/libjxl/libjxl/examples/examples.cmake b/third-party/libjxl/libjxl/examples/examples.cmake
new file mode 100644
index 0000000000..fd159578bc
--- /dev/null
+++ b/third-party/libjxl/libjxl/examples/examples.cmake
@@ -0,0 +1,11 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Each example is a single-source executable. Sources are located through
+# CMAKE_CURRENT_LIST_DIR, i.e. relative to this file rather than to the
+# directory of whichever CMakeLists.txt includes it.
+
+# decode_oneshot: links against jxl_dec and jxl_threads.
+add_executable(decode_oneshot ${CMAKE_CURRENT_LIST_DIR}/decode_oneshot.cc)
+target_link_libraries(decode_oneshot jxl_dec jxl_threads)
+# decode_progressive: links against jxl_dec and jxl_threads.
+add_executable(decode_progressive ${CMAKE_CURRENT_LIST_DIR}/decode_progressive.cc)
+target_link_libraries(decode_progressive jxl_dec jxl_threads)
+# encode_oneshot: links against jxl and jxl_threads.
+add_executable(encode_oneshot ${CMAKE_CURRENT_LIST_DIR}/encode_oneshot.cc)
+target_link_libraries(encode_oneshot jxl jxl_threads)
diff --git a/third-party/libjxl/libjxl/lib/BUILD b/third-party/libjxl/libjxl/lib/BUILD
new file mode 100644
index 0000000000..8aa803dc7f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/BUILD
@@ -0,0 +1,298 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Load sources/headers/tests lists.
+load(
+    "jxl_lists.bzl",
+    "libjxl_base_sources",
+    "libjxl_codec_apng_sources",
+    "libjxl_codec_exr_sources",
+    "libjxl_codec_gif_sources",
+    "libjxl_codec_jpegli_sources",
+    "libjxl_codec_jpg_sources",
+    "libjxl_codec_jxl_sources",
+    "libjxl_codec_npy_sources",
+    "libjxl_codec_pgx_sources",
+    "libjxl_codec_pnm_sources",
+    "libjxl_dec_box_sources",
+    "libjxl_dec_jpeg_sources",
+    "libjxl_dec_sources",
+    "libjxl_enc_sources",
+    "libjxl_extras_for_tools_sources",
+    "libjxl_extras_sources",
+    #'libjxl_gbench_sources',
+    "libjxl_jpegli_lib_version",
+    "libjxl_jpegli_libjpeg_helper_files",
+    "libjxl_jpegli_sources",
+    "libjxl_jpegli_testlib_files",
+    "libjxl_jpegli_tests",
+    "libjxl_major_version",
+    "libjxl_minor_version",
+    "libjxl_patch_version",
+    "libjxl_public_headers",
+    "libjxl_testlib_files",
+    "libjxl_tests",
+    "libjxl_threads_public_headers",
+    "libjxl_threads_sources",
+)
+load(
+    "jxl_vars.bzl",
+    "libjxl_deps_brotli",
+    "libjxl_deps_exr",
+    "libjxl_deps_gif",
+    "libjxl_deps_gtest",
+    "libjxl_deps_hwy",
+    "libjxl_deps_hwy_nanobenchmark",
+    "libjxl_deps_hwy_test_util",
+    "libjxl_deps_jpeg",
+    "libjxl_deps_jxl_box",
+    "libjxl_deps_png",
+    "libjxl_deps_runfiles",
+    "libjxl_deps_skcms",
+    "libjxl_deps_testdata",
+    "libjxl_root_package",
+    "libjxl_test_shards",
+    "libjxl_test_timeouts",
+)
+load("@bazel_skylib//rules:expand_template.bzl", "expand_template")
+load("@bazel_skylib//rules:copy_file.bzl", "copy_file")
+
+DEFAULT_VISIBILITY = ["//:__subpackages__"]
+
+DEFAULT_COMPATIBILITY = []
+
+INCLUDES_DIR = "include"
+
+# Use the DEFAULT_VISIBILITY constant declared above so the package-wide
+# visibility is spelled out in exactly one place.
+package(
+    default_visibility = DEFAULT_VISIBILITY,
+)
+
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+EXPORT_TEMPLATE = """
+#ifndef @_EXPORT_H
+#define @_EXPORT_H
+
+#define @_EXPORT
+#define @_NO_EXPORT
+
+#ifndef @_DEPRECATED
+#  define @_DEPRECATED __attribute__ ((__deprecated__))
+#endif
+
+#endif
+"""
+
+JXL_EXPORT_H = INCLUDES_DIR + "/jxl/jxl_export.h"
+
+genrule(
+    name = "create_jxl_export",
+    outs = [JXL_EXPORT_H],
+    cmd = "echo '" + EXPORT_TEMPLATE.replace("@", "JXL") + "' > $@",
+    compatible_with = DEFAULT_COMPATIBILITY,
+)
+
+JXL_THREADS_EXPORT_H = INCLUDES_DIR + "/jxl/jxl_threads_export.h"
+
+genrule(
+    name = "create_jxl_threads_export",
+    outs = [JXL_THREADS_EXPORT_H],
+    cmd = "echo '" + EXPORT_TEMPLATE.replace("@", "JXL_THREADS") + "' > $@",
+    compatible_with = DEFAULT_COMPATIBILITY,
+)
+
+JXL_VERSION_H = INCLUDES_DIR + "/jxl/version.h"
+
+expand_template(
+    name = "expand_jxl_version",
+    out = JXL_VERSION_H,
+    compatible_with = DEFAULT_COMPATIBILITY,
+    substitutions = {
+        "@JPEGXL_MAJOR_VERSION@": str(libjxl_major_version),
+        "@JPEGXL_MINOR_VERSION@": str(libjxl_minor_version),
+        "@JPEGXL_PATCH_VERSION@": str(libjxl_patch_version),
+    },
+    template = "jxl/version.h.in",
+)
+
+cc_library(
+    name = "jxl_version",
+    hdrs = [JXL_VERSION_H],
+    compatible_with = DEFAULT_COMPATIBILITY,
+    strip_include_prefix = INCLUDES_DIR,
+)
+
+JPEGLI_JCONFIG_H = INCLUDES_DIR + "/jpegli/jconfig.h"
+
+JPEGLI_JMORECFG_H = INCLUDES_DIR + "/jpegli/jmorecfg.h"
+
+JPEGLI_JPEGLIB_H = INCLUDES_DIR + "/jpegli/jpeglib.h"
+
+copy_file(
+    name = "expand_jconfig",
+    src = "@libjpeg_turbo//:jconfig.h",
+    out = JPEGLI_JCONFIG_H,
+    compatible_with = DEFAULT_COMPATIBILITY,
+)
+
+copy_file(
+    name = "copy_jmorecfg",
+    src = "@libjpeg_turbo//:jmorecfg.h",
+    out = JPEGLI_JMORECFG_H,
+    compatible_with = DEFAULT_COMPATIBILITY,
+)
+
+copy_file(
+    name = "copy_jpeglib",
+    src = "@libjpeg_turbo//:jpeglib.h",
+    out = JPEGLI_JPEGLIB_H,
+    compatible_with = DEFAULT_COMPATIBILITY,
+)
+
+cc_library(
+    name = "includes",
+    hdrs = libjxl_public_headers + [JXL_EXPORT_H],
+    compatible_with = DEFAULT_COMPATIBILITY,
+    strip_include_prefix = INCLUDES_DIR,
+    deps = [":jxl_version"],
+)
+
+cc_library(
+    name = "libjpeg_includes",
+    hdrs = [
+        JPEGLI_JCONFIG_H,
+        JPEGLI_JMORECFG_H,
+        JPEGLI_JPEGLIB_H,
+    ],
+    compatible_with = DEFAULT_COMPATIBILITY,
+    strip_include_prefix = INCLUDES_DIR + "/jpegli",
+)
+
+cc_library(
+    name = "base",
+    srcs = [path for path in libjxl_base_sources if path.endswith(".cc")],
+    hdrs = [path for path in libjxl_base_sources if path.endswith(".h")],
+    compatible_with = DEFAULT_COMPATIBILITY,
+    deps = [
+        ":includes",
+    ] + libjxl_deps_hwy,
+)
+
+cc_library(
+    name = "jpegxl",
+    srcs = libjxl_dec_sources + libjxl_dec_box_sources + libjxl_dec_jpeg_sources + libjxl_enc_sources,
+    compatible_with = DEFAULT_COMPATIBILITY,
+    defines = ["JPEGXL_ENABLE_SKCMS=1"],
+    deps = [
+        ":base",
+        ":includes",
+    ] + libjxl_deps_brotli + libjxl_deps_hwy + libjxl_deps_skcms,
+)
+
+cc_library(
+    name = "jpegxl_private",
+    hdrs = [
+        path
+        for path in libjxl_dec_sources + libjxl_dec_box_sources + libjxl_dec_jpeg_sources + libjxl_enc_sources
+        if path.endswith(".h") and not path.endswith("-inl.h")
+    ],
+    compatible_with = DEFAULT_COMPATIBILITY,
+    deps = [":jpegxl"],
+)
+
+cc_library(
+    name = "jpegxl_threads",
+    srcs = libjxl_threads_sources,
+    hdrs = libjxl_threads_public_headers + [JXL_THREADS_EXPORT_H],
+    compatible_with = DEFAULT_COMPATIBILITY,
+    strip_include_prefix = INCLUDES_DIR,
+    deps = [
+        ":base",
+        ":includes",
+    ],
+)
+
+CODEC_FILES = libjxl_codec_apng_sources + libjxl_codec_exr_sources + libjxl_codec_gif_sources + libjxl_codec_jpegli_sources + libjxl_codec_jpg_sources + libjxl_codec_jxl_sources + libjxl_codec_npy_sources + libjxl_codec_pgx_sources + libjxl_codec_pnm_sources
+
+CODEC_SRCS = [path for path in CODEC_FILES if path.endswith(".cc")]
+
+CODEC_HDRS = [path for path in CODEC_FILES if path.endswith(".h")]
+
+cc_library(
+    name = "jpegli",
+    srcs = libjxl_jpegli_sources,
+    hdrs = [
+        "jpegli/common_internal.h",  # TODO(eustas): should not be here
+    ],
+    compatible_with = DEFAULT_COMPATIBILITY,
+    deps = [
+        ":jpegxl_private",
+        ":libjpeg_includes",
+    ] + libjxl_deps_hwy,
+)
+
+# TODO(eustas): build codecs separately?
+cc_library(
+    name = "jpegxl_extras",
+    srcs = libjxl_extras_sources + libjxl_extras_for_tools_sources + CODEC_SRCS,
+    hdrs = CODEC_HDRS,
+    compatible_with = DEFAULT_COMPATIBILITY,
+    defines = [
+        "JPEGXL_ENABLE_APNG=1",
+        "JPEGXL_ENABLE_EXR=1",
+        "JPEGXL_ENABLE_GIF=1",
+        "JPEGXL_ENABLE_JPEG=1",
+        "JPEGXL_ENABLE_JPEGLI=1",
+    ],
+    deps = [
+        ":jpegli",
+        ":jpegxl_private",
+        ":jpegxl_threads",
+        ":jxl_version",
+    ] + libjxl_deps_exr + libjxl_deps_gif + libjxl_deps_jpeg + libjxl_deps_png,
+)
+
+TESTLIB_FILES = libjxl_testlib_files + libjxl_jpegli_testlib_files + libjxl_jpegli_libjpeg_helper_files
+
+cc_library(
+    name = "test_utils",
+    testonly = 1,
+    srcs = [path for path in TESTLIB_FILES if not path.endswith(".h")],
+    hdrs = [path for path in TESTLIB_FILES if path.endswith(".h")],
+    compatible_with = DEFAULT_COMPATIBILITY,
+    defines = [
+        'JPEGXL_ROOT_PACKAGE=\'"' + libjxl_root_package + '"\'',
+    ],
+    deps = [
+        ":jpegli",
+        ":jpegxl_extras",
+        ":jpegxl_private",
+    ] + libjxl_deps_runfiles,
+)
+
+TESTS = [path.partition(".")[0] for path in libjxl_tests + libjxl_jpegli_tests]
+
+[
+    cc_test(
+        name = test,
+        timeout = libjxl_test_timeouts.get(test, "moderate"),
+        srcs = [
+            test + ".cc",
+            "jpegli/testing.h",
+            "jxl/testing.h",
+        ],
+        data = ["//:testdata"],
+        shard_count = libjxl_test_shards.get(test, 1),
+        deps = [
+            ":jpegxl_extras",
+            ":jpegxl_private",
+            ":jpegxl_threads",
+            ":test_utils",
+        ] + libjxl_deps_gtest + libjxl_deps_hwy_test_util + libjxl_deps_hwy_nanobenchmark + libjxl_deps_jxl_box,
+    )
+    for test in TESTS
+]
diff --git a/third-party/libjxl/libjxl/lib/CMakeLists.txt b/third-party/libjxl/libjxl/lib/CMakeLists.txt
new file mode 100644
index 0000000000..24961db5e5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/CMakeLists.txt
@@ -0,0 +1,167 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+set(JPEGXL_MAJOR_VERSION 0)
+set(JPEGXL_MINOR_VERSION 9)
+set(JPEGXL_PATCH_VERSION 0)
+set(JPEGXL_LIBRARY_VERSION
+    "${JPEGXL_MAJOR_VERSION}.${JPEGXL_MINOR_VERSION}.${JPEGXL_PATCH_VERSION}")
+
+# This is the library API/ABI compatibility version. Changing this value makes
+# the shared library incompatible with previous versions. A program linked
+# against this shared library SOVERSION will not run with an older SOVERSION.
+# It is important to update this value when making incompatible API/ABI changes
+# so that programs that depend on libjxl can update their dependencies. Semantic
+# versioning allows 0.y.z to have incompatible changes in minor versions.
+set(JPEGXL_SO_MINOR_VERSION 9)
+if (JPEGXL_MAJOR_VERSION EQUAL 0)  # 0.y.z: SOVERSION is "<major>.<so-minor>".
+  set(JPEGXL_LIBRARY_SOVERSION
+      "${JPEGXL_MAJOR_VERSION}.${JPEGXL_SO_MINOR_VERSION}")
+else()  # Any other major version: SOVERSION is the major number alone.
+  set(JPEGXL_LIBRARY_SOVERSION "${JPEGXL_MAJOR_VERSION}")
+endif()
+
+
+# List of warning and feature flags for our library and tests.
+if (MSVC)
+  set(JPEGXL_INTERNAL_FLAGS
+    # TODO(janwas): add flags
+  )
+else ()
+  set(JPEGXL_INTERNAL_FLAGS
+    # F_FLAGS
+    -fmerge-all-constants
+    -fno-builtin-fwrite
+    -fno-builtin-fread
+
+    # WARN_FLAGS
+    -Wall
+    -Wextra
+    -Wc++11-compat
+    -Warray-bounds
+    -Wformat-security
+    -Wimplicit-fallthrough
+    -Wno-register  # Needed by public headers in lcms
+    -Wno-unused-function
+    -Wno-unused-parameter
+    -Wnon-virtual-dtor
+    -Woverloaded-virtual
+    -Wvla
+  )
+
+  # Warning flags supported by clang.
+  if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+    list(APPEND JPEGXL_INTERNAL_FLAGS
+      -Wdeprecated-increment-bool
+      # TODO(deymo): Add -Wextra-semi once we update third_party/highway.
+      # -Wextra-semi
+      -Wfloat-overflow-conversion
+      -Wfloat-zero-conversion
+      -Wfor-loop-analysis
+      -Wgnu-redeclared-enum
+      -Winfinite-recursion
+      -Wliteral-conversion
+      -Wno-c++98-compat
+      -Wno-unused-command-line-argument
+      -Wprivate-header
+      -Wself-assign
+      -Wstring-conversion
+      -Wtautological-overlap-compare
+      -Wthread-safety-analysis
+      -Wundefined-func-template
+      -Wunreachable-code
+      -Wunused-comparison
+    )
+    if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.0)
+      list(APPEND HWY_FLAGS -Wc++2a-extensions)  # NOTE(review): HWY_FLAGS is not read anywhere in this file; JPEGXL_INTERNAL_FLAGS may have been intended - confirm.
+    endif()
+  endif()  # Clang
+
+  if (WIN32)  # Non-MSVC toolchain targeting Windows: only silences warnings.
+    list(APPEND JPEGXL_INTERNAL_FLAGS
+      -Wno-cast-align
+      -Wno-double-promotion
+      -Wno-float-equal
+      -Wno-format-nonliteral
+      -Wno-shadow
+      -Wno-sign-conversion
+      -Wno-zero-as-null-pointer-constant
+    )
+
+    if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+      list(APPEND JPEGXL_INTERNAL_FLAGS
+        -Wno-used-but-marked-unused
+        -Wno-unused-template
+        -Wno-unused-member-function
+        -Wno-shadow-field-in-constructor
+        -Wno-language-extension-token
+        -Wno-global-constructors
+        -Wno-c++98-compat-pedantic
+      )
+    endif()  # Clang
+  else()  # WIN32
+    list(APPEND JPEGXL_INTERNAL_FLAGS
+      -fsized-deallocation
+      -fno-exceptions
+
+      # Language flags
+      -fmath-errno
+    )
+
+    if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+      list(APPEND JPEGXL_INTERNAL_FLAGS
+        -fnew-alignment=8
+        -fno-cxx-exceptions
+        -fno-slp-vectorize
+        -fno-vectorize
+
+        -disable-free
+        -disable-llvm-verifier
+      )
+    endif()  # Clang
+  endif()  # WIN32
+endif()  #!MSVC
+# Strips the -static suffix from all the elements of the list named by LIB_LIST.
+function(strip_static OUTPUT_VAR LIB_LIST)
+  set(out_list "")  # Function scopes inherit the caller's variables; start empty.
+  foreach(lib IN LISTS ${LIB_LIST})
+    string(REGEX REPLACE "-static$" "" lib "${lib}")
+    list(APPEND out_list "${lib}")
+  endforeach()
+  set(${OUTPUT_VAR} ${out_list} PARENT_SCOPE)
+endfunction()
+
+# The jxl library definition.
+include(jxl.cmake)
+
+# Other libraries outside the core jxl library.
+if(JPEGXL_ENABLE_TOOLS)
+  include(jxl_extras.cmake)
+endif()
+include(jxl_threads.cmake)
+if (JPEGXL_ENABLE_JPEGLI)
+  include(jpegli.cmake)
+endif()
+
+# Install all the library headers from the source and the generated ones. There
+# is no distinction on which libraries use which header since it is expected
+# that all developer libraries are available together at build time.
+install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/jxl
+  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
+install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/jxl
+  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
+
+if(BUILD_TESTING)
+  cmake_policy(SET CMP0057 NEW)  # https://gitlab.kitware.com/cmake/cmake/issues/18198
+  include(GoogleTest)  # CMake module that provides gtest_discover_tests().
+endif()
+
+# Tests for the jxl library. NOTE(review): included even when BUILD_TESTING is off; presumably jxl_tests.cmake guards its own targets - confirm.
+include(jxl_tests.cmake)
+
+if(BUILD_TESTING)
+  # Google benchmark for the jxl library
+  include(jxl_benchmark.cmake)
+endif()
diff --git a/third-party/libjxl/libjxl/lib/compatibility.cmake b/third-party/libjxl/libjxl/lib/compatibility.cmake
new file mode 100644
index 0000000000..9d99d29482
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/compatibility.cmake
@@ -0,0 +1,30 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+function(jxl_discover_tests TESTNAME)
+  set(timeout_option DISCOVERY_TIMEOUT)
+  if (CMAKE_VERSION VERSION_LESS "3.10.3")
+    set(timeout_option TIMEOUT)  # Name of the same option before CMake 3.10.3.
+  endif ()
+  gtest_discover_tests(${TESTNAME} ${timeout_option} 240)
+endfunction()
+
+function(jxl_link_libraries DST SRC)
+  if (NOT CMAKE_VERSION VERSION_LESS "3.13.5")
+    target_link_libraries(${DST} PUBLIC ${SRC})
+  else()
+    # Fallback: forward SRC's system include dirs and build SRC before DST.
+    target_include_directories(${DST} SYSTEM PUBLIC
+       $<BUILD_INTERFACE:$<TARGET_PROPERTY:${SRC},INTERFACE_SYSTEM_INCLUDE_DIRECTORIES>>)
+    add_dependencies(${DST} ${SRC})
+  endif()
+endfunction()
+
+# Highway include directories: hwy::hwy is used when that target exists, else hwy (CMake < 3.12.4 always uses plain hwy).
+if (CMAKE_VERSION VERSION_LESS "3.12.4")
+  set(JXL_HWY_INCLUDE_DIRS "$<BUILD_INTERFACE:$<TARGET_PROPERTY:hwy,INTERFACE_INCLUDE_DIRECTORIES>>")
+else()
+  set(JXL_HWY_INCLUDE_DIRS "$<BUILD_INTERFACE:$<TARGET_PROPERTY:$<IF:$<TARGET_EXISTS:hwy::hwy>,hwy::hwy,hwy>,INTERFACE_INCLUDE_DIRECTORIES>>")
+endif()
diff --git a/third-party/libjxl/libjxl/lib/extras/LICENSE.apngdis b/third-party/libjxl/libjxl/lib/extras/LICENSE.apngdis
new file mode 100644
index 0000000000..eb0ba7c07b
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/LICENSE.apngdis
@@ -0,0 +1,27 @@
+APNG Disassembler 2.8
+
+Deconstructs APNG files into individual frames.
+
+http://apngdis.sourceforge.net
+
+Copyright (c) 2010-2015 Max Stepin
+maxst at users.sourceforge.net
+
+zlib license
+------------
+
+This software is provided 'as-is', without any express or implied
+warranty.  In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
diff --git a/third-party/libjxl/libjxl/lib/extras/README.md b/third-party/libjxl/libjxl/lib/extras/README.md
new file mode 100644
index 0000000000..06a9b5ea07
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/README.md
@@ -0,0 +1,5 @@
+## JPEG XL "extras"
+
+The files in this directory do not form part of the library or codec and are
+only used by tests or specific internal tools that have access to the internals
+of the library.
diff --git a/third-party/libjxl/libjxl/lib/extras/codec.cc b/third-party/libjxl/libjxl/lib/extras/codec.cc
new file mode 100644
index 0000000000..fb590a8a94
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/codec.cc
@@ -0,0 +1,173 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/codec.h"
+
+#include <jxl/decode.h>
+#include <jxl/types.h>
+
+#include "lib/extras/dec/decode.h"
+#include "lib/extras/enc/apng.h"
+#include "lib/extras/enc/exr.h"
+#include "lib/extras/enc/jpg.h"
+#include "lib/extras/enc/pgx.h"
+#include "lib/extras/enc/pnm.h"
+#include "lib/extras/packed_image.h"
+#include "lib/extras/packed_image_convert.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+namespace {
+
+// Any valid encoding is larger (ensures codecs can read the first few bytes)
+constexpr size_t kMinBytes = 9;
+
+}  // namespace
+
+Status SetFromBytes(const Span<const uint8_t> bytes,
+                    const extras::ColorHints& color_hints, CodecInOut* io,
+                    ThreadPool* pool, const SizeConstraints* constraints,
+                    extras::Codec* orig_codec) {
+  if (bytes.size() < kMinBytes) return JXL_FAILURE("Too few bytes");
+  extras::PackedPixelFile decoded;  // Decoded first, then converted into io.
+  if (!extras::DecodeBytes(bytes, color_hints, &decoded, constraints,
+                           orig_codec)) {
+    return JXL_FAILURE("Codecs failed to decode");
+  }
+  return ConvertPackedPixelFileToCodecInOut(decoded, pool, io);
+}
+
+Status Encode(const CodecInOut& io, const extras::Codec codec,
+              const ColorEncoding& c_desired, size_t bits_per_sample,
+              std::vector<uint8_t>* bytes, ThreadPool* pool) {
+  bytes->clear();  // Output stays empty on every failure return below.
+  JXL_CHECK(!io.Main().c_current().ICC().empty());
+  JXL_CHECK(!c_desired.ICC().empty());
+  io.CheckMetadata();
+  if (io.Main().IsJPEG()) {
+    JXL_WARNING("Writing JPEG data as pixels");
+  }
+  JxlPixelFormat format = {
+      0,  // num_channels is ignored by the converter
+      bits_per_sample <= 8 ? JXL_TYPE_UINT8 : JXL_TYPE_UINT16, JXL_BIG_ENDIAN,
+      0};
+  const bool floating_point = bits_per_sample > 16;  // Only read for kPNM.
+  std::unique_ptr<extras::Encoder> encoder;
+  std::ostringstream os;  // Only used to format the JPEG quality option.
+  switch (codec) {
+    case extras::Codec::kPNG:
+      encoder = extras::GetAPNGEncoder();
+      if (encoder) {
+        break;
+      } else {
+        return JXL_FAILURE("JPEG XL was built without (A)PNG support");
+      }
+    case extras::Codec::kJPG:
+      format.data_type = JXL_TYPE_UINT8;  // JPEG path always uses 8-bit.
+      encoder = extras::GetJPEGEncoder();
+      if (encoder) {
+        os << io.jpeg_quality;
+        encoder->SetOption("q", os.str());
+        break;
+      } else {
+        return JXL_FAILURE("JPEG XL was built without JPEG support");
+      }
+    case extras::Codec::kPNM:
+      if (io.Main().HasAlpha()) {
+        encoder = extras::GetPAMEncoder();
+      } else if (io.Main().IsGray()) {
+        encoder = extras::GetPGMEncoder();
+      } else if (!floating_point) {
+        encoder = extras::GetPPMEncoder();
+      } else {
+        format.data_type = JXL_TYPE_FLOAT;  // PFM: float, little-endian.
+        format.endianness = JXL_LITTLE_ENDIAN;
+        encoder = extras::GetPFMEncoder();
+      }
+      break;
+    case extras::Codec::kPGX:
+      encoder = extras::GetPGXEncoder();
+      break;
+    case extras::Codec::kGIF:
+      return JXL_FAILURE("Encoding to GIF is not implemented");
+    case extras::Codec::kEXR:
+      format.data_type = JXL_TYPE_FLOAT;
+      encoder = extras::GetEXREncoder();
+      if (encoder) {
+        break;
+      } else {
+        return JXL_FAILURE("JPEG XL was built without OpenEXR support");
+      }
+    case extras::Codec::kJXL:
+      return JXL_FAILURE("TODO: encode using Codec::kJXL");
+
+    case extras::Codec::kUnknown:
+      return JXL_FAILURE("Cannot encode using Codec::kUnknown");
+  }
+
+  if (!encoder) {  // Null kPNM/kPGX encoder, or an out-of-range codec value.
+    return JXL_FAILURE("Invalid codec.");
+  }
+
+  extras::PackedPixelFile ppf;
+  JXL_RETURN_IF_ERROR(
+      ConvertCodecInOutToPackedPixelFile(io, format, c_desired, pool, &ppf));
+  ppf.info.bits_per_sample = bits_per_sample;
+  if (format.data_type == JXL_TYPE_FLOAT) {  // Float overrides the depth.
+    ppf.info.bits_per_sample = 32;
+    ppf.info.exponent_bits_per_sample = 8;
+  }
+  extras::EncodedImage encoded_image;
+  JXL_RETURN_IF_ERROR(encoder->Encode(ppf, &encoded_image, pool));
+  JXL_ASSERT(encoded_image.bitstreams.size() == 1);
+  *bytes = encoded_image.bitstreams[0];
+
+  return true;
+}
+
+Status Encode(const CodecInOut& io, const ColorEncoding& c_desired,
+              size_t bits_per_sample, const std::string& pathname,
+              std::vector<uint8_t>* bytes, ThreadPool* pool) {
+  std::string extension;  // Filled in by CodecFromPath.
+  const extras::Codec codec = extras::CodecFromPath(
+      pathname, &bits_per_sample, /* basename */ nullptr, &extension);
+
+  // Warn about incorrect usage of PGM/PGX/PPM - only the latter supports
+  // color, but CodecFromPath lumps them all together.
+  if (codec == extras::Codec::kPNM && extension != ".pfm") {
+    if (io.Main().HasAlpha() && extension != ".pam") {
+      JXL_WARNING(
+          "For images with alpha, the filename should end with .pam.\n");
+    } else if (!io.Main().IsGray() && extension == ".pgm") {
+      JXL_WARNING("For color images, the filename should end with .ppm.\n");
+    } else if (io.Main().IsGray() && extension == ".ppm") {
+      JXL_WARNING(
+          "For grayscale images, the filename should not end with .ppm.\n");
+    }
+    if (bits_per_sample > 16) {
+      JXL_WARNING("PPM only supports up to 16 bits per sample");
+      bits_per_sample = 16;  // Clamp to the limit just warned about.
+    }
+  } else if (codec == extras::Codec::kPGX && !io.Main().IsGray()) {
+    JXL_WARNING("Storing color image to PGX - use .ppm extension instead.\n");
+  }
+  if (bits_per_sample > 16 && codec == extras::Codec::kPNG) {
+    JXL_WARNING("PNG only supports up to 16 bits per sample");
+    bits_per_sample = 16;  // Clamp to the limit just warned about.
+  }
+
+  return Encode(io, codec, c_desired, bits_per_sample, bytes, pool);
+}
+
+Status Encode(const CodecInOut& io, const std::string& pathname,
+              std::vector<uint8_t>* bytes, ThreadPool* pool) {
+  // TODO(lode): need to take the floating_point_sample field into account
+  return Encode(io, io.metadata.m.color_encoding,  // io's own encoding/depth.
+                io.metadata.m.bit_depth.bits_per_sample, pathname, bytes, pool);
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/codec.h b/third-party/libjxl/libjxl/lib/extras/codec.h
new file mode 100644
index 0000000000..4ad75fd97d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/codec.h
@@ -0,0 +1,63 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_CODEC_H_
+#define LIB_EXTRAS_CODEC_H_
+
+// Facade for image encoders/decoders (PNG, PNM, ...).
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "lib/extras/dec/color_hints.h"
+#include "lib/extras/dec/decode.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/field_encodings.h"  // MakeBit
+
+namespace jxl {
+
+struct SizeConstraints;
+
+// Decodes "bytes" and sets io->metadata.m.
+// color_hints may specify the color space, otherwise, defaults to sRGB.
+Status SetFromBytes(Span<const uint8_t> bytes,
+                    const extras::ColorHints& color_hints, CodecInOut* io,
+                    ThreadPool* pool = nullptr,
+                    const SizeConstraints* constraints = nullptr,
+                    extras::Codec* orig_codec = nullptr);
+// Convenience overload that decodes with default-constructed ColorHints.
+JXL_INLINE Status SetFromBytes(const Span<const uint8_t> bytes, CodecInOut* io,
+                               ThreadPool* pool = nullptr,
+                               const SizeConstraints* constraints = nullptr,
+                               extras::Codec* orig_codec = nullptr) {
+  const extras::ColorHints no_hints;
+  return SetFromBytes(bytes, no_hints, io, pool, constraints, orig_codec);
+}
+
+// Replaces "bytes" with an encoding of pixels transformed from c_current
+// color space to c_desired.
+Status Encode(const CodecInOut& io, extras::Codec codec,
+              const ColorEncoding& c_desired, size_t bits_per_sample,
+              std::vector<uint8_t>* bytes, ThreadPool* pool = nullptr);
+
+// Deduces codec from pathname and calls Encode; fills "bytes", writes no file.
+Status Encode(const CodecInOut& io, const ColorEncoding& c_desired,
+              size_t bits_per_sample, const std::string& pathname,
+              std::vector<uint8_t>* bytes, ThreadPool* pool = nullptr);
+// Same, but defaults to io.metadata.m color_encoding and bits_per_sample.
+Status Encode(const CodecInOut& io, const std::string& pathname,
+              std::vector<uint8_t>* bytes, ThreadPool* pool = nullptr);
+
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_CODEC_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/codec_test.cc b/third-party/libjxl/libjxl/lib/extras/codec_test.cc
new file mode 100644
index 0000000000..0ad540533b
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/codec_test.cc
@@ -0,0 +1,450 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/codec.h"
+
+#include <stddef.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/dec/decode.h"
+#include "lib/extras/dec/pnm.h"
+#include "lib/extras/enc/encode.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+
+using test::ThreadPoolForTests;
+
+namespace extras {
+namespace {
+
+using ::testing::AllOf;
+using ::testing::Contains;
+using ::testing::Field;
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
+
+std::string ExtensionFromCodec(Codec codec, const bool is_gray,
+                               const bool has_alpha,
+                               const size_t bits_per_sample) {
+  // Codecs that always map to one extension.
+  if (codec == Codec::kJPG) return ".jpg";
+  if (codec == Codec::kPGX) return ".pgx";
+  if (codec == Codec::kPNG) return ".png";
+  if (codec == Codec::kEXR) return ".exr";
+
+  // PNM is a family: 32-bit samples use .pfm, alpha uses .pam, and the
+  // remaining cases are split into gray (.pgm) and color (.ppm).
+  if (codec == Codec::kPNM) {
+    if (bits_per_sample == 32) return ".pfm";
+    if (has_alpha) return ".pam";
+    if (is_gray) return ".pgm";
+    return ".ppm";
+  }
+  // Every other codec yields an empty extension.
+  return std::string();
+}
+
+void VerifySameImage(const PackedImage& im0, size_t bits_per_sample0,
+                     const PackedImage& im1, size_t bits_per_sample1,
+                     bool lossless = true) {
+  ASSERT_EQ(im0.xsize, im1.xsize);
+  ASSERT_EQ(im0.ysize, im1.ysize);
+  ASSERT_EQ(im0.format.num_channels, im1.format.num_channels);
+  // Scale factor 1 / (2^n - 1), with n = min(data type precision, bits).
+  auto get_factor = [](JxlPixelFormat f, size_t bits) -> double {
+    return 1.0 / ((1u << std::min(test::GetPrecision(f.data_type), bits)) - 1);
+  };
+  double factor0 = get_factor(im0.format, bits_per_sample0);
+  double factor1 = get_factor(im1.format, bits_per_sample1);
+  auto pixels0 = static_cast<const uint8_t*>(im0.pixels());
+  auto pixels1 = static_cast<const uint8_t*>(im1.pixels());
+  auto rgba0 =
+      test::ConvertToRGBA32(pixels0, im0.xsize, im0.ysize, im0.format, factor0);
+  auto rgba1 =
+      test::ConvertToRGBA32(pixels1, im1.xsize, im1.ysize, im1.format, factor1);
+  double tolerance =  // Half the smaller factor if lossless, else 3/255.
+      lossless ? 0.5 * std::min(factor0, factor1) : 3.0f / 255.0f;
+  if (bits_per_sample0 == 32 || bits_per_sample1 == 32) {
+    tolerance = 0.5 * std::max(factor0, factor1);  // 32-bit: larger factor.
+  }
+  for (size_t y = 0; y < im0.ysize; ++y) {
+    for (size_t x = 0; x < im0.xsize; ++x) {
+      for (size_t c = 0; c < im0.format.num_channels; ++c) {
+        size_t ix = (y * im0.xsize + x) * 4 + c;  // 4 values per pixel.
+        double val0 = rgba0[ix];
+        double val1 = rgba1[ix];
+        ASSERT_NEAR(val1, val0, tolerance)
+            << "y = " << y << " x = " << x << " c = " << c;
+      }
+    }
+  }
+}
+
+JxlColorEncoding CreateTestColorEncoding(bool is_gray) {
+  JxlColorEncoding external;
+  external.transfer_function = JXL_TRANSFER_FUNCTION_LINEAR;
+  external.rendering_intent = JXL_RENDERING_INTENT_RELATIVE;
+  external.primaries = JXL_PRIMARIES_P3;
+  external.white_point = JXL_WHITE_POINT_D65;
+  external.color_space = is_gray ? JXL_COLOR_SPACE_GRAY : JXL_COLOR_SPACE_RGB;
+  // Only enum values are set above; a round trip through the internal type
+  // fills in the CIE xy coordinates of the primaries and white point.
+  ColorEncoding internal;
+  JXL_CHECK(ConvertExternalToInternalColorEncoding(external, &internal));
+  ConvertInternalToExternalColorEncoding(internal, &external);
+  return external;
+}
+
+std::vector<uint8_t> GenerateICC(JxlColorEncoding color_encoding) {
+  ColorEncoding internal;
+  JXL_CHECK(ConvertExternalToInternalColorEncoding(color_encoding, &internal));
+  JXL_CHECK(internal.CreateICC());
+  PaddedBytes profile = internal.ICC();  // Copied out of the encoding.
+  return std::vector<uint8_t>(profile.begin(), profile.end());
+}
+
+void StoreRandomValue(uint8_t* out, Rng* rng, JxlPixelFormat format,
+                      size_t bits_per_sample) {
+  uint64_t max_val = (1ull << bits_per_sample) - 1;  // Integer cases only.
+  if (format.data_type == JXL_TYPE_UINT8) {
+    *out = rng->UniformU(0, max_val);
+  } else if (format.data_type == JXL_TYPE_UINT16) {
+    uint32_t val = rng->UniformU(0, max_val);
+    if (format.endianness == JXL_BIG_ENDIAN) {
+      StoreBE16(val, out);
+    } else {
+      StoreLE16(val, out);
+    }
+  } else {
+    ASSERT_EQ(format.data_type, JXL_TYPE_FLOAT);  // No other type is handled.
+    float val = rng->UniformF(0.0, 1.0);
+    uint32_t uval;
+    memcpy(&uval, &val, 4);  // Store the float's bit pattern.
+    if (format.endianness == JXL_BIG_ENDIAN) {
+      StoreBE32(uval, out);
+    } else {
+      StoreLE32(uval, out);
+    }
+  }
+}
+
+void FillPackedImage(size_t bits_per_sample, PackedImage* image) {
+  const JxlPixelFormat format = image->format;
+  const size_t sample_size = PackedImage::BitsPerChannel(format.data_type) / 8;
+  const size_t row_size = image->xsize * format.num_channels * sample_size;
+  // The pixel buffer is expected to hold exactly ysize unpadded rows.
+  ASSERT_EQ(image->pixels_size, image->ysize * row_size);
+
+  // With no row padding, one flat pass over all samples writes the same
+  // addresses in the same order as nested y/x/channel loops would.
+  const size_t num_samples = image->ysize * image->xsize * format.num_channels;
+  uint8_t* cursor = static_cast<uint8_t*>(image->pixels());
+  Rng rng(129);
+  for (size_t i = 0; i < num_samples; ++i, cursor += sample_size) {
+    StoreRandomValue(cursor, &rng, format, bits_per_sample);
+  }
+}
+
+struct TestImageParams {
+  Codec codec;  // Format under test.
+  size_t xsize;
+  size_t ysize;
+  size_t bits_per_sample;
+  bool is_gray;
+  bool add_alpha;
+  bool big_endian;
+  bool add_extra_channels;  // CreateTestImage adds 7 extra channels if set.
+
+  bool ShouldTestRoundtrip() const {  // False makes TestRoundTrip a no-op.
+    if (codec == Codec::kPNG) {
+      return bits_per_sample <= 16;
+    } else if (codec == Codec::kPNM) {
+      // TODO(szabadka) Make PNM encoder endianness-aware.
+      return ((bits_per_sample <= 16 && big_endian) ||
+              (bits_per_sample == 32 && !add_alpha && !big_endian));
+    } else if (codec == Codec::kPGX) {
+      return ((bits_per_sample == 8 || bits_per_sample == 16) && is_gray &&
+              !add_alpha);
+    } else if (codec == Codec::kEXR) {
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+    defined(THREAD_SANITIZER)
+      // OpenEXR 2.3 has a memory leak in IlmThread_2_3::ThreadPool
+      return false;
+#else
+      return bits_per_sample == 32 && !is_gray;
+#endif
+    } else if (codec == Codec::kJPG) {
+      return bits_per_sample == 8 && !add_alpha;
+    } else {
+      return false;
+    }
+  }
+
+  JxlPixelFormat PixelFormat() const {  // Layout of the color channels.
+    JxlPixelFormat format;
+    format.num_channels = (is_gray ? 1 : 3) + (add_alpha ? 1 : 0);
+    format.data_type = (bits_per_sample == 32 ? JXL_TYPE_FLOAT
+                        : bits_per_sample > 8 ? JXL_TYPE_UINT16
+                                              : JXL_TYPE_UINT8);
+    format.endianness = big_endian ? JXL_BIG_ENDIAN : JXL_LITTLE_ENDIAN;
+    format.align = 0;
+    return format;
+  }
+
+  std::string DebugString() const {  // Omits codec, xsize and ysize.
+    std::ostringstream os;
+    os << "bps:" << bits_per_sample << " gr:" << is_gray << " al:" << add_alpha
+       << " be: " << big_endian << " ec: " << add_extra_channels;
+    return os.str();
+  }
+};
+
+void CreateTestImage(const TestImageParams& params, PackedPixelFile* ppf) {
+  ppf->info.xsize = params.xsize;
+  ppf->info.ysize = params.ysize;
+  ppf->info.bits_per_sample = params.bits_per_sample;
+  ppf->info.exponent_bits_per_sample = params.bits_per_sample == 32 ? 8 : 0;
+  ppf->info.num_color_channels = params.is_gray ? 1 : 3;
+  ppf->info.alpha_bits = params.add_alpha ? params.bits_per_sample : 0;
+  ppf->info.alpha_premultiplied = (params.codec == Codec::kEXR);
+
+  JxlColorEncoding color_encoding = CreateTestColorEncoding(params.is_gray);
+  ppf->icc = GenerateICC(color_encoding);  // ICC built from the same encoding.
+  ppf->color_encoding = color_encoding;
+
+  PackedFrame frame(params.xsize, params.ysize, params.PixelFormat());
+  FillPackedImage(params.bits_per_sample, &frame.color);
+  if (params.add_extra_channels) {
+    for (size_t i = 0; i < 7; ++i) {  // i doubles as the channel type value.
+      JxlPixelFormat ec_format = params.PixelFormat();
+      ec_format.num_channels = 1;  // Each extra channel is a single plane.
+      PackedImage ec(params.xsize, params.ysize, ec_format);
+      FillPackedImage(params.bits_per_sample, &ec);
+      frame.extra_channels.emplace_back(std::move(ec));
+      PackedExtraChannel pec;
+      pec.ec_info.bits_per_sample = params.bits_per_sample;
+      pec.ec_info.type = static_cast<JxlExtraChannelType>(i);
+      ppf->extra_channels_info.emplace_back(std::move(pec));
+    }
+  }
+  ppf->frames.emplace_back(std::move(frame));
+}
+
+// Ensures reading a newly written file leads to the same image pixels.
+void TestRoundTrip(const TestImageParams& params, ThreadPool* pool) {
+  if (!params.ShouldTestRoundtrip()) return;  // Combination not exercised.
+
+  std::string extension = ExtensionFromCodec(
+      params.codec, params.is_gray, params.add_alpha, params.bits_per_sample);
+  printf("Codec %s %s\n", extension.c_str(), params.DebugString().c_str());
+
+  PackedPixelFile ppf_in;
+  CreateTestImage(params, &ppf_in);
+
+  EncodedImage encoded;
+  auto encoder = Encoder::FromExtension(extension);
+  if (!encoder) {  // Treated as a skip, not as a failure.
+    fprintf(stderr, "Skipping test because of missing codec support.\n");
+    return;
+  }
+  ASSERT_TRUE(encoder->Encode(ppf_in, &encoded, pool));
+  ASSERT_EQ(encoded.bitstreams.size(), 1);
+
+  PackedPixelFile ppf_out;
+  ColorHints color_hints;  // Only populated for PNM and PGX.
+  if (params.codec == Codec::kPNM || params.codec == Codec::kPGX) {
+    color_hints.Add("color_space",
+                    params.is_gray ? "Gra_D65_Rel_SRG" : "RGB_D65_SRG_Rel_SRG");
+  }
+  ASSERT_TRUE(DecodeBytes(Span<const uint8_t>(encoded.bitstreams[0]),
+                          color_hints, &ppf_out));
+  if (params.codec == Codec::kPNG && ppf_out.icc.empty()) {
+    // Decoding a PNG may drop the ICC profile if there's a valid cICP chunk.
+    // Rendering intent is not preserved in this case.
+    EXPECT_EQ(ppf_in.color_encoding.color_space,
+              ppf_out.color_encoding.color_space);
+    EXPECT_EQ(ppf_in.color_encoding.white_point,
+              ppf_out.color_encoding.white_point);
+    if (ppf_in.color_encoding.color_space != JXL_COLOR_SPACE_GRAY) {
+      EXPECT_EQ(ppf_in.color_encoding.primaries,
+                ppf_out.color_encoding.primaries);
+    }
+    EXPECT_EQ(ppf_in.color_encoding.transfer_function,
+              ppf_out.color_encoding.transfer_function);
+    EXPECT_EQ(ppf_out.color_encoding.rendering_intent,
+              JXL_RENDERING_INTENT_RELATIVE);
+  } else if (params.codec != Codec::kPNM && params.codec != Codec::kPGX &&
+             params.codec != Codec::kEXR) {
+    EXPECT_EQ(ppf_in.icc, ppf_out.icc);  // Codecs other than PNM/PGX/EXR.
+  }
+
+  ASSERT_EQ(ppf_out.frames.size(), 1);
+  const auto& frame_in = ppf_in.frames[0];
+  const auto& frame_out = ppf_out.frames[0];
+  VerifySameImage(frame_in.color, ppf_in.info.bits_per_sample, frame_out.color,
+                  ppf_out.info.bits_per_sample,
+                  /*lossless=*/params.codec != Codec::kJPG);
+  ASSERT_EQ(frame_in.extra_channels.size(), frame_out.extra_channels.size());
+  ASSERT_EQ(ppf_out.extra_channels_info.size(),
+            frame_out.extra_channels.size());
+  for (size_t i = 0; i < frame_in.extra_channels.size(); ++i) {
+    VerifySameImage(frame_in.extra_channels[i], ppf_in.info.bits_per_sample,
+                    frame_out.extra_channels[i], ppf_out.info.bits_per_sample,
+                    /*lossless=*/true);
+    EXPECT_EQ(ppf_out.extra_channels_info[i].ec_info.type,
+              ppf_in.extra_channels_info[i].ec_info.type);
+  }
+}
+
+TEST(CodecTest, TestRoundTrip) {
+  ThreadPoolForTests pool(12);
+
+  TestImageParams params;
+  params.xsize = 7;
+  params.ysize = 4;
+
+  for (Codec codec :
+       {Codec::kPNG, Codec::kPNM, Codec::kPGX, Codec::kEXR, Codec::kJPG}) {
+    params.codec = codec;
+    for (int bits_per_sample : {4, 8, 10, 12, 16, 32}) {
+      params.bits_per_sample = static_cast<size_t>(bits_per_sample);
+      for (bool is_gray : {false, true}) {
+        params.is_gray = is_gray;
+        for (bool add_alpha : {false, true}) {
+          params.add_alpha = add_alpha;
+          for (bool big_endian : {false, true}) {
+            params.big_endian = big_endian;
+            params.add_extra_channels = false;
+            TestRoundTrip(params, &pool);
+            // Only PNM with alpha is also run with extra channels.
+            if (codec != Codec::kPNM || !add_alpha) continue;
+            params.add_extra_channels = true;
+            TestRoundTrip(params, &pool);
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(CodecTest, LosslessPNMRoundtrip) {
+  ThreadPoolForTests pool(12);
+
+  static const char* kChannels[] = {"", "g", "ga", "rgb", "rgba"};
+  static const char* kExtension[] = {"", ".pgm", ".pam", ".ppm", ".pam"};
+  for (size_t bit_depth = 1; bit_depth <= 16; ++bit_depth) {
+    for (size_t channels = 1; channels <= 4; ++channels) {
+      // 1-bit images with alpha ("ga", "rgba") are skipped.
+      if (bit_depth == 1 && (channels == 2 || channels == 4)) continue;
+      std::string extension(kExtension[channels]);
+      std::string filename = "jxl/flower/flower_small." +
+                             std::string(kChannels[channels]) + ".depth" +
+                             std::to_string(bit_depth) + extension;
+      const PaddedBytes orig = jxl::test::ReadTestData(filename);
+
+      PackedPixelFile ppf;
+      ColorHints color_hints;
+      color_hints.Add("color_space",
+                      channels < 3 ? "Gra_D65_Rel_SRG" : "RGB_D65_SRG_Rel_SRG");
+      ASSERT_TRUE(DecodeBytes(Span<const uint8_t>(orig.data(), orig.size()),
+                              color_hints, &ppf));
+      // Re-encoding must reproduce the original file byte for byte.
+      EncodedImage encoded;
+      auto encoder = Encoder::FromExtension(extension);
+      ASSERT_TRUE(encoder.get());
+      ASSERT_TRUE(encoder->Encode(ppf, &encoded, &pool));
+      ASSERT_EQ(encoded.bitstreams.size(), 1);
+      ASSERT_EQ(orig.size(), encoded.bitstreams[0].size());
+      EXPECT_EQ(0,
+                memcmp(orig.data(), encoded.bitstreams[0].data(), orig.size()));
+    }
+  }
+}
+
+TEST(CodecTest, TestPNM) { TestCodecPNM(); }  // Defined outside this file.
+
+TEST(CodecTest, FormatNegotiation) {
+  const std::vector<JxlPixelFormat> accepted_formats = {
+      {/*num_channels=*/4,
+       /*data_type=*/JXL_TYPE_UINT16,
+       /*endianness=*/JXL_NATIVE_ENDIAN,
+       /*align=*/0},
+      {/*num_channels=*/3,
+       /*data_type=*/JXL_TYPE_UINT8,
+       /*endianness=*/JXL_NATIVE_ENDIAN,
+       /*align=*/0},
+      {/*num_channels=*/3,
+       /*data_type=*/JXL_TYPE_UINT16,
+       /*endianness=*/JXL_NATIVE_ENDIAN,
+       /*align=*/0},
+      {/*num_channels=*/1,
+       /*data_type=*/JXL_TYPE_UINT8,
+       /*endianness=*/JXL_NATIVE_ENDIAN,
+       /*align=*/0},
+  };
+
+  JxlBasicInfo info;
+  JxlEncoderInitBasicInfo(&info);
+  info.bits_per_sample = 12;
+  info.num_color_channels = 2;  // No accepted format has 2 channels.
+
+  JxlPixelFormat format;
+  EXPECT_FALSE(SelectFormat(accepted_formats, info, &format));
+
+  info.num_color_channels = 3;  // Matches the two 3-channel entries.
+  ASSERT_TRUE(SelectFormat(accepted_formats, info, &format));
+  EXPECT_EQ(format.num_channels, info.num_color_channels);
+  // 16 is the smallest accepted format that can accommodate the 12-bit data.
+  EXPECT_EQ(format.data_type, JXL_TYPE_UINT16);
+}
+
+TEST(CodecTest, EncodeToPNG) {
+  ThreadPool* const pool = nullptr;  // Encoder is invoked without a pool.
+
+  std::unique_ptr<Encoder> png_encoder = Encoder::FromExtension(".png");
+  if (!png_encoder) {  // Treated as a skip, not as a failure.
+    fprintf(stderr, "Skipping test because of missing codec support.\n");
+    return;
+  }
+
+  const PaddedBytes original_png = jxl::test::ReadTestData(
+      "external/wesaturate/500px/tmshre_riaphotographs_srgb8.png");
+  PackedPixelFile ppf;
+  ASSERT_TRUE(extras::DecodeBytes(Span<const uint8_t>(original_png),
+                                  ColorHints(), &ppf));
+
+  const JxlPixelFormat& format = ppf.frames.front().color.format;
+  ASSERT_THAT(  // The decoded pixel format must be one the encoder accepts.
+      png_encoder->AcceptedFormats(),
+      Contains(AllOf(Field(&JxlPixelFormat::num_channels, format.num_channels),
+                     Field(&JxlPixelFormat::data_type, format.data_type),
+                     Field(&JxlPixelFormat::endianness, format.endianness))));
+  EncodedImage encoded_png;
+  ASSERT_TRUE(png_encoder->Encode(ppf, &encoded_png, pool));
+  EXPECT_THAT(encoded_png.icc, IsEmpty());
+  ASSERT_THAT(encoded_png.bitstreams, SizeIs(1));
+
+  PackedPixelFile decoded_ppf;  // Second decode, of the re-encoded PNG.
+  ASSERT_TRUE(
+      extras::DecodeBytes(Span<const uint8_t>(encoded_png.bitstreams.front()),
+                          ColorHints(), &decoded_ppf));
+
+  ASSERT_EQ(decoded_ppf.info.bits_per_sample, ppf.info.bits_per_sample);
+  ASSERT_EQ(decoded_ppf.frames.size(), 1);
+  VerifySameImage(ppf.frames[0].color, ppf.info.bits_per_sample,
+                  decoded_ppf.frames[0].color,
+                  decoded_ppf.info.bits_per_sample);
+}
+
+}  // namespace
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/apng.cc b/third-party/libjxl/libjxl/lib/extras/dec/apng.cc
new file mode 100644
index 0000000000..b0a19ea721
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/apng.cc
@@ -0,0 +1,987 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/apng.h"
+
+// Parts of this code are taken from apngdis, which has the following license:
+/* APNG Disassembler 2.8
+ *
+ * Deconstructs APNG files into individual frames.
+ *
+ * http://apngdis.sourceforge.net
+ *
+ * Copyright (c) 2010-2015 Max Stepin
+ * maxst at users.sourceforge.net
+ *
+ * zlib license
+ * ------------
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ *
+ */
+
+#include <jxl/codestream_header.h>
+#include <jxl/encode.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/size_constraints.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/scope_guard.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/sanitizers.h"
+#if JPEGXL_ENABLE_APNG
+#include "png.h" /* original (unpatched) libpng is ok */
+#endif
+
+namespace jxl {
+namespace extras {
+
+#if JPEGXL_ENABLE_APNG
+namespace {
+
+constexpr unsigned char kExifSignature[6] = {0x45, 0x78, 0x69,
+                                             0x66, 0x00, 0x00};  // "Exif\0\0"
+
+/* hIST chunk tail is not processed properly; skip this chunk completely;
+   see https://github.com/glennrp/libpng/pull/413 */
+const png_byte kIgnoredPngChunks[] = {
+    104, 73, 83, 84, '\0' /* hIST */
+};
+
+// Decodes a PNG fixed-point field (the real value times 10^5) into a double.
+static double F64FromU32(const uint32_t x) {
+  return static_cast<int32_t>(x) * 1E-5;  // bits are reinterpreted as signed
+}
+
+Status DecodeSRGB(const unsigned char* payload, const size_t payload_size,
+                  JxlColorEncoding* color_encoding) {
+  if (payload_size != 1) return JXL_FAILURE("Wrong sRGB size");
+  // The payload byte is the rendering intent (PNG uses the ICC values).
+  if (payload[0] >= 4) return JXL_FAILURE("Invalid Rendering Intent");
+  color_encoding->white_point = JXL_WHITE_POINT_D65;
+  color_encoding->primaries = JXL_PRIMARIES_SRGB;
+  color_encoding->transfer_function = JXL_TRANSFER_FUNCTION_SRGB;
+  color_encoding->rendering_intent =
+      static_cast<JxlRenderingIntent>(payload[0]);  // range-checked above
+  return true;
+}
+
+// Fills color_encoding from a 4-byte cICP payload. If any field is not
+// supported, returns false and leaves color_encoding unmodified.
+Status DecodeCICP(const unsigned char* payload, const size_t payload_size,
+                  JxlColorEncoding* color_encoding) {
+  if (payload_size != 4) return JXL_FAILURE("Wrong cICP size");
+  JxlColorEncoding color_enc = *color_encoding;  // edited copy, stored at end
+
+  // From https://www.itu.int/rec/T-REC-H.273-202107-I/en
+  if (payload[0] == 1) {  // payload[0] selects primaries and white point
+    // IEC 61966-2-1 sRGB
+    color_enc.primaries = JXL_PRIMARIES_SRGB;
+    color_enc.white_point = JXL_WHITE_POINT_D65;
+  } else if (payload[0] == 4) {
+    // Rec. ITU-R BT.470-6 System M
+    color_enc.primaries = JXL_PRIMARIES_CUSTOM;
+    color_enc.primaries_red_xy[0] = 0.67;
+    color_enc.primaries_red_xy[1] = 0.33;
+    color_enc.primaries_green_xy[0] = 0.21;
+    color_enc.primaries_green_xy[1] = 0.71;
+    color_enc.primaries_blue_xy[0] = 0.14;
+    color_enc.primaries_blue_xy[1] = 0.08;
+    color_enc.white_point = JXL_WHITE_POINT_CUSTOM;
+    color_enc.white_point_xy[0] = 0.310;
+    color_enc.white_point_xy[1] = 0.316;
+  } else if (payload[0] == 5) {
+    // Rec. ITU-R BT.1700-0 625 PAL and 625 SECAM
+    color_enc.primaries = JXL_PRIMARIES_CUSTOM;
+    color_enc.primaries_red_xy[0] = 0.64;
+    color_enc.primaries_red_xy[1] = 0.33;
+    color_enc.primaries_green_xy[0] = 0.29;
+    color_enc.primaries_green_xy[1] = 0.60;
+    color_enc.primaries_blue_xy[0] = 0.15;
+    color_enc.primaries_blue_xy[1] = 0.06;
+    color_enc.white_point = JXL_WHITE_POINT_D65;
+  } else if (payload[0] == 6 || payload[0] == 7) {
+    // SMPTE ST 170 (2004) / SMPTE ST 240 (1999)
+    color_enc.primaries = JXL_PRIMARIES_CUSTOM;
+    color_enc.primaries_red_xy[0] = 0.630;
+    color_enc.primaries_red_xy[1] = 0.340;
+    color_enc.primaries_green_xy[0] = 0.310;
+    color_enc.primaries_green_xy[1] = 0.595;
+    color_enc.primaries_blue_xy[0] = 0.155;
+    color_enc.primaries_blue_xy[1] = 0.070;
+    color_enc.white_point = JXL_WHITE_POINT_D65;
+  } else if (payload[0] == 8) {
+    // Generic film (colour filters using Illuminant C)
+    color_enc.primaries = JXL_PRIMARIES_CUSTOM;
+    color_enc.primaries_red_xy[0] = 0.681;
+    color_enc.primaries_red_xy[1] = 0.319;
+    color_enc.primaries_green_xy[0] = 0.243;
+    color_enc.primaries_green_xy[1] = 0.692;
+    color_enc.primaries_blue_xy[0] = 0.145;
+    color_enc.primaries_blue_xy[1] = 0.049;
+    color_enc.white_point = JXL_WHITE_POINT_CUSTOM;
+    color_enc.white_point_xy[0] = 0.310;
+    color_enc.white_point_xy[1] = 0.316;
+  } else if (payload[0] == 9) {
+    // Rec. ITU-R BT.2100-2
+    color_enc.primaries = JXL_PRIMARIES_2100;
+    color_enc.white_point = JXL_WHITE_POINT_D65;
+  } else if (payload[0] == 10) {
+    // CIE 1931 XYZ
+    color_enc.primaries = JXL_PRIMARIES_CUSTOM;
+    color_enc.primaries_red_xy[0] = 1;
+    color_enc.primaries_red_xy[1] = 0;
+    color_enc.primaries_green_xy[0] = 0;
+    color_enc.primaries_green_xy[1] = 1;
+    color_enc.primaries_blue_xy[0] = 0;
+    color_enc.primaries_blue_xy[1] = 0;
+    color_enc.white_point = JXL_WHITE_POINT_E;
+  } else if (payload[0] == 11) {
+    // SMPTE RP 431-2 (2011)
+    color_enc.primaries = JXL_PRIMARIES_P3;
+    color_enc.white_point = JXL_WHITE_POINT_DCI;
+  } else if (payload[0] == 12) {
+    // SMPTE EG 432-1 (2010)
+    color_enc.primaries = JXL_PRIMARIES_P3;
+    color_enc.white_point = JXL_WHITE_POINT_D65;
+  } else if (payload[0] == 22) {  // NOTE(review): source spec unnamed here
+    color_enc.primaries = JXL_PRIMARIES_CUSTOM;
+    color_enc.primaries_red_xy[0] = 0.630;
+    color_enc.primaries_red_xy[1] = 0.340;
+    color_enc.primaries_green_xy[0] = 0.295;
+    color_enc.primaries_green_xy[1] = 0.605;
+    color_enc.primaries_blue_xy[0] = 0.155;
+    color_enc.primaries_blue_xy[1] = 0.077;
+    color_enc.white_point = JXL_WHITE_POINT_D65;
+  } else {
+    JXL_WARNING("Unsupported primaries specified in cICP chunk: %d",
+                static_cast<int>(payload[0]));
+    return false;
+  }
+
+  if (payload[1] == 1 || payload[1] == 6 || payload[1] == 14 ||
+      payload[1] == 15) {  // payload[1] selects the transfer function
+    // Rec. ITU-R BT.709-6
+    color_enc.transfer_function = JXL_TRANSFER_FUNCTION_709;
+  } else if (payload[1] == 4) {
+    // Rec. ITU-R BT.1700-0 625 PAL and 625 SECAM
+    color_enc.transfer_function = JXL_TRANSFER_FUNCTION_GAMMA;
+    color_enc.gamma = 1 / 2.2;
+  } else if (payload[1] == 5) {
+    // Rec. ITU-R BT.470-6 System B, G
+    color_enc.transfer_function = JXL_TRANSFER_FUNCTION_GAMMA;
+    color_enc.gamma = 1 / 2.8;
+  } else if (payload[1] == 8 || payload[1] == 13 || payload[1] == 16 ||
+             payload[1] == 17 || payload[1] == 18) {
+    // These codes all match the corresponding JXL enum values
+    color_enc.transfer_function = static_cast<JxlTransferFunction>(payload[1]);
+  } else {
+    JXL_WARNING("Unsupported transfer function specified in cICP chunk: %d",
+                static_cast<int>(payload[1]));
+    return false;
+  }
+
+  if (payload[2] != 0) {  // only value 0 is accepted for the third byte
+    JXL_WARNING("Unsupported color space specified in cICP chunk: %d",
+                static_cast<int>(payload[2]));
+    return false;
+  }
+  if (payload[3] != 1) {  // the full-range flag must be set
+    JXL_WARNING("Unsupported full-range flag specified in cICP chunk: %d",
+                static_cast<int>(payload[3]));
+    return false;
+  }
+  // cICP has no rendering intent, so use the default
+  color_enc.rendering_intent = JXL_RENDERING_INTENT_RELATIVE;
+  *color_encoding = color_enc;  // commit only after every field validated
+  return true;
+}
+
+Status DecodeGAMA(const unsigned char* payload, const size_t payload_size,
+                  JxlColorEncoding* color_encoding) {
+  if (payload_size != 4) return JXL_FAILURE("Wrong gAMA size");  // one BE u32
+  color_encoding->transfer_function = JXL_TRANSFER_FUNCTION_GAMMA;
+  color_encoding->gamma = F64FromU32(LoadBE32(payload));  // stored value / 1e5
+  return true;
+}
+
+Status DecodeCHRM(const unsigned char* payload, const size_t payload_size,
+                  JxlColorEncoding* color_encoding) {
+  if (payload_size != 32) return JXL_FAILURE("Wrong cHRM size");  // 8 BE u32
+
+  color_encoding->white_point = JXL_WHITE_POINT_CUSTOM;  // x, y at offset 0, 4
+  color_encoding->white_point_xy[0] = F64FromU32(LoadBE32(payload + 0));
+  color_encoding->white_point_xy[1] = F64FromU32(LoadBE32(payload + 4));
+
+  color_encoding->primaries = JXL_PRIMARIES_CUSTOM;  // then red, green, blue
+  color_encoding->primaries_red_xy[0] = F64FromU32(LoadBE32(payload + 8));
+  color_encoding->primaries_red_xy[1] = F64FromU32(LoadBE32(payload + 12));
+  color_encoding->primaries_green_xy[0] = F64FromU32(LoadBE32(payload + 16));
+  color_encoding->primaries_green_xy[1] = F64FromU32(LoadBE32(payload + 20));
+  color_encoding->primaries_blue_xy[0] = F64FromU32(LoadBE32(payload + 24));
+  color_encoding->primaries_blue_xy[1] = F64FromU32(LoadBE32(payload + 28));
+  return true;
+}
+
+// Extracts XMP and Exif blobs from PNG text chunks (IPTC/8BIM are skipped).
+class BlobsReaderPNG {
+ public:
+  static Status Decode(const png_text_struct& info, PackedMetadata* metadata) {
+    // We trust these are properly null-terminated by libpng.
+    const char* key = info.key;
+    const char* value = info.text;
+    if (strstr(key, "XML:com.adobe.xmp")) {
+      metadata->xmp.resize(strlen(value));  // safe, see above
+      memcpy(metadata->xmp.data(), value, metadata->xmp.size());
+    }
+
+    std::string type;
+    std::vector<uint8_t> bytes;
+
+    // Handle text chunks annotated with key "Raw profile type ####", with
+    // #### a type, which may contain metadata.
+    const char* kKey = "Raw profile type ";
+    if (strncmp(key, kKey, strlen(kKey)) != 0) return false;
+
+    if (!MaybeDecodeBase16(key, value, &type, &bytes)) {
+      JXL_WARNING("Couldn't parse 'Raw profile type' text chunk");
+      return false;
+    }
+    if (type == "exif") {
+      // Remove "Exif\0\0" prefix if present
+      if (bytes.size() >= sizeof kExifSignature &&
+          memcmp(bytes.data(), kExifSignature, sizeof kExifSignature) == 0) {
+        bytes.erase(bytes.begin(), bytes.begin() + sizeof kExifSignature);
+      }
+      if (!metadata->exif.empty()) {
+        JXL_WARNING("overwriting EXIF (%" PRIuS " bytes) with base16 (%" PRIuS
+                    " bytes)",
+                    metadata->exif.size(), bytes.size());
+      }
+      metadata->exif = std::move(bytes);
+    } else if (type == "iptc") {
+      // TODO (jon): Deal with IPTC in some way
+    } else if (type == "8bim") {
+      // TODO (jon): Deal with 8bim in some way
+    } else if (type == "xmp") {
+      if (!metadata->xmp.empty()) {
+        JXL_WARNING("overwriting XMP (%" PRIuS " bytes) with base16 (%" PRIuS
+                    " bytes)",
+                    metadata->xmp.size(), bytes.size());
+      }
+      metadata->xmp = std::move(bytes);
+    } else {
+      JXL_WARNING("Unknown type in 'Raw profile type' text chunk: %s: %" PRIuS
+                  " bytes",
+                  type.c_str(), bytes.size());
+    }
+    return true;
+  }
+
+ private:
+  // Decodes one lowercase hex digit; returns false (nibble = 0) if invalid.
+  static JXL_INLINE Status DecodeNibble(const char c,
+                                        uint32_t* JXL_RESTRICT nibble) {
+    if ('a' <= c && c <= 'f') {
+      *nibble = 10 + c - 'a';
+    } else if ('0' <= c && c <= '9') {
+      *nibble = c - '0';
+    } else {
+      *nibble = 0;
+      return JXL_FAILURE("Invalid metadata nibble");
+    }
+    JXL_ASSERT(*nibble < 16);
+    return true;
+  }
+
+  // Parses 1 to 8 decimal digits at *pos; returns false if invalid.
+  static JXL_INLINE Status DecodeDecimal(const char** pos, const char* end,
+                                         uint32_t* JXL_RESTRICT value) {
+    size_t len = 0;
+    *value = 0;
+    while (*pos < end) {
+      char next = **pos;
+      if (next >= '0' && next <= '9') {
+        *value = (*value * 10) + static_cast<uint32_t>(next - '0');
+        len++;
+        if (len > 8) {  // a ninth digit is rejected, so *value cannot overflow
+          break;
+        }
+      } else {
+        // Do not consume terminator (non-decimal digit).
+        break;
+      }
+      (*pos)++;
+    }
+    if (len == 0 || len > 8) {
+      return JXL_FAILURE("Failed to parse decimal");
+    }
+    return true;
+  }
+
+  // Parses a PNG text chunk with key of the form "Raw profile type ####", with
+  // #### a type.
+  // Returns whether it could successfully parse the content.
+  // We trust key and encoded are null-terminated because they come from
+  // libpng.
+  static Status MaybeDecodeBase16(const char* key, const char* encoded,
+                                  std::string* type,
+                                  std::vector<uint8_t>* bytes) {
+    const char* encoded_end = encoded + strlen(encoded);
+
+    const char* kKey = "Raw profile type ";
+    if (strncmp(key, kKey, strlen(kKey)) != 0) return false;
+    *type = key + strlen(kKey);
+    const size_t kMaxTypeLen = 20;
+    if (type->length() > kMaxTypeLen) return false;  // Type too long
+
+    // Header: freeform string and number of bytes
+    // Expected format is:
+    // \n
+    // profile name/description\n
+    //       40\n               (the number of bytes after hex-decoding)
+    // 01234566789abcdef....\n  (72 bytes per line max).
+    // 012345667\n              (last line)
+    const char* pos = encoded;
+
+    if (*(pos++) != '\n') return false;
+    while (pos < encoded_end && *pos != '\n') {
+      pos++;
+    }
+    if (pos == encoded_end) return false;
+    // We parsed so far a \n, some number of non \n characters and are now
+    // pointing at a \n.
+    if (*(pos++) != '\n') return false;
+    // Skip leading spaces
+    while (pos < encoded_end && *pos == ' ') {
+      pos++;
+    }
+    uint32_t bytes_to_decode = 0;
+    JXL_RETURN_IF_ERROR(DecodeDecimal(&pos, encoded_end, &bytes_to_decode));
+
+    // We need 2*bytes for the hex values plus 1 byte every 36 values,
+    // plus terminal \n for length.
+    const unsigned long needed_bytes =
+        bytes_to_decode * 2 + 1 + DivCeil(bytes_to_decode, 36);
+    if (needed_bytes != static_cast<size_t>(encoded_end - pos)) {
+      return JXL_FAILURE("Not enough bytes to parse %u bytes in hex",
+                         bytes_to_decode);
+    }
+    JXL_ASSERT(bytes->empty());
+    bytes->reserve(bytes_to_decode);
+
+    // Encoding: base16 with newline after 72 chars.
+    // pos points to the \n before the first line of hex values.
+    for (size_t i = 0; i < bytes_to_decode; ++i) {
+      if (i % 36 == 0) {
+        if (pos + 1 >= encoded_end) return false;  // Truncated base16 1
+        if (*pos != '\n') return false;            // Expected newline
+        ++pos;
+      }
+
+      if (pos + 2 >= encoded_end) return false;  // Truncated base16 2;
+      uint32_t nibble0, nibble1;
+      JXL_RETURN_IF_ERROR(DecodeNibble(pos[0], &nibble0));
+      JXL_RETURN_IF_ERROR(DecodeNibble(pos[1], &nibble1));
+      bytes->push_back(static_cast<uint8_t>((nibble0 << 4) + nibble1));
+      pos += 2;
+    }
+    if (pos + 1 != encoded_end) return false;  // Too many encoded bytes
+    if (pos[0] != '\n') return false;          // Incorrect metadata terminator
+    return true;
+  }
+};
+
+constexpr bool isAbc(char c) {  // true for ASCII letters A-Z and a-z only
+  return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+}
+
+constexpr uint32_t kId_IHDR = 0x52444849;  // "IHDR" loaded as little-endian
+constexpr uint32_t kId_acTL = 0x4C546361;
+constexpr uint32_t kId_fcTL = 0x4C546366;
+constexpr uint32_t kId_IDAT = 0x54414449;
+constexpr uint32_t kId_fdAT = 0x54416466;
+constexpr uint32_t kId_IEND = 0x444E4549;
+constexpr uint32_t kId_cICP = 0x50434963;
+constexpr uint32_t kId_iCCP = 0x50434369;
+constexpr uint32_t kId_sRGB = 0x42475273;
+constexpr uint32_t kId_gAMA = 0x414D4167;
+constexpr uint32_t kId_cHRM = 0x4D524863;
+constexpr uint32_t kId_eXIf = 0x66495865;  // all compared to read_chunk()
+
+struct APNGFrame {  // buffer that row_fn() writes decoded rows into
+  std::vector<uint8_t> pixels;
+  std::vector<uint8_t*> rows;  // row_fn() checks each points inside `pixels`
+  unsigned int w, h, delay_num, delay_den;  // NOTE(review): unused here?
+};
+
+struct Reader {  // forward-only cursor over the byte range [next, last)
+  const uint8_t* next;
+  const uint8_t* last;
+  bool Read(void* data, size_t len) {  // false if fewer than len bytes left
+    size_t cap = last - next;
+    size_t to_copy = std::min(cap, len);
+    memcpy(data, next, to_copy);  // a short read still copies what is left
+    next += to_copy;
+    return (len == to_copy);
+  }
+  bool Eof() { return next == last; }
+};
+
+const unsigned long cMaxPNGSize = 1000000UL;  // cap on image/frame dimensions
+const size_t kMaxPNGChunkSize = 1lu << 30;  // 1 GiB; read_chunk() size limit
+
+void info_fn(png_structp png_ptr, png_infop info_ptr) {  // libpng info cb
+  png_set_expand(png_ptr);
+  png_set_palette_to_rgb(png_ptr);
+  png_set_tRNS_to_alpha(png_ptr);
+  (void)png_set_interlace_handling(png_ptr);  // return value is not needed
+  png_read_update_info(png_ptr, info_ptr);  // after all transforms are set
+}
+
+void row_fn(png_structp png_ptr, png_bytep new_row, png_uint_32 row_num,
+            int pass) {  // libpng row callback; `pass` is unused
+  APNGFrame* frame = (APNGFrame*)png_get_progressive_ptr(png_ptr);
+  JXL_CHECK(frame);
+  JXL_CHECK(row_num < frame->rows.size());  // guards the rows[] index below
+  JXL_CHECK(frame->rows[row_num] < frame->pixels.data() + frame->pixels.size());
+  png_progressive_combine_row(png_ptr, frame->rows[row_num], new_row);
+}
+
+inline unsigned int read_chunk(Reader* r, std::vector<uint8_t>* pChunk) {
+  unsigned char len[4];  // big-endian length field that starts each chunk
+  if (r->Read(&len, 4)) {
+    const auto size = png_get_uint_32(len);
+    // Check first, to avoid overflow.
+    if (size > kMaxPNGChunkSize) {
+      JXL_WARNING("APNG chunk size is too big");
+      return 0;
+    }
+    pChunk->resize(size + 12);  // 4 length + 4 type + payload + 4 trailing
+    memcpy(pChunk->data(), len, 4);
+    if (r->Read(pChunk->data() + 4, pChunk->size() - 4)) {
+      return LoadLE32(pChunk->data() + 4);  // chunk type, cf. kId_* constants
+    }
+  }
+  return 0;  // short read or oversized chunk
+}
+
+int processing_start(png_structp& png_ptr, png_infop& info_ptr, void* frame_ptr,
+                     bool hasInfo, std::vector<uint8_t>& chunkIHDR,
+                     std::vector<std::vector<uint8_t>>& chunksInfo) {
+  unsigned char header[8] = {137, 80, 78, 71, 13, 10, 26, 10};  // PNG magic
+
+  // Cleanup prior decoder, if any.
+  png_destroy_read_struct(&png_ptr, &info_ptr, 0);
+  // Just in case. Not all versions of libpng wipe out the pointers.
+  png_ptr = nullptr;
+  info_ptr = nullptr;
+
+  png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+  info_ptr = png_create_info_struct(png_ptr);
+  if (!png_ptr || !info_ptr) return 1;  // 1 signals failure, 0 success
+
+  if (setjmp(png_jmpbuf(png_ptr))) {  // libpng errors longjmp back to here
+    return 1;
+  }
+
+  png_set_keep_unknown_chunks(png_ptr, 1, kIgnoredPngChunks,
+                              (int)sizeof(kIgnoredPngChunks) / 5);  // 5B/name
+
+  png_set_crc_action(png_ptr, PNG_CRC_QUIET_USE, PNG_CRC_QUIET_USE);
+  png_set_progressive_read_fn(png_ptr, frame_ptr, info_fn, row_fn, NULL);
+
+  png_process_data(png_ptr, info_ptr, header, 8);
+  png_process_data(png_ptr, info_ptr, chunkIHDR.data(), chunkIHDR.size());
+
+  if (hasInfo) {  // replay the stored info chunks into the new decoder
+    for (unsigned int i = 0; i < chunksInfo.size(); i++) {
+      png_process_data(png_ptr, info_ptr, chunksInfo[i].data(),
+                       chunksInfo[i].size());
+    }
+  }
+  return 0;
+}
+
+int processing_data(png_structp png_ptr, png_infop info_ptr, unsigned char* p,
+                    unsigned int size) {  // returns 0 on success, 1 on failure
+  if (!png_ptr || !info_ptr) return 1;
+
+  if (setjmp(png_jmpbuf(png_ptr))) {  // libpng errors longjmp back to here
+    return 1;
+  }
+
+  png_process_data(png_ptr, info_ptr, p, size);
+  return 0;
+}
+
+int processing_finish(png_structp png_ptr, png_infop info_ptr,
+                      PackedMetadata* metadata) {  // 0 on success, 1 on failure
+  unsigned char footer[12] = {0, 0, 0, 0, 73, 69, 78, 68, 174, 66, 96, 130};
+
+  if (!png_ptr || !info_ptr) return 1;
+
+  if (setjmp(png_jmpbuf(png_ptr))) {  // libpng errors longjmp back to here
+    return 1;
+  }
+
+  png_process_data(png_ptr, info_ptr, footer, 12);  // zero-length "IEND" chunk
+  // before destroying: check if we encountered any metadata chunks
+  png_textp text_ptr;
+  int num_text;
+  png_get_text(png_ptr, info_ptr, &text_ptr, &num_text);
+  for (int i = 0; i < num_text; i++) {
+    (void)BlobsReaderPNG::Decode(text_ptr[i], metadata);  // best effort
+  }
+
+  return 0;
+}
+
+}  // namespace
+#endif
+
+bool CanDecodeAPNG() {  // reports whether JPEGXL_ENABLE_APNG was compiled in
+#if JPEGXL_ENABLE_APNG
+  return true;
+#else
+  return false;
+#endif
+}
+
+Status DecodeImageAPNG(const Span<const uint8_t> bytes,
+                       const ColorHints& color_hints, PackedPixelFile* ppf,
+                       const SizeConstraints* constraints) {
+#if JPEGXL_ENABLE_APNG
+  Reader r;
+  unsigned int id, j, w, h, w0, h0, x0, y0;
+  unsigned int delay_num, delay_den, dop, bop, rowbytes, imagesize;
+  unsigned char sig[8];
+  png_structp png_ptr = nullptr;
+  png_infop info_ptr = nullptr;
+  std::vector<uint8_t> chunk;
+  std::vector<uint8_t> chunkIHDR;
+  std::vector<std::vector<uint8_t>> chunksInfo;
+  bool isAnimated = false;
+  bool hasInfo = false;
+  bool seenFctl = false;
+  APNGFrame frameRaw = {};
+  uint32_t num_channels;
+  JxlPixelFormat format;
+  unsigned int bytes_per_pixel = 0;
+
+  struct FrameInfo {
+    PackedImage data;
+    uint32_t duration;
+    size_t x0, xsize;
+    size_t y0, ysize;
+    uint32_t dispose_op;
+    uint32_t blend_op;
+  };
+
+  std::vector<FrameInfo> frames;
+
+  // Make sure png memory is released in any case.
+  auto scope_guard = MakeScopeGuard([&]() {
+    png_destroy_read_struct(&png_ptr, &info_ptr, 0);
+    // Just in case. Not all versions on libpng wipe-out the pointers.
+    png_ptr = nullptr;
+    info_ptr = nullptr;
+  });
+
+  r = {bytes.data(), bytes.data() + bytes.size()};
+  // Not a PNG => not an error
+  unsigned char png_signature[8] = {137, 80, 78, 71, 13, 10, 26, 10};
+  if (!r.Read(sig, 8) || memcmp(sig, png_signature, 8) != 0) {
+    return false;
+  }
+  id = read_chunk(&r, &chunkIHDR);
+
+  ppf->info.exponent_bits_per_sample = 0;
+  ppf->info.alpha_exponent_bits = 0;
+  ppf->info.orientation = JXL_ORIENT_IDENTITY;
+
+  ppf->frames.clear();
+
+  bool have_color = false;
+  bool have_cicp = false, have_iccp = false, have_srgb = false;
+  bool errorstate = true;
+  if (id == kId_IHDR && chunkIHDR.size() == 25) {
+    x0 = 0;
+    y0 = 0;
+    delay_num = 1;
+    delay_den = 10;
+    dop = 0;
+    bop = 0;
+
+    w0 = w = png_get_uint_32(chunkIHDR.data() + 8);
+    h0 = h = png_get_uint_32(chunkIHDR.data() + 12);
+    if (w > cMaxPNGSize || h > cMaxPNGSize) {
+      return false;
+    }
+
+    // default settings in case e.g. only gAMA is given
+    ppf->color_encoding.color_space = JXL_COLOR_SPACE_RGB;
+    ppf->color_encoding.white_point = JXL_WHITE_POINT_D65;
+    ppf->color_encoding.primaries = JXL_PRIMARIES_SRGB;
+    ppf->color_encoding.transfer_function = JXL_TRANSFER_FUNCTION_SRGB;
+    ppf->color_encoding.rendering_intent = JXL_RENDERING_INTENT_RELATIVE;
+
+    if (!processing_start(png_ptr, info_ptr, (void*)&frameRaw, hasInfo,
+                          chunkIHDR, chunksInfo)) {
+      while (!r.Eof()) {
+        id = read_chunk(&r, &chunk);
+        if (!id) break;
+        seenFctl |= (id == kId_fcTL);
+
+        if (id == kId_acTL && !hasInfo && !isAnimated) {
+          isAnimated = true;
+          ppf->info.have_animation = true;
+          ppf->info.animation.tps_numerator = 1000;
+          ppf->info.animation.tps_denominator = 1;
+        } else if (id == kId_IEND ||
+                   (id == kId_fcTL && (!hasInfo || isAnimated))) {
+          if (hasInfo) {
+            if (!processing_finish(png_ptr, info_ptr, &ppf->metadata)) {
+              // Allocates the frame buffer.
+              uint32_t duration = delay_num * 1000 / delay_den;
+              frames.push_back(FrameInfo{PackedImage(w0, h0, format), duration,
+                                         x0, w0, y0, h0, dop, bop});
+              auto& frame = frames.back().data;
+              for (size_t y = 0; y < h0; ++y) {
+                memcpy(static_cast<uint8_t*>(frame.pixels()) + frame.stride * y,
+                       frameRaw.rows[y], bytes_per_pixel * w0);
+              }
+            } else {
+              break;
+            }
+          }
+
+          if (id == kId_IEND) {
+            errorstate = false;
+            break;
+          }
+          if (chunk.size() < 34) {
+            return JXL_FAILURE("Received a chunk that is too small (%" PRIuS
+                               "B)",
+                               chunk.size());
+          }
+          // At this point the old frame is done. Let's start a new one.
+          w0 = png_get_uint_32(chunk.data() + 12);
+          h0 = png_get_uint_32(chunk.data() + 16);
+          x0 = png_get_uint_32(chunk.data() + 20);
+          y0 = png_get_uint_32(chunk.data() + 24);
+          delay_num = png_get_uint_16(chunk.data() + 28);
+          delay_den = png_get_uint_16(chunk.data() + 30);
+          dop = chunk[32];
+          bop = chunk[33];
+
+          if (!delay_den) delay_den = 100;
+
+          if (w0 > cMaxPNGSize || h0 > cMaxPNGSize || x0 > cMaxPNGSize ||
+              y0 > cMaxPNGSize || x0 + w0 > w || y0 + h0 > h || dop > 2 ||
+              bop > 1) {
+            break;
+          }
+
+          if (hasInfo) {
+            memcpy(chunkIHDR.data() + 8, chunk.data() + 12, 8);
+            if (processing_start(png_ptr, info_ptr, (void*)&frameRaw, hasInfo,
+                                 chunkIHDR, chunksInfo)) {
+              break;
+            }
+          }
+        } else if (id == kId_IDAT) {
+          // First IDAT chunk means we now have all header info
+          if (seenFctl) {
+            // `fcTL` chunk must appear after all `IDAT` chunks
+            return JXL_FAILURE("IDAT chunk after fcTL chunk");
+          }
+          hasInfo = true;
+          JXL_CHECK(w == png_get_image_width(png_ptr, info_ptr));
+          JXL_CHECK(h == png_get_image_height(png_ptr, info_ptr));
+          int colortype = png_get_color_type(png_ptr, info_ptr);
+          int png_bit_depth = png_get_bit_depth(png_ptr, info_ptr);
+          ppf->info.bits_per_sample = png_bit_depth;
+          png_color_8p sigbits = NULL;
+          png_get_sBIT(png_ptr, info_ptr, &sigbits);
+          if (colortype & 1) {
+            // palette will actually be 8-bit regardless of the index bitdepth
+            ppf->info.bits_per_sample = 8;
+          }
+          if (colortype & 2) {
+            ppf->info.num_color_channels = 3;
+            ppf->color_encoding.color_space = JXL_COLOR_SPACE_RGB;
+            if (sigbits && sigbits->red == sigbits->green &&
+                sigbits->green == sigbits->blue)
+              ppf->info.bits_per_sample = sigbits->red;
+          } else {
+            ppf->info.num_color_channels = 1;
+            ppf->color_encoding.color_space = JXL_COLOR_SPACE_GRAY;
+            if (sigbits) ppf->info.bits_per_sample = sigbits->gray;
+          }
+          if (colortype & 4 ||
+              png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS)) {
+            ppf->info.alpha_bits = ppf->info.bits_per_sample;
+            if (sigbits) {
+              if (sigbits->alpha &&
+                  sigbits->alpha != ppf->info.bits_per_sample) {
+                return JXL_FAILURE("Unsupported alpha bit-depth");
+              }
+              ppf->info.alpha_bits = sigbits->alpha;
+            }
+          } else {
+            ppf->info.alpha_bits = 0;
+          }
+          ppf->color_encoding.color_space =
+              (ppf->info.num_color_channels == 1 ? JXL_COLOR_SPACE_GRAY
+                                                 : JXL_COLOR_SPACE_RGB);
+          ppf->info.xsize = w;
+          ppf->info.ysize = h;
+          JXL_RETURN_IF_ERROR(VerifyDimensions(constraints, w, h));
+          num_channels =
+              ppf->info.num_color_channels + (ppf->info.alpha_bits ? 1 : 0);
+          format = {
+              /*num_channels=*/num_channels,
+              /*data_type=*/ppf->info.bits_per_sample > 8 ? JXL_TYPE_UINT16
+                                                          : JXL_TYPE_UINT8,
+              /*endianness=*/JXL_BIG_ENDIAN,
+              /*align=*/0,
+          };
+          if (png_bit_depth > 8 && format.data_type == JXL_TYPE_UINT8) {
+            png_set_strip_16(png_ptr);
+          }
+          bytes_per_pixel =
+              num_channels * (format.data_type == JXL_TYPE_UINT16 ? 2 : 1);
+          rowbytes = w * bytes_per_pixel;
+          imagesize = h * rowbytes;
+          frameRaw.pixels.resize(imagesize);
+          frameRaw.rows.resize(h);
+          for (j = 0; j < h; j++)
+            frameRaw.rows[j] = frameRaw.pixels.data() + j * rowbytes;
+
+          if (processing_data(png_ptr, info_ptr, chunk.data(), chunk.size())) {
+            break;
+          }
+        } else if (id == kId_fdAT && isAnimated) {
+          if (!hasInfo) {
+            return JXL_FAILURE("fDAT chunk before iDAT");
+          }
+          png_save_uint_32(chunk.data() + 4, chunk.size() - 16);
+          memcpy(chunk.data() + 8, "IDAT", 4);
+          if (processing_data(png_ptr, info_ptr, chunk.data() + 4,
+                              chunk.size() - 4)) {
+            break;
+          }
+        } else if (id == kId_cICP) {
+          // Color profile chunks: cICP has the highest priority, followed by
+          // iCCP and sRGB (which shouldn't co-exist, but if they do, we use
+          // iCCP), followed finally by gAMA and cHRM.
+          if (DecodeCICP(chunk.data() + 8, chunk.size() - 12,
+                         &ppf->color_encoding)) {
+            have_cicp = true;
+            have_color = true;
+            ppf->icc.clear();
+          }
+        } else if (!have_cicp && id == kId_iCCP) {
+          if (processing_data(png_ptr, info_ptr, chunk.data(), chunk.size())) {
+            JXL_WARNING("Corrupt iCCP chunk");
+            break;
+          }
+
+          // TODO(jon): catch special case of PQ and synthesize color encoding
+          // in that case
+          int compression_type;
+          png_bytep profile;
+          png_charp name;
+          png_uint_32 proflen = 0;
+          auto ok = png_get_iCCP(png_ptr, info_ptr, &name, &compression_type,
+                                 &profile, &proflen);
+          if (ok && proflen) {
+            ppf->icc.assign(profile, profile + proflen);
+            have_color = true;
+            have_iccp = true;
+          } else {
+            // TODO(eustas): JXL_WARNING?
+          }
+        } else if (!have_cicp && !have_iccp && id == kId_sRGB) {
+          JXL_RETURN_IF_ERROR(DecodeSRGB(chunk.data() + 8, chunk.size() - 12,
+                                         &ppf->color_encoding));
+          have_srgb = true;
+          have_color = true;
+        } else if (!have_cicp && !have_srgb && !have_iccp && id == kId_gAMA) {
+          JXL_RETURN_IF_ERROR(DecodeGAMA(chunk.data() + 8, chunk.size() - 12,
+                                         &ppf->color_encoding));
+          have_color = true;
+        } else if (!have_cicp && !have_srgb && !have_iccp && id == kId_cHRM) {
+          JXL_RETURN_IF_ERROR(DecodeCHRM(chunk.data() + 8, chunk.size() - 12,
+                                         &ppf->color_encoding));
+          have_color = true;
+        } else if (id == kId_eXIf) {
+          ppf->metadata.exif.resize(chunk.size() - 12);
+          memcpy(ppf->metadata.exif.data(), chunk.data() + 8,
+                 chunk.size() - 12);
+        } else if (!isAbc(chunk[4]) || !isAbc(chunk[5]) || !isAbc(chunk[6]) ||
+                   !isAbc(chunk[7])) {
+          break;
+        } else {
+          if (processing_data(png_ptr, info_ptr, chunk.data(), chunk.size())) {
+            break;
+          }
+          if (!hasInfo) {
+            chunksInfo.push_back(chunk);
+            continue;
+          }
+        }
+      }
+    }
+
+    JXL_RETURN_IF_ERROR(ApplyColorHints(
+        color_hints, have_color, ppf->info.num_color_channels == 1, ppf));
+  }
+
+  if (errorstate) return false;
+
+  bool has_nontrivial_background = false;
+  bool previous_frame_should_be_cleared = false;
+  enum {
+    DISPOSE_OP_NONE = 0,
+    DISPOSE_OP_BACKGROUND = 1,
+    DISPOSE_OP_PREVIOUS = 2,
+  };
+  enum {
+    BLEND_OP_SOURCE = 0,
+    BLEND_OP_OVER = 1,
+  };
+  for (size_t i = 0; i < frames.size(); i++) {
+    auto& frame = frames[i];
+    JXL_ASSERT(frame.data.xsize == frame.xsize);
+    JXL_ASSERT(frame.data.ysize == frame.ysize);
+
+    // Before encountering a DISPOSE_OP_NONE frame, the canvas is filled with 0,
+    // so DISPOSE_OP_BACKGROUND and DISPOSE_OP_PREVIOUS are equivalent.
+    if (frame.dispose_op == DISPOSE_OP_NONE) {
+      has_nontrivial_background = true;
+    }
+    bool should_blend = frame.blend_op == BLEND_OP_OVER;
+    bool use_for_next_frame =
+        has_nontrivial_background && frame.dispose_op != DISPOSE_OP_PREVIOUS;
+    size_t x0 = frame.x0;
+    size_t y0 = frame.y0;
+    size_t xsize = frame.data.xsize;
+    size_t ysize = frame.data.ysize;
+    if (previous_frame_should_be_cleared) {
+      size_t px0 = frames[i - 1].x0;
+      size_t py0 = frames[i - 1].y0;
+      size_t pxs = frames[i - 1].xsize;
+      size_t pys = frames[i - 1].ysize;
+      if (px0 >= x0 && py0 >= y0 && px0 + pxs <= x0 + xsize &&
+          py0 + pys <= y0 + ysize && frame.blend_op == BLEND_OP_SOURCE &&
+          use_for_next_frame) {
+        // If the previous frame is entirely contained in the current frame and
+        // we are using BLEND_OP_SOURCE, nothing special needs to be done.
+        ppf->frames.emplace_back(std::move(frame.data));
+      } else if (px0 == x0 && py0 == y0 && px0 + pxs == x0 + xsize &&
+                 py0 + pys == y0 + ysize && use_for_next_frame) {
+        // If the new frame has the same size as the old one, but we are
+        // blending, we can instead just not blend.
+        should_blend = false;
+        ppf->frames.emplace_back(std::move(frame.data));
+      } else if (px0 <= x0 && py0 <= y0 && px0 + pxs >= x0 + xsize &&
+                 py0 + pys >= y0 + ysize && use_for_next_frame) {
+        // If the new frame is contained within the old frame, we can pad the
+        // new frame with zeros and not blend.
+        PackedImage new_data(pxs, pys, frame.data.format);
+        memset(new_data.pixels(), 0, new_data.pixels_size);
+        for (size_t y = 0; y < ysize; y++) {
+          size_t bytes_per_pixel =
+              PackedImage::BitsPerChannel(new_data.format.data_type) *
+              new_data.format.num_channels / 8;
+          memcpy(static_cast<uint8_t*>(new_data.pixels()) +
+                     new_data.stride * (y + y0 - py0) +
+                     bytes_per_pixel * (x0 - px0),
+                 static_cast<const uint8_t*>(frame.data.pixels()) +
+                     frame.data.stride * y,
+                 xsize * bytes_per_pixel);
+        }
+
+        x0 = px0;
+        y0 = py0;
+        xsize = pxs;
+        ysize = pys;
+        should_blend = false;
+        ppf->frames.emplace_back(std::move(new_data));
+      } else {
+        // If all else fails, insert a dummy blank frame with kReplace.
+        PackedImage blank(pxs, pys, frame.data.format);
+        memset(blank.pixels(), 0, blank.pixels_size);
+        ppf->frames.emplace_back(std::move(blank));
+        auto& pframe = ppf->frames.back();
+        pframe.frame_info.layer_info.crop_x0 = px0;
+        pframe.frame_info.layer_info.crop_y0 = py0;
+        pframe.frame_info.layer_info.xsize = pxs;
+        pframe.frame_info.layer_info.ysize = pys;
+        pframe.frame_info.duration = 0;
+        bool is_full_size = px0 == 0 && py0 == 0 && pxs == ppf->info.xsize &&
+                            pys == ppf->info.ysize;
+        pframe.frame_info.layer_info.have_crop = is_full_size ? 0 : 1;
+        pframe.frame_info.layer_info.blend_info.blendmode = JXL_BLEND_REPLACE;
+        pframe.frame_info.layer_info.blend_info.source = 1;
+        pframe.frame_info.layer_info.save_as_reference = 1;
+        ppf->frames.emplace_back(std::move(frame.data));
+      }
+    } else {
+      ppf->frames.emplace_back(std::move(frame.data));
+    }
+
+    auto& pframe = ppf->frames.back();
+    pframe.frame_info.layer_info.crop_x0 = x0;
+    pframe.frame_info.layer_info.crop_y0 = y0;
+    pframe.frame_info.layer_info.xsize = xsize;
+    pframe.frame_info.layer_info.ysize = ysize;
+    pframe.frame_info.duration = frame.duration;
+    pframe.frame_info.layer_info.blend_info.blendmode =
+        should_blend ? JXL_BLEND_BLEND : JXL_BLEND_REPLACE;
+    bool is_full_size = x0 == 0 && y0 == 0 && xsize == ppf->info.xsize &&
+                        ysize == ppf->info.ysize;
+    pframe.frame_info.layer_info.have_crop = is_full_size ? 0 : 1;
+    pframe.frame_info.layer_info.blend_info.source = 1;
+    pframe.frame_info.layer_info.blend_info.alpha = 0;
+    pframe.frame_info.layer_info.save_as_reference = use_for_next_frame ? 1 : 0;
+
+    previous_frame_should_be_cleared =
+        has_nontrivial_background && frame.dispose_op == DISPOSE_OP_BACKGROUND;
+  }
+  if (ppf->frames.empty()) return JXL_FAILURE("No frames decoded");
+  ppf->frames.back().frame_info.is_last = true;
+
+  return true;
+#else
+  return false;
+#endif
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/apng.h b/third-party/libjxl/libjxl/lib/extras/dec/apng.h
new file mode 100644
index 0000000000..a292758b8f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/apng.h
@@ -0,0 +1,36 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_DEC_APNG_H_
+#define LIB_EXTRAS_DEC_APNG_H_
+
+// Decodes APNG images in memory.
+
+#include <stdint.h>
+
+#include "lib/extras/dec/color_hints.h"
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+struct SizeConstraints;
+
+namespace extras {
+
+bool CanDecodeAPNG();
+
+// Decodes `bytes` into `ppf`.
+Status DecodeImageAPNG(Span<const uint8_t> bytes, const ColorHints& color_hints,
+                       PackedPixelFile* ppf,
+                       const SizeConstraints* constraints = nullptr);
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_DEC_APNG_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/color_description.cc b/third-party/libjxl/libjxl/lib/extras/dec/color_description.cc
new file mode 100644
index 0000000000..54f6aa4206
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/color_description.cc
@@ -0,0 +1,218 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/color_description.h"
+
+#include <errno.h>
+
+#include <cmath>
+
+namespace jxl {
+
+namespace {
+
+template <typename T>
+struct EnumName {
+  const char* name;
+  T value;
+};
+
+const EnumName<JxlColorSpace> kJxlColorSpaceNames[] = {
+    {"RGB", JXL_COLOR_SPACE_RGB},
+    {"Gra", JXL_COLOR_SPACE_GRAY},
+    {"XYB", JXL_COLOR_SPACE_XYB},
+    {"CS?", JXL_COLOR_SPACE_UNKNOWN},
+};
+
+const EnumName<JxlWhitePoint> kJxlWhitePointNames[] = {
+    {"D65", JXL_WHITE_POINT_D65},
+    {"Cst", JXL_WHITE_POINT_CUSTOM},
+    {"EER", JXL_WHITE_POINT_E},
+    {"DCI", JXL_WHITE_POINT_DCI},
+};
+
+const EnumName<JxlPrimaries> kJxlPrimariesNames[] = {
+    {"SRG", JXL_PRIMARIES_SRGB},
+    {"Cst", JXL_PRIMARIES_CUSTOM},
+    {"202", JXL_PRIMARIES_2100},
+    {"DCI", JXL_PRIMARIES_P3},
+};
+
+const EnumName<JxlTransferFunction> kJxlTransferFunctionNames[] = {
+    {"709", JXL_TRANSFER_FUNCTION_709},
+    {"TF?", JXL_TRANSFER_FUNCTION_UNKNOWN},
+    {"Lin", JXL_TRANSFER_FUNCTION_LINEAR},
+    {"SRG", JXL_TRANSFER_FUNCTION_SRGB},
+    {"PeQ", JXL_TRANSFER_FUNCTION_PQ},
+    {"DCI", JXL_TRANSFER_FUNCTION_DCI},
+    {"HLG", JXL_TRANSFER_FUNCTION_HLG},
+    {"", JXL_TRANSFER_FUNCTION_GAMMA},
+};
+
+const EnumName<JxlRenderingIntent> kJxlRenderingIntentNames[] = {
+    {"Per", JXL_RENDERING_INTENT_PERCEPTUAL},
+    {"Rel", JXL_RENDERING_INTENT_RELATIVE},
+    {"Sat", JXL_RENDERING_INTENT_SATURATION},
+    {"Abs", JXL_RENDERING_INTENT_ABSOLUTE},
+};
+
+// Stores in `*value` the entry of `enum_values` whose name equals `token`.
+template <typename T>
+Status ParseEnum(const std::string& token, const EnumName<T>* enum_values,
+                 size_t enum_len, T* value) {
+  for (const auto* e = enum_values; e != enum_values + enum_len; ++e) {
+    if (token != e->name) continue;
+    *value = e->value;
+    return true;
+  }
+  return false;  // No entry matched; `*value` is left untouched.
+}
+#define ARRAY_SIZE(X) (sizeof(X) / sizeof((X)[0]))
+#define PARSE_ENUM(type, token, value) \
+  ParseEnum<type>(token, k##type##Names, ARRAY_SIZE(k##type##Names), value)
+
+// Splits `*input` at `separator`, yielding one non-empty token per Next().
+class Tokenizer {
+ public:
+  Tokenizer(const std::string* input, char separator)
+      : input_(input), separator_(separator) {}
+
+  // Stores the next non-empty token in `*next`; fails when none is left.
+  Status Next(std::string* next) {
+    const size_t end = input_->find(separator_, start_);
+    const bool last = end == std::string::npos;
+    *next = input_->substr(start_, last ? std::string::npos : end - start_);
+    if (next->empty()) return JXL_FAILURE("Missing token");
+    // `npos + 1` would wrap to 0 and replay the input from its first token.
+    start_ = last ? input_->size() : end + 1;
+    return true;
+  }
+
+ private:
+  const std::string* const input_;  // not owned
+  const char separator_;
+  size_t start_ = 0;  // of next token
+};
+
+// Parses `num` into `*d`; fails if no number is found, on NaN, or on ERANGE.
+Status ParseDouble(const std::string& num, double* d) {
+  char* stop = nullptr;
+  errno = 0;
+  *d = strtod(num.c_str(), &stop);
+  const bool nothing_parsed = stop == num.c_str() && *d == 0.0;
+  if (nothing_parsed || std::isnan(*d)) {
+    return JXL_FAILURE("Invalid double: %s", num.c_str());
+  }
+  if (errno == ERANGE) {
+    return JXL_FAILURE("Double out of range: %s", num.c_str());
+  }
+  return true;
+}
+
+// Parses the next token of `tokenizer` as a double.
+Status ParseDouble(Tokenizer* tokenizer, double* d) {
+  std::string token;
+  JXL_RETURN_IF_ERROR(tokenizer->Next(&token));
+  return ParseDouble(token, d);
+}
+
+// Reads the next token and stores the color space it names in `c`.
+Status ParseColorSpace(Tokenizer* tokenizer, JxlColorEncoding* c) {
+  std::string token;
+  JXL_RETURN_IF_ERROR(tokenizer->Next(&token));
+  JxlColorSpace parsed;
+  if (!PARSE_ENUM(JxlColorSpace, token, &parsed)) {
+    return JXL_FAILURE("Unknown ColorSpace %s", token.c_str());
+  }
+  c->color_space = parsed;
+  return true;
+}
+
+// Parses a white point: an enum name or a custom "x;y" pair. XYB implies D65.
+Status ParseWhitePoint(Tokenizer* tokenizer, JxlColorEncoding* c) {
+  if (c->color_space == JXL_COLOR_SPACE_XYB) {
+    // Implicit white point; no token is consumed.
+    c->white_point = JXL_WHITE_POINT_D65;
+    return true;
+  }
+
+  std::string token;
+  JXL_RETURN_IF_ERROR(tokenizer->Next(&token));
+  if (PARSE_ENUM(JxlWhitePoint, token, &c->white_point)) return true;
+  c->white_point = JXL_WHITE_POINT_CUSTOM;  // Not a name: expect coordinates.
+  Tokenizer xy(&token, ';');
+  JXL_RETURN_IF_ERROR(ParseDouble(&xy, &c->white_point_xy[0]));
+  JXL_RETURN_IF_ERROR(ParseDouble(&xy, &c->white_point_xy[1]));
+  return true;
+}
+
+// Parses an enum name or six ';'-separated xy values (red, green, blue).
+Status ParsePrimaries(Tokenizer* tokenizer, JxlColorEncoding* c) {
+  if (c->color_space == JXL_COLOR_SPACE_GRAY ||
+      c->color_space == JXL_COLOR_SPACE_XYB) {
+    // No primaries case.
+    return true;
+  }
+
+  std::string str;
+  JXL_RETURN_IF_ERROR(tokenizer->Next(&str));
+  if (PARSE_ENUM(JxlPrimaries, str, &c->primaries)) return true;
+
+  Tokenizer xy_tokenizer(&str, ';');
+  JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->primaries_red_xy + 0));
+  JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->primaries_red_xy + 1));
+  JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->primaries_green_xy + 0));
+  JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->primaries_green_xy + 1));
+  JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->primaries_blue_xy + 0));
+  JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->primaries_blue_xy + 1));
+  c->primaries = JXL_PRIMARIES_CUSTOM;
+  return true;  // Every coordinate parsed, so custom primaries are valid.
+}
+
+// Reads the next token and stores the rendering intent it names in `c`.
+Status ParseRenderingIntent(Tokenizer* tokenizer, JxlColorEncoding* c) {
+  std::string token;
+  JXL_RETURN_IF_ERROR(tokenizer->Next(&token));
+  if (PARSE_ENUM(JxlRenderingIntent, token, &c->rendering_intent)) return true;
+  return JXL_FAILURE("Invalid RenderingIntent %s\n", token.c_str());
+}
+
+// Parses the transfer function: an enum name or "g<gamma>". XYB implies a
+// gamma of 1/3 and consumes no token.
+Status ParseTransferFunction(Tokenizer* tokenizer, JxlColorEncoding* c) {
+  if (c->color_space == JXL_COLOR_SPACE_XYB) {
+    // Implicit TF.
+    c->gamma = 1 / 3.;
+    c->transfer_function = JXL_TRANSFER_FUNCTION_GAMMA;
+    return true;
+  }
+
+  std::string token;
+  JXL_RETURN_IF_ERROR(tokenizer->Next(&token));
+  if (PARSE_ENUM(JxlTransferFunction, token, &c->transfer_function)) {
+    return true;
+  }
+  if (token[0] != 'g') {
+    return JXL_FAILURE("Invalid gamma %s", token.c_str());
+  }
+  JXL_RETURN_IF_ERROR(ParseDouble(token.substr(1), &c->gamma));
+  c->transfer_function = JXL_TRANSFER_FUNCTION_GAMMA;
+  return true;
+}
+
+}  // namespace
+
+Status ParseDescription(const std::string& description, JxlColorEncoding* c) {
+  *c = {};  // Reset every field before parsing.
+  Tokenizer tokenizer(&description, '_');  // Fields are separated by '_'.
+  JXL_RETURN_IF_ERROR(ParseColorSpace(&tokenizer, c));  // e.g. "RGB"
+  JXL_RETURN_IF_ERROR(ParseWhitePoint(&tokenizer, c));  // e.g. "D65"
+  JXL_RETURN_IF_ERROR(ParsePrimaries(&tokenizer, c));  // e.g. "SRG"
+  JXL_RETURN_IF_ERROR(ParseRenderingIntent(&tokenizer, c));  // e.g. "Rel"
+  JXL_RETURN_IF_ERROR(ParseTransferFunction(&tokenizer, c));  // e.g. "Lin"
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/color_description.h b/third-party/libjxl/libjxl/lib/extras/dec/color_description.h
new file mode 100644
index 0000000000..23680ff7c6
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/color_description.h
@@ -0,0 +1,23 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_COLOR_DESCRIPTION_H_
+#define LIB_EXTRAS_COLOR_DESCRIPTION_H_
+
+#include <jxl/color_encoding.h>
+
+#include <string>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Parse the color description into a JxlColorEncoding "RGB_D65_SRG_Rel_Lin".
+Status ParseDescription(const std::string& description,
+                        JxlColorEncoding* JXL_RESTRICT c);
+
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_COLOR_DESCRIPTION_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/color_description_test.cc b/third-party/libjxl/libjxl/lib/extras/dec/color_description_test.cc
new file mode 100644
index 0000000000..a1c04a94e4
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/color_description_test.cc
@@ -0,0 +1,38 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/color_description.h"
+
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+
+// Verify ParseDescription(Description) yields the same ColorEncoding
+TEST(ColorDescriptionTest, RoundTripAll) {
+  for (const auto& cdesc : test::AllEncodings()) {
+    const ColorEncoding c_original = test::ColorEncodingFromDescriptor(cdesc);
+    const std::string description = Description(c_original);
+    printf("%s\n", description.c_str());
+
+    JxlColorEncoding c_external = {};
+    EXPECT_TRUE(ParseDescription(description, &c_external));
+    ColorEncoding c_internal;
+    EXPECT_TRUE(
+        ConvertExternalToInternalColorEncoding(c_external, &c_internal));
+    EXPECT_TRUE(c_original.SameColorEncoding(c_internal))
+        << "Where c_original=" << c_original
+        << " and c_internal=" << c_internal;
+  }
+}
+
+TEST(ColorDescriptionTest, NanGamma) {
+  const std::string description = "Gra_2_Per_gnan";
+  JxlColorEncoding c;
+  EXPECT_FALSE(ParseDescription(description, &c));
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/color_hints.cc b/third-party/libjxl/libjxl/lib/extras/dec/color_hints.cc
new file mode 100644
index 0000000000..5c6d7b84a0
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/color_hints.cc
@@ -0,0 +1,78 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/color_hints.h"
+
+#include <jxl/encode.h>
+
+#include <vector>
+
+#include "lib/extras/dec/color_description.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace extras {
+
+// Applies each hint to `ppf`. "color_space" and "icc" hints are skipped with a
+// warning when `color_already_set`; with no color information, sRGB is used.
+Status ApplyColorHints(const ColorHints& color_hints,
+                       const bool color_already_set, const bool is_gray,
+                       PackedPixelFile* ppf) {
+  bool got_color_space = color_already_set;
+  // Copies the raw bytes of a hint value.
+  const auto to_bytes = [](const std::string& value) {
+    const uint8_t* data = reinterpret_cast<const uint8_t*>(value.data());
+    return std::vector<uint8_t>(data, data + value.size());
+  };
+
+  JXL_RETURN_IF_ERROR(color_hints.Foreach(
+      [color_already_set, is_gray, ppf, &got_color_space, &to_bytes](
+          const std::string& key, const std::string& value) -> Status {
+        const bool is_color_key = key == "color_space" || key == "icc";
+        if (color_already_set && is_color_key) {
+          JXL_WARNING("Decoder ignoring %s hint", key.c_str());
+          return true;
+        }
+        if (key == "color_space") {
+          JxlColorEncoding parsed;
+          if (!ParseDescription(value, &parsed)) {
+            return JXL_FAILURE("Failed to apply color_space");
+          }
+          ppf->color_encoding = parsed;
+
+          const bool hint_is_gray =
+              ppf->color_encoding.color_space == JXL_COLOR_SPACE_GRAY;
+          if (is_gray != hint_is_gray) {
+            return JXL_FAILURE("mismatch between file and color_space hint");
+          }
+          got_color_space = true;
+        } else if (key == "icc") {
+          ppf->icc = to_bytes(value);
+          got_color_space = true;
+        } else if (key == "exif") {
+          ppf->metadata.exif = to_bytes(value);
+        } else if (key == "xmp") {
+          ppf->metadata.xmp = to_bytes(value);
+        } else if (key == "jumbf") {
+          ppf->metadata.jumbf = to_bytes(value);
+        } else {
+          JXL_WARNING("Ignoring %s hint", key.c_str());
+        }
+        return true;
+      }));
+
+  if (!got_color_space) {
+    auto& enc = ppf->color_encoding;
+    enc.color_space = is_gray ? JXL_COLOR_SPACE_GRAY : JXL_COLOR_SPACE_RGB;
+    enc.white_point = JXL_WHITE_POINT_D65;
+    enc.primaries = JXL_PRIMARIES_SRGB;
+    enc.transfer_function = JXL_TRANSFER_FUNCTION_SRGB;
+  }
+
+  return true;
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/color_hints.h b/third-party/libjxl/libjxl/lib/extras/dec/color_hints.h
new file mode 100644
index 0000000000..036f203e26
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/color_hints.h
@@ -0,0 +1,74 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_COLOR_HINTS_H_
+#define LIB_EXTRAS_COLOR_HINTS_H_
+
+// Not all the formats implemented in the extras lib support bundling color
+// information into the file, and those that support it may not have it.
+// To allow attaching color information to those file formats the caller can
+// define these color hints.
+// Besides color space information, 'ColorHints' may also include other
+// additional information such as Exif, XMP and JUMBF metadata.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace extras {
+
+// Ordered list of key/value hints (color space, ICC, metadata blobs).
+class ColorHints {
+ public:
+  // key=color_space, value=Description(c/pp): specify the ColorEncoding of
+  //   the pixels for decoding. Otherwise, if the codec did not obtain an ICC
+  //   profile from the image, assume sRGB.
+  //
+  // Strings are taken from the command line, so avoid spaces for convenience.
+  void Add(const std::string& key, const std::string& value) {
+    kv_.push_back(KeyValue(key, value));
+  }
+
+  // Invokes `func(key, value)` on every hint in insertion order and stops
+  // with a failure as soon as one call returns false.
+  template <class Func>
+  Status Foreach(const Func& func) const {
+    for (auto it = kv_.begin(); it != kv_.end(); ++it) {
+      Status ok = func(it->key, it->value);
+      if (ok) continue;
+      return JXL_FAILURE("ColorHints::Foreach returned false");
+    }
+    return true;
+  }
+
+ private:
+  // Keeping key and value separate spares each codec from parsing them.
+  struct KeyValue {
+    KeyValue(std::string k, std::string v)
+        : key(std::move(k)), value(std::move(v)) {}
+
+    std::string key;
+    std::string value;
+  };
+
+  std::vector<KeyValue> kv_;
+};
+
+// Apply the color hints to the decoded image in PackedPixelFile if any.
+// color_already_set tells whether the color encoding was already set, in which
+// case the hints are ignored if any hint is passed.
+Status ApplyColorHints(const ColorHints& color_hints, bool color_already_set,
+                       bool is_gray, PackedPixelFile* ppf);
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_COLOR_HINTS_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/decode.cc b/third-party/libjxl/libjxl/lib/extras/dec/decode.cc
new file mode 100644
index 0000000000..2e052db152
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/decode.cc
@@ -0,0 +1,156 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/decode.h"
+
+#include <locale>
+
+#include "lib/extras/dec/apng.h"
+#include "lib/extras/dec/exr.h"
+#include "lib/extras/dec/gif.h"
+#include "lib/extras/dec/jpg.h"
+#include "lib/extras/dec/jxl.h"
+#include "lib/extras/dec/pgx.h"
+#include "lib/extras/dec/pnm.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+// Any valid encoding is larger (ensures codecs can read the first few bytes)
+constexpr size_t kMinBytes = 9;
+
+// Splits `path` into `*basename` and `*extension` ("file.jxl" -> "file",
+// ".jxl"; "jxl:-" -> "-", ".jxl"); the extension is empty if none is found.
+void BasenameAndExtension(std::string path, std::string* basename,
+                          std::string* extension) {
+  const size_t dot = path.rfind('.');
+  const size_t colon = path.find(':');
+  if (dot != std::string::npos) {
+    // Pattern: file.jxl
+    *basename = path.substr(0, dot);
+    *extension = path.substr(dot);
+  } else if (colon != std::string::npos) {
+    // Pattern: jxl:-
+    *basename = path.substr(colon + 1);
+    *extension = "." + path.substr(0, colon);
+  } else {
+    // Extension not found
+    *basename = path;
+    *extension = "";
+  }
+}
+
+}  // namespace
+
+// Maps the (case-insensitive) extension of `path` to a Codec and optionally
+// reports the basename/extension as split by BasenameAndExtension.
+Codec CodecFromPath(std::string path, size_t* JXL_RESTRICT bits_per_sample,
+                    std::string* basename, std::string* extension) {
+  std::string stem;
+  std::string suffix;
+  BasenameAndExtension(path, &stem, &suffix);
+  if (basename != nullptr) *basename = stem;
+  if (extension != nullptr) *extension = suffix;
+
+  // Lower-case with the classic locale; the reported extension keeps its case.
+  for (char& ch : suffix) ch = std::tolower(ch, std::locale::classic());
+
+  if (suffix == ".pfm") {
+    // PFM is the only extension that changes *bits_per_sample.
+    if (bits_per_sample != nullptr) *bits_per_sample = 32;
+    return Codec::kPNM;
+  }
+
+  if (suffix == ".png") return Codec::kPNG;
+  if (suffix == ".jpg" || suffix == ".jpeg") return Codec::kJPG;
+  if (suffix == ".pgx") return Codec::kPGX;
+  if (suffix == ".gif") return Codec::kGIF;
+  if (suffix == ".exr") return Codec::kEXR;
+
+  // Remaining extensions handled by the PNM codec.
+  if (suffix == ".pam") return Codec::kPNM;
+  if (suffix == ".pnm") return Codec::kPNM;
+  if (suffix == ".pgm") return Codec::kPNM;
+  if (suffix == ".ppm") return Codec::kPNM;
+
+  return Codec::kUnknown;
+}
+
+bool CanDecode(Codec codec) {  // Reports whether `codec` can be decoded.
+  switch (codec) {
+    case Codec::kEXR:
+      return CanDecodeEXR();  // Delegates to the per-format query.
+    case Codec::kGIF:
+      return CanDecodeGIF();
+    case Codec::kJPG:
+      return CanDecodeJPG();
+    case Codec::kPNG:
+      return CanDecodeAPNG();  // PNG is handled by the APNG decoder.
+    case Codec::kPNM:
+    case Codec::kPGX:
+    case Codec::kJXL:
+      return true;  // Always reported as decodable.
+    default:
+      return false;  // Includes Codec::kUnknown.
+  }
+}
+
+Status DecodeBytes(const Span<const uint8_t> bytes,
+                   const ColorHints& color_hints, extras::PackedPixelFile* ppf,
+                   const SizeConstraints* constraints, Codec* orig_codec) {
+  if (bytes.size() < kMinBytes) return JXL_FAILURE("Too few bytes");
+
+  *ppf = extras::PackedPixelFile();  // Discard any previous contents.
+
+  // Default values when not set by decoders.
+  ppf->info.uses_original_profile = true;
+  ppf->info.orientation = JXL_ORIENT_IDENTITY;
+
+  const auto choose_codec = [&]() -> Codec {  // First success wins.
+    if (DecodeImageAPNG(bytes, color_hints, ppf, constraints)) {
+      return Codec::kPNG;
+    }
+    if (DecodeImagePGX(bytes, color_hints, ppf, constraints)) {
+      return Codec::kPGX;
+    }
+    if (DecodeImagePNM(bytes, color_hints, ppf, constraints)) {
+      return Codec::kPNM;
+    }
+    JXLDecompressParams dparams = {};  // Accept float output, 1-4 channels.
+    for (const uint32_t num_channels : {1, 2, 3, 4}) {
+      dparams.accepted_formats.push_back(
+          {num_channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, /*align=*/0});
+    }
+    size_t decoded_bytes;  // Output of DecodeImageJXL; not read afterwards.
+    if (DecodeImageJXL(bytes.data(), bytes.size(), dparams, &decoded_bytes,
+                       ppf) &&
+        ApplyColorHints(color_hints, true, ppf->info.num_color_channels == 1,
+                        ppf)) {
+      return Codec::kJXL;
+    }
+    if (DecodeImageGIF(bytes, color_hints, ppf, constraints)) {
+      return Codec::kGIF;
+    }
+    if (DecodeImageJPG(bytes, color_hints, ppf, constraints)) {
+      return Codec::kJPG;
+    }
+    if (DecodeImageEXR(bytes, color_hints, ppf, constraints)) {
+      return Codec::kEXR;
+    }
+    return Codec::kUnknown;  // Every decoder rejected the input.
+  };
+
+  Codec codec = choose_codec();
+  if (codec == Codec::kUnknown) {
+    return JXL_FAILURE("Codecs failed to decode");
+  }
+  if (orig_codec) *orig_codec = codec;  // Optional output.
+
+  return true;
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/decode.h b/third-party/libjxl/libjxl/lib/extras/dec/decode.h
new file mode 100644
index 0000000000..954e54551e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/decode.h
@@ -0,0 +1,58 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_DEC_DECODE_H_
+#define LIB_EXTRAS_DEC_DECODE_H_
+
+// Facade for image decoders (PNG, PNM, ...).
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/extras/dec/color_hints.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+struct SizeConstraints;
+
+namespace extras {
+
+// Codecs supported by DecodeBytes.
+enum class Codec : uint32_t {
+  kUnknown,  // for CodecFromPath
+  kPNG,
+  kPNM,
+  kPGX,
+  kJPG,
+  kGIF,
+  kEXR,
+  kJXL
+};
+
+bool CanDecode(Codec codec);
+
+// If and only if extension is ".pfm", *bits_per_sample is updated to 32 so
+// that Encode() would encode to PFM instead of PPM.
+Codec CodecFromPath(std::string path,
+                    size_t* JXL_RESTRICT bits_per_sample = nullptr,
+                    std::string* basename = nullptr,
+                    std::string* extension = nullptr);
+
+// Decodes "bytes" info *ppf.
+// color_space_hint may specify the color space, otherwise, defaults to sRGB.
+Status DecodeBytes(Span<const uint8_t> bytes, const ColorHints& color_hints,
+                   extras::PackedPixelFile* ppf,
+                   const SizeConstraints* constraints = nullptr,
+                   Codec* orig_codec = nullptr);
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_DEC_DECODE_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/exr.cc b/third-party/libjxl/libjxl/lib/extras/dec/exr.cc
new file mode 100644
index 0000000000..821e0f4b21
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/exr.cc
@@ -0,0 +1,201 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/exr.h"
+
+#if JPEGXL_ENABLE_EXR
+#include <ImfChromaticitiesAttribute.h>
+#include <ImfIO.h>
+#include <ImfRgbaFile.h>
+#include <ImfStandardAttributes.h>
+#endif
+
+#include <vector>
+
+namespace jxl {
+namespace extras {
+
+#if JPEGXL_ENABLE_EXR
+namespace {
+
+namespace OpenEXR = OPENEXR_IMF_NAMESPACE;
+
+// OpenEXR::Int64 is deprecated in favor of using uint64_t directly, but using
+// uint64_t as recommended causes build failures with previous OpenEXR versions
+// on macOS, where the definition for OpenEXR::Int64 was actually not equivalent
+// to uint64_t. This alternative should work in all cases.
+using ExrInt64 = decltype(std::declval<OpenEXR::IStream>().tellg());
+
+constexpr int kExrBitsPerSample = 16;
+constexpr int kExrAlphaBits = 16;
+
+class InMemoryIStream : public OpenEXR::IStream {
+ public:
+  // The data pointed to by `bytes` must outlive the InMemoryIStream.
+  explicit InMemoryIStream(const Span<const uint8_t> bytes)
+      : IStream(/*fileName=*/""), bytes_(bytes) {}
+
+  bool isMemoryMapped() const override { return true; }
+  char* readMemoryMapped(const int n) override {
+    JXL_ASSERT(pos_ + n <= bytes_.size());  // the n bytes must be in range
+    char* const result =
+        const_cast<char*>(reinterpret_cast<const char*>(bytes_.data() + pos_));
+    pos_ += n;  // consume the bytes that are being handed out
+    return result;
+  }
+  bool read(char c[], const int n) override {
+    std::copy_n(readMemoryMapped(n), n, c);
+    return pos_ < bytes_.size();  // false once the last byte was consumed
+  }
+
+  ExrInt64 tellg() override { return pos_; }
+  void seekg(const ExrInt64 pos) override {
+    JXL_ASSERT(pos + 1 <= bytes_.size());  // target must be an existing byte
+    pos_ = pos;
+  }
+
+ private:
+  const Span<const uint8_t> bytes_;  // not owned, see constructor comment
+  size_t pos_ = 0;                   // current read offset into bytes_
+};
+
+}  // namespace
+#endif
+
+bool CanDecodeEXR() {
+#if !JPEGXL_ENABLE_EXR
+  return false;  // the OpenEXR-backed decoder is compiled out
+#else
+  return true;
+#endif
+}
+
+Status DecodeImageEXR(Span<const uint8_t> bytes, const ColorHints& color_hints,
+                      PackedPixelFile* ppf,
+                      const SizeConstraints* constraints) {
+#if JPEGXL_ENABLE_EXR
+  InMemoryIStream is(bytes);  // color_hints and constraints are not read below
+
+#ifdef __EXCEPTIONS
+  std::unique_ptr<OpenEXR::RgbaInputFile> input_ptr;
+  try {
+    input_ptr.reset(new OpenEXR::RgbaInputFile(is));
+  } catch (...) {
+    // silently return false if it is not an EXR file
+    return false;
+  }
+  OpenEXR::RgbaInputFile& input = *input_ptr;
+#else
+  OpenEXR::RgbaInputFile input(is);
+#endif
+
+  if ((input.channels() & OpenEXR::RgbaChannels::WRITE_RGB) !=
+      OpenEXR::RgbaChannels::WRITE_RGB) {
+    return JXL_FAILURE("only RGB OpenEXR files are supported");
+  }
+  const bool has_alpha = (input.channels() & OpenEXR::RgbaChannels::WRITE_A) ==
+                         OpenEXR::RgbaChannels::WRITE_A;
+
+  const float intensity_target = OpenEXR::hasWhiteLuminance(input.header())
+                                     ? OpenEXR::whiteLuminance(input.header())
+                                     : 0;  // no whiteLuminance attribute
+
+  auto image_size = input.displayWindow().size();
+  // Size is computed as max - min, but both bounds are inclusive.
+  ++image_size.x;
+  ++image_size.y;
+
+  ppf->info.xsize = image_size.x;
+  ppf->info.ysize = image_size.y;
+  ppf->info.num_color_channels = 3;
+
+  const JxlDataType data_type =
+      kExrBitsPerSample == 16 ? JXL_TYPE_FLOAT16 : JXL_TYPE_FLOAT;
+  const JxlPixelFormat format{
+      /*num_channels=*/3u + (has_alpha ? 1u : 0u),
+      /*data_type=*/data_type,
+      /*endianness=*/JXL_NATIVE_ENDIAN,
+      /*align=*/0,
+  };
+  ppf->frames.clear();
+  // Allocates the frame buffer (display-window sized, interleaved channels).
+  ppf->frames.emplace_back(image_size.x, image_size.y, format);
+  const auto& frame = ppf->frames.back();
+
+  const int row_size = input.dataWindow().size().x + 1;  // data window width
+  // Number of rows to read at a time.
+  // https://www.openexr.com/documentation/ReadingAndWritingImageFiles.pdf
+  // recommends reading the whole file at once.
+  const int y_chunk_size = input.displayWindow().size().y + 1;
+  std::vector<OpenEXR::Rgba> input_rows(row_size * y_chunk_size);
+  for (int start_y =
+           std::max(input.dataWindow().min.y, input.displayWindow().min.y);
+       start_y <=
+       std::min(input.dataWindow().max.y, input.displayWindow().max.y);
+       start_y += y_chunk_size) {
+    // Inclusive.
+    const int end_y = std::min(
+        start_y + y_chunk_size - 1,
+        std::min(input.dataWindow().max.y, input.displayWindow().max.y));
+    input.setFrameBuffer(
+        input_rows.data() - input.dataWindow().min.x - start_y * row_size,
+        /*xStride=*/1, /*yStride=*/row_size);  // (min.x, start_y) -> index 0
+    input.readPixels(start_y, end_y);
+    for (int exr_y = start_y; exr_y <= end_y; ++exr_y) {
+      const int image_y = exr_y - input.displayWindow().min.y;
+      const OpenEXR::Rgba* const JXL_RESTRICT input_row =
+          &input_rows[(exr_y - start_y) * row_size];
+      uint8_t* row = static_cast<uint8_t*>(frame.color.pixels()) +
+                     frame.color.stride * image_y;
+      const uint32_t pixel_size =
+          (3 + (has_alpha ? 1 : 0)) * kExrBitsPerSample / 8;
+      for (int exr_x =
+               std::max(input.dataWindow().min.x, input.displayWindow().min.x);
+           exr_x <=
+           std::min(input.dataWindow().max.x, input.displayWindow().max.x);
+           ++exr_x) {
+        const int image_x = exr_x - input.displayWindow().min.x;
+        // TODO(eustas): UB: OpenEXR::Rgba is not TriviallyCopyable
+        memcpy(row + image_x * pixel_size,
+               input_row + (exr_x - input.dataWindow().min.x), pixel_size);
+      }
+    }
+  }
+
+  ppf->color_encoding.transfer_function = JXL_TRANSFER_FUNCTION_LINEAR;
+  ppf->color_encoding.color_space = JXL_COLOR_SPACE_RGB;
+  ppf->color_encoding.primaries = JXL_PRIMARIES_SRGB;
+  ppf->color_encoding.white_point = JXL_WHITE_POINT_D65;
+  if (OpenEXR::hasChromaticities(input.header())) {
+    ppf->color_encoding.primaries = JXL_PRIMARIES_CUSTOM;
+    ppf->color_encoding.white_point = JXL_WHITE_POINT_CUSTOM;
+    const auto& chromaticities = OpenEXR::chromaticities(input.header());
+    ppf->color_encoding.primaries_red_xy[0] = chromaticities.red.x;
+    ppf->color_encoding.primaries_red_xy[1] = chromaticities.red.y;
+    ppf->color_encoding.primaries_green_xy[0] = chromaticities.green.x;
+    ppf->color_encoding.primaries_green_xy[1] = chromaticities.green.y;
+    ppf->color_encoding.primaries_blue_xy[0] = chromaticities.blue.x;
+    ppf->color_encoding.primaries_blue_xy[1] = chromaticities.blue.y;
+    ppf->color_encoding.white_point_xy[0] = chromaticities.white.x;
+    ppf->color_encoding.white_point_xy[1] = chromaticities.white.y;
+  }
+
+  // EXR uses binary16 or binary32 floating point format.
+  ppf->info.bits_per_sample = kExrBitsPerSample;
+  ppf->info.exponent_bits_per_sample = kExrBitsPerSample == 16 ? 5 : 8;
+  if (has_alpha) {
+    ppf->info.alpha_bits = kExrAlphaBits;
+    ppf->info.alpha_exponent_bits = ppf->info.exponent_bits_per_sample;
+    ppf->info.alpha_premultiplied = true;
+  }
+  ppf->info.intensity_target = intensity_target;
+  return true;
+#else
+  return false;
+#endif
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/exr.h b/third-party/libjxl/libjxl/lib/extras/dec/exr.h
new file mode 100644
index 0000000000..3a15e086df
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/exr.h
@@ -0,0 +1,34 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_DEC_EXR_H_
+#define LIB_EXTRAS_DEC_EXR_H_
+
+// Decodes OpenEXR images in memory.
+
+#include "lib/extras/dec/color_hints.h"
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+struct SizeConstraints;
+
+namespace extras {
+
+bool CanDecodeEXR();
+
+// Decodes `bytes` into `ppf`. color_hints are ignored.
+Status DecodeImageEXR(Span<const uint8_t> bytes, const ColorHints& color_hints,
+                      PackedPixelFile* ppf,
+                      const SizeConstraints* constraints = nullptr);
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_DEC_EXR_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/gif.cc b/third-party/libjxl/libjxl/lib/extras/dec/gif.cc
new file mode 100644
index 0000000000..3d963941c0
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/gif.cc
@@ -0,0 +1,415 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/gif.h"
+
+#if JPEGXL_ENABLE_GIF
+#include <gif_lib.h>
+#endif
+#include <jxl/codestream_header.h>
+#include <string.h>
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/size_constraints.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+namespace extras {
+
+#if JPEGXL_ENABLE_GIF
+namespace {
+
+struct ReadState {
+  Span<const uint8_t> bytes;  // input not yet consumed by the read callback
+};
+
+struct DGifCloser {
+  void operator()(GifFileType* const ptr) const { DGifCloseFile(ptr, nullptr); }
+};
+using GifUniquePtr = std::unique_ptr<GifFileType, DGifCloser>;
+
+struct PackedRgba {
+  uint8_t r, g, b, a;  // one pixel of the 4-channel uint8 canvas
+};
+
+struct PackedRgb {
+  uint8_t r, g, b;  // one pixel of a 3-channel uint8 output frame
+};
+
+void ensure_have_alpha(PackedFrame* frame) {
+  // An existing extra channel means alpha was already set up.
+  if (!frame->extra_channels.empty()) return;
+  const auto width = frame->color.xsize;
+  const auto height = frame->color.ysize;
+  const JxlPixelFormat alpha_format{/*num_channels=*/1u,
+                                    /*data_type=*/JXL_TYPE_UINT8,
+                                    /*endianness=*/JXL_NATIVE_ENDIAN,
+                                    /*align=*/0};
+  frame->extra_channels.emplace_back(width, height, alpha_format);
+  // Start out fully opaque; callers overwrite individual pixels later.
+  auto* alpha = static_cast<uint8_t*>(frame->extra_channels[0].pixels());
+  std::fill_n(alpha, width * height, 255u);
+}
+}  // namespace
+#endif
+
+bool CanDecodeGIF() {
+#if !JPEGXL_ENABLE_GIF
+  return false;  // the giflib-backed decoder is compiled out
+#else
+  return true;
+#endif
+}
+
+Status DecodeImageGIF(Span<const uint8_t> bytes, const ColorHints& color_hints,
+                      PackedPixelFile* ppf,
+                      const SizeConstraints* constraints) {  // may be nullptr
+#if JPEGXL_ENABLE_GIF
+  int error = GIF_OK;
+  ReadState state = {bytes};
+  const auto ReadFromSpan = [](GifFileType* const gif, GifByteType* const bytes,
+                               int n) {
+    ReadState* const state = reinterpret_cast<ReadState*>(gif->UserData);
+    // giflib API requires the input size `n` to be signed int.
+    if (static_cast<size_t>(n) > state->bytes.size()) {
+      n = state->bytes.size();
+    }
+    memcpy(bytes, state->bytes.data(), n);
+    state->bytes.remove_prefix(n);
+    return n;
+  };
+  GifUniquePtr gif(DGifOpen(&state, ReadFromSpan, &error));
+  if (gif == nullptr) {
+    if (error == D_GIF_ERR_NOT_GIF_FILE) {
+      // Not an error.
+      return false;
+    } else {
+      return JXL_FAILURE("Failed to read GIF: %s", GifErrorString(error));
+    }
+  }
+  error = DGifSlurp(gif.get());
+  if (error != GIF_OK) {
+    return JXL_FAILURE("Failed to read GIF: %s", GifErrorString(gif->Error));
+  }
+
+  msan::UnpoisonMemory(gif.get(), sizeof(*gif));
+  if (gif->SColorMap) {
+    msan::UnpoisonMemory(gif->SColorMap, sizeof(*gif->SColorMap));
+    msan::UnpoisonMemory(
+        gif->SColorMap->Colors,
+        sizeof(*gif->SColorMap->Colors) * gif->SColorMap->ColorCount);
+  }
+  msan::UnpoisonMemory(gif->SavedImages,
+                       sizeof(*gif->SavedImages) * gif->ImageCount);
+
+  JXL_RETURN_IF_ERROR(
+      VerifyDimensions<uint32_t>(constraints, gif->SWidth, gif->SHeight));
+  uint64_t total_pixel_count =
+      static_cast<uint64_t>(gif->SWidth) * gif->SHeight;
+  for (int i = 0; i < gif->ImageCount; ++i) {
+    const SavedImage& image = gif->SavedImages[i];
+    uint32_t w = image.ImageDesc.Width;
+    uint32_t h = image.ImageDesc.Height;
+    JXL_RETURN_IF_ERROR(VerifyDimensions<uint32_t>(constraints, w, h));
+    uint64_t pixel_count = static_cast<uint64_t>(w) * h;
+    if (total_pixel_count + pixel_count < total_pixel_count) {
+      return JXL_FAILURE("Image too big");
+    }
+    total_pixel_count += pixel_count;
+    if (constraints && (total_pixel_count > constraints->dec_max_pixels)) {
+      return JXL_FAILURE("Image too big");
+    }
+  }
+
+  if (!gif->SColorMap) {
+    for (int i = 0; i < gif->ImageCount; ++i) {
+      if (!gif->SavedImages[i].ImageDesc.ColorMap) {
+        return JXL_FAILURE("Missing GIF color map");
+      }
+    }
+  }
+
+  if (gif->ImageCount > 1) {
+    ppf->info.have_animation = true;
+    // Delays in GIF are specified in 100ths of a second.
+    ppf->info.animation.tps_numerator = 100;
+    ppf->info.animation.tps_denominator = 1;
+  }
+
+  ppf->frames.clear();
+  ppf->frames.reserve(gif->ImageCount);
+
+  ppf->info.xsize = gif->SWidth;
+  ppf->info.ysize = gif->SHeight;
+  ppf->info.bits_per_sample = 8;
+  ppf->info.exponent_bits_per_sample = 0;
+  // alpha_bits is later set to 8 if we find a frame with transparent pixels.
+  ppf->info.alpha_bits = 0;
+  ppf->info.alpha_exponent_bits = 0;
+  JXL_RETURN_IF_ERROR(ApplyColorHints(color_hints, /*color_already_set=*/false,
+                                      /*is_gray=*/false, ppf));
+
+  ppf->info.num_color_channels = 3;
+
+  // Pixel format for the 'canvas' onto which we paint
+  // the (potentially individually cropped) GIF frames
+  // of an animation.
+  const JxlPixelFormat canvas_format{
+      /*num_channels=*/4u,
+      /*data_type=*/JXL_TYPE_UINT8,
+      /*endianness=*/JXL_NATIVE_ENDIAN,
+      /*align=*/0,
+  };
+
+  // Pixel format for the JXL PackedFrame that goes into the
+  // PackedPixelFile. Here, we use 3 color channels, and provide
+  // the alpha channel as an extra_channel wherever it is used.
+  const JxlPixelFormat packed_frame_format{
+      /*num_channels=*/3u,
+      /*data_type=*/JXL_TYPE_UINT8,
+      /*endianness=*/JXL_NATIVE_ENDIAN,
+      /*align=*/0,
+  };
+
+  GifColorType background_color;
+  if (gif->SColorMap == nullptr ||
+      gif->SBackGroundColor >= gif->SColorMap->ColorCount) {
+    background_color = {0, 0, 0};
+  } else {
+    background_color = gif->SColorMap->Colors[gif->SBackGroundColor];
+  }
+  const PackedRgba background_rgba{background_color.Red, background_color.Green,
+                                   background_color.Blue, 0};
+  PackedFrame canvas(gif->SWidth, gif->SHeight, canvas_format);
+  std::fill_n(static_cast<PackedRgba*>(canvas.color.pixels()),
+              canvas.color.xsize * canvas.color.ysize, background_rgba);
+  Rect canvas_rect{0, 0, canvas.color.xsize, canvas.color.ysize};
+
+  Rect previous_rect_if_restore_to_background;
+
+  bool replace = true;
+  bool last_base_was_none = true;
+  for (int i = 0; i < gif->ImageCount; ++i) {
+    const SavedImage& image = gif->SavedImages[i];
+    msan::UnpoisonMemory(image.RasterBits, sizeof(*image.RasterBits) *
+                                               image.ImageDesc.Width *
+                                               image.ImageDesc.Height);
+    const Rect image_rect(image.ImageDesc.Left, image.ImageDesc.Top,
+                          image.ImageDesc.Width, image.ImageDesc.Height);
+
+    Rect total_rect;
+    if (previous_rect_if_restore_to_background.xsize() != 0 ||
+        previous_rect_if_restore_to_background.ysize() != 0) {
+      const size_t xbegin = std::min(
+          image_rect.x0(), previous_rect_if_restore_to_background.x0());
+      const size_t ybegin = std::min(
+          image_rect.y0(), previous_rect_if_restore_to_background.y0());
+      const size_t xend =
+          std::max(image_rect.x0() + image_rect.xsize(),
+                   previous_rect_if_restore_to_background.x0() +
+                       previous_rect_if_restore_to_background.xsize());
+      const size_t yend =
+          std::max(image_rect.y0() + image_rect.ysize(),
+                   previous_rect_if_restore_to_background.y0() +
+                       previous_rect_if_restore_to_background.ysize());
+      total_rect = Rect(xbegin, ybegin, xend - xbegin, yend - ybegin);
+      previous_rect_if_restore_to_background = Rect();
+      replace = true;
+    } else {
+      total_rect = image_rect;
+      replace = false;
+    }
+    if (!image_rect.IsInside(canvas_rect)) {
+      return JXL_FAILURE("GIF frame extends outside of the canvas");
+    }
+
+    // Allocates the frame buffer.
+    ppf->frames.emplace_back(total_rect.xsize(), total_rect.ysize(),
+                             packed_frame_format);
+    PackedFrame* frame = &ppf->frames.back();
+
+    // We cannot tell right from the start whether there will be a
+    // need for an alpha channel. This is discovered only as soon as
+    // we see a transparent pixel. We hence initialize alpha lazily.
+    auto set_pixel_alpha = [&frame](size_t x, size_t y, uint8_t a) {
+      // ensure_have_alpha() pre-fills the channel with 255, so once it exists
+      // an opaque value needs no write. Note that the first call creates the
+      // channel for any value of `a`, including 255.
+      if (a == 255 && !frame->extra_channels.empty()) return;
+      ensure_have_alpha(frame);
+      static_cast<uint8_t*>(
+          frame->extra_channels[0].pixels())[y * frame->color.xsize + x] = a;
+    };
+
+    const ColorMapObject* const color_map =
+        image.ImageDesc.ColorMap ? image.ImageDesc.ColorMap : gif->SColorMap;
+    JXL_CHECK(color_map);
+    msan::UnpoisonMemory(color_map, sizeof(*color_map));
+    msan::UnpoisonMemory(color_map->Colors,
+                         sizeof(*color_map->Colors) * color_map->ColorCount);
+    GraphicsControlBlock gcb;
+    DGifSavedExtensionToGCB(gif.get(), i, &gcb);
+    msan::UnpoisonMemory(&gcb, sizeof(gcb));
+    bool is_full_size = total_rect.x0() == 0 && total_rect.y0() == 0 &&
+                        total_rect.xsize() == canvas.color.xsize &&
+                        total_rect.ysize() == canvas.color.ysize;
+    if (ppf->info.have_animation) {
+      frame->frame_info.duration = gcb.DelayTime;
+      frame->frame_info.layer_info.have_crop = static_cast<int>(!is_full_size);
+      frame->frame_info.layer_info.crop_x0 = total_rect.x0();
+      frame->frame_info.layer_info.crop_y0 = total_rect.y0();
+      frame->frame_info.layer_info.xsize = frame->color.xsize;
+      frame->frame_info.layer_info.ysize = frame->color.ysize;
+      if (last_base_was_none) {
+        replace = true;
+      }
+      frame->frame_info.layer_info.blend_info.blendmode =
+          replace ? JXL_BLEND_REPLACE : JXL_BLEND_BLEND;
+      // We always only reference at most the last frame
+      frame->frame_info.layer_info.blend_info.source =
+          last_base_was_none ? 0u : 1u;
+      frame->frame_info.layer_info.blend_info.clamp = 1;
+      frame->frame_info.layer_info.blend_info.alpha = 0;
+      // TODO(veluca): this could in principle be implemented.
+      if (last_base_was_none &&
+          (total_rect.x0() != 0 || total_rect.y0() != 0 ||
+           total_rect.xsize() != canvas.color.xsize ||
+           total_rect.ysize() != canvas.color.ysize || !replace)) {
+        return JXL_FAILURE(
+            "GIF with dispose-to-0 is not supported for non-full or "
+            "blended frames");
+      }
+      switch (gcb.DisposalMode) {
+        case DISPOSE_DO_NOT:
+        case DISPOSE_BACKGROUND:
+          frame->frame_info.layer_info.save_as_reference = 1u;
+          last_base_was_none = false;
+          break;
+        case DISPOSE_PREVIOUS:
+          frame->frame_info.layer_info.save_as_reference = 0u;
+          break;
+        default:
+          frame->frame_info.layer_info.save_as_reference = 0u;
+          last_base_was_none = true;
+      }
+    }
+
+    // Update the canvas by creating a copy first.
+    PackedImage new_canvas_image(canvas.color.xsize, canvas.color.ysize,
+                                 canvas.color.format);
+    memcpy(new_canvas_image.pixels(), canvas.color.pixels(),
+           new_canvas_image.pixels_size);
+    for (size_t y = 0, byte_index = 0; y < image_rect.ysize(); ++y) {
+      // Assumes format.align == 0. row points to the beginning of the y row in
+      // the image_rect.
+      PackedRgba* row = static_cast<PackedRgba*>(new_canvas_image.pixels()) +
+                        (y + image_rect.y0()) * new_canvas_image.xsize +
+                        image_rect.x0();
+      for (size_t x = 0; x < image_rect.xsize(); ++x, ++byte_index) {
+        const GifByteType byte = image.RasterBits[byte_index];
+        if (byte >= color_map->ColorCount) {
+          return JXL_FAILURE("GIF color is out of bounds");
+        }
+
+        if (byte == gcb.TransparentColor) continue;  // keep canvas pixel
+        GifColorType color = color_map->Colors[byte];
+        row[x].r = color.Red;
+        row[x].g = color.Green;
+        row[x].b = color.Blue;
+        row[x].a = 255;
+      }
+    }
+    const PackedImage& sub_frame_image = frame->color;
+    if (replace) {
+      // Copy from the new canvas image to the subframe
+      for (size_t y = 0; y < total_rect.ysize(); ++y) {
+        const PackedRgba* row_in =
+            static_cast<const PackedRgba*>(new_canvas_image.pixels()) +
+            (y + total_rect.y0()) * new_canvas_image.xsize + total_rect.x0();
+        PackedRgb* row_out = static_cast<PackedRgb*>(sub_frame_image.pixels()) +
+                             y * sub_frame_image.xsize;
+        for (size_t x = 0; x < sub_frame_image.xsize; ++x) {
+          row_out[x].r = row_in[x].r;
+          row_out[x].g = row_in[x].g;
+          row_out[x].b = row_in[x].b;
+          set_pixel_alpha(x, y, row_in[x].a);
+        }
+      }
+    } else {
+      for (size_t y = 0, byte_index = 0; y < image_rect.ysize(); ++y) {
+        // Assumes format.align == 0
+        PackedRgb* row = static_cast<PackedRgb*>(sub_frame_image.pixels()) +
+                         y * sub_frame_image.xsize;
+        for (size_t x = 0; x < image_rect.xsize(); ++x, ++byte_index) {
+          const GifByteType byte = image.RasterBits[byte_index];
+          if (byte >= color_map->ColorCount) {  // Colors[] has ColorCount items
+            return JXL_FAILURE("GIF color is out of bounds");
+          }
+          if (byte == gcb.TransparentColor) {
+            row[x].r = 0;
+            row[x].g = 0;
+            row[x].b = 0;
+            set_pixel_alpha(x, y, 0);
+            continue;
+          }
+          GifColorType color = color_map->Colors[byte];
+          row[x].r = color.Red;
+          row[x].g = color.Green;
+          row[x].b = color.Blue;
+          set_pixel_alpha(x, y, 255);
+        }
+      }
+    }
+
+    if (!frame->extra_channels.empty()) {
+      ppf->info.alpha_bits = 8;
+    }
+
+    switch (gcb.DisposalMode) {
+      case DISPOSE_DO_NOT:
+        canvas.color = std::move(new_canvas_image);
+        break;
+
+      case DISPOSE_BACKGROUND:
+        std::fill_n(static_cast<PackedRgba*>(canvas.color.pixels()),
+                    canvas.color.xsize * canvas.color.ysize, background_rgba);
+        previous_rect_if_restore_to_background = image_rect;
+        break;
+
+      case DISPOSE_PREVIOUS:
+        break;
+
+      case DISPOSAL_UNSPECIFIED:
+      default:
+        std::fill_n(static_cast<PackedRgba*>(canvas.color.pixels()),
+                    canvas.color.xsize * canvas.color.ysize, background_rgba);
+    }
+  }
+  // Finally, if any frame has an alpha-channel, every frame will need
+  // to have an alpha-channel.
+  bool seen_alpha = false;
+  for (const PackedFrame& frame : ppf->frames) {
+    if (!frame.extra_channels.empty()) {
+      seen_alpha = true;
+      break;
+    }
+  }
+  if (seen_alpha) {
+    for (PackedFrame& frame : ppf->frames) {
+      ensure_have_alpha(&frame);
+    }
+  }
+  return true;
+#else
+  return false;
+#endif
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/gif.h b/third-party/libjxl/libjxl/lib/extras/dec/gif.h
new file mode 100644
index 0000000000..4d5be8664e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/gif.h
@@ -0,0 +1,35 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_DEC_GIF_H_
+#define LIB_EXTRAS_DEC_GIF_H_
+
+// Decodes GIF images in memory.
+
+#include <stdint.h>
+
+#include "lib/extras/dec/color_hints.h"
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+struct SizeConstraints;
+
+namespace extras {
+
+bool CanDecodeGIF();
+
+// Decodes `bytes` into `ppf`. color_hints are applied via ApplyColorHints.
+Status DecodeImageGIF(Span<const uint8_t> bytes, const ColorHints& color_hints,
+                      PackedPixelFile* ppf,
+                      const SizeConstraints* constraints = nullptr);
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_DEC_GIF_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/jpegli.cc b/third-party/libjxl/libjxl/lib/extras/dec/jpegli.cc
new file mode 100644
index 0000000000..ffa1b79c25
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/jpegli.cc
@@ -0,0 +1,271 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/jpegli.h"
+
+#include <setjmp.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <numeric>
+#include <utility>
+#include <vector>
+
+#include "lib/jpegli/decode.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+namespace extras {
+
+namespace {
+
+constexpr unsigned char kExifSignature[6] = {0x45, 0x78, 0x69,
+                                             0x66, 0x00, 0x00};
+constexpr int kExifMarker = JPEG_APP0 + 1;
+constexpr int kICCMarker = JPEG_APP0 + 2;
+
+static inline bool IsJPG(const std::vector<uint8_t>& bytes) {
+  // True only when the buffer begins with the two bytes 0xFF 0xD8.
+  const bool long_enough = bytes.size() >= 2;
+  return long_enough && bytes[0] == 0xFF && bytes[1] == 0xD8;
+}
+
+bool MarkerIsExif(const jpeg_saved_marker_ptr marker) {
+  if (marker->marker != kExifMarker) return false;  // wrong APP marker id
+  if (marker->data_length < sizeof kExifSignature + 2) return false;
+  return std::equal(std::begin(kExifSignature), std::end(kExifSignature),
+                    marker->data);  // payload must start with the signature
+}
+
+Status ReadICCProfile(jpeg_decompress_struct* const cinfo,
+                      std::vector<uint8_t>* const icc) {
+  uint8_t* profile;
+  unsigned int profile_size;
+  // No profile could be read: leave *icc untouched and report failure.
+  if (!jpegli_read_icc_profile(cinfo, &profile, &profile_size)) return false;
+  // Copy the bytes out, then release the buffer handed back by jpegli.
+  icc->assign(profile, profile + profile_size);
+  free(profile);
+  return true;
+}
+
+void ReadExif(jpeg_decompress_struct* const cinfo,
+              std::vector<uint8_t>* const exif) {
+  constexpr size_t kPrefixSize = sizeof kExifSignature;
+  jpeg_saved_marker_ptr cur = cinfo->marker_list;
+  for (; cur != nullptr; cur = cur->next) {
+    // Filled in by the (non-msan-instrumented) JPEG library: unpoison first.
+    msan::UnpoisonMemory(cur, sizeof(*cur));
+    msan::UnpoisonMemory(cur->data, cur->data_length);
+    if (MarkerIsExif(cur)) {
+      // First Exif marker wins; store its payload without the signature.
+      const auto* payload = cur->data + kPrefixSize;
+      exif->assign(payload, payload + (cur->data_length - kPrefixSize));
+      return;
+    }
+  }
+}
+
+JpegliDataType ConvertDataType(JxlDataType type) {
+  // Translates the JXL sample type into the matching jpegli constant.
+  // Only the 16-bit and float types need special treatment.
+  if (type == JXL_TYPE_UINT16) {
+    return JPEGLI_TYPE_UINT16;
+  }
+  if (type == JXL_TYPE_FLOAT) {
+    return JPEGLI_TYPE_FLOAT;
+  }
+  // JXL_TYPE_UINT8, and the fallback for every other value.
+  return JPEGLI_TYPE_UINT8;
+}
+
+JpegliEndianness ConvertEndianness(JxlEndianness type) {
+  // Translates the JXL byte order into the matching jpegli constant.
+  // Only the two explicit byte orders need special treatment.
+  if (type == JXL_BIG_ENDIAN) {
+    return JPEGLI_BIG_ENDIAN;
+  }
+  if (type == JXL_LITTLE_ENDIAN) {
+    return JPEGLI_LITTLE_ENDIAN;
+  }
+  // JXL_NATIVE_ENDIAN, and the fallback for every other value.
+  return JPEGLI_NATIVE_ENDIAN;
+}
+
+JxlColorSpace ConvertColorSpace(J_COLOR_SPACE colorspace) {
+  // Anything other than grayscale or RGB is reported as unknown.
+  if (colorspace == JCS_GRAYSCALE) {
+    return JXL_COLOR_SPACE_GRAY;
+  }
+  if (colorspace == JCS_RGB) {
+    return JXL_COLOR_SPACE_RGB;
+  }
+  return JXL_COLOR_SPACE_UNKNOWN;
+}
+
+void MyErrorExit(j_common_ptr cinfo) {
+  auto* const jump_target = static_cast<jmp_buf*>(cinfo->client_data);
+  cinfo->err->output_message(cinfo);  // report the message before unwinding
+  jpegli_destroy_decompress(reinterpret_cast<j_decompress_ptr>(cinfo));
+  longjmp(*jump_target, 1);  // resumes at the setjmp() in DecodeJpeg
+}
+
+void MyOutputMessage(j_common_ptr cinfo) {
+#if JXL_DEBUG_WARNING == 1
+  char text[JMSG_LENGTH_MAX + 1];
+  cinfo->err->format_message(cinfo, text);
+  text[JMSG_LENGTH_MAX] = '\0';  // guarantee termination before printing
+  JXL_WARNING("%s", text);
+#endif  // otherwise the message is dropped
+}
+
+void UnmapColors(uint8_t* row, size_t xsize, int components,
+                 JSAMPARRAY colormap, size_t num_colors) {
+  JXL_CHECK(colormap != nullptr);
+  std::vector<uint8_t> expanded(xsize * components);  // scratch output row
+  for (size_t x = 0; x < xsize; ++x) {
+    const uint8_t index = row[x];  // palette index stored for this pixel
+    JXL_CHECK(index < num_colors);
+    uint8_t* const out = expanded.data() + x * components;
+    for (int c = 0; c < components; ++c) out[c] = colormap[c][index];
+  }
+  memcpy(row, expanded.data(), expanded.size());  // overwrite indices in place
+}
+
+}  // namespace
+
+Status DecodeJpeg(const std::vector<uint8_t>& compressed,
+                  const JpegDecompressParams& dparams, ThreadPool* pool,
+                  PackedPixelFile* ppf) {  // `pool` is not used below
+  // Don't do anything for non-JPEG files (no need to report an error)
+  if (!IsJPG(compressed)) return false;
+
+  // TODO(veluca): use JPEGData also for pixels?
+
+  // We need to declare all the non-trivial destructor local variables before
+  // the call to setjmp().
+  std::unique_ptr<JSAMPLE[]> row;  // NOTE(review): not referenced below
+
+  jpeg_decompress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    // Setup error handling in jpeg library so we can deal with broken jpegs in
+    // the fuzzer.
+    jpeg_error_mgr jerr;
+    jmp_buf env;
+    cinfo.err = jpegli_std_error(&jerr);
+    jerr.error_exit = &MyErrorExit;
+    jerr.output_message = &MyOutputMessage;
+    if (setjmp(env)) {  // re-entered through longjmp() in MyErrorExit
+      return false;
+    }
+    cinfo.client_data = static_cast<void*>(&env);  // read by MyErrorExit
+
+    jpegli_create_decompress(&cinfo);
+    jpegli_mem_src(&cinfo,
+                   reinterpret_cast<const unsigned char*>(compressed.data()),
+                   compressed.size());
+    jpegli_save_markers(&cinfo, kICCMarker, 0xFFFF);
+    jpegli_save_markers(&cinfo, kExifMarker, 0xFFFF);
+    const auto failure = [&cinfo](const char* str) -> Status {
+      jpegli_abort_decompress(&cinfo);
+      jpegli_destroy_decompress(&cinfo);
+      return JXL_FAILURE("%s", str);
+    };
+    jpegli_read_header(&cinfo, TRUE);
+    // Might cause CPU-zip bomb.
+    if (cinfo.arith_code) {
+      return failure("arithmetic code JPEGs are not supported");
+    }
+    int nbcomp = cinfo.num_components;
+    if (nbcomp != 1 && nbcomp != 3) {
+      return failure("unsupported number of components in JPEG");
+    }
+    if (dparams.force_rgb) {
+      cinfo.out_color_space = JCS_RGB;
+    } else if (dparams.force_grayscale) {
+      cinfo.out_color_space = JCS_GRAYSCALE;
+    }
+    if (!ReadICCProfile(&cinfo, &ppf->icc)) {
+      ppf->icc.clear();
+      // Default to SRGB
+      ppf->color_encoding.color_space =
+          ConvertColorSpace(cinfo.out_color_space);
+      ppf->color_encoding.white_point = JXL_WHITE_POINT_D65;
+      ppf->color_encoding.primaries = JXL_PRIMARIES_SRGB;
+      ppf->color_encoding.transfer_function = JXL_TRANSFER_FUNCTION_SRGB;
+      ppf->color_encoding.rendering_intent = JXL_RENDERING_INTENT_PERCEPTUAL;
+    }
+    ReadExif(&cinfo, &ppf->metadata.exif);
+
+    ppf->info.xsize = cinfo.image_width;
+    ppf->info.ysize = cinfo.image_height;
+    if (dparams.output_data_type == JXL_TYPE_UINT8) {
+      ppf->info.bits_per_sample = 8;
+      ppf->info.exponent_bits_per_sample = 0;
+    } else if (dparams.output_data_type == JXL_TYPE_UINT16) {
+      ppf->info.bits_per_sample = 16;
+      ppf->info.exponent_bits_per_sample = 0;
+    } else if (dparams.output_data_type == JXL_TYPE_FLOAT) {
+      ppf->info.bits_per_sample = 32;
+      ppf->info.exponent_bits_per_sample = 8;
+    } else {
+      return failure("unsupported data type");
+    }
+    ppf->info.uses_original_profile = true;
+
+    // No alpha in JPG
+    ppf->info.alpha_bits = 0;
+    ppf->info.alpha_exponent_bits = 0;
+    ppf->info.orientation = JXL_ORIENT_IDENTITY;
+
+    jpegli_set_output_format(&cinfo, ConvertDataType(dparams.output_data_type),
+                             ConvertEndianness(dparams.output_endianness));
+
+    if (dparams.num_colors > 0) {
+      cinfo.quantize_colors = TRUE;
+      cinfo.desired_number_of_colors = dparams.num_colors;
+      cinfo.two_pass_quantize = dparams.two_pass_quant;
+      cinfo.dither_mode = (J_DITHER_MODE)dparams.dither_mode;
+    }
+
+    jpegli_start_decompress(&cinfo);
+
+    ppf->info.num_color_channels = cinfo.out_color_components;
+    const JxlPixelFormat format{
+        /*num_channels=*/static_cast<uint32_t>(cinfo.out_color_components),
+        dparams.output_data_type,
+        dparams.output_endianness,
+        /*align=*/0,
+    };
+    ppf->frames.clear();
+    // Allocates the frame buffer.
+    ppf->frames.emplace_back(cinfo.image_width, cinfo.image_height, format);
+    const auto& frame = ppf->frames.back();
+    JXL_ASSERT(sizeof(JSAMPLE) * cinfo.out_color_components *
+                   cinfo.image_width <=
+               frame.color.stride);
+
+    for (size_t y = 0; y < cinfo.image_height; ++y) {
+      JSAMPROW rows[] = {reinterpret_cast<JSAMPLE*>(
+          static_cast<uint8_t*>(frame.color.pixels()) +
+          frame.color.stride * y)};
+      jpegli_read_scanlines(&cinfo, rows, 1);
+      if (dparams.num_colors > 0) {
+        UnmapColors(rows[0], cinfo.output_width, cinfo.out_color_components,
+                    cinfo.colormap, cinfo.actual_number_of_colors);
+      }
+    }
+
+    jpegli_finish_decompress(&cinfo);
+    return true;
+  };
+  bool success = try_catch_block();
+  jpegli_destroy_decompress(&cinfo);  // also runs after the error paths
+  return success;
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/jpegli.h b/third-party/libjxl/libjxl/lib/extras/dec/jpegli.h
new file mode 100644
index 0000000000..574df54c8e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/jpegli.h
@@ -0,0 +1,41 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_DEC_JPEGLI_H_
+#define LIB_EXTRAS_DEC_JPEGLI_H_
+
+// Decodes JPG pixels and metadata in memory using the libjpegli library.
+
+#include <jxl/types.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace extras {
+
+struct JpegDecompressParams {
+  JxlDataType output_data_type = JXL_TYPE_UINT8;  // sample type of the output
+  JxlEndianness output_endianness = JXL_NATIVE_ENDIAN;  // output byte order
+  bool force_rgb = false;  // presumably forces RGB output - TODO confirm
+  bool force_grayscale = false;  // presumably forces gray - TODO confirm
+  int num_colors = 0;  // > 0 turns on quantization to this many colors
+  bool two_pass_quant = true;  // applied when num_colors > 0
+  // 0 = none, 1 = ordered, 2 = Floyd-Steinberg
+  int dither_mode = 2;  // applied when num_colors > 0
+};
+
+Status DecodeJpeg(const std::vector<uint8_t>& compressed,
+                  const JpegDecompressParams& dparams, ThreadPool* pool,
+                  PackedPixelFile* ppf);
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_DEC_JPEGLI_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/jpg.cc b/third-party/libjxl/libjxl/lib/extras/dec/jpg.cc
new file mode 100644
index 0000000000..3c8a4bccfe
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/jpg.cc
@@ -0,0 +1,338 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/jpg.h"
+
+#if JPEGXL_ENABLE_JPEG
+#include <jpeglib.h>
+#include <setjmp.h>
+#endif
+#include <stdint.h>
+
+#include <algorithm>
+#include <numeric>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/size_constraints.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+namespace extras {
+
+#if JPEGXL_ENABLE_JPEG
+namespace {
+
+constexpr unsigned char kICCSignature[12] = {
+    0x49, 0x43, 0x43, 0x5F, 0x50, 0x52, 0x4F, 0x46, 0x49, 0x4C, 0x45, 0x00};
+constexpr int kICCMarker = JPEG_APP0 + 2;
+
+constexpr unsigned char kExifSignature[6] = {0x45, 0x78, 0x69,
+                                             0x66, 0x00, 0x00};
+constexpr int kExifMarker = JPEG_APP0 + 1;
+
+static inline bool IsJPG(const Span<const uint8_t> bytes) {
+  // Every JPEG stream opens with the two bytes 0xFF 0xD8.
+  const bool long_enough = bytes.size() >= 2;
+  return long_enough && bytes[0] == 0xFF && bytes[1] == 0xD8;
+}
+
+bool MarkerIsICC(const jpeg_saved_marker_ptr marker) {
+  // ICC chunk: APP2 marker holding the signature plus index and count bytes.
+  if (marker->marker != kICCMarker) return false;
+  if (marker->data_length < sizeof kICCSignature + 2) return false;
+  return std::equal(kICCSignature, std::end(kICCSignature), marker->data);
+}
+bool MarkerIsExif(const jpeg_saved_marker_ptr marker) {
+  // Exif payload: APP1 marker with the signature and at least two more bytes.
+  if (marker->marker != kExifMarker) return false;
+  if (marker->data_length < sizeof kExifSignature + 2) return false;
+  return std::equal(kExifSignature, std::end(kExifSignature), marker->data);
+}
+
+Status ReadICCProfile(jpeg_decompress_struct* const cinfo,
+                      std::vector<uint8_t>* const icc) {  // out: profile bytes
+  constexpr size_t kICCSignatureSize = sizeof kICCSignature;
+  // ICC signature + uint8_t index + uint8_t max_index.
+  constexpr size_t kICCHeadSize = kICCSignatureSize + 2;
+  // Payload length of each chunk, indexed by the chunk's 1-based number; slot
+  // 0 stays 0 so that the running sums computed later start at offset 0.
+  std::vector<size_t> marker_lengths;
+  int num_markers = 0;
+  int seen_markers_count = 0;
+  bool has_num_markers = false;
+  for (jpeg_saved_marker_ptr marker = cinfo->marker_list; marker != nullptr;
+       marker = marker->next) {
+    // marker is initialized by libjpeg, which we are not instrumenting with
+    // msan.
+    msan::UnpoisonMemory(marker, sizeof(*marker));
+    msan::UnpoisonMemory(marker->data, marker->data_length);
+    if (!MarkerIsICC(marker)) continue;
+    // The two bytes after the signature: this chunk's number, then the total.
+    const int current_marker = marker->data[kICCSignatureSize];
+    if (current_marker == 0) {
+      return JXL_FAILURE("inconsistent JPEG ICC marker numbering");
+    }
+    const int current_num_markers = marker->data[kICCSignatureSize + 1];
+    if (current_marker > current_num_markers) {
+      return JXL_FAILURE("inconsistent JPEG ICC marker numbering");
+    }
+    if (has_num_markers) {
+      if (current_num_markers != num_markers) {
+        return JXL_FAILURE("inconsistent numbers of JPEG ICC markers");
+      }
+    } else {
+      num_markers = current_num_markers;
+      has_num_markers = true;
+      marker_lengths.resize(num_markers + 1);
+    }
+
+    size_t marker_length = marker->data_length - kICCHeadSize;
+
+    if (marker_length == 0) {
+      // Length 0 means "slot unused" to the duplicate check that follows.
+      return JXL_FAILURE("Empty ICC chunk");
+    }
+
+    if (marker_lengths[current_marker] != 0) {
+      return JXL_FAILURE("duplicate JPEG ICC marker number");
+    }
+    marker_lengths[current_marker] = marker_length;
+    seen_markers_count++;
+  }
+
+  if (marker_lengths.empty()) {
+    // No ICC chunk was found; not an error, though the result is still false.
+    return false;
+  }
+
+  if (seen_markers_count != num_markers) {
+    JXL_DASSERT(has_num_markers);
+    return JXL_FAILURE("Incomplete set of ICC chunks");
+  }
+  // Running sums: chunk i occupies [offsets[i - 1], offsets[i]) of the profile.
+  std::vector<size_t> offsets = std::move(marker_lengths);
+  std::partial_sum(offsets.begin(), offsets.end(), offsets.begin());
+  icc->resize(offsets.back());
+  // Second pass: copy every chunk's payload to its place in the profile.
+  for (jpeg_saved_marker_ptr marker = cinfo->marker_list; marker != nullptr;
+       marker = marker->next) {
+    if (!MarkerIsICC(marker)) continue;
+    const uint8_t* first = marker->data + kICCHeadSize;
+    uint8_t current_marker = marker->data[kICCSignatureSize];
+    size_t offset = offsets[current_marker - 1];
+    size_t marker_length = offsets[current_marker] - offset;
+    std::copy_n(first, marker_length, icc->data() + offset);
+  }
+
+  return true;
+}
+
+void ReadExif(jpeg_decompress_struct* const cinfo,
+              std::vector<uint8_t>* const exif) {
+  // Copies the first Exif segment, minus its signature, into `exif`; `exif`
+  // is left as it was when no such segment has been saved.
+  constexpr size_t kPrefix = sizeof kExifSignature;
+  jpeg_saved_marker_ptr seg = cinfo->marker_list;
+  for (; seg != nullptr; seg = seg->next) {
+    // libjpeg is not msan-instrumented, so mark its marker as initialized.
+    msan::UnpoisonMemory(seg, sizeof(*seg));
+    msan::UnpoisonMemory(seg->data, seg->data_length);
+    if (MarkerIsExif(seg)) break;
+  }
+  if (seg == nullptr) return;
+  const uint8_t* payload = seg->data + kPrefix;
+  exif->assign(payload, payload + (seg->data_length - kPrefix));
+}
+
+void MyErrorExit(j_common_ptr cinfo) {
+  (*cinfo->err->output_message)(cinfo);  // report first, then tear down
+  auto* unwind_to = static_cast<jmp_buf*>(cinfo->client_data);
+  jpeg_destroy_decompress(reinterpret_cast<j_decompress_ptr>(cinfo));
+  longjmp(*unwind_to, 1);  // back to the setjmp() that owns this jmp_buf
+}
+
+void MyOutputMessage(j_common_ptr cinfo) {
+#if JXL_DEBUG_WARNING == 1
+  char message[JMSG_LENGTH_MAX + 1];  // +1 leaves room to force a terminator
+  (*cinfo->err->format_message)(cinfo, message);
+  message[sizeof(message) - 1] = 0;
+  JXL_WARNING("%s", message);
+#endif
+}
+
+void UnmapColors(uint8_t* row, size_t xsize, int components,
+                 JSAMPARRAY colormap, size_t num_colors) {
+  JXL_CHECK(colormap != nullptr);
+  // Scratch row: the expanded samples overwrite the indices being read.
+  std::vector<uint8_t> expanded(xsize * components);
+  for (size_t x = 0, out = 0; x < xsize; ++x) {
+    const uint8_t index = row[x];
+    JXL_CHECK(index < num_colors);
+    for (int c = 0; c < components; ++c) expanded[out++] = colormap[c][index];
+  }
+  memcpy(row, expanded.data(), expanded.size());
+}
+
+}  // namespace
+#endif
+
+bool CanDecodeJPG() {  // whether this build has JPEGXL_ENABLE_JPEG set
+#if JPEGXL_ENABLE_JPEG
+  return true;
+#else
+  return false;  // DecodeImageJPG() then always returns false as well
+#endif
+}
+
+Status DecodeImageJPG(const Span<const uint8_t> bytes,
+                      const ColorHints& color_hints, PackedPixelFile* ppf,
+                      const SizeConstraints* constraints,
+                      const JPGDecompressParams* dparams) {
+#if JPEGXL_ENABLE_JPEG
+  // Input that is not a JPEG yields false without an error being reported.
+  if (!IsJPG(bytes)) return false;
+
+  // TODO(veluca): use JPEGData also for pixels?
+
+  // We need to declare all the non-trivial destructor local variables before
+  // the call to setjmp().
+  std::unique_ptr<JSAMPLE[]> row;  // NOTE(review): never referenced below
+
+  const auto try_catch_block = [&]() -> bool {
+    jpeg_decompress_struct cinfo = {};
+    // Setup error handling in jpeg library so we can deal with broken jpegs in
+    // the fuzzer.
+    jpeg_error_mgr jerr;
+    jmp_buf env;
+    cinfo.err = jpeg_std_error(&jerr);
+    jerr.error_exit = &MyErrorExit;
+    jerr.output_message = &MyOutputMessage;
+    if (setjmp(env)) {  // reached again via longjmp() in MyErrorExit
+      return false;
+    }
+    cinfo.client_data = static_cast<void*>(&env);  // read by MyErrorExit
+
+    jpeg_create_decompress(&cinfo);
+    jpeg_mem_src(&cinfo, reinterpret_cast<const unsigned char*>(bytes.data()),
+                 bytes.size());
+    jpeg_save_markers(&cinfo, kICCMarker, 0xFFFF);  // for ReadICCProfile
+    jpeg_save_markers(&cinfo, kExifMarker, 0xFFFF);  // for ReadExif
+    const auto failure = [&cinfo](const char* str) -> Status {
+      jpeg_abort_decompress(&cinfo);
+      jpeg_destroy_decompress(&cinfo);
+      return JXL_FAILURE("%s", str);
+    };
+    int read_header_result = jpeg_read_header(&cinfo, TRUE);
+    // TODO(eustas): what about JPEG_HEADER_TABLES_ONLY?
+    if (read_header_result == JPEG_SUSPENDED) {
+      return failure("truncated JPEG input");
+    }
+    if (!VerifyDimensions(constraints, cinfo.image_width, cinfo.image_height)) {
+      return failure("image too big");
+    }
+    // Might cause CPU-zip bomb.
+    if (cinfo.arith_code) {
+      return failure("arithmetic code JPEGs are not supported");
+    }
+    int nbcomp = cinfo.num_components;
+    if (nbcomp != 1 && nbcomp != 3) {
+      return failure("unsupported number of components in JPEG");
+    }
+    if (!ReadICCProfile(&cinfo, &ppf->icc)) {
+      ppf->icc.clear();
+      // No usable ICC profile: fall back to sRGB (gray for one component).
+      // That (cinfo.out_color_components == nbcomp) holds is asserted after
+      // `jpeg_start_decompress`.
+      ppf->color_encoding.color_space =
+          (nbcomp == 1) ? JXL_COLOR_SPACE_GRAY : JXL_COLOR_SPACE_RGB;
+      ppf->color_encoding.white_point = JXL_WHITE_POINT_D65;
+      ppf->color_encoding.primaries = JXL_PRIMARIES_SRGB;
+      ppf->color_encoding.transfer_function = JXL_TRANSFER_FUNCTION_SRGB;
+      ppf->color_encoding.rendering_intent = JXL_RENDERING_INTENT_PERCEPTUAL;
+    }
+    ReadExif(&cinfo, &ppf->metadata.exif);
+    if (!ApplyColorHints(color_hints, /*color_already_set=*/true,
+                         /*is_gray=*/false, ppf)) {
+      return failure("ApplyColorHints failed");
+    }
+
+    ppf->info.xsize = cinfo.image_width;
+    ppf->info.ysize = cinfo.image_height;
+    // Original data is uint, so exponent_bits_per_sample = 0.
+    ppf->info.bits_per_sample = BITS_IN_JSAMPLE;
+    JXL_ASSERT(BITS_IN_JSAMPLE == 8 || BITS_IN_JSAMPLE == 16);
+    ppf->info.exponent_bits_per_sample = 0;
+    ppf->info.uses_original_profile = true;
+
+    // No alpha in JPG
+    ppf->info.alpha_bits = 0;
+    ppf->info.alpha_exponent_bits = 0;
+
+    ppf->info.num_color_channels = nbcomp;
+    ppf->info.orientation = JXL_ORIENT_IDENTITY;
+
+    if (dparams && dparams->num_colors > 0) {  // optional color quantization
+      cinfo.quantize_colors = TRUE;
+      cinfo.desired_number_of_colors = dparams->num_colors;
+      cinfo.two_pass_quantize = dparams->two_pass_quant;
+      cinfo.dither_mode = (J_DITHER_MODE)dparams->dither_mode;
+    }
+
+    jpeg_start_decompress(&cinfo);
+    JXL_ASSERT(cinfo.out_color_components == nbcomp);
+    JxlDataType data_type =
+        ppf->info.bits_per_sample <= 8 ? JXL_TYPE_UINT8 : JXL_TYPE_UINT16;
+
+    const JxlPixelFormat format{
+        /*num_channels=*/static_cast<uint32_t>(nbcomp),
+        data_type,
+        /*endianness=*/JXL_NATIVE_ENDIAN,
+        /*align=*/0,
+    };
+    ppf->frames.clear();
+    // Allocates the frame buffer.
+    ppf->frames.emplace_back(cinfo.image_width, cinfo.image_height, format);
+    const auto& frame = ppf->frames.back();
+    JXL_ASSERT(sizeof(JSAMPLE) * cinfo.out_color_components *
+                   cinfo.image_width <=
+               frame.color.stride);
+
+    if (cinfo.quantize_colors) {  // colormap comes from uninstrumented libjpeg
+      jxl::msan::UnpoisonMemory(cinfo.colormap, cinfo.out_color_components *
+                                                    sizeof(cinfo.colormap[0]));
+      for (int c = 0; c < cinfo.out_color_components; ++c) {
+        jxl::msan::UnpoisonMemory(
+            cinfo.colormap[c],
+            cinfo.actual_number_of_colors * sizeof(cinfo.colormap[c][0]));
+      }
+    }
+    for (size_t y = 0; y < cinfo.image_height; ++y) {  // one scanline per pass
+      JSAMPROW rows[] = {reinterpret_cast<JSAMPLE*>(
+          static_cast<uint8_t*>(frame.color.pixels()) +
+          frame.color.stride * y)};
+      jpeg_read_scanlines(&cinfo, rows, 1);
+      msan::UnpoisonMemory(rows[0], sizeof(JSAMPLE) * cinfo.output_components *
+                                        cinfo.image_width);
+      if (dparams && dparams->num_colors > 0) {  // indices -> color samples
+        UnmapColors(rows[0], cinfo.output_width, cinfo.out_color_components,
+                    cinfo.colormap, cinfo.actual_number_of_colors);
+      }
+    }
+
+    jpeg_finish_decompress(&cinfo);
+    jpeg_destroy_decompress(&cinfo);
+    return true;
+  };
+
+  return try_catch_block();
+#else
+  return false;
+#endif
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/jpg.h b/third-party/libjxl/libjxl/lib/extras/dec/jpg.h
new file mode 100644
index 0000000000..15ed1ffd7a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/jpg.h
@@ -0,0 +1,46 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_DEC_JPG_H_
+#define LIB_EXTRAS_DEC_JPG_H_
+
+// Decodes JPG pixels and metadata in memory.
+
+#include <stdint.h>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/dec/color_hints.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+struct SizeConstraints;
+
+namespace extras {
+
+bool CanDecodeJPG();
+
+struct JPGDecompressParams {
+  int num_colors = 0;  // > 0 requests quantization to this many colors
+  bool two_pass_quant = false;  // forwarded to cinfo.two_pass_quantize
+  // 0 = none, 1 = ordered, 2 = Floyd-Steinberg
+  int dither_mode = 0;  // cast to J_DITHER_MODE; used when num_colors > 0
+};
+
+// Decodes `bytes` into `ppf`, applying `color_hints` to the result. Returns
+// false without reporting an error if `bytes` is not a JPEG. `dparams`, if
+// non-null with num_colors > 0, enables libjpeg color quantization.
+Status DecodeImageJPG(Span<const uint8_t> bytes, const ColorHints& color_hints,
+                      PackedPixelFile* ppf,
+                      const SizeConstraints* constraints = nullptr,
+                      const JPGDecompressParams* dparams = nullptr);
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_DEC_JPG_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/jxl.cc b/third-party/libjxl/libjxl/lib/extras/dec/jxl.cc
new file mode 100644
index 0000000000..5db0e31482
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/jxl.cc
@@ -0,0 +1,568 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/jxl.h"
+
+#include <jxl/decode.h>
+#include <jxl/decode_cxx.h>
+#include <jxl/types.h>
+
+#include "lib/extras/dec/color_description.h"
+#include "lib/extras/enc/encode.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/exif.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+struct BoxProcessor {
+  BoxProcessor(JxlDecoder* dec) : dec_(dec) { Reset(); }
+
+  // Targets `out` for the box being decoded and supplies a first buffer.
+  void InitializeOutput(std::vector<uint8_t>* out) {
+    box_data_ = out;
+    AddMoreOutput();
+  }
+
+  // Adds one chunk of space and hands the unwritten tail to the decoder.
+  bool AddMoreOutput() {
+    Flush();
+    static const size_t kBoxOutputChunkSize = 1 << 16;
+    box_data_->resize(box_data_->size() + kBoxOutputChunkSize);
+    next_out_ = box_data_->data() + total_size_;
+    avail_out_ = box_data_->size() - total_size_;
+    const bool accepted =
+        JxlDecoderSetBoxBuffer(dec_, next_out_, avail_out_) == JXL_DEC_SUCCESS;
+    if (!accepted) fprintf(stderr, "JxlDecoderSetBoxBuffer failed\n");
+    return accepted;
+  }
+
+  void FinalizeOutput() {
+    if (box_data_ == nullptr) return;  // nothing is being collected
+    Flush();
+    box_data_->resize(total_size_);  // drop the unwritten tail
+    Reset();
+  }
+
+ private:
+  JxlDecoder* dec_;
+  std::vector<uint8_t>* box_data_;  // null while no box is being collected
+  uint8_t* next_out_;
+  size_t avail_out_;
+  size_t total_size_;  // bytes of the current box written so far
+
+  void Reset() {
+    box_data_ = nullptr;
+    next_out_ = nullptr;
+    avail_out_ = total_size_ = 0;
+  }
+  // Takes the buffer back from the decoder and accounts for what it filled.
+  void Flush() {
+    if (box_data_ == nullptr) return;
+    const size_t unused = JxlDecoderReleaseBoxBuffer(dec_);
+    const size_t filled = avail_out_ - unused;
+    next_out_ += filled;
+    avail_out_ = unused;
+    total_size_ += filled;
+  }
+};
+
+void SetBitDepthFromDataType(JxlDataType data_type, uint32_t* bits_per_sample,
+                             uint32_t* exponent_bits_per_sample) {
+  // Stores the total and exponent bit counts of the given sample type. Enum
+  // values not listed below leave both outputs untouched.
+  const auto assign = [&](uint32_t bits, uint32_t exponent_bits) {
+    *bits_per_sample = bits;
+    *exponent_bits_per_sample = exponent_bits;
+  };
+  switch (data_type) {
+    // Unsigned integer samples carry no exponent bits.
+    case JXL_TYPE_UINT8:
+      return assign(8, 0);
+    case JXL_TYPE_UINT16:
+      return assign(16, 0);
+    // Floating point samples: 5 resp. 8 exponent bits.
+    case JXL_TYPE_FLOAT16:
+      return assign(16, 5);
+    case JXL_TYPE_FLOAT:
+      return assign(32, 8);
+  }
+}
+
+template <typename T>
+void UpdateBitDepth(JxlBitDepth bit_depth, JxlDataType data_type, T* info) {
+  if (bit_depth.type == JXL_BIT_DEPTH_CUSTOM) {  // caller-chosen depth
+    info->bits_per_sample = bit_depth.bits_per_sample;
+    info->exponent_bits_per_sample = bit_depth.exponent_bits_per_sample;
+  } else if (bit_depth.type == JXL_BIT_DEPTH_FROM_PIXEL_FORMAT) {
+    SetBitDepthFromDataType(data_type, &info->bits_per_sample,
+                            &info->exponent_bits_per_sample);
+  }  // any other mode leaves `info` unchanged
+}
+
+}  // namespace
+
+bool DecodeImageJXL(const uint8_t* bytes, size_t bytes_size,
+                    const JXLDecompressParams& dparams, size_t* decoded_bytes,
+                    PackedPixelFile* ppf, std::vector<uint8_t>* jpeg_bytes) {
+  JxlSignature sig = JxlSignatureCheck(bytes, bytes_size);
+  // silently return false if this is not a JXL file
+  if (sig == JXL_SIG_INVALID) return false;
+
+  auto decoder = JxlDecoderMake(/*memory_manager=*/nullptr);
+  JxlDecoder* dec = decoder.get();
+  ppf->frames.clear();
+
+  if (dparams.runner_opaque != nullptr &&
+      JXL_DEC_SUCCESS != JxlDecoderSetParallelRunner(dec, dparams.runner,
+                                                     dparams.runner_opaque)) {
+    fprintf(stderr, "JxlEncoderSetParallelRunner failed\n");
+    return false;
+  }
+
+  JxlPixelFormat format;
+  std::vector<JxlPixelFormat> accepted_formats = dparams.accepted_formats;
+
+  JxlColorEncoding color_encoding;
+  size_t num_color_channels = 0;
+  if (!dparams.color_space.empty()) {
+    if (!jxl::ParseDescription(dparams.color_space, &color_encoding)) {
+      fprintf(stderr, "Failed to parse color space %s.\n",
+              dparams.color_space.c_str());
+      return false;
+    }
+    num_color_channels =
+        color_encoding.color_space == JXL_COLOR_SPACE_GRAY ? 1 : 3;
+  }
+
+  bool can_reconstruct_jpeg = false;
+  std::vector<uint8_t> jpeg_data_chunk;
+  if (jpeg_bytes != nullptr) {
+    // This bound is very likely to be enough to hold the entire
+    // reconstructed JPEG, to avoid having to do expensive retries.
+    jpeg_data_chunk.resize(bytes_size * 3 / 2 + 1024);
+    jpeg_bytes->resize(0);
+  }
+
+  int events = (JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE);
+
+  bool max_passes_defined =
+      (dparams.max_passes < std::numeric_limits<uint32_t>::max());
+  if (max_passes_defined || dparams.max_downsampling > 1) {
+    events |= JXL_DEC_FRAME_PROGRESSION;
+    if (max_passes_defined) {
+      JxlDecoderSetProgressiveDetail(dec, JxlProgressiveDetail::kPasses);
+    } else {
+      JxlDecoderSetProgressiveDetail(dec, JxlProgressiveDetail::kLastPasses);
+    }
+  }
+  if (jpeg_bytes != nullptr) {
+    events |= JXL_DEC_JPEG_RECONSTRUCTION;
+  } else {
+    events |= (JXL_DEC_COLOR_ENCODING | JXL_DEC_FRAME | JXL_DEC_PREVIEW_IMAGE |
+               JXL_DEC_BOX);
+    if (accepted_formats.empty()) {
+      // decoding just the metadata, not the pixel data
+      events ^= (JXL_DEC_FULL_IMAGE | JXL_DEC_PREVIEW_IMAGE);
+    }
+  }
+  if (JXL_DEC_SUCCESS != JxlDecoderSubscribeEvents(dec, events)) {
+    fprintf(stderr, "JxlDecoderSubscribeEvents failed\n");
+    return false;
+  }
+  if (jpeg_bytes == nullptr) {
+    if (JXL_DEC_SUCCESS !=
+        JxlDecoderSetRenderSpotcolors(dec, dparams.render_spotcolors)) {
+      fprintf(stderr, "JxlDecoderSetRenderSpotColors failed\n");
+      return false;
+    }
+    if (JXL_DEC_SUCCESS !=
+        JxlDecoderSetKeepOrientation(dec, dparams.keep_orientation)) {
+      fprintf(stderr, "JxlDecoderSetKeepOrientation failed\n");
+      return false;
+    }
+    if (JXL_DEC_SUCCESS !=
+        JxlDecoderSetUnpremultiplyAlpha(dec, dparams.unpremultiply_alpha)) {
+      fprintf(stderr, "JxlDecoderSetUnpremultiplyAlpha failed\n");
+      return false;
+    }
+    if (dparams.display_nits > 0 &&
+        JXL_DEC_SUCCESS !=
+            JxlDecoderSetDesiredIntensityTarget(dec, dparams.display_nits)) {
+      fprintf(stderr, "Decoder failed to set desired intensity target\n");
+      return false;
+    }
+    if (JXL_DEC_SUCCESS != JxlDecoderSetDecompressBoxes(dec, JXL_TRUE)) {
+      fprintf(stderr, "JxlDecoderSetDecompressBoxes failed\n");
+      return false;
+    }
+  }
+  if (JXL_DEC_SUCCESS != JxlDecoderSetInput(dec, bytes, bytes_size)) {
+    fprintf(stderr, "Decoder failed to set input\n");
+    return false;
+  }
+  uint32_t progression_index = 0;
+  bool codestream_done = accepted_formats.empty();
+  BoxProcessor boxes(dec);
+  for (;;) {
+    JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+    if (status == JXL_DEC_ERROR) {
+      fprintf(stderr, "Failed to decode image\n");
+      return false;
+    } else if (status == JXL_DEC_NEED_MORE_INPUT) {
+      if (codestream_done) {
+        break;
+      }
+      if (dparams.allow_partial_input) {
+        if (JXL_DEC_SUCCESS != JxlDecoderFlushImage(dec)) {
+          fprintf(stderr,
+                  "Input file is truncated and there is no preview "
+                  "available yet.\n");
+          return false;
+        }
+        break;
+      }
+      size_t released_size = JxlDecoderReleaseInput(dec);
+      fprintf(stderr,
+              "Input file is truncated (total bytes: %" PRIuS
+              ", processed bytes: %" PRIuS
+              ") and --allow_partial_files is not present.\n",
+              bytes_size, bytes_size - released_size);
+      return false;
+    } else if (status == JXL_DEC_BOX) {
+      boxes.FinalizeOutput();
+      JxlBoxType box_type;
+      if (JXL_DEC_SUCCESS != JxlDecoderGetBoxType(dec, box_type, JXL_TRUE)) {
+        fprintf(stderr, "JxlDecoderGetBoxType failed\n");
+        return false;
+      }
+      std::vector<uint8_t>* box_data = nullptr;
+      if (memcmp(box_type, "Exif", 4) == 0) {
+        box_data = &ppf->metadata.exif;
+      } else if (memcmp(box_type, "iptc", 4) == 0) {
+        box_data = &ppf->metadata.iptc;
+      } else if (memcmp(box_type, "jumb", 4) == 0) {
+        box_data = &ppf->metadata.jumbf;
+      } else if (memcmp(box_type, "xml ", 4) == 0) {
+        box_data = &ppf->metadata.xmp;
+      }
+      if (box_data) {
+        boxes.InitializeOutput(box_data);
+      }
+    } else if (status == JXL_DEC_BOX_NEED_MORE_OUTPUT) {
+      boxes.AddMoreOutput();
+    } else if (status == JXL_DEC_JPEG_RECONSTRUCTION) {
+      can_reconstruct_jpeg = true;
+      // Decoding to JPEG.
+      if (JXL_DEC_SUCCESS != JxlDecoderSetJPEGBuffer(dec,
+                                                     jpeg_data_chunk.data(),
+                                                     jpeg_data_chunk.size())) {
+        fprintf(stderr, "Decoder failed to set JPEG Buffer\n");
+        return false;
+      }
+    } else if (status == JXL_DEC_JPEG_NEED_MORE_OUTPUT) {
+      // Decoded a chunk to JPEG.
+      size_t used_jpeg_output =
+          jpeg_data_chunk.size() - JxlDecoderReleaseJPEGBuffer(dec);
+      jpeg_bytes->insert(jpeg_bytes->end(), jpeg_data_chunk.data(),
+                         jpeg_data_chunk.data() + used_jpeg_output);
+      if (used_jpeg_output == 0) {
+        // Chunk is too small.
+        jpeg_data_chunk.resize(jpeg_data_chunk.size() * 2);
+      }
+      if (JXL_DEC_SUCCESS != JxlDecoderSetJPEGBuffer(dec,
+                                                     jpeg_data_chunk.data(),
+                                                     jpeg_data_chunk.size())) {
+        fprintf(stderr, "Decoder failed to set JPEG Buffer\n");
+        return false;
+      }
+    } else if (status == JXL_DEC_BASIC_INFO) {
+      if (JXL_DEC_SUCCESS != JxlDecoderGetBasicInfo(dec, &ppf->info)) {
+        fprintf(stderr, "JxlDecoderGetBasicInfo failed\n");
+        return false;
+      }
+      if (accepted_formats.empty()) continue;
+      if (num_color_channels != 0) {
+        // Mark the change in number of color channels due to the requested
+        // color space.
+        ppf->info.num_color_channels = num_color_channels;
+      }
+      if (dparams.output_bitdepth.type == JXL_BIT_DEPTH_CUSTOM) {
+        // Select format based on custom bits per sample.
+        ppf->info.bits_per_sample = dparams.output_bitdepth.bits_per_sample;
+      }
+      // Select format according to accepted formats.
+      if (!jxl::extras::SelectFormat(accepted_formats, ppf->info, &format)) {
+        fprintf(stderr, "SelectFormat failed\n");
+        return false;
+      }
+      bool have_alpha = (format.num_channels == 2 || format.num_channels == 4);
+      if (!have_alpha) {
+        // Mark in the basic info that alpha channel was dropped.
+        ppf->info.alpha_bits = 0;
+      } else {
+        if (dparams.unpremultiply_alpha) {
+          // Mark in the basic info that alpha was unpremultiplied.
+          ppf->info.alpha_premultiplied = false;
+        }
+      }
+      bool alpha_found = false;
+      for (uint32_t i = 0; i < ppf->info.num_extra_channels; ++i) {
+        JxlExtraChannelInfo eci;
+        if (JXL_DEC_SUCCESS != JxlDecoderGetExtraChannelInfo(dec, i, &eci)) {
+          fprintf(stderr, "JxlDecoderGetExtraChannelInfo failed\n");
+          return false;
+        }
+        if (eci.type == JXL_CHANNEL_ALPHA && have_alpha && !alpha_found) {
+          // Skip the first alpha channels because it is already present in the
+          // interleaved image.
+          alpha_found = true;
+          continue;
+        }
+        std::string name(eci.name_length + 1, 0);
+        if (JXL_DEC_SUCCESS !=
+            JxlDecoderGetExtraChannelName(dec, i, &name[0], name.size())) {
+          fprintf(stderr, "JxlDecoderGetExtraChannelName failed\n");
+          return false;
+        }
+        name.resize(eci.name_length);
+        ppf->extra_channels_info.push_back({eci, i, name});
+      }
+    } else if (status == JXL_DEC_COLOR_ENCODING) {
+      if (!dparams.color_space.empty()) {
+        if (ppf->info.uses_original_profile) {
+          fprintf(stderr,
+                  "Warning: --color_space ignored because the image is "
+                  "not XYB encoded.\n");
+        } else {
+          if (JXL_DEC_SUCCESS !=
+              JxlDecoderSetPreferredColorProfile(dec, &color_encoding)) {
+            fprintf(stderr, "Failed to set color space.\n");
+            return false;
+          }
+        }
+      }
+      size_t icc_size = 0;
+      JxlColorProfileTarget target = JXL_COLOR_PROFILE_TARGET_DATA;
+      ppf->color_encoding.color_space = JXL_COLOR_SPACE_UNKNOWN;
+      if (JXL_DEC_SUCCESS != JxlDecoderGetColorAsEncodedProfile(
+                                 dec, target, &ppf->color_encoding) ||
+          dparams.need_icc) {
+        // only get ICC if it is not an Enum color encoding
+        if (JXL_DEC_SUCCESS !=
+            JxlDecoderGetICCProfileSize(dec, target, &icc_size)) {
+          fprintf(stderr, "JxlDecoderGetICCProfileSize failed\n");
+        }
+        if (icc_size != 0) {
+          ppf->icc.resize(icc_size);
+          if (JXL_DEC_SUCCESS != JxlDecoderGetColorAsICCProfile(
+                                     dec, target, ppf->icc.data(), icc_size)) {
+            fprintf(stderr, "JxlDecoderGetColorAsICCProfile failed\n");
+            return false;
+          }
+        }
+      }
+      icc_size = 0;
+      target = JXL_COLOR_PROFILE_TARGET_ORIGINAL;
+      if (JXL_DEC_SUCCESS !=
+          JxlDecoderGetICCProfileSize(dec, target, &icc_size)) {
+        fprintf(stderr, "JxlDecoderGetICCProfileSize failed\n");
+      }
+      if (icc_size != 0) {
+        ppf->orig_icc.resize(icc_size);
+        if (JXL_DEC_SUCCESS !=
+            JxlDecoderGetColorAsICCProfile(dec, target, ppf->orig_icc.data(),
+                                           icc_size)) {
+          fprintf(stderr, "JxlDecoderGetColorAsICCProfile failed\n");
+          return false;
+        }
+      }
+    } else if (status == JXL_DEC_FRAME) {
+      jxl::extras::PackedFrame frame(ppf->info.xsize, ppf->info.ysize, format);
+      if (JXL_DEC_SUCCESS != JxlDecoderGetFrameHeader(dec, &frame.frame_info)) {
+        fprintf(stderr, "JxlDecoderGetFrameHeader failed\n");
+        return false;
+      }
+      frame.name.resize(frame.frame_info.name_length + 1, 0);
+      if (JXL_DEC_SUCCESS !=
+          JxlDecoderGetFrameName(dec, &frame.name[0], frame.name.size())) {
+        fprintf(stderr, "JxlDecoderGetFrameName failed\n");
+        return false;
+      }
+      frame.name.resize(frame.frame_info.name_length);
+      ppf->frames.emplace_back(std::move(frame));
+      progression_index = 0;
+    } else if (status == JXL_DEC_FRAME_PROGRESSION) {
+      size_t downsampling = JxlDecoderGetIntendedDownsamplingRatio(dec);
+      if ((max_passes_defined && progression_index >= dparams.max_passes) ||
+          (!max_passes_defined && downsampling <= dparams.max_downsampling)) {
+        if (JXL_DEC_SUCCESS != JxlDecoderFlushImage(dec)) {
+          fprintf(stderr, "JxlDecoderFlushImage failed\n");
+          return false;
+        }
+        if (ppf->frames.back().frame_info.is_last) {
+          break;
+        }
+        if (JXL_DEC_SUCCESS != JxlDecoderSkipCurrentFrame(dec)) {
+          fprintf(stderr, "JxlDecoderSkipCurrentFrame failed\n");
+          return false;
+        }
+      }
+      ++progression_index;
+    } else if (status == JXL_DEC_NEED_PREVIEW_OUT_BUFFER) {
+      size_t buffer_size;
+      if (JXL_DEC_SUCCESS !=
+          JxlDecoderPreviewOutBufferSize(dec, &format, &buffer_size)) {
+        fprintf(stderr, "JxlDecoderPreviewOutBufferSize failed\n");
+        return false;
+      }
+      ppf->preview_frame = std::unique_ptr<jxl::extras::PackedFrame>(
+          new jxl::extras::PackedFrame(ppf->info.preview.xsize,
+                                       ppf->info.preview.ysize, format));
+      if (buffer_size != ppf->preview_frame->color.pixels_size) {
+        fprintf(stderr, "Invalid out buffer size %" PRIuS " %" PRIuS "\n",
+                buffer_size, ppf->preview_frame->color.pixels_size);
+        return false;
+      }
+      if (JXL_DEC_SUCCESS !=
+          JxlDecoderSetPreviewOutBuffer(
+              dec, &format, ppf->preview_frame->color.pixels(), buffer_size)) {
+        fprintf(stderr, "JxlDecoderSetPreviewOutBuffer failed\n");
+        return false;
+      }
+    } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+      if (jpeg_bytes != nullptr) {
+        break;
+      }
+      size_t buffer_size;
+      if (JXL_DEC_SUCCESS !=
+          JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)) {
+        fprintf(stderr, "JxlDecoderImageOutBufferSize failed\n");
+        return false;
+      }
+      jxl::extras::PackedFrame& frame = ppf->frames.back();
+      if (buffer_size != frame.color.pixels_size) {
+        fprintf(stderr, "Invalid out buffer size %" PRIuS " %" PRIuS "\n",
+                buffer_size, frame.color.pixels_size);
+        return false;
+      }
+
+      if (dparams.use_image_callback) {
+        auto callback = [](void* opaque, size_t x, size_t y, size_t num_pixels,
+                           const void* pixels) {
+          auto* ppf = reinterpret_cast<jxl::extras::PackedPixelFile*>(opaque);
+          jxl::extras::PackedImage& color = ppf->frames.back().color;
+          uint8_t* pixels_buffer = reinterpret_cast<uint8_t*>(color.pixels());
+          size_t sample_size = color.pixel_stride();
+          memcpy(pixels_buffer + (color.stride * y + sample_size * x), pixels,
+                 num_pixels * sample_size);
+        };
+        if (JXL_DEC_SUCCESS !=
+            JxlDecoderSetImageOutCallback(dec, &format, callback, ppf)) {
+          fprintf(stderr, "JxlDecoderSetImageOutCallback failed\n");
+          return false;
+        }
+      } else {
+        if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutBuffer(dec, &format,
+                                                           frame.color.pixels(),
+                                                           buffer_size)) {
+          fprintf(stderr, "JxlDecoderSetImageOutBuffer failed\n");
+          return false;
+        }
+      }
+      if (JXL_DEC_SUCCESS !=
+          JxlDecoderSetImageOutBitDepth(dec, &dparams.output_bitdepth)) {
+        fprintf(stderr, "JxlDecoderSetImageOutBitDepth failed\n");
+        return false;
+      }
+      UpdateBitDepth(dparams.output_bitdepth, format.data_type, &ppf->info);
+      bool have_alpha = (format.num_channels == 2 || format.num_channels == 4);
+      if (have_alpha) {
+        // Interleaved alpha channels has the same bit depth as color channels.
+        ppf->info.alpha_bits = ppf->info.bits_per_sample;
+        ppf->info.alpha_exponent_bits = ppf->info.exponent_bits_per_sample;
+      }
+      JxlPixelFormat ec_format = format;
+      ec_format.num_channels = 1;
+      for (auto& eci : ppf->extra_channels_info) {
+        frame.extra_channels.emplace_back(jxl::extras::PackedImage(
+            ppf->info.xsize, ppf->info.ysize, ec_format));
+        auto& ec = frame.extra_channels.back();
+        size_t buffer_size;
+        if (JXL_DEC_SUCCESS != JxlDecoderExtraChannelBufferSize(
+                                   dec, &ec_format, &buffer_size, eci.index)) {
+          fprintf(stderr, "JxlDecoderExtraChannelBufferSize failed\n");
+          return false;
+        }
+        if (buffer_size != ec.pixels_size) {
+          fprintf(stderr,
+                  "Invalid extra channel buffer size"
+                  " %" PRIuS " %" PRIuS "\n",
+                  buffer_size, ec.pixels_size);
+          return false;
+        }
+        if (JXL_DEC_SUCCESS !=
+            JxlDecoderSetExtraChannelBuffer(dec, &ec_format, ec.pixels(),
+                                            buffer_size, eci.index)) {
+          fprintf(stderr, "JxlDecoderSetExtraChannelBuffer failed\n");
+          return false;
+        }
+        UpdateBitDepth(dparams.output_bitdepth, ec_format.data_type,
+                       &eci.ec_info);
+      }
+    } else if (status == JXL_DEC_SUCCESS) {
+      // Decoding finished successfully.
+      break;
+    } else if (status == JXL_DEC_PREVIEW_IMAGE) {
+      // Nothing to do.
+    } else if (status == JXL_DEC_FULL_IMAGE) {
+      if (jpeg_bytes != nullptr || ppf->frames.back().frame_info.is_last) {
+        codestream_done = true;
+      }
+    } else {
+      fprintf(stderr, "Error: unexpected status: %d\n",
+              static_cast<int>(status));
+      return false;
+    }
+  }
+  boxes.FinalizeOutput();
+  if (!ppf->metadata.exif.empty()) {
+    // Verify that Exif box has a valid TIFF header at the specified offset.
+    // Discard bytes preceding the header.
+    if (ppf->metadata.exif.size() >= 4) {
+      uint32_t offset = LoadBE32(ppf->metadata.exif.data());
+      if (offset <= ppf->metadata.exif.size() - 8) {
+        std::vector<uint8_t> exif(ppf->metadata.exif.begin() + 4 + offset,
+                                  ppf->metadata.exif.end());
+        bool bigendian;
+        if (IsExif(exif, &bigendian)) {
+          ppf->metadata.exif = std::move(exif);
+        } else {
+          fprintf(stderr, "Warning: invalid TIFF header in Exif\n");
+        }
+      } else {
+        fprintf(stderr, "Warning: invalid Exif offset: %" PRIu32 "\n", offset);
+      }
+    } else {
+      fprintf(stderr, "Warning: invalid Exif length: %" PRIuS "\n",
+              ppf->metadata.exif.size());
+    }
+  }
+  if (jpeg_bytes != nullptr) {
+    if (!can_reconstruct_jpeg) return false;
+    size_t used_jpeg_output =
+        jpeg_data_chunk.size() - JxlDecoderReleaseJPEGBuffer(dec);
+    jpeg_bytes->insert(jpeg_bytes->end(), jpeg_data_chunk.data(),
+                       jpeg_data_chunk.data() + used_jpeg_output);
+  }
+  if (decoded_bytes) {
+    *decoded_bytes = bytes_size - JxlDecoderReleaseInput(dec);
+  }
+  return true;
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/jxl.h b/third-party/libjxl/libjxl/lib/extras/dec/jxl.h
new file mode 100644
index 0000000000..d717ee7164
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/jxl.h
@@ -0,0 +1,73 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_DEC_JXL_H_
+#define LIB_EXTRAS_DEC_JXL_H_
+
+// Decodes JPEG XL images in memory.
+
+#include <jxl/parallel_runner.h>
+#include <jxl/types.h>
+#include <stdint.h>
+
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "lib/extras/packed_image.h"
+
+namespace jxl {
+namespace extras {
+
+struct JXLDecompressParams {  // Options consumed by DecodeImageJXL().
+  // If empty, little endian float formats will be accepted.
+  std::vector<JxlPixelFormat> accepted_formats;
+
+  // Requested output color space description.
+  std::string color_space;
+  // If set, performs tone mapping to this intensity target luminance.
+  float display_nits = 0.0;
+  // Whether spot colors are rendered on the image.
+  bool render_spotcolors = true;
+  // Whether to keep or undo the orientation given in the header.
+  bool keep_orientation = false;
+
+  // If runner_opaque is set, the decoder uses this parallel runner.
+  JxlParallelRunner runner = nullptr;  // never left indeterminate
+  void* runner_opaque = nullptr;
+
+  // Whether truncated input should be treated as an error.
+  bool allow_partial_input = false;
+
+  // Set to true if an ICC profile has to be synthesized for Enum color
+  // encodings
+  bool need_icc = false;
+
+  // How many passes to decode at most. By default, decode everything.
+  uint32_t max_passes = std::numeric_limits<uint32_t>::max();
+
+  // Alternatively, one can specify the maximum tolerable downscaling factor
+  // with respect to the full size of the image. By default, nothing less than
+  // the full size is requested.
+  size_t max_downsampling = 1;
+
+  // Whether to use the image callback or the image buffer to get the output.
+  bool use_image_callback = true;
+  // Whether to unpremultiply colors for associated alpha channels.
+  bool unpremultiply_alpha = false;
+
+  // Controls the effective bit depth of the output pixels.
+  JxlBitDepth output_bitdepth = {JXL_BIT_DEPTH_FROM_CODESTREAM, 0, 0};
+};
+
+bool DecodeImageJXL(const uint8_t* bytes, size_t bytes_size,  // true on success
+                    const JXLDecompressParams& dparams, size_t* decoded_bytes,
+                    PackedPixelFile* ppf,  // receives decoded frames + metadata
+                    std::vector<uint8_t>* jpeg_bytes = nullptr);  // optional
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_DEC_JXL_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/pgx.cc b/third-party/libjxl/libjxl/lib/extras/dec/pgx.cc
new file mode 100644
index 0000000000..a99eb0f4ee
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/pgx.cc
@@ -0,0 +1,202 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/pgx.h"
+
+#include <string.h>
+
+#include "lib/extras/size_constraints.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+struct HeaderPGX {  // Fields parsed from the textual PGX header line.
+  // NOTE: PGX is always grayscale
+  size_t xsize;            // second number in the header
+  size_t ysize;            // third number, followed by the line break
+  size_t bits_per_sample;  // first number in the header; parser rejects > 16
+  bool big_endian;         // true for "ML", false for "LM"
+  bool is_signed;          // true for '-', false for '+'
+};
+
+class Parser {  // Cursor-based parser for the textual PGX header.
+ public:
+  explicit Parser(const Span<const uint8_t> bytes)
+      : pos_(bytes.data()), end_(pos_ + bytes.size()) {}
+
+  // Sets "pos" to the first non-header byte/pixel on success.
+  Status ParseHeader(HeaderPGX* header, const uint8_t** pos) {
+    // Callers normally guarantee two bytes; verify anyway to stay in bounds.
+    if (end_ - pos_ < 2 || pos_[0] != 'P' || pos_[1] != 'G') return false;
+    pos_ += 2;
+    return ParseHeaderPGX(header, pos);
+  }
+
+  // Exposed for testing. Fails on overflow instead of silently wrapping.
+  Status ParseUnsigned(size_t* number) {
+    if (pos_ == end_) return JXL_FAILURE("PGX: reached end before number");
+    if (!IsDigit(*pos_)) return JXL_FAILURE("PGX: expected unsigned number");
+    constexpr size_t kMax = static_cast<size_t>(-1);
+    *number = 0;
+    while (pos_ < end_ && IsDigit(*pos_)) {
+      const size_t digit = *pos_ - '0';
+      if (*number > (kMax - digit) / 10) return JXL_FAILURE("PGX: overflow");
+      *number = *number * 10 + digit;
+      ++pos_;
+    }
+    return true;
+  }
+
+ private:
+  static bool IsDigit(const uint8_t c) { return '0' <= c && c <= '9'; }
+  static bool IsLineBreak(const uint8_t c) { return c == '\r' || c == '\n'; }
+  static bool IsWhitespace(const uint8_t c) {
+    return IsLineBreak(c) || c == '\t' || c == ' ';
+  }
+
+  Status SkipSpace() {  // Consumes exactly one ' '.
+    if (pos_ == end_) return JXL_FAILURE("PGX: reached end before space");
+    const uint8_t c = *pos_;
+    if (c != ' ') return JXL_FAILURE("PGX: expected space");
+    ++pos_;
+    return true;
+  }
+
+  Status SkipLineBreak() {  // Consumes exactly one line break.
+    if (pos_ == end_) return JXL_FAILURE("PGX: reached end before line break");
+    // Line break can be either "\n" (0a) or "\r\n" (0d 0a).
+    if (*pos_ == '\n') {
+      pos_++;
+      return true;
+    } else if (*pos_ == '\r' && pos_ + 1 != end_ && *(pos_ + 1) == '\n') {
+      pos_ += 2;
+      return true;
+    }
+    return JXL_FAILURE("PGX: expected line break");
+  }
+
+  Status SkipSingleWhitespace() {  // Consumes one space/tab/CR/LF byte.
+    if (pos_ == end_) return JXL_FAILURE("PGX: reached end before whitespace");
+    if (!IsWhitespace(*pos_)) return JXL_FAILURE("PGX: expected whitespace");
+    ++pos_;
+    return true;
+  }
+
+  Status ParseHeaderPGX(HeaderPGX* header, const uint8_t** pos) {
+    JXL_RETURN_IF_ERROR(SkipSpace());
+    if (end_ - pos_ < 2) return JXL_FAILURE("PGX: header too small");
+    if (*pos_ == 'M' && *(pos_ + 1) == 'L') {
+      header->big_endian = true;
+    } else if (*pos_ == 'L' && *(pos_ + 1) == 'M') {
+      header->big_endian = false;
+    } else {
+      return JXL_FAILURE("PGX: invalid endianness");
+    }
+    pos_ += 2;
+    JXL_RETURN_IF_ERROR(SkipSpace());
+    if (pos_ == end_) return JXL_FAILURE("PGX: header too small");
+    if (*pos_ == '+') {
+      header->is_signed = false;
+    } else if (*pos_ == '-') {
+      header->is_signed = true;
+    } else {
+      return JXL_FAILURE("PGX: invalid signedness");
+    }
+    pos_++;
+    // Skip optional space
+    if (pos_ < end_ && *pos_ == ' ') pos_++;
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->bits_per_sample));
+    JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->xsize));
+    JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->ysize));
+    // 0xa, or 0xd 0xa.
+    JXL_RETURN_IF_ERROR(SkipLineBreak());
+
+    // TODO(jon): could do up to 24-bit by converting the values to
+    // JXL_TYPE_FLOAT.
+    if (header->bits_per_sample > 16) {
+      return JXL_FAILURE("PGX: >16 bits not yet supported");
+    }
+    // TODO(lode): support signed integers. This may require changing the way
+    // external_image works.
+    if (header->is_signed) {
+      return JXL_FAILURE("PGX: signed not yet supported");
+    }
+    // Overflow-safe: xsize * ysize * bytes_per_pixel may wrap around size_t.
+    const size_t bytes_per_pixel = header->bits_per_sample <= 8 ? 1 : 2;
+    const size_t avail = static_cast<size_t>(end_ - pos_) / bytes_per_pixel;
+    if (header->xsize != 0 && header->ysize > avail / header->xsize) {
+      return JXL_FAILURE("PGX: data too small");
+    }
+
+    *pos = pos_;
+    return true;
+  }
+
+  const uint8_t* pos_;        // current read position
+  const uint8_t* const end_;  // one past the last input byte
+};
+
+}  // namespace
+
+Status DecodeImagePGX(const Span<const uint8_t> bytes,
+                      const ColorHints& color_hints, PackedPixelFile* ppf,
+                      const SizeConstraints* constraints) {
+  Parser parser(bytes);  // Decodes a PGX file: parse header, then copy pixels.
+  HeaderPGX header = {};
+  const uint8_t* pos = nullptr;  // set by ParseHeader to the first pixel byte
+  if (!parser.ParseHeader(&header, &pos)) return false;
+  JXL_RETURN_IF_ERROR(
+      VerifyDimensions(constraints, header.xsize, header.ysize));
+  if (header.bits_per_sample == 0 || header.bits_per_sample > 32) {
+    return JXL_FAILURE("PGX: bits_per_sample invalid");
+  }
+
+  JXL_RETURN_IF_ERROR(ApplyColorHints(color_hints, /*color_already_set=*/false,
+                                      /*is_gray=*/true, ppf));
+  ppf->info.xsize = header.xsize;
+  ppf->info.ysize = header.ysize;
+  // Original data is uint, so exponent_bits_per_sample = 0.
+  ppf->info.bits_per_sample = header.bits_per_sample;
+  ppf->info.exponent_bits_per_sample = 0;
+  ppf->info.uses_original_profile = true;
+
+  // No alpha in PGX
+  ppf->info.alpha_bits = 0;
+  ppf->info.alpha_exponent_bits = 0;
+  ppf->info.num_color_channels = 1;  // Always grayscale
+  ppf->info.orientation = JXL_ORIENT_IDENTITY;
+
+  JxlDataType data_type;  // narrowest unsigned type that holds one sample
+  if (header.bits_per_sample > 8) {
+    data_type = JXL_TYPE_UINT16;
+  } else {
+    data_type = JXL_TYPE_UINT8;
+  }
+
+  const JxlPixelFormat format{
+      /*num_channels=*/1,
+      /*data_type=*/data_type,
+      /*endianness=*/header.big_endian ? JXL_BIG_ENDIAN : JXL_LITTLE_ENDIAN,
+      /*align=*/0,
+  };
+  ppf->frames.clear();
+  // Allocates the frame buffer.
+  ppf->frames.emplace_back(header.xsize, header.ysize, format);
+  auto& frame = ppf->frames.back();  // written below, so not a const ref
+  size_t pgx_remaining_size = bytes.data() + bytes.size() - pos;
+  if (pgx_remaining_size < frame.color.pixels_size) {
+    return JXL_FAILURE("PGX file too small");
+  }
+  memcpy(frame.color.pixels(), pos, frame.color.pixels_size);
+  return true;
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/pgx.h b/third-party/libjxl/libjxl/lib/extras/dec/pgx.h
new file mode 100644
index 0000000000..2cbd3b4dcf
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/pgx.h
@@ -0,0 +1,35 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_DEC_PGX_H_
+#define LIB_EXTRAS_DEC_PGX_H_
+
+// Decodes PGX pixels in memory.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/extras/dec/color_hints.h"
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+struct SizeConstraints;
+
+namespace extras {
+
+// Decodes PGX `bytes` into `ppf`; signed or >16-bit samples are rejected.
+Status DecodeImagePGX(Span<const uint8_t> bytes, const ColorHints& color_hints,
+                      PackedPixelFile* ppf,
+                      const SizeConstraints* constraints = nullptr);
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_DEC_PGX_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/pgx_test.cc b/third-party/libjxl/libjxl/lib/extras/dec/pgx_test.cc
new file mode 100644
index 0000000000..78ed689d07
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/pgx_test.cc
@@ -0,0 +1,79 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/pgx.h"
+
+#include "lib/extras/packed_image_convert.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+Span<const uint8_t> MakeSpan(const char* str) {
+  // View the NUL-terminated text as raw bytes (terminator excluded).
+  const auto* data = reinterpret_cast<const uint8_t*>(str);
+  return Span<const uint8_t>(data, strlen(str));
+}
+
+TEST(CodecPGXTest, Test8bits) {  // Decodes an 8-bit 2x3 PGX, checks pixels.
+  std::string pgx = "PG ML + 8 2 3\npixels";  // 8 bits, 2x3, 6 sample bytes
+
+  PackedPixelFile ppf;
+  ThreadPool* pool = nullptr;  // the conversion below is given no thread pool
+
+  EXPECT_TRUE(DecodeImagePGX(MakeSpan(pgx.c_str()), ColorHints(), &ppf));
+  CodecInOut io;
+  EXPECT_TRUE(ConvertPackedPixelFileToCodecInOut(ppf, pool, &io));
+
+  ScaleImage(255.f, io.Main().color());  // presumably back to 0..255; confirm
+
+  EXPECT_FALSE(io.metadata.m.bit_depth.floating_point_sample);
+  EXPECT_EQ(8u, io.metadata.m.bit_depth.bits_per_sample);
+  EXPECT_TRUE(io.metadata.m.color_encoding.IsGray());
+  EXPECT_EQ(2u, io.xsize());
+  EXPECT_EQ(3u, io.ysize());
+
+  float eps = 1e-5;  // tolerance for comparing float samples to byte values
+  EXPECT_NEAR('p', io.Main().color()->Plane(0).Row(0)[0], eps);
+  EXPECT_NEAR('i', io.Main().color()->Plane(0).Row(0)[1], eps);
+  EXPECT_NEAR('x', io.Main().color()->Plane(0).Row(1)[0], eps);
+  EXPECT_NEAR('e', io.Main().color()->Plane(0).Row(1)[1], eps);
+  EXPECT_NEAR('l', io.Main().color()->Plane(0).Row(2)[0], eps);
+  EXPECT_NEAR('s', io.Main().color()->Plane(0).Row(2)[1], eps);
+}
+
+TEST(CodecPGXTest, Test16bits) {  // Decodes a 16-bit 2x3 PGX, checks pixels.
+  std::string pgx = "PG ML + 16 2 3\np_i_x_e_l_s_";  // two bytes per sample
+
+  PackedPixelFile ppf;
+  ThreadPool* pool = nullptr;  // the conversion below is given no thread pool
+
+  EXPECT_TRUE(DecodeImagePGX(MakeSpan(pgx.c_str()), ColorHints(), &ppf));
+  CodecInOut io;
+  EXPECT_TRUE(ConvertPackedPixelFileToCodecInOut(ppf, pool, &io));
+
+  ScaleImage(255.f, io.Main().color());  // presumably back to 0..255; confirm
+
+  EXPECT_FALSE(io.metadata.m.bit_depth.floating_point_sample);
+  EXPECT_EQ(16u, io.metadata.m.bit_depth.bits_per_sample);
+  EXPECT_TRUE(io.metadata.m.color_encoding.IsGray());
+  EXPECT_EQ(2u, io.xsize());
+  EXPECT_EQ(3u, io.ysize());
+
+  // Comparing ~16-bit numbers in floats, only ~7 bits left.
+  float eps = 1e-3;
+  const auto& plane = io.Main().color()->Plane(0);
+  EXPECT_NEAR(256.0f * 'p' + '_', plane.Row(0)[0] * 257, eps);  // "ML": MSB 1st
+  EXPECT_NEAR(256.0f * 'i' + '_', plane.Row(0)[1] * 257, eps);
+  EXPECT_NEAR(256.0f * 'x' + '_', plane.Row(1)[0] * 257, eps);
+  EXPECT_NEAR(256.0f * 'e' + '_', plane.Row(1)[1] * 257, eps);
+  EXPECT_NEAR(256.0f * 'l' + '_', plane.Row(2)[0] * 257, eps);
+  EXPECT_NEAR(256.0f * 's' + '_', plane.Row(2)[1] * 257, eps);
+}
+
+}  // namespace
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/pnm.cc b/third-party/libjxl/libjxl/lib/extras/dec/pnm.cc
new file mode 100644
index 0000000000..c3c2247769
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/pnm.cc
@@ -0,0 +1,474 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/pnm.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <cmath>
+
+#include "lib/extras/size_constraints.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+struct HeaderPNM {  // Header fields shared by the PGM/PPM/PAM/PFM parsers.
+  size_t xsize;
+  size_t ysize;
+  bool is_gray;    // PGM ('P5'), gray PFM ('Pf') or a gray PAM TUPLTYPE
+  bool has_alpha;  // PAM: set by the *_ALPHA tuple types only
+  size_t bits_per_sample;  // derived from MaxVal; fixed to 32 for PFM
+  bool floating_point;     // true only for PFM
+  bool big_endian;         // true, except PFM with a negative scale
+  std::vector<JxlExtraChannelType> ec_types;  // PAM: extra-channel TUPLTYPEs
+};
+
+class Parser {  // Cursor-based parser for PGM/PPM/PAM/PFM headers.
+ public:
+  explicit Parser(const Span<const uint8_t> bytes)
+      : pos_(bytes.data()), end_(pos_ + bytes.size()) {}
+
+  // Sets "pos" to the first non-header byte/pixel on success.
+  Status ParseHeader(HeaderPNM* header, const uint8_t** pos) {
+    // Callers normally guarantee two bytes; verify anyway to stay in bounds.
+    if (end_ - pos_ < 2 || pos_[0] != 'P') return false;
+    const uint8_t type = pos_[1];
+    pos_ += 2;
+
+    switch (type) {
+      case '4':
+        return JXL_FAILURE("pbm not supported");
+
+      case '5':
+        header->is_gray = true;
+        return ParseHeaderPNM(header, pos);
+
+      case '6':
+        header->is_gray = false;
+        return ParseHeaderPNM(header, pos);
+
+      case '7':
+        return ParseHeaderPAM(header, pos);
+
+      case 'F':
+        header->is_gray = false;
+        return ParseHeaderPFM(header, pos);
+
+      case 'f':
+        header->is_gray = true;
+        return ParseHeaderPFM(header, pos);
+    }
+    return false;
+  }
+
+  // Exposed for testing. Fails on overflow instead of silently wrapping.
+  Status ParseUnsigned(size_t* number) {
+    if (pos_ == end_) return JXL_FAILURE("PNM: reached end before number");
+    if (!IsDigit(*pos_)) return JXL_FAILURE("PNM: expected unsigned number");
+    constexpr size_t kMax = static_cast<size_t>(-1);
+    *number = 0;
+    while (pos_ < end_ && IsDigit(*pos_)) {
+      const size_t digit = *pos_ - '0';
+      if (*number > (kMax - digit) / 10) return JXL_FAILURE("PNM: overflow");
+      *number = *number * 10 + digit;
+      ++pos_;
+    }
+    return true;
+  }
+
+  Status ParseSigned(double* number) {
+    if (pos_ == end_) return JXL_FAILURE("PNM: reached end before signed");
+
+    if (*pos_ != '-' && *pos_ != '+' && !IsDigit(*pos_)) {
+      return JXL_FAILURE("PNM: expected signed number");
+    }
+
+    // Skip sign
+    const bool is_neg = *pos_ == '-';
+    if (is_neg || *pos_ == '+') {
+      ++pos_;
+      if (pos_ == end_) return JXL_FAILURE("PNM: reached end before digits");
+    }
+
+    // Leading digits
+    *number = 0.0;
+    while (pos_ < end_ && *pos_ >= '0' && *pos_ <= '9') {
+      *number *= 10;
+      *number += *pos_ - '0';
+      ++pos_;
+    }
+
+    // Decimal places?
+    if (pos_ < end_ && *pos_ == '.') {
+      ++pos_;
+      double place = 0.1;
+      while (pos_ < end_ && *pos_ >= '0' && *pos_ <= '9') {
+        *number += (*pos_ - '0') * place;
+        place *= 0.1;
+        ++pos_;
+      }
+    }
+
+    if (is_neg) *number = -*number;
+    return true;
+  }
+
+ private:
+  static bool IsDigit(const uint8_t c) { return '0' <= c && c <= '9'; }
+  static bool IsLineBreak(const uint8_t c) { return c == '\r' || c == '\n'; }
+  static bool IsWhitespace(const uint8_t c) {
+    return IsLineBreak(c) || c == '\t' || c == ' ';
+  }
+
+  Status SkipBlank() {  // Consumes exactly one ' ' or '\n'.
+    if (pos_ == end_) return JXL_FAILURE("PNM: reached end before blank");
+    const uint8_t c = *pos_;
+    if (c != ' ' && c != '\n') return JXL_FAILURE("PNM: expected blank");
+    ++pos_;
+    return true;
+  }
+
+  Status SkipSingleWhitespace() {  // Consumes one space/tab/CR/LF byte.
+    if (pos_ == end_) return JXL_FAILURE("PNM: reached end before whitespace");
+    if (!IsWhitespace(*pos_)) return JXL_FAILURE("PNM: expected whitespace");
+    ++pos_;
+    return true;
+  }
+
+  Status SkipWhitespace() {  // Skips whitespace runs and '#' comment lines.
+    if (pos_ == end_) return JXL_FAILURE("PNM: reached end before whitespace");
+    if (!IsWhitespace(*pos_) && *pos_ != '#') {
+      return JXL_FAILURE("PNM: expected whitespace/comment");
+    }
+
+    while (pos_ < end_ && IsWhitespace(*pos_)) {
+      ++pos_;
+    }
+
+    // Comment(s)
+    while (pos_ != end_ && *pos_ == '#') {
+      while (pos_ != end_ && !IsLineBreak(*pos_)) {
+        ++pos_;
+      }
+      // Newline(s)
+      while (pos_ != end_ && IsLineBreak(*pos_)) pos_++;
+    }
+
+    while (pos_ < end_ && IsWhitespace(*pos_)) {
+      ++pos_;
+    }
+    return true;
+  }
+
+  Status MatchString(const char* keyword, bool skipws = true) {
+    const uint8_t* ppos = pos_;  // pos_ only advances once keyword matched
+    while (*keyword) {
+      if (ppos >= end_) return JXL_FAILURE("PAM: unexpected end of input");
+      if (*keyword != *ppos) return false;
+      ppos++;
+      keyword++;
+    }
+    pos_ = ppos;
+    if (skipws) {
+      JXL_RETURN_IF_ERROR(SkipWhitespace());
+    } else {
+      JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+    }
+    return true;
+  }
+
+  Status ParseHeaderPAM(HeaderPNM* header, const uint8_t** pos) {
+    size_t depth = 3;      // used when the header has no DEPTH line
+    size_t max_val = 255;  // used when the header has no MAXVAL line
+    JXL_RETURN_IF_ERROR(SkipWhitespace());
+    while (!MatchString("ENDHDR", /*skipws=*/false)) {
+      if (MatchString("WIDTH")) {
+        JXL_RETURN_IF_ERROR(ParseUnsigned(&header->xsize));
+        JXL_RETURN_IF_ERROR(SkipWhitespace());
+      } else if (MatchString("HEIGHT")) {
+        JXL_RETURN_IF_ERROR(ParseUnsigned(&header->ysize));
+        JXL_RETURN_IF_ERROR(SkipWhitespace());
+      } else if (MatchString("DEPTH")) {
+        JXL_RETURN_IF_ERROR(ParseUnsigned(&depth));
+        JXL_RETURN_IF_ERROR(SkipWhitespace());
+      } else if (MatchString("MAXVAL")) {
+        JXL_RETURN_IF_ERROR(ParseUnsigned(&max_val));
+        JXL_RETURN_IF_ERROR(SkipWhitespace());
+      } else if (MatchString("TUPLTYPE")) {
+        if (MatchString("RGB_ALPHA")) {
+          header->has_alpha = true;
+        } else if (MatchString("RGB")) {
+        } else if (MatchString("GRAYSCALE_ALPHA")) {
+          header->has_alpha = true;
+          header->is_gray = true;
+        } else if (MatchString("GRAYSCALE")) {
+          header->is_gray = true;
+        } else if (MatchString("BLACKANDWHITE_ALPHA")) {
+          header->has_alpha = true;
+          header->is_gray = true;
+          max_val = 1;
+        } else if (MatchString("BLACKANDWHITE")) {
+          header->is_gray = true;
+          max_val = 1;
+        } else if (MatchString("Alpha")) {
+          header->ec_types.push_back(JXL_CHANNEL_ALPHA);
+        } else if (MatchString("Depth")) {
+          header->ec_types.push_back(JXL_CHANNEL_DEPTH);
+        } else if (MatchString("SpotColor")) {
+          header->ec_types.push_back(JXL_CHANNEL_SPOT_COLOR);
+        } else if (MatchString("SelectionMask")) {
+          header->ec_types.push_back(JXL_CHANNEL_SELECTION_MASK);
+        } else if (MatchString("Black")) {
+          header->ec_types.push_back(JXL_CHANNEL_BLACK);
+        } else if (MatchString("CFA")) {
+          header->ec_types.push_back(JXL_CHANNEL_CFA);
+        } else if (MatchString("Thermal")) {
+          header->ec_types.push_back(JXL_CHANNEL_THERMAL);
+        } else {
+          return JXL_FAILURE("PAM: unknown TUPLTYPE");
+        }
+      } else {
+        constexpr size_t kMaxHeaderLength = 20;
+        char unknown_header[kMaxHeaderLength + 1];
+        size_t len = std::min<size_t>(kMaxHeaderLength, end_ - pos_);
+        strncpy(unknown_header, reinterpret_cast<const char*>(pos_), len);
+        unknown_header[len] = 0;
+        return JXL_FAILURE("PAM: unknown header keyword: %s", unknown_header);
+      }
+    }
+    size_t num_channels = header->is_gray ? 1 : 3;
+    if (header->has_alpha) num_channels++;
+    if (num_channels + header->ec_types.size() != depth) {
+      return JXL_FAILURE("PAM: bad DEPTH");
+    }
+    if (max_val == 0 || max_val >= 65536) {
+      return JXL_FAILURE("PAM: bad MAXVAL");
+    }
+    // e.g. When `max_val` is 1 , we want 1 bit:
+    header->bits_per_sample = FloorLog2Nonzero(max_val) + 1;
+    if ((1u << header->bits_per_sample) - 1 != max_val)
+      return JXL_FAILURE("PNM: unsupported MaxVal (expected 2^n - 1)");
+    // PAM does not pack bits as in PBM.
+
+    header->floating_point = false;
+    header->big_endian = true;
+    *pos = pos_;
+    return true;
+  }
+
+  Status ParseHeaderPNM(HeaderPNM* header, const uint8_t** pos) {
+    JXL_RETURN_IF_ERROR(SkipWhitespace());
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->xsize));
+
+    JXL_RETURN_IF_ERROR(SkipWhitespace());
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->ysize));
+
+    JXL_RETURN_IF_ERROR(SkipWhitespace());
+    size_t max_val;
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&max_val));
+    if (max_val == 0 || max_val >= 65536) {
+      return JXL_FAILURE("PNM: bad MaxVal");
+    }
+    header->bits_per_sample = FloorLog2Nonzero(max_val) + 1;
+    if ((1u << header->bits_per_sample) - 1 != max_val)
+      return JXL_FAILURE("PNM: unsupported MaxVal (expected 2^n - 1)");
+    header->floating_point = false;
+    header->big_endian = true;
+
+    JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+
+    *pos = pos_;
+    return true;
+  }
+
+  Status ParseHeaderPFM(HeaderPNM* header, const uint8_t** pos) {
+    JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->xsize));
+
+    JXL_RETURN_IF_ERROR(SkipBlank());
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->ysize));
+
+    JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+    // The scale has no meaning as multiplier, only its sign is used to
+    // indicate endianness. All software expects nominal range 0..1.
+    double scale;
+    JXL_RETURN_IF_ERROR(ParseSigned(&scale));
+    if (scale == 0.0) {
+      return JXL_FAILURE("PFM: bad scale factor value.");
+    } else if (std::abs(scale) != 1.0) {
+      JXL_WARNING("PFM: Discarding non-unit scale factor");
+    }
+    header->big_endian = scale > 0.0;
+    header->bits_per_sample = 32;
+    header->floating_point = true;
+
+    JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+
+    *pos = pos_;
+    return true;
+  }
+
+  const uint8_t* pos_;        // current read position
+  const uint8_t* const end_;  // one past the last input byte
+};
+
+Span<const uint8_t> MakeSpan(const char* str) {
+  // View the NUL-terminated text as raw bytes (terminator excluded).
+  const auto* data = reinterpret_cast<const uint8_t*>(str);
+  return Span<const uint8_t>(data, strlen(str));
+}
+
+}  // namespace
+
+Status DecodeImagePNM(const Span<const uint8_t> bytes,
+                      const ColorHints& color_hints, PackedPixelFile* ppf,
+                      const SizeConstraints* constraints) {
+  Parser parser(bytes);  // Decodes PGM/PPM/PAM/PFM: header, then pixel copy.
+  HeaderPNM header = {};
+  const uint8_t* pos = nullptr;  // set by ParseHeader to the first pixel byte
+  if (!parser.ParseHeader(&header, &pos)) return false;
+  JXL_RETURN_IF_ERROR(
+      VerifyDimensions(constraints, header.xsize, header.ysize));
+
+  if (header.bits_per_sample == 0 || header.bits_per_sample > 32) {
+    return JXL_FAILURE("PNM: bits_per_sample invalid");
+  }
+
+  // PPM specify that in the raster, the sample values are "nonlinear" (BP.709,
+  // with gamma number of 2.2). Deviate from the specification and assume
+  // `sRGB` in our implementation.
+  JXL_RETURN_IF_ERROR(ApplyColorHints(color_hints, /*color_already_set=*/false,
+                                      header.is_gray, ppf));
+
+  ppf->info.xsize = header.xsize;
+  ppf->info.ysize = header.ysize;
+  if (header.floating_point) {
+    ppf->info.bits_per_sample = 32;
+    ppf->info.exponent_bits_per_sample = 8;
+  } else {
+    ppf->info.bits_per_sample = header.bits_per_sample;
+    ppf->info.exponent_bits_per_sample = 0;
+  }
+
+  ppf->info.orientation = JXL_ORIENT_IDENTITY;
+
+  // Only PAM can set header.has_alpha; the PNM and PFM parsers never do.
+  ppf->info.alpha_bits = (header.has_alpha ? ppf->info.bits_per_sample : 0);
+  ppf->info.alpha_exponent_bits = 0;
+  ppf->info.num_color_channels = (header.is_gray ? 1 : 3);
+  uint32_t num_alpha_channels = (header.has_alpha ? 1 : 0);
+  uint32_t num_interleaved_channels =
+      ppf->info.num_color_channels + num_alpha_channels;
+  ppf->info.num_extra_channels = num_alpha_channels + header.ec_types.size();
+
+  for (auto type : header.ec_types) {
+    PackedExtraChannel pec;
+    pec.ec_info.bits_per_sample = ppf->info.bits_per_sample;
+    pec.ec_info.type = type;
+    ppf->extra_channels_info.emplace_back(std::move(pec));
+  }
+
+  JxlDataType data_type;
+  if (header.floating_point) {
+    // There's no float16 pnm version.
+    data_type = JXL_TYPE_FLOAT;
+  } else {
+    if (header.bits_per_sample > 8) {
+      data_type = JXL_TYPE_UINT16;
+    } else {
+      data_type = JXL_TYPE_UINT8;
+    }
+  }
+
+  const JxlPixelFormat format{
+      /*num_channels=*/num_interleaved_channels,
+      /*data_type=*/data_type,
+      /*endianness=*/header.big_endian ? JXL_BIG_ENDIAN : JXL_LITTLE_ENDIAN,
+      /*align=*/0,
+  };
+  const JxlPixelFormat ec_format{1, format.data_type, format.endianness, 0};
+  ppf->frames.clear();
+  ppf->frames.emplace_back(header.xsize, header.ysize, format);
+  auto* frame = &ppf->frames.back();
+  for (size_t i = 0; i < header.ec_types.size(); ++i) {
+    frame->extra_channels.emplace_back(header.xsize, header.ysize, ec_format);
+  }
+  size_t pnm_remaining_size = bytes.data() + bytes.size() - pos;
+  size_t needed = frame->color.pixels_size;  // plus interleaved extra channels
+  for (const auto& ec : frame->extra_channels) needed += ec.pixels_size;
+  if (pnm_remaining_size < needed) return JXL_FAILURE("PNM file too small");
+
+  uint8_t* out = reinterpret_cast<uint8_t*>(frame->color.pixels());
+  std::vector<uint8_t*> ec_out(header.ec_types.size());
+  for (size_t i = 0; i < ec_out.size(); ++i) {
+    ec_out[i] = reinterpret_cast<uint8_t*>(frame->extra_channels[i].pixels());
+  }
+  if (ec_out.empty()) {
+    const bool flipped_y = header.bits_per_sample == 32;  // PFMs are flipped
+    for (size_t y = 0; y < header.ysize; ++y) {
+      size_t y_in = flipped_y ? header.ysize - 1 - y : y;
+      const uint8_t* row_in = &pos[y_in * frame->color.stride];
+      uint8_t* row_out = &out[y * frame->color.stride];
+      memcpy(row_out, row_in, frame->color.stride);
+    }
+  } else {
+    size_t pwidth = PackedImage::BitsPerChannel(data_type) / 8;
+    for (size_t y = 0; y < header.ysize; ++y) {
+      for (size_t x = 0; x < header.xsize; ++x) {
+        memcpy(out, pos, frame->color.pixel_stride());  // color (+alpha) samples
+        out += frame->color.pixel_stride();
+        pos += frame->color.pixel_stride();
+        for (auto& p : ec_out) {  // then one sample per extra channel
+          memcpy(p, pos, pwidth);
+          pos += pwidth;
+          p += pwidth;
+        }
+      }
+    }
+  }
+  return true;
+}
+
+void TestCodecPNM() {  // Self-test for Parser::ParseUnsigned / ParseSigned.
+  size_t u = 77777;  // Initialized to wrong value.
+  double d = 77.77;  // Sentinel; only checked after successful parses.
+// Failing to parse invalid strings results in a crash if `JXL_CRASH_ON_ERROR`
+// is defined and hence the tests fail. Therefore we only run these tests if
+// `JXL_CRASH_ON_ERROR` is not defined.
+#ifndef JXL_CRASH_ON_ERROR
+  JXL_CHECK(false == Parser(MakeSpan("")).ParseUnsigned(&u));
+  JXL_CHECK(false == Parser(MakeSpan("+")).ParseUnsigned(&u));
+  JXL_CHECK(false == Parser(MakeSpan("-")).ParseUnsigned(&u));
+  JXL_CHECK(false == Parser(MakeSpan("A")).ParseUnsigned(&u));
+
+  JXL_CHECK(false == Parser(MakeSpan("")).ParseSigned(&d));
+  JXL_CHECK(false == Parser(MakeSpan("+")).ParseSigned(&d));
+  JXL_CHECK(false == Parser(MakeSpan("-")).ParseSigned(&d));
+  JXL_CHECK(false == Parser(MakeSpan("A")).ParseSigned(&d));
+#endif
+  JXL_CHECK(true == Parser(MakeSpan("1")).ParseUnsigned(&u));
+  JXL_CHECK(u == 1);
+
+  JXL_CHECK(true == Parser(MakeSpan("32")).ParseUnsigned(&u));
+  JXL_CHECK(u == 32);
+
+  JXL_CHECK(true == Parser(MakeSpan("1")).ParseSigned(&d));
+  JXL_CHECK(d == 1.0);
+  JXL_CHECK(true == Parser(MakeSpan("+2")).ParseSigned(&d));
+  JXL_CHECK(d == 2.0);
+  JXL_CHECK(true == Parser(MakeSpan("-3")).ParseSigned(&d));
+  JXL_CHECK(std::abs(d - -3.0) < 1E-15);
+  JXL_CHECK(true == Parser(MakeSpan("3.141592")).ParseSigned(&d));
+  JXL_CHECK(std::abs(d - 3.141592) < 1E-15);
+  JXL_CHECK(true == Parser(MakeSpan("-3.141592")).ParseSigned(&d));
+  JXL_CHECK(std::abs(d - -3.141592) < 1E-15);
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/pnm.h b/third-party/libjxl/libjxl/lib/extras/dec/pnm.h
new file mode 100644
index 0000000000..0745b2f20d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/pnm.h
@@ -0,0 +1,41 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_DEC_PNM_H_
+#define LIB_EXTRAS_DEC_PNM_H_
+
+// Decodes PBM/PGM/PPM/PFM pixels in memory.
+
+#include <stddef.h>
+#include <stdint.h>
+
+// TODO(janwas): workaround for incorrect Win64 codegen (cause unknown)
+#include <hwy/highway.h>
+
+#include "lib/extras/dec/color_hints.h"
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+struct SizeConstraints;
+
+namespace extras {
+
+// Decodes `bytes` into `ppf`. color_hints may specify "color_space", which
+// defaults to sRGB.
+Status DecodeImagePNM(Span<const uint8_t> bytes, const ColorHints& color_hints,
+                      PackedPixelFile* ppf,
+                      const SizeConstraints* constraints = nullptr);
+
+void TestCodecPNM();  // Self-test of the PNM number parser (uses JXL_CHECK).
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_DEC_PNM_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/enc/apng.cc b/third-party/libjxl/libjxl/lib/extras/enc/apng.cc
new file mode 100644
index 0000000000..53ef297367
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/enc/apng.cc
@@ -0,0 +1,454 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/enc/apng.h"
+
+// Parts of this code are taken from apngdis, which has the following license:
+/* APNG Disassembler 2.8
+ *
+ * Deconstructs APNG files into individual frames.
+ *
+ * http://apngdis.sourceforge.net
+ *
+ * Copyright (c) 2010-2015 Max Stepin
+ * maxst at users.sourceforge.net
+ *
+ * zlib license
+ * ------------
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/extras/exif.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/printf_macros.h"
+#if JPEGXL_ENABLE_APNG
+#include "png.h" /* original (unpatched) libpng is ok */
+#endif
+
+namespace jxl {
+namespace extras {
+
+#if JPEGXL_ENABLE_APNG
+namespace {
+
+constexpr unsigned char kExifSignature[6] = {0x45, 0x78, 0x69,
+                                             0x66, 0x00, 0x00};
+
+// PNG/APNG encoder backed by libpng.
+class APNGEncoder : public Encoder {
+ public:
+  // Accepts 1-4 channels of 8- or 16-bit samples in either byte order.
+  std::vector<JxlPixelFormat> AcceptedFormats() const override {
+    std::vector<JxlPixelFormat> fmts;
+    for (const uint32_t channels : {1, 2, 3, 4}) {
+      for (const JxlDataType type : {JXL_TYPE_UINT8, JXL_TYPE_UINT16}) {
+        for (JxlEndianness order : {JXL_BIG_ENDIAN, JXL_LITTLE_ENDIAN}) {
+          fmts.push_back(JxlPixelFormat{channels, type, order, /*align=*/0});
+        }
+      }
+    }
+    return fmts;
+  }
+  Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image,
+                ThreadPool* pool) const override {
+    JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info));
+    encoded_image->icc.clear();
+    encoded_image->bitstreams.resize(1);
+    std::vector<uint8_t>* sink = &encoded_image->bitstreams.front();
+    return EncodePackedPixelFileToAPNG(ppf, pool, sink);
+  }
+ private:
+  Status EncodePackedPixelFileToAPNG(const PackedPixelFile& ppf,
+                                     ThreadPool* pool,
+                                     std::vector<uint8_t>* bytes) const;
+};
+
+// libpng write callback: appends `length` bytes to the vector set as io_ptr.
+static void PngWrite(png_structp png_ptr, png_bytep data, png_size_t length) {
+  auto* sink = static_cast<std::vector<uint8_t>*>(png_get_io_ptr(png_ptr));
+  sink->insert(sink->end(), data, data + length);
+}
+
+// Stores XMP and EXIF/IPTC into key/value strings for PNG
+class BlobsWriterPNG {
+ public:
+  static Status Encode(const PackedMetadata& blobs,
+                       std::vector<std::string>* strings) {
+    if (!blobs.exif.empty()) {
+      // PNG viewers typically ignore Exif orientation but not all of them do
+      // (and e.g. cjxl doesn't), so we overwrite the Exif orientation to the
+      // identity to avoid repeated orientation.
+      std::vector<uint8_t> exif = blobs.exif;
+      ResetExifOrientation(exif);
+      // By convention, the data is prefixed with "Exif\0\0" when stored in
+      // the legacy (and non-standard) "Raw profile type exif" text chunk
+      // currently used here.
+      // TODO: Store Exif data in an eXIf chunk instead, which always begins
+      // with the TIFF header.
+      if (exif.size() >= sizeof kExifSignature &&
+          memcmp(exif.data(), kExifSignature, sizeof kExifSignature) != 0) {
+        exif.insert(exif.begin(), kExifSignature,
+                    kExifSignature + sizeof kExifSignature);
+      }
+      JXL_RETURN_IF_ERROR(EncodeBase16("exif", exif, strings));
+    }
+    if (!blobs.iptc.empty()) {
+      JXL_RETURN_IF_ERROR(EncodeBase16("iptc", blobs.iptc, strings));
+    }
+    if (!blobs.xmp.empty()) {
+      // TODO: Store XMP data in an "XML:com.adobe.xmp" text chunk instead.
+      JXL_RETURN_IF_ERROR(EncodeBase16("xmp", blobs.xmp, strings));
+    }
+    return true;
+  }
+
+ private:
+  static JXL_INLINE char EncodeNibble(const uint8_t nibble) {
+    JXL_ASSERT(nibble < 16);
+    return (nibble < 10) ? '0' + nibble : 'a' + nibble - 10;  // lowercase hex
+  }
+  // Appends two strings for `type`: the chunk key, then header + hex payload.
+  static Status EncodeBase16(const std::string& type,
+                             const std::vector<uint8_t>& bytes,
+                             std::vector<std::string>* strings) {
+    // Encoding: base16; '\n' before every 72 hex chars and one at the end.
+    const size_t base16_size =
+        2 * bytes.size() + DivCeil(bytes.size(), size_t(36)) + 1;
+    std::string base16;
+    base16.reserve(base16_size);
+    for (size_t i = 0; i < bytes.size(); ++i) {
+      if (i % 36 == 0) base16.push_back('\n');
+      base16.push_back(EncodeNibble(bytes[i] >> 4));
+      base16.push_back(EncodeNibble(bytes[i] & 0x0F));
+    }
+    base16.push_back('\n');
+    JXL_ASSERT(base16.length() == base16_size);
+    // Text chunk keyword, e.g. "Raw profile type exif".
+    char key[30];
+    snprintf(key, sizeof(key), "Raw profile type %s", type.c_str());
+    // Value prefix: newline, type name, newline, byte count padded to width 8.
+    char header[30];
+    snprintf(header, sizeof(header), "\n%s\n%8" PRIuS, type.c_str(),
+             bytes.size());
+    // Callers read `strings` as consecutive (key, value) pairs.
+    strings->push_back(std::string(key));
+    strings->push_back(std::string(header) + base16);
+    return true;
+  }
+};
+
+void MaybeAddCICP(const JxlColorEncoding& c_enc, png_structp png_ptr,
+                  png_infop info_ptr) {
+  png_byte cicp_data[4] = {};  // primaries, transfer, matrix, full-range flag
+  png_unknown_chunk cicp_chunk;
+  if (c_enc.color_space != JXL_COLOR_SPACE_RGB) {
+    return;  // only RGB encodings get a cICP chunk
+  }
+  if (c_enc.primaries == JXL_PRIMARIES_P3) {
+    if (c_enc.white_point == JXL_WHITE_POINT_D65) {
+      cicp_data[0] = 12;  // CICP colour primaries code for P3 with D65 white
+    } else if (c_enc.white_point == JXL_WHITE_POINT_DCI) {
+      cicp_data[0] = 11;  // CICP colour primaries code for P3 with DCI white
+    } else {
+      return;
+    }
+  } else if (c_enc.primaries != JXL_PRIMARIES_CUSTOM &&
+             c_enc.white_point == JXL_WHITE_POINT_D65) {
+    cicp_data[0] = static_cast<png_byte>(c_enc.primaries);  // enum value as-is
+  } else {
+    return;
+  }
+  if (c_enc.transfer_function == JXL_TRANSFER_FUNCTION_UNKNOWN ||
+      c_enc.transfer_function == JXL_TRANSFER_FUNCTION_GAMMA) {
+    return;  // these two transfer functions are never written as cICP
+  }
+  cicp_data[1] = static_cast<png_byte>(c_enc.transfer_function);
+  cicp_data[2] = 0;  // matrix coefficients: identity (RGB)
+  cicp_data[3] = 1;  // full-range flag set
+  cicp_chunk.data = cicp_data;
+  cicp_chunk.size = sizeof(cicp_data);
+  cicp_chunk.location = PNG_HAVE_IHDR;
+  memcpy(cicp_chunk.name, "cICP", 5);  // four characters plus the NUL
+  png_set_keep_unknown_chunks(png_ptr, PNG_HANDLE_CHUNK_ALWAYS,
+                              reinterpret_cast<const png_byte*>("cICP"), 1);
+  png_set_unknown_chunks(png_ptr, info_ptr, &cicp_chunk, 1);
+}
+
+// Writes sRGB plus matching cHRM/gAMA chunks when `c_enc` has the sRGB transfer
+// function and is gray or sRGB/D65 RGB. Returns whether the chunks were set.
+bool MaybeAddSRGB(const JxlColorEncoding& c_enc, png_structp png_ptr,
+                  png_infop info_ptr) {
+  if (c_enc.transfer_function != JXL_TRANSFER_FUNCTION_SRGB) return false;
+  const bool srgb_colors = c_enc.color_space == JXL_COLOR_SPACE_RGB &&
+                           c_enc.primaries == JXL_PRIMARIES_SRGB &&
+                           c_enc.white_point == JXL_WHITE_POINT_D65;
+  if (c_enc.color_space != JXL_COLOR_SPACE_GRAY && !srgb_colors) return false;
+  png_set_sRGB(png_ptr, info_ptr, c_enc.rendering_intent);
+  png_set_cHRM_fixed(png_ptr, info_ptr, 31270, 32900, 64000, 33000, 30000,
+                     60000, 15000, 6000);
+  png_set_gAMA_fixed(png_ptr, info_ptr, 45455);
+  return true;
+}
+
+// Sets a cHRM chunk from the xy chromaticities of an RGB color encoding.
+void MaybeAddCHRM(const JxlColorEncoding& c_enc, png_structp png_ptr,
+                  png_infop info_ptr) {
+  if (c_enc.color_space != JXL_COLOR_SPACE_RGB || c_enc.primaries == 0) return;
+  png_set_cHRM(png_ptr, info_ptr,
+               c_enc.white_point_xy[0], c_enc.white_point_xy[1],
+               c_enc.primaries_red_xy[0], c_enc.primaries_red_xy[1],
+               c_enc.primaries_green_xy[0], c_enc.primaries_green_xy[1],
+               c_enc.primaries_blue_xy[0], c_enc.primaries_blue_xy[1]);
+}
+
+// Sets a gAMA chunk for linear, sRGB and pure-gamma transfer functions.
+void MaybeAddGAMA(const JxlColorEncoding& c_enc, png_structp png_ptr,
+                  png_infop info_ptr) {
+  const auto tf = c_enc.transfer_function;
+  if (tf == JXL_TRANSFER_FUNCTION_LINEAR) {
+    // Linear light: gamma of exactly one.
+    png_set_gAMA_fixed(png_ptr, info_ptr, PNG_FP_1);
+  } else if (tf == JXL_TRANSFER_FUNCTION_SRGB) {
+    // 45455 is 1/2.2 in libpng's 1e-5 fixed-point units.
+    png_set_gAMA_fixed(png_ptr, info_ptr, 45455);
+  } else if (tf == JXL_TRANSFER_FUNCTION_GAMMA) {
+    // Pure power law: forward the encoding's own gamma value.
+    png_set_gAMA(png_ptr, info_ptr, c_enc.gamma);
+  }
+  // Any other transfer function: no gAMA chunk.
+}
+
+
+// For PQ encodings, attaches a cLLi chunk whose MaxCLL field is derived from
+// `intensity_target`; MaxFALL is left at zero.
+void MaybeAddCLLi(const JxlColorEncoding& c_enc, const float intensity_target,
+                  png_structp png_ptr, png_infop info_ptr) {
+  if (c_enc.transfer_function != JXL_TRANSFER_FUNCTION_PQ) return;
+
+  const float clamped_target = Clamp1(intensity_target, 0.f, 10000.f);
+  const uint32_t max_cll = static_cast<uint32_t>(10000.f * clamped_target);
+  // Bytes 0-3: big-endian MaxCLL. Bytes 4-7 (MaxFALL) stay zero.
+  png_byte payload[8] = {};
+  png_save_uint_32(payload, max_cll);
+
+  png_unknown_chunk cll_chunk;
+  memcpy(cll_chunk.name, "cLLi", 5);
+  cll_chunk.data = payload;
+  cll_chunk.size = sizeof payload;
+  cll_chunk.location = PNG_HAVE_IHDR;
+  png_set_keep_unknown_chunks(png_ptr, PNG_HANDLE_CHUNK_ALWAYS,
+                              reinterpret_cast<const png_byte*>("cLLi"), 1);
+  png_set_unknown_chunks(png_ptr, info_ptr, &cll_chunk, 1);
+}
+
+// Serializes all frames of `ppf` into `bytes` as PNG, or as APNG when
+// `ppf.info.have_animation` is set. Color and metadata chunks are written with
+// the first frame only; each later frame is encoded by a fresh libpng writer
+// and its IDAT payload is re-wrapped into a single fdAT chunk.
+Status APNGEncoder::EncodePackedPixelFileToAPNG(
+    const PackedPixelFile& ppf, ThreadPool* pool,
+    std::vector<uint8_t>* bytes) const {
+  size_t xsize = ppf.info.xsize;
+  size_t ysize = ppf.info.ysize;
+  bool has_alpha = ppf.info.alpha_bits != 0;
+  bool is_gray = ppf.info.num_color_channels == 1;
+  size_t color_channels = ppf.info.num_color_channels;
+  size_t num_channels = color_channels + (has_alpha ? 1 : 0);
+  size_t num_samples = num_channels * xsize * ysize;
+  if (!ppf.info.have_animation && ppf.frames.size() != 1) {
+    return JXL_FAILURE("Invalid number of frames");
+  }
+  size_t count = 0;
+  size_t anim_chunks = 0;
+
+  for (const auto& frame : ppf.frames) {
+    JXL_RETURN_IF_ERROR(VerifyPackedImage(frame.color, ppf.info));
+
+    const PackedImage& color = frame.color;
+    const JxlPixelFormat format = color.format;
+    const uint8_t* in = reinterpret_cast<const uint8_t*>(color.pixels());
+    size_t data_bits_per_sample = PackedImage::BitsPerChannel(format.data_type);
+    size_t bytes_per_sample = data_bits_per_sample / 8;
+    size_t out_bytes_per_sample = bytes_per_sample > 1 ? 2 : 1;
+    size_t out_stride = xsize * num_channels * out_bytes_per_sample;
+    size_t out_size = ysize * out_stride;
+    std::vector<uint8_t> out(out_size);
+    // Rescale samples to the full 8/16-bit range; 16-bit output is big-endian.
+    if (format.data_type == JXL_TYPE_UINT8) {
+      if (ppf.info.bits_per_sample < 8) {
+        float mul = 255.0 / ((1u << ppf.info.bits_per_sample) - 1);
+        for (size_t i = 0; i < num_samples; ++i) {
+          out[i] = static_cast<uint8_t>(in[i] * mul + 0.5);
+        }
+      } else {
+        memcpy(&out[0], in, out_size);
+      }
+    } else if (format.data_type == JXL_TYPE_UINT16) {
+      if (ppf.info.bits_per_sample < 16 ||
+          format.endianness != JXL_BIG_ENDIAN) {
+        float mul = 65535.0 / ((1u << ppf.info.bits_per_sample) - 1);
+        const uint8_t* p_in = in;
+        uint8_t* p_out = out.data();
+        for (size_t i = 0; i < num_samples; ++i, p_in += 2, p_out += 2) {
+          uint32_t val = (format.endianness == JXL_BIG_ENDIAN ? LoadBE16(p_in)
+                                                              : LoadLE16(p_in));
+          StoreBE16(static_cast<uint32_t>(val * mul + 0.5), p_out);
+        }
+      } else {
+        memcpy(&out[0], in, out_size);
+      }
+    }
+    png_structp png_ptr =
+        png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+    if (!png_ptr) return JXL_FAILURE("Could not init png encoder");
+    png_infop info_ptr = png_create_info_struct(png_ptr);
+    if (!info_ptr) {
+      // Do not leak the write struct created just above.
+      png_destroy_write_struct(&png_ptr, NULL);
+      return JXL_FAILURE("Could not init png info struct");
+    }
+
+    png_set_write_fn(png_ptr, bytes, PngWrite, NULL);
+    png_set_flush(png_ptr, 0);
+
+    int width = xsize;
+    int height = ysize;
+    png_byte color_type = (is_gray ? PNG_COLOR_TYPE_GRAY : PNG_COLOR_TYPE_RGB);
+    if (has_alpha) color_type |= PNG_COLOR_MASK_ALPHA;
+    png_byte bit_depth = out_bytes_per_sample * 8;
+    png_set_IHDR(png_ptr, info_ptr, width, height, bit_depth, color_type,
+                 PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE,
+                 PNG_FILTER_TYPE_BASE);
+    if (count == 0) {
+      if (!MaybeAddSRGB(ppf.color_encoding, png_ptr, info_ptr)) {
+        MaybeAddCICP(ppf.color_encoding, png_ptr, info_ptr);
+        if (!ppf.icc.empty()) {
+          png_set_benign_errors(png_ptr, 1);
+          png_set_iCCP(png_ptr, info_ptr, "1", 0, ppf.icc.data(),
+                       ppf.icc.size());
+        }
+        MaybeAddCHRM(ppf.color_encoding, png_ptr, info_ptr);
+        MaybeAddGAMA(ppf.color_encoding, png_ptr, info_ptr);
+      }
+      MaybeAddCLLi(ppf.color_encoding, ppf.info.intensity_target, png_ptr,
+                   info_ptr);
+      // Metadata blobs become compressed text chunks, one per (key, value).
+      std::vector<std::string> textstrings;
+      JXL_RETURN_IF_ERROR(BlobsWriterPNG::Encode(ppf.metadata, &textstrings));
+      for (size_t kk = 0; kk + 1 < textstrings.size(); kk += 2) {
+        png_text text;
+        text.key = const_cast<png_charp>(textstrings[kk].c_str());
+        text.text = const_cast<png_charp>(textstrings[kk + 1].c_str());
+        text.compression = PNG_TEXT_COMPRESSION_zTXt;
+        png_set_text(png_ptr, info_ptr, &text, 1);
+      }
+
+      png_write_info(png_ptr, info_ptr);
+    } else {
+      // fake writing a header, otherwise libpng gets confused
+      size_t pos = bytes->size();
+      png_write_info(png_ptr, info_ptr);
+      bytes->resize(pos);
+    }
+
+    if (ppf.info.have_animation) {
+      if (count == 0) {
+        png_byte adata[8];
+        png_save_uint_32(adata, ppf.frames.size());
+        png_save_uint_32(adata + 4, ppf.info.animation.num_loops);
+        png_byte actl[5] = "acTL";
+        png_write_chunk(png_ptr, actl, adata, 8);
+      }
+      png_byte fdata[26];
+      // TODO(jon): also make this work for the non-coalesced case
+      png_save_uint_32(fdata, anim_chunks++);
+      png_save_uint_32(fdata + 4, width);
+      png_save_uint_32(fdata + 8, height);
+      png_save_uint_32(fdata + 12, 0);
+      png_save_uint_32(fdata + 16, 0);
+      png_save_uint_16(fdata + 20, frame.frame_info.duration *
+                                       ppf.info.animation.tps_denominator);
+      png_save_uint_16(fdata + 22, ppf.info.animation.tps_numerator);
+      fdata[24] = 1;  // dispose_op: APNG_DISPOSE_OP_BACKGROUND
+      fdata[25] = 0;  // blend_op: APNG_BLEND_OP_SOURCE
+      png_byte fctl[5] = "fcTL";
+      png_write_chunk(png_ptr, fctl, fdata, 26);
+    }
+
+    std::vector<uint8_t*> rows(height);
+    for (int y = 0; y < height; ++y) {
+      rows[y] = out.data() + y * out_stride;
+    }
+    // Flush first so that `pos` marks where this frame's IDAT chunks begin.
+    png_write_flush(png_ptr);
+    const size_t pos = bytes->size();
+    png_write_image(png_ptr, &rows[0]);
+    png_write_flush(png_ptr);
+    if (count > 0) {
+      std::vector<uint8_t> fdata(4);
+      png_save_uint_32(fdata.data(), anim_chunks++);
+      size_t p = pos;
+      while (p + 8 < bytes->size()) {
+        size_t len = png_get_uint_32(bytes->data() + p);
+        JXL_ASSERT(bytes->operator[](p + 4) == 'I');
+        JXL_ASSERT(bytes->operator[](p + 5) == 'D');
+        JXL_ASSERT(bytes->operator[](p + 6) == 'A');
+        JXL_ASSERT(bytes->operator[](p + 7) == 'T');
+        fdata.insert(fdata.end(), bytes->data() + p + 8,
+                     bytes->data() + p + 8 + len);
+        p += len + 12;  // skip payload plus 4 length + 4 type + 4 CRC bytes
+      }
+      bytes->resize(pos);
+
+      png_byte fdat[5] = "fdAT";
+      png_write_chunk(png_ptr, fdat, fdata.data(), fdata.size());
+    }
+
+    count++;
+    if (count == ppf.frames.size() || !ppf.info.have_animation) {
+      png_write_end(png_ptr, NULL);
+    }
+
+    png_destroy_write_struct(&png_ptr, &info_ptr);
+  }
+
+  return true;
+}
+
+}  // namespace
+#endif
+
+std::unique_ptr<Encoder> GetAPNGEncoder() {  // nullptr if built without APNG.
+#if JPEGXL_ENABLE_APNG
+  return jxl::make_unique<APNGEncoder>();
+#else
+  return nullptr;
+#endif
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/enc/apng.h b/third-party/libjxl/libjxl/lib/extras/enc/apng.h
new file mode 100644
index 0000000000..2a2139c8fa
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/enc/apng.h
@@ -0,0 +1,23 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_ENC_APNG_H_
+#define LIB_EXTRAS_ENC_APNG_H_
+
+// Encodes APNG images in memory.
+
+#include <memory>
+
+#include "lib/extras/enc/encode.h"
+
+namespace jxl {
+namespace extras {
+
+std::unique_ptr<Encoder> GetAPNGEncoder();  // nullptr without APNG support.
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_ENC_APNG_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/enc/encode.cc b/third-party/libjxl/libjxl/lib/extras/enc/encode.cc
new file mode 100644
index 0000000000..8a84103c21
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/enc/encode.cc
@@ -0,0 +1,177 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/enc/encode.h"
+
+#include <locale>
+
+#include "lib/extras/enc/apng.h"
+#include "lib/extras/enc/exr.h"
+#include "lib/extras/enc/jpg.h"
+#include "lib/extras/enc/npy.h"
+#include "lib/extras/enc/pgx.h"
+#include "lib/extras/enc/pnm.h"
+#include "lib/jxl/base/printf_macros.h"
+
+namespace jxl {
+namespace extras {
+
+// Validates the encoder-independent fields of `info`.
+Status Encoder::VerifyBasicInfo(const JxlBasicInfo& info) {
+  if (info.xsize == 0 || info.ysize == 0) return JXL_FAILURE("Empty image");
+  const bool gray_or_rgb =
+      info.num_color_channels == 1 || info.num_color_channels == 3;
+  if (!gray_or_rgb) return JXL_FAILURE("Invalid number of color channels");
+  const bool alpha_depth_differs =
+      info.alpha_bits > 0 && info.alpha_bits != info.bits_per_sample;
+  if (alpha_depth_differs) {
+    return JXL_FAILURE("Alpha bit depth does not match image bit depth");
+  }
+  const bool identity = info.orientation == JXL_ORIENT_IDENTITY;
+  if (!identity) return JXL_FAILURE("Orientation must be identity");
+  return true;
+}
+
+Status Encoder::VerifyFormat(const JxlPixelFormat& format) const {
+  for (const JxlPixelFormat& ok : AcceptedFormats()) {  // first match wins
+    const bool same_layout = ok.num_channels == format.num_channels &&
+                             ok.data_type == format.data_type;
+    const bool endian_ok =  // byte order is irrelevant for 8-bit samples
+        ok.data_type == JXL_TYPE_UINT8 || ok.endianness == format.endianness;
+    if (same_layout && endian_ok) return true;
+  }
+  return JXL_FAILURE("Format is not in the list of accepted formats.");
+}
+
+Status Encoder::VerifyBitDepth(JxlDataType data_type, uint32_t bits_per_sample,
+                               uint32_t exponent_bits) {
+  const uint32_t bits = bits_per_sample;  // other data types are not checked
+  const bool bad_u8 = data_type == JXL_TYPE_UINT8 &&
+                      (bits == 0 || bits > 8 || exponent_bits != 0);
+  const bool bad_u16 = data_type == JXL_TYPE_UINT16 &&
+                       (bits <= 8 || bits > 16 || exponent_bits != 0);
+  const bool bad_f16 =
+      data_type == JXL_TYPE_FLOAT16 && (bits > 16 || exponent_bits > 5);
+  if (!(bad_u8 || bad_u16 || bad_f16)) return true;
+  return JXL_FAILURE(
+      "Incompatible data_type %d and bit depth %u with exponent bits %u",
+      (int)data_type, bits_per_sample, exponent_bits);
+}
+
+// Checks that `image` has a pixel buffer, that the buffer is tightly packed,
+// and that its dimensions and channel count agree with `info`.
+Status Encoder::VerifyImageSize(const PackedImage& image,
+                                const JxlBasicInfo& info) {
+  if (image.pixels() == nullptr) return JXL_FAILURE("Invalid image.");
+  const bool tight_rows = image.stride == image.xsize * image.pixel_stride();
+  if (!tight_rows) return JXL_FAILURE("Invalid image stride.");
+  const bool tight_buffer = image.pixels_size == image.ysize * image.stride;
+  if (!tight_buffer) return JXL_FAILURE("Invalid image size.");
+
+  // An alpha channel counts as one extra channel on top of the color ones.
+  const size_t alpha_channels = info.alpha_bits > 0 ? 1 : 0;
+  const size_t expected_channels = info.num_color_channels + alpha_channels;
+  const bool same_dims = image.xsize == info.xsize && image.ysize == info.ysize;
+  if (!same_dims || image.format.num_channels != expected_channels) {
+    return JXL_FAILURE("Frame size does not match image size");
+  }
+  return true;
+}
+
+Status Encoder::VerifyPackedImage(const PackedImage& image,
+                                  const JxlBasicInfo& info) const {
+  JXL_RETURN_IF_ERROR(VerifyImageSize(image, info));
+  JXL_RETURN_IF_ERROR(VerifyFormat(image.format));
+  JXL_RETURN_IF_ERROR(VerifyBitDepth(image.format.data_type,
+                                     info.bits_per_sample,
+                                     info.exponent_bits_per_sample));
+  return true;  // size, format and bit depth all verified
+}
+
+Status SelectFormat(const std::vector<JxlPixelFormat>& accepted_formats,
+                    const JxlBasicInfo& basic_info, JxlPixelFormat* format) {
+  const size_t original_bit_depth = basic_info.bits_per_sample;
+  size_t current_bit_depth = 0;  // 0 means no candidate has been chosen yet
+  size_t num_alpha_channels = (basic_info.alpha_bits != 0 ? 1 : 0);
+  size_t num_channels = basic_info.num_color_channels + num_alpha_channels;
+  for (;;) {  // at most two passes: with alpha, then without it
+    for (const JxlPixelFormat& candidate : accepted_formats) {
+      if (candidate.num_channels != num_channels) continue;
+      const size_t candidate_bit_depth =
+          PackedImage::BitsPerChannel(candidate.data_type);
+      if (
+          // Candidate bit depth is less than what we have and still enough
+          (original_bit_depth <= candidate_bit_depth &&
+           candidate_bit_depth < current_bit_depth) ||
+          // Or larger than the too-small bit depth we currently have
+          (current_bit_depth < candidate_bit_depth &&
+           current_bit_depth < original_bit_depth)) {
+        *format = candidate;
+        current_bit_depth = candidate_bit_depth;
+      }
+    }
+    if (current_bit_depth == 0) {  // nothing matched this channel count
+      if (num_channels > basic_info.num_color_channels) {
+        // Try dropping the alpha channel.
+        --num_channels;
+        continue;
+      }
+      return JXL_FAILURE("no appropriate format found");
+    }
+    break;
+  }
+  if (current_bit_depth < original_bit_depth) {  // chosen format loses bits
+    JXL_WARNING("encoding %" PRIuS "-bit original to %" PRIuS " bits",
+                original_bit_depth, current_bit_depth);
+  }
+  return true;
+}
+
+// Pseudo-encoder that outputs one metadata blob: 0 Exif, 1 XMP, 2 JUMBF.
+template <int metadata>
+class MetadataEncoder : public Encoder {
+ public:
+  // Empty list: this encoder needs no pixel data at all.
+  std::vector<JxlPixelFormat> AcceptedFormats() const override { return {}; }
+
+  Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded,
+                ThreadPool* pool) const override {
+    JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info));
+    encoded->icc.clear();
+    encoded->bitstreams.resize(1);
+    // `metadata` is a compile-time constant, so at most one copy happens.
+    std::vector<uint8_t>& blob = encoded->bitstreams.front();
+    if (metadata == 0) blob = ppf.metadata.exif;
+    if (metadata == 1) blob = ppf.metadata.xmp;
+    if (metadata == 2) blob = ppf.metadata.jumbf;
+    return true;
+  }
+};
+
+// Maps a file extension (leading dot included, any letter case) to a new
+// encoder instance; unknown extensions yield nullptr.
+std::unique_ptr<Encoder> Encoder::FromExtension(std::string extension) {
+  // Lower-case with the classic locale, independent of the global locale.
+  for (char& c : extension) c = std::tolower(c, std::locale::classic());
+  const std::string& ext = extension;
+  if (ext == ".png" || ext == ".apng") return GetAPNGEncoder();
+  if (ext == ".jpg" || ext == ".jpeg") return GetJPEGEncoder();
+  if (ext == ".npy") return GetNumPyEncoder();
+  if (ext == ".pgx") return GetPGXEncoder();
+  if (ext == ".pam") return GetPAMEncoder();
+  if (ext == ".pgm") return GetPGMEncoder();
+  if (ext == ".ppm") return GetPPMEncoder();
+  if (ext == ".pfm") return GetPFMEncoder();
+  if (ext == ".exr") return GetEXREncoder();
+  if (ext == ".exif") return jxl::make_unique<MetadataEncoder<0>>();
+  if (ext == ".xmp") return jxl::make_unique<MetadataEncoder<1>>();
+  if (ext == ".xml") return jxl::make_unique<MetadataEncoder<1>>();
+  if (ext == ".jumbf") return jxl::make_unique<MetadataEncoder<2>>();
+  if (ext == ".jumb") return jxl::make_unique<MetadataEncoder<2>>();
+  return nullptr;
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/enc/encode.h b/third-party/libjxl/libjxl/lib/extras/enc/encode.h
new file mode 100644
index 0000000000..43a02aab6d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/enc/encode.h
@@ -0,0 +1,85 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_ENC_ENCODE_H_
+#define LIB_EXTRAS_ENC_ENCODE_H_
+
+// Facade for image encoders.
+
+#include <string>
+#include <unordered_map>
+
+#include "lib/extras/dec/decode.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace extras {
+
+struct EncodedImage {  // Output container filled in by Encoder::Encode().
+  // One (if the format supports animations or the image has only one frame) or
+  // more sequential bitstreams.
+  std::vector<std::vector<uint8_t>> bitstreams;
+
+  // For each extra channel one or more sequential bitstreams.
+  std::vector<std::vector<std::vector<uint8_t>>> extra_channel_bitstreams;
+
+  std::vector<uint8_t> preview_bitstream;
+
+  // If the format does not support embedding color profiles into the bitstreams
+  // above, it will be present here, to be written as a separate file. If it
+  // does support them, this field will be empty.
+  std::vector<uint8_t> icc;
+
+  // Additional output for conformance testing, only filled in by NumPyEncoder.
+  std::vector<uint8_t> metadata;
+};
+
+class Encoder {
+ public:
+  // Returns an encoder for a file extension such as ".png", else nullptr.
+  static std::unique_ptr<Encoder> FromExtension(std::string extension);
+  virtual ~Encoder() = default;
+
+  // Set of pixel formats that this encoder takes as input.
+  // If empty, the 'encoder' does not need any pixels (it's metadata-only).
+  virtual std::vector<JxlPixelFormat> AcceptedFormats() const = 0;
+
+  // Any existing data in encoded_image is discarded.
+  virtual Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image,
+                        ThreadPool* pool = nullptr) const = 0;
+  // Stores `value` under `name`, replacing any earlier value for that name.
+  void SetOption(std::string name, std::string value) {
+    options_[std::move(name)] = std::move(value);
+  }
+  // Stateless validation helpers.
+  static Status VerifyBasicInfo(const JxlBasicInfo& info);
+  static Status VerifyImageSize(const PackedImage& image,
+                                const JxlBasicInfo& info);
+  static Status VerifyBitDepth(JxlDataType data_type, uint32_t bits_per_sample,
+                               uint32_t exponent_bits);
+
+ protected:
+  const std::unordered_map<std::string, std::string>& options() const {
+    return options_;
+  }
+  // Succeeds if `format` is compatible with one of AcceptedFormats().
+  Status VerifyFormat(const JxlPixelFormat& format) const;
+  // Runs VerifyImageSize, VerifyFormat and VerifyBitDepth on `image`.
+  Status VerifyPackedImage(const PackedImage& image,
+                           const JxlBasicInfo& info) const;
+
+ private:
+  std::unordered_map<std::string, std::string> options_;
+};
+
+// TODO(sboukortt): consider exposing this as part of the C API.
+Status SelectFormat(const std::vector<JxlPixelFormat>& accepted_formats,
+                    const JxlBasicInfo& basic_info, JxlPixelFormat* format);
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_ENC_ENCODE_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/enc/exr.cc b/third-party/libjxl/libjxl/lib/extras/enc/exr.cc
new file mode 100644
index 0000000000..d4005c3097
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/enc/exr.cc
@@ -0,0 +1,208 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/enc/exr.h"
+
+#if JPEGXL_ENABLE_EXR
+#include <ImfChromaticitiesAttribute.h>
+#include <ImfIO.h>
+#include <ImfRgbaFile.h>
+#include <ImfStandardAttributes.h>
+#endif
+#include <jxl/codestream_header.h>
+
+#include <vector>
+
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/byte_order.h"
+
+namespace jxl {
+namespace extras {
+
+#if JPEGXL_ENABLE_EXR
+namespace {
+
+namespace OpenEXR = OPENEXR_IMF_NAMESPACE;
+namespace Imath = IMATH_NAMESPACE;
+
+// OpenEXR::Int64 is deprecated in favor of using uint64_t directly, but using
+// uint64_t as recommended causes build failures with previous OpenEXR versions
+// on macOS, where the definition for OpenEXR::Int64 was actually not equivalent
+// to uint64_t. This alternative should work in all cases.
+using ExrInt64 = decltype(std::declval<OpenEXR::IStream>().tellg());
+
+// OpenEXR output stream that keeps everything written to it in a
+// caller-provided byte vector instead of a file.
+class InMemoryOStream : public OpenEXR::OStream {
+ public:
+  // `bytes` must outlive the InMemoryOStream.
+  explicit InMemoryOStream(std::vector<uint8_t>* const bytes)
+      : OStream(/*fileName=*/""), bytes_(*bytes) {}
+
+  // Copies `n` bytes from `c` to the current position, growing the vector
+  // when the write runs past its end, then moves the position behind the
+  // written bytes.
+  void write(const char c[], const int n) override {
+    const size_t write_end = pos_ + n;
+    if (write_end > bytes_.size()) {
+      bytes_.resize(write_end);
+    }
+    std::copy_n(c, n, bytes_.begin() + pos_);
+    pos_ = write_end;
+  }
+
+  // Current write position, in bytes from the start of the vector.
+  ExrInt64 tellp() override { return pos_; }
+
+  // Moves the write position to `pos`. When `pos` lies more than one byte
+  // past the current end, the vector is first grown to `pos - 1` bytes.
+  void seekp(const ExrInt64 pos) override {
+    const bool beyond_end = bytes_.size() + 1 < pos;
+    if (beyond_end) {
+      bytes_.resize(pos - 1);
+    }
+    pos_ = pos;
+  }
+
+ private:
+  std::vector<uint8_t>& bytes_;
+  size_t pos_ = 0;
+};
+
+// Reads the four bytes at `p` as a big-endian 32-bit pattern and returns that
+// pattern reinterpreted as a float.
+float LoadBEFloat(const uint8_t* p) {
+  const uint32_t bits = LoadBE32(p);
+  float value;
+  memcpy(&value, &bits, sizeof(bits));
+  return value;
+}
+
+// Reads the four bytes at `p` as a little-endian 32-bit pattern and returns
+// that pattern reinterpreted as a float.
+float LoadLEFloat(const uint8_t* p) {
+  const uint32_t bits = LoadLE32(p);
+  float value;
+  memcpy(&value, &bits, sizeof(bits));
+  return value;
+}
+
+// Writes `image` as an OpenEXR RGB(A) file into `bytes`.
+//
+// Only linear-light RGB input with three color channels and 32-bit float
+// samples is accepted; anything else returns a failure. An alpha channel is
+// written when `info.alpha_bits` is non-zero. Color values are multiplied by
+// alpha before being stored unless `info.alpha_premultiplied` is already set.
+// The primaries and white point from `c_enc` and `info.intensity_target` are
+// added to the OpenEXR header.
+//
+// `pool` is accepted for interface symmetry but is not used by this function.
+Status EncodeImageEXR(const PackedImage& image, const JxlBasicInfo& info,
+                      const JxlColorEncoding& c_enc, ThreadPool* pool,
+                      std::vector<uint8_t>* bytes) {
+  // NOTE(review): presumably makes OpenEXR do its work on the calling thread
+  // instead of its own worker threads — confirm against the OpenEXR docs.
+  OpenEXR::setGlobalThreadCount(0);
+
+  const size_t xsize = info.xsize;
+  const size_t ysize = info.ysize;
+  const bool has_alpha = info.alpha_bits > 0;
+  const bool alpha_is_premultiplied = info.alpha_premultiplied;
+
+  if (info.num_color_channels != 3 ||
+      c_enc.color_space != JXL_COLOR_SPACE_RGB ||
+      c_enc.transfer_function != JXL_TRANSFER_FUNCTION_LINEAR) {
+    return JXL_FAILURE("Unsupported color encoding for OpenEXR output.");
+  }
+
+  const size_t num_channels = 3 + (has_alpha ? 1 : 0);
+  const JxlPixelFormat format = image.format;
+
+  if (format.data_type != JXL_TYPE_FLOAT) {
+    return JXL_FAILURE("Unsupported pixel format for OpenEXR output");
+  }
+
+  const uint8_t* in = reinterpret_cast<const uint8_t*>(image.pixels());
+  // Row pitch is derived from the channel count (4 bytes per sample) rather
+  // than read from `image`.
+  // NOTE(review): this assumes rows are tightly packed and that the image
+  // really has `num_channels` interleaved channels — confirm the caller
+  // validates this.
+  size_t in_stride = num_channels * 4 * xsize;
+
+  OpenEXR::Header header(xsize, ysize);
+  OpenEXR::Chromaticities chromaticities;
+  chromaticities.red =
+      Imath::V2f(c_enc.primaries_red_xy[0], c_enc.primaries_red_xy[1]);
+  chromaticities.green =
+      Imath::V2f(c_enc.primaries_green_xy[0], c_enc.primaries_green_xy[1]);
+  chromaticities.blue =
+      Imath::V2f(c_enc.primaries_blue_xy[0], c_enc.primaries_blue_xy[1]);
+  chromaticities.white =
+      Imath::V2f(c_enc.white_point_xy[0], c_enc.white_point_xy[1]);
+  OpenEXR::addChromaticities(header, chromaticities);
+  OpenEXR::addWhiteLuminance(header, info.intensity_target);
+
+  // Sample loader matching the byte order declared by the pixel format.
+  auto loadFloat =
+      format.endianness == JXL_BIG_ENDIAN ? LoadBEFloat : LoadLEFloat;
+  // Without an alpha channel every pixel is treated as fully opaque; the
+  // pointer passed to the fallback is never dereferenced.
+  auto loadAlpha =
+      has_alpha ? loadFloat : [](const uint8_t* p) -> float { return 1.0f; };
+
+  // Ensure that the destructor of RgbaOutputFile has run before we look at the
+  // size of `bytes`.
+  {
+    InMemoryOStream os(bytes);
+    OpenEXR::RgbaOutputFile output(
+        os, header, has_alpha ? OpenEXR::WRITE_RGBA : OpenEXR::WRITE_RGB);
+    // How many rows to write at once. The OpenEXR documentation recommends
+    // writing the whole image in one call, so a single chunk covers all rows.
+    const int y_chunk_size = ysize;
+    std::vector<OpenEXR::Rgba> output_rows(xsize * y_chunk_size);
+
+    for (size_t start_y = 0; start_y < ysize; start_y += y_chunk_size) {
+      // Inclusive.
+      const size_t end_y = std::min(start_y + y_chunk_size - 1, ysize - 1);
+      // The frame buffer base is shifted back by the rows already written so
+      // that row `start_y` of the file maps to the first row of `output_rows`.
+      output.setFrameBuffer(output_rows.data() - start_y * xsize,
+                            /*xStride=*/1, /*yStride=*/xsize);
+      for (size_t y = start_y; y <= end_y; ++y) {
+        // NOTE(review): the input row is indexed relative to the chunk start.
+        // That equals the absolute row `y` only because one chunk spans the
+        // whole image (start_y is always 0); revisit if y_chunk_size changes.
+        const uint8_t* in_row = &in[(y - start_y) * in_stride];
+        OpenEXR::Rgba* const JXL_RESTRICT row_data =
+            &output_rows[(y - start_y) * xsize];
+        for (size_t x = 0; x < xsize; ++x) {
+          const uint8_t* in_pixel = &in_row[4 * num_channels * x];
+          float r = loadFloat(&in_pixel[0]);
+          float g = loadFloat(&in_pixel[4]);
+          float b = loadFloat(&in_pixel[8]);
+          const float alpha = loadAlpha(&in_pixel[12]);
+          // Color is stored multiplied by alpha; do the multiplication here
+          // when the input has not been premultiplied already.
+          if (!alpha_is_premultiplied) {
+            r *= alpha;
+            g *= alpha;
+            b *= alpha;
+          }
+          row_data[x] = OpenEXR::Rgba(r, g, b, alpha);
+        }
+      }
+      output.writePixels(/*numScanLines=*/end_y - start_y + 1);
+    }
+  }
+
+  return true;
+}
+
+// Encoder implementation that writes every frame of a PackedPixelFile as a
+// separate OpenEXR bitstream.
+class EXREncoder : public Encoder {
+  // Accepts 32-bit float samples with 1 to 4 channels, in either explicit
+  // byte order (big-endian listed before little-endian for each count).
+  std::vector<JxlPixelFormat> AcceptedFormats() const override {
+    static constexpr JxlEndianness kByteOrders[] = {JXL_BIG_ENDIAN,
+                                                    JXL_LITTLE_ENDIAN};
+    std::vector<JxlPixelFormat> formats;
+    for (uint32_t channels = 1; channels <= 4; ++channels) {
+      for (const JxlEndianness byte_order : kByteOrders) {
+        formats.push_back(JxlPixelFormat{/*num_channels=*/channels,
+                                         /*data_type=*/JXL_TYPE_FLOAT,
+                                         /*endianness=*/byte_order,
+                                         /*align=*/0});
+      }
+    }
+    return formats;
+  }
+
+  // Clears `encoded_image`, then appends one bitstream per frame. Stops at
+  // the first frame that fails validation or encoding.
+  Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image,
+                ThreadPool* pool = nullptr) const override {
+    JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info));
+    encoded_image->icc.clear();
+    auto& streams = encoded_image->bitstreams;
+    streams.clear();
+    streams.reserve(ppf.frames.size());
+    for (size_t i = 0; i < ppf.frames.size(); ++i) {
+      const PackedImage& color = ppf.frames[i].color;
+      JXL_RETURN_IF_ERROR(VerifyPackedImage(color, ppf.info));
+      streams.emplace_back();
+      JXL_RETURN_IF_ERROR(EncodeImageEXR(color, ppf.info, ppf.color_encoding,
+                                         pool, &streams.back()));
+    }
+    return true;
+  }
+};
+
+}  // namespace
+#endif
+
+// Returns a new OpenEXR encoder, or a null pointer when the build has
+// JPEGXL_ENABLE_EXR turned off.
+std::unique_ptr<Encoder> GetEXREncoder() {
+  std::unique_ptr<Encoder> encoder;
+#if JPEGXL_ENABLE_EXR
+  encoder = jxl::make_unique<EXREncoder>();
+#endif
+  return encoder;
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/enc/exr.h b/third-party/libjxl/libjxl/lib/extras/enc/exr.h
new file mode 100644
index 0000000000..1baaa0272f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/enc/exr.h
@@ -0,0 +1,23 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_ENC_EXR_H_
+#define LIB_EXTRAS_ENC_EXR_H_
+
+// Encodes OpenEXR images in memory.
+
+#include <memory>
+
+#include "lib/extras/enc/encode.h"
+
+namespace jxl {
+namespace extras {
+
+std::unique_ptr<Encoder> GetEXREncoder();
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_ENC_EXR_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/enc/jpegli.cc b/third-party/libjxl/libjxl/lib/extras/enc/jpegli.cc
new file mode 100644
index 0000000000..c0b0798c52
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/enc/jpegli.cc
@@ -0,0 +1,526 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/enc/jpegli.h"
+
+#include <jxl/codestream_header.h>
+#include <setjmp.h>
+#include <stdint.h>
+
+#include "lib/extras/enc/encode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_xyb.h"
+
+namespace jxl {
+namespace extras {
+
+namespace {
+
+// Error handler installed as `error_exit` on the jpegli error manager.
+// Emits the pending message through the error manager's `output_message`
+// hook, destroys the compressor, and then longjmp()s to the jmp_buf whose
+// address the caller stored in `cinfo->client_data`. Control therefore never
+// returns to the library code that reported the error.
+void MyErrorExit(j_common_ptr cinfo) {
+  jmp_buf* env = static_cast<jmp_buf*>(cinfo->client_data);
+  (*cinfo->err->output_message)(cinfo);
+  jpegli_destroy_compress(reinterpret_cast<j_compress_ptr>(cinfo));
+  longjmp(*env, 1);
+}
+
+// Checks that `ppf` can be handed to the jpegli encoder: valid basic info,
+// exactly one frame whose size matches, no FLOAT16 samples, a consistent bit
+// depth, and (for unsigned integer samples) the full 8 or 16 bits in use.
+Status VerifyInput(const PackedPixelFile& ppf) {
+  JXL_RETURN_IF_ERROR(Encoder::VerifyBasicInfo(ppf.info));
+  if (ppf.frames.size() != 1) {
+    return JXL_FAILURE("JPEG input must have exactly one frame.");
+  }
+  const PackedImage& color = ppf.frames[0].color;
+  JXL_RETURN_IF_ERROR(Encoder::VerifyImageSize(color, ppf.info));
+
+  const JxlDataType sample_type = color.format.data_type;
+  if (sample_type == JXL_TYPE_FLOAT16) {
+    return JXL_FAILURE("FLOAT16 input is not supported.");
+  }
+  const uint32_t bits = ppf.info.bits_per_sample;
+  JXL_RETURN_IF_ERROR(Encoder::VerifyBitDepth(
+      sample_type, bits, ppf.info.exponent_bits_per_sample));
+
+  const bool partial_uint8 = sample_type == JXL_TYPE_UINT8 && bits != 8;
+  const bool partial_uint16 = sample_type == JXL_TYPE_UINT16 && bits != 16;
+  if (partial_uint8 || partial_uint16) {
+    return JXL_FAILURE("Only full bit depth unsigned types are supported.");
+  }
+  return true;
+}
+
+// Fills `color_encoding` from `ppf`: from the embedded ICC profile when one
+// is present, otherwise from the structured color encoding. Fails when the
+// resulting encoding has no ICC profile.
+Status GetColorEncoding(const PackedPixelFile& ppf, const JxlCmsInterface* cms,
+                        ColorEncoding* color_encoding) {
+  if (ppf.icc.empty()) {
+    JXL_RETURN_IF_ERROR(ConvertExternalToInternalColorEncoding(
+        ppf.color_encoding, color_encoding));
+  } else {
+    PaddedBytes profile;
+    profile.assign(ppf.icc.data(), ppf.icc.data() + ppf.icc.size());
+    JXL_RETURN_IF_ERROR(color_encoding->SetICC(std::move(profile), cms));
+  }
+  const bool has_icc = !color_encoding->ICC().empty();
+  if (!has_icc) {
+    return JXL_FAILURE("Invalid color encoding.");
+  }
+  return true;
+}
+
+// Returns true when the concatenated marker segments in `app_data` contain an
+// APP2 segment whose payload starts with the "ICC_PROFILE\0" signature.
+// Scanning stops (returning false) as soon as fewer than 16 bytes remain.
+bool HasICCProfile(const std::vector<uint8_t>& app_data) {
+  const size_t total = app_data.size();
+  for (size_t offset = 0; offset < total;) {
+    if (offset + 16 > total) return false;
+    const uint8_t* segment = &app_data[offset];
+    const bool is_app2 = segment[1] == 0xe2;
+    // The 12 compared bytes include the string literal's terminating NUL.
+    if (is_app2 && memcmp(segment + 4, "ICC_PROFILE", 12) == 0) {
+      return true;
+    }
+    // Segment length field plus the two marker bytes.
+    const size_t segment_len = (segment[2] << 8) + segment[3] + 2;
+    offset += segment_len;
+  }
+  return false;
+}
+
+// Writes every APPn marker segment found in `app_data` to the JPEG stream of
+// `cinfo`. `app_data` must be a concatenation of complete segments, each
+// starting with 0xff and a marker byte in 0xe0..0xef and carrying a non-empty
+// payload; the first malformed segment aborts with a failure.
+Status WriteAppData(j_compress_ptr cinfo,
+                    const std::vector<uint8_t>& app_data) {
+  const size_t total = app_data.size();
+  for (size_t offset = 0; offset < total;) {
+    if (offset + 4 > total) {
+      return JXL_FAILURE("Incomplete APP header.");
+    }
+    const uint8_t prefix = app_data[offset];
+    const uint8_t marker = app_data[offset + 1];
+    // Segment length field plus the two marker bytes.
+    const size_t segment_len =
+        (app_data[offset + 2] << 8) + app_data[offset + 3] + 2;
+    const bool is_app_marker =
+        prefix == 0xff && marker >= 0xe0 && marker <= 0xef;
+    if (!is_app_marker) {
+      return JXL_FAILURE("Invalid APP marker %02x %02x", prefix, marker);
+    }
+    if (segment_len <= 4) {
+      return JXL_FAILURE("Invalid APP marker length.");
+    }
+    if (offset + segment_len > total) {
+      return JXL_FAILURE("Incomplete APP data");
+    }
+    // Payload starts after the marker and length bytes.
+    jpegli_write_marker(cinfo, marker, &app_data[offset + 4],
+                        segment_len - 4);
+    offset += segment_len;
+  }
+  return true;
+}
+
+static constexpr int kICCMarker = 0xe2;
+constexpr unsigned char kICCSignature[12] = {
+    0x49, 0x43, 0x43, 0x5F, 0x50, 0x52, 0x4F, 0x46, 0x49, 0x4C, 0x45, 0x00};
+static constexpr uint8_t kUnknownTf = 2;
+static constexpr unsigned char kCICPTagSignature[4] = {0x63, 0x69, 0x63, 0x70};
+static constexpr size_t kCICPTagSize = 12;
+
+// Incrementally extracts the 12-byte CICP tag from an ICC profile that is
+// delivered in one or more consecutive chunks.
+//
+// On the first chunk (`is_first_chunk`), the tag table is scanned for the
+// 'cicp' signature and `*cicp_offset` / `*cicp_length` are set from the
+// matching entry. On every chunk, as many tag bytes as the chunk contains are
+// appended to `cicp_tag` at `*cicp_pos`; `*cicp_offset` is then rewritten to
+// be relative to the start of the next chunk.
+//
+// Returns false when the first chunk is too short to hold the tag table or
+// does not announce a CICP tag of at least 12 bytes. The caller knows the tag
+// is complete once `*cicp_pos` reaches kCICPTagSize.
+bool FindCICPTag(const uint8_t* icc_data, size_t len, bool is_first_chunk,
+                 size_t* cicp_offset, size_t* cicp_length, uint8_t* cicp_tag,
+                 size_t* cicp_pos) {
+  if (is_first_chunk) {
+    // Look up the offset of the CICP tag from the first chunk of ICC data.
+    // The tag count is read at byte 128; 12-byte table entries follow at 132.
+    if (len < 132) {
+      return false;
+    }
+    const uint32_t tag_count = LoadBE32(&icc_data[128]);
+    // Bound the count by division. The former `132 + 12 * tag_count` was
+    // evaluated in 32 bits and wrapped for large counts taken from the
+    // profile, letting the loop below read far past the end of `icc_data`.
+    if (tag_count > (len - 132) / 12) {
+      return false;
+    }
+    for (size_t i = 0; i < tag_count; ++i) {
+      const uint8_t* entry = &icc_data[132 + 12 * i];
+      if (memcmp(entry, kCICPTagSignature, 4) == 0) {
+        *cicp_offset = LoadBE32(entry + 4);
+        *cicp_length = LoadBE32(entry + 8);
+      }
+    }
+    if (*cicp_length < kCICPTagSize) {
+      return false;
+    }
+  }
+  if (*cicp_offset < len) {
+    // (Part of) the tag lies in this chunk: copy what is available; any
+    // remainder continues at the very start of the next chunk.
+    size_t n_bytes = std::min(len - *cicp_offset, kCICPTagSize - *cicp_pos);
+    memcpy(&cicp_tag[*cicp_pos], &icc_data[*cicp_offset], n_bytes);
+    *cicp_pos += n_bytes;
+    *cicp_offset = 0;
+  } else {
+    // The tag starts in a later chunk: make the offset relative to it.
+    *cicp_offset -= len;
+  }
+  return true;
+}
+
+// Scans the concatenated marker segments in `app_data` for the chunks of an
+// embedded ICC profile (APP2 segments carrying the ICC signature) and returns
+// byte 9 of the profile's CICP tag, which the caller uses as the transfer
+// function code.
+//
+// Returns kUnknownTf when the segments are truncated, the ICC chunks are not
+// numbered consecutively from 1, or no complete CICP tag is found.
+uint8_t LookupCICPTransferFunctionFromAppData(const uint8_t* app_data,
+                                              size_t len) {
+  size_t last_index = 0;
+  size_t cicp_offset = 0;
+  size_t cicp_length = 0;
+  uint8_t cicp_tag[kCICPTagSize] = {};
+  size_t cicp_pos = 0;
+  size_t pos = 0;
+  while (pos < len) {
+    const uint8_t* marker = &app_data[pos];
+    if (pos + 4 > len) {
+      return kUnknownTf;
+    }
+    // Segment length field plus the two marker bytes.
+    size_t marker_size = (marker[2] << 8) + marker[3] + 2;
+    if (pos + marker_size > len) {
+      return kUnknownTf;
+    }
+    // Skip everything that is not an ICC profile chunk. The size check comes
+    // first so that the signature comparison never reads past the segment.
+    if (marker_size < 18 || marker[0] != 0xff || marker[1] != kICCMarker ||
+        memcmp(&marker[4], kICCSignature, 12) != 0) {
+      pos += marker_size;
+      continue;
+    }
+    // After the signature come the 1-based chunk index and the chunk count.
+    uint8_t index = marker[16];
+    uint8_t total = marker[17];
+    const uint8_t* payload = marker + 18;
+    const size_t payload_size = marker_size - 18;
+    if (index != last_index + 1 || index > total) {
+      return kUnknownTf;
+    }
+    if (!FindCICPTag(payload, payload_size, last_index == 0, &cicp_offset,
+                     &cicp_length, &cicp_tag[0], &cicp_pos)) {
+      return kUnknownTf;
+    }
+    if (cicp_pos == kCICPTagSize) {
+      break;
+    }
+    ++last_index;
+    // Move on to the next segment. Without this the same chunk was examined
+    // again, failed the sequence check above, and a CICP tag located beyond
+    // the first ICC chunk could never be found.
+    pos += marker_size;
+  }
+  if (cicp_pos >= kCICPTagSize && memcmp(cicp_tag, kCICPTagSignature, 4) == 0) {
+    return cicp_tag[9];
+  }
+  return kUnknownTf;
+}
+
+// Returns byte 9 of the CICP tag found in the complete ICC profile
+// `icc_data`, or kUnknownTf when the profile carries no complete CICP tag.
+uint8_t LookupCICPTransferFunctionFromICCProfile(const uint8_t* icc_data,
+                                                 size_t len) {
+  uint8_t tag[kCICPTagSize] = {};
+  size_t tag_offset = 0;
+  size_t tag_length = 0;
+  size_t copied = 0;
+  // The whole profile is passed as a single (first) chunk.
+  const bool parsed =
+      FindCICPTag(icc_data, len, /*is_first_chunk=*/true, &tag_offset,
+                  &tag_length, &tag[0], &copied);
+  const bool complete = parsed && copied >= kCICPTagSize &&
+                        memcmp(tag, kCICPTagSignature, 4) == 0;
+  if (!complete) {
+    return kUnknownTf;
+  }
+  return tag[9];
+}
+
+// Maps a JXL sample type to the matching jpegli sample type. Every type
+// other than UINT16 and FLOAT maps to JPEGLI_TYPE_UINT8.
+JpegliDataType ConvertDataType(JxlDataType type) {
+  if (type == JXL_TYPE_UINT16) {
+    return JPEGLI_TYPE_UINT16;
+  }
+  if (type == JXL_TYPE_FLOAT) {
+    return JPEGLI_TYPE_FLOAT;
+  }
+  return JPEGLI_TYPE_UINT8;
+}
+
+// Maps a JXL byte order to the matching jpegli byte order. Every value other
+// than little- and big-endian maps to JPEGLI_NATIVE_ENDIAN.
+JpegliEndianness ConvertEndianness(JxlEndianness endianness) {
+  if (endianness == JXL_LITTLE_ENDIAN) {
+    return JPEGLI_LITTLE_ENDIAN;
+  }
+  if (endianness == JXL_BIG_ENDIAN) {
+    return JPEGLI_BIG_ENDIAN;
+  }
+  return JPEGLI_NATIVE_ENDIAN;
+}
+
+// Converts `len` consecutive samples from `row_in` to floats in `row_out`.
+// UINT8 and UINT16 samples are scaled by 1/255 and 1/65535 respectively;
+// FLOAT samples are copied with the byte order given by `format`. Any other
+// sample type leaves `row_out` untouched.
+void ToFloatRow(const uint8_t* row_in, JxlPixelFormat format, size_t len,
+                float* row_out) {
+  const bool little_endian =
+      format.endianness == JXL_LITTLE_ENDIAN ||
+      (format.endianness == JXL_NATIVE_ENDIAN && IsLittleEndian());
+  static constexpr double kMul8 = 1.0 / 255.0;
+  static constexpr double kMul16 = 1.0 / 65535.0;
+  switch (format.data_type) {
+    case JXL_TYPE_UINT8:
+      for (size_t i = 0; i < len; ++i) {
+        row_out[i] = row_in[i] * kMul8;
+      }
+      break;
+    case JXL_TYPE_UINT16:
+      if (little_endian) {
+        for (size_t i = 0; i < len; ++i) {
+          row_out[i] = LoadLE16(&row_in[2 * i]) * kMul16;
+        }
+      } else {
+        for (size_t i = 0; i < len; ++i) {
+          row_out[i] = LoadBE16(&row_in[2 * i]) * kMul16;
+        }
+      }
+      break;
+    case JXL_TYPE_FLOAT:
+      if (little_endian) {
+        for (size_t i = 0; i < len; ++i) {
+          row_out[i] = LoadLEFloat(&row_in[4 * i]);
+        }
+      } else {
+        for (size_t i = 0; i < len; ++i) {
+          row_out[i] = LoadBEFloat(&row_in[4 * i]);
+        }
+      }
+      break;
+    default:
+      break;
+  }
+}
+
+// Searches, in at most 15 encodes, for the distance whose output size comes
+// closest to `target_size`, and leaves the best candidate in `output`.
+// Overshooting the target is penalised by a factor of 1.2, so results below
+// the target are preferred. The search stops early once the size is within
+// 0.2% of the target.
+Status EncodeJpegToTargetSize(const PackedPixelFile& ppf,
+                              const JpegSettings& jpeg_settings,
+                              size_t target_size, ThreadPool* pool,
+                              std::vector<uint8_t>* output) {
+  output->clear();
+  size_t smallest_error = std::numeric_limits<size_t>::max();
+  // Most recent distance whose result was at least as large as the target
+  // (`dist_over`) or smaller than it (`dist_under`); -1 means "none yet".
+  float dist_over = -1.0f;
+  float dist_under = -1.0f;
+  float distance = 1.0f;
+  int step = 0;
+  while (step < 15) {
+    // Encode once at the current distance with size and quality search off.
+    JpegSettings settings = jpeg_settings;
+    settings.libjpeg_quality = 0;
+    settings.distance = distance;
+    settings.target_size = 0;
+    std::vector<uint8_t> candidate;
+    JXL_RETURN_IF_ERROR(EncodeJpeg(ppf, settings, pool, &candidate));
+    const size_t size = candidate.size();
+
+    // prefer being under the target size to being over it
+    size_t error;
+    if (size < target_size) {
+      error = target_size - size;
+    } else {
+      error = static_cast<size_t>(1.2f * (size - target_size));
+    }
+    if (error < smallest_error) {
+      smallest_error = error;
+      std::swap(*output, candidate);
+    }
+
+    float rel_error = size * 1.0f / target_size;
+    if (std::abs(rel_error - 1.0f) < 0.002f) {
+      break;
+    }
+
+    (size < target_size ? dist_under : dist_over) = distance;
+    const bool seen_under = dist_under != -1;
+    const bool seen_over = dist_over != -1;
+    if (seen_under && seen_over) {
+      // The target is bracketed: bisect.
+      distance = 0.5 * (dist_over + dist_under);
+    } else {
+      // Only one side seen so far: extrapolate from the size ratio, nudged
+      // towards the side that has not been reached yet.
+      distance *= std::pow(rel_error, 1.5) * (seen_under ? 0.95 : 1.05);
+    }
+    ++step;
+  }
+  return true;
+}
+
+}  // namespace
+
+// Encodes the single frame of `ppf` as a JPEG with jpegli and stores the
+// result in `compressed`.
+//
+// Three modes, selected from `jpeg_settings` in this order:
+//  * libjpeg_quality > 0: the image is first encoded with the ".jpg" encoder
+//    at that quality, and the size of that result becomes the target size
+//    for a jpegli distance search.
+//  * target_size > 0: a jpegli distance search towards that size.
+//  * otherwise: a single jpegli encode driven by psnr_target, quality or
+//    distance (checked in that order).
+//
+// Returns a failure when the input is rejected by VerifyInput(), the color
+// encoding cannot be set up, the chroma subsampling string is unknown, the
+// APP data is malformed, or jpegli reports an error.
+Status EncodeJpeg(const PackedPixelFile& ppf, const JpegSettings& jpeg_settings,
+                  ThreadPool* pool, std::vector<uint8_t>* compressed) {
+  if (jpeg_settings.libjpeg_quality > 0) {
+    auto encoder = Encoder::FromExtension(".jpg");
+    // Do not dereference a missing encoder (e.g. JPEG support compiled out).
+    if (!encoder) {
+      return JXL_FAILURE("JPEG encoder is not available.");
+    }
+    encoder->SetOption("q", std::to_string(jpeg_settings.libjpeg_quality));
+    if (!jpeg_settings.libjpeg_chroma_subsampling.empty()) {
+      encoder->SetOption("chroma_subsampling",
+                         jpeg_settings.libjpeg_chroma_subsampling);
+    }
+    EncodedImage encoded;
+    JXL_RETURN_IF_ERROR(encoder->Encode(ppf, &encoded, pool));
+    // Guard the index below against an encoder that produced no bitstream.
+    if (encoded.bitstreams.empty()) {
+      return JXL_FAILURE("JPEG encoder produced no output.");
+    }
+    size_t target_size = encoded.bitstreams[0].size();
+    return EncodeJpegToTargetSize(ppf, jpeg_settings, target_size, pool,
+                                  compressed);
+  }
+  if (jpeg_settings.target_size > 0) {
+    return EncodeJpegToTargetSize(ppf, jpeg_settings, jpeg_settings.target_size,
+                                  pool, compressed);
+  }
+  JXL_RETURN_IF_ERROR(VerifyInput(ppf));
+
+  const JxlCmsInterface& cms = GetJxlCms();
+
+  ColorEncoding color_encoding;
+  JXL_RETURN_IF_ERROR(GetColorEncoding(ppf, &cms, &color_encoding));
+
+  ColorSpaceTransform c_transform(cms);
+  ColorEncoding xyb_encoding;
+  if (jpeg_settings.xyb) {
+    if (ppf.info.num_color_channels != 3) {
+      return JXL_FAILURE("Only RGB input is supported in XYB mode.");
+    }
+    if (HasICCProfile(jpeg_settings.app_data)) {
+      return JXL_FAILURE("APP data ICC profile is not supported in XYB mode.");
+    }
+    // In XYB mode each input row is converted to linear sRGB first.
+    const ColorEncoding& c_desired = ColorEncoding::LinearSRGB(false);
+    JXL_RETURN_IF_ERROR(
+        c_transform.Init(color_encoding, c_desired, 255.0f, ppf.info.xsize, 1));
+    xyb_encoding.SetColorSpace(jxl::ColorSpace::kXYB);
+    xyb_encoding.rendering_intent = jxl::RenderingIntent::kPerceptual;
+    JXL_RETURN_IF_ERROR(xyb_encoding.CreateICC());
+  }
+  const ColorEncoding& output_encoding =
+      jpeg_settings.xyb ? xyb_encoding : color_encoding;
+
+  // We need to declare all the non-trivial destructor local variables
+  // before the call to setjmp().
+  std::vector<uint8_t> pixels;
+  unsigned char* output_buffer = nullptr;
+  unsigned long output_size = 0;
+  std::vector<uint8_t> row_bytes;
+  size_t rowlen = RoundUpTo(ppf.info.xsize, VectorSize());
+  hwy::AlignedFreeUniquePtr<float[]> xyb_tmp =
+      hwy::AllocateAligned<float>(6 * rowlen);
+  hwy::AlignedFreeUniquePtr<float[]> premul_absorb =
+      hwy::AllocateAligned<float>(VectorSize() * 12);
+  ComputePremulAbsorb(255.0f, premul_absorb.get());
+
+  jpeg_compress_struct cinfo;
+  // Everything that can trigger a jpegli error runs inside this lambda so
+  // that MyErrorExit()'s longjmp lands in the setjmp below and the lambda
+  // simply reports failure.
+  const auto try_catch_block = [&]() -> bool {
+    jpeg_error_mgr jerr;
+    jmp_buf env;
+    cinfo.err = jpegli_std_error(&jerr);
+    jerr.error_exit = &MyErrorExit;
+    if (setjmp(env)) {
+      return false;
+    }
+    cinfo.client_data = static_cast<void*>(&env);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &output_buffer, &output_size);
+    const JxlBasicInfo& info = ppf.info;
+    cinfo.image_width = info.xsize;
+    cinfo.image_height = info.ysize;
+    cinfo.input_components = info.num_color_channels;
+    cinfo.in_color_space =
+        cinfo.input_components == 1 ? JCS_GRAYSCALE : JCS_RGB;
+    if (jpeg_settings.xyb) {
+      jpegli_set_xyb_mode(&cinfo);
+    } else if (jpeg_settings.use_std_quant_tables) {
+      jpegli_use_standard_quant_tables(&cinfo);
+    }
+    // Take the CICP transfer function from the caller's APP data when given,
+    // otherwise from the output ICC profile of non-sRGB encodings.
+    uint8_t cicp_tf = kUnknownTf;
+    if (!jpeg_settings.app_data.empty()) {
+      cicp_tf = LookupCICPTransferFunctionFromAppData(
+          jpeg_settings.app_data.data(), jpeg_settings.app_data.size());
+    } else if (!output_encoding.IsSRGB()) {
+      cicp_tf = LookupCICPTransferFunctionFromICCProfile(
+          output_encoding.ICC().data(), output_encoding.ICC().size());
+    }
+    jpegli_set_cicp_transfer_function(&cinfo, cicp_tf);
+    jpegli_set_defaults(&cinfo);
+    if (!jpeg_settings.chroma_subsampling.empty()) {
+      // The first component carries the sampling factors; all remaining
+      // components are set to 1x1 below.
+      if (jpeg_settings.chroma_subsampling == "444") {
+        cinfo.comp_info[0].h_samp_factor = 1;
+        cinfo.comp_info[0].v_samp_factor = 1;
+      } else if (jpeg_settings.chroma_subsampling == "440") {
+        cinfo.comp_info[0].h_samp_factor = 1;
+        cinfo.comp_info[0].v_samp_factor = 2;
+      } else if (jpeg_settings.chroma_subsampling == "422") {
+        cinfo.comp_info[0].h_samp_factor = 2;
+        cinfo.comp_info[0].v_samp_factor = 1;
+      } else if (jpeg_settings.chroma_subsampling == "420") {
+        cinfo.comp_info[0].h_samp_factor = 2;
+        cinfo.comp_info[0].v_samp_factor = 2;
+      } else {
+        return false;
+      }
+      for (int i = 1; i < cinfo.num_components; ++i) {
+        cinfo.comp_info[i].h_samp_factor = 1;
+        cinfo.comp_info[i].v_samp_factor = 1;
+      }
+    }
+    jpegli_enable_adaptive_quantization(
+        &cinfo, jpeg_settings.use_adaptive_quantization);
+    if (jpeg_settings.psnr_target > 0.0) {
+      jpegli_set_psnr(&cinfo, jpeg_settings.psnr_target,
+                      jpeg_settings.search_tolerance,
+                      jpeg_settings.min_distance, jpeg_settings.max_distance);
+    } else if (jpeg_settings.quality > 0.0) {
+      float distance = jpegli_quality_to_distance(jpeg_settings.quality);
+      jpegli_set_distance(&cinfo, distance, TRUE);
+    } else {
+      jpegli_set_distance(&cinfo, jpeg_settings.distance, TRUE);
+    }
+    jpegli_set_progressive_level(&cinfo, jpeg_settings.progressive_level);
+    cinfo.optimize_coding = jpeg_settings.optimize_coding;
+    if (!jpeg_settings.app_data.empty()) {
+      // Make sure jpegli_start_compress() does not write any APP markers.
+      cinfo.write_JFIF_header = false;
+      cinfo.write_Adobe_marker = false;
+    }
+    const PackedImage& image = ppf.frames[0].color;
+    if (jpeg_settings.xyb) {
+      jpegli_set_input_format(&cinfo, JPEGLI_TYPE_FLOAT, JPEGLI_NATIVE_ENDIAN);
+    } else {
+      jpegli_set_input_format(&cinfo, ConvertDataType(image.format.data_type),
+                              ConvertEndianness(image.format.endianness));
+    }
+    jpegli_start_compress(&cinfo, TRUE);
+    if (!jpeg_settings.app_data.empty()) {
+      JXL_RETURN_IF_ERROR(WriteAppData(&cinfo, jpeg_settings.app_data));
+    }
+    // An ICC profile is written for non-sRGB output when the caller supplied
+    // no APP data, and always in XYB mode.
+    if ((jpeg_settings.app_data.empty() && !output_encoding.IsSRGB()) ||
+        jpeg_settings.xyb) {
+      jpegli_write_icc_profile(&cinfo, output_encoding.ICC().data(),
+                               output_encoding.ICC().size());
+    }
+    const uint8_t* pixels = reinterpret_cast<const uint8_t*>(image.pixels());
+    if (jpeg_settings.xyb) {
+      float* src_buf = c_transform.BufSrc(0);
+      float* dst_buf = c_transform.BufDst(0);
+      for (size_t y = 0; y < image.ysize; ++y) {
+        // convert to float
+        ToFloatRow(&pixels[y * image.stride], image.format, 3 * image.xsize,
+                   src_buf);
+        // convert to linear srgb
+        if (!c_transform.Run(0, src_buf, dst_buf)) {
+          return false;
+        }
+        // deinterleave channels
+        float* row0 = &xyb_tmp[0];
+        float* row1 = &xyb_tmp[rowlen];
+        float* row2 = &xyb_tmp[2 * rowlen];
+        for (size_t x = 0; x < image.xsize; ++x) {
+          row0[x] = dst_buf[3 * x + 0];
+          row1[x] = dst_buf[3 * x + 1];
+          row2[x] = dst_buf[3 * x + 2];
+        }
+        // convert to xyb
+        LinearRGBRowToXYB(row0, row1, row2, premul_absorb.get(), image.xsize);
+        // scale xyb
+        ScaleXYBRow(row0, row1, row2, image.xsize);
+        // interleave channels
+        float* row_out = &xyb_tmp[3 * rowlen];
+        for (size_t x = 0; x < image.xsize; ++x) {
+          row_out[3 * x + 0] = row0[x];
+          row_out[3 * x + 1] = row1[x];
+          row_out[3 * x + 2] = row2[x];
+        }
+        // feed to jpegli as native endian floats
+        JSAMPROW row[] = {reinterpret_cast<uint8_t*>(row_out)};
+        jpegli_write_scanlines(&cinfo, row, 1);
+      }
+    } else {
+      row_bytes.resize(image.stride);
+      if (cinfo.num_components == (int)image.format.num_channels) {
+        // Channel counts match: each row is passed on unchanged.
+        for (size_t y = 0; y < info.ysize; ++y) {
+          memcpy(&row_bytes[0], pixels + y * image.stride, image.stride);
+          JSAMPROW row[] = {row_bytes.data()};
+          jpegli_write_scanlines(&cinfo, row, 1);
+        }
+      } else {
+        // The image has a different channel count than the encoder takes:
+        // copy only the leading `num_components` channels of every pixel.
+        for (size_t y = 0; y < info.ysize; ++y) {
+          int bytes_per_channel =
+              PackedImage::BitsPerChannel(image.format.data_type) / 8;
+          int bytes_per_pixel = cinfo.num_components * bytes_per_channel;
+          for (size_t x = 0; x < info.xsize; ++x) {
+            memcpy(&row_bytes[x * bytes_per_pixel],
+                   &pixels[y * image.stride + x * image.pixel_stride()],
+                   bytes_per_pixel);
+          }
+          JSAMPROW row[] = {row_bytes.data()};
+          jpegli_write_scanlines(&cinfo, row, 1);
+        }
+      }
+    }
+    jpegli_finish_compress(&cinfo);
+    compressed->resize(output_size);
+    std::copy_n(output_buffer, output_size, compressed->data());
+    return true;
+  };
+  bool success = try_catch_block();
+  // Runs on success and failure alike; the output buffer handed to
+  // jpegli_mem_dest() is released here with free().
+  jpegli_destroy_compress(&cinfo);
+  if (output_buffer) free(output_buffer);
+  return success;
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/enc/jpegli.h b/third-party/libjxl/libjxl/lib/extras/enc/jpegli.h
new file mode 100644
index 0000000000..9538b2e3fc
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/enc/jpegli.h
@@ -0,0 +1,53 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_ENC_JPEGLI_H_
+#define LIB_EXTRAS_ENC_JPEGLI_H_
+
+// Encodes JPG pixels and metadata in memory using the libjpegli library.
+
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace extras {
+
+// Options consumed by EncodeJpeg().
+struct JpegSettings {
+  // Encode in XYB mode; requires three color channels, and an XYB ICC profile
+  // is written to the output.
+  bool xyb = false;
+  // If > 0 (and libjpeg_quality is 0), search for the distance whose output
+  // size in bytes comes closest to this value.
+  size_t target_size = 0;
+  // If > 0 (and psnr_target is not set), converted to a distance with
+  // jpegli_quality_to_distance().
+  float quality = 0.0f;
+  // Distance used when neither psnr_target nor quality is set.
+  float distance = 1.f;
+  // Passed to jpegli_enable_adaptive_quantization().
+  bool use_adaptive_quantization = true;
+  // Outside XYB mode, selects jpegli_use_standard_quant_tables().
+  bool use_std_quant_tables = false;
+  // Passed to jpegli_set_progressive_level().
+  int progressive_level = 2;
+  // Copied to the compressor's `optimize_coding` field.
+  bool optimize_coding = true;
+  // One of "444", "440", "422", "420", or empty to keep the jpegli defaults;
+  // any other value makes EncodeJpeg() fail.
+  std::string chroma_subsampling;
+  // If > 0, the image is first encoded with the ".jpg" encoder at this
+  // quality and the size of that result becomes the target size.
+  int libjpeg_quality = 0;
+  // Forwarded as the "chroma_subsampling" option of that ".jpg" encode.
+  std::string libjpeg_chroma_subsampling;
+  // Parameters for selecting distance based on PSNR target.
+  float psnr_target = 0.0f;
+  float search_tolerance = 0.01;
+  float min_distance = 0.1f;
+  float max_distance = 25.0f;
+  // If not empty, must contain concatenated APP marker segments. In this case,
+  // these and only these APP marker segments will be written to the JPEG
+  // output. In xyb mode app_data must not contain an ICC profile, in this
+  // case an additional APP2 ICC profile for the XYB colorspace will be emitted.
+  std::vector<uint8_t> app_data;
+};
+
+Status EncodeJpeg(const PackedPixelFile& ppf, const JpegSettings& jpeg_settings,
+                  ThreadPool* pool, std::vector<uint8_t>* compressed);
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_ENC_JPEGLI_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/enc/jpg.cc b/third-party/libjxl/libjxl/lib/extras/enc/jpg.cc
new file mode 100644
index 0000000000..f1355bbcb7
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/enc/jpg.cc
@@ -0,0 +1,630 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/enc/jpg.h"
+
+#if JPEGXL_ENABLE_JPEG
+#include <jpeglib.h>
+#include <setjmp.h>
+#endif
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <fstream>
+#include <iterator>
+#include <memory>
+#include <numeric>
+#include <sstream>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/exif.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/sanitizers.h"
+#if JPEGXL_ENABLE_SJPEG
+#include "sjpeg.h"
+#include "sjpegi.h"
+#endif
+
+namespace jxl {
+namespace extras {
+
+#if JPEGXL_ENABLE_JPEG
+namespace {
+
+// Signature that opens every ICC chunk written by WriteICCProfile(): the ASCII
+// bytes "ICC_PROFILE" followed by a NUL terminator.
+constexpr unsigned char kICCSignature[12] = {
+    0x49, 0x43, 0x43, 0x5F, 0x50, 0x52, 0x4F, 0x46, 0x49, 0x4C, 0x45, 0x00};
+// ICC chunks go into the APP2 marker.
+constexpr int kICCMarker = JPEG_APP0 + 2;
+// Largest payload handed to a single jpeg_write_m_header() call.
+// NOTE(review): presumably the 16-bit segment length minus its own two bytes
+// (65535 - 2) — confirm against the libjpeg documentation.
+constexpr size_t kMaxBytesInMarker = 65533;
+
+// Signature that opens the Exif payload written by WriteExif(): the ASCII
+// bytes "Exif" followed by two NUL bytes.
+constexpr unsigned char kExifSignature[6] = {0x45, 0x78, 0x69,
+                                             0x66, 0x00, 0x00};
+// Exif data goes into the APP1 marker.
+constexpr int kExifMarker = JPEG_APP0 + 1;
+
+// Backend that EncodeImageJPG() dispatches to: kLibJpeg routes to
+// EncodeWithLibJpeg(), kSJpeg to EncodeWithSJpeg().
+enum class JpegEncoder {
+  kLibJpeg,
+  kSJpeg,
+};
+
+// Element count of a C array. X must be an actual array: applied to a pointer
+// the sizeof ratio does not yield the number of elements.
+#define ARRAY_SIZE(X) (sizeof(X) / sizeof((X)[0]))
+
+// Popular jpeg scan scripts
+// The fields of the individual scans are:
+// comps_in_scan, component_index[], Ss, Se, Ah, Al
+//
+// SetJpegProgression() selects script N for progressive_id == N (1..6). All
+// scripts are written for three components; scans naming components the image
+// does not have are filtered by FilterScanComponents().
+
+// Script 1: per-component DC scans, then AC scans using spectral selection
+// only (Ah = Al = 0 in every scan); component 0's AC is split into 1-8 / 9-63.
+static constexpr jpeg_scan_info kScanScript1[] = {
+    {1, {0}, 0, 0, 0, 0},   //
+    {1, {1}, 0, 0, 0, 0},   //
+    {1, {2}, 0, 0, 0, 0},   //
+    {1, {0}, 1, 8, 0, 0},   //
+    {1, {0}, 9, 63, 0, 0},  //
+    {1, {1}, 1, 63, 0, 0},  //
+    {1, {2}, 1, 63, 0, 0},  //
+};
+static constexpr size_t kNumScans1 = ARRAY_SIZE(kScanScript1);
+
+// Script 2: like script 1, but component 0's AC is first sent with Al = 1
+// (split into 1-2 / 3-63) and then refined by one Ah = 1, Al = 0 scan.
+static constexpr jpeg_scan_info kScanScript2[] = {
+    {1, {0}, 0, 0, 0, 0},   //
+    {1, {1}, 0, 0, 0, 0},   //
+    {1, {2}, 0, 0, 0, 0},   //
+    {1, {0}, 1, 2, 0, 1},   //
+    {1, {0}, 3, 63, 0, 1},  //
+    {1, {0}, 1, 63, 1, 0},  //
+    {1, {1}, 1, 63, 0, 0},  //
+    {1, {2}, 1, 63, 0, 0},  //
+};
+static constexpr size_t kNumScans2 = ARRAY_SIZE(kScanScript2);
+
+// Script 3: component 0's AC is sent in one 1-63 scan with Al = 2, followed by
+// two refinement scans (2 -> 1, 1 -> 0); components 1 and 2 are unrefined.
+static constexpr jpeg_scan_info kScanScript3[] = {
+    {1, {0}, 0, 0, 0, 0},   //
+    {1, {1}, 0, 0, 0, 0},   //
+    {1, {2}, 0, 0, 0, 0},   //
+    {1, {0}, 1, 63, 0, 2},  //
+    {1, {0}, 1, 63, 2, 1},  //
+    {1, {0}, 1, 63, 1, 0},  //
+    {1, {1}, 1, 63, 0, 0},  //
+    {1, {2}, 1, 63, 0, 0},  //
+};
+static constexpr size_t kNumScans3 = ARRAY_SIZE(kScanScript3);
+
+// Script 4: interleaved DC scan for all three components with Al = 1 plus a
+// later DC refinement; every component's AC also uses successive
+// approximation (Al = 2 for component 0, Al = 1 for components 1 and 2).
+static constexpr jpeg_scan_info kScanScript4[] = {
+    {3, {0, 1, 2}, 0, 0, 0, 1},  //
+    {1, {0}, 1, 5, 0, 2},        //
+    {1, {2}, 1, 63, 0, 1},       //
+    {1, {1}, 1, 63, 0, 1},       //
+    {1, {0}, 6, 63, 0, 2},       //
+    {1, {0}, 1, 63, 2, 1},       //
+    {3, {0, 1, 2}, 0, 0, 1, 0},  //
+    {1, {2}, 1, 63, 1, 0},       //
+    {1, {1}, 1, 63, 1, 0},       //
+    {1, {0}, 1, 63, 1, 0},       //
+};
+static constexpr size_t kNumScans4 = ARRAY_SIZE(kScanScript4);
+
+// Script 5: interleaved DC with Al = 1 plus refinement; every component's AC
+// is split into 1-5 / 6-63 at Al = 2 and then refined twice (2 -> 1, 1 -> 0).
+static constexpr jpeg_scan_info kScanScript5[] = {
+    {3, {0, 1, 2}, 0, 0, 0, 1},  //
+    {1, {0}, 1, 5, 0, 2},        //
+    {1, {1}, 1, 5, 0, 2},        //
+    {1, {2}, 1, 5, 0, 2},        //
+    {1, {1}, 6, 63, 0, 2},       //
+    {1, {2}, 6, 63, 0, 2},       //
+    {1, {0}, 6, 63, 0, 2},       //
+    {1, {0}, 1, 63, 2, 1},       //
+    {1, {1}, 1, 63, 2, 1},       //
+    {1, {2}, 1, 63, 2, 1},       //
+    {3, {0, 1, 2}, 0, 0, 1, 0},  //
+    {1, {0}, 1, 63, 1, 0},       //
+    {1, {1}, 1, 63, 1, 0},       //
+    {1, {2}, 1, 63, 1, 0},       //
+};
+static constexpr size_t kNumScans5 = ARRAY_SIZE(kScanScript5);
+
+// default progressive mode of jpegli
+// Script 6: interleaved full-precision DC, coefficients 1-2 at full precision,
+// then coefficients 3-63 at Al = 2 with two refinement passes per component.
+static constexpr jpeg_scan_info kScanScript6[] = {
+    {3, {0, 1, 2}, 0, 0, 0, 0},  //
+    {1, {0}, 1, 2, 0, 0},        //
+    {1, {1}, 1, 2, 0, 0},        //
+    {1, {2}, 1, 2, 0, 0},        //
+    {1, {0}, 3, 63, 0, 2},       //
+    {1, {1}, 3, 63, 0, 2},       //
+    {1, {2}, 3, 63, 0, 2},       //
+    {1, {0}, 3, 63, 2, 1},       //
+    {1, {1}, 3, 63, 2, 1},       //
+    {1, {2}, 3, 63, 2, 1},       //
+    {1, {0}, 3, 63, 1, 0},       //
+    {1, {1}, 3, 63, 1, 0},       //
+    {1, {2}, 3, 63, 1, 0},       //
+};
+static constexpr size_t kNumScans6 = ARRAY_SIZE(kScanScript6);
+
+// Adapts a three-component scan description to the image being encoded:
+// component indices that are not below cinfo->input_components are dropped
+// and the surviving indices are compacted to the front of
+// si->component_index, with si->comps_in_scan updated to the new count (which
+// may be zero).
+void FilterScanComponents(const jpeg_compress_struct* cinfo,
+                          jpeg_scan_info* si) {
+  const int original_count = si->comps_in_scan;
+  int kept = 0;
+  for (int src = 0; src < original_count; ++src) {
+    const int index = si->component_index[src];
+    if (index >= cinfo->input_components) continue;
+    si->component_index[kept] = index;
+    ++kept;
+  }
+  si->comps_in_scan = kept;
+}
+
+// Configures the scan layout of `cinfo` from `progressive_id`:
+//   < 0   leaves `cinfo` untouched,
+//   == 0  calls jpeg_simple_progression(),
+//   1..6  installs the matching kScanScriptN, filtered for the number of
+//         input components,
+//   > 6   fails.
+// The filtered scans are appended to `*scan_infos` and cinfo->scan_info points
+// into that vector's storage, so the vector has to stay alive and unmodified
+// for as long as `cinfo` uses the script.
+Status SetJpegProgression(int progressive_id,
+                          std::vector<jpeg_scan_info>* scan_infos,
+                          jpeg_compress_struct* cinfo) {
+  if (progressive_id < 0) return true;
+  if (progressive_id == 0) {
+    jpeg_simple_progression(cinfo);
+    return true;
+  }
+  struct ScanScript {
+    const jpeg_scan_info* scans;
+    size_t count;
+  };
+  constexpr ScanScript kScripts[] = {
+      {kScanScript1, kNumScans1}, {kScanScript2, kNumScans2},
+      {kScanScript3, kNumScans3}, {kScanScript4, kNumScans4},
+      {kScanScript5, kNumScans5}, {kScanScript6, kNumScans6},
+  };
+  if (progressive_id > static_cast<int>(ARRAY_SIZE(kScripts))) {
+    return JXL_FAILURE("Unknown jpeg scan script id %d", progressive_id);
+  }
+  const ScanScript& script = kScripts[progressive_id - 1];
+  // Keep only the scans that still reference at least one component.
+  const jpeg_scan_info* const script_end = script.scans + script.count;
+  for (const jpeg_scan_info* it = script.scans; it != script_end; ++it) {
+    jpeg_scan_info filtered = *it;
+    FilterScanComponents(cinfo, &filtered);
+    if (filtered.comps_in_scan > 0) {
+      scan_infos->push_back(filtered);
+    }
+  }
+  cinfo->scan_info = scan_infos->data();
+  cinfo->num_scans = scan_infos->size();
+  return true;
+}
+
+// Returns true when `c` is an RGB or grayscale encoding with sRGB primaries,
+// a D65 white point and the sRGB transfer function.
+bool IsSRGBEncoding(const JxlColorEncoding& c) {
+  const bool rgb_or_gray = c.color_space == JXL_COLOR_SPACE_RGB ||
+                           c.color_space == JXL_COLOR_SPACE_GRAY;
+  if (!rgb_or_gray) return false;
+  if (c.primaries != JXL_PRIMARIES_SRGB) return false;
+  if (c.white_point != JXL_WHITE_POINT_D65) return false;
+  return c.transfer_function == JXL_TRANSFER_FUNCTION_SRGB;
+}
+
+// Writes `icc` to `cinfo` as a sequence of APP2 markers. Each marker holds
+// kICCSignature, a 1-based chunk number, the total chunk count and then as
+// many profile bytes as fit within kMaxBytesInMarker.
+void WriteICCProfile(jpeg_compress_struct* const cinfo,
+                     const std::vector<uint8_t>& icc) {
+  // Bytes in every chunk that are not profile data: signature + two counters.
+  constexpr size_t kChunkOverhead = sizeof kICCSignature + 2;
+  constexpr size_t kMaxIccBytesInMarker = kMaxBytesInMarker - kChunkOverhead;
+  const int num_markers =
+      static_cast<int>(DivCeil(icc.size(), kMaxIccBytesInMarker));
+  size_t offset = 0;
+  for (int chunk_number = 1; chunk_number <= num_markers; ++chunk_number) {
+    const size_t remaining = icc.size() - offset;
+    const size_t payload =
+        remaining < kMaxIccBytesInMarker ? remaining : kMaxIccBytesInMarker;
+    jpeg_write_m_header(cinfo, kICCMarker,
+                        static_cast<unsigned int>(payload + kChunkOverhead));
+    for (size_t i = 0; i < sizeof kICCSignature; ++i) {
+      jpeg_write_m_byte(cinfo, kICCSignature[i]);
+    }
+    jpeg_write_m_byte(cinfo, chunk_number);
+    jpeg_write_m_byte(cinfo, num_markers);
+    const size_t chunk_end = offset + payload;
+    for (; offset < chunk_end; ++offset) {
+      jpeg_write_m_byte(cinfo, icc[offset]);
+    }
+  }
+}
+// Writes `exif` to `cinfo` as a single APP1 marker: kExifSignature followed
+// by the raw bytes of `exif`. No size check is made here; the whole payload
+// goes into one jpeg_write_m_header() call.
+void WriteExif(jpeg_compress_struct* const cinfo,
+               const std::vector<uint8_t>& exif) {
+  const size_t marker_size = exif.size() + sizeof kExifSignature;
+  jpeg_write_m_header(cinfo, kExifMarker,
+                      static_cast<unsigned int>(marker_size));
+  for (size_t i = 0; i < sizeof kExifSignature; ++i) {
+    jpeg_write_m_byte(cinfo, kExifSignature[i]);
+  }
+  for (const uint8_t byte : exif) {
+    jpeg_write_m_byte(cinfo, byte);
+  }
+}
+
+// Applies the sampling factors named by `subsampling` ("444", "420", "422" or
+// "440") to the first three components of `cinfo`. Returns false, leaving
+// `cinfo` untouched, when the name is not one of those four.
+Status SetChromaSubsampling(const std::string& subsampling,
+                            jpeg_compress_struct* const cinfo) {
+  struct Mode {
+    const char* name;
+    uint8_t h_samp[3];
+    uint8_t v_samp[3];
+  };
+  const Mode kModes[] = {
+      {"444", {1, 1, 1}, {1, 1, 1}},
+      {"420", {2, 1, 1}, {2, 1, 1}},
+      {"422", {2, 1, 1}, {1, 1, 1}},
+      {"440", {1, 1, 1}, {2, 1, 1}},
+  };
+  for (const Mode& mode : kModes) {
+    if (subsampling != mode.name) continue;
+    for (size_t comp = 0; comp < 3; comp++) {
+      cinfo->comp_info[comp].h_samp_factor = mode.h_samp[comp];
+      cinfo->comp_info[comp].v_samp_factor = mode.v_samp[comp];
+    }
+    return true;
+  }
+  return false;
+}
+
+// Internal encoder parameters, filled in by JPEGEncoder::Encode() from the
+// string options and consumed by EncodeWithLibJpeg() / EncodeWithSJpeg().
+struct JpegParams {
+  // Common between sjpeg and libjpeg
+  // EncodeImageJPG() rejects values above 100.
+  int quality = 100;
+  // libjpeg accepts "444", "420", "422", "440"; sjpeg accepts "444", "420"
+  // and "420sharp".
+  std::string chroma_subsampling = "444";
+  // Libjpeg parameters
+  // Passed to SetJpegProgression(): negative leaves the scan layout alone,
+  // 0 uses jpeg_simple_progression(), 1..6 pick a built-in scan script.
+  int progressive_id = -1;
+  bool optimize_coding = true;
+  // When set, libjpeg is told to keep the input colorspace as JCS_RGB.
+  bool is_xyb = false;
+  // Sjpeg parameters
+  // If > 0, the image is first encoded with libjpeg at this quality (and
+  // libjpeg_chroma_subsampling) and the resulting size becomes sjpeg's
+  // target size.
+  int libjpeg_quality = 0;
+  std::string libjpeg_chroma_subsampling = "444";
+  // If > 0, sjpeg targets this PSNR; this takes precedence over the size
+  // target derived from libjpeg_quality.
+  float psnr_target = 0;
+  // Optional path of a text file with replacement base quantization tables,
+  // read by MySearchHook::ReadBaseTables(); empty keeps the built-in tables.
+  std::string custom_base_quant_fn;
+  // Search controls, forwarded to sjpeg and MySearchHook only when a size or
+  // PSNR target is active.
+  float search_q_start = 65.0f;
+  float search_q_min = 1.0f;
+  float search_q_max = 100.0f;
+  int search_max_iters = 20;
+  float search_tolerance = 0.1f;
+  float search_q_precision = 0.01f;
+  float search_first_iter_slope = 3.0f;
+  bool enable_adaptive_quant = true;
+};
+
+// Encodes `image` as a JPEG with libjpeg and stores the bitstream in `*bytes`.
+//
+// `info` supplies the color channel count (1 selects grayscale input, 3 RGB)
+// and the number of rows and columns to encode. A non-empty `icc` is written
+// via WriteICCProfile(); a non-empty `exif` is passed through
+// ResetExifOrientation() and written via WriteExif(). `params` provides the
+// quality, chroma subsampling, progression id, optimize_coding flag and XYB
+// flag. 8-bit samples are copied as-is; any other sample type is read as
+// 16-bit big-endian and scaled down to 8 bits.
+//
+// Returns failure for an unsupported JSAMPLE configuration, a channel count
+// other than 1 or 3, an unknown chroma subsampling string or an unknown
+// progression id. All of these are detected before jpeg_start_compress(), and
+// the compressor and its output buffer are released on every exit path.
+// NOTE(review): errors raised inside libjpeg itself go through the
+// jpeg_std_error() handler and are not converted to a Status here.
+Status EncodeWithLibJpeg(const PackedImage& image, const JxlBasicInfo& info,
+                         const std::vector<uint8_t>& icc,
+                         std::vector<uint8_t> exif, const JpegParams& params,
+                         std::vector<uint8_t>* bytes) {
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JSAMPLE) != 1) {
+    return JXL_FAILURE("Only 8 bit JSAMPLE is supported.");
+  }
+  // Validate the channel count before any libjpeg state exists; the input is
+  // declared to libjpeg as either grayscale (1) or RGB (3) below.
+  if (info.num_color_channels != 1 && info.num_color_channels != 3) {
+    return JXL_FAILURE("invalid numbers of components");
+  }
+  // Declared ahead of `cinfo` because cinfo.scan_info may point into it.
+  std::vector<jpeg_scan_info> scan_infos;
+  jpeg_compress_struct cinfo = {};
+  jpeg_error_mgr jerr;
+  cinfo.err = jpeg_std_error(&jerr);
+  jpeg_create_compress(&cinfo);
+  unsigned char* buffer = nullptr;
+  unsigned long size = 0;
+  // Releases the compressor and the output buffer on every return below.
+  // Early returns only happen before jpeg_start_compress(), i.e. before
+  // libjpeg may have replaced the buffer that `buffer` refers to.
+  struct Cleanup {
+    jpeg_compress_struct* cinfo;
+    unsigned char** buffer;
+    ~Cleanup() {
+      jpeg_destroy_compress(cinfo);
+      std::free(*buffer);
+    }
+  } cleanup = {&cinfo, &buffer};
+  jpeg_mem_dest(&cinfo, &buffer, &size);
+  cinfo.image_width = image.xsize;
+  cinfo.image_height = image.ysize;
+  cinfo.input_components = info.num_color_channels;
+  cinfo.in_color_space = info.num_color_channels == 1 ? JCS_GRAYSCALE : JCS_RGB;
+  jpeg_set_defaults(&cinfo);
+  cinfo.optimize_coding = params.optimize_coding;
+  if (cinfo.input_components == 3) {
+    JXL_RETURN_IF_ERROR(
+        SetChromaSubsampling(params.chroma_subsampling, &cinfo));
+  }
+  if (params.is_xyb) {
+    // Tell libjpeg not to convert XYB data to YCbCr.
+    jpeg_set_colorspace(&cinfo, JCS_RGB);
+  }
+  jpeg_set_quality(&cinfo, params.quality, TRUE);
+  JXL_RETURN_IF_ERROR(
+      SetJpegProgression(params.progressive_id, &scan_infos, &cinfo));
+
+  // No early returns from here until jpeg_finish_compress().
+  jpeg_start_compress(&cinfo, TRUE);
+  if (!icc.empty()) {
+    WriteICCProfile(&cinfo, icc);
+  }
+  if (!exif.empty()) {
+    ResetExifOrientation(exif);
+    WriteExif(&cinfo, exif);
+  }
+
+  std::vector<uint8_t> row_bytes(image.stride);
+  const uint8_t* pixels = reinterpret_cast<const uint8_t*>(image.pixels());
+  if (cinfo.num_components == static_cast<int>(image.format.num_channels) &&
+      image.format.data_type == JXL_TYPE_UINT8) {
+    // Layout already matches what libjpeg expects: copy whole rows.
+    for (size_t y = 0; y < info.ysize; ++y) {
+      memcpy(&row_bytes[0], pixels + y * image.stride, image.stride);
+      JSAMPROW row[] = {row_bytes.data()};
+      jpeg_write_scanlines(&cinfo, row, 1);
+    }
+  } else if (image.format.data_type == JXL_TYPE_UINT8) {
+    // 8-bit input with extra channels: keep the first num_components bytes of
+    // every pixel.
+    for (size_t y = 0; y < info.ysize; ++y) {
+      const uint8_t* image_row = pixels + y * image.stride;
+      for (size_t x = 0; x < info.xsize; ++x) {
+        const uint8_t* image_pixel = image_row + x * image.pixel_stride();
+        memcpy(&row_bytes[x * cinfo.num_components], image_pixel,
+               cinfo.num_components);
+      }
+      JSAMPROW row[] = {row_bytes.data()};
+      jpeg_write_scanlines(&cinfo, row, 1);
+    }
+  } else {
+    // Samples are read as 16-bit big-endian and rounded down to 8 bits.
+    for (size_t y = 0; y < info.ysize; ++y) {
+      const uint8_t* image_row = pixels + y * image.stride;
+      for (size_t x = 0; x < info.xsize; ++x) {
+        const uint8_t* image_pixel = image_row + x * image.pixel_stride();
+        for (int c = 0; c < cinfo.num_components; ++c) {
+          uint32_t val16 = (image_pixel[2 * c] << 8) + image_pixel[2 * c + 1];
+          row_bytes[x * cinfo.num_components + c] = (val16 + 128) / 257;
+        }
+      }
+      JSAMPROW row[] = {row_bytes.data()};
+      jpeg_write_scanlines(&cinfo, row, 1);
+    }
+  }
+  jpeg_finish_compress(&cinfo);
+  bytes->resize(size);
+  // Compressed image data is initialized by libjpeg, which we are not
+  // instrumenting with msan.
+  msan::UnpoisonMemory(buffer, size);
+  std::copy_n(buffer, size, bytes->data());
+  return true;
+}
+
+#if JPEGXL_ENABLE_SJPEG
+// Search hook handed to sjpeg when a size or PSNR target is active (see
+// EncodeWithSJpeg()). It scales a pair of base quantization tables by a
+// quality value `q` and adjusts `q` after every pass.
+// NOTE(review): q, qmin, qmax, value, target, tolerance, pass and for_size are
+// not declared here; presumably they are members of sjpeg::SearchHook —
+// confirm against sjpeg.h.
+struct MySearchHook : public sjpeg::SearchHook {
+  // Base quantization tables, selected by the `idx` argument of NextMatrix().
+  uint8_t base_tables[2][64];
+  // Initial value of q, applied in Setup().
+  float q_start;
+  // Update() stops once q moves by less than this between passes (after the
+  // first pass).
+  float q_precision;
+  // Multiplier for the first-pass adjustment of q in Update().
+  float first_iter_slope;
+  // Fills base_tables with the built-in kJPEGAnnexKMatrices and then, if `fn`
+  // is non-empty, overwrites entries with values read from that file.
+  // The file is read line by line; empty lines and lines starting with '#'
+  // are skipped, every other line is split on ','. At most 128 values are
+  // consumed: the first 64 go to table 0, the next 64 to table 1.
+  // NOTE(review): tokens are converted with std::stoi, which throws on
+  // non-numeric text, and the result is narrowed to uint8_t without a range
+  // check. A file that cannot be opened silently leaves the defaults in place.
+  void ReadBaseTables(const std::string& fn) {
+    const uint8_t kJPEGAnnexKMatrices[2][64] = {
+        {16, 11, 10, 16, 24,  40,  51,  61,  12, 12, 14, 19, 26,  58,  60,  55,
+         14, 13, 16, 24, 40,  57,  69,  56,  14, 17, 22, 29, 51,  87,  80,  62,
+         18, 22, 37, 56, 68,  109, 103, 77,  24, 35, 55, 64, 81,  104, 113, 92,
+         49, 64, 78, 87, 103, 121, 120, 101, 72, 92, 95, 98, 112, 100, 103, 99},
+        {17, 18, 24, 47, 99, 99, 99, 99, 18, 21, 26, 66, 99, 99, 99, 99,
+         24, 26, 56, 99, 99, 99, 99, 99, 47, 66, 99, 99, 99, 99, 99, 99,
+         99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
+         99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}};
+    memcpy(base_tables[0], kJPEGAnnexKMatrices[0], sizeof(base_tables[0]));
+    memcpy(base_tables[1], kJPEGAnnexKMatrices[1], sizeof(base_tables[1]));
+    if (!fn.empty()) {
+      std::ifstream f(fn);
+      std::string line;
+      int idx = 0;
+      while (idx < 128 && std::getline(f, line)) {
+        if (line.empty() || line[0] == '#') continue;
+        std::istringstream line_stream(line);
+        std::string token;
+        while (idx < 128 && std::getline(line_stream, token, ',')) {
+          uint8_t val = std::stoi(token);
+          base_tables[idx / 64][idx % 64] = val;
+          idx++;
+        }
+      }
+    }
+  }
+  // Runs the base-class setup and then starts the search at q_start. Always
+  // returns true; the base-class result is not inspected.
+  bool Setup(const sjpeg::EncoderParam& param) override {
+    sjpeg::SearchHook::Setup(param);
+    q = q_start;
+    return true;
+  }
+  // Produces the quantization matrix for table `idx` at the current q by
+  // passing base_tables[idx] and a scale factor to sjpeg::SetQuantMatrix().
+  // The factor is 5000 for q <= 0, 5000 / q below 50, 2 * (100 - q) below 100
+  // and 0 otherwise.
+  void NextMatrix(int idx, uint8_t dst[64]) override {
+    float factor = (q <= 0)       ? 5000.0f
+                   : (q < 50.0f)  ? 5000.0f / q
+                   : (q < 100.0f) ? 2 * (100.0f - q)
+                                  : 0.0f;
+    sjpeg::SetQuantMatrix(base_tables[idx], factor, dst);
+  }
+  // Records the outcome of the latest pass and picks the next q. Returns true
+  // when the search should stop: the result is within tolerance of the
+  // target, the [qmin, qmax] bracket has collapsed, or (after the first pass)
+  // q changed by less than q_precision.
+  bool Update(float result) override {
+    value = result;
+    if (fabs(value - target) < tolerance * target) {
+      return true;
+    }
+    // Narrow the bracket: overshooting makes the current q the new upper
+    // bound, undershooting the new lower bound.
+    if (value > target) {
+      qmax = q;
+    } else {
+      qmin = q;
+    }
+    if (qmin == qmax) {
+      return true;
+    }
+    const float last_q = q;
+    if (pass == 0) {
+      // First pass: take a slope-based step, clamped to the bracket.
+      q += first_iter_slope *
+           (for_size ? 0.1 * std::log(target / value) : (target - value));
+      q = std::max(qmin, std::min(qmax, q));
+    } else {
+      // Later passes: bisect the bracket.
+      q = (qmin + qmax) / 2.;
+    }
+    return (pass > 0 && fabs(q - last_q) < q_precision);
+  }
+  ~MySearchHook() override {}
+};
+#endif
+
+// Encodes `image` as a JPEG with sjpeg and stores the bitstream in `*bytes`.
+//
+// Only 8-bit, 3-channel images without alpha are accepted, because the pixel
+// buffer is handed to sjpeg::Encode() as rows of 3-byte pixels. A non-empty
+// `icc` / `exif` is attached to the output (exif after
+// ResetExifOrientation()). `params.chroma_subsampling` must be "444", "420" or
+// "420sharp".
+//
+// Target modes:
+//  - params.libjpeg_quality > 0: the image is first encoded with libjpeg at
+//    that quality and the resulting byte size becomes sjpeg's size target;
+//  - params.psnr_target > 0: sjpeg targets that PSNR (this overrides the size
+//    target).
+// When a target is active, a MySearchHook configured from the search_*
+// parameters drives sjpeg's search.
+//
+// Returns failure when built without sjpeg support, for unsupported input, or
+// when the reference libjpeg encode or sjpeg::Encode() fails.
+Status EncodeWithSJpeg(const PackedImage& image, const JxlBasicInfo& info,
+                       const std::vector<uint8_t>& icc,
+                       std::vector<uint8_t> exif, const JpegParams& params,
+                       std::vector<uint8_t>* bytes) {
+#if !JPEGXL_ENABLE_SJPEG
+  return JXL_FAILURE("JPEG XL was built without sjpeg support");
+#else
+  if (image.format.data_type != JXL_TYPE_UINT8) {
+    return JXL_FAILURE("Unsupported pixel data type");
+  }
+  if (info.alpha_bits > 0) {
+    return JXL_FAILURE("alpha is not supported");
+  }
+  // The buffer is passed on as 3 bytes per pixel; any other channel count
+  // (e.g. grayscale) would make sjpeg read past the end of the pixel data.
+  if (image.format.num_channels != 3) {
+    return JXL_FAILURE("sjpeg only supports 3-channel images");
+  }
+  sjpeg::EncoderParam param(params.quality);
+  if (!icc.empty()) {
+    param.iccp.assign(icc.begin(), icc.end());
+  }
+  if (!exif.empty()) {
+    ResetExifOrientation(exif);
+    param.exif.assign(exif.begin(), exif.end());
+  }
+  if (params.chroma_subsampling == "444") {
+    param.yuv_mode = SJPEG_YUV_444;
+  } else if (params.chroma_subsampling == "420") {
+    param.yuv_mode = SJPEG_YUV_420;
+  } else if (params.chroma_subsampling == "420sharp") {
+    param.yuv_mode = SJPEG_YUV_SHARP;
+  } else {
+    return JXL_FAILURE("sjpeg does not support this chroma subsampling mode");
+  }
+  param.adaptive_quantization = params.enable_adaptive_quant;
+  std::unique_ptr<MySearchHook> hook;
+  if (params.libjpeg_quality > 0) {
+    // Use the size of a libjpeg encode as the byte budget for sjpeg.
+    JpegParams libjpeg_params;
+    libjpeg_params.quality = params.libjpeg_quality;
+    libjpeg_params.chroma_subsampling = params.libjpeg_chroma_subsampling;
+    std::vector<uint8_t> libjpeg_bytes;
+    JXL_RETURN_IF_ERROR(EncodeWithLibJpeg(image, info, icc, exif,
+                                          libjpeg_params, &libjpeg_bytes));
+    param.target_mode = sjpeg::EncoderParam::TARGET_SIZE;
+    param.target_value = libjpeg_bytes.size();
+  }
+  if (params.psnr_target > 0) {
+    param.target_mode = sjpeg::EncoderParam::TARGET_PSNR;
+    param.target_value = params.psnr_target;
+  }
+  if (param.target_mode != sjpeg::EncoderParam::TARGET_NONE) {
+    param.passes = params.search_max_iters;
+    param.tolerance = params.search_tolerance;
+    param.qmin = params.search_q_min;
+    param.qmax = params.search_q_max;
+    hook.reset(new MySearchHook());
+    hook->ReadBaseTables(params.custom_base_quant_fn);
+    hook->q_start = params.search_q_start;
+    hook->q_precision = params.search_q_precision;
+    hook->first_iter_slope = params.search_first_iter_slope;
+    // `hook` outlives the sjpeg::Encode() call below.
+    param.search_hook = hook.get();
+  }
+  // Use the image's own row pitch rather than assuming tightly packed rows.
+  const size_t stride = image.stride;
+  const uint8_t* pixels = reinterpret_cast<const uint8_t*>(image.pixels());
+  std::string output;
+  JXL_RETURN_IF_ERROR(
+      sjpeg::Encode(pixels, image.xsize, image.ysize, stride, param, &output));
+  bytes->assign(
+      reinterpret_cast<const uint8_t*>(output.data()),
+      reinterpret_cast<const uint8_t*>(output.data() + output.size()));
+  return true;
+#endif
+}
+
+// Encodes one image to JPEG with the backend chosen by `encoder`, writing the
+// bitstream to `*bytes`. Fails when params.quality exceeds 100, when the
+// selected backend fails, or when `encoder` names no known backend. `pool` is
+// accepted but not used here.
+Status EncodeImageJPG(const PackedImage& image, const JxlBasicInfo& info,
+                      const std::vector<uint8_t>& icc,
+                      std::vector<uint8_t> exif, JpegEncoder encoder,
+                      const JpegParams& params, ThreadPool* pool,
+                      std::vector<uint8_t>* bytes) {
+  const bool quality_too_high = params.quality > 100;
+  if (quality_too_high) {
+    return JXL_FAILURE("please specify a 0-100 JPEG quality");
+  }
+
+  if (encoder == JpegEncoder::kLibJpeg) {
+    JXL_RETURN_IF_ERROR(
+        EncodeWithLibJpeg(image, info, icc, std::move(exif), params, bytes));
+    return true;
+  }
+  if (encoder == JpegEncoder::kSJpeg) {
+    JXL_RETURN_IF_ERROR(
+        EncodeWithSJpeg(image, info, icc, std::move(exif), params, bytes));
+    return true;
+  }
+  return JXL_FAILURE("tried to use an unknown JPEG encoder");
+}
+
+// Encoder implementation that writes JPEG through libjpeg or sjpeg.
+//
+// Recognized option keys (from options()): "q", "libjpeg_quality",
+// "chroma_subsampling", "libjpeg_chroma_subsampling", "jpeg_encoder"
+// ("libjpeg" or "sjpeg"), "progressive", "optimize" / "adaptive_q" (only the
+// value "OFF" has an effect), "psnr", "base_quant_fn", "search_q_start",
+// "search_q_min", "search_q_max", "search_max_iters", "search_tolerance",
+// "search_q_precision" and "search_first_iter_slope". Unknown keys are
+// ignored.
+class JPEGEncoder : public Encoder {
+  // Parses the leading token of `text` into `*out` using stream extraction.
+  // Returns false when no value could be extracted, so that malformed numeric
+  // options surface as a failed Status instead of an exception.
+  template <typename T>
+  static bool ParseOption(const std::string& text, T* out) {
+    std::istringstream is(text);
+    return static_cast<bool>(is >> *out);
+  }
+
+  // Lists the pixel formats Encode() accepts: 1 to 4 channels, either 8-bit
+  // (both endianness values) or 16-bit big-endian, all with align 0.
+  std::vector<JxlPixelFormat> AcceptedFormats() const override {
+    std::vector<JxlPixelFormat> formats;
+    for (const uint32_t num_channels : {1, 2, 3, 4}) {
+      for (JxlEndianness endianness : {JXL_BIG_ENDIAN, JXL_LITTLE_ENDIAN}) {
+        formats.push_back(JxlPixelFormat{/*num_channels=*/num_channels,
+                                         /*data_type=*/JXL_TYPE_UINT8,
+                                         /*endianness=*/endianness,
+                                         /*align=*/0});
+      }
+      formats.push_back(JxlPixelFormat{/*num_channels=*/num_channels,
+                                       /*data_type=*/JXL_TYPE_UINT16,
+                                       /*endianness=*/JXL_BIG_ENDIAN,
+                                       /*align=*/0});
+    }
+    return formats;
+  }
+
+  // Encodes every frame of `ppf` into its own JPEG bitstream, replacing the
+  // contents of encoded_image->bitstreams. The ICC profile is embedded only
+  // when the color encoding is not sRGB; Exif metadata is always forwarded.
+  // Fails on invalid basic info, a malformed numeric option, an unknown
+  // "jpeg_encoder" value, an invalid frame or a backend failure.
+  Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image,
+                ThreadPool* pool = nullptr) const override {
+    JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info));
+    JpegEncoder jpeg_encoder = JpegEncoder::kLibJpeg;
+    JpegParams params;
+    for (const auto& it : options()) {
+      if (it.first == "q") {
+        JXL_RETURN_IF_ERROR(ParseOption(it.second, &params.quality));
+      } else if (it.first == "libjpeg_quality") {
+        JXL_RETURN_IF_ERROR(ParseOption(it.second, &params.libjpeg_quality));
+      } else if (it.first == "chroma_subsampling") {
+        params.chroma_subsampling = it.second;
+      } else if (it.first == "libjpeg_chroma_subsampling") {
+        params.libjpeg_chroma_subsampling = it.second;
+      } else if (it.first == "jpeg_encoder") {
+        if (it.second == "libjpeg") {
+          jpeg_encoder = JpegEncoder::kLibJpeg;
+        } else if (it.second == "sjpeg") {
+          jpeg_encoder = JpegEncoder::kSJpeg;
+        } else {
+          return JXL_FAILURE("unknown jpeg encoder \"%s\"", it.second.c_str());
+        }
+      } else if (it.first == "progressive") {
+        JXL_RETURN_IF_ERROR(ParseOption(it.second, &params.progressive_id));
+      } else if (it.first == "optimize" && it.second == "OFF") {
+        params.optimize_coding = false;
+      } else if (it.first == "adaptive_q" && it.second == "OFF") {
+        params.enable_adaptive_quant = false;
+      } else if (it.first == "psnr") {
+        JXL_RETURN_IF_ERROR(ParseOption(it.second, &params.psnr_target));
+      } else if (it.first == "base_quant_fn") {
+        params.custom_base_quant_fn = it.second;
+      } else if (it.first == "search_q_start") {
+        JXL_RETURN_IF_ERROR(ParseOption(it.second, &params.search_q_start));
+      } else if (it.first == "search_q_min") {
+        JXL_RETURN_IF_ERROR(ParseOption(it.second, &params.search_q_min));
+      } else if (it.first == "search_q_max") {
+        JXL_RETURN_IF_ERROR(ParseOption(it.second, &params.search_q_max));
+      } else if (it.first == "search_max_iters") {
+        JXL_RETURN_IF_ERROR(ParseOption(it.second, &params.search_max_iters));
+      } else if (it.first == "search_tolerance") {
+        JXL_RETURN_IF_ERROR(ParseOption(it.second, &params.search_tolerance));
+      } else if (it.first == "search_q_precision") {
+        JXL_RETURN_IF_ERROR(
+            ParseOption(it.second, &params.search_q_precision));
+      } else if (it.first == "search_first_iter_slope") {
+        JXL_RETURN_IF_ERROR(
+            ParseOption(it.second, &params.search_first_iter_slope));
+      }
+    }
+    params.is_xyb = (ppf.color_encoding.color_space == JXL_COLOR_SPACE_XYB);
+    // sRGB needs no embedded profile; anything else carries the ICC data.
+    std::vector<uint8_t> icc;
+    if (!IsSRGBEncoding(ppf.color_encoding)) {
+      icc = ppf.icc;
+    }
+    encoded_image->bitstreams.clear();
+    encoded_image->bitstreams.reserve(ppf.frames.size());
+    for (const auto& frame : ppf.frames) {
+      JXL_RETURN_IF_ERROR(VerifyPackedImage(frame.color, ppf.info));
+      encoded_image->bitstreams.emplace_back();
+      JXL_RETURN_IF_ERROR(EncodeImageJPG(
+          frame.color, ppf.info, icc, ppf.metadata.exif, jpeg_encoder, params,
+          pool, &encoded_image->bitstreams.back()));
+    }
+    return true;
+  }
+};
+
+}  // namespace
+#endif
+
+// Creates the JPEG encoder, or returns a null pointer when the library was
+// built without JPEGXL_ENABLE_JPEG.
+std::unique_ptr<Encoder> GetJPEGEncoder() {
+  std::unique_ptr<Encoder> encoder;
+#if JPEGXL_ENABLE_JPEG
+  encoder = jxl::make_unique<JPEGEncoder>();
+#endif
+  return encoder;
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/enc/jpg.h b/third-party/libjxl/libjxl/lib/extras/enc/jpg.h
new file mode 100644
index 0000000000..20b37cd168
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/enc/jpg.h
@@ -0,0 +1,23 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_ENC_JPG_H_
+#define LIB_EXTRAS_ENC_JPG_H_
+
+// Encodes JPG pixels and metadata in memory.
+
+#include <memory>
+
+#include "lib/extras/enc/encode.h"
+
+namespace jxl {
+namespace extras {
+
+// Returns a new JPEG encoder, or nullptr when the library was built without
+// JPEG support (JPEGXL_ENABLE_JPEG unset); callers must check the result.
+std::unique_ptr<Encoder> GetJPEGEncoder();
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_ENC_JPG_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/enc/jxl.cc b/third-party/libjxl/libjxl/lib/extras/enc/jxl.cc
new file mode 100644
index 0000000000..036cd13e5d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/enc/jxl.cc
@@ -0,0 +1,318 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/enc/jxl.h"
+
+#include <jxl/encode.h>
+#include <jxl/encode_cxx.h>
+
+#include "lib/jxl/exif.h"
+
+namespace jxl {
+namespace extras {
+
+// Applies a single option to `settings`, using the float setter when
+// opt.is_float is set and the integer setter otherwise. Returns the status
+// reported by the encoder API.
+JxlEncoderStatus SetOption(const JXLOption& opt,
+                           JxlEncoderFrameSettings* settings) {
+  if (opt.is_float) {
+    return JxlEncoderFrameSettingsSetFloatOption(settings, opt.id, opt.fval);
+  }
+  return JxlEncoderFrameSettingsSetOption(settings, opt.id, opt.ival);
+}
+
+// Applies pending options to `settings`, starting at options[*option_idx] and
+// stopping at the first option whose frame_index is greater than
+// `frame_index` (or at the end of the list). `*option_idx` is advanced past
+// every option that was applied, so successive calls continue where the
+// previous one stopped.
+// NOTE(review): this presumably relies on `options` being ordered by
+// frame_index — confirm with the code that builds the list.
+// Returns false, after logging to stderr, as soon as one option is rejected;
+// `*option_idx` then still refers to the rejected option.
+bool SetFrameOptions(const std::vector<JXLOption>& options, size_t frame_index,
+                     size_t* option_idx, JxlEncoderFrameSettings* settings) {
+  for (; *option_idx < options.size(); ++(*option_idx)) {
+    const JXLOption& current = options[*option_idx];
+    if (current.frame_index > frame_index) {
+      return true;
+    }
+    if (SetOption(current, settings) != JXL_ENC_SUCCESS) {
+      fprintf(stderr, "Setting option id %d failed.\n", current.id);
+      return false;
+    }
+  }
+  return true;
+}
+
+bool EncodeImageJXL(const JXLCompressParams& params, const PackedPixelFile& ppf,
+                    const std::vector<uint8_t>* jpeg_bytes,
+                    std::vector<uint8_t>* compressed) {
+  auto encoder = JxlEncoderMake(/*memory_manager=*/nullptr);
+  JxlEncoder* enc = encoder.get();
+
+  if (params.allow_expert_options) {
+    JxlEncoderAllowExpertOptions(enc);
+  }
+
+  if (params.runner_opaque != nullptr &&
+      JXL_ENC_SUCCESS != JxlEncoderSetParallelRunner(enc, params.runner,
+                                                     params.runner_opaque)) {
+    fprintf(stderr, "JxlEncoderSetParallelRunner failed\n");
+    return false;
+  }
+
+  auto settings = JxlEncoderFrameSettingsCreate(enc, nullptr);
+  size_t option_idx = 0;
+  if (!SetFrameOptions(params.options, 0, &option_idx, settings)) {
+    return false;
+  }
+  if (JXL_ENC_SUCCESS !=
+      JxlEncoderSetFrameDistance(settings, params.distance)) {
+    fprintf(stderr, "Setting frame distance failed.\n");
+    return false;
+  }
+  if (params.debug_image) {
+    JxlEncoderSetDebugImageCallback(settings, params.debug_image,
+                                    params.debug_image_opaque);
+  }
+  if (params.stats) {
+    JxlEncoderCollectStats(settings, params.stats);
+  }
+
+  bool use_boxes = !ppf.metadata.exif.empty() || !ppf.metadata.xmp.empty() ||
+                   !ppf.metadata.jumbf.empty() || !ppf.metadata.iptc.empty();
+  bool use_container = params.use_container || use_boxes ||
+                       (jpeg_bytes && params.jpeg_store_metadata);
+
+  if (JXL_ENC_SUCCESS !=
+      JxlEncoderUseContainer(enc, static_cast<int>(use_container))) {
+    fprintf(stderr, "JxlEncoderUseContainer failed.\n");
+    return false;
+  }
+
+  if (jpeg_bytes) {
+    if (params.jpeg_store_metadata &&
+        JXL_ENC_SUCCESS != JxlEncoderStoreJPEGMetadata(enc, JXL_TRUE)) {
+      fprintf(stderr, "Storing JPEG metadata failed.\n");
+      return false;
+    }
+    if (!params.jpeg_store_metadata && params.jpeg_strip_exif) {
+      JxlEncoderFrameSettingsSetOption(settings,
+                                       JXL_ENC_FRAME_SETTING_JPEG_KEEP_EXIF, 0);
+    }
+    if (!params.jpeg_store_metadata && params.jpeg_strip_xmp) {
+      JxlEncoderFrameSettingsSetOption(settings,
+                                       JXL_ENC_FRAME_SETTING_JPEG_KEEP_XMP, 0);
+    }
+    if (params.jpeg_strip_jumbf) {
+      JxlEncoderFrameSettingsSetOption(
+          settings, JXL_ENC_FRAME_SETTING_JPEG_KEEP_JUMBF, 0);
+    }
+    if (JXL_ENC_SUCCESS != JxlEncoderAddJPEGFrame(settings, jpeg_bytes->data(),
+                                                  jpeg_bytes->size())) {
+      JxlEncoderError error = JxlEncoderGetError(enc);
+      if (error == JXL_ENC_ERR_BAD_INPUT) {
+        fprintf(stderr,
+                "Error while decoding the JPEG image. It may be corrupt (e.g. "
+                "truncated) or of an unsupported type (e.g. CMYK).\n");
+      } else if (error == JXL_ENC_ERR_JBRD) {
+        fprintf(stderr,
+                "JPEG bitstream reconstruction data could not be created. "
+                "Possibly there is too much tail data.\n"
+                "Try using --jpeg_store_metadata 0, to losslessly "
+                "recompress the JPEG image data without bitstream "
+                "reconstruction data.\n");
+      } else {
+        fprintf(stderr, "JxlEncoderAddJPEGFrame() failed.\n");
+      }
+      return false;
+    }
+  } else {
+    size_t num_alpha_channels = 0;  // Adjusted below.
+    JxlBasicInfo basic_info = ppf.info;
+    basic_info.xsize *= params.already_downsampled;
+    basic_info.ysize *= params.already_downsampled;
+    if (basic_info.alpha_bits > 0) num_alpha_channels = 1;
+    if (params.intensity_target > 0) {
+      basic_info.intensity_target = params.intensity_target;
+    }
+    basic_info.num_extra_channels =
+        std::max<uint32_t>(num_alpha_channels, ppf.info.num_extra_channels);
+    basic_info.num_color_channels = ppf.info.num_color_channels;
+    const bool lossless = params.distance == 0;
+    basic_info.uses_original_profile = lossless;
+    if (params.override_bitdepth != 0) {
+      basic_info.bits_per_sample = params.override_bitdepth;
+      basic_info.exponent_bits_per_sample =
+          params.override_bitdepth == 32 ? 8 : 0;
+    }
+    if (JXL_ENC_SUCCESS !=
+        JxlEncoderSetCodestreamLevel(enc, params.codestream_level)) {
+      fprintf(stderr, "Setting --codestream_level failed.\n");
+      return false;
+    }
+    if (JXL_ENC_SUCCESS != JxlEncoderSetBasicInfo(enc, &basic_info)) {
+      fprintf(stderr, "JxlEncoderSetBasicInfo() failed.\n");
+      return false;
+    }
+    if (JXL_ENC_SUCCESS !=
+        JxlEncoderSetUpsamplingMode(enc, params.already_downsampled,
+                                    params.upsampling_mode)) {
+      fprintf(stderr, "JxlEncoderSetUpsamplingMode() failed.\n");
+      return false;
+    }
+    if (JXL_ENC_SUCCESS !=
+        JxlEncoderSetFrameBitDepth(settings, &params.input_bitdepth)) {
+      fprintf(stderr, "JxlEncoderSetFrameBitDepth() failed.\n");
+      return false;
+    }
+    if (num_alpha_channels != 0 &&
+        JXL_ENC_SUCCESS != JxlEncoderSetExtraChannelDistance(
+                               settings, 0, params.alpha_distance)) {
+      fprintf(stderr, "Setting alpha distance failed.\n");
+      return false;
+    }
+    if (lossless &&
+        JXL_ENC_SUCCESS != JxlEncoderSetFrameLossless(settings, JXL_TRUE)) {
+      fprintf(stderr, "JxlEncoderSetFrameLossless() failed.\n");
+      return false;
+    }
+    if (!ppf.icc.empty()) {
+      if (JXL_ENC_SUCCESS !=
+          JxlEncoderSetICCProfile(enc, ppf.icc.data(), ppf.icc.size())) {
+        fprintf(stderr, "JxlEncoderSetICCProfile() failed.\n");
+        return false;
+      }
+    } else {
+      if (JXL_ENC_SUCCESS !=
+          JxlEncoderSetColorEncoding(enc, &ppf.color_encoding)) {
+        fprintf(stderr, "JxlEncoderSetColorEncoding() failed.\n");
+        return false;
+      }
+    }
+
+    if (use_boxes) {
+      if (JXL_ENC_SUCCESS != JxlEncoderUseBoxes(enc)) {
+        fprintf(stderr, "JxlEncoderUseBoxes() failed.\n");
+        return false;
+      }
+      // Prepend 4 zero bytes to exif for tiff header offset
+      std::vector<uint8_t> exif_with_offset;
+      bool bigendian;
+      if (IsExif(ppf.metadata.exif, &bigendian)) {
+        exif_with_offset.resize(ppf.metadata.exif.size() + 4);
+        memcpy(exif_with_offset.data() + 4, ppf.metadata.exif.data(),
+               ppf.metadata.exif.size());
+      }
+      const struct BoxInfo {
+        const char* type;
+        const std::vector<uint8_t>& bytes;
+      } boxes[] = {
+          {"Exif", exif_with_offset},
+          {"xml ", ppf.metadata.xmp},
+          {"jumb", ppf.metadata.jumbf},
+          {"xml ", ppf.metadata.iptc},
+      };
+      for (size_t i = 0; i < sizeof boxes / sizeof *boxes; ++i) {
+        const BoxInfo& box = boxes[i];
+        if (!box.bytes.empty() &&
+            JXL_ENC_SUCCESS != JxlEncoderAddBox(enc, box.type, box.bytes.data(),
+                                                box.bytes.size(),
+                                                params.compress_boxes)) {
+          fprintf(stderr, "JxlEncoderAddBox() failed (%s).\n", box.type);
+          return false;
+        }
+      }
+      JxlEncoderCloseBoxes(enc);
+    }
+
+    for (size_t num_frame = 0; num_frame < ppf.frames.size(); ++num_frame) {
+      const jxl::extras::PackedFrame& pframe = ppf.frames[num_frame];
+      const jxl::extras::PackedImage& pimage = pframe.color;
+      JxlPixelFormat ppixelformat = pimage.format;
+      if (JXL_ENC_SUCCESS !=
+          JxlEncoderSetFrameHeader(settings, &pframe.frame_info)) {
+        fprintf(stderr, "JxlEncoderSetFrameHeader() failed.\n");
+        return false;
+      }
+      if (!SetFrameOptions(params.options, num_frame, &option_idx, settings)) {
+        return false;
+      }
+      if (num_alpha_channels > 0) {
+        JxlExtraChannelInfo extra_channel_info;
+        JxlEncoderInitExtraChannelInfo(JXL_CHANNEL_ALPHA, &extra_channel_info);
+        extra_channel_info.bits_per_sample = ppf.info.alpha_bits;
+        extra_channel_info.exponent_bits_per_sample =
+            ppf.info.alpha_exponent_bits;
+        if (params.premultiply != -1) {
+          if (params.premultiply != 0 && params.premultiply != 1) {
+            fprintf(stderr, "premultiply must be one of: -1, 0, 1.\n");
+            return false;
+          }
+          extra_channel_info.alpha_premultiplied = params.premultiply;
+        }
+        if (JXL_ENC_SUCCESS !=
+            JxlEncoderSetExtraChannelInfo(enc, 0, &extra_channel_info)) {
+          fprintf(stderr, "JxlEncoderSetExtraChannelInfo() failed.\n");
+          return false;
+        }
+        // We take the extra channel blend info frame_info, but don't do
+        // clamping.
+        JxlBlendInfo extra_channel_blend_info =
+            pframe.frame_info.layer_info.blend_info;
+        extra_channel_blend_info.clamp = JXL_FALSE;
+        JxlEncoderSetExtraChannelBlendInfo(settings, 0,
+                                           &extra_channel_blend_info);
+      }
+      size_t num_interleaved_alpha =
+          (ppixelformat.num_channels - ppf.info.num_color_channels);
+      // Add extra channel info for the rest of the extra channels.
+      for (size_t i = 0; i < ppf.info.num_extra_channels; ++i) {
+        if (i < ppf.extra_channels_info.size()) {
+          const auto& ec_info = ppf.extra_channels_info[i].ec_info;
+          if (JXL_ENC_SUCCESS !=
+              JxlEncoderSetExtraChannelInfo(enc, num_interleaved_alpha + i,
+                                            &ec_info)) {
+            fprintf(stderr, "JxlEncoderSetExtraChannelInfo() failed.\n");
+            return false;
+          }
+        }
+      }
+      if (JXL_ENC_SUCCESS != JxlEncoderAddImageFrame(settings, &ppixelformat,
+                                                     pimage.pixels(),
+                                                     pimage.pixels_size)) {
+        fprintf(stderr, "JxlEncoderAddImageFrame() failed.\n");
+        return false;
+      }
+      // Only set extra channel buffer if it is provided non-interleaved.
+      for (size_t i = 0; i < pframe.extra_channels.size(); ++i) {
+        if (JXL_ENC_SUCCESS !=
+            JxlEncoderSetExtraChannelBuffer(settings, &ppixelformat,
+                                            pframe.extra_channels[i].pixels(),
+                                            pframe.extra_channels[i].stride *
+                                                pframe.extra_channels[i].ysize,
+                                            num_interleaved_alpha + i)) {
+          fprintf(stderr, "JxlEncoderSetExtraChannelBuffer() failed.\n");
+          return false;
+        }
+      }
+    }
+  }
+  JxlEncoderCloseInput(enc);
+  // Reading compressed output
+  compressed->clear();
+  compressed->resize(4096);
+  uint8_t* next_out = compressed->data();
+  size_t avail_out = compressed->size() - (next_out - compressed->data());
+  JxlEncoderStatus result = JXL_ENC_NEED_MORE_OUTPUT;
+  while (result == JXL_ENC_NEED_MORE_OUTPUT) {
+    result = JxlEncoderProcessOutput(enc, &next_out, &avail_out);
+    if (result == JXL_ENC_NEED_MORE_OUTPUT) {
+      size_t offset = next_out - compressed->data();
+      compressed->resize(compressed->size() * 2);
+      next_out = compressed->data() + offset;
+      avail_out = compressed->size() - offset;
+    }
+  }
+  compressed->resize(next_out - compressed->data());
+  if (result != JXL_ENC_SUCCESS) {
+    fprintf(stderr, "JxlEncoderProcessOutput failed.\n");
+    return false;
+  }
+  return true;
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/enc/jxl.h b/third-party/libjxl/libjxl/lib/extras/enc/jxl.h
new file mode 100644
index 0000000000..8c270c4935
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/enc/jxl.h
@@ -0,0 +1,85 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_ENC_JXL_H_
+#define LIB_EXTRAS_ENC_JXL_H_
+
+#include <jxl/encode.h>
+#include <jxl/parallel_runner.h>
+#include <jxl/thread_parallel_runner.h>
+#include <jxl/types.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/extras/packed_image.h"
+
+namespace jxl {
+namespace extras {
+
+struct JXLOption {  // One encoder frame setting holding an integer or a float.
+  JXLOption(JxlEncoderFrameSettingId id, int64_t val, size_t frame_index)
+      : id(id), is_float(false), ival(val), frame_index(frame_index) {}
+  JXLOption(JxlEncoderFrameSettingId id, float val, size_t frame_index)
+      : id(id), is_float(true), fval(val), frame_index(frame_index) {}
+
+  JxlEncoderFrameSettingId id;
+  bool is_float;  // Active union member: fval when true, ival when false.
+  union {
+    int64_t ival;
+    float fval;
+  };
+  size_t frame_index;  // NOTE(review): presumably the frame this applies from.
+};
+
+struct JXLCompressParams {  // Inputs consumed by EncodeImageJXL().
+  std::vector<JXLOption> options;
+  // Target butteraugli distance, 0.0 means lossless.
+  float distance = 1.0f;
+  float alpha_distance = 1.0f;  // Distance for extra channel 0 if alpha exists.
+  // If set to true, forces container mode.
+  bool use_container = false;
+  // Whether to enable/disable byte-exact jpeg reconstruction for jpeg inputs.
+  bool jpeg_store_metadata = true;
+  bool jpeg_strip_exif = false;
+  bool jpeg_strip_xmp = false;
+  bool jpeg_strip_jumbf = false;
+  // Whether to create brob boxes.
+  bool compress_boxes = true;
+  // Upper bound on the intensity level present in the image in nits (zero means
+  // that the library chooses a default).
+  float intensity_target = 0;
+  int already_downsampled = 1;  // Passed to JxlEncoderSetUpsamplingMode().
+  int upsampling_mode = -1;     // Passed to JxlEncoderSetUpsamplingMode().
+  // Overrides for bitdepth, codestream level and alpha premultiply.
+  size_t override_bitdepth = 0;
+  int32_t codestream_level = -1;
+  int32_t premultiply = -1;  // -1 keeps the default; otherwise must be 0 or 1.
+  // Override input buffer interpretation.
+  JxlBitDepth input_bitdepth = {JXL_BIT_DEPTH_FROM_PIXEL_FORMAT, 0, 0};
+  // If runner_opaque is set, the encoder uses this parallel runner.
+  JxlParallelRunner runner = JxlThreadParallelRunner;
+  void* runner_opaque = nullptr;
+  JxlDebugImageCallback debug_image = nullptr;
+  void* debug_image_opaque = nullptr;
+  JxlEncoderStats* stats = nullptr;
+  bool allow_expert_options = false;
+
+  void AddOption(JxlEncoderFrameSettingId id, int64_t val) {  // frame_index 0
+    options.emplace_back(JXLOption(id, val, 0));
+  }
+  void AddFloatOption(JxlEncoderFrameSettingId id, float val) {  // frame_index 0
+    options.emplace_back(JXLOption(id, val, 0));
+  }
+};
+
+bool EncodeImageJXL(const JXLCompressParams& params, const PackedPixelFile& ppf,
+                    const std::vector<uint8_t>* jpeg_bytes,
+                    std::vector<uint8_t>* compressed);  // true on success
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_ENC_JXL_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/enc/npy.cc b/third-party/libjxl/libjxl/lib/extras/enc/npy.cc
new file mode 100644
index 0000000000..e7a659184b
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/enc/npy.cc
@@ -0,0 +1,322 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/enc/npy.h"
+
+#include <jxl/types.h>
+#include <stdio.h>
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "lib/extras/packed_image.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+// JSON value writing
+
+class JSONField {  // Abstract base for nodes that can write themselves out.
+ public:
+  virtual ~JSONField() = default;
+  virtual void Write(std::ostream& o, uint32_t indent) const = 0;
+
+ protected:
+  JSONField() = default;  // Only subclasses may construct.
+};
+
+class JSONValue : public JSONField {  // Leaf node; text is rendered eagerly.
+ public:
+  template <typename T>
+  explicit JSONValue(const T& value) : value_(std::to_string(value)) {}
+  // NOTE(review): strings are quoted verbatim, without any JSON escaping.
+  explicit JSONValue(const std::string& value) : value_("\"" + value + "\"") {}
+
+  explicit JSONValue(bool value) : value_(value ? "true" : "false") {}
+
+  void Write(std::ostream& o, uint32_t indent) const override { o << value_; }
+
+ private:
+  std::string value_;
+};
+
+class JSONDict : public JSONField {
+ public:
+  JSONDict() = default;
+
+  // Creates an empty T stored under `key`; the returned pointer stays owned by
+  // this dictionary.
+  template <typename T>
+  T* AddEmpty(const std::string& key) {
+    static_assert(std::is_convertible<T*, JSONField*>::value,
+                  "T must be a JSONField");
+    T* created = new T();
+    JSONField* as_field = created;
+    values_.emplace_back(key, std::unique_ptr<JSONField>(as_field));
+    return created;
+  }
+
+  // Stores `value` under `key`, wrapped in a JSONValue.
+  template <typename T>
+  void Add(const std::string& key, const T& value) {
+    JSONField* leaf = new JSONValue(value);
+    values_.emplace_back(key, std::unique_ptr<JSONField>(leaf));
+  }
+
+  // Writes "{...}" with one member per line, in insertion order.
+  void Write(std::ostream& o, uint32_t indent) const override {
+    const std::string pad(indent, ' ');
+    o << "{";
+    const char* separator = "";
+    for (const auto& entry : values_) {
+      o << separator << std::endl << pad << "  \"" << entry.first << "\": ";
+      entry.second->Write(o, indent + 2);
+      separator = ",";
+    }
+    if (!values_.empty()) o << std::endl << pad;
+    o << "}";
+  }
+
+ private:
+  // Key/value pairs kept in insertion order.
+  std::vector<std::pair<std::string, std::unique_ptr<JSONField>>> values_;
+};
+
+class JSONArray : public JSONField {
+ public:
+  JSONArray() = default;
+
+  // Appends an empty T; the returned pointer stays owned by this array and can
+  // be used to fill the new element in.
+  template <typename T>
+  T* AddEmpty() {
+    static_assert(std::is_convertible<T*, JSONField*>::value,
+                  "T must be a JSONField");
+    T* created = new T();
+    values_.emplace_back(created);
+    return created;
+  }
+
+  // Appends `value`, wrapped in a JSONValue.
+  template <typename T>
+  void Add(const T& value) {
+    values_.emplace_back(new JSONValue(value));
+  }
+
+  // Writes "[...]" with one element per line, in insertion order.
+  void Write(std::ostream& o, uint32_t indent) const override {
+    const std::string pad(indent, ' ');
+    o << "[";
+    const char* separator = "";
+    for (const auto& element : values_) {
+      o << separator << std::endl << pad << "  ";
+      element->Write(o, indent + 2);
+      separator = ",";
+    }
+    if (!values_.empty()) o << std::endl << pad;
+    o << "]";
+  }
+
+ private:
+  // Elements kept in insertion order.
+  std::vector<std::unique_ptr<JSONField>> values_;
+};
+
+void GenerateMetadata(const PackedPixelFile& ppf, std::vector<uint8_t>* out) {
+  JSONDict meta;  // Root JSON object; its text is copied into *out at the end.
+  // Same order as in 18181-3 CD.
+
+  // Frames: one dict per frame with a name and optional duration/timecode.
+  auto* meta_frames = meta.AddEmpty<JSONArray>("frames");
+  for (size_t i = 0; i < ppf.frames.size(); i++) {
+    auto* frame_i = meta_frames->AddEmpty<JSONDict>();
+    if (ppf.info.have_animation) {  // duration * tps_denominator / tps_numerator
+      frame_i->Add("duration",
+                   JSONValue(ppf.frames[i].frame_info.duration * 1.0f *
+                             ppf.info.animation.tps_denominator /
+                             ppf.info.animation.tps_numerator));
+    }
+
+    frame_i->Add("name", JSONValue(ppf.frames[i].name));
+
+    if (ppf.info.animation.have_timecodes) {
+      frame_i->Add("timecode", JSONValue(ppf.frames[i].frame_info.timecode));
+    }
+  }
+  // METADATA(x) stores ppf.info.x in the root object under the key "x".
+#define METADATA(FIELD) meta.Add(#FIELD, ppf.info.FIELD)
+
+  METADATA(intensity_target);
+  METADATA(min_nits);
+  METADATA(relative_to_max_display);
+  METADATA(linear_below);
+
+  if (ppf.info.have_preview) {
+    meta.AddEmpty<JSONDict>("preview");
+    // TODO(veluca): can we have duration/name/timecode here?
+  }
+
+  {
+    auto ectype = meta.AddEmpty<JSONArray>("extra_channel_type");
+    auto bps = meta.AddEmpty<JSONArray>("bits_per_sample");
+    auto ebps = meta.AddEmpty<JSONArray>("exp_bits_per_sample");
+    bps->Add(ppf.info.bits_per_sample);  // First entry: the color channels.
+    ebps->Add(ppf.info.exponent_bits_per_sample);
+    for (size_t i = 0; i < ppf.extra_channels_info.size(); i++) {
+      switch (ppf.extra_channels_info[i].ec_info.type) {
+        case JXL_CHANNEL_ALPHA: {
+          ectype->Add(std::string("Alpha"));
+          break;
+        }
+        case JXL_CHANNEL_DEPTH: {
+          ectype->Add(std::string("Depth"));
+          break;
+        }
+        case JXL_CHANNEL_SPOT_COLOR: {
+          ectype->Add(std::string("SpotColor"));
+          break;
+        }
+        case JXL_CHANNEL_SELECTION_MASK: {
+          ectype->Add(std::string("SelectionMask"));
+          break;
+        }
+        case JXL_CHANNEL_BLACK: {
+          ectype->Add(std::string("Black"));
+          break;
+        }
+        case JXL_CHANNEL_CFA: {
+          ectype->Add(std::string("CFA"));
+          break;
+        }
+        case JXL_CHANNEL_THERMAL: {
+          ectype->Add(std::string("Thermal"));
+          break;
+        }
+        default: {
+          ectype->Add(std::string("UNKNOWN"));
+          break;
+        }
+      }
+      bps->Add(ppf.extra_channels_info[i].ec_info.bits_per_sample);
+      ebps->Add(ppf.extra_channels_info[i].ec_info.exponent_bits_per_sample);
+    }
+  }
+
+  std::ostringstream os;
+  meta.Write(os, 0);
+  out->resize(os.str().size());  // Note: each os.str() call returns a copy.
+  memcpy(out->data(), os.str().data(), os.str().size());
+}
+
+void Append(std::vector<uint8_t>* out, const void* data, size_t size) {
+  // Copy `size` raw bytes starting at `data` onto the end of *out.
+  const uint8_t* first = static_cast<const uint8_t*>(data);
+  out->insert(out->end(), first, first + size);
+}
+
+void WriteNPYHeader(size_t xsize, size_t ysize, uint32_t num_channels,
+                    size_t num_frames, std::vector<uint8_t>* out) {
+  const uint8_t kMagic[] = "\x93NUMPY\x01\x00";  // magic string + version 1.0
+  std::ostringstream dict;
+  dict << "{'descr': '<f4', 'fortran_order': False, 'shape': (" << num_frames
+       << ", " << ysize << ", " << xsize << ", " << num_channels << "), }\n";
+  const std::string text = dict.str();
+  const uint8_t len_le[2] = {static_cast<uint8_t>(text.size() & 0xFF),
+                             static_cast<uint8_t>(text.size() >> 8)};
+  Append(out, kMagic, 8);
+  Append(out, len_le, 2);  // 16-bit little endian header length.
+  Append(out, text.data(), text.size());
+}
+
+bool WriteFrameToNPYArray(size_t xsize, size_t ysize, const PackedFrame& frame,
+                          std::vector<uint8_t>* out) {
+  const auto& color = frame.color;
+  // The color plane and every extra channel must have exactly the requested
+  // dimensions; otherwise nothing is written.
+  bool dims_match = (color.xsize == xsize) && (color.ysize == ysize);
+  for (const auto& ec : frame.extra_channels) {
+    dims_match = dims_match && (ec.xsize == xsize) && (ec.ysize == ysize);
+  }
+  if (!dims_match) return false;
+  // For each pixel in row-major order, append the color bytes followed by the
+  // bytes of each extra channel.
+  for (size_t y = 0; y < ysize; ++y) {
+    for (size_t x = 0; x < xsize; ++x) {
+      {
+        const size_t sample_size = color.pixel_stride();
+        const size_t offset = y * color.stride + x * sample_size;
+        JXL_ASSERT(offset + sample_size <= color.pixels_size);
+        Append(out, reinterpret_cast<uint8_t*>(color.pixels()) + offset,
+               sample_size);
+      }
+      for (const auto& ec : frame.extra_channels) {
+        const size_t sample_size = ec.pixel_stride();
+        const size_t offset = y * ec.stride + x * sample_size;
+        JXL_ASSERT(offset + sample_size <= ec.pixels_size);
+        Append(out, reinterpret_cast<uint8_t*>(ec.pixels()) + offset,
+               sample_size);
+      }
+    }
+  }
+  return true;
+}
+
+// Serializes all frames of `ppf` as one 4D numpy array: an NPY header followed
+// by the raw bytes of each frame. Returns false if a frame cannot be written.
+bool WriteNPYArray(const PackedPixelFile& ppf, std::vector<uint8_t>* out) {
+  const size_t width = ppf.info.xsize;
+  const size_t height = ppf.info.ysize;
+  const size_t channels =
+      ppf.info.num_color_channels + ppf.extra_channels_info.size();
+  WriteNPYHeader(width, height, channels, ppf.frames.size(), out);
+  for (size_t i = 0; i < ppf.frames.size(); ++i) {
+    const bool ok = WriteFrameToNPYArray(width, height, ppf.frames[i], out);
+    if (!ok) return false;
+  }
+  return true;
+}
+
+class NumPyEncoder : public Encoder {
+ public:
+  // Produces JSON metadata, one NPY bitstream holding every frame and, when a
+  // preview frame is present, a separate single-frame NPY preview bitstream.
+  Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image,
+                ThreadPool* pool = nullptr) const override {
+    JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info));
+    GenerateMetadata(ppf, &encoded_image->metadata);
+    // Main bitstream: all frames in a single array.
+    encoded_image->bitstreams.emplace_back();
+    if (!WriteNPYArray(ppf, &encoded_image->bitstreams.back())) {
+      return false;
+    }
+    if (!ppf.preview_frame) {
+      return true;
+    }
+    // The preview header declares only the color channels.
+    auto* preview_out = &encoded_image->preview_bitstream;
+    const size_t xsize = ppf.info.preview.xsize;
+    const size_t ysize = ppf.info.preview.ysize;
+    WriteNPYHeader(xsize, ysize, ppf.info.num_color_channels, 1, preview_out);
+    return WriteFrameToNPYArray(xsize, ysize, *ppf.preview_frame,
+                                preview_out);
+  }
+
+  // Little-endian float samples with either 1 or 3 channels.
+  std::vector<JxlPixelFormat> AcceptedFormats() const override {
+    return {JxlPixelFormat{1, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, /*align=*/0},
+            JxlPixelFormat{3, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, /*align=*/0}};
+  }
+};
+
+}  // namespace
+
+std::unique_ptr<Encoder> GetNumPyEncoder() {  // Factory for NumPyEncoder.
+  return jxl::make_unique<NumPyEncoder>();
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/enc/npy.h b/third-party/libjxl/libjxl/lib/extras/enc/npy.h
new file mode 100644
index 0000000000..3ee6208ec2
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/enc/npy.h
@@ -0,0 +1,23 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_ENC_NPY_H_
+#define LIB_EXTRAS_ENC_NPY_H_
+
+// Encodes pixels to numpy array, used for conformance testing.
+
+#include <memory>
+
+#include "lib/extras/enc/encode.h"
+
+namespace jxl {
+namespace extras {
+
+std::unique_ptr<Encoder> GetNumPyEncoder();
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_ENC_NPY_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/enc/pgx.cc b/third-party/libjxl/libjxl/lib/extras/enc/pgx.cc
new file mode 100644
index 0000000000..201c8b4189
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/enc/pgx.cc
@@ -0,0 +1,123 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/enc/pgx.h"
+
+#include <jxl/codestream_header.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/byte_order.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+constexpr size_t kMaxHeaderSize = 200;
+
+Status EncodeHeader(const JxlBasicInfo& info, char* header,
+                    int* chars_written) {
+  // Only single-channel images without alpha are accepted.
+  if (info.alpha_bits > 0) return JXL_FAILURE("PGX: can't store alpha");
+  if (info.num_color_channels != 1) {
+    return JXL_FAILURE("PGX: must be grayscale");
+  }
+  // TODO(lode): verify other bit depths: for other bit depths such as 1 or 4
+  // bits, have a test case to verify it works correctly. For bits > 16, we may
+  // need to change the way external_image works.
+  const bool depth_ok = info.bits_per_sample == 8 || info.bits_per_sample == 16;
+  if (!depth_ok) {
+    return JXL_FAILURE("PGX: bits other than 8 or 16 not yet supported");
+  }
+  // Use ML (Big Endian), LM may not be well supported by all decoders.
+  const int written = snprintf(header, kMaxHeaderSize, "PG ML + %u %u %u\n",
+                               info.bits_per_sample, info.xsize, info.ysize);
+  *chars_written = written;
+  // A negative result converts to a huge unsigned value and so fails here too.
+  JXL_RETURN_IF_ERROR(static_cast<unsigned int>(written) < kMaxHeaderSize);
+  return true;
+}
+
+// Serializes `frame.color` as one PGX image: a text header followed by the
+// samples in big-endian order. Fails on unsupported or mismatched bit depths.
+Status EncodeImagePGX(const PackedFrame& frame, const JxlBasicInfo& info,
+                      std::vector<uint8_t>* bytes) {
+  char header[kMaxHeaderSize];
+  int header_size = 0;
+  JXL_RETURN_IF_ERROR(EncodeHeader(info, header, &header_size));
+
+  const PackedImage& color = frame.color;
+  const JxlPixelFormat format = color.format;
+  const uint8_t* in = reinterpret_cast<const uint8_t*>(color.pixels());
+  size_t data_bits_per_sample = PackedImage::BitsPerChannel(format.data_type);
+  size_t bytes_per_sample = data_bits_per_sample / kBitsPerByte;
+  // Widen before multiplying; a 32-bit xsize * ysize product could wrap.
+  size_t num_samples = static_cast<size_t>(info.xsize) * info.ysize;
+  if (info.bits_per_sample != data_bits_per_sample) {
+    return JXL_FAILURE("Bit depth does not match pixel data type");
+  }
+
+  std::vector<uint8_t> pixels(num_samples * bytes_per_sample);
+
+  if (format.data_type == JXL_TYPE_UINT8) {
+    memcpy(&pixels[0], in, num_samples * bytes_per_sample);
+  } else if (format.data_type == JXL_TYPE_UINT16) {
+    if (format.endianness != JXL_BIG_ENDIAN) {
+      for (size_t i = 0; i < num_samples; ++i) {  // LE input -> BE output
+        StoreBE16(LoadLE16(in + 2 * i), pixels.data() + 2 * i);
+      }
+    } else {
+      memcpy(&pixels[0], in, num_samples * bytes_per_sample);
+    }
+  } else {
+    return JXL_FAILURE("Unsupported pixel data type");
+  }
+
+  bytes->resize(static_cast<size_t>(header_size) + pixels.size());
+  memcpy(bytes->data(), header, static_cast<size_t>(header_size));
+  memcpy(bytes->data() + header_size, pixels.data(), pixels.size());
+  return true;
+}
+
+class PGXEncoder : public Encoder {
+ public:
+  // Single-channel 8- or 16-bit unsigned samples in either byte order, listed
+  // as: u8/BE, u8/LE, u16/BE, u16/LE.
+  std::vector<JxlPixelFormat> AcceptedFormats() const override {
+    return {
+        JxlPixelFormat{1, JXL_TYPE_UINT8, JXL_BIG_ENDIAN, /*align=*/0},
+        JxlPixelFormat{1, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, /*align=*/0},
+        JxlPixelFormat{1, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, /*align=*/0},
+        JxlPixelFormat{1, JXL_TYPE_UINT16, JXL_LITTLE_ENDIAN, /*align=*/0},
+    };
+  }
+  // Encodes each frame of `ppf` into its own PGX bitstream and copies the ICC
+  // profile through unchanged.
+  Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image,
+                ThreadPool* pool) const override {
+    JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info));
+    encoded_image->icc.assign(ppf.icc.begin(), ppf.icc.end());
+    auto& streams = encoded_image->bitstreams;
+    streams.clear();
+    streams.reserve(ppf.frames.size());
+    for (const auto& frame : ppf.frames) {
+      JXL_RETURN_IF_ERROR(VerifyPackedImage(frame.color, ppf.info));
+      streams.emplace_back();
+      JXL_RETURN_IF_ERROR(EncodeImagePGX(frame, ppf.info, &streams.back()));
+    }
+    return true;
+  }
+};
+
+}  // namespace
+
+std::unique_ptr<Encoder> GetPGXEncoder() {  // Factory for PGXEncoder.
+  return jxl::make_unique<PGXEncoder>();
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/enc/pgx.h b/third-party/libjxl/libjxl/lib/extras/enc/pgx.h
new file mode 100644
index 0000000000..f24e391b09
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/enc/pgx.h
@@ -0,0 +1,24 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_ENC_PGX_H_
+#define LIB_EXTRAS_ENC_PGX_H_
+
+// Encodes PGX pixels in memory.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/extras/enc/encode.h"
+
+namespace jxl {
+namespace extras {
+
+std::unique_ptr<Encoder> GetPGXEncoder();
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_ENC_PGX_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/enc/pnm.cc b/third-party/libjxl/libjxl/lib/extras/enc/pnm.cc
new file mode 100644
index 0000000000..91323692c0
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/enc/pnm.cc
@@ -0,0 +1,302 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/enc/pnm.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/enc_image_bundle.h"
+#include "lib/jxl/fields.h"  // AllDefault
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+constexpr size_t kMaxHeaderSize = 200;
+
+class PNMEncoder : public Encoder {  // Shared Encode() for PPM/PGM/PFM/PAM.
+ public:
+  Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image,
+                ThreadPool* pool = nullptr) const override {
+    JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info));
+    if (!ppf.metadata.exif.empty() || !ppf.metadata.iptc.empty() ||
+        !ppf.metadata.jumbf.empty() || !ppf.metadata.xmp.empty()) {
+      JXL_WARNING("PNM encoder ignoring metadata - use a different codec");
+    }
+    encoded_image->icc = ppf.icc;
+    encoded_image->bitstreams.clear();
+    encoded_image->bitstreams.reserve(ppf.frames.size());
+    for (const auto& frame : ppf.frames) {  // One color bitstream per frame.
+      JXL_RETURN_IF_ERROR(VerifyPackedImage(frame.color, ppf.info));
+      encoded_image->bitstreams.emplace_back();
+      JXL_RETURN_IF_ERROR(
+          EncodeFrame(ppf, frame, &encoded_image->bitstreams.back()));
+    }
+    for (size_t i = 0; i < ppf.extra_channels_info.size(); ++i) {
+      const size_t bits = ppf.extra_channels_info[i].ec_info.bits_per_sample;
+      encoded_image->extra_channel_bitstreams.emplace_back();
+      auto& ec_bitstreams = encoded_image->extra_channel_bitstreams.back();
+      for (const auto& frame : ppf.frames) {
+        // Fail cleanly instead of indexing past a frame's extra channels.
+        JXL_RETURN_IF_ERROR(i < frame.extra_channels.size());
+        ec_bitstreams.emplace_back();
+        JXL_RETURN_IF_ERROR(EncodeExtraChannel(frame.extra_channels[i], bits,
+                                               &ec_bitstreams.back()));
+      }
+    }
+    return true;
+  }
+ protected:  // Format-specific hooks implemented by each subclass.
+  virtual Status EncodeFrame(const PackedPixelFile& ppf,
+                             const PackedFrame& frame,
+                             std::vector<uint8_t>* bytes) const = 0;
+  virtual Status EncodeExtraChannel(const PackedImage& image,
+                                    size_t bits_per_sample,
+                                    std::vector<uint8_t>* bytes) const = 0;
+};
+
+class PPMEncoder : public PNMEncoder {
+ public:
+  std::vector<JxlPixelFormat> AcceptedFormats() const override {
+    return {JxlPixelFormat{3, JXL_TYPE_UINT8, JXL_BIG_ENDIAN, 0},
+            JxlPixelFormat{3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}};
+  }
+  Status EncodeFrame(const PackedPixelFile& ppf, const PackedFrame& frame,
+                     std::vector<uint8_t>* bytes) const override {
+    return EncodeImage(frame.color, ppf.info.bits_per_sample, bytes);
+  }
+  Status EncodeExtraChannel(const PackedImage& image, size_t bits_per_sample,
+                            std::vector<uint8_t>* bytes) const override {
+    return EncodeImage(image, bits_per_sample, bytes);
+  }
+
+ private:
+  // Writes the P5/P6 header (P5 for one channel) followed by the raw pixels.
+  Status EncodeImage(const PackedImage& image, size_t bits_per_sample,
+                     std::vector<uint8_t>* bytes) const {
+    const char type = (image.format.num_channels == 1) ? '5' : '6';
+    const uint32_t maxval = (1u << bits_per_sample) - 1;
+    char header[kMaxHeaderSize];
+    const size_t header_size =
+        snprintf(header, kMaxHeaderSize, "P%c\n%" PRIuS " %" PRIuS "\n%u\n",
+                 type, image.xsize, image.ysize, maxval);
+    JXL_RETURN_IF_ERROR(header_size < kMaxHeaderSize);
+    bytes->assign(header, header + header_size);
+    const uint8_t* pixels = reinterpret_cast<const uint8_t*>(image.pixels());
+    bytes->insert(bytes->end(), pixels, pixels + image.pixels_size);
+    return true;
+  }
+};
+
+class PGMEncoder : public PPMEncoder {  // One-channel variant of PPMEncoder.
+ public:
+  std::vector<JxlPixelFormat> AcceptedFormats() const override {
+    return {JxlPixelFormat{1, JXL_TYPE_UINT8, JXL_BIG_ENDIAN, 0},
+            JxlPixelFormat{1, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}};
+  }
+};
+
+class PFMEncoder : public PNMEncoder {
+ public:
+  // Float samples with 1 or 3 channels in either byte order, listed as:
+  // 1ch/BE, 1ch/LE, 3ch/BE, 3ch/LE.
+  std::vector<JxlPixelFormat> AcceptedFormats() const override {
+    return {
+        JxlPixelFormat{1, JXL_TYPE_FLOAT, JXL_BIG_ENDIAN, /*align=*/0},
+        JxlPixelFormat{1, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, /*align=*/0},
+        JxlPixelFormat{3, JXL_TYPE_FLOAT, JXL_BIG_ENDIAN, /*align=*/0},
+        JxlPixelFormat{3, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, /*align=*/0},
+    };
+  }
+  Status EncodeFrame(const PackedPixelFile& ppf, const PackedFrame& frame,
+                     std::vector<uint8_t>* bytes) const override {
+    return EncodeImage(frame.color, bytes);
+  }
+  Status EncodeExtraChannel(const PackedImage& image, size_t bits_per_sample,
+                            std::vector<uint8_t>* bytes) const override {
+    return EncodeImage(image, bytes);
+  }
+
+ private:
+  // Writes the "Pf"/"PF" header, whose scale is negative for little-endian
+  // input, then copies the rows in bottom-to-top order.
+  Status EncodeImage(const PackedImage& image,
+                     std::vector<uint8_t>* bytes) const {
+    const char type = (image.format.num_channels == 1) ? 'f' : 'F';
+    const double scale =
+        (image.format.endianness == JXL_LITTLE_ENDIAN) ? -1.0 : 1.0;
+    char header[kMaxHeaderSize];
+    const size_t header_size =
+        snprintf(header, kMaxHeaderSize, "P%c\n%" PRIuS " %" PRIuS "\n%.1f\n",
+                 type, image.xsize, image.ysize, scale);
+    JXL_RETURN_IF_ERROR(header_size < kMaxHeaderSize);
+    bytes->resize(header_size + image.pixels_size);
+    memcpy(bytes->data(), header, header_size);
+    const uint8_t* src = reinterpret_cast<const uint8_t*>(image.pixels());
+    uint8_t* dst = bytes->data() + header_size;
+    // Source row y lands at destination row (ysize - 1 - y).
+    for (size_t y = 0; y < image.ysize; ++y) {
+      memcpy(dst + (image.ysize - 1 - y) * image.stride,
+             src + y * image.stride, image.stride);
+    }
+    return true;
+  }
+};
+
+// Encoder for PAM (Netpbm "P7"): color samples interleaved with extra channels.
+class PAMEncoder : public PNMEncoder {
+ public:
+  // Offers 1 to 4 channels of 8- or 16-bit big-endian samples.
+  std::vector<JxlPixelFormat> AcceptedFormats() const override {
+    std::vector<JxlPixelFormat> formats;
+    for (const uint32_t num_channels : {1, 2, 3, 4}) {
+      for (const JxlDataType data_type : {JXL_TYPE_UINT8, JXL_TYPE_UINT16}) {
+        formats.push_back(JxlPixelFormat{/*num_channels=*/num_channels,
+                                         /*data_type=*/data_type,
+                                         /*endianness=*/JXL_BIG_ENDIAN,
+                                         /*align=*/0});
+      }
+    }
+    return formats;
+  }
+  // Writes the P7 header and the interleaved samples of `frame` into `bytes`.
+  // Fails on mismatched extra channels/bit depths or an oversized header.
+  Status EncodeFrame(const PackedPixelFile& ppf, const PackedFrame& frame,
+                     std::vector<uint8_t>* bytes) const override {
+    const PackedImage& color = frame.color;
+    const auto& ec_info = ppf.extra_channels_info;
+    JXL_RETURN_IF_ERROR(frame.extra_channels.size() == ec_info.size());
+    const uint32_t num_color = color.format.num_channels;
+    JXL_RETURN_IF_ERROR(num_color >= 1 && num_color <= 4);  // kColorTypes index
+    for (const auto& ec : frame.extra_channels) {
+      if (ec.xsize != color.xsize || ec.ysize != color.ysize) {
+        return JXL_FAILURE("Extra channel and color size mismatch.");
+      }
+      if (ec.format.data_type != color.format.data_type ||
+          ec.format.endianness != color.format.endianness) {
+        return JXL_FAILURE("Extra channel and color format mismatch.");
+      }
+    }
+    // alpha_bits is 0 when there is no alpha; only compare depths if present.
+    if (ppf.info.alpha_bits != 0 &&
+        ppf.info.bits_per_sample != ppf.info.alpha_bits) {
+      return JXL_FAILURE("Alpha bit depth does not match image bit depth");
+    }
+    for (const auto& it : ec_info) {
+      if (it.ec_info.bits_per_sample != ppf.info.bits_per_sample) {
+        return JXL_FAILURE(
+            "Extra channel bit depth does not match image bit depth");
+      }
+    }
+    const char* kColorTypes[4] = {"GRAYSCALE", "GRAYSCALE_ALPHA", "RGB",
+                                  "RGB_ALPHA"};
+    uint32_t maxval = (1u << ppf.info.bits_per_sample) - 1;
+    uint32_t depth = num_color + ec_info.size();
+    char header[kMaxHeaderSize];
+    size_t pos = 0;
+    pos += snprintf(header + pos, kMaxHeaderSize - pos,
+                    "P7\nWIDTH %" PRIuS "\nHEIGHT %" PRIuS
+                    "\nDEPTH %u\nMAXVAL %u\nTUPLTYPE %s\n",
+                    color.xsize, color.ysize, depth, maxval,
+                    kColorTypes[num_color - 1]);
+    JXL_RETURN_IF_ERROR(pos < kMaxHeaderSize);
+    for (const auto& info : ec_info) {
+      pos += snprintf(header + pos, kMaxHeaderSize - pos, "TUPLTYPE %s\n",
+                      ExtraChannelTypeName(info.ec_info.type).c_str());
+      JXL_RETURN_IF_ERROR(pos < kMaxHeaderSize);
+    }
+    pos += snprintf(header + pos, kMaxHeaderSize - pos, "ENDHDR\n");
+    JXL_RETURN_IF_ERROR(pos < kMaxHeaderSize);
+    size_t total_size = color.pixels_size;
+    for (const auto& ec : frame.extra_channels) {
+      total_size += ec.pixels_size;
+    }
+    bytes->resize(pos + total_size);
+    memcpy(bytes->data(), header, pos);
+    // If we have no extra channels, just copy color pixel data over.
+    if (frame.extra_channels.empty()) {
+      memcpy(bytes->data() + pos, reinterpret_cast<uint8_t*>(color.pixels()),
+             color.pixels_size);
+      return true;
+    }
+    // Interleave color and extra channels.
+    const uint8_t* in = reinterpret_cast<const uint8_t*>(color.pixels());
+    std::vector<const uint8_t*> ec_in;
+    for (const auto& ec : frame.extra_channels) {
+      ec_in.push_back(reinterpret_cast<const uint8_t*>(ec.pixels()));
+    }
+    uint8_t* out = bytes->data() + pos;
+    size_t pwidth = PackedImage::BitsPerChannel(color.format.data_type) / 8;
+    for (size_t y = 0; y < color.ysize; ++y) {
+      for (size_t x = 0; x < color.xsize; ++x) {
+        memcpy(out, in, color.pixel_stride());
+        out += color.pixel_stride();
+        in += color.pixel_stride();
+        for (auto& p : ec_in) {
+          memcpy(out, p, pwidth);
+          out += pwidth;
+          p += pwidth;
+        }
+      }
+    }
+    return true;
+  }
+  // Extra channels are interleaved by EncodeFrame, so nothing is emitted here.
+  Status EncodeExtraChannel(const PackedImage& image, size_t bits_per_sample,
+                            std::vector<uint8_t>* bytes) const override {
+    return true;
+  }
+
+ private:
+  // Maps an extra channel type to the name written on its TUPLTYPE line.
+  static std::string ExtraChannelTypeName(JxlExtraChannelType type) {
+    switch (type) {
+      case JXL_CHANNEL_ALPHA: return std::string("Alpha");
+      case JXL_CHANNEL_DEPTH: return std::string("Depth");
+      case JXL_CHANNEL_SPOT_COLOR: return std::string("SpotColor");
+      case JXL_CHANNEL_SELECTION_MASK: return std::string("SelectionMask");
+      case JXL_CHANNEL_BLACK: return std::string("Black");
+      case JXL_CHANNEL_CFA: return std::string("CFA");
+      case JXL_CHANNEL_THERMAL: return std::string("Thermal");
+      default: return std::string("UNKNOWN");
+    }
+  }
+};
+
+}  // namespace
+
+std::unique_ptr<Encoder> GetPPMEncoder() {
+  return jxl::make_unique<PPMEncoder>();  // caller owns the new PPMEncoder
+}
+
+std::unique_ptr<Encoder> GetPFMEncoder() {
+  return jxl::make_unique<PFMEncoder>();  // caller owns the new PFMEncoder
+}
+
+std::unique_ptr<Encoder> GetPGMEncoder() {
+  return jxl::make_unique<PGMEncoder>();  // caller owns the new PGMEncoder
+}
+
+std::unique_ptr<Encoder> GetPAMEncoder() {
+  return jxl::make_unique<PAMEncoder>();  // caller owns the new PAMEncoder
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/enc/pnm.h b/third-party/libjxl/libjxl/lib/extras/enc/pnm.h
new file mode 100644
index 0000000000..403208cecd
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/enc/pnm.h
@@ -0,0 +1,28 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_ENC_PNM_H_
+#define LIB_EXTRAS_ENC_PNM_H_
+
+// Encoders for PAM/PGM/PPM/PFM pixels held in memory.
+
+// TODO(janwas): workaround for incorrect Win64 codegen (cause unknown)
+#include <hwy/highway.h>
+#include <memory>
+
+#include "lib/extras/enc/encode.h"
+
+namespace jxl {
+namespace extras {
+
+std::unique_ptr<Encoder> GetPAMEncoder();  // new PAM ("P7") encoder
+std::unique_ptr<Encoder> GetPGMEncoder();  // new PGM encoder
+std::unique_ptr<Encoder> GetPPMEncoder();  // new PPM encoder
+std::unique_ptr<Encoder> GetPFMEncoder();  // new PFM encoder
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_ENC_PNM_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/exif.cc b/third-party/libjxl/libjxl/lib/extras/exif.cc
new file mode 100644
index 0000000000..aea632732b
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/exif.cc
@@ -0,0 +1,55 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/exif.h"
+
+#include "lib/jxl/base/byte_order.h"
+
+namespace jxl {
+
+constexpr uint16_t kExifOrientationTag = 274;
+
+// Overwrites the value of the Exif orientation entry (tag 274) with 1, in
+// place. `exif` is left unchanged when it is shorter than 12 bytes, does not
+// start with a TIFF byte-order header, or holds no orientation entry of type
+// 3 with count 1 among the entries that pass the bounds check.
+void ResetExifOrientation(std::vector<uint8_t>& exif) {
+  if (exif.size() < 12) return;  // not enough bytes for a valid exif blob
+  uint8_t* t = exif.data();
+  const uint8_t* const end = t + exif.size();
+  const uint32_t magic = LoadLE32(t);
+  const bool bigendian = (magic == 0x2A004D4D);
+  if (!bigendian && magic != 0x002A4949) return;  // not a valid tiff header
+  const auto load16 = [bigendian](const uint8_t* p) -> uint16_t {
+    return bigendian ? LoadBE16(p) : LoadLE16(p);
+  };
+  const auto load32 = [bigendian](const uint8_t* p) -> uint32_t {
+    return bigendian ? LoadBE32(p) : LoadLE32(p);
+  };
+
+  const uint64_t offset = load32(t + 4);
+  if (exif.size() < 12 + offset + 2 || offset < 8) return;
+  t += offset;  // the entry count sits `offset` bytes into the blob
+  uint16_t remaining = load16(t);
+  t += 2;
+
+  // Entries are 12 bytes apart: tag at +0, type at +2, count at +4, value at +8.
+  for (; remaining > 0; --remaining, t += 12) {
+    if (t + 12 >= end) return;
+    if (load16(t) != kExifOrientationTag) continue;
+    const uint16_t type = load16(t + 2);
+    const uint32_t count = load32(t + 4);
+    if (type == 3 && count == 1) {
+      if (bigendian) {
+        StoreBE16(1, t + 8);
+      } else {
+        StoreLE16(1, t + 8);
+      }
+    }
+    return;
+  }
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/exif.h b/third-party/libjxl/libjxl/lib/extras/exif.h
new file mode 100644
index 0000000000..f22b2ccef5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/exif.h
@@ -0,0 +1,20 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_EXIF_H_
+#define LIB_EXTRAS_EXIF_H_
+
+#include <stdint.h>
+
+#include <vector>
+
+namespace jxl {
+
+// Sets the Exif orientation to identity (1) to avoid repeated orientation.
+void ResetExifOrientation(std::vector<uint8_t>& exif);
+
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_EXIF_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/hlg.cc b/third-party/libjxl/libjxl/lib/extras/hlg.cc
new file mode 100644
index 0000000000..e39a0807f5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/hlg.cc
@@ -0,0 +1,56 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/hlg.h"
+
+#include <cmath>
+
+#include "lib/jxl/enc_color_management.h"
+
+namespace jxl {
+// Returns 1.2 * 1.111^log2(peak / 1000) * 0.98^log2(surround / 5).
+float GetHlgGamma(const float peak_luminance, const float surround_luminance) {
+  const float gain = std::pow(1.111f, std::log2(peak_luminance / 1000.f));
+  return 1.2f * gain * std::pow(0.98f, std::log2(surround_luminance / 5.f));
+}
+
+// Transforms `ib` to linear RGB with 2100 primaries and a D65 white point,
+// then multiplies each pixel by luminance^(gamma - 1). Pixels for which that
+// factor is not finite keep their values.
+Status HlgOOTF(ImageBundle* ib, const float gamma, ThreadPool* pool) {
+  ColorEncoding linear_rec2020;
+  linear_rec2020.SetColorSpace(ColorSpace::kRGB);
+  linear_rec2020.primaries = Primaries::k2100;
+  linear_rec2020.white_point = WhitePoint::kD65;
+  linear_rec2020.tf.SetTransferFunction(TransferFunction::kLinear);
+  JXL_RETURN_IF_ERROR(linear_rec2020.CreateICC());
+  JXL_RETURN_IF_ERROR(ib->TransformTo(linear_rec2020, GetJxlCms(), pool));
+
+  // The per-pixel factor is the luminance raised to this power.
+  const float exponent = gamma - 1;
+  const auto scale_row = [&](const int y, const int thread) {
+    float* const JXL_RESTRICT r = ib->color()->PlaneRow(0, y);
+    float* const JXL_RESTRICT g = ib->color()->PlaneRow(1, y);
+    float* const JXL_RESTRICT b = ib->color()->PlaneRow(2, y);
+    for (size_t x = 0; x < ib->xsize(); ++x) {
+      const float luminance = 0.2627f * r[x] + 0.6780f * g[x] + 0.0593f * b[x];
+      const float ratio = std::pow(luminance, exponent);
+      if (!std::isfinite(ratio)) continue;
+      r[x] *= ratio;
+      g[x] *= ratio;
+      b[x] *= ratio;
+    }
+  };
+
+  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, ib->ysize(), ThreadPool::NoInit,
+                                scale_row, "HlgOOTF"));
+  return true;
+}
+
+Status HlgInverseOOTF(ImageBundle* ib, const float gamma, ThreadPool* pool) {
+  return HlgOOTF(ib, 1.f / gamma, pool);  // same transform with 1 / gamma
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/hlg.h b/third-party/libjxl/libjxl/lib/extras/hlg.h
new file mode 100644
index 0000000000..4cfec444f4
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/hlg.h
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_HLG_H_
+#define LIB_EXTRAS_HLG_H_
+
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+// Gamma derived from the peak and surround luminance (surround defaults to 5).
+float GetHlgGamma(float peak_luminance, float surround_luminance = 5.f);
+// Scales the pixels of `ib` in place by luminance^(gamma - 1).
+Status HlgOOTF(ImageBundle* ib, float gamma, ThreadPool* pool = nullptr);
+// Runs HlgOOTF with 1 / gamma in place of gamma.
+Status HlgInverseOOTF(ImageBundle* ib, float gamma, ThreadPool* pool = nullptr);
+
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_HLG_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/jpegli_test.cc b/third-party/libjxl/libjxl/lib/extras/jpegli_test.cc
new file mode 100644
index 0000000000..a710048e83
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/jpegli_test.cc
@@ -0,0 +1,413 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if JPEGXL_ENABLE_JPEGLI
+
+#include "lib/extras/dec/jpegli.h"
+
+#include <jxl/color_encoding.h>
+#include <stdint.h>
+
+#include <memory>
+#include <string>
+
+#include "lib/extras/dec/color_hints.h"
+#include "lib/extras/dec/decode.h"
+#include "lib/extras/dec/jpg.h"
+#include "lib/extras/enc/encode.h"
+#include "lib/extras/enc/jpegli.h"
+#include "lib/extras/enc/jpg.h"
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/test_image.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+using test::Butteraugli3Norm;
+using test::ButteraugliDistance;
+using test::TestImage;
+
+Status ReadTestImage(const std::string& pathname, PackedPixelFile* ppf) {
+  const bool is_ppm = pathname.find(".ppm") != std::string::npos;
+  const bool is_pgm = pathname.find(".pgm") != std::string::npos;
+  ColorHints hints;
+  // ".ppm" paths get the RGB color_space hint, ".pgm" paths the gray one.
+  if (is_ppm) hints.Add("color_space", "RGB_D65_SRG_Rel_SRG");
+  if (!is_ppm && is_pgm) hints.Add("color_space", "Gra_D65_Rel_SRG");
+  const PaddedBytes file_bytes = jxl::test::ReadTestData(pathname);
+  return DecodeBytes(Span<const uint8_t>(file_bytes), hints, ppf);
+}
+
+// Returns the bytes of the consecutive marker segments with codes 0xe0..0xf0
+// that start right after the two-byte SOI, markers and lengths included.
+std::vector<uint8_t> GetAppData(const std::vector<uint8_t>& compressed) {
+  const size_t total = compressed.size();
+  size_t end = 2;  // After SOI
+  while (end + 4 < total) {
+    const uint8_t* seg = compressed.data() + end;
+    const bool is_app = seg[0] == 0xff && seg[1] >= 0xe0 && seg[1] <= 0xf0;
+    if (!is_app) break;
+    // Two marker bytes plus the big-endian length stored after them.
+    const size_t len = (seg[2] << 8) + seg[3] + 2;
+    if (end + len > total) break;
+    end += len;
+  }
+  if (end == 2) return {};
+  return std::vector<uint8_t>(compressed.begin() + 2, compressed.begin() + end);
+}
+// Decodes `compressed` through DecodeImageJPG with default ColorHints.
+Status DecodeWithLibjpeg(const std::vector<uint8_t>& compressed,
+                         PackedPixelFile* ppf,
+                         const JPGDecompressParams* dparams = nullptr) {
+  return DecodeImageJPG(Span<const uint8_t>(compressed), ColorHints(), ppf,
+                        /*constraints=*/nullptr, dparams);
+}
+// Compresses `ppf` via GetJPEGEncoder() with option "q" set to `quality`.
+Status EncodeWithLibjpeg(const PackedPixelFile& ppf, int quality,
+                         std::vector<uint8_t>* compressed) {
+  const std::unique_ptr<Encoder> jpeg_encoder = GetJPEGEncoder();
+  jpeg_encoder->SetOption("q", std::to_string(quality));
+  EncodedImage output;
+  JXL_RETURN_IF_ERROR(jpeg_encoder->Encode(ppf, &output));
+  JXL_RETURN_IF_ERROR(!output.bitstreams.empty());
+  *compressed = std::move(output.bitstreams.front());
+  return true;
+}
+// Converts the external encoding to a ColorEncoding and describes that.
+std::string Description(const JxlColorEncoding& color_encoding) {
+  ColorEncoding c_enc;
+  JXL_CHECK(ConvertExternalToInternalColorEncoding(color_encoding, &c_enc));
+  return Description(c_enc);
+}
+// Average number of compressed bits spent per pixel of `ppf`.
+float BitsPerPixel(const PackedPixelFile& ppf,
+                   const std::vector<uint8_t>& compressed) {
+  const uint64_t num_pixels = uint64_t{ppf.info.xsize} * ppf.info.ysize;
+  return compressed.size() * 8.0 / num_pixels;  // 64-bit count cannot wrap
+}
+
+TEST(JpegliTest, JpegliSRGBDecodeTest) {
+  TEST_LIBJPEG_SUPPORT();
+  const std::string image_path = "jxl/flower/flower_small.rgb.depth8.ppm";
+  PackedPixelFile reference;
+  ASSERT_TRUE(ReadTestImage(image_path, &reference));
+  EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(reference.color_encoding));
+  EXPECT_EQ(8, reference.info.bits_per_sample);
+
+  // Compress once with libjpeg, then decode with libjpeg and with jpegli.
+  std::vector<uint8_t> jpeg_bytes;
+  ASSERT_TRUE(EncodeWithLibjpeg(reference, 90, &jpeg_bytes));
+  PackedPixelFile via_libjpeg, via_jpegli;
+  ASSERT_TRUE(DecodeWithLibjpeg(jpeg_bytes, &via_libjpeg));
+  JpegDecompressParams jpegli_params;
+  ASSERT_TRUE(DecodeJpeg(jpeg_bytes, jpegli_params, nullptr, &via_jpegli));
+  const auto jpegli_error = ButteraugliDistance(reference, via_jpegli);
+  EXPECT_LT(jpegli_error, ButteraugliDistance(reference, via_libjpeg));
+}
+
+TEST(JpegliTest, JpegliGrayscaleDecodeTest) {
+  TEST_LIBJPEG_SUPPORT();
+  const std::string image_path = "jxl/flower/flower_small.g.depth8.pgm";
+  PackedPixelFile reference;
+  ASSERT_TRUE(ReadTestImage(image_path, &reference));
+  EXPECT_EQ("Gra_D65_Rel_SRG", Description(reference.color_encoding));
+  EXPECT_EQ(8, reference.info.bits_per_sample);
+
+  // Compress once with libjpeg, then decode with libjpeg and with jpegli.
+  std::vector<uint8_t> jpeg_bytes;
+  ASSERT_TRUE(EncodeWithLibjpeg(reference, 90, &jpeg_bytes));
+  PackedPixelFile via_libjpeg, via_jpegli;
+  ASSERT_TRUE(DecodeWithLibjpeg(jpeg_bytes, &via_libjpeg));
+  JpegDecompressParams jpegli_params;
+  ASSERT_TRUE(DecodeJpeg(jpeg_bytes, jpegli_params, nullptr, &via_jpegli));
+  const auto jpegli_error = ButteraugliDistance(reference, via_jpegli);
+  EXPECT_LT(jpegli_error, ButteraugliDistance(reference, via_libjpeg));
+}
+
+TEST(JpegliTest, JpegliXYBEncodeTest) {
+  TEST_LIBJPEG_SUPPORT();
+  const std::string image_path = "jxl/flower/flower_small.rgb.depth8.ppm";
+  PackedPixelFile source;
+  ASSERT_TRUE(ReadTestImage(image_path, &source));
+  EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(source.color_encoding));
+  EXPECT_EQ(8, source.info.bits_per_sample);
+
+  // Encode with the xyb setting on; libjpeg must decode the result.
+  JpegSettings xyb_settings;
+  xyb_settings.xyb = true;
+  std::vector<uint8_t> jpeg_bytes;
+  ASSERT_TRUE(EncodeJpeg(source, xyb_settings, nullptr, &jpeg_bytes));
+  PackedPixelFile decoded;
+  ASSERT_TRUE(DecodeWithLibjpeg(jpeg_bytes, &decoded));
+  EXPECT_THAT(BitsPerPixel(source, jpeg_bytes), IsSlightlyBelow(1.45f));
+  EXPECT_THAT(ButteraugliDistance(source, decoded), IsSlightlyBelow(1.32f));
+}
+
+TEST(JpegliTest, JpegliDecodeTestLargeSmoothArea) {
+  TEST_LIBJPEG_SUPPORT();
+  constexpr size_t kWidth = 2070;
+  constexpr size_t kHeight = 1063;
+  TestImage image;
+  image.SetDimensions(kWidth, kHeight).SetChannels(3);
+  image.SetAllBitDepths(8).SetEndianness(JXL_NATIVE_ENDIAN);
+  TestImage::Frame frame = image.AddFrame();
+  frame.RandomFill();
+  // Overwrite the top half with a constant value. This large smooth area
+  // checks that the bias statistics calculation can handle many blocks with
+  // all-zero AC coefficients.
+  for (size_t y = 0; y < kHeight / 2; ++y) {
+    for (size_t x = 0; x < kWidth; ++x) {
+      for (size_t c = 0; c < 3; ++c) {
+        frame.SetValue(y, x, c, 0.5f);
+      }
+    }
+  }
+  const PackedPixelFile& original = image.ppf();
+
+  std::vector<uint8_t> jpeg_bytes;
+  ASSERT_TRUE(EncodeWithLibjpeg(original, 90, &jpeg_bytes));
+
+  PackedPixelFile decoded;
+  JpegDecompressParams jpegli_params;
+  ASSERT_TRUE(DecodeJpeg(jpeg_bytes, jpegli_params, nullptr, &decoded));
+  EXPECT_LT(ButteraugliDistance(original, decoded), 3.0f);
+}
+
+TEST(JpegliTest, JpegliYUVEncodeTest) {
+  TEST_LIBJPEG_SUPPORT();
+  const std::string image_path = "jxl/flower/flower_small.rgb.depth8.ppm";
+  PackedPixelFile source;
+  ASSERT_TRUE(ReadTestImage(image_path, &source));
+  EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(source.color_encoding));
+  EXPECT_EQ(8, source.info.bits_per_sample);
+
+  // Encode with the xyb setting off; libjpeg must decode the result.
+  JpegSettings yuv_settings;
+  yuv_settings.xyb = false;
+  std::vector<uint8_t> jpeg_bytes;
+  ASSERT_TRUE(EncodeJpeg(source, yuv_settings, nullptr, &jpeg_bytes));
+  PackedPixelFile decoded;
+  ASSERT_TRUE(DecodeWithLibjpeg(jpeg_bytes, &decoded));
+  EXPECT_THAT(BitsPerPixel(source, jpeg_bytes), IsSlightlyBelow(1.7f));
+  EXPECT_THAT(ButteraugliDistance(source, decoded), IsSlightlyBelow(1.32f));
+}
+
+TEST(JpegliTest, JpegliYUVChromaSubsamplingEncodeTest) {
+  TEST_LIBJPEG_SUPPORT();
+  const std::string image_path = "jxl/flower/flower_small.rgb.depth8.ppm";
+  PackedPixelFile source;
+  ASSERT_TRUE(ReadTestImage(image_path, &source));
+  EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(source.color_encoding));
+  EXPECT_EQ(8, source.info.bits_per_sample);
+
+  // Every subsampling mode is held to the same size and distance limits.
+  JpegSettings settings;
+  std::vector<uint8_t> jpeg_bytes;
+  for (const std::string sampling : {"440", "422", "420"}) {
+    settings.xyb = false;
+    settings.chroma_subsampling = sampling;
+    ASSERT_TRUE(EncodeJpeg(source, settings, nullptr, &jpeg_bytes));
+    PackedPixelFile decoded;
+    ASSERT_TRUE(DecodeWithLibjpeg(jpeg_bytes, &decoded));
+    EXPECT_LE(BitsPerPixel(source, jpeg_bytes), 1.55f);
+    EXPECT_LE(ButteraugliDistance(source, decoded), 1.82f);
+  }
+}
+
+TEST(JpegliTest, JpegliYUVEncodeTestNoAq) {
+  TEST_LIBJPEG_SUPPORT();
+  const std::string image_path = "jxl/flower/flower_small.rgb.depth8.ppm";
+  PackedPixelFile source;
+  ASSERT_TRUE(ReadTestImage(image_path, &source));
+  EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(source.color_encoding));
+  EXPECT_EQ(8, source.info.bits_per_sample);
+
+  // Encode with both xyb and adaptive quantization switched off.
+  JpegSettings plain_settings;
+  plain_settings.xyb = false;
+  plain_settings.use_adaptive_quantization = false;
+  std::vector<uint8_t> jpeg_bytes;
+  ASSERT_TRUE(EncodeJpeg(source, plain_settings, nullptr, &jpeg_bytes));
+  PackedPixelFile decoded;
+  ASSERT_TRUE(DecodeWithLibjpeg(jpeg_bytes, &decoded));
+  EXPECT_THAT(BitsPerPixel(source, jpeg_bytes), IsSlightlyBelow(1.85f));
+  EXPECT_THAT(ButteraugliDistance(source, decoded), IsSlightlyBelow(1.25f));
+}
+
+TEST(JpegliTest, JpegliHDRRoundtripTest) {
+  const std::string image_path = "jxl/hdr_room.png";
+  PackedPixelFile source;
+  ASSERT_TRUE(ReadTestImage(image_path, &source));
+  EXPECT_EQ("RGB_D65_202_Rel_HLG", Description(source.color_encoding));
+  EXPECT_EQ(16, source.info.bits_per_sample);
+
+  // Encode and decode with jpegli, asking the decoder for 16-bit output.
+  JpegSettings encode_settings;
+  encode_settings.xyb = false;
+  std::vector<uint8_t> jpeg_bytes;
+  ASSERT_TRUE(EncodeJpeg(source, encode_settings, nullptr, &jpeg_bytes));
+  PackedPixelFile decoded;
+  JpegDecompressParams decode_params;
+  decode_params.output_data_type = JXL_TYPE_UINT16;
+  ASSERT_TRUE(DecodeJpeg(jpeg_bytes, decode_params, nullptr, &decoded));
+  EXPECT_THAT(BitsPerPixel(source, jpeg_bytes), IsSlightlyBelow(2.95f));
+  EXPECT_THAT(ButteraugliDistance(source, decoded), IsSlightlyBelow(1.05f));
+}
+
+TEST(JpegliTest, JpegliSetAppData) {
+  const std::string image_path = "jxl/flower/flower_small.rgb.depth8.ppm";
+  PackedPixelFile source;
+  ASSERT_TRUE(ReadTestImage(image_path, &source));
+  EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(source.color_encoding));
+  EXPECT_EQ(8, source.info.bits_per_sample);
+
+  std::vector<uint8_t> jpeg_bytes;
+  JpegSettings settings;
+  const auto encode = [&] {
+    return EncodeJpeg(source, settings, nullptr, &jpeg_bytes);
+  };
+  // One well-formed segment must come back unchanged.
+  settings.app_data = {0xff, 0xe3, 0, 4, 0, 1};
+  EXPECT_TRUE(encode());
+  EXPECT_EQ(settings.app_data, GetAppData(jpeg_bytes));
+
+  // Two consecutive segments.
+  settings.app_data = {0xff, 0xe3, 0, 6, 0, 1, 2, 3, 0xff, 0xef, 0, 4, 0, 1};
+  EXPECT_TRUE(encode());
+  EXPECT_EQ(settings.app_data, GetAppData(jpeg_bytes));
+
+  // With xyb on, only the leading app_data.size() bytes are compared.
+  settings.xyb = true;
+  EXPECT_TRUE(encode());
+  EXPECT_EQ(0, memcmp(settings.app_data.data(), GetAppData(jpeg_bytes).data(),
+                      settings.app_data.size()));
+  // Each of these byte strings must make the encoder fail.
+  settings.xyb = false;
+  const std::vector<std::vector<uint8_t>> rejected = {
+      {0},
+      {0xff, 0xe0},
+      {0xff, 0xe0, 0, 2},
+      {0xff, 0xeb, 0, 4, 0},
+      {0xff, 0xeb, 0, 4, 0, 1, 2, 3},
+      {0xff, 0xab, 0, 4, 0, 1},
+  };
+  for (const std::vector<uint8_t>& bad_data : rejected) {
+    settings.app_data = bad_data;
+    EXPECT_FALSE(encode());
+  }
+  // A segment spelling "ICC_PROFILE" is accepted without xyb, refused with it.
+  settings.xyb = false;
+  settings.app_data = {
+      0xff, 0xeb, 0,    4,    0,    1,                       //
+      0xff, 0xe2, 0,    20,   0x49, 0x43, 0x43, 0x5F, 0x50,  //
+      0x52, 0x4F, 0x46, 0x49, 0x4C, 0x45, 0x00, 0,    1,     //
+      0,    0,    0,    0,                                   //
+  };
+  EXPECT_TRUE(encode());
+  EXPECT_EQ(settings.app_data, GetAppData(jpeg_bytes));
+  settings.xyb = true;
+  EXPECT_FALSE(encode());
+}
+
+struct TestConfig {
+  int num_colors;  // copied to the decoders' num_colors
+  int passes;      // 2 enables two_pass_quant
+  int dither;      // dither_mode; also indexes kDitherModeStr
+};
+// Fixture for tests parameterized over a TestConfig.
+class JpegliColorQuantTestParam : public ::testing::TestWithParam<TestConfig> {
+};
+
+TEST_P(JpegliColorQuantTestParam, JpegliColorQuantizeTest) {
+  TEST_LIBJPEG_SUPPORT();
+  const TestConfig config = GetParam();
+  const bool two_pass = (config.passes == 2);
+  const std::string image_path = "jxl/flower/flower_small.rgb.depth8.ppm";
+  PackedPixelFile reference;
+  ASSERT_TRUE(ReadTestImage(image_path, &reference));
+  EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(reference.color_encoding));
+  EXPECT_EQ(8, reference.info.bits_per_sample);
+
+  std::vector<uint8_t> jpeg_bytes;
+  ASSERT_TRUE(EncodeWithLibjpeg(reference, 90, &jpeg_bytes));
+
+  // Quantized decode through libjpeg.
+  JPGDecompressParams libjpeg_params;
+  libjpeg_params.two_pass_quant = two_pass;
+  libjpeg_params.num_colors = config.num_colors;
+  libjpeg_params.dither_mode = config.dither;
+  PackedPixelFile via_libjpeg;
+  ASSERT_TRUE(DecodeWithLibjpeg(jpeg_bytes, &via_libjpeg, &libjpeg_params));
+
+  // The same settings through jpegli.
+  JpegDecompressParams jpegli_params;
+  jpegli_params.two_pass_quant = two_pass;
+  jpegli_params.num_colors = config.num_colors;
+  jpegli_params.dither_mode = config.dither;
+  PackedPixelFile via_jpegli;
+  ASSERT_TRUE(DecodeJpeg(jpeg_bytes, jpegli_params, nullptr, &via_jpegli));
+
+  const double libjpeg_dist = Butteraugli3Norm(reference, via_libjpeg);
+  const double jpegli_dist = Butteraugli3Norm(reference, via_jpegli);
+  printf("distance: %f  vs %f\n", jpegli_dist, libjpeg_dist);
+  double max_ratio;  // allowed jpegli / libjpeg distance ratio
+  if (config.passes == 1) {
+    // TODO(szabadka) Fix this case.
+    const bool known_bad = config.num_colors == 16 && config.dither == 2;
+    max_ratio = known_bad ? 1.5 : 1.05;
+  } else if (config.num_colors > 64) {
+    // TODO(szabadka) Fix 2pass quantization for <= 64 colors.
+    max_ratio = 1.1;
+  } else {
+    max_ratio = config.num_colors > 32 ? 1.2 : 1.7;
+  }
+  EXPECT_LT(jpegli_dist, libjpeg_dist * max_ratio);
+}
+
+// Enumerates 8..256 colors (doubling each step) with one and two passes.
+// One-pass configs cover dither modes 0, 1 and 2; two-pass configs only 0
+// and 2, because the dither index advances by `passes`.
+std::vector<TestConfig> GenerateTests() {
+  std::vector<TestConfig> configs;
+  for (int num_colors = 8; num_colors <= 256; num_colors *= 2) {
+    for (const int passes : {1, 2}) {
+      for (int dither = 0; dither < 3; dither += passes) {
+        const TestConfig config{num_colors, passes, dither};
+        configs.push_back(config);
+      }
+    }
+  }
+  return configs;
+}
+
+// Streams passes, colors and dither mode without separators, e.g.
+// "1pass8colorsNodither".
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+  static constexpr const char* kDitherModeStr[] = {"No", "Ordered", "FS"};
+  return os << c.passes << "pass" << c.num_colors << "colors"
+            << kDitherModeStr[c.dither] << "dither";
+}
+// Returns the text produced by streaming the test's TestConfig parameter.
+std::string TestDescription(const testing::TestParamInfo<TestConfig>& info) {
+  std::ostringstream formatted;
+  formatted << info.param;
+  return formatted.str();
+}
+
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(JpegliColorQuantTest,
+                                   JpegliColorQuantTestParam,
+                                   testing::ValuesIn(GenerateTests()),
+                                   TestDescription);
+
+}  // namespace
+}  // namespace extras
+}  // namespace jxl
+#endif  // JPEGXL_ENABLE_JPEGLI
diff --git a/third-party/libjxl/libjxl/lib/extras/metrics.cc b/third-party/libjxl/libjxl/lib/extras/metrics.cc
new file mode 100644
index 0000000000..8d91da6b8f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/metrics.cc
@@ -0,0 +1,224 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/metrics.h"
+
+#include <math.h>
+#include <stdlib.h>
+
+#include <atomic>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/extras/metrics.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::GetLane;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::Rebind;
+// Returns the mean of three norms of `distmap` built from powers p, 2p and 4p.
+double ComputeDistanceP(const ImageF& distmap, const ButteraugliParams& params,
+                        double p) {
+  const double onePerPixels = 1.0 / (distmap.ysize() * distmap.xsize());
+  if (std::abs(p - 3.0) < 1E-6) {
+    double sum1[3] = {0.0};  // scalar sums for the row tails
+
+// Prefer double if possible, but otherwise use float rather than scalar.
+#if HWY_CAP_FLOAT64
+    using T = double;
+    const Rebind<float, HWY_FULL(double)> df;
+#else
+    using T = float;
+#endif
+    const HWY_FULL(T) d;
+    constexpr size_t N = MaxLanes(HWY_FULL(T)());
+    // Manually aligned storage to avoid asan crash on clang-7 due to
+    // unaligned spill.
+    HWY_ALIGN T sum_totals0[N] = {0};
+    HWY_ALIGN T sum_totals1[N] = {0};
+    HWY_ALIGN T sum_totals2[N] = {0};
+
+    for (size_t y = 0; y < distmap.ysize(); ++y) {
+      const float* JXL_RESTRICT row = distmap.ConstRow(y);
+
+      auto sums0 = Zero(d);
+      auto sums1 = Zero(d);
+      auto sums2 = Zero(d);
+
+      size_t x = 0;
+      for (; x + Lanes(d) <= distmap.xsize(); x += Lanes(d)) {
+#if HWY_CAP_FLOAT64
+        const auto d1 = PromoteTo(d, Load(df, row + x));
+#else
+        const auto d1 = Load(d, row + x);
+#endif
+        const auto d2 = Mul(d1, Mul(d1, d1));  // 3rd power of the value
+        sums0 = Add(sums0, d2);
+        const auto d3 = Mul(d2, d2);  // 6th power
+        sums1 = Add(sums1, d3);
+        const auto d4 = Mul(d3, d3);  // 12th power
+        sums2 = Add(sums2, d4);
+      }
+      // Fold this row's vector sums into the running per-lane totals.
+      Store(Add(sums0, Load(d, sum_totals0)), d, sum_totals0);
+      Store(Add(sums1, Load(d, sum_totals1)), d, sum_totals1);
+      Store(Add(sums2, Load(d, sum_totals2)), d, sum_totals2);
+      // Scalar loop for the columns left over after the vector loop.
+      for (; x < distmap.xsize(); ++x) {
+        const double d1 = row[x];
+        double d2 = d1 * d1 * d1;
+        sum1[0] += d2;
+        d2 *= d2;
+        sum1[1] += d2;
+        d2 *= d2;
+        sum1[2] += d2;
+      }
+    }
+    double v = 0;
+    v += pow(
+        onePerPixels * (sum1[0] + GetLane(SumOfLanes(d, Load(d, sum_totals0)))),
+        1.0 / (p * 1.0));
+    v += pow(
+        onePerPixels * (sum1[1] + GetLane(SumOfLanes(d, Load(d, sum_totals1)))),
+        1.0 / (p * 2.0));
+    v += pow(
+        onePerPixels * (sum1[2] + GetLane(SumOfLanes(d, Load(d, sum_totals2)))),
+        1.0 / (p * 4.0));
+    v /= 3.0;
+    return v;
+  } else {
+    static std::atomic<int> once{0};
+    if (once.fetch_add(1, std::memory_order_relaxed) == 0) {  // first call only
+      JXL_WARNING("WARNING: using slow ComputeDistanceP");
+    }
+    double sum1[3] = {0.0};
+    for (size_t y = 0; y < distmap.ysize(); ++y) {
+      const float* JXL_RESTRICT row = distmap.ConstRow(y);
+      for (size_t x = 0; x < distmap.xsize(); ++x) {
+        double d2 = std::pow(row[x], p);
+        sum1[0] += d2;
+        d2 *= d2;
+        sum1[1] += d2;
+        d2 *= d2;
+        sum1[2] += d2;
+      }
+    }
+    double v = 0;
+    for (int i = 0; i < 3; ++i) {
+      v += pow(onePerPixels * (sum1[i]), 1.0 / (p * (1 << i)));
+    }
+    v /= 3.0;
+    return v;
+  }
+}
+
+// Accumulates into `sum_of_squares` the per-channel squared differences
+// between the two images, measured in YUV after conversion to sRGB.
+void ComputeSumOfSquares(const ImageBundle& ib1, const ImageBundle& ib2,
+                         const JxlCmsInterface& cms, double sum_of_squares[3]) {
+  // Convert to sRGB - closer to perception than linear.
+  Image3F copy1;
+  Image3F copy2;
+  const Image3F* srgb1 = &ib1.color();
+  const Image3F* srgb2 = &ib2.color();
+  if (!ib1.IsSRGB()) {
+    JXL_CHECK(
+        ib1.CopyTo(Rect(ib1), ColorEncoding::SRGB(ib1.IsGray()), cms, &copy1));
+    srgb1 = &copy1;
+  }
+  if (!ib2.IsSRGB()) {
+    JXL_CHECK(
+        ib2.CopyTo(Rect(ib2), ColorEncoding::SRGB(ib2.IsGray()), cms, &copy2));
+    srgb2 = &copy2;
+  }
+  JXL_CHECK(SameSize(*srgb1, *srgb2));
+
+  // TODO(veluca): SIMD.
+  const float kRgbToYuv[3][3] = {{0.299, 0.587, 0.114},
+                                 {-0.14713, -0.28886, 0.436},
+                                 {0.615, -0.51499, -0.10001}};
+  const size_t width = srgb1->xsize();
+  const size_t height = srgb1->ysize();
+  for (size_t y = 0; y < height; ++y) {
+    const float* JXL_RESTRICT rows_a[3];
+    const float* JXL_RESTRICT rows_b[3];
+    for (size_t c = 0; c < 3; ++c) {
+      rows_a[c] = srgb1->ConstPlaneRow(c, y);
+      rows_b[c] = srgb2->ConstPlaneRow(c, y);
+    }
+    for (size_t x = 0; x < width; ++x) {
+      // YUV conversion is linear, so we can run it on the difference.
+      const float rgb_diff[3] = {rows_a[0][x] - rows_b[0][x],
+                                 rows_a[1][x] - rows_b[1][x],
+                                 rows_a[2][x] - rows_b[2][x]};
+      for (size_t c = 0; c < 3; ++c) {
+        float yuv_diff = 0.0f;
+        for (size_t k = 0; k < 3; ++k) {
+          yuv_diff += kRgbToYuv[c][k] * rgb_diff[k];
+        }
+        sum_of_squares[c] += yuv_diff * yuv_diff;
+      }
+    }
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(ComputeDistanceP);
+double ComputeDistanceP(const ImageF& distmap, const ButteraugliParams& params,
+                        double p) {  // forwards to the HWY_EXPORTed function
+  return HWY_DYNAMIC_DISPATCH(ComputeDistanceP)(distmap, params, p);
+}
+
+HWY_EXPORT(ComputeSumOfSquares);
+
+// Returns the square of the weighted sum of per-channel root sums of squares.
+double ComputeDistance2(const ImageBundle& ib1, const ImageBundle& ib2,
+                        const JxlCmsInterface& cms) {
+  double channel_sums[3] = {};
+  HWY_DYNAMIC_DISPATCH(ComputeSumOfSquares)(ib1, ib2, cms, channel_sums);
+  // Weighted PSNR as in JPEG-XL: chroma counts 1/8. The weight is applied
+  // after the square root - squaring it (1/64) would be too extreme.
+  const float kWeights[3] = {6.0f / 8, 1.0f / 8, 1.0f / 8};
+  double weighted = 0;
+  for (size_t c = 0; c < 3; ++c) {
+    weighted += std::sqrt(channel_sums[c]) * kWeights[c];
+  }
+  return weighted * weighted;  // This function returns distance *squared*.
+}
+// Channel-weighted average of per-channel PSNR; 0 if the sizes differ.
+double ComputePSNR(const ImageBundle& ib1, const ImageBundle& ib2,
+                   const JxlCmsInterface& cms) {
+  if (!SameSize(ib1, ib2)) return 0.0;
+  double channel_sums[3] = {};
+  HWY_DYNAMIC_DISPATCH(ComputeSumOfSquares)(ib1, ib2, cms, channel_sums);
+  constexpr double kChannelWeights[3] = {6.0 / 8, 1.0 / 8, 1.0 / 8};
+  const size_t num_pixels = ib1.xsize() * ib1.ysize();
+  double weighted_psnr = 0;
+  for (int c = 0; c < 3; ++c) {
+    const double sum = channel_sums[c];
+    const double rmse = std::sqrt(sum / num_pixels);
+    const double psnr = (sum == 0) ? 99.99 : 20 * std::log10(1 / rmse);
+    weighted_psnr += kChannelWeights[c] * psnr;
+  }
+  return weighted_psnr;
+}
+
+}  // namespace jxl
+#endif
diff --git a/third-party/libjxl/libjxl/lib/extras/metrics.h b/third-party/libjxl/libjxl/lib/extras/metrics.h
new file mode 100644
index 0000000000..87a69a99ce
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/metrics.h
@@ -0,0 +1,28 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_METRICS_H_
+#define LIB_EXTRAS_METRICS_H_
+
+#include <stdint.h>
+
+#include "lib/jxl/butteraugli/butteraugli.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+// Computes p-norm given the butteraugli distmap.
+double ComputeDistanceP(const ImageF& distmap, const ButteraugliParams& params,
+                        double p);
+
+double ComputeDistance2(const ImageBundle& ib1, const ImageBundle& ib2,
+                        const JxlCmsInterface& cms);
+
+double ComputePSNR(const ImageBundle& ib1, const ImageBundle& ib2,
+                   const JxlCmsInterface& cms);
+
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_METRICS_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/packed_image.h b/third-party/libjxl/libjxl/lib/extras/packed_image.h
new file mode 100644
index 0000000000..3eaf5a0c6d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/packed_image.h
@@ -0,0 +1,170 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_PACKED_IMAGE_H_
+#define LIB_EXTRAS_PACKED_IMAGE_H_
+
+// Helper class for storing external (int or float, interleaved) images. This is
+// the common format used by other libraries and in the libjxl API.
+
+#include <jxl/codestream_header.h>
+#include <jxl/encode.h>
+#include <jxl/types.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/common.h"
+
+namespace jxl {
+namespace extras {
+
+// Class representing an interleaved image with a bunch of channels.
+class PackedImage {
+ public:
+  PackedImage(size_t xsize, size_t ysize, const JxlPixelFormat& format)
+      : PackedImage(xsize, ysize, format, CalcStride(format, xsize)) {}  // Row stride is derived from the format.
+
+  PackedImage Copy() const {  // Returns a new image holding a byte copy of the pixels.
+    PackedImage copy(xsize, ysize, format);  // NOTE(review): recomputes stride; assumes `stride` was not changed after construction.
+    memcpy(reinterpret_cast<uint8_t*>(copy.pixels()),
+           reinterpret_cast<const uint8_t*>(pixels()), pixels_size);
+    return copy;
+  }
+
+  // The interleaved pixels as defined in the storage format.
+  void* pixels() const { return pixels_.get(); }
+
+  // The image size in pixels.
+  size_t xsize;
+  size_t ysize;
+
+  // The number of bytes per row.
+  size_t stride;
+
+  // Pixel storage format and buffer size of the pixels_ pointer.
+  JxlPixelFormat format;
+  size_t pixels_size;
+
+  size_t pixel_stride() const {  // Bytes occupied by one interleaved pixel.
+    return (BitsPerChannel(format.data_type) * format.num_channels /
+            jxl::kBitsPerByte);
+  }
+
+  static size_t BitsPerChannel(JxlDataType data_type) {  // Sample width in bits.
+    switch (data_type) {
+      case JXL_TYPE_UINT8:
+        return 8;
+      case JXL_TYPE_UINT16:
+        return 16;
+      case JXL_TYPE_FLOAT:
+        return 32;
+      case JXL_TYPE_FLOAT16:
+        return 16;
+      default:
+        JXL_ABORT("Unhandled JxlDataType");  // Any other data type aborts.
+    }
+  }
+
+ private:
+  PackedImage(size_t xsize, size_t ysize, const JxlPixelFormat& format,
+              size_t stride)
+      : xsize(xsize),
+        ysize(ysize),
+        stride(stride),
+        format(format),
+        pixels_size(ysize * stride),  // NOTE(review): product is not checked for overflow.
+        pixels_(malloc(std::max<size_t>(1, pixels_size)), free) {}  // At least 1 byte; NOTE(review): malloc result is not checked.
+
+  static size_t CalcStride(const JxlPixelFormat& format, size_t xsize) {
+    size_t stride = xsize * (BitsPerChannel(format.data_type) *
+                             format.num_channels / jxl::kBitsPerByte);
+    if (format.align > 1) {  // Round the row size up to a multiple of align.
+      stride = jxl::DivCeil(stride, format.align) * format.align;
+    }
+    return stride;
+  }
+
+  std::unique_ptr<void, decltype(free)*> pixels_;  // Declared last: its initializer reads pixels_size.
+};
+
+// Helper class representing a frame, as seen from the API. Animations will have
+// multiple frames, but a single frame can have a color/grayscale channel and
+// multiple extra channels. The order of the extra channels should be the same
+// as all other frames in the same image.
+class PackedFrame {
+ public:
+  template <typename... Args>
+  explicit PackedFrame(Args&&... args) : color(std::forward<Args>(args)...) {}
+
+  // Returns a deep copy: frame header, name, color pixels and extra channels.
+  PackedFrame Copy() const {
+    PackedFrame result(color.xsize, color.ysize, color.format);
+    result.color = color.Copy();
+    result.name = name;
+    result.frame_info = frame_info;
+    for (const PackedImage& channel : extra_channels) {
+      result.extra_channels.emplace_back(channel.Copy());
+    }
+    return result;
+  }
+
+  // Frame-level metadata and the (possibly empty) frame name.
+  JxlFrameHeader frame_info = {};
+  std::string name;
+
+  // Pixels of the color (or grayscale) channels; built from the ctor args.
+  PackedImage color;
+  // Pixels of the extra channels, one image per channel.
+  std::vector<PackedImage> extra_channels;
+};
+
+// Optional metadata associated with a file
+class PackedMetadata {
+ public:
+  std::vector<uint8_t> exif;   // Byte blob; mirrored to/from CodecInOut blobs.exif.
+  std::vector<uint8_t> iptc;   // Byte blob; mirrored to/from CodecInOut blobs.iptc.
+  std::vector<uint8_t> jumbf;  // Byte blob; mirrored to/from CodecInOut blobs.jumbf.
+  std::vector<uint8_t> xmp;    // Byte blob; mirrored to/from CodecInOut blobs.xmp.
+};
+
+// The extra channel metadata information.
+struct PackedExtraChannel {
+  JxlExtraChannelInfo ec_info;  // Type, bit depth, dim_shift, spot color, ...
+  size_t index;  // NOTE(review): presumably the channel's position among the extra channels - confirm.
+  std::string name;  // Copied into ExtraChannelInfo::name by the converter.
+};
+
+// Helper class representing a JXL image file as decoded to pixels from the API.
+class PackedPixelFile {
+ public:
+  JxlBasicInfo info = {};  // Set up by JxlEncoderInitBasicInfo() in the ctor.
+
+  std::vector<PackedExtraChannel> extra_channels_info;
+
+  // Color information of the decoded pixels.
+  // If the icc is empty, the JxlColorEncoding should be used instead.
+  std::vector<uint8_t> icc;
+  JxlColorEncoding color_encoding = {};
+  // The icc profile of the original image.
+  std::vector<uint8_t> orig_icc;
+
+  std::unique_ptr<PackedFrame> preview_frame;  // Null when there is no preview.
+  std::vector<PackedFrame> frames;
+
+  PackedMetadata metadata;
+  PackedPixelFile() { JxlEncoderInitBasicInfo(&info); }
+};
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_PACKED_IMAGE_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/packed_image_convert.cc b/third-party/libjxl/libjxl/lib/extras/packed_image_convert.cc
new file mode 100644
index 0000000000..a67510b270
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/packed_image_convert.cc
@@ -0,0 +1,301 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/packed_image_convert.h"
+
+#include <jxl/color_encoding.h>
+#include <jxl/types.h>
+
+#include <cstdint>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/enc_image_bundle.h"
+#include "lib/jxl/luminance.h"
+
+namespace jxl {
+namespace extras {
+
+// Fills `bundle` from `frame` (color planes plus extra channels); conversion
+// failures are returned as Status, violated preconditions trip JXL_ASSERT.
+Status ConvertPackedFrameToImageBundle(const JxlBasicInfo& info,
+                                       const PackedFrame& frame,
+                                       const CodecInOut& io, ThreadPool* pool,
+                                       ImageBundle* bundle) {
+  JXL_ASSERT(frame.color.pixels() != nullptr);
+  const bool float_in = frame.color.format.data_type == JXL_TYPE_FLOAT16 ||
+                        frame.color.format.data_type == JXL_TYPE_FLOAT;
+  size_t frame_bits_per_sample =
+      float_in ? PackedImage::BitsPerChannel(frame.color.format.data_type)
+               : info.bits_per_sample;
+  JXL_ASSERT(frame_bits_per_sample != 0);
+  // It is ok for the frame.color.format.num_channels to not match the
+  // number of channels on the image.
+  JXL_ASSERT(1 <= frame.color.format.num_channels &&
+             frame.color.format.num_channels <= 4);
+
+  const Span<const uint8_t> span(
+      static_cast<const uint8_t*>(frame.color.pixels()),
+      frame.color.pixels_size);
+  const auto& layer = frame.frame_info.layer_info;
+  JXL_ASSERT(Rect(layer.crop_x0, layer.crop_y0, layer.xsize, layer.ysize)
+                 .IsInside(Rect(0, 0, info.xsize, info.ysize)));
+  if (info.have_animation) {
+    bundle->duration = frame.frame_info.duration;
+    bundle->blend = layer.blend_info.blendmode > 0;
+    bundle->use_for_next_frame = layer.save_as_reference > 0;
+    bundle->origin.x0 = layer.crop_x0;
+    bundle->origin.y0 = layer.crop_y0;
+  }
+  bundle->name = frame.name;  // frame.frame_info.name_length is ignored here.
+  JXL_ASSERT(io.metadata.m.color_encoding.IsGray() ==
+             (frame.color.format.num_channels <= 2));
+
+  JXL_RETURN_IF_ERROR(ConvertFromExternal(
+      span, frame.color.xsize, frame.color.ysize, io.metadata.m.color_encoding,
+      frame_bits_per_sample, frame.color.format, pool, bundle));
+
+  bundle->extra_channels().resize(io.metadata.m.extra_channel_info.size());
+  for (size_t i = 0; i < frame.extra_channels.size(); i++) {
+    const auto& ppf_ec = frame.extra_channels[i];
+    bundle->extra_channels()[i] = ImageF(ppf_ec.xsize, ppf_ec.ysize);
+    // Propagate conversion failures to the caller instead of aborting.
+    JXL_RETURN_IF_ERROR(BufferToImageF(
+        ppf_ec.format, ppf_ec.xsize, ppf_ec.ysize, ppf_ec.pixels(),
+        ppf_ec.pixels_size, pool, &bundle->extra_channels()[i]));
+  }
+  return true;
+}
+
+Status ConvertPackedPixelFileToCodecInOut(const PackedPixelFile& ppf,
+                                          ThreadPool* pool, CodecInOut* io) {  // Fills *io from ppf.
+  const bool has_alpha = ppf.info.alpha_bits != 0;
+  JXL_ASSERT(!ppf.frames.empty());
+  if (has_alpha) {  // Alpha must use the same sample layout as the color data.
+    JXL_ASSERT(ppf.info.alpha_bits == ppf.info.bits_per_sample);
+    JXL_ASSERT(ppf.info.alpha_exponent_bits ==
+               ppf.info.exponent_bits_per_sample);
+  }
+
+  const bool is_gray = ppf.info.num_color_channels == 1;
+  JXL_ASSERT(ppf.info.num_color_channels == 1 ||
+             ppf.info.num_color_channels == 3);
+
+  // Convert the image metadata
+  io->SetSize(ppf.info.xsize, ppf.info.ysize);
+  io->metadata.m.bit_depth.bits_per_sample = ppf.info.bits_per_sample;
+  io->metadata.m.bit_depth.exponent_bits_per_sample =
+      ppf.info.exponent_bits_per_sample;
+  io->metadata.m.bit_depth.floating_point_sample =
+      ppf.info.exponent_bits_per_sample != 0;  // Nonzero exponent bits mean float samples.
+  io->metadata.m.modular_16_bit_buffer_sufficient =
+      ppf.info.exponent_bits_per_sample == 0 && ppf.info.bits_per_sample <= 12;
+
+  io->metadata.m.SetAlphaBits(ppf.info.alpha_bits,
+                              ppf.info.alpha_premultiplied);
+
+  io->metadata.m.xyb_encoded = !ppf.info.uses_original_profile;
+  JXL_ASSERT(ppf.info.orientation > 0 && ppf.info.orientation <= 8);
+  io->metadata.m.orientation = ppf.info.orientation;
+
+  // Convert animation metadata
+  JXL_ASSERT(ppf.frames.size() == 1 || ppf.info.have_animation);  // Several frames require animation.
+  io->metadata.m.have_animation = ppf.info.have_animation;
+  io->metadata.m.animation.tps_numerator = ppf.info.animation.tps_numerator;
+  io->metadata.m.animation.tps_denominator = ppf.info.animation.tps_denominator;
+  io->metadata.m.animation.num_loops = ppf.info.animation.num_loops;
+
+  // Convert the color encoding.
+  if (!ppf.icc.empty()) {  // A non-empty ICC profile takes precedence over color_encoding.
+    PaddedBytes icc;
+    icc.append(ppf.icc);
+    const JxlCmsInterface& cms = GetJxlCms();
+    if (!io->metadata.m.color_encoding.SetICC(std::move(icc), &cms)) {
+      fprintf(stderr, "Warning: error setting ICC profile, assuming SRGB\n");
+      io->metadata.m.color_encoding = ColorEncoding::SRGB(is_gray);  // Fallback, not an error.
+    } else {
+      if (io->metadata.m.color_encoding.IsGray() != is_gray) {
+        // E.g. JPG image has 3 channels, but gray ICC.
+        return JXL_FAILURE("Embedded ICC does not match image color type");
+      }
+    }
+  } else {
+    JXL_RETURN_IF_ERROR(ConvertExternalToInternalColorEncoding(
+        ppf.color_encoding, &io->metadata.m.color_encoding));
+    if (io->metadata.m.color_encoding.ICC().empty()) {
+      return JXL_FAILURE("Failed to serialize ICC");
+    }
+  }
+
+  // Convert the extra blobs
+  io->blobs.exif = ppf.metadata.exif;
+  io->blobs.iptc = ppf.metadata.iptc;
+  io->blobs.jumbf = ppf.metadata.jumbf;
+  io->blobs.xmp = ppf.metadata.xmp;
+
+  // Append all other extra channels.
+  for (const auto& info : ppf.extra_channels_info) {
+    ExtraChannelInfo out;
+    out.type = static_cast<jxl::ExtraChannel>(info.ec_info.type);
+    out.bit_depth.bits_per_sample = info.ec_info.bits_per_sample;
+    out.bit_depth.exponent_bits_per_sample =
+        info.ec_info.exponent_bits_per_sample;
+    out.bit_depth.floating_point_sample =
+        info.ec_info.exponent_bits_per_sample != 0;
+    out.dim_shift = info.ec_info.dim_shift;
+    out.name = info.name;
+    out.alpha_associated = (info.ec_info.alpha_premultiplied != 0);
+    out.spot_color[0] = info.ec_info.spot_color[0];
+    out.spot_color[1] = info.ec_info.spot_color[1];
+    out.spot_color[2] = info.ec_info.spot_color[2];
+    out.spot_color[3] = info.ec_info.spot_color[3];
+    io->metadata.m.extra_channel_info.push_back(std::move(out));
+  }
+
+  // Convert the preview
+  if (ppf.preview_frame) {  // Only when a preview frame is present.
+    size_t preview_xsize = ppf.preview_frame->color.xsize;
+    size_t preview_ysize = ppf.preview_frame->color.ysize;
+    io->metadata.m.have_preview = true;
+    JXL_RETURN_IF_ERROR(
+        io->metadata.m.preview_size.Set(preview_xsize, preview_ysize));
+    JXL_RETURN_IF_ERROR(ConvertPackedFrameToImageBundle(
+        ppf.info, *ppf.preview_frame, *io, pool, &io->preview_frame));
+  }
+
+  // Convert the pixels
+  io->frames.clear();  // Frames previously held by *io are discarded.
+  for (const auto& frame : ppf.frames) {
+    ImageBundle bundle(&io->metadata.m);
+    JXL_RETURN_IF_ERROR(
+        ConvertPackedFrameToImageBundle(ppf.info, frame, *io, pool, &bundle));
+    io->frames.push_back(std::move(bundle));
+  }
+
+  if (ppf.info.exponent_bits_per_sample == 0) {
+    // uint case.
+    io->metadata.m.bit_depth.bits_per_sample = io->Main().DetectRealBitdepth();  // Overrides the value set above.
+  }
+  if (ppf.info.intensity_target != 0) {
+    io->metadata.m.SetIntensityTarget(ppf.info.intensity_target);
+  } else {
+    SetIntensityTarget(&io->metadata.m);  // No explicit target given: helper picks one from the metadata.
+  }
+  io->CheckMetadata();
+  return true;
+}
+
+// Converts an internal CodecInOut into an external PackedPixelFile, writing
+// pixels in `pixel_format` after transforming them to `c_desired`.
+Status ConvertCodecInOutToPackedPixelFile(const CodecInOut& io,
+                                          const JxlPixelFormat& pixel_format,
+                                          const ColorEncoding& c_desired,
+                                          ThreadPool* pool,
+                                          PackedPixelFile* ppf) {
+  const bool has_alpha = io.metadata.m.HasAlpha();
+  bool alpha_premultiplied = false;
+  JXL_ASSERT(!io.frames.empty());
+
+  if (has_alpha) {  // Alpha must use the same sample layout as the color data.
+    JXL_ASSERT(io.metadata.m.GetAlphaBits() ==
+               io.metadata.m.bit_depth.bits_per_sample);
+    const auto* alpha_channel = io.metadata.m.Find(ExtraChannel::kAlpha);
+    JXL_ASSERT(alpha_channel->bit_depth.exponent_bits_per_sample ==
+               io.metadata.m.bit_depth.exponent_bits_per_sample);
+    alpha_premultiplied = alpha_channel->alpha_associated;
+  }
+
+  // Convert the image metadata
+  ppf->info.xsize = io.metadata.size.xsize();
+  ppf->info.ysize = io.metadata.size.ysize();
+  ppf->info.num_color_channels = io.metadata.m.color_encoding.Channels();
+  ppf->info.bits_per_sample = io.metadata.m.bit_depth.bits_per_sample;
+  ppf->info.exponent_bits_per_sample =
+      io.metadata.m.bit_depth.exponent_bits_per_sample;
+
+  ppf->info.intensity_target = io.metadata.m.tone_mapping.intensity_target;
+  ppf->info.linear_below = io.metadata.m.tone_mapping.linear_below;
+  ppf->info.min_nits = io.metadata.m.tone_mapping.min_nits;
+  ppf->info.relative_to_max_display =
+      io.metadata.m.tone_mapping.relative_to_max_display;
+
+  ppf->info.alpha_bits = io.metadata.m.GetAlphaBits();
+  ppf->info.alpha_premultiplied = alpha_premultiplied;
+
+  ppf->info.uses_original_profile = !io.metadata.m.xyb_encoded;
+  JXL_ASSERT(0 < io.metadata.m.orientation && io.metadata.m.orientation <= 8);
+  ppf->info.orientation =
+      static_cast<JxlOrientation>(io.metadata.m.orientation);
+
+  // Convert animation metadata
+  JXL_ASSERT(io.frames.size() == 1 || io.metadata.m.have_animation);
+  ppf->info.have_animation = io.metadata.m.have_animation;
+  ppf->info.animation.tps_numerator = io.metadata.m.animation.tps_numerator;
+  ppf->info.animation.tps_denominator = io.metadata.m.animation.tps_denominator;
+  ppf->info.animation.num_loops = io.metadata.m.animation.num_loops;
+
+  // Convert the color encoding
+  ppf->icc.assign(c_desired.ICC().begin(), c_desired.ICC().end());
+  ConvertInternalToExternalColorEncoding(c_desired, &ppf->color_encoding);
+
+  // Convert the extra blobs
+  ppf->metadata.exif = io.blobs.exif;
+  ppf->metadata.iptc = io.blobs.iptc;
+  ppf->metadata.jumbf = io.blobs.jumbf;
+  ppf->metadata.xmp = io.blobs.xmp;
+  const bool float_out = pixel_format.data_type == JXL_TYPE_FLOAT ||
+                         pixel_format.data_type == JXL_TYPE_FLOAT16;
+  // Convert the pixels
+  ppf->frames.clear();  // Frames previously held by *ppf are discarded.
+  for (const auto& frame : io.frames) {
+    JXL_ASSERT(frame.metadata()->bit_depth.bits_per_sample != 0);
+    // It is ok for the frame.color().kNumPlanes to not match the
+    // number of channels on the image.
+    const uint32_t num_channels =
+        frame.metadata()->color_encoding.Channels() + has_alpha;
+    JxlPixelFormat format{/*num_channels=*/num_channels,
+                          /*data_type=*/pixel_format.data_type,
+                          /*endianness=*/pixel_format.endianness,
+                          /*align=*/pixel_format.align};
+
+    PackedFrame packed_frame(frame.oriented_xsize(), frame.oriented_ysize(),
+                             format);
+    const size_t bits_per_sample =
+        float_out ? packed_frame.color.BitsPerChannel(pixel_format.data_type)
+                  : ppf->info.bits_per_sample;
+    packed_frame.name = frame.name;
+    packed_frame.frame_info.name_length = frame.name.size();
+    // Color transform
+    ImageBundle ib = frame.Copy();
+    const ImageBundle* to_color_transform = &ib;
+    ImageMetadata metadata = io.metadata.m;
+    ImageBundle store(&metadata);
+    const ImageBundle* transformed;
+    // TODO(firsching): handle the transform here.
+    JXL_RETURN_IF_ERROR(TransformIfNeeded(*to_color_transform, c_desired,
+                                          GetJxlCms(), pool, &store,
+                                          &transformed));
+
+    JXL_RETURN_IF_ERROR(ConvertToExternal(
+        *transformed, bits_per_sample, float_out, format.num_channels,
+        format.endianness,
+        /* stride_out=*/packed_frame.color.stride, pool,
+        packed_frame.color.pixels(), packed_frame.color.pixels_size,
+        /*out_callback=*/{}, frame.metadata()->GetOrientation()));
+
+    // TODO(firsching): Convert the extra channels, beside one potential alpha
+    // channel. FIXME!
+    JXL_CHECK(frame.extra_channels().size() <= has_alpha);
+    ppf->frames.push_back(std::move(packed_frame));
+  }
+
+  return true;
+}
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/packed_image_convert.h b/third-party/libjxl/libjxl/lib/extras/packed_image_convert.h
new file mode 100644
index 0000000000..100adccc09
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/packed_image_convert.h
@@ -0,0 +1,36 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_PACKED_IMAGE_CONVERT_H_
+#define LIB_EXTRAS_PACKED_IMAGE_CONVERT_H_
+
+// Helper functions to convert from the external image types to the internal
+// CodecInOut to help transitioning to the external types.
+
+#include <jxl/types.h>
+
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+
+namespace jxl {
+namespace extras {
+
+// Converts an external PackedPixelFile to the internal CodecInOut for use with
+// internal functions directly.
+Status ConvertPackedPixelFileToCodecInOut(const PackedPixelFile& ppf,
+                                          ThreadPool* pool, CodecInOut* io);
+
+// Converts an internal CodecInOut for use with internal function to an external
+// PackedPixelFile.
+Status ConvertCodecInOutToPackedPixelFile(const CodecInOut& io,
+                                          const JxlPixelFormat& pixel_format,
+                                          const ColorEncoding& c_desired,
+                                          ThreadPool* pool,
+                                          PackedPixelFile* ppf);
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_PACKED_IMAGE_CONVERT_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/size_constraints.h b/third-party/libjxl/libjxl/lib/extras/size_constraints.h
new file mode 100644
index 0000000000..cf06f8cb22
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/size_constraints.h
@@ -0,0 +1,43 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_SIZE_CONSTRAINTS_H_
+#define LIB_EXTRAS_SIZE_CONSTRAINTS_H_
+
+#include <cstdint>
+#include <type_traits>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+struct SizeConstraints {
+  // Upper limit on pixel dimensions/area, enforced by VerifyDimensions
+  // (called from decoders). Fuzzers set smaller values to limit memory use.
+  uint32_t dec_max_xsize = 0xFFFFFFFFu;
+  uint32_t dec_max_ysize = 0xFFFFFFFFu;
+  uint64_t dec_max_pixels = 0xFFFFFFFFu;  // Might be up to ~0ull
+};
+
+template <typename T,
+          class = typename std::enable_if<std::is_unsigned<T>::value>::type>
+Status VerifyDimensions(const SizeConstraints* constraints, T xs, T ys) {
+  if (!constraints) return true;  // No constraints given: nothing is checked.
+
+  if (xs == 0 || ys == 0) return JXL_FAILURE("Empty image.");
+  if (xs > constraints->dec_max_xsize) return JXL_FAILURE("Image too wide.");
+  if (ys > constraints->dec_max_ysize) return JXL_FAILURE("Image too tall.");
+
+  const uint64_t num_pixels = static_cast<uint64_t>(xs) * ys;  // Widened before multiplying.
+  if (num_pixels > constraints->dec_max_pixels) {
+    return JXL_FAILURE("Image too big.");
+  }
+
+  return true;
+}
+
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_SIZE_CONSTRAINTS_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/time.cc b/third-party/libjxl/libjxl/lib/extras/time.cc
new file mode 100644
index 0000000000..73d1b8f260
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/time.cc
@@ -0,0 +1,60 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/time.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <ctime>
+
+#include "lib/jxl/base/os_macros.h"  // for JXL_OS_*
+
+#if JXL_OS_WIN
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif  // NOMINMAX
+#include <windows.h>
+#endif  // JXL_OS_WIN
+
+#if JXL_OS_MAC
+#include <mach/mach.h>
+#include <mach/mach_time.h>
+#endif  // JXL_OS_MAC
+
+#if JXL_OS_HAIKU
+#include <OS.h>
+#endif  // JXL_OS_HAIKU
+
+namespace jxl {
+
+double Now() {  // Seconds from a monotonic, platform-specific clock.
+#if JXL_OS_WIN
+  LARGE_INTEGER counter;
+  (void)QueryPerformanceCounter(&counter);
+  LARGE_INTEGER freq;
+  (void)QueryPerformanceFrequency(&freq);
+  return double(counter.QuadPart) / freq.QuadPart;  // ticks / (ticks per s)
+#elif JXL_OS_MAC
+  // mach_absolute_time() ticks are scaled to nanoseconds by the timebase
+  // ratio, see https://developer.apple.com/library/mac/qa/qa1398/_index.html
+  // The ratio is queried once; static initialization is thread-safe (C++11).
+  static const mach_timebase_info_data_t timebase = [] {
+    mach_timebase_info_data_t info = {};
+    (void)mach_timebase_info(&info);
+    return info;
+  }();
+  return double(mach_absolute_time()) * timebase.numer / timebase.denom * 1E-9;
+#elif JXL_OS_HAIKU
+  return double(system_time_nsecs()) * 1E-9;
+#else
+  timespec t;
+  clock_gettime(CLOCK_MONOTONIC, &t);
+  return t.tv_sec + t.tv_nsec * 1E-9;
+#endif
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/time.h b/third-party/libjxl/libjxl/lib/extras/time.h
new file mode 100644
index 0000000000..c71414b877
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/time.h
@@ -0,0 +1,19 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_TIME_H_
+#define LIB_EXTRAS_TIME_H_
+
+// OS-specific function for timing.
+
+namespace jxl {
+
+// Returns current time [seconds] from a monotonic clock with unspecified
+// starting point - only suitable for computing elapsed time.
+double Now();
+
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_TIME_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/tone_mapping.cc b/third-party/libjxl/libjxl/lib/extras/tone_mapping.cc
new file mode 100644
index 0000000000..1cdd6ed826
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/tone_mapping.cc
@@ -0,0 +1,132 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/tone_mapping.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/extras/tone_mapping.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/dec_tone_mapping-inl.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/image_bundle.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+static constexpr float rec2020_luminances[3] = {0.2627f, 0.6780f, 0.0593f};
+
+Status ToneMapFrame(const std::pair<float, float> display_nits,
+                    ImageBundle* const ib, ThreadPool* const pool) {
+  // Perform tone mapping as described in Report ITU-R BT.2390-8, section 5.4
+  // (pp. 23-25).
+  // https://www.itu.int/pub/R-REP-BT.2390-8-2020
+
+  HWY_FULL(float) df;  // Highway descriptor for vectors of float.
+  using V = decltype(Zero(df));  // Vector type matching that descriptor.
+
+  ColorEncoding linear_rec2020;  // Working space: linear RGB, k2100 primaries, D65.
+  linear_rec2020.SetColorSpace(ColorSpace::kRGB);
+  linear_rec2020.primaries = Primaries::k2100;
+  linear_rec2020.white_point = WhitePoint::kD65;
+  linear_rec2020.tf.SetTransferFunction(TransferFunction::kLinear);
+  JXL_RETURN_IF_ERROR(linear_rec2020.CreateICC());
+  JXL_RETURN_IF_ERROR(ib->TransformTo(linear_rec2020, GetJxlCms(), pool));
+
+  Rec2408ToneMapper<decltype(df)> tone_mapper(
+      {ib->metadata()->tone_mapping.min_nits,
+       ib->metadata()->IntensityTarget()},  // Source range from the frame metadata.
+      display_nits, rec2020_luminances);  // NOTE(review): presumably {min, max} nits of the target display - confirm.
+
+  return RunOnPool(
+      pool, 0, ib->ysize(), ThreadPool::NoInit,  // One task per row y.
+      [&](const uint32_t y, size_t /* thread */) {
+        float* const JXL_RESTRICT row_r = ib->color()->PlaneRow(0, y);
+        float* const JXL_RESTRICT row_g = ib->color()->PlaneRow(1, y);
+        float* const JXL_RESTRICT row_b = ib->color()->PlaneRow(2, y);
+        for (size_t x = 0; x < ib->xsize(); x += Lanes(df)) {  // NOTE(review): whole vectors; presumably rows are padded past xsize - confirm.
+          V red = Load(df, row_r + x);
+          V green = Load(df, row_g + x);
+          V blue = Load(df, row_b + x);
+          tone_mapper.ToneMap(&red, &green, &blue);  // Updates the three vectors in place.
+          Store(red, df, row_r + x);
+          Store(green, df, row_g + x);
+          Store(blue, df, row_b + x);
+        }
+      },
+      "ToneMap");
+}
+
+Status GamutMapFrame(ImageBundle* const ib, float preserve_saturation,
+                     ThreadPool* const pool) {  // Gamut-maps every pixel of *ib in place.
+  HWY_FULL(float) df;  // Highway descriptor for vectors of float.
+  using V = decltype(Zero(df));  // Vector type matching that descriptor.
+
+  ColorEncoding linear_rec2020;  // Working space: linear RGB, k2100 primaries, D65.
+  linear_rec2020.SetColorSpace(ColorSpace::kRGB);
+  linear_rec2020.primaries = Primaries::k2100;
+  linear_rec2020.white_point = WhitePoint::kD65;
+  linear_rec2020.tf.SetTransferFunction(TransferFunction::kLinear);
+  JXL_RETURN_IF_ERROR(linear_rec2020.CreateICC());
+  JXL_RETURN_IF_ERROR(ib->TransformTo(linear_rec2020, GetJxlCms(), pool));
+
+  JXL_RETURN_IF_ERROR(RunOnPool(
+      pool, 0, ib->ysize(), ThreadPool::NoInit,  // One task per row y.
+      [&](const uint32_t y, size_t /* thread*/) {
+        float* const JXL_RESTRICT row_r = ib->color()->PlaneRow(0, y);
+        float* const JXL_RESTRICT row_g = ib->color()->PlaneRow(1, y);
+        float* const JXL_RESTRICT row_b = ib->color()->PlaneRow(2, y);
+        for (size_t x = 0; x < ib->xsize(); x += Lanes(df)) {  // NOTE(review): whole vectors; presumably rows are padded past xsize - confirm.
+          V red = Load(df, row_r + x);
+          V green = Load(df, row_g + x);
+          V blue = Load(df, row_b + x);
+          GamutMap(&red, &green, &blue, rec2020_luminances,
+                   preserve_saturation);  // Updates the three vectors in place.
+          Store(red, df, row_r + x);
+          Store(green, df, row_g + x);
+          Store(blue, df, row_b + x);
+        }
+      },
+      "GamutMap"));
+
+  return true;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+namespace {
+HWY_EXPORT(ToneMapFrame);
+HWY_EXPORT(GamutMapFrame);
+}  // namespace
+
+Status ToneMapTo(const std::pair<float, float> display_nits,
+                 CodecInOut* const io, ThreadPool* const pool) {
+  const auto kernel = HWY_DYNAMIC_DISPATCH(ToneMapFrame);
+  for (auto& frame : io->frames) {
+    JXL_RETURN_IF_ERROR(kernel(display_nits, &frame, pool));
+  }
+  io->metadata.m.SetIntensityTarget(display_nits.second);  // Only after every frame succeeded.
+  return true;
+}
+
+Status GamutMap(CodecInOut* const io, float preserve_saturation,
+                ThreadPool* const pool) {
+  const auto kernel = HWY_DYNAMIC_DISPATCH(GamutMapFrame);
+  for (auto& frame : io->frames) {  // Stops at the first frame that fails.
+    JXL_RETURN_IF_ERROR(kernel(&frame, preserve_saturation, pool));
+  }
+  return true;
+}
+
+}  // namespace jxl
+#endif
diff --git a/third-party/libjxl/libjxl/lib/extras/tone_mapping.h b/third-party/libjxl/libjxl/lib/extras/tone_mapping.h
new file mode 100644
index 0000000000..1f474101eb
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/tone_mapping.h
@@ -0,0 +1,30 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_TONE_MAPPING_H_
+#define LIB_EXTRAS_TONE_MAPPING_H_
+
+#include "lib/jxl/codec_in_out.h"
+
+namespace jxl {
+
+// Important: after calling this, the result will contain many out-of-gamut
+// colors. It is very strongly recommended to call GamutMap afterwards to
+// rectify this.
+Status ToneMapTo(std::pair<float, float> display_nits, CodecInOut* io,
+                 ThreadPool* pool = nullptr);
+
+// `preserve_saturation` indicates to what extent to favor saturation over
+// luminance when mapping out-of-gamut colors to Rec. 2020. 0 preserves
+// luminance at the complete expense of saturation, while 1 gives the most
+// saturated color with the same hue that Rec. 2020 can represent even if it
+// means lowering the luminance. Values in between correspond to linear mixtures
+// of those two extremes.
+Status GamutMap(CodecInOut* io, float preserve_saturation,
+                ThreadPool* pool = nullptr);
+
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_TONE_MAPPING_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/tone_mapping_gbench.cc b/third-party/libjxl/libjxl/lib/extras/tone_mapping_gbench.cc
new file mode 100644
index 0000000000..720d2ad0a9
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/tone_mapping_gbench.cc
@@ -0,0 +1,42 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "benchmark/benchmark.h"
+#include "lib/extras/codec.h"
+#include "lib/extras/tone_mapping.h"
+#include "lib/jxl/enc_color_management.h"
+
+namespace jxl {
+
+static void BM_ToneMapping(benchmark::State& state) {
+  Image3F image(2268, 1512);
+  FillImage(0.5f, &image);
+
+  // The input is tagged as linear Rec. 2020 up front, sparing `ToneMapTo` a
+  // conversion to it, so the timing is dominated by the tone mapping proper.
+  ColorEncoding rec2020_linear;
+  rec2020_linear.SetColorSpace(ColorSpace::kRGB);
+  rec2020_linear.primaries = Primaries::k2100;
+  rec2020_linear.white_point = WhitePoint::kD65;
+  rec2020_linear.tf.SetTransferFunction(TransferFunction::kLinear);
+  JXL_CHECK(rec2020_linear.CreateICC());
+
+  for (auto _ : state) {
+    state.PauseTiming();  // Input setup below is not timed.
+    CodecInOut io;
+    Image3F fresh(image.xsize(), image.ysize());
+    CopyImageTo(image, &fresh);
+    io.SetFromImage(std::move(fresh), rec2020_linear);
+    io.metadata.m.SetIntensityTarget(255);
+    state.ResumeTiming();
+
+    JXL_CHECK(ToneMapTo({0.1, 100}, &io));
+  }
+
+  state.SetItemsProcessed(state.iterations() * image.xsize() * image.ysize());
+}
+BENCHMARK(BM_ToneMapping);
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/gbench_main.cc b/third-party/libjxl/libjxl/lib/gbench_main.cc
new file mode 100644
index 0000000000..1cc1772017
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/gbench_main.cc
@@ -0,0 +1,8 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "benchmark/benchmark.h"
+
+BENCHMARK_MAIN();
diff --git a/third-party/libjxl/libjxl/lib/include/jxl/cms_interface.h b/third-party/libjxl/libjxl/lib/include/jxl/cms_interface.h
new file mode 100644
index 0000000000..491f373829
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/include/jxl/cms_interface.h
@@ -0,0 +1,252 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_common
+ * @{
+ * @file cms_interface.h
+ * @brief Interface to allow the injection of different color management systems
+ * (CMSes, also called color management modules, or CMMs) in JPEG XL.
+ *
+ * A CMS is needed by the JPEG XL encoder and decoder to perform colorspace
+ * conversions. This defines an interface that can be implemented for different
+ * CMSes and then passed to the library.
+ */
+
+#ifndef JXL_CMS_INTERFACE_H_
+#define JXL_CMS_INTERFACE_H_
+
+#include <jxl/color_encoding.h>
+#include <jxl/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/** Parses an ICC profile and populates @p c and @p cmyk with the data.
+ *
+ * @param user_data JxlCmsInterface::set_fields_data passed as-is.
+ * @param icc_data the ICC data to parse.
+ * @param icc_size how many bytes of icc_data are valid.
+ * @param c a JxlColorEncoding to populate if applicable.
+ * @param cmyk a boolean to set to whether the colorspace is a CMYK colorspace.
+ * @return Whether the relevant fields in @p c were successfully populated.
+ */
+typedef JXL_BOOL (*jpegxl_cms_set_fields_from_icc_func)(void* user_data,
+                                                        const uint8_t* icc_data,
+                                                        size_t icc_size,
+                                                        JxlColorEncoding* c,
+                                                        JXL_BOOL* cmyk);
+
+/** Represents an input or output colorspace to a color transform, as a
+ * serialized ICC profile. */
+typedef struct {
+  /** The serialized ICC profile. This is guaranteed to be present and valid. */
+  struct {
+    const uint8_t* data;
+    size_t size;
+  } icc;
+
+  /** Structured representation of the colorspace, if applicable. If all fields
+   * are different from their "unknown" value, then this is equivalent to the
+   * ICC representation of the colorspace. If some are "unknown", those that are
+   * not are still valid and can still be used on their own if they are useful.
+   */
+  JxlColorEncoding color_encoding;
+
+  /** Number of components per pixel. This can be deduced from the other
+   * representations of the colorspace but is provided for convenience and
+   * validation. */
+  size_t num_channels;
+} JxlColorProfile;
+
+/** Allocates and returns the data needed for @p num_threads parallel transforms
+ * from the @p input colorspace to @p output, with up to @p pixels_per_thread
+ * pixels to transform per call to JxlCmsInterface::run. @p init_data comes
+ * directly from the JxlCmsInterface instance. Since @c run only receives the
+ * data returned by @c init, a reference to @p init_data should be kept there
+ * if access to it is desired in @c run. Likewise for JxlCmsInterface::destroy.
+ *
+ * The ICC data in @p input and @p output is guaranteed to outlive the @c init /
+ * @c run / @c destroy cycle.
+ *
+ * @param init_data JxlCmsInterface::init_data passed as-is.
+ * @param num_threads the maximum number of threads from which
+ *        JxlCmsInterface::run will be called.
+ * @param pixels_per_thread the maximum number of pixels that each call to
+ *        JxlCmsInterface::run will have to transform.
+ * @param input_profile the input colorspace for the transform.
+ * @param output_profile the colorspace to which JxlCmsInterface::run should
+ *        convert the input data.
+ * @param intensity_target for colorspaces where luminance is relative
+ *        (essentially: not PQ), indicates the luminance at which (1, 1, 1) will
+ *        be displayed. This is useful for conversions between PQ and a relative
+ *        luminance colorspace, in either direction: @p intensity_target cd/m²
+ *        in PQ should map to and from (1, 1, 1) in the relative one.\n
+ *        It is also used for conversions to and from HLG, as it is
+ *        scene-referred while other colorspaces are assumed to be
+ *        display-referred. That is, conversions from HLG should apply the OOTF
+ *        for a peak display luminance of @p intensity_target, and conversions
+ *        to HLG should undo it. The OOTF is a gamma function applied to the
+ *        luminance channel (https://www.itu.int/rec/R-REC-BT.2100-2-201807-I
+ *        page 7), with the gamma value computed as
+ *        <tt>1.2 * 1.111^log2(intensity_target / 1000)</tt> (footnote 2 page 8
+ *        of the same document).
+ * @return The data needed for the transform, or @c NULL in case of failure.
+ *         This will be passed to the other functions as @c user_data.
+ */
+typedef void* (*jpegxl_cms_init_func)(void* init_data, size_t num_threads,
+                                      size_t pixels_per_thread,
+                                      const JxlColorProfile* input_profile,
+                                      const JxlColorProfile* output_profile,
+                                      float intensity_target);
+
+/** Returns a buffer that can be used by callers of the interface to store the
+ * input of the conversion or read its result, if they pass it as the input or
+ * output of the @c run function.
+ * @param user_data the data returned by @c init.
+ * @param thread the index of the thread for which to return a buffer.
+ * @return A buffer that can be used by the caller for passing to @c run.
+ */
+typedef float* (*jpegxl_cms_get_buffer_func)(void* user_data, size_t thread);
+
+/** Executes one transform and returns true on success or false on error. It
+ * must be possible to call this from different threads with different values
+ * for @p thread, all between 0 (inclusive) and the value of @p num_threads
+ * passed to @c init (exclusive). It is allowed to implement this by locking
+ * such that the transforms are essentially performed sequentially, if such a
+ * performance profile is acceptable. @p user_data is the data returned by
+ * @c init.
+ * The buffers each contain @p num_pixels × @c num_channels interleaved floating
+ * point (0..1) samples where @c num_channels is the number of color channels of
+ * their respective color profiles. It is guaranteed that the only case in which
+ * they might overlap is if the output has fewer channels than the input, in
+ * which case the pointers may be identical.
+ * For CMYK data, 0 represents the maximum amount of ink while 1 represents no
+ * ink.
+ * @param user_data the data returned by @c init.
+ * @param thread the index of the thread from which the function is being
+ *        called.
+ * @param input_buffer the buffer containing the pixel data to be transformed.
+ * @param output_buffer the buffer receiving the transformed pixel data.
+ * @param num_pixels the number of pixels to transform from @p input to
+ * @p output.
+ * @return JXL_TRUE on success, JXL_FALSE on failure.
+ */
+typedef JXL_BOOL (*jpegxl_cms_run_func)(void* user_data, size_t thread,
+                                        const float* input_buffer,
+                                        float* output_buffer,
+                                        size_t num_pixels);
+
+/** Performs the necessary clean-up and frees the memory allocated for user
+ * data.
+ */
+typedef void (*jpegxl_cms_destroy_func)(void*);
+
+/**
+ * Interface for performing colorspace transforms. The @c init function can be
+ * called several times to instantiate several transforms, including before
+ * other transforms have been destroyed.
+ *
+ * The call sequence for a given colorspace transform could look like the
+ * following:
+ * @dot
+ * digraph calls {
+ *   newrank = true
+ *   node [shape = box, fontname = monospace]
+ *   init [label = "user_data <- init(\l\
+ *     init_data = data,\l\
+ *     num_threads = 3,\l\
+ *     pixels_per_thread = 20,\l\
+ *     input = (sRGB, 3 channels),\l\
+ *     output = (Display-P3, 3 channels),\l\
+ *     intensity_target = 255\l\
+ *   )\l"]
+ *   subgraph cluster_0 {
+ *   color = lightgrey
+ *   label = "thread 0"
+ *   labeljust = "c"
+ *   run_1_1 [label = "run(\l\
+ *     user_data,\l\
+ *     thread = 0,\l\
+ *     input = in[0],\l\
+ *     output = out[0],\l\
+ *     num_pixels = 20\l\
+ *   )\l"]
+ *   run_1_2 [label = "run(\l\
+ *     user_data,\l\
+ *     thread = 0,\l\
+ *     input = in[3],\l\
+ *     output = out[3],\l\
+ *     num_pixels = 20\l\
+ *   )\l"]
+ *   }
+ *   subgraph cluster_1 {
+ *   color = lightgrey
+ *   label = "thread 1"
+ *   labeljust = "l"
+ *   run_2_1 [label = "run(\l\
+ *     user_data,\l\
+ *     thread = 1,\l\
+ *     input = in[1],\l\
+ *     output = out[1],\l\
+ *     num_pixels = 20\l\
+ *   )\l"]
+ *   run_2_2 [label = "run(\l\
+ *     user_data,\l\
+ *     thread = 1,\l\
+ *     input = in[4],\l\
+ *     output = out[4],\l\
+ *     num_pixels = 13\l\
+ *   )\l"]
+ *   }
+ *   subgraph cluster_3 {
+ *   color = lightgrey
+ *   label = "thread 2"
+ *   labeljust = "c"
+ *   run_3_1 [label = "run(\l\
+ *     user_data,\l\
+ *     thread = 2,\l\
+ *     input = in[2],\l\
+ *     output = out[2],\l\
+ *     num_pixels = 20\l\
+ *   )\l"]
+ *   }
+ *   init -> {run_1_1; run_2_1; run_3_1; rank = same}
+ *   run_1_1 -> run_1_2
+ *   run_2_1 -> run_2_2
+ *   {run_1_2; run_2_2; run_3_1} -> "destroy(user_data)"
+ * }
+ * @enddot
+ */
+typedef struct {
+  /** CMS-specific data that will be passed to @ref set_fields_from_icc. */
+  void* set_fields_data;
+  /** Populates a JxlColorEncoding from an ICC profile. */
+  jpegxl_cms_set_fields_from_icc_func set_fields_from_icc;
+
+  /** CMS-specific data that will be passed to @ref init. */
+  void* init_data;
+  /** Prepares a colorspace transform as described in the documentation of @ref
+   * jpegxl_cms_init_func. */
+  jpegxl_cms_init_func init;
+  /** Returns a buffer that can be used as input to @c run. */
+  jpegxl_cms_get_buffer_func get_src_buf;
+  /** Returns a buffer that can be used as output from @c run. */
+  jpegxl_cms_get_buffer_func get_dst_buf;
+  /** Executes the transform on a batch of pixels, per @ref jpegxl_cms_run_func.
+   */
+  jpegxl_cms_run_func run;
+  /** Cleans up the transform. */
+  jpegxl_cms_destroy_func destroy;
+} JxlCmsInterface;
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_CMS_INTERFACE_H_ */
+
+/** @} */
diff --git a/third-party/libjxl/libjxl/lib/include/jxl/codestream_header.h b/third-party/libjxl/libjxl/lib/include/jxl/codestream_header.h
new file mode 100644
index 0000000000..66dd7df4ce
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/include/jxl/codestream_header.h
@@ -0,0 +1,430 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_common
+ * @{
+ * @file codestream_header.h
+ * @brief Definitions of structs and enums for the metadata from the JPEG XL
+ * codestream headers (signature, metadata, preview dimensions, ...), excluding
+ * color encoding which is in color_encoding.h.
+ */
+
+#ifndef JXL_CODESTREAM_HEADER_H_
+#define JXL_CODESTREAM_HEADER_H_
+
+#include <jxl/types.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/** Image orientation metadata.
+ * Values 1..8 match the EXIF definitions.
+ * The name indicates the operation to perform to transform from the encoded
+ * image to the display image.
+ */
+typedef enum {
+  JXL_ORIENT_IDENTITY = 1,
+  JXL_ORIENT_FLIP_HORIZONTAL = 2,
+  JXL_ORIENT_ROTATE_180 = 3,
+  JXL_ORIENT_FLIP_VERTICAL = 4,
+  JXL_ORIENT_TRANSPOSE = 5,
+  JXL_ORIENT_ROTATE_90_CW = 6,
+  JXL_ORIENT_ANTI_TRANSPOSE = 7,
+  JXL_ORIENT_ROTATE_90_CCW = 8,
+} JxlOrientation;
+
+/** Given type of an extra channel.
+ */
+typedef enum {
+  JXL_CHANNEL_ALPHA,
+  JXL_CHANNEL_DEPTH,
+  JXL_CHANNEL_SPOT_COLOR,
+  JXL_CHANNEL_SELECTION_MASK,
+  JXL_CHANNEL_BLACK,
+  JXL_CHANNEL_CFA,
+  JXL_CHANNEL_THERMAL,
+  JXL_CHANNEL_RESERVED0,
+  JXL_CHANNEL_RESERVED1,
+  JXL_CHANNEL_RESERVED2,
+  JXL_CHANNEL_RESERVED3,
+  JXL_CHANNEL_RESERVED4,
+  JXL_CHANNEL_RESERVED5,
+  JXL_CHANNEL_RESERVED6,
+  JXL_CHANNEL_RESERVED7,
+  JXL_CHANNEL_UNKNOWN,
+  JXL_CHANNEL_OPTIONAL
+} JxlExtraChannelType;
+
+/** The codestream preview header */
+typedef struct {
+  /** Preview width in pixels */
+  uint32_t xsize;
+
+  /** Preview height in pixels */
+  uint32_t ysize;
+} JxlPreviewHeader;
+
+/** The codestream animation header, optionally present at the beginning of
+ * the codestream. When present, it applies to all animation frames, unlike
+ * JxlFrameHeader, which applies to an individual frame.
+ */
+typedef struct {
+  /** Numerator of ticks per second of a single animation frame time unit */
+  uint32_t tps_numerator;
+
+  /** Denominator of ticks per second of a single animation frame time unit */
+  uint32_t tps_denominator;
+
+  /** Amount of animation loops, or 0 to repeat infinitely */
+  uint32_t num_loops;
+
+  /** Whether animation time codes are present at animation frames in the
+   * codestream */
+  JXL_BOOL have_timecodes;
+} JxlAnimationHeader;
+
+/** Basic image information. This information is available from the file
+ * signature and first part of the codestream header.
+ */
+typedef struct {
+  /* TODO(lode): need additional fields for (transcoded) JPEG? For reusable
+   * fields orientation must be read from Exif APP1. For has_icc_profile: must
+   * look up where ICC profile is guaranteed to be in a JPEG file to be able to
+   * indicate this. */
+
+  /* TODO(lode): make struct packed, and/or make this opaque struct with getter
+   * functions (still separate struct from opaque decoder) */
+
+  /** Whether the codestream is embedded in the container format. If true,
+   * metadata information and extensions may be available in addition to the
+   * codestream.
+   */
+  JXL_BOOL have_container;
+
+  /** Width of the image in pixels, before applying orientation.
+   */
+  uint32_t xsize;
+
+  /** Height of the image in pixels, before applying orientation.
+   */
+  uint32_t ysize;
+
+  /** Original image color channel bit depth.
+   */
+  uint32_t bits_per_sample;
+
+  /** Original image color channel floating point exponent bits, or 0 if they
+   * are unsigned integer. For example, if the original data is half-precision
+   * (binary16) floating point, bits_per_sample is 16 and
+   * exponent_bits_per_sample is 5, and so on for other floating point
+   * precisions.
+   */
+  uint32_t exponent_bits_per_sample;
+
+  /** Upper bound on the intensity level present in the image in nits. For
+   * unsigned integer pixel encodings, this is the brightness of the largest
+   * representable value. The image does not necessarily contain a pixel
+   * actually this bright. An encoder is allowed to set 255 for SDR images
+   * without computing a histogram.
+   * Leaving this set to its default of 0 lets libjxl choose a sensible default
+   * value based on the color encoding.
+   */
+  float intensity_target;
+
+  /** Lower bound on the intensity level present in the image. This may be
+   * loose, i.e. lower than the actual darkest pixel. When tone mapping, a
+   * decoder will map [min_nits, intensity_target] to the display range.
+   */
+  float min_nits;
+
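+  /** Whether @c linear_below is a ratio of the maximum display brightness
+   * (true) or an absolute brightness in nits (false); see its description. */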
+  JXL_BOOL relative_to_max_display;
+
+  /** The tone mapping will leave unchanged (linear mapping) any pixels whose
+   * brightness is strictly below this. The interpretation depends on
+   * relative_to_max_display. If true, this is a ratio [0, 1] of the maximum
+   * display brightness [nits], otherwise an absolute brightness [nits].
+   */
+  float linear_below;
+
+  /** Whether the data in the codestream is encoded in the original color
+   * profile that is attached to the codestream metadata header, or is
+   * encoded in an internally supported absolute color space (which the decoder
+   * can always convert to linear or non-linear sRGB or to XYB). If the original
+   * profile is used, the decoder outputs pixel data in the color space matching
+   * that profile, but doesn't convert it to any other color space. If the
+   * original profile is not used, the decoder only outputs the data as sRGB
+   * (linear if outputting to floating point, nonlinear with standard sRGB
+   * transfer function if outputting to unsigned integers) but will not convert
+   * it to the original color profile. The decoder also does not convert to
+   * the target display color profile. To convert the pixel data produced by
+   * the decoder to the original color profile, one of the JxlDecoderGetColor*
+   * functions needs to be called with @ref JXL_COLOR_PROFILE_TARGET_DATA to get
+   * the color profile of the decoder output, and then an external CMS can be
+   * used for conversion.
+   * Note that for lossy compression, this should be set to false for most use
+   * cases, and if needed, the image should be converted to the original color
+   * profile after decoding, as described above.
+   */
+  JXL_BOOL uses_original_profile;
+
+  /** Indicates a preview image exists near the beginning of the codestream.
+   * The preview itself or its dimensions are not included in the basic info.
+   */
+  JXL_BOOL have_preview;
+
+  /** Indicates animation frames exist in the codestream. The animation
+   * information is not included in the basic info.
+   */
+  JXL_BOOL have_animation;
+
+  /** Image orientation, value 1-8 matching the values used by JEITA CP-3451C
+   * (Exif version 2.3).
+   */
+  JxlOrientation orientation;
+
+  /** Number of color channels encoded in the image, this is either 1 for
+   * grayscale data, or 3 for colored data. This count does not include
+   * the alpha channel or other extra channels. To check presence of an alpha
+   * channel, such as in the case of RGBA color, check alpha_bits != 0.
+   * If and only if this is 1, the JxlColorSpace in the JxlColorEncoding is
+   * JXL_COLOR_SPACE_GRAY.
+   */
+  uint32_t num_color_channels;
+
+  /** Number of additional image channels. This includes the main alpha channel,
+   * but can also include additional channels such as depth, additional alpha
+   * channels, spot colors, and so on. Information about the extra channels
+   * can be queried with JxlDecoderGetExtraChannelInfo. The main alpha channel,
+   * if it exists, also has its information available in the alpha_bits,
+   * alpha_exponent_bits and alpha_premultiplied fields in this JxlBasicInfo.
+   */
+  uint32_t num_extra_channels;
+
+  /** Bit depth of the encoded alpha channel, or 0 if there is no alpha channel.
+   * If present, matches the bits_per_sample value of the JxlExtraChannelInfo
+   * associated with this alpha channel.
+   */
+  uint32_t alpha_bits;
+
+  /** Alpha channel floating point exponent bits, or 0 if they are unsigned
+   * integer. If present, matches the exponent_bits_per_sample value of the
+   * JxlExtraChannelInfo associated with this alpha channel.
+   */
+  uint32_t alpha_exponent_bits;
+
+  /** Whether the alpha channel is premultiplied. Only used if there is a main
+   * alpha channel. Matches the alpha_premultiplied value of the
+   * JxlExtraChannelInfo associated with this alpha channel.
+   */
+  JXL_BOOL alpha_premultiplied;
+
+  /** Dimensions of encoded preview image, only used if have_preview is
+   * JXL_TRUE.
+   */
+  JxlPreviewHeader preview;
+
+  /** Animation header with global animation properties for all frames, only
+   * used if have_animation is JXL_TRUE.
+   */
+  JxlAnimationHeader animation;
+
+  /** Intrinsic width of the image.
+   * The intrinsic size can be different from the actual size in pixels
+   * (as given by xsize and ysize) and it denotes the recommended dimensions
+   * for displaying the image, i.e. applications are advised to resample the
+   * decoded image to the intrinsic dimensions.
+   */
+  uint32_t intrinsic_xsize;
+
+  /** Intrinsic height of the image.
+   * The intrinsic size can be different from the actual size in pixels
+   * (as given by xsize and ysize) and it denotes the recommended dimensions
+   * for displaying the image, i.e. applications are advised to resample the
+   * decoded image to the intrinsic dimensions.
+   */
+  uint32_t intrinsic_ysize;
+
+  /** Padding for forwards-compatibility, in case more fields are exposed
+   * in a future version of the library.
+   */
+  uint8_t padding[100];
+} JxlBasicInfo;
+
+/** Information for a single extra channel.
+ */
+typedef struct {
+  /** Given type of an extra channel.
+   */
+  JxlExtraChannelType type;
+
+  /** Total bits per sample for this channel.
+   */
+  uint32_t bits_per_sample;
+
+  /** Floating point exponent bits per channel, or 0 if they are unsigned
+   * integer.
+   */
+  uint32_t exponent_bits_per_sample;
+
+  /** The exponent the channel is downsampled by on each axis.
+   * TODO(lode): expand this comment to match the JPEG XL specification,
+   * specify how to upscale, how to round the size computation, and to which
+   * extra channels this field applies.
+   */
+  uint32_t dim_shift;
+
+  /** Length of the extra channel name in bytes, or 0 if no name.
+   * Excludes null termination character.
+   */
+  uint32_t name_length;
+
+  /** Whether alpha channel uses premultiplied alpha. Only applicable if
+   * type is JXL_CHANNEL_ALPHA.
+   */
+  JXL_BOOL alpha_premultiplied;
+
+  /** Spot color of the current spot channel in linear RGBA. Only applicable if
+   * type is JXL_CHANNEL_SPOT_COLOR.
+   */
+  float spot_color[4];
+
+  /** Only applicable if type is JXL_CHANNEL_CFA.
+   * TODO(lode): add comment about the meaning of this field.
+   */
+  uint32_t cfa_channel;
+} JxlExtraChannelInfo;
+
+/* TODO(lode): add API to get the codestream header extensions. */
+/** Extensions in the codestream header. */
+typedef struct {
+  /** Extension bits. */
+  uint64_t extensions;
+} JxlHeaderExtensions;
+
+/** Frame blend modes.
+ * When decoding, if coalescing is enabled (default), this can be ignored.
+ */
+typedef enum {
+  JXL_BLEND_REPLACE = 0,
+  JXL_BLEND_ADD = 1,
+  JXL_BLEND_BLEND = 2,
+  JXL_BLEND_MULADD = 3,
+  JXL_BLEND_MUL = 4,
+} JxlBlendMode;
+
+/** The information about blending the color channels or a single extra channel.
+ * When decoding, if coalescing is enabled (default), this can be ignored and
+ * the blend mode is considered to be JXL_BLEND_REPLACE.
+ * When encoding, these settings apply to the pixel data given to the encoder.
+ */
+typedef struct {
+  /** Blend mode.
+   */
+  JxlBlendMode blendmode;
+  /** Reference frame ID to use as the 'bottom' layer (0-3).
+   */
+  uint32_t source;
+  /** Which extra channel to use as the 'alpha' channel for blend modes
+   * JXL_BLEND_BLEND and JXL_BLEND_MULADD.
+   */
+  uint32_t alpha;
+  /** Clamp values to [0,1] for the purpose of blending.
+   */
+  JXL_BOOL clamp;
+} JxlBlendInfo;
+
+/** The information about layers.
+ * When decoding, if coalescing is enabled (default), this can be ignored.
+ * When encoding, these settings apply to the pixel data given to the encoder,
+ * the encoder could choose an internal representation that differs.
+ */
+typedef struct {
+  /** Whether cropping is applied for this frame. When decoding, if false,
+   * crop_x0 and crop_y0 are set to zero, and xsize and ysize to the main
+   * image dimensions. When encoding and this is false, those fields are
+   * ignored. When decoding, if coalescing is enabled (default), this is always
+   * false, regardless of the internal encoding in the JPEG XL codestream.
+   */
+  JXL_BOOL have_crop;
+
+  /** Horizontal offset of the frame (can be negative).
+   */
+  int32_t crop_x0;
+
+  /** Vertical offset of the frame (can be negative).
+   */
+  int32_t crop_y0;
+
+  /** Width of the frame (number of columns).
+   */
+  uint32_t xsize;
+
+  /** Height of the frame (number of rows).
+   */
+  uint32_t ysize;
+
+  /** The blending info for the color channels. Blending info for extra channels
+   * has to be retrieved separately using JxlDecoderGetExtraChannelBlendInfo.
+   */
+  JxlBlendInfo blend_info;
+
+  /** After blending, save the frame as reference frame with this ID (0-3).
+   * Special case: if the frame duration is nonzero, ID 0 means "will not be
+   * referenced in the future". This value is not used for the last frame.
+   * When encoding, ID 3 is reserved for frames that are generated internally by
+   * the encoder, and should not be used by applications.
+   */
+  uint32_t save_as_reference;
+} JxlLayerInfo;
+
+/** The header of one displayed frame or non-coalesced layer. */
+typedef struct {
+  /** How long to wait after rendering in ticks. The duration in seconds of a
+   * tick is given by tps_numerator and tps_denominator in JxlAnimationHeader.
+   */
+  uint32_t duration;
+
+  /** SMPTE timecode of the current frame in form 0xHHMMSSFF, or 0. The bits are
+   * interpreted from most-significant to least-significant as hour, minute,
+   * second, and frame. If timecode is nonzero, it is strictly larger than that
+   * of a previous frame with nonzero duration.
+   *
+   * This value is only available, and only used, if have_timecodes in
+   * JxlAnimationHeader is JXL_TRUE.
+   */
+  uint32_t timecode;
+
+  /** Length of the frame name in bytes, or 0 if no name.
+   * Excludes null termination character. This value is set by the decoder.
+   * For the encoder, this value is ignored and @ref JxlEncoderSetFrameName is
+   * used instead to set the name and the length.
+   */
+  uint32_t name_length;
+
+  /** Indicates this is the last animation frame. This value is set by the
+   * decoder to indicate no further frames follow. For the encoder, it is not
+   * required to set this value and it is ignored, @ref JxlEncoderCloseFrames is
+   * used to indicate the last frame to the encoder instead.
+   */
+  JXL_BOOL is_last;
+
+  /** Information about the layer in case of no coalescing.
+   */
+  JxlLayerInfo layer_info;
+} JxlFrameHeader;
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_CODESTREAM_HEADER_H_ */
+
+/** @}*/
diff --git a/third-party/libjxl/libjxl/lib/include/jxl/color_encoding.h b/third-party/libjxl/libjxl/lib/include/jxl/color_encoding.h
new file mode 100644
index 0000000000..b16f6a01ee
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/include/jxl/color_encoding.h
@@ -0,0 +1,162 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_common
+ * @{
+ * @file color_encoding.h
+ * @brief Color Encoding definitions used by JPEG XL.
+ * All CIE units are for the standard 1931 2 degree observer.
+ */
+
+#ifndef JXL_COLOR_ENCODING_H_
+#define JXL_COLOR_ENCODING_H_
+
+#include <stdint.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/** Color space of the image data. */
+typedef enum {
+  /** Tristimulus RGB */
+  JXL_COLOR_SPACE_RGB,
+  /** Luminance based, the primaries in JxlColorEncoding must be ignored. This
+   * value implies that num_color_channels in JxlBasicInfo is 1, any other value
+   * implies num_color_channels is 3. */
+  JXL_COLOR_SPACE_GRAY,
+  /** XYB (opsin) color space */
+  JXL_COLOR_SPACE_XYB,
+  /** None of the other table entries describe the color space appropriately */
+  JXL_COLOR_SPACE_UNKNOWN,
+} JxlColorSpace;
+
+/** Built-in whitepoints for color encoding. When decoding, the numerical xy
+ * whitepoint value can be read from the JxlColorEncoding white_point field
+ * regardless of the enum value. When encoding, enum values except
+ * JXL_WHITE_POINT_CUSTOM override the numerical fields. Some enum values match
+ * a subset of CICP (Rec. ITU-T H.273 | ISO/IEC 23091-2:2019(E)), however the
+ * white point and RGB primaries are separate enums here.
+ */
+typedef enum {
+  /** CIE Standard Illuminant D65: 0.3127, 0.3290 */
+  JXL_WHITE_POINT_D65 = 1,
+  /** White point must be read from the JxlColorEncoding white_point field, or
+   * as ICC profile. This enum value is not an exact match of the corresponding
+   * CICP value. */
+  JXL_WHITE_POINT_CUSTOM = 2,
+  /** CIE Standard Illuminant E (equal-energy): 1/3, 1/3 */
+  JXL_WHITE_POINT_E = 10,
+  /** DCI-P3 from SMPTE RP 431-2: 0.314, 0.351 */
+  JXL_WHITE_POINT_DCI = 11,
+} JxlWhitePoint;
+
+/** Built-in primaries for color encoding. When decoding, the primaries can be
+ * read from the JxlColorEncoding primaries_red_xy, primaries_green_xy and
+ * primaries_blue_xy fields regardless of the enum value. When encoding, the
+ * enum values except JXL_PRIMARIES_CUSTOM override the numerical fields. Some
+ * enum values match a subset of CICP (Rec. ITU-T H.273 | ISO/IEC
+ * 23091-2:2019(E)), however the white point and RGB primaries are separate
+ * enums here.
+ */
+typedef enum {
+  /** The CIE xy values of the red, green and blue primaries are: 0.639998686,
+     0.330010138; 0.300003784, 0.600003357; 0.150002046, 0.059997204 */
+  JXL_PRIMARIES_SRGB = 1,
+  /** Primaries must be read from the JxlColorEncoding primaries_red_xy,
+   * primaries_green_xy and primaries_blue_xy fields, or as ICC profile. This
+   * enum value is not an exact match of the corresponding CICP value. */
+  JXL_PRIMARIES_CUSTOM = 2,
+  /** As specified in Rec. ITU-R BT.2100-1 */
+  JXL_PRIMARIES_2100 = 9,
+  /** As specified in SMPTE RP 431-2 */
+  JXL_PRIMARIES_P3 = 11,
+} JxlPrimaries;
+
+/** Built-in transfer functions for color encoding. Enum values match a subset
+ * of CICP (Rec. ITU-T H.273 | ISO/IEC 23091-2:2019(E)) unless specified
+ * otherwise. */
+typedef enum {
+  /** As specified in Rec. ITU-R BT.709-6 */
+  JXL_TRANSFER_FUNCTION_709 = 1,
+  /** None of the other table entries describe the transfer function. */
+  JXL_TRANSFER_FUNCTION_UNKNOWN = 2,
+  /** The gamma exponent is 1 */
+  JXL_TRANSFER_FUNCTION_LINEAR = 8,
+  /** As specified in IEC 61966-2-1 sRGB */
+  JXL_TRANSFER_FUNCTION_SRGB = 13,
+  /** As specified in SMPTE ST 2084 */
+  JXL_TRANSFER_FUNCTION_PQ = 16,
+  /** As specified in SMPTE ST 428-1 */
+  JXL_TRANSFER_FUNCTION_DCI = 17,
+  /** As specified in Rec. ITU-R BT.2100-1 (HLG) */
+  JXL_TRANSFER_FUNCTION_HLG = 18,
+  /** Transfer function follows power law given by the gamma value in
+     JxlColorEncoding. Not a CICP value. */
+  JXL_TRANSFER_FUNCTION_GAMMA = 65535,
+} JxlTransferFunction;
+
+/** Rendering intent for color encoding, as specified in ISO 15076-1:2010 */
+typedef enum {
+  /** vendor-specific */
+  JXL_RENDERING_INTENT_PERCEPTUAL = 0,
+  /** media-relative */
+  JXL_RENDERING_INTENT_RELATIVE,
+  /** vendor-specific */
+  JXL_RENDERING_INTENT_SATURATION,
+  /** ICC-absolute */
+  JXL_RENDERING_INTENT_ABSOLUTE,
+} JxlRenderingIntent;
+
+/** Color encoding of the image as structured information.
+ */
+typedef struct {
+  /** Color space of the image data.
+   */
+  JxlColorSpace color_space;
+
+  /** Built-in white point. If this value is JXL_WHITE_POINT_CUSTOM, must
+   * use the numerical whitepoint values from white_point_xy.
+   */
+  JxlWhitePoint white_point;
+
+  /** Numerical whitepoint values in CIE xy space. */
+  double white_point_xy[2];
+
+  /** Built-in RGB primaries. If this value is JXL_PRIMARIES_CUSTOM, must
+   * use the numerical primaries values below. This field and the custom values
+   * below are unused and must be ignored if the color space is
+   * JXL_COLOR_SPACE_GRAY or JXL_COLOR_SPACE_XYB.
+   */
+  JxlPrimaries primaries;
+
+  /** Numerical red primary values in CIE xy space. */
+  double primaries_red_xy[2];
+
+  /** Numerical green primary values in CIE xy space. */
+  double primaries_green_xy[2];
+
+  /** Numerical blue primary values in CIE xy space. */
+  double primaries_blue_xy[2];
+
+  /** Transfer function. For JXL_TRANSFER_FUNCTION_GAMMA, see gamma below. */
+  JxlTransferFunction transfer_function;
+
+  /** Gamma value used when transfer_function is JXL_TRANSFER_FUNCTION_GAMMA
+   */
+  double gamma;
+
+  /** Rendering intent defined for the color profile. */
+  JxlRenderingIntent rendering_intent;
+} JxlColorEncoding;
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_COLOR_ENCODING_H_ */
+
+/** @}*/
diff --git a/third-party/libjxl/libjxl/lib/include/jxl/decode.h b/third-party/libjxl/libjxl/lib/include/jxl/decode.h
new file mode 100644
index 0000000000..5922728b07
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/include/jxl/decode.h
@@ -0,0 +1,1415 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_decoder
+ * @{
+ * @file decode.h
+ * @brief Decoding API for JPEG XL.
+ */
+
+#ifndef JXL_DECODE_H_
+#define JXL_DECODE_H_
+
+#include <jxl/cms_interface.h>
+#include <jxl/codestream_header.h>
+#include <jxl/color_encoding.h>
+#include <jxl/jxl_export.h>
+#include <jxl/memory_manager.h>
+#include <jxl/parallel_runner.h>
+#include <jxl/types.h>
+#include <jxl/version.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/**
+ * Decoder library version.
+ *
+ * @return the decoder library version as an integer:
+ * MAJOR_VERSION * 1000000 + MINOR_VERSION * 1000 + PATCH_VERSION. For example,
+ * version 1.2.3 would return 1002003.
+ */
+JXL_EXPORT uint32_t JxlDecoderVersion(void);
+
+/** The result of @ref JxlSignatureCheck.
+ */
+typedef enum {
+  /** Not enough bytes were passed to determine if a valid signature was found.
+   */
+  JXL_SIG_NOT_ENOUGH_BYTES = 0,
+
+  /** No valid JPEG XL header was found. */
+  JXL_SIG_INVALID = 1,
+
+  /** A valid JPEG XL codestream signature was found, that is a JPEG XL image
+   * without container.
+   */
+  JXL_SIG_CODESTREAM = 2,
+
+  /** A valid container signature was found, that is a JPEG XL image embedded
+   * in a box format container.
+   */
+  JXL_SIG_CONTAINER = 3,
+} JxlSignature;
+
+/**
+ * JPEG XL signature identification.
+ *
+ * Checks if the passed buffer contains a valid JPEG XL signature. The passed @p
+ * buf of size
+ * @p len doesn't need to be a full image, only the beginning of the file.
+ *
+ * @return a flag indicating if a JPEG XL signature was found and what type.
+ *  - @ref JXL_SIG_NOT_ENOUGH_BYTES if not enough bytes were passed to
+ *    determine if a valid signature is there.
+ *  - @ref JXL_SIG_INVALID if no valid signature found for JPEG XL decoding.
+ *  - @ref JXL_SIG_CODESTREAM if a valid JPEG XL codestream signature was
+ *    found.
+ *  - @ref JXL_SIG_CONTAINER if a valid JPEG XL container signature was found.
+ */
+JXL_EXPORT JxlSignature JxlSignatureCheck(const uint8_t* buf, size_t len);
+
+/**
+ * Opaque structure that holds the JPEG XL decoder.
+ *
+ * Allocated and initialized with @ref JxlDecoderCreate().
+ * Cleaned up and deallocated with @ref JxlDecoderDestroy().
+ */
+typedef struct JxlDecoderStruct JxlDecoder;
+
+/**
+ * Creates an instance of @ref JxlDecoder and initializes it.
+ *
+ * @p memory_manager will be used for all the library dynamic allocations made
+ * from this instance. The parameter may be NULL, in which case the default
+ * allocator will be used. See jxl/memory_manager.h for details.
+ *
+ * @param memory_manager custom allocator function. It may be NULL. The memory
+ *        manager will be copied internally.
+ * @return @c NULL if the instance can not be allocated or initialized
+ * @return pointer to initialized @ref JxlDecoder otherwise
+ */
+JXL_EXPORT JxlDecoder* JxlDecoderCreate(const JxlMemoryManager* memory_manager);
+
+/**
+ * Re-initializes a @ref JxlDecoder instance, so it can be re-used for decoding
+ * another image. All state and settings are reset as if the object was
+ * newly created with @ref JxlDecoderCreate, but the memory manager is kept.
+ *
+ * @param dec instance to be re-initialized.
+ */
+JXL_EXPORT void JxlDecoderReset(JxlDecoder* dec);
+
+/**
+ * Deinitializes and frees @ref JxlDecoder instance.
+ *
+ * @param dec instance to be cleaned up and deallocated.
+ */
+JXL_EXPORT void JxlDecoderDestroy(JxlDecoder* dec);
+
+/**
+ * Return value for @ref JxlDecoderProcessInput.
+ * The values from @ref JXL_DEC_BASIC_INFO onwards are optional informative
+ * events that can be subscribed to, they are never returned if they
+ * have not been registered with @ref JxlDecoderSubscribeEvents.
+ */
+typedef enum {
+  /** Function call finished successfully, or decoding is finished and there is
+   * nothing more to be done.
+   *
+   * Note that @ref JxlDecoderProcessInput will return JXL_DEC_SUCCESS if all
+   * events that were registered with @ref JxlDecoderSubscribeEvents were
+   * processed, even before the end of the JPEG XL codestream.
+   *
+   * In this case, the return value of @ref JxlDecoderReleaseInput will be the
+   * same as it was at the last signaled event. E.g. if JXL_DEC_FULL_IMAGE was
+   * subscribed to, then all bytes from the end of the JPEG XL codestream
+   * (including possible boxes needed for jpeg reconstruction) will be returned
+   * as unprocessed.
+   */
+  JXL_DEC_SUCCESS = 0,
+
+  /** An error occurred, for example invalid input file or out of memory.
+   * TODO(lode): add function to get error information from decoder.
+   */
+  JXL_DEC_ERROR = 1,
+
+  /** The decoder needs more input bytes to continue. Before the next @ref
+   * JxlDecoderProcessInput call, more input data must be set, by calling @ref
+   * JxlDecoderReleaseInput (if input was set previously) and then calling @ref
+   * JxlDecoderSetInput. @ref JxlDecoderReleaseInput returns how many bytes
+   * are not yet processed, before a next call to @ref JxlDecoderProcessInput
+   * all unprocessed bytes must be provided again (the address need not match,
+   * but the contents must), and more bytes must be concatenated after the
+   * unprocessed bytes.
+   * In most cases, @ref JxlDecoderReleaseInput will return no unprocessed bytes
+   * at this event, the only exceptions are if the previously set input ended
+   * within (a) the raw codestream signature, (b) the signature box, (c) a box
+   * header, or (d) the first 4 bytes of a brob, ftyp, or jxlp box. In any of
+   * these cases the number of unprocessed bytes is less than 20.
+   */
+  JXL_DEC_NEED_MORE_INPUT = 2,
+
+  /** The decoder is able to decode a preview image and requests setting a
+   * preview output buffer using @ref JxlDecoderSetPreviewOutBuffer. This occurs
+   * if @ref JXL_DEC_PREVIEW_IMAGE is requested and it is possible to decode a
+   * preview image from the codestream and the preview out buffer was not yet
+   * set. There is at most one preview image in a codestream.
+   * In this case, @ref JxlDecoderReleaseInput will return all bytes from the
+   * end of the frame header (including ToC) of the preview frame as
+   * unprocessed.
+   */
+  JXL_DEC_NEED_PREVIEW_OUT_BUFFER = 3,
+
+  /** The decoder requests an output buffer to store the full resolution image,
+   * which can be set with @ref JxlDecoderSetImageOutBuffer or with @ref
+   * JxlDecoderSetImageOutCallback. This event re-occurs for new frames if
+   * there are multiple animation frames and requires setting an output again.
+   * In this case, @ref JxlDecoderReleaseInput will return all bytes from the
+   * end of the frame header (including ToC) as unprocessed.
+   */
+  JXL_DEC_NEED_IMAGE_OUT_BUFFER = 5,
+
+  /** The JPEG reconstruction buffer is too small for reconstructed JPEG
+   * codestream to fit. @ref JxlDecoderSetJPEGBuffer must be called again to
+   * make room for remaining bytes. This event may occur multiple times
+   * after @ref JXL_DEC_JPEG_RECONSTRUCTION.
+   */
+  JXL_DEC_JPEG_NEED_MORE_OUTPUT = 6,
+
+  /** The box contents output buffer is too small. @ref JxlDecoderSetBoxBuffer
+   * must be called again to make room for remaining bytes. This event may occur
+   * multiple times after @ref JXL_DEC_BOX.
+   */
+  JXL_DEC_BOX_NEED_MORE_OUTPUT = 7,
+
+  /** Informative event by @ref JxlDecoderProcessInput
+   * "JxlDecoderProcessInput": Basic information such as image dimensions and
+   * extra channels. This event occurs max once per image.
+   * In this case, @ref JxlDecoderReleaseInput will return all bytes from the
+   * end of the basic info as unprocessed (including the last byte of basic info
+   * if it did not end on a byte boundary).
+   */
+  JXL_DEC_BASIC_INFO = 0x40,
+
+  /** Informative event by @ref JxlDecoderProcessInput
+   * "JxlDecoderProcessInput": Color encoding or ICC profile from the
+   * codestream header. This event occurs max once per image and always later
+   * than @ref JXL_DEC_BASIC_INFO and earlier than any pixel data.
+   * In this case, @ref JxlDecoderReleaseInput will return all bytes from the
+   * end of the image header (which is the start of the first frame) as
+   * unprocessed.
+   */
+  JXL_DEC_COLOR_ENCODING = 0x100,
+
+  /** Informative event by @ref JxlDecoderProcessInput
+   * "JxlDecoderProcessInput": Preview image, a small frame, decoded. This
+   * event can only happen if the image has a preview frame encoded. This event
+   * occurs max once for the codestream and always later than @ref
+   * JXL_DEC_COLOR_ENCODING and before @ref JXL_DEC_FRAME.
+   * In this case, @ref JxlDecoderReleaseInput will return all bytes from the
+   * end of the preview frame as unprocessed.
+   */
+  JXL_DEC_PREVIEW_IMAGE = 0x200,
+
+  /** Informative event by @ref JxlDecoderProcessInput
+   * "JxlDecoderProcessInput": Beginning of a frame. @ref
+   * JxlDecoderGetFrameHeader can be used at this point. A note on frames:
+   * a JPEG XL image can have internal frames that are not intended to be
+   * displayed (e.g. used for compositing a final frame), but this only returns
+   * displayed frames, unless @ref JxlDecoderSetCoalescing was set to JXL_FALSE:
+   * in that case, the individual layers are returned, without blending. Note
+   * that even when coalescing is disabled, only frames of type kRegularFrame
+   * are returned; frames of type kReferenceOnly and kLfFrame are always for
+   * internal purposes only and cannot be accessed. A displayed frame either has
+   * an animation duration or is the only or last frame in the image. This event
+   * occurs max once per displayed frame, always later than @ref
+   * JXL_DEC_COLOR_ENCODING, and always earlier than any pixel data. While
+   * JPEG XL supports encoding a single frame as the composition of multiple
+   * internal sub-frames also called frames, this event is not indicated for the
+   * internal frames.
+   * In this case, @ref JxlDecoderReleaseInput will return all bytes from the
+   * end of the frame header (including ToC) as unprocessed.
+   */
+  JXL_DEC_FRAME = 0x400,
+
+  /** Informative event by @ref JxlDecoderProcessInput
+   * "JxlDecoderProcessInput": full frame (or layer, in case coalescing is
+   * disabled) is decoded. @ref JxlDecoderSetImageOutBuffer must be used after
+   * getting the basic image information to be able to get the image pixels, if
+   * not this return status only indicates we're past this point in the
+   * codestream. This event occurs max once per frame.
+   * In this case, @ref JxlDecoderReleaseInput will return all bytes from the
+   * end of the frame (or if @ref JXL_DEC_JPEG_RECONSTRUCTION is subscribed to,
+   * from the end of the last box that is needed for jpeg reconstruction) as
+   * unprocessed.
+   */
+  JXL_DEC_FULL_IMAGE = 0x1000,
+
+  /** Informative event by @ref JxlDecoderProcessInput
+   * "JxlDecoderProcessInput": JPEG reconstruction data decoded. @ref
+   * JxlDecoderSetJPEGBuffer may be used to set a JPEG reconstruction buffer
+   * after getting the JPEG reconstruction data. If a JPEG reconstruction buffer
+   * is set a byte stream identical to the JPEG codestream used to encode the
+   * image will be written to the JPEG reconstruction buffer instead of pixels
+   * to the image out buffer. This event occurs max once per image and always
+   * before @ref JXL_DEC_FULL_IMAGE.
+   * In this case, @ref JxlDecoderReleaseInput will return all bytes from the
+   * end of the 'jbrd' box as unprocessed.
+   */
+  JXL_DEC_JPEG_RECONSTRUCTION = 0x2000,
+
+  /** Informative event by @ref JxlDecoderProcessInput
+   * "JxlDecoderProcessInput": The header of a box of the container format
+   * (BMFF) is decoded. The following API functions related to boxes can be used
+   * after this event:
+   *  - @ref JxlDecoderSetBoxBuffer and @ref JxlDecoderReleaseBoxBuffer
+   *    "JxlDecoderReleaseBoxBuffer": set and release a buffer to get the box
+   *    data.
+   *  - @ref JxlDecoderGetBoxType get the 4-character box typename.
+   *  - @ref JxlDecoderGetBoxSizeRaw get the size of the box as it appears in
+   *    the container file, not decompressed.
+   *  - @ref JxlDecoderSetDecompressBoxes to configure whether to get the box
+   *    data decompressed, or possibly compressed.
+   *
+   * Boxes can be compressed. This is so when their box type is
+   * "brob". In that case, they have an underlying decompressed box
+   * type and decompressed data. @ref JxlDecoderSetDecompressBoxes allows
+   * configuring which data to get. Decompressing requires
+   * Brotli. @ref JxlDecoderGetBoxType has a flag to get the compressed box
+   * type, which can be "brob", or the decompressed box type. If a box
+   * is not compressed (its compressed type is not "brob"), then
+   * the output decompressed box type and data is independent of what
+   * setting is configured.
+   *
+   * The buffer set with @ref JxlDecoderSetBoxBuffer must be set again for each
+   * next box to be obtained, or can be left unset to skip outputting this box.
+   * The output buffer contains the full box data when the next @ref JXL_DEC_BOX
+   * event or @ref JXL_DEC_SUCCESS occurs. @ref JXL_DEC_BOX occurs for all
+   * boxes, including non-metadata boxes such as the signature box or codestream
+   * boxes. To check whether the box is a metadata type for respectively EXIF,
+   * XMP or JUMBF, use @ref JxlDecoderGetBoxType and check for types "Exif",
+   * "xml " and "jumb" respectively.
+   *
+   * In this case, @ref JxlDecoderReleaseInput will return all bytes from the
+   * start of the box header as unprocessed.
+   */
+  JXL_DEC_BOX = 0x4000,
+
+  /** Informative event by @ref JxlDecoderProcessInput
+   * "JxlDecoderProcessInput": a progressive step in decoding the frame is
+   * reached. When calling @ref JxlDecoderFlushImage at this point, the flushed
+   * image will correspond exactly to this point in decoding, and not yet
+   * contain partial results (such as partially more fine detail) of a next
+   * step. By default, this event will trigger at most once per frame, when a
+   * 8x8th resolution (DC) image is ready (the image data is still returned at
+   * full resolution, giving upscaled DC). Use @ref
+   * JxlDecoderSetProgressiveDetail to configure more fine-grainedness. The
+   * event is not guaranteed to trigger, not all images have progressive steps
+   * or DC encoded.
+   * In this case, @ref JxlDecoderReleaseInput will return all bytes from the
+   * end of the section that was needed to produce this progressive event as
+   * unprocessed.
+   */
+  JXL_DEC_FRAME_PROGRESSION = 0x8000,
+} JxlDecoderStatus;
+
+/** Rewinds decoder to the beginning. The same input must be given again from
+ * the beginning of the file and the decoder will emit events from the beginning
+ * again. When rewinding (as opposed to @ref JxlDecoderReset), the decoder can
+ * keep state about the image, which it can use to skip to a requested frame
+ * more efficiently with @ref JxlDecoderSkipFrames. Settings such as parallel
+ * runner or subscribed events are kept. After rewind, @ref
+ * JxlDecoderSubscribeEvents can be used again, and it is feasible to leave out
+ * events that were already handled before, such as @ref JXL_DEC_BASIC_INFO
+ * and @ref JXL_DEC_COLOR_ENCODING, since they will provide the same information
+ * as before.
+ * The difference to @ref JxlDecoderReset is that some state is kept, namely
+ * settings set by a call to
+ *  - @ref JxlDecoderSetCoalescing,
+ *  - @ref JxlDecoderSetDesiredIntensityTarget,
+ *  - @ref JxlDecoderSetDecompressBoxes,
+ *  - @ref JxlDecoderSetKeepOrientation,
+ *  - @ref JxlDecoderSetUnpremultiplyAlpha,
+ *  - @ref JxlDecoderSetParallelRunner,
+ *  - @ref JxlDecoderSetRenderSpotcolors, and
+ *  - @ref JxlDecoderSubscribeEvents.
+ *
+ * @param dec decoder object
+ */
+JXL_EXPORT void JxlDecoderRewind(JxlDecoder* dec);
+
+/** Makes the decoder skip the next `amount` frames. It still needs to process
+ * the input, but will not output the frame events. It can be more efficient
+ * when skipping frames, and even more so when using this after @ref
+ * JxlDecoderRewind. If the decoder is already processing a frame (could
+ * have emitted @ref JXL_DEC_FRAME but not yet @ref JXL_DEC_FULL_IMAGE), it
+ * starts skipping from the next frame. If the amount is larger than the amount
+ * of frames remaining in the image, all remaining frames are skipped. Calling
+ * this function multiple times adds the amount to skip to the already existing
+ * amount.
+ *
+ * A frame here is defined as a frame that without skipping emits events such
+ * as @ref JXL_DEC_FRAME and @ref JXL_DEC_FULL_IMAGE, frames that are internal
+ * to the file format but are not rendered as part of an animation, or are not
+ * the final still frame of a still image, are not counted.
+ *
+ * @param dec decoder object
+ * @param amount the amount of frames to skip
+ */
+JXL_EXPORT void JxlDecoderSkipFrames(JxlDecoder* dec, size_t amount);
+
+/**
+ * Skips processing the current frame. Can be called after frame processing
+ * already started, signaled by a @ref JXL_DEC_NEED_IMAGE_OUT_BUFFER event,
+ * but before the corresponding @ref JXL_DEC_FULL_IMAGE event. The next signaled
+ * event will be another @ref JXL_DEC_FRAME, or @ref JXL_DEC_SUCCESS if there
+ * are no more frames. If pixel data is required from the already processed part
+ * of the frame, @ref JxlDecoderFlushImage must be called before this.
+ *
+ * @param dec decoder object
+ * @return @ref JXL_DEC_SUCCESS if there is a frame to skip, and @ref
+ *     JXL_DEC_ERROR if the function was not called during frame processing.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSkipCurrentFrame(JxlDecoder* dec);
+
+/**
+ * Set the parallel runner for multithreading. May only be set before starting
+ * decoding.
+ *
+ * @param dec decoder object
+ * @param parallel_runner function pointer to runner for multithreading. It may
+ *     be NULL to use the default, single-threaded, runner. A multithreaded
+ *     runner should be set to reach fast performance.
+ * @param parallel_runner_opaque opaque pointer for parallel_runner.
+ * @return @ref JXL_DEC_SUCCESS if the runner was set, @ref JXL_DEC_ERROR
+ *     otherwise (the previous runner remains set).
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetParallelRunner(JxlDecoder* dec, JxlParallelRunner parallel_runner,
+                            void* parallel_runner_opaque);
+
+/**
+ * Returns a hint indicating how many more bytes the decoder is expected to
+ * need to make @ref JxlDecoderGetBasicInfo available after the next @ref
+ * JxlDecoderProcessInput call. This is a suggested large enough value for
+ * the amount of bytes to provide in the next @ref JxlDecoderSetInput call, but
+ * it is not guaranteed to be an upper bound nor a lower bound. This number does
+ * not include bytes that have already been released from the input. Can be used
+ * before the first @ref JxlDecoderProcessInput call, and is correct the first
+ * time in most cases. If not, @ref JxlDecoderSizeHintBasicInfo can be called
+ * again to get an updated hint.
+ *
+ * @param dec decoder object
+ * @return the size hint in bytes if the basic info is not yet fully decoded.
+ * @return 0 when the basic info is already available.
+ */
+JXL_EXPORT size_t JxlDecoderSizeHintBasicInfo(const JxlDecoder* dec);
+
+/** Select for which informative events, i.e. @ref JXL_DEC_BASIC_INFO, etc., the
+ * decoder should return with a status. It is not required to subscribe to any
+ * events; data can still be requested from the decoder once it is available.
+ * By default, the decoder is subscribed to no events (events_wanted == 0), and
+ * the decoder will then only return when it cannot continue because it needs
+ * more input data or more output buffer. This function may only be called
+ * before using @ref JxlDecoderProcessInput.
+ *
+ * @param dec decoder object
+ * @param events_wanted bitfield of desired events.
+ * @return @ref JXL_DEC_SUCCESS if no error, @ref JXL_DEC_ERROR otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSubscribeEvents(JxlDecoder* dec,
+                                                      int events_wanted);
+
+/** Enables or disables preserving of as-in-bitstream pixeldata
+ * orientation. Some images are encoded with an Orientation tag
+ * indicating that the decoder must perform a rotation and/or
+ * mirroring to the encoded image data.
+ *
+ *  - If skip_reorientation is JXL_FALSE (the default): the decoder
+ *    will apply the transformation from the orientation setting, hence
+ *    rendering the image according to its specified intent. When
+ *    producing a JxlBasicInfo, the decoder will always set the
+ *    orientation field to JXL_ORIENT_IDENTITY (matching the returned
+ *    pixel data) and also align xsize and ysize so that they correspond
+ *    to the width and the height of the returned pixel data.
+ *  - If skip_reorientation is JXL_TRUE: the decoder will skip
+ *    applying the transformation from the orientation setting, returning
+ *    the image in the as-in-bitstream pixeldata orientation.
+ *    This may be faster to decode since the decoder doesn't have to apply the
+ *    transformation, but can cause wrong display of the image if the
+ *    orientation tag is not correctly taken into account by the user.
+ *
+ * By default, this option is disabled, and the returned pixel data is
+ * re-oriented according to the image's Orientation setting.
+ *
+ * This function must be called at the beginning, before decoding is performed.
+ *
+ * @see JxlBasicInfo for the orientation field, and @ref JxlOrientation for the
+ * possible values.
+ *
+ * @param dec decoder object
+ * @param skip_reorientation JXL_TRUE to enable, JXL_FALSE to disable.
+ * @return @ref JXL_DEC_SUCCESS if no error, @ref JXL_DEC_ERROR otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetKeepOrientation(JxlDecoder* dec, JXL_BOOL skip_reorientation);
+
+/**
+ * Enables or disables preserving of associated alpha channels. If
+ * unpremul_alpha is set to JXL_FALSE then for associated alpha channel, the
+ * pixel data is returned with premultiplied colors. If it is set to JXL_TRUE,
+ * the colors will be unpremultiplied based on the alpha channel. This function
+ * has no effect if the image does not have an associated alpha channel.
+ *
+ * By default, this option is disabled, and the pixel data is returned "as is".
+ *
+ * This function must be called at the beginning, before decoding is performed.
+ *
+ * @param dec decoder object
+ * @param unpremul_alpha JXL_TRUE to enable, JXL_FALSE to disable.
+ * @return @ref JXL_DEC_SUCCESS if no error, @ref JXL_DEC_ERROR otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetUnpremultiplyAlpha(JxlDecoder* dec, JXL_BOOL unpremul_alpha);
+
+/** Enables or disables rendering spot colors. By default, spot colors
+ * are rendered, which is OK for viewing the decoded image. If render_spotcolors
+ * is JXL_FALSE, then spot colors are not rendered, and have to be retrieved
+ * separately using @ref JxlDecoderSetExtraChannelBuffer. This is useful for
+ * e.g. printing applications.
+ *
+ * @param dec decoder object
+ * @param render_spotcolors JXL_TRUE to enable (default), JXL_FALSE to disable.
+ * @return @ref JXL_DEC_SUCCESS if no error, @ref JXL_DEC_ERROR otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetRenderSpotcolors(JxlDecoder* dec, JXL_BOOL render_spotcolors);
+
+/** Enables or disables coalescing of zero-duration frames. By default, frames
+ * are returned with coalescing enabled, i.e. all frames have the image
+ * dimensions, and are blended if needed. When coalescing is disabled, frames
+ * can have arbitrary dimensions, a non-zero crop offset, and blending is not
+ * performed. For display, coalescing is recommended. For loading a multi-layer
+ * still image as separate layers (as opposed to the merged image), coalescing
+ * has to be disabled.
+ *
+ * @param dec decoder object
+ * @param coalescing JXL_TRUE to enable coalescing (default), JXL_FALSE to
+ *     disable it.
+ * @return @ref JXL_DEC_SUCCESS if no error, @ref JXL_DEC_ERROR otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetCoalescing(JxlDecoder* dec,
+                                                    JXL_BOOL coalescing);
+
+/**
+ * Decodes JPEG XL file using the available bytes. Requires input has been
+ * set with @ref JxlDecoderSetInput. After @ref JxlDecoderProcessInput, input
+ * can optionally be released with @ref JxlDecoderReleaseInput and then set
+ * again to next bytes in the stream. @ref JxlDecoderReleaseInput returns how
+ * many bytes are not yet processed, before a next call to @ref
+ * JxlDecoderProcessInput all unprocessed bytes must be provided again (the
+ * address need not match, but the contents must), and more bytes may be
+ * concatenated after the unprocessed bytes.
+ *
+ * The returned status indicates whether the decoder needs more input bytes, or
+ * more output buffer for a certain type of output data. No matter what the
+ * returned status is (other than @ref JXL_DEC_ERROR), new information, such
+ * as @ref JxlDecoderGetBasicInfo, may have become available after this call.
+ * When the return value is not @ref JXL_DEC_ERROR or @ref JXL_DEC_SUCCESS, the
+ * decoding requires more @ref JxlDecoderProcessInput calls to continue.
+ *
+ * @param dec decoder object
+ * @return @ref JXL_DEC_SUCCESS when decoding finished and all events handled.
+ *     If you still have more unprocessed input data anyway, then you can still
+ *     continue by using @ref JxlDecoderSetInput and calling @ref
+ *     JxlDecoderProcessInput again, similar to handling @ref
+ *     JXL_DEC_NEED_MORE_INPUT. @ref JXL_DEC_SUCCESS can occur instead of @ref
+ *     JXL_DEC_NEED_MORE_INPUT when, for example, the input data ended right at
+ *     the boundary of a box of the container format, all essential codestream
+ *     boxes were already decoded, but extra metadata boxes are still present in
+ *     the next data. @ref JxlDecoderProcessInput cannot return success if all
+ *     codestream boxes have not been seen yet.
+ * @return @ref JXL_DEC_ERROR when decoding failed, e.g. invalid codestream.
+ *     TODO(lode): document the input data mechanism
+ * @return @ref JXL_DEC_NEED_MORE_INPUT when more input data is necessary.
+ * @return @ref JXL_DEC_BASIC_INFO when basic info such as image dimensions is
+ *     available and this informative event is subscribed to.
+ * @return @ref JXL_DEC_COLOR_ENCODING when color profile information is
+ *     available and this informative event is subscribed to.
+ * @return @ref JXL_DEC_PREVIEW_IMAGE when preview pixel information is
+ *     available and output in the preview buffer.
+ * @return @ref JXL_DEC_FULL_IMAGE when all pixel information at highest detail
+ *     is available and has been output in the pixel buffer.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderProcessInput(JxlDecoder* dec);
+
+/**
+ * Sets input data for @ref JxlDecoderProcessInput. The data is owned by the
+ * caller and may be used by the decoder until @ref JxlDecoderReleaseInput is
+ * called or the decoder is destroyed or reset so must be kept alive until then.
+ * Cannot be called if @ref JxlDecoderSetInput was already called and @ref
+ * JxlDecoderReleaseInput was not yet called, and cannot be called after @ref
+ * JxlDecoderCloseInput indicating the end of input was called.
+ *
+ * @param dec decoder object
+ * @param data pointer to next bytes to read from
+ * @param size amount of bytes available starting from data
+ * @return @ref JXL_DEC_ERROR if input was already set without releasing or @ref
+ *     JxlDecoderCloseInput was already called, @ref JXL_DEC_SUCCESS otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetInput(JxlDecoder* dec,
+                                               const uint8_t* data,
+                                               size_t size);
+
+/**
+ * Releases input which was provided with @ref JxlDecoderSetInput. Between @ref
+ * JxlDecoderProcessInput and @ref JxlDecoderReleaseInput, the user may not
+ * alter the data in the buffer. Calling @ref JxlDecoderReleaseInput is required
+ * whenever any input is already set and new input needs to be added with @ref
+ * JxlDecoderSetInput, but is not required before @ref JxlDecoderDestroy or @ref
+ * JxlDecoderReset. Calling @ref JxlDecoderReleaseInput when no input is set is
+ * not an error and returns 0.
+ *
+ * @param dec decoder object
+ * @return The amount of bytes the decoder has not yet processed that are still
+ *     remaining in the data set by @ref JxlDecoderSetInput, or 0 if no input is
+ *     set or @ref JxlDecoderReleaseInput was already called. For a next call
+ *     to @ref JxlDecoderProcessInput, the buffer must start with these
+ *     unprocessed bytes. From this value it is possible to infer the position
+ *     of certain JPEG XL codestream elements (e.g. end of headers, frame
+ *     start/end). See the documentation of individual values of @ref
+ *     JxlDecoderStatus for more information.
+ */
+JXL_EXPORT size_t JxlDecoderReleaseInput(JxlDecoder* dec);
+
+/**
+ * Marks the input as finished, indicating that no more @ref JxlDecoderSetInput
+ * will be called. This function allows the decoder to determine correctly if it
+ * should return success, need more input or error in certain cases. For
+ * backwards compatibility with a previous version of the API, using this
+ * function is optional when not using the @ref JXL_DEC_BOX event (the decoder
+ * is able to determine the end of the image frames without marking the end),
+ * but using this function is required when using @ref JXL_DEC_BOX for getting
+ * metadata box contents. This function does not replace @ref
+ * JxlDecoderReleaseInput; that function should still be called if its return
+ * value is needed.
+ *
+ * @ref JxlDecoderCloseInput should be called as soon as all known input bytes
+ * are set (e.g. at the beginning when not streaming but setting all input
+ * at once), before the final @ref JxlDecoderProcessInput calls.
+ *
+ * @param dec decoder object
+ */
+JXL_EXPORT void JxlDecoderCloseInput(JxlDecoder* dec);
+
+/**
+ * Outputs the basic image information, such as image dimensions, bit depth and
+ * all other JxlBasicInfo fields, if available.
+ *
+ * @param dec decoder object
+ * @param info struct to copy the information into, or NULL to only check
+ *     whether the information is available through the return value.
+ * @return @ref JXL_DEC_SUCCESS if the value is available, @ref
+ *     JXL_DEC_NEED_MORE_INPUT if not yet available, @ref JXL_DEC_ERROR
+ *     in case of other error conditions.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetBasicInfo(const JxlDecoder* dec,
+                                                   JxlBasicInfo* info);
+
+/**
+ * Outputs information for extra channel at the given index. The index must be
+ * smaller than num_extra_channels in the associated JxlBasicInfo.
+ *
+ * @param dec decoder object
+ * @param index index of the extra channel to query.
+ * @param info struct to copy the information into, or NULL to only check
+ *     whether the information is available through the return value.
+ * @return @ref JXL_DEC_SUCCESS if the value is available, @ref
+ *     JXL_DEC_NEED_MORE_INPUT if not yet available, @ref JXL_DEC_ERROR
+ *     in case of other error conditions.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetExtraChannelInfo(
+    const JxlDecoder* dec, size_t index, JxlExtraChannelInfo* info);
+
+/**
+ * Outputs name for extra channel at the given index in UTF-8. The index must be
+ * smaller than num_extra_channels in the associated JxlBasicInfo. The buffer
+ * for name must have at least name_length + 1 bytes allocated, gotten from
+ * the associated JxlExtraChannelInfo.
+ *
+ * @param dec decoder object
+ * @param index index of the extra channel to query.
+ * @param name buffer to copy the name into
+ * @param size size of the name buffer in bytes
+ * @return @ref JXL_DEC_SUCCESS if the value is available, @ref
+ *     JXL_DEC_NEED_MORE_INPUT if not yet available, @ref JXL_DEC_ERROR
+ *     in case of other error conditions.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetExtraChannelName(const JxlDecoder* dec,
+                                                          size_t index,
+                                                          char* name,
+                                                          size_t size);
+
+/** Defines which color profile to get: the profile from the codestream
+ * metadata header, which represents the color profile of the original image,
+ * or the color profile from the pixel data produced by the decoder. Both are
+ * the same if the JxlBasicInfo has uses_original_profile set.
+ */
+typedef enum {
+  /** Get the color profile of the original image from the metadata.
+   */
+  JXL_COLOR_PROFILE_TARGET_ORIGINAL = 0,
+
+  /** Get the color profile of the pixel data the decoder outputs. */
+  JXL_COLOR_PROFILE_TARGET_DATA = 1,
+} JxlColorProfileTarget;
+
+/**
+ * Outputs the color profile as JPEG XL encoded structured data, if available.
+ * This is an alternative to an ICC Profile, which can represent a more limited
+ * amount of color spaces, but represents them exactly through enum values.
+ *
+ * It is often possible to use @ref JxlDecoderGetColorAsICCProfile as an
+ * alternative anyway. The following scenarios are possible:
+ *  - The JPEG XL image has an attached ICC Profile, in that case, the encoded
+ *    structured data is not available, this function will return an error
+ *    status. @ref JxlDecoderGetColorAsICCProfile should be called instead.
+ *  - The JPEG XL image has an encoded structured color profile, and it
+ *    represents an RGB or grayscale color space. This function will return it.
+ *    You can still use @ref JxlDecoderGetColorAsICCProfile as well as an
+ *    alternative if desired, though depending on which RGB color space is
+ *    represented, the ICC profile may be a close approximation. It is also not
+ *    always feasible to deduce from an ICC profile which named color space it
+ *    exactly represents, if any, as it can represent any arbitrary space.
+ *    HDR color spaces such as those using PQ and HLG are also potentially
+ *    problematic, in that: while ICC profiles can encode a transfer function
+ *    that happens to approximate those of PQ and HLG (HLG for only one given
+ *    system gamma at a time, and necessitating a 3D LUT if gamma is to be
+ *    different from 1), they cannot (before ICCv4.4) semantically signal that
+ *    this is the color space that they represent. Therefore, they will
+ *    typically not actually be interpreted as representing an HDR color space.
+ *    This is especially detrimental to PQ which will then be interpreted as if
+ *    the maximum signal value represented SDR white instead of 10000 cd/m^2,
+ *    meaning that the image will be displayed two orders of magnitude (5-7 EV)
+ *    too dim.
+ *  - The JPEG XL image has an encoded structured color profile, and it
+ *    indicates an unknown or xyb color space. In that case, @ref
+ *    JxlDecoderGetColorAsICCProfile is not available.
+ *
+ * When rendering an image on a system where ICC-based color management is used,
+ * @ref JxlDecoderGetColorAsICCProfile should generally be used first as it will
+ * return a ready-to-use profile (with the aforementioned caveat about HDR).
+ * When knowledge about the nominal color space is desired if available, @ref
+ * JxlDecoderGetColorAsEncodedProfile should be used first.
+ *
+ * @param dec decoder object
+ * @param target whether to get the original color profile from the metadata
+ *     or the color profile of the decoded pixels.
+ * @param color_encoding struct to copy the information into, or NULL to only
+ *     check whether the information is available through the return value.
+ * @return @ref JXL_DEC_SUCCESS if the data is available and returned, @ref
+ *     JXL_DEC_NEED_MORE_INPUT if not yet available, @ref JXL_DEC_ERROR in
+ *     case the encoded structured color profile does not exist in the
+ *     codestream.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetColorAsEncodedProfile(
+    const JxlDecoder* dec, JxlColorProfileTarget target,
+    JxlColorEncoding* color_encoding);
+
+/**
+ * Outputs the size in bytes of the ICC profile returned by @ref
+ * JxlDecoderGetColorAsICCProfile, if available, or indicates there is none
+ * available. In most cases, the image will have an ICC profile available, but
+ * if it does not, @ref JxlDecoderGetColorAsEncodedProfile must be used instead.
+ *
+ * @see JxlDecoderGetColorAsEncodedProfile for more information. The ICC
+ * profile is either the exact ICC profile attached to the codestream metadata,
+ * or a close approximation generated from JPEG XL encoded structured data,
+ * depending on what is encoded in the codestream.
+ *
+ * @param dec decoder object
+ * @param target whether to get the original color profile from the metadata
+ *     or the color profile of the decoded pixels.
+ * @param size variable to output the size into, or NULL to only check the
+ *     return status.
+ * @return @ref JXL_DEC_SUCCESS if the ICC profile is available, @ref
+ *     JXL_DEC_NEED_MORE_INPUT if the decoder has not yet received enough
+ *     input data to determine whether an ICC profile is available or what its
+ *     size is, @ref JXL_DEC_ERROR in case the ICC profile is not available and
+ *     cannot be generated.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetICCProfileSize(
+    const JxlDecoder* dec, JxlColorProfileTarget target, size_t* size);
+
+/**
+ * Outputs ICC profile if available. The profile is only available if @ref
+ * JxlDecoderGetICCProfileSize returns success. The output buffer must have
+ * at least as many bytes as given by @ref JxlDecoderGetICCProfileSize.
+ *
+ * @param dec decoder object
+ * @param target whether to get the original color profile from the metadata
+ *     or the color profile of the decoded pixels.
+ * @param icc_profile buffer to copy the ICC profile into
+ * @param size size of the icc_profile buffer in bytes
+ * @return @ref JXL_DEC_SUCCESS if the profile was successfully returned,
+ *     @ref JXL_DEC_NEED_MORE_INPUT if not yet available, @ref
+ *     JXL_DEC_ERROR if the profile doesn't exist or the output size is not
+ *     large enough.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetColorAsICCProfile(
+    const JxlDecoder* dec, JxlColorProfileTarget target, uint8_t* icc_profile,
+    size_t size);
+
+/** Sets the desired output color profile of the decoded image by calling
+ * @ref JxlDecoderSetOutputColorProfile, passing on @c color_encoding and
+ * setting @c icc_data to NULL. See @ref JxlDecoderSetOutputColorProfile for
+ * details.
+ *
+ * @param dec decoder object
+ * @param color_encoding the default color encoding to set
+ * @return @ref JXL_DEC_SUCCESS if the preference was set successfully, @ref
+ *     JXL_DEC_ERROR otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetPreferredColorProfile(
+    JxlDecoder* dec, const JxlColorEncoding* color_encoding);
+
+/** Requests that the decoder perform tone mapping to the peak display luminance
+ * passed as @c desired_intensity_target, if appropriate.
+ * @note This is provided for convenience and the exact tone mapping that is
+ * performed is not meant to be considered authoritative in any way. It may
+ * change from version to version.
+ * @param dec decoder object
+ * @param desired_intensity_target the intended target peak luminance
+ * @return @ref JXL_DEC_SUCCESS if the preference was set successfully, @ref
+ * JXL_DEC_ERROR otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetDesiredIntensityTarget(
+    JxlDecoder* dec, float desired_intensity_target);
+
+/**
+ * Sets the desired output color profile of the decoded image either from a
+ * color encoding or an ICC profile. Valid calls of this function have either @c
+ * color_encoding or @c icc_data set to NULL and @c icc_size must be 0 if and
+ * only if @c icc_data is NULL.
+ *
+ * Depending on whether a color management system (CMS) has been set the
+ * behavior is as follows:
+ *
+ * If a color management system (CMS) has been set with @ref JxlDecoderSetCms,
+ * and the CMS supports output to the desired color encoding or ICC profile,
+ * then it will provide the output in that color encoding or ICC profile. If the
+ * desired color encoding or the ICC is not supported, then an error will be
+ * returned.
+ *
+ * If no CMS has been set with @ref JxlDecoderSetCms, there are two cases:
+ *
+ * (1) Calling this function with a color encoding will convert XYB images to
+ * the desired color encoding. In this case, if the requested color encoding has
+ * a narrower gamut, or the white points differ, then the resulting image can
+ * have significant color distortion. Non-XYB images will not be converted to
+ * the desired color space.
+ *
+ * (2) Calling this function with an ICC profile will result in an error.
+ *
+ * If called with an ICC profile (after a call to @ref JxlDecoderSetCms), the
+ * ICC profile has to be a valid RGB or grayscale color profile.
+ *
+ * Can only be set after the @ref JXL_DEC_COLOR_ENCODING event occurred and
+ * before any other event occurred, and should be used before getting
+ * JXL_COLOR_PROFILE_TARGET_DATA.
+ *
+ * This function must not be called before JxlDecoderSetCms.
+ *
+ * @param dec decoder object
+ * @param color_encoding the output color encoding
+ * @param icc_data bytes of the icc profile
+ * @param icc_size size of the icc profile in bytes
+ * @return @ref JXL_DEC_SUCCESS if the color profile was set successfully, @ref
+ *     JXL_DEC_ERROR otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetOutputColorProfile(
+    JxlDecoder* dec, const JxlColorEncoding* color_encoding,
+    const uint8_t* icc_data, size_t icc_size);
+
+/**
+ * Sets the color management system (CMS) that will be used for color
+ * conversion (if applicable) during decoding. May only be set before starting
+ * decoding and must not be called after @ref JxlDecoderSetOutputColorProfile.
+ *
+ * See @ref JxlDecoderSetOutputColorProfile for how color conversions are done
+ * depending on whether or not a CMS has been set with @ref JxlDecoderSetCms.
+ *
+ * @param dec decoder object.
+ * @param cms structure representing a CMS implementation. See @ref
+ * JxlCmsInterface for more details.
+ */
+JXL_EXPORT void JxlDecoderSetCms(JxlDecoder* dec, JxlCmsInterface cms);
+// TODO(firsching): add a function JxlDecoderSetDefaultCms() for setting a
+// default in case libjxl is built with a CMS.
+
+/**
+ * Returns the minimum size in bytes of the preview image output pixel buffer
+ * for the given format. This is the buffer for @ref
+ * JxlDecoderSetPreviewOutBuffer. Requires the preview header information is
+ * available in the decoder.
+ *
+ * @param dec decoder object
+ * @param format format of pixels
+ * @param size output value, buffer size in bytes
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as
+ *     information not available yet.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderPreviewOutBufferSize(
+    const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size);
+
+/**
+ * Sets the buffer to write the small resolution preview image
+ * to. The size of the buffer must be at least as large as given by @ref
+ * JxlDecoderPreviewOutBufferSize. The buffer follows the format described
+ * by JxlPixelFormat. The preview image dimensions are given by the
+ * JxlPreviewHeader. The buffer is owned by the caller.
+ *
+ * @param dec decoder object
+ * @param format format of pixels. Object owned by user and its contents are
+ *     copied internally.
+ * @param buffer buffer type to output the pixel data to
+ * @param size size of buffer in bytes
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as
+ *     size too small.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetPreviewOutBuffer(
+    JxlDecoder* dec, const JxlPixelFormat* format, void* buffer, size_t size);
+
+/**
+ * Outputs the information from the frame, such as duration when have_animation.
+ * This function can be called when @ref JXL_DEC_FRAME occurred for the current
+ * frame, even when have_animation in the JxlBasicInfo is JXL_FALSE.
+ *
+ * @param dec decoder object
+ * @param header struct to copy the information into, or NULL to only check
+ *     whether the information is available through the return value.
+ * @return @ref JXL_DEC_SUCCESS if the value is available, @ref
+ *     JXL_DEC_NEED_MORE_INPUT if not yet available, @ref JXL_DEC_ERROR in
+ *     case of other error conditions.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetFrameHeader(const JxlDecoder* dec,
+                                                     JxlFrameHeader* header);
+
+/**
+ * Outputs name for the current frame. The buffer for name must have at least
+ * name_length + 1 bytes allocated, gotten from the associated JxlFrameHeader.
+ *
+ * @param dec decoder object
+ * @param name buffer to copy the name into
+ * @param size size of the name buffer in bytes, including zero termination
+ *    character, so this must be at least JxlFrameHeader.name_length + 1.
+ * @return @ref JXL_DEC_SUCCESS if the value is available, @ref
+ *     JXL_DEC_NEED_MORE_INPUT if not yet available, @ref JXL_DEC_ERROR in
+ *     case of other error conditions.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetFrameName(const JxlDecoder* dec,
+                                                   char* name, size_t size);
+
+/**
+ * Outputs the blend information for the current frame for a specific extra
+ * channel. This function can be called when @ref JXL_DEC_FRAME occurred for the
+ * current frame, even when have_animation in the JxlBasicInfo is JXL_FALSE.
+ * This information is only useful if coalescing is disabled; otherwise the
+ * decoder will have performed blending already.
+ *
+ * @param dec decoder object
+ * @param index the index of the extra channel
+ * @param blend_info struct to copy the information into
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetExtraChannelBlendInfo(
+    const JxlDecoder* dec, size_t index, JxlBlendInfo* blend_info);
+
+/**
+ * Returns the minimum size in bytes of the image output pixel buffer for the
+ * given format. This is the buffer for @ref JxlDecoderSetImageOutBuffer.
+ * Requires that the basic image information is available in the decoder in the
+ * case of coalescing enabled (default). In case coalescing is disabled, this
+ * can only be called after the @ref JXL_DEC_FRAME event occurs. In that case,
+ * it will return the size required to store the possibly cropped frame (which
+ * can be larger or smaller than the image dimensions).
+ *
+ * @param dec decoder object
+ * @param format format of the pixels.
+ * @param size output value, buffer size in bytes
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as
+ *     information not available yet.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderImageOutBufferSize(
+    const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size);
+
+/**
+ * Sets the buffer to write the full resolution image to. This can be set when
+ * the @ref JXL_DEC_FRAME event occurs, must be set when the @ref
+ * JXL_DEC_NEED_IMAGE_OUT_BUFFER event occurs, and applies only for the
+ * current frame. The size of the buffer must be at least as large as given
+ * by @ref JxlDecoderImageOutBufferSize. The buffer follows the format described
+ * by JxlPixelFormat. The buffer is owned by the caller.
+ *
+ * @param dec decoder object
+ * @param format format of the pixels. Object owned by user and its contents
+ *     are copied internally.
+ * @param buffer buffer type to output the pixel data to
+ * @param size size of buffer in bytes
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as
+ *     size too small.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetImageOutBuffer(
+    JxlDecoder* dec, const JxlPixelFormat* format, void* buffer, size_t size);
+
+/**
+ * Function type for @ref JxlDecoderSetImageOutCallback.
+ *
+ * The callback may be called simultaneously by different threads when using a
+ * threaded parallel runner, on different pixels.
+ *
+ * @param opaque optional user data, as given to @ref
+ *     JxlDecoderSetImageOutCallback.
+ * @param x horizontal position of leftmost pixel of the pixel data.
+ * @param y vertical position of the pixel data.
+ * @param num_pixels amount of pixels included in the pixel data, horizontally.
+ *     This is not the same as xsize of the full image, it may be smaller.
+ * @param pixels pixel data as a horizontal stripe, in the format passed to @ref
+ *     JxlDecoderSetImageOutCallback. The memory is not owned by the user, and
+ *     is only valid during the time the callback is running.
+ */
+typedef void (*JxlImageOutCallback)(void* opaque, size_t x, size_t y,
+                                    size_t num_pixels, const void* pixels);
+
+/**
+ * Initialization callback for @ref JxlDecoderSetMultithreadedImageOutCallback.
+ *
+ * @param init_opaque optional user data, as given to @ref
+ *     JxlDecoderSetMultithreadedImageOutCallback.
+ * @param num_threads maximum number of threads that will call the @c run
+ *     callback concurrently.
+ * @param num_pixels_per_thread maximum number of pixels that will be passed in
+ *     one call to @c run.
+ * @return a pointer to data that will be passed to the @c run callback, or
+ *     @c NULL if initialization failed.
+ */
+typedef void* (*JxlImageOutInitCallback)(void* init_opaque, size_t num_threads,
+                                         size_t num_pixels_per_thread);
+
+/**
+ * Worker callback for @ref JxlDecoderSetMultithreadedImageOutCallback.
+ *
+ * @param run_opaque user data returned by the @c init callback.
+ * @param thread_id number in `[0, num_threads)` identifying the thread of the
+ *     current invocation of the callback.
+ * @param x horizontal position of the first (leftmost) pixel of the pixel data.
+ * @param y vertical position of the pixel data.
+ * @param num_pixels number of pixels in the pixel data. May be less than the
+ *     full @c xsize of the image, and will be at most equal to the @c
+ *     num_pixels_per_thread that was passed to @c init.
+ * @param pixels pixel data as a horizontal stripe, in the format passed to @ref
+ *     JxlDecoderSetMultithreadedImageOutCallback. The data pointed to
+ *     remains owned by the caller and is only guaranteed to outlive the current
+ *     callback invocation.
+ */
+typedef void (*JxlImageOutRunCallback)(void* run_opaque, size_t thread_id,
+                                       size_t x, size_t y, size_t num_pixels,
+                                       const void* pixels);
+
+/**
+ * Destruction callback for @ref JxlDecoderSetMultithreadedImageOutCallback,
+ * called after all invocations of the @c run callback to perform any
+ * appropriate clean-up of the @c run_opaque data returned by @c init.
+ *
+ * @param run_opaque user data returned by the @c init callback.
+ */
+typedef void (*JxlImageOutDestroyCallback)(void* run_opaque);
+
+/**
+ * Sets pixel output callback. This is an alternative to @ref
+ * JxlDecoderSetImageOutBuffer. This can be set when the @ref JXL_DEC_FRAME
+ * event occurs, must be set when the @ref JXL_DEC_NEED_IMAGE_OUT_BUFFER event
+ * occurs, and applies only for the current frame. Only one of @ref
+ * JxlDecoderSetImageOutBuffer or @ref JxlDecoderSetImageOutCallback may be used
+ * for the same frame, not both at the same time.
+ *
+ * The callback will be called multiple times, to receive the image
+ * data in small chunks. The callback receives a horizontal stripe of pixel
+ * data, 1 pixel high, xsize pixels wide, called a scanline. The xsize here is
+ * not the same as the full image width, the scanline may be a partial section,
+ * and xsize may differ between calls. The user can then process and/or copy the
+ * partial scanline to an image buffer. The callback may be called
+ * simultaneously by different threads when using a threaded parallel runner, on
+ * different pixels.
+ *
+ * If @ref JxlDecoderFlushImage is not used, then each pixel will be visited
+ * exactly once by the different callback calls, during processing with one or
+ * more @ref JxlDecoderProcessInput calls. These pixels are decoded to full
+ * detail, they are not part of a lower resolution or lower quality progressive
+ * pass, but the final pass.
+ *
+ * If @ref JxlDecoderFlushImage is used, then in addition each pixel will be
+ * visited zero or one times during the blocking @ref JxlDecoderFlushImage call.
+ * Pixels visited as a result of @ref JxlDecoderFlushImage may represent a lower
+ * resolution or lower quality intermediate progressive pass of the image. Any
+ * visited pixel will be of a quality at least as good or better than previous
+ * visits of this pixel. A pixel may be visited zero times if it cannot be
+ * decoded yet or if it was already decoded to full precision (this behavior is
+ * not guaranteed).
+ *
+ * @param dec decoder object
+ * @param format format of the pixels. Object owned by user; its contents are
+ *     copied internally.
+ * @param callback the callback function receiving partial scanlines of pixel
+ *     data.
+ * @param opaque optional user data, which will be passed on to the callback,
+ *     may be NULL.
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such
+ *     as @ref JxlDecoderSetImageOutBuffer already set.
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetImageOutCallback(JxlDecoder* dec, const JxlPixelFormat* format,
+                              JxlImageOutCallback callback, void* opaque);
+
+/** Similar to @ref JxlDecoderSetImageOutCallback except that the callback is
+ * allowed an initialization phase during which it is informed of how many
+ * threads will call it concurrently, and those calls are further informed of
+ * which thread they are occurring in.
+ *
+ * @param dec decoder object
+ * @param format format of the pixels. Object owned by user; its contents are
+ *     copied internally.
+ * @param init_callback initialization callback.
+ * @param run_callback the callback function receiving partial scanlines of
+ *     pixel data.
+ * @param destroy_callback clean-up callback invoked after all calls to @c
+ *     run_callback. May be NULL if no clean-up is necessary.
+ * @param init_opaque optional user data passed to @c init_callback, may be NULL
+ *     (unlike the return value from @c init_callback which may only be NULL if
+ *     initialization failed).
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such
+ *     as @ref JxlDecoderSetImageOutBuffer having already been called.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetMultithreadedImageOutCallback(
+    JxlDecoder* dec, const JxlPixelFormat* format,
+    JxlImageOutInitCallback init_callback, JxlImageOutRunCallback run_callback,
+    JxlImageOutDestroyCallback destroy_callback, void* init_opaque);
+
+/**
+ * Returns the minimum size in bytes of an extra channel pixel buffer for the
+ * given format. This is the buffer for @ref JxlDecoderSetExtraChannelBuffer.
+ * Requires the basic image information is available in the decoder.
+ *
+ * @param dec decoder object
+ * @param format format of the pixels. The num_channels value is ignored and is
+ *     always treated to be 1.
+ * @param size output value, buffer size in bytes
+ * @param index which extra channel to get, matching the index used in @ref
+ *     JxlDecoderGetExtraChannelInfo. Must be smaller than num_extra_channels in
+ *     the associated JxlBasicInfo.
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as
+ *     information not available yet or invalid index.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderExtraChannelBufferSize(
+    const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size,
+    uint32_t index);
+
+/**
+ * Sets the buffer to write an extra channel to. This can be set when
+ * the @ref JXL_DEC_FRAME or @ref JXL_DEC_NEED_IMAGE_OUT_BUFFER event occurs,
+ * and applies only for the current frame. The size of the buffer must be at
+ * least as large as given by @ref JxlDecoderExtraChannelBufferSize. The buffer
+ * follows the format described by JxlPixelFormat, but where num_channels is 1.
+ * The buffer is owned by the caller. The amount of extra channels is given by
+ * the num_extra_channels field in the associated JxlBasicInfo, and the
+ * information of individual extra channels can be queried with @ref
+ * JxlDecoderGetExtraChannelInfo. To get multiple extra channels, this function
+ * must be called multiple times, once for each wanted index. Not all images
+ * have extra channels. The alpha channel is an extra channel and can be gotten
+ * as part of the color channels when using an RGBA pixel buffer with @ref
+ * JxlDecoderSetImageOutBuffer, but it can additionally be gotten
+ * separately as an extra channel. The color channels themselves cannot be
+ * gotten this way.
+ *
+ *
+ * @param dec decoder object
+ * @param format format of the pixels. Object owned by user and its contents
+ *     are copied internally. The num_channels value is ignored and is always
+ *     treated to be 1.
+ * @param buffer buffer type to output the pixel data to
+ * @param size size of buffer in bytes
+ * @param index which extra channel to get, matching the index used in @ref
+ *     JxlDecoderGetExtraChannelInfo. Must be smaller than num_extra_channels in
+ *     the associated JxlBasicInfo.
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as
+ *     size too small or invalid index.
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetExtraChannelBuffer(JxlDecoder* dec, const JxlPixelFormat* format,
+                                void* buffer, size_t size, uint32_t index);
+
+/**
+ * Sets output buffer for reconstructed JPEG codestream.
+ *
+ * The data is owned by the caller and may be used by the decoder until @ref
+ * JxlDecoderReleaseJPEGBuffer is called or the decoder is destroyed or
+ * reset so must be kept alive until then.
+ *
+ * If a JPEG buffer was set before and released with @ref
+ * JxlDecoderReleaseJPEGBuffer, bytes that the decoder has already output
+ * should not be included, only the remaining bytes output must be set.
+ *
+ * @param dec decoder object
+ * @param data pointer to next bytes to write to
+ * @param size amount of bytes available starting from data
+ * @return @ref JXL_DEC_ERROR if output buffer was already set and @ref
+ *     JxlDecoderReleaseJPEGBuffer was not called on it, @ref JXL_DEC_SUCCESS
+ *     otherwise
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetJPEGBuffer(JxlDecoder* dec,
+                                                    uint8_t* data, size_t size);
+
+/**
+ * Releases buffer which was provided with @ref JxlDecoderSetJPEGBuffer.
+ *
+ * Calling @ref JxlDecoderReleaseJPEGBuffer is required whenever
+ * a buffer is already set and a new buffer needs to be added with @ref
+ * JxlDecoderSetJPEGBuffer, but is not required before @ref
+ * JxlDecoderDestroy or @ref JxlDecoderReset.
+ *
+ * Calling @ref JxlDecoderReleaseJPEGBuffer when no buffer is set is
+ * not an error and returns 0.
+ *
+ * @param dec decoder object
+ * @return the amount of bytes the decoder has not yet written to of the data
+ *     set by @ref JxlDecoderSetJPEGBuffer, or 0 if no buffer is set or @ref
+ *     JxlDecoderReleaseJPEGBuffer was already called.
+ */
+JXL_EXPORT size_t JxlDecoderReleaseJPEGBuffer(JxlDecoder* dec);
+
+/**
+ * Sets output buffer for box output codestream.
+ *
+ * The data is owned by the caller and may be used by the decoder until @ref
+ * JxlDecoderReleaseBoxBuffer is called or the decoder is destroyed or
+ * reset so must be kept alive until then.
+ *
+ * If for the current box a box buffer was set before and released with @ref
+ * JxlDecoderReleaseBoxBuffer, bytes that the decoder has already output
+ * should not be included, only the remaining bytes output must be set.
+ *
+ * The @ref JxlDecoderReleaseBoxBuffer must be used at the next @ref JXL_DEC_BOX
+ * event or final @ref JXL_DEC_SUCCESS event to compute the size of the output
+ * box bytes.
+ *
+ * @param dec decoder object
+ * @param data pointer to next bytes to write to
+ * @param size amount of bytes available starting from data
+ * @return @ref JXL_DEC_ERROR if output buffer was already set and @ref
+ *     JxlDecoderReleaseBoxBuffer was not called on it, @ref JXL_DEC_SUCCESS
+ *     otherwise
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetBoxBuffer(JxlDecoder* dec,
+                                                   uint8_t* data, size_t size);
+
+/**
+ * Releases buffer which was provided with @ref JxlDecoderSetBoxBuffer.
+ *
+ * Calling @ref JxlDecoderReleaseBoxBuffer is required whenever
+ * a buffer is already set and a new buffer needs to be added with @ref
+ * JxlDecoderSetBoxBuffer, but is not required before @ref
+ * JxlDecoderDestroy or @ref JxlDecoderReset.
+ *
+ * Calling @ref JxlDecoderReleaseBoxBuffer when no buffer is set is
+ * not an error and returns 0.
+ *
+ * @param dec decoder object
+ * @return the amount of bytes the decoder has not yet written to of the data
+ *     set by @ref JxlDecoderSetBoxBuffer, or 0 if no buffer is set or @ref
+ *     JxlDecoderReleaseBoxBuffer was already called.
+ */
+JXL_EXPORT size_t JxlDecoderReleaseBoxBuffer(JxlDecoder* dec);
+
+/**
+ * Configures whether to get boxes in raw mode or in decompressed mode. In raw
+ * mode, boxes are output as their bytes appear in the container file, which may
+ * be decompressed, or compressed if their type is "brob". In decompressed mode,
+ * "brob" boxes are decompressed with Brotli before outputting them. The size of
+ * the decompressed stream is not known before the decompression has already
+ * finished.
+ *
+ * The default mode is raw. This setting can only be changed before decoding, or
+ * directly after a @ref JXL_DEC_BOX event, and is remembered until the decoder
+ * is reset or destroyed.
+ *
+ * Enabling decompressed mode requires Brotli support from the library.
+ *
+ * @param dec decoder object
+ * @param decompress JXL_TRUE to transparently decompress, JXL_FALSE to get
+ *     boxes in raw mode.
+ * @return @ref JXL_DEC_ERROR if decompressed mode is set and Brotli is not
+ *     available, @ref JXL_DEC_SUCCESS otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetDecompressBoxes(JxlDecoder* dec,
+                                                         JXL_BOOL decompress);
+
+/**
+ * Outputs the type of the current box, after a @ref JXL_DEC_BOX event occurred,
+ * as 4 characters without null termination character. In case of a compressed
+ * "brob" box, this will return "brob" if the decompressed argument is
+ * JXL_FALSE, or the underlying box type if the decompressed argument is
+ * JXL_TRUE.
+ *
+ * The following box types are currently described in ISO/IEC 18181-2:
+ *  - "Exif": a box with EXIF metadata.  Starts with a 4-byte tiff header offset
+ *    (big-endian uint32) that indicates the start of the actual EXIF data
+ *    (which starts with a tiff header). Usually the offset will be zero and the
+ *    EXIF data starts immediately after the offset field. The Exif orientation
+ *    should be ignored by applications; the JPEG XL codestream orientation
+ *    takes precedence and libjxl will by default apply the correct orientation
+ *    automatically (see @ref JxlDecoderSetKeepOrientation).
+ *  - "xml ": a box with XML data, in particular XMP metadata.
+ *  - "jumb": a JUMBF superbox (JPEG Universal Metadata Box Format, ISO/IEC
+ *    19566-5).
+ *  - "JXL ": mandatory signature box, must come first, 12 bytes long including
+ *    the box header
+ *  - "ftyp": a second mandatory signature box, must come second, 20 bytes long
+ *    including the box header
+ *  - "jxll": a JXL level box. This indicates if the codestream is level 5 or
+ *    level 10 compatible. If not present, it is level 5. Level 10 allows more
+ *    features such as very high image resolution and bit-depths above 16 bits
+ *    per channel. Added automatically by the encoder when
+ *    JxlEncoderSetCodestreamLevel is used
+ *  - "jxlc": a box with the image codestream, in case the codestream is not
+ *    split across multiple boxes. The codestream contains the JPEG XL image
+ *    itself, including the basic info such as image dimensions, ICC color
+ *    profile, and all the pixel data of all the image frames.
+ *  - "jxlp": a codestream box in case it is split across multiple boxes.
+ *    The contents are the same as in case of a jxlc box, when concatenated.
+ *  - "brob": a Brotli-compressed box, which otherwise represents an existing
+ *    type of box such as Exif or "xml ". When @ref JxlDecoderSetDecompressBoxes
+ *    is set to JXL_TRUE, these boxes will be transparently decompressed by the
+ *    decoder.
+ *  - "jxli": frame index box, can list the keyframes in case of a JPEG XL
+ *    animation allowing the decoder to jump to individual frames more
+ *    efficiently.
+ *  - "jbrd": JPEG reconstruction box, contains the information required to
+ *    byte-for-byte losslessly reconstruct a JPEG-1 image. The JPEG DCT
+ *    coefficients (pixel content) themselves as well as the ICC profile are
+ *    encoded in the JXL codestream (jxlc or jxlp) itself. EXIF, XMP and JUMBF
+ *    metadata is encoded in the corresponding boxes. The jbrd box itself
+ *    contains information such as the remaining app markers of the JPEG-1 file
+ *    and everything else required to fit the information together into the
+ *    exact original JPEG file.
+ *
+ * Other application-specific boxes can exist. Their typename should not begin
+ * with "jxl" or "JXL" or conflict with other existing typenames.
+ *
+ * The signature, jxl* and jbrd boxes are processed by the decoder and would
+ * typically be ignored by applications. The typical way to use this function is
+ * to check if an encountered box contains metadata that the application is
+ * interested in (e.g. EXIF or XMP metadata), in order to conditionally set a
+ * box buffer.
+ *
+ * @param dec decoder object
+ * @param type buffer to copy the type into
+ * @param decompressed which box type to get: JXL_FALSE to get the raw box type,
+ *     which can be "brob", JXL_TRUE to get the underlying box type.
+ * @return @ref JXL_DEC_SUCCESS if the value is available, @ref JXL_DEC_ERROR if
+ *     not, for example the JXL file does not use the container format.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetBoxType(JxlDecoder* dec,
+                                                 JxlBoxType type,
+                                                 JXL_BOOL decompressed);
+
+/**
+ * Returns the size of a box as it appears in the container file, after the @ref
+ * JXL_DEC_BOX event. For a non-compressed box, this is the size of the
+ * contents, excluding the 4 bytes indicating the box type. For a compressed
+ * "brob" box, this is the size of the compressed box contents plus the
+ * additional 4 bytes indicating the underlying box type, but excluding the 4
+ * bytes indicating "brob". This function gives the size of the data that will
+ * be written in the output buffer when getting boxes in the default raw
+ * compressed mode. When @ref JxlDecoderSetDecompressBoxes is enabled, the
+ * return value of this function does not change, and the decompressed size is
+ * not known before it has already been decompressed and output.
+ *
+ * @param dec decoder object
+ * @param size raw size of the box in bytes
+ * @return @ref JXL_DEC_ERROR if no box size is available, @ref JXL_DEC_SUCCESS
+ *     otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetBoxSizeRaw(const JxlDecoder* dec,
+                                                    uint64_t* size);
+
+/**
+ * Configures at which progressive steps in frame decoding the @ref
+ * JXL_DEC_FRAME_PROGRESSION event occurs. The default value for the level
+ * of detail if this function is never called is `kDC`.
+ *
+ * @param dec decoder object
+ * @param detail at which level of detail to trigger @ref
+ *     JXL_DEC_FRAME_PROGRESSION
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as
+ *     an invalid value for the progressive detail.
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetProgressiveDetail(JxlDecoder* dec, JxlProgressiveDetail detail);
+
+/**
+ * Returns the intended downsampling ratio for the progressive frame produced
+ * by @ref JxlDecoderFlushImage after the latest @ref JXL_DEC_FRAME_PROGRESSION
+ * event.
+ *
+ * @param dec decoder object
+ * @return The intended downsampling ratio, can be 1, 2, 4 or 8.
+ */
+JXL_EXPORT size_t JxlDecoderGetIntendedDownsamplingRatio(JxlDecoder* dec);
+
+/**
+ * Outputs progressive step towards the decoded image so far when only partial
+ * input was received. If the flush was successful, the buffer set with @ref
+ * JxlDecoderSetImageOutBuffer will contain partial image data.
+ *
+ * Can be called when @ref JxlDecoderProcessInput returns @ref
+ * JXL_DEC_NEED_MORE_INPUT, after the @ref JXL_DEC_FRAME event already occurred
+ * and before the @ref JXL_DEC_FULL_IMAGE event occurred for a frame.
+ *
+ * @param dec decoder object
+ * @return @ref JXL_DEC_SUCCESS if image data was flushed to the output buffer,
+ *     or @ref JXL_DEC_ERROR when no flush was done, e.g. if not enough image
+ *     data was available yet even for flush, or no output buffer was set yet.
+ *     This error is not fatal, it only indicates no flushed image is available
+ *     right now. Regular decoding can still be performed.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderFlushImage(JxlDecoder* dec);
+
+/**
+ * Sets the bit depth of the output buffer or callback.
+ *
+ * Can be called after @ref JxlDecoderSetImageOutBuffer or @ref
+ * JxlDecoderSetImageOutCallback. For float pixel data types, only the default
+ * @ref JXL_BIT_DEPTH_FROM_PIXEL_FORMAT setting is supported.
+ *
+ * @param dec decoder object
+ * @param bit_depth the bit depth setting of the pixel output
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as
+ *     incompatible custom bit depth and pixel data type.
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetImageOutBitDepth(JxlDecoder* dec, const JxlBitDepth* bit_depth);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_DECODE_H_ */
+
+/** @}*/
diff --git a/third-party/libjxl/libjxl/lib/include/jxl/decode_cxx.h b/third-party/libjxl/libjxl/lib/include/jxl/decode_cxx.h
new file mode 100644
index 0000000000..bc6e8a3789
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/include/jxl/decode_cxx.h
@@ -0,0 +1,57 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/// @addtogroup libjxl_decoder
+/// @{
+///
+/// @file decode_cxx.h
+/// @brief C++ header-only helper for @ref decode.h.
+///
+/// There's no binary library associated with the header since this is a header
+/// only library.
+
+#ifndef JXL_DECODE_CXX_H_
+#define JXL_DECODE_CXX_H_
+
+#include <jxl/decode.h>
+
+#include <memory>
+
+#if !(defined(__cplusplus) || defined(c_plusplus))
+#error "This a C++ only header. Use jxl/decode.h from C sources."
+#endif
+
+/// Struct to call JxlDecoderDestroy from the JxlDecoderPtr unique_ptr.
+struct JxlDecoderDestroyStruct {
+  /// Calls @ref JxlDecoderDestroy() on the passed decoder.
+  void operator()(JxlDecoder* decoder) { JxlDecoderDestroy(decoder); }
+};
+
+/// std::unique_ptr<> type that calls JxlDecoderDestroy() when releasing the
+/// decoder.
+///
+/// Use this helper type from C++ sources to ensure the decoder is destroyed and
+/// its internal resources are released.
+typedef std::unique_ptr<JxlDecoder, JxlDecoderDestroyStruct> JxlDecoderPtr;
+
+/// Creates an instance of JxlDecoder into a JxlDecoderPtr and initializes it.
+///
+/// This function returns a unique_ptr that will call JxlDecoderDestroy() when
+/// releasing the pointer. See @ref JxlDecoderCreate for details on the
+/// instance creation.
+///
+/// @param memory_manager custom allocator function. It may be NULL. The memory
+///        manager will be copied internally.
+/// @return a @c NULL JxlDecoderPtr if the instance can not be allocated or
+///         initialized
+/// @return initialized JxlDecoderPtr instance otherwise.
+static inline JxlDecoderPtr JxlDecoderMake(
+    const JxlMemoryManager* memory_manager) {
+  return JxlDecoderPtr(JxlDecoderCreate(memory_manager));
+}
+
+#endif  // JXL_DECODE_CXX_H_
+
+/// @}
diff --git a/third-party/libjxl/libjxl/lib/include/jxl/encode.h b/third-party/libjxl/libjxl/lib/include/jxl/encode.h
new file mode 100644
index 0000000000..7501f9db55
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/include/jxl/encode.h
@@ -0,0 +1,1313 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_encoder
+ * @{
+ * @file encode.h
+ * @brief Encoding API for JPEG XL.
+ */
+
+#ifndef JXL_ENCODE_H_
+#define JXL_ENCODE_H_
+
+#include <jxl/cms_interface.h>
+#include <jxl/codestream_header.h>
+#include <jxl/jxl_export.h>
+#include <jxl/memory_manager.h>
+#include <jxl/parallel_runner.h>
+#include <jxl/stats.h>
+#include <jxl/version.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/**
+ * Encoder library version.
+ *
+ * @return the encoder library version as an integer:
+ * MAJOR_VERSION * 1000000 + MINOR_VERSION * 1000 + PATCH_VERSION. For example,
+ * version 1.2.3 would return 1002003.
+ */
+JXL_EXPORT uint32_t JxlEncoderVersion(void);
+
+/**
+ * Opaque structure that holds the JPEG XL encoder.
+ *
+ * Allocated and initialized with JxlEncoderCreate().
+ * Cleaned up and deallocated with JxlEncoderDestroy().
+ */
+typedef struct JxlEncoderStruct JxlEncoder;
+
+/**
+ * Settings and metadata for a single image frame. This includes encoder options
+ * for a frame such as compression quality and speed.
+ *
+ * Allocated and initialized with JxlEncoderFrameSettingsCreate().
+ * Cleaned up and deallocated when the encoder is destroyed with
+ * JxlEncoderDestroy().
+ */
+typedef struct JxlEncoderFrameSettingsStruct JxlEncoderFrameSettings;
+
+/**
+ * Return value for multiple encoder functions.
+ */
+typedef enum {
+  /** Function call finished successfully, or encoding is finished and there is
+   * nothing more to be done.
+   */
+  JXL_ENC_SUCCESS = 0,
+
+  /** An error occurred, for example out of memory.
+   */
+  JXL_ENC_ERROR = 1,
+
+  /** The encoder needs more output buffer to continue encoding.
+   */
+  JXL_ENC_NEED_MORE_OUTPUT = 2,
+
+} JxlEncoderStatus;
+
+/**
+ * Error conditions:
+ * API usage errors have the 0x80 bit set to 1
+ * Other errors have the 0x80 bit set to 0
+ */
+typedef enum {
+  /** No error
+   */
+  JXL_ENC_ERR_OK = 0,
+
+  /** Generic encoder error due to unspecified cause
+   */
+  JXL_ENC_ERR_GENERIC = 1,
+
+  /** Out of memory
+   *  TODO(jon): actually catch this and return this error
+   */
+  JXL_ENC_ERR_OOM = 2,
+
+  /** JPEG bitstream reconstruction data could not be
+   *  represented (e.g. too much tail data)
+   */
+  JXL_ENC_ERR_JBRD = 3,
+
+  /** Input is invalid (e.g. corrupt JPEG file or ICC profile)
+   */
+  JXL_ENC_ERR_BAD_INPUT = 4,
+
+  /** The encoder doesn't (yet) support this. Either no version of libjxl
+   * supports this, and the API is used incorrectly, or the libjxl version
+   * should have been checked before trying to do this.
+   */
+  JXL_ENC_ERR_NOT_SUPPORTED = 0x80,
+
+  /** The encoder API is used in an incorrect way.
+   *  In this case, a debug build of libjxl should output a specific error
+   * message. (if not, please open an issue about it)
+   */
+  JXL_ENC_ERR_API_USAGE = 0x81,
+
+} JxlEncoderError;
+
+/**
+ * Id of encoder options for a frame. This includes options such as setting
+ * encoding effort/speed or overriding the use of certain coding tools, for this
+ * frame. This does not include non-frame related encoder options such as for
+ * boxes.
+ */
+typedef enum {
+  /** Sets encoder effort/speed level without affecting decoding speed. Valid
+   * values are, from faster to slower speed: 1:lightning 2:thunder 3:falcon
+   * 4:cheetah 5:hare 6:wombat 7:squirrel 8:kitten 9:tortoise.
+   * Default: squirrel (7).
+   */
+  JXL_ENC_FRAME_SETTING_EFFORT = 0,
+
+  /** Sets the decoding speed tier for the provided options. Minimum is 0
+   * (slowest to decode, best quality/density), and maximum is 4 (fastest to
+   * decode, at the cost of some quality/density). Default is 0.
+   */
+  JXL_ENC_FRAME_SETTING_DECODING_SPEED = 1,
+
+  /** Sets resampling option. If enabled, the image is downsampled before
+   * compression, and upsampled to original size in the decoder. Integer option,
+   * use -1 for the default behavior (resampling only applied for low quality),
+   * 1 for no downsampling (1x1), 2 for 2x2 downsampling, 4 for 4x4
+   * downsampling, 8 for 8x8 downsampling.
+   */
+  JXL_ENC_FRAME_SETTING_RESAMPLING = 2,
+
+  /** Similar to JXL_ENC_FRAME_SETTING_RESAMPLING, but for extra channels.
+   * Integer option, use -1 for the default behavior (depends on encoder
+   * implementation), 1 for no downsampling (1x1), 2 for 2x2 downsampling, 4 for
+   * 4x4 downsampling, 8 for 8x8 downsampling.
+   */
+  JXL_ENC_FRAME_SETTING_EXTRA_CHANNEL_RESAMPLING = 3,
+
+  /** Indicates the frame added with @ref JxlEncoderAddImageFrame is already
+   * downsampled by the downsampling factor set with @ref
+   * JXL_ENC_FRAME_SETTING_RESAMPLING. The input frame must then be given in the
+   * downsampled resolution, not the full image resolution. The downsampled
+   * resolution is given by ceil(xsize / resampling), ceil(ysize / resampling)
+   * with xsize and ysize the dimensions given in the basic info, and resampling
+   * the factor set with @ref JXL_ENC_FRAME_SETTING_RESAMPLING.
+   * Use 0 to disable, 1 to enable. Default value is 0.
+   */
+  JXL_ENC_FRAME_SETTING_ALREADY_DOWNSAMPLED = 4,
+
+  /** Adds noise to the image emulating photographic film noise, the higher the
+   * given number, the grainier the image will be. As an example, a value of 100
+   * gives low noise whereas a value of 3200 gives a lot of noise. The default
+   * value is 0.
+   */
+  JXL_ENC_FRAME_SETTING_PHOTON_NOISE = 5,
+
+  /** Enables adaptive noise generation. This setting is not recommended for
+   * use, please use JXL_ENC_FRAME_SETTING_PHOTON_NOISE instead. Use -1 for the
+   * default (encoder chooses), 0 to disable, 1 to enable.
+   */
+  JXL_ENC_FRAME_SETTING_NOISE = 6,
+
+  /** Enables or disables dots generation. Use -1 for the default (encoder
+   * chooses), 0 to disable, 1 to enable.
+   */
+  JXL_ENC_FRAME_SETTING_DOTS = 7,
+
+  /** Enables or disables patches generation. Use -1 for the default (encoder
+   * chooses), 0 to disable, 1 to enable.
+   */
+  JXL_ENC_FRAME_SETTING_PATCHES = 8,
+
+  /** Edge preserving filter level, -1 to 3. Use -1 for the default (encoder
+   * chooses), 0 to 3 to set a strength.
+   */
+  JXL_ENC_FRAME_SETTING_EPF = 9,
+
+  /** Enables or disables the gaborish filter. Use -1 for the default (encoder
+   * chooses), 0 to disable, 1 to enable.
+   */
+  JXL_ENC_FRAME_SETTING_GABORISH = 10,
+
+  /** Enables modular encoding. Use -1 for default (encoder
+   * chooses), 0 to enforce VarDCT mode (e.g. for photographic images), 1 to
+   * enforce modular mode (e.g. for lossless images).
+   */
+  JXL_ENC_FRAME_SETTING_MODULAR = 11,
+
+  /** Enables or disables preserving color of invisible pixels. Use -1 for the
+   * default (1 if lossless, 0 if lossy), 0 to disable, 1 to enable.
+   */
+  JXL_ENC_FRAME_SETTING_KEEP_INVISIBLE = 12,
+
+  /** Determines the order in which 256x256 regions are stored in the codestream
+   * for progressive rendering. Use -1 for the encoder
+   * default, 0 for scanline order, 1 for center-first order.
+   */
+  JXL_ENC_FRAME_SETTING_GROUP_ORDER = 13,
+
+  /** Determines the horizontal position of center for the center-first group
+   * order. Use -1 to automatically use the middle of the image, 0..xsize to
+   * specifically set it.
+   */
+  JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_X = 14,
+
+  /** Determines the center for the center-first group order. Use -1 to
+   * automatically use the middle of the image, 0..ysize to specifically set it.
+   */
+  JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_Y = 15,
+
+  /** Enables or disables progressive encoding for modular mode. Use -1 for the
+   * encoder default, 0 to disable, 1 to enable.
+   */
+  JXL_ENC_FRAME_SETTING_RESPONSIVE = 16,
+
+  /** Set the progressive mode for the AC coefficients of VarDCT, using spectral
+   * progression from the DCT coefficients. Use -1 for the encoder default, 0 to
+   * disable, 1 to enable.
+   */
+  JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC = 17,
+
+  /** Set the progressive mode for the AC coefficients of VarDCT, using
+   * quantization of the least significant bits. Use -1 for the encoder default,
+   * 0 to disable, 1 to enable.
+   */
+  JXL_ENC_FRAME_SETTING_QPROGRESSIVE_AC = 18,
+
+  /** Set the progressive mode using lower-resolution DC images for VarDCT. Use
+   * -1 for the encoder default, 0 to disable, 1 to have an extra 64x64 lower
+   * resolution pass, 2 to have a 512x512 and 64x64 lower resolution pass.
+   */
+  JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC = 19,
+
+  /** Use Global channel palette if the amount of colors is smaller than this
+   * percentage of range. Use 0-100 to set an explicit percentage, -1 to use the
+   * encoder default. Used for modular encoding.
+   */
+  JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GLOBAL_PERCENT = 20,
+
+  /** Use Local (per-group) channel palette if the amount of colors is smaller
+   * than this percentage of range. Use 0-100 to set an explicit percentage, -1
+   * to use the encoder default. Used for modular encoding.
+   */
+  JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GROUP_PERCENT = 21,
+
+  /** Use color palette if amount of colors is smaller than or equal to this
+   * amount, or -1 to use the encoder default. Used for modular encoding.
+   */
+  JXL_ENC_FRAME_SETTING_PALETTE_COLORS = 22,
+
+  /** Enables or disables delta palette. Use -1 for the default (encoder
+   * chooses), 0 to disable, 1 to enable. Used in modular mode.
+   */
+  JXL_ENC_FRAME_SETTING_LOSSY_PALETTE = 23,
+
+  /** Color transform for internal encoding: -1 = default, 0=XYB, 1=none (RGB),
+   * 2=YCbCr. The XYB setting performs the forward XYB transform. None and
+   * YCbCr both perform no transform, but YCbCr is used to indicate that the
+   * encoded data losslessly represents YCbCr values.
+   */
+  JXL_ENC_FRAME_SETTING_COLOR_TRANSFORM = 24,
+
+  /** Reversible color transform for modular encoding: -1=default, 0-41=RCT
+   * index, e.g. index 0 = none, index 6 = YCoCg.
+   * If this option is set to a non-default value, the RCT will be globally
+   * applied to the whole frame.
+   * The default behavior is to try several RCTs locally per modular group,
+   * depending on the speed and distance setting.
+   */
+  JXL_ENC_FRAME_SETTING_MODULAR_COLOR_SPACE = 25,
+
+  /** Group size for modular encoding: -1=default, 0=128, 1=256, 2=512, 3=1024.
+   */
+  JXL_ENC_FRAME_SETTING_MODULAR_GROUP_SIZE = 26,
+
+  /** Predictor for modular encoding. -1 = default, 0=zero, 1=left, 2=top,
+   * 3=avg0, 4=select, 5=gradient, 6=weighted, 7=topright, 8=topleft,
+   * 9=leftleft, 10=avg1, 11=avg2, 12=avg3, 13=toptop predictive average, 14=mix
+   * 5 and 6, 15=mix everything.
+   */
+  JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR = 27,
+
+  /** Fraction of pixels used to learn MA trees as a percentage. -1 = default,
+   * 0 = no MA and fast decode, 50 = default value, 100 = all, values above
+   * 100 are also permitted. Higher values use more encoder memory.
+   */
+  JXL_ENC_FRAME_SETTING_MODULAR_MA_TREE_LEARNING_PERCENT = 28,
+
+  /** Number of extra (previous-channel) MA tree properties to use. -1 =
+   * default, 0-11 = valid values. Recommended values are in the range 0 to 3,
+   * or 0 to amount of channels minus 1 (including all extra channels, and
+   * excluding color channels when using VarDCT mode). Higher value gives slower
+   * encoding and slower decoding.
+   */
+  JXL_ENC_FRAME_SETTING_MODULAR_NB_PREV_CHANNELS = 29,
+
+  /** Enable or disable CFL (chroma-from-luma) for lossless JPEG recompression.
+   * -1 = default, 0 = disable CFL, 1 = enable CFL.
+   */
+  JXL_ENC_FRAME_SETTING_JPEG_RECON_CFL = 30,
+
+  /** Prepare the frame for indexing in the frame index box.
+   * 0 = ignore this frame (same as not setting a value),
+   * 1 = index this frame within the Frame Index Box.
+   * If any frames are indexed, the first frame needs to
+   * be indexed, too. If the first frame is not indexed, and
+   * a later frame is attempted to be indexed, JXL_ENC_ERROR will occur.
+   * If non-keyframes, i.e., frames with cropping, blending or patches are
+   * attempted to be indexed, JXL_ENC_ERROR will occur.
+   */
+  JXL_ENC_FRAME_INDEX_BOX = 31,
+
+  /** Sets brotli encode effort for use in JPEG recompression and compressed
+   * metadata boxes (brob). Can be -1 (default) or 0 (fastest) to 11 (slowest).
+   * Default is based on the general encode effort in case of JPEG
+   * recompression, and 4 for brob boxes.
+   */
+  JXL_ENC_FRAME_SETTING_BROTLI_EFFORT = 32,
+
+  /** Enables or disables brotli compression of metadata boxes derived from
+   * a JPEG frame when using JxlEncoderAddJPEGFrame. This has no effect on boxes
+   * added using JxlEncoderAddBox.
+   * -1 = default, 0 = disable compression, 1 = enable compression.
+   */
+  JXL_ENC_FRAME_SETTING_JPEG_COMPRESS_BOXES = 33,
+
+  /** Control what kind of buffering is used, when using chunked image frames.
+   * 0 = buffers everything, basically the same as non-streamed code path
+   *     (mainly for testing)
+   * 1 = can buffer internal data (the tokens)
+   * 2 = can buffer the output
+   * 3 = minimize buffer usage: streamed input and chunked output, writing TOC
+   *     last (will not work with progressive)
+   *
+   * When the image dimensions are smaller than 2048 x 2048, all the options are
+   * the same. Using 1, 2 or 3 can progressively reduce compression density.
+   */
+  JXL_ENC_FRAME_SETTING_BUFFERING = 34,
+
+  /** Keep or discard Exif metadata boxes derived from a JPEG frame when using
+   * JxlEncoderAddJPEGFrame. This has no effect on boxes added using
+   * JxlEncoderAddBox. When JxlEncoderStoreJPEGMetadata is set to 1, this option
+   * cannot be set to 0. Even when Exif metadata is discarded, the orientation
+   * will still be applied. 0 = discard Exif metadata, 1 = keep Exif metadata
+   * (default).
+   */
+  JXL_ENC_FRAME_SETTING_JPEG_KEEP_EXIF = 35,
+
+  /** Keep or discard XMP metadata boxes derived from a JPEG frame when using
+   * JxlEncoderAddJPEGFrame. This has no effect on boxes added using
+   * JxlEncoderAddBox. When JxlEncoderStoreJPEGMetadata is set to 1, this option
+   * cannot be set to 0. 0 = discard XMP metadata, 1 = keep XMP metadata
+   * (default).
+   */
+  JXL_ENC_FRAME_SETTING_JPEG_KEEP_XMP = 36,
+
+  /** Keep or discard JUMBF metadata boxes derived from a JPEG frame when using
+   * JxlEncoderAddJPEGFrame. This has no effect on boxes added using
+   * JxlEncoderAddBox. 0 = discard JUMBF metadata, 1 = keep JUMBF metadata
+   * (default).
+   */
+  JXL_ENC_FRAME_SETTING_JPEG_KEEP_JUMBF = 37,
+
+  /** Enum value not to be used as an option. This value is added to force the
+   * C compiler to have the enum to take a known size.
+   */
+  JXL_ENC_FRAME_SETTING_FILL_ENUM = 65535,
+
+} JxlEncoderFrameSettingId;
+
+/**
+ * Creates an instance of JxlEncoder and initializes it.
+ *
+ * @p memory_manager will be used for all the library dynamic allocations made
+ * from this instance. The parameter may be NULL, in which case the default
+ * allocator will be used. See jxl/memory_manager.h for details.
+ *
+ * @param memory_manager custom allocator function. It may be NULL. The memory
+ *        manager will be copied internally.
+ * @return @c NULL if the instance can not be allocated or initialized
+ * @return pointer to initialized JxlEncoder otherwise
+ */
+JXL_EXPORT JxlEncoder* JxlEncoderCreate(const JxlMemoryManager* memory_manager);
+
+/**
+ * Re-initializes a JxlEncoder instance, so it can be re-used for encoding
+ * another image. All state and settings are reset as if the object was
+ * newly created with JxlEncoderCreate, but the memory manager is kept.
+ *
+ * @param enc instance to be re-initialized.
+ */
+JXL_EXPORT void JxlEncoderReset(JxlEncoder* enc);
+
+/**
+ * Deinitializes and frees JxlEncoder instance.
+ *
+ * @param enc instance to be cleaned up and deallocated.
+ */
+JXL_EXPORT void JxlEncoderDestroy(JxlEncoder* enc);
+
+/**
+ * Sets the color management system (CMS) that will be used for color conversion
+ * (if applicable) during encoding. May only be set before starting encoding. If
+ * left unset, the default CMS implementation will be used.
+ *
+ * @param enc encoder object.
+ * @param cms structure representing a CMS implementation. See JxlCmsInterface
+ * for more details.
+ */
+JXL_EXPORT void JxlEncoderSetCms(JxlEncoder* enc, JxlCmsInterface cms);
+
+/**
+ * Set the parallel runner for multithreading. May only be set before starting
+ * encoding.
+ *
+ * @param enc encoder object.
+ * @param parallel_runner function pointer to runner for multithreading. It may
+ *        be NULL to use the default, single-threaded, runner. A multithreaded
+ *        runner should be set to reach fast performance.
+ * @param parallel_runner_opaque opaque pointer for parallel_runner.
+ * @return JXL_ENC_SUCCESS if the runner was set, JXL_ENC_ERROR
+ * otherwise (the previous runner remains set).
+ */
+JXL_EXPORT JxlEncoderStatus
+JxlEncoderSetParallelRunner(JxlEncoder* enc, JxlParallelRunner parallel_runner,
+                            void* parallel_runner_opaque);
+
+/**
+ * Get the (last) error code in case JXL_ENC_ERROR was returned.
+ *
+ * @param enc encoder object.
+ * @return the JxlEncoderError that caused the (last) JXL_ENC_ERROR to be
+ * returned.
+ */
+JXL_EXPORT JxlEncoderError JxlEncoderGetError(JxlEncoder* enc);
+
+/**
+ * Encodes JPEG XL file using the available bytes. @p *avail_out indicates how
+ * many output bytes are available, and @p *next_out points to the output
+ * buffer that the encoded bytes are written to.
+ * *avail_out will be decremented by the amount of bytes that have been
+ * processed by the encoder and *next_out will be incremented by the same
+ * amount, so *next_out will now point at the remaining *avail_out unprocessed
+ * bytes.
+ *
+ * The returned status indicates whether the encoder needs more output bytes.
+ * When the return value is not JXL_ENC_ERROR or JXL_ENC_SUCCESS, the encoding
+ * requires more JxlEncoderProcessOutput calls to continue.
+ *
+ * The caller must guarantee that *avail_out >= 32 when calling
+ * JxlEncoderProcessOutput; otherwise, JXL_ENC_NEED_MORE_OUTPUT will be
+ * returned. It is guaranteed that, if *avail_out >= 32, at least one byte of
+ * output will be written.
+ *
+ * This encodes the frames and/or boxes added so far. If the last frame or last
+ * box has been added, @ref JxlEncoderCloseInput, @ref JxlEncoderCloseFrames
+ * and/or @ref JxlEncoderCloseBoxes must be called before the next
+ * @ref JxlEncoderProcessOutput call, or the codestream won't be encoded
+ * correctly.
+ *
+ * @param enc encoder object.
+ * @param next_out pointer to next bytes to write to.
+ * @param avail_out amount of bytes available starting from *next_out.
+ * @return JXL_ENC_SUCCESS when encoding finished and all events handled.
+ * @return JXL_ENC_ERROR when encoding failed, e.g. invalid input.
+ * @return JXL_ENC_NEED_MORE_OUTPUT more output buffer is necessary.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderProcessOutput(JxlEncoder* enc,
+                                                    uint8_t** next_out,
+                                                    size_t* avail_out);
+
+/**
+ * Sets the frame information for this frame to the encoder. This includes
+ * animation information such as frame duration to store in the frame header.
+ * The frame header fields represent the frame as passed to the encoder, but not
+ * necessarily the exact values as they will be encoded in the file format: the
+ * encoder could change crop and blending options of a frame for more efficient
+ * encoding or introduce additional internal frames. Animation duration and
+ * time code information is not altered since those are immutable metadata of
+ * the frame.
+ *
+ * It is not required to use this function, however if have_animation is set
+ * to true in the basic info, then this function should be used to set the
+ * time duration of this individual frame. By default individual frames have a
+ * time duration of 0, making them form a composite still. See @ref
+ * JxlFrameHeader for more information.
+ *
+ * This information is stored in the JxlEncoderFrameSettings and so is used for
+ * any frame encoded with these JxlEncoderFrameSettings. It is ok to change
+ * between @ref JxlEncoderAddImageFrame calls, each added image frame will have
+ * the frame header that was set in the options at the time of calling
+ * JxlEncoderAddImageFrame.
+ *
+ * The is_last and name_length fields of the JxlFrameHeader are ignored, use
+ * @ref JxlEncoderCloseFrames to indicate last frame, and @ref
+ * JxlEncoderSetFrameName to indicate the name and its length instead.
+ * Calling this function will clear any name that was previously set with @ref
+ * JxlEncoderSetFrameName.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param frame_header frame header data to set. Object owned by the caller and
+ * does not need to be kept in memory, its information is copied internally.
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus
+JxlEncoderSetFrameHeader(JxlEncoderFrameSettings* frame_settings,
+                         const JxlFrameHeader* frame_header);
+
+/**
+ * Sets blend info of an extra channel. The blend info of extra channels is set
+ * separately from that of the color channels, the color channels are set with
+ * @ref JxlEncoderSetFrameHeader.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param index index of the extra channel to use.
+ * @param blend_info blend info to set for the extra channel
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelBlendInfo(
+    JxlEncoderFrameSettings* frame_settings, size_t index,
+    const JxlBlendInfo* blend_info);
+
+/**
+ * Sets the name of the animation frame. This function is optional, frames are
+ * not required to have a name. This setting is a part of the frame header, and
+ * the same principles as for @ref JxlEncoderSetFrameHeader apply. The
+ * name_length field of JxlFrameHeader is ignored by the encoder, this function
+ * determines the name length instead as the length in bytes of the C string.
+ *
+ * The maximum possible name length is 1071 bytes (excluding terminating null
+ * character).
+ *
+ * Calling @ref JxlEncoderSetFrameHeader clears any name that was
+ * previously set.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param frame_name name of the next frame to be encoded, as a UTF-8 encoded C
+ * string (zero terminated). Owned by the caller, and copied internally.
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetFrameName(
+    JxlEncoderFrameSettings* frame_settings, const char* frame_name);
+
+/**
+ * Sets the bit depth of the input buffer.
+ *
+ * For float pixel formats, only the default JXL_BIT_DEPTH_FROM_PIXEL_FORMAT
+ * setting is allowed, while for unsigned pixel formats,
+ * JXL_BIT_DEPTH_FROM_CODESTREAM setting is also allowed. See the comment on
+ * @ref JxlEncoderAddImageFrame for the effects of the bit depth setting.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param bit_depth the bit depth setting of the pixel input
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetFrameBitDepth(
+    JxlEncoderFrameSettings* frame_settings, const JxlBitDepth* bit_depth);
+
+/**
+ * Sets the buffer to read JPEG encoded bytes from for the next frame to encode.
+ *
+ * If JxlEncoderSetBasicInfo has not yet been called, calling
+ * JxlEncoderAddJPEGFrame will implicitly call it with the parameters of the
+ * added JPEG frame.
+ *
+ * If JxlEncoderSetColorEncoding or JxlEncoderSetICCProfile has not yet been
+ * called, calling JxlEncoderAddJPEGFrame will implicitly call it with the
+ * parameters of the added JPEG frame.
+ *
+ * If the encoder is set to store JPEG reconstruction metadata using @ref
+ * JxlEncoderStoreJPEGMetadata and a single JPEG frame is added, it will be
+ * possible to losslessly reconstruct the JPEG codestream.
+ *
+ * If this is the last frame, @ref JxlEncoderCloseInput or @ref
+ * JxlEncoderCloseFrames must be called before the next
+ * @ref JxlEncoderProcessOutput call.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param buffer bytes to read JPEG from. Owned by the caller and its contents
+ * are copied internally.
+ * @param size size of buffer in bytes.
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus
+JxlEncoderAddJPEGFrame(const JxlEncoderFrameSettings* frame_settings,
+                       const uint8_t* buffer, size_t size);
+
+/**
+ * Sets the buffer to read pixels from for the next image to encode. Must call
+ * JxlEncoderSetBasicInfo before JxlEncoderAddImageFrame.
+ *
+ * Currently only some data types for pixel formats are supported:
+ * - JXL_TYPE_UINT8, with range 0..255
+ * - JXL_TYPE_UINT16, with range 0..65535
+ * - JXL_TYPE_FLOAT16, with nominal range 0..1
+ * - JXL_TYPE_FLOAT, with nominal range 0..1
+ *
+ * Note: the sample data type in pixel_format is allowed to be different from
+ * what is described in the JxlBasicInfo. The type in pixel_format, together
+ * with an optional @ref JxlBitDepth parameter set by @ref
+ * JxlEncoderSetFrameBitDepth describes the format of the uncompressed pixel
+ * buffer. The bits_per_sample and exponent_bits_per_sample in the JxlBasicInfo
+ * describes what will actually be encoded in the JPEG XL codestream.
+ * For example, to encode a 12-bit image, you would set bits_per_sample to 12,
+ * while the input frame buffer can be in the following formats:
+ *  - if pixel format is in JXL_TYPE_UINT16 with default bit depth setting
+ *    (i.e. JXL_BIT_DEPTH_FROM_PIXEL_FORMAT), input sample values are rescaled
+ *    to 16-bit, i.e. multiplied by 65535/4095;
+ *  - if pixel format is in JXL_TYPE_UINT16 with JXL_BIT_DEPTH_FROM_CODESTREAM
+ *    bit depth setting, input sample values are provided unscaled;
+ *  - if pixel format is in JXL_TYPE_FLOAT, input sample values are rescaled
+ *    to 0..1, i.e.  multiplied by 1.f/4095.f.
+ * While it is allowed, it is obviously not recommended to use a pixel_format
+ * with lower precision than what is specified in the JxlBasicInfo.
+ *
+ * We support interleaved channels as described by the JxlPixelFormat:
+ * - single-channel data, e.g. grayscale
+ * - single-channel + alpha
+ * - trichromatic, e.g. RGB
+ * - trichromatic + alpha
+ *
+ * Extra channels not handled here need to be set by @ref
+ * JxlEncoderSetExtraChannelBuffer.
+ * If the image has alpha, and alpha is not passed here, it will implicitly be
+ * set to all-opaque (an alpha value of 1.0 everywhere).
+ *
+ * The pixels are assumed to be encoded in the original profile that is set with
+ * JxlEncoderSetColorEncoding or JxlEncoderSetICCProfile. If none of these
+ * functions were used, the pixels are assumed to be nonlinear sRGB for integer
+ * data types (JXL_TYPE_UINT8, JXL_TYPE_UINT16), and linear sRGB for floating
+ * point data types (JXL_TYPE_FLOAT16, JXL_TYPE_FLOAT).
+ *
+ * Sample values in floating-point pixel formats are allowed to be outside the
+ * nominal range, e.g. to represent out-of-sRGB-gamut colors in the
+ * uses_original_profile=false case. They are however not allowed to be NaN or
+ * +-infinity.
+ *
+ * If this is the last frame, @ref JxlEncoderCloseInput or @ref
+ * JxlEncoderCloseFrames must be called before the next
+ * @ref JxlEncoderProcessOutput call.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param pixel_format format for pixels. Object owned by the caller and its
+ * contents are copied internally.
+ * @param buffer buffer type to input the pixel data from. Owned by the caller
+ * and its contents are copied internally.
+ * @param size size of buffer in bytes. This size should match what is implied
+ * by the frame dimensions and the pixel format.
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderAddImageFrame(
+    const JxlEncoderFrameSettings* frame_settings,
+    const JxlPixelFormat* pixel_format, const void* buffer, size_t size);
+
+/**
+ * TODO(firsching): add documentation
+ *
+ */
+typedef void (*JxlEncoderOutputCallback)(void* run_opaque, size_t pos,
+                                         size_t num_bytes);
+
+/**
+ * TODO(firsching): add documentation
+ *
+ */
+JXL_EXPORT JxlEncoderStatus
+JxlEncoderSetOutputCallback(JxlEncoderOutputCallback callback);
+
+/**
+ * TODO(firsching): add documentation
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus
+JxlEncoderChunkedImageFrameStart(const JxlEncoderFrameSettings* frame_settings);
+
+/**
+ * TODO(firsching): add documentation
+ * We process exactly one 2048x2048 DC-group.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param x horizontal position of the top-left corner of the processed group.
+ * Must be divisible by 2048.
+ * @param y vertical position of the top-left corner of the processed group.
+ * Must be divisible by 2048.
+ * @param pixel_format format for pixels. Object owned by the caller and its
+ * contents are copied internally.
+ * @param input_data the input buffer.
+ * @param input_size size of the input data in bytes.
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderChunkedImageFrameAddPart(
+    const JxlEncoderFrameSettings* frame_settings, size_t x, size_t y,
+    const JxlPixelFormat* pixel_format, const void* input_data,
+    size_t input_size);
+
+/**
+ * Sets the buffer to read pixels from for an extra channel at a given index.
+ * The index must be smaller than the num_extra_channels in the associated
+ * JxlBasicInfo. Must call @ref JxlEncoderSetExtraChannelInfo before
+ * JxlEncoderSetExtraChannelBuffer.
+ *
+ * TODO(firsching): mention what data types in pixel formats are supported.
+ *
+ * It is required to call this function for every extra channel, except for the
+ * alpha channel if that was already set through @ref JxlEncoderAddImageFrame.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param pixel_format format for pixels. Object owned by the caller and its
+ * contents are copied internally. The num_channels value is ignored, since the
+ * number of channels for an extra channel is always assumed to be one.
+ * @param buffer buffer type to input the pixel data from. Owned by the caller
+ * and its contents are copied internally.
+ * @param size size of buffer in bytes. This size should match what is implied
+ * by the frame dimensions and the pixel format.
+ * @param index index of the extra channel to use.
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelBuffer(
+    const JxlEncoderFrameSettings* frame_settings,
+    const JxlPixelFormat* pixel_format, const void* buffer, size_t size,
+    uint32_t index);
+
+/** Adds a metadata box to the file format. JxlEncoderProcessOutput must be used
+ * to effectively write the box to the output. @ref JxlEncoderUseBoxes must
+ * be enabled before using this function.
+ *
+ * Boxes allow inserting application-specific data and metadata (Exif, XML/XMP,
+ * JUMBF and user defined boxes).
+ *
+ * The box format follows ISO BMFF and shares features and box types with other
+ * image and video formats, including the Exif, XML and JUMBF boxes. The box
+ * format for JPEG XL is specified in ISO/IEC 18181-2.
+ *
+ * Boxes in general don't contain other boxes inside, except a JUMBF superbox.
+ * Boxes follow each other sequentially and are byte-aligned. If the container
+ * format is used, the JXL stream consists of concatenated boxes.
+ * It is also possible to use a direct codestream without boxes, but in that
+ * case metadata cannot be added.
+ *
+ * Each box generally has the following byte structure in the file:
+ * - 4 bytes: box size including box header (Big endian. If set to 0, an
+ *   8-byte 64-bit size follows instead).
+ * - 4 bytes: type, e.g. "JXL " for the signature box, "jxlc" for a codestream
+ *   box.
+ * - N bytes: box contents.
+ *
+ * Only the box contents are provided to the contents argument of this function,
+ * the encoder encodes the size header itself. Most boxes are written
+ * automatically by the encoder as needed ("JXL ", "ftyp", "jxll", "jxlc",
+ * "jxlp", "jxli", "jbrd"), and this function only needs to be called to add
+ * optional metadata when encoding from pixels (using JxlEncoderAddImageFrame).
+ * When recompressing JPEG files (using JxlEncoderAddJPEGFrame), if the input
+ * JPEG contains EXIF, XMP or JUMBF metadata, the corresponding boxes are
+ * already added automatically.
+ *
+ * Box types are given by 4 characters. The following boxes can be added with
+ * this function:
+ * - "Exif": a box with EXIF metadata, can be added by libjxl users, or is
+ *   automatically added when needed for JPEG reconstruction. The contents of
+ *   this box must be prepended by a 4-byte tiff header offset, which may
+ *   be 4 zero bytes in case the tiff header follows immediately.
+ *   The EXIF metadata must be in sync with what is encoded in the JPEG XL
+ *   codestream, specifically the image orientation. While this is not
+ *   recommended in practice, in case of conflicting metadata, the JPEG XL
+ *   codestream takes precedence.
+ * - "xml ": a box with XML data, in particular XMP metadata, can be added by
+ *   libjxl users, or is automatically added when needed for JPEG reconstruction
+ * - "jumb": a JUMBF superbox, which can contain boxes with different types of
+ *   metadata inside. This box type can be added by the encoder transparently,
+ *   and other libraries to create and handle JUMBF content exist.
+ * - Application-specific boxes. Their typename should not begin with "jxl" or
+ *   "JXL" or conflict with other existing typenames, and they should be
+ *   registered with MP4RA (mp4ra.org).
+ *
+ * These boxes can be stored uncompressed or Brotli-compressed (using a "brob"
+ * box), depending on the compress_box parameter.
+ *
+ * @param enc encoder object.
+ * @param type the box type, e.g. "Exif" for EXIF metadata, "xml " for XMP or
+ * IPTC metadata, "jumb" for JUMBF metadata.
+ * @param contents the full contents of the box, for example EXIF
+ * data. ISO BMFF box header must not be included, only the contents. Owned by
+ * the caller and its contents are copied internally.
+ * @param size size of the box contents.
+ * @param compress_box Whether to compress this box as a "brob" box. Requires
+ * Brotli support.
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error, such as when
+ * using this function without @ref JxlEncoderUseBoxes, or adding a box type
+ * that would result in an invalid file format.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderAddBox(JxlEncoder* enc,
+                                             const JxlBoxType type,
+                                             const uint8_t* contents,
+                                             size_t size,
+                                             JXL_BOOL compress_box);
+
+/**
+ * Indicates the intention to add metadata boxes. This allows @ref
+ * JxlEncoderAddBox to be used. When using this function, then it is required
+ * to use @ref JxlEncoderCloseBoxes at the end.
+ *
+ * By default the encoder assumes no metadata boxes will be added.
+ *
+ * This setting can only be set at the beginning, before encoding starts.
+ *
+ * @param enc encoder object.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderUseBoxes(JxlEncoder* enc);
+
+/**
+ * Declares that no further boxes will be added with @ref JxlEncoderAddBox.
+ * This function must be called after the last box is added so the encoder knows
+ * the stream will be finished. It is not necessary to use this function if
+ * @ref JxlEncoderUseBoxes is not used. Further frames may still be added.
+ *
+ * Must be called between JxlEncoderAddBox of the last box
+ * and the next call to JxlEncoderProcessOutput, or @ref JxlEncoderProcessOutput
+ * won't output the last box correctly.
+ *
+ * NOTE: if you don't need to close frames and boxes at separate times, you can
+ * use @ref JxlEncoderCloseInput instead to close both at once.
+ *
+ * @param enc encoder object.
+ */
+JXL_EXPORT void JxlEncoderCloseBoxes(JxlEncoder* enc);
+
+/**
+ * Declares that no frames will be added and @ref JxlEncoderAddImageFrame and
+ * @ref JxlEncoderAddJPEGFrame won't be called anymore. Further metadata boxes
+ * may still be added. This function or @ref JxlEncoderCloseInput must be called
+ * after adding the last frame and before the next call to
+ * @ref JxlEncoderProcessOutput, or the frame won't be properly marked as last.
+ *
+ * NOTE: if you don't need to close frames and boxes at separate times, you can
+ * use @ref JxlEncoderCloseInput instead to close both at once.
+ *
+ * @param enc encoder object.
+ */
+JXL_EXPORT void JxlEncoderCloseFrames(JxlEncoder* enc);
+
+/**
+ * Closes any input to the encoder, equivalent to calling JxlEncoderCloseFrames
+ * as well as calling JxlEncoderCloseBoxes if needed. No further input of any
+ * kind may be given to the encoder, but further @ref JxlEncoderProcessOutput
+ * calls should be done to create the final output.
+ *
+ * The requirements of both @ref JxlEncoderCloseFrames and @ref
+ * JxlEncoderCloseBoxes apply to this function. Either this function or the
+ * other two must be called after the final frame and/or box, and before the
+ * next @ref JxlEncoderProcessOutput call, or the codestream won't be encoded
+ * correctly.
+ *
+ * @param enc encoder object.
+ */
+JXL_EXPORT void JxlEncoderCloseInput(JxlEncoder* enc);
+
+/**
+ * Sets the original color encoding of the image encoded by this encoder. This
+ * is an alternative to JxlEncoderSetICCProfile and only one of these two must
+ * be used. This one sets the color encoding as a @ref JxlColorEncoding, while
+ * the other sets it as ICC binary data.
+ * Must be called after JxlEncoderSetBasicInfo.
+ *
+ * @param enc encoder object.
+ * @param color color encoding. Object owned by the caller and its contents are
+ * copied internally.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR or
+ * JXL_ENC_NOT_SUPPORTED otherwise
+ */
+JXL_EXPORT JxlEncoderStatus
+JxlEncoderSetColorEncoding(JxlEncoder* enc, const JxlColorEncoding* color);
+
+/**
+ * Sets the original color encoding of the image encoded by this encoder as an
+ * ICC color profile. This is an alternative to JxlEncoderSetColorEncoding and
+ * only one of these two must be used. This one sets the color encoding as ICC
+ * binary data, while the other defines it as a @ref JxlColorEncoding.
+ * Must be called after JxlEncoderSetBasicInfo.
+ *
+ * @param enc encoder object.
+ * @param icc_profile bytes of the original ICC profile
+ * @param size size of the icc_profile buffer in bytes
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR or
+ * JXL_ENC_NOT_SUPPORTED otherwise
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetICCProfile(JxlEncoder* enc,
+                                                    const uint8_t* icc_profile,
+                                                    size_t size);
+
+/**
+ * Initializes a JxlBasicInfo struct to default values.
+ * For forwards-compatibility, this function has to be called before values
+ * are assigned to the struct fields.
+ * The default values correspond to an 8-bit RGB image, no alpha or any
+ * other extra channels.
+ *
+ * @param info global image metadata. Object owned by the caller.
+ */
+JXL_EXPORT void JxlEncoderInitBasicInfo(JxlBasicInfo* info);
+
+/**
+ * Initializes a JxlFrameHeader struct to default values.
+ * For forwards-compatibility, this function has to be called before values
+ * are assigned to the struct fields.
+ * The default values correspond to a frame with no animation duration and the
+ * 'replace' blend mode. After using this function, the duration must be set
+ * for an animation, and the blend settings must be set for a composite still.
+ *
+ * @param frame_header frame metadata. Object owned by the caller.
+ */
+JXL_EXPORT void JxlEncoderInitFrameHeader(JxlFrameHeader* frame_header);
+
+/**
+ * Initializes a JxlBlendInfo struct to default values.
+ * For forwards-compatibility, this function has to be called before values
+ * are assigned to the struct fields.
+ *
+ * @param blend_info blending info. Object owned by the caller.
+ */
+JXL_EXPORT void JxlEncoderInitBlendInfo(JxlBlendInfo* blend_info);
+
+/**
+ * Sets the global metadata of the image encoded by this encoder.
+ *
+ * If the JxlBasicInfo contains information of extra channels beyond an alpha
+ * channel, then @ref JxlEncoderSetExtraChannelInfo must be called between
+ * JxlEncoderSetBasicInfo and @ref JxlEncoderAddImageFrame. In order to indicate
+ * extra channels, the value of `info.num_extra_channels` should be set to the
+ * number of extra channels, also counting the alpha channel if present.
+ *
+ * @param enc encoder object.
+ * @param info global image metadata. Object owned by the caller and its
+ * contents are copied internally.
+ * @return JXL_ENC_SUCCESS if the operation was successful,
+ * JXL_ENC_ERROR or JXL_ENC_NOT_SUPPORTED otherwise
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetBasicInfo(JxlEncoder* enc,
+                                                   const JxlBasicInfo* info);
+
+/**
+ * Sets the upsampling method the decoder will use in case there are frames
+ * with JXL_ENC_FRAME_SETTING_RESAMPLING set. This is useful in combination
+ * with the JXL_ENC_FRAME_SETTING_ALREADY_DOWNSAMPLED option, to control the
+ * type of upsampling that will be used.
+ *
+ * @param enc encoder object.
+ * @param factor upsampling factor to configure (1, 2, 4 or 8; for 1 this
+ * function has no effect at all)
+ * @param mode upsampling mode to use for this upsampling:
+ * -1: default (good for photographic images, no signaling overhead)
+ * 0: nearest neighbor (good for pixel art)
+ * 1: 'pixel dots' (same as NN for 2x, diamond-shaped 'pixel dots' for 4x/8x)
+ * @return JXL_ENC_SUCCESS if the operation was successful,
+ * JXL_ENC_ERROR or JXL_ENC_NOT_SUPPORTED otherwise
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetUpsamplingMode(JxlEncoder* enc,
+                                                        const int64_t factor,
+                                                        const int64_t mode);
+
+/**
+ * Initializes a JxlExtraChannelInfo struct to default values.
+ * For forwards-compatibility, this function has to be called before values
+ * are assigned to the struct fields.
+ * The default values correspond to an 8-bit channel of the provided type.
+ *
+ * @param type type of the extra channel.
+ * @param info global extra channel metadata to initialize. Object owned by the
+ * caller.
+ */
+JXL_EXPORT void JxlEncoderInitExtraChannelInfo(JxlExtraChannelType type,
+                                               JxlExtraChannelInfo* info);
+
+/**
+ * Sets information for the extra channel at the given index. The index
+ * must be smaller than num_extra_channels in the associated JxlBasicInfo.
+ *
+ * @param enc encoder object
+ * @param index index of the extra channel to set.
+ * @param info global extra channel metadata. Object owned by the caller and its
+ * contents are copied internally.
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelInfo(
+    JxlEncoder* enc, size_t index, const JxlExtraChannelInfo* info);
+
+/**
+ * Sets the name for the extra channel at the given index in UTF-8. The index
+ * must be smaller than the num_extra_channels in the associated JxlBasicInfo.
+ *
+ * TODO(lode): remove size parameter for consistency with
+ * JxlEncoderSetFrameName
+ *
+ * @param enc encoder object
+ * @param index index of the extra channel to set.
+ * @param name buffer with the name of the extra channel.
+ * @param size size of the name buffer in bytes, not counting the terminating
+ * character.
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelName(JxlEncoder* enc,
+                                                          size_t index,
+                                                          const char* name,
+                                                          size_t size);
+
+/**
+ * Sets a frame-specific option of integer type to the encoder options.
+ * The JxlEncoderFrameSettingId argument determines which option is set.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param option ID of the option to set.
+ * @param value Integer value to set for this option.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR in
+ * case of an error, such as invalid or unknown option id, or invalid integer
+ * value for the given option. If an error is returned, the state of the
+ * JxlEncoderFrameSettings object is still valid and is the same as before this
+ * function was called.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderFrameSettingsSetOption(
+    JxlEncoderFrameSettings* frame_settings, JxlEncoderFrameSettingId option,
+    int64_t value);
+
+/**
+ * Sets a frame-specific option of float type to the encoder options.
+ * The JxlEncoderFrameSettingId argument determines which option is set.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param option ID of the option to set.
+ * @param value Float value to set for this option.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR in
+ * case of an error, such as invalid or unknown option id, or invalid float
+ * value for the given option. If an error is returned, the state of the
+ * JxlEncoderFrameSettings object is still valid and is the same as before this
+ * function was called.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderFrameSettingsSetFloatOption(
+    JxlEncoderFrameSettings* frame_settings, JxlEncoderFrameSettingId option,
+    float value);
+
+/** Forces the encoder to use the box-based container format (BMFF) even
+ * when not necessary.
+ *
+ * When using @ref JxlEncoderUseBoxes, @ref JxlEncoderStoreJPEGMetadata or @ref
+ * JxlEncoderSetCodestreamLevel with level 10, the encoder will automatically
+ * also use the container format, it is not necessary to use
+ * JxlEncoderUseContainer for those use cases.
+ *
+ * By default this setting is disabled.
+ *
+ * This setting can only be set at the beginning, before encoding starts.
+ *
+ * @param enc encoder object.
+ * @param use_container true if the encoder should always output the JPEG XL
+ * container format, false to only output it when necessary.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderUseContainer(JxlEncoder* enc,
+                                                   JXL_BOOL use_container);
+
+/**
+ * Configure the encoder to store JPEG reconstruction metadata in the JPEG XL
+ * container.
+ *
+ * If this is set to true and a single JPEG frame is added, it will be
+ * possible to losslessly reconstruct the JPEG codestream.
+ *
+ * This setting can only be set at the beginning, before encoding starts.
+ *
+ * @param enc encoder object.
+ * @param store_jpeg_metadata true if the encoder should store JPEG metadata.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ */
+JXL_EXPORT JxlEncoderStatus
+JxlEncoderStoreJPEGMetadata(JxlEncoder* enc, JXL_BOOL store_jpeg_metadata);
+
+/** Sets the feature level of the JPEG XL codestream. Valid values are 5 and
+ * 10, or -1 (to choose automatically). Using the minimum required level, or
+ * level 5 in most cases, is recommended for compatibility with all decoders.
+ *
+ * Level 5: for end-user image delivery, this level is the most widely
+ * supported level by image decoders and the recommended level to use unless a
+ * level 10 feature is absolutely necessary. Supports a maximum resolution of
+ * 268435456 pixels total with a maximum width or height of 262144 pixels,
+ * maximum 16-bit color channel depth, maximum 120 frames per second for
+ * animation, maximum ICC color profile size of 4 MiB, it allows all color
+ * models and extra channel types except CMYK and the JXL_CHANNEL_BLACK extra
+ * channel, and a maximum of 4 extra channels in addition to the 3 color
+ * channels. It also sets boundaries to certain internally used coding tools.
+ *
+ * Level 10: this level removes or increases the bounds of most of the level
+ * 5 limitations, allows CMYK color and up to 32 bits per color channel, but
+ * may be less widely supported.
+ *
+ * The default value is -1. This means the encoder will automatically choose
+ * between level 5 and level 10 based on what information is inside the @ref
+ * JxlBasicInfo structure. Do note that some level 10 features, particularly
+ * those used by animated JPEG XL codestreams, might require level 10, even
+ * though the @ref JxlBasicInfo only suggests level 5. In this case, the level
+ * must be explicitly set to 10, otherwise the encoder will return an error.
+ * The encoder will restrict internal encoding choices to those compatible with
+ * the level setting.
+ *
+ * This setting can only be set at the beginning, before encoding starts.
+ *
+ * @param enc encoder object.
+ * @param level the level value to set, must be -1, 5, or 10.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetCodestreamLevel(JxlEncoder* enc,
+                                                         int level);
+
+/** Returns the codestream level required to support the currently configured
+ * settings and basic info. This function can only be used at the beginning,
+ * before encoding starts, but after setting basic info.
+ *
+ * This does not support per-frame settings, only global configuration, such as
+ * the image dimensions, that are known at the time of writing the header of
+ * the JPEG XL file.
+ *
+ * If this returns 5, nothing needs to be done and the codestream can be
+ * compatible with any decoder. If this returns 10, JxlEncoderSetCodestreamLevel
+ * has to be used to set the codestream level to 10, or the encoder can be
+ * configured differently to allow using the more compatible level 5.
+ *
+ * @param enc encoder object.
+ * @return -1 if no level can support the configuration (e.g. image dimensions
+ * larger than even level 10 supports), 5 if level 5 is supported, 10 if setting
+ * the codestream level to 10 is required.
+ *
+ */
+JXL_EXPORT int JxlEncoderGetRequiredCodestreamLevel(const JxlEncoder* enc);
+
+/**
+ * Enables lossless encoding.
+ *
+ * This is not an option like the others on its own; rather, while enabled it
+ * overrides a set of existing options (such as distance, modular mode and
+ * color transform) to enable bit-for-bit lossless encoding.
+ *
+ * When disabled, those options are not overridden, but since those options
+ * could still have been manually set to a combination that operates losslessly,
+ * using this function with lossless set to JXL_FALSE does not guarantee
+ * lossy encoding, though the default set of options is lossy.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param lossless whether to override options for lossless mode
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetFrameLossless(
+    JxlEncoderFrameSettings* frame_settings, JXL_BOOL lossless);
+
+/**
+ * Sets the distance level for lossy compression: target max butteraugli
+ * distance, lower = higher quality. Range: 0 .. 15.
+ * 0.0 = mathematically lossless (however, use JxlEncoderSetFrameLossless
+ * instead to use true lossless, as setting distance to 0 alone is not the only
+ * requirement). 1.0 = visually lossless. Recommended range: 0.5 .. 3.0. Default
+ * value: 1.0.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param distance the distance value to set.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetFrameDistance(
+    JxlEncoderFrameSettings* frame_settings, float distance);
+
+/**
+ * Sets the distance level for lossy compression of extra channels.
+ * The distance is as in JxlEncoderSetFrameDistance (lower = higher quality).
+ * If not set, or if set to the special value -1, the distance that was set with
+ * JxlEncoderSetFrameDistance will be used.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param index index of the extra channel to set a distance value for.
+ * @param distance the distance value to set.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelDistance(
+    JxlEncoderFrameSettings* frame_settings, size_t index, float distance);
+
+/**
+ * Create a new set of encoder options, with all values initially copied from
+ * the @p source options, or set to default if @p source is NULL.
+ *
+ * The returned pointer is an opaque struct tied to the encoder and it will be
+ * deallocated by the encoder when JxlEncoderDestroy() is called. For functions
+ * taking both a @ref JxlEncoder and a @ref JxlEncoderFrameSettings, only
+ * JxlEncoderFrameSettings created with this function for the same encoder
+ * instance can be used.
+ *
+ * @param enc encoder object.
+ * @param source source options to copy initial values from, or NULL to get
+ * options initialized to defaults.
+ * @return the opaque struct pointer identifying a new set of encoder options.
+ */
+JXL_EXPORT JxlEncoderFrameSettings* JxlEncoderFrameSettingsCreate(
+    JxlEncoder* enc, const JxlEncoderFrameSettings* source);
+
+/**
+ * Sets a color encoding to be sRGB.
+ *
+ * @param color_encoding color encoding instance.
+ * @param is_gray whether the color encoding should be gray scale or color.
+ */
+JXL_EXPORT void JxlColorEncodingSetToSRGB(JxlColorEncoding* color_encoding,
+                                          JXL_BOOL is_gray);
+
+/**
+ * Sets a color encoding to be linear sRGB.
+ *
+ * @param color_encoding color encoding instance.
+ * @param is_gray whether the color encoding should be gray scale or color.
+ */
+JXL_EXPORT void JxlColorEncodingSetToLinearSRGB(
+    JxlColorEncoding* color_encoding, JXL_BOOL is_gray);
+
+/**
+ * Enables usage of expert options.
+ *
+ * At the moment, the only expert option is setting an effort value of 10,
+ * which gives the best compression for pixel-lossless modes but is very slow.
+ *
+ * @param enc encoder object.
+ */
+JXL_EXPORT void JxlEncoderAllowExpertOptions(JxlEncoder* enc);
+
+/**
+ * Function type for @ref JxlEncoderSetDebugImageCallback.
+ *
+ * The callback may be called simultaneously by different threads when using a
+ * threaded parallel runner, on different debug images.
+ *
+ * @param opaque optional user data, as given to @ref
+ *   JxlEncoderSetDebugImageCallback.
+ * @param label label of debug image, can be used in filenames
+ * @param xsize width of debug image
+ * @param ysize height of debug image
+ * @param color color encoding of debug image
+ * @param pixels pixel data of debug image as big-endian 16-bit unsigned
+ *   samples. The memory is not owned by the user, and is only valid during the
+ *   time the callback is running.
+ */
+typedef void (*JxlDebugImageCallback)(void* opaque, const char* label,
+                                      size_t xsize, size_t ysize,
+                                      const JxlColorEncoding* color,
+                                      const uint16_t* pixels);
+
+/**
+ * Sets the given debug image callback that will be used by the encoder to
+ * output various debug images during encoding.
+ *
+ * This only has any effect if the encoder was compiled with the appropriate
+ * debug build flags.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param callback used to return the debug image
+ * @param opaque user supplied parameter to the image callback
+ */
+JXL_EXPORT void JxlEncoderSetDebugImageCallback(
+    JxlEncoderFrameSettings* frame_settings, JxlDebugImageCallback callback,
+    void* opaque);
+
+/**
+ * Sets the given stats object for gathering various statistics during encoding.
+ *
+ * This only has any effect if the encoder was compiled with the appropriate
+ * debug build flags.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param stats object that can be used to query the gathered stats (created
+ *   by @ref JxlEncoderStatsCreate)
+ */
+JXL_EXPORT void JxlEncoderCollectStats(JxlEncoderFrameSettings* frame_settings,
+                                       JxlEncoderStats* stats);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_ENCODE_H_ */
+
+/** @}*/
diff --git a/third-party/libjxl/libjxl/lib/include/jxl/encode_cxx.h b/third-party/libjxl/libjxl/lib/include/jxl/encode_cxx.h
new file mode 100644
index 0000000000..3889e12c14
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/include/jxl/encode_cxx.h
@@ -0,0 +1,57 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/// @addtogroup libjxl_encoder
+///@{
+///
+/// @file encode_cxx.h
+/// @brief C++ header-only helper for @ref encode.h.
+///
+/// There's no binary library associated with the header since this is a header
+/// only library.
+
+#ifndef JXL_ENCODE_CXX_H_
+#define JXL_ENCODE_CXX_H_
+
+#include <jxl/encode.h>
+
+#include <memory>
+
+#if !(defined(__cplusplus) || defined(c_plusplus))
+#error "This a C++ only header. Use jxl/encode.h from C sources."
+#endif
+
+/// Deleter functor that JxlEncoderPtr uses to dispose of the encoder it owns.
+struct JxlEncoderDestroyStruct {
+  /// Forwards @p enc to @ref JxlEncoderDestroy().
+  void operator()(JxlEncoder* enc) { JxlEncoderDestroy(enc); }
+};
+
+/// std::unique_ptr<> type that calls JxlEncoderDestroy() when releasing the
+/// encoder.
+///
+/// Use this helper type from C++ sources to ensure the encoder is destroyed and
+/// its internal resources are released.
+typedef std::unique_ptr<JxlEncoder, JxlEncoderDestroyStruct> JxlEncoderPtr;
+
+/// Builds a JxlEncoder and hands ownership of it to a JxlEncoderPtr.
+///
+/// The returned unique_ptr invokes JxlEncoderDestroy() when it releases the
+/// pointer. See @ref JxlEncoderCreate for details on how the instance is made.
+///
+/// @param memory_manager custom allocator function. It may be NULL. The memory
+///        manager will be copied internally.
+/// @return a @c NULL JxlEncoderPtr if the instance can not be allocated or
+///         initialized
+/// @return initialized JxlEncoderPtr instance otherwise.
+static inline JxlEncoderPtr JxlEncoderMake(
+    const JxlMemoryManager* memory_manager) {
+  JxlEncoder* raw_encoder = JxlEncoderCreate(memory_manager);
+  return JxlEncoderPtr(raw_encoder);
+}
+
+#endif  // JXL_ENCODE_CXX_H_
+
+/// @}
diff --git a/third-party/libjxl/libjxl/lib/include/jxl/memory_manager.h b/third-party/libjxl/libjxl/lib/include/jxl/memory_manager.h
new file mode 100644
index 0000000000..52640a8beb
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/include/jxl/memory_manager.h
@@ -0,0 +1,72 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_common
+ * @{
+ * @file memory_manager.h
+ * @brief Abstraction functions used by JPEG XL to allocate memory.
+ */
+
+#ifndef JXL_MEMORY_MANAGER_H_
+#define JXL_MEMORY_MANAGER_H_
+
+#include <stddef.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/**
+ * Allocating function for a memory region of a given size.
+ *
+ * Allocates a contiguous memory region of size @p size bytes. The returned
+ * memory may not be aligned to a specific size or initialized at all.
+ *
+ * @param opaque custom memory manager handle provided by the caller.
+ * @param size in bytes of the requested memory region.
+ * @return @c NULL if the memory can not be allocated,
+ * @return pointer to the memory otherwise.
+ */
+typedef void* (*jpegxl_alloc_func)(void* opaque, size_t size);
+
+/**
+ * Deallocating function pointer type.
+ *
+ * This function @b MUST do nothing if @p address is @c NULL.
+ *
+ * @param opaque custom memory manager handle provided by the caller.
+ * @param address memory region pointer returned by ::jpegxl_alloc_func, or @c
+ * NULL.
+ */
+typedef void (*jpegxl_free_func)(void* opaque, void* address);
+
+/**
+ * Memory Manager struct.
+ * Bundles an opaque handle with the alloc/free callbacks that, when provided
+ * by the caller, will be used to handle memory allocations.
+ */
+typedef struct JxlMemoryManagerStruct {
+  /** The opaque pointer that will be passed as the first parameter to all the
+   * functions in this struct. */
+  void* opaque;
+
+  /** Memory allocation function. This can be NULL if and only if the free()
+   * member of this struct is also NULL. All dynamic memory will be allocated
+   * and freed with these functions if they are not NULL. */
+  jpegxl_alloc_func alloc;
+  /** Free function matching the alloc() member. */
+  jpegxl_free_func free;
+
+  /* TODO(deymo): Add cache-aligned alloc/free functions here. */
+} JxlMemoryManager;
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_MEMORY_MANAGER_H_ */
+
+/** @}*/
diff --git a/third-party/libjxl/libjxl/lib/include/jxl/parallel_runner.h b/third-party/libjxl/libjxl/lib/include/jxl/parallel_runner.h
new file mode 100644
index 0000000000..45394e972c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/include/jxl/parallel_runner.h
@@ -0,0 +1,156 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_common
+ *  @{
+ */
+/**
+ * @file parallel_runner.h
+ */
+
+/** API for running data operations in parallel in a multi-threaded environment.
+ * This module allows the JPEG XL caller to define their own way of creating and
+ * assigning threads.
+ *
+ * The JxlParallelRunner function type defines a parallel data processing
+ * runner that may be implemented by the caller to allow the library to process
+ * in multiple threads. The multi-threaded processing in this library only
+ * requires running the same function over each number in a range, possibly
+ * running each call in a different thread. The JPEG XL caller is responsible
+ * for implementing this logic using the thread APIs available in their system.
+ * For convenience, a C++ implementation based on std::thread is provided in
+ * jxl/thread_parallel_runner.h (part of the jxl_threads library).
+ *
+ * Thread pools usually store small numbers of heterogeneous tasks in a queue.
+ * When tasks are identical or differ only by an integer input parameter, it is
+ * much faster to store just one function of an integer parameter and call it
+ * for each value. Conventional vector-of-tasks can be run in parallel using a
+ * lambda function adapter that simply calls task_funcs[task].
+ *
+ * If no multi-threading is desired, a @c NULL value of JxlParallelRunner
+ * will use an internal implementation without multi-threading.
+ */
+
+#ifndef JXL_PARALLEL_RUNNER_H_
+#define JXL_PARALLEL_RUNNER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/** Return code used in the JxlParallel* functions as return value. A value
+ * of 0 means success and any other value means error. The special value
+ * JXL_PARALLEL_RET_RUNNER_ERROR can be used by the runner to indicate any
+ * other error.
+ */
+typedef int JxlParallelRetCode;
+
+/**
+ * General error returned by the JxlParallelRunInit function to indicate
+ * an error.
+ */
+#define JXL_PARALLEL_RET_RUNNER_ERROR (-1)
+
+/**
+ * Parallel run initialization callback. See JxlParallelRunner for details.
+ *
+ * This function MUST be called by the JxlParallelRunner only once, on the
+ * same thread that called JxlParallelRunner, before any parallel execution.
+ * The purpose of this call is to provide the maximum number of threads that the
+ * JxlParallelRunner will use, which can be used by JPEG XL to allocate
+ * per-thread storage if needed.
+ *
+ * @param jpegxl_opaque the @p jpegxl_opaque handle provided to
+ * JxlParallelRunner() must be passed here.
+ * @param num_threads the maximum number of threads. This value must be
+ * positive.
+ * @return 0 if the initialization process was successful.
+ * @return an error code if there was an error, which should be returned by
+ * JxlParallelRunner().
+ */
+typedef JxlParallelRetCode (*JxlParallelRunInit)(void* jpegxl_opaque,
+                                                 size_t num_threads);
+
+/**
+ * Parallel run data processing callback. See JxlParallelRunner for details.
+ *
+ * This function MUST be called once for every number in the range [start_range,
+ * end_range) (including start_range but not including end_range) passing this
+ * number as the @p value. Calls for different value may be executed from
+ * different threads in parallel.
+ *
+ * @param jpegxl_opaque the @p jpegxl_opaque handle provided to
+ * JxlParallelRunner() must be passed here.
+ * @param value the number in the range [start_range, end_range) of the call.
+ * @param thread_id the thread number where this function is being called from.
+ * This must be lower than the @p num_threads value passed to
+ * JxlParallelRunInit.
+ */
+typedef void (*JxlParallelRunFunction)(void* jpegxl_opaque, uint32_t value,
+                                       size_t thread_id);
+
+/**
+ * JxlParallelRunner function type. A parallel runner implementation can be
+ * provided by a JPEG XL caller to allow running computations in multiple
+ * threads. This function must call the initialization function @p init in the
+ * same thread that called it and then call the passed @p func once for every
+ * number in the range [start_range, end_range) (including start_range but not
+ * including end_range) possibly from different multiple threads in parallel.
+ *
+ * The JxlParallelRunner function does not need to be re-entrant. This means
+ * that the same JxlParallelRunner function with the same runner_opaque
+ * provided parameter will not be called from the library from either @p init or
+ * @p func in the same decoder or encoder instance. However, a single decoding
+ * or encoding instance may call the provided JxlParallelRunner multiple
+ * times for different parts of the decoding or encoding process.
+ *
+ * @return 0 if the @p init call succeeded (returned 0) and no other error
+ * occurred in the runner code.
+ * @return JXL_PARALLEL_RET_RUNNER_ERROR if an error occurred in the runner
+ * code, for example, setting up the threads.
+ * @return the return value of @p init() if non-zero.
+ */
+typedef JxlParallelRetCode (*JxlParallelRunner)(
+    void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+    JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range);
+
+/* The following is an example of a JxlParallelRunner that doesn't use any
+ * multi-threading. Note that this implementation doesn't store any state
+ * between multiple calls of the ExampleSequentialRunner function, so the
+ * runner_opaque value is not used.
+
+  JxlParallelRetCode ExampleSequentialRunner(void* runner_opaque,
+                                                void* jpegxl_opaque,
+                                                JxlParallelRunInit init,
+                                                JxlParallelRunFunction func,
+                                                uint32_t start_range,
+                                                uint32_t end_range) {
+    // We only use one thread (the currently running thread).
+    JxlParallelRetCode init_ret = (*init)(jpegxl_opaque, 1);
+    if (init_ret != 0) return init_ret;
+
+    // In case of other initialization error (for example when initializing the
+    // threads) one can return JXL_PARALLEL_RET_RUNNER_ERROR.
+
+    for (uint32_t i = start_range; i < end_range; i++) {
+      // Every call is in the thread number 0. These don't need to be in any
+      // order.
+      (*func)(jpegxl_opaque, i, 0);
+    }
+    return 0;
+  }
+ */
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_PARALLEL_RUNNER_H_ */
+
+/** @}*/
diff --git a/third-party/libjxl/libjxl/lib/include/jxl/resizable_parallel_runner.h b/third-party/libjxl/libjxl/lib/include/jxl/resizable_parallel_runner.h
new file mode 100644
index 0000000000..196e66d30a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/include/jxl/resizable_parallel_runner.h
@@ -0,0 +1,78 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_threads
+ * @{
+ * @file resizable_parallel_runner.h
+ * @brief implementation using std::thread of a resizable ::JxlParallelRunner.
+ */
+
+/** Implementation of JxlParallelRunner that can be used to enable
+ * multithreading when using the JPEG XL library. This uses std::thread
+ * internally and related synchronization functions. The number of threads
+ * created can be changed after creation of the thread pool; the threads
+ * (including the main thread) are re-used for every
+ * ResizableParallelRunner::Runner call. Only one concurrent
+ * JxlResizableParallelRunner call per instance is allowed at a time.
+ *
+ * This is a scalable, lower-overhead thread pool runner, especially suitable
+ * for data-parallel computations in the fork-join model, where clients need to
+ * know when all tasks have completed.
+ *
+ * Compared to the implementation in @ref thread_parallel_runner.h, this
+ * implementation is tuned for execution on lower-powered systems, including
+ * for example ARM CPUs with big.LITTLE computation models.
+ */
+
+#ifndef JXL_RESIZABLE_PARALLEL_RUNNER_H_
+#define JXL_RESIZABLE_PARALLEL_RUNNER_H_
+
+#include <jxl/jxl_threads_export.h>
+#include <jxl/memory_manager.h>
+#include <jxl/parallel_runner.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/** Parallel runner internally using std::thread. Use as JxlParallelRunner.
+ */
+JXL_THREADS_EXPORT JxlParallelRetCode JxlResizableParallelRunner(
+    void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+    JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range);
+
+/** Creates the runner for JxlResizableParallelRunner. Use as the opaque
+ * runner. The runner will execute tasks on the calling thread until
+ * @ref JxlResizableParallelRunnerSetThreads is called.
+ */
+JXL_THREADS_EXPORT void* JxlResizableParallelRunnerCreate(
+    const JxlMemoryManager* memory_manager);
+
+/** Changes the number of threads for JxlResizableParallelRunner.
+ */
+JXL_THREADS_EXPORT void JxlResizableParallelRunnerSetThreads(
+    void* runner_opaque, size_t num_threads);
+
+/** Suggests a number of threads to use for an image of given size.
+ */
+JXL_THREADS_EXPORT uint32_t
+JxlResizableParallelRunnerSuggestThreads(uint64_t xsize, uint64_t ysize);
+
+/** Destroys the runner created by JxlResizableParallelRunnerCreate.
+ */
+JXL_THREADS_EXPORT void JxlResizableParallelRunnerDestroy(void* runner_opaque);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_RESIZABLE_PARALLEL_RUNNER_H_ */
+
+/** @}*/
diff --git a/third-party/libjxl/libjxl/lib/include/jxl/resizable_parallel_runner_cxx.h b/third-party/libjxl/libjxl/lib/include/jxl/resizable_parallel_runner_cxx.h
new file mode 100644
index 0000000000..39bbbd283a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/include/jxl/resizable_parallel_runner_cxx.h
@@ -0,0 +1,64 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/// @addtogroup libjxl_threads
+/// @{
+///
+/// @file resizable_parallel_runner_cxx.h
+/// @ingroup libjxl_threads
+/// @brief C++ header-only helper for @ref resizable_parallel_runner.h.
+///
+/// There's no binary library associated with the header since this is a header
+/// only library.
+
+#ifndef JXL_RESIZABLE_PARALLEL_RUNNER_CXX_H_
+#define JXL_RESIZABLE_PARALLEL_RUNNER_CXX_H_
+
+#include <jxl/resizable_parallel_runner.h>
+
+#include <memory>
+
+#if !(defined(__cplusplus) || defined(c_plusplus))
+#error \
+    "This is a C++ only header. Use jxl/resizable_parallel_runner.h from C " \
+    "sources."
+#endif
+
+/// Deleter functor that JxlResizableParallelRunnerPtr uses to dispose of the
+/// runner it owns.
+struct JxlResizableParallelRunnerDestroyStruct {
+  /// Forwards @p opaque to @ref JxlResizableParallelRunnerDestroy().
+  void operator()(void* opaque) { JxlResizableParallelRunnerDestroy(opaque); }
+};
+
+/// std::unique_ptr<> type that calls JxlResizableParallelRunnerDestroy() when
+/// releasing the runner.
+///
+/// Use this helper type from C++ sources to ensure the runner is destroyed and
+/// its internal resources are released.
+typedef std::unique_ptr<void, JxlResizableParallelRunnerDestroyStruct>
+    JxlResizableParallelRunnerPtr;
+
+/// Builds a JxlResizableParallelRunner and hands ownership of it to a
+/// JxlResizableParallelRunnerPtr.
+///
+/// The returned unique_ptr invokes JxlResizableParallelRunnerDestroy() when it
+/// releases the pointer. See @ref JxlResizableParallelRunnerCreate for details
+/// on the instance creation.
+///
+/// @param memory_manager custom allocator function. It may be NULL. The memory
+///        manager will be copied internally.
+/// @return a @c NULL JxlResizableParallelRunnerPtr if the instance can not be
+/// allocated or initialized
+/// @return initialized JxlResizableParallelRunnerPtr instance otherwise.
+static inline JxlResizableParallelRunnerPtr JxlResizableParallelRunnerMake(
+    const JxlMemoryManager* memory_manager) {
+  void* raw_runner = JxlResizableParallelRunnerCreate(memory_manager);
+  return JxlResizableParallelRunnerPtr(raw_runner);
+}
+
+#endif  // JXL_RESIZABLE_PARALLEL_RUNNER_CXX_H_
+
+/// @}
diff --git a/third-party/libjxl/libjxl/lib/include/jxl/stats.h b/third-party/libjxl/libjxl/lib/include/jxl/stats.h
new file mode 100644
index 0000000000..7aeca26325
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/include/jxl/stats.h
@@ -0,0 +1,103 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_encoder
+ * @{
+ * @file stats.h
+ * @brief API to collect various statistics from JXL encoder.
+ */
+
+#ifndef JXL_STATS_H_
+#define JXL_STATS_H_
+
+#include <jxl/jxl_export.h>
+#include <stddef.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/**
+ * Opaque structure that holds the encoder statistics.
+ *
+ * Allocated and initialized with JxlEncoderStatsCreate().
+ * Cleaned up and deallocated with JxlEncoderStatsDestroy().
+ */
+typedef struct JxlEncoderStatsStruct JxlEncoderStats;
+
+/**
+ * Creates an instance of JxlEncoderStats and initializes it.
+ *
+ * @return pointer to initialized JxlEncoderStats instance
+ */
+JXL_EXPORT JxlEncoderStats* JxlEncoderStatsCreate(void);
+
+/**
+ * Deinitializes and frees JxlEncoderStats instance.
+ *
+ * @param stats instance to be cleaned up and deallocated. No-op if stats is
+ * null pointer.
+ */
+JXL_EXPORT void JxlEncoderStatsDestroy(JxlEncoderStats* stats);
+
+/** Keys selecting which statistic to read from a JxlEncoderStats object with
+ * @ref JxlEncoderStatsGet. */
+typedef enum {
+  JXL_ENC_STAT_HEADER_BITS,
+  JXL_ENC_STAT_TOC_BITS,
+  JXL_ENC_STAT_DICTIONARY_BITS,
+  JXL_ENC_STAT_SPLINES_BITS,
+  JXL_ENC_STAT_NOISE_BITS,
+  JXL_ENC_STAT_QUANT_BITS,
+  JXL_ENC_STAT_MODULAR_TREE_BITS,
+  JXL_ENC_STAT_MODULAR_GLOBAL_BITS,
+  JXL_ENC_STAT_DC_BITS,
+  JXL_ENC_STAT_MODULAR_DC_GROUP_BITS,
+  JXL_ENC_STAT_CONTROL_FIELDS_BITS,
+  JXL_ENC_STAT_COEF_ORDER_BITS,
+  JXL_ENC_STAT_AC_HISTOGRAM_BITS,
+  JXL_ENC_STAT_AC_BITS,
+  JXL_ENC_STAT_MODULAR_AC_GROUP_BITS,
+  JXL_ENC_STAT_NUM_SMALL_BLOCKS,
+  JXL_ENC_STAT_NUM_DCT4X8_BLOCKS,
+  JXL_ENC_STAT_NUM_AFV_BLOCKS,
+  JXL_ENC_STAT_NUM_DCT8_BLOCKS,
+  JXL_ENC_STAT_NUM_DCT8X32_BLOCKS,
+  JXL_ENC_STAT_NUM_DCT16_BLOCKS,
+  JXL_ENC_STAT_NUM_DCT16X32_BLOCKS,
+  JXL_ENC_STAT_NUM_DCT32_BLOCKS,
+  JXL_ENC_STAT_NUM_DCT32X64_BLOCKS,
+  JXL_ENC_STAT_NUM_DCT64_BLOCKS,
+  JXL_ENC_STAT_NUM_BUTTERAUGLI_ITERS,
+  JXL_ENC_NUM_STATS, /* equals the number of keys listed above */
+} JxlEncoderStatsKey;
+
+/** Returns the value of the statistics corresponding to the given key.
+ *
+ * @param stats object that was passed to the encoder with a
+ *   @ref JxlEncoderCollectStats function
+ * @param key the particular statistics to query
+ *
+ * @return the value of the statistics
+ */
+JXL_EXPORT size_t JxlEncoderStatsGet(const JxlEncoderStats* stats,
+                                     JxlEncoderStatsKey key);
+
+/** Updates the values of the given stats object with those of another.
+ *
+ * @param stats object whose values will be updated (usually added together)
+ * @param other stats object whose values will be merged with stats
+ */
+JXL_EXPORT void JxlEncoderStatsMerge(JxlEncoderStats* stats,
+                                     const JxlEncoderStats* other);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_STATS_H_ */
+
+/** @}*/
diff --git a/third-party/libjxl/libjxl/lib/include/jxl/thread_parallel_runner.h b/third-party/libjxl/libjxl/lib/include/jxl/thread_parallel_runner.h
new file mode 100644
index 0000000000..715648b256
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/include/jxl/thread_parallel_runner.h
@@ -0,0 +1,72 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_threads
+ * @{
+ * @file thread_parallel_runner.h
+ * @brief implementation using std::thread of a ::JxlParallelRunner.
+ */
+
+/** Implementation of JxlParallelRunner that can be used to enable
+ * multithreading when using the JPEG XL library. This uses std::thread
+ * internally and related synchronization functions. The number of threads
+ * created is fixed at construction time and the threads are re-used for every
+ * ThreadParallelRunner::Runner call. Only one concurrent
+ * JxlThreadParallelRunner call per instance is allowed at a time.
+ *
+ * This is a scalable, lower-overhead thread pool runner, especially suitable
+ * for data-parallel computations in the fork-join model, where clients need to
+ * know when all tasks have completed.
+ *
+ * This thread pool can efficiently load-balance millions of tasks using an
+ * atomic counter, thus avoiding per-task virtual or system calls. With 48
+ * hyperthreads and 1M tasks that add to an atomic counter, overall runtime is
+ * 10-20x higher when using std::async, and ~200x for a queue-based thread pool.
+ */
+
+#ifndef JXL_THREAD_PARALLEL_RUNNER_H_
+#define JXL_THREAD_PARALLEL_RUNNER_H_
+
+#include <jxl/jxl_threads_export.h>
+#include <jxl/memory_manager.h>
+#include <jxl/parallel_runner.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/** Parallel runner internally using std::thread. Use as JxlParallelRunner.
+ */
+JXL_THREADS_EXPORT JxlParallelRetCode JxlThreadParallelRunner(
+    void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+    JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range);
+
+/** Creates the runner for JxlThreadParallelRunner. Use as the opaque
+ * runner.
+ */
+JXL_THREADS_EXPORT void* JxlThreadParallelRunnerCreate(
+    const JxlMemoryManager* memory_manager, size_t num_worker_threads);
+
+/** Destroys the runner created by JxlThreadParallelRunnerCreate.
+ */
+JXL_THREADS_EXPORT void JxlThreadParallelRunnerDestroy(void* runner_opaque);
+
+/** Returns a default num_worker_threads value for
+ * JxlThreadParallelRunnerCreate. Declared with (void) so C sees a prototype.
+ */
+JXL_THREADS_EXPORT size_t JxlThreadParallelRunnerDefaultNumWorkerThreads(void);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_THREAD_PARALLEL_RUNNER_H_ */
+
+/** @}*/
diff --git a/third-party/libjxl/libjxl/lib/include/jxl/thread_parallel_runner_cxx.h b/third-party/libjxl/libjxl/lib/include/jxl/thread_parallel_runner_cxx.h
new file mode 100644
index 0000000000..4974ffee87
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/include/jxl/thread_parallel_runner_cxx.h
@@ -0,0 +1,64 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/// @addtogroup libjxl_threads
+/// @{
+///
+/// @file thread_parallel_runner_cxx.h
+/// @brief C++ header-only helper for @ref thread_parallel_runner.h.
+///
+/// There's no binary library associated with the header since this is a header
+/// only library.
+
+#ifndef JXL_THREAD_PARALLEL_RUNNER_CXX_H_
+#define JXL_THREAD_PARALLEL_RUNNER_CXX_H_
+
+#include <jxl/thread_parallel_runner.h>
+
+#include <memory>
+
+#if !(defined(__cplusplus) || defined(c_plusplus))
+#error \
+    "This is a C++ only header. Use jxl/thread_parallel_runner.h from C " \
+    "sources."
+#endif
+
+/// Struct to call JxlThreadParallelRunnerDestroy from the
+/// JxlThreadParallelRunnerPtr unique_ptr.
+struct JxlThreadParallelRunnerDestroyStruct {
+  /// Calls @ref JxlThreadParallelRunnerDestroy() on the passed runner.
+  void operator()(void* runner) { JxlThreadParallelRunnerDestroy(runner); }
+};
+
+/// std::unique_ptr<> type that calls JxlThreadParallelRunnerDestroy() when
+/// releasing the runner.
+///
+/// Use this helper type from C++ sources to ensure the runner is destroyed and
+/// its internal resources released.
+typedef std::unique_ptr<void, JxlThreadParallelRunnerDestroyStruct>
+    JxlThreadParallelRunnerPtr;
+
+/// Creates an instance of JxlThreadParallelRunner into a
+/// JxlThreadParallelRunnerPtr and initializes it.
+///
+/// This function returns a unique_ptr that will call
+/// JxlThreadParallelRunnerDestroy() when releasing the pointer. See @ref
+/// JxlThreadParallelRunnerCreate for details on the instance creation.
+///
+/// @param memory_manager custom allocator function. It may be NULL. The memory
+///        manager will be copied internally.
+/// @param num_worker_threads the number of worker threads to create.
+/// @return a @c NULL JxlThreadParallelRunnerPtr if the instance can not be
+/// allocated or initialized
+/// @return initialized JxlThreadParallelRunnerPtr instance otherwise.
+static inline JxlThreadParallelRunnerPtr JxlThreadParallelRunnerMake(
+    const JxlMemoryManager* memory_manager, size_t num_worker_threads) {
+  return JxlThreadParallelRunnerPtr(
+      JxlThreadParallelRunnerCreate(memory_manager, num_worker_threads));
+}
+
+#endif  // JXL_THREAD_PARALLEL_RUNNER_CXX_H_
+
+/// @}
diff --git a/third-party/libjxl/libjxl/lib/include/jxl/types.h b/third-party/libjxl/libjxl/lib/include/jxl/types.h
new file mode 100644
index 0000000000..f280fe99ca
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/include/jxl/types.h
@@ -0,0 +1,179 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_common
+ * @{
+ * @file types.h
+ * @brief Data types for the JPEG XL API, for both encoding and decoding.
+ */
+
+#ifndef JXL_TYPES_H_
+#define JXL_TYPES_H_
+
+#include <jxl/jxl_export.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/**
+ * A portable @c bool replacement.
+ *
+ * ::JXL_BOOL is a "documentation" type: actually it is @c int, but in API it
+ * denotes a type, whose only values are ::JXL_TRUE and ::JXL_FALSE.
+ */
+#define JXL_BOOL int
+/** Portable @c true replacement. */
+#define JXL_TRUE 1
+/** Portable @c false replacement. */
+#define JXL_FALSE 0
+
+/** Data type for the sample values per channel per pixel.
+ */
+typedef enum {
+  /** Use 32-bit single-precision floating point values, with range 0.0-1.0
+   * (within gamut, may go outside this range for wide color gamut). Floating
+   * point output, either JXL_TYPE_FLOAT or JXL_TYPE_FLOAT16, is recommended
+   * for HDR and wide gamut images when color profile conversion is required. */
+  JXL_TYPE_FLOAT = 0,
+
+  /** Use type uint8_t. May clip wide color gamut data.
+   */
+  JXL_TYPE_UINT8 = 2,
+
+  /** Use type uint16_t. May clip wide color gamut data.
+   */
+  JXL_TYPE_UINT16 = 3,
+
+  /** Use 16-bit IEEE 754 half-precision floating point values */
+  JXL_TYPE_FLOAT16 = 5,
+} JxlDataType;
+
+/** Ordering of multi-byte data.
+ */
+typedef enum {
+  /** Use the endianness of the system, either little endian or big endian,
+   * without forcing either specific endianness. Do not use if pixel data
+   * should be exported to a well defined format.
+   */
+  JXL_NATIVE_ENDIAN = 0,
+  /** Force little endian */
+  JXL_LITTLE_ENDIAN = 1,
+  /** Force big endian */
+  JXL_BIG_ENDIAN = 2,
+} JxlEndianness;
+
+/** Data type for the sample values per channel per pixel for the output buffer
+ * for pixels. This is not necessarily the same as the data type encoded in the
+ * codestream. The channels are interleaved per pixel. The pixels are
+ * organized row by row, left to right, top to bottom.
+ * TODO(lode): support different channel orders if needed (RGB, BGR, ...)
+ */
+typedef struct {
+  /** Number of channels available in a pixel buffer.
+   * 1: single-channel data, e.g. grayscale or a single extra channel
+   * 2: single-channel + alpha
+   * 3: trichromatic, e.g. RGB
+   * 4: trichromatic + alpha
+   * TODO(lode): this needs finetuning. It is not yet defined how the user
+   * chooses output color space. CMYK+alpha needs 5 channels.
+   */
+  uint32_t num_channels;
+
+  /** Data type of each channel.
+   */
+  JxlDataType data_type;
+
+  /** Whether multi-byte data types are represented in big endian or little
+   * endian format. This applies to the multi-byte sample types, e.g.
+   * JXL_TYPE_UINT16 and JXL_TYPE_FLOAT.
+   */
+  JxlEndianness endianness;
+
+  /** Align scanlines to a multiple of align bytes, or 0 to require no
+   * alignment at all (which has the same effect as value 1)
+   */
+  size_t align;
+} JxlPixelFormat;
+
+/** Settings for the interpretation of UINT input and output buffers.
+ *  (buffers using a FLOAT data type are not affected by this)
+ */
+typedef enum {
+  /** This is the default setting, where the encoder expects the input pixels
+   * to use the full range of the pixel format data type (e.g. for UINT16, the
+   * input range is 0 .. 65535 and the value 65535 is mapped to 1.0 when
+   * converting to float), and the decoder uses the full range to output
+   * pixels. If the bit depth in the basic info is different from this, the
+   * encoder expects the values to be rescaled accordingly (e.g. multiplied by
+   * 65535/4095 for a 12-bit image using UINT16 input data type). */
+  JXL_BIT_DEPTH_FROM_PIXEL_FORMAT = 0,
+
+  /** If this setting is selected, the encoder expects the input pixels to be
+   * in the range defined by the bits_per_sample value of the basic info (e.g.
+   * for 12-bit images using UINT16 input data types, the allowed range is
+   * 0 .. 4095 and the value 4095 is mapped to 1.0 when converting to float),
+   * and the decoder outputs pixels in this range. */
+  JXL_BIT_DEPTH_FROM_CODESTREAM = 1,
+
+  /** This setting can only be used in the decoder to select a custom range for
+   * pixel output */
+  JXL_BIT_DEPTH_CUSTOM = 2,
+} JxlBitDepthType;
+
+/** Data type for describing the interpretation of the input and output buffers
+ * in terms of the range of allowed input and output pixel values. */
+typedef struct {
+  /** Bit depth setting, see comment on @ref JxlBitDepthType */
+  JxlBitDepthType type;
+
+  /** Custom bits per sample */
+  uint32_t bits_per_sample;
+
+  /** Custom exponent bits per sample */
+  uint32_t exponent_bits_per_sample;
+} JxlBitDepth;
+
+/** Data type holding the 4-character type name of an ISOBMFF box.
+ */
+typedef char JxlBoxType[4];
+
+/** Types of progressive detail.
+ * Setting a progressive detail with value N implies all progressive details
+ * with smaller or equal value. Currently only the following levels of
+ * progressive detail are implemented:
+ *  - kDC (which implies kFrames)
+ *  - kLastPasses (which implies kDC and kFrames)
+ *  - kPasses (which implies kLastPasses, kDC and kFrames)
+ */
+typedef enum {
+  // after completed kRegularFrames
+  kFrames = 0,
+  // after completed DC (1:8)
+  kDC = 1,
+  // after completed AC passes that are the last pass for their resolution
+  // target.
+  kLastPasses = 2,
+  // after completed AC passes that are not the last pass for their resolution
+  // target.
+  kPasses = 3,
+  // during DC frame when lower resolutions are completed (1:32, 1:16)
+  kDCProgressive = 4,
+  // after completed DC groups (NOTE(review): inferred from name - confirm)
+  kDCGroups = 5,
+  // after completed groups
+  kGroups = 6,
+} JxlProgressiveDetail;
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_TYPES_H_ */
+
+/** @}*/
diff --git a/third-party/libjxl/libjxl/lib/jpegli.cmake b/third-party/libjxl/libjxl/lib/jpegli.cmake
new file mode 100644
index 0000000000..5d4f45e58a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli.cmake
@@ -0,0 +1,159 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include(compatibility.cmake)
+include(jxl_lists.cmake)
+
+set(JPEGLI_INTERNAL_LIBS
+  hwy
+  Threads::Threads
+  ${ATOMICS_LIBRARIES}
+)
+
+# JPEGLIB setup
+set(BITS_IN_JSAMPLE 8)
+set(MEM_SRCDST_SUPPORTED 1)
+
+if(JPEGLI_LIBJPEG_LIBRARY_SOVERSION STREQUAL "62")
+  set(JPEG_LIB_VERSION 62)
+elseif(JPEGLI_LIBJPEG_LIBRARY_SOVERSION STREQUAL "7")
+  set(JPEG_LIB_VERSION 70)
+elseif(JPEGLI_LIBJPEG_LIBRARY_SOVERSION STREQUAL "8")
+  set(JPEG_LIB_VERSION 80)
+endif()
+
+configure_file(
+  ../third_party/libjpeg-turbo/jconfig.h.in include/jpegli/jconfig.h)
+configure_file(
+  ../third_party/libjpeg-turbo/jpeglib.h include/jpegli/jpeglib.h COPYONLY)
+configure_file(
+  ../third_party/libjpeg-turbo/jmorecfg.h include/jpegli/jmorecfg.h COPYONLY)
+
+add_library(jpegli-static STATIC EXCLUDE_FROM_ALL "${JPEGXL_INTERNAL_JPEGLI_SOURCES}")
+target_compile_options(jpegli-static PRIVATE "${JPEGXL_INTERNAL_FLAGS}")
+target_compile_options(jpegli-static PUBLIC ${JPEGXL_COVERAGE_FLAGS})
+set_property(TARGET jpegli-static PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_include_directories(jpegli-static PRIVATE
+  "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>"
+  "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
+  "$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include>"
+  "${JXL_HWY_INCLUDE_DIRS}"
+)
+target_include_directories(jpegli-static PUBLIC
+  "$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include/jpegli>"
+)
+target_link_libraries(jpegli-static PUBLIC ${JPEGLI_INTERNAL_LIBS})
+
+#
+# Tests for jpegli-static
+#
+
+find_package(JPEG)
+if(JPEG_FOUND AND BUILD_TESTING)
+# TODO(eustas): merge into jxl_tests.cmake?
+
+add_library(jpegli_libjpeg_util-obj OBJECT
+  ${JPEGXL_INTERNAL_JPEGLI_LIBJPEG_HELPER_FILES}
+)
+target_include_directories(jpegli_libjpeg_util-obj PRIVATE
+  "${PROJECT_SOURCE_DIR}"
+  "${JPEG_INCLUDE_DIRS}"
+)
+target_compile_options(jpegli_libjpeg_util-obj PRIVATE
+  "${JPEGXL_INTERNAL_FLAGS}" "${JPEGXL_COVERAGE_FLAGS}")
+
+# Individual test binaries:
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tests)
+foreach (TESTFILE IN LISTS JPEGXL_INTERNAL_JPEGLI_TESTS)
+  # The TESTNAME is the name without the extension or directory.
+  get_filename_component(TESTNAME ${TESTFILE} NAME_WE)
+  add_executable(${TESTNAME} ${TESTFILE}
+    $<TARGET_OBJECTS:jpegli_libjpeg_util-obj>
+    ${JPEGXL_INTERNAL_JPEGLI_TESTLIB_FILES}
+  )
+  target_compile_options(${TESTNAME} PRIVATE
+    ${JPEGXL_INTERNAL_FLAGS}
+    # Add coverage flags to the test binary so code in the private headers of
+    # the library is also instrumented when running tests that execute it.
+    ${JPEGXL_COVERAGE_FLAGS}
+  )
+  target_compile_definitions(${TESTNAME} PRIVATE
+    -DTEST_DATA_PATH="${JPEGXL_TEST_DATA_PATH}")
+  target_include_directories(${TESTNAME} PRIVATE
+    "${PROJECT_SOURCE_DIR}"
+    "${CMAKE_CURRENT_SOURCE_DIR}/include"
+    "${CMAKE_CURRENT_BINARY_DIR}/include"
+  )
+  target_link_libraries(${TESTNAME}
+    hwy
+    jpegli-static
+    gmock
+    GTest::GTest
+    GTest::Main
+    ${JPEG_LIBRARIES}
+  )
+  set_target_properties(${TESTNAME} PROPERTIES LINK_FLAGS "${JPEGXL_COVERAGE_LINK_FLAGS}")
+  # Output test targets in the test directory.
+  set_target_properties(${TESTNAME} PROPERTIES PREFIX "tests/")
+  if (WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+    set_target_properties(${TESTNAME} PROPERTIES COMPILE_FLAGS "-Wno-error")
+  endif ()
+  jxl_discover_tests(${TESTNAME})
+endforeach ()
+endif()
+
+#
+# Build libjpeg.so that links to jpegli-static
+#
+
+if (JPEGXL_ENABLE_JPEGLI_LIBJPEG AND NOT APPLE AND NOT WIN32 AND NOT JPEGXL_EMSCRIPTEN)
+add_library(jpegli-libjpeg-obj OBJECT "${JPEGXL_INTERNAL_JPEGLI_WRAPPER_SOURCES}")
+target_compile_options(jpegli-libjpeg-obj PRIVATE ${JPEGXL_INTERNAL_FLAGS})
+target_compile_options(jpegli-libjpeg-obj PUBLIC ${JPEGXL_COVERAGE_FLAGS})
+set_property(TARGET jpegli-libjpeg-obj PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_include_directories(jpegli-libjpeg-obj PRIVATE
+  "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>"
+  "$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include/jpegli>"
+)
+target_compile_definitions(jpegli-libjpeg-obj PUBLIC
+  ${JPEGLI_LIBJPEG_OBJ_COMPILE_DEFINITIONS}
+)
+set(JPEGLI_LIBJPEG_INTERNAL_OBJECTS $<TARGET_OBJECTS:jpegli-libjpeg-obj>)
+
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/jpegli)
+add_library(jpeg SHARED ${JPEGLI_LIBJPEG_INTERNAL_OBJECTS})
+target_link_libraries(jpeg PUBLIC ${JPEGXL_COVERAGE_FLAGS})
+target_link_libraries(jpeg PRIVATE jpegli-static)
+set_target_properties(jpeg PROPERTIES
+  VERSION ${JPEGLI_LIBJPEG_LIBRARY_VERSION}
+  SOVERSION ${JPEGLI_LIBJPEG_LIBRARY_SOVERSION}
+  LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/jpegli"
+  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/jpegli")
+
+# Add a jpeg.version file as a version script to tag symbols with the
+# appropriate version number.
+set_target_properties(jpeg PROPERTIES
+  LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/jpegli/jpeg.version.${JPEGLI_LIBJPEG_LIBRARY_SOVERSION})
+set_property(TARGET jpeg APPEND_STRING PROPERTY
+  LINK_FLAGS " -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/jpegli/jpeg.version.${JPEGLI_LIBJPEG_LIBRARY_SOVERSION}")
+
+if (JPEGXL_INSTALL_JPEGLI_LIBJPEG)
+  install(TARGETS jpeg
+    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
+  install(
+    DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/include/jpegli/"
+    DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
+endif()
+
+# This hides the default visibility symbols from static libraries bundled into
+# the shared library. In particular this prevents exposing symbols from hwy
+# in the shared library.
+if(LINKER_SUPPORT_EXCLUDE_LIBS)
+  set_property(TARGET jpeg APPEND_STRING PROPERTY
+    LINK_FLAGS " ${LINKER_EXCLUDE_LIBS_FLAG}")
+endif()
+endif()
diff --git a/third-party/libjxl/libjxl/lib/jpegli/README.md b/third-party/libjxl/libjxl/lib/jpegli/README.md
new file mode 100644
index 0000000000..72f13afd22
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/README.md
@@ -0,0 +1,49 @@
+# Improved JPEG encoder and decoder implementation
+
+This subdirectory contains a JPEG encoder and decoder implementation that is
+API and ABI compatible with libjpeg62.
+
+## Building
+
+When building the parent libjxl project, two binaries, `tools/cjpegli` and
+`tools/djpegli` will be built, as well as a
+`lib/jpegli/libjpeg.so.62.3.0` shared library that can be used as a drop-in
+replacement for the system library with the same name.
+
+## Encoder improvements
+
+Improvements and new features used by the encoder include:
+
+* Support for 16-bit unsigned and 32-bit floating point input buffers.
+
+* Color space conversions, chroma subsampling and DCT are all done in floating
+  point precision; the conversion to integers happens only when producing
+  the final quantized DCT coefficients.
+
+* The desired quality can be indicated by a distance parameter that is
+  analogous to the distance parameter of JPEG XL. The quantization tables
+  are chosen based on the distance and the chroma subsampling mode, with
+  different positions in the quantization matrix scaling differently, and the
+  red and blue chrominance channels have separate quantization tables.
+
+* Adaptive dead-zone quantization. On noisy parts of the image, quantization
+  thresholds for zero coefficients are higher than on smoother parts of the
+  image.
+
+* Support for more efficient compression of JPEGs with an ICC profile
+  representing the XYB colorspace. These JPEGs will not be converted to the
+  YCbCr colorspace, but specialized quantization tables will be chosen for
+  the original X, Y, B channels.
+
+## Decoder improvements
+
+* Support for 16-bit unsigned and 32-bit floating point output buffers.
+
+* Non-zero DCT coefficients are dequantized to the expectation value of their
+  respective quantization intervals assuming a Laplacian distribution of the
+  original unquantized DCT coefficients.
+
+* After dequantization, inverse DCT, chroma upsampling and color space
+  conversions are all done in floating point precision, the conversion to
+  integer samples happens only in the final output phase (unless output to
+  floating point was requested).
diff --git a/third-party/libjxl/libjxl/lib/jpegli/adaptive_quantization.cc b/third-party/libjxl/libjxl/lib/jpegli/adaptive_quantization.cc
new file mode 100644
index 0000000000..a1c0b89ad3
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/adaptive_quantization.cc
@@ -0,0 +1,563 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/adaptive_quantization.h"
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <string>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/adaptive_quantization.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::AbsDiff;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::And;
+using hwy::HWY_NAMESPACE::Div;
+using hwy::HWY_NAMESPACE::Floor;
+using hwy::HWY_NAMESPACE::GetLane;
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::Min;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::NegMulAdd;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::ShiftLeft;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Sqrt;
+using hwy::HWY_NAMESPACE::Sub;
+using hwy::HWY_NAMESPACE::ZeroIfNegative;
+
+static constexpr float kInputScaling = 1.0f / 255.0f;
+
+// Primary template: plain lane-wise division via Div (portable named op).
+template <typename T, class V>
+struct FastDivision {
+  HWY_INLINE V operator()(const V n, const V d) const { return Div(n, d); }
+};
+// Partial specialization for float vectors.
+template <class V>
+struct FastDivision<float, V> {
+  // Reciprocal refined by one Newton-Raphson step: rcp * (2 - x * rcp).
+  static HWY_INLINE V ReciprocalNR(const V x) {
+    const auto rcp = ApproximateReciprocal(x);
+    const auto sum = Add(rcp, rcp);
+    const auto x_rcp = Mul(x, rcp);
+    return NegMulAdd(x_rcp, rcp, sum);
+  }
+
+  V operator()(const V n, const V d) const {
+#if 1  // Faster on SKX
+    return Div(n, d);
+#else
+    return n * ReciprocalNR(d);
+#endif
+  }
+};
+
+// Approximates smooth functions via rational polynomials (i.e. dividing two
+// polynomials). Evaluates polynomials via Horner's scheme, which is faster than
+// Clenshaw recurrence for Chebyshev polynomials. LoadDup128 allows us to
+// specify constants (replicated 4x) independently of the lane count.
+template <size_t NP, size_t NQ, class D, class V, typename T>
+HWY_INLINE HWY_MAYBE_UNUSED V EvalRationalPolynomial(const D d, const V x,
+                                                     const T (&p)[NP],
+                                                     const T (&q)[NQ]) {
+  constexpr size_t kDegP = NP / 4 - 1;
+  constexpr size_t kDegQ = NQ / 4 - 1;
+  auto yp = LoadDup128(d, &p[kDegP * 4]);
+  auto yq = LoadDup128(d, &q[kDegQ * 4]);
+  // We use pointer arithmetic to refer to &p[(kDegP - n) * 4] to avoid a
+  // compiler warning that the index is out of bounds since we are already
+  // checking that it is not out of bounds with (kDegP >= n) and the access
+  // will be optimized away. Similarly with q and kDegQ.
+  HWY_FENCE;
+  if (kDegP >= 1) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 1) * 4)));
+  if (kDegQ >= 1) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 1) * 4)));
+  HWY_FENCE;
+  if (kDegP >= 2) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 2) * 4)));
+  if (kDegQ >= 2) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 2) * 4)));
+  HWY_FENCE;
+  if (kDegP >= 3) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 3) * 4)));
+  if (kDegQ >= 3) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 3) * 4)));
+  HWY_FENCE;
+  if (kDegP >= 4) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 4) * 4)));
+  if (kDegQ >= 4) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 4) * 4)));
+  HWY_FENCE;
+  if (kDegP >= 5) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 5) * 4)));
+  if (kDegQ >= 5) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 5) * 4)));
+  HWY_FENCE;
+  if (kDegP >= 6) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 6) * 4)));
+  if (kDegQ >= 6) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 6) * 4)));
+  HWY_FENCE;
+  if (kDegP >= 7) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 7) * 4)));
+  if (kDegQ >= 7) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 7) * 4)));
+
+  return FastDivision<T, V>()(yp, yq);
+}
+
+// Computes base-2 logarithm like std::log2. Undefined if negative / NaN.
+// L1 error ~3.9E-6
+template <class DF, class V>
+V FastLog2f(const DF df, V x) {
+  // 2,2 rational polynomial approximation of std::log1p(x) / std::log(2).
+  HWY_ALIGN const float p[4 * (2 + 1)] = {HWY_REP4(-1.8503833400518310E-06f),
+                                          HWY_REP4(1.4287160470083755E+00f),
+                                          HWY_REP4(7.4245873327820566E-01f)};
+  HWY_ALIGN const float q[4 * (2 + 1)] = {HWY_REP4(9.9032814277590719E-01f),
+                                          HWY_REP4(1.0096718572241148E+00f),
+                                          HWY_REP4(1.7409343003366853E-01f)};
+
+  const Rebind<int32_t, DF> di;
+  const auto x_bits = BitCast(di, x);
+
+  // Range reduction to [-1/3, 1/3] - 3 integer, 2 float ops
+  const auto exp_bits = Sub(x_bits, Set(di, 0x3f2aaaab));  // = 2/3
+  // Shifted exponent = log2; also used to clear mantissa.
+  const auto exp_shifted = ShiftRight<23>(exp_bits);
+  const auto mantissa = BitCast(df, Sub(x_bits, ShiftLeft<23>(exp_shifted)));
+  const auto exp_val = ConvertTo(df, exp_shifted);
+  return Add(EvalRationalPolynomial(df, Sub(mantissa, Set(df, 1.0f)), p, q),
+             exp_val);
+}
+
+// 2^x = 2^floor(x) * rational fit of 2^frac(x); max relative error ~3e-7
+template <class DF, class V>
+V FastPow2f(const DF df, V x) {
+  const Rebind<int32_t, DF> di;
+  auto floorx = Floor(x);
+  auto exp =
+      BitCast(df, ShiftLeft<23>(Add(ConvertTo(di, floorx), Set(di, 127))));
+  auto frac = Sub(x, floorx);
+  auto num = Add(frac, Set(df, 1.01749063e+01));
+  num = MulAdd(num, frac, Set(df, 4.88687798e+01));
+  num = MulAdd(num, frac, Set(df, 9.85506591e+01));
+  num = Mul(num, exp);
+  auto den = MulAdd(frac, Set(df, 2.10242958e-01), Set(df, -2.22328856e-02));
+  den = MulAdd(den, frac, Set(df, -1.94414990e+01));
+  den = MulAdd(den, frac, Set(df, 9.85506633e+01));
+  return Div(num, den);
+}
+
+inline float FastPow2f(float f) {
+  HWY_CAPPED(float, 1) D;
+  return GetLane(FastPow2f(D, Set(D, f)));
+}
+
+// The following functions modulate an exponent (out_val) and return the updated
+// value. Their descriptor is limited to 8 lanes for 8x8 blocks.
+
+template <class D, class V>
+V ComputeMask(const D d, const V out_val) {
+  const auto kBase = Set(d, -0.74174993f);
+  const auto kMul4 = Set(d, 3.2353257320940401f);
+  const auto kMul2 = Set(d, 12.906028311180409f);
+  const auto kOffset2 = Set(d, 305.04035728311436f);
+  const auto kMul3 = Set(d, 5.0220313103171232f);
+  const auto kOffset3 = Set(d, 2.1925739705298404f);
+  const auto kOffset4 = Mul(Set(d, 0.25f), kOffset3);
+  const auto kMul0 = Set(d, 0.74760422233706747f);
+  const auto k1 = Set(d, 1.0f);
+
+  // Avoid division by zero.
+  const auto v1 = Max(Mul(out_val, kMul0), Set(d, 1e-3f));
+  const auto v2 = Div(k1, Add(v1, kOffset2));
+  const auto v3 = Div(k1, MulAdd(v1, v1, kOffset3));
+  const auto v4 = Div(k1, MulAdd(v1, v1, kOffset4));
+  // TODO(jyrki):
+  // A log or two here could make sense. In butteraugli we have effectively
+  // log(log(x + C)) for this kind of use, as a single log is used in
+  // saturating visual masking and here the modulation values are exponential,
+  // another log would counter that.
+  return Add(kBase, MulAdd(kMul4, v4, MulAdd(kMul2, v2, Mul(kMul3, v3))));
+}
+
+// mul and mul2 represent a scaling difference between jxl and butteraugli.
+static const float kSGmul = 226.0480446705883f;
+static const float kSGmul2 = 1.0f / 73.377132366608819f;
+static const float kLog2 = 0.693147181f;
+// Includes correction factor for std::log -> log2.
+static const float kSGRetMul = kSGmul2 * 18.6580932135f * kLog2;
+static const float kSGVOffset = 7.14672470003f;
+
+template <bool invert, typename D, typename V>
+V RatioOfDerivativesOfCubicRootToSimpleGamma(const D d, V v) {
+  // The opsin space in jxl is the cubic root of photons, i.e., v * v * v
+  // is related to the number of photons.
+  //
+  // SimpleGamma(v * v * v) is the psychovisual space in butteraugli.
+  // This ratio allows quantization to move from jxl's opsin space to
+  // butteraugli's log-gamma space.
+  static const float kEpsilon = 1e-2;
+  static const float kNumOffset = kEpsilon / kInputScaling / kInputScaling;
+  static const float kNumMul = kSGRetMul * 3 * kSGmul;
+  static const float kVOffset = (kSGVOffset * kLog2 + kEpsilon) / kInputScaling;
+  static const float kDenMul = kLog2 * kSGmul * kInputScaling * kInputScaling;
+
+  v = ZeroIfNegative(v);
+  const auto num_mul = Set(d, kNumMul);
+  const auto num_offset = Set(d, kNumOffset);
+  const auto den_offset = Set(d, kVOffset);
+  const auto den_mul = Set(d, kDenMul);
+
+  const auto v2 = Mul(v, v);
+
+  const auto num = MulAdd(num_mul, v2, num_offset);
+  const auto den = MulAdd(Mul(den_mul, v), v2, den_offset);
+  return invert ? Div(num, den) : Div(den, num);
+}
+
+template <bool invert = false>
+static float RatioOfDerivativesOfCubicRootToSimpleGamma(float v) {
+  using DScalar = HWY_CAPPED(float, 1);
+  auto vscalar = Load(DScalar(), &v);
+  return GetLane(
+      RatioOfDerivativesOfCubicRootToSimpleGamma<invert>(DScalar(), vscalar));
+}
+
+// TODO(veluca): this function computes an approximation of the derivative of
+// SimpleGamma with (f(x+eps)-f(x))/eps. Consider two-sided approximation or
+// exact derivatives. For reference, SimpleGamma was:
+/*
+template <typename D, typename V>
+V SimpleGamma(const D d, V v) {
+  // A simple HDR compatible gamma function.
+  const auto mul = Set(d, kSGmul);
+  const auto kRetMul = Set(d, kSGRetMul);
+  const auto kRetAdd = Set(d, kSGmul2 * -20.2789020414f);
+  const auto kVOffset = Set(d, kSGVOffset);
+
+  v *= mul;
+
+  // This should happen rarely, but may lead to a NaN, which is rather
+  // undesirable. Since negative photons don't exist we solve the NaNs by
+  // clamping here.
+  // TODO(veluca): with FastLog2f, this no longer leads to NaNs.
+  v = ZeroIfNegative(v);
+  return kRetMul * FastLog2f(d, v + kVOffset) + kRetAdd;
+}
+*/
+
+// Returns out_val plus a constant times the log of the scaled sum of gamma
+// derivative ratios over the 8x8 block of `input` that starts at (x, y).
+template <class D, class V>
+V GammaModulation(const D d, const size_t x, const size_t y,
+                  const RowBuffer<float>& input, const V out_val) {
+  static const float kBias = 0.16f / kInputScaling;
+  static const float kScale = kInputScaling / 64.0f;
+  const auto bias_v = Set(d, kBias);
+  const float* const JXL_RESTRICT block = input.Row(y) + x;
+  auto ratio_sum = Zero(d);
+  for (size_t row = 0; row < 8; ++row) {
+    const float* const JXL_RESTRICT src = block + row * input.stride();
+    for (size_t col = 0; col < 8; col += Lanes(d)) {
+      const auto biased = Add(Load(d, src + col), bias_v);
+      ratio_sum = Add(ratio_sum, RatioOfDerivativesOfCubicRootToSimpleGamma<
+                                     /*invert=*/true>(d, biased));
+    }
+  }
+  const auto scaled = Mul(SumOfLanes(d, ratio_sum), Set(d, kScale));
+  // Ideally -1.0, but the optimal correction likely adds some entropy, so
+  // slightly less than that. ln(2) is folded into the constant because we
+  // want std::log but only have FastLog2f.
+  const auto gamma_v = Set(d, -0.15526878023684174f * 0.693147180559945f);
+  return MulAdd(gamma_v, FastLog2f(d, scaled), out_val);
+}
+
+// Change precision in 8x8 blocks that have high frequency content.
+// Returns out_val plus kSumCoeff times the sum, over the 8x8 block of `input`
+// at (x, y), of absolute differences with the right and lower neighbors.
+template <class D, class V>
+V HfModulation(const D d, const size_t x, const size_t y,
+               const RowBuffer<float>& input, const V out_val) {
+  // Zero out the invalid differences for the rightmost value per row.
+  const Rebind<uint32_t, D> du;
+  HWY_ALIGN constexpr uint32_t kMaskRight[8] = {~0u, ~0u, ~0u, ~0u,
+                                                ~0u, ~0u, ~0u, 0};
+  auto sum = Zero(d);  // sum of absolute differences with right and below
+  static const float kSumCoeff = -2.0052193233688884f * kInputScaling / 112.0;
+  auto sumcoeff = Set(d, kSumCoeff);
+  const float* const JXL_RESTRICT block_start = input.Row(y) + x;
+  for (size_t dy = 0; dy < 8; ++dy) {
+    const float* JXL_RESTRICT row_in = block_start + dy * input.stride();
+    // Row 7 has no in-block row below; comparing it with itself adds zero.
+    const float* JXL_RESTRICT row_in_next =
+        dy == 7 ? row_in : row_in + input.stride();
+    for (size_t dx = 0; dx < 8; dx += Lanes(d)) {
+      const auto p = Load(d, row_in + dx);
+      const auto pr = LoadU(d, row_in + dx + 1);  // right neighbors of p
+      const auto mask = BitCast(d, Load(du, kMaskRight + dx));
+      sum = Add(sum, And(mask, AbsDiff(p, pr)));
+      const auto pd = Load(d, row_in_next + dx);
+      sum = Add(sum, AbsDiff(p, pd));
+    }
+  }
+  // Reduce the per-lane partial sums before applying the coefficient.
+  sum = SumOfLanes(d, sum);
+  return MulAdd(sum, sumcoeff, out_val);
+}
+
+// Applies the masking, high-frequency and gamma modulations to each entry of
+// block rows [yb0, yb0 + yblen) of `aq_map`, in place. The modulations act on
+// an exponent that is finally turned into a multiplicative field value.
+void PerBlockModulations(const float y_quant_01, const RowBuffer<float>& input,
+                         const size_t yb0, const size_t yblen,
+                         RowBuffer<float>* aq_map) {
+  static const float kAcQuant = 0.841f;
+  const float base_level = 0.48f * kAcQuant;
+  const float kRampStart = 9.0f;
+  const float kRampEnd = 65.0f;
+  // Dampening falls linearly from 1 to 0 as y_quant_01 goes from start to end.
+  float dampen = 1.0f;
+  if (y_quant_01 >= kRampStart) {
+    dampen = 1.0f - ((y_quant_01 - kRampStart) / (kRampEnd - kRampStart));
+    if (dampen < 0) dampen = 0;
+  }
+  const float mul = kAcQuant * dampen;
+  const float add = (1.0f - dampen) * base_level;
+  // At most 8 lanes, matching the 8-sample rows of a block.
+  const HWY_CAPPED(float, 8) df;
+  for (size_t yb = yb0; yb < yb0 + yblen; ++yb) {
+    const size_t y = yb * 8;
+    float* const JXL_RESTRICT row_out = aq_map->Row(yb);
+    for (size_t bx = 0; bx < aq_map->xsize(); ++bx) {
+      const size_t x = bx * 8;
+      auto exponent = ComputeMask(df, Set(df, row_out[bx]));
+      exponent = HfModulation(df, x, y, input, exponent);
+      exponent = GammaModulation(df, x, y, input, exponent);
+      // We want a multiplicative quantization field, so everything up to
+      // this point has been modulating the exponent.
+      row_out[bx] = FastPow2f(GetLane(exponent) * 1.442695041f) * mul + add;
+    }
+  }
+}
+
+// Returns 0.25 * sqrt(v * sqrt(kMul * 1e8) + kLogOffset), lane-wise.
+template <typename D, typename V>
+V MaskingSqrt(const D d, V v) {
+  static const float kLogOffset = 28;
+  static const float kMul = 211.50759899638012f;
+  const auto slope = Sqrt(Set(d, kMul * 1e8));
+  return Mul(Set(d, 0.25f), Sqrt(MulAdd(v, slope, Set(d, kLogOffset))));
+}
+
+// Sorts the four vectors lane-wise into ascending order (min0 smallest).
+template <typename V>
+void Sort4(V& min0, V& min1, V& min2, V& min3) {
+  const auto lo01 = Min(min0, min1);
+  const auto hi01 = Max(min0, min1);
+  const auto lo23 = Min(min2, min3);
+  const auto hi23 = Max(min2, min3);
+  const auto mid_a = Max(lo01, lo23), mid_b = Min(hi01, hi23);
+  min0 = Min(lo01, lo23);
+  min1 = Min(mid_a, mid_b);
+  min2 = Max(mid_a, mid_b);
+  min3 = Max(hi01, hi23);
+}
+
+// Inserts v into min0..min3 (assumed ascending), dropping the largest value.
+template <typename V>
+void UpdateMin4(const V v, V& min0, V& min1, V& min2, V& min3) {
+  const auto carry0 = Max(min0, v);
+  const auto carry1 = Max(min1, carry0);
+  min3 = Min(min3, Max(min2, carry1));
+  min2 = Min(min2, carry1);
+  min1 = Min(min1, carry0);
+  min0 = Min(min0, v);
+}
+
+// Computes a linear combination of the 4 lowest values of the 3x3 neighborhood
+// of each pixel. Output is downsampled 2x.
+void FuzzyErosion(const RowBuffer<float>& pre_erosion, const size_t yb0,
+                  const size_t yblen, RowBuffer<float>* tmp,
+                  RowBuffer<float>* aq_map) {
+  int xsize_blocks = aq_map->xsize();
+  int xsize = pre_erosion.xsize();
+  HWY_FULL(float) d;
+  const auto mul0 = Set(d, 0.125f);  // weight of the smallest value
+  const auto mul1 = Set(d, 0.075f);
+  const auto mul2 = Set(d, 0.06f);
+  const auto mul3 = Set(d, 0.05f);  // weight of the 4th smallest value
+  for (size_t iy = 0; iy < 2 * yblen; ++iy) {  // 2 input rows per block row
+    size_t y = 2 * yb0 + iy;
+    const float* JXL_RESTRICT rowt = pre_erosion.Row(y - 1);  // row above
+    const float* JXL_RESTRICT rowm = pre_erosion.Row(y);
+    const float* JXL_RESTRICT rowb = pre_erosion.Row(y + 1);  // row below
+    float* row_out = tmp->Row(y);
+    for (int x = 0; x < xsize; x += Lanes(d)) {
+      int xm1 = x - 1;
+      int xp1 = x + 1;
+      auto min0 = LoadU(d, rowm + x);
+      auto min1 = LoadU(d, rowm + xm1);
+      auto min2 = LoadU(d, rowm + xp1);
+      auto min3 = LoadU(d, rowt + xm1);
+      Sort4(min0, min1, min2, min3);  // first 4 neighborhood values, sorted
+      UpdateMin4(LoadU(d, rowt + x), min0, min1, min2, min3);  // other 5 values
+      UpdateMin4(LoadU(d, rowt + xp1), min0, min1, min2, min3);
+      UpdateMin4(LoadU(d, rowb + xm1), min0, min1, min2, min3);
+      UpdateMin4(LoadU(d, rowb + x), min0, min1, min2, min3);
+      UpdateMin4(LoadU(d, rowb + xp1), min0, min1, min2, min3);
+      const auto v = Add(Add(Mul(mul0, min0), Mul(mul1, min1)),
+                         Add(Mul(mul2, min2), Mul(mul3, min3)));
+      Store(v, d, row_out + x);
+    }
+    if (iy % 2 == 1) {  // second row of a pair: sum each 2x2 cell of tmp
+      const float* JXL_RESTRICT row_out0 = tmp->Row(y - 1);
+      float* JXL_RESTRICT aq_out = aq_map->Row(yb0 + iy / 2);
+      for (int bx = 0, x = 0; bx < xsize_blocks; ++bx, x += 2) {
+        aq_out[bx] =
+            (row_out[x] + row_out[x + 1] + row_out0[x] + row_out0[x + 1]);
+      }
+    }
+  }
+}
+
+// Computes a clamped, gamma-weighted local difference (sample vs. mean of its
+// 4 neighbors) for rows [y0, y0 + ylen) of `input`. For each 4x4 cell, values
+// are summed over 4 rows in `diff_buffer`, averaged over 4 columns and stored.
+void ComputePreErosion(const RowBuffer<float>& input, const size_t xsize,
+                       const size_t y0, const size_t ylen, int border,
+                       float* diff_buffer, RowBuffer<float>* pre_erosion) {
+  const size_t xsize_out = xsize / 4;
+  const size_t y0_out = y0 / 4;
+  // The XYB gamma is 3.0 to be able to decode faster with two muls.
+  // Butteraugli's gamma is matching the gamma of human eye, around 2.6.
+  // We approximate the gamma difference by adding one cubic root into
+  // the adaptive quantization. This gives us a total gamma of 2.6666
+  // for quantization uses.
+  static const float match_gamma_offset = 0.019 / kInputScaling;
+  const HWY_CAPPED(float, 8) df;
+  static const float limit = 0.2f;
+  // Computes image (padded to multiple of 8x8) of local pixel differences.
+  // Subsample both directions by 4.
+  for (size_t iy = 0; iy < ylen; ++iy) {
+    size_t y = y0 + iy;
+    const float* row_in = input.Row(y);
+    const float* row_in1 = input.Row(y + 1);  // row below
+    const float* row_in2 = input.Row(y - 1);  // row above
+    float* JXL_RESTRICT row_out = diff_buffer;
+    const auto match_gamma_offset_v = Set(df, match_gamma_offset);
+    const auto quarter = Set(df, 0.25f);
+    for (size_t x = 0; x < xsize; x += Lanes(df)) {
+      const auto in = LoadU(df, row_in + x);
+      const auto in_r = LoadU(df, row_in + x + 1);
+      const auto in_l = LoadU(df, row_in + x - 1);
+      const auto in_t = LoadU(df, row_in2 + x);
+      const auto in_b = LoadU(df, row_in1 + x);
+      const auto base = Mul(quarter, Add(Add(in_r, in_l), Add(in_t, in_b)));
+      const auto gammacv =
+          RatioOfDerivativesOfCubicRootToSimpleGamma</*invert=*/false>(
+              df, Add(in, match_gamma_offset_v));
+      auto diff = Mul(gammacv, Sub(in, base));
+      diff = Mul(diff, diff);
+      diff = Min(diff, Set(df, limit));  // clamp the squared difference
+      diff = MaskingSqrt(df, diff);
+      if ((iy & 3) != 0) {  // rows 1..3 of a group add to the running sum
+        diff = Add(diff, LoadU(df, row_out + x));
+      }
+      StoreU(diff, df, row_out + x);
+    }
+    if (iy % 4 == 3) {  // a group of 4 rows is complete: emit one output row
+      size_t y_out = y0_out + iy / 4;
+      float* row_dout = pre_erosion->Row(y_out);
+      for (size_t x = 0; x < xsize_out; x++) {
+        row_dout[x] = (row_out[x * 4] + row_out[x * 4 + 1] +
+                       row_out[x * 4 + 2] + row_out[x * 4 + 3]) *
+                      0.25f;
+      }
+      pre_erosion->PadRow(y_out, xsize_out, border);
+    }
+  }
+}
+
+}  // namespace
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+HWY_EXPORT(ComputePreErosion);
+HWY_EXPORT(FuzzyErosion);
+HWY_EXPORT(PerBlockModulations);
+
+namespace {
+
+static constexpr int kPreErosionBorder = 1;
+
+}  // namespace
+
+void ComputeAdaptiveQuantField(j_compress_ptr cinfo) {
+  jpeg_comp_master* m = cinfo->master;
+  if (!m->use_adaptive_quantization) {
+    return;
+  }
+  int y_channel = cinfo->jpeg_color_space == JCS_RGB ? 1 : 0;  // 1 only for RGB
+  jpeg_component_info* y_comp = &cinfo->comp_info[y_channel];
+  int y_quant_01 = cinfo->quant_tbl_ptrs[y_comp->quant_tbl_no]->quantval[1];
+  if (m->next_iMCU_row == 0) {  // first iMCU row
+    m->input_buffer[y_channel].CopyRow(-1, 0, 1);  // presumably top padding
+  }
+  if (m->next_iMCU_row + 1 == cinfo->total_iMCU_rows) {  // last iMCU row
+    size_t last_row = m->ysize_blocks * DCTSIZE - 1;
+    m->input_buffer[y_channel].CopyRow(last_row + 1, last_row, 1);
+  }
+  const RowBuffer<float>& input = m->input_buffer[y_channel];
+  const size_t xsize_blocks = y_comp->width_in_blocks;
+  const size_t xsize = xsize_blocks * DCTSIZE;
+  const size_t yb0 = m->next_iMCU_row * cinfo->max_v_samp_factor;
+  const size_t yblen = cinfo->max_v_samp_factor;
+  size_t y0 = yb0 * DCTSIZE;
+  size_t ylen = cinfo->max_v_samp_factor * DCTSIZE;
+  if (y0 == 0) {
+    ylen += 4;  // the first call covers 4 extra rows
+  } else {
+    y0 += 4;  // later calls start 4 rows further down
+  }
+  if (m->next_iMCU_row + 1 == cinfo->total_iMCU_rows) {
+    ylen -= 4;  // the last call covers 4 rows fewer
+  }
+  HWY_DYNAMIC_DISPATCH(ComputePreErosion)
+  (input, xsize, y0, ylen, kPreErosionBorder, m->diff_buffer, &m->pre_erosion);
+  if (y0 == 0) {
+    m->pre_erosion.CopyRow(-1, 0, kPreErosionBorder);
+  }
+  if (m->next_iMCU_row + 1 == cinfo->total_iMCU_rows) {
+    size_t last_row = m->ysize_blocks * 2 - 1;  // 2 pre_erosion rows per block
+    m->pre_erosion.CopyRow(last_row + 1, last_row, kPreErosionBorder);
+  }
+  HWY_DYNAMIC_DISPATCH(FuzzyErosion)
+  (m->pre_erosion, yb0, yblen, &m->fuzzy_erosion_tmp, &m->quant_field);
+  HWY_DYNAMIC_DISPATCH(PerBlockModulations)
+  (y_quant_01, input, yb0, yblen, &m->quant_field);
+  for (int y = 0; y < cinfo->max_v_samp_factor; ++y) {
+    float* row = m->quant_field.Row(yb0 + y);
+    for (size_t x = 0; x < xsize_blocks; ++x) {
+      row[x] = std::max(0.0f, (0.6f / row[x]) - 1.0f);  // remap, floor at 0
+    }
+  }
+}
+
+}  // namespace jpegli
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jpegli/adaptive_quantization.h b/third-party/libjxl/libjxl/lib/jpegli/adaptive_quantization.h
new file mode 100644
index 0000000000..d8537e85df
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/adaptive_quantization.h
@@ -0,0 +1,17 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_ADAPTIVE_QUANTIZATION_H_
+#define LIB_JPEGLI_ADAPTIVE_QUANTIZATION_H_
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+void ComputeAdaptiveQuantField(j_compress_ptr cinfo);
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_ADAPTIVE_QUANTIZATION_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/bit_writer.cc b/third-party/libjxl/libjxl/lib/jpegli/bit_writer.cc
new file mode 100644
index 0000000000..9788f35b8d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/bit_writer.cc
@@ -0,0 +1,60 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/bit_writer.h"
+
+#include "lib/jpegli/encode_internal.h"
+
+namespace jpegli {
+
+// Allocates the bit writer's byte buffer and resets the writer to empty.
+void JpegBitWriterInit(j_compress_ptr cinfo) {
+  JpegBitWriter& writer = cinfo->master->bw;
+  const size_t capacity =
+      cinfo->master->blocks_per_iMCU_row * (DCTSIZE2 * 16 + 8) + (1 << 16);
+  writer.cinfo = cinfo;
+  writer.data = Allocate<uint8_t>(cinfo, capacity, JPOOL_IMAGE);
+  writer.len = capacity;
+  writer.pos = writer.output_pos = 0;
+  writer.put_buffer = 0;
+  writer.free_bits = 64;  // the whole 64-bit put_buffer is free
+  writer.healthy = true;
+}
+
+// Copies buffered bytes to the destination; false if the destination stalls.
+bool EmptyBitWriterBuffer(JpegBitWriter* bw) {
+  j_compress_ptr cinfo = bw->cinfo;
+  while (bw->output_pos < bw->pos) {
+    const bool full = cinfo->dest->free_in_buffer == 0;
+    if (full && !(*cinfo->dest->empty_output_buffer)(cinfo)) return false;
+    auto* dest = cinfo->dest;
+    const size_t chunk =
+        std::min<size_t>(dest->free_in_buffer, bw->pos - bw->output_pos);
+    memcpy(dest->next_output_byte, bw->data + bw->output_pos, chunk);
+    dest->next_output_byte += chunk;
+    dest->free_in_buffer -= chunk;
+    bw->output_pos += chunk;
+  }
+  bw->output_pos = bw->pos = 0;
+  return true;
+}
+
+// Pads the pending bits with 1s up to a byte boundary and emits all of them.
+void JumpToByteBoundary(JpegBitWriter* bw) {
+  const size_t n_bits = bw->free_bits & 7u;
+  if (n_bits > 0) WriteBits(bw, n_bits, (1u << n_bits) - 1);
+  // Left-align the pending bits. Shifting a 64-bit value by 64 is undefined,
+  // so skip the shift when the buffer is empty (free_bits == 64).
+  if (bw->free_bits < 64) bw->put_buffer <<= bw->free_bits;
+  while (bw->free_bits <= 56) {
+    EmitByte(bw, (bw->put_buffer >> 56) & 0xFF);
+    bw->put_buffer <<= 8;
+    bw->free_bits += 8;
+  }
+  bw->put_buffer = 0;
+  bw->free_bits = 64;
+}
+
+}  // namespace jpegli
diff --git a/third-party/libjxl/libjxl/lib/jpegli/bit_writer.h b/third-party/libjxl/libjxl/lib/jpegli/bit_writer.h
new file mode 100644
index 0000000000..3adf1eaca1
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/bit_writer.h
@@ -0,0 +1,98 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_BIT_WRITER_H_
+#define LIB_JPEGLI_BIT_WRITER_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#include "lib/jpegli/common.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jpegli {
+
+// Handles the packing of bits into output bytes.
+struct JpegBitWriter {
+  j_compress_ptr cinfo;  // compressor whose destination receives the bytes
+  uint8_t* data;         // byte buffer that collects the encoded output
+  size_t len;            // allocated size of `data` in bytes
+  size_t pos;            // next write offset into `data`
+  size_t output_pos;     // offset of the first byte not yet copied to dest
+  uint64_t put_buffer;   // bit accumulator; new bits enter at the low end
+  int free_bits;         // number of unused bits left in `put_buffer`
+  bool healthy;          // cleared when a zero-length code is written
+};
+
+void JpegBitWriterInit(j_compress_ptr cinfo);
+
+bool EmptyBitWriterBuffer(JpegBitWriter* bw);
+
+void JumpToByteBoundary(JpegBitWriter* bw);
+
+// Returns non-zero if and only if at least one of the eight bytes of x is
+// zero, i.e. one of x & 0xff, x & 0xff00, ..., x & 0xff00000000000000 is 0.
+static JXL_INLINE uint64_t HasZeroByte(uint64_t x) {
+  return ~x & (x - 0x0101010101010101ULL) & 0x8080808080808080ULL;
+}
+
+/**
+ * Appends `byte` to the buffer, followed by a zero byte if it is 0xFF.
+ * "Careless": the caller must ensure room for up to 2 bytes in the buffer.
+ */
+static JXL_INLINE void EmitByte(JpegBitWriter* bw, int byte) {
+  uint8_t* dst = bw->data + bw->pos;
+  dst[0] = byte;
+  if (byte == 0xFF) dst[1] = 0;
+  bw->pos += (byte == 0xFF) ? 2 : 1;
+}
+
+// Appends all 8 bytes of put_buffer to the byte buffer, most significant
+// byte first.
+//
+// The JPEG format requires that every 0xFF byte in the entropy coded section
+// is followed by a zero byte, so the fast 8-byte store can only be used when
+// none of the bytes is 0xFF. Like EmitByte, this does not check for space:
+// up to 16 bytes may be written.
+static JXL_INLINE void DischargeBitBuffer(JpegBitWriter* bw) {
+  const uint64_t word = bw->put_buffer;
+  // A byte of `word` is 0xFF exactly when that byte of ~word is zero.
+  if (!HasZeroByte(~word)) {
+    // No 0xFF bytes: output all 8 bytes at once without checking.
+    StoreBE64(word, bw->data + bw->pos);
+    bw->pos += 8;
+    return;
+  }
+  // At least one 0xFF byte: emit byte by byte so that EmitByte can append
+  // the zero byte where necessary.
+  for (int shift = 56; shift >= 0; shift -= 8) {
+    EmitByte(bw, (word >> shift) & 0xFF);
+  }
+}
+
+static JXL_INLINE void WriteBits(JpegBitWriter* bw, int nbits, uint64_t bits) {
+  // This is an optimization: in a valid stream |nbits| is always positive. A
+  // non-existing Huffman symbol has length zero, so instead of failing here
+  // we only clear |healthy|; the encoder can check the "health" of the
+  // JpegBitWriter later.
+  if (nbits == 0) {
+    bw->healthy = false;
+    return;
+  }
+  bw->free_bits -= nbits;
+  if (bw->free_bits < 0) {  // not enough room: fill put_buffer and flush it
+    bw->put_buffer <<= (bw->free_bits + nbits);  // shift by the old free_bits
+    bw->put_buffer |= (bits >> -bw->free_bits);  // top bits that still fit
+    DischargeBitBuffer(bw);
+    bw->free_bits += 64;
+    bw->put_buffer = nbits;  // don't-care: ends up above the pending bits
+  }
+  bw->put_buffer <<= nbits;
+  bw->put_buffer |= bits;  // assumes bits < 2^nbits (TODO confirm at callers)
+}
+
+}  // namespace jpegli
+#endif  // LIB_JPEGLI_BIT_WRITER_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/bitstream.cc b/third-party/libjxl/libjxl/lib/jpegli/bitstream.cc
new file mode 100644
index 0000000000..3448367dde
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/bitstream.cc
@@ -0,0 +1,452 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/bitstream.h"
+
+#include <cmath>
+
+#include "lib/jpegli/bit_writer.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+
+namespace jpegli {
+
+// Copies buf to cinfo->dest, emptying the destination buffer whenever full.
+void WriteOutput(j_compress_ptr cinfo, const uint8_t* buf, size_t bufsize) {
+  for (size_t done = 0; done < bufsize;) {
+    if (cinfo->dest->free_in_buffer == 0 &&
+        !(*cinfo->dest->empty_output_buffer)(cinfo)) {
+      JPEGLI_ERROR("Destination suspension is not supported in markers.");
+    }
+    size_t n = std::min<size_t>(cinfo->dest->free_in_buffer, bufsize - done);
+    memcpy(cinfo->dest->next_output_byte, buf + done, n);
+    cinfo->dest->next_output_byte += n;
+    cinfo->dest->free_in_buffer -= n;
+    done += n;
+  }
+}
+
+void WriteOutput(j_compress_ptr cinfo, const std::vector<uint8_t>& bytes) {
+  WriteOutput(cinfo, bytes.data(), bytes.size());  // pointer + size overload
+}
+
+void WriteOutput(j_compress_ptr cinfo, std::initializer_list<uint8_t> bytes) {
+  WriteOutput(cinfo, bytes.begin(), bytes.size());  // pointer + size overload
+}
+
+// Writes the JFIF APP0 segment with the version and density fields of cinfo.
+void EncodeAPP0(j_compress_ptr cinfo) {
+  const auto octet = [](int v, int s) { return static_cast<uint8_t>(v >> s); };
+  WriteOutput(cinfo, {0xff, 0xe0, 0, 16, 'J', 'F', 'I', 'F', '\0',
+                      cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
+                      cinfo->density_unit, octet(cinfo->X_density, 8),
+                      octet(cinfo->X_density, 0), octet(cinfo->Y_density, 8),
+                      octet(cinfo->Y_density, 0), 0, 0});
+}
+
+// Writes the Adobe APP14 segment; its last byte is 1 for YCbCr, 2 for YCCK.
+void EncodeAPP14(j_compress_ptr cinfo) {
+  const auto cs = cinfo->jpeg_color_space;
+  const uint8_t transform = cs == JCS_YCbCr ? 1 : cs == JCS_YCCK ? 2 : 0;
+  WriteOutput(cinfo, {0xff, 0xee, 0, 14, 'A', 'd', 'o', 'b', 'e', 0, 100, 0, 0,
+                      0, 0, transform});
+}
+
+// Writes the start of the JPEG stream: the SOI marker, followed by
+//  - the JFIF APP0 segment if cinfo->write_JFIF_header is set, and
+//  - the Adobe APP14 segment if cinfo->write_Adobe_marker is set,
+// in that order.
+void WriteFileHeader(j_compress_ptr cinfo) {
+  WriteOutput(cinfo, {0xFF, 0xD8});  // SOI
+  if (cinfo->write_JFIF_header) EncodeAPP0(cinfo);
+  if (cinfo->write_Adobe_marker) EncodeAPP14(cinfo);
+}
+
+// Writes the unsent quantization tables in one DQT marker segment. Returns
+// true iff every selected table (sent or not) has only values <= 255.
+bool EncodeDQT(j_compress_ptr cinfo, bool write_all_tables) {
+  uint8_t data[4 + NUM_QUANT_TBLS * (1 + 2 * DCTSIZE2)];  // 520 bytes
+  size_t pos = 0;
+  data[pos++] = 0xFF;
+  data[pos++] = 0xDB;
+  pos += 2;  // Length will be filled in later.
+  int send_table[NUM_QUANT_TBLS] = {};  // 1 for every selected table slot
+  if (write_all_tables) {
+    for (int i = 0; i < NUM_QUANT_TBLS; ++i) {
+      if (cinfo->quant_tbl_ptrs[i]) send_table[i] = 1;
+    }
+  } else {
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      send_table[cinfo->comp_info[c].quant_tbl_no] = 1;
+    }
+  }
+  bool is_baseline = true;
+  for (int i = 0; i < NUM_QUANT_TBLS; ++i) {
+    if (!send_table[i]) continue;
+    JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[i];
+    if (quant_table == nullptr) {
+      JPEGLI_ERROR("Missing quant table %d", i);
+    }
+    int precision = 0;  // 0: one byte per value, 1: two bytes per value
+    for (size_t k = 0; k < DCTSIZE2; ++k) {
+      if (quant_table->quantval[k] > 255) {
+        precision = 1;
+        is_baseline = false;
+      }
+    }
+    if (quant_table->sent_table) {  // already written; baseline check is done
+      continue;
+    }
+    data[pos++] = (precision << 4) + i;
+    for (size_t j = 0; j < DCTSIZE2; ++j) {
+      int val_idx = kJPEGNaturalOrder[j];
+      int val = quant_table->quantval[val_idx];
+      if (val == 0) {
+        JPEGLI_ERROR("Invalid quantval 0.");
+      }
+      if (precision) {
+        data[pos++] = val >> 8;  // high byte first
+      }
+      data[pos++] = val & 0xFFu;
+    }
+    quant_table->sent_table = TRUE;
+  }
+  if (pos > 4) {  // at least one table follows the 4 header bytes
+    data[2] = (pos - 2) >> 8u;  // length does not count the 2 marker bytes
+    data[3] = (pos - 2) & 0xFFu;
+    WriteOutput(cinfo, data, pos);
+  }
+  return is_baseline;
+}
+
+// Writes the frame header: marker 0xc2 if progressive, else 0xc0 or 0xc1.
+void EncodeSOF(j_compress_ptr cinfo, bool is_baseline) {
+  if (cinfo->data_precision != kJpegPrecision) {
+    is_baseline = false;
+    JPEGLI_ERROR("Unsupported data precision %d", cinfo->data_precision);
+  }
+  const uint8_t marker = cinfo->progressive_mode ? 0xc2
+                         : is_baseline           ? 0xc0
+                                                 : 0xc1;
+  const size_t n_comps = cinfo->num_components;
+  const size_t marker_len = 8 + 3 * n_comps;
+  std::vector<uint8_t> data;
+  data.reserve(marker_len + 2);
+  const auto put16 = [&data](size_t v) {  // appends v as big-endian 16 bits
+    data.insert(data.end(), {uint8_t(v >> 8u), uint8_t(v & 0xFFu)});
+  };
+  data.insert(data.end(), {0xFF, marker});
+  put16(marker_len);
+  data.push_back(kJpegPrecision);
+  put16(cinfo->image_height);
+  put16(cinfo->image_width);
+  data.push_back(n_comps);
+  for (size_t c = 0; c < n_comps; ++c) {
+    const jpeg_component_info& comp = cinfo->comp_info[c];
+    data.push_back(comp.component_id);
+    data.push_back((comp.h_samp_factor << 4u) | comp.v_samp_factor);
+    const uint32_t quant_idx = comp.quant_tbl_no;
+    if (cinfo->quant_tbl_ptrs[quant_idx] == nullptr) {
+      JPEGLI_ERROR("Invalid component quant table index %u.", quant_idx);
+    }
+    data.push_back(quant_idx);
+  }
+  WriteOutput(cinfo, data);
+}
+
+// Writes the quantization tables in use, then the frame header (SOF).
+void WriteFrameHeader(j_compress_ptr cinfo) {
+  const jpeg_comp_master* master = cinfo->master;
+  bool baseline = EncodeDQT(cinfo, /*write_all_tables=*/false);
+  baseline = baseline && !cinfo->progressive_mode && !cinfo->arith_code &&
+             cinfo->data_precision == 8;
+  for (size_t t = 0; t < master->num_huffman_tables; ++t) {
+    const int slot = master->slot_id_map[t];
+    // Slots above 0x01 are baseline-compatible only if they are 0x10 or 0x11.
+    const bool low = slot <= 0x01;
+    const bool high = slot >= 0x10 && slot <= 0x11;
+    if (!low && !high) baseline = false;
+  }
+  EncodeSOF(cinfo, baseline);
+}
+
+void EncodeDRI(j_compress_ptr cinfo) {  // DRI segment: 16-bit restart interval
+  const uint8_t hi = static_cast<uint8_t>(cinfo->restart_interval >> 8);
+  const uint8_t lo = static_cast<uint8_t>(cinfo->restart_interval & 0xFF);
+  WriteOutput(cinfo, {0xFF, 0xDD, 0, 4, hi, lo});
+}
+
+// Writes one DHT marker segment holding the not-yet-sent Huffman tables among
+// m->huffman_tables[offset, offset + num). Writes nothing if all were sent.
+void EncodeDHT(j_compress_ptr cinfo, size_t offset, size_t num) {
+  jpeg_comp_master* m = cinfo->master;
+  const auto num_values = [](const JHUFF_TBL& table) {
+    size_t total = 0;
+    for (size_t len = 0; len <= kJpegHuffmanMaxBitLength; ++len) {
+      total += table.bits[len];
+    }
+    return total;
+  };
+  size_t marker_len = 2;
+  for (size_t t = offset; t < offset + num; ++t) {
+    const JHUFF_TBL& table = m->huffman_tables[t];
+    if (table.sent_table) continue;
+    marker_len += kJpegHuffmanMaxBitLength + 1 + num_values(table);
+  }
+  if (marker_len == 2) return;
+  std::vector<uint8_t> data(marker_len + 2);
+  size_t pos = 0;
+  data[pos++] = 0xFF;
+  data[pos++] = 0xC4;
+  data[pos++] = marker_len >> 8u;
+  data[pos++] = marker_len & 0xFFu;
+  for (size_t t = offset; t < offset + num; ++t) {
+    const JHUFF_TBL& table = m->huffman_tables[t];
+    if (table.sent_table) continue;
+    data[pos++] = m->slot_id_map[t];
+    for (size_t len = 1; len <= kJpegHuffmanMaxBitLength; ++len) {
+      data[pos++] = table.bits[len];
+    }
+    const size_t count = num_values(table);
+    for (size_t k = 0; k < count; ++k) data[pos++] = table.huffval[k];
+  }
+  WriteOutput(cinfo, data);
+}
+
+// Writes the start-of-scan marker segment: for each component of the scan its
+// id and DC/AC Huffman slots, followed by Ss, Se and the Ah/Al byte.
+void EncodeSOS(j_compress_ptr cinfo, int scan_index) {
+  jpeg_comp_master* m = cinfo->master;
+  const jpeg_scan_info& scan = cinfo->scan_info[scan_index];
+  const size_t marker_len = 6 + 2 * scan.comps_in_scan;
+  std::vector<uint8_t> data;
+  data.reserve(marker_len + 2);
+  data.insert(data.end(), {0xFF, 0xDA, uint8_t(marker_len >> 8u),
+                           uint8_t(marker_len & 0xFFu)});
+  data.push_back(scan.comps_in_scan);
+  for (int c = 0; c < scan.comps_in_scan; ++c) {
+    const int comp_idx = scan.component_index[c];
+    const int dc_slot = m->slot_id_map[m->context_map[comp_idx]];
+    const int ac_context = m->ac_ctx_offset[scan_index] + c;
+    const int ac_slot = m->slot_id_map[m->context_map[ac_context]];
+    data.push_back(cinfo->comp_info[comp_idx].component_id);
+    data.push_back((dc_slot << 4u) + (ac_slot - 16));
+  }
+  data.push_back(scan.Ss);
+  data.push_back(scan.Se);
+  data.push_back((scan.Ah << 4u) | scan.Al);
+  WriteOutput(cinfo, data);
+}
+
+// Writes the DRI (if changed), DHT (if new codes) and SOS markers of a scan.
+void WriteScanHeader(j_compress_ptr cinfo, int scan_index) {
+  jpeg_comp_master* m = cinfo->master;
+  cinfo->restart_interval = m->scan_token_info[scan_index].restart_interval;
+  if (cinfo->restart_interval != m->last_restart_interval) {
+    EncodeDRI(cinfo);
+    m->last_restart_interval = cinfo->restart_interval;
+  }
+  size_t num_dht = 0;
+  if (scan_index == 0) {
+    // The first scan carries all DC and at most 4 AC Huffman codes.
+    for (size_t t = 0, num_ac = 0; t < m->num_huffman_tables; ++t) {
+      if (m->slot_id_map[t] >= 16 && num_ac++ >= 4) break;
+      ++num_dht;
+    }
+  } else if (cinfo->scan_info[scan_index].Ss > 0 &&
+             m->context_map[m->ac_ctx_offset[scan_index]] ==
+                 m->next_dht_index) {
+    // Multi-scan sequential and progressive DC scans already had all their
+    // Huffman codes emitted before the first scan; a progressive AC scan
+    // needs at most one new Huffman code.
+    num_dht = 1;
+  }
+  if (num_dht > 0) {
+    EncodeDHT(cinfo, m->next_dht_index, num_dht);
+    m->next_dht_index += num_dht;
+  }
+  EncodeSOS(cinfo, scan_index);
+}
+
+// Writes the Huffman codes of one block to bw: the DC symbol (symbols[0])
+// with dc_code, then symbols[1..num_nonzeros) with ac_code, then, if emit_eob
+// is set, the ac_code symbol 0. Each symbol's code is OR-ed with the matching
+// extra_bits entry. An AC symbol above 255 stands for up to three 0xf0 codes
+// (256 each) that are written before the remaining symbol.
+void WriteBlock(const int32_t* JXL_RESTRICT symbols,
+                const int32_t* JXL_RESTRICT extra_bits, const int num_nonzeros,
+                const bool emit_eob,
+                const HuffmanCodeTable* JXL_RESTRICT dc_code,
+                const HuffmanCodeTable* JXL_RESTRICT ac_code,
+                JpegBitWriter* JXL_RESTRICT bw) {
+  const int dc_symbol = symbols[0];
+  WriteBits(bw, dc_code->depth[dc_symbol],
+            dc_code->code[dc_symbol] | extra_bits[0]);
+  for (int k = 1; k < num_nonzeros; ++k) {
+    int ac_symbol = symbols[k];
+    // Peel off at most three 0xf0 codes.
+    for (int n = 0; n < 3 && ac_symbol > 255; ++n) {
+      WriteBits(bw, ac_code->depth[0xf0], ac_code->code[0xf0]);
+      ac_symbol -= 256;
+    }
+    WriteBits(bw, ac_code->depth[ac_symbol],
+              ac_code->code[ac_symbol] | extra_bits[k]);
+  }
+  // Symbol 0 of the AC code closes the block when requested.
+  if (emit_eob) {
+    WriteBits(bw, ac_code->depth[0], ac_code->code[0]);
+  }
+}
+
+namespace {
+
+static JXL_INLINE void EmitMarker(JpegBitWriter* bw, int marker) {
+  bw->data[bw->pos++] = 0xFF;  // written raw: no zero byte is appended here
+  bw->data[bw->pos++] = marker;  // caller must ensure room for both bytes
+}
+
+// Writes the Huffman-coded tokens of scan `scan_index`, i.e. the global token
+// range [sti.token_offset, sti.token_offset + sti.num_tokens), emitting a
+// restart marker at each position in sti.restarts and flushing periodically.
+void WriteTokens(j_compress_ptr cinfo, int scan_index, JpegBitWriter* bw) {
+  jpeg_comp_master* m = cinfo->master;
+  const ScanTokenInfo& sti = m->scan_token_info[scan_index];
+  const HuffmanCodeTable* tables = &m->coding_tables[0];
+  const uint8_t* context_map = m->context_map;
+  const size_t scan_end = sti.token_offset + sti.num_tokens;
+  const size_t num_arrays = m->cur_token_array + 1;
+  int restart_marker = 0;
+  size_t restart_idx = 0;
+  size_t next_restart = sti.restarts[restart_idx];
+  size_t total = 0;  // global index of the first token of the current array
+  for (size_t a = 0; a < num_arrays; ++a) {
+    const Token* tokens = m->token_arrays[a].tokens;
+    const size_t num_tokens = m->token_arrays[a].num_tokens;
+    // Only arrays that overlap the token range of the scan contribute.
+    if (sti.token_offset < total + num_tokens && total < scan_end) {
+      const size_t first = total < sti.token_offset ? sti.token_offset - total : 0;
+      const size_t last = std::min(scan_end - total, num_tokens);
+      const size_t cycle_len = bw->len / 8;
+      size_t next_cycle = cycle_len;
+      for (size_t k = first; k < last; ++k) {
+        if (total + k == next_restart) {
+          JumpToByteBoundary(bw);
+          EmitMarker(bw, 0xD0 + restart_marker);
+          restart_marker = (restart_marker + 1) & 0x7;
+          next_restart = sti.restarts[++restart_idx];
+        }
+        const Token t = tokens[k];
+        const HuffmanCodeTable* code = &tables[context_map[t.context]];
+        WriteBits(bw, code->depth[t.symbol], code->code[t.symbol] | t.bits);
+        if (--next_cycle != 0) continue;
+        if (!EmptyBitWriterBuffer(bw)) {
+          JPEGLI_ERROR(
+              "Output suspension is not supported in "
+              "finish_compress");
+        }
+        next_cycle = cycle_len;
+      }
+    }
+    total += num_tokens;
+  }
+}
+
+// Writes the tokens of an AC refinement scan. Bit 1 of a token's symbol is
+// not part of the Huffman symbol: it is cleared before the table lookup and
+// becomes the extra bit of odd symbols. Even symbols with a run of 1..14 take
+// their extra bits from sti.eobruns. Each token is followed by t.refbits
+// single bits taken from sti.refbits.
+void WriteACRefinementTokens(j_compress_ptr cinfo, int scan_index,
+                             JpegBitWriter* bw) {
+  jpeg_comp_master* m = cinfo->master;
+  const ScanTokenInfo& sti = m->scan_token_info[scan_index];
+  const uint8_t context = m->ac_ctx_offset[scan_index];
+  const HuffmanCodeTable* code = &m->coding_tables[m->context_map[context]];
+  const size_t cycle_len = bw->len / 64;
+  size_t next_cycle = cycle_len;
+  size_t refbit_idx = 0, eobrun_idx = 0, restart_idx = 0;
+  size_t next_restart = sti.restarts[restart_idx];
+  int restart_marker = 0;
+  for (size_t i = 0; i < sti.num_tokens; ++i) {
+    if (i == next_restart) {
+      JumpToByteBoundary(bw);
+      EmitMarker(bw, 0xD0 + restart_marker);
+      restart_marker = (restart_marker + 1) & 0x7;
+      next_restart = sti.restarts[++restart_idx];
+    }
+    const RefToken t = sti.tokens[i];
+    const int symbol = t.symbol & 253;  // clear bit 1
+    const int run = symbol >> 4;
+    uint16_t bits = 0;
+    if (symbol & 1) {
+      bits = (t.symbol >> 1) & 1;
+    } else if (run > 0 && run < 15) {
+      bits = sti.eobruns[eobrun_idx++];
+    }
+    WriteBits(bw, code->depth[symbol], code->code[symbol] | bits);
+    for (int j = 0; j < t.refbits; ++j) {
+      WriteBits(bw, 1, sti.refbits[refbit_idx++]);
+    }
+    // Flush the byte buffer to the destination every cycle_len tokens.
+    if (--next_cycle != 0) continue;
+    if (!EmptyBitWriterBuffer(bw)) {
+      JPEGLI_ERROR("Output suspension is not supported in finish_compress");
+    }
+    next_cycle = cycle_len;
+  }
+}
+
+// Writes one bit from sti.refbits per token of a DC refinement scan, with a
+// restart marker at each position listed in sti.restarts.
+void WriteDCRefinementBits(j_compress_ptr cinfo, int scan_index,
+                           JpegBitWriter* bw) {
+  jpeg_comp_master* m = cinfo->master;
+  const ScanTokenInfo& sti = m->scan_token_info[scan_index];
+  size_t restart_idx = 0;
+  size_t next_restart = sti.restarts[restart_idx];
+  int restart_marker = 0;
+  const size_t cycle_len = bw->len * 4;
+  size_t next_cycle = cycle_len;
+  for (size_t i = 0; i < sti.num_tokens; ++i) {
+    if (i == next_restart) {
+      JumpToByteBoundary(bw);
+      EmitMarker(bw, 0xD0 + restart_marker);
+      restart_marker = (restart_marker + 1) & 0x7;
+      next_restart = sti.restarts[++restart_idx];
+    }
+    WriteBits(bw, 1, sti.refbits[i]);
+    // Flush the byte buffer to the destination every cycle_len bits.
+    if (--next_cycle != 0) continue;
+    if (!EmptyBitWriterBuffer(bw)) {
+      JPEGLI_ERROR(
+          "Output suspension is not supported in "
+          "finish_compress");
+    }
+    next_cycle = cycle_len;
+  }
+}
+
+}  // namespace
+
+// Writes the entropy-coded data of one scan, then pads and flushes the writer.
+void WriteScanData(j_compress_ptr cinfo, int scan_index) {
+  JpegBitWriter* bw = &cinfo->master->bw;
+  if (cinfo->scan_info[scan_index].Ah == 0) {
+    WriteTokens(cinfo, scan_index, bw);
+  } else if (cinfo->scan_info[scan_index].Ss > 0) {
+    WriteACRefinementTokens(cinfo, scan_index, bw);
+  } else {
+    WriteDCRefinementBits(cinfo, scan_index, bw);
+  }
+  if (!bw->healthy) {
+    JPEGLI_ERROR("Unknown Huffman coded symbol found in scan %d", scan_index);
+  }
+  JumpToByteBoundary(bw);
+  if (!EmptyBitWriterBuffer(bw)) {
+    JPEGLI_ERROR("Output suspension is not supported in finish_compress");
+  }
+}
+
+}  // namespace jpegli
diff --git a/third-party/libjxl/libjxl/lib/jpegli/bitstream.h b/third-party/libjxl/libjxl/lib/jpegli/bitstream.h
new file mode 100644
index 0000000000..aa54c73d7e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/bitstream.h
@@ -0,0 +1,44 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_BITSTREAM_H_
+#define LIB_JPEGLI_BITSTREAM_H_
+
+#include <initializer_list>
+#include <vector>
+
+#include "lib/jpegli/encode_internal.h"
+
+namespace jpegli {
+
+void WriteOutput(j_compress_ptr cinfo, const uint8_t* buf, size_t bufsize);
+void WriteOutput(j_compress_ptr cinfo, const std::vector<uint8_t>& bytes);
+void WriteOutput(j_compress_ptr cinfo, std::initializer_list<uint8_t> bytes);
+
+void EncodeAPP0(j_compress_ptr cinfo);
+void EncodeAPP14(j_compress_ptr cinfo);
+void WriteFileHeader(j_compress_ptr cinfo);
+
+// Returns true if only baseline 8-bit tables are used.
+bool EncodeDQT(j_compress_ptr cinfo, bool write_all_tables);
+void EncodeSOF(j_compress_ptr cinfo, bool is_baseline);
+void WriteFrameHeader(j_compress_ptr cinfo);
+
+void EncodeDRI(j_compress_ptr cinfo);
+void EncodeDHT(j_compress_ptr cinfo, size_t offset, size_t num);
+void EncodeSOS(j_compress_ptr cinfo, int scan_index);
+void WriteScanHeader(j_compress_ptr cinfo, int scan_index);
+
+void WriteBlock(const int32_t* JXL_RESTRICT symbols,
+                const int32_t* JXL_RESTRICT extra_bits, const int num_nonzeros,
+                const bool emit_eob,
+                const HuffmanCodeTable* JXL_RESTRICT dc_code,
+                const HuffmanCodeTable* JXL_RESTRICT ac_code,
+                JpegBitWriter* JXL_RESTRICT bw);
+void WriteScanData(j_compress_ptr cinfo, int scan_index);
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_BITSTREAM_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/color_quantize.cc b/third-party/libjxl/libjxl/lib/jpegli/color_quantize.cc
new file mode 100644
index 0000000000..1079c45c9f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/color_quantize.cc
@@ -0,0 +1,533 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/color_quantize.h"
+
+#include <cmath>
+#include <limits>
+#include <unordered_map>
+
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/error.h"
+
+namespace jpegli {
+
+namespace {
+
+static constexpr int kNumColorCellBits[kMaxComponents] = {3, 4, 3, 3};
+static constexpr int kCompW[kMaxComponents] = {2, 3, 1, 1};
+
+int Pow(int a, int b) {
+  // Integer power by repeated multiplication; the result is 1 whenever
+  // b <= 0 because the loop body never runs.
+  int result = 1;
+  for (int left = b; left > 0; --left) result *= a;
+  return result;
+}
+
+int ComponentOrder(j_decompress_ptr cinfo, int i) {
+  // With three output components the first two indices are swapped
+  // (0 <-> 1); every other index maps to itself.
+  const bool swap_first_two = cinfo->out_color_components == 3 && i < 2;
+  return swap_first_two ? 1 - i : i;
+}
+
+int GetColorComponent(int i, int N) {
+  return (255 * i + (N - 1) / 2) / (N - 1);  // 255 * i / (N - 1), rounded
+}
+
+}  // namespace
+
+void ChooseColorMap1Pass(j_decompress_ptr cinfo) {  // regular-grid colormap
+  jpeg_decomp_master* m = cinfo->master;
+  int components = cinfo->out_color_components;
+  int desired = std::min(cinfo->desired_number_of_colors, 256);  // cap at 256
+  int num = 1;  // levels per component
+  while (Pow(num + 1, components) <= desired) {  // grow while it still fits
+    ++num;
+  }
+  if (num == 1) {  // not even two levels per component fit into `desired`
+    JPEGLI_ERROR("Too few colors (%d) in requested colormap", desired);
+  }
+  int actual = Pow(num, components);
+  for (int i = 0; i < components; ++i) {
+    m->num_colors_[i] = num;
+  }
+  while (actual < desired) {  // hand out leftover budget one level at a time
+    int total = actual;
+    for (int i = 0; i < components; ++i) {
+      int c = ComponentOrder(cinfo, i);
+      int new_total = (actual / m->num_colors_[c]) * (m->num_colors_[c] + 1);
+      if (new_total <= desired) {
+        ++m->num_colors_[c];
+        actual = new_total;
+      }
+    }
+    if (actual == total) {  // no component could grow in this round
+      break;
+    }
+  }
+  cinfo->actual_number_of_colors = actual;
+  cinfo->colormap = (*cinfo->mem->alloc_sarray)(
+      reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE, actual, components);
+  int next_color[kMaxComponents] = {0};  // current level of each component
+  for (int i = 0; i < actual; ++i) {
+    for (int c = 0; c < components; ++c) {
+      cinfo->colormap[c][i] =
+          GetColorComponent(next_color[c], m->num_colors_[c]);
+    }
+    int c = components - 1;  // advance the levels, last component fastest
+    while (c > 0 && next_color[c] + 1 == m->num_colors_[c]) {
+      next_color[c--] = 0;
+    }
+    ++next_color[c];
+  }
+  if (!m->colormap_lut_) {  // allocated only when not present yet
+    m->colormap_lut_ = Allocate<uint8_t>(cinfo, components * 256, JPOOL_IMAGE);
+  }
+  int stride = actual;
+  for (int c = 0; c < components; ++c) {
+    int N = m->num_colors_[c];
+    stride /= N;  // colormap index distance between adjacent levels of c
+    for (int i = 0; i < 256; ++i) {
+      int index = ((2 * i - 1) * (N - 1) + 254) / 510;
+      m->colormap_lut_[c * 256 + i] = index * stride;  // summed over c later
+    }
+  }
+}
+
+namespace {
+
+// 2^13 priority levels for the PQ seems to be a good compromise between
+// accuracy, running time and stack space usage.
+static const int kMaxPriority = 1 << 13;
+static const int kMaxLevel = 3;
+
+// This function is used in the multi-resolution grid to be able to compute
+// the keys for the different resolutions by just shifting the first key.
+inline int InterlaceBitsRGB(uint8_t r, uint8_t g, uint8_t b) {
+  // Produces a 24-bit key by walking the channels from their most
+  // significant bit down to the least significant one. Each step appends
+  // three key bits: the current bit of r, then of g, then of b. Dropping
+  // the low 3 * k bits of the key therefore leaves a key built from only
+  // the top (8 - k) bits of each channel, which is what lets callers
+  // derive coarser keys by shifting.
+  int key = 0;
+  for (int bit = 7; bit >= 0; --bit) {
+    const int r_bit = (r >> bit) & 1;
+    const int g_bit = (g >> bit) & 1;
+    const int b_bit = (b >> bit) & 1;
+    key = (key << 3) | (r_bit << 2) | (g_bit << 1) | b_bit;
+  }
+  return key;
+}
+
+// This function will compute the actual priorities of the colors based on
+// the current distance from the palette, the population count and the signals
+// from the multi-resolution grid.
+inline int Priority(int d, int n, const int* density, const int* radius) {
+  int score = d * n;
+  for (int level = 0; level < kMaxLevel; ++level) {
+    if (radius[level] < d) {
+      score += (d - radius[level]) * density[level];
+    }
+  }
+  return std::min(kMaxPriority - 1, score >> 4);  // clamp to the last bucket
+}
+
+inline int ColorIntQuadDistanceRGB(uint8_t r1, uint8_t g1, uint8_t b1,
+                                   uint8_t r2, uint8_t g2, uint8_t b2) {
+  // Squared RGB distance plus the squared difference of a (2, 5, 1)-weighted
+  // intensity, the latter scaled down by 2^(2 * kIntensityShift).
+  constexpr int kRedWeight = 2;
+  constexpr int kGreenWeight = 5;
+  constexpr int kBlueWeight = 1;
+  constexpr int kIntensityShift = 3;
+  const int dr = r1 - r2;
+  const int dg = g1 - g2;
+  const int db = b1 - b2;
+  const int di = kRedWeight * dr + kGreenWeight * dg + kBlueWeight * db;
+  return dr * dr + dg * dg + db * db + ((di * di) >> (2 * kIntensityShift));
+}
+
+inline int ScaleQuadDistanceRGB(int d) {
+  return static_cast<int>(std::sqrt(0.25 * d) + 0.5);  // sqrt(d) / 2, rounded
+}
+
+// The function updates the minimal distances, the clustering and the
+// quantization error after the insertion of the new color into the palette.
+void AddToRGBPalette(const uint8_t* red, const uint8_t* green,
+                     const uint8_t* blue,
+                     const int* count,  // histogram of colors
+                     const int index,   // index of color to be added
+                     const int k,       // size of current palette
+                     const int n,       // number of colors
+                     int* dist,         // array of distances from palette
+                     int* cluster,      // mapping of color indices to palette
+                     int* center,       // the inverse mapping
+                     int64_t* error) {  // measure of the quantization error
+  // The new palette entry matches its own color exactly, so that color's
+  // share of the total error is removed.
+  *error -= static_cast<int64_t>(count[index]) * dist[index];
+  dist[index] = 0;
+  cluster[index] = k;
+  center[k] = index;
+  for (int other = 0; other < n; ++other) {
+    if (dist[other] <= 0) continue;
+    const int candidate =
+        ColorIntQuadDistanceRGB(red[index], green[index], blue[index],
+                                red[other], green[other], blue[other]);
+    if (candidate >= dist[other]) continue;
+    // Re-assign to the new entry; the difference is negative here.
+    *error += static_cast<int64_t>(candidate - dist[other]) * count[other];
+    dist[other] = candidate;
+    cluster[other] = k;
+  }
+}
+
+struct RGBPixelHasher {
+  // Cheap mixer for packed RGB; wraps mod 2^32 where unsigned int is 32 bits.
+  size_t operator()(uint32_t a) const { return 0x9e3779b9 * ((a >> 12) ^ a); }
+};
+
+struct WangHasher {
+  // Thomas Wang's integer hash: nearly perfect and still quite fast.
+  // RGBPixelHasher above is the cheaper choice for per-pixel hashing, where
+  // the number of hash calls is proportional to the pixel count and hashing
+  // dominates the cost. This stronger hash serves the histogram, where the
+  // number of calls is proportional to the number of unique colors, which
+  // is hopefully much smaller. According to the original authors the
+  // difference is slight: swapping WangHasher in for RGBPixelHasher cost
+  // only about 5% on an Opteron.
+  size_t operator()(uint32_t a) const {
+    a ^= 61 ^ (a >> 16);
+    a += a << 3;
+    a ^= a >> 4;
+    a *= 0x27d4eb2d;
+    a ^= a >> 15;
+    return a;
+  }
+};
+
+// Build an index of all the different colors in the input
+// image. To do this we map the 24 bit RGB representation of the colors
+// to a unique integer index assigned to the different colors in order of
+// appearance in the image.  Return the number of unique colors found.
+// The colors are pre-quantized to 3 * 6 bits precision.
+static int BuildRGBColorIndex(const uint8_t* const image, int const num_pixels,
+                              int* const count, uint8_t* const red,
+                              uint8_t* const green, uint8_t* const blue) {
+  // Packed pixels occupy only the low 24 bits, so no real pixel equals this.
+  const uint32_t kNoPixel = 0x10000000;
+  std::unordered_map<uint32_t, int, RGBPixelHasher> seen(1 << 12);
+  uint32_t last_pixel = kNoPixel;
+  int last_index = 0;
+  int num_colors = 0;
+  const uint8_t* src = image;
+  for (int p = 0; p < num_pixels; ++p, src += 3) {
+    const uint8_t r = (src[0] & 0xfc) + 2;
+    const uint8_t g = (src[1] & 0xfc) + 2;
+    const uint8_t b = (src[2] & 0xfc) + 2;
+    const uint32_t packed = (b << 16) | (g << 8) | r;
+    if (packed != last_pixel) {
+      // A repeat of the previous pixel reuses its index without a lookup.
+      last_pixel = packed;
+      const auto found = seen.find(packed);
+      if (found == seen.end()) {
+        seen[packed] = last_index = num_colors++;
+        red[last_index] = r;
+        green[last_index] = g;
+        blue[last_index] = b;
+      } else {
+        last_index = found->second;
+      }
+    }
+    ++count[last_index];
+  }
+  return num_colors;
+}
+
+}  // namespace
+
+// Two-pass color quantization: histograms the pre-quantized colors found in
+// m->pixels_ and greedily grows an RGB palette from the most urgent colors.
+void ChooseColorMap2Pass(j_decompress_ptr cinfo) {
+  if (cinfo->out_color_space != JCS_RGB) {
+    JPEGLI_ERROR("Two-pass quantizer must use RGB output color space.");
+  }
+  jpeg_decomp_master* m = cinfo->master;
+  const size_t num_pixels = cinfo->output_width * cinfo->output_height;
+  const int max_color_count = std::max<size_t>(num_pixels, 1u << 18);
+  // `center` has 256 slots and count_threshold divides by this value.
+  const int max_palette_size = std::min(cinfo->desired_number_of_colors, 256);
+  if (max_palette_size < 1) {
+    JPEGLI_ERROR("Too few colors (%d) in requested colormap",
+                 max_palette_size);
+  }
+  std::unique_ptr<uint8_t[]> red(new uint8_t[max_color_count]);
+  std::unique_ptr<uint8_t[]> green(new uint8_t[max_color_count]);
+  std::unique_ptr<uint8_t[]> blue(new uint8_t[max_color_count]);
+  std::vector<int> count(max_color_count, 0);
+  // number of colors
+  int n = BuildRGBColorIndex(m->pixels_, num_pixels, &count[0], &red[0],
+                             &green[0], &blue[0]);
+  std::vector<int> dist(n, std::numeric_limits<int>::max());
+  std::vector<int> cluster(n);
+  std::vector<bool> in_palette(n, false);
+  int center[256];
+  int k = 0;  // palette size
+  const int count_threshold = (num_pixels * 4) / max_palette_size;
+  static constexpr int kAveragePixelErrorThreshold = 1;
+  const int64_t error_threshold = num_pixels * kAveragePixelErrorThreshold;
+  int64_t error = 0;  // quantization error
+  int max_count = 0;
+  int winner = 0;
+  for (int i = 0; i < n; ++i) {
+    if (count[i] > max_count) {
+      max_count = count[i];
+      winner = i;
+    }
+    if (!in_palette[i] && count[i] > count_threshold) {
+      AddToRGBPalette(&red[0], &green[0], &blue[0], &count[0], i, k++, n,
+                      &dist[0], &cluster[0], &center[0], &error);
+      in_palette[i] = true;
+    }
+  }
+  if (k == 0) {
+    AddToRGBPalette(&red[0], &green[0], &blue[0], &count[0], winner, k++, n,
+                    &dist[0], &cluster[0], &center[0], &error);
+    in_palette[winner] = true;
+  }
+
+  // Calculation of the multi-resolution density grid.
+  std::vector<int> density(n * kMaxLevel);
+  std::vector<int> radius(n * kMaxLevel);
+  std::unordered_map<uint32_t, int, WangHasher> histogram[kMaxLevel];
+  for (int i = 0; i < n; ++i) {
+    if (!in_palette[i]) {
+      const int key = InterlaceBitsRGB(red[i], green[i], blue[i]) >> 6;
+      for (int level = 0; level < kMaxLevel; ++level) {
+        histogram[level][key >> (3 * level)] += count[i];
+      }
+    }
+  }
+  for (int i = 0; i < n; ++i) {
+    if (!in_palette[i]) {
+      for (int level = 0; level < kMaxLevel; ++level) {
+        const int mask = (4 << level) - 1;
+        const int rd = std::max(red[i] & mask, mask - (red[i] & mask));
+        const int gd = std::max(green[i] & mask, mask - (green[i] & mask));
+        const int bd = std::max(blue[i] & mask, mask - (blue[i] & mask));
+        radius[i * kMaxLevel + level] =
+            ScaleQuadDistanceRGB(ColorIntQuadDistanceRGB(0, 0, 0, rd, gd, bd));
+      }
+      const int key = InterlaceBitsRGB(red[i], green[i], blue[i]) >> 6;
+      if (kMaxLevel > 0) {
+        density[i * kMaxLevel] = histogram[0][key] - count[i];
+      }
+      for (int level = 1; level < kMaxLevel; ++level) {
+        density[i * kMaxLevel + level] =
+            (histogram[level][key >> (3 * level)] -
+             histogram[level - 1][key >> (3 * level - 3)]);
+      }
+    }
+  }
+
+  // Calculate the initial error now that the palette has been initialized.
+  error = 0;
+  for (int i = 0; i < n; ++i) {
+    error += static_cast<int64_t>(dist[i]) * static_cast<int64_t>(count[i]);
+  }
+  std::unique_ptr<std::vector<int>[]> bucket_array(
+      new std::vector<int>[kMaxPriority]);
+  int top_priority = -1;
+  for (int i = 0; i < n; ++i) {
+    if (!in_palette[i]) {
+      int priority = Priority(ScaleQuadDistanceRGB(dist[i]), count[i],
+                              &density[i * kMaxLevel], &radius[i * kMaxLevel]);
+      bucket_array[priority].push_back(i);
+      top_priority = std::max(priority, top_priority);
+    }
+  }
+  double error_accum = 0;
+  while (top_priority >= 0 && k < max_palette_size) {
+    if (error < error_threshold) {
+      error_accum += std::min(error_threshold, error_threshold - error);
+      if (error_accum >= 10 * error_threshold) {
+        break;
+      }
+    }
+    int i = bucket_array[top_priority].back();
+    int priority = Priority(ScaleQuadDistanceRGB(dist[i]), count[i],
+                            &density[i * kMaxLevel], &radius[i * kMaxLevel]);
+    if (priority < top_priority) {
+      bucket_array[priority].push_back(i);
+    } else {
+      AddToRGBPalette(&red[0], &green[0], &blue[0], &count[0], i, k++, n,
+                      &dist[0], &cluster[0], &center[0], &error);
+    }
+    bucket_array[top_priority].pop_back();
+    while (top_priority >= 0 && bucket_array[top_priority].empty()) {
+      --top_priority;
+    }
+  }
+
+  cinfo->actual_number_of_colors = k;
+  cinfo->colormap = (*cinfo->mem->alloc_sarray)(
+      reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE, k, 3);
+  for (int i = 0; i < k; ++i) {
+    int index = center[i];
+    cinfo->colormap[0][i] = red[index];
+    cinfo->colormap[1][i] = green[index];
+    cinfo->colormap[2][i] = blue[index];
+  }
+}
+
+namespace {
+
+void FindCandidatesForCell(j_decompress_ptr cinfo, int ncomp, int cell[],
+                           std::vector<uint8_t>* candidates) {  // appended to
+  int cell_min[kMaxComponents];
+  int cell_max[kMaxComponents];
+  int cell_center[kMaxComponents];
+  for (int c = 0; c < ncomp; ++c) {
+    cell_min[c] = cell[c] << (8 - kNumColorCellBits[c]);  // lowest sample
+    cell_max[c] = cell_min[c] + (1 << (8 - kNumColorCellBits[c])) - 1;
+    cell_center[c] = (cell_min[c] + cell_max[c]) >> 1;
+  }
+  int min_maxdist = std::numeric_limits<int>::max();  // best worst-case dist
+  int mindist[256];  // smallest possible distance to the cell, per color
+  for (int i = 0; i < cinfo->actual_number_of_colors; ++i) {
+    int dmin = 0;
+    int dmax = 0;
+    for (int c = 0; c < ncomp; ++c) {
+      int palette_c = cinfo->colormap[c][i];
+      int dminc = 0, dmaxc;  // dminc stays 0 when palette_c is in the cell
+      if (palette_c < cell_min[c]) {
+        dminc = cell_min[c] - palette_c;
+        dmaxc = cell_max[c] - palette_c;
+      } else if (palette_c > cell_max[c]) {
+        dminc = palette_c - cell_max[c];
+        dmaxc = palette_c - cell_min[c];
+      } else if (palette_c > cell_center[c]) {  // inside: far edge is the min
+        dmaxc = palette_c - cell_min[c];
+      } else {  // inside: far edge is the max
+        dmaxc = cell_max[c] - palette_c;
+      }
+      dminc *= kCompW[c];  // per-component weight
+      dmaxc *= kCompW[c];
+      dmin += dminc * dminc;
+      dmax += dmaxc * dmaxc;
+    }
+    mindist[i] = dmin;
+    min_maxdist = std::min(dmax, min_maxdist);
+  }
+  for (int i = 0; i < cinfo->actual_number_of_colors; ++i) {
+    if (mindist[i] < min_maxdist) {  // keep colors that may still be closest
+      candidates->push_back(i);
+    }
+  }
+}
+
+}  // namespace
+
+void CreateInverseColorMap(j_decompress_ptr cinfo) {
+  // Recomputes the candidate list of every cell of the color grid.
+  jpeg_decomp_master* m = cinfo->master;
+  const int ncomp = cinfo->out_color_components;
+  int num_cells = 1;
+  for (int c = 0; c < ncomp; ++c) num_cells <<= kNumColorCellBits[c];
+  m->candidate_lists_.resize(num_cells);
+  int cell[kMaxComponents] = {0};
+  for (int i = 0; i < num_cells; ++i) {
+    auto& list = m->candidate_lists_[i];
+    list.clear();
+    FindCandidatesForCell(cinfo, ncomp, cell, &list);
+    // Step the cell coordinates like an odometer, last component fastest.
+    int c = ncomp - 1;
+    for (; c > 0 && cell[c] + 1 == (1 << kNumColorCellBits[c]); --c) {
+      cell[c] = 0;
+    }
+    ++cell[c];
+  }
+  m->regenerate_inverse_colormap_ = false;
+}
+
+int LookupColorIndex(j_decompress_ptr cinfo, JSAMPLE* pixel) {
+  // quant_mode_ 1 sums per-channel table entries; every other mode searches
+  // the candidate list of the pixel's color cell for the closest color.
+  jpeg_decomp_master* m = cinfo->master;
+  const int ncomp = cinfo->out_color_components;
+  int index = 0;
+  if (m->quant_mode_ != 1) {
+    size_t cell_idx = 0;
+    size_t scale = 1;
+    for (int c = ncomp; c-- > 0;) {
+      cell_idx += (pixel[c] >> (8 - kNumColorCellBits[c])) * scale;
+      scale <<= kNumColorCellBits[c];
+    }
+    JXL_ASSERT(cell_idx < m->candidate_lists_.size());
+    int best_dist = std::numeric_limits<int>::max();
+    for (uint8_t cand : m->candidate_lists_[cell_idx]) {
+      int dist = 0;
+      for (int c = 0; c < ncomp; ++c) {
+        const int diff = (cinfo->colormap[c][cand] - pixel[c]) * kCompW[c];
+        dist += diff * diff;
+      }
+      if (dist >= best_dist) continue;
+      best_dist = dist;
+      index = cand;
+    }
+  } else {
+    for (int c = 0; c < ncomp; ++c) {
+      index += m->colormap_lut_[c * 256 + pixel[c]];
+    }
+  }
+  JXL_ASSERT(index < cinfo->actual_number_of_colors);
+  return index;
+}
+
+void CreateOrderedDitherTables(j_decompress_ptr cinfo) {
+  // One 4x4 table per output component, scaled by 1 / (num_colors - 1).
+  static constexpr size_t kDitherSize = 4;
+  static constexpr float kBaseDitherMatrix[] = {
+      0,  8,  2,  10,  //
+      12, 4,  14, 6,   //
+      3,  11, 1,  9,   //
+      15, 7,  13, 5,   //
+  };
+  jpeg_decomp_master* m = cinfo->master;
+  m->dither_size_ = kDitherSize;
+  m->dither_mask_ = kDitherSize - 1;
+  size_t ncells = m->dither_size_ * m->dither_size_;
+  for (int c = 0; c < cinfo->out_color_components; ++c) {
+    const float spread = 1.0f / (m->num_colors_[c] - 1);
+    const float mul = spread / ncells;
+    const float offset = 0.5f * spread;
+    if (m->dither_[c] == nullptr) {
+      m->dither_[c] = Allocate<float>(cinfo, ncells, JPOOL_IMAGE_ALIGNED);
+    }
+    for (size_t cell = 0; cell < ncells; ++cell) {
+      m->dither_[c][cell] = kBaseDitherMatrix[cell] * mul - offset;
+    }
+  }
+}
+
+void InitFSDitherState(j_decompress_ptr cinfo) {
+  // Zeroes both error rows of each component, allocating them when missing.
+  jpeg_decomp_master* m = cinfo->master;
+  const size_t width = cinfo->output_width;
+  for (int c = 0; c < cinfo->out_color_components; ++c) {
+    if (m->error_row_[c] == nullptr) {
+      m->error_row_[c] = Allocate<float>(cinfo, width, JPOOL_IMAGE_ALIGNED);
+      m->error_row_[c + kMaxComponents] =
+          Allocate<float>(cinfo, width, JPOOL_IMAGE_ALIGNED);
+    }
+    // memset takes an int fill byte; all-zero bytes read back as 0.0f.
+    memset(m->error_row_[c], 0, width * sizeof(float));
+    memset(m->error_row_[c + kMaxComponents], 0, width * sizeof(float));
+  }
+}
+
+}  // namespace jpegli
diff --git a/third-party/libjxl/libjxl/lib/jpegli/color_quantize.h b/third-party/libjxl/libjxl/lib/jpegli/color_quantize.h
new file mode 100644
index 0000000000..3dda1d8713
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/color_quantize.h
@@ -0,0 +1,27 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_COLOR_QUANTIZE_H_
+#define LIB_JPEGLI_COLOR_QUANTIZE_H_
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+void ChooseColorMap1Pass(j_decompress_ptr cinfo);
+
+void ChooseColorMap2Pass(j_decompress_ptr cinfo);
+
+void CreateInverseColorMap(j_decompress_ptr cinfo);
+
+void CreateOrderedDitherTables(j_decompress_ptr cinfo);
+
+void InitFSDitherState(j_decompress_ptr cinfo);
+
+int LookupColorIndex(j_decompress_ptr cinfo, JSAMPLE* pixel);
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_COLOR_QUANTIZE_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/color_transform.cc b/third-party/libjxl/libjxl/lib/jpegli/color_transform.cc
new file mode 100644
index 0000000000..020a6fd80c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/color_transform.cc
@@ -0,0 +1,281 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/color_transform.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/color_transform.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Div;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Sub;
+
+void YCbCrToRGB(float* row[kMaxComponents], size_t xsize) {
+  // In-place conversion: rows 0/1/2 hold Y/Cb/Cr on entry and R/G/B on exit.
+  // Coefficients are full-range BT.601 as defined by JFIF Clause 7:
+  // https://www.itu.int/rec/T-REC-T.871-201105-I/en
+  const HWY_CAPPED(float, 8) df;
+  const auto kCrToR = Set(df, 1.402f);
+  const auto kCbToG = Set(df, -0.114f * 1.772f / 0.587f);
+  const auto kCrToG = Set(df, -0.299f * 1.402f / 0.587f);
+  const auto kCbToB = Set(df, 1.772f);
+  float* JXL_RESTRICT ch0 = row[0];
+  float* JXL_RESTRICT ch1 = row[1];
+  float* JXL_RESTRICT ch2 = row[2];
+  // Each iteration processes Lanes(df) samples.
+  for (size_t x = 0; x < xsize; x += Lanes(df)) {
+    const auto luma = Load(df, ch0 + x);
+    const auto cb = Load(df, ch1 + x);
+    const auto cr = Load(df, ch2 + x);
+    // r = y + kCrToR * cr
+    Store(MulAdd(kCrToR, cr, luma), df, ch0 + x);
+    // g = (y + kCbToG * cb) + kCrToG * cr
+    Store(MulAdd(kCrToG, cr, MulAdd(kCbToG, cb, luma)), df, ch1 + x);
+    // b = y + kCbToB * cb
+    Store(MulAdd(kCbToB, cb, luma), df, ch2 + x);
+  }
+}
+
+void YCCKToCMYK(float* row[kMaxComponents], size_t xsize) {
+  // Runs YCbCrToRGB in place, then replaces every sample v of the first
+  // three rows by (-1/255 - v); row[3] is not accessed here.
+  YCbCrToRGB(row, xsize);
+  const HWY_CAPPED(float, 8) df;
+  const auto offset = Set(df, -1.0f / 255.0f);
+  for (size_t c = 0; c < 3; ++c) {
+    float* JXL_RESTRICT chan = row[c];
+    for (size_t x = 0; x < xsize; x += Lanes(df)) {
+      Store(Sub(offset, Load(df, chan + x)), df, chan + x);
+    }
+  }
+}
+
+void RGBToYCbCr(float* row[kMaxComponents], size_t xsize) {
+  const HWY_CAPPED(float, 8) df;
+  float* JXL_RESTRICT row0 = row[0];
+  float* JXL_RESTRICT row1 = row[1];
+  float* JXL_RESTRICT row2 = row[2];
+  // Full-range BT.601 as defined by JFIF Clause 7:
+  // https://www.itu.int/rec/T-REC-T.871-201105-I/en
+  const auto c128 = Set(df, 128.0f);
+  const auto kR = Set(df, 0.299f);  // NTSC luma
+  const auto kG = Set(df, 0.587f);
+  const auto kB = Set(df, 0.114f);
+  const auto kAmpR = Set(df, 0.701f);  // 1 - 0.299
+  const auto kAmpB = Set(df, 0.886f);  // 1 - 0.114
+  const auto kDiffR = Add(kAmpR, kR);
+  const auto kDiffB = Add(kAmpB, kB);
+  const auto kNormR = Div(Set(df, 1.0f), (Add(kAmpR, Add(kG, kB))));
+  const auto kNormB = Div(Set(df, 1.0f), (Add(kR, Add(kG, kAmpB))));
+  // In place: rows 0/1/2 hold R/G/B on entry and Y/Cb/Cr on exit.
+  for (size_t x = 0; x < xsize; x += Lanes(df)) {
+    const auto r = Load(df, row0 + x);
+    const auto g = Load(df, row1 + x);
+    const auto b = Load(df, row2 + x);
+    const auto r_base = Mul(r, kR);
+    const auto r_diff = Mul(r, kDiffR);
+    const auto g_base = Mul(g, kG);
+    const auto b_base = Mul(b, kB);
+    const auto b_diff = Mul(b, kDiffB);
+    const auto y_base = Add(r_base, Add(g_base, b_base));  // luma
+    const auto cb_vec = MulAdd(Sub(b_diff, y_base), kNormB, c128);
+    const auto cr_vec = MulAdd(Sub(r_diff, y_base), kNormR, c128);
+    Store(y_base, df, row0 + x);
+    Store(cb_vec, df, row1 + x);  // chroma is stored with a +128 offset
+    Store(cr_vec, df, row2 + x);
+  }
+}
+
+void CMYKToYCCK(float* row[kMaxComponents], size_t xsize) {
+  // Replaces every sample v of the first three rows by (255 - v) and then
+  // runs RGBToYCbCr on the result; row[3] is not accessed here.
+  const HWY_CAPPED(float, 8) df;
+  const auto unity = Set(df, 255.0f);
+  for (size_t c = 0; c < 3; ++c) {
+    float* JXL_RESTRICT chan = row[c];
+    for (size_t x = 0; x < xsize; x += Lanes(df)) {
+      Store(Sub(unity, Load(df, chan + x)), df, chan + x);
+    }
+  }
+  RGBToYCbCr(row, xsize);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+
+HWY_EXPORT(CMYKToYCCK);
+HWY_EXPORT(YCCKToCMYK);
+HWY_EXPORT(YCbCrToRGB);
+HWY_EXPORT(RGBToYCbCr);
+
+bool CheckColorSpaceComponents(int num_components, J_COLOR_SPACE colorspace) {
+  switch (colorspace) {
+    case JCS_CMYK:
+    case JCS_YCCK:
+    case JCS_EXT_RGBX:
+    case JCS_EXT_BGRX:
+    case JCS_EXT_XBGR:
+    case JCS_EXT_XRGB:
+    case JCS_EXT_RGBA:
+    case JCS_EXT_BGRA:
+    case JCS_EXT_ABGR:
+    case JCS_EXT_ARGB:
+      return num_components == 4;
+    case JCS_RGB:
+    case JCS_YCbCr:
+    case JCS_EXT_RGB:
+    case JCS_EXT_BGR:
+      return num_components == 3;
+    case JCS_GRAYSCALE:
+      return num_components == 1;
+    default:
+      break;
+  }
+  // Colorspaces not listed above undergo no color transform: any count is ok.
+  return true;
+}
+
+void NullTransform(float* row[kMaxComponents], size_t len) {}  // no-op: rows stay untouched
+
+void GrayscaleToRGB(float* row[kMaxComponents], size_t len) {
+  const size_t num_bytes = len * sizeof(float);  // copy row 0 into rows 1, 2
+  for (int c = 1; c <= 2; ++c) memcpy(row[c], row[0], num_bytes);
+}
+
+void GrayscaleToYCbCr(float* row[kMaxComponents], size_t len) {
+  const size_t num_bytes = len * sizeof(float);  // row 0 stays, rows 1, 2 := 0
+  for (int c = 1; c <= 2; ++c) memset(row[c], 0, num_bytes);
+}
+
+void ChooseColorTransform(j_compress_ptr cinfo) {  // sets m->color_transform
+  jpeg_comp_master* m = cinfo->master;
+  if (!CheckColorSpaceComponents(cinfo->input_components,
+                                 cinfo->in_color_space)) {
+    JPEGLI_ERROR("Invalid number of input components %d for colorspace %d",
+                 cinfo->input_components, cinfo->in_color_space);
+  }
+  if (!CheckColorSpaceComponents(cinfo->num_components,
+                                 cinfo->jpeg_color_space)) {
+    JPEGLI_ERROR("Invalid number of components %d for colorspace %d",
+                 cinfo->num_components, cinfo->jpeg_color_space);
+  }
+  if (cinfo->jpeg_color_space == cinfo->in_color_space) {
+    if (cinfo->num_components != cinfo->input_components) {
+      JPEGLI_ERROR("Input/output components mismatch:  %d vs %d",
+                   cinfo->input_components, cinfo->num_components);
+    }
+    // No color transform requested.
+    m->color_transform = NullTransform;
+    return;
+  }
+  // From here on the input and JPEG colorspaces differ.
+  if (cinfo->in_color_space == JCS_RGB && m->xyb_mode) {
+    JPEGLI_ERROR("Color transform on XYB colorspace is not supported.");
+  }
+  // Pick by (jpeg_color_space, in_color_space); nullptr marks "no match".
+  m->color_transform = nullptr;
+  if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
+    if (cinfo->in_color_space == JCS_RGB) {
+      m->color_transform = HWY_DYNAMIC_DISPATCH(RGBToYCbCr);
+    } else if (cinfo->in_color_space == JCS_YCbCr ||
+               cinfo->in_color_space == JCS_YCCK) {
+      // Since the first luminance channel is the grayscale version of the
+      // image, nothing to do here
+      m->color_transform = NullTransform;
+    }
+  } else if (cinfo->jpeg_color_space == JCS_RGB) {
+    if (cinfo->in_color_space == JCS_GRAYSCALE) {
+      m->color_transform = GrayscaleToRGB;
+    }
+  } else if (cinfo->jpeg_color_space == JCS_YCbCr) {
+    if (cinfo->in_color_space == JCS_RGB) {
+      m->color_transform = HWY_DYNAMIC_DISPATCH(RGBToYCbCr);
+    } else if (cinfo->in_color_space == JCS_GRAYSCALE) {
+      m->color_transform = GrayscaleToYCbCr;
+    }
+  } else if (cinfo->jpeg_color_space == JCS_YCCK) {
+    if (cinfo->in_color_space == JCS_CMYK) {
+      m->color_transform = HWY_DYNAMIC_DISPATCH(CMYKToYCCK);
+    }
+  }
+  // Any pair not handled above is rejected.
+  if (m->color_transform == nullptr) {
+    // TODO(szabadka) Support more color transforms.
+    JPEGLI_ERROR("Unsupported color transform %d -> %d", cinfo->in_color_space,
+                 cinfo->jpeg_color_space);
+  }
+}
+
+void ChooseColorTransform(j_decompress_ptr cinfo) {  // sets m->color_transform
+  jpeg_decomp_master* m = cinfo->master;
+  if (!CheckColorSpaceComponents(cinfo->out_color_components,
+                                 cinfo->out_color_space)) {
+    JPEGLI_ERROR("Invalid number of output components %d for colorspace %d",
+                 cinfo->out_color_components, cinfo->out_color_space);
+  }
+  if (!CheckColorSpaceComponents(cinfo->num_components,
+                                 cinfo->jpeg_color_space)) {
+    JPEGLI_ERROR("Invalid number of components %d for colorspace %d",
+                 cinfo->num_components, cinfo->jpeg_color_space);
+  }
+  if (cinfo->jpeg_color_space == cinfo->out_color_space) {
+    if (cinfo->num_components != cinfo->out_color_components) {
+      JPEGLI_ERROR("Input/output components mismatch:  %d vs %d",
+                   cinfo->num_components, cinfo->out_color_components);
+    }
+    // No color transform requested.
+    m->color_transform = NullTransform;
+    return;
+  }
+  // Pick by (jpeg_color_space, out_color_space); nullptr marks "no match".
+  m->color_transform = nullptr;
+  if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
+    if (cinfo->out_color_space == JCS_RGB) {
+      m->color_transform = GrayscaleToRGB;
+    }
+  } else if (cinfo->jpeg_color_space == JCS_RGB) {
+    if (cinfo->out_color_space == JCS_GRAYSCALE) {
+      m->color_transform = HWY_DYNAMIC_DISPATCH(RGBToYCbCr);
+    }
+  } else if (cinfo->jpeg_color_space == JCS_YCbCr) {
+    if (cinfo->out_color_space == JCS_RGB) {
+      m->color_transform = HWY_DYNAMIC_DISPATCH(YCbCrToRGB);
+    } else if (cinfo->out_color_space == JCS_GRAYSCALE) {
+      m->color_transform = NullTransform;
+    }
+  } else if (cinfo->jpeg_color_space == JCS_YCCK) {
+    if (cinfo->out_color_space == JCS_CMYK) {
+      m->color_transform = HWY_DYNAMIC_DISPATCH(YCCKToCMYK);
+    }
+  }
+  // Any pair not handled above is rejected.
+  if (m->color_transform == nullptr) {
+    // TODO(szabadka) Support more color transforms.
+    JPEGLI_ERROR("Unsupported color transform %d -> %d",
+                 cinfo->jpeg_color_space, cinfo->out_color_space);
+  }
+}
+
+}  // namespace jpegli
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jpegli/color_transform.h b/third-party/libjxl/libjxl/lib/jpegli/color_transform.h
new file mode 100644
index 0000000000..8d58f8849a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/color_transform.h
@@ -0,0 +1,20 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_COLOR_TRANSFORM_H_
+#define LIB_JPEGLI_COLOR_TRANSFORM_H_
+
+#include "lib/jpegli/common.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jpegli {
+
+void ChooseColorTransform(j_compress_ptr cinfo);
+
+void ChooseColorTransform(j_decompress_ptr cinfo);
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_COLOR_TRANSFORM_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/common.cc b/third-party/libjxl/libjxl/lib/jpegli/common.cc
new file mode 100644
index 0000000000..5f34372f3e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/common.cc
@@ -0,0 +1,59 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/common.h"
+
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/memory_manager.h"
+
+// Frees every pool except JPOOL_PERMANENT and puts global_state back to the
+// decoder/encoder start state. Does nothing without a memory manager.
+void jpegli_abort(j_common_ptr cinfo) {
+  if (!cinfo->mem) return;
+  for (int pool = 0; pool < JPOOL_NUMPOOLS; ++pool) {
+    if (pool != JPOOL_PERMANENT) {
+      (*cinfo->mem->free_pool)(cinfo, pool);
+    }
+  }
+  cinfo->global_state =
+      cinfo->is_decompressor ? jpegli::kDecStart : jpegli::kEncStart;
+}
+
+// Runs the self_destruct hook; a decompressor also deletes its `master`.
+void jpegli_destroy(j_common_ptr cinfo) {
+  if (!cinfo->mem) return;
+  (*cinfo->mem->self_destruct)(cinfo);
+  const bool is_decoder = cinfo->is_decompressor;
+  cinfo->global_state = is_decoder ? jpegli::kDecNull : jpegli::kEncNull;
+  if (is_decoder) {
+    delete reinterpret_cast<j_decompress_ptr>(cinfo)->master;
+  }
+}
+
+JQUANT_TBL* jpegli_alloc_quant_table(j_common_ptr cinfo) {
+  auto* quant = jpegli::Allocate<JQUANT_TBL>(cinfo, 1);  // room for one table
+  quant->sent_table = FALSE;  // a new table starts with the flag cleared
+  return quant;
+}
+
+JHUFF_TBL* jpegli_alloc_huff_table(j_common_ptr cinfo) {
+  auto* huff = jpegli::Allocate<JHUFF_TBL>(cinfo, 1);  // room for one table
+  huff->sent_table = FALSE;  // a new table starts with the flag cleared
+  return huff;
+}
+
+// Maps a JpegliDataType to its sample width in bytes (1, 2 or 4); any other
+// value yields 0.
+int jpegli_bytes_per_sample(JpegliDataType data_type) {
+  if (data_type == JPEGLI_TYPE_UINT8) {
+    return 1;
+  } else if (data_type == JPEGLI_TYPE_UINT16) {
+    return 2;
+  } else if (data_type == JPEGLI_TYPE_FLOAT) {
+    return 4;
+  }
+  return 0;
+}
diff --git a/third-party/libjxl/libjxl/lib/jpegli/common.h b/third-party/libjxl/libjxl/lib/jpegli/common.h
new file mode 100644
index 0000000000..3691b2c6a5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/common.h
@@ -0,0 +1,48 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// This file contains the C API of the common encoder/decoder part of libjpegli
+// library, which is based on the C API of libjpeg, with the function names
+// changed from jpeg_* to jpegli_*, while compressor and decompressor object
+// definitions are included directly from jpeglib.h
+//
+// Applications can use the libjpegli library in one of the following ways:
+//
+//  (1) Include jpegli/encode.h and/or jpegli/decode.h, update the function
+//      names of the API and link against libjpegli.
+//
+//  (2) Leave the application code unchanged, but replace the libjpeg.so library
+//      with the one built by this project that is API- and ABI-compatible with
+//      libjpeg-turbo's version of libjpeg.so.
+
+#ifndef LIB_JPEGLI_COMMON_H_
+#define LIB_JPEGLI_COMMON_H_
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+/* clang-format on */
+
+#include "lib/jpegli/types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+struct jpeg_error_mgr* jpegli_std_error(struct jpeg_error_mgr* err);
+
+void jpegli_abort(j_common_ptr cinfo);
+
+void jpegli_destroy(j_common_ptr cinfo);
+
+JQUANT_TBL* jpegli_alloc_quant_table(j_common_ptr cinfo);
+
+JHUFF_TBL* jpegli_alloc_huff_table(j_common_ptr cinfo);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  // extern "C"
+#endif
+
+#endif  // LIB_JPEGLI_COMMON_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/common_internal.h b/third-party/libjxl/libjxl/lib/jpegli/common_internal.h
new file mode 100644
index 0000000000..248d3154e1
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/common_internal.h
@@ -0,0 +1,150 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_COMMON_INTERNAL_H_
+#define LIB_JPEGLI_COMMON_INTERNAL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+#include <hwy/aligned_allocator.h>
+
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jpegli/simd.h"
+#include "lib/jxl/base/compiler_specific.h"  // for ssize_t
+#include "lib/jxl/base/status.h"             // for JXL_CHECK
+
+namespace jpegli {
+
+enum State {  // values stored in cinfo->global_state
+  kDecNull,   // set by jpegli_destroy() on a decompressor
+  kDecStart,  // set by jpegli_abort() on a decompressor
+  kDecInHeader,
+  kDecHeaderDone,      // entered from kDecInHeader when SOS is reached
+  kDecProcessMarkers,  // entered when a scan has completed
+  kDecProcessScan,     // entered at the end of PrepareForScan()
+  kEncNull,   // set by jpegli_destroy() on a compressor
+  kEncStart,  // set by jpegli_abort() on a compressor
+  kEncHeader,
+  kEncReadImage,
+  kEncWriteCoeffs,
+};
+
+template <typename T1, typename T2>
+constexpr inline T1 DivCeil(T1 a, T2 b) {
+  return (a + b - 1) / b;  // ceil(a / b) for a >= 0, b > 0; converted to T1
+}
+
+template <typename T1, typename T2>
+constexpr inline T1 RoundUpTo(T1 a, T2 b) {
+  return DivCeil(a, b) * b;  // smallest multiple of b >= a (a >= 0, b > 0)
+}
+
+constexpr size_t kDCTBlockSize = 64;  // coefficients per 8x8 block
+// This is set to the same value as MAX_COMPS_IN_SCAN, because that is the
+// maximum number of channels the libjpeg-turbo decoder can decode.
+constexpr int kMaxComponents = 4;
+constexpr int kMaxQuantTables = 4;
+constexpr int kJpegPrecision = 8;
+constexpr int kMaxHuffmanTables = 4;
+constexpr size_t kJpegHuffmanMaxBitLength = 16;  // longest code length
+constexpr int kJpegHuffmanAlphabetSize = 256;
+constexpr int kJpegDCAlphabetSize = 12;
+constexpr int kMaxDHTMarkers = 512;
+constexpr int kMaxDimPixels = 65535;
+constexpr uint8_t kApp1 = 0xE1;
+constexpr uint8_t kApp2 = 0xE2;
+const uint8_t kIccProfileTag[12] = "ICC_PROFILE";  // 11 chars + NUL
+const uint8_t kExifTag[6] = "Exif\0";  // 4 chars + explicit and implicit NUL
+const uint8_t kXMPTag[29] = "http://ns.adobe.com/xap/1.0/";  // 28 chars + NUL
+
+/* clang-format off */
+constexpr uint32_t kJPEGNaturalOrder[80] = {  // 64 real entries + 16 padding
+  0,   1,  8, 16,  9,  2,  3, 10,
+  17, 24, 32, 25, 18, 11,  4,  5,
+  12, 19, 26, 33, 40, 48, 41, 34,
+  27, 20, 13,  6,  7, 14, 21, 28,
+  35, 42, 49, 56, 57, 50, 43, 36,
+  29, 22, 15, 23, 30, 37, 44, 51,
+  58, 59, 52, 45, 38, 31, 39, 46,
+  53, 60, 61, 54, 47, 55, 62, 63,
+  // extra entries for safety in decoder
+  63, 63, 63, 63, 63, 63, 63, 63,
+  63, 63, 63, 63, 63, 63, 63, 63
+};
+
+constexpr uint32_t kJPEGZigZagOrder[64] = {  // inverse of the first 64 above
+  0,   1,  5,  6, 14, 15, 27, 28,
+  2,   4,  7, 13, 16, 26, 29, 42,
+  3,   8, 12, 17, 25, 30, 41, 43,
+  9,  11, 18, 24, 31, 40, 44, 53,
+  10, 19, 23, 32, 39, 45, 52, 54,
+  20, 22, 33, 38, 46, 51, 55, 60,
+  21, 34, 37, 47, 50, 56, 59, 61,
+  35, 36, 48, 49, 57, 58, 62, 63
+};
+/* clang-format on */
+
+// Rows of T backed by one aligned pool allocation. Row() wraps the row index
+// modulo ysize_ and leaves `offset_` elements of left padding before column 0.
+template <typename T>
+class RowBuffer {
+ public:
+  // Sizes the buffer for num_rows rows of rowsize elements and allocates it
+  // from the JPOOL_IMAGE_ALIGNED pool of cinfo.
+  template <typename CInfoType>
+  void Allocate(CInfoType cinfo, size_t num_rows, size_t rowsize) {
+    size_t vec_size = std::max(VectorSize(), sizeof(T));
+    JXL_CHECK(vec_size % sizeof(T) == 0);
+    size_t alignment = std::max<size_t>(HWY_ALIGNMENT, vec_size);
+    size_t min_memstride = alignment + rowsize * sizeof(T) + vec_size;
+    size_t memstride = RoundUpTo(min_memstride, alignment);
+    xsize_ = rowsize;
+    ysize_ = num_rows;
+    stride_ = memstride / sizeof(T);
+    offset_ = alignment / sizeof(T);
+    data_ = ::jpegli::Allocate<T>(cinfo, ysize_ * stride_, JPOOL_IMAGE_ALIGNED);
+  }
+  // Pointer to column 0 of row y; y may be negative down to -ysize_.
+  T* Row(ssize_t y) const {
+    return &data_[((ysize_ + y) % ysize_) * stride_ + offset_];
+  }
+
+  size_t xsize() const { return xsize_; }
+  size_t ysize() const { return ysize_; }
+  size_t stride() const { return stride_; }
+  // Replicates row[0] into `border` cells on the left and row[from - 1] into
+  // cells [from, xsize_ + border). Uses T, so it works for non-float buffers.
+  void PadRow(size_t y, size_t from, int border) {
+    T* row = Row(y);
+    for (int offset = -border; offset < 0; ++offset) {
+      row[offset] = row[0];
+    }
+    T last_val = row[from - 1];
+    for (size_t x = from; x < xsize_ + border; ++x) {
+      row[x] = last_val;
+    }
+  }
+  // Copies a whole row including `border` cells on each side.
+  void CopyRow(ssize_t dst_row, ssize_t src_row, int border) {
+    memcpy(Row(dst_row) - border, Row(src_row) - border,
+           (xsize_ + 2 * border) * sizeof(T));
+  }
+  // Sets the first len cells of row y to val.
+  void FillRow(ssize_t y, T val, size_t len) { std::fill_n(Row(y), len, val); }
+
+ private:
+  size_t xsize_;
+  size_t ysize_;
+  size_t stride_;
+  size_t offset_;
+  T* data_;
+};
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_COMMON_INTERNAL_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/dct-inl.h b/third-party/libjxl/libjxl/lib/jpegli/dct-inl.h
new file mode 100644
index 0000000000..eb88654631
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/dct-inl.h
@@ -0,0 +1,256 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JPEGLI_DCT_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JPEGLI_DCT_INL_H_
+#undef LIB_JPEGLI_DCT_INL_H_
+#else
+#define LIB_JPEGLI_DCT_INL_H_
+#endif
+
+#include "lib/jpegli/transpose-inl.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::DemoteTo;
+using hwy::HWY_NAMESPACE::Ge;
+using hwy::HWY_NAMESPACE::IfThenElseZero;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::Round;
+using hwy::HWY_NAMESPACE::Sub;
+using hwy::HWY_NAMESPACE::Vec;
+
+using D = HWY_FULL(float);
+using DI = HWY_FULL(int32_t);
+
+// aout row i = ain1 row i + ain2 row (N-1-i); rows are 8 floats apart.
+template <size_t N>
+void AddReverse(const float* JXL_RESTRICT ain1, const float* JXL_RESTRICT ain2,
+                float* JXL_RESTRICT aout) {
+  HWY_CAPPED(float, 8) d8;
+  for (size_t i = 0, j = N - 1; i < N; i++, j--) {
+    Store(Add(Load(d8, ain1 + i * 8), Load(d8, ain2 + j * 8)), d8,
+          aout + i * 8);
+  }
+}
+
+// aout row i = ain1 row i - ain2 row (N-1-i); rows are 8 floats apart.
+template <size_t N>
+void SubReverse(const float* JXL_RESTRICT ain1, const float* JXL_RESTRICT ain2,
+                float* JXL_RESTRICT aout) {
+  HWY_CAPPED(float, 8) d8;
+  for (size_t i = 0, j = N - 1; i < N; i++, j--) {
+    Store(Sub(Load(d8, ain1 + i * 8), Load(d8, ain2 + j * 8)), d8,
+          aout + i * 8);
+  }
+}
+
+// Row 0 of coeff becomes row0 * sqrt(2) + row1; rows 1..N-2 each get the
+// following row added. The last row is left unchanged.
+template <size_t N>
+void B(float* JXL_RESTRICT coeff) {
+  HWY_CAPPED(float, 8) d8;
+  constexpr float kSqrt2 = 1.41421356237f;
+  const auto first = Load(d8, coeff);
+  const auto second = Load(d8, coeff + 8);
+  Store(MulAdd(first, Set(d8, kSqrt2), second), d8, coeff);
+  for (size_t i = 1; i + 1 < N; i++) {
+    const auto cur = Load(d8, coeff + i * 8);
+    Store(Add(cur, Load(d8, coeff + (i + 1) * 8)), d8, coeff + i * 8);
+  }
+}
+
+// Interleaves the two halves of ain: row i goes to output row 2*i, row
+// N/2 + i to output row 2*i + 1. Ideally optimized away by the compiler.
+template <size_t N>
+void InverseEvenOdd(const float* JXL_RESTRICT ain, float* JXL_RESTRICT aout) {
+  HWY_CAPPED(float, 8) d8;
+  constexpr size_t kHalf = N / 2;
+  for (size_t i = 0; i < kHalf; i++) {
+    Store(Load(d8, ain + i * 8), d8, aout + 2 * i * 8);
+  }
+  for (size_t i = 0; i + kHalf < N; i++) {
+    Store(Load(d8, ain + (kHalf + i) * 8), d8, aout + (2 * i + 1) * 8);
+  }
+}
+
+// Constants for DCT implementation. Generated by the following snippet:
+// for i in range(N // 2):
+//    print(1.0 / (2 * math.cos((i + 0.5) * math.pi / N)), end=", ")
+template <size_t N>
+struct WcMultipliers;  // only the N = 4 and N = 8 specializations exist here
+
+template <>
+struct WcMultipliers<4> {
+  static constexpr float kMultipliers[] = {
+      0.541196100146197,   // i = 0
+      1.3065629648763764,  // i = 1
+  };
+};
+
+template <>
+struct WcMultipliers<8> {
+  static constexpr float kMultipliers[] = {
+      0.5097955791041592,  // i = 0
+      0.6013448869350453,  // i = 1
+      0.8999762231364156,  // i = 2
+      2.5629154477415055,  // i = 3
+  };
+};
+
+constexpr float WcMultipliers<4>::kMultipliers[];  // out-of-class definition
+constexpr float WcMultipliers<8>::kMultipliers[];  // out-of-class definition
+
+// Invoked on full vector: scales rows N/2..N-1 by WcMultipliers<N>, in place.
+template <size_t N>
+void Multiply(float* JXL_RESTRICT coeff) {
+  HWY_CAPPED(float, 8) d8;
+  float* upper = coeff + (N / 2) * 8;
+  for (size_t i = 0; i < N / 2; i++) {
+    const auto factor = Set(d8, WcMultipliers<N>::kMultipliers[i]);
+    Store(Mul(Load(d8, upper + i * 8), factor), d8, upper + i * 8);
+  }
+}
+
+void LoadFromBlock(const float* JXL_RESTRICT pixels, size_t pixels_stride,
+                   size_t off, float* JXL_RESTRICT coeff) {
+  HWY_CAPPED(float, 8) d8;
+  for (size_t r = 0; r < 8; r++) {  // LoadU: the source need not be aligned
+    Store(LoadU(d8, pixels + off + r * pixels_stride), d8, coeff + r * 8);
+  }
+}
+
+void StoreToBlockAndScale(const float* JXL_RESTRICT coeff, float* output,
+                          size_t off) {
+  HWY_CAPPED(float, 8) d8;
+  const auto scale = Set(d8, 0.125f);  // same value as 1.0f / 8
+  for (size_t r = 0; r < 8; r++) {  // StoreU: output need not be aligned
+    StoreU(Mul(scale, Load(d8, coeff + r * 8)), d8, output + r * 8 + off);
+  }
+}
+
+template <size_t N>
+struct DCT1DImpl;  // primary template, defined below after the base cases
+
+template <>
+struct DCT1DImpl<1> {
+  JXL_INLINE void operator()(float* JXL_RESTRICT mem) {}  // size 1: no-op
+};
+
+template <>
+struct DCT1DImpl<2> {
+  JXL_INLINE void operator()(float* JXL_RESTRICT mem) {
+    HWY_CAPPED(float, 8) d8;
+    auto in1 = Load(d8, mem);
+    auto in2 = Load(d8, mem + 8);
+    Store(Add(in1, in2), d8, mem);      // row 0 <- sum
+    Store(Sub(in1, in2), d8, mem + 8);  // row 1 <- difference
+  }
+};
+
+template <size_t N>
+struct DCT1DImpl {
+  void operator()(float* JXL_RESTRICT mem) {  // transforms N rows in place
+    HWY_ALIGN float tmp[N * 8];
+    AddReverse<N / 2>(mem, mem + N * 4, tmp);  // first half + mirrored second
+    DCT1DImpl<N / 2>()(tmp);                   // recurse on the sums
+    SubReverse<N / 2>(mem, mem + N * 4, tmp + N * 4);  // first - mirrored
+    Multiply<N>(tmp);                 // scales the difference rows of tmp
+    DCT1DImpl<N / 2>()(tmp + N * 4);  // recurse on the scaled differences
+    B<N / 2>(tmp + N * 4);            // combines neighbouring difference rows
+    InverseEvenOdd<N>(tmp, mem);      // interleave both halves back into mem
+  }
+};
+
+void DCT1D(const float* JXL_RESTRICT pixels, size_t pixels_stride,
+           float* JXL_RESTRICT output) {  // output rows are 8 floats apart
+  HWY_CAPPED(float, 8) d8;
+  HWY_ALIGN float tmp[64];
+  for (size_t i = 0; i < 8; i += Lanes(d8)) {  // Lanes(d8) columns per pass
+    // TODO(veluca): consider removing the temporary memory here (as is done in
+    // IDCT), if it turns out that some compilers don't optimize away the loads
+    // and this is performance-critical.
+    LoadFromBlock(pixels, pixels_stride, i, tmp);
+    DCT1DImpl<8>()(tmp);
+    StoreToBlockAndScale(tmp, output, i);  // also multiplies by 1/8
+  }
+}
+
+void TransformFromPixels(const float* JXL_RESTRICT pixels, size_t pixels_stride,
+                         float* JXL_RESTRICT coefficients,
+                         float* JXL_RESTRICT scratch_space) {
+  DCT1D(pixels, pixels_stride, scratch_space);     // first 1-D pass
+  Transpose8x8Block(scratch_space, coefficients);  // swap rows and columns
+  DCT1D(coefficients, 8, scratch_space);           // second 1-D pass
+  Transpose8x8Block(scratch_space, coefficients);  // result ends in coefficients
+}
+
+void StoreQuantizedValue(const Vec<DI>& ival, int16_t* out) {
+  Rebind<int16_t, DI> di16;
+  Store(DemoteTo(di16, ival), di16, out);  // int32 lanes demoted to int16
+}
+
+void StoreQuantizedValue(const Vec<DI>& ival, int32_t* out) {
+  DI di;
+  Store(ival, di, out);  // int32 lanes stored unchanged
+}
+
+template <typename T>
+void QuantizeBlock(const float* dct, const float* qmc, float aq_strength,
+                   const float* zero_bias_offset, const float* zero_bias_mul,
+                   T* block) {  // writes DCTSIZE2 quantized values to block
+  D d;
+  DI di;
+  const auto aq_mul = Set(d, aq_strength);
+  for (size_t k = 0; k < DCTSIZE2; k += Lanes(d)) {
+    const auto val = Load(d, dct + k);
+    const auto q = Load(d, qmc + k);
+    const auto qval = Mul(val, q);  // qmc is applied as a multiplier
+    const auto zb_offset = Load(d, zero_bias_offset + k);
+    const auto zb_mul = Load(d, zero_bias_mul + k);
+    const auto threshold = Add(zb_offset, Mul(zb_mul, aq_mul));
+    const auto nzero_mask = Ge(Abs(qval), threshold);  // below threshold -> 0
+    const auto ival = ConvertTo(di, IfThenElseZero(nzero_mask, Round(qval)));
+    StoreQuantizedValue(ival, block + k);
+  }
+}
+
+template <typename T>
+void ComputeCoefficientBlock(const float* JXL_RESTRICT pixels, size_t stride,
+                             const float* JXL_RESTRICT qmc,
+                             int16_t last_dc_coeff, float aq_strength,
+                             const float* zero_bias_offset,
+                             const float* zero_bias_mul,
+                             float* JXL_RESTRICT tmp, T* block) {
+  float* JXL_RESTRICT dct = tmp;  // first DCTSIZE2 floats of tmp
+  float* JXL_RESTRICT scratch_space = tmp + DCTSIZE2;  // the floats after that
+  TransformFromPixels(pixels, stride, dct, scratch_space);
+  QuantizeBlock(dct, qmc, aq_strength, zero_bias_offset, zero_bias_mul, block);
+  // Center DC values around zero.
+  static constexpr float kDCBias = 128.0f;
+  const float dc = (dct[0] - kDCBias) * qmc[0];
+  float dc_threshold = zero_bias_offset[0] + aq_strength * zero_bias_mul[0];
+  if (std::abs(dc - last_dc_coeff) < dc_threshold) {
+    block[0] = last_dc_coeff;  // close enough: reuse the given DC value
+  } else {
+    block[0] = std::round(dc);  // replaces the DC written by QuantizeBlock
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace
+}  // namespace HWY_NAMESPACE
+}  // namespace jpegli
+HWY_AFTER_NAMESPACE();
+#endif  // LIB_JPEGLI_DCT_INL_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/decode.cc b/third-party/libjxl/libjxl/lib/jpegli/decode.cc
new file mode 100644
index 0000000000..758babeb5e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/decode.cc
@@ -0,0 +1,1028 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode.h"
+
+#include <string.h>
+
+#include <vector>
+
+#include "lib/jpegli/color_quantize.h"
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/decode_marker.h"
+#include "lib/jpegli/decode_scan.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jpegli/render.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/status.h"
+
+namespace jpegli {
+
+void InitializeImage(j_decompress_ptr cinfo) {  // resets per-image state
+  cinfo->restart_interval = 0;
+  cinfo->saw_JFIF_marker = FALSE;
+  cinfo->JFIF_major_version = 1;
+  cinfo->JFIF_minor_version = 1;
+  cinfo->density_unit = 0;
+  cinfo->X_density = 1;
+  cinfo->Y_density = 1;
+  cinfo->saw_Adobe_marker = FALSE;
+  cinfo->Adobe_transform = 0;
+  cinfo->CCIR601_sampling = FALSE;  // not used
+  cinfo->marker_list = nullptr;
+  cinfo->comp_info = nullptr;
+  cinfo->input_scan_number = 0;
+  cinfo->input_iMCU_row = 0;
+  cinfo->output_scan_number = 0;
+  cinfo->output_iMCU_row = 0;
+  cinfo->output_scanline = 0;
+  cinfo->unread_marker = 0;
+  cinfo->coef_bits = nullptr;
+  // We set all these to zero since we don't yet support arithmetic coding.
+  memset(cinfo->arith_dc_L, 0, sizeof(cinfo->arith_dc_L));
+  memset(cinfo->arith_dc_U, 0, sizeof(cinfo->arith_dc_U));
+  memset(cinfo->arith_ac_K, 0, sizeof(cinfo->arith_ac_K));
+  // Initialize the private fields.
+  jpeg_decomp_master* m = cinfo->master;
+  m->input_buffer_.clear();
+  m->input_buffer_pos_ = 0;
+  m->codestream_bits_ahead_ = 0;
+  m->is_multiscan_ = false;
+  m->found_soi_ = false;
+  m->found_dri_ = false;
+  m->found_sof_ = false;
+  m->found_eoi_ = false;
+  m->icc_index_ = 0;
+  m->icc_total_ = 0;
+  m->icc_profile_.clear();
+  memset(m->dc_huff_lut_, 0, sizeof(m->dc_huff_lut_));
+  memset(m->ac_huff_lut_, 0, sizeof(m->ac_huff_lut_));
+  // Initialize the values to an invalid symbol so that we can recognize it
+  // when reading the bit stream using a Huffman code with space > 0.
+  for (size_t i = 0; i < kAllHuffLutSize; ++i) {  // first kAllHuffLutSize slots
+    m->dc_huff_lut_[i].bits = 0;
+    m->dc_huff_lut_[i].value = 0xffff;  // the invalid-symbol marker
+    m->ac_huff_lut_[i].bits = 0;
+    m->ac_huff_lut_[i].value = 0xffff;
+  }
+  m->colormap_lut_ = nullptr;
+  m->pixels_ = nullptr;     // allocated on demand by ReadOutputPass()
+  m->scanlines_ = nullptr;  // allocated on demand by ReadOutputPass()
+  m->regenerate_inverse_colormap_ = true;
+  for (int i = 0; i < kMaxComponents; ++i) {
+    m->dither_[i] = nullptr;
+    m->error_row_[i] = nullptr;
+  }
+  m->output_passes_done_ = 0;
+  m->xoffset_ = 0;
+  m->dequant_ = nullptr;
+}
+
+void InitializeDecompressParams(j_decompress_ptr cinfo) {  // default params
+  cinfo->jpeg_color_space = JCS_UNKNOWN;
+  cinfo->out_color_space = JCS_UNKNOWN;
+  cinfo->scale_num = 1;    // scale_num / scale_denom = 1 / 1
+  cinfo->scale_denom = 1;
+  cinfo->output_gamma = 0.0f;
+  cinfo->buffered_image = FALSE;
+  cinfo->raw_data_out = FALSE;
+  cinfo->dct_method = JDCT_DEFAULT;
+  cinfo->do_fancy_upsampling = TRUE;
+  cinfo->do_block_smoothing = TRUE;
+  cinfo->quantize_colors = FALSE;  // color quantization is off by default
+  cinfo->dither_mode = JDITHER_FS;
+  cinfo->two_pass_quantize = TRUE;
+  cinfo->desired_number_of_colors = 256;
+  cinfo->enable_1pass_quant = FALSE;
+  cinfo->enable_external_quant = FALSE;
+  cinfo->enable_2pass_quant = FALSE;
+  cinfo->actual_number_of_colors = 0;
+  cinfo->colormap = nullptr;  // no colormap until one is supplied or built
+}
+
+// Seeds cinfo->progress, when set, with a pass_limit estimate and pass count.
+void InitProgressMonitor(j_decompress_ptr cinfo, bool coef_only) {
+  auto* const pm = cinfo->progress;
+  if (pm == nullptr) return;
+  const int nc = cinfo->num_components;
+  const bool multiscan = cinfo->master->is_multiscan_;
+  const int scans = cinfo->progressive_mode ? 2 + 3 * nc : multiscan ? nc : 1;
+  pm->pass_limit = cinfo->total_iMCU_rows * scans;
+  pm->pass_counter = 0;
+  pm->completed_passes = 0;
+  int passes = 1;
+  if (!coef_only && !cinfo->buffered_image && multiscan) ++passes;
+  if (!coef_only && cinfo->quantize_colors && !cinfo->colormap &&
+      cinfo->two_pass_quantize && cinfo->enable_2pass_quant) {
+    ++passes;
+  }
+  pm->total_passes = passes;
+}
+
+void InitProgressMonitorForOutput(j_decompress_ptr cinfo) {
+  auto* const pm = cinfo->progress;
+  if (pm == nullptr) return;
+  const auto done = cinfo->master->output_passes_done_;
+  // Two outputs are counted while buffered_image is set and EOI is not found.
+  const int left = cinfo->buffered_image && !cinfo->master->found_eoi_ ? 2 : 1;
+  pm->total_passes = done + (cinfo->enable_2pass_quant ? 2 : 1) * left;
+  pm->completed_passes = done;
+}
+
+// Publishes input progress (rows of earlier scans plus the current iMCU row)
+// and raises pass_limit to whole scans when the counter has overtaken it.
+void ProgressMonitorInputPass(j_decompress_ptr cinfo) {
+  auto* const pm = cinfo->progress;
+  if (pm == nullptr) return;
+  const auto rows = cinfo->total_iMCU_rows;
+  const auto scan = cinfo->input_scan_number;
+  pm->pass_counter = (scan - 1) * rows + cinfo->input_iMCU_row;
+  if (pm->pass_limit < pm->pass_counter) pm->pass_limit = scan * rows;
+  (*pm->progress_monitor)(reinterpret_cast<j_common_ptr>(cinfo));
+}
+
+void ProgressMonitorOutputPass(j_decompress_ptr cinfo) {
+  auto* const pm = cinfo->progress;
+  if (pm == nullptr) return;
+  const bool extra = !cinfo->buffered_image && cinfo->master->is_multiscan_;
+  pm->pass_counter = cinfo->output_scanline;  // rows produced in this pass
+  pm->pass_limit = cinfo->output_height;
+  pm->completed_passes = (extra ? 1 : 0) + cinfo->master->output_passes_done_;
+  (*pm->progress_monitor)(reinterpret_cast<j_common_ptr>(cinfo));
+}
+
+void BuildHuffmanLookupTable(j_decompress_ptr cinfo, JHUFF_TBL* table,
+                             HuffmanTableEntry* huff_lut) {
+  uint32_t counts[kJpegHuffmanMaxBitLength + 1] = {};  // codes per bit length
+  counts[0] = 0;
+  int total_count = 0;
+  int space = 1 << kJpegHuffmanMaxBitLength;  // unused code space so far
+  int max_depth = 1;
+  for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) {
+    int count = table->bits[i];
+    if (count != 0) {
+      max_depth = i;  // ends up as the longest length that is in use
+    }
+    counts[i] = count;
+    total_count += count;
+    space -= count * (1 << (kJpegHuffmanMaxBitLength - i));
+  }
+  uint32_t values[kJpegHuffmanAlphabetSize + 1] = {};  // +1 for the sentinel
+  uint8_t values_seen[256] = {0};
+  for (int i = 0; i < total_count; ++i) {
+    int value = table->huffval[i];
+    if (values_seen[value]) {
+      return JPEGLI_ERROR("Duplicate Huffman code value %d", value);
+    }
+    values_seen[value] = 1;
+    values[i] = value;
+  }
+  // Add an invalid symbol that will have the all-ones code.
+  ++counts[max_depth];
+  values[total_count] = kJpegHuffmanAlphabetSize;
+  space -= (1 << (kJpegHuffmanMaxBitLength - max_depth));
+  if (space < 0) {
+    // NOTE(review): presumably JPEGLI_ERROR does not return here -- confirm.
+    JPEGLI_ERROR("Invalid Huffman code lengths.");
+  } else if (space > 0 && huff_lut[0].value != 0xffff) {
+    // Re-initialize the values to an invalid symbol so that we can recognize
+    // it when reading the bit stream using a Huffman code with space > 0.
+    for (int i = 0; i < kJpegHuffmanLutSize; ++i) {
+      huff_lut[i].bits = 0;
+      huff_lut[i].value = 0xffff;
+    }
+  }
+  BuildJpegHuffmanTable(&counts[0], &values[0], huff_lut);
+}
+
+void PrepareForScan(j_decompress_ptr cinfo) {  // per-scan decoder setup
+  jpeg_decomp_master* m = cinfo->master;
+  for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+    int comp_idx = cinfo->cur_comp_info[i]->component_index;
+    int* prev_coef_bits = cinfo->coef_bits[comp_idx + cinfo->num_components];
+    for (int k = std::min(cinfo->Ss, 1); k <= std::max(cinfo->Se, 9); k++) {
+      prev_coef_bits[k] =  // snapshot of coef_bits before this scan's update
+          (cinfo->input_scan_number > 0) ? cinfo->coef_bits[comp_idx][k] : 0;
+    }
+    for (int k = cinfo->Ss; k <= cinfo->Se; ++k) {
+      cinfo->coef_bits[comp_idx][k] = cinfo->Al;
+    }
+  }
+  AddStandardHuffmanTables(reinterpret_cast<j_common_ptr>(cinfo),
+                           /*is_dc=*/false);
+  AddStandardHuffmanTables(reinterpret_cast<j_common_ptr>(cinfo),
+                           /*is_dc=*/true);
+  // Check that all the Huffman tables needed for this scan are defined and
+  // build derived lookup tables.
+  for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+    if (cinfo->Ss == 0) {  // the scan includes the DC coefficient
+      int dc_tbl_idx = cinfo->cur_comp_info[i]->dc_tbl_no;
+      JHUFF_TBL* table = cinfo->dc_huff_tbl_ptrs[dc_tbl_idx];
+      HuffmanTableEntry* huff_lut =
+          &m->dc_huff_lut_[dc_tbl_idx * kJpegHuffmanLutSize];
+      if (!table) {
+        return JPEGLI_ERROR("DC Huffman table %d not found", dc_tbl_idx);
+      }
+      BuildHuffmanLookupTable(cinfo, table, huff_lut);
+    }
+    if (cinfo->Se > 0) {  // the scan includes AC coefficients
+      int ac_tbl_idx = cinfo->cur_comp_info[i]->ac_tbl_no;
+      JHUFF_TBL* table = cinfo->ac_huff_tbl_ptrs[ac_tbl_idx];
+      HuffmanTableEntry* huff_lut =
+          &m->ac_huff_lut_[ac_tbl_idx * kJpegHuffmanLutSize];
+      if (!table) {
+        return JPEGLI_ERROR("AC Huffman table %d not found", ac_tbl_idx);
+      }
+      BuildHuffmanLookupTable(cinfo, table, huff_lut);
+    }
+  }
+  // Copy quantization tables into comp_info.
+  for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+    jpeg_component_info* comp = cinfo->cur_comp_info[i];
+    if (comp->quant_table == nullptr) {  // copied once, then kept
+      comp->quant_table = Allocate<JQUANT_TBL>(cinfo, 1, JPOOL_IMAGE);
+      // NOTE(review): assumes quant_tbl_ptrs[quant_tbl_no] is non-null here.
+      memcpy(comp->quant_table, cinfo->quant_tbl_ptrs[comp->quant_tbl_no],
+             sizeof(JQUANT_TBL));
+    }
+  }
+  if (cinfo->comps_in_scan == 1) {  // single component: sized from that comp
+    const auto& comp = *cinfo->cur_comp_info[0];
+    cinfo->MCUs_per_row = DivCeil(cinfo->image_width * comp.h_samp_factor,
+                                  cinfo->max_h_samp_factor * DCTSIZE);
+    cinfo->MCU_rows_in_scan = DivCeil(cinfo->image_height * comp.v_samp_factor,
+                                      cinfo->max_v_samp_factor * DCTSIZE);
+    m->mcu_rows_per_iMCU_row_ = cinfo->cur_comp_info[0]->v_samp_factor;
+  } else {  // several components: one MCU row per iMCU row
+    cinfo->MCU_rows_in_scan = cinfo->total_iMCU_rows;
+    cinfo->MCUs_per_row = m->iMCU_cols_;
+    m->mcu_rows_per_iMCU_row_ = 1;
+    size_t mcu_size = 0;  // blocks per MCU, summed over the scan's components
+    for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+      jpeg_component_info* comp = cinfo->cur_comp_info[i];
+      mcu_size += comp->h_samp_factor * comp->v_samp_factor;
+    }
+    if (mcu_size > D_MAX_BLOCKS_IN_MCU) {
+      JPEGLI_ERROR("MCU size too big");
+    }
+  }
+  memset(m->last_dc_coeff_, 0, sizeof(m->last_dc_coeff_));
+  m->restarts_to_go_ = cinfo->restart_interval;
+  m->next_restart_marker_ = 0;
+  m->eobrun_ = -1;
+  m->scan_mcu_row_ = 0;
+  m->scan_mcu_col_ = 0;
+  m->codestream_bits_ahead_ = 0;
+  ++cinfo->input_scan_number;
+  cinfo->input_iMCU_row = 0;
+  PrepareForiMCURow(cinfo);
+  cinfo->global_state = kDecProcessScan;  // ConsumeInput() now runs ProcessScan
+}
+
+int ConsumeInput(j_decompress_ptr cinfo) {  // returns a JPEG_* status code
+  jpeg_decomp_master* m = cinfo->master;
+  if (cinfo->global_state == kDecProcessScan && m->streaming_mode_ &&
+      cinfo->input_iMCU_row > cinfo->output_iMCU_row) {
+    // Prevent input from getting ahead of output in streaming mode.
+    return JPEG_SUSPENDED;
+  }
+  jpeg_source_mgr* src = cinfo->src;
+  int status;
+  for (;;) {
+    const uint8_t* data;
+    size_t len;
+    if (m->input_buffer_.empty()) {  // read straight from the source buffer
+      data = cinfo->src->next_input_byte;
+      len = cinfo->src->bytes_in_buffer;
+    } else {  // read from the internal buffer that spans several refills
+      data = &m->input_buffer_[m->input_buffer_pos_];
+      len = m->input_buffer_.size() - m->input_buffer_pos_;
+    }
+    size_t pos = 0;  // bytes consumed by the call below
+    if (cinfo->global_state == kDecProcessScan) {
+      status = ProcessScan(cinfo, data, len, &pos, &m->codestream_bits_ahead_);
+    } else {
+      status = ProcessMarkers(cinfo, data, len, &pos);
+    }
+    if (m->input_buffer_.empty()) {
+      cinfo->src->next_input_byte += pos;
+      cinfo->src->bytes_in_buffer -= pos;
+    } else {
+      m->input_buffer_pos_ += pos;
+      size_t bytes_left = m->input_buffer_.size() - m->input_buffer_pos_;
+      if (bytes_left <= src->bytes_in_buffer) {
+        // The unread tail lies inside src's buffer: go back to reading there.
+        src->next_input_byte += (src->bytes_in_buffer - bytes_left);
+        src->bytes_in_buffer = bytes_left;
+        m->input_buffer_.clear();
+        m->input_buffer_pos_ = 0;
+      }
+    }
+    if (status == kHandleRestart) {
+      JXL_DASSERT(m->input_buffer_.size() <=
+                  m->input_buffer_pos_ + src->bytes_in_buffer);
+      m->input_buffer_.clear();
+      m->input_buffer_pos_ = 0;
+      if (cinfo->unread_marker == 0xd0 + m->next_restart_marker_) {
+        cinfo->unread_marker = 0;  // the expected restart marker was found
+      } else {
+        if (!(*cinfo->src->resync_to_restart)(cinfo, m->next_restart_marker_)) {
+          return JPEG_SUSPENDED;
+        }
+      }
+      m->next_restart_marker_ += 1;
+      m->next_restart_marker_ &= 0x7;  // marker index wraps modulo 8
+      m->restarts_to_go_ = cinfo->restart_interval;
+      if (cinfo->unread_marker != 0) {
+        JPEGLI_WARN("Failed to resync to next restart marker, skipping scan.");
+        return JPEG_SCAN_COMPLETED;
+      }
+      continue;
+    }
+    if (status == kHandleMarkerProcessor) {
+      JXL_DASSERT(m->input_buffer_.size() <=
+                  m->input_buffer_pos_ + src->bytes_in_buffer);
+      m->input_buffer_.clear();
+      m->input_buffer_pos_ = 0;
+      if (!(*GetMarkerProcessor(cinfo))(cinfo)) {
+        return JPEG_SUSPENDED;
+      }
+      cinfo->unread_marker = 0;
+      continue;
+    }
+    if (status != kNeedMoreInput) {
+      break;  // any other status is handled after the loop
+    }
+    if (m->input_buffer_.empty()) {
+      JXL_DASSERT(m->input_buffer_pos_ == 0);
+      // Save the unread source bytes before the source buffer is refilled.
+      m->input_buffer_.assign(src->next_input_byte,
+                              src->next_input_byte + src->bytes_in_buffer);
+    }
+    if (!(*cinfo->src->fill_input_buffer)(cinfo)) {
+      m->input_buffer_.clear();
+      m->input_buffer_pos_ = 0;
+      return JPEG_SUSPENDED;
+    }
+    if (src->bytes_in_buffer == 0) {
+      JPEGLI_ERROR("Empty input.");
+    }
+    m->input_buffer_.insert(m->input_buffer_.end(), src->next_input_byte,
+                            src->next_input_byte + src->bytes_in_buffer);
+  }
+  if (status == JPEG_SCAN_COMPLETED) {
+    cinfo->global_state = kDecProcessMarkers;
+  } else if (status == JPEG_REACHED_SOS) {
+    if (cinfo->global_state == kDecInHeader) {
+      cinfo->global_state = kDecHeaderDone;  // scan setup is deferred here
+    } else {
+      PrepareForScan(cinfo);
+    }
+  }
+  return status;
+}
+
+// Reports whether decoding may move on to producing output. Input counts as
+// ready once EOI was seen, when it is on a later scan than the output, or
+// when the current scan is fully read; otherwise it must lead the output
+// row by more than 2 iMCU rows (more than 0 in streaming mode).
+bool IsInputReady(j_decompress_ptr cinfo) {
+  const jpeg_decomp_master* m = cinfo->master;
+  if (m->found_eoi_) return true;
+  const int in_scan = cinfo->input_scan_number;
+  const int out_scan = cinfo->output_scan_number;
+  if (in_scan != out_scan) return in_scan > out_scan;
+  const auto in_row = cinfo->input_iMCU_row;
+  if (in_row == cinfo->total_iMCU_rows) return true;
+  // Same scan, partially read: require the lead described above.
+  const int lead = m->streaming_mode_ ? 0 : 2;
+  return in_row > cinfo->output_iMCU_row + lead;
+}
+
+bool ReadOutputPass(j_decompress_ptr cinfo) {  // false if input suspended
+  jpeg_decomp_master* m = cinfo->master;
+  if (!m->pixels_) {  // first call: allocate a whole-image uint8 buffer
+    size_t stride = cinfo->out_color_components * cinfo->output_width;
+    size_t num_samples = cinfo->output_height * stride;
+    m->pixels_ = Allocate<uint8_t>(cinfo, num_samples, JPOOL_IMAGE);
+    m->scanlines_ =
+        Allocate<JSAMPROW>(cinfo, cinfo->output_height, JPOOL_IMAGE);
+    for (size_t i = 0; i < cinfo->output_height; ++i) {
+      m->scanlines_[i] = &m->pixels_[i * stride];  // row i inside pixels_
+    }
+  }
+  size_t num_output_rows = 0;
+  while (num_output_rows < cinfo->output_height) {  // until all rows are out
+    if (IsInputReady(cinfo)) {
+      ProgressMonitorOutputPass(cinfo);
+      ProcessOutput(cinfo, &num_output_rows, m->scanlines_,
+                    cinfo->output_height);
+    } else if (ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+      return false;
+    }
+  }
+  cinfo->output_scanline = 0;  // rewind the output position after the pass
+  cinfo->output_iMCU_row = 0;
+  return true;
+}
+
+boolean PrepareQuantizedOutput(j_decompress_ptr cinfo) {  // FALSE if suspended
+  jpeg_decomp_master* m = cinfo->master;
+  if (cinfo->raw_data_out) {
+    JPEGLI_ERROR("Color quantization is not supported in raw data mode.");
+  }
+  if (m->output_data_type_ != JPEGLI_TYPE_UINT8) {
+    JPEGLI_ERROR("Color quantization must use 8-bit mode.");
+  }
+  if (cinfo->colormap) {  // mode 3: colormap already present on cinfo
+    m->quant_mode_ = 3;
+  } else if (cinfo->two_pass_quantize && cinfo->enable_2pass_quant) {
+    m->quant_mode_ = 2;  // mode 2: two-pass colormap selection
+  } else if (cinfo->enable_1pass_quant) {
+    m->quant_mode_ = 1;  // mode 1: one-pass colormap selection
+  } else {
+    JPEGLI_ERROR("Invalid quantization mode change");
+  }
+  if (m->quant_mode_ > 1 && cinfo->dither_mode == JDITHER_ORDERED) {
+    cinfo->dither_mode = JDITHER_FS;  // ordered dither only kept for mode 1
+  }
+  if (m->quant_mode_ == 1) {
+    ChooseColorMap1Pass(cinfo);
+  } else if (m->quant_mode_ == 2) {
+    m->quant_pass_ = 0;
+    if (!ReadOutputPass(cinfo)) {  // needs the whole image before choosing
+      return FALSE;
+    }
+    ChooseColorMap2Pass(cinfo);
+  }
+  if (m->quant_mode_ == 2 ||
+      (m->quant_mode_ == 3 && m->regenerate_inverse_colormap_)) {
+    CreateInverseColorMap(cinfo);
+  }
+  if (cinfo->dither_mode == JDITHER_ORDERED) {
+    CreateOrderedDitherTables(cinfo);
+  } else if (cinfo->dither_mode == JDITHER_FS) {
+    InitFSDitherState(cinfo);
+  }
+  m->quant_pass_ = 1;
+  return TRUE;
+}
+
+void AllocateCoefficientBuffer(j_decompress_ptr cinfo) {  // one array/component
+  jpeg_decomp_master* m = cinfo->master;
+  j_common_ptr comptr = reinterpret_cast<j_common_ptr>(cinfo);
+  jvirt_barray_ptr* coef_arrays = jpegli::Allocate<jvirt_barray_ptr>(
+      cinfo, cinfo->num_components, JPOOL_IMAGE);
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    jpeg_component_info* comp = &cinfo->comp_info[c];
+    size_t height_in_blocks =  // streaming: only v_samp_factor block rows
+        m->streaming_mode_ ? comp->v_samp_factor : comp->height_in_blocks;
+    coef_arrays[c] = (*cinfo->mem->request_virt_barray)(
+        comptr, JPOOL_IMAGE, TRUE, comp->width_in_blocks, height_in_blocks,
+        comp->v_samp_factor);
+  }
+  cinfo->master->coef_arrays = coef_arrays;
+  (*cinfo->mem->realize_virt_arrays)(comptr);  // realize the requested arrays
+}
+
+void AllocateOutputBuffers(j_decompress_ptr cinfo) {  // output-side scratch
+  jpeg_decomp_master* m = cinfo->master;
+  size_t iMCU_width = cinfo->max_h_samp_factor * m->min_scaled_dct_size;
+  size_t output_stride = m->iMCU_cols_ * iMCU_width;  // whole iMCU columns
+  m->need_context_rows_ = false;
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    if (cinfo->do_fancy_upsampling && m->v_factor[c] == 2) {
+      m->need_context_rows_ = true;  // any 2x vertical fancy upsampling
+    }
+  }
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    const auto& comp = cinfo->comp_info[c];
+    size_t cheight = comp.v_samp_factor * m->scaled_dct_size[c];
+    int downsampled_width = output_stride / m->h_factor[c];
+    m->raw_height_[c] = cinfo->total_iMCU_rows * cheight;
+    if (m->need_context_rows_) {
+      cheight *= 3;  // triple the per-component row count for context rows
+    }
+    m->raw_output_[c].Allocate(cinfo, cheight, downsampled_width);
+  }
+  int num_all_components =
+      std::max(cinfo->out_color_components, cinfo->num_components);
+  for (int c = 0; c < num_all_components; ++c) {
+    m->render_output_[c].Allocate(cinfo, cinfo->max_v_samp_factor,
+                                  output_stride);
+  }
+  m->idct_scratch_ = Allocate<float>(cinfo, 5 * DCTSIZE2, JPOOL_IMAGE_ALIGNED);
+  // Padding for horizontal chroma upsampling.
+  constexpr size_t kPaddingLeft = 64;
+  constexpr size_t kPaddingRight = 64;
+  m->upsample_scratch_ = Allocate<float>(
+      cinfo, output_stride + kPaddingLeft + kPaddingRight, JPOOL_IMAGE_ALIGNED);
+  size_t bytes_per_sample = jpegli_bytes_per_sample(m->output_data_type_);
+  size_t bytes_per_pixel = cinfo->out_color_components * bytes_per_sample;
+  size_t scratch_stride = RoundUpTo(output_stride, HWY_ALIGNMENT);
+  m->output_scratch_ = Allocate<uint8_t>(
+      cinfo, bytes_per_pixel * scratch_stride, JPOOL_IMAGE_ALIGNED);
+  m->smoothing_scratch_ =
+      Allocate<int16_t>(cinfo, DCTSIZE2, JPOOL_IMAGE_ALIGNED);
+  size_t coeffs_per_block = cinfo->num_components * DCTSIZE2;
+  m->nonzeros_ = Allocate<int>(cinfo, coeffs_per_block, JPOOL_IMAGE_ALIGNED);
+  m->sumabs_ = Allocate<int>(cinfo, coeffs_per_block, JPOOL_IMAGE_ALIGNED);
+  m->biases_ = Allocate<float>(cinfo, coeffs_per_block, JPOOL_IMAGE_ALIGNED);
+  m->dequant_ = Allocate<float>(cinfo, coeffs_per_block, JPOOL_IMAGE_ALIGNED);
+  memset(m->dequant_, 0, coeffs_per_block * sizeof(float));  // only one zeroed
+}
+
+}  // namespace jpegli
+
+void jpegli_CreateDecompress(j_decompress_ptr cinfo, int version,
+                             size_t structsize) {  // `version` is not read here
+  cinfo->mem = nullptr;
+  if (structsize != sizeof(*cinfo)) {  // caller built against another layout
+    JPEGLI_ERROR("jpeg_decompress_struct has wrong size.");
+  }
+  jpegli::InitMemoryManager(reinterpret_cast<j_common_ptr>(cinfo));
+  cinfo->is_decompressor = TRUE;
+  cinfo->progress = nullptr;
+  cinfo->src = nullptr;  // stays null until a source is attached
+  for (int i = 0; i < NUM_QUANT_TBLS; i++) {
+    cinfo->quant_tbl_ptrs[i] = nullptr;
+  }
+  for (int i = 0; i < NUM_HUFF_TBLS; i++) {
+    cinfo->dc_huff_tbl_ptrs[i] = nullptr;
+    cinfo->ac_huff_tbl_ptrs[i] = nullptr;
+  }
+  cinfo->global_state = jpegli::kDecStart;
+  cinfo->sample_range_limit = nullptr;  // not used
+  cinfo->rec_outbuf_height = 1;         // output works with any buffer height
+  cinfo->master = new jpeg_decomp_master;  // heap-allocated decoder state
+  jpeg_decomp_master* m = cinfo->master;
+  for (int i = 0; i < 16; ++i) {
+    m->app_marker_parsers[i] = nullptr;  // no custom APPn parsers yet
+  }
+  m->com_marker_parser = nullptr;
+  memset(m->markers_to_save_, 0, sizeof(m->markers_to_save_));
+  jpegli::InitializeDecompressParams(cinfo);
+  jpegli::InitializeImage(cinfo);
+}
+
+void jpegli_destroy_decompress(j_decompress_ptr cinfo) {  // typed wrapper
+  jpegli_destroy(reinterpret_cast<j_common_ptr>(cinfo));
+}
+
+void jpegli_abort_decompress(j_decompress_ptr cinfo) {  // typed wrapper
+  jpegli_abort(reinterpret_cast<j_common_ptr>(cinfo));
+}
+
+void jpegli_save_markers(j_decompress_ptr cinfo, int marker_code,
+                         unsigned int length_limit) {  // length_limit is unused
+  // TODO(szabadka) Limit our memory usage by taking into account length_limit.
+  jpeg_decomp_master* m = cinfo->master;
+  if (marker_code < 0xe0 || marker_code > 0xff) {  // marker codes are one byte
+    JPEGLI_ERROR("jpegli_save_markers: invalid marker code %d", marker_code);
+  }
+  m->markers_to_save_[marker_code - 0xe0] = 1;  // flag table is 0xe0-relative
+}
+
+void jpegli_set_marker_processor(j_decompress_ptr cinfo, int marker_code,
+                                 jpeg_marker_parser_method routine) {
+  jpeg_decomp_master* m = cinfo->master;
+  if (marker_code == 0xfe) {  // 0xfe selects the COM parser slot
+    m->com_marker_parser = routine;
+  } else if (marker_code >= 0xe0 && marker_code <= 0xef) {  // 16 APP slots
+    m->app_marker_parsers[marker_code - 0xe0] = routine;
+  } else {  // any other code is rejected
+    JPEGLI_ERROR("jpegli_set_marker_processor: invalid marker code %d",
+                 marker_code);
+  }
+}
+
+int jpegli_consume_input(j_decompress_ptr cinfo) {  // returns a JPEG_* status
+  if (cinfo->global_state == jpegli::kDecStart) {  // first call: (re)initialize
+    (*cinfo->err->reset_error_mgr)(reinterpret_cast<j_common_ptr>(cinfo));
+    (*cinfo->src->init_source)(cinfo);  // src is dereferenced without a check
+    jpegli::InitializeDecompressParams(cinfo);
+    jpegli::InitializeImage(cinfo);
+    cinfo->global_state = jpegli::kDecInHeader;
+  }
+  if (cinfo->global_state == jpegli::kDecHeaderDone) {
+    return JPEG_REACHED_SOS;  // nothing more is consumed in this state
+  }
+  if (cinfo->master->found_eoi_) {
+    return JPEG_REACHED_EOI;
+  }
+  if (cinfo->global_state == jpegli::kDecInHeader ||
+      cinfo->global_state == jpegli::kDecProcessMarkers ||
+      cinfo->global_state == jpegli::kDecProcessScan) {
+    return jpegli::ConsumeInput(cinfo);
+  }
+  JPEGLI_ERROR("Unexpected state %d", cinfo->global_state);
+  return JPEG_REACHED_EOI;  // return value does not matter
+}
+
+int jpegli_read_header(j_decompress_ptr cinfo, boolean require_image) {
+  if (cinfo->global_state != jpegli::kDecStart &&
+      cinfo->global_state != jpegli::kDecInHeader) {
+    JPEGLI_ERROR("jpegli_read_header: unexpected state %d",
+                 cinfo->global_state);
+  }
+  if (cinfo->src == nullptr) {  // checked before consume_input touches src
+    JPEGLI_ERROR("Missing source.");
+  }
+  for (;;) {  // consume input until SOS, EOI or suspension
+    int retcode = jpegli_consume_input(cinfo);
+    if (retcode == JPEG_SUSPENDED) {
+      return retcode;  // caller may retry; state stays kDecInHeader
+    } else if (retcode == JPEG_REACHED_SOS) {
+      break;
+    } else if (retcode == JPEG_REACHED_EOI) {  // EOI before any SOS
+      if (require_image) {
+        JPEGLI_ERROR("jpegli_read_header: unexpected EOI marker.");
+      }
+      jpegli_abort_decompress(cinfo);
+      return JPEG_HEADER_TABLES_ONLY;
+    }
+  };
+  return JPEG_HEADER_OK;
+}
+
+boolean jpegli_read_icc_profile(j_decompress_ptr cinfo, JOCTET** icc_data_ptr,
+                                unsigned int* icc_data_len) {  // TRUE if found
+  if (cinfo->global_state == jpegli::kDecStart ||
+      cinfo->global_state == jpegli::kDecInHeader) {  // header not done yet
+    JPEGLI_ERROR("jpegli_read_icc_profile: unexpected state %d",
+                 cinfo->global_state);
+  }
+  if (icc_data_ptr == nullptr || icc_data_len == nullptr) {
+    JPEGLI_ERROR("jpegli_read_icc_profile: invalid output buffer");
+  }
+  jpeg_decomp_master* m = cinfo->master;
+  if (m->icc_profile_.empty()) {  // no profile stored: null/0 outputs, FALSE
+    *icc_data_ptr = nullptr;
+    *icc_data_len = 0;
+    return FALSE;
+  }
+  *icc_data_len = m->icc_profile_.size();
+  *icc_data_ptr = (JOCTET*)malloc(*icc_data_len);  // not freed in this function
+  if (*icc_data_ptr == nullptr) {
+    JPEGLI_ERROR("jpegli_read_icc_profile: Out of memory");
+  }
+  memcpy(*icc_data_ptr, m->icc_profile_.data(), *icc_data_len);
+  return TRUE;
+}
+
+void jpegli_core_output_dimensions(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* m = cinfo->master;  // receives the scaled DCT sizes
+  if (!m->found_sof_) {
+    JPEGLI_ERROR("No SOF marker found.");
+  }
+  const bool scaled = cinfo->scale_num != 1 || cinfo->scale_denom != 1;
+  if (cinfo->raw_data_out && scaled) {
+    JPEGLI_ERROR("Output scaling is not supported in raw output mode");
+  }
+  if (scaled) {
+    int dctsize = 16;  // scaled block size, lowered while the ratio allows
+    while (dctsize > 1 &&  // floor of 1 ends the loop even for scale_num == 0
+           cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * (dctsize - 1)) {
+      --dctsize;
+    }
+    m->min_scaled_dct_size = dctsize;
+    cinfo->output_width =
+        jpegli::DivCeil(cinfo->image_width * dctsize, DCTSIZE);
+    cinfo->output_height =
+        jpegli::DivCeil(cinfo->image_height * dctsize, DCTSIZE);
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      m->scaled_dct_size[c] = m->min_scaled_dct_size;
+    }
+  } else {  // unscaled: output size equals image size, full DCTSIZE blocks
+    cinfo->output_width = cinfo->image_width;
+    cinfo->output_height = cinfo->image_height;
+    m->min_scaled_dct_size = DCTSIZE;
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      m->scaled_dct_size[c] = DCTSIZE;
+    }
+  }
+}
+
+void jpegli_calc_output_dimensions(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* m = cinfo->master;
+  jpegli_core_output_dimensions(cinfo);  // output size + scaled_dct_size[]
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    jpeg_component_info* comp = &cinfo->comp_info[c];
+    m->h_factor[c] = cinfo->max_h_samp_factor / comp->h_samp_factor;
+    m->v_factor[c] = cinfo->max_v_samp_factor / comp->v_samp_factor;
+  }
+  if (cinfo->scale_num != 1 || cinfo->scale_denom != 1) {
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      // Prefer IDCT scaling over 2x upsampling.
+      while (m->scaled_dct_size[c] < DCTSIZE && (m->v_factor[c] % 2) == 0 &&
+             (m->h_factor[c] % 2) == 0) {
+        m->scaled_dct_size[c] *= 2;  // trade one 2x upsampling step ...
+        m->v_factor[c] /= 2;         // ... for a block twice as large
+        m->h_factor[c] /= 2;
+      }
+    }
+  }
+  if (cinfo->out_color_space == JCS_GRAYSCALE) {
+    cinfo->out_color_components = 1;
+  } else if (cinfo->out_color_space == JCS_RGB ||
+             cinfo->out_color_space == JCS_YCbCr) {
+    cinfo->out_color_components = 3;
+  } else if (cinfo->out_color_space == JCS_CMYK ||
+             cinfo->out_color_space == JCS_YCCK) {
+    cinfo->out_color_components = 4;
+  } else {  // other color spaces: keep the component count of the input
+    cinfo->out_color_components = cinfo->num_components;
+  }
+  cinfo->output_components =  // one index per pixel when quantizing colors
+      cinfo->quantize_colors ? 1 : cinfo->out_color_components;
+  cinfo->rec_outbuf_height = 1;
+}
+
+boolean jpegli_has_multiple_scans(j_decompress_ptr cinfo) {
+  if (cinfo->input_scan_number == 0) {  // scan counter still at zero
+    JPEGLI_ERROR("No SOS marker found.");
+  }
+  return cinfo->master->is_multiscan_;  // flag stored on the decoder master
+}
+
+boolean jpegli_input_complete(j_decompress_ptr cinfo) {  // found_eoi_ flag
+  return cinfo->master->found_eoi_;
+}
+
+boolean jpegli_start_decompress(j_decompress_ptr cinfo) {  // FALSE: suspended
+  jpeg_decomp_master* m = cinfo->master;
+  if (cinfo->global_state == jpegli::kDecHeaderDone) {  // first call: set up
+    m->streaming_mode_ = !m->is_multiscan_ && !cinfo->buffered_image &&
+                         (!cinfo->quantize_colors || !cinfo->two_pass_quantize);
+    jpegli::AllocateCoefficientBuffer(cinfo);
+    jpegli_calc_output_dimensions(cinfo);
+    jpegli::PrepareForScan(cinfo);
+    if (cinfo->quantize_colors) {  // enable exactly one quantizer kind
+      if (cinfo->colormap != nullptr) {
+        cinfo->enable_external_quant = TRUE;
+      } else if (cinfo->two_pass_quantize &&
+                 cinfo->out_color_space == JCS_RGB) {
+        cinfo->enable_2pass_quant = TRUE;
+      } else {
+        cinfo->enable_1pass_quant = TRUE;
+      }
+    }
+    jpegli::InitProgressMonitor(cinfo, /*coef_only=*/false);
+    jpegli::AllocateOutputBuffers(cinfo);
+    if (cinfo->buffered_image == TRUE) {  // buffered mode stops here
+      cinfo->output_scan_number = 0;
+      return TRUE;
+    }
+  } else if (!m->is_multiscan_) {  // re-entry is only valid for multiscan
+    JPEGLI_ERROR("jpegli_start_decompress: unexpected state %d",
+                 cinfo->global_state);
+  }
+  if (m->is_multiscan_) {
+    if (cinfo->global_state != jpegli::kDecProcessScan &&
+        cinfo->global_state != jpegli::kDecProcessMarkers) {
+      JPEGLI_ERROR("jpegli_start_decompress: unexpected state %d",
+                   cinfo->global_state);
+    }
+    while (!m->found_eoi_) {  // read all scans before any output
+      jpegli::ProgressMonitorInputPass(cinfo);
+      if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+        return FALSE;
+      }
+    }
+  }
+  cinfo->output_scan_number = cinfo->input_scan_number;
+  jpegli::PrepareForOutput(cinfo);
+  if (cinfo->quantize_colors) {
+    return jpegli::PrepareQuantizedOutput(cinfo);
+  } else {
+    return TRUE;
+  }
+}
+
+boolean jpegli_start_output(j_decompress_ptr cinfo, int scan_number) {
+  jpeg_decomp_master* m = cinfo->master;
+  if (!cinfo->buffered_image) {  // only valid in buffered-image mode
+    JPEGLI_ERROR("jpegli_start_output: buffered image mode was not set");
+  }
+  if (cinfo->global_state != jpegli::kDecProcessScan &&
+      cinfo->global_state != jpegli::kDecProcessMarkers) {
+    JPEGLI_ERROR("jpegli_start_output: unexpected state %d",
+                 cinfo->global_state);
+  }
+  cinfo->output_scan_number = std::max(1, scan_number);  // at least scan 1
+  if (m->found_eoi_) {  // after EOI, cap at the last scan that was read
+    cinfo->output_scan_number =
+        std::min(cinfo->output_scan_number, cinfo->input_scan_number);
+  }
+  jpegli::InitProgressMonitorForOutput(cinfo);
+  jpegli::PrepareForOutput(cinfo);
+  if (cinfo->quantize_colors) {
+    return jpegli::PrepareQuantizedOutput(cinfo);
+  } else {
+    return TRUE;
+  }
+}
+
+boolean jpegli_finish_output(j_decompress_ptr cinfo) {  // FALSE: suspended
+  if (!cinfo->buffered_image) {  // only valid in buffered-image mode
+    JPEGLI_ERROR("jpegli_finish_output: buffered image mode was not set");
+  }
+  if (cinfo->global_state != jpegli::kDecProcessScan &&
+      cinfo->global_state != jpegli::kDecProcessMarkers) {
+    JPEGLI_ERROR("jpegli_finish_output: unexpected state %d",
+                 cinfo->global_state);
+  }
+  // Advance input to the start of the next scan, or to the end of input.
+  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
+         !cinfo->master->found_eoi_) {
+    if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+      return FALSE;
+    }
+  }
+  return TRUE;
+}
+
+JDIMENSION jpegli_read_scanlines(j_decompress_ptr cinfo, JSAMPARRAY scanlines,
+                                 JDIMENSION max_lines) {  // returns rows output
+  jpeg_decomp_master* m = cinfo->master;
+  if (cinfo->global_state != jpegli::kDecProcessScan &&
+      cinfo->global_state != jpegli::kDecProcessMarkers) {
+    JPEGLI_ERROR("jpegli_read_scanlines: unexpected state %d",
+                 cinfo->global_state);
+  }
+  if (cinfo->buffered_image) {
+    if (cinfo->output_scan_number == 0) {  // no output pass was started
+      JPEGLI_ERROR(
+          "jpegli_read_scanlines: "
+          "jpegli_start_output() was not called");
+    }
+  } else if (m->is_multiscan_ && !m->found_eoi_) {
+    JPEGLI_ERROR(
+        "jpegli_read_scanlines: "
+        "jpegli_start_decompress() did not finish");
+  }
+  if (max_lines > cinfo->output_height - cinfo->output_scanline) {  // wrap-safe
+    max_lines = cinfo->output_height - cinfo->output_scanline;  // rows left
+  }  // NOTE(review): assumes output_scanline <= output_height -- confirm
+  jpegli::ProgressMonitorOutputPass(cinfo);
+  size_t num_output_rows = 0;
+  while (num_output_rows < max_lines) {
+    if (jpegli::IsInputReady(cinfo)) {
+      jpegli::ProcessOutput(cinfo, &num_output_rows, scanlines, max_lines);
+    } else if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+      break;  // suspended: report the rows produced so far
+    }
+  }
+  return num_output_rows;
+}
+
+JDIMENSION jpegli_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) {
+  // TODO(szabadka) Skip the IDCT for skipped over blocks.
+  return jpegli_read_scanlines(cinfo, nullptr, num_lines);  // null row array
+}
+
+void jpegli_crop_scanline(j_decompress_ptr cinfo, JDIMENSION* xoffset,
+                          JDIMENSION* width) {  // both are in/out parameters
+  jpeg_decomp_master* m = cinfo->master;
+  if ((cinfo->global_state != jpegli::kDecProcessScan &&
+       cinfo->global_state != jpegli::kDecProcessMarkers) ||
+      cinfo->output_scanline != 0) {  // must precede the first scanline
+    JPEGLI_ERROR("jpegli_crop_scanline: unexpected state %d",
+                 cinfo->global_state);
+  }
+  if (cinfo->raw_data_out) {
+    JPEGLI_ERROR("Output cropping is not supported in raw data mode");
+  }
+  if (xoffset == nullptr || width == nullptr || *width == 0 ||  // wrap-safe:
+      *width > cinfo->output_width || *xoffset > cinfo->output_width - *width) {
+    JPEGLI_ERROR("jpegli_crop_scanline: Invalid arguments");
+  }
+  // TODO(szabadka) Skip the IDCT for skipped over blocks.
+  size_t xend = *xoffset + *width;  // cannot wrap after the check above
+  size_t iMCU_width = m->min_scaled_dct_size * cinfo->max_h_samp_factor;
+  *xoffset = (*xoffset / iMCU_width) * iMCU_width;  // round down to iMCU edge
+  *width = xend - *xoffset;  // widened so the right edge stays at xend
+  cinfo->master->xoffset_ = *xoffset;
+  cinfo->output_width = *width;
+}
+
+JDIMENSION jpegli_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data,
+                                JDIMENSION max_lines) {  // one iMCU row per call
+  if ((cinfo->global_state != jpegli::kDecProcessScan &&
+       cinfo->global_state != jpegli::kDecProcessMarkers) ||
+      !cinfo->raw_data_out) {  // raw mode must have been requested
+    JPEGLI_ERROR("jpegli_read_raw_data: unexpected state %d",
+                 cinfo->global_state);
+  }
+  size_t iMCU_height = cinfo->max_v_samp_factor * DCTSIZE;
+  if (max_lines < iMCU_height) {  // buffer must hold a full iMCU row
+    JPEGLI_ERROR("jpegli_read_raw_data: output buffer too small");
+  }
+  jpegli::ProgressMonitorOutputPass(cinfo);
+  while (!jpegli::IsInputReady(cinfo)) {
+    if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+      return 0;  // suspended: nothing produced
+    }
+  }
+  if (cinfo->output_iMCU_row < cinfo->total_iMCU_rows) {
+    jpegli::ProcessRawOutput(cinfo, data);
+    return iMCU_height;
+  }
+  return 0;  // all iMCU rows were already output
+}
+
+jvirt_barray_ptr* jpegli_read_coefficients(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* m = cinfo->master;
+  m->streaming_mode_ = false;  // full-height coefficient arrays are needed
+  if (!cinfo->buffered_image && cinfo->global_state == jpegli::kDecHeaderDone) {
+    jpegli::AllocateCoefficientBuffer(cinfo);
+    jpegli_calc_output_dimensions(cinfo);
+    jpegli::InitProgressMonitor(cinfo, /*coef_only=*/true);
+    jpegli::PrepareForScan(cinfo);
+  }
+  if (cinfo->global_state != jpegli::kDecProcessScan &&
+      cinfo->global_state != jpegli::kDecProcessMarkers) {
+    JPEGLI_ERROR("jpegli_read_coefficients: unexpected state %d",
+                 cinfo->global_state);
+  }
+  if (!cinfo->buffered_image) {
+    while (!m->found_eoi_) {  // read the whole file
+      jpegli::ProgressMonitorInputPass(cinfo);
+      if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+        return nullptr;  // suspended: no arrays returned on this call
+      }
+    }
+    cinfo->output_scanline = cinfo->output_height;  // mark output as complete
+  }
+  return m->coef_arrays;
+}
+
+boolean jpegli_finish_decompress(j_decompress_ptr cinfo) {  // FALSE: suspended
+  if (cinfo->global_state != jpegli::kDecProcessScan &&
+      cinfo->global_state != jpegli::kDecProcessMarkers) {
+    JPEGLI_ERROR("jpegli_finish_decompress: unexpected state %d",
+                 cinfo->global_state);
+  }
+  if (!cinfo->buffered_image && cinfo->output_scanline < cinfo->output_height) {
+    JPEGLI_ERROR("Incomplete output");  // rows remain unread
+  }
+  while (!cinfo->master->found_eoi_) {  // consume remaining input up to EOI
+    if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+      return FALSE;
+    }
+  }
+  (*cinfo->src->term_source)(cinfo);
+  jpegli_abort_decompress(cinfo);
+  return TRUE;
+}
+
+boolean jpegli_resync_to_restart(j_decompress_ptr cinfo, int desired) {
+  JPEGLI_WARN("Invalid restart marker found: 0x%02x vs 0x%02x.",
+              cinfo->unread_marker, 0xd0 + desired);  // found vs. expected
+  // This is a trivial implementation, we just let the decoder skip the entire
+  // scan and attempt to render the partial input.
+  return TRUE;  // always TRUE; unread_marker is left unchanged
+}
+
+void jpegli_new_colormap(j_decompress_ptr cinfo) {  // validates, then flags
+  if (cinfo->global_state != jpegli::kDecProcessScan &&
+      cinfo->global_state != jpegli::kDecProcessMarkers) {
+    JPEGLI_ERROR("jpegli_new_colormap: unexpected state %d",
+                 cinfo->global_state);
+  }
+  if (!cinfo->buffered_image) {
+    JPEGLI_ERROR("jpegli_new_colormap: not in  buffered image mode");
+  }
+  if (!cinfo->enable_external_quant) {
+    JPEGLI_ERROR("external colormap quantizer was not enabled");
+  }
+  if (!cinfo->quantize_colors || cinfo->colormap == nullptr) {
+    JPEGLI_ERROR("jpegli_new_colormap: not in external colormap mode");
+  }
+  cinfo->master->regenerate_inverse_colormap_ = true;  // read when quant set up
+}
+
+void jpegli_set_output_format(j_decompress_ptr cinfo, JpegliDataType data_type,
+                              JpegliEndianness endianness) {
+  switch (data_type) {  // only the three listed sample types are accepted
+    case JPEGLI_TYPE_UINT8:
+    case JPEGLI_TYPE_UINT16:
+    case JPEGLI_TYPE_FLOAT:
+      cinfo->master->output_data_type_ = data_type;
+      break;
+    default:
+      JPEGLI_ERROR("Unsupported data type %d", data_type);
+  }
+  switch (endianness) {  // store whether output bytes need swapping
+    case JPEGLI_NATIVE_ENDIAN:
+      cinfo->master->swap_endianness_ = false;
+      break;
+    case JPEGLI_LITTLE_ENDIAN:
+      cinfo->master->swap_endianness_ = !IsLittleEndian();  // swap on BE host
+      break;
+    case JPEGLI_BIG_ENDIAN:
+      cinfo->master->swap_endianness_ = IsLittleEndian();  // swap on LE host
+      break;
+    default:
+      JPEGLI_ERROR("Unsupported endianness %d", endianness);
+  }
+}
diff --git a/third-party/libjxl/libjxl/lib/jpegli/decode.h b/third-party/libjxl/libjxl/lib/jpegli/decode.h
new file mode 100644
index 0000000000..c862630f6b
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/decode.h
@@ -0,0 +1,106 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// This file contains the C API of the decoder part of the libjpegli library,
+// which is based on the C API of libjpeg, with the function names changed from
+// jpeg_* to jpegli_*, while decompressor object definitions are included
+// directly from jpeglib.h
+//
+// Applications can use the libjpegli library in one of the following ways:
+//
+//  (1) Include jpegli/encode.h and/or jpegli/decode.h, update the function
+//      names of the API and link against libjpegli.
+//
+//  (2) Leave the application code unchanged, but replace the libjpeg.so library
+//      with the one built by this project that is API- and ABI-compatible with
+//      libjpeg-turbo's version of libjpeg.so.
+
+#ifndef LIB_JPEGLI_DECODE_H_
+#define LIB_JPEGLI_DECODE_H_
+
+#include "lib/jpegli/common.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define jpegli_create_decompress(cinfo)              \
+  jpegli_CreateDecompress((cinfo), JPEG_LIB_VERSION, \
+                          (size_t)sizeof(struct jpeg_decompress_struct))
+
+void jpegli_CreateDecompress(j_decompress_ptr cinfo, int version,
+                             size_t structsize);
+
+void jpegli_stdio_src(j_decompress_ptr cinfo, FILE *infile);
+
+void jpegli_mem_src(j_decompress_ptr cinfo, const unsigned char *inbuffer,
+                    unsigned long insize);
+
+int jpegli_read_header(j_decompress_ptr cinfo, boolean require_image);
+
+boolean jpegli_start_decompress(j_decompress_ptr cinfo);
+
+JDIMENSION jpegli_read_scanlines(j_decompress_ptr cinfo, JSAMPARRAY scanlines,
+                                 JDIMENSION max_lines);
+
+JDIMENSION jpegli_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines);
+
+void jpegli_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
+                          JDIMENSION *width);
+
+boolean jpegli_finish_decompress(j_decompress_ptr cinfo);
+
+JDIMENSION jpegli_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data,
+                                JDIMENSION max_lines);
+
+jvirt_barray_ptr *jpegli_read_coefficients(j_decompress_ptr cinfo);
+
+boolean jpegli_has_multiple_scans(j_decompress_ptr cinfo);
+
+boolean jpegli_start_output(j_decompress_ptr cinfo, int scan_number);
+
+boolean jpegli_finish_output(j_decompress_ptr cinfo);
+
+boolean jpegli_input_complete(j_decompress_ptr cinfo);
+
+int jpegli_consume_input(j_decompress_ptr cinfo);
+
+#if JPEG_LIB_VERSION >= 80
+void jpegli_core_output_dimensions(j_decompress_ptr cinfo);
+#endif
+void jpegli_calc_output_dimensions(j_decompress_ptr cinfo);
+
+void jpegli_save_markers(j_decompress_ptr cinfo, int marker_code,
+                         unsigned int length_limit);
+
+void jpegli_set_marker_processor(j_decompress_ptr cinfo, int marker_code,
+                                 jpeg_marker_parser_method routine);
+
+boolean jpegli_resync_to_restart(j_decompress_ptr cinfo, int desired);
+
+boolean jpegli_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr,
+                                unsigned int *icc_data_len);
+
+void jpegli_abort_decompress(j_decompress_ptr cinfo);
+
+void jpegli_destroy_decompress(j_decompress_ptr cinfo);
+
+void jpegli_new_colormap(j_decompress_ptr cinfo);
+
+//
+// New API functions that are not available in libjpeg
+//
+// NOTE: This part of the API is still experimental and will probably change in
+// the future.
+//
+
+void jpegli_set_output_format(j_decompress_ptr cinfo, JpegliDataType data_type,
+                              JpegliEndianness endianness);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  // extern "C"
+#endif
+
+#endif  // LIB_JPEGLI_DECODE_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/decode_api_test.cc b/third-party/libjxl/libjxl/lib/jpegli/decode_api_test.cc
new file mode 100644
index 0000000000..39dd693ce0
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/decode_api_test.cc
@@ -0,0 +1,1305 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include <cmath>
+#include <vector>
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jpegli {
+namespace {
+
+static constexpr uint8_t kFakeEoiMarker[2] = {0xff, 0xd9};  // served by SourceManager once the real input is exhausted
+static constexpr size_t kNumSourceBuffers = 4;  // number of chunk buffers SourceManager rotates through
+
+// Custom source manager that refills the input buffer in chunks, simulating
+// a file reader with a fixed buffer size (max_chunk_size == 0: one chunk).
+class SourceManager {
+ public:
+  SourceManager(const uint8_t* data, size_t len, size_t max_chunk_size)
+      : data_(data), len_(len), max_chunk_size_(max_chunk_size) {
+    pub_.skip_input_data = skip_input_data;
+    pub_.resync_to_restart = jpegli_resync_to_restart;
+    pub_.term_source = term_source;
+    pub_.init_source = init_source;
+    pub_.fill_input_buffer = fill_input_buffer;
+    if (max_chunk_size_ == 0) max_chunk_size_ = len;  // 0 selects a single chunk
+    buffers_.resize(kNumSourceBuffers, std::vector<uint8_t>(max_chunk_size_));
+    Reset();
+  }
+
+  void Reset() {  // empties the buffer and rewinds to the start of the data
+    pub_.next_input_byte = nullptr;
+    pub_.bytes_in_buffer = 0;
+    pos_ = 0;
+    chunk_idx_ = 0;
+  }
+
+  ~SourceManager() {  // expects all input to have been handed out and consumed
+    EXPECT_EQ(0, pub_.bytes_in_buffer);
+    EXPECT_EQ(len_, pos_);
+  }
+
+ private:
+  jpeg_source_mgr pub_;  // first member: cinfo->src is cast to/from SourceManager*
+  const uint8_t* data_;  // input bytes to serve
+  size_t len_;  // input length; grows by 2 when the fake EOI marker is served
+  size_t chunk_idx_;  // refill counter; selects which buffer is filled next
+  size_t pos_;  // number of input bytes handed out or skipped so far
+  size_t max_chunk_size_;  // upper bound on the bytes delivered per refill
+  std::vector<std::vector<uint8_t>> buffers_;  // chunk buffers used in rotation
+
+  static void init_source(j_decompress_ptr cinfo) {}
+
+  static boolean fill_input_buffer(j_decompress_ptr cinfo) {
+    auto src = reinterpret_cast<SourceManager*>(cinfo->src);
+    if (src->pos_ < src->len_) {  // real input left: copy the next chunk
+      size_t chunk_size = std::min(src->len_ - src->pos_, src->max_chunk_size_);
+      size_t next_idx = ++src->chunk_idx_ % kNumSourceBuffers;
+      uint8_t* next_buffer = src->buffers_[next_idx].data();
+      memcpy(next_buffer, src->data_ + src->pos_, chunk_size);
+      src->pub_.next_input_byte = next_buffer;
+      src->pub_.bytes_in_buffer = chunk_size;
+    } else {  // input exhausted: serve a fake EOI marker instead
+      src->pub_.next_input_byte = kFakeEoiMarker;
+      src->pub_.bytes_in_buffer = 2;
+      src->len_ += 2;  // keeps the destructor's len_ == pos_ check satisfiable
+    }
+    src->pos_ += src->pub_.bytes_in_buffer;
+    return TRUE;
+  }
+
+  static void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {
+    auto src = reinterpret_cast<SourceManager*>(cinfo->src);
+    if (num_bytes <= 0) {
+      return;
+    }
+    if (src->pub_.bytes_in_buffer >= static_cast<size_t>(num_bytes)) {
+      src->pub_.bytes_in_buffer -= num_bytes;
+      src->pub_.next_input_byte += num_bytes;
+    } else {  // skip extends past the buffer: drop it and advance pos_ instead
+      src->pos_ += num_bytes - src->pub_.bytes_in_buffer;
+      src->pub_.bytes_in_buffer = 0;
+    }
+  }
+
+  static void term_source(j_decompress_ptr cinfo) {}
+};
+
+uint8_t markers_seen[kMarkerSequenceLen];  // marker codes recorded by test_marker_processor
+size_t num_markers_seen = 0;  // how many entries of markers_seen have been written
+
+uint8_t get_next_byte(j_decompress_ptr cinfo) {
+  // Returns the next input byte, refilling the source buffer when it is empty.
+  jpeg_source_mgr* const src = cinfo->src;
+  if (src->bytes_in_buffer == 0) src->fill_input_buffer(cinfo);
+  --src->bytes_in_buffer;
+  return *src->next_input_byte++;
+}
+
+boolean test_marker_processor(j_decompress_ptr cinfo) {  // records the marker, checks and skips its payload
+  markers_seen[num_markers_seen] = cinfo->unread_marker;
+  // High byte first; in a single `+` expression the call order is unspecified.
+  size_t marker_len = get_next_byte(cinfo);
+  marker_len = (marker_len << 8) + get_next_byte(cinfo);
+  EXPECT_EQ(2 + ((num_markers_seen + 2) % sizeof(kMarkerData)), marker_len);
+  if (marker_len > 2) (*cinfo->src->skip_input_data)(cinfo, marker_len - 2);
+  ++num_markers_seen;
+  return TRUE;
+}
+
+void ReadOutputImage(const DecompressParams& dparams, j_decompress_ptr cinfo,
+                     TestImage* output) {  // reads every output row of the current pass into *output
+  JDIMENSION xoffset = 0;
+  JDIMENSION yoffset = 0;
+  JDIMENSION xsize_cropped = cinfo->output_width;
+  JDIMENSION ysize_cropped = cinfo->output_height;
+  if (dparams.crop_output) {  // request the middle third in each dimension
+    xoffset = xsize_cropped = cinfo->output_width / 3;
+    yoffset = ysize_cropped = cinfo->output_height / 3;
+    jpegli_crop_scanline(cinfo, &xoffset, &xsize_cropped);
+  }
+  output->ysize = ysize_cropped;
+  output->xsize = cinfo->output_width;
+  output->components = cinfo->out_color_components;
+  output->data_type = dparams.data_type;
+  output->endianness = dparams.endianness;
+  size_t bytes_per_sample = jpegli_bytes_per_sample(dparams.data_type);
+  if (cinfo->raw_data_out) {  // one plane per component, sized in whole blocks
+    output->color_space = cinfo->jpeg_color_space;
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+      size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+      std::vector<uint8_t> plane(ysize * xsize * bytes_per_sample);
+      output->raw_data.emplace_back(std::move(plane));
+    }
+  } else {  // interleaved pixel output
+    output->color_space = cinfo->out_color_space;
+    output->AllocatePixels();
+  }
+  size_t total_output_lines = 0;
+  while (cinfo->output_scanline < cinfo->output_height) {
+    size_t max_lines;
+    size_t num_output_lines;
+    if (cinfo->raw_data_out) {  // read one iMCU row of raw component data
+      size_t iMCU_height = cinfo->max_v_samp_factor * DCTSIZE;
+      EXPECT_EQ(cinfo->output_scanline, cinfo->output_iMCU_row * iMCU_height);
+      max_lines = iMCU_height;
+      std::vector<std::vector<JSAMPROW>> rowdata(cinfo->num_components);
+      std::vector<JSAMPARRAY> data(cinfo->num_components);
+      for (int c = 0; c < cinfo->num_components; ++c) {
+        size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+        size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+        size_t num_lines = cinfo->comp_info[c].v_samp_factor * DCTSIZE;
+        rowdata[c].resize(num_lines);
+        size_t y0 = cinfo->output_iMCU_row * num_lines;
+        for (size_t i = 0; i < num_lines; ++i) {
+          rowdata[c][i] =
+              y0 + i < ysize ? &output->raw_data[c][(y0 + i) * xsize] : nullptr;  // rows past the plane get no buffer
+        }
+        data[c] = &rowdata[c][0];
+      }
+      num_output_lines = jpegli_read_raw_data(cinfo, &data[0], max_lines);
+    } else {
+      size_t max_output_lines = dparams.max_output_lines;
+      if (max_output_lines == 0) max_output_lines = cinfo->output_height;  // 0 means no per-call limit
+      if (cinfo->output_scanline < yoffset) {  // rows above the crop window: skip
+        max_lines = yoffset - cinfo->output_scanline;
+        num_output_lines = jpegli_skip_scanlines(cinfo, max_lines);
+      } else if (cinfo->output_scanline >= yoffset + ysize_cropped) {  // rows below the crop window: skip
+        max_lines = cinfo->output_height - cinfo->output_scanline;
+        num_output_lines = jpegli_skip_scanlines(cinfo, max_lines);
+      } else {  // rows inside the crop window: read into output->pixels
+        size_t lines_left = yoffset + ysize_cropped - cinfo->output_scanline;
+        max_lines = std::min<size_t>(max_output_lines, lines_left);
+        size_t stride = cinfo->output_width * cinfo->out_color_components *
+                        bytes_per_sample;
+        std::vector<JSAMPROW> scanlines(max_lines);
+        for (size_t i = 0; i < max_lines; ++i) {
+          size_t yidx = cinfo->output_scanline - yoffset + i;
+          scanlines[i] = &output->pixels[yidx * stride];
+        }
+        num_output_lines =
+            jpegli_read_scanlines(cinfo, &scanlines[0], max_lines);
+        if (cinfo->quantize_colors) {  // presumably maps colormap indices back to colors -- confirm in UnmapColors
+          for (size_t i = 0; i < num_output_lines; ++i) {
+            UnmapColors(scanlines[i], cinfo->output_width,
+                        cinfo->out_color_components, cinfo->colormap,
+                        cinfo->actual_number_of_colors);
+          }
+        }
+      }
+    }
+    total_output_lines += num_output_lines;
+    EXPECT_EQ(total_output_lines, cinfo->output_scanline);
+    EXPECT_EQ(num_output_lines, max_lines);  // every request must be served in full
+  }
+  EXPECT_EQ(cinfo->total_iMCU_rows,
+            DivCeil(cinfo->image_height, cinfo->max_v_samp_factor * DCTSIZE));
+}
+
+struct TestConfig {  // parameters of one parameterized decode test case
+  std::string fn;  // test data file to decode; empty means encode `input` instead
+  std::string fn_desc;  // short label for `fn` (presumably used in test names -- confirm)
+  TestImage input;  // source image; pixels are generated when `fn` is empty
+  CompressParams jparams;  // encoder settings used to produce the JPEG
+  DecompressParams dparams;  // decoder settings under test
+  bool compare_to_orig = false;  // true: compare RMS distance to `input` instead of to libjpeg output
+  float max_tolerance_factor = 1.01f;  // allowed ratio of jpegli RMS to libjpeg RMS
+  float max_rms_dist = 1.0f;  // passed to VerifyOutputImage
+  float max_diff = 35.0f;  // passed to VerifyOutputImage
+};
+
+std::vector<uint8_t> GetTestJpegData(TestConfig& config) {
+  // Loads config.fn (or encodes generated pixels), truncated by size_factor.
+  std::vector<uint8_t> jpeg;
+  if (config.fn.empty()) {
+    GeneratePixels(&config.input);
+    JXL_CHECK(EncodeWithJpegli(config.input, config.jparams, &jpeg));
+  } else {
+    jpeg = ReadTestData(config.fn.c_str());
+  }
+  const auto size_factor = config.dparams.size_factor;
+  if (size_factor < 1.0f) jpeg.resize(jpeg.size() * size_factor);
+  return jpeg;
+}
+
+void TestAPINonBuffered(const CompressParams& jparams,
+                        const DecompressParams& dparams,
+                        const TestImage& expected_output,
+                        j_decompress_ptr cinfo, TestImage* output) {  // single-pass decode of one image with API checks
+  if (jparams.add_marker) {  // register marker saving/processing before the header is read
+    jpegli_save_markers(cinfo, kSpecialMarker0, 0xffff);
+    jpegli_save_markers(cinfo, kSpecialMarker1, 0xffff);
+    num_markers_seen = 0;
+    jpegli_set_marker_processor(cinfo, 0xe6, test_marker_processor);
+    jpegli_set_marker_processor(cinfo, 0xe7, test_marker_processor);
+    jpegli_set_marker_processor(cinfo, 0xe8, test_marker_processor);
+  }
+  if (!jparams.icc.empty()) {  // the ICC profile is read back from saved APP2 markers
+    jpegli_save_markers(cinfo, JPEG_APP0 + 2, 0xffff);
+  }
+  jpegli_read_header(cinfo, /*require_image=*/TRUE);
+  if (jparams.add_marker) {
+    EXPECT_EQ(num_markers_seen, kMarkerSequenceLen);
+    EXPECT_EQ(0, memcmp(markers_seen, kMarkerSequence, num_markers_seen));
+  }
+  if (!jparams.icc.empty()) {
+    uint8_t* icc_data = nullptr;
+    unsigned int icc_len = 0;  // initialized so a failed read cannot leave it indeterminate
+    JXL_CHECK(jpegli_read_icc_profile(cinfo, &icc_data, &icc_len));
+    JXL_CHECK(icc_data && icc_len == jparams.icc.size());  // lengths must match before memcmp reads icc_len bytes
+    EXPECT_EQ(0, memcmp(jparams.icc.data(), icc_data, icc_len));
+    free(icc_data);
+  }
+  // Check that jpegli_calc_output_dimensions can be called multiple times
+  // even with different parameters.
+  if (!cinfo->raw_data_out) {
+    cinfo->scale_num = 1;
+    cinfo->scale_denom = 2;
+  }
+  jpegli_calc_output_dimensions(cinfo);
+  SetDecompressParams(dparams, cinfo);  // now apply the parameters actually under test
+  jpegli_set_output_format(cinfo, dparams.data_type, dparams.endianness);
+  VerifyHeader(jparams, cinfo);
+  jpegli_calc_output_dimensions(cinfo);
+  EXPECT_LE(expected_output.xsize, cinfo->output_width);
+  if (!dparams.crop_output) {
+    EXPECT_EQ(expected_output.xsize, cinfo->output_width);
+  }
+  if (dparams.output_mode == COEFFICIENTS) {  // coefficient mode reads no scanlines
+    jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(cinfo);
+    JXL_CHECK(coef_arrays != nullptr);
+    CopyCoefficients(cinfo, coef_arrays, output);
+  } else {
+    jpegli_start_decompress(cinfo);
+    VerifyScanHeader(jparams, cinfo);
+    ReadOutputImage(dparams, cinfo, output);
+  }
+  jpegli_finish_decompress(cinfo);
+}
+
+void TestAPIBuffered(const CompressParams& jparams,
+                     const DecompressParams& dparams, j_decompress_ptr cinfo,
+                     std::vector<TestImage>* output_progression) {  // buffered-image decode; appends one image per output pass
+  EXPECT_EQ(JPEG_REACHED_SOS,
+            jpegli_read_header(cinfo, /*require_image=*/TRUE));
+  cinfo->buffered_image = TRUE;
+  SetDecompressParams(dparams, cinfo);
+  jpegli_set_output_format(cinfo, dparams.data_type, dparams.endianness);
+  VerifyHeader(jparams, cinfo);
+  EXPECT_TRUE(jpegli_start_decompress(cinfo));
+  // start decompress should not read the whole input in buffered image mode
+  EXPECT_FALSE(jpegli_input_complete(cinfo));
+  bool has_multiple_scans = jpegli_has_multiple_scans(cinfo);
+  EXPECT_EQ(0, cinfo->output_scan_number);
+  int sos_marker_cnt = 1;  // read_header reads the first SOS marker
+  while (!jpegli_input_complete(cinfo)) {
+    EXPECT_EQ(cinfo->input_scan_number, sos_marker_cnt);
+    if (dparams.skip_scans && (cinfo->input_scan_number % 2) != 1) {  // skip even-numbered scans without producing output
+      int result = JPEG_SUSPENDED;
+      while (result != JPEG_REACHED_SOS && result != JPEG_REACHED_EOI) {  // consume up to the next SOS or EOI
+        result = jpegli_consume_input(cinfo);
+      }
+      if (result == JPEG_REACHED_SOS) ++sos_marker_cnt;
+      continue;
+    }
+    SetScanDecompressParams(dparams, cinfo, cinfo->input_scan_number);
+    EXPECT_TRUE(jpegli_start_output(cinfo, cinfo->input_scan_number));
+    // start output sets output_scan_number, but does not change
+    // input_scan_number
+    EXPECT_EQ(cinfo->output_scan_number, cinfo->input_scan_number);
+    EXPECT_EQ(cinfo->input_scan_number, sos_marker_cnt);
+    VerifyScanHeader(jparams, cinfo);
+    TestImage output;
+    ReadOutputImage(dparams, cinfo, &output);
+    output_progression->emplace_back(std::move(output));
+    // read scanlines/read raw data does not change input/output scan number
+    EXPECT_EQ(cinfo->input_scan_number, sos_marker_cnt);
+    EXPECT_EQ(cinfo->output_scan_number, cinfo->input_scan_number);
+    EXPECT_TRUE(jpegli_finish_output(cinfo));
+    ++sos_marker_cnt;  // finish output reads the next SOS marker or EOI
+    if (dparams.output_mode == COEFFICIENTS) {  // also attach the coefficients to the image just stored
+      jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(cinfo);
+      JXL_CHECK(coef_arrays != nullptr);
+      CopyCoefficients(cinfo, coef_arrays, &output_progression->back());
+    }
+  }
+  jpegli_finish_decompress(cinfo);
+  if (dparams.size_factor == 1.0f) {  // scan-count check applies only to untruncated input
+    EXPECT_EQ(has_multiple_scans, cinfo->input_scan_number > 1);
+  }
+}
+
+TEST(DecodeAPITest, ReuseCinfo) {  // one decompress object reused across many inputs and settings
+  TestImage input, output, expected;
+  std::vector<TestImage> output_progression, expected_output_progression;
+  CompressParams jparams;
+  DecompressParams dparams;
+  std::vector<uint8_t> compressed;
+  jpeg_decompress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);  // created once; every iteration below reuses it
+    input.xsize = 129;
+    input.ysize = 73;
+    GeneratePixels(&input);
+    for (int h_samp : {2, 1}) {
+      for (int v_samp : {2, 1}) {
+        for (int progr : {0, 2}) {
+          jparams.h_sampling = {h_samp, 1, 1};
+          jparams.v_sampling = {v_samp, 1, 1};
+          jparams.progressive_mode = progr;
+          printf(
+              "Generating input with %dx%d chroma subsampling "
+              "progressive level %d\n",
+              h_samp, v_samp, progr);
+          JXL_CHECK(EncodeWithJpegli(input, jparams, &compressed));
+          for (JpegIOMode output_mode : {PIXELS, RAW_DATA, COEFFICIENTS}) {
+            for (bool crop : {true, false}) {
+              if (crop && output_mode != PIXELS) continue;  // cropping is tried with pixel output only
+              for (int scale_num : {1, 2, 3, 4, 7, 8, 13, 16}) {
+                if (scale_num != 8 && output_mode != PIXELS) continue;  // scaling is tried with pixel output only
+                int scale_denom = 8;
+                while (scale_num % 2 == 0 && scale_denom % 2 == 0) {  // reduce scale_num/8 to lowest terms
+                  scale_num /= 2;
+                  scale_denom /= 2;
+                }
+                printf("Decoding with output mode %d output scaling %d/%d %s\n",
+                       output_mode, scale_num, scale_denom,
+                       crop ? "with cropped output" : "");
+                dparams.output_mode = output_mode;
+                dparams.scale_num = scale_num;
+                dparams.scale_denom = scale_denom;
+                expected.Clear();
+                DecodeWithLibjpeg(jparams, dparams, compressed, &expected);  // reference result
+                output.Clear();
+                cinfo.buffered_image = false;
+                cinfo.raw_data_out = false;
+                cinfo.scale_num = cinfo.scale_denom = 1;
+                SourceManager src(compressed.data(), compressed.size(),
+                                  1u << 12);
+                cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+                jpegli_read_header(&cinfo, /*require_image=*/TRUE);
+                jpegli_abort_decompress(&cinfo);  // abandon this pass; the decodes below restart from the beginning
+                src.Reset();
+                TestAPINonBuffered(jparams, dparams, expected, &cinfo, &output);
+                float max_rms = output_mode == COEFFICIENTS ? 0.0f : 1.0f;
+                if (scale_num == 1 && scale_denom == 8 && h_samp != v_samp) {
+                  max_rms = 5.0f;  // libjpeg does not do fancy upsampling
+                }
+                VerifyOutputImage(expected, output, max_rms);
+                printf("Decoding in buffered image mode\n");
+                expected_output_progression.clear();
+                DecodeAllScansWithLibjpeg(jparams, dparams, compressed,
+                                          &expected_output_progression);
+                output_progression.clear();
+                src.Reset();
+                TestAPIBuffered(jparams, dparams, &cinfo, &output_progression);
+                JXL_CHECK(output_progression.size() ==
+                          expected_output_progression.size());
+                for (size_t i = 0; i < output_progression.size(); ++i) {
+                  const TestImage& output = output_progression[i];  // shadows the outer `output`
+                  const TestImage& expected = expected_output_progression[i];  // shadows the outer `expected`
+                  VerifyOutputImage(expected, output, max_rms);
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+    return true;
+  };
+  ASSERT_TRUE(try_catch_block());
+  jpegli_destroy_decompress(&cinfo);
+}
+
+std::vector<TestConfig> GenerateBasicConfigs() {
+  std::vector<TestConfig> result;  // one entry per (sampling, progressive) pair
+  for (int sampling : {1, 2}) {
+    for (int progressive : {0, 2}) {
+      TestConfig cfg;
+      cfg.jparams.progressive_mode = progressive;
+      cfg.jparams.h_sampling = {sampling, 1, 1};
+      cfg.jparams.v_sampling = {sampling, 1, 1};
+      cfg.input.xsize = 257 + 37 * sampling;
+      cfg.input.ysize = 265 + 17 * (progressive / 2);
+      GeneratePixels(&cfg.input);
+      result.push_back(cfg);
+    }
+  }
+  return result;
+}
+
+TEST(DecodeAPITest, ReuseCinfoSameMemSource) {  // several images through one memory destination, then one memory source
+  std::vector<TestConfig> all_configs = GenerateBasicConfigs();
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  {
+    jpeg_compress_struct cinfo;
+    const auto try_catch_block = [&]() -> bool {
+      ERROR_HANDLER_SETUP(jpegli);
+      jpegli_create_compress(&cinfo);
+      jpegli_mem_dest(&cinfo, &buffer, &buffer_size);  // destination is set once for all images
+      for (const TestConfig& config : all_configs) {
+        EncodeWithJpegli(config.input, config.jparams, &cinfo);
+      }
+      return true;
+    };
+    EXPECT_TRUE(try_catch_block());
+    jpegli_destroy_compress(&cinfo);
+  }
+  std::vector<TestImage> all_outputs(all_configs.size());
+  {
+    jpeg_decompress_struct cinfo;
+    const auto try_catch_block = [&]() -> bool {
+      ERROR_HANDLER_SETUP(jpegli);
+      jpegli_create_decompress(&cinfo);
+      jpegli_mem_src(&cinfo, buffer, buffer_size);  // source is set once; each decode continues from it
+      for (size_t i = 0; i < all_configs.size(); ++i) {
+        TestAPINonBuffered(all_configs[i].jparams, DecompressParams(),
+                           all_configs[i].input, &cinfo, &all_outputs[i]);
+      }
+      return true;
+    };
+    EXPECT_TRUE(try_catch_block());
+    jpegli_destroy_decompress(&cinfo);
+  }
+  for (size_t i = 0; i < all_configs.size(); ++i) {
+    VerifyOutputImage(all_configs[i].input, all_outputs[i], 2.35f);  // compared against the original pixels
+  }
+  if (buffer) free(buffer);
+}
+
+TEST(DecodeAPITest, ReuseCinfoSameStdSource) {  // same as the memory variant, but through one stdio FILE
+  std::vector<TestConfig> all_configs = GenerateBasicConfigs();
+  FILE* tmpf = tmpfile();
+  JXL_CHECK(tmpf);
+  {
+    jpeg_compress_struct cinfo;
+    const auto try_catch_block = [&]() -> bool {
+      ERROR_HANDLER_SETUP(jpegli);
+      jpegli_create_compress(&cinfo);
+      jpegli_stdio_dest(&cinfo, tmpf);  // destination is set once for all images
+      for (const TestConfig& config : all_configs) {
+        EncodeWithJpegli(config.input, config.jparams, &cinfo);
+      }
+      return true;
+    };
+    EXPECT_TRUE(try_catch_block());
+    jpegli_destroy_compress(&cinfo);
+  }
+  rewind(tmpf);  // read back from the start of what was just written
+  std::vector<TestImage> all_outputs(all_configs.size());
+  {
+    jpeg_decompress_struct cinfo;
+    const auto try_catch_block = [&]() -> bool {
+      ERROR_HANDLER_SETUP(jpegli);
+      jpegli_create_decompress(&cinfo);
+      jpegli_stdio_src(&cinfo, tmpf);  // source is set once; each decode continues from it
+      for (size_t i = 0; i < all_configs.size(); ++i) {
+        TestAPINonBuffered(all_configs[i].jparams, DecompressParams(),
+                           all_configs[i].input, &cinfo, &all_outputs[i]);
+      }
+      return true;
+    };
+    EXPECT_TRUE(try_catch_block());
+    jpegli_destroy_decompress(&cinfo);
+  }
+  for (size_t i = 0; i < all_configs.size(); ++i) {
+    VerifyOutputImage(all_configs[i].input, all_outputs[i], 2.35f);  // compared against the original pixels
+  }
+  fclose(tmpf);
+}
+
+TEST(DecodeAPITest, AbbreviatedStreams) {  // tables and image data travel in two separate streams
+  uint8_t* table_stream = nullptr;
+  unsigned long table_stream_size = 0;
+  uint8_t* data_stream = nullptr;
+  unsigned long data_stream_size = 0;
+  {
+    jpeg_compress_struct cinfo;
+    const auto try_catch_block = [&]() -> bool {
+      ERROR_HANDLER_SETUP(jpegli);
+      jpegli_create_compress(&cinfo);
+      jpegli_mem_dest(&cinfo, &table_stream, &table_stream_size);
+      cinfo.input_components = 3;
+      cinfo.in_color_space = JCS_RGB;
+      jpegli_set_defaults(&cinfo);
+      jpegli_write_tables(&cinfo);  // first stream: tables only
+      jpegli_mem_dest(&cinfo, &data_stream, &data_stream_size);  // second stream: the 1x1 image
+      cinfo.image_width = 1;
+      cinfo.image_height = 1;
+      cinfo.optimize_coding = FALSE;
+      jpegli_set_progressive_level(&cinfo, 0);
+      jpegli_start_compress(&cinfo, FALSE);  // FALSE: libjpeg's write_all_tables off -- confirm jpegli matches
+      JSAMPLE image[3] = {0};
+      JSAMPROW row[] = {image};
+      jpegli_write_scanlines(&cinfo, row, 1);
+      jpegli_finish_compress(&cinfo);
+      return true;
+    };
+    EXPECT_TRUE(try_catch_block());
+    EXPECT_LT(data_stream_size, 50);  // the image stream is expected to be tiny
+    jpegli_destroy_compress(&cinfo);
+  }
+  {
+    jpeg_decompress_struct cinfo = {};
+    const auto try_catch_block = [&]() -> bool {
+      ERROR_HANDLER_SETUP(jpegli);
+      jpegli_create_decompress(&cinfo);
+      jpegli_mem_src(&cinfo, table_stream, table_stream_size);
+      jpegli_read_header(&cinfo, FALSE);  // tables-only stream: no image required
+      jpegli_mem_src(&cinfo, data_stream, data_stream_size);
+      jpegli_read_header(&cinfo, TRUE);  // image stream, read by the same cinfo
+      EXPECT_EQ(1, cinfo.image_width);
+      EXPECT_EQ(1, cinfo.image_height);
+      EXPECT_EQ(3, cinfo.num_components);
+      jpegli_start_decompress(&cinfo);
+      JSAMPLE image[3] = {0};
+      JSAMPROW row[] = {image};
+      jpegli_read_scanlines(&cinfo, row, 1);
+      EXPECT_EQ(0, image[0]);  // the all-zero input pixel must decode to all zeros
+      EXPECT_EQ(0, image[1]);
+      EXPECT_EQ(0, image[2]);
+      jpegli_finish_decompress(&cinfo);
+      return true;
+    };
+    EXPECT_TRUE(try_catch_block());
+    jpegli_destroy_decompress(&cinfo);
+  }
+  if (table_stream) free(table_stream);
+  if (data_stream) free(data_stream);
+}
+
+class DecodeAPITestParam : public ::testing::TestWithParam<TestConfig> {};  // fixture for the non-buffered decode tests
+
+TEST_P(DecodeAPITestParam, TestAPI) {  // jpegli non-buffered decode checked against libjpeg
+  TestConfig config = GetParam();
+  const DecompressParams& dparams = config.dparams;
+  if (dparams.skip_scans) return;  // skip_scans is only handled by TestAPIBuffered
+  const std::vector<uint8_t> compressed = GetTestJpegData(config);
+  SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size);
+
+  TestImage output1;  // reference decode from libjpeg
+  DecodeWithLibjpeg(config.jparams, dparams, compressed, &output1);
+
+  TestImage output0;  // jpegli decode under test
+  jpeg_decompress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+    TestAPINonBuffered(config.jparams, dparams, output1, &cinfo, &output0);
+    return true;
+  };
+  ASSERT_TRUE(try_catch_block());
+  jpegli_destroy_decompress(&cinfo);
+
+  if (config.compare_to_orig) {  // judge both decoders by their distance to the original input
+    double rms0 = DistanceRms(config.input, output0);
+    double rms1 = DistanceRms(config.input, output1);
+    printf("rms: %f  vs  %f\n", rms0, rms1);
+    EXPECT_LE(rms0, rms1 * config.max_tolerance_factor);
+  } else {  // otherwise compare the two decodes directly
+    VerifyOutputImage(output0, output1, config.max_rms_dist, config.max_diff);
+  }
+}
+
+class DecodeAPITestParamBuffered : public ::testing::TestWithParam<TestConfig> {
+};  // fixture for the buffered-image-mode decode tests
+
+TEST_P(DecodeAPITestParamBuffered, TestAPI) {  // jpegli buffered-image decode checked pass by pass against libjpeg
+  TestConfig config = GetParam();
+  const DecompressParams& dparams = config.dparams;
+  const std::vector<uint8_t> compressed = GetTestJpegData(config);
+  SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size);
+
+  std::vector<TestImage> output_progression1;  // reference passes from libjpeg
+  DecodeAllScansWithLibjpeg(config.jparams, dparams, compressed,
+                            &output_progression1);
+
+  std::vector<TestImage> output_progression0;  // jpegli passes under test
+  jpeg_decompress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+    TestAPIBuffered(config.jparams, dparams, &cinfo, &output_progression0);
+    return true;
+  };
+  ASSERT_TRUE(try_catch_block());
+  jpegli_destroy_decompress(&cinfo);
+
+  ASSERT_EQ(output_progression0.size(), output_progression1.size());  // both decoders must yield the same number of passes
+  for (size_t i = 0; i < output_progression0.size(); ++i) {
+    const TestImage& output = output_progression0[i];
+    const TestImage& expected = output_progression1[i];
+    if (config.compare_to_orig) {  // judge both decoders by their distance to the original input
+      double rms0 = DistanceRms(config.input, output);
+      double rms1 = DistanceRms(config.input, expected);
+      printf("rms: %f  vs  %f\n", rms0, rms1);
+      EXPECT_LE(rms0, rms1 * config.max_tolerance_factor);
+    } else {  // otherwise compare the two decodes directly
+      VerifyOutputImage(expected, output, config.max_rms_dist, config.max_diff);
+    }
+  }
+}
+
+std::vector<TestConfig> GenerateTests(bool buffered) {
+  std::vector<TestConfig> all_tests;
+  {
+    std::vector<std::pair<std::string, std::string>> testfiles({
+        {"jxl/flower/flower.png.im_q85_420_progr.jpg", "Q85YUV420PROGR"},
+        {"jxl/flower/flower.png.im_q85_420_R13B.jpg", "Q85YUV420R13B"},
+        {"jxl/flower/flower.png.im_q85_444.jpg", "Q85YUV444"},
+    });
+    for (size_t i = 0; i < (buffered ? 1u : testfiles.size()); ++i) {
+      TestConfig config;
+      config.fn = testfiles[i].first;
+      config.fn_desc = testfiles[i].second;
+      for (size_t chunk_size : {0, 1, 64, 65536}) {
+        config.dparams.chunk_size = chunk_size;
+        for (size_t max_output_lines : {0, 1, 8, 16}) {
+          config.dparams.max_output_lines = max_output_lines;
+          config.dparams.output_mode = PIXELS;
+          all_tests.push_back(config);
+        }
+        {
+          config.dparams.max_output_lines = 16;
+          config.dparams.output_mode = RAW_DATA;
+          all_tests.push_back(config);
+        }
+      }
+    }
+  }
+
+  {
+    std::vector<std::pair<std::string, std::string>> testfiles({
+        {"jxl/flower/flower_small.q85_444_non_interleaved.jpg",
+         "Q85YUV444NonInterleaved"},
+        {"jxl/flower/flower_small.q85_420_non_interleaved.jpg",
+         "Q85YUV420NonInterleaved"},
+        {"jxl/flower/flower_small.q85_444_partially_interleaved.jpg",
+         "Q85YUV444PartiallyInterleaved"},
+        {"jxl/flower/flower_small.q85_420_partially_interleaved.jpg",
+         "Q85YUV420PartiallyInterleaved"},
+        {"jxl/flower/flower.png.im_q85_422.jpg", "Q85YUV422"},
+        {"jxl/flower/flower.png.im_q85_440.jpg", "Q85YUV440"},
+        {"jxl/flower/flower.png.im_q85_444_1x2.jpg", "Q85YUV444_1x2"},
+        {"jxl/flower/flower.png.im_q85_asymmetric.jpg", "Q85Asymmetric"},
+        {"jxl/flower/flower.png.im_q85_gray.jpg", "Q85Gray"},
+        {"jxl/flower/flower.png.im_q85_luma_subsample.jpg", "Q85LumaSubsample"},
+        {"jxl/flower/flower.png.im_q85_rgb.jpg", "Q85RGB"},
+        {"jxl/flower/flower.png.im_q85_rgb_subsample_blue.jpg",
+         "Q85RGBSubsampleBlue"},
+        {"jxl/flower/flower_small.cmyk.jpg", "CMYK"},
+    });
+    for (size_t i = 0; i < (buffered ? 4u : testfiles.size()); ++i) {
+      for (JpegIOMode output_mode : {PIXELS, RAW_DATA}) {
+        TestConfig config;
+        config.fn = testfiles[i].first;
+        config.fn_desc = testfiles[i].second;
+        config.dparams.output_mode = output_mode;
+        all_tests.push_back(config);
+      }
+    }
+  }
+
+  // Tests for common chroma subsampling and output modes.
+  for (JpegIOMode output_mode : {PIXELS, RAW_DATA, COEFFICIENTS}) {
+    for (int h_samp : {1, 2}) {
+      for (int v_samp : {1, 2}) {
+        for (bool fancy : {true, false}) {
+          if (!fancy && (output_mode != PIXELS || h_samp * v_samp == 1)) {
+            continue;
+          }
+          TestConfig config;
+          config.dparams.output_mode = output_mode;
+          config.dparams.do_fancy_upsampling = fancy;
+          config.jparams.progressive_mode = 2;
+          config.jparams.h_sampling = {h_samp, 1, 1};
+          config.jparams.v_sampling = {v_samp, 1, 1};
+          if (output_mode == COEFFICIENTS) {
+            config.max_rms_dist = 0.0f;
+          }
+          all_tests.push_back(config);
+        }
+      }
+    }
+  }
+
+  // Tests for partial input.
+  for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f}) {
+    for (int progr : {0, 1, 3}) {
+      for (int samp : {1, 2}) {
+        for (bool skip_scans : {false, true}) {
+          if (skip_scans && (progr != 1 || size_factor < 0.5f)) continue;
+          for (JpegIOMode output_mode : {PIXELS, RAW_DATA}) {
+            TestConfig config;
+            config.input.xsize = 517;
+            config.input.ysize = 523;
+            config.jparams.h_sampling = {samp, 1, 1};
+            config.jparams.v_sampling = {samp, 1, 1};
+            config.jparams.progressive_mode = progr;
+            config.dparams.size_factor = size_factor;
+            config.dparams.output_mode = output_mode;
+            config.dparams.skip_scans = skip_scans;
+            // The last partially available block can behave differently.
+            // TODO(szabadka) Figure out if we can make the behaviour more
+            // similar.
+            config.max_rms_dist = samp == 1 ? 1.75f : 3.0f;
+            config.max_diff = 255.0f;
+            all_tests.push_back(config);
+          }
+        }
+      }
+    }
+  }
+
+  // Tests for block smoothing.
+  for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f, 1.0f}) {
+    for (int samp : {1, 2}) {
+      for (bool skip_scans : {false, true}) {
+        if (skip_scans && size_factor < 0.3f) continue;
+        TestConfig config;
+        config.input.xsize = 517;
+        config.input.ysize = 523;
+        config.jparams.h_sampling = {samp, 1, 1};
+        config.jparams.v_sampling = {samp, 1, 1};
+        config.jparams.progressive_mode = 2;
+        config.dparams.size_factor = size_factor;
+        config.dparams.do_block_smoothing = true;
+        config.dparams.skip_scans = skip_scans;
+        // libjpeg does smoothing for incomplete scans differently at
+        // the border between current and previous scans.
+        config.max_rms_dist = 8.0f;
+        config.max_diff = 255.0f;
+        all_tests.push_back(config);
+      }
+    }
+  }
+
+  // Test for switching output color quantization modes between scans.
+  if (buffered) {
+    TestConfig config;
+    config.jparams.progressive_mode = 2;
+    config.dparams.quantize_colors = true;
+    config.dparams.scan_params = {
+        {3, JDITHER_NONE, CQUANT_1PASS},  {4, JDITHER_ORDERED, CQUANT_1PASS},
+        {5, JDITHER_FS, CQUANT_1PASS},    {6, JDITHER_NONE, CQUANT_EXTERNAL},
+        {8, JDITHER_NONE, CQUANT_REUSE},  {9, JDITHER_NONE, CQUANT_EXTERNAL},
+        {10, JDITHER_NONE, CQUANT_2PASS}, {11, JDITHER_NONE, CQUANT_REUSE},
+        {12, JDITHER_NONE, CQUANT_2PASS}, {13, JDITHER_FS, CQUANT_2PASS},
+    };
+    config.compare_to_orig = true;
+    config.max_tolerance_factor = 1.04f;
+    all_tests.push_back(config);
+  }
+
+  if (buffered) {
+    return all_tests;
+  }
+
+  // Tests for output color quantization.
+  for (int num_colors : {8, 64, 256}) {
+    for (ColorQuantMode mode : {CQUANT_1PASS, CQUANT_EXTERNAL, CQUANT_2PASS}) {
+      if (mode == CQUANT_EXTERNAL && num_colors != 256) continue;
+      for (J_DITHER_MODE dither : {JDITHER_NONE, JDITHER_ORDERED, JDITHER_FS}) {
+        if (mode == CQUANT_EXTERNAL && dither != JDITHER_NONE) continue;
+        if (mode != CQUANT_1PASS && dither == JDITHER_ORDERED) continue;
+        for (bool crop : {false, true}) {
+          for (bool scale : {false, true}) {
+            for (bool samp : {false, true}) {
+              if ((num_colors != 256) && (crop || scale || samp)) {
+                continue;
+              }
+              if (mode == CQUANT_2PASS && crop) continue;
+              TestConfig config;
+              config.input.xsize = 1024;
+              config.input.ysize = 768;
+              config.dparams.quantize_colors = true;
+              config.dparams.desired_number_of_colors = num_colors;
+              config.dparams.scan_params = {{kLastScan, dither, mode}};
+              config.dparams.crop_output = crop;
+              if (scale) {
+                config.dparams.scale_num = 7;
+                config.dparams.scale_denom = 8;
+              }
+              if (samp) {
+                config.jparams.h_sampling = {2, 1, 1};
+                config.jparams.v_sampling = {2, 1, 1};
+              }
+              if (!scale && !crop) {
+                config.compare_to_orig = true;
+                if (dither != JDITHER_NONE) {
+                  config.max_tolerance_factor = 1.05f;
+                }
+                if (mode == CQUANT_2PASS &&
+                    (num_colors == 8 || dither == JDITHER_FS)) {
+                  // TODO(szabadka) Lower this bound.
+                  config.max_tolerance_factor = 1.5f;
+                }
+              } else {
+                // We only test for buffer overflows, etc.
+                config.max_rms_dist = 100.0f;
+                config.max_diff = 255.0f;
+              }
+              all_tests.push_back(config);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // Tests for output formats.
+  for (JpegliDataType type :
+       {JPEGLI_TYPE_UINT8, JPEGLI_TYPE_UINT16, JPEGLI_TYPE_FLOAT}) {
+    for (JpegliEndianness endianness :
+         {JPEGLI_NATIVE_ENDIAN, JPEGLI_LITTLE_ENDIAN, JPEGLI_BIG_ENDIAN}) {
+      if (type == JPEGLI_TYPE_UINT8 && endianness != JPEGLI_NATIVE_ENDIAN) {
+        continue;
+      }
+      for (int channels = 1; channels <= 4; ++channels) {
+        TestConfig config;
+        config.dparams.data_type = type;
+        config.dparams.endianness = endianness;
+        config.input.color_space = JCS_UNKNOWN;
+        config.input.components = channels;
+        config.dparams.set_out_color_space = true;
+        config.dparams.out_color_space = JCS_UNKNOWN;
+        all_tests.push_back(config);
+      }
+    }
+  }
+  // Test for output cropping.
+  {
+    TestConfig config;
+    config.dparams.crop_output = true;
+    all_tests.push_back(config);
+  }
+  // Tests for color transforms.
+  for (J_COLOR_SPACE out_color_space : {JCS_RGB, JCS_GRAYSCALE}) {
+    TestConfig config;
+    config.input.xsize = config.input.ysize = 256;
+    config.input.color_space = JCS_GRAYSCALE;
+    config.dparams.set_out_color_space = true;
+    config.dparams.out_color_space = out_color_space;
+    all_tests.push_back(config);
+  }
+  for (J_COLOR_SPACE jpeg_color_space : {JCS_RGB, JCS_YCbCr}) {
+    for (J_COLOR_SPACE out_color_space : {JCS_RGB, JCS_YCbCr, JCS_GRAYSCALE}) {
+      if (jpeg_color_space == JCS_RGB && out_color_space == JCS_YCbCr) continue;
+      TestConfig config;
+      config.input.xsize = config.input.ysize = 256;
+      config.jparams.set_jpeg_colorspace = true;
+      config.jparams.jpeg_color_space = jpeg_color_space;
+      config.dparams.set_out_color_space = true;
+      config.dparams.out_color_space = out_color_space;
+      all_tests.push_back(config);
+    }
+  }
+  for (J_COLOR_SPACE jpeg_color_space : {JCS_CMYK, JCS_YCCK}) {
+    for (J_COLOR_SPACE out_color_space : {JCS_CMYK, JCS_YCCK}) {
+      if (jpeg_color_space == JCS_CMYK && out_color_space == JCS_YCCK) continue;
+      TestConfig config;
+      config.input.xsize = config.input.ysize = 256;
+      config.input.color_space = JCS_CMYK;
+      config.jparams.set_jpeg_colorspace = true;
+      config.jparams.jpeg_color_space = jpeg_color_space;
+      config.dparams.set_out_color_space = true;
+      config.dparams.out_color_space = out_color_space;
+      all_tests.push_back(config);
+    }
+  }
+  // Tests for progressive levels.
+  for (int p = 0; p < 3 + NumTestScanScripts(); ++p) {
+    TestConfig config;
+    config.jparams.progressive_mode = p;
+    all_tests.push_back(config);
+  }
+  // Tests for RST markers.
+  for (size_t r : {1, 17, 1024}) {
+    for (size_t chunk_size : {1, 65536}) {
+      for (int progr : {0, 2}) {
+        TestConfig config;
+        config.dparams.chunk_size = chunk_size;
+        config.jparams.progressive_mode = progr;
+        config.jparams.restart_interval = r;
+        all_tests.push_back(config);
+      }
+    }
+  }
+  for (size_t rr : {1, 3, 8, 100}) {
+    TestConfig config;
+    config.jparams.restart_in_rows = rr;
+    all_tests.push_back(config);
+  }
+  // Tests for custom quantization tables.
+  for (int type : {0, 1, 10, 100, 10000}) {
+    for (int scale : {1, 50, 100, 200, 500}) {
+      for (bool add_raw : {false, true}) {
+        for (bool baseline : {true, false}) {
+          if (!baseline && (add_raw || type * scale < 25500)) continue;
+          TestConfig config;
+          config.input.xsize = 64;
+          config.input.ysize = 64;
+          CustomQuantTable table;
+          table.table_type = type;
+          table.scale_factor = scale;
+          table.force_baseline = baseline;
+          table.add_raw = add_raw;
+          table.Generate();
+          config.jparams.quant_tables.push_back(table);
+          config.jparams.quant_indexes = {0, 0, 0};
+          config.compare_to_orig = true;
+          config.max_tolerance_factor = 1.02;
+          all_tests.push_back(config);
+        }
+      }
+    }
+  }
+  for (int qidx = 0; qidx < 8; ++qidx) {
+    if (qidx == 3) continue;
+    TestConfig config;
+    config.input.xsize = 256;
+    config.input.ysize = 256;
+    config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+                                    (qidx >> 0) & 1};
+    all_tests.push_back(config);
+  }
+  for (int qidx = 0; qidx < 8; ++qidx) {
+    for (int slot_idx = 0; slot_idx < 2; ++slot_idx) {
+      if (qidx == 0 && slot_idx == 0) continue;
+      TestConfig config;
+      config.input.xsize = 256;
+      config.input.ysize = 256;
+      config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+                                      (qidx >> 0) & 1};
+      CustomQuantTable table;
+      table.slot_idx = slot_idx;
+      table.Generate();
+      config.jparams.quant_tables.push_back(table);
+      all_tests.push_back(config);
+    }
+  }
+  for (int qidx = 0; qidx < 8; ++qidx) {
+    for (bool xyb : {false, true}) {
+      TestConfig config;
+      config.input.xsize = 256;
+      config.input.ysize = 256;
+      config.jparams.xyb_mode = xyb;
+      config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+                                      (qidx >> 0) & 1};
+      {
+        CustomQuantTable table;
+        table.slot_idx = 0;
+        table.Generate();
+        config.jparams.quant_tables.push_back(table);
+      }
+      {
+        CustomQuantTable table;
+        table.slot_idx = 1;
+        table.table_type = 20;
+        table.Generate();
+        config.jparams.quant_tables.push_back(table);
+      }
+      config.compare_to_orig = true;
+      all_tests.push_back(config);
+    }
+  }
+  for (bool xyb : {false, true}) {
+    TestConfig config;
+    config.input.xsize = 256;
+    config.input.ysize = 256;
+    config.jparams.xyb_mode = xyb;
+    config.jparams.quant_indexes = {0, 1, 2};
+    {
+      CustomQuantTable table;
+      table.slot_idx = 0;
+      table.Generate();
+      config.jparams.quant_tables.push_back(table);
+    }
+    {
+      CustomQuantTable table;
+      table.slot_idx = 1;
+      table.table_type = 20;
+      table.Generate();
+      config.jparams.quant_tables.push_back(table);
+    }
+    {
+      CustomQuantTable table;
+      table.slot_idx = 2;
+      table.table_type = 30;
+      table.Generate();
+      config.jparams.quant_tables.push_back(table);
+    }
+    config.compare_to_orig = true;
+    all_tests.push_back(config);
+  }
+  // Tests for fixed (and custom) prefix codes.
+  for (J_COLOR_SPACE jpeg_color_space : {JCS_RGB, JCS_YCbCr}) {
+    for (bool flat_dc_luma : {false, true}) {
+      TestConfig config;
+      config.jparams.set_jpeg_colorspace = true;
+      config.jparams.jpeg_color_space = jpeg_color_space;
+      config.jparams.progressive_mode = 0;
+      config.jparams.optimize_coding = 0;
+      config.jparams.use_flat_dc_luma_code = flat_dc_luma;
+      all_tests.push_back(config);
+    }
+  }
+  for (J_COLOR_SPACE jpeg_color_space : {JCS_CMYK, JCS_YCCK}) {
+    for (bool flat_dc_luma : {false, true}) {
+      TestConfig config;
+      config.input.color_space = JCS_CMYK;
+      config.jparams.set_jpeg_colorspace = true;
+      config.jparams.jpeg_color_space = jpeg_color_space;
+      config.jparams.progressive_mode = 0;
+      config.jparams.optimize_coding = 0;
+      config.jparams.use_flat_dc_luma_code = flat_dc_luma;
+      all_tests.push_back(config);
+    }
+  }
+  // Test for jpeg without DHT marker.
+  {
+    TestConfig config;
+    config.jparams.progressive_mode = 0;
+    config.jparams.optimize_coding = 0;
+    config.jparams.omit_standard_tables = true;
+    all_tests.push_back(config);
+  }
+  // Test for custom component ids.
+  {
+    TestConfig config;
+    config.input.xsize = config.input.ysize = 128;
+    config.jparams.comp_ids = {7, 17, 177};
+    all_tests.push_back(config);
+  }
+  // Tests for JFIF/Adobe markers.
+  for (int override_JFIF : {-1, 0, 1}) {
+    for (int override_Adobe : {-1, 0, 1}) {
+      if (override_JFIF == -1 && override_Adobe == -1) continue;
+      TestConfig config;
+      config.input.xsize = config.input.ysize = 128;
+      config.jparams.override_JFIF = override_JFIF;
+      config.jparams.override_Adobe = override_Adobe;
+      all_tests.push_back(config);
+    }
+  }
+  // Tests for small images.
+  for (int xsize : {1, 7, 8, 9, 15, 16, 17}) {
+    for (int ysize : {1, 7, 8, 9, 15, 16, 17}) {
+      TestConfig config;
+      config.input.xsize = xsize;
+      config.input.ysize = ysize;
+      all_tests.push_back(config);
+    }
+  }
+  // Tests for custom marker processor.
+  for (size_t chunk_size : {0, 1, 64, 65536}) {
+    TestConfig config;
+    config.input.xsize = config.input.ysize = 256;
+    config.dparams.chunk_size = chunk_size;
+    config.jparams.add_marker = true;
+    all_tests.push_back(config);
+  }
+  // Tests for icc profile decoding.
+  for (size_t icc_size : {728, 70000, 1000000}) {
+    TestConfig config;
+    config.input.xsize = config.input.ysize = 256;
+    config.jparams.icc.resize(icc_size);
+    for (size_t i = 0; i < icc_size; ++i) {
+      config.jparams.icc[i] = (i * 17) & 0xff;
+    }
+    all_tests.push_back(config);
+  }
+  // Tests for unusual sampling factors.
+  for (int h0_samp : {1, 2, 3, 4}) {
+    for (int v0_samp : {1, 2, 3, 4}) {
+      for (int dxb = 0; dxb < h0_samp; ++dxb) {
+        for (int dyb = 0; dyb < v0_samp; ++dyb) {
+          for (int dx = 0; dx < 2; ++dx) {
+            for (int dy = 0; dy < 2; ++dy) {
+              TestConfig config;
+              config.input.xsize = 128 + dyb * 8 + dy;
+              config.input.ysize = 256 + dxb * 8 + dx;
+              config.jparams.progressive_mode = 2;
+              config.jparams.h_sampling = {h0_samp, 1, 1};
+              config.jparams.v_sampling = {v0_samp, 1, 1};
+              config.compare_to_orig = true;
+              all_tests.push_back(config);
+            }
+          }
+        }
+      }
+    }
+  }
+  for (int h0_samp : {1, 2, 4}) {
+    for (int v0_samp : {1, 2, 4}) {
+      for (int h2_samp : {1, 2, 4}) {
+        for (int v2_samp : {1, 2, 4}) {
+          TestConfig config;
+          config.input.xsize = 137;
+          config.input.ysize = 75;
+          config.jparams.progressive_mode = 2;
+          config.jparams.h_sampling = {h0_samp, 1, h2_samp};
+          config.jparams.v_sampling = {v0_samp, 1, v2_samp};
+          config.compare_to_orig = true;
+          all_tests.push_back(config);
+        }
+      }
+    }
+  }
+  for (int h0_samp : {1, 3}) {
+    for (int v0_samp : {1, 3}) {
+      for (int h2_samp : {1, 3}) {
+        for (int v2_samp : {1, 3}) {
+          TestConfig config;
+          config.input.xsize = 205;
+          config.input.ysize = 99;
+          config.jparams.progressive_mode = 2;
+          config.jparams.h_sampling = {h0_samp, 1, h2_samp};
+          config.jparams.v_sampling = {v0_samp, 1, v2_samp};
+          all_tests.push_back(config);
+        }
+      }
+    }
+  }
+  // Tests for output scaling.
+  for (int scale_num = 1; scale_num <= 16; ++scale_num) {
+    if (scale_num == 8) continue;
+    for (bool crop : {false, true}) {
+      for (int samp : {1, 2}) {
+        for (int progr : {0, 2}) {
+          TestConfig config;
+          config.jparams.h_sampling = {samp, 1, 1};
+          config.jparams.v_sampling = {samp, 1, 1};
+          config.jparams.progressive_mode = progr;
+          config.dparams.scale_num = scale_num;
+          config.dparams.scale_denom = 8;
+          config.dparams.crop_output = crop;
+          all_tests.push_back(config);
+        }
+      }
+    }
+  }
+  return all_tests;
+}
+
+// Maps a color quantization mode to the label used in generated test names.
+// A value not matched by any case yields an empty string.
+std::string QuantMode(ColorQuantMode mode) {
+  const char* label = "";
+  switch (mode) {
+    case CQUANT_1PASS:
+      label = "1pass";
+      break;
+    case CQUANT_EXTERNAL:
+      label = "External";
+      break;
+    case CQUANT_2PASS:
+      label = "2pass";
+      break;
+    case CQUANT_REUSE:
+      label = "Reuse";
+      break;
+  }
+  return label;
+}
+
+// Maps a dithering mode to the label used in generated test names.
+// A value not matched by any case yields an empty string.
+std::string DitherMode(J_DITHER_MODE mode) {
+  const char* label = "";
+  switch (mode) {
+    case JDITHER_NONE:
+      label = "No";
+      break;
+    case JDITHER_ORDERED:
+      label = "Ordered";
+      break;
+    case JDITHER_FS:
+      label = "FS";
+      break;
+  }
+  return label;
+}
+
+// Streams a compact description of the decompression parameters. The pieces
+// are concatenated without separators (except between per-scan quantization
+// settings, which are joined with "_").
+std::ostream& operator<<(std::ostream& os, const DecompressParams& dparams) {
+  // Input feeding: whole buffer at once, or in chunks of the given size.
+  if (dparams.chunk_size != 0) {
+    os << "InputChunks" << dparams.chunk_size;
+  } else {
+    os << "CompleteInput";
+  }
+  // Truncated input is only mentioned when less than the full size is used.
+  if (dparams.size_factor < 1.0f) {
+    const int percent = static_cast<int>(dparams.size_factor * 100);
+    os << "Partial" << percent << "p";
+  }
+  // Output consumption: everything at once, or limited to a number of lines.
+  if (dparams.max_output_lines != 0) {
+    os << "OutputLines" << dparams.max_output_lines;
+  } else {
+    os << "CompleteOutput";
+  }
+  // Only the non-default output modes get a tag.
+  if (dparams.output_mode == RAW_DATA) {
+    os << "RawDataOut";
+  } else if (dparams.output_mode == COEFFICIENTS) {
+    os << "CoeffsOut";
+  }
+  os << IOMethodName(dparams.data_type, dparams.endianness);
+  if (dparams.set_out_color_space) {
+    const J_COLOR_SPACE out_space =
+        static_cast<J_COLOR_SPACE>(dparams.out_color_space);
+    os << "OutColor" << ColorSpaceName(out_space);
+  }
+  if (dparams.crop_output) {
+    os << "Crop";
+  }
+  if (dparams.do_block_smoothing) {
+    os << "BlockSmoothing";
+  }
+  if (!dparams.do_fancy_upsampling) {
+    os << "NoFancyUpsampling";
+  }
+  // A 1/1 scale is the identity and is left out of the description.
+  if (!(dparams.scale_num == 1 && dparams.scale_denom == 1)) {
+    os << "Scale" << dparams.scale_num << "_" << dparams.scale_denom;
+  }
+  if (dparams.quantize_colors) {
+    os << "Quant" << dparams.desired_number_of_colors << "colors";
+    bool first_scan_param = true;
+    for (const auto& sparam : dparams.scan_params) {
+      if (!first_scan_param) {
+        os << "_";
+      }
+      first_scan_param = false;
+      os << QuantMode(sparam.color_quant_mode);
+      os << DitherMode(static_cast<J_DITHER_MODE>(sparam.dither_mode))
+         << "Dither";
+    }
+  }
+  if (dparams.skip_scans) {
+    os << "SkipScans";
+  }
+  return os;
+}
+
+// Streams the description of a test case: c.fn_desc when c.fn is non-empty,
+// otherwise c.input, followed by the compression parameters and then the
+// decompression parameters.
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+  if (c.fn.empty()) {
+    os << c.input;
+  } else {
+    os << c.fn_desc;
+  }
+  os << c.jparams;
+  return os << c.dparams;
+}
+
+// Produces the name of a parameterized test case by streaming its TestConfig
+// into a string.
+std::string TestDescription(const testing::TestParamInfo<TestConfig>& info) {
+  std::ostringstream description;
+  description << info.param;
+  return description.str();
+}
+
+// Instantiate the two parameterized suites. Each one takes its parameter list
+// from GenerateTests() (called with false for the first suite and true for the
+// second) and names every case with TestDescription.
+JPEGLI_INSTANTIATE_TEST_SUITE_P(DecodeAPITest, DecodeAPITestParam,
+                                testing::ValuesIn(GenerateTests(false)),
+                                TestDescription);
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(DecodeAPITestBuffered,
+                                DecodeAPITestParamBuffered,
+                                testing::ValuesIn(GenerateTests(true)),
+                                TestDescription);
+
+}  // namespace
+}  // namespace jpegli
diff --git a/third-party/libjxl/libjxl/lib/jpegli/decode_internal.h b/third-party/libjxl/libjxl/lib/jpegli/decode_internal.h
new file mode 100644
index 0000000000..ed7baa39e9
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/decode_internal.h
@@ -0,0 +1,151 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_DECODE_INTERNAL_H_
+#define LIB_JPEGLI_DECODE_INTERNAL_H_
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <vector>
+
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/common_internal.h"
+#include "lib/jpegli/huffman.h"
+
+namespace jpegli {
+
+// Internal status codes.
+// NOTE(review): presumably returned by the input / marker processing routines
+// to tell the caller what to do next; their users are not visible in this
+// header -- confirm against the decoder sources.
+static constexpr int kNeedMoreInput = 100;
+static constexpr int kHandleRestart = 101;
+static constexpr int kHandleMarkerProcessor = 102;
+static constexpr int kProcessNextMarker = 103;
+// Number of lookup-table entries needed for all NUM_HUFF_TBLS Huffman table
+// slots (kJpegHuffmanLutSize entries per slot).
+static constexpr size_t kAllHuffLutSize = NUM_HUFF_TBLS * kJpegHuffmanLutSize;
+
+// Integer type used to hold coefficient values.
+typedef int16_t coeff_t;
+
+// State of the decoder that has to be saved before decoding one MCU in case
+// we run out of the bitstream.
+struct MCUCodingState {
+  // One saved DC coefficient per component.
+  coeff_t last_dc_coeff[kMaxComponents];
+  int eobrun;
+  // Room for DCTSIZE2 coefficients for each of up to D_MAX_BLOCKS_IN_MCU
+  // blocks.
+  coeff_t coeffs[D_MAX_BLOCKS_IN_MCU * DCTSIZE2];
+};
+
+}  // namespace jpegli
+
+// Use this forward-declared libjpeg struct to hold all our private variables.
+// TODO(szabadka) Remove variables that have a corresponding version in cinfo.
+//
+// Only output_data_type_ and swap_endianness_ carry default member
+// initializers; every other member is left without one in this declaration.
+// NOTE(review): presumably the remaining members are initialized by the
+// decoder setup code, which is not visible in this header -- confirm.
+struct jpeg_decomp_master {
+  //
+  // Input handling state.
+  //
+  std::vector<uint8_t> input_buffer_;
+  size_t input_buffer_pos_;
+  // Number of bits after codestream_pos_ that were already processed.
+  // NOTE(review): no member named codestream_pos_ is declared in this struct;
+  // presumably this refers to input_buffer_pos_ -- confirm.
+  size_t codestream_bits_ahead_;
+  bool streaming_mode_;
+
+  // Coefficient buffers
+  jvirt_barray_ptr* coef_arrays;
+  JBLOCKARRAY coeff_rows[jpegli::kMaxComponents];
+
+  //
+  // Marker data processing state.
+  //
+  bool found_soi_;
+  bool found_dri_;
+  bool found_sof_;
+  bool found_eoi_;
+  size_t icc_index_;
+  size_t icc_total_;
+  std::vector<uint8_t> icc_profile_;
+  // Lookup tables for the DC and AC Huffman codes, kAllHuffLutSize entries
+  // each.
+  jpegli::HuffmanTableEntry dc_huff_lut_[jpegli::kAllHuffLutSize];
+  jpegli::HuffmanTableEntry ac_huff_lut_[jpegli::kAllHuffLutSize];
+  uint8_t markers_to_save_[32];
+  jpeg_marker_parser_method app_marker_parsers[16];
+  jpeg_marker_parser_method com_marker_parser;
+  // Whether this jpeg has multiple scans (progressive or non-interleaved
+  // sequential).
+  bool is_multiscan_;
+
+  // Fields defined by SOF marker.
+  size_t iMCU_cols_;
+  int h_factor[jpegli::kMaxComponents];
+  int v_factor[jpegli::kMaxComponents];
+
+  // Initialized at start of frame.
+  uint16_t scan_progression_[jpegli::kMaxComponents][DCTSIZE2];
+
+  //
+  // Per scan state.
+  //
+  size_t scan_mcu_row_;
+  size_t scan_mcu_col_;
+  size_t mcu_rows_per_iMCU_row_;
+  jpegli::coeff_t last_dc_coeff_[jpegli::kMaxComponents];
+  int eobrun_;
+  int restarts_to_go_;
+  int next_restart_marker_;
+
+  jpegli::MCUCodingState mcu_;
+
+  //
+  // Rendering state.
+  //
+  int output_passes_done_;
+  JpegliDataType output_data_type_ = JPEGLI_TYPE_UINT8;
+  bool swap_endianness_ = false;
+  size_t xoffset_;
+  bool need_context_rows_;
+
+  int min_scaled_dct_size;
+  int scaled_dct_size[jpegli::kMaxComponents];
+
+  size_t raw_height_[jpegli::kMaxComponents];
+  jpegli::RowBuffer<float> raw_output_[jpegli::kMaxComponents];
+  jpegli::RowBuffer<float> render_output_[jpegli::kMaxComponents];
+
+  // Per-component inverse transform function pointer.
+  void (*inverse_transform[jpegli::kMaxComponents])(
+      const int16_t* JXL_RESTRICT qblock, const float* JXL_RESTRICT dequant,
+      const float* JXL_RESTRICT biases, float* JXL_RESTRICT scratch_space,
+      float* JXL_RESTRICT output, size_t output_stride, size_t dctsize);
+
+  // Color transform function pointer; takes one row pointer per component and
+  // a length.
+  void (*color_transform)(float* row[jpegli::kMaxComponents], size_t len);
+
+  float* idct_scratch_;
+  float* upsample_scratch_;
+  uint8_t* output_scratch_;
+  int16_t* smoothing_scratch_;
+  float* dequant_;
+  // 1 = 1pass, 2 = 2pass, 3 = external
+  int quant_mode_;
+  int quant_pass_;
+  int num_colors_[jpegli::kMaxComponents];
+  uint8_t* colormap_lut_;
+  uint8_t* pixels_;
+  JSAMPARRAY scanlines_;
+  std::vector<std::vector<uint8_t>> candidate_lists_;
+  bool regenerate_inverse_colormap_;
+  float* dither_[jpegli::kMaxComponents];
+  float* error_row_[2 * jpegli::kMaxComponents];
+  size_t dither_size_;
+  size_t dither_mask_;
+
+  // Per channel and per frequency statistics about the number of nonzeros and
+  // the sum of coefficient absolute values, used in dequantization bias
+  // computation.
+  int* nonzeros_;
+  int* sumabs_;
+  size_t num_processed_blocks_[jpegli::kMaxComponents];
+  float* biases_;
+// Number of coefficients per component tracked in the *_latch arrays below.
+#define SAVED_COEFS 10
+  // This holds the coef_bits of the scan before the current scan,
+  // i.e. the bottom half when rendering incomplete scans.
+  int (*coef_bits_latch)[SAVED_COEFS];
+  int (*prev_coef_bits_latch)[SAVED_COEFS];
+  bool apply_smoothing;
+};
+
+#endif  // LIB_JPEGLI_DECODE_INTERNAL_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/decode_marker.cc b/third-party/libjxl/libjxl/lib/jpegli/decode_marker.cc
new file mode 100644
index 0000000000..c5c5790cdf
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/decode_marker.cc
@@ -0,0 +1,588 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode_marker.h"
+
+#include <string.h>
+
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/huffman.h"
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jxl/base/printf_macros.h"
+
+namespace jpegli {
+namespace {
+
+// Largest image width / height (in pixels) accepted when parsing the SOF
+// marker.
+constexpr int kMaxDimPixels = 65535;
+// The 11 characters "ICC_PROFILE" plus the terminating NUL (12 bytes).
+// NOTE(review): presumably the signature that prefixes ICC profile chunks in
+// APP markers; its use is not visible in this part of the file -- confirm.
+constexpr uint8_t kIccProfileTag[12] = "ICC_PROFILE";
+
+// Macros for commonly used error conditions.
+//
+// JPEG_VERIFY_LEN and JPEG_VERIFY_MARKER_END expect variables named `pos` and
+// `len` to be in scope where they are used. Each macro expands to an `if`
+// whose body is `return JPEGLI_ERROR(...)`, so a failed check leaves the
+// enclosing function.
+// NOTE(review): the expansions are bare `if` statements (no do/while
+// wrapper), so they should only be used as standalone statements.
+
+// Fails if fewer than `n` bytes remain between `pos` and `len`.
+#define JPEG_VERIFY_LEN(n)                                      \
+  if (pos + (n) > len) {                                        \
+    return JPEGLI_ERROR("Unexpected end of marker: pos=%" PRIuS \
+                        " need=%d len=%" PRIuS,                 \
+                        pos, static_cast<int>(n), len);         \
+  }
+
+// Fails if `var` lies outside the inclusive range [low, high]. The expression
+// text of `var` is embedded in the error message.
+#define JPEG_VERIFY_INPUT(var, low, high)                               \
+  if ((var) < (low) || (var) > (high)) {                                \
+    return JPEGLI_ERROR("Invalid " #var ": %d", static_cast<int>(var)); \
+  }
+
+// Fails unless parsing consumed exactly `len` bytes.
+#define JPEG_VERIFY_MARKER_END()                                  \
+  if (pos != len) {                                               \
+    return JPEGLI_ERROR("Invalid marker length: declared=%" PRIuS \
+                        " actual=%" PRIuS,                        \
+                        len, pos);                                \
+  }
+
+// Returns the byte at data[*pos] and advances *pos by one. No bounds check is
+// performed here.
+inline int ReadUint8(const uint8_t* data, size_t* pos) {
+  const size_t index = *pos;
+  *pos = index + 1;
+  return data[index];
+}
+
+// Returns the 16-bit value stored most-significant byte first at data[*pos]
+// and advances *pos by two. No bounds check is performed here.
+inline int ReadUint16(const uint8_t* data, size_t* pos) {
+  const uint8_t* bytes = data + *pos;
+  const int high = bytes[0];
+  const int low = bytes[1];
+  *pos += 2;
+  return (high << 8) + low;
+}
+
+// Parses a SOF (start of frame) marker segment and fills in the frame-level
+// fields of cinfo: sample precision, image dimensions, the per-component
+// table (ids, sampling factors, quantization table index), the JPEG and
+// default output color spaces, and the derived iMCU / block dimensions.
+//
+// `data` is the marker segment and `len` its length; parsing starts at
+// offset 2. Errors are reported through JPEGLI_ERROR.
+// NOTE(review): presumably the first two bytes hold the segment length field
+// that the caller has already consumed -- confirm against the caller.
+void ProcessSOF(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+  jpeg_decomp_master* m = cinfo->master;
+  // A frame header is only valid after SOI and may appear only once.
+  if (!m->found_soi_) {
+    JPEGLI_ERROR("Unexpected SOF marker.");
+  }
+  if (m->found_sof_) {
+    JPEGLI_ERROR("Duplicate SOF marker.");
+  }
+  m->found_sof_ = true;
+  // Marker code 0xc2 selects progressive mode; arithmetic coding is always
+  // reported as off.
+  cinfo->progressive_mode = (cinfo->unread_marker == 0xc2);
+  cinfo->arith_code = 0;
+  size_t pos = 2;
+  // Fixed part of the header: 1 + 2 + 2 + 1 = 6 bytes.
+  JPEG_VERIFY_LEN(6);
+  cinfo->data_precision = ReadUint8(data, &pos);
+  cinfo->image_height = ReadUint16(data, &pos);
+  cinfo->image_width = ReadUint16(data, &pos);
+  cinfo->num_components = ReadUint8(data, &pos);
+  // Only a precision of exactly kJpegPrecision is accepted.
+  JPEG_VERIFY_INPUT(cinfo->data_precision, kJpegPrecision, kJpegPrecision);
+  JPEG_VERIFY_INPUT(cinfo->image_height, 1, kMaxDimPixels);
+  JPEG_VERIFY_INPUT(cinfo->image_width, 1, kMaxDimPixels);
+  JPEG_VERIFY_INPUT(cinfo->num_components, 1, kMaxComponents);
+  // Each component entry is 3 bytes: id, sampling factors, quant table index.
+  JPEG_VERIFY_LEN(3 * cinfo->num_components);
+  cinfo->comp_info = jpegli::Allocate<jpeg_component_info>(
+      cinfo, cinfo->num_components, JPOOL_IMAGE);
+
+  // Read sampling factors and quant table index for each component.
+  uint8_t ids_seen[256] = {0};
+  cinfo->max_h_samp_factor = 1;
+  cinfo->max_v_samp_factor = 1;
+  for (int i = 0; i < cinfo->num_components; ++i) {
+    jpeg_component_info* comp = &cinfo->comp_info[i];
+    comp->component_index = i;
+    const int id = ReadUint8(data, &pos);
+    if (ids_seen[id]) {  // (cf. section B.2.2, syntax of Ci)
+      JPEGLI_ERROR("Duplicate ID %d in SOF.", id);
+    }
+    ids_seen[id] = 1;
+    comp->component_id = id;
+    // High nibble: horizontal sampling factor, low nibble: vertical.
+    int factor = ReadUint8(data, &pos);
+    int h_samp_factor = factor >> 4;
+    int v_samp_factor = factor & 0xf;
+    JPEG_VERIFY_INPUT(h_samp_factor, 1, MAX_SAMP_FACTOR);
+    JPEG_VERIFY_INPUT(v_samp_factor, 1, MAX_SAMP_FACTOR);
+    comp->h_samp_factor = h_samp_factor;
+    comp->v_samp_factor = v_samp_factor;
+    cinfo->max_h_samp_factor =
+        std::max(cinfo->max_h_samp_factor, h_samp_factor);
+    cinfo->max_v_samp_factor =
+        std::max(cinfo->max_v_samp_factor, v_samp_factor);
+    int quant_tbl_idx = ReadUint8(data, &pos);
+    JPEG_VERIFY_INPUT(quant_tbl_idx, 0, NUM_QUANT_TBLS - 1);
+    comp->quant_tbl_no = quant_tbl_idx;
+    // The referenced quantization table must already have been defined.
+    if (cinfo->quant_tbl_ptrs[quant_tbl_idx] == nullptr) {
+      JPEGLI_ERROR("Quantization table with index %u not found", quant_tbl_idx);
+    }
+    comp->quant_table = nullptr;  // will be allocated after SOS marker
+  }
+  JPEG_VERIFY_MARKER_END();
+
+  // Set the input colorspace based on the markers we have seen and set
+  // default output colorspace.
+  if (cinfo->num_components == 1) {
+    cinfo->jpeg_color_space = JCS_GRAYSCALE;
+    cinfo->out_color_space = JCS_GRAYSCALE;
+  } else if (cinfo->num_components == 3) {
+    if (cinfo->saw_JFIF_marker) {
+      cinfo->jpeg_color_space = JCS_YCbCr;
+    } else if (cinfo->saw_Adobe_marker) {
+      cinfo->jpeg_color_space =
+          cinfo->Adobe_transform == 0 ? JCS_RGB : JCS_YCbCr;
+    } else {
+      // No JFIF / Adobe marker: assume YCbCr unless the component ids spell
+      // out 'R', 'G', 'B'.
+      cinfo->jpeg_color_space = JCS_YCbCr;
+      if (cinfo->comp_info[0].component_id == 'R' &&  //
+          cinfo->comp_info[1].component_id == 'G' &&  //
+          cinfo->comp_info[2].component_id == 'B') {
+        cinfo->jpeg_color_space = JCS_RGB;
+      }
+    }
+    cinfo->out_color_space = JCS_RGB;
+  } else if (cinfo->num_components == 4) {
+    if (cinfo->saw_Adobe_marker) {
+      cinfo->jpeg_color_space =
+          cinfo->Adobe_transform == 0 ? JCS_CMYK : JCS_YCCK;
+    } else {
+      cinfo->jpeg_color_space = JCS_CMYK;
+    }
+    cinfo->out_color_space = JCS_CMYK;
+  }
+  // Note: for a component count of 2 neither color space field is assigned
+  // by this function.
+
+  // We have checked above that none of the sampling factors are 0, so the max
+  // sampling factors can not be 0.
+  cinfo->total_iMCU_rows =
+      DivCeil(cinfo->image_height, cinfo->max_v_samp_factor * DCTSIZE);
+  m->iMCU_cols_ =
+      DivCeil(cinfo->image_width, cinfo->max_h_samp_factor * DCTSIZE);
+  // Compute the block dimensions for each component.
+  for (int i = 0; i < cinfo->num_components; ++i) {
+    jpeg_component_info* comp = &cinfo->comp_info[i];
+    // The maximum sampling factors must be integer multiples of each
+    // component's own factors.
+    if (cinfo->max_h_samp_factor % comp->h_samp_factor != 0 ||
+        cinfo->max_v_samp_factor % comp->v_samp_factor != 0) {
+      JPEGLI_ERROR("Non-integral subsampling ratios.");
+    }
+    m->h_factor[i] = cinfo->max_h_samp_factor / comp->h_samp_factor;
+    m->v_factor[i] = cinfo->max_v_samp_factor / comp->v_samp_factor;
+    comp->downsampled_width = DivCeil(cinfo->image_width, m->h_factor[i]);
+    comp->downsampled_height = DivCeil(cinfo->image_height, m->v_factor[i]);
+    comp->width_in_blocks = DivCeil(comp->downsampled_width, DCTSIZE);
+    comp->height_in_blocks = DivCeil(comp->downsampled_height, DCTSIZE);
+  }
+  // Start the frame with no coefficient bits marked as covered by any scan.
+  memset(m->scan_progression_, 0, sizeof(m->scan_progression_));
+}
+
+// Parses a SOS (start of scan) marker segment.
+//
+// Reads the components taking part in the scan together with their DC / AC
+// Huffman table selectors, derives the MCU layout (cinfo->blocks_in_MCU and
+// cinfo->MCU_membership), reads the progression parameters (Ss, Se, Ah, Al)
+// and records in m->scan_progression_ which coefficient bits this scan
+// covers, rejecting overlapping or out-of-order scans. The first time it runs
+// for a frame (cinfo->coef_bits still null) it also allocates the coef_bits
+// bookkeeping arrays.
+//
+// `data` is the marker segment and `len` its length; parsing starts at
+// offset 2. Errors are reported through JPEGLI_ERROR; every error site
+// returns immediately so that nothing is parsed past a failed check.
+void ProcessSOS(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+  jpeg_decomp_master* m = cinfo->master;
+  // A scan header is only valid once the frame header has been seen.
+  if (!m->found_sof_) {
+    return JPEGLI_ERROR("Unexpected SOS marker.");
+  }
+  size_t pos = 2;
+  JPEG_VERIFY_LEN(1);
+  cinfo->comps_in_scan = ReadUint8(data, &pos);
+  JPEG_VERIFY_INPUT(cinfo->comps_in_scan, 1, cinfo->num_components);
+  JPEG_VERIFY_INPUT(cinfo->comps_in_scan, 1, MAX_COMPS_IN_SCAN);
+
+  // Each scan component entry is 2 bytes: component id and table selectors.
+  JPEG_VERIFY_LEN(2 * cinfo->comps_in_scan);
+  bool is_interleaved = (cinfo->comps_in_scan > 1);
+  uint8_t ids_seen[256] = {0};
+  cinfo->blocks_in_MCU = 0;
+  for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+    int id = ReadUint8(data, &pos);
+    if (ids_seen[id]) {  // (cf. section B.2.3, regarding CSj)
+      return JPEGLI_ERROR("Duplicate ID %d in SOS.", id);
+    }
+    ids_seen[id] = 1;
+    // Look up the frame component this scan component id refers to.
+    jpeg_component_info* comp = nullptr;
+    for (int j = 0; j < cinfo->num_components; ++j) {
+      if (cinfo->comp_info[j].component_id == id) {
+        comp = &cinfo->comp_info[j];
+        cinfo->cur_comp_info[i] = comp;
+      }
+    }
+    if (!comp) {
+      return JPEGLI_ERROR("SOS marker: Could not find component with id %d",
+                          id);
+    }
+    // High nibble: DC table selector, low nibble: AC table selector.
+    int c = ReadUint8(data, &pos);
+    comp->dc_tbl_no = c >> 4;
+    comp->ac_tbl_no = c & 0xf;
+    JPEG_VERIFY_INPUT(comp->dc_tbl_no, 0, 3);
+    JPEG_VERIFY_INPUT(comp->ac_tbl_no, 0, 3);
+    // In a single-component scan every MCU is exactly one block.
+    comp->MCU_width = is_interleaved ? comp->h_samp_factor : 1;
+    comp->MCU_height = is_interleaved ? comp->v_samp_factor : 1;
+    comp->MCU_blocks = comp->MCU_width * comp->MCU_height;
+    // Return on failure: the loop below writes MCU_blocks entries into
+    // cinfo->MCU_membership and must not run past the limit.
+    if (cinfo->blocks_in_MCU + comp->MCU_blocks > D_MAX_BLOCKS_IN_MCU) {
+      return JPEGLI_ERROR("Too many blocks in MCU.");
+    }
+    for (int j = 0; j < comp->MCU_blocks; ++j) {
+      cinfo->MCU_membership[cinfo->blocks_in_MCU++] = i;
+    }
+  }
+  // Trailing 3 bytes: Ss, Se and the packed Ah / Al nibbles.
+  JPEG_VERIFY_LEN(3);
+  cinfo->Ss = ReadUint8(data, &pos);
+  cinfo->Se = ReadUint8(data, &pos);
+  JPEG_VERIFY_INPUT(cinfo->Ss, 0, 63);
+  JPEG_VERIFY_INPUT(cinfo->Se, cinfo->Ss, 63);
+  int c = ReadUint8(data, &pos);
+  cinfo->Ah = c >> 4;
+  cinfo->Al = c & 0xf;
+  JPEG_VERIFY_MARKER_END();
+
+  // The multiscan decision is taken once, on the first scan of the image.
+  if (cinfo->input_scan_number == 0) {
+    m->is_multiscan_ = (cinfo->comps_in_scan < cinfo->num_components ||
+                        cinfo->progressive_mode);
+  }
+  if (cinfo->Ah != 0 && cinfo->Al != cinfo->Ah - 1) {
+    // section G.1.1.1.2 : Successive approximation control only improves
+    // by one bit at a time.
+    return JPEGLI_ERROR("Invalid progressive parameters: Al=%d Ah=%d",
+                        cinfo->Al, cinfo->Ah);
+  }
+  // Sequential scans always cover the full spectrum at full precision,
+  // whatever the header said.
+  if (!cinfo->progressive_mode) {
+    cinfo->Ss = 0;
+    cinfo->Se = 63;
+    cinfo->Ah = 0;
+    cinfo->Al = 0;
+  }
+  // Bits delivered by this scan: all bits from Al upwards for a first pass
+  // (Ah == 0), only bit Al for a refinement pass.
+  const uint16_t scan_bitmask =
+      cinfo->Ah == 0 ? (0xffff << cinfo->Al) : (1u << cinfo->Al);
+  // Bits below Al; none of them may have been delivered by an earlier scan.
+  const uint16_t refinement_bitmask = (1 << cinfo->Al) - 1;
+  if (!cinfo->coef_bits) {
+    cinfo->coef_bits =
+        Allocate<int[DCTSIZE2]>(cinfo, cinfo->num_components * 2, JPOOL_IMAGE);
+    m->coef_bits_latch =
+        Allocate<int[SAVED_COEFS]>(cinfo, cinfo->num_components, JPOOL_IMAGE);
+    m->prev_coef_bits_latch =
+        Allocate<int[SAVED_COEFS]>(cinfo, cinfo->num_components, JPOOL_IMAGE);
+
+    // Mark the first num_components rows of coef_bits and all of
+    // coef_bits_latch with -1.
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      for (int i = 0; i < DCTSIZE2; ++i) {
+        cinfo->coef_bits[c][i] = -1;
+        if (i < SAVED_COEFS) {
+          m->coef_bits_latch[c][i] = -1;
+        }
+      }
+    }
+  }
+
+  for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+    int comp_idx = cinfo->cur_comp_info[i]->component_index;
+    for (int k = cinfo->Ss; k <= cinfo->Se; ++k) {
+      // scan_progression_ is indexed by the frame component index, so the
+      // diagnostics below report the mask of that same entry (comp_idx), not
+      // the entry at the scan position i.
+      if (m->scan_progression_[comp_idx][k] & scan_bitmask) {
+        return JPEGLI_ERROR(
+            "Overlapping scans: component=%d k=%d prev_mask: %u cur_mask %u",
+            comp_idx, k, m->scan_progression_[comp_idx][k], scan_bitmask);
+      }
+      if (m->scan_progression_[comp_idx][k] & refinement_bitmask) {
+        return JPEGLI_ERROR(
+            "Invalid scan order, a more refined scan was already done: "
+            "component=%d k=%d prev_mask=%u cur_mask=%u",
+            comp_idx, k, m->scan_progression_[comp_idx][k], scan_bitmask);
+      }
+      m->scan_progression_[comp_idx][k] |= scan_bitmask;
+    }
+  }
+  if (cinfo->Al > 10) {
+    return JPEGLI_ERROR("Scan parameter Al=%d is not supported.", cinfo->Al);
+  }
+}
+
+// Reads the Define Huffman Table (DHT) marker segment and stores the code
+// lengths and symbols in either cinfo->dc_huff_tbl_ptrs or
+// cinfo->ac_huff_tbl_ptrs, depending on the type and slot_id of each table.
+void ProcessDHT(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+  size_t pos = 2;  // Skip the two-byte segment length.
+  if (pos == len) {
+    return JPEGLI_ERROR("DHT marker: no Huffman table found");
+  }
+  while (pos < len) {  // One segment may define several tables.
+    JPEG_VERIFY_LEN(1 + kJpegHuffmanMaxBitLength);
+    // The index of the Huffman code in the current set of Huffman codes. For AC
+    // component Huffman codes, 0x10 is added to the index.
+    int slot_id = ReadUint8(data, &pos);
+    int huffman_index = slot_id;
+    int is_ac_table = (slot_id & 0x10) != 0;
+    JHUFF_TBL** table;
+    if (is_ac_table) {
+      huffman_index -= 0x10;
+      JPEG_VERIFY_INPUT(huffman_index, 0, NUM_HUFF_TBLS - 1);
+      table = &cinfo->ac_huff_tbl_ptrs[huffman_index];
+    } else {
+      JPEG_VERIFY_INPUT(huffman_index, 0, NUM_HUFF_TBLS - 1);
+      table = &cinfo->dc_huff_tbl_ptrs[huffman_index];
+    }
+    if (*table == nullptr) {  // Allocate the slot on first use.
+      *table = jpegli_alloc_huff_table(reinterpret_cast<j_common_ptr>(cinfo));
+    }
+    int total_count = 0;  // Number of symbols summed over all code lengths.
+    for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) {
+      int count = ReadUint8(data, &pos);
+      (*table)->bits[i] = count;
+      total_count += count;
+    }
+    if (is_ac_table) {
+      JPEG_VERIFY_INPUT(total_count, 0, kJpegHuffmanAlphabetSize);
+    } else {
+      // Allow symbols up to 15 here, we check later whether any invalid symbols
+      // are actually decoded.
+      // TODO(szabadka) Make sure decoder works (does not crash) with up to
+      // 15-nbits DC symbols and then increase kJpegDCAlphabetSize.
+      JPEG_VERIFY_INPUT(total_count, 0, 16);
+    }
+    JPEG_VERIFY_LEN(total_count);
+    for (int i = 0; i < total_count; ++i) {
+      int value = ReadUint8(data, &pos);
+      if (!is_ac_table) {
+        JPEG_VERIFY_INPUT(value, 0, 15);
+      }
+      (*table)->huffval[i] = value;
+    }
+    for (int i = total_count; i < kJpegHuffmanAlphabetSize; ++i) {
+      (*table)->huffval[i] = 0;  // Clear the unused tail of the symbol list.
+    }
+  }
+  JPEG_VERIFY_MARKER_END();
+}
+
+void ProcessDQT(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+  jpeg_decomp_master* m = cinfo->master;
+  if (m->found_sof_) {
+    JPEGLI_ERROR("Updating quant tables between scans is not supported.");
+  }
+  size_t pos = 2;  // Skip the two-byte segment length.
+  if (pos == len) {
+    return JPEGLI_ERROR("DQT marker: no quantization table found");
+  }
+  while (pos < len) {  // One segment may define several tables.
+    JPEG_VERIFY_LEN(1);
+    int quant_table_index = ReadUint8(data, &pos);
+    int precision = quant_table_index >> 4;  // High nibble, must be 0 or 1.
+    JPEG_VERIFY_INPUT(precision, 0, 1);
+    quant_table_index &= 0xf;  // Low nibble selects the table slot.
+    JPEG_VERIFY_INPUT(quant_table_index, 0, NUM_QUANT_TBLS - 1);
+    JPEG_VERIFY_LEN((precision + 1) * DCTSIZE2);
+
+    if (cinfo->quant_tbl_ptrs[quant_table_index] == nullptr) {
+      cinfo->quant_tbl_ptrs[quant_table_index] =
+          jpegli_alloc_quant_table(reinterpret_cast<j_common_ptr>(cinfo));
+    }
+    JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[quant_table_index];
+
+    for (size_t i = 0; i < DCTSIZE2; ++i) {
+      int quant_val =
+          precision ? ReadUint16(data, &pos) : ReadUint8(data, &pos);
+      JPEG_VERIFY_INPUT(quant_val, 1, 65535);  // Zero is rejected.
+      quant_table->quantval[kJPEGNaturalOrder[i]] = quant_val;
+    }
+  }
+  JPEG_VERIFY_MARKER_END();
+}
+
+void ProcessDNL(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+  // The DNL segment is accepted and skipped; its payload is not interpreted.
+}
+
+void ProcessDRI(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+  jpeg_decomp_master* m = cinfo->master;
+  if (m->found_dri_) {  // Only one DRI segment is accepted.
+    return JPEGLI_ERROR("Duplicate DRI marker.");
+  }
+  m->found_dri_ = true;
+  size_t pos = 2;  // Skip the two-byte segment length.
+  JPEG_VERIFY_LEN(2);
+  cinfo->restart_interval = ReadUint16(data, &pos);
+  JPEG_VERIFY_MARKER_END();
+}
+
+void ProcessAPP(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+  jpeg_decomp_master* m = cinfo->master;
+  const uint8_t marker = cinfo->unread_marker;
+  const uint8_t* body = data + 2;  // Segment contents after the length field.
+  size_t body_size = len - 2;
+  if (marker == 0xE0) {  // APP0: JFIF header fields.
+    if (body_size < 14 || memcmp(body, "JFIF", 4) != 0) return;
+    cinfo->saw_JFIF_marker = TRUE;
+    cinfo->JFIF_major_version = body[5];
+    cinfo->JFIF_minor_version = body[6];
+    cinfo->density_unit = body[7];
+    cinfo->X_density = (body[8] << 8) + body[9];
+    cinfo->Y_density = (body[10] << 8) + body[11];
+    return;
+  }
+  if (marker == 0xEE) {  // APP14: Adobe transform byte.
+    if (body_size < 12 || memcmp(body, "Adobe", 5) != 0) return;
+    cinfo->saw_Adobe_marker = TRUE;
+    cinfo->Adobe_transform = body[11];
+    return;
+  }
+  if (marker != 0xE2 || body_size < sizeof(kIccProfileTag) ||
+      memcmp(body, kIccProfileTag, sizeof(kIccProfileTag)) != 0) {
+    return;  // Anything that is not an APP2 ICC chunk is left alone.
+  }
+  const uint8_t* chunk = body + sizeof(kIccProfileTag);
+  const size_t chunk_size = body_size - sizeof(kIccProfileTag);
+  if (chunk_size < 2) {
+    return JPEGLI_ERROR("ICC chunk is too small.");
+  }
+  const uint8_t index = chunk[0];  // Sequence number of this chunk.
+  const uint8_t total = chunk[1];  // Announced number of chunks.
+  ++m->icc_index_;
+  if (m->icc_index_ != index) {
+    return JPEGLI_ERROR("Invalid ICC chunk order.");
+  }
+  if (total == 0) {
+    return JPEGLI_ERROR("Invalid ICC chunk total.");
+  }
+  if (m->icc_total_ == 0) {
+    m->icc_total_ = total;
+  } else if (m->icc_total_ != total) {
+    return JPEGLI_ERROR("Invalid ICC chunk total.");
+  }
+  if (m->icc_index_ > m->icc_total_) {
+    return JPEGLI_ERROR("Invalid ICC chunk index.");
+  }
+  m->icc_profile_.insert(m->icc_profile_.end(), chunk + 2, chunk + chunk_size);
+}
+
+void ProcessCOM(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+  // The COM segment is accepted and skipped; its payload is not interpreted.
+}
+
+void ProcessSOI(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+  // A second SOI marker in the same stream is reported as an error.
+  if (cinfo->master->found_soi_) {
+    JPEGLI_ERROR("Duplicate SOI marker");
+  }
+  cinfo->master->found_soi_ = true;
+}
+
+void ProcessEOI(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+  cinfo->master->found_eoi_ = true;  // Record that the EOI marker was seen.
+}
+
+void SaveMarker(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+  // Copies the segment contents (without the two-byte length field) into a
+  // new node that becomes the head of cinfo->marker_list.
+  const uint8_t marker = cinfo->unread_marker;
+  const uint8_t* body = data + 2;
+  const size_t body_size = len - 2;
+  jpeg_saved_marker_ptr previous_head = cinfo->marker_list;
+  jpeg_saved_marker_ptr node =
+      jpegli::Allocate<jpeg_marker_struct>(cinfo, 1, JPOOL_IMAGE);
+  cinfo->marker_list = node;
+  node->next = previous_head;
+  node->marker = marker;
+  node->original_length = body_size;
+  node->data_length = body_size;
+  node->data = jpegli::Allocate<uint8_t>(cinfo, body_size, JPOOL_IMAGE);
+  memcpy(node->data, body, body_size);
+}
+
+uint8_t ProcessNextMarker(j_decompress_ptr cinfo, const uint8_t* const data,
+                          const size_t len, size_t* pos) {
+  jpeg_decomp_master* m = cinfo->master;
+  size_t num_skipped = 0;
+  uint8_t marker = cinfo->unread_marker;
+  if (marker == 0) {  // No pending marker: search for the next one.
+    // kIsValidMarker[i] == 1 means (0xc0 + i) is a valid marker.
+    static const uint8_t kIsValidMarker[] = {
+        1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
+    };
+    // Skip bytes between markers.
+    while (*pos + 1 < len && (data[*pos] != 0xff || data[*pos + 1] < 0xc0 ||
+                              !kIsValidMarker[data[*pos + 1] - 0xc0])) {
+      ++(*pos);
+      ++num_skipped;
+    }
+    if (*pos + 2 > len) {
+      return kNeedMoreInput;
+    }
+    marker = data[*pos + 1];
+    if (num_skipped > 0) {
+      if (m->found_soi_) {
+        JPEGLI_WARN("Skipped %d bytes before marker 0x%02x", (int)num_skipped,
+                    marker);
+      } else {
+        JPEGLI_ERROR("Did not find SOI marker.");
+      }
+    }
+    *pos += 2;
+    cinfo->unread_marker = marker;
+  }
+  if (!m->found_soi_ && marker != 0xd8) {
+    JPEGLI_ERROR("Did not find SOI marker.");
+  }
+  if (GetMarkerProcessor(cinfo)) {  // A parser callback is set for it.
+    return kHandleMarkerProcessor;
+  }
+  const uint8_t* marker_data = &data[*pos];
+  size_t marker_len = 0;
+  if (marker != 0xd8 && marker != 0xd9) {  // SOI and EOI have no length field.
+    if (*pos + 2 > len) {
+      return kNeedMoreInput;
+    }
+    marker_len += (data[*pos] << 8) + data[*pos + 1];
+    if (marker_len < 2) {
+      JPEGLI_ERROR("Invalid marker length");
+    }
+    if (*pos + marker_len > len) {
+      // TODO(szabadka) Limit our memory usage by using the skip_input_data
+      // source manager callback on APP markers that are not saved.
+      return kNeedMoreInput;
+    }
+    if (marker >= 0xe0 && m->markers_to_save_[marker - 0xe0]) {
+      SaveMarker(cinfo, marker_data, marker_len);
+    }
+  }
+  if (marker == 0xc0 || marker == 0xc1 || marker == 0xc2) {
+    ProcessSOF(cinfo, marker_data, marker_len);
+  } else if (marker == 0xc4) {
+    ProcessDHT(cinfo, marker_data, marker_len);
+  } else if (marker == 0xda) {
+    ProcessSOS(cinfo, marker_data, marker_len);
+  } else if (marker == 0xdb) {
+    ProcessDQT(cinfo, marker_data, marker_len);
+  } else if (marker == 0xdc) {
+    ProcessDNL(cinfo, marker_data, marker_len);
+  } else if (marker == 0xdd) {
+    ProcessDRI(cinfo, marker_data, marker_len);
+  } else if (marker >= 0xe0 && marker <= 0xef) {
+    ProcessAPP(cinfo, marker_data, marker_len);
+  } else if (marker == 0xfe) {
+    ProcessCOM(cinfo, marker_data, marker_len);
+  } else if (marker == 0xd8) {
+    ProcessSOI(cinfo, marker_data, marker_len);
+  } else if (marker == 0xd9) {
+    ProcessEOI(cinfo, marker_data, marker_len);
+  } else {
+    JPEGLI_ERROR("Unexpected marker 0x%x", marker);
+  }
+  *pos += marker_len;  // Step over the segment that was just handled.
+  cinfo->unread_marker = 0;
+  if (marker == 0xda) {
+    return JPEG_REACHED_SOS;
+  } else if (marker == 0xd9) {
+    return JPEG_REACHED_EOI;
+  }
+  return kProcessNextMarker;
+}
+
+}  // namespace
+
+jpeg_marker_parser_method GetMarkerProcessor(j_decompress_ptr cinfo) {
+  // Parser stored for the pending APPn/COM marker; nullptr for other markers.
+  const uint8_t marker = cinfo->unread_marker;
+  if (marker == 0xfe) {
+    return cinfo->master->com_marker_parser;
+  }
+  if (marker >= 0xe0 && marker <= 0xef) {
+    return cinfo->master->app_marker_parsers[marker - 0xe0];
+  }
+  return nullptr;
+}
+
+int ProcessMarkers(j_decompress_ptr cinfo, const uint8_t* const data,
+                   const size_t len, size_t* pos) {
+  // Keep consuming markers until one of them yields a status for the caller.
+  int status;
+  do {
+    status = ProcessNextMarker(cinfo, data, len, pos);
+  } while (status == kProcessNextMarker);
+  return status;
+}
+
+}  // namespace jpegli
diff --git a/third-party/libjxl/libjxl/lib/jpegli/decode_marker.h b/third-party/libjxl/libjxl/lib/jpegli/decode_marker.h
new file mode 100644
index 0000000000..fb24b3ee87
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/decode_marker.h
@@ -0,0 +1,32 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_DECODE_MARKER_H_
+#define LIB_JPEGLI_DECODE_MARKER_H_
+
+#include <stdint.h>
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+// Reads the available input in the source manager's input buffer until either
+// the end of the next SOS marker or the end of the input.
+// The corresponding fields of cinfo are updated with the processed input data.
+// Upon return, the input buffer will be at the start or at the end of a marker
+// data segment (inter-marker data is allowed).
+// Return value is one of:
+//   * JPEG_SUSPENDED, if the current input buffer ends before the next SOS or
+//       EOI marker. Input buffer refill is handled by the caller;
+//   * JPEG_REACHED_SOS, if the next SOS marker is found;
+//   * JPEG_REACHED_EOI, if the EOI marker is found.
+int ProcessMarkers(j_decompress_ptr cinfo, const uint8_t* const data,
+                   const size_t len, size_t* pos);
+
+jpeg_marker_parser_method GetMarkerProcessor(j_decompress_ptr cinfo);
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_DECODE_MARKER_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/decode_scan.cc b/third-party/libjxl/libjxl/lib/jpegli/decode_scan.cc
new file mode 100644
index 0000000000..29c0172950
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/decode_scan.cc
@@ -0,0 +1,566 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode_scan.h"
+
+#include <string.h>
+
+#include <hwy/base.h>
+
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jxl/base/status.h"
+
+namespace jpegli {
+namespace {
+
+// Max 14 blocks per MCU (when 1 channel is subsampled)
+// Max 64 nonzero coefficients per block
+// Max 16 symbol bits plus 11 extra bits per nonzero symbol
+// Max 2 bytes per 8 bits (worst case is all bytes are escaped 0xff)
+constexpr int kMaxMCUByteSize = 6048;
+
+// Helper structure to read bits from the entropy coded data segment.
+struct BitReaderState {
+  BitReaderState(const uint8_t* data, const size_t len, size_t pos)
+      : data_(data), len_(len), start_pos_(pos) {
+    Reset(pos);
+  }
+
+  void Reset(size_t pos) {
+    pos_ = pos;
+    val_ = 0;
+    bits_left_ = 0;
+    next_marker_pos_ = len_;  // No marker seen yet.
+    FillBitWindow();
+  }
+
+  // Returns the next byte and skips the 0xff/0x00 escape sequences.
+  uint8_t GetNextByte() {
+    if (pos_ >= next_marker_pos_) {  // At or past a marker: supply zeros.
+      ++pos_;
+      return 0;
+    }
+    uint8_t c = data_[pos_++];
+    if (c == 0xff) {
+      uint8_t escape = pos_ < len_ ? data_[pos_] : 0;
+      if (escape == 0) {
+        ++pos_;
+      } else {
+        // 0xff was followed by a non-zero byte, which means that we found the
+        // start of the next marker segment.
+        next_marker_pos_ = pos_ - 1;
+      }
+    }
+    return c;
+  }
+
+  void FillBitWindow() {
+    if (bits_left_ <= 16) {  // Refill only when the window runs low.
+      while (bits_left_ <= 56) {
+        val_ <<= 8;
+        val_ |= (uint64_t)GetNextByte();
+        bits_left_ += 8;
+      }
+    }
+  }
+
+  int ReadBits(int nbits) {
+    FillBitWindow();
+    uint64_t val = (val_ >> (bits_left_ - nbits)) & ((1ULL << nbits) - 1);
+    bits_left_ -= nbits;
+    return val;
+  }
+
+  // Sets *pos to the next stream position, and *bit_pos to the bit position
+  // within the next byte where parsing should continue.
+  // Returns false if the stream ended too early.
+  bool FinishStream(size_t* pos, size_t* bit_pos) {
+    *bit_pos = (8 - (bits_left_ & 7)) & 7;
+    // Give back some bytes that we did not use.
+    int unused_bytes_left = DivCeil(bits_left_, 8);
+    while (unused_bytes_left-- > 0) {
+      --pos_;
+      // If we give back a 0 byte, we need to check if it was a 0xff/0x00 escape
+      // sequence, and if yes, we need to give back one more byte.
+      if (((pos_ == len_ && pos_ == next_marker_pos_) ||
+           (pos_ > 0 && pos_ < next_marker_pos_ && data_[pos_] == 0)) &&
+          (data_[pos_ - 1] == 0xff)) {
+        --pos_;
+      }
+    }
+    if (pos_ >= next_marker_pos_) {
+      *pos = next_marker_pos_;
+      if (pos_ > next_marker_pos_ || *bit_pos > 0) {
+        // Data ran out before the scan was complete.
+        return false;
+      }
+    }
+    *pos = pos_;
+    return true;
+  }
+
+  const uint8_t* data_;     // Input buffer.
+  const size_t len_;        // Size of data_ in bytes.
+  size_t pos_;              // Index of the next byte to read.
+  uint64_t val_;            // Bit window; the newest byte is in the low bits.
+  int bits_left_;           // Number of not yet consumed bits in val_.
+  size_t next_marker_pos_;  // Offset of the next marker, or len_ if none seen.
+  size_t start_pos_;        // Position passed to the constructor.
+};
+
+// Decodes and returns the next Huffman symbol via a two-level table lookup.
+int ReadSymbol(const HuffmanTableEntry* table, BitReaderState* br) {
+  int nbits;
+  br->FillBitWindow();
+  int val = (br->val_ >> (br->bits_left_ - 8)) & 0xff;  // Peek 8 bits.
+  table += val;
+  nbits = table->bits - 8;
+  if (nbits > 0) {  // More than 8 bits needed: go to the second level.
+    br->bits_left_ -= 8;
+    table += table->value;
+    val = (br->val_ >> (br->bits_left_ - nbits)) & ((1 << nbits) - 1);
+    table += val;
+  }
+  br->bits_left_ -= table->bits;  // Consume the bits of the matched entry.
+  return table->value;
+}
+
+/**
+ * Returns the DC diff or AC value for extra bits value x and prefix code s.
+ *
+ * CCITT Rec. T.81 (1992 E)
+ * Table F.1 – Difference magnitude categories for DC coding
+ *  SSSS | DIFF values
+ * ------+--------------------------
+ *     0 | 0
+ *     1 | –1, 1
+ *     2 | –3, –2, 2, 3
+ *     3 | –7..–4, 4..7
+ * ......|..........................
+ *    11 | –2047..–1024, 1024..2047
+ *
+ * CCITT Rec. T.81 (1992 E)
+ * Table F.2 – Categories assigned to coefficient values
+ * [ Same as Table F.1, but does not include SSSS equal to 0 and 11]
+ *
+ *
+ * CCITT Rec. T.81 (1992 E)
+ * F.1.2.1.1 Structure of DC code table
+ * For each category,... additional bits... appended... to uniquely identify
+ * which difference... occurred... When DIFF is positive... SSSS... bits of DIFF
+ * are appended. When DIFF is negative... SSSS... bits of (DIFF – 1) are
+ * appended... Most significant bit... is 0 for negative differences and 1 for
+ * positive differences.
+ *
+ * In other words the upper half of extra bits range represents DIFF as is.
+ * The lower half represents the negative DIFFs with an offset.
+ */
+int HuffExtend(int x, int s) {
+  JXL_DASSERT(s >= 1);
+  const int threshold = 1 << (s - 1);
+  if (x < threshold) {
+    // Lower half of the range encodes the negative values with an offset.
+    return x - (1 << s) + 1;
+  }
+  JXL_DASSERT(x < (1 << s));
+  return x;  // Upper half of the range is the value itself.
+}
+
+// Decodes one 8x8 block of DCT coefficients from the bit stream.
+bool DecodeDCTBlock(const HuffmanTableEntry* dc_huff,
+                    const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al,
+                    int* eobrun, BitReaderState* br, coeff_t* last_dc_coeff,
+                    coeff_t* coeffs) {
+  // Nowadays multiplication is even faster than variable shift.
+  int Am = 1 << Al;
+  bool eobrun_allowed = Ss > 0;  // Multi-block EOB runs need Ss > 0.
+  if (Ss == 0) {
+    int s = ReadSymbol(dc_huff, br);
+    if (s >= kJpegDCAlphabetSize) {
+      return false;
+    }
+    int diff = 0;
+    if (s > 0) {
+      int bits = br->ReadBits(s);
+      diff = HuffExtend(bits, s);
+    }
+    int coeff = diff + *last_dc_coeff;  // DC is coded relative to the last one.
+    const int dc_coeff = coeff * Am;
+    coeffs[0] = dc_coeff;
+    // TODO(eustas): is there a more elegant / explicit way to check this?
+    if (dc_coeff != coeffs[0]) {
+      return false;
+    }
+    *last_dc_coeff = coeff;
+    ++Ss;
+  }
+  if (Ss > Se) {
+    return true;
+  }
+  if (*eobrun > 0) {  // Still inside an end-of-block run.
+    --(*eobrun);
+    return true;
+  }
+  for (int k = Ss; k <= Se; k++) {
+    int sr = ReadSymbol(ac_huff, br);
+    if (sr >= kJpegHuffmanAlphabetSize) {
+      return false;
+    }
+    int r = sr >> 4;  // High nibble: run length.
+    int s = sr & 15;  // Low nibble: number of extra bits.
+    if (s > 0) {
+      k += r;
+      if (k > Se) {
+        return false;
+      }
+      if (s + Al >= kJpegDCAlphabetSize) {
+        return false;
+      }
+      int bits = br->ReadBits(s);
+      int coeff = HuffExtend(bits, s);
+      coeffs[kJPEGNaturalOrder[k]] = coeff * Am;
+    } else if (r == 15) {
+      k += 15;  // Together with the loop's k++ this skips 16 positions.
+    } else {
+      *eobrun = 1 << r;
+      if (r > 0) {
+        if (!eobrun_allowed) {
+          return false;
+        }
+        *eobrun += br->ReadBits(r);
+      }
+      break;
+    }
+  }
+  --(*eobrun);
+  return true;
+}
+
+bool RefineDCTBlock(const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al,
+                    int* eobrun, BitReaderState* br, coeff_t* coeffs) {
+  // Nowadays multiplication is even faster than variable shift.
+  int Am = 1 << Al;
+  bool eobrun_allowed = Ss > 0;  // Multi-block EOB runs need Ss > 0.
+  if (Ss == 0) {
+    int s = br->ReadBits(1);  // DC refinement is a single bit.
+    coeff_t dc_coeff = coeffs[0];
+    dc_coeff |= s * Am;
+    coeffs[0] = dc_coeff;
+    ++Ss;
+  }
+  if (Ss > Se) {
+    return true;
+  }
+  int p1 = Am;   // +1 in the bit position being refined.
+  int m1 = -Am;  // -1 in the bit position being refined.
+  int k = Ss;
+  int r;
+  int s;
+  bool in_zero_run = false;
+  if (*eobrun <= 0) {
+    for (; k <= Se; k++) {
+      s = ReadSymbol(ac_huff, br);
+      if (s >= kJpegHuffmanAlphabetSize) {
+        return false;
+      }
+      r = s >> 4;
+      s &= 15;
+      if (s) {
+        if (s != 1) {  // Only size-1 symbols are accepted here.
+          return false;
+        }
+        s = br->ReadBits(1) ? p1 : m1;
+        in_zero_run = false;
+      } else {
+        if (r != 15) {
+          *eobrun = 1 << r;
+          if (r > 0) {
+            if (!eobrun_allowed) {
+              return false;
+            }
+            *eobrun += br->ReadBits(r);
+          }
+          break;
+        }
+        in_zero_run = true;
+      }
+      do {
+        coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]];
+        if (thiscoef != 0) {
+          if (br->ReadBits(1)) {
+            if ((thiscoef & p1) == 0) {
+              if (thiscoef >= 0) {
+                thiscoef += p1;
+              } else {
+                thiscoef += m1;
+              }
+            }
+          }
+          coeffs[kJPEGNaturalOrder[k]] = thiscoef;
+        } else {
+          if (--r < 0) {  // Zero run used up: stop at this position.
+            break;
+          }
+        }
+        k++;
+      } while (k <= Se);
+      if (s) {
+        if (k > Se) {
+          return false;
+        }
+        coeffs[kJPEGNaturalOrder[k]] = s;
+      }
+    }
+  }
+  if (in_zero_run) {
+    return false;
+  }
+  if (*eobrun > 0) {  // Inside an EOB run: only nonzero coeffs are refined.
+    for (; k <= Se; k++) {
+      coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]];
+      if (thiscoef != 0) {
+        if (br->ReadBits(1)) {
+          if ((thiscoef & p1) == 0) {
+            if (thiscoef >= 0) {
+              thiscoef += p1;
+            } else {
+              thiscoef += m1;
+            }
+          }
+        }
+        coeffs[kJPEGNaturalOrder[k]] = thiscoef;
+      }
+    }
+  }
+  --(*eobrun);
+  return true;
+}
+
+void SaveMCUCodingState(j_decompress_ptr cinfo) {
+  // Snapshots the DC predictors, the EOB run and the coefficient blocks of the
+  // MCU at (scan_mcu_row_, scan_mcu_col_) into m->mcu_.
+  jpeg_decomp_master* m = cinfo->master;
+  m->mcu_.eobrun = m->eobrun_;
+  memcpy(m->mcu_.last_dc_coeff, m->last_dc_coeff_, sizeof(m->last_dc_coeff_));
+  size_t saved = 0;
+  for (int ci = 0; ci < cinfo->comps_in_scan; ++ci) {
+    const jpeg_component_info* comp = cinfo->cur_comp_info[ci];
+    const int c = comp->component_index;
+    const size_t col = m->scan_mcu_col_ * comp->MCU_width;
+    for (int dy = 0; dy < comp->MCU_height; ++dy) {
+      const size_t row = m->scan_mcu_row_ * comp->MCU_height + dy;
+      const size_t row_in_group = row % comp->v_samp_factor;
+      if (row < comp->height_in_blocks) {
+        const size_t count = DCTSIZE2 * std::min<size_t>(
+            comp->MCU_width, comp->width_in_blocks - col);
+        const coeff_t* src = &m->coeff_rows[c][row_in_group][col][0];
+        memcpy(&m->mcu_.coeffs[saved], src, count * sizeof(src[0]));
+        saved += count;
+      }
+    }
+  }
+}
+
+void RestoreMCUCodingState(j_decompress_ptr cinfo) {
+  // Writes the DC predictors, the EOB run and the coefficient blocks kept in
+  // m->mcu_ back to the MCU at (scan_mcu_row_, scan_mcu_col_).
+  jpeg_decomp_master* m = cinfo->master;
+  m->eobrun_ = m->mcu_.eobrun;
+  memcpy(m->last_dc_coeff_, m->mcu_.last_dc_coeff, sizeof(m->last_dc_coeff_));
+  size_t restored = 0;
+  for (int ci = 0; ci < cinfo->comps_in_scan; ++ci) {
+    const jpeg_component_info* comp = cinfo->cur_comp_info[ci];
+    const int c = comp->component_index;
+    const size_t col = m->scan_mcu_col_ * comp->MCU_width;
+    for (int dy = 0; dy < comp->MCU_height; ++dy) {
+      const size_t row = m->scan_mcu_row_ * comp->MCU_height + dy;
+      const size_t row_in_group = row % comp->v_samp_factor;
+      if (row < comp->height_in_blocks) {
+        const size_t count = DCTSIZE2 * std::min<size_t>(
+            comp->MCU_width, comp->width_in_blocks - col);
+        coeff_t* dst = &m->coeff_rows[c][row_in_group][col][0];
+        memcpy(dst, &m->mcu_.coeffs[restored], count * sizeof(dst[0]));
+        restored += count;
+      }
+    }
+  }
+}
+
+bool FinishScan(j_decompress_ptr cinfo, const uint8_t* data, const size_t len,
+                size_t* pos, size_t* bit_pos) {
+  // Checks the EOB run, resets the entropy coding state and moves the read
+  // position forward to the next byte boundary.
+  jpeg_decomp_master* m = cinfo->master;
+  if (m->eobrun_ > 0) {
+    JPEGLI_ERROR("End-of-block run too long.");
+  }
+  memset(m->last_dc_coeff_, 0, sizeof(m->last_dc_coeff_));
+  m->eobrun_ = -1;
+  if (*bit_pos != 0) {
+    // Drop the partially consumed byte; an 0xff also takes its stuffed zero.
+    const bool is_escape = data[*pos] == 0xff;
+    if (is_escape) {
+      // The last br.FinishStream call is expected to have ensured that two
+      // bytes are available and that no marker starts at this position.
+      JXL_DASSERT(*pos + 1 < len);
+      JXL_DASSERT(data[*pos + 1] == 0);
+    }
+    *pos += is_escape ? 2 : 1;
+    *bit_pos = 0;
+  }
+  return true;
+}
+
+}  // namespace
+
+void PrepareForiMCURow(j_decompress_ptr cinfo) {
+  // Maps coeff_rows to the block rows of iMCU row cinfo->input_iMCU_row.
+  jpeg_decomp_master* m = cinfo->master;
+  for (int ci = 0; ci < cinfo->comps_in_scan; ++ci) {
+    const jpeg_component_info* comp = cinfo->cur_comp_info[ci];
+    const int first_row = cinfo->input_iMCU_row * comp->v_samp_factor;
+    const int rows_left = comp->height_in_blocks - first_row;
+    const int num_rows = std::min(comp->v_samp_factor, rows_left);
+    m->coeff_rows[comp->component_index] = (*cinfo->mem->access_virt_barray)(
+        reinterpret_cast<j_common_ptr>(cinfo),
+        m->coef_arrays[comp->component_index],
+        m->streaming_mode_ ? 0 : first_row, num_rows, true);
+  }
+}
+
+int ProcessScan(j_decompress_ptr cinfo, const uint8_t* const data,
+                const size_t len, size_t* pos, size_t* bit_pos) {
+  if (len == 0) {
+    return kNeedMoreInput;
+  }
+  jpeg_decomp_master* m = cinfo->master;
+  for (;;) {  // One MCU per iteration.
+    // Handle the restart intervals.
+    if (cinfo->restart_interval > 0 && m->restarts_to_go_ == 0) {
+      if (!FinishScan(cinfo, data, len, pos, bit_pos)) {
+        return kNeedMoreInput;
+      }
+      // Go to the next marker, warn if we had to skip any data.
+      size_t num_skipped = 0;
+      while (*pos + 1 < len && (data[*pos] != 0xff || data[*pos + 1] == 0 ||
+                                data[*pos + 1] == 0xff)) {
+        ++(*pos);
+        ++num_skipped;
+      }
+      if (num_skipped > 0) {
+        JPEGLI_WARN("Skipped %d bytes before restart marker", (int)num_skipped);
+      }
+      if (*pos + 2 > len) {
+        return kNeedMoreInput;
+      }
+      cinfo->unread_marker = data[*pos + 1];
+      *pos += 2;
+      return kHandleRestart;
+    }
+
+    size_t start_pos = *pos;
+    BitReaderState br(data, len, start_pos);
+    if (*bit_pos > 0) {  // Resume in the middle of a byte.
+      br.ReadBits(*bit_pos);
+    }
+    if (start_pos + kMaxMCUByteSize > len) {  // MCU may not fit: keep a backup.
+      SaveMCUCodingState(cinfo);
+    }
+
+    // Decode one MCU.
+    HWY_ALIGN_MAX coeff_t dummy_block[DCTSIZE2];
+    bool scan_ok = true;
+    for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+      const jpeg_component_info* comp = cinfo->cur_comp_info[i];
+      int c = comp->component_index;
+      const HuffmanTableEntry* dc_lut =
+          &m->dc_huff_lut_[comp->dc_tbl_no * kJpegHuffmanLutSize];
+      const HuffmanTableEntry* ac_lut =
+          &m->ac_huff_lut_[comp->ac_tbl_no * kJpegHuffmanLutSize];
+      for (int iy = 0; iy < comp->MCU_height; ++iy) {
+        size_t block_y = m->scan_mcu_row_ * comp->MCU_height + iy;
+        int biy = block_y % comp->v_samp_factor;
+        for (int ix = 0; ix < comp->MCU_width; ++ix) {
+          size_t block_x = m->scan_mcu_col_ * comp->MCU_width + ix;
+          coeff_t* coeffs;
+          if (block_x >= comp->width_in_blocks ||
+              block_y >= comp->height_in_blocks) {
+            // Note that it is OK that dummy_block is uninitialized because
+            // it will never be used in any branches, even in the RefineDCTBlock
+            // case, because only DC scans can be interleaved and we don't use
+            // the zero-ness of the DC coeff in the DC refinement code-path.
+            coeffs = dummy_block;
+          } else {
+            coeffs = &m->coeff_rows[c][biy][block_x][0];
+          }
+          if (cinfo->Ah == 0) {  // First pass over this bit range.
+            if (!DecodeDCTBlock(dc_lut, ac_lut, cinfo->Ss, cinfo->Se, cinfo->Al,
+                                &m->eobrun_, &br,
+                                &m->last_dc_coeff_[comp->component_index],
+                                coeffs)) {
+              scan_ok = false;
+            }
+          } else {  // Refinement pass.
+            if (!RefineDCTBlock(ac_lut, cinfo->Ss, cinfo->Se, cinfo->Al,
+                                &m->eobrun_, &br, coeffs)) {
+              scan_ok = false;
+            }
+          }
+        }
+      }
+    }
+    size_t new_pos;
+    size_t new_bit_pos;
+    bool stream_ok = br.FinishStream(&new_pos, &new_bit_pos);
+    if (new_pos + 2 > len) {
+      // If reading stopped within the last two bytes, we have to request more
+      // input even if FinishStream() returned true, since the Huffman code
+      // reader could have peeked ahead some bits past the current input chunk
+      // and thus the last prefix code length could have been wrong. We can do
+      // this because a valid JPEG bit stream has two extra bytes at the end.
+      RestoreMCUCodingState(cinfo);
+      return kNeedMoreInput;
+    }
+    *pos = new_pos;
+    *bit_pos = new_bit_pos;
+    if (!stream_ok) {
+      // We hit a marker during parsing.
+      JXL_DASSERT(data[*pos] == 0xff);
+      JXL_DASSERT(data[*pos + 1] != 0);
+      RestoreMCUCodingState(cinfo);
+      JPEGLI_WARN("Incomplete scan detected.");
+      return JPEG_SCAN_COMPLETED;
+    }
+    if (!scan_ok) {
+      JPEGLI_ERROR("Failed to decode DCT block");
+    }
+    if (m->restarts_to_go_ > 0) {
+      --m->restarts_to_go_;
+    }
+    ++m->scan_mcu_col_;
+    if (m->scan_mcu_col_ == cinfo->MCUs_per_row) {  // End of an MCU row.
+      ++m->scan_mcu_row_;
+      m->scan_mcu_col_ = 0;
+      if (m->scan_mcu_row_ == cinfo->MCU_rows_in_scan) {
+        if (!FinishScan(cinfo, data, len, pos, bit_pos)) {
+          return kNeedMoreInput;
+        }
+        break;
+      } else if ((m->scan_mcu_row_ % m->mcu_rows_per_iMCU_row_) == 0) {
+        // Current iMCU row is done.
+        break;
+      }
+    }
+  }
+  ++cinfo->input_iMCU_row;
+  if (cinfo->input_iMCU_row < cinfo->total_iMCU_rows) {
+    PrepareForiMCURow(cinfo);
+    return JPEG_ROW_COMPLETED;
+  }
+  return JPEG_SCAN_COMPLETED;
+}
+
+}  // namespace jpegli
diff --git a/third-party/libjxl/libjxl/lib/jpegli/decode_scan.h b/third-party/libjxl/libjxl/lib/jpegli/decode_scan.h
new file mode 100644
index 0000000000..1d7b18fc1a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/decode_scan.h
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_DECODE_SCAN_H_
+#define LIB_JPEGLI_DECODE_SCAN_H_
+
+#include <stdint.h>
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+// Reads the available input in the source manager's input buffer until the end
+// of the next iMCU row.
+// The corresponding fields of cinfo are updated with the processed input data.
+// Upon return, the input buffer will be at the start of an MCU, or at the end
+// of the scan.
+// Return value is one of:
+//   * JPEG_SUSPENDED, if the input buffer ends before the end of an iMCU row;
+//   * JPEG_ROW_COMPLETED, if the next iMCU row (but not the scan) is reached;
+//   * JPEG_SCAN_COMPLETED, if the end of the scan is reached.
+int ProcessScan(j_decompress_ptr cinfo, const uint8_t* const data,
+                const size_t len, size_t* pos, size_t* bit_pos);
+
+void PrepareForiMCURow(j_decompress_ptr cinfo);
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_DECODE_SCAN_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/destination_manager.cc b/third-party/libjxl/libjxl/lib/jpegli/destination_manager.cc
new file mode 100644
index 0000000000..9bc269f0c9
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/destination_manager.cc
@@ -0,0 +1,148 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <string.h>
+
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+
+namespace jpegli {
+
+constexpr size_t kDestBufferSize = 64 << 10;  // 64 KiB default output buffer.
+
+struct StdioDestinationManager {  // Buffers output and writes it to a FILE*.
+  jpeg_destination_mgr pub;  // First member: cinfo->dest is cast to this type.
+  FILE* f;                   // Stream that receives the output bytes.
+  uint8_t* buffer;           // Staging buffer of kDestBufferSize bytes.
+
+  static void init_destination(j_compress_ptr cinfo) {  // Marks buffer empty.
+    auto* mgr = reinterpret_cast<StdioDestinationManager*>(cinfo->dest);
+    mgr->pub.free_in_buffer = kDestBufferSize;
+    mgr->pub.next_output_byte = mgr->buffer;
+  }
+
+  static boolean empty_output_buffer(j_compress_ptr cinfo) {
+    auto* mgr = reinterpret_cast<StdioDestinationManager*>(cinfo->dest);
+    const size_t written = fwrite(mgr->buffer, 1, kDestBufferSize, mgr->f);
+    if (written != kDestBufferSize) {
+      JPEGLI_ERROR("Failed to write to output stream.");
+    }
+    init_destination(cinfo);  // The whole buffer was written out; start over.
+    return TRUE;
+  }
+
+  static void term_destination(j_compress_ptr cinfo) {
+    auto* mgr = reinterpret_cast<StdioDestinationManager*>(cinfo->dest);
+    const size_t pending = kDestBufferSize - mgr->pub.free_in_buffer;
+    if (pending != 0 &&  // Write out whatever is still buffered.
+        fwrite(mgr->buffer, 1, pending, mgr->f) != pending) {
+      JPEGLI_ERROR("Failed to write to output stream.");
+    }
+    fflush(mgr->f);
+    if (ferror(mgr->f) != 0) {
+      JPEGLI_ERROR("Failed to write to output stream.");
+    }
+  }
+};
+
+struct MemoryDestinationManager {  // Writes output to a growable memory buffer.
+  jpeg_destination_mgr pub;  // First member: cinfo->dest is cast to this type.
+  // Application-owned pointer and size, updated to describe the output buffer.
+  uint8_t** output;
+  unsigned long* output_size;
+  // Output buffer allocated by us with malloc(), or nullptr if there is none.
+  uint8_t* temp_buffer;
+  // Current output buffer (either application supplied or allocated by us).
+  uint8_t* current_buffer;
+  size_t buffer_size;  // Capacity of current_buffer in bytes.
+
+  static void init_destination(j_compress_ptr cinfo) {}  // Nothing to set up.
+
+  static boolean empty_output_buffer(j_compress_ptr cinfo) {  // Doubles buffer.
+    auto dest = reinterpret_cast<MemoryDestinationManager*>(cinfo->dest);
+    auto* next_buffer = static_cast<uint8_t*>(malloc(dest->buffer_size * 2));
+    if (next_buffer == nullptr) { JPEGLI_ERROR("Out of memory."); }
+    memcpy(next_buffer, dest->current_buffer, dest->buffer_size);
+    if (dest->temp_buffer != nullptr) {  // Only buffers we allocated are freed.
+      free(dest->temp_buffer);
+    }
+    dest->temp_buffer = next_buffer;
+    dest->current_buffer = next_buffer;
+    *dest->output = next_buffer;  // Keep the application's view up to date.
+    *dest->output_size = dest->buffer_size;
+    dest->pub.next_output_byte = next_buffer + dest->buffer_size;
+    dest->pub.free_in_buffer = dest->buffer_size;  // The new second half.
+    dest->buffer_size *= 2;
+    return TRUE;
+  }
+
+  static void term_destination(j_compress_ptr cinfo) {  // Reports bytes used.
+    auto dest = reinterpret_cast<MemoryDestinationManager*>(cinfo->dest);
+    *dest->output_size = dest->buffer_size - dest->pub.free_in_buffer;
+  }
+};
+
+}  // namespace jpegli
+
+void jpegli_stdio_dest(j_compress_ptr cinfo, FILE* outfile) {
+  // Installs (or re-targets) a stdio destination manager writing to outfile.
+  using Manager = jpegli::StdioDestinationManager;
+  if (!outfile) {
+    JPEGLI_ERROR("jpegli_stdio_dest: Invalid destination.");
+  }
+  if (cinfo->dest != nullptr &&
+      cinfo->dest->init_destination != Manager::init_destination) {
+    JPEGLI_ERROR("jpegli_stdio_dest: a different dest manager was already set");
+  }
+  if (cinfo->dest == nullptr) {
+    cinfo->dest = reinterpret_cast<jpeg_destination_mgr*>(
+        jpegli::Allocate<Manager>(cinfo, 1));
+  }
+  auto* mgr = reinterpret_cast<Manager*>(cinfo->dest);
+  mgr->f = outfile;
+  mgr->buffer = jpegli::Allocate<uint8_t>(cinfo, jpegli::kDestBufferSize);
+  mgr->pub.next_output_byte = mgr->buffer;
+  mgr->pub.free_in_buffer = jpegli::kDestBufferSize;
+  // Register this manager's callbacks on the public destination struct.
+  mgr->pub.init_destination = Manager::init_destination;
+  mgr->pub.empty_output_buffer = Manager::empty_output_buffer;
+  mgr->pub.term_destination = Manager::term_destination;
+}
+
+void jpegli_mem_dest(j_compress_ptr cinfo, unsigned char** outbuffer,
+                     unsigned long* outsize) {  // Installs a memory destination.
+  if (outbuffer == nullptr || outsize == nullptr) {
+    JPEGLI_ERROR("jpegli_mem_dest: Invalid destination.");
+  }
+  if (cinfo->dest && cinfo->dest->init_destination !=
+                         jpegli::MemoryDestinationManager::init_destination) {
+    JPEGLI_ERROR("jpegli_mem_dest: a different dest manager was already set");
+  }
+  if (!cinfo->dest) {  // First call: create the manager with no own buffer.
+    auto dest = jpegli::Allocate<jpegli::MemoryDestinationManager>(cinfo, 1);
+    dest->temp_buffer = nullptr;
+    cinfo->dest = reinterpret_cast<jpeg_destination_mgr*>(dest);
+  }
+  auto dest = reinterpret_cast<jpegli::MemoryDestinationManager*>(cinfo->dest);
+  dest->pub.init_destination =
+      jpegli::MemoryDestinationManager::init_destination;
+  dest->pub.empty_output_buffer =
+      jpegli::MemoryDestinationManager::empty_output_buffer;
+  dest->pub.term_destination =
+      jpegli::MemoryDestinationManager::term_destination;
+  dest->output = outbuffer;
+  dest->output_size = outsize;
+  if (*outbuffer == nullptr || *outsize == 0) {  // No usable caller buffer.
+    dest->temp_buffer = static_cast<uint8_t*>(malloc(jpegli::kDestBufferSize));
+    if (dest->temp_buffer == nullptr) { JPEGLI_ERROR("Out of memory."); }
+    *outbuffer = dest->temp_buffer;
+    *outsize = jpegli::kDestBufferSize;
+  }
+  dest->current_buffer = *outbuffer;
+  dest->buffer_size = *outsize;
+  dest->pub.next_output_byte = dest->current_buffer;
+  dest->pub.free_in_buffer = dest->buffer_size;
+}
diff --git a/third-party/libjxl/libjxl/lib/jpegli/downsample.cc b/third-party/libjxl/libjxl/lib/jpegli/downsample.cc
new file mode 100644
index 0000000000..df2c156972
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/downsample.cc
@@ -0,0 +1,356 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/downsample.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/downsample.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/error.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::Vec;
+
+using D = HWY_CAPPED(float, 8);
+constexpr D d;
+
+void DownsampleRow2x1(const float* row_in, size_t len, float* row_out) {
+  // Averages each pair of adjacent input samples into one output sample.
+  const auto half = Set(d, 0.5f);
+  const size_t lanes = Lanes(d);
+  Vec<D> s0, s1;
+  for (size_t out = 0, out_len = len / 2; out < out_len; out += lanes) {
+    LoadInterleaved2(d, row_in + 2 * out, s0, s1);
+    Store(Mul(half, Add(s0, s1)), d, row_out + out);
+  }
+}
+
+void DownsampleRow3x1(const float* row_in, size_t len, float* row_out) {
+  // Averages each run of three adjacent input samples into one output sample.
+  const auto third = Set(d, 1.0f / 3);
+  const size_t lanes = Lanes(d);
+  Vec<D> s0, s1, s2;
+  for (size_t out = 0, out_len = len / 3; out < out_len; out += lanes) {
+    LoadInterleaved3(d, row_in + 3 * out, s0, s1, s2);
+    Store(Mul(third, Add(Add(s0, s1), s2)), d, row_out + out);
+  }
+}
+
+void DownsampleRow4x1(const float* row_in, size_t len, float* row_out) {
+  // Averages each run of four adjacent input samples into one output sample.
+  const auto quarter = Set(d, 0.25f);
+  const size_t lanes = Lanes(d);
+  Vec<D> s0, s1, s2, s3;
+  for (size_t out = 0, out_len = len / 4; out < out_len; out += lanes) {
+    LoadInterleaved4(d, row_in + 4 * out, s0, s1, s2, s3);
+    Store(Mul(quarter, Add(Add(s0, s1), Add(s2, s3))), d, row_out + out);
+  }
+}
+
+void Downsample2x1(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {  // Horizontal 2:1; reads only rows_in[0].
+  DownsampleRow2x1(rows_in[0], len, row_out);
+}
+
+void Downsample3x1(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {  // Horizontal 3:1; reads only rows_in[0].
+  DownsampleRow3x1(rows_in[0], len, row_out);
+}
+
+void Downsample4x1(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {  // Horizontal 4:1; reads only rows_in[0].
+  DownsampleRow4x1(rows_in[0], len, row_out);
+}
+
+void Downsample1x2(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  // Averages rows_in[0] and rows_in[1] sample by sample.
+  const float* in0 = rows_in[0];
+  const float* in1 = rows_in[1];
+  const auto half = Set(d, 0.5f);
+  for (size_t x = 0, lanes = Lanes(d); x < len; x += lanes) {
+    Store(Mul(half, Add(Load(d, in0 + x), Load(d, in1 + x))), d, row_out + x);
+  }
+}
+
+void Downsample2x2(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  // Averages every 2x2 block spanning rows_in[0] and rows_in[1].
+  const float* in0 = rows_in[0];
+  const float* in1 = rows_in[1];
+  const auto quarter = Set(d, 0.25f);
+  const size_t lanes = Lanes(d);
+  Vec<D> a0, a1, b0, b1;
+  for (size_t out = 0, out_len = len / 2; out < out_len; out += lanes) {
+    LoadInterleaved2(d, in0 + 2 * out, a0, a1);
+    LoadInterleaved2(d, in1 + 2 * out, b0, b1);
+    Store(Mul(quarter, Add(Add(a0, a1), Add(b0, b1))), d, row_out + out);
+  }
+}
+
+void Downsample3x2(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  // Shrink both rows 3:1 horizontally in place, then average them.
+  for (int i = 0; i < 2; ++i) DownsampleRow3x1(rows_in[i], len, rows_in[i]);
+  Downsample1x2(rows_in, len / 3, row_out);
+}
+
+void Downsample4x2(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  // Shrink both rows 4:1 horizontally in place, then average them.
+  for (int i = 0; i < 2; ++i) DownsampleRow4x1(rows_in[i], len, rows_in[i]);
+  Downsample1x2(rows_in, len / 4, row_out);
+}
+
+void Downsample1x3(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  // Averages rows_in[0], rows_in[1] and rows_in[2] sample by sample.
+  const float* in0 = rows_in[0];
+  const float* in1 = rows_in[1];
+  const float* in2 = rows_in[2];
+  const auto third = Set(d, 1.0f / 3);
+  const size_t lanes = Lanes(d);
+  for (size_t x = 0; x < len; x += lanes) {
+    const auto sum01 = Add(Load(d, in0 + x), Load(d, in1 + x));
+    const auto sum = Add(sum01, Load(d, in2 + x));
+    Store(Mul(third, sum), d, row_out + x);
+  }
+}
+
+void Downsample2x3(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  // Shrink the three rows 2:1 horizontally in place, then average them
+  // vertically.
+  for (int i = 0; i < 3; ++i) DownsampleRow2x1(rows_in[i], len, rows_in[i]);
+  Downsample1x3(rows_in, len / 2, row_out);
+}
+
+void Downsample3x3(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  // Shrink the three rows 3:1 horizontally in place, then average them
+  // vertically.
+  for (int i = 0; i < 3; ++i) DownsampleRow3x1(rows_in[i], len, rows_in[i]);
+  Downsample1x3(rows_in, len / 3, row_out);
+}
+
+void Downsample4x3(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  // Shrink the three rows 4:1 horizontally in place, then average them
+  // vertically.
+  for (int i = 0; i < 3; ++i) DownsampleRow4x1(rows_in[i], len, rows_in[i]);
+  Downsample1x3(rows_in, len / 4, row_out);
+}
+
+void Downsample1x4(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  // Averages rows_in[0..3] sample by sample, summing the rows pairwise as
+  // (row0 + row1) + (row2 + row3).
+  const float* in0 = rows_in[0];
+  const float* in1 = rows_in[1];
+  const float* in2 = rows_in[2];
+  const float* in3 = rows_in[3];
+  const auto quarter = Set(d, 0.25f);
+  const size_t lanes = Lanes(d);
+  for (size_t x = 0; x < len; x += lanes) {
+    const auto sum01 = Add(Load(d, in0 + x), Load(d, in1 + x));
+    const auto sum23 = Add(Load(d, in2 + x), Load(d, in3 + x));
+    Store(Mul(quarter, Add(sum01, sum23)), d, row_out + x);
+  }
+}
+
+void Downsample2x4(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  // Shrink the four rows 2:1 horizontally in place, then average them.
+  for (int i = 0; i < 4; ++i) {
+    DownsampleRow2x1(rows_in[i], len, rows_in[i]);
+  }
+  Downsample1x4(rows_in, len / 2, row_out);
+}
+
+void Downsample3x4(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  // Shrink the four rows 3:1 horizontally in place, then average them.
+  for (int i = 0; i < 4; ++i) {
+    DownsampleRow3x1(rows_in[i], len, rows_in[i]);
+  }
+  Downsample1x4(rows_in, len / 3, row_out);
+}
+
+void Downsample4x4(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  // Shrink the four rows 4:1 horizontally in place, then average them.
+  for (int i = 0; i < 4; ++i) {
+    DownsampleRow4x1(rows_in[i], len, rows_in[i]);
+  }
+  Downsample1x4(rows_in, len / 4, row_out);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+
+HWY_EXPORT(Downsample1x2);
+HWY_EXPORT(Downsample1x3);
+HWY_EXPORT(Downsample1x4);
+HWY_EXPORT(Downsample2x1);
+HWY_EXPORT(Downsample2x2);
+HWY_EXPORT(Downsample2x3);
+HWY_EXPORT(Downsample2x4);
+HWY_EXPORT(Downsample3x1);
+HWY_EXPORT(Downsample3x2);
+HWY_EXPORT(Downsample3x3);
+HWY_EXPORT(Downsample3x4);
+HWY_EXPORT(Downsample4x1);
+HWY_EXPORT(Downsample4x2);
+HWY_EXPORT(Downsample4x3);
+HWY_EXPORT(Downsample4x4);
+
+void NullDownsample(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                    float* row_out) {}  // No-op: selected for 1x1 components.
+
+void ChooseDownsampleMethods(j_compress_ptr cinfo) {  // Picks a kernel per comp.
+  jpeg_comp_master* m = cinfo->master;
+  for (int c = 0; c < cinfo->num_components; c++) {
+    m->downsample_method[c] = nullptr;  // Stays null for unsupported ratios.
+    jpeg_component_info* comp = &cinfo->comp_info[c];
+    const int h_factor = cinfo->max_h_samp_factor / comp->h_samp_factor;
+    const int v_factor = cinfo->max_v_samp_factor / comp->v_samp_factor;
+    if (v_factor == 1) {
+      if (h_factor == 1) {
+        m->downsample_method[c] = NullDownsample;
+      } else if (h_factor == 2) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample2x1);
+      } else if (h_factor == 3) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample3x1);
+      } else if (h_factor == 4) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample4x1);
+      }
+    } else if (v_factor == 2) {
+      if (h_factor == 1) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample1x2);
+      } else if (h_factor == 2) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample2x2);
+      } else if (h_factor == 3) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample3x2);
+      } else if (h_factor == 4) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample4x2);
+      }
+    } else if (v_factor == 3) {  // Must use the *x3 kernels (three input rows).
+      if (h_factor == 1) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample1x3);
+      } else if (h_factor == 2) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample2x3);
+      } else if (h_factor == 3) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample3x3);
+      } else if (h_factor == 4) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample4x3);
+      }
+    } else if (v_factor == 4) {
+      if (h_factor == 1) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample1x4);
+      } else if (h_factor == 2) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample2x4);
+      } else if (h_factor == 3) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample3x4);
+      } else if (h_factor == 4) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample4x4);
+      }
+    }
+    if (m->downsample_method[c] == nullptr) {
+      JPEGLI_ERROR("Unsupported downsampling ratio %dx%d", h_factor, v_factor);
+    }
+  }
+}
+
+void DownsampleInputBuffer(j_compress_ptr cinfo) {
+  // Nothing to do when no component can be subsampled.
+  if (cinfo->max_h_samp_factor == 1 && cinfo->max_v_samp_factor == 1) {
+    return;
+  }
+  jpeg_comp_master* m = cinfo->master;
+  const size_t rows_per_iMCU = DCTSIZE * cinfo->max_v_samp_factor;
+  const size_t row_begin = m->next_iMCU_row * rows_per_iMCU;
+  const size_t row_end = row_begin + rows_per_iMCU;
+  const size_t padded_width = m->xsize_blocks * DCTSIZE;
+  for (int c = 0; c < cinfo->num_components; c++) {
+    const jpeg_component_info& comp = cinfo->comp_info[c];
+    const int h_factor = cinfo->max_h_samp_factor / comp.h_samp_factor;
+    const int v_factor = cinfo->max_v_samp_factor / comp.v_samp_factor;
+    const bool subsampled = h_factor != 1 || v_factor != 1;
+    if (!subsampled) continue;
+    auto& input = *m->smooth_input[c];
+    auto& output = *m->raw_data[c];
+    // Each group of v_factor input rows produces one downsampled output row.
+    for (size_t yin = row_begin; yin < row_end; yin += v_factor) {
+      float* rows_in[MAX_SAMP_FACTOR];
+      for (int iy = 0; iy < v_factor; ++iy) {
+        rows_in[iy] = input.Row(yin + iy);
+      }
+      float* row_out = output.Row(yin / v_factor);
+      (*m->downsample_method[c])(rows_in, padded_width, row_out);
+    }
+  }
+}
+
+void ApplyInputSmoothing(j_compress_ptr cinfo) {
+  // A zero smoothing factor disables this step entirely.
+  if (!cinfo->smoothing_factor) {
+    return;
+  }
+  jpeg_comp_master* m = cinfo->master;
+  // 3x3 kernel: each of the 8 neighbors is weighted kW1 and the center kW0,
+  // so the nine weights add up to one.
+  const float kW1 = cinfo->smoothing_factor / 1024.0;
+  const float kW0 = 1.0f - 8.0f * kW1;
+  const size_t rows_per_iMCU = DCTSIZE * cinfo->max_v_samp_factor;
+  const ssize_t row_begin = m->next_iMCU_row * rows_per_iMCU;
+  const ssize_t row_end = row_begin + rows_per_iMCU;
+  const ssize_t padded_width = m->xsize_blocks * DCTSIZE;
+  for (int c = 0; c < cinfo->num_components; c++) {
+    auto& input = m->input_buffer[c];
+    auto& output = *m->smooth_input[c];
+    // NOTE(review): CopyRow presumably duplicates the edge row into the row
+    // just outside the image so the kernel can read it - confirm.
+    if (m->next_iMCU_row == 0) {
+      input.CopyRow(-1, 0, 1);
+    }
+    if (m->next_iMCU_row + 1 == cinfo->total_iMCU_rows) {
+      size_t last_row = m->ysize_blocks * DCTSIZE - 1;
+      input.CopyRow(last_row + 1, last_row, 1);
+    }
+    // TODO(szabadka) SIMDify this.
+    for (ssize_t y = row_begin; y < row_end; ++y) {
+      const float* above = input.Row(y - 1);
+      const float* center = input.Row(y);
+      const float* below = input.Row(y + 1);
+      float* row_out = output.Row(y);
+      for (ssize_t x = 0; x < padded_width; ++x) {
+        // Reads columns x - 1 and x + 1 as well; the rows are assumed to be
+        // padded on both sides (TODO confirm). Sum order: left to right, top
+        // to bottom.
+        const float neighbors = above[x - 1] + above[x] + above[x + 1] +
+                                center[x - 1] + center[x + 1] + below[x - 1] +
+                                below[x] + below[x + 1];
+        row_out[x] = center[x] * kW0 + neighbors * kW1;
+      }
+    }
+  }
+}
+
+}  // namespace jpegli
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jpegli/downsample.h b/third-party/libjxl/libjxl/lib/jpegli/downsample.h
new file mode 100644
index 0000000000..3ccf069e4e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/downsample.h
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_DOWNSAMPLE_H_
+#define LIB_JPEGLI_DOWNSAMPLE_H_
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+void ChooseDownsampleMethods(j_compress_ptr cinfo);
+
+void DownsampleInputBuffer(j_compress_ptr cinfo);
+
+void ApplyInputSmoothing(j_compress_ptr cinfo);
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_DOWNSAMPLE_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/encode.cc b/third-party/libjxl/libjxl/lib/jpegli/encode.cc
new file mode 100644
index 0000000000..8a106e239a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/encode.cc
@@ -0,0 +1,1253 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/encode.h"
+
+#include <cmath>
+#include <initializer_list>
+#include <vector>
+
+#include "lib/jpegli/adaptive_quantization.h"
+#include "lib/jpegli/bit_writer.h"
+#include "lib/jpegli/bitstream.h"
+#include "lib/jpegli/color_transform.h"
+#include "lib/jpegli/downsample.h"
+#include "lib/jpegli/encode_finish.h"
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/encode_streaming.h"
+#include "lib/jpegli/entropy_coding.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/huffman.h"
+#include "lib/jpegli/input.h"
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jpegli/quant.h"
+
+namespace jpegli {
+
+constexpr size_t kMaxBytesInMarker = 65533;
+
+void CheckState(j_compress_ptr cinfo, int state) {
+  // Raises a jpegli error unless the compressor is in the expected state.
+  if (cinfo->global_state == state) return;
+  JPEGLI_ERROR("Unexpected global state %d [expected %d]",
+               cinfo->global_state, state);
+}
+
+void CheckState(j_compress_ptr cinfo, int state1, int state2) {
+  // Raises a jpegli error unless the state matches one of the two candidates.
+  if (cinfo->global_state == state1 || cinfo->global_state == state2) return;
+  JPEGLI_ERROR("Unexpected global state %d [expected %d or %d]",
+               cinfo->global_state, state1, state2);
+}
+
+//
+// Parameter setup
+//
+
+// Initialize cinfo fields that are not dependent on input image. This is shared
+// between jpegli_CreateCompress() and jpegli_set_defaults()
+void InitializeCompressParams(j_compress_ptr cinfo) {
+  cinfo->data_precision = 8;
+  cinfo->dct_method = JDCT_FLOAT;
+  cinfo->smoothing_factor = 0;
+  cinfo->scan_info = nullptr;  // No scan script selected.
+  cinfo->num_scans = 0;
+  cinfo->restart_interval = 0;
+  cinfo->restart_in_rows = 0;
+  cinfo->raw_data_in = FALSE;
+  cinfo->arith_code = FALSE;
+  cinfo->optimize_coding = FALSE;
+  cinfo->CCIR601_sampling = FALSE;
+  cinfo->write_JFIF_header = FALSE;
+  cinfo->JFIF_major_version = 1;
+  cinfo->JFIF_minor_version = 1;
+  cinfo->density_unit = 0;
+  cinfo->X_density = cinfo->Y_density = 1;
+#if JPEG_LIB_VERSION >= 70
+  cinfo->scale_num = 1;
+  cinfo->scale_denom = 1;
+  cinfo->do_fancy_downsampling = FALSE;
+  cinfo->min_DCT_h_scaled_size = DCTSIZE;
+  cinfo->min_DCT_v_scaled_size = DCTSIZE;
+#endif
+  jpeg_comp_master* m = cinfo->master;  // jpegli-specific settings.
+  m->psnr_target = 0.0f;
+  m->psnr_tolerance = 0.01f;
+  m->min_distance = 0.1f;
+  m->max_distance = 25.0f;
+}
+
+float LinearQualityToDistance(int scale_factor) {
+  const int clamped = std::max(0, std::min(5000, scale_factor));
+  // Factors below 100 map to quality 100 - x / 2; the others to 5000 / x.
+  const int quality = clamped < 100 ? 100 - clamped / 2 : 5000 / clamped;
+  return jpegli_quality_to_distance(quality);
+}
+
+template <typename T>
+void SetSentTableFlag(T** table_ptrs, size_t num, boolean val) {
+  for (T** it = table_ptrs; it != table_ptrs + num; ++it) {
+    if (*it != nullptr) (*it)->sent_table = val;  // Unset slots are skipped.
+  }
+}
+
+//
+// Compressor initialization
+//
+
+struct ProgressiveScan {  // Template for one scan of the default scan script.
+  int Ss, Se, Ah, Al;  // Spectral range [Ss, Se] and refinement bits Ah/Al.
+  bool interleaved;  // True if the scan may combine several components.
+};
+
+void SetDefaultScanScript(j_compress_ptr cinfo) {
+  const int level = cinfo->master->progressive_level;
+  // The DC scan is interleaved only when no component is subsampled.
+  const bool interleave_dc =
+      cinfo->max_h_samp_factor == 1 && cinfo->max_v_samp_factor == 1;
+  std::vector<ProgressiveScan> templates;
+  if (level == 0) {
+    templates = {{0, 63, 0, 0, true}};
+  } else if (level == 1) {
+    templates = {{0, 0, 0, 0, interleave_dc},
+                 {1, 63, 0, 1, false},
+                 {1, 63, 1, 0, false}};
+  } else {
+    templates = {{0, 0, 0, 0, interleave_dc},
+                 {1, 2, 0, 0, false},
+                 {3, 63, 0, 2, false},
+                 {3, 63, 2, 1, false},
+                 {3, 63, 1, 0, false}};
+  }
+
+  cinfo->script_space_size = 0;
+  for (const ProgressiveScan& tmpl : templates) {
+    const int group = tmpl.interleaved ? MAX_COMPS_IN_SCAN : 1;
+    cinfo->script_space_size += DivCeil(cinfo->num_components, group);
+  }
+  cinfo->script_space =
+      Allocate<jpeg_scan_info>(cinfo, cinfo->script_space_size);
+
+  jpeg_scan_info* out = cinfo->script_space;
+  for (const ProgressiveScan& tmpl : templates) {
+    const int group = tmpl.interleaved ? MAX_COMPS_IN_SCAN : 1;
+    for (int first = 0; first < cinfo->num_components; first += group, ++out) {
+      out->Ss = tmpl.Ss;
+      out->Se = tmpl.Se;
+      out->Ah = tmpl.Ah;
+      out->Al = tmpl.Al;
+      out->comps_in_scan = std::min(group, cinfo->num_components - first);
+      for (int j = 0; j < out->comps_in_scan; ++j) {
+        out->component_index[j] = first + j;
+      }
+    }
+  }
+  JXL_ASSERT(out - cinfo->script_space == cinfo->script_space_size);
+  cinfo->scan_info = cinfo->script_space;
+  cinfo->num_scans = cinfo->script_space_size;
+}
+
+void ValidateScanScript(j_compress_ptr cinfo) {  // Errors on an invalid script.
+  // Mask of coefficient bits defined by the scan script, for each component
+  // and coefficient index.
+  uint16_t comp_mask[kMaxComponents][DCTSIZE2] = {};
+  static constexpr int kMaxRefinementBit = 10;
+
+  for (int i = 0; i < cinfo->num_scans; ++i) {
+    const jpeg_scan_info& si = cinfo->scan_info[i];
+    if (si.comps_in_scan < 1 || si.comps_in_scan > MAX_COMPS_IN_SCAN) {
+      JPEGLI_ERROR("Invalid number of components in scan %d", si.comps_in_scan);
+    }
+    int last_ci = -1;  // Component indices must be strictly increasing.
+    for (int j = 0; j < si.comps_in_scan; ++j) {
+      int ci = si.component_index[j];
+      if (ci < 0 || ci >= cinfo->num_components) {
+        JPEGLI_ERROR("Invalid component index %d in scan", ci);
+      } else if (ci == last_ci) {
+        JPEGLI_ERROR("Duplicate component index %d in scan", ci);
+      } else if (ci < last_ci) {
+        JPEGLI_ERROR("Out of order component index %d in scan", ci);
+      }
+      last_ci = ci;
+    }
+    if (si.Ss < 0 || si.Se < si.Ss || si.Se >= DCTSIZE2) {
+      JPEGLI_ERROR("Invalid spectral range %d .. %d in scan", si.Ss, si.Se);
+    }
+    if (si.Ah < 0 || si.Al < 0 || si.Al > kMaxRefinementBit) {
+      JPEGLI_ERROR("Invalid refinement bits %d/%d", si.Ah, si.Al);
+    }
+    if (!cinfo->progressive_mode) {
+      if (si.Ss != 0 || si.Se != DCTSIZE2 - 1 || si.Ah != 0 || si.Al != 0) {
+        JPEGLI_ERROR("Invalid scan for sequential mode");
+      }
+    } else {
+      if (si.Ss == 0 && si.Se != 0) {
+        JPEGLI_ERROR("DC and AC together in progressive scan");
+      }
+    }
+    if (si.Ss != 0 && si.comps_in_scan != 1) {
+      JPEGLI_ERROR("Interleaved AC only scan.");
+    }
+    for (int j = 0; j < si.comps_in_scan; ++j) {
+      int ci = si.component_index[j];
+      if (si.Ss != 0 && comp_mask[ci][0] == 0) {
+        JPEGLI_ERROR("AC before DC in component %d of scan", ci);
+      }
+      for (int k = si.Ss; k <= si.Se; ++k) {
+        if (comp_mask[ci][k] == 0) {  // First scan touching this coefficient.
+          if (si.Ah != 0) {
+            JPEGLI_ERROR("Invalid first scan refinement bit");
+          }
+          comp_mask[ci][k] = ((0xffff << si.Al) & 0xffff);
+        } else {
+          if (si.Al != si.Ah - 1 ||  // Tested first: bounds Ah before the shift.
+              comp_mask[ci][k] != ((0xffff << si.Ah) & 0xffff)) {
+            JPEGLI_ERROR("Invalid refinement bit progression.");
+          }
+          comp_mask[ci][k] |= 1 << si.Al;
+        }
+      }
+    }
+    if (si.comps_in_scan > 1) {  // Interleaved scans have a block limit per MCU.
+      size_t mcu_size = 0;
+      for (int j = 0; j < si.comps_in_scan; ++j) {
+        int ci = si.component_index[j];
+        jpeg_component_info* comp = &cinfo->comp_info[ci];
+        mcu_size += comp->h_samp_factor * comp->v_samp_factor;
+      }
+      if (mcu_size > C_MAX_BLOCKS_IN_MCU) {
+        JPEGLI_ERROR("MCU size too big");
+      }
+    }
+  }
+  for (int c = 0; c < cinfo->num_components; ++c) {  // Every bit must be coded.
+    for (int k = 0; k < DCTSIZE2; ++k) {
+      if (comp_mask[c][k] != 0xffff) {
+        JPEGLI_ERROR("Incomplete scan of component %d and frequency %d", c, k);
+      }
+    }
+  }
+}
+
+void ProcessCompressionParams(j_compress_ptr cinfo) {  // Validates and derives.
+  if (cinfo->dest == nullptr) {
+    JPEGLI_ERROR("Missing destination.");
+  }
+  if (cinfo->image_width < 1 || cinfo->image_height < 1 ||
+      cinfo->input_components < 1) {
+    JPEGLI_ERROR("Empty input image.");
+  }
+  if (cinfo->image_width > static_cast<int>(JPEG_MAX_DIMENSION) ||
+      cinfo->image_height > static_cast<int>(JPEG_MAX_DIMENSION) ||
+      cinfo->input_components > static_cast<int>(kMaxComponents)) {
+    JPEGLI_ERROR("Input image too big.");
+  }
+  if (cinfo->num_components < 1 ||
+      cinfo->num_components > static_cast<int>(kMaxComponents)) {
+    JPEGLI_ERROR("Invalid number of components.");
+  }
+  if (cinfo->data_precision != kJpegPrecision) {
+    JPEGLI_ERROR("Invalid data precision");
+  }
+  if (cinfo->arith_code) {
+    JPEGLI_ERROR("Arithmetic coding is not implemented.");
+  }
+  if (cinfo->CCIR601_sampling) {
+    JPEGLI_ERROR("CCIR601 sampling is not implemented.");
+  }
+  if (cinfo->restart_interval > 65535u) {
+    JPEGLI_ERROR("Restart interval too big");
+  }
+  if (cinfo->smoothing_factor < 0 || cinfo->smoothing_factor > 100) {
+    JPEGLI_ERROR("Invalid smoothing factor %d", cinfo->smoothing_factor);
+  }
+  jpeg_comp_master* m = cinfo->master;
+  cinfo->max_h_samp_factor = cinfo->max_v_samp_factor = 1;
+  for (int c = 0; c < cinfo->num_components; ++c) {  // Validate each component.
+    jpeg_component_info* comp = &cinfo->comp_info[c];
+    if (comp->component_index != c) {
+      JPEGLI_ERROR("Invalid component index");
+    }
+    for (int j = 0; j < c; ++j) {
+      if (cinfo->comp_info[j].component_id == comp->component_id) {
+        JPEGLI_ERROR("Duplicate component id %d", comp->component_id);
+      }
+    }
+    if (comp->h_samp_factor <= 0 || comp->v_samp_factor <= 0 ||
+        comp->h_samp_factor > MAX_SAMP_FACTOR ||
+        comp->v_samp_factor > MAX_SAMP_FACTOR) {
+      JPEGLI_ERROR("Invalid sampling factor %d x %d", comp->h_samp_factor,
+                   comp->v_samp_factor);
+    }
+    cinfo->max_h_samp_factor =
+        std::max(comp->h_samp_factor, cinfo->max_h_samp_factor);
+    cinfo->max_v_samp_factor =
+        std::max(comp->v_samp_factor, cinfo->max_v_samp_factor);
+  }
+  if (cinfo->num_components == 1 &&
+      (cinfo->max_h_samp_factor != 1 || cinfo->max_v_samp_factor != 1)) {
+    JPEGLI_ERROR("Sampling is not supported for single component image.");
+  }
+  size_t iMCU_width = DCTSIZE * cinfo->max_h_samp_factor;
+  size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor;
+  size_t total_iMCU_cols = DivCeil(cinfo->image_width, iMCU_width);
+  cinfo->total_iMCU_rows = DivCeil(cinfo->image_height, iMCU_height);
+  m->xsize_blocks = total_iMCU_cols * cinfo->max_h_samp_factor;
+  m->ysize_blocks = cinfo->total_iMCU_rows * cinfo->max_v_samp_factor;
+
+  size_t blocks_per_iMCU = 0;
+  for (int c = 0; c < cinfo->num_components; ++c) {  // Derive component sizes.
+    jpeg_component_info* comp = &cinfo->comp_info[c];
+    if (cinfo->max_h_samp_factor % comp->h_samp_factor != 0 ||
+        cinfo->max_v_samp_factor % comp->v_samp_factor != 0) {
+      JPEGLI_ERROR("Non-integral sampling ratios are not supported.");
+    }
+    m->h_factor[c] = cinfo->max_h_samp_factor / comp->h_samp_factor;
+    m->v_factor[c] = cinfo->max_v_samp_factor / comp->v_samp_factor;
+    comp->downsampled_width = DivCeil(cinfo->image_width, m->h_factor[c]);
+    comp->downsampled_height = DivCeil(cinfo->image_height, m->v_factor[c]);
+    comp->width_in_blocks = DivCeil(comp->downsampled_width, DCTSIZE);
+    comp->height_in_blocks = DivCeil(comp->downsampled_height, DCTSIZE);
+    blocks_per_iMCU += comp->h_samp_factor * comp->v_samp_factor;
+  }
+  m->blocks_per_iMCU_row = total_iMCU_cols * blocks_per_iMCU;
+  // Disable adaptive quantization for subsampled luma channel.
+  int y_channel = cinfo->jpeg_color_space == JCS_RGB ? 1 : 0;
+  jpeg_component_info* y_comp = &cinfo->comp_info[y_channel];
+  if (y_comp->h_samp_factor != cinfo->max_h_samp_factor ||
+      y_comp->v_samp_factor != cinfo->max_v_samp_factor) {
+    m->use_adaptive_quantization = false;
+  }
+  if (cinfo->scan_info == nullptr) {  // No user script: build the default one.
+    SetDefaultScanScript(cinfo);
+  }
+  cinfo->progressive_mode =
+      cinfo->scan_info->Ss != 0 || cinfo->scan_info->Se != DCTSIZE2 - 1;
+  ValidateScanScript(cinfo);
+  m->scan_token_info =
+      Allocate<ScanTokenInfo>(cinfo, cinfo->num_scans, JPOOL_IMAGE);
+  memset(m->scan_token_info, 0, cinfo->num_scans * sizeof(ScanTokenInfo));
+  m->ac_ctx_offset = Allocate<uint8_t>(cinfo, cinfo->num_scans, JPOOL_IMAGE);
+  size_t num_ac_contexts = 0;
+  for (int i = 0; i < cinfo->num_scans; ++i) {  // Per-scan token bookkeeping.
+    const jpeg_scan_info* scan_info = &cinfo->scan_info[i];
+    m->ac_ctx_offset[i] = 4 + num_ac_contexts;
+    if (scan_info->Se > 0) {  // The scan covers AC coefficients.
+      num_ac_contexts += scan_info->comps_in_scan;
+    }
+    if (num_ac_contexts > 252) {
+      JPEGLI_ERROR("Too many AC scans in image");
+    }
+    ScanTokenInfo* sti = &m->scan_token_info[i];
+    if (scan_info->comps_in_scan == 1) {  // Non-interleaved: one block per MCU.
+      int comp_idx = scan_info->component_index[0];
+      jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+      sti->MCUs_per_row = comp->width_in_blocks;
+      sti->MCU_rows_in_scan = comp->height_in_blocks;
+      sti->blocks_in_MCU = 1;
+    } else {
+      sti->MCUs_per_row =
+          DivCeil(cinfo->image_width, DCTSIZE * cinfo->max_h_samp_factor);
+      sti->MCU_rows_in_scan =
+          DivCeil(cinfo->image_height, DCTSIZE * cinfo->max_v_samp_factor);
+      sti->blocks_in_MCU = 0;
+      for (int j = 0; j < scan_info->comps_in_scan; ++j) {
+        int comp_idx = scan_info->component_index[j];
+        jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+        sti->blocks_in_MCU += comp->h_samp_factor * comp->v_samp_factor;
+      }
+    }
+    size_t num_MCUs = sti->MCU_rows_in_scan * sti->MCUs_per_row;
+    sti->num_blocks = num_MCUs * sti->blocks_in_MCU;
+    if (cinfo->restart_in_rows <= 0) {
+      sti->restart_interval = cinfo->restart_interval;
+    } else {  // Interval given in MCU rows; capped at 65535 MCUs.
+      sti->restart_interval =
+          std::min<size_t>(sti->MCUs_per_row * cinfo->restart_in_rows, 65535u);
+    }
+    sti->num_restarts = sti->restart_interval > 0
+                            ? DivCeil(num_MCUs, sti->restart_interval)
+                            : 1;
+    sti->restarts = Allocate<size_t>(cinfo, sti->num_restarts, JPOOL_IMAGE);
+  }
+  m->num_contexts = 4 + num_ac_contexts;
+}
+
+// Reports whether the encoder can take the streaming path with the current
+// settings. Streaming is ruled out in the coefficient-writing state, when a
+// restart interval is set, when there is more than one scan, and when a PSNR
+// target is set.
+bool IsStreamingSupported(j_compress_ptr cinfo) {
+  // TODO(szabadka) Remove this restriction.
+  const bool uses_restarts =
+      cinfo->restart_interval > 0 || cinfo->restart_in_rows > 0;
+  return cinfo->global_state != kEncWriteCoeffs && !uses_restarts &&
+         cinfo->num_scans <= 1 && !(cinfo->master->psnr_target > 0);
+}
+
+// Allocates the per-image working storage of the compressor: token arrays
+// (when tokens have to be buffered), input / smoothing / downsampling row
+// buffers, DCT scratch space, coefficient arrays for the non-streaming path,
+// and the quantization-field and zero-bias tables.
+void AllocateBuffers(j_compress_ptr cinfo) {
+  jpeg_comp_master* m = cinfo->master;
+  memset(m->last_dc_coeff, 0, sizeof(m->last_dc_coeff));
+  // Tokens are buffered whenever the output cannot be streamed or Huffman
+  // optimization is requested: one token array per scan and per strip of
+  // DCTSIZE image rows.
+  if (!IsStreamingSupported(cinfo) || cinfo->optimize_coding) {
+    int ysize_blocks = DivCeil(cinfo->image_height, DCTSIZE);
+    int num_arrays = cinfo->num_scans * ysize_blocks;
+    m->token_arrays = Allocate<TokenArray>(cinfo, num_arrays, JPOOL_IMAGE);
+    m->cur_token_array = 0;
+    memset(m->token_arrays, 0, num_arrays * sizeof(TokenArray));
+    m->num_tokens = 0;
+    m->total_num_tokens = 0;
+  }
+  // In the coefficient-writing state none of the buffers below are allocated.
+  if (cinfo->global_state == kEncWriteCoeffs) {
+    return;
+  }
+  // Full-resolution buffers are a whole number of iMCU columns wide and three
+  // iMCU rows high.
+  size_t iMCU_width = DCTSIZE * cinfo->max_h_samp_factor;
+  size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor;
+  size_t total_iMCU_cols = DivCeil(cinfo->image_width, iMCU_width);
+  size_t xsize_full = total_iMCU_cols * iMCU_width;
+  size_t ysize_full = 3 * iMCU_height;
+  if (!cinfo->raw_data_in) {
+    // One full-size buffer per input or output component, whichever count is
+    // larger.
+    int num_all_components =
+        std::max(cinfo->input_components, cinfo->num_components);
+    for (int c = 0; c < num_all_components; ++c) {
+      m->input_buffer[c].Allocate(cinfo, ysize_full, xsize_full);
+    }
+  }
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    jpeg_component_info* comp = &cinfo->comp_info[c];
+    // Component-resolution size: same number of iMCU columns / rows, scaled
+    // by this component's own sampling factors.
+    size_t xsize = total_iMCU_cols * comp->h_samp_factor * DCTSIZE;
+    size_t ysize = 3 * comp->v_samp_factor * DCTSIZE;
+    if (cinfo->raw_data_in) {
+      m->input_buffer[c].Allocate(cinfo, ysize, xsize);
+    }
+    // smooth_input aliases the input buffer unless smoothing is enabled for
+    // non-raw input, in which case it gets its own full-size buffer.
+    m->smooth_input[c] = &m->input_buffer[c];
+    if (!cinfo->raw_data_in && cinfo->smoothing_factor) {
+      m->smooth_input[c] = Allocate<RowBuffer<float>>(cinfo, 1, JPOOL_IMAGE);
+      m->smooth_input[c]->Allocate(cinfo, ysize_full, xsize_full);
+    }
+    // raw_data aliases smooth_input unless the component has a sampling
+    // ratio above 1 for non-raw input, in which case it gets a separate
+    // component-resolution buffer.
+    m->raw_data[c] = m->smooth_input[c];
+    if (!cinfo->raw_data_in && (m->h_factor[c] > 1 || m->v_factor[c] > 1)) {
+      m->raw_data[c] = Allocate<RowBuffer<float>>(cinfo, 1, JPOOL_IMAGE);
+      m->raw_data[c]->Allocate(cinfo, ysize, xsize);
+    }
+    m->quant_mul[c] = Allocate<float>(cinfo, DCTSIZE2, JPOOL_IMAGE_ALIGNED);
+  }
+  m->dct_buffer = Allocate<float>(cinfo, 2 * DCTSIZE2, JPOOL_IMAGE_ALIGNED);
+  m->block_tmp = Allocate<int32_t>(cinfo, DCTSIZE2 * 4, JPOOL_IMAGE_ALIGNED);
+  // Without streaming, coefficients of the whole image are kept in virtual
+  // block arrays, one per component.
+  if (!IsStreamingSupported(cinfo)) {
+    m->coeff_buffers =
+        Allocate<jvirt_barray_ptr>(cinfo, cinfo->num_components, JPOOL_IMAGE);
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      jpeg_component_info* comp = &cinfo->comp_info[c];
+      const size_t xsize_blocks = comp->width_in_blocks;
+      const size_t ysize_blocks = comp->height_in_blocks;
+      m->coeff_buffers[c] = (*cinfo->mem->request_virt_barray)(
+          reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE,
+          /*pre_zero=*/false, xsize_blocks, ysize_blocks, comp->v_samp_factor);
+    }
+  }
+  if (m->use_adaptive_quantization) {
+    // Scratch buffers for the adaptive quantization field, sized from the
+    // component selected by y_channel (index 1 for JCS_RGB, otherwise 0).
+    int y_channel = cinfo->jpeg_color_space == JCS_RGB ? 1 : 0;
+    jpeg_component_info* y_comp = &cinfo->comp_info[y_channel];
+    const size_t xsize_blocks = y_comp->width_in_blocks;
+    const size_t vecsize = VectorSize();
+    // Round 2 * xsize_blocks up to a multiple of the vector size.
+    const size_t xsize_padded = DivCeil(2 * xsize_blocks, vecsize) * vecsize;
+    m->diff_buffer =
+        Allocate<float>(cinfo, xsize_blocks * DCTSIZE + 8, JPOOL_IMAGE_ALIGNED);
+    m->fuzzy_erosion_tmp.Allocate(cinfo, 2, xsize_padded);
+    m->pre_erosion.Allocate(cinfo, 6 * cinfo->max_v_samp_factor, xsize_padded);
+    // With a PSNR target the quant field covers all iMCU rows, otherwise
+    // only max_v_samp_factor rows.
+    size_t qf_height = cinfo->max_v_samp_factor;
+    if (m->psnr_target > 0) {
+      qf_height *= cinfo->total_iMCU_rows;
+    }
+    m->quant_field.Allocate(cinfo, qf_height, xsize_blocks);
+  } else {
+    // Without adaptive quantization a single quant-field row is allocated
+    // and filled with 0.
+    m->quant_field.Allocate(cinfo, 1, m->xsize_blocks);
+    m->quant_field.FillRow(0, 0, m->xsize_blocks);
+  }
+  // Zero-bias tables start out all-zero for every component.
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    m->zero_bias_offset[c] =
+        Allocate<float>(cinfo, DCTSIZE2, JPOOL_IMAGE_ALIGNED);
+    m->zero_bias_mul[c] = Allocate<float>(cinfo, DCTSIZE2, JPOOL_IMAGE_ALIGNED);
+    memset(m->zero_bias_mul[c], 0, DCTSIZE2 * sizeof(float));
+    memset(m->zero_bias_offset[c], 0, DCTSIZE2 * sizeof(float));
+  }
+}
+
+// Tells the client's progress monitor (if any) how many passes to expect:
+// a single input pass when streaming, otherwise one input pass plus a
+// histogram pass and an encode pass for each scan.
+void InitProgressMonitor(j_compress_ptr cinfo) {
+  auto* progress = cinfo->progress;
+  if (progress != nullptr) {
+    progress->total_passes =
+        IsStreamingSupported(cinfo) ? 1 : 1 + 2 * cinfo->num_scans;
+  }
+}
+
+// Common setup code between streaming and transcoding code paths. Called in
+// both jpegli_start_compress() and jpegli_write_coefficients().
+//
+// Resets the error manager, derives the compression parameters, allocates
+// buffers, initializes the destination and writes the file header. When
+// write_all_tables is set, all tables are un-suppressed before the header is
+// written.
+void InitCompress(j_compress_ptr cinfo, boolean write_all_tables) {
+  jpeg_comp_master* m = cinfo->master;
+  (*cinfo->err->reset_error_mgr)(reinterpret_cast<j_common_ptr>(cinfo));
+  ProcessCompressionParams(cinfo);
+  InitProgressMonitor(cinfo);
+  AllocateBuffers(cinfo);
+  // Pixel-input setup is skipped in the coefficient-writing state.
+  if (cinfo->global_state != kEncWriteCoeffs) {
+    ChooseInputMethod(cinfo);
+    if (!cinfo->raw_data_in) {
+      ChooseColorTransform(cinfo);
+      ChooseDownsampleMethods(cinfo);
+    }
+    // A PSNR target selects the first pass of the quantizer search.
+    QuantPass pass = m->psnr_target > 0 ? QuantPass::SEARCH_FIRST_PASS
+                                        : QuantPass::NO_SEARCH;
+    InitQuantizer(cinfo, pass);
+  }
+  if (write_all_tables) {
+    jpegli_suppress_tables(cinfo, FALSE);
+  }
+  // With neither Huffman optimization nor progressive mode, the entropy
+  // coder is set up right away from the copied Huffman tables.
+  if (!cinfo->optimize_coding && !cinfo->progressive_mode) {
+    CopyHuffmanTables(cinfo);
+    InitEntropyCoder(cinfo);
+  }
+  (*cinfo->dest->init_destination)(cinfo);
+  WriteFileHeader(cinfo);
+  JpegBitWriterInit(cinfo);
+  m->next_iMCU_row = 0;
+  m->last_restart_interval = 0;
+  m->next_dht_index = 0;
+}
+
+//
+// Input streaming
+//
+
+// Reports input-pass progress (next_scanline out of image_height, with zero
+// completed passes) to the client's progress monitor, if one is installed.
+void ProgressMonitorInputPass(j_compress_ptr cinfo) {
+  auto* progress = cinfo->progress;
+  if (progress != nullptr) {
+    progress->completed_passes = 0;
+    progress->pass_counter = cinfo->next_scanline;
+    progress->pass_limit = cinfo->image_height;
+    (*progress->progress_monitor)(reinterpret_cast<j_common_ptr>(cinfo));
+  }
+}
+
+// Points row[c] at the next input-buffer row of every component, advances
+// next_input_row, and fills the row from `scanline` via the selected input
+// method. A null `scanline` clears the row of each input component instead.
+void ReadInputRow(j_compress_ptr cinfo, const uint8_t* scanline,
+                  float* row[kMaxComponents]) {
+  jpeg_comp_master* m = cinfo->master;
+  const int num_buffers =
+      std::max(cinfo->input_components, cinfo->num_components);
+  for (int c = 0; c < num_buffers; ++c) {
+    row[c] = m->input_buffer[c].Row(m->next_input_row);
+  }
+  ++m->next_input_row;
+  if (scanline != nullptr) {
+    (*m->input_method)(scanline, cinfo->image_width, row);
+    return;
+  }
+  // No input for this row: zero image_width samples per input component.
+  const size_t row_bytes = cinfo->image_width * sizeof(row[0][0]);
+  for (int c = 0; c < cinfo->input_components; ++c) {
+    memset(row[c], 0, row_bytes);
+  }
+}
+
+// Pads the row that was just read. For every component the last pixel is
+// repeated from index image_width up to and including index
+// xsize_blocks * DCTSIZE, and the first pixel is copied to index -1. Once
+// next_input_row has reached image_height, the padded row (including both
+// border pixels) is replicated into the following buffer rows until
+// ysize_blocks * DCTSIZE rows are filled.
+void PadInputBuffer(j_compress_ptr cinfo, float* row[kMaxComponents]) {
+  jpeg_comp_master* m = cinfo->master;
+  const size_t len0 = cinfo->image_width;
+  const size_t len1 = m->xsize_blocks * DCTSIZE;
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    // Pad row to a multiple of the iMCU width, plus create a border of 1
+    // repeated pixel for adaptive quant field calculation.
+    float last_val = row[c][len0 - 1];
+    // Note the inclusive bound: index len1 is the right border pixel.
+    for (size_t x = len0; x <= len1; ++x) {
+      row[c][x] = last_val;
+    }
+    // Left border pixel.
+    row[c][-1] = row[c][0];
+  }
+  if (m->next_input_row == cinfo->image_height) {
+    // Number of rows missing below the image to reach the padded height.
+    size_t num_rows = m->ysize_blocks * DCTSIZE - cinfo->image_height;
+    for (size_t i = 0; i < num_rows; ++i) {
+      for (int c = 0; c < cinfo->num_components; ++c) {
+        // Copy starts one element before the row so that the left border is
+        // included; len1 + 2 elements cover both borders.
+        float* dest = m->input_buffer[c].Row(m->next_input_row) - 1;
+        memcpy(dest, row[c] - 1, (len1 + 2) * sizeof(dest[0]));
+      }
+      ++m->next_input_row;
+    }
+  }
+}
+
+// Processes one iMCU row: optional smoothing and downsampling (for non-raw
+// input), the adaptive quant field, and then one of three sinks depending on
+// the mode: buffered coefficients (non-streaming), tokens (streaming with
+// optimize_coding) or direct output (plain streaming). Advances
+// next_iMCU_row afterwards.
+void ProcessiMCURow(j_compress_ptr cinfo) {
+  JXL_ASSERT(cinfo->master->next_iMCU_row < cinfo->total_iMCU_rows);
+  if (!cinfo->raw_data_in) {
+    ApplyInputSmoothing(cinfo);
+    DownsampleInputBuffer(cinfo);
+  }
+  ComputeAdaptiveQuantField(cinfo);
+  if (!IsStreamingSupported(cinfo)) {
+    ComputeCoefficientsForiMCURow(cinfo);
+  } else if (cinfo->optimize_coding) {
+    ComputeTokensForiMCURow(cinfo);
+  } else {
+    WriteiMCURow(cinfo);
+  }
+  ++cinfo->master->next_iMCU_row;
+}
+
+// Decides, after new input rows have arrived, how many iMCU rows can be
+// processed now (zero, one or two).
+void ProcessiMCURows(j_compress_ptr cinfo) {
+  jpeg_comp_master* m = cinfo->master;
+  const size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor;
+  // To have context rows both above and below the current iMCU row, we delay
+  // processing the first iMCU row and process two iMCU rows after we receive
+  // the last input row.
+  const bool on_iMCU_boundary = m->next_input_row % iMCU_height == 0;
+  const bool beyond_first_iMCU_row = m->next_input_row > iMCU_height;
+  if (on_iMCU_boundary && beyond_first_iMCU_row) {
+    ProcessiMCURow(cinfo);
+  }
+  if (m->next_input_row >= cinfo->image_height) {
+    ProcessiMCURow(cinfo);
+  }
+}
+
+//
+// Non-streaming part
+//
+
+// Reorders the coefficients of every block of every component in place:
+// after the call, position k of a block holds the value that was stored at
+// position kJPEGNaturalOrder[k] before it.
+void ZigZagShuffleBlocks(j_compress_ptr cinfo) {
+  JCOEF reordered[DCTSIZE2];
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    const jpeg_component_info& comp = cinfo->comp_info[c];
+    for (JDIMENSION by = 0; by < comp.height_in_blocks; ++by) {
+      JBLOCKROW block_row = GetBlockRow(cinfo, c, by)[0];
+      for (JDIMENSION bx = 0; bx < comp.width_in_blocks; ++bx) {
+        JCOEF* coeffs = block_row[bx];
+        for (int k = 0; k < DCTSIZE2; ++k) {
+          reordered[k] = coeffs[kJPEGNaturalOrder[k]];
+        }
+        memcpy(coeffs, reordered, sizeof(reordered));
+      }
+    }
+  }
+}
+
+}  // namespace jpegli
+
+//
+// Parameter setup
+//
+
+// Initializes a compressor object: checks the caller's struct size, sets up
+// the memory manager, clears the public fields and table pointers, and
+// allocates the internal master struct with its default settings.
+// The `version` argument is not examined by this function.
+void jpegli_CreateCompress(j_compress_ptr cinfo, int version,
+                           size_t structsize) {
+  cinfo->mem = nullptr;
+  if (structsize != sizeof(*cinfo)) {
+    JPEGLI_ERROR("jpegli_compress_struct has wrong size.");
+  }
+  // The memory manager must exist before anything is allocated below.
+  jpegli::InitMemoryManager(reinterpret_cast<j_common_ptr>(cinfo));
+  cinfo->progress = nullptr;
+  cinfo->is_decompressor = FALSE;
+  cinfo->global_state = jpegli::kEncStart;
+  cinfo->dest = nullptr;
+  cinfo->image_width = 0;
+  cinfo->image_height = 0;
+  cinfo->input_components = 0;
+  cinfo->in_color_space = JCS_UNKNOWN;
+  cinfo->input_gamma = 1.0f;
+  cinfo->num_components = 0;
+  cinfo->jpeg_color_space = JCS_UNKNOWN;
+  cinfo->comp_info = nullptr;
+  for (int i = 0; i < NUM_QUANT_TBLS; ++i) {
+    cinfo->quant_tbl_ptrs[i] = nullptr;
+  }
+  for (int i = 0; i < NUM_HUFF_TBLS; ++i) {
+    cinfo->dc_huff_tbl_ptrs[i] = nullptr;
+    cinfo->ac_huff_tbl_ptrs[i] = nullptr;
+  }
+  memset(cinfo->arith_dc_L, 0, sizeof(cinfo->arith_dc_L));
+  memset(cinfo->arith_dc_U, 0, sizeof(cinfo->arith_dc_U));
+  memset(cinfo->arith_ac_K, 0, sizeof(cinfo->arith_ac_K));
+  cinfo->write_Adobe_marker = false;
+  // Internal encoder state and its defaults.
+  cinfo->master = jpegli::Allocate<jpeg_comp_master>(cinfo, 1);
+  jpegli::InitializeCompressParams(cinfo);
+  cinfo->master->force_baseline = true;
+  cinfo->master->xyb_mode = false;
+  cinfo->master->cicp_transfer_function = 2;  // unknown transfer function code
+  cinfo->master->use_std_tables = false;
+  cinfo->master->use_adaptive_quantization = true;
+  cinfo->master->progressive_level = jpegli::kDefaultProgressiveLevel;
+  cinfo->master->data_type = JPEGLI_TYPE_UINT8;
+  cinfo->master->endianness = JPEGLI_NATIVE_ENDIAN;
+  cinfo->master->coeff_buffers = nullptr;
+}
+
+// Turns on XYB mode. Only valid in the kEncStart state.
+void jpegli_set_xyb_mode(j_compress_ptr cinfo) {
+  CheckState(cinfo, jpegli::kEncStart);
+  cinfo->master->xyb_mode = true;
+}
+
+// Stores the CICP transfer function code. Only valid in the kEncStart state.
+// The code is stored as given; no range check is done here.
+void jpegli_set_cicp_transfer_function(j_compress_ptr cinfo, int code) {
+  CheckState(cinfo, jpegli::kEncStart);
+  cinfo->master->cicp_transfer_function = code;
+}
+
+// Resets the compression parameters to their defaults: default colorspace
+// for the current in_color_space, quality 90 with force_baseline, the
+// default progressive level, and the standard AC and DC Huffman tables.
+// Only valid in the kEncStart state.
+void jpegli_set_defaults(j_compress_ptr cinfo) {
+  CheckState(cinfo, jpegli::kEncStart);
+  jpegli::InitializeCompressParams(cinfo);
+  jpegli_default_colorspace(cinfo);
+  jpegli_set_quality(cinfo, 90, TRUE);
+  jpegli_set_progressive_level(cinfo, jpegli::kDefaultProgressiveLevel);
+  jpegli::AddStandardHuffmanTables(reinterpret_cast<j_common_ptr>(cinfo),
+                                   /*is_dc=*/false);
+  jpegli::AddStandardHuffmanTables(reinterpret_cast<j_common_ptr>(cinfo),
+                                   /*is_dc=*/true);
+}
+
+// Selects the JPEG colorspace that corresponds to in_color_space. Every
+// supported input colorspace maps to itself, except RGB, which maps to YCbCr
+// unless XYB mode is on. Unsupported input colorspaces raise an error.
+// Only valid in the kEncStart state.
+void jpegli_default_colorspace(j_compress_ptr cinfo) {
+  CheckState(cinfo, jpegli::kEncStart);
+  J_COLOR_SPACE target;
+  switch (cinfo->in_color_space) {
+    case JCS_RGB:
+      target = cinfo->master->xyb_mode ? JCS_RGB : JCS_YCbCr;
+      break;
+    case JCS_GRAYSCALE:
+    case JCS_YCbCr:
+    case JCS_CMYK:
+    case JCS_YCCK:
+    case JCS_UNKNOWN:
+      target = cinfo->in_color_space;
+      break;
+    default:
+      JPEGLI_ERROR("Unsupported input colorspace %d", cinfo->in_color_space);
+      return;
+  }
+  jpegli_set_colorspace(cinfo, target);
+}
+
+// Sets the JPEG colorspace and re-initializes the per-component info for it:
+// component count, ids, sampling factors (all 1, except in XYB mode) and
+// table assignments. Unsupported colorspaces raise an error.
+// Only valid in the kEncStart state.
+void jpegli_set_colorspace(j_compress_ptr cinfo, J_COLOR_SPACE colorspace) {
+  CheckState(cinfo, jpegli::kEncStart);
+  cinfo->jpeg_color_space = colorspace;
+  switch (colorspace) {
+    case JCS_GRAYSCALE:
+      cinfo->num_components = 1;
+      break;
+    case JCS_RGB:
+    case JCS_YCbCr:
+      cinfo->num_components = 3;
+      break;
+    case JCS_CMYK:
+    case JCS_YCCK:
+      cinfo->num_components = 4;
+      break;
+    case JCS_UNKNOWN:
+      // Pass the input components through, capped at kMaxComponents.
+      cinfo->num_components =
+          std::min<int>(jpegli::kMaxComponents, cinfo->input_components);
+      break;
+    default:
+      JPEGLI_ERROR("Unsupported jpeg colorspace %d", colorspace);
+  }
+  // Adobe marker is only needed to distinguish CMYK and YCCK JPEGs.
+  cinfo->write_Adobe_marker = (cinfo->jpeg_color_space == JCS_YCCK);
+  // comp_info is allocated once and reused on later calls.
+  if (cinfo->comp_info == nullptr) {
+    cinfo->comp_info =
+        jpegli::Allocate<jpeg_component_info>(cinfo, MAX_COMPONENTS);
+  }
+  // NOTE(review): the array is sized by MAX_COMPONENTS but cleared by
+  // jpegli::kMaxComponents entries; confirm the latter is never larger.
+  memset(cinfo->comp_info, 0,
+         jpegli::kMaxComponents * sizeof(jpeg_component_info));
+  // Defaults: ids 1..n, no subsampling, table 0 everywhere.
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    jpeg_component_info* comp = &cinfo->comp_info[c];
+    comp->component_index = c;
+    comp->component_id = c + 1;
+    comp->h_samp_factor = 1;
+    comp->v_samp_factor = 1;
+    comp->quant_tbl_no = 0;
+    comp->dc_tbl_no = 0;
+    comp->ac_tbl_no = 0;
+  }
+  if (colorspace == JCS_RGB) {
+    cinfo->comp_info[0].component_id = 'R';
+    cinfo->comp_info[1].component_id = 'G';
+    cinfo->comp_info[2].component_id = 'B';
+    if (cinfo->master->xyb_mode) {
+      // Subsample blue channel.
+      cinfo->comp_info[0].h_samp_factor = cinfo->comp_info[0].v_samp_factor = 2;
+      cinfo->comp_info[1].h_samp_factor = cinfo->comp_info[1].v_samp_factor = 2;
+      cinfo->comp_info[2].h_samp_factor = cinfo->comp_info[2].v_samp_factor = 1;
+      // Use separate quantization tables for each component
+      cinfo->comp_info[1].quant_tbl_no = 1;
+      cinfo->comp_info[2].quant_tbl_no = 2;
+    }
+  } else if (colorspace == JCS_CMYK) {
+    cinfo->comp_info[0].component_id = 'C';
+    cinfo->comp_info[1].component_id = 'M';
+    cinfo->comp_info[2].component_id = 'Y';
+    cinfo->comp_info[3].component_id = 'K';
+  } else if (colorspace == JCS_YCbCr || colorspace == JCS_YCCK) {
+    // Use separate quantization and Huffman tables for luma and chroma
+    // Only components 1 and 2 are switched to table 1; for YCCK the fourth
+    // component keeps table 0.
+    cinfo->comp_info[1].quant_tbl_no = 1;
+    cinfo->comp_info[2].quant_tbl_no = 1;
+    cinfo->comp_info[1].dc_tbl_no = cinfo->comp_info[1].ac_tbl_no = 1;
+    cinfo->comp_info[2].dc_tbl_no = cinfo->comp_info[2].ac_tbl_no = 1;
+  }
+}
+
+// Sets the quantization matrices from a single distance value, requesting
+// two additional chroma tables. Only valid in the kEncStart state.
+void jpegli_set_distance(j_compress_ptr cinfo, float distance,
+                         boolean force_baseline) {
+  CheckState(cinfo, jpegli::kEncStart);
+  cinfo->master->force_baseline = force_baseline;
+  // Only the first three entries are given explicitly; any further entries
+  // of the array are zero-initialized.
+  float per_table[NUM_QUANT_TBLS] = {distance, distance, distance};
+  jpegli::SetQuantMatrices(cinfo, per_table, /*add_two_chroma_tables=*/true);
+}
+
+// Maps a libjpeg-style quality value to a distance value:
+//   quality >= 100 : 0.01
+//   30 <= quality  : linear, 0.1 + 0.09 * (100 - quality)
+//   quality < 30   : quadratic in quality
+float jpegli_quality_to_distance(int quality) {
+  if (quality >= 100) {
+    return 0.01f;
+  }
+  if (quality >= 30) {
+    return 0.1f + (100 - quality) * 0.09f;
+  }
+  return 53.0f / 3000.0f * quality * quality - 23.0f / 20.0f * quality + 25.0f;
+}
+
+// Stores the PSNR target together with its tolerance and the distance range
+// to be used with it. Only valid in the kEncStart state.
+void jpegli_set_psnr(j_compress_ptr cinfo, float psnr, float tolerance,
+                     float min_distance, float max_distance) {
+  CheckState(cinfo, jpegli::kEncStart);
+  jpeg_comp_master* m = cinfo->master;
+  m->psnr_target = psnr;
+  m->psnr_tolerance = tolerance;
+  m->min_distance = min_distance;
+  m->max_distance = max_distance;
+}
+
+// Sets the quantization matrices from a quality value, converted with
+// jpegli_quality_to_distance(). Only valid in the kEncStart state.
+void jpegli_set_quality(j_compress_ptr cinfo, int quality,
+                        boolean force_baseline) {
+  CheckState(cinfo, jpegli::kEncStart);
+  cinfo->master->force_baseline = force_baseline;
+  const float d = jpegli_quality_to_distance(quality);
+  // Only the first three entries are given explicitly; any further entries
+  // of the array are zero-initialized.
+  float per_table[NUM_QUANT_TBLS] = {d, d, d};
+  jpegli::SetQuantMatrices(cinfo, per_table, /*add_two_chroma_tables=*/false);
+}
+
+// Sets the quantization matrices from a linear scale factor, converted with
+// jpegli::LinearQualityToDistance(). Only valid in the kEncStart state.
+void jpegli_set_linear_quality(j_compress_ptr cinfo, int scale_factor,
+                               boolean force_baseline) {
+  CheckState(cinfo, jpegli::kEncStart);
+  cinfo->master->force_baseline = force_baseline;
+  const float d = jpegli::LinearQualityToDistance(scale_factor);
+  // Only the first three entries are given explicitly; any further entries
+  // of the array are zero-initialized.
+  float per_table[NUM_QUANT_TBLS] = {d, d, d};
+  jpegli::SetQuantMatrices(cinfo, per_table, /*add_two_chroma_tables=*/false);
+}
+
+#if JPEG_LIB_VERSION >= 70
+// Sets the quantization matrices from the per-table scale factors stored in
+// cinfo->q_scale_factor. Only valid in the kEncStart state.
+void jpegli_default_qtables(j_compress_ptr cinfo, boolean force_baseline) {
+  CheckState(cinfo, jpegli::kEncStart);
+  cinfo->master->force_baseline = force_baseline;
+  float per_table[NUM_QUANT_TBLS];
+  int tbl = 0;
+  for (float& d : per_table) {
+    d = jpegli::LinearQualityToDistance(cinfo->q_scale_factor[tbl]);
+    ++tbl;
+  }
+  jpegli::SetQuantMatrices(cinfo, per_table, /*add_two_chroma_tables=*/false);
+}
+#endif
+
+// Converts a quality value to a linear scale factor. The quality is first
+// clamped to [1, 100]; below 50 the result is 5000 / quality, otherwise
+// 200 - 2 * quality.
+int jpegli_quality_scaling(int quality) {
+  if (quality < 1) {
+    quality = 1;
+  } else if (quality > 100) {
+    quality = 100;
+  }
+  if (quality < 50) {
+    return 5000 / quality;
+  }
+  return 200 - 2 * quality;
+}
+
+// Sets the use_std_tables flag. Only valid in the kEncStart state.
+void jpegli_use_standard_quant_tables(j_compress_ptr cinfo) {
+  CheckState(cinfo, jpegli::kEncStart);
+  cinfo->master->use_std_tables = true;
+}
+
+// Installs quantization table `which_tbl`, computed from `basic_table` with
+// each entry scaled by scale_factor percent (rounded) and clamped to
+// [1, 255] when force_baseline is set, or [1, 32767] otherwise. The table is
+// allocated on first use and marked as not yet sent.
+// Raises an error if which_tbl is outside [0, NUM_QUANT_TBLS).
+// Only valid in the kEncStart state.
+void jpegli_add_quant_table(j_compress_ptr cinfo, int which_tbl,
+                            const unsigned int* basic_table, int scale_factor,
+                            boolean force_baseline) {
+  CheckState(cinfo, jpegli::kEncStart);
+  // quant_tbl_ptrs has NUM_QUANT_TBLS entries, so NUM_QUANT_TBLS itself is
+  // already out of range.
+  if (which_tbl < 0 || which_tbl >= NUM_QUANT_TBLS) {
+    JPEGLI_ERROR("Invalid quant table index %d", which_tbl);
+  }
+  if (cinfo->quant_tbl_ptrs[which_tbl] == nullptr) {
+    cinfo->quant_tbl_ptrs[which_tbl] =
+        jpegli_alloc_quant_table(reinterpret_cast<j_common_ptr>(cinfo));
+  }
+  int max_qval = force_baseline ? 255 : 32767U;
+  JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[which_tbl];
+  for (int k = 0; k < DCTSIZE2; ++k) {
+    // Percent scaling with rounding, then clamping to the legal range.
+    int qval = (basic_table[k] * scale_factor + 50) / 100;
+    qval = std::max(1, std::min(qval, max_qval));
+    quant_table->quantval[k] = qval;
+  }
+  quant_table->sent_table = FALSE;
+}
+
+// Enables or disables adaptive quantization. Only valid in the kEncStart
+// state.
+void jpegli_enable_adaptive_quantization(j_compress_ptr cinfo, boolean value) {
+  CheckState(cinfo, jpegli::kEncStart);
+  cinfo->master->use_adaptive_quantization = value;
+}
+
+// Shorthand for jpegli_set_progressive_level(cinfo, 2). Only valid in the
+// kEncStart state.
+void jpegli_simple_progression(j_compress_ptr cinfo) {
+  CheckState(cinfo, jpegli::kEncStart);
+  jpegli_set_progressive_level(cinfo, 2);
+}
+
+// Stores the progressive level. Negative levels raise an error; no upper
+// bound is checked here. Only valid in the kEncStart state.
+void jpegli_set_progressive_level(j_compress_ptr cinfo, int level) {
+  CheckState(cinfo, jpegli::kEncStart);
+  if (level < 0) {
+    JPEGLI_ERROR("Invalid progressive level %d", level);
+  }
+  cinfo->master->progressive_level = level;
+}
+
+// Sets the sample data type and endianness of the input. The data type is
+// validated and stored first, then the endianness; an unsupported value for
+// either raises an error. Only valid in the kEncStart state.
+void jpegli_set_input_format(j_compress_ptr cinfo, JpegliDataType data_type,
+                             JpegliEndianness endianness) {
+  CheckState(cinfo, jpegli::kEncStart);
+  const bool known_data_type = data_type == JPEGLI_TYPE_UINT8 ||
+                               data_type == JPEGLI_TYPE_UINT16 ||
+                               data_type == JPEGLI_TYPE_FLOAT;
+  if (known_data_type) {
+    cinfo->master->data_type = data_type;
+  } else {
+    JPEGLI_ERROR("Unsupported data type %d", data_type);
+  }
+  const bool known_endianness = endianness == JPEGLI_NATIVE_ENDIAN ||
+                                endianness == JPEGLI_LITTLE_ENDIAN ||
+                                endianness == JPEGLI_BIG_ENDIAN;
+  if (known_endianness) {
+    cinfo->master->endianness = endianness;
+  } else {
+    JPEGLI_ERROR("Unsupported endianness %d", endianness);
+  }
+}
+
+#if JPEG_LIB_VERSION >= 70
+// Fills in jpeg_width / jpeg_height. Unlike most setters in this file, no
+// state check is done here.
+void jpegli_calc_jpeg_dimensions(j_compress_ptr cinfo) {
+  // Since input scaling is not supported, we just copy the image dimensions.
+  cinfo->jpeg_width = cinfo->image_width;
+  cinfo->jpeg_height = cinfo->image_height;
+}
+#endif
+
+// Copies the parameters needed for transcoding from a decompressor to a
+// compressor: image geometry, colorspace, JFIF / density fields, the
+// per-component ids, sampling factors and quant table numbers, and the
+// quantization tables themselves. Raises an error if the component count
+// implied by the source colorspace differs from the source's component
+// count. The destination must be in the kEncStart state.
+void jpegli_copy_critical_parameters(j_decompress_ptr srcinfo,
+                                     j_compress_ptr dstinfo) {
+  CheckState(dstinfo, jpegli::kEncStart);
+  // Image parameters.
+  dstinfo->image_width = srcinfo->image_width;
+  dstinfo->image_height = srcinfo->image_height;
+  dstinfo->input_components = srcinfo->num_components;
+  dstinfo->in_color_space = srcinfo->jpeg_color_space;
+  dstinfo->input_gamma = srcinfo->output_gamma;
+  // Compression parameters.
+  jpegli_set_defaults(dstinfo);
+  jpegli_set_colorspace(dstinfo, srcinfo->jpeg_color_space);
+  if (dstinfo->num_components != srcinfo->num_components) {
+    // Presumably JPEGLI_ERROR refers to a variable named `cinfo`, hence the
+    // local alias; verify against the macro definition.
+    const auto& cinfo = dstinfo;
+    return JPEGLI_ERROR("Mismatch between src colorspace and components");
+  }
+  dstinfo->data_precision = srcinfo->data_precision;
+  dstinfo->CCIR601_sampling = srcinfo->CCIR601_sampling;
+  dstinfo->JFIF_major_version = srcinfo->JFIF_major_version;
+  dstinfo->JFIF_minor_version = srcinfo->JFIF_minor_version;
+  dstinfo->density_unit = srcinfo->density_unit;
+  dstinfo->X_density = srcinfo->X_density;
+  dstinfo->Y_density = srcinfo->Y_density;
+  // Only these four fields are copied per component; the Huffman table
+  // numbers keep the values set by jpegli_set_colorspace() above.
+  for (int c = 0; c < dstinfo->num_components; ++c) {
+    jpeg_component_info* srccomp = &srcinfo->comp_info[c];
+    jpeg_component_info* dstcomp = &dstinfo->comp_info[c];
+    dstcomp->component_id = srccomp->component_id;
+    dstcomp->h_samp_factor = srccomp->h_samp_factor;
+    dstcomp->v_samp_factor = srccomp->v_samp_factor;
+    dstcomp->quant_tbl_no = srccomp->quant_tbl_no;
+  }
+  // Copy every quantization table the source has, allocating destination
+  // slots as needed, and mark each copy as not yet sent.
+  for (int i = 0; i < NUM_QUANT_TBLS; ++i) {
+    if (!srcinfo->quant_tbl_ptrs[i]) continue;
+    if (dstinfo->quant_tbl_ptrs[i] == nullptr) {
+      dstinfo->quant_tbl_ptrs[i] = jpegli::Allocate<JQUANT_TBL>(dstinfo, 1);
+    }
+    memcpy(dstinfo->quant_tbl_ptrs[i], srcinfo->quant_tbl_ptrs[i],
+           sizeof(JQUANT_TBL));
+    dstinfo->quant_tbl_ptrs[i]->sent_table = FALSE;
+  }
+}
+
+// Applies `suppress` to the quantization tables and to the DC and AC Huffman
+// tables via jpegli::SetSentTableFlag().
+void jpegli_suppress_tables(j_compress_ptr cinfo, boolean suppress) {
+  jpegli::SetSentTableFlag(cinfo->quant_tbl_ptrs, NUM_QUANT_TBLS, suppress);
+  jpegli::SetSentTableFlag(cinfo->dc_huff_tbl_ptrs, NUM_HUFF_TBLS, suppress);
+  jpegli::SetSentTableFlag(cinfo->ac_huff_tbl_ptrs, NUM_HUFF_TBLS, suppress);
+}
+
+//
+// Compressor initialization
+//
+
+// Starts a compression cycle for pixel input: moves from kEncStart to
+// kEncHeader, runs the common initialization and resets the scanline and
+// input-row counters to zero.
+void jpegli_start_compress(j_compress_ptr cinfo, boolean write_all_tables) {
+  CheckState(cinfo, jpegli::kEncStart);
+  cinfo->global_state = jpegli::kEncHeader;
+  jpegli::InitCompress(cinfo, write_all_tables);
+  cinfo->next_scanline = 0;
+  cinfo->master->next_input_row = 0;
+}
+
+// Starts a compression cycle from caller-supplied coefficient arrays: moves
+// from kEncStart to kEncWriteCoeffs, runs the common initialization with all
+// tables written, installs coef_arrays as the coefficient buffers and marks
+// all image rows as already consumed.
+void jpegli_write_coefficients(j_compress_ptr cinfo,
+                               jvirt_barray_ptr* coef_arrays) {
+  CheckState(cinfo, jpegli::kEncStart);
+  cinfo->global_state = jpegli::kEncWriteCoeffs;
+  jpegli::InitCompress(cinfo, /*write_all_tables=*/true);
+  cinfo->master->coeff_buffers = coef_arrays;
+  cinfo->next_scanline = cinfo->image_height;
+  cinfo->master->next_input_row = cinfo->image_height;
+}
+
+// Writes a tables-only stream to the destination: SOI, all quantization
+// tables, the Huffman tables, EOI. Afterwards all tables are marked as
+// suppressed. Requires a destination and the kEncStart state.
+void jpegli_write_tables(j_compress_ptr cinfo) {
+  CheckState(cinfo, jpegli::kEncStart);
+  if (cinfo->dest == nullptr) {
+    JPEGLI_ERROR("Missing destination.");
+  }
+  jpeg_comp_master* m = cinfo->master;
+  (*cinfo->err->reset_error_mgr)(reinterpret_cast<j_common_ptr>(cinfo));
+  (*cinfo->dest->init_destination)(cinfo);
+  jpegli::WriteOutput(cinfo, {0xFF, 0xD8});  // SOI
+  jpegli::EncodeDQT(cinfo, /*write_all_tables=*/true);
+  jpegli::CopyHuffmanTables(cinfo);
+  jpegli::EncodeDHT(cinfo, 0, m->num_huffman_tables);
+  jpegli::WriteOutput(cinfo, {0xFF, 0xD9});  // EOI
+  (*cinfo->dest->term_destination)(cinfo);
+  jpegli_suppress_tables(cinfo, TRUE);
+}
+
+//
+// Marker writing
+//
+
+// Writes the 4-byte header of an APPn (0xe0..0xef) or COM (0xfe) marker
+// segment: 0xff, the marker byte, and the big-endian 16-bit segment length
+// (datalen plus the two length bytes). The payload itself must be written
+// by the caller afterwards. Raises an error if datalen exceeds
+// jpegli::kMaxBytesInMarker or the marker is of another kind.
+// Only valid in the kEncHeader and kEncWriteCoeffs states.
+void jpegli_write_m_header(j_compress_ptr cinfo, int marker,
+                           unsigned int datalen) {
+  CheckState(cinfo, jpegli::kEncHeader, jpegli::kEncWriteCoeffs);
+  if (datalen > jpegli::kMaxBytesInMarker) {
+    JPEGLI_ERROR("Invalid marker length %u", datalen);
+  }
+  if (marker != 0xfe && (marker < 0xe0 || marker > 0xef)) {
+    JPEGLI_ERROR(
+        "jpegli_write_m_header: Only APP and COM markers are supported.");
+  }
+  // Only the header is emitted here, so a fixed 4-byte buffer is enough; no
+  // heap allocation proportional to datalen is needed.
+  const unsigned int segment_len = datalen + 2;
+  const uint8_t header[4] = {0xff, static_cast<uint8_t>(marker),
+                             static_cast<uint8_t>(segment_len >> 8),
+                             static_cast<uint8_t>(segment_len & 0xff)};
+  jpegli::WriteOutput(cinfo, header, sizeof(header));
+}
+
+// Writes the low 8 bits of `val` to the output as a single byte.
+void jpegli_write_m_byte(j_compress_ptr cinfo, int val) {
+  const uint8_t byte = static_cast<uint8_t>(val);
+  jpegli::WriteOutput(cinfo, &byte, 1);
+}
+
+// Writes a complete marker segment: the header via jpegli_write_m_header()
+// (which validates the marker and length) followed by datalen payload bytes
+// from dataptr.
+void jpegli_write_marker(j_compress_ptr cinfo, int marker,
+                         const JOCTET* dataptr, unsigned int datalen) {
+  jpegli_write_m_header(cinfo, marker, datalen);
+  jpegli::WriteOutput(cinfo, dataptr, datalen);
+}
+
+// Writes an ICC profile as a sequence of marker segments. Each segment
+// carries the ICC signature, a 1-based sequence number, the total number of
+// segments, and then up to kMaxIccBytesInMarker bytes of profile data.
+// Nothing is written when icc_data_len is 0.
+void jpegli_write_icc_profile(j_compress_ptr cinfo, const JOCTET* icc_data_ptr,
+                              unsigned int icc_data_len) {
+  // Per-segment overhead: signature plus the two counter bytes.
+  constexpr size_t kOverhead = sizeof jpegli::kICCSignature + 2;
+  constexpr size_t kMaxIccBytesInMarker =
+      jpegli::kMaxBytesInMarker - kOverhead;
+  const int num_markers =
+      static_cast<int>(jpegli::DivCeil(icc_data_len, kMaxIccBytesInMarker));
+  const JOCTET* next = icc_data_ptr;
+  size_t remaining = icc_data_len;
+  for (int seq = 1; seq <= num_markers; ++seq) {
+    const size_t chunk = std::min(kMaxIccBytesInMarker, remaining);
+    jpegli_write_m_header(cinfo, jpegli::kICCMarker,
+                          static_cast<unsigned int>(chunk + kOverhead));
+    for (const unsigned char c : jpegli::kICCSignature) {
+      jpegli_write_m_byte(cinfo, c);
+    }
+    jpegli_write_m_byte(cinfo, seq);
+    jpegli_write_m_byte(cinfo, num_markers);
+    for (const JOCTET* const end = next + chunk; next != end; ++next) {
+      jpegli_write_m_byte(cinfo, *next);
+    }
+    remaining -= chunk;
+  }
+}
+
+//
+// Input streaming
+//
+
+// Feeds up to num_lines scanlines into the encoder and returns the number of
+// scanlines accounted for by this call. Raises an error in raw-data mode.
+// On the first call in streaming mode without optimize_coding, the frame
+// header and the first scan header are written up front.
+// Only valid in the kEncHeader and kEncReadImage states.
+JDIMENSION jpegli_write_scanlines(j_compress_ptr cinfo, JSAMPARRAY scanlines,
+                                  JDIMENSION num_lines) {
+  CheckState(cinfo, jpegli::kEncHeader, jpegli::kEncReadImage);
+  if (cinfo->raw_data_in) {
+    JPEGLI_ERROR("jpegli_write_raw_data() must be called for raw data mode.");
+  }
+  jpegli::ProgressMonitorInputPass(cinfo);
+  if (cinfo->global_state == jpegli::kEncHeader &&
+      jpegli::IsStreamingSupported(cinfo) && !cinfo->optimize_coding) {
+    jpegli::WriteFrameHeader(cinfo);
+    jpegli::WriteScanHeader(cinfo, 0);
+  }
+  cinfo->global_state = jpegli::kEncReadImage;
+  jpeg_comp_master* m = cinfo->master;
+  // Never accept more lines than the image has left.
+  if (num_lines + cinfo->next_scanline > cinfo->image_height) {
+    num_lines = cinfo->image_height - cinfo->next_scanline;
+  }
+  JDIMENSION prev_scanline = cinfo->next_scanline;
+  // Rows already read into the input buffer (next_input_row, capped at
+  // image_height) that next_scanline does not account for yet.
+  size_t input_lag = (std::min<size_t>(cinfo->image_height, m->next_input_row) -
+                      cinfo->next_scanline);
+  if (input_lag > num_lines) {
+    // input_lag is a size_t but the format expects an unsigned int; it is
+    // bounded by image_height, so the narrowing cast is lossless.
+    JPEGLI_ERROR("Need at least %u lines to continue",
+                 static_cast<unsigned int>(input_lag));
+  }
+  if (input_lag > 0) {
+    // The lagging rows only count as written once the pending output could
+    // be emptied.
+    if (!jpegli::EmptyBitWriterBuffer(&m->bw)) {
+      return 0;
+    }
+    cinfo->next_scanline += input_lag;
+  }
+  float* rows[jpegli::kMaxComponents];
+  for (size_t i = input_lag; i < num_lines; ++i) {
+    jpegli::ReadInputRow(cinfo, scanlines[i], rows);
+    (*m->color_transform)(rows, cinfo->image_width);
+    jpegli::PadInputBuffer(cinfo, rows);
+    jpegli::ProcessiMCURows(cinfo);
+    // Stop without counting this row if the output could not be emptied.
+    if (!jpegli::EmptyBitWriterBuffer(&m->bw)) {
+      break;
+    }
+    ++cinfo->next_scanline;
+  }
+  return cinfo->next_scanline - prev_scanline;
+}
+
+// Feeds one iMCU row of already-downsampled component data into the encoder.
+// Returns the iMCU height (DCTSIZE * max_v_samp_factor) when the row was
+// accepted and 0 when the image is already complete or the output could not
+// be emptied. Raises an error if raw-data mode is not set or if num_lines is
+// less than one iMCU row.
+// Only valid in the kEncHeader and kEncReadImage states.
+JDIMENSION jpegli_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data,
+                                 JDIMENSION num_lines) {
+  CheckState(cinfo, jpegli::kEncHeader, jpegli::kEncReadImage);
+  if (!cinfo->raw_data_in) {
+    JPEGLI_ERROR("jpegli_write_raw_data(): raw data mode was not set");
+  }
+  jpegli::ProgressMonitorInputPass(cinfo);
+  if (cinfo->global_state == jpegli::kEncHeader &&
+      jpegli::IsStreamingSupported(cinfo) && !cinfo->optimize_coding) {
+    jpegli::WriteFrameHeader(cinfo);
+    jpegli::WriteScanHeader(cinfo, 0);
+  }
+  cinfo->global_state = jpegli::kEncReadImage;
+  jpeg_comp_master* m = cinfo->master;
+  if (cinfo->next_scanline >= cinfo->image_height) {
+    return 0;
+  }
+  size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor;
+  if (num_lines < iMCU_height) {
+    // iMCU_height is a size_t but the format expects an unsigned int.
+    JPEGLI_ERROR("Missing input lines, minimum is %u",
+                 static_cast<unsigned int>(iMCU_height));
+  }
+  // An iMCU row was already read in but not yet counted in next_scanline:
+  // only retry emptying the output, do not read the data again.
+  if (cinfo->next_scanline < m->next_input_row) {
+    JXL_ASSERT(m->next_input_row - cinfo->next_scanline == iMCU_height);
+    if (!jpegli::EmptyBitWriterBuffer(&m->bw)) {
+      return 0;
+    }
+    cinfo->next_scanline = m->next_input_row;
+    return iMCU_height;
+  }
+  size_t iMCU_y = m->next_input_row / iMCU_height;
+  float* rows[jpegli::kMaxComponents];
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    JSAMPARRAY plane = data[c];
+    jpeg_component_info* comp = &cinfo->comp_info[c];
+    size_t xsize = comp->width_in_blocks * DCTSIZE;
+    size_t ysize = comp->v_samp_factor * DCTSIZE;
+    size_t y0 = iMCU_y * ysize;
+    auto& buffer = m->input_buffer[c];
+    for (size_t i = 0; i < ysize; ++i) {
+      // Each plane is converted on its own, so only rows[0] is used.
+      rows[0] = buffer.Row(y0 + i);
+      if (plane[i] == nullptr) {
+        memset(rows[0], 0, xsize * sizeof(rows[0][0]));
+      } else {
+        (*m->input_method)(plane[i], xsize, rows);
+      }
+      // We need a border of 1 repeated pixel for adaptive quant field.
+      buffer.PadRow(y0 + i, xsize, /*border=*/1);
+    }
+  }
+  m->next_input_row += iMCU_height;
+  jpegli::ProcessiMCURows(cinfo);
+  if (!jpegli::EmptyBitWriterBuffer(&m->bw)) {
+    return 0;
+  }
+  cinfo->next_scanline += iMCU_height;
+  return iMCU_height;
+}
+
+//
+// Non-streaming part
+//
+
+// Finishes the compression cycle: produces whatever part of the bitstream
+// has not been written yet, appends EOI, terminates the destination and
+// resets the compressor via jpegli_abort_compress(). Raises an error if
+// fewer than image_height scanlines were supplied.
+// Only valid in the kEncReadImage and kEncWriteCoeffs states.
+void jpegli_finish_compress(j_compress_ptr cinfo) {
+  CheckState(cinfo, jpegli::kEncReadImage, jpegli::kEncWriteCoeffs);
+  jpeg_comp_master* m = cinfo->master;
+  if (cinfo->next_scanline < cinfo->image_height) {
+    JPEGLI_ERROR("Incomplete image, expected %d rows, got %d",
+                 cinfo->image_height, cinfo->next_scanline);
+  }
+
+  if (cinfo->global_state == jpegli::kEncWriteCoeffs) {
+    // Zig-zag shuffle all the blocks. For non-transcoding case it was already
+    // done in EncodeiMCURow().
+    jpegli::ZigZagShuffleBlocks(cinfo);
+  }
+
+  if (m->psnr_target > 0) {
+    jpegli::QuantizetoPSNR(cinfo);
+  }
+
+  // In streaming mode tokens were produced while reading the input; the
+  // bitstream itself is only complete if Huffman optimization is off too.
+  const bool tokens_done = jpegli::IsStreamingSupported(cinfo);
+  const bool bitstream_done = tokens_done && !cinfo->optimize_coding;
+
+  if (!tokens_done) {
+    jpegli::TokenizeJpeg(cinfo);
+  }
+
+  if (cinfo->optimize_coding || cinfo->progressive_mode) {
+    jpegli::OptimizeHuffmanCodes(cinfo);
+    jpegli::InitEntropyCoder(cinfo);
+  }
+
+  if (!bitstream_done) {
+    // Frame header followed by the header and data of every scan.
+    jpegli::WriteFrameHeader(cinfo);
+    for (int i = 0; i < cinfo->num_scans; ++i) {
+      jpegli::WriteScanHeader(cinfo, i);
+      jpegli::WriteScanData(cinfo, i);
+    }
+  } else {
+    // Scan data was already written; only flush the bit writer.
+    JumpToByteBoundary(&m->bw);
+    if (!EmptyBitWriterBuffer(&m->bw)) {
+      JPEGLI_ERROR("Output suspension is not supported in finish_compress");
+    }
+  }
+
+  jpegli::WriteOutput(cinfo, {0xFF, 0xD9});  // EOI
+  (*cinfo->dest->term_destination)(cinfo);
+
+  // Release memory and reset global state.
+  jpegli_abort_compress(cinfo);
+}
+
+// Compressor-typed wrapper around jpegli_abort().
+void jpegli_abort_compress(j_compress_ptr cinfo) {
+  jpegli_abort(reinterpret_cast<j_common_ptr>(cinfo));
+}
+
+// Compressor-typed wrapper around jpegli_destroy().
+void jpegli_destroy_compress(j_compress_ptr cinfo) {
+  jpegli_destroy(reinterpret_cast<j_common_ptr>(cinfo));
+}
diff --git a/third-party/libjxl/libjxl/lib/jpegli/encode.h b/third-party/libjxl/libjxl/lib/jpegli/encode.h
new file mode 100644
index 0000000000..7fa328f7e9
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/encode.h
@@ -0,0 +1,158 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// This file contains the C API of the encoder part of the libjpegli library,
+// which is based on the C API of libjpeg, with the function names changed from
+// jpeg_* to jpegli_*, while compressor object definitions are included directly
+// from jpeglib.h
+//
+// Applications can use the libjpegli library in one of the following ways:
+//
+//  (1) Include jpegli/encode.h and/or jpegli/decode.h, update the function
+//      names of the API and link against libjpegli.
+//
+//  (2) Leave the application code unchanged, but replace the libjpeg.so library
+//      with the one built by this project that is API- and ABI-compatible with
+//      libjpeg-turbo's version of libjpeg.so.
+
+#ifndef LIB_JPEGLI_ENCODE_H_
+#define LIB_JPEGLI_ENCODE_H_
+
+#include "lib/jpegli/common.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define jpegli_create_compress(cinfo)              \
+  jpegli_CreateCompress((cinfo), JPEG_LIB_VERSION, \
+                        (size_t)sizeof(struct jpeg_compress_struct))
+void jpegli_CreateCompress(j_compress_ptr cinfo, int version,
+                           size_t structsize);
+
+void jpegli_stdio_dest(j_compress_ptr cinfo, FILE* outfile);
+
+void jpegli_mem_dest(j_compress_ptr cinfo, unsigned char** outbuffer,
+                     unsigned long* outsize);
+
+void jpegli_set_defaults(j_compress_ptr cinfo);
+
+void jpegli_default_colorspace(j_compress_ptr cinfo);
+
+void jpegli_set_colorspace(j_compress_ptr cinfo, J_COLOR_SPACE colorspace);
+
+void jpegli_set_quality(j_compress_ptr cinfo, int quality,
+                        boolean force_baseline);
+
+void jpegli_set_linear_quality(j_compress_ptr cinfo, int scale_factor,
+                               boolean force_baseline);
+
+#if JPEG_LIB_VERSION >= 70
+void jpegli_default_qtables(j_compress_ptr cinfo, boolean force_baseline);
+#endif
+
+int jpegli_quality_scaling(int quality);
+
+void jpegli_add_quant_table(j_compress_ptr cinfo, int which_tbl,
+                            const unsigned int* basic_table, int scale_factor,
+                            boolean force_baseline);
+
+void jpegli_simple_progression(j_compress_ptr cinfo);
+
+void jpegli_suppress_tables(j_compress_ptr cinfo, boolean suppress);
+
+#if JPEG_LIB_VERSION >= 70
+void jpegli_calc_jpeg_dimensions(j_compress_ptr cinfo);
+#endif
+
+void jpegli_copy_critical_parameters(j_decompress_ptr srcinfo,
+                                     j_compress_ptr dstinfo);
+
+void jpegli_write_m_header(j_compress_ptr cinfo, int marker,
+                           unsigned int datalen);
+
+void jpegli_write_m_byte(j_compress_ptr cinfo, int val);
+
+void jpegli_write_marker(j_compress_ptr cinfo, int marker,
+                         const JOCTET* dataptr, unsigned int datalen);
+
+void jpegli_write_icc_profile(j_compress_ptr cinfo, const JOCTET* icc_data_ptr,
+                              unsigned int icc_data_len);
+
+void jpegli_start_compress(j_compress_ptr cinfo, boolean write_all_tables);
+
+void jpegli_write_tables(j_compress_ptr cinfo);
+
+JDIMENSION jpegli_write_scanlines(j_compress_ptr cinfo, JSAMPARRAY scanlines,
+                                  JDIMENSION num_lines);
+
+JDIMENSION jpegli_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data,
+                                 JDIMENSION num_lines);
+
+void jpegli_write_coefficients(j_compress_ptr cinfo,
+                               jvirt_barray_ptr* coef_arrays);
+
+void jpegli_finish_compress(j_compress_ptr cinfo);
+
+void jpegli_abort_compress(j_compress_ptr cinfo);
+
+void jpegli_destroy_compress(j_compress_ptr cinfo);
+
+//
+// New API functions that are not available in libjpeg
+//
+// NOTE: This part of the API is still experimental and will probably change in
+// the future.
+//
+
+// Sets the butteraugli target distance for the compressor. This may override
+// the default quantization table indexes based on jpeg colorspace, therefore
+// it must be called after jpegli_set_defaults() or after the last
+// jpegli_set_colorspace() or jpegli_default_colorspace() calls.
+void jpegli_set_distance(j_compress_ptr cinfo, float distance,
+                         boolean force_baseline);
+
+// Returns the butteraugli target distance for the given quality parameter.
+float jpegli_quality_to_distance(int quality);
+
+// Enables distance parameter search to meet the given psnr target.
+void jpegli_set_psnr(j_compress_ptr cinfo, float psnr, float tolerance,
+                     float min_distance, float max_distance);
+
+// Changes the default behaviour of the encoder in the selection of quantization
+// matrices and chroma subsampling. Must be called before jpegli_set_defaults()
+// because some default settings depend on the XYB mode.
+void jpegli_set_xyb_mode(j_compress_ptr cinfo);
+
+// Signals to the encoder that the pixel data that will be provided later
+// through jpegli_write_scanlines() has this transfer function. This must be
+// called before jpegli_set_defaults() because it changes the default
+// quantization tables.
+void jpegli_set_cicp_transfer_function(j_compress_ptr cinfo, int code);
+
+void jpegli_set_input_format(j_compress_ptr cinfo, JpegliDataType data_type,
+                             JpegliEndianness endianness);
+
+// Sets whether or not the encoder uses adaptive quantization for creating more
+// zero coefficients based on the local properties of the image.
+// Enabled by default.
+void jpegli_enable_adaptive_quantization(j_compress_ptr cinfo, boolean value);
+
+// Sets the default progression parameters, where level 0 is sequential, and
+// greater level value means more progression steps. Default is 2.
+void jpegli_set_progressive_level(j_compress_ptr cinfo, int level);
+
+// If this function is called before starting compression, the quality and
+// linear quality parameters will be used to scale the standard quantization
+// tables from Annex K of the JPEG standard. By default jpegli uses a different
+// set of quantization tables and uses different scaling parameters for DC and
+// AC coefficients. Must be called before jpegli_set_defaults().
+void jpegli_use_standard_quant_tables(j_compress_ptr cinfo);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  // extern "C"
+#endif
+
+#endif  // LIB_JPEGLI_ENCODE_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/encode_api_test.cc b/third-party/libjxl/libjxl/lib/jpegli/encode_api_test.cc
new file mode 100644
index 0000000000..4039c297a5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/encode_api_test.cc
@@ -0,0 +1,839 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jpegli {
+namespace {
+
+// Parameters of one encoder API test case: the image to encode, the
+// compression settings, how the input is handed to the encoder, and the limits
+// the result is checked against.
+struct TestConfig {
+  TestImage input;
+  CompressParams jparams;
+  JpegIOMode input_mode = PIXELS;
+  // Exclusive upper bound on the compressed size in bits per pixel. It is
+  // zero-initialized because several config generators never assign it, and
+  // copying the struct would otherwise read an indeterminate value.
+  double max_bpp = 0.0;
+  // Tolerance passed to VerifyOutputImage(); zero-initialized for the same
+  // reason as max_bpp.
+  double max_dist = 0.0;
+};
+
+// Value-parameterized test fixture; each instance receives one TestConfig.
+class EncodeAPITestParam : public ::testing::TestWithParam<TestConfig> {};
+
+// Fills `input` for the requested I/O mode. Pixel data is generated in every
+// mode; RAW_DATA and COEFFICIENTS additionally call GenerateRawData() or
+// GenerateCoeffs() with the given compression parameters.
+void GenerateInput(JpegIOMode input_mode, const CompressParams& jparams,
+                   TestImage* input) {
+  GeneratePixels(input);
+  switch (input_mode) {
+    case RAW_DATA:
+      GenerateRawData(jparams, input);
+      break;
+    case COEFFICIENTS:
+      GenerateCoeffs(jparams, input);
+      break;
+    default:
+      // Any other mode needs nothing beyond the pixels generated above.
+      break;
+  }
+}
+
+// Encodes the configured input with jpegli, checks the bits-per-pixel budget
+// (only when no ICC profile is attached), decodes the result with libjpeg and
+// verifies the output against the input within config.max_dist.
+TEST_P(EncodeAPITestParam, TestAPI) {
+  TestConfig config = GetParam();
+  GenerateInput(config.input_mode, config.jparams, &config.input);
+  std::vector<uint8_t> compressed;
+  ASSERT_TRUE(EncodeWithJpegli(config.input, config.jparams, &compressed));
+  const bool has_icc = !config.jparams.icc.empty();
+  if (!has_icc) {
+    const double num_bits = compressed.size() * 8.0;
+    const double bpp = num_bits / (config.input.xsize * config.input.ysize);
+    printf("bpp: %f\n", bpp);
+    EXPECT_LT(bpp, config.max_bpp);
+  }
+  DecompressParams dparams;
+  // Coefficient input is read back as coefficients, everything else as pixels.
+  if (config.input_mode == COEFFICIENTS) {
+    dparams.output_mode = COEFFICIENTS;
+  } else {
+    dparams.output_mode = PIXELS;
+  }
+  const bool grayscale_jpeg =
+      config.jparams.set_jpeg_colorspace &&
+      config.jparams.jpeg_color_space == JCS_GRAYSCALE;
+  if (!grayscale_jpeg) {
+    // Ask the decoder to output the color space of the input image.
+    dparams.set_out_color_space = true;
+    dparams.out_color_space = config.input.color_space;
+  } else {
+    // The JPEG is grayscale, so the reference image is converted instead.
+    ConvertToGrayscale(&config.input);
+  }
+  TestImage output;
+  DecodeWithLibjpeg(config.jparams, dparams, compressed, &output);
+  VerifyOutputImage(config.input, output, config.max_dist);
+}
+
+// Encodes the same generated 129x73 image twice with a single cinfo object and
+// checks that the two compressed outputs are byte-for-byte identical.
+TEST(EncodeAPITest, ReuseCinfoSameImageTwice) {
+  TestImage input;
+  input.xsize = 129;
+  input.ysize = 73;
+  CompressParams jparams;
+  GenerateInput(PIXELS, jparams, &input);
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  std::vector<uint8_t> compressed0;
+  std::vector<uint8_t> compressed1;
+  jpeg_compress_struct cinfo;
+  // NOTE(review): ERROR_HANDLER_SETUP is defined outside this chunk;
+  // presumably it makes the lambda return early on a library error, which is
+  // why the encoding is wrapped in a bool-returning lambda. Confirm.
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    // The memory destination is set up again before each of the two encodes.
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    EncodeWithJpegli(input, jparams, &cinfo);
+    compressed0.assign(buffer, buffer + buffer_size);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    EncodeWithJpegli(input, jparams, &cinfo);
+    compressed1.assign(buffer, buffer + buffer_size);
+    return true;
+  };
+  EXPECT_TRUE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+  // Sizes are asserted first so the memcmp below never reads past either
+  // vector.
+  ASSERT_EQ(compressed0.size(), compressed1.size());
+  EXPECT_EQ(0,
+            memcmp(compressed0.data(), compressed1.data(), compressed0.size()));
+}
+
+// Builds a small set of configs covering luma sampling factors {1, 2},
+// progressive modes {0, 2} and optimize_coding {0, 1}, leaving out the
+// combination of progressive mode with optimize_coding. Image dimensions vary
+// with the sampling factor and the optimize flag, and pixel data is generated
+// for every config.
+std::vector<TestConfig> GenerateBasicConfigs() {
+  std::vector<TestConfig> configs;
+  for (int samp : {1, 2}) {
+    for (int progr : {0, 2}) {
+      for (int optimize : {0, 1}) {
+        const bool skip = progr && optimize;
+        if (skip) continue;
+        TestConfig cfg;
+        cfg.input.xsize = 257 + samp * 37;
+        cfg.input.ysize = 265 + optimize * 17;
+        cfg.jparams.h_sampling = {samp, 1, 1};
+        cfg.jparams.v_sampling = {samp, 1, 1};
+        cfg.jparams.progressive_mode = progr;
+        cfg.jparams.optimize_coding = optimize;
+        cfg.max_dist = 2.4f;
+        GeneratePixels(&cfg.input);
+        configs.push_back(cfg);
+      }
+    }
+  }
+  return configs;
+}
+
+// Encodes every basic config back to back through one cinfo whose memory
+// destination is set up only once, then decodes the streams one after another
+// from that buffer and verifies each output against its input.
+TEST(EncodeAPITest, ReuseCinfoSameMemOutput) {
+  std::vector<TestConfig> all_configs = GenerateBasicConfigs();
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  {
+    jpeg_compress_struct cinfo;
+    const auto try_catch_block = [&]() -> bool {
+      ERROR_HANDLER_SETUP(jpegli);
+      jpegli_create_compress(&cinfo);
+      jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+      for (const TestConfig& config : all_configs) {
+        EncodeWithJpegli(config.input, config.jparams, &cinfo);
+      }
+      return true;
+    };
+    EXPECT_TRUE(try_catch_block());
+    jpegli_destroy_compress(&cinfo);
+  }
+  // `pos` advances by the value DecodeWithLibjpeg() returns (presumably the
+  // number of bytes consumed by that image -- confirm against test_utils).
+  size_t pos = 0;
+  for (size_t i = 0; i < all_configs.size(); ++i) {
+    TestImage output;
+    pos +=
+        DecodeWithLibjpeg(all_configs[i].jparams, DecompressParams(), nullptr,
+                          0, buffer + pos, buffer_size - pos, &output);
+    VerifyOutputImage(all_configs[i].input, output, all_configs[i].max_dist);
+  }
+  if (buffer) free(buffer);
+}
+
+// Encodes every basic config back to back through one cinfo into a single
+// temporary file, reads the file back into memory and decodes the streams one
+// after another, verifying each output against its input.
+TEST(EncodeAPITest, ReuseCinfoSameStdOutput) {
+  std::vector<TestConfig> all_configs = GenerateBasicConfigs();
+  FILE* tmpf = tmpfile();
+  JXL_CHECK(tmpf);
+  {
+    jpeg_compress_struct cinfo;
+    const auto try_catch_block = [&]() -> bool {
+      ERROR_HANDLER_SETUP(jpegli);
+      jpegli_create_compress(&cinfo);
+      jpegli_stdio_dest(&cinfo, tmpf);
+      for (const TestConfig& config : all_configs) {
+        EncodeWithJpegli(config.input, config.jparams, &cinfo);
+      }
+      return true;
+    };
+    EXPECT_TRUE(try_catch_block());
+    jpegli_destroy_compress(&cinfo);
+  }
+  // ftell() returns -1 on failure; check it before converting to an unsigned
+  // size so that a failure cannot turn into a huge allocation request.
+  const long end_pos = ftell(tmpf);
+  JXL_CHECK(end_pos >= 0);
+  const size_t total_size = static_cast<size_t>(end_pos);
+  rewind(tmpf);
+  std::vector<uint8_t> compressed(total_size);
+  // data() is used instead of &compressed[0] because indexing an empty vector
+  // is undefined behavior.
+  JXL_CHECK(total_size == fread(compressed.data(), 1, total_size, tmpf));
+  fclose(tmpf);
+  // `pos` advances by the value DecodeWithLibjpeg() returns (presumably the
+  // number of bytes consumed by that image -- confirm against test_utils).
+  size_t pos = 0;
+  for (size_t i = 0; i < all_configs.size(); ++i) {
+    TestImage output;
+    pos += DecodeWithLibjpeg(all_configs[i].jparams, DecompressParams(),
+                             nullptr, 0, compressed.data() + pos,
+                             compressed.size() - pos, &output);
+    VerifyOutputImage(all_configs[i].input, output, all_configs[i].max_dist);
+  }
+}
+
+// Reuses one cinfo object for a long series of encodes while varying the input
+// mode, luma sampling factors, progressive mode and quality. Every result is
+// decoded with libjpeg and verified against its input.
+TEST(EncodeAPITest, ReuseCinfoChangeParams) {
+  TestImage input, output;
+  CompressParams jparams;
+  DecompressParams dparams;
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  std::vector<uint8_t> compressed;
+  jpeg_compress_struct cinfo;
+  // Tolerance passed to VerifyOutputImage() for a given quality and luma
+  // sampling factor pair.
+  const auto max_rms = [](int q, int hs, int vs) {
+    if (hs == 1 && vs == 1) return q == 90 ? 2.2 : 0.6;
+    if (hs == 2 && vs == 2) return q == 90 ? 2.8 : 1.2;
+    return q == 90 ? 2.4 : 1.0;
+  };
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    input.xsize = 129;
+    input.ysize = 73;
+    dparams.set_out_color_space = true;
+    // PIXELS appears twice so that it is exercised both before and after a
+    // RAW_DATA run on the same cinfo.
+    for (JpegIOMode input_mode : {PIXELS, RAW_DATA, PIXELS, COEFFICIENTS}) {
+      for (int h_samp : {2, 1}) {
+        for (int v_samp : {2, 1}) {
+          for (int progr : {0, 2}) {
+            for (int quality : {90, 100}) {
+              input.Clear();
+              // Raw-data input is generated in YCbCr, all other modes in RGB.
+              input.color_space =
+                  (input_mode == RAW_DATA ? JCS_YCbCr : JCS_RGB);
+              jparams.quality = quality;
+              jparams.h_sampling = {h_samp, 1, 1};
+              jparams.v_sampling = {v_samp, 1, 1};
+              jparams.progressive_mode = progr;
+              printf(
+                  "Generating input with quality %d chroma subsampling %dx%d "
+                  "input mode %d progressive_mode %d\n",
+                  quality, h_samp, v_samp, input_mode, progr);
+              GenerateInput(input_mode, jparams, &input);
+              jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+              if (input_mode != COEFFICIENTS) {
+                // Start a compression with default settings and abort it right
+                // away, then set up the memory destination again before the
+                // real encode on the same cinfo.
+                cinfo.image_width = input.xsize;
+                cinfo.image_height = input.ysize;
+                cinfo.input_components = input.components;
+                jpegli_set_defaults(&cinfo);
+                jpegli_start_compress(&cinfo, TRUE);
+                jpegli_abort_compress(&cinfo);
+                jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+              }
+              EncodeWithJpegli(input, jparams, &cinfo);
+              // Copy the encoder output into a vector for decoding.
+              compressed.resize(buffer_size);
+              std::copy_n(buffer, buffer_size, compressed.data());
+              dparams.output_mode =
+                  input_mode == COEFFICIENTS ? COEFFICIENTS : PIXELS;
+              dparams.out_color_space = input.color_space;
+              output.Clear();
+              DecodeWithLibjpeg(jparams, dparams, compressed, &output);
+              VerifyOutputImage(input, output,
+                                max_rms(quality, h_samp, v_samp));
+            }
+          }
+        }
+      }
+    }
+    return true;
+  };
+  EXPECT_TRUE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+// Writes a tables-only stream with jpegli_write_tables() and a separate image
+// stream for a 1x1 all-zero RGB image started with write_all_tables = FALSE,
+// then decodes the image using both streams and checks the single pixel.
+TEST(EncodeAPITest, AbbreviatedStreams) {
+  uint8_t* table_stream = nullptr;
+  unsigned long table_stream_size = 0;
+  uint8_t* data_stream = nullptr;
+  unsigned long data_stream_size = 0;
+  {
+    jpeg_compress_struct cinfo;
+    const auto try_catch_block = [&]() -> bool {
+      ERROR_HANDLER_SETUP(jpegli);
+      jpegli_create_compress(&cinfo);
+      // First destination: receives only the tables.
+      jpegli_mem_dest(&cinfo, &table_stream, &table_stream_size);
+      cinfo.input_components = 3;
+      cinfo.in_color_space = JCS_RGB;
+      jpegli_set_defaults(&cinfo);
+      jpegli_write_tables(&cinfo);
+      // Second destination: receives the image data.
+      jpegli_mem_dest(&cinfo, &data_stream, &data_stream_size);
+      cinfo.image_width = 1;
+      cinfo.image_height = 1;
+      cinfo.optimize_coding = FALSE;
+      // Level 0 selects sequential (non-progressive) encoding.
+      jpegli_set_progressive_level(&cinfo, 0);
+      jpegli_start_compress(&cinfo, FALSE);
+      JSAMPLE image[3] = {0};
+      JSAMPROW row[] = {image};
+      jpegli_write_scanlines(&cinfo, row, 1);
+      jpegli_finish_compress(&cinfo);
+      return true;
+    };
+    EXPECT_TRUE(try_catch_block());
+    // The image stream is expected to stay below 50 bytes.
+    EXPECT_LT(data_stream_size, 50);
+    jpegli_destroy_compress(&cinfo);
+  }
+  TestImage output;
+  DecodeWithLibjpeg(CompressParams(), DecompressParams(), table_stream,
+                    table_stream_size, data_stream, data_stream_size, &output);
+  EXPECT_EQ(1, output.xsize);
+  EXPECT_EQ(1, output.ysize);
+  EXPECT_EQ(3, output.components);
+  EXPECT_EQ(0, output.pixels[0]);
+  EXPECT_EQ(0, output.pixels[1]);
+  EXPECT_EQ(0, output.pixels[2]);
+  if (table_stream) free(table_stream);
+  if (data_stream) free(data_stream);
+}
+
+// Copies, for every component of `cinfo`, the DCTSIZE2 values of the
+// quantization table that component refers to into `quant_tables`. Component
+// c occupies entries [c * DCTSIZE2, (c + 1) * DCTSIZE2), so the caller must
+// provide room for num_components * DCTSIZE2 entries.
+void CopyQuantTables(j_compress_ptr cinfo, uint16_t* quant_tables) {
+  for (int comp = 0; comp < cinfo->num_components; ++comp) {
+    const JQUANT_TBL* table =
+        cinfo->quant_tbl_ptrs[cinfo->comp_info[comp].quant_tbl_no];
+    uint16_t* dst = quant_tables + comp * DCTSIZE2;
+    for (int coeff = 0; coeff < DCTSIZE2; ++coeff) {
+      dst[coeff] = table->quantval[coeff];
+    }
+  }
+}
+
+// Checks the quality-related API for consistency and pins a few values of
+// jpegli_quality_scaling().
+TEST(EncodeAPITest, QualitySettings) {
+  // Test that jpegli_set_quality, jpegli_set_linear_quality and
+  // jpegli_quality_scaling are consistent with each other.
+  uint16_t quant_tables0[3 * DCTSIZE2];
+  uint16_t quant_tables1[3 * DCTSIZE2];
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    cinfo.input_components = 3;
+    cinfo.in_color_space = JCS_RGB;
+    jpegli_set_defaults(&cinfo);
+    for (boolean baseline : {FALSE, TRUE}) {
+      for (int q = 1; q <= 100; ++q) {
+        // Reference tables: produced by jpegli_set_quality().
+        jpegli_set_quality(&cinfo, q, baseline);
+        CopyQuantTables(&cinfo, quant_tables0);
+        // The same quality expressed as a linear scale factor must give the
+        // same tables.
+        jpegli_set_linear_quality(&cinfo, jpegli_quality_scaling(q), baseline);
+        CopyQuantTables(&cinfo, quant_tables1);
+        EXPECT_EQ(0,
+                  memcmp(quant_tables0, quant_tables1, sizeof(quant_tables0)));
+#if JPEG_LIB_VERSION >= 70
+        // Same check through the per-table q_scale_factor fields and
+        // jpegli_default_qtables().
+        for (int i = 0; i < NUM_QUANT_TBLS; ++i) {
+          cinfo.q_scale_factor[i] = jpegli_quality_scaling(q);
+        }
+        jpegli_default_qtables(&cinfo, baseline);
+        CopyQuantTables(&cinfo, quant_tables1);
+        EXPECT_EQ(0,
+                  memcmp(quant_tables0, quant_tables1, sizeof(quant_tables0)));
+#endif
+      }
+    }
+    return true;
+  };
+  EXPECT_TRUE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  // Test jpegli_quality_scaling for some specific values, including inputs
+  // outside the 1..100 range.
+  EXPECT_EQ(5000, jpegli_quality_scaling(-1));
+  EXPECT_EQ(5000, jpegli_quality_scaling(0));
+  EXPECT_EQ(5000, jpegli_quality_scaling(1));
+  EXPECT_EQ(100, jpegli_quality_scaling(50));
+  EXPECT_EQ(50, jpegli_quality_scaling(75));
+  EXPECT_EQ(20, jpegli_quality_scaling(90));
+  EXPECT_EQ(0, jpegli_quality_scaling(100));
+  EXPECT_EQ(0, jpegli_quality_scaling(101));
+}
+
+std::vector<TestConfig> GenerateTests() {
+  std::vector<TestConfig> all_tests;
+  for (int h_samp : {1, 2}) {
+    for (int v_samp : {1, 2}) {
+      for (int progr : {0, 2}) {
+        for (int optimize : {0, 1}) {
+          if (progr && optimize) continue;
+          TestConfig config;
+          config.jparams.h_sampling = {h_samp, 1, 1};
+          config.jparams.v_sampling = {v_samp, 1, 1};
+          config.jparams.progressive_mode = progr;
+          if (!progr) {
+            config.jparams.optimize_coding = optimize;
+          }
+          const float kMaxBpp[4] = {1.55, 1.4, 1.4, 1.32};
+          const float kMaxDist[4] = {1.95, 2.2, 2.2, 2.0};
+          const int idx = v_samp * 2 + h_samp - 3;
+          config.max_bpp =
+              kMaxBpp[idx] * (optimize ? 0.97 : 1.0) * (progr ? 0.97 : 1.0);
+          config.max_dist = kMaxDist[idx];
+          all_tests.push_back(config);
+        }
+      }
+    }
+  }
+  {
+    TestConfig config;
+    config.jparams.quality = 100;
+    config.max_bpp = 6.6;
+    config.max_dist = 0.6;
+    all_tests.push_back(config);
+  }
+  {
+    TestConfig config;
+    config.jparams.quality = 80;
+    config.max_bpp = 1.05;
+    config.max_dist = 2.7;
+    all_tests.push_back(config);
+  }
+  for (int samp : {1, 2}) {
+    for (int progr : {0, 2}) {
+      for (int optimize : {0, 1}) {
+        if (progr && optimize) continue;
+        TestConfig config;
+        config.input.xsize = 257;
+        config.input.ysize = 265;
+        config.jparams.h_sampling = {samp, 1, 1};
+        config.jparams.v_sampling = {samp, 1, 1};
+        config.jparams.progressive_mode = progr;
+        if (!progr) {
+          config.jparams.optimize_coding = optimize;
+        }
+        config.jparams.use_adaptive_quantization = false;
+        config.max_bpp = 2.05f;
+        config.max_dist = 2.3f;
+        all_tests.push_back(config);
+      }
+    }
+  }
+  for (int h0_samp : {1, 2, 4}) {
+    for (int v0_samp : {1, 2, 4}) {
+      for (int h2_samp : {1, 2, 4}) {
+        for (int v2_samp : {1, 2, 4}) {
+          TestConfig config;
+          config.input.xsize = 137;
+          config.input.ysize = 75;
+          config.jparams.progressive_mode = 2;
+          config.jparams.h_sampling = {h0_samp, 1, h2_samp};
+          config.jparams.v_sampling = {v0_samp, 1, v2_samp};
+          config.max_bpp = 2.5;
+          config.max_dist = 12.0;
+          all_tests.push_back(config);
+        }
+      }
+    }
+  }
+  for (int h0_samp : {1, 3}) {
+    for (int v0_samp : {1, 3}) {
+      for (int h2_samp : {1, 3}) {
+        for (int v2_samp : {1, 3}) {
+          TestConfig config;
+          config.input.xsize = 205;
+          config.input.ysize = 99;
+          config.jparams.progressive_mode = 2;
+          config.jparams.h_sampling = {h0_samp, 1, h2_samp};
+          config.jparams.v_sampling = {v0_samp, 1, v2_samp};
+          config.max_bpp = 2.5;
+          config.max_dist = 10.0;
+          all_tests.push_back(config);
+        }
+      }
+    }
+  }
+  for (int h0_samp : {1, 2, 3, 4}) {
+    for (int v0_samp : {1, 2, 3, 4}) {
+      TestConfig config;
+      config.input.xsize = 217;
+      config.input.ysize = 129;
+      config.jparams.progressive_mode = 2;
+      config.jparams.h_sampling = {h0_samp, 1, 1};
+      config.jparams.v_sampling = {v0_samp, 1, 1};
+      config.max_bpp = 2.0;
+      config.max_dist = 5.5;
+      all_tests.push_back(config);
+    }
+  }
+  for (int p = 0; p < 3 + NumTestScanScripts(); ++p) {
+    for (int samp : {1, 2}) {
+      for (int quality : {100, 90, 1}) {
+        for (int r : {0, 1024, 1}) {
+          for (int optimize : {0, 1}) {
+            bool progressive = p == 1 || p == 2 || p > 4;
+            if (progressive && !optimize) continue;
+            TestConfig config;
+            config.input.xsize = 273;
+            config.input.ysize = 265;
+            config.jparams.progressive_mode = p;
+            if (!progressive) {
+              config.jparams.optimize_coding = optimize;
+            }
+            config.jparams.h_sampling = {samp, 1, 1};
+            config.jparams.v_sampling = {samp, 1, 1};
+            config.jparams.quality = quality;
+            config.jparams.restart_interval = r;
+            config.max_bpp = quality == 100 ? 8.0 : 1.9;
+            if (r == 1) {
+              config.max_bpp += 10.0;
+            }
+            config.max_dist = quality == 1 ? 20.0 : 2.1;
+            all_tests.push_back(config);
+          }
+        }
+      }
+    }
+  }
+  {
+    TestConfig config;
+    config.jparams.simple_progression = true;
+    config.max_bpp = 1.48;
+    config.max_dist = 2.0;
+    all_tests.push_back(config);
+  }
+  {
+    TestConfig config;
+    config.input_mode = COEFFICIENTS;
+    config.jparams.h_sampling = {2, 1, 1};
+    config.jparams.v_sampling = {2, 1, 1};
+    config.jparams.progressive_mode = 0;
+    config.jparams.optimize_coding = 0;
+    config.max_bpp = 16;
+    config.max_dist = 0.0;
+    all_tests.push_back(config);
+  }
+  {
+    TestConfig config;
+    config.jparams.xyb_mode = true;
+    config.jparams.progressive_mode = 2;
+    config.max_bpp = 1.5;
+    config.max_dist = 3.5;
+    all_tests.push_back(config);
+  }
+  {
+    TestConfig config;
+    config.jparams.libjpeg_mode = true;
+    config.max_bpp = 2.1;
+    config.max_dist = 1.7;
+    all_tests.push_back(config);
+  }
+
+  for (J_COLOR_SPACE in_color_space : {JCS_RGB, JCS_YCbCr, JCS_GRAYSCALE}) {
+    for (J_COLOR_SPACE jpeg_color_space : {JCS_RGB, JCS_YCbCr, JCS_GRAYSCALE}) {
+      if (jpeg_color_space == JCS_RGB && in_color_space == JCS_YCbCr) continue;
+      TestConfig config;
+      config.input.xsize = config.input.ysize = 256;
+      config.input.color_space = in_color_space;
+      config.jparams.set_jpeg_colorspace = true;
+      config.jparams.jpeg_color_space = jpeg_color_space;
+      config.max_bpp = jpeg_color_space == JCS_RGB ? 4.5 : 1.85;
+      config.max_dist = jpeg_color_space == JCS_RGB ? 1.4 : 2.05;
+      all_tests.push_back(config);
+    }
+  }
+  for (J_COLOR_SPACE in_color_space : {JCS_CMYK, JCS_YCCK}) {
+    for (J_COLOR_SPACE jpeg_color_space : {JCS_CMYK, JCS_YCCK}) {
+      if (jpeg_color_space == JCS_CMYK && in_color_space == JCS_YCCK) continue;
+      TestConfig config;
+      config.input.xsize = config.input.ysize = 256;
+      config.input.color_space = in_color_space;
+      if (in_color_space != jpeg_color_space) {
+        config.jparams.set_jpeg_colorspace = true;
+        config.jparams.jpeg_color_space = jpeg_color_space;
+      }
+      config.max_bpp = jpeg_color_space == JCS_CMYK ? 4.0 : 3.6;
+      config.max_dist = jpeg_color_space == JCS_CMYK ? 1.2 : 1.5;
+      all_tests.push_back(config);
+    }
+  }
+  {
+    TestConfig config;
+    config.input.color_space = JCS_YCbCr;
+    config.max_bpp = 1.6;
+    config.max_dist = 1.35;
+    all_tests.push_back(config);
+  }
+  for (bool xyb : {false, true}) {
+    TestConfig config;
+    config.input.color_space = JCS_GRAYSCALE;
+    config.jparams.xyb_mode = xyb;
+    config.max_bpp = 1.35;
+    config.max_dist = 1.4;
+    all_tests.push_back(config);
+  }
+  for (int channels = 1; channels <= 4; ++channels) {
+    TestConfig config;
+    config.input.color_space = JCS_UNKNOWN;
+    config.input.components = channels;
+    config.max_bpp = 1.35 * channels;
+    config.max_dist = 1.4;
+    all_tests.push_back(config);
+  }
+  for (size_t r : {1, 3, 17, 1024}) {
+    for (int progr : {0, 2}) {
+      TestConfig config;
+      config.jparams.restart_interval = r;
+      config.jparams.progressive_mode = progr;
+      config.max_bpp = 1.58 + 5.5 / r;
+      config.max_dist = 2.2;
+      all_tests.push_back(config);
+    }
+  }
+  for (size_t rr : {1, 3, 8, 100}) {
+    TestConfig config;
+    config.jparams.restart_in_rows = rr;
+    config.max_bpp = 1.6;
+    config.max_dist = 2.2;
+    all_tests.push_back(config);
+  }
+  for (int type : {0, 1, 10, 100, 10000}) {
+    for (int scale : {1, 50, 100, 200, 500}) {
+      for (bool add_raw : {false, true}) {
+        for (bool baseline : {true, false}) {
+          if (!baseline && (add_raw || type * scale < 25500)) continue;
+          TestConfig config;
+          config.input.xsize = 64;
+          config.input.ysize = 64;
+          CustomQuantTable table;
+          table.table_type = type;
+          table.scale_factor = scale;
+          table.force_baseline = baseline;
+          table.add_raw = add_raw;
+          table.Generate();
+          config.jparams.optimize_coding = 1;
+          config.jparams.quant_tables.push_back(table);
+          config.jparams.quant_indexes = {0, 0, 0};
+          float q = (type == 0 ? 16 : type) * scale * 0.01f;
+          if (baseline && !add_raw) q = std::max(1.0f, std::min(255.0f, q));
+          config.max_bpp = 1.5f + 25.0f / q;
+          config.max_dist = 0.6f + 0.25f * q;
+          all_tests.push_back(config);
+        }
+      }
+    }
+  }
+  for (int qidx = 0; qidx < 8; ++qidx) {
+    if (qidx == 3) continue;
+    TestConfig config;
+    config.input.xsize = 256;
+    config.input.ysize = 256;
+    config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+                                    (qidx >> 0) & 1};
+    config.max_bpp = 2.25;
+    config.max_dist = 2.8;
+    all_tests.push_back(config);
+  }
+  for (int qidx = 0; qidx < 8; ++qidx) {
+    for (int slot_idx = 0; slot_idx < 2; ++slot_idx) {
+      if (qidx == 0 && slot_idx == 0) continue;
+      TestConfig config;
+      config.input.xsize = 256;
+      config.input.ysize = 256;
+      config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+                                      (qidx >> 0) & 1};
+      CustomQuantTable table;
+      table.slot_idx = slot_idx;
+      table.Generate();
+      config.jparams.quant_tables.push_back(table);
+      config.max_bpp = 2.3;
+      config.max_dist = 2.9;
+      all_tests.push_back(config);
+    }
+  }
+  for (int qidx = 0; qidx < 8; ++qidx) {
+    for (bool xyb : {false, true}) {
+      TestConfig config;
+      config.input.xsize = 256;
+      config.input.ysize = 256;
+      config.jparams.xyb_mode = xyb;
+      config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+                                      (qidx >> 0) & 1};
+      {
+        CustomQuantTable table;
+        table.slot_idx = 0;
+        table.Generate();
+        config.jparams.quant_tables.push_back(table);
+      }
+      {
+        CustomQuantTable table;
+        table.slot_idx = 1;
+        table.table_type = 20;
+        table.Generate();
+        config.jparams.quant_tables.push_back(table);
+      }
+      config.max_bpp = 2.0;
+      config.max_dist = 3.85;
+      all_tests.push_back(config);
+    }
+  }
+  for (bool xyb : {false, true}) {
+    TestConfig config;
+    config.input.xsize = 256;
+    config.input.ysize = 256;
+    config.jparams.xyb_mode = xyb;
+    config.jparams.quant_indexes = {0, 1, 2};
+    {
+      CustomQuantTable table;
+      table.slot_idx = 0;
+      table.Generate();
+      config.jparams.quant_tables.push_back(table);
+    }
+    {
+      CustomQuantTable table;
+      table.slot_idx = 1;
+      table.table_type = 20;
+      table.Generate();
+      config.jparams.quant_tables.push_back(table);
+    }
+    {
+      CustomQuantTable table;
+      table.slot_idx = 2;
+      table.table_type = 30;
+      table.Generate();
+      config.jparams.quant_tables.push_back(table);
+    }
+    config.max_bpp = 1.5;
+    config.max_dist = 3.75;
+    all_tests.push_back(config);
+  }
+  {
+    TestConfig config;
+    config.jparams.comp_ids = {7, 17, 177};
+    config.input.xsize = config.input.ysize = 128;
+    config.max_bpp = 2.25;
+    config.max_dist = 2.4;
+    all_tests.push_back(config);
+  }
+  for (int override_JFIF : {-1, 0, 1}) {
+    for (int override_Adobe : {-1, 0, 1}) {
+      if (override_JFIF == -1 && override_Adobe == -1) continue;
+      TestConfig config;
+      config.input.xsize = config.input.ysize = 128;
+      config.jparams.override_JFIF = override_JFIF;
+      config.jparams.override_Adobe = override_Adobe;
+      config.max_bpp = 2.25;
+      config.max_dist = 2.4;
+      all_tests.push_back(config);
+    }
+  }
+  {
+    TestConfig config;
+    config.input.xsize = config.input.ysize = 256;
+    config.max_bpp = 1.85;
+    config.max_dist = 2.05;
+    config.jparams.add_marker = true;
+    all_tests.push_back(config);
+  }
+  for (size_t icc_size : {728, 70000, 1000000}) {
+    TestConfig config;
+    config.input.xsize = config.input.ysize = 256;
+    config.max_dist = 2.05;
+    config.jparams.icc.resize(icc_size);
+    for (size_t i = 0; i < icc_size; ++i) {
+      config.jparams.icc[i] = (i * 17) & 0xff;
+    }
+    all_tests.push_back(config);
+  }
+  for (JpegIOMode input_mode : {PIXELS, RAW_DATA, COEFFICIENTS}) {
+    TestConfig config;
+    config.input.xsize = config.input.ysize = 256;
+    config.input_mode = input_mode;
+    if (input_mode == RAW_DATA) {
+      config.input.color_space = JCS_YCbCr;
+    }
+    config.jparams.progressive_mode = 0;
+    config.jparams.optimize_coding = 0;
+    config.max_bpp = 1.85;
+    config.max_dist = 2.05;
+    if (input_mode == COEFFICIENTS) {
+      config.max_bpp = 3.5;
+      config.max_dist = 0.0;
+    }
+    all_tests.push_back(config);
+    config.jparams.use_flat_dc_luma_code = true;
+    all_tests.push_back(config);
+  }
+  for (int xsize : {640, 641, 648, 649}) {
+    for (int ysize : {640, 641, 648, 649}) {
+      for (int h_sampling : {1, 2}) {
+        for (int v_sampling : {1, 2}) {
+          if (h_sampling == 1 && v_sampling == 1) continue;
+          for (int progr : {0, 2}) {
+            TestConfig config;
+            config.input.xsize = xsize;
+            config.input.ysize = ysize;
+            config.input.color_space = JCS_YCbCr;
+            config.jparams.h_sampling = {h_sampling, 1, 1};
+            config.jparams.v_sampling = {v_sampling, 1, 1};
+            config.jparams.progressive_mode = progr;
+            config.input_mode = RAW_DATA;
+            config.max_bpp = 1.75;
+            config.max_dist = 2.0;
+            all_tests.push_back(config);
+            config.input_mode = COEFFICIENTS;
+            if (xsize & 1) {
+              config.jparams.add_marker = true;
+            }
+            config.max_bpp = 24.0;
+            all_tests.push_back(config);
+          }
+        }
+      }
+    }
+  }
+  for (JpegliDataType data_type : {JPEGLI_TYPE_UINT16, JPEGLI_TYPE_FLOAT}) {
+    for (JpegliEndianness endianness :
+         {JPEGLI_LITTLE_ENDIAN, JPEGLI_BIG_ENDIAN, JPEGLI_NATIVE_ENDIAN}) {
+      J_COLOR_SPACE colorspace[4] = {JCS_GRAYSCALE, JCS_UNKNOWN, JCS_RGB,
+                                     JCS_CMYK};
+      float max_bpp[4] = {1.32, 2.7, 1.6, 4.0};
+      for (int channels = 1; channels <= 4; ++channels) {
+        TestConfig config;
+        config.input.data_type = data_type;
+        config.input.endianness = endianness;
+        config.input.components = channels;
+        config.input.color_space = colorspace[channels - 1];
+        config.max_bpp = max_bpp[channels - 1];
+        config.max_dist = 2.2;
+        all_tests.push_back(config);
+      }
+    }
+  }
+  for (int smoothing : {1, 5, 50, 100}) {
+    for (int h_sampling : {1, 2}) {
+      for (int v_sampling : {1, 2}) {
+        TestConfig config;
+        config.input.xsize = 257;
+        config.input.ysize = 265;
+        config.jparams.smoothing_factor = smoothing;
+        config.jparams.h_sampling = {h_sampling, 1, 1};
+        config.jparams.v_sampling = {v_sampling, 1, 1};
+        config.max_bpp = 1.85;
+        config.max_dist = 3.05f;
+        all_tests.push_back(config);
+      }
+    }
+  }
+  return all_tests;
+};
+
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+  os << c.input;
+  os << c.jparams;
+  // Only the RAW_DATA and COEFFICIENTS input modes append a suffix.
+  switch (c.input_mode) {
+    case RAW_DATA: return os << "RawDataIn";
+    case COEFFICIENTS: return os << "WriteCoeffs";
+    default: return os;
+  }
+}
+
+std::string TestDescription(
+    const testing::TestParamInfo<EncodeAPITestParam::ParamType>& info) {
+  std::ostringstream description;  // receives the streamed test parameter
+  description << info.param;
+  return description.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(EncodeAPITest, EncodeAPITestParam,
+                                testing::ValuesIn(GenerateTests()),
+                                TestDescription);
+}  // namespace
+}  // namespace jpegli
diff --git a/third-party/libjxl/libjxl/lib/jpegli/encode_finish.cc b/third-party/libjxl/libjxl/lib/jpegli/encode_finish.cc
new file mode 100644
index 0000000000..955676bdee
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/encode_finish.cc
@@ -0,0 +1,230 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/encode_finish.h"
+
+#include <cmath>
+#include <limits>
+
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jpegli/quant.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/encode_finish.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/dct-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::GetLane;
+
+using D = HWY_FULL(float);
+using DI = HWY_FULL(int32_t);
+using DI16 = Rebind<int16_t, HWY_FULL(int32_t)>;
+
+// Quantizes `block` in place: each coefficient is multiplied by qmc[k], then
+// zeroed if its magnitude is below zero_bias_offset[k] + zero_bias_mul[k] *
+// aq_strength and rounded otherwise.
+void ReQuantizeBlock(int16_t* block, const float* qmc, float aq_strength,
+                     const float* zero_bias_offset,
+                     const float* zero_bias_mul) {
+  D df;
+  DI di32;
+  DI16 di16;
+  const auto strength = Set(df, aq_strength);
+  for (size_t k = 0; k < DCTSIZE2; k += Lanes(df)) {
+    const auto coeffs = ConvertTo(df, PromoteTo(di32, Load(di16, block + k)));
+    const auto scaled = Mul(coeffs, Load(df, qmc + k));
+    const auto threshold = Add(Load(df, zero_bias_offset + k),
+                               Mul(Load(df, zero_bias_mul + k), strength));
+    const auto keep = Ge(Abs(scaled), threshold);
+    const auto quantized = IfThenElseZero(keep, Round(scaled));
+    Store(DemoteTo(di16, ConvertTo(di32, quantized)), di16, block + k);
+  }
+}
+
+// Returns the sum over the block of squared (coefficient - reconstruction)
+// differences, each scaled by 1/16. The reconstruction is qmc[k]-scaled,
+// zeroed below zero_bias_offset[k] + zero_bias_mul[k] * aq_strength, rounded
+// otherwise, and finally multiplied by iqmc[k].
+float BlockError(const int16_t* block, const float* qmc, const float* iqmc,
+                 const float aq_strength, const float* zero_bias_offset,
+                 const float* zero_bias_mul) {
+  D df;
+  DI di32;
+  DI16 di16;
+  const auto strength = Set(df, aq_strength);
+  const auto inv16 = Set(df, 1.0 / 16);
+  auto sum_sq = Zero(df);
+  for (size_t k = 0; k < DCTSIZE2; k += Lanes(df)) {
+    const auto coeffs = ConvertTo(df, PromoteTo(di32, Load(di16, block + k)));
+    const auto scaled = Mul(coeffs, Load(df, qmc + k));
+    const auto threshold = Add(Load(df, zero_bias_offset + k),
+                               Mul(Load(df, zero_bias_mul + k), strength));
+    const auto keep = Ge(Abs(scaled), threshold);
+    const auto quantized = IfThenElseZero(keep, Round(scaled));
+    const auto restored = Mul(quantized, Load(df, iqmc + k));
+    const auto delta = Mul(Sub(coeffs, restored), inv16);
+    sum_sq = Add(sum_sq, Mul(delta, delta));
+  }
+  return GetLane(SumOfLanes(df, sum_sq));
+}
+
+// Writes the reciprocals of the 64 multipliers in qmc to iqmc.
+void ComputeInverseWeights(const float* qmc, float* iqmc) {
+  const float* const qmc_end = qmc + 64;
+  while (qmc != qmc_end) *iqmc++ = 1.0f / *qmc++;
+}
+
+// Returns 10*log10(255^2 * num_coeffs / sum of BlockError) over every
+// `sampling`-th block row and column of each component.
+float ComputePSNR(j_compress_ptr cinfo, int sampling) {
+  jpeg_comp_master* master = cinfo->master;
+  InitQuantizer(cinfo, QuantPass::SEARCH_SECOND_PASS);
+  double sq_error = 0.0;
+  size_t num_coeffs = 0;
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    const jpeg_component_info& comp = cinfo->comp_info[c];
+    const float* qmc = master->quant_mul[c];
+    const float* zb_offset = master->zero_bias_offset[c];
+    const float* zb_mul = master->zero_bias_mul[c];
+    HWY_ALIGN float iqmc[64];
+    ComputeInverseWeights(qmc, iqmc);
+    for (JDIMENSION by = 0; by < comp.height_in_blocks; by += sampling) {
+      JBLOCKARRAY rows = GetBlockRow(cinfo, c, by);
+      const float* qf = master->quant_field.Row(by * master->v_factor[c]);
+      for (JDIMENSION bx = 0; bx < comp.width_in_blocks; bx += sampling) {
+        sq_error += BlockError(&rows[0][bx][0], qmc, iqmc,
+                               qf[bx * master->h_factor[c]], zb_offset, zb_mul);
+        num_coeffs += DCTSIZE2;
+      }
+    }
+  }
+  return 4.3429448f * log(num_coeffs / (sq_error / 255. / 255.));
+}
+
+// Calls InitQuantizer with QuantPass::SEARCH_SECOND_PASS, then applies
+// ReQuantizeBlock to every block of every component.
+void ReQuantizeCoeffs(j_compress_ptr cinfo) {
+  jpeg_comp_master* master = cinfo->master;
+  InitQuantizer(cinfo, QuantPass::SEARCH_SECOND_PASS);
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    const jpeg_component_info& comp = cinfo->comp_info[c];
+    const float* qmc = master->quant_mul[c];
+    const float* zb_offset = master->zero_bias_offset[c];
+    const float* zb_mul = master->zero_bias_mul[c];
+    for (JDIMENSION by = 0; by < comp.height_in_blocks; ++by) {
+      JBLOCKARRAY rows = GetBlockRow(cinfo, c, by);
+      const float* qf = master->quant_field.Row(by * master->v_factor[c]);
+      for (JDIMENSION bx = 0; bx < comp.width_in_blocks; ++bx) {
+        const float aq_strength = qf[bx * master->h_factor[c]];
+        ReQuantizeBlock(&rows[0][bx][0], qmc, aq_strength, zb_offset, zb_mul);
+      }
+    }
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+namespace {
+HWY_EXPORT(ComputePSNR);
+HWY_EXPORT(ReQuantizeCoeffs);
+
+void ReQuantizeCoeffs(j_compress_ptr cinfo) {
+  HWY_DYNAMIC_DISPATCH(ReQuantizeCoeffs)(cinfo);
+}
+
+float ComputePSNR(j_compress_ptr cinfo, int sampling) {
+  return HWY_DYNAMIC_DISPATCH(ComputePSNR)(cinfo, sampling);
+}
+
+void UpdateDistance(j_compress_ptr cinfo, float distance) {
+  float distances[NUM_QUANT_TBLS] = {distance, distance, distance};
+  SetQuantMatrices(cinfo, distances, /*add_two_chroma_tables=*/true);
+}
+
+float Clamp(float val, float minval, float maxval) {
+  return std::max(minval, std::min(maxval, val));
+}
+
+#define PSNR_SEARCH_DBG 0
+
+// Searches for the distance whose PSNR (from ComputePSNR) is closest to
+// master->psnr_target. A first pass samples every 4th block row/column and a
+// second pass, started at the first pass's best distance, uses every block.
+// Each pass runs at most kMaxIters steps: the distance is scaled exponentially
+// by the PSNR error until both a too-high and a too-low PSNR were seen, then
+// the [dmin, dmax] interval is bisected.
+float FindDistanceForPSNR(j_compress_ptr cinfo) {
+  constexpr int kMaxIters = 20;
+  const jpeg_comp_master* master = cinfo->master;
+  const float psnr_target = master->psnr_target;
+  const float tolerance = master->psnr_tolerance;
+  const float min_dist = master->min_distance;
+  const float max_dist = master->max_distance;
+  float d = Clamp(1.0f, min_dist, max_dist);
+  for (int sampling : {4, 1}) {
+    float best_diff = std::numeric_limits<float>::max();
+    float best_distance = 0.0f;
+    float best_psnr = 0.0;
+    float dmin = min_dist;
+    float dmax = max_dist;
+    bool have_lower = false;
+    bool have_upper = false;
+    for (int i = 0; i < kMaxIters; ++i) {
+      UpdateDistance(cinfo, d);
+      const float psnr = ComputePSNR(cinfo, sampling);
+      const bool above_target = psnr > psnr_target;
+      (above_target ? dmin : dmax) = d;
+      (above_target ? have_lower : have_upper) = true;
+#if (PSNR_SEARCH_DBG > 1)
+      printf("sampling %d iter %2d d %7.4f psnr %.2f", sampling, i, d, psnr);
+      if (have_upper && have_lower) {
+        printf("    d-interval: [ %7.4f .. %7.4f ]", dmin, dmax);
+      }
+      printf("\n");
+#endif
+      const float diff = std::abs(psnr - psnr_target);
+      if (diff < best_diff) {
+        best_diff = diff;
+        best_distance = d;
+        best_psnr = psnr;
+      }
+      if (diff < tolerance * psnr_target || dmin == dmax) break;
+      // Bisect once the target is bracketed; otherwise step exponentially.
+      const bool bracketed = have_lower && have_upper;
+      d = bracketed ? 0.5f * (dmin + dmax)
+                    : d * std::exp(0.15f * (psnr - psnr_target));
+      d = Clamp(d, min_dist, max_dist);
+    }
+    d = best_distance;
+    if (sampling == 1 && PSNR_SEARCH_DBG) {
+      printf("Final PSNR %.2f at distance %.4f\n", best_psnr, d);
+    }
+  }
+  return d;
+}
+
+}  // namespace
+
+// Installs the distance chosen by FindDistanceForPSNR, then re-quantizes.
+void QuantizetoPSNR(j_compress_ptr cinfo) {
+  UpdateDistance(cinfo, FindDistanceForPSNR(cinfo));
+  ReQuantizeCoeffs(cinfo);
+}
+
+}  // namespace jpegli
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jpegli/encode_finish.h b/third-party/libjxl/libjxl/lib/jpegli/encode_finish.h
new file mode 100644
index 0000000000..f6862decb9
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/encode_finish.h
@@ -0,0 +1,17 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_ENCODE_FINISH_H_
+#define LIB_JPEGLI_ENCODE_FINISH_H_
+
+#include "lib/jpegli/encode_internal.h"
+
+namespace jpegli {
+
+void QuantizetoPSNR(j_compress_ptr cinfo);
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_ENCODE_FINISH_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/encode_internal.h b/third-party/libjxl/libjxl/lib/jpegli/encode_internal.h
new file mode 100644
index 0000000000..4dbef97538
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/encode_internal.h
@@ -0,0 +1,141 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_ENCODE_INTERNAL_H_
+#define LIB_JPEGLI_ENCODE_INTERNAL_H_
+
+#include <stdint.h>
+
+#include "lib/jpegli/bit_writer.h"
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/common_internal.h"
+#include "lib/jpegli/encode.h"
+
+namespace jpegli {
+
+constexpr unsigned char kICCSignature[12] = {
+    0x49, 0x43, 0x43, 0x5F, 0x50, 0x52, 0x4F, 0x46, 0x49, 0x4C, 0x45, 0x00};
+constexpr int kICCMarker = JPEG_APP0 + 2;
+
+constexpr int kDefaultProgressiveLevel = 0;
+
+typedef int16_t coeff_t;
+
+struct HuffmanCodeTable {
+  int depth[256];
+  int code[256];
+};
+
+struct Token {
+  uint8_t context;
+  uint8_t symbol;
+  uint16_t bits;
+  Token(int c, int s, int b) : context(c), symbol(s), bits(b) {}
+};
+
+struct TokenArray {
+  Token* tokens;
+  size_t num_tokens;
+};
+
+struct RefToken {
+  uint8_t symbol;
+  uint8_t refbits;
+};
+
+struct ScanTokenInfo {
+  RefToken* tokens;
+  size_t num_tokens;
+  uint8_t* refbits;
+  uint16_t* eobruns;
+  size_t* restarts;
+  size_t num_restarts;
+  size_t num_nonzeros;
+  size_t num_future_nonzeros;
+  size_t token_offset;
+  size_t restart_interval;
+  size_t MCUs_per_row;
+  size_t MCU_rows_in_scan;
+  size_t blocks_in_MCU;
+  size_t num_blocks;
+};
+
+}  // namespace jpegli
+
+struct jpeg_comp_master {
+  jpegli::RowBuffer<float> input_buffer[jpegli::kMaxComponents];
+  jpegli::RowBuffer<float>* smooth_input[jpegli::kMaxComponents];
+  jpegli::RowBuffer<float>* raw_data[jpegli::kMaxComponents];
+  bool force_baseline;
+  bool xyb_mode;
+  uint8_t cicp_transfer_function;
+  bool use_std_tables;
+  bool use_adaptive_quantization;
+  int progressive_level;
+  size_t xsize_blocks;
+  size_t ysize_blocks;
+  size_t blocks_per_iMCU_row;
+  jpegli::ScanTokenInfo* scan_token_info;
+  JpegliDataType data_type;
+  JpegliEndianness endianness;
+  void (*input_method)(const uint8_t* row_in, size_t len,
+                       float* row_out[jpegli::kMaxComponents]);
+  void (*color_transform)(float* row[jpegli::kMaxComponents], size_t len);
+  void (*downsample_method[jpegli::kMaxComponents])(
+      float* rows_in[MAX_SAMP_FACTOR], size_t len, float* row_out);
+  float* quant_mul[jpegli::kMaxComponents];
+  float* zero_bias_offset[jpegli::kMaxComponents];
+  float* zero_bias_mul[jpegli::kMaxComponents];
+  int h_factor[jpegli::kMaxComponents];
+  int v_factor[jpegli::kMaxComponents];
+  // Array of Huffman tables that will be encoded in one or more DHT segments.
+  // In progressive mode we compute all Huffman tables that will be used in any
+  // of the scans, thus we can have more than 4 tables here.
+  JHUFF_TBL* huffman_tables;
+  size_t num_huffman_tables;
+  // Array of num_huffman_tables slot ids, where the ith element is the slot id
+  // of the ith Huffman table, as it appears in the DHT segment. The range of
+  // the slot ids is 0..3 for DC and 16..19 for AC Huffman codes.
+  uint8_t* slot_id_map;
+  // Maps context ids to an index in the huffman_tables array. Each component in
+  // each scan has a DC and AC context id, which are defined as follows:
+  //   - DC context id is the component index (relative to cinfo->comp_info) of
+  //     the scan component
+  //   - AC context ids start at 4 and are increased for each component of each
+  //     scan that has AC coefficients (i.e. Se > 0)
+  uint8_t* context_map;
+  size_t num_contexts;
+  // Array of cinfo->num_scans context ids, where the ith element is the context
+  // id of the first AC component of the ith scan.
+  uint8_t* ac_ctx_offset;
+  // Array of num_huffman_tables derived coding tables.
+  jpegli::HuffmanCodeTable* coding_tables;
+  float* diff_buffer;
+  jpegli::RowBuffer<float> fuzzy_erosion_tmp;
+  jpegli::RowBuffer<float> pre_erosion;
+  jpegli::RowBuffer<float> quant_field;
+  jvirt_barray_ptr* coeff_buffers;
+  size_t next_input_row;
+  size_t next_iMCU_row;
+  size_t next_dht_index;
+  size_t last_restart_interval;
+  JCOEF last_dc_coeff[MAX_COMPS_IN_SCAN];
+  jpegli::JpegBitWriter bw;
+  float* dct_buffer;
+  int32_t* block_tmp;
+  jpegli::TokenArray* token_arrays;
+  size_t cur_token_array;
+  jpegli::Token* next_token;
+  size_t num_tokens;
+  size_t total_num_tokens;
+  jpegli::RefToken* next_refinement_token;
+  uint8_t* next_refinement_bit;
+  float psnr_target;
+  float psnr_tolerance;
+  float min_distance;
+  float max_distance;
+};
+
+#endif  // LIB_JPEGLI_ENCODE_INTERNAL_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/encode_streaming.cc b/third-party/libjxl/libjxl/lib/jpegli/encode_streaming.cc
new file mode 100644
index 0000000000..89dbd813f8
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/encode_streaming.cc
@@ -0,0 +1,259 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/encode_streaming.h"
+
+#include <cmath>
+
+#include "lib/jpegli/bit_writer.h"
+#include "lib/jpegli/bitstream.h"
+#include "lib/jpegli/entropy_coding.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jxl/base/bits.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/encode_streaming.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/dct-inl.h"
+#include "lib/jpegli/entropy_coding-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+static const int kStreamingModeCoefficients = 0;
+static const int kStreamingModeTokens = 1;
+static const int kStreamingModeBits = 2;
+
+namespace {
+void ZigZagShuffle(int32_t* JXL_RESTRICT block) {
+  // TODO(szabadka) SIMDify this.
+  int32_t tmp[DCTSIZE2];
+  tmp[0] = block[0];
+  tmp[1] = block[1];
+  tmp[2] = block[8];
+  tmp[3] = block[16];
+  tmp[4] = block[9];
+  tmp[5] = block[2];
+  tmp[6] = block[3];
+  tmp[7] = block[10];
+  tmp[8] = block[17];
+  tmp[9] = block[24];
+  tmp[10] = block[32];
+  tmp[11] = block[25];
+  tmp[12] = block[18];
+  tmp[13] = block[11];
+  tmp[14] = block[4];
+  tmp[15] = block[5];
+  tmp[16] = block[12];
+  tmp[17] = block[19];
+  tmp[18] = block[26];
+  tmp[19] = block[33];
+  tmp[20] = block[40];
+  tmp[21] = block[48];
+  tmp[22] = block[41];
+  tmp[23] = block[34];
+  tmp[24] = block[27];
+  tmp[25] = block[20];
+  tmp[26] = block[13];
+  tmp[27] = block[6];
+  tmp[28] = block[7];
+  tmp[29] = block[14];
+  tmp[30] = block[21];
+  tmp[31] = block[28];
+  tmp[32] = block[35];
+  tmp[33] = block[42];
+  tmp[34] = block[49];
+  tmp[35] = block[56];
+  tmp[36] = block[57];
+  tmp[37] = block[50];
+  tmp[38] = block[43];
+  tmp[39] = block[36];
+  tmp[40] = block[29];
+  tmp[41] = block[22];
+  tmp[42] = block[15];
+  tmp[43] = block[23];
+  tmp[44] = block[30];
+  tmp[45] = block[37];
+  tmp[46] = block[44];
+  tmp[47] = block[51];
+  tmp[48] = block[58];
+  tmp[49] = block[59];
+  tmp[50] = block[52];
+  tmp[51] = block[45];
+  tmp[52] = block[38];
+  tmp[53] = block[31];
+  tmp[54] = block[39];
+  tmp[55] = block[46];
+  tmp[56] = block[53];
+  tmp[57] = block[60];
+  tmp[58] = block[61];
+  tmp[59] = block[54];
+  tmp[60] = block[47];
+  tmp[61] = block[55];
+  tmp[62] = block[62];
+  tmp[63] = block[63];
+  memcpy(block, tmp, DCTSIZE2 * sizeof(tmp[0]));
+}
+}  // namespace
+
+template <int kMode>  // one of the kStreamingMode* constants above
+void ProcessiMCURow(j_compress_ptr cinfo) {
+  jpeg_comp_master* m = cinfo->master;
+  JpegBitWriter* bw = &m->bw;
+  int xsize_mcus = DivCeil(cinfo->image_width, 8 * cinfo->max_h_samp_factor);
+  int ysize_mcus = DivCeil(cinfo->image_height, 8 * cinfo->max_v_samp_factor);
+  int mcu_y = m->next_iMCU_row;
+  int32_t* block = m->block_tmp;
+  int32_t* symbols = m->block_tmp + DCTSIZE2;  // bits mode only
+  int32_t* nonzero_idx = m->block_tmp + 3 * DCTSIZE2;  // bits mode only
+  coeff_t* JXL_RESTRICT last_dc_coeff = m->last_dc_coeff;
+  bool adaptive_quant = m->use_adaptive_quantization && m->psnr_target == 0;
+  JBLOCKARRAY ba[kMaxComponents];  // filled in coefficients mode only
+  if (kMode == kStreamingModeCoefficients) {
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      jpeg_component_info* comp = &cinfo->comp_info[c];
+      int by0 = mcu_y * comp->v_samp_factor;
+      int block_rows_left = comp->height_in_blocks - by0;
+      int max_block_rows = std::min(comp->v_samp_factor, block_rows_left);
+      ba[c] = (*cinfo->mem->access_virt_barray)(
+          reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[c], by0,
+          max_block_rows, true);
+    }
+  }
+  if (kMode == kStreamingModeTokens) {
+    TokenArray* ta = &m->token_arrays[m->cur_token_array];
+    int max_tokens_per_mcu_row = MaxNumTokensPerMCURow(cinfo);
+    if (ta->num_tokens + max_tokens_per_mcu_row > m->num_tokens) {
+      if (ta->tokens) {  // current array is in use: move on to the next one
+        m->total_num_tokens += ta->num_tokens;
+        ++m->cur_token_array;
+        ta = &m->token_arrays[m->cur_token_array];
+      }
+      m->num_tokens =
+          EstimateNumTokens(cinfo, mcu_y, ysize_mcus, m->total_num_tokens,
+                            max_tokens_per_mcu_row);
+      ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE);
+      m->next_token = ta->tokens;
+    }
+  }
+  const float* imcu_start[kMaxComponents];
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    jpeg_component_info* comp = &cinfo->comp_info[c];
+    imcu_start[c] = m->raw_data[c]->Row(mcu_y * comp->v_samp_factor * DCTSIZE);
+  }
+  const float* qf = nullptr;  // stays null unless adaptive_quant
+  if (adaptive_quant) {
+    qf = m->quant_field.Row(0);
+  }
+  HuffmanCodeTable* dc_code = nullptr;  // set per component in bits mode
+  HuffmanCodeTable* ac_code = nullptr;  // set per component in bits mode
+  const size_t qf_stride = m->quant_field.stride();
+  for (int mcu_x = 0; mcu_x < xsize_mcus; ++mcu_x) {
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      jpeg_component_info* comp = &cinfo->comp_info[c];
+      if (kMode == kStreamingModeBits) {
+        dc_code = &m->coding_tables[m->context_map[c]];
+        ac_code = &m->coding_tables[m->context_map[c + 4]];
+      }
+      float* JXL_RESTRICT qmc = m->quant_mul[c];
+      const size_t stride = m->raw_data[c]->stride();
+      const int h_factor = m->h_factor[c];
+      const float* zero_bias_offset = m->zero_bias_offset[c];
+      const float* zero_bias_mul = m->zero_bias_mul[c];
+      float aq_strength = 0.0f;  // stays 0 without adaptive_quant
+      for (int iy = 0; iy < comp->v_samp_factor; ++iy) {
+        for (int ix = 0; ix < comp->h_samp_factor; ++ix) {
+          size_t by = mcu_y * comp->v_samp_factor + iy;
+          size_t bx = mcu_x * comp->h_samp_factor + ix;
+          if (bx >= comp->width_in_blocks || by >= comp->height_in_blocks) {
+            if (kMode == kStreamingModeTokens) {
+              *m->next_token++ = Token(c, 0, 0);
+              *m->next_token++ = Token(c + 4, 0, 0);
+            } else if (kMode == kStreamingModeBits) {
+              WriteBits(bw, dc_code->depth[0], dc_code->code[0]);
+              WriteBits(bw, ac_code->depth[0], ac_code->code[0]);
+            }
+            continue;  // block lies outside the component
+          }
+          if (adaptive_quant) {
+            aq_strength = qf[iy * qf_stride + bx * h_factor];
+          }
+          const float* pixels = imcu_start[c] + (iy * stride + bx) * DCTSIZE;
+          ComputeCoefficientBlock(pixels, stride, qmc, last_dc_coeff[c],
+                                  aq_strength, zero_bias_offset, zero_bias_mul,
+                                  m->dct_buffer, block);
+          if (kMode == kStreamingModeCoefficients) {
+            JCOEF* cblock = &ba[c][iy][bx][0];
+            for (int k = 0; k < DCTSIZE2; ++k) {
+              cblock[k] = block[kJPEGNaturalOrder[k]];
+            }
+          }
+          block[0] -= last_dc_coeff[c];  // DC becomes a difference
+          last_dc_coeff[c] += block[0];  // back to this block's absolute DC
+          if (kMode == kStreamingModeTokens) {
+            ComputeTokensForBlock<int32_t, false>(block, 0, c, c + 4,
+                                                  &m->next_token);
+          } else if (kMode == kStreamingModeBits) {
+            ZigZagShuffle(block);
+            const int num_nonzeros = CompactBlock(block, nonzero_idx);
+            const bool emit_eob = nonzero_idx[num_nonzeros - 1] < 1008;
+            ComputeSymbols(num_nonzeros, nonzero_idx, block, symbols);
+            WriteBlock(symbols, block, num_nonzeros, emit_eob, dc_code, ac_code,
+                       bw);
+          }
+        }
+      }
+    }
+  }
+  if (kMode == kStreamingModeTokens) {
+    TokenArray* ta = &m->token_arrays[m->cur_token_array];
+    ta->num_tokens = m->next_token - ta->tokens;
+    ScanTokenInfo* sti = &m->scan_token_info[0];
+    sti->num_tokens = m->total_num_tokens + ta->num_tokens;
+    sti->restarts[0] = sti->num_tokens;
+  }
+}
+
+void ComputeCoefficientsForiMCURow(j_compress_ptr cinfo) {
+  ProcessiMCURow<kStreamingModeCoefficients>(cinfo);
+}
+
+void ComputeTokensForiMCURow(j_compress_ptr cinfo) {
+  ProcessiMCURow<kStreamingModeTokens>(cinfo);
+}
+
+void WriteiMCURow(j_compress_ptr cinfo) {
+  ProcessiMCURow<kStreamingModeBits>(cinfo);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+HWY_EXPORT(ComputeCoefficientsForiMCURow);
+HWY_EXPORT(ComputeTokensForiMCURow);
+HWY_EXPORT(WriteiMCURow);
+
+void ComputeCoefficientsForiMCURow(j_compress_ptr cinfo) {
+  HWY_DYNAMIC_DISPATCH(ComputeCoefficientsForiMCURow)(cinfo);
+}
+
+void ComputeTokensForiMCURow(j_compress_ptr cinfo) {
+  HWY_DYNAMIC_DISPATCH(ComputeTokensForiMCURow)(cinfo);
+}
+
+void WriteiMCURow(j_compress_ptr cinfo) {
+  HWY_DYNAMIC_DISPATCH(WriteiMCURow)(cinfo);
+}
+
+}  // namespace jpegli
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jpegli/encode_streaming.h b/third-party/libjxl/libjxl/lib/jpegli/encode_streaming.h
new file mode 100644
index 0000000000..69acff4eaf
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/encode_streaming.h
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_ENCODE_STREAMING_H_
+#define LIB_JPEGLI_ENCODE_STREAMING_H_
+
+#include "lib/jpegli/encode_internal.h"
+
+namespace jpegli {
+
+void ComputeCoefficientsForiMCURow(j_compress_ptr cinfo);
+
+void ComputeTokensForiMCURow(j_compress_ptr cinfo);
+
+void WriteiMCURow(j_compress_ptr cinfo);
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_ENCODE_STREAMING_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/entropy_coding-inl.h b/third-party/libjxl/libjxl/lib/jpegli/entropy_coding-inl.h
new file mode 100644
index 0000000000..bfb436d795
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/entropy_coding-inl.h
@@ -0,0 +1,213 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JPEGLI_ENTROPY_CODING_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JPEGLI_ENTROPY_CODING_INL_H_
+#undef LIB_JPEGLI_ENTROPY_CODING_INL_H_
+#else
+#define LIB_JPEGLI_ENTROPY_CODING_INL_H_
+#endif
+
+#include "lib/jxl/base/compiler_specific.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::And;
+using hwy::HWY_NAMESPACE::AndNot;
+using hwy::HWY_NAMESPACE::Compress;
+using hwy::HWY_NAMESPACE::CountTrue;
+using hwy::HWY_NAMESPACE::Eq;
+using hwy::HWY_NAMESPACE::GetLane;
+using hwy::HWY_NAMESPACE::MaskFromVec;
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::Not;
+using hwy::HWY_NAMESPACE::Or;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Shl;
+using hwy::HWY_NAMESPACE::Sub;
+
+using DI = HWY_FULL(int32_t);
+constexpr DI di;
+
+template <typename DI, class V>
+JXL_INLINE V NumBits(DI di, const V x) {
+  // TODO(szabadka) Add faster implementations for some specific architectures.
+  const auto b1 = And(x, Set(di, 1));  // bN == N if bit N-1 of x is set,
+  const auto b2 = And(x, Set(di, 2));  // otherwise bN <= 0.
+  const auto b3 = Sub((And(x, Set(di, 4))), Set(di, 1));
+  const auto b4 = Sub((And(x, Set(di, 8))), Set(di, 4));
+  const auto b5 = Sub((And(x, Set(di, 16))), Set(di, 11));
+  const auto b6 = Sub((And(x, Set(di, 32))), Set(di, 26));
+  const auto b7 = Sub((And(x, Set(di, 64))), Set(di, 57));
+  const auto b8 = Sub((And(x, Set(di, 128))), Set(di, 120));
+  const auto b9 = Sub((And(x, Set(di, 256))), Set(di, 247));
+  const auto b10 = Sub((And(x, Set(di, 512))), Set(di, 502));
+  const auto b11 = Sub((And(x, Set(di, 1024))), Set(di, 1013));
+  const auto b12 = Sub((And(x, Set(di, 2048))), Set(di, 2036));
+  return Max(Max(Max(Max(b1, b2), Max(b3, b4)), Max(Max(b5, b6), Max(b7, b8))),
+             Max(Max(b9, b10), Max(b11, b12)));  // highest set bit, 1-based
+}
+
+// Coefficient indexes pre-multiplied by 16 for the symbol calculation.
+HWY_ALIGN constexpr int32_t kIndexes[64] = {
+    0,   16,  32,  48,  64,  80,  96,  112, 128, 144, 160, 176,  192,
+    208, 224, 240, 256, 272, 288, 304, 320, 336, 352, 368, 384,  400,
+    416, 432, 448, 464, 480, 496, 512, 528, 544, 560, 576, 592,  608,
+    624, 640, 656, 672, 688, 704, 720, 736, 752, 768, 784, 800,  816,
+    832, 848, 864, 880, 896, 912, 928, 944, 960, 976, 992, 1008,
+};
+
+JXL_INLINE int CompactBlock(int32_t* JXL_RESTRICT block,
+                            int32_t* JXL_RESTRICT nonzero_idx) {
+  const auto zero = Zero(di);
+  HWY_ALIGN constexpr int32_t dc_mask_lanes[HWY_LANES(DI)] = {-1};
+  const auto dc_mask = MaskFromVec(Load(di, dc_mask_lanes));
+  int num_nonzeros = 0;  // also the write offset of the compacted output
+  int k = 0;
+  {  // First vector: lane 0 (the DC) is kept even when it is zero.
+    const auto coef = Load(di, block);
+    const auto idx = Load(di, kIndexes);
+    const auto nonzero_mask = Or(dc_mask, Not(Eq(coef, zero)));
+    const auto nzero_coef = Compress(coef, nonzero_mask);
+    const auto nzero_idx = Compress(idx, nonzero_mask);
+    StoreU(nzero_coef, di, &block[num_nonzeros]);
+    StoreU(nzero_idx, di, &nonzero_idx[num_nonzeros]);
+    num_nonzeros += CountTrue(di, nonzero_mask);
+    k += Lanes(di);
+  }
+  for (; k < DCTSIZE2; k += Lanes(di)) {  // remaining vectors: nonzeros only
+    const auto coef = Load(di, &block[k]);
+    const auto idx = Load(di, &kIndexes[k]);
+    const auto nonzero_mask = Not(Eq(coef, zero));
+    const auto nzero_coef = Compress(coef, nonzero_mask);
+    const auto nzero_idx = Compress(idx, nonzero_mask);
+    StoreU(nzero_coef, di, &block[num_nonzeros]);
+    StoreU(nzero_idx, di, &nonzero_idx[num_nonzeros]);
+    num_nonzeros += CountTrue(di, nonzero_mask);
+  }
+  return num_nonzeros;  // >= 1 because lane 0 is always counted
+}
+
+JXL_INLINE void ComputeSymbols(const int num_nonzeros,
+                               int32_t* JXL_RESTRICT nonzero_idx,
+                               int32_t* JXL_RESTRICT block,
+                               int32_t* JXL_RESTRICT symbols) {
+  nonzero_idx[-1] = -16;  // makes the first symbol nbits + nonzero_idx[0]
+  const auto one = Set(di, 1);
+  const auto offset = Set(di, 16);
+  for (int i = 0; i < num_nonzeros; i += Lanes(di)) {
+    const auto idx = Load(di, &nonzero_idx[i]);
+    const auto prev_idx = LoadU(di, &nonzero_idx[i - 1]);
+    const auto coeff = Load(di, &block[i]);
+    const auto nbits = NumBits(di, Abs(coeff));
+    const auto mask = ShiftRight<8 * sizeof(int32_t) - 1>(coeff);  // -1 if < 0
+    const auto bits = And(Add(coeff, mask), Sub(Shl(one, nbits), one));
+    const auto symbol = Sub(Add(nbits, idx), Add(prev_idx, offset));
+    Store(symbol, di, symbols + i);
+    Store(bits, di, block + i);  // block now holds the extra bits
+  }
+}
+
+template <typename T>
+int NumNonZero8x8ExceptDC(const T* block) {
+  const HWY_CAPPED(T, 8) di;
+
+  const auto zero = Zero(di);
+  // Add -1 (all bits set) for every zero coefficient, negate to get #zeros.
+  auto neg_sum_zero = zero;
+  {
+    // First row has DC, so mask
+    const size_t y = 0;
+    HWY_ALIGN const T dc_mask_lanes[8] = {-1};
+
+    for (size_t x = 0; x < 8; x += Lanes(di)) {
+      const auto dc_mask = Load(di, dc_mask_lanes + x);
+
+      // DC counts as zero so we don't include it in nzeros.
+      const auto coef = AndNot(dc_mask, Load(di, &block[y * 8 + x]));
+
+      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
+    }
+  }
+  // Remaining rows: no mask
+  for (size_t y = 1; y < 8; y++) {
+    for (size_t x = 0; x < 8; x += Lanes(di)) {
+      const auto coef = Load(di, &block[y * 8 + x]);
+      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
+    }
+  }
+
+  // We want 64 - sum_zero, add because neg_sum_zero is already negated.
+  return kDCTBlockSize + GetLane(SumOfLanes(di, neg_sum_zero));
+}
+
+template <typename T, bool zig_zag_order>
+void ComputeTokensForBlock(const T* block, int last_dc, int dc_ctx, int ac_ctx,
+                           Token** tokens_ptr) {
+  Token* next_token = *tokens_ptr;
+  coeff_t temp2;
+  coeff_t temp;
+  temp = block[0] - last_dc;  // DC difference to code
+  if (temp == 0) {
+    *next_token++ = Token(dc_ctx, 0, 0);
+  } else {
+    temp2 = temp;
+    if (temp < 0) {
+      temp = -temp;
+      temp2--;  // negative values emit (value - 1) & mask
+    }
+    int dc_nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1;
+    int dc_mask = (1 << dc_nbits) - 1;
+    *next_token++ = Token(dc_ctx, dc_nbits, temp2 & dc_mask);
+  }
+  int num_nonzeros = NumNonZero8x8ExceptDC(block);
+  for (int k = 1; k < 64; ++k) {
+    if (num_nonzeros == 0) {
+      *next_token++ = Token(ac_ctx, 0, 0);  // no nonzero coefficients remain
+      break;
+    }
+    int r = 0;  // zeros skipped before the next nonzero coefficient
+    if (zig_zag_order) {
+      while ((temp = block[k]) == 0) {
+        r++;
+        k++;
+      }
+    } else {
+      while ((temp = block[kJPEGNaturalOrder[k]]) == 0) {
+        r++;
+        k++;
+      }
+    }
+    --num_nonzeros;
+    if (temp < 0) {
+      temp = -temp;
+      temp2 = ~temp;  // equals the original value - 1
+    } else {
+      temp2 = temp;
+    }
+    while (r > 15) {
+      *next_token++ = Token(ac_ctx, 0xf0, 0);  // accounts for 16 zeros
+      r -= 16;
+    }
+    int ac_nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1;
+    int ac_mask = (1 << ac_nbits) - 1;
+    int symbol = (r << 4u) + ac_nbits;
+    *next_token++ = Token(ac_ctx, symbol, temp2 & ac_mask);
+  }
+  *tokens_ptr = next_token;  // report how far the token stream advanced
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace
+}  // namespace HWY_NAMESPACE
+}  // namespace jpegli
+HWY_AFTER_NAMESPACE();
+#endif  // LIB_JPEGLI_ENTROPY_CODING_INL_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/entropy_coding.cc b/third-party/libjxl/libjxl/lib/jpegli/entropy_coding.cc
new file mode 100644
index 0000000000..149768fd30
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/entropy_coding.cc
@@ -0,0 +1,837 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/entropy_coding.h"
+
+#include <vector>
+
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/huffman.h"
+#include "lib/jxl/base/bits.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/entropy_coding.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/entropy_coding-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+void ComputeTokensSequential(const coeff_t* block, int last_dc, int dc_ctx,
+                             int ac_ctx, Token** tokens_ptr) {  // Per-target wrapper: ComputeTokensForBlock with T = coeff_t and zig_zag_order = true.
+  ComputeTokensForBlock<coeff_t, true>(block, last_dc, dc_ctx, ac_ctx,
+                                       tokens_ptr);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+
+size_t MaxNumTokensPerMCURow(j_compress_ptr cinfo) {  // Upper bound on tokens in one MCU row, counting kDCTBlockSize tokens per block.
+  int MCUs_per_row = DivCeil(cinfo->image_width, 8 * cinfo->max_h_samp_factor);  // An MCU spans 8 * max_h_samp_factor pixels horizontally.
+  size_t blocks_per_mcu = 0;
+  for (int c = 0; c < cinfo->num_components; ++c) {  // Sum h * v blocks over every component of the image.
+    jpeg_component_info* comp = &cinfo->comp_info[c];
+    blocks_per_mcu += comp->h_samp_factor * comp->v_samp_factor;
+  }
+  return kDCTBlockSize * blocks_per_mcu * MCUs_per_row;
+}
+
+size_t EstimateNumTokens(j_compress_ptr cinfo, size_t mcu_y, size_t ysize_mcus,
+                         size_t num_tokens, size_t max_per_row) {  // Guesses how many more tokens to allocate for rows mcu_y..ysize_mcus-1; cinfo is not used.
+  size_t estimate;  // Projected token total for all ysize_mcus rows.
+  if (mcu_y == 0) {
+    estimate = 16 * max_per_row;  // No rows processed yet: fixed initial guess.
+  } else {
+    estimate = (4 * ysize_mcus * num_tokens) / (3 * mcu_y);  // Extrapolate the per-row average so far, scaled by 4/3.
+  }
+  size_t mcus_left = ysize_mcus - mcu_y;
+  return std::min(mcus_left * max_per_row,  // Never more than the worst case for the remaining rows.
+                  std::max(max_per_row, estimate - num_tokens));  // At least one row. NOTE(review): unsigned subtraction wraps if num_tokens > estimate (possible when mcu_y == 0); min() then picks the upper clamp.
+}
+
+namespace {
+HWY_EXPORT(ComputeTokensSequential);  // Lets TokenizeScan call ComputeTokensSequential via HWY_DYNAMIC_DISPATCH.
+
+void TokenizeProgressiveDC(const coeff_t* coeffs, int context, int Al,
+                           coeff_t* last_dc_coeff, Token** next_token) {  // Emits one token for the shifted DC difference and updates *last_dc_coeff.
+  coeff_t temp2;
+  coeff_t temp;
+  temp2 = coeffs[0] >> Al;  // DC value with the low Al bits dropped.
+  temp = temp2 - *last_dc_coeff;  // Difference to the previous shifted DC.
+  *last_dc_coeff = temp2;
+  temp2 = temp;
+  if (temp < 0) {
+    temp = -temp;  // temp becomes the magnitude, used only to size nbits.
+    temp2--;  // Negative difference: extra bits come from (diff - 1).
+  }
+  int nbits = (temp == 0) ? 0 : (jxl::FloorLog2Nonzero<uint32_t>(temp) + 1);  // FloorLog2Nonzero is skipped when temp == 0.
+  int bits = temp2 & ((1 << nbits) - 1);
+  *(*next_token)++ = Token(context, nbits, bits);  // Symbol is the bit count; advances the caller's token pointer.
+}
+
+void TokenizeACProgressiveScan(j_compress_ptr cinfo, int scan_index,
+                               int context, ScanTokenInfo* sti) {  // Tokenizes coefficients Ss..Se (shifted right by Al) of the scan's first component, filling sti.
+  jpeg_comp_master* m = cinfo->master;
+  const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
+  const int comp_idx = scan_info->component_index[0];  // Only the first listed component is processed here.
+  const jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+  const int Al = scan_info->Al;
+  const int Ss = scan_info->Ss;
+  const int Se = scan_info->Se;
+  const size_t restart_interval = sti->restart_interval;
+  int restarts_to_go = restart_interval;  // Blocks left before the next restart point.
+  size_t num_blocks = comp->height_in_blocks * comp->width_in_blocks;
+  size_t num_restarts =
+      restart_interval > 0 ? DivCeil(num_blocks, restart_interval) : 1;  // One entry per restart segment; a single segment when restarts are off.
+  size_t restart_idx = 0;
+  int eob_run = 0;  // Pending count of consecutive blocks that ended with zeros in the Ss..Se band.
+  TokenArray* ta = &m->token_arrays[m->cur_token_array];
+  sti->token_offset = m->total_num_tokens + ta->num_tokens;  // Global index of this scan's first token.
+  sti->restarts = Allocate<size_t>(cinfo, num_restarts, JPOOL_IMAGE);
+  for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) {
+    JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)(
+        reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx], by,
+        1, false);
+    // Each coefficient can appear in at most one token, but we have to reserve
+    // one extra EOBrun token that was rolled over from the previous block-row
+    // and has to be flushed at the end.
+    int max_tokens_per_row = 1 + comp->width_in_blocks * (Se - Ss + 1);
+    if (ta->num_tokens + max_tokens_per_row > m->num_tokens) {  // Current array (capacity m->num_tokens) may not fit this block row.
+      if (ta->tokens) {  // Array already in use: account for its tokens and move to the next one.
+        m->total_num_tokens += ta->num_tokens;
+        ++m->cur_token_array;
+        ta = &m->token_arrays[m->cur_token_array];
+      }
+      m->num_tokens =
+          EstimateNumTokens(cinfo, by, comp->height_in_blocks,
+                            m->total_num_tokens, max_tokens_per_row);
+      ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE);
+      m->next_token = ta->tokens;
+    }
+    for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) {
+      if (restart_interval > 0 && restarts_to_go == 0) {  // Restart boundary: flush the pending run and record the token position.
+        if (eob_run > 0) {
+          int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run);
+          int symbol = nbits << 4u;  // Run symbol: bit count in the high nibble, low nibble zero.
+          *m->next_token++ =
+              Token(context, symbol, eob_run & ((1 << nbits) - 1));
+          eob_run = 0;
+        }
+        ta->num_tokens = m->next_token - ta->tokens;
+        sti->restarts[restart_idx++] = m->total_num_tokens + ta->num_tokens;
+        restarts_to_go = restart_interval;
+      }
+      const coeff_t* block = &ba[0][bx][0];
+      coeff_t temp2;  // Value the extra bits are taken from.
+      coeff_t temp;  // Shifted magnitude of the current coefficient.
+      int r = 0;  // Current zero-run length.
+      int num_nzeros = 0;  // Coefficients that produced a token in this block.
+      int num_future_nzeros = 0;  // Nonzero coefficients that become zero after >> Al.
+      for (int k = Ss; k <= Se; ++k) {
+        if ((temp = block[k]) == 0) {
+          r++;
+          continue;
+        }
+        if (temp < 0) {
+          temp = -temp;
+          temp >>= Al;  // Shift the magnitude, not the signed value.
+          temp2 = ~temp;  // Negative value: extra bits are the complement of the magnitude.
+        } else {
+          temp >>= Al;
+          temp2 = temp;
+        }
+        if (temp == 0) {  // Counts as a zero in this scan, but is tallied for sti->num_future_nonzeros.
+          r++;
+          num_future_nzeros++;
+          continue;
+        }
+        if (eob_run > 0) {  // A nonzero token follows: the pending run must be written first.
+          int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run);
+          int symbol = nbits << 4u;
+          *m->next_token++ =
+              Token(context, symbol, eob_run & ((1 << nbits) - 1));
+          eob_run = 0;
+        }
+        while (r > 15) {
+          *m->next_token++ = Token(context, 0xf0, 0);  // Symbol 0xf0 stands for 16 zeros.
+          r -= 16;
+        }
+        int nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1;
+        int symbol = (r << 4u) + nbits;  // High nibble: zero run, low nibble: bit count.
+        *m->next_token++ = Token(context, symbol, temp2 & ((1 << nbits) - 1));
+        ++num_nzeros;
+        r = 0;
+      }
+      if (r > 0) {  // Block ended with zeros: extend the pending run instead of emitting a token.
+        ++eob_run;
+        if (eob_run == 0x7FFF) {  // Run counter reached its cap: flush it now.
+          int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run);
+          int symbol = nbits << 4u;
+          *m->next_token++ =
+              Token(context, symbol, eob_run & ((1 << nbits) - 1));
+          eob_run = 0;
+        }
+      }
+      sti->num_nonzeros += num_nzeros;
+      sti->num_future_nonzeros += num_future_nzeros;
+      --restarts_to_go;
+    }
+    ta->num_tokens = m->next_token - ta->tokens;  // Refresh the count after every block row.
+  }
+  if (eob_run > 0) {  // Flush the run left over after the last block.
+    int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run);
+    int symbol = nbits << 4u;
+    *m->next_token++ = Token(context, symbol, eob_run & ((1 << nbits) - 1));
+    ++ta->num_tokens;  // Counted by hand because the per-row refresh above has already run.
+    eob_run = 0;
+  }
+  sti->num_tokens = m->total_num_tokens + ta->num_tokens - sti->token_offset;
+  sti->restarts[restart_idx++] = m->total_num_tokens + ta->num_tokens;  // Final entry: end position of the last segment.
+}
+
+void TokenizeACRefinementScan(j_compress_ptr cinfo, int scan_index,
+                              ScanTokenInfo* sti) {  // Tokenizes an AC refinement pass over coefficients Ss..Se into RefTokens, refinement bits and run values.
+  jpeg_comp_master* m = cinfo->master;
+  const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
+  const int comp_idx = scan_info->component_index[0];  // Only the first listed component is processed here.
+  const jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+  const int Al = scan_info->Al;
+  const int Ss = scan_info->Ss;
+  const int Se = scan_info->Se;
+  const size_t restart_interval = sti->restart_interval;
+  int restarts_to_go = restart_interval;  // Blocks left before the next restart point.
+  RefToken token;
+  int eob_run = 0;  // Blocks merged into the end-of-block token at next_eob_token.
+  int eob_refbits = 0;  // Refinement bits attributed to that merged token.
+  size_t num_blocks = comp->height_in_blocks * comp->width_in_blocks;
+  size_t num_restarts =
+      restart_interval > 0 ? DivCeil(num_blocks, restart_interval) : 1;
+  sti->tokens = m->next_refinement_token;  // Output continues in the shared buffers that TokenizeJpeg sets up.
+  sti->refbits = m->next_refinement_bit;
+  sti->eobruns = Allocate<uint16_t>(cinfo, num_blocks / 2, JPOOL_IMAGE);  // An entry is appended only when a run reaches 2 blocks.
+  sti->restarts = Allocate<size_t>(cinfo, num_restarts, JPOOL_IMAGE);
+  RefToken* next_token = sti->tokens;
+  RefToken* next_eob_token = next_token;  // Slot where the current end-of-block run token is (re)written.
+  uint8_t* next_ref_bit = sti->refbits;
+  uint16_t* next_eobrun = sti->eobruns;
+  size_t restart_idx = 0;
+  for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) {
+    JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)(
+        reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx], by,
+        1, false);
+    for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) {
+      if (restart_interval > 0 && restarts_to_go == 0) {  // Restart boundary: record the position and end any open run.
+        sti->restarts[restart_idx++] = next_token - sti->tokens;  // Offsets are relative to this scan's first token.
+        restarts_to_go = restart_interval;
+        next_eob_token = next_token;
+        eob_run = eob_refbits = 0;
+      }
+      const coeff_t* block = &ba[0][bx][0];
+      int num_eob_refinement_bits = 0;  // Refinement bits already attached to 0xf0 tokens since the last nonzero token.
+      int num_refinement_bits = 0;  // Refinement bits collected since the last emitted token.
+      int num_nzeros = 0;
+      int r = 0;  // Current zero-run length.
+      for (int k = Ss; k <= Se; ++k) {
+        int absval = block[k];
+        if (absval == 0) {
+          r++;
+          continue;
+        }
+        const int mask = absval >> (8 * sizeof(int) - 1);  // -1 for negative values, 0 otherwise.
+        absval += mask;  // With the xor below this yields the absolute value.
+        absval ^= mask;
+        absval >>= Al;
+        if (absval == 0) {  // Still zero at this bit position.
+          r++;
+          continue;
+        }
+        while (r > 15) {
+          token.symbol = 0xf0;  // Symbol 0xf0 stands for 16 zeros.
+          token.refbits = num_refinement_bits;
+          *next_token++ = token;
+          r -= 16;
+          num_eob_refinement_bits += num_refinement_bits;  // Remembered in case these tokens get folded into an end-of-block run.
+          num_refinement_bits = 0;
+        }
+        if (absval > 1) {  // Shifted magnitude above 1: only its lowest bit is stored, no token is emitted.
+          *next_ref_bit++ = absval & 1u;
+          ++num_refinement_bits;
+          continue;
+        }
+        int symbol = (r << 4u) + 1 + ((mask + 1) << 1);  // Run in the high nibble, size 1, and bit 1 set for non-negative values.
+        token.symbol = symbol;
+        token.refbits = num_refinement_bits;
+        *next_token++ = token;
+        ++num_nzeros;
+        num_refinement_bits = 0;
+        num_eob_refinement_bits = 0;
+        r = 0;
+        next_eob_token = next_token;  // A nonzero token ends any open end-of-block run.
+        eob_run = eob_refbits = 0;
+      }
+      if (r > 0 || num_eob_refinement_bits + num_refinement_bits > 0) {  // Block has a tail: fold it into the run token.
+        ++eob_run;
+        eob_refbits += num_eob_refinement_bits + num_refinement_bits;
+        if (eob_refbits > 255) {  // Too many bits for one token (refbits presumably 8 bits wide; confirm): start a new run token.
+          ++next_eob_token;
+          eob_refbits = num_eob_refinement_bits + num_refinement_bits;
+          eob_run = 1;
+        }
+        next_token = next_eob_token;  // Rewinds over any 0xf0 tokens written for this block's tail.
+        next_token->refbits = eob_refbits;
+        if (eob_run == 1) {
+          next_token->symbol = 0;  // Run of one block.
+        } else if (eob_run == 2) {
+          next_token->symbol = 16;  // First run of more than one block: start a run-value entry at 0.
+          *next_eobrun++ = 0;
+        } else if ((eob_run & (eob_run - 1)) == 0) {  // Run length reached a power of two.
+          next_token->symbol += 16;  // Bump the run-size nibble and restart the run value.
+          next_eobrun[-1] = 0;
+        } else {
+          ++next_eobrun[-1];  // Same size class: only the run value grows.
+        }
+        ++next_token;
+        if (eob_run == 0x7fff) {  // Run counter reached its cap: close this run token.
+          next_eob_token = next_token;
+          eob_run = eob_refbits = 0;
+        }
+      }
+      sti->num_nonzeros += num_nzeros;
+      --restarts_to_go;
+    }
+  }
+  sti->num_tokens = next_token - sti->tokens;
+  sti->restarts[restart_idx++] = sti->num_tokens;  // Final entry: end of the last segment.
+  m->next_refinement_token = next_token;  // Publish the write positions for the next refinement scan.
+  m->next_refinement_bit = next_ref_bit;
+}
+
+void TokenizeScan(j_compress_ptr cinfo, size_t scan_index, int ac_ctx_offset,
+                  ScanTokenInfo* sti) {  // Tokenizes one scan: AC progressive scans go to the helpers above; sequential and DC scans are handled here.
+  const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
+  if (scan_info->Ss > 0) {  // Ss > 0: AC-only scan.
+    if (scan_info->Ah == 0) {
+      TokenizeACProgressiveScan(cinfo, scan_index, ac_ctx_offset, sti);
+    } else {
+      TokenizeACRefinementScan(cinfo, scan_index, sti);
+    }
+    return;
+  }
+
+  jpeg_comp_master* m = cinfo->master;
+  size_t restart_interval = sti->restart_interval;
+  int restarts_to_go = restart_interval;  // MCUs left before the next restart point.
+  coeff_t last_dc_coeff[MAX_COMPS_IN_SCAN] = {0};  // Per-scan-component DC predictor.
+
+  // "Non-interleaved" means color data comes in separate scans, in other words
+  // each scan can contain only one color component.
+  const bool is_interleaved = (scan_info->comps_in_scan > 1);
+  const bool is_progressive = cinfo->progressive_mode;
+  const int Ah = scan_info->Ah;
+  const int Al = scan_info->Al;
+  HWY_ALIGN constexpr coeff_t kDummyBlock[DCTSIZE2] = {0};  // All-zero stand-in for blocks outside the component.
+
+  size_t restart_idx = 0;
+  TokenArray* ta = &m->token_arrays[m->cur_token_array];
+  sti->token_offset = Ah > 0 ? 0 : m->total_num_tokens + ta->num_tokens;  // Ah > 0 scans write to sti->refbits, not the token arrays.
+
+  if (Ah > 0) {
+    sti->refbits = Allocate<uint8_t>(cinfo, sti->num_blocks, JPOOL_IMAGE);  // One stored bit per block.
+  } else if (cinfo->progressive_mode) {
+    if (ta->num_tokens + sti->num_blocks > m->num_tokens) {  // This path emits exactly one token per block.
+      if (ta->tokens) {
+        m->total_num_tokens += ta->num_tokens;
+        ++m->cur_token_array;
+        ta = &m->token_arrays[m->cur_token_array];
+      }
+      m->num_tokens = sti->num_blocks;
+      ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE);
+      m->next_token = ta->tokens;
+    }
+  }
+
+  JBLOCKARRAY ba[MAX_COMPS_IN_SCAN];  // Coefficient rows of the current MCU row, per scan component.
+  size_t block_idx = 0;
+  for (size_t mcu_y = 0; mcu_y < sti->MCU_rows_in_scan; ++mcu_y) {
+    for (int i = 0; i < scan_info->comps_in_scan; ++i) {
+      int comp_idx = scan_info->component_index[i];
+      jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+      int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1;
+      int by0 = mcu_y * n_blocks_y;
+      int block_rows_left = comp->height_in_blocks - by0;
+      int max_block_rows = std::min(n_blocks_y, block_rows_left);  // Do not request rows past the component's last block row.
+      ba[i] = (*cinfo->mem->access_virt_barray)(
+          reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx],
+          by0, max_block_rows, false);
+    }
+    if (!cinfo->progressive_mode) {  // Sequential mode: grow token storage one MCU row at a time.
+      int max_tokens_per_mcu_row = MaxNumTokensPerMCURow(cinfo);
+      if (ta->num_tokens + max_tokens_per_mcu_row > m->num_tokens) {
+        if (ta->tokens) {
+          m->total_num_tokens += ta->num_tokens;
+          ++m->cur_token_array;
+          ta = &m->token_arrays[m->cur_token_array];
+        }
+        m->num_tokens =
+            EstimateNumTokens(cinfo, mcu_y, sti->MCU_rows_in_scan,
+                              m->total_num_tokens, max_tokens_per_mcu_row);
+        ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE);
+        m->next_token = ta->tokens;
+      }
+    }
+    for (size_t mcu_x = 0; mcu_x < sti->MCUs_per_row; ++mcu_x) {
+      // Restart boundary: reset DC prediction and record where the new segment starts.
+      if (restart_interval > 0 && restarts_to_go == 0) {
+        restarts_to_go = restart_interval;
+        memset(last_dc_coeff, 0, sizeof(last_dc_coeff));
+        ta->num_tokens = m->next_token - ta->tokens;
+        sti->restarts[restart_idx++] =  // sti->restarts is not allocated in this function; presumably by the caller (TODO confirm).
+            Ah > 0 ? block_idx : m->total_num_tokens + ta->num_tokens;
+      }
+      // Encode one MCU
+      for (int i = 0; i < scan_info->comps_in_scan; ++i) {
+        int comp_idx = scan_info->component_index[i];
+        jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+        int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1;
+        int n_blocks_x = is_interleaved ? comp->h_samp_factor : 1;
+        for (int iy = 0; iy < n_blocks_y; ++iy) {
+          for (int ix = 0; ix < n_blocks_x; ++ix) {
+            size_t block_y = mcu_y * n_blocks_y + iy;
+            size_t block_x = mcu_x * n_blocks_x + ix;
+            const coeff_t* block;
+            if (block_x >= comp->width_in_blocks ||
+                block_y >= comp->height_in_blocks) {
+              block = kDummyBlock;  // Padding block beyond the component edge.
+            } else {
+              block = &ba[i][iy][block_x][0];
+            }
+            if (!is_progressive) {
+              HWY_DYNAMIC_DISPATCH(ComputeTokensSequential)
+              (block, last_dc_coeff[i], comp_idx, ac_ctx_offset + i,  // DC context is the component index; AC context is offset + position in scan.
+               &m->next_token);
+              last_dc_coeff[i] = block[0];
+            } else {
+              if (Ah == 0) {
+                TokenizeProgressiveDC(block, comp_idx, Al, last_dc_coeff + i,
+                                      &m->next_token);
+              } else {
+                sti->refbits[block_idx] = (block[0] >> Al) & 1;  // DC refinement: store bit Al of the DC coefficient.
+              }
+            }
+            ++block_idx;
+          }
+        }
+      }
+      --restarts_to_go;
+    }
+    ta->num_tokens = m->next_token - ta->tokens;  // Refresh the count after every MCU row.
+  }
+  JXL_DASSERT(block_idx == sti->num_blocks);
+  sti->num_tokens =
+      Ah > 0 ? sti->num_blocks
+             : m->total_num_tokens + ta->num_tokens - sti->token_offset;
+  sti->restarts[restart_idx++] =  // Final entry: end of the last segment.
+      Ah > 0 ? sti->num_blocks : m->total_num_tokens + ta->num_tokens;
+  if (Ah == 0 && cinfo->progressive_mode) {
+    JXL_DASSERT(sti->num_blocks == sti->num_tokens);  // Progressive DC first pass: one token per block.
+  }
+}
+
+}  // namespace
+
+void TokenizeJpeg(j_compress_ptr cinfo) {  // Tokenizes every scan; AC scans with Al > 0 go first so refinement buffers can be sized.
+  jpeg_comp_master* m = cinfo->master;
+  std::vector<int> processed(cinfo->num_scans);  // 1 once scan i has been tokenized.
+  size_t max_refinement_tokens = 0;  // Capacity to allocate for RefTokens across all AC refinement scans.
+  size_t num_refinement_bits = 0;  // Size of the refinement-bit buffer for the next round.
+  int num_refinement_scans[DCTSIZE2] = {};  // Per coefficient index: Al of the first AC scan covering it.
+  int max_num_refinement_scans = 0;
+  for (int i = 0; i < cinfo->num_scans; ++i) {
+    const jpeg_scan_info* si = &cinfo->scan_info[i];
+    ScanTokenInfo* sti = &m->scan_token_info[i];
+    if (si->Ss > 0 && si->Ah == 0 && si->Al > 0) {  // First AC pass that leaves low bits for later refinement.
+      int offset = m->ac_ctx_offset[i];
+      TokenizeScan(cinfo, i, offset, sti);
+      processed[i] = 1;
+      max_refinement_tokens += sti->num_future_nonzeros;
+      for (int k = si->Ss; k <= si->Se; ++k) {
+        num_refinement_scans[k] = si->Al;
+      }
+      max_num_refinement_scans = std::max(max_num_refinement_scans, si->Al);
+      num_refinement_bits += sti->num_nonzeros;  // Already-nonzero coefficients are what the next pass stores a bit for.
+    }
+    if (si->Ss > 0 && si->Ah > 0) {  // AC refinement scan: reserve token capacity per block.
+      int comp_idx = si->component_index[0];
+      const jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+      size_t num_blocks = comp->width_in_blocks * comp->height_in_blocks;
+      max_refinement_tokens += (1 + (si->Se - si->Ss) / 16) * num_blocks;
+    }
+  }
+  if (max_refinement_tokens > 0) {
+    m->next_refinement_token =
+        Allocate<RefToken>(cinfo, max_refinement_tokens, JPOOL_IMAGE);
+  }
+  for (int j = 0; j < max_num_refinement_scans; ++j) {  // Round j handles scans whose Ah equals their band's Al minus j.
+    uint8_t* refinement_bits =
+        Allocate<uint8_t>(cinfo, num_refinement_bits, JPOOL_IMAGE);
+    m->next_refinement_bit = refinement_bits;
+    size_t new_refinement_bits = 0;
+    for (int i = 0; i < cinfo->num_scans; ++i) {
+      const jpeg_scan_info* si = &cinfo->scan_info[i];
+      ScanTokenInfo* sti = &m->scan_token_info[i];
+      if (si->Ss > 0 && si->Ah > 0 &&
+          si->Ah == num_refinement_scans[si->Ss] - j) {
+        int offset = m->ac_ctx_offset[i];
+        TokenizeScan(cinfo, i, offset, sti);
+        processed[i] = 1;
+        new_refinement_bits += sti->num_nonzeros;  // Coefficients that became nonzero in this round.
+      }
+    }
+    JXL_DASSERT(m->next_refinement_bit ==
+                refinement_bits + num_refinement_bits);  // The round is expected to fill the buffer exactly.
+    num_refinement_bits += new_refinement_bits;
+  }
+  for (int i = 0; i < cinfo->num_scans; ++i) {  // Everything not handled above.
+    if (processed[i]) {
+      continue;
+    }
+    int offset = m->ac_ctx_offset[i];
+    TokenizeScan(cinfo, i, offset, &m->scan_token_info[i]);
+    processed[i] = 1;
+  }
+}
+
+namespace {
+
+struct Histogram {  // Symbol occurrence counts for one context.
+  int count[kJpegHuffmanAlphabetSize];  // Indexed by symbol value.
+  Histogram() { memset(count, 0, sizeof(count)); }  // All counts start at zero.
+};
+
+void BuildHistograms(j_compress_ptr cinfo, Histogram* histograms) {  // Counts symbols per context into histograms[], which is indexed by context.
+  jpeg_comp_master* m = cinfo->master;
+  size_t num_token_arrays = m->cur_token_array + 1;  // Arrays 0..cur_token_array are in use.
+  for (size_t i = 0; i < num_token_arrays; ++i) {
+    Token* tokens = m->token_arrays[i].tokens;
+    size_t num_tokens = m->token_arrays[i].num_tokens;
+    for (size_t j = 0; j < num_tokens; ++j) {
+      Token t = tokens[j];
+      ++histograms[t.context].count[t.symbol];
+    }
+  }
+  for (int i = 0; i < cinfo->num_scans; ++i) {  // AC refinement tokens are stored per scan, outside the token arrays.
+    const jpeg_scan_info& si = cinfo->scan_info[i];
+    const ScanTokenInfo& sti = m->scan_token_info[i];
+    if (si.Ss > 0 && si.Ah > 0) {
+      int context = m->ac_ctx_offset[i];
+      int* ac_histo = &histograms[context].count[0];
+      for (size_t j = 0; j < sti.num_tokens; ++j) {
+        ++ac_histo[sti.tokens[j].symbol & 253];  // 253 == 0xFD clears bit 1, where TokenizeACRefinementScan stores the sign.
+      }
+    }
+  }
+}
+
+struct JpegClusteredHistograms {  // Output of ClusterJpegHistograms.
+  std::vector<Histogram> histograms;  // One merged histogram per cluster.
+  std::vector<uint32_t> histogram_indexes;  // Input histogram i -> index into histograms.
+  std::vector<uint32_t> slot_ids;  // Cluster -> slot number (0..3) it was assigned.
+};
+
+float HistogramCost(const Histogram& histo) {  // Estimated bit cost of coding histo with its own Huffman code: table header plus data.
+  std::vector<uint32_t> counts(kJpegHuffmanAlphabetSize + 1);
+  std::vector<uint8_t> depths(kJpegHuffmanAlphabetSize + 1);
+  for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
+    counts[i] = histo.count[i];
+  }
+  counts[kJpegHuffmanAlphabetSize] = 1;  // Extra symbol with count 1 beyond the real alphabet; it is excluded from the cost below.
+  CreateHuffmanTree(counts.data(), counts.size(), kJpegHuffmanMaxBitLength,
+                    &depths[0]);
+  size_t header_bits = (1 + kJpegHuffmanMaxBitLength) * 8;  // Fixed part: (1 + max bit length) bytes.
+  size_t data_bits = 0;
+  for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
+    if (depths[i] > 0) {
+      header_bits += 8;  // One byte per symbol that receives a code.
+      data_bits += counts[i] * depths[i];
+    }
+  }
+  return header_bits + data_bits;
+}
+
+void AddHistograms(const Histogram& a, const Histogram& b, Histogram* c) {  // c = a + b, element-wise; each element is read before it is written, so c may alias a or b.
+  for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
+    c->count[i] = a.count[i] + b.count[i];
+  }
+}
+
+bool IsEmptyHistogram(const Histogram& histo) {  // True when every symbol count is zero.
+  for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
+    if (histo.count[i]) return false;
+  }
+  return true;
+}
+
+void ClusterJpegHistograms(const Histogram* histograms, size_t num,
+                           JpegClusteredHistograms* clusters) {  // Greedily merges the num input histograms into clusters, at most 4 of which are live at a time.
+  clusters->histogram_indexes.resize(num);  // Entries for empty inputs keep the value-initialized 0.
+  std::vector<uint32_t> slot_histograms;  // Slot -> index of the cluster currently occupying it.
+  std::vector<float> slot_costs;  // Slot -> accumulated cost of that cluster.
+  for (size_t i = 0; i < num; ++i) {
+    const Histogram& cur = histograms[i];
+    if (IsEmptyHistogram(cur)) {
+      continue;
+    }
+    float best_cost = HistogramCost(cur);  // Baseline: cost of giving cur its own table.
+    size_t best_slot = slot_histograms.size();  // This value means "no merge chosen".
+    for (size_t j = 0; j < slot_histograms.size(); ++j) {
+      size_t prev_idx = slot_histograms[j];
+      const Histogram& prev = clusters->histograms[prev_idx];
+      Histogram combined;
+      AddHistograms(prev, cur, &combined);
+      float combined_cost = HistogramCost(combined);
+      float cost = combined_cost - slot_costs[j];  // Extra cost of adding cur to slot j.
+      if (cost < best_cost) {
+        best_cost = cost;
+        best_slot = j;
+      }
+    }
+    if (best_slot == slot_histograms.size()) {
+      // No merge was cheaper than a separate table: start a new cluster.
+      size_t histogram_index = clusters->histograms.size();
+      clusters->histograms.push_back(cur);
+      clusters->histogram_indexes[i] = histogram_index;
+      if (best_slot < 4) {
+        // We have a free slot, so we put the new histogram there.
+        slot_histograms.push_back(histogram_index);
+        slot_costs.push_back(best_cost);
+      } else {
+        // TODO(szabadka) Find the best histogram to replace.
+        best_slot = (clusters->slot_ids.back() + 1) % 4;  // Until then: the slot after the most recently assigned one.
+      }
+      slot_histograms[best_slot] = histogram_index;  // Redundant after the push_back above, required on the replacement path.
+      slot_costs[best_slot] = best_cost;
+      clusters->slot_ids.push_back(best_slot);
+    } else {
+      // Merge this histogram with a previous one.
+      size_t histogram_index = slot_histograms[best_slot];
+      const Histogram& prev = clusters->histograms[histogram_index];
+      AddHistograms(prev, cur, &clusters->histograms[histogram_index]);  // In-place accumulation; prev aliases the destination.
+      clusters->histogram_indexes[i] = histogram_index;
+      JXL_ASSERT(clusters->slot_ids[histogram_index] == best_slot);
+      slot_costs[best_slot] += best_cost;
+    }
+  }
+}
+
+void CopyHuffmanTable(j_compress_ptr cinfo, int index, bool is_dc,
+                      int* inv_slot_map, uint8_t* slot_id_map,
+                      JHUFF_TBL* huffman_tables, size_t* num_huffman_tables) {  // Appends the user-supplied DC/AC table `index` to huffman_tables unless it was already copied.
+  const char* type = is_dc ? "DC" : "AC";
+  if (index < 0 || index >= NUM_HUFF_TBLS) {
+    JPEGLI_ERROR("Invalid %s Huffman table index %d", type, index);  // Presumably does not return; the code below relies on a valid index (TODO confirm).
+  }
+  // Check if we have already copied this Huffman table.
+  int slot_idx = index + (is_dc ? 0 : NUM_HUFF_TBLS);  // DC tables come first in inv_slot_map, AC tables after them.
+  if (inv_slot_map[slot_idx] != -1) {
+    return;
+  }
+  inv_slot_map[slot_idx] = *num_huffman_tables;  // Remember the position in the output list.
+  // Look up and validate Huffman table.
+  JHUFF_TBL* table =
+      is_dc ? cinfo->dc_huff_tbl_ptrs[index] : cinfo->ac_huff_tbl_ptrs[index];
+  if (table == nullptr) {
+    JPEGLI_ERROR("Missing %s Huffman table %d", type, index);
+  }
+  ValidateHuffmanTable(reinterpret_cast<j_common_ptr>(cinfo), table, is_dc);
+  // Copy Huffman table to the end of the list and save slot id.
+  slot_id_map[*num_huffman_tables] = index + (is_dc ? 0 : 0x10);  // AC slot ids carry the 0x10 flag.
+  memcpy(&huffman_tables[*num_huffman_tables], table, sizeof(JHUFF_TBL));
+  ++(*num_huffman_tables);
+}
+
+void BuildJpegHuffmanTable(const Histogram& histo, JHUFF_TBL* table) {  // Fills table->bits and table->huffval from a length-limited Huffman code for histo.
+  std::vector<uint32_t> counts(kJpegHuffmanAlphabetSize + 1);
+  std::vector<uint8_t> depths(kJpegHuffmanAlphabetSize + 1);
+  for (size_t j = 0; j < kJpegHuffmanAlphabetSize; ++j) {
+    counts[j] = histo.count[j];
+  }
+  counts[kJpegHuffmanAlphabetSize] = 1;  // Extra symbol with count 1 beyond the real alphabet; it is never written to the table.
+  CreateHuffmanTree(counts.data(), counts.size(), kJpegHuffmanMaxBitLength,
+                    &depths[0]);
+  memset(table, 0, sizeof(JHUFF_TBL));
+  for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
+    if (depths[i] > 0) {
+      ++table->bits[depths[i]];  // bits[l] = number of symbols with code length l.
+    }
+  }
+  int offset[kJpegHuffmanMaxBitLength + 1] = {0};  // offset[l]: first huffval position for symbols of length l.
+  for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) {
+    offset[i] = offset[i - 1] + table->bits[i - 1];
+  }
+  for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
+    if (depths[i] > 0) {
+      table->huffval[offset[depths[i]]++] = i;  // Symbols end up grouped by length, ascending by value within a length.
+    }
+  }
+}
+
+}  // namespace
+
+void CopyHuffmanTables(j_compress_ptr cinfo) {  // Copies the tables referenced by the components into m and builds the context -> table map.
+  jpeg_comp_master* m = cinfo->master;
+  size_t max_huff_tables = 2 * cinfo->num_components;  // At most one DC and one AC table per component.
+  // Copy each referenced Huffman table once and record its slot id.
+  m->huffman_tables = Allocate<JHUFF_TBL>(cinfo, max_huff_tables, JPOOL_IMAGE);
+  m->slot_id_map = Allocate<uint8_t>(cinfo, max_huff_tables, JPOOL_IMAGE);
+  m->num_huffman_tables = 0;
+  int inv_slot_map[8] = {-1, -1, -1, -1, -1, -1, -1, -1};  // -1 = not copied yet; entries 0..3 DC, 4..7 AC (assumes NUM_HUFF_TBLS == 4).
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    jpeg_component_info* comp = &cinfo->comp_info[c];
+    CopyHuffmanTable(cinfo, comp->dc_tbl_no, /*is_dc=*/true, &inv_slot_map[0],
+                     m->slot_id_map, m->huffman_tables, &m->num_huffman_tables);
+    CopyHuffmanTable(cinfo, comp->ac_tbl_no, /*is_dc=*/false, &inv_slot_map[0],
+                     m->slot_id_map, m->huffman_tables, &m->num_huffman_tables);
+  }
+  // Context map: contexts 0..3 are DC (one per component), AC contexts start at 4.
+  m->context_map = Allocate<uint8_t>(cinfo, 8, JPOOL_IMAGE);
+  memset(m->context_map, 0, 8);
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    m->context_map[c] = inv_slot_map[cinfo->comp_info[c].dc_tbl_no];
+  }
+  int ac_ctx = 4;
+  for (int i = 0; i < cinfo->num_scans; ++i) {
+    const jpeg_scan_info* si = &cinfo->scan_info[i];
+    if (si->Se > 0) {  // Scan contains AC coefficients.
+      for (int j = 0; j < si->comps_in_scan; ++j) {
+        int c = si->component_index[j];
+        jpeg_component_info* comp = &cinfo->comp_info[c];
+        m->context_map[ac_ctx++] = inv_slot_map[comp->ac_tbl_no + 4];  // NOTE(review): ac_ctx is not bounded to the 8 allocated entries; presumably only reached with few AC scan components. Confirm.
+      }
+    }
+  }
+}
+
+void OptimizeHuffmanCodes(j_compress_ptr cinfo) {  // Builds Huffman tables from the token statistics and maps each context to a table.
+  jpeg_comp_master* m = cinfo->master;
+  // Build DC and AC histograms.
+  std::vector<Histogram> histograms(m->num_contexts);
+  BuildHistograms(cinfo, &histograms[0]);
+
+  // Cluster DC histograms.
+  JpegClusteredHistograms dc_clusters;
+  ClusterJpegHistograms(histograms.data(), cinfo->num_components, &dc_clusters);  // DC contexts are 0..num_components-1.
+
+  // Cluster AC histograms.
+  JpegClusteredHistograms ac_clusters;
+  ClusterJpegHistograms(histograms.data() + 4, m->num_contexts - 4,  // AC contexts start at index 4.
+                        &ac_clusters);
+
+  // Create Huffman tables and slot ids clusters.
+  size_t num_dc_huff = dc_clusters.histograms.size();
+  m->num_huffman_tables = num_dc_huff + ac_clusters.histograms.size();  // DC tables first, then AC tables.
+  m->huffman_tables =
+      Allocate<JHUFF_TBL>(cinfo, m->num_huffman_tables, JPOOL_IMAGE);
+  m->slot_id_map = Allocate<uint8_t>(cinfo, m->num_huffman_tables, JPOOL_IMAGE);
+  for (size_t i = 0; i < m->num_huffman_tables; ++i) {
+    JHUFF_TBL huff_table = {};
+    if (i < dc_clusters.histograms.size()) {
+      m->slot_id_map[i] = i;  // DC slot id is the table index itself.
+      BuildJpegHuffmanTable(dc_clusters.histograms[i], &huff_table);
+    } else {
+      m->slot_id_map[i] = 16 + ac_clusters.slot_ids[i - num_dc_huff];  // 16 == 0x10, the same AC flag CopyHuffmanTable uses.
+      BuildJpegHuffmanTable(ac_clusters.histograms[i - num_dc_huff],
+                            &huff_table);
+    }
+    memcpy(&m->huffman_tables[i], &huff_table, sizeof(huff_table));
+  }
+
+  // Create context map from clustered histogram indexes.
+  m->context_map = Allocate<uint8_t>(cinfo, m->num_contexts, JPOOL_IMAGE);
+  memset(m->context_map, 0, m->num_contexts);  // Contexts between num_components and 4 stay mapped to table 0.
+  for (size_t i = 0; i < m->num_contexts; ++i) {
+    if (i < (size_t)cinfo->num_components) {
+      m->context_map[i] = dc_clusters.histogram_indexes[i];
+    } else if (i >= 4) {
+      m->context_map[i] = num_dc_huff + ac_clusters.histogram_indexes[i - 4];  // AC tables follow the DC tables in huffman_tables.
+    }
+  }
+}
+
+namespace {
+
+constexpr uint8_t kNumExtraBits[256] = {  // Indexed by symbol; BuildHuffmanCodeTable adds this many bits to each code's depth.
+    0,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 0x00..0x0f: column c > 0 gives c; 0x00 gives 0
+    1,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 0x10..0x1f: column 0 gives the row number (run symbols n << 4)
+    2,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 0x20..0x2f
+    3,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 0x30..0x3f
+    4,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 0x40..0x4f
+    5,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 0x50..0x5f
+    6,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 0x60..0x6f
+    7,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 0x70..0x7f
+    8,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 0x80..0x8f
+    9,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 0x90..0x9f
+    10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 0xa0..0xaf
+    11, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 0xb0..0xbf
+    12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 0xc0..0xcf
+    13, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 0xd0..0xdf
+    14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 0xe0..0xef
+    0,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 0xf0..0xff: 0xf0 (16 zeros) carries no extra bits
+};
+
+void BuildHuffmanCodeTable(const JHUFF_TBL& table, HuffmanCodeTable* code) {  // Derives per-symbol code and depth from table.bits / table.huffval.
+  int huff_code[kJpegHuffmanAlphabetSize];  // Code value for the p-th entry of huffval.
+  // +1 for a sentinel element.
+  uint32_t huff_size[kJpegHuffmanAlphabetSize + 1];  // Code length for the p-th entry of huffval.
+  int p = 0;
+  for (size_t l = 1; l <= kJpegHuffmanMaxBitLength; ++l) {  // Expand the per-length counts into one length per symbol.
+    int i = table.bits[l];
+    while (i--) huff_size[p++] = l;
+  }
+
+  // Terminate the length list with a zero entry.
+  int last_p = p;
+  huff_size[last_p] = 0;
+
+  int next_code = 0;
+  uint32_t si = huff_size[0];  // Code length currently being assigned.
+  p = 0;
+  while (huff_size[p]) {  // Stops at the zero sentinel.
+    while ((huff_size[p]) == si) {  // Consecutive code values within one length.
+      huff_code[p++] = next_code;
+      next_code++;
+    }
+    next_code <<= 1;  // Moving to the next length appends a zero bit.
+    si++;
+  }
+  for (p = 0; p < last_p; p++) {
+    int i = table.huffval[p];
+    int nbits = kNumExtraBits[i];
+    code->depth[i] = huff_size[p] + nbits;  // Stored depth includes the symbol's extra bits.
+    code->code[i] = huff_code[p] << nbits;  // Code is pre-shifted left by nbits, leaving the low bits zero.
+  }
+}
+
+}  // namespace
+
+void InitEntropyCoder(j_compress_ptr cinfo) {  // Builds one HuffmanCodeTable per entry of m->huffman_tables.
+  jpeg_comp_master* m = cinfo->master;
+  m->coding_tables =
+      Allocate<HuffmanCodeTable>(cinfo, m->num_huffman_tables, JPOOL_IMAGE);
+  for (size_t i = 0; i < m->num_huffman_tables; ++i) {
+    BuildHuffmanCodeTable(m->huffman_tables[i], &m->coding_tables[i]);  // Only symbols listed in huffval are written; presumably Allocate zero-fills the rest (TODO confirm).
+  }
+}
+
+}  // namespace jpegli
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jpegli/entropy_coding.h b/third-party/libjxl/libjxl/lib/jpegli/entropy_coding.h
new file mode 100644
index 0000000000..a552219ec3
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/entropy_coding.h
@@ -0,0 +1,28 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_ENTROPY_CODING_H_
+#define LIB_JPEGLI_ENTROPY_CODING_H_
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+size_t MaxNumTokensPerMCURow(j_compress_ptr cinfo);
+
+size_t EstimateNumTokens(j_compress_ptr cinfo, size_t mcu_y, size_t ysize_mcus,
+                         size_t num_tokens, size_t max_per_row);
+
+void TokenizeJpeg(j_compress_ptr cinfo);
+
+void CopyHuffmanTables(j_compress_ptr cinfo);
+
+void OptimizeHuffmanCodes(j_compress_ptr cinfo);
+
+void InitEntropyCoder(j_compress_ptr cinfo);  // allocates cinfo->master->coding_tables and builds one per Huffman table
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_ENTROPY_CODING_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/error.cc b/third-party/libjxl/libjxl/lib/jpegli/error.cc
new file mode 100644
index 0000000000..289261672d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/error.cc
@@ -0,0 +1,102 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/error.h"
+
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <string>
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+const char* const kErrorMessageTable[] = {  // single entry, used as the generic fallback text
+    "Message codes are not supported, error message is in msg_parm.s string",
+};
+
+bool FormatString(char* buffer, const char* format, ...) {  // printf-style formatting into `buffer`
+  va_list args;
+  va_start(args, format);
+  vsnprintf(buffer, JMSG_STR_PARM_MAX, format, args);  // output is capped at JMSG_STR_PARM_MAX bytes, NUL included
+  va_end(args);
+  return false;  // constant result: truncation or formatting errors are not reported
+}
+
+void ExitWithAbort(j_common_ptr cinfo) {
+  cinfo->err->output_message(cinfo);  // report the pending message first
+  jpegli_destroy(cinfo);              // then tear down the codec object
+  exit(EXIT_FAILURE);                 // terminates the process; never returns
+}
+
+void EmitMessage(j_common_ptr cinfo, int msg_level) {
+  jpeg_error_mgr* const err = cinfo->err;
+  // Traces (level >= 0) print when trace_level allows it. Warnings (level < 0)
+  // are always counted and print for the first six or at trace_level >= 3.
+  const bool print = (msg_level < 0)
+                         ? (err->num_warnings <= 5 || err->trace_level >= 3)
+                         : (err->trace_level >= msg_level);
+  if (print) err->output_message(cinfo);
+  if (msg_level < 0) ++err->num_warnings;
+}
+
+void OutputMessage(j_common_ptr cinfo) {
+  char message[JMSG_LENGTH_MAX];  // filled in by the installed formatter
+  cinfo->err->format_message(cinfo, message);
+  fprintf(stderr, "%s\n", message);  // one message per line on stderr
+}
+
+void FormatMessage(j_common_ptr cinfo, char* buffer) {
+  jpeg_error_mgr* const mgr = cinfo->err;
+  const auto& parm = mgr->msg_parm;
+  const int code = mgr->msg_code;
+  const bool is_addon = mgr->addon_message_table != nullptr &&
+                        code >= mgr->first_addon_message &&
+                        code <= mgr->last_addon_message;
+  if (code == 0) {  // code 0: the text was already formatted into msg_parm.s
+    memcpy(buffer, parm.s, JMSG_STR_PARM_MAX);
+  } else if (!is_addon) {  // unrecognized code: generic fallback text
+    snprintf(buffer, JMSG_LENGTH_MAX, "%s", kErrorMessageTable[0]);
+  } else {  // add-on template: "%s" picks msg_parm.s, otherwise the 8 ints
+    std::string msg(mgr->addon_message_table[code - mgr->first_addon_message]);
+    if (msg.find("%s") != std::string::npos) {
+      snprintf(buffer, JMSG_LENGTH_MAX, msg.data(), parm.s);
+    } else {
+      snprintf(buffer, JMSG_LENGTH_MAX, msg.data(), parm.i[0], parm.i[1],
+               parm.i[2], parm.i[3], parm.i[4], parm.i[5], parm.i[6], parm.i[7]);
+    }
+  }
+}
+
+void ResetErrorManager(j_common_ptr cinfo) {
+  cinfo->err->num_warnings = 0;  // restart the warning counter
+  cinfo->err->msg_code = 0;      // code 0 is the msg_parm.s string path
+  memset(cinfo->err->msg_parm.s, 0, JMSG_STR_PARM_MAX);  // drop the old text
+}
+
+}  // namespace jpegli
+
+struct jpeg_error_mgr* jpegli_std_error(struct jpeg_error_mgr* err) {
+  // Start with no pending message, no warnings and tracing switched off.
+  memset(err->msg_parm.s, 0, JMSG_STR_PARM_MAX);
+  err->msg_code = 0;
+  err->num_warnings = 0;
+  err->trace_level = 0;
+  // Codes are unused by jpegli; the table only serves a custom format_message.
+  err->jpeg_message_table = jpegli::kErrorMessageTable;
+  err->last_jpeg_message = 0;
+  err->addon_message_table = nullptr;
+  err->first_addon_message = 0;
+  err->last_addon_message = 0;
+  // Install the default handlers.
+  err->error_exit = jpegli::ExitWithAbort;
+  err->emit_message = jpegli::EmitMessage;
+  err->output_message = jpegli::OutputMessage;
+  err->format_message = jpegli::FormatMessage;
+  err->reset_error_mgr = jpegli::ResetErrorManager;
+  return err;
+}
diff --git a/third-party/libjxl/libjxl/lib/jpegli/error.h b/third-party/libjxl/libjxl/lib/jpegli/error.h
new file mode 100644
index 0000000000..4451abd416
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/error.h
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_ERROR_H_
+#define LIB_JPEGLI_ERROR_H_
+
+#include <stdarg.h>
+#include <stdint.h>
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+bool FormatString(char* buffer, const char* format, ...);  // printf-style; writes at most JMSG_STR_PARM_MAX bytes; always returns false
+
+}  // namespace jpegli
+
+#define JPEGLI_ERROR(format, ...) /* needs `cinfo`; calls error_exit */      \
+  (jpegli::FormatString(cinfo->err->msg_parm.s, ("%s:%d: " format), __FILE__, \
+                        __LINE__, ##__VA_ARGS__),                             \
+   (*cinfo->err->error_exit)(reinterpret_cast<j_common_ptr>(cinfo)))
+
+#define JPEGLI_WARN(format, ...) /* needs `cinfo`; emits at level -1 */      \
+  (jpegli::FormatString(cinfo->err->msg_parm.s, ("%s:%d: " format), __FILE__, \
+                        __LINE__, ##__VA_ARGS__),                             \
+   (*cinfo->err->emit_message)(reinterpret_cast<j_common_ptr>(cinfo), -1))
+
+#define JPEGLI_TRACE(level, format, ...) /* needs `cinfo`; else-safe */      \
+  ((cinfo->err->trace_level >= (level))                                      \
+       ? (jpegli::FormatString(cinfo->err->msg_parm.s, ("%s:%d: " format),   \
+                               __FILE__, __LINE__, ##__VA_ARGS__),           \
+          (*cinfo->err->emit_message)(reinterpret_cast<j_common_ptr>(cinfo), \
+                                      (level))) : (void)0)
+
+#endif  // LIB_JPEGLI_ERROR_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/error_handling_test.cc b/third-party/libjxl/libjxl/lib/jpegli/error_handling_test.cc
new file mode 100644
index 0000000000..0d481c572a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/error_handling_test.cc
@@ -0,0 +1,1276 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jpegli {
+namespace {
+
+TEST(EncoderErrorHandlingTest, MinimalSuccess) {  // baseline: complete call sequence on a 1x1, 1-component image
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  {
+    jpeg_compress_struct cinfo;
+    const auto try_catch_block = [&]() -> bool {
+      ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+      jpegli_create_compress(&cinfo);
+      jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+      cinfo.image_width = 1;
+      cinfo.image_height = 1;
+      cinfo.input_components = 1;
+      jpegli_set_defaults(&cinfo);
+      jpegli_start_compress(&cinfo, TRUE);
+      JSAMPLE image[1] = {0};
+      JSAMPROW row[] = {image};
+      jpegli_write_scanlines(&cinfo, row, 1);
+      jpegli_finish_compress(&cinfo);
+      return true;
+    };
+    EXPECT_TRUE(try_catch_block());  // the whole sequence must reach `return true`
+    jpegli_destroy_compress(&cinfo);
+  }
+  TestImage output;
+  DecodeWithLibjpeg(CompressParams(), DecompressParams(), nullptr, 0, buffer,
+                    buffer_size, &output);  // helper not shown; presumably decodes the bytes just produced
+  EXPECT_EQ(1, output.xsize);
+  EXPECT_EQ(1, output.ysize);
+  EXPECT_EQ(1, output.components);
+  EXPECT_EQ(0, output.pixels[0]);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NoDestination) {  // jpegli_mem_dest() is never called
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+}
+
+TEST(EncoderErrorHandlingTest, NoImageDimensions) {  // image_width / image_height are never assigned
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, ImageTooBig) {  // image_width of 100000 must be rejected
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 100000;  // presumably above the encoder's maximum dimension - TODO confirm limit
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NoInputComponents) {  // input_components is never assigned
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    jpegli_set_defaults(&cinfo);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, TooManyInputComponents) {  // input_components of 1000 must be rejected
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1000;  // the invalid value under test
+    jpegli_set_defaults(&cinfo);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NoSetDefaults) {  // jpegli_set_defaults() is skipped
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_start_compress(&cinfo, TRUE);  // called without prior jpegli_set_defaults()
+    JSAMPLE image[1] = {0};
+    JSAMPROW row[] = {image};
+    jpegli_write_scanlines(&cinfo, row, 1);
+    jpegli_finish_compress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NoStartCompress) {  // scanlines are written without jpegli_start_compress()
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    JSAMPLE image[1] = {0};
+    JSAMPROW row[] = {image};
+    jpegli_write_scanlines(&cinfo, row, 1);  // no jpegli_start_compress() before this call
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NoWriteScanlines) {  // compression is finished without writing any row
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    jpegli_start_compress(&cinfo, TRUE);
+    jpegli_finish_compress(&cinfo);  // no jpegli_write_scanlines() call precedes this
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NoWriteAllScanlines) {  // only 1 of the 2 declared rows is written
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 2;  // two rows declared
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    jpegli_start_compress(&cinfo, TRUE);
+    JSAMPLE image[1] = {0};
+    JSAMPROW row[] = {image};
+    jpegli_write_scanlines(&cinfo, row, 1);  // but only one row supplied
+    jpegli_finish_compress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidQuantValue) {  // quantization table 0 is replaced by an all-zero table
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    cinfo.quant_tbl_ptrs[0] = jpegli_alloc_quant_table((j_common_ptr)&cinfo);
+    for (size_t k = 0; k < DCTSIZE2; ++k) {
+      cinfo.quant_tbl_ptrs[0]->quantval[k] = 0;  // zero is the invalid value under test
+    }
+    jpegli_start_compress(&cinfo, TRUE);
+    JSAMPLE image[1] = {0};
+    JSAMPROW row[] = {image};
+    jpegli_write_scanlines(&cinfo, row, 1);
+    jpegli_finish_compress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidQuantTableIndex) {  // component 0 is pointed at quantization table slot 3
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    cinfo.comp_info[0].quant_tbl_no = 3;  // presumably a slot the defaults leave undefined - TODO confirm
+    jpegli_start_compress(&cinfo, TRUE);
+    JSAMPLE image[1] = {0};
+    JSAMPROW row[] = {image};
+    jpegli_write_scanlines(&cinfo, row, 1);
+    jpegli_finish_compress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch1) {  // num_components forced to 100 after the defaults
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    cinfo.num_components = 100;  // overrides what jpegli_set_defaults() chose for 1 input component
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch2) {  // num_components forced to 2 with 1 input component
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    cinfo.num_components = 2;  // comp_info[1] is left as the defaults set it
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch3) {  // like Mismatch2, but comp_info[1] gets sampling factors
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    cinfo.num_components = 2;  // still disagrees with input_components == 1
+    cinfo.comp_info[1].h_samp_factor = cinfo.comp_info[1].v_samp_factor = 1;
+    jpegli_start_compress(&cinfo, TRUE);
+    JSAMPLE image[1] = {0};
+    JSAMPROW row[] = {image};
+    jpegli_write_scanlines(&cinfo, row, 1);
+    jpegli_finish_compress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch4) {  // JCS_RGB input declared with only 1 component
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    cinfo.in_color_space = JCS_RGB;  // conflicts with input_components == 1
+    jpegli_set_defaults(&cinfo);
+    jpegli_start_compress(&cinfo, TRUE);
+    JSAMPLE image[1] = {0};
+    JSAMPROW row[] = {image};
+    jpegli_write_scanlines(&cinfo, row, 1);
+    jpegli_finish_compress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch5) {  // JCS_GRAYSCALE input declared with 3 components
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 3;
+    cinfo.in_color_space = JCS_GRAYSCALE;  // conflicts with input_components == 3
+    jpegli_set_defaults(&cinfo);
+    jpegli_start_compress(&cinfo, TRUE);
+    JSAMPLE image[3] = {0};
+    JSAMPROW row[] = {image};
+    jpegli_write_scanlines(&cinfo, row, 1);
+    jpegli_finish_compress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch6) {  // 3-component RGB input, num_components forced to 2
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 3;
+    cinfo.in_color_space = JCS_RGB;
+    jpegli_set_defaults(&cinfo);
+    cinfo.num_components = 2;  // overrides the value chosen by jpegli_set_defaults()
+    jpegli_start_compress(&cinfo, TRUE);
+    JSAMPLE image[3] = {0};
+    JSAMPROW row[] = {image};
+    jpegli_write_scanlines(&cinfo, row, 1);
+    jpegli_finish_compress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidColorTransform) {  // YCbCr input with RGB requested as the JPEG color space
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 3;
+    cinfo.in_color_space = JCS_YCbCr;
+    jpegli_set_defaults(&cinfo);
+    cinfo.jpeg_color_space = JCS_RGB;  // presumably an unsupported YCbCr -> RGB conversion - TODO confirm
+    jpegli_start_compress(&cinfo, TRUE);
+    JSAMPLE image[3] = {0};
+    JSAMPROW row[] = {image};
+    jpegli_write_scanlines(&cinfo, row, 1);
+    jpegli_finish_compress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, DuplicateComponentIds) {  // components 0 and 1 are given the same component_id
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 3;
+    jpegli_set_defaults(&cinfo);
+    cinfo.comp_info[0].component_id = 0;
+    cinfo.comp_info[1].component_id = 0;  // duplicate of component 0's id
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidComponentIndex) {  // comp_info[0].component_index is set to 17
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 3;
+    jpegli_set_defaults(&cinfo);
+    cinfo.comp_info[0].component_index = 17;  // does not match the entry's position (0) in comp_info
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, ArithmeticCoding) {  // requesting arith_code must be rejected
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 3;
+    jpegli_set_defaults(&cinfo);
+    cinfo.arith_code = TRUE;  // the option under test
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, CCIR601Sampling) {  // requesting CCIR601_sampling must be rejected
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 3;
+    jpegli_set_defaults(&cinfo);
+    cinfo.CCIR601_sampling = TRUE;  // the option under test
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript1) {  // scan script pointer set, but num_scans is 0
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {{1, {0}, 0, 63, 0, 0}};  // libjpeg field order: comps_in_scan, component_index[], Ss, Se, Ah, Al
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = 0;  // the inconsistency under test
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript2) {  // scan names 2 components, image has 1
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {{2, {0, 1}, 0, 63, 0, 0}};  // libjpeg field order: comps_in_scan, component_index[], Ss, Se, Ah, Al
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript3) {  // comps_in_scan is 5
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {{5, {0}, 0, 63, 0, 0}};  // libjpeg field order: comps_in_scan, component_index[], Ss, Se, Ah, Al
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript4) {  // component 0 is listed twice in one scan
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 2;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {{2, {0, 0}, 0, 63, 0, 0}};  // libjpeg field order: comps_in_scan, component_index[], Ss, Se, Ah, Al
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript5) {  // scan lists its components in the order 1, 0
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 2;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {{2, {1, 0}, 0, 63, 0, 0}};  // libjpeg field order: comps_in_scan, component_index[], Ss, Se, Ah, Al
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript6) {  // Se is 64
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {{1, {0}, 0, 64, 0, 0}};  // libjpeg field order: comps_in_scan, component_index[], Ss, Se, Ah, Al; Se presumably must be <= 63
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript7) {  // Ss (2) is greater than Se (1)
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {{1, {0}, 2, 1, 0, 0}};  // libjpeg field order: comps_in_scan, component_index[], Ss, Se, Ah, Al
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript8) {  // full 0..63 scan for component 0, split DC/AC scans for component 1
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 2;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {
+        {1, {0}, 0, 63, 0, 0}, {1, {1}, 0, 0, 0, 0}, {1, {1}, 1, 63, 0, 0}  // presumably rejected for mixing a sequential-style scan with progressive ones - TODO confirm
+    };
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript9) {  // first scan covers coefficients 0..1, second 2..63
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {
+        {1, {0}, 0, 1, 0, 0}, {1, {0}, 2, 63, 0, 0},  // presumably rejected because scan 1 mixes DC (0) with an AC coefficient - TODO confirm
+    };
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript10) {  // the 1..63 scan carries two components at once
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 2;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {
+        {2, {0, 1}, 0, 0, 0, 0}, {2, {0, 1}, 1, 63, 0, 0}  // presumably rejected because AC scans must be single-component - TODO confirm
+    };
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript11) {  // the 1..63 scan comes before the 0..0 scan
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // macro not shown here; presumably returns false on a jpegli error - TODO confirm
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {
+        {1, {0}, 1, 63, 0, 0}, {1, {0}, 0, 0, 0, 0}  // presumably rejected because the DC scan must precede AC scans - TODO confirm
+    };
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());  // the sequence must not reach `return true`
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+// Single-component image whose very first scan of coefficient 0 already
+// carries Ah=10, Al=1. jpegli_start_compress is expected to raise an error,
+// so the lambda must not reach its `return true`.
+TEST(EncoderErrorHandlingTest, InvalidScanScript12) {
+  jpeg_compress_struct cinfo;
+  uint8_t* output = nullptr;
+  unsigned long output_size = 0;
+  const auto run_encoder = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &output, &output_size);
+    cinfo.input_components = 1;
+    cinfo.image_height = 1;
+    cinfo.image_width = 1;
+    jpegli_set_defaults(&cinfo);
+    // Entries follow libjpeg's jpeg_scan_info layout:
+    // {comps_in_scan, component_index, Ss, Se, Ah, Al}.
+    static constexpr jpeg_scan_info kScript[] = {
+        {1, {0}, 0, 0, 10, 1}, {1, {0}, 0, 0, 1, 0}, {1, {0}, 1, 63, 0, 0}  //
+    };
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    cinfo.scan_info = kScript;
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(run_encoder());
+  jpegli_destroy_compress(&cinfo);
+  free(output);  // free(nullptr) is a no-op.
+}
+
+// Single-component image with three scans of coefficient 0 whose (Ah, Al)
+// pairs are (0, 2), (1, 0), (2, 1): the second scan's Ah does not match the
+// first scan's Al. jpegli_start_compress is expected to raise an error, so
+// the lambda must not reach its `return true`.
+TEST(EncoderErrorHandlingTest, InvalidScanScript13) {
+  jpeg_compress_struct cinfo;
+  uint8_t* output = nullptr;
+  unsigned long output_size = 0;
+  const auto run_encoder = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &output, &output_size);
+    cinfo.input_components = 1;
+    cinfo.image_height = 1;
+    cinfo.image_width = 1;
+    jpegli_set_defaults(&cinfo);
+    // Entries follow libjpeg's jpeg_scan_info layout:
+    // {comps_in_scan, component_index, Ss, Se, Ah, Al}.
+    static constexpr jpeg_scan_info kScript[] = {
+        {1, {0}, 0, 0, 0, 2},
+        {1, {0}, 0, 0, 1, 0},
+        {1, {0}, 0, 0, 2, 1},  //
+        {1, {0}, 1, 63, 0, 0}  //
+    };
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    cinfo.scan_info = kScript;
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(run_encoder());
+  jpegli_destroy_compress(&cinfo);
+  free(output);  // free(nullptr) is a no-op.
+}
+
+// Three-component image at progressive level 0 with 3x3 sampling factors on
+// component 0. jpegli_start_compress is expected to raise an error, so the
+// lambda must not reach its `return true`.
+TEST(EncoderErrorHandlingTest, MCUSizeTooBig) {
+  jpeg_compress_struct cinfo;
+  uint8_t* output = nullptr;
+  unsigned long output_size = 0;
+  const auto run_encoder = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &output, &output_size);
+    cinfo.input_components = 3;
+    cinfo.image_height = 1;
+    cinfo.image_width = 1;
+    jpegli_set_defaults(&cinfo);
+    jpegli_set_progressive_level(&cinfo, 0);
+    auto& first_component = cinfo.comp_info[0];
+    first_component.h_samp_factor = 3;
+    first_component.v_samp_factor = 3;
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(run_encoder());
+  jpegli_destroy_compress(&cinfo);
+  free(output);  // free(nullptr) is a no-op.
+}
+
+// Requests a restart interval of 1000000 MCUs. jpegli_start_compress is
+// expected to raise an error (presumably because the value exceeds what the
+// restart-interval marker can encode), so the lambda must not reach its
+// `return true`.
+TEST(EncoderErrorHandlingTest, RestartIntervalTooBig) {
+  jpeg_compress_struct cinfo;
+  uint8_t* output = nullptr;
+  unsigned long output_size = 0;
+  const auto run_encoder = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &output, &output_size);
+    cinfo.input_components = 1;
+    cinfo.image_height = 1;
+    cinfo.image_width = 1;
+    jpegli_set_defaults(&cinfo);
+    cinfo.restart_interval = 1000000;
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(run_encoder());
+  jpegli_destroy_compress(&cinfo);
+  free(output);  // free(nullptr) is a no-op.
+}
+
+// Sets the horizontal sampling factor of component 0 to 5.
+// jpegli_start_compress is expected to raise an error (presumably because the
+// factor is above the allowed maximum), so the lambda must not reach its
+// `return true`.
+TEST(EncoderErrorHandlingTest, SamplingFactorTooBig) {
+  jpeg_compress_struct cinfo;
+  uint8_t* output = nullptr;
+  unsigned long output_size = 0;
+  const auto run_encoder = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &output, &output_size);
+    cinfo.input_components = 3;
+    cinfo.image_height = 1;
+    cinfo.image_width = 1;
+    jpegli_set_defaults(&cinfo);
+    cinfo.comp_info[0].h_samp_factor = 5;
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(run_encoder());
+  jpegli_destroy_compress(&cinfo);
+  free(output);  // free(nullptr) is a no-op.
+}
+
+// Gives components 0 and 1 horizontal sampling factors 3 and 2, whose ratio is
+// not an integer. jpegli_start_compress is expected to raise an error, so the
+// lambda must not reach its `return true`.
+TEST(EncoderErrorHandlingTest, NonIntegralSamplingRatio) {
+  jpeg_compress_struct cinfo;
+  uint8_t* output = nullptr;
+  unsigned long output_size = 0;
+  const auto run_encoder = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &output, &output_size);
+    cinfo.input_components = 3;
+    cinfo.image_height = 1;
+    cinfo.image_width = 1;
+    jpegli_set_defaults(&cinfo);
+    cinfo.comp_info[0].h_samp_factor = 3;
+    cinfo.comp_info[1].h_samp_factor = 2;
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(run_encoder());
+  jpegli_destroy_compress(&cinfo);
+  free(output);  // free(nullptr) is a no-op.
+}
+
+// Application-supplied message table installed as addon_message_table by the
+// AddOnTable* tests below: one message without parameters, one with an int
+// parameter and one with a string parameter.
+constexpr const char* kAddOnTable[] = {"First message",
+                                       "Second message with int param %d",
+                                       "Third message with string param %s"};
+
+// Installs kAddOnTable for message codes 10000..10002 and raises code 10000
+// (the message without parameters) through error_exit. The lambda is expected
+// not to reach its `return true`.
+TEST(EncoderErrorHandlingTest, AddOnTableNoParam) {
+  jpeg_compress_struct cinfo;
+  const auto raise_addon_error = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    auto* err = cinfo.err;
+    err->addon_message_table = kAddOnTable;
+    err->first_addon_message = 10000;
+    err->last_addon_message = 10002;
+    err->msg_code = 10000;
+    (*err->error_exit)(reinterpret_cast<j_common_ptr>(&cinfo));
+    return true;
+  };
+  EXPECT_FALSE(raise_addon_error());
+  jpegli_destroy_compress(&cinfo);
+}
+
+// Installs kAddOnTable for message codes 10000..10002 and raises code 10001
+// (the message with an int parameter, set to 17) through error_exit. The
+// lambda is expected not to reach its `return true`.
+TEST(EncoderErrorHandlingTest, AddOnTableIntParam) {
+  jpeg_compress_struct cinfo;
+  const auto raise_addon_error = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    auto* err = cinfo.err;
+    err->addon_message_table = kAddOnTable;
+    err->first_addon_message = 10000;
+    err->last_addon_message = 10002;
+    err->msg_code = 10001;
+    err->msg_parm.i[0] = 17;
+    (*err->error_exit)(reinterpret_cast<j_common_ptr>(&cinfo));
+    return true;
+  };
+  EXPECT_FALSE(raise_addon_error());
+  jpegli_destroy_compress(&cinfo);
+}
+
+// Installs kAddOnTable for message codes 10000..10002 and raises code 10002
+// (the message with a string parameter) through error_exit. The lambda is
+// expected not to reach its `return true`.
+TEST(EncoderErrorHandlingTest, AddOnTableNoStringParam) {
+  jpeg_compress_struct cinfo;
+  const auto raise_addon_error = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    auto* err = cinfo.err;
+    err->addon_message_table = kAddOnTable;
+    err->first_addon_message = 10000;
+    err->last_addon_message = 10002;
+    err->msg_code = 10002;
+    // 13 characters plus the terminating NUL.
+    static const char kParam[] = "MESSAGE PARAM";
+    memcpy(err->msg_parm.s, kParam, sizeof(kParam));
+    (*err->error_exit)(reinterpret_cast<j_common_ptr>(&cinfo));
+    return true;
+  };
+  EXPECT_FALSE(raise_addon_error());
+  jpegli_destroy_compress(&cinfo);
+}
+
+// Reference JPEG stream for the decoder tests: a 1x1 image with a single
+// component. The tests below decode it as-is or corrupt individual bytes.
+static const uint8_t kCompressed0[] = {
+    // SOI marker
+    0xff, 0xd8,  //
+    // DQT marker: length 0x0043, precision/index byte 0x00, 64 quant values
+    0xff, 0xdb, 0x00, 0x43, 0x00, 0x03, 0x02, 0x02, 0x03, 0x02,  //
+    0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x03, 0x03, 0x04, 0x05,  //
+    0x08, 0x05, 0x05, 0x04, 0x04, 0x05, 0x0a, 0x07, 0x07, 0x06,  //
+    0x08, 0x0c, 0x0a, 0x0c, 0x0c, 0x0b, 0x0a, 0x0b, 0x0b, 0x0d,  //
+    0x0e, 0x12, 0x10, 0x0d, 0x0e, 0x11, 0x0e, 0x0b, 0x0b, 0x10,  //
+    0x16, 0x10, 0x11, 0x13, 0x14, 0x15, 0x15, 0x15, 0x0c, 0x0f,  //
+    0x17, 0x18, 0x16, 0x14, 0x18, 0x12, 0x14, 0x15, 0x14,        //
+    // SOF marker: length 0x000b, precision 8, height 1, width 1, 1 component
+    0xff, 0xc0, 0x00, 0x0b, 0x08, 0x00, 0x01, 0x00, 0x01, 0x01,  //
+    0x01, 0x11, 0x00,                                            //
+    // DHT marker: length 0x00d2, two tables (slot bytes 0x00 and 0x10)
+    0xff, 0xc4, 0x00, 0xd2, 0x00, 0x00, 0x01, 0x05, 0x01, 0x01,  //
+    0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  //
+    0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,  //
+    0x09, 0x0a, 0x0b, 0x10, 0x00, 0x02, 0x01, 0x03, 0x03, 0x02,  //
+    0x04, 0x03, 0x05, 0x05, 0x04, 0x04, 0x00, 0x00, 0x01, 0x7d,  //
+    0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31,  //
+    0x41, 0x06, 0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32,  //
+    0x81, 0x91, 0xa1, 0x08, 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52,  //
+    0xd1, 0xf0, 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,  //
+    0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a,  //
+    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45,  //
+    0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57,  //
+    0x58, 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,  //
+    0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x83,  //
+    0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94,  //
+    0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,  //
+    0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,  //
+    0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,  //
+    0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8,  //
+    0xd9, 0xda, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8,  //
+    0xe9, 0xea, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,  //
+    0xf9, 0xfa,                                                  //
+    // SOS marker: length 0x0008, 1 component, Ss = 0, Se = 0x3f
+    0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x00, 0x3f, 0x00,  //
+    // entropy coded data
+    0xfc, 0xaa, 0xaf,  //
+    // EOI marker
+    0xff, 0xd9,  //
+};
+static const size_t kLen0 = sizeof(kCompressed0);
+
+// Byte offsets into kCompressed0 of the 0xff byte that starts each marker
+// segment; MinimalSuccess checks that they really point at 0xff bytes.
+static const size_t kDQTOffset = 2;
+static const size_t kSOFOffset = 71;
+static const size_t kDHTOffset = 84;
+static const size_t kSOSOffset = 296;
+
+TEST(DecoderErrorHandlingTest, MinimalSuccess) {
+  JXL_CHECK(kCompressed0[kDQTOffset] == 0xff);
+  JXL_CHECK(kCompressed0[kSOFOffset] == 0xff);
+  JXL_CHECK(kCompressed0[kDHTOffset] == 0xff);
+  JXL_CHECK(kCompressed0[kSOSOffset] == 0xff);
+  jpeg_decompress_struct cinfo = {};
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    jpegli_mem_src(&cinfo, kCompressed0, kLen0);
+    jpegli_read_header(&cinfo, TRUE);
+    EXPECT_EQ(1, cinfo.image_width);
+    EXPECT_EQ(1, cinfo.image_height);
+    jpegli_start_decompress(&cinfo);
+    JSAMPLE image[1];
+    JSAMPROW row[] = {image};
+    jpegli_read_scanlines(&cinfo, row, 1);
+    EXPECT_EQ(0, image[0]);
+    jpegli_finish_decompress(&cinfo);
+    return true;
+  };
+  EXPECT_TRUE(try_catch_block());
+  jpegli_destroy_decompress(&cinfo);
+}
+
+// Calls jpegli_read_header without installing a data source first; the lambda
+// is expected not to reach its `return true`.
+TEST(DecoderErrorHandlingTest, NoSource) {
+  jpeg_decompress_struct cinfo = {};
+  const auto read_without_source = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    jpegli_read_header(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(read_without_source());
+  jpegli_destroy_decompress(&cinfo);
+}
+
+// Calls jpegli_start_decompress without a preceding jpegli_read_header; the
+// lambda is expected not to reach its `return true`.
+TEST(DecoderErrorHandlingTest, NoReadHeader) {
+  jpeg_decompress_struct cinfo = {};
+  const auto start_without_header = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    jpegli_mem_src(&cinfo, kCompressed0, kLen0);
+    jpegli_start_decompress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(start_without_header());
+  jpegli_destroy_decompress(&cinfo);
+}
+
+// Reads the header and then goes straight to jpegli_read_scanlines, skipping
+// jpegli_start_decompress; the lambda is expected not to reach its
+// `return true`.
+TEST(DecoderErrorHandlingTest, NoStartDecompress) {
+  jpeg_decompress_struct cinfo = {};
+  const auto read_without_start = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    jpegli_mem_src(&cinfo, kCompressed0, kLen0);
+    jpegli_read_header(&cinfo, TRUE);
+    EXPECT_EQ(1, cinfo.image_width);
+    EXPECT_EQ(1, cinfo.image_height);
+    JSAMPLE pixel[1];
+    JSAMPROW rows[] = {pixel};
+    jpegli_read_scanlines(&cinfo, rows, 1);
+    EXPECT_EQ(0, pixel[0]);
+    jpegli_finish_decompress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(read_without_start());
+  jpegli_destroy_decompress(&cinfo);
+}
+
+// Starts decompression and calls jpegli_finish_decompress without reading any
+// scanline in between; the lambda is expected not to reach its `return true`.
+TEST(DecoderErrorHandlingTest, NoReadScanlines) {
+  jpeg_decompress_struct cinfo = {};
+  const auto finish_without_reading = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    jpegli_mem_src(&cinfo, kCompressed0, kLen0);
+    jpegli_read_header(&cinfo, TRUE);
+    EXPECT_EQ(1, cinfo.image_width);
+    EXPECT_EQ(1, cinfo.image_height);
+    jpegli_start_decompress(&cinfo);
+    jpegli_finish_decompress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(finish_without_reading());
+  jpegli_destroy_decompress(&cinfo);
+}
+
+// Scratch row shared by all ParseCompressed calls; sized for MAX_COMPONENTS
+// samples per pixel at the largest width used by these tests.
+static const size_t kMaxImageWidth = 0xffff;
+JSAMPLE kOutputBuffer[MAX_COMPONENTS * kMaxImageWidth];
+
+// Runs a full decode (header, start, every output row, finish) over
+// `compressed`. Returns the lambda's result: true when the whole sequence
+// completed, false otherwise. The decoded samples are discarded.
+bool ParseCompressed(const std::vector<uint8_t>& compressed) {
+  jpeg_decompress_struct cinfo = {};
+  const auto decode_all = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    jpegli_mem_src(&cinfo, compressed.data(), compressed.size());
+    jpegli_read_header(&cinfo, TRUE);
+    jpegli_start_decompress(&cinfo);
+    // Every row is decoded into the same scratch buffer.
+    for (JDIMENSION y = 0; y < cinfo.output_height; ++y) {
+      JSAMPROW rows[] = {kOutputBuffer};
+      jpegli_read_scanlines(&cinfo, rows, 1);
+    }
+    jpegli_finish_decompress(&cinfo);
+    return true;
+  };
+  const bool ok = decode_all();
+  jpegli_destroy_decompress(&cinfo);
+  return ok;
+}
+
+// Zeroing either byte of the leading SOI marker must make parsing fail.
+TEST(DecoderErrorHandlingTest, NoSOI) {
+  for (int soi_byte : {0, 1}) {
+    std::vector<uint8_t> mutated(kCompressed0, kCompressed0 + kLen0);
+    mutated[soi_byte] = 0;
+    const bool parsed = ParseCompressed(mutated);
+    EXPECT_FALSE(parsed);
+  }
+}
+
+// Corrupts single bytes of the DQT segment; every variant must fail to parse.
+TEST(DecoderErrorHandlingTest, InvalidDQT) {
+  // Copy of the reference stream with the byte at `index` replaced by `value`
+  // (truncated to 8 bits).
+  const auto with_byte = [](size_t index, int value) {
+    std::vector<uint8_t> bytes(kCompressed0, kCompressed0 + kLen0);
+    bytes[index] = value;
+    return bytes;
+  };
+  // Bad marker length
+  const size_t length_index = kDQTOffset + 3;
+  for (int delta : {-2, -1, 1, 2}) {
+    const int length_byte = kCompressed0[length_index] + delta;
+    EXPECT_FALSE(ParseCompressed(with_byte(length_index, length_byte)));
+  }
+  // invalid table index / precision
+  for (int val : {0x20, 0x05}) {
+    EXPECT_FALSE(ParseCompressed(with_byte(kDQTOffset + 4, val)));
+  }
+  // zero quant value
+  for (int k : {0, 1, 17, 63}) {
+    EXPECT_FALSE(ParseCompressed(with_byte(kDQTOffset + 5 + k, 0)));
+  }
+}
+
+// Corrupts single bytes of the SOF segment; every variant must fail to parse.
+TEST(DecoderErrorHandlingTest, InvalidSOF) {
+  // Copy of the reference stream with the byte at `index` replaced by `value`
+  // (truncated to 8 bits).
+  const auto with_byte = [](size_t index, int value) {
+    std::vector<uint8_t> bytes(kCompressed0, kCompressed0 + kLen0);
+    bytes[index] = value;
+    return bytes;
+  };
+  // Bad marker length
+  const size_t length_index = kSOFOffset + 3;
+  for (int delta : {-2, -1, 1, 2}) {
+    const int length_byte = kCompressed0[length_index] + delta;
+    EXPECT_FALSE(ParseCompressed(with_byte(length_index, length_byte)));
+  }
+  // zero width, height or num_components
+  for (int pos : {6, 8, 9}) {
+    EXPECT_FALSE(ParseCompressed(with_byte(kSOFOffset + pos, 0)));
+  }
+  // invalid data precision
+  for (int val : {0, 1, 127}) {
+    EXPECT_FALSE(ParseCompressed(with_byte(kSOFOffset + 4, val)));
+  }
+  // too many num_components
+  for (int val : {5, 255}) {
+    EXPECT_FALSE(ParseCompressed(with_byte(kSOFOffset + 9, val)));
+  }
+  // invalid sampling factors
+  for (int val : {0x00, 0x01, 0x10, 0x15, 0x51}) {
+    EXPECT_FALSE(ParseCompressed(with_byte(kSOFOffset + 11, val)));
+  }
+  // invalid quant table index
+  for (int val : {5, 17}) {
+    EXPECT_FALSE(ParseCompressed(with_byte(kSOFOffset + 12, val)));
+  }
+}
+
+// Corrupts single bytes of the DHT segment; every variant must fail to parse.
+TEST(DecoderErrorHandlingTest, InvalidDHT) {
+  // Copy of the reference stream with the byte at `index` replaced by `value`
+  // (truncated to 8 bits).
+  const auto with_byte = [](size_t index, int value) {
+    std::vector<uint8_t> bytes(kCompressed0, kCompressed0 + kLen0);
+    bytes[index] = value;
+    return bytes;
+  };
+  // Bad marker length (low byte)
+  const size_t length_low = kDHTOffset + 3;
+  for (int delta : {-2, -1, 1, 2}) {
+    const int length_byte = kCompressed0[length_low] + delta;
+    EXPECT_FALSE(ParseCompressed(with_byte(length_low, length_byte)));
+  }
+  // Bad marker length (high byte raised by 17)
+  const size_t length_high = kDHTOffset + 2;
+  EXPECT_FALSE(
+      ParseCompressed(with_byte(length_high, kCompressed0[length_high] + 17)));
+  // invalid table slot_id
+  for (int val : {0x05, 0x15, 0x20}) {
+    EXPECT_FALSE(ParseCompressed(with_byte(kDHTOffset + 4, val)));
+  }
+}
+
+// Corrupts single bytes of the SOS segment; every variant must fail to parse.
+TEST(DecoderErrorHandlingTest, InvalidSOS) {
+  // Copy of the reference stream with the byte at `index` replaced by `value`
+  // (truncated to 8 bits).
+  const auto with_byte = [](size_t index, int value) {
+    std::vector<uint8_t> bytes(kCompressed0, kCompressed0 + kLen0);
+    bytes[index] = value;
+    return bytes;
+  };
+  // Invalid comps_in_scan
+  for (int val : {2, 5, 17}) {
+    EXPECT_FALSE(ParseCompressed(with_byte(kSOSOffset + 4, val)));
+  }
+  // invalid Huffman table indexes
+  for (int val : {0x05, 0x50, 0x15, 0x51}) {
+    EXPECT_FALSE(ParseCompressed(with_byte(kSOSOffset + 6, val)));
+  }
+  // invalid Ss/Se
+  for (int pos : {7, 8}) {
+    EXPECT_FALSE(ParseCompressed(with_byte(kSOSOffset + pos, 64)));
+  }
+}
+
+// Overwrites each byte of the reference stream in turn with a few probe values
+// and parses the result. The outcome is deliberately not checked: the test
+// only requires that parsing returns for every mutation.
+TEST(DecoderErrorHandlingTest, MutateSingleBytes) {
+  const int kProbeValues[] = {0x00, 0x0f, 0xf0, 0xff};
+  for (size_t index = 0; index < kLen0; ++index) {
+    std::vector<uint8_t> mutated(kCompressed0, kCompressed0 + kLen0);
+    for (int probe : kProbeValues) {
+      mutated[index] = probe;
+      ParseCompressed(mutated);
+    }
+  }
+}
+
+}  // namespace
+}  // namespace jpegli
diff --git a/third-party/libjxl/libjxl/lib/jpegli/huffman.cc b/third-party/libjxl/libjxl/lib/jpegli/huffman.cc
new file mode 100644
index 0000000000..1cf88a5536
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/huffman.cc
@@ -0,0 +1,321 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/huffman.h"
+
+#include <limits>
+#include <vector>
+
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/error.h"
+
+namespace jpegli {
+
+// Computes the index width (in bits) of the next 2nd level table.
+// `count` is the histogram of bit lengths of the symbols not yet placed and
+// `len` is the code length of the next symbol to be processed.
+static inline int NextTableBitSize(const int* count, int len) {
+  const int max_len = static_cast<int>(kJpegHuffmanMaxBitLength);
+  // Number of codes of the current length still available in the sub-table.
+  int remaining = 1 << (len - kJpegHuffmanRootTableBits);
+  for (; len < max_len; ++len, remaining <<= 1) {
+    remaining -= count[len];
+    if (remaining <= 0) break;
+  }
+  return len - kJpegHuffmanRootTableBits;
+}
+
+// Builds the two-level decoding lookup table for one Huffman code.
+//
+// count[len] is the number of codes of length len, for len in
+// 1..kJpegHuffmanMaxBitLength, and `symbols` lists the symbol values in the
+// order in which they are consumed here (shortest codes first). The first
+// (1 << kJpegHuffmanRootTableBits) entries of `lut` form the root table; the
+// 2nd level tables for longer codes are appended directly after it.
+void BuildJpegHuffmanTable(const uint32_t* count, const uint32_t* symbols,
+                           HuffmanTableEntry* lut) {
+  const int max_length = static_cast<int>(kJpegHuffmanMaxBitLength);
+  const int root_size = 1 << kJpegHuffmanRootTableBits;
+
+  // Working copy of the bit length histogram; it is consumed while filling.
+  int remaining[kJpegHuffmanMaxBitLength + 1] = {0};
+  int num_symbols = 0;
+  for (int len = 1; len <= max_length; ++len) {
+    remaining[len] = count[len];
+    num_symbols += remaining[len];
+  }
+
+  // A code with a single symbol: every root entry yields it using 0 bits.
+  if (num_symbols == 1) {
+    HuffmanTableEntry only;
+    only.bits = 0;
+    only.value = symbols[0];
+    for (int slot = 0; slot < root_size; ++slot) {
+      lut[slot] = only;
+    }
+    return;
+  }
+
+  int next_symbol = 0;  // index of the next unplaced symbol
+  int key = 0;          // next root table slot to fill
+
+  // Root table: a code of length len is replicated over
+  // 2^(kJpegHuffmanRootTableBits - len) consecutive slots.
+  for (int len = 1; len <= kJpegHuffmanRootTableBits; ++len) {
+    while (remaining[len] > 0) {
+      HuffmanTableEntry entry;
+      entry.bits = len;
+      entry.value = symbols[next_symbol++];
+      const int reps = 1 << (kJpegHuffmanRootTableBits - len);
+      for (int r = 0; r < reps; ++r) {
+        lut[key++] = entry;
+      }
+      --remaining[len];
+    }
+  }
+
+  // 2nd level tables, each reached through a pointer entry in the root table.
+  HuffmanTableEntry* table = lut + root_size;  // start of current sub-table
+  int table_bits = kJpegHuffmanRootTableBits;  // index width of sub-table
+  int table_size = 0;  // 0 forces a sub-table to be opened on first use
+  int low = 0;         // next slot inside the current sub-table
+  for (int len = kJpegHuffmanRootTableBits + 1; len <= max_length; ++len) {
+    while (remaining[len] > 0) {
+      // Start a new sub-table if the previous one is full.
+      if (low >= table_size) {
+        table += table_size;
+        table_bits = NextTableBitSize(remaining, len);
+        table_size = 1 << table_bits;
+        low = 0;
+        // The root entry stores the total index width and the distance from
+        // itself to the sub-table.
+        lut[key].bits = table_bits + kJpegHuffmanRootTableBits;
+        lut[key].value = (table - lut) - key;
+        ++key;
+      }
+      HuffmanTableEntry entry;
+      entry.bits = len - kJpegHuffmanRootTableBits;
+      entry.value = symbols[next_symbol++];
+      const int reps = 1 << (table_bits - entry.bits);
+      for (int r = 0; r < reps; ++r) {
+        table[low++] = entry;
+      }
+      --remaining[len];
+    }
+  }
+}
+
+// One node of the Huffman tree under construction. A node with a negative
+// index_left is a leaf and keeps its symbol in index_right_or_value; otherwise
+// both index fields are positions of the child nodes in the node pool.
+struct HuffmanTree {
+  uint32_t total_count;
+  int16_t index_left;
+  int16_t index_right_or_value;
+
+  HuffmanTree(uint32_t count, int16_t left, int16_t right)
+      : total_count(count), index_left(left), index_right_or_value(right) {}
+};
+
+// Walks the subtree rooted at `p` (children are looked up in `pool`) and
+// stores in depth[symbol] the level of every leaf, where `level` is the level
+// of `p` itself.
+void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth,
+              uint8_t level) {
+  if (p.index_left < 0) {
+    // Leaf: index_right_or_value holds the symbol.
+    depth[p.index_right_or_value] = level;
+    return;
+  }
+  const uint8_t child_level = static_cast<uint8_t>(level + 1);
+  SetDepth(pool[p.index_left], pool, depth, child_level);
+  SetDepth(pool[p.index_right_or_value], pool, depth, child_level);
+}
+
+// Ordering used to sort the leaf nodes by ascending population count.
+static JXL_INLINE bool Compare(const HuffmanTree& lhs, const HuffmanTree& rhs) {
+  return rhs.total_count > lhs.total_count;
+}
+
+// Computes length-limited Huffman code lengths for a histogram.
+//
+// data[0..length) holds the population count of every symbol. For each symbol
+// with a non-zero count, depth[symbol] receives its code length. Entries of
+// zero-count symbols are never written, but the final length check scans all
+// of depth[0..length), so the caller must pass an initialized array
+// (presumably zero-filled -- confirm against the callers).
+//
+// The catch here is that the tree cannot be arbitrarily deep: no code may be
+// longer than tree_limit bits. count_limit - 1 is the value that is faked as
+// the minimum population count, and this minimum is raised until the tree
+// matches the maximum length requirement.
+//
+// If no symbol has a non-zero count the function returns without writing to
+// depth.
+//
+// This algorithm is not of excellent performance for very long data blocks,
+// especially when population counts are longer than 2**tree_limit, but
+// we are not planning to use this with extremely long blocks.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
+void CreateHuffmanTree(const uint32_t* data, const size_t length,
+                       const int tree_limit, uint8_t* depth) {
+  // For block sizes below 64 kB, we never need to do a second iteration
+  // of this loop. Probably all of our block sizes will be smaller than
+  // that, so this loop is mostly of academic interest. If we actually
+  // would need this, we would be better off with the Katajainen algorithm.
+  for (uint32_t count_limit = 1;; count_limit *= 2) {
+    std::vector<HuffmanTree> tree;
+    tree.reserve(2 * length + 1);
+
+    // Collect one leaf per used symbol, highest symbol first.
+    for (size_t i = length; i != 0;) {
+      --i;
+      if (data[i]) {
+        const uint32_t count = std::max(data[i], count_limit - 1);
+        tree.emplace_back(count, -1, static_cast<int16_t>(i));
+      }
+    }
+
+    const size_t n = tree.size();
+    if (n == 0) {
+      // Empty histogram: there is nothing to assign. Without this exit the
+      // merge loop below would start at k = n - 1, which wraps around for an
+      // unsigned n, and index past the sentinels.
+      break;
+    }
+    if (n == 1) {
+      // Fake value; will be fixed on upper level.
+      depth[tree[0].index_right_or_value] = 1;
+      break;
+    }
+
+    std::stable_sort(tree.begin(), tree.end(), Compare);
+
+    // The nodes are:
+    // [0, n): the sorted leaf nodes that we start with.
+    // [n]: we add a sentinel here.
+    // [n + 1, 2n): new parent nodes are added here, starting from
+    //              (n+1). These are naturally in ascending order.
+    // [2n]: we add a sentinel at the end as well.
+    // There will be (2n+1) elements at the end.
+    const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
+    tree.push_back(sentinel);
+    tree.push_back(sentinel);
+
+    size_t i = 0;      // Points to the next leaf node.
+    size_t j = n + 1;  // Points to the next non-leaf node.
+    for (size_t k = n - 1; k != 0; --k) {
+      // Pick the two cheapest roots among the remaining leaves and parents.
+      size_t left, right;
+      if (tree[i].total_count <= tree[j].total_count) {
+        left = i;
+        ++i;
+      } else {
+        left = j;
+        ++j;
+      }
+      if (tree[i].total_count <= tree[j].total_count) {
+        right = i;
+        ++i;
+      } else {
+        right = j;
+        ++j;
+      }
+
+      // The sentinel node becomes the parent node.
+      size_t j_end = tree.size() - 1;
+      tree[j_end].total_count =
+          tree[left].total_count + tree[right].total_count;
+      tree[j_end].index_left = static_cast<int16_t>(left);
+      tree[j_end].index_right_or_value = static_cast<int16_t>(right);
+
+      // Add back the last sentinel node.
+      tree.push_back(sentinel);
+    }
+    JXL_DASSERT(tree.size() == 2 * n + 1);
+    // The last parent created, at index 2n - 1, is the root.
+    SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
+
+    // We need to pack the Huffman tree in tree_limit bits.
+    // If this was not successful, add fake entities to the lowest values
+    // and retry.
+    if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
+      break;
+    }
+  }
+}
+
+// Checks that `table` describes a usable Huffman code and reports a
+// JPEGLI_ERROR otherwise. Rejected are: a table without symbols, a table with
+// more than kJpegHuffmanAlphabetSize symbols, a bit length distribution that,
+// together with one extra sentinel code of the longest used length, does not
+// exactly fill the code space, and a symbol that is listed twice.
+// `is_dc` is not consulted by any of the checks.
+void ValidateHuffmanTable(j_common_ptr cinfo, const JHUFF_TBL* table,
+                          bool is_dc) {
+  const size_t full_code_space = 1u << kJpegHuffmanMaxBitLength;
+  size_t num_symbols = 0;
+  size_t code_space = 0;  // in units of codes of the maximum length
+  size_t longest = 0;     // longest code length that is in use
+  for (size_t len = 1; len <= kJpegHuffmanMaxBitLength; ++len) {
+    const uint8_t num_codes = table->bits[len];
+    if (num_codes == 0) continue;
+    num_symbols += num_codes;
+    code_space += (1u << (kJpegHuffmanMaxBitLength - len)) * num_codes;
+    longest = len;
+  }
+  code_space += 1u << (kJpegHuffmanMaxBitLength - longest);  // sentinel symbol
+  if (num_symbols == 0) {
+    JPEGLI_ERROR("Empty Huffman table");
+  }
+  if (num_symbols > kJpegHuffmanAlphabetSize) {
+    JPEGLI_ERROR("Too many symbols in Huffman table");
+  }
+  if (code_space != full_code_space) {
+    JPEGLI_ERROR("Invalid bit length distribution");
+  }
+  bool seen[kJpegHuffmanAlphabetSize] = {};
+  for (size_t i = 0; i < num_symbols; ++i) {
+    const uint8_t symbol = table->huffval[i];
+    if (seen[symbol]) {
+      JPEGLI_ERROR("Duplicate symbol %d in Huffman table", symbol);
+    }
+    seen[symbol] = true;
+  }
+}
+
+// Installs the Huffman tables from the JPEG standard into the first two DC
+// (is_dc == true) or AC (is_dc == false) table slots of `cinfo`, which may be
+// a compressor or a decompressor object. Slots that already hold a table are
+// left untouched; every table that gets installed is validated.
+void AddStandardHuffmanTables(j_common_ptr cinfo, bool is_dc) {
+  // Huffman tables from the JPEG standard.
+  static constexpr JHUFF_TBL kStandardDCTables[2] = {
+      // DC luma
+      {{0, 0, 1, 5, 1, 1, 1, 1, 1, 1},
+       {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
+       FALSE},
+      // DC chroma
+      {{0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+       {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
+       FALSE}};
+  static constexpr JHUFF_TBL kStandardACTables[2] = {
+      // AC luma
+      {{0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 125},
+       {0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06,
+        0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+        0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, 0x24, 0x33, 0x62, 0x72,
+        0x82, 0x09, 0x0a, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+        0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45,
+        0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+        0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74, 0x75,
+        0x76, 0x77, 0x78, 0x79, 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+        0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3,
+        0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+        0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9,
+        0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+        0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf1, 0xf2, 0xf3, 0xf4,
+        0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa},
+       FALSE},
+      // AC chroma
+      {{0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 119},
+       {0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41,
+        0x51, 0x07, 0x61, 0x71, 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+        0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, 0x15, 0x62, 0x72, 0xd1,
+        0x0a, 0x16, 0x24, 0x34, 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+        0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44,
+        0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+        0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74,
+        0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+        0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a,
+        0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+        0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+        0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+        0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf2, 0xf3, 0xf4,
+        0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa},
+       FALSE}};
+  const JHUFF_TBL* std_tables = is_dc ? kStandardDCTables : kStandardACTables;
+  // Locate the table slots of the concrete (de)compressor object.
+  JHUFF_TBL** tables;
+  if (cinfo->is_decompressor) {
+    j_decompress_ptr cinfo_d = reinterpret_cast<j_decompress_ptr>(cinfo);
+    tables = is_dc ? cinfo_d->dc_huff_tbl_ptrs : cinfo_d->ac_huff_tbl_ptrs;
+  } else {
+    j_compress_ptr cinfo_c = reinterpret_cast<j_compress_ptr>(cinfo);
+    tables = is_dc ? cinfo_c->dc_huff_tbl_ptrs : cinfo_c->ac_huff_tbl_ptrs;
+  }
+  for (int i = 0; i < 2; ++i) {
+    if (tables[i] == nullptr) {
+      tables[i] = jpegli_alloc_huff_table(cinfo);
+      // Plain struct assignment: type-checked, and unlike memcpy it does not
+      // depend on <cstring>, which this file does not include.
+      *tables[i] = std_tables[i];
+      ValidateHuffmanTable(cinfo, tables[i], is_dc);
+    }
+  }
+}
+
+}  // namespace jpegli
diff --git a/third-party/libjxl/libjxl/lib/jpegli/huffman.h b/third-party/libjxl/libjxl/lib/jpegli/huffman.h
new file mode 100644
index 0000000000..f0e5e1de40
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/huffman.h
@@ -0,0 +1,50 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_HUFFMAN_H_
+#define LIB_JPEGLI_HUFFMAN_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "lib/jpegli/common_internal.h"
+
+namespace jpegli {
+
+// Number of index bits of the root (first level) Huffman lookup table.
+constexpr int kJpegHuffmanRootTableBits = 8;
+// Maximum huffman lookup table size.
+// According to zlib/examples/enough.c, 758 entries are always enough for
+// an alphabet of 257 symbols (256 + 1 special symbol for the all 1s code) and
+// max bit length 16 if the root table has 8 bits.
+constexpr int kJpegHuffmanLutSize = 758;
+
+// One entry of the two-level Huffman lookup table.
+struct HuffmanTableEntry {
+  uint8_t bits;    // number of bits used for this symbol
+  uint16_t value;  // symbol value or table offset
+};
+
+// Fills the lookup table `lut` from a bit length histogram (`count`, indexed
+// by code length starting at 1) and the symbol values in `symbols`.
+// The root table occupies the first (1 << kJpegHuffmanRootTableBits) entries
+// of `lut`; 2nd level tables follow it.
+void BuildJpegHuffmanTable(const uint32_t* count, const uint32_t* symbols,
+                           HuffmanTableEntry* lut);
+
+// This function will create a Huffman tree.
+//
+// The (data,length) contains the population counts.
+// The tree_limit is the maximum bit depth of the Huffman codes.
+//
+// The depth contains the tree, i.e., how many bits are used for
+// the symbol.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
+void CreateHuffmanTree(const uint32_t* data, size_t length, int tree_limit,
+                       uint8_t* depth);
+
+// Reports an error on `cinfo` if `table` has no symbols, too many symbols, an
+// invalid bit length distribution or a duplicated symbol.
+void ValidateHuffmanTable(j_common_ptr cinfo, const JHUFF_TBL* table,
+                          bool is_dc);
+
+// Installs the Huffman tables from the JPEG standard into the first two DC
+// (is_dc) or AC (!is_dc) table slots of `cinfo` that are still unset.
+void AddStandardHuffmanTables(j_common_ptr cinfo, bool is_dc);
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_HUFFMAN_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/idct.cc b/third-party/libjxl/libjxl/lib/jpegli/idct.cc
new file mode 100644
index 0000000000..4d10563583
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/idct.cc
@@ -0,0 +1,692 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/idct.h"
+
+#include <cmath>
+
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jxl/base/status.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/idct.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/transpose-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Gt;
+using hwy::HWY_NAMESPACE::IfThenElseZero;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::NegMulAdd;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::Sub;
+using hwy::HWY_NAMESPACE::Vec;
+using hwy::HWY_NAMESPACE::Xor;
+
+// Vector descriptor tags used in this file:
+//  - D / DI: full-width float and int32_t vectors (used by DequantBlock).
+//  - D8: float vectors capped at 8 lanes (used by the IDCT helpers, which
+//    process rows of 8 floats).
+using D = HWY_FULL(float);
+using DI = HWY_FULL(int32_t);
+constexpr D d;
+constexpr DI di;
+
+using D8 = HWY_CAPPED(float, 8);
+constexpr D8 d8;
+
+// Converts the 64 quantized coefficients of `qblock` into floats in `block`.
+//
+// For every nonzero coefficient the matching entry of `biases` (with its sign
+// bit flipped when the coefficient is negative) is subtracted and the result
+// is multiplied by the matching entry of `dequant`. Coefficients equal to
+// zero produce exactly zero.
+void DequantBlock(const int16_t* JXL_RESTRICT qblock,
+                  const float* JXL_RESTRICT dequant,
+                  const float* JXL_RESTRICT biases, float* JXL_RESTRICT block) {
+  const Rebind<int16_t, DI> tag_i16;
+  const Rebind<float, DI> tag_f;
+  for (size_t pos = 0; pos < 64; pos += Lanes(d)) {
+    const auto scale = Load(d, dequant + pos);
+    const auto bias = Load(d, biases + pos);
+    // Widen int16 -> int32 -> float.
+    const Vec<DI> coeff_i = PromoteTo(di, Load(tag_i16, qblock + pos));
+    const auto coeff = ConvertTo(tag_f, coeff_i);
+    const auto magnitude = Abs(coeff);
+    const auto is_nonzero = Gt(magnitude, Zero(tag_f));
+    // coeff ^ |coeff| keeps only the sign bit of the coefficient.
+    const auto sign_bit = Xor(coeff, magnitude);
+    const auto signed_bias = Xor(bias, sign_bit);
+    const auto scaled = Mul(Sub(coeff, signed_bias), scale);
+    Store(IfThenElseZero(is_nonzero, scaled), d, block + pos);
+  }
+}
+
+// Copies N input rows of 8 floats (row r starts at ain + r * ain_stride) into
+// the densely packed buffer `aout`: the even-indexed rows fill the first
+// N / 2 output rows, the odd-indexed rows fill the remaining ones.
+template <size_t N>
+void ForwardEvenOdd(const float* JXL_RESTRICT ain, size_t ain_stride,
+                    float* JXL_RESTRICT aout) {
+  constexpr size_t kHalf = N / 2;
+  for (size_t j = 0; j < kHalf; ++j) {
+    const float* even_row = ain + 2 * j * ain_stride;
+    Store(LoadU(d8, even_row), d8, aout + j * 8);
+  }
+  for (size_t j = 0; kHalf + j < N; ++j) {
+    const float* odd_row = ain + (2 * j + 1) * ain_stride;
+    Store(LoadU(d8, odd_row), d8, aout + (kHalf + j) * 8);
+  }
+}
+
+// In-place pre-processing of N packed rows of 8 floats: every row except the
+// first becomes the sum of itself and the original previous row, and the
+// first row is multiplied by sqrt(2).
+template <size_t N>
+void BTranspose(float* JXL_RESTRICT coeff) {
+  // Go from the last row upwards so that each row is added to a predecessor
+  // that has not been modified yet.
+  for (size_t row = N - 1; row != 0; --row) {
+    float* cur = coeff + row * 8;
+    const auto upper = Load(d8, cur);
+    const auto lower = Load(d8, cur - 8);
+    Store(Add(upper, lower), d8, cur);
+  }
+  const auto first_row = Load(d8, coeff);
+  Store(Mul(first_row, Set(d8, 1.41421356237f)), d8, coeff);
+}
+
+// Constants for DCT implementation. Generated by the following snippet:
+// for i in range(N // 2):
+//    print(1.0 / (2 * math.cos((i + 0.5) * math.pi / N)), end=", ")
+//
+// WcMultipliers<N>::kMultipliers holds N / 2 values and is only specialized
+// for N == 4 and N == 8; it is read by MultiplyAndAdd<N>.
+template <size_t N>
+struct WcMultipliers;
+
+// Multipliers for the 4-point stage.
+template <>
+struct WcMultipliers<4> {
+  static constexpr float kMultipliers[] = {
+      0.541196100146197,
+      1.3065629648763764,
+  };
+};
+
+// Multipliers for the 8-point stage.
+template <>
+struct WcMultipliers<8> {
+  static constexpr float kMultipliers[] = {
+      0.5097955791041592,
+      0.6013448869350453,
+      0.8999762231364156,
+      2.5629154477415055,
+  };
+};
+
+// Out-of-class definitions of the static constexpr arrays; before C++17 these
+// are required for the arrays to be odr-used (e.g. indexed at run time).
+constexpr float WcMultipliers<4>::kMultipliers[];
+constexpr float WcMultipliers<8>::kMultipliers[];
+
+// Final butterfly of the N-point stage. `coeff` holds N packed rows of 8
+// floats; for every j < N / 2, with lo = row j, hi = row N / 2 + j and
+// w = WcMultipliers<N>::kMultipliers[j]:
+//   output row j         = lo + w * hi
+//   output row N - 1 - j = lo - w * hi
+// Output rows are `out_stride` floats apart.
+template <size_t N>
+void MultiplyAndAdd(const float* JXL_RESTRICT coeff, float* JXL_RESTRICT out,
+                    size_t out_stride) {
+  constexpr size_t kHalf = N / 2;
+  for (size_t j = 0; j < kHalf; ++j) {
+    const auto weight = Set(d8, WcMultipliers<N>::kMultipliers[j]);
+    const auto lo = Load(d8, coeff + j * 8);
+    const auto hi = Load(d8, coeff + (kHalf + j) * 8);
+    const auto sum = MulAdd(weight, hi, lo);
+    const auto diff = NegMulAdd(weight, hi, lo);
+    StoreU(sum, d8, out + j * out_stride);
+    StoreU(diff, d8, out + (N - 1 - j) * out_stride);
+  }
+}
+
+// Functor computing an N-point 1-D transform over rows of 8 floats; rows of
+// the input are `from_stride` floats apart, rows of the output `to_stride`.
+template <size_t N>
+struct IDCT1DImpl;
+
+// Base case N == 1: the single row is copied unchanged; both stride
+// arguments are ignored.
+template <>
+struct IDCT1DImpl<1> {
+  JXL_INLINE void operator()(const float* from, size_t from_stride, float* to,
+                             size_t to_stride) {
+    const auto row = LoadU(d8, from);
+    StoreU(row, d8, to);
+  }
+};
+
+// Base case N == 2: output row 0 is the sum of the two input rows, output
+// row 1 is their difference. Both rows are loaded before anything is stored,
+// so `from` and `to` may refer to the same buffer.
+template <>
+struct IDCT1DImpl<2> {
+  JXL_INLINE void operator()(const float* from, size_t from_stride, float* to,
+                             size_t to_stride) {
+    JXL_DASSERT(from_stride >= 8);
+    JXL_DASSERT(to_stride >= 8);
+    const auto row0 = LoadU(d8, from);
+    const auto row1 = LoadU(d8, from + from_stride);
+    const auto sum = Add(row0, row1);
+    const auto diff = Sub(row0, row1);
+    StoreU(sum, d8, to);
+    StoreU(diff, d8, to + to_stride);
+  }
+};
+
+// General case: recursive decomposition of the N-point transform.
+//  1. ForwardEvenOdd packs the even-indexed input rows into the first half of
+//     a local scratch buffer and the odd-indexed rows into the second half.
+//  2. The first half is transformed in place with the N / 2-point transform.
+//  3. The second half is pre-processed by BTranspose and then transformed in
+//     place with the N / 2-point transform as well.
+//  4. MultiplyAndAdd combines both halves into the N output rows.
+template <size_t N>
+struct IDCT1DImpl {
+  void operator()(const float* from, size_t from_stride, float* to,
+                  size_t to_stride) {
+    JXL_DASSERT(from_stride >= 8);
+    JXL_DASSERT(to_stride >= 8);
+    HWY_ALIGN float scratch[64];
+    float* first_half = scratch;
+    float* second_half = scratch + N * 4;
+    ForwardEvenOdd<N>(from, from_stride, scratch);
+    IDCT1DImpl<N / 2> half_transform;
+    half_transform(first_half, 8, first_half, 8);
+    BTranspose<N / 2>(second_half);
+    half_transform(second_half, 8, second_half, 8);
+    MultiplyAndAdd<N>(scratch, to, to_stride);
+  }
+};
+
+// Applies the N-point 1-D transform to all 8 columns of `from` (rows are 8
+// floats apart), Lanes(d8) columns at a time, writing rows that are
+// `output_stride` floats apart to `output`.
+template <size_t N>
+void IDCT1D(float* JXL_RESTRICT from, float* JXL_RESTRICT output,
+            size_t output_stride) {
+  const size_t step = Lanes(d8);
+  for (size_t col = 0; col < 8; col += step) {
+    IDCT1DImpl<N> transform;
+    transform(from + col, 8, output + col, output_stride);
+  }
+}
+
+// 2-D 8x8 inverse transform built from two 1-D passes, each preceded by a
+// transpose. `block0` (the input coefficients) and `block1` are used as
+// ping-pong buffers and are both overwritten; the final result is written to
+// `output` with rows `output_stride` floats apart.
+// NOTE(review): Transpose8x8Block is declared elsewhere; from the data flow
+// here it is presumably called as (source, destination) -- confirm.
+void ComputeScaledIDCT(float* JXL_RESTRICT block0, float* JXL_RESTRICT block1,
+                       float* JXL_RESTRICT output, size_t output_stride) {
+  Transpose8x8Block(block0, block1);
+  // First pass: result goes back into block0 with a row stride of 8.
+  IDCT1D<8>(block1, block0, 8);
+  Transpose8x8Block(block0, block1);
+  // Second pass: result goes to the caller's buffer.
+  IDCT1D<8>(block1, output, output_stride);
+}
+
+// Dequantizes `qblock` and computes its full-size 8x8 inverse transform into
+// `output` (rows `output_stride` floats apart).
+//
+// `scratch_space` must provide at least 2 * DCTSIZE2 floats: the first
+// DCTSIZE2 receive the dequantized coefficients, the next DCTSIZE2 are
+// working storage. `dctsize` is not used by this variant.
+void InverseTransformBlock8x8(const int16_t* JXL_RESTRICT qblock,
+                              const float* JXL_RESTRICT dequant,
+                              const float* JXL_RESTRICT biases,
+                              float* JXL_RESTRICT scratch_space,
+                              float* JXL_RESTRICT output, size_t output_stride,
+                              size_t dctsize) {
+  float* JXL_RESTRICT coeffs = scratch_space;
+  float* JXL_RESTRICT work = scratch_space + DCTSIZE2;
+  DequantBlock(qblock, dequant, biases, coeffs);
+  ComputeScaledIDCT(coeffs, work, output, output_stride);
+}
+
+// Computes the N-point IDCT of in[], and stores the result in out[]. The in[]
+// array is at most 8 values long, values in[8:N-1] are assumed to be 0.
+//
+// Only min(N, 8) input values are read and exactly N output values are
+// written. Supported sizes are N = 3, 5, 6, 7 and 9..16; for any other N
+// (including 1, 2, 4 and 8) the switch has no matching case and out[] is
+// left untouched.
+//
+// In every case the table kC<N>[k] holds sqrt(2) * cos(k * pi / (2 * N)).
+// The "even" terms combine in[0] with the even-indexed inputs, the "odd"
+// terms combine the odd-indexed inputs; out[n] is even_n + odd_n and the
+// mirrored sample out[N - 1 - n] is even_n - odd_n. For odd N the middle
+// output sample has no odd contribution.
+void Compute1dIDCT(float* in, float* out, size_t N) {
+  switch (N) {
+    case 3: {
+      static constexpr float kC3[3] = {
+          1.414213562373,
+          1.224744871392,
+          0.707106781187,
+      };
+      float even0 = in[0] + kC3[2] * in[2];
+      float even1 = in[0] - kC3[0] * in[2];
+      float odd0 = kC3[1] * in[1];
+      out[0] = even0 + odd0;
+      out[2] = even0 - odd0;
+      out[1] = even1;
+      break;
+    }
+    case 5: {
+      static constexpr float kC5[5] = {
+          1.414213562373, 1.344997023928, 1.144122805635,
+          0.831253875555, 0.437016024449,
+      };
+      float even0 = in[0] + kC5[2] * in[2] + kC5[4] * in[4];
+      float even1 = in[0] - kC5[4] * in[2] - kC5[2] * in[4];
+      float even2 = in[0] - kC5[0] * in[2] + kC5[0] * in[4];
+      float odd0 = kC5[1] * in[1] + kC5[3] * in[3];
+      float odd1 = kC5[3] * in[1] - kC5[1] * in[3];
+      out[0] = even0 + odd0;
+      out[4] = even0 - odd0;
+      out[1] = even1 + odd1;
+      out[3] = even1 - odd1;
+      out[2] = even2;
+      break;
+    }
+    case 6: {
+      static constexpr float kC6[6] = {
+          1.414213562373, 1.366025403784, 1.224744871392,
+          1.000000000000, 0.707106781187, 0.366025403784,
+      };
+      float even0 = in[0] + kC6[2] * in[2] + kC6[4] * in[4];
+      float even1 = in[0] - kC6[0] * in[4];
+      float even2 = in[0] - kC6[2] * in[2] + kC6[4] * in[4];
+      float odd0 = kC6[1] * in[1] + kC6[3] * in[3] + kC6[5] * in[5];
+      float odd1 = kC6[3] * in[1] - kC6[3] * in[3] - kC6[3] * in[5];
+      float odd2 = kC6[5] * in[1] - kC6[3] * in[3] + kC6[1] * in[5];
+      out[0] = even0 + odd0;
+      out[5] = even0 - odd0;
+      out[1] = even1 + odd1;
+      out[4] = even1 - odd1;
+      out[2] = even2 + odd2;
+      out[3] = even2 - odd2;
+      break;
+    }
+    case 7: {
+      static constexpr float kC7[7] = {
+          1.414213562373, 1.378756275744, 1.274162392264, 1.105676685997,
+          0.881747733790, 0.613604268353, 0.314692122713,
+      };
+      float even0 = in[0] + kC7[2] * in[2] + kC7[4] * in[4] + kC7[6] * in[6];
+      float even1 = in[0] + kC7[6] * in[2] - kC7[2] * in[4] - kC7[4] * in[6];
+      float even2 = in[0] - kC7[4] * in[2] - kC7[6] * in[4] + kC7[2] * in[6];
+      float even3 = in[0] - kC7[0] * in[2] + kC7[0] * in[4] - kC7[0] * in[6];
+      float odd0 = kC7[1] * in[1] + kC7[3] * in[3] + kC7[5] * in[5];
+      float odd1 = kC7[3] * in[1] - kC7[5] * in[3] - kC7[1] * in[5];
+      float odd2 = kC7[5] * in[1] - kC7[1] * in[3] + kC7[3] * in[5];
+      out[0] = even0 + odd0;
+      out[6] = even0 - odd0;
+      out[1] = even1 + odd1;
+      out[5] = even1 - odd1;
+      out[2] = even2 + odd2;
+      out[4] = even2 - odd2;
+      out[3] = even3;
+      break;
+    }
+    // From here on N > 8, so only the first 8 inputs in[0..7] contribute.
+    case 9: {
+      static constexpr float kC9[9] = {
+          1.414213562373, 1.392728480640, 1.328926048777,
+          1.224744871392, 1.083350440839, 0.909038955344,
+          0.707106781187, 0.483689525296, 0.245575607938,
+      };
+      float even0 = in[0] + kC9[2] * in[2] + kC9[4] * in[4] + kC9[6] * in[6];
+      float even1 = in[0] + kC9[6] * in[2] - kC9[6] * in[4] - kC9[0] * in[6];
+      float even2 = in[0] - kC9[8] * in[2] - kC9[2] * in[4] + kC9[6] * in[6];
+      float even3 = in[0] - kC9[4] * in[2] + kC9[8] * in[4] + kC9[6] * in[6];
+      float even4 = in[0] - kC9[0] * in[2] + kC9[0] * in[4] - kC9[0] * in[6];
+      float odd0 =
+          kC9[1] * in[1] + kC9[3] * in[3] + kC9[5] * in[5] + kC9[7] * in[7];
+      float odd1 = kC9[3] * in[1] - kC9[3] * in[5] - kC9[3] * in[7];
+      float odd2 =
+          kC9[5] * in[1] - kC9[3] * in[3] - kC9[7] * in[5] + kC9[1] * in[7];
+      float odd3 =
+          kC9[7] * in[1] - kC9[3] * in[3] + kC9[1] * in[5] - kC9[5] * in[7];
+      out[0] = even0 + odd0;
+      out[8] = even0 - odd0;
+      out[1] = even1 + odd1;
+      out[7] = even1 - odd1;
+      out[2] = even2 + odd2;
+      out[6] = even2 - odd2;
+      out[3] = even3 + odd3;
+      out[5] = even3 - odd3;
+      out[4] = even4;
+      break;
+    }
+    case 10: {
+      static constexpr float kC10[10] = {
+          1.414213562373, 1.396802246667, 1.344997023928, 1.260073510670,
+          1.144122805635, 1.000000000000, 0.831253875555, 0.642039521920,
+          0.437016024449, 0.221231742082,
+      };
+      float even0 = in[0] + kC10[2] * in[2] + kC10[4] * in[4] + kC10[6] * in[6];
+      float even1 = in[0] + kC10[6] * in[2] - kC10[8] * in[4] - kC10[2] * in[6];
+      float even2 = in[0] - kC10[0] * in[4];
+      float even3 = in[0] - kC10[6] * in[2] - kC10[8] * in[4] + kC10[2] * in[6];
+      float even4 = in[0] - kC10[2] * in[2] + kC10[4] * in[4] - kC10[6] * in[6];
+      float odd0 =
+          kC10[1] * in[1] + kC10[3] * in[3] + kC10[5] * in[5] + kC10[7] * in[7];
+      float odd1 =
+          kC10[3] * in[1] + kC10[9] * in[3] - kC10[5] * in[5] - kC10[1] * in[7];
+      float odd2 =
+          kC10[5] * in[1] - kC10[5] * in[3] - kC10[5] * in[5] + kC10[5] * in[7];
+      float odd3 =
+          kC10[7] * in[1] - kC10[1] * in[3] + kC10[5] * in[5] + kC10[9] * in[7];
+      float odd4 =
+          kC10[9] * in[1] - kC10[7] * in[3] + kC10[5] * in[5] - kC10[3] * in[7];
+      out[0] = even0 + odd0;
+      out[9] = even0 - odd0;
+      out[1] = even1 + odd1;
+      out[8] = even1 - odd1;
+      out[2] = even2 + odd2;
+      out[7] = even2 - odd2;
+      out[3] = even3 + odd3;
+      out[6] = even3 - odd3;
+      out[4] = even4 + odd4;
+      out[5] = even4 - odd4;
+      break;
+    }
+    case 11: {
+      static constexpr float kC11[11] = {
+          1.414213562373, 1.399818907436, 1.356927976287, 1.286413904599,
+          1.189712155524, 1.068791297809, 0.926112931411, 0.764581576418,
+          0.587485545401, 0.398430002847, 0.201263574413,
+      };
+      float even0 = in[0] + kC11[2] * in[2] + kC11[4] * in[4] + kC11[6] * in[6];
+      float even1 =
+          in[0] + kC11[6] * in[2] - kC11[10] * in[4] - kC11[4] * in[6];
+      float even2 =
+          in[0] + kC11[10] * in[2] - kC11[2] * in[4] - kC11[8] * in[6];
+      float even3 = in[0] - kC11[8] * in[2] - kC11[6] * in[4] + kC11[2] * in[6];
+      float even4 =
+          in[0] - kC11[4] * in[2] + kC11[8] * in[4] + kC11[10] * in[6];
+      float even5 = in[0] - kC11[0] * in[2] + kC11[0] * in[4] - kC11[0] * in[6];
+      float odd0 =
+          kC11[1] * in[1] + kC11[3] * in[3] + kC11[5] * in[5] + kC11[7] * in[7];
+      float odd1 =
+          kC11[3] * in[1] + kC11[9] * in[3] - kC11[7] * in[5] - kC11[1] * in[7];
+      float odd2 =
+          kC11[5] * in[1] - kC11[7] * in[3] - kC11[3] * in[5] + kC11[9] * in[7];
+      float odd3 =
+          kC11[7] * in[1] - kC11[1] * in[3] + kC11[9] * in[5] + kC11[5] * in[7];
+      float odd4 =
+          kC11[9] * in[1] - kC11[5] * in[3] + kC11[1] * in[5] - kC11[3] * in[7];
+      out[0] = even0 + odd0;
+      out[10] = even0 - odd0;
+      out[1] = even1 + odd1;
+      out[9] = even1 - odd1;
+      out[2] = even2 + odd2;
+      out[8] = even2 - odd2;
+      out[3] = even3 + odd3;
+      out[7] = even3 - odd3;
+      out[4] = even4 + odd4;
+      out[6] = even4 - odd4;
+      out[5] = even5;
+      break;
+    }
+    case 12: {
+      static constexpr float kC12[12] = {
+          1.414213562373, 1.402114769300, 1.366025403784, 1.306562964876,
+          1.224744871392, 1.121971053594, 1.000000000000, 0.860918669154,
+          0.707106781187, 0.541196100146, 0.366025403784, 0.184591911283,
+      };
+      float even0 = in[0] + kC12[2] * in[2] + kC12[4] * in[4] + kC12[6] * in[6];
+      float even1 = in[0] + kC12[6] * in[2] - kC12[6] * in[6];
+      float even2 =
+          in[0] + kC12[10] * in[2] - kC12[4] * in[4] - kC12[6] * in[6];
+      float even3 =
+          in[0] - kC12[10] * in[2] - kC12[4] * in[4] + kC12[6] * in[6];
+      float even4 = in[0] - kC12[6] * in[2] + kC12[6] * in[6];
+      float even5 = in[0] - kC12[2] * in[2] + kC12[4] * in[4] - kC12[6] * in[6];
+      float odd0 =
+          kC12[1] * in[1] + kC12[3] * in[3] + kC12[5] * in[5] + kC12[7] * in[7];
+      float odd1 =
+          kC12[3] * in[1] + kC12[9] * in[3] - kC12[9] * in[5] - kC12[3] * in[7];
+      float odd2 = kC12[5] * in[1] - kC12[9] * in[3] - kC12[1] * in[5] -
+                   kC12[11] * in[7];
+      float odd3 = kC12[7] * in[1] - kC12[3] * in[3] - kC12[11] * in[5] +
+                   kC12[1] * in[7];
+      float odd4 =
+          kC12[9] * in[1] - kC12[3] * in[3] + kC12[3] * in[5] - kC12[9] * in[7];
+      float odd5 = kC12[11] * in[1] - kC12[9] * in[3] + kC12[7] * in[5] -
+                   kC12[5] * in[7];
+      out[0] = even0 + odd0;
+      out[11] = even0 - odd0;
+      out[1] = even1 + odd1;
+      out[10] = even1 - odd1;
+      out[2] = even2 + odd2;
+      out[9] = even2 - odd2;
+      out[3] = even3 + odd3;
+      out[8] = even3 - odd3;
+      out[4] = even4 + odd4;
+      out[7] = even4 - odd4;
+      out[5] = even5 + odd5;
+      out[6] = even5 - odd5;
+      break;
+    }
+    case 13: {
+      static constexpr float kC13[13] = {
+          1.414213562373, 1.403902353238, 1.373119086479, 1.322312651445,
+          1.252223920364, 1.163874944761, 1.058554051646, 0.937797056801,
+          0.803364869133, 0.657217812653, 0.501487040539, 0.338443458124,
+          0.170464607981,
+      };
+      float even0 = in[0] + kC13[2] * in[2] + kC13[4] * in[4] + kC13[6] * in[6];
+      float even1 =
+          in[0] + kC13[6] * in[2] + kC13[12] * in[4] - kC13[8] * in[6];
+      float even2 =
+          in[0] + kC13[10] * in[2] - kC13[6] * in[4] - kC13[4] * in[6];
+      float even3 =
+          in[0] - kC13[12] * in[2] - kC13[2] * in[4] + kC13[10] * in[6];
+      float even4 =
+          in[0] - kC13[8] * in[2] - kC13[10] * in[4] + kC13[2] * in[6];
+      float even5 =
+          in[0] - kC13[4] * in[2] + kC13[8] * in[4] - kC13[12] * in[6];
+      float even6 = in[0] - kC13[0] * in[2] + kC13[0] * in[4] - kC13[0] * in[6];
+      float odd0 =
+          kC13[1] * in[1] + kC13[3] * in[3] + kC13[5] * in[5] + kC13[7] * in[7];
+      float odd1 = kC13[3] * in[1] + kC13[9] * in[3] - kC13[11] * in[5] -
+                   kC13[5] * in[7];
+      float odd2 = kC13[5] * in[1] - kC13[11] * in[3] - kC13[1] * in[5] -
+                   kC13[9] * in[7];
+      float odd3 =
+          kC13[7] * in[1] - kC13[5] * in[3] - kC13[9] * in[5] + kC13[3] * in[7];
+      float odd4 = kC13[9] * in[1] - kC13[1] * in[3] + kC13[7] * in[5] +
+                   kC13[11] * in[7];
+      float odd5 = kC13[11] * in[1] - kC13[7] * in[3] + kC13[3] * in[5] -
+                   kC13[1] * in[7];
+      out[0] = even0 + odd0;
+      out[12] = even0 - odd0;
+      out[1] = even1 + odd1;
+      out[11] = even1 - odd1;
+      out[2] = even2 + odd2;
+      out[10] = even2 - odd2;
+      out[3] = even3 + odd3;
+      out[9] = even3 - odd3;
+      out[4] = even4 + odd4;
+      out[8] = even4 - odd4;
+      out[5] = even5 + odd5;
+      out[7] = even5 - odd5;
+      out[6] = even6;
+      break;
+    }
+    case 14: {
+      static constexpr float kC14[14] = {
+          1.414213562373, 1.405321284327, 1.378756275744, 1.334852607020,
+          1.274162392264, 1.197448846138, 1.105676685997, 1.000000000000,
+          0.881747733790, 0.752406978226, 0.613604268353, 0.467085128785,
+          0.314692122713, 0.158341680609,
+      };
+      float even0 = in[0] + kC14[2] * in[2] + kC14[4] * in[4] + kC14[6] * in[6];
+      float even1 =
+          in[0] + kC14[6] * in[2] + kC14[12] * in[4] - kC14[10] * in[6];
+      float even2 =
+          in[0] + kC14[10] * in[2] - kC14[8] * in[4] - kC14[2] * in[6];
+      float even3 = in[0] - kC14[0] * in[4];
+      float even4 =
+          in[0] - kC14[10] * in[2] - kC14[8] * in[4] + kC14[2] * in[6];
+      float even5 =
+          in[0] - kC14[6] * in[2] + kC14[12] * in[4] + kC14[10] * in[6];
+      float even6 = in[0] - kC14[2] * in[2] + kC14[4] * in[4] - kC14[6] * in[6];
+      float odd0 =
+          kC14[1] * in[1] + kC14[3] * in[3] + kC14[5] * in[5] + kC14[7] * in[7];
+      float odd1 = kC14[3] * in[1] + kC14[9] * in[3] - kC14[13] * in[5] -
+                   kC14[7] * in[7];
+      float odd2 = kC14[5] * in[1] - kC14[13] * in[3] - kC14[3] * in[5] -
+                   kC14[7] * in[7];
+      float odd3 =
+          kC14[7] * in[1] - kC14[7] * in[3] - kC14[7] * in[5] + kC14[7] * in[7];
+      float odd4 = kC14[9] * in[1] - kC14[1] * in[3] + kC14[11] * in[5] +
+                   kC14[7] * in[7];
+      float odd5 = kC14[11] * in[1] - kC14[5] * in[3] + kC14[1] * in[5] -
+                   kC14[7] * in[7];
+      float odd6 = kC14[13] * in[1] - kC14[11] * in[3] + kC14[9] * in[5] -
+                   kC14[7] * in[7];
+      out[0] = even0 + odd0;
+      out[13] = even0 - odd0;
+      out[1] = even1 + odd1;
+      out[12] = even1 - odd1;
+      out[2] = even2 + odd2;
+      out[11] = even2 - odd2;
+      out[3] = even3 + odd3;
+      out[10] = even3 - odd3;
+      out[4] = even4 + odd4;
+      out[9] = even4 - odd4;
+      out[5] = even5 + odd5;
+      out[8] = even5 - odd5;
+      out[6] = even6 + odd6;
+      out[7] = even6 - odd6;
+      break;
+    }
+    case 15: {
+      static constexpr float kC15[15] = {
+          1.414213562373, 1.406466352507, 1.383309602960, 1.344997023928,
+          1.291948376043, 1.224744871392, 1.144122805635, 1.050965490998,
+          0.946293578512, 0.831253875555, 0.707106781187, 0.575212476952,
+          0.437016024449, 0.294031532930, 0.147825570407,
+      };
+      float even0 = in[0] + kC15[2] * in[2] + kC15[4] * in[4] + kC15[6] * in[6];
+      float even1 =
+          in[0] + kC15[6] * in[2] + kC15[12] * in[4] - kC15[12] * in[6];
+      float even2 =
+          in[0] + kC15[10] * in[2] - kC15[10] * in[4] - kC15[0] * in[6];
+      float even3 =
+          in[0] + kC15[14] * in[2] - kC15[2] * in[4] - kC15[12] * in[6];
+      float even4 =
+          in[0] - kC15[12] * in[2] - kC15[6] * in[4] + kC15[6] * in[6];
+      float even5 =
+          in[0] - kC15[8] * in[2] - kC15[14] * in[4] + kC15[6] * in[6];
+      float even6 =
+          in[0] - kC15[4] * in[2] + kC15[8] * in[4] - kC15[12] * in[6];
+      float even7 = in[0] - kC15[0] * in[2] + kC15[0] * in[4] - kC15[0] * in[6];
+      float odd0 =
+          kC15[1] * in[1] + kC15[3] * in[3] + kC15[5] * in[5] + kC15[7] * in[7];
+      float odd1 = kC15[3] * in[1] + kC15[9] * in[3] - kC15[9] * in[7];
+      float odd2 = kC15[5] * in[1] - kC15[5] * in[5] - kC15[5] * in[7];
+      float odd3 = kC15[7] * in[1] - kC15[9] * in[3] - kC15[5] * in[5] +
+                   kC15[11] * in[7];
+      float odd4 = kC15[9] * in[1] - kC15[3] * in[3] + kC15[3] * in[7];
+      float odd5 = kC15[11] * in[1] - kC15[3] * in[3] + kC15[5] * in[5] -
+                   kC15[13] * in[7];
+      float odd6 = kC15[13] * in[1] - kC15[9] * in[3] + kC15[5] * in[5] -
+                   kC15[1] * in[7];
+      out[0] = even0 + odd0;
+      out[14] = even0 - odd0;
+      out[1] = even1 + odd1;
+      out[13] = even1 - odd1;
+      out[2] = even2 + odd2;
+      out[12] = even2 - odd2;
+      out[3] = even3 + odd3;
+      out[11] = even3 - odd3;
+      out[4] = even4 + odd4;
+      out[10] = even4 - odd4;
+      out[5] = even5 + odd5;
+      out[9] = even5 - odd5;
+      out[6] = even6 + odd6;
+      out[8] = even6 - odd6;
+      out[7] = even7;
+      break;
+    }
+    case 16: {
+      static constexpr float kC16[16] = {
+          1.414213562373, 1.407403737526, 1.387039845322, 1.353318001174,
+          1.306562964876, 1.247225012987, 1.175875602419, 1.093201867002,
+          1.000000000000, 0.897167586343, 0.785694958387, 0.666655658478,
+          0.541196100146, 0.410524527522, 0.275899379283, 0.138617169199,
+      };
+      float even0 = in[0] + kC16[2] * in[2] + kC16[4] * in[4] + kC16[6] * in[6];
+      float even1 =
+          in[0] + kC16[6] * in[2] + kC16[12] * in[4] - kC16[14] * in[6];
+      float even2 =
+          in[0] + kC16[10] * in[2] - kC16[12] * in[4] - kC16[2] * in[6];
+      float even3 =
+          in[0] + kC16[14] * in[2] - kC16[4] * in[4] - kC16[10] * in[6];
+      float even4 =
+          in[0] - kC16[14] * in[2] - kC16[4] * in[4] + kC16[10] * in[6];
+      float even5 =
+          in[0] - kC16[10] * in[2] - kC16[12] * in[4] + kC16[2] * in[6];
+      float even6 =
+          in[0] - kC16[6] * in[2] + kC16[12] * in[4] + kC16[14] * in[6];
+      float even7 = in[0] - kC16[2] * in[2] + kC16[4] * in[4] - kC16[6] * in[6];
+      float odd0 = (kC16[1] * in[1] + kC16[3] * in[3] + kC16[5] * in[5] +
+                    kC16[7] * in[7]);
+      float odd1 = (kC16[3] * in[1] + kC16[9] * in[3] + kC16[15] * in[5] -
+                    kC16[11] * in[7]);
+      float odd2 = (kC16[5] * in[1] + kC16[15] * in[3] - kC16[7] * in[5] -
+                    kC16[3] * in[7]);
+      float odd3 = (kC16[7] * in[1] - kC16[11] * in[3] - kC16[3] * in[5] +
+                    kC16[15] * in[7]);
+      float odd4 = (kC16[9] * in[1] - kC16[5] * in[3] - kC16[13] * in[5] +
+                    kC16[1] * in[7]);
+      float odd5 = (kC16[11] * in[1] - kC16[1] * in[3] + kC16[9] * in[5] +
+                    kC16[13] * in[7]);
+      float odd6 = (kC16[13] * in[1] - kC16[7] * in[3] + kC16[1] * in[5] -
+                    kC16[5] * in[7]);
+      float odd7 = (kC16[15] * in[1] - kC16[13] * in[3] + kC16[11] * in[5] -
+                    kC16[9] * in[7]);
+      out[0] = even0 + odd0;
+      out[15] = even0 - odd0;
+      out[1] = even1 + odd1;
+      out[14] = even1 - odd1;
+      out[2] = even2 + odd2;
+      out[13] = even2 - odd2;
+      out[3] = even3 + odd3;
+      out[12] = even3 - odd3;
+      out[4] = even4 + odd4;
+      out[11] = even4 - odd4;
+      out[5] = even5 + odd5;
+      out[10] = even5 - odd5;
+      out[6] = even6 + odd6;
+      out[9] = even6 - odd6;
+      out[7] = even7 + odd7;
+      out[8] = even7 - odd7;
+      break;
+    }
+  }
+}
+
+// Dequantizes `qblock` and writes a dctsize x dctsize block of samples to
+// `output` (rows `output_stride` floats apart).
+//
+//  - dctsize == 1: the dequantized first coefficient is the single sample.
+//  - dctsize == 2 or 4: the full 8x8 inverse transform is computed into
+//    scratch memory and averaged down over 4x4 or 2x2 cells respectively.
+//  - otherwise: a separable transform built on Compute1dIDCT, first along
+//    the columns and then along the rows, using at most the first 8x8
+//    (dctsize x dctsize when dctsize < 8) coefficients.
+//
+// `scratch_space` holds the dequantized coefficients in its first DCTSIZE2
+// floats; working storage follows from offset DCTSIZE2 on, and the 2 / 4
+// path additionally uses DCTSIZE2 floats starting at offset 2 * DCTSIZE2.
+void InverseTransformBlockGeneric(const int16_t* JXL_RESTRICT qblock,
+                                  const float* JXL_RESTRICT dequant,
+                                  const float* JXL_RESTRICT biases,
+                                  float* JXL_RESTRICT scratch_space,
+                                  float* JXL_RESTRICT output,
+                                  size_t output_stride, size_t dctsize) {
+  float* JXL_RESTRICT coeffs = scratch_space;
+  float* JXL_RESTRICT work = scratch_space + DCTSIZE2;
+  DequantBlock(qblock, dequant, biases, coeffs);
+
+  if (dctsize == 1) {
+    output[0] = coeffs[0];
+    return;
+  }
+
+  if (dctsize == 2 || dctsize == 4) {
+    float* JXL_RESTRICT full = scratch_space + 2 * DCTSIZE2;
+    ComputeScaledIDCT(coeffs, work, full, 8);
+    if (dctsize == 2) {
+      // Each output sample is the mean of a 4x4 cell of the 8x8 result.
+      for (size_t oy = 0; oy < 2; ++oy) {
+        for (size_t ox = 0; ox < 2; ++ox) {
+          float* cell = &full[32 * oy + 4 * ox];
+          output[oy * output_stride + ox] =
+              0.0625f *
+              (cell[0] + cell[1] + cell[2] + cell[3] + cell[8] + cell[9] +
+               cell[10] + cell[11] + cell[16] + cell[17] + cell[18] +
+               cell[19] + cell[24] + cell[25] + cell[26] + cell[27]);
+        }
+      }
+      return;
+    }
+    // dctsize == 4: each output sample is the mean of a 2x2 cell.
+    for (size_t oy = 0; oy < 4; ++oy) {
+      for (size_t ox = 0; ox < 4; ++ox) {
+        float* cell = &full[16 * oy + 2 * ox];
+        output[oy * output_stride + ox] =
+            0.25f * (cell[0] + cell[1] + cell[8] + cell[9]);
+      }
+    }
+    return;
+  }
+
+  float column_in[DCTSIZE];
+  float column_out[DCTSIZE * 2];
+  const size_t num_in = std::min<size_t>(dctsize, DCTSIZE);
+  // Column pass: transform every input column and store the result into
+  // `work`, which has rows of `dctsize` floats.
+  for (size_t x = 0; x < num_in; ++x) {
+    for (size_t y = 0; y < num_in; ++y) {
+      column_in[y] = coeffs[y * DCTSIZE + x];
+    }
+    Compute1dIDCT(column_in, column_out, dctsize);
+    for (size_t y = 0; y < dctsize; ++y) {
+      work[y * dctsize + x] = column_out[y];
+    }
+  }
+  // Row pass: transform every row of `work` straight into the output.
+  for (size_t y = 0; y < dctsize; ++y) {
+    float* row_in = work + y * dctsize;
+    float* row_out = output + y * output_stride;
+    Compute1dIDCT(row_in, row_out, dctsize);
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+
+HWY_EXPORT(InverseTransformBlock8x8);
+HWY_EXPORT(InverseTransformBlockGeneric);
+
+// Stores, for every component of `cinfo`, the inverse transform routine in
+// cinfo->master->inverse_transform[]: the 8x8 routine when the component's
+// scaled DCT size equals DCTSIZE, the generic routine otherwise.
+void ChooseInverseTransform(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* master = cinfo->master;
+  const int num_components = cinfo->num_components;
+  for (int comp = 0; comp < num_components; ++comp) {
+    const bool is_full_size = (master->scaled_dct_size[comp] == DCTSIZE);
+    if (is_full_size) {
+      master->inverse_transform[comp] =
+          HWY_DYNAMIC_DISPATCH(InverseTransformBlock8x8);
+    } else {
+      master->inverse_transform[comp] =
+          HWY_DYNAMIC_DISPATCH(InverseTransformBlockGeneric);
+    }
+  }
+}
+
+}  // namespace jpegli
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jpegli/idct.h b/third-party/libjxl/libjxl/lib/jpegli/idct.h
new file mode 100644
index 0000000000..c2ec6d18dc
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/idct.h
@@ -0,0 +1,18 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_IDCT_H_
+#define LIB_JPEGLI_IDCT_H_
+
+#include "lib/jpegli/common.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jpegli {
+
+// Selects the inverse DCT routine for every component of `cinfo` and stores
+// it in cinfo->master->inverse_transform[], based on the component's entry in
+// cinfo->master->scaled_dct_size[].
+void ChooseInverseTransform(j_decompress_ptr cinfo);
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_IDCT_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/input.cc b/third-party/libjxl/libjxl/lib/jpegli/input.cc
new file mode 100644
index 0000000000..765bf98946
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/input.cc
@@ -0,0 +1,414 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/input.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/input.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::Vec;
+
+using D = HWY_FULL(float);  // float vector tag; all row outputs use it
+using DU = HWY_FULL(uint32_t);  // integers are promoted to this before ConvertTo
+using DU8 = Rebind<uint8_t, D>;  // uint8_t tag rebound from D
+using DU16 = Rebind<uint16_t, D>;  // uint16_t tag rebound from D
+
+constexpr D d;
+constexpr DU du;
+constexpr DU8 du8;
+constexpr DU16 du16;
+
+static constexpr double kMul16 = 1.0 / 257.0;  // 65535 * kMul16 == 255
+static constexpr double kMulFloat = 255.0;  // multiplier for float samples
+
+template <size_t C>  // C = number of interleaved channels per pixel
+void ReadUint8Row(const uint8_t* row_in, size_t x0, size_t len,
+                  float* row_out[kMaxComponents]) {
+  // Scalar de-interleave of pixels [x0, len); sample values are kept as-is.
+  const uint8_t* pixel = row_in + C * x0;
+  for (size_t col = x0; col < len; ++col, pixel += C) {
+    for (size_t ch = 0; ch < C; ++ch) row_out[ch][col] = pixel[ch];
+  }
+}
+
+template <size_t C, bool swap_endianness = false>
+void ReadUint16Row(const uint8_t* row_in, size_t x0, size_t len,
+                   float* row_out[kMaxComponents]) {
+  // Scalar path: de-interleaves 16-bit samples of pixels [x0, len), optionally
+  // byte-swapping each one, and rescales them by kMul16.
+  const uint16_t* samples = reinterpret_cast<const uint16_t*>(row_in);
+  for (size_t idx = C * x0; idx < C * len; ++idx) {
+    const uint16_t raw = samples[idx];
+    const uint16_t val = swap_endianness ? JXL_BSWAP16(raw) : raw;
+    row_out[idx % C][idx / C] = val * kMul16;
+  }
+}
+
+template <size_t C, bool swap_endianness = false>
+void ReadFloatRow(const uint8_t* row_in, size_t x0, size_t len,
+                  float* row_out[kMaxComponents]) {
+  // Scalar path: de-interleaves float samples of pixels [x0, len), optionally
+  // byte-swapping each one, and rescales them by kMulFloat.
+  const float* samples = reinterpret_cast<const float*>(row_in);
+  for (size_t idx = C * x0; idx < C * len; ++idx) {
+    const float raw = samples[idx];
+    const float val = swap_endianness ? BSwapFloat(raw) : raw;
+    row_out[idx % C][idx / C] = val * kMulFloat;
+  }
+}
+
+void ReadUint8RowSingle(const uint8_t* row_in, size_t len,
+                        float* row_out[kMaxComponents]) {
+  const size_t N = Lanes(d);
+  const size_t simd_len = len & (~(N - 1));  // len rounded down to a multiple of N (N assumed power of two)
+  float* JXL_RESTRICT const row0 = row_out[0];
+  for (size_t x = 0; x < simd_len; x += N) {  // N samples per iteration
+    Store(ConvertTo(d, PromoteTo(du, LoadU(du8, row_in + x))), d, row0 + x);  // NOTE(review): Store, not StoreU - presumably row0 is vector-aligned; confirm
+  }
+  ReadUint8Row<1>(row_in, simd_len, len, row_out);  // scalar tail [simd_len, len)
+}
+
+void ReadUint8RowInterleaved2(const uint8_t* row_in, size_t len,
+                              float* row_out[kMaxComponents]) {
+  const size_t N = Lanes(d);
+  const size_t simd_len = len & (~(N - 1));  // len rounded down to a multiple of N (N assumed power of two)
+  float* JXL_RESTRICT const row0 = row_out[0];
+  float* JXL_RESTRICT const row1 = row_out[1];
+  Vec<DU8> out0, out1;
+  for (size_t x = 0; x < simd_len; x += N) {  // N pixels (2 * N bytes) per iteration
+    LoadInterleaved2(du8, row_in + 2 * x, out0, out1);  // split channels 0 and 1
+    Store(ConvertTo(d, PromoteTo(du, out0)), d, row0 + x);
+    Store(ConvertTo(d, PromoteTo(du, out1)), d, row1 + x);
+  }
+  ReadUint8Row<2>(row_in, simd_len, len, row_out);  // scalar tail [simd_len, len)
+}
+
+void ReadUint8RowInterleaved3(const uint8_t* row_in, size_t len,
+                              float* row_out[kMaxComponents]) {
+  const size_t N = Lanes(d);
+  const size_t simd_len = len & (~(N - 1));  // len rounded down to a multiple of N (N assumed power of two)
+  float* JXL_RESTRICT const row0 = row_out[0];
+  float* JXL_RESTRICT const row1 = row_out[1];
+  float* JXL_RESTRICT const row2 = row_out[2];
+  Vec<DU8> out0, out1, out2;
+  for (size_t x = 0; x < simd_len; x += N) {  // N pixels (3 * N bytes) per iteration
+    LoadInterleaved3(du8, row_in + 3 * x, out0, out1, out2);  // split channels 0..2
+    Store(ConvertTo(d, PromoteTo(du, out0)), d, row0 + x);
+    Store(ConvertTo(d, PromoteTo(du, out1)), d, row1 + x);
+    Store(ConvertTo(d, PromoteTo(du, out2)), d, row2 + x);
+  }
+  ReadUint8Row<3>(row_in, simd_len, len, row_out);  // scalar tail [simd_len, len)
+}
+
+void ReadUint8RowInterleaved4(const uint8_t* row_in, size_t len,
+                              float* row_out[kMaxComponents]) {
+  const size_t N = Lanes(d);
+  const size_t simd_len = len & (~(N - 1));  // len rounded down to a multiple of N (N assumed power of two)
+  float* JXL_RESTRICT const row0 = row_out[0];
+  float* JXL_RESTRICT const row1 = row_out[1];
+  float* JXL_RESTRICT const row2 = row_out[2];
+  float* JXL_RESTRICT const row3 = row_out[3];
+  Vec<DU8> out0, out1, out2, out3;
+  for (size_t x = 0; x < simd_len; x += N) {  // N pixels (4 * N bytes) per iteration
+    LoadInterleaved4(du8, row_in + 4 * x, out0, out1, out2, out3);  // split channels 0..3
+    Store(ConvertTo(d, PromoteTo(du, out0)), d, row0 + x);
+    Store(ConvertTo(d, PromoteTo(du, out1)), d, row1 + x);
+    Store(ConvertTo(d, PromoteTo(du, out2)), d, row2 + x);
+    Store(ConvertTo(d, PromoteTo(du, out3)), d, row3 + x);
+  }
+  ReadUint8Row<4>(row_in, simd_len, len, row_out);  // scalar tail [simd_len, len)
+}
+
+void ReadUint16RowSingle(const uint8_t* row_in, size_t len,
+                         float* row_out[kMaxComponents]) {
+  const size_t N = Lanes(d);
+  const size_t simd_len = len & (~(N - 1));  // len rounded down to a multiple of N (N assumed power of two)
+  const auto mul = Set(d, kMul16);  // broadcast of the 16-bit rescale factor
+  const uint16_t* JXL_RESTRICT const row =
+      reinterpret_cast<const uint16_t*>(row_in);  // samples are read in host byte order
+  float* JXL_RESTRICT const row0 = row_out[0];
+  for (size_t x = 0; x < simd_len; x += N) {  // N samples per iteration
+    Store(Mul(mul, ConvertTo(d, PromoteTo(du, LoadU(du16, row + x)))), d,
+          row0 + x);
+  }
+  ReadUint16Row<1>(row_in, simd_len, len, row_out);  // scalar tail [simd_len, len)
+}
+
+void ReadUint16RowInterleaved2(const uint8_t* row_in, size_t len,
+                               float* row_out[kMaxComponents]) {
+  const size_t N = Lanes(d);
+  const size_t simd_len = len & (~(N - 1));  // len rounded down to a multiple of N (N assumed power of two)
+  const auto mul = Set(d, kMul16);  // broadcast of the 16-bit rescale factor
+  const uint16_t* JXL_RESTRICT const row =
+      reinterpret_cast<const uint16_t*>(row_in);  // samples are read in host byte order
+  float* JXL_RESTRICT const row0 = row_out[0];
+  float* JXL_RESTRICT const row1 = row_out[1];
+  Vec<DU16> out0, out1;
+  for (size_t x = 0; x < simd_len; x += N) {  // N pixels per iteration
+    LoadInterleaved2(du16, row + 2 * x, out0, out1);  // split channels 0 and 1
+    Store(Mul(mul, ConvertTo(d, PromoteTo(du, out0))), d, row0 + x);
+    Store(Mul(mul, ConvertTo(d, PromoteTo(du, out1))), d, row1 + x);
+  }
+  ReadUint16Row<2>(row_in, simd_len, len, row_out);  // scalar tail [simd_len, len)
+}
+
+void ReadUint16RowInterleaved3(const uint8_t* row_in, size_t len,
+                               float* row_out[kMaxComponents]) {
+  const size_t N = Lanes(d);
+  const size_t simd_len = len & (~(N - 1));  // len rounded down to a multiple of N (N assumed power of two)
+  const auto mul = Set(d, kMul16);  // broadcast of the 16-bit rescale factor
+  const uint16_t* JXL_RESTRICT const row =
+      reinterpret_cast<const uint16_t*>(row_in);  // samples are read in host byte order
+  float* JXL_RESTRICT const row0 = row_out[0];
+  float* JXL_RESTRICT const row1 = row_out[1];
+  float* JXL_RESTRICT const row2 = row_out[2];
+  Vec<DU16> out0, out1, out2;
+  for (size_t x = 0; x < simd_len; x += N) {  // N pixels per iteration
+    LoadInterleaved3(du16, row + 3 * x, out0, out1, out2);  // split channels 0..2
+    Store(Mul(mul, ConvertTo(d, PromoteTo(du, out0))), d, row0 + x);
+    Store(Mul(mul, ConvertTo(d, PromoteTo(du, out1))), d, row1 + x);
+    Store(Mul(mul, ConvertTo(d, PromoteTo(du, out2))), d, row2 + x);
+  }
+  ReadUint16Row<3>(row_in, simd_len, len, row_out);  // scalar tail [simd_len, len)
+}
+
+void ReadUint16RowInterleaved4(const uint8_t* row_in, size_t len,
+                               float* row_out[kMaxComponents]) {
+  const size_t N = Lanes(d);
+  const size_t simd_len = len & (~(N - 1));  // len rounded down to a multiple of N (N assumed power of two)
+  const auto mul = Set(d, kMul16);  // broadcast of the 16-bit rescale factor
+  const uint16_t* JXL_RESTRICT const row =
+      reinterpret_cast<const uint16_t*>(row_in);  // samples are read in host byte order
+  float* JXL_RESTRICT const row0 = row_out[0];
+  float* JXL_RESTRICT const row1 = row_out[1];
+  float* JXL_RESTRICT const row2 = row_out[2];
+  float* JXL_RESTRICT const row3 = row_out[3];
+  Vec<DU16> out0, out1, out2, out3;
+  for (size_t x = 0; x < simd_len; x += N) {  // N pixels per iteration
+    LoadInterleaved4(du16, row + 4 * x, out0, out1, out2, out3);  // split channels 0..3
+    Store(Mul(mul, ConvertTo(d, PromoteTo(du, out0))), d, row0 + x);
+    Store(Mul(mul, ConvertTo(d, PromoteTo(du, out1))), d, row1 + x);
+    Store(Mul(mul, ConvertTo(d, PromoteTo(du, out2))), d, row2 + x);
+    Store(Mul(mul, ConvertTo(d, PromoteTo(du, out3))), d, row3 + x);
+  }
+  ReadUint16Row<4>(row_in, simd_len, len, row_out);  // scalar tail [simd_len, len)
+}
+
+void ReadUint16RowSingleSwap(const uint8_t* row_in, size_t len,
+                             float* row_out[kMaxComponents]) {
+  ReadUint16Row<1, true>(row_in, 0, len, row_out);  // whole row via the scalar byte-swapping reader
+}
+
+void ReadUint16RowInterleaved2Swap(const uint8_t* row_in, size_t len,
+                                   float* row_out[kMaxComponents]) {
+  ReadUint16Row<2, true>(row_in, 0, len, row_out);  // whole row via the scalar byte-swapping reader
+}
+
+void ReadUint16RowInterleaved3Swap(const uint8_t* row_in, size_t len,
+                                   float* row_out[kMaxComponents]) {
+  ReadUint16Row<3, true>(row_in, 0, len, row_out);  // whole row via the scalar byte-swapping reader
+}
+
+void ReadUint16RowInterleaved4Swap(const uint8_t* row_in, size_t len,
+                                   float* row_out[kMaxComponents]) {
+  ReadUint16Row<4, true>(row_in, 0, len, row_out);  // whole row via the scalar byte-swapping reader
+}
+
+void ReadFloatRowSingle(const uint8_t* row_in, size_t len,
+                        float* row_out[kMaxComponents]) {
+  const size_t N = Lanes(d);
+  const size_t simd_len = len & (~(N - 1));  // len rounded down to a multiple of N (N assumed power of two)
+  const auto mul = Set(d, kMulFloat);  // broadcast of the float rescale factor
+  const float* JXL_RESTRICT const row = reinterpret_cast<const float*>(row_in);
+  float* JXL_RESTRICT const row0 = row_out[0];
+  for (size_t x = 0; x < simd_len; x += N) {  // N samples per iteration
+    Store(Mul(mul, LoadU(d, row + x)), d, row0 + x);
+  }
+  ReadFloatRow<1>(row_in, simd_len, len, row_out);  // scalar tail [simd_len, len)
+}
+
+void ReadFloatRowInterleaved2(const uint8_t* row_in, size_t len,
+                              float* row_out[kMaxComponents]) {
+  const size_t N = Lanes(d);
+  const size_t simd_len = len & (~(N - 1));  // len rounded down to a multiple of N (N assumed power of two)
+  const auto mul = Set(d, kMulFloat);  // broadcast of the float rescale factor
+  const float* JXL_RESTRICT const row = reinterpret_cast<const float*>(row_in);
+  float* JXL_RESTRICT const row0 = row_out[0];
+  float* JXL_RESTRICT const row1 = row_out[1];
+  Vec<D> out0, out1;
+  for (size_t x = 0; x < simd_len; x += N) {  // N pixels per iteration
+    LoadInterleaved2(d, row + 2 * x, out0, out1);  // split channels 0 and 1
+    Store(Mul(mul, out0), d, row0 + x);
+    Store(Mul(mul, out1), d, row1 + x);
+  }
+  ReadFloatRow<2>(row_in, simd_len, len, row_out);  // scalar tail [simd_len, len)
+}
+
+void ReadFloatRowInterleaved3(const uint8_t* row_in, size_t len,
+                              float* row_out[kMaxComponents]) {
+  const size_t N = Lanes(d);
+  const size_t simd_len = len & (~(N - 1));  // len rounded down to a multiple of N (N assumed power of two)
+  const auto mul = Set(d, kMulFloat);  // broadcast of the float rescale factor
+  const float* JXL_RESTRICT const row = reinterpret_cast<const float*>(row_in);
+  float* JXL_RESTRICT const row0 = row_out[0];
+  float* JXL_RESTRICT const row1 = row_out[1];
+  float* JXL_RESTRICT const row2 = row_out[2];
+  Vec<D> out0, out1, out2;
+  for (size_t x = 0; x < simd_len; x += N) {  // N pixels per iteration
+    LoadInterleaved3(d, row + 3 * x, out0, out1, out2);  // split channels 0..2
+    Store(Mul(mul, out0), d, row0 + x);
+    Store(Mul(mul, out1), d, row1 + x);
+    Store(Mul(mul, out2), d, row2 + x);
+  }
+  ReadFloatRow<3>(row_in, simd_len, len, row_out);  // scalar tail [simd_len, len)
+}
+
+void ReadFloatRowInterleaved4(const uint8_t* row_in, size_t len,
+                              float* row_out[kMaxComponents]) {
+  const size_t N = Lanes(d);
+  const size_t simd_len = len & (~(N - 1));  // len rounded down to a multiple of N (N assumed power of two)
+  const auto mul = Set(d, kMulFloat);  // broadcast of the float rescale factor
+  const float* JXL_RESTRICT const row = reinterpret_cast<const float*>(row_in);
+  float* JXL_RESTRICT const row0 = row_out[0];
+  float* JXL_RESTRICT const row1 = row_out[1];
+  float* JXL_RESTRICT const row2 = row_out[2];
+  float* JXL_RESTRICT const row3 = row_out[3];
+  Vec<D> out0, out1, out2, out3;
+  for (size_t x = 0; x < simd_len; x += N) {  // N pixels per iteration
+    LoadInterleaved4(d, row + 4 * x, out0, out1, out2, out3);  // split channels 0..3
+    Store(Mul(mul, out0), d, row0 + x);
+    Store(Mul(mul, out1), d, row1 + x);
+    Store(Mul(mul, out2), d, row2 + x);
+    Store(Mul(mul, out3), d, row3 + x);
+  }
+  ReadFloatRow<4>(row_in, simd_len, len, row_out);  // scalar tail [simd_len, len)
+}
+
+void ReadFloatRowSingleSwap(const uint8_t* row_in, size_t len,
+                            float* row_out[kMaxComponents]) {
+  ReadFloatRow<1, true>(row_in, 0, len, row_out);  // whole row via the scalar byte-swapping reader
+}
+
+void ReadFloatRowInterleaved2Swap(const uint8_t* row_in, size_t len,
+                                  float* row_out[kMaxComponents]) {
+  ReadFloatRow<2, true>(row_in, 0, len, row_out);  // whole row via the scalar byte-swapping reader
+}
+
+void ReadFloatRowInterleaved3Swap(const uint8_t* row_in, size_t len,
+                                  float* row_out[kMaxComponents]) {
+  ReadFloatRow<3, true>(row_in, 0, len, row_out);  // whole row via the scalar byte-swapping reader
+}
+
+void ReadFloatRowInterleaved4Swap(const uint8_t* row_in, size_t len,
+                                  float* row_out[kMaxComponents]) {
+  ReadFloatRow<4, true>(row_in, 0, len, row_out);  // whole row via the scalar byte-swapping reader
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+
+HWY_EXPORT(ReadUint8RowSingle);  // each reader used with HWY_DYNAMIC_DISPATCH in ChooseInputMethod is exported here
+HWY_EXPORT(ReadUint8RowInterleaved2);
+HWY_EXPORT(ReadUint8RowInterleaved3);
+HWY_EXPORT(ReadUint8RowInterleaved4);
+HWY_EXPORT(ReadUint16RowSingle);
+HWY_EXPORT(ReadUint16RowInterleaved2);
+HWY_EXPORT(ReadUint16RowInterleaved3);
+HWY_EXPORT(ReadUint16RowInterleaved4);
+HWY_EXPORT(ReadUint16RowSingleSwap);
+HWY_EXPORT(ReadUint16RowInterleaved2Swap);
+HWY_EXPORT(ReadUint16RowInterleaved3Swap);
+HWY_EXPORT(ReadUint16RowInterleaved4Swap);
+HWY_EXPORT(ReadFloatRowSingle);
+HWY_EXPORT(ReadFloatRowInterleaved2);
+HWY_EXPORT(ReadFloatRowInterleaved3);
+HWY_EXPORT(ReadFloatRowInterleaved4);
+HWY_EXPORT(ReadFloatRowSingleSwap);
+HWY_EXPORT(ReadFloatRowInterleaved2Swap);
+HWY_EXPORT(ReadFloatRowInterleaved3Swap);
+HWY_EXPORT(ReadFloatRowInterleaved4Swap);
+
+void ChooseInputMethod(j_compress_ptr cinfo) {  // picks m->input_method from data type, byte order and channel count
+  jpeg_comp_master* m = cinfo->master;
+  bool swap_endianness =  // true when the declared input byte order differs from the host's
+      (m->endianness == JPEGLI_LITTLE_ENDIAN && !IsLittleEndian()) ||
+      (m->endianness == JPEGLI_BIG_ENDIAN && IsLittleEndian());
+  m->input_method = nullptr;  // stays null when no branch below matches
+  if (m->data_type == JPEGLI_TYPE_UINT8) {  // 8-bit samples: byte order is irrelevant
+    if (cinfo->raw_data_in || cinfo->input_components == 1) {  // raw input uses the single-channel reader
+      m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint8RowSingle);
+    } else if (cinfo->input_components == 2) {
+      m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint8RowInterleaved2);
+    } else if (cinfo->input_components == 3) {
+      m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint8RowInterleaved3);
+    } else if (cinfo->input_components == 4) {
+      m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint8RowInterleaved4);
+    }
+  } else if (m->data_type == JPEGLI_TYPE_UINT16 && !swap_endianness) {  // 16-bit, host byte order
+    if (cinfo->raw_data_in || cinfo->input_components == 1) {
+      m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowSingle);
+    } else if (cinfo->input_components == 2) {
+      m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved2);
+    } else if (cinfo->input_components == 3) {
+      m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved3);
+    } else if (cinfo->input_components == 4) {
+      m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved4);
+    }
+  } else if (m->data_type == JPEGLI_TYPE_UINT16 && swap_endianness) {  // 16-bit, byte-swapped
+    if (cinfo->raw_data_in || cinfo->input_components == 1) {
+      m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowSingleSwap);
+    } else if (cinfo->input_components == 2) {
+      m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved2Swap);
+    } else if (cinfo->input_components == 3) {
+      m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved3Swap);
+    } else if (cinfo->input_components == 4) {
+      m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved4Swap);
+    }
+  } else if (m->data_type == JPEGLI_TYPE_FLOAT && !swap_endianness) {  // float, host byte order
+    if (cinfo->raw_data_in || cinfo->input_components == 1) {
+      m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowSingle);
+    } else if (cinfo->input_components == 2) {
+      m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved2);
+    } else if (cinfo->input_components == 3) {
+      m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved3);
+    } else if (cinfo->input_components == 4) {
+      m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved4);
+    }
+  } else if (m->data_type == JPEGLI_TYPE_FLOAT && swap_endianness) {  // float, byte-swapped
+    if (cinfo->raw_data_in || cinfo->input_components == 1) {
+      m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowSingleSwap);
+    } else if (cinfo->input_components == 2) {
+      m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved2Swap);
+    } else if (cinfo->input_components == 3) {
+      m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved3Swap);
+    } else if (cinfo->input_components == 4) {
+      m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved4Swap);
+    }
+  }
+  if (m->input_method == nullptr) {  // unsupported data type or channel count
+    JPEGLI_ERROR("Could not find input method.");
+  }
+}
+
+}  // namespace jpegli
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jpegli/input.h b/third-party/libjxl/libjxl/lib/jpegli/input.h
new file mode 100644
index 0000000000..f54d0bee43
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/input.h
@@ -0,0 +1,17 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_INPUT_H_
+#define LIB_JPEGLI_INPUT_H_
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+// Sets cinfo->master->input_method; raises JPEGLI_ERROR if none matches.
+void ChooseInputMethod(j_compress_ptr cinfo);
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_INPUT_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/input_suspension_test.cc b/third-party/libjxl/libjxl/lib/jpegli/input_suspension_test.cc
new file mode 100644
index 0000000000..565559bcce
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/input_suspension_test.cc
@@ -0,0 +1,613 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include <cmath>
+#include <vector>
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jpegli {
+namespace {
+
+static constexpr uint8_t kFakeEoiMarker[2] = {0xff, 0xd9};  // fed by LoadNextChunk once a partial file's data is exhausted
+
+struct SourceManager {  // suspending source: data arrives via LoadNextChunk
+  SourceManager(const uint8_t* data, size_t len, size_t max_chunk_size,
+                bool is_partial_file)  // max_chunk_size == 0: use len
+      : data_(data),
+        len_(len),
+        pos_(0),
+        max_chunk_size_(max_chunk_size == 0 ? len : max_chunk_size),
+        is_partial_file_(is_partial_file) {
+    pub_.next_input_byte = nullptr;
+    pub_.bytes_in_buffer = 0;
+    pub_.init_source = init_source;
+    pub_.fill_input_buffer = fill_input_buffer;
+    pub_.skip_input_data = skip_input_data;
+    pub_.resync_to_restart = jpegli_resync_to_restart;
+    pub_.term_source = term_source;
+  }
+
+  ~SourceManager() {
+    // Everything handed to the decoder must have been consumed, and a
+    // complete (non-partial) file must have been read to its very end.
+    EXPECT_EQ(0, pub_.bytes_in_buffer);
+    if (!is_partial_file_) EXPECT_EQ(len_, pos_);
+  }
+
+  bool LoadNextChunk() {
+    const bool exhausted = pos_ >= len_;
+    if (exhausted && !is_partial_file_) return false;
+    const size_t leftover = pub_.bytes_in_buffer;
+    if (leftover > 0) {  // keep unconsumed bytes at the front of buffer_
+      EXPECT_LE(pub_.bytes_in_buffer, buffer_.size());
+      memmove(&buffer_[0], pub_.next_input_byte, leftover);
+    }
+    // Once the real data runs out, a two-byte fake EOI marker is appended.
+    const uint8_t* chunk = exhausted ? kFakeEoiMarker : data_ + pos_;
+    const size_t chunk_size =
+        exhausted ? 2 : std::min(len_ - pos_, max_chunk_size_);
+    buffer_.resize(leftover + chunk_size);
+    memcpy(&buffer_[leftover], chunk, chunk_size);
+    pub_.next_input_byte = &buffer_[0];
+    pub_.bytes_in_buffer = leftover + chunk_size;
+    pos_ += chunk_size;
+    return true;
+  }
+
+ private:
+  jpeg_source_mgr pub_;  // code in this file casts between this struct and jpeg_source_mgr
+  std::vector<uint8_t> buffer_;
+  const uint8_t* data_;
+  size_t len_;
+  size_t pos_;
+  size_t max_chunk_size_;
+  bool is_partial_file_;
+
+  static void init_source(j_decompress_ptr cinfo) {
+    jpeg_source_mgr& pub = reinterpret_cast<SourceManager*>(cinfo->src)->pub_;
+    pub.bytes_in_buffer = 0;
+    pub.next_input_byte = nullptr;
+  }
+
+  static boolean fill_input_buffer(j_decompress_ptr cinfo) { return FALSE; }
+
+  static void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {
+    if (num_bytes <= 0) return;
+    auto* self = reinterpret_cast<SourceManager*>(cinfo->src);
+    jpeg_source_mgr& pub = self->pub_;
+    const size_t to_skip = static_cast<size_t>(num_bytes);
+    if (to_skip <= pub.bytes_in_buffer) {  // fully covered by buffered bytes
+      pub.next_input_byte += to_skip;
+      pub.bytes_in_buffer -= to_skip;
+    } else {  // drop the buffer, skip the rest in the not-yet-loaded data
+      self->pos_ += to_skip - pub.bytes_in_buffer;
+      pub.bytes_in_buffer = 0;
+    }
+  }
+
+  static void term_source(j_decompress_ptr cinfo) {}
+};
+
+uint8_t markers_seen[kMarkerSequenceLen];  // marker codes recorded by test_marker_processor
+size_t num_markers_seen = 0;  // number of markers fully processed so far
+
+uint8_t get_next_byte(j_decompress_ptr cinfo) {  // pops one byte from cinfo->src; does not check bytes_in_buffer
+  cinfo->src->bytes_in_buffer--;
+  return *cinfo->src->next_input_byte++;
+}
+
+boolean test_marker_processor(j_decompress_ptr cinfo) {
+  // Records the marker, then consumes its payload; FALSE requests more input.
+  JXL_CHECK(num_markers_seen < kMarkerSequenceLen);  // markers_seen[] bound
+  markers_seen[num_markers_seen] = cinfo->unread_marker;
+  if (cinfo->src->bytes_in_buffer < 2) return FALSE;
+  // Operand order of `+` is unspecified, so read the high byte separately.
+  size_t marker_len = get_next_byte(cinfo);
+  marker_len = (marker_len << 8) + get_next_byte(cinfo);
+  EXPECT_EQ(2 + ((num_markers_seen + 2) % sizeof(kMarkerData)), marker_len);
+  if (marker_len > 2) (*cinfo->src->skip_input_data)(cinfo, marker_len - 2);
+  ++num_markers_seen;
+  return TRUE;
+}
+
+void ReadOutputImage(const DecompressParams& dparams, j_decompress_ptr cinfo,
+                     SourceManager* src, TestImage* output) {  // src may be null; then any suspension fails a JXL_CHECK
+  output->ysize = cinfo->output_height;
+  output->xsize = cinfo->output_width;
+  output->components = cinfo->num_components;
+  if (cinfo->raw_data_out) {  // raw mode: one plane per component, sized in whole DCT blocks
+    output->color_space = cinfo->jpeg_color_space;
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+      size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+      std::vector<uint8_t> plane(ysize * xsize);
+      output->raw_data.emplace_back(std::move(plane));
+    }
+  } else {  // scanline mode: interleaved pixels
+    output->color_space = cinfo->out_color_space;
+    output->AllocatePixels();
+  }
+  size_t total_output_lines = 0;
+  while (cinfo->output_scanline < cinfo->output_height) {
+    size_t max_lines;
+    size_t num_output_lines;
+    if (cinfo->raw_data_out) {
+      size_t iMCU_height = cinfo->max_v_samp_factor * DCTSIZE;
+      EXPECT_EQ(cinfo->output_scanline, cinfo->output_iMCU_row * iMCU_height);
+      max_lines = iMCU_height;
+      std::vector<std::vector<JSAMPROW>> rowdata(cinfo->num_components);
+      std::vector<JSAMPARRAY> data(cinfo->num_components);
+      for (int c = 0; c < cinfo->num_components; ++c) {
+        size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+        size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+        size_t num_lines = cinfo->comp_info[c].v_samp_factor * DCTSIZE;
+        rowdata[c].resize(num_lines);
+        size_t y0 = cinfo->output_iMCU_row * num_lines;  // first plane row of this iMCU row
+        for (size_t i = 0; i < num_lines; ++i) {
+          rowdata[c][i] =  // rows past the end of the plane get a null pointer
+              y0 + i < ysize ? &output->raw_data[c][(y0 + i) * xsize] : nullptr;
+        }
+        data[c] = &rowdata[c][0];
+      }
+      while ((num_output_lines =  // zero lines returned: load more input and retry
+                  jpegli_read_raw_data(cinfo, &data[0], max_lines)) == 0) {
+        JXL_CHECK(src && src->LoadNextChunk());
+      }
+    } else {
+      size_t max_output_lines = dparams.max_output_lines;
+      if (max_output_lines == 0) max_output_lines = cinfo->output_height;  // 0: request all lines at once
+      size_t lines_left = cinfo->output_height - cinfo->output_scanline;
+      max_lines = std::min<size_t>(max_output_lines, lines_left);
+      size_t stride = cinfo->output_width * cinfo->num_components;
+      std::vector<JSAMPROW> scanlines(max_lines);
+      for (size_t i = 0; i < max_lines; ++i) {
+        size_t yidx = cinfo->output_scanline + i;
+        scanlines[i] = &output->pixels[yidx * stride];
+      }
+      while ((num_output_lines = jpegli_read_scanlines(cinfo, &scanlines[0],  // zero lines returned: load more input and retry
+                                                       max_lines)) == 0) {
+        JXL_CHECK(src && src->LoadNextChunk());
+      }
+    }
+    total_output_lines += num_output_lines;
+    EXPECT_EQ(total_output_lines, cinfo->output_scanline);
+    if (num_output_lines < max_lines) {  // short read: load more input before the next request
+      JXL_CHECK(src && src->LoadNextChunk());
+    }
+  }
+}
+
+struct TestConfig {  // one parameter set for InputSuspensionTestParam
+  std::string fn;  // test data file passed to ReadTestData; empty means generate the input
+  std::string fn_desc;  // description of fn; IsSequential searches it for "PROGR"
+  TestImage input;  // filled by GeneratePixels when fn is empty
+  CompressParams jparams;  // encoder settings for the generated input
+  DecompressParams dparams;  // decoder settings (chunk_size, size_factor, output_mode, ...)
+  float max_rms_dist = 1.0f;  // tolerance handed to VerifyOutputImage
+};
+
+std::vector<uint8_t> GetTestJpegData(TestConfig& config) {
+  // A named test file takes precedence over a synthesized image.
+  if (!config.fn.empty()) return ReadTestData(config.fn.c_str());
+  // Otherwise generate pixels into config.input and encode them with jpegli.
+  std::vector<uint8_t> compressed;
+  GeneratePixels(&config.input);
+  JXL_CHECK(EncodeWithJpegli(config.input, config.jparams, &compressed));
+  return compressed;
+}
+
+bool IsSequential(const TestConfig& config) {
+  // File-based configs count as progressive iff fn_desc contains "PROGR".
+  const bool from_file = !config.fn.empty();
+  return from_file ? config.fn_desc.find("PROGR") == std::string::npos
+                   : config.jparams.progressive_mode <= 0;
+}
+
+class InputSuspensionTestParam : public ::testing::TestWithParam<TestConfig> {};  // fixture parameterized over TestConfig
+
+TEST_P(InputSuspensionTestParam, InputOutputLockStepNonBuffered) {  // decodes with chunked input, loading a chunk on every suspension
+  TestConfig config = GetParam();
+  const DecompressParams& dparams = config.dparams;
+  std::vector<uint8_t> compressed = GetTestJpegData(config);
+  bool is_partial = config.dparams.size_factor < 1.0f;
+  if (is_partial) {  // truncate the stream to simulate a partial file
+    compressed.resize(compressed.size() * config.dparams.size_factor);
+  }
+  SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size,
+                    is_partial);
+  TestImage output0;
+  jpeg_decompress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+
+    if (config.jparams.add_marker) {
+      jpegli_save_markers(&cinfo, kSpecialMarker0, 0xffff);
+      jpegli_save_markers(&cinfo, kSpecialMarker1, 0xffff);
+      num_markers_seen = 0;  // reset the global marker log for this run
+      jpegli_set_marker_processor(&cinfo, 0xe6, test_marker_processor);
+      jpegli_set_marker_processor(&cinfo, 0xe7, test_marker_processor);
+      jpegli_set_marker_processor(&cinfo, 0xe8, test_marker_processor);
+    }
+    while (jpegli_read_header(&cinfo, TRUE) == JPEG_SUSPENDED) {  // feed input until the header is parsed
+      JXL_CHECK(src.LoadNextChunk());
+    }
+    SetDecompressParams(dparams, &cinfo);
+    jpegli_set_output_format(&cinfo, dparams.data_type, dparams.endianness);
+    if (config.jparams.add_marker) {
+      EXPECT_EQ(num_markers_seen, kMarkerSequenceLen);
+      EXPECT_EQ(0, memcmp(markers_seen, kMarkerSequence, num_markers_seen));
+    }
+    VerifyHeader(config.jparams, &cinfo);
+    cinfo.raw_data_out = dparams.output_mode == RAW_DATA;
+
+    if (dparams.output_mode == COEFFICIENTS) {
+      jvirt_barray_ptr* coef_arrays;
+      while ((coef_arrays = jpegli_read_coefficients(&cinfo)) == nullptr) {  // null result: load more input and retry
+        JXL_CHECK(src.LoadNextChunk());
+      }
+      CopyCoefficients(&cinfo, coef_arrays, &output0);
+    } else {
+      while (!jpegli_start_decompress(&cinfo)) {  // false result: load more input and retry
+        JXL_CHECK(src.LoadNextChunk());
+      }
+      ReadOutputImage(dparams, &cinfo, &src, &output0);
+    }
+
+    while (!jpegli_finish_decompress(&cinfo)) {  // false result: load more input and retry
+      JXL_CHECK(src.LoadNextChunk());
+    }
+    return true;
+  };
+  ASSERT_TRUE(try_catch_block());
+  jpegli_destroy_decompress(&cinfo);
+
+  TestImage output1;  // reference decode of the same bytes with libjpeg
+  DecodeWithLibjpeg(config.jparams, dparams, compressed, &output1);
+  VerifyOutputImage(output1, output0, config.max_rms_dist);
+}
+
+TEST_P(InputSuspensionTestParam, InputOutputLockStepBuffered) {  // buffered-image mode: one output pass per input scan
+  TestConfig config = GetParam();
+  if (config.jparams.add_marker) return;  // marker configs are not exercised by this test
+  const DecompressParams& dparams = config.dparams;
+  std::vector<uint8_t> compressed = GetTestJpegData(config);
+  bool is_partial = config.dparams.size_factor < 1.0f;
+  if (is_partial) {  // truncate the stream to simulate a partial file
+    compressed.resize(compressed.size() * config.dparams.size_factor);
+  }
+  SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size,
+                    is_partial);
+  std::vector<TestImage> output_progression0;  // one decoded image per scan
+  jpeg_decompress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+
+    cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+
+    while (jpegli_read_header(&cinfo, TRUE) == JPEG_SUSPENDED) {  // feed input until the header is parsed
+      JXL_CHECK(src.LoadNextChunk());
+    }
+    SetDecompressParams(dparams, &cinfo);
+    jpegli_set_output_format(&cinfo, dparams.data_type, dparams.endianness);
+
+    cinfo.buffered_image = TRUE;
+    cinfo.raw_data_out = dparams.output_mode == RAW_DATA;
+
+    EXPECT_TRUE(jpegli_start_decompress(&cinfo));
+    EXPECT_FALSE(jpegli_input_complete(&cinfo));
+    EXPECT_EQ(0, cinfo.output_scan_number);
+
+    int sos_marker_cnt = 1;  // read_header reads the first SOS marker
+    while (!jpegli_input_complete(&cinfo)) {
+      EXPECT_EQ(cinfo.input_scan_number, sos_marker_cnt);
+      EXPECT_TRUE(jpegli_start_output(&cinfo, cinfo.input_scan_number));
+      // start output sets output_scan_number, but does not change
+      // input_scan_number
+      EXPECT_EQ(cinfo.output_scan_number, cinfo.input_scan_number);
+      EXPECT_EQ(cinfo.input_scan_number, sos_marker_cnt);
+      TestImage output;
+      ReadOutputImage(dparams, &cinfo, &src, &output);
+      output_progression0.emplace_back(std::move(output));
+      // read scanlines/read raw data does not change input/output scan number
+      EXPECT_EQ(cinfo.input_scan_number, sos_marker_cnt);
+      EXPECT_EQ(cinfo.output_scan_number, cinfo.input_scan_number);
+      while (!jpegli_finish_output(&cinfo)) {  // false result: load more input and retry
+        JXL_CHECK(src.LoadNextChunk());
+      }
+      ++sos_marker_cnt;  // finish output reads the next SOS marker or EOI
+      if (dparams.output_mode == COEFFICIENTS) {
+        jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(&cinfo);
+        JXL_CHECK(coef_arrays != nullptr);
+        CopyCoefficients(&cinfo, coef_arrays, &output_progression0.back());
+      }
+    }
+
+    EXPECT_TRUE(jpegli_finish_decompress(&cinfo));
+    return true;
+  };
+  ASSERT_TRUE(try_catch_block());
+  jpegli_destroy_decompress(&cinfo);
+
+  std::vector<TestImage> output_progression1;  // per-scan reference images from libjpeg
+  DecodeAllScansWithLibjpeg(config.jparams, dparams, compressed,
+                            &output_progression1);
+  ASSERT_EQ(output_progression0.size(), output_progression1.size());
+  for (size_t i = 0; i < output_progression0.size(); ++i) {
+    const TestImage& output = output_progression0[i];
+    const TestImage& expected = output_progression1[i];
+    VerifyOutputImage(expected, output, config.max_rms_dist);
+  }
+}
+
+TEST_P(InputSuspensionTestParam, PreConsumeInputBuffered) {  // consumes all input first, then produces a single output pass
+  TestConfig config = GetParam();
+  if (config.jparams.add_marker) return;  // marker configs are not exercised by this test
+  const DecompressParams& dparams = config.dparams;
+  std::vector<uint8_t> compressed = GetTestJpegData(config);
+  bool is_partial = config.dparams.size_factor < 1.0f;
+  if (is_partial) {  // truncate the stream to simulate a partial file
+    compressed.resize(compressed.size() * config.dparams.size_factor);
+  }
+  std::vector<TestImage> output_progression1;  // per-scan reference images from libjpeg
+  DecodeAllScansWithLibjpeg(config.jparams, dparams, compressed,
+                            &output_progression1);
+  SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size,
+                    is_partial);
+  TestImage output0;
+  jpeg_decompress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+
+    int status;
+    while ((status = jpegli_consume_input(&cinfo)) != JPEG_REACHED_SOS) {  // consume input up to the first SOS
+      if (status == JPEG_SUSPENDED) {
+        JXL_CHECK(src.LoadNextChunk());
+      }
+    }
+    EXPECT_EQ(JPEG_REACHED_SOS, jpegli_consume_input(&cinfo));  // expects a repeated call to report SOS again
+    cinfo.buffered_image = TRUE;
+    cinfo.raw_data_out = dparams.output_mode == RAW_DATA;
+    cinfo.do_block_smoothing = dparams.do_block_smoothing;
+
+    EXPECT_TRUE(jpegli_start_decompress(&cinfo));
+    EXPECT_FALSE(jpegli_input_complete(&cinfo));
+    EXPECT_EQ(1, cinfo.input_scan_number);
+    EXPECT_EQ(0, cinfo.output_scan_number);
+
+    while ((status = jpegli_consume_input(&cinfo)) != JPEG_REACHED_EOI) {  // consume the rest of the input up to EOI
+      if (status == JPEG_SUSPENDED) {
+        JXL_CHECK(src.LoadNextChunk());
+      }
+    }
+
+    EXPECT_TRUE(jpegli_input_complete(&cinfo));
+    EXPECT_EQ(output_progression1.size(), cinfo.input_scan_number);
+    EXPECT_EQ(0, cinfo.output_scan_number);
+
+    EXPECT_TRUE(jpegli_start_output(&cinfo, cinfo.input_scan_number));
+    EXPECT_EQ(output_progression1.size(), cinfo.input_scan_number);
+    EXPECT_EQ(cinfo.output_scan_number, cinfo.input_scan_number);
+
+    ReadOutputImage(dparams, &cinfo, nullptr, &output0);  // null src: output must not need further input
+    EXPECT_EQ(output_progression1.size(), cinfo.input_scan_number);
+    EXPECT_EQ(cinfo.output_scan_number, cinfo.input_scan_number);
+
+    EXPECT_TRUE(jpegli_finish_output(&cinfo));
+    if (dparams.output_mode == COEFFICIENTS) {
+      jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(&cinfo);
+      JXL_CHECK(coef_arrays != nullptr);
+      CopyCoefficients(&cinfo, coef_arrays, &output0);
+    }
+    EXPECT_TRUE(jpegli_finish_decompress(&cinfo));
+    return true;
+  };
+  ASSERT_TRUE(try_catch_block());
+  jpegli_destroy_decompress(&cinfo);
+
+  VerifyOutputImage(output_progression1.back(), output0, config.max_rms_dist);  // compare against libjpeg's last scan
+}
+
+TEST_P(InputSuspensionTestParam, PreConsumeInputNonBuffered) {
+  TestConfig config = GetParam();
+  if (config.jparams.add_marker || IsSequential(config)) return;
+  const DecompressParams& dparams = config.dparams;
+  std::vector<uint8_t> compressed = GetTestJpegData(config);
+  bool is_partial = config.dparams.size_factor < 1.0f;
+  if (is_partial) {  // keep only the leading size_factor share of the stream
+    compressed.resize(compressed.size() * config.dparams.size_factor);
+  }
+  SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size,
+                    is_partial);
+  TestImage output0;
+  jpeg_decompress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+    // Pre-consume the headers, loading another chunk on every suspension.
+    int status;
+    while ((status = jpegli_consume_input(&cinfo)) != JPEG_REACHED_SOS) {
+      if (status == JPEG_SUSPENDED) {
+        JXL_CHECK(src.LoadNextChunk());
+      }
+    }
+    EXPECT_EQ(JPEG_REACHED_SOS, jpegli_consume_input(&cinfo));
+    cinfo.raw_data_out = dparams.output_mode == RAW_DATA;
+    cinfo.do_block_smoothing = dparams.do_block_smoothing;
+    // Begin decoding; start_decompress is retried with more input until true.
+    if (dparams.output_mode == COEFFICIENTS) {
+      jpegli_read_coefficients(&cinfo);
+    } else {
+      while (!jpegli_start_decompress(&cinfo)) {
+        JXL_CHECK(src.LoadNextChunk());
+      }
+    }
+    // Pre-consume everything up to EOI before fetching any output.
+    while ((status = jpegli_consume_input(&cinfo)) != JPEG_REACHED_EOI) {
+      if (status == JPEG_SUSPENDED) {
+        JXL_CHECK(src.LoadNextChunk());
+      }
+    }
+    // Only now fetch the result: the coefficients or the decoded image.
+    if (dparams.output_mode == COEFFICIENTS) {
+      jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(&cinfo);
+      JXL_CHECK(coef_arrays != nullptr);
+      CopyCoefficients(&cinfo, coef_arrays, &output0);
+    } else {
+      ReadOutputImage(dparams, &cinfo, nullptr, &output0);
+    }
+
+    EXPECT_TRUE(jpegli_finish_decompress(&cinfo));
+    return true;
+  };
+  ASSERT_TRUE(try_catch_block());
+  jpegli_destroy_decompress(&cinfo);
+  // Reference result: libjpeg decoding the same bytes.
+  TestImage output1;
+  DecodeWithLibjpeg(config.jparams, dparams, compressed, &output1);
+  VerifyOutputImage(output1, output0, config.max_rms_dist);
+}
+
+std::vector<TestConfig> GenerateTests() {
+  std::vector<TestConfig> configs;
+  const std::vector<std::pair<std::string, std::string>> jpeg_files = {
+      {"jxl/flower/flower.png.im_q85_444.jpg", "Q85YUV444"},
+      {"jxl/flower/flower.png.im_q85_420_R13B.jpg", "Q85YUV420R13B"},
+      {"jxl/flower/flower.png.im_q85_420_progr.jpg", "Q85YUV420PROGR"},
+  };
+  for (const auto& file : jpeg_files) {
+    for (size_t chunk : {1, 64, 65536}) {
+      for (size_t lines : {0, 1, 8, 16}) {
+        TestConfig cfg;
+        cfg.dparams.max_output_lines = lines;
+        cfg.dparams.chunk_size = chunk;
+        cfg.fn_desc = file.second;
+        cfg.fn = file.first;
+        configs.push_back(cfg);
+        if (lines == 16) {  // also cover the raw-data and coefficient modes
+          cfg.dparams.output_mode = RAW_DATA;
+          configs.push_back(cfg);
+          cfg.dparams.output_mode = COEFFICIENTS;
+          configs.push_back(cfg);
+        }
+      }
+    }
+  }
+  for (size_t restart : {1, 17, 1024}) {
+    for (size_t chunk : {1, 65536}) {
+      TestConfig cfg;
+      cfg.jparams.restart_interval = restart;
+      cfg.jparams.progressive_mode = 2;
+      cfg.dparams.chunk_size = chunk;
+      configs.push_back(cfg);
+    }
+  }
+  for (size_t chunk : {1, 4, 1024}) {
+    TestConfig cfg;
+    cfg.jparams.add_marker = true;
+    cfg.dparams.chunk_size = chunk;
+    cfg.input.ysize = 256;
+    cfg.input.xsize = 256;
+    configs.push_back(cfg);
+  }
+  // Truncated (partial) input streams.
+  for (float fraction : {0.1f, 0.33f, 0.5f, 0.75f}) {
+    for (int mode : {0, 1, 3}) {
+      for (int sampling : {1, 2}) {
+        for (JpegIOMode io_mode : {PIXELS, RAW_DATA}) {
+          TestConfig cfg;
+          cfg.dparams.output_mode = io_mode;
+          cfg.dparams.size_factor = fraction;
+          cfg.jparams.progressive_mode = mode;
+          cfg.jparams.v_sampling = {sampling, 1, 1};
+          cfg.jparams.h_sampling = {sampling, 1, 1};
+          cfg.input.ysize = 523;
+          cfg.input.xsize = 517;
+          // The last partially available block can behave differently.
+          // TODO(szabadka) Figure out if we can make the behaviour more
+          // similar.
+          cfg.max_rms_dist = (sampling == 1) ? 1.75f : 3.0f;
+          configs.push_back(cfg);
+        }
+      }
+    }
+  }
+  // Block smoothing, on truncated as well as complete streams.
+  for (float fraction : {0.1f, 0.33f, 0.5f, 0.75f, 1.0f}) {
+    for (int sampling : {1, 2}) {
+      TestConfig cfg;
+      cfg.dparams.do_block_smoothing = true;
+      cfg.dparams.size_factor = fraction;
+      cfg.jparams.progressive_mode = 2;
+      cfg.jparams.v_sampling = {sampling, 1, 1};
+      cfg.jparams.h_sampling = {sampling, 1, 1};
+      cfg.input.ysize = 523;
+      cfg.input.xsize = 517;
+      // libjpeg does smoothing for incomplete scans differently at
+      // the border between current and previous scans.
+      cfg.max_rms_dist = 8.0f;
+      configs.push_back(cfg);
+    }
+  }
+  return configs;
+}
+
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+  if (c.fn.empty()) {
+    os << c.input;
+  } else {
+    os << c.fn_desc;
+  }
+  os << c.jparams;
+  if (c.dparams.chunk_size != 0) {
+    os << "InputChunks" << c.dparams.chunk_size;
+  } else {
+    os << "CompleteInput";
+  }
+  if (c.dparams.size_factor < 1.0f) {
+    os << "Partial" << static_cast<int>(c.dparams.size_factor * 100) << "p";
+  }
+  if (c.dparams.max_output_lines != 0) {
+    os << "OutputLines" << c.dparams.max_output_lines;
+  } else {
+    os << "CompleteOutput";
+  }
+  if (c.dparams.output_mode == COEFFICIENTS) {
+    os << "CoeffsOut";
+  } else if (c.dparams.output_mode == RAW_DATA) {
+    os << "RawDataOut";
+  }
+  if (c.dparams.do_block_smoothing) {
+    os << "BlockSmoothing";
+  }
+  return os;  // TestDescription uses this text as the test name
+}
+
+std::string TestDescription(
+    const testing::TestParamInfo<InputSuspensionTestParam::ParamType>& info) {
+  std::stringstream description;  // filled through the param's operator<<
+  description << info.param;
+  return description.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(InputSuspensionTest, InputSuspensionTestParam,
+                                testing::ValuesIn(GenerateTests()),
+                                TestDescription);
+
+}  // namespace
+}  // namespace jpegli
diff --git a/third-party/libjxl/libjxl/lib/jpegli/jpeg.version.62 b/third-party/libjxl/libjxl/lib/jpegli/jpeg.version.62
new file mode 100644
index 0000000000..3a8d1f5ec5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/jpeg.version.62
@@ -0,0 +1,11 @@
+LIBJPEG_6.2 {
+  global:
+    jpeg*;
+};
+
+LIBJPEGTURBO_6.2 {
+  global:
+    jpeg_mem_src*;
+    jpeg_mem_dest*;
+    tj*;
+};
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/lib/jpegli/jpeg.version.8 b/third-party/libjxl/libjxl/lib/jpegli/jpeg.version.8
new file mode 100644
index 0000000000..aa891f8571
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/jpeg.version.8
@@ -0,0 +1,9 @@
+LIBJPEG_8.0 {
+  global:
+    jpeg*;
+};
+
+LIBJPEGTURBO_8.0 {
+  global:
+    tj*;
+};
diff --git a/third-party/libjxl/libjxl/lib/jpegli/libjpeg_test_util.cc b/third-party/libjxl/libjxl/lib/jpegli/libjpeg_test_util.cc
new file mode 100644
index 0000000000..de2303756e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/libjpeg_test_util.cc
@@ -0,0 +1,261 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/libjpeg_test_util.h"
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+#include <setjmp.h>
+/* clang-format on */
+
+#include "lib/jxl/sanitizers.h"
+
+namespace jpegli {
+
+namespace {
+
+#define JPEG_API_FN(name) jpeg_##name
+#include "lib/jpegli/test_utils-inl.h"
+#undef JPEG_API_FN
+
+void ReadOutputPass(j_decompress_ptr cinfo, const DecompressParams& dparams,
+                    TestImage* output) {  // reads one output pass into *output
+  JDIMENSION xoffset = 0;
+  JDIMENSION yoffset = 0;
+  JDIMENSION xsize_cropped = cinfo->output_width;
+  JDIMENSION ysize_cropped = cinfo->output_height;
+  if (dparams.crop_output) {  // keep only the middle third in x and y
+    xoffset = xsize_cropped = cinfo->output_width / 3;
+    yoffset = ysize_cropped = cinfo->output_height / 3;
+    jpeg_crop_scanline(cinfo, &xoffset, &xsize_cropped);
+    JXL_CHECK(xsize_cropped == cinfo->output_width);
+  }
+  output->xsize = xsize_cropped;
+  output->ysize = ysize_cropped;
+  output->components = cinfo->out_color_components;
+  if (cinfo->quantize_colors) {  // mark the colormap as initialized for MSan
+    jxl::msan::UnpoisonMemory(cinfo->colormap, cinfo->out_color_components *
+                                                   sizeof(cinfo->colormap[0]));
+    for (int c = 0; c < cinfo->out_color_components; ++c) {
+      jxl::msan::UnpoisonMemory(
+          cinfo->colormap[c],
+          cinfo->actual_number_of_colors * sizeof(cinfo->colormap[c][0]));
+    }
+  }
+  if (!cinfo->raw_data_out) {  // scanline output into output->pixels
+    size_t stride = output->xsize * output->components;
+    output->pixels.resize(output->ysize * stride);
+    output->color_space = cinfo->out_color_space;
+    if (yoffset > 0) {  // skip the rows above the crop window
+      jpeg_skip_scanlines(cinfo, yoffset);
+    }
+    for (size_t y = 0; y < output->ysize; ++y) {
+      JSAMPROW rows[] = {
+          reinterpret_cast<JSAMPLE*>(&output->pixels[y * stride])};
+      JXL_CHECK(1 == jpeg_read_scanlines(cinfo, rows, 1));
+      jxl::msan::UnpoisonMemory(
+          rows[0], sizeof(JSAMPLE) * cinfo->output_components * output->xsize);
+      if (cinfo->quantize_colors) {
+        UnmapColors(rows[0], cinfo->output_width, cinfo->out_color_components,
+                    cinfo->colormap, cinfo->actual_number_of_colors);
+      }
+    }
+    if (cinfo->output_scanline < cinfo->output_height) {
+      jpeg_skip_scanlines(cinfo, cinfo->output_height - cinfo->output_scanline);
+    }
+  } else {  // raw data output: one plane per component in output->raw_data
+    output->color_space = cinfo->jpeg_color_space;
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+      size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+      std::vector<uint8_t> plane(ysize * xsize);
+      output->raw_data.emplace_back(std::move(plane));
+    }
+    while (cinfo->output_scanline < cinfo->output_height) {
+      size_t iMCU_height = cinfo->max_v_samp_factor * DCTSIZE;
+      JXL_CHECK(cinfo->output_scanline == cinfo->output_iMCU_row * iMCU_height);
+      std::vector<std::vector<JSAMPROW>> rowdata(cinfo->num_components);
+      std::vector<JSAMPARRAY> data(cinfo->num_components);
+      for (int c = 0; c < cinfo->num_components; ++c) {
+        size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+        size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+        size_t num_lines = cinfo->comp_info[c].v_samp_factor * DCTSIZE;
+        rowdata[c].resize(num_lines);  // rows past the plane end become null
+        size_t y0 = cinfo->output_iMCU_row * num_lines;
+        for (size_t i = 0; i < num_lines; ++i) {
+          rowdata[c][i] =
+              y0 + i < ysize ? &output->raw_data[c][(y0 + i) * xsize] : nullptr;
+        }
+        data[c] = &rowdata[c][0];
+      }
+      JXL_CHECK(iMCU_height ==
+                jpeg_read_raw_data(cinfo, &data[0], iMCU_height));
+    }
+  }
+  JXL_CHECK(cinfo->total_iMCU_rows ==
+            DivCeil(cinfo->image_height, cinfo->max_v_samp_factor * DCTSIZE));
+}
+
+void DecodeWithLibjpeg(const CompressParams& jparams,
+                       const DecompressParams& dparams, j_decompress_ptr cinfo,
+                       TestImage* output) {  // cinfo: source already set
+  if (jparams.add_marker) {
+    jpeg_save_markers(cinfo, kSpecialMarker0, 0xffff);
+    jpeg_save_markers(cinfo, kSpecialMarker1, 0xffff);
+  }
+  if (!jparams.icc.empty()) {  // keep APP2 segments so the ICC can be read back
+    jpeg_save_markers(cinfo, JPEG_APP0 + 2, 0xffff);
+  }
+  JXL_CHECK(JPEG_REACHED_SOS ==
+            jpeg_read_header(cinfo, /*require_image=*/TRUE));
+  if (!jparams.icc.empty()) {
+    uint8_t* icc_data = nullptr;
+    unsigned int icc_len = 0;  // has to equal jparams.icc.size(), checked below
+    JXL_CHECK(jpeg_read_icc_profile(cinfo, &icc_data, &icc_len));
+    JXL_CHECK(icc_data != nullptr && icc_len == jparams.icc.size());
+    jxl::msan::UnpoisonMemory(icc_data, icc_len);
+    JXL_CHECK(0 == memcmp(jparams.icc.data(), icc_data, icc_len));
+    free(icc_data);
+  }
+  SetDecompressParams(dparams, cinfo);
+  VerifyHeader(jparams, cinfo);
+  if (dparams.output_mode == COEFFICIENTS) {
+    jvirt_barray_ptr* coef_arrays = jpeg_read_coefficients(cinfo);
+    JXL_CHECK(coef_arrays != nullptr);
+    CopyCoefficients(cinfo, coef_arrays, output);
+  } else {
+    JXL_CHECK(jpeg_start_decompress(cinfo));
+    VerifyScanHeader(jparams, cinfo);
+    ReadOutputPass(cinfo, dparams, output);
+  }
+  JXL_CHECK(jpeg_finish_decompress(cinfo));
+}
+
+}  // namespace
+
+// Decodes `compressed` with libjpeg in buffered-image mode and appends one
+// TestImage per output pass to *output_progression; headers are checked too.
+void DecodeAllScansWithLibjpeg(const CompressParams& jparams,
+                               const DecompressParams& dparams,
+                               const std::vector<uint8_t>& compressed,
+                               std::vector<TestImage>* output_progression) {
+  jpeg_decompress_struct cinfo = {};
+  const auto try_catch_block = [&]() {
+    jpeg_error_mgr jerr;
+    jmp_buf env;
+    cinfo.err = jpeg_std_error(&jerr);
+    if (setjmp(env)) {  // reached via longjmp from error_exit below
+      return false;
+    }
+    cinfo.client_data = reinterpret_cast<void*>(&env);
+    cinfo.err->error_exit = [](j_common_ptr cinfo) {
+      (*cinfo->err->output_message)(cinfo);
+      jmp_buf* env = reinterpret_cast<jmp_buf*>(cinfo->client_data);
+      jpeg_destroy(cinfo);
+      longjmp(*env, 1);
+    };
+    jpeg_create_decompress(&cinfo);
+    jpeg_mem_src(&cinfo, compressed.data(), compressed.size());
+    if (jparams.add_marker) {
+      jpeg_save_markers(&cinfo, kSpecialMarker0, 0xffff);
+      jpeg_save_markers(&cinfo, kSpecialMarker1, 0xffff);
+    }
+    JXL_CHECK(JPEG_REACHED_SOS ==
+              jpeg_read_header(&cinfo, /*require_image=*/TRUE));
+    cinfo.buffered_image = TRUE;
+    SetDecompressParams(dparams, &cinfo);
+    VerifyHeader(jparams, &cinfo);
+    JXL_CHECK(jpeg_start_decompress(&cinfo));
+    // start decompress should not read the whole input in buffered image mode
+    JXL_CHECK(!jpeg_input_complete(&cinfo));
+    JXL_CHECK(cinfo.output_scan_number == 0);
+    int sos_marker_cnt = 1;  // read header reads the first SOS marker
+    while (!jpeg_input_complete(&cinfo)) {
+      JXL_CHECK(cinfo.input_scan_number == sos_marker_cnt);
+      if (dparams.skip_scans && (cinfo.input_scan_number % 2) != 1) {
+        int result = JPEG_SUSPENDED;  // consume this scan without any output
+        while (result != JPEG_REACHED_SOS && result != JPEG_REACHED_EOI) {
+          result = jpeg_consume_input(&cinfo);
+        }
+        if (result == JPEG_REACHED_SOS) ++sos_marker_cnt;
+        continue;
+      }
+      SetScanDecompressParams(dparams, &cinfo, cinfo.input_scan_number);
+      JXL_CHECK(jpeg_start_output(&cinfo, cinfo.input_scan_number));
+      // start output sets output_scan_number, but does not change
+      // input_scan_number
+      JXL_CHECK(cinfo.output_scan_number == cinfo.input_scan_number);
+      JXL_CHECK(cinfo.input_scan_number == sos_marker_cnt);
+      VerifyScanHeader(jparams, &cinfo);
+      TestImage output;
+      ReadOutputPass(&cinfo, dparams, &output);
+      output_progression->emplace_back(std::move(output));
+      // read scanlines/read raw data does not change input/output scan number
+      if (!cinfo.progressive_mode) {
+        JXL_CHECK(cinfo.input_scan_number == sos_marker_cnt);
+        JXL_CHECK(cinfo.output_scan_number == cinfo.input_scan_number);
+      }
+      JXL_CHECK(jpeg_finish_output(&cinfo));
+      ++sos_marker_cnt;  // finish output reads the next SOS marker or EOI
+      if (dparams.output_mode == COEFFICIENTS) {
+        jvirt_barray_ptr* coef_arrays = jpeg_read_coefficients(&cinfo);
+        JXL_CHECK(coef_arrays != nullptr);
+        CopyCoefficients(&cinfo, coef_arrays, &output_progression->back());
+      }
+    }
+    JXL_CHECK(jpeg_finish_decompress(&cinfo));
+    return true;
+  };
+  JXL_CHECK(try_catch_block());
+  jpeg_destroy_decompress(&cinfo);
+}
+
+// Returns the number of bytes of `compressed` that libjpeg consumed.
+size_t DecodeWithLibjpeg(const CompressParams& jparams,
+                         const DecompressParams& dparams,
+                         const uint8_t* table_stream, size_t table_stream_size,
+                         const uint8_t* compressed, size_t len,
+                         TestImage* output) {
+  jpeg_decompress_struct cinfo = {};
+  size_t bytes_read = 0;  // only meaningful once try_catch_block() succeeded
+  const auto try_catch_block = [&]() {
+    jpeg_error_mgr jerr;
+    jmp_buf env;
+    cinfo.err = jpeg_std_error(&jerr);
+    if (setjmp(env)) {  // reached via longjmp from error_exit below
+      return false;
+    }
+    cinfo.client_data = reinterpret_cast<void*>(&env);
+    cinfo.err->error_exit = [](j_common_ptr cinfo) {
+      (*cinfo->err->output_message)(cinfo);
+      jmp_buf* env = reinterpret_cast<jmp_buf*>(cinfo->client_data);
+      jpeg_destroy(cinfo);
+      longjmp(*env, 1);
+    };
+    jpeg_create_decompress(&cinfo);
+    if (table_stream != nullptr) {  // optional tables-only stream is read first
+      jpeg_mem_src(&cinfo, table_stream, table_stream_size);
+      JXL_CHECK(JPEG_HEADER_TABLES_ONLY == jpeg_read_header(&cinfo, FALSE));
+    }
+    jpeg_mem_src(&cinfo, compressed, len);
+    DecodeWithLibjpeg(jparams, dparams, &cinfo, output);
+    bytes_read = len - cinfo.src->bytes_in_buffer;
+    return true;
+  };
+  JXL_CHECK(try_catch_block());
+  jpeg_destroy_decompress(&cinfo);
+  return bytes_read;
+}
+
+void DecodeWithLibjpeg(const CompressParams& jparams,
+                       const DecompressParams& dparams,
+                       const std::vector<uint8_t>& compressed,
+                       TestImage* output) {
+  DecodeWithLibjpeg(jparams, dparams, /*table_stream=*/nullptr, 0,
+                    compressed.data(), compressed.size(), output);
+}
+
+}  // namespace jpegli
diff --git a/third-party/libjxl/libjxl/lib/jpegli/libjpeg_test_util.h b/third-party/libjxl/libjxl/lib/jpegli/libjpeg_test_util.h
new file mode 100644
index 0000000000..18cc1e57b5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/libjpeg_test_util.h
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_LIBJPEG_TEST_UTIL_H_
+#define LIB_JPEGLI_LIBJPEG_TEST_UTIL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jpegli/test_params.h"
+
+namespace jpegli {
+
+// Decodes `compressed` with libjpeg in buffered-image mode and appends one
+// TestImage per output pass to *output_progression; headers are checked too.
+void DecodeAllScansWithLibjpeg(const CompressParams& jparams,
+                               const DecompressParams& dparams,
+                               const std::vector<uint8_t>& compressed,
+                               std::vector<TestImage>* output_progression);
+// Returns the number of bytes read from compressed.
+size_t DecodeWithLibjpeg(const CompressParams& jparams,
+                         const DecompressParams& dparams,
+                         const uint8_t* table_stream, size_t table_stream_size,
+                         const uint8_t* compressed, size_t len,
+                         TestImage* output);
+void DecodeWithLibjpeg(const CompressParams& jparams,
+                       const DecompressParams& dparams,
+                       const std::vector<uint8_t>& compressed,
+                       TestImage* output);
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_LIBJPEG_TEST_UTIL_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/libjpeg_wrapper.cc b/third-party/libjxl/libjxl/lib/jpegli/libjpeg_wrapper.cc
new file mode 100644
index 0000000000..b38d16f255
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/libjpeg_wrapper.cc
@@ -0,0 +1,255 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// This file defines the jpeg_* entry points of a libjpeg.so that is API- and
+// ABI-compatible with libjpeg-turbo's libjpeg.so; each one simply forwards to
+// the jpegli_* function of the same name.
+
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/error.h"
+
+struct jpeg_error_mgr *jpeg_std_error(struct jpeg_error_mgr *err) {
+  return jpegli_std_error(err);
+}
+
+void jpeg_abort(j_common_ptr cinfo) { jpegli_abort(cinfo); }
+
+void jpeg_destroy(j_common_ptr cinfo) { jpegli_destroy(cinfo); }
+
+JQUANT_TBL *jpeg_alloc_quant_table(j_common_ptr cinfo) {
+  return jpegli_alloc_quant_table(cinfo);
+}
+
+JHUFF_TBL *jpeg_alloc_huff_table(j_common_ptr cinfo) {
+  return jpegli_alloc_huff_table(cinfo);
+}
+
+void jpeg_CreateDecompress(j_decompress_ptr cinfo, int version,
+                           size_t structsize) {
+  jpegli_CreateDecompress(cinfo, version, structsize);
+}
+
+void jpeg_stdio_src(j_decompress_ptr cinfo, FILE *infile) {
+  jpegli_stdio_src(cinfo, infile);
+}
+
+void jpeg_mem_src(j_decompress_ptr cinfo, const unsigned char *inbuffer,
+                  unsigned long insize) {
+  jpegli_mem_src(cinfo, inbuffer, insize);
+}
+
+int jpeg_read_header(j_decompress_ptr cinfo, boolean require_image) {
+  return jpegli_read_header(cinfo, require_image);
+}
+
+boolean jpeg_start_decompress(j_decompress_ptr cinfo) {
+  return jpegli_start_decompress(cinfo);
+}
+
+JDIMENSION jpeg_read_scanlines(j_decompress_ptr cinfo, JSAMPARRAY scanlines,
+                               JDIMENSION max_lines) {
+  return jpegli_read_scanlines(cinfo, scanlines, max_lines);
+}
+
+JDIMENSION jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) {
+  return jpegli_skip_scanlines(cinfo, num_lines);
+}
+
+void jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
+                        JDIMENSION *width) {
+  jpegli_crop_scanline(cinfo, xoffset, width);
+}
+
+boolean jpeg_finish_decompress(j_decompress_ptr cinfo) {
+  return jpegli_finish_decompress(cinfo);
+}
+
+JDIMENSION jpeg_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data,
+                              JDIMENSION max_lines) {
+  return jpegli_read_raw_data(cinfo, data, max_lines);
+}
+
+jvirt_barray_ptr *jpeg_read_coefficients(j_decompress_ptr cinfo) {
+  return jpegli_read_coefficients(cinfo);
+}
+
+boolean jpeg_has_multiple_scans(j_decompress_ptr cinfo) {
+  return jpegli_has_multiple_scans(cinfo);
+}
+
+boolean jpeg_start_output(j_decompress_ptr cinfo, int scan_number) {
+  return jpegli_start_output(cinfo, scan_number);
+}
+
+boolean jpeg_finish_output(j_decompress_ptr cinfo) {
+  return jpegli_finish_output(cinfo);
+}
+
+boolean jpeg_input_complete(j_decompress_ptr cinfo) {
+  return jpegli_input_complete(cinfo);
+}
+
+int jpeg_consume_input(j_decompress_ptr cinfo) {
+  return jpegli_consume_input(cinfo);
+}
+
+#if JPEG_LIB_VERSION >= 80
+void jpeg_core_output_dimensions(j_decompress_ptr cinfo) {
+  jpegli_core_output_dimensions(cinfo);
+}
+#endif  // JPEG_LIB_VERSION >= 80
+void jpeg_calc_output_dimensions(j_decompress_ptr cinfo) {
+  jpegli_calc_output_dimensions(cinfo);
+}
+
+void jpeg_save_markers(j_decompress_ptr cinfo, int marker_code,
+                       unsigned int length_limit) {
+  jpegli_save_markers(cinfo, marker_code, length_limit);
+}
+
+void jpeg_set_marker_processor(j_decompress_ptr cinfo, int marker_code,
+                               jpeg_marker_parser_method routine) {
+  jpegli_set_marker_processor(cinfo, marker_code, routine);
+}
+
+boolean jpeg_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr,
+                              unsigned int *icc_data_len) {
+  return jpegli_read_icc_profile(cinfo, icc_data_ptr, icc_data_len);
+}
+
+void jpeg_abort_decompress(j_decompress_ptr cinfo) {
+  jpegli_abort_decompress(cinfo);
+}
+
+void jpeg_destroy_decompress(j_decompress_ptr cinfo) {
+  jpegli_destroy_decompress(cinfo);
+}
+
+void jpeg_CreateCompress(j_compress_ptr cinfo, int version, size_t structsize) {
+  jpegli_CreateCompress(cinfo, version, structsize);
+}
+
+void jpeg_stdio_dest(j_compress_ptr cinfo, FILE *outfile) {
+  jpegli_stdio_dest(cinfo, outfile);
+}
+
+void jpeg_mem_dest(j_compress_ptr cinfo, unsigned char **outbuffer,
+                   unsigned long *outsize) {
+  jpegli_mem_dest(cinfo, outbuffer, outsize);
+}
+
+void jpeg_set_defaults(j_compress_ptr cinfo) { jpegli_set_defaults(cinfo); }
+
+void jpeg_default_colorspace(j_compress_ptr cinfo) {
+  jpegli_default_colorspace(cinfo);
+}
+
+void jpeg_set_colorspace(j_compress_ptr cinfo, J_COLOR_SPACE colorspace) {
+  jpegli_set_colorspace(cinfo, colorspace);
+}
+
+void jpeg_set_quality(j_compress_ptr cinfo, int quality,
+                      boolean force_baseline) {
+  jpegli_set_quality(cinfo, quality, force_baseline);
+}
+
+void jpeg_set_linear_quality(j_compress_ptr cinfo, int scale_factor,
+                             boolean force_baseline) {
+  jpegli_set_linear_quality(cinfo, scale_factor, force_baseline);
+}
+
+#if JPEG_LIB_VERSION >= 70
+void jpeg_default_qtables(j_compress_ptr cinfo, boolean force_baseline) {
+  jpegli_default_qtables(cinfo, force_baseline);
+}
+#endif  // JPEG_LIB_VERSION >= 70
+
+int jpeg_quality_scaling(int quality) {
+  return jpegli_quality_scaling(quality);
+}
+
+void jpeg_add_quant_table(j_compress_ptr cinfo, int which_tbl,
+                          const unsigned int *basic_table, int scale_factor,
+                          boolean force_baseline) {
+  jpegli_add_quant_table(cinfo, which_tbl, basic_table, scale_factor,
+                         force_baseline);
+}
+
+void jpeg_simple_progression(j_compress_ptr cinfo) {
+  jpegli_simple_progression(cinfo);
+}
+
+void jpeg_suppress_tables(j_compress_ptr cinfo, boolean suppress) {
+  jpegli_suppress_tables(cinfo, suppress);
+}
+
+#if JPEG_LIB_VERSION >= 70
+void jpeg_calc_jpeg_dimensions(j_compress_ptr cinfo) {
+  jpegli_calc_jpeg_dimensions(cinfo);
+}
+#endif  // JPEG_LIB_VERSION >= 70
+
+void jpeg_copy_critical_parameters(j_decompress_ptr srcinfo,
+                                   j_compress_ptr dstinfo) {
+  jpegli_copy_critical_parameters(srcinfo, dstinfo);
+}
+
+void jpeg_write_m_header(j_compress_ptr cinfo, int marker,
+                         unsigned int datalen) {
+  jpegli_write_m_header(cinfo, marker, datalen);
+}
+
+void jpeg_write_m_byte(j_compress_ptr cinfo, int val) {
+  jpegli_write_m_byte(cinfo, val);
+}
+
+void jpeg_write_marker(j_compress_ptr cinfo, int marker, const JOCTET *dataptr,
+                       unsigned int datalen) {
+  jpegli_write_marker(cinfo, marker, dataptr, datalen);
+}
+
+void jpeg_write_icc_profile(j_compress_ptr cinfo, const JOCTET *icc_data_ptr,
+                            unsigned int icc_data_len) {
+  jpegli_write_icc_profile(cinfo, icc_data_ptr, icc_data_len);
+}
+
+void jpeg_start_compress(j_compress_ptr cinfo, boolean write_all_tables) {
+  jpegli_start_compress(cinfo, write_all_tables);
+}
+
+void jpeg_write_tables(j_compress_ptr cinfo) { jpegli_write_tables(cinfo); }
+
+JDIMENSION jpeg_write_scanlines(j_compress_ptr cinfo, JSAMPARRAY scanlines,
+                                JDIMENSION num_lines) {
+  return jpegli_write_scanlines(cinfo, scanlines, num_lines);
+}
+
+JDIMENSION jpeg_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data,
+                               JDIMENSION num_lines) {
+  return jpegli_write_raw_data(cinfo, data, num_lines);
+}
+
+void jpeg_write_coefficients(j_compress_ptr cinfo,
+                             jvirt_barray_ptr *coef_arrays) {
+  jpegli_write_coefficients(cinfo, coef_arrays);
+}
+
+void jpeg_finish_compress(j_compress_ptr cinfo) {
+  jpegli_finish_compress(cinfo);
+}
+
+void jpeg_abort_compress(j_compress_ptr cinfo) { jpegli_abort_compress(cinfo); }
+
+void jpeg_destroy_compress(j_compress_ptr cinfo) {
+  jpegli_destroy_compress(cinfo);
+}
+
+boolean jpeg_resync_to_restart(j_decompress_ptr cinfo, int desired) {
+  return jpegli_resync_to_restart(cinfo, desired);
+}
+
+void jpeg_new_colormap(j_decompress_ptr cinfo) { jpegli_new_colormap(cinfo); }
diff --git a/third-party/libjxl/libjxl/lib/jpegli/memory_manager.cc b/third-party/libjxl/libjxl/lib/jpegli/memory_manager.cc
new file mode 100644
index 0000000000..3a8f230e63
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/memory_manager.cc
@@ -0,0 +1,186 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/memory_manager.h"
+
+#include <string.h>
+
+#include <hwy/aligned_allocator.h>
+#include <vector>
+
+#include "lib/jpegli/common_internal.h"
+#include "lib/jpegli/error.h"
+
+// Control block for a virtual sample array. In this file the array is always
+// backed by one fully allocated 2d buffer (see RequestVirtualArray).
+struct jvirt_sarray_control {
+  // Row pointers of the backing buffer.
+  JSAMPARRAY full_buffer;
+  // Number of rows the array was requested with.
+  size_t numrows;
+  // Maximum number of rows a single access may ask for.
+  JDIMENSION maxaccess;
+};
+
+// Control block for a virtual coefficient-block array. In this file the array
+// is always backed by one fully allocated 2d buffer (see RequestVirtualArray).
+struct jvirt_barray_control {
+  // Row pointers of the backing buffer.
+  JBLOCKARRAY full_buffer;
+  // Number of rows the array was requested with.
+  size_t numrows;
+  // Maximum number of rows a single access may ask for.
+  JDIMENSION maxaccess;
+};
+
+namespace jpegli {
+
+namespace {
+
+// Private state of the jpegli memory manager. A pointer to this struct is
+// stored in cinfo->mem (cast to jpeg_memory_mgr*) and cast back by the
+// functions below, so `pub` has to stay the first member.
+struct MemoryManager {
+  struct jpeg_memory_mgr pub;
+  // Pointers handed out per pool. Indices below JPOOL_NUMPOOLS hold malloc()
+  // allocations; index JPOOL_NUMPOOLS + i holds the hwy-aligned allocations
+  // belonging to pool i.
+  std::vector<void*> owned_ptrs[2 * JPOOL_NUMPOOLS];
+  // Bytes currently accounted to each pool, indexed like owned_ptrs.
+  uint64_t pool_memory_usage[2 * JPOOL_NUMPOOLS];
+  // Bytes currently accounted across all pools.
+  uint64_t total_memory_usage;
+  // Largest value total_memory_usage has reached so far.
+  uint64_t peak_memory_usage;
+};
+
+// Allocates `sizeofobject` bytes for pool `pool_id` and records the pointer
+// and its size in the MemoryManager so that FreePool() can release it later.
+// Pool ids below JPOOL_NUMPOOLS are served by malloc(); ids in the upper half
+// are served by the hwy aligned allocator. A JPEGLI_ERROR is raised for an
+// out-of-range pool id, when a positive max_memory_to_use would be exceeded,
+// or when the underlying allocator returns nullptr.
+void* Alloc(j_common_ptr cinfo, int pool_id, size_t sizeofobject) {
+  MemoryManager* mem = reinterpret_cast<MemoryManager*>(cinfo->mem);
+  const bool valid_pool = pool_id >= 0 && pool_id < 2 * JPOOL_NUMPOOLS;
+  if (!valid_pool) {
+    JPEGLI_ERROR("Invalid pool id %d", pool_id);
+  }
+  const uint64_t num_bytes = static_cast<uint64_t>(sizeofobject);
+  if (mem->pub.max_memory_to_use > 0) {
+    const uint64_t limit = static_cast<uint64_t>(mem->pub.max_memory_to_use);
+    if (mem->total_memory_usage + num_bytes > limit) {
+      JPEGLI_ERROR("Total memory usage exceeding %ld",
+                   mem->pub.max_memory_to_use);
+    }
+  }
+  void* block = (pool_id < JPOOL_NUMPOOLS)
+                    ? malloc(sizeofobject)
+                    : hwy::AllocateAlignedBytes(sizeofobject, nullptr, nullptr);
+  if (block == nullptr) {
+    JPEGLI_ERROR("Out of memory");
+  }
+  mem->owned_ptrs[pool_id].push_back(block);
+  mem->pool_memory_usage[pool_id] += num_bytes;
+  mem->total_memory_usage += num_bytes;
+  // Track the high-water mark.
+  if (mem->peak_memory_usage < mem->total_memory_usage) {
+    mem->peak_memory_usage = mem->total_memory_usage;
+  }
+  return block;
+}
+
+// Greatest common divisor via Euclid's algorithm; gcd(a, 0) == a.
+constexpr size_t gcd(size_t a, size_t b) { return b == 0 ? a : gcd(b, a % b); }
+// Least common multiple of `a` and `b`. Divides before multiplying so the
+// intermediate value never exceeds the result (the plain product a * b can
+// wrap around even when the lcm itself fits in size_t). Returns 0 when either
+// argument is 0 instead of dividing by gcd(0, 0) == 0.
+constexpr size_t lcm(size_t a, size_t b) {
+  return (a == 0 || b == 0) ? 0 : (a / gcd(a, b)) * b;
+}
+
+// Allocates a 2d array of T with `numrows` rows of `samplesperrow` elements
+// and returns the table of row pointers. The row-pointer table is allocated
+// from `pool_id` as given; the element storage is a single contiguous buffer
+// that always comes from the aligned counterpart of the pool. The per-row
+// byte length is rounded up with RoundUpTo() to lcm(sizeof(T), HWY_ALIGNMENT).
+template <typename T>
+T** Alloc2dArray(j_common_ptr cinfo, int pool_id, JDIMENSION samplesperrow,
+                 JDIMENSION numrows) {
+  T** rows = Allocate<T*>(cinfo, numrows, pool_id);
+  // The element storage of large 2d arrays always uses the aligned allocator.
+  const int data_pool_id =
+      (pool_id < JPOOL_NUMPOOLS) ? pool_id + JPOOL_NUMPOOLS : pool_id;
+  const size_t row_alignment = lcm(sizeof(T), HWY_ALIGNMENT);
+  const size_t row_bytes = RoundUpTo(samplesperrow * sizeof(T), row_alignment);
+  const size_t row_elems = row_bytes / sizeof(T);
+  T* storage = Allocate<T>(cinfo, numrows * row_elems, data_pool_id);
+  for (size_t y = 0; y < numrows; ++y) {
+    rows[y] = storage + y * row_elems;
+  }
+  return rows;
+}
+
+// Creates a virtual array control block together with its complete backing
+// buffer. Only JPOOL_IMAGE is accepted as `pool_id`; any other value raises a
+// JPEGLI_ERROR. When `pre_zero` is set, the first `samplesperrow` elements of
+// every row are cleared with memset().
+template <typename Control, typename T>
+Control* RequestVirtualArray(j_common_ptr cinfo, int pool_id, boolean pre_zero,
+                             JDIMENSION samplesperrow, JDIMENSION numrows,
+                             JDIMENSION maxaccess) {
+  if (pool_id != JPOOL_IMAGE) {
+    JPEGLI_ERROR("Only image lifetime virtual arrays are supported.");
+  }
+  Control* control = Allocate<Control>(cinfo, 1, pool_id);
+  control->full_buffer =
+      Alloc2dArray<T>(cinfo, pool_id, samplesperrow, numrows);
+  control->numrows = numrows;
+  control->maxaccess = maxaccess;
+  if (!pre_zero) {
+    return control;
+  }
+  for (size_t y = 0; y < numrows; ++y) {
+    memset(control->full_buffer[y], 0, samplesperrow * sizeof(T));
+  }
+  return control;
+}
+
+// Installed as the realize_virt_arrays callback. Intentionally a no-op:
+// RequestVirtualArray() already allocates the full backing buffer.
+void RealizeVirtualArrays(j_common_ptr cinfo) {
+  // Nothing to do, the full arrays were realized at request time already.
+}
+
+// Returns a pointer to row `start_row` of the backing buffer behind `ptr`
+// after validating the requested window [start_row, start_row + num_rows).
+// `writable` is not used by this implementation. A JPEGLI_ERROR is raised
+// when more than `maxaccess` rows are requested, when the window extends past
+// the last row, or when no backing buffer is attached.
+template <typename Control, typename T>
+T** AccessVirtualArray(j_common_ptr cinfo, Control* ptr, JDIMENSION start_row,
+                       JDIMENSION num_rows, boolean writable) {
+  if (num_rows > ptr->maxaccess) {
+    JPEGLI_ERROR("Invalid virtual array access, num rows %u vs max rows %u",
+                 num_rows, ptr->maxaccess);
+  }
+  // Add in 64 bits: summing the two JDIMENSION values directly can wrap
+  // around and make an out-of-range window pass the bounds check.
+  const uint64_t end_row = static_cast<uint64_t>(start_row) + num_rows;
+  if (end_row > static_cast<uint64_t>(ptr->numrows)) {
+    // numrows is a size_t, so cast both values to match the %llu conversions.
+    JPEGLI_ERROR("Invalid virtual array access, %llu vs %llu total rows",
+                 static_cast<unsigned long long>(end_row),
+                 static_cast<unsigned long long>(ptr->numrows));
+  }
+  if (ptr->full_buffer == nullptr) {
+    JPEGLI_ERROR("Invalid virtual array access, array not realized.");
+  }
+  return ptr->full_buffer + start_row;
+}
+
+// Drops the bookkeeping for `pool_id`: subtracts the pool's byte count from
+// the running total, resets it to zero and empties the pointer list. The
+// pointers themselves are not freed here; FreePool() does that beforehand.
+void ClearPool(j_common_ptr cinfo, int pool_id) {
+  MemoryManager* mem = reinterpret_cast<MemoryManager*>(cinfo->mem);
+  uint64_t& pool_bytes = mem->pool_memory_usage[pool_id];
+  mem->total_memory_usage -= pool_bytes;
+  pool_bytes = 0;
+  mem->owned_ptrs[pool_id].clear();
+}
+
+// Releases everything allocated for `pool_id`: the malloc()ed pointers of the
+// pool itself and the hwy-aligned pointers of its aligned counterpart
+// (JPOOL_NUMPOOLS + pool_id), clearing the bookkeeping of each afterwards.
+// Raises a JPEGLI_ERROR when `pool_id` is outside [0, JPOOL_NUMPOOLS).
+void FreePool(j_common_ptr cinfo, int pool_id) {
+  MemoryManager* mem = reinterpret_cast<MemoryManager*>(cinfo->mem);
+  const bool valid_pool = 0 <= pool_id && pool_id < JPOOL_NUMPOOLS;
+  if (!valid_pool) {
+    JPEGLI_ERROR("Invalid pool id %d", pool_id);
+  }
+  std::vector<void*>& plain_ptrs = mem->owned_ptrs[pool_id];
+  for (size_t i = 0; i < plain_ptrs.size(); ++i) {
+    free(plain_ptrs[i]);
+  }
+  ClearPool(cinfo, pool_id);
+
+  const int aligned_pool_id = JPOOL_NUMPOOLS + pool_id;
+  std::vector<void*>& aligned_ptrs = mem->owned_ptrs[aligned_pool_id];
+  for (size_t i = 0; i < aligned_ptrs.size(); ++i) {
+    hwy::FreeAlignedBytes(aligned_ptrs[i], nullptr, nullptr);
+  }
+  ClearPool(cinfo, aligned_pool_id);
+}
+
+// Frees every pool in ascending order, deletes the MemoryManager itself and
+// resets cinfo->mem to nullptr.
+void SelfDestruct(j_common_ptr cinfo) {
+  MemoryManager* mem = reinterpret_cast<MemoryManager*>(cinfo->mem);
+  int pool_id = 0;
+  while (pool_id < JPOOL_NUMPOOLS) {
+    FreePool(cinfo, pool_id);
+    ++pool_id;
+  }
+  delete mem;
+  cinfo->mem = nullptr;
+}
+
+}  // namespace
+
+// Creates the jpegli memory manager, points every jpeg_memory_mgr callback at
+// the implementations in this file, resets the usage counters and installs
+// the manager as cinfo->mem. SelfDestruct() deletes it again.
+void InitMemoryManager(j_common_ptr cinfo) {
+  // Value-initialize (note the parentheses) so that every scalar field,
+  // including any member of `pub` that is not assigned below, starts out
+  // zeroed instead of holding an indeterminate value.
+  MemoryManager* mem = new MemoryManager();
+  mem->pub.alloc_small = jpegli::Alloc;
+  mem->pub.alloc_large = jpegli::Alloc;
+  mem->pub.alloc_sarray = jpegli::Alloc2dArray<JSAMPLE>;
+  mem->pub.alloc_barray = jpegli::Alloc2dArray<JBLOCK>;
+  mem->pub.request_virt_sarray =
+      jpegli::RequestVirtualArray<jvirt_sarray_control, JSAMPLE>;
+  mem->pub.request_virt_barray =
+      jpegli::RequestVirtualArray<jvirt_barray_control, JBLOCK>;
+  mem->pub.realize_virt_arrays = jpegli::RealizeVirtualArrays;
+  mem->pub.access_virt_sarray =
+      jpegli::AccessVirtualArray<jvirt_sarray_control, JSAMPLE>;
+  mem->pub.access_virt_barray =
+      jpegli::AccessVirtualArray<jvirt_barray_control, JBLOCK>;
+  mem->pub.free_pool = jpegli::FreePool;
+  mem->pub.self_destruct = jpegli::SelfDestruct;
+  // Alloc() only enforces a limit when this is positive.
+  mem->pub.max_memory_to_use = 0;
+  mem->total_memory_usage = 0;
+  mem->peak_memory_usage = 0;
+  memset(mem->pool_memory_usage, 0, sizeof(mem->pool_memory_usage));
+  cinfo->mem = reinterpret_cast<struct jpeg_memory_mgr*>(mem);
+}
+
+}  // namespace jpegli
diff --git a/third-party/libjxl/libjxl/lib/jpegli/memory_manager.h b/third-party/libjxl/libjxl/lib/jpegli/memory_manager.h
new file mode 100644
index 0000000000..3e2bdabe06
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/memory_manager.h
@@ -0,0 +1,45 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_MEMORY_MANAGER_H_
+#define LIB_JPEGLI_MEMORY_MANAGER_H_
+
+#include <stdlib.h>
+
+#include "lib/jpegli/common.h"
+
+#define JPOOL_PERMANENT_ALIGNED (JPOOL_NUMPOOLS + JPOOL_PERMANENT)
+#define JPOOL_IMAGE_ALIGNED (JPOOL_NUMPOOLS + JPOOL_IMAGE)
+
+namespace jpegli {
+
+void InitMemoryManager(j_common_ptr cinfo);
+
+// Allocates storage for `len` objects of type T from pool `pool_id` through
+// the alloc_small callback of cinfo's memory manager. The returned memory is
+// not initialized by this function.
+template <typename T>
+T* Allocate(j_common_ptr cinfo, size_t len, int pool_id = JPOOL_PERMANENT) {
+  const size_t num_bytes = len * sizeof(T);
+  void* raw = (*cinfo->mem->alloc_small)(cinfo, pool_id, num_bytes);
+  return static_cast<T*>(raw);
+}
+
+// Overload for decompress objects: casts `cinfo` to j_common_ptr and forwards
+// to the j_common_ptr overload of Allocate<T>().
+template <typename T>
+T* Allocate(j_decompress_ptr cinfo, size_t len, int pool_id = JPOOL_PERMANENT) {
+  return Allocate<T>(reinterpret_cast<j_common_ptr>(cinfo), len, pool_id);
+}
+
+// Overload for compress objects: casts `cinfo` to j_common_ptr and forwards
+// to the j_common_ptr overload of Allocate<T>().
+template <typename T>
+T* Allocate(j_compress_ptr cinfo, size_t len, int pool_id = JPOOL_PERMANENT) {
+  return Allocate<T>(reinterpret_cast<j_common_ptr>(cinfo), len, pool_id);
+}
+
+// Requests one row, at block row `by`, of the coefficient buffer of component
+// `c` through the access_virt_barray callback. The row is requested as
+// writable.
+template <typename T>
+JBLOCKARRAY GetBlockRow(T cinfo, int c, JDIMENSION by) {
+  j_common_ptr common = reinterpret_cast<j_common_ptr>(cinfo);
+  auto coeff_buffer = cinfo->master->coeff_buffers[c];
+  const JDIMENSION kNumRows = 1;
+  return (*cinfo->mem->access_virt_barray)(common, coeff_buffer, by, kNumRows,
+                                           true);
+}
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_MEMORY_MANAGER_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/output_suspension_test.cc b/third-party/libjxl/libjxl/lib/jpegli/output_suspension_test.cc
new file mode 100644
index 0000000000..73db791727
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/output_suspension_test.cc
@@ -0,0 +1,219 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+
+namespace jpegli {
+namespace {
+
+static constexpr size_t kInitialBufferSize = 1024;
+static constexpr size_t kFinalBufferSize = 18;
+
+// Destination manager used by the output suspension tests. Its
+// empty_output_buffer() callback always returns FALSE (presumably signalling
+// suspension to the encoder — confirm against the jpegli API); the tests
+// drain and resize the buffer themselves through EmptyTo().
+struct DestinationManager {
+  // Must stay the first member: the tests store a pointer to this struct in
+  // cinfo->dest and init_destination() casts it back.
+  jpeg_destination_mgr pub;
+  // Backing storage that pub.next_output_byte points into.
+  std::vector<uint8_t> buffer;
+
+  // Installs the three destination callbacks. The output pointer and free
+  // count are only set up later by init_destination()/Rewind().
+  DestinationManager() {
+    pub.init_destination = init_destination;
+    pub.empty_output_buffer = empty_output_buffer;
+    pub.term_destination = term_destination;
+  }
+
+  // Points the output cursor back at the start of `buffer` and marks the
+  // whole buffer as free.
+  void Rewind() {
+    pub.next_output_byte = buffer.data();
+    pub.free_in_buffer = buffer.size();
+  }
+
+  // Appends the bytes written so far (from the buffer start up to
+  // pub.next_output_byte) to `output`, resizes the buffer to `new_size` when
+  // that is non-zero, and rewinds.
+  void EmptyTo(std::vector<uint8_t>* output, size_t new_size = 0) {
+    output->insert(output->end(), buffer.data(), pub.next_output_byte);
+    if (new_size > 0) {
+      buffer.resize(new_size);
+    }
+    Rewind();
+  }
+
+  // Callback: sizes the buffer to kInitialBufferSize and rewinds it.
+  static void init_destination(j_compress_ptr cinfo) {
+    auto us = reinterpret_cast<DestinationManager*>(cinfo->dest);
+    us->buffer.resize(kInitialBufferSize);
+    us->Rewind();
+  }
+
+  // Callback: never provides more space; always reports FALSE.
+  static boolean empty_output_buffer(j_compress_ptr cinfo) { return FALSE; }
+
+  // Callback: nothing to flush, the tests collect the output via EmptyTo().
+  static void term_destination(j_compress_ptr cinfo) {}
+};
+
+// Parameters of one output suspension test case.
+struct TestConfig {
+  // Image to compress; its pixels are generated inside the test.
+  TestImage input;
+  // Compression parameters; the tests read the sampling factors from it.
+  CompressParams jparams;
+  // Size the destination buffer is resized to each time the tests drain it
+  // in the middle of writing image data.
+  size_t buffer_size;
+  // Number of scanlines offered to the encoder per batch (PixelData test).
+  size_t lines_batch_size;
+};
+
+// Value-parameterized fixture; each test instance receives one TestConfig.
+class OutputSuspensionTestParam : public ::testing::TestWithParam<TestConfig> {
+};
+
+// Compresses generated RGB pixels in batches of `lines_batch_size` scanlines
+// into a destination whose buffer can fill up at any time. Whenever fewer
+// lines than offered were consumed, the buffer is drained into `compressed`
+// and resized to `buffer_size`. The result is decoded with libjpeg and
+// compared against the input.
+TEST_P(OutputSuspensionTestParam, PixelData) {
+  jpeg_compress_struct cinfo = {};
+  TestConfig config = GetParam();
+  TestImage& input = config.input;
+  GeneratePixels(&input);
+  DestinationManager dest;
+  std::vector<uint8_t> compressed;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    cinfo.dest = reinterpret_cast<jpeg_destination_mgr*>(&dest);
+
+    cinfo.image_width = input.xsize;
+    cinfo.image_height = input.ysize;
+    cinfo.input_components = input.components;
+    cinfo.in_color_space = JCS_RGB;
+    jpegli_set_defaults(&cinfo);
+    cinfo.comp_info[0].v_samp_factor = config.jparams.v_sampling[0];
+    jpegli_set_progressive_level(&cinfo, 0);
+    cinfo.optimize_coding = FALSE;
+    jpegli_start_compress(&cinfo, TRUE);
+
+    size_t stride = cinfo.image_width * cinfo.input_components;
+    // Staging buffer holding one batch of scanlines.
+    std::vector<uint8_t> row_bytes(config.lines_batch_size * stride);
+    while (cinfo.next_scanline < cinfo.image_height) {
+      size_t lines_left = cinfo.image_height - cinfo.next_scanline;
+      size_t num_lines = std::min(config.lines_batch_size, lines_left);
+      memcpy(&row_bytes[0], &input.pixels[cinfo.next_scanline * stride],
+             num_lines * stride);
+      std::vector<JSAMPROW> rows(num_lines);
+      for (size_t i = 0; i < num_lines; ++i) {
+        rows[i] = &row_bytes[i * stride];
+      }
+      size_t lines_done = 0;
+      // Keep offering the remaining lines of this batch; a short count means
+      // the output buffer has to be drained before retrying.
+      while (lines_done < num_lines) {
+        lines_done += jpegli_write_scanlines(&cinfo, &rows[lines_done],
+                                             num_lines - lines_done);
+        if (lines_done < num_lines) {
+          dest.EmptyTo(&compressed, config.buffer_size);
+        }
+      }
+    }
+    // Switch to the final buffer size before finishing, then collect what
+    // jpegli_finish_compress() wrote.
+    dest.EmptyTo(&compressed, kFinalBufferSize);
+    jpegli_finish_compress(&cinfo);
+    dest.EmptyTo(&compressed);
+    return true;
+  };
+  ASSERT_TRUE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  TestImage output;
+  DecodeWithLibjpeg(CompressParams(), DecompressParams(), compressed, &output);
+  VerifyOutputImage(input, output, 2.5);
+}
+
+// Compresses generated YCbCr raw (already downsampled) data through a
+// destination whose buffer can fill up at any time. Each time
+// jpegli_write_raw_data() consumes nothing, the buffer is drained into
+// `compressed` and resized to `buffer_size` before retrying. Only runs for
+// configurations with lines_batch_size == 1. The result is decoded with
+// libjpeg in raw-data mode and compared against the input.
+TEST_P(OutputSuspensionTestParam, RawData) {
+  jpeg_compress_struct cinfo = {};
+  TestConfig config = GetParam();
+  // lines_batch_size is not used in raw mode; skip the duplicate configs.
+  if (config.lines_batch_size != 1) return;
+  TestImage& input = config.input;
+  input.color_space = JCS_YCbCr;
+  GeneratePixels(&input);
+  GenerateRawData(config.jparams, &input);
+  DestinationManager dest;
+  std::vector<uint8_t> compressed;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    cinfo.dest = reinterpret_cast<jpeg_destination_mgr*>(&dest);
+    cinfo.image_width = input.xsize;
+    cinfo.image_height = input.ysize;
+    cinfo.input_components = input.components;
+    cinfo.in_color_space = JCS_YCbCr;
+    jpegli_set_defaults(&cinfo);
+    cinfo.comp_info[0].v_samp_factor = config.jparams.v_sampling[0];
+    jpegli_set_progressive_level(&cinfo, 0);
+    cinfo.optimize_coding = FALSE;
+    cinfo.raw_data_in = TRUE;
+    jpegli_start_compress(&cinfo, TRUE);
+
+    std::vector<std::vector<uint8_t>> raw_data = input.raw_data;
+    // Number of image scanlines covered by one jpegli_write_raw_data() call.
+    size_t max_lines = config.jparams.max_v_sample() * DCTSIZE;
+    std::vector<std::vector<JSAMPROW>> rowdata(cinfo.num_components);
+    std::vector<JSAMPARRAY> data(cinfo.num_components);
+    for (int c = 0; c < cinfo.num_components; ++c) {
+      rowdata[c].resize(config.jparams.v_samp(c) * DCTSIZE);
+      data[c] = &rowdata[c][0];
+    }
+    while (cinfo.next_scanline < cinfo.image_height) {
+      for (int c = 0; c < cinfo.num_components; ++c) {
+        size_t cwidth = cinfo.comp_info[c].width_in_blocks * DCTSIZE;
+        size_t cheight = cinfo.comp_info[c].height_in_blocks * DCTSIZE;
+        size_t num_lines = config.jparams.v_samp(c) * DCTSIZE;
+        size_t y0 = (cinfo.next_scanline / max_lines) * num_lines;
+        for (size_t i = 0; i < num_lines; ++i) {
+          // Rows past the component's padded height are passed as nullptr.
+          rowdata[c][i] =
+              (y0 + i < cheight ? &raw_data[c][(y0 + i) * cwidth] : nullptr);
+        }
+      }
+      // A return value of 0 means nothing was consumed: drain and retry.
+      while (jpegli_write_raw_data(&cinfo, &data[0], max_lines) == 0) {
+        dest.EmptyTo(&compressed, config.buffer_size);
+      }
+    }
+    dest.EmptyTo(&compressed, kFinalBufferSize);
+    jpegli_finish_compress(&cinfo);
+    dest.EmptyTo(&compressed);
+    return true;
+  };
+  // Fail right here if compression reported an error, as the PixelData test
+  // does, instead of going on to decode an incomplete stream.
+  ASSERT_TRUE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  DecompressParams dparams;
+  dparams.output_mode = RAW_DATA;
+  TestImage output;
+  DecodeWithLibjpeg(CompressParams(), dparams, compressed, &output);
+  VerifyOutputImage(input, output, 3.5);
+}
+
+// Builds the full cross product of test configurations: image heights of
+// 1080 plus {0, 1, 8, 9} extra rows, luma vertical sampling {1, 2}, scanline
+// batch sizes {1, 8, 117} and buffer sizes {1, 16, 16 << 10}. The image width
+// is always 1920 and all horizontal sampling factors are 1.
+std::vector<TestConfig> GenerateTests() {
+  const size_t base_xsize = 1920;
+  const size_t base_ysize = 1080;
+  std::vector<TestConfig> configs;
+  for (int extra_rows : {0, 1, 8, 9}) {
+    for (int luma_v_samp : {1, 2}) {
+      for (int batch_lines : {1, 8, 117}) {
+        for (int buffer_bytes : {1, 16, 16 << 10}) {
+          TestConfig cfg;
+          cfg.input.xsize = base_xsize;
+          cfg.input.ysize = base_ysize + extra_rows;
+          cfg.jparams.h_sampling = {1, 1, 1};
+          cfg.jparams.v_sampling = {luma_v_samp, 1, 1};
+          cfg.lines_batch_size = batch_lines;
+          cfg.buffer_size = buffer_bytes;
+          configs.push_back(cfg);
+        }
+      }
+    }
+  }
+  return configs;
+}
+
+// Streams a compact description of `c`: the input image, the compression
+// parameters, then "Lines<batch size>" and "BufSize<buffer size>".
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+  os << c.input;
+  os << c.jparams;
+  return os << "Lines" << c.lines_batch_size << "BufSize" << c.buffer_size;
+}
+
+// Produces the name of a test instance by streaming its TestConfig.
+std::string TestDescription(
+    const testing::TestParamInfo<OutputSuspensionTestParam::ParamType>& info) {
+  std::ostringstream description;
+  description << info.param;
+  return description.str();
+}
+
+// Instantiates the parameterized tests once per configuration returned by
+// GenerateTests(), naming each instance with TestDescription().
+// NOTE(review): presumably a thin wrapper around gtest's
+// INSTANTIATE_TEST_SUITE_P — confirm in lib/jpegli/testing.h.
+JPEGLI_INSTANTIATE_TEST_SUITE_P(OutputSuspensionTest, OutputSuspensionTestParam,
+                                testing::ValuesIn(GenerateTests()),
+                                TestDescription);
+
+}  // namespace
+}  // namespace jpegli
diff --git a/third-party/libjxl/libjxl/lib/jpegli/quant.cc b/third-party/libjxl/libjxl/lib/jpegli/quant.cc
new file mode 100644
index 0000000000..36f1df4cdd
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/quant.cc
@@ -0,0 +1,768 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/quant.h"
+
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+#include "lib/jpegli/adaptive_quantization.h"
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/status.h"
+
+namespace jpegli {
+
+namespace {
+
+// Global scale is chosen in a way that butteraugli 3-norm matches libjpeg
+// with the same quality setting. Fitted for quality 90 on jyrki31 corpus.
+constexpr float kGlobalScaleXYB = 1.43951668f;
+constexpr float kGlobalScaleYCbCr = 1.73966010f;
+
+static constexpr float kBaseQuantMatrixXYB[] = {
+    // c = 0
+    7.5629935265f,
+    19.8247814178f,
+    22.5724945068f,
+    20.6706695557f,
+    22.6864585876f,
+    23.5696277618f,
+    25.8129081726f,
+    36.3307571411f,
+    19.8247814178f,
+    21.5503177643f,
+    19.9372234344f,
+    20.5424213409f,
+    21.8645496368f,
+    23.9041385651f,
+    28.2844066620f,
+    32.6609764099f,
+    22.5724945068f,
+    19.9372234344f,
+    21.9017257690f,
+    19.1223449707f,
+    21.7515811920f,
+    24.6724700928f,
+    25.4249649048f,
+    32.6653823853f,
+    20.6706695557f,
+    20.5424213409f,
+    19.1223449707f,
+    20.1610221863f,
+    25.3719692230f,
+    25.9668903351f,
+    30.9804954529f,
+    31.3406009674f,
+    22.6864585876f,
+    21.8645496368f,
+    21.7515811920f,
+    25.3719692230f,
+    26.2431850433f,
+    40.5992202759f,
+    43.2624626160f,
+    63.3010940552f,
+    23.5696277618f,
+    23.9041385651f,
+    24.6724700928f,
+    25.9668903351f,
+    40.5992202759f,
+    48.3026771545f,
+    34.0964355469f,
+    61.9852142334f,
+    25.8129081726f,
+    28.2844066620f,
+    25.4249649048f,
+    30.9804954529f,
+    43.2624626160f,
+    34.0964355469f,
+    34.4937438965f,
+    66.9702758789f,
+    36.3307571411f,
+    32.6609764099f,
+    32.6653823853f,
+    31.3406009674f,
+    63.3010940552f,
+    61.9852142334f,
+    66.9702758789f,
+    39.9652709961f,
+    // c = 1
+    1.6262000799f,
+    3.2199242115f,
+    3.4903779030f,
+    3.9148359299f,
+    4.8337211609f,
+    4.9108843803f,
+    5.3137121201f,
+    6.1676793098f,
+    3.2199242115f,
+    3.4547898769f,
+    3.6036829948f,
+    4.2652835846f,
+    4.8368387222f,
+    4.8226222992f,
+    5.6120514870f,
+    6.3431472778f,
+    3.4903779030f,
+    3.6036829948f,
+    3.9044559002f,
+    4.3374395370f,
+    4.8435096741f,
+    5.4057979584f,
+    5.6066360474f,
+    6.1075134277f,
+    3.9148359299f,
+    4.2652835846f,
+    4.3374395370f,
+    4.6064834595f,
+    5.1751475334f,
+    5.4013924599f,
+    6.0399808884f,
+    6.7825231552f,
+    4.8337211609f,
+    4.8368387222f,
+    4.8435096741f,
+    5.1751475334f,
+    5.3748049736f,
+    6.1410837173f,
+    7.6529307365f,
+    7.5235214233f,
+    4.9108843803f,
+    4.8226222992f,
+    5.4057979584f,
+    5.4013924599f,
+    6.1410837173f,
+    6.3431472778f,
+    7.1083049774f,
+    7.6008300781f,
+    5.3137121201f,
+    5.6120514870f,
+    5.6066360474f,
+    6.0399808884f,
+    7.6529307365f,
+    7.1083049774f,
+    7.0943155289f,
+    7.0478363037f,
+    6.1676793098f,
+    6.3431472778f,
+    6.1075134277f,
+    6.7825231552f,
+    7.5235214233f,
+    7.6008300781f,
+    7.0478363037f,
+    6.9186143875f,
+    // c = 2
+    3.3038473129f,
+    10.0689258575f,
+    12.2785224915f,
+    14.6041173935f,
+    16.2107315063f,
+    19.2314529419f,
+    28.0129547119f,
+    55.6682891846f,
+    10.0689258575f,
+    11.4085016251f,
+    11.3871345520f,
+    15.4934167862f,
+    16.5364933014f,
+    14.9153423309f,
+    26.3748722076f,
+    40.8614425659f,
+    12.2785224915f,
+    11.3871345520f,
+    17.0886878967f,
+    13.9500350952f,
+    16.0003223419f,
+    28.5660629272f,
+    26.2124195099f,
+    30.1260128021f,
+    14.6041173935f,
+    15.4934167862f,
+    13.9500350952f,
+    21.1235027313f,
+    26.1579780579f,
+    25.5579223633f,
+    40.6859359741f,
+    33.8056335449f,
+    16.2107315063f,
+    16.5364933014f,
+    16.0003223419f,
+    26.1579780579f,
+    26.8042831421f,
+    26.1587715149f,
+    35.7343978882f,
+    43.6857032776f,
+    19.2314529419f,
+    14.9153423309f,
+    28.5660629272f,
+    25.5579223633f,
+    26.1587715149f,
+    34.5418128967f,
+    41.3197937012f,
+    48.7867660522f,
+    28.0129547119f,
+    26.3748722076f,
+    26.2124195099f,
+    40.6859359741f,
+    35.7343978882f,
+    41.3197937012f,
+    47.6329460144f,
+    55.3498458862f,
+    55.6682891846f,
+    40.8614425659f,
+    30.1260128021f,
+    33.8056335449f,
+    43.6857032776f,
+    48.7867660522f,
+    55.3498458862f,
+    63.6065597534f,
+};
+
+static const float kBaseQuantMatrixYCbCr[] = {
+    // c = 0
+    1.2397409345866273f,  //
+    1.7227115097630963f,  //
+    2.9212167156636855f,  //
+    2.812737435286529f,   //
+    3.339819711906184f,   //
+    3.463603762596166f,   //
+    3.840915217993518f,   //
+    3.86956f,             //
+    1.7227115097630963f,  //
+    2.0928894413636874f,  //
+    2.8456760904429297f,  //
+    2.704506820909662f,   //
+    3.4407673520905337f,  //
+    3.166232352090534f,   //
+    4.025208741558432f,   //
+    4.035324490952577f,   //
+    2.9212167156636855f,  //
+    2.8456760904429297f,  //
+    2.9587403520905338f,  //
+    3.3862948970669273f,  //
+    3.619523781336757f,   //
+    3.9046279999999998f,  //
+    3.757835838431854f,   //
+    4.237447515714274f,   //
+    2.812737435286529f,   //
+    2.704506820909662f,   //
+    3.3862948970669273f,  //
+    3.380058821812233f,   //
+    4.1679867415584315f,  //
+    4.805510627261856f,   //
+    4.784259f,            //
+    4.605934f,            //
+    3.339819711906184f,   //
+    3.4407673520905337f,  //
+    3.619523781336757f,   //
+    4.1679867415584315f,  //
+    4.579851258441568f,   //
+    4.923237f,            //
+    5.574107f,            //
+    5.48533336146308f,    //
+    3.463603762596166f,   //
+    3.166232352090534f,   //
+    3.9046279999999998f,  //
+    4.805510627261856f,   //
+    4.923237f,            //
+    5.43936f,             //
+    5.093895741558431f,   //
+    6.0872254423617225f,  //
+    3.840915217993518f,   //
+    4.025208741558432f,   //
+    3.757835838431854f,   //
+    4.784259f,            //
+    5.574107f,            //
+    5.093895741558431f,   //
+    5.438461f,            //
+    5.4037359493250845f,  //
+    3.86956f,             //
+    4.035324490952577f,   //
+    4.237447515714274f,   //
+    4.605934f,            //
+    5.48533336146308f,    //
+    6.0872254423617225f,  //
+    5.4037359493250845f,  //
+    4.37787101190424f,
+    // c = 1
+    2.8236197786377537f,  //
+    6.495639358561486f,   //
+    9.310489207538302f,   //
+    10.64747864717083f,   //
+    11.07419143098738f,   //
+    17.146390223910462f,  //
+    18.463982229408998f,  //
+    29.087001644203088f,  //
+    6.495639358561486f,   //
+    8.890103846667353f,   //
+    8.976895794294748f,   //
+    13.666270550318826f,  //
+    16.547071905624193f,  //
+    16.63871382827686f,   //
+    26.778396930893695f,  //
+    21.33034294694781f,   //
+    9.310489207538302f,   //
+    8.976895794294748f,   //
+    11.08737706005991f,   //
+    18.20548239870446f,   //
+    19.752481654011646f,  //
+    23.985660533114896f,  //
+    102.6457378402362f,   //
+    24.450989f,           //
+    10.64747864717083f,   //
+    13.666270550318826f,  //
+    18.20548239870446f,   //
+    18.628012327860365f,  //
+    16.042509519487183f,  //
+    25.04918273242625f,   //
+    25.017140189353015f,  //
+    35.79788782635831f,   //
+    11.07419143098738f,   //
+    16.547071905624193f,  //
+    19.752481654011646f,  //
+    16.042509519487183f,  //
+    19.373482748612577f,  //
+    14.677529999999999f,  //
+    19.94695960400931f,   //
+    51.094112f,           //
+    17.146390223910462f,  //
+    16.63871382827686f,   //
+    23.985660533114896f,  //
+    25.04918273242625f,   //
+    14.677529999999999f,  //
+    31.320412426835304f,  //
+    46.357234000000005f,  //
+    67.48111451705412f,   //
+    18.463982229408998f,  //
+    26.778396930893695f,  //
+    102.6457378402362f,   //
+    25.017140189353015f,  //
+    19.94695960400931f,   //
+    46.357234000000005f,  //
+    61.315764694388044f,  //
+    88.34665293823721f,   //
+    29.087001644203088f,  //
+    21.33034294694781f,   //
+    24.450989f,           //
+    35.79788782635831f,   //
+    51.094112f,           //
+    67.48111451705412f,   //
+    88.34665293823721f,   //
+    112.16099098350989f,
+    // c = 2
+    2.9217254961255255f,  //
+    4.497681013199305f,   //
+    7.356344520940414f,   //
+    6.583891506504051f,   //
+    8.535608740100237f,   //
+    8.799434353234647f,   //
+    9.188341534163023f,   //
+    9.482700481227672f,   //
+    4.497681013199305f,   //
+    6.309548851989123f,   //
+    7.024608962670982f,   //
+    7.156445324163424f,   //
+    8.049059218663244f,   //
+    7.0124290657218555f,  //
+    6.711923184393611f,   //
+    8.380307846134853f,   //
+    7.356344520940414f,   //
+    7.024608962670982f,   //
+    6.892101177327445f,   //
+    6.882819916277163f,   //
+    8.782226090078568f,   //
+    6.8774750000000004f,  //
+    7.8858175969577955f,  //
+    8.67909f,             //
+    6.583891506504051f,   //
+    7.156445324163424f,   //
+    6.882819916277163f,   //
+    7.003072944847055f,   //
+    7.7223464701024875f,  //
+    7.955425720217421f,   //
+    7.4734110000000005f,  //
+    8.362933242943903f,   //
+    8.535608740100237f,   //
+    8.049059218663244f,   //
+    8.782226090078568f,   //
+    7.7223464701024875f,  //
+    6.778005927001542f,   //
+    9.484922741558432f,   //
+    9.043702663686046f,   //
+    8.053178199770173f,   //
+    8.799434353234647f,   //
+    7.0124290657218555f,  //
+    6.8774750000000004f,  //
+    7.955425720217421f,   //
+    9.484922741558432f,   //
+    8.607606527385098f,   //
+    9.922697394370815f,   //
+    64.25135180237939f,   //
+    9.188341534163023f,   //
+    6.711923184393611f,   //
+    7.8858175969577955f,  //
+    7.4734110000000005f,  //
+    9.043702663686046f,   //
+    9.922697394370815f,   //
+    63.184936549738225f,  //
+    83.35294340273799f,   //
+    9.482700481227672f,   //
+    8.380307846134853f,   //
+    8.67909f,             //
+    8.362933242943903f,   //
+    8.053178199770173f,   //
+    64.25135180237939f,   //
+    83.35294340273799f,   //
+    114.89202448569779f,  //
+};
+
+static const float k420GlobalScale = 1.22;
+static const float k420Rescale[64] = {
+    0.4093, 0.3209, 0.3477, 0.3333, 0.3144, 0.2823, 0.3214, 0.3354,  //
+    0.3209, 0.3111, 0.3489, 0.2801, 0.3059, 0.3119, 0.4135, 0.3445,  //
+    0.3477, 0.3489, 0.3586, 0.3257, 0.2727, 0.3754, 0.3369, 0.3484,  //
+    0.3333, 0.2801, 0.3257, 0.3020, 0.3515, 0.3410, 0.3971, 0.3839,  //
+    0.3144, 0.3059, 0.2727, 0.3515, 0.3105, 0.3397, 0.2716, 0.3836,  //
+    0.2823, 0.3119, 0.3754, 0.3410, 0.3397, 0.3212, 0.3203, 0.0726,  //
+    0.3214, 0.4135, 0.3369, 0.3971, 0.2716, 0.3203, 0.0798, 0.0553,  //
+    0.3354, 0.3445, 0.3484, 0.3839, 0.3836, 0.0726, 0.0553, 0.3368,  //
+};
+
+static const float kBaseQuantMatrixStd[] = {
+    // c = 0
+    16.0f, 11.0f, 10.0f, 16.0f, 24.0f, 40.0f, 51.0f, 61.0f,      //
+    12.0f, 12.0f, 14.0f, 19.0f, 26.0f, 58.0f, 60.0f, 55.0f,      //
+    14.0f, 13.0f, 16.0f, 24.0f, 40.0f, 57.0f, 69.0f, 56.0f,      //
+    14.0f, 17.0f, 22.0f, 29.0f, 51.0f, 87.0f, 80.0f, 62.0f,      //
+    18.0f, 22.0f, 37.0f, 56.0f, 68.0f, 109.0f, 103.0f, 77.0f,    //
+    24.0f, 35.0f, 55.0f, 64.0f, 81.0f, 104.0f, 113.0f, 92.0f,    //
+    49.0f, 64.0f, 78.0f, 87.0f, 103.0f, 121.0f, 120.0f, 101.0f,  //
+    72.0f, 92.0f, 95.0f, 98.0f, 112.0f, 100.0f, 103.0f, 99.0f,   //
+    // c = 1
+    17.0f, 18.0f, 24.0f, 47.0f, 99.0f, 99.0f, 99.0f, 99.0f,  //
+    18.0f, 21.0f, 26.0f, 66.0f, 99.0f, 99.0f, 99.0f, 99.0f,  //
+    24.0f, 26.0f, 56.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f,  //
+    47.0f, 66.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f,  //
+    99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f,  //
+    99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f,  //
+    99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f,  //
+    99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f,  //
+};
+
+static const float kZeroBiasMulYCbCrLQ[] = {
+    // c = 0
+    0.0000f, 0.0568f, 0.3880f, 0.6190f, 0.6190f, 0.4490f, 0.4490f, 0.6187f,  //
+    0.0568f, 0.5829f, 0.6189f, 0.6190f, 0.6190f, 0.7190f, 0.6190f, 0.6189f,  //
+    0.3880f, 0.6189f, 0.6190f, 0.6190f, 0.6190f, 0.6190f, 0.6187f, 0.6100f,  //
+    0.6190f, 0.6190f, 0.6190f, 0.6190f, 0.5890f, 0.3839f, 0.7160f, 0.6190f,  //
+    0.6190f, 0.6190f, 0.6190f, 0.5890f, 0.6190f, 0.3880f, 0.5860f, 0.4790f,  //
+    0.4490f, 0.7190f, 0.6190f, 0.3839f, 0.3880f, 0.6190f, 0.6190f, 0.6190f,  //
+    0.4490f, 0.6190f, 0.6187f, 0.7160f, 0.5860f, 0.6190f, 0.6204f, 0.6190f,  //
+    0.6187f, 0.6189f, 0.6100f, 0.6190f, 0.4790f, 0.6190f, 0.6190f, 0.3480f,  //
+    // c = 1
+    0.0000f, 1.1640f, 0.9373f, 1.1319f, 0.8016f, 0.9136f, 1.1530f, 0.9430f,  //
+    1.1640f, 0.9188f, 0.9160f, 1.1980f, 1.1830f, 0.9758f, 0.9430f, 0.9430f,  //
+    0.9373f, 0.9160f, 0.8430f, 1.1720f, 0.7083f, 0.9430f, 0.9430f, 0.9430f,  //
+    1.1319f, 1.1980f, 1.1720f, 1.1490f, 0.8547f, 0.9430f, 0.9430f, 0.9430f,  //
+    0.8016f, 1.1830f, 0.7083f, 0.8547f, 0.9430f, 0.9430f, 0.9430f, 0.9430f,  //
+    0.9136f, 0.9758f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f,  //
+    1.1530f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9480f,  //
+    0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9480f, 0.9430f,  //
+    // c = 2
+    0.0000f, 1.3190f, 0.4308f, 0.4460f, 0.0661f, 0.0660f, 0.2660f, 0.2960f,  //
+    1.3190f, 0.3280f, 0.3093f, 0.0750f, 0.0505f, 0.1594f, 0.3060f, 0.2113f,  //
+    0.4308f, 0.3093f, 0.3060f, 0.1182f, 0.0500f, 0.3060f, 0.3915f, 0.2426f,  //
+    0.4460f, 0.0750f, 0.1182f, 0.0512f, 0.0500f, 0.2130f, 0.3930f, 0.1590f,  //
+    0.0661f, 0.0505f, 0.0500f, 0.0500f, 0.3055f, 0.3360f, 0.5148f, 0.5403f,  //
+    0.0660f, 0.1594f, 0.3060f, 0.2130f, 0.3360f, 0.5060f, 0.5874f, 0.3060f,  //
+    0.2660f, 0.3060f, 0.3915f, 0.3930f, 0.5148f, 0.5874f, 0.3060f, 0.3060f,  //
+    0.2960f, 0.2113f, 0.2426f, 0.1590f, 0.5403f, 0.3060f, 0.3060f, 0.3060f,  //
+};
+
+static const float kZeroBiasMulYCbCrHQ[] = {
+    // c = 0
+    0.0000f, 0.0044f, 0.2521f, 0.6547f, 0.8161f, 0.6130f, 0.8841f, 0.8155f,  //
+    0.0044f, 0.6831f, 0.6553f, 0.6295f, 0.7848f, 0.7843f, 0.8474f, 0.7836f,  //
+    0.2521f, 0.6553f, 0.7834f, 0.7829f, 0.8161f, 0.8072f, 0.7743f, 0.9242f,  //
+    0.6547f, 0.6295f, 0.7829f, 0.8654f, 0.7829f, 0.6986f, 0.7818f, 0.7726f,  //
+    0.8161f, 0.7848f, 0.8161f, 0.7829f, 0.7471f, 0.7827f, 0.7843f, 0.7653f,  //
+    0.6130f, 0.7843f, 0.8072f, 0.6986f, 0.7827f, 0.7848f, 0.9508f, 0.7653f,  //
+    0.8841f, 0.8474f, 0.7743f, 0.7818f, 0.7843f, 0.9508f, 0.7839f, 0.8437f,  //
+    0.8155f, 0.7836f, 0.9242f, 0.7726f, 0.7653f, 0.7653f, 0.8437f, 0.7819f,  //
+    // c = 1
+    0.0000f, 1.0816f, 1.0556f, 1.2876f, 1.1554f, 1.1567f, 1.8851f, 0.5488f,  //
+    1.0816f, 1.1537f, 1.1850f, 1.0712f, 1.1671f, 2.0719f, 1.0544f, 1.4764f,  //
+    1.0556f, 1.1850f, 1.2870f, 1.1981f, 1.8181f, 1.2618f, 1.0564f, 1.1191f,  //
+    1.2876f, 1.0712f, 1.1981f, 1.4753f, 2.0609f, 1.0564f, 1.2645f, 1.0564f,  //
+    1.1554f, 1.1671f, 1.8181f, 2.0609f, 0.7324f, 1.1163f, 0.8464f, 1.0564f,  //
+    1.1567f, 2.0719f, 1.2618f, 1.0564f, 1.1163f, 1.0040f, 1.0564f, 1.0564f,  //
+    1.8851f, 1.0544f, 1.0564f, 1.2645f, 0.8464f, 1.0564f, 1.0564f, 1.0564f,  //
+    0.5488f, 1.4764f, 1.1191f, 1.0564f, 1.0564f, 1.0564f, 1.0564f, 1.0564f,  //
+    // c = 2
+    0.0000f, 0.5392f, 0.6659f, 0.8968f, 0.6829f, 0.6328f, 0.5802f, 0.4836f,  //
+    0.5392f, 0.6746f, 0.6760f, 0.6102f, 0.6015f, 0.6958f, 0.7327f, 0.4897f,  //
+    0.6659f, 0.6760f, 0.6957f, 0.6543f, 0.4396f, 0.6330f, 0.7081f, 0.2583f,  //
+    0.8968f, 0.6102f, 0.6543f, 0.5913f, 0.6457f, 0.5828f, 0.5139f, 0.3565f,  //
+    0.6829f, 0.6015f, 0.4396f, 0.6457f, 0.5633f, 0.4263f, 0.6371f, 0.5949f,  //
+    0.6328f, 0.6958f, 0.6330f, 0.5828f, 0.4263f, 0.2847f, 0.2909f, 0.6629f,  //
+    0.5802f, 0.7327f, 0.7081f, 0.5139f, 0.6371f, 0.2909f, 0.6644f, 0.6644f,  //
+    0.4836f, 0.4897f, 0.2583f, 0.3565f, 0.5949f, 0.6629f, 0.6644f, 0.6644f,  //
+};
+
+static const float kZeroBiasOffsetYCbCrDC[] = {0.0f, 0.0f, 0.0f};
+
+static const float kZeroBiasOffsetYCbCrAC[] = {
+    0.59082f,
+    0.58146f,
+    0.57988f,
+};
+
+constexpr uint8_t kTransferFunctionPQ = 16;
+constexpr uint8_t kTransferFunctionHLG = 18;
+
+// Maps `distance` to the linear scale factor that SetQuantMatrices applies
+// when non-linear scaling is off; the mapping is piecewise in `distance`.
+float DistanceToLinearQuality(float distance) {
+  if (distance <= 0.1f) return 1.0f;
+  if (distance <= 4.6f) return (200.0f / 9.0f) * (distance - 0.1f);
+  if (distance <= 6.4f) {
+    return 5000.0f / (100.0f - (distance - 0.1f) / 0.09f);
+  }
+  if (distance < 25.0f) {
+    const float root = std::sqrt((848.0f * distance - 5330.0f) / 120.0f);
+    return 530000.0f / (3450.0f - 300.0f * root);
+  }
+  // Distances of 25 and above all map to the fixed value 5000.
+  return 5000.0f;
+}
+
+constexpr float kExponent[DCTSIZE2] = {
+    1.00f, 0.51f, 0.67f, 0.74f, 1.00f, 1.00f, 1.00f, 1.00f,  //
+    0.51f, 0.66f, 0.69f, 0.87f, 1.00f, 1.00f, 1.00f, 1.00f,  //
+    0.67f, 0.69f, 0.84f, 0.83f, 0.96f, 1.00f, 1.00f, 1.00f,  //
+    0.74f, 0.87f, 0.83f, 1.00f, 1.00f, 0.91f, 0.91f, 1.00f,  //
+    1.00f, 1.00f, 0.96f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f,  //
+    1.00f, 1.00f, 1.00f, 0.91f, 1.00f, 1.00f, 1.00f, 1.00f,  //
+    1.00f, 1.00f, 1.00f, 0.91f, 1.00f, 1.00f, 1.00f, 1.00f,  //
+    1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f,  //
+};
+constexpr float kDist0 = 1.5f;  // distance where non-linearity kicks in.
+
+// Below kDist0 the scale equals the distance; from kDist0 on it follows a
+// power law with exponent kExponent[k], floored at half the distance.
+float DistanceToScale(float distance, int k) {
+  if (distance < kDist0) return distance;
+  const float power = kExponent[k];
+  const float factor = std::pow(kDist0, 1.0 - power);
+  return std::max<float>(0.5f * distance, factor * std::pow(distance, power));
+}
+
+// Below kDist0 the distance equals the scale; from kDist0 on it follows a
+// power law with exponent 1 / kExponent[k], capped at twice the scale.
+float ScaleToDistance(float scale, int k) {
+  if (scale < kDist0) return scale;
+  const float inv_power = 1.0 / kExponent[k];
+  const float factor = std::pow(kDist0, 1.0 - inv_power);
+  return std::min<float>(2.0f * scale, factor * std::pow(scale, inv_power));
+}
+
+float QuantValsToDistance(j_compress_ptr cinfo) {
+  jpeg_comp_master* m = cinfo->master;
+  float global_scale = kGlobalScaleYCbCr;
+  if (m->cicp_transfer_function == kTransferFunctionPQ) {
+    global_scale *= .4f;
+  } else if (m->cicp_transfer_function == kTransferFunctionHLG) {
+    global_scale *= .5f;
+  }
+  int quant_max = m->force_baseline ? 255 : 32767U;
+  static const float kDistMax = 10000.0f;
+  float dist_min = 0.0f;
+  float dist_max = kDistMax;
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    int quant_idx = cinfo->comp_info[c].quant_tbl_no;
+    uint16_t* quantval = cinfo->quant_tbl_ptrs[quant_idx]->quantval;
+    const float* base_qm = &kBaseQuantMatrixYCbCr[quant_idx * DCTSIZE2];
+    for (int k = 0; k < DCTSIZE2; ++k) {
+      float dmin = 0.0;
+      float dmax = kDistMax;
+      float invq = 1.0f / base_qm[k] / global_scale;
+      int qval = quantval[k];
+      if (qval > 1) {
+        float scale_min = (qval - 0.5f) * invq;
+        dmin = ScaleToDistance(scale_min, k);
+      }
+      if (qval < quant_max) {
+        float scale_max = (qval + 0.5f) * invq;
+        dmax = ScaleToDistance(scale_max, k);
+      }
+      if (dmin <= dist_max) {
+        dist_min = std::max(dmin, dist_min);
+      }
+      if (dmax >= dist_min) {
+        dist_max = std::min(dist_max, dmax);
+      }
+    }
+  }
+  float distance;
+  if (dist_min == 0) {
+    distance = dist_max;
+  } else if (dist_max == kDistMax) {
+    distance = dist_min;
+  } else {
+    distance = 0.5f * (dist_min + dist_max);
+  }
+  return distance;
+}
+
+bool IsYUV420(j_compress_ptr cinfo) {
+  return (cinfo->jpeg_color_space == JCS_YCbCr &&
+          cinfo->comp_info[0].h_samp_factor == 2 &&
+          cinfo->comp_info[0].v_samp_factor == 2 &&
+          cinfo->comp_info[1].h_samp_factor == 1 &&
+          cinfo->comp_info[1].v_samp_factor == 1 &&
+          cinfo->comp_info[2].h_samp_factor == 1 &&
+          cinfo->comp_info[2].v_samp_factor == 1);
+}
+
+}  // namespace
+
+void SetQuantMatrices(j_compress_ptr cinfo, float distances[NUM_QUANT_TBLS],
+                      bool add_two_chroma_tables) {
+  jpeg_comp_master* m = cinfo->master;
+  const bool xyb = m->xyb_mode && cinfo->jpeg_color_space == JCS_RGB;
+  const bool is_yuv420 = IsYUV420(cinfo);
+
+  float global_scale;
+  bool non_linear_scaling = true;
+  const float* base_quant_matrix[NUM_QUANT_TBLS];
+  int num_base_tables;
+
+  if (xyb) {
+    global_scale = kGlobalScaleXYB;
+    num_base_tables = 3;
+    base_quant_matrix[0] = kBaseQuantMatrixXYB;
+    base_quant_matrix[1] = kBaseQuantMatrixXYB + DCTSIZE2;
+    base_quant_matrix[2] = kBaseQuantMatrixXYB + 2 * DCTSIZE2;
+  } else if (cinfo->jpeg_color_space == JCS_YCbCr && !m->use_std_tables) {
+    global_scale = kGlobalScaleYCbCr;
+    if (m->cicp_transfer_function == kTransferFunctionPQ) {
+      global_scale *= .4f;
+    } else if (m->cicp_transfer_function == kTransferFunctionHLG) {
+      global_scale *= .5f;
+    }
+    if (is_yuv420) {
+      global_scale *= k420GlobalScale;
+    }
+    if (add_two_chroma_tables) {
+      cinfo->comp_info[2].quant_tbl_no = 2;
+      num_base_tables = 3;
+      base_quant_matrix[0] = kBaseQuantMatrixYCbCr;
+      base_quant_matrix[1] = kBaseQuantMatrixYCbCr + DCTSIZE2;
+      base_quant_matrix[2] = kBaseQuantMatrixYCbCr + 2 * DCTSIZE2;
+    } else {
+      num_base_tables = 2;
+      base_quant_matrix[0] = kBaseQuantMatrixYCbCr;
+      // Use the Cr table for both Cb and Cr.
+      base_quant_matrix[1] = kBaseQuantMatrixYCbCr + 2 * DCTSIZE2;
+    }
+  } else {
+    global_scale = 0.01f;
+    non_linear_scaling = false;
+    num_base_tables = 2;
+    base_quant_matrix[0] = kBaseQuantMatrixStd;
+    base_quant_matrix[1] = kBaseQuantMatrixStd + DCTSIZE2;
+  }
+
+  int quant_max = m->force_baseline ? 255 : 32767U;
+  for (int quant_idx = 0; quant_idx < num_base_tables; ++quant_idx) {
+    const float* base_qm = base_quant_matrix[quant_idx];
+    JQUANT_TBL** qtable = &cinfo->quant_tbl_ptrs[quant_idx];
+    if (*qtable == nullptr) {
+      *qtable = jpegli_alloc_quant_table(reinterpret_cast<j_common_ptr>(cinfo));
+    }
+    for (int k = 0; k < DCTSIZE2; ++k) {
+      float scale = global_scale;
+      if (non_linear_scaling) {
+        scale *= DistanceToScale(distances[quant_idx], k);
+        if (is_yuv420 && quant_idx > 0) {
+          scale *= k420Rescale[k];
+        }
+      } else {
+        scale *= DistanceToLinearQuality(distances[quant_idx]);
+      }
+      int qval = std::round(scale * base_qm[k]);
+      (*qtable)->quantval[k] = std::max(1, std::min(qval, quant_max));
+    }
+    (*qtable)->sent_table = FALSE;
+  }
+}
+
+void InitQuantizer(j_compress_ptr cinfo, QuantPass pass) {
+  jpeg_comp_master* m = cinfo->master;
+  // Compute quantization multipliers from the quant table values.
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    int quant_idx = cinfo->comp_info[c].quant_tbl_no;
+    JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[quant_idx];
+    if (!quant_table) {
+      JPEGLI_ERROR("Missing quantization table %d for component %d", quant_idx,
+                   c);
+    }
+    for (size_t k = 0; k < DCTSIZE2; k++) {
+      int val = quant_table->quantval[k];
+      if (val == 0) {
+        JPEGLI_ERROR("Invalid quantval 0.");
+      }
+      switch (pass) {
+        case QuantPass::NO_SEARCH:
+          m->quant_mul[c][k] = 8.0f / val;
+          break;
+        case QuantPass::SEARCH_FIRST_PASS:
+          m->quant_mul[c][k] = 128.0f;
+          break;
+        case QuantPass::SEARCH_SECOND_PASS:
+          m->quant_mul[c][kJPEGZigZagOrder[k]] = 1.0f / (16 * val);
+          break;
+      }
+    }
+  }
+  if (m->use_adaptive_quantization) {
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      for (int k = 0; k < DCTSIZE2; ++k) {
+        m->zero_bias_mul[c][k] = k == 0 ? 0.0f : 0.5f;
+        m->zero_bias_offset[c][k] = k == 0 ? 0.0f : 0.5f;
+      }
+    }
+    if (cinfo->jpeg_color_space == JCS_YCbCr) {
+      float distance = QuantValsToDistance(cinfo);
+      static const float kDistHQ = 1.0f;
+      static const float kDistLQ = 3.0f;
+      float mix0 = (distance - kDistHQ) / (kDistLQ - kDistHQ);
+      mix0 = std::max(0.0f, std::min(1.0f, mix0));
+      float mix1 = 1.0f - mix0;
+      for (int c = 0; c < cinfo->num_components; ++c) {
+        for (int k = 0; k < DCTSIZE2; ++k) {
+          float mul0 = kZeroBiasMulYCbCrLQ[c * DCTSIZE2 + k];
+          float mul1 = kZeroBiasMulYCbCrHQ[c * DCTSIZE2 + k];
+          m->zero_bias_mul[c][k] = mix0 * mul0 + mix1 * mul1;
+          m->zero_bias_offset[c][k] =
+              k == 0 ? kZeroBiasOffsetYCbCrDC[c] : kZeroBiasOffsetYCbCrAC[c];
+        }
+      }
+    }
+  } else if (cinfo->jpeg_color_space == JCS_YCbCr) {
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      for (int k = 0; k < DCTSIZE2; ++k) {
+        m->zero_bias_offset[c][k] =
+            k == 0 ? kZeroBiasOffsetYCbCrDC[c] : kZeroBiasOffsetYCbCrAC[c];
+      }
+    }
+  }
+}
+
+}  // namespace jpegli
diff --git a/third-party/libjxl/libjxl/lib/jpegli/quant.h b/third-party/libjxl/libjxl/lib/jpegli/quant.h
new file mode 100644
index 0000000000..cb37757ae2
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/quant.h
@@ -0,0 +1,26 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_QUANT_H_
+#define LIB_JPEGLI_QUANT_H_
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+void SetQuantMatrices(j_compress_ptr cinfo, float distances[NUM_QUANT_TBLS],
+                      bool add_two_chroma_tables);
+
+enum QuantPass {
+  NO_SEARCH,
+  SEARCH_FIRST_PASS,
+  SEARCH_SECOND_PASS,
+};
+
+void InitQuantizer(j_compress_ptr cinfo, QuantPass pass);
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_QUANT_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/render.cc b/third-party/libjxl/libjxl/lib/jpegli/render.cc
new file mode 100644
index 0000000000..24e7e99618
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/render.cc
@@ -0,0 +1,763 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/render.h"
+
+#include <string.h>
+
+#include <array>
+#include <atomic>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <hwy/aligned_allocator.h>
+#include <vector>
+
+#include "lib/jpegli/color_quantize.h"
+#include "lib/jpegli/color_transform.h"
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/idct.h"
+#include "lib/jpegli/upsample.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+#ifdef MEMORY_SANITIZER
+#define JXL_MEMORY_SANITIZER 1
+#elif defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#define JXL_MEMORY_SANITIZER 1
+#else
+#define JXL_MEMORY_SANITIZER 0
+#endif
+#else
+#define JXL_MEMORY_SANITIZER 0
+#endif
+
+#if JXL_MEMORY_SANITIZER
+#include "sanitizer/msan_interface.h"
+#endif
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/render.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Clamp;
+using hwy::HWY_NAMESPACE::Gt;
+using hwy::HWY_NAMESPACE::IfThenElseZero;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::NearestInt;
+using hwy::HWY_NAMESPACE::Or;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::ShiftLeftSame;
+using hwy::HWY_NAMESPACE::ShiftRightSame;
+using hwy::HWY_NAMESPACE::Vec;
+using D = HWY_FULL(float);
+using DI = HWY_FULL(int32_t);
+constexpr D d;
+constexpr DI di;
+
+void GatherBlockStats(const int16_t* JXL_RESTRICT coeffs,
+                      const size_t coeffs_size, int32_t* JXL_RESTRICT nonzeros,
+                      int32_t* JXL_RESTRICT sumabs) {
+  for (size_t i = 0; i < coeffs_size; i += Lanes(d)) {
+    size_t k = i % DCTSIZE2;
+    const Rebind<int16_t, DI> di16;
+    const Vec<DI> coeff = PromoteTo(di, Load(di16, coeffs + i));
+    const auto abs_coeff = Abs(coeff);
+    const auto not_0 = Gt(abs_coeff, Zero(di));
+    const auto nzero = IfThenElseZero(not_0, Set(di, 1));
+    Store(Add(nzero, Load(di, nonzeros + k)), di, nonzeros + k);
+    Store(Add(abs_coeff, Load(di, sumabs + k)), di, sumabs + k);
+  }
+}
+
+void DecenterRow(float* row, size_t xsize) {
+  const HWY_CAPPED(float, 8) df;
+  const auto c128 = Set(df, 128.0f / 255);
+  for (size_t x = 0; x < xsize; x += Lanes(df)) {
+    Store(Add(Load(df, row + x), c128), df, row + x);
+  }
+}
+
+void DitherRow(j_decompress_ptr cinfo, float* row, int c, size_t y,
+               size_t xsize) {
+  jpeg_decomp_master* m = cinfo->master;
+  if (!m->dither_[c]) return;
+  const float* dither_row =
+      &m->dither_[c][(y & m->dither_mask_) * m->dither_size_];
+  for (size_t x = 0; x < xsize; ++x) {
+    row[x] += dither_row[x & m->dither_mask_];
+  }
+}
+
+template <typename T>
+void StoreUnsignedRow(float* JXL_RESTRICT input[], size_t x0, size_t len,
+                      size_t num_channels, float multiplier, T* output) {
+  const HWY_CAPPED(float, 8) d;
+  auto zero = Zero(d);
+  auto mul = Set(d, multiplier);
+  const Rebind<T, decltype(d)> du;
+#if JXL_MEMORY_SANITIZER
+  const size_t padding = hwy::RoundUpTo(len, Lanes(d)) - len;
+  for (size_t c = 0; c < num_channels; ++c) {
+    __msan_unpoison(input[c] + x0 + len, sizeof(input[c][0]) * padding);
+  }
+#endif
+  if (num_channels == 1) {
+    for (size_t i = 0; i < len; i += Lanes(d)) {
+      auto v0 = Clamp(zero, Mul(LoadU(d, &input[0][x0 + i]), mul), mul);
+      StoreU(DemoteTo(du, NearestInt(v0)), du, &output[i]);
+    }
+  } else if (num_channels == 2) {
+    for (size_t i = 0; i < len; i += Lanes(d)) {
+      auto v0 = Clamp(zero, Mul(LoadU(d, &input[0][x0 + i]), mul), mul);
+      auto v1 = Clamp(zero, Mul(LoadU(d, &input[1][x0 + i]), mul), mul);
+      StoreInterleaved2(DemoteTo(du, NearestInt(v0)),
+                        DemoteTo(du, NearestInt(v1)), du, &output[2 * i]);
+    }
+  } else if (num_channels == 3) {
+    for (size_t i = 0; i < len; i += Lanes(d)) {
+      auto v0 = Clamp(zero, Mul(LoadU(d, &input[0][x0 + i]), mul), mul);
+      auto v1 = Clamp(zero, Mul(LoadU(d, &input[1][x0 + i]), mul), mul);
+      auto v2 = Clamp(zero, Mul(LoadU(d, &input[2][x0 + i]), mul), mul);
+      StoreInterleaved3(DemoteTo(du, NearestInt(v0)),
+                        DemoteTo(du, NearestInt(v1)),
+                        DemoteTo(du, NearestInt(v2)), du, &output[3 * i]);
+    }
+  } else if (num_channels == 4) {
+    for (size_t i = 0; i < len; i += Lanes(d)) {
+      auto v0 = Clamp(zero, Mul(LoadU(d, &input[0][x0 + i]), mul), mul);
+      auto v1 = Clamp(zero, Mul(LoadU(d, &input[1][x0 + i]), mul), mul);
+      auto v2 = Clamp(zero, Mul(LoadU(d, &input[2][x0 + i]), mul), mul);
+      auto v3 = Clamp(zero, Mul(LoadU(d, &input[3][x0 + i]), mul), mul);
+      StoreInterleaved4(DemoteTo(du, NearestInt(v0)),
+                        DemoteTo(du, NearestInt(v1)),
+                        DemoteTo(du, NearestInt(v2)),
+                        DemoteTo(du, NearestInt(v3)), du, &output[4 * i]);
+    }
+  }
+#if JXL_MEMORY_SANITIZER
+  __msan_poison(output + num_channels * len,
+                sizeof(output[0]) * num_channels * padding);
+#endif
+}
+
+void StoreFloatRow(float* JXL_RESTRICT input[3], size_t x0, size_t len,
+                   size_t num_channels, float* output) {
+  const HWY_CAPPED(float, 8) d;
+  if (num_channels == 1) {
+    memcpy(output, input[0] + x0, len * sizeof(output[0]));
+  } else if (num_channels == 2) {
+    for (size_t i = 0; i < len; i += Lanes(d)) {
+      StoreInterleaved2(LoadU(d, &input[0][x0 + i]),
+                        LoadU(d, &input[1][x0 + i]), d, &output[2 * i]);
+    }
+  } else if (num_channels == 3) {
+    for (size_t i = 0; i < len; i += Lanes(d)) {
+      StoreInterleaved3(LoadU(d, &input[0][x0 + i]),
+                        LoadU(d, &input[1][x0 + i]),
+                        LoadU(d, &input[2][x0 + i]), d, &output[3 * i]);
+    }
+  } else if (num_channels == 4) {
+    for (size_t i = 0; i < len; i += Lanes(d)) {
+      StoreInterleaved4(LoadU(d, &input[0][x0 + i]),
+                        LoadU(d, &input[1][x0 + i]),
+                        LoadU(d, &input[2][x0 + i]),
+                        LoadU(d, &input[3][x0 + i]), d, &output[4 * i]);
+    }
+  }
+}
+
+static constexpr float kFSWeightMR = 7.0f / 16.0f;
+static constexpr float kFSWeightBL = 3.0f / 16.0f;
+static constexpr float kFSWeightBM = 5.0f / 16.0f;
+static constexpr float kFSWeightBR = 1.0f / 16.0f;
+
+float LimitError(float error) {
+  float abserror = std::abs(error);
+  if (abserror > 48.0f) {
+    abserror = 32.0f;
+  } else if (abserror > 16.0f) {
+    abserror = 0.5f * abserror + 8.0f;
+  }
+  return error > 0.0f ? abserror : -abserror;
+}
+
+// Writes `len` pixels to `output`, reading each channel row in `rows` from
+// column `xoffset` and converting to the output type or to colormap indices.
+void WriteToOutput(j_decompress_ptr cinfo, float* JXL_RESTRICT rows[],
+                   size_t xoffset, size_t len, size_t num_channels,
+                   uint8_t* JXL_RESTRICT output) {
+  jpeg_decomp_master* m = cinfo->master;
+  uint8_t* JXL_RESTRICT scratch_space = m->output_scratch_;
+  if (cinfo->quantize_colors && m->quant_pass_ == 1) {
+    float* error_row[kMaxComponents];
+    float* next_error_row[kMaxComponents];
+    if (cinfo->dither_mode == JDITHER_ORDERED) {
+      for (size_t c = 0; c < num_channels; ++c) {
+        DitherRow(cinfo, &rows[c][xoffset], c, cinfo->output_scanline,
+                  cinfo->output_width);
+      }
+    } else if (cinfo->dither_mode == JDITHER_FS) {
+      const bool even = cinfo->output_scanline % 2 == 0;  // buffers alternate
+      for (size_t c = 0; c < num_channels; ++c) {
+        error_row[c] = m->error_row_[even ? c : c + kMaxComponents];
+        next_error_row[c] = m->error_row_[even ? c + kMaxComponents : c];
+        memset(next_error_row[c], 0, cinfo->output_width * sizeof(float));
+      }
+    }
+    const float mul = 255.0f;
+    if (cinfo->dither_mode != JDITHER_FS) {
+      StoreUnsignedRow(rows, xoffset, len, num_channels, mul, scratch_space);
+    }
+    for (size_t i = 0; i < len; ++i) {
+      uint8_t* pixel = &scratch_space[num_channels * i];
+      if (cinfo->dither_mode == JDITHER_FS) {
+        for (size_t c = 0; c < num_channels; ++c) {
+          // `rows` is indexed from the row start, so add the column offset.
+          float val = rows[c][xoffset + i] * mul + LimitError(error_row[c][i]);
+          pixel[c] = std::round(std::min(255.0f, std::max(0.0f, val)));
+        }
+      }
+      int index = LookupColorIndex(cinfo, pixel);
+      output[i] = index;
+      if (cinfo->dither_mode == JDITHER_FS) {
+        // Spread the color error to the right neighbor and the next row.
+        size_t prev_i = i > 0 ? i - 1 : 0;
+        size_t next_i = i + 1 < len ? i + 1 : len - 1;
+        for (size_t c = 0; c < num_channels; ++c) {
+          float error = pixel[c] - cinfo->colormap[c][index];
+          error_row[c][next_i] += kFSWeightMR * error;
+          next_error_row[c][prev_i] += kFSWeightBL * error;
+          next_error_row[c][i] += kFSWeightBM * error;
+          next_error_row[c][next_i] += kFSWeightBR * error;
+        }
+      }
+    }
+  } else if (m->output_data_type_ == JPEGLI_TYPE_UINT8) {
+    const float mul = 255.0;
+    StoreUnsignedRow(rows, xoffset, len, num_channels, mul, scratch_space);
+    memcpy(output, scratch_space, len * num_channels);
+  } else if (m->output_data_type_ == JPEGLI_TYPE_UINT16) {
+    const float mul = 65535.0;
+    uint16_t* tmp = reinterpret_cast<uint16_t*>(scratch_space);
+    StoreUnsignedRow(rows, xoffset, len, num_channels, mul, tmp);
+    if (m->swap_endianness_) {
+      const HWY_CAPPED(uint16_t, 8) du;
+      size_t output_len = len * num_channels;
+      for (size_t j = 0; j < output_len; j += Lanes(du)) {
+        auto v = LoadU(du, tmp + j);
+        auto vswap = Or(ShiftRightSame(v, 8), ShiftLeftSame(v, 8));
+        StoreU(vswap, du, tmp + j);
+      }
+    }
+    memcpy(output, tmp, len * num_channels * 2);
+  } else if (m->output_data_type_ == JPEGLI_TYPE_FLOAT) {
+    float* tmp = reinterpret_cast<float*>(scratch_space);
+    StoreFloatRow(rows, xoffset, len, num_channels, tmp);
+    if (m->swap_endianness_) {
+      size_t output_len = len * num_channels;
+      for (size_t j = 0; j < output_len; ++j) {
+        tmp[j] = BSwapFloat(tmp[j]);
+      }
+    }
+    memcpy(output, tmp, len * num_channels * 4);
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace jpegli {
+
+HWY_EXPORT(GatherBlockStats);
+HWY_EXPORT(WriteToOutput);
+HWY_EXPORT(DecenterRow);
+
+void GatherBlockStats(const int16_t* JXL_RESTRICT coeffs,
+                      const size_t coeffs_size, int32_t* JXL_RESTRICT nonzeros,
+                      int32_t* JXL_RESTRICT sumabs) {
+  return HWY_DYNAMIC_DISPATCH(GatherBlockStats)(coeffs, coeffs_size, nonzeros,
+                                                sumabs);
+}
+
+void WriteToOutput(j_decompress_ptr cinfo, float* JXL_RESTRICT rows[],
+                   size_t xoffset, size_t len, size_t num_channels,
+                   uint8_t* JXL_RESTRICT output) {
+  return HWY_DYNAMIC_DISPATCH(WriteToOutput)(cinfo, rows, xoffset, len,
+                                             num_channels, output);
+}
+
+void DecenterRow(float* row, size_t xsize) {
+  return HWY_DYNAMIC_DISPATCH(DecenterRow)(row, xsize);
+}
+
+bool ShouldApplyDequantBiases(j_decompress_ptr cinfo, int ci) {
+  const auto& compinfo = cinfo->comp_info[ci];
+  return (compinfo.h_samp_factor == cinfo->max_h_samp_factor &&
+          compinfo.v_samp_factor == cinfo->max_v_samp_factor);
+}
+
+// See the following article for the details:
+// J. R. Price and M. Rabbani, "Dequantization bias for JPEG decompression"
+// Proceedings International Conference on Information Technology: Coding and
+// Computing (Cat. No.PR00540), 2000, pp. 30-35, doi: 10.1109/ITCC.2000.844179.
+void ComputeOptimalLaplacianBiases(const int num_blocks, const int* nonzeros,
+                                   const int* sumabs, float* biases) {
+  for (size_t k = 1; k < DCTSIZE2; ++k) {
+    if (nonzeros[k] == 0) {
+      biases[k] = 0.5f;
+      continue;
+    }
+    // Notation adapted from the article
+    float N = num_blocks;
+    float N1 = nonzeros[k];
+    float N0 = num_blocks - N1;
+    float S = sumabs[k];
+    // Compute gamma from N0, N1, N, S (eq. 11), with A and B being just
+    // temporary grouping of terms.
+    float A = 4.0 * S + 2.0 * N;
+    float B = 4.0 * S - 2.0 * N1;
+    float gamma = (-1.0 * N0 + std::sqrt(N0 * N0 * 1.0 + A * B)) / A;
+    float gamma2 = gamma * gamma;
+    // The bias is computed from gamma with (eq. 5), where the quantization
+    // multiplier Q can be factored out and thus the bias can be applied
+    // directly on the quantized coefficient.
+    biases[k] =
+        0.5 * (((1.0 + gamma2) / (1.0 - gamma2)) + 1.0 / std::log(gamma));
+  }
+}
+
+constexpr std::array<int, SAVED_COEFS> Q_POS = {0, 1, 8,  16, 9,
+                                                2, 3, 10, 17, 24};
+
+bool is_nonzero_quantizers(const JQUANT_TBL* qtable) {
+  return std::all_of(Q_POS.begin(), Q_POS.end(),
+                     [&](int pos) { return qtable->quantval[pos] != 0; });
+}
+
+// Determine whether smoothing should be applied during decompression
+bool do_smoothing(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* m = cinfo->master;
+  bool smoothing_useful = false;
+
+  if (!cinfo->progressive_mode || cinfo->coef_bits == nullptr) {
+    return false;
+  }
+  auto coef_bits_latch = m->coef_bits_latch;
+  auto prev_coef_bits_latch = m->prev_coef_bits_latch;
+
+  for (int ci = 0; ci < cinfo->num_components; ci++) {
+    jpeg_component_info* compptr = &cinfo->comp_info[ci];
+    JQUANT_TBL* qtable = compptr->quant_table;
+    int* coef_bits = cinfo->coef_bits[ci];
+    int* prev_coef_bits = cinfo->coef_bits[ci + cinfo->num_components];
+
+    // Return early if conditions for smoothing are not met
+    if (qtable == nullptr || !is_nonzero_quantizers(qtable) ||
+        coef_bits[0] < 0) {
+      return false;
+    }
+
+    coef_bits_latch[ci][0] = coef_bits[0];
+
+    for (int coefi = 1; coefi < SAVED_COEFS; coefi++) {
+      prev_coef_bits_latch[ci][coefi] =
+          cinfo->input_scan_number > 1 ? prev_coef_bits[coefi] : -1;
+      if (coef_bits[coefi] != 0) {
+        smoothing_useful = true;
+      }
+      coef_bits_latch[ci][coefi] = coef_bits[coefi];
+    }
+  }
+
+  return smoothing_useful;
+}
+
+void PredictSmooth(j_decompress_ptr cinfo, JBLOCKARRAY blocks, int component,
+                   size_t bx, int iy) {
+  const size_t imcu_row = cinfo->output_iMCU_row;
+  int16_t* scratch = cinfo->master->smoothing_scratch_;
+  std::vector<int> Q_VAL(SAVED_COEFS);
+  int* coef_bits;
+
+  std::array<std::array<int, 5>, 5> dc_values;
+  auto& compinfo = cinfo->comp_info[component];
+  const size_t by0 = imcu_row * compinfo.v_samp_factor;
+  const size_t by = by0 + iy;
+
+  int prev_iy = by > 0 ? iy - 1 : 0;
+  int prev_prev_iy = by > 1 ? iy - 2 : prev_iy;
+  int next_iy = by + 1 < compinfo.height_in_blocks ? iy + 1 : iy;
+  int next_next_iy = by + 2 < compinfo.height_in_blocks ? iy + 2 : next_iy;
+
+  const int16_t* cur_row = blocks[iy][bx];
+  const int16_t* prev_row = blocks[prev_iy][bx];
+  const int16_t* prev_prev_row = blocks[prev_prev_iy][bx];
+  const int16_t* next_row = blocks[next_iy][bx];
+  const int16_t* next_next_row = blocks[next_next_iy][bx];
+
+  int prev_block_ind = bx ? -DCTSIZE2 : 0;
+  int prev_prev_block_ind = bx > 1 ? -2 * DCTSIZE2 : prev_block_ind;
+  int next_block_ind = bx + 1 < compinfo.width_in_blocks ? DCTSIZE2 : 0;
+  int next_next_block_ind =
+      bx + 2 < compinfo.width_in_blocks ? DCTSIZE2 * 2 : next_block_ind;
+
+  std::array<const int16_t*, 5> row_ptrs = {prev_prev_row, prev_row, cur_row,
+                                            next_row, next_next_row};
+  std::array<int, 5> block_inds = {prev_prev_block_ind, prev_block_ind, 0,
+                                   next_block_ind, next_next_block_ind};
+
+  memcpy(scratch, cur_row, DCTSIZE2 * sizeof(cur_row[0]));
+
+  for (int r = 0; r < 5; ++r) {
+    for (int c = 0; c < 5; ++c) {
+      dc_values[r][c] = row_ptrs[r][block_inds[c]];
+    }
+  }
+  // Get the correct coef_bits: In case of an incomplete scan, we use the
+  // prev coefficients.
+  if (cinfo->output_iMCU_row + 1 > cinfo->input_iMCU_row) {
+    coef_bits = cinfo->master->prev_coef_bits_latch[component];
+  } else {
+    coef_bits = cinfo->master->coef_bits_latch[component];
+  }
+
+  bool change_dc = true;
+  for (int i = 1; i < SAVED_COEFS; i++) {
+    if (coef_bits[i] != -1) {
+      change_dc = false;
+      break;
+    }
+  }
+
+  JQUANT_TBL* quanttbl = cinfo->quant_tbl_ptrs[compinfo.quant_tbl_no];
+  for (size_t i = 0; i < 6; ++i) {
+    Q_VAL[i] = quanttbl->quantval[Q_POS[i]];
+  }
+  if (change_dc) {
+    for (size_t i = 6; i < SAVED_COEFS; ++i) {
+      Q_VAL[i] = quanttbl->quantval[Q_POS[i]];
+    }
+  }
+  auto calculate_dct_value = [&](int coef_index) {
+    int64_t num = 0;
+    int pred;
+    int Al;
+    // we use the symmetry of the smoothing matrices by transposing the 5x5 dc
+    // matrix in that case.
+    bool swap_indices = coef_index == 2 || coef_index == 5 || coef_index == 8 ||
+                        coef_index == 9;
+    auto dc = [&](int i, int j) {
+      return swap_indices ? dc_values[j][i] : dc_values[i][j];
+    };
+    Al = coef_bits[coef_index];
+    switch (coef_index) {
+      case 0:
+        // set the DC
+        num = (-2 * dc(0, 0) - 6 * dc(0, 1) - 8 * dc(0, 2) - 6 * dc(0, 3) -
+               2 * dc(0, 4) - 6 * dc(1, 0) + 6 * dc(1, 1) + 42 * dc(1, 2) +
+               6 * dc(1, 3) - 6 * dc(1, 4) - 8 * dc(2, 0) + 42 * dc(2, 1) +
+               152 * dc(2, 2) + 42 * dc(2, 3) - 8 * dc(2, 4) - 6 * dc(3, 0) +
+               6 * dc(3, 1) + 42 * dc(3, 2) + 6 * dc(3, 3) - 6 * dc(3, 4) -
+               2 * dc(4, 0) - 6 * dc(4, 1) - 8 * dc(4, 2) - 6 * dc(4, 3) -
+               2 * dc(4, 4));
+        // special case: for the DC the dequantization is different
+        Al = 0;
+        break;
+      case 1:
+      case 2:
+        // set Q01 or Q10
+        num = (change_dc ? (-dc(0, 0) - dc(0, 1) + dc(0, 3) + dc(0, 4) -
+                            3 * dc(1, 0) + 13 * dc(1, 1) - 13 * dc(1, 3) +
+                            3 * dc(1, 4) - 3 * dc(2, 0) + 38 * dc(2, 1) -
+                            38 * dc(2, 3) + 3 * dc(2, 4) - 3 * dc(3, 0) +
+                            13 * dc(3, 1) - 13 * dc(3, 3) + 3 * dc(3, 4) -
+                            dc(4, 0) - dc(4, 1) + dc(4, 3) + dc(4, 4))
+                         : (-7 * dc(2, 0) + 50 * dc(2, 1) - 50 * dc(2, 3) +
+                            7 * dc(2, 4)));
+        break;
+      case 3:
+      case 5:
+        // set Q02 or Q20
+        num = (change_dc
+                   ? dc(0, 2) + 2 * dc(1, 1) + 7 * dc(1, 2) + 2 * dc(1, 3) -
+                         5 * dc(2, 1) - 14 * dc(2, 2) - 5 * dc(2, 3) +
+                         2 * dc(3, 1) + 7 * dc(3, 2) + 2 * dc(3, 3) + dc(4, 2)
+                   : (-dc(0, 2) + 13 * dc(1, 2) - 24 * dc(2, 2) +
+                      13 * dc(3, 2) - dc(4, 2)));
+        break;
+      case 4:
+        // set Q11
+        num =
+            (change_dc ? -dc(0, 0) + dc(0, 4) + 9 * dc(1, 1) - 9 * dc(1, 3) -
+                             9 * dc(3, 1) + 9 * dc(3, 3) + dc(4, 0) - dc(4, 4)
+                       : (dc(1, 4) + dc(3, 0) - 10 * dc(3, 1) + 10 * dc(3, 3) -
+                          dc(0, 1) - dc(3, 4) + dc(4, 1) - dc(4, 3) + dc(0, 3) -
+                          dc(1, 0) + 10 * dc(1, 1) - 10 * dc(1, 3)));
+        break;
+      case 6:
+      case 9:
+        // set Q03 or Q30
+        num = (dc(1, 1) - dc(1, 3) + 2 * dc(2, 1) - 2 * dc(2, 3) + dc(3, 1) -
+               dc(3, 3));
+        break;
+      case 7:
+      case 8:
+        // set Q12 and Q21
+        num = (dc(1, 1) - 3 * dc(1, 2) + dc(1, 3) - dc(3, 1) + 3 * dc(3, 2) -
+               dc(3, 3));
+        break;
+    }
+    num = Q_VAL[0] * num;
+    if (num >= 0) {
+      pred = ((Q_VAL[coef_index] << 7) + num) / (Q_VAL[coef_index] << 8);
+      if (Al > 0 && pred >= (1 << Al)) pred = (1 << Al) - 1;
+    } else {
+      pred = ((Q_VAL[coef_index] << 7) - num) / (Q_VAL[coef_index] << 8);
+      if (Al > 0 && pred >= (1 << Al)) pred = (1 << Al) - 1;
+      pred = -pred;
+    }
+    return static_cast<int16_t>(pred);
+  };
+
+  int loop_end = change_dc ? SAVED_COEFS : 6;
+  for (int i = 1; i < loop_end; ++i) {
+    if (coef_bits[i] != 0 && scratch[Q_POS[i]] == 0) {
+      scratch[Q_POS[i]] = calculate_dct_value(i);
+    }
+  }
+  if (change_dc) {
+    scratch[0] = calculate_dct_value(0);
+  }
+}
+
+void PrepareForOutput(j_decompress_ptr cinfo) {
+  // Clears the per-pass statistics and fills the dequantization multipliers.
+  jpeg_decomp_master* dec = cinfo->master;
+  dec->apply_smoothing = do_smoothing(cinfo) && cinfo->do_block_smoothing;
+  const size_t total_coeffs = cinfo->num_components * DCTSIZE2;
+  memset(dec->biases_, 0, total_coeffs * sizeof(dec->biases_[0]));
+  memset(dec->sumabs_, 0, total_coeffs * sizeof(dec->sumabs_[0]));
+  memset(dec->nonzeros_, 0, total_coeffs * sizeof(dec->nonzeros_[0]));
+  memset(dec->num_processed_blocks_, 0, sizeof(dec->num_processed_blocks_));
+  cinfo->output_scanline = 0;
+  cinfo->output_iMCU_row = 0;
+  constexpr float kScale = 1.0f / (8 * 255);
+  for (int comp = 0; comp < cinfo->num_components; ++comp) {
+    const JQUANT_TBL* qtable = cinfo->comp_info[comp].quant_table;
+    if (qtable == nullptr) continue;  // component without a quant table
+    auto* dequant = &dec->dequant_[comp * DCTSIZE2];
+    for (size_t k = 0; k < DCTSIZE2; ++k) {
+      dequant[k] = qtable->quantval[k] * kScale;
+    }
+  }
+  ChooseInverseTransform(cinfo);
+  ChooseColorTransform(cinfo);
+}
+
+void DecodeCurrentiMCURow(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* m = cinfo->master;
+  const size_t imcu_row = cinfo->output_iMCU_row;
+  JBLOCKARRAY ba[kMaxComponents];  // per-component coefficient rows of this iMCU row
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    const jpeg_component_info* comp = &cinfo->comp_info[c];
+    int by0 = imcu_row * comp->v_samp_factor;
+    int block_rows_left = comp->height_in_blocks - by0;
+    int max_block_rows = std::min(comp->v_samp_factor, block_rows_left);
+    int offset = m->streaming_mode_ ? 0 : by0;  // streaming mode always reads from row 0
+    ba[c] = (*cinfo->mem->access_virt_barray)(
+        reinterpret_cast<j_common_ptr>(cinfo), m->coef_arrays[c], offset,
+        max_block_rows, false);
+  }
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    size_t k0 = c * DCTSIZE2;  // start of component c in the per-coefficient arrays
+    auto& compinfo = cinfo->comp_info[c];
+    size_t block_row = imcu_row * compinfo.v_samp_factor;
+    if (ShouldApplyDequantBiases(cinfo, c)) {
+      // Accumulate coefficient statistics over the block rows of this iMCU row.
+      for (int iy = 0; iy < compinfo.v_samp_factor; ++iy) {
+        size_t by = block_row + iy;
+        if (by >= compinfo.height_in_blocks) {
+          continue;  // past the last block row of this component
+        }
+        int16_t* JXL_RESTRICT coeffs = &ba[c][iy][0][0];
+        size_t num = compinfo.width_in_blocks * DCTSIZE2;
+        GatherBlockStats(coeffs, num, &m->nonzeros_[k0], &m->sumabs_[k0]);
+        m->num_processed_blocks_[c] += compinfo.width_in_blocks;
+      }
+      if (imcu_row % 4 == 3) {
+        // Re-compute the biases on every 4th iMCU row (rows 3, 7, 11, ...).
+        ComputeOptimalLaplacianBiases(m->num_processed_blocks_[c],
+                                      &m->nonzeros_[k0], &m->sumabs_[k0],
+                                      &m->biases_[k0]);
+      }
+    }
+    RowBuffer<float>* raw_out = &m->raw_output_[c];
+    for (int iy = 0; iy < compinfo.v_samp_factor; ++iy) {
+      size_t by = block_row + iy;
+      if (by >= compinfo.height_in_blocks) {
+        continue;  // past the last block row of this component
+      }
+      size_t dctsize = m->scaled_dct_size[c];
+      int16_t* JXL_RESTRICT row_in = &ba[c][iy][0][0];
+      float* JXL_RESTRICT row_out = raw_out->Row(by * dctsize);  // top pixel row of this block row
+      for (size_t bx = 0; bx < compinfo.width_in_blocks; ++bx) {
+        if (m->apply_smoothing) {
+          PredictSmooth(cinfo, ba[c], c, bx, iy);  // presumably fills m->smoothing_scratch_ — confirm
+          (*m->inverse_transform[c])(m->smoothing_scratch_, &m->dequant_[k0],
+                                     &m->biases_[k0], m->idct_scratch_,
+                                     &row_out[bx * dctsize], raw_out->stride(),
+                                     dctsize);
+        } else {
+          (*m->inverse_transform[c])(&row_in[bx * DCTSIZE2], &m->dequant_[k0],
+                                     &m->biases_[k0], m->idct_scratch_,
+                                     &row_out[bx * dctsize], raw_out->stride(),
+                                     dctsize);
+        }
+      }
+      if (m->streaming_mode_) {
+        memset(row_in, 0, compinfo.width_in_blocks * sizeof(JBLOCK));  // zero the consumed coefficients
+      }
+    }
+  }
+}
+
+void ProcessRawOutput(j_decompress_ptr cinfo, JSAMPIMAGE data) {
+  jpegli::DecodeCurrentiMCURow(cinfo);  // fills raw_output_ for this iMCU row
+  jpeg_decomp_master* master = cinfo->master;
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    const jpeg_component_info& comp = cinfo->comp_info[c];
+    const size_t width = comp.width_in_blocks * DCTSIZE;
+    const size_t height = comp.height_in_blocks * DCTSIZE;
+    const size_t first = cinfo->output_iMCU_row * comp.v_samp_factor * DCTSIZE;
+    const size_t nrows = comp.v_samp_factor * DCTSIZE;
+    const size_t last = std::min(first + nrows, height);  // clamp at the bottom
+    for (size_t dy = 0; first + dy < last; ++dy) {
+      float* row[1] = {master->raw_output_[c].Row(first + dy)};
+      uint8_t* dst = data[c][dy];  // rows in `data` are relative to this iMCU row
+      DecenterRow(row[0], width);
+      WriteToOutput(cinfo, row, 0, width, 1, dst);
+    }
+  }
+  cinfo->output_scanline += cinfo->max_v_samp_factor * DCTSIZE;
+  ++cinfo->output_iMCU_row;
+  if (cinfo->output_scanline >= cinfo->output_height) {
+    ++master->output_passes_done_;
+  }
+}
+
+void ProcessOutput(j_decompress_ptr cinfo, size_t* num_output_rows,
+                   JSAMPARRAY scanlines, size_t max_output_rows) {
+  jpeg_decomp_master* m = cinfo->master;
+  const int vfactor = cinfo->max_v_samp_factor;
+  const int hfactor = cinfo->max_h_samp_factor;
+  const size_t context = m->need_context_rows_ ? 1 : 0;  // iMCU rows held back as context
+  const size_t imcu_row = cinfo->output_iMCU_row;
+  const size_t imcu_height = vfactor * m->min_scaled_dct_size;
+  const size_t imcu_width = hfactor * m->min_scaled_dct_size;
+  const size_t output_width = m->iMCU_cols_ * imcu_width;
+  if (imcu_row == cinfo->total_iMCU_rows ||
+      (imcu_row > context &&
+       cinfo->output_scanline < (imcu_row - context) * imcu_height)) {
+    // Enough iMCU rows are decoded to emit scanlines in [ybegin, yend).
+    size_t ybegin = cinfo->output_scanline;
+    size_t yend = (imcu_row == cinfo->total_iMCU_rows
+                       ? cinfo->output_height
+                       : (imcu_row - context) * imcu_height);
+    yend = std::min<size_t>(yend, ybegin + max_output_rows - *num_output_rows);  // caller's remaining capacity
+    size_t yb = (ybegin / vfactor) * vfactor;  // round down to a multiple of vfactor
+    size_t ye = DivCeil(yend, vfactor) * vfactor;  // round up to a multiple of vfactor
+    for (size_t y = yb; y < ye; y += vfactor) {
+      for (int c = 0; c < cinfo->num_components; ++c) {
+        RowBuffer<float>* raw_out = &m->raw_output_[c];
+        RowBuffer<float>* render_out = &m->render_output_[c];
+        int line_groups = vfactor / m->v_factor[c];  // raw rows of component c per group of vfactor output rows
+        int downsampled_width = output_width / m->h_factor[c];
+        size_t yc = y / m->v_factor[c];
+        for (int dy = 0; dy < line_groups; ++dy) {
+          size_t ymid = yc + dy;
+          const float* JXL_RESTRICT row_mid = raw_out->Row(ymid);
+          if (cinfo->do_fancy_upsampling && m->v_factor[c] == 2) {
+            const float* JXL_RESTRICT row_top =
+                ymid == 0 ? row_mid : raw_out->Row(ymid - 1);  // reuse row_mid at the top edge
+            const float* JXL_RESTRICT row_bot = ymid + 1 == m->raw_height_[c]
+                                                    ? row_mid
+                                                    : raw_out->Row(ymid + 1);
+            Upsample2Vertical(row_top, row_mid, row_bot,
+                              render_out->Row(2 * dy),
+                              render_out->Row(2 * dy + 1), downsampled_width);
+          } else {
+            for (int yix = 0; yix < m->v_factor[c]; ++yix) {
+              memcpy(render_out->Row(m->v_factor[c] * dy + yix), row_mid,
+                     downsampled_width * sizeof(float));  // plain row replication
+            }
+          }
+          if (m->h_factor[c] > 1) {
+            for (int yix = 0; yix < m->v_factor[c]; ++yix) {
+              int row_ix = m->v_factor[c] * dy + yix;
+              float* JXL_RESTRICT row = render_out->Row(row_ix);
+              float* JXL_RESTRICT tmp = m->upsample_scratch_;
+              if (cinfo->do_fancy_upsampling && m->h_factor[c] == 2) {
+                Upsample2Horizontal(row, tmp, output_width);
+              } else {
+                // TODO(szabadka) SIMDify this.
+                for (size_t x = 0; x < output_width; ++x) {
+                  tmp[x] = row[x / m->h_factor[c]];  // sample replication
+                }
+                memcpy(row, tmp, output_width * sizeof(tmp[0]));
+              }
+            }
+          }
+        }
+      }
+      for (int yix = 0; yix < vfactor; ++yix) {
+        if (y + yix < ybegin || y + yix >= yend) continue;  // outside the requested range
+        float* rows[kMaxComponents];
+        int num_all_components =
+            std::max(cinfo->out_color_components, cinfo->num_components);
+        for (int c = 0; c < num_all_components; ++c) {
+          rows[c] = m->render_output_[c].Row(yix);
+        }
+        (*m->color_transform)(rows, output_width);
+        for (int c = 0; c < cinfo->out_color_components; ++c) {
+          // Undo the centering of the sample values around zero.
+          DecenterRow(rows[c], output_width);
+        }
+        if (scanlines) {  // with null scanlines the row is counted but not written
+          uint8_t* output = scanlines[*num_output_rows];
+          WriteToOutput(cinfo, rows, m->xoffset_, cinfo->output_width,
+                        cinfo->out_color_components, output);
+        }
+        JXL_ASSERT(cinfo->output_scanline == y + yix);
+        ++cinfo->output_scanline;
+        ++(*num_output_rows);
+        if (cinfo->output_scanline == cinfo->output_height) {
+          ++m->output_passes_done_;
+        }
+      }
+    }
+  } else {
+    DecodeCurrentiMCURow(cinfo);  // nothing to emit yet: decode one more iMCU row
+    ++cinfo->output_iMCU_row;
+  }
+}
+
+}  // namespace jpegli
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jpegli/render.h b/third-party/libjxl/libjxl/lib/jpegli/render.h
new file mode 100644
index 0000000000..ad69335d70
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/render.h
@@ -0,0 +1,24 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_RENDER_H_
+#define LIB_JPEGLI_RENDER_H_
+
+#include <stdint.h>
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+void PrepareForOutput(j_decompress_ptr cinfo);
+
+void ProcessOutput(j_decompress_ptr cinfo, size_t* num_output_rows,
+                   JSAMPARRAY scanlines, size_t max_output_rows);
+
+void ProcessRawOutput(j_decompress_ptr cinfo, JSAMPIMAGE data);
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_RENDER_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/simd.cc b/third-party/libjxl/libjxl/lib/jpegli/simd.cc
new file mode 100644
index 0000000000..5e84939342
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/simd.cc
@@ -0,0 +1,38 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/simd.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/simd.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+size_t GetVectorSize() { return HWY_LANES(uint8_t); }  // uint8_t lanes per vector for this target
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+namespace {
+
+HWY_EXPORT(GetVectorSize);  // Local function.
+
+}  // namespace
+
+size_t VectorSize() {
+  static size_t bytes = HWY_DYNAMIC_DISPATCH(GetVectorSize)();  // function-local static: dispatched once
+  return bytes;
+}
+
+}  // namespace jpegli
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jpegli/simd.h b/third-party/libjxl/libjxl/lib/jpegli/simd.h
new file mode 100644
index 0000000000..aec772e2d4
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/simd.h
@@ -0,0 +1,18 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_SIMD_H_
+#define LIB_JPEGLI_SIMD_H_
+
+#include <stddef.h>
+
+namespace jpegli {
+
+// Returns SIMD vector size in bytes.
+size_t VectorSize();
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_SIMD_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/source_manager.cc b/third-party/libjxl/libjxl/lib/jpegli/source_manager.cc
new file mode 100644
index 0000000000..0b8e0a5c8c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/source_manager.cc
@@ -0,0 +1,90 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+
+namespace jpegli {
+
+void init_mem_source(j_decompress_ptr cinfo) {}    // no-op; jpegli_mem_src compares against its address
+void init_stdio_source(j_decompress_ptr cinfo) {}  // no-op; jpegli_stdio_src compares against its address
+
+void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {
+  if (num_bytes <= 0) return;  // nothing to skip
+  while (num_bytes > static_cast<long>(cinfo->src->bytes_in_buffer)) {
+    num_bytes -= cinfo->src->bytes_in_buffer;  // drop everything currently buffered
+    (*cinfo->src->fill_input_buffer)(cinfo);   // return value is ignored
+  }
+  cinfo->src->next_input_byte += num_bytes;  // skip the remainder inside the buffer
+  cinfo->src->bytes_in_buffer -= num_bytes;
+}
+
+void term_source(j_decompress_ptr cinfo) {}  // no-op
+
+boolean EmitFakeEoiMarker(j_decompress_ptr cinfo) {
+  static constexpr uint8_t kEoiBytes[] = {0xff, 0xd9};  // static storage outlives the call
+  cinfo->src->bytes_in_buffer = sizeof(kEoiBytes);
+  cinfo->src->next_input_byte = kEoiBytes;
+  return TRUE;
+}
+
+constexpr size_t kStdioBufferSize = 64 << 10;
+
+struct StdioSourceManager {
+  jpeg_source_mgr pub;
+  FILE* f;
+  uint8_t* buffer;
+  // Refills `buffer` from `f`; once fread() yields nothing, emits a fake EOI.
+  static boolean fill_input_buffer(j_decompress_ptr cinfo) {
+    auto* self = reinterpret_cast<StdioSourceManager*>(cinfo->src);
+    const size_t got = fread(self->buffer, 1, kStdioBufferSize, self->f);
+    if (got != 0) {
+      self->pub.next_input_byte = self->buffer;
+      self->pub.bytes_in_buffer = got;
+      return TRUE;
+    }
+    return EmitFakeEoiMarker(cinfo);
+  }
+};
+
+}  // namespace jpegli
+
+void jpegli_mem_src(j_decompress_ptr cinfo, const unsigned char* inbuffer,
+                    unsigned long insize) {
+  if (cinfo->src == nullptr) {
+    cinfo->src = jpegli::Allocate<jpeg_source_mgr>(cinfo, 1);
+  } else if (cinfo->src->init_source != jpegli::init_mem_source) {
+    JPEGLI_ERROR("jpegli_mem_src: a different source manager was already set");
+  }
+  jpeg_source_mgr* src = cinfo->src;  // (re)initialize every field for the new buffer
+  src->init_source = jpegli::init_mem_source;
+  src->fill_input_buffer = jpegli::EmitFakeEoiMarker;
+  src->skip_input_data = jpegli::skip_input_data;
+  src->resync_to_restart = jpegli_resync_to_restart;
+  src->term_source = jpegli::term_source;
+  src->next_input_byte = inbuffer;
+  src->bytes_in_buffer = insize;
+}
+
+void jpegli_stdio_src(j_decompress_ptr cinfo, FILE* infile) {
+  if (!cinfo->src) {
+    cinfo->src = reinterpret_cast<jpeg_source_mgr*>(
+        jpegli::Allocate<jpegli::StdioSourceManager>(cinfo, 1));
+  } else if (cinfo->src->init_source != jpegli::init_stdio_source) {
+    JPEGLI_ERROR(  // names this function, consistent with jpegli_mem_src
+        "jpegli_stdio_src: a different source manager was already set");
+  }
+  auto src = reinterpret_cast<jpegli::StdioSourceManager*>(cinfo->src);
+  src->f = infile;  // the caller keeps ownership of the FILE*
+  src->buffer = jpegli::Allocate<uint8_t>(cinfo, jpegli::kStdioBufferSize);
+  src->pub.next_input_byte = src->buffer;
+  src->pub.bytes_in_buffer = 0;  // empty: the first read goes through fill_input_buffer
+  src->pub.init_source = jpegli::init_stdio_source;
+  src->pub.fill_input_buffer = jpegli::StdioSourceManager::fill_input_buffer;
+  src->pub.skip_input_data = jpegli::skip_input_data;
+  src->pub.resync_to_restart = jpegli_resync_to_restart;
+  src->pub.term_source = jpegli::term_source;
+}
diff --git a/third-party/libjxl/libjxl/lib/jpegli/source_manager_test.cc b/third-party/libjxl/libjxl/lib/jpegli/source_manager_test.cc
new file mode 100644
index 0000000000..e15d18ec80
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/source_manager_test.cc
@@ -0,0 +1,143 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include <cmath>
+#include <vector>
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/base/status.h"
+
+namespace jpegli {
+namespace {
+
+void ReadOutputImage(j_decompress_ptr cinfo, TestImage* output) {
+  jpegli_read_header(cinfo, /*require_image=*/TRUE);
+  jpegli_start_decompress(cinfo);
+  output->xsize = cinfo->output_width;
+  output->ysize = cinfo->output_height;
+  output->components = cinfo->num_components;
+  output->AllocatePixels();
+  const size_t row_bytes = cinfo->output_width * cinfo->num_components;
+  while (cinfo->output_scanline < cinfo->output_height) {  // one scanline per call
+    JSAMPROW row = output->pixels.data() + cinfo->output_scanline * row_bytes;
+    jpegli_read_scanlines(cinfo, &row, 1);
+  }
+  jpegli_finish_decompress(cinfo);
+}
+
+struct TestConfig {
+  std::string fn;       // test data file, loaded with ReadTestData()
+  std::string fn_desc;  // short label that operator<< prints for the test name
+  DecompressParams dparams;  // this file reads only size_factor (input truncation)
+};
+
+class SourceManagerTestParam : public ::testing::TestWithParam<TestConfig> {};
+
+FILE* MemOpen(const std::vector<uint8_t>& data) {  // temp file holding `data`, or nullptr
+  FILE* src = tmpfile();
+  if (src && fwrite(data.data(), 1, data.size(), src) == data.size()) {
+    rewind(src);  // hand the file back positioned at its first byte
+    return src;
+  }
+  if (src) fclose(src);  // short write: fail instead of returning a truncated file
+  return nullptr;
+}
+
+TEST_P(SourceManagerTestParam, TestStdioSourceManager) {
+  TestConfig config = GetParam();
+  std::vector<uint8_t> compressed = ReadTestData(config.fn.c_str());
+  if (config.dparams.size_factor < 1.0) {
+    compressed.resize(compressed.size() * config.dparams.size_factor);  // truncate the input
+  }
+  FILE* src = MemOpen(compressed);
+  ASSERT_TRUE(src);
+  TestImage output0;  // decoded through the stdio source manager
+  jpeg_decompress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    jpegli_stdio_src(&cinfo, src);
+    ReadOutputImage(&cinfo, &output0);
+    return true;
+  };
+  bool ok = try_catch_block();
+  fclose(src);  // closed before the assertion so a failure does not leak the file
+  ASSERT_TRUE(ok);
+  jpegli_destroy_decompress(&cinfo);
+
+  TestImage output1;  // reference decode of the same bytes
+  DecodeWithLibjpeg(CompressParams(), DecompressParams(), compressed, &output1);
+  VerifyOutputImage(output1, output0, 1.0f);
+}
+
+TEST_P(SourceManagerTestParam, TestMemSourceManager) {
+  TestConfig config = GetParam();
+  std::vector<uint8_t> compressed = ReadTestData(config.fn.c_str());
+  if (config.dparams.size_factor < 1.0f) {
+    compressed.resize(compressed.size() * config.dparams.size_factor);  // truncate the input
+  }
+  TestImage output0;  // decoded through the memory source manager
+  jpeg_decompress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    jpegli_mem_src(&cinfo, compressed.data(), compressed.size());
+    ReadOutputImage(&cinfo, &output0);
+    return true;
+  };
+  ASSERT_TRUE(try_catch_block());
+  jpegli_destroy_decompress(&cinfo);
+
+  TestImage output1;  // reference decode of the same bytes
+  DecodeWithLibjpeg(CompressParams(), DecompressParams(), compressed, &output1);
+  VerifyOutputImage(output1, output0, 1.0f);
+}
+
+std::vector<TestConfig> GenerateTests() {
+  // Every test file is paired with each truncation factor, in this order.
+  const std::vector<std::pair<std::string, std::string>> testfiles({
+      {"jxl/flower/flower.png.im_q85_444.jpg", "Q85YUV444"},
+      {"jxl/flower/flower.png.im_q85_420.jpg", "Q85YUV420"},
+      {"jxl/flower/flower.png.im_q85_420_R13B.jpg", "Q85YUV420R13B"},
+  });
+  const float kSizeFactors[] = {0.1f, 0.33f, 0.5f, 0.75f};
+  std::vector<TestConfig> all_tests;
+  for (const auto& file_and_desc : testfiles) {
+    for (float size_factor : kSizeFactors) {
+      TestConfig config;
+      config.fn = file_and_desc.first;
+      config.fn_desc = file_and_desc.second;
+      config.dparams.size_factor = size_factor;
+      all_tests.push_back(config);
+    }
+  }
+  return all_tests;
+}
+
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+  const float factor = c.dparams.size_factor;
+  os << c.fn_desc;
+  if (!(factor < 1.0f)) return os;  // not truncated: description only
+  const int percent = static_cast<int>(factor * 100);
+  return os << "Partial" << percent << "p";
+}
+
+std::string TestDescription(
+    const testing::TestParamInfo<SourceManagerTestParam::ParamType>& info) {
+  std::ostringstream label;  // formatted by operator<< for TestConfig
+  label << info.param;
+  return label.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(SourceManagerTest, SourceManagerTestParam,
+                                testing::ValuesIn(GenerateTests()),
+                                TestDescription);
+
+}  // namespace
+}  // namespace jpegli
diff --git a/third-party/libjxl/libjxl/lib/jpegli/streaming_test.cc b/third-party/libjxl/libjxl/lib/jpegli/streaming_test.cc
new file mode 100644
index 0000000000..9dcc0ff4ef
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/streaming_test.cc
@@ -0,0 +1,233 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+
+namespace jpegli {
+namespace {
+
+// A simple suspending source manager with an input buffer.
+struct SourceManager {
+  jpeg_source_mgr pub;
+  std::vector<uint8_t> buffer;  // backing storage; filled by DestinationManager
+
+  SourceManager() {
+    pub.next_input_byte = nullptr;  // starts with no input available
+    pub.bytes_in_buffer = 0;
+    pub.init_source = init_source;
+    pub.fill_input_buffer = fill_input_buffer;
+    pub.skip_input_data = skip_input_data;
+    pub.resync_to_restart = jpegli_resync_to_restart;
+    pub.term_source = term_source;
+  }
+
+  static void init_source(j_decompress_ptr cinfo) {}  // no-op
+  static boolean fill_input_buffer(j_decompress_ptr cinfo) { return FALSE; }  // never supplies data on demand
+  static void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {}  // no-op
+  static void term_source(j_decompress_ptr cinfo) {}  // no-op
+};
+
+// A destination manager that empties its output buffer into a SourceManager's
+// input buffer. The buffer size is kept short because empty_output_buffer() is
+// called only when the output buffer is full, and we want to update the decoder
+// input frequently to demonstrate that streaming works.
+static constexpr size_t kOutputBufferSize = 1024;
+struct DestinationManager {
+  jpeg_destination_mgr pub;
+  std::vector<uint8_t> buffer;  // compressor output buffer (kOutputBufferSize bytes)
+  SourceManager* dest;          // decoder-side source that receives the flushed bytes
+
+  DestinationManager(SourceManager* src)
+      : buffer(kOutputBufferSize), dest(src) {
+    pub.next_output_byte = buffer.data();
+    pub.free_in_buffer = buffer.size();
+    pub.init_destination = init_destination;
+    pub.empty_output_buffer = empty_output_buffer;
+    pub.term_destination = term_destination;
+  }
+
+  static void init_destination(j_compress_ptr cinfo) {}  // no-op
+
+  static boolean empty_output_buffer(j_compress_ptr cinfo) {
+    auto us = reinterpret_cast<DestinationManager*>(cinfo->dest);
+    jpeg_destination_mgr* src = &us->pub;
+    jpeg_source_mgr* dst = &us->dest->pub;
+    std::vector<uint8_t>& src_buf = us->buffer;
+    std::vector<uint8_t>& dst_buf = us->dest->buffer;
+    if (dst->bytes_in_buffer > 0 && dst->bytes_in_buffer < dst_buf.size()) {
+      memmove(dst_buf.data(), dst->next_input_byte, dst->bytes_in_buffer);  // move unread input to the front
+    }
+    size_t src_len = src_buf.size() - src->free_in_buffer;  // bytes written since the last flush
+    dst_buf.resize(dst->bytes_in_buffer + src_len);  // unread input + new output
+    memcpy(&dst_buf[dst->bytes_in_buffer], src_buf.data(), src_len);
+    dst->next_input_byte = dst_buf.data();
+    dst->bytes_in_buffer = dst_buf.size();
+    src->next_output_byte = src_buf.data();  // the output buffer is empty again
+    src->free_in_buffer = src_buf.size();
+    return true;
+  }
+
+  static void term_destination(j_compress_ptr cinfo) {
+    empty_output_buffer(cinfo);  // flush whatever is left in the output buffer
+  }
+};
+
+struct TestConfig {
+  TestImage input;         // dimensions set by GenerateTests(); pixels filled by GeneratePixels()
+  CompressParams jparams;  // TestStreaming reads only v_sampling[0]
+};
+
+class StreamingTestParam : public ::testing::TestWithParam<TestConfig> {};
+
+TEST_P(StreamingTestParam, TestStreaming) {
+  jpeg_decompress_struct dinfo = {};
+  jpeg_compress_struct cinfo = {};
+  TestConfig config = GetParam();
+  TestImage& input = config.input;
+  TestImage output;
+  GeneratePixels(&input);
+  const auto try_catch_block = [&]() {
+    ERROR_HANDLER_SETUP(jpegli);
+    dinfo.err = cinfo.err;  // the decoder shares the compressor's error handler
+    dinfo.client_data = cinfo.client_data;
+    // Create a pair of compressor and decompressor objects, where the
+    // compressor's output is connected to the decompressor's input.
+    jpegli_create_decompress(&dinfo);
+    jpegli_create_compress(&cinfo);
+    SourceManager src;
+    dinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+    DestinationManager dest(&src);
+    cinfo.dest = reinterpret_cast<jpeg_destination_mgr*>(&dest);
+
+    cinfo.image_width = input.xsize;
+    cinfo.image_height = input.ysize;
+    cinfo.input_components = input.components;
+    cinfo.in_color_space = (J_COLOR_SPACE)input.color_space;
+    jpegli_set_defaults(&cinfo);
+    cinfo.comp_info[0].v_samp_factor = config.jparams.v_sampling[0];
+    jpegli_set_progressive_level(&cinfo, 0);
+    cinfo.optimize_coding = FALSE;
+    jpegli_start_compress(&cinfo, TRUE);
+
+    size_t stride = cinfo.image_width * cinfo.input_components;
+    size_t iMCU_height = 8 * cinfo.max_v_samp_factor;
+    std::vector<uint8_t> row_bytes(iMCU_height * stride);
+    size_t yin = 0;   // input rows fed to the compressor so far
+    size_t yout = 0;  // output rows read from the decoder so far
+    while (yin < cinfo.image_height) {
+      // Feed one iMCU row at a time to the compressor.
+      size_t lines_in = std::min(iMCU_height, cinfo.image_height - yin);
+      memcpy(&row_bytes[0], &input.pixels[yin * stride], lines_in * stride);
+      std::vector<JSAMPROW> rows_in(lines_in);
+      for (size_t i = 0; i < lines_in; ++i) {
+        rows_in[i] = &row_bytes[i * stride];
+      }
+      EXPECT_EQ(lines_in,
+                jpegli_write_scanlines(&cinfo, &rows_in[0], lines_in));
+      yin += lines_in;
+      if (yin == cinfo.image_height) {
+        jpegli_finish_compress(&cinfo);
+      }
+
+      // After the first iMCU row, we don't yet expect any output because the
+      // compressor delays processing to have context rows after the iMCU row.
+      if (yin < std::min<size_t>(2 * iMCU_height, cinfo.image_height)) {
+        continue;
+      }
+
+      // After two iMCU rows, the compressor has started emitting compressed
+      // data. We check here that at least the scan header was output, because
+      // we expect that the compressor's output buffer was filled at least once
+      // while emitting the first compressed iMCU row.
+      if (yin == std::min<size_t>(2 * iMCU_height, cinfo.image_height)) {
+        EXPECT_EQ(JPEG_REACHED_SOS,
+                  jpegli_read_header(&dinfo, /*require_image=*/TRUE));
+        output.xsize = dinfo.image_width;
+        output.ysize = dinfo.image_height;
+        output.components = dinfo.num_components;
+        EXPECT_EQ(output.xsize, input.xsize);
+        EXPECT_EQ(output.ysize, input.ysize);
+        EXPECT_EQ(output.components, input.components);
+        EXPECT_TRUE(jpegli_start_decompress(&dinfo));
+        output.pixels.resize(output.ysize * stride);
+        if (yin < cinfo.image_height) {
+          continue;
+        }
+      }
+
+      // After six iMCU rows, the compressor has emitted five iMCU rows of
+      // compressed data, of which we expect four full iMCU rows of compressed
+      // data to be in the decoder's input buffer, but since the decoder also
+      // needs context rows for upsampling and smoothing, we don't expect any
+      // output to be ready yet.
+      if (yin < 7 * iMCU_height && yin < cinfo.image_height) {
+        continue;
+      }
+
+      // After five iMCU rows, we expect the decoder to have rendered the output
+      // with four iMCU rows of delay.
+      // TODO(szabadka) Reduce the processing delay in the decoder if possible.
+      size_t lines_out =
+          (yin == cinfo.image_height ? cinfo.image_height - yout : iMCU_height);  // all remaining rows once input is complete
+      std::vector<JSAMPROW> rows_out(lines_out);
+      for (size_t i = 0; i < lines_out; ++i) {
+        rows_out[i] =
+            reinterpret_cast<JSAMPLE*>(&output.pixels[(yout + i) * stride]);
+      }
+      EXPECT_EQ(lines_out,
+                jpegli_read_scanlines(&dinfo, &rows_out[0], lines_out));
+      VerifyOutputImage(input, output, yout, lines_out, 3.8f);
+      yout += lines_out;
+
+      if (yout == cinfo.image_height) {
+        EXPECT_TRUE(jpegli_finish_decompress(&dinfo));
+      }
+    }
+    return true;
+  };
+  EXPECT_TRUE(try_catch_block());
+  jpegli_destroy_decompress(&dinfo);
+  jpegli_destroy_compress(&cinfo);
+}
+
+std::vector<TestConfig> GenerateTests() {
+  // Heights 1080 + {0, 1, 8, 9}, each with first-component v_sampling 1 and 2.
+  constexpr size_t kWidth = 1920, kBaseHeight = 1080;
+  std::vector<TestConfig> configs;
+  for (int extra_rows : {0, 1, 8, 9}) {
+    for (int first_v_samp : {1, 2}) {
+      configs.emplace_back();
+      TestConfig& config = configs.back();
+      config.input.xsize = kWidth;
+      config.input.ysize = kBaseHeight + extra_rows;
+      config.jparams.h_sampling = {1, 1, 1};
+      config.jparams.v_sampling = {first_v_samp, 1, 1};
+    }
+  }
+  return configs;
+}
+
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+  os << c.input;    // uses an operator<< for TestImage that is not defined in this file
+  os << c.jparams;  // uses an operator<< for CompressParams that is not defined in this file
+  return os;
+}
+
+std::string TestDescription(
+    const testing::TestParamInfo<StreamingTestParam::ParamType>& info) {
+  std::ostringstream label;  // formatted by operator<< for TestConfig
+  label << info.param;
+  return label.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(StreamingTest, StreamingTestParam,
+                                testing::ValuesIn(GenerateTests()),
+                                TestDescription);
+
+}  // namespace
+}  // namespace jpegli
diff --git a/third-party/libjxl/libjxl/lib/jpegli/test_params.h b/third-party/libjxl/libjxl/lib/jpegli/test_params.h
new file mode 100644
index 0000000000..6ab9fa573a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/test_params.h
@@ -0,0 +1,163 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_TEST_PARAMS_H_
+#define LIB_JPEGLI_TEST_PARAMS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "lib/jpegli/types.h"
+
+namespace jpegli {
+
+// Ceiling division for non-negative a and positive b. Defined here as well so
+// that the *_api_test.cc tests need no *_internal.h headers (public API only).
+template <typename T1, typename T2>
+constexpr inline T1 DivCeil(T1 a, T2 b) {
+  return (a + b - 1) / b;  // the quotient is converted to T1 on return
+}
+
+#define ARRAY_SIZE(X) (sizeof(X) / sizeof((X)[0]))
+
+static constexpr int kLastScan = 0xffff;
+
+static uint32_t kTestColorMap[] = {
+    0x000000, 0xff0000, 0x00ff00, 0x0000ff, 0xffff00, 0x00ffff,
+    0xff00ff, 0xffffff, 0x6251fc, 0x45d9c7, 0xa7f059, 0xd9a945,
+    0xfa4e44, 0xceaffc, 0xbad7db, 0xc1f0b1, 0xdbca9a, 0xfacac5,
+    0xf201ff, 0x0063db, 0x00f01c, 0xdbb204, 0xf12f0c, 0x7ba1dc};
+static constexpr int kTestColorMapNumColors = ARRAY_SIZE(kTestColorMap);
+
+static constexpr int kSpecialMarker0 = 0xe5;
+static constexpr int kSpecialMarker1 = 0xe9;
+static constexpr uint8_t kMarkerData[] = {0, 1, 255, 0, 17};
+static constexpr uint8_t kMarkerSequence[] = {0xe6, 0xe8, 0xe7,
+                                              0xe6, 0xe7, 0xe8};
+static constexpr size_t kMarkerSequenceLen = ARRAY_SIZE(kMarkerSequence);
+
+enum JpegIOMode {
+  PIXELS,
+  RAW_DATA,
+  COEFFICIENTS,
+};
+
+struct CustomQuantTable {
+  int slot_idx = 0;
+  uint16_t table_type = 0;
+  int scale_factor = 100;
+  bool add_raw = false;
+  bool force_baseline = true;
+  std::vector<unsigned int> basic_table;
+  std::vector<unsigned int> quantval;
+  void Generate();
+};
+
+struct TestImage {
+  size_t xsize = 2268;  // default width
+  size_t ysize = 1512;  // default height
+  int color_space = 2;  // JCS_RGB
+  size_t components = 3;
+  JpegliDataType data_type = JPEGLI_TYPE_UINT8;
+  JpegliEndianness endianness = JPEGLI_NATIVE_ENDIAN;
+  std::vector<uint8_t> pixels;  // sized by AllocatePixels()
+  std::vector<std::vector<uint8_t>> raw_data;
+  std::vector<std::vector<int16_t>> coeffs;
+  void AllocatePixels() {  // ysize * xsize * components samples of data_type
+    pixels.resize(ysize * xsize * components *
+                  jpegli_bytes_per_sample(data_type));
+  }
+  void Clear() {  // empties the three buffers; dimensions and format are kept
+    pixels.clear();
+    raw_data.clear();
+    coeffs.clear();
+  }
+};
+
+struct CompressParams {
+  int quality = 90;
+  bool set_jpeg_colorspace = false;
+  int jpeg_color_space = 0;  // JCS_UNKNOWN
+  std::vector<int> quant_indexes;
+  std::vector<CustomQuantTable> quant_tables;
+  std::vector<int> h_sampling;  // empty means factor 1 for every component (see h_samp)
+  std::vector<int> v_sampling;  // empty means factor 1 for every component (see v_samp)
+  std::vector<int> comp_ids;
+  int override_JFIF = -1;
+  int override_Adobe = -1;
+  bool add_marker = false;
+  bool simple_progression = false;
+  // -1 is library default
+  // 0, 1, 2 is set through jpegli_set_progressive_level()
+  // 2 + N is kScriptN
+  int progressive_mode = -1;
+  unsigned int restart_interval = 0;
+  int restart_in_rows = 0;
+  int smoothing_factor = 0;
+  int optimize_coding = -1;
+  bool use_flat_dc_luma_code = false;
+  bool omit_standard_tables = false;
+  bool xyb_mode = false;
+  bool libjpeg_mode = false;
+  bool use_adaptive_quantization = true;
+  std::vector<uint8_t> icc;
+
+  int h_samp(int c) const { return h_sampling.empty() ? 1 : h_sampling[c]; }  // no bounds check on c
+  int v_samp(int c) const { return v_sampling.empty() ? 1 : v_sampling[c]; }  // no bounds check on c
+  int max_h_sample() const {  // largest horizontal factor, 1 if none are set
+    auto it = std::max_element(h_sampling.begin(), h_sampling.end());
+    return it == h_sampling.end() ? 1 : *it;
+  }
+  int max_v_sample() const {  // largest vertical factor, 1 if none are set
+    auto it = std::max_element(v_sampling.begin(), v_sampling.end());
+    return it == v_sampling.end() ? 1 : *it;
+  }
+  int comp_width(const TestImage& input, int c) const {  // rounded up to a multiple of 8
+    return DivCeil(input.xsize * h_samp(c), max_h_sample() * 8) * 8;
+  }
+  int comp_height(const TestImage& input, int c) const {  // rounded up to a multiple of 8
+    return DivCeil(input.ysize * v_samp(c), max_v_sample() * 8) * 8;
+  }
+};
+
+enum ColorQuantMode {
+  CQUANT_1PASS,
+  CQUANT_2PASS,
+  CQUANT_EXTERNAL,
+  CQUANT_REUSE,
+};
+
+struct ScanDecompressParams {
+  int max_scan_number;
+  int dither_mode;
+  ColorQuantMode color_quant_mode;
+};
+
+struct DecompressParams {
+  float size_factor = 1.0f;
+  size_t chunk_size = 65536;
+  size_t max_output_lines = 16;
+  JpegIOMode output_mode = PIXELS;
+  JpegliDataType data_type = JPEGLI_TYPE_UINT8;
+  JpegliEndianness endianness = JPEGLI_NATIVE_ENDIAN;
+  bool set_out_color_space = false;
+  int out_color_space = 0;  // JCS_UNKNOWN
+  bool crop_output = false;
+  bool do_block_smoothing = false;
+  bool do_fancy_upsampling = true;
+  bool skip_scans = false;
+  int scale_num = 1;
+  int scale_denom = 1;
+  bool quantize_colors = false;
+  int desired_number_of_colors = 256;
+  std::vector<ScanDecompressParams> scan_params;
+};
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_TEST_PARAMS_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/test_utils-inl.h b/third-party/libjxl/libjxl/lib/jpegli/test_utils-inl.h
new file mode 100644
index 0000000000..a454917187
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/test_utils-inl.h
@@ -0,0 +1,430 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This template file is included in both the libjpeg_test_util.cc and the
+// test_utils.cc files with different JPEG_API_FN macros and possibly different
+// include paths for the jpeg headers.
+
+// Sequential non-interleaved.
+static constexpr jpeg_scan_info kScript1[] = {
+    {1, {0}, 0, 63, 0, 0},  // {comps_in_scan, {component_index}, Ss, Se, Ah, Al}
+    {1, {1}, 0, 63, 0, 0},
+    {1, {2}, 0, 63, 0, 0},
+};
+// Sequential partially interleaved, chroma first.
+static constexpr jpeg_scan_info kScript2[] = {
+    {2, {1, 2}, 0, 63, 0, 0},
+    {1, {0}, 0, 63, 0, 0},
+};
+
+// Rest of the scan scripts are progressive.
+
+static constexpr jpeg_scan_info kScript3[] = {
+    // Interleaved full DC.
+    {3, {0, 1, 2}, 0, 0, 0, 0},
+    // Full AC scans.
+    {1, {0}, 1, 63, 0, 0},
+    {1, {1}, 1, 63, 0, 0},
+    {1, {2}, 1, 63, 0, 0},
+};
+static constexpr jpeg_scan_info kScript4[] = {
+    // Non-interleaved full DC.
+    {1, {0}, 0, 0, 0, 0},
+    {1, {1}, 0, 0, 0, 0},
+    {1, {2}, 0, 0, 0, 0},
+    // Full AC scans.
+    {1, {0}, 1, 63, 0, 0},
+    {1, {1}, 1, 63, 0, 0},
+    {1, {2}, 1, 63, 0, 0},
+};
+static constexpr jpeg_scan_info kScript5[] = {
+    // Partially interleaved full DC, chroma first.
+    {2, {1, 2}, 0, 0, 0, 0},
+    {1, {0}, 0, 0, 0, 0},
+    // AC shifted by 1 bit.
+    {1, {0}, 1, 63, 0, 1},
+    {1, {1}, 1, 63, 0, 1},
+    {1, {2}, 1, 63, 0, 1},
+    // AC refinement scan.
+    {1, {0}, 1, 63, 1, 0},
+    {1, {1}, 1, 63, 1, 0},
+    {1, {2}, 1, 63, 1, 0},
+};
+static constexpr jpeg_scan_info kScript6[] = {
+    // Interleaved DC shifted by 2 bits.
+    {3, {0, 1, 2}, 0, 0, 0, 2},
+    // Interleaved DC refinement scans.
+    {3, {0, 1, 2}, 0, 0, 2, 1},
+    {3, {0, 1, 2}, 0, 0, 1, 0},
+    // Full AC scans.
+    {1, {0}, 1, 63, 0, 0},
+    {1, {1}, 1, 63, 0, 0},
+    {1, {2}, 1, 63, 0, 0},
+};
+
+static constexpr jpeg_scan_info kScript7[] = {
+    // Non-interleaved DC shifted by 2 bits.
+    {1, {0}, 0, 0, 0, 2},
+    {1, {1}, 0, 0, 0, 2},
+    {1, {2}, 0, 0, 0, 2},
+    // Non-interleaved DC first refinement scans.
+    {1, {0}, 0, 0, 2, 1},
+    {1, {1}, 0, 0, 2, 1},
+    {1, {2}, 0, 0, 2, 1},
+    // Non-interleaved DC second refinement scans.
+    {1, {0}, 0, 0, 1, 0},
+    {1, {1}, 0, 0, 1, 0},
+    {1, {2}, 0, 0, 1, 0},
+    // Full AC scans.
+    {1, {0}, 1, 63, 0, 0},
+    {1, {1}, 1, 63, 0, 0},
+    {1, {2}, 1, 63, 0, 0},
+};
+
+static constexpr jpeg_scan_info kScript8[] = {
+    // Partially interleaved DC shifted by 2 bits, chroma first
+    {2, {1, 2}, 0, 0, 0, 2},
+    {1, {0}, 0, 0, 0, 2},
+    // Partially interleaved DC first refinement scans.
+    {2, {0, 2}, 0, 0, 2, 1},  // components 0 and 2 share this scan
+    {1, {1}, 0, 0, 2, 1},
+    // Partially interleaved DC second refinement scans, chroma first.
+    {2, {1, 2}, 0, 0, 1, 0},
+    {1, {0}, 0, 0, 1, 0},
+    // Full AC scans.
+    {1, {0}, 1, 63, 0, 0},
+    {1, {1}, 1, 63, 0, 0},
+    {1, {2}, 1, 63, 0, 0},
+};
+
+static constexpr jpeg_scan_info kScript9[] = {
+    // Interleaved full DC.
+    {3, {0, 1, 2}, 0, 0, 0, 0},
+    // AC scans for component 0
+    // shifted by 1 bit, two spectral ranges
+    {1, {0}, 1, 6, 0, 1},   // coefficients 1..6
+    {1, {0}, 7, 63, 0, 1},  // coefficients 7..63
+    // refinement scan, full
+    {1, {0}, 1, 63, 1, 0},
+    // AC scans for component 1
+    // shifted by 1 bit, full
+    {1, {1}, 1, 63, 0, 1},
+    // refinement scan, two spectral ranges
+    {1, {1}, 1, 6, 1, 0},
+    {1, {1}, 7, 63, 1, 0},
+    // AC scans for component 2
+    // shifted by 1 bit, two spectral ranges
+    {1, {2}, 1, 6, 0, 1},
+    {1, {2}, 7, 63, 0, 1},
+    // refinement scan, two spectral ranges (but different from above)
+    {1, {2}, 1, 16, 1, 0},   // coefficients 1..16
+    {1, {2}, 17, 63, 1, 0},  // coefficients 17..63
+};
+
+static constexpr jpeg_scan_info kScript10[] = {
+    // Interleaved full DC.
+    {3, {0, 1, 2}, 0, 0, 0, 0},
+    // AC scans for spectral range 1..16
+    // shifted by 1
+    {1, {0}, 1, 16, 0, 1},
+    {1, {1}, 1, 16, 0, 1},
+    {1, {2}, 1, 16, 0, 1},
+    // refinement scans, two sub-ranges
+    {1, {0}, 1, 8, 1, 0},
+    {1, {0}, 9, 16, 1, 0},
+    {1, {1}, 1, 8, 1, 0},
+    {1, {1}, 9, 16, 1, 0},
+    {1, {2}, 1, 8, 1, 0},
+    {1, {2}, 9, 16, 1, 0},
+    // AC scans for spectral range 17..63
+    {1, {0}, 17, 63, 0, 1},  // also shifted by 1 bit (Al = 1)
+    {1, {1}, 17, 63, 0, 1},
+    {1, {2}, 17, 63, 0, 1},
+    // refinement scans, two sub-ranges
+    {1, {0}, 17, 28, 1, 0},
+    {1, {0}, 29, 63, 1, 0},
+    {1, {1}, 17, 28, 1, 0},
+    {1, {1}, 29, 63, 1, 0},
+    {1, {2}, 17, 28, 1, 0},
+    {1, {2}, 29, 63, 1, 0},
+};
+
+struct ScanScript {  // A scan script: pointer to its scans plus their count.
+  int num_scans;                // number of entries in `scans`
+  const jpeg_scan_info* scans;  // first entry of the script
+};
+
+static constexpr ScanScript kTestScript[] = {  // index = progressive_mode - 3
+    {ARRAY_SIZE(kScript1), kScript1}, {ARRAY_SIZE(kScript2), kScript2},
+    {ARRAY_SIZE(kScript3), kScript3}, {ARRAY_SIZE(kScript4), kScript4},
+    {ARRAY_SIZE(kScript5), kScript5}, {ARRAY_SIZE(kScript6), kScript6},
+    {ARRAY_SIZE(kScript7), kScript7}, {ARRAY_SIZE(kScript8), kScript8},
+    {ARRAY_SIZE(kScript9), kScript9}, {ARRAY_SIZE(kScript10), kScript10},
+};
+static constexpr int kNumTestScripts = ARRAY_SIZE(kTestScript);
+
+void SetScanDecompressParams(const DecompressParams& dparams,
+                             j_decompress_ptr cinfo, int scan_number) {
+  // Use the first scan_params entry whose max_scan_number covers this scan.
+  const ScanDecompressParams* selected = nullptr;
+  for (const auto& candidate : dparams.scan_params) {
+    if (candidate.max_scan_number >= scan_number) {
+      selected = &candidate;
+      break;
+    }
+  }
+  if (selected == nullptr || !dparams.quantize_colors) return;
+  cinfo->dither_mode = (J_DITHER_MODE)selected->dither_mode;
+  switch (selected->color_quant_mode) {
+    case CQUANT_1PASS:
+      cinfo->two_pass_quantize = FALSE;
+      cinfo->colormap = nullptr;
+      break;
+    case CQUANT_2PASS:
+      JXL_CHECK(cinfo->out_color_space == JCS_RGB);
+      cinfo->two_pass_quantize = TRUE;
+      cinfo->colormap = nullptr;
+      break;
+    case CQUANT_EXTERNAL: {
+      JXL_CHECK(cinfo->out_color_space == JCS_RGB);
+      cinfo->two_pass_quantize = FALSE;
+      const bool replacing = cinfo->colormap != nullptr;  // sampled pre-alloc
+      cinfo->actual_number_of_colors = kTestColorMapNumColors;
+      cinfo->colormap = (*cinfo->mem->alloc_sarray)(
+          reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE,
+          cinfo->actual_number_of_colors, 3);
+      jxl::msan::UnpoisonMemory(cinfo->colormap, 3 * sizeof(JSAMPROW));
+      for (int i = 0; i < kTestColorMapNumColors; ++i) {
+        cinfo->colormap[0][i] = (kTestColorMap[i] >> 16) & 0xff;
+        cinfo->colormap[1][i] = (kTestColorMap[i] >> 8) & 0xff;
+        cinfo->colormap[2][i] = (kTestColorMap[i] >> 0) & 0xff;
+      }
+      if (replacing) JPEG_API_FN(new_colormap)(cinfo);
+      break;
+    }
+    case CQUANT_REUSE:
+      JXL_CHECK(cinfo->out_color_space == JCS_RGB);
+      JXL_CHECK(cinfo->colormap);
+  }
+}
+
+void SetDecompressParams(const DecompressParams& dparams,
+                         j_decompress_ptr cinfo) {
+  // Plain field copies from the test parameters onto the decompress object.
+  cinfo->do_block_smoothing = dparams.do_block_smoothing;
+  cinfo->do_fancy_upsampling = dparams.do_fancy_upsampling;
+  cinfo->scale_num = dparams.scale_num;
+  cinfo->scale_denom = dparams.scale_denom;
+  cinfo->quantize_colors = dparams.quantize_colors;
+  cinfo->desired_number_of_colors = dparams.desired_number_of_colors;
+  if (dparams.output_mode == RAW_DATA) cinfo->raw_data_out = TRUE;
+  if (dparams.set_out_color_space) {
+    cinfo->out_color_space = (J_COLOR_SPACE)dparams.out_color_space;
+    if (dparams.out_color_space == JCS_UNKNOWN) {
+      cinfo->jpeg_color_space = JCS_UNKNOWN;
+    }
+  }
+  if (dparams.scan_params.empty()) return;
+  if (!cinfo->buffered_image) {
+    // Not in buffered-image mode: apply the entry selected for kLastScan.
+    SetScanDecompressParams(dparams, cinfo, kLastScan);
+    return;
+  }
+  // Buffered-image mode: enable every quantizer listed, then set up scan 1.
+  for (const auto& scan : dparams.scan_params) {
+    if (scan.color_quant_mode == CQUANT_1PASS) {
+      cinfo->enable_1pass_quant = TRUE;
+    } else if (scan.color_quant_mode == CQUANT_2PASS) {
+      cinfo->enable_2pass_quant = TRUE;
+    } else if (scan.color_quant_mode == CQUANT_EXTERNAL) {
+      cinfo->enable_external_quant = TRUE;
+    }
+  }
+  SetScanDecompressParams(dparams, cinfo, 1);
+}
+
+void CheckMarkerPresent(j_decompress_ptr cinfo, uint8_t marker_type) {
+  // Scans every saved marker; one must have this type and the kMarkerData body.
+  bool seen = false;
+  jpeg_saved_marker_ptr node = cinfo->marker_list;
+  while (node != nullptr) {
+    jxl::msan::UnpoisonMemory(node, sizeof(*node));
+    jxl::msan::UnpoisonMemory(node->data, node->data_length);
+    seen |= node->marker == marker_type &&
+            node->data_length == sizeof(kMarkerData) &&
+            memcmp(node->data, kMarkerData, sizeof(kMarkerData)) == 0;
+    node = node->next;
+  }
+  JXL_CHECK(seen);
+}
+
+void VerifyHeader(const CompressParams& jparams, j_decompress_ptr cinfo) {  // Checks the parsed header in cinfo against jparams.
+  if (jparams.set_jpeg_colorspace) {
+    JXL_CHECK(cinfo->jpeg_color_space == jparams.jpeg_color_space);
+  }
+  if (jparams.override_JFIF >= 0) {  // negative means "not overridden"
+    JXL_CHECK(cinfo->saw_JFIF_marker == jparams.override_JFIF);
+  }
+  if (jparams.override_Adobe >= 0) {  // negative means "not overridden"
+    JXL_CHECK(cinfo->saw_Adobe_marker == jparams.override_Adobe);
+  }
+  if (jparams.add_marker) {
+    CheckMarkerPresent(cinfo, kSpecialMarker0);
+    CheckMarkerPresent(cinfo, kSpecialMarker1);
+  }
+  jxl::msan::UnpoisonMemory(
+      cinfo->comp_info, cinfo->num_components * sizeof(cinfo->comp_info[0]));
+  int max_h_samp_factor = 1;
+  int max_v_samp_factor = 1;
+  for (int i = 0; i < cinfo->num_components; ++i) {  // per-component settings
+    jpeg_component_info* comp = &cinfo->comp_info[i];
+    if (!jparams.comp_ids.empty()) {
+      JXL_CHECK(comp->component_id == jparams.comp_ids[i]);
+    }
+    if (!jparams.h_sampling.empty()) {
+      JXL_CHECK(comp->h_samp_factor == jparams.h_sampling[i]);
+    }
+    if (!jparams.v_sampling.empty()) {
+      JXL_CHECK(comp->v_samp_factor == jparams.v_sampling[i]);
+    }
+    if (!jparams.quant_indexes.empty()) {
+      JXL_CHECK(comp->quant_tbl_no == jparams.quant_indexes[i]);
+    }
+    max_h_samp_factor = std::max(max_h_samp_factor, comp->h_samp_factor);
+    max_v_samp_factor = std::max(max_v_samp_factor, comp->v_samp_factor);
+  }
+  JXL_CHECK(max_h_samp_factor == cinfo->max_h_samp_factor);
+  JXL_CHECK(max_v_samp_factor == cinfo->max_v_samp_factor);
+  int referenced_tables[NUM_QUANT_TBLS] = {};  // 1 for each slot a component uses
+  for (int i = 0; i < cinfo->num_components; ++i) {  // block-grid dimensions
+    jpeg_component_info* comp = &cinfo->comp_info[i];
+    JXL_CHECK(comp->width_in_blocks ==
+              DivCeil(cinfo->image_width * comp->h_samp_factor,
+                      max_h_samp_factor * DCTSIZE));
+    JXL_CHECK(comp->height_in_blocks ==
+              DivCeil(cinfo->image_height * comp->v_samp_factor,
+                      max_v_samp_factor * DCTSIZE));
+    referenced_tables[comp->quant_tbl_no] = 1;
+  }
+  for (const auto& table : jparams.quant_tables) {  // custom quant tables
+    JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[table.slot_idx];
+    if (!referenced_tables[table.slot_idx]) {
+      JXL_CHECK(quant_table == nullptr);  // unreferenced slots must be absent
+      continue;
+    }
+    JXL_CHECK(quant_table != nullptr);
+    jxl::msan::UnpoisonMemory(quant_table, sizeof(*quant_table));
+    for (int k = 0; k < DCTSIZE2; ++k) {
+      JXL_CHECK(quant_table->quantval[k] == table.quantval[k]);
+    }
+  }
+}
+
+void VerifyScanHeader(const CompressParams& jparams, j_decompress_ptr cinfo) {  // Checks the current scan's header against jparams.
+  JXL_CHECK(cinfo->input_scan_number > 0);
+  if (cinfo->progressive_mode) {
+    JXL_CHECK(cinfo->Ss != 0 || cinfo->Se != 63);  // never the full 0..63 band
+  } else {
+    JXL_CHECK(cinfo->Ss == 0 && cinfo->Se == 63);
+  }
+  if (jparams.progressive_mode > 2) {  // modes 3 and up index into kTestScript
+    JXL_CHECK(jparams.progressive_mode < 3 + kNumTestScripts);
+    const ScanScript& script = kTestScript[jparams.progressive_mode - 3];
+    JXL_CHECK(cinfo->input_scan_number <= script.num_scans);
+    const jpeg_scan_info& scan = script.scans[cinfo->input_scan_number - 1];
+    JXL_CHECK(cinfo->comps_in_scan == scan.comps_in_scan);
+    for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+      JXL_CHECK(cinfo->cur_comp_info[i]->component_index ==
+                scan.component_index[i]);
+    }
+    JXL_CHECK(cinfo->Ss == scan.Ss);
+    JXL_CHECK(cinfo->Se == scan.Se);
+    JXL_CHECK(cinfo->Ah == scan.Ah);
+    JXL_CHECK(cinfo->Al == scan.Al);
+  }
+  if (jparams.restart_interval > 0) {  // takes precedence over restart_in_rows
+    JXL_CHECK(cinfo->restart_interval == jparams.restart_interval);
+  } else if (jparams.restart_in_rows > 0) {
+    JXL_CHECK(cinfo->restart_interval ==
+              jparams.restart_in_rows * cinfo->MCUs_per_row);
+  }
+  if (jparams.progressive_mode == 0 && jparams.optimize_coding == 0) {  // expected Huffman table slots per color space
+    if (cinfo->jpeg_color_space == JCS_RGB) {
+      JXL_CHECK(cinfo->comp_info[0].dc_tbl_no == 0);
+      JXL_CHECK(cinfo->comp_info[1].dc_tbl_no == 0);
+      JXL_CHECK(cinfo->comp_info[2].dc_tbl_no == 0);
+      JXL_CHECK(cinfo->comp_info[0].ac_tbl_no == 0);
+      JXL_CHECK(cinfo->comp_info[1].ac_tbl_no == 0);
+      JXL_CHECK(cinfo->comp_info[2].ac_tbl_no == 0);
+    } else if (cinfo->jpeg_color_space == JCS_YCbCr) {
+      JXL_CHECK(cinfo->comp_info[0].dc_tbl_no == 0);
+      JXL_CHECK(cinfo->comp_info[1].dc_tbl_no == 1);
+      JXL_CHECK(cinfo->comp_info[2].dc_tbl_no == 1);
+      JXL_CHECK(cinfo->comp_info[0].ac_tbl_no == 0);
+      JXL_CHECK(cinfo->comp_info[1].ac_tbl_no == 1);
+      JXL_CHECK(cinfo->comp_info[2].ac_tbl_no == 1);
+    } else if (cinfo->jpeg_color_space == JCS_CMYK) {
+      JXL_CHECK(cinfo->comp_info[0].dc_tbl_no == 0);
+      JXL_CHECK(cinfo->comp_info[1].dc_tbl_no == 0);
+      JXL_CHECK(cinfo->comp_info[2].dc_tbl_no == 0);
+      JXL_CHECK(cinfo->comp_info[3].dc_tbl_no == 0);
+      JXL_CHECK(cinfo->comp_info[0].ac_tbl_no == 0);
+      JXL_CHECK(cinfo->comp_info[1].ac_tbl_no == 0);
+      JXL_CHECK(cinfo->comp_info[2].ac_tbl_no == 0);
+      JXL_CHECK(cinfo->comp_info[3].ac_tbl_no == 0);
+    } else if (cinfo->jpeg_color_space == JCS_YCCK) {
+      JXL_CHECK(cinfo->comp_info[0].dc_tbl_no == 0);
+      JXL_CHECK(cinfo->comp_info[1].dc_tbl_no == 1);
+      JXL_CHECK(cinfo->comp_info[2].dc_tbl_no == 1);
+      JXL_CHECK(cinfo->comp_info[3].dc_tbl_no == 0);
+      JXL_CHECK(cinfo->comp_info[0].ac_tbl_no == 0);
+      JXL_CHECK(cinfo->comp_info[1].ac_tbl_no == 1);
+      JXL_CHECK(cinfo->comp_info[2].ac_tbl_no == 1);
+      JXL_CHECK(cinfo->comp_info[3].ac_tbl_no == 0);
+    }
+    if (jparams.use_flat_dc_luma_code) {
+      JHUFF_TBL* tbl = cinfo->dc_huff_tbl_ptrs[0];
+      jxl::msan::UnpoisonMemory(tbl, sizeof(*tbl));
+      for (int i = 0; i < 15; ++i) {  // first 15 symbols must be 0..14 in order
+        JXL_CHECK(tbl->huffval[i] == i);
+      }
+    }
+  }
+}
+
+void UnmapColors(uint8_t* row, size_t xsize, int components,
+                 JSAMPARRAY colormap, size_t num_colors) {
+  // Expands the xsize palette indices in row to interleaved samples, in place.
+  JXL_CHECK(colormap != nullptr);
+  std::vector<uint8_t> expanded(xsize * components);
+  for (size_t x = 0; x < xsize; ++x) {
+    const uint8_t index = row[x];
+    JXL_CHECK(index < num_colors);
+    for (int c = 0; c < components; ++c) expanded[x * components + c] = colormap[c][index];
+  }
+  memcpy(row, expanded.data(), expanded.size());
+}
+
+void CopyCoefficients(j_decompress_ptr cinfo, jvirt_barray_ptr* coef_arrays,
+                      TestImage* output) {
+  // Records the image geometry, then copies each component's block rows.
+  output->xsize = cinfo->image_width;
+  output->ysize = cinfo->image_height;
+  output->components = cinfo->num_components;
+  output->color_space = cinfo->out_color_space;
+  j_common_ptr common = reinterpret_cast<j_common_ptr>(cinfo);
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    const jpeg_component_info& comp = cinfo->comp_info[c];
+    std::vector<JCOEF> plane(comp.width_in_blocks * comp.height_in_blocks *
+                             DCTSIZE2);
+    const size_t row_bytes = comp.width_in_blocks * sizeof(JBLOCK);
+    for (size_t by = 0; by < comp.height_in_blocks; ++by) {
+      JBLOCKARRAY rows = (*cinfo->mem->access_virt_barray)(
+          common, coef_arrays[c], by, 1, true);
+      memcpy(&plane[by * comp.width_in_blocks * DCTSIZE2], rows[0], row_bytes);
+    }
+    output->coeffs.emplace_back(std::move(plane));
+  }
+}
diff --git a/third-party/libjxl/libjxl/lib/jpegli/test_utils.cc b/third-party/libjxl/libjxl/lib/jpegli/test_utils.cc
new file mode 100644
index 0000000000..e4a4dc7a6a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/test_utils.cc
@@ -0,0 +1,786 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/test_utils.h"
+
+#include <cmath>
+#include <fstream>
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/sanitizers.h"
+
+#if !defined(TEST_DATA_PATH)
+#include "tools/cpp/runfiles/runfiles.h"
+#endif
+
+namespace jpegli {
+
+#define JPEG_API_FN(name) jpegli_##name
+#include "lib/jpegli/test_utils-inl.h"
+#undef JPEG_API_FN
+
+#if defined(TEST_DATA_PATH)
+std::string GetTestDataPath(const std::string& filename) {  // path under TEST_DATA_PATH
+  return std::string(TEST_DATA_PATH "/") + filename;
+}
+#else
+using bazel::tools::cpp::runfiles::Runfiles;
+const std::unique_ptr<Runfiles> kRunfiles(Runfiles::Create(""));  // NOTE(review): not null-checked before use below
+std::string GetTestDataPath(const std::string& filename) {  // path resolved through the runfiles object
+  std::string root(JPEGXL_ROOT_PACKAGE "/testdata/");
+  return kRunfiles->Rlocation(root + filename);
+}
+#endif
+
+std::vector<uint8_t> ReadTestData(const std::string& filename) {
+  // Loads a test data file into memory; JXL_CHECK fails if the open failed.
+  const std::string full_path = GetTestDataPath(filename);
+  fprintf(stderr, "ReadTestData %s\n", full_path.c_str());
+  std::ifstream file(full_path, std::ios::binary);
+  JXL_CHECK(file.good());  // report an unreadable file before trying to read
+  // Build the byte vector straight from the stream; no intermediate char copy.
+  std::vector<uint8_t> data((std::istreambuf_iterator<char>(file)),
+                            std::istreambuf_iterator<char>());
+  printf("Test data %s is %d bytes long.\n", filename.c_str(),
+         static_cast<int>(data.size()));
+  return data;
+}
+
+void CustomQuantTable::Generate() {
+  // Base table: a 1..DCTSIZE2 ramp for type 0, otherwise a flat table_type fill.
+  basic_table.resize(DCTSIZE2);
+  quantval.resize(DCTSIZE2);
+  const bool ramp = (table_type == 0);
+  for (int k = 0; k < DCTSIZE2; ++k) {
+    if (ramp) {
+      basic_table[k] = k + 1;
+    } else {
+      basic_table[k] = table_type;
+    }
+  }
+  // Scale by scale_factor percent with rounding and clamp to [1, upper]; tables
+  // that are not added raw are capped at 255 (baseline) or 32767.
+  unsigned int upper = 65535U;
+  if (!add_raw) {
+    upper = force_baseline ? 255U : 32767U;
+  }
+  for (int k = 0; k < DCTSIZE2; ++k) {
+    const unsigned int scaled = (basic_table[k] * scale_factor + 50U) / 100U;
+    const unsigned int floored = std::max(scaled, 1U);
+    quantval[k] = std::min(floored, upper);
+  }
+}
+
+bool PNMParser::ParseHeader(const uint8_t** pos, size_t* xsize, size_t* ysize,
+                            size_t* num_channels, size_t* bitdepth) {
+  // Parses "P5"/"P6", then width, height and maxval, each preceded by
+  // whitespace; on success *pos points just past the header's final whitespace.
+  if (end_ - pos_ < 2 || pos_[0] != 'P' ||
+      (pos_[1] != '5' && pos_[1] != '6')) {
+    fprintf(stderr, "Invalid PNM header.\n");
+    return false;
+  }
+  *num_channels = (pos_[1] == '5' ? 1 : 3);  // P5: one channel, P6: three
+  pos_ += 2;
+
+  size_t maxval;
+  if (!SkipWhitespace() || !ParseUnsigned(xsize) || !SkipWhitespace() ||
+      !ParseUnsigned(ysize) || !SkipWhitespace() || !ParseUnsigned(&maxval) ||
+      !SkipWhitespace()) {
+    return false;
+  }
+  // Only maxval == 2^bits - 1 (bits 1..16) passes, so 0 and >= 65536 fail too.
+  bool found_bitdepth = false;
+  for (int bits = 1; bits <= 16; ++bits) {
+    if (maxval == (1u << bits) - 1) {
+      *bitdepth = bits;
+      found_bitdepth = true;
+      break;
+    }
+  }
+  if (!found_bitdepth) {
+    fprintf(stderr, "Invalid maxval value.\n");
+    return false;
+  }
+
+  *pos = pos_;
+  return true;
+}
+
+bool PNMParser::ParseUnsigned(size_t* number) {
+  if (pos_ == end_ || *pos_ < '0' || *pos_ > '9') {
+    fprintf(stderr, "Expected unsigned number.\n");
+    return false;
+  }
+  size_t value = 0;  // accumulated locally; *number is written only on success
+  for (; pos_ < end_ && *pos_ >= '0' && *pos_ <= '9'; ++pos_) {
+    const size_t digit = *pos_ - '0';
+    if (value > (~size_t{0} - digit) / 10) return false;  // would wrap size_t
+    value = value * 10 + digit;
+  }
+  *number = value;
+  return true;
+}
+
+bool PNMParser::SkipWhitespace() {
+  // Consumes a run of whitespace; at least one whitespace byte is required.
+  const auto start = pos_;
+  while (pos_ < end_ && IsWhitespace(*pos_)) ++pos_;
+  if (pos_ == start) {
+    fprintf(stderr, "Expected whitespace.\n");
+    return false;
+  }
+  return true;
+}
+
+bool ReadPNM(const std::vector<uint8_t>& data, size_t* xsize, size_t* ysize,
+             size_t* num_channels, size_t* bitdepth,
+             std::vector<uint8_t>* pixels) {
+  if (data.size() < 2) {
+    fprintf(stderr, "PNM file too small.\n");
+    return false;
+  }
+  PNMParser parser(data.data(), data.size());
+  const uint8_t* pos = nullptr;
+  if (!parser.ParseHeader(&pos, xsize, ysize, num_channels, bitdepth)) {
+    return false;
+  }
+  // Payload is everything after the header; assign() is safe when it is empty.
+  pixels->assign(pos, data.data() + data.size());
+  return true;
+}
+
+std::string ColorSpaceName(J_COLOR_SPACE colorspace) {
+  // Text label for a color space; any value without an entry in the table
+  // below yields an empty string.
+  static const struct {
+    J_COLOR_SPACE id;
+    const char* label;
+  } kLabels[] = {
+      {JCS_UNKNOWN, "UNKNOWN"}, {JCS_GRAYSCALE, "GRAYSCALE"},
+      {JCS_RGB, "RGB"},         {JCS_YCbCr, "YCbCr"},
+      {JCS_CMYK, "CMYK"},       {JCS_YCCK, "YCCK"},
+  };
+  for (const auto& entry : kLabels) {
+    if (entry.id == colorspace) {
+      return entry.label;
+    }
+  }
+  return "";
+}
+
+std::string IOMethodName(JpegliDataType data_type,
+                         JpegliEndianness endianness) {
+  // 8-bit samples get no tag at all; other types get the type name (if known)
+  // followed by "LE"/"BE" when that endianness is explicitly requested.
+  if (data_type == JPEGLI_TYPE_UINT8) return "";
+  std::string tag;
+  if (data_type == JPEGLI_TYPE_UINT16) {
+    tag = "UINT16";
+  } else if (data_type == JPEGLI_TYPE_FLOAT) {
+    tag = "FLOAT";
+  }
+  const char* suffix = "";
+  if (endianness == JPEGLI_LITTLE_ENDIAN) suffix = "LE";
+  if (endianness == JPEGLI_BIG_ENDIAN) suffix = "BE";
+  tag += suffix;
+  return tag;
+}
+
+std::string SamplingId(const CompressParams& jparams) {
+  // "SAMP" followed by "HxV" per component joined with '_'; trailing 1x1
+  // components are dropped (the first always stays). Empty without sampling.
+  const auto& hs = jparams.h_sampling;
+  const auto& vs = jparams.v_sampling;
+  JXL_CHECK(hs.size() == vs.size());
+  if (hs.empty()) return "";
+  size_t count = hs.size();
+  while (count > 1 && hs[count - 1] == 1 && vs[count - 1] == 1) --count;
+  std::stringstream os;
+  os << "SAMP";
+  for (size_t i = 0; i < count; ++i) {
+    if (i > 0) os << "_";
+    os << hs[i] << "x" << vs[i];
+  }
+  return os.str();
+}
+
+std::ostream& operator<<(std::ostream& os, const TestImage& input) {
+  // Writes "<xsize>x<ysize>", the I/O method tag, the input color space when
+  // it is not RGB, and the component count when the color space is unknown.
+  os << input.xsize << "x" << input.ysize
+     << IOMethodName(input.data_type, input.endianness);
+  if (input.color_space != JCS_RGB) {
+    os << "InputColor" << ColorSpaceName((J_COLOR_SPACE)input.color_space);
+  }
+  if (input.color_space == JCS_UNKNOWN) os << input.components;
+  return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const CompressParams& jparams) {
+  // Writes a compact tag describing the compression settings. Fields are
+  // emitted in a fixed order and most are skipped when left unset, so the
+  // resulting text stays short.
+  os << "Q" << jparams.quality << SamplingId(jparams);
+
+  // Color space and per-component identifiers.
+  if (jparams.set_jpeg_colorspace) {
+    const auto jpeg_cs = (J_COLOR_SPACE)jparams.jpeg_color_space;
+    os << "JpegColor" << ColorSpaceName(jpeg_cs);
+  }
+  if (!jparams.comp_ids.empty()) {
+    os << "CID";
+    for (const auto& id : jparams.comp_ids) os << id;
+  }
+
+  // Quantization table selection. Custom tables are only listed when
+  // per-component table indexes are given.
+  if (!jparams.quant_indexes.empty()) {
+    os << "QIDX";
+    for (const auto& index : jparams.quant_indexes) os << index;
+    for (const auto& table : jparams.quant_tables) {
+      const char* suffix = "";
+      if (table.add_raw) {
+        suffix = "R";
+      } else if (table.force_baseline) {
+        suffix = "B";
+      }
+      os << "TABLE" << table.slot_idx << "T" << table.table_type << "F"
+         << table.scale_factor << suffix;
+    }
+  }
+
+  // Progression and entropy coding.
+  if (jparams.progressive_mode >= 0) {
+    os << "P" << jparams.progressive_mode;
+  } else if (jparams.simple_progression) {
+    os << "Psimple";
+  }
+  if (jparams.optimize_coding == 1) {
+    os << "OptimizedCode";
+  } else if (jparams.optimize_coding == 0) {
+    os << "FixedCode";
+    if (jparams.use_flat_dc_luma_code) {
+      os << "FlatDCLuma";
+    } else if (jparams.omit_standard_tables) {
+      os << "OmitDHT";
+    }
+  }
+
+  // Remaining switches, each contributing at most one token.
+  if (!jparams.use_adaptive_quantization) os << "NoAQ";
+  if (jparams.restart_interval > 0) os << "R" << jparams.restart_interval;
+  if (jparams.restart_in_rows > 0) os << "RR" << jparams.restart_in_rows;
+  if (jparams.xyb_mode) {
+    os << "XYB";
+  } else if (jparams.libjpeg_mode) {
+    os << "Libjpeg";
+  }
+  if (jparams.override_JFIF >= 0) {
+    os << (jparams.override_JFIF ? "AddJFIF" : "NoJFIF");
+  }
+  if (jparams.override_Adobe >= 0) {
+    os << (jparams.override_Adobe ? "AddAdobe" : "NoAdobe");
+  }
+  if (jparams.add_marker) {
+    os << "AddMarker";
+  }
+  if (!jparams.icc.empty()) os << "ICCSize" << jparams.icc.size();
+  if (jparams.smoothing_factor != 0) os << "SF" << jparams.smoothing_factor;
+  return os;
+}
+
+void SetNumChannels(J_COLOR_SPACE colorspace, size_t* channels) {
+  // Known color spaces fix the channel count; JCS_UNKNOWN keeps the caller's
+  // value (which must be at most 4); anything else aborts.
+  switch (colorspace) {
+    case JCS_GRAYSCALE: *channels = 1; break;
+    case JCS_RGB:
+    case JCS_YCbCr: *channels = 3; break;
+    case JCS_CMYK:
+    case JCS_YCCK: *channels = 4; break;
+    case JCS_UNKNOWN: JXL_CHECK(*channels <= 4); break;
+    default: JXL_ABORT();
+  }
+}
+
+void RGBToYCbCr(float r, float g, float b, float* y, float* cb, float* cr) {  // weighted RGB sums; chroma outputs get a +0.5 offset
+  *y = 0.299f * r + 0.587f * g + 0.114f * b;  // weights sum to 1
+  *cb = -0.168736f * r - 0.331264f * g + 0.5f * b + 0.5f;  // 0.5 when r == g == b
+  *cr = 0.5f * r - 0.418688f * g - 0.081312f * b + 0.5f;  // 0.5 when r == g == b
+}
+
+void ConvertPixel(const uint8_t* input_rgb, uint8_t* out,
+                  J_COLOR_SPACE colorspace, size_t num_channels,
+                  JpegliDataType data_type = JPEGLI_TYPE_UINT8,
+                  bool swap_endianness = false) {
+  // Converts one 8-bit RGB pixel to num_channels samples of data_type at out.
+  const float kMul = 255.0f;
+  float r = input_rgb[0] / kMul;
+  float g = input_rgb[1] / kMul;
+  float b = input_rgb[2] / kMul;
+  uint8_t out8[MAX_COMPONENTS];
+  if (colorspace == JCS_GRAYSCALE) {
+    const float Y = 0.299f * r + 0.587f * g + 0.114f * b;
+    out8[0] = static_cast<uint8_t>(std::round(Y * kMul));
+  } else if (colorspace == JCS_RGB || colorspace == JCS_UNKNOWN) {
+    for (size_t c = 0; c < num_channels; ++c) out8[c] = input_rgb[std::min<size_t>(2, c)];
+  } else if (colorspace == JCS_YCbCr) {
+    float Y, Cb, Cr;
+    RGBToYCbCr(r, g, b, &Y, &Cb, &Cr);
+    out8[0] = static_cast<uint8_t>(std::round(Y * kMul));
+    out8[1] = static_cast<uint8_t>(std::round(Cb * kMul));
+    out8[2] = static_cast<uint8_t>(std::round(Cr * kMul));
+  } else if (colorspace == JCS_CMYK || colorspace == JCS_YCCK) {
+    float K = 1.0f - std::max(r, std::max(g, b));
+    if (K < 1.0f) {
+      const float scaleK = 1.0f / (1.0f - K);
+      r *= scaleK;
+      g *= scaleK;
+      b *= scaleK;
+    } else {
+      // Pure black: 1 / (1 - K) is infinite and 0 * inf is NaN, whose cast to
+      // uint8_t is undefined. Treat the scaled RGB as 1, i.e. C = M = Y = 0.
+      r = g = b = 1.0f;
+    }
+    if (colorspace == JCS_CMYK) {
+      out8[0] = static_cast<uint8_t>(std::round((1.0f - r) * kMul));
+      out8[1] = static_cast<uint8_t>(std::round((1.0f - g) * kMul));
+      out8[2] = static_cast<uint8_t>(std::round((1.0f - b) * kMul));
+    } else if (colorspace == JCS_YCCK) {
+      float Y, Cb, Cr;
+      RGBToYCbCr(r, g, b, &Y, &Cb, &Cr);
+      out8[0] = static_cast<uint8_t>(std::round(Y * kMul));
+      out8[1] = static_cast<uint8_t>(std::round(Cb * kMul));
+      out8[2] = static_cast<uint8_t>(std::round(Cr * kMul));
+    }
+    out8[3] = static_cast<uint8_t>(std::round(K * kMul));
+  } else {
+    JXL_ABORT("Colorspace %d not supported", colorspace);
+  }
+  if (data_type == JPEGLI_TYPE_UINT8) {
+    memcpy(out, out8, num_channels);
+  } else if (data_type == JPEGLI_TYPE_UINT16) {
+    for (size_t c = 0; c < num_channels; ++c) {
+      uint16_t val = ((out8[c] << 8) + out8[c]) | 0x40;  // 0x40 makes little-endian and big-endian asymmetric
+      if (swap_endianness) val = JXL_BSWAP16(val);
+      memcpy(&out[sizeof(val) * c], &val, sizeof(val));
+    }
+  } else if (data_type == JPEGLI_TYPE_FLOAT) {
+    for (size_t c = 0; c < num_channels; ++c) {
+      float val = out8[c] / 255.0f;
+      if (swap_endianness) val = BSwapFloat(val);
+      memcpy(&out[sizeof(val) * c], &val, sizeof(val));
+    }
+  }
+}
+
+void ConvertToGrayscale(TestImage* img) {
+  // Reduces 8-bit data to one channel in place, consuming 3 bytes per pixel.
+  if (img->color_space == JCS_GRAYSCALE) return;
+  JXL_CHECK(img->data_type == JPEGLI_TYPE_UINT8);
+  const bool from_rgb = img->color_space == JCS_RGB;
+  auto& px = img->pixels;
+  for (size_t src = 0, dst = 0; src < px.size(); src += 3, ++dst) {
+    if (from_rgb) ConvertPixel(&px[src], &px[dst], JCS_GRAYSCALE, 1);
+    else if (img->color_space == JCS_YCbCr) px[dst] = px[src];  // keep Y only
+  }
+  px.resize(px.size() / 3);
+  img->color_space = JCS_GRAYSCALE;
+  img->components = 1;
+}
+
+void GeneratePixels(TestImage* img) {
+  // Fills img->pixels with a centered crop of the flower image, converted.
+  const std::vector<uint8_t> imgdata = ReadTestData("jxl/flower/flower.pnm");
+  size_t src_xsize, src_ysize, src_channels, src_bitdepth;
+  std::vector<uint8_t> src;
+  JXL_CHECK(ReadPNM(imgdata, &src_xsize, &src_ysize, &src_channels,
+                    &src_bitdepth, &src));
+  if (img->xsize == 0) img->xsize = src_xsize;
+  if (img->ysize == 0) img->ysize = src_ysize;
+  JXL_CHECK(img->xsize <= src_xsize);
+  JXL_CHECK(img->ysize <= src_ysize);
+  JXL_CHECK(3 == src_channels);
+  JXL_CHECK(8 == src_bitdepth);
+  const auto color_space = (J_COLOR_SPACE)img->color_space;
+  SetNumChannels(color_space, &img->components);
+  const size_t src_stride = src_xsize * src_channels;
+  const size_t dst_pixel_bytes =
+      jpegli_bytes_per_sample(img->data_type) * img->components;
+  const size_t dst_stride = img->xsize * dst_pixel_bytes;
+  const bool swap_endianness =
+      (img->endianness == JPEGLI_LITTLE_ENDIAN && !IsLittleEndian()) ||
+      (img->endianness == JPEGLI_BIG_ENDIAN && IsLittleEndian());
+  img->pixels.resize(img->ysize * dst_stride);
+  const size_t x0 = (src_xsize - img->xsize) / 2;
+  const size_t y0 = (src_ysize - img->ysize) / 2;
+  for (size_t iy = 0; iy < img->ysize; ++iy) {
+    const size_t src_row = (y0 + iy) * src_stride;
+    const size_t dst_row = iy * dst_stride;
+    for (size_t ix = 0; ix < img->xsize; ++ix) {
+      ConvertPixel(&src[src_row + (x0 + ix) * src_channels],
+                   &img->pixels[dst_row + ix * dst_pixel_bytes], color_space,
+                   img->components, img->data_type, swap_endianness);
+    }
+  }
+}
+
+void GenerateRawData(const CompressParams& jparams, TestImage* img) {
+  // Box-averages the interleaved 8-bit pixels into one plane per component at
+  // that component's subsampled size; reads past the image edge are clamped.
+  const size_t pixel_stride = img->components;
+  for (size_t c = 0; c < img->components; ++c) {
+    const size_t plane_w = jparams.comp_width(*img, c);
+    const size_t plane_h = jparams.comp_height(*img, c);
+    const size_t step_x = jparams.max_h_sample() / jparams.h_samp(c);
+    const size_t step_y = jparams.max_v_sample() / jparams.v_samp(c);
+    const size_t area = step_x * step_y;
+    std::vector<uint8_t> plane(plane_h * plane_w);
+    for (size_t y = 0; y < plane_h; ++y) {
+      for (size_t x = 0; x < plane_w; ++x) {
+        size_t sum = 0;
+        for (size_t iy = 0; iy < step_y; ++iy) {
+          const size_t yy = std::min(y * step_y + iy, img->ysize - 1);
+          for (size_t ix = 0; ix < step_x; ++ix) {
+            const size_t xx = std::min(x * step_x + ix, img->xsize - 1);
+            sum += img->pixels[(yy * img->xsize + xx) * pixel_stride + c];
+          }
+        }
+        plane[y * plane_w + x] = static_cast<uint8_t>((sum + area / 2) / area);
+      }
+    }
+    img->raw_data.emplace_back(std::move(plane));
+  }
+}
+
+void GenerateCoeffs(const CompressParams& jparams, TestImage* img) {
+  // Synthetic data: block (bx, by) holds (bx - by) / (k + 1) at index k.
+  for (size_t c = 0; c < img->components; ++c) {
+    const int blocks_w = jparams.comp_width(*img, c) / DCTSIZE;
+    const int blocks_h = jparams.comp_height(*img, c) / DCTSIZE;
+    std::vector<JCOEF> plane(blocks_h * blocks_w * DCTSIZE2);
+    JCOEF* coeff = plane.data();
+    for (int by = 0; by < blocks_h; ++by) {
+      for (int bx = 0; bx < blocks_w; ++bx) {
+        const int diff = bx - by;
+        for (int k = 0; k < DCTSIZE2; ++k) *coeff++ = diff / (k + 1);
+      }
+    }
+    img->coeffs.emplace_back(std::move(plane));
+  }
+}
+
+// Configures `cinfo` from `jparams` and compresses `input` with it.
+//
+// The image is fed through exactly one of three paths, chosen from what
+// `input` holds: raw per-component planes (input.raw_data), DCT coefficients
+// (input.coeffs), or interleaved scanlines (input.pixels). This function does
+// not create `cinfo` or set its destination, so the caller must have done
+// both. It ends with jpegli_finish_compress() and does not destroy `cinfo`.
+void EncodeWithJpegli(const TestImage& input, const CompressParams& jparams,
+                      j_compress_ptr cinfo) {
+  cinfo->image_width = input.xsize;
+  cinfo->image_height = input.ysize;
+  cinfo->input_components = input.components;
+  if (jparams.xyb_mode) {
+    jpegli_set_xyb_mode(cinfo);
+  }
+  // libjpeg_mode: no adaptive quantization, standard quant tables and
+  // progressive level 0, all requested before the defaults are applied.
+  if (jparams.libjpeg_mode) {
+    jpegli_enable_adaptive_quantization(cinfo, FALSE);
+    jpegli_use_standard_quant_tables(cinfo);
+    jpegli_set_progressive_level(cinfo, 0);
+  }
+  jpegli_set_defaults(cinfo);
+  cinfo->in_color_space = (J_COLOR_SPACE)input.color_space;
+  jpegli_default_colorspace(cinfo);
+  // Negative override values leave the header flags as the defaults set them.
+  if (jparams.override_JFIF >= 0) {
+    cinfo->write_JFIF_header = jparams.override_JFIF;
+  }
+  if (jparams.override_Adobe >= 0) {
+    cinfo->write_Adobe_marker = jparams.override_Adobe;
+  }
+  if (jparams.set_jpeg_colorspace) {
+    jpegli_set_colorspace(cinfo, (J_COLOR_SPACE)jparams.jpeg_color_space);
+  }
+  // Optional per-component overrides; each vector is indexed by component.
+  if (!jparams.comp_ids.empty()) {
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      cinfo->comp_info[c].component_id = jparams.comp_ids[c];
+    }
+  }
+  // v_sampling is read whenever h_sampling is non-empty, so the two must be
+  // provided together.
+  if (!jparams.h_sampling.empty()) {
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      cinfo->comp_info[c].h_samp_factor = jparams.h_sampling[c];
+      cinfo->comp_info[c].v_samp_factor = jparams.v_sampling[c];
+    }
+  }
+  jpegli_set_quality(cinfo, jparams.quality, TRUE);
+  // Custom quantization tables are installed only when quant_indexes is set.
+  if (!jparams.quant_indexes.empty()) {
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      cinfo->comp_info[c].quant_tbl_no = jparams.quant_indexes[c];
+    }
+    for (const auto& table : jparams.quant_tables) {
+      if (table.add_raw) {
+        // Write the values straight into a freshly allocated table instead of
+        // going through jpegli_add_quant_table().
+        cinfo->quant_tbl_ptrs[table.slot_idx] =
+            jpegli_alloc_quant_table((j_common_ptr)cinfo);
+        for (int k = 0; k < DCTSIZE2; ++k) {
+          cinfo->quant_tbl_ptrs[table.slot_idx]->quantval[k] =
+              table.quantval[k];
+        }
+        cinfo->quant_tbl_ptrs[table.slot_idx]->sent_table = FALSE;
+      } else {
+        jpegli_add_quant_table(cinfo, table.slot_idx, &table.basic_table[0],
+                               table.scale_factor, table.force_baseline);
+      }
+    }
+  }
+  // simple_progression may only be combined with progressive_mode == -1.
+  if (jparams.simple_progression) {
+    jpegli_simple_progression(cinfo);
+    JXL_CHECK(jparams.progressive_mode == -1);
+  }
+  // Modes above 2 select a predefined scan script (mode 3 is kTestScript[0]);
+  // modes 0..2 are passed on as jpegli progressive levels; negative modes
+  // change nothing here.
+  if (jparams.progressive_mode > 2) {
+    const ScanScript& script = kTestScript[jparams.progressive_mode - 3];
+    cinfo->scan_info = script.scans;
+    cinfo->num_scans = script.num_scans;
+  } else if (jparams.progressive_mode >= 0) {
+    jpegli_set_progressive_level(cinfo, jparams.progressive_mode);
+  }
+  jpegli_set_input_format(cinfo, input.data_type, input.endianness);
+  jpegli_enable_adaptive_quantization(cinfo, jparams.use_adaptive_quantization);
+  cinfo->restart_interval = jparams.restart_interval;
+  cinfo->restart_in_rows = jparams.restart_in_rows;
+  cinfo->smoothing_factor = jparams.smoothing_factor;
+  // Any optimize_coding value other than 0 or 1 keeps the current setting.
+  if (jparams.optimize_coding == 1) {
+    cinfo->optimize_coding = TRUE;
+  } else if (jparams.optimize_coding == 0) {
+    cinfo->optimize_coding = FALSE;
+  }
+  cinfo->raw_data_in = !input.raw_data.empty();
+  // Replace DC Huffman table 0 with a flat code: 15 symbols (values 0..14),
+  // all counted in bits[4] (presumably code length 4 under the libjpeg
+  // JHUFF_TBL convention -- confirm).
+  if (jparams.optimize_coding == 0 && jparams.use_flat_dc_luma_code) {
+    JHUFF_TBL* tbl = cinfo->dc_huff_tbl_ptrs[0];
+    memset(tbl, 0, sizeof(*tbl));
+    tbl->bits[4] = 15;
+    for (int i = 0; i < 15; ++i) tbl->huffval[i] = i;
+  }
+  // Pixel and raw-data input start the compressor here; the coefficient path
+  // below calls jpegli_write_coefficients() instead.
+  if (input.coeffs.empty()) {
+    bool write_all_tables = TRUE;
+    // Flag the Huffman tables in slots 0 and 1 as already sent and do not
+    // request all tables -- presumably so the standard tables are left out of
+    // the output stream.
+    if (jparams.optimize_coding == 0 && !jparams.use_flat_dc_luma_code &&
+        jparams.omit_standard_tables) {
+      write_all_tables = FALSE;
+      cinfo->dc_huff_tbl_ptrs[0]->sent_table = TRUE;
+      cinfo->dc_huff_tbl_ptrs[1]->sent_table = TRUE;
+      cinfo->ac_huff_tbl_ptrs[0]->sent_table = TRUE;
+      cinfo->ac_huff_tbl_ptrs[1]->sent_table = TRUE;
+    }
+    jpegli_start_compress(cinfo, write_all_tables);
+    // Test markers: one written in a single call, one written byte by byte,
+    // then kMarkerSequence with payload lengths that vary with the index.
+    if (jparams.add_marker) {
+      jpegli_write_marker(cinfo, kSpecialMarker0, kMarkerData,
+                          sizeof(kMarkerData));
+      jpegli_write_m_header(cinfo, kSpecialMarker1, sizeof(kMarkerData));
+      for (size_t p = 0; p < sizeof(kMarkerData); ++p) {
+        jpegli_write_m_byte(cinfo, kMarkerData[p]);
+      }
+      for (size_t i = 0; i < kMarkerSequenceLen; ++i) {
+        jpegli_write_marker(cinfo, kMarkerSequence[i], kMarkerData,
+                            ((i + 2) % sizeof(kMarkerData)));
+      }
+    }
+    if (!jparams.icc.empty()) {
+      jpegli_write_icc_profile(cinfo, jparams.icc.data(), jparams.icc.size());
+    }
+  }
+  if (cinfo->raw_data_in) {
+    // Raw-data path: hand over max_lines image lines per call, as one row
+    // pointer table per component.
+    // Need to copy because jpeg API requires non-const pointers.
+    std::vector<std::vector<uint8_t>> raw_data = input.raw_data;
+    size_t max_lines = jparams.max_v_sample() * DCTSIZE;
+    std::vector<std::vector<JSAMPROW>> rowdata(cinfo->num_components);
+    std::vector<JSAMPARRAY> data(cinfo->num_components);
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      rowdata[c].resize(jparams.v_samp(c) * DCTSIZE);
+      data[c] = &rowdata[c][0];
+    }
+    while (cinfo->next_scanline < cinfo->image_height) {
+      for (int c = 0; c < cinfo->num_components; ++c) {
+        size_t cwidth = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+        size_t cheight = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+        size_t num_lines = jparams.v_samp(c) * DCTSIZE;
+        // First plane row of this batch for component c.
+        size_t y0 = (cinfo->next_scanline / max_lines) * num_lines;
+        for (size_t i = 0; i < num_lines; ++i) {
+          // Rows past the block-padded component height are passed as null.
+          rowdata[c][i] =
+              (y0 + i < cheight ? &raw_data[c][(y0 + i) * cwidth] : nullptr);
+        }
+      }
+      size_t num_lines = jpegli_write_raw_data(cinfo, &data[0], max_lines);
+      JXL_CHECK(num_lines == max_lines);
+    }
+  } else if (!input.coeffs.empty()) {
+    // Coefficient path: request one virtual block array per component, sized
+    // in whole DCT blocks, and fill the arrays after
+    // jpegli_write_coefficients() has been called.
+    j_common_ptr comptr = reinterpret_cast<j_common_ptr>(cinfo);
+    jvirt_barray_ptr* coef_arrays = reinterpret_cast<jvirt_barray_ptr*>((
+        *cinfo->mem->alloc_small)(
+        comptr, JPOOL_IMAGE, cinfo->num_components * sizeof(jvirt_barray_ptr)));
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      size_t xsize_blocks = jparams.comp_width(input, c) / DCTSIZE;
+      size_t ysize_blocks = jparams.comp_height(input, c) / DCTSIZE;
+      coef_arrays[c] = (*cinfo->mem->request_virt_barray)(
+          comptr, JPOOL_IMAGE, FALSE, xsize_blocks, ysize_blocks,
+          cinfo->comp_info[c].v_samp_factor);
+    }
+    jpegli_write_coefficients(cinfo, coef_arrays);
+    if (jparams.add_marker) {
+      jpegli_write_marker(cinfo, kSpecialMarker0, kMarkerData,
+                          sizeof(kMarkerData));
+      jpegli_write_m_header(cinfo, kSpecialMarker1, sizeof(kMarkerData));
+      for (size_t p = 0; p < sizeof(kMarkerData); ++p) {
+        jpegli_write_m_byte(cinfo, kMarkerData[p]);
+      }
+    }
+    // Copy the coefficients one row of blocks at a time.
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      jpeg_component_info* comp = &cinfo->comp_info[c];
+      for (size_t by = 0; by < comp->height_in_blocks; ++by) {
+        JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)(
+            comptr, coef_arrays[c], by, 1, true);
+        // `stride` is in bytes, `offset` in coefficients.
+        size_t stride = comp->width_in_blocks * sizeof(JBLOCK);
+        size_t offset = by * comp->width_in_blocks * DCTSIZE2;
+        memcpy(ba[0], &input.coeffs[c][offset], stride);
+      }
+    }
+  } else {
+    // Scanline path: copy each row into a mutable buffer and write it as a
+    // single scanline.
+    size_t stride = cinfo->image_width * cinfo->input_components *
+                    jpegli_bytes_per_sample(input.data_type);
+    std::vector<uint8_t> row_bytes(stride);
+    for (size_t y = 0; y < cinfo->image_height; ++y) {
+      memcpy(&row_bytes[0], &input.pixels[y * stride], stride);
+      JSAMPROW row[] = {row_bytes.data()};
+      jpegli_write_scanlines(cinfo, row, 1);
+    }
+  }
+  jpegli_finish_compress(cinfo);
+}
+
+// Compresses `input` into an in-memory JPEG and stores the bytes in
+// `*compressed`. Returns false, leaving `*compressed` untouched, when the
+// encoder reports an error through the handler that ERROR_HANDLER_SETUP
+// installs.
+bool EncodeWithJpegli(const TestImage& input, const CompressParams& jparams,
+                      std::vector<uint8_t>* compressed) {
+  uint8_t* out_data = nullptr;
+  unsigned long out_size = 0;
+  // Must be called `cinfo`: ERROR_HANDLER_SETUP refers to it by that name.
+  jpeg_compress_struct cinfo;
+  // The macro's error path does `return false`, so the encoding runs inside a
+  // lambda; cleanup below then happens on both outcomes.
+  const auto run_encoder = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &out_data, &out_size);
+    EncodeWithJpegli(input, jparams, &cinfo);
+    return true;
+  };
+  const bool ok = run_encoder();
+  jpegli_destroy_compress(&cinfo);
+  if (ok) {
+    compressed->assign(out_data, out_data + out_size);
+  }
+  // free() accepts a null pointer, so no guard is needed.
+  std::free(out_data);
+  return ok;
+}
+
+// Returns kNumTestScripts. EncodeWithJpegli() selects a script with
+// kTestScript[progressive_mode - 3].
+int NumTestScanScripts() { return kNumTestScripts; }
+
+// Writes `image` to the file `fn` as a binary PNM: "P5" for one component,
+// "P6" for three, followed by width, height, a maxval derived from the sample
+// width, and then image.pixels copied verbatim (no byte-order conversion).
+// Aborts through JXL_CHECK for any other component count.
+void DumpImage(const TestImage& image, const std::string fn) {
+  JXL_CHECK(image.components == 1 || image.components == 3);
+  size_t bytes_per_sample = jpegli_bytes_per_sample(image.data_type);
+  // Computed in 64 bits: with 4-byte samples the shift count is 32, which is
+  // undefined behaviour on a 32-bit operand. 1- and 2-byte samples still give
+  // 255 and 65535.
+  uint64_t maxval = (uint64_t{1} << (8 * bytes_per_sample)) - 1;
+  char type = image.components == 1 ? '5' : '6';
+  std::ofstream out(fn.c_str(), std::ofstream::binary);
+  out << "P" << type << std::endl
+      << image.xsize << " " << image.ysize << std::endl
+      << maxval << std::endl;
+  out.write(reinterpret_cast<const char*>(image.pixels.data()),
+            image.pixels.size());
+  out.close();
+}
+
+// Returns the root-mean-square difference between `input` and `output`,
+// multiplied by 255. Integer samples are first scaled to [0, 1]; float samples
+// are used as stored.
+//
+// When both images have pixels, `num_lines` rows starting at `start_line` are
+// compared, with rows of input.xsize * input.components samples. Otherwise
+// both must have raw_data and every sample of every input plane is compared.
+// If `max_diff` is non-null it receives the largest absolute difference, also
+// multiplied by 255.
+double DistanceRms(const TestImage& input, const TestImage& output,
+                   size_t start_line, size_t num_lines, double* max_diff) {
+  // Samples per row, and the index of the first compared sample.
+  size_t stride = input.xsize * input.components;
+  size_t start_offset = start_line * stride;
+  // Reads sample `idx` (relative to start_offset) from `data`, decoding it
+  // according to the data type and byte order of `im`.
+  auto get_sample = [&](const TestImage& im, const std::vector<uint8_t>& data,
+                        size_t idx) -> double {
+    size_t bytes_per_sample = jpegli_bytes_per_sample(im.data_type);
+    bool is_little_endian =
+        (im.endianness == JPEGLI_LITTLE_ENDIAN ||
+         (im.endianness == JPEGLI_NATIVE_ENDIAN && IsLittleEndian()));
+    // start_offset and idx are both sample counts, so their sum is scaled to
+    // bytes. Adding start_offset unscaled addressed the wrong rows for 2- and
+    // 4-byte samples whenever start_line != 0.
+    size_t offset = (start_offset + idx) * bytes_per_sample;
+    JXL_CHECK(offset < data.size());
+    const uint8_t* p = &data[offset];
+    if (im.data_type == JPEGLI_TYPE_UINT8) {
+      static const double mul8 = 1.0 / 255.0;
+      return p[0] * mul8;
+    } else if (im.data_type == JPEGLI_TYPE_UINT16) {
+      static const double mul16 = 1.0 / 65535.0;
+      return (is_little_endian ? LoadLE16(p) : LoadBE16(p)) * mul16;
+    } else if (im.data_type == JPEGLI_TYPE_FLOAT) {
+      return (is_little_endian ? LoadLEFloat(p) : LoadBEFloat(p));
+    }
+    return 0.0;
+  };
+  double diff2 = 0.0;
+  size_t num_samples = 0;
+  if (max_diff) *max_diff = 0.0;
+  if (!input.pixels.empty() && !output.pixels.empty()) {
+    num_samples = num_lines * stride;
+    for (size_t i = 0; i < num_samples; ++i) {
+      double sample_orig = get_sample(input, input.pixels, i);
+      double sample_output = get_sample(output, output.pixels, i);
+      double diff = sample_orig - sample_output;
+      if (max_diff) *max_diff = std::max(*max_diff, 255.0 * std::abs(diff));
+      diff2 += diff * diff;
+    }
+  } else {
+    JXL_CHECK(!input.raw_data.empty());
+    JXL_CHECK(!output.raw_data.empty());
+    for (size_t c = 0; c < input.raw_data.size(); ++c) {
+      JXL_CHECK(c < output.raw_data.size());
+      num_samples += input.raw_data[c].size();
+      for (size_t i = 0; i < input.raw_data[c].size(); ++i) {
+        double sample_orig = get_sample(input, input.raw_data[c], i);
+        double sample_output = get_sample(output, output.raw_data[c], i);
+        double diff = sample_orig - sample_output;
+        if (max_diff) *max_diff = std::max(*max_diff, 255.0 * std::abs(diff));
+        diff2 += diff * diff;
+      }
+    }
+  }
+  return std::sqrt(diff2 / num_samples) * 255.0;
+}
+
+// Convenience overload: compares output.ysize lines starting at line 0.
+double DistanceRms(const TestImage& input, const TestImage& output,
+                   double* max_diff) {
+  return DistanceRms(input, output, 0, output.ysize, max_diff);
+}
+
+// Computes DistanceRms() over `num_lines` lines starting at `start_line`,
+// prints the measured and allowed values, and aborts through JXL_CHECK if the
+// RMS exceeds `max_rms` or the largest per-sample difference exceeds
+// `max_diff`.
+void VerifyOutputImage(const TestImage& input, const TestImage& output,
+                       size_t start_line, size_t num_lines, double max_rms,
+                       double max_diff) {
+  // Left uninitialized on purpose: DistanceRms() writes it whenever the
+  // pointer is non-null.
+  double max_d;
+  double rms = DistanceRms(input, output, start_line, num_lines, &max_d);
+  printf("rms: %f, max_rms: %f, max_d: %f,  max_diff: %f\n", rms, max_rms,
+         max_d, max_diff);
+  JXL_CHECK(rms <= max_rms);
+  JXL_CHECK(max_d <= max_diff);
+}
+
+// Checks that `output` matches `input`: same dimensions, component count and
+// color space. If `input` carries DCT coefficients, the coefficient planes
+// must be identical; otherwise the whole image is compared against the
+// `max_rms` / `max_diff` tolerances. Every failed check aborts through
+// JXL_CHECK.
+void VerifyOutputImage(const TestImage& input, const TestImage& output,
+                       double max_rms, double max_diff) {
+  JXL_CHECK(output.xsize == input.xsize);
+  JXL_CHECK(output.ysize == input.ysize);
+  JXL_CHECK(output.components == input.components);
+  JXL_CHECK(output.color_space == input.color_space);
+  if (!input.coeffs.empty()) {
+    JXL_CHECK(input.coeffs.size() == input.components);
+    JXL_CHECK(output.coeffs.size() == input.components);
+    for (size_t c = 0; c < input.components; ++c) {
+      JXL_CHECK(output.coeffs[c].size() == input.coeffs[c].size());
+      // memcmp takes a byte count, while size() counts JCOEF elements.
+      // Without the sizeof factor only the leading part of each plane was
+      // compared.
+      JXL_CHECK(0 == memcmp(input.coeffs[c].data(), output.coeffs[c].data(),
+                            input.coeffs[c].size() * sizeof(JCOEF)));
+    }
+  } else {
+    VerifyOutputImage(input, output, 0, output.ysize, max_rms, max_diff);
+  }
+}
+
+}  // namespace jpegli
diff --git a/third-party/libjxl/libjxl/lib/jpegli/test_utils.h b/third-party/libjxl/libjxl/lib/jpegli/test_utils.h
new file mode 100644
index 0000000000..132cfd042a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/test_utils.h
@@ -0,0 +1,130 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_TEST_UTILS_H_
+#define LIB_JPEGLI_TEST_UTILS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+#include <setjmp.h>
+/* clang-format on */
+
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/libjpeg_test_util.h"
+#include "lib/jpegli/test_params.h"
+
+namespace jpegli {
+
+// Installs a setjmp/longjmp based error handler on a variable named `cinfo`
+// that must be in scope at the expansion site. `flavor` is the API prefix
+// (e.g. jpegli); it selects flavor##_std_error and flavor##_destroy.
+//
+// The macro declares the locals `jerr` and `env` and contains a
+// `return false;`, so it must be expanded inside a function (or lambda) that
+// returns bool. When the library calls error_exit, the handler prints the
+// message, destroys `cinfo` and jumps back to the setjmp point, which makes
+// the enclosing function return false.
+#define ERROR_HANDLER_SETUP(flavor)                                \
+  jpeg_error_mgr jerr;                                             \
+  jmp_buf env;                                                     \
+  cinfo.err = flavor##_std_error(&jerr);                           \
+  if (setjmp(env)) {                                               \
+    return false;                                                  \
+  }                                                                \
+  cinfo.client_data = reinterpret_cast<void*>(&env);               \
+  cinfo.err->error_exit = [](j_common_ptr cinfo) {                 \
+    (*cinfo->err->output_message)(cinfo);                          \
+    jmp_buf* env = reinterpret_cast<jmp_buf*>(cinfo->client_data); \
+    flavor##_destroy(cinfo);                                       \
+    longjmp(*env, 1);                                              \
+  };
+
+std::string IOMethodName(JpegliDataType data_type, JpegliEndianness endianness);
+
+std::string ColorSpaceName(J_COLOR_SPACE colorspace);
+
+std::ostream& operator<<(std::ostream& os, const TestImage& input);
+
+std::ostream& operator<<(std::ostream& os, const CompressParams& jparams);
+
+int NumTestScanScripts();
+
+void VerifyHeader(const CompressParams& jparams, j_decompress_ptr cinfo);
+void VerifyScanHeader(const CompressParams& jparams, j_decompress_ptr cinfo);
+
+void SetDecompressParams(const DecompressParams& dparams,
+                         j_decompress_ptr cinfo);
+
+void SetScanDecompressParams(const DecompressParams& dparams,
+                             j_decompress_ptr cinfo, int scan_number);
+
+void CopyCoefficients(j_decompress_ptr cinfo, jvirt_barray_ptr* coef_arrays,
+                      TestImage* output);
+
+void UnmapColors(uint8_t* row, size_t xsize, int components,
+                 JSAMPARRAY colormap, size_t num_colors);
+
+std::string GetTestDataPath(const std::string& filename);
+std::vector<uint8_t> ReadTestData(const std::string& filename);
+
+// Header parser for PNM image data held in memory. The parser does not own
+// the buffer; it only keeps pointers into [data, data + len).
+class PNMParser {
+ public:
+  // `data` must stay valid for as long as the parser is used.
+  explicit PNMParser(const uint8_t* data, const size_t len)
+      : pos_(data), end_(data + len) {}
+
+  // Sets "pos" to the first non-header byte/pixel on success.
+  // The image dimensions, channel count and bit depth are reported through
+  // the remaining output parameters.
+  bool ParseHeader(const uint8_t** pos, size_t* xsize, size_t* ysize,
+                   size_t* num_channels, size_t* bitdepth);
+
+ private:
+  // True for carriage return and line feed.
+  static bool IsLineBreak(const uint8_t c) { return c == '\r' || c == '\n'; }
+  // True for line breaks, tab and space.
+  static bool IsWhitespace(const uint8_t c) {
+    return IsLineBreak(c) || c == '\t' || c == ' ';
+  }
+
+  // Defined out of line; presumably parses a decimal number at pos_ -- confirm
+  // against the implementation.
+  bool ParseUnsigned(size_t* number);
+
+  // Defined out of line; presumably advances pos_ past whitespace -- confirm
+  // against the implementation.
+  bool SkipWhitespace();
+
+  // Current read position.
+  const uint8_t* pos_;
+  // One past the last byte of the buffer.
+  const uint8_t* const end_;
+};
+
+bool ReadPNM(const std::vector<uint8_t>& data, size_t* xsize, size_t* ysize,
+             size_t* num_channels, size_t* bitdepth,
+             std::vector<uint8_t>* pixels);
+
+void SetNumChannels(J_COLOR_SPACE colorspace, size_t* channels);
+
+void ConvertToGrayscale(TestImage* img);
+
+void GeneratePixels(TestImage* img);
+
+void GenerateRawData(const CompressParams& jparams, TestImage* img);
+
+void GenerateCoeffs(const CompressParams& jparams, TestImage* img);
+
+// Configures an already created compressor `cinfo` (destination set by the
+// caller) from `jparams`, feeds it `input` and finishes the compression.
+// `cinfo` is not destroyed.
+void EncodeWithJpegli(const TestImage& input, const CompressParams& jparams,
+                      j_compress_ptr cinfo);
+
+// Compresses `input` into `*compressed` using a temporary compressor.
+// Returns false if the encoder reported an error; `*compressed` is then left
+// unchanged.
+bool EncodeWithJpegli(const TestImage& input, const CompressParams& jparams,
+                      std::vector<uint8_t>* compressed);
+
+// Returns the RMS difference between the two images, multiplied by 255, over
+// `num_lines` lines starting at `start_line`. If `max_diff` is non-null it
+// receives the largest absolute per-sample difference, also multiplied by 255.
+double DistanceRms(const TestImage& input, const TestImage& output,
+                   size_t start_line, size_t num_lines,
+                   double* max_diff = nullptr);
+
+// Same as above for output.ysize lines starting at line 0.
+double DistanceRms(const TestImage& input, const TestImage& output,
+                   double* max_diff = nullptr);
+
+// Aborts (JXL_CHECK) unless the RMS difference over the given line range is
+// at most `max_rms` and the largest per-sample difference is at most
+// `max_diff`. Both are measured by DistanceRms().
+void VerifyOutputImage(const TestImage& input, const TestImage& output,
+                       size_t start_line, size_t num_lines, double max_rms,
+                       double max_diff = 255.0);
+
+// Whole-image check: dimensions, component count and color space must match.
+// Coefficient planes are compared for equality when `input` has coefficients;
+// otherwise the tolerances above are applied to all output.ysize lines.
+void VerifyOutputImage(const TestImage& input, const TestImage& output,
+                       double max_rms, double max_diff = 255.0);
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_TEST_UTILS_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/testing.h b/third-party/libjxl/libjxl/lib/jpegli/testing.h
new file mode 100644
index 0000000000..873a0171e7
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/testing.h
@@ -0,0 +1,35 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_TESTING_H_
+#define LIB_JPEGLI_TESTING_H_
+
+// GTest/GMock specific macros / wrappers.
+
+// gmock unconditionally redefines those macros (to wrong values).
+// Let's include it only here and mitigate the problem.
+#pragma push_macro("PRIdS")
+#pragma push_macro("PRIuS")
+#include "gmock/gmock.h"
+#pragma pop_macro("PRIuS")
+#pragma pop_macro("PRIdS")
+
+#include "gtest/gtest.h"
+
+// googletest before 1.10 didn't define INSTANTIATE_TEST_SUITE_P() but instead
+// used INSTANTIATE_TEST_CASE_P which is now deprecated.
+#ifdef INSTANTIATE_TEST_SUITE_P
+#define JPEGLI_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_SUITE_P
+#else
+#define JPEGLI_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_CASE_P
+#endif
+
+// Matches values in the closed range [0.75 * max, max]. The lower bound
+// ensures that we don't make our test bounds too lax, effectively disabling
+// the tests.
+MATCHER_P(IsSlightlyBelow, max, "") {
+  return max * 0.75 <= arg && arg <= max * 1.0;
+}
+
+#endif  // LIB_JPEGLI_TESTING_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/transcode_api_test.cc b/third-party/libjxl/libjxl/lib/jpegli/transcode_api_test.cc
new file mode 100644
index 0000000000..1d99ce37fa
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/transcode_api_test.cc
@@ -0,0 +1,133 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <vector>
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/base/status.h"
+
+namespace jpegli {
+namespace {
+
+// Transcodes `jpeg_input` at the coefficient level: the DCT coefficients are
+// read with jpegli_read_coefficients() and written back out with the
+// progressive level and optimize_coding setting taken from `jparams`. The
+// result is stored in `*jpeg_output`. A library error is reported as a fatal
+// gtest failure and leaves `*jpeg_output` unchanged.
+void TranscodeWithJpegli(const std::vector<uint8_t>& jpeg_input,
+                         const CompressParams& jparams,
+                         std::vector<uint8_t>* jpeg_output) {
+  jpeg_decompress_struct dinfo = {};
+  jpeg_compress_struct cinfo = {};
+  uint8_t* transcoded_data = nullptr;
+  // Initialized so that it never holds an indeterminate value, matching
+  // buffer_size in EncodeWithJpegli() (test_utils.cc).
+  unsigned long transcoded_size = 0;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    // The decompressor shares the compressor's error manager and jump buffer.
+    dinfo.err = cinfo.err;
+    dinfo.client_data = cinfo.client_data;
+    jpegli_create_decompress(&dinfo);
+    jpegli_mem_src(&dinfo, jpeg_input.data(), jpeg_input.size());
+    EXPECT_EQ(JPEG_REACHED_SOS,
+              jpegli_read_header(&dinfo, /*require_image=*/TRUE));
+    jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(&dinfo);
+    JXL_CHECK(coef_arrays != nullptr);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &transcoded_data, &transcoded_size);
+    jpegli_copy_critical_parameters(&dinfo, &cinfo);
+    jpegli_set_progressive_level(&cinfo, jparams.progressive_mode);
+    cinfo.optimize_coding = jparams.optimize_coding;
+    jpegli_write_coefficients(&cinfo, coef_arrays);
+    jpegli_finish_compress(&cinfo);
+    jpegli_finish_decompress(&dinfo);
+    return true;
+  };
+  // Release everything before asserting: ASSERT_TRUE returns from this
+  // function on failure, which used to skip the destroy calls and the free().
+  // NOTE(review): on the failure path this calls jpegli_destroy_* on a struct
+  // that was already destroyed by the error handler, or never created (still
+  // zero-initialized). EncodeWithJpegli() in test_utils.cc already relies on
+  // the former; confirm the latter is tolerated as well.
+  const bool success = try_catch_block();
+  jpegli_destroy_decompress(&dinfo);
+  jpegli_destroy_compress(&cinfo);
+  if (success && transcoded_data) {
+    jpeg_output->assign(transcoded_data, transcoded_data + transcoded_size);
+  }
+  free(transcoded_data);
+  ASSERT_TRUE(success);
+}
+
+// One parameterized test case: the image to encode and the compression
+// parameters to encode it with.
+struct TestConfig {
+  TestImage input;
+  CompressParams jparams;
+};
+
+// Fixture for the transcoding tests, parameterized over TestConfig.
+class TranscodeAPITestParam : public ::testing::TestWithParam<TestConfig> {};
+
+// Encodes the generated image as a sequential, non-optimized JPEG, then
+// transcodes it twice (optimized sequential, then optimized progressive).
+// Each step must shrink the file by more than 2% and decode to exactly the
+// same pixels as the original encoding.
+TEST_P(TranscodeAPITestParam, TestAPI) {
+  TestConfig config = GetParam();
+  CompressParams& jparams = config.jparams;
+  GeneratePixels(&config.input);
+
+  // Start with sequential non-optimized jpeg.
+  jparams.progressive_mode = 0;
+  jparams.optimize_coding = 0;
+  std::vector<uint8_t> compressed;
+  ASSERT_TRUE(EncodeWithJpegli(config.input, jparams, &compressed));
+  TestImage output0;
+  DecodeWithLibjpeg(jparams, DecompressParams(), compressed, &output0);
+
+  // Transcode first to a sequential optimized jpeg, and then further to
+  // a progressive jpeg.
+  for (int progr : {0, 2}) {
+    std::vector<uint8_t> transcoded;
+    jparams.progressive_mode = progr;
+    jparams.optimize_coding = 1;
+    TranscodeWithJpegli(compressed, jparams, &transcoded);
+
+    // We expect a size reduction of at least 2%.
+    EXPECT_LT(transcoded.size(), compressed.size() * 0.98f);
+
+    // Verify that transcoding is lossless.
+    TestImage output1;
+    DecodeWithLibjpeg(jparams, DecompressParams(), transcoded, &output1);
+    ASSERT_EQ(output0.pixels.size(), output1.pixels.size());
+    EXPECT_EQ(0, memcmp(output0.pixels.data(), output1.pixels.data(),
+                        output0.pixels.size()));
+    // The next round transcodes this round's output.
+    compressed = transcoded;
+  }
+}
+
+// Returns every combination of image size (1024x768 plus 0, 1, 8 or 9 extra
+// pixels in each direction) and sampling factor (1 or 2, horizontally and
+// vertically) for the first component. The other two components always use
+// sampling factor 1.
+std::vector<TestConfig> GenerateTests() {
+  const size_t kBaseXSize = 1024;
+  const size_t kBaseYSize = 768;
+  const int kSizeDeltas[] = {0, 1, 8, 9};
+  const int kSamplingFactors[] = {1, 2};
+  std::vector<TestConfig> tests;
+  for (int dx : kSizeDeltas) {
+    for (int dy : kSizeDeltas) {
+      for (int h : kSamplingFactors) {
+        for (int v : kSamplingFactors) {
+          TestConfig cfg;
+          cfg.input.xsize = kBaseXSize + dx;
+          cfg.input.ysize = kBaseYSize + dy;
+          cfg.jparams.h_sampling = {h, 1, 1};
+          cfg.jparams.v_sampling = {v, 1, 1};
+          tests.push_back(cfg);
+        }
+      }
+    }
+  }
+  return tests;
+}
+
+// Streams the input image description followed by the compression parameters.
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+  os << c.input;
+  os << c.jparams;
+  return os;
+}
+
+// Builds the name of a parameterized test instance by streaming its
+// TestConfig into a string.
+std::string TestDescription(
+    const testing::TestParamInfo<TranscodeAPITestParam::ParamType>& info) {
+  std::stringstream name;
+  name << info.param;
+  return name.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(TranscodeAPITest, TranscodeAPITestParam,
+                                testing::ValuesIn(GenerateTests()),
+                                TestDescription);
+
+}  // namespace
+}  // namespace jpegli
diff --git a/third-party/libjxl/libjxl/lib/jpegli/transpose-inl.h b/third-party/libjxl/libjxl/lib/jpegli/transpose-inl.h
new file mode 100644
index 0000000000..9fdd222f4e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/transpose-inl.h
@@ -0,0 +1,111 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JPEGLI_TRANSPOSE_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JPEGLI_TRANSPOSE_INL_H_
+#undef LIB_JPEGLI_TRANSPOSE_INL_H_
+#else
+#define LIB_JPEGLI_TRANSPOSE_INL_H_
+#endif
+
+#include "lib/jxl/base/compiler_specific.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+namespace {
+
+#if HWY_CAP_GE256
+// Transposes an 8x8 block of floats stored row-major in `from` into `to`,
+// using 8-lane vectors. Runs in three stages: two rounds of
+// InterleaveLower/InterleaveUpper, then ConcatLowerLower/ConcatUpperUpper to
+// assemble the output rows. Load/Store are the aligned variants, so both
+// pointers presumably need vector alignment -- confirm with callers.
+static JXL_INLINE void Transpose8x8Block(const float* JXL_RESTRICT from,
+                                         float* JXL_RESTRICT to) {
+  const HWY_CAPPED(float, 8) d;
+  // One vector per input row.
+  auto i0 = Load(d, from);
+  auto i1 = Load(d, from + 1 * 8);
+  auto i2 = Load(d, from + 2 * 8);
+  auto i3 = Load(d, from + 3 * 8);
+  auto i4 = Load(d, from + 4 * 8);
+  auto i5 = Load(d, from + 5 * 8);
+  auto i6 = Load(d, from + 6 * 8);
+  auto i7 = Load(d, from + 7 * 8);
+
+  // Stage 1: interleave rows that are two apart (0/2, 1/3, 4/6, 5/7).
+  const auto q0 = InterleaveLower(d, i0, i2);
+  const auto q1 = InterleaveLower(d, i1, i3);
+  const auto q2 = InterleaveUpper(d, i0, i2);
+  const auto q3 = InterleaveUpper(d, i1, i3);
+  const auto q4 = InterleaveLower(d, i4, i6);
+  const auto q5 = InterleaveLower(d, i5, i7);
+  const auto q6 = InterleaveUpper(d, i4, i6);
+  const auto q7 = InterleaveUpper(d, i5, i7);
+
+  // Stage 2: interleave neighbouring stage-1 results.
+  const auto r0 = InterleaveLower(d, q0, q1);
+  const auto r1 = InterleaveUpper(d, q0, q1);
+  const auto r2 = InterleaveLower(d, q2, q3);
+  const auto r3 = InterleaveUpper(d, q2, q3);
+  const auto r4 = InterleaveLower(d, q4, q5);
+  const auto r5 = InterleaveUpper(d, q4, q5);
+  const auto r6 = InterleaveLower(d, q6, q7);
+  const auto r7 = InterleaveUpper(d, q6, q7);
+
+  // Stage 3: pair each result from rows 0..3 (r0..r3) with the matching result
+  // from rows 4..7 (r4..r7), taking lower halves for output rows 0..3 and
+  // upper halves for output rows 4..7.
+  i0 = ConcatLowerLower(d, r4, r0);
+  i1 = ConcatLowerLower(d, r5, r1);
+  i2 = ConcatLowerLower(d, r6, r2);
+  i3 = ConcatLowerLower(d, r7, r3);
+  i4 = ConcatUpperUpper(d, r4, r0);
+  i5 = ConcatUpperUpper(d, r5, r1);
+  i6 = ConcatUpperUpper(d, r6, r2);
+  i7 = ConcatUpperUpper(d, r7, r3);
+
+  Store(i0, d, to);
+  Store(i1, d, to + 1 * 8);
+  Store(i2, d, to + 2 * 8);
+  Store(i3, d, to + 3 * 8);
+  Store(i4, d, to + 4 * 8);
+  Store(i5, d, to + 5 * 8);
+  Store(i6, d, to + 6 * 8);
+  Store(i7, d, to + 7 * 8);
+}
+#elif HWY_TARGET != HWY_SCALAR
+// Transposes an 8x8 block of floats stored row-major in `from` into `to`,
+// using 4-lane vectors. The block is handled as four 4x4 tiles: the tile read
+// at rows n..n+3, columns m..m+3 is transposed with two interleave rounds and
+// written to rows m..m+3, columns n..n+3.
+static JXL_INLINE void Transpose8x8Block(const float* JXL_RESTRICT from,
+                                         float* JXL_RESTRICT to) {
+  const HWY_CAPPED(float, 4) d;
+  for (size_t n = 0; n < 8; n += 4) {
+    for (size_t m = 0; m < 8; m += 4) {
+      // Four rows of the source tile.
+      auto p0 = Load(d, from + n * 8 + m);
+      auto p1 = Load(d, from + (n + 1) * 8 + m);
+      auto p2 = Load(d, from + (n + 2) * 8 + m);
+      auto p3 = Load(d, from + (n + 3) * 8 + m);
+      // Round 1: interleave rows that are two apart.
+      const auto q0 = InterleaveLower(d, p0, p2);
+      const auto q1 = InterleaveLower(d, p1, p3);
+      const auto q2 = InterleaveUpper(d, p0, p2);
+      const auto q3 = InterleaveUpper(d, p1, p3);
+
+      // Round 2: interleave the round-1 results into the output rows.
+      const auto r0 = InterleaveLower(d, q0, q1);
+      const auto r1 = InterleaveUpper(d, q0, q1);
+      const auto r2 = InterleaveLower(d, q2, q3);
+      const auto r3 = InterleaveUpper(d, q2, q3);
+      // Row and column offsets are swapped relative to the loads.
+      Store(r0, d, to + m * 8 + n);
+      Store(r1, d, to + (1 + m) * 8 + n);
+      Store(r2, d, to + (2 + m) * 8 + n);
+      Store(r3, d, to + (3 + m) * 8 + n);
+    }
+  }
+}
+#else
+// Scalar fallback: writes the transpose of the row-major 8x8 block `from`
+// into `to`, one element at a time in output order.
+static JXL_INLINE void Transpose8x8Block(const float* JXL_RESTRICT from,
+                                         float* JXL_RESTRICT to) {
+  for (size_t dst = 0; dst < 64; ++dst) {
+    const size_t row = dst / 8;
+    const size_t col = dst % 8;
+    to[dst] = from[8 * col + row];
+  }
+}
+#endif
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace
+}  // namespace HWY_NAMESPACE
+}  // namespace jpegli
+HWY_AFTER_NAMESPACE();
+#endif  // LIB_JPEGLI_TRANSPOSE_INL_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/types.h b/third-party/libjxl/libjxl/lib/jpegli/types.h
new file mode 100644
index 0000000000..2f446b7fff
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/types.h
@@ -0,0 +1,38 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_TYPES_H_
+#define LIB_JPEGLI_TYPES_H_
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//
+// New API structs and functions that are not available in libjpeg
+//
+// NOTE: This part of the API is still experimental and will probably change in
+// the future.
+//
+
+// Sample formats for pixel data. The numeric values are explicit and not
+// contiguous (1 is unused).
+typedef enum {
+  JPEGLI_TYPE_FLOAT = 0,   // floating-point samples
+  JPEGLI_TYPE_UINT8 = 2,   // 8-bit unsigned samples
+  JPEGLI_TYPE_UINT16 = 3,  // 16-bit unsigned samples
+} JpegliDataType;
+
+// Byte order of multi-byte samples.
+typedef enum {
+  JPEGLI_NATIVE_ENDIAN = 0,  // byte order of the host
+  JPEGLI_LITTLE_ENDIAN = 1,
+  JPEGLI_BIG_ENDIAN = 2,
+} JpegliEndianness;
+
+int jpegli_bytes_per_sample(JpegliDataType data_type);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  // extern "C"
+#endif
+
+#endif  // LIB_JPEGLI_TYPES_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/upsample.cc b/third-party/libjxl/libjxl/lib/jpegli/upsample.cc
new file mode 100644
index 0000000000..5559aa78a6
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/upsample.cc
@@ -0,0 +1,137 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/upsample.h"
+
+#include <string.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/upsample.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Vec;
+
+#if HWY_CAP_GE512
+using hwy::HWY_NAMESPACE::Half;
+using hwy::HWY_NAMESPACE::Vec;
+template <size_t i, class DF, class V>
+HWY_INLINE Vec<Half<Half<DF>>> Quarter(const DF df, V v) {  // df: unused
+  using HF = Half<DF>;
+  using HHF = Half<HF>;
+  auto half = i >= 2 ? UpperHalf(HF(), v) : LowerHalf(HF(), v);  // half of v
+  return i & 1 ? UpperHalf(HHF(), half) : LowerHalf(HHF(), half);  // quarter
+}
+
+template <class DF, class V>
+HWY_INLINE Vec<DF> Concat4(const DF df, V v0, V v1, V v2, V v3) {
+  using HF = Half<DF>;  // tag for the two half-width inner Combine results
+  return Combine(DF(), Combine(HF(), v3, v2), Combine(HF(), v1, v0));
+}
+
+#endif
+
+// Stores v0[0], v1[0], v0[1], v1[1], ... to mem, in this order. Mem must be
+// aligned. Every branch below issues exactly two Store() calls.
+template <class DF, class V, typename T>
+void StoreInterleaved(const DF df, V v0, V v1, T* mem) {
+  static_assert(sizeof(T) == 4, "only use StoreInterleaved for 4-byte types");
+#if HWY_TARGET == HWY_SCALAR
+  Store(v0, df, mem);  // scalar target: v0 goes to mem, v1 to mem + 1
+  Store(v1, df, mem + 1);
+#elif !HWY_CAP_GE256
+  Store(InterleaveLower(df, v0, v1), df, mem);
+  Store(InterleaveUpper(df, v0, v1), df, mem + Lanes(df));
+#else
+  if (!HWY_CAP_GE512 || Lanes(df) == 8) {
+    auto t0 = InterleaveLower(df, v0, v1);
+    auto t1 = InterleaveUpper(df, v0, v1);
+    Store(ConcatLowerLower(df, t1, t0), df, mem);
+    Store(ConcatUpperUpper(df, t1, t0), df, mem + Lanes(df));
+  } else {  // remaining case: HWY_CAP_GE512 is set and Lanes(df) != 8
+#if HWY_CAP_GE512
+    auto t0 = InterleaveLower(df, v0, v1);
+    auto t1 = InterleaveUpper(df, v0, v1);
+    Store(Concat4(df, Quarter<0>(df, t0), Quarter<0>(df, t1),
+                  Quarter<1>(df, t0), Quarter<1>(df, t1)),
+          df, mem);
+    Store(Concat4(df, Quarter<2>(df, t0), Quarter<2>(df, t1),
+                  Quarter<3>(df, t0), Quarter<3>(df, t1)),
+          df, mem + Lanes(df));
+#endif
+  }
+#endif
+}
+
+void Upsample2Horizontal(float* JXL_RESTRICT row,
+                         float* JXL_RESTRICT scratch_space, size_t len_out) {
+  HWY_FULL(float) df;
+  const auto w_far = Set(df, 0.25f);   // weight of the neighbouring sample
+  const auto w_near = Set(df, 0.75f);  // weight of the co-located sample
+  const size_t len_in = (len_out + 1) / 2;
+  // Work from a copy of the input with one replicated sample on each side.
+  memcpy(scratch_space, row, len_in * sizeof(float));
+  scratch_space[-1] = scratch_space[0];
+  scratch_space[len_in] = scratch_space[len_in - 1];
+  for (size_t pos = 0; pos < len_in; pos += Lanes(df)) {
+    const auto center = Mul(Load(df, scratch_space + pos), w_near);
+    const auto lhs = MulAdd(w_far, LoadU(df, scratch_space + pos - 1), center);
+    const auto rhs = MulAdd(w_far, LoadU(df, scratch_space + pos + 1), center);
+    // lhs blends in the previous input sample, rhs the next; interleave them.
+    StoreInterleaved(df, lhs, rhs, row + pos * 2);
+  }
+}
+
+void Upsample2Vertical(const float* JXL_RESTRICT row_top,
+                       const float* JXL_RESTRICT row_mid,
+                       const float* JXL_RESTRICT row_bot,
+                       float* JXL_RESTRICT row_out0,
+                       float* JXL_RESTRICT row_out1, size_t len) {
+  HWY_FULL(float) df;
+  const auto w_far = Set(df, 0.25f);   // weight of the row above or below
+  const auto w_near = Set(df, 0.75f);  // weight of the middle row
+  for (size_t pos = 0; pos < len; pos += Lanes(df)) {
+    const auto above = Load(df, row_top + pos);
+    const auto below = Load(df, row_bot + pos);
+    // The scaled middle row is shared by both output rows.
+    const auto center = Mul(Load(df, row_mid + pos), w_near);
+    Store(MulAdd(above, w_far, center), df, row_out0 + pos);
+    Store(MulAdd(below, w_far, center), df, row_out1 + pos);
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+
+HWY_EXPORT(Upsample2Horizontal);  // referenced by HWY_DYNAMIC_DISPATCH below
+HWY_EXPORT(Upsample2Vertical);    // referenced by HWY_DYNAMIC_DISPATCH below
+
+void Upsample2Horizontal(float* JXL_RESTRICT row,
+                         float* JXL_RESTRICT scratch_space, size_t len_out) {
+  return HWY_DYNAMIC_DISPATCH(Upsample2Horizontal)(row, scratch_space, len_out);
+}  // forwards all arguments unchanged
+
+void Upsample2Vertical(const float* JXL_RESTRICT row_top,
+                       const float* JXL_RESTRICT row_mid,
+                       const float* JXL_RESTRICT row_bot,
+                       float* JXL_RESTRICT row_out0,
+                       float* JXL_RESTRICT row_out1, size_t len) {
+  return HWY_DYNAMIC_DISPATCH(Upsample2Vertical)(row_top, row_mid, row_bot,
+                                                 row_out0, row_out1, len);
+}  // forwards all arguments unchanged
+}  // namespace jpegli
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jpegli/upsample.h b/third-party/libjxl/libjxl/lib/jpegli/upsample.h
new file mode 100644
index 0000000000..1a057208dc
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/upsample.h
@@ -0,0 +1,26 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_UPSAMPLE_H_
+#define LIB_JPEGLI_UPSAMPLE_H_
+
+#include <stddef.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jpegli {
+
+void Upsample2Horizontal(float* JXL_RESTRICT row,
+                         float* JXL_RESTRICT scratch_space, size_t len_out);
+
+void Upsample2Vertical(const float* JXL_RESTRICT row_top,
+                       const float* JXL_RESTRICT row_mid,
+                       const float* JXL_RESTRICT row_bot,
+                       float* JXL_RESTRICT row_out0,
+                       float* JXL_RESTRICT row_out1, size_t len);
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_UPSAMPLE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl.cmake b/third-party/libjxl/libjxl/lib/jxl.cmake
new file mode 100644
index 0000000000..2464383288
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl.cmake
@@ -0,0 +1,325 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include(compatibility.cmake)
+include(jxl_lists.cmake)
+
+if (JPEGXL_ENABLE_TOOLS OR JPEGXL_ENABLE_DEVTOOLS OR JPEGXL_ENABLE_BOXES)
+list(APPEND JPEGXL_INTERNAL_DEC_SOURCES ${JPEGXL_INTERNAL_DEC_BOX_SOURCES})
+endif()
+
+if (JPEGXL_ENABLE_TRANSCODE_JPEG OR JPEGXL_ENABLE_TOOLS OR JPEGXL_ENABLE_DEVTOOLS)
+list(APPEND JPEGXL_INTERNAL_DEC_SOURCES ${JPEGXL_INTERNAL_DEC_JPEG_SOURCES})
+endif()
+
+set_source_files_properties(jxl/enc_fast_lossless.cc PROPERTIES COMPILE_FLAGS -O3)
+
+set(JPEGXL_DEC_INTERNAL_LIBS
+  hwy
+  Threads::Threads
+  ${ATOMICS_LIBRARIES}
+)
+
+if (JPEGXL_ENABLE_TRANSCODE_JPEG OR JPEGXL_ENABLE_BOXES)
+list(APPEND JPEGXL_DEC_INTERNAL_LIBS brotlidec brotlicommon)
+endif()
+
+set(JPEGXL_INTERNAL_LIBS
+  ${JPEGXL_DEC_INTERNAL_LIBS}
+  brotlienc
+)
+
+if (JPEGXL_ENABLE_SKCMS)
+  list(APPEND JPEGXL_INTERNAL_FLAGS -DJPEGXL_ENABLE_SKCMS=1)
+  if (JPEGXL_BUNDLE_SKCMS)
+    list(APPEND JPEGXL_INTERNAL_FLAGS -DJPEGXL_BUNDLE_SKCMS=1)
+    # skcms objects are later added to JPEGXL_INTERNAL_OBJECTS
+  else ()
+    list(APPEND JPEGXL_INTERNAL_LIBS skcms)
+  endif ()
+else ()
+  list(APPEND JPEGXL_INTERNAL_LIBS lcms2)
+endif ()
+
+if (JPEGXL_ENABLE_TRANSCODE_JPEG)
+  list(APPEND JPEGXL_INTERNAL_FLAGS -DJPEGXL_ENABLE_TRANSCODE_JPEG=1)
+else()
+  list(APPEND JPEGXL_INTERNAL_FLAGS -DJPEGXL_ENABLE_TRANSCODE_JPEG=0)
+endif ()
+
+if (JPEGXL_ENABLE_BOXES)
+  list(APPEND JPEGXL_INTERNAL_FLAGS -DJPEGXL_ENABLE_BOXES=1)
+else()
+  list(APPEND JPEGXL_INTERNAL_FLAGS -DJPEGXL_ENABLE_BOXES=0)
+endif ()
+
+set(OBJ_COMPILE_DEFINITIONS
+  JPEGXL_MAJOR_VERSION=${JPEGXL_MAJOR_VERSION}
+  JPEGXL_MINOR_VERSION=${JPEGXL_MINOR_VERSION}
+  JPEGXL_PATCH_VERSION=${JPEGXL_PATCH_VERSION}
+  # Used to determine if we are building the library when defined or just
+  # including the library when not defined. This is public so libjxl shared
+  # library gets this define too.
+  JXL_INTERNAL_LIBRARY_BUILD
+)
+
+# Generate version.h
+configure_file("jxl/version.h.in" "include/jxl/version.h")
+
+# Headers for exporting/importing public headers
+include(GenerateExportHeader)
+
+# CMake does not allow generate_export_header for INTERFACE library, so we
+# add this stub library just for file generation.
+add_library(jxl_export OBJECT ${JPEGXL_INTERNAL_PUBLIC_HEADERS})
+set_target_properties(jxl_export PROPERTIES
+  CXX_VISIBILITY_PRESET hidden
+  VISIBILITY_INLINES_HIDDEN 1
+  DEFINE_SYMBOL JXL_INTERNAL_LIBRARY_BUILD
+  LINKER_LANGUAGE CXX
+)
+generate_export_header(jxl_export
+  BASE_NAME JXL
+  EXPORT_FILE_NAME include/jxl/jxl_export.h)
+# Place all public headers in a single directory.
+foreach(path ${JPEGXL_INTERNAL_PUBLIC_HEADERS})
+  configure_file(
+    ${path}
+    ${path}
+    COPYONLY
+  )
+endforeach()
+
+add_library(jxl_includes INTERFACE)
+target_include_directories(jxl_includes SYSTEM INTERFACE
+  "$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include>"
+)
+add_dependencies(jxl_includes jxl_export)
+
+# Base headers / utilities.
+add_library(jxl_base-obj OBJECT ${JPEGXL_INTERNAL_BASE_SOURCES})
+target_compile_options(jxl_base-obj PRIVATE ${JPEGXL_INTERNAL_FLAGS})
+target_compile_options(jxl_base-obj PUBLIC ${JPEGXL_COVERAGE_FLAGS})
+set_property(TARGET jxl_base-obj PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_include_directories(jxl_base-obj PUBLIC
+  ${PROJECT_SOURCE_DIR}
+  ${JXL_HWY_INCLUDE_DIRS}
+)
+
+jxl_link_libraries(jxl_base-obj jxl_includes)
+
+# Decoder-only object library
+add_library(jxl_dec-obj OBJECT ${JPEGXL_INTERNAL_DEC_SOURCES})
+target_compile_options(jxl_dec-obj PRIVATE ${JPEGXL_INTERNAL_FLAGS})
+target_compile_options(jxl_dec-obj PUBLIC ${JPEGXL_COVERAGE_FLAGS})
+set_property(TARGET jxl_dec-obj PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_include_directories(jxl_dec-obj PUBLIC
+  "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>"
+  "${JXL_HWY_INCLUDE_DIRS}"
+  "$<BUILD_INTERFACE:$<TARGET_PROPERTY:brotlicommon,INTERFACE_INCLUDE_DIRECTORIES>>"
+)
+target_compile_definitions(jxl_dec-obj PUBLIC
+  ${OBJ_COMPILE_DEFINITIONS}
+)
+jxl_link_libraries(jxl_dec-obj jxl_base-obj)
+
+# Object library. This is used to hold the set of objects and properties.
+add_library(jxl_enc-obj OBJECT ${JPEGXL_INTERNAL_ENC_SOURCES})
+target_compile_options(jxl_enc-obj PRIVATE ${JPEGXL_INTERNAL_FLAGS})
+target_compile_options(jxl_enc-obj PUBLIC ${JPEGXL_COVERAGE_FLAGS})
+set_property(TARGET jxl_enc-obj PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_include_directories(jxl_enc-obj PUBLIC
+  ${PROJECT_SOURCE_DIR}
+  ${JXL_HWY_INCLUDE_DIRS}
+  $<TARGET_PROPERTY:brotlicommon,INTERFACE_INCLUDE_DIRECTORIES>
+)
+target_compile_definitions(jxl_enc-obj PUBLIC
+  ${OBJ_COMPILE_DEFINITIONS}
+)
+jxl_link_libraries(jxl_enc-obj jxl_base-obj)
+
+#TODO(lode): don't depend on CMS for the core library
+if (JPEGXL_ENABLE_SKCMS)
+  target_include_directories(jxl_enc-obj PRIVATE
+    $<TARGET_PROPERTY:skcms,INCLUDE_DIRECTORIES>
+  )
+else ()
+  target_include_directories(jxl_enc-obj PRIVATE
+    $<TARGET_PROPERTY:lcms2,INCLUDE_DIRECTORIES>
+  )
+endif ()
+
+set_target_properties(jxl_dec-obj PROPERTIES
+  CXX_VISIBILITY_PRESET hidden
+  VISIBILITY_INLINES_HIDDEN 1
+  DEFINE_SYMBOL JXL_INTERNAL_LIBRARY_BUILD
+)
+
+set_target_properties(jxl_enc-obj PROPERTIES
+  CXX_VISIBILITY_PRESET hidden
+  VISIBILITY_INLINES_HIDDEN 1
+  DEFINE_SYMBOL JXL_INTERNAL_LIBRARY_BUILD
+)
+
+# Private static library. This exposes all the internal functions and is used
+# for tests.
+add_library(jxl_dec-static STATIC
+  $<TARGET_OBJECTS:jxl_base-obj>
+  $<TARGET_OBJECTS:jxl_dec-obj>
+)
+target_link_libraries(jxl_dec-static
+  PUBLIC ${JPEGXL_COVERAGE_FLAGS} ${JPEGXL_DEC_INTERNAL_LIBS} jxl_includes)
+
+# The list of objects in the static and shared libraries.
+set(JPEGXL_INTERNAL_OBJECTS
+  $<TARGET_OBJECTS:jxl_base-obj>
+  $<TARGET_OBJECTS:jxl_enc-obj>
+  $<TARGET_OBJECTS:jxl_dec-obj>
+)
+if (JPEGXL_ENABLE_SKCMS AND JPEGXL_BUNDLE_SKCMS)
+  list(APPEND JPEGXL_INTERNAL_OBJECTS $<TARGET_OBJECTS:skcms-obj>)
+endif()
+
+# Private static library. This exposes all the internal functions and is used
+# for tests.
+# TODO(lode): once the source files are correctly split so that it is possible
+# to do, remove $<TARGET_OBJECTS:jxl_dec-obj> here and depend on jxl_dec-static
+add_library(jxl-static STATIC ${JPEGXL_INTERNAL_OBJECTS})
+target_link_libraries(jxl-static
+  PUBLIC ${JPEGXL_COVERAGE_FLAGS} ${JPEGXL_INTERNAL_LIBS} jxl_includes)
+target_include_directories(jxl-static PUBLIC
+  "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>")
+
+# JXL_EXPORT is defined to "__declspec(dllimport)" automatically by CMake
+# in Windows builds when including headers from the C API and compiling from
+# outside the jxl library. This is required when using the shared library,
+# however in windows this causes the function to not be found when linking
+# against the static library. This define JXL_EXPORT= here forces it to not
+# use dllimport in tests and other tools that require the static library.
+target_compile_definitions(jxl-static INTERFACE -DJXL_EXPORT=)
+target_compile_definitions(jxl_dec-static INTERFACE -DJXL_EXPORT=)
+
+# TODO(deymo): Move TCMalloc linkage to the tools/ directory since the library
+# shouldn't do any allocs anyway.
+if(JPEGXL_ENABLE_TCMALLOC)
+  pkg_check_modules(TCMallocMinimal REQUIRED IMPORTED_TARGET
+      libtcmalloc_minimal)
+  # tcmalloc 2.8 has concurrency issues that makes it sometimes return nullptr
+  # for large allocs. See https://github.com/gperftools/gperftools/issues/1204
+  # for details.
+  if(TCMallocMinimal_VERSION VERSION_EQUAL 2.8)
+    message(FATAL_ERROR
+        "tcmalloc version 2.8 has a concurrency bug. You have installed "
+        "version ${TCMallocMinimal_VERSION}, please either downgrade tcmalloc "
+        "to version 2.7, upgrade to 2.8.1 or newer or pass "
+        "-DJPEGXL_ENABLE_TCMALLOC=OFF to jpeg-xl cmake line. See the following "
+        "bug for details:\n"
+        "   https://github.com/gperftools/gperftools/issues/1204\n")
+  endif()
+  target_link_libraries(jxl-static PUBLIC PkgConfig::TCMallocMinimal)
+endif()  # JPEGXL_ENABLE_TCMALLOC
+
+# Install the static library too, but as jxl.a file without the -static except
+# in Windows.
+if (NOT WIN32 OR MINGW)
+  set_target_properties(jxl-static PROPERTIES OUTPUT_NAME "jxl")
+  set_target_properties(jxl_dec-static PROPERTIES OUTPUT_NAME "jxl_dec")
+endif()
+install(TARGETS jxl-static DESTINATION ${CMAKE_INSTALL_LIBDIR})
+install(TARGETS jxl_dec-static DESTINATION ${CMAKE_INSTALL_LIBDIR})
+
+if (BUILD_SHARED_LIBS)
+
+# Public shared library.
+add_library(jxl SHARED ${JPEGXL_INTERNAL_OBJECTS})
+strip_static(JPEGXL_INTERNAL_SHARED_LIBS JPEGXL_INTERNAL_LIBS)
+target_link_libraries(jxl PUBLIC ${JPEGXL_COVERAGE_FLAGS} jxl_includes)
+target_link_libraries(jxl PRIVATE ${JPEGXL_INTERNAL_SHARED_LIBS})
+# Shared library include path contains only the "include/" paths.
+set_target_properties(jxl PROPERTIES
+  VERSION ${JPEGXL_LIBRARY_VERSION}
+  SOVERSION ${JPEGXL_LIBRARY_SOVERSION}
+  LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}"
+  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
+
+# Public shared decoder library.
+add_library(jxl_dec SHARED $<TARGET_OBJECTS:jxl_base-obj> $<TARGET_OBJECTS:jxl_dec-obj>)
+strip_static(JPEGXL_DEC_INTERNAL_SHARED_LIBS JPEGXL_DEC_INTERNAL_LIBS)
+target_link_libraries(jxl_dec PUBLIC ${JPEGXL_COVERAGE_FLAGS} jxl_includes)
+target_link_libraries(jxl_dec PRIVATE ${JPEGXL_DEC_INTERNAL_SHARED_LIBS})
+# Shared library include path contains only the "include/" paths.
+set_target_properties(jxl_dec PROPERTIES
+  VERSION ${JPEGXL_LIBRARY_VERSION}
+  SOVERSION ${JPEGXL_LIBRARY_SOVERSION}
+  LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}"
+  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
+
+# Check whether the linker supports excluding libs
+set(LINKER_EXCLUDE_LIBS_FLAG "-Wl,--exclude-libs=ALL")
+include(CheckCSourceCompiles)
+list(APPEND CMAKE_EXE_LINKER_FLAGS ${LINKER_EXCLUDE_LIBS_FLAG})
+check_c_source_compiles("int main(){return 0;}" LINKER_SUPPORT_EXCLUDE_LIBS)
+list(REMOVE_ITEM CMAKE_EXE_LINKER_FLAGS ${LINKER_EXCLUDE_LIBS_FLAG})
+
+# Add a jxl.version file as a version script to tag symbols with the
+# appropriate version number. This script is also used to limit what's exposed
+# in the shared library from the static dependencies bundled here.
+foreach(target IN ITEMS jxl jxl_dec)
+  set_target_properties(${target} PROPERTIES
+      LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/jxl/jxl.version)
+  if(APPLE)  # APPEND_STRING adds no separator: each flag starts with a space
+    set_property(TARGET ${target} APPEND_STRING PROPERTY
+        LINK_FLAGS " -Wl,-exported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/jxl/jxl_osx.syms")
+  elseif(WIN32)
+    # Nothing needed here, we use __declspec(dllexport) (jxl_export.h)
+  else()
+    set_property(TARGET ${target} APPEND_STRING PROPERTY
+        LINK_FLAGS " -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/jxl/jxl.version")
+  endif()  # APPLE
+  # This hides the default visibility symbols from static libraries bundled into
+  # the shared library. In particular this prevents exposing symbols from hwy
+  # and skcms in the shared library.
+  if(LINKER_SUPPORT_EXCLUDE_LIBS)
+    set_property(TARGET ${target} APPEND_STRING PROPERTY
+        LINK_FLAGS " ${LINKER_EXCLUDE_LIBS_FLAG}")
+  endif()
+endforeach()
+
+# Only install libjxl shared library. The libjxl_dec is not installed since it
+# contains symbols also in libjxl which would conflict if programs try to use
+# both.
+install(TARGETS jxl
+  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
+else()
+add_library(jxl ALIAS jxl-static)
+add_library(jxl_dec ALIAS jxl_dec-static)
+endif()  # BUILD_SHARED_LIBS
+
+# Add a pkg-config file for libjxl.
+set(JPEGXL_LIBRARY_REQUIRES
+    "libhwy libbrotlienc libbrotlidec")
+if(NOT JPEGXL_ENABLE_SKCMS)
+  set(JPEGXL_LIBRARY_REQUIRES "${JPEGXL_LIBRARY_REQUIRES} lcms2")
+endif()
+
+# Allow adding prefix if CMAKE_INSTALL_INCLUDEDIR not absolute.
+if(IS_ABSOLUTE "${CMAKE_INSTALL_INCLUDEDIR}")
+    set(PKGCONFIG_TARGET_INCLUDES "${CMAKE_INSTALL_INCLUDEDIR}")
+else()
+    set(PKGCONFIG_TARGET_INCLUDES "\${prefix}/${CMAKE_INSTALL_INCLUDEDIR}")
+endif()
+# Allow adding prefix if CMAKE_INSTALL_LIBDIR not absolute.
+if(IS_ABSOLUTE "${CMAKE_INSTALL_LIBDIR}")
+    set(PKGCONFIG_TARGET_LIBS "${CMAKE_INSTALL_LIBDIR}")
+else()
+    set(PKGCONFIG_TARGET_LIBS "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}")
+endif()
+
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/jxl/libjxl.pc.in"
+               "libjxl.pc" @ONLY)
+install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libjxl.pc"
+  DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
diff --git a/third-party/libjxl/libjxl/lib/jxl/ac_context.h b/third-party/libjxl/libjxl/lib/jxl/ac_context.h
new file mode 100644
index 0000000000..a2b9e046d1
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/ac_context.h
@@ -0,0 +1,149 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_AC_CONTEXT_H_
+#define LIB_JXL_AC_CONTEXT_H_
+
+#include <algorithm>
+#include <vector>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order_fwd.h"
+
+namespace jxl {
+
+// Block context used for scanning order, number of non-zeros, AC coefficients.
+// Equal to the channel.
+constexpr uint32_t kDCTOrderContextStart = 0;
+
+// NonZeroContext() below clamps the number of non-zeros to 64 and maps it to a
+// bucket: the value itself when it is below 8, otherwise 4 + value / 2. The
+// largest bucket index is therefore 4 + 64 / 2 = 36, i.e. 37 buckets in total.
+constexpr uint32_t kNonZeroBuckets = 37;
+
+static const uint16_t kCoeffFreqContext[64] = {  // [0] is a placeholder
+    0xBAD, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14,
+    15,    15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
+    23,    23, 23, 23, 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26,
+    27,    27, 27, 27, 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30,
+};
+
+static const uint16_t kCoeffNumNonzeroContext[64] = {  // [0]: placeholder
+    0xBAD, 0,   31,  62,  62,  93,  93,  93,  93,  123, 123, 123, 123,
+    152,   152, 152, 152, 152, 152, 152, 152, 180, 180, 180, 180, 180,
+    180,   180, 180, 180, 180, 180, 180, 206, 206, 206, 206, 206, 206,
+    206,   206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206,
+    206,   206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206,
+};
+
+// Supremum of ZeroDensityContext(x, y) + 1, when x + y < 64.
+constexpr int kZeroDensityContextCount = 458;
+// Supremum of ZeroDensityContext(x, y) + 1.
+constexpr int kZeroDensityContextLimit = 474;
+
+/* This function is used for entropy-sources pre-clustering.
+ *
+ * Ideally, each combination of |nonzeros_left| and |k| should go to its own
+ * bucket, but that implies (64 * 63 / 2) == 2016 buckets. If there is another
+ * dimension (e.g. block context), the number of primary clusters becomes too
+ * big.
+ *
+ * To solve this problem, |nonzeros_left| and |k| values are clustered. It is
+ * known that their sum is at most 64; consequently, the total number of
+ * buckets is at most A(64) * B(64).
+ */
+// TODO(user): investigate, why disabling pre-clustering makes entropy code
+// less dense. Perhaps we would need to add HQ clustering algorithm that would
+// be able to squeeze better by spending more CPU cycles.
+static JXL_INLINE size_t ZeroDensityContext(size_t nonzeros_left, size_t k,
+                                            size_t covered_blocks,
+                                            size_t log2_covered_blocks,
+                                            size_t prev) {
+  JXL_DASSERT((1u << log2_covered_blocks) == covered_blocks);
+  nonzeros_left = (nonzeros_left + covered_blocks - 1) >> log2_covered_blocks;
+  k >>= log2_covered_blocks;  // scale k down by the number of covered blocks
+  JXL_DASSERT(k > 0);
+  JXL_DASSERT(k < 64);
+  JXL_DASSERT(nonzeros_left > 0);
+  // Asserting nonzeros_left + k < 65 here causes crashes in debug mode with
+  // invalid input, since the (hot) decoding loop does not check this condition.
+  // As no out-of-bound memory reads are issued even if that condition is
+  // broken, we check this simpler condition which holds anyway. The decoder
+  // will still mark a file in which that condition happens as not valid at the
+  // end of the decoding loop, as `nzeros` will not be `0`.
+  JXL_DASSERT(nonzeros_left < 64);
+  return (kCoeffNumNonzeroContext[nonzeros_left] + kCoeffFreqContext[k]) * 2 +
+         prev;
+}
+
+struct BlockCtxMap {
+  std::vector<int> dc_thresholds[3];
+  std::vector<uint32_t> qf_thresholds;
+  std::vector<uint8_t> ctx_map;
+  size_t num_ctxs, num_dc_ctxs;  // constructor: max(ctx_map) + 1, and 1
+
+  static constexpr uint8_t kDefaultCtxMap[] = {
+      // Default ctx map clusters all the large transforms together.
+      0, 1, 2, 2, 3,  3,  4,  5,  6,  6,  6,  6,  6,   //
+      7, 8, 9, 9, 10, 11, 12, 13, 14, 14, 14, 14, 14,  //
+      7, 8, 9, 9, 10, 11, 12, 13, 14, 14, 14, 14, 14,  //
+  };
+  static_assert(3 * kNumOrders ==
+                    sizeof(kDefaultCtxMap) / sizeof *kDefaultCtxMap,
+                "Update default context map");
+
+  size_t Context(int dc_idx, uint32_t qf, size_t ord, size_t c) const {
+    size_t qf_idx = 0;
+    for (uint32_t t : qf_thresholds) {
+      if (qf > t) qf_idx++;  // qf_idx = number of thresholds below qf
+    }
+    size_t idx = c < 2 ? c ^ 1 : 2;  // maps c = 0, 1, >= 2 to 1, 0, 2
+    idx = idx * kNumOrders + ord;
+    idx = idx * (qf_thresholds.size() + 1) + qf_idx;
+    idx = idx * num_dc_ctxs + dc_idx;
+    return ctx_map[idx];  // no bounds check on idx
+  }
+  // Offset of the zero-density contexts of `block_ctx`; they are placed after
+  // the num_ctxs * kNonZeroBuckets contexts for the number of non-zeros.
+  constexpr uint32_t ZeroDensityContextsOffset(uint32_t block_ctx) const {
+    return num_ctxs * kNonZeroBuckets + kZeroDensityContextCount * block_ctx;
+  }
+
+  // Context map for AC coefficients consists of 2 blocks:
+  //  |num_ctxs x                : context for number of non-zeros in the block
+  //   kNonZeroBuckets|            computed from block context and predicted
+  //                               value (based top and left values)
+  //  |num_ctxs x                : context for AC coefficient symbols,
+  //   kZeroDensityContextCount|   computed from block context,
+  //                               number of non-zeros left and
+  //                               index in scan order
+  constexpr uint32_t NumACContexts() const {
+    return num_ctxs * (kNonZeroBuckets + kZeroDensityContextCount);
+  }
+
+  // Non-zero context is based on number of non-zeros and block context.
+  // For better clustering, contexts with same number of non-zeros are grouped.
+  inline uint32_t NonZeroContext(uint32_t non_zeros, uint32_t block_ctx) const {
+    uint32_t ctx;
+    if (non_zeros >= 64) non_zeros = 64;  // clamp, so ctx is at most 36
+    if (non_zeros < 8) {
+      ctx = non_zeros;
+    } else {
+      ctx = 4 + non_zeros / 2;
+    }
+    return ctx * num_ctxs + block_ctx;
+  }
+
+  BlockCtxMap() {
+    ctx_map.assign(std::begin(kDefaultCtxMap), std::end(kDefaultCtxMap));
+    num_ctxs = *std::max_element(ctx_map.begin(), ctx_map.end()) + 1;
+    num_dc_ctxs = 1;
+  }
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_AC_CONTEXT_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/ac_strategy.cc b/third-party/libjxl/libjxl/lib/jxl/ac_strategy.cc
new file mode 100644
index 0000000000..3dda5df7a6
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/ac_strategy.cc
@@ -0,0 +1,107 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/ac_strategy.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <numeric>  // iota
+#include <type_traits>
+#include <utility>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+// Tries to generalize zig-zag order to non-square blocks. Surprisingly, in
+// square block frequency along the (i + j == const) diagonals is roughly the
+// same. For historical reasons, consecutive diagonals are traversed
+// in alternating directions - so called "zig-zag" (or "snake") order.
+template <bool is_lut>
+static void CoeffOrderAndLut(AcStrategy acs, coeff_order_t* out) {
+  size_t cx = acs.covered_blocks_x();
+  size_t cy = acs.covered_blocks_y();
+  CoefficientLayout(&cy, &cx);
+
+  // CoefficientLayout ensures cx >= cy.
+  // We compute the zigzag order for a cx x cx block, then discard all the
+  // lines that are not multiple of the ratio between cx and cy.
+  size_t xs = cx / cy;
+  size_t xsm = xs - 1;  // low-bit mask; assumes xs is a power of two
+  size_t xss = CeilLog2Nonzero(xs);  // shift used below to divide y by xs
+  // First half of the block
+  size_t cur = cx * cy;  // next rank for entries outside x < cx, y < cy
+  for (size_t i = 0; i < cx * kBlockDim; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      size_t x = j;
+      size_t y = i - j;
+      if (i % 2) std::swap(x, y);  // odd diagonals run the other way
+      if ((y & xsm) != 0) continue;
+      y >>= xss;
+      size_t val = 0;
+      if (x < cx && y < cy) {
+        val = y * cx + x;
+      } else {
+        val = cur++;
+      }
+      if (is_lut) {  // lut: position -> rank
+        out[y * cx * kBlockDim + x] = val;
+      } else {  // order: rank -> position
+        out[val] = y * cx * kBlockDim + x;
+      }
+    }
+  }
+  // Second half
+  for (size_t ip = cx * kBlockDim - 1; ip > 0; ip--) {
+    size_t i = ip - 1;  // i counts down from cx * kBlockDim - 2 to 0
+    for (size_t j = 0; j <= i; j++) {
+      size_t x = cx * kBlockDim - 1 - (i - j);
+      size_t y = cx * kBlockDim - 1 - j;
+      if (i % 2) std::swap(x, y);
+      if ((y & xsm) != 0) continue;
+      y >>= xss;
+      size_t val = cur++;
+      if (is_lut) {  // lut: position -> rank
+        out[y * cx * kBlockDim + x] = val;
+      } else {  // order: rank -> position
+        out[val] = y * cx * kBlockDim + x;
+      }
+    }
+  }
+}
+
+void AcStrategy::ComputeNaturalCoeffOrder(coeff_order_t* order) const {
+  CoeffOrderAndLut</*is_lut=*/false>(*this, order);
+}
+void AcStrategy::ComputeNaturalCoeffOrderLut(coeff_order_t* lut) const {
+  CoeffOrderAndLut</*is_lut=*/true>(*this, lut);
+}
+
+// These definitions are needed before C++17.
+constexpr size_t AcStrategy::kMaxCoeffBlocks;
+constexpr size_t AcStrategy::kMaxBlockDim;
+constexpr size_t AcStrategy::kMaxCoeffArea;
+
+AcStrategyImage::AcStrategyImage(size_t xsize, size_t ysize)
+    : layers_(xsize, ysize) {
+  row_ = layers_.Row(0);  // cached pointer to the first row of layers_
+  stride_ = layers_.PixelsPerRow();  // cached PixelsPerRow() of layers_
+}
+
+size_t AcStrategyImage::CountBlocks(AcStrategy::Type type) const {
+  // An entry is counted when it equals the type shifted left by one, plus 1.
+  const int key = (static_cast<uint8_t>(type) << 1) | 1;
+  size_t total = 0;
+  for (size_t y = 0; y < layers_.ysize(); y++) {
+    const uint8_t* JXL_RESTRICT row = layers_.ConstRow(y);
+    total += static_cast<size_t>(std::count(row, row + layers_.xsize(), key));
+  }
+  return total;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/ac_strategy.h b/third-party/libjxl/libjxl/lib/jxl/ac_strategy.h
new file mode 100644
index 0000000000..7d21167e6e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/ac_strategy.h
@@ -0,0 +1,261 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_AC_STRATEGY_H_
+#define LIB_JXL_AC_STRATEGY_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <hwy/base.h>  // kMaxVectorSize
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image_ops.h"
+
+// Defines the different kinds of transforms, and heuristics to choose between
+// them.
+// `AcStrategy` represents what transform should be used, and whether the
+// current 8x8 block is the first (top-left) block of that transform. Note that
+// DCT4x4 is applied on all four 4x4 sub-blocks of an 8x8 block.
+// `AcStrategyImage` defines which strategy should be used for each 8x8 block
+// of the image. Each byte holds the strategy type shifted left by one bit; the
+// lowest bit is set only for the first (top-left) block of that strategy.
+
+namespace jxl {
+
+class AcStrategy {
+ public:
+  // Extremal values for the number of blocks/coefficients of a single strategy.
+  static constexpr size_t kMaxCoeffBlocks = 32;
+  static constexpr size_t kMaxBlockDim = kBlockDim * kMaxCoeffBlocks;
+  // Maximum number of coefficients in a block. Guaranteed to be a multiple of
+  // the vector size.
+  static constexpr size_t kMaxCoeffArea = kMaxBlockDim * kMaxBlockDim;
+  static_assert((kMaxCoeffArea * sizeof(float)) % hwy::kMaxVectorSize == 0,
+                "Coefficient area is not a multiple of vector size");
+
+  // Raw strategy types.
+  enum Type : uint32_t {
+    // Regular block size DCT
+    DCT = 0,
+    // Encode pixels without transforming
+    IDENTITY = 1,
+    // Use 2-by-2 DCT
+    DCT2X2 = 2,
+    // Use 4-by-4 DCT
+    DCT4X4 = 3,
+    // Use 16-by-16 DCT
+    DCT16X16 = 4,
+    // Use 32-by-32 DCT
+    DCT32X32 = 5,
+    // Use 16-by-8 DCT
+    DCT16X8 = 6,
+    // Use 8-by-16 DCT
+    DCT8X16 = 7,
+    // Use 32-by-8 DCT
+    DCT32X8 = 8,
+    // Use 8-by-32 DCT
+    DCT8X32 = 9,
+    // Use 32-by-16 DCT
+    DCT32X16 = 10,
+    // Use 16-by-32 DCT
+    DCT16X32 = 11,
+    // 4x8 and 8x4 DCT
+    DCT4X8 = 12,
+    DCT8X4 = 13,
+    // Corner-DCT.
+    AFV0 = 14,
+    AFV1 = 15,
+    AFV2 = 16,
+    AFV3 = 17,
+    // Larger DCTs
+    DCT64X64 = 18,
+    DCT64X32 = 19,
+    DCT32X64 = 20,
+    DCT128X128 = 21,
+    DCT128X64 = 22,
+    DCT64X128 = 23,
+    DCT256X256 = 24,
+    DCT256X128 = 25,
+    DCT128X256 = 26,
+    // Marker for num of valid strategies.
+    kNumValidStrategies
+  };
+
+  static constexpr uint32_t TypeBit(const Type type) {  // one-hot mask of type
+    return 1u << static_cast<uint32_t>(type);
+  }
+
+  // Returns true if this block is the first 8x8 block (i.e. top-left) of a
+  // possibly multi-block strategy.
+  JXL_INLINE bool IsFirstBlock() const { return is_first_; }
+
+  JXL_INLINE bool IsMultiblock() const {  // strategy spans several 8x8 blocks
+    constexpr uint32_t bits =
+        TypeBit(Type::DCT16X16) | TypeBit(Type::DCT32X32) |
+        TypeBit(Type::DCT16X8) | TypeBit(Type::DCT8X16) |
+        TypeBit(Type::DCT32X8) | TypeBit(Type::DCT8X32) |
+        TypeBit(Type::DCT16X32) | TypeBit(Type::DCT32X16) |
+        TypeBit(Type::DCT32X64) | TypeBit(Type::DCT64X32) |
+        TypeBit(Type::DCT64X64) | TypeBit(DCT64X128) | TypeBit(DCT128X64) |
+        TypeBit(DCT128X128) | TypeBit(DCT128X256) | TypeBit(DCT256X128) |
+        TypeBit(DCT256X256);
+    JXL_DASSERT(Strategy() < kNumValidStrategies);
+    return ((1u << static_cast<uint32_t>(Strategy())) & bits) != 0;
+  }
+
+  // Returns the raw strategy value. Should only be used for tokenization.
+  JXL_INLINE uint8_t RawStrategy() const {
+    return static_cast<uint8_t>(strategy_);
+  }
+
+  JXL_INLINE Type Strategy() const { return strategy_; }
+
+  // True if `raw_strategy` lies in [0, kNumValidStrategies).
+  static JXL_INLINE constexpr bool IsRawStrategyValid(int raw_strategy) {
+    return raw_strategy < static_cast<int32_t>(kNumValidStrategies) &&
+           raw_strategy >= 0;
+  }
+  static JXL_INLINE AcStrategy FromRawStrategy(uint8_t raw_strategy) {
+    return FromRawStrategy(static_cast<Type>(raw_strategy));
+  }
+  static JXL_INLINE AcStrategy FromRawStrategy(Type raw_strategy) {
+    JXL_DASSERT(IsRawStrategyValid(static_cast<uint32_t>(raw_strategy)));
+    return AcStrategy(raw_strategy, /*is_first=*/true);
+  }
+
+  // "Natural order" means the order of increasing of "anisotropic" frequency of
+  // continuous version of DCT basis.
+  // Round-trip, with `order` filled by ComputeNaturalCoeffOrder and `lut`
+  // filled by ComputeNaturalCoeffOrderLut for the same strategy:
+  //  X = order[lut[X]] and X = lut[order[X]]
+  void ComputeNaturalCoeffOrder(coeff_order_t* order) const;
+  void ComputeNaturalCoeffOrderLut(coeff_order_t* lut) const;
+
+  // Extent along x, in 8x8 blocks, of the area this strategy covers. Looked up
+  // from the strategy type alone; is_first_ is not consulted.
+  JXL_INLINE size_t covered_blocks_x() const {
+    static constexpr uint8_t kLut[] = {1, 1, 1, 1,  2, 4,  1,  2,  1,
+                                       4, 2, 4, 1,  1, 1,  1,  1,  1,
+                                       8, 4, 8, 16, 8, 16, 32, 16, 32};
+    static_assert(sizeof(kLut) / sizeof(*kLut) == kNumValidStrategies,
+                  "Update LUT");
+    return kLut[size_t(strategy_)];
+  }
+
+  JXL_INLINE size_t covered_blocks_y() const {  // extent along y, in blocks
+    static constexpr uint8_t kLut[] = {1, 1, 1, 1,  2,  4, 2,  1,  4,
+                                       1, 4, 2, 1,  1,  1, 1,  1,  1,
+                                       8, 8, 4, 16, 16, 8, 32, 32, 16};
+    static_assert(sizeof(kLut) / sizeof(*kLut) == kNumValidStrategies,
+                  "Update LUT");
+    return kLut[size_t(strategy_)];
+  }
+
+  JXL_INLINE size_t log2_covered_blocks() const {  // log2(blocks_x * blocks_y)
+    static constexpr uint8_t kLut[] = {0, 0, 0, 0, 2, 4, 1,  1, 2,
+                                       2, 3, 3, 0, 0, 0, 0,  0, 0,
+                                       6, 5, 5, 8, 7, 7, 10, 9, 9};
+    static_assert(sizeof(kLut) / sizeof(*kLut) == kNumValidStrategies,
+                  "Update LUT");
+    return kLut[size_t(strategy_)];
+  }
+
+ private:
+  friend class AcStrategyRow;
+  JXL_INLINE AcStrategy(Type strategy, bool is_first)
+      : strategy_(strategy), is_first_(is_first) {
+    JXL_DASSERT(IsMultiblock() || is_first == true);
+  }
+
+  Type strategy_;
+  bool is_first_;
+};
+
+// Read-only view of one row of packed AC strategy bytes.
+class AcStrategyRow {
+ public:
+  explicit AcStrategyRow(const uint8_t* row) : row_(row) {}
+  AcStrategy operator[](size_t x) const {
+    const uint8_t packed = row_[x];  // (type << 1) | is_first
+    return AcStrategy(static_cast<AcStrategy::Type>(packed >> 1), packed & 1);
+  }
+ private:
+  const uint8_t* JXL_RESTRICT row_;
+};
+
+class AcStrategyImage {
+ public:
+  AcStrategyImage() = default;  // empty image: row_ is null, stride_ is 0
+  AcStrategyImage(size_t xsize, size_t ysize);
+  AcStrategyImage(AcStrategyImage&&) = default;
+  AcStrategyImage& operator=(AcStrategyImage&&) = default;
+  // Fills `rect` (or the whole image) with "first block of an 8x8 DCT".
+  void FillDCT8(const Rect& rect) {
+    FillPlane<uint8_t>((static_cast<uint8_t>(AcStrategy::Type::DCT) << 1) | 1,
+                       &layers_, rect);
+  }
+  void FillDCT8() { FillDCT8(Rect(layers_)); }
+  // Overwrites every cell with the INVALID sentinel.
+  void FillInvalid() { FillImage(INVALID, &layers_); }
+  // Stores `type` at block (x, y) after asserting that it fits in the image.
+  void Set(size_t x, size_t y, AcStrategy::Type type) {
+#if JXL_ENABLE_ASSERT
+    AcStrategy acs = AcStrategy::FromRawStrategy(type);
+#endif  // JXL_ENABLE_ASSERT
+    JXL_ASSERT(y + acs.covered_blocks_y() <= layers_.ysize());
+    JXL_ASSERT(x + acs.covered_blocks_x() <= layers_.xsize());
+    JXL_CHECK(SetNoBoundsCheck(x, y, type, /*check=*/false));
+  }
+  // Writes `type` into every cell it covers; only the top-left cell gets the
+  // "first" bit. With `check`, fails if a covered cell is not INVALID.
+  Status SetNoBoundsCheck(size_t x, size_t y, AcStrategy::Type type,
+                          bool check = true) {
+    AcStrategy acs = AcStrategy::FromRawStrategy(type);
+    for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+      for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+        size_t pos = (y + iy) * stride_ + x + ix;
+        if (check && row_[pos] != INVALID) {
+          return JXL_FAILURE("Invalid AC strategy: block overlap");
+        }
+        row_[pos] =
+            (static_cast<uint8_t>(type) << 1) | ((iy | ix) == 0 ? 1 : 0);
+      }
+    }
+    return true;
+  }
+  // True if the cell at (x, y) holds something other than INVALID.
+  bool IsValid(size_t x, size_t y) const {
+    return row_[y * stride_ + x] != INVALID;
+  }
+  // Row accessors; the Rect overload offsets by the rect's origin.
+  AcStrategyRow ConstRow(size_t y, size_t x_prefix = 0) const {
+    return AcStrategyRow(layers_.ConstRow(y) + x_prefix);
+  }
+  AcStrategyRow ConstRow(const Rect& rect, size_t y) const {
+    return ConstRow(rect.y0() + y, rect.x0());
+  }
+  size_t PixelsPerRow() const { return layers_.PixelsPerRow(); }
+  size_t xsize() const { return layers_.xsize(); }
+  size_t ysize() const { return layers_.ysize(); }
+
+  // Count the number of blocks of a given type.
+  size_t CountBlocks(AcStrategy::Type type) const;
+
+ private:
+  ImageB layers_;
+  uint8_t* JXL_RESTRICT row_ = nullptr;  // layers_.Row(0) once sized
+  size_t stride_ = 0;                    // layers_.PixelsPerRow() once sized
+
+  // A value that does not represent a valid combined AC strategy
+  // value. Used as a sentinel.
+  static constexpr uint8_t INVALID = 0xFF;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_AC_STRATEGY_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/ac_strategy_test.cc b/third-party/libjxl/libjxl/lib/jxl/ac_strategy_test.cc
new file mode 100644
index 0000000000..5b46c697f3
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/ac_strategy_test.cc
@@ -0,0 +1,239 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/ac_strategy.h"
+
+#include <string.h>
+
+#include <cmath>
+#include <hwy/aligned_allocator.h>
+#include <hwy/base.h>  // HWY_ALIGN_MAX
+#include <hwy/tests/test_util-inl.h>
+#include <utility>
+
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/dec_transforms_testonly.h"
+#include "lib/jxl/enc_transforms.h"
+
+namespace jxl {
+namespace {
+
+// Test that TransformFromPixels -> TransformToPixels is a noop.
+class AcStrategyRoundtrip : public ::hwy::TestWithParamTargetAndT<int> {
+ protected:
+  void Run() {
+    const AcStrategy::Type type = static_cast<AcStrategy::Type>(GetParam());
+    const AcStrategy acs = AcStrategy::FromRawStrategy(type);
+
+    auto mem = hwy::AllocateAligned<float>(4 * AcStrategy::kMaxCoeffArea);
+    float* scratch_space = mem.get();
+    float* coeffs = scratch_space + AcStrategy::kMaxCoeffArea;
+    float* idct = coeffs + AcStrategy::kMaxCoeffArea;
+    Rng rng(type * 65537 + 13);
+
+    for (size_t j = 0; j < 64; j++) {
+      size_t i = (acs.log2_covered_blocks()
+                      ? rng.UniformU(0, 64u << acs.log2_covered_blocks())
+                      : j);
+      float* input = idct + AcStrategy::kMaxCoeffArea;  // last quarter of mem
+      std::fill_n(input, AcStrategy::kMaxCoeffArea, 0);
+      input[i] = 0.2f;
+      TransformFromPixels(type, input, acs.covered_blocks_x() * 8, coeffs,
+                          scratch_space);
+      ASSERT_NEAR(coeffs[0], 0.2 / (64 << acs.log2_covered_blocks()), 1e-6)
+          << " i = " << i;
+      TransformToPixels(type, coeffs, idct, acs.covered_blocks_x() * 8,
+                        scratch_space);
+      for (size_t j = 0; j < 64u << acs.log2_covered_blocks(); j++) {
+        ASSERT_NEAR(idct[j], j == i ? 0.2f : 0, 2e-6)
+            << "j = " << j << " i = " << i << " acs " << type;
+      }
+    }
+    // Test DC: LowestFrequenciesFromDC -> DCFromLowestFrequencies round trip.
+    std::fill_n(idct, AcStrategy::kMaxCoeffArea, 0);
+    for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
+      for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
+        float* dc = idct + AcStrategy::kMaxCoeffArea;
+        std::fill_n(dc, AcStrategy::kMaxCoeffArea, 0);
+        dc[y * acs.covered_blocks_x() * 8 + x] = 0.2;
+        LowestFrequenciesFromDC(type, dc, acs.covered_blocks_x() * 8, coeffs,
+                                scratch_space);
+        DCFromLowestFrequencies(type, coeffs, idct, acs.covered_blocks_x() * 8);
+        std::fill_n(dc, AcStrategy::kMaxCoeffArea, 0);
+        dc[y * acs.covered_blocks_x() * 8 + x] = 0.2;
+        for (size_t j = 0; j < 64u << acs.log2_covered_blocks(); j++) {
+          ASSERT_NEAR(idct[j], dc[j], 1e-6)
+              << "j = " << j << " x = " << x << " y = " << y << " acs " << type;
+        }
+      }
+    }
+  }
+};
+
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(
+    AcStrategyRoundtrip,
+    ::testing::Range(0, int(AcStrategy::Type::kNumValidStrategies)));
+
+TEST_P(AcStrategyRoundtrip, Test) { Run(); }
+
+// Test that DC(2x2) -> DCT coefficients -> IDCT -> downsampled IDCT is a noop.
+class AcStrategyRoundtripDownsample
+    : public ::hwy::TestWithParamTargetAndT<int> {
+ protected:
+  void Run() {
+    const AcStrategy::Type type = static_cast<AcStrategy::Type>(GetParam());
+    const AcStrategy acs = AcStrategy::FromRawStrategy(type);
+
+    auto mem = hwy::AllocateAligned<float>(4 * AcStrategy::kMaxCoeffArea);
+    float* scratch_space = mem.get();
+    float* coeffs = scratch_space + AcStrategy::kMaxCoeffArea;
+    std::fill_n(coeffs, AcStrategy::kMaxCoeffArea, 0.0f);
+    float* idct = coeffs + AcStrategy::kMaxCoeffArea;
+    Rng rng(type * 65537 + 13);
+
+    for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
+      for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
+        if (x > 4 || y > 4) {
+          if (rng.Bernoulli(0.9f)) continue;
+        }
+        float* dc = idct + AcStrategy::kMaxCoeffArea;  // last quarter of mem
+        std::fill_n(dc, AcStrategy::kMaxCoeffArea, 0);
+        dc[y * acs.covered_blocks_x() * 8 + x] = 0.2f;
+        LowestFrequenciesFromDC(type, dc, acs.covered_blocks_x() * 8, coeffs,
+                                scratch_space);
+        TransformToPixels(type, coeffs, idct, acs.covered_blocks_x() * 8,
+                          scratch_space);
+        std::fill_n(coeffs, AcStrategy::kMaxCoeffArea, 0.0f);
+        std::fill_n(dc, AcStrategy::kMaxCoeffArea, 0);
+        dc[y * acs.covered_blocks_x() * 8 + x] = 0.2f;
+        // Downsample: every 8x8 block mean must equal the matching dc entry.
+        for (size_t dy = 0; dy < acs.covered_blocks_y(); dy++) {
+          for (size_t dx = 0; dx < acs.covered_blocks_x(); dx++) {
+            float sum = 0;
+            for (size_t iy = 0; iy < 8; iy++) {
+              for (size_t ix = 0; ix < 8; ix++) {
+                sum += idct[(dy * 8 + iy) * 8 * acs.covered_blocks_x() +
+                            dx * 8 + ix];
+              }
+            }
+            sum /= 64.0f;
+            ASSERT_NEAR(sum, dc[dy * 8 * acs.covered_blocks_x() + dx], 1e-6)
+                << "acs " << type;
+          }
+        }
+      }
+    }
+  }
+};
+
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(
+    AcStrategyRoundtripDownsample,
+    ::testing::Range(0, int(AcStrategy::Type::kNumValidStrategies)));
+
+TEST_P(AcStrategyRoundtripDownsample, Test) { Run(); }
+
+// Test that IDCT(block with zeros in the non-topleft corner) -> downsampled
+// IDCT is the same as IDCT -> DC(2x2) of the same block.
+class AcStrategyDownsample : public ::hwy::TestWithParamTargetAndT<int> {
+ protected:
+  void Run() {
+    const AcStrategy::Type type = static_cast<AcStrategy::Type>(GetParam());
+    const AcStrategy acs = AcStrategy::FromRawStrategy(type);
+    size_t cx = acs.covered_blocks_y();
+    size_t cy = acs.covered_blocks_x();
+    CoefficientLayout(&cy, &cx);
+
+    auto mem = hwy::AllocateAligned<float>(4 * AcStrategy::kMaxCoeffArea);
+    float* scratch_space = mem.get();
+    float* idct = scratch_space + AcStrategy::kMaxCoeffArea;
+    float* idct_acs_downsampled = idct + AcStrategy::kMaxCoeffArea;
+    Rng rng(type * 65537 + 13);
+
+    for (size_t y = 0; y < cy; y++) {
+      for (size_t x = 0; x < cx; x++) {
+        if (x > 4 || y > 4) {
+          if (rng.Bernoulli(0.9f)) continue;
+        }
+        float* coeffs = idct + AcStrategy::kMaxCoeffArea;
+        std::fill_n(coeffs, AcStrategy::kMaxCoeffArea, 0);
+        coeffs[y * cx * 8 + x] = 0.2f;
+        TransformToPixels(type, coeffs, idct, acs.covered_blocks_x() * 8,
+                          scratch_space);
+        std::fill_n(coeffs, AcStrategy::kMaxCoeffArea, 0);
+        coeffs[y * cx * 8 + x] = 0.2f;
+        DCFromLowestFrequencies(type, coeffs, idct_acs_downsampled,
+                                acs.covered_blocks_x() * 8);
+        // Downsample: each 8x8 mean of idct must match idct_acs_downsampled.
+        for (size_t dy = 0; dy < acs.covered_blocks_y(); dy++) {
+          for (size_t dx = 0; dx < acs.covered_blocks_x(); dx++) {
+            float sum = 0;
+            for (size_t iy = 0; iy < 8; iy++) {
+              for (size_t ix = 0; ix < 8; ix++) {
+                sum += idct[(dy * 8 + iy) * 8 * acs.covered_blocks_x() +
+                            dx * 8 + ix];
+              }
+            }
+            sum /= 64;
+            ASSERT_NEAR(
+                sum, idct_acs_downsampled[dy * 8 * acs.covered_blocks_x() + dx],
+                1e-6)
+                << " acs " << type;
+          }
+        }
+      }
+    }
+  }
+};
+
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(
+    AcStrategyDownsample,
+    ::testing::Range(0, int(AcStrategy::Type::kNumValidStrategies)));
+
+TEST_P(AcStrategyDownsample, Test) { Run(); }
+
+class AcStrategyTargetTest : public ::hwy::TestWithParamTarget {};
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(AcStrategyTargetTest);
+// AFVDCT4x4 followed by AFVIDCT4x4 must reproduce every unit impulse.
+TEST_P(AcStrategyTargetTest, RoundtripAFVDCT) {
+  constexpr size_t kNumSamples = 16;
+  HWY_ALIGN_MAX float restored[kNumSamples];
+  for (size_t hot = 0; hot < kNumSamples; hot++) {
+    HWY_ALIGN_MAX float impulse[kNumSamples] = {};
+    HWY_ALIGN_MAX float transformed[kNumSamples] = {};
+    impulse[hot] = 1;
+    AFVDCT4x4(impulse, transformed);
+    AFVIDCT4x4(transformed, restored);
+    for (size_t k = 0; k < kNumSamples; k++) {
+      EXPECT_NEAR(restored[k], impulse[k], 1e-6);
+    }
+  }
+}
+
+TEST_P(AcStrategyTargetTest, BenchmarkAFV) {  // 2^14 AFV0 round trips
+  const AcStrategy::Type afv = AcStrategy::Type::AFV0;
+  HWY_ALIGN_MAX float scratch_space[64] = {};
+  HWY_ALIGN_MAX float coeffs[64] = {};
+  HWY_ALIGN_MAX float pixels[64] = {1};
+  for (size_t round = 0; round < (1u << 14); round++) {
+    TransformToPixels(afv, coeffs, pixels, 8, scratch_space);
+    TransformFromPixels(afv, pixels, 8, coeffs, scratch_space);
+  }
+  EXPECT_NEAR(pixels[0], 0.0, 1E-6);
+}
+
+TEST_P(AcStrategyTargetTest, BenchmarkAFVDCT) {  // 2^14 AFV DCT round trips
+  HWY_ALIGN_MAX float coeffs[64] = {};
+  HWY_ALIGN_MAX float pixels[64] = {1};
+  for (size_t round = 0; round < (1u << 14); round++) {
+    AFVDCT4x4(pixels, coeffs);
+    AFVIDCT4x4(coeffs, pixels);
+  }
+  EXPECT_NEAR(pixels[0], 1.0, 1E-6);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/alpha.cc b/third-party/libjxl/libjxl/lib/jxl/alpha.cc
new file mode 100644
index 0000000000..48d7e7ee92
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/alpha.cc
@@ -0,0 +1,115 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/alpha.h"
+
+#include <string.h>
+
+#include <algorithm>
+
+namespace jxl {
+
+static float Clamp(float x) { x = std::min(1.0f, x); return std::max(x, 0.0f); }
+
+void PerformAlphaBlending(const AlphaBlendingInputLayer& bg,
+                          const AlphaBlendingInputLayer& fg,
+                          const AlphaBlendingOutput& out, size_t num_pixels,
+                          bool alpha_is_premultiplied, bool clamp) {
+  for (size_t i = 0; i < num_pixels; ++i) {
+    const float src_a = clamp ? Clamp(fg.a[i]) : fg.a[i];
+    const float keep = 1.f - src_a;  // share of the background that remains
+    if (alpha_is_premultiplied) {
+      // Colours already carry their alpha: add what is left of the background.
+      out.r[i] = (fg.r[i] + bg.r[i] * keep);
+      out.g[i] = (fg.g[i] + bg.g[i] * keep);
+      out.b[i] = (fg.b[i] + bg.b[i] * keep);
+      out.a[i] = (1.f - keep * (1.f - bg.a[i]));
+    } else {
+      // Straight alpha: weight both layers, then divide by the new alpha.
+      const float blended_a = 1.f - keep * (1.f - bg.a[i]);
+      const float inv_a = (blended_a > 0 ? 1.f / blended_a : 0.f);
+      out.r[i] = (fg.r[i] * src_a + bg.r[i] * bg.a[i] * keep) * inv_a;
+      out.g[i] = (fg.g[i] * src_a + bg.g[i] * bg.a[i] * keep) * inv_a;
+      out.b[i] = (fg.b[i] * src_a + bg.b[i] * bg.a[i] * keep) * inv_a;
+      out.a[i] = blended_a;
+    }
+  }
+}
+// Blends one plane. When bg == bga and fg == fga the plane is the alpha
+// plane itself. With `clamp`, the foreground alpha is limited by Clamp().
+void PerformAlphaBlending(const float* bg, const float* bga, const float* fg,
+                          const float* fga, float* out, size_t num_pixels,
+                          bool alpha_is_premultiplied, bool clamp) {
+  if (bg == bga && fg == fga) {
+    for (size_t x = 0; x < num_pixels; ++x) {
+      float fa = clamp ? Clamp(fga[x]) : fga[x];
+      out[x] = (1.f - (1.f - fa) * (1.f - bga[x]));
+    }
+  } else if (alpha_is_premultiplied) {
+    for (size_t x = 0; x < num_pixels; ++x) {
+      float fa = clamp ? Clamp(fga[x]) : fga[x];
+      out[x] = (fg[x] + bg[x] * (1.f - fa));
+    }
+  } else {
+    for (size_t x = 0; x < num_pixels; ++x) {
+      float fa = clamp ? Clamp(fga[x]) : fga[x];
+      const float new_a = 1.f - (1.f - fa) * (1.f - bga[x]);
+      const float rnew_a = (new_a > 0 ? 1.f / new_a : 0.f);
+      out[x] = (fg[x] * fa + bg[x] * bga[x] * (1.f - fa)) * rnew_a;
+    }
+  }
+}
+
+// out = bg + fg * alpha for one plane. When `fg` and `fga` are the same
+// plane, the background is copied to `out` unchanged.
+void PerformAlphaWeightedAdd(const float* bg, const float* fg, const float* fga,
+                             float* out, size_t num_pixels, bool clamp) {
+  if (fg == fga) {
+    // Nothing to weight: pass the background through.
+    memcpy(out, bg, num_pixels * sizeof(*out));
+    return;
+  }
+  for (size_t i = 0; i < num_pixels; ++i) {
+    const float weight = clamp ? Clamp(fga[i]) : fga[i];
+    out[i] = bg[i] + fg[i] * weight;
+  }
+}
+
+// Multiplies the background by the foreground, sample by sample. With
+// `clamp` set, each foreground sample first goes through Clamp().
+void PerformMulBlending(const float* bg, const float* fg, float* out,
+                        size_t num_pixels, bool clamp) {
+  size_t i = 0;
+  while (i < num_pixels) {
+    // Both inputs are read before the output sample is stored.
+    const float factor = clamp ? Clamp(fg[i]) : fg[i];
+    out[i] = bg[i] * factor;
+    ++i;
+  }
+}
+
+void PremultiplyAlpha(float* JXL_RESTRICT r, float* JXL_RESTRICT g,
+                      float* JXL_RESTRICT b, const float* JXL_RESTRICT a,
+                      size_t num_pixels) {
+  for (size_t i = 0; i != num_pixels; ++i) {
+    const float scale = std::max(kSmallAlpha, a[i]);  // alpha, floored
+    r[i] = r[i] * scale;
+    g[i] = g[i] * scale;
+    b[i] = b[i] * scale;
+  }
+}
+
+void UnpremultiplyAlpha(float* JXL_RESTRICT r, float* JXL_RESTRICT g,
+                        float* JXL_RESTRICT b, const float* JXL_RESTRICT a,
+                        size_t num_pixels) {
+  for (size_t i = 0; i != num_pixels; ++i) {
+    const float inv = 1.f / std::max(kSmallAlpha, a[i]);  // alpha is floored
+    r[i] = r[i] * inv;
+    g[i] = g[i] * inv;
+    b[i] = b[i] * inv;
+  }
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/alpha.h b/third-party/libjxl/libjxl/lib/jxl/alpha.h
new file mode 100644
index 0000000000..efb76c800f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/alpha.h
@@ -0,0 +1,66 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ALPHA_H_
+#define LIB_JXL_ALPHA_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <limits>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+// A very small value to avoid divisions by zero when converting to
+// unpremultiplied alpha. Page 21 of the technical introduction to OpenEXR
+// (https://www.openexr.com/documentation/TechnicalIntroduction.pdf) recommends
+// "a power of two" that is "less than half of the smallest positive 16-bit
+// floating-point value". That smallest value happens to be the denormal number
+// 2^-24, so 2^-26 should be a good choice.
+static constexpr float kSmallAlpha = 1.f / (1u << 26u);
+
+struct AlphaBlendingInputLayer {  // read-only planes of one layer
+  const float* r;
+  const float* g;
+  const float* b;
+  const float* a;  // alpha plane
+};
+
+struct AlphaBlendingOutput {  // writable planes receiving the blend result
+  float* r;
+  float* g;
+  float* b;
+  float* a;  // alpha plane
+};
+
+// Note: The pointers in `out` are allowed to alias those in `bg` or `fg`.
+// No pointer shall be null.
+void PerformAlphaBlending(const AlphaBlendingInputLayer& bg,
+                          const AlphaBlendingInputLayer& fg,
+                          const AlphaBlendingOutput& out, size_t num_pixels,
+                          bool alpha_is_premultiplied, bool clamp);
+// Single-plane variant: blends one plane given the alpha planes bga and fga.
+void PerformAlphaBlending(const float* bg, const float* bga, const float* fg,
+                          const float* fga, float* out, size_t num_pixels,
+                          bool alpha_is_premultiplied, bool clamp);
+// out = bg + fg * fga, or a copy of bg when fg and fga are the same plane.
+void PerformAlphaWeightedAdd(const float* bg, const float* fg, const float* fga,
+                             float* out, size_t num_pixels, bool clamp);
+// out = bg * fg, sample by sample.
+void PerformMulBlending(const float* bg, const float* fg, float* out,
+                        size_t num_pixels, bool clamp);
+// In-place (un)premultiplication; alpha is floored at kSmallAlpha.
+void PremultiplyAlpha(float* JXL_RESTRICT r, float* JXL_RESTRICT g,
+                      float* JXL_RESTRICT b, const float* JXL_RESTRICT a,
+                      size_t num_pixels);
+void UnpremultiplyAlpha(float* JXL_RESTRICT r, float* JXL_RESTRICT g,
+                        float* JXL_RESTRICT b, const float* JXL_RESTRICT a,
+                        size_t num_pixels);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ALPHA_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/alpha_test.cc b/third-party/libjxl/libjxl/lib/jxl/alpha_test.cc
new file mode 100644
index 0000000000..ddafd829ec
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/alpha_test.cc
@@ -0,0 +1,134 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/alpha.h"
+
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+using ::testing::_;
+using ::testing::ElementsAre;
+using ::testing::FloatNear;
+
+TEST(AlphaTest, BlendingWithNonPremultiplied) {
+  const float back[3] = {100, 110, 120};
+  const float back_alpha = 180.f / 255;
+  const float front[3] = {25, 21, 23};
+  const float front_alpha = 15420.f / 65535;
+  const float oversized_alpha = 2.0f;  // above 1; used with clamp=true
+  float blended[3];
+  float blended_alpha;
+  PerformAlphaBlending(
+      /*bg=*/{&back[0], &back[1], &back[2], &back_alpha},
+      /*fg=*/{&front[0], &front[1], &front[2], &front_alpha},
+      /*out=*/{&blended[0], &blended[1], &blended[2], &blended_alpha}, 1,
+      /*alpha_is_premultiplied=*/false, /*clamp=*/false);
+  EXPECT_THAT(blended,
+              ElementsAre(FloatNear(77.2f, .05f), FloatNear(83.0f, .05f),
+                          FloatNear(90.6f, .05f)));
+  EXPECT_NEAR(blended_alpha, 3174.f / 4095, 1e-5);
+  PerformAlphaBlending(
+      /*bg=*/{&back[0], &back[1], &back[2], &back_alpha},
+      /*fg=*/{&front[0], &front[1], &front[2], &oversized_alpha},
+      /*out=*/{&blended[0], &blended[1], &blended[2], &blended_alpha}, 1,
+      /*alpha_is_premultiplied=*/false, /*clamp=*/true);
+  EXPECT_THAT(blended, ElementsAre(FloatNear(front[0], .05f),
+                                   FloatNear(front[1], .05f),
+                                   FloatNear(front[2], .05f)));
+  EXPECT_NEAR(blended_alpha, 1.0f, 1e-5);
+}
+
+TEST(AlphaTest, BlendingWithPremultiplied) {
+  const float back[3] = {100, 110, 120};
+  const float back_alpha = 180.f / 255;
+  const float front[3] = {25, 21, 23};
+  const float front_alpha = 15420.f / 65535;
+  const float oversized_alpha = 2.0f;  // above 1; used with clamp=true
+  float blended[3];
+  float blended_alpha;
+  PerformAlphaBlending(
+      /*bg=*/{&back[0], &back[1], &back[2], &back_alpha},
+      /*fg=*/{&front[0], &front[1], &front[2], &front_alpha},
+      /*out=*/{&blended[0], &blended[1], &blended[2], &blended_alpha}, 1,
+      /*alpha_is_premultiplied=*/true, /*clamp=*/false);
+  EXPECT_THAT(blended,
+              ElementsAre(FloatNear(101.5f, .05f), FloatNear(105.1f, .05f),
+                          FloatNear(114.8f, .05f)));
+  EXPECT_NEAR(blended_alpha, 3174.f / 4095, 1e-5);
+  PerformAlphaBlending(
+      /*bg=*/{&back[0], &back[1], &back[2], &back_alpha},
+      /*fg=*/{&front[0], &front[1], &front[2], &oversized_alpha},
+      /*out=*/{&blended[0], &blended[1], &blended[2], &blended_alpha}, 1,
+      /*alpha_is_premultiplied=*/true, /*clamp=*/true);
+  EXPECT_THAT(blended, ElementsAre(FloatNear(front[0], .05f),
+                                   FloatNear(front[1], .05f),
+                                   FloatNear(front[2], .05f)));
+  EXPECT_NEAR(blended_alpha, 1.0f, 1e-5);
+}
+
+TEST(AlphaTest, Mul) {
+  const float backdrop = 100;
+  const float factor = 25;  // above 1, so the clamped run multiplies by 1
+  float product;
+  PerformMulBlending(&backdrop, &factor, &product, 1, /*clamp=*/false);
+  EXPECT_THAT(product, FloatNear(factor * backdrop, .05f));
+  PerformMulBlending(&backdrop, &factor, &product, 1, /*clamp=*/true);
+  EXPECT_THAT(product, FloatNear(backdrop, .05f));
+}
+
+TEST(AlphaTest, PremultiplyAndUnpremultiply) {
+  const float opacity[] = {0.f, 63.f / 255, 127.f / 255, 1.f};
+  float red[] = {120, 130, 140, 150};
+  float green[] = {124, 134, 144, 154};
+  float blue[] = {127, 137, 147, 157};
+  // Forward: every channel is scaled by its pixel's opacity.
+  PremultiplyAlpha(red, green, blue, opacity, 4);
+  EXPECT_THAT(red, ElementsAre(FloatNear(0.f, 1e-5f),
+                               FloatNear(130 * 63.f / 255, 1e-5f),
+                               FloatNear(140 * 127.f / 255, 1e-5f), 150));
+  EXPECT_THAT(green, ElementsAre(FloatNear(0.f, 1e-5f),
+                                 FloatNear(134 * 63.f / 255, 1e-5f),
+                                 FloatNear(144 * 127.f / 255, 1e-5f), 154));
+  EXPECT_THAT(blue, ElementsAre(FloatNear(0.f, 1e-5f),
+                                FloatNear(137 * 63.f / 255, 1e-5f),
+                                FloatNear(147 * 127.f / 255, 1e-5f), 157));
+  // Backward: the original values must come back, even where opacity is 0.
+  UnpremultiplyAlpha(red, green, blue, opacity, 4);
+  EXPECT_THAT(red, ElementsAre(FloatNear(120, 1e-4f), FloatNear(130, 1e-4f),
+                               FloatNear(140, 1e-4f), FloatNear(150, 1e-4f)));
+  EXPECT_THAT(green, ElementsAre(FloatNear(124, 1e-4f), FloatNear(134, 1e-4f),
+                                 FloatNear(144, 1e-4f), FloatNear(154, 1e-4f)));
+  EXPECT_THAT(blue, ElementsAre(FloatNear(127, 1e-4f), FloatNear(137, 1e-4f),
+                                FloatNear(147, 1e-4f), FloatNear(157, 1e-4f)));
+}
+
+TEST(AlphaTest, UnpremultiplyAndPremultiply) {
+  const float opacity[] = {0.f, 63.f / 255, 127.f / 255, 1.f};
+  float red[] = {50, 60, 70, 80};
+  float green[] = {54, 64, 74, 84};
+  float blue[] = {57, 67, 77, 87};
+  // Backward first: the zero-opacity pixel is left unchecked (matcher `_`).
+  UnpremultiplyAlpha(red, green, blue, opacity, 4);
+  EXPECT_THAT(red, ElementsAre(_, FloatNear(60 * 255.f / 63, 1e-4f),
+                               FloatNear(70 * 255.f / 127, 1e-4f), 80));
+  EXPECT_THAT(green, ElementsAre(_, FloatNear(64 * 255.f / 63, 1e-4f),
+                                 FloatNear(74 * 255.f / 127, 1e-4f), 84));
+  EXPECT_THAT(blue, ElementsAre(_, FloatNear(67 * 255.f / 63, 1e-4f),
+                                FloatNear(77 * 255.f / 127, 1e-4f), 87));
+  // Forward again: every pixel must return to its starting value.
+  PremultiplyAlpha(red, green, blue, opacity, 4);
+  EXPECT_THAT(red, ElementsAre(FloatNear(50, 1e-4f), FloatNear(60, 1e-4f),
+                               FloatNear(70, 1e-4f), FloatNear(80, 1e-4f)));
+  EXPECT_THAT(green, ElementsAre(FloatNear(54, 1e-4f), FloatNear(64, 1e-4f),
+                                 FloatNear(74, 1e-4f), FloatNear(84, 1e-4f)));
+  EXPECT_THAT(blue, ElementsAre(FloatNear(57, 1e-4f), FloatNear(67, 1e-4f),
+                                FloatNear(77, 1e-4f), FloatNear(87, 1e-4f)));
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/ans_common.cc b/third-party/libjxl/libjxl/lib/jxl/ans_common.cc
new file mode 100644
index 0000000000..d2cf897ec4
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/ans_common.cc
@@ -0,0 +1,148 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/ans_common.h"
+
+#include <numeric>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+std::vector<int32_t> CreateFlatHistogram(int length, int total_count) {
+  JXL_ASSERT(length > 0);  // the division and modulo below need length != 0
+  JXL_ASSERT(length <= total_count);  // so that every bin gets a count >= 1
+  const int count = total_count / length;  // base count shared by all bins
+  std::vector<int32_t> result(length, count);
+  const int rem_counts = total_count % length;  // leftover after the even split
+  for (int i = 0; i < rem_counts; ++i) {  // the first bins absorb the leftover
+    ++result[i];
+  }
+  return result;
+}
+
+// First, all trailing non-occurring symbols are removed from the distribution;
+// if this leaves the distribution empty, a dummy symbol with max weight is
+// added. This ensures that the resulting distribution sums to total table size.
+// Then, `entry_size` is chosen to be the largest power of two so that
+// `table_size` = ANS_TAB_SIZE/`entry_size` is at least as big as the
+// distribution size.
+// Note that each entry will only ever contain two different symbols, and
+// consecutive ranges of offsets, which allows us to use a compact
+// representation.
+// Each entry is initialized with only the (symbol=i, offset) pairs; then
+// positions for which the entry overflows (i.e. distribution[i] > entry_size)
+// or is not full are computed, and put into a stack in increasing order.
+// Missing symbols in the distribution are padded with 0 (because `table_size`
+// >= number of symbols). The `cutoff` value for each entry is initialized to
+// the number of occupied slots in that entry (i.e. `distribution[i]`). While
+// the overflowing-symbol stack is not empty (which implies that the
+// underflowing-symbol stack also is not), the top overfull and underfull
+// positions are popped from the stack; the empty slots in the underfull entry
+// are then filled with as many slots as needed from the overfull entry; such
+// slots are placed after the slots in the overfull entry, and `offsets[1]` is
+// computed accordingly. The formerly underfull entry is thus now neither
+// underfull nor overfull, and represents exactly two symbols. The overfull
+// entry might be either overfull or underfull, and is pushed into the
+// corresponding stack.
+void InitAliasTable(std::vector<int32_t> distribution, uint32_t range,
+                    size_t log_alpha_size, AliasTable::Entry* JXL_RESTRICT a) {  // `distribution` is a by-value copy, edited below
+  while (!distribution.empty() && distribution.back() == 0) {
+    distribution.pop_back();  // strip trailing zero-count symbols
+  }
+  // Ensure that a valid table is always returned, even for an empty
+  // alphabet. Otherwise, a specially-crafted stream might crash the
+  // decoder.
+  if (distribution.empty()) {
+    distribution.emplace_back(range);  // dummy symbol 0 owning the whole range
+  }
+  const size_t table_size = 1 << log_alpha_size;  // number of entries written to `a`
+#if JXL_ENABLE_ASSERT
+  int sum = std::accumulate(distribution.begin(), distribution.end(), 0);  // only read by the assert below
+#endif  // JXL_ENABLE_ASSERT
+  JXL_ASSERT(static_cast<uint32_t>(sum) == range);
+  // range must be a power of two
+  JXL_ASSERT((range & (range - 1)) == 0);
+  JXL_ASSERT(distribution.size() <= table_size);
+  JXL_ASSERT(table_size <= range);
+  const uint32_t entry_size = range >> log_alpha_size;  // this is exact
+  // Special case for single-symbol distributions, that ensures that the state
+  // does not change when decoding from such a distribution. Note that, since we
+  // hardcode offset0 == 0, it is not straightforward (if at all possible) to
+  // fix the general case to produce this result.
+  for (size_t sym = 0; sym < distribution.size(); sym++) {
+    if (distribution[sym] == ANS_TAB_SIZE) {  // NOTE(review): compares with ANS_TAB_SIZE, not `range` - confirm they always match
+      for (size_t i = 0; i < table_size; i++) {
+        a[i].right_value = sym;
+        a[i].cutoff = 0;  // cutoff 0: every position in the entry maps to right_value
+        a[i].offsets1 = entry_size * i;
+        a[i].freq0 = 0;
+        a[i].freq1_xor_freq0 = ANS_TAB_SIZE;
+      }
+      return;
+    }
+  }
+  std::vector<uint32_t> underfull_posn;  // stack of entries with free slots
+  std::vector<uint32_t> overfull_posn;   // stack of entries with excess slots
+  std::vector<uint32_t> cutoffs(1 << log_alpha_size);  // same length as table_size
+  // Initialize entries.
+  for (size_t i = 0; i < distribution.size(); i++) {
+    cutoffs[i] = distribution[i];
+    if (cutoffs[i] > entry_size) {
+      overfull_posn.push_back(i);
+    } else if (cutoffs[i] < entry_size) {
+      underfull_posn.push_back(i);
+    }
+  }
+  for (uint32_t i = distribution.size(); i < table_size; i++) {  // entries past the last symbol start empty
+    cutoffs[i] = 0;
+    underfull_posn.push_back(i);
+  }
+  // Reassign overflow/underflow values.
+  while (!overfull_posn.empty()) {
+    uint32_t overfull_i = overfull_posn.back();
+    overfull_posn.pop_back();
+    JXL_ASSERT(!underfull_posn.empty());
+    uint32_t underfull_i = underfull_posn.back();
+    underfull_posn.pop_back();
+    uint32_t underfull_by = entry_size - cutoffs[underfull_i];  // free slots in the underfull entry
+    cutoffs[overfull_i] -= underfull_by;  // those slots are taken from the overfull symbol
+    // overfull positions have their original symbols
+    a[underfull_i].right_value = overfull_i;
+    a[underfull_i].offsets1 = cutoffs[overfull_i];
+    // Slots in the right part of entry underfull_i were taken from the end
+    // of the symbols in entry overfull_i.
+    if (cutoffs[overfull_i] < entry_size) {
+      underfull_posn.push_back(overfull_i);
+    } else if (cutoffs[overfull_i] > entry_size) {
+      overfull_posn.push_back(overfull_i);
+    }
+  }
+  for (uint32_t i = 0; i < table_size; i++) {
+    // cutoffs[i] is properly initialized but the clang-analyzer doesn't infer
+    // it since it is partially initialized across two for-loops.
+    // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult)
+    if (cutoffs[i] == entry_size) {
+      a[i].right_value = i;
+      a[i].offsets1 = 0;
+      a[i].cutoff = 0;
+    } else {
+      // Note that, if cutoff is not equal to entry_size,
+      // a[i].offsets1 was initialized with (overfull cutoff) -
+      // (entry_size - a[i].cutoff). Thus, subtracting
+      // a[i].cutoff cannot make it negative.
+      a[i].offsets1 -= cutoffs[i];
+      a[i].cutoff = cutoffs[i];
+    }
+    const size_t freq0 = i < distribution.size() ? distribution[i] : 0;  // padded symbols have frequency 0
+    const size_t i1 = a[i].right_value;
+    const size_t freq1 = i1 < distribution.size() ? distribution[i1] : 0;
+    a[i].freq0 = static_cast<uint16_t>(freq0);
+    a[i].freq1_xor_freq0 = static_cast<uint16_t>(freq1 ^ freq0);  // stored XOR-ed so Lookup can select without a branch
+  }
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/ans_common.h b/third-party/libjxl/libjxl/lib/jxl/ans_common.h
new file mode 100644
index 0000000000..fb5058e310
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/ans_common.h
@@ -0,0 +1,143 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ANS_COMMON_H_
+#define LIB_JXL_ANS_COMMON_H_
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <hwy/cache_control.h>  // Prefetch
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Returns the precision (number of bits) that should be used to store
+// a histogram count such that Log2Floor(count) == logcount.
+static JXL_INLINE uint32_t GetPopulationCountPrecision(uint32_t logcount,
+                                                       uint32_t shift) {
+  int32_t r = std::min<int>(  // computed in signed ints: the second operand can go negative
+      logcount, int(shift) - int((ANS_LOG_TAB_SIZE - logcount) >> 1));
+  if (r < 0) return 0;  // clamp: the returned precision is never negative
+  return r;
+}
+
+// Returns a histogram where the counts are positive, differ by at most 1,
+// and add up to total_count. The bigger counts (if any) are at the beginning
+// of the histogram.
+std::vector<int32_t> CreateFlatHistogram(int length, int total_count);
+
+// An alias table implements a mapping from the [0, ANS_TAB_SIZE) range into
+// the [0, ANS_MAX_ALPHABET_SIZE) range, satisfying the following conditions:
+// - each symbol occurs as many times as specified by any valid distribution
+//   of frequencies of the symbols. A valid distribution here is an array of
+//   ANS_MAX_ALPHABET_SIZE that contains numbers in the range [0, ANS_TAB_SIZE],
+//   and whose sum is ANS_TAB_SIZE.
+// - lookups can be done in constant time, and also return how many smaller
+//   input values map into the same symbol, according to some well-defined order
+//   of input values.
+// - the space used by the alias table is given by a small constant times the
+//   index of the largest symbol with nonzero probability in the distribution.
+// Each of the entries in the table covers a range of `entry_size` values in the
+// [0, ANS_TAB_SIZE) range; consecutive entries represent consecutive
+// sub-ranges. In the range covered by entry `i`, the first `cutoff` values map
+// to symbol `i`, while the others map to symbol `right_value`.
+//
+// TODO(veluca): consider making the order used for computing offsets easier to
+// define - it is currently defined by the algorithm to compute the alias table.
+// Beware of breaking the implicit assumption that symbols that come after the
+// cutoff value should have an offset at least as big as the cutoff.
+
+struct AliasTable {
+  struct Symbol {  // result of Lookup()
+    size_t value;   // symbol the looked-up input maps to
+    size_t offset;  // offsets1 (or 0) plus the position within the entry
+    size_t freq;    // freq0 or freq1, matching the side that was hit
+  };
+
+// Working set size matters here (~64 tables x 256 entries).
+// offsets0 is always zero (beginning of [0] side among the same symbol).
+// offsets1 is an offset of (pos >= cutoff) side decremented by cutoff.
+#pragma pack(push, 1)
+  struct Entry {  // 8 bytes when packed; Lookup reads it as one uint64_t
+    uint8_t cutoff;       // < kEntrySizeMinus1 when used by ANS.
+    uint8_t right_value;  // < alphabet size.
+    uint16_t freq0;
+
+    // Only used if `greater` (see Lookup)
+    uint16_t offsets1;         // <= ANS_TAB_SIZE
+    uint16_t freq1_xor_freq0;  // for branchless ternary in Lookup
+  };
+#pragma pack(pop)
+
+  // Dividing `value` by `entry_size` determines `i`, the entry which is
+  // responsible for the input. If the remainder is below `cutoff`, then the
+  // mapped symbol is `i`; since `offsets[0]` stores the number of occurrences
+  // of `i` "before" the start of this entry, the offset of the input will be
+  // `offsets[0] + remainder`. If the remainder is above cutoff, the mapped
+  // symbol is `right_value`; since `offsets[1]` stores the number of
+  // occurrences of `right_value` "before" this entry, minus the `cutoff` value,
+  // the input offset is then `remainder + offsets[1]`.
+  static JXL_INLINE Symbol Lookup(const Entry* JXL_RESTRICT table, size_t value,
+                                  size_t log_entry_size,
+                                  size_t entry_size_minus_1) {
+    const size_t i = value >> log_entry_size;       // entry index
+    const size_t pos = value & entry_size_minus_1;  // position inside the entry
+
+#if JXL_BYTE_ORDER_LITTLE
+    uint64_t entry;
+    memcpy(&entry, &table[i].cutoff, sizeof(entry));  // one 8-byte load of the packed Entry
+    const size_t cutoff = entry & 0xFF;              // = MOVZX
+    const size_t right_value = (entry >> 8) & 0xFF;  // = MOVZX
+    const size_t freq0 = (entry >> 16) & 0xFFFF;
+#else
+    // Generates multiple loads with complex addressing.
+    const size_t cutoff = table[i].cutoff;
+    const size_t right_value = table[i].right_value;
+    const size_t freq0 = table[i].freq0;
+#endif
+
+    const bool greater = pos >= cutoff;  // true: the input falls on the right_value side
+
+#if JXL_BYTE_ORDER_LITTLE
+    const uint64_t conditional = greater ? entry : 0;  // = CMOV
+    const size_t offsets1_or_0 = (conditional >> 32) & 0xFFFF;
+    const size_t freq1_xor_freq0_or_0 = conditional >> 48;
+#else
+    const size_t offsets1_or_0 = greater ? table[i].offsets1 : 0;
+    const size_t freq1_xor_freq0_or_0 = greater ? table[i].freq1_xor_freq0 : 0;
+#endif
+
+    // WARNING: moving this code may interfere with CMOV heuristics.
+    Symbol s;
+    s.value = greater ? right_value : i;
+    s.offset = offsets1_or_0 + pos;
+    s.freq = freq0 ^ freq1_xor_freq0_or_0;  // = greater ? freq1 : freq0
+    // XOR avoids implementation-defined conversion from unsigned to signed.
+    // Alternatives considered: BEXTR is 2 cycles on HSW, SET+shift causes
+    // spills, simple ternary has a long dependency chain.
+
+    return s;
+  }
+
+  static HWY_INLINE void Prefetch(const Entry* JXL_RESTRICT table, size_t value,
+                                  size_t log_entry_size) {
+    const size_t i = value >> log_entry_size;  // same entry index Lookup would use
+    hwy::Prefetch(table + i);
+  }
+};
+
+// Computes an alias table for a given distribution.
+void InitAliasTable(std::vector<int32_t> distribution, uint32_t range,
+                    size_t log_alpha_size, AliasTable::Entry* JXL_RESTRICT a);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ANS_COMMON_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/ans_common_test.cc b/third-party/libjxl/libjxl/lib/jxl/ans_common_test.cc
new file mode 100644
index 0000000000..487b6cf5bd
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/ans_common_test.cc
@@ -0,0 +1,43 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/ans_common.h"
+
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+void VerifyAliasDistribution(const std::vector<int>& distribution,
+                             uint32_t range) {  // checks the table built for `distribution`
+  constexpr size_t log_alpha_size = 8;  // table of 1 << 8 entries
+  AliasTable::Entry table[1 << log_alpha_size];
+  InitAliasTable(distribution, range, log_alpha_size, table);
+  std::vector<std::vector<uint32_t>> offsets(distribution.size());  // offsets[s]: offsets returned for symbol s
+  for (uint32_t i = 0; i < range; i++) {  // look up every input value once
+    AliasTable::Symbol s = AliasTable::Lookup(
+        table, i, ANS_LOG_TAB_SIZE - 8, (1 << (ANS_LOG_TAB_SIZE - 8)) - 1);  // NOTE(review): literal 8 mirrors log_alpha_size; assumes range == ANS_TAB_SIZE
+    offsets[s.value].push_back(s.offset);
+  }
+  for (uint32_t i = 0; i < distribution.size(); i++) {
+    ASSERT_EQ(static_cast<size_t>(distribution[i]), offsets[i].size());  // symbol i hit exactly distribution[i] times
+    std::sort(offsets[i].begin(), offsets[i].end());
+    for (uint32_t j = 0; j < offsets[i].size(); j++) {
+      ASSERT_EQ(offsets[i][j], j);  // sorted offsets must be exactly 0, 1, 2, ...
+    }
+  }
+}
+
+TEST(ANSCommonTest, AliasDistributionSmoke) {
+  VerifyAliasDistribution({ANS_TAB_SIZE / 2, ANS_TAB_SIZE / 2}, ANS_TAB_SIZE);  // two equally likely symbols
+  VerifyAliasDistribution({ANS_TAB_SIZE}, ANS_TAB_SIZE);  // single symbol
+  VerifyAliasDistribution({0, 0, 0, ANS_TAB_SIZE, 0}, ANS_TAB_SIZE);  // single symbol surrounded by zero counts
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/ans_params.h b/third-party/libjxl/libjxl/lib/jxl/ans_params.h
new file mode 100644
index 0000000000..4bbc284c0b
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/ans_params.h
@@ -0,0 +1,36 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ANS_PARAMS_H_
+#define LIB_JXL_ANS_PARAMS_H_
+
+// Common parameters that are needed for both the ANS entropy encoding and
+// decoding methods.
+
+#include <stdint.h>
+#include <stdlib.h>
+
+namespace jxl {
+
+// TODO(veluca): decide if 12 is the best constant here (valid range is up to
+// 16). This requires recomputing the Huffman tables in {enc,dec}_ans.cc
+// 14 gives a 0.2% improvement at d1 and makes d8 slightly worse. This is
+// likely not worth the increase in encoder complexity.
+#define ANS_LOG_TAB_SIZE 12u
+#define ANS_TAB_SIZE (1 << ANS_LOG_TAB_SIZE)  // 4096 with ANS_LOG_TAB_SIZE == 12
+#define ANS_TAB_MASK (ANS_TAB_SIZE - 1)  // 0xFFF with ANS_LOG_TAB_SIZE == 12
+
+// Largest possible symbol to be encoded by either ANS or prefix coding.
+#define PREFIX_MAX_ALPHABET_SIZE 4096
+#define ANS_MAX_ALPHABET_SIZE 256
+
+// Max number of bits for prefix coding.
+#define PREFIX_MAX_BITS 15
+
+#define ANS_SIGNATURE 0x13  // Initial state, used as CRC.
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ANS_PARAMS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/ans_test.cc b/third-party/libjxl/libjxl/lib/jxl/ans_test.cc
new file mode 100644
index 0000000000..06bc46477f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/ans_test.cc
@@ -0,0 +1,278 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+void RoundtripTestcase(int n_histograms, int alphabet_size,
+                       const std::vector<Token>& input_values) {  // NOTE(review): alphabet_size is not used in this body
+  constexpr uint16_t kMagic1 = 0x9e33;  // sentinel written before the payload
+  constexpr uint16_t kMagic2 = 0x8b04;  // sentinel written after the payload
+
+  BitWriter writer;
+  // Space for magic bytes.
+  BitWriter::Allotment allotment_magic1(&writer, 16);
+  writer.Write(16, kMagic1);
+  allotment_magic1.ReclaimAndCharge(&writer, 0, nullptr);
+
+  std::vector<uint8_t> context_map;
+  EntropyEncodingData codes;
+  std::vector<std::vector<Token>> input_values_vec;
+  input_values_vec.push_back(input_values);  // the encoder API takes a vector of token streams
+
+  BuildAndEncodeHistograms(HistogramParams(), n_histograms, input_values_vec,
+                           &codes, &context_map, &writer, 0, nullptr);
+  WriteTokens(input_values_vec[0], codes, context_map, &writer, 0, nullptr);
+
+  // Magic bytes + padding
+  BitWriter::Allotment allotment_magic2(&writer, 24);
+  writer.Write(16, kMagic2);
+  writer.ZeroPadToByte();
+  allotment_magic2.ReclaimAndCharge(&writer, 0, nullptr);
+
+  // We do not truncate the output. Reading past the end reads out zeroes
+  // anyway.
+  BitReader br(writer.GetSpan());
+
+  ASSERT_EQ(br.ReadBits(16), kMagic1);  // leading sentinel must read back intact
+
+  std::vector<uint8_t> dec_context_map;
+  ANSCode decoded_codes;
+  ASSERT_TRUE(
+      DecodeHistograms(&br, n_histograms, &decoded_codes, &dec_context_map));
+  ASSERT_EQ(dec_context_map, context_map);  // decoder must rebuild the same context map
+  ANSSymbolReader reader(&decoded_codes, &br);
+
+  for (const Token& symbol : input_values) {  // decode in order and compare with the input
+    uint32_t read_symbol =
+        reader.ReadHybridUint(symbol.context, &br, dec_context_map);
+    ASSERT_EQ(read_symbol, symbol.value);
+  }
+  ASSERT_TRUE(reader.CheckANSFinalState());
+
+  ASSERT_EQ(br.ReadBits(16), kMagic2);  // trailing sentinel: decoding stopped at the expected bit position
+  EXPECT_TRUE(br.Close());
+}
+
+TEST(ANSTest, EmptyRoundtrip) {  // two histograms, no tokens at all
+  RoundtripTestcase(2, ANS_MAX_ALPHABET_SIZE, std::vector<Token>());
+}
+
+TEST(ANSTest, SingleSymbolRoundtrip) {
+  for (uint32_t i = 0; i < ANS_MAX_ALPHABET_SIZE; i++) {
+    RoundtripTestcase(2, ANS_MAX_ALPHABET_SIZE, {{0, i}});  // stream holding one token built from (0, i)
+  }
+  for (uint32_t i = 0; i < ANS_MAX_ALPHABET_SIZE; i++) {
+    RoundtripTestcase(2, ANS_MAX_ALPHABET_SIZE,
+                      std::vector<Token>(1024, {0, i}));  // 1024 copies of the same token
+  }
+}
+
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+    defined(THREAD_SANITIZER)
+constexpr size_t kReps = 3;  // fewer repetitions when any sanitizer macro is defined
+#else
+constexpr size_t kReps = 10;  // default repetition count for the random tests
+#endif
+
+void RoundtripRandomStream(int alphabet_size, size_t reps = kReps,
+                           size_t num = 1 << 18) {  // round-trips `reps` streams of `num` random tokens
+  constexpr int kNumHistograms = 3;
+  Rng rng(0);  // constructed with the constant 0 on every call
+  for (size_t i = 0; i < reps; i++) {
+    std::vector<Token> symbols;
+    for (size_t j = 0; j < num; j++) {
+      int context = rng.UniformI(0, kNumHistograms);
+      int value = rng.UniformU(0, alphabet_size);
+      symbols.emplace_back(context, value);  // Token built from (context, value)
+    }
+    RoundtripTestcase(kNumHistograms, alphabet_size, symbols);
+  }
+}
+
+void RoundtripRandomUnbalancedStream(int alphabet_size) {
+  constexpr int kNumHistograms = 3;
+  constexpr int kPrecision = 1 << 10;  // length of each per-histogram lookup array
+  Rng rng(0);  // constructed with the constant 0 on every call
+  for (size_t i = 0; i < kReps; i++) {
+    std::vector<int> distributions[kNumHistograms] = {};
+    for (int j = 0; j < kNumHistograms; j++) {
+      distributions[j].resize(kPrecision);
+      int symbol = 0;
+      int remaining = 1;  // slots still to be filled with the current symbol
+      for (int k = 0; k < kPrecision; k++) {
+        if (remaining == 0) {
+          if (symbol < alphabet_size - 1) symbol++;  // advance, saturating at the last symbol
+          // There is no meaning behind this distribution: it's anything that
+          // will create a nonuniform distribution and won't have too few
+          // symbols usually. Also we want different distributions we get to be
+          // sufficiently dissimilar.
+          remaining = rng.UniformU(0, kPrecision - k + 1);
+        }
+        distributions[j][k] = symbol;
+        remaining--;
+      }
+    }
+    std::vector<Token> symbols;  // NOTE(review): `distributions` is filled above but never read below - confirm intent
+    for (int j = 0; j < 1 << 18; j++) {
+      int context = rng.UniformI(0, kNumHistograms);
+      int value = rng.UniformU(0, kPrecision);  // drawn directly; does not depend on alphabet_size
+      symbols.emplace_back(context, value);
+    }
+    RoundtripTestcase(kNumHistograms + 1, alphabet_size, symbols);  // one more histogram than the contexts drawn above
+  }
+}
+
+TEST(ANSTest, RandomStreamRoundtrip3Small) { RoundtripRandomStream(3, 1, 16); }  // 1 repetition of 16 tokens
+
+TEST(ANSTest, RandomStreamRoundtrip3) { RoundtripRandomStream(3); }  // default reps and stream length
+
+TEST(ANSTest, RandomStreamRoundtripBig) {
+  RoundtripRandomStream(ANS_MAX_ALPHABET_SIZE);  // alphabet_size = 256
+}
+
+TEST(ANSTest, RandomUnbalancedStreamRoundtrip3) {
+  RoundtripRandomUnbalancedStream(3);  // alphabet_size = 3
+}
+
+TEST(ANSTest, RandomUnbalancedStreamRoundtripBig) {
+  RoundtripRandomUnbalancedStream(ANS_MAX_ALPHABET_SIZE);  // alphabet_size = 256
+}
+
+TEST(ANSTest, UintConfigRoundtrip) {  // encode/decode HybridUintConfig lists and compare
+  for (size_t log_alpha_size = 5; log_alpha_size <= 8; log_alpha_size++) {
+    std::vector<HybridUintConfig> uint_config, uint_config_dec;
+    for (size_t i = 0; i < log_alpha_size; i++) {  // all (i, j, k) with j <= i and k <= i - j
+      for (size_t j = 0; j <= i; j++) {
+        for (size_t k = 0; k <= i - j; k++) {
+          uint_config.emplace_back(i, j, k);
+        }
+      }
+    }
+    uint_config.emplace_back(log_alpha_size, 0, 0);  // plus one config with i == log_alpha_size
+    uint_config_dec.resize(uint_config.size());  // decoder output sized to match the input
+    BitWriter writer;
+    BitWriter::Allotment allotment(&writer, 10 * uint_config.size());  // allots 10 units per config
+    EncodeUintConfigs(uint_config, &writer, log_alpha_size);
+    allotment.ReclaimAndCharge(&writer, 0, nullptr);
+    writer.ZeroPadToByte();
+    BitReader br(writer.GetSpan());
+    EXPECT_TRUE(DecodeUintConfigs(log_alpha_size, &uint_config_dec, &br));
+    EXPECT_TRUE(br.Close());
+    for (size_t i = 0; i < uint_config.size(); i++) {  // field-by-field comparison
+      EXPECT_EQ(uint_config[i].split_token, uint_config_dec[i].split_token);
+      EXPECT_EQ(uint_config[i].msb_in_token, uint_config_dec[i].msb_in_token);
+      EXPECT_EQ(uint_config[i].lsb_in_token, uint_config_dec[i].lsb_in_token);
+    }
+  }
+}
+
+void TestCheckpointing(bool ans, bool lz77) {  // !ans sets force_huffman; lz77 selects the kLZ77 method
+  std::vector<std::vector<Token>> input_values(1);
+  for (size_t i = 0; i < 1024; i++) {
+    input_values[0].push_back(Token(0, i % 4));  // values cycle through 0..3
+  }
+  // up to lz77 window size.
+  for (size_t i = 0; i < (1 << 20) - 1022; i++) {
+    input_values[0].push_back(Token(0, (i % 5) + 4));  // values cycle through 4..8
+  }
+  // Ensure that when the window wraps around, new values are different.
+  input_values[0].push_back(Token(0, 0));
+  for (size_t i = 0; i < 1024; i++) {
+    input_values[0].push_back(Token(0, i % 4));
+  }
+
+  std::vector<uint8_t> context_map;
+  EntropyEncodingData codes;
+  HistogramParams params;
+  params.lz77_method = lz77 ? HistogramParams::LZ77Method::kLZ77
+                            : HistogramParams::LZ77Method::kNone;
+  params.force_huffman = !ans;
+
+  BitWriter writer;
+  {
+    auto input_values_copy = input_values;  // encode a copy; the original is compared against below
+    BuildAndEncodeHistograms(params, 1, input_values_copy, &codes, &context_map,
+                             &writer, 0, nullptr);
+    WriteTokens(input_values_copy[0], codes, context_map, &writer, 0, nullptr);
+    writer.ZeroPadToByte();
+  }
+
+  // We do not truncate the output. Reading past the end reads out zeroes
+  // anyway.
+  BitReader br(writer.GetSpan());
+  Status status = true;
+  {
+    BitReaderScopedCloser bc(&br, &status);
+
+    std::vector<uint8_t> dec_context_map;
+    ANSCode decoded_codes;
+    ASSERT_TRUE(DecodeHistograms(&br, 1, &decoded_codes, &dec_context_map));
+    ASSERT_EQ(dec_context_map, context_map);
+    ANSSymbolReader reader(&decoded_codes, &br);
+
+    ANSSymbolReader::Checkpoint checkpoint;
+    size_t br_pos = 0;  // bit position at which `checkpoint` was saved
+    constexpr size_t kInterval = ANSSymbolReader::kMaxCheckpointInterval - 2;  // tokens between checkpoints
+    for (size_t i = 0; i < input_values[0].size(); i++) {
+      if (i % kInterval == 0 && i > 0) {  // rewind, then re-decode the previous kInterval tokens
+        reader.Restore(checkpoint);
+        ASSERT_TRUE(br.Close());
+        br = BitReader(writer.GetSpan());  // fresh reader over the same bytes
+        br.SkipBits(br_pos);  // seek to where the checkpoint was taken
+        for (size_t j = i - kInterval; j < i; j++) {
+          Token symbol = input_values[0][j];
+          uint32_t read_symbol =
+              reader.ReadHybridUint(symbol.context, &br, dec_context_map);
+          ASSERT_EQ(read_symbol, symbol.value) << "j = " << j;
+        }
+      }
+      if (i % kInterval == 0) {  // take a new checkpoint (also at i == 0)
+        reader.Save(&checkpoint);
+        br_pos = br.TotalBitsConsumed();
+      }
+      Token symbol = input_values[0][i];
+      uint32_t read_symbol =
+          reader.ReadHybridUint(symbol.context, &br, dec_context_map);
+      ASSERT_EQ(read_symbol, symbol.value) << "i = " << i;
+    }
+    ASSERT_TRUE(reader.CheckANSFinalState());
+  }
+  EXPECT_TRUE(status);  // set through BitReaderScopedCloser when the scope above ends
+}
+
+TEST(ANSTest, TestCheckpointingANS) {  // force_huffman off, LZ77 off
+  TestCheckpointing(/*ans=*/true, /*lz77=*/false);
+}
+
+TEST(ANSTest, TestCheckpointingPrefix) {  // force_huffman on, LZ77 off
+  TestCheckpointing(/*ans=*/false, /*lz77=*/false);
+}
+
+TEST(ANSTest, TestCheckpointingANSLZ77) {  // force_huffman off, LZ77 on
+  TestCheckpointing(/*ans=*/true, /*lz77=*/true);
+}
+
+TEST(ANSTest, TestCheckpointingPrefixLZ77) {  // force_huffman on, LZ77 on
+  TestCheckpointing(/*ans=*/false, /*lz77=*/true);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/arch_macros.h b/third-party/libjxl/libjxl/lib/jxl/base/arch_macros.h
new file mode 100644
index 0000000000..a98301915e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/arch_macros.h
@@ -0,0 +1,33 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_ARCH_MACROS_H_
+#define LIB_JXL_BASE_ARCH_MACROS_H_
+
+// Defines the JXL_ARCH_* macros.
+
+namespace jxl {
+
+#if defined(__x86_64__) || defined(_M_X64)
+#define JXL_ARCH_X64 1  // 64-bit x86 target
+#else
+#define JXL_ARCH_X64 0  // always defined, so `#if JXL_ARCH_X64` is safe
+#endif
+
+#if defined(__powerpc64__) || defined(_M_PPC)
+#define JXL_ARCH_PPC 1  // PowerPC target
+#else
+#define JXL_ARCH_PPC 0
+#endif
+
+#if defined(__aarch64__) || defined(__arm__)
+#define JXL_ARCH_ARM 1  // ARM target; only the __aarch64__/__arm__ macros are tested here
+#else
+#define JXL_ARCH_ARM 0
+#endif
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_ARCH_MACROS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/bits.h b/third-party/libjxl/libjxl/lib/jxl/base/bits.h
new file mode 100644
index 0000000000..9f86118e72
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/bits.h
@@ -0,0 +1,147 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_BITS_H_
+#define LIB_JXL_BASE_BITS_H_
+
+// Specialized instructions for processing register-sized bit arrays.
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+#if JXL_COMPILER_MSVC
+#include <intrin.h>
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace jxl {
+
+// Empty struct used as a size tag type; overloads below are picked by sizeof(T).
+template <size_t N>
+struct SizeTag {};
+
+template <typename T>
+constexpr bool IsSigned() {
+  return T(0) > T(-1);  // for unsigned T, T(-1) wraps to the maximum value, so this is false
+}
+
+// Counts zero bits above the highest set bit. Undefined results for x == 0.
+static JXL_INLINE JXL_MAYBE_UNUSED size_t
+Num0BitsAboveMS1Bit_Nonzero(SizeTag<4> /* tag */, const uint32_t x) {
+  JXL_DASSERT(x != 0);
+#if JXL_COMPILER_MSVC
+  unsigned long index;
+  _BitScanReverse(&index, x);  // index = position of the highest set bit
+  return 31 - index;
+#else
+  return static_cast<size_t>(__builtin_clz(x));
+#endif
+}
+static JXL_INLINE JXL_MAYBE_UNUSED size_t
+Num0BitsAboveMS1Bit_Nonzero(SizeTag<8> /* tag */, const uint64_t x) {  // 64-bit overload
+  JXL_DASSERT(x != 0);
+#if JXL_COMPILER_MSVC
+#if JXL_ARCH_X64
+  unsigned long index;
+  _BitScanReverse64(&index, x);
+  return 63 - index;
+#else   // JXL_ARCH_X64
+  // _BitScanReverse64 not available
+  uint32_t msb = static_cast<uint32_t>(x >> 32u);  // upper 32 bits
+  unsigned long index;
+  if (msb == 0) {
+    uint32_t lsb = static_cast<uint32_t>(x & 0xFFFFFFFF);
+    _BitScanReverse(&index, lsb);
+    return 63 - index;  // highest set bit lies in the lower half
+  } else {
+    _BitScanReverse(&index, msb);
+    return 31 - index;  // highest set bit lies in the upper half
+  }
+#endif  // JXL_ARCH_X64
+#else
+  return static_cast<size_t>(__builtin_clzll(x));
+#endif
+}
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED size_t
+Num0BitsAboveMS1Bit_Nonzero(const T x) {
+  static_assert(!IsSigned<T>(), "Num0BitsAboveMS1Bit_Nonzero: use unsigned");
+  return Num0BitsAboveMS1Bit_Nonzero(SizeTag<sizeof(T)>(), x);  // dispatch on operand width (4 or 8 bytes)
+}
+
+// Counts zero bits below the lowest set bit. Undefined results for x == 0.
+static JXL_INLINE JXL_MAYBE_UNUSED size_t
+Num0BitsBelowLS1Bit_Nonzero(SizeTag<4> /* tag */, const uint32_t x) {
+  JXL_DASSERT(x != 0);
+#if JXL_COMPILER_MSVC
+  unsigned long index;
+  _BitScanForward(&index, x);  // index = position of the lowest set bit
+  return index;
+#else
+  return static_cast<size_t>(__builtin_ctz(x));
+#endif
+}
+static JXL_INLINE JXL_MAYBE_UNUSED size_t
+Num0BitsBelowLS1Bit_Nonzero(SizeTag<8> /* tag */, const uint64_t x) {  // 64-bit overload
+  JXL_DASSERT(x != 0);
+#if JXL_COMPILER_MSVC
+#if JXL_ARCH_X64
+  unsigned long index;
+  _BitScanForward64(&index, x);
+  return index;
+#else   // JXL_ARCH_X64
+  // _BitScanForward64 not available
+  uint32_t lsb = static_cast<uint32_t>(x & 0xFFFFFFFF);  // lower 32 bits
+  unsigned long index;
+  if (lsb == 0) {
+    uint32_t msb = static_cast<uint32_t>(x >> 32u);
+    _BitScanForward(&index, msb);
+    return 32 + index;  // lowest set bit lies in the upper half
+  } else {
+    _BitScanForward(&index, lsb);
+    return index;  // lowest set bit lies in the lower half
+  }
+#endif  // JXL_ARCH_X64
+#else
+  return static_cast<size_t>(__builtin_ctzll(x));
+#endif
+}
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED size_t Num0BitsBelowLS1Bit_Nonzero(T x) {
+  static_assert(!IsSigned<T>(), "Num0BitsBelowLS1Bit_Nonzero: use unsigned");
+  return Num0BitsBelowLS1Bit_Nonzero(SizeTag<sizeof(T)>(), x);  // dispatch on operand width (4 or 8 bytes)
+}
+
+// Zero bits above the highest set bit. Returns bit width for x == 0.
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED size_t Num0BitsAboveMS1Bit(const T x) {
+  return (x == 0) ? sizeof(T) * 8 : Num0BitsAboveMS1Bit_Nonzero(x);
+}
+
+// Zero bits below the lowest set bit. Returns bit width for x == 0.
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED size_t Num0BitsBelowLS1Bit(const T x) {
+  return (x == 0) ? sizeof(T) * 8 : Num0BitsBelowLS1Bit_Nonzero(x);
+}
+
+// Returns base-2 logarithm, rounded down. x must be nonzero.
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED size_t FloorLog2Nonzero(const T x) {
+  return (sizeof(T) * 8 - 1) ^ Num0BitsAboveMS1Bit_Nonzero(x);  // for 32/64-bit T the XOR equals (bits - 1) - clz
+}
+
+// Returns base-2 logarithm, rounded up. x must be nonzero.
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED size_t CeilLog2Nonzero(const T x) {
+  const size_t floor_log2 = FloorLog2Nonzero(x);
+  if ((x & (x - 1)) == 0) return floor_log2;  // power of two
+  return floor_log2 + 1;  // otherwise round up to the next integer
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_BITS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/byte_order.h b/third-party/libjxl/libjxl/lib/jxl/base/byte_order.h
new file mode 100644
index 0000000000..8966834e08
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/byte_order.h
@@ -0,0 +1,274 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_BYTE_ORDER_H_
+#define LIB_JXL_BASE_BYTE_ORDER_H_
+
+#include <jxl/types.h>
+#include <stdint.h>
+#include <string.h>  // memcpy
+
+#include "lib/jxl/base/compiler_specific.h"
+
+#if JXL_COMPILER_MSVC
+#include <intrin.h>  // _byteswap_*
+#endif
+
+#if (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
+#define JXL_BYTE_ORDER_LITTLE 1
+#else
+// This means that we don't know that the byte order is little endian, in
+// this case we use endian-neutral code that works for both little- and
+// big-endian.
+#define JXL_BYTE_ORDER_LITTLE 0
+#endif
+
+// Returns whether the system is little-endian (least-significant byte first).
+// When the compiler's predefined macros already establish little-endian
+// (JXL_BYTE_ORDER_LITTLE == 1) this is a compile-time constant; otherwise the
+// answer is determined at run time from the in-memory layout of the value 1.
+#if JXL_BYTE_ORDER_LITTLE
+static constexpr bool IsLittleEndian() { return true; }
+#else
+static inline bool IsLittleEndian() {
+  const uint32_t multibyte = 1;
+  uint8_t byte;
+  // Copy only the lowest-addressed byte: it holds the 1 exactly when the
+  // least-significant byte is stored first.
+  memcpy(&byte, &multibyte, 1);
+  return byte == 1;
+}
+#endif
+
+// Returns true when data declared as `endianness` has the opposite byte order
+// from this machine and therefore needs byte swapping. Any value other than
+// JXL_BIG_ENDIAN or JXL_LITTLE_ENDIAN yields false.
+static inline bool SwapEndianness(JxlEndianness endianness) {
+  if (endianness == JXL_BIG_ENDIAN) return IsLittleEndian();
+  if (endianness == JXL_LITTLE_ENDIAN) return !IsLittleEndian();
+  return false;
+}
+
+#if JXL_COMPILER_MSVC
+#define JXL_BSWAP16(x) _byteswap_ushort(x)
+#define JXL_BSWAP32(x) _byteswap_ulong(x)
+#define JXL_BSWAP64(x) _byteswap_uint64(x)
+#else
+#define JXL_BSWAP16(x) __builtin_bswap16(x)
+#define JXL_BSWAP32(x) __builtin_bswap32(x)
+#define JXL_BSWAP64(x) __builtin_bswap64(x)
+#endif
+
+// Reads two bytes at `p` as a big-endian unsigned 16-bit value (p[0] is the
+// high byte) and returns it widened to uint32_t.
+static JXL_INLINE uint32_t LoadBE16(const uint8_t* p) {
+  const uint32_t high = p[0];
+  const uint32_t low = p[1];
+  return (high << 8) | low;
+}
+
+// Reads two bytes at `p` as a little-endian unsigned 16-bit value (p[0] is
+// the low byte) and returns it widened to uint32_t.
+static JXL_INLINE uint32_t LoadLE16(const uint8_t* p) {
+  const uint32_t low = p[0];
+  const uint32_t high = p[1];
+  return (high << 8) | low;
+}
+
+// Reads four bytes at `p` as a big-endian unsigned 32-bit value. `p` need not
+// be aligned.
+static JXL_INLINE uint32_t LoadBE32(const uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+  // Known little-endian host: load the raw bytes and reverse them.
+  uint32_t raw;
+  memcpy(&raw, p, sizeof(raw));
+  return JXL_BSWAP32(raw);
+#else
+  // Byte-order-independent - can't assume this machine is big endian.
+  // Accumulate from the most significant byte (p[0]) downwards.
+  uint32_t value = 0;
+  for (size_t i = 0; i < 4; ++i) {
+    value = (value << 8) | p[i];
+  }
+  return value;
+#endif
+}
+
+// Reads eight bytes at `p` as a big-endian unsigned 64-bit value. `p` need
+// not be aligned.
+static JXL_INLINE uint64_t LoadBE64(const uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+  // Known little-endian host: load the raw bytes and reverse them.
+  uint64_t raw;
+  memcpy(&raw, p, sizeof(raw));
+  return JXL_BSWAP64(raw);
+#else
+  // Byte-order-independent - can't assume this machine is big endian.
+  // Accumulate from the most significant byte (p[0]) downwards.
+  uint64_t value = 0;
+  for (size_t i = 0; i < 8; ++i) {
+    value = (value << 8) | p[i];
+  }
+  return value;
+#endif
+}
+
+// Reads four bytes at `p` as a little-endian unsigned 32-bit value. `p` need
+// not be aligned.
+static JXL_INLINE uint32_t LoadLE32(const uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+  // Known little-endian host: the in-memory bytes already form the value.
+  uint32_t value;
+  memcpy(&value, p, sizeof(value));
+  return value;
+#else
+  // Byte-order-independent - can't assume this machine is big endian.
+  // Byte i contributes bits [8*i, 8*i + 7].
+  uint32_t value = 0;
+  for (size_t i = 0; i < 4; ++i) {
+    value |= static_cast<uint32_t>(p[i]) << (8 * i);
+  }
+  return value;
+#endif
+}
+
+// Reads eight bytes at `p` as a little-endian unsigned 64-bit value. `p` need
+// not be aligned.
+static JXL_INLINE uint64_t LoadLE64(const uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+  // Known little-endian host: the in-memory bytes already form the value.
+  uint64_t value;
+  memcpy(&value, p, sizeof(value));
+  return value;
+#else
+  // Byte-order-independent - can't assume this machine is big endian.
+  // Byte i contributes bits [8*i, 8*i + 7].
+  uint64_t value = 0;
+  for (size_t i = 0; i < 8; ++i) {
+    value |= static_cast<uint64_t>(p[i]) << (8 * i);
+  }
+  return value;
+#endif
+}
+
+// Reads a 32-bit float stored big-endian at `p`: the four bytes are decoded
+// with LoadBE32 and the resulting bit pattern is reinterpreted as a float.
+static JXL_INLINE float LoadBEFloat(const uint8_t* p) {
+  const uint32_t bits = LoadBE32(p);
+  float value;
+  memcpy(&value, &bits, sizeof(value));
+  return value;
+}
+
+// Reads a 32-bit float stored little-endian at `p`: the four bytes are
+// decoded with LoadLE32 and the resulting bit pattern is reinterpreted as a
+// float.
+static JXL_INLINE float LoadLEFloat(const uint8_t* p) {
+  const uint32_t bits = LoadLE32(p);
+  float value;
+  memcpy(&value, &bits, sizeof(value));
+  return value;
+}
+
+// Writes the low 16 bits of `native` to p[0..1] in big-endian order (high
+// byte first). Bits above bit 15 are ignored.
+static JXL_INLINE void StoreBE16(const uint32_t native, uint8_t* p) {
+  const uint8_t high = static_cast<uint8_t>((native >> 8) & 0xFF);
+  const uint8_t low = static_cast<uint8_t>(native & 0xFF);
+  p[0] = high;
+  p[1] = low;
+}
+
+// Writes the low 16 bits of `native` to p[0..1] in little-endian order (low
+// byte first). Bits above bit 15 are ignored.
+static JXL_INLINE void StoreLE16(const uint32_t native, uint8_t* p) {
+  const uint8_t high = static_cast<uint8_t>((native >> 8) & 0xFF);
+  const uint8_t low = static_cast<uint8_t>(native & 0xFF);
+  p[1] = high;
+  p[0] = low;
+}
+
+// Writes `native` to p[0..3] in big-endian order. `p` need not be aligned.
+static JXL_INLINE void StoreBE32(const uint32_t native, uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+  // Known little-endian host: reverse the bytes, then copy them out.
+  const uint32_t swapped = JXL_BSWAP32(native);
+  memcpy(p, &swapped, sizeof(swapped));
+#else
+  // Byte-order-independent - can't assume this machine is big endian.
+  // p[i] receives the byte starting at bit 24 - 8*i.
+  for (size_t i = 0; i < 4; ++i) {
+    p[i] = static_cast<uint8_t>((native >> (24 - 8 * i)) & 0xFF);
+  }
+#endif
+}
+
+// Writes `native` to p[0..7] in big-endian order. `p` need not be aligned.
+static JXL_INLINE void StoreBE64(const uint64_t native, uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+  // Known little-endian host: reverse the bytes, then copy them out.
+  const uint64_t swapped = JXL_BSWAP64(native);
+  memcpy(p, &swapped, sizeof(swapped));
+#else
+  // Byte-order-independent - can't assume this machine is big endian.
+  // p[i] receives the byte starting at bit 56 - 8*i.
+  for (size_t i = 0; i < 8; ++i) {
+    p[i] = static_cast<uint8_t>((native >> (56 - 8 * i)) & 0xFF);
+  }
+#endif
+}
+
+// Writes `native` to p[0..3] in little-endian order. `p` need not be aligned.
+static JXL_INLINE void StoreLE32(const uint32_t native, uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+  // Known little-endian host: the in-memory representation is already right.
+  memcpy(p, &native, sizeof(native));
+#else
+  // Byte-order-independent - can't assume this machine is big endian.
+  // p[i] receives the byte starting at bit 8*i.
+  for (size_t i = 0; i < 4; ++i) {
+    p[i] = static_cast<uint8_t>((native >> (8 * i)) & 0xFF);
+  }
+#endif
+}
+
+// Writes `native` to p[0..7] in little-endian order. `p` need not be aligned.
+static JXL_INLINE void StoreLE64(const uint64_t native, uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+  // Known little-endian host: the in-memory representation is already right.
+  memcpy(p, &native, sizeof(native));
+#else
+  // Byte-order-independent - can't assume this machine is big endian.
+  // p[i] receives the byte starting at bit 8*i.
+  for (size_t i = 0; i < 8; ++i) {
+    p[i] = static_cast<uint8_t>((native >> (8 * i)) & 0xFF);
+  }
+#endif
+}
+
+// Returns the float whose four bytes are those of `x` in reverse order. The
+// value is moved through a uint32_t with memcpy so that only the bit pattern
+// is manipulated.
+static JXL_INLINE float BSwapFloat(float x) {
+  uint32_t bits;
+  memcpy(&bits, &x, sizeof(bits));
+  const uint32_t reversed = JXL_BSWAP32(bits);
+  float result;
+  memcpy(&result, &reversed, sizeof(result));
+  return result;
+}
+
+// Big/Little Endian order.
+// Empty tag types: passing an instance as the first argument selects the
+// big- or little-endian overload of Store16/Store32/Load16/Load32 below.
+struct OrderBE {};
+struct OrderLE {};
+
+// Wrappers for calling from generic code.
+// Store16: the tag argument picks the byte order; the work is done by
+// StoreBE16 / StoreLE16.
+static JXL_INLINE void Store16(OrderBE /*tag*/, const uint32_t native,
+                               uint8_t* p) {
+  StoreBE16(native, p);
+}
+
+static JXL_INLINE void Store16(OrderLE /*tag*/, const uint32_t native,
+                               uint8_t* p) {
+  StoreLE16(native, p);
+}
+
+// Store32: the tag argument picks the byte order; the work is done by
+// StoreBE32 / StoreLE32.
+static JXL_INLINE void Store32(OrderBE /*tag*/, const uint32_t native,
+                               uint8_t* p) {
+  StoreBE32(native, p);
+}
+
+static JXL_INLINE void Store32(OrderLE /*tag*/, const uint32_t native,
+                               uint8_t* p) {
+  StoreLE32(native, p);
+}
+
+// Load16: the tag argument picks the byte order; the work is done by
+// LoadBE16 / LoadLE16.
+static JXL_INLINE uint32_t Load16(OrderBE /*tag*/, const uint8_t* p) {
+  const uint32_t value = LoadBE16(p);
+  return value;
+}
+
+static JXL_INLINE uint32_t Load16(OrderLE /*tag*/, const uint8_t* p) {
+  const uint32_t value = LoadLE16(p);
+  return value;
+}
+
+// Load32: the tag argument picks the byte order; the work is done by
+// LoadBE32 / LoadLE32.
+static JXL_INLINE uint32_t Load32(OrderBE /*tag*/, const uint8_t* p) {
+  const uint32_t value = LoadBE32(p);
+  return value;
+}
+
+static JXL_INLINE uint32_t Load32(OrderLE /*tag*/, const uint8_t* p) {
+  const uint32_t value = LoadLE32(p);
+  return value;
+}
+
+#endif  // LIB_JXL_BASE_BYTE_ORDER_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/cache_aligned.cc b/third-party/libjxl/libjxl/lib/jxl/base/cache_aligned.cc
new file mode 100644
index 0000000000..9a9cc585a1
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/cache_aligned.cc
@@ -0,0 +1,157 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/cache_aligned.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+// Disabled: slower than malloc + alignment.
+#define JXL_USE_MMAP 0
+
+#if JXL_USE_MMAP
+#include <sys/mman.h>
+#endif
+
+#include <algorithm>  // std::max
+#include <atomic>
+#include <hwy/base.h>  // kMaxVectorSize
+#include <limits>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace {
+
+// Bookkeeping record that Allocate() writes directly in front of each payload
+// and Free() reads back. Packed to 1-byte alignment so no padding is inserted
+// between the members.
+#pragma pack(push, 1)
+struct AllocationHeader {
+  // Pointer originally returned by malloc/mmap; handed back to free/munmap.
+  void* allocated;
+  // Total number of bytes requested from malloc/mmap; used for munmap and for
+  // the bytes-in-use statistics.
+  size_t allocated_size;
+  // NOTE(review): never read or written in this file; presumably reserves a
+  // full vector's worth of bytes in front of the payload - confirm.
+  uint8_t left_padding[hwy::kMaxVectorSize];
+};
+#pragma pack(pop)
+
+// Allocation statistics shared by all callers in this translation unit.
+// Count of Allocate() calls that obtained memory.
+std::atomic<uint64_t> num_allocations{0};
+// Sum of `allocated_size` over blocks allocated and not yet freed.
+std::atomic<uint64_t> bytes_in_use{0};
+// Largest value that bytes_in_use has reached, as recorded by Allocate().
+std::atomic<uint64_t> max_bytes_in_use{0};
+
+}  // namespace
+
+// Avoids linker errors in pre-C++17 builds.
+constexpr size_t CacheAligned::kPointerSize;
+constexpr size_t CacheAligned::kCacheLineSize;
+constexpr size_t CacheAligned::kAlignment;
+constexpr size_t CacheAligned::kAlias;
+
+// Writes the number of allocations made so far and the peak number of bytes
+// in use to stderr.
+void CacheAligned::PrintStats() {
+  const size_t total_allocations =
+      static_cast<size_t>(num_allocations.load(std::memory_order_relaxed));
+  const double peak_bytes =
+      static_cast<double>(max_bytes_in_use.load(std::memory_order_relaxed));
+  fprintf(stderr, "Allocations: %" PRIuS " (max bytes in use: %E)\n",
+          total_allocations, peak_bytes);
+}
+
+// Hands out offsets 0, kAlignment, 2 * kAlignment, ... cyclically, wrapping
+// after kAlias / kAlignment distinct values. A shared atomic counter drives
+// the sequence.
+size_t CacheAligned::NextOffset() {
+  constexpr uint32_t kGroups = CacheAligned::kAlias / CacheAligned::kAlignment;
+  static std::atomic<uint32_t> counter{0};
+  const uint32_t ticket = counter.fetch_add(1, std::memory_order_relaxed);
+  return CacheAligned::kAlignment * (ticket % kGroups);
+}
+
+// Allocates at least `payload_size` usable bytes and returns a pointer that
+// lies `offset` bytes past a kAlias-aligned address, or nullptr if the
+// underlying malloc/mmap fails.
+//
+// `offset` must be a multiple of kAlignment and at most kAlias. An offset of
+// zero is replaced by sizeof(AllocationHeader) rounded up to kAlignment so
+// that there is room for the header in front of the payload.
+//
+// The returned pointer must be released with CacheAligned::Free().
+void* CacheAligned::Allocate(const size_t payload_size, size_t offset) {
+  JXL_ASSERT(payload_size <= std::numeric_limits<size_t>::max() / 2);
+  JXL_ASSERT((offset % kAlignment == 0) && offset <= kAlias);
+
+  // What: | misalign | unused | AllocationHeader |payload
+  // Size: |<= kAlias | offset |                  |payload_size
+  //       ^allocated.^aligned.^header............^payload
+  // The header must immediately precede payload, which must remain aligned.
+  // To avoid wasting space, the header resides at the end of `unused`,
+  // which therefore cannot be empty (offset == 0).
+  if (offset == 0) {
+    // SVE/RVV vectors can be large, so we cannot rely on them (including the
+    // padding at the end of AllocationHeader) to fit in kAlignment.
+    offset = hwy::RoundUpTo(sizeof(AllocationHeader), kAlignment);
+  }
+
+#if JXL_USE_MMAP
+  const size_t allocated_size = offset + payload_size;
+  const int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE;
+  void* allocated =
+      mmap(nullptr, allocated_size, PROT_READ | PROT_WRITE, flags, -1, 0);
+  if (allocated == MAP_FAILED) return nullptr;
+  const uintptr_t aligned = reinterpret_cast<uintptr_t>(allocated);
+#else
+  // Over-allocate by kAlias so a kAlias-aligned address always exists inside
+  // the block.
+  const size_t allocated_size = kAlias + offset + payload_size;
+  void* allocated = malloc(allocated_size);
+  if (allocated == nullptr) return nullptr;
+  // Always round up even if already aligned - we already asked for kAlias
+  // extra bytes and there's no way to give them back.
+  uintptr_t aligned = reinterpret_cast<uintptr_t>(allocated) + kAlias;
+  static_assert((kAlias & (kAlias - 1)) == 0, "kAlias must be a power of 2");
+  static_assert(kAlias >= kAlignment, "Cannot align to more than kAlias");
+  aligned &= ~(kAlias - 1);
+#endif
+
+#if 0
+  // No effect.
+  uintptr_t page_aligned = reinterpret_cast<uintptr_t>(allocated);
+  page_aligned &= ~(4096 - 1);
+  if (madvise(reinterpret_cast<void*>(page_aligned), allocated_size,
+              MADV_WILLNEED) != 0) {
+    JXL_NOTIFY_ERROR("madvise failed");
+  }
+#elif 0
+  // INCREASES both first and subsequent decode times.
+  if (mlock(allocated, allocated_size) != 0) {
+    JXL_NOTIFY_ERROR("mlock failed");
+  }
+#endif
+
+  // Update statistics (#allocations and max bytes in use)
+  num_allocations.fetch_add(1, std::memory_order_relaxed);
+  const uint64_t prev_bytes =
+      bytes_in_use.fetch_add(allocated_size, std::memory_order_acq_rel);
+  uint64_t expected_max = max_bytes_in_use.load(std::memory_order_acquire);
+  // Raise the recorded maximum if needed. On failure compare_exchange_strong
+  // refreshes `expected_max`, so each retry recomputes `desired` from the
+  // latest stored value.
+  for (;;) {
+    const uint64_t desired =
+        std::max(expected_max, prev_bytes + allocated_size);
+    if (max_bytes_in_use.compare_exchange_strong(expected_max, desired,
+                                                 std::memory_order_acq_rel)) {
+      break;
+    }
+  }
+
+  const uintptr_t payload = aligned + offset;  // still aligned
+
+  // Stash `allocated` and `allocated_size` inside header for use by Free().
+  AllocationHeader* header = reinterpret_cast<AllocationHeader*>(payload) - 1;
+  header->allocated = allocated;
+  header->allocated_size = allocated_size;
+
+  return JXL_ASSUME_ALIGNED(reinterpret_cast<void*>(payload), 64);
+}
+
+// Releases memory obtained from CacheAligned::Allocate(). A null pointer is
+// accepted and ignored. The AllocationHeader stored immediately in front of
+// the payload supplies the pointer to hand back to free/munmap and the size
+// to deduct from the bytes-in-use counter.
+void CacheAligned::Free(const void* aligned_pointer) {
+  if (aligned_pointer == nullptr) {
+    return;
+  }
+  const uintptr_t payload = reinterpret_cast<uintptr_t>(aligned_pointer);
+  JXL_ASSERT(payload % kAlignment == 0);
+  const AllocationHeader* header =
+      reinterpret_cast<const AllocationHeader*>(payload) - 1;
+
+  // Subtract in the counter's own 64-bit type. Negating the size_t value
+  // first (~size + 1) is only correct where size_t is 64 bits wide; with a
+  // 32-bit size_t the zero-extended result would be added, not subtracted.
+  bytes_in_use.fetch_sub(static_cast<uint64_t>(header->allocated_size),
+                         std::memory_order_acq_rel);
+
+#if JXL_USE_MMAP
+  munmap(header->allocated, header->allocated_size);
+#else
+  free(header->allocated);
+#endif
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/cache_aligned.h b/third-party/libjxl/libjxl/lib/jxl/base/cache_aligned.h
new file mode 100644
index 0000000000..e57df14837
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/cache_aligned.h
@@ -0,0 +1,74 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_CACHE_ALIGNED_H_
+#define LIB_JXL_BASE_CACHE_ALIGNED_H_
+
+// Memory allocator with support for alignment + misalignment.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+// Functions that depend on the cache line size.
+// All members are static; the class only serves as a namespace for the
+// allocator and its constants.
+class CacheAligned {
+ public:
+  // Prints allocation statistics to stderr.
+  static void PrintStats();
+
+  static constexpr size_t kPointerSize = sizeof(void*);
+  static constexpr size_t kCacheLineSize = 64;
+  // To avoid RFOs, match L2 fill size (pairs of lines).
+  static constexpr size_t kAlignment = 2 * kCacheLineSize;
+  // Minimum multiple for which cache set conflicts and/or loads blocked by
+  // preceding stores can occur.
+  static constexpr size_t kAlias = 2048;
+
+  // Returns a 'random' (cyclical) offset suitable for Allocate.
+  static size_t NextOffset();
+
+  // Returns null or memory whose address is congruent to `offset` (mod kAlias).
+  // This reduces cache conflicts and load/store stalls, especially with large
+  // allocations that would otherwise have similar alignments. At least
+  // `payload_size` (which can be zero) bytes will be accessible.
+  // `offset` must be a multiple of kAlignment and no larger than kAlias.
+  static void* Allocate(size_t payload_size, size_t offset);
+
+  // Convenience overload that takes its offset from NextOffset().
+  static void* Allocate(const size_t payload_size) {
+    return Allocate(payload_size, NextOffset());
+  }
+
+  // Releases a pointer previously returned by Allocate(); null is ignored.
+  static void Free(const void* aligned_pointer);
+};
+
+// Stateless deleter for CacheAlignedUniquePtr: forwards the pointer to
+// CacheAligned::Free. Being a functor type, it spares the unique_ptr from
+// having to store a function pointer.
+struct CacheAlignedDeleter {
+  void operator()(uint8_t* aligned_pointer) const {
+    CacheAligned::Free(aligned_pointer);
+  }
+};
+
+using CacheAlignedUniquePtr = std::unique_ptr<uint8_t[], CacheAlignedDeleter>;
+
+// Allocates `bytes` bytes of raw storage through CacheAligned::Allocate
+// (offset chosen by NextOffset) and wraps it in an owning pointer. No
+// constructors are invoked.
+static inline CacheAlignedUniquePtr AllocateArray(const size_t bytes) {
+  void* const raw = CacheAligned::Allocate(bytes);
+  return CacheAlignedUniquePtr(static_cast<uint8_t*>(raw),
+                               CacheAlignedDeleter());
+}
+
+// Same as the single-argument overload, but with a caller-chosen `offset`
+// that is passed through to CacheAligned::Allocate. No constructors are
+// invoked.
+static inline CacheAlignedUniquePtr AllocateArray(const size_t bytes,
+                                                  const size_t offset) {
+  void* const raw = CacheAligned::Allocate(bytes, offset);
+  return CacheAlignedUniquePtr(static_cast<uint8_t*>(raw),
+                               CacheAlignedDeleter());
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_CACHE_ALIGNED_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/compiler_specific.h b/third-party/libjxl/libjxl/lib/jxl/base/compiler_specific.h
new file mode 100644
index 0000000000..702ff8e058
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/compiler_specific.h
@@ -0,0 +1,157 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_COMPILER_SPECIFIC_H_
+#define LIB_JXL_BASE_COMPILER_SPECIFIC_H_
+
+// Macros for compiler version + nonstandard keywords, e.g. __builtin_expect.
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "lib/jxl/base/sanitizer_definitions.h"
+
+// #if is shorter and safer than #ifdef. *_VERSION are zero if not detected,
+// otherwise 100 * major + minor version. Note that other packages check for
+// #ifdef COMPILER_MSVC, so we cannot use that same name.
+
+#ifdef _MSC_VER
+#define JXL_COMPILER_MSVC _MSC_VER
+#else
+#define JXL_COMPILER_MSVC 0
+#endif
+
+#ifdef __GNUC__
+#define JXL_COMPILER_GCC (__GNUC__ * 100 + __GNUC_MINOR__)
+#else
+#define JXL_COMPILER_GCC 0
+#endif
+
+#ifdef __clang__
+#define JXL_COMPILER_CLANG (__clang_major__ * 100 + __clang_minor__)
+// Clang pretends to be GCC for compatibility.
+#undef JXL_COMPILER_GCC
+#define JXL_COMPILER_GCC 0
+#else
+#define JXL_COMPILER_CLANG 0
+#endif
+
+#if JXL_COMPILER_MSVC
+#define JXL_RESTRICT __restrict
+#elif JXL_COMPILER_GCC || JXL_COMPILER_CLANG
+#define JXL_RESTRICT __restrict__
+#else
+#define JXL_RESTRICT
+#endif
+
+#if JXL_COMPILER_MSVC
+#define JXL_INLINE __forceinline
+#define JXL_NOINLINE __declspec(noinline)
+#else
+#define JXL_INLINE inline __attribute__((always_inline))
+#define JXL_NOINLINE __attribute__((noinline))
+#endif
+
+#if JXL_COMPILER_MSVC
+#define JXL_NORETURN __declspec(noreturn)
+#elif JXL_COMPILER_GCC || JXL_COMPILER_CLANG
+#define JXL_NORETURN __attribute__((noreturn))
+#else
+#define JXL_NORETURN
+#endif
+
+#if JXL_COMPILER_MSVC
+#define JXL_UNREACHABLE_BUILTIN __assume(false)
+#elif JXL_COMPILER_CLANG || JXL_COMPILER_GCC >= 405
+#define JXL_UNREACHABLE_BUILTIN __builtin_unreachable()
+#else
+#define JXL_UNREACHABLE_BUILTIN
+#endif
+
+#if JXL_COMPILER_MSVC
+#define JXL_MAYBE_UNUSED
+#else
+// Encountered "attribute list cannot appear here" when using the C++17
+// [[maybe_unused]], so only use the old style attribute for now.
+#define JXL_MAYBE_UNUSED __attribute__((unused))
+#endif
+
+// MSAN execution won't hurt if some code is not inlined, but this can greatly
+// improve compilation time. Unfortunately this macro can not be used just
+// everywhere - inside header files it leads to "multiple definition" error;
+// though it would be better not to have JXL_INLINE in header overall.
+#if JXL_MEMORY_SANITIZER || JXL_ADDRESS_SANITIZER || JXL_THREAD_SANITIZER
+#define JXL_MAYBE_INLINE JXL_MAYBE_UNUSED
+#else
+#define JXL_MAYBE_INLINE JXL_INLINE
+#endif
+
+#if JXL_COMPILER_MSVC
+// Unsupported, __assume is not the same.
+#define JXL_LIKELY(expr) expr
+#define JXL_UNLIKELY(expr) expr
+#else
+#define JXL_LIKELY(expr) __builtin_expect(!!(expr), 1)
+#define JXL_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
+#endif
+
+// Returns a void* pointer which the compiler then assumes is N-byte aligned.
+// Example: float* JXL_RESTRICT aligned = (float*)JXL_ASSUME_ALIGNED(in, 32);
+//
+// The assignment semantics are required by GCC/Clang. ICC provides an in-place
+// __assume_aligned, whereas MSVC's __assume appears unsuitable.
+#if JXL_COMPILER_CLANG
+// Early versions of Clang did not support __builtin_assume_aligned.
+#define JXL_HAS_ASSUME_ALIGNED __has_builtin(__builtin_assume_aligned)
+#elif JXL_COMPILER_GCC
+#define JXL_HAS_ASSUME_ALIGNED 1
+#else
+#define JXL_HAS_ASSUME_ALIGNED 0
+#endif
+
+#if JXL_HAS_ASSUME_ALIGNED
+#define JXL_ASSUME_ALIGNED(ptr, align) __builtin_assume_aligned((ptr), (align))
+#else
+#define JXL_ASSUME_ALIGNED(ptr, align) (ptr) /* not supported */
+#endif
+
+#ifdef __has_attribute
+#define JXL_HAVE_ATTRIBUTE(x) __has_attribute(x)
+#else
+#define JXL_HAVE_ATTRIBUTE(x) 0
+#endif
+
+// Raises warnings if the function return value is unused. Should appear as the
+// first part of a function definition/declaration.
+#if JXL_HAVE_ATTRIBUTE(nodiscard)
+#define JXL_MUST_USE_RESULT [[nodiscard]]
+#elif JXL_COMPILER_CLANG && JXL_HAVE_ATTRIBUTE(warn_unused_result)
+#define JXL_MUST_USE_RESULT __attribute__((warn_unused_result))
+#else
+#define JXL_MUST_USE_RESULT
+#endif
+
+// Disable certain -fsanitize flags for functions that are expected to include
+// things like unsigned integer overflow. For example use in the function
+// declaration JXL_NO_SANITIZE("unsigned-integer-overflow") to silence unsigned
+// integer overflow ubsan messages.
+#if JXL_COMPILER_CLANG && JXL_HAVE_ATTRIBUTE(no_sanitize)
+#define JXL_NO_SANITIZE(X) __attribute__((no_sanitize(X)))
+#else
+#define JXL_NO_SANITIZE(X)
+#endif
+
+#if JXL_HAVE_ATTRIBUTE(__format__)
+#define JXL_FORMAT(idx_fmt, idx_arg) \
+  __attribute__((__format__(__printf__, idx_fmt, idx_arg)))
+#else
+#define JXL_FORMAT(idx_fmt, idx_arg)
+#endif
+
+#if JXL_COMPILER_MSVC
+using ssize_t = intptr_t;
+#endif
+
+#endif  // LIB_JXL_BASE_COMPILER_SPECIFIC_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/data_parallel.cc b/third-party/libjxl/libjxl/lib/jxl/base/data_parallel.cc
new file mode 100644
index 0000000000..20a911255c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/data_parallel.cc
@@ -0,0 +1,23 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/data_parallel.h"
+
+namespace jxl {
+
+// static
+// Fallback runner without threading: calls `init` announcing one thread and,
+// if that returns 0, invokes `func` for every value in
+// [start_range, end_range) in increasing order with thread id 0. A nonzero
+// result from `init` is returned unchanged and no task is run.
+JxlParallelRetCode ThreadPool::SequentialRunnerStatic(
+    void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+    JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range) {
+  const JxlParallelRetCode init_status = (*init)(jpegxl_opaque, 1);
+  if (init_status != 0) return init_status;
+
+  uint32_t task = start_range;
+  while (task < end_range) {
+    (*func)(jpegxl_opaque, task, 0);
+    ++task;
+  }
+  return 0;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/data_parallel.h b/third-party/libjxl/libjxl/lib/jxl/base/data_parallel.h
new file mode 100644
index 0000000000..ba7e7adfad
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/data_parallel.h
@@ -0,0 +1,120 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_DATA_PARALLEL_H_
+#define LIB_JXL_BASE_DATA_PARALLEL_H_
+
+// Portable, low-overhead C++11 ThreadPool alternative to OpenMP for
+// data-parallel computations.
+
+#include <jxl/parallel_runner.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/status.h"
+#if JXL_COMPILER_MSVC
+// suppress warnings about the const & applied to function types
+#pragma warning(disable : 4180)
+#endif
+
+namespace jxl {
+
+// Dispatches data-parallel work through a caller-supplied JxlParallelRunner,
+// or through a built-in sequential runner when none is given.
+class ThreadPool {
+ public:
+  // A null `runner` selects SequentialRunnerStatic (no threading); in that
+  // case `runner_opaque` is ignored and `this` is stored as the opaque value.
+  ThreadPool(JxlParallelRunner runner, void* runner_opaque)
+      : runner_(runner ? runner : &ThreadPool::SequentialRunnerStatic),
+        runner_opaque_(runner ? runner_opaque : static_cast<void*>(this)) {}
+
+  // Not copyable. The deleted member is the copy assignment operator.
+  ThreadPool(const ThreadPool&) = delete;
+  ThreadPool& operator=(const ThreadPool&) = delete;
+
+  JxlParallelRunner runner() const { return runner_; }
+  void* runner_opaque() const { return runner_opaque_; }
+
+  // Runs init_func(num_threads) followed by data_func(task, thread) on worker
+  // thread(s) for every task in [begin, end). init_func() must return a Status
+  // indicating whether the initialization succeeded.
+  // "thread" is an integer smaller than num_threads.
+  // Not thread-safe - no two calls to Run may overlap.
+  // Subsequent calls will reuse the same threads.
+  //
+  // Returns true without calling either function when begin == end; otherwise
+  // returns whether the runner reported success.
+  //
+  // Precondition: begin <= end.
+  template <class InitFunc, class DataFunc>
+  Status Run(uint32_t begin, uint32_t end, const InitFunc& init_func,
+             const DataFunc& data_func, const char* caller = "") {
+    JXL_ASSERT(begin <= end);
+    if (begin == end) return true;
+    RunCallState<InitFunc, DataFunc> call_state(init_func, data_func);
+    // The runner_ uses the C convention and returns 0 on success (nonzero in
+    // case of error), so we convert it to a Status.
+    return (*runner_)(runner_opaque_, static_cast<void*>(&call_state),
+                      &call_state.CallInitFunc, &call_state.CallDataFunc, begin,
+                      end) == 0;
+  }
+
+  // Use this as init_func when no initialization is needed.
+  static Status NoInit(size_t num_threads) { return true; }
+
+ private:
+  // class holding the state of a Run() call to pass to the runner_ as an
+  // opaque_jpegxl pointer. It stores references only, so it must not outlive
+  // the functors passed to Run().
+  template <class InitFunc, class DataFunc>
+  class RunCallState final {
+   public:
+    RunCallState(const InitFunc& init_func, const DataFunc& data_func)
+        : init_func_(init_func), data_func_(data_func) {}
+
+    // JxlParallelRunInit interface.
+    static int CallInitFunc(void* jpegxl_opaque, size_t num_threads) {
+      const auto* self =
+          static_cast<RunCallState<InitFunc, DataFunc>*>(jpegxl_opaque);
+      // Returns -1 when the internal init function returns false Status to
+      // indicate an error.
+      return self->init_func_(num_threads) ? 0 : -1;
+    }
+
+    // JxlParallelRunFunction interface.
+    static void CallDataFunc(void* jpegxl_opaque, uint32_t value,
+                             size_t thread_id) {
+      const auto* self =
+          static_cast<RunCallState<InitFunc, DataFunc>*>(jpegxl_opaque);
+      return self->data_func_(value, thread_id);
+    }
+
+   private:
+    const InitFunc& init_func_;
+    const DataFunc& data_func_;
+  };
+
+  // Default JxlParallelRunner used when no runner is provided by the
+  // caller. This runner doesn't use any threading and thread_id is always 0.
+  static JxlParallelRetCode SequentialRunnerStatic(
+      void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+      JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range);
+
+  // The caller supplied runner function and its opaque void*.
+  const JxlParallelRunner runner_;
+  void* const runner_opaque_;
+};
+
+// Runs the tasks in [begin, end) on `pool`. When `pool` is null, a temporary
+// ThreadPool constructed without a runner is used instead.
+template <class InitFunc, class DataFunc>
+Status RunOnPool(ThreadPool* pool, const uint32_t begin, const uint32_t end,
+                 const InitFunc& init_func, const DataFunc& data_func,
+                 const char* caller) {
+  if (pool != nullptr) {
+    return pool->Run(begin, end, init_func, data_func, caller);
+  }
+  ThreadPool fallback_pool(nullptr, nullptr);
+  return fallback_pool.Run(begin, end, init_func, data_func, caller);
+}
+
+}  // namespace jxl
+#if JXL_COMPILER_MSVC
+#pragma warning(default : 4180)
+#endif
+
+#endif  // LIB_JXL_BASE_DATA_PARALLEL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/float.h b/third-party/libjxl/libjxl/lib/jxl/base/float.h
new file mode 100644
index 0000000000..b17413fc19
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/float.h
@@ -0,0 +1,99 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_FLOAT_H_
+#define LIB_JXL_BASE_FLOAT_H_
+
+#include <jxl/types.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+namespace {
+// Based on highway scalar implementation, for testing
+// Expands a 16-bit pattern (1 sign bit, 5 exponent bits, 10 mantissa bits)
+// into a float. An exponent field of zero is decoded as zero/subnormal; every
+// other exponent field is re-biased and copied straight into the float
+// layout.
+float LoadFloat16(uint16_t bits16) {
+  const uint32_t negative = bits16 >> 15;
+  const uint32_t exponent16 = (bits16 >> 10) & 0x1F;
+  const uint32_t fraction16 = bits16 & 0x3FF;
+
+  if (exponent16 != 0) {
+    // Normalized: convert the representation directly (faster than
+    // ldexp/tables).
+    const uint32_t exponent32 = exponent16 + (127 - 15);
+    const uint32_t fraction32 = fraction16 << (23 - 10);
+    const uint32_t bits32 = (negative << 31) | (exponent32 << 23) | fraction32;
+
+    float value;
+    memcpy(&value, &bits32, 4);
+    return value;
+  }
+
+  // Subnormal or zero: fraction / 1024 scaled by 2^-14.
+  const float magnitude =
+      (1.0f / 16384) * (static_cast<float>(fraction16) * (1.0f / 1024));
+  return negative ? -magnitude : magnitude;
+}
+}  // namespace
+
+// Converts `count` samples starting at `src` to float and passes each one to
+// `callback(i, value)`. Sample i is read from `src + stride * i`, so `stride`
+// is a distance in bytes.
+//
+// `type` selects the sample format:
+//   JXL_TYPE_FLOAT   - 32-bit float, used as is;
+//   JXL_TYPE_UINT8   - one byte, multiplied by `scale`;
+//   JXL_TYPE_UINT16  - 16-bit unsigned integer, multiplied by `scale`;
+//   JXL_TYPE_FLOAT16 - 16-bit float decoded with LoadFloat16.
+// `little_endian` selects the byte order for all multi-byte formats and is
+// not consulted for JXL_TYPE_UINT8. `scale` is applied to the two integer
+// formats only.
+//
+// Returns true on success, or a failure Status for any other `type`.
+template <typename SaveFloatAtFn>
+static Status JXL_INLINE LoadFloatRow(const uint8_t* src, size_t count,
+                                      size_t stride, JxlDataType type,
+                                      bool little_endian, float scale,
+                                      SaveFloatAtFn callback) {
+  // Each format has one loop per byte order so the endianness test stays
+  // outside the per-sample loop.
+  switch (type) {
+    case JXL_TYPE_FLOAT:
+      if (little_endian) {
+        for (size_t i = 0; i < count; ++i) {
+          callback(i, LoadLEFloat(src + stride * i));
+        }
+      } else {
+        for (size_t i = 0; i < count; ++i) {
+          callback(i, LoadBEFloat(src + stride * i));
+        }
+      }
+      return true;
+
+    case JXL_TYPE_UINT8:
+      for (size_t i = 0; i < count; ++i) {
+        callback(i, src[stride * i] * scale);
+      }
+      return true;
+
+    case JXL_TYPE_UINT16:
+      if (little_endian) {
+        for (size_t i = 0; i < count; ++i) {
+          callback(i, LoadLE16(src + stride * i) * scale);
+        }
+      } else {
+        for (size_t i = 0; i < count; ++i) {
+          callback(i, LoadBE16(src + stride * i) * scale);
+        }
+      }
+      return true;
+
+    case JXL_TYPE_FLOAT16:
+      if (little_endian) {
+        for (size_t i = 0; i < count; ++i) {
+          callback(i, LoadFloat16(LoadLE16(src + stride * i)));
+        }
+      } else {
+        for (size_t i = 0; i < count; ++i) {
+          callback(i, LoadFloat16(LoadBE16(src + stride * i)));
+        }
+      }
+      return true;
+
+    default:
+      return JXL_FAILURE("Unsupported sample format");
+  }
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_FLOAT_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/iaca.h b/third-party/libjxl/libjxl/lib/jxl/base/iaca.h
new file mode 100644
index 0000000000..e5732dae5c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/iaca.h
@@ -0,0 +1,65 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_IACA_H_
+#define LIB_JXL_BASE_IACA_H_
+
+#include "lib/jxl/base/compiler_specific.h"
+
+// IACA (Intel's Code Analyzer) analyzes instruction latencies, but only for
+// code between special markers. These functions embed such markers in an
+// executable, but only for reading via IACA - they deliberately trigger a
+// crash if executed to ensure they are removed in normal builds.
+
+#ifndef JXL_IACA_ENABLED
+#define JXL_IACA_ENABLED 0
+#endif
+
+namespace jxl {
+
+// Call before the region of interest.
+// Emits the IACA start marker. The body is empty unless JXL_IACA_ENABLED is
+// nonzero and the compiler is GCC or Clang.
+static JXL_INLINE void BeginIACA() {
+#if JXL_IACA_ENABLED && (JXL_COMPILER_GCC || JXL_COMPILER_CLANG)
+  asm volatile(
+      // UD2 "instruction" raises an invalid opcode exception.
+      ".byte 0x0F, 0x0B\n\t"
+      // Magic sequence recognized by IACA (MOV + addr32 fs:NOP). This actually
+      // clobbers EBX, but we don't care because the code won't be run, and we
+      // want IACA to observe the same code the compiler would have generated
+      // without this marker.
+      "movl $111, %%ebx\n\t"
+      ".byte 0x64, 0x67, 0x90\n\t"
+      :
+      :
+      // (Allegedly) clobbering memory may prevent reordering.
+      : "memory");
+#endif
+}
+
+// Call after the region of interest.
+// Emits the IACA end marker: the same magic sequence as BeginIACA but with
+// the value 222, followed by UD2. The body is empty unless JXL_IACA_ENABLED
+// is nonzero and the compiler is GCC or Clang.
+static JXL_INLINE void EndIACA() {
+#if JXL_IACA_ENABLED && (JXL_COMPILER_GCC || JXL_COMPILER_CLANG)
+  asm volatile(
+      // See above.
+      "movl $222, %%ebx\n\t"
+      ".byte 0x64, 0x67, 0x90\n\t"
+      // UD2
+      ".byte 0x0F, 0x0B\n\t"
+      :
+      :
+      // (Allegedly) clobbering memory may prevent reordering.
+      : "memory");
+#endif
+}
+
+// Add to a scope to mark a region.
+// The constructor calls BeginIACA() and the destructor calls EndIACA(), so
+// the marked region spans the lifetime of the object.
+struct ScopeIACA {
+  JXL_INLINE ScopeIACA() { BeginIACA(); }
+  JXL_INLINE ~ScopeIACA() { EndIACA(); }
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_IACA_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/os_macros.h b/third-party/libjxl/libjxl/lib/jxl/base/os_macros.h
new file mode 100644
index 0000000000..84d0b82bf5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/os_macros.h
@@ -0,0 +1,50 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_OS_MACROS_H_
+#define LIB_JXL_BASE_OS_MACROS_H_
+
+// Defines the JXL_OS_* macros.
+
+#if defined(_WIN32) || defined(_WIN64)
+#define JXL_OS_WIN 1
+#else
+#define JXL_OS_WIN 0
+#endif
+
+#ifdef __linux__
+#define JXL_OS_LINUX 1
+#else
+#define JXL_OS_LINUX 0
+#endif
+
+#ifdef __APPLE__
+#define JXL_OS_MAC 1
+#else
+#define JXL_OS_MAC 0
+#endif
+
+#define JXL_OS_IOS 0
+#ifdef __APPLE__
+#include <TargetConditionals.h>
+#if TARGET_OS_IPHONE
+#undef JXL_OS_IOS
+#define JXL_OS_IOS 1
+#endif
+#endif
+
+#ifdef __FreeBSD__
+#define JXL_OS_FREEBSD 1
+#else
+#define JXL_OS_FREEBSD 0
+#endif
+
+#ifdef __HAIKU__
+#define JXL_OS_HAIKU 1
+#else
+#define JXL_OS_HAIKU 0
+#endif
+
+#endif  // LIB_JXL_BASE_OS_MACROS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/override.h b/third-party/libjxl/libjxl/lib/jxl/base/override.h
new file mode 100644
index 0000000000..1f8b657974
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/override.h
@@ -0,0 +1,29 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_OVERRIDE_H_
+#define LIB_JXL_BASE_OVERRIDE_H_
+
+// 'Trool' for command line arguments: force enable/disable, or use default.
+
+namespace jxl {
+
+// Tri-state switch: kOn / kOff force a feature (typically a FrameHeader flag)
+// on or off, while kDefault leaves the caller's own choice untouched.
+enum class Override : int { kOn = 1, kOff = 0, kDefault = -1 };
+
+// Maps a plain bool onto the two forcing states; never yields kDefault.
+static inline Override OverrideFromBool(bool flag) {
+  if (flag) return Override::kOn;
+  return Override::kOff;
+}
+// Resolves `o`: kOn -> true, kOff -> false, anything else -> default_condition.
+static inline bool ApplyOverride(Override o, bool default_condition) {
+  return o == Override::kOn || (o != Override::kOff && default_condition);
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_OVERRIDE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/padded_bytes.cc b/third-party/libjxl/libjxl/lib/jxl/base/padded_bytes.cc
new file mode 100644
index 0000000000..11e4bff6fe
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/padded_bytes.cc
@@ -0,0 +1,63 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/padded_bytes.h"
+
+namespace jxl {
+
+void PaddedBytes::IncreaseCapacityTo(size_t capacity) {
+  JXL_ASSERT(capacity > capacity_);
+  // Grow to at least 1.5x the old capacity (minimum 64) for amortized appends.
+  size_t new_capacity = std::max(capacity, 3 * capacity_ / 2);
+  new_capacity = std::max<size_t>(64, new_capacity);
+
+  // BitWriter writes up to 7 bytes past the end.
+  CacheAlignedUniquePtr new_data = AllocateArray(new_capacity + 8);
+  if (new_data == nullptr) {
+    // Allocation failed, discard all data to ensure this is noticed. The old
+    // buffer must go too: callers detect failure via data() == nullptr.
+    data_.reset();
+    size_ = capacity_ = 0;
+    return;
+  }
+  if (data_ == nullptr) {
+    // First allocation: ensure first byte is initialized (won't be copied).
+    new_data[0] = 0;
+  } else {
+    // Subsequent resize: copy existing data to new location.
+    memcpy(new_data.get(), data_.get(), size_);
+    // Ensure that the first new byte is initialized, to allow write_bits to
+    // safely append to the newly-resized PaddedBytes.
+    new_data[size_] = 0;
+  }
+  capacity_ = new_capacity;
+  std::swap(new_data, data_);
+}
+
+void PaddedBytes::assign(const uint8_t* new_begin, const uint8_t* new_end) {
+  JXL_DASSERT(new_begin <= new_end);
+  const size_t new_size = static_cast<size_t>(new_end - new_begin);
+
+  // memcpy requires non-overlapping ranges, and resizing might invalidate the
+  // new range. Neither happens if the new range is completely to the left or
+  // right of the _allocated_ range (irrespective of size_).
+  const uint8_t* allocated_end = begin() + capacity_;
+  const bool outside = new_end <= begin() || new_begin >= allocated_end;
+  if (outside) {
+    resize(new_size);  // grow or shrink; size_ is 0 if the allocation failed
+    if (size_ != 0) memcpy(data(), new_begin, size_);  // never copy to null
+    return;
+  }
+
+  // There is overlap. The new size cannot be larger because we own the memory
+  // and the new range cannot include anything outside the allocated range.
+  JXL_ASSERT(new_size <= capacity_);
+
+  // memmove allows overlap and capacity_ is sufficient.
+  memmove(data(), new_begin, new_size);
+  size_ = new_size;  // shrink
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/padded_bytes.h b/third-party/libjxl/libjxl/lib/jxl/base/padded_bytes.h
new file mode 100644
index 0000000000..4534ddf863
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/padded_bytes.h
@@ -0,0 +1,197 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_PADDED_BYTES_H_
+#define LIB_JXL_BASE_PADDED_BYTES_H_
+
+// std::vector replacement with padding to reduce bounds checks in WriteBits
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>  // memcpy
+
+#include <algorithm>  // max
+#include <initializer_list>
+#include <utility>  // swap
+
+#include "lib/jxl/base/cache_aligned.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Provides a subset of the std::vector interface with some differences:
+// - allows BitWriter to write 64 bits at a time without bounds checking;
+// - ONLY zero-initializes the first byte (required by BitWriter);
+// - ensures cache-line alignment.
+class PaddedBytes {
+ public:
+  // Required for output params.
+  PaddedBytes() : size_(0), capacity_(0) {}
+
+  explicit PaddedBytes(size_t size) : size_(size), capacity_(0) {
+    if (size != 0) IncreaseCapacityTo(size);
+  }
+
+  PaddedBytes(size_t size, uint8_t value) : size_(size), capacity_(0) {
+    if (size != 0) {
+      IncreaseCapacityTo(size);
+    }
+    if (size_ != 0) {
+      memset(data(), value, size);
+    }
+  }
+
+  PaddedBytes(const PaddedBytes& other) : size_(other.size_), capacity_(0) {
+    if (size_ != 0) IncreaseCapacityTo(size_);
+    if (data() != nullptr) memcpy(data(), other.data(), size_);
+  }
+  PaddedBytes& operator=(const PaddedBytes& other) {
+    // Self-assignment is safe.
+    resize(other.size());
+    if (data() != nullptr) memmove(data(), other.data(), size_);
+    return *this;
+  }
+
+  // default is not OK - need to set other.size_ to 0!
+  PaddedBytes(PaddedBytes&& other) noexcept
+      : size_(other.size_),
+        capacity_(other.capacity_),
+        data_(std::move(other.data_)) {
+    other.size_ = other.capacity_ = 0;
+  }
+  PaddedBytes& operator=(PaddedBytes&& other) noexcept {
+    size_ = other.size_;
+    capacity_ = other.capacity_;
+    data_ = std::move(other.data_);
+
+    if (&other != this) {
+      other.size_ = other.capacity_ = 0;
+    }
+    return *this;
+  }
+
+  void swap(PaddedBytes& other) {
+    std::swap(size_, other.size_);
+    std::swap(capacity_, other.capacity_);
+    std::swap(data_, other.data_);
+  }
+
+  void reserve(size_t capacity) {
+    if (capacity > capacity_) IncreaseCapacityTo(capacity);
+  }
+
+  // NOTE: unlike vector, this does not initialize the new data!
+  // However, we guarantee that write_bits can safely append after
+  // the resize, as we zero-initialize the first new byte of data.
+  // If size < capacity(), does not invalidate the memory.
+  void resize(size_t size) {
+    if (size > capacity_) IncreaseCapacityTo(size);
+    size_ = (data() == nullptr) ? 0 : size;
+  }
+
+  // resize(size) plus explicit initialization of the new data with `value`.
+  void resize(size_t size, uint8_t value) {
+    size_t old_size = size_;
+    resize(size);
+    if (size_ > old_size) {
+      memset(data() + old_size, value, size_ - old_size);
+    }
+  }
+
+  // Amortized constant complexity due to exponential growth.
+  void push_back(uint8_t x) {
+    if (size_ == capacity_) {
+      IncreaseCapacityTo(capacity_ + 1);
+      if (data() == nullptr) return;
+    }
+
+    data_[size_++] = x;
+  }
+
+  size_t size() const { return size_; }
+  size_t capacity() const { return capacity_; }
+
+  uint8_t* data() { return data_.get(); }
+  const uint8_t* data() const { return data_.get(); }
+
+  // std::vector operations implemented in terms of the public interface above.
+
+  void clear() { resize(0); }
+  bool empty() const { return size() == 0; }
+
+  void assign(std::initializer_list<uint8_t> il) {
+    resize(il.size());  // Leaves size_ == 0 if the allocation failed.
+    if (size_ != 0) memcpy(data(), il.begin(), size_);
+  }
+
+  // Replaces data() with [new_begin, new_end); potentially reallocates.
+  void assign(const uint8_t* new_begin, const uint8_t* new_end);
+
+  uint8_t* begin() { return data(); }
+  const uint8_t* begin() const { return data(); }
+  uint8_t* end() { return begin() + size(); }
+  const uint8_t* end() const { return begin() + size(); }
+
+  uint8_t& operator[](const size_t i) {
+    BoundsCheck(i);
+    return data()[i];
+  }
+  const uint8_t& operator[](const size_t i) const {
+    BoundsCheck(i);
+    return data()[i];
+  }
+
+  uint8_t& back() {
+    JXL_ASSERT(size() != 0);
+    return data()[size() - 1];
+  }
+  const uint8_t& back() const {
+    JXL_ASSERT(size() != 0);
+    return data()[size() - 1];
+  }
+
+  template <typename T>
+  void append(const T& other) {
+    append(reinterpret_cast<const uint8_t*>(other.data()),
+           reinterpret_cast<const uint8_t*>(other.data()) + other.size());
+  }
+
+  void append(const uint8_t* begin, const uint8_t* end) {
+    if (end - begin <= 0) return;
+    const size_t old_size = size();
+    resize(old_size + (end - begin));
+    // Allocation failure resets size_ to 0; then there is nothing to copy into.
+    if (size() > old_size) memcpy(data() + old_size, begin, end - begin);
+  }
+
+ private:
+  void BoundsCheck(size_t i) const {
+    // <= is safe due to padding and required by BitWriter.
+    JXL_ASSERT(i <= size());
+  }
+
+  // Copies existing data to newly allocated "data_". If allocation fails,
+  // data() == nullptr and size_ = capacity_ = 0.
+  // The new capacity will be at least 1.5 times the old capacity. This ensures
+  // that we avoid quadratic behaviour.
+  void IncreaseCapacityTo(size_t capacity);
+
+  size_t size_;
+  size_t capacity_;
+  CacheAlignedUniquePtr data_;
+};
+
+template <typename T>
+static inline void Append(const T& s, PaddedBytes* out,
+                          size_t* JXL_RESTRICT byte_pos) {
+  JXL_CHECK(*byte_pos + s.size() <= out->size());  // validate before writing
+  memcpy(out->data() + *byte_pos, s.data(), s.size());
+  *byte_pos += s.size();  // advance the caller's cursor past the copied bytes
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_PADDED_BYTES_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/printf_macros.h b/third-party/libjxl/libjxl/lib/jxl/base/printf_macros.h
new file mode 100644
index 0000000000..3215052afd
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/printf_macros.h
@@ -0,0 +1,34 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_PRINTF_MACROS_H_
+#define LIB_JXL_BASE_PRINTF_MACROS_H_
+
+// Format string macros. These should be included after any other system
+// library since those may unconditionally define these, depending on the
+// platform.
+
+// PRIuS and PRIdS macros to print size_t and ssize_t respectively.
+#if !defined(PRIdS)
+#if defined(_WIN64)
+#define PRIdS "lld"
+#elif defined(_WIN32)
+#define PRIdS "d"
+#else
+#define PRIdS "zd"
+#endif
+#endif  // PRIdS
+
+#if !defined(PRIuS)
+#if defined(_WIN64)
+#define PRIuS "llu"
+#elif defined(_WIN32)
+#define PRIuS "u"
+#else
+#define PRIuS "zu"
+#endif
+#endif  // PRIuS
+
+#endif  // LIB_JXL_BASE_PRINTF_MACROS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/random.cc b/third-party/libjxl/libjxl/lib/jxl/base/random.cc
new file mode 100644
index 0000000000..c99f88921c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/random.cc
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/random.h"
+
+#include <cmath>
+
+namespace jxl {
+
+Rng::GeometricDistribution::GeometricDistribution(float p)
+    : inv_log_1mp(1.0 / std::log(1 - p)) {}  // caches 1 / log(1 - p)
+
+uint32_t Rng::Geometric(const GeometricDistribution& dist) {
+  const float u = UniformF(0, 1);  // uniform sample from UniformF(0, 1)
+  const float scaled = std::log(1 - u) * dist.inv_log_1mp;
+  return static_cast<uint32_t>(scaled);  // conversion drops the fraction
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/random.h b/third-party/libjxl/libjxl/lib/jxl/base/random.h
new file mode 100644
index 0000000000..663b88c95d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/random.h
@@ -0,0 +1,95 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_RANDOM_
+#define LIB_JXL_BASE_RANDOM_
+
+// Random number generator + distributions.
+// We don't use <random> because the implementation (and thus results) differs
+// between libstdc++ and libc++.
+
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+struct Rng {
+  explicit Rng(size_t seed)  // Only the second state word depends on `seed`.
+      : s{static_cast<uint64_t>(0x94D049BB133111EBull),
+          static_cast<uint64_t>(0xBF58476D1CE4E5B9ull) + seed} {}
+
+  // Xorshift128+ (adapted from xorshift128+-inl.h): returns the next 64 bits.
+  uint64_t operator()() {
+    uint64_t s1 = s[0];
+    const uint64_t s0 = s[1];
+    const uint64_t bits = s1 + s0;  // b, c
+    s[0] = s0;
+    s1 ^= s1 << 23;
+    s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
+    s[1] = s1;
+    return bits;  // sum of the two state words as they were before the update
+  }
+
+  // Uniformly distributed int64_t in [begin, end), under the assumption that
+  // `end-begin` is significantly smaller than 1<<64, otherwise there is some
+  // bias.
+  int64_t UniformI(int64_t begin, int64_t end) {
+    JXL_DASSERT(end > begin);
+    return static_cast<int64_t>((*this)() %
+                                static_cast<uint64_t>(end - begin)) +
+           begin;
+  }
+
+  // Same as UniformI, but for uint64_t.
+  uint64_t UniformU(uint64_t begin, uint64_t end) {
+    JXL_DASSERT(end > begin);
+    return (*this)() % (end - begin) + begin;
+  }
+
+  // Uniformly distributed float in [begin, end) range. Note: only 23 bits of
+  // randomness.
+  float UniformF(float begin, float end) {
+    float f;
+    // Bits of a random [1, 2) float.
+    uint32_t u = ((*this)() >> (64 - 23)) | 0x3F800000;
+    static_assert(sizeof(f) == sizeof(u),
+                  "Float and U32 must have the same size");
+    memcpy(&f, &u, sizeof(f));  // reinterpret the bit pattern as a float
+    // Note: (end-begin) * f + (2*begin-end) may fail to return a number >=
+    // begin.
+    return (end - begin) * (f - 1.0f) + begin;
+  }
+
+  // Bernoulli trial: true when a UniformF(0, 1) sample is below `p`.
+  bool Bernoulli(float p) { return UniformF(0, 1) < p; }
+
+  // State for geometric distributions.
+  struct GeometricDistribution {
+    explicit GeometricDistribution(float p);
+
+   private:
+    float inv_log_1mp;  // read by Rng::Geometric (hence the friend below)
+    friend struct Rng;
+  };
+
+  uint32_t Geometric(const GeometricDistribution& dist);
+
+  template <typename T>
+  void Shuffle(T* t, size_t n) {  // in-place shuffle of t[0, n)
+    for (size_t i = 0; i + 1 < n; i++) {
+      size_t a = UniformU(i, n);  // index drawn from [i, n)
+      std::swap(t[a], t[i]);
+    }
+  }
+
+ private:
+  uint64_t s[2];  // generator state, advanced by operator()
+};
+
+}  // namespace jxl
+#endif  // LIB_JXL_BASE_RANDOM_
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/sanitizer_definitions.h b/third-party/libjxl/libjxl/lib/jxl/base/sanitizer_definitions.h
new file mode 100644
index 0000000000..315f3bd003
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/sanitizer_definitions.h
@@ -0,0 +1,44 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_SANITIZER_DEFINITIONS_H_
+#define LIB_JXL_BASE_SANITIZER_DEFINITIONS_H_
+
+#ifdef MEMORY_SANITIZER
+#define JXL_MEMORY_SANITIZER 1
+#elif defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#define JXL_MEMORY_SANITIZER 1
+#else
+#define JXL_MEMORY_SANITIZER 0
+#endif
+#else
+#define JXL_MEMORY_SANITIZER 0
+#endif
+
+#ifdef ADDRESS_SANITIZER
+#define JXL_ADDRESS_SANITIZER 1
+#elif defined(__has_feature)
+#if __has_feature(address_sanitizer)
+#define JXL_ADDRESS_SANITIZER 1
+#else
+#define JXL_ADDRESS_SANITIZER 0
+#endif
+#else
+#define JXL_ADDRESS_SANITIZER 0
+#endif
+
+#ifdef THREAD_SANITIZER
+#define JXL_THREAD_SANITIZER 1
+#elif defined(__has_feature)
+#if __has_feature(thread_sanitizer)
+#define JXL_THREAD_SANITIZER 1
+#else
+#define JXL_THREAD_SANITIZER 0
+#endif
+#else
+#define JXL_THREAD_SANITIZER 0
+#endif
+#endif  // LIB_JXL_BASE_SANITIZER_DEFINITIONS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/scope_guard.h b/third-party/libjxl/libjxl/lib/jxl/base/scope_guard.h
new file mode 100644
index 0000000000..a18a44cb79
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/scope_guard.h
@@ -0,0 +1,48 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_SCOPE_GUARD_H_
+#define LIB_JXL_BASE_SCOPE_GUARD_H_
+
+#include <utility>
+
+namespace jxl {
+
+// Invokes the stored callback on destruction unless Disarm() was called.
+template <typename Callback>
+class ScopeGuard {
+ public:
+  // Discourage unnecessary moves / copies.
+  ScopeGuard(const ScopeGuard &) = delete;
+  ScopeGuard &operator=(const ScopeGuard &) = delete;
+  ScopeGuard &operator=(ScopeGuard &&) = delete;
+
+  // Pre-C++17 does not guarantee RVO -> require move constructor. The armed
+  // state travels with the callback so that at most one guard ever fires.
+  ScopeGuard(ScopeGuard &&other)
+      : callback_(std::move(other.callback_)), armed_(other.armed_) {
+    other.armed_ = false;
+  }
+
+  template <typename CallbackParam>
+  explicit ScopeGuard(CallbackParam &&callback)
+      : callback_(std::forward<CallbackParam>(callback)), armed_(true) {}
+
+  ~ScopeGuard() { if (armed_) callback_(); }
+  void Disarm() { armed_ = false; }  // Cancels the pending callback.
+
+ private:
+  Callback callback_;
+  bool armed_;
+};
+
+template <typename Callback>  // For lvalues, Callback is a reference type.
+ScopeGuard<Callback> MakeScopeGuard(Callback &&callback) {
+  return ScopeGuard<Callback>{std::forward<Callback>(callback)};
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_SCOPE_GUARD_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/span.h b/third-party/libjxl/libjxl/lib/jxl/base/span.h
new file mode 100644
index 0000000000..41c3623a4b
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/span.h
@@ -0,0 +1,60 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_SPAN_H_
+#define LIB_JXL_BASE_SPAN_H_
+
+// Span (array view) is a non-owning container that provides cheap "cut"
+// operations and could be used as "ArrayLike" data source for PaddedBytes.
+
+#include <stddef.h>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+template <typename T>
+class Span {
+ public:
+  // An empty view: null pointer, zero length.
+  constexpr Span() noexcept : ptr_(nullptr), len_(0) {}
+  // A view over `length` elements starting at `array` (not owned).
+  constexpr Span(T* array, size_t length) noexcept
+      : ptr_(array), len_(length) {}
+  // A view over a whole C array; N is deduced.
+  template <size_t N>
+  explicit constexpr Span(T (&a)[N]) noexcept : ptr_(a), len_(N) {}
+  // A view over any container with data()/size() and same-sized elements.
+  template <typename ArrayLike>
+  explicit constexpr Span(const ArrayLike& other) noexcept
+      : ptr_(reinterpret_cast<T*>(other.data())), len_(other.size()) {
+    static_assert(sizeof(*other.data()) == sizeof(T),
+                  "Incompatible type of source.");
+  }
+
+  constexpr T* data() const noexcept { return ptr_; }
+  constexpr size_t size() const noexcept { return len_; }
+  constexpr bool empty() const noexcept { return size() == 0; }
+
+  constexpr T& operator[](size_t i) const noexcept {
+    // MSVC 2015 accepts this as constexpr, but not ptr_[i]
+    return *(data() + i);
+  }
+
+  // Drops the first `n` elements from the view; asserts that n <= size().
+  void remove_prefix(size_t n) noexcept {
+    JXL_ASSERT(size() >= n);
+    len_ -= n;
+    ptr_ += n;
+  }
+
+ private:
+  T* ptr_;
+  size_t len_;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_SPAN_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/base/status.h b/third-party/libjxl/libjxl/lib/jxl/base/status.h
new file mode 100644
index 0000000000..45e7244ce5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/base/status.h
@@ -0,0 +1,429 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_STATUS_H_
+#define LIB_JXL_BASE_STATUS_H_
+
+// Error handling: Status return type + helper macros.
+
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <type_traits>
+#include <utility>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/sanitizer_definitions.h"
+
+#if JXL_ADDRESS_SANITIZER || JXL_MEMORY_SANITIZER || JXL_THREAD_SANITIZER
+#include "sanitizer/common_interface_defs.h"  // __sanitizer_print_stack_trace
+#endif                                        // defined(*_SANITIZER)
+
+namespace jxl {
+
+// Uncomment to abort when JXL_FAILURE or JXL_STATUS with a fatal error is
+// reached:
+// #define JXL_CRASH_ON_ERROR
+
+#ifndef JXL_ENABLE_ASSERT
+#define JXL_ENABLE_ASSERT 1
+#endif
+
+#ifndef JXL_ENABLE_CHECK
+#define JXL_ENABLE_CHECK 1
+#endif
+
+// Pass -DJXL_DEBUG_ON_ERROR at compile time to print debug messages when a
+// function returns JXL_FAILURE or calls JXL_NOTIFY_ERROR. Note that this is
+// irrelevant if you also pass -DJXL_CRASH_ON_ERROR.
+#if defined(JXL_DEBUG_ON_ERROR) || defined(JXL_CRASH_ON_ERROR)
+#undef JXL_DEBUG_ON_ERROR
+#define JXL_DEBUG_ON_ERROR 1
+#else  // JXL_DEBUG_ON_ERROR || JXL_CRASH_ON_ERROR
+#ifdef NDEBUG
+#define JXL_DEBUG_ON_ERROR 0
+#else  // NDEBUG
+#define JXL_DEBUG_ON_ERROR 1
+#endif  // NDEBUG
+#endif  // JXL_DEBUG_ON_ERROR || JXL_CRASH_ON_ERROR
+
+// Pass -DJXL_DEBUG_ON_ALL_ERROR at compile time to print debug messages on
+// all error (fatal and non-fatal) status. This implies JXL_DEBUG_ON_ERROR.
+#if defined(JXL_DEBUG_ON_ALL_ERROR)
+#undef JXL_DEBUG_ON_ALL_ERROR
+#define JXL_DEBUG_ON_ALL_ERROR 1
+// JXL_DEBUG_ON_ALL_ERROR implies JXL_DEBUG_ON_ERROR too.
+#undef JXL_DEBUG_ON_ERROR
+#define JXL_DEBUG_ON_ERROR 1
+#else  // JXL_DEBUG_ON_ALL_ERROR
+#define JXL_DEBUG_ON_ALL_ERROR 0
+#endif  // JXL_DEBUG_ON_ALL_ERROR
+
+// The Verbose level for the library
+#ifndef JXL_DEBUG_V_LEVEL
+#define JXL_DEBUG_V_LEVEL 0
+#endif  // JXL_DEBUG_V_LEVEL
+
+// Pass -DJXL_DEBUG_ON_ABORT={0,1} to force disable/enable the debug messages on
+// JXL_ASSERT, JXL_CHECK and JXL_ABORT.
+#ifndef JXL_DEBUG_ON_ABORT
+#define JXL_DEBUG_ON_ABORT JXL_DEBUG_ON_ERROR
+#endif  // JXL_DEBUG_ON_ABORT
+
+// Print a debug message on standard error. You should use the JXL_DEBUG macro
+// instead of calling Debug directly. This function returns false, so it can be
+// used as a return value in JXL_FAILURE.
+JXL_FORMAT(1, 2)
+inline JXL_NOINLINE bool Debug(const char* format, ...) {
+  va_list args;
+  va_start(args, format);
+  vfprintf(stderr, format, args);  // printf-style output, always to stderr
+  va_end(args);
+  return false;  // constant result, so the call can sit in a bool expression
+}
+
+// Print a debug message on standard error if "enabled" is true. "enabled" is
+// normally a macro that evaluates to 0 or 1 at compile time, so the Debug
+// function is never called and optimized out in release builds. Note that the
+// arguments are compiled but not evaluated when enabled is false. The format
+// string must be an explicit string in the call, for example:
+//   JXL_DEBUG(JXL_DEBUG_MYMODULE, "my module message: %d", some_var);
+// Add a header at the top of your module's .cc or .h file (depending on whether
+// you have JXL_DEBUG calls from the .h as well) like this:
+//   #ifndef JXL_DEBUG_MYMODULE
+//   #define JXL_DEBUG_MYMODULE 0
+//   #endif  // JXL_DEBUG_MYMODULE
+#define JXL_DEBUG_TMP(format, ...) \
+  ::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__)
+
+#define JXL_DEBUG(enabled, format, ...)     \
+  do {                                      \
+    if (enabled) {                          \
+      JXL_DEBUG_TMP(format, ##__VA_ARGS__); \
+    }                                       \
+  } while (0)
+
+// JXL_DEBUG version that prints the debug message if the global verbose level
+// defined at compile time by JXL_DEBUG_V_LEVEL is greater than or equal to
+// the passed level.
+#define JXL_DEBUG_V(level, format, ...) \
+  JXL_DEBUG(level <= JXL_DEBUG_V_LEVEL, format, ##__VA_ARGS__)
+
+// Warnings (via JXL_WARNING) are enabled by default in debug builds (opt and
+// debug).
+#ifdef JXL_DEBUG_WARNING
+#undef JXL_DEBUG_WARNING
+#define JXL_DEBUG_WARNING 1
+#else  // JXL_DEBUG_WARNING
+#ifdef NDEBUG
+#define JXL_DEBUG_WARNING 0
+#else  // NDEBUG
+#define JXL_DEBUG_WARNING 1
+#endif  // NDEBUG
+#endif  // JXL_DEBUG_WARNING
+#define JXL_WARNING(format, ...) \
+  JXL_DEBUG(JXL_DEBUG_WARNING, format, ##__VA_ARGS__)
+
+// Exits the program after printing a stack trace when possible.
+JXL_NORETURN inline JXL_NOINLINE bool Abort() {
+#if JXL_ADDRESS_SANITIZER || JXL_MEMORY_SANITIZER || JXL_THREAD_SANITIZER
+  // If compiled with any sanitizer print a stack trace. This call doesn't crash
+  // the program, instead the trap below will crash it also allowing gdb to
+  // break there.
+  __sanitizer_print_stack_trace();
+#endif  // *_SANITIZER
+  // Never returns; the bool return type lets JXL_ABORT use it in an expression.
+#if JXL_COMPILER_MSVC
+  __debugbreak();
+  abort();
+#else
+  __builtin_trap();
+#endif  // JXL_COMPILER_MSVC
+}
+
+// Exits the program after printing file/line plus a formatted string.
+#define JXL_ABORT(format, ...)                                              \
+  ((JXL_DEBUG_ON_ABORT) && ::jxl::Debug(("%s:%d: JXL_ABORT: " format "\n"), \
+                                        __FILE__, __LINE__, ##__VA_ARGS__), \
+   ::jxl::Abort())
+
+// Use this for code paths that are unreachable unless the code would change
+// to make it reachable, in which case it will print a warning and abort in
+// debug builds. In release builds no code is produced for this, so only use
+// this if this path is really unreachable.
+#define JXL_UNREACHABLE(format, ...)                                   \
+  do {                                                                 \
+    if (JXL_DEBUG_WARNING) {                                           \
+      ::jxl::Debug(("%s:%d: JXL_UNREACHABLE: " format "\n"), __FILE__, \
+                   __LINE__, ##__VA_ARGS__);                           \
+      ::jxl::Abort();                                                  \
+    } else {                                                           \
+      JXL_UNREACHABLE_BUILTIN;                                         \
+    }                                                                  \
+  } while (0)
+
+// Does not guarantee running the code, use only for debug mode checks.
+#if JXL_ENABLE_ASSERT
+#define JXL_ASSERT(condition)                                      \
+  do {                                                             \
+    if (!(condition)) {                                            \
+      JXL_DEBUG(JXL_DEBUG_ON_ABORT, "JXL_ASSERT: %s", #condition); \
+      ::jxl::Abort();                                              \
+    }                                                              \
+  } while (0)
+#else
+#define JXL_ASSERT(condition) \
+  do {                        \
+  } while (0)
+#endif
+
+// Define JXL_IS_DEBUG_BUILD that denotes asan, msan and other debug builds,
+// but not opt or release.
+#ifndef JXL_IS_DEBUG_BUILD
+#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) ||         \
+    defined(MEMORY_SANITIZER) || defined(THREAD_SANITIZER) || \
+    defined(__clang_analyzer__)
+#define JXL_IS_DEBUG_BUILD 1
+#else
+#define JXL_IS_DEBUG_BUILD 0
+#endif
+#endif  //  JXL_IS_DEBUG_BUILD
+
+// Same as above, but only runs in debug builds (builds where NDEBUG is not
+// defined). This is useful for slower asserts that we want to run more rarely
+// than usual. These will run on asan, msan and other debug builds, but not in
+// opt or release.
+#if JXL_IS_DEBUG_BUILD
+#define JXL_DASSERT(condition)                                      \
+  do {                                                              \
+    if (!(condition)) {                                             \
+      JXL_DEBUG(JXL_DEBUG_ON_ABORT, "JXL_DASSERT: %s", #condition); \
+      ::jxl::Abort();                                               \
+    }                                                               \
+  } while (0)
+#else
+#define JXL_DASSERT(condition) \
+  do {                         \
+  } while (0)
+#endif
+
+// Always runs the condition, so can be used for non-debug calls.
+#if JXL_ENABLE_CHECK
+#define JXL_CHECK(condition)                                      \
+  do {                                                            \
+    if (!(condition)) {                                           \
+      JXL_DEBUG(JXL_DEBUG_ON_ABORT, "JXL_CHECK: %s", #condition); \
+      ::jxl::Abort();                                             \
+    }                                                             \
+  } while (0)
+#else
+#define JXL_CHECK(condition) \
+  do {                       \
+    (void)(condition);       \
+  } while (0)
+#endif
+
+// A jxl::Status value from a StatusCode or Status which prints a debug message
+// when enabled.
+#define JXL_STATUS(status, format, ...)                                        \
+  ::jxl::StatusMessage(::jxl::Status(status), "%s:%d: " format "\n", __FILE__, \
+                       __LINE__, ##__VA_ARGS__)
+
+// Notify of an error but discard the resulting Status value. This is only
+// useful for debug builds or when building with JXL_CRASH_ON_ERROR.
+#define JXL_NOTIFY_ERROR(format, ...)                                      \
+  (void)JXL_STATUS(::jxl::StatusCode::kGenericError, "JXL_ERROR: " format, \
+                   ##__VA_ARGS__)
+
+// An error Status with a message. The JXL_STATUS() macro will return a Status
+// object with a kGenericError code, but the comma operator helps with
+// clang-tidy inference and potentially with optimizations.
+#define JXL_FAILURE(format, ...)                                              \
+  ((void)JXL_STATUS(::jxl::StatusCode::kGenericError, "JXL_FAILURE: " format, \
+                    ##__VA_ARGS__),                                           \
+   ::jxl::Status(::jxl::StatusCode::kGenericError))
+
+// Always evaluates the status exactly once, so can be used for non-debug calls.
+// Returns from the current context if the passed Status expression is an error
+// (fatal or non-fatal). The return value is the passed Status.
+#define JXL_RETURN_IF_ERROR(status)                                       \
+  do {                                                                    \
+    ::jxl::Status jxl_return_if_error_status = (status);                  \
+    if (!jxl_return_if_error_status) {                                    \
+      (void)::jxl::StatusMessage(                                         \
+          jxl_return_if_error_status,                                     \
+          "%s:%d: JXL_RETURN_IF_ERROR code=%d: %s\n", __FILE__, __LINE__, \
+          static_cast<int>(jxl_return_if_error_status.code()), #status);  \
+      return jxl_return_if_error_status;                                  \
+    }                                                                     \
+  } while (0)
+
+// As above, but without calling StatusMessage. Intended for bundles (see
+// fields.h), which have numerous call sites (-> relevant for code size) and do
+// not want to generate excessive messages when decoding partial headers.
+#define JXL_QUIET_RETURN_IF_ERROR(status)                \
+  do {                                                   \
+    ::jxl::Status jxl_return_if_error_status = (status); \
+    if (!jxl_return_if_error_status) {                   \
+      return jxl_return_if_error_status;                 \
+    }                                                    \
+  } while (0)
+
+enum class StatusCode : int32_t {
+  // Non-fatal errors (negative values).
+  kNotEnoughBytes = -1,
+
+  // The only non-error status code.
+  kOk = 0,
+
+  // Fatal-errors (positive values)
+  kGenericError = 1,
+};
+
+// Drop-in replacement for bool that raises compiler warnings if not used
+// after being returned from a function. Example:
+// Status LoadFile(...) { return true; } is more compact than
+// bool JXL_MUST_USE_RESULT LoadFile(...) { return true; }
+// In case of error, the status can carry an extra error code in its value which
+// is split between fatal and non-fatal error codes.
+class JXL_MUST_USE_RESULT Status {
+ public:
+  // We want implicit constructor from bool to allow returning "true" or "false"
+  // on a function when using Status. "true" means kOk while "false" means a
+  // generic fatal error.
+  // NOLINTNEXTLINE(google-explicit-constructor)
+  constexpr Status(bool ok)
+      : code_(ok ? StatusCode::kOk : StatusCode::kGenericError) {}
+
+  // NOLINTNEXTLINE(google-explicit-constructor)
+  constexpr Status(StatusCode code) : code_(code) {}
+
+  // We also want implicit cast to bool to check for return values of functions.
+  // NOLINTNEXTLINE(google-explicit-constructor)
+  constexpr operator bool() const { return code_ == StatusCode::kOk; }
+
+  constexpr StatusCode code() const { return code_; }
+
+  // Returns whether the status code is a fatal error.
+  constexpr bool IsFatalError() const {
+    return static_cast<int32_t>(code_) > 0;
+  }
+
+ private:
+  StatusCode code_;
+};
+
+// Returns `status` unchanged; depending on build-time settings it first prints
+// the printf-style message to stderr and/or aborts on a fatal error.
+inline JXL_FORMAT(2, 3) Status
+    StatusMessage(const Status status, const char* format, ...) {
+  // This block will be optimized out when JXL_DEBUG_ON_ERROR and
+  // JXL_DEBUG_ON_ALL_ERROR are both disabled.
+  if ((JXL_DEBUG_ON_ERROR && status.IsFatalError()) ||
+      (JXL_DEBUG_ON_ALL_ERROR && !status)) {
+    va_list args;
+    va_start(args, format);
+    vfprintf(stderr, format, args);
+    va_end(args);
+  }
+#ifdef JXL_CRASH_ON_ERROR
+  // With JXL_CRASH_ON_ERROR defined, Abort() on fatal errors only.
+  if (status.IsFatalError()) {
+    Abort();
+  }
+#endif  // JXL_CRASH_ON_ERROR
+  return status;
+}
+
+template <typename T>
+class JXL_MUST_USE_RESULT StatusOr {
+  static_assert(!std::is_convertible<StatusCode, T>::value &&
+                    !std::is_convertible<T, StatusCode>::value,
+                "You cannot make a StatusOr with a type convertible from or to "
+                "StatusCode");
+  static_assert(std::is_move_constructible<T>::value &&
+                    std::is_move_assignable<T>::value,
+                "T must be move constructible and move assignable");
+
+ public:
+  // NOLINTNEXTLINE(google-explicit-constructor)
+  StatusOr(StatusCode code) : code_(code) {
+    JXL_ASSERT(code_ != StatusCode::kOk);
+  }
+
+  // NOLINTNEXTLINE(google-explicit-constructor)
+  StatusOr(Status status) : StatusOr(status.code()) {}
+
+  // NOLINTNEXTLINE(google-explicit-constructor)
+  StatusOr(T&& value) : code_(StatusCode::kOk) {
+    new (&storage_.data_) T(std::move(value));
+  }
+
+  StatusOr(StatusOr&& other) noexcept {
+    if (other.ok()) {
+      new (&storage_.data_) T(std::move(other.storage_.data_));
+    }
+    code_ = other.code_;
+  }
+
+  StatusOr& operator=(StatusOr&& other) noexcept {
+    if (this == &other) return *this;
+    if (ok() && other.ok()) {
+      storage_.data_ = std::move(other.storage_.data_);
+    } else if (other.ok()) {
+      new (&storage_.data_) T(std::move(other.storage_.data_));
+    } else if (ok()) {
+      storage_.data_.~T();
+    }
+    code_ = other.code_;
+    return *this;
+  }
+
+  StatusOr(const StatusOr&) = delete;
+  StatusOr operator=(const StatusOr&) = delete;
+
+  bool ok() const { return code_ == StatusCode::kOk; }
+  Status status() const { return code_; }
+
+  // Only call this if you are absolutely sure that `ok()` is true.
+  // Ideally, never call this manually and rely on JXL_ASSIGN_OR_RETURN.
+  T value() && {
+    JXL_ASSERT(ok());
+    return std::move(storage_.data_);
+  }
+
+  ~StatusOr() {
+    if (code_ == StatusCode::kOk) {
+      storage_.data_.~T();
+    }
+  }
+
+ private:
+  union Storage {
+    char dummy_;
+    T data_;
+    Storage() {}
+    ~Storage() {}
+  } storage_;
+
+  StatusCode code_;
+};
+
+#define JXL_ASSIGN_OR_RETURN(lhs, statusor) /* unique temporary per line */ \
+  PRIVATE_JXL_ASSIGN_OR_RETURN_EXPAND(__LINE__, lhs, statusor)
+#define PRIVATE_JXL_ASSIGN_OR_RETURN_EXPAND(line, lhs, statusor) \
+  PRIVATE_JXL_ASSIGN_OR_RETURN_IMPL(line, lhs, statusor)  // __LINE__ -> number
+// NOLINTBEGIN(bugprone-macro-parentheses)
+#define PRIVATE_JXL_ASSIGN_OR_RETURN_IMPL(line, lhs, statusor)             \
+  auto assign_or_return_temporary_variable##line = std::move(statusor);    \
+  JXL_RETURN_IF_ERROR(assign_or_return_temporary_variable##line.status()); \
+  lhs = std::move(assign_or_return_temporary_variable##line).value();
+// NOLINTEND(bugprone-macro-parentheses)
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_STATUS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/bit_reader_test.cc b/third-party/libjxl/libjxl/lib/jxl/bit_reader_test.cc
new file mode 100644
index 0000000000..24cc9b64e8
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/bit_reader_test.cc
@@ -0,0 +1,262 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <array>
+#include <vector>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+TEST(BitReaderTest, ExtendsWithZeroes) {
+  for (size_t size = 4; size < 32; ++size) {
+    std::vector<uint8_t> data(size, 0xff);
+
+    for (size_t n_bytes = 0; n_bytes < size; n_bytes++) {
+      BitReader br(Span<const uint8_t>(data.data(), n_bytes));
+      // Consume every declared bit; the buffer is all ones.
+      for (size_t i = 0; i < n_bytes * kBitsPerByte; i++) {
+        ASSERT_EQ(br.ReadBits(1), 1u) << "n_bytes=" << n_bytes << " i=" << i;
+      }
+
+      // PEEK more than the declared size - all will be zero. Cannot consume.
+      for (size_t i = 0; i < BitReader::kMaxBitsPerCall; i++) {
+        ASSERT_EQ(br.PeekBits(i), 0u)
+            << "size=" << size << " n_bytes=" << n_bytes << " i=" << i;
+      }
+
+      EXPECT_TRUE(br.Close());
+    }
+  }
+}
+
+struct Symbol {
+  uint32_t num_bits;
+  uint32_t value;
+};
+
+// Reading from output gives the same values.
+TEST(BitReaderTest, TestRoundTrip) {
+  test::ThreadPoolForTests pool(8);
+  EXPECT_TRUE(RunOnPool(
+      &pool, 0, 1000, ThreadPool::NoInit,
+      [](const uint32_t task, size_t /* thread */) {
+        constexpr size_t kMaxBits = 8000;
+        BitWriter writer;
+        BitWriter::Allotment allotment(&writer, kMaxBits);
+
+        std::vector<Symbol> symbols;
+        symbols.reserve(1000);
+
+        Rng rng(55537 + 129 * task);
+
+        for (;;) {
+          const uint32_t num_bits = rng.UniformU(1, 33);
+          if (writer.BitsWritten() + num_bits > kMaxBits) break;
+          const uint32_t value = rng.UniformU(0, 1ULL << num_bits);
+          symbols.push_back({num_bits, value});
+          writer.Write(num_bits, value);
+        }
+
+        writer.ZeroPadToByte();
+        allotment.ReclaimAndCharge(&writer, 0, nullptr);
+        BitReader reader(writer.GetSpan());
+        for (const Symbol& s : symbols) {
+          EXPECT_EQ(s.value, reader.ReadBits(s.num_bits));
+        }
+        EXPECT_TRUE(reader.Close());
+      },
+      "TestTBitReaderRoundTrip"));
+}
+
+// SkipBits is the same as reading that many bits.
+TEST(BitReaderTest, TestSkip) {
+  test::ThreadPoolForTests pool(8);
+  EXPECT_TRUE(RunOnPool(
+      &pool, 0, 96, ThreadPool::NoInit,
+      [](const uint32_t task, size_t /* thread */) {
+        constexpr size_t kSize = 100;
+
+        for (size_t skip = 0; skip < 128; ++skip) {
+          BitWriter writer;
+          BitWriter::Allotment allotment(&writer, kSize * kBitsPerByte);
+          // Start with "task" 1-bits.
+          for (size_t i = 0; i < task; ++i) {
+            writer.Write(1, 1);
+          }
+
+          // Write 0-bits that we will skip over
+          for (size_t i = 0; i < skip; ++i) {
+            writer.Write(1, 0);
+          }
+
+          // Write terminator bits '101'
+          writer.Write(3, 5);
+          EXPECT_EQ(task + skip + 3, writer.BitsWritten());
+          writer.ZeroPadToByte();
+          AuxOut aux_out;
+          allotment.ReclaimAndCharge(&writer, 0, &aux_out);
+          EXPECT_LT(aux_out.layers[0].total_bits, kSize * 8);
+
+          BitReader reader1(writer.GetSpan());
+          BitReader reader2(writer.GetSpan());
+          // Verify initial 1-bits
+          for (size_t i = 0; i < task; ++i) {
+            EXPECT_EQ(1u, reader1.ReadBits(1));
+            EXPECT_EQ(1u, reader2.ReadBits(1));
+          }
+
+          // SkipBits or manually read "skip" bits
+          reader1.SkipBits(skip);
+          for (size_t i = 0; i < skip; ++i) {
+            EXPECT_EQ(0u, reader2.ReadBits(1))
+                << " skip=" << skip << " i=" << i;
+          }
+          EXPECT_EQ(reader1.TotalBitsConsumed(), reader2.TotalBitsConsumed());
+
+          // Ensure both readers see the terminator bits.
+          EXPECT_EQ(5u, reader1.ReadBits(3));
+          EXPECT_EQ(5u, reader2.ReadBits(3));
+
+          EXPECT_TRUE(reader1.Close());
+          EXPECT_TRUE(reader2.Close());
+        }
+      },
+      "TestSkip"));
+}
+
+// Verifies byte order and different groupings of bits.
+TEST(BitReaderTest, TestOrder) {
+  constexpr size_t kMaxBits = 16;
+
+  // u(1) - bits written into LSBs of first byte
+  {
+    BitWriter writer;
+    BitWriter::Allotment allotment(&writer, kMaxBits);
+    for (size_t i = 0; i < 5; ++i) {
+      writer.Write(1, 1);
+    }
+    for (size_t i = 0; i < 5; ++i) {
+      writer.Write(1, 0);
+    }
+    for (size_t i = 0; i < 6; ++i) {
+      writer.Write(1, 1);
+    }
+
+    writer.ZeroPadToByte();
+    allotment.ReclaimAndCharge(&writer, 0, nullptr);
+    BitReader reader(writer.GetSpan());
+    EXPECT_EQ(0x1Fu, reader.ReadFixedBits<8>());
+    EXPECT_EQ(0xFCu, reader.ReadFixedBits<8>());
+    EXPECT_TRUE(reader.Close());
+  }
+
+  // u(8) - get bytes in the same order
+  {
+    BitWriter writer;
+    BitWriter::Allotment allotment(&writer, kMaxBits);
+    writer.Write(8, 0xF8);
+    writer.Write(8, 0x3F);
+
+    writer.ZeroPadToByte();
+    allotment.ReclaimAndCharge(&writer, 0, nullptr);
+    BitReader reader(writer.GetSpan());
+    EXPECT_EQ(0xF8u, reader.ReadFixedBits<8>());
+    EXPECT_EQ(0x3Fu, reader.ReadFixedBits<8>());
+    EXPECT_TRUE(reader.Close());
+  }
+
+  // u(16) - little-endian bytes
+  {
+    BitWriter writer;
+    BitWriter::Allotment allotment(&writer, kMaxBits);
+    writer.Write(16, 0xF83F);
+
+    writer.ZeroPadToByte();
+    allotment.ReclaimAndCharge(&writer, 0, nullptr);
+    BitReader reader(writer.GetSpan());
+    EXPECT_EQ(0x3Fu, reader.ReadFixedBits<8>());
+    EXPECT_EQ(0xF8u, reader.ReadFixedBits<8>());
+    EXPECT_TRUE(reader.Close());
+  }
+
+  // Non-byte-aligned, mixed sizes
+  {
+    BitWriter writer;
+    BitWriter::Allotment allotment(&writer, kMaxBits);
+    writer.Write(1, 1);
+    writer.Write(3, 6);
+    writer.Write(8, 0xDB);
+    writer.Write(4, 8);
+
+    writer.ZeroPadToByte();
+    allotment.ReclaimAndCharge(&writer, 0, nullptr);
+    BitReader reader(writer.GetSpan());
+    EXPECT_EQ(0xBDu, reader.ReadFixedBits<8>());
+    EXPECT_EQ(0x8Du, reader.ReadFixedBits<8>());
+    EXPECT_TRUE(reader.Close());
+  }
+}
+
+TEST(BitReaderTest, TotalCountersTest) {
+  uint8_t buf[8] = {1, 2, 3, 4};
+  BitReader reader(Span<const uint8_t>(buf, sizeof(buf)));
+
+  EXPECT_EQ(sizeof(buf), reader.TotalBytes());
+  EXPECT_EQ(0u, reader.TotalBitsConsumed());
+  reader.ReadFixedBits<1>();
+  EXPECT_EQ(1u, reader.TotalBitsConsumed());
+
+  reader.ReadFixedBits<10>();
+  EXPECT_EQ(11u, reader.TotalBitsConsumed());
+
+  reader.ReadFixedBits<4>();
+  EXPECT_EQ(15u, reader.TotalBitsConsumed());
+
+  reader.ReadFixedBits<1>();
+  EXPECT_EQ(16u, reader.TotalBitsConsumed());
+
+  reader.ReadFixedBits<16>();
+  EXPECT_EQ(32u, reader.TotalBitsConsumed());
+
+  EXPECT_TRUE(reader.Close());
+}
+
+TEST(BitReaderTest, MoveTest) {
+  uint8_t buf[8] = {1, 2, 3, 4};
+  BitReader reader2;
+  {
+    BitReader reader1(Span<const uint8_t>(buf, sizeof(buf)));
+
+    EXPECT_EQ(0u, reader1.TotalBitsConsumed());
+    reader1.ReadFixedBits<16>();
+    EXPECT_EQ(16u, reader1.TotalBitsConsumed());
+
+    reader2 = std::move(reader1);
+    // From this point reader1 is invalid, but can continue to access reader2
+    // and we don't need to call Close() on reader1.
+  }
+
+  EXPECT_EQ(16u, reader2.TotalBitsConsumed());
+  EXPECT_EQ(3U, reader2.ReadFixedBits<8>());
+  EXPECT_EQ(24u, reader2.TotalBitsConsumed());
+
+  EXPECT_TRUE(reader2.Close());
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/bits_test.cc b/third-party/libjxl/libjxl/lib/jxl/bits_test.cc
new file mode 100644
index 0000000000..bd7aa548c8
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/bits_test.cc
@@ -0,0 +1,87 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/bits.h"
+
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+TEST(BitsTest, TestNumZeroBits) {
+  // Zero input is well-defined.
+  EXPECT_EQ(32u, Num0BitsAboveMS1Bit(0u));
+  EXPECT_EQ(64u, Num0BitsAboveMS1Bit(0ull));
+  EXPECT_EQ(32u, Num0BitsBelowLS1Bit(0u));
+  EXPECT_EQ(64u, Num0BitsBelowLS1Bit(0ull));
+
+  EXPECT_EQ(31u, Num0BitsAboveMS1Bit(1u));
+  EXPECT_EQ(30u, Num0BitsAboveMS1Bit(2u));
+  EXPECT_EQ(63u, Num0BitsAboveMS1Bit(1ull));
+  EXPECT_EQ(62u, Num0BitsAboveMS1Bit(2ull));
+
+  EXPECT_EQ(0u, Num0BitsBelowLS1Bit(1u));
+  EXPECT_EQ(0u, Num0BitsBelowLS1Bit(1ull));
+  EXPECT_EQ(1u, Num0BitsBelowLS1Bit(2u));
+  EXPECT_EQ(1u, Num0BitsBelowLS1Bit(2ull));
+
+  EXPECT_EQ(0u, Num0BitsAboveMS1Bit(0x80000000u));
+  EXPECT_EQ(0u, Num0BitsAboveMS1Bit(0x8000000000000000ull));
+  EXPECT_EQ(31u, Num0BitsBelowLS1Bit(0x80000000u));
+  EXPECT_EQ(63u, Num0BitsBelowLS1Bit(0x8000000000000000ull));
+}
+
+TEST(BitsTest, TestFloorLog2) {
+  // for input = [1, 7]
+  const size_t expected[7] = {0, 1, 1, 2, 2, 2, 2};
+  for (uint32_t i = 1; i <= 7; ++i) {
+    EXPECT_EQ(expected[i - 1], FloorLog2Nonzero(i)) << " " << i;
+    EXPECT_EQ(expected[i - 1], FloorLog2Nonzero(uint64_t(i))) << " " << i;
+  }
+
+  EXPECT_EQ(11u, FloorLog2Nonzero(0x00000fffu));  // 4095
+  EXPECT_EQ(12u, FloorLog2Nonzero(0x00001000u));  // 4096
+  EXPECT_EQ(12u, FloorLog2Nonzero(0x00001001u));  // 4097
+
+  EXPECT_EQ(31u, FloorLog2Nonzero(0x80000000u));
+  EXPECT_EQ(31u, FloorLog2Nonzero(0x80000001u));
+  EXPECT_EQ(31u, FloorLog2Nonzero(0xFFFFFFFFu));
+
+  EXPECT_EQ(31u, FloorLog2Nonzero(0x80000000ull));
+  EXPECT_EQ(31u, FloorLog2Nonzero(0x80000001ull));
+  EXPECT_EQ(31u, FloorLog2Nonzero(0xFFFFFFFFull));
+
+  EXPECT_EQ(63u, FloorLog2Nonzero(0x8000000000000000ull));
+  EXPECT_EQ(63u, FloorLog2Nonzero(0x8000000000000001ull));
+  EXPECT_EQ(63u, FloorLog2Nonzero(0xFFFFFFFFFFFFFFFFull));
+}
+
+TEST(BitsTest, TestCeilLog2) {
+  // for input = [1, 7]
+  const size_t expected[7] = {0, 1, 2, 2, 3, 3, 3};
+  for (uint32_t i = 1; i <= 7; ++i) {
+    EXPECT_EQ(expected[i - 1], CeilLog2Nonzero(i)) << " " << i;
+    EXPECT_EQ(expected[i - 1], CeilLog2Nonzero(uint64_t(i))) << " " << i;
+  }
+
+  EXPECT_EQ(12u, CeilLog2Nonzero(0x00000fffu));  // 4095
+  EXPECT_EQ(12u, CeilLog2Nonzero(0x00001000u));  // 4096
+  EXPECT_EQ(13u, CeilLog2Nonzero(0x00001001u));  // 4097
+
+  EXPECT_EQ(31u, CeilLog2Nonzero(0x80000000u));
+  EXPECT_EQ(32u, CeilLog2Nonzero(0x80000001u));
+  EXPECT_EQ(32u, CeilLog2Nonzero(0xFFFFFFFFu));
+
+  EXPECT_EQ(31u, CeilLog2Nonzero(0x80000000ull));
+  EXPECT_EQ(32u, CeilLog2Nonzero(0x80000001ull));
+  EXPECT_EQ(32u, CeilLog2Nonzero(0xFFFFFFFFull));
+
+  EXPECT_EQ(63u, CeilLog2Nonzero(0x8000000000000000ull));
+  EXPECT_EQ(64u, CeilLog2Nonzero(0x8000000000000001ull));
+  EXPECT_EQ(64u, CeilLog2Nonzero(0xFFFFFFFFFFFFFFFFull));
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/blending.cc b/third-party/libjxl/libjxl/lib/jxl/blending.cc
new file mode 100644
index 0000000000..291e3ba525
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/blending.cc
@@ -0,0 +1,152 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/blending.h"
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+// Returns whether the frame described by dec_state's frame header needs
+// blending, i.e. is anything other than a plain full-frame replacement.
+bool NeedsBlending(PassesDecoderState* dec_state) {
+  const auto& header = dec_state->shared->frame_header;
+  // Only regular and skip-progressive frames are candidates for blending.
+  const bool blendable_type =
+      header.frame_type == FrameType::kRegularFrame ||
+      header.frame_type == FrameType::kSkipProgressive;
+  if (!blendable_type) return false;
+  // A custom size or origin means the frame cannot simply replace everything.
+  if (header.custom_size_or_origin) return true;
+  // Otherwise blending is needed as soon as the color channels or any extra
+  // channel use a mode other than kReplace.
+  if (header.blending_info.mode != BlendMode::kReplace) return true;
+  for (const auto& ec_info : header.extra_channel_blending_info) {
+    if (ec_info.mode != BlendMode::kReplace) return true;
+  }
+  return false;
+}
+
+// Blends `xsize` samples starting at column `x0` of the `bg` and `fg` rows
+// into `out`. Rows 0..2 are the color channels; row 3 + i is extra channel i.
+void PerformBlending(const float* const* bg, const float* const* fg,
+                     float* const* out, size_t x0, size_t xsize,
+                     const PatchBlending& color_blending,
+                     const PatchBlending* ec_blending,
+                     const std::vector<ExtraChannelInfo>& extra_channel_info) {
+  bool has_alpha = false;
+  size_t num_ec = extra_channel_info.size();
+  for (size_t i = 0; i < num_ec; i++) {
+    if (extra_channel_info[i].type == jxl::ExtraChannel::kAlpha) {
+      has_alpha = true;
+      break;
+    }
+  }
+  ImageF tmp(xsize, 3 + num_ec);
+  // Blend extra channels first so that we use the pre-blending alpha.
+  for (size_t i = 0; i < num_ec; i++) {
+    if (ec_blending[i].mode == PatchBlendMode::kAdd) {
+      for (size_t x = 0; x < xsize; x++) {
+        tmp.Row(3 + i)[x] = bg[3 + i][x + x0] + fg[3 + i][x + x0];
+      }
+    } else if (ec_blending[i].mode == PatchBlendMode::kBlendAbove) {
+      size_t alpha = ec_blending[i].alpha_channel;
+      bool is_premultiplied = extra_channel_info[alpha].alpha_associated;
+      PerformAlphaBlending(bg[3 + i] + x0, bg[3 + alpha] + x0, fg[3 + i] + x0,
+                           fg[3 + alpha] + x0, tmp.Row(3 + i), xsize,
+                           is_premultiplied, ec_blending[i].clamp);
+    } else if (ec_blending[i].mode == PatchBlendMode::kBlendBelow) {
+      size_t alpha = ec_blending[i].alpha_channel;
+      bool is_premultiplied = extra_channel_info[alpha].alpha_associated;
+      PerformAlphaBlending(fg[3 + i] + x0, fg[3 + alpha] + x0, bg[3 + i] + x0,
+                           bg[3 + alpha] + x0, tmp.Row(3 + i), xsize,
+                           is_premultiplied, ec_blending[i].clamp);
+    } else if (ec_blending[i].mode == PatchBlendMode::kAlphaWeightedAddAbove) {
+      size_t alpha = ec_blending[i].alpha_channel;
+      PerformAlphaWeightedAdd(bg[3 + i] + x0, fg[3 + i] + x0,
+                              fg[3 + alpha] + x0, tmp.Row(3 + i), xsize,
+                              ec_blending[i].clamp);
+    } else if (ec_blending[i].mode == PatchBlendMode::kAlphaWeightedAddBelow) {
+      size_t alpha = ec_blending[i].alpha_channel;
+      PerformAlphaWeightedAdd(fg[3 + i] + x0, bg[3 + i] + x0,
+                              bg[3 + alpha] + x0, tmp.Row(3 + i), xsize,
+                              ec_blending[i].clamp);
+    } else if (ec_blending[i].mode == PatchBlendMode::kMul) {
+      PerformMulBlending(bg[3 + i] + x0, fg[3 + i] + x0, tmp.Row(3 + i), xsize,
+                         ec_blending[i].clamp);
+    } else if (ec_blending[i].mode == PatchBlendMode::kReplace) {
+      if (xsize) memcpy(tmp.Row(3 + i), fg[3 + i] + x0, xsize * sizeof(**fg));
+    } else if (ec_blending[i].mode == PatchBlendMode::kNone) {
+      if (xsize) memcpy(tmp.Row(3 + i), bg[3 + i] + x0, xsize * sizeof(**fg));
+    } else {
+      JXL_UNREACHABLE("new PatchBlendMode?");
+    }
+  }
+  size_t alpha = color_blending.alpha_channel;
+
+  if (color_blending.mode == PatchBlendMode::kAdd ||
+      (color_blending.mode == PatchBlendMode::kAlphaWeightedAddAbove &&
+       !has_alpha) ||
+      (color_blending.mode == PatchBlendMode::kAlphaWeightedAddBelow &&
+       !has_alpha)) {
+    for (int p = 0; p < 3; p++) {
+      float* row = tmp.Row(p);
+      for (size_t x = 0; x < xsize; x++) {
+        row[x] = bg[p][x + x0] + fg[p][x + x0];
+      }
+    }
+  } else if (color_blending.mode == PatchBlendMode::kBlendAbove && has_alpha) {
+    // Without alpha this mode falls through to the plain replace below.
+    bool is_premultiplied = extra_channel_info[alpha].alpha_associated;
+    PerformAlphaBlending(
+        {bg[0] + x0, bg[1] + x0, bg[2] + x0, bg[3 + alpha] + x0},
+        {fg[0] + x0, fg[1] + x0, fg[2] + x0, fg[3 + alpha] + x0},
+        {tmp.Row(0), tmp.Row(1), tmp.Row(2), tmp.Row(3 + alpha)}, xsize,
+        is_premultiplied, color_blending.clamp);
+  } else if (color_blending.mode == PatchBlendMode::kBlendBelow && has_alpha) {
+    // Without alpha this mode falls through to the plain replace below.
+    bool is_premultiplied = extra_channel_info[alpha].alpha_associated;
+    PerformAlphaBlending(
+        {fg[0] + x0, fg[1] + x0, fg[2] + x0, fg[3 + alpha] + x0},
+        {bg[0] + x0, bg[1] + x0, bg[2] + x0, bg[3 + alpha] + x0},
+        {tmp.Row(0), tmp.Row(1), tmp.Row(2), tmp.Row(3 + alpha)}, xsize,
+        is_premultiplied, color_blending.clamp);
+  } else if (color_blending.mode == PatchBlendMode::kAlphaWeightedAddAbove) {
+    JXL_DASSERT(has_alpha);
+    for (size_t c = 0; c < 3; c++) {
+      PerformAlphaWeightedAdd(bg[c] + x0, fg[c] + x0, fg[3 + alpha] + x0,
+                              tmp.Row(c), xsize, color_blending.clamp);
+    }
+  } else if (color_blending.mode == PatchBlendMode::kAlphaWeightedAddBelow) {
+    JXL_DASSERT(has_alpha);
+    for (size_t c = 0; c < 3; c++) {
+      PerformAlphaWeightedAdd(fg[c] + x0, bg[c] + x0, bg[3 + alpha] + x0,
+                              tmp.Row(c), xsize, color_blending.clamp);
+    }
+  } else if (color_blending.mode == PatchBlendMode::kMul) {
+    for (int p = 0; p < 3; p++) {
+      PerformMulBlending(bg[p] + x0, fg[p] + x0, tmp.Row(p), xsize,
+                         color_blending.clamp);
+    }
+  } else if (color_blending.mode == PatchBlendMode::kReplace ||
+             color_blending.mode == PatchBlendMode::kBlendAbove ||
+             color_blending.mode == PatchBlendMode::kBlendBelow) {  // kReplace
+    for (size_t p = 0; p < 3; p++) {
+      if (xsize) memcpy(tmp.Row(p), fg[p] + x0, xsize * sizeof(**fg));
+    }
+  } else if (color_blending.mode == PatchBlendMode::kNone) {
+    for (size_t p = 0; p < 3; p++) {
+      if (xsize) memcpy(tmp.Row(p), bg[p] + x0, xsize * sizeof(**fg));
+    }
+  } else {
+    JXL_UNREACHABLE("new PatchBlendMode?");
+  }
+  for (size_t i = 0; i < 3 + num_ec; i++) {
+    if (xsize != 0) memcpy(out[i] + x0, tmp.Row(i), xsize * sizeof(**out));
+  }
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/blending.h b/third-party/libjxl/libjxl/lib/jxl/blending.h
new file mode 100644
index 0000000000..7eab7d50cd
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/blending.h
@@ -0,0 +1,24 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BLENDING_H_
+#define LIB_JXL_BLENDING_H_
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+bool NeedsBlending(PassesDecoderState* dec_state);
+
+void PerformBlending(const float* const* bg, const float* const* fg,
+                     float* const* out, size_t x0, size_t xsize,
+                     const PatchBlending& color_blending,
+                     const PatchBlending* ec_blending,
+                     const std::vector<ExtraChannelInfo>& extra_channel_info);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BLENDING_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/blending_test.cc b/third-party/libjxl/libjxl/lib/jxl/blending_test.cc
new file mode 100644
index 0000000000..ff4c46c529
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/blending_test.cc
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+using ::testing::SizeIs;
+
+TEST(BlendingTest, Crops) {
+  const PaddedBytes compressed =
+      jxl::test::ReadTestData("jxl/blending/cropped_traffic_light.jxl");
+  CodecInOut decoded;
+  ASSERT_TRUE(test::DecodeFile({}, Span<const uint8_t>(compressed), &decoded));
+  ASSERT_THAT(decoded.frames, SizeIs(4));
+
+  int i = 0;
+  for (const ImageBundle& ib : decoded.frames) {
+    std::ostringstream filename;
+    filename << "jxl/blending/cropped_traffic_light_frame-" << i << ".png";
+    const PaddedBytes compressed_frame =
+        jxl::test::ReadTestData(filename.str());
+    CodecInOut frame;
+    ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(compressed_frame), &frame));
+    JXL_EXPECT_OK(SamePixels(ib.color(), *frame.Main().color(), _));
+    ++i;
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/box_content_decoder.cc b/third-party/libjxl/libjxl/lib/jxl/box_content_decoder.cc
new file mode 100644
index 0000000000..c4cba3a31a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/box_content_decoder.cc
@@ -0,0 +1,101 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/box_content_decoder.h"
+
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+
+JxlBoxContentDecoder::JxlBoxContentDecoder() {}
+
+JxlBoxContentDecoder::~JxlBoxContentDecoder() {
+  if (brotli_dec) {
+    BrotliDecoderDestroyInstance(brotli_dec);
+  }
+}
+
+void JxlBoxContentDecoder::StartBox(bool brob_decode, bool box_until_eof,
+                                    size_t contents_size) {
+  if (brotli_dec) {
+    BrotliDecoderDestroyInstance(brotli_dec);
+    brotli_dec = nullptr;
+  }
+  header_done_ = false;
+  brob_decode_ = brob_decode;
+  box_until_eof_ = box_until_eof;
+  remaining_ = box_until_eof ? 0 : contents_size;
+  pos_ = 0;
+}
+
+JxlDecoderStatus JxlBoxContentDecoder::Process(const uint8_t* next_in,
+                                               size_t avail_in, size_t box_pos,
+                                               uint8_t** next_out,
+                                               size_t* avail_out) {
+  // `next_in` points at box offset `box_pos`; skip ahead to `pos_`, the offset
+  // this decoder has already consumed up to.
+  // NOTE(review): assumes box_pos <= pos_ <= box_pos + avail_in; the unsigned
+  // subtractions below are unchecked - confirm against the caller.
+  next_in += pos_ - box_pos;
+  avail_in -= pos_ - box_pos;
+
+  if (brob_decode_) {
+    if (!header_done_) {
+      if (avail_in < 4) return JXL_DEC_NEED_MORE_INPUT;
+      if (!box_until_eof_) {
+        if (remaining_ < 4) return JXL_DEC_ERROR;
+        remaining_ -= 4;
+      }
+      next_in += 4;
+      avail_in -= 4;
+      pos_ += 4;
+      header_done_ = true;
+    }
+
+    if (!brotli_dec) {
+      brotli_dec = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
+      if (!brotli_dec) return JXL_DEC_ERROR;  // instance allocation failed
+    }
+
+    const uint8_t* next_in_before = next_in;
+    uint8_t* next_out_before = *next_out;
+    msan::MemoryIsInitialized(next_in, avail_in);
+    BrotliDecoderResult res = BrotliDecoderDecompressStream(
+        brotli_dec, &avail_in, &next_in, avail_out, next_out, nullptr);
+    size_t consumed = next_in - next_in_before;
+    size_t produced = *next_out - next_out_before;
+    if (res == BROTLI_DECODER_RESULT_ERROR) return JXL_DEC_ERROR;
+    msan::UnpoisonMemory(next_out_before, produced);
+    pos_ += consumed;
+    if (!box_until_eof_) remaining_ -= consumed;
+    if (res == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
+      return JXL_DEC_NEED_MORE_INPUT;
+    }
+    if (res == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
+      return JXL_DEC_BOX_NEED_MORE_OUTPUT;
+    }
+    if (res == BROTLI_DECODER_RESULT_SUCCESS) return JXL_DEC_SUCCESS;
+    return JXL_DEC_ERROR;  // unknown Brotli result
+  } else {
+    // remaining box bytes as seen from dec->file_pos
+    size_t can_read = avail_in;
+    if (!box_until_eof_) can_read = std::min<size_t>(can_read, remaining_);
+    size_t to_write = std::min<size_t>(can_read, *avail_out);
+    if (to_write != 0) memcpy(*next_out, next_in, to_write);
+
+    *next_out += to_write;
+    *avail_out -= to_write;
+    if (!box_until_eof_) remaining_ -= to_write;
+    pos_ += to_write;
+
+    if (to_write < can_read) return JXL_DEC_BOX_NEED_MORE_OUTPUT;
+
+    if (!box_until_eof_ && remaining_ > 0) return JXL_DEC_NEED_MORE_INPUT;
+
+    return JXL_DEC_SUCCESS;
+  }
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/box_content_decoder.h b/third-party/libjxl/libjxl/lib/jxl/box_content_decoder.h
new file mode 100644
index 0000000000..6153360a8e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/box_content_decoder.h
@@ -0,0 +1,49 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BOX_CONTENT_DECODER_H_
+#define LIB_JXL_BOX_CONTENT_DECODER_H_
+
+#include <brotli/decode.h>
+#include <jxl/decode.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <memory>
+#include <vector>
+
+namespace jxl {
+
+/** Outputs the contents of a box in a streaming fashion, either directly, or
+ * optionally decoding with Brotli, in case of a brob box. The input must be
+ * the contents of a box, excluding the box header.
+ */
+class JxlBoxContentDecoder {
+ public:
+  JxlBoxContentDecoder();
+  ~JxlBoxContentDecoder();
+
+  void StartBox(bool brob_decode, bool box_until_eof, size_t contents_size);
+
+  // Outputs decoded bytes from the box, decoding with brotli if needed.
+  // box_pos is the position in the box content which next_in points to.
+  // Returns success, whether more input or output bytes are needed, or error.
+  JxlDecoderStatus Process(const uint8_t* next_in, size_t avail_in,
+                           size_t box_pos, uint8_t** next_out,
+                           size_t* avail_out);
+
+ private:
+  BrotliDecoderState* brotli_dec = nullptr;  // null until Process() needs it
+
+  bool header_done_ = false;    // 4-byte prefix of a brob box already skipped
+  bool brob_decode_ = false;    // Process() runs the content through Brotli
+  bool box_until_eof_ = false;  // when set, remaining_ is not tracked
+  size_t remaining_ = 0;        // content bytes left, unless box_until_eof_
+  size_t pos_ = 0;              // box-content offset consumed so far
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BOX_CONTENT_DECODER_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/butteraugli/butteraugli.cc b/third-party/libjxl/libjxl/lib/jxl/butteraugli/butteraugli.cc
new file mode 100644
index 0000000000..dec8c5ea2d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/butteraugli/butteraugli.cc
@@ -0,0 +1,1939 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// Author: Jyrki Alakuijala (jyrki.alakuijala@gmail.com)
+//
+// The physical architecture of butteraugli is based on the following naming
+// convention:
+//   * Opsin - dynamics of the photosensitive chemicals in the retina
+//             with their immediate electrical processing
+//   * Xyb - hybrid opponent/trichromatic color space
+//     x is roughly red-subtract-green.
+//     y is yellow.
+//     b is blue.
+//     Xyb values are computed from Opsin mixing, not directly from rgb.
+//   * Mask - for visual masking
+//   * Hf - color modeling for spatially high-frequency features
+//   * Lf - color modeling for spatially low-frequency features
+//   * Diffmap - to cluster and build an image of error between the images
+//   * Blur - to hold the smoothing code
+
+#include "lib/jxl/butteraugli/butteraugli.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <new>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/butteraugli/butteraugli.cc"
+#include <hwy/foreach_target.h>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/gauss_blur.h"
+#include "lib/jxl/image_ops.h"
+
+#ifndef JXL_BUTTERAUGLI_ONCE
+#define JXL_BUTTERAUGLI_ONCE
+
+namespace jxl {
+
+std::vector<float> ComputeKernel(float sigma) {
+  // Unnormalized Gaussian taps; a larger radius factor (2.25) is more accurate.
+  const int radius = std::max<int>(1, 2.25f * std::fabs(sigma));
+  const double exp_scale = -1.0 / (2.0 * sigma * sigma);
+  std::vector<float> taps(2 * radius + 1);
+  for (int tap = -radius; tap <= radius; ++tap) {
+    taps[tap + radius] = std::exp(exp_scale * tap * tap);
+  }
+  return taps;
+}
+
+void ConvolveBorderColumn(const ImageF& in, const std::vector<float>& kernel,
+                          const size_t x, float* BUTTERAUGLI_RESTRICT row_out) {
+  // Convolves column x of `in` with the part of `kernel` that overlaps the
+  // image, renormalized by the sum of the taps used; writes one value per row.
+  const size_t radius = kernel.size() / 2;
+  const int first = x < radius ? 0 : x - radius;
+  const int last = std::min<int>(in.xsize() - 1, x + radius);
+  float tap_total = 0.0f;
+  for (int c = first; c <= last; ++c) tap_total += kernel[c - x + radius];
+  const float inv_total = 1.0f / tap_total;
+  for (size_t y = 0; y < in.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT src = in.Row(y);
+    float acc = 0.0f;
+    for (int c = first; c <= last; ++c) {
+      acc += src[c] * kernel[c - x + radius];
+    }
+    row_out[y] = acc * inv_total;
+  }
+}
+
+// Convolves each row of `in` with `kernel` and stores the result transposed.
+void ConvolutionWithTranspose(const ImageF& in,
+                              const std::vector<float>& kernel,
+                              ImageF* BUTTERAUGLI_RESTRICT out) {
+  JXL_CHECK(out->xsize() == in.ysize());  // out has the transposed shape
+  JXL_CHECK(out->ysize() == in.xsize());
+  const size_t len = kernel.size();
+  const size_t offset = len / 2;
+  float weight_no_border = 0.0f;
+  for (size_t j = 0; j < len; ++j) {
+    weight_no_border += kernel[j];
+  }
+  const float scale_no_border = 1.0f / weight_no_border;
+  const size_t border1 = std::min(in.xsize(), offset);
+  const size_t border2 = in.xsize() > offset ? in.xsize() - offset : 0;
+  std::vector<float> scaled_kernel(len / 2 + 1);
+  for (size_t i = 0; i <= len / 2; ++i) {
+    scaled_kernel[i] = kernel[i] * scale_no_border;
+  }
+  // Columns in [border1, border2) have the full kernel inside the row.
+  // middle: unrolled per kernel length; mirrored taps share one weight
+  switch (len) {
+    case 7: {
+      const float sk0 = scaled_kernel[0];
+      const float sk1 = scaled_kernel[1];
+      const float sk2 = scaled_kernel[2];
+      const float sk3 = scaled_kernel[3];
+      for (size_t y = 0; y < in.ysize(); ++y) {
+        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+        for (size_t x = border1; x < border2; ++x, ++row_in) {
+          const float sum0 = (row_in[0] + row_in[6]) * sk0;
+          const float sum1 = (row_in[1] + row_in[5]) * sk1;
+          const float sum2 = (row_in[2] + row_in[4]) * sk2;
+          const float sum = (row_in[3]) * sk3 + sum0 + sum1 + sum2;
+          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+          row_out[y] = sum;
+        }
+      }
+    } break;
+    case 13: {
+      for (size_t y = 0; y < in.ysize(); ++y) {
+        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+        for (size_t x = border1; x < border2; ++x, ++row_in) {
+          float sum0 = (row_in[0] + row_in[12]) * scaled_kernel[0];
+          float sum1 = (row_in[1] + row_in[11]) * scaled_kernel[1];
+          float sum2 = (row_in[2] + row_in[10]) * scaled_kernel[2];
+          float sum3 = (row_in[3] + row_in[9]) * scaled_kernel[3];
+          sum0 += (row_in[4] + row_in[8]) * scaled_kernel[4];
+          sum1 += (row_in[5] + row_in[7]) * scaled_kernel[5];
+          const float sum = (row_in[6]) * scaled_kernel[6];
+          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+          row_out[y] = sum + sum0 + sum1 + sum2 + sum3;
+        }
+      }
+      break;
+    }
+    case 15: {
+      for (size_t y = 0; y < in.ysize(); ++y) {
+        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+        for (size_t x = border1; x < border2; ++x, ++row_in) {
+          float sum0 = (row_in[0] + row_in[14]) * scaled_kernel[0];
+          float sum1 = (row_in[1] + row_in[13]) * scaled_kernel[1];
+          float sum2 = (row_in[2] + row_in[12]) * scaled_kernel[2];
+          float sum3 = (row_in[3] + row_in[11]) * scaled_kernel[3];
+          sum0 += (row_in[4] + row_in[10]) * scaled_kernel[4];
+          sum1 += (row_in[5] + row_in[9]) * scaled_kernel[5];
+          sum2 += (row_in[6] + row_in[8]) * scaled_kernel[6];
+          const float sum = (row_in[7]) * scaled_kernel[7];
+          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+          row_out[y] = sum + sum0 + sum1 + sum2 + sum3;
+        }
+      }
+      break;
+    }
+    case 33: {
+      for (size_t y = 0; y < in.ysize(); ++y) {
+        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+        for (size_t x = border1; x < border2; ++x, ++row_in) {
+          float sum0 = (row_in[0] + row_in[32]) * scaled_kernel[0];
+          float sum1 = (row_in[1] + row_in[31]) * scaled_kernel[1];
+          float sum2 = (row_in[2] + row_in[30]) * scaled_kernel[2];
+          float sum3 = (row_in[3] + row_in[29]) * scaled_kernel[3];
+          sum0 += (row_in[4] + row_in[28]) * scaled_kernel[4];
+          sum1 += (row_in[5] + row_in[27]) * scaled_kernel[5];
+          sum2 += (row_in[6] + row_in[26]) * scaled_kernel[6];
+          sum3 += (row_in[7] + row_in[25]) * scaled_kernel[7];
+          sum0 += (row_in[8] + row_in[24]) * scaled_kernel[8];
+          sum1 += (row_in[9] + row_in[23]) * scaled_kernel[9];
+          sum2 += (row_in[10] + row_in[22]) * scaled_kernel[10];
+          sum3 += (row_in[11] + row_in[21]) * scaled_kernel[11];
+          sum0 += (row_in[12] + row_in[20]) * scaled_kernel[12];
+          sum1 += (row_in[13] + row_in[19]) * scaled_kernel[13];
+          sum2 += (row_in[14] + row_in[18]) * scaled_kernel[14];
+          sum3 += (row_in[15] + row_in[17]) * scaled_kernel[15];
+          const float sum = (row_in[16]) * scaled_kernel[16];
+          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+          row_out[y] = sum + sum0 + sum1 + sum2 + sum3;
+        }
+      }
+      break;
+    }
+    default:  // only kernel lengths 7, 13, 15 and 33 are implemented
+      JXL_UNREACHABLE("Kernel size %" PRIuS " not implemented", len);
+  }
+  // left border: the kernel overhangs the start of the row
+  for (size_t x = 0; x < border1; ++x) {
+    ConvolveBorderColumn(in, kernel, x, out->Row(x));
+  }
+
+  // right border: the kernel overhangs the end of the row
+  for (size_t x = border2; x < in.xsize(); ++x) {
+    ConvolveBorderColumn(in, kernel, x, out->Row(x));
+  }
+}
+
+// A blur somewhat similar to a 2D Gaussian blur.
+// See: https://en.wikipedia.org/wiki/Gaussian_blur
+//
+// This is a bottleneck because the sigma can be quite large (>7). We can use
+// gauss_blur.cc (runtime independent of sigma, closer to a 4*sigma truncated
+// Gaussian and our 2.25 in ComputeKernel), but its boundary conditions are
+// zero-valued. This leads to noticeable differences at the edges of diffmaps.
+// We retain a special case for 5x5 kernels (even faster than gauss_blur),
+// optionally use gauss_blur followed by fixup of the borders for large images,
+// or fall back to the previous truncated FIR followed by a transpose.
+void Blur(const ImageF& in, float sigma, const ButteraugliParams& params,
+          BlurTemp* temp, ImageF* out) {
+  const std::vector<float> taps = ComputeKernel(sigma);
+  // The 5-tap fast path must not run when `out` is `in`: Separable5 convolves
+  // in place.
+  const bool use_separable5 = taps.size() == 5 && &in != out;
+  if (!use_separable5) {
+    // General case: two horizontal passes, each followed by a transpose.
+    ImageF* JXL_RESTRICT transposed = temp->GetTransposed(in);
+    ConvolutionWithTranspose(in, taps, transposed);
+    ConvolutionWithTranspose(*transposed, taps, out);
+    return;
+  }
+
+  float total = 0.0f;
+  for (size_t i = 0; i < taps.size(); ++i) total += taps[i];
+  const float norm = 1.0f / total;
+  const float w_center = taps[2] * norm;
+  const float w_inner = taps[1] * norm;
+  const float w_outer = taps[0] * norm;
+  const WeightsSeparable5 weights = {
+      {HWY_REP4(w_center), HWY_REP4(w_inner), HWY_REP4(w_outer)},
+      {HWY_REP4(w_center), HWY_REP4(w_inner), HWY_REP4(w_outer)},
+  };
+  Separable5(in, Rect(in), weights, /*pool=*/nullptr, out);
+}
+
+// Empty tag types: let PaddedMaltaUnit pick a MaltaUnit overload by type.
+struct MaltaTagLF {};  // selects the overload that sums 5 samples per line
+struct MaltaTag {};  // selects the overload that sums 7 or 9 samples per line
+
+}  // namespace jxl
+
+#endif  // JXL_BUTTERAUGLI_ONCE
+
+#include <hwy/highway.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Div;
+using hwy::HWY_NAMESPACE::Gt;
+using hwy::HWY_NAMESPACE::IfThenElse;
+using hwy::HWY_NAMESPACE::IfThenElseZero;
+using hwy::HWY_NAMESPACE::Lt;
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::MulSub;
+using hwy::HWY_NAMESPACE::Neg;
+using hwy::HWY_NAMESPACE::Sub;
+using hwy::HWY_NAMESPACE::Vec;
+using hwy::HWY_NAMESPACE::ZeroIfNegative;
+
+template <class D, class V>
+HWY_INLINE V MaximumClamp(D d, V v, double kMaxVal) {
+  // Soft clamp: the excess beyond +/-kMaxVal is scaled by kSlope, not cut off.
+  static const double kSlope = 0.724216145665;
+  const V slope = Set(d, kSlope);
+  const V limit = Set(d, kMaxVal);
+  const V above = MulAdd(Sub(v, limit), slope, limit);  // for v >= limit
+  const V below = MulSub(Add(v, limit), slope, limit);  // for v < -limit
+  const V upper_fixed = IfThenElse(Ge(v, limit), above, v);
+  return IfThenElse(Lt(v, Neg(limit)), below, upper_fixed);
+}
+
+// Dead zone: values in [-kw, kw] become 0, others move towards zero by kw.
+template <class D, class V>
+HWY_INLINE V RemoveRangeAroundZero(const D d, const double kw, const V x) {
+  const auto width = Set(d, kw);
+  const auto negative_part = IfThenElseZero(Lt(x, Neg(width)), Add(x, width));
+  return IfThenElse(Gt(x, width), Sub(x, width), negative_part);
+}
+
+// Doubles values in [-kw, kw]; larger magnitudes are pushed outward by kw.
+template <class D, class V>
+HWY_INLINE V AmplifyRangeAroundZero(const D d, const double kw, const V x) {
+  const auto width = Set(d, kw);
+  const auto inner = IfThenElse(Lt(x, Neg(width)), Sub(x, width), Add(x, x));
+  return IfThenElse(Gt(x, width), Add(x, width), inner);
+}
+
+// Maps low-frequency XYB to the 'vals' space: X and Y are scaled, and B has a
+// multiple of Y added before scaling. Vals space can be converted to L2-norm
+// space (Euclidean and normalized) through visual masking.
+template <class D, class V>
+HWY_INLINE void XybLowFreqToVals(const D d, const V& x, const V& y,
+                                 const V& b_arg, V* HWY_RESTRICT valx,
+                                 V* HWY_RESTRICT valy, V* HWY_RESTRICT valb) {
+  static const double kScaleX = 33.832837186260;
+  static const double kScaleY = 14.458268100570;
+  static const double kScaleB = 49.87984651440;
+  static const double kYToB = -0.362267051518;
+  const V scale_x = Set(d, kScaleX);
+  const V scale_y = Set(d, kScaleY);
+  const V scale_b = Set(d, kScaleB);
+  // B channel with the Y contribution mixed in: b_arg + kYToB * y.
+  const V mixed_b = MulAdd(Set(d, kYToB), y, b_arg);
+  *valb = Mul(mixed_b, scale_b);
+  *valx = Mul(x, scale_x);
+  *valy = Mul(y, scale_y);
+}
+
+void SuppressXByY(const ImageF& in_x, const ImageF& in_y, const double yw,
+                  ImageF* HWY_RESTRICT out) {
+  // out = in_x * (s + (1 - s) * yw / (yw + in_y^2)): the X signal is attenuated
+  // towards a factor of s where the Y signal is strong.
+  JXL_DASSERT(SameSize(in_x, in_y) && SameSize(in_x, *out));
+  static const double s = 0.653020556257;
+  const HWY_FULL(float) d;
+  const auto floor_gain = Set(d, s);
+  const auto variable_gain = Set(d, 1.0 - s);
+  const auto y_weight = Set(d, yw);
+  const size_t width = in_x.xsize();
+  const size_t height = in_x.ysize();
+  const size_t step = Lanes(d);
+
+  for (size_t row = 0; row < height; ++row) {
+    const float* HWY_RESTRICT src_x = in_x.ConstRow(row);
+    const float* HWY_RESTRICT src_y = in_y.ConstRow(row);
+    float* HWY_RESTRICT dst = out->Row(row);
+    for (size_t col = 0; col < width; col += step) {
+      const auto vy = Load(d, src_y + col);
+      const auto ratio = Div(y_weight, MulAdd(vy, vy, y_weight));
+      const auto gain = MulAdd(ratio, variable_gain, floor_gain);
+      Store(Mul(gain, Load(d, src_x + col)), d, dst + col);
+    }
+  }
+}
+
+static void SeparateFrequencies(size_t xsize, size_t ysize,
+                                const ButteraugliParams& params,
+                                BlurTemp* blur_temp, const Image3F& xyb,
+                                PsychoImage& ps) {
+  const HWY_FULL(float) d;
+  // Splits xyb into the bands of ps: lf, mf, and (X and Y only) hf and uhf.
+  // Extract lf ...
+  static const double kSigmaLf = 7.15593339443;
+  static const double kSigmaHf = 3.22489901262;
+  static const double kSigmaUhf = 1.56416327805;
+  // hf is allocated here; lf and mf take their dimensions from xyb below.
+  ps.hf[0] = ImageF(xsize, ysize);
+  ps.hf[1] = ImageF(xsize, ysize);
+  ps.lf = Image3F(xyb.xsize(), xyb.ysize());
+  ps.mf = Image3F(xyb.xsize(), xyb.ysize());
+  for (int i = 0; i < 3; ++i) {
+    Blur(xyb.Plane(i), kSigmaLf, params, blur_temp, &ps.lf.Plane(i));
+
+    // ... and keep everything else in mf.
+    for (size_t y = 0; y < ysize; ++y) {
+      const float* BUTTERAUGLI_RESTRICT row_xyb = xyb.PlaneRow(i, y);
+      const float* BUTTERAUGLI_RESTRICT row_lf = ps.lf.ConstPlaneRow(i, y);
+      float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(i, y);
+      for (size_t x = 0; x < xsize; x += Lanes(d)) {
+        const auto mf = Sub(Load(d, row_xyb + x), Load(d, row_lf + x));
+        Store(mf, d, row_mf + x);
+      }
+    }
+    if (i == 2) {
+      // The B plane has no hf/uhf bands: blur mf in place and stop.
+      Blur(ps.mf.Plane(i), kSigmaHf, params, blur_temp, &ps.mf.Plane(i));
+      break;
+    }
+    // Divide mf into mf and hf.
+    for (size_t y = 0; y < ysize; ++y) {
+      float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(i, y);
+      float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[i].Row(y);
+      for (size_t x = 0; x < xsize; x += Lanes(d)) {
+        Store(Load(d, row_mf + x), d, row_hf + x);
+      }
+    }
+    Blur(ps.mf.Plane(i), kSigmaHf, params, blur_temp, &ps.mf.Plane(i));
+    static const double kRemoveMfRange = 0.29;
+    static const double kAddMfRange = 0.1;
+    if (i == 0) {
+      for (size_t y = 0; y < ysize; ++y) {
+        float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(0, y);
+        float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[0].Row(y);
+        for (size_t x = 0; x < xsize; x += Lanes(d)) {
+          auto mf = Load(d, row_mf + x);
+          auto hf = Sub(Load(d, row_hf + x), mf);
+          mf = RemoveRangeAroundZero(d, kRemoveMfRange, mf);
+          Store(mf, d, row_mf + x);
+          Store(hf, d, row_hf + x);
+        }
+      }
+    } else {
+      for (size_t y = 0; y < ysize; ++y) {
+        float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(1, y);
+        float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[1].Row(y);
+        for (size_t x = 0; x < xsize; x += Lanes(d)) {
+          auto mf = Load(d, row_mf + x);
+          auto hf = Sub(Load(d, row_hf + x), mf);
+
+          mf = AmplifyRangeAroundZero(d, kAddMfRange, mf);
+          Store(mf, d, row_mf + x);
+          Store(hf, d, row_hf + x);
+        }
+      }
+    }
+  }
+
+  // Temporarily used as output of SuppressXByY
+  ps.uhf[0] = ImageF(xsize, ysize);
+  ps.uhf[1] = ImageF(xsize, ysize);
+
+  // Suppress red-green by intensity change in the high freq channels.
+  static const double suppress = 46.0;
+  SuppressXByY(ps.hf[0], ps.hf[1], suppress, &ps.uhf[0]);
+  // hf is the SuppressXByY output, uhf will be written below.
+  ps.hf[0].Swap(ps.uhf[0]);
+
+  for (int i = 0; i < 2; ++i) {
+    // Divide hf into hf and uhf.
+    for (size_t y = 0; y < ysize; ++y) {
+      float* BUTTERAUGLI_RESTRICT row_uhf = ps.uhf[i].Row(y);
+      float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[i].Row(y);
+      for (size_t x = 0; x < xsize; ++x) {
+        row_uhf[x] = row_hf[x];
+      }
+    }
+    Blur(ps.hf[i], kSigmaUhf, params, blur_temp, &ps.hf[i]);
+    static const double kRemoveHfRange = 1.5;
+    static const double kAddHfRange = 0.132;
+    static const double kRemoveUhfRange = 0.04;
+    static const double kMaxclampHf = 28.4691806922;
+    static const double kMaxclampUhf = 5.19175294647;
+    static const double kMulYHf = 2.155;
+    static const double kMulYUhf = 2.69313763794;
+    if (i == 0) {
+      for (size_t y = 0; y < ysize; ++y) {
+        float* BUTTERAUGLI_RESTRICT row_uhf = ps.uhf[0].Row(y);
+        float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[0].Row(y);
+        for (size_t x = 0; x < xsize; x += Lanes(d)) {
+          auto hf = Load(d, row_hf + x);
+          auto uhf = Sub(Load(d, row_uhf + x), hf);
+          hf = RemoveRangeAroundZero(d, kRemoveHfRange, hf);
+          uhf = RemoveRangeAroundZero(d, kRemoveUhfRange, uhf);
+          Store(hf, d, row_hf + x);
+          Store(uhf, d, row_uhf + x);
+        }
+      }
+    } else {
+      for (size_t y = 0; y < ysize; ++y) {
+        float* BUTTERAUGLI_RESTRICT row_uhf = ps.uhf[1].Row(y);
+        float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[1].Row(y);
+        for (size_t x = 0; x < xsize; x += Lanes(d)) {
+          auto hf = Load(d, row_hf + x);
+          hf = MaximumClamp(d, hf, kMaxclampHf);
+
+          auto uhf = Sub(Load(d, row_uhf + x), hf);
+          uhf = MaximumClamp(d, uhf, kMaxclampUhf);
+          uhf = Mul(uhf, Set(d, kMulYUhf));
+          Store(uhf, d, row_uhf + x);
+
+          hf = Mul(hf, Set(d, kMulYHf));
+          hf = AmplifyRangeAroundZero(d, kAddHfRange, hf);
+          Store(hf, d, row_hf + x);
+        }
+      }
+    }
+  }
+  // Modify range around zero code only concerns the high frequency
+  // planes and only the X and Y channels.
+  // Convert low freq xyb to vals space so that we can do a simple squared sum
+  // diff on the low frequencies later.
+  for (size_t y = 0; y < ysize; ++y) {
+    float* BUTTERAUGLI_RESTRICT row_x = ps.lf.PlaneRow(0, y);
+    float* BUTTERAUGLI_RESTRICT row_y = ps.lf.PlaneRow(1, y);
+    float* BUTTERAUGLI_RESTRICT row_b = ps.lf.PlaneRow(2, y);
+    for (size_t x = 0; x < xsize; x += Lanes(d)) {
+      auto valx = Undefined(d);
+      auto valy = Undefined(d);
+      auto valb = Undefined(d);
+      XybLowFreqToVals(d, Load(d, row_x + x), Load(d, row_y + x),
+                       Load(d, row_b + x), &valx, &valy, &valb);
+      Store(valx, d, row_x + x);
+      Store(valy, d, row_y + x);
+      Store(valb, d, row_b + x);
+    }
+  }
+}
+
+namespace {  // Sum helpers; every overload fixes its own association order.
+template <typename V>
+BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d) {
+  return Add(Add(a, b), Add(c, d));  // (a+b) + (c+d)
+}
+template <typename V>
+BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e) {
+  return Add(Add(a, b), Add(c, Add(d, e)));  // (a+b) + (c + (d+e))
+}
+template <typename V>
+BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e, V f, V g) {
+  return Add(Add(a, b), Add(c, Add(Add(d, e), Add(f, g))));
+}
+template <typename V>
+BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e, V f, V g, V h, V i) {
+  return Add(Add(Add(Add(a, b), Add(c, d)), Add(Add(e, f), Add(g, h))), i);
+}
+}  // namespace
+
+template <class D>
+Vec<D> MaltaUnit(MaltaTagLF /*tag*/, const D df,
+                 const float* BUTTERAUGLI_RESTRICT d, const intptr_t xs) {
+  const intptr_t xs3 = 3 * xs;
+  // xs is the offset (in floats) from one row to the next; d is the center.
+  const auto center = LoadU(df, d);
+  // Each of the 16 line kernels below sums 5 samples on a line through d.
+  // x grows, y constant
+  const auto sum_yconst = Sum(LoadU(df, d - 4), LoadU(df, d - 2), center,
+                              LoadU(df, d + 2), LoadU(df, d + 4));
+  // Will return this: the sum of the squared line-kernel sums.
+  auto retval = Mul(sum_yconst, sum_yconst);
+  {
+    // y grows, x constant
+    auto sum = Sum(LoadU(df, d - xs3 - xs), LoadU(df, d - xs - xs), center,
+                   LoadU(df, d + xs + xs), LoadU(df, d + xs3 + xs));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    // both grow
+    auto sum = Sum(LoadU(df, d - xs3 - 3), LoadU(df, d - xs - xs - 2), center,
+                   LoadU(df, d + xs + xs + 2), LoadU(df, d + xs3 + 3));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    // y grows, x shrinks
+    auto sum = Sum(LoadU(df, d - xs3 + 3), LoadU(df, d - xs - xs + 2), center,
+                   LoadU(df, d + xs + xs - 2), LoadU(df, d + xs3 - 3));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    // y grows -4 to 4, x shrinks 1 -> -1
+    auto sum =
+        Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs - xs + 1), center,
+            LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 + xs - 1));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    //  y grows -4 to 4, x grows -1 -> 1
+    auto sum =
+        Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs - xs - 1), center,
+            LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + xs + 1));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    // x grows -4 to 4, y grows -1 to 1
+    auto sum = Sum(LoadU(df, d - 4 - xs), LoadU(df, d - 2 - xs), center,
+                   LoadU(df, d + 2 + xs), LoadU(df, d + 4 + xs));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    // x grows -4 to 4, y shrinks 1 to -1
+    auto sum = Sum(LoadU(df, d - 4 + xs), LoadU(df, d - 2 + xs), center,
+                   LoadU(df, d + 2 - xs), LoadU(df, d + 4 - xs));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1__*______
+       2___*_____
+       3_________
+       4____0____
+       5_________
+       6_____*___
+       7______*__
+       8_________ */
+    auto sum = Sum(LoadU(df, d - xs3 - 2), LoadU(df, d - xs - xs - 1), center,
+                   LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + 2));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1______*__
+       2_____*___
+       3_________
+       4____0____
+       5_________
+       6___*_____
+       7__*______
+       8_________ */
+    auto sum = Sum(LoadU(df, d - xs3 + 2), LoadU(df, d - xs - xs + 1), center,
+                   LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 - 2));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1_________
+       2_*_______
+       3__*______
+       4____0____
+       5______*__
+       6_______*_
+       7_________
+       8_________ */
+    auto sum = Sum(LoadU(df, d - xs - xs - 3), LoadU(df, d - xs - 2), center,
+                   LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 3));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1_________
+       2_______*_
+       3______*__
+       4____0____
+       5__*______
+       6_*_______
+       7_________
+       8_________ */
+    auto sum = Sum(LoadU(df, d - xs - xs + 3), LoadU(df, d - xs + 2), center,
+                   LoadU(df, d + xs - 2), LoadU(df, d + xs + xs - 3));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1_________
+       2________*
+       3______*__
+       4____0____
+       5__*______
+       6*________
+       7_________
+       8_________ */
+
+    auto sum = Sum(LoadU(df, d + xs + xs - 4), LoadU(df, d + xs - 2), center,
+                   LoadU(df, d - xs + 2), LoadU(df, d - xs - xs + 4));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1_________
+       2*________
+       3__*______
+       4____0____
+       5______*__
+       6________*
+       7_________
+       8_________ */
+    auto sum = Sum(LoadU(df, d - xs - xs - 4), LoadU(df, d - xs - 2), center,
+                   LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 4));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0__*______
+       1_________
+       2___*_____
+       3_________
+       4____0____
+       5_________
+       6_____*___
+       7_________
+       8______*__ */
+    auto sum =
+        Sum(LoadU(df, d - xs3 - xs - 2), LoadU(df, d - xs - xs - 1), center,
+            LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + xs + 2));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0______*__
+       1_________
+       2_____*___
+       3_________
+       4____0____
+       5_________
+       6___*_____
+       7_________
+       8__*______ */
+    auto sum =
+        Sum(LoadU(df, d - xs3 - xs + 2), LoadU(df, d - xs - xs + 1), center,
+            LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 + xs - 2));
+    retval = MulAdd(sum, sum, retval);
+  }
+  return retval;
+}
+
+template <class D>
+Vec<D> MaltaUnit(MaltaTag /*tag*/, const D df,
+                 const float* BUTTERAUGLI_RESTRICT d, const intptr_t xs) {
+  const intptr_t xs3 = 3 * xs;
+  // xs is the offset (in floats) from one row to the next; d is the center.
+  const auto center = LoadU(df, d);
+  // Each of the 16 line kernels below sums 7 or 9 samples on a line through d.
+  // x grows, y constant
+  const auto sum_yconst =
+      Sum(LoadU(df, d - 4), LoadU(df, d - 3), LoadU(df, d - 2),
+          LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2),
+          LoadU(df, d + 3), LoadU(df, d + 4));
+  // Will return this: the sum of the squared line-kernel sums.
+  auto retval = Mul(sum_yconst, sum_yconst);
+
+  {
+    // y grows, x constant
+    auto sum = Sum(LoadU(df, d - xs3 - xs), LoadU(df, d - xs3),
+                   LoadU(df, d - xs - xs), LoadU(df, d - xs), center,
+                   LoadU(df, d + xs), LoadU(df, d + xs + xs),
+                   LoadU(df, d + xs3), LoadU(df, d + xs3 + xs));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    // both grow
+    auto sum = Sum(LoadU(df, d - xs3 - 3), LoadU(df, d - xs - xs - 2),
+                   LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1),
+                   LoadU(df, d + xs + xs + 2), LoadU(df, d + xs3 + 3));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    // y grows, x shrinks
+    auto sum = Sum(LoadU(df, d - xs3 + 3), LoadU(df, d - xs - xs + 2),
+                   LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1),
+                   LoadU(df, d + xs + xs - 2), LoadU(df, d + xs3 - 3));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    // y grows -4 to 4, x shrinks 1 -> -1
+    auto sum = Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs3 + 1),
+                   LoadU(df, d - xs - xs + 1), LoadU(df, d - xs), center,
+                   LoadU(df, d + xs), LoadU(df, d + xs + xs - 1),
+                   LoadU(df, d + xs3 - 1), LoadU(df, d + xs3 + xs - 1));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    //  y grows -4 to 4, x grows -1 -> 1
+    auto sum = Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs3 - 1),
+                   LoadU(df, d - xs - xs - 1), LoadU(df, d - xs), center,
+                   LoadU(df, d + xs), LoadU(df, d + xs + xs + 1),
+                   LoadU(df, d + xs3 + 1), LoadU(df, d + xs3 + xs + 1));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    // x grows -4 to 4, y grows -1 to 1
+    auto sum =
+        Sum(LoadU(df, d - 4 - xs), LoadU(df, d - 3 - xs), LoadU(df, d - 2 - xs),
+            LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2 + xs),
+            LoadU(df, d + 3 + xs), LoadU(df, d + 4 + xs));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    // x grows -4 to 4, y shrinks 1 to -1
+    auto sum =
+        Sum(LoadU(df, d - 4 + xs), LoadU(df, d - 3 + xs), LoadU(df, d - 2 + xs),
+            LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2 - xs),
+            LoadU(df, d + 3 - xs), LoadU(df, d + 4 - xs));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1__*______
+       2___*_____
+       3___*_____
+       4____0____
+       5_____*___
+       6_____*___
+       7______*__
+       8_________ */
+    auto sum = Sum(LoadU(df, d - xs3 - 2), LoadU(df, d - xs - xs - 1),
+                   LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1),
+                   LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + 2));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1______*__
+       2_____*___
+       3_____*___
+       4____0____
+       5___*_____
+       6___*_____
+       7__*______
+       8_________ */
+    auto sum = Sum(LoadU(df, d - xs3 + 2), LoadU(df, d - xs - xs + 1),
+                   LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1),
+                   LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 - 2));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1_________
+       2_*_______
+       3__**_____
+       4____0____
+       5_____**__
+       6_______*_
+       7_________
+       8_________ */
+    auto sum = Sum(LoadU(df, d - xs - xs - 3), LoadU(df, d - xs - 2),
+                   LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1),
+                   LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 3));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1_________
+       2_______*_
+       3_____**__
+       4____0____
+       5__**_____
+       6_*_______
+       7_________
+       8_________ */
+    auto sum = Sum(LoadU(df, d - xs - xs + 3), LoadU(df, d - xs + 2),
+                   LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1),
+                   LoadU(df, d + xs - 2), LoadU(df, d + xs + xs - 3));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1_________
+       2_________
+       3______***
+       4___*0*___
+       5***______
+       6_________
+       7_________
+       8_________ */
+
+    auto sum =
+        Sum(LoadU(df, d + xs - 4), LoadU(df, d + xs - 3), LoadU(df, d + xs - 2),
+            LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d - xs + 2),
+            LoadU(df, d - xs + 3), LoadU(df, d - xs + 4));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1_________
+       2_________
+       3***______
+       4___*0*___
+       5______***
+       6_________
+       7_________
+       8_________ */
+    auto sum =
+        Sum(LoadU(df, d - xs - 4), LoadU(df, d - xs - 3), LoadU(df, d - xs - 2),
+            LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + xs + 2),
+            LoadU(df, d + xs + 3), LoadU(df, d + xs + 4));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0___*_____
+       1___*_____
+       2___*_____
+       3____*____
+       4____0____
+       5____*____
+       6_____*___
+       7_____*___
+       8_____*___ */
+    auto sum = Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs3 - 1),
+                   LoadU(df, d - xs - xs - 1), LoadU(df, d - xs), center,
+                   LoadU(df, d + xs), LoadU(df, d + xs + xs + 1),
+                   LoadU(df, d + xs3 + 1), LoadU(df, d + xs3 + xs + 1));
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_____*___
+       1_____*___
+       2_____*___
+       3____*____
+       4____0____
+       5____*____
+       6___*_____
+       7___*_____
+       8___*_____ */
+    auto sum = Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs3 + 1),
+                   LoadU(df, d - xs - xs + 1), LoadU(df, d - xs), center,
+                   LoadU(df, d + xs), LoadU(df, d + xs + xs - 1),
+                   LoadU(df, d + xs3 - 1), LoadU(df, d + xs3 + xs - 1));
+    retval = MulAdd(sum, sum, retval);
+  }
+  return retval;
+}
+
+// Evaluates MaltaUnit at (x0, y0) of the packed image "diffs". Interior pixels
+// read the image directly; near the borders a zero-padded 9x9 copy is used.
+template <class Tag>
+static BUTTERAUGLI_INLINE float PaddedMaltaUnit(const ImageF& diffs,
+                                                const size_t x0,
+                                                const size_t y0) {
+  const HWY_CAPPED(float, 1) df;
+  const float* BUTTERAUGLI_RESTRICT center = diffs.ConstRow(y0) + x0;
+  const bool interior = x0 >= 4 && y0 >= 4 && x0 < (diffs.xsize() - 4) &&
+                        y0 < (diffs.ysize() - 4);
+  if (interior) {
+    return GetLane(MaltaUnit(Tag(), df, center, diffs.PixelsPerRow()));
+  }
+
+  // 9 rows with a stride of 12 floats (9 columns rounded up to 4).
+  const int kStride = 12;
+  float patch[kStride * 9];
+  for (int dy = 0; dy < 9; ++dy) {
+    float* patch_row = patch + dy * kStride;
+    const int y = y0 + dy - 4;
+    // Rows outside the image are all zero.
+    if (y < 0 || static_cast<size_t>(y) >= diffs.ysize()) {
+      std::fill(patch_row, patch_row + kStride, 0.0f);
+      continue;
+    }
+
+    const float* src_row = diffs.ConstRow(y);
+    for (int dx = 0; dx < 9; ++dx) {
+      const int x = x0 + dx - 4;
+      const bool outside = x < 0 || static_cast<size_t>(x) >= diffs.xsize();
+      patch_row[dx] = outside ? 0.0f : src_row[x];
+    }
+    std::fill(patch_row + 9, patch_row + kStride, 0.0f);
+  }
+  return GetLane(MaltaUnit(Tag(), df, &patch[4 * kStride + 4], kStride));
+}
+
+// Computes a normalized, asymmetric difference between lum0 and lum1 into the
+// scratch image `diffs`, then adds the MaltaUnit response of `diffs` to plane
+// `c` of `block_diff_ac`.
+//
+//   tag            selects the MaltaUnit overload.
+//   lum0, lum1     images to compare; must have the same size as `diffs`.
+//   w_0gt1, w_0lt1 weights of the symmetric term and of the half-open
+//                  (too small / too big) terms respectively.
+//   norm1          normalization constant added to the mean absolute value.
+//   len, mulli     scale the weights as mulli * sqrt(k * w) / (2 * len + 1).
+//   diffs          scratch output, fully overwritten.
+//   block_diff_ac  accumulator; plane `c` is incremented.
+template <class Tag>
+static void MaltaDiffMapT(const Tag tag, const ImageF& lum0, const ImageF& lum1,
+                          const double w_0gt1, const double w_0lt1,
+                          const double norm1, const double len,
+                          const double mulli, ImageF* HWY_RESTRICT diffs,
+                          Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
+  JXL_DASSERT(SameSize(lum0, lum1) && SameSize(lum0, *diffs));
+  const size_t xsize_ = lum0.xsize();
+  const size_t ysize_ = lum0.ysize();
+
+  const float kWeight0 = 0.5;
+  const float kWeight1 = 0.33;
+
+  const double w_pre0gt1 = mulli * std::sqrt(kWeight0 * w_0gt1) / (len * 2 + 1);
+  const double w_pre0lt1 = mulli * std::sqrt(kWeight1 * w_0lt1) / (len * 2 + 1);
+  const float norm2_0gt1 = w_pre0gt1 * norm1;
+  const float norm2_0lt1 = w_pre0lt1 * norm1;
+
+  for (size_t y = 0; y < ysize_; ++y) {
+    const float* HWY_RESTRICT row0 = lum0.ConstRow(y);
+    const float* HWY_RESTRICT row1 = lum1.ConstRow(y);
+    float* HWY_RESTRICT row_diffs = diffs->Row(y);
+    for (size_t x = 0; x < xsize_; ++x) {
+      const float absval = 0.5f * (std::abs(row0[x]) + std::abs(row1[x]));
+      const float diff = row0[x] - row1[x];
+      const float scaler = norm2_0gt1 / (static_cast<float>(norm1) + absval);
+
+      // Primary symmetric quadratic objective.
+      row_diffs[x] = scaler * diff;
+
+      const float scaler2 = norm2_0lt1 / (static_cast<float>(norm1) + absval);
+      const double fabs0 = std::fabs(row0[x]);
+
+      // Secondary half-open quadratic objectives.
+      const double too_small = 0.55 * fabs0;
+      const double too_big = 1.05 * fabs0;
+
+      if (row0[x] < 0) {
+        if (row1[x] > -too_small) {
+          double impact = scaler2 * (row1[x] + too_small);
+          row_diffs[x] -= impact;
+        } else if (row1[x] < -too_big) {
+          double impact = scaler2 * (-row1[x] - too_big);
+          row_diffs[x] += impact;
+        }
+      } else {
+        if (row1[x] < too_small) {
+          double impact = scaler2 * (too_small - row1[x]);
+          row_diffs[x] += impact;
+        } else if (row1[x] > too_big) {
+          double impact = scaler2 * (row1[x] - too_big);
+          row_diffs[x] -= impact;
+        }
+      }
+    }
+  }
+
+  size_t y0 = 0;
+  // Top: rows closer than 4 pixels to the upper border need the padded path.
+  // Clamped to the image height so images shorter than 4 rows stay in bounds.
+  const size_t top_end = std::min(size_t(4), ysize_);
+  for (; y0 < top_end; ++y0) {
+    float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->PlaneRow(c, y0);
+    for (size_t x0 = 0; x0 < xsize_; ++x0) {
+      row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0);
+    }
+  }
+
+  const HWY_FULL(float) df;
+  const size_t aligned_x = std::max(size_t(4), Lanes(df));
+  // The scalar prefix must not run past the row when the image is narrower
+  // than one vector.
+  const size_t prefix_end = std::min(aligned_x, xsize_);
+  const intptr_t stride = diffs->PixelsPerRow();
+
+  // Middle. `y0 + 4 < ysize_` avoids the unsigned wrap-around of
+  // `ysize_ - 4` for images shorter than 4 rows.
+  for (; y0 + 4 < ysize_; ++y0) {
+    const float* BUTTERAUGLI_RESTRICT row_in = diffs->ConstRow(y0);
+    float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->PlaneRow(c, y0);
+    size_t x0 = 0;
+    // Left border (and alignment prefix): padded scalar path.
+    for (; x0 < prefix_end; ++x0) {
+      row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0);
+    }
+    // Interior: full vectors, every lane at least 4 pixels from the border.
+    for (; x0 + Lanes(df) + 4 <= xsize_; x0 += Lanes(df)) {
+      auto diff = Load(df, row_diff + x0);
+      diff = Add(diff, MaltaUnit(Tag(), df, row_in + x0, stride));
+      Store(diff, df, row_diff + x0);
+    }
+
+    // Right border: padded scalar path.
+    for (; x0 < xsize_; ++x0) {
+      row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0);
+    }
+  }
+
+  // Bottom
+  for (; y0 < ysize_; ++y0) {
+    float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->PlaneRow(c, y0);
+    for (size_t x0 = 0; x0 < xsize_; ++x0) {
+      row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0);
+    }
+  }
+}
+
+// Need non-template wrapper functions for HWY_EXPORT.
+// Forwards every argument unchanged to MaltaDiffMapT, instantiated with
+// MaltaTag.
+void MaltaDiffMap(const ImageF& lum0, const ImageF& lum1, const double w_0gt1,
+                  const double w_0lt1, const double norm1, const double len,
+                  const double mulli, ImageF* HWY_RESTRICT diffs,
+                  Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
+  MaltaDiffMapT(MaltaTag(), lum0, lum1, w_0gt1, w_0lt1, norm1, len, mulli,
+                diffs, block_diff_ac, c);
+}
+
+// Non-template wrapper: forwards every argument unchanged to MaltaDiffMapT,
+// instantiated with MaltaTagLF.
+void MaltaDiffMapLF(const ImageF& lum0, const ImageF& lum1, const double w_0gt1,
+                    const double w_0lt1, const double norm1, const double len,
+                    const double mulli, ImageF* HWY_RESTRICT diffs,
+                    Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
+  MaltaDiffMapT(MaltaTagLF(), lum0, lum1, w_0gt1, w_0lt1, norm1, len, mulli,
+                diffs, block_diff_ac, c);
+}
+
+// For every pixel of `xyb`, stores sqrt(mul * |pixel| + bias) - sqrt(bias) in
+// `out`, with bias = mul * bias_arg. `out` must be at least as large as `xyb`.
+void DiffPrecompute(const ImageF& xyb, float mul, float bias_arg, ImageF* out) {
+  const float bias = mul * bias_arg;
+  const float sqrt_bias = sqrt(bias);
+  const size_t width = xyb.xsize();
+  const size_t height = xyb.ysize();
+  for (size_t row = 0; row != height; ++row) {
+    const float* BUTTERAUGLI_RESTRICT src = xyb.Row(row);
+    float* BUTTERAUGLI_RESTRICT dst = out->Row(row);
+    for (size_t col = 0; col != width; ++col) {
+      // The bias makes sqrt behave more linearly.
+      dst[col] = sqrt(mul * std::abs(src[col]) + bias) - sqrt_bias;
+    }
+  }
+}
+
+// std::log(80.0) / std::log(255.0);
+constexpr float kIntensityTargetNormalizationHack = 0.79079917404f;
+// Internal threshold value, pre-multiplied by the normalization factor above.
+static const float kInternalGoodQualityThreshold =
+    17.83f * kIntensityTargetNormalizationHack;
+// Reciprocal of the threshold; used by MaskY / MaskDcY to scale their result.
+static const float kGlobalScale = 1.0 / kInternalGoodQualityThreshold;
+
+// Inserts `v` into the ascending triple (min0, min1, min2) if it is smaller
+// than min2, shifting the larger entries up and dropping the previous min2.
+// Values that are not strictly smaller than min2 leave the triple untouched.
+void StoreMin3(const float v, float& min0, float& min1, float& min2) {
+  if (!(v < min2)) {
+    return;
+  }
+  if (v < min0) {
+    // New overall minimum: everything moves one slot up.
+    min2 = min1;
+    min1 = min0;
+    min0 = v;
+    return;
+  }
+  if (v < min1) {
+    // Lands between min0 and min1.
+    min2 = min1;
+    min1 = v;
+    return;
+  }
+  // Lands between min1 and min2.
+  min2 = v;
+}
+
+// Look for smooth areas near the area of degradation.
+// If the areas are generally smooth, don't do masking.
+//
+// For every pixel, starts from (center, 2 * center, 2 * center) and feeds the
+// up to eight neighbours at a distance of kStep pixels (horizontal, vertical
+// and diagonal) through StoreMin3; neighbours outside the image are skipped.
+// The weighted sum 0.45 * min0 + 0.3 * min1 + 0.25 * min2 is written to `to`.
+void FuzzyErosion(const ImageF& from, ImageF* to) {
+  const size_t xsize = from.xsize();
+  const size_t ysize = from.ysize();
+  // Unsigned so that comparisons with x / y do not mix signedness.
+  static const size_t kStep = 3;
+  for (size_t y = 0; y < ysize; ++y) {
+    // `y + kStep < ysize` instead of `y < ysize - kStep`: the subtraction
+    // wraps around for images smaller than kStep and would allow reads past
+    // the last row / column.
+    const bool has_above = y >= kStep;
+    const bool has_below = y + kStep < ysize;
+    for (size_t x = 0; x < xsize; ++x) {
+      float min0 = from.Row(y)[x];
+      float min1 = 2 * min0;
+      float min2 = min1;
+      if (x >= kStep) {
+        StoreMin3(from.Row(y)[x - kStep], min0, min1, min2);
+        if (has_above) {
+          StoreMin3(from.Row(y - kStep)[x - kStep], min0, min1, min2);
+        }
+        if (has_below) {
+          StoreMin3(from.Row(y + kStep)[x - kStep], min0, min1, min2);
+        }
+      }
+      if (x + kStep < xsize) {
+        StoreMin3(from.Row(y)[x + kStep], min0, min1, min2);
+        if (has_above) {
+          StoreMin3(from.Row(y - kStep)[x + kStep], min0, min1, min2);
+        }
+        if (has_below) {
+          StoreMin3(from.Row(y + kStep)[x + kStep], min0, min1, min2);
+        }
+      }
+      if (has_above) {
+        StoreMin3(from.Row(y - kStep)[x], min0, min1, min2);
+      }
+      if (has_below) {
+        StoreMin3(from.Row(y + kStep)[x], min0, min1, min2);
+      }
+      to->Row(y)[x] = (0.45f * min0 + 0.3f * min1 + 0.25f * min2);
+    }
+  }
+}
+
+// Compute values of local frequency and dc masking based on the activity
+// in the two images. diff_ac may be null.
+//
+//   mask0, mask1  activity images of the first and second image.
+//   mask          output; reallocated to the size of mask0 and filled with
+//                 the eroded, blurred activity of mask0 only.
+//   diff_ac       optional accumulator; when non-null, 10 * d * d is added to
+//                 it, where d is the difference of the two blurred activity
+//                 images.
+void Mask(const ImageF& mask0, const ImageF& mask1,
+          const ButteraugliParams& params, BlurTemp* blur_temp,
+          ImageF* BUTTERAUGLI_RESTRICT mask,
+          ImageF* BUTTERAUGLI_RESTRICT diff_ac) {
+  // Only X and Y components are involved in masking. B's influence
+  // is considered less important in the high frequency area, and we
+  // don't model masking from lower frequency signals.
+  const size_t xsize = mask0.xsize();
+  const size_t ysize = mask0.ysize();
+  *mask = ImageF(xsize, ysize);
+  static const float kMul = 6.19424080439;
+  static const float kBias = 12.61050594197;
+  static const float kRadius = 2.7;
+
+  ImageF diff0(xsize, ysize);
+  ImageF blurred0(xsize, ysize);
+  DiffPrecompute(mask0, kMul, kBias, &diff0);
+  Blur(diff0, kRadius, params, blur_temp, &blurred0);
+  FuzzyErosion(blurred0, &diff0);
+  for (size_t y = 0; y < ysize; ++y) {
+    const float* BUTTERAUGLI_RESTRICT row_eroded = diff0.Row(y);
+    float* BUTTERAUGLI_RESTRICT row_mask = mask->Row(y);
+    for (size_t x = 0; x < xsize; ++x) {
+      row_mask[x] = row_eroded[x];
+    }
+  }
+
+  // The second image only contributes to diff_ac, so none of its processing
+  // is needed without it. The eroded version of the second image was never
+  // read by anything and is no longer computed.
+  if (diff_ac == nullptr) {
+    return;
+  }
+
+  ImageF diff1(xsize, ysize);
+  ImageF blurred1(xsize, ysize);
+  DiffPrecompute(mask1, kMul, kBias, &diff1);
+  Blur(diff1, kRadius, params, blur_temp, &blurred1);
+  static const float kMaskToErrorMul = 10.0;
+  for (size_t y = 0; y < ysize; ++y) {
+    const float* BUTTERAUGLI_RESTRICT row_b0 = blurred0.Row(y);
+    const float* BUTTERAUGLI_RESTRICT row_b1 = blurred1.Row(y);
+    float* BUTTERAUGLI_RESTRICT row_ac = diff_ac->Row(y);
+    for (size_t x = 0; x < xsize; ++x) {
+      const float diff = row_b0[x] - row_b1[x];
+      row_ac[x] += kMaskToErrorMul * diff * diff;
+    }
+  }
+}
+
+// Builds one activity image per input from the X and Y planes of its hf and
+// uhf bands, then hands both to Mask(). `diff_ac` may be null.
+// `temp` is accepted for interface compatibility and is not used here.
+void MaskPsychoImage(const PsychoImage& pi0, const PsychoImage& pi1,
+                     const size_t xsize, const size_t ysize,
+                     const ButteraugliParams& params, Image3F* temp,
+                     BlurTemp* blur_temp, ImageF* BUTTERAUGLI_RESTRICT mask,
+                     ImageF* BUTTERAUGLI_RESTRICT diff_ac) {
+  // muls[0] scales the X band sum; muls[1] / muls[2] scale Y uhf / Y hf.
+  static const float muls[3] = {
+      2.5f,
+      0.4f,
+      0.4f,
+  };
+  ImageF mask0(xsize, ysize);
+  ImageF mask1(xsize, ysize);
+  // Silly and unoptimized approach here. TODO(jyrki): rework this.
+  for (size_t y = 0; y < ysize; ++y) {
+    const float* BUTTERAUGLI_RESTRICT x_hf_a = pi0.hf[0].Row(y);
+    const float* BUTTERAUGLI_RESTRICT x_uhf_a = pi0.uhf[0].Row(y);
+    const float* BUTTERAUGLI_RESTRICT y_hf_a = pi0.hf[1].Row(y);
+    const float* BUTTERAUGLI_RESTRICT y_uhf_a = pi0.uhf[1].Row(y);
+    const float* BUTTERAUGLI_RESTRICT x_hf_b = pi1.hf[0].Row(y);
+    const float* BUTTERAUGLI_RESTRICT x_uhf_b = pi1.uhf[0].Row(y);
+    const float* BUTTERAUGLI_RESTRICT y_hf_b = pi1.hf[1].Row(y);
+    const float* BUTTERAUGLI_RESTRICT y_uhf_b = pi1.uhf[1].Row(y);
+    float* BUTTERAUGLI_RESTRICT out_a = mask0.Row(y);
+    float* BUTTERAUGLI_RESTRICT out_b = mask1.Row(y);
+    for (size_t x = 0; x < xsize; ++x) {
+      const float xa = (x_uhf_a[x] + x_hf_a[x]) * muls[0];
+      const float xb = (x_uhf_b[x] + x_hf_b[x]) * muls[0];
+      const float ya = y_uhf_a[x] * muls[1] + y_hf_a[x] * muls[2];
+      const float yb = y_uhf_b[x] * muls[1] + y_hf_b[x] * muls[2];
+      // Euclidean norm of the (X, Y) activity pair.
+      const float energy_a = xa * xa + ya * ya;
+      out_a[x] = sqrt(energy_a);
+      const float energy_b = xb * xb + yb * yb;
+      out_b[x] = sqrt(energy_b);
+    }
+  }
+  Mask(mask0, mask1, params, blur_temp, mask, diff_ac);
+}
+
+// Returns (kGlobalScale * (1 + mul / (scaler * delta + offset)))^2 for the
+// AC masking constants below.
+double MaskY(double delta) {
+  static const double kOffset = 0.829591754942;
+  static const double kScaler = 0.451936922203;
+  static const double kMul = 2.5485944793;
+  const double ratio = kMul / ((kScaler * delta) + kOffset);
+  const double scaled = kGlobalScale * (1.0 + ratio);
+  return scaled * scaled;
+}
+
+// Returns (kGlobalScale * (1 + mul / (scaler * delta + offset)))^2 for the
+// DC masking constants below.
+double MaskDcY(double delta) {
+  static const double kOffset = 0.20025578522;
+  static const double kScaler = 3.87449418804;
+  static const double kMul = 0.505054525019;
+  const double ratio = kMul / ((kScaler * delta) + kOffset);
+  const double scaled = kGlobalScale * (1.0 + ratio);
+  return scaled * scaled;
+}
+
+// Returns the sum of the three channel values, each multiplied by `mask`.
+inline float MaskColor(const float color[3], const float mask) {
+  float total = color[0] * mask;
+  total += color[1] * mask;
+  total += color[2] * mask;
+  return total;
+}
+
+// Diffmap := sqrt of the sum of the DC and AC difference images, each
+// multiplied by its mask value (MaskDcY / MaskY of the mask pixel). The X
+// channel (index 0) of both difference images is additionally scaled by xmul.
+// `result` must have the same size as `mask`.
+void CombineChannelsToDiffmap(const ImageF& mask, const Image3F& block_diff_dc,
+                              const Image3F& block_diff_ac, float xmul,
+                              ImageF* result) {
+  JXL_CHECK(SameSize(mask, *result));
+  const size_t width = mask.xsize();
+  const size_t height = mask.ysize();
+  for (size_t y = 0; y < height; ++y) {
+    float* BUTTERAUGLI_RESTRICT row_out = result->Row(y);
+    for (size_t x = 0; x < width; ++x) {
+      const float val = mask.Row(y)[x];
+      const float ac_weight = MaskY(val);
+      const float dc_weight = MaskDcY(val);
+      float dc[3];
+      float ac[3];
+      for (int ch = 0; ch < 3; ++ch) {
+        dc[ch] = block_diff_dc.PlaneRow(ch, y)[x];
+        ac[ch] = block_diff_ac.PlaneRow(ch, y)[x];
+      }
+      ac[0] *= xmul;
+      dc[0] *= xmul;
+      row_out[x] = sqrt(MaskColor(dc, dc_weight) + MaskColor(ac, ac_weight));
+    }
+  }
+}
+
+// Adds w * (i0 - i1)^2 to plane `c` of diffmap, one full vector at a time.
+// Does nothing when w is zero.
+static void L2Diff(const ImageF& i0, const ImageF& i1, const float w,
+                   Image3F* BUTTERAUGLI_RESTRICT diffmap, size_t c) {
+  if (w == 0) return;
+
+  const HWY_FULL(float) d;
+  const auto weight = Set(d, w);
+
+  for (size_t y = 0; y < i0.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT in0 = i0.ConstRow(y);
+    const float* BUTTERAUGLI_RESTRICT in1 = i1.ConstRow(y);
+    float* BUTTERAUGLI_RESTRICT acc = diffmap->PlaneRow(c, y);
+
+    for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) {
+      const auto delta = Sub(Load(d, in0 + x), Load(d, in1 + x));
+      const auto squared = Mul(delta, delta);
+      Store(MulAdd(squared, weight, Load(d, acc + x)), d, acc + x);
+    }
+  }
+}
+
+// Initializes plane `c` of diffmap to the weighted L2 difference between i0
+// and i1, i.e. w * (i0 - i1)^2.
+//
+// The plane is always written: for w == 0 it is filled with zeros. (An early
+// return would leave a freshly allocated plane uninitialized, and multiplying
+// by zero would still propagate NaN/Inf from the inputs.)
+static void SetL2Diff(const ImageF& i0, const ImageF& i1, const float w,
+                      Image3F* BUTTERAUGLI_RESTRICT diffmap, size_t c) {
+  const HWY_FULL(float) d;
+
+  if (w == 0) {
+    for (size_t y = 0; y < i0.ysize(); ++y) {
+      float* BUTTERAUGLI_RESTRICT row_diff = diffmap->PlaneRow(c, y);
+      for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) {
+        Store(Zero(d), d, row_diff + x);
+      }
+    }
+    return;
+  }
+
+  const auto weight = Set(d, w);
+
+  for (size_t y = 0; y < i0.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT row0 = i0.ConstRow(y);
+    const float* BUTTERAUGLI_RESTRICT row1 = i1.ConstRow(y);
+    float* BUTTERAUGLI_RESTRICT row_diff = diffmap->PlaneRow(c, y);
+
+    for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) {
+      const auto diff = Sub(Load(d, row0 + x), Load(d, row1 + x));
+      const auto diff2 = Mul(diff, diff);
+      Store(Mul(diff2, weight), d, row_diff + x);
+    }
+  }
+}
+
+// i0 is the original image.
+// i1 is the deformed copy.
+//
+// Adds to plane `c` of diffmap:
+//   0.8 * w_0gt1 * (i0 - i1)^2  +  0.8 * w_0lt1 * v^2
+// where v is the amount by which i1 falls outside the band
+// [0.4 * |i0|, |i0|] on the side of i0's sign (0 when inside the band).
+// Does nothing when both weights are zero.
+static void L2DiffAsymmetric(const ImageF& i0, const ImageF& i1, float w_0gt1,
+                             float w_0lt1,
+                             Image3F* BUTTERAUGLI_RESTRICT diffmap, size_t c) {
+  if (w_0gt1 == 0 && w_0lt1 == 0) {
+    return;
+  }
+
+  const HWY_FULL(float) d;
+  const auto vw_0gt1 = Set(d, w_0gt1 * 0.8);
+  const auto vw_0lt1 = Set(d, w_0lt1 * 0.8);
+
+  for (size_t y = 0; y < i0.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT row0 = i0.Row(y);
+    const float* BUTTERAUGLI_RESTRICT row1 = i1.Row(y);
+    float* BUTTERAUGLI_RESTRICT row_diff = diffmap->PlaneRow(c, y);
+
+    for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) {
+      const auto val0 = Load(d, row0 + x);
+      const auto val1 = Load(d, row1 + x);
+
+      // Primary symmetric quadratic objective.
+      const auto diff = Sub(val0, val1);
+      auto total = MulAdd(Mul(diff, diff), vw_0gt1, Load(d, row_diff + x));
+
+      // Secondary half-open quadratic objectives.
+      const auto fabs0 = Abs(val0);
+      const auto too_small = Mul(Set(d, 0.4), fabs0);
+      const auto too_big = fabs0;
+
+      // Penalty when val0 is negative: val1 above -too_small or below
+      // -too_big.
+      const auto if_neg = IfThenElse(
+          Gt(val1, Neg(too_small)), Add(val1, too_small),
+          IfThenElseZero(Lt(val1, Neg(too_big)), Sub(Neg(val1), too_big)));
+      // Penalty when val0 is non-negative: val1 below too_small or above
+      // too_big.
+      const auto if_pos =
+          IfThenElse(Lt(val1, too_small), Sub(too_small, val1),
+                     IfThenElseZero(Gt(val1, too_big), Sub(val1, too_big)));
+      // Pick the penalty matching the sign of the original value per lane.
+      const auto v = IfThenElse(Lt(val0, Zero(d)), if_neg, if_pos);
+      total = MulAdd(vw_0lt1, Mul(v, v), total);
+      Store(total, d, row_diff + x);
+    }
+  }
+}
+
+// A simple HDR compatible gamma function:
+//   kRetMul * log2(max(v, 0) + bias) + kRetAdd
+// evaluated per lane with FastLog2f.
+template <class DF, class V>
+V Gamma(const DF df, V v) {
+  // Negative inputs should happen rarely, but may lead to a NaN in log, which
+  // is undesirable. Since negative photons don't exist we solve the NaNs by
+  // clamping here.
+  v = ZeroIfNegative(v);
+
+  // ln(2) constant folded in because we want std::log but have FastLog2f.
+  const auto kRetMul = Set(df, 19.245013259874995f * 0.693147180559945f);
+  const auto kRetAdd = Set(df, -23.16046239805755);
+  const auto kBias = Set(df, 9.9710635769299145);
+
+  // We could fold this into a custom Log2 polynomial, but there would be
+  // relatively little gain.
+  const auto log2_biased = FastLog2f(df, Add(v, kBias));
+  return MulAdd(kRetMul, log2_biased, kRetAdd);
+}
+
+// Mixes the three input channels into three output channels with a fixed
+// affine transform (3x3 matrix plus per-channel bias). When `Clamp` is true
+// each output is additionally limited from below by its bias.
+template <bool Clamp, class DF, class V>
+BUTTERAUGLI_INLINE void OpsinAbsorbance(const DF df, const V& in0, const V& in1,
+                                        const V& in2, V* JXL_RESTRICT out0,
+                                        V* JXL_RESTRICT out1,
+                                        V* JXL_RESTRICT out2) {
+  // https://en.wikipedia.org/wiki/Photopsin absorbance modeling.
+  // Row 0: weights for in0, in1, in2 and the bias of out0.
+  const V w00 = Set(df, 0.29956550340058319);
+  const V w01 = Set(df, 0.63373087833825936);
+  const V w02 = Set(df, 0.077705617820981968);
+  const V bias0 = Set(df, 1.7557483643287353);
+  // Row 1: weights and bias of out1.
+  const V w10 = Set(df, 0.22158691104574774);
+  const V w11 = Set(df, 0.69391388044116142);
+  const V w12 = Set(df, 0.0987313588422);
+  const V bias1 = Set(df, 1.7557483643287353);
+  // Row 2: weights and bias of out2.
+  const V w20 = Set(df, 0.02);
+  const V w21 = Set(df, 0.02);
+  const V w22 = Set(df, 0.20480129041026129);
+  const V bias2 = Set(df, 12.226454707163354);
+
+  *out0 = MulAdd(w00, in0, MulAdd(w01, in1, MulAdd(w02, in2, bias0)));
+  *out1 = MulAdd(w10, in0, MulAdd(w11, in1, MulAdd(w12, in2, bias1)));
+  *out2 = MulAdd(w20, in0, MulAdd(w21, in1, MulAdd(w22, in2, bias2)));
+
+  if (Clamp) {
+    *out0 = Max(*out0, bias0);
+    *out1 = Max(*out1, bias1);
+    *out2 = Max(*out2, bias2);
+  }
+}
+
+// Converts `rgb` into a three-plane image (named x, y, b below).
+// `blurred` is a temporary image used inside this function and not returned.
+//
+// Per pixel: the blurred RGB (sigma 1.2) is mixed with OpsinAbsorbance and
+// turned into a sensitivity Gamma(m) / m; the unblurred RGB is mixed the same
+// way (without clamping) and multiplied by that sensitivity. The outputs are
+// x = c0 - c1, y = c0 + c1, b = c2.
+Image3F OpsinDynamicsImage(const Image3F& rgb, const ButteraugliParams& params,
+                           Image3F* blurred, BlurTemp* blur_temp) {
+  Image3F xyb(rgb.xsize(), rgb.ysize());
+  const double kSigma = 1.2;
+  Blur(rgb.Plane(0), kSigma, params, blur_temp, &blurred->Plane(0));
+  Blur(rgb.Plane(1), kSigma, params, blur_temp, &blurred->Plane(1));
+  Blur(rgb.Plane(2), kSigma, params, blur_temp, &blurred->Plane(2));
+  const HWY_FULL(float) df;
+  // Both the blurred and the unblurred samples are scaled by the intensity
+  // target before mixing.
+  const auto intensity_target_multiplier = Set(df, params.intensity_target);
+  for (size_t y = 0; y < rgb.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT row_r = rgb.ConstPlaneRow(0, y);
+    const float* BUTTERAUGLI_RESTRICT row_g = rgb.ConstPlaneRow(1, y);
+    const float* BUTTERAUGLI_RESTRICT row_b = rgb.ConstPlaneRow(2, y);
+    const float* BUTTERAUGLI_RESTRICT row_blurred_r =
+        blurred->ConstPlaneRow(0, y);
+    const float* BUTTERAUGLI_RESTRICT row_blurred_g =
+        blurred->ConstPlaneRow(1, y);
+    const float* BUTTERAUGLI_RESTRICT row_blurred_b =
+        blurred->ConstPlaneRow(2, y);
+    float* BUTTERAUGLI_RESTRICT row_out_x = xyb.PlaneRow(0, y);
+    float* BUTTERAUGLI_RESTRICT row_out_y = xyb.PlaneRow(1, y);
+    float* BUTTERAUGLI_RESTRICT row_out_b = xyb.PlaneRow(2, y);
+    // Lower bound applied to the mixed values and to the sensitivities.
+    const auto min = Set(df, 1e-4f);
+    for (size_t x = 0; x < rgb.xsize(); x += Lanes(df)) {
+      auto sensitivity0 = Undefined(df);
+      auto sensitivity1 = Undefined(df);
+      auto sensitivity2 = Undefined(df);
+      {
+        // Calculate sensitivity based on the smoothed image gamma derivative.
+        auto pre_mixed0 = Undefined(df);
+        auto pre_mixed1 = Undefined(df);
+        auto pre_mixed2 = Undefined(df);
+        OpsinAbsorbance<true>(
+            df, Mul(Load(df, row_blurred_r + x), intensity_target_multiplier),
+            Mul(Load(df, row_blurred_g + x), intensity_target_multiplier),
+            Mul(Load(df, row_blurred_b + x), intensity_target_multiplier),
+            &pre_mixed0, &pre_mixed1, &pre_mixed2);
+        // Clamp before dividing so the quotient below has a positive
+        // denominator.
+        pre_mixed0 = Max(pre_mixed0, min);
+        pre_mixed1 = Max(pre_mixed1, min);
+        pre_mixed2 = Max(pre_mixed2, min);
+        sensitivity0 = Div(Gamma(df, pre_mixed0), pre_mixed0);
+        sensitivity1 = Div(Gamma(df, pre_mixed1), pre_mixed1);
+        sensitivity2 = Div(Gamma(df, pre_mixed2), pre_mixed2);
+        sensitivity0 = Max(sensitivity0, min);
+        sensitivity1 = Max(sensitivity1, min);
+        sensitivity2 = Max(sensitivity2, min);
+      }
+      auto cur_mixed0 = Undefined(df);
+      auto cur_mixed1 = Undefined(df);
+      auto cur_mixed2 = Undefined(df);
+      OpsinAbsorbance<false>(
+          df, Mul(Load(df, row_r + x), intensity_target_multiplier),
+          Mul(Load(df, row_g + x), intensity_target_multiplier),
+          Mul(Load(df, row_b + x), intensity_target_multiplier), &cur_mixed0,
+          &cur_mixed1, &cur_mixed2);
+      cur_mixed0 = Mul(cur_mixed0, sensitivity0);
+      cur_mixed1 = Mul(cur_mixed1, sensitivity1);
+      cur_mixed2 = Mul(cur_mixed2, sensitivity2);
+      // This is a kludge. The negative values should be zeroed away before
+      // blurring. Ideally there would be no negative values in the first place.
+      // The bounds equal the bias constants used in OpsinAbsorbance.
+      const auto min01 = Set(df, 1.7557483643287353f);
+      const auto min2 = Set(df, 12.226454707163354f);
+      cur_mixed0 = Max(cur_mixed0, min01);
+      cur_mixed1 = Max(cur_mixed1, min01);
+      cur_mixed2 = Max(cur_mixed2, min2);
+
+      // x = difference, y = sum of the first two channels; b = third channel.
+      Store(Sub(cur_mixed0, cur_mixed1), df, row_out_x + x);
+      Store(Add(cur_mixed0, cur_mixed1), df, row_out_y + x);
+      Store(cur_mixed2, df, row_out_b + x);
+    }
+  }
+  return xyb;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+// Exports of the functions defined in the HWY_NAMESPACE section above; the
+// code below invokes them through HWY_DYNAMIC_DISPATCH.
+HWY_EXPORT(SeparateFrequencies);       // Local function.
+HWY_EXPORT(MaskPsychoImage);           // Local function.
+HWY_EXPORT(L2DiffAsymmetric);          // Local function.
+HWY_EXPORT(L2Diff);                    // Local function.
+HWY_EXPORT(SetL2Diff);                 // Local function.
+HWY_EXPORT(CombineChannelsToDiffmap);  // Local function.
+HWY_EXPORT(MaltaDiffMap);              // Local function.
+HWY_EXPORT(MaltaDiffMapLF);            // Local function.
+HWY_EXPORT(OpsinDynamicsImage);        // Local function.
+
+#if BUTTERAUGLI_ENABLE_CHECKS
+
+// Returns true if `x` is a NaN, decided from its bit pattern: all exponent
+// bits set and a non-zero mantissa. The sign bit is ignored.
+static inline bool IsNan(const float x) {
+  uint32_t bits;
+  memcpy(&bits, &x, sizeof(bits));
+  const uint32_t exponent = bits & 0x7F800000;
+  const uint32_t mantissa = bits & 0x7FFFFF;
+  if (exponent != 0x7F800000) {
+    return false;
+  }
+  return mantissa != 0;
+}
+
+// Returns true if `x` is a NaN, decided from its bit pattern: all exponent
+// bits set and a non-zero mantissa. The sign bit is ignored.
+static inline bool IsNan(const double x) {
+  uint64_t bits;
+  memcpy(&bits, &x, sizeof(bits));
+  const uint64_t exponent = bits & 0x7ff0000000000000ULL;
+  const uint64_t mantissa = bits & 0x000fffffffffffffULL;
+  return exponent == 0x7ff0000000000000ULL && mantissa != 0;
+}
+
+// Scans `image` row by row; on the first NaN pixel prints its position
+// together with `name` and the image size, then terminates with exit(1).
+static inline void CheckImage(const ImageF& image, const char* name) {
+  for (size_t y = 0; y < image.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT row = image.Row(y);
+    for (size_t x = 0; x < image.xsize(); ++x) {
+      if (!IsNan(row[x])) {
+        continue;
+      }
+      printf("NAN: Image %s @ %" PRIuS ",%" PRIuS " (of %" PRIuS ",%" PRIuS
+             ")\n",
+             name, x, y, image.xsize(), image.ysize());
+      exit(1);
+    }
+  }
+}
+
+// CHECK_NAN(x, str): if `x` is NaN, prints the current line number and `str`,
+// then aborts.
+#define CHECK_NAN(x, str)                \
+  do {                                   \
+    if (IsNan(x)) {                      \
+      printf("%d: %s\n", __LINE__, str); \
+      abort();                           \
+    }                                    \
+  } while (0)
+
+// CHECK_IMAGE(image, name): runs CheckImage on every pixel of `image`.
+#define CHECK_IMAGE(image, name) CheckImage(image, name)
+
+#else  // BUTTERAUGLI_ENABLE_CHECKS
+
+// Checks disabled: both macros expand to nothing.
+#define CHECK_NAN(x, str)
+#define CHECK_IMAGE(image, name)
+
+#endif  // BUTTERAUGLI_ENABLE_CHECKS
+
+// Calculate a 2x2 subsampled image for purposes of recursive butteraugli at
+// multiresolution. Each output pixel accumulates 0.25 times every input pixel
+// that maps onto it; for odd input sizes the last column / row only receives
+// half as many contributions and is therefore doubled afterwards.
+static Image3F SubSample2x(const Image3F& in) {
+  const size_t xs = (in.xsize() + 1) / 2;
+  const size_t ys = (in.ysize() + 1) / 2;
+  const bool odd_width = (in.xsize() & 1) != 0;
+  const bool odd_height = (in.ysize() & 1) != 0;
+  Image3F retval(xs, ys);
+  for (size_t c = 0; c < 3; ++c) {
+    // Start from an all-zero plane.
+    for (size_t y = 0; y < ys; ++y) {
+      float* out_row = retval.PlaneRow(c, y);
+      for (size_t x = 0; x < xs; ++x) {
+        out_row[x] = 0;
+      }
+    }
+    // Accumulate a quarter of every source pixel into its 2x2 cell.
+    for (size_t y = 0; y < in.ysize(); ++y) {
+      float* out_row = retval.PlaneRow(c, y / 2);
+      for (size_t x = 0; x < in.xsize(); ++x) {
+        out_row[x / 2] += 0.25f * in.PlaneRow(c, y)[x];
+      }
+    }
+    if (odd_width) {
+      const size_t last_column = retval.xsize() - 1;
+      for (size_t y = 0; y < retval.ysize(); ++y) {
+        retval.PlaneRow(c, y)[last_column] *= 2.0f;
+      }
+    }
+    if (odd_height) {
+      const size_t last_row = retval.ysize() - 1;
+      for (size_t x = 0; x < retval.xsize(); ++x) {
+        retval.PlaneRow(c, last_row)[x] *= 2.0f;
+      }
+    }
+  }
+  return retval;
+}
+
+// Supersample src by 2x and add it to dest: every dest pixel is first scaled
+// by (1 - 0.3 * w) and then incremented by w times the src pixel at half its
+// coordinates.
+static void AddSupersampled2x(const ImageF& src, float w, ImageF& dest) {
+  // There will be less errors from the more averaged images.
+  // We take it into account to some extent using a scaler.
+  static const double kHeuristicMixingValue = 0.3;
+  const size_t height = dest.ysize();
+  const size_t width = dest.xsize();
+  for (size_t y = 0; y < height; ++y) {
+    for (size_t x = 0; x < width; ++x) {
+      float& pixel = dest.Row(y)[x];
+      pixel *= 1.0 - kHeuristicMixingValue * w;
+      pixel += w * src.Row(y / 2)[x / 2];
+    }
+  }
+}
+
+// Marks the shared temporary image as in use and returns it. Asserts that it
+// was not already taken; pair every call with ReleaseTemp().
+Image3F* ButteraugliComparator::Temp() const {
+  const bool already_taken =
+      temp_in_use_.test_and_set(std::memory_order_acq_rel);
+  JXL_ASSERT(!already_taken);
+  // Silences the unused-variable warning when JXL_ASSERT compiles to nothing.
+  (void)already_taken;
+  return &temp_;
+}
+
+// Clears the in-use flag set by Temp().
+void ButteraugliComparator::ReleaseTemp() const {
+  temp_in_use_.clear();
+}
+
+// Precomputes the frequency decomposition (pi0_) of the reference image rgb0
+// and recursively builds a comparator for its 2x subsampled version (sub_).
+// For images smaller than 8 pixels in either dimension nothing is
+// precomputed, which also ends the recursion.
+ButteraugliComparator::ButteraugliComparator(const Image3F& rgb0,
+                                             const ButteraugliParams& params)
+    : xsize_(rgb0.xsize()),
+      ysize_(rgb0.ysize()),
+      params_(params),
+      temp_(xsize_, ysize_) {
+  if (xsize_ < 8 || ysize_ < 8) {
+    return;
+  }
+
+  // Temp() hands out the scratch image used as `blurred`; release it as soon
+  // as the conversion is done.
+  Image3F xyb0 = HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)(rgb0, params, Temp(),
+                                                          &blur_temp_);
+  ReleaseTemp();
+  HWY_DYNAMIC_DISPATCH(SeparateFrequencies)
+  (xsize_, ysize_, params_, &blur_temp_, xyb0, pi0_);
+
+  // Awful recursive construction of samples of different resolution.
+  // This is an after-thought and possibly somewhat parallel in
+  // functionality with the PsychoImage multi-resolution approach.
+  sub_.reset(new ButteraugliComparator(SubSample2x(rgb0), params));
+}
+
+// Computes the masking image of the reference image alone: pi0_ is passed as
+// both inputs and no diff_ac accumulator is supplied.
+void ButteraugliComparator::Mask(ImageF* BUTTERAUGLI_RESTRICT mask) const {
+  HWY_DYNAMIC_DISPATCH(MaskPsychoImage)
+  (pi0_, pi0_, xsize_, ysize_, params_, Temp(), &blur_temp_, mask, nullptr);
+  ReleaseTemp();
+}
+
+// Computes the difference map between the reference image and rgb1 into
+// `result`. If a usable half-resolution comparator exists, its difference map
+// is supersampled and mixed in with weight 0.5. Images smaller than 8 pixels
+// in either dimension produce a zero-filled result.
+void ButteraugliComparator::Diffmap(const Image3F& rgb1, ImageF& result) const {
+  const bool too_small = xsize_ < 8 || ysize_ < 8;
+  if (too_small) {
+    ZeroFillImage(&result);
+    return;
+  }
+
+  // Full-resolution pass.
+  const Image3F xyb1 = HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)(
+      rgb1, params_, Temp(), &blur_temp_);
+  ReleaseTemp();
+  DiffmapOpsinDynamicsImage(xyb1, result);
+
+  // Half-resolution pass, only when the sub-comparator is large enough.
+  if (!sub_) {
+    return;
+  }
+  if (sub_->xsize_ < 8 || sub_->ysize_ < 8) {
+    return;
+  }
+  const Image3F sub_xyb = HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)(
+      SubSample2x(rgb1), params_, sub_->Temp(), &sub_->blur_temp_);
+  sub_->ReleaseTemp();
+  ImageF subresult;
+  sub_->DiffmapOpsinDynamicsImage(sub_xyb, subresult);
+  AddSupersampled2x(subresult, 0.5, result);
+}
+
+// Same as Diffmap() but takes an image that already went through
+// OpsinDynamicsImage. `result` is reallocated to xsize_ x ysize_, except for
+// images smaller than 8 pixels in either dimension, where it is only
+// zero-filled.
+void ButteraugliComparator::DiffmapOpsinDynamicsImage(const Image3F& xyb1,
+                                                      ImageF& result) const {
+  const bool too_small = xsize_ < 8 || ysize_ < 8;
+  if (too_small) {
+    ZeroFillImage(&result);
+    return;
+  }
+  PsychoImage bands1;
+  HWY_DYNAMIC_DISPATCH(SeparateFrequencies)
+  (xsize_, ysize_, params_, &blur_temp_, xyb1, bands1);
+  result = ImageF(xsize_, ysize_);
+  DiffmapPsychoImage(bands1, result);
+}
+
+namespace {
+
+// Dispatches to the exported MaltaDiffMap with fixed `len` and `mulli`
+// constants; all other arguments are forwarded unchanged.
+void MaltaDiffMap(const ImageF& lum0, const ImageF& lum1, const double w_0gt1,
+                  const double w_0lt1, const double norm1,
+                  ImageF* HWY_RESTRICT diffs,
+                  Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
+  static const double kLen = 3.75;
+  static const double kMulli = 0.39905817637;
+  HWY_DYNAMIC_DISPATCH(MaltaDiffMap)
+  (lum0, lum1, w_0gt1, w_0lt1, norm1, kLen, kMulli, diffs, block_diff_ac, c);
+}
+
+// Dispatches to the exported MaltaDiffMapLF with fixed `len` and `mulli`
+// constants; all other arguments are forwarded unchanged.
+void MaltaDiffMapLF(const ImageF& lum0, const ImageF& lum1, const double w_0gt1,
+                    const double w_0lt1, const double norm1,
+                    ImageF* HWY_RESTRICT diffs,
+                    Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
+  static const double kLen = 3.75;
+  static const double kMulli = 0.611612573796;
+  HWY_DYNAMIC_DISPATCH(MaltaDiffMapLF)
+  (lum0, lum1, w_0gt1, w_0lt1, norm1, kLen, kMulli, diffs, block_diff_ac, c);
+}
+
+}  // namespace
+
+// Computes the difference map between the precomputed reference bands (pi0_)
+// and the bands of the other image (pi1) into `diffmap`.
+//
+// Steps: accumulate the Malta differences of the uhf, hf and mf bands (X and
+// Y planes) into block_diff_ac; add the weighted L2 differences of the hf and
+// mf bands; set block_diff_dc from the lf band; compute the mask (which also
+// adds to the Y plane of block_diff_ac); finally combine everything.
+// Images smaller than 8 pixels in either dimension yield a zero-filled map.
+void ButteraugliComparator::DiffmapPsychoImage(const PsychoImage& pi1,
+                                               ImageF& diffmap) const {
+  if (xsize_ < 8 || ysize_ < 8) {
+    ZeroFillImage(&diffmap);
+    return;
+  }
+
+  const float hf_asymmetry_ = params_.hf_asymmetry;
+  const float xmul_ = params_.xmul;
+
+  // `diffs` is scratch space reused by every Malta call below.
+  ImageF diffs(xsize_, ysize_);
+  Image3F block_diff_ac(xsize_, ysize_);
+  ZeroFillImage(&block_diff_ac);
+  // UHF band, Y plane.
+  static const double wUhfMalta = 1.10039032555;
+  static const double norm1Uhf = 71.7800275169;
+  MaltaDiffMap(pi0_.uhf[1], pi1.uhf[1], wUhfMalta * hf_asymmetry_,
+               wUhfMalta / hf_asymmetry_, norm1Uhf, &diffs, &block_diff_ac, 1);
+
+  // UHF band, X plane.
+  static const double wUhfMaltaX = 173.5;
+  static const double norm1UhfX = 5.0;
+  MaltaDiffMap(pi0_.uhf[0], pi1.uhf[0], wUhfMaltaX * hf_asymmetry_,
+               wUhfMaltaX / hf_asymmetry_, norm1UhfX, &diffs, &block_diff_ac,
+               0);
+
+  // HF band, Y plane; the asymmetry enters as its square root here.
+  static const double wHfMalta = 18.7237414387;
+  static const double norm1Hf = 4498534.45232;
+  MaltaDiffMapLF(pi0_.hf[1], pi1.hf[1], wHfMalta * std::sqrt(hf_asymmetry_),
+                 wHfMalta / std::sqrt(hf_asymmetry_), norm1Hf, &diffs,
+                 &block_diff_ac, 1);
+
+  // HF band, X plane.
+  static const double wHfMaltaX = 6923.99476109;
+  static const double norm1HfX = 8051.15833247;
+  MaltaDiffMapLF(pi0_.hf[0], pi1.hf[0], wHfMaltaX * std::sqrt(hf_asymmetry_),
+                 wHfMaltaX / std::sqrt(hf_asymmetry_), norm1HfX, &diffs,
+                 &block_diff_ac, 0);
+
+  // MF band, Y plane; symmetric weights.
+  static const double wMfMalta = 37.0819870399;
+  static const double norm1Mf = 130262059.556;
+  MaltaDiffMapLF(pi0_.mf.Plane(1), pi1.mf.Plane(1), wMfMalta, wMfMalta, norm1Mf,
+                 &diffs, &block_diff_ac, 1);
+
+  // MF band, X plane; symmetric weights.
+  static const double wMfMaltaX = 8246.75321353;
+  static const double norm1MfX = 1009002.70582;
+  MaltaDiffMapLF(pi0_.mf.Plane(0), pi1.mf.Plane(0), wMfMaltaX, wMfMaltaX,
+                 norm1MfX, &diffs, &block_diff_ac, 0);
+
+  // L2 weights, indexed as wmul[3 * band + channel] with band 0 = hf,
+  // 1 = mf, 2 = lf (see the indexing in the loop below).
+  static const double wmul[9] = {
+      400.0,         1.50815703118,  0,
+      2150.0,        10.6195433239,  16.2176043152,
+      29.2353797994, 0.844626970982, 0.703646627719,
+  };
+  // Not zero-filled: every plane is written by SetL2Diff in the loop below.
+  Image3F block_diff_dc(xsize_, ysize_);
+  for (size_t c = 0; c < 3; ++c) {
+    if (c < 2) {  // No blue channel error accumulated at HF.
+      HWY_DYNAMIC_DISPATCH(L2DiffAsymmetric)
+      (pi0_.hf[c], pi1.hf[c], wmul[c] * hf_asymmetry_, wmul[c] / hf_asymmetry_,
+       &block_diff_ac, c);
+    }
+    HWY_DYNAMIC_DISPATCH(L2Diff)
+    (pi0_.mf.Plane(c), pi1.mf.Plane(c), wmul[3 + c], &block_diff_ac, c);
+    HWY_DYNAMIC_DISPATCH(SetL2Diff)
+    (pi0_.lf.Plane(c), pi1.lf.Plane(c), wmul[6 + c], &block_diff_dc, c);
+  }
+
+  // The mask computation also adds its own error term to the Y plane of
+  // block_diff_ac.
+  ImageF mask;
+  HWY_DYNAMIC_DISPATCH(MaskPsychoImage)
+  (pi0_, pi1, xsize_, ysize_, params_, Temp(), &blur_temp_, &mask,
+   &block_diff_ac.Plane(1));
+  ReleaseTemp();
+
+  HWY_DYNAMIC_DISPATCH(CombineChannelsToDiffmap)
+  (mask, block_diff_dc, block_diff_ac, xmul_, &diffmap);
+}
+
+double ButteraugliScoreFromDiffmap(const ImageF& diffmap,
+                                   const ButteraugliParams* params) {
+  // The score is the largest diffmap value, floored at 0; `params` is unused.
+  float peak = 0.0f;
+  for (size_t y = 0; y < diffmap.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT values = diffmap.ConstRow(y);
+    const float* const row_end = values + diffmap.xsize();
+    for (; values != row_end; ++values) peak = std::max(peak, *values);
+  }
+  return peak;
+}
+
+bool ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1,
+                        double hf_asymmetry, double xmul, ImageF& diffmap) {
+  ButteraugliParams forwarded;  // intensity_target keeps its default.
+  forwarded.xmul = xmul;                  // Narrowed from double to float.
+  forwarded.hf_asymmetry = hf_asymmetry;  // Likewise.
+  return ButteraugliDiffmap(rgb0, rgb1, forwarded, diffmap);
+}
+
+bool ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1,
+                        const ButteraugliParams& params, ImageF& diffmap) {
+  const size_t xsize = rgb0.xsize();
+  const size_t ysize = rgb0.ysize();
+  if (xsize < 1 || ysize < 1) return JXL_FAILURE("Zero-sized image");
+  if (!SameSize(rgb0, rgb1)) return JXL_FAILURE("Size mismatch");
+  static const int kMax = 8;
+  if (xsize >= kMax && ysize >= kMax) {
+    // Regular case: compare rgb1 against a comparator built from rgb0.
+    ButteraugliComparator butteraugli(rgb0, params);
+    butteraugli.Diffmap(rgb1, diffmap);
+    return true;
+  }
+  // At least one side is shorter than kMax pixels. Scores for such inputs are
+  // not meaningful, but computing something is less disruptive than giving up:
+  // pad both images to at least kMax x kMax by repeating edge pixels, diff the
+  // padded copies, then copy the matching window back out.
+  const size_t pad_x = xsize < kMax ? (kMax - xsize) / 2 : 0;
+  const size_t pad_y = ysize < kMax ? (kMax - ysize) / 2 : 0;
+  const size_t padded_w = std::max<size_t>(kMax, xsize);
+  const size_t padded_h = std::max<size_t>(kMax, ysize);
+  Image3F padded0(padded_w, padded_h);
+  Image3F padded1(padded_w, padded_h);
+  for (int c = 0; c < 3; ++c) {
+    for (size_t y = 0; y < padded_h; ++y) {
+      // Source row, clamped so rows outside the original repeat its edge.
+      const size_t sy = std::min<size_t>(ysize - 1, y > pad_y ? y - pad_y : 0);
+      const float* src0 = rgb0.PlaneRow(c, sy);
+      const float* src1 = rgb1.PlaneRow(c, sy);
+      float* dst0 = padded0.PlaneRow(c, y);
+      float* dst1 = padded1.PlaneRow(c, y);
+      for (size_t x = 0; x < padded_w; ++x) {
+        const size_t sx =
+            std::min<size_t>(xsize - 1, x > pad_x ? x - pad_x : 0);
+        dst0[x] = src0[sx];
+        dst1[x] = src1[sx];
+      }
+    }
+  }
+  ImageF padded_diff;
+  const bool ok = ButteraugliDiffmap(padded0, padded1, params, padded_diff);
+  diffmap = ImageF(xsize, ysize);
+  for (size_t y = 0; y < ysize; ++y) {
+    const float* cropped_from = padded_diff.Row(y + pad_y);
+    float* cropped_to = diffmap.Row(y);
+    for (size_t x = 0; x < xsize; ++x) cropped_to[x] = cropped_from[x + pad_x];
+  }
+  return ok;
+}
+
+bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1,
+                          float hf_asymmetry, float xmul, ImageF& diffmap,
+                          double& diffvalue) {
+  ButteraugliParams forwarded;  // intensity_target keeps its default.
+  forwarded.xmul = xmul;
+  forwarded.hf_asymmetry = hf_asymmetry;
+  return ButteraugliInterface(rgb0, rgb1, forwarded, diffmap, diffvalue);
+}
+
+bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1,
+                          const ButteraugliParams& params, ImageF& diffmap,
+                          double& diffvalue) {
+  // diffvalue is written only after the diffmap was computed successfully;
+  // on failure it is left untouched and false is returned.
+  const bool have_diffmap = ButteraugliDiffmap(rgb0, rgb1, params, diffmap);
+  if (have_diffmap) diffvalue = ButteraugliScoreFromDiffmap(diffmap, &params);
+  return have_diffmap;
+}
+
+double ButteraugliFuzzyClass(double score) {
+  // Logistic curve centred on score == 1.0; both sides meet at kOkClass there.
+  static const double kSlopeAbove = 4.8;  // Steepness when not below 1.0.
+  static const double kSlopeBelow = 4.8;  // Steepness for score < 1.0.
+  static const double kCeiling = 2.0;
+  static const double kOkClass = 0.7777;
+  if (score < 1.0) {
+    // Logistic term lies in [1 .. 2]; stretch it onto [kOkClass .. 2.0].
+    double cls = kCeiling / (1.0 + exp((score - 1.0) * kSlopeBelow));
+    cls -= 1.0;             // now in [0 .. 1]
+    cls *= 2.0 - kOkClass;  // now in [0 .. 2.0 - kOkClass]
+    cls += kOkClass;        // now in [kOkClass .. 2.0]
+    return cls;
+  }
+  // Logistic term lies in [0 .. 1]; shrink it onto [0 .. kOkClass].
+  double cls = kCeiling / (1.0 + exp((score - 1.0) * kSlopeAbove));
+  cls *= kOkClass;
+  return cls;
+}
+
+// #define PRINT_OUT_NORMALIZATION
+
+double ButteraugliFuzzyInverse(double seek) {
+  // Halving-step search for the score whose fuzzy class equals `seek`.
+  // ButteraugliFuzzyClass falls as the score rises, so a class value below
+  // `seek` means the estimate is too high and has to move down. The steps
+  // sum to just under 2, which keeps the result inside (-2, 2).
+  double estimate = 0;
+  // NOLINTNEXTLINE(clang-analyzer-security.FloatLoopCounter)
+  for (double step = 1.0; step >= 1e-10; step *= 0.5) {
+    const bool too_high = ButteraugliFuzzyClass(estimate) < seek;
+    estimate = too_high ? estimate - step : estimate + step;
+  }
+#ifdef PRINT_OUT_NORMALIZATION
+  if (seek == 1.0) {
+    fprintf(stderr, "Fuzzy inverse %g\n", estimate);
+  }
+#endif
+  return estimate;
+}
+
+#ifdef PRINT_OUT_NORMALIZATION
+static double print_out_normalization = ButteraugliFuzzyInverse(1.0);
+#endif
+
+namespace {
+
+void ScoreToRgb(double score, double good_threshold, double bad_threshold,
+                float rgb[3]) {
+  // Maps a score to a heat-map colour in rgb[0..2]; a NaN score gives black.
+  static const double heatmap[12][3] = {  // Built once, not on every call.
+      {0, 0, 0},       {0, 0, 1},
+      {0, 1, 1},       {0, 1, 0},  // Good level
+      {1, 1, 0},       {1, 0, 0},  // Bad level
+      {1, 0, 1},       {0.5, 0.5, 1.0},
+      {1.0, 0.5, 0.5},  // Pastel colors for the very bad quality range.
+      {1.0, 1.0, 0.5}, {1, 1, 1},
+      {1, 1, 1},  // Last color repeated to have a solid range of white.
+  };
+  if (score < good_threshold) {
+    score = (score / good_threshold) * 0.3;
+  } else if (score < bad_threshold) {
+    score = 0.3 +
+            (score - good_threshold) / (bad_threshold - good_threshold) * 0.15;
+  } else {
+    score = 0.45 + (score - bad_threshold) / (bad_threshold * 12) * 0.5;
+  }
+  static const int kTableSize = sizeof(heatmap) / sizeof(heatmap[0]);
+  score *= kTableSize - 1;
+  // NaN fails `score > 0.0` and becomes 0; casting NaN to int would be UB.
+  score = score > 0.0 ? std::min<double>(score, kTableSize - 2) : 0.0;
+  const int ix = static_cast<int>(score);  // In [0, kTableSize - 2].
+  const double mix = score - ix;
+  for (int i = 0; i < 3; ++i) {
+    rgb[i] = pow(mix * heatmap[ix + 1][i] + (1 - mix) * heatmap[ix][i], 0.5);
+  }
+}
+
+}  // namespace
+
+Image3F CreateHeatMapImage(const ImageF& distmap, double good_threshold,
+                           double bad_threshold) {
+  // One colour per distance value; planes 0, 1 and 2 receive rgb[0..2].
+  Image3F heatmap(distmap.xsize(), distmap.ysize());
+  for (size_t y = 0; y < distmap.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT distances = distmap.ConstRow(y);
+    float* BUTTERAUGLI_RESTRICT out0 = heatmap.PlaneRow(0, y);
+    float* BUTTERAUGLI_RESTRICT out1 = heatmap.PlaneRow(1, y);
+    float* BUTTERAUGLI_RESTRICT out2 = heatmap.PlaneRow(2, y);
+    for (size_t x = 0; x < distmap.xsize(); ++x) {
+      float color[3];
+      ScoreToRgb(distances[x], good_threshold, bad_threshold, color);
+      out0[x] = color[0];
+      out1[x] = color[1];
+      out2[x] = color[2];
+    }
+  }
+  return heatmap;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/butteraugli/butteraugli.h b/third-party/libjxl/libjxl/lib/jxl/butteraugli/butteraugli.h
new file mode 100644
index 0000000000..652b9528c4
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/butteraugli/butteraugli.h
@@ -0,0 +1,209 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// Author: Jyrki Alakuijala (jyrki.alakuijala@gmail.com)
+
+#ifndef LIB_JXL_BUTTERAUGLI_BUTTERAUGLI_H_
+#define LIB_JXL_BUTTERAUGLI_BUTTERAUGLI_H_
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <atomic>
+#include <cmath>
+#include <memory>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+
+#define BUTTERAUGLI_ENABLE_CHECKS 0
+#define BUTTERAUGLI_RESTRICT JXL_RESTRICT
+
+// This is the main interface to butteraugli image similarity
+// analysis function.
+
+namespace jxl {
+
+struct ButteraugliParams {
+  // Asymmetry between new HF artifacts and blurred-away features: one error
+  // direction is weighted by this factor, the other by its inverse. 1.0=neutral.
+  float hf_asymmetry = 1.0f;
+
+  // Multiplier for the psychovisual difference in the X channel.
+  float xmul = 1.0f;  // Butteraugli is calibrated at 1.0.
+
+  // Number of nits that correspond to 1.0f input values.
+  float intensity_target = 80.0f;
+};
+
+// ButteraugliInterface defines the public interface for butteraugli.
+//
+// It calculates the difference between rgb0 and rgb1.
+//
+// rgb0 and rgb1 contain the images. rgb0[c][px] and rgb1[c][px] contain
+// the red image for c == 0, green for c == 1, blue for c == 2. Location index
+// px is calculated as y * xsize + x.
+//
+// Value of pixels of images rgb0 and rgb1 need to be represented as raw
+// intensity. Most image formats store gamma corrected intensity in pixel
+// values. This gamma correction has to be removed, by applying the following
+// function to values in the 0-1 range:
+// butteraugli_val = pow(input_val, gamma);
+// A typical value of gamma is 2.2. It is usually stored in the image header.
+// Take care not to confuse that value with its inverse. The gamma value should
+// be always greater than one.
+// Butteraugli does not work as intended if the caller does not perform
+// gamma correction.
+//
+// hf_asymmetry is a multiplier for penalizing new HF artifacts more than
+// blurring away features (1.0 -> neutral).
+//
+// diffmap will contain an image of the size xsize * ysize, containing
+// localized differences for values px (indexed with the px the same as rgb0
+// and rgb1). diffvalue will give a global score of similarity.
+//
+// A diffvalue smaller than kButteraugliGood indicates that images can be
+// observed as the same image.
+// diffvalue larger than kButteraugliBad indicates that a difference between
+// the images can be observed.
+// A diffvalue between kButteraugliGood and kButteraugliBad indicates that
+// a subtle difference can be observed between the images.
+//
+// Returns true on success.
+bool ButteraugliInterface(const Image3F &rgb0, const Image3F &rgb1,
+                          const ButteraugliParams &params, ImageF &diffmap,
+                          double &diffvalue);
+
+// Deprecated (calls the previous function)
+bool ButteraugliInterface(const Image3F &rgb0, const Image3F &rgb1,
+                          float hf_asymmetry, float xmul, ImageF &diffmap,
+                          double &diffvalue);
+
+// Converts the butteraugli score into fuzzy class values that are continuous
+// at the class boundary. The class boundary location is based on human
+// raters, but the slope is arbitrary. Particularly, it does not reflect
+// the expectation value of probabilities of the human raters. It is just
+// expected that a smoother class boundary will allow for higher-level
+// optimization algorithms to work faster.
+//
+// Returns 2.0 for a perfect match, and 1.0 for 'ok', 0.0 for bad. Because the
+// scoring is fuzzy, a butteraugli score of 0.96 would return a class of
+// around 1.9.
+double ButteraugliFuzzyClass(double score);
+
+// Input values should be in range 0 (bad) to 2 (good). Use
+// kButteraugliNormalization as normalization.
+double ButteraugliFuzzyInverse(double seek);
+
+// Implementation details, don't use anything below or your code will
+// break in the future.
+
+#ifdef _MSC_VER
+#define BUTTERAUGLI_INLINE __forceinline
+#else
+#define BUTTERAUGLI_INLINE inline
+#endif
+
+#ifdef __clang__
+// Early versions of Clang did not support __builtin_assume_aligned.
+#define BUTTERAUGLI_HAS_ASSUME_ALIGNED __has_builtin(__builtin_assume_aligned)
+#elif defined(__GNUC__)
+#define BUTTERAUGLI_HAS_ASSUME_ALIGNED 1
+#else
+#define BUTTERAUGLI_HAS_ASSUME_ALIGNED 0
+#endif
+
+// Returns a void* pointer which the compiler then assumes is N-byte aligned.
+// Example: float* JXL_RESTRICT aligned = (float*)JXL_ASSUME_ALIGNED(in, 32);
+//
+// The assignment semantics are required by GCC/Clang. ICC provides an in-place
+// __assume_aligned, whereas MSVC's __assume appears unsuitable.
+#if BUTTERAUGLI_HAS_ASSUME_ALIGNED
+#define BUTTERAUGLI_ASSUME_ALIGNED(ptr, align) \
+  __builtin_assume_aligned((ptr), (align))
+#else
+#define BUTTERAUGLI_ASSUME_ALIGNED(ptr, align) (ptr)
+#endif  // BUTTERAUGLI_HAS_ASSUME_ALIGNED
+
+struct PsychoImage {
+  ImageF uhf[2];  // XY: [0] = X, [1] = Y
+  ImageF hf[2];   // XY: [0] = X, [1] = Y
+  Image3F mf;     // XYB: planes 0, 1, 2
+  Image3F lf;     // XYB: planes 0, 1, 2
+};
+
+// Blur needs a transposed image.
+// Hold it here and only allocate on demand to reduce memory usage.
+struct BlurTemp {
+  // Hands out the scratch image. While it is still empty (xsize() == 0) it is
+  // first allocated as in.ysize() x in.xsize(); otherwise it is reused as-is.
+  ImageF *GetTransposed(const ImageF &in) {
+    const bool unallocated = transposed_temp.xsize() == 0;
+    if (unallocated) transposed_temp = ImageF(in.ysize(), in.xsize());
+    return &transposed_temp;
+  }
+  ImageF transposed_temp;
+};
+
+class ButteraugliComparator {
+ public:
+  // Butteraugli is calibrated at xmul = 1.0. We add a multiplier here so that
+  // we can test the hypothesis that a higher weighting of the X channel would
+  // improve results at higher Butteraugli values.
+  ButteraugliComparator(const Image3F &rgb0, const ButteraugliParams &params);
+  virtual ~ButteraugliComparator() = default;
+
+  // Computes the butteraugli map between the original image given in the
+  // constructor and the distorted image given here.
+  void Diffmap(const Image3F &rgb1, ImageF &result) const;
+
+  // Same as above, but OpsinDynamicsImage() was already applied.
+  void DiffmapOpsinDynamicsImage(const Image3F &xyb1, ImageF &result) const;
+
+  // Same as above, but the frequency decomposition was already applied.
+  void DiffmapPsychoImage(const PsychoImage &pi1, ImageF &diffmap) const;
+
+  void Mask(ImageF *BUTTERAUGLI_RESTRICT mask) const;
+
+ private:
+  Image3F *Temp() const;
+  void ReleaseTemp() const;
+
+  const size_t xsize_;
+  const size_t ysize_;
+  ButteraugliParams params_;
+  PsychoImage pi0_;  // Compared against pi1 in DiffmapPsychoImage.
+
+  // Shared temporary image storage to reduce the number of allocations;
+  // obtained via Temp(), must call ReleaseTemp when no longer needed.
+  mutable Image3F temp_;
+  mutable std::atomic_flag temp_in_use_ = ATOMIC_FLAG_INIT;
+
+  mutable BlurTemp blur_temp_;
+  std::unique_ptr<ButteraugliComparator> sub_;
+};
+
+// Deprecated.
+bool ButteraugliDiffmap(const Image3F &rgb0, const Image3F &rgb1,
+                        double hf_asymmetry, double xmul, ImageF &diffmap);
+
+bool ButteraugliDiffmap(const Image3F &rgb0, const Image3F &rgb1,
+                        const ButteraugliParams &params, ImageF &diffmap);
+
+double ButteraugliScoreFromDiffmap(const ImageF &diffmap,
+                                   const ButteraugliParams *params = nullptr);
+
+// Generate rgb-representation of the distance between two images.
+Image3F CreateHeatMapImage(const ImageF &distmap, double good_threshold,
+                           double bad_threshold);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BUTTERAUGLI_BUTTERAUGLI_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/byte_order_test.cc b/third-party/libjxl/libjxl/lib/jxl/byte_order_test.cc
new file mode 100644
index 0000000000..17d7ef6643
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/byte_order_test.cc
@@ -0,0 +1,53 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/byte_order.h"
+
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+TEST(ByteOrderTest, TestRoundTripBE16) {
+  uint8_t bytes[2];
+  const uint32_t value = 0x1234;  // Two different bytes, so order matters.
+  StoreBE16(value, bytes);
+  EXPECT_EQ(value, LoadBE16(bytes));  // Same byte order reads it back.
+  EXPECT_NE(value, LoadLE16(bytes));  // The opposite order must not.
+}
+
+TEST(ByteOrderTest, TestRoundTripLE16) {
+  uint8_t bytes[2];
+  const uint32_t value = 0x1234;  // Two different bytes, so order matters.
+  StoreLE16(value, bytes);
+  EXPECT_EQ(value, LoadLE16(bytes));  // Same byte order reads it back.
+  EXPECT_NE(value, LoadBE16(bytes));  // The opposite order must not.
+}
+
+TEST(ByteOrderTest, TestRoundTripBE32) {
+  uint8_t bytes[4];
+  const uint32_t value = 0xFEDCBA98u;  // Differs from its byte reversal.
+  StoreBE32(value, bytes);
+  EXPECT_EQ(value, LoadBE32(bytes));  // Same byte order reads it back.
+  EXPECT_NE(value, LoadLE32(bytes));  // The opposite order must not.
+}
+
+TEST(ByteOrderTest, TestRoundTripLE32) {
+  uint8_t bytes[4];
+  const uint32_t value = 0xFEDCBA98u;  // Differs from its byte reversal.
+  StoreLE32(value, bytes);
+  EXPECT_EQ(value, LoadLE32(bytes));  // Same byte order reads it back.
+  EXPECT_NE(value, LoadBE32(bytes));  // The opposite order must not.
+}
+
+TEST(ByteOrderTest, TestRoundTripLE64) {
+  uint8_t bytes[8];
+  const uint64_t value = 0xFEDCBA9876543210ull;
+  StoreLE64(value, bytes);
+  EXPECT_EQ(value, LoadLE64(bytes));  // Round trip only; no BE counterpart.
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/chroma_from_luma.cc b/third-party/libjxl/libjxl/lib/jxl/chroma_from_luma.cc
new file mode 100644
index 0000000000..63d21cbb4b
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/chroma_from_luma.cc
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/chroma_from_luma.h"
+
+namespace jxl {
+
+ColorCorrelationMap::ColorCorrelationMap(size_t xsize, size_t ysize, bool XYB)
+    : ytox_map(DivCeil(xsize, kColorTileDim), DivCeil(ysize, kColorTileDim)),
+      ytob_map(DivCeil(xsize, kColorTileDim), DivCeil(ysize, kColorTileDim)) {
+  // Both maps are sized in kColorTileDim tiles and start out all zero.
+  ZeroFillImage(&ytob_map);
+  ZeroFillImage(&ytox_map);
+  // Without XYB the default Y-to-B base correlation is replaced by 0.
+  if (!XYB) base_correlation_b_ = 0;
+  RecomputeDCFactors();
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/chroma_from_luma.h b/third-party/libjxl/libjxl/lib/jxl/chroma_from_luma.h
new file mode 100644
index 0000000000..9a7f3d45bc
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/chroma_from_luma.h
@@ -0,0 +1,147 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_CHROMA_FROM_LUMA_H_
+#define LIB_JXL_CHROMA_FROM_LUMA_H_
+
+// Chroma-from-luma, computed using heuristics to determine the best linear
+// model for the X and B channels from the Y channel.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/quant_weights.h"
+
+namespace jxl {
+
+// Tile is the rectangular grid of blocks that share color correlation
+// parameters ("factor_x/b" such that residual_b = blue - Y * factor_b).
+static constexpr size_t kColorTileDim = 64;
+
+static_assert(kColorTileDim % kBlockDim == 0,
+              "Color tile dim should be divisible by block dim");
+static constexpr size_t kColorTileDimInBlocks = kColorTileDim / kBlockDim;
+
+static_assert(kGroupDimInBlocks % kColorTileDimInBlocks == 0,
+              "Group dim should be divisible by color tile dim");
+
+static constexpr uint8_t kDefaultColorFactor = 84;
+
+// JPEG DCT coefficients are at most 1024. CfL constants are at most 127, and
+// the ratio of two entries in a JPEG quantization table is at most 255. Thus,
+// since the CfL denominator is 84, this leaves 12 bits of mantissa to be used.
+// For extra caution, we use 11.
+static constexpr uint8_t kCFLFixedPointPrecision = 11;
+
+static constexpr U32Enc kColorFactorDist(Val(kDefaultColorFactor), Val(256),
+                                         BitsOffset(8, 2), BitsOffset(16, 258));
+
+struct ColorCorrelationMap {
+  ColorCorrelationMap() = default;
+  // xsize/ysize are in pixels; the maps are sized in kColorTileDim tiles.
+  // set XYB=false to do something close to no-op cmap (needed for now since
+  // cmap is mandatory)
+  ColorCorrelationMap(size_t xsize, size_t ysize, bool XYB = true);
+
+  float YtoXRatio(int32_t x_factor) const {  // base + factor / color_factor_
+    return base_correlation_x_ + x_factor * color_scale_;
+  }
+
+  float YtoBRatio(int32_t b_factor) const {  // base + factor / color_factor_
+    return base_correlation_b_ + b_factor * color_scale_;
+  }
+
+  Status DecodeDC(BitReader* br) {
+    if (br->ReadFixedBits<1>() == 1) {
+      // All default: nothing else is read and no field is modified.
+      return true;
+    }
+    SetColorFactor(U32Coder::Read(kColorFactorDist, br));
+    JXL_RETURN_IF_ERROR(F16Coder::Read(br, &base_correlation_x_));
+    if (std::abs(base_correlation_x_) > 4.0f) {
+      return JXL_FAILURE("Base X correlation is out of range");
+    }
+    JXL_RETURN_IF_ERROR(F16Coder::Read(br, &base_correlation_b_));
+    if (std::abs(base_correlation_b_) > 4.0f) {
+      return JXL_FAILURE("Base B correlation is out of range");
+    }
+    ytox_dc_ = static_cast<int>(br->ReadFixedBits<kBitsPerByte>()) +
+               std::numeric_limits<int8_t>::min();  // i.e. raw value - 128
+    ytob_dc_ = static_cast<int>(br->ReadFixedBits<kBitsPerByte>()) +
+               std::numeric_limits<int8_t>::min();  // i.e. raw value - 128
+    RecomputeDCFactors();
+    return true;
+  }
+
+  // We consider a CfL map to be JPEG-reconstruction-compatible if base
+  // correlation is 0, no DC correlation is used, and we use the default color
+  // factor.
+  bool IsJPEGCompatible() const {
+    return base_correlation_x_ == 0 && base_correlation_b_ == 0 &&
+           ytob_dc_ == 0 && ytox_dc_ == 0 &&
+           color_factor_ == kDefaultColorFactor;
+  }
+
+  int32_t RatioJPEG(int32_t factor) const {
+    return factor * (1 << kCFLFixedPointPrecision) / kDefaultColorFactor;
+  }
+
+  void SetColorFactor(uint32_t factor) {
+    color_factor_ = factor;
+    color_scale_ = 1.0f / color_factor_;  // NOTE: infinite when factor == 0.
+    RecomputeDCFactors();
+  }
+
+  void SetYToBDC(int32_t ytob_dc) {
+    ytob_dc_ = ytob_dc;
+    RecomputeDCFactors();
+  }
+  void SetYToXDC(int32_t ytox_dc) {
+    ytox_dc_ = ytox_dc;
+    RecomputeDCFactors();
+  }
+
+  int32_t GetYToXDC() const { return ytox_dc_; }
+  int32_t GetYToBDC() const { return ytob_dc_; }
+  float GetColorFactor() const { return color_factor_; }  // uint32 -> float
+  float GetBaseCorrelationX() const { return base_correlation_x_; }
+  float GetBaseCorrelationB() const { return base_correlation_b_; }
+
+  const float* DCFactors() const { return dc_factors_; }
+
+  void RecomputeDCFactors() {
+    dc_factors_[0] = YtoXRatio(ytox_dc_);
+    dc_factors_[2] = YtoBRatio(ytob_dc_);
+  }
+
+  ImageSB ytox_map;
+  ImageSB ytob_map;
+
+ private:
+  float dc_factors_[4] = {};  // Only [0] (X) and [2] (B) are ever written.
+  // range of factor: -1.51 to +1.52
+  uint32_t color_factor_ = kDefaultColorFactor;
+  float color_scale_ = 1.0f / color_factor_;
+  float base_correlation_x_ = 0.0f;
+  float base_correlation_b_ = kYToBRatio;
+  int32_t ytox_dc_ = 0;
+  int32_t ytob_dc_ = 0;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_CHROMA_FROM_LUMA_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/codec_in_out.h b/third-party/libjxl/libjxl/lib/jxl/codec_in_out.h
new file mode 100644
index 0000000000..9e48b5e937
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/codec_in_out.h
@@ -0,0 +1,116 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_CODEC_IN_OUT_H_
+#define LIB_JXL_CODEC_IN_OUT_H_
+
+// Holds inputs/outputs for decoding/encoding images.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/luminance.h"
+
+namespace jxl {
+
+// Optional text/EXIF metadata.
+struct Blobs {  // Byte buffers; each one is empty until something fills it.
+  std::vector<uint8_t> exif;
+  std::vector<uint8_t> iptc;
+  std::vector<uint8_t> jumbf;
+  std::vector<uint8_t> xmp;
+};
+
+// Holds a preview, a main image or one or more frames, plus the inputs/outputs
+// to/from decoding/encoding.
+class CodecInOut {
+ public:
+  CodecInOut() : preview_frame(&metadata.m) {
+    frames.reserve(1);
+    frames.emplace_back(&metadata.m);  // Always start with one frame.
+  }
+
+  // Move-only.
+  CodecInOut(CodecInOut&&) = default;
+  CodecInOut& operator=(CodecInOut&&) = default;
+
+  size_t LastStillFrame() const {  // First frame with duration > 0, else last.
+    JXL_DASSERT(!frames.empty());
+    size_t last = 0;
+    for (size_t i = 0; i < frames.size(); i++) {
+      last = i;
+      if (frames[i].duration > 0) break;
+    }
+    return last;
+  }
+
+  ImageBundle& Main() { return frames[LastStillFrame()]; }
+  const ImageBundle& Main() const { return frames[LastStillFrame()]; }
+
+  // If c_current.IsGray(), all planes must be identical.
+  void SetFromImage(Image3F&& color, const ColorEncoding& c_current) {
+    Main().SetFromImage(std::move(color), c_current);
+    SetIntensityTarget(&this->metadata.m);
+    SetSize(Main().xsize(), Main().ysize());
+  }
+
+  void SetSize(size_t xsize, size_t ysize) {
+    JXL_CHECK(metadata.size.Set(xsize, ysize));
+  }
+
+  void CheckMetadata() const {
+    JXL_CHECK(metadata.m.bit_depth.bits_per_sample != 0);
+    JXL_CHECK(!metadata.m.color_encoding.ICC().empty());
+
+    if (preview_frame.xsize() != 0) preview_frame.VerifyMetadata();
+    JXL_CHECK(preview_frame.metadata() == &metadata.m);
+
+    for (const ImageBundle& ib : frames) {
+      ib.VerifyMetadata();
+      JXL_CHECK(ib.metadata() == &metadata.m);
+    }
+  }
+
+  size_t xsize() const { return metadata.size.xsize(); }
+  size_t ysize() const { return metadata.size.ysize(); }
+  void ShrinkTo(size_t xsize, size_t ysize) {
+    // preview is unaffected.
+    for (ImageBundle& ib : frames) {
+      ib.ShrinkTo(xsize, ysize);
+    }
+    SetSize(xsize, ysize);
+  }
+
+  // -- DECODER OUTPUT, ENCODER INPUT:
+
+  // Metadata stored into / retrieved from bitstreams.
+
+  Blobs blobs;
+
+  CodecMetadata metadata;  // applies to preview and all frames
+
+  // If metadata.have_preview:
+  ImageBundle preview_frame;
+
+  std::vector<ImageBundle> frames;  // size=1 if !metadata.have_animation
+
+  // If the image should be written to a JPEG, use this quality for encoding.
+  size_t jpeg_quality = 0;  // Was uninitialized; 0 until a caller sets it.
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_CODEC_IN_OUT_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/coeff_order.cc b/third-party/libjxl/libjxl/lib/jxl/coeff_order.cc
new file mode 100644
index 0000000000..75ad2f26f6
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/coeff_order.cc
@@ -0,0 +1,151 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/coeff_order.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/lehmer_code.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+uint32_t CoeffOrderContext(uint32_t val) {
+  uint32_t token, unused_nbits, unused_bits;  // Only token is used.
+  HybridUintConfig(0, 0, 0).Encode(val, &token, &unused_nbits, &unused_bits);
+  return token < kPermutationContexts - 1 ? token : kPermutationContexts - 1;
+}
+
+namespace {
+Status ReadPermutation(size_t skip, size_t size, coeff_order_t* order,
+                       BitReader* br, ANSSymbolReader* reader,
+                       const std::vector<uint8_t>& context_map) {
+  std::vector<LehmerT> lehmer(size);
+  // temp space needs to be as large as the next power of 2, so doubling the
+  // allocated size is enough.
+  std::vector<uint32_t> temp(size * 2);
+  // Add in 64 bits: a sum truncated to 32 bits could wrap below `size` and
+  // slip past the range check.
+  const uint64_t end = static_cast<uint64_t>(skip) +
+      reader->ReadHybridUint(CoeffOrderContext(size), br, context_map);
+  if (end > size) return JXL_FAILURE("Invalid permutation size");
+  uint32_t last = 0;
+  for (size_t i = skip; i < end; ++i) {
+    lehmer[i] =
+        reader->ReadHybridUint(CoeffOrderContext(last), br, context_map);
+    last = lehmer[i];
+    if (static_cast<uint64_t>(lehmer[i]) + i >= size) {
+      return JXL_FAILURE("Invalid lehmer code");
+    }
+  }
+  if (order == nullptr) return true;  // Parsed and checked; nothing to fill.
+  DecodeLehmerCode(lehmer.data(), temp.data(), size, order);
+  return true;
+}
+
+}  // namespace
+
+Status DecodePermutation(size_t skip, size_t size, coeff_order_t* order,
+                         BitReader* br) {
+  // Order matters: histograms first, then the permutation, then the ANS check.
+  std::vector<uint8_t> ctx_map;
+  ANSCode ans_code;
+  JXL_RETURN_IF_ERROR(
+      DecodeHistograms(br, kPermutationContexts, &ans_code, &ctx_map));
+  ANSSymbolReader symbols(&ans_code, br);
+  JXL_RETURN_IF_ERROR(
+      ReadPermutation(skip, size, order, br, &symbols, ctx_map));
+  const bool clean_end = symbols.CheckANSFinalState();
+  if (!clean_end) return JXL_FAILURE("Invalid ANS stream");
+  return true;
+}
+
+namespace {
+
+Status DecodeCoeffOrder(AcStrategy acs, coeff_order_t* order, BitReader* br,
+                        ANSSymbolReader* reader,
+                        std::vector<coeff_order_t>& natural_order,
+                        const std::vector<uint8_t>& context_map) {
+  // `llf` (one entry per covered block) is passed on as the skip count.
+  const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y();
+  const size_t num_coeffs = kDCTBlockSize * llf;
+  JXL_RETURN_IF_ERROR(
+      ReadPermutation(llf, num_coeffs, order, br, reader, context_map));
+  if (order != nullptr) {
+    // Compose: entry k becomes the natural-order value it points at.
+    for (size_t k = 0; k < num_coeffs; ++k) order[k] = natural_order[order[k]];
+  }
+  return true;
+}
+
+}  // namespace
+
+Status DecodeCoeffOrders(uint16_t used_orders, uint32_t used_acs,
+                         coeff_order_t* order, BitReader* br) {
+  uint16_t computed = 0;  // Bitmask of order buckets already processed.
+  std::vector<uint8_t> context_map;
+  ANSCode code;
+  std::unique_ptr<ANSSymbolReader> reader;
+  std::vector<coeff_order_t> natural_order;
+  // Bitstream does not have histograms if no coefficient order is used.
+  if (used_orders != 0) {
+    JXL_RETURN_IF_ERROR(
+        DecodeHistograms(br, kPermutationContexts, &code, &context_map));
+    reader = make_unique<ANSSymbolReader>(&code, br);
+  }
+  uint32_t acs_mask = 0;  // Bitmask of order buckets that used_acs maps to.
+  for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+    if ((used_acs & (1 << o)) == 0) continue;
+    acs_mask |= 1 << kStrategyOrder[o];
+  }
+  for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+    uint8_t ord = kStrategyOrder[o];
+    if (computed & (1 << ord)) continue;  // Bucket handled via an earlier o.
+    computed |= 1 << ord;
+    AcStrategy acs = AcStrategy::FromRawStrategy(o);
+    bool used = (acs_mask & (1 << ord)) != 0;
+
+    const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y();
+    const size_t size = kDCTBlockSize * llf;
+
+    if (used || (used_orders & (1 << ord))) {
+      if (natural_order.size() < size) natural_order.resize(size);
+      acs.ComputeNaturalCoeffOrder(natural_order.data());
+    }
+
+    if ((used_orders & (1 << ord)) == 0) {
+      // No need to set the default order if no ACS uses this order.
+      if (used) {
+        for (size_t c = 0; c < 3; c++) {
+          memcpy(&order[CoeffOrderOffset(ord, c)], natural_order.data(),
+                 size * sizeof(*order));
+        }
+      }
+    } else {  // A custom order is coded: always parse it, store only if used.
+      for (size_t c = 0; c < 3; c++) {
+        coeff_order_t* dest = used ? &order[CoeffOrderOffset(ord, c)] : nullptr;
+        JXL_RETURN_IF_ERROR(DecodeCoeffOrder(acs, dest, br, reader.get(),
+                                             natural_order, context_map));
+      }
+    }
+  }
+  if (used_orders && !reader->CheckANSFinalState()) {
+    return JXL_FAILURE("Invalid ANS stream");
+  }
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/coeff_order.h b/third-party/libjxl/libjxl/lib/jxl/coeff_order.h
new file mode 100644
index 0000000000..fb32499f2f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/coeff_order.h
@@ -0,0 +1,64 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_COEFF_ORDER_H_
+#define LIB_JXL_COEFF_ORDER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_util.h"
+
+namespace jxl {
+
+class BitReader;
+
+// Indexed by 3 * order + channel; entries are in units of kDCTBlockSize.
+static constexpr size_t kCoeffOrderOffset[] = {
+    0,    1,    2,    3,    4,    5,    6,    10,   14,   18,
+    34,   50,   66,   68,   70,   72,   76,   80,   84,   92,
+    100,  108,  172,  236,  300,  332,  364,  396,  652,  908,
+    1164, 1292, 1420, 1548, 2572, 3596, 4620, 5132, 5644, 6156,
+};
+static_assert(3 * kNumOrders + 1 ==
+                  sizeof(kCoeffOrderOffset) / sizeof(*kCoeffOrderOffset),
+              "Update this array when adding or removing order types.");
+
+static constexpr size_t CoeffOrderOffset(size_t order, size_t c) {
+  return kDCTBlockSize * kCoeffOrderOffset[c + order * 3];
+}
+
+static constexpr size_t kCoeffOrderMaxSize =
+    kCoeffOrderOffset[3 * kNumOrders] * kDCTBlockSize;
+
+// Maps each raw AC strategy index to its coefficient-order bucket. Strategies
+// whose natural orders differ must not share a bucket.
+constexpr uint8_t kStrategyOrder[] = {
+    0, 1, 1, 1, 2, 3, 4, 4, 5,  5,  6,  6,  1,  1,
+    1, 1, 1, 1, 7, 8, 8, 9, 10, 10, 11, 12, 12,
+};
+
+static_assert(AcStrategy::kNumValidStrategies ==
+                  sizeof(kStrategyOrder) / sizeof(*kStrategyOrder),
+              "Update this array when adding or removing AC strategies.");
+
+constexpr uint32_t kPermutationContexts = 8;
+
+uint32_t CoeffOrderContext(uint32_t val);
+
+Status DecodeCoeffOrders(uint16_t used_orders, uint32_t used_acs,
+                         coeff_order_t* order, BitReader* br);
+
+Status DecodePermutation(size_t skip, size_t size, coeff_order_t* order,
+                         BitReader* br);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_COEFF_ORDER_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/coeff_order_fwd.h b/third-party/libjxl/libjxl/lib/jxl/coeff_order_fwd.h
new file mode 100644
index 0000000000..26306575c1
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/coeff_order_fwd.h
@@ -0,0 +1,47 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_COEFF_ORDER_FWD_H_
+#define LIB_JXL_COEFF_ORDER_FWD_H_
+
+// Breaks circular dependency between ac_strategy and coeff_order.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+// Needs at least 16 bits. A 32-bit type speeds up DecodeAC by 2% at the cost of
+// more memory.
+using coeff_order_t = uint32_t;
+
+// Maximum number of orders to be used. Note that this needs to be multiplied by
+// the number of channels. One per "size class" (plus one extra for DCT8),
+// shared between transforms of size XxY and of size YxX.
+constexpr uint8_t kNumOrders = 13;
+
+// DCT coefficients are laid out in such a way that the number of rows of
+// coefficients is always the smaller coordinate.
+JXL_INLINE constexpr size_t CoefficientRows(size_t rows, size_t columns) {
+  return columns < rows ? columns : rows;
+}
+
+JXL_INLINE constexpr size_t CoefficientColumns(size_t rows, size_t columns) {
+  return columns < rows ? rows : columns;
+}
+
+JXL_INLINE void CoefficientLayout(size_t* JXL_RESTRICT rows,
+                                  size_t* JXL_RESTRICT columns) {
+  const size_t in_rows = *rows;  // Snapshot both inputs before overwriting.
+  const size_t in_cols = *columns;
+  *columns = CoefficientColumns(in_rows, in_cols);
+  *rows = CoefficientRows(in_rows, in_cols);
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_COEFF_ORDER_FWD_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/coeff_order_test.cc b/third-party/libjxl/libjxl/lib/jxl/coeff_order_test.cc
new file mode 100644
index 0000000000..6fa0775697
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/coeff_order_test.cc
@@ -0,0 +1,97 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/coeff_order.h"
+
+#include <stdio.h>
+
+#include <algorithm>
+#include <numeric>  // iota
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_coeff_order.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+void RoundtripPermutation(coeff_order_t* perm, coeff_order_t* out, size_t len,
+                          size_t* size) {
+  BitWriter encoded;
+  EncodePermutation(perm, 0, len, &encoded, 0, nullptr);
+  encoded.ZeroPadToByte();
+  Status close_status = true;
+  {  // The closer leaves scope before close_status is inspected below.
+    BitReader br(encoded.GetSpan());
+    BitReaderScopedCloser closer(&br, &close_status);
+    ASSERT_TRUE(DecodePermutation(0, len, out, &br));
+  }
+  ASSERT_TRUE(close_status);
+  *size = encoded.GetSpan().size();
+}
+
+enum Permutation { kIdentity, kFewSwaps, kFewSlides, kRandom };
+
+constexpr size_t kSwaps = 32;
+
+void TestPermutation(Permutation kind, size_t len) {
+  std::vector<coeff_order_t> expected(len);
+  std::iota(expected.begin(), expected.end(), 0);
+  Rng rng(0);
+  if (kind == kFewSwaps) {
+    for (size_t n = 0; n < kSwaps; n++) {
+      const size_t first = rng.UniformU(0, len - 1);
+      const size_t second = rng.UniformU(0, len - 1);
+      std::swap(expected[first], expected[second]);
+    }
+  }
+  if (kind == kFewSlides) {
+    for (size_t n = 0; n < kSwaps; n++) {
+      const size_t first = rng.UniformU(0, len - 1);
+      const size_t second = rng.UniformU(0, len - 1);
+      const size_t lo = std::min(first, second);
+      const size_t hi = std::max(first, second);
+      // Slide: rotate the inclusive range [lo, hi] left by one, so that the
+      // element at lo ends up at hi and everything else moves down a slot.
+      // A range of length one (lo == hi) is left untouched.
+      std::rotate(expected.begin() + lo, expected.begin() + lo + 1,
+                  expected.begin() + hi + 1);
+    }
+  }
+  if (kind == kRandom) {
+    rng.Shuffle(expected.data(), expected.size());
+  }
+  std::vector<coeff_order_t> decoded(len);
+  size_t encoded_bytes = 0;
+  RoundtripPermutation(expected.data(), decoded.data(), len, &encoded_bytes);
+  for (size_t pos = 0; pos < len; pos++) {
+    EXPECT_EQ(expected[pos], decoded[pos]);
+  }
+  printf("Encoded size: %" PRIuS "\n", encoded_bytes);
+}
+
+TEST(CoeffOrderTest, IdentitySmall) { TestPermutation(kIdentity, 256); }
+TEST(CoeffOrderTest, FewSlidesSmall) { TestPermutation(kFewSlides, 256); }
+TEST(CoeffOrderTest, FewSwapsSmall) { TestPermutation(kFewSwaps, 256); }
+TEST(CoeffOrderTest, RandomSmall) { TestPermutation(kRandom, 256); }
+
+TEST(CoeffOrderTest, IdentityMedium) { TestPermutation(kIdentity, 1 << 12); }
+TEST(CoeffOrderTest, FewSlidesMedium) { TestPermutation(kFewSlides, 1 << 12); }
+TEST(CoeffOrderTest, FewSwapsMedium) { TestPermutation(kFewSwaps, 1 << 12); }
+TEST(CoeffOrderTest, RandomMedium) { TestPermutation(kRandom, 1 << 12); }
+
+TEST(CoeffOrderTest, IdentityBig) { TestPermutation(kIdentity, 1 << 16); }
+TEST(CoeffOrderTest, FewSlidesBig) { TestPermutation(kFewSlides, 1 << 16); }
+TEST(CoeffOrderTest, FewSwapsBig) { TestPermutation(kFewSwaps, 1 << 16); }
+TEST(CoeffOrderTest, RandomBig) { TestPermutation(kRandom, 1 << 16); }
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/color_encoding_internal.cc b/third-party/libjxl/libjxl/lib/jxl/color_encoding_internal.cc
new file mode 100644
index 0000000000..a59a9fd2ac
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/color_encoding_internal.cc
@@ -0,0 +1,789 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/color_encoding_internal.h"
+
+#include <errno.h>
+
+#include <array>
+#include <cmath>
+
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/matrix_ops.h"
+
+namespace jxl {
+namespace {
+
+// Highest reasonable value for the gamma of a transfer curve.
+constexpr uint32_t kMaxGamma = 8192;
+
+// These strings are baked into Description - do not change.
+
+std::string ToString(ColorSpace color_space) {
+  switch (color_space) {
+    case ColorSpace::kUnknown:
+      return "CS?";
+    case ColorSpace::kXYB:
+      return "XYB";
+    case ColorSpace::kGray:
+      return "Gra";
+    case ColorSpace::kRGB:
+      return "RGB";
+  }
+  // Only reachable with a value outside the enum, which the visitor rejects.
+  JXL_UNREACHABLE("Invalid ColorSpace %u", static_cast<uint32_t>(color_space));
+}
+
+std::string ToString(WhitePoint white_point) {
+  switch (white_point) {
+    case WhitePoint::kDCI:
+      return "DCI";
+    case WhitePoint::kE:
+      return "EER";
+    case WhitePoint::kCustom:
+      return "Cst";
+    case WhitePoint::kD65:
+      return "D65";
+  }
+  // Only reachable with a value outside the enum, which the visitor rejects.
+  JXL_UNREACHABLE("Invalid WhitePoint %u", static_cast<uint32_t>(white_point));
+}
+
+std::string ToString(Primaries primaries) {
+  switch (primaries) {
+    case Primaries::kCustom:
+      return "Cst";
+    case Primaries::kP3:
+      return "DCI";
+    case Primaries::k2100:
+      return "202";
+    case Primaries::kSRGB:
+      return "SRG";
+  }
+  // Only reachable with a value outside the enum, which the visitor rejects.
+  JXL_UNREACHABLE("Invalid Primaries %u", static_cast<uint32_t>(primaries));
+}
+
+std::string ToString(TransferFunction transfer_function) {
+  switch (transfer_function) {
+    case TransferFunction::kUnknown:
+      return "TF?";
+    case TransferFunction::kDCI:
+      return "DCI";
+    case TransferFunction::kHLG:
+      return "HLG";
+    case TransferFunction::kPQ:
+      return "PeQ";
+    case TransferFunction::k709:
+      return "709";
+    case TransferFunction::kLinear:
+      return "Lin";
+    case TransferFunction::kSRGB:
+      return "SRG";
+  }
+  // Only reachable with a value outside the enum, which the visitor rejects.
+  JXL_UNREACHABLE("Invalid TransferFunction %u",
+                  static_cast<uint32_t>(transfer_function));
+}
+
+std::string ToString(RenderingIntent rendering_intent) {
+  switch (rendering_intent) {
+    case RenderingIntent::kAbsolute:
+      return "Abs";
+    case RenderingIntent::kSaturation:
+      return "Sat";
+    case RenderingIntent::kRelative:
+      return "Rel";
+    case RenderingIntent::kPerceptual:
+      return "Per";
+  }
+  // Only reachable with a value outside the enum, which the visitor rejects.
+  JXL_UNREACHABLE("Invalid RenderingIntent %u",
+                  static_cast<uint32_t>(rendering_intent));
+}
+
+static double F64FromCustomxyI32(const int32_t i) { return 1E-6 * i; }
+static Status F64ToCustomxyI32(const double f, int32_t* JXL_RESTRICT i) {
+  // The range test is phrased so that a NaN input is rejected as well.
+  const bool in_range = (-4 <= f) && (f <= 4);
+  if (!in_range) return JXL_FAILURE("F64 out of bounds for CustomxyI32");
+  *i = static_cast<int32_t>(roundf(f * 1E6));
+  return true;
+}
+
+Status ConvertExternalToInternalWhitePoint(const JxlWhitePoint external,
+                                           WhitePoint* internal) {
+  switch (external) {
+    case JXL_WHITE_POINT_DCI:
+      *internal = WhitePoint::kDCI;
+      return true;
+    case JXL_WHITE_POINT_E:
+      *internal = WhitePoint::kE;
+      return true;
+    case JXL_WHITE_POINT_CUSTOM:
+      *internal = WhitePoint::kCustom;
+      return true;
+    case JXL_WHITE_POINT_D65:
+      *internal = WhitePoint::kD65;
+      return true;
+  }
+  return JXL_FAILURE("Invalid WhitePoint enum value %d",
+                     static_cast<int>(external));
+}
+
+Status ConvertExternalToInternalPrimaries(const JxlPrimaries external,
+                                          Primaries* internal) {
+  switch (external) {
+    case JXL_PRIMARIES_P3:
+      *internal = Primaries::kP3;
+      return true;
+    case JXL_PRIMARIES_2100:
+      *internal = Primaries::k2100;
+      return true;
+    case JXL_PRIMARIES_CUSTOM:
+      *internal = Primaries::kCustom;
+      return true;
+    case JXL_PRIMARIES_SRGB:
+      *internal = Primaries::kSRGB;
+      return true;
+  }
+  return JXL_FAILURE("Invalid Primaries enum value");
+}
+
+Status ConvertExternalToInternalTransferFunction(
+    const JxlTransferFunction external, TransferFunction* internal) {
+  switch (external) {
+    case JXL_TRANSFER_FUNCTION_GAMMA:
+      return JXL_FAILURE("Gamma should be handled separately");
+    case JXL_TRANSFER_FUNCTION_SRGB:
+      *internal = TransferFunction::kSRGB;
+      return true;
+    case JXL_TRANSFER_FUNCTION_LINEAR:
+      *internal = TransferFunction::kLinear;
+      return true;
+    case JXL_TRANSFER_FUNCTION_709:
+      *internal = TransferFunction::k709;
+      return true;
+    case JXL_TRANSFER_FUNCTION_PQ:
+      *internal = TransferFunction::kPQ;
+      return true;
+    case JXL_TRANSFER_FUNCTION_HLG:
+      *internal = TransferFunction::kHLG;
+      return true;
+    case JXL_TRANSFER_FUNCTION_DCI:
+      *internal = TransferFunction::kDCI;
+      return true;
+    case JXL_TRANSFER_FUNCTION_UNKNOWN:
+      *internal = TransferFunction::kUnknown;
+      return true;
+  }
+  return JXL_FAILURE("Invalid TransferFunction enum value");
+}
+
+Status ConvertExternalToInternalRenderingIntent(
+    const JxlRenderingIntent external, RenderingIntent* internal) {
+  switch (external) {
+    case JXL_RENDERING_INTENT_ABSOLUTE:
+      *internal = RenderingIntent::kAbsolute;
+      return true;
+    case JXL_RENDERING_INTENT_SATURATION:
+      *internal = RenderingIntent::kSaturation;
+      return true;
+    case JXL_RENDERING_INTENT_RELATIVE:
+      *internal = RenderingIntent::kRelative;
+      return true;
+    case JXL_RENDERING_INTENT_PERCEPTUAL:
+      *internal = RenderingIntent::kPerceptual;
+      return true;
+  }
+  return JXL_FAILURE("Invalid RenderingIntent enum value");
+}
+
+}  // namespace
+
+CIExy Customxy::Get() const {
+  CIExy result;  // x and y are stored as integers in units of 1e-6.
+  result.y = F64FromCustomxyI32(y);
+  result.x = F64FromCustomxyI32(x);
+  return result;
+}
+
+Status Customxy::Set(const CIExy& xy) {
+  JXL_RETURN_IF_ERROR(F64ToCustomxyI32(xy.x, &x));
+  JXL_RETURN_IF_ERROR(F64ToCustomxyI32(xy.y, &y));
+  // Both coordinates converted; now make sure the resulting bundle can be
+  // serialized. The bit counts are required out-parameters but are unused.
+  size_t ext_bits, all_bits;
+  if (Bundle::CanEncode(*this, &ext_bits, &all_bits)) return true;
+  return JXL_FAILURE("Unable to encode XY %f %f", xy.x, xy.y);
+}
+
+bool CustomTransferFunction::SetImplicit() {
+  // Only the XYB color space has an implied transfer function.
+  if (nonserialized_color_space != ColorSpace::kXYB) return false;
+  // XYB always uses gamma 1/3; failing to set it is treated as a bug.
+  if (!SetGamma(1.0 / 3)) JXL_ASSERT(false);
+  return true;
+}
+
+Status CustomTransferFunction::SetGamma(double gamma) {
+  const bool too_small = gamma < (1.0f / kMaxGamma);
+  const bool too_large = gamma > 1.0;
+  if (too_small || too_large) return JXL_FAILURE("Invalid gamma %f", gamma);
+
+  // Exponents matching a named curve are stored as that enum value instead
+  // of as a raw gamma.
+  have_gamma_ = false;
+  if (ApproxEq(gamma, 1.0)) {
+    transfer_function_ = TransferFunction::kLinear;
+  } else if (ApproxEq(gamma, 1.0 / 2.6)) {
+    transfer_function_ = TransferFunction::kDCI;
+  } else {
+    // 0.45.. is deliberately not mapped to kSRGB or k709: those curves
+    // also have a linear part, so substituting them might change pixel
+    // values.
+    have_gamma_ = true;
+    gamma_ = roundf(gamma * kGammaMul);
+    transfer_function_ = TransferFunction::kUnknown;
+  }
+  return true;
+}
+
+namespace {
+
+std::array<ColorEncoding, 2> CreateC2(const Primaries pr,
+                                      const TransferFunction tf) {
+  std::array<ColorEncoding, 2> encodings;
+
+  {  // Index 0: the RGB variant.
+    ColorEncoding& rgb = encodings[0];
+    rgb.SetColorSpace(ColorSpace::kRGB);
+    rgb.white_point = WhitePoint::kD65;
+    rgb.primaries = pr;
+    rgb.tf.SetTransferFunction(tf);
+    JXL_CHECK(rgb.CreateICC());
+  }
+
+  {  // Index 1: the grayscale variant.
+    ColorEncoding& gray = encodings[1];
+    gray.SetColorSpace(ColorSpace::kGray);
+    gray.white_point = WhitePoint::kD65;
+    gray.primaries = pr;
+    gray.tf.SetTransferFunction(tf);
+    JXL_CHECK(gray.CreateICC());
+  }
+
+  return encodings;
+}
+
+}  // namespace
+
+const ColorEncoding& ColorEncoding::SRGB(bool is_gray) {
+  static std::array<ColorEncoding, 2> rgb_and_gray =
+      CreateC2(Primaries::kSRGB, TransferFunction::kSRGB);
+  return rgb_and_gray[is_gray ? 1 : 0];
+}
+const ColorEncoding& ColorEncoding::LinearSRGB(bool is_gray) {
+  static std::array<ColorEncoding, 2> rgb_and_gray =
+      CreateC2(Primaries::kSRGB, TransferFunction::kLinear);
+  return rgb_and_gray[is_gray ? 1 : 0];
+}
+
+CIExy ColorEncoding::GetWhitePoint() const {
+  JXL_DASSERT(have_fields_);
+  CIExy wp;
+  switch (white_point) {
+    case WhitePoint::kD65:
+      wp.x = 0.3127;
+      wp.y = 0.3290;
+      return wp;
+
+    case WhitePoint::kE:
+      wp.x = wp.y = 1.0 / 3;
+      return wp;
+
+    case WhitePoint::kDCI:
+      // From https://ieeexplore.ieee.org/document/7290729 C.2 page 11
+      wp.x = 0.314;
+      wp.y = 0.351;
+      return wp;
+
+    case WhitePoint::kCustom:
+      return white_.Get();
+  }
+  JXL_UNREACHABLE("Invalid WhitePoint %u", static_cast<uint32_t>(white_point));
+}
+
+Status ColorEncoding::SetWhitePoint(const CIExy& xy) {
+  JXL_DASSERT(have_fields_);
+  const bool has_zero = (xy.x == 0.0) || (xy.y == 0.0);
+  if (has_zero) return JXL_FAILURE("Invalid white point %f %f", xy.x, xy.y);
+
+  // Coordinates that match a named white point are stored as that enum
+  // value; anything else is kept as explicit custom coordinates.
+  if (ApproxEq(xy.x, 0.3127) && ApproxEq(xy.y, 0.3290)) {
+    white_point = WhitePoint::kD65;
+  } else if (ApproxEq(xy.x, 1.0 / 3) && ApproxEq(xy.y, 1.0 / 3)) {
+    white_point = WhitePoint::kE;
+  } else if (ApproxEq(xy.x, 0.314) && ApproxEq(xy.y, 0.351)) {
+    white_point = WhitePoint::kDCI;
+  } else {
+    // The enum is switched to custom before the coordinates are validated.
+    white_point = WhitePoint::kCustom;
+    return white_.Set(xy);
+  }
+  return true;
+}
+
+PrimariesCIExy ColorEncoding::GetPrimaries() const {
+  JXL_DASSERT(have_fields_);
+  JXL_ASSERT(HasPrimaries());
+  PrimariesCIExy p;
+  switch (primaries) {
+    case Primaries::kSRGB:
+      p.r.x = 0.639998686;
+      p.r.y = 0.330010138;
+      p.g.x = 0.300003784;
+      p.g.y = 0.600003357;
+      p.b.x = 0.150002046;
+      p.b.y = 0.059997204;
+      return p;
+
+    case Primaries::kP3:
+      p.r.x = 0.680;
+      p.r.y = 0.320;
+      p.g.x = 0.265;
+      p.g.y = 0.690;
+      p.b.x = 0.150;
+      p.b.y = 0.060;
+      return p;
+
+    case Primaries::k2100:
+      p.r.x = 0.708;
+      p.r.y = 0.292;
+      p.g.x = 0.170;
+      p.g.y = 0.797;
+      p.b.x = 0.131;
+      p.b.y = 0.046;
+      return p;
+
+    case Primaries::kCustom:
+      p.r = red_.Get();
+      p.g = green_.Get();
+      p.b = blue_.Get();
+      return p;
+  }
+  JXL_UNREACHABLE("Invalid Primaries %u", static_cast<uint32_t>(primaries));
+}
+
+Status ColorEncoding::SetPrimaries(const PrimariesCIExy& xy) {
+  JXL_DASSERT(have_fields_);
+  JXL_ASSERT(HasPrimaries());
+  // A zero coordinate in any primary is rejected outright.
+  const bool has_zero = xy.r.x == 0.0 || xy.r.y == 0.0 || xy.g.x == 0.0 ||
+                        xy.g.y == 0.0 || xy.b.x == 0.0 || xy.b.y == 0.0;
+  if (has_zero) {
+    return JXL_FAILURE("Invalid primaries %f %f %f %f %f %f", xy.r.x, xy.r.y,
+                       xy.g.x, xy.g.y, xy.b.x, xy.b.y);
+  }
+
+  // Coordinates that match a named set of primaries are stored as that enum
+  // value; anything else is kept as explicit custom coordinates.
+  if (ApproxEq(xy.r.x, 0.64) && ApproxEq(xy.r.y, 0.33) &&
+      ApproxEq(xy.g.x, 0.30) && ApproxEq(xy.g.y, 0.60) &&
+      ApproxEq(xy.b.x, 0.15) && ApproxEq(xy.b.y, 0.06)) {
+    primaries = Primaries::kSRGB;
+  } else if (ApproxEq(xy.r.x, 0.708) && ApproxEq(xy.r.y, 0.292) &&
+             ApproxEq(xy.g.x, 0.170) && ApproxEq(xy.g.y, 0.797) &&
+             ApproxEq(xy.b.x, 0.131) && ApproxEq(xy.b.y, 0.046)) {
+    primaries = Primaries::k2100;
+  } else if (ApproxEq(xy.r.x, 0.680) && ApproxEq(xy.r.y, 0.320) &&
+             ApproxEq(xy.g.x, 0.265) && ApproxEq(xy.g.y, 0.690) &&
+             ApproxEq(xy.b.x, 0.150) && ApproxEq(xy.b.y, 0.060)) {
+    primaries = Primaries::kP3;
+  } else {
+    // The enum is switched to custom before the coordinates are validated.
+    primaries = Primaries::kCustom;
+    JXL_RETURN_IF_ERROR(red_.Set(xy.r));
+    JXL_RETURN_IF_ERROR(green_.Set(xy.g));
+    JXL_RETURN_IF_ERROR(blue_.Set(xy.b));
+  }
+
+  return true;
+}
+
+Status ColorEncoding::CreateICC() {
+  InternalRemoveICC();  // NOTE(review): presumably clears icc_ - confirm.
+  return MaybeCreateProfile(*this, &icc_);  // Regenerate from the fields.
+}
+
+Status ColorEncoding::SetFieldsFromICC(const JxlCmsInterface& cms) {
+  // In case parsing fails, mark the ColorEncoding as invalid.
+  SetColorSpace(ColorSpace::kUnknown);
+  tf.SetTransferFunction(TransferFunction::kUnknown);
+
+  if (icc_.empty()) return JXL_FAILURE("Empty ICC profile");
+
+  JxlColorEncoding external;
+  JXL_BOOL cmyk;
+  JXL_RETURN_IF_ERROR(cms.set_fields_from_icc(cms.set_fields_data, icc_.data(),
+                                              icc_.size(), &external, &cmyk));
+  if (cmyk) {
+    cmyk_ = true;  // Fields keep the "unknown" values set above.
+    return true;
+  }
+  PaddedBytes icc = std::move(icc_);  // Saved: the conversion rebuilds icc_.
+  JXL_RETURN_IF_ERROR(ConvertExternalToInternalColorEncoding(external, this));
+  icc_ = std::move(icc);  // Restore the caller-provided profile.
+  return true;
+}
+
+void ColorEncoding::DecideIfWantICC(const JxlCmsInterface& cms) {
+  if (icc_.empty()) return;
+
+  // The parsed values are unused: only whether parsing works, and whether
+  // the profile is CMYK, matter here.
+  JxlColorEncoding parsed;
+  JXL_BOOL is_cmyk;
+  const bool parse_ok = cms.set_fields_from_icc(
+      cms.set_fields_data, icc_.data(), icc_.size(), &parsed, &is_cmyk);
+  if (!parse_ok || is_cmyk) return;
+
+  PaddedBytes regenerated;
+  if (!MaybeCreateProfile(*this, &regenerated)) return;
+
+  want_icc_ = false;
+}
+
+std::string Description(const ColorEncoding& c_in) {
+  // Copy required: the Implicit* calls below may modify the encoding.
+  ColorEncoding enc = c_in;
+
+  std::string desc = ToString(enc.GetColorSpace());
+
+  if (!enc.ImplicitWhitePoint()) {
+    desc += '_';
+    if (enc.white_point != WhitePoint::kCustom) {
+      desc += ToString(enc.white_point);
+    } else {
+      const CIExy white = enc.GetWhitePoint();
+      desc += ToString(white.x) + ';';
+      desc += ToString(white.y);
+    }
+  }
+
+  if (enc.HasPrimaries()) {
+    desc += '_';
+    if (enc.primaries != Primaries::kCustom) {
+      desc += ToString(enc.primaries);
+    } else {
+      const PrimariesCIExy prim = enc.GetPrimaries();
+      desc += ToString(prim.r.x) + ';';
+      desc += ToString(prim.r.y) + ';';
+      desc += ToString(prim.g.x) + ';';
+      desc += ToString(prim.g.y) + ';';
+      desc += ToString(prim.b.x) + ';';
+      desc += ToString(prim.b.y);
+    }
+  }
+
+  desc += '_';
+  desc += ToString(enc.rendering_intent);
+
+  if (!enc.tf.SetImplicit()) {
+    desc += '_';
+    if (!enc.tf.IsGamma()) {
+      desc += ToString(enc.tf.GetTransferFunction());
+    } else {
+      desc += 'g';
+      desc += ToString(enc.tf.GetGamma());
+    }
+  }
+
+  return desc;
+}
+
+Customxy::Customxy() { Bundle::Init(this); }
+Status Customxy::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  uint32_t ux = PackSigned(x);  // Signed -> unsigned for the U32 field.
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Bits(19), BitsOffset(19, 524288),
+                                         BitsOffset(20, 1048576),
+                                         BitsOffset(21, 2097152), 0, &ux));
+  x = UnpackSigned(ux);  // Map the visited value back to signed.
+  uint32_t uy = PackSigned(y);  // Same treatment for y.
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Bits(19), BitsOffset(19, 524288),
+                                         BitsOffset(20, 1048576),
+                                         BitsOffset(21, 2097152), 0, &uy));
+  y = UnpackSigned(uy);
+  return true;
+}
+
+CustomTransferFunction::CustomTransferFunction() { Bundle::Init(this); }
+Status CustomTransferFunction::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  if (visitor->Conditional(!SetImplicit())) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &have_gamma_));
+
+    if (visitor->Conditional(have_gamma_)) {
+      // Gamma is stored as a 24-bit int; the exponent used is
+      // gamma_ / kGammaMul and must lie in [1 / kMaxGamma, 1], which is
+      // exactly what the two range checks below enforce.
+      JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(24, kGammaMul, &gamma_));
+      if (gamma_ > kGammaMul ||
+          static_cast<uint64_t>(gamma_) * kMaxGamma < kGammaMul) {
+        return JXL_FAILURE("Invalid gamma %u", gamma_);
+      }
+    }
+
+    if (visitor->Conditional(!have_gamma_)) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->Enum(TransferFunction::kSRGB, &transfer_function_));
+    }
+  }
+
+  return true;
+}
+
+ColorEncoding::ColorEncoding() { Bundle::Init(this); }
+Status ColorEncoding::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  if (visitor->AllDefault(*this, &all_default)) {
+    // Overwrite all serialized fields, but not any nonserialized_*.
+    visitor->SetDefault(this);
+    return true;
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &want_icc_));
+
+  // The color space is always sent, even if want_icc_, because it affects
+  // decoding; the white point and primaries do not, so they may be skipped.
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Enum(ColorSpace::kRGB, &color_space_));
+
+  if (visitor->Conditional(!WantICC())) {
+    // Serialize enums. NOTE: we set the defaults to the most common values so
+    // ImageMetadata.all_default is true in the common case.
+
+    if (visitor->Conditional(!ImplicitWhitePoint())) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->Enum(WhitePoint::kD65, &white_point));
+      if (visitor->Conditional(white_point == WhitePoint::kCustom)) {
+        JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&white_));
+      }
+    }
+
+    if (visitor->Conditional(HasPrimaries())) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->Enum(Primaries::kSRGB, &primaries));
+      if (visitor->Conditional(primaries == Primaries::kCustom)) {
+        JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&red_));
+        JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&green_));
+        JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&blue_));
+      }
+    }
+
+    JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&tf));
+
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->Enum(RenderingIntent::kRelative, &rendering_intent));
+
+    // Without an ICC profile, every field must be known.
+    if (color_space_ == ColorSpace::kUnknown || tf.IsUnknown()) {
+      return JXL_FAILURE(
+          "No ICC but cs %u and tf %u%s",
+          static_cast<unsigned int>(color_space_),
+          tf.IsGamma() ? 0
+                       : static_cast<unsigned int>(tf.GetTransferFunction()),
+          tf.IsGamma() ? "(gamma)" : "");
+    }
+
+    JXL_RETURN_IF_ERROR(CreateICC());  // Build icc_ from the fields.
+  }
+
+  if (WantICC() && visitor->IsReading()) {
+    // Haven't called SetICC() yet, do nothing.
+  } else {
+    if (ICC().empty()) return JXL_FAILURE("Empty ICC");
+  }
+
+  return true;
+}
+
+void ConvertInternalToExternalColorEncoding(const ColorEncoding& internal,
+                                            JxlColorEncoding* external) {
+  external->color_space = static_cast<JxlColorSpace>(internal.GetColorSpace());
+
+  external->white_point = static_cast<JxlWhitePoint>(internal.white_point);
+
+  const jxl::CIExy wp = internal.GetWhitePoint();
+  external->white_point_xy[0] = wp.x;
+  external->white_point_xy[1] = wp.y;
+
+  if (external->color_space == JXL_COLOR_SPACE_RGB ||
+      external->color_space == JXL_COLOR_SPACE_UNKNOWN) {
+    external->primaries = static_cast<JxlPrimaries>(internal.primaries);
+    const jxl::PrimariesCIExy prim = internal.GetPrimaries();
+    external->primaries_red_xy[0] = prim.r.x;
+    external->primaries_red_xy[1] = prim.r.y;
+    external->primaries_green_xy[0] = prim.g.x;
+    external->primaries_green_xy[1] = prim.g.y;
+    external->primaries_blue_xy[0] = prim.b.x;
+    external->primaries_blue_xy[1] = prim.b.y;
+  }
+
+  if (!internal.tf.IsGamma()) {
+    external->transfer_function =
+        static_cast<JxlTransferFunction>(internal.tf.GetTransferFunction());
+    external->gamma = 0;
+  } else {
+    external->transfer_function = JXL_TRANSFER_FUNCTION_GAMMA;
+    external->gamma = internal.tf.GetGamma();
+  }
+
+  external->rendering_intent =
+      static_cast<JxlRenderingIntent>(internal.rendering_intent);
+}
+
+Status ConvertExternalToInternalColorEncoding(const JxlColorEncoding& external,
+                                              ColorEncoding* internal) {
+  internal->SetColorSpace(static_cast<ColorSpace>(external.color_space));
+
+  JXL_RETURN_IF_ERROR(ConvertExternalToInternalWhitePoint(
+      external.white_point, &internal->white_point));
+  if (external.white_point == JXL_WHITE_POINT_CUSTOM) {
+    CIExy wp;
+    wp.x = external.white_point_xy[0];
+    wp.y = external.white_point_xy[1];
+    JXL_RETURN_IF_ERROR(internal->SetWhitePoint(wp));
+  }
+
+  if (external.color_space == JXL_COLOR_SPACE_RGB ||
+      external.color_space == JXL_COLOR_SPACE_UNKNOWN) {
+    JXL_RETURN_IF_ERROR(ConvertExternalToInternalPrimaries(
+        external.primaries, &internal->primaries));
+    if (external.primaries == JXL_PRIMARIES_CUSTOM) {
+      PrimariesCIExy primaries;
+      primaries.r.x = external.primaries_red_xy[0];
+      primaries.r.y = external.primaries_red_xy[1];
+      primaries.g.x = external.primaries_green_xy[0];
+      primaries.g.y = external.primaries_green_xy[1];
+      primaries.b.x = external.primaries_blue_xy[0];
+      primaries.b.y = external.primaries_blue_xy[1];
+      JXL_RETURN_IF_ERROR(internal->SetPrimaries(primaries));
+    }
+  }
+  CustomTransferFunction tf;
+  tf.nonserialized_color_space = internal->GetColorSpace();
+  if (external.transfer_function == JXL_TRANSFER_FUNCTION_GAMMA) {
+    JXL_RETURN_IF_ERROR(tf.SetGamma(external.gamma));
+  } else {
+    TransferFunction tf_enum;
+    // JXL_TRANSFER_FUNCTION_GAMMA never reaches this conversion: it has no
+    // internal enum value and is handled by the branch above instead.
+    JXL_RETURN_IF_ERROR(ConvertExternalToInternalTransferFunction(
+        external.transfer_function, &tf_enum));
+    tf.SetTransferFunction(tf_enum);
+  }
+  internal->tf = tf;
+
+  JXL_RETURN_IF_ERROR(ConvertExternalToInternalRenderingIntent(
+      external.rendering_intent, &internal->rendering_intent));
+
+  // The ColorEncoding caches an ICC profile it created earlier that may no
+  // longer match the profile with the changed fields, so re-create it.
+  if (!(internal->CreateICC())) {
+    // This is not an error: for example, it doesn't have ICC profile creation
+    // implemented for XYB. This should not be returned as error, since
+    // ConvertExternalToInternalColorEncoding still worked correctly, and what
+    // matters is that internal->ICC() will not return the wrong profile.
+  }
+
+  return true;
+}
+
+/* Chromatic adaptation matrices*/
+static const float kBradford[9] = {
+    0.8951f, 0.2664f, -0.1614f, -0.7502f, 1.7135f,
+    0.0367f, 0.0389f, -0.0685f, 1.0296f,
+};
+
+static const float kBradfordInv[9] = {
+    0.9869929f, -0.1470543f, 0.1599627f, 0.4323053f, 0.5183603f,
+    0.0492912f, -0.0085287f, 0.0400428f, 0.9684867f,
+};
+
+// Computes the Bradford adaptation matrix from white point (wx, wy) to D50.
+Status AdaptToXYZD50(float wx, float wy, float matrix[9]) {
+  if (wx < 0 || wx > 1 || wy <= 0 || wy > 1) {
+    // Out-of-range values can cause division by zero
+    // further down in the Bradford adaptation too.
+    return JXL_FAILURE("Invalid white point");
+  }
+  float w[3] = {wx / wy, 1.0f, (1.0f - wx - wy) / wy};
+  // Dividing by a tiny wy can still overflow to infinity.
+  JXL_RETURN_IF_ERROR(std::isfinite(w[0]) && std::isfinite(w[2]));
+  float w50[3] = {0.96422f, 1.0f, 0.82521f};  // Target (D50) white.
+
+  float lms[3];
+  float lms50[3];
+
+  Mul3x3Vector(kBradford, w, lms);
+  Mul3x3Vector(kBradford, w50, lms50);
+
+  if (lms[0] == 0 || lms[1] == 0 || lms[2] == 0) {
+    return JXL_FAILURE("Invalid white point");
+  }
+  float a[9] = {
+      //       /----> 0, 1, 2, 3,          /----> 4, 5, 6, 7,          /----> 8,
+      lms50[0] / lms[0], 0, 0, 0, lms50[1] / lms[1], 0, 0, 0, lms50[2] / lms[2],
+  };
+  if (!std::isfinite(a[0]) || !std::isfinite(a[4]) || !std::isfinite(a[8])) {
+    return JXL_FAILURE("Invalid white point");
+  }
+
+  float b[9];
+  Mul3x3Matrix(a, kBradford, b);
+  Mul3x3Matrix(kBradfordInv, b, matrix);
+
+  return true;
+}
+
+Status PrimariesToXYZ(float rx, float ry, float gx, float gy, float bx,
+                      float by, float wx, float wy, float matrix[9]) {
+  if (wx < 0 || wx > 1 || wy <= 0 || wy > 1) {
+    return JXL_FAILURE("Invalid white point");
+  }
+  // TODO(lode): also require rx, ry, gx, gy, bx, by to be in range 0-1? ICC
+  // profiles in theory forbid negative XYZ values, but in practice the ACES P0
+  // color space uses a negative y for the blue primary.
+  float primaries[9] = {
+      rx, gx, bx, ry, gy, by, 1.0f - rx - ry, 1.0f - gx - gy, 1.0f - bx - by};
+  float primaries_inv[9];
+  memcpy(primaries_inv, primaries, sizeof(float) * 9);
+  JXL_RETURN_IF_ERROR(Inv3x3Matrix(primaries_inv));  // Inverts in place.
+
+  float w[3] = {wx / wy, 1.0f, (1.0f - wx - wy) / wy};
+  // Dividing by a tiny wy can still overflow to infinity.
+  JXL_RETURN_IF_ERROR(std::isfinite(w[0]) && std::isfinite(w[2]));
+  float xyz[3];
+  Mul3x3Vector(primaries_inv, w, xyz);
+
+  float a[9] = {
+      xyz[0], 0, 0, 0, xyz[1], 0, 0, 0, xyz[2],
+  };
+
+  Mul3x3Matrix(primaries, a, matrix);
+  return true;
+}
+
+Status PrimariesToXYZD50(float rx, float ry, float gx, float gy, float bx,
+                         float by, float wx, float wy, float matrix[9]) {
+  float to_xyz[9];  // Filled by PrimariesToXYZ.
+  JXL_RETURN_IF_ERROR(PrimariesToXYZ(rx, ry, gx, gy, bx, by, wx, wy, to_xyz));
+  float adapt_d50[9];  // Filled by AdaptToXYZD50.
+  JXL_RETURN_IF_ERROR(AdaptToXYZD50(wx, wy, adapt_d50));
+
+  Mul3x3Matrix(adapt_d50, to_xyz, matrix);
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/color_encoding_internal.h b/third-party/libjxl/libjxl/lib/jxl/color_encoding_internal.h
new file mode 100644
index 0000000000..f2f0b4675e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/color_encoding_internal.h
@@ -0,0 +1,464 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_COLOR_ENCODING_INTERNAL_H_
+#define LIB_JXL_COLOR_ENCODING_INTERNAL_H_
+
+// Metadata for color space conversions.
+
+#include <jxl/cms_interface.h>
+#include <jxl/color_encoding.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <cmath>  // std::abs
+#include <ostream>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/field_encodings.h"
+
+namespace jxl {
+
+// (All CIE units are for the standard 1931 2 degree observer)
+
+// Color space the color pixel data is encoded in. The color pixel data is
+// 3-channel in all cases except in case of kGray, where it uses only 1 channel.
+// This also determines the number of channels used in modular encoding.
+enum class ColorSpace : uint32_t {
+  // Trichromatic color data. This also includes CMYK if a kBlack
+  // ExtraChannelInfo is present. This implies, if there is an ICC profile, that
+  // the ICC profile uses a 3-channel color space if no kBlack extra channel is
+  // present, or uses color space 'CMYK' if a kBlack extra channel is present.
+  kRGB,
+  // Single-channel data. This implies, if there is an ICC profile, that the ICC
+  // profile also represents single-channel data and has the appropriate color
+  // space ('GRAY').
+  kGray,
+  // Like kRGB, but implies fixed values for primaries etc. (D65 white point).
+  kXYB,
+  // For non-RGB/gray data, e.g. from non-electro-optical sensors. Otherwise
+  // the same conditions as kRGB apply.
+  kUnknown
+};
+
+static inline const char* EnumName(ColorSpace /*unused*/) {
+  return "ColorSpace";
+}
+static inline constexpr uint64_t EnumBits(ColorSpace /*unused*/) {
+  // One bit (via MakeBit) for each enumerator declared in ColorSpace.
+  return MakeBit(ColorSpace::kUnknown) | MakeBit(ColorSpace::kXYB) |
+         MakeBit(ColorSpace::kGray) | MakeBit(ColorSpace::kRGB);
+}
+
+// Values from CICP ColourPrimaries.
+enum class WhitePoint : uint32_t {
+  kD65 = 1,     // sRGB/BT.709/Display P3/BT.2020
+  kCustom = 2,  // Actual xy values are stored separately (as a Customxy)
+  kE = 10,      // XYZ
+  kDCI = 11,    // DCI-P3
+};
+
+static inline const char* EnumName(WhitePoint /*unused*/) {
+  return "WhitePoint";  // Name of the enum type, not of an enumerator.
+}
+static inline constexpr uint64_t EnumBits(WhitePoint /*unused*/) {
+  return MakeBit(WhitePoint::kDCI) | MakeBit(WhitePoint::kE) |
+         MakeBit(WhitePoint::kCustom) | MakeBit(WhitePoint::kD65);
+}
+
+// Values from CICP ColourPrimaries.
+enum class Primaries : uint32_t {
+  kSRGB = 1,    // Same as BT.709
+  kCustom = 2,  // Actual xy values are stored separately (as three Customxy)
+  k2100 = 9,    // Same as BT.2020
+  kP3 = 11,
+};
+
+static inline const char* EnumName(Primaries /*unused*/) { return "Primaries"; }
+static inline constexpr uint64_t EnumBits(Primaries /*unused*/) {
+  // One bit (via MakeBit) for each enumerator declared in Primaries.
+  return MakeBit(Primaries::kP3) | MakeBit(Primaries::k2100) |
+         MakeBit(Primaries::kCustom) | MakeBit(Primaries::kSRGB);
+}
+
+// Values from CICP TransferCharacteristics.
+enum class TransferFunction : uint32_t {
+  k709 = 1,
+  kUnknown = 2,
+  kLinear = 8,  // A gamma of 1.0 is reported as this, not as a gamma
+  kSRGB = 13,
+  kPQ = 16,   // from BT.2100
+  kDCI = 17,  // from SMPTE RP 431-2 reference projector
+  kHLG = 18,  // from BT.2100
+};
+
+static inline const char* EnumName(TransferFunction /*unused*/) {
+  return "TransferFunction";
+}
+static inline constexpr uint64_t EnumBits(TransferFunction /*unused*/) {
+  using Fn = TransferFunction;  // One bit (via MakeBit) per enumerator.
+  return MakeBit(Fn::k709) | MakeBit(Fn::kUnknown) | MakeBit(Fn::kLinear) |
+         MakeBit(Fn::kSRGB) | MakeBit(Fn::kPQ) | MakeBit(Fn::kDCI) |
+         MakeBit(Fn::kHLG);
+}
+
+enum class RenderingIntent : uint32_t {
+  // Values match ICC sRGB encodings.
+  kPerceptual = 0,  // good for photos, requires a profile with LUT.
+  kRelative,        // (1) good for logos.
+  kSaturation,      // (2) perhaps useful for CG with fully saturated colors.
+  kAbsolute,        // (3) leaves white point unchanged; good for proofing.
+};
+
+static inline const char* EnumName(RenderingIntent /*unused*/) {
+  return "RenderingIntent";
+}
+static inline constexpr uint64_t EnumBits(RenderingIntent /*unused*/) {
+  using Intent = RenderingIntent;  // One bit (via MakeBit) per enumerator.
+  return MakeBit(Intent::kAbsolute) | MakeBit(Intent::kSaturation) |
+         MakeBit(Intent::kRelative) | MakeBit(Intent::kPerceptual);
+}
+
+// Chromaticity (Y is omitted because it is 1 for primaries/white points)
+struct CIExy {
+  double x = 0.0;  // x chromaticity coordinate.
+  double y = 0.0;  // y chromaticity coordinate.
+};
+
+struct PrimariesCIExy {
+  CIExy r;  // Chromaticity of the red primary.
+  CIExy g;  // Chromaticity of the green primary.
+  CIExy b;  // Chromaticity of the blue primary.
+};
+
+// Serializable form of CIExy.
+struct Customxy : public Fields {
+  Customxy();
+  JXL_FIELDS_NAME(Customxy)
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+  // Returns the stored coordinates as a floating-point CIExy.
+  CIExy Get() const;
+  // Returns false if x or y do not fit in the encoding.
+  Status Set(const CIExy& xy);
+  // Integer-encoded coordinates; use Get()/Set() to convert from/to CIExy.
+  int32_t x;
+  int32_t y;
+};
+
+struct CustomTransferFunction : public Fields {
+  CustomTransferFunction();
+  JXL_FIELDS_NAME(CustomTransferFunction)
+  // Holds either a gamma exponent (IsGamma()) or a TransferFunction enum.
+  // Sets fields and returns true if nonserialized_color_space has an implicit
+  // transfer function, otherwise leaves fields unchanged and returns false.
+  bool SetImplicit();
+
+  // Gamma: only used for PNG inputs
+  bool IsGamma() const { return have_gamma_; }
+  double GetGamma() const {
+    JXL_ASSERT(IsGamma());
+    return gamma_ * 1E-7;  // (0, 1); 1E-7 == 1 / kGammaMul
+  }
+  Status SetGamma(double gamma);
+  // Enum accessors; the getter asserts that no gamma is stored.
+  TransferFunction GetTransferFunction() const {
+    JXL_ASSERT(!IsGamma());
+    return transfer_function_;
+  }
+  void SetTransferFunction(const TransferFunction tf) {
+    have_gamma_ = false;
+    transfer_function_ = tf;
+  }
+  // Each predicate below is false whenever a gamma is stored instead.
+  bool IsUnknown() const {
+    return !have_gamma_ && (transfer_function_ == TransferFunction::kUnknown);
+  }
+  bool IsSRGB() const {
+    return !have_gamma_ && (transfer_function_ == TransferFunction::kSRGB);
+  }
+  bool IsLinear() const {
+    return !have_gamma_ && (transfer_function_ == TransferFunction::kLinear);
+  }
+  bool IsPQ() const {
+    return !have_gamma_ && (transfer_function_ == TransferFunction::kPQ);
+  }
+  bool IsHLG() const {
+    return !have_gamma_ && (transfer_function_ == TransferFunction::kHLG);
+  }
+  bool Is709() const {
+    return !have_gamma_ && (transfer_function_ == TransferFunction::k709);
+  }
+  bool IsDCI() const {
+    return !have_gamma_ && (transfer_function_ == TransferFunction::kDCI);
+  }
+  bool IsSame(const CustomTransferFunction& other) const {
+    if (have_gamma_ != other.have_gamma_) return false;
+    if (have_gamma_) {  // Only the member that is in use is compared.
+      if (gamma_ != other.gamma_) return false;
+    } else {
+      if (transfer_function_ != other.transfer_function_) return false;
+    }
+    return true;
+  }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // Must be set before calling VisitFields!
+  ColorSpace nonserialized_color_space = ColorSpace::kRGB;
+
+ private:
+  static constexpr uint32_t kGammaMul = 10000000;  // == 1 / 1E-7 (GetGamma).
+  // Selects which of the two members below is meaningful.
+  bool have_gamma_;
+
+  // OETF exponent to go from linear to gamma-compressed.
+  uint32_t gamma_;  // Only used if have_gamma_.
+
+  // Can be kUnknown.
+  TransferFunction transfer_function_;  // Only used if !have_gamma_.
+};
+
+// Compact encoding of data required to interpret and translate pixels to a
+// known color space. Stored in Metadata. Thread-compatible.
+struct ColorEncoding : public Fields {
+  ColorEncoding();
+  JXL_FIELDS_NAME(ColorEncoding)
+
+  // Returns ready-to-use color encodings (initialized on-demand).
+  static const ColorEncoding& SRGB(bool is_gray = false);
+  static const ColorEncoding& LinearSRGB(bool is_gray = false);
+
+  // Returns true if an ICC profile was successfully created from fields.
+  // Must be called after modifying fields. Defined in color_management.cc.
+  Status CreateICC();
+
+  // Returns non-empty and valid ICC profile, unless:
+  // - between calling InternalRemoveICC() and CreateICC() in tests;
+  // - WantICC() == true and SetICC() was not yet called;
+  // - after a failed call to SetSRGB(), SetICC(), or CreateICC().
+  const PaddedBytes& ICC() const { return icc_; }
+
+  // Internal only, do not call except from tests.
+  void InternalRemoveICC() { icc_.clear(); }
+
+  // Returns true if `icc` is assigned and decoded successfully. If so,
+  // subsequent WantICC() will return true until DecideIfWantICC() changes it.
+  // Returning false indicates data has been lost.
+  Status SetICC(PaddedBytes&& icc, const JxlCmsInterface* cms) {
+    if (icc.empty()) return false;
+    icc_ = std::move(icc);
+
+    if (cms == nullptr) {
+      want_icc_ = true;
+      have_fields_ = false;
+      return true;
+    }
+
+    if (!SetFieldsFromICC(*cms)) {
+      InternalRemoveICC();
+      return false;
+    }
+
+    want_icc_ = true;
+    return true;
+  }
+
+  // Sets the raw ICC profile bytes, without parsing the ICC, and without
+  // updating the direct fields such as whitepoint, primaries and color
+  // space. Functions to get and set fields, such as SetWhitePoint, cannot be
+  // used anymore after this and functions such as IsSRGB return false no matter
+  // what the contents of the icc profile.
+  Status SetICCRaw(PaddedBytes&& icc) {
+    if (icc.empty()) return false;
+    icc_ = std::move(icc);
+
+    want_icc_ = true;
+    have_fields_ = false;
+    return true;
+  }
+
+  // Returns whether to send the ICC profile in the codestream.
+  bool WantICC() const { return want_icc_; }
+
+  // Return whether the direct fields are set, if false but ICC is set, only
+  // raw ICC bytes are known.
+  bool HaveFields() const { return have_fields_; }
+
+  // Causes WantICC() to return false if ICC() can be reconstructed from fields.
+  void DecideIfWantICC(const JxlCmsInterface& cms);
+
+  bool IsGray() const { return color_space_ == ColorSpace::kGray; }
+  bool IsCMYK() const { return cmyk_; }
+  size_t Channels() const { return IsGray() ? 1 : 3; }
+
+  // Returns false if `primaries` does not apply (color space is gray or XYB).
+  bool HasPrimaries() const {
+    return !IsGray() && color_space_ != ColorSpace::kXYB;
+  }
+
+  // Returns true after setting the field to a value defined by color_space,
+  // otherwise false and leaves the field unchanged.
+  bool ImplicitWhitePoint() {
+    if (color_space_ == ColorSpace::kXYB) {
+      white_point = WhitePoint::kD65;
+      return true;
+    }
+    return false;
+  }
+
+  // Returns whether the color space is known to be sRGB. If a raw unparsed ICC
+  // profile is set without the fields being set, this returns false, even if
+  // the content of the ICC profile would match sRGB.
+  bool IsSRGB() const {
+    if (!have_fields_) return false;
+    if (!IsGray() && color_space_ != ColorSpace::kRGB) return false;
+    if (white_point != WhitePoint::kD65) return false;
+    if (primaries != Primaries::kSRGB) return false;
+    if (!tf.IsSRGB()) return false;
+    return true;
+  }
+
+  // Returns whether the color space is known to be linear sRGB. If a raw
+  // unparsed ICC profile is set without the fields being set, this returns
+  // false, even if the content of the ICC profile would match linear sRGB.
+  bool IsLinearSRGB() const {
+    if (!have_fields_) return false;
+    if (!IsGray() && color_space_ != ColorSpace::kRGB) return false;
+    if (white_point != WhitePoint::kD65) return false;
+    if (primaries != Primaries::kSRGB) return false;
+    if (!tf.IsLinear()) return false;
+    return true;
+  }
+  // Resets to (gray or RGB) sRGB and rebuilds the ICC profile via CreateICC().
+  Status SetSRGB(const ColorSpace cs,
+                 const RenderingIntent ri = RenderingIntent::kRelative) {
+    InternalRemoveICC();
+    JXL_ASSERT(cs == ColorSpace::kGray || cs == ColorSpace::kRGB);
+    SetColorSpace(cs);  // Also keeps tf.nonserialized_color_space in sync.
+    white_point = WhitePoint::kD65;
+    primaries = Primaries::kSRGB;
+    tf.SetTransferFunction(TransferFunction::kSRGB);
+    rendering_intent = ri;
+    return CreateICC();
+  }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // Accessors ensure tf.nonserialized_color_space is updated at the same time.
+  ColorSpace GetColorSpace() const { return color_space_; }
+  void SetColorSpace(const ColorSpace cs) {
+    color_space_ = cs;
+    tf.nonserialized_color_space = cs;
+  }
+
+  CIExy GetWhitePoint() const;
+  Status SetWhitePoint(const CIExy& xy);
+
+  PrimariesCIExy GetPrimaries() const;
+  Status SetPrimaries(const PrimariesCIExy& xy);
+
+  // Checks if the color spaces (including white point / primaries) are the
+  // same, but ignores the transfer function, rendering intent and ICC bytes.
+  bool SameColorSpace(const ColorEncoding& other) const {
+    if (color_space_ != other.color_space_) return false;
+
+    if (white_point != other.white_point) return false;
+    if (white_point == WhitePoint::kCustom) {
+      if (white_.x != other.white_.x || white_.y != other.white_.y)
+        return false;
+    }
+
+    if (HasPrimaries() != other.HasPrimaries()) return false;
+    if (HasPrimaries()) {
+      if (primaries != other.primaries) return false;
+      if (primaries == Primaries::kCustom) {
+        if (red_.x != other.red_.x || red_.y != other.red_.y) return false;
+        if (green_.x != other.green_.x || green_.y != other.green_.y)
+          return false;
+        if (blue_.x != other.blue_.x || blue_.y != other.blue_.y) return false;
+      }
+    }
+    return true;
+  }
+
+  // Checks if the color space and transfer function are the same, ignoring
+  // rendering intent and ICC bytes
+  bool SameColorEncoding(const ColorEncoding& other) const {
+    return SameColorSpace(other) && tf.IsSame(other.tf);
+  }
+
+  mutable bool all_default;
+
+  // Only valid if HaveFields()
+  WhitePoint white_point;
+  Primaries primaries;  // Only valid if HasPrimaries()
+  CustomTransferFunction tf;
+  RenderingIntent rendering_intent;
+
+ private:
+  // Returns true if all fields have been initialized (possibly to kUnknown).
+  // Returns false if the ICC profile is invalid or decoding it fails.
+  Status SetFieldsFromICC(const JxlCmsInterface& cms);
+
+  // If true, the codestream contains an ICC profile and we do not serialize
+  // fields. Otherwise, fields are serialized and we create an ICC profile.
+  bool want_icc_;
+
+  // When false, fields such as white_point and tf are invalid and must not be
+  // used. This occurs after setting a raw bytes-only ICC profile, only the
+  // ICC bytes may be used. The color_space_ field is still valid.
+  bool have_fields_ = true;
+
+  PaddedBytes icc_;  // Valid ICC profile
+
+  ColorSpace color_space_;  // Can be kUnknown
+  bool cmyk_ = false;
+
+  // Only used if white_point == kCustom.
+  Customxy white_;
+
+  // Only used if primaries == kCustom.
+  Customxy red_;
+  Customxy green_;
+  Customxy blue_;
+};
+
+// Returns whether the two inputs are approximately equal (|a - b| <= max_l1).
+static inline bool ApproxEq(const double a, const double b,
+                            double max_l1 = 1E-3) {
+  // Default fits ICC's 15-bit fixed point; seen 7.1E-5 (lcms2), 1E-3 (skcms).
+  const double l1_distance = std::abs(a - b);
+  return l1_distance <= max_l1;
+}
+
+// Describes the ColorEncoding fields (not icc), e.g. "RGB_D65_SRG_Rel_Lin".
+std::string Description(const ColorEncoding& c);
+static inline std::ostream& operator<<(std::ostream& os,
+                                       const ColorEncoding& c) {
+  os << Description(c);  // Streams the textual form built by Description().
+  return os;
+}
+
+void ConvertInternalToExternalColorEncoding(const jxl::ColorEncoding& internal,
+                                            JxlColorEncoding* external);
+
+Status ConvertExternalToInternalColorEncoding(const JxlColorEncoding& external,
+                                              jxl::ColorEncoding* internal);
+
+Status PrimariesToXYZ(float rx, float ry, float gx, float gy, float bx,
+                      float by, float wx, float wy, float matrix[9]);
+Status PrimariesToXYZD50(float rx, float ry, float gx, float gy, float bx,
+                         float by, float wx, float wy, float matrix[9]);
+Status AdaptToXYZD50(float wx, float wy, float matrix[9]);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_COLOR_ENCODING_INTERNAL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/color_encoding_internal_test.cc b/third-party/libjxl/libjxl/lib/jxl/color_encoding_internal_test.cc
new file mode 100644
index 0000000000..6ad47e1923
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/color_encoding_internal_test.cc
@@ -0,0 +1,157 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/color_encoding_internal.h"
+
+#include <stdio.h>
+
+#include "lib/jxl/encode_internal.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+TEST(ColorEncodingTest, RoundTripAll) {
+  for (const test::ColorEncodingDescriptor& desc : test::AllEncodings()) {
+    const ColorEncoding original = test::ColorEncodingFromDescriptor(desc);
+    // Each Set(Get) round trip below must reproduce the original value.
+    {  // White point.
+      ColorEncoding copy;
+      EXPECT_TRUE(copy.SetWhitePoint(original.GetWhitePoint()));
+      EXPECT_EQ(original.white_point, copy.white_point);
+    }
+    {  // Primaries.
+      ColorEncoding copy;
+      EXPECT_TRUE(copy.SetPrimaries(original.GetPrimaries()));
+      EXPECT_EQ(original.primaries, copy.primaries);
+    }
+    if (original.tf.IsGamma()) {  // Gamma, only for gamma-based encodings.
+      ColorEncoding copy;
+      EXPECT_TRUE(copy.tf.SetGamma(original.tf.GetGamma()));
+      EXPECT_TRUE(original.tf.IsSame(copy.tf));
+    }
+  }
+}
+
+TEST(ColorEncodingTest, CustomWhitePoint) {
+  // Nonsensical values: only the Set/Get round trip is being checked.
+  CIExy requested;
+  requested.x = 0.8;
+  requested.y = 0.01;
+  ColorEncoding first;
+  EXPECT_TRUE(first.SetWhitePoint(requested));
+  const CIExy stored = first.GetWhitePoint();
+  // Feeding the stored value to a fresh encoding must give the same space.
+  ColorEncoding second;
+  EXPECT_TRUE(second.SetWhitePoint(stored));
+  EXPECT_TRUE(first.SameColorSpace(second));
+}
+
+TEST(ColorEncodingTest, CustomPrimaries) {
+  // Nonsensical values: only the Set/Get round trip is being checked.
+  PrimariesCIExy requested;
+  requested.r.x = -0.01;
+  requested.r.y = 0.2;
+  requested.g.x = 0.4;
+  requested.g.y = 0.401;
+  requested.b.x = 1.1;
+  requested.b.y = -1.2;
+  ColorEncoding first;
+  EXPECT_TRUE(first.SetPrimaries(requested));
+  const PrimariesCIExy stored = first.GetPrimaries();
+
+  ColorEncoding second;
+  EXPECT_TRUE(second.SetPrimaries(stored));
+  EXPECT_TRUE(first.SameColorSpace(second));
+}
+
+TEST(ColorEncodingTest, CustomGamma) {
+  ColorEncoding first;
+#ifndef JXL_CRASH_ON_ERROR
+  EXPECT_FALSE(first.tf.SetGamma(0.0));
+  EXPECT_FALSE(first.tf.SetGamma(-1E-6));
+  EXPECT_FALSE(first.tf.SetGamma(1.001));
+#endif
+  EXPECT_TRUE(first.tf.SetGamma(1.0));
+  EXPECT_FALSE(first.tf.IsGamma());
+  EXPECT_TRUE(first.tf.IsLinear());
+  // A gamma of 1 is reported as linear (see above); 0.123 stays a gamma.
+  EXPECT_TRUE(first.tf.SetGamma(0.123));
+  EXPECT_TRUE(first.tf.IsGamma());
+  const double stored_gamma = first.tf.GetGamma();
+  // The value read back must be accepted again and compare as equal.
+  ColorEncoding second;
+  EXPECT_TRUE(second.tf.SetGamma(stored_gamma));
+  EXPECT_TRUE(first.SameColorEncoding(second));
+  EXPECT_TRUE(second.tf.IsGamma());
+}
+
+TEST(ColorEncodingTest, InternalExternalConversion) {
+  // One rand() call per draw; the result lies in [0.25, 0.75].
+  const auto draw = []() {
+    return (float(rand()) / float((RAND_MAX)) * 0.5) + 0.25;
+  };
+
+  ColorEncoding src;
+  JxlColorEncoding external;
+  ColorEncoding dst;
+
+  // Exercise 100 pseudo-random encodings.
+  for (int i = 0; i < 100; i++) {
+    src.SetColorSpace(static_cast<ColorSpace>(rand() % 4));
+    // Custom white point.
+    CIExy wp;
+    wp.x = draw();
+    wp.y = draw();
+    EXPECT_TRUE(src.SetWhitePoint(wp));
+    // Custom primaries, only where the color space has them.
+    if (src.HasPrimaries()) {
+      PrimariesCIExy primaries;
+      primaries.r.x = draw();
+      primaries.r.y = draw();
+      primaries.g.x = draw();
+      primaries.g.y = draw();
+      primaries.b.x = draw();
+      primaries.b.y = draw();
+      EXPECT_TRUE(src.SetPrimaries(primaries));
+    }
+    // Custom gamma and a random rendering intent.
+    CustomTransferFunction tf;
+    EXPECT_TRUE(tf.SetGamma(draw()));
+    src.tf = tf;
+    src.rendering_intent = static_cast<RenderingIntent>(rand() % 4);
+
+    // Round trip through the external JxlColorEncoding representation.
+    ConvertInternalToExternalColorEncoding(src, &external);
+    EXPECT_TRUE(ConvertExternalToInternalColorEncoding(external, &dst));
+
+    // Every field must survive the round trip unchanged.
+    EXPECT_EQ(src.GetColorSpace(), dst.GetColorSpace());
+    EXPECT_EQ(src.white_point, dst.white_point);
+    EXPECT_EQ(src.GetWhitePoint().x, dst.GetWhitePoint().x);
+    EXPECT_EQ(src.GetWhitePoint().y, dst.GetWhitePoint().y);
+    if (src.HasPrimaries()) {
+      EXPECT_EQ(src.GetPrimaries().r.x, dst.GetPrimaries().r.x);
+      EXPECT_EQ(src.GetPrimaries().r.y, dst.GetPrimaries().r.y);
+      EXPECT_EQ(src.GetPrimaries().g.x, dst.GetPrimaries().g.x);
+      EXPECT_EQ(src.GetPrimaries().g.y, dst.GetPrimaries().g.y);
+      EXPECT_EQ(src.GetPrimaries().b.x, dst.GetPrimaries().b.x);
+      EXPECT_EQ(src.GetPrimaries().b.y, dst.GetPrimaries().b.y);
+    }
+
+    EXPECT_EQ(src.tf.IsGamma(), dst.tf.IsGamma());
+    if (src.tf.IsGamma()) {
+      EXPECT_EQ(src.tf.GetGamma(), dst.tf.GetGamma());
+    } else {
+      EXPECT_EQ(src.tf.GetTransferFunction(),
+                dst.tf.GetTransferFunction());
+    }
+    EXPECT_EQ(src.rendering_intent, dst.rendering_intent);
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/color_management.cc b/third-party/libjxl/libjxl/lib/jxl/color_management.cc
new file mode 100644
index 0000000000..9715d7f149
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/color_management.cc
@@ -0,0 +1,877 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/color_management.h"
+
+#include <math.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <memory>
+#include <string>
+#include <utility>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/color_management.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_tone_mapping-inl.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/matrix_ops.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+#ifndef JXL_ENABLE_3D_ICC_TONEMAPPING
+#define JXL_ENABLE_3D_ICC_TONEMAPPING 1
+#endif
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+Status ToneMapPixel(const ColorEncoding& c, const float in[3],
+                    uint8_t pcslab_out[3]) {
+  const PrimariesCIExy primaries = c.GetPrimaries();
+  const CIExy white_point = c.GetWhitePoint();
+  float primaries_XYZ[9];
+  JXL_RETURN_IF_ERROR(PrimariesToXYZ(
+      primaries.r.x, primaries.r.y, primaries.g.x, primaries.g.y, primaries.b.x,
+      primaries.b.y, white_point.x, white_point.y, primaries_XYZ));
+  const float luminances[3] = {primaries_XYZ[3], primaries_XYZ[4],
+                               primaries_XYZ[5]};
+  float linear[3];  // Output of DisplayFromEncoded for each channel.
+  HWY_CAPPED(float, 1) d;
+  if (c.tf.IsPQ()) {
+    for (size_t i = 0; i < 3; ++i) {
+      linear[i] = TF_PQ().DisplayFromEncoded(in[i]);
+    }
+  } else {  // Anything that is not PQ is treated as HLG here.
+    for (size_t i = 0; i < 3; ++i) {
+      linear[i] = TF_HLG().DisplayFromEncoded(in[i]);
+    }
+  }
+  auto r = LoadU(d, &linear[0]), g = LoadU(d, &linear[1]),
+       b = LoadU(d, &linear[2]);
+  if (c.tf.IsPQ()) {
+    Rec2408ToneMapper<decltype(d)> tone_mapper({0, 10000}, {0, 250},
+                                               luminances);
+    tone_mapper.ToneMap(&r, &g, &b);
+  } else {
+    HlgOOTF ootf(/*source_luminance=*/300, /*target_luminance=*/80, luminances);
+    ootf.Apply(&r, &g, &b);
+  }
+  GamutMap(&r, &g, &b, luminances, /*preserve_saturation=*/0.3f);
+  StoreU(r, d, &linear[0]);
+  StoreU(g, d, &linear[1]);
+  StoreU(b, d, &linear[2]);
+  // Combine the AdaptToXYZD50 matrix for this white point with RGB -> XYZ.
+  float chad[9];
+  JXL_RETURN_IF_ERROR(AdaptToXYZD50(white_point.x, white_point.y, chad));
+  float to_xyzd50[9];
+  Mul3x3Matrix(chad, primaries_XYZ, to_xyzd50);
+  // xyz = to_xyzd50 * linear, with to_xyzd50 indexed row-major.
+  float xyz[3] = {0, 0, 0};
+  for (size_t xyz_c = 0; xyz_c < 3; ++xyz_c) {
+    for (size_t rgb_c = 0; rgb_c < 3; ++rgb_c) {
+      xyz[xyz_c] += linear[rgb_c] * to_xyzd50[3 * xyz_c + rgb_c];
+    }
+  }
+  // Piecewise helper: linear up to kDelta^3, cube root above it.
+  const auto lab_f = [](const float x) {
+    static constexpr float kDelta = 6. / 29;
+    return x <= kDelta * kDelta * kDelta
+               ? x * (1 / (3 * kDelta * kDelta)) + 4.f / 29
+               : std::cbrt(x);
+  };
+  static constexpr float kXn = 0.964212;
+  static constexpr float kYn = 1;
+  static constexpr float kZn = 0.825188;
+  // Each XYZ component is divided by its normalizer before applying lab_f.
+  const float f_x = lab_f(xyz[0] / kXn);
+  const float f_y = lab_f(xyz[1] / kYn);
+  const float f_z = lab_f(xyz[2] / kZn);
+  // [0]: clamped to [0, 1], scaled to 0..255; [1], [2]: clamped, offset by 128.
+  pcslab_out[0] =
+      static_cast<uint8_t>(.5f + 255.f * Clamp1(1.16f * f_y - .16f, 0.f, 1.f));
+  pcslab_out[1] = static_cast<uint8_t>(
+      .5f + 128.f + Clamp1(500 * (f_x - f_y), -128.f, 127.f));
+  pcslab_out[2] = static_cast<uint8_t>(
+      .5f + 128.f + Clamp1(200 * (f_y - f_z), -128.f, 127.f));
+
+  return true;
+}
+
+std::vector<uint16_t> CreateTableCurve(uint32_t N, const ExtraTF tf,
+                                       bool tone_map) {
+  // The generated PQ curve will make room for highlights up to this luminance.
+  // TODO(sboukortt): make this variable?
+  static constexpr float kPQIntensityTarget = 10000;
+  // Builds an N-entry 16-bit table sampling the PQ or HLG EOTF on [0, 1].
+  JXL_ASSERT(N <= 4096);  // ICC MFT2 only allows 4K entries
+  JXL_ASSERT(tf == ExtraTF::kPQ || tf == ExtraTF::kHLG);
+  // NOTE(review): N == 1 makes `x` below 0 / 0; presumably callers pass N >= 2.
+  static constexpr float kLuminances[] = {1.f / 3, 1.f / 3, 1.f / 3};
+  using D = HWY_CAPPED(float, 1);
+  Rec2408ToneMapper<D> tone_mapper({0, kPQIntensityTarget},
+                                   {0, kDefaultIntensityTarget}, kLuminances);
+  // No point using float - LCMS converts to 16-bit for A2B/MFT.
+  std::vector<uint16_t> table(N);
+  for (uint32_t i = 0; i < N; ++i) {
+    const float x = static_cast<float>(i) / (N - 1);  // 1.0 at index N - 1.
+    const double dx = static_cast<double>(x);
+    // LCMS requires EOTF (e.g. 2.4 exponent).
+    double y = (tf == ExtraTF::kHLG) ? TF_HLG().DisplayFromEncoded(dx)
+                                     : TF_PQ().DisplayFromEncoded(dx);
+    if (tone_map && tf == ExtraTF::kPQ &&
+        kPQIntensityTarget > kDefaultIntensityTarget) {
+      D df;  // Gray input: the same value is used for r, g and b.
+      auto r = Set(df, y * 10000 / kPQIntensityTarget), g = r, b = r;
+      tone_mapper.ToneMap(&r, &g, &b);
+      float fy;
+      StoreU(r, df, &fy);
+      y = fy;  // Only the tone-mapped red channel is kept.
+    }
+    JXL_ASSERT(y >= 0.0);
+    // Clamp to table range - necessary for HLG.
+    if (y > 1.0) y = 1.0;
+    // 1.0 corresponds to table value 0xFFFF.
+    table[i] = static_cast<uint16_t>(roundf(y * 65535.0));
+  }
+  return table;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+// Local functions.
+HWY_EXPORT(ToneMapPixel);
+HWY_EXPORT(CreateTableCurve);
+
+Status CIEXYZFromWhiteCIExy(const CIExy& xy, float XYZ[3]) {
+  const bool y_too_small = std::abs(xy.y) < 1e-12;
+  if (y_too_small) return JXL_FAILURE("Y value is too small");
+  const float inv_y = 1 / xy.y;  // Scales the result so that Y == 1.
+  XYZ[1] = 1;
+  XYZ[0] = xy.x * inv_y;
+  XYZ[2] = (1 - xy.x - xy.y) * inv_y;
+  return true;
+}
+
+namespace {
+
+constexpr bool kEnable3DToneMapping = JXL_ENABLE_3D_ICC_TONEMAPPING;
+
+bool CanToneMap(const ColorEncoding& encoding) {
+  // If the color space cannot be represented by a CICP tag in the ICC profile
+  // then the rest of the profile must unambiguously identify it; we have less
+  // freedom to use it for tone mapping.
+  if (encoding.GetColorSpace() != ColorSpace::kRGB) return false;
+  if (!encoding.HasPrimaries()) return false;
+  if (!encoding.tf.IsPQ() && !encoding.tf.IsHLG()) return false;
+  const bool is_d65 = encoding.white_point == WhitePoint::kD65;
+  if (encoding.primaries == Primaries::kP3) {
+    return is_d65 || encoding.white_point == WhitePoint::kDCI;
+  }
+  return is_d65 && encoding.primaries != Primaries::kCustom;
+}
+
+void ICCComputeMD5(const PaddedBytes& data, uint8_t sum[16])
+    JXL_NO_SANITIZE("unsigned-integer-overflow") {
+  PaddedBytes data64 = data;  // Working copy that receives the padding.
+  data64.push_back(128);  // 0x80 marker byte precedes the zero padding.
+  // Add bytes such that ((size + 8) & 63) == 0.
+  size_t extra = ((64 - ((data64.size() + 8) & 63)) & 63);
+  data64.resize(data64.size() + extra, 0);
+  for (uint64_t i = 0; i < 64; i += 8) {  // Bit length, low byte first.
+    data64.push_back(static_cast<uint64_t>(data.size() << 3u) >> i);
+  }
+  // Per-step additive constants, indexed by the step counter j below.
+  static const uint32_t sineparts[64] = {
+      0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a,
+      0xa8304613, 0xfd469501, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
+      0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, 0xf61e2562, 0xc040b340,
+      0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
+      0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8,
+      0x676f02d9, 0x8d2a4c8a, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
+      0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, 0x289b7ec6, 0xeaa127fa,
+      0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
+      0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92,
+      0xffeff47d, 0x85845dd1, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
+      0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
+  };
+  static const uint32_t shift[64] = {
+      7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
+      5, 9,  14, 20, 5, 9,  14, 20, 5, 9,  14, 20, 5, 9,  14, 20,
+      4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
+      6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21,
+  };
+  // Initial values of the four 32-bit state words.
+  uint32_t a0 = 0x67452301, b0 = 0xefcdab89, c0 = 0x98badcfe, d0 = 0x10325476;
+  // Process the padded message in 64-byte blocks, 64 steps per block.
+  for (size_t i = 0; i < data64.size(); i += 64) {
+    uint32_t a = a0, b = b0, c = c0, d = d0, f, g;
+    for (size_t j = 0; j < 64; j++) {
+      if (j < 16) {
+        f = (b & c) | ((~b) & d);
+        g = j;
+      } else if (j < 32) {
+        f = (d & b) | ((~d) & c);
+        g = (5 * j + 1) & 0xf;
+      } else if (j < 48) {
+        f = b ^ c ^ d;
+        g = (3 * j + 5) & 0xf;
+      } else {
+        f = c ^ (b | (~d));
+        g = (7 * j) & 0xf;
+      }
+      uint32_t dg0 = data64[i + g * 4 + 0], dg1 = data64[i + g * 4 + 1],
+               dg2 = data64[i + g * 4 + 2], dg3 = data64[i + g * 4 + 3];
+      uint32_t u = dg0 | (dg1 << 8u) | (dg2 << 16u) | (dg3 << 24u);  // LE word
+      f += a + sineparts[j] + u;
+      a = d;
+      d = c;
+      c = b;
+      b += (f << shift[j]) | (f >> (32u - shift[j]));  // Rotate left.
+    }
+    a0 += a;
+    b0 += b;
+    c0 += c;
+    d0 += d;
+  }
+  sum[0] = a0;  // State words are written out low byte first.
+  sum[1] = a0 >> 8u;
+  sum[2] = a0 >> 16u;
+  sum[3] = a0 >> 24u;
+  sum[4] = b0;
+  sum[5] = b0 >> 8u;
+  sum[6] = b0 >> 16u;
+  sum[7] = b0 >> 24u;
+  sum[8] = c0;
+  sum[9] = c0 >> 8u;
+  sum[10] = c0 >> 16u;
+  sum[11] = c0 >> 24u;
+  sum[12] = d0;
+  sum[13] = d0 >> 8u;
+  sum[14] = d0 >> 16u;
+  sum[15] = d0 >> 24u;
+}
+
+Status CreateICCChadMatrix(CIExy w, float result[9]) {
+  // WhitePoint can not be pitch-black.
+  if (w.y == 0) return JXL_FAILURE("Invalid WhitePoint");
+  // Computed into a scratch matrix so `result` is only written on success.
+  float adapted[9];
+  JXL_RETURN_IF_ERROR(AdaptToXYZD50(w.x, w.y, adapted));
+  for (int i = 0; i < 9; ++i) result[i] = adapted[i];
+  return true;
+}
+
+// Creates the RGB-to-XYZ matrix for the given xy primaries and xy whitepoint.
+Status CreateICCRGBMatrix(CIExy r, CIExy g, CIExy b, CIExy w, float result[9]) {
+  float rgb_to_xyz[9];
+  JXL_RETURN_IF_ERROR(PrimariesToXYZD50(r.x, r.y, g.x, g.y, b.x, b.y, w.x, w.y,
+                                        rgb_to_xyz));
+  memcpy(result, rgb_to_xyz, sizeof(rgb_to_xyz));
+  return true;
+}
+
+void WriteICCUint32(uint32_t value, size_t pos, PaddedBytes* JXL_RESTRICT icc) {
+  // Big-endian store at [pos, pos + 4); the buffer grows when it is too short.
+  if (pos + 4 > icc->size()) icc->resize(pos + 4);
+  for (size_t k = 0; k < 4; ++k) {
+    (*icc)[pos + k] = static_cast<uint8_t>(value >> (8 * (3 - k)));
+  }
+}
+
+void WriteICCUint16(uint16_t value, size_t pos, PaddedBytes* JXL_RESTRICT icc) {
+  const size_t end = pos + 2;  // two bytes, most significant first
+  if (end > icc->size()) icc->resize(end);
+  for (size_t k = 0; k < 2; ++k) (*icc)[pos + k] = (value >> (8 - 8 * k)) & 255;
+}
+
+void WriteICCUint8(uint8_t value, size_t pos, PaddedBytes* JXL_RESTRICT icc) {
+  if (pos + 1 > icc->size()) icc->resize(pos + 1);  // make `pos` addressable
+  icc->data()[pos] = value;
+}
+
+// Copies four bytes from `value` to icc[pos, pos + 4), growing `icc` if short.
+void WriteICCTag(const char* value, size_t pos, PaddedBytes* JXL_RESTRICT icc) {
+  if (pos + 4 > icc->size()) icc->resize(pos + 4);
+  for (size_t k = 0; k < 4; ++k) (*icc)[pos + k] = value[k];
+}
+
+// Encodes `value` as ICC s15Fixed16 (scaled by 65536) and stores it at `pos`.
+Status WriteICCS15Fixed16(float value, size_t pos,
+                          PaddedBytes* JXL_RESTRICT icc) {
+  // Floats just below 32768.0f: 32767.998046875, 32767.99609375, ...; the
+  // limit used here is a bit tighter. NaN fails both comparisons.
+  constexpr float kLimit = 32767.995f;
+  const bool in_range = (value >= -kLimit) && (value <= kLimit);
+  if (!in_range) return JXL_FAILURE("ICC value is out of range / NaN");
+  const int32_t fixed = value * 65536.0f + 0.5f;
+  // Reinterpret as two's complement before the big-endian store.
+  WriteICCUint32(static_cast<uint32_t>(fixed), pos, icc);
+  return true;
+}
+
+// Builds the 128-byte ICC profile header for `c` in `header`. The size field
+// at offset 0 is written as 0 here; the caller fills in the real value later.
+Status CreateICCHeader(const ColorEncoding& c,
+                       PaddedBytes* JXL_RESTRICT header) {
+  // TODO(lode): choose color management engine name, e.g. "skia" if
+  // integrated in skia.
+  static const char* kCmm = "jxl ";
+  header->resize(128, 0);
+  WriteICCUint32(0, 0, header);  // size, correct value filled in at end
+  WriteICCTag(kCmm, 4, header);  // preferred CMM
+  WriteICCUint32(0x04400000u, 8, header);  // profile version field
+  const char* profile_type =
+      c.GetColorSpace() == ColorSpace::kXYB ? "scnr" : "mntr";
+  WriteICCTag(profile_type, 12, header);  // profile/device class
+  WriteICCTag(c.IsGray() ? "GRAY" : "RGB ", 16, header);  // data color space
+  if (kEnable3DToneMapping && CanToneMap(c)) {
+    // We are going to use a 3D LUT for tone mapping, which will be more compact
+    // with an 8-bit LUT to CIELAB than with a 16-bit LUT to XYZ. 8-bit XYZ
+    // would not be viable due to XYZ being linear, whereas it is fine with
+    // CIELAB's ~cube root.
+    WriteICCTag("Lab ", 20, header);  // profile connection space
+  } else {
+    WriteICCTag("XYZ ", 20, header);  // profile connection space
+  }
+
+  // Date/time: six 16-bit fields (year, month, day, hour, minute, second).
+  // TODO(lode): encode actual date and time, this is a placeholder
+  uint32_t year = 2019, month = 12, day = 1;
+  uint32_t hour = 0, minute = 0, second = 0;
+  WriteICCUint16(year, 24, header);
+  WriteICCUint16(month, 26, header);
+  WriteICCUint16(day, 28, header);
+  WriteICCUint16(hour, 30, header);
+  WriteICCUint16(minute, 32, header);
+  WriteICCUint16(second, 34, header);
+
+  WriteICCTag("acsp", 36, header);  // profile file signature
+  WriteICCTag("APPL", 40, header);  // primary platform
+  WriteICCUint32(0, 44, header);  // flags
+  WriteICCUint32(0, 48, header);  // device manufacturer
+  WriteICCUint32(0, 52, header);  // device model
+  WriteICCUint32(0, 56, header);  // device attributes
+  WriteICCUint32(0, 60, header);  // device attributes
+  WriteICCUint32(static_cast<uint32_t>(c.rendering_intent), 64, header);
+
+  // Mandatory D50 white point of profile connection space
+  WriteICCUint32(0x0000f6d6, 68, header);
+  WriteICCUint32(0x00010000, 72, header);
+  WriteICCUint32(0x0000d32d, 76, header);
+
+  WriteICCTag(kCmm, 80, header);  // profile creator
+
+  return true;
+}
+
+void AddToICCTagTable(const char* tag, size_t offset, size_t size,
+                      PaddedBytes* JXL_RESTRICT tagtable,
+                      std::vector<size_t>* offsets) {
+  const size_t entry = tagtable->size();  // entry = tag, offset, size
+  WriteICCTag(tag, entry, tagtable);
+  WriteICCUint32(0, entry + 4, tagtable);  // placeholder; `offset` kept aside
+  WriteICCUint32(size, entry + 8, tagtable);
+  offsets->push_back(offset);
+}
+
+// Pads `tags` to a 4-byte multiple; (*offset, *size) then cover the new tag.
+void FinalizeICCTag(PaddedBytes* JXL_RESTRICT tags, size_t* offset,
+                    size_t* size) {
+  while (tags->size() % 4 != 0) tags->push_back(0);
+  const size_t tag_start = *offset + *size;  // end of the previous tag
+  *offset = tag_start;
+  *size = tags->size() - tag_start;
+}
+
+// Emits a 'mluc' tag holding one en-US record. Only ASCII `text` is supported:
+// each character is widened to UTF-16 by prefixing a zero byte.
+void CreateICCMlucTag(const std::string& text, PaddedBytes* JXL_RESTRICT tags) {
+  WriteICCTag("mluc", tags->size(), tags);
+  WriteICCUint32(0, tags->size(), tags);   // reserved
+  WriteICCUint32(1, tags->size(), tags);   // number of records
+  WriteICCUint32(12, tags->size(), tags);  // record size
+  WriteICCTag("enUS", tags->size(), tags);
+  WriteICCUint32(text.size() * 2, tags->size(), tags);  // string byte length
+  WriteICCUint32(28, tags->size(), tags);  // string offset within the tag
+  for (const char ch : text) {
+    tags->push_back(0);
+    tags->push_back(ch);
+  }
+}
+
+Status CreateICCXYZTag(float xyz[3], PaddedBytes* JXL_RESTRICT tags) {
+  WriteICCTag("XYZ ", tags->size(), tags);
+  WriteICCUint32(0, tags->size(), tags);  // four zero bytes after the signature
+  for (const float* v = xyz; v != xyz + 3; ++v) {
+    JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(*v, tags->size(), tags));
+  }
+  return true;
+}
+
+Status CreateICCChadTag(float chad[9], PaddedBytes* JXL_RESTRICT tags) {
+  WriteICCTag("sf32", tags->size(), tags);
+  WriteICCUint32(0, tags->size(), tags);  // four zero bytes after the signature
+  for (const float* v = chad; v != chad + 9; ++v) {
+    JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(*v, tags->size(), tags));
+  }
+  return true;
+}
+
+// Appends a 'cicp' tag plus its tag-table entry when `c` is an RGB encoding
+// whose primaries/white point and transfer function pass the checks below;
+// otherwise returns without modifying any of the outputs.
+void MaybeCreateICCCICPTag(const ColorEncoding& c,
+                           PaddedBytes* JXL_RESTRICT tags, size_t* offset,
+                           size_t* size, PaddedBytes* JXL_RESTRICT tagtable,
+                           std::vector<size_t>* offsets) {
+  if (c.GetColorSpace() != ColorSpace::kRGB) return;
+  const bool is_d65 = c.white_point == WhitePoint::kD65;
+  uint8_t primaries_code = 0;
+  if (c.primaries == Primaries::kP3) {
+    // P3 gets a dedicated code per supported white point.
+    if (is_d65) {
+      primaries_code = 12;
+    } else if (c.white_point == WhitePoint::kDCI) {
+      primaries_code = 11;
+    } else {
+      return;
+    }
+  } else {
+    // Other non-custom primaries are only signalled with a D65 white point;
+    // their enum value is written as the code.
+    if (c.primaries == Primaries::kCustom || !is_d65) return;
+    primaries_code = static_cast<uint8_t>(c.primaries);
+  }
+  if (c.tf.IsUnknown() || c.tf.IsGamma()) return;
+  const uint8_t tf_code = static_cast<uint8_t>(c.tf.GetTransferFunction());
+
+  WriteICCTag("cicp", tags->size(), tags);
+  WriteICCUint32(0, tags->size(), tags);
+  WriteICCUint8(primaries_code, tags->size(), tags);
+  WriteICCUint8(tf_code, tags->size(), tags);
+  WriteICCUint8(0, tags->size(), tags);  // Matrix
+  WriteICCUint8(1, tags->size(), tags);  // Full range
+  FinalizeICCTag(tags, offset, size);
+  AddToICCTagTable("cicp", *offset, *size, tagtable, offsets);
+}
+
+void CreateICCCurvCurvTag(const std::vector<uint16_t>& curve,
+                          PaddedBytes* JXL_RESTRICT tags) {
+  const size_t start = tags->size();  // 12-byte header, then 16-bit samples
+  tags->resize(start + 12 + curve.size() * 2, 0);  // whole tag, zero-filled
+  WriteICCTag("curv", start, tags);
+  WriteICCUint32(0, start + 4, tags);
+  WriteICCUint32(curve.size(), start + 8, tags);
+  for (size_t k = 0; k != curve.size(); ++k) {
+    WriteICCUint16(curve[k], start + 12 + 2 * k, tags);
+  }
+}
+
+// Appends a 'para' tag of 12 + 4 * params.size() bytes; fails on bad params.
+Status CreateICCCurvParaTag(const std::vector<float>& params, size_t curve_type,
+                            PaddedBytes* JXL_RESTRICT tags) {
+  WriteICCTag("para", tags->size(), tags);
+  WriteICCUint32(0, tags->size(), tags);           // reserved
+  WriteICCUint16(curve_type, tags->size(), tags);  // function type
+  WriteICCUint16(0, tags->size(), tags);           // reserved
+  for (const float param : params) {  // each value stored as s15Fixed16
+    JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(param, tags->size(), tags));
+  }
+  return true;
+}
+
+// Appends the 'mAB ' tag for XYB: a 32-byte header followed by B curves, CLUT,
+// M curves and matrix, each at the byte offset recorded in that header.
+Status CreateICCLutAtoBTagForXYB(PaddedBytes* JXL_RESTRICT tags) {
+  WriteICCTag("mAB ", tags->size(), tags);
+  // 4 reserved bytes set to 0
+  WriteICCUint32(0, tags->size(), tags);
+  // number of input channels
+  WriteICCUint8(3, tags->size(), tags);
+  // number of output channels
+  WriteICCUint8(3, tags->size(), tags);
+  // 2 reserved bytes for padding
+  WriteICCUint16(0, tags->size(), tags);
+  // offset to first B curve
+  WriteICCUint32(32, tags->size(), tags);
+  // offset to matrix
+  WriteICCUint32(244, tags->size(), tags);
+  // offset to first M curve
+  WriteICCUint32(148, tags->size(), tags);
+  // offset to CLUT
+  WriteICCUint32(80, tags->size(), tags);
+  // offset to first A curve (reuse linear B curves)
+  WriteICCUint32(32, tags->size(), tags);
+  // offset = 32
+  // no-op curves: 3 'para' tags of 12 + 4 bytes each = 48 bytes
+  JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({1.0f}, 0, tags));
+  JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({1.0f}, 0, tags));
+  JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({1.0f}, 0, tags));
+  // offset = 80
+  // number of grid points for each input channel (16 bytes, 3 of them used)
+  for (int i = 0; i < 16; ++i) {
+    WriteICCUint8(i < 3 ? 2 : 0, tags->size(), tags);
+  }
+  // precision = 2 (CLUT entries below are written as 16-bit values)
+  WriteICCUint8(2, tags->size(), tags);
+  // 3 bytes of padding
+  WriteICCUint8(0, tags->size(), tags);
+  WriteICCUint16(0, tags->size(), tags);
+  const float kOffsets[3] = {
+      kScaledXYBOffset[0] + kScaledXYBOffset[1],
+      kScaledXYBOffset[1] - kScaledXYBOffset[0] + 1.0f / kScaledXYBScale[0],
+      kScaledXYBOffset[1] + kScaledXYBOffset[2]};
+  const float kScaling[3] = {
+      1.0f / (1.0f / kScaledXYBScale[0] + 1.0f / kScaledXYBScale[1]),
+      1.0f / (1.0f / kScaledXYBScale[0] + 1.0f / kScaledXYBScale[1]),
+      1.0f / (1.0f / kScaledXYBScale[1] + 1.0f / kScaledXYBScale[2])};
+  // 2*2*2*3 entries of 2 bytes each = 48 bytes
+  for (size_t ix = 0; ix < 2; ++ix) {
+    for (size_t iy = 0; iy < 2; ++iy) {
+      for (size_t ib = 0; ib < 2; ++ib) {
+        float in_f[3] = {ix * 1.0f, iy * 1.0f, ib * 1.0f};
+        for (size_t c = 0; c < 3; ++c) {
+          in_f[c] /= kScaledXYBScale[c];
+          in_f[c] -= kScaledXYBOffset[c];
+        }
+        float out_f[3];
+        out_f[0] = in_f[1] + in_f[0];
+        out_f[1] = in_f[1] - in_f[0];
+        out_f[2] = in_f[2] + in_f[1];
+        for (int i = 0; i < 3; ++i) {
+          out_f[i] += kOffsets[i];
+          out_f[i] *= kScaling[i];
+        }
+        for (int i = 0; i < 3; ++i) {
+          // Fails if outside [0, 1], so the clamp below cannot change a value.
+          JXL_RETURN_IF_ERROR(out_f[i] >= 0.f && out_f[i] <= 1.f);
+          uint16_t val = static_cast<uint16_t>(
+              0.5f + 65535 * std::max(0.f, std::min(1.f, out_f[i])));
+          WriteICCUint16(val, tags->size(), tags);
+        }
+      }
+    }
+  }
+  // offset = 148
+  // 3 curves with 5 parameters = 3 * (12 + 5 * 4) = 96 bytes
+  for (size_t i = 0; i < 3; ++i) {
+    const float b =
+        -kOffsets[i] - std::cbrt(jxl::kNegOpsinAbsorbanceBiasRGB[i]);
+    std::vector<float> params = {
+        3,
+        1.0f / kScaling[i],
+        b,
+        0,                                // unused
+        std::max(0.f, -b * kScaling[i]),  // make skcms happy
+    };
+    JXL_RETURN_IF_ERROR(CreateICCCurvParaTag(params, 3, tags));
+  }
+  // offset = 244
+  const double matrix[] = {1.5170095, -1.1065225, 0.071623,
+                           -0.050022, 0.5683655,  -0.018344,
+                           -1.387676, 1.1145555,  0.6857255};
+  // 9 matrix entries + 3 intercepts = 12 values * 4 bytes = 48 bytes
+  for (size_t i = 0; i < 9; ++i) {
+    // The double entry is converted to float by the call.
+    JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(matrix[i], tags->size(), tags));
+  }
+  for (size_t i = 0; i < 3; ++i) {
+    float intercept = 0;
+    for (size_t j = 0; j < 3; ++j) {
+      intercept += matrix[i * 3 + j] * jxl::kNegOpsinAbsorbanceBiasRGB[j];
+    }
+    JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(intercept, tags->size(), tags));
+  }
+  return true;
+}
+
+// Appends an 'mft1' tag for `c`: identity matrix, identity input tables, a
+// k3DLutDim^3 CLUT filled by ToneMapPixel, and identity output tables.
+Status CreateICCLutAtoBTagForHDR(ColorEncoding c,
+                                 PaddedBytes* JXL_RESTRICT tags) {
+  static constexpr size_t k3DLutDim = 9;
+  static constexpr float kGridStep = 1.0f / (k3DLutDim - 1);
+  // Emits the 256-entry identity table once for each of the 3 channels.
+  const auto write_identity_tables = [tags]() {
+    for (size_t n = 0; n < 3 * 256; ++n) {
+      WriteICCUint8(n % 256, tags->size(), tags);
+    }
+  };
+
+  WriteICCTag("mft1", tags->size(), tags);
+  // 4 reserved bytes set to 0
+  WriteICCUint32(0, tags->size(), tags);
+  // number of input channels
+  WriteICCUint8(3, tags->size(), tags);
+  // number of output channels
+  WriteICCUint8(3, tags->size(), tags);
+  // number of CLUT grid points
+  WriteICCUint8(k3DLutDim, tags->size(), tags);
+  // 1 reserved bytes for padding
+  WriteICCUint8(0, tags->size(), tags);
+
+  // Matrix (per specification, must be identity if input is not XYZ).
+  // In row-major order the diagonal is every fourth entry: 0, 4 and 8.
+  for (size_t n = 0; n < 9; ++n) {
+    const float entry = (n % 4 == 0) ? 1.f : 0.f;
+    JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(entry, tags->size(), tags));
+  }
+
+  // Input tables
+  write_identity_tables();
+
+  // CLUT: the first coordinate varies slowest, the third one fastest.
+  for (size_t ix = 0; ix < k3DLutDim; ++ix) {
+    for (size_t iy = 0; iy < k3DLutDim; ++iy) {
+      for (size_t ib = 0; ib < k3DLutDim; ++ib) {
+        float f[3] = {ix * kGridStep, iy * kGridStep, ib * kGridStep};
+        uint8_t pcslab_out[3];
+        JXL_RETURN_IF_ERROR(
+            HWY_DYNAMIC_DISPATCH(ToneMapPixel)(c, f, pcslab_out));
+        for (size_t k = 0; k < 3; ++k) {
+          WriteICCUint8(pcslab_out[k], tags->size(), tags);
+        }
+      }
+    }
+  }
+
+  // Output tables
+  write_identity_tables();
+
+  return true;
+}
+
+// Some software (Apple Safari, Preview) requires this.
+// Appends an 'mBA ' tag that contains only three B curves; the matrix, M
+// curve, CLUT and A curve offsets are all written as 0.
+Status CreateICCNoOpBToATag(PaddedBytes* JXL_RESTRICT tags) {
+  WriteICCTag("mBA ", tags->size(), tags);
+  WriteICCUint32(0, tags->size(), tags);  // 4 reserved bytes set to 0
+  WriteICCUint8(3, tags->size(), tags);   // number of input channels
+  WriteICCUint8(3, tags->size(), tags);   // number of output channels
+  WriteICCUint16(0, tags->size(), tags);  // 2 reserved bytes for padding
+
+  // Offset table. Only the B curves exist; they start right after this
+  // 32-byte header.
+  static constexpr uint32_t kSectionOffsets[5] = {
+      32,  // first B curve
+      0,   // matrix
+      0,   // first M curve
+      0,   // CLUT
+      0,   // first A curve
+  };
+  for (const uint32_t section_offset : kSectionOffsets) {
+    WriteICCUint32(section_offset, tags->size(), tags);
+  }
+
+  for (size_t curve = 0; curve < 3; ++curve) {
+    JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({1.0f}, 0, tags));
+  }
+  return true;
+}
+
+}  // namespace
+
+// Serializes an ICC profile describing `c` into `icc` (header, tag table, tag
+// data, then the MD5 sum). Returns false without error if `c` is unknown.
+Status MaybeCreateProfile(const ColorEncoding& c,
+                          PaddedBytes* JXL_RESTRICT icc) {
+  PaddedBytes header, tagtable, tags;
+  if (c.GetColorSpace() == ColorSpace::kUnknown || c.tf.IsUnknown()) {
+    return false;  // Not an error
+  }
+
+  switch (c.GetColorSpace()) {
+    case ColorSpace::kRGB:
+    case ColorSpace::kGray:
+    case ColorSpace::kXYB:
+      break;  // OK
+    default:
+      return JXL_FAILURE("Invalid CS %u",
+                         static_cast<unsigned int>(c.GetColorSpace()));
+  }
+
+  if (c.GetColorSpace() == ColorSpace::kXYB &&
+      c.rendering_intent != RenderingIntent::kPerceptual) {
+    return JXL_FAILURE(
+        "Only perceptual rendering intent implemented for XYB "
+        "ICC profile.");
+  }
+
+  JXL_RETURN_IF_ERROR(CreateICCHeader(c, &header));
+
+  // One entry per tag-table entry: the tag data offset relative to `tags`.
+  std::vector<size_t> offsets;
+  // tag count, deferred to later
+  WriteICCUint32(0, tagtable.size(), &tagtable);
+
+  // Running (offset, size) of the most recently finalized tag within `tags`.
+  size_t tag_offset = 0, tag_size = 0;
+
+  CreateICCMlucTag(Description(c), &tags);
+  FinalizeICCTag(&tags, &tag_offset, &tag_size);
+  AddToICCTagTable("desc", tag_offset, tag_size, &tagtable, &offsets);
+
+  const std::string copyright = "CC0";
+  CreateICCMlucTag(copyright, &tags);
+  FinalizeICCTag(&tags, &tag_offset, &tag_size);
+  AddToICCTagTable("cprt", tag_offset, tag_size, &tagtable, &offsets);
+
+  // TODO(eustas): isn't it the other way round: gray image has d50 WhitePoint?
+  if (c.IsGray()) {
+    float wtpt[3];
+    JXL_RETURN_IF_ERROR(CIEXYZFromWhiteCIExy(c.GetWhitePoint(), wtpt));
+    JXL_RETURN_IF_ERROR(CreateICCXYZTag(wtpt, &tags));
+  } else {
+    float d50[3] = {0.964203, 1.0, 0.824905};
+    JXL_RETURN_IF_ERROR(CreateICCXYZTag(d50, &tags));
+  }
+  FinalizeICCTag(&tags, &tag_offset, &tag_size);
+  AddToICCTagTable("wtpt", tag_offset, tag_size, &tagtable, &offsets);
+
+  if (!c.IsGray()) {
+    // Chromatic adaptation matrix
+    float chad[9];
+    JXL_RETURN_IF_ERROR(CreateICCChadMatrix(c.GetWhitePoint(), chad));
+    JXL_RETURN_IF_ERROR(CreateICCChadTag(chad, &tags));
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    AddToICCTagTable("chad", tag_offset, tag_size, &tagtable, &offsets);
+  }
+
+  if (c.GetColorSpace() == ColorSpace::kRGB) {
+    MaybeCreateICCCICPTag(c, &tags, &tag_offset, &tag_size, &tagtable,
+                          &offsets);
+
+    const PrimariesCIExy primaries = c.GetPrimaries();
+    float m[9];
+    JXL_RETURN_IF_ERROR(CreateICCRGBMatrix(primaries.r, primaries.g,
+                                           primaries.b, c.GetWhitePoint(), m));
+    // Columns of the row-major 3x3 matrix `m`.
+    float r[3] = {m[0], m[3], m[6]};
+    float g[3] = {m[1], m[4], m[7]};
+    float b[3] = {m[2], m[5], m[8]};
+
+    JXL_RETURN_IF_ERROR(CreateICCXYZTag(r, &tags));
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    AddToICCTagTable("rXYZ", tag_offset, tag_size, &tagtable, &offsets);
+
+    JXL_RETURN_IF_ERROR(CreateICCXYZTag(g, &tags));
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    AddToICCTagTable("gXYZ", tag_offset, tag_size, &tagtable, &offsets);
+
+    JXL_RETURN_IF_ERROR(CreateICCXYZTag(b, &tags));
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    AddToICCTagTable("bXYZ", tag_offset, tag_size, &tagtable, &offsets);
+  }
+
+  if (c.GetColorSpace() == ColorSpace::kXYB) {
+    JXL_RETURN_IF_ERROR(CreateICCLutAtoBTagForXYB(&tags));
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    AddToICCTagTable("A2B0", tag_offset, tag_size, &tagtable, &offsets);
+    JXL_RETURN_IF_ERROR(CreateICCNoOpBToATag(&tags));
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    AddToICCTagTable("B2A0", tag_offset, tag_size, &tagtable, &offsets);
+  } else if (kEnable3DToneMapping && CanToneMap(c)) {
+    JXL_RETURN_IF_ERROR(CreateICCLutAtoBTagForHDR(c, &tags));
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    AddToICCTagTable("A2B0", tag_offset, tag_size, &tagtable, &offsets);
+    JXL_RETURN_IF_ERROR(CreateICCNoOpBToATag(&tags));
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    AddToICCTagTable("B2A0", tag_offset, tag_size, &tagtable, &offsets);
+  } else {
+    if (c.tf.IsGamma()) {
+      float gamma = 1.0 / c.tf.GetGamma();
+      JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({gamma}, 0, &tags));
+    } else if (c.GetColorSpace() != ColorSpace::kXYB) {  // always true here
+      switch (c.tf.GetTransferFunction()) {
+        case TransferFunction::kHLG:
+          CreateICCCurvCurvTag(HWY_DYNAMIC_DISPATCH(CreateTableCurve)(
+                                   64, ExtraTF::kHLG, CanToneMap(c)),
+                               &tags);
+          break;
+        case TransferFunction::kPQ:
+          CreateICCCurvCurvTag(HWY_DYNAMIC_DISPATCH(CreateTableCurve)(
+                                   64, ExtraTF::kPQ, CanToneMap(c)),
+                               &tags);
+          break;
+        case TransferFunction::kSRGB:
+          JXL_RETURN_IF_ERROR(CreateICCCurvParaTag(
+              {2.4, 1.0 / 1.055, 0.055 / 1.055, 1.0 / 12.92, 0.04045}, 3,
+              &tags));
+          break;
+        case TransferFunction::k709:
+          JXL_RETURN_IF_ERROR(CreateICCCurvParaTag(
+              {1.0 / 0.45, 1.0 / 1.099, 0.099 / 1.099, 1.0 / 4.5, 0.081}, 3,
+              &tags));
+          break;
+        case TransferFunction::kLinear:
+          JXL_RETURN_IF_ERROR(
+              CreateICCCurvParaTag({1.0, 1.0, 0.0, 1.0, 0.0}, 3, &tags));
+          break;
+        case TransferFunction::kDCI:
+          JXL_RETURN_IF_ERROR(
+              CreateICCCurvParaTag({2.6, 1.0, 0.0, 1.0, 0.0}, 3, &tags));
+          break;
+        default:
+          JXL_UNREACHABLE("Unknown TF %u", static_cast<unsigned int>(
+                                               c.tf.GetTransferFunction()));
+      }
+    }
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    if (c.IsGray()) {
+      AddToICCTagTable("kTRC", tag_offset, tag_size, &tagtable, &offsets);
+    } else {
+      // All three entries point at the same curve data.
+      AddToICCTagTable("rTRC", tag_offset, tag_size, &tagtable, &offsets);
+      AddToICCTagTable("gTRC", tag_offset, tag_size, &tagtable, &offsets);
+      AddToICCTagTable("bTRC", tag_offset, tag_size, &tagtable, &offsets);
+    }
+  }
+
+  // Tag count, then the deferred offsets (entry i's offset: 4 + 12 * i + 4)
+  WriteICCUint32(offsets.size(), 0, &tagtable);
+  for (size_t i = 0; i < offsets.size(); i++) {
+    WriteICCUint32(offsets[i] + header.size() + tagtable.size(), 4 + 12 * i + 4,
+                   &tagtable);
+  }
+
+  // ICC profile size
+  WriteICCUint32(header.size() + tagtable.size() + tags.size(), 0, &header);
+
+  *icc = header;
+  icc->append(tagtable);
+  icc->append(tags);
+
+  // The MD5 checksum must be computed on the profile with profile flags,
+  // rendering intent, and region of the checksum itself, set to 0.
+  // TODO(lode): manually verify with a reliable tool that this creates correct
+  // signature (profile id) for ICC profiles.
+  PaddedBytes icc_sum = *icc;
+  if (icc_sum.size() >= 64 + 4) {
+    memset(icc_sum.data() + 44, 0, 4);  // profile flags
+    memset(icc_sum.data() + 64, 0, 4);  // rendering intent
+  }
+  uint8_t checksum[16];
+  ICCComputeMD5(icc_sum, checksum);
+
+  memcpy(icc->data() + 84, checksum, sizeof(checksum));  // 16 bytes at 84
+
+  return true;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/color_management.h b/third-party/libjxl/libjxl/lib/jxl/color_management.h
new file mode 100644
index 0000000000..f623aa1c90
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/color_management.h
@@ -0,0 +1,40 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_COLOR_MANAGEMENT_H_
+#define LIB_JXL_COLOR_MANAGEMENT_H_
+
+// ICC profiles and color space conversions.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+enum class ExtraTF {  // kPQ and kHLG are passed to CreateTableCurve
+  kNone,
+  kPQ,
+  kHLG,
+  kSRGB,
+};
+
+// Writes an ICC profile for `c` to `icc`. NOTE: for XYB colorspace, it can be
+// used to transform a *scaled* XYB image (from ScaleXYB()) to another space.
+Status MaybeCreateProfile(const ColorEncoding& c,
+                          PaddedBytes* JXL_RESTRICT icc);
+
+Status CIEXYZFromWhiteCIExy(const CIExy& xy, float XYZ[3]);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_COLOR_MANAGEMENT_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/color_management_test.cc b/third-party/libjxl/libjxl/lib/jxl/color_management_test.cc
new file mode 100644
index 0000000000..69c9f83499
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/color_management_test.cc
@@ -0,0 +1,435 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/color_management.h"
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <new>
+#include <string>
+#include <utility>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+
+std::ostream& operator<<(std::ostream& os, const CIExy& xy) {  // {x=.., y=..}
+  return os << "{x=" << xy.x << ", y=" << xy.y << "}";
+}
+
+std::ostream& operator<<(std::ostream& os, const PrimariesCIExy& primaries) {
+  os << "{r=" << primaries.r << ", g=" << primaries.g;
+  return os << ", b=" << primaries.b << "}";
+}
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::FloatNear;
+
+// Test row width; small enough to be fast. If changed, must update Generate*.
+static constexpr size_t kWidth = 16;
+
+static constexpr size_t kNumThreads = 1;  // only have a single row.
+
+// Matches a ColorEncoding whose rendering intent, color space, white point,
+// primaries (only if `arg` has primaries) and transfer function agree with
+// `expected`; the first differing field is reported to the listener.
+MATCHER_P(HasSameFieldsAs, expected, "") {
+  // Streams "<prefix><actual> instead of <wanted>" and yields the match result.
+  const auto mismatch = [&](const char* prefix, const std::string& actual,
+                            const std::string& wanted) {
+    *result_listener << prefix << actual << " instead of " << wanted;
+    return false;
+  };
+  // Gamma transfer functions print as "g<gamma>"; all others via ToString.
+  static const auto tf_to_string = [](const CustomTransferFunction& tf) {
+    return tf.IsGamma() ? "g" + ToString(tf.GetGamma())
+                        : ToString(tf.GetTransferFunction());
+  };
+  if (arg.rendering_intent != expected.rendering_intent) {
+    return mismatch("which has a different rendering intent: ",
+                    ToString(arg.rendering_intent),
+                    ToString(expected.rendering_intent));
+  }
+  if (arg.GetColorSpace() != expected.GetColorSpace()) {
+    return mismatch("which has a different color space: ",
+                    ToString(arg.GetColorSpace()),
+                    ToString(expected.GetColorSpace()));
+  }
+  if (arg.white_point != expected.white_point) {
+    return mismatch("which has a different white point: ",
+                    ToString(arg.white_point), ToString(expected.white_point));
+  }
+  if (arg.HasPrimaries() && arg.primaries != expected.primaries) {
+    return mismatch("which has different primaries: ", ToString(arg.primaries),
+                    ToString(expected.primaries));
+  }
+  if (!arg.tf.IsSame(expected.tf)) {
+    return mismatch("which has a different transfer function: ",
+                    tf_to_string(arg.tf), tf_to_string(expected.tf));
+  }
+  return true;
+}
+
+// Shared test inputs, output buffers and encodings; see GetInstance().
+struct Globals {
+  // TODO(deymo): Make this a const.
+  static Globals* GetInstance() {
+    static Globals ret;
+    return &ret;
+  }
+
+ private:
+  Globals() {
+    in_gray = GenerateGray();
+    in_color = GenerateColor();
+    out_gray = ImageF(kWidth, 1);
+    out_color = ImageF(kWidth * 3, 1);
+
+    c_native = ColorEncoding::LinearSRGB(/*is_gray=*/false);
+    c_gray = ColorEncoding::LinearSRGB(/*is_gray=*/true);
+  }
+
+  static ImageF GenerateGray() {
+    ImageF gray(kWidth, 1);
+    float* JXL_RESTRICT row = gray.Row(0);
+    // Increasing left to right
+    for (uint32_t x = 0; x < kWidth; ++x) {
+      row[x] = x * 1.0f / (kWidth - 1);  // [0, 1]
+    }
+    return gray;
+  }
+
+  static ImageF GenerateColor() {
+    ImageF image(kWidth * 3, 1);
+    float* JXL_RESTRICT interleaved = image.Row(0);
+    std::fill(interleaved, interleaved + kWidth * 3, 0.0f);
+
+    // [0, 4): neutral
+    for (int32_t x = 0; x < 4; ++x) {
+      interleaved[3 * x + 0] = x * 1.0f / 3;  // [0, 1]
+      interleaved[3 * x + 2] = interleaved[3 * x + 1] = interleaved[3 * x + 0];
+    }
+
+    // [4, 13): pure RGB with low/medium/high saturation
+    for (int32_t c = 0; c < 3; ++c) {
+      interleaved[3 * (4 + c) + c] = 0.08f + c * 0.01f;
+      interleaved[3 * (7 + c) + c] = 0.75f + c * 0.01f;
+      interleaved[3 * (10 + c) + c] = 1.0f;
+    }
+
+    // [13, 16): impure, not quite saturated RGB
+    interleaved[3 * 13 + 0] = 0.86f;
+    interleaved[3 * 13 + 2] = interleaved[3 * 13 + 1] = 0.16f;
+    interleaved[3 * 14 + 1] = 0.87f;
+    interleaved[3 * 14 + 2] = interleaved[3 * 14 + 0] = 0.16f;
+    interleaved[3 * 15 + 2] = 0.88f;
+    interleaved[3 * 15 + 1] = interleaved[3 * 15 + 0] = 0.16f;
+    return image;
+  }
+
+ public:
+  // ImageF so we can use VerifyRelativeError; all are interleaved RGB.
+  ImageF in_gray;
+  ImageF in_color;
+  ImageF out_gray;
+  ImageF out_color;
+  ColorEncoding c_native;
+  ColorEncoding c_gray;
+};
+
+// Parameterized fixture with a pixel round-trip check shared by the tests.
+class ColorManagementTest
+    : public ::testing::TestWithParam<test::ColorEncodingDescriptor> {
+ public:
+  // "Same" pixels after converting g->c_native -> c -> g->c_native.
+  static void VerifyPixelRoundTrip(const ColorEncoding& c) {
+    Globals* g = Globals::GetInstance();
+    const ColorEncoding& c_native = c.IsGray() ? g->c_gray : g->c_native;
+    const JxlCmsInterface& cms = GetJxlCms();
+    ColorSpaceTransform xform_fwd(cms);
+    ColorSpaceTransform xform_rev(cms);
+    const float intensity_target =
+        c.tf.IsHLG() ? 1000 : kDefaultIntensityTarget;
+    ASSERT_TRUE(
+        xform_fwd.Init(c_native, c, intensity_target, kWidth, kNumThreads));
+    ASSERT_TRUE(
+        xform_rev.Init(c, c_native, intensity_target, kWidth, kNumThreads));
+    const size_t thread = 0;
+    const ImageF& in = c.IsGray() ? g->in_gray : g->in_color;
+    ImageF* JXL_RESTRICT out = c.IsGray() ? &g->out_gray : &g->out_color;
+    ASSERT_TRUE(xform_fwd.Run(thread, in.Row(0), xform_fwd.BufDst(thread)));
+    ASSERT_TRUE(xform_rev.Run(thread, xform_fwd.BufDst(thread), out->Row(0)));
+
+#if JPEGXL_ENABLE_SKCMS
+    double max_l1 = 7E-4;
+    double max_rel = 4E-7;
+#else
+    double max_l1 = 5E-5;
+    // Most are lower; reached 3E-7 with D60 AP0.
+    double max_rel = 4E-7;
+#endif
+    if (c.IsGray()) max_rel = 2E-5;  // gray uses a larger relative bound
+    JXL_ASSERT_OK(VerifyRelativeError(in, *out, max_l1, max_rel, _));
+  }
+};
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(ColorManagementTestInstantiation,
+                                   ColorManagementTest,
+                                   ::testing::ValuesIn(test::AllEncodings()));
+
+// Exercises the ColorManagement interface for ALL ColorEncoding synthesizable
+// via enums.
+TEST_P(ColorManagementTest, VerifyAllProfiles) {
+  ColorEncoding encoding = ColorEncodingFromDescriptor(GetParam());
+  printf("%s\n", Description(encoding).c_str());
+
+  // An ICC profile must be creatable for every such encoding.
+  ASSERT_TRUE(encoding.CreateICC());
+
+  // Parsing that profile back must yield an encoding with the same fields.
+  ColorEncoding from_icc;
+  ASSERT_TRUE(from_icc.SetICC(PaddedBytes(encoding.ICC()), &GetJxlCms()));
+  EXPECT_THAT(from_icc, HasSameFieldsAs(encoding));
+
+  VerifyPixelRoundTrip(encoding);
+}
+
+testing::Matcher<CIExy> CIExyIs(const double x, const double y) {
+  static constexpr double kMaxError = 1e-4;  // per-coordinate tolerance
+  const auto tol = [](double v) { return testing::DoubleNear(v, kMaxError); };
+  return testing::AllOf(testing::Field(&CIExy::x, tol(x)),
+                        testing::Field(&CIExy::y, tol(y)));
+}
+
+testing::Matcher<PrimariesCIExy> PrimariesAre(
+    const testing::Matcher<CIExy>& r, const testing::Matcher<CIExy>& g,
+    const testing::Matcher<CIExy>& b) {
+  return testing::AllOf(testing::Field(&PrimariesCIExy::r, r),  // all 3 match
+                        testing::Field(&PrimariesCIExy::g, g),
+                        testing::Field(&PrimariesCIExy::b, b));
+}
+
+TEST_F(ColorManagementTest, sRGBChromaticity) {
+  const ColorEncoding srgb = ColorEncoding::SRGB();
+  const auto red = CIExyIs(0.64, 0.33), green = CIExyIs(0.30, 0.60);
+  const auto blue = CIExyIs(0.15, 0.06), white = CIExyIs(0.3127, 0.3290);
+  EXPECT_THAT(srgb.GetWhitePoint(), white);
+  EXPECT_THAT(srgb.GetPrimaries(), PrimariesAre(red, green, blue));
+}
+
+TEST_F(ColorManagementTest, D2700Chromaticity) {
+  PaddedBytes profile =
+      jxl::test::ReadTestData("jxl/color_management/sRGB-D2700.icc");
+  ColorEncoding encoding;
+  ASSERT_TRUE(encoding.SetICC(std::move(profile), &GetJxlCms()));
+
+  EXPECT_THAT(encoding.GetWhitePoint(), CIExyIs(0.45986, 0.41060));
+  // Relative to its own illuminant, this profile's primaries have the same
+  // chromaticities as sRGB; only the PCS-relative chromaticities would be
+  // different.
+  const auto red = CIExyIs(0.64, 0.33), green = CIExyIs(0.30, 0.60);
+  const auto blue = CIExyIs(0.15, 0.06);
+  EXPECT_THAT(encoding.GetPrimaries(), PrimariesAre(red, green, blue));
+}
+
+TEST_F(ColorManagementTest, D2700ToSRGB) {  // Converts a single pixel from the D2700 profile to sRGB.
+  const JxlCmsInterface& cms = GetJxlCms();
+  PaddedBytes icc =
+      jxl::test::ReadTestData("jxl/color_management/sRGB-D2700.icc");
+  ColorEncoding sRGB_D2700;
+  ASSERT_TRUE(sRGB_D2700.SetICC(std::move(icc), &cms));
+
+  ColorSpaceTransform transform(cms);
+  ASSERT_TRUE(transform.Init(sRGB_D2700, ColorEncoding::SRGB(),
+                             kDefaultIntensityTarget, 1, 1));  // NOTE(review): trailing 1, 1 look like pixels per run and thread count - confirm.
+  const float sRGB_D2700_values[3] = {0.863, 0.737, 0.490};
+  float sRGB_values[3];
+  ASSERT_TRUE(transform.Run(0, sRGB_D2700_values, sRGB_values));  // NOTE(review): first argument is presumably the thread index - confirm.
+  EXPECT_THAT(sRGB_values,
+              ElementsAre(FloatNear(0.914, 1e-3), FloatNear(0.745, 1e-3),
+                          FloatNear(0.601, 1e-3)));  // Absolute tolerance of 1e-3 per channel.
+}
+
+TEST_F(ColorManagementTest, P3HlgTo2020Hlg) {  // Converts one HLG pixel between two sets of primaries.
+  ColorEncoding p3_hlg;
+  p3_hlg.SetColorSpace(ColorSpace::kRGB);
+  p3_hlg.white_point = WhitePoint::kD65;
+  p3_hlg.primaries = Primaries::kP3;
+  p3_hlg.tf.SetTransferFunction(TransferFunction::kHLG);
+  ASSERT_TRUE(p3_hlg.CreateICC());
+
+  ColorEncoding rec2020_hlg = p3_hlg;  // Same encoding; only the primaries are changed below.
+  rec2020_hlg.primaries = Primaries::k2100;
+  ASSERT_TRUE(rec2020_hlg.CreateICC());  // Regenerate the ICC profile after changing the primaries.
+
+  ColorSpaceTransform transform(GetJxlCms());
+  ASSERT_TRUE(transform.Init(p3_hlg, rec2020_hlg, 1000, 1, 1));  // 1000 sits in the intensity-target position.
+  const float p3_hlg_values[3] = {0., 0.75, 0.};  // Green channel only.
+  float rec2020_hlg_values[3];
+  ASSERT_TRUE(transform.Run(0, p3_hlg_values, rec2020_hlg_values));
+  EXPECT_THAT(rec2020_hlg_values,
+              ElementsAre(FloatNear(0.3973, 1e-4), FloatNear(0.7382, 1e-4),
+                          FloatNear(0.1183, 1e-4)));  // Absolute tolerance of 1e-4 per channel.
+}
+
+TEST_F(ColorManagementTest, HlgOotf) {  // Checks HLG <-> linear conversion at different intensity targets.
+  ColorEncoding p3_hlg;
+  p3_hlg.SetColorSpace(ColorSpace::kRGB);
+  p3_hlg.white_point = WhitePoint::kD65;
+  p3_hlg.primaries = Primaries::kP3;
+  p3_hlg.tf.SetTransferFunction(TransferFunction::kHLG);
+  ASSERT_TRUE(p3_hlg.CreateICC());
+
+  ColorSpaceTransform transform_to_1000(GetJxlCms());
+  ASSERT_TRUE(
+      transform_to_1000.Init(p3_hlg, ColorEncoding::LinearSRGB(), 1000, 1, 1));
+  // HDR reference white: https://www.itu.int/pub/R-REP-BT.2408-4-2021
+  float p3_hlg_values[3] = {0.75, 0.75, 0.75};
+  float linear_srgb_values[3];
+  ASSERT_TRUE(transform_to_1000.Run(0, p3_hlg_values, linear_srgb_values));
+  // On a 1000-nit display, HDR reference white should be 203 cd/m² which is
+  // 0.203 times the maximum.
+  EXPECT_THAT(linear_srgb_values,
+              ElementsAre(FloatNear(0.203, 1e-3), FloatNear(0.203, 1e-3),
+                          FloatNear(0.203, 1e-3)));
+
+  ColorSpaceTransform transform_to_400(GetJxlCms());
+  ASSERT_TRUE(
+      transform_to_400.Init(p3_hlg, ColorEncoding::LinearSRGB(), 400, 1, 1));
+  ASSERT_TRUE(transform_to_400.Run(0, p3_hlg_values, linear_srgb_values));
+  // On a 400-nit display, it should be 100 cd/m², i.e. 0.250 times the maximum.
+  EXPECT_THAT(linear_srgb_values,
+              ElementsAre(FloatNear(0.250, 1e-3), FloatNear(0.250, 1e-3),
+                          FloatNear(0.250, 1e-3)));
+
+  p3_hlg_values[2] = 0.50;  // Non-neutral input: only the blue channel is lowered.
+  ASSERT_TRUE(transform_to_1000.Run(0, p3_hlg_values, linear_srgb_values));
+  EXPECT_THAT(linear_srgb_values,
+              ElementsAre(FloatNear(0.201, 1e-3), FloatNear(0.201, 1e-3),
+                          FloatNear(0.050, 1e-3)));
+
+  ColorSpaceTransform transform_from_400(GetJxlCms());  // Opposite direction: linear sRGB to P3 HLG.
+  ASSERT_TRUE(
+      transform_from_400.Init(ColorEncoding::LinearSRGB(), p3_hlg, 400, 1, 1));
+  linear_srgb_values[0] = linear_srgb_values[1] = linear_srgb_values[2] = 0.250;
+  ASSERT_TRUE(transform_from_400.Run(0, linear_srgb_values, p3_hlg_values));
+  EXPECT_THAT(p3_hlg_values,
+              ElementsAre(FloatNear(0.75, 1e-3), FloatNear(0.75, 1e-3),
+                          FloatNear(0.75, 1e-3)));  // Round trip of the 400-nit case above.
+
+  ColorEncoding grayscale_hlg;  // Same check with a single-channel HLG encoding.
+  grayscale_hlg.SetColorSpace(ColorSpace::kGray);
+  grayscale_hlg.white_point = WhitePoint::kD65;
+  grayscale_hlg.tf.SetTransferFunction(TransferFunction::kHLG);
+  ASSERT_TRUE(grayscale_hlg.CreateICC());
+
+  ColorSpaceTransform grayscale_transform(GetJxlCms());
+  ASSERT_TRUE(grayscale_transform.Init(
+      grayscale_hlg, ColorEncoding::LinearSRGB(/*is_gray=*/true), 1000, 1, 1));
+  const float grayscale_hlg_value = 0.75;
+  float linear_grayscale_value;
+  ASSERT_TRUE(grayscale_transform.Run(0, &grayscale_hlg_value,
+                                      &linear_grayscale_value));
+  EXPECT_THAT(linear_grayscale_value, FloatNear(0.203, 1e-3));  // Same expectation as the RGB 1000-nit case.
+}
+
+TEST_F(ColorManagementTest, XYBProfile) {  // Linear sRGB -> XYB -> (XYB ICC profile) -> linear sRGB round trip.
+  ColorEncoding c_xyb;
+  c_xyb.SetColorSpace(ColorSpace::kXYB);
+  c_xyb.rendering_intent = RenderingIntent::kPerceptual;
+  ASSERT_TRUE(c_xyb.CreateICC());
+  ColorEncoding c_native = ColorEncoding::LinearSRGB(false);
+
+  static const size_t kGridDim = 17;  // Samples per RGB axis.
+  static const size_t kNumColors = kGridDim * kGridDim * kGridDim;
+  const JxlCmsInterface& cms = GetJxlCms();
+  ColorSpaceTransform xform(cms);
+  ASSERT_TRUE(
+      xform.Init(c_xyb, c_native, kDefaultIntensityTarget, kNumColors, 1));
+
+  ImageMetadata metadata;
+  metadata.color_encoding = c_native;
+  ImageBundle ib(&metadata);
+  Image3F native(kNumColors, 1);  // All grid colors laid out in a single row.
+  float mul = 1.0f / (kGridDim - 1);  // Maps a grid index to [0, 1].
+  for (size_t ir = 0, x = 0; ir < kGridDim; ++ir) {
+    for (size_t ig = 0; ig < kGridDim; ++ig) {
+      for (size_t ib = 0; ib < kGridDim; ++ib, ++x) {  // NOTE: this index shadows the ImageBundle `ib` above.
+        native.PlaneRow(0, 0)[x] = ir * mul;
+        native.PlaneRow(1, 0)[x] = ig * mul;
+        native.PlaneRow(2, 0)[x] = ib * mul;
+      }
+    }
+  }
+  ib.SetFromImage(std::move(native), c_native);
+  const Image3F& in = *ib.color();  // `native` was moved from; read the inputs back from the bundle.
+  Image3F opsin(kNumColors, 1);
+  ToXYB(ib, nullptr, &opsin, cms, nullptr);
+
+  Image3F opsin2(kNumColors, 1);
+  CopyImageTo(opsin, &opsin2);
+  ScaleXYB(&opsin2);  // opsin keeps the unscaled values for the debug print below.
+
+  float* src = xform.BufSrc(0);
+  for (size_t i = 0; i < kNumColors; ++i) {
+    for (size_t c = 0; c < 3; ++c) {
+      src[3 * i + c] = opsin2.PlaneRow(c, 0)[i];  // Planar -> interleaved.
+    }
+  }
+
+  float* dst = xform.BufDst(0);
+  ASSERT_TRUE(xform.Run(0, src, dst));
+
+  Image3F out(kNumColors, 1);
+  for (size_t i = 0; i < kNumColors; ++i) {
+    for (size_t c = 0; c < 3; ++c) {
+      out.PlaneRow(c, 0)[i] = dst[3 * i + c];  // Interleaved -> planar.
+    }
+  }
+
+  auto debug_print_color = [&](size_t i) {  // Prints in -> opsin -> opsin2 -> out for color i.
+    printf(
+        "(%f, %f, %f) -> (%9.6f, %f, %f) -> (%f, %f, %f) -> "
+        "(%9.6f, %9.6f, %9.6f)",
+        in.PlaneRow(0, 0)[i], in.PlaneRow(1, 0)[i], in.PlaneRow(2, 0)[i],
+        opsin.PlaneRow(0, 0)[i], opsin.PlaneRow(1, 0)[i],
+        opsin.PlaneRow(2, 0)[i], opsin2.PlaneRow(0, 0)[i],
+        opsin2.PlaneRow(1, 0)[i], opsin2.PlaneRow(2, 0)[i],
+        out.PlaneRow(0, 0)[i], out.PlaneRow(1, 0)[i], out.PlaneRow(2, 0)[i]);
+  };
+
+  float max_err[3] = {};  // Largest absolute error seen per channel.
+  size_t max_err_i[3] = {};  // Color index at which that error occurred.
+  for (size_t i = 0; i < kNumColors; ++i) {
+    for (size_t c = 0; c < 3; ++c) {
+      // debug_print_color(i); printf("\n");
+      float err = std::abs(in.PlaneRow(c, 0)[i] - out.PlaneRow(c, 0)[i]);
+      if (err > max_err[c]) {
+        max_err[c] = err;
+        max_err_i[c] = i;
+      }
+    }
+  }
+  static float kMaxError[3] = {9e-4, 4e-4, 5e-4};  // Per-channel bound on |in - out|.
+  printf("Maximum errors:\n");
+  for (size_t c = 0; c < 3; ++c) {
+    debug_print_color(max_err_i[c]);
+    printf("    %f\n", max_err[c]);
+    EXPECT_LT(max_err[c], kMaxError[c]);
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/common.h b/third-party/libjxl/libjxl/lib/jxl/common.h
new file mode 100644
index 0000000000..c2ebe029a8
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/common.h
@@ -0,0 +1,245 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_COMMON_H_
+#define LIB_JXL_COMMON_H_
+
+// Shared constants and helper functions.
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#include <limits>  // numeric_limits
+#include <memory>  // unique_ptr
+#include <string>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+
+#ifndef JXL_HIGH_PRECISION
+#define JXL_HIGH_PRECISION 1
+#endif
+
+// Macro that defines whether support for decoding JXL files to JPEG is enabled.
+#ifndef JPEGXL_ENABLE_TRANSCODE_JPEG
+#define JPEGXL_ENABLE_TRANSCODE_JPEG 1
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+// Macro that defines whether support for decoding boxes is enabled.
+#ifndef JPEGXL_ENABLE_BOXES
+#define JPEGXL_ENABLE_BOXES 1
+#endif  // JPEGXL_ENABLE_BOXES
+
+namespace jxl {
+// Some enums and typedefs used by more than one header file.
+
+constexpr size_t kBitsPerByte = 8;  // more clear than CHAR_BIT
+
+constexpr inline size_t RoundUpBitsToByteMultiple(size_t bits) {  // Rounds a bit count up to a multiple of 8.
+  return (bits + 7) & ~size_t(7);  // Wraps around if bits > SIZE_MAX - 7.
+}
+
+constexpr inline size_t RoundUpToBlockDim(size_t dim) {  // Rounds dim up to a multiple of 8.
+  return (dim + 7) & ~size_t(7);  // The constant 7 is kBlockDim - 1 (kBlockDim is 8).
+}
+
+static inline bool JXL_MAYBE_UNUSED SafeAdd(const uint64_t a, const uint64_t b,
+                                            uint64_t& sum) {  // Returns false if a + b wrapped; sum then holds the wrapped value.
+  sum = a + b;
+  return sum >= a;  // no need to check b - either sum >= both or < both.
+}
+
+template <typename T1, typename T2>
+constexpr inline T1 DivCeil(T1 a, T2 b) {  // Ceiling of a / b for non-negative a and positive b.
+  return (a + b - 1) / b;  // a + b - 1 can overflow when a is close to the type's maximum.
+}
+
+// Rounds `what` up to a multiple of `align`. Works for any nonzero `align`; if a power of two, compiler emits ADD+AND.
+constexpr inline size_t RoundUpTo(size_t what, size_t align) {
+  return DivCeil(what, align) * align;
+}
+
+constexpr double kPi = 3.14159265358979323846264338327950288;
+
+// Reasonable default for sRGB, matches common monitors. We map white to this
+// many nits (cd/m^2) by default. Butteraugli was tuned for 250 nits, which is
+// very close.
+static constexpr float kDefaultIntensityTarget = 255;
+
+template <typename T>
+constexpr T Pi(T multiplier) {  // Returns multiplier * pi as a T.
+  return static_cast<T>(multiplier * kPi);  // kPi is a double, so the product is formed in at least double precision.
+}
+
+// Block is the square grid of pixels to which an "energy compaction"
+// transformation (e.g. DCT) is applied. Each block has its own AC quantizer.
+constexpr size_t kBlockDim = 8;
+
+constexpr size_t kDCTBlockSize = kBlockDim * kBlockDim;
+
+constexpr size_t kGroupDim = 256;
+static_assert(kGroupDim % kBlockDim == 0,
+              "Group dim should be divisible by block dim");
+constexpr size_t kGroupDimInBlocks = kGroupDim / kBlockDim;
+
+// Maximum number of passes in an image.
+constexpr size_t kMaxNumPasses = 11;
+
+// Maximum number of reference frames.
+constexpr size_t kMaxNumReferenceFrames = 4;
+
+// Dimensions of a frame, in pixels, and other derived dimensions.
+// Computed from FrameHeader. All fields are uninitialized until Set() is called.
+// TODO(veluca): add extra channels.
+struct FrameDimensions {
+  void Set(size_t xsize, size_t ysize, size_t group_size_shift,
+           size_t max_hshift, size_t max_vshift, bool modular_mode,
+           size_t upsampling) {  // upsampling must be nonzero (it is used as a divisor).
+    group_dim = (kGroupDim >> 1) << group_size_shift;  // 128 << shift; shift == 1 yields kGroupDim.
+    dc_group_dim = group_dim * kBlockDim;  // In pixels: a DC group spans group_dim blocks per side.
+    xsize_upsampled = xsize;
+    ysize_upsampled = ysize;
+    this->xsize = DivCeil(xsize, upsampling);
+    this->ysize = DivCeil(ysize, upsampling);
+    xsize_blocks = DivCeil(this->xsize, kBlockDim << max_hshift) << max_hshift;  // Rounded up to a multiple of 1 << max_hshift.
+    ysize_blocks = DivCeil(this->ysize, kBlockDim << max_vshift) << max_vshift;  // Rounded up to a multiple of 1 << max_vshift.
+    xsize_padded = xsize_blocks * kBlockDim;
+    ysize_padded = ysize_blocks * kBlockDim;
+    if (modular_mode) {
+      // Modular mode doesn't have any padding.
+      xsize_padded = this->xsize;
+      ysize_padded = this->ysize;
+    }
+    xsize_upsampled_padded = xsize_padded * upsampling;
+    ysize_upsampled_padded = ysize_padded * upsampling;
+    xsize_groups = DivCeil(this->xsize, group_dim);
+    ysize_groups = DivCeil(this->ysize, group_dim);
+    xsize_dc_groups = DivCeil(xsize_blocks, group_dim);  // Counted in blocks, not pixels.
+    ysize_dc_groups = DivCeil(ysize_blocks, group_dim);  // Counted in blocks, not pixels.
+    num_groups = xsize_groups * ysize_groups;
+    num_dc_groups = xsize_dc_groups * ysize_dc_groups;
+  }
+
+  // Image size without any upsampling, i.e. original_size / upsampling.
+  size_t xsize;
+  size_t ysize;
+  // Original image size.
+  size_t xsize_upsampled;
+  size_t ysize_upsampled;
+  // Image size after upsampling the padded image.
+  size_t xsize_upsampled_padded;
+  size_t ysize_upsampled_padded;
+  // Image size after padding to a multiple of kBlockDim (if VarDCT mode).
+  size_t xsize_padded;
+  size_t ysize_padded;
+  // Image size in kBlockDim blocks.
+  size_t xsize_blocks;
+  size_t ysize_blocks;
+  // Image size in number of groups.
+  size_t xsize_groups;
+  size_t ysize_groups;
+  // Image size in number of DC groups.
+  size_t xsize_dc_groups;
+  size_t ysize_dc_groups;
+  // Number of AC or DC groups.
+  size_t num_groups;
+  size_t num_dc_groups;
+  // Size of a group.
+  size_t group_dim;
+  size_t dc_group_dim;
+};
+
+// Prior to C++14 (i.e. C++11): provide our own make_unique
+#if __cplusplus < 201402L
+template <typename T, typename... Args>
+std::unique_ptr<T> make_unique(Args&&... args) {  // Same contract as std::make_unique for non-array T.
+  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
+}
+#else
+using std::make_unique;  // Makes jxl::make_unique refer to the standard implementation.
+#endif
+
+template <typename T>
+JXL_INLINE T Clamp1(T val, T low, T hi) {  // Clamps val to [low, hi]; assumes low <= hi.
+  return val < low ? low : val > hi ? hi : val;
+}
+
+// Encodes non-negative (X) into (2 * X), negative (-X) into (2 * X - 1)
+constexpr uint32_t PackSigned(int32_t value)
+    JXL_NO_SANITIZE("unsigned-integer-overflow") {
+  return (static_cast<uint32_t>(value) << 1) ^
+         ((static_cast<uint32_t>(~value) >> 31) - 1);  // Mask: 0 for value >= 0, all ones for value < 0.
+}
+
+// Reverse to PackSigned, i.e. UnpackSigned(PackSigned(X)) == X.
+// (((~value) & 1) - 1) is either 0 or 0xFF...FF and it will have an expected
+// unsigned-integer-overflow.
+constexpr intptr_t UnpackSigned(size_t value)
+    JXL_NO_SANITIZE("unsigned-integer-overflow") {
+  return static_cast<intptr_t>((value >> 1) ^ (((~value) & 1) - 1));  // Even input: value / 2; odd input: bitwise NOT of value / 2.
+}
+
+// Conversion from a number (integer or floating point) to string.
+template <typename T>
+std::string ToString(T n) {
+  char data[32] = {};
+  if (T(0.1) != T(0)) {  // True only for floating-point T (integer T truncates 0.1 to 0).
+    // floating point: formatted with %g after conversion to double
+    snprintf(data, sizeof(data), "%g", static_cast<double>(n));
+  } else if (T(-1) > T(0)) {  // True only for unsigned T, where -1 wraps to the maximum value.
+    // unsigned
+    snprintf(data, sizeof(data), "%llu", static_cast<unsigned long long>(n));
+  } else {
+    // signed
+    snprintf(data, sizeof(data), "%lld", static_cast<long long>(n));
+  }
+  return data;
+}
+
+static inline JXL_MAYBE_UNUSED uint64_t DecodeVarInt(const uint8_t* input,
+                                                     size_t inputSize,
+                                                     size_t* pos) {  // Reads a base-128 varint at input[*pos] and advances *pos.
+  size_t i;
+  uint64_t ret = 0;
+  for (i = 0; *pos + i < inputSize && i < 10; ++i) {  // Reads at most 10 bytes and never past inputSize.
+    ret |= uint64_t(input[*pos + i] & 127) << uint64_t(7 * i);  // Low 7 bits of each byte, least significant group first.
+    // If the next-byte flag is not set, stop
+    if ((input[*pos + i] & 128) == 0) break;
+  }
+  // TODO: Return a decoding error if i == 10.
+  *pos += i + 1;  // NOTE(review): without a break (input exhausted or i == 10) this lands one past the last byte read - presumably callers detect that via *pos > inputSize; confirm.
+  return ret;
+}
+
+static inline JXL_MAYBE_UNUSED bool EncodeVarInt(uint64_t value,
+                                                 size_t output_size,
+                                                 size_t* output_pos,
+                                                 uint8_t* output) {
+  // Writes value at output[*output_pos] as a base-128 varint, 7 bits per byte,
+  // advancing *output_pos. Returns false if output_size bytes are not enough.
+  while (value > 127) {
+    if (*output_pos >= output_size) return false;  // Index output_size is already out of bounds.
+    // |128: Set the next byte flag
+    output[(*output_pos)++] = ((uint8_t)(value & 127)) | 128;
+    // Remove the seven bits we just wrote
+    value >>= 7;
+  }
+  if (*output_pos >= output_size) return false;  // Same bound for the final byte.
+  output[(*output_pos)++] = ((uint8_t)value) & 127;
+  return true;
+}
+
+static inline JXL_MAYBE_UNUSED void EncodeVarInt(uint64_t value,
+                                                 PaddedBytes* data) {  // Appends the varint encoding of value to *data.
+  size_t pos = data->size();
+  data->resize(data->size() + 10);  // A 64-bit value needs up to ceil(64 / 7) = 10 bytes.
+  JXL_CHECK(EncodeVarInt(value, data->size(), &pos, data->data()));
+  data->resize(pos);  // Trim to the bytes actually written.
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_COMMON_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/compressed_dc.cc b/third-party/libjxl/libjxl/lib/jxl/compressed_dc.cc
new file mode 100644
index 0000000000..52438b9bf0
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/compressed_dc.cc
@@ -0,0 +1,315 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/compressed_dc.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/compressed_dc.cc"
+#include <hwy/aligned_allocator.h>
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/image.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+using D = HWY_FULL(float);
+using DScalar = HWY_CAPPED(float, 1);
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Div;
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::Sub;
+using hwy::HWY_NAMESPACE::Vec;
+using hwy::HWY_NAMESPACE::ZeroIfNegative;
+
+// TODO(veluca): optimize constants.
+const float w1 = 0.20345139757231578f;
+const float w2 = 0.0334829185968739f;
+const float w0 = 1.0f - 4.0f * (w1 + w2);
+
+template <class V>
+V MaxWorkaround(V a, V b) {  // Lane-wise maximum of a and b.
+#if (HWY_TARGET == HWY_AVX3) && HWY_COMPILER_CLANG <= 800
+  // Prevents "Do not know how to split the result of this operator" error
+  return IfThenElse(a > b, a, b);
+#else
+  return Max(a, b);
+#endif
+}
+
+template <typename D>
+JXL_INLINE void ComputePixelChannel(const D d, const float dc_factor,
+                                    const float* JXL_RESTRICT row_top,
+                                    const float* JXL_RESTRICT row,
+                                    const float* JXL_RESTRICT row_bottom,
+                                    Vec<D>* JXL_RESTRICT mc,
+                                    Vec<D>* JXL_RESTRICT sm,
+                                    Vec<D>* JXL_RESTRICT gap, size_t x) {  // Outputs: *mc center, *sm smoothed; *gap is updated in place.
+  const auto tl = LoadU(d, row_top + x - 1);  // 3x3 neighborhood around x; reads x - 1 and x + 1, so x must be interior.
+  const auto tc = Load(d, row_top + x);
+  const auto tr = LoadU(d, row_top + x + 1);
+
+  const auto ml = LoadU(d, row + x - 1);
+  *mc = Load(d, row + x);
+  const auto mr = LoadU(d, row + x + 1);
+
+  const auto bl = LoadU(d, row_bottom + x - 1);
+  const auto bc = Load(d, row_bottom + x);
+  const auto br = LoadU(d, row_bottom + x + 1);
+
+  const auto w_center = Set(d, w0);
+  const auto w_side = Set(d, w1);
+  const auto w_corner = Set(d, w2);
+
+  const auto corner = Add(Add(tl, tr), Add(bl, br));
+  const auto side = Add(Add(ml, mr), Add(tc, bc));
+  *sm = MulAdd(corner, w_corner, MulAdd(side, w_side, Mul(*mc, w_center)));  // w0 * center + w1 * sides + w2 * corners.
+
+  const auto dc_quant = Set(d, dc_factor);
+  *gap = MaxWorkaround(*gap, Abs(Div(Sub(*mc, *sm), dc_quant)));  // *gap = max(*gap, |mc - sm| / dc_factor).
+}
+
+template <typename D>
+JXL_INLINE void ComputePixel(
+    const float* JXL_RESTRICT dc_factors,
+    const float* JXL_RESTRICT* JXL_RESTRICT rows_top,
+    const float* JXL_RESTRICT* JXL_RESTRICT rows,
+    const float* JXL_RESTRICT* JXL_RESTRICT rows_bottom,
+    float* JXL_RESTRICT* JXL_RESTRICT out_rows, size_t x) {  // Smooths one vector of pixels at x in all three channels.
+  const D d;
+  auto mc_x = Undefined(d);
+  auto mc_y = Undefined(d);
+  auto mc_b = Undefined(d);
+  auto sm_x = Undefined(d);
+  auto sm_y = Undefined(d);
+  auto sm_b = Undefined(d);
+  auto gap = Set(d, 0.5f);  // Floor of 0.5 keeps the factor below at most 3 - 4 * 0.5 = 1.
+  ComputePixelChannel(d, dc_factors[0], rows_top[0], rows[0], rows_bottom[0],
+                      &mc_x, &sm_x, &gap, x);
+  ComputePixelChannel(d, dc_factors[1], rows_top[1], rows[1], rows_bottom[1],
+                      &mc_y, &sm_y, &gap, x);
+  ComputePixelChannel(d, dc_factors[2], rows_top[2], rows[2], rows_bottom[2],
+                      &mc_b, &sm_b, &gap, x);  // gap now holds the maximum over the three channels.
+  auto factor = MulAdd(Set(d, -4.0f), gap, Set(d, 3.0f));  // factor = 3 - 4 * gap.
+  factor = ZeroIfNegative(factor);  // gap >= 0.75 leaves the pixel unchanged.
+
+  auto out = MulAdd(Sub(sm_x, mc_x), factor, mc_x);  // out = mc + factor * (sm - mc), same for each channel.
+  Store(out, d, out_rows[0] + x);
+  out = MulAdd(Sub(sm_y, mc_y), factor, mc_y);
+  Store(out, d, out_rows[1] + x);
+  out = MulAdd(Sub(sm_b, mc_b), factor, mc_b);
+  Store(out, d, out_rows[2] + x);
+}
+
+void AdaptiveDCSmoothing(const float* dc_factors, Image3F* dc,
+                         ThreadPool* pool) {  // Replaces *dc with a smoothed copy; border pixels are kept as-is.
+  const size_t xsize = dc->xsize();
+  const size_t ysize = dc->ysize();
+  if (ysize <= 2 || xsize <= 2) return;  // No interior pixels to smooth.
+
+  // TODO(veluca): use tile-based processing?
+  // TODO(veluca): decide if changes to the y channel should be propagated to
+  // the x and b channels through color correlation.
+  JXL_ASSERT(w1 + w2 < 0.25f);  // Equivalent to w0 > 0, since w0 = 1 - 4 * (w1 + w2).
+
+  Image3F smoothed(xsize, ysize);
+  // Fill in borders that the loop below will not. First and last are unused.
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t y : {size_t(0), ysize - 1}) {
+      memcpy(smoothed.PlaneRow(c, y), dc->PlaneRow(c, y),
+             xsize * sizeof(float));
+    }
+  }
+  auto process_row = [&](const uint32_t y, size_t /*thread*/) {  // Reads rows y - 1 and y + 1, so y must be an interior row.
+    const float* JXL_RESTRICT rows_top[3]{
+        dc->ConstPlaneRow(0, y - 1),
+        dc->ConstPlaneRow(1, y - 1),
+        dc->ConstPlaneRow(2, y - 1),
+    };
+    const float* JXL_RESTRICT rows[3] = {
+        dc->ConstPlaneRow(0, y),
+        dc->ConstPlaneRow(1, y),
+        dc->ConstPlaneRow(2, y),
+    };
+    const float* JXL_RESTRICT rows_bottom[3] = {
+        dc->ConstPlaneRow(0, y + 1),
+        dc->ConstPlaneRow(1, y + 1),
+        dc->ConstPlaneRow(2, y + 1),
+    };
+    float* JXL_RESTRICT rows_out[3] = {
+        smoothed.PlaneRow(0, y),
+        smoothed.PlaneRow(1, y),
+        smoothed.PlaneRow(2, y),
+    };
+    for (size_t x : {size_t(0), xsize - 1}) {  // Left and right border pixels are copied unchanged.
+      for (size_t c = 0; c < 3; c++) {
+        rows_out[c][x] = rows[c][x];
+      }
+    }
+
+    size_t x = 1;
+    // First pixels (one at a time until x reaches the vector width N).
+    const size_t N = Lanes(D());
+    for (; x < std::min(N, xsize - 1); x++) {
+      ComputePixel<DScalar>(dc_factors, rows_top, rows, rows_bottom, rows_out,
+                            x);
+    }
+    // Full vectors.
+    for (; x + N <= xsize - 1; x += N) {
+      ComputePixel<D>(dc_factors, rows_top, rows, rows_bottom, rows_out, x);
+    }
+    // Last pixels.
+    for (; x < xsize - 1; x++) {
+      ComputePixel<DScalar>(dc_factors, rows_top, rows, rows_bottom, rows_out,
+                            x);
+    }
+  };
+  JXL_CHECK(RunOnPool(pool, 1, ysize - 1, ThreadPool::NoInit, process_row,
+                      "DCSmoothingRow"));  // Interior rows only (end presumably exclusive - confirm).
+  dc->Swap(smoothed);
+}
+
+// DC dequantization: fills rect r of *dc from the integer channels of `in` and writes per-pixel context buckets to *quant_dc.
+void DequantDC(const Rect& r, Image3F* dc, ImageB* quant_dc, const Image& in,
+               const float* dc_factors, float mul, const float* cfl_factors,
+               YCbCrChromaSubsampling chroma_subsampling,
+               const BlockCtxMap& bctx) {
+  const HWY_FULL(float) df;
+  const Rebind<pixel_type, HWY_FULL(float)> di;  // assumes pixel_type <= float
+  if (chroma_subsampling.Is444()) {
+    const auto fac_x = Set(df, dc_factors[0] * mul);
+    const auto fac_y = Set(df, dc_factors[1] * mul);
+    const auto fac_b = Set(df, dc_factors[2] * mul);
+    const auto cfl_fac_x = Set(df, cfl_factors[0]);
+    const auto cfl_fac_b = Set(df, cfl_factors[2]);
+    for (size_t y = 0; y < r.ysize(); y++) {
+      float* dec_row_x = r.PlaneRow(dc, 0, y);
+      float* dec_row_y = r.PlaneRow(dc, 1, y);
+      float* dec_row_b = r.PlaneRow(dc, 2, y);
+      const int32_t* quant_row_x = in.channel[1].plane.Row(y);  // Note: input channel 1 is read as X ...
+      const int32_t* quant_row_y = in.channel[0].plane.Row(y);  // ... and input channel 0 as Y.
+      const int32_t* quant_row_b = in.channel[2].plane.Row(y);
+      for (size_t x = 0; x < r.xsize(); x += Lanes(di)) {  // Whole vectors; presumably rows are padded to a vector multiple - confirm.
+        const auto in_q_x = Load(di, quant_row_x + x);
+        const auto in_q_y = Load(di, quant_row_y + x);
+        const auto in_q_b = Load(di, quant_row_b + x);
+        const auto in_x = Mul(ConvertTo(df, in_q_x), fac_x);
+        const auto in_y = Mul(ConvertTo(df, in_q_y), fac_y);
+        const auto in_b = Mul(ConvertTo(df, in_q_b), fac_b);
+        Store(in_y, df, dec_row_y + x);
+        Store(MulAdd(in_y, cfl_fac_x, in_x), df, dec_row_x + x);  // x = in_x + cfl_fac_x * y.
+        Store(MulAdd(in_y, cfl_fac_b, in_b), df, dec_row_b + x);  // b = in_b + cfl_fac_b * y.
+      }
+    }
+  } else {
+    for (size_t c : {1, 0, 2}) {  // Subsampled case: each channel is scaled on its own; cfl_factors is not used.
+      Rect rect(r.x0() >> chroma_subsampling.HShift(c),
+                r.y0() >> chroma_subsampling.VShift(c),
+                r.xsize() >> chroma_subsampling.HShift(c),
+                r.ysize() >> chroma_subsampling.VShift(c));
+      const auto fac = Set(df, dc_factors[c] * mul);
+      const Channel& ch = in.channel[c < 2 ? c ^ 1 : c];  // Swaps indices 0 and 1, as in the 4:4:4 branch.
+      for (size_t y = 0; y < rect.ysize(); y++) {
+        const int32_t* quant_row = ch.plane.Row(y);
+        float* row = rect.PlaneRow(dc, c, y);
+        for (size_t x = 0; x < rect.xsize(); x += Lanes(di)) {
+          const auto in_q = Load(di, quant_row + x);
+          const auto in = Mul(ConvertTo(df, in_q), fac);
+          Store(in, df, row + x);
+        }
+      }
+    }
+  }
+  if (bctx.num_dc_ctxs <= 1) {  // Single context: every bucket is 0.
+    for (size_t y = 0; y < r.ysize(); y++) {
+      uint8_t* qdc_row = r.Row(quant_dc, y);
+      memset(qdc_row, 0, sizeof(*qdc_row) * r.xsize());
+    }
+  } else {
+    for (size_t y = 0; y < r.ysize(); y++) {
+      uint8_t* qdc_row_val = r.Row(quant_dc, y);
+      const int32_t* quant_row_x =
+          in.channel[1].plane.Row(y >> chroma_subsampling.VShift(0));
+      const int32_t* quant_row_y =
+          in.channel[0].plane.Row(y >> chroma_subsampling.VShift(1));
+      const int32_t* quant_row_b =
+          in.channel[2].plane.Row(y >> chroma_subsampling.VShift(2));
+      for (size_t x = 0; x < r.xsize(); x++) {
+        int bucket_x = 0, bucket_y = 0, bucket_b = 0;  // Each counts the thresholds its quantized value exceeds.
+        for (int t : bctx.dc_thresholds[0]) {
+          if (quant_row_x[x >> chroma_subsampling.HShift(0)] > t) bucket_x++;
+        }
+        for (int t : bctx.dc_thresholds[1]) {
+          if (quant_row_y[x >> chroma_subsampling.HShift(1)] > t) bucket_y++;
+        }
+        for (int t : bctx.dc_thresholds[2]) {
+          if (quant_row_b[x >> chroma_subsampling.HShift(2)] > t) bucket_b++;
+        }
+        int bucket = bucket_x;  // Mixed radix: (bucket_x * (n_b + 1) + bucket_b) * (n_y + 1) + bucket_y.
+        bucket *= bctx.dc_thresholds[2].size() + 1;
+        bucket += bucket_b;
+        bucket *= bctx.dc_thresholds[1].size() + 1;
+        bucket += bucket_y;
+        qdc_row_val[x] = bucket;  // Stored as uint8_t.
+      }
+    }
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(DequantDC);
+HWY_EXPORT(AdaptiveDCSmoothing);
+void AdaptiveDCSmoothing(const float* dc_factors, Image3F* dc,
+                         ThreadPool* pool) {  // jxl-namespace entry point; forwards to a per-target implementation.
+  return HWY_DYNAMIC_DISPATCH(AdaptiveDCSmoothing)(dc_factors, dc, pool);
+}
+
+void DequantDC(const Rect& r, Image3F* dc, ImageB* quant_dc, const Image& in,
+               const float* dc_factors, float mul, const float* cfl_factors,
+               YCbCrChromaSubsampling chroma_subsampling,
+               const BlockCtxMap& bctx) {  // jxl-namespace entry point; forwards to a per-target implementation.
+  return HWY_DYNAMIC_DISPATCH(DequantDC)(r, dc, quant_dc, in, dc_factors, mul,
+                                         cfl_factors, chroma_subsampling, bctx);
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/compressed_dc.h b/third-party/libjxl/libjxl/lib/jxl/compressed_dc.h
new file mode 100644
index 0000000000..b06e5931f0
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/compressed_dc.h
@@ -0,0 +1,34 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_COMPRESSED_DC_H_
+#define LIB_JXL_COMPRESSED_DC_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/modular/modular_image.h"
+
+// DC handling functions: adaptive smoothing of the DC image and dequantization
+// of quantized DC channels into float planes.
+
+namespace jxl {
+
+// Smooth DC in already-smooth areas, to counteract banding.
+void AdaptiveDCSmoothing(const float* dc_factors, Image3F* dc,
+                         ThreadPool* pool);
+
+void DequantDC(const Rect& r, Image3F* dc, ImageB* quant_dc, const Image& in,
+               const float* dc_factors, float mul, const float* cfl_factors,
+               YCbCrChromaSubsampling chroma_subsampling,
+               const BlockCtxMap& bctx);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_COMPRESSED_DC_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/convolve-inl.h b/third-party/libjxl/libjxl/lib/jxl/convolve-inl.h
new file mode 100644
index 0000000000..cd79153a3a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/convolve-inl.h
@@ -0,0 +1,295 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JXL_CONVOLVE_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_CONVOLVE_INL_H_
+#undef LIB_JXL_CONVOLVE_INL_H_
+#else
+#define LIB_JXL_CONVOLVE_INL_H_
+#endif
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/image_ops.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Broadcast;
+#if HWY_TARGET != HWY_SCALAR
+using hwy::HWY_NAMESPACE::CombineShiftRightBytes;
+#endif
+using hwy::HWY_NAMESPACE::TableLookupLanes;
+using hwy::HWY_NAMESPACE::Vec;
+
+// Synthesizes left/right neighbors from a vector of center pixels.
+class Neighbors {  // lane shuffles standing in for mirrored loads left of x=0
+ public:
+  using D = HWY_CAPPED(float, 16);  // at most 16 float lanes
+  using V = Vec<D>;
+
+  // Returns l[i] == c[Mirror(i - 1)]: lanes move up by one, lane 0 is kept.
+  HWY_INLINE HWY_MAYBE_UNUSED static V FirstL1(const V c) {
+#if HWY_CAP_GE256
+    const D d;
+    HWY_ALIGN constexpr int32_t lanes[16] = {0, 0, 1, 2,  3,  4,  5,  6,
+                                             7, 8, 9, 10, 11, 12, 13, 14};
+    const auto indices = SetTableIndices(d, lanes);
+    // c = PONM'LKJI
+    return TableLookupLanes(c, indices);  // ONML'KJII
+#elif HWY_TARGET == HWY_SCALAR
+    return c;  // Same (the first mirrored value is the last valid one)
+#else  // 128 bit
+    // c = LKJI
+#if HWY_TARGET <= (1 << HWY_HIGHEST_TARGET_BIT_X86)  // x86: SSE shuffle
+    return V{_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(2, 1, 0, 0))};  // KJII
+#else  // other 128-bit targets: generic table lookup
+    const D d;
+    // TODO(deymo): Figure out if this can be optimized using a single vsri
+    // instruction to convert LKJI to KJII.
+    HWY_ALIGN constexpr int lanes[4] = {0, 0, 1, 2};  // KJII
+    const auto indices = SetTableIndices(d, lanes);
+    return TableLookupLanes(c, indices);
+#endif
+#endif
+  }
+
+  // Returns l[i] == c[Mirror(i - 2)]: lanes 0 and 1 become c[1] and c[0].
+  HWY_INLINE HWY_MAYBE_UNUSED static V FirstL2(const V c) {
+#if HWY_CAP_GE256
+    const D d;
+    HWY_ALIGN constexpr int32_t lanes[16] = {1, 0, 0, 1, 2,  3,  4,  5,
+                                             6, 7, 8, 9, 10, 11, 12, 13};
+    const auto indices = SetTableIndices(d, lanes);
+    // c = PONM'LKJI
+    return TableLookupLanes(c, indices);  // NMLK'JIIJ
+#elif HWY_TARGET == HWY_SCALAR
+    const D d;
+    JXL_ASSERT(false);  // unsupported, avoid calling this.
+    return Zero(d);
+#else  // 128 bit
+    // c = LKJI
+#if HWY_TARGET <= (1 << HWY_HIGHEST_TARGET_BIT_X86)  // x86: SSE shuffle
+    return V{_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(1, 0, 0, 1))};  // JIIJ
+#else  // other 128-bit targets: generic table lookup
+    const D d;
+    HWY_ALIGN constexpr int lanes[4] = {1, 0, 0, 1};  // JIIJ
+    const auto indices = SetTableIndices(d, lanes);
+    return TableLookupLanes(c, indices);
+#endif
+#endif
+  }
+
+  // Returns l[i] == c[Mirror(i - 3)]: lanes 0..2 become c[2], c[1], c[0].
+  HWY_INLINE HWY_MAYBE_UNUSED static V FirstL3(const V c) {
+#if HWY_CAP_GE256
+    const D d;
+    HWY_ALIGN constexpr int32_t lanes[16] = {2, 1, 0, 0, 1, 2,  3,  4,
+                                             5, 6, 7, 8, 9, 10, 11, 12};
+    const auto indices = SetTableIndices(d, lanes);
+    // c = PONM'LKJI
+    return TableLookupLanes(c, indices);  // MLKJ'IIJK
+#elif HWY_TARGET == HWY_SCALAR
+    const D d;
+    JXL_ASSERT(false);  // unsupported, avoid calling this.
+    return Zero(d);
+#else  // 128 bit
+    // c = LKJI
+#if HWY_TARGET <= (1 << HWY_HIGHEST_TARGET_BIT_X86)  // x86: SSE shuffle
+    return V{_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(0, 0, 1, 2))};  // IIJK
+#else  // other 128-bit targets: generic table lookup
+    const D d;
+    HWY_ALIGN constexpr int lanes[4] = {2, 1, 0, 0};  // IIJK
+    const auto indices = SetTableIndices(d, lanes);
+    return TableLookupLanes(c, indices);
+#endif
+#endif
+  }
+};
+
+#if HWY_TARGET != HWY_SCALAR
+
+// Returns indices for SetTableIndices such that TableLookupLanes on the
+// rightmost unaligned vector (rightmost sample in its most-significant lane)
+// returns the mirrored values, with the mirror outside the last valid sample.
+static inline const int32_t* MirrorLanes(const size_t mod) {
+  const HWY_CAPPED(float, 16) d;
+  constexpr size_t kN = MaxLanes(d);  // sizes the index table below
+
+  // For mod = `image width mod 16` 0..15:
+  // last full vec     mirrored (mem order)  loadedVec  mirrorVec  idxVec
+  // 0123456789abcdef| fedcba9876543210      fed..210   012..def   012..def
+  // 0123456789abcdef|0 0fedcba98765432      0fe..321   234..f00   123..eff
+  // 0123456789abcdef|01 10fedcba987654      10f..432   456..110   234..ffe
+  // 0123456789abcdef|012 210fedcba9876      210..543   67..2210   34..ffed
+  // 0123456789abcdef|0123 3210fedcba98      321..654   8..33210   4..ffedc
+  // 0123456789abcdef|01234 43210fedcba
+  // 0123456789abcdef|012345 543210fedc
+  // 0123456789abcdef|0123456 6543210fe
+  // 0123456789abcdef|01234567 76543210
+  // 0123456789abcdef|012345678 8765432
+  // 0123456789abcdef|0123456789 987654
+  // 0123456789abcdef|0123456789A A9876
+  // 0123456789abcdef|0123456789AB BA98
+  // 0123456789abcdef|0123456789ABC CBA
+  // 0123456789abcdef|0123456789ABCD DC
+  // 0123456789abcdef|0123456789ABCDE E      EDC..10f   EED..210   ffe..321
+#if HWY_CAP_GE512
+  HWY_ALIGN static constexpr int32_t idx_lanes[2 * kN - 1] = {
+      1,  2,  3,  4,  5,  6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15,  //
+      14, 13, 12, 11, 10, 9, 8, 7, 6, 5,  4,  3,  2,  1,  0};
+#elif HWY_CAP_GE256
+  HWY_ALIGN static constexpr int32_t idx_lanes[2 * kN - 1] = {
+      1, 2, 3, 4, 5, 6, 7, 7,  //
+      6, 5, 4, 3, 2, 1, 0};
+#else  // 128-bit
+  HWY_ALIGN static constexpr int32_t idx_lanes[2 * kN - 1] = {1, 2, 3, 3,  //
+                                                              2, 1, 0};
+#endif
+  return idx_lanes + kN - 1 - mod;  // kN-entry window; mod 0 = full reversal
+}
+
+#endif  // HWY_TARGET != HWY_SCALAR
+
+// Single entry point for convolution.
+// "Strategy" (Direct*/Separable*) decides kernel size and how to evaluate it.
+template <class Strategy>
+class ConvolveT {  // splits rows into border/interior and runs Strategy
+  static constexpr int64_t kRadius = Strategy::kRadius;
+  using Simd = HWY_CAPPED(float, 16);
+
+ public:
+  static size_t MinWidth() {
+#if HWY_TARGET == HWY_SCALAR
+    // First/Last use mirrored loads of up to +/- kRadius.
+    return 2 * kRadius;
+#else
+    return Lanes(Simd()) + kRadius;  // one full vector plus kRadius pixels
+#endif
+  }
+
+  // "Image" is ImageF or Image3F. JXL_CHECKs that rect is >= MinWidth() wide.
+  template <class Image, class Weights>
+  static void Run(const Image& in, const Rect& rect, const Weights& weights,
+                  ThreadPool* pool, Image* out) {
+    JXL_CHECK(SameSize(rect, *out));
+    JXL_CHECK(rect.xsize() >= MinWidth());
+
+    static_assert(int64_t(kRadius) <= 3,
+                  "Must handle [0, kRadius) and >= kRadius");
+    switch (rect.xsize() % Lanes(Simd())) {
+      case 0:
+        return RunRows<0>(in, rect, weights, pool, out);
+      case 1:
+        return RunRows<1>(in, rect, weights, pool, out);
+      case 2:
+        return RunRows<2>(in, rect, weights, pool, out);
+      default:  // 3 stands for any remainder >= 3
+        return RunRows<3>(in, rect, weights, pool, out);
+    }
+  }
+
+ private:
+  template <size_t kSizeModN, class WrapRow, class Weights>
+  static JXL_INLINE void RunRow(const float* JXL_RESTRICT in,
+                                const size_t xsize, const int64_t stride,
+                                const WrapRow& wrap_row, const Weights& weights,
+                                float* JXL_RESTRICT out) {
+    Strategy::template ConvolveRow<kSizeModN>(in, xsize, stride, wrap_row,
+                                              weights, out);
+  }
+
+  template <size_t kSizeModN, class Weights>
+  static JXL_INLINE void RunBorderRows(const ImageF& in, const Rect& rect,
+                                       const int64_t ybegin, const int64_t yend,
+                                       const Weights& weights, ImageF* out) {
+    const int64_t stride = in.PixelsPerRow();
+    const WrapRowMirror wrap_row(in, rect.ysize());
+    for (int64_t y = ybegin; y < yend; ++y) {
+      RunRow<kSizeModN>(rect.ConstRow(in, y), rect.xsize(), stride, wrap_row,
+                        weights, out->Row(y));
+    }
+  }
+
+  // Image3F: same as above, applied to each of the three planes.
+  template <size_t kSizeModN, class Weights>
+  static JXL_INLINE void RunBorderRows(const Image3F& in, const Rect& rect,
+                                       const int64_t ybegin, const int64_t yend,
+                                       const Weights& weights, Image3F* out) {
+    const int64_t stride = in.PixelsPerRow();
+    for (int64_t y = ybegin; y < yend; ++y) {
+      for (size_t c = 0; c < 3; ++c) {
+        const WrapRowMirror wrap_row(in.Plane(c), rect.ysize());
+        RunRow<kSizeModN>(rect.ConstPlaneRow(in, c, y), rect.xsize(), stride,
+                          wrap_row, weights, out->PlaneRow(c, y));
+      }
+    }
+  }
+
+  template <size_t kSizeModN, class Weights>
+  static JXL_INLINE void RunInteriorRows(const ImageF& in, const Rect& rect,
+                                         const int64_t ybegin,
+                                         const int64_t yend,
+                                         const Weights& weights,
+                                         ThreadPool* pool, ImageF* out) {
+    const int64_t stride = in.PixelsPerRow();
+    JXL_CHECK(RunOnPool(
+        pool, ybegin, yend, ThreadPool::NoInit,
+        [&](const uint32_t y, size_t /*thread*/) HWY_ATTR {
+          RunRow<kSizeModN>(rect.ConstRow(in, y), rect.xsize(), stride,
+                            WrapRowUnchanged(), weights, out->Row(y));
+        },
+        "Convolve"));
+  }
+
+  // Image3F: same as above, applied to each of the three planes.
+  template <size_t kSizeModN, class Weights>
+  static JXL_INLINE void RunInteriorRows(const Image3F& in, const Rect& rect,
+                                         const int64_t ybegin,
+                                         const int64_t yend,
+                                         const Weights& weights,
+                                         ThreadPool* pool, Image3F* out) {
+    const int64_t stride = in.PixelsPerRow();
+    JXL_CHECK(RunOnPool(
+        pool, ybegin, yend, ThreadPool::NoInit,
+        [&](const uint32_t y, size_t /*thread*/) HWY_ATTR {
+          for (size_t c = 0; c < 3; ++c) {
+            RunRow<kSizeModN>(rect.ConstPlaneRow(in, c, y), rect.xsize(),
+                              stride, WrapRowUnchanged(), weights,
+                              out->PlaneRow(c, y));
+          }
+        },
+        "Convolve3"));
+  }
+
+  template <size_t kSizeModN, class Image, class Weights>
+  static JXL_INLINE void RunRows(const Image& in, const Rect& rect,
+                                 const Weights& weights, ThreadPool* pool,
+                                 Image* out) {
+    const int64_t ysize = rect.ysize();
+    RunBorderRows<kSizeModN>(in, rect, 0, std::min(int64_t(kRadius), ysize),
+                             weights, out);
+    if (ysize > 2 * int64_t(kRadius)) {  // rows that need no wrapping
+      RunInteriorRows<kSizeModN>(in, rect, int64_t(kRadius),
+                                 ysize - int64_t(kRadius), weights, pool, out);
+    }
+    if (ysize > int64_t(kRadius)) {  // bottom rows; may redo some top rows
+      RunBorderRows<kSizeModN>(in, rect, ysize - int64_t(kRadius), ysize,
+                               weights, out);
+    }
+  }
+};
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_CONVOLVE_INL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/convolve.h b/third-party/libjxl/libjxl/lib/jxl/convolve.h
new file mode 100644
index 0000000000..2fcd2d0980
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/convolve.h
@@ -0,0 +1,105 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_CONVOLVE_H_
+#define LIB_JXL_CONVOLVE_H_
+
+// 2D convolution.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+// No valid values outside [0, xsize), but the strategy may still safely load
+// the preceding vector, and/or round xsize up to the vector lane count. This
+// avoids needing PadImage.
+// Requires xsize >= kConvolveLanes + kConvolveMaxRadius.
+static constexpr size_t kConvolveMaxRadius = 3;
+
+// Weights must already be normalized.
+
+struct WeightsSymmetric3 {  // 3x3 kernel described by three distinct weights
+  // d r d (each replicated 4x)
+  // r c r
+  // d r d
+  float c[4];  // center
+  float r[4];  // left/right/top/bottom neighbors
+  float d[4];  // diagonal neighbors
+};
+
+struct WeightsSymmetric5 {  // 5x5 kernel described by six distinct weights
+  // The lower-right quadrant is: c r R  (each replicated 4x)
+  //                              r d L
+  //                              R L D
+  float c[4];  // quadrant offset (0,0)
+  float r[4];  // quadrant offsets (1,0) and (0,1)
+  float R[4];  // quadrant offsets (2,0) and (0,2)
+  float d[4];  // quadrant offset (1,1)
+  float D[4];  // quadrant offset (2,2)
+  float L[4];  // quadrant offsets (2,1) and (1,2)
+};
+
+// Weights for separable 5x5 filters (typically but not necessarily the same
+// values for horizontal and vertical directions). The kernel must already be
+// normalized, but note that values for negative offsets are omitted, so the
+// given values do not sum to 1.
+struct WeightsSeparable5 {
+  // Horizontal 1D, distances 0..2 (each replicated 4x)
+  float horz[3 * 4];
+  float vert[3 * 4];  // Vertical 1D, same layout as horz
+};
+
+// Weights for separable 7x7 filters (typically but not necessarily the same
+// values for horizontal and vertical directions). The kernel must already be
+// normalized, but note that values for negative offsets are omitted, so the
+// given values do not sum to 1.
+//
+// NOTE: for >= 7x7 Gaussian kernels, it is faster to use FastGaussian instead,
+// at least when images exceed the L1 cache size.
+struct WeightsSeparable7 {
+  // Horizontal 1D, distances 0..3 (each replicated 4x)
+  float horz[4 * 4];
+  float vert[4 * 4];  // Vertical 1D, same layout as horz
+};
+
+const WeightsSymmetric3& WeightsSymmetric3Lowpass();
+const WeightsSeparable5& WeightsSeparable5Lowpass();
+const WeightsSymmetric5& WeightsSymmetric5Lowpass();
+
+void SlowSymmetric3(const ImageF& in, const Rect& rect,
+                    const WeightsSymmetric3& weights, ThreadPool* pool,
+                    ImageF* JXL_RESTRICT out);
+
+void SlowSeparable5(const ImageF& in, const Rect& rect,
+                    const WeightsSeparable5& weights, ThreadPool* pool,
+                    ImageF* out);
+
+void SlowSeparable7(const ImageF& in, const Rect& rect,
+                    const WeightsSeparable7& weights, ThreadPool* pool,
+                    ImageF* out);
+
+void Symmetric3(const ImageF& in, const Rect& rect,
+                const WeightsSymmetric3& weights, ThreadPool* pool,
+                ImageF* out);
+
+void Symmetric5(const ImageF& in, const Rect& rect,
+                const WeightsSymmetric5& weights, ThreadPool* pool,
+                ImageF* JXL_RESTRICT out);
+
+void Separable5(const ImageF& in, const Rect& rect,
+                const WeightsSeparable5& weights, ThreadPool* pool,
+                ImageF* out);
+
+void Separable7(const ImageF& in, const Rect& rect,
+                const WeightsSeparable7& weights, ThreadPool* pool,
+                ImageF* out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_CONVOLVE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/convolve_separable5.cc b/third-party/libjxl/libjxl/lib/jxl/convolve_separable5.cc
new file mode 100644
index 0000000000..b26ff54bbc
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/convolve_separable5.cc
@@ -0,0 +1,261 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/convolve.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/convolve_separable5.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/convolve-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Vec;
+
+// 5x5 convolution by separable kernel with a single scan through the input.
+// This is more cache-efficient than separate horizontal/vertical passes, and
+// possibly faster (given enough registers) than tiling and/or transposing.
+//
+// Overview: imagine a 5x5 window around a central pixel. First convolve the
+// rows by multiplying the pixels with the corresponding weights from
+// WeightsSeparable5.horz[abs(x_offset) * 4]. Then multiply each of these
+// intermediate results by the corresponding vertical weight, i.e.
+// vert[abs(y_offset) * 4]. Finally, store the sum of these values as the
+// convolution result at the position of the central pixel in the output.
+//
+// Each of these operations uses SIMD vectors. The central pixel and most
+// importantly the output are aligned, so neighboring pixels (e.g. x_offset=1)
+// require unaligned loads. Because weights are supplied in identical groups of
+// 4, we can use LoadDup128 to load them (slightly faster).
+//
+// Uses mirrored boundary handling. Until x >= kRadius, the horizontal
+// convolution uses Neighbors class to shuffle vectors as if each of its lanes
+// had been loaded from the mirrored offset. Similarly, the last full vector to
+// write uses mirroring. In the case of scalar vectors, Neighbors is not usable
+// and the value is loaded directly. Otherwise, the number of valid pixels
+// modulo the vector size enables a small optimization: for smaller offsets,
+// a non-mirrored load is sufficient.
+class Separable5Strategy {  // per-row kernel, driven by ConvolveT
+  using D = HWY_CAPPED(float, 16);
+  using V = Vec<D>;
+
+ public:
+  static constexpr int64_t kRadius = 2;  // 5 taps: offsets -2..+2
+
+  template <size_t kSizeModN, class WrapRow>
+  static JXL_MAYBE_INLINE void ConvolveRow(
+      const float* const JXL_RESTRICT row_m, const size_t xsize,
+      const int64_t stride, const WrapRow& wrap_row,
+      const WeightsSeparable5& weights, float* const JXL_RESTRICT row_out) {
+    const D d;
+    const int64_t neg_stride = -stride;  // allows LEA addressing.
+    const float* const JXL_RESTRICT row_t2 =
+        wrap_row(row_m + 2 * neg_stride, stride);
+    const float* const JXL_RESTRICT row_t1 =
+        wrap_row(row_m + 1 * neg_stride, stride);
+    const float* const JXL_RESTRICT row_b1 =
+        wrap_row(row_m + 1 * stride, stride);
+    const float* const JXL_RESTRICT row_b2 =
+        wrap_row(row_m + 2 * stride, stride);
+
+    const V wh0 = LoadDup128(d, weights.horz + 0 * 4);  // wh<i>: distance i
+    const V wh1 = LoadDup128(d, weights.horz + 1 * 4);
+    const V wh2 = LoadDup128(d, weights.horz + 2 * 4);
+    const V wv0 = LoadDup128(d, weights.vert + 0 * 4);  // wv<i>: distance i
+    const V wv1 = LoadDup128(d, weights.vert + 1 * 4);
+    const V wv2 = LoadDup128(d, weights.vert + 2 * 4);
+
+    size_t x = 0;
+
+    // Left border (mirrored). More than one iteration for scalars.
+    for (; x < kRadius; x += Lanes(d)) {
+      const V conv0 =
+          Mul(HorzConvolveFirst(row_m, x, xsize, wh0, wh1, wh2), wv0);
+
+      const V conv1t = HorzConvolveFirst(row_t1, x, xsize, wh0, wh1, wh2);
+      const V conv1b = HorzConvolveFirst(row_b1, x, xsize, wh0, wh1, wh2);
+      const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0);
+
+      const V conv2t = HorzConvolveFirst(row_t2, x, xsize, wh0, wh1, wh2);
+      const V conv2b = HorzConvolveFirst(row_b2, x, xsize, wh0, wh1, wh2);
+      const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1);
+      Store(conv2, d, row_out + x);
+    }
+
+    // Main loop: load inputs without padding
+    for (; x + Lanes(d) + kRadius <= xsize; x += Lanes(d)) {
+      const V conv0 = Mul(HorzConvolve(row_m + x, wh0, wh1, wh2), wv0);
+
+      const V conv1t = HorzConvolve(row_t1 + x, wh0, wh1, wh2);
+      const V conv1b = HorzConvolve(row_b1 + x, wh0, wh1, wh2);
+      const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0);
+
+      const V conv2t = HorzConvolve(row_t2 + x, wh0, wh1, wh2);
+      const V conv2b = HorzConvolve(row_b2 + x, wh0, wh1, wh2);
+      const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1);
+      Store(conv2, d, row_out + x);
+    }
+
+    // Last full vector to write (the above loop handled mod >= kRadius)
+#if HWY_TARGET == HWY_SCALAR
+    while (x < xsize) {
+#else
+    if (kSizeModN < kRadius) {
+#endif
+      const V conv0 =
+          Mul(HorzConvolveLast<kSizeModN>(row_m, x, xsize, wh0, wh1, wh2), wv0);
+
+      const V conv1t =
+          HorzConvolveLast<kSizeModN>(row_t1, x, xsize, wh0, wh1, wh2);
+      const V conv1b =
+          HorzConvolveLast<kSizeModN>(row_b1, x, xsize, wh0, wh1, wh2);
+      const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0);
+
+      const V conv2t =
+          HorzConvolveLast<kSizeModN>(row_t2, x, xsize, wh0, wh1, wh2);
+      const V conv2b =
+          HorzConvolveLast<kSizeModN>(row_b2, x, xsize, wh0, wh1, wh2);
+      const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1);
+      Store(conv2, d, row_out + x);
+      x += Lanes(d);
+    }
+
+    // Scalar tail for the remaining pixels. If mod = 0, none are left.
+    if (kSizeModN != 0) {
+      for (; x < xsize; ++x) {
+        float mul = 0.0f;
+        for (int64_t dy = -kRadius; dy <= kRadius; ++dy) {
+          const float wy = weights.vert[std::abs(dy) * 4];
+          const float* clamped_row = wrap_row(row_m + dy * stride, stride);
+          for (int64_t dx = -kRadius; dx <= kRadius; ++dx) {
+            const float wx = weights.horz[std::abs(dx) * 4];
+            const int64_t clamped_x = Mirror(x + dx, xsize);
+            mul += clamped_row[clamped_x] * wx * wy;
+          }
+        }
+        row_out[x] = mul;
+      }
+    }
+  }
+
+ private:
+  // HorzConvolve for the first vector in a row: left neighbors are mirrored.
+  static JXL_MAYBE_INLINE V HorzConvolveFirst(
+      const float* const JXL_RESTRICT row, const int64_t x, const int64_t xsize,
+      const V wh0, const V wh1, const V wh2) {
+    const D d;
+    const V c = LoadU(d, row + x);
+    const V mul0 = Mul(c, wh0);
+
+#if HWY_TARGET == HWY_SCALAR
+    const V l1 = LoadU(d, row + Mirror(x - 1, xsize));
+    const V l2 = LoadU(d, row + Mirror(x - 2, xsize));
+#else
+    (void)xsize;
+    const V l1 = Neighbors::FirstL1(c);
+    const V l2 = Neighbors::FirstL2(c);
+#endif
+
+    const V r1 = LoadU(d, row + x + 1);
+    const V r2 = LoadU(d, row + x + 2);
+
+    const V mul1 = MulAdd(Add(l1, r1), wh1, mul0);
+    const V mul2 = MulAdd(Add(l2, r2), wh2, mul1);
+    return mul2;
+  }
+
+  template <size_t kSizeModN>  // last vector: right neighbors are mirrored
+  static JXL_MAYBE_INLINE V
+  HorzConvolveLast(const float* const JXL_RESTRICT row, const int64_t x,
+                   const int64_t xsize, const V wh0, const V wh1, const V wh2) {
+    const D d;
+    const V c = LoadU(d, row + x);
+    const V mul0 = Mul(c, wh0);
+
+    const V l1 = LoadU(d, row + x - 1);
+    const V l2 = LoadU(d, row + x - 2);
+
+    V r1, r2;
+#if HWY_TARGET == HWY_SCALAR
+    r1 = LoadU(d, row + Mirror(x + 1, xsize));
+    r2 = LoadU(d, row + Mirror(x + 2, xsize));
+#else
+    const size_t N = Lanes(d);
+    if (kSizeModN == 0) {
+      r2 = TableLookupLanes(c, SetTableIndices(d, MirrorLanes(N - 2)));
+      r1 = TableLookupLanes(c, SetTableIndices(d, MirrorLanes(N - 1)));
+    } else {  // kSizeModN == 1
+      const auto last = LoadU(d, row + xsize - N);  // the last N valid pixels
+      r2 = TableLookupLanes(last, SetTableIndices(d, MirrorLanes(N - 1)));
+      r1 = last;
+    }
+#endif
+
+    // Sum of the pixels at horizontal distance i, multiplied by wh<i>.
+    const V sum1 = Add(l1, r1);
+    const V mul1 = MulAdd(sum1, wh1, mul0);
+    const V sum2 = Add(l2, r2);
+    const V mul2 = MulAdd(sum2, wh2, mul1);
+    return mul2;
+  }
+
+  // Requires kRadius valid pixels before/after pos.
+  static JXL_MAYBE_INLINE V HorzConvolve(const float* const JXL_RESTRICT pos,
+                                         const V wh0, const V wh1,
+                                         const V wh2) {
+    const D d;
+    const V c = LoadU(d, pos);
+    const V mul0 = Mul(c, wh0);
+
+    // Loading anew is faster than combining vectors.
+    const V l1 = LoadU(d, pos - 1);
+    const V r1 = LoadU(d, pos + 1);
+    const V l2 = LoadU(d, pos - 2);
+    const V r2 = LoadU(d, pos + 2);
+    // Sum of the pixels at horizontal distance i, multiplied by wh<i>.
+    const V sum1 = Add(l1, r1);
+    const V mul1 = MulAdd(sum1, wh1, mul0);
+    const V sum2 = Add(l2, r2);
+    const V mul2 = MulAdd(sum2, wh2, mul1);
+    return mul2;
+  }
+};
+
+void Separable5(const ImageF& in, const Rect& rect,
+                const WeightsSeparable5& weights, ThreadPool* pool,
+                ImageF* out) {
+  using Conv = ConvolveT<Separable5Strategy>;
+  if (rect.xsize() >= Conv::MinWidth()) {  // Conv::Run JXL_CHECKs this width
+    return Conv::Run(in, rect, weights, pool, out);
+  }
+
+  return SlowSeparable5(in, rect, weights, pool, out);  // narrow rect fallback
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(Separable5);
+void Separable5(const ImageF& in, const Rect& rect,
+                const WeightsSeparable5& weights, ThreadPool* pool,
+                ImageF* out) {
+  // Public entry point: forwards to the per-target HWY_NAMESPACE::Separable5.
+  return HWY_DYNAMIC_DISPATCH(Separable5)(in, rect, weights, pool, out);
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/convolve_separable7.cc b/third-party/libjxl/libjxl/lib/jxl/convolve_separable7.cc
new file mode 100644
index 0000000000..086dfd22b5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/convolve_separable7.cc
@@ -0,0 +1,285 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/convolve.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/convolve_separable7.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/convolve-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Vec;
+
+// 7x7 convolution by separable kernel with a single scan through the input.
+// Extended version of Separable5, see documentation there.
+class Separable7Strategy {
+  using D = HWY_CAPPED(float, 16);
+  using V = Vec<D>;
+
+ public:
+  static constexpr int64_t kRadius = 3;
+
+  template <size_t kSizeModN, class WrapRow>
+  static JXL_MAYBE_INLINE void ConvolveRow(
+      const float* const JXL_RESTRICT row_m, const size_t xsize,
+      const int64_t stride, const WrapRow& wrap_row,
+      const WeightsSeparable7& weights, float* const JXL_RESTRICT row_out) {
+    const D d;
+    const int64_t neg_stride = -stride;  // allows LEA addressing.
+    const float* const JXL_RESTRICT row_t3 =
+        wrap_row(row_m + 3 * neg_stride, stride);
+    const float* const JXL_RESTRICT row_t2 =
+        wrap_row(row_m + 2 * neg_stride, stride);
+    const float* const JXL_RESTRICT row_t1 =
+        wrap_row(row_m + 1 * neg_stride, stride);
+    const float* const JXL_RESTRICT row_b1 =
+        wrap_row(row_m + 1 * stride, stride);
+    const float* const JXL_RESTRICT row_b2 =
+        wrap_row(row_m + 2 * stride, stride);
+    const float* const JXL_RESTRICT row_b3 =
+        wrap_row(row_m + 3 * stride, stride);
+
+    const V wh0 = LoadDup128(d, weights.horz + 0 * 4);
+    const V wh1 = LoadDup128(d, weights.horz + 1 * 4);
+    const V wh2 = LoadDup128(d, weights.horz + 2 * 4);
+    const V wh3 = LoadDup128(d, weights.horz + 3 * 4);
+    const V wv0 = LoadDup128(d, weights.vert + 0 * 4);
+    const V wv1 = LoadDup128(d, weights.vert + 1 * 4);
+    const V wv2 = LoadDup128(d, weights.vert + 2 * 4);
+    const V wv3 = LoadDup128(d, weights.vert + 3 * 4);
+
+    size_t x = 0;
+
+    // More than one iteration for scalars.
+    for (; x < kRadius; x += Lanes(d)) {
+      const V conv0 =
+          Mul(HorzConvolveFirst(row_m, x, xsize, wh0, wh1, wh2, wh3), wv0);
+
+      const V conv1t = HorzConvolveFirst(row_t1, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv1b = HorzConvolveFirst(row_b1, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0);
+
+      const V conv2t = HorzConvolveFirst(row_t2, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv2b = HorzConvolveFirst(row_b2, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1);
+
+      const V conv3t = HorzConvolveFirst(row_t3, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv3b = HorzConvolveFirst(row_b3, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv3 = MulAdd(Add(conv3t, conv3b), wv3, conv2);
+
+      Store(conv3, d, row_out + x);
+    }
+
+    // Main loop: load inputs without padding
+    for (; x + Lanes(d) + kRadius <= xsize; x += Lanes(d)) {
+      const V conv0 = Mul(HorzConvolve(row_m + x, wh0, wh1, wh2, wh3), wv0);
+
+      const V conv1t = HorzConvolve(row_t1 + x, wh0, wh1, wh2, wh3);
+      const V conv1b = HorzConvolve(row_b1 + x, wh0, wh1, wh2, wh3);
+      const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0);
+
+      const V conv2t = HorzConvolve(row_t2 + x, wh0, wh1, wh2, wh3);
+      const V conv2b = HorzConvolve(row_b2 + x, wh0, wh1, wh2, wh3);
+      const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1);
+
+      const V conv3t = HorzConvolve(row_t3 + x, wh0, wh1, wh2, wh3);
+      const V conv3b = HorzConvolve(row_b3 + x, wh0, wh1, wh2, wh3);
+      const V conv3 = MulAdd(Add(conv3t, conv3b), wv3, conv2);
+
+      Store(conv3, d, row_out + x);
+    }
+
+    // Last full vector to write (the above loop handled mod >= kRadius)
+#if HWY_TARGET == HWY_SCALAR
+    while (x < xsize) {
+#else
+    if (kSizeModN < kRadius) {
+#endif
+      const V conv0 =
+          Mul(HorzConvolveLast<kSizeModN>(row_m, x, xsize, wh0, wh1, wh2, wh3),
+              wv0);
+
+      const V conv1t =
+          HorzConvolveLast<kSizeModN>(row_t1, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv1b =
+          HorzConvolveLast<kSizeModN>(row_b1, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0);
+
+      const V conv2t =
+          HorzConvolveLast<kSizeModN>(row_t2, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv2b =
+          HorzConvolveLast<kSizeModN>(row_b2, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1);
+
+      const V conv3t =
+          HorzConvolveLast<kSizeModN>(row_t3, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv3b =
+          HorzConvolveLast<kSizeModN>(row_b3, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv3 = MulAdd(Add(conv3t, conv3b), wv3, conv2);
+
+      Store(conv3, d, row_out + x);
+      x += Lanes(d);
+    }
+
+    // If mod = 0, the above vector was the last.
+    if (kSizeModN != 0) {
+      for (; x < xsize; ++x) {
+        float mul = 0.0f;
+        for (int64_t dy = -kRadius; dy <= kRadius; ++dy) {
+          const float wy = weights.vert[std::abs(dy) * 4];
+          const float* clamped_row = wrap_row(row_m + dy * stride, stride);
+          for (int64_t dx = -kRadius; dx <= kRadius; ++dx) {
+            const float wx = weights.horz[std::abs(dx) * 4];
+            const int64_t clamped_x = Mirror(x + dx, xsize);
+            mul += clamped_row[clamped_x] * wx * wy;
+          }
+        }
+        row_out[x] = mul;
+      }
+    }
+  }
+
+ private:
+  // HorzConvolve variant for the first vector in a row (mirrored left neighbors).
+  static JXL_MAYBE_INLINE V HorzConvolveFirst(
+      const float* const JXL_RESTRICT row, const int64_t x, const int64_t xsize,
+      const V wh0, const V wh1, const V wh2, const V wh3) {
+    const D d;
+    const V c = LoadU(d, row + x);  // center pixels
+    const V mul0 = Mul(c, wh0);
+
+#if HWY_TARGET == HWY_SCALAR
+    const V l1 = LoadU(d, row + Mirror(x - 1, xsize));  // one lane: mirror the index
+    const V l2 = LoadU(d, row + Mirror(x - 2, xsize));
+    const V l3 = LoadU(d, row + Mirror(x - 3, xsize));
+#else
+    (void)xsize;  // only the scalar branch reads xsize
+    const V l1 = Neighbors::FirstL1(c);  // left neighbors are derived from c
+    const V l2 = Neighbors::FirstL2(c);
+    const V l3 = Neighbors::FirstL3(c);
+#endif
+
+    const V r1 = LoadU(d, row + x + 1);  // right neighbors: unaligned loads
+    const V r2 = LoadU(d, row + x + 2);
+    const V r3 = LoadU(d, row + x + 3);
+
+    const V mul1 = MulAdd(Add(l1, r1), wh1, mul0);  // pair at distance i shares whi
+    const V mul2 = MulAdd(Add(l2, r2), wh2, mul1);
+    const V mul3 = MulAdd(Add(l3, r3), wh3, mul2);
+    return mul3;
+  }
+
+  template <size_t kSizeModN>  // selects the right-edge case; presumably xsize mod lane count - confirm at caller
+  static JXL_MAYBE_INLINE V HorzConvolveLast(
+      const float* const JXL_RESTRICT row, const int64_t x, const int64_t xsize,
+      const V wh0, const V wh1, const V wh2, const V wh3) {
+    const D d;
+    const V c = LoadU(d, row + x);  // center pixels
+    const V mul0 = Mul(c, wh0);
+
+    const V l1 = LoadU(d, row + x - 1);  // left neighbors: unaligned loads
+    const V l2 = LoadU(d, row + x - 2);
+    const V l3 = LoadU(d, row + x - 3);
+
+    V r1, r2, r3;  // right neighbors, filled in per target / edge case below
+#if HWY_TARGET == HWY_SCALAR
+    r1 = LoadU(d, row + Mirror(x + 1, xsize));  // one lane: mirror the index
+    r2 = LoadU(d, row + Mirror(x + 2, xsize));
+    r3 = LoadU(d, row + Mirror(x + 3, xsize));
+#else
+    const size_t N = Lanes(d);
+    if (kSizeModN == 0) {  // all three right neighbors are lane permutations of c
+      r3 = TableLookupLanes(c, SetTableIndices(d, MirrorLanes(N - 3)));
+      r2 = TableLookupLanes(c, SetTableIndices(d, MirrorLanes(N - 2)));
+      r1 = TableLookupLanes(c, SetTableIndices(d, MirrorLanes(N - 1)));
+    } else if (kSizeModN == 1) {
+      const auto last = LoadU(d, row + xsize - N);  // the N pixels ending at xsize - 1
+      r3 = TableLookupLanes(last, SetTableIndices(d, MirrorLanes(N - 2)));
+      r2 = TableLookupLanes(last, SetTableIndices(d, MirrorLanes(N - 1)));
+      r1 = last;  // used unpermuted as the +1 neighbor
+    } else /* kSizeModN >= 2 */ {
+      const auto last = LoadU(d, row + xsize - N);  // the N pixels ending at xsize - 1
+      r3 = TableLookupLanes(last, SetTableIndices(d, MirrorLanes(N - 1)));
+      r2 = last;  // used unpermuted as the +2 neighbor
+      r1 = LoadU(d, row + x + 1);  // +1 neighbor is loaded directly
+    }
+#endif
+
+    // Sum of pixels with Manhattan distance i, multiplied by weights[i].
+    const V sum1 = Add(l1, r1);
+    const V mul1 = MulAdd(sum1, wh1, mul0);
+    const V sum2 = Add(l2, r2);
+    const V mul2 = MulAdd(sum2, wh2, mul1);
+    const V sum3 = Add(l3, r3);
+    const V mul3 = MulAdd(sum3, wh3, mul2);
+    return mul3;
+  }
+
+  // Fast path for interior pixels: requires kRadius valid pixels on both sides
+  // of every lane. Returns one vector of horizontal convolution results, where
+  // lane i holds the result for pixel pos + i.
+  static JXL_MAYBE_INLINE V HorzConvolve(const float* const JXL_RESTRICT pos,
+                                         const V wh0, const V wh1, const V wh2,
+                                         const V wh3) {
+    const D d;
+
+    // TODO(janwas): better to Combine
+    // Unaligned loads of the center vector and its three neighbors per side.
+    const V center = LoadU(d, pos);
+    const V left1 = LoadU(d, pos - 1);
+    const V right1 = LoadU(d, pos + 1);
+    const V left2 = LoadU(d, pos - 2);
+    const V right2 = LoadU(d, pos + 2);
+    const V left3 = LoadU(d, pos - 3);
+    const V right3 = LoadU(d, pos + 3);
+
+    // Pixels at the same distance from the center share a weight, so each pair
+    // is added first and then folded into the running total with one MulAdd.
+    V acc = Mul(center, wh0);
+    acc = MulAdd(Add(left1, right1), wh1, acc);
+    acc = MulAdd(Add(left2, right2), wh2, acc);
+    acc = MulAdd(Add(left3, right3), wh3, acc);
+    return acc;
+  }
+};
+
+void Separable7(const ImageF& in, const Rect& rect,
+                const WeightsSeparable7& weights, ThreadPool* pool,
+                ImageF* out) {
+  // Rects narrower than the vectorized strategy's minimum use the slow path.
+  if (rect.xsize() < ConvolveT<Separable7Strategy>::MinWidth()) {
+    SlowSeparable7(in, rect, weights, pool, out);
+    return;
+  }
+  ConvolveT<Separable7Strategy>::Run(in, rect, weights, pool, out);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(Separable7);  // required by HWY_DYNAMIC_DISPATCH(Separable7) below
+void Separable7(const ImageF& in, const Rect& rect,
+                const WeightsSeparable7& weights, ThreadPool* pool,
+                ImageF* out) {
+  return HWY_DYNAMIC_DISPATCH(Separable7)(in, rect, weights, pool, out);  // forwards all arguments unchanged
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/convolve_slow.cc b/third-party/libjxl/libjxl/lib/jxl/convolve_slow.cc
new file mode 100644
index 0000000000..91e11dcfd5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/convolve_slow.cc
@@ -0,0 +1,208 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/convolve.h"
+
+#include "lib/jxl/convolve-inl.h"
+
+namespace jxl {
+
+//------------------------------------------------------------------------------
+// Kernels
+
+// 4 instances of a given literal value, useful as input to LoadDup128.
+#define JXL_REP4(literal) literal, literal, literal, literal
+
+// 3x3 lowpass kernel: keeps most energy in low frequencies (e.g. antialiasing).
+const WeightsSymmetric3& WeightsSymmetric3Lowpass() {
+  // Values come from research/convolve_weights.py (cubic spline approximations
+  // of prolate spheroidal wave functions).
+  constexpr float kCenter = 0.36208932f;
+  constexpr float kEdge = 0.12820096f;
+  constexpr float kDiagonal = 0.03127668f;
+  static constexpr WeightsSymmetric3 kLowpass = {
+      {JXL_REP4(kCenter)}, {JXL_REP4(kEdge)}, {JXL_REP4(kDiagonal)}};
+  return kLowpass;
+}
+
+const WeightsSeparable5& WeightsSeparable5Lowpass() {
+  constexpr float kTap0 = 0.41714928f;  // center
+  constexpr float kTap1 = 0.25539268f;  // distance 1
+  constexpr float kTap2 = 0.03603267f;  // distance 2
+  static constexpr WeightsSeparable5 kLowpass = {
+      {JXL_REP4(kTap0), JXL_REP4(kTap1), JXL_REP4(kTap2)},
+      {JXL_REP4(kTap0), JXL_REP4(kTap1), JXL_REP4(kTap2)}};
+  return kLowpass;  // identical taps for both member arrays
+}
+
+const WeightsSymmetric5& WeightsSymmetric5Lowpass() {  // 5x5 lowpass weights, each repeated 4x
+  static constexpr WeightsSymmetric5 weights = {  // values are close to products of two WeightsSeparable5Lowpass taps
+      {JXL_REP4(0.1740135f)}, {JXL_REP4(0.1065369f)}, {JXL_REP4(0.0150310f)},
+      {JXL_REP4(0.0652254f)}, {JXL_REP4(0.0012984f)}, {JXL_REP4(0.0092025f)}};
+  return weights;  // static storage, so the returned reference stays valid
+}
+
+const WeightsSeparable5& WeightsSeparable5Gaussian1() {
+  constexpr float kTap0 = 0.38774f;  // center
+  constexpr float kTap1 = 0.24477f;  // distance 1
+  constexpr float kTap2 = 0.06136f;  // distance 2
+  static constexpr WeightsSeparable5 kGaussian = {
+      {JXL_REP4(kTap0), JXL_REP4(kTap1), JXL_REP4(kTap2)},
+      {JXL_REP4(kTap0), JXL_REP4(kTap1), JXL_REP4(kTap2)}};
+  return kGaussian;  // identical taps for both member arrays
+}
+
+const WeightsSeparable5& WeightsSeparable5Gaussian2() {
+  constexpr float kTap0 = 0.250301f;  // center
+  constexpr float kTap1 = 0.221461f;  // distance 1
+  constexpr float kTap2 = 0.153388f;  // distance 2
+  static constexpr WeightsSeparable5 kGaussian = {
+      {JXL_REP4(kTap0), JXL_REP4(kTap1), JXL_REP4(kTap2)},
+      {JXL_REP4(kTap0), JXL_REP4(kTap1), JXL_REP4(kTap2)}};
+  return kGaussian;  // identical taps for both member arrays
+}
+
+#undef JXL_REP4
+
+//------------------------------------------------------------------------------
+// Slow
+
+namespace {
+
+template <class WrapX, class WrapY>  // functors invoked as Wrap()(coordinate, size)
+float SlowSymmetric3Pixel(const ImageF& in, const int64_t ix, const int64_t iy,
+                          const int64_t xsize, const int64_t ysize,
+                          const WeightsSymmetric3& weights) {
+  float sum = 0.0f;
+
+  // ix/iy: image coordinates; ky: kernel row. Returns the 3x3 weighted sum.
+  for (int64_t ky = -1; ky <= 1; ky++) {
+    const int64_t y = WrapY()(iy + ky, ysize);
+    const float* JXL_RESTRICT row_in = in.ConstRow(static_cast<size_t>(y));
+
+    const float wc = ky == 0 ? weights.c[0] : weights.r[0];  // weight of the pixel in column ix
+    const float wlr = ky == 0 ? weights.r[0] : weights.d[0];  // weight of its left/right neighbors
+
+    const int64_t xm1 = WrapX()(ix - 1, xsize);
+    const int64_t xp1 = WrapX()(ix + 1, xsize);
+    sum += row_in[ix] * wc + (row_in[xm1] + row_in[xp1]) * wlr;
+  }
+  return sum;
+}
+
+template <class WrapY>
+void SlowSymmetric3Row(const ImageF& in, const int64_t iy, const int64_t xsize,
+                       const int64_t ysize, const WeightsSymmetric3& weights,
+                       float* JXL_RESTRICT row_out) {
+  // Only the two end columns can reach outside the row, so only they pay for
+  // mirroring in x; everything in between reads its neighbors directly.
+  const int64_t last = xsize - 1;
+  row_out[0] =
+      SlowSymmetric3Pixel<WrapMirror, WrapY>(in, 0, iy, xsize, ysize, weights);
+  for (int64_t col = 1; col < last; ++col) {
+    row_out[col] = SlowSymmetric3Pixel<WrapUnchanged, WrapY>(in, col, iy, xsize,
+                                                             ysize, weights);
+  }
+  row_out[last] = SlowSymmetric3Pixel<WrapMirror, WrapY>(in, last, iy, xsize,
+                                                         ysize, weights);
+}
+
+}  // namespace
+
+void SlowSymmetric3(const ImageF& in, const Rect& rect,  // scalar 3x3 convolution, one pool task per row
+                    const WeightsSymmetric3& weights, ThreadPool* pool,
+                    ImageF* JXL_RESTRICT out) {
+  const int64_t xsize = static_cast<int64_t>(rect.xsize());  // NOTE(review): only rect's size is read here, not its origin
+  const int64_t ysize = static_cast<int64_t>(rect.ysize());
+  const int64_t kRadius = 1;
+
+  JXL_CHECK(RunOnPool(
+      pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
+      [&](const uint32_t task, size_t /*thread*/) {
+        const int64_t iy = task;  // task index is the output row
+        float* JXL_RESTRICT out_row = out->Row(static_cast<size_t>(iy));
+
+        if (iy < kRadius || iy >= ysize - kRadius) {  // first/last row: wrap y with WrapMirror
+          SlowSymmetric3Row<WrapMirror>(in, iy, xsize, ysize, weights, out_row);
+        } else {
+          SlowSymmetric3Row<WrapUnchanged>(in, iy, xsize, ysize, weights,
+                                           out_row);
+        }
+      },
+      "SlowSymmetric3"));
+}
+
+namespace {
+
+// Separable kernels, any radius: one output pixel as a (2r+1)x(2r+1) sum.
+float SlowSeparablePixel(const ImageF& in, const Rect& rect, const int64_t x,
+                         const int64_t y, const int64_t radius,
+                         const float* JXL_RESTRICT horz_weights,
+                         const float* JXL_RESTRICT vert_weights) {
+  const size_t xsize = rect.xsize();
+  const size_t ysize = rect.ysize();
+  const WrapMirror wrap;  // maps out-of-range coordinates back into the rect
+
+  float mul = 0.0f;
+  for (int dy = -radius; dy <= radius; ++dy) {
+    const float wy = vert_weights[std::abs(dy) * 4];  // stride 4: each weight is stored four times
+    const size_t sy = wrap(y + dy, ysize);
+    JXL_CHECK(sy < ysize);
+    const float* const JXL_RESTRICT row = rect.ConstRow(in, sy);  // row sy, resolved through rect
+    for (int dx = -radius; dx <= radius; ++dx) {
+      const float wx = horz_weights[std::abs(dx) * 4];
+      const size_t sx = wrap(x + dx, xsize);
+      JXL_CHECK(sx < xsize);
+      mul += row[sx] * wx * wy;
+    }
+  }
+  return mul;
+}
+
+}  // namespace
+
+void SlowSeparable5(const ImageF& in, const Rect& rect,
+                    const WeightsSeparable5& weights, ThreadPool* pool,
+                    ImageF* out) {
+  // Scalar reference path: one pool task per output row; every pixel is
+  // computed by SlowSeparablePixel with radius 2 (5 taps per axis).
+  const float* horz = &weights.horz[0];
+  const float* vert = &weights.vert[0];
+  const size_t num_cols = rect.xsize();
+  const size_t num_rows = rect.ysize();
+
+  const auto process_row = [&](const uint32_t task, size_t /*thread*/) {
+    const int64_t y = task;
+    float* const JXL_RESTRICT dst = out->Row(y);
+    for (size_t x = 0; x < num_cols; ++x) {
+      dst[x] = SlowSeparablePixel(in, rect, x, y, /*radius=*/2, horz, vert);
+    }
+  };
+  JXL_CHECK(RunOnPool(pool, 0, static_cast<uint32_t>(num_rows),
+                      ThreadPool::NoInit, process_row, "SlowSeparable5"));
+}
+
+void SlowSeparable7(const ImageF& in, const Rect& rect,
+                    const WeightsSeparable7& weights, ThreadPool* pool,
+                    ImageF* out) {
+  // Scalar reference path: one pool task per output row; every pixel is
+  // computed by SlowSeparablePixel with radius 3 (7 taps per axis).
+  const float* horz = &weights.horz[0];
+  const float* vert = &weights.vert[0];
+  const size_t num_cols = rect.xsize();
+  const size_t num_rows = rect.ysize();
+
+  const auto process_row = [&](const uint32_t task, size_t /*thread*/) {
+    const int64_t y = task;
+    float* const JXL_RESTRICT dst = out->Row(y);
+    for (size_t x = 0; x < num_cols; ++x) {
+      dst[x] = SlowSeparablePixel(in, rect, x, y, /*radius=*/3, horz, vert);
+    }
+  };
+  JXL_CHECK(RunOnPool(pool, 0, static_cast<uint32_t>(num_rows),
+                      ThreadPool::NoInit, process_row, "SlowSeparable7"));
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/convolve_symmetric3.cc b/third-party/libjxl/libjxl/lib/jxl/convolve_symmetric3.cc
new file mode 100644
index 0000000000..06b59dfb60
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/convolve_symmetric3.cc
@@ -0,0 +1,194 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/convolve.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/convolve_symmetric3.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/convolve-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Vec;
+
+template <class WrapY, class V>  // NOTE(review): no caller of this overload is visible in this file - confirm it is needed
+static V WeightedSum(const ImageF& in, const WrapY wrap_y, const size_t ix,
+                     const int64_t iy, const size_t ysize, const V wx0,
+                     const V wx1, const V wx2) {
+  const HWY_FULL(float) d;
+  const float* JXL_RESTRICT center = in.ConstRow(wrap_y(iy, ysize)) + ix;  // row chosen by wrap_y, column ix
+  const auto in_m2 = LoadU(d, center - 2);
+  const auto in_p2 = LoadU(d, center + 2);
+  const auto in_m1 = LoadU(d, center - 1);
+  const auto in_p1 = LoadU(d, center + 1);
+  const auto in_00 = Load(d, center);  // aligned load, unlike the neighbors
+  const auto sum_2 = Mul(wx2, Add(in_m2, in_p2));  // 1x5 taps [wx2 wx1 wx0 wx1 wx2]
+  const auto sum_1 = Mul(wx1, Add(in_m1, in_p1));
+  const auto sum_0 = Mul(wx0, in_00);
+  return Add(sum_2, Add(sum_1, sum_0));
+}
+
+// 3x3 convolution by symmetric kernel with a single scan through the input.
+class Symmetric3Strategy {
+  using D = HWY_CAPPED(float, 16);
+  using V = Vec<D>;
+
+ public:
+  static constexpr int64_t kRadius = 1;
+
+  // Only accesses pixels in [0, xsize). Writes one output row to row_out.
+  template <size_t kSizeModN, class WrapRow>
+  static JXL_MAYBE_INLINE void ConvolveRow(
+      const float* const JXL_RESTRICT row_m, const size_t xsize,
+      const int64_t stride, const WrapRow& wrap_row,
+      const WeightsSymmetric3& weights, float* const JXL_RESTRICT row_out) {
+    const D d;
+    // t, m, b = top, middle, bottom row; wrap_row resolves the row pointers.
+    const float* const JXL_RESTRICT row_t = wrap_row(row_m - stride, stride);
+    const float* const JXL_RESTRICT row_b = wrap_row(row_m + stride, stride);
+
+    // Must load in advance - compiler doesn't understand LoadDup128 and
+    // schedules them too late.
+    const V w0 = LoadDup128(d, weights.c);
+    const V w1 = LoadDup128(d, weights.r);
+    const V w2 = LoadDup128(d, weights.d);
+
+    // l, c, r = left, center, right. Leftmost vector: need FirstL1.
+    {
+      const V tc = LoadU(d, row_t + 0);
+      const V mc = LoadU(d, row_m + 0);
+      const V bc = LoadU(d, row_b + 0);
+      const V tl = Neighbors::FirstL1(tc);  // left neighbors derived from the center vector
+      const V tr = LoadU(d, row_t + 0 + 1);
+      const V ml = Neighbors::FirstL1(mc);
+      const V mr = LoadU(d, row_m + 0 + 1);
+      const V bl = Neighbors::FirstL1(bc);
+      const V br = LoadU(d, row_b + 0 + 1);
+      const V conv =
+          WeightedSum(tl, tc, tr, ml, mc, mr, bl, bc, br, w0, w1, w2);
+      Store(conv, d, row_out + 0);
+    }
+
+    // Loop as long as we can load enough new values:
+    const size_t N = Lanes(d);
+    size_t x = N;
+    for (; x + N + kRadius <= xsize; x += N) {  // needs one valid pixel right of the vector
+      const auto conv = ConvolveValid(row_t, row_m, row_b, x, w0, w1, w2);
+      Store(conv, d, row_out + x);
+    }
+
+    // For final (partial) vector:
+    const V tc = LoadU(d, row_t + x);
+    const V mc = LoadU(d, row_m + x);
+    const V bc = LoadU(d, row_b + x);
+
+    V tr, mr, br;  // right neighbors, built per target / edge case below
+#if HWY_TARGET == HWY_SCALAR
+    tr = tc;  // Single-lane => mirrored right neighbor = center value.
+    mr = mc;
+    br = bc;
+#else
+    if (kSizeModN == 0) {
+      // The above loop didn't handle the last vector because it needs an
+      // additional right neighbor (generated via mirroring).
+      auto mirror = SetTableIndices(d, MirrorLanes(N - 1));
+      tr = TableLookupLanes(tc, mirror);
+      mr = TableLookupLanes(mc, mirror);
+      br = TableLookupLanes(bc, mirror);
+    } else {
+      auto mirror = SetTableIndices(d, MirrorLanes((xsize % N) - 1));
+      // Loads last valid value into uppermost lane and mirrors.
+      tr = TableLookupLanes(LoadU(d, row_t + xsize - N), mirror);
+      mr = TableLookupLanes(LoadU(d, row_m + xsize - N), mirror);
+      br = TableLookupLanes(LoadU(d, row_b + xsize - N), mirror);
+    }
+#endif
+
+    const V tl = LoadU(d, row_t + x - 1);  // left neighbors: unaligned loads
+    const V ml = LoadU(d, row_m + x - 1);
+    const V bl = LoadU(d, row_b + x - 1);
+    const V conv = WeightedSum(tl, tc, tr, ml, mc, mr, bl, bc, br, w0, w1, w2);
+    Store(conv, d, row_out + x);
+  }
+
+ private:
+  // Returns sum{x_i * w_i}: w0 for mc, w1 for the 4 edge pixels, w2 for corners.
+  template <class V>
+  static JXL_MAYBE_INLINE V WeightedSum(const V tl, const V tc, const V tr,
+                                        const V ml, const V mc, const V mr,
+                                        const V bl, const V bc, const V br,
+                                        const V w0, const V w1, const V w2) {
+    const V sum_tb = Add(tc, bc);
+
+    // Faster than 5 mul + 4 FMA.
+    const V mul0 = Mul(mc, w0);
+    const V sum_lr = Add(ml, mr);
+
+    const V x1 = Add(sum_tb, sum_lr);  // top + bottom + left + right
+    const V mul1 = MulAdd(x1, w1, mul0);
+
+    const V sum_t2 = Add(tl, tr);
+    const V sum_b2 = Add(bl, br);
+    const V x2 = Add(sum_t2, sum_b2);  // the four diagonal neighbors
+    const V mul2 = MulAdd(x2, w2, mul1);
+    return mul2;
+  }
+
+  static JXL_MAYBE_INLINE V ConvolveValid(const float* JXL_RESTRICT row_t,  // all 9 inputs loaded directly
+                                          const float* JXL_RESTRICT row_m,
+                                          const float* JXL_RESTRICT row_b,
+                                          const int64_t x, const V w0,
+                                          const V w1, const V w2) {
+    const D d;
+    const V tc = LoadU(d, row_t + x);
+    const V mc = LoadU(d, row_m + x);
+    const V bc = LoadU(d, row_b + x);
+    const V tl = LoadU(d, row_t + x - 1);
+    const V tr = LoadU(d, row_t + x + 1);
+    const V ml = LoadU(d, row_m + x - 1);
+    const V mr = LoadU(d, row_m + x + 1);
+    const V bl = LoadU(d, row_b + x - 1);
+    const V br = LoadU(d, row_b + x + 1);
+    return WeightedSum(tl, tc, tr, ml, mc, mr, bl, bc, br, w0, w1, w2);
+  }
+};
+
+void Symmetric3(const ImageF& in, const Rect& rect,
+                const WeightsSymmetric3& weights, ThreadPool* pool,
+                ImageF* out) {
+  // Rects narrower than the vectorized strategy's minimum use the slow path.
+  if (rect.xsize() < ConvolveT<Symmetric3Strategy>::MinWidth()) {
+    SlowSymmetric3(in, rect, weights, pool, out);
+    return;
+  }
+  ConvolveT<Symmetric3Strategy>::Run(in, rect, weights, pool, out);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(Symmetric3);  // required by HWY_DYNAMIC_DISPATCH(Symmetric3) below
+void Symmetric3(const ImageF& in, const Rect& rect,
+                const WeightsSymmetric3& weights, ThreadPool* pool,
+                ImageF* out) {
+  return HWY_DYNAMIC_DISPATCH(Symmetric3)(in, rect, weights, pool, out);  // forwards all arguments unchanged
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/convolve_symmetric5.cc b/third-party/libjxl/libjxl/lib/jxl/convolve_symmetric5.cc
new file mode 100644
index 0000000000..3c46024e72
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/convolve_symmetric5.cc
@@ -0,0 +1,183 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/convolve.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/convolve_symmetric5.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/common.h"  // RoundUpTo
+#include "lib/jxl/convolve-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::Vec;
+
+// Weighted sum of 1x5 pixels around ix, iy with [wx2 wx1 wx0 wx1 wx2].
+template <class WrapY>
+static float WeightedSumBorder(const ImageF& in, const WrapY wrap_y,
+                               const int64_t ix, const int64_t iy,
+                               const size_t xsize, const size_t ysize,
+                               const float wx0, const float wx1,
+                               const float wx2) {
+  const WrapMirror wrap_x;  // x is always wrapped with WrapMirror; y uses the caller's policy
+  const float* JXL_RESTRICT row = in.ConstRow(wrap_y(iy, ysize));
+  const float in_m2 = row[wrap_x(ix - 2, xsize)];
+  const float in_p2 = row[wrap_x(ix + 2, xsize)];
+  const float in_m1 = row[wrap_x(ix - 1, xsize)];
+  const float in_p1 = row[wrap_x(ix + 1, xsize)];
+  const float in_00 = row[ix];  // center is read unwrapped
+  const float sum_2 = wx2 * (in_m2 + in_p2);
+  const float sum_1 = wx1 * (in_m1 + in_p1);
+  const float sum_0 = wx0 * in_00;
+  return sum_2 + sum_1 + sum_0;
+}
+
+template <class WrapY, class V>  // vector counterpart of WeightedSumBorder, no wrapping in x
+static V WeightedSum(const ImageF& in, const WrapY wrap_y, const size_t ix,
+                     const int64_t iy, const size_t ysize, const V wx0,
+                     const V wx1, const V wx2) {
+  const HWY_FULL(float) d;
+  const float* JXL_RESTRICT center = in.ConstRow(wrap_y(iy, ysize)) + ix;  // row chosen by wrap_y, column ix
+  const auto in_m2 = LoadU(d, center - 2);
+  const auto in_p2 = LoadU(d, center + 2);
+  const auto in_m1 = LoadU(d, center - 1);
+  const auto in_p1 = LoadU(d, center + 1);
+  const auto in_00 = Load(d, center);  // aligned load, unlike the neighbors
+  const auto sum_2 = Mul(wx2, Add(in_m2, in_p2));  // 1x5 taps [wx2 wx1 wx0 wx1 wx2]
+  const auto sum_1 = Mul(wx1, Add(in_m1, in_p1));
+  const auto sum_0 = Mul(wx0, in_00);
+  return Add(sum_2, Add(sum_1, sum_0));
+}
+
+// Produces result for one pixel (scalar path, x mirrored by WeightedSumBorder).
+template <class WrapY>
+float Symmetric5Border(const ImageF& in, const Rect& rect, const int64_t ix,
+                       const int64_t iy, const WeightsSymmetric5& weights) {
+  const float w0 = weights.c[0];  // wN: N = dx^2 + dy^2 of the taps it is applied to below
+  const float w1 = weights.r[0];
+  const float w2 = weights.R[0];
+  const float w4 = weights.d[0];
+  const float w5 = weights.L[0];
+  const float w8 = weights.D[0];
+
+  const size_t xsize = rect.xsize();
+  const size_t ysize = rect.ysize();
+  const WrapY wrap_y;
+  // Unrolled loop over all 5 rows of the kernel.
+  float sum0 = WeightedSumBorder(in, wrap_y, ix, iy, xsize, ysize, w0, w1, w2);
+
+  sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 2, xsize, ysize, w2, w5, w8);
+  float sum1 =
+      WeightedSumBorder(in, wrap_y, ix, iy + 2, xsize, ysize, w2, w5, w8);
+
+  sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 1, xsize, ysize, w1, w4, w5);
+  sum1 += WeightedSumBorder(in, wrap_y, ix, iy + 1, xsize, ysize, w1, w4, w5);
+
+  return sum0 + sum1;  // two accumulators: rows above+center in sum0, rows below in sum1
+}
+
+// Produces result for one vector's worth of pixels, stored at row_out + ix.
+template <class WrapY>
+static void Symmetric5Interior(const ImageF& in, const Rect& rect,
+                               const int64_t ix, const int64_t iy,
+                               const WeightsSymmetric5& weights,
+                               float* JXL_RESTRICT row_out) {
+  const HWY_FULL(float) d;
+
+  const auto w0 = LoadDup128(d, weights.c);  // wN: N = dx^2 + dy^2 of the taps it is applied to below
+  const auto w1 = LoadDup128(d, weights.r);
+  const auto w2 = LoadDup128(d, weights.R);
+  const auto w4 = LoadDup128(d, weights.d);
+  const auto w5 = LoadDup128(d, weights.L);
+  const auto w8 = LoadDup128(d, weights.D);
+
+  const size_t ysize = rect.ysize();
+  const WrapY wrap_y;
+  // Unrolled loop over all 5 rows of the kernel.
+  auto sum0 = WeightedSum(in, wrap_y, ix, iy, ysize, w0, w1, w2);
+
+  sum0 = Add(sum0, WeightedSum(in, wrap_y, ix, iy - 2, ysize, w2, w5, w8));
+  auto sum1 = WeightedSum(in, wrap_y, ix, iy + 2, ysize, w2, w5, w8);
+
+  sum0 = Add(sum0, WeightedSum(in, wrap_y, ix, iy - 1, ysize, w1, w4, w5));
+  sum1 = Add(sum1, WeightedSum(in, wrap_y, ix, iy + 1, ysize, w1, w4, w5));
+
+  Store(Add(sum0, sum1), d, row_out + ix);  // aligned store
+}
+
+template <class WrapY>  // one output row: scalar prefix, vector interior, scalar suffix
+static void Symmetric5Row(const ImageF& in, const Rect& rect, const int64_t iy,
+                          const WeightsSymmetric5& weights,
+                          float* JXL_RESTRICT row_out) {
+  const int64_t kRadius = 2;
+  const size_t xsize = rect.xsize();
+
+  size_t ix = 0;
+  const HWY_FULL(float) d;
+  const size_t N = Lanes(d);
+  const size_t aligned_x = RoundUpTo(kRadius, N);  // presumably kRadius rounded up to a multiple of N - confirm in common.h
+  for (; ix < std::min(aligned_x, xsize); ++ix) {  // scalar prefix (whole row if xsize is small)
+    row_out[ix] = Symmetric5Border<WrapY>(in, rect, ix, iy, weights);
+  }
+  for (; ix + N + kRadius <= xsize; ix += N) {  // full vectors with kRadius pixels to spare on the right
+    Symmetric5Interior<WrapY>(in, rect, ix, iy, weights, row_out);
+  }
+  for (; ix < xsize; ++ix) {  // scalar suffix
+    row_out[ix] = Symmetric5Border<WrapY>(in, rect, ix, iy, weights);
+  }
+}
+
+static JXL_NOINLINE void Symmetric5BorderRow(const ImageF& in, const Rect& rect,  // non-inlined WrapMirror instantiation of Symmetric5Row
+                                             const int64_t iy,
+                                             const WeightsSymmetric5& weights,
+                                             float* JXL_RESTRICT row_out) {
+  return Symmetric5Row<WrapMirror>(in, rect, iy, weights, row_out);
+}
+
+// Semi-vectorized (interior pixels only); called directly like slow::, unlike
+// the fully vectorized strategies below.
+void Symmetric5(const ImageF& in, const Rect& rect,  // 5x5 symmetric convolution, one pool task per row
+                const WeightsSymmetric5& weights, ThreadPool* pool,
+                ImageF* JXL_RESTRICT out) {
+  const size_t ysize = rect.ysize();
+  JXL_CHECK(RunOnPool(
+      pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
+      [&](const uint32_t task, size_t /*thread*/) {
+        const int64_t iy = task;  // task index is the output row
+
+        if (iy < 2 || iy >= static_cast<ssize_t>(ysize) - 2) {  // within 2 rows of top/bottom: mirror y
+          Symmetric5BorderRow(in, rect, iy, weights, out->Row(iy));
+        } else {
+          Symmetric5Row<WrapUnchanged>(in, rect, iy, weights, out->Row(iy));
+        }
+      },
+      "Symmetric5x5Convolution"));
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(Symmetric5);  // required by HWY_DYNAMIC_DISPATCH(Symmetric5) below
+void Symmetric5(const ImageF& in, const Rect& rect,
+                const WeightsSymmetric5& weights, ThreadPool* pool,
+                ImageF* JXL_RESTRICT out) {
+  return HWY_DYNAMIC_DISPATCH(Symmetric5)(in, rect, weights, pool, out);  // forwards all arguments unchanged
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/convolve_test.cc b/third-party/libjxl/libjxl/lib/jxl/convolve_test.cc
new file mode 100644
index 0000000000..e86d637114
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/convolve_test.cc
@@ -0,0 +1,252 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/convolve.h"
+
+#include <time.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/convolve_test.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+#include <hwy/nanobenchmark.h>
+#include <hwy/tests/test_util-inl.h>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+#ifndef JXL_DEBUG_CONVOLVE
+#define JXL_DEBUG_CONVOLVE 0
+#endif
+
+#include "lib/jxl/convolve-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+void TestNeighbors() {  // checks Neighbors::FirstL1/L2/L3 against hand-written expected lanes
+  const Neighbors::D d;
+  const Neighbors::V v = Iota(d, 0);  // input lanes 0, 1, 2, ... per the expected tables below
+  HWY_ALIGN float actual[hwy::kTestMaxVectorSize / sizeof(float)] = {0};
+
+  HWY_ALIGN float first_l1[hwy::kTestMaxVectorSize / sizeof(float)] = {
+      0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14};
+  Store(Neighbors::FirstL1(v), d, actual);
+  const size_t N = Lanes(d);
+  EXPECT_EQ(std::vector<float>(first_l1, first_l1 + N),  // only the first N lanes are compared
+            std::vector<float>(actual, actual + N));
+
+#if HWY_TARGET != HWY_SCALAR
+  HWY_ALIGN float first_l2[hwy::kTestMaxVectorSize / sizeof(float)] = {
+      1, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13};
+  Store(Neighbors::FirstL2(v), d, actual);
+  EXPECT_EQ(std::vector<float>(first_l2, first_l2 + N),
+            std::vector<float>(actual, actual + N));
+
+  HWY_ALIGN float first_l3[hwy::kTestMaxVectorSize / sizeof(float)] = {
+      2, 1, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+  Store(Neighbors::FirstL3(v), d, actual);
+  EXPECT_EQ(std::vector<float>(first_l3, first_l3 + N),
+            std::vector<float>(actual, actual + N));
+#endif  // HWY_TARGET != HWY_SCALAR
+}
+
+void VerifySymmetric3(const size_t xsize, const size_t ysize, ThreadPool* pool,  // Symmetric3 vs SlowSymmetric3 on random input
+                      Rng* rng) {
+  const Rect rect(0, 0, xsize, ysize);
+
+  ImageF in(xsize, ysize);
+  GenerateImage(*rng, &in, 0.0f, 1.0f);
+
+  ImageF out_expected(xsize, ysize);
+  ImageF out_actual(xsize, ysize);
+
+  const WeightsSymmetric3& weights = WeightsSymmetric3Lowpass();
+  Symmetric3(in, rect, weights, pool, &out_expected);  // NOTE(review): fast path lands in out_expected, slow in out_actual - names look swapped
+  SlowSymmetric3(in, rect, weights, pool, &out_actual);
+
+  JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
+}
+
+// Ensures Symmetric and Separable give the same result for the lowpass kernels.
+void VerifySymmetric5(const size_t xsize, const size_t ysize, ThreadPool* pool,
+                      Rng* rng) {
+  const Rect rect(0, 0, xsize, ysize);
+
+  ImageF in(xsize, ysize);
+  GenerateImage(*rng, &in, 0.0f, 1.0f);
+
+  ImageF out_expected(xsize, ysize);
+  ImageF out_actual(xsize, ysize);
+
+  Separable5(in, Rect(in), WeightsSeparable5Lowpass(), pool, &out_expected);  // reference: separable 5x5
+  Symmetric5(in, rect, WeightsSymmetric5Lowpass(), pool, &out_actual);  // under test: symmetric 5x5
+
+  JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
+}
+
+void VerifySeparable5(const size_t xsize, const size_t ysize, ThreadPool* pool,  // Separable5 vs SlowSeparable5 on random input
+                      Rng* rng) {
+  const Rect rect(0, 0, xsize, ysize);
+
+  ImageF in(xsize, ysize);
+  GenerateImage(*rng, &in, 0.0f, 1.0f);
+
+  ImageF out_expected(xsize, ysize);
+  ImageF out_actual(xsize, ysize);
+
+  const WeightsSeparable5& weights = WeightsSeparable5Lowpass();
+  Separable5(in, Rect(in), weights, pool, &out_expected);  // NOTE(review): fast path lands in out_expected, slow in out_actual - names look swapped
+  SlowSeparable5(in, rect, weights, pool, &out_actual);
+
+  JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
+}
+
+void VerifySeparable7(const size_t xsize, const size_t ysize, ThreadPool* pool,  // Separable7 vs SlowSeparable7 on random input
+                      Rng* rng) {
+  const Rect rect(0, 0, xsize, ysize);
+
+  ImageF in(xsize, ysize);
+  GenerateImage(*rng, &in, 0.0f, 1.0f);
+
+  ImageF out_expected(xsize, ysize);
+  ImageF out_actual(xsize, ysize);
+
+  // Gaussian sigma 1.0; the same four taps are used for both weight arrays.
+  const WeightsSeparable7 weights = {{HWY_REP4(0.383103f), HWY_REP4(0.241843f),
+                                      HWY_REP4(0.060626f), HWY_REP4(0.00598f)},
+                                     {HWY_REP4(0.383103f), HWY_REP4(0.241843f),
+                                      HWY_REP4(0.060626f), HWY_REP4(0.00598f)}};
+
+  SlowSeparable7(in, rect, weights, pool, &out_expected);  // slow reference
+  Separable7(in, Rect(in), weights, pool, &out_actual);  // implementation under test
+
+  JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
+}
+
+// For all xsize/ysize and kernels: compare each convolution with its reference.
+void TestConvolve() {
+  TestNeighbors();
+
+  test::ThreadPoolForTests pool(4);
+  EXPECT_EQ(true,
+            RunOnPool(
+                &pool, kConvolveMaxRadius, 40, ThreadPool::NoInit,  // task index doubles as xsize
+                [](const uint32_t task, size_t /*thread*/) {
+                  const size_t xsize = task;
+                  Rng rng(129 + 13 * xsize);  // seed depends only on xsize
+
+                  ThreadPool* null_pool = nullptr;
+                  test::ThreadPoolForTests pool3(3);
+                  for (size_t ysize = kConvolveMaxRadius; ysize < 16; ++ysize) {
+                    JXL_DEBUG(JXL_DEBUG_CONVOLVE,
+                              "%" PRIuS " x %" PRIuS " (target %" PRIx64
+                              ")===============================",
+                              xsize, ysize, static_cast<int64_t>(HWY_TARGET));
+
+                    JXL_DEBUG(JXL_DEBUG_CONVOLVE, "Sym3------------------");
+                    VerifySymmetric3(xsize, ysize, null_pool, &rng);  // each kernel: once without a pool, once with pool3
+                    VerifySymmetric3(xsize, ysize, &pool3, &rng);
+
+                    JXL_DEBUG(JXL_DEBUG_CONVOLVE, "Sym5------------------");
+                    VerifySymmetric5(xsize, ysize, null_pool, &rng);
+                    VerifySymmetric5(xsize, ysize, &pool3, &rng);
+
+                    JXL_DEBUG(JXL_DEBUG_CONVOLVE, "Sep5------------------");
+                    VerifySeparable5(xsize, ysize, null_pool, &rng);
+                    VerifySeparable5(xsize, ysize, &pool3, &rng);
+
+                    JXL_DEBUG(JXL_DEBUG_CONVOLVE, "Sep7------------------");
+                    VerifySeparable7(xsize, ysize, null_pool, &rng);
+                    VerifySeparable7(xsize, ysize, &pool3, &rng);
+                  }
+                },
+                "TestConvolve"));
+}
+
+// Measures durations and prints timings for `conv`. `unpredictable1`
+// must have value 1 (unknown to the compiler to prevent elision).
+template <class Conv>
+void BenchmarkConv(const char* caption, const Conv& conv,
+                   const hwy::FuncInput unpredictable1) {
+  const size_t kNumInputs = 1;
+  const hwy::FuncInput inputs[kNumInputs] = {unpredictable1};
+  hwy::Result results[kNumInputs];
+
+  const size_t kDim = 160;  // in+out fit in L2
+  ImageF in(kDim, kDim);
+  ZeroFillImage(&in);
+  in.Row(kDim / 2)[kDim / 2] = unpredictable1;  // single non-zero pixel in the middle
+  ImageF out(kDim, kDim);
+
+  hwy::Params p;
+  p.verbose = false;
+  p.max_evals = 7;
+  p.target_rel_mad = 0.002;
+  const size_t num_results = MeasureClosure(
+      [&in, &conv, &out](const hwy::FuncInput input) {
+        conv(in, &out);
+        return out.Row(input)[0];  // returning an output value keeps the call observable
+      },
+      inputs, kNumInputs, results, p);
+  if (num_results != kNumInputs) {
+    fprintf(stderr, "MeasureClosure failed.\n");
+  }
+  for (size_t i = 0; i < num_results; ++i) {
+    const double seconds = static_cast<double>(results[i].ticks) /
+                           hwy::platform::InvariantTicksPerSecond();
+    printf("%12s: %7.2f MP/s (MAD=%4.2f%%)\n", caption,
+           kDim * kDim * 1E-6 / seconds,  // megapixels processed per second
+           static_cast<double>(results[i].variability) * 100.0);
+  }
+}
+
+struct ConvSymmetric3 {  // BenchmarkConv functor: Symmetric3 with lowpass weights
+  void operator()(const ImageF& in, ImageF* JXL_RESTRICT out) const {
+    ThreadPool* null_pool = nullptr;  // benchmark without a thread pool
+    Symmetric3(in, Rect(in), WeightsSymmetric3Lowpass(), null_pool, out);
+  }
+};
+
+struct ConvSeparable5 {  // BenchmarkConv functor: Separable5 with lowpass weights
+  void operator()(const ImageF& in, ImageF* JXL_RESTRICT out) const {
+    ThreadPool* null_pool = nullptr;  // benchmark without a thread pool
+    Separable5(in, Rect(in), WeightsSeparable5Lowpass(), null_pool, out);
+  }
+};
+
+void BenchmarkAll() {  // currently an empty test body; see the #if 0 below
+#if 0  // disabled to avoid test timeouts, run manually on demand
+  const hwy::FuncInput unpredictable1 = time(nullptr) != 1234;  // evaluates to 1 unless the clock reads exactly 1234
+  BenchmarkConv("Symmetric3", ConvSymmetric3(), unpredictable1);
+  BenchmarkConv("Separable5", ConvSeparable5(), unpredictable1);
+#endif
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+class ConvolveTest : public hwy::TestWithParamTarget {};  // fixture parameterized by Highway target
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(ConvolveTest);
+
+HWY_EXPORT_AND_TEST_P(ConvolveTest, TestConvolve);  // registers the per-target TestConvolve defined above
+
+HWY_EXPORT_AND_TEST_P(ConvolveTest, BenchmarkAll);  // registers the per-target BenchmarkAll defined above
+
+}  // namespace jxl
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl/data_parallel_test.cc b/third-party/libjxl/libjxl/lib/jxl/data_parallel_test.cc
new file mode 100644
index 0000000000..ee2a97f93a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/data_parallel_test.cc
@@ -0,0 +1,87 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/data_parallel.h"
+
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+class DataParallelTest : public ::testing::Test {
+ protected:
+  // A fake runner that records every argument it receives, used to verify
+  // that the pool is properly calling the client-provided runner function.
+  static int FakeRunner(void* runner_opaque, void* jpegxl_opaque,
+                        JxlParallelRunInit init, JxlParallelRunFunction func,
+                        uint32_t start_range, uint32_t end_range) {
+    DataParallelTest* self = static_cast<DataParallelTest*>(runner_opaque);
+    self->runner_called_++;
+    self->jpegxl_opaque_ = jpegxl_opaque;
+    self->init_ = init;
+    self->func_ = func;
+    self->start_range_ = start_range;
+    self->end_range_ = end_range;
+    return self->runner_return_;
+  }
+
+  ThreadPool pool_{&DataParallelTest::FakeRunner, this};
+
+  // Number of times FakeRunner() was called.
+  int runner_called_ = 0;
+
+  // Parameters most recently passed to FakeRunner.
+  void* jpegxl_opaque_ = nullptr;
+  JxlParallelRunInit init_ = nullptr;
+  JxlParallelRunFunction func_ = nullptr;
+  uint32_t start_range_ = -1;  // Wraps to UINT32_MAX until FakeRunner runs.
+  uint32_t end_range_ = -1;    // Wraps to UINT32_MAX until FakeRunner runs.
+
+  // Value that FakeRunner returns; tests change it to simulate a failure.
+  int runner_return_ = 0;
+};
+
+// JxlParallelRunInit interface.
+typedef int (*JxlParallelRunInit)();
+
+}  // namespace
+
+TEST_F(DataParallelTest, RunnerCalledParameters) {
+  EXPECT_TRUE(pool_.Run(
+      1234, 5678, [](size_t /* num_threads */) { return true; },
+      [](uint32_t /* task */, size_t /* thread */) { return; }));
+  EXPECT_EQ(1, runner_called_);  // Exactly one runner invocation.
+  EXPECT_NE(nullptr, init_);
+  EXPECT_NE(nullptr, func_);
+  EXPECT_NE(nullptr, jpegxl_opaque_);
+  EXPECT_EQ(1234u, start_range_);  // Range bounds are forwarded unchanged.
+  EXPECT_EQ(5678u, end_range_);
+}
+
+TEST_F(DataParallelTest, RunnerFailurePropagates) {
+  runner_return_ = -1;  // FakeRunner reports -1; Run() is expected to fail.
+  EXPECT_FALSE(pool_.Run(
+      1234, 5678, [](size_t /* num_threads */) { return false; },
+      [](uint32_t /* task */, size_t /* thread */) { return; }));
+  EXPECT_FALSE(RunOnPool(
+      nullptr, 1234, 5678, [](size_t /* num_threads */) { return false; },
+      [](uint32_t /* task */, size_t /* thread */) { return; }, "Test"));
+}
+
+TEST_F(DataParallelTest, RunnerNotCalledOnEmptyRange) {
+  runner_return_ = -1;  // FakeRunner would report -1 if it were called.
+  EXPECT_TRUE(pool_.Run(
+      123, 123, [](size_t /* num_threads */) { return false; },
+      [](uint32_t /* task */, size_t /* thread */) { return; }));
+  EXPECT_TRUE(RunOnPool(
+      nullptr, 123, 123, [](size_t /* num_threads */) { return false; },
+      [](uint32_t /* task */, size_t /* thread */) { return; }, "Test"));
+  // The external runner is not called when the range is empty, and the init
+  // function (which returns false here) does not need to be called either.
+  EXPECT_EQ(0, runner_called_);
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/dct-inl.h b/third-party/libjxl/libjxl/lib/jxl/dct-inl.h
new file mode 100644
index 0000000000..532606075e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dct-inl.h
@@ -0,0 +1,334 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Fast SIMD floating-point (I)DCT, any power of two.
+
+#if defined(LIB_JXL_DCT_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_DCT_INL_H_
+#undef LIB_JXL_DCT_INL_H_
+#else
+#define LIB_JXL_DCT_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/dct_block-inl.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/transpose-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::NegMulAdd;
+using hwy::HWY_NAMESPACE::Sub;
+
+template <size_t SZ>
+struct FVImpl {
+  using type = HWY_CAPPED(float, SZ);  // Float tag capped at SZ lanes.
+};
+
+template <>
+struct FVImpl<0> {
+  using type = HWY_FULL(float);  // SZ == 0 selects the full-width float tag.
+};
+
+template <size_t SZ>
+using FV = typename FVImpl<SZ>::type;  // Tag type used by all helpers below.
+
+// Implementation of Lowest Complexity Self Recursive Radix-2 DCT II/III
+// Algorithms, by Siriani M. Perera and Jianhua Liu.
+
+template <size_t N, size_t SZ>
+struct CoeffBundle {
+  static void AddReverse(const float* JXL_RESTRICT ain1,
+                         const float* JXL_RESTRICT ain2,
+                         float* JXL_RESTRICT aout) {
+    for (size_t i = 0; i < N; i++) {  // aout[i] = ain1[i] + ain2[N - 1 - i]
+      auto in1 = Load(FV<SZ>(), ain1 + i * SZ);
+      auto in2 = Load(FV<SZ>(), ain2 + (N - i - 1) * SZ);
+      Store(Add(in1, in2), FV<SZ>(), aout + i * SZ);
+    }
+  }
+  static void SubReverse(const float* JXL_RESTRICT ain1,
+                         const float* JXL_RESTRICT ain2,
+                         float* JXL_RESTRICT aout) {
+    for (size_t i = 0; i < N; i++) {  // aout[i] = ain1[i] - ain2[N - 1 - i]
+      auto in1 = Load(FV<SZ>(), ain1 + i * SZ);
+      auto in2 = Load(FV<SZ>(), ain2 + (N - i - 1) * SZ);
+      Store(Sub(in1, in2), FV<SZ>(), aout + i * SZ);
+    }
+  }
+  static void B(float* JXL_RESTRICT coeff) {
+    auto sqrt2 = Set(FV<SZ>(), kSqrt2);
+    auto in1 = Load(FV<SZ>(), coeff);
+    auto in2 = Load(FV<SZ>(), coeff + SZ);
+    Store(MulAdd(in1, sqrt2, in2), FV<SZ>(), coeff);  // c[0]*sqrt2 + c[1]
+    for (size_t i = 1; i + 1 < N; i++) {  // c[i] += c[i + 1]
+      auto in1 = Load(FV<SZ>(), coeff + i * SZ);
+      auto in2 = Load(FV<SZ>(), coeff + (i + 1) * SZ);
+      Store(Add(in1, in2), FV<SZ>(), coeff + i * SZ);
+    }
+  }
+  static void BTranspose(float* JXL_RESTRICT coeff) {
+    for (size_t i = N - 1; i > 0; i--) {  // c[i] += c[i - 1], from the top down
+      auto in1 = Load(FV<SZ>(), coeff + i * SZ);
+      auto in2 = Load(FV<SZ>(), coeff + (i - 1) * SZ);
+      Store(Add(in1, in2), FV<SZ>(), coeff + i * SZ);
+    }
+    auto sqrt2 = Set(FV<SZ>(), kSqrt2);
+    auto in1 = Load(FV<SZ>(), coeff);
+    Store(Mul(in1, sqrt2), FV<SZ>(), coeff);  // c[0] *= sqrt2
+  }
+  // Halves -> even/odd slots. Ideally optimized away by the compiler.
+  static void InverseEvenOdd(const float* JXL_RESTRICT ain,
+                             float* JXL_RESTRICT aout) {
+    for (size_t i = 0; i < N / 2; i++) {
+      auto in1 = Load(FV<SZ>(), ain + i * SZ);
+      Store(in1, FV<SZ>(), aout + 2 * i * SZ);
+    }
+    for (size_t i = N / 2; i < N; i++) {
+      auto in1 = Load(FV<SZ>(), ain + i * SZ);
+      Store(in1, FV<SZ>(), aout + (2 * (i - N / 2) + 1) * SZ);
+    }
+  }
+  // Strided even/odd rows -> halves. Ideally optimized away by the compiler.
+  static void ForwardEvenOdd(const float* JXL_RESTRICT ain, size_t ain_stride,
+                             float* JXL_RESTRICT aout) {
+    for (size_t i = 0; i < N / 2; i++) {
+      auto in1 = LoadU(FV<SZ>(), ain + 2 * i * ain_stride);
+      Store(in1, FV<SZ>(), aout + i * SZ);
+    }
+    for (size_t i = N / 2; i < N; i++) {
+      auto in1 = LoadU(FV<SZ>(), ain + (2 * (i - N / 2) + 1) * ain_stride);
+      Store(in1, FV<SZ>(), aout + i * SZ);
+    }
+  }
+  // Invoked on the full N-vector buffer; scales only its upper half.
+  static void Multiply(float* JXL_RESTRICT coeff) {
+    for (size_t i = 0; i < N / 2; i++) {
+      auto in1 = Load(FV<SZ>(), coeff + (N / 2 + i) * SZ);
+      auto mul = Set(FV<SZ>(), WcMultipliers<N>::kMultipliers[i]);
+      Store(Mul(in1, mul), FV<SZ>(), coeff + (N / 2 + i) * SZ);
+    }
+  }
+  static void MultiplyAndAdd(const float* JXL_RESTRICT coeff,
+                             float* JXL_RESTRICT out, size_t out_stride) {
+    for (size_t i = 0; i < N / 2; i++) {  // out rows i, N-1-i: in1 +/- mul*in2
+      auto mul = Set(FV<SZ>(), WcMultipliers<N>::kMultipliers[i]);
+      auto in1 = Load(FV<SZ>(), coeff + i * SZ);
+      auto in2 = Load(FV<SZ>(), coeff + (N / 2 + i) * SZ);
+      auto out1 = MulAdd(mul, in2, in1);
+      auto out2 = NegMulAdd(mul, in2, in1);
+      StoreU(out1, FV<SZ>(), out + i * out_stride);
+      StoreU(out2, FV<SZ>(), out + (N - i - 1) * out_stride);
+    }
+  }
+  template <typename Block>
+  static void LoadFromBlock(const Block& in, size_t off,
+                            float* JXL_RESTRICT coeff) {
+    for (size_t i = 0; i < N; i++) {  // Row i at column offset off -> coeff.
+      Store(in.LoadPart(FV<SZ>(), i, off), FV<SZ>(), coeff + i * SZ);
+    }
+  }
+  template <typename Block>
+  static void StoreToBlockAndScale(const float* JXL_RESTRICT coeff,
+                                   const Block& out, size_t off) {
+    auto mul = Set(FV<SZ>(), 1.0f / N);  // Every stored value is scaled by 1/N.
+    for (size_t i = 0; i < N; i++) {
+      out.StorePart(FV<SZ>(), Mul(mul, Load(FV<SZ>(), coeff + i * SZ)), i, off);
+    }
+  }
+};
+
+template <size_t N, size_t SZ>
+struct DCT1DImpl;
+
+template <size_t SZ>
+struct DCT1DImpl<1, SZ> {
+  JXL_INLINE void operator()(float* JXL_RESTRICT mem) {}  // N == 1: no-op.
+};
+
+template <size_t SZ>
+struct DCT1DImpl<2, SZ> {
+  JXL_INLINE void operator()(float* JXL_RESTRICT mem) {
+    const FV<SZ> d;  // N == 2: (a, b) -> (a + b, a - b), in place.
+    const auto a = Load(d, mem), b = Load(d, mem + SZ);
+    Store(Add(a, b), d, mem);
+    Store(Sub(a, b), d, mem + SZ);
+  }
+};
+
+template <size_t N, size_t SZ>
+struct DCT1DImpl {
+  void operator()(float* JXL_RESTRICT mem) {
+    // Scratch for both halves; small (4kB with 64-DCT and AVX-512).
+    HWY_ALIGN float tmp[N * SZ];
+    CoeffBundle<N / 2, SZ>::AddReverse(mem, mem + N / 2 * SZ, tmp);
+    DCT1DImpl<N / 2, SZ>()(tmp);  // Half-size DCT of the sums.
+    CoeffBundle<N / 2, SZ>::SubReverse(mem, mem + N / 2 * SZ, tmp + N / 2 * SZ);
+    CoeffBundle<N, SZ>::Multiply(tmp);  // Scales the differences (upper half).
+    DCT1DImpl<N / 2, SZ>()(tmp + N / 2 * SZ);  // Half-size DCT of those.
+    CoeffBundle<N / 2, SZ>::B(tmp + N / 2 * SZ);
+    CoeffBundle<N, SZ>::InverseEvenOdd(tmp, mem);  // Interleave back to mem.
+  }
+};
+
+template <size_t N, size_t SZ>
+struct IDCT1DImpl;
+
+template <size_t SZ>
+struct IDCT1DImpl<1, SZ> {
+  JXL_INLINE void operator()(const float* from, size_t from_stride, float* to,
+                             size_t to_stride) {
+    StoreU(LoadU(FV<SZ>(), from), FV<SZ>(), to);  // N == 1: plain copy.
+  }
+};
+
+template <size_t SZ>
+struct IDCT1DImpl<2, SZ> {
+  JXL_INLINE void operator()(const float* from, size_t from_stride, float* to,
+                             size_t to_stride) {
+    JXL_DASSERT(from_stride >= SZ);
+    JXL_DASSERT(to_stride >= SZ);
+    const FV<SZ> d;  // N == 2: (a, b) -> (a + b, a - b) on two strided rows.
+    const auto a = LoadU(d, from), b = LoadU(d, from + from_stride);
+    StoreU(Add(a, b), d, to);
+    StoreU(Sub(a, b), d, to + to_stride);
+  }
+};
+
+template <size_t N, size_t SZ>
+struct IDCT1DImpl {
+  void operator()(const float* from, size_t from_stride, float* to,
+                  size_t to_stride) {
+    JXL_DASSERT(from_stride >= SZ);
+    JXL_DASSERT(to_stride >= SZ);
+    // Scratch for both halves; small (4kB with 64-DCT and AVX-512).
+    HWY_ALIGN float tmp[N * SZ];
+    CoeffBundle<N, SZ>::ForwardEvenOdd(from, from_stride, tmp);
+    IDCT1DImpl<N / 2, SZ>()(tmp, SZ, tmp, SZ);  // Lower half, in place.
+    CoeffBundle<N / 2, SZ>::BTranspose(tmp + N / 2 * SZ);
+    IDCT1DImpl<N / 2, SZ>()(tmp + N / 2 * SZ, SZ, tmp + N / 2 * SZ, SZ);
+    CoeffBundle<N, SZ>::MultiplyAndAdd(tmp, to, to_stride);  // Writes to `to`.
+  }
+};
+
+template <size_t N, size_t M_or_0, typename FromBlock, typename ToBlock>
+void DCT1DWrapper(const FromBlock& from, const ToBlock& to, size_t Mp) {
+  const size_t num_cols = (M_or_0 == 0) ? Mp : M_or_0;  // 0: runtime width Mp
+  constexpr size_t SZ = MaxLanes(FV<M_or_0>());
+  HWY_ALIGN float column_buf[N * SZ];
+  for (size_t col = 0; col < num_cols; col += Lanes(FV<M_or_0>())) {
+    // TODO(veluca): consider removing the temporary memory here (as is done in
+    // IDCT), if it turns out that some compilers don't optimize away the loads
+    // and this is performance-critical.
+    CoeffBundle<N, SZ>::LoadFromBlock(from, col, column_buf);
+    DCT1DImpl<N, SZ>()(column_buf);
+    CoeffBundle<N, SZ>::StoreToBlockAndScale(column_buf, to, col);
+  }
+}
+
+template <size_t N, size_t M_or_0, typename FromBlock, typename ToBlock>
+void IDCT1DWrapper(const FromBlock& from, const ToBlock& to, size_t Mp) {
+  const size_t num_cols = (M_or_0 == 0) ? Mp : M_or_0;  // 0: runtime width Mp
+  constexpr size_t SZ = MaxLanes(FV<M_or_0>());
+  for (size_t col = 0; col < num_cols; col += Lanes(FV<M_or_0>())) {
+    IDCT1DImpl<N, SZ>()(from.Address(0, col), from.Stride(),
+                        to.Address(0, col), to.Stride());
+  }
+}
+
+template <size_t N, size_t M, typename = void>
+struct DCT1D {
+  template <typename FromBlock, typename ToBlock>
+  void operator()(const FromBlock& from, const ToBlock& to) {
+    DCT1DWrapper<N, M>(from, to, M);  // M fits in one vector: fixed width.
+  }
+};
+
+template <size_t N, size_t M>  // M exceeds one vector: runtime-width path.
+struct DCT1D<N, M, typename std::enable_if<(M > MaxLanes(FV<0>()))>::type> {
+  template <typename FromBlock, typename ToBlock>
+  void operator()(const FromBlock& from, const ToBlock& to) {
+    NoInlineWrapper(DCT1DWrapper<N, 0, FromBlock, ToBlock>, from, to, M);
+  }
+};
+
+template <size_t N, size_t M, typename = void>
+struct IDCT1D {
+  template <typename FromBlock, typename ToBlock>
+  void operator()(const FromBlock& from, const ToBlock& to) {
+    IDCT1DWrapper<N, M>(from, to, M);  // M fits in one vector: fixed width.
+  }
+};
+
+template <size_t N, size_t M>
+struct IDCT1D<N, M, typename std::enable_if<(M > MaxLanes(FV<0>()))>::type> {
+  template <typename FromBlock, typename ToBlock>
+  void operator()(const FromBlock& from, const ToBlock& to) {
+    // M exceeds one vector: use the runtime-width wrapper via NoInlineWrapper.
+    NoInlineWrapper(IDCT1DWrapper<N, 0, FromBlock, ToBlock>, from, to, M);
+  }
+};
+
+// Computes the maybe-transposed, scaled 2D DCT of a block, that needs to be
+// HWY_ALIGN'ed. ROWS < COLS runs two transposes, otherwise only one is run.
+template <size_t ROWS, size_t COLS>
+struct ComputeScaledDCT {
+  // scratch_space must be aligned, and should have space for ROWS*COLS
+  // floats. It holds intermediate results between the two 1D passes.
+  template <class From>
+  HWY_MAYBE_UNUSED void operator()(const From& from, float* to,
+                                   float* JXL_RESTRICT scratch_space) {
+    float* JXL_RESTRICT block = scratch_space;
+    if (ROWS < COLS) {  // DCT, transpose, DCT, transpose again.
+      DCT1D<ROWS, COLS>()(from, DCTTo(block, COLS));
+      Transpose<ROWS, COLS>::Run(DCTFrom(block, COLS), DCTTo(to, ROWS));
+      DCT1D<COLS, ROWS>()(DCTFrom(to, ROWS), DCTTo(block, ROWS));
+      Transpose<COLS, ROWS>::Run(DCTFrom(block, ROWS), DCTTo(to, COLS));
+    } else {  // DCT, transpose, DCT: no second transpose.
+      DCT1D<ROWS, COLS>()(from, DCTTo(to, COLS));
+      Transpose<ROWS, COLS>::Run(DCTFrom(to, COLS), DCTTo(block, ROWS));
+      DCT1D<COLS, ROWS>()(DCTFrom(block, ROWS), DCTTo(to, ROWS));
+    }
+  }
+};
+// Computes the maybe-transposed, scaled 2D IDCT of a block, that needs to be
+// HWY_ALIGN'ed. Note that `from` is overwritten: it doubles as a work buffer.
+template <size_t ROWS, size_t COLS>
+struct ComputeScaledIDCT {
+  // scratch_space must be aligned, and should have space for ROWS*COLS
+  // floats.
+  template <class To>
+  HWY_MAYBE_UNUSED void operator()(float* JXL_RESTRICT from, const To& to,
+                                   float* JXL_RESTRICT scratch_space) {
+    float* JXL_RESTRICT block = scratch_space;
+    // Reverse the steps done in ComputeScaledDCT.
+    if (ROWS < COLS) {  // Transpose, IDCT, transpose, IDCT.
+      Transpose<ROWS, COLS>::Run(DCTFrom(from, COLS), DCTTo(block, ROWS));
+      IDCT1D<COLS, ROWS>()(DCTFrom(block, ROWS), DCTTo(from, ROWS));
+      Transpose<COLS, ROWS>::Run(DCTFrom(from, ROWS), DCTTo(block, COLS));
+      IDCT1D<ROWS, COLS>()(DCTFrom(block, COLS), to);
+    } else {  // IDCT, transpose, IDCT.
+      IDCT1D<COLS, ROWS>()(DCTFrom(from, ROWS), DCTTo(block, ROWS));
+      Transpose<COLS, ROWS>::Run(DCTFrom(block, ROWS), DCTTo(from, COLS));
+      IDCT1D<ROWS, COLS>()(DCTFrom(from, COLS), to);
+    }
+  }
+};
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+#endif  // LIB_JXL_DCT_INL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dct_block-inl.h b/third-party/libjxl/libjxl/lib/jxl/dct_block-inl.h
new file mode 100644
index 0000000000..50646a737f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dct_block-inl.h
@@ -0,0 +1,108 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Adapters for DCT input/output: from/to contiguous blocks or image rows.
+
+#if defined(LIB_JXL_DCT_BLOCK_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_DCT_BLOCK_INL_H_
+#undef LIB_JXL_DCT_BLOCK_INL_H_
+#else
+#define LIB_JXL_DCT_BLOCK_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/status.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Vec;
+
+// Block: (x, y) <-> (N * y + x)
+// Lines: (x, y) <-> (stride * y + x)
+//
+// I.e. Block is a specialization of Lines with fixed stride.
+//
+// From-adapters should implement Read and LoadPart (read a vector).
+// To-adapters should implement Write and StorePart (write a vector).
+
+template <size_t N>
+using BlockDesc = HWY_CAPPED(float, N);
+
+// Here and in the following, the tag type D passed to LoadPart/StorePart
+// specifies the number of values to load/store. Needed because we want to
+// handle 4x4 sub-blocks of 16x16 blocks.
+class DCTFrom {
+ public:
+  DCTFrom(const float* data, size_t stride) : stride_(stride), data_(data) {}
+
+  template <typename D>
+  HWY_INLINE Vec<D> LoadPart(D d, const size_t row, size_t i) const {
+    JXL_DASSERT(Lanes(d) <= stride_);
+    // Unaligned load: these functions are used also for DC, so no alignment
+    // at all is guaranteed in the case of floating blocks.
+    // TODO(veluca): consider using a different class for DC-to-LF and
+    // DC-from-LF, or copying DC values to/from a temporary aligned location.
+    return LoadU(d, data_ + row * stride_ + i);
+  }
+
+  HWY_INLINE float Read(const size_t row, const size_t i) const {
+    return data_[row * stride_ + i];
+  }
+
+  constexpr HWY_INLINE const float* Address(const size_t row,
+                                            const size_t i) const {
+    return &data_[row * stride_ + i];
+  }
+
+  size_t Stride() const { return stride_; }
+
+ private:
+  size_t stride_;
+  const float* JXL_RESTRICT data_;
+};
+
+class DCTTo {
+ public:
+  DCTTo(float* data, size_t stride) : stride_(stride), data_(data) {}
+
+  template <typename D>
+  HWY_INLINE void StorePart(D d, const Vec<D>& v, const size_t row,
+                            size_t i) const {
+    JXL_DASSERT(Lanes(d) <= stride_);
+    // Unaligned store: these functions are used also for DC, so no alignment
+    // at all is guaranteed in the case of floating blocks.
+    // TODO(veluca): consider using a different class for DC-to-LF and
+    // DC-from-LF, or copying DC values to/from a temporary aligned location.
+    StoreU(v, d, data_ + row * stride_ + i);
+  }
+
+  HWY_INLINE void Write(float v, const size_t row, const size_t i) const {
+    data_[row * stride_ + i] = v;
+  }
+
+  constexpr HWY_INLINE float* Address(const size_t row, const size_t i) const {
+    return &data_[row * stride_ + i];
+  }
+
+  size_t Stride() const { return stride_; }
+
+ private:
+  size_t stride_;
+  float* JXL_RESTRICT data_;
+};
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_DCT_BLOCK_INL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dct_for_test.h b/third-party/libjxl/libjxl/lib/jxl/dct_for_test.h
new file mode 100644
index 0000000000..8e32aa7eff
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dct_for_test.h
@@ -0,0 +1,99 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DCT_FOR_TEST_H_
+#define LIB_JXL_DCT_FOR_TEST_H_
+
+// Unoptimized DCT only for use in tests.
+
+#include <string.h>  // memcpy
+
+#include <cmath>
+#include <vector>
+
+#include "lib/jxl/common.h"  // Pi
+
+namespace jxl {
+
+namespace test {
+static inline double alpha(int u) { return u == 0 ? 0.7071067811865475 : 1.0; }
+
+// N-DCT on M columns, divided by sqrt(N). Matches the definition in the spec.
+template <size_t N, size_t M>
+void DCT1D(double block[N * M], double out[N * M]) {
+  std::vector<double> mat(N * N);  // mat[N * u + y]: frequency u, sample y.
+  const double scale = std::sqrt(2.0) / N;
+  for (size_t y = 0; y < N; y++) {
+    for (size_t u = 0; u < N; u++) {
+      mat[N * u + y] = alpha(u) * std::cos((y + 0.5) * u * Pi(1.0 / N)) * scale;
+    }
+  }
+  for (size_t x = 0; x < M; x++) {  // Each column x is transformed on its own.
+    for (size_t u = 0; u < N; u++) {
+      out[M * u + x] = 0;
+      for (size_t y = 0; y < N; y++) {
+        out[M * u + x] += mat[N * u + y] * block[M * y + x];
+      }
+    }
+  }
+}
+
+// N-IDCT on M columns, multiplied by sqrt(N). Matches the definition in the
+// spec.
+template <size_t N, size_t M>
+void IDCT1D(double block[N * M], double out[N * M]) {
+  std::vector<double> mat(N * N);  // mat[N * y + u]: sample y, frequency u.
+  const double scale = std::sqrt(2.0);
+  for (size_t y = 0; y < N; y++) {
+    for (size_t u = 0; u < N; u++) {
+      // Transpose of DCT matrix.
+      mat[N * y + u] = alpha(u) * std::cos((y + 0.5) * u * Pi(1.0 / N)) * scale;
+    }
+  }
+  for (size_t x = 0; x < M; x++) {  // Each column x is transformed on its own.
+    for (size_t u = 0; u < N; u++) {
+      out[M * u + x] = 0;
+      for (size_t y = 0; y < N; y++) {
+        out[M * u + x] += mat[N * u + y] * block[M * y + x];
+      }
+    }
+  }
+}
+
+template <size_t N, size_t M>
+void TransposeBlock(double in[N * M], double out[M * N]) {
+  for (size_t row = 0; row < N; ++row) {  // `in` is read as N rows of M.
+    for (size_t col = 0; col < M; ++col) {
+      out[col * N + row] = in[row * M + col];
+    }
+  }
+}
+}  // namespace test
+
+// Untransposed DCT.
+template <size_t N>
+void DCTSlow(double block[N * N]) {
+  std::vector<double> scratch(N * N);
+  // Two rounds of (1D DCT into scratch, transpose back into block).
+  for (int pass = 0; pass < 2; ++pass) {
+    test::DCT1D<N, N>(block, scratch.data());
+    test::TransposeBlock<N, N>(scratch.data(), block);
+  }
+}
+
+// Untransposed IDCT.
+template <size_t N>
+void IDCTSlow(double block[N * N]) {
+  std::vector<double> scratch(N * N);
+  // Two rounds of (1D IDCT into scratch, transpose back into block).
+  for (int pass = 0; pass < 2; ++pass) {
+    test::IDCT1D<N, N>(block, scratch.data());
+    test::TransposeBlock<N, N>(scratch.data(), block);
+  }
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DCT_FOR_TEST_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dct_scales.cc b/third-party/libjxl/libjxl/lib/jxl/dct_scales.cc
new file mode 100644
index 0000000000..f9e89a6014
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dct_scales.cc
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dct_scales.h"
+
+namespace jxl {
+
+// Out-of-class definitions of the constexpr arrays (for ODR-use before C++17).
+constexpr float DCTResampleScales<1, 8>::kScales[];
+constexpr float DCTResampleScales<2, 16>::kScales[];
+constexpr float DCTResampleScales<4, 32>::kScales[];
+constexpr float DCTResampleScales<8, 64>::kScales[];
+constexpr float DCTResampleScales<16, 128>::kScales[];
+constexpr float DCTResampleScales<32, 256>::kScales[];
+constexpr float DCTResampleScales<8, 1>::kScales[];
+constexpr float DCTResampleScales<16, 2>::kScales[];
+constexpr float DCTResampleScales<32, 4>::kScales[];
+constexpr float DCTResampleScales<64, 8>::kScales[];
+constexpr float DCTResampleScales<128, 16>::kScales[];
+constexpr float DCTResampleScales<256, 32>::kScales[];
+constexpr float WcMultipliers<4>::kMultipliers[];
+constexpr float WcMultipliers<8>::kMultipliers[];
+constexpr float WcMultipliers<16>::kMultipliers[];
+constexpr float WcMultipliers<32>::kMultipliers[];
+constexpr float WcMultipliers<64>::kMultipliers[];
+constexpr float WcMultipliers<128>::kMultipliers[];
+constexpr float WcMultipliers<256>::kMultipliers[];
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/dct_scales.h b/third-party/libjxl/libjxl/lib/jxl/dct_scales.h
new file mode 100644
index 0000000000..23af03d60f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dct_scales.h
@@ -0,0 +1,379 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DCT_SCALES_H_
+#define LIB_JXL_DCT_SCALES_H_
+
+// Scaling factors.
+
+#include <stddef.h>
+
+namespace jxl {
+
+static constexpr float kSqrt2 = 1.41421356237f;
+static constexpr float kSqrt0_5 = 0.70710678118f;
+
+// For n != 0, the n-th basis function of a N-DCT, evaluated in pixel k, has a
+// value of cos((k+1/2) n/N pi). When downsampling by 2x, we average
+// the values for pixel k and k+1 to get the value for pixel (k/2), thus we get
+//
+// [cos((k+1/2) n/N pi) + cos((k+3/2) n/N pi)]/2 =
+// cos(n/(2N) pi) cos((k+1) n/N pi) =
+// cos(n/(2N) pi) cos(((k/2)+1/2) n/(N/2) pi)
+//
+// which is exactly the same as the value of pixel k/2 of a N/2-sized DCT,
+// except for the cos(n/(2N) pi) scaling factor (which does *not*
+// depend on the pixel). Thus, when using the lower-frequency coefficients of a
+// DCT-N to compute a DCT-(N/2), they should be scaled by this constant. Scaling
+// factors for a DCT-(N/4) etc can then be obtained by successive
+// multiplications. The structs below contain the above-mentioned scaling
+// factors.
+//
+// Python code for the tables below:
+//
+// for i in range(N // 8):
+//    v = math.cos(i / (2 * N) * math.pi)
+//    v *= math.cos(i / (N) * math.pi)
+//    v *= math.cos(i / (N / 2) * math.pi)
+//    print(v, end=", ")
+
+template <size_t FROM, size_t TO>
+struct DCTResampleScales;
+
+template <>
+struct DCTResampleScales<8, 1> {
+  static constexpr float kScales[] = {
+      1.000000000000000000,
+  };
+};
+
+template <>
+struct DCTResampleScales<16, 2> {
+  static constexpr float kScales[] = {
+      1.000000000000000000,
+      0.901764195028874394,
+  };
+};
+
+template <>
+struct DCTResampleScales<32, 4> {
+  static constexpr float kScales[] = {
+      1.000000000000000000,
+      0.974886821136879522,
+      0.901764195028874394,
+      0.787054918159101335,
+  };
+};
+
+template <>
+struct DCTResampleScales<64, 8> {
+  static constexpr float kScales[] = {
+      1.0000000000000000, 0.9936866130906366, 0.9748868211368796,
+      0.9440180941651672, 0.9017641950288744, 0.8490574973847023,
+      0.7870549181591013, 0.7171081282466044,
+  };
+};
+
+template <>
+struct DCTResampleScales<128, 16> {
+  static constexpr float kScales[] = {
+      1.0,
+      0.9984194528776054,
+      0.9936866130906366,
+      0.9858278282666936,
+      0.9748868211368796,
+      0.9609244059440204,
+      0.9440180941651672,
+      0.9242615922757944,
+      0.9017641950288744,
+      0.8766500784429904,
+      0.8490574973847023,
+      0.8191378932865928,
+      0.7870549181591013,
+      0.7529833816270532,
+      0.7171081282466044,
+      0.6796228528314651,
+  };
+};
+
+template <>
+struct DCTResampleScales<256, 32> {
+  static constexpr float kScales[] = {
+      1.0,
+      0.9996047255830407,
+      0.9984194528776054,
+      0.9964458326264695,
+      0.9936866130906366,
+      0.9901456355893141,
+      0.9858278282666936,
+      0.9807391980963174,
+      0.9748868211368796,
+      0.9682788310563117,
+      0.9609244059440204,
+      0.9528337534340876,
+      0.9440180941651672,
+      0.9344896436056892,
+      0.9242615922757944,
+      0.913348084400198,
+      0.9017641950288744,
+      0.8895259056651056,
+      0.8766500784429904,
+      0.8631544288990163,
+      0.8490574973847023,
+      0.8343786191696513,
+      0.8191378932865928,
+      0.8033561501721485,
+      0.7870549181591013,
+      0.7702563888779096,
+      0.7529833816270532,
+      0.7352593067735488,
+      0.7171081282466044,
+      0.6985543251889097,
+      0.6796228528314651,
+      0.6603391026591464,
+  };
+};
+
+// Inverses of the above.
+template <>
+struct DCTResampleScales<1, 8> {
+  static constexpr float kScales[] = {
+      1.000000000000000000,
+  };
+};
+
+template <>
+struct DCTResampleScales<2, 16> {
+  static constexpr float kScales[] = {
+      1.000000000000000000,
+      1.108937353592731823,
+  };
+};
+
+template <>
+struct DCTResampleScales<4, 32> {
+  static constexpr float kScales[] = {
+      1.000000000000000000,
+      1.025760096781116015,
+      1.108937353592731823,
+      1.270559368765487251,
+  };
+};
+
+template <>
+struct DCTResampleScales<8, 64> {
+  static constexpr float kScales[] = {
+      1.0000000000000000, 1.0063534990068217, 1.0257600967811158,
+      1.0593017296817173, 1.1089373535927318, 1.1777765381970435,
+      1.2705593687654873, 1.3944898413647777,
+  };
+};
+
+template <>
+struct DCTResampleScales<16, 128> {
+  static constexpr float kScales[] = {
+      1.0,
+      1.0015830492062623,
+      1.0063534990068217,
+      1.0143759095928793,
+      1.0257600967811158,
+      1.0406645869480142,
+      1.0593017296817173,
+      1.0819447744633812,
+      1.1089373535927318,
+      1.1407059950032632,
+      1.1777765381970435,
+      1.2207956782315876,
+      1.2705593687654873,
+      1.3280505578213306,
+      1.3944898413647777,
+      1.4714043176061107,
+  };
+};
+
+template <>
+struct DCTResampleScales<32, 256> {
+  static constexpr float kScales[] = {
+      1.0,
+      1.0003954307206069,
+      1.0015830492062623,
+      1.0035668445360069,
+      1.0063534990068217,
+      1.009952439375063,
+      1.0143759095928793,
+      1.0196390660647288,
+      1.0257600967811158,
+      1.0327603660498115,
+      1.0406645869480142,
+      1.049501024072585,
+      1.0593017296817173,
+      1.0701028169146336,
+      1.0819447744633812,
+      1.0948728278734026,
+      1.1089373535927318,
+      1.124194353004584,
+      1.1407059950032632,
+      1.158541237256391,
+      1.1777765381970435,
+      1.1984966740820495,
+      1.2207956782315876,
+      1.244777922949508,
+      1.2705593687654873,
+      1.2982690107339132,
+      1.3280505578213306,
+      1.3600643892400104,
+      1.3944898413647777,
+      1.4315278911623237,
+      1.4714043176061107,
+      1.5143734423314616,
+  };
+};
+
+// Constants for DCT implementation. Generated by the following snippet:
+// for i in range(N // 2):
+//    print(1.0 / (2 * math.cos((i + 0.5) * math.pi / N)), end=", ")
+template <size_t N>
+struct WcMultipliers;
+
+template <>
+struct WcMultipliers<4> {
+  static constexpr float kMultipliers[] = {
+      0.541196100146197,
+      1.3065629648763764,
+  };
+};
+
+template <>
+struct WcMultipliers<8> {
+  static constexpr float kMultipliers[] = {
+      0.5097955791041592,
+      0.6013448869350453,
+      0.8999762231364156,
+      2.5629154477415055,
+  };
+};
+
+template <>
+struct WcMultipliers<16> {
+  static constexpr float kMultipliers[] = {
+      0.5024192861881557, 0.5224986149396889, 0.5669440348163577,
+      0.6468217833599901, 0.7881546234512502, 1.060677685990347,
+      1.7224470982383342, 5.101148618689155,
+  };
+};
+
+template <>
+struct WcMultipliers<32> {
+  static constexpr float kMultipliers[] = {
+      0.5006029982351963, 0.5054709598975436, 0.5154473099226246,
+      0.5310425910897841, 0.5531038960344445, 0.5829349682061339,
+      0.6225041230356648, 0.6748083414550057, 0.7445362710022986,
+      0.8393496454155268, 0.9725682378619608, 1.1694399334328847,
+      1.4841646163141662, 2.057781009953411,  3.407608418468719,
+      10.190008123548033,
+  };
+};
+// N = 64: the 32 values 1 / (2 * cos((i + 0.5) * pi / 64)), i = 0..31.
+template <>
+struct WcMultipliers<64> {
+  static constexpr float kMultipliers[] = {
+      0.500150636020651,  0.5013584524464084, 0.5037887256810443,
+      0.5074711720725553, 0.5124514794082247, 0.5187927131053328,
+      0.52657731515427,   0.535909816907992,  0.5469204379855088,
+      0.5597698129470802, 0.57465518403266,   0.5918185358574165,
+      0.6115573478825099, 0.6342389366884031, 0.6603198078137061,
+      0.6903721282002123, 0.7251205223771985, 0.7654941649730891,
+      0.8127020908144905, 0.8683447152233481, 0.9345835970364075,
+      1.0144082649970547, 1.1120716205797176, 1.233832737976571,
+      1.3892939586328277, 1.5939722833856311, 1.8746759800084078,
+      2.282050068005162,  2.924628428158216,  4.084611078129248,
+      6.796750711673633,  20.373878167231453,
+  };
+};
+// N = 128: the 64 values 1 / (2 * cos((i + 0.5) * pi / 128)), i = 0..63.
+template <>
+struct WcMultipliers<128> {
+  static constexpr float kMultipliers[] = {
+      0.5000376519155477, 0.5003390374428216, 0.5009427176380873,
+      0.5018505174842379, 0.5030651913013697, 0.5045904432216454,
+      0.5064309549285542, 0.5085924210498143, 0.5110815927066812,
+      0.5139063298475396, 0.5170756631334912, 0.5205998663018917,
+      0.524490540114724,  0.5287607092074876, 0.5334249333971333,
+      0.538499435291984,  0.5440022463817783, 0.549953374183236,
+      0.5563749934898856, 0.5632916653417023, 0.5707305880121454,
+      0.5787218851348208, 0.5872989370937893, 0.5964987630244563,
+      0.606362462272146,  0.6169357260050706, 0.6282694319707711,
+      0.6404203382416639, 0.6534518953751283, 0.6674352009263413,
+      0.6824501259764195, 0.6985866506472291, 0.7159464549705746,
+      0.7346448236478627, 0.7548129391165311, 0.776600658233963,
+      0.8001798956216941, 0.8257487738627852, 0.8535367510066064,
+      0.8838110045596234, 0.9168844461846523, 0.9531258743921193,
+      0.9929729612675466, 1.036949040910389,  1.0856850642580145,
+      1.1399486751015042, 1.2006832557294167, 1.2690611716991191,
+      1.346557628206286,  1.4350550884414341, 1.5369941008524954,
+      1.6555965242641195, 1.7952052190778898, 1.961817848571166,
+      2.163957818751979,  2.4141600002500763, 2.7316450287739396,
+      3.147462191781909,  3.7152427383269746, 4.5362909369693565,
+      5.827688377844654,  8.153848602466814,  13.58429025728446,
+      40.744688103351834,
+  };
+};
+
+// N = 256: the 128 values 1 / (2 * cos((i + 0.5) * pi / 256)), i = 0..127.
+// Unlike the smaller specializations, the array bound is spelled out here.
+template <>
+struct WcMultipliers<256> {
+  static constexpr float kMultipliers[128] = {
+      0.5000094125358878, 0.500084723455784,  0.5002354020255269,
+      0.5004615618093246, 0.5007633734146156, 0.5011410648064231,
+      0.5015949217281668, 0.502125288230386,  0.5027325673091954,
+      0.5034172216566842, 0.5041797745258774, 0.5050208107132756,
+      0.5059409776624396, 0.5069409866925212, 0.5080216143561264,
+      0.509183703931388,  0.5104281670536573, 0.5117559854927805,
+      0.5131682130825206, 0.5146659778093218, 0.516250484068288,
+      0.5179230150949777, 0.5196849355823947, 0.5215376944933958,
+      0.5234828280796439, 0.52552196311921,   0.5276568203859896,
+      0.5298892183652453, 0.5322210772308335, 0.5346544231010253,
+      0.537191392591309,  0.5398342376841637, 0.5425853309375497,
+      0.545447171055775,  0.5484223888484947, 0.551513753605893,
+      0.554724179920619,  0.5580567349898085, 0.5615146464335654,
+      0.5651013106696203, 0.5688203018875696, 0.5726753816701664,
+      0.5766705093136241, 0.5808098529038624, 0.5850978012111273,
+      0.58953897647151,   0.5941382481306648, 0.5989007476325463,
+      0.6038318843443582, 0.6089373627182432, 0.614223200800649,
+      0.6196957502119484, 0.6253617177319102, 0.6312281886412079,
+      0.6373026519855411, 0.6435930279473415, 0.6501076975307724,
+      0.6568555347890955, 0.6638459418498757, 0.6710888870233562,
+      0.6785949463131795, 0.6863753486870501, 0.6944420255086364,
+      0.7028076645818034, 0.7114857693151208, 0.7204907235796304,
+      0.7298378629074134, 0.7395435527641373, 0.749625274727372,
+      0.7601017215162176, 0.7709929019493761, 0.7823202570613161,
+      0.7941067887834509, 0.8063772028037925, 0.8191580674598145,
+      0.83247799080191,   0.8463678182968619, 0.860860854031955,
+      0.8759931087426972, 0.8918035785352535, 0.9083345588266809,
+      0.9256319988042384, 0.9437459026371479, 0.962730784794803,
+      0.9826461881778968, 1.0035572754078206, 1.0255355056139732,
+      1.048659411496106,  1.0730154944316674, 1.0986992590905857,
+      1.1258164135986009, 1.1544842669978943, 1.184833362908442,
+      1.217009397314603,  1.2511754798461228, 1.287514812536712,
+      1.326233878832723,  1.3675662599582539, 1.411777227500661,
+      1.459169302866857,  1.5100890297227016, 1.5649352798258847,
+      1.6241695131835794, 1.6883285509131505, 1.7580406092704062,
+      1.8340456094306077, 1.9172211551275689, 2.0086161135167564,
+      2.1094945286246385, 2.22139377701127,   2.346202662531156,
+      2.486267909203593,  2.644541877144861,  2.824791402350551,
+      3.0318994541759925, 3.2723115884254845, 3.5547153325075804,
+      3.891107790700307,  4.298537526449054,  4.802076008665048,
+      5.440166215091329,  6.274908408039339,  7.413566756422303,
+      9.058751453879703,  11.644627325175037, 16.300023088031555,
+      27.163977662448232, 81.48784219222516,
+  };
+};
+
+// Apply the DCT algorithm-intrinsic constants to DCTResampleScale.
+// Returns DCTResampleScales<FROM, TO>::kScales[x]. x indexes that table
+// directly with no bounds check, so the caller must keep it in range.
+template <size_t FROM, size_t TO>
+constexpr float DCTTotalResampleScale(size_t x) {
+  return DCTResampleScales<FROM, TO>::kScales[x];
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DCT_SCALES_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dct_test.cc b/third-party/libjxl/libjxl/lib/jxl/dct_test.cc
new file mode 100644
index 0000000000..9f5eff41e9
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dct_test.cc
@@ -0,0 +1,389 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <string.h>
+
+#include <cmath>
+#include <numeric>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dct_test.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+#include <hwy/tests/test_util-inl.h>
+
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct-inl.h"
+#include "lib/jxl/dct_for_test.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/test_utils.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// Computes the in-place NxN DCT of block.
+// Requires that block is HWY_ALIGN'ed.
+//
+// Runs ComputeScaledDCT<N, N> into a temporary buffer and then transposes the
+// result back into block to obtain the "vanilla" DCT layout.
+template <size_t N>
+void ComputeDCT(float block[N * N]) {
+  HWY_ALIGN float tmp_block[N * N];
+  // Working memory handed to ComputeScaledDCT.
+  HWY_ALIGN float scratch_space[N * N];
+  ComputeScaledDCT<N, N>()(DCTFrom(block, N), tmp_block, scratch_space);
+
+  // Untranspose.
+  Transpose<N, N>::Run(DCTFrom(tmp_block, N), DCTTo(block, N));
+}
+
+// Computes the in-place NxN iDCT of block.
+// Requires that block is HWY_ALIGN'ed.
+//
+// Counterpart of ComputeDCT: block is first transposed into a temporary
+// buffer, which ComputeScaledIDCT<N, N> then transforms back into block.
+// N is a size_t, like ComputeDCT's parameter and the constants callers pass.
+template <size_t N>
+void ComputeIDCT(float block[N * N]) {
+  HWY_ALIGN float tmp_block[N * N];
+  // Working memory handed to ComputeScaledIDCT.
+  HWY_ALIGN float scratch_space[N * N];
+  // Untranspose.
+  Transpose<N, N>::Run(DCTFrom(block, N), DCTTo(tmp_block, N));
+
+  ComputeScaledIDCT<N, N>()(tmp_block, DCTTo(block, N), scratch_space);
+}
+
+// Checks Transpose<N, N>: writes y * N + x into cell (y, x) of an NxN block,
+// transposes it, and expects cell (y, x) of the result to hold x * N + y,
+// within `accuracy`.
+template <size_t N>
+void TransposeTestT(float accuracy) {
+  HWY_ALIGN float input[N * N];
+  HWY_ALIGN float transposed[N * N];
+
+  DCTTo input_writer(input, N);
+  for (size_t row = 0; row < N; ++row) {
+    for (size_t col = 0; col < N; ++col) {
+      input_writer.Write(row * N + col, row, col);
+    }
+  }
+
+  Transpose<N, N>::Run(DCTFrom(input, N), DCTTo(transposed, N));
+
+  DCTFrom result(transposed, N);
+  for (size_t y = 0; y < N; ++y) {
+    for (size_t x = 0; x < N; ++x) {
+      const float actual = result.Read(y, x);
+      const float expected = x * N + y;
+      EXPECT_NEAR(expected, actual, accuracy) << "x = " << x << ", y = " << y;
+    }
+  }
+}
+
+// Runs the square transpose check for 8x8, 16x16 and 32x32 blocks, each with
+// an absolute tolerance of 1e-7.
+void TransposeTest() {
+  TransposeTestT<8>(1e-7f);
+  TransposeTestT<16>(1e-7f);
+  TransposeTestT<32>(1e-7f);
+}
+
+// Round-trips the NxN identity matrix through DCT1D followed by IDCT1D and
+// checks that every entry comes back within `accuracy`.
+template <size_t N>
+void ColumnDctRoundtripT(float accuracy) {
+  // Though we are only interested in a single column result, dct.h has a
+  // built-in limit on the minimal number of columns processed. So, to be safe,
+  // we transform a regular NxN block. On the bright side - this checks all N
+  // basis vectors at once.
+  HWY_ALIGN float identity[N * N];
+  DCTTo identity_out(identity, N);
+  DCTFrom identity_in(identity, N);
+  for (size_t row = 0; row < N; ++row) {
+    for (size_t col = 0; col < N; ++col) {
+      identity_out.Write((row == col) ? 1.0f : 0.0f, row, col);
+    }
+  }
+
+  // Running (I)DCT on the same memory block seems to trigger a compiler bug on
+  // ARMv7 with clang6, hence the separate buffer for the coefficients.
+  HWY_ALIGN float coeffs[N * N];
+  DCTTo coeffs_out(coeffs, N);
+  DCTFrom coeffs_in(coeffs, N);
+
+  DCT1D<N, N>()(identity_in, coeffs_out);
+  IDCT1D<N, N>()(coeffs_in, identity_out);
+
+  for (size_t row = 0; row < N; ++row) {
+    for (size_t col = 0; col < N; ++col) {
+      const float actual = identity_in.Read(row, col);
+      const float expected = (row == col) ? 1.0f : 0.0f;
+      EXPECT_NEAR(expected, actual, accuracy)
+          << " i=" << row << ", j=" << col;
+    }
+  }
+}
+
+// Runs the DCT1D/IDCT1D identity round-trip for N = 8, 16 and 32, each with an
+// absolute tolerance of 1e-6.
+void ColumnDctRoundtrip() {
+  ColumnDctRoundtripT<8>(1e-6f);
+  ColumnDctRoundtripT<16>(1e-6f);
+  ColumnDctRoundtripT<32>(1e-6f);
+}
+
+// Compares the fast NxN DCT (ComputeDCT) against the double-precision
+// reference (DCTSlow) on unit impulses placed at positions [start, end).
+// Every output coefficient must agree within accuracy / N.
+template <size_t N>
+void TestDctAccuracy(float accuracy, size_t start = 0, size_t end = N * N) {
+  constexpr size_t kNumCoeffs = N * N;
+  const float tolerance = accuracy / N;
+  for (size_t i = start; i < end; ++i) {
+    // Same impulse for both implementations.
+    double reference[kNumCoeffs] = {0.0};
+    HWY_ALIGN float simd[kNumCoeffs] = {0.0f};
+    reference[i] = 1.0;
+    simd[i] = 1.0;
+
+    DCTSlow<N>(reference);
+    ComputeDCT<N>(simd);
+
+    for (size_t k = 0; k < kNumCoeffs; ++k) {
+      EXPECT_NEAR(simd[k], reference[k], tolerance)
+          << "i = " << i << ", k = " << k << ", N = " << N;
+    }
+  }
+}
+
+// Compares the fast NxN inverse DCT (ComputeIDCT) against the
+// double-precision reference (IDCTSlow) on unit impulses placed at positions
+// [start, end). Every output sample must agree within accuracy * N.
+template <size_t N>
+void TestIdctAccuracy(float accuracy, size_t start = 0, size_t end = N * N) {
+  constexpr size_t kNumCoeffs = N * N;
+  const float tolerance = accuracy * N;
+  for (size_t i = start; i < end; ++i) {
+    // Same impulse for both implementations.
+    double reference[kNumCoeffs] = {0.0};
+    HWY_ALIGN float simd[kNumCoeffs] = {0.0f};
+    reference[i] = 1.0;
+    simd[i] = 1.0;
+
+    IDCTSlow<N>(reference);
+    ComputeIDCT<N>(simd);
+
+    for (size_t k = 0; k < kNumCoeffs; ++k) {
+      EXPECT_NEAR(simd[k], reference[k], tolerance)
+          << "i = " << i << ", k = " << k << ", N = " << N;
+    }
+  }
+}
+
+// Checks that ComputeDCT undoes ComputeIDCT: for every unit impulse of an NxN
+// block, IDCT followed by DCT must reproduce the impulse within `accuracy`.
+// One pool task is issued per impulse position; the pool is created with 0
+// threads for N < 32 and 8 threads otherwise.
+template <size_t N>
+void TestInverseT(float accuracy) {
+  enum { kBlockSize = N * N };
+  test::ThreadPoolForTests pool(N < 32 ? 0 : 8);
+
+  const auto check_impulse = [accuracy](const uint32_t task,
+                                        size_t /*thread*/) {
+    const size_t impulse_pos = static_cast<size_t>(task);
+    HWY_ALIGN float block[kBlockSize] = {0.0f};
+    block[impulse_pos] = 1.0;
+
+    ComputeIDCT<N>(block);
+    ComputeDCT<N>(block);
+
+    for (size_t k = 0; k < kBlockSize; ++k) {
+      EXPECT_NEAR(block[k], (k == impulse_pos) ? 1.0f : 0.0f, accuracy)
+          << "i = " << impulse_pos << ", k = " << k;
+    }
+  };
+
+  EXPECT_TRUE(RunOnPool(&pool, 0, kBlockSize, ThreadPool::NoInit,
+                        check_impulse, "TestInverse"));
+}
+
+// Runs the IDCT-then-DCT identity check for N = 8, 16 and 32. The 32x32 case
+// uses a looser tolerance (3e-6 instead of 1e-6).
+void InverseTest() {
+  TestInverseT<8>(1e-6f);
+  TestInverseT<16>(1e-6f);
+  TestInverseT<32>(3e-6f);
+}
+
+// Checks that the fast DCT and IDCT are transposes of each other up to
+// scaling: for basis vectors e_i and e_j, (IDCT e_j)_i / N must match
+// (DCT e_i)_j * N within `accuracy`. Only i in [start, end) is tested; j
+// always covers the whole block.
+template <size_t N>
+void TestDctTranspose(float accuracy, size_t start = 0, size_t end = N * N) {
+  constexpr size_t kNumCoeffs = N * N;
+  for (size_t i = start; i < end; ++i) {
+    for (size_t j = 0; j < kNumCoeffs; ++j) {
+      // We check that <e_i, Me_j> = <M^\dagger{}e_i, e_j>.
+      // That means (Me_j)_i = (M^\dagger{}e_i)_j
+
+      // m_ej := Me_j
+      HWY_ALIGN float m_ej[kNumCoeffs] = {0.0f};
+      m_ej[j] = 1.0;
+      ComputeIDCT<N>(m_ej);
+      // mt_ei := M^\dagger{}e_i
+      HWY_ALIGN float mt_ei[kNumCoeffs] = {0.0f};
+      mt_ei[i] = 1.0;
+      ComputeDCT<N>(mt_ei);
+
+      EXPECT_NEAR(m_ej[i] / N, mt_ei[j] * N, accuracy)
+          << "i = " << i << ", j = " << j;
+    }
+  }
+}
+
+// Sanity check of the reference implementations: DCTSlow followed by IDCTSlow
+// must reproduce each unit impulse at positions [start, end) within
+// `accuracy`.
+template <size_t N>
+void TestSlowInverse(float accuracy, size_t start = 0, size_t end = N * N) {
+  constexpr size_t kNumCoeffs = N * N;
+  for (size_t i = start; i < end; ++i) {
+    double block[kNumCoeffs] = {0.0};
+    block[i] = 1.0;
+
+    DCTSlow<N>(block);
+    IDCTSlow<N>(block);
+
+    for (size_t k = 0; k < kNumCoeffs; ++k) {
+      const float expected = (k == i) ? 1.0f : 0.0f;
+      EXPECT_NEAR(block[k], expected, accuracy)
+          << "i = " << i << ", k = " << k;
+    }
+  }
+}
+
+// Round-trip check for rectangular transforms: for every unit impulse of a
+// ROWS x COLS block, ComputeScaledDCT followed by ComputeScaledIDCT must
+// return the impulse within `accuracy`.
+template <size_t ROWS, size_t COLS>
+void TestRectInverseT(float accuracy) {
+  constexpr size_t kNumPixels = ROWS * COLS;
+  for (size_t i = 0; i < kNumPixels; ++i) {
+    HWY_ALIGN float pixels[kNumPixels] = {0.0f};
+    HWY_ALIGN float coeffs[kNumPixels] = {0.0f};
+    HWY_ALIGN float roundtrip[kNumPixels] = {0.0f};
+    // Shared working memory for both transforms.
+    HWY_ALIGN float scratch_space[kNumPixels * 2];
+    pixels[i] = 1.0;
+
+    ComputeScaledDCT<ROWS, COLS>()(DCTFrom(pixels, COLS), coeffs,
+                                   scratch_space);
+    ComputeScaledIDCT<ROWS, COLS>()(coeffs, DCTTo(roundtrip, COLS),
+                                    scratch_space);
+
+    for (size_t k = 0; k < kNumPixels; ++k) {
+      const float expected = (k == i) ? 1.0f : 0.0f;
+      EXPECT_NEAR(roundtrip[k], expected, accuracy)
+          << "i = " << i << ", k = " << k << " ROWS = " << ROWS
+          << " COLS = " << COLS;
+    }
+  }
+}
+
+// Runs the rectangular DCT/IDCT round-trip for every tested block shape, first
+// the wide shapes (ROWS < COLS), then the same shapes with the dimensions
+// swapped. All use an absolute tolerance of 1e-6.
+void TestRectInverse() {
+  TestRectInverseT<16, 32>(1e-6f);
+  TestRectInverseT<8, 32>(1e-6f);
+  TestRectInverseT<8, 16>(1e-6f);
+  TestRectInverseT<4, 8>(1e-6f);
+  TestRectInverseT<2, 4>(1e-6f);
+  TestRectInverseT<1, 4>(1e-6f);
+  TestRectInverseT<1, 2>(1e-6f);
+
+  TestRectInverseT<32, 16>(1e-6f);
+  TestRectInverseT<32, 8>(1e-6f);
+  TestRectInverseT<16, 8>(1e-6f);
+  TestRectInverseT<8, 4>(1e-6f);
+  TestRectInverseT<4, 2>(1e-6f);
+  TestRectInverseT<4, 1>(1e-6f);
+  TestRectInverseT<2, 1>(1e-6f);
+}
+
+// Checks that ComputeScaledDCT<ROWS, COLS> applied to a unit impulse and
+// ComputeScaledDCT<COLS, ROWS> applied to the transposed impulse yield the
+// same coefficients. The outputs are compared as a block with
+// min(ROWS, COLS) rows and max(ROWS, COLS) columns.
+template <size_t ROWS, size_t COLS>
+void TestRectTransposeT(float accuracy) {
+  constexpr size_t kNumPixels = ROWS * COLS;
+  constexpr size_t OUT_ROWS = ROWS < COLS ? ROWS : COLS;
+  constexpr size_t OUT_COLS = ROWS < COLS ? COLS : ROWS;
+  // Working memory reused by every transform below.
+  HWY_ALIGN float scratch_space[kNumPixels * 2];
+
+  for (size_t px = 0; px < COLS; ++px) {
+    for (size_t py = 0; py < ROWS; ++py) {
+      // Impulse at (py, px) in the ROWS x COLS block ...
+      HWY_ALIGN float block[kNumPixels] = {0.0f};
+      block[py * COLS + px] = 1;
+      // ... and the same impulse in the transposed COLS x ROWS block.
+      HWY_ALIGN float block_t[kNumPixels] = {0.0f};
+      block_t[px * ROWS + py] = 1;
+
+      HWY_ALIGN float coeffs[kNumPixels] = {0.0f};
+      HWY_ALIGN float coeffs_t[kNumPixels] = {0.0f};
+      ComputeScaledDCT<ROWS, COLS>()(DCTFrom(block, COLS), coeffs,
+                                     scratch_space);
+      ComputeScaledDCT<COLS, ROWS>()(DCTFrom(block_t, ROWS), coeffs_t,
+                                     scratch_space);
+
+      for (size_t x = 0; x < OUT_COLS; ++x) {
+        for (size_t y = 0; y < OUT_ROWS; ++y) {
+          const size_t idx = y * OUT_COLS + x;
+          EXPECT_NEAR(coeffs[idx], coeffs_t[idx], accuracy)
+              << " px = " << px << ", py = " << py << ", x = " << x
+              << ", y = " << y;
+        }
+      }
+    }
+  }
+}
+
+// Runs the rectangular transpose-consistency check for the wide block shapes.
+// Each instantiation already exercises both <ROWS, COLS> and <COLS, ROWS>.
+void TestRectTranspose() {
+  TestRectTransposeT<16, 32>(1e-6f);
+  TestRectTransposeT<8, 32>(1e-6f);
+  TestRectTransposeT<8, 16>(1e-6f);
+  TestRectTransposeT<4, 8>(1e-6f);
+  TestRectTransposeT<2, 4>(1e-6f);
+  TestRectTransposeT<1, 4>(1e-6f);
+  TestRectTransposeT<1, 2>(1e-6f);
+
+  // Identical to 8, 16
+  //  TestRectTransposeT<16, 8>(1e-6f);
+}
+
+// One shard of the fast-DCT-vs-DCTSlow accuracy test. Shard 0 additionally
+// covers every impulse position for N = 1, 2, 4, 8 and 16. Every shard checks
+// impulse positions [32 * shard, 32 * (shard + 1)) of the 32x32 transform.
+void TestDctAccuracyShard(size_t shard) {
+  if (shard == 0) {
+    TestDctAccuracy<1>(1.1E-7f);
+    TestDctAccuracy<2>(1.1E-7f);
+    TestDctAccuracy<4>(1.1E-7f);
+    TestDctAccuracy<8>(1.1E-7f);
+    TestDctAccuracy<16>(1.3E-7f);
+  }
+  TestDctAccuracy<32>(1.1E-7f, 32 * shard, 32 * (shard + 1));
+}
+
+// One shard of the fast-IDCT-vs-IDCTSlow accuracy test. Shard 0 additionally
+// covers every impulse position for N = 1, 2, 4, 8 and 16. Every shard checks
+// impulse positions [32 * shard, 32 * (shard + 1)) of the 32x32 transform.
+void TestIdctAccuracyShard(size_t shard) {
+  if (shard == 0) {
+    TestIdctAccuracy<1>(1E-7f);
+    TestIdctAccuracy<2>(1E-7f);
+    TestIdctAccuracy<4>(1E-7f);
+    TestIdctAccuracy<8>(1E-7f);
+    TestIdctAccuracy<16>(1E-7f);
+  }
+  TestIdctAccuracy<32>(1E-7f, 32 * shard, 32 * (shard + 1));
+}
+
+// One shard of the DCT/IDCT transpose-relation test. Shard 0 additionally runs
+// the full check for N = 8 and 16. Every shard checks rows
+// [32 * shard, 32 * (shard + 1)) for N = 32, with a looser 3e-6 tolerance.
+void TestDctTransposeShard(size_t shard) {
+  if (shard == 0) {
+    TestDctTranspose<8>(1E-6f);
+    TestDctTranspose<16>(1E-6f);
+  }
+  TestDctTranspose<32>(3E-6f, 32 * shard, 32 * (shard + 1));
+}
+
+// One shard of the DCTSlow/IDCTSlow round-trip test. Shard 0 additionally
+// covers every impulse position for N = 1, 2, 4, 8 and 16. Every shard checks
+// impulse positions [32 * shard, 32 * (shard + 1)) for N = 32.
+void TestSlowInverseShard(size_t shard) {
+  if (shard == 0) {
+    TestSlowInverse<1>(1E-5f);
+    TestSlowInverse<2>(1E-5f);
+    TestSlowInverse<4>(1E-5f);
+    TestSlowInverse<8>(1E-5f);
+    TestSlowInverse<16>(1E-5f);
+  }
+  TestSlowInverse<32>(1E-5f, 32 * shard, 32 * (shard + 1));
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+// Fixture for the non-sharded tests. Despite its name it hosts not only the
+// transpose test but also the inverse, column round-trip and rectangular
+// tests registered below.
+class TransposeTest : public hwy::TestWithParamTarget {};
+
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(TransposeTest);
+
+// Each line registers the same-named function from the per-target namespace
+// above as a test case of the fixture.
+HWY_EXPORT_AND_TEST_P(TransposeTest, TransposeTest);
+HWY_EXPORT_AND_TEST_P(TransposeTest, InverseTest);
+HWY_EXPORT_AND_TEST_P(TransposeTest, ColumnDctRoundtrip);
+HWY_EXPORT_AND_TEST_P(TransposeTest, TestRectInverse);
+HWY_EXPORT_AND_TEST_P(TransposeTest, TestRectTranspose);
+
+// Tests in the DctShardedTest class are sharded for N=32.
+// The uint32_t test parameter is the shard index; the values come from
+// ShardRange(32) where the suite is instantiated.
+class DctShardedTest : public ::hwy::TestWithParamTargetAndT<uint32_t> {};
+
+// Returns the shard indices to run out of `n` shards: all of 0..n-1 normally,
+// or only {0, 1, 3, 5, n - 1} when JXL_DISABLE_SLOW_TESTS is defined (which
+// asserts n > 6).
+std::vector<uint32_t> ShardRange(uint32_t n) {
+#ifdef JXL_DISABLE_SLOW_TESTS
+  JXL_ASSERT(n > 6);
+  return {0, 1, 3, 5, n - 1};
+#else
+  std::vector<uint32_t> shards;
+  shards.reserve(n);
+  for (uint32_t shard = 0; shard < n; ++shard) {
+    shards.push_back(shard);
+  }
+  return shards;
+#endif  // JXL_DISABLE_SLOW_TESTS
+}
+
+// Instantiates the sharded suite with the shard indices from ShardRange(32).
+// The shard functions each cover 32 impulse positions of the 32x32 block per
+// shard, so 32 shards cover all 32 * 32 positions.
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(DctShardedTest,
+                                      ::testing::ValuesIn(ShardRange(32)));
+
+HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestDctAccuracyShard);
+HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestIdctAccuracyShard);
+HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestDctTransposeShard);
+HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestSlowInverseShard);
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/dct_util.h b/third-party/libjxl/libjxl/lib/jxl/dct_util.h
new file mode 100644
index 0000000000..fb6ce3b971
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dct_util.h
@@ -0,0 +1,86 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DCT_UTIL_H_
+#define LIB_JXL_DCT_UTIL_H_
+
+#include <stddef.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+// Untagged union over a mutable int32_t* or int16_t*. Nothing in the union
+// records which member was set, so readers must know the element width by
+// other means (for rows obtained from an ACImage, ACImage::Type() reports it).
+union ACPtr {
+  int32_t* ptr32;
+  int16_t* ptr16;
+  // Leaves the pointer uninitialized.
+  ACPtr() = default;
+  explicit ACPtr(int16_t* p) : ptr16(p) {}
+  explicit ACPtr(int32_t* p) : ptr32(p) {}
+};
+
+// Read-only counterpart of ACPtr: an untagged union over a const int32_t* or
+// const int16_t*. The caller must track which member is active.
+union ConstACPtr {
+  const int32_t* ptr32;
+  const int16_t* ptr16;
+  // Leaves the pointer uninitialized.
+  ConstACPtr() = default;
+  explicit ConstACPtr(const int16_t* p) : ptr16(p) {}
+  explicit ConstACPtr(const int32_t* p) : ptr32(p) {}
+};
+
+// Element width of an ACImage: k16 for 2-byte elements, k32 otherwise (see
+// ACImageT::Type()). Tells which member of ACPtr / ConstACPtr is valid.
+enum class ACType { k16 = 0, k32 = 1 };
+
+// Abstract interface over a multi-plane image whose elements are either 16 or
+// 32 bits wide. Row pointers are returned as ACPtr / ConstACPtr; Type() tells
+// which union member to use.
+class ACImage {
+ public:
+  virtual ~ACImage() = default;
+  // Element width of the underlying storage.
+  virtual ACType Type() const = 0;
+  // Pointer to element `xbase` of row `y` in plane `c`.
+  virtual ACPtr PlaneRow(size_t c, size_t y, size_t xbase) = 0;
+  virtual ConstACPtr PlaneRow(size_t c, size_t y, size_t xbase) const = 0;
+  virtual size_t PixelsPerRow() const = 0;
+  // Zeroes all planes.
+  virtual void ZeroFill() = 0;
+  // Zeroes plane `c` only.
+  virtual void ZeroFillPlane(size_t c) = 0;
+  virtual bool IsEmpty() const = 0;
+};
+
+// ACImage backed by an Image3<T>, where T is int16_t or int32_t (enforced by
+// a static_assert in the sizing constructor).
+template <typename T>
+class ACImageT final : public ACImage {
+ public:
+  // Creates an image with default-constructed storage.
+  ACImageT() = default;
+
+  // Allocates storage of xsize x ysize elements per plane.
+  ACImageT(size_t xsize, size_t ysize) : img_(xsize, ysize) {
+    static_assert(
+        std::is_same<T, int16_t>::value || std::is_same<T, int32_t>::value,
+        "ACImage must be either 32- or 16- bit");
+  }
+
+  // k16 when T is two bytes wide, k32 otherwise.
+  ACType Type() const override {
+    if (sizeof(T) == 2) return ACType::k16;
+    return ACType::k32;
+  }
+
+  // Row `y` of plane `c`, advanced by `xbase` elements.
+  ACPtr PlaneRow(size_t c, size_t y, size_t xbase) override {
+    T* row = img_.PlaneRow(c, y);
+    return ACPtr(row + xbase);
+  }
+  ConstACPtr PlaneRow(size_t c, size_t y, size_t xbase) const override {
+    const T* row = img_.PlaneRow(c, y);
+    return ConstACPtr(row + xbase);
+  }
+
+  size_t PixelsPerRow() const override { return img_.PixelsPerRow(); }
+
+  void ZeroFill() override { ZeroFillImage(&img_); }
+
+  void ZeroFillPlane(size_t c) override { ZeroFillImage(&img_.Plane(c)); }
+
+  // True when either dimension is zero.
+  bool IsEmpty() const override {
+    return img_.xsize() == 0 || img_.ysize() == 0;
+  }
+
+ private:
+  Image3<T> img_;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DCT_UTIL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_ans.cc b/third-party/libjxl/libjxl/lib/jxl/dec_ans.cc
new file mode 100644
index 0000000000..13a57238f1
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_ans.cc
@@ -0,0 +1,372 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_ans.h"
+
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/ans_common.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_context_map.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+namespace {
+
+// Decodes a number in the range [0..255], by reading 1 - 11 bits.
+// Layout: a 1-bit flag (0 encodes the value 0), then a 3-bit count nbits.
+// nbits == 0 encodes the value 1; otherwise the value is (1 << nbits) plus
+// the next nbits bits.
+inline int DecodeVarLenUint8(BitReader* input) {
+  if (!input->ReadFixedBits<1>()) return 0;
+  const int nbits = static_cast<int>(input->ReadFixedBits<3>());
+  if (nbits == 0) return 1;
+  return static_cast<int>(input->ReadBits(nbits)) + (1 << nbits);
+}
+
+// Decodes a number in the range [0..65535], by reading 1 - 20 bits.
+// Layout: a 1-bit flag (0 encodes the value 0), then a 4-bit count nbits.
+// nbits == 0 encodes the value 1; otherwise the value is (1 << nbits) plus
+// the next nbits bits.
+inline int DecodeVarLenUint16(BitReader* input) {
+  if (!input->ReadFixedBits<1>()) return 0;
+  const int nbits = static_cast<int>(input->ReadFixedBits<4>());
+  if (nbits == 0) return 1;
+  return static_cast<int>(input->ReadBits(nbits)) + (1 << nbits);
+}
+
+// Reads one ANS histogram from `input` into `*counts`.
+//
+// Three encodings are handled:
+//  - simple: one or two symbols; a single symbol gets the whole mass of
+//    1 << precision_bits, two symbols split it.
+//  - flat: the result of CreateFlatHistogram(alphabet_size,
+//    1 << precision_bits).
+//  - general: per-symbol log-counts read through a fixed prefix code, with an
+//    RLE marker that repeats the previous count, followed by the count
+//    mantissas. The symbol with the largest log-count (first one on ties) is
+//    "omitted": it receives whatever is left of 1 << precision_bits.
+//
+// Returns a failure status for streams that are detectably corrupt.
+Status ReadHistogram(int precision_bits, std::vector<int32_t>* counts,
+                     BitReader* input) {
+  int simple_code = input->ReadBits(1);
+  if (simple_code == 1) {
+    int i;
+    int symbols[2] = {0};
+    int max_symbol = 0;
+    const int num_symbols = input->ReadBits(1) + 1;
+    for (i = 0; i < num_symbols; ++i) {
+      symbols[i] = DecodeVarLenUint8(input);
+      if (symbols[i] > max_symbol) max_symbol = symbols[i];
+    }
+    counts->resize(max_symbol + 1);
+    if (num_symbols == 1) {
+      (*counts)[symbols[0]] = 1 << precision_bits;
+    } else {
+      if (symbols[0] == symbols[1]) {  // corrupt data
+        return false;
+      }
+      (*counts)[symbols[0]] = input->ReadBits(precision_bits);
+      (*counts)[symbols[1]] = (1 << precision_bits) - (*counts)[symbols[0]];
+    }
+  } else {
+    int is_flat = input->ReadBits(1);
+    if (is_flat == 1) {
+      int alphabet_size = DecodeVarLenUint8(input) + 1;
+      *counts = CreateFlatHistogram(alphabet_size, 1 << precision_bits);
+      return true;
+    }
+
+    // `shift` is coded as a unary-terminated bit length `log` followed by
+    // `log` payload bits; it controls how many mantissa bits each count gets.
+    uint32_t shift;
+    {
+      // TODO(veluca): speed up reading with table lookups.
+      int upper_bound_log = FloorLog2Nonzero(ANS_LOG_TAB_SIZE + 1);
+      int log = 0;
+      for (; log < upper_bound_log; log++) {
+        if (input->ReadFixedBits<1>() == 0) break;
+      }
+      shift = (input->ReadBits(log) | (1 << log)) - 1;
+      if (shift > ANS_LOG_TAB_SIZE + 1) {
+        return JXL_FAILURE("Invalid shift value");
+      }
+    }
+
+    int length = DecodeVarLenUint8(input) + 3;
+    counts->resize(length);
+    int total_count = 0;
+
+    // Prefix-code lookup table indexed by the next 7 bits of input:
+    // {number of bits to consume, decoded log-count}.
+    static const uint8_t huff[128][2] = {
+        {3, 10}, {7, 12}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+        {3, 10}, {4, 4},  {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+        {3, 10}, {5, 0},  {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+        {3, 10}, {4, 4},  {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+        {3, 10}, {6, 11}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+        {3, 10}, {4, 4},  {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+        {3, 10}, {5, 0},  {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+        {3, 10}, {4, 4},  {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+        {3, 10}, {7, 13}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+        {3, 10}, {4, 4},  {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+        {3, 10}, {5, 0},  {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+        {3, 10}, {4, 4},  {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+        {3, 10}, {6, 11}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+        {3, 10}, {4, 4},  {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+        {3, 10}, {5, 0},  {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+        {3, 10}, {4, 4},  {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+    };
+
+    std::vector<int> logcounts(counts->size());
+    int omit_log = -1;
+    int omit_pos = -1;
+    // This array remembers which symbols have an RLE length.
+    std::vector<int> same(counts->size(), 0);
+    for (size_t i = 0; i < logcounts.size(); ++i) {
+      input->Refill();  // for PeekFixedBits + Advance
+      int idx = input->PeekFixedBits<7>();
+      input->Consume(huff[idx][0]);
+      logcounts[i] = huff[idx][1];
+      // The RLE symbol.
+      if (logcounts[i] == ANS_LOG_TAB_SIZE + 1) {
+        int rle_length = DecodeVarLenUint8(input);
+        same[i] = rle_length + 5;
+        // Skip the positions covered by the run (the loop adds one more).
+        i += rle_length + 3;
+        continue;
+      }
+      if (logcounts[i] > omit_log) {
+        omit_log = logcounts[i];
+        omit_pos = i;
+      }
+    }
+    // Invalid input, e.g. due to invalid usage of RLE.
+    if (omit_pos < 0) return JXL_FAILURE("Invalid histogram.");
+    // The entry right after the omitted symbol must not be the RLE marker
+    // (ANS_LOG_TAB_SIZE + 1, the same value tested in the loop above): a run
+    // starting there would replicate the omitted count, which is still unset
+    // when the run is expanded below. Comparing against ANS_TAB_SIZE + 1
+    // could never match a log-count, which made this check a no-op.
+    if (static_cast<size_t>(omit_pos) + 1 < logcounts.size() &&
+        logcounts[omit_pos + 1] == ANS_LOG_TAB_SIZE + 1) {
+      return JXL_FAILURE("Invalid histogram.");
+    }
+    int prev = 0;
+    int numsame = 0;
+    for (size_t i = 0; i < logcounts.size(); ++i) {
+      if (same[i]) {
+        // RLE sequence, let this loop output the same count for the next
+        // iterations.
+        numsame = same[i] - 1;
+        prev = i > 0 ? (*counts)[i - 1] : 0;
+      }
+      if (numsame > 0) {
+        (*counts)[i] = prev;
+        numsame--;
+      } else {
+        int code = logcounts[i];
+        // omit_pos may not be negative at this point (checked before).
+        if (i == static_cast<size_t>(omit_pos)) {
+          continue;
+        } else if (code == 0) {
+          continue;
+        } else if (code == 1) {
+          (*counts)[i] = 1;
+        } else {
+          // Leading bit is implied by `code`; only `bitcount` of the lower
+          // bits are stored, the rest are zero.
+          int bitcount = GetPopulationCountPrecision(code - 1, shift);
+          (*counts)[i] = (1 << (code - 1)) +
+                         (input->ReadBits(bitcount) << (code - 1 - bitcount));
+        }
+      }
+      total_count += (*counts)[i];
+    }
+    // The omitted symbol takes whatever is left of the total mass.
+    (*counts)[omit_pos] = (1 << precision_bits) - total_count;
+    if ((*counts)[omit_pos] <= 0) {
+      // The histogram we've read sums to more than total_count (including at
+      // least 1 for the omitted value).
+      return JXL_FAILURE("Invalid histogram count.");
+    }
+  }
+  return true;
+}
+
+}  // namespace
+
+// Reads `num_histograms` entropy codes from `in` into `result`.
+//
+// When result->use_prefix_code is set, one alphabet size per histogram is read
+// first, followed by a Huffman table for every histogram with more than one
+// symbol. Otherwise one ANS histogram per code is read (ReadHistogram) and
+// turned into an alias table of 1 << result->log_alpha_size entries.
+// result->degenerate_symbols is resized to num_histograms (new entries are
+// -1) and is only written by the ANS path. Both paths report symbols to
+// result->UpdateMaxNumBits.
+//
+// Fails when an alphabet exceeds max_alphabet_size, when a Huffman tree or
+// histogram is invalid, or (kNotEnoughBytes) when a Huffman tree read ran
+// past the available input.
+Status DecodeANSCodes(const size_t num_histograms,
+                      const size_t max_alphabet_size, BitReader* in,
+                      ANSCode* result) {
+  result->degenerate_symbols.resize(num_histograms, -1);
+  if (result->use_prefix_code) {
+    JXL_ASSERT(max_alphabet_size <= 1 << PREFIX_MAX_BITS);
+    result->huffman_data.resize(num_histograms);
+    // Kept as 32-bit values: DecodeVarLenUint16 can return 65535, and adding 1
+    // in a uint16_t would wrap to 0 and slip past the size check below.
+    std::vector<uint32_t> alphabet_sizes(num_histograms);
+    for (size_t c = 0; c < num_histograms; c++) {
+      alphabet_sizes[c] = DecodeVarLenUint16(in) + 1;
+      if (alphabet_sizes[c] > max_alphabet_size) {
+        return JXL_FAILURE("Alphabet size is too long: %u", alphabet_sizes[c]);
+      }
+    }
+    for (size_t c = 0; c < num_histograms; c++) {
+      if (alphabet_sizes[c] > 1) {
+        if (!result->huffman_data[c].ReadFromBitStream(alphabet_sizes[c], in)) {
+          // Distinguish truncated input from a genuinely malformed tree.
+          if (!in->AllReadsWithinBounds()) {
+            return JXL_STATUS(StatusCode::kNotEnoughBytes,
+                              "Not enough bytes for huffman code");
+          }
+          return JXL_FAILURE("Invalid huffman tree number %" PRIuS
+                             ", alphabet size %u",
+                             c, alphabet_sizes[c]);
+        }
+      } else {
+        // 0-bit codes does not require extension tables.
+        result->huffman_data[c].table_.clear();
+        result->huffman_data[c].table_.resize(1u << kHuffmanTableBits);
+      }
+      for (const auto& h : result->huffman_data[c].table_) {
+        if (h.bits <= kHuffmanTableBits) {
+          result->UpdateMaxNumBits(c, h.value);
+        }
+      }
+    }
+  } else {
+    JXL_ASSERT(max_alphabet_size <= ANS_MAX_ALPHABET_SIZE);
+    // One contiguous allocation holding the alias table of every histogram.
+    result->alias_tables =
+        AllocateArray(num_histograms * (1 << result->log_alpha_size) *
+                      sizeof(AliasTable::Entry));
+    AliasTable::Entry* alias_tables =
+        reinterpret_cast<AliasTable::Entry*>(result->alias_tables.get());
+    for (size_t c = 0; c < num_histograms; ++c) {
+      std::vector<int32_t> counts;
+      if (!ReadHistogram(ANS_LOG_TAB_SIZE, &counts, in)) {
+        return JXL_FAILURE("Invalid histogram bitstream.");
+      }
+      if (counts.size() > max_alphabet_size) {
+        return JXL_FAILURE("Alphabet size is too long: %" PRIuS, counts.size());
+      }
+      // Drop trailing zero counts.
+      while (!counts.empty() && counts.back() == 0) {
+        counts.pop_back();
+      }
+      for (size_t s = 0; s < counts.size(); s++) {
+        if (counts[s] != 0) {
+          result->UpdateMaxNumBits(c, s);
+        }
+      }
+      // InitAliasTable "fixes" empty counts to contain degenerate "0" symbol.
+      // The histogram is degenerate when its last symbol is the only one with
+      // a nonzero count; otherwise -1 is recorded.
+      int degenerate_symbol = counts.empty() ? 0 : (counts.size() - 1);
+      for (int s = 0; s < degenerate_symbol; ++s) {
+        if (counts[s] != 0) {
+          degenerate_symbol = -1;
+          break;
+        }
+      }
+      result->degenerate_symbols[c] = degenerate_symbol;
+      InitAliasTable(counts, ANS_TAB_SIZE, result->log_alpha_size,
+                     alias_tables + c * (1 << result->log_alpha_size));
+    }
+  }
+  return true;
+}
+// Reads one HybridUintConfig (split_exponent, msb_in_token, lsb_in_token)
+// from `br` into `*uint_config`. msb_in_token and lsb_in_token are only
+// present in the stream when split_exponent differs from log_alpha_size; both
+// are 0 otherwise. Fails if msb_in_token + lsb_in_token exceeds
+// split_exponent.
+Status DecodeUintConfig(size_t log_alpha_size, HybridUintConfig* uint_config,
+                        BitReader* br) {
+  br->Refill();
+  const size_t split_exponent =
+      br->ReadBits(CeilLog2Nonzero(log_alpha_size + 1));
+  size_t msb_in_token = 0;
+  size_t lsb_in_token = 0;
+  // When split_exponent == log_alpha_size, msb/lsb don't matter.
+  const bool has_token_bits = split_exponent != log_alpha_size;
+  if (has_token_bits) {
+    const size_t msb_nbits = CeilLog2Nonzero(split_exponent + 1);
+    msb_in_token = br->ReadBits(msb_nbits);
+    if (msb_in_token > split_exponent) {
+      // This could be invalid here already and we need to check this before
+      // we use its value to read more bits.
+      return JXL_FAILURE("Invalid HybridUintConfig");
+    }
+    const size_t lsb_nbits =
+        CeilLog2Nonzero(split_exponent - msb_in_token + 1);
+    lsb_in_token = br->ReadBits(lsb_nbits);
+  }
+  if (lsb_in_token + msb_in_token > split_exponent) {
+    return JXL_FAILURE("Invalid HybridUintConfig");
+  }
+  *uint_config = HybridUintConfig(split_exponent, msb_in_token, lsb_in_token);
+  return true;
+}
+
+// Reads one HybridUintConfig per existing element of `*uint_config`, in
+// order, stopping at the first failure. The vector must already have its
+// final size.
+Status DecodeUintConfigs(size_t log_alpha_size,
+                         std::vector<HybridUintConfig>* uint_config,
+                         BitReader* br) {
+  // TODO(veluca): RLE?
+  for (HybridUintConfig& config : *uint_config) {
+    JXL_RETURN_IF_ERROR(DecodeUintConfig(log_alpha_size, &config, br));
+  }
+  return true;
+}
+
+// Delegates field initialization to Bundle::Init (presumably applying the
+// defaults declared in VisitFields; confirm in fields.h).
+LZ77Params::LZ77Params() { Bundle::Init(this); }
+// Visits the serialized fields in stream order: the `enabled` flag, then,
+// only when it is set, min_symbol and min_length as U32 fields. The literal
+// passed just before each field pointer (false, 224, 3) is presumably its
+// default value; confirm against the Visitor interface.
+Status LZ77Params::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &enabled));
+  // Nothing else is visited when LZ77 is off.
+  if (!visitor->Conditional(enabled)) return true;
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(224), Val(512), Val(4096),
+                                         BitsOffset(15, 8), 224, &min_symbol));
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(3), Val(4), BitsOffset(2, 5),
+                                         BitsOffset(8, 9), 3, &min_length));
+  return true;
+}
+
+// Raises max_num_bits to cover `symbol` as decoded in context `ctx`, using
+// whichever HybridUintConfig applies to it.
+void ANSCode::UpdateMaxNumBits(size_t ctx, size_t symbol) {
+  const HybridUintConfig* config = &uint_config[ctx];
+  // LZ77 symbols use a different uint config.
+  const bool is_lz77_length = lz77.enabled &&
+                              lz77.nonserialized_distance_context != ctx &&
+                              symbol >= lz77.min_symbol;
+  if (is_lz77_length) {
+    symbol -= lz77.min_symbol;
+    config = &lz77.length_uint_config;
+  }
+
+  const size_t split_exponent = config->split_exponent;
+  const size_t split_token = config->split_token;
+  if (symbol < split_token) {
+    // Token below the split point: bounded by split_exponent.
+    max_num_bits = std::max(max_num_bits, split_exponent);
+    return;
+  }
+
+  const size_t msb = config->msb_in_token;
+  const size_t lsb = config->lsb_in_token;
+  const size_t token_bits = msb + lsb;
+  const uint32_t n_extra_bits =
+      split_exponent - token_bits + ((symbol - split_token) >> token_bits);
+  const size_t total_bits = token_bits + n_extra_bits + 1;
+  max_num_bits = std::max(max_num_bits, total_bits);
+}
+
+// Reads the complete entropy-coding setup for `num_contexts` contexts from
+// `br` into `*code` and `*context_map`, in stream order: the LZ77 parameters
+// (plus the LZ77 length config when enabled), the context map, the choice
+// between prefix and ANS coding with its log_alpha_size, the per-histogram
+// HybridUintConfigs, and finally the codes themselves (DecodeANSCodes).
+// Fails if the stream enables LZ77 while `disallow_lz77` is set, or if any
+// sub-decoder fails.
+// NOTE(review): context_map->back() below assumes num_contexts >= 1 (or LZ77
+// enabled); back() on an empty vector is undefined. Confirm callers never
+// pass 0.
+Status DecodeHistograms(BitReader* br, size_t num_contexts, ANSCode* code,
+                        std::vector<uint8_t>* context_map, bool disallow_lz77) {
+  JXL_RETURN_IF_ERROR(Bundle::Read(br, &code->lz77));
+  if (code->lz77.enabled) {
+    // LZ77 adds one extra context; it ends up last in the context map and is
+    // recorded as nonserialized_distance_context further down.
+    num_contexts++;
+    JXL_RETURN_IF_ERROR(DecodeUintConfig(/*log_alpha_size=*/8,
+                                         &code->lz77.length_uint_config, br));
+  }
+  if (code->lz77.enabled && disallow_lz77) {
+    return JXL_FAILURE("Using LZ77 when explicitly disallowed");
+  }
+  size_t num_histograms = 1;
+  context_map->resize(num_contexts);
+  // With a single context there is nothing to map; num_histograms stays 1.
+  if (num_contexts > 1) {
+    JXL_RETURN_IF_ERROR(DecodeContextMap(context_map, &num_histograms, br));
+  }
+  code->lz77.nonserialized_distance_context = context_map->back();
+  code->use_prefix_code = br->ReadFixedBits<1>();
+  if (code->use_prefix_code) {
+    code->log_alpha_size = PREFIX_MAX_BITS;
+  } else {
+    // Two bits select an ANS log_alpha_size in [5, 8].
+    code->log_alpha_size = br->ReadFixedBits<2>() + 5;
+  }
+  code->uint_config.resize(num_histograms);
+  JXL_RETURN_IF_ERROR(
+      DecodeUintConfigs(code->log_alpha_size, &code->uint_config, br));
+  const size_t max_alphabet_size = 1 << code->log_alpha_size;
+  JXL_RETURN_IF_ERROR(
+      DecodeANSCodes(num_histograms, max_alphabet_size, br, code));
+  // When using LZ77, flat codes might result in valid codestreams with
+  // histograms that potentially allow very large bit counts.
+  // TODO(veluca): in principle, a valid codestream might contain a histogram
+  // that could allow very large numbers of bits that is never used during ANS
+  // decoding. There's no benefit to doing that, though.
+  if (!code->lz77.enabled && code->max_num_bits > 32) {
+    // Just emit a warning as there are many opportunities for false positives.
+    JXL_WARNING("Histogram can represent numbers that are too large: %" PRIuS
+                "\n",
+                code->max_num_bits);
+  }
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_ans.h b/third-party/libjxl/libjxl/lib/jxl/dec_ans.h
new file mode 100644
index 0000000000..6986cf1b1f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_ans.h
@@ -0,0 +1,505 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_ANS_H_
+#define LIB_JXL_DEC_ANS_H_
+
+// Library to decode the ANS population counts from the bit-stream and build a
+// decoding table from them.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <cstring>
+#include <vector>
+
+#include "lib/jxl/ans_common.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/cache_aligned.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_huffman.h"
+#include "lib/jxl/field_encodings.h"
+
+namespace jxl {
+
+class ANSSymbolReader;
+
+// Experiments show that best performance is typically achieved for a
+// split-exponent of 3 or 4. Trend seems to be that '4' is better
+// for large-ish pictures, and '3' better for rather small-ish pictures.
+// This is plausible - the more special symbols we have, the better
+// statistics we need to get a benefit out of them.
+
+// Our hybrid-encoding scheme has dedicated tokens for the smallest
+// (1 << split_exponent) numbers, and for the rest
+// encodes (number of bits) + (msb_in_token sub-leading binary digits) +
+// (lsb_in_token lowest binary digits) in the token, with the remaining bits
+// then being encoded as data.
+//
+// Example with split_exponent = 4, msb_in_token = 2, lsb_in_token = 0.
+//
+// Numbers N in [0 .. 15]:
+//   These get represented as (token=N, bits='').
+// Numbers N >= 16:
+//   If n is such that 2**n <= N < 2**(n+1),
+//   and m = N - 2**n is the 'mantissa',
+//   these get represented as:
+// (token=split_token +
+//        ((n - split_exponent) * 4) +
+//        (m >> (n - msb_in_token)),
+//  bits=m & (1 << (n - msb_in_token)) - 1)
+// Specifically, we would get:
+// N = 0 - 15:          (token=N, nbits=0, bits='')
+// N = 16 (10000):      (token=16, nbits=2, bits='00')
+// N = 17 (10001):      (token=16, nbits=2, bits='01')
+// N = 20 (10100):      (token=17, nbits=2, bits='00')
+// N = 24 (11000):      (token=18, nbits=2, bits='00')
+// N = 28 (11100):      (token=19, nbits=2, bits='00')
+// N = 32 (100000):     (token=20, nbits=3, bits='000')
+// N = 65535:           (token=63, nbits=13, bits='1111111111111')
+struct HybridUintConfig {
+  uint32_t split_exponent;  // log2 of split_token
+  uint32_t split_token;     // values below this are encoded as their own token
+  uint32_t msb_in_token;    // mantissa bits (from the top) carried by the token
+  uint32_t lsb_in_token;    // lowest value bits carried by the token
+  JXL_INLINE void Encode(uint32_t value, uint32_t* JXL_RESTRICT token,
+                         uint32_t* JXL_RESTRICT nbits,
+                         uint32_t* JXL_RESTRICT bits) const {
+    if (value < split_token) {  // direct token, no raw bits
+      *token = value;
+      *nbits = 0;
+      *bits = 0;
+    } else {
+      uint32_t n = FloorLog2Nonzero(value);  // presumably floor(log2(value))
+      uint32_t m = value - (1 << n);         // mantissa: value minus 2**n
+      *token = split_token +
+               ((n - split_exponent) << (msb_in_token + lsb_in_token)) +
+               ((m >> (n - msb_in_token)) << lsb_in_token) +
+               (m & ((1 << lsb_in_token) - 1));
+      *nbits = n - msb_in_token - lsb_in_token;  // number of raw (non-token) bits
+      *bits = (value >> lsb_in_token) & ((1UL << *nbits) - 1);
+    }
+  }
+
+  explicit HybridUintConfig(uint32_t split_exponent = 4,
+                            uint32_t msb_in_token = 2,
+                            uint32_t lsb_in_token = 0)
+      : split_exponent(split_exponent),
+        split_token(1 << split_exponent),
+        msb_in_token(msb_in_token),
+        lsb_in_token(lsb_in_token) {
+    JXL_DASSERT(split_exponent >= msb_in_token + lsb_in_token);  // keeps nbits in Encode() from underflowing
+  }
+};
+
+struct LZ77Params : public Fields {
+  LZ77Params();
+  JXL_FIELDS_NAME(LZ77Params)
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+  bool enabled;  // when false, ANSSymbolReader allocates no LZ77 window
+
+  // Symbols >= min_symbol use a special hybrid uint encoding and
+  // represent a length, to be added to min_length.
+  uint32_t min_symbol;
+  uint32_t min_length;
+
+  // Not serialized by VisitFields; DecodeHistograms reads it when enabled.
+  HybridUintConfig length_uint_config{0, 0, 0};
+
+  size_t nonserialized_distance_context;  // DecodeHistograms sets it to context_map->back()
+};
+
+static constexpr size_t kWindowSize = 1 << 20;  // LZ77 window length, in uint32_t entries
+static constexpr size_t kNumSpecialDistances = 120;  // rows in kSpecialDistances
+// Special distance codes from WebP lossless: {a, b} -> a + multiplier * b.
+static constexpr int8_t kSpecialDistances[kNumSpecialDistances][2] = {
+    {0, 1},  {1, 0},  {1, 1},  {-1, 1}, {0, 2},  {2, 0},  {1, 2},  {-1, 2},
+    {2, 1},  {-2, 1}, {2, 2},  {-2, 2}, {0, 3},  {3, 0},  {1, 3},  {-1, 3},
+    {3, 1},  {-3, 1}, {2, 3},  {-2, 3}, {3, 2},  {-3, 2}, {0, 4},  {4, 0},
+    {1, 4},  {-1, 4}, {4, 1},  {-4, 1}, {3, 3},  {-3, 3}, {2, 4},  {-2, 4},
+    {4, 2},  {-4, 2}, {0, 5},  {3, 4},  {-3, 4}, {4, 3},  {-4, 3}, {5, 0},
+    {1, 5},  {-1, 5}, {5, 1},  {-5, 1}, {2, 5},  {-2, 5}, {5, 2},  {-5, 2},
+    {4, 4},  {-4, 4}, {3, 5},  {-3, 5}, {5, 3},  {-5, 3}, {0, 6},  {6, 0},
+    {1, 6},  {-1, 6}, {6, 1},  {-6, 1}, {2, 6},  {-2, 6}, {6, 2},  {-6, 2},
+    {4, 5},  {-4, 5}, {5, 4},  {-5, 4}, {3, 6},  {-3, 6}, {6, 3},  {-6, 3},
+    {0, 7},  {7, 0},  {1, 7},  {-1, 7}, {5, 5},  {-5, 5}, {7, 1},  {-7, 1},
+    {4, 6},  {-4, 6}, {6, 4},  {-6, 4}, {2, 7},  {-2, 7}, {7, 2},  {-7, 2},
+    {3, 7},  {-3, 7}, {7, 3},  {-7, 3}, {5, 6},  {-5, 6}, {6, 5},  {-6, 5},
+    {8, 0},  {4, 7},  {-4, 7}, {7, 4},  {-7, 4}, {8, 1},  {8, 2},  {6, 6},
+    {-6, 6}, {8, 3},  {5, 7},  {-5, 7}, {7, 5},  {-7, 5}, {8, 4},  {6, 7},
+    {-6, 7}, {7, 6},  {-7, 6}, {8, 5},  {7, 7},  {-7, 7}, {8, 6},  {8, 7}};
+
+struct ANSCode {
+  CacheAlignedUniquePtr alias_tables;  // ANS tables; a histogram's table starts at histo_idx << log_alpha_size
+  std::vector<HuffmanDecodingData> huffman_data;  // prefix-code tables, indexed by histogram
+  std::vector<HybridUintConfig> uint_config;  // DecodeHistograms resizes it to num_histograms
+  std::vector<int> degenerate_symbols;
+  bool use_prefix_code;  // true: prefix (Huffman) codes; false: ANS
+  uint8_t log_alpha_size;  // for ANS; log2 of the maximum alphabet size.
+  LZ77Params lz77;
+  // Maximum number of bits necessary to represent the result of a
+  // ReadHybridUint call done with this ANSCode.
+  size_t max_num_bits = 0;
+  void UpdateMaxNumBits(size_t ctx, size_t symbol);
+};
+
+class ANSSymbolReader {
+ public:
+  // Invalid symbol reader, to be overwritten.
+  ANSSymbolReader() = default;
+  ANSSymbolReader(const ANSCode* code, BitReader* JXL_RESTRICT br,
+                  size_t distance_multiplier = 0)
+      : alias_tables_(
+            reinterpret_cast<AliasTable::Entry*>(code->alias_tables.get())),
+        huffman_data_(code->huffman_data.data()),
+        use_prefix_code_(code->use_prefix_code),
+        configs(code->uint_config.data()) {
+    if (!use_prefix_code_) {  // ANS: the initial state is the next 32 bits
+      state_ = static_cast<uint32_t>(br->ReadFixedBits<32>());
+      log_alpha_size_ = code->log_alpha_size;
+      log_entry_size_ = ANS_LOG_TAB_SIZE - code->log_alpha_size;
+      entry_size_minus_1_ = (1 << log_entry_size_) - 1;
+    } else {
+      state_ = (ANS_SIGNATURE << 16u);  // the value CheckANSFinalState() expects
+    }
+    if (!code->lz77.enabled) return;  // lz77_window_ stays nullptr
+    // a std::vector incurs unacceptable decoding speed loss because of
+    // initialization.
+    lz77_window_storage_ = AllocateArray(kWindowSize * sizeof(uint32_t));
+    lz77_window_ = reinterpret_cast<uint32_t*>(lz77_window_storage_.get());
+    lz77_ctx_ = code->lz77.nonserialized_distance_context;
+    lz77_length_uint_ = code->lz77.length_uint_config;
+    lz77_threshold_ = code->lz77.min_symbol;
+    lz77_min_length_ = code->lz77.min_length;
+    num_special_distances_ =
+        distance_multiplier == 0 ? 0 : kNumSpecialDistances;  // multiplier 0 disables the table
+    for (size_t i = 0; i < num_special_distances_; i++) {
+      int dist = kSpecialDistances[i][0];
+      dist += static_cast<int>(distance_multiplier) * kSpecialDistances[i][1];
+      if (dist < 1) dist = 1;  // clamp: stored distances are always >= 1
+      special_distances_[i] = dist;
+    }
+  }
+
+  JXL_INLINE size_t ReadSymbolANSWithoutRefill(const size_t histo_idx,
+                                               BitReader* JXL_RESTRICT br) {
+    const uint32_t res = state_ & (ANS_TAB_SIZE - 1u);  // low state bits select the table slot
+
+    const AliasTable::Entry* table =
+        &alias_tables_[histo_idx << log_alpha_size_];
+    const AliasTable::Symbol symbol =
+        AliasTable::Lookup(table, res, log_entry_size_, entry_size_minus_1_);
+    state_ = symbol.freq * (state_ >> ANS_LOG_TAB_SIZE) + symbol.offset;
+
+#if 1
+    // Branchless version is about equally fast on SKX.
+    const uint32_t new_state =
+        (state_ << 16u) | static_cast<uint32_t>(br->PeekFixedBits<16>());
+    const bool normalize = state_ < (1u << 16u);  // state fell below 16 bits
+    state_ = normalize ? new_state : state_;
+    br->Consume(normalize ? 16 : 0);  // the 16 bits are consumed only if used
+#else
+    if (JXL_UNLIKELY(state_ < (1u << 16u))) {
+      state_ = (state_ << 16u) | br->PeekFixedBits<16>();
+      br->Consume(16);
+    }
+#endif
+    const uint32_t next_res = state_ & (ANS_TAB_SIZE - 1u);
+    AliasTable::Prefetch(table, next_res, log_entry_size_);  // for the next lookup in this table
+
+    return symbol.value;
+  }
+
+  JXL_INLINE size_t ReadSymbolHuffWithoutRefill(const size_t histo_idx,
+                                                BitReader* JXL_RESTRICT br) {
+    return huffman_data_[histo_idx].ReadSymbol(br);  // prefix-code path; no Refill() is issued here
+  }
+
+  JXL_INLINE size_t ReadSymbolWithoutRefill(const size_t histo_idx,
+                                            BitReader* JXL_RESTRICT br) {
+    // TODO(veluca): hoist if in hotter loops.
+    if (JXL_LIKELY(!use_prefix_code_)) {  // ANS is the expected decoder
+      return ReadSymbolANSWithoutRefill(histo_idx, br);
+    }
+    return ReadSymbolHuffWithoutRefill(histo_idx, br);
+  }
+
+  JXL_INLINE size_t ReadSymbol(const size_t histo_idx,
+                               BitReader* JXL_RESTRICT br) {
+    br->Refill();  // the *WithoutRefill variant does not refill by itself
+    return ReadSymbolWithoutRefill(histo_idx, br);
+  }
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+  bool CheckANSFinalState() const { return true; }  // fuzzing builds skip the check
+#else
+  bool CheckANSFinalState() const { return state_ == (ANS_SIGNATURE << 16u); }  // same value state_ is initialized to
+#endif
+
+  template <typename BitReader>  // any type providing PeekBits() and Consume()
+  static JXL_INLINE uint32_t ReadHybridUintConfig(
+      const HybridUintConfig& config, size_t token, BitReader* br) {
+    size_t split_token = config.split_token;
+    size_t msb_in_token = config.msb_in_token;
+    size_t lsb_in_token = config.lsb_in_token;
+    size_t split_exponent = config.split_exponent;
+    // Fast-track version of hybrid integer decoding.
+    if (token < split_token) return token;  // small tokens are the value itself
+    uint32_t nbits = split_exponent - (msb_in_token + lsb_in_token) +
+                     ((token - split_token) >> (msb_in_token + lsb_in_token));
+    // Max amount of bits for ReadBits is 32 and max valid left shift is 29
+    // bits. However, for speed no error is propagated here, instead limit the
+    // nbits size. If nbits > 29, the code stream is invalid, but no error is
+    // returned.
+    // Note that in most cases we will emit an error if the histogram allows
+    // representing numbers that would cause invalid shifts, but we need to
+    // keep this check as when LZ77 is enabled it might make sense to have an
+    // histogram that could in principle cause invalid shifts.
+    nbits &= 31u;  // limits nbits to [0, 31]
+    uint32_t low = token & ((1 << lsb_in_token) - 1);  // lowest value bits, carried by the token
+    token >>= lsb_in_token;  // what remains ends with the msb_in_token bits
+    const size_t bits = br->PeekBits(nbits);
+    br->Consume(nbits);
+    size_t ret = (((((1 << msb_in_token) | (token & ((1 << msb_in_token) - 1)))
+                    << nbits) |
+                   bits)
+                  << lsb_in_token) |
+                 low;
+    // TODO(eustas): mark BitReader as unhealthy if nbits > 29 or ret does not
+    //               fit uint32_t
+    return static_cast<uint32_t>(ret);  // truncates if ret exceeds 32 bits
+  }
+
+  // Takes a *clustered* idx. Can only use if HuffRleOnly() is true.
+  JXL_INLINE void ReadHybridUintClusteredHuffRleOnly(size_t ctx,
+                                                     BitReader* JXL_RESTRICT br,
+                                                     uint32_t* value,
+                                                     uint32_t* run) {
+    JXL_DASSERT(HuffRleOnly());
+    br->Refill();  // covers ReadSymbolWithoutRefill + PeekBits
+    size_t token = ReadSymbolHuffWithoutRefill(ctx, br);
+    if (JXL_UNLIKELY(token >= lz77_threshold_)) {  // length token: report a run
+      *run =
+          ReadHybridUintConfig(lz77_length_uint_, token - lz77_threshold_, br) +
+          lz77_min_length_ - 1;
+      return;  // *value is left untouched on this path
+    }
+    *value = ReadHybridUintConfig(configs[ctx], token, br);  // *run is left untouched
+  }
+  bool HuffRleOnly() {
+    if (lz77_window_ == nullptr) return false;  // LZ77 is not enabled
+    if (!use_prefix_code_) return false;  // the stream is ANS-coded
+    for (size_t i = 0; i < kHuffmanTableBits; i++) {  // inspects the distance context's table
+      if (huffman_data_[lz77_ctx_].table_[i].bits) return false;
+      if (huffman_data_[lz77_ctx_].table_[i].value != 1) return false;
+    }
+    if (configs[lz77_ctx_].split_token > 1) return false;
+    return true;
+  }
+  bool UsesLZ77() { return lz77_window_ != nullptr; }  // the window is only allocated when lz77.enabled
+
+  // Takes a *clustered* idx. Inlined, for use in hot paths.
+  template <bool uses_lz77>
+  JXL_INLINE size_t ReadHybridUintClusteredInlined(size_t ctx,
+                                                   BitReader* JXL_RESTRICT br) {
+    if (uses_lz77) {
+      if (JXL_UNLIKELY(num_to_copy_ > 0)) {  // an LZ77 copy is in progress
+        size_t ret = lz77_window_[(copy_pos_++) & kWindowMask];
+        num_to_copy_--;
+        lz77_window_[(num_decoded_++) & kWindowMask] = ret;
+        return ret;
+      }
+    }
+
+    br->Refill();  // covers ReadSymbolWithoutRefill + PeekBits
+    size_t token = ReadSymbolWithoutRefill(ctx, br);
+    if (uses_lz77) {
+      if (JXL_UNLIKELY(token >= lz77_threshold_)) {  // the token starts a copy
+        num_to_copy_ = ReadHybridUintConfig(lz77_length_uint_,
+                                            token - lz77_threshold_, br) +
+                       lz77_min_length_;
+        br->Refill();  // covers ReadSymbolWithoutRefill + PeekBits
+        // Distance code.
+        size_t token = ReadSymbolWithoutRefill(lz77_ctx_, br);
+        size_t distance = ReadHybridUintConfig(configs[lz77_ctx_], token, br);
+        if (JXL_LIKELY(distance < num_special_distances_)) {
+          distance = special_distances_[distance];  // table built in the constructor
+        } else {
+          distance = distance + 1 - num_special_distances_;
+        }
+        if (JXL_UNLIKELY(distance > num_decoded_)) {
+          distance = num_decoded_;  // cannot reach before the first decoded value
+        }
+        if (JXL_UNLIKELY(distance > kWindowSize)) {
+          distance = kWindowSize;
+        }
+        copy_pos_ = num_decoded_ - distance;
+        if (JXL_UNLIKELY(distance == 0)) {
+          JXL_DASSERT(lz77_window_ != nullptr);
+          // distance 0 -> num_decoded_ == copy_pos_ == 0
+          size_t to_fill = std::min<size_t>(num_to_copy_, kWindowSize);
+          memset(lz77_window_, 0, to_fill * sizeof(lz77_window_[0]));
+        }
+        // TODO(eustas): overflow; mark BitReader as unhealthy
+        if (num_to_copy_ < lz77_min_length_) return 0;  // the 32-bit length sum wrapped
+        // the code below is the same as doing this:
+        //        return ReadHybridUintClustered<uses_lz77>(ctx, br);
+        // but gcc doesn't like recursive inlining
+
+        size_t ret = lz77_window_[(copy_pos_++) & kWindowMask];
+        num_to_copy_--;
+        lz77_window_[(num_decoded_++) & kWindowMask] = ret;
+        return ret;
+      }
+    }
+    size_t ret = ReadHybridUintConfig(configs[ctx], token, br);
+    if (uses_lz77 && lz77_window_)
+      lz77_window_[(num_decoded_++) & kWindowMask] = ret;  // record the literal for later copies
+    return ret;
+  }
+
+  // Same as ReadHybridUintClusteredInlined, but not marked JXL_INLINE.
+  template <bool uses_lz77>
+  size_t ReadHybridUintClustered(size_t ctx, BitReader* JXL_RESTRICT br) {
+    return ReadHybridUintClusteredInlined<uses_lz77>(ctx, br);
+  }
+
+  // Dispatches on the compile-time flag: only the no-LZ77 variant uses the
+  // JXL_INLINE implementation; the LZ77 variant goes through the plain wrapper.
+  template <bool uses_lz77>
+  JXL_INLINE size_t
+  ReadHybridUintClusteredMaybeInlined(size_t ctx, BitReader* JXL_RESTRICT br) {
+    if (!uses_lz77) {
+      return ReadHybridUintClusteredInlined<uses_lz77>(ctx, br);
+    }
+    return ReadHybridUintClustered<uses_lz77>(ctx, br);
+  }
+
+  // Inlined, for use in hot paths; maps ctx through context_map, then decodes.
+  template <bool uses_lz77>
+  JXL_INLINE size_t
+  ReadHybridUintInlined(size_t ctx, BitReader* JXL_RESTRICT br,
+                        const std::vector<uint8_t>& context_map) {
+    return ReadHybridUintClustered<uses_lz77>(context_map[ctx], br);  // context_map[ctx] is the clustered idx
+  }
+
+  // Not inlined, for use in non-hot paths; always takes the LZ77-aware path.
+  size_t ReadHybridUint(size_t ctx, BitReader* JXL_RESTRICT br,
+                        const std::vector<uint8_t>& context_map) {
+    return ReadHybridUintClustered</*uses_lz77=*/true>(context_map[ctx], br);  // context_map[ctx] is the clustered idx
+  }
+
+  // ctx is a *clustered* context!
+  // On success, sets *value and advances the reader as if `count` symbols had
+  // been decoded: state_ is not written, only the LZ77 window (if any) grows.
+  bool IsSingleValueAndAdvance(size_t ctx, uint32_t* value, size_t count) {
+    // TODO(veluca): No optimization for Huffman mode yet.
+    if (use_prefix_code_) return false;
+    // TODO(eustas): propagate "degenerate_symbol" to simplify this method.
+    const uint32_t res = state_ & (ANS_TAB_SIZE - 1u);
+    const AliasTable::Entry* table = &alias_tables_[ctx << log_alpha_size_];
+    AliasTable::Symbol symbol =
+        AliasTable::Lookup(table, res, log_entry_size_, entry_size_minus_1_);
+    if (symbol.freq != ANS_TAB_SIZE) return false;  // requires a symbol with the full frequency
+    if (configs[ctx].split_token <= symbol.value) return false;  // token is not a direct value
+    if (symbol.value >= lz77_threshold_) return false;  // would be an LZ77 length token
+    *value = symbol.value;
+    if (lz77_window_) {
+      for (size_t i = 0; i < count; i++) {
+        lz77_window_[(num_decoded_++) & kWindowMask] = symbol.value;
+      }
+    }
+    return true;
+  }
+
+  static constexpr size_t kMaxCheckpointInterval = 512;  // window entries backed up per checkpoint
+  struct Checkpoint {
+    uint32_t state;        // copy of state_
+    uint32_t num_to_copy;  // copy of num_to_copy_
+    uint32_t copy_pos;     // copy of copy_pos_
+    uint32_t num_decoded;  // copy of num_decoded_
+    uint32_t lz77_window[kMaxCheckpointInterval];  // window slots starting at num_decoded
+  };
+  void Save(Checkpoint* checkpoint) {
+    checkpoint->state = state_;
+    checkpoint->num_decoded = num_decoded_;
+    checkpoint->num_to_copy = num_to_copy_;
+    checkpoint->copy_pos = copy_pos_;
+    if (lz77_window_) {  // back up the slots the next kMaxCheckpointInterval values overwrite
+      size_t win_start = num_decoded_ & kWindowMask;
+      size_t win_end = (num_decoded_ + kMaxCheckpointInterval) & kWindowMask;
+      if (win_end > win_start) {
+        memcpy(checkpoint->lz77_window, lz77_window_ + win_start,
+               (win_end - win_start) * sizeof(*lz77_window_));
+      } else {  // the range wraps around the end of the window: copy in two parts
+        memcpy(checkpoint->lz77_window, lz77_window_ + win_start,
+               (kWindowSize - win_start) * sizeof(*lz77_window_));
+        memcpy(checkpoint->lz77_window + (kWindowSize - win_start),
+               lz77_window_, win_end * sizeof(*lz77_window_));
+      }
+    }
+  }
+  void Restore(const Checkpoint& checkpoint) {
+    state_ = checkpoint.state;
+    JXL_DASSERT(num_decoded_ <=
+                checkpoint.num_decoded + kMaxCheckpointInterval);  // only that many slots were saved
+    num_decoded_ = checkpoint.num_decoded;
+    num_to_copy_ = checkpoint.num_to_copy;
+    copy_pos_ = checkpoint.copy_pos;
+    if (lz77_window_) {  // write the saved slots back, mirroring Save()
+      size_t win_start = num_decoded_ & kWindowMask;
+      size_t win_end = (num_decoded_ + kMaxCheckpointInterval) & kWindowMask;
+      if (win_end > win_start) {
+        memcpy(lz77_window_ + win_start, checkpoint.lz77_window,
+               (win_end - win_start) * sizeof(*lz77_window_));
+      } else {  // the range wraps around the end of the window: copy in two parts
+        memcpy(lz77_window_ + win_start, checkpoint.lz77_window,
+               (kWindowSize - win_start) * sizeof(*lz77_window_));
+        memcpy(lz77_window_, checkpoint.lz77_window + (kWindowSize - win_start),
+               win_end * sizeof(*lz77_window_));
+      }
+    }
+  }
+
+ private:
+  const AliasTable::Entry* JXL_RESTRICT alias_tables_;  // not owned
+  const HuffmanDecodingData* huffman_data_;  // points into ANSCode::huffman_data
+  bool use_prefix_code_;  // true: prefix codes; false: ANS
+  uint32_t state_ = ANS_SIGNATURE << 16u;  // ANS state; also the expected final value
+  const HybridUintConfig* JXL_RESTRICT configs;  // points into ANSCode::uint_config
+  uint32_t log_alpha_size_{};  // set only in ANS mode
+  uint32_t log_entry_size_{};  // ANS_LOG_TAB_SIZE - log_alpha_size_
+  uint32_t entry_size_minus_1_{};  // (1 << log_entry_size_) - 1
+
+  // LZ77 structures and constants.
+  static constexpr size_t kWindowMask = kWindowSize - 1;
+  CacheAlignedUniquePtr lz77_window_storage_;  // owns the window allocation
+  uint32_t* lz77_window_ = nullptr;  // nullptr when LZ77 is disabled
+  uint32_t num_decoded_ = 0;  // count of values written to the window
+  uint32_t num_to_copy_ = 0;  // values still to replay from the current copy
+  uint32_t copy_pos_ = 0;  // next window position to copy from
+  uint32_t lz77_ctx_ = 0;  // clustered context used for distance codes
+  uint32_t lz77_min_length_ = 0;
+  uint32_t lz77_threshold_ = 1 << 20;  // bigger than any symbol.
+  HybridUintConfig lz77_length_uint_;
+  uint32_t special_distances_[kNumSpecialDistances]{};  // filled by the constructor
+  uint32_t num_special_distances_{};  // 0 or kNumSpecialDistances
+};
+
+Status DecodeHistograms(BitReader* br, size_t num_contexts, ANSCode* code,
+                        std::vector<uint8_t>* context_map,
+                        bool disallow_lz77 = false);  // true: fail if the stream enables LZ77
+
+// Exposed for tests. Decodes into each entry of the already-sized uint_config.
+Status DecodeUintConfigs(size_t log_alpha_size,
+                         std::vector<HybridUintConfig>* uint_config,
+                         BitReader* br);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_ANS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_bit_reader.h b/third-party/libjxl/libjxl/lib/jxl/dec_bit_reader.h
new file mode 100644
index 0000000000..aea44505a3
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_bit_reader.h
@@ -0,0 +1,352 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_BIT_READER_H_
+#define LIB_JXL_DEC_BIT_READER_H_
+
+// Bounds-checked bit reader; 64-bit buffer with support for deferred refills
+// and switching to reading byte-aligned words.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>  // memcpy
+
+#ifdef __BMI2__
+#include <immintrin.h>
+#endif
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+
+namespace jxl {
+
+// Reads bits previously written to memory by BitWriter. Uses unaligned 8-byte
+// little-endian loads.
+class BitReader {
+ public:
+  static constexpr size_t kMaxBitsPerCall = 56;  // Refill() guarantees at least this many buffered bits
+
+  // Constructs an invalid BitReader, to be overwritten before usage.
+  BitReader()
+      : buf_(0),
+        bits_in_buf_(0),
+        next_byte_{nullptr},
+        end_minus_8_{nullptr},
+        first_byte_(nullptr) {}  // a null first_byte_ is what marks the reader invalid
+  BitReader(const BitReader&) = delete;
+
+  // bytes need not be aligned nor padded!
+  template <class ArrayLike>  // must provide data() and size()
+  explicit BitReader(const ArrayLike& bytes)
+      : buf_(0),
+        bits_in_buf_(0),
+        next_byte_(bytes.data()),
+        // Assumes first_byte_ >= 8 (presumably so that this cannot wrap).
+        end_minus_8_(bytes.data() - 8 + bytes.size()),
+        first_byte_(bytes.data()) {
+    Refill();  // pre-load the buffer so Peek*() can be used right away
+  }
+  ~BitReader() {
+    // Close() must be called before destroying an initialized bit reader.
+    // Invalid bit readers will have a nullptr in first_byte_.
+    JXL_ASSERT(close_called_ || !first_byte_);  // closed, or invalid/moved-from
+  }
+
+  // Move operator needs to invalidate the other BitReader such that it is
+  // irrelevant if we call Close() on it or not.
+  BitReader& operator=(BitReader&& other) noexcept {
+    // Ensure the current instance was already closed, before we overwrite it
+    // with other.
+    JXL_ASSERT(close_called_ || !first_byte_);
+    JXL_DASSERT(!other.close_called_);
+    buf_ = other.buf_;
+    bits_in_buf_ = other.bits_in_buf_;
+    next_byte_ = other.next_byte_;
+    end_minus_8_ = other.end_minus_8_;
+    first_byte_ = other.first_byte_;
+    overread_bytes_ = other.overread_bytes_;
+    close_called_ = other.close_called_;
+    // Take the bounds-check mark too; a stale one could mask overreads.
+    checked_out_of_bounds_bits_ = other.checked_out_of_bounds_bits_;
+    other.first_byte_ = nullptr;  // a null first_byte_ marks `other` invalid
+    other.next_byte_ = nullptr;
+    return *this;
+  }
+  BitReader& operator=(const BitReader& other) = delete;
+
+  // For time-critical reads, refills can be shared by multiple reads.
+  // Based on variant 4 (plus bounds-checking), see
+  // fgiesen.wordpress.com/2018/02/20/reading-bits-in-far-too-many-ways-part-2/
+  JXL_INLINE void Refill() {
+    if (JXL_UNLIKELY(next_byte_ > end_minus_8_)) {  // fewer than 8 input bytes remain
+      BoundsCheckedRefill();
+    } else {
+      // It's safe to load 64 bits; insert valid (possibly nonzero) bits above
+      // bits_in_buf_. The shift requires bits_in_buf_ < 64.
+      buf_ |= LoadLE64(next_byte_) << bits_in_buf_;
+
+      // Advance by bytes fully absorbed into the buffer.
+      next_byte_ += (63 - bits_in_buf_) >> 3;
+
+      // We absorbed a multiple of 8 bits, so the lower 3 bits of bits_in_buf_
+      // must remain unchanged, otherwise the next refill's shifted bits will
+      // not align with buf_. Set the three upper bits so the result >= 56.
+      bits_in_buf_ |= 56;
+      JXL_DASSERT(56 <= bits_in_buf_ && bits_in_buf_ < 64);
+    }
+  }
+
+  // Returns the bits that would be returned by Read without calling Consume().
+  // It is legal to PEEK at more bits than present in the bitstream (required
+  // by Huffman), and those bits will be zero.
+  template <size_t N>
+  JXL_INLINE uint64_t PeekFixedBits() const {
+    static_assert(N <= kMaxBitsPerCall, "Reading too many bits in one call.");
+    JXL_DASSERT(!close_called_);
+    return buf_ & ((1ULL << N) - 1);  // the low N bits of the buffer
+  }
+
+  JXL_INLINE uint64_t PeekBits(size_t nbits) const {
+    JXL_DASSERT(nbits <= kMaxBitsPerCall);
+    JXL_DASSERT(!close_called_);
+
+    // Slightly faster but requires BMI2. It is infeasible to make the many
+    // callers reside between begin/end_target, especially because only the
+    // callers in dec_ans are time-critical. Therefore only enabled if the
+    // entire binary is compiled for (and thus requires) BMI2.
+#if defined(__BMI2__) && defined(__x86_64__)
+    return _bzhi_u64(buf_, nbits);
+#else
+    const uint64_t mask = (1ULL << nbits) - 1;  // low nbits set
+    return buf_ & mask;
+#endif
+  }
+
+  // Removes bits from the buffer. Need not match the previous Peek size, but
+  // the buffer must contain at least num_bits (this prevents consuming more
+  // than the total number of bits).
+  JXL_INLINE void Consume(size_t num_bits) {
+    JXL_DASSERT(!close_called_);
+    JXL_DASSERT(bits_in_buf_ >= num_bits);
+#ifdef JXL_CRASH_ON_ERROR
+    // When JXL_CRASH_ON_ERROR is defined, it is a fatal error to read more bits
+    // than available in the stream. A non-zero overread_bytes_ implies that
+    // next_byte_ is already at the end of the stream, so we don't need to
+    // check that.
+    JXL_ASSERT(bits_in_buf_ >= num_bits + overread_bytes_ * kBitsPerByte);
+#endif
+    bits_in_buf_ -= num_bits;
+    buf_ >>= num_bits;  // drop the consumed low bits
+  }
+
+  JXL_INLINE uint64_t ReadBits(size_t nbits) {  // Refill + PeekBits + Consume
+    JXL_DASSERT(!close_called_);
+    Refill();
+    const uint64_t bits = PeekBits(nbits);
+    Consume(nbits);
+    return bits;
+  }
+
+  template <size_t N>
+  JXL_INLINE uint64_t ReadFixedBits() {  // Refill + PeekFixedBits<N> + Consume
+    JXL_DASSERT(!close_called_);
+    Refill();
+    const uint64_t bits = PeekFixedBits<N>();
+    Consume(N);
+    return bits;
+  }
+
+  // Equivalent to calling ReadFixedBits(1) `skip` times, but much faster.
+  // `skip` is typically large.
+  void SkipBits(size_t skip) {
+    JXL_DASSERT(!close_called_);
+    // Buffer is large enough - don't zero buf_ below.
+    if (JXL_UNLIKELY(skip <= bits_in_buf_)) {
+      Consume(skip);
+      return;
+    }
+
+    // First deduct what we can satisfy from the buffer
+    skip -= bits_in_buf_;
+    bits_in_buf_ = 0;
+    // Not enough to call Consume - that may leave some bits in the buffer
+    // which were previously ABOVE bits_in_buf.
+    buf_ = 0;
+
+    // Skip whole bytes
+    const size_t whole_bytes = skip / kBitsPerByte;
+    skip %= kBitsPerByte;
+    if (JXL_UNLIKELY(whole_bytes >
+                     static_cast<size_t>(end_minus_8_ + 8 - next_byte_))) {
+      // This is already an overflow condition (skipping past the end of the bit
+      // stream). However if we increase next_byte_ too much we risk overflowing
+      // that value and potentially making it valid again (next_byte_ < end).
+      // This will set next_byte_ to the end of the stream and still consume
+      // some bits in overread_bytes_, however the TotalBitsConsumed() will be
+      // incorrect (still larger than the TotalBytes()).
+      next_byte_ = end_minus_8_ + 8;
+      skip += kBitsPerByte;  // ensures at least one overread byte is consumed
+    } else {
+      next_byte_ += whole_bytes;
+    }
+
+    Refill();
+    Consume(skip);  // the remaining sub-byte part of the skip
+  }
+
+  size_t TotalBitsConsumed() const {
+    const size_t bytes_read = static_cast<size_t>(next_byte_ - first_byte_);
+    return (bytes_read + overread_bytes_) * kBitsPerByte - bits_in_buf_;  // loaded bits minus those still buffered
+  }
+
+  Status JumpToByteBoundary() {
+    // Bits consumed past the last byte boundary; 0 means already aligned.
+    const size_t misaligned_bits = TotalBitsConsumed() % kBitsPerByte;
+    const bool padding_ok =
+        misaligned_bits == 0 || ReadBits(kBitsPerByte - misaligned_bits) == 0;
+    if (JXL_UNLIKELY(!padding_ok)) return JXL_FAILURE("Non-zero padding bits");
+    return true;
+  }
+
+  // For interoperability with other bitreaders (for resuming at
+  // non-byte-aligned positions).
+  const uint8_t* FirstByte() const { return first_byte_; }  // start of the input; nullptr if invalid
+  size_t TotalBytes() const {
+    return static_cast<size_t>(end_minus_8_ + 8 - first_byte_);  // size of the whole input, in bytes
+  }
+
+  // Returns span of the remaining (unconsumed) bytes, e.g. for passing to
+  // external decoders such as Brotli.
+  Span<const uint8_t> GetSpan() const {
+    JXL_DASSERT(first_byte_ != nullptr);
+    JXL_ASSERT(TotalBitsConsumed() % kBitsPerByte == 0);  // must be byte-aligned
+    const size_t offset = TotalBitsConsumed() / kBitsPerByte;  // no remainder
+    JXL_ASSERT(offset <= TotalBytes());  // must not have read past the end
+    return Span<const uint8_t>(first_byte_ + offset, TotalBytes() - offset);
+  }
+
+  // Returns whether all the bits read so far have been within the input bounds.
+  // When reading past the EOF, the Read*() and Peek*() functions return zeros
+  // but flag a failure when calling Close() without checking this function.
+  Status AllReadsWithinBounds() {
+    // Record how far the caller has checked for out-of-bounds reads. If the
+    // caller handles the condition at a higher level (e.g. fetches more bytes
+    // from the network or returns a custom JXL_FAILURE), Close() should not
+    // output a debug error (which would break tests with JXL_CRASH_ON_ERROR
+    // even when the situation is handled legitimately). This is used by
+    // Bundle::CanRead.
+    const size_t consumed_bits = TotalBitsConsumed();
+    checked_out_of_bounds_bits_ = consumed_bits;
+    // In bounds iff no more bits were consumed than the input holds.
+    const bool within_bounds = consumed_bits <= TotalBytes() * kBitsPerByte;
+    return within_bounds;
+  }
+
+  // Close the bit reader and return whether all the previous reads were
+  // successful. Close must be called once.
+  Status Close() {
+    JXL_DASSERT(!close_called_);
+    close_called_ = true;
+    if (!first_byte_) return true;  // invalid or moved-from reader: nothing to check
+    if (TotalBitsConsumed() > checked_out_of_bounds_bits_ &&
+        TotalBitsConsumed() > TotalBytes() * kBitsPerByte) {  // overread not yet checked by the caller
+      return JXL_FAILURE("Read more bits than available in the bit_reader");
+    }
+    return true;
+  }
+
+ private:
+  // Separate function avoids inlining this relatively cold code into callers.
+  JXL_NOINLINE void BoundsCheckedRefill() {
+    const uint8_t* end = end_minus_8_ + 8;  // one past the last input byte
+
+    // Read whole bytes until we have [56, 64) bits (same as LoadLE64)
+    for (; bits_in_buf_ < 64 - kBitsPerByte; bits_in_buf_ += kBitsPerByte) {
+      if (next_byte_ >= end) break;  // input exhausted
+      buf_ |= static_cast<uint64_t>(*next_byte_++) << bits_in_buf_;
+    }
+    JXL_DASSERT(bits_in_buf_ < 64);
+
+    // Add extra bytes as 0 at the end of the stream in the bit_buffer_. If
+    // these bits are read, Close() will return a failure.
+    size_t extra_bytes = (63 - bits_in_buf_) / kBitsPerByte;
+    overread_bytes_ += extra_bytes;
+    bits_in_buf_ += extra_bytes * kBitsPerByte;  // buf_ itself is not modified here
+
+    JXL_DASSERT(bits_in_buf_ < 64);
+    JXL_DASSERT(bits_in_buf_ >= 56);
+  }
+
+  JXL_NOINLINE uint32_t BoundsCheckedReadByteAlignedWord() {
+    if (next_byte_ + 1 < end_minus_8_ + 8) {  // at least two input bytes remain
+      uint32_t ret = LoadLE16(next_byte_);
+      next_byte_ += 2;
+      return ret;
+    }
+    overread_bytes_ += 2;  // count the missing word as an overread
+    return 0;
+  }
+
+  uint64_t buf_;  // bit buffer; reads take the low bits first
+  size_t bits_in_buf_;  // [0, 64)
+  const uint8_t* JXL_RESTRICT next_byte_;  // next input byte to load into buf_
+  const uint8_t* end_minus_8_;  // for refill bounds check
+  const uint8_t* first_byte_;   // for GetSpan
+
+  // Number of bytes past the end that were loaded into the buf_. These bytes
+  // are not read from memory, but instead assumed 0. It is an error (likely due
+  // to an invalid stream) to Consume() more bits than specified in the range
+  // passed to the constructor.
+  uint64_t overread_bytes_{0};
+  bool close_called_{false};  // set by Close()
+
+  uint64_t checked_out_of_bounds_bits_{0};  // bit position at the last AllReadsWithinBounds() call
+};
+
+// Closes a BitReader when the BitReaderScopedCloser goes out of scope. When
+// closing the bit reader, if the status result was failure it sets this failure
+// to the passed variable pointer. Typical usage.
+//
+// Status ret = true;
+// {
+//   BitReader reader(...);
+//   BitReaderScopedCloser reader_closer(&reader, &ret);
+//
+//   // ... code that can return errors here ...
+// }
+// // ... more code that doesn't use the BitReader.
+// return ret;
+
+class BitReaderScopedCloser {
+ public:
+  BitReaderScopedCloser(BitReader* reader, Status* status)
+      : reader_(reader), status_(status) {
+    JXL_DASSERT(reader_ != nullptr);
+    JXL_DASSERT(status_ != nullptr);
+  }
+  ~BitReaderScopedCloser() {
+    if (reader_ != nullptr) {  // nullptr after CloseAndSuppressError()
+      Status close_ret = reader_->Close();
+      if (!close_ret) *status_ = close_ret;  // *status_ is left unchanged on success
+    }
+  }
+  void CloseAndSuppressError() {
+    JXL_ASSERT(reader_ != nullptr);
+    (void)reader_->Close();  // result deliberately discarded
+    reader_ = nullptr;  // so the destructor does not Close() a second time
+  }
+  BitReaderScopedCloser(const BitReaderScopedCloser&) = delete;
+
+ private:
+  BitReader* reader_;  // not owned; nullptr once closed explicitly
+  Status* status_;  // receives the failure from Close(), if any
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_BIT_READER_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_cache.cc b/third-party/libjxl/libjxl/lib/jxl/dec_cache.cc
new file mode 100644
index 0000000000..5cf34ebbbd
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_cache.cc
@@ -0,0 +1,231 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_cache.h"
+
+#include "lib/jxl/blending.h"
+#include "lib/jxl/render_pipeline/stage_blending.h"
+#include "lib/jxl/render_pipeline/stage_chroma_upsampling.h"
+#include "lib/jxl/render_pipeline/stage_epf.h"
+#include "lib/jxl/render_pipeline/stage_from_linear.h"
+#include "lib/jxl/render_pipeline/stage_gaborish.h"
+#include "lib/jxl/render_pipeline/stage_noise.h"
+#include "lib/jxl/render_pipeline/stage_patches.h"
+#include "lib/jxl/render_pipeline/stage_splines.h"
+#include "lib/jxl/render_pipeline/stage_spot.h"
+#include "lib/jxl/render_pipeline/stage_to_linear.h"
+#include "lib/jxl/render_pipeline/stage_tone_mapping.h"
+#include "lib/jxl/render_pipeline/stage_upsampling.h"
+#include "lib/jxl/render_pipeline/stage_write.h"
+#include "lib/jxl/render_pipeline/stage_xyb.h"
+#include "lib/jxl/render_pipeline/stage_ycbcr.h"
+
+namespace jxl {
+
+Status PassesDecoderState::PreparePipeline(ImageBundle* decoded,
+                                           PipelineOptions options) {
+  const FrameHeader& frame_header = shared->frame_header;
+  size_t num_c = 3 + frame_header.nonserialized_metadata->m.num_extra_channels;
+  if ((frame_header.flags & FrameHeader::kNoise) != 0) {
+    num_c += 3;  // Noise adds three channels (see the noise stages below).
+  }
+
+  if (frame_header.CanBeReferenced()) {
+    // Necessary so that SetInputSizes() can allocate output buffers as needed.
+    frame_storage_for_referencing = ImageBundle(decoded->metadata());
+  }
+
+  RenderPipeline::Builder builder(num_c);
+
+  if (options.use_slow_render_pipeline) {
+    builder.UseSimpleImplementation();
+  }
+
+  if (!frame_header.chroma_subsampling.Is444()) {
+    for (size_t c = 0; c < 3; c++) {
+      if (frame_header.chroma_subsampling.HShift(c) != 0) {
+        builder.AddStage(GetChromaUpsamplingStage(c, /*horizontal=*/true));
+      }
+      if (frame_header.chroma_subsampling.VShift(c) != 0) {
+        builder.AddStage(GetChromaUpsamplingStage(c, /*horizontal=*/false));
+      }
+    }
+  }
+
+  if (frame_header.loop_filter.gab) {
+    builder.AddStage(GetGaborishStage(frame_header.loop_filter));
+  }
+  // EPF stages; which of them are added depends on loop_filter.epf_iters.
+  {
+    const LoopFilter& lf = frame_header.loop_filter;
+    if (lf.epf_iters >= 3) {
+      builder.AddStage(GetEPFStage(lf, sigma, 0));
+    }
+    if (lf.epf_iters >= 1) {
+      builder.AddStage(GetEPFStage(lf, sigma, 1));
+    }
+    if (lf.epf_iters >= 2) {
+      builder.AddStage(GetEPFStage(lf, sigma, 2));
+    }
+  }
+  // late_ec_upsample: frame upsampling != 1 and all extra channels share it.
+  bool late_ec_upsample = frame_header.upsampling != 1;
+  for (auto ecups : frame_header.extra_channel_upsampling) {
+    if (ecups != frame_header.upsampling) {
+      // If patches are applied, either frame_header.upsampling == 1 or
+      // late_ec_upsample is true.
+      late_ec_upsample = false;
+    }
+  }
+
+  if (!late_ec_upsample) {
+    for (size_t ec = 0; ec < frame_header.extra_channel_upsampling.size();
+         ec++) {
+      if (frame_header.extra_channel_upsampling[ec] != 1) {
+        builder.AddStage(GetUpsamplingStage(
+            frame_header.nonserialized_metadata->transform_data, 3 + ec,
+            CeilLog2Nonzero(frame_header.extra_channel_upsampling[ec])));
+      }
+    }
+  }
+
+  if ((frame_header.flags & FrameHeader::kPatches) != 0) {
+    builder.AddStage(
+        GetPatchesStage(&shared->image_features.patches,
+                        3 + shared->metadata->m.num_extra_channels));
+  }
+  if ((frame_header.flags & FrameHeader::kSplines) != 0) {
+    builder.AddStage(GetSplineStage(&shared->image_features.splines));
+  }
+
+  if (frame_header.upsampling != 1) {
+    size_t nb_channels =
+        3 +
+        (late_ec_upsample ? frame_header.extra_channel_upsampling.size() : 0);
+    for (size_t c = 0; c < nb_channels; c++) {
+      builder.AddStage(GetUpsamplingStage(
+          frame_header.nonserialized_metadata->transform_data, c,
+          CeilLog2Nonzero(frame_header.upsampling)));
+    }
+  }
+
+  if ((frame_header.flags & FrameHeader::kNoise) != 0) {
+    builder.AddStage(GetConvolveNoiseStage(num_c - 3));
+    builder.AddStage(GetAddNoiseStage(shared->image_features.noise_params,
+                                      shared->cmap, num_c - 3));
+  }
+  if (frame_header.dc_level != 0) {
+    builder.AddStage(GetWriteToImage3FStage(
+        &shared_storage.dc_frames[frame_header.dc_level - 1]));
+  }
+
+  if (frame_header.CanBeReferenced() &&
+      frame_header.save_before_color_transform) {
+    builder.AddStage(GetWriteToImageBundleStage(
+        &frame_storage_for_referencing, output_encoding_info.color_encoding));
+  }
+  // Locate the first alpha channel, if any; extra channels start at index 3.
+  bool has_alpha = false;
+  size_t alpha_c = 0;
+  for (size_t i = 0; i < decoded->metadata()->extra_channel_info.size(); i++) {
+    if (decoded->metadata()->extra_channel_info[i].type ==
+        ExtraChannel::kAlpha) {
+      has_alpha = true;
+      alpha_c = 3 + i;
+      break;
+    }
+  }
+
+  if (fast_xyb_srgb8_conversion) {
+#if !JXL_HIGH_PRECISION
+    JXL_ASSERT(!NeedsBlending(this));
+    JXL_ASSERT(!frame_header.CanBeReferenced() ||
+               frame_header.save_before_color_transform);
+    JXL_ASSERT(!options.render_spotcolors ||
+               !decoded->metadata()->Find(ExtraChannel::kSpotColor));
+    bool is_rgba = (main_output.format.num_channels == 4);
+    uint8_t* rgb_output = reinterpret_cast<uint8_t*>(main_output.buffer);
+    builder.AddStage(GetFastXYBTosRGB8Stage(rgb_output, main_output.stride,
+                                            width, height, is_rgba, has_alpha,
+                                            alpha_c));
+#endif  // !JXL_HIGH_PRECISION
+  } else {
+    bool linear = false;
+    if (frame_header.color_transform == ColorTransform::kYCbCr) {
+      builder.AddStage(GetYCbCrStage());
+    } else if (frame_header.color_transform == ColorTransform::kXYB) {
+      builder.AddStage(GetXYBStage(output_encoding_info));
+      if (output_encoding_info.color_encoding.GetColorSpace() !=
+          ColorSpace::kXYB) {
+        linear = true;
+      }
+    }  // Nothing to do for kNone.
+
+    if (options.coalescing && NeedsBlending(this)) {
+      if (linear) {
+        builder.AddStage(GetFromLinearStage(output_encoding_info));
+        linear = false;
+      }
+      builder.AddStage(
+          GetBlendingStage(this, output_encoding_info.color_encoding));
+    }
+
+    if (options.coalescing && frame_header.CanBeReferenced() &&
+        !frame_header.save_before_color_transform) {
+      if (linear) {
+        builder.AddStage(GetFromLinearStage(output_encoding_info));
+        linear = false;
+      }
+      builder.AddStage(GetWriteToImageBundleStage(
+          &frame_storage_for_referencing, output_encoding_info.color_encoding));
+    }
+
+    if (options.render_spotcolors &&
+        frame_header.nonserialized_metadata->m.Find(ExtraChannel::kSpotColor)) {
+      for (size_t i = 0; i < decoded->metadata()->extra_channel_info.size();
+           i++) {
+        // Don't use Find() because there may be multiple spot color channels.
+        const ExtraChannelInfo& eci =
+            decoded->metadata()->extra_channel_info[i];
+        if (eci.type == ExtraChannel::kSpotColor) {
+          builder.AddStage(GetSpotColorStage(3 + i, eci.spot_color));
+        }
+      }
+    }
+
+    auto tone_mapping_stage = GetToneMappingStage(output_encoding_info);
+    if (tone_mapping_stage) {
+      if (!linear) {
+        auto to_linear_stage = GetToLinearStage(output_encoding_info);
+        if (!to_linear_stage) {
+          return JXL_FAILURE(
+              "attempting to perform tone mapping on colorspace not "
+              "convertible to linear");
+        }
+        builder.AddStage(std::move(to_linear_stage));
+        linear = true;
+      }
+      builder.AddStage(std::move(tone_mapping_stage));
+    }
+
+    if (linear) {
+      builder.AddStage(GetFromLinearStage(output_encoding_info));
+      linear = false;
+    }
+
+    if (main_output.callback.IsPresent() || main_output.buffer) {
+      builder.AddStage(GetWriteToOutputStage(main_output, width, height,
+                                             has_alpha, unpremul_alpha, alpha_c,
+                                             undo_orientation, extra_output));
+    } else {
+      builder.AddStage(GetWriteToImageBundleStage(
+          decoded, output_encoding_info.color_encoding));
+    }
+  }
+  render_pipeline = std::move(builder).Finalize(shared->frame_dim);
+  return render_pipeline->IsInitialized();
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_cache.h b/third-party/libjxl/libjxl/lib/jxl/dec_cache.h
new file mode 100644
index 0000000000..051638a2cb
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_cache.h
@@ -0,0 +1,258 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_CACHE_H_
+#define LIB_JXL_DEC_CACHE_H_
+
+#include <jxl/decode.h>
+#include <stdint.h>
+
+#include <atomic>
+#include <hwy/base.h>  // HWY_ALIGN_MAX
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/dec_group_border.h"
+#include "lib/jxl/dec_noise.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/passes_state.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/render_pipeline/render_pipeline.h"
+#include "lib/jxl/render_pipeline/stage_upsampling.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+
+constexpr size_t kSigmaBorder = 1;
+constexpr size_t kSigmaPadding = 2;
+
+struct PixelCallback {
+  PixelCallback() = default;  // No callbacks set.
+  PixelCallback(JxlImageOutInitCallback init, JxlImageOutRunCallback run,
+                JxlImageOutDestroyCallback destroy, void* init_opaque)
+      : init(init), run(run), destroy(destroy), init_opaque(init_opaque) {
+#if JXL_ENABLE_ASSERT
+    const bool has_init = init != nullptr;
+    const bool has_run = run != nullptr;
+    const bool has_destroy = destroy != nullptr;  // All three or none.
+    JXL_ASSERT(has_init == has_run && has_run == has_destroy);
+#endif  // JXL_ENABLE_ASSERT
+  }
+  // True if a run callback is set.
+  bool IsPresent() const { return run != nullptr; }
+  // Calls `init` with `init_opaque`; `init` must be non-null (not checked).
+  void* Init(size_t num_threads, size_t num_pixels) const {
+    return init(init_opaque, num_threads, num_pixels);
+  }
+
+  JxlImageOutInitCallback init = nullptr;
+  JxlImageOutRunCallback run = nullptr;
+  JxlImageOutDestroyCallback destroy = nullptr;
+  void* init_opaque = nullptr;
+};
+
+struct ImageOutput {
+  // Pixel format of the output pixels, used for buffer and callback output.
+  JxlPixelFormat format;
+  // Output bit depth for unsigned data types, used for float to int conversion.
+  size_t bits_per_sample;
+  // Callback for line-by-line output.
+  PixelCallback callback;
+  // Pixel buffer for image output.
+  void* buffer;
+  size_t buffer_size;  // NOTE(review): presumably the size of `buffer` in bytes.
+  // Length of a row of `buffer` in bytes (based on oriented width).
+  size_t stride;
+};
+
+// Per-frame decoder state. All the images here should be accessed through a
+// group rect (either with block units or pixel units).
+struct PassesDecoderState {
+  PassesSharedState shared_storage;
+  // Allows avoiding copies for encoder loop.
+  const PassesSharedState* JXL_RESTRICT shared = &shared_storage;
+
+  // 8x upsampling stage for DC.
+  std::unique_ptr<RenderPipelineStage> upsampler8x;
+
+  // For ANS decoding.
+  std::vector<ANSCode> code;
+  std::vector<std::vector<uint8_t>> context_map;
+
+  // Multipliers to be applied to the quant matrices of the x and b channels.
+  float x_dm_multiplier;
+  float b_dm_multiplier;
+
+  // Sigma values for EPF.
+  ImageF sigma;
+
+  // Image dimensions before applying undo_orientation.
+  size_t width;
+  size_t height;
+  ImageOutput main_output;
+  std::vector<ImageOutput> extra_output;
+
+  // Whether to use int16 float-XYB-to-uint8-srgb conversion.
+  bool fast_xyb_srgb8_conversion;
+
+  // If true, the RGBA output will be unpremultiplied before writing to the
+  // output.
+  bool unpremul_alpha;
+
+  // The render pipeline will apply this orientation to bring the image to the
+  // intended display orientation.
+  Orientation undo_orientation;
+
+  // Used for seeding noise.
+  size_t visible_frame_index = 0;
+  size_t nonvisible_frame_index = 0;
+
+  // Keep track of the transform types used.
+  std::atomic<uint32_t> used_acs{0};
+
+  // Storage for coefficients if in "accumulate" mode.
+  std::unique_ptr<ACImage> coefficients = make_unique<ACImageT<int32_t>>(0, 0);
+
+  // Rendering pipeline.
+  std::unique_ptr<RenderPipeline> render_pipeline;
+
+  // Storage for the current frame if it can be referenced by future frames.
+  ImageBundle frame_storage_for_referencing;
+
+  struct PipelineOptions {
+    bool use_slow_render_pipeline;
+    bool coalescing;
+    bool render_spotcolors;
+  };
+  // Builds render_pipeline from the frame header, the outputs and `options`.
+  Status PreparePipeline(ImageBundle* decoded, PipelineOptions options);
+
+  // Information for colour conversions.
+  OutputEncodingInfo output_encoding_info;
+
+  // Initializes decoder-specific structures using information from *shared.
+  Status Init() {
+    x_dm_multiplier =
+        std::pow(1 / (1.25f), shared->frame_header.x_qm_scale - 2.0f);
+    b_dm_multiplier =
+        std::pow(1 / (1.25f), shared->frame_header.b_qm_scale - 2.0f);
+
+    main_output.callback = PixelCallback();
+    main_output.buffer = nullptr;
+    extra_output.clear();
+
+    fast_xyb_srgb8_conversion = false;
+    unpremul_alpha = false;
+    undo_orientation = Orientation::kIdentity;
+
+    used_acs = 0;
+
+    upsampler8x = GetUpsamplingStage(shared->metadata->transform_data, 0, 3);
+    if (shared->frame_header.loop_filter.epf_iters > 0) {
+      sigma = ImageF(shared->frame_dim.xsize_blocks + 2 * kSigmaPadding,
+                     shared->frame_dim.ysize_blocks + 2 * kSigmaPadding);
+    }
+    return true;
+  }
+
+  // Initialize the decoder state after all of DC is decoded.
+  Status InitForAC(ThreadPool* pool) {
+    shared_storage.coeff_order_size = 0;
+    for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+      if (((1 << o) & used_acs) == 0) continue;
+      uint8_t ord = kStrategyOrder[o];
+      shared_storage.coeff_order_size =
+          std::max(kCoeffOrderOffset[3 * (ord + 1)] * kDCTBlockSize,
+                   shared_storage.coeff_order_size);
+    }
+    size_t sz = shared_storage.frame_header.passes.num_passes *
+                shared_storage.coeff_order_size;
+    if (sz > shared_storage.coeff_orders.size()) {
+      shared_storage.coeff_orders.resize(sz);
+    }
+    return true;
+  }
+
+  // Fills the `state->sigma` image with the precomputed sigma values in the
+  // area inside `block_rect`. Accesses the AC strategy, quant field and
+  // epf_sharpness fields in the corresponding positions.
+  void ComputeSigma(const Rect& block_rect, PassesDecoderState* state);
+};
+
+// Temp images required for decoding a single group. Reduces memory allocations
+// for large images because we only initialize min(#threads, #groups) instances.
+struct GroupDecCache {
+  void InitOnce(size_t num_passes, size_t used_acs) {
+    for (size_t i = 0; i < num_passes; i++) {
+      if (num_nzeroes[i].xsize() == 0) {
+        // Allocate enough for a whole group - partial groups on the
+        // right/bottom border just use a subset. The valid size is passed via
+        // Rect.
+
+        num_nzeroes[i] = Image3I(kGroupDimInBlocks, kGroupDimInBlocks);
+      }
+    }
+    size_t max_block_area = 0;
+    // Largest coefficient count of any AC strategy flagged in used_acs.
+    for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+      AcStrategy acs = AcStrategy::FromRawStrategy(o);
+      if ((used_acs & (1 << o)) == 0) continue;
+      size_t area =
+          acs.covered_blocks_x() * acs.covered_blocks_y() * kDCTBlockSize;
+      max_block_area = std::max(area, max_block_area);
+    }
+
+    if (max_block_area > max_block_area_) {
+      max_block_area_ = max_block_area;
+      // We need 3x float blocks for dequantized coefficients and 1x for scratch
+      // space for transforms.
+      float_memory_ = hwy::AllocateAligned<float>(max_block_area_ * 4);
+      // We need 3x int32 or int16 blocks for quantized coefficients.
+      int32_memory_ = hwy::AllocateAligned<int32_t>(max_block_area_ * 3);
+      int16_memory_ = hwy::AllocateAligned<int16_t>(max_block_area_ * 3);
+    }
+
+    dec_group_block = float_memory_.get();
+    scratch_space = dec_group_block + max_block_area_ * 3;
+    dec_group_qblock = int32_memory_.get();
+    dec_group_qblock16 = int16_memory_.get();
+  }
+
+  void InitDCBufferOnce() {
+    if (dc_buffer.xsize() == 0) {
+      dc_buffer = ImageF(kGroupDimInBlocks + kRenderPipelineXOffset * 2,
+                         kGroupDimInBlocks + 4);
+    }
+  }
+
+  // Scratch space used by DecGroupImpl().
+  float* dec_group_block;
+  int32_t* dec_group_qblock;
+  int16_t* dec_group_qblock16;
+
+  // For TransformToPixels.
+  float* scratch_space;
+  // Note that scratch_space is never used at the same time as dec_group_qblock.
+  // Moreover, presumably only one of dec_group_qblock / dec_group_qblock16 is
+  // ever used. TODO(veluca): figure out if we can save allocations.
+
+  // AC decoding
+  Image3I num_nzeroes[kMaxNumPasses];
+
+  // Buffer for DC upsampling.
+  ImageF dc_buffer;
+
+ private:
+  hwy::AlignedFreeUniquePtr<float[]> float_memory_;
+  hwy::AlignedFreeUniquePtr<int32_t[]> int32_memory_;
+  hwy::AlignedFreeUniquePtr<int16_t[]> int16_memory_;
+  size_t max_block_area_ = 0;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_CACHE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_context_map.cc b/third-party/libjxl/libjxl/lib/jxl/dec_context_map.cc
new file mode 100644
index 0000000000..ffb29aad6b
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_context_map.cc
@@ -0,0 +1,89 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_context_map.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/inverse_mtf-inl.h"
+
+namespace jxl {
+
+namespace {
+
+Status VerifyContextMap(const std::vector<uint8_t>& context_map,
+                        const size_t num_htrees) {
+  // Valid maps only contain indices below num_htrees and reference each of
+  // those histogram indices at least once.
+  std::vector<bool> seen(num_htrees, false);
+  size_t num_distinct = 0;
+  for (size_t i = 0; i < context_map.size(); ++i) {
+    const size_t index = context_map[i];
+    if (index >= num_htrees) {
+      return JXL_FAILURE("Invalid histogram index in context map.");
+    }
+    if (seen[index]) continue;
+    seen[index] = true;
+    ++num_distinct;
+  }
+  if (num_distinct == num_htrees) return true;
+  return JXL_FAILURE("Incomplete context map.");
+}
+
+}  // namespace
+
+// Reads the context map from `input` into *context_map (its size sets how
+// many entries are read) and sets *num_htrees to the largest entry plus one.
+Status DecodeContextMap(std::vector<uint8_t>* context_map, size_t* num_htrees,
+                        BitReader* input) {
+  // An empty map has no largest entry: fail here instead of dereferencing the
+  // end iterator returned by std::max_element() below.
+  if (context_map->empty()) return JXL_FAILURE("Empty context map");
+  bool is_simple = input->ReadFixedBits<1>();
+  if (is_simple) {
+    int bits_per_entry = input->ReadFixedBits<2>();
+    if (bits_per_entry != 0) {
+      for (size_t i = 0; i < context_map->size(); i++) {
+        (*context_map)[i] = input->ReadBits(bits_per_entry);
+      }
+    } else {
+      std::fill(context_map->begin(), context_map->end(), 0);
+    }
+  } else {
+    bool use_mtf = input->ReadFixedBits<1>();
+    ANSCode code;
+    std::vector<uint8_t> dummy_ctx_map;
+    // Usage of LZ77 is disallowed if decoding only two symbols. This doesn't
+    // make sense in non-malicious bitstreams, and could cause a stack overflow
+    // in malicious bitstreams by making every context map require its own
+    // context map.
+    JXL_RETURN_IF_ERROR(
+        DecodeHistograms(input, 1, &code, &dummy_ctx_map,
+                         /*disallow_lz77=*/context_map->size() <= 2));
+    ANSSymbolReader reader(&code, input);
+    uint32_t maxsym = 0;
+    for (size_t i = 0; i < context_map->size(); i++) {
+      uint32_t sym = reader.ReadHybridUintInlined</*uses_lz77=*/true>(
+          0, input, dummy_ctx_map);
+      maxsym = sym > maxsym ? sym : maxsym;
+      (*context_map)[i] = sym;  // Truncates; maxsym is range-checked below.
+    }
+    if (maxsym >= kMaxClusters) return JXL_FAILURE("Invalid cluster ID");
+    if (!reader.CheckANSFinalState()) return JXL_FAILURE("Invalid context map");
+    if (use_mtf) {
+      InverseMoveToFrontTransform(context_map->data(), context_map->size());
+    }
+  }
+  // The number of histograms is the largest index in the map plus one.
+  *num_htrees = *std::max_element(context_map->begin(), context_map->end()) + 1;
+  return VerifyContextMap(*context_map, *num_htrees);
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_context_map.h b/third-party/libjxl/libjxl/lib/jxl/dec_context_map.h
new file mode 100644
index 0000000000..95b8a0ca92
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_context_map.h
@@ -0,0 +1,30 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_CONTEXT_MAP_H_
+#define LIB_JXL_DEC_CONTEXT_MAP_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/dec_bit_reader.h"
+
+namespace jxl {
+
+// Context map uses uint8_t.
+constexpr size_t kMaxClusters = 256;
+
+// Reads the context map from the bit stream. On calling this function,
+// context_map->size() must be the number of possible context ids.
+// Sets *num_htrees to the number of different histogram ids in
+// *context_map.
+Status DecodeContextMap(std::vector<uint8_t>* context_map, size_t* num_htrees,
+                        BitReader* input);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_CONTEXT_MAP_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_external_image.cc b/third-party/libjxl/libjxl/lib/jxl/dec_external_image.cc
new file mode 100644
index 0000000000..1661d99965
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_external_image.cc
@@ -0,0 +1,481 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_external_image.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dec_external_image.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/cache_aligned.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Clamp;
+using hwy::HWY_NAMESPACE::NearestInt;
+
+// TODO(jon): check if this can be replaced by a FloatToU16 function
+void FloatToU32(const float* in, uint32_t* out, size_t num, float mul,
+                size_t bits_per_sample) {  // bits_per_sample is unused here.
+  const HWY_FULL(float) d;
+  const hwy::HWY_NAMESPACE::Rebind<uint32_t, decltype(d)> du;
+
+  // Unpoison accessing partially-uninitialized vectors with memory sanitizer.
+  // This is because we run NearestInt() on the vector, which triggers msan even
+  // if it is safe to do so since the values are not mixed between lanes.
+  const size_t num_round_up = RoundUpTo(num, Lanes(d));
+  msan::UnpoisonMemory(in + num, sizeof(in[0]) * (num_round_up - num));
+  // Clamp to [0, 1], scale by `mul` and round to the nearest integer.
+  const auto one = Set(d, 1.0f);
+  const auto scale = Set(d, mul);
+  for (size_t x = 0; x < num; x += Lanes(d)) {
+    auto v = Load(d, in + x);
+    // Clamp turns NaN to 'min'.
+    v = Clamp(v, Zero(d), one);
+    auto i = NearestInt(Mul(v, scale));
+    Store(BitCast(du, i), du, out + x);
+  }
+
+  // Poison back the output.
+  msan::PoisonMemory(out + num, sizeof(out[0]) * (num_round_up - num));
+}
+
+void FloatToF16(const float* in, hwy::float16_t* out, size_t num) {
+  const HWY_FULL(float) df32;
+  const hwy::HWY_NAMESPACE::Rebind<hwy::float16_t, decltype(df32)> df16;
+  const size_t lanes = Lanes(df32);
+  // The last vector may extend past `num`; tell memory sanitizer that reading
+  // those padding floats is fine, since DemoteTo() would otherwise trigger it.
+  const size_t padded_num = RoundUpTo(num, lanes);
+  msan::UnpoisonMemory(in + num, sizeof(in[0]) * (padded_num - num));
+
+  // Demote one full vector of floats to float16 per iteration.
+  for (size_t pos = 0; pos < num; pos += lanes) {
+    const auto halves = DemoteTo(df16, Load(df32, in + pos));
+    Store(halves, df16, out + pos);
+  }
+
+  // Poison back the output.
+  msan::PoisonMemory(out + num, sizeof(out[0]) * (padded_num - num));
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace jxl {
+namespace {
+
+// Writes the 32-bit pattern of `value` to `p` in big-endian byte order.
+void StoreBEFloat(float value, uint8_t* p) {
+  uint32_t bits;
+  memcpy(&bits, &value, sizeof(bits));
+  StoreBE32(bits, p);
+}
+
+// Writes the 32-bit pattern of `value` to `p` in little-endian byte order.
+void StoreLEFloat(float value, uint8_t* p) {
+  uint32_t bits;
+  memcpy(&bits, &value, sizeof(bits));
+  StoreLE32(bits, p);
+}
+
+// Writes `image` transformed by `undo_orientation` to `out`; kIdentity (or any
+// unhandled value) leaves `out` untouched. TODO(lode): SIMDify where possible
+template <typename T>
+Status UndoOrientation(jxl::Orientation undo_orientation, const Plane<T>& image,
+                       Plane<T>& out, jxl::ThreadPool* pool) {
+  const size_t xsize = image.xsize();
+  const size_t ysize = image.ysize();
+
+  if (undo_orientation == Orientation::kFlipHorizontal) {
+    out = Plane<T>(xsize, ysize);
+    JXL_RETURN_IF_ERROR(RunOnPool(
+        pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
+        [&](const uint32_t task, size_t /*thread*/) {
+          const int64_t y = task;
+          const T* JXL_RESTRICT row_in = image.Row(y);
+          T* JXL_RESTRICT row_out = out.Row(y);
+          for (size_t x = 0; x < xsize; ++x) {
+            row_out[xsize - x - 1] = row_in[x];
+          }
+        },
+        "UndoOrientation"));
+  } else if (undo_orientation == Orientation::kRotate180) {
+    out = Plane<T>(xsize, ysize);
+    JXL_RETURN_IF_ERROR(RunOnPool(
+        pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
+        [&](const uint32_t task, size_t /*thread*/) {
+          const int64_t y = task;
+          const T* JXL_RESTRICT row_in = image.Row(y);
+          T* JXL_RESTRICT row_out = out.Row(ysize - y - 1);
+          for (size_t x = 0; x < xsize; ++x) {
+            row_out[xsize - x - 1] = row_in[x];
+          }
+        },
+        "UndoOrientation"));
+  } else if (undo_orientation == Orientation::kFlipVertical) {
+    out = Plane<T>(xsize, ysize);
+    JXL_RETURN_IF_ERROR(RunOnPool(
+        pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
+        [&](const uint32_t task, size_t /*thread*/) {
+          const int64_t y = task;
+          const T* JXL_RESTRICT row_in = image.Row(y);
+          T* JXL_RESTRICT row_out = out.Row(ysize - y - 1);
+          for (size_t x = 0; x < xsize; ++x) {
+            row_out[x] = row_in[x];
+          }
+        },
+        "UndoOrientation"));
+  } else if (undo_orientation == Orientation::kTranspose) {
+    out = Plane<T>(ysize, xsize);  // Width and height are swapped.
+    JXL_RETURN_IF_ERROR(RunOnPool(
+        pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
+        [&](const uint32_t task, size_t /*thread*/) {
+          const int64_t y = task;
+          const T* JXL_RESTRICT row_in = image.Row(y);
+          for (size_t x = 0; x < xsize; ++x) {
+            out.Row(x)[y] = row_in[x];
+          }
+        },
+        "UndoOrientation"));
+  } else if (undo_orientation == Orientation::kRotate90) {
+    out = Plane<T>(ysize, xsize);  // Width and height are swapped.
+    JXL_RETURN_IF_ERROR(RunOnPool(
+        pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
+        [&](const uint32_t task, size_t /*thread*/) {
+          const int64_t y = task;
+          const T* JXL_RESTRICT row_in = image.Row(y);
+          for (size_t x = 0; x < xsize; ++x) {
+            out.Row(x)[ysize - y - 1] = row_in[x];
+          }
+        },
+        "UndoOrientation"));
+  } else if (undo_orientation == Orientation::kAntiTranspose) {
+    out = Plane<T>(ysize, xsize);  // Width and height are swapped.
+    JXL_RETURN_IF_ERROR(RunOnPool(
+        pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
+        [&](const uint32_t task, size_t /*thread*/) {
+          const int64_t y = task;
+          const T* JXL_RESTRICT row_in = image.Row(y);
+          for (size_t x = 0; x < xsize; ++x) {
+            out.Row(xsize - x - 1)[ysize - y - 1] = row_in[x];
+          }
+        },
+        "UndoOrientation"));
+  } else if (undo_orientation == Orientation::kRotate270) {
+    out = Plane<T>(ysize, xsize);  // Width and height are swapped.
+    JXL_RETURN_IF_ERROR(RunOnPool(
+        pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
+        [&](const uint32_t task, size_t /*thread*/) {
+          const int64_t y = task;
+          const T* JXL_RESTRICT row_in = image.Row(y);
+          for (size_t x = 0; x < xsize; ++x) {
+            out.Row(xsize - x - 1)[y] = row_in[x];
+          }
+        },
+        "UndoOrientation"));
+  }
+  return true;
+}
+}  // namespace
+
+HWY_EXPORT(FloatToU32);
+HWY_EXPORT(FloatToF16);
+
+namespace {
+
+using StoreFuncType = void(uint32_t value, uint8_t* dest);
+template <StoreFuncType StoreFunc>
+void StoreUintRow(uint32_t* JXL_RESTRICT* rows_u32, size_t num_channels,
+                  size_t xsize, size_t bytes_per_sample,
+                  uint8_t* JXL_RESTRICT out) {
+  // Output is interleaved: all channels of a pixel are stored back to back.
+  for (size_t px = 0; px < xsize; ++px) {
+    for (size_t ch = 0; ch < num_channels; ++ch, out += bytes_per_sample) {
+      StoreFunc(rows_u32[ch][px], out);
+    }
+  }
+}
+
+template <void(StoreFunc)(float, uint8_t*)>
+void StoreFloatRow(const float* JXL_RESTRICT* rows_in, size_t num_channels,
+                   size_t xsize, uint8_t* JXL_RESTRICT out) {
+  for (size_t px = 0; px < xsize; ++px) {
+    for (size_t ch = 0; ch < num_channels; ++ch, out += sizeof(float)) {
+      StoreFunc(rows_in[ch][px], out);  // Interleaved: all channels per pixel.
+    }
+  }
+}
+
+void JXL_INLINE Store8(uint32_t value, uint8_t* dest) { *dest = value & 0xff; }
+
+}  // namespace
+
+Status ConvertChannelsToExternal(const ImageF* channels[], size_t num_channels,
+                                 size_t bits_per_sample, bool float_out,
+                                 JxlEndianness endianness, size_t stride,
+                                 jxl::ThreadPool* pool, void* out_image,
+                                 size_t out_size,
+                                 const PixelCallback& out_callback,
+                                 jxl::Orientation undo_orientation) {  // Interleaves planar float channels into out_image or feeds rows to out_callback.
+  JXL_DASSERT(num_channels != 0 && num_channels <= kConvertMaxChannels);
+  JXL_DASSERT(channels[0] != nullptr);
+  JXL_CHECK(float_out ? bits_per_sample == 16 || bits_per_sample == 32
+                      : bits_per_sample > 0 && bits_per_sample <= 16);
+  if (!!out_image == out_callback.IsPresent()) {  // Exactly one output sink must be set.
+    return JXL_FAILURE(
+        "Must provide either an out_image or an out_callback, but not both.");
+  }
+
+  const size_t bytes_per_channel = DivCeil(bits_per_sample, jxl::kBitsPerByte);
+  const size_t bytes_per_pixel = num_channels * bytes_per_channel;
+
+  std::vector<std::vector<uint8_t>> row_out_callback;  // One stride-sized scratch row per thread (callback mode only).
+  const auto FreeCallbackOpaque = [&out_callback](void* p) {  // Deleter that hands the opaque pointer to out_callback.destroy.
+    out_callback.destroy(p);
+  };
+  std::unique_ptr<void, decltype(FreeCallbackOpaque)> out_run_opaque(
+      nullptr, FreeCallbackOpaque);
+  auto InitOutCallback = [&](size_t num_threads) -> Status {  // Pool init hook; does nothing when writing to out_image.
+    if (out_callback.IsPresent()) {
+      out_run_opaque.reset(out_callback.Init(num_threads, stride));
+      JXL_RETURN_IF_ERROR(out_run_opaque != nullptr);
+      row_out_callback.resize(num_threads);
+      for (size_t i = 0; i < num_threads; ++i) {
+        row_out_callback[i].resize(stride);
+      }
+    }
+    return true;
+  };
+
+  // Storage for re-oriented copies of the channels; used only when an orientation must be undone.
+  ImageF temp_channels[kConvertMaxChannels];
+  if (undo_orientation != Orientation::kIdentity) {
+    for (size_t c = 0; c < num_channels; ++c) {
+      if (channels[c]) {
+        JXL_RETURN_IF_ERROR(UndoOrientation(undo_orientation, *channels[c],
+                                            temp_channels[c], pool));
+        channels[c] = &(temp_channels[c]);  // The caller's array entry now points at the re-oriented copy.
+      }
+    }
+  }
+
+  // channels[0] may not be nullptr (see the JXL_DASSERT above); it defines the output size.
+  size_t xsize = channels[0]->xsize();
+  size_t ysize = channels[0]->ysize();
+  if (stride < bytes_per_pixel * xsize) {
+    return JXL_FAILURE("stride is smaller than scanline width in bytes: %" PRIuS
+                       " vs %" PRIuS,
+                       stride, bytes_per_pixel * xsize);
+  }
+  if (!out_callback.IsPresent() &&
+      out_size < (ysize - 1) * stride + bytes_per_pixel * xsize) {  // The last row needs only its pixel bytes, not a full stride.
+    return JXL_FAILURE("out_size is too small to store image");
+  }
+
+  const bool little_endian =  // Requested endianness resolved against the host byte order.
+      endianness == JXL_LITTLE_ENDIAN ||
+      (endianness == JXL_NATIVE_ENDIAN && IsLittleEndian());
+
+  // Handle the case where a channel is nullptr by creating a single row with
+  // ones to use instead.
+  ImageF ones;
+  for (size_t c = 0; c < num_channels; ++c) {
+    if (!channels[c]) {
+      ones = ImageF(xsize, 1);
+      FillImage(1.0f, &ones);
+      break;
+    }
+  }
+
+  if (float_out) {
+    if (bits_per_sample == 16) {
+      bool swap_endianness = little_endian != IsLittleEndian();
+      Plane<hwy::float16_t> f16_cache;  // num_channels scratch rows per thread, allocated in the init callback.
+      JXL_RETURN_IF_ERROR(RunOnPool(
+          pool, 0, static_cast<uint32_t>(ysize),
+          [&](size_t num_threads) {
+            f16_cache =
+                Plane<hwy::float16_t>(xsize, num_channels * num_threads);
+            return InitOutCallback(num_threads);
+          },
+          [&](const uint32_t task, const size_t thread) {  // One task per output row.
+            const int64_t y = task;
+            const float* JXL_RESTRICT row_in[kConvertMaxChannels];
+            for (size_t c = 0; c < num_channels; c++) {
+              row_in[c] = channels[c] ? channels[c]->Row(y) : ones.Row(0);
+            }
+            hwy::float16_t* JXL_RESTRICT row_f16[kConvertMaxChannels];
+            for (size_t c = 0; c < num_channels; c++) {
+              row_f16[c] = f16_cache.Row(c + thread * num_channels);
+              HWY_DYNAMIC_DISPATCH(FloatToF16)
+              (row_in[c], row_f16[c], xsize);
+            }
+            uint8_t* row_out =
+                out_callback.IsPresent()
+                    ? row_out_callback[thread].data()
+                    : &(reinterpret_cast<uint8_t*>(out_image))[stride * y];
+            // Interleave the channels of this one scanline.
+            hwy::float16_t* row_f16_out =
+                reinterpret_cast<hwy::float16_t*>(row_out);
+            for (size_t x = 0; x < xsize; x++) {
+              for (size_t c = 0; c < num_channels; c++) {
+                row_f16_out[x * num_channels + c] = row_f16[c][x];
+              }
+            }
+            if (swap_endianness) {  // Swap the two bytes of every 16-bit sample in place.
+              size_t size = xsize * num_channels * 2;
+              for (size_t i = 0; i < size; i += 2) {
+                std::swap(row_out[i + 0], row_out[i + 1]);
+              }
+            }
+            if (out_callback.IsPresent()) {
+              out_callback.run(out_run_opaque.get(), thread, 0, y, xsize,
+                               row_out);
+            }
+          },
+          "ConvertF16"));
+    } else if (bits_per_sample == 32) {
+      JXL_RETURN_IF_ERROR(RunOnPool(
+          pool, 0, static_cast<uint32_t>(ysize),
+          [&](size_t num_threads) { return InitOutCallback(num_threads); },
+          [&](const uint32_t task, const size_t thread) {  // One task per output row.
+            const int64_t y = task;
+            uint8_t* row_out =
+                out_callback.IsPresent()
+                    ? row_out_callback[thread].data()
+                    : &(reinterpret_cast<uint8_t*>(out_image))[stride * y];
+            const float* JXL_RESTRICT row_in[kConvertMaxChannels];
+            for (size_t c = 0; c < num_channels; c++) {
+              row_in[c] = channels[c] ? channels[c]->Row(y) : ones.Row(0);
+            }
+            if (little_endian) {
+              StoreFloatRow<StoreLEFloat>(row_in, num_channels, xsize, row_out);
+            } else {
+              StoreFloatRow<StoreBEFloat>(row_in, num_channels, xsize, row_out);
+            }
+            if (out_callback.IsPresent()) {
+              out_callback.run(out_run_opaque.get(), thread, 0, y, xsize,
+                               row_out);
+            }
+          },
+          "ConvertFloat"));
+    } else {
+      return JXL_FAILURE("float other than 16-bit and 32-bit not supported");
+    }
+  } else {
+    // Multiplier to convert from floating point 0-1 range to the integer
+    // range.
+    float mul = (1ull << bits_per_sample) - 1;
+    Plane<uint32_t> u32_cache;  // num_channels scratch rows per thread, allocated in the init callback.
+    JXL_RETURN_IF_ERROR(RunOnPool(
+        pool, 0, static_cast<uint32_t>(ysize),
+        [&](size_t num_threads) {
+          u32_cache = Plane<uint32_t>(xsize, num_channels * num_threads);
+          return InitOutCallback(num_threads);
+        },
+        [&](const uint32_t task, const size_t thread) {  // One task per output row.
+          const int64_t y = task;
+          uint8_t* row_out =
+              out_callback.IsPresent()
+                  ? row_out_callback[thread].data()
+                  : &(reinterpret_cast<uint8_t*>(out_image))[stride * y];
+          const float* JXL_RESTRICT row_in[kConvertMaxChannels];
+          for (size_t c = 0; c < num_channels; c++) {
+            row_in[c] = channels[c] ? channels[c]->Row(y) : ones.Row(0);
+          }
+          uint32_t* JXL_RESTRICT row_u32[kConvertMaxChannels];
+          for (size_t c = 0; c < num_channels; c++) {
+            row_u32[c] = u32_cache.Row(c + thread * num_channels);
+            // row_u32[] is per-thread temporary row storage; its contents are
+            // not meant to carry over from a previous run, so poison it first.
+            msan::PoisonMemory(row_u32[c], xsize * sizeof(row_u32[c][0]));
+            HWY_DYNAMIC_DISPATCH(FloatToU32)
+            (row_in[c], row_u32[c], xsize, mul, bits_per_sample);
+          }
+          if (bits_per_sample <= 8) {  // One byte per sample; otherwise two bytes in the chosen byte order.
+            StoreUintRow<Store8>(row_u32, num_channels, xsize, 1, row_out);
+          } else {
+            if (little_endian) {
+              StoreUintRow<StoreLE16>(row_u32, num_channels, xsize, 2, row_out);
+            } else {
+              StoreUintRow<StoreBE16>(row_u32, num_channels, xsize, 2, row_out);
+            }
+          }
+          if (out_callback.IsPresent()) {
+            out_callback.run(out_run_opaque.get(), thread, 0, y, xsize,
+                             row_out);
+          }
+        },
+        "ConvertUint"));
+  }
+  return true;
+}
+
+Status ConvertToExternal(const jxl::ImageBundle& ib, size_t bits_per_sample,
+                         bool float_out, size_t num_channels,
+                         JxlEndianness endianness, size_t stride,
+                         jxl::ThreadPool* pool, void* out_image,
+                         size_t out_size, const PixelCallback& out_callback,
+                         jxl::Orientation undo_orientation,
+                         bool unpremul_alpha) {  // Builds the channel list from ib and delegates to ConvertChannelsToExternal.
+  bool want_alpha = num_channels == 2 || num_channels == 4;  // Gray+alpha or RGB+alpha.
+  size_t color_channels = num_channels <= 2 ? 1 : 3;
+
+  const Image3F* color = &ib.color();
+  // Undo premultiplied alpha on a copy of the color planes; ib itself is not modified.
+  Image3F unpremul;
+  if (ib.AlphaIsPremultiplied() && ib.HasAlpha() && unpremul_alpha) {
+    unpremul = Image3F(color->xsize(), color->ysize());
+    CopyImageTo(*color, &unpremul);
+    for (size_t y = 0; y < unpremul.ysize(); y++) {
+      UnpremultiplyAlpha(unpremul.PlaneRow(0, y), unpremul.PlaneRow(1, y),
+                         unpremul.PlaneRow(2, y), ib.alpha().Row(y),
+                         unpremul.xsize());
+    }
+    color = &unpremul;
+  }
+
+  const ImageF* channels[kConvertMaxChannels];
+  size_t c = 0;
+  for (; c < color_channels; c++) {
+    channels[c] = &color->Plane(c);
+  }
+  if (want_alpha) {
+    channels[c++] = ib.HasAlpha() ? &ib.alpha() : nullptr;  // nullptr makes ConvertChannelsToExternal substitute a row of 1.0.
+  }
+  JXL_ASSERT(num_channels == c);
+
+  return ConvertChannelsToExternal(
+      channels, num_channels, bits_per_sample, float_out, endianness, stride,
+      pool, out_image, out_size, out_callback, undo_orientation);
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_external_image.h b/third-party/libjxl/libjxl/lib/jxl/dec_external_image.h
new file mode 100644
index 0000000000..7ca7cfd0e7
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_external_image.h
@@ -0,0 +1,66 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_EXTERNAL_IMAGE_H_
+#define LIB_JXL_DEC_EXTERNAL_IMAGE_H_
+
+// Interleaved image for color transforms and Codec.
+
+#include <jxl/decode.h>
+#include <jxl/types.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+// Maximum number of channels for the ConvertChannelsToExternal function.
+const size_t kConvertMaxChannels = 4;
+
+// Converts a list of channels to an interleaved image, applying transformations
+// when needed.
+// The input channels are given as a (non-const!) array of channel pointers and
+// interleaved in that order.
+//
+// Note: if a pointer in channels[] is nullptr, a 1.0 value will be used
+// instead. This is useful for handling when a user requests an alpha channel
+// from an image that doesn't have one. The first channel in the list may not
+// be nullptr, since it is used to determine the image size.
+Status ConvertChannelsToExternal(const ImageF* channels[], size_t num_channels,
+                                 size_t bits_per_sample, bool float_out,
+                                 JxlEndianness endianness, size_t stride,
+                                 jxl::ThreadPool* pool, void* out_image,
+                                 size_t out_size,
+                                 const PixelCallback& out_callback,
+                                 jxl::Orientation undo_orientation);
+
+// Converts ib to interleaved void* pixel buffer with the given format.
+// bits_per_sample: must be 16 or 32 if float_out is true, and at most 16
+// if it is false. No bit packing is done.
+// num_channels: must be 1, 2, 3 or 4 for gray, gray+alpha, RGB, RGB+alpha.
+// This supports the features needed for the C API and does not perform
+// color space conversion.
+// TODO(lode): support rectangle crop.
+// stride_out is output scanline size in bytes, must be >=
+// output_xsize * output_bytes_per_pixel.
+// undo_orientation is an EXIF orientation to undo. Depending on the
+// orientation, the output xsize and ysize are swapped compared to input
+// xsize and ysize.
+Status ConvertToExternal(const jxl::ImageBundle& ib, size_t bits_per_sample,
+                         bool float_out, size_t num_channels,
+                         JxlEndianness endianness, size_t stride_out,
+                         jxl::ThreadPool* thread_pool, void* out_image,
+                         size_t out_size, const PixelCallback& out_callback,
+                         jxl::Orientation undo_orientation,
+                         bool unpremul_alpha = false);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_EXTERNAL_IMAGE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_external_image_gbench.cc b/third-party/libjxl/libjxl/lib/jxl/dec_external_image_gbench.cc
new file mode 100644
index 0000000000..c87a4d5f36
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_external_image_gbench.cc
@@ -0,0 +1,56 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "benchmark/benchmark.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+namespace {
+
+// Decoder case, interleaves an internal float image.
+void BM_DecExternalImage_ConvertImageRGBA(benchmark::State& state) {  // Times 8-bit RGBA interleaving of a square, zero-filled image.
+  const size_t kNumIter = 5;  // Conversions performed per benchmark iteration.
+  size_t xsize = state.range();  // Square image: the benchmark argument is both width and height.
+  size_t ysize = state.range();
+  size_t num_channels = 4;
+
+  ImageMetadata im;
+  im.SetAlphaBits(8);
+  ImageBundle ib(&im);
+  Image3F color(xsize, ysize);
+  ZeroFillImage(&color);
+  ib.SetFromImage(std::move(color), ColorEncoding::SRGB());
+  ImageF alpha(xsize, ysize);
+  ZeroFillImage(&alpha);
+  ib.SetAlpha(std::move(alpha));
+
+  const size_t bytes_per_row = xsize * num_channels;  // One byte per sample at 8 bits, no row padding.
+  std::vector<uint8_t> interleaved(bytes_per_row * ysize);
+
+  for (auto _ : state) {
+    for (size_t i = 0; i < kNumIter; ++i) {
+      JXL_CHECK(ConvertToExternal(
+          ib,
+          /*bits_per_sample=*/8,
+          /*float_out=*/false, num_channels, JXL_NATIVE_ENDIAN,
+          /*stride*/ bytes_per_row,
+          /*thread_pool=*/nullptr, interleaved.data(), interleaved.size(),
+          /*out_callback=*/{},
+          /*undo_orientation=*/jxl::Orientation::kIdentity));
+    }
+  }
+
+  // Pixels per second.
+  state.SetItemsProcessed(kNumIter * state.iterations() * xsize * ysize);
+  state.SetBytesProcessed(kNumIter * state.iterations() * interleaved.size());
+}
+
+BENCHMARK(BM_DecExternalImage_ConvertImageRGBA)
+    ->RangeMultiplier(2)
+    ->Range(256, 2048);  // Image side lengths 256, 512, 1024 and 2048.
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_frame.cc b/third-party/libjxl/libjxl/lib/jxl/dec_frame.cc
new file mode 100644
index 0000000000..82458de1ec
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_frame.cc
@@ -0,0 +1,871 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_frame.h"
+
+#include <jxl/types.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <atomic>
+#include <hwy/aligned_allocator.h>
+#include <numeric>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_group.h"
+#include "lib/jxl/dec_modular.h"
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/passes_state.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/splines.h"
+#include "lib/jxl/toc.h"
+
+namespace jxl {
+
+namespace {
+Status DecodeGlobalDCInfo(BitReader* reader, bool is_jpeg,
+                          PassesDecoderState* state, ThreadPool* pool) {  // Reads quantizer, block context map and cmap DC data; `pool` is unused here.
+  JXL_RETURN_IF_ERROR(state->shared_storage.quantizer.Decode(reader));
+
+  JXL_RETURN_IF_ERROR(
+      DecodeBlockCtxMap(reader, &state->shared_storage.block_ctx_map));
+
+  JXL_RETURN_IF_ERROR(state->shared_storage.cmap.DecodeDC(reader));
+
+  // Pre-compute info for decoding a group.
+  if (is_jpeg) {
+    state->shared_storage.quantizer.ClearDCMul();  // Don't dequant DC
+  }
+
+  state->shared_storage.ac_strategy.FillInvalid();
+  return true;
+}
+}  // namespace
+
+Status DecodeFrame(PassesDecoderState* dec_state, ThreadPool* JXL_RESTRICT pool,
+                   const uint8_t* next_in, size_t avail_in,
+                   ImageBundle* decoded, const CodecMetadata& metadata,
+                   bool use_slow_rendering_pipeline) {  // One-shot decode of a frame held in [next_in, next_in + avail_in).
+  FrameDecoder frame_decoder(dec_state, metadata, pool,
+                             use_slow_rendering_pipeline);
+
+  BitReader reader(Span<const uint8_t>(next_in, avail_in));
+  JXL_RETURN_IF_ERROR(frame_decoder.InitFrame(&reader, decoded,
+                                              /*is_preview=*/false));
+  JXL_RETURN_IF_ERROR(frame_decoder.InitFrameOutput());
+
+  JXL_RETURN_IF_ERROR(reader.AllReadsWithinBounds());
+  size_t header_bytes = reader.TotalBitsConsumed() / kBitsPerByte;  // Bytes consumed by InitFrame; sections start right after.
+  JXL_RETURN_IF_ERROR(reader.Close());
+
+  size_t processed_bytes = header_bytes;
+  Status close_ok = true;
+  std::vector<std::unique_ptr<BitReader>> section_readers;  // Declared outside the scope below so the readers outlive their closers.
+  {
+    std::vector<std::unique_ptr<BitReaderScopedCloser>> section_closers;
+    std::vector<FrameDecoder::SectionInfo> section_info;
+    std::vector<FrameDecoder::SectionStatus> section_status;
+    size_t pos = header_bytes;
+    size_t index = 0;
+    for (auto toc_entry : frame_decoder.Toc()) {  // One BitReader per TOC entry, laid out back to back.
+      JXL_RETURN_IF_ERROR(pos + toc_entry.size <= avail_in);  // The section must lie fully inside the input.
+      auto br = make_unique<BitReader>(
+          Span<const uint8_t>(next_in + pos, toc_entry.size));
+      section_info.emplace_back(
+          FrameDecoder::SectionInfo{br.get(), toc_entry.id, index++});
+      section_closers.emplace_back(
+          make_unique<BitReaderScopedCloser>(br.get(), &close_ok));
+      section_readers.emplace_back(std::move(br));
+      pos += toc_entry.size;
+    }
+    section_status.resize(section_info.size());
+    JXL_RETURN_IF_ERROR(frame_decoder.ProcessSections(
+        section_info.data(), section_info.size(), section_status.data()));
+    for (size_t i = 0; i < section_status.size(); i++) {
+      JXL_RETURN_IF_ERROR(section_status[i] == FrameDecoder::kDone);  // Any section not fully processed is an error here.
+      processed_bytes += frame_decoder.Toc()[i].size;
+    }
+  }
+  JXL_RETURN_IF_ERROR(close_ok);  // NOTE(review): presumably updated by the scoped closers when the block above ends.
+  JXL_RETURN_IF_ERROR(frame_decoder.FinalizeFrame());
+  decoded->SetDecodedBytes(processed_bytes);
+  return true;
+}
+
+Status FrameDecoder::InitFrame(BitReader* JXL_RESTRICT br, ImageBundle* decoded,
+                               bool is_preview) {  // Reads the frame header and TOC from br and resets per-frame fields of decoded.
+  decoded_ = decoded;
+  JXL_ASSERT(is_finalized_);  // NOTE(review): cleared in InitFrameOutput; presumably set again by FinalizeFrame.
+
+  // Reset the dequantization matrices to their default values.
+  dec_state_->shared_storage.matrices = DequantMatrices();
+
+  frame_header_.nonserialized_is_preview = is_preview;
+  JXL_ASSERT(frame_header_.nonserialized_metadata != nullptr);
+  JXL_RETURN_IF_ERROR(ReadFrameHeader(br, &frame_header_));
+  frame_dim_ = frame_header_.ToFrameDimensions();
+  JXL_DEBUG_V(2, "FrameHeader: %s", frame_header_.DebugString().c_str());
+
+  const size_t num_passes = frame_header_.passes.num_passes;
+  const size_t num_groups = frame_dim_.num_groups;
+
+  // If the previous frame was not a kRegularFrame, `decoded` may have different
+  // dimensions; must reset to avoid errors.
+  decoded->RemoveColor();
+  decoded->ClearExtraChannels();
+
+  decoded->duration = frame_header_.animation_frame.duration;
+
+  if (!frame_header_.nonserialized_is_preview &&
+      (frame_header_.is_last || frame_header_.animation_frame.duration > 0) &&
+      (frame_header_.frame_type == kRegularFrame ||
+       frame_header_.frame_type == kSkipProgressive)) {  // Counted as a visible frame; everything else is non-visible.
+    ++dec_state_->visible_frame_index;
+    dec_state_->nonvisible_frame_index = 0;
+  } else {
+    ++dec_state_->nonvisible_frame_index;
+  }
+
+  // Read TOC.
+  const bool has_ac_global = true;
+  const size_t toc_entries = NumTocEntries(num_groups, frame_dim_.num_dc_groups,
+                                           num_passes, has_ac_global);
+  std::vector<uint32_t> sizes;
+  std::vector<coeff_order_t> permutation;
+  JXL_RETURN_IF_ERROR(ReadToc(toc_entries, br, &sizes, &permutation));
+  bool have_permutation = !permutation.empty();
+  toc_.resize(toc_entries);
+  section_sizes_sum_ = 0;
+  for (size_t i = 0; i < toc_entries; ++i) {
+    toc_[i].size = sizes[i];
+    size_t index = have_permutation ? permutation[i] : i;
+    toc_[index].id = i;  // Slot permutation[i] (or i without a permutation) is tagged with id i.
+    if (section_sizes_sum_ + toc_[i].size < section_sizes_sum_) {  // Unsigned wrap-around check.
+      return JXL_FAILURE("group offset overflow");
+    }
+    section_sizes_sum_ += toc_[i].size;
+  }
+
+  JXL_DASSERT((br->TotalBitsConsumed() % kBitsPerByte) == 0);
+  const size_t group_codes_begin = br->TotalBitsConsumed() / kBitsPerByte;
+  JXL_DASSERT(!toc_.empty());
+
+  // Overflow check.
+  if (group_codes_begin + section_sizes_sum_ < group_codes_begin) {
+    return JXL_FAILURE("Invalid group codes");
+  }
+
+  if (!frame_header_.chroma_subsampling.Is444() &&
+      !(frame_header_.flags & FrameHeader::kSkipAdaptiveDCSmoothing) &&
+      frame_header_.encoding == FrameEncoding::kVarDCT) {
+    return JXL_FAILURE(
+        "Non-444 chroma subsampling is not allowed when adaptive DC "
+        "smoothing is enabled");
+  }
+  return true;
+}
+
+Status FrameDecoder::InitFrameOutput() {  // Initializes shared and decoder state, sizes JPEG component buffers if needed, and clears progress flags.
+  JXL_RETURN_IF_ERROR(
+      InitializePassesSharedState(frame_header_, &dec_state_->shared_storage));
+  JXL_RETURN_IF_ERROR(dec_state_->Init());
+  modular_frame_decoder_.Init(frame_dim_);
+
+  if (decoded_->IsJPEG()) {  // JPEG reconstruction: validate the frame and size each component's coefficient buffer.
+    if (frame_header_.encoding == FrameEncoding::kModular) {
+      return JXL_FAILURE("Cannot output JPEG from Modular");
+    }
+    jpeg::JPEGData* jpeg_data = decoded_->jpeg_data.get();
+    size_t num_components = jpeg_data->components.size();
+    if (num_components != 1 && num_components != 3) {
+      return JXL_FAILURE("Invalid number of components");
+    }
+    if (frame_header_.nonserialized_metadata->m.xyb_encoded) {
+      return JXL_FAILURE("Cannot decode to JPEG an XYB image");
+    }
+    auto jpeg_c_map = JpegOrder(ColorTransform::kYCbCr, num_components == 1);
+    decoded_->jpeg_data->width = frame_dim_.xsize;
+    decoded_->jpeg_data->height = frame_dim_.ysize;
+    for (size_t c = 0; c < num_components; c++) {
+      auto& component = jpeg_data->components[jpeg_c_map[c]];
+      component.width_in_blocks =
+          frame_dim_.xsize_blocks >> frame_header_.chroma_subsampling.HShift(c);
+      component.height_in_blocks =
+          frame_dim_.ysize_blocks >> frame_header_.chroma_subsampling.VShift(c);
+      component.h_samp_factor =
+          1 << frame_header_.chroma_subsampling.RawHShift(c);
+      component.v_samp_factor =
+          1 << frame_header_.chroma_subsampling.RawVShift(c);
+      component.coeffs.resize(component.width_in_blocks *  // kDCTBlockSize coefficients per block.
+                              component.height_in_blocks * jxl::kDCTBlockSize);
+    }
+  }
+
+  // Clear the state.
+  decoded_dc_global_ = false;
+  decoded_ac_global_ = false;
+  is_finalized_ = false;
+  finalized_dc_ = false;
+  num_sections_done_ = 0;
+  decoded_dc_groups_.clear();
+  decoded_dc_groups_.resize(frame_dim_.num_dc_groups);
+  decoded_passes_per_ac_group_.clear();
+  decoded_passes_per_ac_group_.resize(frame_dim_.num_groups, 0);
+  processed_section_.clear();
+  processed_section_.resize(toc_.size());
+  allocated_ = false;  // Lets AllocateOutput run again for this frame.
+  return true;
+}
+
+Status FrameDecoder::ProcessDCGlobal(BitReader* br) {  // Decodes patches, splines, noise, DC dequant matrices, VarDCT DC info and modular global info.
+  PassesSharedState& shared = dec_state_->shared_storage;
+  if (shared.frame_header.flags & FrameHeader::kPatches) {
+    bool uses_extra_channels = false;
+    JXL_RETURN_IF_ERROR(shared.image_features.patches.Decode(
+        br, frame_dim_.xsize_padded, frame_dim_.ysize_padded,
+        &uses_extra_channels));
+    if (uses_extra_channels && frame_header_.upsampling != 1) {
+      for (size_t ecups : frame_header_.extra_channel_upsampling) {
+        if (ecups != frame_header_.upsampling) {
+          return JXL_FAILURE(
+              "Cannot use extra channels in patches if color channels are "
+              "subsampled differently from extra channels");
+        }
+      }
+    }
+  } else {
+    shared.image_features.patches.Clear();
+  }
+  shared.image_features.splines.Clear();  // Always cleared; decoded below only if the frame has the kSplines flag.
+  if (shared.frame_header.flags & FrameHeader::kSplines) {
+    JXL_RETURN_IF_ERROR(shared.image_features.splines.Decode(
+        br, frame_dim_.xsize * frame_dim_.ysize));
+  }
+  if (shared.frame_header.flags & FrameHeader::kNoise) {
+    JXL_RETURN_IF_ERROR(DecodeNoise(br, &shared.image_features.noise_params));
+  }
+  JXL_RETURN_IF_ERROR(dec_state_->shared_storage.matrices.DecodeDC(br));
+
+  if (frame_header_.encoding == FrameEncoding::kVarDCT) {
+    JXL_RETURN_IF_ERROR(
+        jxl::DecodeGlobalDCInfo(br, decoded_->IsJPEG(), dec_state_, pool_));
+  }
+  // Splines' draw cache uses the color correlation map.
+  if (shared.frame_header.flags & FrameHeader::kSplines) {
+    JXL_RETURN_IF_ERROR(shared.image_features.splines.InitializeDrawCache(
+        frame_dim_.xsize_upsampled, frame_dim_.ysize_upsampled,
+        dec_state_->shared->cmap));
+  }
+  Status dec_status = modular_frame_decoder_.DecodeGlobalInfo(
+      br, frame_header_, /*allow_truncated_group=*/false);
+  if (dec_status.IsFatalError()) return dec_status;
+  if (dec_status) {  // Mark DC global as decoded only on success; a non-fatal failure is returned as is.
+    decoded_dc_global_ = true;
+  }
+  return dec_status;
+}
+
+Status FrameDecoder::ProcessDCGroup(size_t dc_group_id, BitReader* br) {  // Decodes one DC group: VarDCT DC, modular DC data, then AC metadata.
+  const size_t gx = dc_group_id % frame_dim_.xsize_dc_groups;  // Column of the group in the DC-group grid.
+  const size_t gy = dc_group_id / frame_dim_.xsize_dc_groups;  // Row of the group in the DC-group grid.
+  const LoopFilter& lf = dec_state_->shared->frame_header.loop_filter;
+  if (frame_header_.encoding == FrameEncoding::kVarDCT &&
+      !(frame_header_.flags & FrameHeader::kUseDcFrame)) {  // Skipped when the frame has the kUseDcFrame flag.
+    JXL_RETURN_IF_ERROR(
+        modular_frame_decoder_.DecodeVarDCTDC(dc_group_id, br, dec_state_));
+  }
+  const Rect mrect(gx * frame_dim_.dc_group_dim, gy * frame_dim_.dc_group_dim,
+                   frame_dim_.dc_group_dim, frame_dim_.dc_group_dim);
+  JXL_RETURN_IF_ERROR(modular_frame_decoder_.DecodeGroup(
+      mrect, br, 3, 1000, ModularStreamId::ModularDC(dc_group_id),  // 3 and 1000 fill the minShift / maxShift argument positions.
+      /*zerofill=*/false, nullptr, nullptr,
+      /*allow_truncated=*/false));
+  if (frame_header_.encoding == FrameEncoding::kVarDCT) {
+    JXL_RETURN_IF_ERROR(
+        modular_frame_decoder_.DecodeAcMetadata(dc_group_id, br, dec_state_));
+  } else if (lf.epf_iters > 0) {
+    FillImage(kInvSigmaNum / lf.epf_sigma_for_modular, &dec_state_->sigma);  // Non-VarDCT frames with EPF get one constant sigma value.
+  }
+  decoded_dc_groups_[dc_group_id] = uint8_t{true};
+  return true;
+}
+
+void FrameDecoder::FinalizeDC() {
+  // Do Adaptive DC smoothing if enabled. This *must* happen between all the
+  // ProcessDCGroup and ProcessACGroup.
+  if (frame_header_.encoding == FrameEncoding::kVarDCT &&
+      !(frame_header_.flags & FrameHeader::kSkipAdaptiveDCSmoothing) &&
+      !(frame_header_.flags & FrameHeader::kUseDcFrame)) {  // VarDCT only, and not when smoothing is skipped or a DC frame is used.
+    AdaptiveDCSmoothing(dec_state_->shared->quantizer.MulDC(),
+                        &dec_state_->shared_storage.dc_storage, pool_);
+  }
+
+  finalized_dc_ = true;  // Checked by ProcessACGlobal before it decodes anything.
+}
+
+Status FrameDecoder::AllocateOutput() {  // Prepares dec_state_ for AC decoding; does the work at most once per frame.
+  if (allocated_) return true;  // Already done for this frame; allocated_ is cleared in InitFrameOutput.
+  modular_frame_decoder_.MaybeDropFullImage();
+  decoded_->origin = dec_state_->shared->frame_header.frame_origin;
+  JXL_RETURN_IF_ERROR(dec_state_->InitForAC(nullptr));
+  allocated_ = true;
+  return true;
+}
+
+Status FrameDecoder::ProcessACGlobal(BitReader* br) {  // Decodes the AC-global section and, for JPEG reconstruction, fills the JPEG quant tables.
+  JXL_CHECK(finalized_dc_);  // FinalizeDC must have run first.
+
+  // Decode AC global data (VarDCT frames only).
+  if (frame_header_.encoding == FrameEncoding::kVarDCT) {
+    JXL_RETURN_IF_ERROR(dec_state_->shared_storage.matrices.Decode(
+        br, &modular_frame_decoder_));
+    JXL_RETURN_IF_ERROR(dec_state_->shared_storage.matrices.EnsureComputed(
+        dec_state_->used_acs));
+
+    size_t num_histo_bits =
+        CeilLog2Nonzero(dec_state_->shared->frame_dim.num_groups);
+    dec_state_->shared_storage.num_histograms =
+        1 + br->ReadBits(num_histo_bits);
+
+    dec_state_->code.resize(kMaxNumPasses);
+    dec_state_->context_map.resize(kMaxNumPasses);
+    // Read coefficient orders and histograms.
+    size_t max_num_bits_ac = 0;
+    for (size_t i = 0;
+         i < dec_state_->shared_storage.frame_header.passes.num_passes; i++) {
+      uint16_t used_orders = U32Coder::Read(kOrderEnc, br);
+      JXL_RETURN_IF_ERROR(DecodeCoeffOrders(
+          used_orders, dec_state_->used_acs,
+          &dec_state_->shared_storage
+               .coeff_orders[i * dec_state_->shared_storage.coeff_order_size],
+          br));
+      size_t num_contexts =
+          dec_state_->shared->num_histograms *
+          dec_state_->shared_storage.block_ctx_map.NumACContexts();
+      JXL_RETURN_IF_ERROR(DecodeHistograms(
+          br, num_contexts, &dec_state_->code[i], &dec_state_->context_map[i]));
+      // Add extra values to enable the cheat in hot loop of DecodeACVarBlock.
+      dec_state_->context_map[i].resize(
+          num_contexts + kZeroDensityContextLimit - kZeroDensityContextCount);
+      max_num_bits_ac =
+          std::max(max_num_bits_ac, dec_state_->code[i].max_num_bits);
+    }
+    max_num_bits_ac += CeilLog2Nonzero(
+        dec_state_->shared_storage.frame_header.passes.num_passes);
+    // 16-bit buffers for decoding to JPEG are not implemented.
+    // TODO(veluca): figure out the exact limit - 16 should still work with
+    // 16-bit buffers, but we are excluding it for safety.
+    bool use_16_bit = max_num_bits_ac < 16 && !decoded_->IsJPEG();
+    bool store = frame_header_.passes.num_passes > 1;  // With a single pass the coefficient image is created with size 0 x 0.
+    size_t xs = store ? kGroupDim * kGroupDim : 0;
+    size_t ys = store ? frame_dim_.num_groups : 0;
+    if (use_16_bit) {
+      dec_state_->coefficients = make_unique<ACImageT<int16_t>>(xs, ys);
+    } else {
+      dec_state_->coefficients = make_unique<ACImageT<int32_t>>(xs, ys);
+    }
+    if (store) {
+      dec_state_->coefficients->ZeroFill();
+    }
+  }
+
+  // Set JPEG decoding data.
+  if (decoded_->IsJPEG()) {
+    decoded_->color_transform = frame_header_.color_transform;
+    decoded_->chroma_subsampling = frame_header_.chroma_subsampling;
+    const std::vector<QuantEncoding>& qe =
+        dec_state_->shared_storage.matrices.encodings();
+    if (qe.empty() || qe[0].mode != QuantEncoding::Mode::kQuantModeRAW ||
+        std::abs(qe[0].qraw.qtable_den - 1.f / (8 * 255)) > 1e-8f) {
+      return JXL_FAILURE(
+          "Quantization table is not a JPEG quantization table.");
+    }
+    jpeg::JPEGData* jpeg_data = decoded_->jpeg_data.get();
+    size_t num_components = jpeg_data->components.size();
+    bool is_gray = (num_components == 1);
+    auto jpeg_c_map = JpegOrder(frame_header_.color_transform, is_gray);
+    size_t qt_set = 0;  // Bit i is set once quant table i has been written.
+    for (size_t c = 0; c < num_components; c++) {
+      // TODO(eustas): why 1-st quant table for gray?
+      size_t quant_c = is_gray ? 1 : c;
+      size_t qpos = jpeg_data->components[jpeg_c_map[c]].quant_idx;
+      JXL_CHECK(qpos != jpeg_data->quant.size());  // NOTE(review): rejects only qpos == size; confirm larger values are excluded elsewhere.
+      qt_set |= 1 << qpos;
+      for (size_t x = 0; x < 8; x++) {
+        for (size_t y = 0; y < 8; y++) {
+          jpeg_data->quant[qpos].values[x * 8 + y] =  // Copies the 8x8 table transposed (x and y swap roles).
+              (*qe[0].qraw.qtable)[quant_c * 64 + y * 8 + x];
+        }
+      }
+    }
+    for (size_t i = 0; i < jpeg_data->quant.size(); i++) {
+      if (qt_set & (1 << i)) continue;
+      if (i == 0) return JXL_FAILURE("First quant table unused.");
+      // Unused quant table is set to copy of previous quant table
+      for (size_t j = 0; j < 64; j++) {
+        jpeg_data->quant[i].values[j] = jpeg_data->quant[i - 1].values[j];
+      }
+    }
+  }
+  decoded_ac_global_ = true;
+  return true;
+}
+
+Status FrameDecoder::ProcessACGroup(size_t ac_group_id,
+                                    BitReader* JXL_RESTRICT* br,
+                                    size_t num_passes, size_t thread,
+                                    bool force_draw, bool dc_only) {
+  size_t group_dim = frame_dim_.group_dim;
+  const size_t gx = ac_group_id % frame_dim_.xsize_groups;  // group column
+  const size_t gy = ac_group_id / frame_dim_.xsize_groups;  // group row
+  const size_t x = gx * group_dim;
+  const size_t y = gy * group_dim;
+  JXL_DEBUG_V(3,
+              "Processing AC group %" PRIuS "(%" PRIuS ",%" PRIuS
+              ") group_dim: %" PRIuS " decoded passes: %u new passes: %" PRIuS,
+              ac_group_id, gx, gy, group_dim,
+              decoded_passes_per_ac_group_[ac_group_id], num_passes);
+  // Render pipeline input buffers for this group, selected by `thread`.
+  RenderPipelineInput render_pipeline_input =
+      dec_state_->render_pipeline->GetInputBuffers(ac_group_id, thread);
+  // Handed to DecodeGroup by pointer, so DecodeGroup may change it.
+  bool should_run_pipeline = true;
+  // VarDCT frames: run DecodeGroup with the cache entry selected by `thread`.
+  if (frame_header_.encoding == FrameEncoding::kVarDCT) {
+    group_dec_caches_[thread].InitOnce(frame_header_.passes.num_passes,
+                                       dec_state_->used_acs);
+    JXL_RETURN_IF_ERROR(DecodeGroup(br, num_passes, ac_group_id, dec_state_,
+                                    &group_dec_caches_[thread], thread,
+                                    render_pipeline_input, decoded_,
+                                    decoded_passes_per_ac_group_[ac_group_id],
+                                    force_draw, dc_only, &should_run_pipeline));
+  }
+
+  // Don't limit to image dimensions here (that is done in DecodeGroup).
+  const Rect mrect(x, y, group_dim, group_dim);
+  bool modular_ready = false;
+  size_t pass0 = decoded_passes_per_ac_group_[ac_group_id];  // first new pass
+  size_t pass1 =
+      force_draw ? frame_header_.passes.num_passes : pass0 + num_passes;
+  for (size_t i = pass0; i < pass1; ++i) {
+    int minShift, maxShift;
+    frame_header_.passes.GetDownsamplingBracket(i, minShift, maxShift);
+    bool modular_pass_ready = true;
+    if (i < pass0 + num_passes) {  // A bit reader was supplied for this pass.
+      JXL_RETURN_IF_ERROR(modular_frame_decoder_.DecodeGroup(
+          mrect, br[i - pass0], minShift, maxShift,
+          ModularStreamId::ModularAC(ac_group_id, i),
+          /*zerofill=*/false, dec_state_, &render_pipeline_input,
+          /*allow_truncated=*/false, &modular_pass_ready));
+    } else {  // Only reached with force_draw: no data, so zero-fill the pass.
+      JXL_RETURN_IF_ERROR(modular_frame_decoder_.DecodeGroup(
+          mrect, nullptr, minShift, maxShift,
+          ModularStreamId::ModularAC(ac_group_id, i), /*zerofill=*/true,
+          dec_state_, &render_pipeline_input,
+          /*allow_truncated=*/false, &modular_pass_ready));
+    }
+    if (modular_pass_ready) modular_ready = true;
+  }
+  decoded_passes_per_ac_group_[ac_group_id] += num_passes;
+  // Noise (kNoise flag) uses the 3 channels after color and extra channels.
+  if ((frame_header_.flags & FrameHeader::kNoise) != 0) {
+    size_t noise_c_start =
+        3 + frame_header_.nonserialized_metadata->m.num_extra_channels;
+    // When the color channels are downsampled, we need to generate more noise
+    // input for the current group than just the group dimensions.
+    std::pair<ImageF*, Rect> rects[3];
+    for (size_t iy = 0; iy < frame_header_.upsampling; iy++) {
+      for (size_t ix = 0; ix < frame_header_.upsampling; ix++) {
+        for (size_t c = 0; c < 3; c++) {
+          auto r = render_pipeline_input.GetBuffer(noise_c_start + c);
+          rects[c].first = r.first;
+          size_t x1 = r.second.x0() + r.second.xsize();  // buffer right edge
+          size_t y1 = r.second.y0() + r.second.ysize();  // buffer bottom edge
+          rects[c].second = Rect(r.second.x0() + ix * group_dim,
+                                 r.second.y0() + iy * group_dim, group_dim,
+                                 group_dim, x1, y1);
+        }
+        Random3Planes(dec_state_->visible_frame_index,
+                      dec_state_->nonvisible_frame_index,
+                      (gx * frame_header_.upsampling + ix) * group_dim,
+                      (gy * frame_header_.upsampling + iy) * group_dim,
+                      rects[0], rects[1], rects[2]);
+      }
+    }
+  }
+  // Done() is skipped for full-image modular decoding and for JPEG output.
+  if (!modular_frame_decoder_.UsesFullImage() && !decoded_->IsJPEG()) {
+    if (should_run_pipeline && modular_ready) {
+      render_pipeline_input.Done();
+    } else if (force_draw) {
+      return JXL_FAILURE("Modular group decoding failed.");
+    }
+  }
+  return true;
+}
+
+void FrameDecoder::MarkSections(const SectionInfo* sections, size_t num,
+                                SectionStatus* section_status) {
+  // Count only kDone sections; any other section is un-marked as processed.
+  for (size_t idx = 0; idx != num; ++idx) {
+    if (section_status[idx] == SectionStatus::kDone) {
+      ++num_sections_done_;
+    } else {
+      processed_section_[sections[idx].id] = false;
+    }
+  }
+}
+
+Status FrameDecoder::ProcessSections(const SectionInfo* sections, size_t num,
+                                     SectionStatus* section_status) {
+  if (num == 0) return true;  // Nothing to process
+  std::fill(section_status, section_status + num, SectionStatus::kSkipped);
+  size_t dc_global_sec = num;  // `num` is the "not provided" marker below.
+  size_t ac_global_sec = num;
+  std::vector<size_t> dc_group_sec(frame_dim_.num_dc_groups, num);
+  std::vector<std::vector<size_t>> ac_group_sec(
+      frame_dim_.num_groups,
+      std::vector<size_t>(frame_header_.passes.num_passes, num));
+  // This keeps track of the number of AC passes we want to process during
+  // this call of ProcessSections.
+  std::vector<size_t> desired_num_ac_passes(frame_dim_.num_groups);
+  bool single_section =
+      frame_dim_.num_groups == 1 && frame_header_.passes.num_passes == 1;
+  if (single_section) {  // Section 0 then serves as DC and AC data at once.
+    JXL_ASSERT(num == 1);
+    JXL_ASSERT(sections[0].id == 0);
+    if (processed_section_[0] == false) {
+      processed_section_[0] = true;
+      ac_group_sec[0].resize(1);
+      dc_global_sec = ac_global_sec = dc_group_sec[0] = ac_group_sec[0][0] = 0;
+      desired_num_ac_passes[0] = 1;
+    } else {
+      section_status[0] = SectionStatus::kDuplicate;
+    }
+  } else {
+    size_t ac_global_index = frame_dim_.num_dc_groups + 1;  // id of AC global
+    for (size_t i = 0; i < num; i++) {
+      JXL_ASSERT(sections[i].id < processed_section_.size());
+      if (processed_section_[sections[i].id]) {
+        section_status[i] = SectionStatus::kDuplicate;
+        continue;
+      }
+      if (sections[i].id == 0) {
+        dc_global_sec = i;
+      } else if (sections[i].id < ac_global_index) {
+        dc_group_sec[sections[i].id - 1] = i;
+      } else if (sections[i].id == ac_global_index) {
+        ac_global_sec = i;
+      } else {
+        size_t ac_idx = sections[i].id - ac_global_index - 1;
+        size_t acg = ac_idx % frame_dim_.num_groups;  // AC group
+        size_t acp = ac_idx / frame_dim_.num_groups;  // pass
+        if (acp >= frame_header_.passes.num_passes) {
+          return JXL_FAILURE("Invalid section ID");
+        }
+        ac_group_sec[acg][acp] = i;
+      }
+      processed_section_[sections[i].id] = true;
+    }
+    // Count the consecutive new passes that are available for each group.
+    for (size_t g = 0; g < ac_group_sec.size(); g++) {
+      size_t j = 0;
+      for (; j + decoded_passes_per_ac_group_[g] <
+             frame_header_.passes.num_passes;
+           j++) {
+        if (ac_group_sec[g][j + decoded_passes_per_ac_group_[g]] == num) {
+          break;
+        }
+      }
+      desired_num_ac_passes[g] = j;
+    }
+  }
+  if (dc_global_sec != num) {
+    Status dc_global_status = ProcessDCGlobal(sections[dc_global_sec].br);
+    if (dc_global_status.IsFatalError()) return dc_global_status;
+    if (dc_global_status) {
+      section_status[dc_global_sec] = SectionStatus::kDone;
+    } else {
+      section_status[dc_global_sec] = SectionStatus::kPartial;
+    }
+  }
+  // Set by the pool workers below when decoding a group fails.
+  std::atomic<bool> has_error{false};
+  if (decoded_dc_global_) {
+    JXL_RETURN_IF_ERROR(RunOnPool(
+        pool_, 0, dc_group_sec.size(), ThreadPool::NoInit,
+        [this, &dc_group_sec, &num, &sections, &section_status, &has_error](
+            size_t i, size_t thread) {
+          if (dc_group_sec[i] != num) {
+            if (!ProcessDCGroup(i, sections[dc_group_sec[i]].br)) {
+              has_error = true;
+            } else {
+              section_status[dc_group_sec[i]] = SectionStatus::kDone;
+            }
+          }
+        },
+        "DecodeDCGroup"));
+  }
+  if (has_error) return JXL_FAILURE("Error in DC group");
+  // With all DC groups decoded, prepare the pipeline and finalize DC once.
+  if (*std::min_element(decoded_dc_groups_.begin(), decoded_dc_groups_.end()) &&
+      !finalized_dc_) {
+    PassesDecoderState::PipelineOptions pipeline_options;
+    pipeline_options.use_slow_render_pipeline = use_slow_rendering_pipeline_;
+    pipeline_options.coalescing = coalescing_;
+    pipeline_options.render_spotcolors = render_spotcolors_;
+    JXL_RETURN_IF_ERROR(
+        dec_state_->PreparePipeline(decoded_, pipeline_options));
+    FinalizeDC();
+    JXL_RETURN_IF_ERROR(AllocateOutput());
+    if (progressive_detail_ >= JxlProgressiveDetail::kDC) {
+      MarkSections(sections, num, section_status);
+      return true;
+    }
+  }
+
+  if (finalized_dc_ && ac_global_sec != num && !decoded_ac_global_) {
+    JXL_RETURN_IF_ERROR(ProcessACGlobal(sections[ac_global_sec].br));
+    section_status[ac_global_sec] = SectionStatus::kDone;
+  }
+
+  if (progressive_detail_ >= JxlProgressiveDetail::kLastPasses) {
+    // Only decode up to the next pass count at which decoding should pause.
+    size_t target_complete_passes = NextNumPassesToPause();
+    for (size_t i = 0; i < ac_group_sec.size(); i++) {
+      desired_num_ac_passes[i] =
+          std::min(desired_num_ac_passes[i],
+                   target_complete_passes - decoded_passes_per_ac_group_[i]);
+    }
+  }
+
+  if (decoded_ac_global_) {
+    // Mark all the AC groups that we received as not complete yet.
+    for (size_t i = 0; i < ac_group_sec.size(); i++) {
+      if (desired_num_ac_passes[i] != 0) {
+        dec_state_->render_pipeline->ClearDone(i);
+      }
+    }
+
+    JXL_RETURN_IF_ERROR(RunOnPool(
+        pool_, 0, ac_group_sec.size(),
+        [this](size_t num_threads) {
+          return PrepareStorage(num_threads,
+                                decoded_passes_per_ac_group_.size());
+        },
+        [this, &ac_group_sec, &desired_num_ac_passes, &num, &sections,
+         &section_status, &has_error](size_t g, size_t thread) {
+          if (desired_num_ac_passes[g] == 0) {
+            // No new AC pass for this group, nothing to do.
+            return;
+          }
+          (void)num;
+          size_t first_pass = decoded_passes_per_ac_group_[g];
+          BitReader* JXL_RESTRICT readers[kMaxNumPasses];
+          for (size_t i = 0; i < desired_num_ac_passes[g]; i++) {
+            JXL_ASSERT(ac_group_sec[g][first_pass + i] != num);
+            readers[i] = sections[ac_group_sec[g][first_pass + i]].br;
+          }
+          if (!ProcessACGroup(g, readers, desired_num_ac_passes[g],
+                              GetStorageLocation(thread, g),
+                              /*force_draw=*/false, /*dc_only=*/false)) {
+            has_error = true;
+          } else {
+            for (size_t i = 0; i < desired_num_ac_passes[g]; i++) {
+              section_status[ac_group_sec[g][first_pass + i]] =
+                  SectionStatus::kDone;
+            }
+          }
+        },
+        "DecodeGroup"));
+  }
+  if (has_error) return JXL_FAILURE("Error in AC group");
+
+  MarkSections(sections, num, section_status);
+  return true;
+}
+
+Status FrameDecoder::Flush() {
+  bool has_blending = frame_header_.blending_info.mode != BlendMode::kReplace ||
+                      frame_header_.custom_size_or_origin;  // cropped frame
+  for (const auto& blending_info_ec :
+       frame_header_.extra_channel_blending_info) {
+    if (blending_info_ec.mode != BlendMode::kReplace) has_blending = true;
+  }
+  // No early Flush() if blending is enabled: returns false until finalized.
+  if (has_blending && !is_finalized_) {
+    return false;
+  }
+  // No early Flush() - nothing to do - if the frame is a kSkipProgressive
+  // frame.
+  if (frame_header_.frame_type == FrameType::kSkipProgressive &&
+      !is_finalized_) {
+    return true;
+  }
+  if (decoded_->IsJPEG()) {
+    // Nothing to do.
+    return true;
+  }
+  JXL_RETURN_IF_ERROR(AllocateOutput());
+  // Lowest number of decoded passes over all AC groups.
+  uint32_t completely_decoded_ac_pass = *std::min_element(
+      decoded_passes_per_ac_group_.begin(), decoded_passes_per_ac_group_.end());
+  if (completely_decoded_ac_pass < frame_header_.passes.num_passes) {
+    // We don't have all AC yet: force a draw of all the missing areas.
+    // Clear the render pipeline's done mark of every incomplete group.
+    for (size_t i = 0; i < decoded_passes_per_ac_group_.size(); i++) {
+      if (decoded_passes_per_ac_group_[i] < frame_header_.passes.num_passes) {
+        dec_state_->render_pipeline->ClearDone(i);
+      }
+    }
+    std::atomic<bool> has_error{false};
+    JXL_RETURN_IF_ERROR(RunOnPool(
+        pool_, 0, decoded_passes_per_ac_group_.size(),
+        [this](const size_t num_threads) {
+          return PrepareStorage(num_threads,
+                                decoded_passes_per_ac_group_.size());
+        },
+        [this, &has_error](const uint32_t g, size_t thread) {
+          if (decoded_passes_per_ac_group_[g] ==
+              frame_header_.passes.num_passes) {
+            // This group was drawn already, nothing to do.
+            return;
+          }
+          BitReader* JXL_RESTRICT readers[kMaxNumPasses] = {};  // all null
+          bool ok = ProcessACGroup(
+              g, readers, /*num_passes=*/0, GetStorageLocation(thread, g),
+              /*force_draw=*/true, /*dc_only=*/!decoded_ac_global_);
+          if (!ok) has_error = true;
+        },
+        "ForceDrawGroup"));
+    if (has_error) {
+      return JXL_FAILURE("Drawing groups failed");
+    }
+  }
+
+  // Undo global modular transforms and copy int pixel buffers to float ones.
+  JXL_RETURN_IF_ERROR(modular_frame_decoder_.FinalizeDecoding(dec_state_, pool_,
+                                                              is_finalized_));
+
+  return true;
+}
+
+int FrameDecoder::SavedAs(const FrameHeader& header) {
+  // DC frames use bits 16, 32, 64, 128, selected by the DC level.
+  const bool is_dc_frame = header.frame_type == FrameType::kDCFrame;
+  if (is_dc_frame) return 16 << (header.dc_level - 1);
+
+  // Referenceable frames use bits 1, 2, 4 and 8, selected by the slot.
+  const bool is_reference = header.CanBeReferenced();
+  if (is_reference) return 1 << header.save_as_reference;
+
+  return 0;  // Not stored.
+}
+
+bool FrameDecoder::HasEverything() const {
+  // Complete only once both global sections, every DC group and every pass
+  // of every AC group have been decoded.
+  if (!decoded_dc_global_ || !decoded_ac_global_) return false;
+  const auto& dc = decoded_dc_groups_;
+  if (std::find(dc.begin(), dc.end(), 0) != dc.end()) return false;
+  const auto& ac = decoded_passes_per_ac_group_;
+  const auto all_passes = frame_header_.passes.num_passes;
+  return std::none_of(ac.begin(), ac.end(),
+                      [&](uint8_t n) { return n < all_passes; });
+}
+
+int FrameDecoder::References() const {
+  // Only a fully decoded, not yet finalized frame reports dependencies.
+  if (is_finalized_ || !HasEverything()) return 0;
+
+  int mask = 0;
+  const auto frame_type = frame_header_.frame_type;
+  const auto frame_flags = frame_header_.flags;
+
+  // Blending: a cropped frame always reads its source frame, other frames
+  // only when the blend mode is not kReplace.
+  if (frame_type == FrameType::kRegularFrame ||
+      frame_type == FrameType::kSkipProgressive) {
+    const bool is_cropped = frame_header_.custom_size_or_origin;
+    const auto& color_blend = frame_header_.blending_info;
+    if (is_cropped || color_blend.mode != BlendMode::kReplace) {
+      mask |= 1 << color_blend.source;
+    }
+    for (const auto& ec_blend : frame_header_.extra_channel_blending_info) {
+      if (is_cropped || ec_blend.mode != BlendMode::kReplace) {
+        mask |= 1 << ec_blend.source;
+      }
+    }
+  }
+
+  // Patches: add the references reported by image_features.patches.
+  if (frame_flags & FrameHeader::kPatches) {
+    mask |= dec_state_->shared->image_features.patches.GetReferences();
+  }
+
+  // DC level: the frame reads from the next DC level, whose DC frame is
+  // flagged with one of the bits 16, 32, 64, 128.
+  if (frame_flags & FrameHeader::kUseDcFrame) {
+    const int next_dc_level = frame_header_.dc_level + 1;
+    mask |= 16 << (next_dc_level - 1);
+  }
+
+  return mask;
+}
+
+Status FrameDecoder::FinalizeFrame() {
+  // Finalizing twice is a caller error; the flag is set before any work.
+  if (is_finalized_) {
+    return JXL_FAILURE("FinalizeFrame called multiple times");
+  }
+  is_finalized_ = true;
+  // Nothing else to do when decoded_->IsJPEG().
+  if (decoded_->IsJPEG()) return true;
+
+  // Undo global modular transforms and copy int pixel buffers to float ones.
+  JXL_RETURN_IF_ERROR(modular_frame_decoder_.FinalizeDecoding(
+      dec_state_, pool_, /*inplace=*/true));
+
+  // Frames that cannot be referenced later are complete at this point.
+  if (!frame_header_.CanBeReferenced()) return true;
+
+  // Move the frame kept for referencing into its reference slot.
+  auto& slot = dec_state_->shared_storage
+                   .reference_frames[frame_header_.save_as_reference];
+  slot.frame = std::move(dec_state_->frame_storage_for_referencing);
+  slot.ib_is_in_xyb = frame_header_.save_before_color_transform;
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_frame.h b/third-party/libjxl/libjxl/lib/jxl/dec_frame.h
new file mode 100644
index 0000000000..6b54ac631f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_frame.h
@@ -0,0 +1,329 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_FRAME_H_
+#define LIB_JXL_DEC_FRAME_H_
+
+#include <jxl/decode.h>
+#include <jxl/types.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/blending.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_modular.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+// Decodes a frame. Groups may be processed in parallel by `pool`.
+// `metadata` is the metadata that applies to all frames of the codestream
+// `decoded->metadata` must already be set and must match metadata.m.
+// Used in the encoder to model decoder behaviour, and in tests.
+Status DecodeFrame(PassesDecoderState* dec_state, ThreadPool* JXL_RESTRICT pool,
+                   const uint8_t* next_in, size_t avail_in,
+                   ImageBundle* decoded, const CodecMetadata& metadata,
+                   bool use_slow_rendering_pipeline = false);
+
+// TODO(veluca): implement "forced drawing".
+// Decodes a single frame incrementally from its table-of-contents sections.
+class FrameDecoder {
+ public:
+  // All parameters must outlive the FrameDecoder.
+  FrameDecoder(PassesDecoderState* dec_state, const CodecMetadata& metadata,
+               ThreadPool* pool, bool use_slow_rendering_pipeline)
+      : dec_state_(dec_state),
+        pool_(pool),
+        frame_header_(&metadata),
+        use_slow_rendering_pipeline_(use_slow_rendering_pipeline) {}
+
+  void SetRenderSpotcolors(bool rsc) { render_spotcolors_ = rsc; }
+  void SetCoalescing(bool c) { coalescing_ = c; }
+
+  // Read FrameHeader and table of contents from the given BitReader.
+  Status InitFrame(BitReader* JXL_RESTRICT br, ImageBundle* decoded,
+                   bool is_preview);
+
+  // Checks frame dimensions for their limits, and sets the output
+  // image buffer.
+  Status InitFrameOutput();
+
+  struct SectionInfo {
+    BitReader* JXL_RESTRICT br;
+    // Logical index of the section, regardless of any permutation that may be
+    // applied in the table of contents or of the physical position in the file.
+    size_t id;
+    // Index of the section in the order of the bytes inside the frame.
+    size_t index;
+  };
+
+  struct TocEntry {
+    size_t size;
+    size_t id;
+  };
+
+  enum SectionStatus {
+    // Processed correctly.
+    kDone = 0,
+    // Skipped because other required sections were not yet processed.
+    kSkipped = 1,
+    // Skipped because the section was already processed.
+    kDuplicate = 2,
+    // Only partially decoded: the section will need to be processed again.
+    kPartial = 3,
+  };
+
+  // Processes `num` sections; each SectionInfo contains the index
+  // of the section and a BitReader that only contains the data of the section.
+  // `section_status` should point to `num` elements, and will be filled with
+  // information about whether each section was processed or not.
+  // A section is a part of the encoded file that is indexed by the TOC.
+  Status ProcessSections(const SectionInfo* sections, size_t num,
+                         SectionStatus* section_status);
+
+  // Flushes all the data decoded so far to pixels.
+  Status Flush();
+
+  // Runs final operations once the frame data is decoded.
+  // Must be called exactly once per frame, after all calls to ProcessSections.
+  Status FinalizeFrame();
+
+  // Returns dependencies of this frame on reference ids as a bit mask: bits 0-3
+  // indicate reference frame 0-3 for patches and blending, bits 4-7 indicate DC
+  // frames this frame depends on. Only returns a valid result after all calls
+  // to ProcessSections are finished and before FinalizeFrame.
+  int References() const;
+
+  // Returns reference id of storage location where this frame is stored as a
+  // bit flag, or 0 if not stored.
+  // Matches the bit mask used for References: bits 0-3 indicate it is stored
+  // for patching or blending, bits 4-7 indicate DC frame.
+  // Unlike References, can be run at any time as
+  // soon as the frame header is known.
+  static int SavedAs(const FrameHeader& header);
+
+  uint64_t SumSectionSizes() const { return section_sizes_sum_; }
+  const std::vector<TocEntry>& Toc() const { return toc_; }
+
+  const FrameHeader& GetFrameHeader() const { return frame_header_; }
+
+  // Returns whether a DC image has been decoded, accessible at low resolution
+  // at passes.shared_storage.dc_storage
+  bool HasDecodedDC() const { return finalized_dc_; }
+  bool HasDecodedAll() const { return toc_.size() == num_sections_done_; }
+
+  size_t NumCompletePasses() const {
+    return *std::min_element(decoded_passes_per_ac_group_.begin(),
+                             decoded_passes_per_ac_group_.end());
+  }
+
+  // If enabled, ProcessSections will stop and return true when the DC
+  // sections have been processed, instead of starting the AC sections. This
+  // will only occur if supported (that is, flushing will produce a valid
+  // 1/8th*1/8th resolution image). The return value of true then does not mean
+  // all sections have been processed, use HasDecodedDC and HasDecodedAll
+  // to check the true finished state.
+  // Returns the progressive detail that will be effective for the frame.
+  JxlProgressiveDetail SetPauseAtProgressive(JxlProgressiveDetail prog_detail) {
+    bool single_section =
+        frame_dim_.num_groups == 1 && frame_header_.passes.num_passes == 1;
+    if (frame_header_.frame_type != kSkipProgressive &&
+        // If there's only one group and one pass, there is no separate section
+        // for DC and the entire full resolution image is available at once.
+        !single_section &&
+        // If extra channels are encoded with modular without squeeze, they
+        // don't support DC. If they are encoded with squeeze, DC works in
+        // theory but the implementation may not yet correctly support this for
+        // Flush. Therefore, can't correctly pause for a progressive step if
+        // there is an extra channel (including alpha channel)
+        // TODO(firsching): Check if this is still the case.
+        decoded_->metadata()->extra_channel_info.empty() &&
+        // DC is not guaranteed to be available in modular mode and may be a
+        // black image. If squeeze is used, it may be available depending on the
+        // current implementation.
+        // TODO(lode): do return DC if it's known that flushing at this point
+        // will produce a valid 1/8th downscaled image with modular encoding.
+        frame_header_.encoding == FrameEncoding::kVarDCT) {
+      progressive_detail_ = prog_detail;
+    } else {
+      progressive_detail_ = JxlProgressiveDetail::kFrames;
+    }
+    passes_to_pause_.clear();  // Keep repeated calls from accumulating entries.
+    if (progressive_detail_ >= JxlProgressiveDetail::kPasses) {
+      for (size_t i = 1; i < frame_header_.passes.num_passes; ++i) {
+        passes_to_pause_.push_back(i);
+      }
+    } else if (progressive_detail_ >= JxlProgressiveDetail::kLastPasses) {
+      for (size_t i = 0; i < frame_header_.passes.num_downsample; ++i) {
+        passes_to_pause_.push_back(frame_header_.passes.last_pass[i] + 1);
+      }
+      // The format does not guarantee that these values are sorted.
+      std::sort(passes_to_pause_.begin(), passes_to_pause_.end());
+    }
+    return progressive_detail_;
+  }
+
+  size_t NextNumPassesToPause() const {
+    auto it = std::upper_bound(passes_to_pause_.begin(), passes_to_pause_.end(),
+                               NumCompletePasses());
+    return (it != passes_to_pause_.end() ? *it
+                                         : std::numeric_limits<size_t>::max());
+  }
+
+  // Sets the pixel callback or image buffer where the pixels will be decoded.
+  //
+  // @param undo_orientation: if true, indicates the frame decoder should apply
+  // the exif orientation to bring the image to the intended display
+  // orientation.
+  void SetImageOutput(const PixelCallback& pixel_callback, void* image_buffer,
+                      size_t image_buffer_size, size_t xsize, size_t ysize,
+                      JxlPixelFormat format, size_t bits_per_sample,
+                      bool unpremul_alpha, bool undo_orientation) const {
+    dec_state_->width = xsize;
+    dec_state_->height = ysize;
+    dec_state_->main_output.format = format;
+    dec_state_->main_output.bits_per_sample = bits_per_sample;
+    dec_state_->main_output.callback = pixel_callback;
+    dec_state_->main_output.buffer = image_buffer;
+    dec_state_->main_output.buffer_size = image_buffer_size;
+    dec_state_->main_output.stride = GetStride(xsize, format);
+    const jxl::ExtraChannelInfo* alpha =
+        decoded_->metadata()->Find(jxl::ExtraChannel::kAlpha);
+    if (alpha && alpha->alpha_associated && unpremul_alpha) {
+      dec_state_->unpremul_alpha = true;
+    }
+    if (undo_orientation) {
+      dec_state_->undo_orientation = decoded_->metadata()->GetOrientation();
+      if (static_cast<int>(dec_state_->undo_orientation) > 4) {
+        std::swap(dec_state_->width, dec_state_->height);
+      }
+    }
+    dec_state_->extra_output.clear();
+#if !JXL_HIGH_PRECISION
+    if (dec_state_->main_output.buffer &&
+        (format.data_type == JXL_TYPE_UINT8) && (format.num_channels >= 3) &&
+        !dec_state_->unpremul_alpha &&
+        (dec_state_->undo_orientation == Orientation::kIdentity) &&
+        decoded_->metadata()->xyb_encoded &&
+        dec_state_->output_encoding_info.color_encoding.IsSRGB() &&
+        dec_state_->output_encoding_info.all_default_opsin &&
+        (dec_state_->output_encoding_info.desired_intensity_target ==
+         dec_state_->output_encoding_info.orig_intensity_target) &&
+        HasFastXYBTosRGB8() && frame_header_.needs_color_transform()) {
+      dec_state_->fast_xyb_srgb8_conversion = true;
+    }
+#endif
+  }
+
+  void AddExtraChannelOutput(void* buffer, size_t buffer_size, size_t xsize,
+                             JxlPixelFormat format, size_t bits_per_sample) {
+    ImageOutput out;
+    out.format = format;
+    out.bits_per_sample = bits_per_sample;
+    out.buffer = buffer;
+    out.buffer_size = buffer_size;
+    out.stride = GetStride(xsize, format);
+    dec_state_->extra_output.push_back(out);
+  }
+
+ private:
+  Status ProcessDCGlobal(BitReader* br);
+  Status ProcessDCGroup(size_t dc_group_id, BitReader* br);
+  void FinalizeDC();
+  Status AllocateOutput();
+  Status ProcessACGlobal(BitReader* br);
+  Status ProcessACGroup(size_t ac_group_id, BitReader* JXL_RESTRICT* br,
+                        size_t num_passes, size_t thread, bool force_draw,
+                        bool dc_only);
+  void MarkSections(const SectionInfo* sections, size_t num,
+                    SectionStatus* section_status);
+
+  // Allocates storage for parallel decoding using up to `num_threads` threads
+  // of up to `num_tasks` tasks. The value of `thread` passed to
+  // `GetStorageLocation` must be smaller than the `num_threads` value passed
+  // here. The value of `task` passed to `GetStorageLocation` must be smaller
+  // than the value of `num_tasks` passed here.
+  Status PrepareStorage(size_t num_threads, size_t num_tasks) {
+    size_t storage_size = std::min(num_threads, num_tasks);
+    if (storage_size > group_dec_caches_.size()) {
+      group_dec_caches_.resize(storage_size);
+    }
+    use_task_id_ = num_threads > num_tasks;
+    bool use_group_ids = (modular_frame_decoder_.UsesFullImage() &&
+                          (frame_header_.encoding == FrameEncoding::kVarDCT ||
+                           (frame_header_.flags & FrameHeader::kNoise)));
+    if (dec_state_->render_pipeline) {
+      JXL_RETURN_IF_ERROR(dec_state_->render_pipeline->PrepareForThreads(
+          storage_size, use_group_ids));
+    }
+    return true;
+  }
+
+  size_t GetStorageLocation(size_t thread, size_t task) {
+    return use_task_id_ ? task : thread;
+  }
+
+  static size_t BytesPerChannel(JxlDataType data_type) {
+    if (data_type == JXL_TYPE_UINT8) return 1u;
+    return data_type == JXL_TYPE_FLOAT ? 4u : 2u;
+  }
+
+  static size_t GetStride(const size_t xsize, JxlPixelFormat format) {
+    size_t stride =
+        (xsize * BytesPerChannel(format.data_type) * format.num_channels);
+    if (format.align > 1) {
+      stride = (jxl::DivCeil(stride, format.align) * format.align);
+    }
+    return stride;
+  }
+
+  PassesDecoderState* dec_state_;
+  ThreadPool* pool_;
+  std::vector<TocEntry> toc_;
+  uint64_t section_sizes_sum_ = 0;
+  // TODO(veluca): figure out the duplication between these and dec_state_.
+  FrameHeader frame_header_;
+  FrameDimensions frame_dim_;
+  ImageBundle* decoded_ = nullptr;
+  ModularFrameDecoder modular_frame_decoder_;
+  bool render_spotcolors_ = true;
+  bool coalescing_ = true;
+
+  std::vector<uint8_t> processed_section_;
+  std::vector<uint8_t> decoded_passes_per_ac_group_;
+  std::vector<uint8_t> decoded_dc_groups_;
+  bool decoded_dc_global_ = false;
+  bool decoded_ac_global_ = false;
+  bool HasEverything() const;
+  bool finalized_dc_ = true;
+  size_t num_sections_done_ = 0;
+  bool is_finalized_ = true;
+  bool allocated_ = false;
+
+  std::vector<GroupDecCache> group_dec_caches_;
+
+  // Whether or not the task id should be used for storage indexing, instead of
+  // the thread id.
+  bool use_task_id_ = false;
+
+  // Testing setting: whether or not to use the slow rendering pipeline.
+  bool use_slow_rendering_pipeline_;
+
+  JxlProgressiveDetail progressive_detail_ = kFrames;
+  // Number of completed passes where section decoding should pause.
+  // Used for progressive details at least kLastPasses.
+  std::vector<int> passes_to_pause_;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_FRAME_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_group.cc b/third-party/libjxl/libjxl/lib/jxl/dec_group.cc
new file mode 100644
index 0000000000..37bb3d2dc1
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_group.cc
@@ -0,0 +1,791 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_group.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+#include <memory>
+#include <utility>
+
+#include "lib/jxl/frame_header.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dec_group.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_transforms-inl.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer-inl.h"
+#include "lib/jxl/quantizer.h"
+
+#ifndef LIB_JXL_DEC_GROUP_CC
+#define LIB_JXL_DEC_GROUP_CC
+namespace jxl {
+
+struct AuxOut;
+
+// Abstract source of quantized coefficient blocks for DecodeGroupImpl.
+class GetBlock {
+ public:
+  // Called by DecodeGroupImpl once per block row `by` of the group, before any
+  // LoadBlock call for that row.
+  virtual void StartRow(size_t by) = 0;
+
+  // Provides the quantized coefficients of the varblock at block position
+  // (bx, by) with strategy `acs`. `size` is the number of coefficients per
+  // channel and `log2_covered_blocks` the log2 of the number of 8x8 blocks
+  // covered. `block[c]` is the destination for channel c, interpreted
+  // according to `ac_type`. Returns an error status on failure.
+  virtual Status LoadBlock(size_t bx, size_t by, const AcStrategy& acs,
+                           size_t size, size_t log2_covered_blocks,
+                           ACPtr block[3], ACType ac_type) = 0;
+
+  virtual ~GetBlock() = default;
+};
+
+// Tells DecodeGroupImpl whether the loaded blocks should also be rendered.
+enum DrawMode {
+  kDraw = 0,      // Blocks are rendered to pixels.
+  kDontDraw = 1,  // Blocks are loaded, but nothing is rendered.
+};
+
+}  // namespace jxl
+#endif  // LIB_JXL_DEC_GROUP_CC
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::ShiftRight;
+
+// Highway vector tag types for the target currently being compiled: full
+// vectors of float, uint32_t and int32_t lanes respectively.
+using D = HWY_FULL(float);
+using DU = HWY_FULL(uint32_t);
+using DI = HWY_FULL(int32_t);
+// int16_t tag derived from DI via Rebind; used below to load and store 16-bit
+// coefficients that are promoted to / demoted from DI vectors.
+using DI16 = Rebind<int16_t, DI>;
+// Tag instances passed to the Highway operations (Load, Store, Set, ...) in
+// the functions of this namespace.
+constexpr D d;
+constexpr DI di;
+constexpr DI16 di16;
+
+// Transposes the 8x8 matrix stored row-major in `block` (64 entries) in place
+// by exchanging every element below the diagonal with its mirror above it.
+// TODO(veluca): consider SIMDfying.
+void Transpose8x8InPlace(int32_t* JXL_RESTRICT block) {
+  constexpr size_t kDim = 8;
+  for (size_t row = 1; row < kDim; ++row) {
+    for (size_t col = 0; col < row; ++col) {
+      std::swap(block[row * kDim + col], block[col * kDim + row]);
+    }
+  }
+}
+
+// Dequantizes one vector of coefficients, starting at index `k`, for all three
+// channels of a varblock and applies the chroma-from-luma correction.
+//
+// For each channel c, the quantized integers (16- or 32-bit storage, selected
+// by `ac_type`) go through AdjustQuantBias and are multiplied by the entries
+// of dequant_matrices + c * size times the per-channel scale
+// (scaled_dequant_x/y/b). Channels 0 and 2 additionally receive x_cc_mul resp.
+// b_cc_mul times the dequantized channel-1 value. Channel c is written to
+// block + c * size + k.
+template <ACType ac_type>
+void DequantLane(Vec<D> scaled_dequant_x, Vec<D> scaled_dequant_y,
+                 Vec<D> scaled_dequant_b,
+                 const float* JXL_RESTRICT dequant_matrices, size_t size,
+                 size_t k, Vec<D> x_cc_mul, Vec<D> b_cc_mul,
+                 const float* JXL_RESTRICT biases, ACPtr qblock[3],
+                 float* JXL_RESTRICT block) {
+  // Fetch the quantized coefficients as int32 lanes.
+  Vec<DI> q_x;
+  Vec<DI> q_y;
+  Vec<DI> q_b;
+  if (ac_type == ACType::k16) {
+    // 16-bit storage: load int16 lanes and widen them to int32.
+    q_x = PromoteTo(di, Load(di16, qblock[0].ptr16 + k));
+    q_y = PromoteTo(di, Load(di16, qblock[1].ptr16 + k));
+    q_b = PromoteTo(di, Load(di16, qblock[2].ptr16 + k));
+  } else {
+    q_x = Load(di, qblock[0].ptr32 + k);
+    q_y = Load(di, qblock[1].ptr32 + k);
+    q_b = Load(di, qblock[2].ptr32 + k);
+  }
+
+  // Per-coefficient multipliers: dequantization matrix entry times the
+  // channel scale.
+  const float* JXL_RESTRICT matrix_x = dequant_matrices + k;
+  const float* JXL_RESTRICT matrix_y = dequant_matrices + size + k;
+  const float* JXL_RESTRICT matrix_b = dequant_matrices + 2 * size + k;
+  const auto mul_x = Mul(Load(d, matrix_x), scaled_dequant_x);
+  const auto mul_y = Mul(Load(d, matrix_y), scaled_dequant_y);
+  const auto mul_b = Mul(Load(d, matrix_b), scaled_dequant_b);
+
+  // Bias-adjusted dequantized values before chroma-from-luma.
+  const auto x_before_cfl = Mul(AdjustQuantBias(di, 0, q_x, biases), mul_x);
+  const auto y_value = Mul(AdjustQuantBias(di, 1, q_y, biases), mul_y);
+  const auto b_before_cfl = Mul(AdjustQuantBias(di, 2, q_b, biases), mul_b);
+
+  // Add the luma contribution to the two chroma channels.
+  const auto x_value = MulAdd(x_cc_mul, y_value, x_before_cfl);
+  const auto b_value = MulAdd(b_cc_mul, y_value, b_before_cfl);
+
+  Store(x_value, d, block + k);
+  Store(y_value, d, block + size + k);
+  Store(b_value, d, block + 2 * size + k);
+}
+
+// Dequantizes a whole varblock (three channels) into `block` and then fills in
+// its lowest-frequency coefficients from the DC image.
+//
+// The common scale is inv_global_scale / quant; channels 0 and 2 are further
+// scaled by x_dm_multiplier resp. b_dm_multiplier. `kind` selects the
+// dequantization matrix in `quantizer`, `size` is the per-channel coefficient
+// count (the distance between channels in `block` and in the matrix), and
+// `covered_blocks` the number of 8x8 blocks covered. `dc_row[c] + sbx[c]` is
+// the DC position for channel c, with `dc_stride` as row stride. `scratch` is
+// passed on to LowestFrequenciesFromDC.
+template <ACType ac_type>
+void DequantBlock(const AcStrategy& acs, float inv_global_scale, int quant,
+                  float x_dm_multiplier, float b_dm_multiplier, Vec<D> x_cc_mul,
+                  Vec<D> b_cc_mul, size_t kind, size_t size,
+                  const Quantizer& quantizer, size_t covered_blocks,
+                  const size_t* sbx,
+                  const float* JXL_RESTRICT* JXL_RESTRICT dc_row,
+                  size_t dc_stride, const float* JXL_RESTRICT biases,
+                  ACPtr qblock[3], float* JXL_RESTRICT block,
+                  float* JXL_RESTRICT scratch) {
+  const float base_scale = inv_global_scale / quant;
+
+  const auto scale_x = Set(d, base_scale * x_dm_multiplier);
+  const auto scale_y = Set(d, base_scale);
+  const auto scale_b = Set(d, base_scale * b_dm_multiplier);
+
+  const float* matrix = quantizer.DequantMatrix(kind, 0);
+
+  // Dequantize one vector of coefficients at a time.
+  const size_t num_coeffs = covered_blocks * kDCTBlockSize;
+  const size_t lanes = Lanes(d);
+  for (size_t pos = 0; pos < num_coeffs; pos += lanes) {
+    DequantLane<ac_type>(scale_x, scale_y, scale_b, matrix, size, pos,
+                         x_cc_mul, b_cc_mul, biases, qblock, block);
+  }
+
+  // Derive the lowest-frequency coefficients of each channel from the DC.
+  for (size_t channel = 0; channel < 3; ++channel) {
+    const float* JXL_RESTRICT dc_pos = dc_row[channel] + sbx[channel];
+    float* JXL_RESTRICT channel_block = block + channel * size;
+    LowestFrequenciesFromDC(acs.Strategy(), dc_pos, dc_stride, channel_block,
+                            scratch);
+  }
+}
+
+// Processes all varblocks of group `group_idx`.
+//
+// For every varblock (visited at its first block only), the quantized
+// coefficients are obtained through `get_block`. If `draw` is kDontDraw
+// nothing else happens for the block. Otherwise:
+//  - when decoded->IsJPEG(), the coefficients are transposed, optionally
+//    corrected by chroma-from-luma in fixed point, and stored as int16 into
+//    decoded->jpeg_data;
+//  - else the block is dequantized and inverse-transformed into the buffers
+//    returned by render_pipeline_input.GetBuffer(c).
+//
+// If dec_state->coefficients is non-empty, blocks are accumulated into it
+// (and read from it); otherwise a zeroed temporary from `group_dec_cache` is
+// used, which requires draw == kDraw.
+//
+// `thread` is not referenced in this function body.
+// Returns a failure status for invalid JPEG-related data, for a group rect
+// that does not fit in the DC image, or when LoadBlock fails.
+Status DecodeGroupImpl(GetBlock* JXL_RESTRICT get_block,
+                       GroupDecCache* JXL_RESTRICT group_dec_cache,
+                       PassesDecoderState* JXL_RESTRICT dec_state,
+                       size_t thread, size_t group_idx,
+                       RenderPipelineInput& render_pipeline_input,
+                       ImageBundle* decoded, DrawMode draw) {
+  // TODO(veluca): investigate cache usage in this function.
+  const Rect block_rect = dec_state->shared->BlockGroupRect(group_idx);
+  const AcStrategyImage& ac_strategy = dec_state->shared->ac_strategy;
+
+  const size_t xsize_blocks = block_rect.xsize();
+  const size_t ysize_blocks = block_rect.ysize();
+
+  const size_t dc_stride = dec_state->shared->dc->PixelsPerRow();
+
+  const float inv_global_scale = dec_state->shared->quantizer.InvGlobalScale();
+
+  const YCbCrChromaSubsampling& cs =
+      dec_state->shared->frame_header.chroma_subsampling;
+
+  // Row stride of the output buffer of each channel.
+  size_t idct_stride[3];
+  for (size_t c = 0; c < 3; c++) {
+    idct_stride[c] = render_pipeline_input.GetBuffer(c).first->PixelsPerRow();
+  }
+
+  // Only filled when decoding to JPEG: for each channel c, the fixed-point
+  // ratio between the quantization table entries of channel 1 and those of
+  // channel c, stored transposed.
+  HWY_ALIGN int32_t scaled_qtable[64 * 3];
+
+  ACType ac_type = dec_state->coefficients->Type();
+  auto dequant_block = ac_type == ACType::k16 ? DequantBlock<ACType::k16>
+                                              : DequantBlock<ACType::k32>;
+  // Whether or not coefficients should be stored for future usage, and/or read
+  // from past usage.
+  bool accumulate = !dec_state->coefficients->IsEmpty();
+  // Offset of the current block in the group.
+  size_t offset = 0;
+
+  // JPEG-only state; jpeg_c_map is only assigned (and only read) when
+  // decoded->IsJPEG().
+  std::array<int, 3> jpeg_c_map;
+  bool jpeg_is_gray = false;
+  std::array<int, 3> dcoff = {};
+
+  // TODO(veluca): all of this should be done only once per image.
+  if (decoded->IsJPEG()) {
+    if (!dec_state->shared->cmap.IsJPEGCompatible()) {
+      return JXL_FAILURE("The CfL map is not JPEG-compatible");
+    }
+    jpeg_is_gray = (decoded->jpeg_data->components.size() == 1);
+    jpeg_c_map = JpegOrder(dec_state->shared->frame_header.color_transform,
+                           jpeg_is_gray);
+    const std::vector<QuantEncoding>& qe =
+        dec_state->shared->matrices.encodings();
+    // The first encoding must be a raw table with denominator 1 / (8 * 255).
+    if (qe.empty() || qe[0].mode != QuantEncoding::Mode::kQuantModeRAW ||
+        std::abs(qe[0].qraw.qtable_den - 1.f / (8 * 255)) > 1e-8f) {
+      return JXL_FAILURE(
+          "Quantization table is not a JPEG quantization table.");
+    }
+    for (size_t c = 0; c < 3; c++) {
+      // NOTE(review): this division happens before the `d <= 0` validation
+      // below; it relies on the table entry being non-zero here - confirm that
+      // the table is validated when it is decoded.
+      if (dec_state->shared->frame_header.color_transform ==
+          ColorTransform::kNone) {
+        dcoff[c] = 1024 / (*qe[0].qraw.qtable)[64 * c];
+      }
+      for (size_t i = 0; i < 64; i++) {
+        // Transpose the matrix, as it will be used on the transposed block.
+        int n = qe[0].qraw.qtable->at(64 + i);
+        int d = qe[0].qraw.qtable->at(64 * c + i);
+        if (n <= 0 || d <= 0 || n >= 65536 || d >= 65536) {
+          return JXL_FAILURE("Invalid JPEG quantization table");
+        }
+        scaled_qtable[64 * c + (i % 8) * 8 + (i / 8)] =
+            (1 << kCFLFixedPointPrecision) * n / d;
+      }
+    }
+  }
+
+  // Per-channel chroma subsampling shifts and the group rectangle (in blocks)
+  // after applying them; each rectangle must lie inside the DC plane.
+  size_t hshift[3] = {cs.HShift(0), cs.HShift(1), cs.HShift(2)};
+  size_t vshift[3] = {cs.VShift(0), cs.VShift(1), cs.VShift(2)};
+  Rect r[3];
+  for (size_t i = 0; i < 3; i++) {
+    r[i] =
+        Rect(block_rect.x0() >> hshift[i], block_rect.y0() >> vshift[i],
+             block_rect.xsize() >> hshift[i], block_rect.ysize() >> vshift[i]);
+    if (!r[i].IsInside({0, 0, dec_state->shared->dc->Plane(i).xsize(),
+                        dec_state->shared->dc->Plane(i).ysize()})) {
+      return JXL_FAILURE("Frame dimensions are too big for the image.");
+    }
+  }
+
+  for (size_t by = 0; by < ysize_blocks; ++by) {
+    get_block->StartRow(by);
+    // Block row index within each (possibly subsampled) channel.
+    size_t sby[3] = {by >> vshift[0], by >> vshift[1], by >> vshift[2]};
+
+    const int32_t* JXL_RESTRICT row_quant =
+        block_rect.ConstRow(dec_state->shared->raw_quant_field, by);
+
+    const float* JXL_RESTRICT dc_rows[3] = {
+        r[0].ConstPlaneRow(*dec_state->shared->dc, 0, sby[0]),
+        r[1].ConstPlaneRow(*dec_state->shared->dc, 1, sby[1]),
+        r[2].ConstPlaneRow(*dec_state->shared->dc, 2, sby[2]),
+    };
+
+    // Row of the color correlation maps (one entry per color tile).
+    const size_t ty = (block_rect.y0() + by) / kColorTileDimInBlocks;
+    AcStrategyRow acs_row = ac_strategy.ConstRow(block_rect, by);
+
+    const int8_t* JXL_RESTRICT row_cmap[3] = {
+        dec_state->shared->cmap.ytox_map.ConstRow(ty),
+        nullptr,
+        dec_state->shared->cmap.ytob_map.ConstRow(ty),
+    };
+
+    // Output row pointers: pixel rows for the regular path, and (JPEG only)
+    // the coefficient storage of the matching JPEG component.
+    float* JXL_RESTRICT idct_row[3];
+    int16_t* JXL_RESTRICT jpeg_row[3];
+    for (size_t c = 0; c < 3; c++) {
+      idct_row[c] = render_pipeline_input.GetBuffer(c).second.Row(
+          render_pipeline_input.GetBuffer(c).first, sby[c] * kBlockDim);
+      if (decoded->IsJPEG()) {
+        auto& component = decoded->jpeg_data->components[jpeg_c_map[c]];
+        jpeg_row[c] =
+            component.coeffs.data() +
+            (component.width_in_blocks * (r[c].y0() + sby[c]) + r[c].x0()) *
+                kDCTBlockSize;
+      }
+    }
+
+    // bx persists across color tiles; the outer loop only refreshes the
+    // per-tile chroma-from-luma multipliers.
+    size_t bx = 0;
+    for (size_t tx = 0; tx < DivCeil(xsize_blocks, kColorTileDimInBlocks);
+         tx++) {
+      size_t abs_tx = tx + block_rect.x0() / kColorTileDimInBlocks;
+      auto x_cc_mul =
+          Set(d, dec_state->shared->cmap.YtoXRatio(row_cmap[0][abs_tx]));
+      auto b_cc_mul =
+          Set(d, dec_state->shared->cmap.YtoBRatio(row_cmap[2][abs_tx]));
+      // Increment bx by llf_x because those iterations would otherwise
+      // immediately continue (!IsFirstBlock). Reduces mispredictions.
+      for (; bx < xsize_blocks && bx < (tx + 1) * kColorTileDimInBlocks;) {
+        size_t sbx[3] = {bx >> hshift[0], bx >> hshift[1], bx >> hshift[2]};
+        AcStrategy acs = acs_row[bx];
+        const size_t llf_x = acs.covered_blocks_x();
+
+        // Can only happen in the second or lower rows of a varblock.
+        if (JXL_UNLIKELY(!acs.IsFirstBlock())) {
+          bx += llf_x;
+          continue;
+        }
+        const size_t log2_covered_blocks = acs.log2_covered_blocks();
+
+        const size_t covered_blocks = 1 << log2_covered_blocks;
+        const size_t size = covered_blocks * kDCTBlockSize;
+
+        // Destination of the quantized coefficients: either the persistent
+        // coefficient storage or a zeroed temporary.
+        ACPtr qblock[3];
+        if (accumulate) {
+          for (size_t c = 0; c < 3; c++) {
+            qblock[c] = dec_state->coefficients->PlaneRow(c, group_idx, offset);
+          }
+        } else {
+          // No point in reading from bitstream without accumulating and not
+          // drawing.
+          JXL_ASSERT(draw == kDraw);
+          if (ac_type == ACType::k16) {
+            memset(group_dec_cache->dec_group_qblock16, 0,
+                   size * 3 * sizeof(int16_t));
+            for (size_t c = 0; c < 3; c++) {
+              qblock[c].ptr16 = group_dec_cache->dec_group_qblock16 + c * size;
+            }
+          } else {
+            memset(group_dec_cache->dec_group_qblock, 0,
+                   size * 3 * sizeof(int32_t));
+            for (size_t c = 0; c < 3; c++) {
+              qblock[c].ptr32 = group_dec_cache->dec_group_qblock + c * size;
+            }
+          }
+        }
+        JXL_RETURN_IF_ERROR(get_block->LoadBlock(
+            bx, by, acs, size, log2_covered_blocks, qblock, ac_type));
+        // The running offset advances for every loaded varblock, whether or
+        // not it is drawn.
+        offset += size;
+        if (draw == kDontDraw) {
+          bx += llf_x;
+          continue;
+        }
+
+        if (JXL_UNLIKELY(decoded->IsJPEG())) {
+          if (acs.Strategy() != AcStrategy::Type::DCT) {
+            return JXL_FAILURE(
+                "Can only decode to JPEG if only DCT-8 is used.");
+          }
+
+          // Channel 1 is handled first so that its transposed block is
+          // available when channels 0 and 2 apply chroma-from-luma.
+          // NOTE(review): this path accesses qblock through ptr32 regardless
+          // of ac_type - presumably JPEG decoding always uses 32-bit
+          // coefficients; confirm.
+          HWY_ALIGN int32_t transposed_dct_y[64];
+          for (size_t c : {1, 0, 2}) {
+            // Propagate only Y for grayscale.
+            if (jpeg_is_gray && c != 1) {
+              continue;
+            }
+            // Skip a subsampled channel at block positions that do not map
+            // exactly onto one of its own blocks.
+            if ((sbx[c] << hshift[c] != bx) || (sby[c] << vshift[c] != by)) {
+              continue;
+            }
+            int16_t* JXL_RESTRICT jpeg_pos =
+                jpeg_row[c] + sbx[c] * kDCTBlockSize;
+            // JPEG XL is transposed, JPEG is not.
+            auto transposed_dct = qblock[c].ptr32;
+            Transpose8x8InPlace(transposed_dct);
+            // No CfL - no need to store the y block converted to integers.
+            if (!cs.Is444() ||
+                (row_cmap[0][abs_tx] == 0 && row_cmap[2][abs_tx] == 0)) {
+              for (size_t i = 0; i < 64; i += Lanes(d)) {
+                const auto ini = Load(di, transposed_dct + i);
+                const auto ini16 = DemoteTo(di16, ini);
+                StoreU(ini16, di16, jpeg_pos + i);
+              }
+            } else if (c == 1) {
+              // Y channel: save for restoring X/B, but nothing else to do.
+              for (size_t i = 0; i < 64; i += Lanes(d)) {
+                const auto ini = Load(di, transposed_dct + i);
+                Store(ini, di, transposed_dct_y + i);
+                const auto ini16 = DemoteTo(di16, ini);
+                StoreU(ini16, di16, jpeg_pos + i);
+              }
+            } else {
+              // transposed_dct_y contains the y channel block, transposed.
+              // Both products below are rounded fixed-point multiplications
+              // with kCFLFixedPointPrecision fractional bits.
+              const auto scale = Set(
+                  di, dec_state->shared->cmap.RatioJPEG(row_cmap[c][abs_tx]));
+              const auto round = Set(di, 1 << (kCFLFixedPointPrecision - 1));
+              for (int i = 0; i < 64; i += Lanes(d)) {
+                auto in = Load(di, transposed_dct + i);
+                auto in_y = Load(di, transposed_dct_y + i);
+                auto qt = Load(di, scaled_qtable + c * size + i);
+                auto coeff_scale = ShiftRight<kCFLFixedPointPrecision>(
+                    Add(Mul(qt, scale), round));
+                auto cfl_factor = ShiftRight<kCFLFixedPointPrecision>(
+                    Add(Mul(in_y, coeff_scale), round));
+                StoreU(DemoteTo(di16, Add(in, cfl_factor)), di16, jpeg_pos + i);
+              }
+            }
+            // The first coefficient is replaced by the DC value minus dcoff,
+            // clamped to [-2047, 2047].
+            jpeg_pos[0] =
+                Clamp1<float>(dc_rows[c][sbx[c]] - dcoff[c], -2047, 2047);
+          }
+        } else {
+          HWY_ALIGN float* const block = group_dec_cache->dec_group_block;
+          // Dequantize and add predictions.
+          dequant_block(
+              acs, inv_global_scale, row_quant[bx], dec_state->x_dm_multiplier,
+              dec_state->b_dm_multiplier, x_cc_mul, b_cc_mul, acs.RawStrategy(),
+              size, dec_state->shared->quantizer,
+              acs.covered_blocks_y() * acs.covered_blocks_x(), sbx, dc_rows,
+              dc_stride,
+              dec_state->output_encoding_info.opsin_params.quant_biases, qblock,
+              block, group_dec_cache->scratch_space);
+
+          for (size_t c : {1, 0, 2}) {
+            // Same subsampling alignment test as in the JPEG path.
+            if ((sbx[c] << hshift[c] != bx) || (sby[c] << vshift[c] != by)) {
+              continue;
+            }
+            // IDCT
+            float* JXL_RESTRICT idct_pos = idct_row[c] + sbx[c] * kBlockDim;
+            TransformToPixels(acs.Strategy(), block + c * size, idct_pos,
+                              idct_stride[c], group_dec_cache->scratch_space);
+          }
+        }
+        bx += llf_x;
+      }
+    }
+  }
+  return true;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+namespace {
+// Decode quantized AC coefficients of DCT blocks.
+// LLF components in the output block will not be modified.
+//
+// Reads one varblock of channel `c` for a single pass: first the number of
+// non-zero coefficients, then the coefficients themselves in the order given
+// by `coeff_order`, stopping once all non-zeros have been read. Each decoded
+// value is shifted left by `shift` and ADDED to the existing content of
+// `block` (16- or 32-bit storage according to `ac_type`).
+//
+// `bx` indexes `qf_row` and the nzeros rows, `lbx` indexes `qdc_row`; `by` is
+// only used in the error message. `row_nzeros` receives, for every 8x8 block
+// covered by the varblock, the non-zero count divided by the number of covered
+// blocks (rounded up); `row_nzeros_top` is the row above, used for context
+// prediction. `ctx_offset` is added to the context indices computed from
+// `block_ctx_map`.
+//
+// Returns a failure status if the non-zero count is too large for the
+// varblock or if non-zeros remain after all positions have been visited.
+template <ACType ac_type, bool uses_lz77>
+Status DecodeACVarBlock(size_t ctx_offset, size_t log2_covered_blocks,
+                        int32_t* JXL_RESTRICT row_nzeros,
+                        const int32_t* JXL_RESTRICT row_nzeros_top,
+                        size_t nzeros_stride, size_t c, size_t bx, size_t by,
+                        size_t lbx, AcStrategy acs,
+                        const coeff_order_t* JXL_RESTRICT coeff_order,
+                        BitReader* JXL_RESTRICT br,
+                        ANSSymbolReader* JXL_RESTRICT decoder,
+                        const std::vector<uint8_t>& context_map,
+                        const uint8_t* qdc_row, const int32_t* qf_row,
+                        const BlockCtxMap& block_ctx_map, ACPtr block,
+                        size_t shift = 0) {
+  // Equal to number of LLF coefficients.
+  const size_t covered_blocks = 1 << log2_covered_blocks;
+  const size_t size = covered_blocks * kDCTBlockSize;
+  int32_t predicted_nzeros =
+      PredictFromTopAndLeft(row_nzeros_top, row_nzeros, bx, 32);
+
+  // Coefficient order for this strategy and channel.
+  size_t ord = kStrategyOrder[acs.RawStrategy()];
+  const coeff_order_t* JXL_RESTRICT order =
+      &coeff_order[CoeffOrderOffset(ord, c)];
+
+  size_t block_ctx = block_ctx_map.Context(qdc_row[lbx], qf_row[bx], ord, c);
+  const int32_t nzero_ctx =
+      block_ctx_map.NonZeroContext(predicted_nzeros, block_ctx) + ctx_offset;
+
+  size_t nzeros =
+      decoder->ReadHybridUintInlined<uses_lz77>(nzero_ctx, br, context_map);
+  // The first `covered_blocks` positions are never decoded here, so at most
+  // size - covered_blocks non-zeros can follow.
+  if (nzeros + covered_blocks > size) {
+    return JXL_FAILURE("Invalid AC: nzeros too large");
+  }
+  // Record the per-block average (rounded up) for later context prediction.
+  for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
+    for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
+      row_nzeros[bx + x + y * nzeros_stride] =
+          (nzeros + covered_blocks - 1) >> log2_covered_blocks;
+    }
+  }
+
+  const size_t histo_offset =
+      ctx_offset + block_ctx_map.ZeroDensityContextsOffset(block_ctx);
+
+  // `prev` tracks whether the previously decoded coefficient was non-zero; it
+  // is seeded from the density of non-zeros in the block.
+  size_t prev = (nzeros > size / 16 ? 0 : 1);
+  for (size_t k = covered_blocks; k < size && nzeros != 0; ++k) {
+    const size_t ctx =
+        histo_offset + ZeroDensityContext(nzeros, k, covered_blocks,
+                                          log2_covered_blocks, prev);
+    const size_t u_coeff =
+        decoder->ReadHybridUintInlined<uses_lz77>(ctx, br, context_map);
+    // Hand-rolled version of UnpackSigned, shifting before the conversion to
+    // signed integer to avoid undefined behavior of shifting negative numbers.
+    const size_t magnitude = u_coeff >> 1;
+    const size_t neg_sign = (~u_coeff) & 1;
+    const intptr_t coeff =
+        static_cast<intptr_t>((magnitude ^ (neg_sign - 1)) << shift);
+    if (ac_type == ACType::k16) {
+      block.ptr16[order[k]] += coeff;
+    } else {
+      block.ptr32[order[k]] += coeff;
+    }
+    prev = static_cast<size_t>(u_coeff != 0);
+    nzeros -= prev;
+  }
+  // All announced non-zeros must have been consumed.
+  if (JXL_UNLIKELY(nzeros != 0)) {
+    return JXL_FAILURE("Invalid AC: nzeros not 0. Block (%" PRIuS ", %" PRIuS
+                       "), channel %" PRIuS,
+                       bx, by, c);
+  }
+
+  return true;
+}
+
+// Structs used by DecodeGroupImpl to get a quantized block.
+// GetBlockFromBitstream uses ANS decoding (and thus keeps track of row
+// pointers in row_nzeros), GetBlockFromEncoder simply reads the coefficient
+// image provided by the encoder.
+
+// GetBlock implementation that entropy-decodes the coefficients of each
+// varblock from the bitstream, using one BitReader / ANSSymbolReader per pass.
+// Init() must succeed before StartRow() / LoadBlock() are used.
+struct GetBlockFromBitstream : public GetBlock {
+  // Caches the row pointers needed to decode block row `by` of the group: the
+  // quantization field row, the quantized DC row, and for every pass and
+  // channel the current and previous rows of the non-zero counts.
+  void StartRow(size_t by) override {
+    qf_row = rect.ConstRow(*qf, by);
+    // Does not depend on the channel, so it is computed once per row.
+    quant_dc_row = quant_dc->ConstRow(rect.y0() + by) + rect.x0();
+    for (size_t c = 0; c < 3; c++) {
+      size_t sby = by >> vshift[c];
+      for (size_t i = 0; i < num_passes; i++) {
+        row_nzeros[i][c] = group_dec_cache->num_nzeroes[i].PlaneRow(c, sby);
+        // The first row of the group has no row above it.
+        row_nzeros_top[i][c] =
+            sby == 0
+                ? nullptr
+                : group_dec_cache->num_nzeroes[i].ConstPlaneRow(c, sby - 1);
+      }
+    }
+  }
+
+  // Decodes the varblock at block position (bx, by) for every channel and
+  // every pass, adding the decoded coefficients into `block[c]`. Channels are
+  // visited in the order 1, 0, 2; a subsampled channel is skipped at positions
+  // that do not map exactly onto one of its own blocks. Returns the first
+  // decoding error, if any.
+  Status LoadBlock(size_t bx, size_t by, const AcStrategy& acs, size_t size,
+                   size_t log2_covered_blocks, ACPtr block[3],
+                   ACType ac_type) override {
+    for (size_t c : {1, 0, 2}) {
+      size_t sbx = bx >> hshift[c];
+      size_t sby = by >> vshift[c];
+      if (JXL_UNLIKELY((sbx << hshift[c] != bx) || (sby << vshift[c] != by))) {
+        continue;
+      }
+
+      for (size_t pass = 0; JXL_UNLIKELY(pass < num_passes); pass++) {
+        // Pick the decoder instantiation matching the coefficient storage
+        // type and whether this pass uses LZ77.
+        auto decode_ac_varblock =
+            decoders[pass].UsesLZ77()
+                ? (ac_type == ACType::k16 ? DecodeACVarBlock<ACType::k16, 1>
+                                          : DecodeACVarBlock<ACType::k32, 1>)
+                : (ac_type == ACType::k16 ? DecodeACVarBlock<ACType::k16, 0>
+                                          : DecodeACVarBlock<ACType::k32, 0>);
+        JXL_RETURN_IF_ERROR(decode_ac_varblock(
+            ctx_offset[pass], log2_covered_blocks, row_nzeros[pass][c],
+            row_nzeros_top[pass][c], nzeros_stride, c, sbx, sby, bx, acs,
+            &coeff_orders[pass * coeff_order_size], readers[pass],
+            &decoders[pass], context_map[pass], quant_dc_row, qf_row,
+            *block_ctx_map, block[c], shift_for_pass[pass]));
+      }
+    }
+    return true;
+  }
+
+  // Binds this object to `num_passes` passes, starting at pass `first_pass`,
+  // of the group covering `rect`. For each pass, optionally reads a histogram
+  // selector of `histo_selector_bits` bits from its reader and creates the ANS
+  // symbol reader. Fails if a selector is out of range. `group_idx` is not
+  // referenced in this function body.
+  Status Init(BitReader* JXL_RESTRICT* JXL_RESTRICT readers, size_t num_passes,
+              size_t group_idx, size_t histo_selector_bits, const Rect& rect,
+              GroupDecCache* JXL_RESTRICT group_dec_cache,
+              PassesDecoderState* dec_state, size_t first_pass) {
+    for (size_t i = 0; i < 3; i++) {
+      hshift[i] = dec_state->shared->frame_header.chroma_subsampling.HShift(i);
+      vshift[i] = dec_state->shared->frame_header.chroma_subsampling.VShift(i);
+    }
+    // Per-pass tables are offset by first_pass so that they can be indexed
+    // with pass numbers relative to this call.
+    this->coeff_order_size = dec_state->shared->coeff_order_size;
+    this->coeff_orders =
+        dec_state->shared->coeff_orders.data() + first_pass * coeff_order_size;
+    this->context_map = dec_state->context_map.data() + first_pass;
+    this->readers = readers;
+    this->num_passes = num_passes;
+    this->shift_for_pass =
+        dec_state->shared->frame_header.passes.shift + first_pass;
+    this->group_dec_cache = group_dec_cache;
+    this->rect = rect;
+    block_ctx_map = &dec_state->shared->block_ctx_map;
+    qf = &dec_state->shared->raw_quant_field;
+    quant_dc = &dec_state->shared->quant_dc;
+
+    for (size_t pass = 0; pass < num_passes; pass++) {
+      // Select which histogram set to use among those of the current pass.
+      size_t cur_histogram = 0;
+      if (histo_selector_bits != 0) {
+        cur_histogram = readers[pass]->ReadBits(histo_selector_bits);
+      }
+      if (cur_histogram >= dec_state->shared->num_histograms) {
+        return JXL_FAILURE("Invalid histogram selector");
+      }
+      ctx_offset[pass] = cur_histogram * block_ctx_map->NumACContexts();
+
+      decoders[pass] =
+          ANSSymbolReader(&dec_state->code[pass + first_pass], readers[pass]);
+    }
+    // A single stride is used for the nzeros images of all passes.
+    nzeros_stride = group_dec_cache->num_nzeroes[0].PixelsPerRow();
+    for (size_t i = 0; i < num_passes; i++) {
+      JXL_ASSERT(
+          nzeros_stride ==
+          static_cast<size_t>(group_dec_cache->num_nzeroes[i].PixelsPerRow()));
+    }
+    return true;
+  }
+
+  const uint32_t* shift_for_pass = nullptr;  // not owned
+  const coeff_order_t* JXL_RESTRICT coeff_orders;
+  size_t coeff_order_size;
+  const std::vector<uint8_t>* JXL_RESTRICT context_map;
+  ANSSymbolReader decoders[kMaxNumPasses];
+  BitReader* JXL_RESTRICT* JXL_RESTRICT readers;
+  size_t num_passes;
+  size_t ctx_offset[kMaxNumPasses];
+  size_t nzeros_stride;
+  int32_t* JXL_RESTRICT row_nzeros[kMaxNumPasses][3];
+  const int32_t* JXL_RESTRICT row_nzeros_top[kMaxNumPasses][3];
+  GroupDecCache* JXL_RESTRICT group_dec_cache;
+  const BlockCtxMap* block_ctx_map;
+  const ImageI* qf;
+  const ImageB* quant_dc;
+  const int32_t* qf_row;
+  const uint8_t* quant_dc_row;
+  Rect rect;
+  size_t hshift[3], vshift[3];
+};
+
+// GetBlock implementation that takes the coefficients from the per-pass
+// coefficient images produced by the encoder rather than from a bitstream.
+struct GetBlockFromEncoder : public GetBlock {
+  // Nothing to prepare per row: blocks are consumed sequentially via `offset`.
+  void StartRow(size_t /*by*/) override {}
+
+  // Adds, for each channel and each pass, the next `size` coefficients of the
+  // group (scaled by 2^shift_for_pass[pass]) into `block[c]`, then advances
+  // the running offset by `size`. Only 32-bit coefficients are supported.
+  Status LoadBlock(size_t bx, size_t by, const AcStrategy& acs, size_t size,
+                   size_t log2_covered_blocks, ACPtr block[3],
+                   ACType ac_type) override {
+    JXL_DASSERT(ac_type == ACType::k32);
+    const size_t pass_count = quantized_ac->size();
+    for (size_t channel = 0; channel < 3; ++channel) {
+      int32_t* dst = block[channel].ptr32;
+      for (size_t pass = 0; pass < pass_count; ++pass) {
+        const int32_t* src = rows[pass][channel] + offset;
+        // TODO(veluca): SIMD.
+        for (size_t idx = 0; idx < size; ++idx) {
+          dst[idx] += src[idx] * (1 << shift_for_pass[pass]);
+        }
+      }
+    }
+    offset += size;
+    return true;
+  }
+
+  // Caches, for each pass and channel, the pointer to the start of group
+  // `group_idx` in the coefficient images `ac`. Every image must store 32-bit
+  // coefficients. `ac` and `shift_for_pass` are referenced, not copied.
+  GetBlockFromEncoder(const std::vector<std::unique_ptr<ACImage>>& ac,
+                      size_t group_idx, const uint32_t* shift_for_pass)
+      : quantized_ac(&ac), shift_for_pass(shift_for_pass) {
+    // TODO(veluca): not supported with chroma subsampling.
+    const size_t pass_count = quantized_ac->size();
+    for (size_t pass = 0; pass < pass_count; ++pass) {
+      ACImage* const image = (*quantized_ac)[pass].get();
+      JXL_CHECK(image->Type() == ACType::k32);
+      for (size_t channel = 0; channel < 3; ++channel) {
+        rows[pass][channel] = image->PlaneRow(channel, group_idx, 0).ptr32;
+      }
+    }
+  }
+
+  const std::vector<std::unique_ptr<ACImage>>* JXL_RESTRICT quantized_ac;
+  // Index of the first coefficient of the next block within the group.
+  size_t offset = 0;
+  const int32_t* JXL_RESTRICT rows[kMaxNumPasses][3];
+  const uint32_t* shift_for_pass = nullptr;  // not owned
+};
+
+HWY_EXPORT(DecodeGroupImpl);
+
+}  // namespace
+
+// Decodes passes [first_pass, first_pass + num_passes) of AC group `group_idx`
+// from `readers` (one reader per pass).
+//
+// The group is rendered (kDraw) when these passes complete the frame's total
+// number of passes or when `force_draw` is set; otherwise coefficients are
+// only decoded. If `should_run_pipeline` is non-null it receives whether the
+// group was drawn.
+//
+// Special case: when drawing with num_passes == 0 and first_pass == 0, no AC
+// data is read; the output is produced by upsampling the DC image 8x through
+// dec_state->upsampler8x and the function returns.
+//
+// `dc_only` requires num_passes == 0 and skips reading histogram selectors.
+// Returns a failure status on decoding errors or when the final ANS state
+// check of any pass fails.
+Status DecodeGroup(BitReader* JXL_RESTRICT* JXL_RESTRICT readers,
+                   size_t num_passes, size_t group_idx,
+                   PassesDecoderState* JXL_RESTRICT dec_state,
+                   GroupDecCache* JXL_RESTRICT group_dec_cache, size_t thread,
+                   RenderPipelineInput& render_pipeline_input,
+                   ImageBundle* JXL_RESTRICT decoded, size_t first_pass,
+                   bool force_draw, bool dc_only, bool* should_run_pipeline) {
+  DrawMode draw = (num_passes + first_pass ==
+                   dec_state->shared->frame_header.passes.num_passes) ||
+                          force_draw
+                      ? kDraw
+                      : kDontDraw;
+
+  if (should_run_pipeline) {
+    *should_run_pipeline = draw != kDontDraw;
+  }
+
+  // DC-only rendering: upsample the DC image by 8x into the output buffers.
+  if (draw == kDraw && num_passes == 0 && first_pass == 0) {
+    group_dec_cache->InitDCBufferOnce();
+    const YCbCrChromaSubsampling& cs =
+        dec_state->shared->frame_header.chroma_subsampling;
+    for (size_t c : {0, 1, 2}) {
+      size_t hs = cs.HShift(c);
+      size_t vs = cs.VShift(c);
+      // We reuse filter_input_storage here as it is not currently in use.
+      // NOTE(review): the comment above looks stale - the code below writes
+      // into group_dec_cache->dc_buffer, not filter_input_storage.
+      const Rect src_rect_precs = dec_state->shared->BlockGroupRect(group_idx);
+      const Rect src_rect =
+          Rect(src_rect_precs.x0() >> hs, src_rect_precs.y0() >> vs,
+               src_rect_precs.xsize() >> hs, src_rect_precs.ysize() >> vs);
+      // Copy the group's DC region, with a border of 2, into dc_buffer at
+      // offset (kRenderPipelineXOffset, 2).
+      const Rect copy_rect(kRenderPipelineXOffset, 2, src_rect.xsize(),
+                           src_rect.ysize());
+      CopyImageToWithPadding(src_rect, dec_state->shared->dc->Plane(c), 2,
+                             copy_rect, &group_dec_cache->dc_buffer);
+      // Mirrorpad. Interleaving left and right padding ensures that padding
+      // works out correctly even for images with DC size of 1.
+      for (size_t y = 0; y < src_rect.ysize() + 4; y++) {
+        // Column in dc_buffer just past the last DC column of the image.
+        size_t xend = kRenderPipelineXOffset +
+                      (dec_state->shared->dc->Plane(c).xsize() >> hs) -
+                      src_rect.x0();
+        for (size_t ix = 0; ix < 2; ix++) {
+          // Left border: only for groups starting at the left image edge.
+          if (src_rect.x0() == 0) {
+            group_dec_cache->dc_buffer.Row(y)[kRenderPipelineXOffset - ix - 1] =
+                group_dec_cache->dc_buffer.Row(y)[kRenderPipelineXOffset + ix];
+          }
+          // Right border: only for groups reaching the right image edge.
+          if (src_rect.x0() + src_rect.xsize() + 2 >=
+              (dec_state->shared->dc->xsize() >> hs)) {
+            group_dec_cache->dc_buffer.Row(y)[xend + ix] =
+                group_dec_cache->dc_buffer.Row(y)[xend - ix - 1];
+          }
+        }
+      }
+      Rect dst_rect = render_pipeline_input.GetBuffer(c).second;
+      ImageF* upsampling_dst = render_pipeline_input.GetBuffer(c).first;
+      JXL_ASSERT(dst_rect.IsInside(*upsampling_dst));
+
+      // Each DC row is upsampled from 5 input rows (the row itself and two
+      // vertically mirrored neighbours on each side) into 8 output rows.
+      RenderPipelineStage::RowInfo input_rows(1, std::vector<float*>(5));
+      RenderPipelineStage::RowInfo output_rows(1, std::vector<float*>(8));
+      for (size_t y = src_rect.y0(); y < src_rect.y0() + src_rect.ysize();
+           y++) {
+        for (ssize_t iy = 0; iy < 5; iy++) {
+          input_rows[0][iy] = group_dec_cache->dc_buffer.Row(
+              Mirror(ssize_t(y) + iy - 2,
+                     dec_state->shared->dc->Plane(c).ysize() >> vs) +
+              2 - src_rect.y0());
+        }
+        for (size_t iy = 0; iy < 8; iy++) {
+          output_rows[0][iy] =
+              dst_rect.Row(upsampling_dst, ((y - src_rect.y0()) << 3) + iy) -
+              kRenderPipelineXOffset;
+        }
+        // Arguments set to 0/nullptr are not used.
+        dec_state->upsampler8x->ProcessRow(input_rows, output_rows,
+                                           /*xextra=*/0, src_rect.xsize(), 0, 0,
+                                           thread);
+      }
+    }
+    return true;
+  }
+
+  // Number of bits of the per-pass histogram selector (0 when dc_only).
+  size_t histo_selector_bits = 0;
+  if (dc_only) {
+    JXL_ASSERT(num_passes == 0);
+  } else {
+    JXL_ASSERT(dec_state->shared->num_histograms > 0);
+    histo_selector_bits = CeilLog2Nonzero(dec_state->shared->num_histograms);
+  }
+
+  auto get_block = jxl::make_unique<GetBlockFromBitstream>();
+  JXL_RETURN_IF_ERROR(
+      get_block->Init(readers, num_passes, group_idx, histo_selector_bits,
+                      dec_state->shared->BlockGroupRect(group_idx),
+                      group_dec_cache, dec_state, first_pass));
+
+  JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(DecodeGroupImpl)(
+      get_block.get(), group_dec_cache, dec_state, thread, group_idx,
+      render_pipeline_input, decoded, draw));
+
+  // Every pass must leave its ANS decoder in the expected final state.
+  for (size_t pass = 0; pass < num_passes; pass++) {
+    if (!get_block->decoders[pass].CheckANSFinalState()) {
+      return JXL_FAILURE("ANS checksum failure.");
+    }
+  }
+  return true;
+}
+
+// Decodes and renders group `group_idx` from the in-memory coefficient images
+// `ac` (one per pass) instead of from a bitstream. The group cache is
+// initialized for zero passes with all valid AC strategies enabled, and the
+// group is always drawn. `aux_out` is not referenced in this function body.
+Status DecodeGroupForRoundtrip(const std::vector<std::unique_ptr<ACImage>>& ac,
+                               size_t group_idx,
+                               PassesDecoderState* JXL_RESTRICT dec_state,
+                               GroupDecCache* JXL_RESTRICT group_dec_cache,
+                               size_t thread,
+                               RenderPipelineInput& render_pipeline_input,
+                               ImageBundle* JXL_RESTRICT decoded,
+                               AuxOut* aux_out) {
+  const uint32_t* pass_shifts = dec_state->shared->frame_header.passes.shift;
+  GetBlockFromEncoder encoder_blocks(ac, group_idx, pass_shifts);
+
+  // Bit mask with one bit set for every valid AC strategy.
+  const auto all_strategies = (1u << AcStrategy::kNumValidStrategies) - 1;
+  group_dec_cache->InitOnce(/*num_passes=*/0, /*used_acs=*/all_strategies);
+
+  return HWY_DYNAMIC_DISPATCH(DecodeGroupImpl)(
+      &encoder_blocks, group_dec_cache, dec_state, thread, group_idx,
+      render_pipeline_input, decoded, kDraw);
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_group.h b/third-party/libjxl/libjxl/lib/jxl/dec_group.h
new file mode 100644
index 0000000000..e32ea67b5f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_group.h
@@ -0,0 +1,49 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_GROUP_H_
+#define LIB_JXL_DEC_GROUP_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/quantizer.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+Status DecodeGroup(BitReader* JXL_RESTRICT* JXL_RESTRICT readers,
+                   size_t num_passes, size_t group_idx,
+                   PassesDecoderState* JXL_RESTRICT dec_state,
+                   GroupDecCache* JXL_RESTRICT group_dec_cache, size_t thread,
+                   RenderPipelineInput& render_pipeline_input,
+                   ImageBundle* JXL_RESTRICT decoded, size_t first_pass,
+                   bool force_draw, bool dc_only, bool* should_run_pipeline);
+
+Status DecodeGroupForRoundtrip(const std::vector<std::unique_ptr<ACImage>>& ac,
+                               size_t group_idx,
+                               PassesDecoderState* JXL_RESTRICT dec_state,
+                               GroupDecCache* JXL_RESTRICT group_dec_cache,
+                               size_t thread,
+                               RenderPipelineInput& render_pipeline_input,
+                               ImageBundle* JXL_RESTRICT decoded,
+                               AuxOut* aux_out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_GROUP_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_group_border.cc b/third-party/libjxl/libjxl/lib/jxl/dec_group_border.cc
new file mode 100644
index 0000000000..4bee3ae6ef
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_group_border.cc
@@ -0,0 +1,184 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_group_border.h"
+
+#include <atomic>
+
+namespace jxl {
+
+void GroupBorderAssigner::Init(const FrameDimensions& frame_dim) {
+  frame_dim_ = frame_dim;
+  size_t num_corners =  // one counter per corner of the group grid
+      (frame_dim_.xsize_groups + 1) * (frame_dim_.ysize_groups + 1);
+  counters_.reset(new std::atomic<uint8_t>[num_corners]);
+  // Initialize counters; each is a bitmask over the four positions around it.
+  for (size_t y = 0; y < frame_dim_.ysize_groups + 1; y++) {
+    for (size_t x = 0; x < frame_dim_.xsize_groups + 1; x++) {
+      // Counters at image borders don't have anything on the other side, so we
+      // pre-fill their value to have more uniform handling afterwards.
+      uint8_t init_value = 0;
+      if (x == 0) {  // left image edge: no groups to the left of this corner
+        init_value |= kTopLeft | kBottomLeft;
+      }
+      if (x == frame_dim_.xsize_groups) {  // right image edge
+        init_value |= kTopRight | kBottomRight;
+      }
+      if (y == 0) {  // top image edge
+        init_value |= kTopLeft | kTopRight;
+      }
+      if (y == frame_dim_.ysize_groups) {  // bottom image edge
+        init_value |= kBottomLeft | kBottomRight;
+      }
+      counters_[y * (frame_dim_.xsize_groups + 1) + x] = init_value;  // row-major corner index
+    }
+  }
+}
+
+void GroupBorderAssigner::ClearDone(size_t group_id) {
+  size_t x = group_id % frame_dim_.xsize_groups;  // group column
+  size_t y = group_id / frame_dim_.xsize_groups;  // group row
+  size_t top_left_idx = y * (frame_dim_.xsize_groups + 1) + x;  // corner rows hold xsize_groups + 1 counters
+  size_t top_right_idx = y * (frame_dim_.xsize_groups + 1) + x + 1;
+  size_t bottom_right_idx = (y + 1) * (frame_dim_.xsize_groups + 1) + x + 1;
+  size_t bottom_left_idx = (y + 1) * (frame_dim_.xsize_groups + 1) + x;
+  counters_[top_left_idx].fetch_and(~kBottomRight);  // seen from its top-left corner, this group is the bottom-right one
+  counters_[top_right_idx].fetch_and(~kBottomLeft);
+  counters_[bottom_left_idx].fetch_and(~kTopRight);
+  counters_[bottom_right_idx].fetch_and(~kTopLeft);
+}
+
+// Looking at each corner between groups, we can guarantee that the four
+// involved groups will agree between each other regarding the order in which
+// each of the four groups terminated. Thus, the last of the four groups
+// gets the responsibility of handling the corner. For borders, every border
+// is assigned to its top corner (for vertical borders) or to its left corner
+// (for horizontal borders): the order as seen on those corners will decide who
+// handles that border.
+
+void GroupBorderAssigner::GroupDone(size_t group_id, size_t padx, size_t pady,
+                                    Rect* rects_to_finalize,
+                                    size_t* num_to_finalize) {
+  size_t x = group_id % frame_dim_.xsize_groups;  // group column
+  size_t y = group_id / frame_dim_.xsize_groups;  // group row
+  Rect block_rect(x * frame_dim_.group_dim / kBlockDim,  // this group's rect, in block units
+                  y * frame_dim_.group_dim / kBlockDim,
+                  frame_dim_.group_dim / kBlockDim,
+                  frame_dim_.group_dim / kBlockDim, frame_dim_.xsize_blocks,
+                  frame_dim_.ysize_blocks);
+
+  size_t top_left_idx = y * (frame_dim_.xsize_groups + 1) + x;
+  size_t top_right_idx = y * (frame_dim_.xsize_groups + 1) + x + 1;
+  size_t bottom_right_idx = (y + 1) * (frame_dim_.xsize_groups + 1) + x + 1;
+  size_t bottom_left_idx = (y + 1) * (frame_dim_.xsize_groups + 1) + x;
+
+  auto fetch_status = [this](size_t idx, uint8_t bit) {
+    // Note that the acq-rel semantics of this fetch are actually needed to
+    // ensure that the pixel data of the group is already written to memory.
+    size_t status = counters_[idx].fetch_or(bit);
+    JXL_DASSERT((bit & status) == 0);  // this group must not already be marked done at this corner
+    return bit | status;  // corner state including the bit just set
+  };
+
+  size_t top_left_status = fetch_status(top_left_idx, kBottomRight);
+  size_t top_right_status = fetch_status(top_right_idx, kBottomLeft);
+  size_t bottom_right_status = fetch_status(bottom_right_idx, kTopLeft);
+  size_t bottom_left_status = fetch_status(bottom_left_idx, kTopRight);
+
+  size_t x1 = block_rect.x0() + block_rect.xsize();  // exclusive end of the group, in blocks
+  size_t y1 = block_rect.y0() + block_rect.ysize();
+
+  bool is_last_group_x = frame_dim_.xsize_groups == x + 1;
+  bool is_last_group_y = frame_dim_.ysize_groups == y + 1;
+
+  // Start of border of neighbouring group, end of border of this group, start
+  // of border of this group (on the other side), end of border of next group.
+  size_t xpos[4] = {  // pixel coordinates (block coordinates times kBlockDim)
+      block_rect.x0() == 0 ? 0 : block_rect.x0() * kBlockDim - padx,
+      block_rect.x0() == 0
+          ? 0
+          : std::min(frame_dim_.xsize, block_rect.x0() * kBlockDim + padx),
+      is_last_group_x ? frame_dim_.xsize : x1 * kBlockDim - padx,
+      std::min(frame_dim_.xsize, x1 * kBlockDim + padx)};
+  size_t ypos[4] = {  // same layout as xpos, vertically
+      block_rect.y0() == 0 ? 0 : block_rect.y0() * kBlockDim - pady,
+      block_rect.y0() == 0
+          ? 0
+          : std::min(frame_dim_.ysize, block_rect.y0() * kBlockDim + pady),
+      is_last_group_y ? frame_dim_.ysize : y1 * kBlockDim - pady,
+      std::min(frame_dim_.ysize, y1 * kBlockDim + pady)};
+
+  *num_to_finalize = 0;
+  auto append_rect = [&](size_t x0, size_t x1, size_t y0, size_t y1) {  // args are indices into xpos/ypos; x1/y1 shadow the locals above
+    Rect rect(xpos[x0], ypos[y0], xpos[x1] - xpos[x0], ypos[y1] - ypos[y0]);
+    if (rect.xsize() == 0 || rect.ysize() == 0) return;  // empty rects are not reported
+    JXL_DASSERT(*num_to_finalize < kMaxToFinalize);
+    rects_to_finalize[(*num_to_finalize)++] = rect;
+  };
+
+  // Because of how group borders are assigned, it is impossible that we need to
+  // process the left and right side of some area but not the center area. Thus,
+  // we compute the first/last part to process in every horizontal strip and
+  // merge them together. We first collect a mask of what parts should be
+  // processed.
+  // We do this horizontally rather than vertically because horizontal borders
+  // are larger.
+  bool available_parts_mask[3][3] = {};  // [x][y]
+  // Center
+  available_parts_mask[1][1] = true;
+  // Corners
+  if (top_left_status == 0xF) available_parts_mask[0][0] = true;  // 0xF: all four position bits set
+  if (top_right_status == 0xF) available_parts_mask[2][0] = true;
+  if (bottom_right_status == 0xF) available_parts_mask[2][2] = true;
+  if (bottom_left_status == 0xF) available_parts_mask[0][2] = true;
+  // Other borders
+  if (top_left_status & kTopRight) available_parts_mask[1][0] = true;  // top border
+  if (top_left_status & kBottomLeft) available_parts_mask[0][1] = true;  // left border
+  if (top_right_status & kBottomRight) available_parts_mask[2][1] = true;  // right border
+  if (bottom_left_status & kBottomRight) available_parts_mask[1][2] = true;  // bottom border
+
+  // Collect horizontal ranges.
+  constexpr size_t kNoSegment = 3;  // sentinel; as an xpos index pair {3, 3} it gives a zero-width rect
+  std::pair<size_t, size_t> horizontal_segments[3] = {{kNoSegment, kNoSegment},
+                                                      {kNoSegment, kNoSegment},
+                                                      {kNoSegment, kNoSegment}};
+  for (size_t y = 0; y < 3; y++) {  // y and x here shadow the group coordinates above
+    for (size_t x = 0; x < 3; x++) {
+      if (!available_parts_mask[x][y]) continue;
+      JXL_DASSERT(horizontal_segments[y].second == kNoSegment ||
+                  horizontal_segments[y].second == x);
+      JXL_DASSERT((horizontal_segments[y].first == kNoSegment) ==
+                  (horizontal_segments[y].second == kNoSegment));
+      if (horizontal_segments[y].first == kNoSegment) {
+        horizontal_segments[y].first = x;
+      }
+      horizontal_segments[y].second = x + 1;  // half-open range [first, second) of xpos indices
+    }
+  }
+  if (horizontal_segments[0] == horizontal_segments[1] &&  // all three strips share one range: one rect
+      horizontal_segments[0] == horizontal_segments[2]) {
+    append_rect(horizontal_segments[0].first, horizontal_segments[0].second, 0,
+                3);
+  } else if (horizontal_segments[0] == horizontal_segments[1]) {  // merge top and middle strips
+    append_rect(horizontal_segments[0].first, horizontal_segments[0].second, 0,
+                2);
+    append_rect(horizontal_segments[2].first, horizontal_segments[2].second, 2,
+                3);
+  } else if (horizontal_segments[1] == horizontal_segments[2]) {  // merge middle and bottom strips
+    append_rect(horizontal_segments[0].first, horizontal_segments[0].second, 0,
+                1);
+    append_rect(horizontal_segments[1].first, horizontal_segments[1].second, 1,
+                3);
+  } else {  // no strips can be merged: up to three separate rects
+    append_rect(horizontal_segments[0].first, horizontal_segments[0].second, 0,
+                1);
+    append_rect(horizontal_segments[1].first, horizontal_segments[1].second, 1,
+                2);
+    append_rect(horizontal_segments[2].first, horizontal_segments[2].second, 2,
+                3);
+  }
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_group_border.h b/third-party/libjxl/libjxl/lib/jxl/dec_group_border.h
new file mode 100644
index 0000000000..2d974c9987
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_group_border.h
@@ -0,0 +1,47 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_GROUP_BORDER_H_
+#define LIB_JXL_DEC_GROUP_BORDER_H_
+
+#include <stddef.h>
+
+#include <atomic>
+
+#include "lib/jxl/base/arch_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+class GroupBorderAssigner {
+ public:
+  // Prepare the GroupBorderAssigner to handle a given frame.
+  void Init(const FrameDimensions& frame_dim);
+  // Marks a group as done, and returns the (at most 3) rects to run
+  // FinalizeImageRect on. The rects are written to `rects_to_finalize` and
+  // their count to `num_to_finalize`; `padx`/`pady` are the border sizes.
+  void GroupDone(size_t group_id, size_t padx, size_t pady,
+                 Rect* rects_to_finalize, size_t* num_to_finalize);
+  // Marks a group as not-done, for running re-paints.
+  void ClearDone(size_t group_id);
+
+  static constexpr size_t kMaxToFinalize = 3;  // capacity callers need in `rects_to_finalize`
+
+ private:
+  FrameDimensions frame_dim_;
+  std::unique_ptr<std::atomic<uint8_t>[]> counters_;  // one bitmask per corner of the group grid
+
+  // Constants to identify group positions relative to the corners.
+  static constexpr uint8_t kTopLeft = 0x01;
+  static constexpr uint8_t kTopRight = 0x02;
+  static constexpr uint8_t kBottomRight = 0x04;
+  static constexpr uint8_t kBottomLeft = 0x08;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_GROUP_BORDER_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_huffman.cc b/third-party/libjxl/libjxl/lib/jxl/dec_huffman.cc
new file mode 100644
index 0000000000..05b275773a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_huffman.cc
@@ -0,0 +1,255 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_huffman.h"
+
+#include <string.h> /* for memset */
+
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/huffman_table.h"
+
+namespace jxl {
+
+static const int kCodeLengthCodes = 18;
+static const uint8_t kCodeLengthCodeOrder[kCodeLengthCodes] = {
+    1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+};
+static const uint8_t kDefaultCodeLength = 8;
+static const uint8_t kCodeLengthRepeatCode = 16;
+
+int ReadHuffmanCodeLengths(const uint8_t* code_length_code_lengths,
+                           int num_symbols, uint8_t* code_lengths,
+                           BitReader* br) {
+  int symbol = 0;  // next index of code_lengths to fill
+  uint8_t prev_code_len = kDefaultCodeLength;  // last non-zero length seen
+  int repeat = 0;
+  uint8_t repeat_code_len = 0;
+  int space = 32768;  // remaining code space; a length-L code uses 32768 >> L
+  HuffmanCode table[32];  // 2^5 entries for the 5-bit lookup below
+
+  uint16_t counts[16] = {0};
+  for (int i = 0; i < kCodeLengthCodes; ++i) {
+    ++counts[code_length_code_lengths[i]];
+  }
+  if (!BuildHuffmanTable(table, 5, code_length_code_lengths, kCodeLengthCodes,
+                         &counts[0])) {
+    return 0;
+  }
+
+  while (symbol < num_symbols && space > 0) {
+    const HuffmanCode* p = table;
+    uint8_t code_len;
+    br->Refill();
+    p += br->PeekFixedBits<5>();
+    br->Consume(p->bits);
+    code_len = (uint8_t)p->value;
+    if (code_len < kCodeLengthRepeatCode) {  // literal code length
+      repeat = 0;
+      code_lengths[symbol++] = code_len;
+      if (code_len != 0) {
+        prev_code_len = code_len;
+        space -= 32768u >> code_len;
+      }
+    } else {  // repeat code (kCodeLengthRepeatCode or above)
+      const int extra_bits = code_len - 14;  // 2 extra bits for code 16, 3 for code 17
+      int old_repeat;
+      int repeat_delta;
+      uint8_t new_len = 0;  // anything other than code 16 repeats zero
+      if (code_len == kCodeLengthRepeatCode) {
+        new_len = prev_code_len;  // code 16 repeats the previous non-zero length
+      }
+      if (repeat_code_len != new_len) {
+        repeat = 0;
+        repeat_code_len = new_len;
+      }
+      old_repeat = repeat;
+      if (repeat > 0) {  // consecutive repeat codes extend the running count
+        repeat -= 2;
+        repeat <<= extra_bits;
+      }
+      repeat += (int)br->ReadBits(extra_bits) + 3;
+      repeat_delta = repeat - old_repeat;  // entries to add on top of those already written
+      if (symbol + repeat_delta > num_symbols) {
+        return 0;  // the run would write past the end of code_lengths
+      }
+      memset(&code_lengths[symbol], repeat_code_len, (size_t)repeat_delta);
+      symbol += repeat_delta;
+      if (repeat_code_len != 0) {
+        space -= repeat_delta << (15 - repeat_code_len);
+      }
+    }
+  }
+  if (space != 0) {  // the lengths must use up the code space exactly
+    return 0;
+  }
+  memset(&code_lengths[symbol], 0, (size_t)(num_symbols - symbol));  // remaining symbols get length 0
+  return true;  // return type is int, so success is 1 and failure is 0
+}
+
+static JXL_INLINE bool ReadSimpleCode(size_t alphabet_size, BitReader* br,
+                                      HuffmanCode* table) {
+  size_t max_bits =  // bits read per symbol value; 0 when alphabet_size <= 1
+      (alphabet_size > 1u) ? FloorLog2Nonzero(alphabet_size - 1u) + 1 : 0;
+
+  size_t num_symbols = br->ReadFixedBits<2>() + 1;  // 1..4
+
+  uint16_t symbols[4] = {0};
+  for (size_t i = 0; i < num_symbols; ++i) {
+    uint16_t symbol = br->ReadBits(max_bits);
+    if (symbol >= alphabet_size) {
+      return false;
+    }
+    symbols[i] = symbol;
+  }
+
+  for (size_t i = 0; i < num_symbols - 1; ++i) {  // reject duplicate symbols
+    for (size_t j = i + 1; j < num_symbols; ++j) {
+      if (symbols[i] == symbols[j]) return false;
+    }
+  }
+
+  // 4 symbols have two options to encode; one extra bit selects (cases 4, 5).
+  if (num_symbols == 4) num_symbols += br->ReadFixedBits<1>();
+
+  const auto swap_symbols = [&symbols](size_t i, size_t j) {
+    uint16_t t = symbols[j];
+    symbols[j] = symbols[i];
+    symbols[i] = t;
+  };
+
+  size_t table_size = 1;
+  switch (num_symbols) {  // entries below are presumably {bits, value} — confirm against HuffmanCode
+    case 1:
+      table[0] = {0, symbols[0]};
+      break;
+    case 2:
+      if (symbols[0] > symbols[1]) swap_symbols(0, 1);
+      table[0] = {1, symbols[0]};
+      table[1] = {1, symbols[1]};
+      table_size = 2;
+      break;
+    case 3:  // first symbol gets 1 bit, the other two (sorted) get 2 bits
+      if (symbols[1] > symbols[2]) swap_symbols(1, 2);
+      table[0] = {1, symbols[0]};
+      table[2] = {1, symbols[0]};
+      table[1] = {2, symbols[1]};
+      table[3] = {2, symbols[2]};
+      table_size = 4;
+      break;
+    case 4: {  // four symbols, all 2 bits, sorted ascending
+      for (size_t i = 0; i < 3; ++i) {
+        for (size_t j = i + 1; j < 4; ++j) {
+          if (symbols[i] > symbols[j]) swap_symbols(i, j);
+        }
+      }
+      table[0] = {2, symbols[0]};
+      table[2] = {2, symbols[1]};
+      table[1] = {2, symbols[2]};
+      table[3] = {2, symbols[3]};
+      table_size = 4;
+      break;
+    }
+    case 5: {  // four symbols with lengths 1, 2, 3, 3 (the selector bit was 1)
+      if (symbols[2] > symbols[3]) swap_symbols(2, 3);
+      table[0] = {1, symbols[0]};
+      table[1] = {2, symbols[1]};
+      table[2] = {1, symbols[0]};
+      table[3] = {3, symbols[2]};
+      table[4] = {1, symbols[0]};
+      table[5] = {2, symbols[1]};
+      table[6] = {1, symbols[0]};
+      table[7] = {3, symbols[3]};
+      table_size = 8;
+      break;
+    }
+    default: {
+      // Unreachable.
+      return false;
+    }
+  }
+
+  const uint32_t goal_size = 1u << kHuffmanTableBits;
+  while (table_size != goal_size) {  // double the table by self-copy until it has goal_size entries
+    memcpy(&table[table_size], &table[0],
+           (size_t)table_size * sizeof(table[0]));
+    table_size <<= 1;
+  }
+
+  return true;
+}
+
+bool HuffmanDecodingData::ReadFromBitStream(size_t alphabet_size,
+                                            BitReader* br) {
+  if (alphabet_size > (1 << PREFIX_MAX_BITS)) return false;  // alphabet too large
+
+  /* simple_code_or_skip is used as follows:
+     1 for simple code;
+     0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths */
+  uint32_t simple_code_or_skip = br->ReadFixedBits<2>();
+  if (simple_code_or_skip == 1u) {
+    table_.resize(1u << kHuffmanTableBits);  // ReadSimpleCode fills exactly this many entries
+    return ReadSimpleCode(alphabet_size, br, table_.data());
+  }
+
+  std::vector<uint8_t> code_lengths(alphabet_size, 0);
+  uint8_t code_length_code_lengths[kCodeLengthCodes] = {0};
+  int space = 32;  // code space for the code-length code; a length-v code uses 32 >> v
+  int num_codes = 0;  // number of non-zero code-length code lengths
+  /* Static Huffman code for the code length code lengths */
+  static const HuffmanCode huff[16] = {
+      {2, 0}, {2, 4}, {2, 3}, {3, 2}, {2, 0}, {2, 4}, {2, 3}, {4, 1},
+      {2, 0}, {2, 4}, {2, 3}, {3, 2}, {2, 0}, {2, 4}, {2, 3}, {4, 5},
+  };
+  for (size_t i = simple_code_or_skip; i < kCodeLengthCodes && space > 0; ++i) {  // starts at 0, 2 or 3
+    const int code_len_idx = kCodeLengthCodeOrder[i];
+    const HuffmanCode* p = huff;
+    uint8_t v;
+    br->Refill();
+    p += br->PeekFixedBits<4>();
+    br->Consume(p->bits);
+    v = (uint8_t)p->value;
+    code_length_code_lengths[code_len_idx] = v;
+    if (v != 0) {
+      space -= (32u >> v);
+      ++num_codes;
+    }
+  }
+  bool ok = (num_codes == 1 || space == 0) &&  // need a single code or exactly filled code space
+            ReadHuffmanCodeLengths(code_length_code_lengths, alphabet_size,
+                                   &code_lengths[0], br);
+
+  if (!ok) return false;
+  uint16_t counts[16] = {0};  // histogram of code lengths
+  for (size_t i = 0; i < alphabet_size; ++i) {
+    ++counts[code_lengths[i]];
+  }
+  table_.resize(alphabet_size + 376);  // NOTE(review): 376 presumably bounds the extra table entries needed — confirm against BuildHuffmanTable
+  uint32_t table_size =
+      BuildHuffmanTable(table_.data(), kHuffmanTableBits, &code_lengths[0],
+                        alphabet_size, &counts[0]);
+  table_.resize(table_size);  // resize to the size BuildHuffmanTable reported
+  return (table_size > 0);
+}
+
+// Decodes the next Huffman-coded symbol from the bit-stream using table_.
+uint16_t HuffmanDecodingData::ReadSymbol(BitReader* br) const {
+  size_t n_bits;
+  const HuffmanCode* table = table_.data();
+  table += br->PeekBits(kHuffmanTableBits);  // first-level lookup; no Refill() here, presumably the caller's job — confirm
+  n_bits = table->bits;
+  if (n_bits > kHuffmanTableBits) {  // entry redirects to a second-level table
+    br->Consume(kHuffmanTableBits);
+    n_bits -= kHuffmanTableBits;
+    table += table->value;  // value is the offset from this entry to the second-level table
+    table += br->PeekBits(n_bits);
+  }
+  br->Consume(table->bits);
+  return table->value;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_huffman.h b/third-party/libjxl/libjxl/lib/jxl/dec_huffman.h
new file mode 100644
index 0000000000..162c3e309c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_huffman.h
@@ -0,0 +1,32 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_HUFFMAN_H_
+#define LIB_JXL_DEC_HUFFMAN_H_
+
+#include <memory>
+#include <vector>
+
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/huffman_table.h"
+
+namespace jxl {
+
+static constexpr size_t kHuffmanTableBits = 8u;
+
+struct HuffmanDecodingData {
+  // Decodes the Huffman code lengths from the bit-stream and fills table_
+  // (resizing it as needed) with the 2-level Huffman decoding table.
+  // Returns false if the Huffman code lengths cannot be decoded.
+  bool ReadFromBitStream(size_t alphabet_size, BitReader* br);
+
+  uint16_t ReadSymbol(BitReader* br) const;
+
+  std::vector<HuffmanCode> table_;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_HUFFMAN_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_modular.cc b/third-party/libjxl/libjxl/lib/jxl/dec_modular.cc
new file mode 100644
index 0000000000..0509b32269
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_modular.cc
@@ -0,0 +1,776 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_modular.h"
+
+#include <stdint.h>
+
+#include <atomic>
+#include <sstream>
+#include <vector>
+
+#include "lib/jxl/frame_header.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dec_modular.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::Rebind;
+
+void MultiplySum(const size_t xsize,
+                 const pixel_type* const JXL_RESTRICT row_in,
+                 const pixel_type* const JXL_RESTRICT row_in_Y,
+                 const float factor, float* const JXL_RESTRICT row_out) {
+  const HWY_FULL(float) df;
+  const Rebind<pixel_type, HWY_FULL(float)> di;  // assumes pixel_type <= float
+  const auto factor_v = Set(df, factor);
+  for (size_t x = 0; x < xsize; x += Lanes(di)) {  // whole vectors only, no scalar tail; rows presumably padded — confirm
+    const auto in = Add(Load(di, row_in + x), Load(di, row_in_Y + x));  // integer sum of the two input rows
+    const auto out = Mul(ConvertTo(df, in), factor_v);
+    Store(out, df, row_out + x);  // row_out[x] = (row_in[x] + row_in_Y[x]) * factor
+  }
+}
+
+void RgbFromSingle(const size_t xsize,
+                   const pixel_type* const JXL_RESTRICT row_in,
+                   const float factor, float* out_r, float* out_g,
+                   float* out_b) {
+  const HWY_FULL(float) df;
+  const Rebind<pixel_type, HWY_FULL(float)> di;  // assumes pixel_type <= float
+
+  const auto factor_v = Set(df, factor);
+  for (size_t x = 0; x < xsize; x += Lanes(di)) {  // whole vectors only, no scalar tail; rows presumably padded — confirm
+    const auto in = Load(di, row_in + x);
+    const auto out = Mul(ConvertTo(df, in), factor_v);
+    Store(out, df, out_r + x);  // the same scaled value goes to all three outputs
+    Store(out, df, out_g + x);
+    Store(out, df, out_b + x);
+  }
+}
+
+void SingleFromSingle(const size_t xsize,
+                      const pixel_type* const JXL_RESTRICT row_in,
+                      const float factor, float* row_out) {
+  const HWY_FULL(float) df;
+  const Rebind<pixel_type, HWY_FULL(float)> di;  // assumes pixel_type <= float
+
+  const auto factor_v = Set(df, factor);
+  for (size_t x = 0; x < xsize; x += Lanes(di)) {  // whole vectors only, no scalar tail; rows presumably padded — confirm
+    const auto in = Load(di, row_in + x);
+    const auto out = Mul(ConvertTo(df, in), factor_v);
+    Store(out, df, row_out + x);  // row_out[x] = row_in[x] * factor
+  }
+}
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(MultiplySum);       // Local function
+HWY_EXPORT(RgbFromSingle);     // Local function
+HWY_EXPORT(SingleFromSingle);  // Local function
+
+// Slow scalar conversion using double precision multiplication, only
+// needed when the bit depth is too high for single precision.
+void SingleFromSingleAccurate(const size_t xsize,
+                              const pixel_type* const JXL_RESTRICT row_in,
+                              const double factor, float* row_out) {
+  for (size_t x = 0; x < xsize; x++) {
+    row_out[x] = row_in[x] * factor;  // product computed in double, then narrowed to float
+  }
+}
+
+// Converts a custom [bits]-bit float (with [exp_bits] exponent bits) stored
+// as an int back to a binary32 float, for each of the xsize entries of row_in.
+void int_to_float(const pixel_type* const JXL_RESTRICT row_in,
+                  float* const JXL_RESTRICT row_out, const size_t xsize,
+                  const int bits, const int exp_bits) {
+  if (bits == 32) {  // already binary32: copy the bit patterns unchanged
+    JXL_ASSERT(sizeof(pixel_type) == sizeof(float));
+    JXL_ASSERT(exp_bits == 8);
+    memcpy(row_out, row_in, xsize * sizeof(float));
+    return;
+  }
+  int exp_bias = (1 << (exp_bits - 1)) - 1;  // exponent bias of the source format
+  int sign_shift = bits - 1;  // position of the sign bit
+  int mant_bits = bits - exp_bits - 1;
+  int mant_shift = 23 - mant_bits;  // binary32 has 23 mantissa bits
+  for (size_t x = 0; x < xsize; ++x) {
+    uint32_t f;
+    memcpy(&f, &row_in[x], 4);
+    int signbit = (f >> sign_shift);
+    f &= (1 << sign_shift) - 1;  // strip the sign bit and anything above it
+    if (f == 0) {
+      row_out[x] = (signbit ? -0.f : 0.f);  // signed zero
+      continue;
+    }
+    int exp = (f >> mant_bits);
+    int mantissa = (f & ((1 << mant_bits) - 1));
+    mantissa <<= mant_shift;  // align to the binary32 mantissa field
+    // Try to normalize only if there is space for maneuver.
+    if (exp == 0 && exp_bits < 8) {
+      // subnormal number
+      while ((mantissa & 0x800000) == 0) {  // terminates: mantissa is non-zero here since f != 0 and exp == 0
+        mantissa <<= 1;
+        exp--;
+      }
+      exp++;
+      // remove leading 1 because it is implicit now
+      mantissa &= 0x7fffff;
+    }
+    exp -= exp_bias;  // NOTE(review): an all-ones source exponent (inf/NaN) is not special-cased
+    // broke up the arbitrary float into its parts, now reassemble into
+    // binary32
+    exp += 127;  // binary32 exponent bias
+    JXL_ASSERT(exp >= 0);
+    f = (signbit ? 0x80000000 : 0);
+    f |= (exp << 23);
+    f |= mantissa;
+    memcpy(&row_out[x], &f, 4);
+  }
+}
+
+#if JXL_DEBUG_V_LEVEL >= 1
+std::string ModularStreamId::DebugString() const {  // only compiled when JXL_DEBUG_V_LEVEL >= 1
+  std::ostringstream os;
+  os << (kind == kGlobalData   ? "ModularGlobal"
+         : kind == kVarDCTDC   ? "VarDCTDC"
+         : kind == kModularDC  ? "ModularDC"
+         : kind == kACMetadata ? "ACMeta"
+         : kind == kQuantTable ? "QuantTable"
+         : kind == kModularAC  ? "ModularAC"
+                               : "");  // any other kind prints no name
+  if (kind == kVarDCTDC || kind == kModularDC || kind == kACMetadata ||  // kinds that print a group id
+      kind == kModularAC) {
+    os << " group " << group_id;
+  }
+  if (kind == kModularAC) {  // AC streams also print the pass
+    os << " pass " << pass_id;
+  }
+  if (kind == kQuantTable) {
+    os << " " << quant_table_id;
+  }
+  return os.str();
+}
+#endif
+
+Status ModularFrameDecoder::DecodeGlobalInfo(BitReader* reader,
+                                             const FrameHeader& frame_header,
+                                             bool allow_truncated_group) {
+  bool decode_color = frame_header.encoding == FrameEncoding::kModular;
+  const auto& metadata = frame_header.nonserialized_metadata->m;
+  bool is_gray = metadata.color_encoding.IsGray();
+  size_t nb_chans = 3;
+  if (is_gray && frame_header.color_transform == ColorTransform::kNone) {
+    nb_chans = 1;  // a single color channel only for gray without a color transform
+  }
+  do_color = decode_color;
+  size_t nb_extra = metadata.extra_channel_info.size();
+  bool has_tree = reader->ReadBits(1);
+  if (!allow_truncated_group ||
+      reader->TotalBitsConsumed() < reader->TotalBytes() * kBitsPerByte) {  // skipped only if truncation is allowed and input is exhausted
+    if (has_tree) {
+      size_t tree_size_limit =  // grows with pixel and channel count, capped at 1 << 22
+          std::min(static_cast<size_t>(1 << 22),
+                   1024 + frame_dim.xsize * frame_dim.ysize *
+                              (nb_chans + nb_extra) / 16);
+      JXL_RETURN_IF_ERROR(DecodeTree(reader, &tree, tree_size_limit));
+      JXL_RETURN_IF_ERROR(
+          DecodeHistograms(reader, (tree.size() + 1) / 2, &code, &context_map));
+    }
+  }
+  if (!do_color) nb_chans = 0;  // non-modular frames get only the extra channels in this image
+
+  bool fp = metadata.bit_depth.floating_point_sample;
+
+  // bits_per_sample is just metadata for XYB images.
+  if (metadata.bit_depth.bits_per_sample >= 32 && do_color &&
+      frame_header.color_transform != ColorTransform::kXYB) {
+    if (metadata.bit_depth.bits_per_sample == 32 && fp == false) {
+      return JXL_FAILURE("uint32_t not supported in dec_modular");
+    } else if (metadata.bit_depth.bits_per_sample > 32) {
+      return JXL_FAILURE("bits_per_sample > 32 not supported");
+    }
+  }
+
+  Image gi(frame_dim.xsize, frame_dim.ysize, metadata.bit_depth.bits_per_sample,
+           nb_chans + nb_extra);
+
+  all_same_shift = true;  // cleared below if any channel's shift differs from channel 0
+  if (frame_header.color_transform == ColorTransform::kYCbCr) {
+    for (size_t c = 0; c < nb_chans; c++) {  // apply chroma subsampling to the color channels
+      gi.channel[c].hshift = frame_header.chroma_subsampling.HShift(c);
+      gi.channel[c].vshift = frame_header.chroma_subsampling.VShift(c);
+      size_t xsize_shifted =
+          DivCeil(frame_dim.xsize, 1 << gi.channel[c].hshift);
+      size_t ysize_shifted =
+          DivCeil(frame_dim.ysize, 1 << gi.channel[c].vshift);
+      gi.channel[c].shrink(xsize_shifted, ysize_shifted);
+      if (gi.channel[c].hshift != gi.channel[0].hshift ||
+          gi.channel[c].vshift != gi.channel[0].vshift)
+        all_same_shift = false;
+    }
+  }
+
+  for (size_t ec = 0, c = nb_chans; ec < nb_extra; ec++, c++) {  // extra channels follow the color channels
+    size_t ecups = frame_header.extra_channel_upsampling[ec];
+    gi.channel[c].shrink(DivCeil(frame_dim.xsize_upsampled, ecups),
+                         DivCeil(frame_dim.ysize_upsampled, ecups));
+    gi.channel[c].hshift = gi.channel[c].vshift =
+        CeilLog2Nonzero(ecups) - CeilLog2Nonzero(frame_header.upsampling);
+    if (gi.channel[c].hshift != gi.channel[0].hshift ||
+        gi.channel[c].vshift != gi.channel[0].vshift)
+      all_same_shift = false;
+  }
+
+  JXL_DEBUG_V(6, "DecodeGlobalInfo: full_image (w/o transforms) %s",
+              gi.DebugString().c_str());
+  ModularOptions options;
+  options.max_chan_size = frame_dim.group_dim;
+  options.group_dim = frame_dim.group_dim;
+  Status dec_status = ModularGenericDecompress(
+      reader, gi, &global_header, ModularStreamId::Global().ID(frame_dim),
+      &options,
+      /*undo_transforms=*/false, &tree, &code, &context_map,
+      allow_truncated_group);
+  if (!allow_truncated_group) JXL_RETURN_IF_ERROR(dec_status);  // strict mode: any error aborts here
+  if (dec_status.IsFatalError()) {  // truncation allowed: only fatal errors abort
+    return JXL_FAILURE("Failed to decode global modular info");
+  }
+
+  // TODO(eustas): are we sure this can be done after partial decode?
+  have_something = false;  // set if any non-meta channel fits within one group
+  for (size_t c = 0; c < gi.channel.size(); c++) {
+    Channel& gic = gi.channel[c];
+    if (c >= gi.nb_meta_channels && gic.w <= frame_dim.group_dim &&
+        gic.h <= frame_dim.group_dim)
+      have_something = true;
+  }
+  // move global transforms to groups if possible
+  if (!have_something && all_same_shift) {
+    if (gi.transform.size() == 1 && gi.transform[0].id == TransformId::kRCT) {  // only a lone RCT is moved
+      global_transform = gi.transform;
+      gi.transform.clear();
+      // TODO(jon): also move no-delta-palette out (trickier though)
+    }
+  }
+  full_image = std::move(gi);
+  JXL_DEBUG_V(6, "DecodeGlobalInfo: full_image (with transforms) %s",
+              full_image.DebugString().c_str());
+  return dec_status;  // may be a non-fatal error status when allow_truncated_group is set
+}
+// Stops using full_image when it has no transforms left, no channel fits in a single group, and all channels share one shift.
+void ModularFrameDecoder::MaybeDropFullImage() {
+  if (full_image.transform.empty() && !have_something && all_same_shift) {
+    use_full_image = false;  // DecodeGroup then writes to the render pipeline
+    JXL_DEBUG_V(6, "Dropping full image");
+    for (auto& ch : full_image.channel) {
+      // Keep the per-channel metadata, but replace each plane with an empty one.
+      ch.plane = Plane<pixel_type>();
+    }
+  }
+}
+// Decodes (or, with `zerofill`, zero-fills) the channels of one modular DC/AC group covering `rect` whose shift is within [minShift, maxShift].
+Status ModularFrameDecoder::DecodeGroup(
+    const Rect& rect, BitReader* reader, int minShift, int maxShift,
+    const ModularStreamId& stream, bool zerofill, PassesDecoderState* dec_state,
+    RenderPipelineInput* render_pipeline_input, bool allow_truncated,
+    bool* should_run_pipeline) {
+  JXL_DEBUG_V(6, "Decoding %s with rect %s and shift bracket %d..%d %s",
+              stream.DebugString().c_str(), Description(rect).c_str(), minShift,
+              maxShift, zerofill ? "using zerofill" : "");
+  JXL_DASSERT(stream.kind == ModularStreamId::kModularDC ||
+              stream.kind == ModularStreamId::kModularAC);
+  const size_t xsize = rect.xsize();
+  const size_t ysize = rect.ysize();
+  Image gi(xsize, ysize, full_image.bitdepth, 0);
+  // Find the first non-meta channel that exceeds the group size in either dimension.
+  size_t c = full_image.nb_meta_channels;
+  for (; c < full_image.channel.size(); c++) {
+    Channel& fc = full_image.channel[c];
+    if (fc.w > frame_dim.group_dim || fc.h > frame_dim.group_dim) break;
+  }
+  size_t beginc = c;  // remembered so the copy-back loop below visits the same channels
+  for (; c < full_image.channel.size(); c++) {
+    Channel& fc = full_image.channel[c];
+    int shift = std::min(fc.hshift, fc.vshift);
+    if (shift > maxShift) continue;
+    if (shift < minShift) continue;
+    Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift,
+           rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h);
+    if (r.xsize() == 0 || r.ysize() == 0) continue;
+    if (zerofill && use_full_image) {
+      for (size_t y = 0; y < r.ysize(); ++y) {
+        pixel_type* const JXL_RESTRICT row_out = r.Row(&fc.plane, y);
+        memset(row_out, 0, r.xsize() * sizeof(*row_out));
+      }
+    } else {
+      Channel gc(r.xsize(), r.ysize());
+      if (zerofill) ZeroFillImage(&gc.plane);
+      gc.hshift = fc.hshift;
+      gc.vshift = fc.vshift;
+      gi.channel.emplace_back(std::move(gc));
+    }
+  }
+  if (zerofill && use_full_image) return true;  // full_image was zeroed in place above
+  // Return early if there's nothing to decode. Otherwise there might be
+  // problems later (in ModularImageToDecodedRect).
+  if (gi.channel.empty()) {
+    if (dec_state && should_run_pipeline) {
+      const auto& frame_header = dec_state->shared->frame_header;
+      const auto* metadata = frame_header.nonserialized_metadata;
+      if (do_color || metadata->m.num_extra_channels > 0) {
+        // Signal to FrameDecoder that we do not have some of the required input
+        // for the render pipeline.
+        *should_run_pipeline = false;
+      }
+    }
+    JXL_DEBUG_V(6, "Nothing to decode, returning early.");
+    return true;
+  }
+  ModularOptions options;
+  if (!zerofill) {
+    auto status = ModularGenericDecompress(
+        reader, gi, /*header=*/nullptr, stream.ID(frame_dim), &options,
+        /*undo_transforms=*/true, &tree, &code, &context_map, allow_truncated);
+    if (!allow_truncated) JXL_RETURN_IF_ERROR(status);
+    if (status.IsFatalError()) return status;
+  }
+  // No full image: undo the transforms moved to group level and emit this group directly.
+  if (!use_full_image) {
+    JXL_ASSERT(render_pipeline_input);
+    for (auto t : global_transform) {
+      JXL_RETURN_IF_ERROR(t.Inverse(gi, global_header.wp_header));
+    }
+    JXL_RETURN_IF_ERROR(ModularImageToDecodedRect(gi, dec_state, nullptr,
+                                                  *render_pipeline_input,
+                                                  Rect(0, 0, gi.w, gi.h)));
+    return true;
+  }
+  int gic = 0;  // index into gi.channel; advances only for channels selected above
+  for (c = beginc; c < full_image.channel.size(); c++) {
+    Channel& fc = full_image.channel[c];
+    int shift = std::min(fc.hshift, fc.vshift);
+    if (shift > maxShift) continue;
+    if (shift < minShift) continue;
+    Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift,
+           rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h);
+    if (r.xsize() == 0 || r.ysize() == 0) continue;
+    JXL_ASSERT(use_full_image);
+    CopyImageTo(/*rect_from=*/Rect(0, 0, r.xsize(), r.ysize()),
+                /*from=*/gi.channel[gic].plane,
+                /*rect_to=*/r, /*to=*/&fc.plane);
+    gic++;
+  }
+  return true;
+}
+// Decodes the three modular-coded DC channels of VarDCT DC group `group_id` and dequantizes them into dec_state's DC storage.
+Status ModularFrameDecoder::DecodeVarDCTDC(size_t group_id, BitReader* reader,
+                                           PassesDecoderState* dec_state) {
+  const Rect r = dec_state->shared->DCGroupRect(group_id);
+  // TODO(eustas): investigate if we could reduce the impact of
+  //               EvalRationalPolynomial; generally speaking, the limit is
+  //               2**(128/(3*magic)), where 128 comes from IEEE 754 exponent,
+  //               3 comes from XybToRgb that cubes the values, and "magic" is
+  //               the sum of all other contributions. 2**18 is known to lead
+  //               to NaN on input found by fuzzing (see commit message).
+  Image image(r.xsize(), r.ysize(), full_image.bitdepth, 3);
+  size_t stream_id = ModularStreamId::VarDCTDC(group_id).ID(frame_dim);
+  reader->Refill();
+  size_t extra_precision = reader->ReadFixedBits<2>();
+  float mul = 1.0f / (1 << extra_precision);  // 2^-extra_precision, passed to DequantDC
+  ModularOptions options;
+  for (size_t c = 0; c < 3; c++) {
+    Channel& ch = image.channel[c < 2 ? c ^ 1 : c];  // components 0 and 1 use swapped channels
+    ch.w >>= dec_state->shared->frame_header.chroma_subsampling.HShift(c);
+    ch.h >>= dec_state->shared->frame_header.chroma_subsampling.VShift(c);
+    ch.shrink();
+  }
+  if (!ModularGenericDecompress(
+          reader, image, /*header=*/nullptr, stream_id, &options,
+          /*undo_transforms=*/true, &tree, &code, &context_map)) {
+    return JXL_FAILURE("Failed to decode modular DC group");
+  }
+  DequantDC(r, &dec_state->shared_storage.dc_storage,
+            &dec_state->shared_storage.quant_dc, image,
+            dec_state->shared->quantizer.MulDC(), mul,
+            dec_state->shared->cmap.DCFactors(),
+            dec_state->shared->frame_header.chroma_subsampling,
+            dec_state->shared->block_ctx_map);
+  return true;
+}
+// Decodes and validates the AC metadata of DC group `group_id` (YtoX/YtoB maps, AC strategies, raw quant field, EPF sharpness) into dec_state->shared_storage.
+Status ModularFrameDecoder::DecodeAcMetadata(size_t group_id, BitReader* reader,
+                                             PassesDecoderState* dec_state) {
+  const Rect r = dec_state->shared->DCGroupRect(group_id);
+  size_t upper_bound = r.xsize() * r.ysize();
+  reader->Refill();
+  size_t count = reader->ReadBits(CeilLog2Nonzero(upper_bound)) + 1;
+  size_t stream_id = ModularStreamId::ACMetadata(group_id).ID(frame_dim);
+  // Channels: 0 = YToX, 1 = YToB, 2 = ACS (row 0) + QF (row 1), 3 = EPF sharpness
+  Image image(r.xsize(), r.ysize(), full_image.bitdepth, 4);
+  static_assert(kColorTileDimInBlocks == 8, "Color tile size changed");
+  Rect cr(r.x0() >> 3, r.y0() >> 3, (r.xsize() + 7) >> 3, (r.ysize() + 7) >> 3);
+  image.channel[0] = Channel(cr.xsize(), cr.ysize(), 3, 3);
+  image.channel[1] = Channel(cr.xsize(), cr.ysize(), 3, 3);
+  image.channel[2] = Channel(count, 2, 0, 0);  // `count` columns by 2 rows
+  ModularOptions options;
+  if (!ModularGenericDecompress(
+          reader, image, /*header=*/nullptr, stream_id, &options,
+          /*undo_transforms=*/true, &tree, &code, &context_map)) {
+    return JXL_FAILURE("Failed to decode AC metadata");
+  }
+  ConvertPlaneAndClamp(Rect(image.channel[0].plane), image.channel[0].plane, cr,
+                       &dec_state->shared_storage.cmap.ytox_map);
+  ConvertPlaneAndClamp(Rect(image.channel[1].plane), image.channel[1].plane, cr,
+                       &dec_state->shared_storage.cmap.ytob_map);
+  size_t num = 0;  // number of (strategy, quant) entries consumed from channel 2
+  bool is444 = dec_state->shared->frame_header.chroma_subsampling.Is444();
+  auto& ac_strategy = dec_state->shared_storage.ac_strategy;
+  size_t xlim = std::min(ac_strategy.xsize(), r.x0() + r.xsize());
+  size_t ylim = std::min(ac_strategy.ysize(), r.y0() + r.ysize());
+  uint32_t local_used_acs = 0;  // bit i set when raw strategy i is used in this group
+  for (size_t iy = 0; iy < r.ysize(); iy++) {
+    size_t y = r.y0() + iy;
+    int32_t* row_qf = r.Row(&dec_state->shared_storage.raw_quant_field, iy);
+    uint8_t* row_epf = r.Row(&dec_state->shared_storage.epf_sharpness, iy);
+    int32_t* row_in_1 = image.channel[2].plane.Row(0);  // raw AC strategies
+    int32_t* row_in_2 = image.channel[2].plane.Row(1);  // raw quant field values
+    int32_t* row_in_3 = image.channel[3].plane.Row(iy);  // EPF sharpness
+    for (size_t ix = 0; ix < r.xsize(); ix++) {
+      size_t x = r.x0() + ix;
+      int sharpness = row_in_3[ix];
+      if (sharpness < 0 || sharpness >= LoopFilter::kEpfSharpEntries) {
+        return JXL_FAILURE("Corrupted sharpness field");
+      }
+      row_epf[ix] = sharpness;
+      if (ac_strategy.IsValid(x, y)) {  // presumably set by an earlier multi-block strategy - confirm
+        continue;
+      }
+
+      if (num >= count) return JXL_FAILURE("Corrupted stream");
+
+      if (!AcStrategy::IsRawStrategyValid(row_in_1[num])) {
+        return JXL_FAILURE("Invalid AC strategy");
+      }
+      local_used_acs |= 1u << row_in_1[num];
+      AcStrategy acs = AcStrategy::FromRawStrategy(row_in_1[num]);
+      if ((acs.covered_blocks_x() > 1 || acs.covered_blocks_y() > 1) &&
+          !is444) {
+        return JXL_FAILURE(
+            "AC strategy not compatible with chroma subsampling");
+      }
+      // Ensure that blocks do not overflow *AC* groups.
+      size_t next_x_ac_block = (x / kGroupDimInBlocks + 1) * kGroupDimInBlocks;
+      size_t next_y_ac_block = (y / kGroupDimInBlocks + 1) * kGroupDimInBlocks;
+      size_t next_x_dct_block = x + acs.covered_blocks_x();
+      size_t next_y_dct_block = y + acs.covered_blocks_y();
+      if (next_x_dct_block > next_x_ac_block || next_x_dct_block > xlim) {
+        return JXL_FAILURE("Invalid AC strategy, x overflow");
+      }
+      if (next_y_dct_block > next_y_ac_block || next_y_dct_block > ylim) {
+        return JXL_FAILURE("Invalid AC strategy, y overflow");
+      }
+      JXL_RETURN_IF_ERROR(
+          ac_strategy.SetNoBoundsCheck(x, y, AcStrategy::Type(row_in_1[num])));
+      row_qf[ix] = 1 + std::max<int32_t>(0, std::min(Quantizer::kQuantMax - 1,
+                                                     row_in_2[num]));
+      num++;
+    }
+  }
+  dec_state->used_acs |= local_used_acs;
+  if (dec_state->shared->frame_header.loop_filter.epf_iters > 0) {
+    ComputeSigma(r, dec_state);
+  }
+  return true;
+}
+// Converts the integer channels of `gi` covered by `modular_rect` into the float buffers of `render_pipeline_input`: colour channels first (if do_color), then extra channels.
+Status ModularFrameDecoder::ModularImageToDecodedRect(
+    Image& gi, PassesDecoderState* dec_state, jxl::ThreadPool* pool,
+    RenderPipelineInput& render_pipeline_input, Rect modular_rect) {
+  const auto& frame_header = dec_state->shared->frame_header;
+  const auto* metadata = frame_header.nonserialized_metadata;
+  JXL_CHECK(gi.transform.empty());
+
+  auto get_row = [&](size_t c, size_t y) {
+    const auto& buffer = render_pipeline_input.GetBuffer(c);
+    return buffer.second.Row(buffer.first, y);
+  };
+
+  size_t c = 0;  // index of the next unread channel in gi.channel
+  if (do_color) {
+    const bool rgb_from_gray =
+        metadata->m.color_encoding.IsGray() &&
+        frame_header.color_transform == ColorTransform::kNone;
+    const bool fp = metadata->m.bit_depth.floating_point_sample &&
+                    frame_header.color_transform != ColorTransform::kXYB;
+    for (; c < 3; c++) {
+      double factor = full_image.bitdepth < 32
+                          ? 1.0 / ((1u << full_image.bitdepth) - 1)
+                          : 0;
+      size_t c_in = c;
+      if (frame_header.color_transform == ColorTransform::kXYB) {
+        factor = dec_state->shared->matrices.DCQuants()[c];
+        // XYB is stored as Y, X, (B-Y): read output channels 0 and 1 swapped.
+        if (c < 2) c_in = 1 - c;
+      } else if (rgb_from_gray) {
+        c_in = 0;
+      }
+      JXL_ASSERT(c_in < gi.channel.size());
+      Channel& ch_in = gi.channel[c_in];
+      // TODO(eustas): could we detect it on earlier stage?
+      if (ch_in.w == 0 || ch_in.h == 0) {
+        return JXL_FAILURE("Empty image");
+      }
+      JXL_CHECK(ch_in.hshift <= 3 && ch_in.vshift <= 3);
+      Rect r = render_pipeline_input.GetBuffer(c).second;
+      Rect mr(modular_rect.x0() >> ch_in.hshift,
+              modular_rect.y0() >> ch_in.vshift,
+              DivCeil(modular_rect.xsize(), 1 << ch_in.hshift),
+              DivCeil(modular_rect.ysize(), 1 << ch_in.vshift));
+      mr = mr.Crop(ch_in.plane);
+      size_t xsize_shifted = r.xsize();
+      size_t ysize_shifted = r.ysize();
+      if (r.ysize() != mr.ysize() || r.xsize() != mr.xsize()) {
+        return JXL_FAILURE("Dimension mismatch: trying to fit a %" PRIuS
+                           "x%" PRIuS
+                           " modular channel into "
+                           "a %" PRIuS "x%" PRIuS " rect",
+                           mr.xsize(), mr.ysize(), r.xsize(), r.ysize());
+      }
+      if (frame_header.color_transform == ColorTransform::kXYB && c == 2) {
+        JXL_ASSERT(!fp);
+        JXL_RETURN_IF_ERROR(RunOnPool(
+            pool, 0, ysize_shifted, ThreadPool::NoInit,
+            [&](const uint32_t task, size_t /* thread */) {
+              const size_t y = task;
+              const pixel_type* const JXL_RESTRICT row_in =
+                  mr.Row(&ch_in.plane, y);
+              const pixel_type* const JXL_RESTRICT row_in_Y =
+                  mr.Row(&gi.channel[0].plane, y);
+              float* const JXL_RESTRICT row_out = get_row(c, y);
+              HWY_DYNAMIC_DISPATCH(MultiplySum)
+              (xsize_shifted, row_in, row_in_Y, factor, row_out);
+            },
+            "ModularIntToFloat"));
+      } else if (fp) {
+        int bits = metadata->m.bit_depth.bits_per_sample;
+        int exp_bits = metadata->m.bit_depth.exponent_bits_per_sample;
+        JXL_RETURN_IF_ERROR(RunOnPool(
+            pool, 0, ysize_shifted, ThreadPool::NoInit,
+            [&](const uint32_t task, size_t /* thread */) {
+              const size_t y = task;
+              const pixel_type* const JXL_RESTRICT row_in =
+                  mr.Row(&ch_in.plane, y);
+              if (rgb_from_gray) {
+                for (size_t cc = 0; cc < 3; cc++) {
+                  float* const JXL_RESTRICT row_out = get_row(cc, y);
+                  int_to_float(row_in, row_out, xsize_shifted, bits, exp_bits);
+                }
+              } else {
+                float* const JXL_RESTRICT row_out = get_row(c, y);
+                int_to_float(row_in, row_out, xsize_shifted, bits, exp_bits);
+              }
+            },
+            "ModularIntToFloat_losslessfloat"));
+      } else {
+        JXL_RETURN_IF_ERROR(RunOnPool(
+            pool, 0, ysize_shifted, ThreadPool::NoInit,
+            [&](const uint32_t task, size_t /* thread */) {
+              const size_t y = task;
+              const pixel_type* const JXL_RESTRICT row_in =
+                  mr.Row(&ch_in.plane, y);
+              if (rgb_from_gray) {
+                if (full_image.bitdepth < 23) {
+                  HWY_DYNAMIC_DISPATCH(RgbFromSingle)
+                  (xsize_shifted, row_in, factor, get_row(0, y), get_row(1, y),
+                   get_row(2, y));
+                } else {
+                  SingleFromSingleAccurate(xsize_shifted, row_in, factor,
+                                           get_row(0, y));
+                  SingleFromSingleAccurate(xsize_shifted, row_in, factor,
+                                           get_row(1, y));
+                  SingleFromSingleAccurate(xsize_shifted, row_in, factor,
+                                           get_row(2, y));
+                }
+              } else {
+                float* const JXL_RESTRICT row_out = get_row(c, y);
+                if (full_image.bitdepth < 23) {
+                  HWY_DYNAMIC_DISPATCH(SingleFromSingle)
+                  (xsize_shifted, row_in, factor, row_out);
+                } else {
+                  SingleFromSingleAccurate(xsize_shifted, row_in, factor,
+                                           row_out);
+                }
+              }
+            },
+            "ModularIntToFloat"));
+      }
+      if (rgb_from_gray) {
+        break;  // the single gray channel already filled all three outputs
+      }
+    }
+    if (rgb_from_gray) {
+      c = 1;  // only one colour channel was read; extra channels start right after it
+    }
+  }
+  size_t num_extra_channels = metadata->m.num_extra_channels;
+  for (size_t ec = 0; ec < num_extra_channels; ec++, c++) {
+    const ExtraChannelInfo& eci = metadata->m.extra_channel_info[ec];
+    int bits = eci.bit_depth.bits_per_sample;
+    int exp_bits = eci.bit_depth.exponent_bits_per_sample;
+    bool fp = eci.bit_depth.floating_point_sample;
+    JXL_ASSERT(fp || bits < 32);
+    const double factor = fp ? 0 : (1.0 / ((1u << bits) - 1));
+    JXL_ASSERT(c < gi.channel.size());
+    Channel& ch_in = gi.channel[c];
+    Rect r = render_pipeline_input.GetBuffer(3 + ec).second;  // extra channels use buffers 3, 4, ...
+    Rect mr(modular_rect.x0() >> ch_in.hshift,
+            modular_rect.y0() >> ch_in.vshift,
+            DivCeil(modular_rect.xsize(), 1 << ch_in.hshift),
+            DivCeil(modular_rect.ysize(), 1 << ch_in.vshift));
+    mr = mr.Crop(ch_in.plane);
+    if (r.ysize() != mr.ysize() || r.xsize() != mr.xsize()) {
+      return JXL_FAILURE("Dimension mismatch: trying to fit a %" PRIuS
+                         "x%" PRIuS
+                         " modular channel into "
+                         "a %" PRIuS "x%" PRIuS " rect",
+                         mr.xsize(), mr.ysize(), r.xsize(), r.ysize());
+    }
+    for (size_t y = 0; y < r.ysize(); ++y) {
+      float* const JXL_RESTRICT row_out =
+          r.Row(render_pipeline_input.GetBuffer(3 + ec).first, y);
+      const pixel_type* const JXL_RESTRICT row_in = mr.Row(&ch_in.plane, y);
+      if (fp) {
+        int_to_float(row_in, row_out, r.xsize(), bits, exp_bits);
+      } else {
+        if (full_image.bitdepth < 23) {
+          HWY_DYNAMIC_DISPATCH(SingleFromSingle)
+          (r.xsize(), row_in, factor, row_out);
+        } else {
+          SingleFromSingleAccurate(r.xsize(), row_in, factor, row_out);
+        }
+      }
+    }
+  }
+  return true;
+}
+// Undoes the global transforms on the full image and converts every group into render-pipeline input; returns immediately when the full image is not in use.
+Status ModularFrameDecoder::FinalizeDecoding(PassesDecoderState* dec_state,
+                                             jxl::ThreadPool* pool,
+                                             bool inplace) {
+  if (!use_full_image) return true;
+  Image gi = (inplace ? std::move(full_image) : full_image.clone());  // inplace moves out of full_image
+  size_t xsize = gi.w;
+  size_t ysize = gi.h;
+
+  JXL_DEBUG_V(3, "Finalizing decoding for modular image: %s",
+              gi.DebugString().c_str());
+
+  // Don't use threads if total image size is smaller than a group
+  if (xsize * ysize < frame_dim.group_dim * frame_dim.group_dim) pool = nullptr;
+
+  // Undo the global transforms
+  gi.undo_transforms(global_header.wp_header, pool);
+  JXL_DASSERT(global_transform.empty());
+  if (gi.error) return JXL_FAILURE("Undoing transforms failed");
+
+  for (size_t i = 0; i < dec_state->shared->frame_dim.num_groups; i++) {
+    dec_state->render_pipeline->ClearDone(i);
+  }
+  std::atomic<bool> has_error{false};  // set from worker tasks, checked after RunOnPool
+  JXL_RETURN_IF_ERROR(RunOnPool(
+      pool, 0, dec_state->shared->frame_dim.num_groups,
+      [&](size_t num_threads) {
+        const auto& frame_header = dec_state->shared->frame_header;
+        bool use_group_ids = (frame_header.encoding == FrameEncoding::kVarDCT ||
+                              (frame_header.flags & FrameHeader::kNoise));
+        return dec_state->render_pipeline->PrepareForThreads(num_threads,
+                                                             use_group_ids);
+      },
+      [&](const uint32_t group, size_t thread_id) {
+        RenderPipelineInput input =
+            dec_state->render_pipeline->GetInputBuffers(group, thread_id);
+        if (!ModularImageToDecodedRect(gi, dec_state, nullptr, input,
+                                       dec_state->shared->GroupRect(group))) {
+          has_error = true;
+          return;  // input.Done() is skipped for a failed group
+        }
+        input.Done();
+      },
+      "ModularToRect"));
+  if (has_error) {
+    return JXL_FAILURE("Error producing input to render pipeline");
+  }
+  return true;
+}
+// Smallest qtable_den accepted by DecodeQuantTable; smaller values are rejected.
+static constexpr const float kAlmostZero = 1e-8f;
+// Reads qtable_den and a 3-channel raw quant table of required_size_x x required_size_y from `br` into encoding->qraw; fails on a too-small denominator or a non-positive entry.
+Status ModularFrameDecoder::DecodeQuantTable(
+    size_t required_size_x, size_t required_size_y, BitReader* br,
+    QuantEncoding* encoding, size_t idx,
+    ModularFrameDecoder* modular_frame_decoder) {
+  JXL_RETURN_IF_ERROR(F16Coder::Read(br, &encoding->qraw.qtable_den));
+  if (encoding->qraw.qtable_den < kAlmostZero) {
+    // qtable[] values are already checked for <= 0 so the denominator may not
+    // be negative.
+    return JXL_FAILURE("Invalid qtable_den: value too small");
+  }
+  Image image(required_size_x, required_size_y, 8, 3);
+  ModularOptions options;
+  if (modular_frame_decoder) {  // decode with the frame decoder's tree, code and context map
+    JXL_RETURN_IF_ERROR(ModularGenericDecompress(
+        br, image, /*header=*/nullptr,
+        ModularStreamId::QuantTable(idx).ID(modular_frame_decoder->frame_dim),
+        &options, /*undo_transforms=*/true, &modular_frame_decoder->tree,
+        &modular_frame_decoder->code, &modular_frame_decoder->context_map));
+  } else {  // no frame decoder: stream id 0 and no tree/code/context map arguments
+    JXL_RETURN_IF_ERROR(ModularGenericDecompress(br, image, /*header=*/nullptr,
+                                                 0, &options,
+                                                 /*undo_transforms=*/true));
+  }
+  if (!encoding->qraw.qtable) {
+    encoding->qraw.qtable = new std::vector<int>();  // NOTE(review): raw new; presumably released by QuantEncoding - confirm
+  }
+  encoding->qraw.qtable->resize(required_size_x * required_size_y * 3);
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t y = 0; y < required_size_y; y++) {
+      int32_t* JXL_RESTRICT row = image.channel[c].Row(y);
+      for (size_t x = 0; x < required_size_x; x++) {
+        (*encoding->qraw.qtable)[c * required_size_x * required_size_y +
+                                 y * required_size_x + x] = row[x];  // channel-major, then row-major
+        if (row[x] <= 0) {
+          return JXL_FAILURE("Invalid raw quantization table");
+        }
+      }
+    }
+  }
+  return true;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_modular.h b/third-party/libjxl/libjxl/lib/jxl/dec_modular.h
new file mode 100644
index 0000000000..aae643cf1f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_modular.h
@@ -0,0 +1,140 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_MODULAR_H_
+#define LIB_JXL_DEC_MODULAR_H_
+
+#include <stddef.h>
+
+#include <string>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+// Identifies one modular sub-stream of a frame (by kind plus group/pass/table index) and maps it to a numeric stream id.
+struct ModularStreamId {
+  enum Kind {
+    kGlobalData,
+    kVarDCTDC,
+    kModularDC,
+    kACMetadata,
+    kQuantTable,
+    kModularAC
+  };
+  Kind kind;
+  size_t quant_table_id;  // Only for kQuantTable.
+  size_t group_id;  // DC or AC group id.
+  size_t pass_id;   // Only for kModularAC.
+  size_t ID(const FrameDimensions& frame_dim) const {  // id order: global, VarDCT DC, modular DC, AC metadata, quant tables, modular AC
+    size_t id = 0;
+    switch (kind) {
+      case kGlobalData:
+        id = 0;
+        break;
+      case kVarDCTDC:
+        id = 1 + group_id;
+        break;
+      case kModularDC:
+        id = 1 + frame_dim.num_dc_groups + group_id;
+        break;
+      case kACMetadata:
+        id = 1 + 2 * frame_dim.num_dc_groups + group_id;
+        break;
+      case kQuantTable:
+        id = 1 + 3 * frame_dim.num_dc_groups + quant_table_id;
+        break;
+      case kModularAC:
+        id = 1 + 3 * frame_dim.num_dc_groups + DequantMatrices::kNum +
+             frame_dim.num_groups * pass_id + group_id;
+        break;
+    };
+    return id;
+  }
+  static ModularStreamId Global() {
+    return ModularStreamId{kGlobalData, 0, 0, 0};
+  }
+  static ModularStreamId VarDCTDC(size_t group_id) {
+    return ModularStreamId{kVarDCTDC, 0, group_id, 0};
+  }
+  static ModularStreamId ModularDC(size_t group_id) {
+    return ModularStreamId{kModularDC, 0, group_id, 0};
+  }
+  static ModularStreamId ACMetadata(size_t group_id) {
+    return ModularStreamId{kACMetadata, 0, group_id, 0};
+  }
+  static ModularStreamId QuantTable(size_t quant_table_id) {
+    JXL_ASSERT(quant_table_id < DequantMatrices::kNum);
+    return ModularStreamId{kQuantTable, quant_table_id, 0, 0};
+  }
+  static ModularStreamId ModularAC(size_t group_id, size_t pass_id) {
+    return ModularStreamId{kModularAC, 0, group_id, pass_id};
+  }
+  static size_t Num(const FrameDimensions& frame_dim, size_t passes) {  // id of the first stream of pass `passes`, i.e. the stream count
+    return ModularAC(0, passes).ID(frame_dim);
+  }
+  std::string DebugString() const;
+};
+// Decodes the modular-coded parts of a frame: global info, per-group pixel data, VarDCT DC, AC metadata and raw quant tables.
+class ModularFrameDecoder {
+ public:
+  void Init(const FrameDimensions& frame_dim) { this->frame_dim = frame_dim; }
+  Status DecodeGlobalInfo(BitReader* reader, const FrameHeader& frame_header,
+                          bool allow_truncated_group);
+  Status DecodeGroup(const Rect& rect, BitReader* reader, int minShift,
+                     int maxShift, const ModularStreamId& stream, bool zerofill,
+                     PassesDecoderState* dec_state,
+                     RenderPipelineInput* render_pipeline_input,
+                     bool allow_truncated, bool* should_run_pipeline = nullptr);
+  // Decodes a VarDCT DC group (`group_id`) from the given `reader`.
+  Status DecodeVarDCTDC(size_t group_id, BitReader* reader,
+                        PassesDecoderState* dec_state);
+  // Decodes a VarDCT AC Metadata group (`group_id`) from the given `reader`.
+  Status DecodeAcMetadata(size_t group_id, BitReader* reader,
+                          PassesDecoderState* dec_state);
+  // Decodes a RAW quant table from `br` into the given `encoding`, of size
+  // `required_size_x x required_size_y`. If `modular_frame_decoder` is passed,
+  // its global tree is used, otherwise no global tree is used.
+  static Status DecodeQuantTable(size_t required_size_x, size_t required_size_y,
+                                 BitReader* br, QuantEncoding* encoding,
+                                 size_t idx,
+                                 ModularFrameDecoder* modular_frame_decoder);
+  // If `inplace` is true, this can only be called once (full_image is moved
+  // from). If it is false, it can be called multiple times (e.g. for
+  // progressive steps).
+  Status FinalizeDecoding(PassesDecoderState* dec_state, jxl::ThreadPool* pool,
+                          bool inplace);
+  bool have_dc() const { return have_something; }
+  void MaybeDropFullImage();
+  bool UsesFullImage() const { return use_full_image; }
+
+ private:
+  Status ModularImageToDecodedRect(Image& gi, PassesDecoderState* dec_state,
+                                   jxl::ThreadPool* pool,
+                                   RenderPipelineInput& render_pipeline_input,
+                                   Rect modular_rect);
+
+  Image full_image;  // whole-frame modular image, filled group by group
+  std::vector<Transform> global_transform;  // global transforms moved to group level
+  FrameDimensions frame_dim;
+  bool do_color = false;  // colour channels are modular-coded; presumably set in DecodeGlobalInfo
+  bool have_something = false;  // some non-meta channel fits within a single group
+  bool use_full_image = true;  // cleared by MaybeDropFullImage
+  bool all_same_shift = false;  // conservative default: MaybeDropFullImage keeps full_image
+  Tree tree;
+  ANSCode code;
+  std::vector<uint8_t> context_map;
+  GroupHeader global_header;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_MODULAR_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_noise.cc b/third-party/libjxl/libjxl/lib/jxl/dec_noise.cc
new file mode 100644
index 0000000000..275a6d0b21
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_noise.cc
@@ -0,0 +1,131 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_noise.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <numeric>
+#include <utility>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dec_noise.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/xorshift128plus-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Or;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Vec;
+
+using D = HWY_CAPPED(float, kBlockDim);
+using DI = hwy::HWY_NAMESPACE::Rebind<int, D>;
+using DI8 = hwy::HWY_NAMESPACE::Repartition<uint8_t, D>;
+
+// Converts one full vector of 32-bit random words to floats in [1, 2).
+// NOTE: as the convolution kernel sums to 0, it doesn't matter if inputs are in
+// [0, 1) or in [1, 2).
+void BitsToFloat(const uint32_t* JXL_RESTRICT random_bits,
+                 float* JXL_RESTRICT floats) {
+  const HWY_FULL(float) df;
+  const HWY_FULL(uint32_t) du;
+
+  const auto bits = Load(du, random_bits);
+  // Keep the top 23 bits as mantissa and OR in the bit pattern of 1.0f: [1, 2)
+  const auto rand12 = BitCast(df, Or(ShiftRight<9>(bits), Set(du, 0x3F800000)));
+  Store(rand12, df, floats);
+}
+// Fills `rect` of `noise` with random floats in [1, 2) generated from `rng`, one row at a time.
+void RandomImage(Xorshift128Plus* rng, const Rect& rect,
+                 ImageF* JXL_RESTRICT noise) {
+  const size_t xsize = rect.xsize();
+  const size_t ysize = rect.ysize();
+
+  // May exceed the vector size, hence we have two loops over x below.
+  constexpr size_t kFloatsPerBatch =
+      Xorshift128Plus::N * sizeof(uint64_t) / sizeof(float);
+  HWY_ALIGN uint64_t batch[Xorshift128Plus::N] = {};
+
+  const HWY_FULL(float) df;
+  const size_t N = Lanes(df);
+
+  for (size_t y = 0; y < ysize; ++y) {
+    float* JXL_RESTRICT row = rect.Row(noise, y);
+
+    size_t x = 0;
+    // Only entire batches (avoids exceeding the image padding).
+    for (; x + kFloatsPerBatch < xsize; x += kFloatsPerBatch) {
+      rng->Fill(batch);
+      for (size_t i = 0; i < kFloatsPerBatch; i += Lanes(df)) {  // Lanes(df) is the same value as N
+        BitsToFloat(reinterpret_cast<const uint32_t*>(batch) + i, row + x + i);
+      }
+    }
+
+    // Any remaining pixels, rounded up to vectors (safe due to padding).
+    rng->Fill(batch);
+    size_t batch_pos = 0;  // < kFloatsPerBatch
+    for (; x < xsize; x += N) {
+      BitsToFloat(reinterpret_cast<const uint32_t*>(batch) + batch_pos,
+                  row + x);
+      batch_pos += N;
+    }
+  }
+}
+void Random3Planes(size_t visible_frame_index, size_t nonvisible_frame_index,
+                   size_t x0, size_t y0, const std::pair<ImageF*, Rect>& plane0,
+                   const std::pair<ImageF*, Rect>& plane1,
+                   const std::pair<ImageF*, Rect>& plane2) {  // per-target implementation
+  HWY_ALIGN Xorshift128Plus rng(visible_frame_index, nonvisible_frame_index, x0,
+                                y0);  // generator is seeded from the frame indices and (x0, y0)
+  RandomImage(&rng, plane0.second, plane0.first);  // the same rng is advanced across all three planes
+  RandomImage(&rng, plane1.second, plane1.first);
+  RandomImage(&rng, plane2.second, plane2.first);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+// Public entry point: forwards to the Random3Planes implementation selected by Highway's dynamic dispatch.
+HWY_EXPORT(Random3Planes);
+void Random3Planes(size_t visible_frame_index, size_t nonvisible_frame_index,
+                   size_t x0, size_t y0, const std::pair<ImageF*, Rect>& plane0,
+                   const std::pair<ImageF*, Rect>& plane1,
+                   const std::pair<ImageF*, Rect>& plane2) {
+  return HWY_DYNAMIC_DISPATCH(Random3Planes)(visible_frame_index,
+                                             nonvisible_frame_index, x0, y0,
+                                             plane0, plane1, plane2);
+}
+// Reads a 10-bit value from `br` and stores it, divided by `precision`, in `*val`.
+void DecodeFloatParam(float precision, float* val, BitReader* br) {
+  const int absval_quant = br->ReadFixedBits<10>();
+  *val = absval_quant / precision;
+}
+// Fills every entry of noise_params->lut from `br` via DecodeFloatParam with kNoisePrecision; always returns true.
+Status DecodeNoise(BitReader* br, NoiseParams* noise_params) {
+  for (float& i : noise_params->lut) {
+    DecodeFloatParam(kNoisePrecision, &i, br);
+  }
+  return true;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_noise.h b/third-party/libjxl/libjxl/lib/jxl/dec_noise.h
new file mode 100644
index 0000000000..ac05866470
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_noise.h
@@ -0,0 +1,32 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_NOISE_H_
+#define LIB_JXL_DEC_NOISE_H_
+
+// Noise synthesis. Currently disabled.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/noise.h"
+
+namespace jxl {
+
+void Random3Planes(size_t visible_frame_index, size_t nonvisible_frame_index,
+                   size_t x0, size_t y0, const std::pair<ImageF*, Rect>& plane0,
+                   const std::pair<ImageF*, Rect>& plane1,
+                   const std::pair<ImageF*, Rect>& plane2);
+
+// Must only call if FrameHeader.flags.kNoise.
+Status DecodeNoise(BitReader* br, NoiseParams* noise_params);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_NOISE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_patch_dictionary.cc b/third-party/libjxl/libjxl/lib/jxl/dec_patch_dictionary.cc
new file mode 100644
index 0000000000..56538bc232
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_patch_dictionary.cc
@@ -0,0 +1,357 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_patch_dictionary.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/blending.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_frame.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/patch_dictionary_internal.h"
+
+namespace jxl {
+
+// Reads the patch dictionary of an xsize x ysize frame from `br`, replacing
+// earlier contents; sets *uses_extra_channels if any patch needs them.
+Status PatchDictionary::Decode(BitReader* br, size_t xsize, size_t ysize,
+                               bool* uses_extra_channels) {
+  // Reset everything: blendings_ is indexed by patch position (see AddOneRow).
+  positions_.clear();
+  ref_positions_.clear();
+  blendings_.clear();
+  std::vector<uint8_t> context_map;
+  ANSCode code;
+  JXL_RETURN_IF_ERROR(
+      DecodeHistograms(br, kNumPatchDictionaryContexts, &code, &context_map));
+  ANSSymbolReader decoder(&code, br);
+
+  auto read_num = [&](size_t context) -> size_t {
+    return decoder.ReadHybridUint(context, br, context_map);
+  };
+
+  size_t num_ref_patch = read_num(kNumRefPatchContext);
+  // Limit max memory usage of patches to about 66 bytes per pixel (assuming 8
+  // bytes per size_t)
+  const size_t num_pixels = xsize * ysize;
+  const size_t max_ref_patches = 1024 + num_pixels / 4;
+  const size_t max_patches = max_ref_patches * 4;
+  const size_t max_blending_infos = max_patches * 4;
+  if (num_ref_patch > max_ref_patches) {
+    return JXL_FAILURE("Too many patches in dictionary");
+  }
+  size_t num_ec = shared_->metadata->m.num_extra_channels;
+
+  size_t total_patches = 0;
+  size_t next_size = 1;
+
+  for (size_t id = 0; id < num_ref_patch; id++) {
+    PatchReferencePosition ref_pos;
+    ref_pos.ref = read_num(kReferenceFrameContext);
+    if (ref_pos.ref >= kMaxNumReferenceFrames ||
+        shared_->reference_frames[ref_pos.ref].frame.xsize() == 0) {
+      return JXL_FAILURE("Invalid reference frame ID");
+    }
+    if (!shared_->reference_frames[ref_pos.ref].ib_is_in_xyb) {
+      return JXL_FAILURE(
+          "Patches cannot use frames saved post color transforms");
+    }
+    const ImageBundle& ib = shared_->reference_frames[ref_pos.ref].frame;
+    ref_pos.x0 = read_num(kPatchReferencePositionContext);
+    ref_pos.y0 = read_num(kPatchReferencePositionContext);
+    ref_pos.xsize = read_num(kPatchSizeContext) + 1;
+    ref_pos.ysize = read_num(kPatchSizeContext) + 1;
+    if (ref_pos.x0 + ref_pos.xsize > ib.xsize() ||
+        ref_pos.y0 + ref_pos.ysize > ib.ysize()) {
+      return JXL_FAILURE("Invalid position specified in reference frame");
+    }
+    size_t id_count = read_num(kPatchCountContext) + 1;
+    total_patches += id_count;
+    if (total_patches > max_patches) {
+      return JXL_FAILURE("Too many patches in dictionary");
+    }
+    if (next_size < total_patches) {
+      next_size *= 2;
+      next_size = std::min<size_t>(next_size, max_patches);
+    }
+    if (next_size * (num_ec + 1) > max_blending_infos) {
+      return JXL_FAILURE("Too many patches in dictionary");
+    }
+    positions_.reserve(next_size);
+    blendings_.reserve(next_size * (num_ec + 1));
+    for (size_t i = 0; i < id_count; i++) {
+      PatchPosition pos;
+      pos.ref_pos_idx = ref_positions_.size();
+      if (i == 0) {
+        pos.x = read_num(kPatchPositionContext);
+        pos.y = read_num(kPatchPositionContext);
+      } else {
+        ssize_t deltax = UnpackSigned(read_num(kPatchOffsetContext));
+        if (deltax < 0 && static_cast<size_t>(-deltax) > positions_.back().x) {
+          return JXL_FAILURE("Invalid patch: negative x coordinate (%" PRIuS
+                             " base x %" PRIdS " delta x)",
+                             positions_.back().x, deltax);
+        }
+        pos.x = positions_.back().x + deltax;
+        ssize_t deltay = UnpackSigned(read_num(kPatchOffsetContext));
+        if (deltay < 0 && static_cast<size_t>(-deltay) > positions_.back().y) {
+          return JXL_FAILURE("Invalid patch: negative y coordinate (%" PRIuS
+                             " base y %" PRIdS " delta y)",
+                             positions_.back().y, deltay);
+        }
+        pos.y = positions_.back().y + deltay;
+      }
+      if (pos.x + ref_pos.xsize > xsize) {
+        return JXL_FAILURE("Invalid patch x: at %" PRIuS " + %" PRIuS
+                           " > %" PRIuS,
+                           pos.x, ref_pos.xsize, xsize);
+      }
+      if (pos.y + ref_pos.ysize > ysize) {
+        return JXL_FAILURE("Invalid patch y: at %" PRIuS " + %" PRIuS
+                           " > %" PRIuS,
+                           pos.y, ref_pos.ysize, ysize);
+      }
+      for (size_t j = 0; j < num_ec + 1; j++) {
+        uint32_t blend_mode = read_num(kPatchBlendModeContext);
+        if (blend_mode >= uint32_t(PatchBlendMode::kNumBlendModes)) {
+          return JXL_FAILURE("Invalid patch blend mode: %u", blend_mode);
+        }
+        PatchBlending info;
+        info.mode = static_cast<PatchBlendMode>(blend_mode);
+        if (UsesAlpha(info.mode) ||
+            (info.mode != PatchBlendMode::kNone && j > 0)) {
+          *uses_extra_channels = true;
+        }
+        if (UsesAlpha(info.mode) &&
+            shared_->metadata->m.extra_channel_info.size() > 1) {
+          info.alpha_channel = read_num(kPatchAlphaChannelContext);
+          if (info.alpha_channel >=
+              shared_->metadata->m.extra_channel_info.size()) {
+            return JXL_FAILURE(
+                "Invalid alpha channel for blending: %u out of %u\n",
+                info.alpha_channel,
+                (uint32_t)shared_->metadata->m.extra_channel_info.size());
+          }
+        } else {
+          info.alpha_channel = 0;
+        }
+        if (UsesClamp(info.mode)) {
+          info.clamp = read_num(kPatchClampContext);
+        } else {
+          info.clamp = false;
+        }
+        blendings_.push_back(info);
+      }
+      positions_.push_back(std::move(pos));
+    }
+    ref_positions_.emplace_back(std::move(ref_pos));
+  }
+  positions_.shrink_to_fit();
+
+  if (!decoder.CheckANSFinalState()) {
+    return JXL_FAILURE("ANS checksum failure.");
+  }
+
+  ComputePatchTree();
+  return true;
+}
+
+int PatchDictionary::GetReferences() const {
+  // One bit per reference frame id that any stored reference patch uses.
+  int mask = 0;
+  for (const PatchReferencePosition& ref_pos : ref_positions_)
+    mask |= 1 << static_cast<int>(ref_pos.ref);
+  return mask;
+}
+
+namespace {
+struct PatchInterval {
+  size_t idx;     // Index of the patch in positions_.
+  size_t y0, y1;  // Rows covered by the patch: y0 <= y < y1.
+};
+}  // namespace
+
+void PatchDictionary::ComputePatchTree() {  // Rebuilds the per-row lookup data.
+  patch_tree_.clear();
+  num_patches_.clear();
+  sorted_patches_y0_.clear();
+  sorted_patches_y1_.clear();
+  if (positions_.empty()) {
+    return;  // No patches: every table stays empty.
+  }
+  // Create a y-interval for each patch.
+  std::vector<PatchInterval> intervals(positions_.size());
+  for (size_t i = 0; i < positions_.size(); ++i) {
+    const auto& pos = positions_[i];
+    intervals[i].idx = i;
+    intervals[i].y0 = pos.y;
+    intervals[i].y1 = pos.y + ref_positions_[pos.ref_pos_idx].ysize;
+  }
+  auto sort_by_y0 = [&intervals](size_t start, size_t end) {
+    std::sort(intervals.data() + start, intervals.data() + end,
+              [](const PatchInterval& i0, const PatchInterval& i1) {
+                return i0.y0 < i1.y0;
+              });
+  };
+  auto sort_by_y1 = [&intervals](size_t start, size_t end) {
+    std::sort(intervals.data() + start, intervals.data() + end,
+              [](const PatchInterval& i0, const PatchInterval& i1) {
+                return i0.y1 < i1.y1;
+              });
+  };
+  // Count the number of patches for each row.
+  sort_by_y1(0, intervals.size());
+  num_patches_.resize(intervals.back().y1);  // Largest y1 after the sort.
+  for (auto iv : intervals) {
+    for (size_t y = iv.y0; y < iv.y1; ++y) num_patches_[y]++;
+  }
+  PatchTreeNode root;
+  root.start = 0;
+  root.num = intervals.size();
+  patch_tree_.push_back(root);
+  size_t next = 0;
+  while (next < patch_tree_.size()) {
+    auto& node = patch_tree_[next];  // May dangle after the push_back below.
+    size_t start = node.start;
+    size_t end = node.start + node.num;
+    // Choose the y_center for this node to be the median of interval starts.
+    sort_by_y0(start, end);
+    size_t middle_idx = start + node.num / 2;
+    node.y_center = intervals[middle_idx].y0;
+    // Divide the intervals in [start, end) into three groups:
+    //   * those completely to the right of y_center: [right_start, end)
+    //   * those overlapping y_center: [left_end, right_start)
+    //   * those completely to the left of y_center: [start, left_end)
+    size_t right_start = middle_idx;
+    while (right_start < end && intervals[right_start].y0 == node.y_center) {
+      ++right_start;
+    }
+    sort_by_y1(start, right_start);
+    size_t left_end = right_start;
+    while (left_end > start && intervals[left_end - 1].y1 > node.y_center) {
+      --left_end;
+    }
+    // Fill in sorted_patches_y0_ and sorted_patches_y1_ for the current node.
+    node.num = right_start - left_end;
+    node.start = sorted_patches_y0_.size();
+    for (ssize_t i = static_cast<ssize_t>(right_start) - 1;
+         i >= static_cast<ssize_t>(left_end); --i) {
+      sorted_patches_y1_.push_back({intervals[i].y1, intervals[i].idx});
+    }
+    sort_by_y0(left_end, right_start);
+    for (size_t i = left_end; i < right_start; ++i) {
+      sorted_patches_y0_.push_back({intervals[i].y0, intervals[i].idx});
+    }
+    // Create the left and right nodes (if not empty).
+    node.left_child = node.right_child = -1;  // -1 marks a missing child.
+    if (left_end > start) {
+      PatchTreeNode left;
+      left.start = start;
+      left.num = left_end - left.start;
+      patch_tree_[next].left_child = patch_tree_.size();  // Index of `left`.
+      patch_tree_.push_back(left);
+    }
+    if (right_start < end) {
+      PatchTreeNode right;
+      right.start = right_start;
+      right.num = end - right.start;
+      patch_tree_[next].right_child = patch_tree_.size();  // Index of `right`.
+      patch_tree_.push_back(right);
+    }
+    ++next;  // Children were appended, so they are processed in later turns.
+  }
+}
+
+std::vector<size_t> PatchDictionary::GetPatchesForRow(size_t y) const {
+  std::vector<size_t> result;
+  if (y >= num_patches_.size() || num_patches_[y] == 0) return result;
+  result.reserve(num_patches_[y]);
+  ssize_t tree_idx = 0;
+  while (tree_idx != -1) {  // -1 means there is no child to descend into.
+    JXL_DASSERT(tree_idx < (ssize_t)patch_tree_.size());
+    const auto& node = patch_tree_[tree_idx];
+    const size_t lo = node.start;
+    const size_t hi = node.start + node.num;
+    if (y <= node.y_center) {
+      // Ordered by increasing y0: stop at the first patch starting after y.
+      for (size_t i = lo; i < hi && sorted_patches_y0_[i].first <= y; ++i) {
+        result.push_back(sorted_patches_y0_[i].second);
+      }
+      tree_idx = y < node.y_center ? node.left_child : -1;
+    } else {
+      // Ordered by decreasing y1: stop at the first patch with y1 <= y.
+      for (size_t i = lo; i < hi && sorted_patches_y1_[i].first > y; ++i) {
+        result.push_back(sorted_patches_y1_[i].second);
+      }
+      tree_idx = node.right_child;
+    }
+  }
+  // Restore index order so that patches touching the same pixels are applied
+  // in their original relative order. This matters for patches whose blend
+  // mode is different from kAdd.
+  std::sort(result.begin(), result.end());
+  return result;
+}
+
+// Blends every patch that covers row `y` into `inout`, which holds the `xsize`
+// pixels of that row starting at column `x0` of the frame.
+void PatchDictionary::AddOneRow(float* const* inout, size_t y, size_t x0,
+                                size_t xsize) const {
+  const size_t num_ec = shared_->metadata->m.num_extra_channels;
+  std::vector<const float*> fg_ptrs(3 + num_ec);
+  for (size_t pos_idx : GetPatchesForRow(y)) {
+    const PatchPosition& pos = positions_[pos_idx];
+    const PatchReferencePosition& ref_pos = ref_positions_[pos.ref_pos_idx];
+    JXL_DASSERT(y >= pos.y);
+    JXL_DASSERT(y < pos.y + ref_pos.ysize);
+    // Nothing to blend when the patch starts past the segment or ends before
+    // it begins.
+    const size_t patch_end = pos.x + ref_pos.xsize;
+    if (pos.x >= x0 + xsize || patch_end < x0) continue;
+    const size_t blend_begin = std::max(pos.x, x0);
+    const size_t blend_end = std::min(patch_end, x0 + xsize);
+    // Reference-frame row shown on frame row y. The pointers below are shifted
+    // so that index 0 lines up with frame column x0.
+    const size_t ref_row = ref_pos.y0 + (y - pos.y);
+    const ImageBundle& ref_frame = shared_->reference_frames[ref_pos.ref].frame;
+    for (size_t c = 0; c < 3; c++) {
+      fg_ptrs[c] =
+          ref_frame.color().ConstPlaneRow(c, ref_row) + ref_pos.x0 + x0 - pos.x;
+    }
+    for (size_t i = 0; i < num_ec; i++) {
+      fg_ptrs[3 + i] = ref_frame.extra_channels()[i].ConstRow(ref_row) +
+                       ref_pos.x0 + x0 - pos.x;
+    }
+    // blendings_ holds num_ec + 1 consecutive entries per patch: the first is
+    // passed on its own, the remaining num_ec as an array.
+    const size_t blending_idx = pos_idx * (num_ec + 1);
+    PerformBlending(inout, fg_ptrs.data(), inout, blend_begin - x0,
+                    blend_end - blend_begin, blendings_[blending_idx],
+                    blendings_.data() + blending_idx + 1,
+                    shared_->metadata->m.extra_channel_info);
+  }
+}
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_patch_dictionary.h b/third-party/libjxl/libjxl/lib/jxl/dec_patch_dictionary.h
new file mode 100644
index 0000000000..a950e83e85
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_patch_dictionary.h
@@ -0,0 +1,151 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_PATCH_DICTIONARY_H_
+#define LIB_JXL_DEC_PATCH_DICTIONARY_H_
+
+// Chooses reference patches, and avoids encoding them once per occurrence.
+
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include <tuple>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/opsin_params.h"
+
+namespace jxl {
+
+enum class PatchBlendMode : uint8_t {
+  // The new values are the old ones. Useful to skip some channels.
+  kNone = 0,
+  // The new values (in the crop) replace the old ones: sample = new
+  kReplace = 1,
+  // The new values (in the crop) get added to the old ones: sample = old + new
+  kAdd = 2,
+  // The new values (in the crop) get multiplied by the old ones:
+  // sample = old * new
+  // This blend mode is only supported if BlendColorSpace is kEncoded. The
+  // range of the new value matters for multiplication purposes, and its
+  // nominal range of 0..1 is computed the same way as this is done for the
+  // alpha values in kBlend and kAlphaWeightedAdd.
+  kMul = 3,
+  // The new values (in the crop) replace the old ones if alpha>0:
+  // For first alpha channel:
+  // alpha = old + new * (1 - old)
+  // For other channels if !alpha_associated:
+  // sample = ((1 - new_alpha) * old * old_alpha + new_alpha * new) / alpha
+  // For other channels if alpha_associated:
+  // sample = (1 - new_alpha) * old + new
+  // The alpha formula applies to the alpha used for the division in the other
+  // channels formula, and applies to the alpha channel itself if its
+  // blend_channel value matches itself.
+  // If using kBlendAbove, new is the patch and old is the original image; if
+  // using kBlendBelow, the meaning is inverted.
+  kBlendAbove = 4,
+  kBlendBelow = 5,
+  // The new values (in the crop) are added to the old ones if alpha>0:
+  // For first alpha channel: sample = old + new * (1 - old)
+  // For other channels: sample = old + alpha * new
+  kAlphaWeightedAddAbove = 6,
+  kAlphaWeightedAddBelow = 7,
+  kNumBlendModes,  // Count of the modes above; not itself a valid mode.
+};
+
+inline bool UsesAlpha(PatchBlendMode mode) {
+  using M = PatchBlendMode;  // Shorthand for the comparisons below.
+  const bool blend = mode == M::kBlendAbove || mode == M::kBlendBelow;
+  return blend || mode == M::kAlphaWeightedAddAbove ||
+         mode == M::kAlphaWeightedAddBelow;
+}
+inline bool UsesClamp(PatchBlendMode mode) {  // kMul plus every alpha mode.
+  return mode == PatchBlendMode::kMul || UsesAlpha(mode);
+}
+
+struct PatchBlending {
+  PatchBlendMode mode;
+  uint32_t alpha_channel;  // Extra-channel index; 0 when not read.
+  bool clamp;              // false when the mode does not use clamping.
+};
+
+// Position and size of the patch in the reference frame.
+struct PatchReferencePosition {
+  size_t ref, x0, y0, xsize, ysize;  // ref: index into reference_frames.
+};
+
+struct PatchPosition {
+  // Position of top-left corner of the patch in the image.
+  size_t x, y;
+  size_t ref_pos_idx;  // Index into PatchDictionary::ref_positions_.
+};
+
+struct PassesSharedState;
+
+// Encoder-side helper class to encode the PatchDictionary.
+class PatchDictionaryEncoder;
+
+class PatchDictionary {
+ public:
+  PatchDictionary() = default;
+
+  void SetPassesSharedState(const PassesSharedState* shared) {
+    shared_ = shared;  // Not owned; Decode() and AddOneRow() dereference it.
+  }
+
+  bool HasAny() const { return !positions_.empty(); }
+
+  Status Decode(BitReader* br, size_t xsize, size_t ysize,
+                bool* uses_extra_channels);
+
+  void Clear() {
+    positions_.clear();
+    ComputePatchTree();  // With no positions this empties the lookup tables.
+  }
+
+  // Adds patches to a segment of `xsize` pixels, starting at `inout`, assumed
+  // to be located at position (x0, y) in the frame.
+  void AddOneRow(float* const* inout, size_t y, size_t x0, size_t xsize) const;
+
+  // Returns dependencies of this patch dictionary on reference frame ids as a
+  // bit mask: bits 0-3 indicate reference frame 0-3.
+  int GetReferences() const;
+
+  std::vector<size_t> GetPatchesForRow(size_t y) const;
+
+ private:
+  friend class PatchDictionaryEncoder;
+
+  const PassesSharedState* shared_ = nullptr;  // Set by SetPassesSharedState().
+  std::vector<PatchPosition> positions_;
+  std::vector<PatchReferencePosition> ref_positions_;
+  std::vector<PatchBlending> blendings_;  // num_extra_channels + 1 per position.
+
+  // Interval tree on the y coordinates of the patches.
+  struct PatchTreeNode {
+    ssize_t left_child;
+    ssize_t right_child;
+    size_t y_center;
+    // Range of patches in sorted_patches_y0_ and sorted_patches_y1_ that
+    // contain the row y_center.
+    size_t start;
+    size_t num;
+  };
+  std::vector<PatchTreeNode> patch_tree_;
+  // Number of patches for each row.
+  std::vector<size_t> num_patches_;
+  std::vector<std::pair<size_t, size_t>> sorted_patches_y0_;
+  std::vector<std::pair<size_t, size_t>> sorted_patches_y1_;
+
+  void ComputePatchTree();
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_PATCH_DICTIONARY_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_tone_mapping-inl.h b/third-party/libjxl/libjxl/lib/jxl/dec_tone_mapping-inl.h
new file mode 100644
index 0000000000..ffe0c10bff
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_tone_mapping-inl.h
@@ -0,0 +1,239 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JXL_DEC_TONE_MAPPING_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_DEC_TONE_MAPPING_INL_H_
+#undef LIB_JXL_DEC_TONE_MAPPING_INL_H_
+#else
+#define LIB_JXL_DEC_TONE_MAPPING_INL_H_
+#endif
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Clamp;
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::ZeroIfNegative;
+
+template <typename D>
+class Rec2408ToneMapper {  // NOTE(review): presumably ITU-R BT.2408 — confirm.
+ private:
+  using V = hwy::HWY_NAMESPACE::Vec<D>;
+
+ public:
+  explicit Rec2408ToneMapper(std::pair<float, float> source_range,
+                             std::pair<float, float> target_range,
+                             const float primaries_luminances[3])
+      : source_range_(source_range),
+        target_range_(target_range),
+        red_Y_(primaries_luminances[0]),
+        green_Y_(primaries_luminances[1]),
+        blue_Y_(primaries_luminances[2]) {}
+  // Tone-maps the three channel vectors in place.
+  void ToneMap(V* red, V* green, V* blue) const {
+    const V luminance = Mul(Set(df_, source_range_.second),
+                            (MulAdd(Set(df_, red_Y_), *red,
+                                    MulAdd(Set(df_, green_Y_), *green,
+                                           Mul(Set(df_, blue_Y_), *blue)))));
+    const V pq_mastering_min = Set(df_, pq_mastering_min_);
+    const V inv_pq_mastering_range = Set(df_, inv_pq_mastering_range_);
+    const V normalized_pq = Min(
+        Set(df_, 1.f),
+        Mul(Sub(InvEOTF(luminance), pq_mastering_min), inv_pq_mastering_range));
+    const V ks = Set(df_, ks_);
+    const V e2 =
+        IfThenElse(Lt(normalized_pq, ks), normalized_pq, P(normalized_pq));
+    const V one_minus_e2 = Sub(Set(df_, 1), e2);
+    const V one_minus_e2_2 = Mul(one_minus_e2, one_minus_e2);
+    const V one_minus_e2_4 = Mul(one_minus_e2_2, one_minus_e2_2);
+    const V b = Set(df_, min_lum_);
+    const V e3 = MulAdd(b, one_minus_e2_4, e2);
+    const V pq_mastering_range = Set(df_, pq_mastering_range_);
+    const V e4 = MulAdd(e3, pq_mastering_range, pq_mastering_min);
+    const V new_luminance =
+        Min(Set(df_, target_range_.second),
+            ZeroIfNegative(
+                Mul(Set(df_, 10000), TF_PQ().DisplayFromEncoded(df_, e4))));
+    const V min_luminance = Set(df_, 1e-6f);
+    const auto use_cap = Le(luminance, min_luminance);  // luminance <= 1e-6
+    const V ratio = Div(new_luminance, Max(luminance, min_luminance));
+    const V cap = Mul(new_luminance, Set(df_, inv_target_peak_));
+    const V normalizer = Set(df_, normalizer_);
+    const V multiplier = Mul(ratio, normalizer);
+    for (V* const val : {red, green, blue}) {
+      *val = IfThenElse(use_cap, cap, Mul(*val, multiplier));
+    }
+  }
+
+ private:
+  V InvEOTF(const V luminance) const {
+    return TF_PQ().EncodedFromDisplay(df_,
+                                      Mul(luminance, Set(df_, 1. / 10000)));
+  }
+  float InvEOTF(const float luminance) const {
+    return TF_PQ().EncodedFromDisplay(luminance / 10000.0f);
+  }
+  V T(const V a) const {
+    const V ks = Set(df_, ks_);
+    const V inv_one_minus_ks = Set(df_, inv_one_minus_ks_);
+    return Mul(Sub(a, ks), inv_one_minus_ks);
+  }
+  V P(const V b) const {
+    const V t_b = T(b);
+    const V t_b_2 = Mul(t_b, t_b);
+    const V t_b_3 = Mul(t_b_2, t_b);
+    const V ks = Set(df_, ks_);
+    const V max_lum = Set(df_, max_lum_);
+    return MulAdd(
+        MulAdd(Set(df_, 2), t_b_3, MulAdd(Set(df_, -3), t_b_2, Set(df_, 1))),
+        ks,
+        MulAdd(Add(t_b_3, MulAdd(Set(df_, -2), t_b_2, t_b)),
+               Sub(Set(df_, 1), ks),
+               Mul(MulAdd(Set(df_, -2), t_b_3, Mul(Set(df_, 3), t_b_2)),
+                   max_lum)));
+  }
+
+  D df_;
+  const std::pair<float, float> source_range_;
+  const std::pair<float, float> target_range_;
+  const float red_Y_;
+  const float green_Y_;
+  const float blue_Y_;
+  // Derived constants: each initializer reads only members declared above.
+  const float pq_mastering_min_ = InvEOTF(source_range_.first);
+  const float pq_mastering_max_ = InvEOTF(source_range_.second);
+  const float pq_mastering_range_ = pq_mastering_max_ - pq_mastering_min_;
+  const float inv_pq_mastering_range_ = 1.0f / pq_mastering_range_;
+  // TODO(eustas): divide instead of inverse-multiply?
+  const float min_lum_ = (InvEOTF(target_range_.first) - pq_mastering_min_) *
+                         inv_pq_mastering_range_;
+  // TODO(eustas): divide instead of inverse-multiply?
+  const float max_lum_ = (InvEOTF(target_range_.second) - pq_mastering_min_) *
+                         inv_pq_mastering_range_;
+  const float ks_ = 1.5f * max_lum_ - 0.5f;
+  // max() keeps the divisor at least 1e-6.
+  const float inv_one_minus_ks_ = 1.0f / std::max(1e-6f, 1.0f - ks_);
+
+  const float normalizer_ = source_range_.second / target_range_.second;
+  const float inv_target_peak_ = 1.f / target_range_.second;
+};
+
+class HlgOOTF {
+ public:
+  explicit HlgOOTF(float source_luminance, float target_luminance,
+                   const float primaries_luminances[3])
+      : HlgOOTF(/*gamma=*/std::pow(
+                    1.111f, std::log2(target_luminance / source_luminance)),
+                primaries_luminances) {}
+
+  static HlgOOTF FromSceneLight(float display_luminance,
+                                const float primaries_luminances[3]) {
+    return HlgOOTF(/*gamma=*/1.2f *
+                       std::pow(1.111f, std::log2(display_luminance / 1000.f)),
+                   primaries_luminances);
+  }
+
+  static HlgOOTF ToSceneLight(float display_luminance,
+                              const float primaries_luminances[3]) {
+    return HlgOOTF(
+        /*gamma=*/(1 / 1.2f) *
+            std::pow(1.111f, -std::log2(display_luminance / 1000.f)),
+        primaries_luminances);
+  }
+  // Scales the channels in place by luminance^exponent_, capped at 1e9.
+  template <typename V>
+  void Apply(V* red, V* green, V* blue) const {
+    hwy::HWY_NAMESPACE::DFromV<V> df;
+    if (!apply_ootf_) return;  // |exponent_| <= 0.01: no-op.
+    const V luminance =
+        MulAdd(Set(df, red_Y_), *red,
+               MulAdd(Set(df, green_Y_), *green, Mul(Set(df, blue_Y_), *blue)));
+    const V ratio =
+        Min(FastPowf(df, luminance, Set(df, exponent_)), Set(df, 1e9));
+    *red = Mul(*red, ratio);
+    *green = Mul(*green, ratio);
+    *blue = Mul(*blue, ratio);
+  }
+
+  bool WarrantsGamutMapping() const { return apply_ootf_ && exponent_ < 0; }
+
+ private:
+  explicit HlgOOTF(float gamma, const float luminances[3])
+      : exponent_(gamma - 1),
+        red_Y_(luminances[0]),
+        green_Y_(luminances[1]),
+        blue_Y_(luminances[2]) {}
+  const float exponent_;  // gamma - 1; must stay declared before apply_ootf_.
+  const bool apply_ootf_ = exponent_ < -0.01f || 0.01f < exponent_;
+  const float red_Y_;
+  const float green_Y_;
+  const float blue_Y_;
+};
+
+template <typename V>
+void GamutMap(V* red, V* green, V* blue, const float primaries_luminances[3],
+              float preserve_saturation = 0.1f) {
+  hwy::HWY_NAMESPACE::DFromV<V> df;
+  const V luminance =
+      MulAdd(Set(df, primaries_luminances[0]), *red,
+             MulAdd(Set(df, primaries_luminances[1]), *green,
+                    Mul(Set(df, primaries_luminances[2]), *blue)));
+
+  // Desaturate out-of-gamut pixels. This is done by mixing each pixel
+  // with just enough gray of the target luminance to make all
+  // components non-negative.
+  // - For saturation preservation, if a component is still larger than
+  // 1 then the pixel is normalized to have a maximum component of 1.
+  // That will reduce its luminance.
+  // - For luminance preservation, getting all components below 1 is
+  // done by mixing in yet more gray. That will desaturate it further.
+  const V zero = Zero(df);
+  const V one = Set(df, 1);
+  V gray_mix_saturation = zero;
+  V gray_mix_luminance = zero;
+  for (const V* ch : {red, green, blue}) {
+    const V& val = *ch;
+    const V val_minus_gray = Sub(val, luminance);
+    const V inv_val_minus_gray =
+        Div(one, IfThenElse(Eq(val_minus_gray, zero), one, val_minus_gray));
+    const V val_over_val_minus_gray = Mul(val, inv_val_minus_gray);
+    gray_mix_saturation =
+        IfThenElse(Ge(val_minus_gray, zero), gray_mix_saturation,
+                   Max(gray_mix_saturation, val_over_val_minus_gray));
+    gray_mix_luminance =
+        Max(gray_mix_luminance,
+            IfThenElse(Le(val_minus_gray, zero), gray_mix_saturation,
+                       Sub(val_over_val_minus_gray, inv_val_minus_gray)));
+  }
+  const V gray_mix = Clamp(
+      MulAdd(Set(df, preserve_saturation),
+             Sub(gray_mix_saturation, gray_mix_luminance), gray_mix_luminance),
+      zero, one);  // Lerp between the two mixes, clamped to [0, 1].
+  for (V* const ch : {red, green, blue}) {
+    V& val = *ch;
+    val = MulAdd(gray_mix, Sub(luminance, val), val);
+  }
+  const V max_clr = Max(Max(one, *red), Max(*green, *blue));  // Always >= 1.
+  const V normalizer = Div(one, max_clr);
+  for (V* const ch : {red, green, blue}) {
+    V& val = *ch;
+    val = Mul(val, normalizer);
+  }
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_DEC_TONE_MAPPING_INL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_transforms-inl.h b/third-party/libjxl/libjxl/lib/jxl/dec_transforms-inl.h
new file mode 100644
index 0000000000..c2267d75e7
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_transforms-inl.h
@@ -0,0 +1,827 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_
+#undef LIB_JXL_DEC_TRANSFORMS_INL_H_
+#else
+#define LIB_JXL_DEC_TRANSFORMS_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dct-inl.h"
+#include "lib/jxl/dct_scales.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::MulAdd;
+
+// Runs a ROWS x COLS DCT on `input` (intermediate result kept in `block`) and
+// stores it, multiplied by the per-axis DCTTotalResampleScale factors, in the
+// lowest-frequency corner of `output`, a DCT_ROWS x DCT_COLS coefficient block.
+template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
+          size_t ROWS, size_t COLS>
+JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride,
+                                  float* output, const size_t output_stride,
+                                  float* JXL_RESTRICT block,
+                                  float* JXL_RESTRICT scratch_space) {
+  static_assert(LF_ROWS == ROWS,
+                "ReinterpretingDCT should only be called with LF == N");
+  static_assert(LF_COLS == COLS,
+                "ReinterpretingDCT should only be called with LF == N");
+  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
+                                 scratch_space);
+  if (ROWS < COLS) {
+    for (size_t r = 0; r < LF_ROWS; ++r) {
+      const auto r_scale = DCTTotalResampleScale<ROWS, DCT_ROWS>(r);
+      for (size_t c = 0; c < LF_COLS; ++c) {
+        const auto c_scale = DCTTotalResampleScale<COLS, DCT_COLS>(c);
+        output[r * output_stride + c] = block[r * COLS + c] * r_scale * c_scale;
+      }
+    }
+  } else {
+    for (size_t r = 0; r < LF_COLS; ++r) {
+      const auto r_scale = DCTTotalResampleScale<COLS, DCT_COLS>(r);
+      for (size_t c = 0; c < LF_ROWS; ++c) {
+        const auto c_scale = DCTTotalResampleScale<ROWS, DCT_ROWS>(c);
+        output[r * output_stride + c] = block[r * ROWS + c] * r_scale * c_scale;
+      }
+    }
+  }
+}
+
+// Applies 2x2 sum/difference butterflies to the top-left SxS corner of `block`
+// (row stride kBlockDim). Output is staged in a local copy, so `out` may alias.
+template <size_t S>
+void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
+  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
+  static_assert(S % 2 == 0, "S should be even");
+  constexpr size_t kHalf = S / 2;
+  float staged[kDCTBlockSize];
+  for (size_t qy = 0; qy < kHalf; ++qy) {
+    const float* top = block + qy * kBlockDim;
+    const float* bottom = block + (qy + kHalf) * kBlockDim;
+    float* even_row = staged + 2 * qy * kBlockDim;
+    float* odd_row = even_row + kBlockDim;
+    for (size_t qx = 0; qx < kHalf; ++qx) {
+      const float c00 = top[qx], c01 = top[kHalf + qx];
+      const float c10 = bottom[qx], c11 = bottom[kHalf + qx];
+      const float row_sum = c00 + c01, row_diff = c00 - c01;
+      even_row[2 * qx] = row_sum + c10 + c11;
+      even_row[2 * qx + 1] = row_sum - c10 - c11;
+      odd_row[2 * qx] = row_diff + c10 - c11;
+      odd_row[2 * qx + 1] = row_diff - c10 + c11;
+    }
+  }
+  for (size_t y = 0; y < S; ++y) {
+    for (size_t x = 0; x < S; ++x)
+      out[y * stride_out + x] = staged[y * kBlockDim + x];
+  }
+}
+
+void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
+  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {  // [coeff][pixel]
+      {
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+      },
+      {
+          0.876902929799142f,
+          0.2206518106944235f,
+          -0.10140050393753763f,
+          -0.1014005039375375f,
+          0.2206518106944236f,
+          -0.10140050393753777f,
+          -0.10140050393753772f,
+          -0.10140050393753763f,
+          -0.10140050393753758f,
+          -0.10140050393753769f,
+          -0.1014005039375375f,
+          -0.10140050393753768f,
+          -0.10140050393753768f,
+          -0.10140050393753759f,
+          -0.10140050393753763f,
+          -0.10140050393753741f,
+      },
+      {
+          0.0,
+          0.0,
+          0.40670075830260755f,
+          0.44444816619734445f,
+          0.0,
+          0.0,
+          0.19574399372042936f,
+          0.2929100136981264f,
+          -0.40670075830260716f,
+          -0.19574399372042872f,
+          0.0,
+          0.11379074460448091f,
+          -0.44444816619734384f,
+          -0.29291001369812636f,
+          -0.1137907446044814f,
+          0.0,
+      },
+      {
+          0.0,
+          0.0,
+          -0.21255748058288748f,
+          0.3085497062849767f,
+          0.0,
+          0.4706702258572536f,
+          -0.1621205195722993f,
+          0.0,
+          -0.21255748058287047f,
+          -0.16212051957228327f,
+          -0.47067022585725277f,
+          -0.1464291867126764f,
+          0.3085497062849487f,
+          0.0,
+          -0.14642918671266536f,
+          0.4251149611657548f,
+      },
+      {
+          0.0,
+          -0.7071067811865474f,
+          0.0,
+          0.0,
+          0.7071067811865476f,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+      },
+      {
+          -0.4105377591765233f,
+          0.6235485373547691f,
+          -0.06435071657946274f,
+          -0.06435071657946266f,
+          0.6235485373547694f,
+          -0.06435071657946284f,
+          -0.0643507165794628f,
+          -0.06435071657946274f,
+          -0.06435071657946272f,
+          -0.06435071657946279f,
+          -0.06435071657946266f,
+          -0.06435071657946277f,
+          -0.06435071657946277f,
+          -0.06435071657946273f,
+          -0.06435071657946274f,
+          -0.0643507165794626f,
+      },
+      {
+          0.0,
+          0.0,
+          -0.4517556589999482f,
+          0.15854503551840063f,
+          0.0,
+          -0.04038515160822202f,
+          0.0074182263792423875f,
+          0.39351034269210167f,
+          -0.45175565899994635f,
+          0.007418226379244351f,
+          0.1107416575309343f,
+          0.08298163094882051f,
+          0.15854503551839705f,
+          0.3935103426921022f,
+          0.0829816309488214f,
+          -0.45175565899994796f,
+      },
+      {
+          0.0,
+          0.0,
+          -0.304684750724869f,
+          0.5112616136591823f,
+          0.0,
+          0.0,
+          -0.290480129728998f,
+          -0.06578701549142804f,
+          0.304684750724884f,
+          0.2904801297290076f,
+          0.0,
+          -0.23889773523344604f,
+          -0.5112616136592012f,
+          0.06578701549142545f,
+          0.23889773523345467f,
+          0.0,
+      },
+      {
+          0.0,
+          0.0,
+          0.3017929516615495f,
+          0.25792362796341184f,
+          0.0,
+          0.16272340142866204f,
+          0.09520022653475037f,
+          0.0,
+          0.3017929516615503f,
+          0.09520022653475055f,
+          -0.16272340142866173f,
+          -0.35312385449816297f,
+          0.25792362796341295f,
+          0.0,
+          -0.3531238544981624f,
+          -0.6035859033230976f,
+      },
+      {
+          0.0,
+          0.0,
+          0.40824829046386274f,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          -0.4082482904638628f,
+          -0.4082482904638635f,
+          0.0,
+          0.0,
+          -0.40824829046386296f,
+          0.0,
+          0.4082482904638634f,
+          0.408248290463863f,
+          0.0,
+      },
+      {
+          0.0,
+          0.0,
+          0.1747866975480809f,
+          0.0812611176717539f,
+          0.0,
+          0.0,
+          -0.3675398009862027f,
+          -0.307882213957909f,
+          -0.17478669754808135f,
+          0.3675398009862011f,
+          0.0,
+          0.4826689115059883f,
+          -0.08126111767175039f,
+          0.30788221395790305f,
+          -0.48266891150598584f,
+          0.0,
+      },
+      {
+          0.0,
+          0.0,
+          -0.21105601049335784f,
+          0.18567180916109802f,
+          0.0,
+          0.0,
+          0.49215859013738733f,
+          -0.38525013709251915f,
+          0.21105601049335806f,
+          -0.49215859013738905f,
+          0.0,
+          0.17419412659916217f,
+          -0.18567180916109904f,
+          0.3852501370925211f,
+          -0.1741941265991621f,
+          0.0,
+      },
+      {
+          0.0,
+          0.0,
+          -0.14266084808807264f,
+          -0.3416446842253372f,
+          0.0,
+          0.7367497537172237f,
+          0.24627107722075148f,
+          -0.08574019035519306f,
+          -0.14266084808807344f,
+          0.24627107722075137f,
+          0.14883399227113567f,
+          -0.04768680350229251f,
+          -0.3416446842253373f,
+          -0.08574019035519267f,
+          -0.047686803502292804f,
+          -0.14266084808807242f,
+      },
+      {
+          0.0,
+          0.0,
+          -0.13813540350758585f,
+          0.3302282550303788f,
+          0.0,
+          0.08755115000587084f,
+          -0.07946706605909573f,
+          -0.4613374887461511f,
+          -0.13813540350758294f,
+          -0.07946706605910261f,
+          0.49724647109535086f,
+          0.12538059448563663f,
+          0.3302282550303805f,
+          -0.4613374887461554f,
+          0.12538059448564315f,
+          -0.13813540350758452f,
+      },
+      {
+          0.0,
+          0.0,
+          -0.17437602599651067f,
+          0.0702790691196284f,
+          0.0,
+          -0.2921026642334881f,
+          0.3623817333531167f,
+          0.0,
+          -0.1743760259965108f,
+          0.36238173335311646f,
+          0.29210266423348785f,
+          -0.4326608024727445f,
+          0.07027906911962818f,
+          0.0,
+          -0.4326608024727457f,
+          0.34875205199302267f,
+      },
+      {
+          0.0,
+          0.0,
+          0.11354987314994337f,
+          -0.07417504595810355f,
+          0.0,
+          0.19402893032594343f,
+          -0.435190496523228f,
+          0.21918684838857466f,
+          0.11354987314994257f,
+          -0.4351904965232251f,
+          0.5550443808910661f,
+          -0.25468277124066463f,
+          -0.07417504595810233f,
+          0.2191868483885728f,
+          -0.25468277124066413f,
+          0.1135498731499429f,
+      },
+  };
+
+  const HWY_CAPPED(float, 16) d;  // NOTE(review): tag capped at 16 float lanes
+  for (size_t i = 0; i < 16; i += Lanes(d)) {  // one vector of output pixels
+    auto pixel = Zero(d);
+    for (size_t j = 0; j < 16; j++) {  // pixel += coeffs[j] * basis row j
+      auto cf = Set(d, coeffs[j]);
+      auto basis = Load(d, k4x4AFVBasis[j] + i);
+      pixel = MulAdd(cf, basis, pixel);
+    }
+    Store(pixel, d, pixels + i);
+  }
+}
+
+// Reconstructs an 8x8 pixel block from an AFV block: one 4x4 AFV quadrant, one
+// 4x4 IDCT next to it and one 4x8 IDCT in the remaining rows.
+template <size_t afv_kind>
+void AFVTransformToPixels(const float* JXL_RESTRICT coefficients,
+                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
+  // The AFV quadrant starts at column afv_col * 4 and row afv_row * 4.
+  const size_t afv_col = afv_kind & 1;
+  const size_t afv_row = afv_kind / 2;
+  HWY_ALIGN float scratch_space[4 * 8];
+  HWY_ALIGN float coeff[4 * 4];
+  HWY_ALIGN float block[4 * 8];
+  const float c00 = coefficients[0];
+  const float c01 = coefficients[1];
+  const float c10 = coefficients[8];
+  const float dc_afv = (c00 + c10 + c01) * 4.0f;
+  const float dc_4x4 = (c00 + c10 - c01);
+  const float dc_4x8 = c00 - c10;
+  // IAFV: (even, even) positions; slot 0 is then replaced by its DC.
+  for (size_t r = 0; r < 4; ++r) {
+    for (size_t c = 0; c < 4; ++c) {
+      coeff[r * 4 + c] = coefficients[r * 2 * 8 + c * 2];
+    }
+  }
+  coeff[0] = dc_afv;
+  AFVIDCT4x4(coeff, block);
+  for (size_t r = 0; r < 4; ++r) {
+    const size_t src_row = (afv_row == 1) ? 3 - r : r;
+    float* dst = pixels + (r + afv_row * 4) * pixels_stride + afv_col * 4;
+    for (size_t c = 0; c < 4; ++c) {
+      dst[c] = block[src_row * 4 + ((afv_col == 1) ? 3 - c : c)];
+    }
+  }
+  // IDCT4x4 in (odd, even) positions; slot 0 is then replaced by its DC.
+  for (size_t r = 0; r < 4; ++r) {
+    for (size_t c = 0; c < 4; ++c) {
+      block[r * 4 + c] = coefficients[r * 2 * 8 + c * 2 + 1];
+    }
+  }
+  block[0] = dc_4x4;
+  ComputeScaledIDCT<4, 4>()(
+      block,
+      DCTTo(pixels + afv_row * 4 * pixels_stride + (afv_col == 1 ? 0 : 4),
+            pixels_stride),
+      scratch_space);
+  // IDCT4x8 from the odd coefficient rows; slot 0 is then replaced by its DC.
+  for (size_t r = 0; r < 4; ++r) {
+    for (size_t c = 0; c < 8; ++c) {
+      block[r * 8 + c] = coefficients[(1 + r * 2) * 8 + c];
+    }
+  }
+  block[0] = dc_4x8;
+  ComputeScaledIDCT<4, 8>()(
+      block,
+      DCTTo(pixels + (afv_row == 1 ? 0 : 4) * pixels_stride, pixels_stride),
+      scratch_space);
+}
+
+HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategy::Type strategy,
+                                        float* JXL_RESTRICT coefficients,
+                                        float* JXL_RESTRICT pixels,
+                                        size_t pixels_stride,
+                                        float* scratch_space) {  // IDCT scratch
+  using Type = AcStrategy::Type;
+  switch (strategy) {  // one inverse transform per AC strategy
+    case Type::IDENTITY: {  // four 4x4 sub-blocks, coefficients interleaved
+      float dcs[4] = {};
+      float block00 = coefficients[0];
+      float block01 = coefficients[1];
+      float block10 = coefficients[8];
+      float block11 = coefficients[9];
+      dcs[0] = block00 + block01 + block10 + block11;  // 2x2 butterfly
+      dcs[1] = block00 + block01 - block10 - block11;
+      dcs[2] = block00 - block01 + block10 - block11;
+      dcs[3] = block00 - block01 - block10 + block11;
+      for (size_t y = 0; y < 2; y++) {
+        for (size_t x = 0; x < 2; x++) {
+          float block_dc = dcs[y * 2 + x];
+          float residual_sum = 0;  // sum of the 15 non-(0,0) coefficients
+          for (size_t iy = 0; iy < 4; iy++) {
+            for (size_t ix = 0; ix < 4; ix++) {
+              if (ix == 0 && iy == 0) continue;
+              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
+            }
+          }
+          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =  // base pixel (1,1)
+              block_dc - residual_sum * (1.0f / 16);
+          for (size_t iy = 0; iy < 4; iy++) {
+            for (size_t ix = 0; ix < 4; ix++) {
+              if (ix == 1 && iy == 1) continue;  // base pixel already written
+              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
+                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
+                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
+            }
+          }
+          pixels[y * 4 * pixels_stride + x * 4] =  // (0,0) uses the (1,1) slot
+              coefficients[(y + 2) * 8 + x + 2] +
+              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
+        }
+      }
+      break;
+    }
+    case Type::DCT8X4: {  // two halves at column offsets 0 and 4
+      float dcs[2] = {};
+      float block0 = coefficients[0];
+      float block1 = coefficients[8];
+      dcs[0] = block0 + block1;
+      dcs[1] = block0 - block1;
+      for (size_t x = 0; x < 2; x++) {
+        HWY_ALIGN float block[4 * 8];
+        block[0] = dcs[x];
+        for (size_t iy = 0; iy < 4; iy++) {
+          for (size_t ix = 0; ix < 8; ix++) {
+            if (ix == 0 && iy == 0) continue;  // slot 0 holds the DC
+            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
+          }
+        }
+        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
+                                  scratch_space);
+      }
+      break;
+    }
+    case Type::DCT4X8: {  // two halves at row offsets 0 and 4
+      float dcs[2] = {};
+      float block0 = coefficients[0];
+      float block1 = coefficients[8];
+      dcs[0] = block0 + block1;
+      dcs[1] = block0 - block1;
+      for (size_t y = 0; y < 2; y++) {
+        HWY_ALIGN float block[4 * 8];
+        block[0] = dcs[y];
+        for (size_t iy = 0; iy < 4; iy++) {
+          for (size_t ix = 0; ix < 8; ix++) {
+            if (ix == 0 && iy == 0) continue;  // slot 0 holds the DC
+            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
+          }
+        }
+        ComputeScaledIDCT<4, 8>()(
+            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
+            scratch_space);
+      }
+      break;
+    }
+    case Type::DCT4X4: {  // four quadrants, each with its own 4x4 IDCT
+      float dcs[4] = {};
+      float block00 = coefficients[0];
+      float block01 = coefficients[1];
+      float block10 = coefficients[8];
+      float block11 = coefficients[9];
+      dcs[0] = block00 + block01 + block10 + block11;  // 2x2 butterfly
+      dcs[1] = block00 + block01 - block10 - block11;
+      dcs[2] = block00 - block01 + block10 - block11;
+      dcs[3] = block00 - block01 - block10 + block11;
+      for (size_t y = 0; y < 2; y++) {
+        for (size_t x = 0; x < 2; x++) {
+          HWY_ALIGN float block[4 * 4];
+          block[0] = dcs[y * 2 + x];
+          for (size_t iy = 0; iy < 4; iy++) {
+            for (size_t ix = 0; ix < 4; ix++) {
+              if (ix == 0 && iy == 0) continue;  // slot 0 holds the DC
+              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
+            }
+          }
+          ComputeScaledIDCT<4, 4>()(
+              block,
+              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
+              scratch_space);
+        }
+      }
+      break;
+    }
+    case Type::DCT2X2: {  // three in-place butterfly passes on a copy
+      HWY_ALIGN float coeffs[kDCTBlockSize];
+      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
+      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);  // top-left 2x2 corner
+      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);  // then 4x4
+      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);  // then 8x8
+      for (size_t y = 0; y < kBlockDim; y++) {
+        for (size_t x = 0; x < kBlockDim; x++) {
+          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
+        }
+      }
+      break;
+    }
+    case Type::DCT16X16: {  // remaining DCT cases forward to ComputeScaledIDCT
+      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
+                                  scratch_space);
+      break;
+    }
+    case Type::DCT16X8: {
+      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
+                                 scratch_space);
+      break;
+    }
+    case Type::DCT8X16: {
+      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
+                                 scratch_space);
+      break;
+    }
+    case Type::DCT32X8: {
+      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
+                                 scratch_space);
+      break;
+    }
+    case Type::DCT8X32: {
+      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
+                                 scratch_space);
+      break;
+    }
+    case Type::DCT32X16: {
+      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
+                                  scratch_space);
+      break;
+    }
+    case Type::DCT16X32: {
+      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
+                                  scratch_space);
+      break;
+    }
+    case Type::DCT32X32: {
+      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
+                                  scratch_space);
+      break;
+    }
+    case Type::DCT: {
+      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
+                                scratch_space);
+      break;
+    }
+    case Type::AFV0: {  // AFV kinds differ only in the template argument
+      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
+      break;
+    }
+    case Type::AFV1: {
+      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
+      break;
+    }
+    case Type::AFV2: {
+      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
+      break;
+    }
+    case Type::AFV3: {
+      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
+      break;
+    }
+    case Type::DCT64X32: {
+      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
+                                  scratch_space);
+      break;
+    }
+    case Type::DCT32X64: {
+      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
+                                  scratch_space);
+      break;
+    }
+    case Type::DCT64X64: {
+      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
+                                  scratch_space);
+      break;
+    }
+    case Type::DCT128X64: {
+      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
+                                   scratch_space);
+      break;
+    }
+    case Type::DCT64X128: {
+      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
+                                   scratch_space);
+      break;
+    }
+    case Type::DCT128X128: {
+      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
+                                    scratch_space);
+      break;
+    }
+    case Type::DCT256X128: {
+      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
+                                    scratch_space);
+      break;
+    }
+    case Type::DCT128X256: {
+      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
+                                    scratch_space);
+      break;
+    }
+    case Type::DCT256X256: {
+      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
+                                    scratch_space);
+      break;
+    }
+    case Type::kNumValidStrategies:  // not a usable strategy value
+      JXL_UNREACHABLE("Invalid strategy");
+  }
+}
+
+HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategy::Type strategy,
+                                              const float* dc, size_t dc_stride,
+                                              float* llf,
+                                              float* JXL_RESTRICT scratch) {
+  using Type = AcStrategy::Type;
+  HWY_ALIGN float warm_block[4 * 4];  // stack buffers, used up to DCT32X32
+  HWY_ALIGN float warm_scratch_space[4 * 4];
+  switch (strategy) {  // ROWS x COLS below is the DC footprint in 8x8 blocks
+    case Type::DCT16X8: {
+      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
+                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
+          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
+      break;
+    }
+    case Type::DCT8X16: {
+      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
+                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
+          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
+      break;
+    }
+    case Type::DCT16X16: {
+      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
+                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
+          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
+      break;
+    }
+    case Type::DCT32X8: {
+      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
+                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
+          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
+      break;
+    }
+    case Type::DCT8X32: {
+      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
+          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
+      break;
+    }
+    case Type::DCT32X16: {
+      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
+                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
+          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
+      break;
+    }
+    case Type::DCT16X32: {
+      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
+          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
+      break;
+    }
+    case Type::DCT32X32: {
+      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
+          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
+      break;
+    }
+    case Type::DCT64X32: {  // from here on both buffers come from `scratch`
+      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
+          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
+      break;
+    }
+    case Type::DCT32X64: {
+      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
+                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
+          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
+      break;
+    }
+    case Type::DCT64X64: {
+      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
+                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
+          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
+      break;
+    }
+    case Type::DCT128X64: {
+      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
+                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
+          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
+      break;
+    }
+    case Type::DCT64X128: {
+      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
+                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
+          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
+      break;
+    }
+    case Type::DCT128X128: {
+      ReinterpretingDCT<
+          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
+          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
+          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
+      break;
+    }
+    case Type::DCT256X128: {
+      ReinterpretingDCT<
+          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
+          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
+          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
+      break;
+    }
+    case Type::DCT128X256: {
+      ReinterpretingDCT<
+          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
+          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
+          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
+      break;
+    }
+    case Type::DCT256X256: {
+      ReinterpretingDCT<
+          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
+          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
+          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
+      break;
+    }
+    case Type::DCT:  // remaining strategies: the single DC value is copied
+    case Type::DCT2X2:
+    case Type::DCT4X4:
+    case Type::DCT4X8:
+    case Type::DCT8X4:
+    case Type::AFV0:
+    case Type::AFV1:
+    case Type::AFV2:
+    case Type::AFV3:
+    case Type::IDENTITY:
+      llf[0] = dc[0];
+      break;
+    case Type::kNumValidStrategies:  // not a usable strategy value
+      JXL_UNREACHABLE("Invalid strategy");
+  };
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_DEC_TRANSFORMS_INL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_transforms_testonly.cc b/third-party/libjxl/libjxl/lib/jxl/dec_transforms_testonly.cc
new file mode 100644
index 0000000000..2d40740262
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_transforms_testonly.cc
@@ -0,0 +1,42 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_transforms_testonly.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dec_transforms_testonly.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/dec_transforms-inl.h"
+
+namespace jxl {
+
+#if HWY_ONCE
+HWY_EXPORT(TransformToPixels);  // dispatch table for the wrapper below
+void TransformToPixels(AcStrategy::Type strategy,
+                       float* JXL_RESTRICT coefficients,
+                       float* JXL_RESTRICT pixels, size_t pixels_stride,
+                       float* scratch_space) {
+  HWY_DYNAMIC_DISPATCH(TransformToPixels)
+  (strategy, coefficients, pixels, pixels_stride, scratch_space);
+}
+
+HWY_EXPORT(LowestFrequenciesFromDC);  // dispatch table for the wrapper below
+void LowestFrequenciesFromDC(const jxl::AcStrategy::Type strategy,
+                             const float* dc, size_t dc_stride, float* llf,
+                             float* JXL_RESTRICT scratch) {
+  HWY_DYNAMIC_DISPATCH(LowestFrequenciesFromDC)
+  (strategy, dc, dc_stride, llf, scratch);
+}
+
+HWY_EXPORT(AFVIDCT4x4);  // dispatch table for the wrapper below
+void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
+  HWY_DYNAMIC_DISPATCH(AFVIDCT4x4)(coeffs, pixels);
+}
+#endif  // HWY_ONCE
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_transforms_testonly.h b/third-party/libjxl/libjxl/lib/jxl/dec_transforms_testonly.h
new file mode 100644
index 0000000000..f68481fda9
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_transforms_testonly.h
@@ -0,0 +1,33 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_TRANSFORMS_TESTONLY_H_
+#define LIB_JXL_DEC_TRANSFORMS_TESTONLY_H_
+
+// Facade for (non-inlined) inverse integral transforms.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+void TransformToPixels(AcStrategy::Type strategy,
+                       float* JXL_RESTRICT coefficients,
+                       float* JXL_RESTRICT pixels, size_t pixels_stride,
+                       float* JXL_RESTRICT scratch_space);
+
+// Counterpart of TransformToPixels for the DC image: fills `llf` from `dc`.
+void LowestFrequenciesFromDC(const jxl::AcStrategy::Type strategy,
+                             const float* dc, size_t dc_stride, float* llf,
+                             float* JXL_RESTRICT scratch);
+
+void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_TRANSFORMS_TESTONLY_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_xyb-inl.h b/third-party/libjxl/libjxl/lib/jxl/dec_xyb-inl.h
new file mode 100644
index 0000000000..495693b257
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_xyb-inl.h
@@ -0,0 +1,346 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// XYB -> linear sRGB helper function.
+
+#if defined(LIB_JXL_DEC_XYB_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_DEC_XYB_INL_H_
+#undef LIB_JXL_DEC_XYB_INL_H_
+#else
+#define LIB_JXL_DEC_XYB_INL_H_
+#endif
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/dec_xyb.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Broadcast;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Sub;
+
+// Inverts the pixel-wise RGB->XYB conversion in OpsinDynamicsImage() (including
+// the gamma mixing and simple gamma). Avoids clamping to [0, 1] - out of (sRGB)
+// gamut values may be in-gamut after transforming to a wider space.
+// "inverse_matrix" points to 9 broadcasted vectors, which are the 3x3 entries
+// of the (row-major) opsin absorbance matrix inverse. Pre-multiplying its
+// entries by c is equivalent to multiplying linear_* by c afterwards.
+template <class D, class V>
+HWY_INLINE HWY_MAYBE_UNUSED void XybToRgb(D d, const V opsin_x, const V opsin_y,
+                                          const V opsin_b,
+                                          const OpsinParams& opsin_params,
+                                          V* const HWY_RESTRICT linear_r,
+                                          V* const HWY_RESTRICT linear_g,
+                                          V* const HWY_RESTRICT linear_b) {
+#if HWY_TARGET == HWY_SCALAR
+  const auto neg_bias_r = Set(d, opsin_params.opsin_biases[0]);
+  const auto neg_bias_g = Set(d, opsin_params.opsin_biases[1]);
+  const auto neg_bias_b = Set(d, opsin_params.opsin_biases[2]);
+#else
+  const auto neg_bias_rgb = LoadDup128(d, opsin_params.opsin_biases);
+  const auto neg_bias_r = Broadcast<0>(neg_bias_rgb);
+  const auto neg_bias_g = Broadcast<1>(neg_bias_rgb);
+  const auto neg_bias_b = Broadcast<2>(neg_bias_rgb);
+#endif
+
+  // Color space: XYB -> RGB (r = y + x, g = y - x, b is taken as is).
+  auto gamma_r = Add(opsin_y, opsin_x);
+  auto gamma_g = Sub(opsin_y, opsin_x);
+  auto gamma_b = opsin_b;
+  // Subtract the per-channel opsin_biases_cbrt values before cubing.
+  gamma_r = Sub(gamma_r, Set(d, opsin_params.opsin_biases_cbrt[0]));
+  gamma_g = Sub(gamma_g, Set(d, opsin_params.opsin_biases_cbrt[1]));
+  gamma_b = Sub(gamma_b, Set(d, opsin_params.opsin_biases_cbrt[2]));
+
+  // Undo gamma compression: linear = gamma^3 for efficiency.
+  const auto gamma_r2 = Mul(gamma_r, gamma_r);
+  const auto gamma_g2 = Mul(gamma_g, gamma_g);
+  const auto gamma_b2 = Mul(gamma_b, gamma_b);
+  const auto mixed_r = MulAdd(gamma_r2, gamma_r, neg_bias_r);
+  const auto mixed_g = MulAdd(gamma_g2, gamma_g, neg_bias_g);
+  const auto mixed_b = MulAdd(gamma_b2, gamma_b, neg_bias_b);
+
+  const float* HWY_RESTRICT inverse_matrix = opsin_params.inverse_opsin_matrix;
+
+  // Unmix (multiply by 3x3 inverse_matrix); entry k is read from index k * 4.
+  // TODO(eustas): ref would be more readable than pointer
+  *linear_r = Mul(LoadDup128(d, &inverse_matrix[0 * 4]), mixed_r);
+  *linear_g = Mul(LoadDup128(d, &inverse_matrix[3 * 4]), mixed_r);
+  *linear_b = Mul(LoadDup128(d, &inverse_matrix[6 * 4]), mixed_r);
+  *linear_r = MulAdd(LoadDup128(d, &inverse_matrix[1 * 4]), mixed_g, *linear_r);
+  *linear_g = MulAdd(LoadDup128(d, &inverse_matrix[4 * 4]), mixed_g, *linear_g);
+  *linear_b = MulAdd(LoadDup128(d, &inverse_matrix[7 * 4]), mixed_g, *linear_b);
+  *linear_r = MulAdd(LoadDup128(d, &inverse_matrix[2 * 4]), mixed_b, *linear_r);
+  *linear_g = MulAdd(LoadDup128(d, &inverse_matrix[5 * 4]), mixed_b, *linear_g);
+  *linear_b = MulAdd(LoadDup128(d, &inverse_matrix[8 * 4]), mixed_b, *linear_b);
+}
+
+// Reports whether this target provides the specialized FastXYBTosRGB8 path.
+// Only the NEON compilation of this header implements it.
+static inline HWY_MAYBE_UNUSED bool HasFastXYBTosRGB8() {
+  // Both macros are integer constants, so this is decided at compile time.
+  constexpr bool kHasNeonPath = (HWY_TARGET == HWY_NEON);
+  return kHasNeonPath;
+}
+
+static inline HWY_MAYBE_UNUSED void FastXYBTosRGB8(const float* input[4],
+                                                   uint8_t* output,
+                                                   bool is_rgba, size_t xsize) {
+  // This function is very NEON-specific. As such, it uses intrinsics directly.
+#if HWY_TARGET == HWY_NEON
+  // WARNING: doing fixed point arithmetic correctly is very complicated.
+  // Changes to this function should be thoroughly tested.
+
+  // Note that the input is assumed to have 13 bits of mantissa, and the output
+  // will have 14 bits.
+  auto srgb_tf = [&](int16x8_t v16) {
+    int16x8_t clz = vclzq_s16(v16);
+    // Convert to [0.25, 0.5) range.
+    int16x8_t v025_05_16 = vqshlq_s16(v16, vqsubq_s16(clz, vdupq_n_s16(2)));
+
+    // third degree polynomial approximation between 0.25 and 0.5
+    // of 1.055/2^(7/2.4) * x^(1/2.4) / 32.
+    // poly ~ ((0.95x-1.75)*x+1.72)*x+0.29
+    // We actually compute ~ ((0.47x-0.87)*x+0.86)*(2x)+0.29 as 1.75 and 1.72
+    // overflow our fixed point representation.
+
+    int16x8_t twov = vqaddq_s16(v025_05_16, v025_05_16);
+
+    // 0.47 * x
+    int16x8_t step1 = vqrdmulhq_n_s16(v025_05_16, 15706);
+    // - 0.87
+    int16x8_t step2 = vsubq_s16(step1, vdupq_n_s16(28546));
+    // * x
+    int16x8_t step3 = vqrdmulhq_s16(step2, v025_05_16);
+    // + 0.86
+    int16x8_t step4 = vaddq_s16(step3, vdupq_n_s16(28302));
+    // * 2x
+    int16x8_t step5 = vqrdmulhq_s16(step4, twov);
+    // + 0.29
+    int16x8_t mul16 = vaddq_s16(step5, vdupq_n_s16(9485));
+
+    int16x8_t exp16 = vsubq_s16(vdupq_n_s16(11), clz);
+    // Compute 2**(1/2.4*exp16)/32. Values of exp16 that would overflow are
+    // capped to 1.
+    // Generated with the following Python script:
+    // a = []
+    // b = []
+    //
+    // for i in range(0, 16):
+    //   v = 2**(5/12.*i)
+    //   v /= 16
+    //   v *= 256 * 128
+    //   v = int(v)
+    //   a.append(v // 256)
+    //   b.append(v % 256)
+    //
+    // print(", ".join("0x%02x" % x for x in a))
+    //
+    // print(", ".join("0x%02x" % x for x in b))
+
+    HWY_ALIGN constexpr uint8_t k2to512powersm1div32_high[16] = {
+        0x08, 0x0a, 0x0e, 0x13, 0x19, 0x21, 0x2d, 0x3c,
+        0x50, 0x6b, 0x8f, 0x8f, 0x8f, 0x8f, 0x8f, 0x8f,
+    };
+    HWY_ALIGN constexpr uint8_t k2to512powersm1div32_low[16] = {
+        0x00, 0xad, 0x41, 0x06, 0x65, 0xe7, 0x41, 0x68,
+        0xa2, 0xa2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    };
+    // Using the highway implementation here since vqtbl1q is aarch64-only.
+    using hwy::HWY_NAMESPACE::Vec128;
+    uint8x16_t pow_low =
+        TableLookupBytes(
+            Vec128<uint8_t, 16>(vld1q_u8(k2to512powersm1div32_low)),
+            Vec128<uint8_t, 16>(vreinterpretq_u8_s16(exp16)))
+            .raw;
+    uint8x16_t pow_high =
+        TableLookupBytes(
+            Vec128<uint8_t, 16>(vld1q_u8(k2to512powersm1div32_high)),
+            Vec128<uint8_t, 16>(vreinterpretq_u8_s16(exp16)))
+            .raw;
+    int16x8_t pow16 = vreinterpretq_s16_u16(vsliq_n_u16(
+        vreinterpretq_u16_u8(pow_low), vreinterpretq_u16_u8(pow_high), 8));
+
+    // approximation of v * 12.92, divided by 2
+    // Note that our input is using 13 mantissa bits instead of 15.
+    int16x8_t v16_linear = vrshrq_n_s16(vmulq_n_s16(v16, 826), 5);
+    // 1.055*pow(v, 1/2.4) - 0.055, divided by 2
+    auto v16_pow = vsubq_s16(vqrdmulhq_s16(mul16, pow16), vdupq_n_s16(901));
+    // > 0.0031308f (note that v16 has 13 mantissa bits)
+    return vbslq_s16(vcgeq_s16(v16, vdupq_n_s16(26)), v16_pow, v16_linear);
+  };
+
+  const float* JXL_RESTRICT row_in_x = input[0];
+  const float* JXL_RESTRICT row_in_y = input[1];
+  const float* JXL_RESTRICT row_in_b = input[2];
+  const float* JXL_RESTRICT row_in_a = input[3];
+  for (size_t x = 0; x < xsize; x += 8) {  // 8 pixels per iteration
+    // Normal ranges for xyb for in-gamut sRGB colors:
+    // x: -0.015386 0.028100
+    // y: 0.000000 0.845308
+    // b: 0.000000 0.845308
+
+    // We actually want x * 8 to have some extra precision.
+    // TODO(veluca): consider different approaches here, like vld1q_f32_x2.
+    float32x4_t opsin_x_left = vld1q_f32(row_in_x + x);
+    int16x4_t opsin_x16_times8_left =
+        vqmovn_s32(vcvtq_n_s32_f32(opsin_x_left, 18));
+    float32x4_t opsin_x_right =
+        vld1q_f32(row_in_x + x + (x + 4 < xsize ? 4 : 0));
+    int16x4_t opsin_x16_times8_right =
+        vqmovn_s32(vcvtq_n_s32_f32(opsin_x_right, 18));
+    int16x8_t opsin_x16_times8 =
+        vcombine_s16(opsin_x16_times8_left, opsin_x16_times8_right);
+
+    float32x4_t opsin_y_left = vld1q_f32(row_in_y + x);
+    int16x4_t opsin_y16_left = vqmovn_s32(vcvtq_n_s32_f32(opsin_y_left, 15));
+    float32x4_t opsin_y_right =
+        vld1q_f32(row_in_y + x + (x + 4 < xsize ? 4 : 0));
+    int16x4_t opsin_y16_right = vqmovn_s32(vcvtq_n_s32_f32(opsin_y_right, 15));
+    int16x8_t opsin_y16 = vcombine_s16(opsin_y16_left, opsin_y16_right);
+
+    float32x4_t opsin_b_left = vld1q_f32(row_in_b + x);
+    int16x4_t opsin_b16_left = vqmovn_s32(vcvtq_n_s32_f32(opsin_b_left, 15));
+    float32x4_t opsin_b_right =
+        vld1q_f32(row_in_b + x + (x + 4 < xsize ? 4 : 0));
+    int16x4_t opsin_b16_right = vqmovn_s32(vcvtq_n_s32_f32(opsin_b_right, 15));
+    int16x8_t opsin_b16 = vcombine_s16(opsin_b16_left, opsin_b16_right);
+
+    int16x8_t neg_bias16 = vdupq_n_s16(-124);        // -0.0037930732552754493
+    int16x8_t neg_bias_cbrt16 = vdupq_n_s16(-5110);  // -0.155954201
+    int16x8_t neg_bias_half16 = vdupq_n_s16(-62);
+
+    // Color space: XYB -> RGB
+    // Compute ((y+x-bias_cbrt)^3-(y-x-bias_cbrt)^3)/2,
+    // ((y+x-bias_cbrt)^3+(y-x-bias_cbrt)^3)/2+bias, (b-bias_cbrt)^3+bias.
+    // Note that ignoring x2 in the formulas below (as x << y) results in
+    // errors of at least 3 in the final sRGB values.
+    int16x8_t opsin_yp16 = vqsubq_s16(opsin_y16, neg_bias_cbrt16);
+    int16x8_t ysq16 = vqrdmulhq_s16(opsin_yp16, opsin_yp16);
+    int16x8_t twentyfourx16 = vmulq_n_s16(opsin_x16_times8, 3);
+    int16x8_t twentyfourxy16 = vqrdmulhq_s16(opsin_yp16, twentyfourx16);
+    int16x8_t threexsq16 =
+        vrshrq_n_s16(vqrdmulhq_s16(opsin_x16_times8, twentyfourx16), 6);
+
+    // We can ignore x^3 here. Note that this is multiplied by 8.
+    int16x8_t mixed_rmg16 = vqrdmulhq_s16(twentyfourxy16, opsin_yp16);
+
+    int16x8_t mixed_rpg_sos_half = vhaddq_s16(ysq16, threexsq16);
+    int16x8_t mixed_rpg16 = vhaddq_s16(
+        vqrdmulhq_s16(opsin_yp16, mixed_rpg_sos_half), neg_bias_half16);
+
+    int16x8_t gamma_b16 = vqsubq_s16(opsin_b16, neg_bias_cbrt16);
+    int16x8_t gamma_bsq16 = vqrdmulhq_s16(gamma_b16, gamma_b16);
+    int16x8_t gamma_bcb16 = vqrdmulhq_s16(gamma_bsq16, gamma_b16);
+    int16x8_t mixed_b16 = vqaddq_s16(gamma_bcb16, neg_bias16);
+    // mixed_rpg and mixed_b are in 0-1 range.
+    // mixed_rmg has a smaller range (-0.035 to 0.035 for valid sRGB). Note
+    // that at this point it is already multiplied by 8.
+
+    // We multiply all the mixed values by 1/4 (i.e. shift them to 13-bit
+    // fixed point) to ensure intermediate quantities are in range. Note that
+    // r-g is not shifted, and was x8 before here; this corresponds to a x32
+    // overall multiplicative factor and ensures that all the matrix constants
+    // are in 0-1 range.
+    // Similarly, mixed_rpg16 is already multiplied by 1/4 because of the two
+    // vhadd + using neg_bias_half.
+    mixed_b16 = vshrq_n_s16(mixed_b16, 2);
+
+    // Unmix (multiply by 3x3 inverse_matrix)
+    // For increased precision, we use a matrix for converting from
+    // ((mixed_r - mixed_g)/2, (mixed_r + mixed_g)/2, mixed_b) to rgb. This
+    // avoids cancellation effects when computing (y+x)^3-(y-x)^3.
+    // We compute mixed_rpg - mixed_b because the (1+c)*mixed_rpg - c *
+    // mixed_b pattern is repeated frequently in the code below. This allows
+    // us to save a multiply per channel, and removes the presence of
+    // some constants above 1. Moreover, mixed_rpg - mixed_b is in (-1, 1)
+    // range, so the subtraction is safe.
+    // All the magic-looking constants here are derived by computing the
+    // inverse opsin matrix for the transformation modified as described
+    // above.
+
+    // Precomputation common to multiple color values.
+    int16x8_t mixed_rpgmb16 = vqsubq_s16(mixed_rpg16, mixed_b16);
+    int16x8_t mixed_rpgmb_times_016 = vqrdmulhq_n_s16(mixed_rpgmb16, 5394);
+    int16x8_t mixed_rg16 = vqaddq_s16(mixed_rpgmb_times_016, mixed_rpg16);
+
+    // R
+    int16x8_t linear_r16 =
+        vqaddq_s16(mixed_rg16, vqrdmulhq_n_s16(mixed_rmg16, 21400));
+
+    // G
+    int16x8_t linear_g16 =
+        vqaddq_s16(mixed_rg16, vqrdmulhq_n_s16(mixed_rmg16, -7857));
+
+    // B
+    int16x8_t linear_b16 = vqrdmulhq_n_s16(mixed_rpgmb16, -30996);
+    linear_b16 = vqaddq_s16(linear_b16, mixed_b16);
+    linear_b16 = vqaddq_s16(linear_b16, vqrdmulhq_n_s16(mixed_rmg16, -6525));
+
+    // Apply SRGB transfer function.
+    int16x8_t r = srgb_tf(linear_r16);
+    int16x8_t g = srgb_tf(linear_g16);
+    int16x8_t b = srgb_tf(linear_b16);
+
+    uint8x8_t r8 =
+        vqmovun_s16(vrshrq_n_s16(vsubq_s16(r, vshrq_n_s16(r, 8)), 6));
+    uint8x8_t g8 =
+        vqmovun_s16(vrshrq_n_s16(vsubq_s16(g, vshrq_n_s16(g, 8)), 6));
+    uint8x8_t b8 =
+        vqmovun_s16(vrshrq_n_s16(vsubq_s16(b, vshrq_n_s16(b, 8)), 6));
+
+    size_t n = xsize - x;  // pixels left in this row; may be fewer than 8
+    if (is_rgba) {
+      float32x4_t a_f32_left =
+          row_in_a ? vld1q_f32(row_in_a + x) : vdupq_n_f32(1.0f);
+      float32x4_t a_f32_right =
+          row_in_a ? vld1q_f32(row_in_a + x + (x + 4 < xsize ? 4 : 0))
+                   : vdupq_n_f32(1.0f);
+      int16x4_t a16_left = vqmovn_s32(vcvtq_n_s32_f32(a_f32_left, 8));
+      int16x4_t a16_right = vqmovn_s32(vcvtq_n_s32_f32(a_f32_right, 8));
+      uint8x8_t a8 = vqmovun_s16(vcombine_s16(a16_left, a16_right));
+      uint8_t* buf = output + 4 * x;
+      uint8x8x4_t data = {r8, g8, b8, a8};
+      if (n >= 8) {
+        vst4_u8(buf, data);
+      } else {
+        uint8_t tmp[8 * 4];
+        vst4_u8(tmp, data);
+        memcpy(buf, tmp, n * 4);
+      }
+    } else {
+      uint8_t* buf = output + 3 * x;
+      uint8x8x3_t data = {r8, g8, b8};
+      if (n >= 8) {
+        vst3_u8(buf, data);
+      } else {
+        uint8_t tmp[8 * 3];
+        vst3_u8(tmp, data);
+        memcpy(buf, tmp, n * 3);
+      }
+    }
+  }
+#else
+  (void)input;
+  (void)output;
+  (void)is_rgba;
+  (void)xsize;
+  JXL_UNREACHABLE("Unreachable");
+#endif
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_DEC_XYB_INL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_xyb.cc b/third-party/libjxl/libjxl/lib/jxl/dec_xyb.cc
new file mode 100644
index 0000000000..bbd373f239
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_xyb.cc
@@ -0,0 +1,325 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_xyb.h"
+
+#include <string.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dec_xyb.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_group_border.h"
+#include "lib/jxl/dec_xyb-inl.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/matrix_ops.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/sanitizers.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::MulAdd;
+
+void OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool,
+                          const OpsinParams& opsin_params) {
+  JXL_CHECK_IMAGE_INITIALIZED(*inout, Rect(*inout));
+
+  const size_t width = inout->xsize();  // not padded
+
+  // Converts one image row from XYB to linear RGB, overwriting its planes.
+  const auto convert_row = [&](const uint32_t task, size_t /* thread */) {
+    const HWY_FULL(float) d;
+    const size_t lanes = Lanes(d);
+    const size_t row = task;
+
+    // Faster than adding via ByteOffset at end of loop.
+    float* JXL_RESTRICT plane_x = inout->PlaneRow(0, row);
+    float* JXL_RESTRICT plane_y = inout->PlaneRow(1, row);
+    float* JXL_RESTRICT plane_b = inout->PlaneRow(2, row);
+
+    for (size_t col = 0; col < width; col += lanes) {
+      auto out_r = Undefined(d);
+      auto out_g = Undefined(d);
+      auto out_b = Undefined(d);
+      // All three loads happen before any store to the same addresses.
+      XybToRgb(d, Load(d, plane_x + col), Load(d, plane_y + col),
+               Load(d, plane_b + col), opsin_params, &out_r, &out_g, &out_b);
+      Store(out_r, d, plane_x + col);
+      Store(out_g, d, plane_y + col);
+      Store(out_b, d, plane_b + col);
+    }
+  };
+
+  // One task per image row.
+  JXL_CHECK(RunOnPool(pool, 0, inout->ysize(), ThreadPool::NoInit, convert_row,
+                      "OpsinToLinear"));
+}
+
+// Out-of-place variant: reads `opsin` through `rect`, writes into `linear`.
+void OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool,
+                   Image3F* JXL_RESTRICT linear,
+                   const OpsinParams& opsin_params) {
+  JXL_ASSERT(SameSize(rect, *linear));
+  JXL_CHECK_IMAGE_INITIALIZED(opsin, rect);
+
+  // Converts row `task` of the rectangle from XYB to linear RGB and stores it
+  // in the same row of `linear`.
+  const auto convert_row = [&](const uint32_t task, size_t /*thread*/) {
+    const HWY_FULL(float) d;
+    const size_t lanes = Lanes(d);
+    const size_t width = rect.xsize();
+    const size_t row = static_cast<size_t>(task);
+
+    // Faster than adding via ByteOffset at end of loop.
+    const float* JXL_RESTRICT src_x = rect.ConstPlaneRow(opsin, 0, row);
+    const float* JXL_RESTRICT src_y = rect.ConstPlaneRow(opsin, 1, row);
+    const float* JXL_RESTRICT src_b = rect.ConstPlaneRow(opsin, 2, row);
+    float* JXL_RESTRICT dst_r = linear->PlaneRow(0, row);
+    float* JXL_RESTRICT dst_g = linear->PlaneRow(1, row);
+    float* JXL_RESTRICT dst_b = linear->PlaneRow(2, row);
+
+    for (size_t col = 0; col < width; col += lanes) {
+      auto out_r = Undefined(d);
+      auto out_g = Undefined(d);
+      auto out_b = Undefined(d);
+      XybToRgb(d, Load(d, src_x + col), Load(d, src_y + col),
+               Load(d, src_b + col), opsin_params, &out_r, &out_g, &out_b);
+      Store(out_r, d, dst_r + col);
+      Store(out_g, d, dst_g + col);
+      Store(out_b, d, dst_b + col);
+    }
+  };
+
+  // One task per rectangle row.
+  JXL_CHECK(RunOnPool(pool, 0, static_cast<int>(rect.ysize()),
+                      ThreadPool::NoInit, convert_row, "OpsinToLinear(Rect)"));
+  JXL_CHECK_IMAGE_INITIALIZED(*linear, rect);
+}
+
+// Transform YCbCr to RGB.
+// Could be performed in-place (i.e. Y, Cb and Cr could alias R, G and B).
+void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect) {
+  JXL_CHECK_IMAGE_INITIALIZED(ycbcr, rect);
+  const HWY_CAPPED(float, kBlockDim) df;
+  const size_t S = Lanes(df);  // Step.
+
+  const size_t xsize = rect.xsize();
+  const size_t ysize = rect.ysize();
+  if ((xsize == 0) || (ysize == 0)) return;
+
+  // Full-range BT.601 as defined by JFIF Clause 7:
+  // https://www.itu.int/rec/T-REC-T.871-201105-I/en
+  const auto c128 = Set(df, 128.0f / 255);
+  const auto crcr = Set(df, 1.402f);
+  const auto cgcb = Set(df, -0.114f * 1.772f / 0.587f);
+  const auto cgcr = Set(df, -0.299f * 1.402f / 0.587f);
+  const auto cbcb = Set(df, 1.772f);
+
+  for (size_t y = 0; y < ysize; y++) {
+    const float* y_row = rect.ConstPlaneRow(ycbcr, 1, y);  // Y: plane 1
+    const float* cb_row = rect.ConstPlaneRow(ycbcr, 0, y);  // Cb: plane 0
+    const float* cr_row = rect.ConstPlaneRow(ycbcr, 2, y);  // Cr: plane 2
+    float* r_row = rect.PlaneRow(rgb, 0, y);
+    float* g_row = rect.PlaneRow(rgb, 1, y);
+    float* b_row = rect.PlaneRow(rgb, 2, y);
+    for (size_t x = 0; x < xsize; x += S) {
+      const auto y_vec = Add(Load(df, y_row + x), c128);  // Y + 128/255
+      const auto cb_vec = Load(df, cb_row + x);
+      const auto cr_vec = Load(df, cr_row + x);
+      const auto r_vec = MulAdd(crcr, cr_vec, y_vec);
+      const auto g_vec = MulAdd(cgcr, cr_vec, MulAdd(cgcb, cb_vec, y_vec));
+      const auto b_vec = MulAdd(cbcb, cb_vec, y_vec);
+      Store(r_vec, df, r_row + x);
+      Store(g_vec, df, g_row + x);
+      Store(b_vec, df, b_row + x);
+    }
+  }
+  JXL_CHECK_IMAGE_INITIALIZED(*rgb, rect);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(OpsinToLinearInplace);  // per-target versions: HWY_NAMESPACE above
+void OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool,
+                          const OpsinParams& opsin_params) {
+  HWY_DYNAMIC_DISPATCH(OpsinToLinearInplace)(inout, pool, opsin_params);
+}
+
+HWY_EXPORT(OpsinToLinear);  // per-target versions: HWY_NAMESPACE above
+void OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool,
+                   Image3F* JXL_RESTRICT linear,
+                   const OpsinParams& opsin_params) {
+  // Void call: every argument is forwarded unchanged.
+  HWY_DYNAMIC_DISPATCH(OpsinToLinear)(opsin, rect, pool, linear, opsin_params);
+}
+
+HWY_EXPORT(YcbcrToRgb);  // per-target versions: HWY_NAMESPACE above
+void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect) {
+  HWY_DYNAMIC_DISPATCH(YcbcrToRgb)(ycbcr, rgb, rect);
+}
+
+HWY_EXPORT(HasFastXYBTosRGB8);  // answer comes from the dispatched target
+bool HasFastXYBTosRGB8() { return HWY_DYNAMIC_DISPATCH(HasFastXYBTosRGB8)(); }
+
+HWY_EXPORT(FastXYBTosRGB8);  // per-target versions: dec_xyb-inl.h
+void FastXYBTosRGB8(const float* input[4], uint8_t* output, bool is_rgba,
+                    size_t xsize) {
+  HWY_DYNAMIC_DISPATCH(FastXYBTosRGB8)(input, output, is_rgba, xsize);
+}
+
+void OpsinParams::Init(float intensity_target) {
+  // Default biases, then the cube root of each of the four entries.
+  memcpy(opsin_biases, kNegOpsinAbsorbanceBiasRGB,
+         sizeof(kNegOpsinAbsorbanceBiasRGB));
+  for (size_t i = 0; i < 4; ++i) opsin_biases_cbrt[i] = cbrtf(opsin_biases[i]);
+  memcpy(quant_biases, kDefaultQuantBias, sizeof(kDefaultQuantBias));
+  // Default inverse opsin matrix, set up for `intensity_target`.
+  InitSIMDInverseMatrix(GetOpsinAbsorbanceInverseMatrix(), inverse_opsin_matrix,
+                        intensity_target);
+}
+
+bool CanOutputToColorEncoding(const ColorEncoding& c_desired) {
+  // An encoding without fields is rejected outright.
+  if (!c_desired.HaveFields()) return false;
+
+  // TODO(veluca): keep in sync with dec_reconstruct.cc
+  const auto& tf = c_desired.tf;
+  const bool known_tf = tf.IsPQ() || tf.IsSRGB() || tf.IsGamma() ||
+                        tf.IsLinear() || tf.IsHLG() || tf.IsDCI() ||
+                        tf.Is709();
+  if (!known_tf) return false;
+
+  // TODO(veluca): figure out what should happen here.
+  const bool gray_without_d65 =
+      c_desired.IsGray() && c_desired.white_point != WhitePoint::kD65;
+  return !gray_without_d65;
+}
+
+Status OutputEncodingInfo::SetFromMetadata(const CodecMetadata& metadata) {
+  xyb_encoded = metadata.m.xyb_encoded;
+  orig_color_encoding = metadata.m.color_encoding;
+  orig_intensity_target = metadata.m.IntensityTarget();
+  desired_intensity_target = orig_intensity_target;
+  const auto& inv = metadata.transform_data.opsin_inverse_matrix;
+  default_transform = inv.all_default;
+  memcpy(orig_inverse_matrix, inv.inverse_matrix, sizeof(orig_inverse_matrix));
+  std::copy(std::begin(inv.quant_biases), std::end(inv.quant_biases),
+            opsin_params.quant_biases);
+  std::copy(std::begin(inv.opsin_biases), std::end(inv.opsin_biases),
+            opsin_params.opsin_biases);
+  for (int c = 0; c < 3; ++c) {
+    opsin_params.opsin_biases_cbrt[c] = cbrtf(opsin_params.opsin_biases[c]);
+  }
+  opsin_params.opsin_biases_cbrt[3] = opsin_params.opsin_biases[3] = 1;
+  // Fall back to linear sRGB only for XYB images whose encoding is unsupported.
+  const bool orig_ok = CanOutputToColorEncoding(orig_color_encoding);
+  if (!xyb_encoded || orig_ok) return SetColorEncoding(orig_color_encoding);
+  return SetColorEncoding(
+      ColorEncoding::LinearSRGB(orig_color_encoding.IsGray()));
+}
+
+Status OutputEncodingInfo::MaybeSetColorEncoding(
+    const ColorEncoding& c_desired) {
+  // XYB output is refused while the current encoding is non-sRGB RGB or PQ.
+  if (c_desired.GetColorSpace() == ColorSpace::kXYB) {
+    const bool rgb_not_srgb =
+        color_encoding.GetColorSpace() == ColorSpace::kRGB &&
+        color_encoding.primaries != Primaries::kSRGB;
+    if (rgb_not_srgb || color_encoding.tf.IsPQ()) return false;
+  }
+  // Images that are not XYB-encoded need an encoding that passes the check.
+  if (!(xyb_encoded || CanOutputToColorEncoding(c_desired))) return false;
+  return SetColorEncoding(c_desired);
+}
+
+Status OutputEncodingInfo::SetColorEncoding(const ColorEncoding& c_desired) {
+  color_encoding = c_desired;
+  color_encoding_is_original = orig_color_encoding.SameColorEncoding(c_desired);
+
+  // Start from the metadata matrix and sRGB luminances; both are recomputed
+  // below from c_desired's primaries and white point when those are not sRGB.
+  float inverse_matrix[9];
+  bool inverse_matrix_is_default = default_transform;
+  memcpy(inverse_matrix, orig_inverse_matrix, sizeof(inverse_matrix));
+  constexpr float kSRGBLuminances[3] = {0.2126, 0.7152, 0.0722};
+  memcpy(luminances, kSRGBLuminances, sizeof(luminances));
+  if ((c_desired.primaries != Primaries::kSRGB ||
+       c_desired.white_point != WhitePoint::kD65) &&
+      !c_desired.IsGray()) {
+    float srgb_to_xyzd50[9];
+    const auto& srgb = ColorEncoding::SRGB(/*is_gray=*/false);
+    JXL_CHECK(PrimariesToXYZD50(
+        srgb.GetPrimaries().r.x, srgb.GetPrimaries().r.y,
+        srgb.GetPrimaries().g.x, srgb.GetPrimaries().g.y,
+        srgb.GetPrimaries().b.x, srgb.GetPrimaries().b.y,
+        srgb.GetWhitePoint().x, srgb.GetWhitePoint().y, srgb_to_xyzd50));
+    float original_to_xyz[3][3];
+    JXL_RETURN_IF_ERROR(PrimariesToXYZ(
+        c_desired.GetPrimaries().r.x, c_desired.GetPrimaries().r.y,
+        c_desired.GetPrimaries().g.x, c_desired.GetPrimaries().g.y,
+        c_desired.GetPrimaries().b.x, c_desired.GetPrimaries().b.y,
+        c_desired.GetWhitePoint().x, c_desired.GetWhitePoint().y,
+        &original_to_xyz[0][0]));
+    memcpy(luminances, original_to_xyz[1], sizeof luminances);  // matrix row 1
+    if (xyb_encoded) {
+      float adapt_to_d50[9];
+      JXL_RETURN_IF_ERROR(AdaptToXYZD50(c_desired.GetWhitePoint().x,
+                                        c_desired.GetWhitePoint().y,
+                                        adapt_to_d50));
+      float xyzd50_to_original[9];
+      Mul3x3Matrix(adapt_to_d50, &original_to_xyz[0][0], xyzd50_to_original);
+      JXL_RETURN_IF_ERROR(Inv3x3Matrix(xyzd50_to_original));
+      float srgb_to_original[9];
+      Mul3x3Matrix(xyzd50_to_original, srgb_to_xyzd50, srgb_to_original);
+      Mul3x3Matrix(srgb_to_original, orig_inverse_matrix, inverse_matrix);
+      inverse_matrix_is_default = false;
+    }
+  }
+
+  if (c_desired.IsGray()) {  // all three output rows become the luminance mix
+    float tmp_inv_matrix[9];
+    memcpy(tmp_inv_matrix, inverse_matrix, sizeof(inverse_matrix));
+    float srgb_to_luma[9];
+    memcpy(&srgb_to_luma[0], luminances, sizeof(luminances));
+    memcpy(&srgb_to_luma[3], luminances, sizeof(luminances));
+    memcpy(&srgb_to_luma[6], luminances, sizeof(luminances));
+    Mul3x3Matrix(srgb_to_luma, tmp_inv_matrix, inverse_matrix);
+  }
+
+  // The internal XYB color space uses absolute luminance, so we scale back the
+  // opsin inverse matrix to relative luminance where 1.0 corresponds to the
+  // original intensity target, or to absolute luminance for PQ, where 1.0
+  // corresponds to 10000 nits.
+  if (xyb_encoded) {
+    float intensity_target =
+        (c_desired.tf.IsPQ() ? 10000 : orig_intensity_target);
+    InitSIMDInverseMatrix(inverse_matrix, opsin_params.inverse_opsin_matrix,
+                          intensity_target);
+    all_default_opsin = (std::abs(intensity_target - 255.0) <= 0.1f &&
+                         inverse_matrix_is_default);
+  }
+
+  // Inverse gamma: the encoding's gamma if it has one, 1/2.6 for DCI, else 1.
+  inverse_gamma = (c_desired.tf.IsGamma() ? c_desired.tf.GetGamma()
+                   : c_desired.tf.IsDCI() ? 1.0f / 2.6f
+                                          : 1.0);
+  return true;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_xyb.h b/third-party/libjxl/libjxl/lib/jxl/dec_xyb.h
new file mode 100644
index 0000000000..ebaae9a176
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_xyb.h
@@ -0,0 +1,89 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_XYB_H_
+#define LIB_JXL_DEC_XYB_H_
+
+// XYB -> linear sRGB.
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_metadata.h"
+#include "lib/jxl/opsin_params.h"
+
+namespace jxl {
+
+// Parameters for XYB->sRGB conversion.
+struct OpsinParams {
+  float inverse_opsin_matrix[9 * 4];  // 3x3 entries, each broadcast to 4 floats
+  float opsin_biases[4];
+  float opsin_biases_cbrt[4];  // cube roots of opsin_biases
+  float quant_biases[4];
+  void Init(float intensity_target);  // fills all four arrays with defaults
+};
+
+struct OutputEncodingInfo {
+  //
+  // Fields depending only on image metadata
+  //
+  ColorEncoding orig_color_encoding;
+  // Used for the HLG OOTF and PQ tone mapping.
+  float orig_intensity_target;
+  // Opsin inverse matrix taken from the metadata.
+  float orig_inverse_matrix[9];
+  bool default_transform;  // all_default flag of the metadata matrix
+  bool xyb_encoded;        // copied from metadata.m.xyb_encoded
+  //
+  // Fields depending on output color encoding
+  //
+  ColorEncoding color_encoding;
+  bool color_encoding_is_original;  // color_encoding same as the original one
+  // Contains an opsin matrix that converts to the primaries of the output
+  // encoding.
+  OpsinParams opsin_params;
+  bool all_default_opsin;  // only assigned when xyb_encoded
+  // Used for Gamma and DCI transfer functions.
+  float inverse_gamma;
+  // Luminances of color_encoding's primaries, used for the HLG inverse OOTF and
+  // for PQ tone mapping.
+  // Defaults to sRGB's (kSRGBLuminances in SetColorEncoding).
+  float luminances[3];
+  // Used for the HLG inverse OOTF and PQ tone mapping.
+  float desired_intensity_target;
+
+  Status SetFromMetadata(const CodecMetadata& metadata);
+  Status MaybeSetColorEncoding(const ColorEncoding& c_desired);
+
+ private:
+  Status SetColorEncoding(const ColorEncoding& c_desired);
+};
+
+// Converts `inout` (not padded) from opsin to linear sRGB in-place. Called from
+// per-pass postprocessing, hence parallelized.
+void OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool,
+                          const OpsinParams& opsin_params);
+
+// Converts `opsin:rect` (opsin may be padded, rect.x0 must be vector-aligned)
+// to linear sRGB. Called from whole-frame encoder, hence parallelized.
+void OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool,
+                   Image3F* JXL_RESTRICT linear,
+                   const OpsinParams& opsin_params);
+
+// Bt.601 to match JPEG/JFIF. Inputs are _signed_ YCbCr values suitable for DCT,
+// see F.1.1.3 of T.81 (because our data type is float, there is no need to add
+// a bias to make the values unsigned).
+void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect);
+
+bool HasFastXYBTosRGB8();  // true only when the dispatched target is NEON
+void FastXYBTosRGB8(const float* input[4], uint8_t* output, bool is_rgba,
+                    size_t xsize);  // unreachable unless HasFastXYBTosRGB8()
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_XYB_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/decode.cc b/third-party/libjxl/libjxl/lib/jxl/decode.cc
new file mode 100644
index 0000000000..16ddcbcb1b
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/decode.cc
@@ -0,0 +1,2790 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/decode.h>
+#include <jxl/types.h>
+
+#include <algorithm>
+#include <array>
+#include <functional>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#if JPEGXL_ENABLE_BOXES || JPEGXL_ENABLE_TRANSCODE_JPEG
+#include "lib/jxl/box_content_decoder.h"
+#endif
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/dec_frame.h"
+#include "lib/jxl/dec_modular.h"
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+#include "lib/jxl/decode_to_jpeg.h"
+#endif
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/icc_codec.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/memory_manager_internal.h"
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/toc.h"
+
+namespace {
+
+// Returns true when a + b exceeds size, also if the sum wraps around size_t.
+bool OutOfBounds(size_t a, size_t b, size_t size) {
+  const size_t end = a + b;
+  // An unsigned sum that wrapped around is smaller than its operands.
+  const bool wrapped = end < a;
+  return wrapped || end > size;
+}
+
+JXL_INLINE size_t InitialBasicInfoSizeHint() {
+  // Bytes preceding the codestream in the container format when the codestream
+  // box directly follows the signature and filetype boxes: 12 bytes signature
+  // box + 20 bytes filetype box + 16 bytes codestream box length + name +
+  // optional XLBox length.
+  constexpr size_t kContainerPrefix = 48;
+
+  // Worst-case number of bytes for the basic info of the JPEG XL codestream
+  // header, i.e. everything up to and including extra_channel_bits: about 2
+  // bytes signature + 8 bytes SizeHeader + 31 bytes ColorEncoding + 4 bytes
+  // rest of ImageMetadata + 5 bytes part of ImageMetadata2.
+  // TODO(lode): recompute and update this value when alpha_bits is moved to
+  // extra channels info.
+  constexpr size_t kMaxCodestreamBasicInfo = 50;
+
+  return kContainerPrefix + kMaxCodestreamBasicInfo;
+}
+
+// Debug-printing failure macro similar to JXL_FAILURE, but for the status code
+// JXL_DEC_ERROR
+#ifdef JXL_CRASH_ON_ERROR
+#define JXL_API_ERROR(format, ...)                                           \
+  (::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__), \
+   ::jxl::Abort(), JXL_DEC_ERROR)
+#else  // JXL_CRASH_ON_ERROR
+#define JXL_API_ERROR(format, ...)                                             \
+  (((JXL_DEBUG_ON_ERROR) &&                                                    \
+    ::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__)), \
+   JXL_DEC_ERROR)
+#endif  // JXL_CRASH_ON_ERROR
+
+// Error caused by bad input (invalid file) rather than incorrect API usage.
+// For now there is no way to distinguish these two types of errors yet.
+#define JXL_INPUT_ERROR(format, ...) JXL_API_ERROR(format, ##__VA_ARGS__)
+
+JxlDecoderStatus ConvertStatus(JxlDecoderStatus status) { return status; }
+
+JxlDecoderStatus ConvertStatus(jxl::Status status) {
+  return !status ? JXL_DEC_ERROR : JXL_DEC_SUCCESS;  // failed Status -> error
+}
+
+#define JXL_API_RETURN_IF_ERROR(expr)               \
+  {                                                 \
+    JxlDecoderStatus status_ = ConvertStatus(expr); \
+    if (status_ != JXL_DEC_SUCCESS) return status_; \
+  }
+
+JxlSignature ReadSignature(const uint8_t* buf, size_t len, size_t* pos) {
+  // Detects a JPEG XL signature starting at buf[*pos]. On a match, *pos is
+  // advanced past the signature; in every other case *pos is left untouched.
+  // JXL_SIG_NOT_ENOUGH_BYTES means more bytes are needed to decide.
+  if (*pos >= len) return JXL_SIG_NOT_ENOUGH_BYTES;
+
+  // At least one byte is available from here on.
+  const uint8_t* sig = buf + *pos;
+  const size_t avail = len - *pos;
+
+  // Bare JPEG XL codestream: 0xff 0x0a
+  if (sig[0] == 0xff) {
+    if (avail < 2) return JXL_SIG_NOT_ENOUGH_BYTES;
+    if (sig[1] != jxl::kCodestreamMarker) return JXL_SIG_INVALID;
+    *pos += 2;
+    return JXL_SIG_CODESTREAM;
+  }
+
+  // Anything else can only be the signature of the JPEG XL container, which
+  // starts with a zero byte and is 12 bytes long.
+  if (sig[0] != 0) return JXL_SIG_INVALID;
+  if (avail < 12) return JXL_SIG_NOT_ENOUGH_BYTES;
+
+  // The 12 bytes of the container signature.
+  static const uint8_t kContainerSignature[12] = {
+      0, 0, 0, 0xC, 'J', 'X', 'L', ' ', 0xD, 0xA, 0x87, 0xA};
+
+  // Byte 0 was already checked above.
+  for (size_t i = 1; i < sizeof(kContainerSignature); ++i) {
+    if (sig[i] != kContainerSignature[i]) return JXL_SIG_INVALID;
+  }
+
+  *pos += 12;
+  return JXL_SIG_CONTAINER;
+}
+
+}  // namespace
+
+uint32_t JxlDecoderVersion(void) {
+  return JPEGXL_PATCH_VERSION + JPEGXL_MINOR_VERSION * 1000 +
+         JPEGXL_MAJOR_VERSION * 1000000;  // e.g. 0.8.2 -> 8002
+}
+
+JxlSignature JxlSignatureCheck(const uint8_t* buf, size_t len) {
+  size_t offset = 0;  // check from the first byte; the new offset is unused
+  return ReadSignature(buf, len, &offset);
+}
+
+namespace {
+
+size_t BitsPerChannel(JxlDataType data_type) {
+  // Number of bits occupied by one sample of the given data type.
+  switch (data_type) {
+    case JXL_TYPE_UINT8:
+      return 8;
+    case JXL_TYPE_UINT16:
+    case JXL_TYPE_FLOAT16:
+      return 16;
+    case JXL_TYPE_FLOAT:
+      return 32;
+    default:
+      return 0;  // signals unhandled JxlDataType
+  }
+}
+
+template <typename T>
+uint32_t GetBitDepth(JxlBitDepth bit_depth, const T& metadata,
+                     JxlPixelFormat format) {
+  // Picks the bit depth from the requested source; unknown sources yield 0.
+  const auto source = bit_depth.type;
+  if (source == JXL_BIT_DEPTH_FROM_PIXEL_FORMAT) {
+    return BitsPerChannel(format.data_type);
+  }
+  if (source == JXL_BIT_DEPTH_CUSTOM) return bit_depth.bits_per_sample;
+  if (source != JXL_BIT_DEPTH_FROM_CODESTREAM) return 0;
+  return metadata.bit_depth.bits_per_sample;
+}
+
+enum class DecoderStage : uint32_t {
+  kInited,              // Created or rewound, no JxlDecoderProcessInput yet
+  kStarted,             // Running JxlDecoderProcessInput calls
+  kCodestreamFinished,  // Codestream done, but other boxes could still occur.
+                        // This stage can also occur before having seen the
+                        // entire codestream if the user didn't subscribe to any
+                        // codestream events at all, e.g. only to box events,
+                        // or, the user only subscribed to basic info, and only
+                        // the header of the codestream was parsed.
+  kError,               // Error occurred, decoder object no longer usable
+};
+
+enum class FrameStage : uint32_t {
+  kHeader,  // Next step: parse the frame header.
+  kTOC,     // Next step: parse the TOC of the frame.
+  kFull,    // Next step: parse the full pixels of the frame.
+};
+
+enum class BoxStage : uint32_t {
+  kHeader,      // Parsing box header of the next box, or start of non-container
+                // stream
+  kFtyp,        // Handling the ftyp (filetype) box
+  kSkip,        // Box whose contents are skipped
+  kCodestream,  // Handling codestream box contents, or non-container stream
+  kPartialCodestream,  // Extra header of a partial codestream (jxlp) box
+  kJpegRecon,          // Handling jpeg reconstruction (jbrd) box
+};
+
+enum class JpegReconStage : uint32_t {
+  kNone,             // Not outputting (also the state after a rewind)
+  kSettingMetadata,  // Ready to output, must set metadata to the jpeg_data
+  kOutputting,       // Currently outputting the JPEG bytes
+  kFinished,         // JPEG reconstruction fully handled
+};
+
+/*
+Given, for each frame, the storage slots it references and the storage slots in
+which it is saved (both as bit masks over the 8 slots), computes which frames
+are required to decode the frame at the given index and any frames after it.
+The frames on which this depends are returned as a vector of their indices, in
+no particular order. The given index must be smaller than saved_as.size(), and
+references.size() must equal saved_as.size(). Any frames beyond saved_as and
+references are unknown future frames, treated as if something depends on them.
+*/
+std::vector<size_t> GetFrameDependencies(size_t index,
+                                         const std::vector<int>& saved_as,
+                                         const std::vector<int>& references) {
+  JXL_ASSERT(references.size() == saved_as.size());
+  JXL_ASSERT(index < references.size());
+
+  std::vector<size_t> result;
+
+  constexpr size_t kNumStorage = 8;
+
+  // Value which indicates nothing is stored in this storage slot.
+  const size_t invalid = references.size();
+  // For each of the 8 storage slots, a vector that translates frame index to
+  // the frame stored in this storage slot at this point, that is, the last
+  // frame that was stored in this slot before or at this index.
+  std::array<std::vector<size_t>, kNumStorage> storage;
+  for (size_t s = 0; s < kNumStorage; ++s) {
+    storage[s].resize(saved_as.size());
+    int mask = 1 << s;  // bit of slot s in the saved_as masks
+    size_t id = invalid;
+    for (size_t i = 0; i < saved_as.size(); ++i) {
+      if (saved_as[i] & mask) {
+        id = i;
+      }
+      storage[s][i] = id;
+    }
+  }
+
+  std::vector<char> seen(index + 1, 0);  // dependencies never exceed index
+  std::vector<size_t> stack;
+  stack.push_back(index);
+  seen[index] = 1;
+
+  // For frames after index, assume they can depend on any of the 8 storage
+  // slots, so push the frame for each stored reference to the stack and result.
+  // All frames after index are treated as having unknown references and with
+  // the possibility that there are more frames after the last known.
+  // TODO(lode): take values of saved_as and references after index, and a
+  // input flag indicating if they are all frames of the image, to further
+  // optimize this.
+  for (size_t s = 0; s < kNumStorage; ++s) {
+    size_t frame_ref = storage[s][index];
+    if (frame_ref == invalid) continue;
+    if (seen[frame_ref]) continue;
+    stack.push_back(frame_ref);
+    seen[frame_ref] = 1;
+    result.push_back(frame_ref);
+  }
+
+  while (!stack.empty()) {
+    size_t frame_index = stack.back();
+    stack.pop_back();
+    if (frame_index == 0) continue;  // first frame cannot have references
+    for (size_t s = 0; s < kNumStorage; ++s) {
+      int mask = 1 << s;
+      if (!(references[frame_index] & mask)) continue;
+      size_t frame_ref = storage[s][frame_index - 1];  // as of previous frame
+      if (frame_ref == invalid) continue;
+      if (seen[frame_ref]) continue;
+      stack.push_back(frame_ref);
+      seen[frame_ref] = 1;
+      result.push_back(frame_ref);
+    }
+  }
+
+  return result;
+}
+
+// Parameters for user-requested extra channel output.
+struct ExtraChannelOutput {
+  JxlPixelFormat format;  // pixel format requested for this output
+  void* buffer;           // NOTE(review): presumably caller-owned - confirm
+  size_t buffer_size;     // presumably the size of buffer in bytes - confirm
+};
+
+}  // namespace
+
+namespace jxl {
+
+typedef struct JxlDecoderFrameIndexBoxEntryStruct {
+  // OFFi: offset of the start byte of this frame relative to the start byte
+  // of the previous frame from this index in the JPEG XL codestream. For the
+  // first frame, this is the offset from the first byte of the JPEG XL
+  // codestream.
+  uint64_t OFFi;
+  // Ti: duration in ticks between the start of this frame and
+  // the start of the next frame in the index. If this is the last frame in the
+  // index, this is the duration in ticks between the start of this frame and
+  // the end of the stream. A tick lasts TNUM / TDEN seconds.
+  uint32_t Ti;
+  // Fi: number of frames the next frame in the index occurs
+  // after this frame. If this is the last frame in the index, this is the
+  // number of frames after this frame in the remainder of the stream. Only
+  // frames that are presented by the decoder are counted for this purpose;
+  // this excludes frames that are not intended for display but for compositing
+  // with other frames, such as frames that aren't the last frame with a
+  // duration of 0 ticks.
+  uint32_t Fi;
+} JxlDecoderFrameIndexBoxEntry;
+
+typedef struct JxlDecoderFrameIndexBoxStruct {
+  int64_t NF() const { return entries.size(); }  // number of index entries
+  int32_t TNUM = 1;  // a tick lasts TNUM / TDEN seconds
+  int32_t TDEN = 1000;
+
+  std::vector<JxlDecoderFrameIndexBoxEntry> entries;
+
+  // Appends one entry with the given OFFi, Ti and Fi values to the index.
+  // NOTE(review): the previous comment here mentioned calling AddFrame again
+  // "with requested", but AddFrame takes no such argument - confirm intent.
+  void AddFrame(uint64_t OFFi, uint32_t Ti, uint32_t Fi) {
+    JxlDecoderFrameIndexBoxEntry e;
+    e.OFFi = OFFi;
+    e.Ti = Ti;
+    e.Fi = Fi;
+    entries.push_back(e);
+  }
+} JxlDecoderFrameIndexBox;
+
+}  // namespace jxl
+
+// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
+struct JxlDecoderStruct {
+  JxlDecoderStruct() = default;
+
+  JxlMemoryManager memory_manager;
+  std::unique_ptr<jxl::ThreadPool> thread_pool;
+
+  DecoderStage stage;
+
+  // Status of progression, internal.
+  bool got_signature;
+  // Indicates we know that we've seen the last codestream box: either this
+  // was a jxlc box, or a jxlp box that has its index indicated as last by
+  // having its most significant bit set, or no boxes are used at all. This
+  // does not indicate the full codestream has already been seen, only the
+  // last box of it has been initiated.
+  bool last_codestream_seen;
+  bool got_codestream_signature;
+  bool got_basic_info;
+  bool got_transform_data;  // To skip everything before ICC.
+  bool got_all_headers;     // Codestream metadata headers.
+  bool post_headers;        // Already decoding pixels.
+  jxl::ICCReader icc_reader;
+  jxl::JxlDecoderFrameIndexBox frame_index_box;
+  // This means either we actually got the preview image, or determined we
+  // cannot get it or there is none.
+  bool got_preview_image;
+  bool preview_frame;
+
+  // Position of next_in in the original file including box format if present
+  // (as opposed to position in the codestream)
+  size_t file_pos;
+
+  size_t box_contents_begin;
+  size_t box_contents_end;
+  size_t box_contents_size;
+  size_t box_size;
+  size_t header_size;
+  // Either a final box that runs until EOF, or the case of no container format
+  // at all.
+  bool box_contents_unbounded;
+
+  JxlBoxType box_type;
+  JxlBoxType box_decoded_type;  // Underlying type for brob boxes
+  // Set to true right after a JXL_DEC_BOX event only.
+  bool box_event;
+  bool decompress_boxes;
+
+  bool box_out_buffer_set;
+  // Whether the out buffer is set for the current box, if the user did not yet
+  // release the buffer while the next box is encountered, this will be set to
+  // false. If this is false, no JXL_DEC_NEED_MORE_INPUT is emitted
+  // (irrespective of the value of box_out_buffer_set), because not setting
+  // output indicates the user does not wish the data of this box.
+  bool box_out_buffer_set_current_box;
+  uint8_t* box_out_buffer;
+  size_t box_out_buffer_size;
+  // which byte of the full box content the start of the out buffer points to
+  size_t box_out_buffer_begin;
+  // which byte of box_out_buffer to write to next
+  size_t box_out_buffer_pos;
+
+  // Settings
+  bool keep_orientation;
+  bool unpremul_alpha;
+  bool render_spotcolors;
+  bool coalescing;
+  float desired_intensity_target;
+
+  // Bitfield, for which informative events (JXL_DEC_BASIC_INFO, etc...) the
+  // decoder returns a status. By default, do not return for any of the events,
+  // only return when the decoder cannot continue because it needs more input or
+  // output data.
+  int events_wanted;
+  int orig_events_wanted;
+
+  // Fields for reading the basic info from the header.
+  size_t basic_info_size_hint;
+  bool have_container;
+  size_t box_count;
+
+  // The level of progressive detail in frame decoding.
+  JxlProgressiveDetail prog_detail = kDC;
+  // The progressive detail of the current frame.
+  JxlProgressiveDetail frame_prog_detail;
+  // The intended downsampling ratio for the current progression step.
+  size_t downsampling_target;
+
+  // Set to true if either an image out buffer or an image out callback was set.
+  bool image_out_buffer_set;
+
+  // Owned by the caller, buffer for preview or full resolution image.
+  void* image_out_buffer;
+  JxlImageOutInitCallback image_out_init_callback;
+  JxlImageOutRunCallback image_out_run_callback;
+  JxlImageOutDestroyCallback image_out_destroy_callback;
+  void* image_out_init_opaque;
+  struct SimpleImageOutCallback {
+    JxlImageOutCallback callback;
+    void* opaque;
+  };
+  SimpleImageOutCallback simple_image_out_callback;
+
+  size_t image_out_size;
+
+  JxlPixelFormat image_out_format;
+  JxlBitDepth image_out_bit_depth;
+
+  // For extra channels. Empty if no extra channels are requested, and they are
+  // reset each frame
+  std::vector<ExtraChannelOutput> extra_channel_output;
+
+  jxl::CodecMetadata metadata;
+  // Same as metadata.m, except for the color_encoding, which is set to the
+  // output encoding.
+  jxl::ImageMetadata image_metadata;
+  std::unique_ptr<jxl::ImageBundle> ib;
+
+  std::unique_ptr<jxl::PassesDecoderState> passes_state;
+  std::unique_ptr<jxl::FrameDecoder> frame_dec;
+  size_t next_section;
+  std::vector<char> section_processed;
+
+  // headers and TOC for the current frame. When got_toc is true, this is
+  // always the frame header of the last frame of the current still series,
+  // that is, the displayed frame.
+  std::unique_ptr<jxl::FrameHeader> frame_header;
+
+  size_t remaining_frame_size;
+  FrameStage frame_stage;
+  bool dc_frame_progression_done;
+  // The currently processed frame is the last of the current composite still,
+  // and so must be returned as pixels
+  bool is_last_of_still;
+  // The currently processed frame is the last of the codestream
+  bool is_last_total;
+  // How many frames to skip.
+  size_t skip_frames;
+  // Skipping the current frame. May be false if skip_frames was just set to
+  // a positive value while already processing a current frame, then
+  // skipping_frame will be enabled only for the next frame.
+  bool skipping_frame;
+
+  // Amount of internal frames and external frames started. External frames are
+  // user-visible frames, internal frames includes all external frames and
+  // also invisible frames such as patches, blending-only and dc_level frames.
+  size_t internal_frames;
+  size_t external_frames;
+
+  // For each internal frame, which storage locations it references, and which
+  // storage locations it is stored in, using the bit mask as defined in
+  // FrameDecoder::References and FrameDecoder::SaveAs.
+  std::vector<int> frame_references;
+  std::vector<int> frame_saved_as;
+
+  // Translates external frame index to internal frame index. The external
+  // index is the index of user-visible frames. The internal index can be larger
+  // since non-visible frames (such as frames with patches, ...) are included.
+  std::vector<size_t> frame_external_to_internal;
+
+  // Whether the frame with internal index is required to decode the frame
+  // being skipped to or any frames after that. If no skipping is active,
+  // this vector is ignored. If the current internal frame index is beyond this
+  // vector, it must be treated as a required frame.
+  std::vector<char> frame_required;
+
+  // Codestream input data is copied here temporarily when the decoder needs
+  // more input bytes to process the next part of the stream. We copy the input
+  // data in order to be able to release it all through the API when
+  // returning JXL_DEC_NEED_MORE_INPUT.
+  std::vector<uint8_t> codestream_copy;
+  // Number of bytes at the end of codestream_copy that were not yet consumed
+  // by calling AdvanceInput().
+  size_t codestream_unconsumed;
+  // Position in the codestream_copy vector that the decoder already finished
+  // processing. It can be greater than the current size of codestream_copy in
+  // case where the decoder skips some parts of the frame that were not yet
+  // provided.
+  size_t codestream_pos;
+  // Number of bits after codestream_pos that were already processed.
+  size_t codestream_bits_ahead;
+
+  BoxStage box_stage;
+
+#if JPEGXL_ENABLE_BOXES
+  jxl::JxlBoxContentDecoder box_content_decoder;
+#endif
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+  jxl::JxlToJpegDecoder jpeg_decoder;
+  // Decodes Exif or XMP metadata for JPEG reconstruction
+  jxl::JxlBoxContentDecoder metadata_decoder;
+  std::vector<uint8_t> exif_metadata;
+  std::vector<uint8_t> xmp_metadata;
+  // must store JPEG reconstruction metadata from the current box
+  // 0 = not stored, 1 = currently storing, 2 = finished
+  int store_exif;
+  int store_xmp;
+  size_t recon_out_buffer_pos;
+  size_t recon_exif_size;  // Expected exif size as read from the jbrd box
+  size_t recon_xmp_size;   // Expected xmp size as read from the jbrd box
+  JpegReconStage recon_output_jpeg;
+
+  bool JbrdNeedMoreBoxes() const {
+    // A nonzero expected size means the jbrd box wants that metadata; a store
+    // state below 2 (finished) means its box was not yet fully seen.
+    const bool exif_pending = recon_exif_size > 0 && store_exif < 2;
+    const bool xmp_pending = recon_xmp_size > 0 && store_xmp < 2;
+    return exif_pending || xmp_pending;
+  }
+#endif
+
+  const uint8_t* next_in;
+  size_t avail_in;
+  bool input_closed;
+
+  void AdvanceInput(size_t size) {
+    JXL_DASSERT(avail_in >= size);
+    file_pos += size;  // the file position moves along with the input pointer
+    avail_in -= size;
+    next_in += size;
+  }
+
+  size_t AvailableCodestream() const {
+    // Returns how many of the available input bytes belong to the codestream.
+    // Unbounded box contents: all available input counts.
+    if (box_contents_unbounded) return avail_in;
+    // Otherwise clamp to the bytes left before the end of the box contents.
+    const size_t left_in_box = box_contents_end - file_pos;
+    return std::min<size_t>(avail_in, left_in_box);
+  }
+
+  void AdvanceCodestream(size_t size) {  // marks `size` codestream bytes done
+    size_t avail_codestream = AvailableCodestream();
+    if (codestream_copy.empty()) {  // working directly on the user input
+      if (size <= avail_codestream) {
+        AdvanceInput(size);
+      } else {  // more than is available: remember how much is left to skip
+        codestream_pos = size - avail_codestream;
+        AdvanceInput(avail_codestream);
+      }
+    } else {  // working on the internal copy
+      codestream_pos += size;
+      if (codestream_pos + codestream_unconsumed >= codestream_copy.size()) {
+        size_t advance = std::min(
+            codestream_unconsumed,
+            codestream_unconsumed + codestream_pos - codestream_copy.size());
+        AdvanceInput(advance);
+        codestream_pos -= std::min(codestream_pos, codestream_copy.size());
+        codestream_unconsumed = 0;
+        codestream_copy.clear();  // back to reading the user input directly
+      }
+    }
+  }
+
+  JxlDecoderStatus RequestMoreInput() {
+    if (!codestream_copy.empty()) {
+      // The pending bytes are already in the copy; only release the input.
+      AdvanceInput(codestream_unconsumed);
+      codestream_unconsumed = 0;
+    } else {
+      const size_t avail = AvailableCodestream();
+      codestream_copy.assign(next_in, next_in + avail);
+      AdvanceInput(avail);
+    }
+    return JXL_DEC_NEED_MORE_INPUT;
+  }
+
+  JxlDecoderStatus GetCodestreamInput(jxl::Span<const uint8_t>* span) {
+    if (codestream_copy.empty() && codestream_pos > 0) {  // pending skip
+      size_t avail_codestream = AvailableCodestream();
+      size_t skip = std::min<size_t>(codestream_pos, avail_codestream);
+      AdvanceInput(skip);
+      codestream_pos -= skip;
+      if (codestream_pos > 0) {
+        return RequestMoreInput();  // the skip continues with the next input
+      }
+    }
+    JXL_ASSERT(codestream_pos <= codestream_copy.size());
+    JXL_ASSERT(codestream_unconsumed <= codestream_copy.size());
+    size_t avail_codestream = AvailableCodestream();
+    if (codestream_copy.empty()) {  // nothing buffered: expose input directly
+      if (avail_codestream == 0) {
+        return RequestMoreInput();
+      }
+      *span = jxl::Span<const uint8_t>(next_in, avail_codestream);
+      return JXL_DEC_SUCCESS;
+    } else {  // append the input bytes that are not yet part of the copy
+      codestream_copy.insert(codestream_copy.end(),
+                             next_in + codestream_unconsumed,
+                             next_in + avail_codestream);
+      codestream_unconsumed = avail_codestream;  // now all copied
+      *span = jxl::Span<const uint8_t>(codestream_copy.data() + codestream_pos,
+                                       codestream_copy.size() - codestream_pos);
+      return JXL_DEC_SUCCESS;
+    }
+  }
+
+  // Whether the decoder can use more codestream input for a purpose it needs.
+  // This returns false if the user didn't subscribe to any events that
+  // require the codestream (e.g. only subscribed to metadata boxes), or all
+  // parts of the codestream that are subscribed to (e.g. only basic info) have
+  // already occurred.
+  bool CanUseMoreCodestreamInput() const {
+    // The finished stage is also set early once all relevant events occurred.
+    const bool codestream_done = stage == DecoderStage::kCodestreamFinished;
+    return !codestream_done;
+  }
+
+  // If set then some operations will fail, if those would require
+  // allocating large objects. Actual memory usage might be two orders of
+  // magnitude bigger.
+  // TODO(eustas): remove once there is working API for memory / CPU limit.
+  size_t memory_limit_base = 0;
+  size_t cpu_limit_base = 0;
+  size_t used_cpu_base = 0;
+};
+
+namespace {
+
+bool CheckSizeLimit(JxlDecoder* dec, size_t xsize, size_t ysize) {
+  // Returns false if an xsize x ysize image exceeds dec->memory_limit_base.
+  const size_t limit = dec->memory_limit_base;
+  // No limit configured, or an empty image: always acceptable.
+  if (limit == 0 || xsize == 0 || ysize == 0) return true;
+  if (xsize >= limit || ysize >= limit) return false;
+  // Rough estimate of the real row length: round up to a multiple of 32.
+  const size_t padded_xsize = jxl::DivCeil(xsize, 32) * 32;
+  const size_t num_pixels = padded_xsize * ysize;
+  // A product that does not divide back to ysize overflowed.
+  const bool overflowed = num_pixels / padded_xsize != ysize;
+  return !overflowed && num_pixels <= limit;
+}
+
+}  // namespace
+
+// Resets the decoding progress state shared by JxlDecoderRewind and Reset.
+void JxlDecoderRewindDecodingState(JxlDecoder* dec) {
+  dec->stage = DecoderStage::kInited;
+  dec->got_signature = false;
+  dec->last_codestream_seen = false;
+  dec->got_codestream_signature = false;
+  dec->got_basic_info = false;
+  dec->got_transform_data = false;
+  dec->got_all_headers = false;
+  dec->post_headers = false;
+  dec->icc_reader.Reset();
+  dec->got_preview_image = false;
+  dec->preview_frame = false;
+  dec->file_pos = 0;
+  dec->box_contents_begin = 0;
+  dec->box_contents_end = 0;
+  dec->box_contents_size = 0;
+  dec->box_size = 0;
+  dec->header_size = 0;
+  dec->box_contents_unbounded = false;
+  memset(dec->box_type, 0, sizeof(dec->box_type));
+  memset(dec->box_decoded_type, 0, sizeof(dec->box_decoded_type));
+  dec->box_event = false;
+  dec->box_stage = BoxStage::kHeader;
+  dec->box_out_buffer_set = false;
+  dec->box_out_buffer_set_current_box = false;
+  dec->box_out_buffer = nullptr;
+  dec->box_out_buffer_size = 0;
+  dec->box_out_buffer_begin = 0;
+  dec->box_out_buffer_pos = 0;
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+  dec->exif_metadata.clear();
+  dec->xmp_metadata.clear();
+  dec->store_exif = 0;  // 0 = not stored
+  dec->store_xmp = 0;
+  dec->recon_out_buffer_pos = 0;
+  dec->recon_exif_size = 0;
+  dec->recon_xmp_size = 0;
+  dec->recon_output_jpeg = JpegReconStage::kNone;
+#endif
+
+  dec->events_wanted = dec->orig_events_wanted;  // restore the subscription
+  dec->basic_info_size_hint = InitialBasicInfoSizeHint();
+  dec->have_container = false;  // bool member: use false, not 0
+  dec->box_count = 0;
+  dec->downsampling_target = 8;
+  dec->image_out_buffer_set = false;
+  dec->image_out_buffer = nullptr;
+  dec->image_out_init_callback = nullptr;
+  dec->image_out_run_callback = nullptr;
+  dec->image_out_destroy_callback = nullptr;
+  dec->image_out_init_opaque = nullptr;
+  dec->image_out_size = 0;
+  dec->image_out_bit_depth.type = JXL_BIT_DEPTH_FROM_PIXEL_FORMAT;
+  dec->extra_channel_output.clear();
+  dec->next_in = nullptr;  // pointer member: use nullptr, not 0
+  dec->avail_in = 0;
+  dec->input_closed = false;
+
+  dec->passes_state.reset(nullptr);
+  dec->frame_dec.reset(nullptr);
+  dec->next_section = 0;
+  dec->section_processed.clear();
+
+  dec->ib.reset();
+  dec->metadata = jxl::CodecMetadata();
+  dec->image_metadata = dec->metadata.m;
+  dec->frame_header.reset(new jxl::FrameHeader(&dec->metadata));
+
+  dec->codestream_copy.clear();
+  dec->codestream_unconsumed = 0;
+  dec->codestream_pos = 0;
+  dec->codestream_bits_ahead = 0;
+
+  dec->frame_stage = FrameStage::kHeader;
+  dec->remaining_frame_size = 0;
+  dec->is_last_of_still = false;
+  dec->is_last_total = false;
+  dec->skip_frames = 0;
+  dec->skipping_frame = false;
+  dec->internal_frames = 0;
+  dec->external_frames = 0;
+}
+
+void JxlDecoderReset(JxlDecoder* dec) {
+  JxlDecoderRewindDecodingState(dec);
+  dec->thread_pool.reset();
+
+  // Settings go back to their defaults and no events are subscribed.
+  dec->events_wanted = dec->orig_events_wanted = 0;
+  dec->keep_orientation = dec->unpremul_alpha = false;
+  dec->decompress_boxes = false;
+  dec->render_spotcolors = dec->coalescing = true;
+  dec->desired_intensity_target = 0;
+
+  // Drop the per-frame bookkeeping that a plain rewind keeps.
+  dec->frame_required.clear();
+  dec->frame_external_to_internal.clear();
+  dec->frame_saved_as.clear();
+  dec->frame_references.clear();
+}
+
+JxlDecoder* JxlDecoderCreate(const JxlMemoryManager* memory_manager) {
+  JxlMemoryManager resolved_manager;
+  if (!jxl::MemoryManagerInit(&resolved_manager, memory_manager)) {
+    return nullptr;
+  }
+
+  void* storage =
+      jxl::MemoryManagerAlloc(&resolved_manager, sizeof(JxlDecoder));
+  if (storage == nullptr) return nullptr;
+  // Construct the decoder in place inside the custom-allocated storage.
+  JxlDecoder* decoder = new (storage) JxlDecoder();
+  decoder->memory_manager = resolved_manager;
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+  if (memory_manager == nullptr) {
+    decoder->memory_limit_base = 53 << 16;
+    // Allow 5 x max_image_size processing units; every frame is accounted
+    // as W x H CPU processing units, so there could be numerous small frames
+    // or few larger ones.
+    decoder->cpu_limit_base = 5 * decoder->memory_limit_base;
+  }
+#endif
+
+  JxlDecoderReset(decoder);
+  return decoder;
+}
+
+void JxlDecoderDestroy(JxlDecoder* dec) {
+  if (dec == nullptr) return;
+  // The manager lives inside the object being destroyed, so copy it out first.
+  JxlMemoryManager manager_copy = dec->memory_manager;
+  // Custom free function: run the destructor explicitly, then release.
+  dec->~JxlDecoder();
+  jxl::MemoryManagerFree(&manager_copy, dec);
+}
+
+void JxlDecoderRewind(JxlDecoder* dec) { JxlDecoderRewindDecodingState(dec); }
+
+void JxlDecoderSkipFrames(JxlDecoder* dec, size_t amount) {
+  // The amount is added to, not assigned to, skip_frames. It cannot be made
+  // smaller because the decoder may already have skipped frames required to
+  // decode earlier frames. It cannot be set to a larger absolute value either:
+  // when this is called in the middle of already skipping frames, the user
+  // cannot know how many frames have been skipped internally so far, so an
+  // absolute value cannot be defined.
+  dec->skip_frames += amount;
+
+  // Start from an empty requirement list.
+  dec->frame_required.clear();
+
+  // Dependencies can only be computed for a frame seen before a rewind.
+  const size_t target = dec->external_frames + dec->skip_frames;
+  if (target >= dec->frame_external_to_internal.size()) return;
+  const size_t internal_index = dec->frame_external_to_internal[target];
+  if (internal_index >= dec->frame_saved_as.size()) return;
+
+  const std::vector<size_t> deps = GetFrameDependencies(
+      internal_index, dec->frame_saved_as, dec->frame_references);
+  // Mark the dependencies as required; all other frames up to and including
+  // internal_index stay 0.
+  dec->frame_required.resize(internal_index + 1, 0);
+  for (size_t i = 0; i < deps.size(); i++) {
+    JXL_ASSERT(deps[i] < dec->frame_required.size());
+    dec->frame_required[deps[i]] = 1;
+  }
+}
+
+JxlDecoderStatus JxlDecoderSkipCurrentFrame(JxlDecoder* dec) {
+  if (dec->frame_stage != FrameStage::kFull) {
+    return JXL_API_ERROR("JxlDecoderSkipCurrentFrame called at the wrong time");
+  }
+  JXL_DASSERT(dec->frame_dec);
+  // Drop the unread remainder of this frame, then expect a new frame header.
+  dec->AdvanceCodestream(dec->remaining_frame_size);
+  dec->frame_stage = FrameStage::kHeader;
+  // When the skipped frame was the last of its still, unset the out buffer.
+  if (dec->is_last_of_still) dec->image_out_buffer_set = false;
+  return JXL_DEC_SUCCESS;
+}
+
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetParallelRunner(JxlDecoder* dec, JxlParallelRunner parallel_runner,
+                            void* parallel_runner_opaque) {
+  if (dec->stage != DecoderStage::kInited) {
+    return JXL_API_ERROR(
+        "JxlDecoderSetParallelRunner must be called before starting");
+  }
+  dec->thread_pool = std::unique_ptr<jxl::ThreadPool>(
+      new jxl::ThreadPool(parallel_runner, parallel_runner_opaque));
+  return JXL_DEC_SUCCESS;
+}
+
+size_t JxlDecoderSizeHintBasicInfo(const JxlDecoder* dec) {
+  // Once the basic info has been obtained the hint is 0.
+  return dec->got_basic_info ? 0 : dec->basic_info_size_hint;
+}
+
+// Records which events the caller wants reported. Fails with JXL_DEC_ERROR if
+// decoding has already started or if any of the low six bits is set in
+// events_wanted (only informative events can be subscribed to).
+JxlDecoderStatus JxlDecoderSubscribeEvents(JxlDecoder* dec, int events_wanted) {
+  constexpr int kRejectedBits = 63;
+  const bool already_started = (dec->stage != DecoderStage::kInited);
+  const bool has_rejected_bits = (events_wanted & kRejectedBits) != 0;
+  if (already_started || has_rejected_bits) {
+    return JXL_DEC_ERROR;
+  }
+  // Keep both the working copy and the original request.
+  dec->events_wanted = events_wanted;
+  dec->orig_events_wanted = events_wanted;
+  return JXL_DEC_SUCCESS;
+}
+
+// Stores the keep_orientation option on the decoder. The option can only be
+// changed while the decoder is still in DecoderStage::kInited.
+JxlDecoderStatus JxlDecoderSetKeepOrientation(JxlDecoder* dec,
+                                              JXL_BOOL skip_reorientation) {
+  if (dec->stage == DecoderStage::kInited) {
+    dec->keep_orientation = skip_reorientation ? true : false;
+    return JXL_DEC_SUCCESS;
+  }
+  return JXL_API_ERROR("Must set keep_orientation option before starting");
+}
+
+// Stores the unpremul_alpha option on the decoder. The option can only be
+// changed while the decoder is still in DecoderStage::kInited.
+JxlDecoderStatus JxlDecoderSetUnpremultiplyAlpha(JxlDecoder* dec,
+                                                 JXL_BOOL unpremul_alpha) {
+  if (dec->stage == DecoderStage::kInited) {
+    dec->unpremul_alpha = unpremul_alpha ? true : false;
+    return JXL_DEC_SUCCESS;
+  }
+  return JXL_API_ERROR("Must set unpremul_alpha option before starting");
+}
+
+// Stores the render_spotcolors option on the decoder. The option can only be
+// changed while the decoder is still in DecoderStage::kInited.
+JxlDecoderStatus JxlDecoderSetRenderSpotcolors(JxlDecoder* dec,
+                                               JXL_BOOL render_spotcolors) {
+  if (dec->stage == DecoderStage::kInited) {
+    dec->render_spotcolors = render_spotcolors ? true : false;
+    return JXL_DEC_SUCCESS;
+  }
+  return JXL_API_ERROR("Must set render_spotcolors option before starting");
+}
+
+// Stores the coalescing option on the decoder. The option can only be changed
+// while the decoder is still in DecoderStage::kInited.
+JxlDecoderStatus JxlDecoderSetCoalescing(JxlDecoder* dec, JXL_BOOL coalescing) {
+  if (dec->stage == DecoderStage::kInited) {
+    dec->coalescing = coalescing ? true : false;
+    return JXL_DEC_SUCCESS;
+  }
+  return JXL_API_ERROR("Must set coalescing option before starting");
+}
+
+namespace {
+// Writes the dimensions of the current image buffer to xsize/ysize:
+//  - preview frames use the oriented preview size from the metadata;
+//  - with coalescing enabled, the oriented image size from the metadata;
+//  - otherwise the upsampled size of the current frame, with the two axes
+//    exchanged when reorientation is applied and the orientation value
+//    exceeds 4.
+void GetCurrentDimensions(const JxlDecoder* dec, size_t& xsize, size_t& ysize) {
+  const bool keep = dec->keep_orientation;
+  if (dec->frame_header->nonserialized_is_preview) {
+    xsize = dec->metadata.oriented_preview_xsize(keep);
+    ysize = dec->metadata.oriented_preview_ysize(keep);
+  } else if (dec->coalescing) {
+    xsize = dec->metadata.oriented_xsize(keep);
+    ysize = dec->metadata.oriented_ysize(keep);
+  } else {
+    const auto frame_dim = dec->frame_header->ToFrameDimensions();
+    const bool transposed =
+        !keep && static_cast<int>(dec->metadata.m.GetOrientation()) > 4;
+    xsize = transposed ? frame_dim.ysize_upsampled : frame_dim.xsize_upsampled;
+    ysize = transposed ? frame_dim.xsize_upsampled : frame_dim.ysize_upsampled;
+  }
+}
+}  // namespace
+
+namespace jxl {
+namespace {
+
+// Checks whether the bundle `t` can be read from `data` starting at the bit
+// position `reader` has reached, without moving `reader` itself.
+template <class T>
+bool CanRead(Span<const uint8_t> data, BitReader* reader, T* JXL_RESTRICT t) {
+  // Bundle::CanRead advances bits, so probe with a scratch reader that is
+  // fast-forwarded to the same position.
+  BitReader probe(data);
+  probe.SkipBits(reader->TotalBitsConsumed());
+  const bool readable = Bundle::CanRead(&probe, t);
+  JXL_ASSERT(probe.Close());
+  return readable;
+}
+
+// Reads the bundle `t` through `reader`. Yields JXL_DEC_SUCCESS when the whole
+// bundle was read, the decoder's "need more input" status when CanRead reports
+// that it cannot be read yet, and JXL_DEC_ERROR when the read itself fails.
+template <class T>
+JxlDecoderStatus ReadBundle(JxlDecoder* dec, Span<const uint8_t> data,
+                            BitReader* reader, T* JXL_RESTRICT t) {
+  const bool readable = CanRead(data, reader, t);
+  if (!readable) {
+    return dec->RequestMoreInput();
+  }
+  return Bundle::Read(reader, t) ? JXL_DEC_SUCCESS : JXL_DEC_ERROR;
+}
+
+// Creates a heap-allocated BitReader over `span`, owned by a unique_ptr whose
+// deleter closes the reader before freeing it.
+std::unique_ptr<BitReader, std::function<void(BitReader*)>> GetBitReader(
+    Span<const uint8_t> span) {
+  using Deleter = std::function<void(BitReader*)>;
+  BitReader* raw = new BitReader(span);
+  Deleter close_and_delete = [](BitReader* br) {
+    // We can't allow Close to abort the program if the reader is out of
+    // bounds, or all return paths in the code, even those that already
+    // return failure, would have to manually call AllReadsWithinBounds().
+    // Invalid JXL codestream should not cause program to quit.
+    (void)br->AllReadsWithinBounds();
+    (void)br->Close();
+    delete br;
+  };
+  return std::unique_ptr<BitReader, Deleter>(raw, std::move(close_and_delete));
+}
+
+// Validates the two-byte codestream signature (if not done yet), reads the
+// size and image metadata bundles, and records the result on the decoder.
+// Returns the decoder's "need more input" status when the available bytes do
+// not suffice, and an input error for a bad signature or an oversized image.
+JxlDecoderStatus JxlDecoderReadBasicInfo(JxlDecoder* dec) {
+  if (!dec->got_codestream_signature) {
+    // Check and skip the codestream signature
+    Span<const uint8_t> signature;
+    JXL_API_RETURN_IF_ERROR(dec->GetCodestreamInput(&signature));
+    if (signature.size() < 2) {
+      return dec->RequestMoreInput();
+    }
+    const uint8_t* bytes = signature.data();
+    const bool valid = (bytes[0] == 0xff && bytes[1] == jxl::kCodestreamMarker);
+    if (!valid) {
+      return JXL_INPUT_ERROR("invalid signature");
+    }
+    dec->got_codestream_signature = true;
+    dec->AdvanceCodestream(2);
+  }
+
+  Span<const uint8_t> input;
+  JXL_API_RETURN_IF_ERROR(dec->GetCodestreamInput(&input));
+  auto br = GetBitReader(input);
+  JXL_API_RETURN_IF_ERROR(
+      ReadBundle(dec, input, br.get(), &dec->metadata.size));
+  JXL_API_RETURN_IF_ERROR(ReadBundle(dec, input, br.get(), &dec->metadata.m));
+
+  // Advance by the whole bytes consumed; the bits used from the following
+  // partial byte are remembered in codestream_bits_ahead.
+  const size_t consumed_bits = br->TotalBitsConsumed();
+  dec->AdvanceCodestream(consumed_bits / jxl::kBitsPerByte);
+  dec->codestream_bits_ahead = consumed_bits % jxl::kBitsPerByte;
+  dec->got_basic_info = true;
+  dec->basic_info_size_hint = 0;
+  dec->image_metadata = dec->metadata.m;
+  JXL_DEBUG_V(2, "Decoded BasicInfo: %s", dec->metadata.DebugString().c_str());
+
+  if (!CheckSizeLimit(dec, dec->metadata.size.xsize(),
+                      dec->metadata.size.ysize())) {
+    return JXL_INPUT_ERROR("image is too large");
+  }
+  return JXL_DEC_SUCCESS;
+}
+
+// Reads all codestream headers (but not frame headers): the transform data
+// bundle and, when the color encoding wants one, the ICC profile. On success
+// the codestream is advanced to the next byte boundary and the output encoding
+// info of the passes state is initialised from the metadata.
+JxlDecoderStatus JxlDecoderReadAllHeaders(JxlDecoder* dec) {
+  if (!dec->got_transform_data) {
+    Span<const uint8_t> input;
+    JXL_API_RETURN_IF_ERROR(dec->GetCodestreamInput(&input));
+    auto br = GetBitReader(input);
+    br->SkipBits(dec->codestream_bits_ahead);
+    auto& transform_data = dec->metadata.transform_data;
+    transform_data.nonserialized_xyb_encoded = dec->metadata.m.xyb_encoded;
+    JXL_API_RETURN_IF_ERROR(ReadBundle(dec, input, br.get(), &transform_data));
+    const size_t consumed_bits = br->TotalBitsConsumed();
+    dec->AdvanceCodestream(consumed_bits / jxl::kBitsPerByte);
+    dec->codestream_bits_ahead = consumed_bits % jxl::kBitsPerByte;
+    dec->got_transform_data = true;
+  }
+
+  Span<const uint8_t> input;
+  JXL_API_RETURN_IF_ERROR(dec->GetCodestreamInput(&input));
+  auto br = GetBitReader(input);
+  br->SkipBits(dec->codestream_bits_ahead);
+
+  if (dec->metadata.m.color_encoding.WantICC()) {
+    jxl::Status icc_status =
+        dec->icc_reader.Init(br.get(), dec->memory_limit_base);
+    // Always check AllReadsWithinBounds, not all the C++ decoder implementation
+    // handles reader out of bounds correctly  yet (e.g. context map). Not
+    // checking AllReadsWithinBounds can cause reader->Close() to trigger an
+    // assert, but we don't want library to quit program for invalid codestream.
+    const bool init_truncated =
+        !br->AllReadsWithinBounds() ||
+        icc_status.code() == StatusCode::kNotEnoughBytes;
+    if (init_truncated) {
+      return dec->RequestMoreInput();
+    }
+    // Other non-successful status is an error
+    if (!icc_status) return JXL_DEC_ERROR;
+
+    PaddedBytes icc;
+    icc_status = dec->icc_reader.Process(br.get(), &icc);
+    if (icc_status.code() == StatusCode::kNotEnoughBytes) {
+      return dec->RequestMoreInput();
+    }
+    // Other non-successful status is an error
+    if (!icc_status) return JXL_DEC_ERROR;
+    if (!dec->metadata.m.color_encoding.SetICCRaw(std::move(icc))) {
+      return JXL_DEC_ERROR;
+    }
+  }
+
+  dec->got_all_headers = true;
+  JXL_API_RETURN_IF_ERROR(br->JumpToByteBoundary());
+
+  // The reader now sits on a byte boundary, so no partial bits remain.
+  dec->AdvanceCodestream(br->TotalBitsConsumed() / jxl::kBitsPerByte);
+  dec->codestream_bits_ahead = 0;
+
+  if (!dec->passes_state) {
+    dec->passes_state.reset(new jxl::PassesDecoderState());
+  }
+
+  auto& output_encoding_info = dec->passes_state->output_encoding_info;
+  JXL_API_RETURN_IF_ERROR(output_encoding_info.SetFromMetadata(dec->metadata));
+  if (dec->desired_intensity_target > 0) {
+    output_encoding_info.desired_intensity_target =
+        dec->desired_intensity_target;
+  }
+  dec->image_metadata = dec->metadata.m;
+
+  return JXL_DEC_SUCCESS;
+}
+
+// Hands every not yet processed section of the current frame whose bytes are
+// fully available to the frame decoder, marks the ones reported as done, and
+// advances the codestream past the contiguous run of finished sections at the
+// front of the table of contents.
+JxlDecoderStatus JxlDecoderProcessSections(JxlDecoder* dec) {
+  using SectionInfo = jxl::FrameDecoder::SectionInfo;
+  using SectionStatus = jxl::FrameDecoder::SectionStatus;
+
+  Span<const uint8_t> input;
+  JXL_API_RETURN_IF_ERROR(dec->GetCodestreamInput(&input));
+  const auto& toc = dec->frame_dec->Toc();
+  std::vector<SectionInfo> pending;
+  std::vector<SectionStatus> results;
+  size_t offset = 0;
+  for (size_t i = dec->next_section; i < toc.size(); ++i) {
+    const size_t section_size = toc[i].size;
+    if (!dec->section_processed[i]) {
+      const size_t section_id = toc[i].id;
+      // Stop at the first section that does not fit in the available input.
+      if (OutOfBounds(offset, section_size, input.size())) {
+        break;
+      }
+      auto* br = new jxl::BitReader(
+          jxl::Span<const uint8_t>(input.data() + offset, section_size));
+      pending.emplace_back(SectionInfo{br, section_id, i});
+      results.emplace_back();
+    }
+    offset += section_size;
+  }
+
+  jxl::Status status = dec->frame_dec->ProcessSections(
+      pending.data(), pending.size(), results.data());
+
+  bool out_of_bounds = false;
+  for (const SectionInfo& info : pending) {
+    // Mark out of bounds section, but keep closing and deleting the next
+    // ones as well.
+    out_of_bounds |= !info.br->AllReadsWithinBounds();
+    JXL_ASSERT(info.br->Close());
+    delete info.br;
+  }
+  if (out_of_bounds) {
+    // If any bit reader indicates out of bounds, it's an error, not just
+    // needing more input, since we ensure only bit readers containing
+    // a complete section are provided to the FrameDecoder.
+    return JXL_INPUT_ERROR("frame out of bounds");
+  }
+  if (!status) {
+    return JXL_INPUT_ERROR("frame processing failed");
+  }
+
+  for (size_t i = 0; i < results.size(); ++i) {
+    const auto section_result = results[i];
+    if (section_result == jxl::FrameDecoder::kSkipped) continue;
+    if (section_result != jxl::FrameDecoder::kDone) {
+      return JXL_INPUT_ERROR("unexpected section status");
+    }
+    dec->section_processed[pending[i].index] = 1;
+  }
+
+  // Count the bytes of the finished sections at the front and move
+  // next_section past them.
+  size_t finished_bytes = 0;
+  for (; dec->next_section < dec->section_processed.size() &&
+         dec->section_processed[dec->next_section] == 1;
+       ++dec->next_section) {
+    finished_bytes += toc[dec->next_section].size;
+  }
+  dec->remaining_frame_size -= finished_bytes;
+  dec->AdvanceCodestream(finished_bytes);
+  return JXL_DEC_SUCCESS;
+}
+
+// TODO(eustas): no CodecInOut -> no image size reinforcement -> possible OOM.
+JxlDecoderStatus JxlDecoderProcessCodestream(JxlDecoder* dec) {
+  // Stages: basic info, other headers, frames. Default runner if none is set.
+  // TODO(lode): move this initialization to an appropriate location once the
+  // runner is used to decode pixels.
+  if (!dec->thread_pool) {
+    dec->thread_pool.reset(new jxl::ThreadPool(nullptr, nullptr));
+  }
+
+  // No matter what events are wanted, the basic info is always required.
+  if (!dec->got_basic_info) {
+    JxlDecoderStatus status = JxlDecoderReadBasicInfo(dec);
+    if (status != JXL_DEC_SUCCESS) return status;
+  }
+
+  if (dec->events_wanted & JXL_DEC_BASIC_INFO) {
+    dec->events_wanted &= ~JXL_DEC_BASIC_INFO;  // report this event only once
+    return JXL_DEC_BASIC_INFO;
+  }
+
+  if (!dec->events_wanted) {
+    dec->stage = DecoderStage::kCodestreamFinished;  // no events left to serve
+    return JXL_DEC_SUCCESS;
+  }
+
+  if (!dec->got_all_headers) {
+    JxlDecoderStatus status = JxlDecoderReadAllHeaders(dec);
+    if (status != JXL_DEC_SUCCESS) return status;
+  }
+
+  if (dec->events_wanted & JXL_DEC_COLOR_ENCODING) {
+    dec->events_wanted &= ~JXL_DEC_COLOR_ENCODING;  // report only once
+    return JXL_DEC_COLOR_ENCODING;
+  }
+
+  if (!dec->events_wanted) {
+    dec->stage = DecoderStage::kCodestreamFinished;  // no events left to serve
+    return JXL_DEC_SUCCESS;
+  }
+
+  dec->post_headers = true;
+
+  if (!dec->got_preview_image && dec->metadata.m.have_preview) {
+    dec->preview_frame = true;  // the next frame parsed is the preview
+  }
+
+  // Handle frames; frame_stage moves kHeader -> kTOC -> kFull within a pass.
+  for (;;) {
+    bool parse_frames =
+        (dec->events_wanted &
+         (JXL_DEC_PREVIEW_IMAGE | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+    if (!parse_frames) {
+      break;
+    }
+    if (dec->frame_stage == FrameStage::kHeader && dec->is_last_total) {
+      break;  // a frame flagged is_last was already parsed; nothing follows
+    }
+    if (dec->frame_stage == FrameStage::kHeader) {
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+      if (dec->recon_output_jpeg == JpegReconStage::kSettingMetadata ||
+          dec->recon_output_jpeg == JpegReconStage::kOutputting) {
+        // The image bundle contains the JPEG reconstruction frame, but the
+        // decoder is still waiting to decode an EXIF or XMP box. It's not
+        // implemented to decode additional frames during this, and a JPEG
+        // reconstruction image should have only one frame.
+        return JXL_API_ERROR(
+            "cannot decode a next frame after JPEG reconstruction frame");
+      }
+#endif
+      if (!dec->ib) {
+        dec->ib.reset(new jxl::ImageBundle(&dec->image_metadata));
+      }
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+      // If JPEG reconstruction is wanted and possible, set the jpeg_data of
+      // the ImageBundle.
+      if (!dec->jpeg_decoder.SetImageBundleJpegData(dec->ib.get()))
+        return JXL_DEC_ERROR;
+#endif
+      dec->frame_dec.reset(new FrameDecoder(
+          dec->passes_state.get(), dec->metadata, dec->thread_pool.get(),
+          /*use_slow_rendering_pipeline=*/false));
+      dec->frame_header.reset(new FrameHeader(&dec->metadata));
+      Span<const uint8_t> span;
+      JXL_API_RETURN_IF_ERROR(dec->GetCodestreamInput(&span));
+      auto reader = GetBitReader(span);
+      jxl::Status status = dec->frame_dec->InitFrame(
+          reader.get(), dec->ib.get(), dec->preview_frame);
+      if (!reader->AllReadsWithinBounds() ||
+          status.code() == StatusCode::kNotEnoughBytes) {
+        return dec->RequestMoreInput();
+      } else if (!status) {
+        return JXL_INPUT_ERROR("invalid frame header");
+      }
+      dec->AdvanceCodestream(reader->TotalBitsConsumed() / kBitsPerByte);
+      *dec->frame_header = dec->frame_dec->GetFrameHeader();
+      jxl::FrameDimensions frame_dim = dec->frame_header->ToFrameDimensions();
+      if (!CheckSizeLimit(dec, frame_dim.xsize_upsampled_padded,
+                          frame_dim.ysize_upsampled_padded)) {
+        return JXL_INPUT_ERROR("frame is too large");
+      }
+      bool output_needed =
+          (dec->preview_frame ? (dec->events_wanted & JXL_DEC_PREVIEW_IMAGE)
+                              : (dec->events_wanted & JXL_DEC_FULL_IMAGE));
+      if (output_needed) {
+        JXL_API_RETURN_IF_ERROR(dec->frame_dec->InitFrameOutput());
+      }
+      if (dec->cpu_limit_base != 0) {
+        // No overflow, checked in CheckSizeLimit.
+        size_t num_pixels = frame_dim.xsize * frame_dim.ysize;
+        if (dec->used_cpu_base + num_pixels < dec->used_cpu_base) {
+          return JXL_INPUT_ERROR("image too large");
+        }
+        dec->used_cpu_base += num_pixels;
+        if (dec->used_cpu_base > dec->cpu_limit_base) {
+          return JXL_INPUT_ERROR("image too large");
+        }
+      }
+      dec->remaining_frame_size = dec->frame_dec->SumSectionSizes();
+
+      dec->frame_stage = FrameStage::kTOC;
+      if (dec->preview_frame) {
+        if (!(dec->events_wanted & JXL_DEC_PREVIEW_IMAGE)) {
+          dec->frame_stage = FrameStage::kHeader;
+          dec->AdvanceCodestream(dec->remaining_frame_size);  // skip its bytes
+          dec->got_preview_image = true;
+          dec->preview_frame = false;
+        }
+        continue;  // preview frames bypass the frame bookkeeping below
+      }
+
+      int saved_as = FrameDecoder::SavedAs(*dec->frame_header);
+      // is last in entire codestream
+      dec->is_last_total = dec->frame_header->is_last;
+      // is last of current still
+      dec->is_last_of_still =
+          dec->is_last_total || dec->frame_header->animation_frame.duration > 0;
+      // is kRegularFrame and coalescing is disabled
+      dec->is_last_of_still |=
+          (!dec->coalescing &&
+           dec->frame_header->frame_type == FrameType::kRegularFrame);
+      const size_t internal_frame_index = dec->internal_frames;
+      const size_t external_frame_index = dec->external_frames;
+      if (dec->is_last_of_still) dec->external_frames++;
+      dec->internal_frames++;
+
+      if (dec->skip_frames > 0) {
+        dec->skipping_frame = true;
+        if (dec->is_last_of_still) {
+          dec->skip_frames--;  // only a completed still counts as one skip
+        }
+      } else {
+        dec->skipping_frame = false;
+      }
+
+      if (external_frame_index >= dec->frame_external_to_internal.size()) {
+        dec->frame_external_to_internal.push_back(internal_frame_index);
+        JXL_ASSERT(dec->frame_external_to_internal.size() ==
+                   external_frame_index + 1);
+      }
+
+      if (internal_frame_index >= dec->frame_saved_as.size()) {
+        dec->frame_saved_as.push_back(saved_as);
+        JXL_ASSERT(dec->frame_saved_as.size() == internal_frame_index + 1);
+
+        // add the value 0xff (which means all references) to new slots: we only
+        // know the references of the frame at FinalizeFrame, and fill in the
+        // correct values there. As long as this information is not known, the
+        // worst case where the frame depends on all storage slots is assumed.
+        dec->frame_references.push_back(0xff);
+        JXL_ASSERT(dec->frame_references.size() == internal_frame_index + 1);
+      }
+
+      if (dec->skipping_frame) {
+        // Whether this frame could be referenced by any future frame: either
+        // because it's a frame saved for blending or patches, or because it's
+        // a DC frame.
+        bool referenceable =
+            dec->frame_header->CanBeReferenced() ||
+            dec->frame_header->frame_type == FrameType::kDCFrame;
+        if (internal_frame_index < dec->frame_required.size() &&
+            !dec->frame_required[internal_frame_index]) {
+          referenceable = false;  // known not to be a dependency
+        }
+        if (!referenceable) {
+          // Skip all decoding for this frame, since the user is skipping this
+          // frame and no future frames can reference it.
+          dec->frame_stage = FrameStage::kHeader;
+          dec->AdvanceCodestream(dec->remaining_frame_size);
+          continue;
+        }
+      }
+
+      if ((dec->events_wanted & JXL_DEC_FRAME) && dec->is_last_of_still) {
+        // Only return this for the last of a series of stills: patches frames
+        // etc... before this one do not contain the correct information such
+        // as animation timing, ...
+        if (!dec->skipping_frame) {
+          return JXL_DEC_FRAME;
+        }
+      }
+    }
+
+    if (dec->frame_stage == FrameStage::kTOC) {
+      dec->frame_dec->SetRenderSpotcolors(dec->render_spotcolors);
+      dec->frame_dec->SetCoalescing(dec->coalescing);
+
+      if (!dec->preview_frame &&
+          (dec->events_wanted & JXL_DEC_FRAME_PROGRESSION)) {
+        dec->frame_prog_detail =
+            dec->frame_dec->SetPauseAtProgressive(dec->prog_detail);
+      } else {
+        dec->frame_prog_detail = JxlProgressiveDetail::kFrames;
+      }
+      dec->dc_frame_progression_done = 0;
+
+      dec->next_section = 0;
+      dec->section_processed.clear();
+      dec->section_processed.resize(dec->frame_dec->Toc().size(), 0);
+
+      // If we don't need pixels, we can skip actually decoding the frames.
+      if (dec->preview_frame || (dec->events_wanted & JXL_DEC_FULL_IMAGE)) {
+        dec->frame_stage = FrameStage::kFull;
+      } else if (!dec->is_last_total) {
+        dec->frame_stage = FrameStage::kHeader;
+        dec->AdvanceCodestream(dec->remaining_frame_size);
+        continue;
+      } else {
+        break;  // last frame and no pixels wanted: leave the frame loop
+      }
+    }
+
+    if (dec->frame_stage == FrameStage::kFull) {
+      if (!dec->image_out_buffer_set) {
+        if (dec->preview_frame) {
+          return JXL_DEC_NEED_PREVIEW_OUT_BUFFER;
+        }
+        if (
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+            (!dec->jpeg_decoder.IsOutputSet() ||
+             dec->ib->jpeg_data == nullptr) &&
+#endif
+            dec->is_last_of_still && !dec->skipping_frame) {
+          // TODO(lode): remove the dec->is_last_of_still condition if the
+          // frame decoder needs the image buffer as working space for decoding
+          // non-visible or blending frames too
+          return JXL_DEC_NEED_IMAGE_OUT_BUFFER;
+        }
+      }
+
+      if (dec->image_out_buffer_set) {
+        size_t xsize, ysize;
+        GetCurrentDimensions(dec, xsize, ysize);
+        size_t bits_per_sample = GetBitDepth(
+            dec->image_out_bit_depth, dec->metadata.m, dec->image_out_format);
+        dec->frame_dec->SetImageOutput(
+            PixelCallback{
+                dec->image_out_init_callback, dec->image_out_run_callback,
+                dec->image_out_destroy_callback, dec->image_out_init_opaque},
+            reinterpret_cast<uint8_t*>(dec->image_out_buffer),
+            dec->image_out_size, xsize, ysize, dec->image_out_format,
+            bits_per_sample, dec->unpremul_alpha, !dec->keep_orientation);
+        for (size_t i = 0; i < dec->extra_channel_output.size(); ++i) {
+          const auto& extra = dec->extra_channel_output[i];
+          size_t ec_bits_per_sample =
+              GetBitDepth(dec->image_out_bit_depth,
+                          dec->metadata.m.extra_channel_info[i], extra.format);
+          dec->frame_dec->AddExtraChannelOutput(extra.buffer, extra.buffer_size,
+                                                xsize, extra.format,
+                                                ec_bits_per_sample);
+        }
+      }
+
+      size_t next_num_passes_to_pause = dec->frame_dec->NextNumPassesToPause();
+
+      JXL_API_RETURN_IF_ERROR(JxlDecoderProcessSections(dec));
+
+      bool all_sections_done = dec->frame_dec->HasDecodedAll();
+      bool got_dc_only = !all_sections_done && dec->frame_dec->HasDecodedDC();
+
+      if (dec->frame_prog_detail >= JxlProgressiveDetail::kDC &&
+          !dec->dc_frame_progression_done && got_dc_only) {
+        dec->dc_frame_progression_done = true;  // report the DC step only once
+        dec->downsampling_target = 8;
+        return JXL_DEC_FRAME_PROGRESSION;
+      }
+
+      bool new_progression_step_done =
+          dec->frame_dec->NumCompletePasses() >= next_num_passes_to_pause;
+
+      if (!all_sections_done &&
+          dec->frame_prog_detail >= JxlProgressiveDetail::kLastPasses &&
+          new_progression_step_done) {
+        dec->downsampling_target =
+            dec->frame_header->passes.GetDownsamplingTargetForCompletedPasses(
+                dec->frame_dec->NumCompletePasses());
+        return JXL_DEC_FRAME_PROGRESSION;
+      }
+
+      if (!all_sections_done) {
+        // Not all sections have been processed yet
+        return dec->RequestMoreInput();
+      }
+
+      if (!dec->preview_frame) {
+        size_t internal_index = dec->internal_frames - 1;
+        JXL_ASSERT(dec->frame_references.size() > internal_index);
+        // Always fill this in, even if it was already written, it could be that
+        // this frame was skipped before and set to 255, while only now we know
+        // the true value.
+        dec->frame_references[internal_index] = dec->frame_dec->References();
+      }
+
+      if (!dec->frame_dec->FinalizeFrame()) {
+        return JXL_INPUT_ERROR("decoding frame failed");
+      }
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+      // If jpeg output was requested, we merely return the JXL_DEC_FULL_IMAGE
+      // status without outputting pixels.
+      if (dec->jpeg_decoder.IsOutputSet() && dec->ib->jpeg_data != nullptr) {
+        dec->frame_stage = FrameStage::kHeader;
+        dec->recon_output_jpeg = JpegReconStage::kSettingMetadata;
+        return JXL_DEC_FULL_IMAGE;
+      }
+#endif
+      if (dec->preview_frame || dec->is_last_of_still) {
+        dec->image_out_buffer_set = false;
+        dec->extra_channel_output.clear();
+      }
+    }
+
+    dec->frame_stage = FrameStage::kHeader;  // next pass parses a new header
+
+    // The pixels have been output or are not needed, do not keep them in
+    // memory here.
+    dec->ib.reset();
+    if (dec->preview_frame) {
+      dec->got_preview_image = true;
+      dec->preview_frame = false;
+      dec->events_wanted &= ~JXL_DEC_PREVIEW_IMAGE;
+      return JXL_DEC_PREVIEW_IMAGE;
+    } else if (dec->is_last_of_still &&
+               (dec->events_wanted & JXL_DEC_FULL_IMAGE) &&
+               !dec->skipping_frame) {
+      return JXL_DEC_FULL_IMAGE;
+    }
+  }
+
+  dec->stage = DecoderStage::kCodestreamFinished;
+  // Return success, this means there is nothing more to do.
+  return JXL_DEC_SUCCESS;
+}
+
+}  // namespace
+}  // namespace jxl
+
+// Registers `size` bytes at `data` as the decoder's current input. Rejected
+// with an API error while a previous input is still set, or after the input
+// has been closed.
+JxlDecoderStatus JxlDecoderSetInput(JxlDecoder* dec, const uint8_t* data,
+                                    size_t size) {
+  const bool input_still_set = (dec->next_in != nullptr);
+  if (input_still_set) {
+    return JXL_API_ERROR("already set input, use JxlDecoderReleaseInput first");
+  }
+  if (dec->input_closed) {
+    return JXL_API_ERROR("input already closed");
+  }
+
+  dec->avail_in = size;
+  dec->next_in = data;
+  return JXL_DEC_SUCCESS;
+}
+
+// Detaches the current input from the decoder and returns the value avail_in
+// held just before it was reset.
+size_t JxlDecoderReleaseInput(JxlDecoder* dec) {
+  const size_t remaining = dec->avail_in;
+  dec->avail_in = 0;
+  dec->next_in = nullptr;
+  return remaining;
+}
+
+// Marks the input as closed; JxlDecoderSetInput refuses new data afterwards.
+void JxlDecoderCloseInput(JxlDecoder* dec) {
+  dec->input_closed = true;
+}
+
+JxlDecoderStatus JxlDecoderSetJPEGBuffer(JxlDecoder* dec, uint8_t* data,
+                                         size_t size) {
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+  // JPEG reconstruction buffer can only set and updated before or during the
+  // first frame, the reconstruction box refers to the first frame and in
+  // theory multi-frame images should not be used with a jbrd box.
+  if (dec->internal_frames > 1) {
+    return JXL_API_ERROR("JPEG reconstruction only works for the first frame");
+  }
+  if (dec->jpeg_decoder.IsOutputSet()) {
+    return JXL_API_ERROR("Already set JPEG buffer");
+  }
+  return dec->jpeg_decoder.SetOutputBuffer(data, size);
+#else
+  return JXL_API_ERROR("JPEG reconstruction is not supported.");
+#endif
+}
+
+// Releases the JPEG reconstruction output buffer and returns the byte count
+// reported by the JPEG decoder's ReleaseOutputBuffer().
+//
+// When the library is built without JPEGXL_ENABLE_TRANSCODE_JPEG the error is
+// still reported through JXL_API_ERROR, but the function returns 0: the return
+// type is a byte count, so handing back the error status value would be read
+// by the caller as a number of bytes.
+size_t JxlDecoderReleaseJPEGBuffer(JxlDecoder* dec) {
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+  return dec->jpeg_decoder.ReleaseOutputBuffer();
+#else
+  // JxlDecoderSetJPEGBuffer always fails in this configuration, so no buffer
+  // can be registered and there are no bytes to report.
+  (void)JXL_API_ERROR("JPEG reconstruction is not supported.");
+  return 0;
+#endif
+}
+
+// Parses the header of the box, outputting the 4-character type and the box
+// size, including header size, as stored in the box header.
+//
+// Header layout: a 32-bit big-endian size, then the 4-character type, then,
+// only when the 32-bit size equals 1, a 64-bit big-endian size that replaces
+// it (giving a 16-byte header instead of an 8-byte one).
+//
+// @param in current input bytes.
+// @param size available input size.
+// @param pos position in the input, must begin at the header of the box.
+// @param file_pos position of pos since the start of the JXL file, rather than
+// the current input, used for integer overflow checking.
+// @param type the output box type.
+// @param box_size output the total box size, including header, in bytes, or 0
+// if it's a final unbounded box.
+// @param header_size output size of the box header.
+// @return JXL_DEC_SUCCESS if the box header was fully parsed. In that case the
+// parsing position must be incremented by header_size bytes.
+// JXL_DEC_NEED_MORE_INPUT if not enough input bytes available, in that case
+// header_size indicates a lower bound for the known size the header has to be
+// at least. JXL_DEC_ERROR if the box header is invalid.
+static JxlDecoderStatus ParseBoxHeader(const uint8_t* in, size_t size,
+                                       size_t pos, size_t file_pos,
+                                       JxlBoxType type, uint64_t* box_size,
+                                       uint64_t* header_size) {
+  if (OutOfBounds(pos, 8, size)) {
+    *header_size = 8;
+    return JXL_DEC_NEED_MORE_INPUT;
+  }
+  size_t box_start = pos;
+  // Box size, including this header itself.
+  *box_size = LoadBE32(in + pos);
+  pos += 4;
+  // The type directly follows the 32-bit size, also for extended-size boxes.
+  memcpy(type, in + pos, 4);
+  pos += 4;
+  if (*box_size == 1) {
+    // A 32-bit size of 1 announces a 64-bit size stored after the type.
+    *header_size = 16;
+    if (OutOfBounds(pos, 8, size)) return JXL_DEC_NEED_MORE_INPUT;
+    *box_size = LoadBE64(in + pos);
+    pos += 8;
+  }
+  *header_size = pos - box_start;
+  // A size of 0 means "unbounded"; any other size must cover the header.
+  if (*box_size > 0 && *box_size < *header_size) {
+    return JXL_INPUT_ERROR("invalid box size");
+  }
+  // Reject boxes whose end position would wrap around.
+  if (file_pos + *box_size < file_pos) {
+    return JXL_INPUT_ERROR("Box size overflow");
+  }
+  return JXL_DEC_SUCCESS;
+}
+
+// This includes handling the codestream if it is not a box-based jxl file.
+static JxlDecoderStatus HandleBoxes(JxlDecoder* dec) {
+  // Box handling loop
+  for (;;) {
+    if (dec->box_stage != BoxStage::kHeader) {
+      dec->AdvanceInput(dec->header_size);
+      dec->header_size = 0;
+#if JPEGXL_ENABLE_BOXES
+      if ((dec->events_wanted & JXL_DEC_BOX) &&
+          dec->box_out_buffer_set_current_box) {
+        uint8_t* next_out = dec->box_out_buffer + dec->box_out_buffer_pos;
+        size_t avail_out = dec->box_out_buffer_size - dec->box_out_buffer_pos;
+
+        JxlDecoderStatus box_result = dec->box_content_decoder.Process(
+            dec->next_in, dec->avail_in,
+            dec->file_pos - dec->box_contents_begin, &next_out, &avail_out);
+        size_t produced =
+            next_out - (dec->box_out_buffer + dec->box_out_buffer_pos);
+        dec->box_out_buffer_pos += produced;
+
+        // Don't return JXL_DEC_NEED_MORE_INPUT: the box stages below, instead,
+        // handle the input progression, and the above only outputs the part of
+        // the box seen so far.
+        if (box_result != JXL_DEC_SUCCESS &&
+            box_result != JXL_DEC_NEED_MORE_INPUT) {
+          return box_result;
+        }
+      }
+#endif
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+      if (dec->store_exif == 1 || dec->store_xmp == 1) {
+        std::vector<uint8_t>& metadata =
+            (dec->store_exif == 1) ? dec->exif_metadata : dec->xmp_metadata;
+        for (;;) {
+          if (metadata.empty()) metadata.resize(64);
+          uint8_t* orig_next_out = metadata.data() + dec->recon_out_buffer_pos;
+          uint8_t* next_out = orig_next_out;
+          size_t avail_out = metadata.size() - dec->recon_out_buffer_pos;
+          JxlDecoderStatus box_result = dec->metadata_decoder.Process(
+              dec->next_in, dec->avail_in,
+              dec->file_pos - dec->box_contents_begin, &next_out, &avail_out);
+          size_t produced = next_out - orig_next_out;
+          dec->recon_out_buffer_pos += produced;
+          if (box_result == JXL_DEC_BOX_NEED_MORE_OUTPUT) {
+            metadata.resize(metadata.size() * 2);
+          } else if (box_result == JXL_DEC_NEED_MORE_INPUT) {
+            break;  // box stage handling below will handle this instead
+          } else if (box_result == JXL_DEC_SUCCESS) {
+            size_t needed_size = (dec->store_exif == 1) ? dec->recon_exif_size
+                                                        : dec->recon_xmp_size;
+            if (dec->box_contents_unbounded &&
+                dec->recon_out_buffer_pos < needed_size) {
+              // Unbounded box, but we know the expected size due to the jbrd
+              // box's data. Treat this as the JXL_DEC_NEED_MORE_INPUT case.
+              break;
+            } else {
+              metadata.resize(dec->recon_out_buffer_pos);
+              if (dec->store_exif == 1) dec->store_exif = 2;
+              if (dec->store_xmp == 1) dec->store_xmp = 2;
+              break;
+            }
+          } else {
+            // error
+            return box_result;
+          }
+        }
+      }
+#endif
+    }
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+    if (dec->recon_output_jpeg == JpegReconStage::kSettingMetadata &&
+        !dec->JbrdNeedMoreBoxes()) {
+      jxl::jpeg::JPEGData* jpeg_data = dec->ib->jpeg_data.get();
+      if (dec->recon_exif_size) {
+        JxlDecoderStatus status = jxl::JxlToJpegDecoder::SetExif(
+            dec->exif_metadata.data(), dec->exif_metadata.size(), jpeg_data);
+        if (status != JXL_DEC_SUCCESS) return status;
+      }
+      if (dec->recon_xmp_size) {
+        JxlDecoderStatus status = jxl::JxlToJpegDecoder::SetXmp(
+            dec->xmp_metadata.data(), dec->xmp_metadata.size(), jpeg_data);
+        if (status != JXL_DEC_SUCCESS) return status;
+      }
+      dec->recon_output_jpeg = JpegReconStage::kOutputting;
+    }
+
+    if (dec->recon_output_jpeg == JpegReconStage::kOutputting &&
+        !dec->JbrdNeedMoreBoxes()) {
+      JxlDecoderStatus status =
+          dec->jpeg_decoder.WriteOutput(*dec->ib->jpeg_data);
+      if (status != JXL_DEC_SUCCESS) return status;
+      dec->recon_output_jpeg = JpegReconStage::kFinished;
+      dec->ib.reset();
+      if (dec->events_wanted & JXL_DEC_FULL_IMAGE) {
+        // Return the full image event here now, this may be delayed if this
+        // could only be done after decoding an exif or xmp box after the
+        // codestream.
+        return JXL_DEC_FULL_IMAGE;
+      }
+    }
+#endif
+
+    if (dec->box_stage == BoxStage::kHeader) {
+      if (!dec->have_container) {
+        if (dec->stage == DecoderStage::kCodestreamFinished)
+          return JXL_DEC_SUCCESS;
+        dec->box_stage = BoxStage::kCodestream;
+        dec->box_contents_unbounded = true;
+        continue;
+      }
+      if (dec->avail_in == 0) {
+        if (dec->stage != DecoderStage::kCodestreamFinished) {
+          // Not yet seen (all) codestream boxes.
+          return JXL_DEC_NEED_MORE_INPUT;
+        }
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+        if (dec->JbrdNeedMoreBoxes()) {
+          return JXL_DEC_NEED_MORE_INPUT;
+        }
+#endif
+        if (dec->input_closed) {
+          return JXL_DEC_SUCCESS;
+        }
+        if (!(dec->events_wanted & JXL_DEC_BOX)) {
+          // All codestream and jbrd metadata boxes finished, and no individual
+          // boxes requested by user, so no need to request any more input.
+          // This returns success for backwards compatibility, when
+          // JxlDecoderCloseInput and JXL_DEC_BOX did not exist, as well
+          // as for efficiency.
+          return JXL_DEC_SUCCESS;
+        }
+        // Even though we are exactly at a box end, there still may be more
+        // boxes. The user may call JxlDecoderCloseInput to indicate the input
+        // is finished and get success instead.
+        return JXL_DEC_NEED_MORE_INPUT;
+      }
+
+      bool boxed_codestream_done =
+          ((dec->events_wanted & JXL_DEC_BOX) &&
+           dec->stage == DecoderStage::kCodestreamFinished &&
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+           !dec->JbrdNeedMoreBoxes() &&
+#endif
+           dec->last_codestream_seen);
+      if (boxed_codestream_done && dec->avail_in >= 2 &&
+          dec->next_in[0] == 0xff &&
+          dec->next_in[1] == jxl::kCodestreamMarker) {
+        // We detected the start of the next naked codestream, so we can return
+        // success here.
+        return JXL_DEC_SUCCESS;
+      }
+
+      uint64_t box_size, header_size;
+      JxlDecoderStatus status =
+          ParseBoxHeader(dec->next_in, dec->avail_in, 0, dec->file_pos,
+                         dec->box_type, &box_size, &header_size);
+      if (status != JXL_DEC_SUCCESS) {
+        if (status == JXL_DEC_NEED_MORE_INPUT) {
+          dec->basic_info_size_hint =
+              InitialBasicInfoSizeHint() + header_size - dec->file_pos;
+        }
+        return status;
+      }
+      if (memcmp(dec->box_type, "brob", 4) == 0) {
+        if (dec->avail_in < header_size + 4) {
+          return JXL_DEC_NEED_MORE_INPUT;
+        }
+        memcpy(dec->box_decoded_type, dec->next_in + header_size,
+               sizeof(dec->box_decoded_type));
+      } else {
+        memcpy(dec->box_decoded_type, dec->box_type,
+               sizeof(dec->box_decoded_type));
+      }
+
+      // Box order validity checks
+      // The signature box at box_count == 1 is not checked here since that's
+      // already done at the beginning.
+      dec->box_count++;
+      if (boxed_codestream_done && memcmp(dec->box_type, "JXL ", 4) == 0) {
+        // We detected the start of the next boxed stream, so we can return
+        // success here.
+        return JXL_DEC_SUCCESS;
+      }
+      if (dec->box_count == 2 && memcmp(dec->box_type, "ftyp", 4) != 0) {
+        return JXL_INPUT_ERROR("the second box must be the ftyp box");
+      }
+      if (memcmp(dec->box_type, "ftyp", 4) == 0 && dec->box_count != 2) {
+        return JXL_INPUT_ERROR("the ftyp box must come second");
+      }
+
+      dec->box_contents_unbounded = (box_size == 0);
+      dec->box_contents_begin = dec->file_pos + header_size;
+      dec->box_contents_end =
+          dec->box_contents_unbounded ? 0 : (dec->file_pos + box_size);
+      dec->box_contents_size =
+          dec->box_contents_unbounded ? 0 : (box_size - header_size);
+      dec->box_size = box_size;
+      dec->header_size = header_size;
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+      if (dec->orig_events_wanted & JXL_DEC_JPEG_RECONSTRUCTION) {
+        // Initiate storing of Exif or XMP data for JPEG reconstruction
+        if (dec->store_exif == 0 &&
+            memcmp(dec->box_decoded_type, "Exif", 4) == 0) {
+          dec->store_exif = 1;
+          dec->recon_out_buffer_pos = 0;
+        }
+        if (dec->store_xmp == 0 &&
+            memcmp(dec->box_decoded_type, "xml ", 4) == 0) {
+          dec->store_xmp = 1;
+          dec->recon_out_buffer_pos = 0;
+        }
+      }
+#endif
+#if JPEGXL_ENABLE_BOXES
+      if (dec->events_wanted & JXL_DEC_BOX) {
+        bool decompress =
+            dec->decompress_boxes && memcmp(dec->box_type, "brob", 4) == 0;
+        dec->box_content_decoder.StartBox(
+            decompress, dec->box_contents_unbounded, dec->box_contents_size);
+      }
+#endif
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+      if (dec->store_exif == 1 || dec->store_xmp == 1) {
+        bool brob = memcmp(dec->box_type, "brob", 4) == 0;
+        dec->metadata_decoder.StartBox(brob, dec->box_contents_unbounded,
+                                       dec->box_contents_size);
+      }
+#endif
+      if (memcmp(dec->box_type, "ftyp", 4) == 0) {
+        dec->box_stage = BoxStage::kFtyp;
+      } else if (memcmp(dec->box_type, "jxlc", 4) == 0) {
+        if (dec->last_codestream_seen) {
+          return JXL_INPUT_ERROR("there can only be one jxlc box");
+        }
+        dec->last_codestream_seen = true;
+        dec->box_stage = BoxStage::kCodestream;
+      } else if (memcmp(dec->box_type, "jxlp", 4) == 0) {
+        dec->box_stage = BoxStage::kPartialCodestream;
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+      } else if ((dec->orig_events_wanted & JXL_DEC_JPEG_RECONSTRUCTION) &&
+                 memcmp(dec->box_type, "jbrd", 4) == 0) {
+        if (!(dec->events_wanted & JXL_DEC_JPEG_RECONSTRUCTION)) {
+          return JXL_INPUT_ERROR(
+              "multiple JPEG reconstruction boxes not supported");
+        }
+        dec->box_stage = BoxStage::kJpegRecon;
+#endif
+      } else {
+        dec->box_stage = BoxStage::kSkip;
+      }
+
+      if (dec->events_wanted & JXL_DEC_BOX) {
+        dec->box_event = true;
+        dec->box_out_buffer_set_current_box = false;
+        return JXL_DEC_BOX;
+      }
+    } else if (dec->box_stage == BoxStage::kFtyp) {
+      if (dec->box_contents_size < 12) {
+        return JXL_INPUT_ERROR("file type box too small");
+      }
+      if (dec->avail_in < 4) return JXL_DEC_NEED_MORE_INPUT;
+      if (memcmp(dec->next_in, "jxl ", 4) != 0) {
+        return JXL_INPUT_ERROR("file type box major brand must be \"jxl \"");
+      }
+      dec->AdvanceInput(4);
+      dec->box_stage = BoxStage::kSkip;
+    } else if (dec->box_stage == BoxStage::kPartialCodestream) {
+      if (dec->last_codestream_seen) {
+        return JXL_INPUT_ERROR("cannot have jxlp box after last jxlp box");
+      }
+      // TODO(lode): error if box is unbounded but last bit not set
+      if (dec->avail_in < 4) return JXL_DEC_NEED_MORE_INPUT;
+      if (!dec->box_contents_unbounded && dec->box_contents_size < 4) {
+        return JXL_INPUT_ERROR("jxlp box too small to contain index");
+      }
+      size_t jxlp_index = LoadBE32(dec->next_in);
+      // The high bit of jxlp_index indicates whether this is the last
+      // jxlp box.
+      if (jxlp_index & 0x80000000) {
+        dec->last_codestream_seen = true;
+      }
+      dec->AdvanceInput(4);
+      dec->box_stage = BoxStage::kCodestream;
+    } else if (dec->box_stage == BoxStage::kCodestream) {
+      JxlDecoderStatus status = jxl::JxlDecoderProcessCodestream(dec);
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+      if (status == JXL_DEC_FULL_IMAGE) {
+        if (dec->recon_output_jpeg != JpegReconStage::kNone) {
+          continue;
+        }
+      }
+#endif
+      if (status == JXL_DEC_NEED_MORE_INPUT) {
+        if (dec->file_pos == dec->box_contents_end &&
+            !dec->box_contents_unbounded) {
+          dec->box_stage = BoxStage::kHeader;
+          continue;
+        }
+      }
+
+      if (status == JXL_DEC_SUCCESS) {
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+        if (dec->JbrdNeedMoreBoxes()) {
+          dec->box_stage = BoxStage::kSkip;
+          continue;
+        }
+#endif
+        if (dec->box_contents_unbounded) {
+          // Last box reached and codestream done, nothing more to do.
+          break;
+        }
+        if (dec->events_wanted & JXL_DEC_BOX) {
+          // Codestream done, but there may be more other boxes.
+          dec->box_stage = BoxStage::kSkip;
+          continue;
+        }
+      }
+      return status;
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+    } else if (dec->box_stage == BoxStage::kJpegRecon) {
+      if (!dec->jpeg_decoder.IsParsingBox()) {
+        // This is a new JPEG reconstruction metadata box.
+        dec->jpeg_decoder.StartBox(dec->box_contents_unbounded,
+                                   dec->box_contents_size);
+      }
+      const uint8_t* next_in = dec->next_in;
+      size_t avail_in = dec->avail_in;
+      JxlDecoderStatus recon_result =
+          dec->jpeg_decoder.Process(&next_in, &avail_in);
+      size_t consumed = next_in - dec->next_in;
+      dec->AdvanceInput(consumed);
+      if (recon_result == JXL_DEC_JPEG_RECONSTRUCTION) {
+        jxl::jpeg::JPEGData* jpeg_data = dec->jpeg_decoder.GetJpegData();
+        size_t num_exif = jxl::JxlToJpegDecoder::NumExifMarkers(*jpeg_data);
+        size_t num_xmp = jxl::JxlToJpegDecoder::NumXmpMarkers(*jpeg_data);
+        if (num_exif) {
+          if (num_exif > 1) {
+            return JXL_INPUT_ERROR(
+                "multiple exif markers for JPEG reconstruction not supported");
+          }
+          if (JXL_DEC_SUCCESS != jxl::JxlToJpegDecoder::ExifBoxContentSize(
+                                     *jpeg_data, &dec->recon_exif_size)) {
+            return JXL_INPUT_ERROR("invalid jbrd exif size");
+          }
+        }
+        if (num_xmp) {
+          if (num_xmp > 1) {
+            return JXL_INPUT_ERROR(
+                "multiple XMP markers for JPEG reconstruction not supported");
+          }
+          if (JXL_DEC_SUCCESS != jxl::JxlToJpegDecoder::XmlBoxContentSize(
+                                     *jpeg_data, &dec->recon_xmp_size)) {
+            return JXL_INPUT_ERROR("invalid jbrd XMP size");
+          }
+        }
+
+        dec->box_stage = BoxStage::kHeader;
+        // If successful JPEG reconstruction, return the success if the user
+        // cares about it, otherwise continue.
+        if (dec->events_wanted & JXL_DEC_JPEG_RECONSTRUCTION) {
+          dec->events_wanted &= ~JXL_DEC_JPEG_RECONSTRUCTION;
+          return JXL_DEC_JPEG_RECONSTRUCTION;
+        }
+      } else {
+        // If anything else, return the result.
+        return recon_result;
+      }
+#endif
+    } else if (dec->box_stage == BoxStage::kSkip) {
+      if (dec->box_contents_unbounded) {
+        if (dec->input_closed) {
+          return JXL_DEC_SUCCESS;
+        }
+        if (!(dec->box_out_buffer_set)) {
+          // An unbounded box is always the last box. Not requesting box data,
+          // so return success even if JxlDecoderCloseInput was not called for
+          // backwards compatibility as well as efficiency since this box is
+          // being skipped.
+          return JXL_DEC_SUCCESS;
+        }
+        // Arbitrarily more bytes may follow, only JxlDecoderCloseInput can
+        // mark the end.
+        dec->AdvanceInput(dec->avail_in);
+        return JXL_DEC_NEED_MORE_INPUT;
+      }
+      // Amount of remaining bytes in the box that is being skipped.
+      size_t remaining = dec->box_contents_end - dec->file_pos;
+      if (dec->avail_in < remaining) {
+        // Indicate how many more bytes needed starting from next_in.
+        dec->basic_info_size_hint =
+            InitialBasicInfoSizeHint() + dec->box_contents_end - dec->file_pos;
+        // Don't have the full box yet, skip all we have so far
+        dec->AdvanceInput(dec->avail_in);
+        return JXL_DEC_NEED_MORE_INPUT;
+      } else {
+        // Full box available, skip all its remaining bytes
+        dec->AdvanceInput(remaining);
+        dec->box_stage = BoxStage::kHeader;
+      }
+    } else {
+      JXL_DASSERT(false);  // unknown box stage
+    }
+  }
+  return JXL_DEC_SUCCESS;
+}
+
+// Advances decoding using the input currently attached to `dec`.
+//
+// Marks a freshly initialized decoder as started, refuses to run when the
+// decoder is in the error stage, checks the file signature once, and then
+// delegates to HandleBoxes. A JXL_DEC_NEED_MORE_INPUT result after the input
+// was closed, or a JXL_DEC_SUCCESS result while the codestream (or, with JPEG
+// transcoding enabled, the jbrd metadata boxes) is still incomplete, is
+// reported as an input error instead.
+JxlDecoderStatus JxlDecoderProcessInput(JxlDecoder* dec) {
+  if (dec->stage == DecoderStage::kInited) {
+    dec->stage = DecoderStage::kStarted;
+  } else if (dec->stage == DecoderStage::kError) {
+    return JXL_API_ERROR(
+        "Cannot keep using decoder after it encountered an error, use "
+        "JxlDecoderReset to reset it");
+  }
+
+  if (!dec->got_signature) {
+    const JxlSignature signature =
+        JxlSignatureCheck(dec->next_in, dec->avail_in);
+    if (signature == JXL_SIG_INVALID) {
+      return JXL_INPUT_ERROR("invalid signature");
+    }
+    if (signature == JXL_SIG_NOT_ENOUGH_BYTES) {
+      // More bytes can only help while the input is still open.
+      if (!dec->input_closed) return JXL_DEC_NEED_MORE_INPUT;
+      return JXL_INPUT_ERROR("file too small for signature");
+    }
+
+    dec->got_signature = true;
+
+    if (signature != JXL_SIG_CONTAINER) {
+      // No container signature: flag the last codestream as already seen.
+      dec->last_codestream_seen = true;
+    } else {
+      dec->have_container = 1;
+    }
+  }
+
+  const JxlDecoderStatus box_status = HandleBoxes(dec);
+
+  if (box_status == JXL_DEC_NEED_MORE_INPUT) {
+    if (dec->input_closed) {
+      return JXL_INPUT_ERROR("premature end of input");
+    }
+    return box_status;
+  }
+  if (box_status != JXL_DEC_SUCCESS) return box_status;
+
+  // Even if the box handling returns success, certain types of
+  // data may be missing.
+  if (dec->CanUseMoreCodestreamInput()) {
+    return JXL_INPUT_ERROR("codestream never finished");
+  }
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+  if (dec->JbrdNeedMoreBoxes()) {
+    return JXL_INPUT_ERROR("missing metadata boxes for jpeg reconstruction");
+  }
+#endif
+
+  return JXL_DEC_SUCCESS;
+}
+
+// To ensure ABI forward-compatibility, this struct has a constant size.
+// Compilation fails here if sizeof(JxlBasicInfo) is anything other than 204
+// bytes, e.g. after a field is added or removed without adjusting the layout.
+static_assert(sizeof(JxlBasicInfo) == 204,
+              "JxlBasicInfo struct size should remain constant");
+
+// Copies the basic image information into `info` once it has been parsed.
+//
+// Returns JXL_DEC_NEED_MORE_INPUT while the basic info is not yet available
+// and JXL_DEC_SUCCESS otherwise. `info` may be null, in which case only the
+// status is reported and nothing is written.
+JxlDecoderStatus JxlDecoderGetBasicInfo(const JxlDecoder* dec,
+                                        JxlBasicInfo* info) {
+  if (!dec->got_basic_info) return JXL_DEC_NEED_MORE_INPUT;
+  if (info == nullptr) return JXL_DEC_SUCCESS;
+
+  // Start from an all-zero struct so fields not written below stay 0.
+  memset(info, 0, sizeof(*info));
+
+  const jxl::ImageMetadata& meta = dec->metadata.m;
+  const auto& bit_depth = meta.bit_depth;
+  const auto& tone_mapping = meta.tone_mapping;
+
+  info->have_container = dec->have_container;
+  info->xsize = dec->metadata.size.xsize();
+  info->ysize = dec->metadata.size.ysize();
+  info->uses_original_profile = !meta.xyb_encoded;
+
+  info->bits_per_sample = bit_depth.bits_per_sample;
+  info->exponent_bits_per_sample = bit_depth.exponent_bits_per_sample;
+
+  info->have_preview = meta.have_preview;
+  info->have_animation = meta.have_animation;
+  info->orientation = static_cast<JxlOrientation>(meta.orientation);
+
+  // Unless the caller asked to keep the stored orientation, report the
+  // dimensions with width and height exchanged for orientations from
+  // JXL_ORIENT_TRANSPOSE upwards, and advertise identity orientation.
+  if (!dec->keep_orientation) {
+    const bool swap_axes = info->orientation >= JXL_ORIENT_TRANSPOSE;
+    if (swap_axes) std::swap(info->xsize, info->ysize);
+    info->orientation = JXL_ORIENT_IDENTITY;
+  }
+
+  // A positive desired intensity target on the decoder replaces the value
+  // from the metadata.
+  info->intensity_target = meta.IntensityTarget();
+  if (dec->desired_intensity_target > 0) {
+    info->intensity_target = dec->desired_intensity_target;
+  }
+  info->min_nits = tone_mapping.min_nits;
+  info->relative_to_max_display = tone_mapping.relative_to_max_display;
+  info->linear_below = tone_mapping.linear_below;
+
+  const jxl::ExtraChannelInfo* alpha = meta.Find(jxl::ExtraChannel::kAlpha);
+  if (alpha == nullptr) {
+    info->alpha_bits = 0;
+    info->alpha_exponent_bits = 0;
+    info->alpha_premultiplied = 0;
+  } else {
+    info->alpha_bits = alpha->bit_depth.bits_per_sample;
+    info->alpha_exponent_bits = alpha->bit_depth.exponent_bits_per_sample;
+    info->alpha_premultiplied = alpha->alpha_associated;
+  }
+
+  const bool is_gray =
+      meta.color_encoding.GetColorSpace() == jxl::ColorSpace::kGray;
+  info->num_color_channels = is_gray ? 1 : 3;
+
+  info->num_extra_channels = meta.num_extra_channels;
+
+  if (info->have_preview) {
+    info->preview.xsize = meta.preview_size.xsize();
+    info->preview.ysize = meta.preview_size.ysize();
+  }
+
+  if (info->have_animation) {
+    const auto& animation = meta.animation;
+    info->animation.tps_numerator = animation.tps_numerator;
+    info->animation.tps_denominator = animation.tps_denominator;
+    info->animation.num_loops = animation.num_loops;
+    info->animation.have_timecodes = animation.have_timecodes;
+  }
+
+  // Without an explicit intrinsic size, fall back to the (possibly
+  // orientation-adjusted) image size computed above.
+  if (!meta.have_intrinsic_size) {
+    info->intrinsic_xsize = info->xsize;
+    info->intrinsic_ysize = info->ysize;
+  } else {
+    info->intrinsic_xsize = meta.intrinsic_size.xsize();
+    info->intrinsic_ysize = meta.intrinsic_size.ysize();
+  }
+
+  return JXL_DEC_SUCCESS;
+}
+
+// Copies the description of extra channel `index` into `info`.
+//
+// Returns JXL_DEC_NEED_MORE_INPUT while the basic info is not yet available,
+// JXL_DEC_ERROR when `index` is not a valid extra channel index, and
+// JXL_DEC_SUCCESS otherwise. `info` may be null to only query availability
+// through the return value; nothing is written in that case.
+JxlDecoderStatus JxlDecoderGetExtraChannelInfo(const JxlDecoder* dec,
+                                               size_t index,
+                                               JxlExtraChannelInfo* info) {
+  if (!dec->got_basic_info) return JXL_DEC_NEED_MORE_INPUT;
+
+  const std::vector<jxl::ExtraChannelInfo>& channels =
+      dec->metadata.m.extra_channel_info;
+
+  if (index >= channels.size()) return JXL_DEC_ERROR;  // out of bounds
+
+  // Tolerate a null output struct, as JxlDecoderGetBasicInfo does, instead of
+  // dereferencing it.
+  if (info == nullptr) return JXL_DEC_SUCCESS;
+
+  const jxl::ExtraChannelInfo& channel = channels[index];
+
+  info->type = static_cast<JxlExtraChannelType>(channel.type);
+  info->bits_per_sample = channel.bit_depth.bits_per_sample;
+  // Exponent bits are only reported for floating point samples.
+  info->exponent_bits_per_sample =
+      channel.bit_depth.floating_point_sample
+          ? channel.bit_depth.exponent_bits_per_sample
+          : 0;
+  info->dim_shift = channel.dim_shift;
+  // Length of the name without a terminating null character.
+  info->name_length = channel.name.size();
+  info->alpha_premultiplied = channel.alpha_associated;
+  info->spot_color[0] = channel.spot_color[0];
+  info->spot_color[1] = channel.spot_color[1];
+  info->spot_color[2] = channel.spot_color[2];
+  info->spot_color[3] = channel.spot_color[3];
+  info->cfa_channel = channel.cfa_channel;
+
+  return JXL_DEC_SUCCESS;
+}
+
+// Copies the null-terminated name of extra channel `index` into `name`.
+//
+// `size` is the capacity of `name` in bytes and must have room for the name
+// plus its terminating null character. Returns JXL_DEC_NEED_MORE_INPUT while
+// the basic info is not yet available, and JXL_DEC_ERROR for an invalid index
+// or a buffer that is too small.
+JxlDecoderStatus JxlDecoderGetExtraChannelName(const JxlDecoder* dec,
+                                               size_t index, char* name,
+                                               size_t size) {
+  if (!dec->got_basic_info) return JXL_DEC_NEED_MORE_INPUT;
+
+  const std::vector<jxl::ExtraChannelInfo>& all_channels =
+      dec->metadata.m.extra_channel_info;
+  if (index >= all_channels.size()) return JXL_DEC_ERROR;  // out of bounds
+
+  const auto& channel_name = all_channels[index].name;
+
+  // Bytes to copy, including the null-termination character.
+  const size_t bytes_needed = channel_name.size() + 1;
+  if (size < bytes_needed) return JXL_DEC_ERROR;
+
+  memcpy(name, channel_name.c_str(), bytes_needed);
+
+  return JXL_DEC_SUCCESS;
+}
+
+namespace {
+
+// Selects the jxl::ColorEncoding that describes `target` and stores a pointer
+// to it in `*encoding`.
+//
+// Returns JXL_DEC_NEED_MORE_INPUT, leaving `*encoding` untouched, until all
+// headers have been decoded. For JXL_COLOR_PROFILE_TARGET_DATA on an
+// XYB-encoded image the decoder's output encoding is selected; in every other
+// case the color encoding from the image metadata is used.
+JxlDecoderStatus GetColorEncodingForTarget(
+    const JxlDecoder* dec, JxlColorProfileTarget target,
+    const jxl::ColorEncoding** encoding) {
+  if (!dec->got_all_headers) return JXL_DEC_NEED_MORE_INPUT;
+
+  const bool use_output_encoding =
+      target == JXL_COLOR_PROFILE_TARGET_DATA && dec->metadata.m.xyb_encoded;
+  *encoding =
+      use_output_encoding
+          ? &dec->passes_state->output_encoding_info.color_encoding
+          : &dec->metadata.m.color_encoding;
+  return JXL_DEC_SUCCESS;
+}
+}  // namespace
+
+// Reports the color profile for `target` as a JxlColorEncoding.
+//
+// Forwards any non-success status from GetColorEncodingForTarget, and returns
+// JXL_DEC_ERROR when the selected encoding wants an ICC profile (no encoded
+// profile available). `color_encoding` may be null to only query the status.
+JxlDecoderStatus JxlDecoderGetColorAsEncodedProfile(
+    const JxlDecoder* dec, JxlColorProfileTarget target,
+    JxlColorEncoding* color_encoding) {
+  const jxl::ColorEncoding* internal_encoding = nullptr;
+  const JxlDecoderStatus lookup =
+      GetColorEncodingForTarget(dec, target, &internal_encoding);
+  if (lookup != JXL_DEC_SUCCESS) return lookup;
+
+  // Indicate no encoded profile available.
+  if (internal_encoding->WantICC()) return JXL_DEC_ERROR;
+
+  if (color_encoding != nullptr) {
+    ConvertInternalToExternalColorEncoding(*internal_encoding, color_encoding);
+  }
+
+  return JXL_DEC_SUCCESS;
+}
+
+// Reports the byte size of the ICC profile for `target` through `size`.
+//
+// Forwards any non-success status from GetColorEncodingForTarget. Returns
+// JXL_DEC_ERROR when the selected encoding wants an ICC profile but the
+// metadata color space is unknown or XYB. `size` may be null to only query
+// the status.
+JxlDecoderStatus JxlDecoderGetICCProfileSize(const JxlDecoder* dec,
+                                             JxlColorProfileTarget target,
+                                             size_t* size) {
+  const jxl::ColorEncoding* selected = nullptr;
+  const JxlDecoderStatus lookup =
+      GetColorEncodingForTarget(dec, target, &selected);
+  if (lookup != JXL_DEC_SUCCESS) return lookup;
+
+  if (selected->WantICC()) {
+    switch (dec->metadata.m.color_encoding.GetColorSpace()) {
+      case jxl::ColorSpace::kUnknown:
+      case jxl::ColorSpace::kXYB:
+        // This indicates there's no ICC profile available
+        // TODO(lode): for the XYB case, do we want to craft an ICC profile
+        // that represents XYB as an RGB profile? It may be possible, but not
+        // with only 1D transfer functions.
+        return JXL_DEC_ERROR;
+      default:
+        break;
+    }
+  }
+
+  if (size != nullptr) {
+    *size = selected->ICC().size();
+  }
+
+  return JXL_DEC_SUCCESS;
+}
+
+// Copies the ICC profile for `target` into `icc_profile`.
+//
+// `size` is the capacity of `icc_profile` in bytes; it must be at least the
+// size reported by JxlDecoderGetICCProfileSize, otherwise an API error is
+// returned. Any non-success status from the size query or the encoding lookup
+// is forwarded unchanged.
+JxlDecoderStatus JxlDecoderGetColorAsICCProfile(const JxlDecoder* dec,
+                                                JxlColorProfileTarget target,
+                                                uint8_t* icc_profile,
+                                                size_t size) {
+  // This also checks the NEED_MORE_INPUT and the unknown/xyb cases
+  size_t required_size;
+  const JxlDecoderStatus size_status =
+      JxlDecoderGetICCProfileSize(dec, target, &required_size);
+  if (size_status != JXL_DEC_SUCCESS) return size_status;
+  if (size < required_size) {
+    return JXL_API_ERROR("ICC profile output too small");
+  }
+
+  const jxl::ColorEncoding* selected = nullptr;
+  const JxlDecoderStatus lookup =
+      GetColorEncodingForTarget(dec, target, &selected);
+  if (lookup != JXL_DEC_SUCCESS) return lookup;
+
+  const auto& icc = selected->ICC();
+  memcpy(icc_profile, icc.data(), icc.size());
+
+  return JXL_DEC_SUCCESS;
+}
+
+namespace {
+
+// Validates that an output buffer size can be computed for `format` and
+// stores the number of bits per channel sample in `*bits`.
+//
+// Returns JXL_DEC_NEED_MORE_INPUT while the basic info is missing, and an API
+// error when (in non-coalescing mode) the frame header is not available yet,
+// when `format` asks for more than 4 channels, or when BitsPerChannel yields
+// 0 for the data type. `*bits` is only written once the earlier checks pass.
+JxlDecoderStatus PrepareSizeCheck(const JxlDecoder* dec,
+                                  const JxlPixelFormat* format, size_t* bits) {
+  // Don't know image dimensions yet, cannot check for valid size.
+  if (!dec->got_basic_info) return JXL_DEC_NEED_MORE_INPUT;
+
+  if (!dec->coalescing) {
+    const bool frame_header_ready =
+        dec->frame_header && dec->frame_stage != FrameStage::kHeader;
+    if (!frame_header_ready) {
+      return JXL_API_ERROR("Don't know frame dimensions yet");
+    }
+  }
+  if (format->num_channels > 4) {
+    return JXL_API_ERROR("More than 4 channels not supported");
+  }
+
+  const size_t bits_per_channel = BitsPerChannel(format->data_type);
+  *bits = bits_per_channel;
+  if (bits_per_channel == 0) {
+    return JXL_API_ERROR("Invalid/unsupported data type");
+  }
+
+  return JXL_DEC_SUCCESS;
+}
+
+}  // namespace
+
+// Returns the downsampling target currently stored on the decoder.
+size_t JxlDecoderGetIntendedDownsamplingRatio(JxlDecoder* dec) {
+  const size_t ratio = dec->downsampling_target;
+  return ratio;
+}
+
+// Flushes what has been decoded of the current frame to the image output.
+//
+// Returns JXL_DEC_ERROR when no image output has been set, when the decoder
+// is not in the full-frame stage, when DC has not been decoded yet, or when
+// the frame decoder's Flush fails; JXL_DEC_SUCCESS otherwise.
+JxlDecoderStatus JxlDecoderFlushImage(JxlDecoder* dec) {
+  const bool can_flush =
+      dec->image_out_buffer_set && dec->frame_stage == FrameStage::kFull;
+  if (!can_flush) return JXL_DEC_ERROR;
+
+  JXL_DASSERT(dec->frame_dec);
+
+  // FrameDecoder::Flush currently requires DC to have been decoded already
+  // to work correctly.
+  if (!dec->frame_dec->HasDecodedDC()) return JXL_DEC_ERROR;
+
+  return dec->frame_dec->Flush() ? JXL_DEC_SUCCESS : JXL_DEC_ERROR;
+}
+
+// Computes the minimum buffer size in bytes needed to hold the preview image
+// in the given pixel `format` and stores it in `*size`.
+//
+// Every row except the last is padded up to a multiple of `format->align`
+// (when align > 1); the last row is counted without padding. Non-success
+// statuses from PrepareSizeCheck are forwarded, and fewer than 3 channels are
+// rejected unless the image color encoding is gray.
+JXL_EXPORT JxlDecoderStatus JxlDecoderPreviewOutBufferSize(
+    const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size) {
+  size_t bits_per_channel;
+  const JxlDecoderStatus check =
+      PrepareSizeCheck(dec, format, &bits_per_channel);
+  if (check != JXL_DEC_SUCCESS) return check;
+
+  if (format->num_channels < 3 &&
+      !dec->image_metadata.color_encoding.IsGray()) {
+    return JXL_API_ERROR("Number of channels is too low for color output");
+  }
+
+  const size_t width =
+      dec->metadata.oriented_preview_xsize(dec->keep_orientation);
+  const size_t height =
+      dec->metadata.oriented_preview_ysize(dec->keep_orientation);
+
+  // Bytes of pixel data in one row, without alignment padding.
+  const size_t unpadded_row = jxl::DivCeil(
+      width * format->num_channels * bits_per_channel, jxl::kBitsPerByte);
+  // Distance in bytes between the starts of consecutive rows.
+  size_t stride = unpadded_row;
+  if (format->align > 1) {
+    stride = jxl::DivCeil(unpadded_row, format->align) * format->align;
+  }
+
+  *size = stride * (height - 1) + unpadded_row;
+  return JXL_DEC_SUCCESS;
+}
+
+// Registers `buffer` (of `size` bytes) as the destination for the preview
+// image in the given pixel `format`.
+//
+// Fails with an API error unless the basic info is known, the image has a
+// preview, and JXL_DEC_PREVIEW_IMAGE was among the originally requested
+// events. Returns JXL_DEC_ERROR when `size` is below the minimum computed by
+// JxlDecoderPreviewOutBufferSize.
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetPreviewOutBuffer(
+    JxlDecoder* dec, const JxlPixelFormat* format, void* buffer, size_t size) {
+  const bool preview_expected =
+      dec->got_basic_info && dec->metadata.m.have_preview &&
+      (dec->orig_events_wanted & JXL_DEC_PREVIEW_IMAGE);
+  if (!preview_expected) {
+    return JXL_API_ERROR("No preview out buffer needed at this time");
+  }
+  if (format->num_channels < 3 &&
+      !dec->image_metadata.color_encoding.IsGray()) {
+    return JXL_API_ERROR("Number of channels is too low for color output");
+  }
+
+  // This also checks whether the format is valid and supported and basic info
+  // is available.
+  size_t required_size;
+  const JxlDecoderStatus size_status =
+      JxlDecoderPreviewOutBufferSize(dec, format, &required_size);
+  if (size_status != JXL_DEC_SUCCESS) return size_status;
+  if (size < required_size) return JXL_DEC_ERROR;
+
+  dec->image_out_buffer_set = true;
+  dec->image_out_buffer = buffer;
+  dec->image_out_size = size;
+  dec->image_out_format = *format;
+
+  return JXL_DEC_SUCCESS;
+}
+
+// Computes the minimum buffer size in bytes needed to hold the current image
+// or frame in the given pixel `format` and stores it in `*size`.
+//
+// Each row, including the last, is padded up to a multiple of `format->align`
+// when align > 1. Non-success statuses from PrepareSizeCheck are forwarded,
+// and fewer than 3 channels are rejected unless the image color encoding is
+// gray.
+JXL_EXPORT JxlDecoderStatus JxlDecoderImageOutBufferSize(
+    const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size) {
+  size_t bits_per_channel;
+  const JxlDecoderStatus check =
+      PrepareSizeCheck(dec, format, &bits_per_channel);
+  if (check != JXL_DEC_SUCCESS) return check;
+
+  if (format->num_channels < 3 &&
+      !dec->image_metadata.color_encoding.IsGray()) {
+    return JXL_API_ERROR("Number of channels is too low for color output");
+  }
+
+  size_t width, height;
+  GetCurrentDimensions(dec, width, height);
+
+  size_t stride = jxl::DivCeil(
+      width * format->num_channels * bits_per_channel, jxl::kBitsPerByte);
+  if (format->align > 1) {
+    stride = jxl::DivCeil(stride, format->align) * format->align;
+  }
+  *size = stride * height;
+
+  return JXL_DEC_SUCCESS;
+}
+
+// Registers `buffer` (of `size` bytes) as the destination for the decoded
+// image in the given pixel `format`.
+//
+// Fails with an API error unless the basic info is known and
+// JXL_DEC_FULL_IMAGE was among the originally requested events, or when an
+// image out callback is already installed. Returns JXL_DEC_ERROR when `size`
+// is below the minimum computed by JxlDecoderImageOutBufferSize.
+JxlDecoderStatus JxlDecoderSetImageOutBuffer(JxlDecoder* dec,
+                                             const JxlPixelFormat* format,
+                                             void* buffer, size_t size) {
+  const bool full_image_expected =
+      dec->got_basic_info && (dec->orig_events_wanted & JXL_DEC_FULL_IMAGE);
+  if (!full_image_expected) {
+    return JXL_API_ERROR("No image out buffer needed at this time");
+  }
+
+  const bool callback_installed =
+      dec->image_out_buffer_set && dec->image_out_run_callback != nullptr;
+  if (callback_installed) {
+    return JXL_API_ERROR(
+        "Cannot change from image out callback to image out buffer");
+  }
+
+  if (format->num_channels < 3 &&
+      !dec->image_metadata.color_encoding.IsGray()) {
+    return JXL_API_ERROR("Number of channels is too low for color output");
+  }
+
+  // This also checks whether the format is valid and supported and basic info
+  // is available.
+  size_t required_size;
+  const JxlDecoderStatus size_status =
+      JxlDecoderImageOutBufferSize(dec, format, &required_size);
+  if (size_status != JXL_DEC_SUCCESS) return size_status;
+  if (size < required_size) return JXL_DEC_ERROR;
+
+  dec->image_out_buffer_set = true;
+  dec->image_out_buffer = buffer;
+  dec->image_out_size = size;
+  dec->image_out_format = *format;
+
+  return JXL_DEC_SUCCESS;
+}
+
+// Computes the minimum buffer size in bytes needed to hold extra channel
+// `index` of the current image or frame and stores it in `*size`.
+//
+// The channel count of `format` is ignored: an extra channel is always sized
+// as a single channel. Each row is padded up to a multiple of
+// `format->align` when align > 1. Fails with an API error unless the basic
+// info is known, JXL_DEC_FULL_IMAGE was among the originally requested
+// events, and `index` is a valid extra channel index.
+JxlDecoderStatus JxlDecoderExtraChannelBufferSize(const JxlDecoder* dec,
+                                                  const JxlPixelFormat* format,
+                                                  size_t* size,
+                                                  uint32_t index) {
+  const bool full_image_expected =
+      dec->got_basic_info && (dec->orig_events_wanted & JXL_DEC_FULL_IMAGE);
+  if (!full_image_expected) {
+    return JXL_API_ERROR("No extra channel buffer needed at this time");
+  }
+
+  if (index >= dec->metadata.m.num_extra_channels) {
+    return JXL_API_ERROR("Invalid extra channel index");
+  }
+
+  // Do not use format's num_channels
+  const size_t channels_per_pixel = 1;
+
+  size_t bits_per_channel;
+  const JxlDecoderStatus check =
+      PrepareSizeCheck(dec, format, &bits_per_channel);
+  if (check != JXL_DEC_SUCCESS) return check;
+
+  size_t width, height;
+  GetCurrentDimensions(dec, width, height);
+
+  size_t stride = jxl::DivCeil(width * channels_per_pixel * bits_per_channel,
+                               jxl::kBitsPerByte);
+  if (format->align > 1) {
+    stride = jxl::DivCeil(stride, format->align) * format->align;
+  }
+  *size = stride * height;
+
+  return JXL_DEC_SUCCESS;
+}
+
+// Registers `buffer` (of `size` bytes) as the destination for extra channel
+// `index` in the given pixel `format`.
+//
+// Non-success statuses from JxlDecoderExtraChannelBufferSize are forwarded,
+// and JXL_DEC_ERROR is returned when `size` is below the computed minimum.
+// The stored format always has num_channels forced to 1.
+JxlDecoderStatus JxlDecoderSetExtraChannelBuffer(JxlDecoder* dec,
+                                                 const JxlPixelFormat* format,
+                                                 void* buffer, size_t size,
+                                                 uint32_t index) {
+  // This also checks whether the format and index are valid and supported and
+  // basic info is available.
+  size_t required_size;
+  const JxlDecoderStatus size_status =
+      JxlDecoderExtraChannelBufferSize(dec, format, &required_size, index);
+  if (size_status != JXL_DEC_SUCCESS) return size_status;
+  if (size < required_size) return JXL_DEC_ERROR;
+
+  // Grow the output table to one entry per extra channel on first use of an
+  // index that is not covered yet.
+  auto& outputs = dec->extra_channel_output;
+  if (index >= outputs.size()) {
+    outputs.resize(dec->metadata.m.num_extra_channels, {{}, nullptr, 0});
+  }
+  // Guaranteed correct thanks to check in JxlDecoderExtraChannelBufferSize.
+  JXL_ASSERT(index < outputs.size());
+
+  auto& slot = outputs[index];
+  slot.format = *format;
+  slot.format.num_channels = 1;
+  slot.buffer = buffer;
+  slot.buffer_size = size;
+
+  return JXL_DEC_SUCCESS;
+}
+
+// Installs a single image out `callback` by adapting it to the multithreaded
+// callback interface.
+//
+// The callback and its `opaque` pointer are stored on the decoder, and that
+// stored pair is passed as the init opaque to
+// JxlDecoderSetMultithreadedImageOutCallback, whose status is returned. Note
+// that the pair is stored before any validation takes place.
+JxlDecoderStatus JxlDecoderSetImageOutCallback(JxlDecoder* dec,
+                                               const JxlPixelFormat* format,
+                                               JxlImageOutCallback callback,
+                                               void* opaque) {
+  auto& stored = dec->simple_image_out_callback;
+  stored.callback = callback;
+  stored.opaque = opaque;
+
+  // Nothing to tear down: the run opaque is the struct stored on the decoder.
+  const auto on_destroy = +[](void* run_opaque) {};
+  // Forwards each run of pixels to the stored callback, dropping thread_id.
+  const auto on_run = +[](void* run_opaque, size_t thread_id, size_t x,
+                          size_t y, size_t num_pixels, const void* pixels) {
+    const auto* const target =
+        static_cast<const JxlDecoder::SimpleImageOutCallback*>(run_opaque);
+    target->callback(target->opaque, x, y, num_pixels, pixels);
+  };
+  // No initialization to do, just reuse init_opaque as run_opaque.
+  const auto on_init = +[](void* init_opaque, size_t num_threads,
+                           size_t num_pixels_per_thread) {
+    return init_opaque;
+  };
+
+  return JxlDecoderSetMultithreadedImageOutCallback(
+      dec, format, on_init, on_run,
+      /*destroy_callback=*/on_destroy, &stored);
+}
+
+// Installs the init/run/destroy callback triple used to deliver decoded
+// pixels. Fails when an image out buffer is already in use, when any callback
+// is null, or when PrepareSizeCheck rejects `format`.
+JxlDecoderStatus JxlDecoderSetMultithreadedImageOutCallback(
+    JxlDecoder* dec, const JxlPixelFormat* format,
+    JxlImageOutInitCallback init_callback, JxlImageOutRunCallback run_callback,
+    JxlImageOutDestroyCallback destroy_callback, void* init_opaque) {
+  const bool buffer_in_use =
+      dec->image_out_buffer_set && !!dec->image_out_buffer;
+  if (buffer_in_use) {
+    return JXL_API_ERROR(
+        "Cannot change from image out buffer to image out callback");
+  }
+
+  const bool have_all_callbacks = init_callback != nullptr &&
+                                  run_callback != nullptr &&
+                                  destroy_callback != nullptr;
+  if (!have_all_callbacks) {
+    return JXL_API_ERROR("All callbacks are required");
+  }
+
+  // Only the status matters here; the computed bit count is discarded.
+  size_t unused_bits;
+  const JxlDecoderStatus format_status =
+      PrepareSizeCheck(dec, format, &unused_bits);
+  if (format_status != JXL_DEC_SUCCESS) return format_status;
+
+  dec->image_out_format = *format;
+  dec->image_out_init_opaque = init_opaque;
+  dec->image_out_init_callback = init_callback;
+  dec->image_out_run_callback = run_callback;
+  dec->image_out_destroy_callback = destroy_callback;
+  dec->image_out_buffer_set = true;
+
+  return JXL_DEC_SUCCESS;
+}
+
+// Fills `header` with information about the current frame: animation timing,
+// name length, last-frame flag, layer size, crop offset and blending info.
+// Returns an API error when no frame header exists yet or the decoder is still
+// in the kHeader frame stage; otherwise returns JXL_DEC_SUCCESS.
+JxlDecoderStatus JxlDecoderGetFrameHeader(const JxlDecoder* dec,
+                                          JxlFrameHeader* header) {
+  if (!dec->frame_header || dec->frame_stage == FrameStage::kHeader) {
+    return JXL_API_ERROR("no frame header available");
+  }
+  const auto& metadata = dec->metadata.m;
+  // Start from all zeros so fields that are not assigned below (for example
+  // duration and timecode of a non-animated image) read as zero.
+  memset(header, 0, sizeof(*header));
+  if (metadata.have_animation) {
+    header->duration = dec->frame_header->animation_frame.duration;
+    if (metadata.animation.have_timecodes) {
+      header->timecode = dec->frame_header->animation_frame.timecode;
+    }
+  }
+  header->name_length = dec->frame_header->name.size();
+  header->is_last = dec->frame_header->is_last;
+  size_t xsize, ysize;
+  GetCurrentDimensions(dec, xsize, ysize);
+  header->layer_info.xsize = xsize;
+  header->layer_info.ysize = ysize;
+  // A crop offset is only reported when coalescing is off and the frame
+  // declares a custom size or origin.
+  if (!dec->coalescing && dec->frame_header->custom_size_or_origin) {
+    header->layer_info.crop_x0 = dec->frame_header->frame_origin.x0;
+    header->layer_info.crop_y0 = dec->frame_header->frame_origin.y0;
+    header->layer_info.have_crop = JXL_TRUE;
+  } else {
+    header->layer_info.crop_x0 = 0;
+    header->layer_info.crop_y0 = 0;
+    header->layer_info.have_crop = JXL_FALSE;
+  }
+  if (!dec->keep_orientation && !dec->coalescing) {
+    // orient the crop offset
+    size_t W = dec->metadata.oriented_xsize(false);
+    size_t H = dec->metadata.oriented_ysize(false);
+    // Orientation values above 4 exchange the x and y offsets first.
+    if (metadata.orientation > 4) {
+      std::swap(header->layer_info.crop_x0, header->layer_info.crop_y0);
+    }
+    // o is (orientation - 1) modulo 4: values 1 and 2 mirror the x offset
+    // against the oriented width, values 2 and 3 mirror the y offset against
+    // the oriented height.
+    size_t o = (metadata.orientation - 1) & 3;
+    if (o > 0 && o < 3) {
+      header->layer_info.crop_x0 = W - xsize - header->layer_info.crop_x0;
+    }
+    if (o > 1) {
+      header->layer_info.crop_y0 = H - ysize - header->layer_info.crop_y0;
+    }
+  }
+  if (dec->coalescing) {
+    // With coalescing on, a fixed "replace" blend description is reported
+    // instead of the values stored in the frame header.
+    header->layer_info.blend_info.blendmode = JXL_BLEND_REPLACE;
+    header->layer_info.blend_info.source = 0;
+    header->layer_info.blend_info.alpha = 0;
+    header->layer_info.blend_info.clamp = JXL_FALSE;
+    header->layer_info.save_as_reference = 0;
+  } else {
+    header->layer_info.blend_info.blendmode =
+        static_cast<JxlBlendMode>(dec->frame_header->blending_info.mode);
+    header->layer_info.blend_info.source =
+        dec->frame_header->blending_info.source;
+    header->layer_info.blend_info.alpha =
+        dec->frame_header->blending_info.alpha_channel;
+    header->layer_info.blend_info.clamp =
+        dec->frame_header->blending_info.clamp;
+    header->layer_info.save_as_reference = dec->frame_header->save_as_reference;
+  }
+  return JXL_DEC_SUCCESS;
+}
+
+// Copies the blending description of extra channel `index` of the current
+// frame into `blend_info`. Fails when no frame header is available yet or
+// when `index` is not below the image's extra channel count.
+JxlDecoderStatus JxlDecoderGetExtraChannelBlendInfo(const JxlDecoder* dec,
+                                                    size_t index,
+                                                    JxlBlendInfo* blend_info) {
+  const bool header_ready =
+      dec->frame_header && dec->frame_stage != FrameStage::kHeader;
+  if (!header_ready) {
+    return JXL_API_ERROR("no frame header available");
+  }
+  if (index >= dec->metadata.m.num_extra_channels) {
+    return JXL_API_ERROR("Invalid extra channel index");
+  }
+
+  const auto& channel_blending =
+      dec->frame_header->extra_channel_blending_info[index];
+  blend_info->blendmode = static_cast<JxlBlendMode>(channel_blending.mode);
+  blend_info->source = channel_blending.source;
+  blend_info->alpha = channel_blending.alpha_channel;
+  blend_info->clamp = channel_blending.clamp;
+  return JXL_DEC_SUCCESS;
+}
+
+// Copies the current frame's name, including its terminating NUL, into `name`.
+// `size` is the capacity of `name` in bytes and must be at least the name
+// length plus one; otherwise an API error is returned and nothing is written.
+JxlDecoderStatus JxlDecoderGetFrameName(const JxlDecoder* dec, char* name,
+                                        size_t size) {
+  const bool header_ready =
+      dec->frame_header && dec->frame_stage != FrameStage::kHeader;
+  if (!header_ready) {
+    return JXL_API_ERROR("no frame header available");
+  }
+
+  const auto& frame_name = dec->frame_header->name;
+  const size_t bytes_with_nul = frame_name.size() + 1;
+  if (size < bytes_with_nul) {
+    return JXL_API_ERROR("too small frame name output buffer");
+  }
+  memcpy(name, frame_name.c_str(), bytes_with_nul);
+
+  return JXL_DEC_SUCCESS;
+}
+
+// Requests that decoded pixels be delivered in `color_encoding`.
+// May only be called once all headers have been read (got_all_headers) and
+// before post_headers is set; outside that window an API error is returned.
+// Also rejects an unknown color space, and a non-gray request for a gray image
+// when an output with fewer than 3 channels has already been configured.
+JxlDecoderStatus JxlDecoderSetPreferredColorProfile(
+    JxlDecoder* dec, const JxlColorEncoding* color_encoding) {
+  if (!dec->got_all_headers) {
+    return JXL_API_ERROR("color info not yet available");
+  }
+  if (dec->post_headers) {
+    return JXL_API_ERROR("too late to set the color encoding");
+  }
+  if (dec->image_metadata.color_encoding.IsGray() &&
+      color_encoding->color_space != JXL_COLOR_SPACE_GRAY &&
+      dec->image_out_buffer_set && dec->image_out_format.num_channels < 3) {
+    return JXL_API_ERROR("Number of channels is too low for color output");
+  }
+  if (color_encoding->color_space == JXL_COLOR_SPACE_UNKNOWN) {
+    return JXL_API_ERROR("Unknown output colorspace");
+  }
+  jxl::ColorEncoding c_out;
+  JXL_API_RETURN_IF_ERROR(
+      ConvertExternalToInternalColorEncoding(*color_encoding, &c_out));
+  // NOTE(review): presumably this bails out when the converted encoding
+  // carries no ICC data — confirm against the JXL_API_RETURN_IF_ERROR macro.
+  JXL_API_RETURN_IF_ERROR(!c_out.ICC().empty());
+  auto& output_encoding = dec->passes_state->output_encoding_info;
+  // Only touch the decoder state when the request differs from the current
+  // output encoding; the image metadata is then updated to whatever encoding
+  // output_encoding ends up holding.
+  if (!c_out.SameColorEncoding(output_encoding.color_encoding)) {
+    JXL_API_RETURN_IF_ERROR(output_encoding.MaybeSetColorEncoding(c_out));
+    dec->image_metadata.color_encoding = output_encoding.color_encoding;
+  }
+  return JXL_DEC_SUCCESS;
+}
+
+// Stores the caller's desired intensity target in the decoder.
+// Returns an API error, leaving the decoder unchanged, when the value is
+// negative or NaN; any other value is stored as given.
+JxlDecoderStatus JxlDecoderSetDesiredIntensityTarget(
+    JxlDecoder* dec, float desired_intensity_target) {
+  if (desired_intensity_target < 0) {
+    return JXL_API_ERROR("negative intensity target requested");
+  }
+  // NaN compares false against everything, so it slips past the `< 0` test
+  // above; reject it explicitly instead of storing it in the decoder.
+  if (!(desired_intensity_target >= 0)) {
+    return JXL_API_ERROR("NaN intensity target requested");
+  }
+  dec->desired_intensity_target = desired_intensity_target;
+  return JXL_DEC_SUCCESS;
+}
+
+// Hands the decoder a caller-owned buffer of `size` bytes at `data` to receive
+// box contents. Fails if a buffer is still set (it must be released first) or
+// if no box event is currently pending.
+JxlDecoderStatus JxlDecoderSetBoxBuffer(JxlDecoder* dec, uint8_t* data,
+                                        size_t size) {
+  if (dec->box_out_buffer_set) {
+    return JXL_API_ERROR("must release box buffer before setting it again");
+  }
+  if (!dec->box_event) {
+    return JXL_API_ERROR("can only set box buffer after box event");
+  }
+
+  // Describe the new buffer, with the write position at its start.
+  dec->box_out_buffer = data;
+  dec->box_out_buffer_size = size;
+  dec->box_out_buffer_pos = 0;
+  // Mark it as set, and as set for the current box.
+  dec->box_out_buffer_set_current_box = true;
+  dec->box_out_buffer_set = true;
+  return JXL_DEC_SUCCESS;
+}
+
+// Detaches the current box output buffer from the decoder and returns how many
+// bytes of it were left unwritten (buffer size minus write position). Returns
+// 0 when no buffer was set.
+size_t JxlDecoderReleaseBoxBuffer(JxlDecoder* dec) {
+  if (!dec->box_out_buffer_set) return 0;
+
+  const size_t unused_bytes =
+      dec->box_out_buffer_size - dec->box_out_buffer_pos;
+
+  // If the buffer was set for the current box, advance the begin offset by the
+  // amount written so far; otherwise start over from zero.
+  if (dec->box_out_buffer_set_current_box) {
+    dec->box_out_buffer_begin += dec->box_out_buffer_pos;
+  } else {
+    dec->box_out_buffer_begin = 0;
+  }
+
+  dec->box_out_buffer = nullptr;
+  dec->box_out_buffer_size = 0;
+  dec->box_out_buffer_set = false;
+  dec->box_out_buffer_set_current_box = false;
+  return unused_bytes;
+}
+
+// Records in the decoder whether boxes should be decompressed. The flag is
+// stored unconditionally and the call always returns JXL_DEC_SUCCESS.
+JxlDecoderStatus JxlDecoderSetDecompressBoxes(JxlDecoder* dec,
+                                              JXL_BOOL decompress) {
+  // TODO(lode): return error if libbrotli is not compiled in the jxl decoding
+  // library
+  dec->decompress_boxes = decompress;
+  return JXL_DEC_SUCCESS;
+}
+
+// Writes the type of the current box into `type`. When `decompressed` is
+// non-zero the decoder's decoded box type is reported, otherwise the raw box
+// type. Only valid while a box event is pending.
+JxlDecoderStatus JxlDecoderGetBoxType(JxlDecoder* dec, JxlBoxType type,
+                                      JXL_BOOL decompressed) {
+  if (!dec->box_event) {
+    return JXL_API_ERROR("can only get box info after JXL_DEC_BOX event");
+  }
+
+  if (!decompressed) {
+    memcpy(type, dec->box_type, sizeof(dec->box_type));
+    return JXL_DEC_SUCCESS;
+  }
+
+  memcpy(type, dec->box_decoded_type, sizeof(dec->box_decoded_type));
+  return JXL_DEC_SUCCESS;
+}
+
+// Reports the decoder's recorded size of the current box through `size`.
+// `size` may be null, in which case only the pending-box-event check is
+// performed.
+JxlDecoderStatus JxlDecoderGetBoxSizeRaw(const JxlDecoder* dec,
+                                         uint64_t* size) {
+  if (!dec->box_event) {
+    return JXL_API_ERROR("can only get box info after JXL_DEC_BOX event");
+  }
+  if (size != nullptr) *size = dec->box_size;
+  return JXL_DEC_SUCCESS;
+}
+
+// Selects the progressive detail level stored in the decoder. Only kDC,
+// kLastPasses and kPasses are accepted; any other value yields an API error
+// and leaves the decoder unchanged.
+JxlDecoderStatus JxlDecoderSetProgressiveDetail(JxlDecoder* dec,
+                                                JxlProgressiveDetail detail) {
+  const bool implemented =
+      detail == kDC || detail == kLastPasses || detail == kPasses;
+  if (!implemented) {
+    return JXL_API_ERROR(
+        "Values other than kDC (%d), kLastPasses (%d) and kPasses (%d), "
+        "like %d are not implemented.",
+        kDC, kLastPasses, kPasses, detail);
+  }
+  dec->prog_detail = detail;
+  return JXL_DEC_SUCCESS;
+}
+
+namespace {
+
+// Checks that the bit depth resolved by GetBitDepth for `bit_depth`,
+// `metadata` and `format` is usable: it must be non-zero, at most 8 for
+// JXL_TYPE_UINT8 output and at most 16 for JXL_TYPE_UINT16 output. Other data
+// types are only subject to the non-zero check.
+template <typename T>
+JxlDecoderStatus VerifyOutputBitDepth(JxlBitDepth bit_depth, const T& metadata,
+                                      JxlPixelFormat format) {
+  const uint32_t resolved_bits = GetBitDepth(bit_depth, metadata, format);
+  if (resolved_bits == 0) {
+    return JXL_API_ERROR("Invalid output bit depth");
+  }
+
+  const bool too_wide_for_u8 =
+      format.data_type == JXL_TYPE_UINT8 && resolved_bits > 8;
+  if (too_wide_for_u8) {
+    return JXL_API_ERROR("Invalid bit depth %u for uint8 output",
+                         resolved_bits);
+  }
+
+  const bool too_wide_for_u16 =
+      format.data_type == JXL_TYPE_UINT16 && resolved_bits > 16;
+  if (too_wide_for_u16) {
+    return JXL_API_ERROR("Invalid bit depth %u for uint16 output",
+                         resolved_bits);
+  }
+
+  return JXL_DEC_SUCCESS;
+}
+
+}  // namespace
+
+// Sets the bit depth used for the image output. Requires that an image output
+// has already been configured (image_out_buffer_set); `*bit_depth` is checked
+// against the image metadata and the configured output format before it is
+// stored in the decoder.
+JxlDecoderStatus JxlDecoderSetImageOutBitDepth(JxlDecoder* dec,
+                                               const JxlBitDepth* bit_depth) {
+  if (!dec->image_out_buffer_set) {
+    return JXL_API_ERROR("No image out buffer was set.");
+  }
+  JXL_API_RETURN_IF_ERROR(
+      VerifyOutputBitDepth(*bit_depth, dec->metadata.m, dec->image_out_format));
+  dec->image_out_bit_depth = *bit_depth;
+  return JXL_DEC_SUCCESS;
+}
diff --git a/third-party/libjxl/libjxl/lib/jxl/decode_test.cc b/third-party/libjxl/libjxl/lib/jxl/decode_test.cc
new file mode 100644
index 0000000000..da647958a0
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/decode_test.cc
@@ -0,0 +1,5506 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/decode.h>
+#include <jxl/decode_cxx.h>
+#include <jxl/resizable_parallel_runner_cxx.h>
+#include <jxl/thread_parallel_runner_cxx.h>
+#include <jxl/types.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/dec/color_description.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_icc_codec.h"
+#include "lib/jxl/enc_progressive_split.h"
+#include "lib/jxl/encode_internal.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/icc_codec.h"
+#include "lib/jxl/image_metadata.h"
+#include "lib/jxl/jpeg/enc_jpeg_data.h"
+#include "lib/jxl/test_image.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+#include "lib/jxl/toc.h"
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace {
+// Appends `u32` to `bytes` as four bytes, most significant byte first.
+void AppendU32BE(uint32_t u32, jxl::PaddedBytes* bytes) {
+  for (int shift = 24; shift >= 0; shift -= 8) {
+    bytes->push_back(u32 >> shift);
+  }
+}
+
+// Selects how the test codestream is wrapped in container boxes (or not
+// wrapped at all) when a test file is generated.
+enum CodeStreamBoxFormat {
+  // Do not use box format at all, only pure codestream
+  kCSBF_None,
+  // Have a single codestream box, with its actual size given in the box
+  kCSBF_Single,
+  // Have a single codestream box, with box size 0 (final box running to end)
+  kCSBF_Single_Zero_Terminated,
+  // Single codestream box, with another unknown box behind it
+  kCSBF_Single_Other,
+  // Have multiple partial codestream boxes
+  kCSBF_Multi,
+  // Have multiple partial codestream boxes, with final box size 0 (running
+  // to end)
+  kCSBF_Multi_Zero_Terminated,
+  // Have multiple partial codestream boxes, terminated by non-codestream box
+  kCSBF_Multi_Other_Terminated,
+  // Have multiple partial codestream boxes, terminated by non-codestream box
+  // that has its size set to 0 (running to end)
+  kCSBF_Multi_Other_Zero_Terminated,
+  // Have multiple partial codestream boxes, and the first one has a content
+  // of zero length
+  kCSBF_Multi_First_Empty,
+  // Have multiple partial codestream boxes, and the last one has a content
+  // of zero length and there is an unknown empty box at the end
+  kCSBF_Multi_Last_Empty_Other,
+  // Have a compressed exif box before a regular codestream box
+  kCSBF_Brob_Exif,
+  // Not a value but used for counting amount of enum entries; must stay last.
+  kCSBF_NUM_ENTRIES,
+};
+
+// Unknown boxes for testing: a 4-character box type plus text contents each.
+// The *_size values come from strlen, so they exclude the terminating NUL.
+static const char* unk1_box_type = "unk1";
+static const char* unk1_box_contents = "abcdefghijklmnopqrstuvwxyz";
+static const size_t unk1_box_size = strlen(unk1_box_contents);
+static const char* unk2_box_type = "unk2";
+static const char* unk2_box_contents = "0123456789";
+static const size_t unk2_box_size = strlen(unk2_box_contents);
+static const char* unk3_box_type = "unk3";
+static const char* unk3_box_contents = "ABCDEF123456";
+static const size_t unk3_box_size = strlen(unk3_box_contents);
+// Box with brob-compressed exif, including header
+// The literal contains embedded NUL bytes, so its length is given explicitly
+// below instead of being measured with strlen.
+static const uint8_t* box_brob_exif = reinterpret_cast<const uint8_t*>(
+    "\0\0\0@brobExif\241\350\2\300\177\244v\2525\304\360\27=?\267{"
+    "\33\37\314\332\214QX17PT\"\256\0\0\202s\214\313t\333\310\320k\20\276\30"
+    "\204\277l$\326c#\1\b");
+// Matches the leading four bytes of the literal ("\0\0\0@" is 64 big-endian).
+size_t box_brob_exif_size = 64;
+// The uncompressed Exif data from the brob box
+static const uint8_t* exif_uncompressed = reinterpret_cast<const uint8_t*>(
+    "\0\0\0\0MM\0*"
+    "\0\0\0\b\0\5\1\22\0\3\0\0\0\1\0\5\0\0\1\32\0\5\0\0\0\1\0\0\0J\1\33\0\5\0\0"
+    "\0\1\0\0\0R\1("
+    "\0\3\0\0\0\1\0\1\0\0\2\23\0\3\0\0\0\1\0\1\0\0\0\0\0\0\0\0\0\1\0\0\0\1\0\0"
+    "\0\1\0\0\0\1");
+// Explicit byte count of the literal above (it also contains embedded NULs).
+size_t exif_uncompressed_size = 94;
+
+// Returns an ICC profile output by the JPEG XL decoder for RGB_D65_SRG_Rel_Lin,
+// but with, on purpose, rXYZ, bXYZ and gXYZ (the RGB primaries) switched to a
+// different order to ensure the profile does not match any known profile, so
+// the encoder cannot encode it in a compact struct instead.
+// The profile bytes are embedded as a string literal and copied into a fresh
+// PaddedBytes on every call.
+jxl::PaddedBytes GetIccTestProfile() {
+  const uint8_t* profile = reinterpret_cast<const uint8_t*>(
+      "\0\0\3\200lcms\0040\0\0mntrRGB XYZ "
+      "\a\344\0\a\0\27\0\21\0$"
+      "\0\37acspAPPL\0\0\0\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\0\0\366"
+      "\326\0\1\0\0\0\0\323-lcms\372c\207\36\227\200{"
+      "\2\232s\255\327\340\0\n\26\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+      "\0\0\0\0\0\0\0\0\rdesc\0\0\1 "
+      "\0\0\0Bcprt\0\0\1d\0\0\1\0wtpt\0\0\2d\0\0\0\24chad\0\0\2x\0\0\0,"
+      "bXYZ\0\0\2\244\0\0\0\24gXYZ\0\0\2\270\0\0\0\24rXYZ\0\0\2\314\0\0\0\24rTR"
+      "C\0\0\2\340\0\0\0 gTRC\0\0\2\340\0\0\0 bTRC\0\0\2\340\0\0\0 "
+      "chrm\0\0\3\0\0\0\0$dmnd\0\0\3$\0\0\0("
+      "dmdd\0\0\3L\0\0\0002mluc\0\0\0\0\0\0\0\1\0\0\0\fenUS\0\0\0&"
+      "\0\0\0\34\0R\0G\0B\0_\0D\0006\0005\0_\0S\0R\0G\0_\0R\0e\0l\0_"
+      "\0L\0i\0n\0\0mluc\0\0\0\0\0\0\0\1\0\0\0\fenUS\0\0\0\344\0\0\0\34\0C\0o\0"
+      "p\0y\0r\0i\0g\0h\0t\0 \0002\0000\0001\08\0 \0G\0o\0o\0g\0l\0e\0 "
+      "\0L\0L\0C\0,\0 \0C\0C\0-\0B\0Y\0-\0S\0A\0 \0003\0.\0000\0 "
+      "\0U\0n\0p\0o\0r\0t\0e\0d\0 "
+      "\0l\0i\0c\0e\0n\0s\0e\0(\0h\0t\0t\0p\0s\0:\0/\0/"
+      "\0c\0r\0e\0a\0t\0i\0v\0e\0c\0o\0m\0m\0o\0n\0s\0.\0o\0r\0g\0/"
+      "\0l\0i\0c\0e\0n\0s\0e\0s\0/\0b\0y\0-\0s\0a\0/\0003\0.\0000\0/"
+      "\0l\0e\0g\0a\0l\0c\0o\0d\0e\0)XYZ "
+      "\0\0\0\0\0\0\366\326\0\1\0\0\0\0\323-"
+      "sf32\0\0\0\0\0\1\fB\0\0\5\336\377\377\363%"
+      "\0\0\a\223\0\0\375\220\377\377\373\241\377\377\375\242\0\0\3\334\0\0\300"
+      "nXYZ \0\0\0\0\0\0o\240\0\08\365\0\0\3\220XYZ "
+      "\0\0\0\0\0\0$\237\0\0\17\204\0\0\266\304XYZ "
+      "\0\0\0\0\0\0b\227\0\0\267\207\0\0\30\331para\0\0\0\0\0\3\0\0\0\1\0\0\0\1"
+      "\0\0\0\0\0\0\0\1\0\0\0\0\0\0chrm\0\0\0\0\0\3\0\0\0\0\243\327\0\0T|"
+      "\0\0L\315\0\0\231\232\0\0&"
+      "g\0\0\17\\mluc\0\0\0\0\0\0\0\1\0\0\0\fenUS\0\0\0\f\0\0\0\34\0G\0o\0o\0g"
+      "\0l\0emluc\0\0\0\0\0\0\0\1\0\0\0\fenUS\0\0\0\26\0\0\0\34\0I\0m\0a\0g\0e"
+      "\0 \0c\0o\0d\0e\0c\0\0");
+  // Explicit length, since the literal contains embedded NUL bytes. It equals
+  // the literal's first four bytes read big-endian ("\0\0\3\200" is 896).
+  size_t profile_size = 896;
+  jxl::PaddedBytes icc_profile;
+  icc_profile.assign(profile, profile + profile_size);
+  return icc_profile;
+}
+
+}  // namespace
+
+namespace jxl {
+namespace {
+
+// Appends one box to `bytes`: a 4-byte big-endian size (`contents_size` plus
+// the 8 header bytes), the 4-character `type`, then `contents_size` bytes
+// taken from `contents`.
+// NOTE(review): `unbounded` is accepted but never read, so the size field is
+// always the real box size even when a caller passes true — confirm whether a
+// size of 0 was intended for that case.
+void AppendTestBox(const char* type, const char* contents, size_t contents_size,
+                   bool unbounded, PaddedBytes* bytes) {
+  AppendU32BE(contents_size + 8, bytes);
+  for (size_t i = 0; i < 4; ++i) {
+    bytes->push_back(type[i]);
+  }
+  const uint8_t* const payload = reinterpret_cast<const uint8_t*>(contents);
+  bytes->append(payload, payload + contents_size);
+}
+
+// Which kind of preview frame, if any, a generated test image should carry.
+enum PreviewMode {
+  // No preview. This is the zero value, so a PreviewMode tests false.
+  kNoPreview,
+  // Preview smaller than the main image.
+  kSmallPreview,
+  // Preview larger than the main image.
+  kBigPreview,
+  // Not a mode: number of modes above; must stay last.
+  kNumPreviewModes,
+};
+
+// Turns `ib` into a preview-sized version of itself, in place.
+// kSmallPreview calls ShrinkTo with both dimensions divided by 7. kBigPreview
+// rebuilds the color planes and every extra channel at 7 times the size,
+// repeating each source sample. Any other mode leaves `ib` untouched.
+void GeneratePreview(PreviewMode preview_mode, ImageBundle* ib) {
+  if (preview_mode == kSmallPreview) {
+    ib->ShrinkTo(ib->xsize() / 7, ib->ysize() / 7);
+    return;
+  }
+  if (preview_mode != kBigPreview) return;
+
+  // Sample-repeating enlargement: dst(x, y) = src(x / 7, y / 7).
+  const auto enlarge = [](const ImageF& src, ImageF* dst) {
+    for (size_t y = 0; y < dst->ysize(); ++y) {
+      const auto* src_row = src.ConstRow(y / 7);
+      auto* dst_row = dst->Row(y);
+      for (size_t x = 0; x < dst->xsize(); ++x) {
+        dst_row[x] = src_row[x / 7];
+      }
+    }
+  };
+
+  const size_t big_xsize = ib->xsize() * 7;
+  const size_t big_ysize = ib->ysize() * 7;
+
+  Image3F enlarged_color(big_xsize, big_ysize);
+  for (size_t c = 0; c < 3; ++c) {
+    enlarge(ib->color()->Plane(c), &enlarged_color.Plane(c));
+  }
+
+  std::vector<ImageF> enlarged_extra;
+  for (const ImageF& channel : ib->extra_channels()) {
+    ImageF enlarged(big_xsize, big_ysize);
+    enlarge(channel, &enlarged);
+    enlarged_extra.emplace_back(std::move(enlarged));
+  }
+
+  // Drop the old planes only after everything has been read from them.
+  ib->RemoveColor();
+  ib->ClearExtraChannels();
+  ib->SetFromImage(std::move(enlarged_color), ib->c_current());
+  ib->SetExtraChannels(std::move(enlarged_extra));
+}
+
+// Options controlling how a test codestream is produced. Every field has a
+// default, so tests only need to set what they care about.
+struct TestCodestreamParams {
+  // Encoder settings passed through to the encoder.
+  CompressParams cparams;
+  // Container layout to wrap the codestream in; kCSBF_None means no container.
+  CodeStreamBoxFormat box_format = kCSBF_None;
+  // Orientation written into the image metadata.
+  JxlOrientation orientation = JXL_ORIENT_IDENTITY;
+  // Which preview frame to attach, if any.
+  PreviewMode preview_mode = kNoPreview;
+  // When true, an intrinsic size is stored in the image metadata.
+  bool add_intrinsic_size = false;
+  // When true, the image is tagged with the ICC test profile.
+  bool add_icc_profile = false;
+  // Intensity target for the image metadata; 0 leaves it unset.
+  float intensity_target = 0.0;
+  // Textual color space description; empty means not specified.
+  std::string color_space;
+  // Optional output: when non-null, receives compressed JPEG bytes.
+  PaddedBytes* jpeg_codestream = nullptr;
+  // Optional progressive mode for the encoder; null means not specified.
+  const ProgressiveMode* progressive_mode = nullptr;
+};
+
+// Encodes `pixels` (xsize x ysize) as a JPEG XL test stream and returns the
+// bytes, wrapped in container boxes when params.box_format is not kCSBF_None.
+// Input pixels are read as 16-bit big-endian samples with `num_channels`
+// channels per pixel.
+// num_channels of 1 or 2 selects grayscale; an even num_channels adds an alpha
+// channel to the encoded image, unless params.jpeg_codestream is set.
+// params.add_icc_profile: if false, encodes the image as sRGB using the JXL
+// fields (or as params.color_space when that is non-empty), for grayscale or
+// RGB images. If true, encodes the image using the ICC profile returned by
+// GetIccTestProfile, without the JXL fields, this requires the image is RGB,
+// not grayscale.
+// Providing params.jpeg_codestream will populate it with compressed JPEG
+// bytes, and make it possible to reconstruct those exact JPEG bytes using
+// the return value _if_ params.box_format indicates a box format.
+PaddedBytes CreateTestJXLCodestream(Span<const uint8_t> pixels, size_t xsize,
+                                    size_t ysize, size_t num_channels,
+                                    const TestCodestreamParams& params) {
+  // Compress the pixels with JPEG XL.
+  bool grayscale = (num_channels <= 2);
+  bool include_alpha = !(num_channels & 1) && params.jpeg_codestream == nullptr;
+  // The JPEG path uses 8-bit samples, everything else 16-bit.
+  size_t bitdepth = params.jpeg_codestream == nullptr ? 16 : 8;
+  CodecInOut io;
+  io.SetSize(xsize, ysize);
+  ColorEncoding color_encoding;
+  if (params.add_icc_profile) {
+    // the hardcoded ICC profile we attach requires RGB.
+    EXPECT_EQ(false, grayscale);
+    EXPECT_TRUE(params.color_space.empty());
+    EXPECT_TRUE(color_encoding.SetICC(GetIccTestProfile(), &GetJxlCms()));
+  } else if (!params.color_space.empty()) {
+    JxlColorEncoding c;
+    EXPECT_TRUE(jxl::ParseDescription(params.color_space, &c));
+    EXPECT_TRUE(ConvertExternalToInternalColorEncoding(c, &color_encoding));
+    EXPECT_EQ(color_encoding.IsGray(), grayscale);
+  } else {
+    color_encoding = jxl::ColorEncoding::SRGB(/*is_gray=*/grayscale);
+  }
+  ThreadPool pool(nullptr, nullptr);
+  io.metadata.m.SetUintSamples(bitdepth);
+  if (include_alpha) {
+    io.metadata.m.SetAlphaBits(bitdepth);
+  }
+  if (params.intensity_target != 0) {
+    io.metadata.m.SetIntensityTarget(params.intensity_target);
+  }
+  JxlPixelFormat format = {static_cast<uint32_t>(num_channels), JXL_TYPE_UINT16,
+                           JXL_BIG_ENDIAN, 0};
+  // Make the grayscale-ness of the io metadata color_encoding and the packed
+  // image match.
+  io.metadata.m.color_encoding = color_encoding;
+  EXPECT_TRUE(ConvertFromExternal(pixels, xsize, ysize, color_encoding,
+                                  /*bits_per_sample=*/16, format, &pool,
+                                  &io.Main()));
+  // Stays empty unless a reconstructible JPEG is requested below.
+  jxl::PaddedBytes jpeg_data;
+  if (params.jpeg_codestream != nullptr) {
+    if (jxl::extras::CanDecode(jxl::extras::Codec::kJPG)) {
+      // Encode the image as a JPEG, hand those bytes to the caller, then
+      // decode them back into `io` and serialize the JPEG reconstruction data.
+      std::vector<uint8_t> jpeg_bytes;
+      io.jpeg_quality = 70;
+      EXPECT_TRUE(Encode(io, extras::Codec::kJPG, io.metadata.m.color_encoding,
+                         /*bits_per_sample=*/8, &jpeg_bytes, &pool));
+      params.jpeg_codestream->append(jpeg_bytes.data(),
+                                     jpeg_bytes.data() + jpeg_bytes.size());
+      EXPECT_TRUE(jxl::jpeg::DecodeImageJPG(
+          jxl::Span<const uint8_t>(jpeg_bytes.data(), jpeg_bytes.size()), &io));
+      EXPECT_TRUE(
+          EncodeJPEGData(*io.Main().jpeg_data, &jpeg_data, params.cparams));
+      io.metadata.m.xyb_encoded = false;
+    } else {
+      JXL_ABORT(
+          "unable to create reconstructible JPEG without JPEG support enabled");
+    }
+  }
+  // kNoPreview is the zero enumerator, so any other mode enters this branch.
+  if (params.preview_mode) {
+    io.preview_frame = io.Main().Copy();
+    GeneratePreview(params.preview_mode, &io.preview_frame);
+    io.metadata.m.have_preview = true;
+    EXPECT_TRUE(io.metadata.m.preview_size.Set(io.preview_frame.xsize(),
+                                               io.preview_frame.ysize()));
+  }
+  if (params.add_intrinsic_size) {
+    EXPECT_TRUE(io.metadata.m.intrinsic_size.Set(xsize / 3, ysize / 3));
+  }
+  io.metadata.m.orientation = params.orientation;
+  AuxOut aux_out;
+  PaddedBytes compressed;
+  PassesEncoderState enc_state;
+  if (params.progressive_mode) {
+    enc_state.progressive_splitter.SetProgressiveMode(*params.progressive_mode);
+  }
+  EXPECT_TRUE(EncodeFile(params.cparams, &io, &enc_state, &compressed,
+                         GetJxlCms(), &aux_out, &pool));
+  // From here on `compressed` holds the bare codestream; the code below
+  // optionally replaces it with a container that wraps it.
+  CodeStreamBoxFormat add_container = params.box_format;
+  if (add_container != kCSBF_None) {
+    // Header with signature box and ftyp box.
+    const uint8_t header[] = {0,    0,    0,    0xc,  0x4a, 0x58, 0x4c, 0x20,
+                              0xd,  0xa,  0x87, 0xa,  0,    0,    0,    0x14,
+                              0x66, 0x74, 0x79, 0x70, 0x6a, 0x78, 0x6c, 0x20,
+                              0,    0,    0,    0,    0x6a, 0x78, 0x6c, 0x20};
+
+    bool is_multi = add_container == kCSBF_Multi ||
+                    add_container == kCSBF_Multi_Zero_Terminated ||
+                    add_container == kCSBF_Multi_Other_Terminated ||
+                    add_container == kCSBF_Multi_Other_Zero_Terminated ||
+                    add_container == kCSBF_Multi_First_Empty ||
+                    add_container == kCSBF_Multi_Last_Empty_Other;
+
+    if (is_multi) {
+      // Split the codestream into three pieces; the last piece absorbs the
+      // remainder when the size is not divisible by 3.
+      size_t third = compressed.size() / 3;
+      std::vector<uint8_t> compressed0(compressed.data(),
+                                       compressed.data() + third);
+      std::vector<uint8_t> compressed1(compressed.data() + third,
+                                       compressed.data() + 2 * third);
+      std::vector<uint8_t> compressed2(compressed.data() + 2 * third,
+                                       compressed.data() + compressed.size());
+
+      PaddedBytes c;
+      c.append(header, header + sizeof(header));
+      if (params.jpeg_codestream != nullptr) {
+        jxl::AppendBoxHeader(jxl::MakeBoxType("jbrd"), jpeg_data.size(), false,
+                             &c);
+        c.append(jpeg_data.data(), jpeg_data.data() + jpeg_data.size());
+      }
+      // Each jxlp box written below is: 4-byte size (payload + 12), the type
+      // "jxlp", a 4-byte index taken from this counter, then the payload.
+      uint32_t jxlp_index = 0;
+      if (add_container == kCSBF_Multi_First_Empty) {
+        // Dummy (empty) codestream part
+        AppendU32BE(12, &c);
+        c.push_back('j');
+        c.push_back('x');
+        c.push_back('l');
+        c.push_back('p');
+        AppendU32BE(jxlp_index++, &c);
+      }
+      // First codestream part
+      AppendU32BE(compressed0.size() + 12, &c);
+      c.push_back('j');
+      c.push_back('x');
+      c.push_back('l');
+      c.push_back('p');
+      AppendU32BE(jxlp_index++, &c);
+      c.append(compressed0.data(), compressed0.data() + compressed0.size());
+      // A few non-codestream boxes in between
+      AppendTestBox(unk1_box_type, unk1_box_contents, unk1_box_size, false, &c);
+      AppendTestBox(unk2_box_type, unk2_box_contents, unk2_box_size, false, &c);
+      // Dummy (empty) codestream part
+      AppendU32BE(12, &c);
+      c.push_back('j');
+      c.push_back('x');
+      c.push_back('l');
+      c.push_back('p');
+      AppendU32BE(jxlp_index++, &c);
+      // Second codestream part
+      AppendU32BE(compressed1.size() + 12, &c);
+      c.push_back('j');
+      c.push_back('x');
+      c.push_back('l');
+      c.push_back('p');
+      AppendU32BE(jxlp_index++, &c);
+      c.append(compressed1.data(), compressed1.data() + compressed1.size());
+      // Third (last) codestream part
+      AppendU32BE(add_container == kCSBF_Multi_Zero_Terminated
+                      ? 0
+                      : (compressed2.size() + 12),
+                  &c);
+      c.push_back('j');
+      c.push_back('x');
+      c.push_back('l');
+      c.push_back('p');
+      // NOTE(review): the 0x80000000 bit presumably flags the final jxlp box;
+      // it is withheld here when an extra empty jxlp box follows below.
+      if (add_container != kCSBF_Multi_Last_Empty_Other) {
+        AppendU32BE(jxlp_index++ | 0x80000000, &c);
+      } else {
+        AppendU32BE(jxlp_index++, &c);
+      }
+      c.append(compressed2.data(), compressed2.data() + compressed2.size());
+      if (add_container == kCSBF_Multi_Last_Empty_Other) {
+        // Dummy (empty) codestream part
+        AppendU32BE(12, &c);
+        c.push_back('j');
+        c.push_back('x');
+        c.push_back('l');
+        c.push_back('p');
+        AppendU32BE(jxlp_index++ | 0x80000000, &c);
+        AppendTestBox(unk3_box_type, unk3_box_contents, unk3_box_size, false,
+                      &c);
+      }
+      if (add_container == kCSBF_Multi_Other_Terminated) {
+        AppendTestBox(unk3_box_type, unk3_box_contents, unk3_box_size, false,
+                      &c);
+      }
+      if (add_container == kCSBF_Multi_Other_Zero_Terminated) {
+        AppendTestBox(unk3_box_type, unk3_box_contents, unk3_box_size, true,
+                      &c);
+      }
+      compressed.swap(c);
+    } else {
+      // Single-box layouts: one "jxlc" box holds the whole codestream.
+      PaddedBytes c;
+      c.append(header, header + sizeof(header));
+      if (params.jpeg_codestream != nullptr) {
+        jxl::AppendBoxHeader(jxl::MakeBoxType("jbrd"), jpeg_data.size(), false,
+                             &c);
+        c.append(jpeg_data.data(), jpeg_data.data() + jpeg_data.size());
+      }
+      if (add_container == kCSBF_Brob_Exif) {
+        c.append(box_brob_exif, box_brob_exif + box_brob_exif_size);
+      }
+      // A size field of 0 is written for the zero-terminated variant,
+      // otherwise payload size plus the 8 header bytes.
+      AppendU32BE(add_container == kCSBF_Single_Zero_Terminated
+                      ? 0
+                      : (compressed.size() + 8),
+                  &c);
+      c.push_back('j');
+      c.push_back('x');
+      c.push_back('l');
+      c.push_back('c');
+      c.append(compressed.data(), compressed.data() + compressed.size());
+      if (add_container == kCSBF_Single_Other) {
+        AppendTestBox(unk1_box_type, unk1_box_contents, unk1_box_size, false,
+                      &c);
+      }
+      compressed.swap(c);
+    }
+  }
+
+  return compressed;
+}
+
+// Calls JxlDecoderProcessInput until it returns something other than
+// JXL_DEC_BOX, and returns that status. Always processes input at least once.
+JxlDecoderStatus ProcessInputIgnoreBoxes(JxlDecoder* dec) {
+  JxlDecoderStatus last_status;
+  do {
+    last_status = JxlDecoderProcessInput(dec);
+  } while (last_status == JXL_DEC_BOX);
+  return last_status;
+}
+
+// Decodes one-shot with the API for non-streaming decoding tests.
+std::vector<uint8_t> DecodeWithAPI(JxlDecoder* dec,
+                                   Span<const uint8_t> compressed,
+                                   const JxlPixelFormat& format,
+                                   bool use_callback, bool set_buffer_early,
+                                   bool use_resizable_runner,
+                                   bool require_boxes, bool expect_success,
+                                   PaddedBytes* icc = nullptr) {
+  JxlThreadParallelRunnerPtr runner_fixed;
+  JxlResizableParallelRunnerPtr runner_resizable;
+  JxlParallelRunner runner_fn;
+  void* runner;
+
+  if (use_resizable_runner) {
+    runner_resizable = JxlResizableParallelRunnerMake(nullptr);
+    runner = runner_resizable.get();
+    runner_fn = JxlResizableParallelRunner;
+  } else {
+    size_t hw_threads = JxlThreadParallelRunnerDefaultNumWorkerThreads();
+    runner_fixed =
+        JxlThreadParallelRunnerMake(nullptr, std::min<size_t>(hw_threads, 16));
+    runner = runner_fixed.get();
+    runner_fn = JxlThreadParallelRunner;
+  }
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetParallelRunner(dec, runner_fn, runner));
+
+  auto process_input =
+      require_boxes ? ProcessInputIgnoreBoxes : JxlDecoderProcessInput;
+
+  EXPECT_EQ(
+      JXL_DEC_SUCCESS,
+      JxlDecoderSubscribeEvents(
+          dec, JXL_DEC_BASIC_INFO | (set_buffer_early ? JXL_DEC_FRAME : 0) |
+                   JXL_DEC_PREVIEW_IMAGE | JXL_DEC_FULL_IMAGE |
+                   (require_boxes ? JXL_DEC_BOX : 0) |
+                   (icc != nullptr ? JXL_DEC_COLOR_ENCODING : 0)));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetInput(dec, compressed.data(), compressed.size()));
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, process_input(dec));
+  size_t buffer_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+  if (use_resizable_runner) {
+    JxlResizableParallelRunnerSetThreads(
+        runner,
+        JxlResizableParallelRunnerSuggestThreads(info.xsize, info.ysize));
+  }
+
+  std::vector<uint8_t> pixels(buffer_size);
+  size_t bytes_per_pixel = format.num_channels *
+                           test::GetDataBits(format.data_type) /
+                           jxl::kBitsPerByte;
+  size_t stride = bytes_per_pixel * info.xsize;
+  if (format.align > 1) {
+    stride = jxl::DivCeil(stride, format.align) * format.align;
+  }
+  auto callback = [&](size_t x, size_t y, size_t num_pixels,
+                      const void* pixels_row) {
+    memcpy(pixels.data() + stride * y + bytes_per_pixel * x, pixels_row,
+           num_pixels * bytes_per_pixel);
+  };
+
+  JxlDecoderStatus status = process_input(dec);
+
+  if (status == JXL_DEC_COLOR_ENCODING) {
+    size_t icc_size = 0;
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_DATA,
+                                          &icc_size));
+    icc->resize(icc_size);
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderGetColorAsICCProfile(dec, JXL_COLOR_PROFILE_TARGET_DATA,
+                                             icc->data(), icc_size));
+
+    status = process_input(dec);
+  }
+
+  std::vector<uint8_t> preview;
+  if (status == JXL_DEC_NEED_PREVIEW_OUT_BUFFER) {
+    size_t buffer_size;
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderPreviewOutBufferSize(dec, &format, &buffer_size));
+    preview.resize(buffer_size);
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSetPreviewOutBuffer(dec, &format, preview.data(),
+                                            preview.size()));
+    EXPECT_EQ(JXL_DEC_PREVIEW_IMAGE, process_input(dec));
+
+    status = process_input(dec);
+  }
+
+  if (set_buffer_early) {
+    EXPECT_EQ(JXL_DEC_FRAME, status);
+  } else {
+    EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, status);
+  }
+
+  if (use_callback) {
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSetImageOutCallback(
+                  dec, &format,
+                  [](void* opaque, size_t x, size_t y, size_t xsize,
+                     const void* pixels_row) {
+                    auto cb = static_cast<decltype(&callback)>(opaque);
+                    (*cb)(x, y, xsize, pixels_row);
+                  },
+                  /*opaque=*/&callback));
+  } else {
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                   dec, &format, pixels.data(), pixels.size()));
+  }
+
+  EXPECT_EQ(JXL_DEC_FULL_IMAGE, process_input(dec));
+
+  // After the full image was output, JxlDecoderProcessInput should return
+  // success to indicate all is done, unless we requested boxes and the last
+  // box was not a terminal unbounded box, in which case it should ask for
+  // more input.
+  JxlDecoderStatus expected_status =
+      expect_success ? JXL_DEC_SUCCESS : JXL_DEC_NEED_MORE_INPUT;
+  EXPECT_EQ(expected_status, process_input(dec));
+
+  return pixels;
+}
+
+// One-shot variant for the non-streaming tests: owns a temporary decoder.
+std::vector<uint8_t> DecodeWithAPI(Span<const uint8_t> compressed,
+                                   const JxlPixelFormat& format,
+                                   bool use_callback, bool set_buffer_early,
+                                   bool use_resizable_runner,
+                                   bool require_boxes, bool expect_success) {
+  JxlDecoder* decoder = JxlDecoderCreate(nullptr);
+  std::vector<uint8_t> decoded = DecodeWithAPI(
+      decoder, compressed, format, use_callback, set_buffer_early,
+      use_resizable_runner, require_boxes, expect_success);
+  JxlDecoderDestroy(decoder);
+  return decoded;
+}
+
+}  // namespace
+}  // namespace jxl
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(DecodeTest, JxlSignatureCheckTest) {
+  // Each case pairs the expected JxlSignatureCheck result with input bytes.
+  using SignatureCase = std::pair<int, std::vector<uint8_t>>;
+  const std::vector<SignatureCase> cases = {
+      // No JPEGXL header starts with 'a'.
+      {JXL_SIG_INVALID, {'a'}},
+      {JXL_SIG_INVALID, {'a', 'b', 'c', 'd', 'e', 'f'}},
+      // Empty file is not enough bytes.
+      {JXL_SIG_NOT_ENOUGH_BYTES, {}},
+      // JPEGXL headers.
+      {JXL_SIG_NOT_ENOUGH_BYTES, {0xff}},  // Part of a signature.
+      {JXL_SIG_INVALID, {0xff, 0xD8}},     // JPEG-1
+      {JXL_SIG_CODESTREAM, {0xff, 0x0a}},
+      // JPEGXL container file.
+      {JXL_SIG_CONTAINER,
+       {0, 0, 0, 0xc, 'J', 'X', 'L', ' ', 0xD, 0xA, 0x87, 0xA}},
+      // Ending with invalid byte.
+      {JXL_SIG_INVALID, {0, 0, 0, 0xc, 'J', 'X', 'L', ' ', 0xD, 0xA, 0x87, 0}},
+      // Part of signature.
+      {JXL_SIG_NOT_ENOUGH_BYTES,
+       {0, 0, 0, 0xc, 'J', 'X', 'L', ' ', 0xD, 0xA, 0x87}},
+      {JXL_SIG_NOT_ENOUGH_BYTES, {0}},
+  };
+
+  for (const SignatureCase& entry : cases) {
+    const std::vector<uint8_t>& bytes = entry.second;
+    EXPECT_EQ(entry.first, JxlSignatureCheck(bytes.data(), bytes.size()))
+        << "Where test data is " << ::testing::PrintToString(bytes);
+  }
+}
+
+TEST(DecodeTest, DefaultAllocTest) {
+  JxlDecoder* decoder = JxlDecoderCreate(nullptr);  // no memory manager given
+  EXPECT_NE(nullptr, decoder);
+  JxlDecoderDestroy(decoder);
+}
+
+TEST(DecodeTest, CustomAllocTest) {
+  // Tallies how often the decoder calls each memory manager hook.
+  struct HookCalls {
+    int allocs = 0;
+    int frees = 0;
+  } calls;
+  JxlMemoryManager manager;
+  manager.opaque = &calls;
+  manager.alloc = [](void* opaque, size_t size) {
+    ++static_cast<HookCalls*>(opaque)->allocs;
+    return malloc(size);
+  };
+  manager.free = [](void* opaque, void* address) {
+    ++static_cast<HookCalls*>(opaque)->frees;
+    free(address);
+  };
+
+  JxlDecoder* decoder = JxlDecoderCreate(&manager);
+  EXPECT_NE(nullptr, decoder);
+  EXPECT_LE(1, calls.allocs);  // creation went through the custom alloc hook
+  EXPECT_EQ(0, calls.frees);   // nothing released while the decoder is alive
+  JxlDecoderDestroy(decoder);
+  EXPECT_LE(1, calls.frees);   // destruction went through the custom free hook
+}
+
+// TODO(lode): add multi-threaded test when multithreaded pixel decoding from
+// API is implemented.
+TEST(DecodeTest, DefaultParallelRunnerTest) {
+  JxlDecoder* decoder = JxlDecoderCreate(nullptr);
+  EXPECT_NE(nullptr, decoder);
+  EXPECT_EQ(JXL_DEC_SUCCESS,  // null runner function and null opaque accepted
+            JxlDecoderSetParallelRunner(decoder, nullptr, nullptr));
+  JxlDecoderDestroy(decoder);
+}
+
+// Creates the header bytes of a JPEG XL file for testing: the signature, the
+// SizeHeader, the image metadata, the transform data and optionally an ICC
+// profile, optionally preceded by container boxes.
+// xsize, ysize: image dimensions to store in the SizeHeader, max 512.
+// bits_per_sample, orientation, alpha_bits, xyb_encoded: values to set in the
+//   metadata. A non-0 alpha_bits also names the alpha channel "alpha_test".
+// have_container: add box container format around the codestream.
+// metadata_default: if true, ImageMetadata is left at its defaults and
+//   bits_per_sample, orientation, alpha_bits and xyb_encoded are ignored.
+// insert_extra_box: insert an extra box before the codestream box, making the
+//   header farther away from the front than is ideal. Only if have_container.
+// icc_profile: if non-empty, set as the color encoding and written as ICC.
+std::vector<uint8_t> GetTestHeader(size_t xsize, size_t ysize,
+                                   size_t bits_per_sample, size_t orientation,
+                                   size_t alpha_bits, bool xyb_encoded,
+                                   bool have_container, bool metadata_default,
+                                   bool insert_extra_box,
+                                   const jxl::PaddedBytes& icc_profile) {
+  jxl::BitWriter writer;
+  jxl::BitWriter::Allotment allotment(&writer, 65536);  // Large enough
+  // Appends each byte of |bytes| to the writer, 8 bits at a time.
+  const auto write_bytes = [&writer](const std::vector<uint8_t>& bytes) {
+    for (const uint8_t byte : bytes) writer.Write(8, byte);
+  };
+
+  // Container prelude: signature box, file type box, optional extra box and
+  // finally the header of the box that holds the codestream.
+  if (have_container) {
+    // Signature box.
+    write_bytes({0, 0, 0, 0xc, 'J', 'X', 'L', ' ', 0xd, 0xa, 0x87, 0xa});
+    // File type box.
+    write_bytes({0,   0,   0, 0x14, 'f', 't', 'y', 'p', 'j', 'x',
+                 'l', ' ', 0, 0,    0,   0,   'j', 'x', 'l', ' '});
+    if (insert_extra_box) {
+      // Extra "test" box: 8 header bytes followed by 255 - 8 zero bytes.
+      write_bytes({0, 0, 0, 0xff, 't', 'e', 's', 't'});
+      for (size_t i = 0; i < 255 - 8; i++) {
+        writer.Write(8, 0);
+      }
+    }
+    // Beginning of codestream box, with an arbitrary size certainly large
+    // enough to contain the header
+    write_bytes({0, 0, 0, 0xff, 'j', 'x', 'l', 'c'});
+  }
+
+  // JXL signature
+  writer.Write(8, 0xff);
+  writer.Write(8, 0x0a);
+
+  // SizeHeader
+  jxl::CodecMetadata metadata;
+  EXPECT_TRUE(metadata.size.Set(xsize, ysize));
+  EXPECT_TRUE(WriteSizeHeader(metadata.size, &writer, 0, nullptr));
+
+  // Image metadata; left default-constructed when metadata_default is set.
+  if (!metadata_default) {
+    metadata.m.SetUintSamples(bits_per_sample);
+    metadata.m.orientation = orientation;
+    metadata.m.SetAlphaBits(alpha_bits);
+    metadata.m.xyb_encoded = xyb_encoded;
+    if (alpha_bits != 0) {
+      metadata.m.extra_channel_info[0].name = "alpha_test";
+    }
+  }
+
+  // SetICC is handed its argument by move, so give it a copy of the profile.
+  const bool has_icc = !icc_profile.empty();
+  if (has_icc) {
+    jxl::PaddedBytes copy = icc_profile;
+    EXPECT_TRUE(
+        metadata.m.color_encoding.SetICC(std::move(copy), &jxl::GetJxlCms()));
+  }
+
+  // Write the metadata, then the transform data, which is told the same
+  // xyb_encoded value as the metadata.
+  EXPECT_TRUE(jxl::Bundle::Write(metadata.m, &writer, 0, nullptr));
+  metadata.transform_data.nonserialized_xyb_encoded = metadata.m.xyb_encoded;
+  EXPECT_TRUE(jxl::Bundle::Write(metadata.transform_data, &writer, 0, nullptr));
+
+  // The ICC bytes follow when a profile was given.
+  if (has_icc) {
+    EXPECT_TRUE(metadata.m.color_encoding.WantICC());
+    EXPECT_TRUE(jxl::WriteICC(icc_profile, &writer, 0, nullptr));
+  }
+
+  // Pad to a whole byte and settle the allotment before copying the written
+  // bytes out.
+  writer.ZeroPadToByte();
+  allotment.ReclaimAndCharge(&writer, 0, nullptr);
+  const auto written = writer.GetSpan();
+  return std::vector<uint8_t>(written.data(),
+                              written.data() + written.size());
+}
+
+TEST(DecodeTest, BasicInfoTest) {
+  size_t xsize[2] = {50, 33};  // index i of each array configures sample i
+  size_t ysize[2] = {50, 77};
+  size_t bits_per_sample[2] = {8, 23};
+  size_t orientation[2] = {3, 5};
+  size_t alpha_bits[2] = {0, 8};
+  JXL_BOOL have_container[2] = {0, 1};
+  bool xyb_encoded = false;
+
+  std::vector<std::vector<uint8_t>> test_samples;
+  // Sample 0: direct codestream, without container.
+  test_samples.push_back(GetTestHeader(
+      xsize[0], ysize[0], bits_per_sample[0], orientation[0], alpha_bits[0],
+      xyb_encoded, have_container[0], /*metadata_default=*/false,
+      /*insert_extra_box=*/false, {}));
+  // Sample 1: container format and different parameters.
+  test_samples.push_back(GetTestHeader(
+      xsize[1], ysize[1], bits_per_sample[1], orientation[1], alpha_bits[1],
+      xyb_encoded, have_container[1], /*metadata_default=*/false,
+      /*insert_extra_box=*/false, {}));
+
+  for (size_t i = 0; i < test_samples.size(); ++i) {
+    const std::vector<uint8_t>& data = test_samples[i];
+    // Feed every prefix of the header, from zero bytes up to the full header.
+    for (size_t size = 0; size <= data.size(); ++size) {
+      // Test with a new decoder for each tested byte size.
+      JxlDecoder* dec = JxlDecoderCreate(nullptr);
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO));
+      const uint8_t* next_in = data.data();
+      size_t avail_in = size;
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+      JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+
+      JxlBasicInfo info;
+      bool have_basic_info = !JxlDecoderGetBasicInfo(dec, &info);
+
+      if (size == data.size()) {
+        EXPECT_EQ(JXL_DEC_BASIC_INFO, status);
+
+        // All header bytes given so the decoder must have the basic info.
+        EXPECT_EQ(true, have_basic_info);
+        EXPECT_EQ(have_container[i], info.have_container);
+        EXPECT_EQ(alpha_bits[i], info.alpha_bits);
+        // Orientations 5..8 swap the dimensions
+        if (orientation[i] >= 5) {
+          EXPECT_EQ(xsize[i], info.ysize);
+          EXPECT_EQ(ysize[i], info.xsize);
+        } else {
+          EXPECT_EQ(xsize[i], info.xsize);
+          EXPECT_EQ(ysize[i], info.ysize);
+        }
+        // The API should set the orientation to identity by default since it
+        // already applies the transformation internally by default.
+        EXPECT_EQ(1u, info.orientation);
+
+        EXPECT_EQ(3u, info.num_color_channels);
+
+        if (alpha_bits[i] != 0) {
+          // Expect an extra channel
+          EXPECT_EQ(1u, info.num_extra_channels);
+          JxlExtraChannelInfo extra;
+          EXPECT_EQ(0, JxlDecoderGetExtraChannelInfo(dec, 0, &extra));
+          EXPECT_EQ(alpha_bits[i], extra.bits_per_sample);
+          EXPECT_EQ(JXL_CHANNEL_ALPHA, extra.type);
+          EXPECT_EQ(0, extra.alpha_premultiplied);
+          // Verify the name "alpha_test" given to the alpha channel
+          EXPECT_EQ(10u, extra.name_length);
+          char name[11];  // 10 characters of "alpha_test" plus terminator
+          EXPECT_EQ(0,
+                    JxlDecoderGetExtraChannelName(dec, 0, name, sizeof(name)));
+          EXPECT_EQ(std::string("alpha_test"), std::string(name));
+        } else {
+          EXPECT_EQ(0u, info.num_extra_channels);
+        }
+
+        EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+      } else {
+        // With a truncated header the basic info must not be available yet.
+        // The last two prefixes are not checked either way: allow that slack
+        // because of some bits for default opsinmatrix/extension bits.
+        if (size + 2 < data.size()) {
+          EXPECT_EQ(false, have_basic_info);
+          EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, status);
+        }
+      }
+
+      // Subscribing to events is only allowed before decoding has started: a
+      // second subscription must fail until the decoder has been reset.
+      EXPECT_EQ(JXL_DEC_ERROR,
+                JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO));
+      JxlDecoderReset(dec);
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO));
+
+      JxlDecoderDestroy(dec);
+    }
+  }
+}
+
+TEST(DecodeTest, BufferSizeTest) {
+  // Checks that JxlDecoderImageOutBufferSize reports width * height * 4 bytes
+  // for a 4-channel, 8-bit output format.
+  // The input is the header of a 33x77 image with 8-bit samples and an 8-bit
+  // alpha channel, written without container.
+  const size_t xsize = 33;
+  const size_t ysize = 77;
+  const std::vector<uint8_t> header = GetTestHeader(
+      xsize, ysize, /*bits_per_sample=*/8, /*orientation=*/1,
+      /*alpha_bits=*/8, /*xyb_encoded=*/false, /*have_container=*/false,
+      /*metadata_default=*/false, /*insert_extra_box=*/false, {});
+
+  // Hand over the whole header at once and expect the basic info event.
+  JxlDecoder* decoder = JxlDecoderCreate(nullptr);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(decoder, JXL_DEC_BASIC_INFO));
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetInput(decoder, header.data(), header.size()));
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(decoder));
+
+  // The decoder must report the dimensions that were written.
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(decoder, &info));
+  EXPECT_EQ(xsize, info.xsize);
+  EXPECT_EQ(ysize, info.ysize);
+
+  // Four 8-bit channels per pixel and an align field of 0: the buffer is
+  // expected to be exactly 4 bytes per pixel.
+  JxlPixelFormat format = {4, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+  size_t image_out_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(decoder, &format, &image_out_size));
+  EXPECT_EQ(xsize * ysize * 4, image_out_size);
+
+  JxlDecoderDestroy(decoder);
+}
+
+TEST(DecodeTest, BasicInfoSizeHintTest) {
+  // Test on a file where the size hint is too small initially due to inserting
+  // a box before the codestream (something that is normally not recommended)
+  size_t xsize = 50;
+  size_t ysize = 50;
+  size_t bits_per_sample = 16;
+  size_t orientation = 1;
+  size_t alpha_bits = 0;
+  bool xyb_encoded = false;
+  std::vector<uint8_t> data = GetTestHeader(
+      xsize, ysize, bits_per_sample, orientation, alpha_bits, xyb_encoded,
+      /*have_container=*/true, /*metadata_default=*/false,
+      /*insert_extra_box=*/true, {});
+
+  JxlDecoderStatus status;
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO));
+
+  size_t hint0 = JxlDecoderSizeHintBasicInfo(dec);  // hint before any input
+  // Test that the test works as intended: we construct a file on purpose to
+  // be larger than the first hint by having that extra box.
+  EXPECT_LT(hint0, data.size());
+  const uint8_t* next_in = data.data();
+  // Pretend that only as many bytes as the hint indicates are available.
+  size_t avail_in = std::min(hint0, data.size());
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+  status = JxlDecoderProcessInput(dec);
+  EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, status);
+  // Basic info cannot be available yet due to the extra inserted box.
+  EXPECT_EQ(false, !JxlDecoderGetBasicInfo(dec, nullptr));
+
+  size_t num_read = avail_in - JxlDecoderReleaseInput(dec);  // bytes consumed
+  EXPECT_LT(num_read, data.size());
+
+  size_t hint1 = JxlDecoderSizeHintBasicInfo(dec);  // hint after first chunk
+  // The hint must be larger than the previous hint (taking already processed
+  // bytes into account, the hint is a hint for the next avail_in) since the
+  // decoder now knows there is a box in between.
+  EXPECT_GT(hint1 + num_read, hint0);
+  avail_in = std::min<size_t>(hint1, data.size() - num_read);
+  next_in += num_read;
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+  status = JxlDecoderProcessInput(dec);
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, status);
+  JxlBasicInfo info;
+  // The basic info must be available now: only one box was inserted in
+  // between, and the decoder is expected to know that box's size by now, so
+  // its second hint should have been sufficient.
+  EXPECT_EQ(true, !JxlDecoderGetBasicInfo(dec, &info));
+
+  // Also test if the basic info is correct.
+  EXPECT_EQ(1, info.have_container);
+  EXPECT_EQ(xsize, info.xsize);
+  EXPECT_EQ(ysize, info.ysize);
+  EXPECT_EQ(orientation, info.orientation);
+  EXPECT_EQ(bits_per_sample, info.bits_per_sample);
+
+  JxlDecoderDestroy(dec);
+}
+
+// Builds a GetTestHeader() header carrying |icc_profile| for a 50x50 image
+// with 16-bit samples, identity orientation and no alpha channel, written
+// without container.
+std::vector<uint8_t> GetIccTestHeader(const jxl::PaddedBytes& icc_profile,
+                                      bool xyb_encoded) {
+  const size_t side = 50;  // used for both width and height
+  const size_t bits_per_sample = 16;
+  return GetTestHeader(
+      side, side, bits_per_sample, /*orientation=*/1, /*alpha_bits=*/0,
+      xyb_encoded, /*have_container=*/false, /*metadata_default=*/false,
+      /*insert_extra_box=*/false, icc_profile);
+}
+
+// Tests the case where pixels and metadata ICC profile are the same.
+TEST(DecodeTest, IccProfileTestOriginal) {
+  jxl::PaddedBytes icc_profile = GetIccTestProfile();
+  bool xyb_encoded = false;
+  std::vector<uint8_t> data = GetIccTestHeader(icc_profile, xyb_encoded);
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(
+                dec, JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING));
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), data.size()));
+
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+
+  // Expect the opposite of xyb_encoded (false here) for uses_original_profile.
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+  EXPECT_EQ(JXL_TRUE, info.uses_original_profile);
+
+  EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+
+  // The encoded color profile is expected to be unavailable, since the image
+  // has an ICC profile instead.
+  EXPECT_EQ(JXL_DEC_ERROR,
+            JxlDecoderGetColorAsEncodedProfile(
+                dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL, nullptr));
+
+  size_t dec_profile_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+                                        &dec_profile_size));
+
+  // The size query must also succeed when the size output pointer is null.
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+                                        nullptr));
+
+  // The profiles must be equal. This requires they have equal size, and if
+  // they do, we can get the profile and compare the contents.
+  EXPECT_EQ(icc_profile.size(), dec_profile_size);
+  if (icc_profile.size() == dec_profile_size) {
+    jxl::PaddedBytes icc_profile2(icc_profile.size());
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsICCProfile(
+                                   dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+                                   icc_profile2.data(), icc_profile2.size()));
+    EXPECT_EQ(icc_profile, icc_profile2);
+  }
+
+  // The data is not xyb_encoded, so the same results are expected for the
+  // pixel data color profile (JXL_COLOR_PROFILE_TARGET_DATA).
+  EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderGetColorAsEncodedProfile(
+                               dec, JXL_COLOR_PROFILE_TARGET_DATA, nullptr));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_DATA,
+                                        &dec_profile_size));
+  EXPECT_EQ(icc_profile.size(), dec_profile_size);
+
+  JxlDecoderDestroy(dec);
+}
+
+// Tests the case where pixels and metadata ICC profile are different.
+TEST(DecodeTest, IccProfileTestXybEncoded) {
+  jxl::PaddedBytes icc_profile = GetIccTestProfile();
+  bool xyb_encoded = true;
+  std::vector<uint8_t> data = GetIccTestHeader(icc_profile, xyb_encoded);
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(
+                dec, JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), data.size()));
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+
+  // Expect the opposite of xyb_encoded (true here) for uses_original_profile.
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+  EXPECT_EQ(JXL_FALSE, info.uses_original_profile);
+
+  EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+
+  // The encoded color profile is expected to be unavailable, since the image
+  // has an ICC profile instead.
+  EXPECT_EQ(JXL_DEC_ERROR,
+            JxlDecoderGetColorAsEncodedProfile(
+                dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL, nullptr));
+
+  // The size query must also succeed when the size output pointer is null.
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+                                        nullptr));
+
+  size_t dec_profile_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+                                        &dec_profile_size));
+
+  // The profiles must be equal. This requires they have equal size, and if
+  // they do, we can get the profile and compare the contents.
+  EXPECT_EQ(icc_profile.size(), dec_profile_size);
+  if (icc_profile.size() == dec_profile_size) {
+    jxl::PaddedBytes icc_profile2(icc_profile.size());
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsICCProfile(
+                                   dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+                                   icc_profile2.data(), icc_profile2.size()));
+    EXPECT_EQ(icc_profile, icc_profile2);
+  }
+
+  // Data is xyb_encoded, so the data profile is a different profile, encoded
+  // as structured profile.
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsEncodedProfile(
+                                 dec, JXL_COLOR_PROFILE_TARGET_DATA, nullptr));
+  JxlColorEncoding pixel_encoding;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetColorAsEncodedProfile(
+                dec, JXL_COLOR_PROFILE_TARGET_DATA, &pixel_encoding));
+  EXPECT_EQ(JXL_PRIMARIES_SRGB, pixel_encoding.primaries);
+  // The API returns LINEAR by default when the colorspace cannot be represented
+  // by enum values.
+  EXPECT_EQ(JXL_TRANSFER_FUNCTION_LINEAR, pixel_encoding.transfer_function);
+
+  // Repeat the identical query (NOTE(review): "integer format" looks stale).
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetColorAsEncodedProfile(
+                dec, JXL_COLOR_PROFILE_TARGET_DATA, &pixel_encoding));
+  EXPECT_EQ(JXL_PRIMARIES_SRGB, pixel_encoding.primaries);
+  EXPECT_EQ(JXL_TRANSFER_FUNCTION_LINEAR, pixel_encoding.transfer_function);
+
+  // Test after setting the preferred color profile to non-linear sRGB:
+  // for XYB images with ICC profile, this setting is expected to take effect.
+  jxl::ColorEncoding temp_jxl_srgb = jxl::ColorEncoding::SRGB(false);
+  JxlColorEncoding pixel_encoding_srgb;
+  ConvertInternalToExternalColorEncoding(temp_jxl_srgb, &pixel_encoding_srgb);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetPreferredColorProfile(dec, &pixel_encoding_srgb));
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetColorAsEncodedProfile(
+                dec, JXL_COLOR_PROFILE_TARGET_DATA, &pixel_encoding));
+  EXPECT_EQ(JXL_TRANSFER_FUNCTION_SRGB, pixel_encoding.transfer_function);
+
+  // The decoder can also output this as a generated ICC profile anyway, and
+  // we're certain that it will differ from the above defined profile since
+  // the sRGB data should not have swapped R/G/B primaries.
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_DATA,
+                                        &dec_profile_size));
+  // We don't need to dictate exactly what size the generated ICC profile
+  // must be (since there are many ways to represent the same color space),
+  // but it should not be zero.
+  EXPECT_NE(0u, dec_profile_size);
+  jxl::PaddedBytes icc_profile2(dec_profile_size);
+  if (0 != dec_profile_size) {
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsICCProfile(
+                                   dec, JXL_COLOR_PROFILE_TARGET_DATA,
+                                   icc_profile2.data(), icc_profile2.size()));
+    // Expected to differ from the ICC profile stored in the file.
+    EXPECT_NE(icc_profile, icc_profile2);
+  }
+
+  // Test setting another different preferred profile, to verify that the
+  // returned JXL_COLOR_PROFILE_TARGET_DATA ICC profile is correctly
+  // updated.
+
+  jxl::ColorEncoding temp_jxl_linear = jxl::ColorEncoding::LinearSRGB(false);
+  JxlColorEncoding pixel_encoding_linear;
+  ConvertInternalToExternalColorEncoding(temp_jxl_linear,
+                                         &pixel_encoding_linear);
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetPreferredColorProfile(dec, &pixel_encoding_linear));
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetColorAsEncodedProfile(
+                dec, JXL_COLOR_PROFILE_TARGET_DATA, &pixel_encoding));
+  EXPECT_EQ(JXL_TRANSFER_FUNCTION_LINEAR, pixel_encoding.transfer_function);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_DATA,
+                                        &dec_profile_size));
+  EXPECT_NE(0u, dec_profile_size);
+  jxl::PaddedBytes icc_profile3(dec_profile_size);
+  if (0 != dec_profile_size) {
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsICCProfile(
+                                   dec, JXL_COLOR_PROFILE_TARGET_DATA,
+                                   icc_profile3.data(), icc_profile3.size()));
+    // Expected to differ from the ICC of the previous (sRGB) preference.
+    EXPECT_NE(icc_profile2, icc_profile3);
+  }
+
+  JxlDecoderDestroy(dec);
+}
+
+// Test decoding ICC from partial files byte for byte.
+// This test must pass also if JXL_CRASH_ON_ERROR is enabled, that is, the
+// decoding of the ANS histogram and stream of the encoded ICC profile must also
+// handle the case of not enough input bytes with StatusCode::kNotEnoughBytes
+// rather than fatal error status codes.
+TEST(DecodeTest, ICCPartialTest) {
+  jxl::PaddedBytes icc_profile = GetIccTestProfile();
+  std::vector<uint8_t> data = GetIccTestHeader(icc_profile, false);
+
+  const uint8_t* next_in = data.data();
+  size_t avail_in = 0;  // start with no input at all
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(
+                dec, JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING));
+
+  bool seen_basic_info = false;
+  bool seen_color_encoding = false;
+  size_t total_size = 0;  // number of bytes of |data| made available so far
+
+  for (;;) {
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+    JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+    size_t remaining = JxlDecoderReleaseInput(dec);
+    EXPECT_LE(remaining, avail_in);
+    next_in += avail_in - remaining;
+    avail_in = remaining;
+    if (status == JXL_DEC_NEED_MORE_INPUT) {
+      if (total_size >= data.size()) {
+        // End of partial codestream with codestream headers and ICC profile
+        // reached, it should not require more input since full image is not
+        // requested. Non-fatal failure, so the decoder still gets destroyed.
+        ADD_FAILURE();
+        break;
+      }
+      size_t increment = 1;
+      if (total_size + increment > data.size()) {
+        increment = data.size() - total_size;
+      }
+      total_size += increment;
+      avail_in += increment;
+    } else if (status == JXL_DEC_BASIC_INFO) {
+      EXPECT_FALSE(seen_basic_info);
+      seen_basic_info = true;
+    } else if (status == JXL_DEC_COLOR_ENCODING) {
+      EXPECT_TRUE(seen_basic_info);
+      EXPECT_FALSE(seen_color_encoding);
+      seen_color_encoding = true;
+
+      // Sanity check the size of the decoded ICC profile
+      size_t dec_profile_size;
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderGetICCProfileSize(
+                    dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL, &dec_profile_size));
+      EXPECT_EQ(icc_profile.size(), dec_profile_size);
+
+    } else if (status == JXL_DEC_SUCCESS) {
+      EXPECT_TRUE(seen_color_encoding);
+      break;
+    } else {
+      // We do not expect any other events or errors
+      ADD_FAILURE();  // non-fatal: leave the loop and still run the cleanup
+      break;
+    }
+  }
+
+  EXPECT_TRUE(seen_basic_info);
+  EXPECT_TRUE(seen_color_encoding);
+
+  JxlDecoderDestroy(dec);
+}
+
+struct PixelTestConfig {  // parameters of one DecodeTestParam PixelTest case
+  // Input image definition.
+  bool grayscale;      // 1 color channel instead of 3
+  bool include_alpha;  // adds one alpha channel to the input image
+  size_t xsize;
+  size_t ysize;
+  jxl::PreviewMode preview_mode;
+  bool add_intrinsic_size;
+  // Output format requested from the decoder (JxlPixelFormat fields).
+  JxlEndianness endianness;
+  JxlDataType data_type;
+  uint32_t output_channels;
+  // Container options.
+  CodeStreamBoxFormat add_container;  // forwarded as the codestream box_format
+  // Decoding mode, forwarded to DecodeWithAPI.
+  bool use_callback;
+  bool set_buffer_early;
+  bool use_resizable_runner;
+  // Exif orientation, 1-8
+  JxlOrientation orientation;
+  bool keep_orientation;  // if set, JxlDecoderSetKeepOrientation(JXL_TRUE)
+  size_t upsampling;      // used for both resampling and ec_resampling
+};
+
+class DecodeTestParam : public ::testing::TestWithParam<PixelTestConfig> {};  // fixture for the PixelTestConfig-parameterized tests
+
+TEST_P(DecodeTestParam, PixelTest) {  // Encodes a generated image, decodes it through the API and compares pixels.
+  PixelTestConfig config = GetParam();
+  JxlDecoder* dec = JxlDecoderCreate(NULL);  // Released by JxlDecoderDestroy at the end of the test.
+
+  if (config.keep_orientation) {
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetKeepOrientation(dec, JXL_TRUE));
+  }
+
+  size_t num_pixels = config.xsize * config.ysize;
+  uint32_t orig_channels =
+      (config.grayscale ? 1 : 3) + (config.include_alpha ? 1 : 0);  // Between 1 and 4 source channels.
+  std::vector<uint8_t> pixels =
+      jxl::test::GetSomeTestImage(config.xsize, config.ysize, orig_channels, 0);
+  JxlPixelFormat format_orig = {orig_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN,
+                                0};  // The source buffer is described as 16-bit big-endian.
+  jxl::TestCodestreamParams params;
+  // Lossless to verify pixels exactly after roundtrip.
+  params.cparams.SetLossless();
+  params.cparams.speed_tier = jxl::SpeedTier::kThunder;
+  params.cparams.resampling = config.upsampling;
+  params.cparams.ec_resampling = config.upsampling;
+  params.box_format = config.add_container;
+  params.orientation = config.orientation;
+  params.preview_mode = config.preview_mode;
+  params.add_intrinsic_size = config.add_intrinsic_size;
+  jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), config.xsize,
+      config.ysize, orig_channels, params);
+
+  JxlPixelFormat format = {config.output_channels, config.data_type,
+                           config.endianness, 0};  // Output format requested from the decoder.
+
+  bool swap_xy = !config.keep_orientation && (config.orientation > 4);  // Orientation values above 4 swap the expected width and height.
+  size_t xsize = swap_xy ? config.ysize : config.xsize;
+  size_t ysize = swap_xy ? config.xsize : config.ysize;
+
+  std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+      dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+      format, config.use_callback, config.set_buffer_early,
+      config.use_resizable_runner, /*require_boxes=*/false,
+      /*expect_success=*/true);
+  JxlDecoderReset(dec);
+  EXPECT_EQ(num_pixels * config.output_channels *
+                jxl::test::GetDataBits(config.data_type) / jxl::kBitsPerByte,
+            pixels2.size());  // Decoded byte count must match pixels * channels * bytes per sample.
+
+  // If an orientation transformation is expected, to compare the pixels, also
+  // apply this transformation to the original pixels. ConvertToExternal is
+  // used to achieve this, with a temporary conversion to CodecInOut and back.
+  if (config.orientation > 1 && !config.keep_orientation) {
+    jxl::Span<const uint8_t> bytes(pixels.data(), pixels.size());
+    jxl::ColorEncoding color_encoding =
+        jxl::ColorEncoding::SRGB(config.grayscale);
+
+    jxl::CodecInOut io;
+    if (config.include_alpha) io.metadata.m.SetAlphaBits(16);
+    io.metadata.m.color_encoding = color_encoding;
+    io.SetSize(config.xsize, config.ysize);
+
+    EXPECT_TRUE(ConvertFromExternal(bytes, config.xsize, config.ysize,
+                                    color_encoding, 16, format_orig, nullptr,
+                                    &io.Main()));
+
+    for (size_t i = 0; i < pixels.size(); i++) pixels[i] = 0;  // Clear the buffer before it is refilled with the re-oriented original.
+    EXPECT_TRUE(ConvertToExternal(
+        io.Main(), 16,
+        /*float_out=*/false, orig_channels, JXL_BIG_ENDIAN,
+        xsize * 2 * orig_channels, nullptr, pixels.data(), pixels.size(),  // presumably the row stride in bytes (2 bytes per sample) - TODO confirm
+        /*out_callback=*/{},
+        static_cast<jxl::Orientation>(config.orientation)));
+  }
+  if (config.upsampling == 1) {
+    EXPECT_EQ(0u, jxl::test::ComparePixels(pixels.data(), pixels2.data(), xsize,
+                                           ysize, format_orig, format));  // Without resampling the roundtrip must be exact.
+  } else {
+    // Resampling is not lossless, so only do a rough check: at most 300 pixels
+    // may exceed the threshold passed below. NOTE(review): the original remark
+    // said "off-by-25" in 8-bit terms while the value passed is 50.0 - confirm.
+    EXPECT_LE(
+        jxl::test::ComparePixels(
+            pixels.data(), pixels2.data(), xsize, ysize, format_orig, format,
+            50.0 * (config.data_type == JXL_TYPE_UINT8 ? 1.0 : 256.0)),  // Threshold is scaled by 256 for every non-8-bit output type.
+        300u);
+  }
+
+  JxlDecoderDestroy(dec);
+}
+
+std::vector<PixelTestConfig> GeneratePixelTests() {  // Builds the list of configurations used to instantiate DecodeTestParam.
+  std::vector<PixelTestConfig> all_tests;
+  struct ChannelInfo {  // Source channel layout plus the number of channels requested on output.
+    bool grayscale;
+    bool include_alpha;
+    size_t output_channels;
+  };
+  ChannelInfo ch_info[] = {
+      {false, true, 4},   // RGBA -> RGBA
+      {true, false, 1},   // G -> G
+      {true, true, 1},    // GA -> G
+      {true, true, 2},    // GA -> GA
+      {false, false, 3},  // RGB -> RGB
+      {false, true, 3},   // RGBA -> RGB
+      {false, false, 4},  // RGB -> RGBA
+  };
+
+  struct OutputFormat {  // Endianness and sample type requested from the decoder.
+    JxlEndianness endianness;
+    JxlDataType data_type;
+  };
+  OutputFormat out_formats[] = {
+      {JXL_NATIVE_ENDIAN, JXL_TYPE_UINT8},
+      {JXL_LITTLE_ENDIAN, JXL_TYPE_UINT16},
+      {JXL_BIG_ENDIAN, JXL_TYPE_UINT16},
+      {JXL_NATIVE_ENDIAN, JXL_TYPE_FLOAT16},
+      {JXL_LITTLE_ENDIAN, JXL_TYPE_FLOAT},
+      {JXL_BIG_ENDIAN, JXL_TYPE_FLOAT},
+  };
+
+  auto make_test = [&](ChannelInfo ch, size_t xsize, size_t ysize,  // Fills one PixelTestConfig and appends it to all_tests.
+                       jxl::PreviewMode preview_mode, bool intrinsic_size,
+                       CodeStreamBoxFormat box, JxlOrientation orientation,
+                       bool keep_orientation, OutputFormat format,
+                       bool use_callback, bool set_buffer_early,
+                       bool resizable_runner, size_t upsampling) {
+    PixelTestConfig c;
+    c.grayscale = ch.grayscale;
+    c.include_alpha = ch.include_alpha;
+    c.preview_mode = preview_mode;
+    c.add_intrinsic_size = intrinsic_size;
+    c.xsize = xsize;
+    c.ysize = ysize;
+    c.add_container = (CodeStreamBoxFormat)box;
+    c.output_channels = ch.output_channels;
+    c.data_type = format.data_type;
+    c.endianness = format.endianness;
+    c.use_callback = use_callback;
+    c.set_buffer_early = set_buffer_early;
+    c.use_resizable_runner = resizable_runner;
+    c.orientation = orientation;
+    c.keep_orientation = keep_orientation;
+    c.upsampling = upsampling;
+    all_tests.push_back(c);
+  };
+
+  // Test output formats and methods.
+  for (ChannelInfo ch : ch_info) {
+    for (int use_callback = 0; use_callback <= 1; use_callback++) {
+      for (size_t upsampling : {1, 2, 4, 8}) {
+        for (OutputFormat fmt : out_formats) {  // Full cross product of channel layout, callback use, upsampling and format.
+          make_test(ch, 301, 33, jxl::kNoPreview,
+                    /*add_intrinsic_size=*/false,
+                    CodeStreamBoxFormat::kCSBF_None, JXL_ORIENT_IDENTITY,
+                    /*keep_orientation=*/false, fmt, use_callback,
+                    /*set_buffer_early=*/false, /*resizable_runner=*/false,
+                    upsampling);
+        }
+      }
+    }
+  }
+  // Test codestream formats.
+  for (size_t box = 1; box < kCSBF_NUM_ENTRIES; ++box) {  // Starts at 1; presumably index 0 is kCSBF_None, used by the other groups - confirm.
+    make_test(ch_info[0], 77, 33, jxl::kNoPreview,
+              /*add_intrinsic_size=*/false, (CodeStreamBoxFormat)box,
+              JXL_ORIENT_IDENTITY,
+              /*keep_orientation=*/false, out_formats[0],
+              /*use_callback=*/false,
+              /*set_buffer_early=*/false, /*resizable_runner=*/false, 1);
+  }
+  // Test previews.
+  for (int preview_mode = 0; preview_mode < jxl::kNumPreviewModes;
+       preview_mode++) {
+    make_test(ch_info[0], 77, 33, (jxl::PreviewMode)preview_mode,
+              /*add_intrinsic_size=*/false, CodeStreamBoxFormat::kCSBF_None,
+              JXL_ORIENT_IDENTITY,
+              /*keep_orientation=*/false, out_formats[0],
+              /*use_callback=*/false, /*set_buffer_early=*/false,
+              /*resizable_runner=*/false, 1);
+  }
+  // Test intrinsic sizes.
+  for (int add_intrinsic_size = 0; add_intrinsic_size <= 1;
+       add_intrinsic_size++) {
+    make_test(ch_info[0], 55, 34, jxl::kNoPreview, add_intrinsic_size,
+              CodeStreamBoxFormat::kCSBF_None, JXL_ORIENT_IDENTITY,
+              /*keep_orientation=*/false, out_formats[0],
+              /*use_callback=*/false, /*set_buffer_early=*/false,
+              /*resizable_runner=*/false, 1);
+  }
+  // Test setting buffers early.
+  make_test(ch_info[0], 300, 33, jxl::kNoPreview,
+            /*add_intrinsic_size=*/false, CodeStreamBoxFormat::kCSBF_None,
+            JXL_ORIENT_IDENTITY,
+            /*keep_orientation=*/false, out_formats[0],
+            /*use_callback=*/false, /*set_buffer_early=*/true,
+            /*resizable_runner=*/false, 1);
+
+  // Test using the resizable runner
+  for (size_t i = 0; i < 4; i++) {
+    make_test(ch_info[0], 300 << i, 33 << i, jxl::kNoPreview,  // Sizes double each iteration: 300x33 up to 2400x264.
+              /*add_intrinsic_size=*/false, CodeStreamBoxFormat::kCSBF_None,
+              JXL_ORIENT_IDENTITY,
+              /*keep_orientation=*/false, out_formats[0],
+              /*use_callback=*/false, /*set_buffer_early=*/false,
+              /*resizable_runner=*/true, 1);
+  }
+
+  // Test orientations.
+  for (int orientation = 2; orientation <= 8; ++orientation) {  // Values 2 through 8; the groups above use JXL_ORIENT_IDENTITY.
+    for (int keep_orientation = 0; keep_orientation <= 1; keep_orientation++) {
+      for (int use_callback = 0; use_callback <= 1; use_callback++) {
+        for (ChannelInfo ch : ch_info) {
+          for (OutputFormat fmt : out_formats) {
+            make_test(ch, 280, 12, jxl::kNoPreview,
+                      /*add_intrinsic_size=*/false,
+                      CodeStreamBoxFormat::kCSBF_None,
+                      static_cast<JxlOrientation>(orientation),
+                      /*keep_orientation=*/keep_orientation, fmt,
+                      /*use_callback=*/use_callback, /*set_buffer_early=*/true,
+                      /*resizable_runner=*/false, 1);
+          }
+        }
+      }
+    }
+  }
+
+  return all_tests;
+}
+
+std::ostream& operator<<(std::ostream& os, const PixelTestConfig& c) {
+  // Builds the compact label that PixelTestDescription uses as the test name.
+  static const char* const kChannelNames[] = {"", "G", "GA", "RGB", "RGBA"};
+  const size_t source_channels =
+      (c.grayscale ? 1 : 3) + (c.include_alpha ? 1 : 0);
+  os << c.xsize << "x" << c.ysize << kChannelNames[source_channels] << "to"
+     << kChannelNames[c.output_channels];
+
+  // Sample type tag; any other data type trips the assertion.
+  const char* type_tag = "";
+  switch (c.data_type) {
+    case JXL_TYPE_UINT8:
+      type_tag = "u8";
+      break;
+    case JXL_TYPE_UINT16:
+      type_tag = "u16";
+      break;
+    case JXL_TYPE_FLOAT:
+      type_tag = "f32";
+      break;
+    case JXL_TYPE_FLOAT16:
+      type_tag = "f16";
+      break;
+    default:
+      JXL_ASSERT(false);
+  }
+  os << type_tag;
+  // Endianness is only named for multi-byte samples; native adds no suffix.
+  if (jxl::test::GetDataBits(c.data_type) > jxl::kBitsPerByte) {
+    if (c.endianness == JXL_BIG_ENDIAN) os << "BE";
+    if (c.endianness == JXL_LITTLE_ENDIAN) os << "LE";
+  }
+  if (c.add_container != CodeStreamBoxFormat::kCSBF_None) {
+    os << "Box" << static_cast<size_t>(c.add_container);
+  }
+  if (c.preview_mode == jxl::kSmallPreview) os << "Preview";
+  if (c.preview_mode == jxl::kBigPreview) os << "BigPreview";
+  if (c.add_intrinsic_size) os << "IntrinicSize";
+  if (c.use_callback) os << "Callback";
+  if (c.set_buffer_early) os << "EarlyBuffer";
+  if (c.use_resizable_runner) os << "ResizableRunner";
+  if (c.orientation != 1) os << "O" << c.orientation;
+  if (c.keep_orientation) os << "Keep";
+  if (c.upsampling > 1) os << "x" << c.upsampling;
+  return os;
+}
+
+std::string PixelTestDescription(
+    const testing::TestParamInfo<DecodeTestParam::ParamType>& info) {
+  std::ostringstream label;  // The test name is the streamed form of the config.
+  label << info.param;
+  return label.str();
+}
+
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(DecodeTest, DecodeTestParam,
+                                   testing::ValuesIn(GeneratePixelTests()),
+                                   PixelTestDescription);  // One test per generated config, named by PixelTestDescription.
+
+TEST(DecodeTest, PixelTestWithICCProfileLossless) {  // Lossless RGBA stream with an ICC profile, decoded to several output formats.
+  JxlDecoder* dec = JxlDecoderCreate(NULL);  // Reused for every decode below; reset after each one.
+
+  size_t xsize = 123, ysize = 77;
+  size_t num_pixels = xsize * ysize;
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+  JxlPixelFormat format_orig = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  jxl::TestCodestreamParams params;
+  // Lossless to verify pixels exactly after roundtrip.
+  params.cparams.SetLossless();
+  params.cparams.speed_tier = jxl::SpeedTier::kThunder;
+  params.add_icc_profile = true;
+  // NOTE(review): no container or preview option is set explicitly in this
+  // test, although an earlier remark here mentioned varying both - confirm.
+  jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 4,
+      params);
+
+  for (uint32_t channels = 3; channels <= 4; ++channels) {  // Decode once to 3 and once to 4 output channels.
+    {
+      JxlPixelFormat format = {channels, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+
+      std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+          dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+          format, /*use_callback=*/false, /*set_buffer_early=*/false,
+          /*use_resizable_runner=*/false, /*require_boxes=*/false,
+          /*expect_success=*/true);
+      JxlDecoderReset(dec);
+      EXPECT_EQ(num_pixels * channels, pixels2.size());  // 1 byte per sample.
+      EXPECT_EQ(0u,
+                jxl::test::ComparePixels(pixels.data(), pixels2.data(), xsize,
+                                         ysize, format_orig, format));
+    }
+    {
+      JxlPixelFormat format = {channels, JXL_TYPE_UINT16, JXL_LITTLE_ENDIAN, 0};
+
+      // Uses the callback and early-buffer options (no container is set here).
+      std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+          dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+          format, /*use_callback=*/true, /*set_buffer_early=*/true,
+          /*use_resizable_runner=*/false, /*require_boxes=*/false,
+          /*expect_success=*/true);
+      JxlDecoderReset(dec);
+      EXPECT_EQ(num_pixels * channels * 2, pixels2.size());  // 2 bytes per sample.
+      EXPECT_EQ(0u,
+                jxl::test::ComparePixels(pixels.data(), pixels2.data(), xsize,
+                                         ysize, format_orig, format));
+    }
+
+    {
+      JxlPixelFormat format = {channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0};
+
+      std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+          dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+          format, /*use_callback=*/false, /*set_buffer_early=*/false,
+          /*use_resizable_runner=*/false, /*require_boxes=*/false,
+          /*expect_success=*/true);
+      JxlDecoderReset(dec);
+      EXPECT_EQ(num_pixels * channels * 4, pixels2.size());  // 4 bytes per sample.
+      EXPECT_EQ(0u,
+                jxl::test::ComparePixels(pixels.data(), pixels2.data(), xsize,
+                                         ysize, format_orig, format));
+    }
+  }
+
+  JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, PixelTestWithICCProfileLossy) {  // Lossy RGB stream with an ICC profile; checked with a Butteraugli bound.
+  JxlDecoder* dec = JxlDecoderCreate(NULL);
+
+  size_t xsize = 123, ysize = 77;
+  size_t num_pixels = xsize * ysize;
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+  JxlPixelFormat format_orig = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  jxl::TestCodestreamParams params;
+  params.add_icc_profile = true;
+  jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 3,
+      params);
+  uint32_t channels = 3;
+
+  JxlPixelFormat format = {channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0};
+
+  jxl::PaddedBytes icc;  // Passed to DecodeWithAPI as the icc out-argument; used below as the decoded profile.
+  std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+      dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+      format, /*use_callback=*/false, /*set_buffer_early=*/true,
+      /*use_resizable_runner=*/false, /*require_boxes=*/false,
+      /*expect_success=*/true, /*icc=*/&icc);
+  JxlDecoderReset(dec);
+  EXPECT_EQ(num_pixels * channels * 4, pixels2.size());  // 4 bytes per float sample.
+
+  // The input pixels use the profile matching GetIccTestProfile, since we set
+  // add_icc_profile for CreateTestJXLCodestream to true.
+  jxl::ColorEncoding color_encoding0;
+  EXPECT_TRUE(color_encoding0.SetICC(GetIccTestProfile(), &jxl::GetJxlCms()));
+  jxl::Span<const uint8_t> span0(pixels.data(), pixels.size());
+  jxl::CodecInOut io0;  // Holds the original image.
+  io0.SetSize(xsize, ysize);
+  EXPECT_TRUE(ConvertFromExternal(span0, xsize, ysize, color_encoding0,
+                                  /*bits_per_sample=*/16, format_orig,
+                                  /*pool=*/nullptr, &io0.Main()));
+
+  jxl::ColorEncoding color_encoding1;
+  EXPECT_TRUE(color_encoding1.SetICC(std::move(icc), &jxl::GetJxlCms()));  // icc is moved from here and not used afterwards.
+  jxl::Span<const uint8_t> span1(pixels2.data(), pixels2.size());
+  jxl::CodecInOut io1;  // Holds the decoded image.
+  io1.SetSize(xsize, ysize);
+  EXPECT_TRUE(ConvertFromExternal(span1, xsize, ysize, color_encoding1,
+                                  /*bits_per_sample=*/32, format,
+                                  /*pool=*/nullptr, &io1.Main()));
+
+  jxl::ButteraugliParams ba;
+  EXPECT_THAT(ButteraugliDistance(io0.frames, io1.frames, ba, jxl::GetJxlCms(),
+                                  /*distmap=*/nullptr, nullptr),
+#if JXL_HIGH_PRECISION
+              IsSlightlyBelow(0.9f));
+#else
+              IsSlightlyBelow(0.98f));
+#endif
+
+  JxlDecoderDestroy(dec);
+}
+
+std::string ColorDescription(JxlColorEncoding c) {
+  jxl::ColorEncoding internal;  // Filled from the public API encoding `c`.
+  EXPECT_TRUE(ConvertExternalToInternalColorEncoding(c, &internal));
+  return Description(internal);
+}
+
+std::string GetOrigProfile(JxlDecoder* dec) {
+  JxlColorEncoding encoded;  // Encoded profile queried for the ORIGINAL target.
+  const JxlDecoderStatus status = JxlDecoderGetColorAsEncodedProfile(
+      dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL, &encoded);
+  EXPECT_EQ(JXL_DEC_SUCCESS, status);
+  return ColorDescription(encoded);
+}
+
+std::string GetDataProfile(JxlDecoder* dec) {
+  JxlColorEncoding encoded;  // Encoded profile queried for the DATA target.
+  const JxlDecoderStatus status = JxlDecoderGetColorAsEncodedProfile(
+      dec, JXL_COLOR_PROFILE_TARGET_DATA, &encoded);
+  EXPECT_EQ(JXL_DEC_SUCCESS, status);
+  return ColorDescription(encoded);
+}
+
+double ButteraugliDistance(size_t xsize, size_t ysize,
+                           const std::vector<uint8_t>& pixels_in,
+                           const jxl::ColorEncoding& color_in,
+                           float intensity_in,
+                           const std::vector<uint8_t>& pixels_out,
+                           const jxl::ColorEncoding& color_out,
+                           float intensity_out) {
+  jxl::CodecInOut reference;  // Both buffers are read as 16-bit big-endian.
+  reference.metadata.m.color_encoding = color_in;
+  reference.metadata.m.SetIntensityTarget(intensity_in);
+  const jxl::Span<const uint8_t> reference_bytes(pixels_in.data(),
+                                                 pixels_in.size());
+  JxlPixelFormat reference_format = {static_cast<uint32_t>(color_in.Channels()),
+                                     JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  EXPECT_TRUE(jxl::ConvertFromExternal(reference_bytes, xsize, ysize, color_in,
+                                       /*bits_per_sample=*/16, reference_format,
+                                       /*pool=*/nullptr, &reference.Main()));
+  jxl::CodecInOut distorted;
+  distorted.metadata.m.color_encoding = color_out;
+  distorted.metadata.m.SetIntensityTarget(intensity_out);
+  const jxl::Span<const uint8_t> distorted_bytes(pixels_out.data(),
+                                                 pixels_out.size());
+  JxlPixelFormat distorted_format = {static_cast<uint32_t>(color_out.Channels()),
+                                     JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  EXPECT_TRUE(jxl::ConvertFromExternal(distorted_bytes, xsize, ysize, color_out,
+                                       /*bits_per_sample=*/16, distorted_format,
+                                       /*pool=*/nullptr, &distorted.Main()));
+  return ButteraugliDistance(reference.frames, distorted.frames,
+                             jxl::ButteraugliParams(), jxl::GetJxlCms(), nullptr, nullptr);
+}
+
+class DecodeAllEncodingsTest
+    : public ::testing::TestWithParam<jxl::test::ColorEncodingDescriptor> {};  // Fixture parameterized by a color encoding descriptor.
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(
+    DecodeAllEncodingsTestInstantiation, DecodeAllEncodingsTest,
+    ::testing::ValuesIn(jxl::test::AllEncodings()));  // One instance per entry returned by AllEncodings().
+TEST_P(DecodeAllEncodingsTest, PreserveOriginalProfileTest) {  // Checks that both reported profiles equal the input color space.
+  size_t xsize = 123, ysize = 77;
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+  JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  int events = JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING | JXL_DEC_FULL_IMAGE;
+  const auto& cdesc = GetParam();
+  jxl::ColorEncoding c_in = jxl::test::ColorEncodingFromDescriptor(cdesc);
+  if (c_in.rendering_intent != jxl::RenderingIntent::kRelative) return;  // Only relative-intent encodings are exercised; others return without checks.
+  std::string color_space_in = Description(c_in);
+  float intensity_in = c_in.tf.IsPQ() ? 10000 : 255;  // Intensity target: 10000 for PQ, 255 otherwise.
+  printf("Testing input color space %s\n", color_space_in.c_str());
+  jxl::TestCodestreamParams params;
+  params.color_space = color_space_in;
+  params.intensity_target = intensity_in;
+  jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 3,
+      params);
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events));
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), data.size()));
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));  // First subscribed event expected.
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+  EXPECT_EQ(xsize, info.xsize);
+  EXPECT_EQ(ysize, info.ysize);
+  EXPECT_FALSE(info.uses_original_profile);
+  EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+  EXPECT_EQ(GetOrigProfile(dec), color_space_in);
+  EXPECT_EQ(GetDataProfile(dec), color_space_in);  // No preferred profile was set, so the data profile must also match.
+  EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+  std::vector<uint8_t> out(pixels.size());  // assumes the decoded buffer has the same byte size as the input - TODO confirm
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetImageOutBuffer(dec, &format, out.data(), out.size()));
+  EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+  double dist = ButteraugliDistance(xsize, ysize, pixels, c_in, intensity_in,
+                                    out, c_in, intensity_in);
+  EXPECT_LT(dist, 1.29);
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));  // A final call is expected to report success.
+  JxlDecoderDestroy(dec);
+}
+
+namespace {
+void SetPreferredColorProfileTest(  // Encodes in `from`, then decodes once per candidate output encoding and checks the result.
+    const jxl::test::ColorEncodingDescriptor& from) {
+  size_t xsize = 123, ysize = 77;
+  int events = JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING | JXL_DEC_FULL_IMAGE;
+  jxl::ColorEncoding c_in = jxl::test::ColorEncodingFromDescriptor(from);
+  if (c_in.rendering_intent != jxl::RenderingIntent::kRelative) return;  // Only relative-intent sources are exercised.
+  if (c_in.white_point != jxl::WhitePoint::kD65) return;  // Only D65 sources are exercised.
+  uint32_t num_channels = c_in.Channels();
+  std::vector<uint8_t> pixels =
+      jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+  JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  std::string color_space_in = Description(c_in);
+  float intensity_in = c_in.tf.IsPQ() ? 10000 : 255;  // Intensity target: 10000 for PQ, 255 otherwise.
+  jxl::TestCodestreamParams params;
+  params.color_space = color_space_in;
+  params.intensity_target = intensity_in;
+  jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+      num_channels, params);
+  auto all_encodings = jxl::test::AllEncodings();
+  all_encodings.push_back(  // Also try XYB as a requested output encoding.
+      {jxl::ColorSpace::kXYB, jxl::WhitePoint::kD65, jxl::Primaries::kCustom,
+       jxl::TransferFunction::kUnknown, jxl::RenderingIntent::kPerceptual});
+  for (const auto& c1 : all_encodings) {
+    jxl::ColorEncoding c_out = jxl::test::ColorEncodingFromDescriptor(c1);
+    float intensity_out = intensity_in;
+    if (c_out.GetColorSpace() != jxl::ColorSpace::kXYB) {
+      if (c_out.rendering_intent != jxl::RenderingIntent::kRelative) {
+        continue;
+      }
+      if ((c_in.primaries == jxl::Primaries::k2100 &&
+           c_out.primaries != jxl::Primaries::k2100) ||
+          (c_in.primaries == jxl::Primaries::kP3 &&
+           c_out.primaries == jxl::Primaries::kSRGB)) {
+        // Converting to a narrower gamut does not work without gamut mapping.
+        continue;
+      }
+    }
+    if (c_out.tf.IsHLG() && intensity_out > 300) {
+      // The Linear->HLG OOTF function at this intensity level can push
+      // saturated colors out of gamut, so we would need gamut mapping in
+      // this case too.
+      continue;
+    }
+    std::string color_space_out = Description(c_out);
+    if (color_space_in == color_space_out) continue;  // Identical input and output descriptions are skipped.
+    printf("Testing input color space %s with output color space %s\n",
+           color_space_in.c_str(), color_space_out.c_str());
+    JxlDecoder* dec = JxlDecoderCreate(nullptr);  // A fresh decoder per output encoding; destroyed on every path below.
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events));
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSetInput(dec, data.data(), data.size()));
+    EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+    JxlBasicInfo info;
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+    EXPECT_EQ(xsize, info.xsize);
+    EXPECT_EQ(ysize, info.ysize);
+    EXPECT_FALSE(info.uses_original_profile);
+    EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+    EXPECT_EQ(GetOrigProfile(dec), color_space_in);
+    EXPECT_EQ(GetDataProfile(dec), color_space_in);  // Before a preference is set, the data profile equals the original.
+    JxlColorEncoding encoding_out;
+    EXPECT_TRUE(jxl::ParseDescription(color_space_out, &encoding_out));
+    if (c_out.GetColorSpace() == jxl::ColorSpace::kXYB &&
+        (c_in.primaries != jxl::Primaries::kSRGB || c_in.tf.IsPQ())) {  // XYB output is expected to be rejected for these sources.
+      EXPECT_EQ(JXL_DEC_ERROR,
+                JxlDecoderSetPreferredColorProfile(dec, &encoding_out));
+      JxlDecoderDestroy(dec);
+      continue;
+    }
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSetPreferredColorProfile(dec, &encoding_out));
+    EXPECT_EQ(GetOrigProfile(dec), color_space_in);
+    EXPECT_EQ(GetDataProfile(dec), color_space_out);  // The data profile must now report the requested encoding.
+    EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+    size_t buffer_size;
+    JxlPixelFormat out_format = format;
+    out_format.num_channels = c_out.Channels();  // The output channel count follows the requested encoding.
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderImageOutBufferSize(dec, &out_format, &buffer_size));
+    std::vector<uint8_t> out(buffer_size);
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                   dec, &out_format, out.data(), out.size()));
+    EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+    double dist = ButteraugliDistance(xsize, ysize, pixels, c_in, intensity_in,
+                                      out, c_out, intensity_out);
+    if (c_in.white_point == c_out.white_point) {
+      EXPECT_LT(dist, 1.29);
+    } else {
+      EXPECT_LT(dist, 4.0);  // Looser bound when the white point changes.
+    }
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+    JxlDecoderDestroy(dec);
+  }
+}
+}  // namespace
+
+TEST(DecodeTest, SetPreferredColorProfileTestFromGray) {
+  // Source encoding: gray, D65 white point, sRGB transfer, relative intent.
+  SetPreferredColorProfileTest(jxl::test::ColorEncodingDescriptor{
+      jxl::ColorSpace::kGray, jxl::WhitePoint::kD65, jxl::Primaries::kSRGB,
+      jxl::TransferFunction::kSRGB, jxl::RenderingIntent::kRelative});
+}
+
+TEST_P(DecodeAllEncodingsTest, SetPreferredColorProfileTest) {
+  // Delegates to the shared helper with this instance's descriptor.
+  SetPreferredColorProfileTest(GetParam());
+}
+
+// Tests the case of a lossy sRGB image without alpha channel, decoded to RGB8
+// and to RGBA8; the result is checked against a Butteraugli distance bound.
+TEST(DecodeTest, PixelTestOpaqueSrgbLossy) {
+  for (unsigned channels = 3; channels <= 4; channels++) {  // 3 = RGB output, 4 = RGBA output.
+    JxlDecoder* dec = JxlDecoderCreate(NULL);  // One decoder per iteration; destroyed at the end of the loop body.
+
+    size_t xsize = 123, ysize = 77;
+    size_t num_pixels = xsize * ysize;
+    std::vector<uint8_t> pixels =
+        jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+    JxlPixelFormat format_orig = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+    jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+        jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 3,
+        jxl::TestCodestreamParams());  // Default-constructed parameters.
+
+    JxlPixelFormat format = {channels, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+
+    std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+        dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+        format, /*use_callback=*/true, /*set_buffer_early=*/false,
+        /*use_resizable_runner=*/false, /*require_boxes=*/false,
+        /*expect_success*/ true);
+    JxlDecoderReset(dec);
+    EXPECT_EQ(num_pixels * channels, pixels2.size());  // 1 byte per sample.
+
+    jxl::ColorEncoding color_encoding0 = jxl::ColorEncoding::SRGB(false);
+    jxl::Span<const uint8_t> span0(pixels.data(), pixels.size());
+    jxl::CodecInOut io0;  // Holds the original image.
+    io0.SetSize(xsize, ysize);
+    EXPECT_TRUE(ConvertFromExternal(span0, xsize, ysize, color_encoding0,
+                                    /*bits_per_sample=*/16, format_orig,
+                                    /*pool=*/nullptr, &io0.Main()));
+
+    jxl::ColorEncoding color_encoding1 = jxl::ColorEncoding::SRGB(false);
+    jxl::Span<const uint8_t> span1(pixels2.data(), pixels2.size());
+    jxl::CodecInOut io1;  // NOTE(review): unlike io0, io1 gets no SetSize call before conversion - confirm this is intentional.
+    EXPECT_TRUE(ConvertFromExternal(span1, xsize, ysize, color_encoding1,
+                                    /*bits_per_sample=*/8, format,
+                                    /*pool=*/nullptr, &io1.Main()));
+
+    jxl::ButteraugliParams ba;
+    EXPECT_THAT(
+        ButteraugliDistance(io0.frames, io1.frames, ba, jxl::GetJxlCms(),
+                            /*distmap=*/nullptr, nullptr),
+#if JXL_HIGH_PRECISION
+        IsSlightlyBelow(0.93f));
+#else
+        IsSlightlyBelow(0.94f));
+#endif
+
+    JxlDecoderDestroy(dec);
+  }
+}
+
+// Opaque image with noise enabled, decoded to RGB8 and RGBA8.
+TEST(DecodeTest, PixelTestOpaqueSrgbLossyNoise) {
+  for (unsigned channels = 3; channels <= 4; channels++) {  // 3 = RGB output, 4 = RGBA output.
+    JxlDecoder* dec = JxlDecoderCreate(NULL);  // One decoder per iteration; destroyed at the end of the loop body.
+
+    size_t xsize = 512, ysize = 300;
+    size_t num_pixels = xsize * ysize;
+    std::vector<uint8_t> pixels =
+        jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+    JxlPixelFormat format_orig = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+    jxl::TestCodestreamParams params;
+    params.cparams.noise = jxl::Override::kOn;  // The only non-default encoder option in this test.
+    jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+        jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 3,
+        params);
+
+    JxlPixelFormat format = {channels, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+
+    std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+        dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+        format, /*use_callback=*/false, /*set_buffer_early=*/true,
+        /*use_resizable_runner=*/false, /*require_boxes=*/false,
+        /*expect_success=*/true);
+    JxlDecoderReset(dec);
+    EXPECT_EQ(num_pixels * channels, pixels2.size());  // 1 byte per sample.
+
+    jxl::ColorEncoding color_encoding0 = jxl::ColorEncoding::SRGB(false);
+    jxl::Span<const uint8_t> span0(pixels.data(), pixels.size());
+    jxl::CodecInOut io0;  // Holds the original image.
+    io0.SetSize(xsize, ysize);
+    EXPECT_TRUE(ConvertFromExternal(span0, xsize, ysize, color_encoding0,
+                                    /*bits_per_sample=*/16, format_orig,
+                                    /*pool=*/nullptr, &io0.Main()));
+
+    jxl::ColorEncoding color_encoding1 = jxl::ColorEncoding::SRGB(false);
+    jxl::Span<const uint8_t> span1(pixels2.data(), pixels2.size());
+    jxl::CodecInOut io1;  // NOTE(review): unlike io0, io1 gets no SetSize call before conversion - confirm this is intentional.
+    EXPECT_TRUE(ConvertFromExternal(span1, xsize, ysize, color_encoding1,
+                                    /*bits_per_sample=*/8, format,
+                                    /*pool=*/nullptr, &io1.Main()));
+
+    jxl::ButteraugliParams ba;
+    EXPECT_THAT(
+        ButteraugliDistance(io0.frames, io1.frames, ba, jxl::GetJxlCms(),
+                            /*distmap=*/nullptr, nullptr),
+        IsSlightlyBelow(2.04444f));  // Bound is larger than the one used in the noise-free lossy test.
+
+    JxlDecoderDestroy(dec);
+  }
+}
+
+TEST(DecodeTest, ProcessEmptyInputWithBoxes) {  // For every box format: processing with no input available must ask for more input.
+  size_t xsize = 123, ysize = 77;
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+  jxl::CompressParams cparams;  // NOTE(review): not referenced anywhere else in this test.
+  uint32_t channels = 3;
+  JxlPixelFormat format = {channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0};
+  for (int i = 0; i < kCSBF_NUM_ENTRIES; ++i) {
+    JxlDecoder* dec = JxlDecoderCreate(NULL);  // One decoder per box format; destroyed at the end of the loop body.
+    jxl::TestCodestreamParams params;
+    params.box_format = (CodeStreamBoxFormat)i;
+    printf("Testing empty input with box format %d\n", (int)params.box_format);
+    jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+        jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 3,
+        params);
+    const int events =
+        JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE | JXL_DEC_COLOR_ENCODING;
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events));
+    EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));  // No input has been set yet.
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSetInput(dec, compressed.data(), compressed.size()));
+    EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+    EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+    size_t buffer_size;  // Only the status of the size query is checked; the value is not used.
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+    JxlBasicInfo info;
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+    const size_t remaining = JxlDecoderReleaseInput(dec);
+    EXPECT_LE(remaining, compressed.size());
+    EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));  // Input was released, so more input is expected to be requested.
+    JxlDecoderDestroy(dec);
+  }
+}
+
+TEST(DecodeTest, ExtraBytesAfterCompressedStream) {  // Appends 3 bytes to the stream and checks how many bytes stay unconsumed.
+  size_t xsize = 123, ysize = 77;
+  size_t num_pixels = xsize * ysize;
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+  jxl::CompressParams cparams;  // NOTE(review): not referenced anywhere else in this test.
+  for (int i = 0; i < kCSBF_NUM_ENTRIES; ++i) {
+    CodeStreamBoxFormat box_format = (CodeStreamBoxFormat)i;
+    if (box_format == kCSBF_Multi_Other_Zero_Terminated) continue;  // presumably a zero-terminated last box would absorb the extra bytes - TODO confirm
+    printf("Testing with box format %d\n", (int)box_format);
+    size_t last_unknown_box_size = 0;  // Bytes of trailing boxes expected to be left unread, besides the 3 appended bytes.
+    if (box_format == kCSBF_Single_Other) {
+      last_unknown_box_size = unk1_box_size + 8;  // the + 8 presumably accounts for the box header - TODO confirm
+    } else if (box_format == kCSBF_Multi_Other_Terminated) {
+      last_unknown_box_size = unk3_box_size + 8;
+    } else if (box_format == kCSBF_Multi_Last_Empty_Other) {
+      // If boxes are not required, the decoder won't consume the last empty
+      // jxlp box.
+      last_unknown_box_size = 12 + unk3_box_size + 8;
+    }
+    jxl::TestCodestreamParams params;
+    params.box_format = box_format;
+    jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+        jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 3,
+        params);
+    // Add some more bytes after compressed data.
+    compressed.push_back(0);
+    compressed.push_back(1);
+    compressed.push_back(2);
+    JxlDecoder* dec = JxlDecoderCreate(NULL);  // One decoder per box format; destroyed at the end of the loop body.
+    uint32_t channels = 3;
+    JxlPixelFormat format = {channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0};
+    std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+        dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+        format, /*use_callback=*/false, /*set_buffer_early=*/true,
+        /*use_resizable_runner=*/false, /*require_boxes=*/false,
+        /*expect_success=*/true);
+    size_t unconsumed_bytes = JxlDecoderReleaseInput(dec);
+    EXPECT_EQ(last_unknown_box_size + 3, unconsumed_bytes);  // The 3 appended bytes plus any trailing unknown box remain unread.
+    EXPECT_EQ(num_pixels * channels * 4, pixels2.size());  // 4 bytes per float sample.
+    JxlDecoderDestroy(dec);
+  }
+}
+
+// Same setup as ExtraBytesAfterCompressedStream, but decodes with
+// require_boxes=true. Only the formats listed in `expect_success` are
+// expected to decode successfully; in every case exactly the three appended
+// bytes are expected to remain unconsumed.
+TEST(DecodeTest, ExtraBytesAfterCompressedStreamRequireBoxes) {
+  size_t xsize = 123, ysize = 77;
+  size_t num_pixels = xsize * ysize;
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+  for (int i = 0; i < kCSBF_NUM_ENTRIES; ++i) {
+    CodeStreamBoxFormat box_format = (CodeStreamBoxFormat)i;
+    if (box_format == kCSBF_Multi_Other_Zero_Terminated) continue;
+    printf("Testing with box format %d\n", (int)box_format);
+    bool expect_success = (box_format == kCSBF_None ||
+                           box_format == kCSBF_Single_Zero_Terminated ||
+                           box_format == kCSBF_Multi_Zero_Terminated);
+    jxl::TestCodestreamParams params;
+    params.box_format = box_format;
+    jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+        jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 3,
+        params);
+    // Add some more bytes after compressed data.
+    compressed.push_back(0);
+    compressed.push_back(1);
+    compressed.push_back(2);
+    JxlDecoder* dec = JxlDecoderCreate(NULL);
+    uint32_t channels = 3;
+    JxlPixelFormat format = {channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0};
+    std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+        dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+        format, /*use_callback=*/false, /*set_buffer_early=*/true,
+        /*use_resizable_runner=*/false, /*require_boxes=*/true, expect_success);
+    size_t unconsumed_bytes = JxlDecoderReleaseInput(dec);
+    // Unsigned literal: avoids a signed/unsigned comparison against size_t.
+    EXPECT_EQ(3u, unconsumed_bytes);
+    // JXL_TYPE_FLOAT output: 4 bytes per sample.
+    EXPECT_EQ(num_pixels * channels * 4, pixels2.size());
+    JxlDecoderDestroy(dec);
+  }
+}
+
+// Concatenates two test codestreams (every pair of box formats, except
+// kCSBF_Multi_Other_Zero_Terminated) and decodes them one after the other
+// with a fresh decoder each time. The second decoder starts where the first
+// one stopped consuming; after both parts no input may be left over.
+TEST(DecodeTest, ConcatenatedCompressedStreams) {
+  size_t xsize = 123, ysize = 77;
+  size_t num_pixels = xsize * ysize;
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+  for (int i = 0; i < kCSBF_NUM_ENTRIES; ++i) {
+    CodeStreamBoxFormat first_box_format = (CodeStreamBoxFormat)i;
+    if (first_box_format == kCSBF_Multi_Other_Zero_Terminated) continue;
+    jxl::TestCodestreamParams params1;
+    params1.box_format = first_box_format;
+    jxl::PaddedBytes compressed1 = jxl::CreateTestJXLCodestream(
+        jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 3,
+        params1);
+    for (int j = 0; j < kCSBF_NUM_ENTRIES; ++j) {
+      CodeStreamBoxFormat second_box_format = (CodeStreamBoxFormat)j;
+      if (second_box_format == kCSBF_Multi_Other_Zero_Terminated) continue;
+      printf("Testing with box format pair %d, %d\n", (int)first_box_format,
+             (int)second_box_format);
+      jxl::TestCodestreamParams params2;
+      params2.box_format = second_box_format;
+      jxl::PaddedBytes compressed2 = jxl::CreateTestJXLCodestream(
+          jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+          3, params2);
+      jxl::PaddedBytes concat;
+      concat.append(compressed1);
+      concat.append(compressed2);
+      uint32_t channels = 3;
+      JxlPixelFormat format = {channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0};
+      // Bytes of `concat` not yet consumed by a decoder.
+      size_t remaining = concat.size();
+      for (int part = 0; part < 2; ++part) {
+        printf("  Decoding part %d\n", part + 1);
+        JxlDecoder* dec = JxlDecoderCreate(NULL);
+        // Resume at the first byte the previous decoder left untouched.
+        size_t pos = concat.size() - remaining;
+        // The first part is always expected to succeed; the second part only
+        // for the formats listed here.
+        bool expect_success =
+            (part == 0 || second_box_format == kCSBF_None ||
+             second_box_format == kCSBF_Single_Zero_Terminated ||
+             second_box_format == kCSBF_Multi_Zero_Terminated);
+        std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+            dec, jxl::Span<const uint8_t>(concat.data() + pos, remaining),
+            format, /*use_callback=*/false, /*set_buffer_early=*/true,
+            /*use_resizable_runner=*/false, /*require_boxes=*/true,
+            expect_success);
+        // JXL_TYPE_FLOAT output: 4 bytes per sample.
+        EXPECT_EQ(num_pixels * channels * 4, pixels2.size());
+        remaining = JxlDecoderReleaseInput(dec);
+        JxlDecoderDestroy(dec);
+      }
+      // Unsigned literal: avoids a signed/unsigned comparison against size_t.
+      EXPECT_EQ(0u, remaining);
+    }
+  }
+}
+
+// Feeds test codestreams to the decoder in small increments and checks the
+// sequence of events the decoder reports.
+//
+// One codestream is created per box format. Each is decoded once per entry
+// of `increments`, handing over at most that many additional bytes every
+// time the decoder asks for more input.
+//
+// reconstructible_jpeg: when true, a 3-channel image is encoded together
+//   with a JPEG codestream and the reconstructed JPEG bytes are compared;
+//   when false, a 4-channel image is encoded losslessly and the decoded
+//   pixels are compared with the originals.
+//
+// Failures are recorded with ADD_FAILURE() rather than FAIL(): FAIL()
+// returns from the function immediately, which would skip
+// JxlDecoderDestroy() and leak the decoder.
+void TestPartialStream(bool reconstructible_jpeg) {
+  size_t xsize = 123, ysize = 77;
+  uint32_t channels = 4;
+  if (reconstructible_jpeg) {
+    channels = 3;
+  }
+  std::vector<uint8_t> pixels =
+      jxl::test::GetSomeTestImage(xsize, ysize, channels, 0);
+  JxlPixelFormat format_orig = {channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  jxl::TestCodestreamParams params;
+  if (reconstructible_jpeg) {
+    params.cparams.color_transform = jxl::ColorTransform::kNone;
+  } else {
+    // Lossless to verify pixels exactly after roundtrip.
+    params.cparams.SetLossless();
+  }
+
+  std::vector<uint8_t> pixels2;
+  pixels2.resize(pixels.size());
+
+  // Output buffer for the reconstructed JPEG; doubled whenever the decoder
+  // reports JXL_DEC_JPEG_NEED_MORE_OUTPUT.
+  jxl::PaddedBytes jpeg_output(64);
+  size_t used_jpeg_output = 0;
+
+  std::vector<jxl::PaddedBytes> codestreams(kCSBF_NUM_ENTRIES);
+  std::vector<jxl::PaddedBytes> jpeg_codestreams(kCSBF_NUM_ENTRIES);
+  for (size_t i = 0; i < kCSBF_NUM_ENTRIES; ++i) {
+    params.box_format = (CodeStreamBoxFormat)i;
+    if (reconstructible_jpeg) {
+      params.jpeg_codestream = &jpeg_codestreams[i];
+    }
+    codestreams[i] = jxl::CreateTestJXLCodestream(
+        jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+        channels, params);
+  }
+
+  // Test multiple step sizes, to test different combinations of the streaming
+  // box parsing.
+  std::vector<size_t> increments = {1, 3, 17, 23, 120, 700, 1050};
+
+  for (size_t index = 0; index < increments.size(); index++) {
+    for (size_t i = 0; i < kCSBF_NUM_ENTRIES; ++i) {
+      if (reconstructible_jpeg &&
+          (CodeStreamBoxFormat)i == CodeStreamBoxFormat::kCSBF_None) {
+        continue;
+      }
+      const jxl::PaddedBytes& data = codestreams[i];
+      const uint8_t* next_in = data.data();
+      size_t avail_in = 0;
+
+      // Clear the output of the previous decode so that the pixel comparison
+      // below cannot pass on stale data.
+      pixels2.assign(pixels.size(), 0);
+
+      JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSubscribeEvents(
+                    dec, JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE |
+                             JXL_DEC_JPEG_RECONSTRUCTION));
+
+      bool seen_basic_info = false;
+      bool seen_full_image = false;
+      bool seen_jpeg_recon = false;
+
+      // Number of bytes of `data` handed to the decoder so far.
+      size_t total_size = 0;
+
+      for (;;) {
+        EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+        JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+        size_t remaining = JxlDecoderReleaseInput(dec);
+        EXPECT_LE(remaining, avail_in);
+        // Advance past whatever the decoder consumed in this call.
+        next_in += avail_in - remaining;
+        avail_in = remaining;
+        if (status == JXL_DEC_NEED_MORE_INPUT) {
+          if (total_size >= data.size()) {
+            // End of test data reached, it should have successfully decoded the
+            // image now.
+            ADD_FAILURE();
+            break;
+          }
+
+          size_t increment = increments[index];
+          // End of the file reached, should be the final test.
+          if (total_size + increment > data.size()) {
+            increment = data.size() - total_size;
+          }
+          total_size += increment;
+          avail_in += increment;
+        } else if (status == JXL_DEC_BASIC_INFO) {
+          // This event should happen exactly once
+          EXPECT_FALSE(seen_basic_info);
+          if (seen_basic_info) break;
+          seen_basic_info = true;
+          JxlBasicInfo info;
+          EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+          EXPECT_EQ(info.xsize, xsize);
+          EXPECT_EQ(info.ysize, ysize);
+        } else if (status == JXL_DEC_JPEG_RECONSTRUCTION) {
+          // The test expects this event before basic info and the image.
+          EXPECT_FALSE(seen_basic_info);
+          EXPECT_FALSE(seen_full_image);
+          EXPECT_EQ(JXL_DEC_SUCCESS,
+                    JxlDecoderSetJPEGBuffer(dec, jpeg_output.data(),
+                                            jpeg_output.size()));
+          seen_jpeg_recon = true;
+        } else if (status == JXL_DEC_JPEG_NEED_MORE_OUTPUT) {
+          EXPECT_TRUE(seen_jpeg_recon);
+          // Grow the JPEG buffer and continue writing after the bytes that
+          // were already produced.
+          used_jpeg_output =
+              jpeg_output.size() - JxlDecoderReleaseJPEGBuffer(dec);
+          jpeg_output.resize(jpeg_output.size() * 2);
+          EXPECT_EQ(JXL_DEC_SUCCESS,
+                    JxlDecoderSetJPEGBuffer(
+                        dec, jpeg_output.data() + used_jpeg_output,
+                        jpeg_output.size() - used_jpeg_output));
+        } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+          EXPECT_EQ(JXL_DEC_SUCCESS,
+                    JxlDecoderSetImageOutBuffer(
+                        dec, &format_orig, pixels2.data(), pixels2.size()));
+        } else if (status == JXL_DEC_FULL_IMAGE) {
+          // This event should happen exactly once
+          EXPECT_FALSE(seen_full_image);
+          if (seen_full_image) break;
+          // This event should happen after basic info
+          EXPECT_TRUE(seen_basic_info);
+          seen_full_image = true;
+          if (reconstructible_jpeg) {
+            used_jpeg_output =
+                jpeg_output.size() - JxlDecoderReleaseJPEGBuffer(dec);
+            EXPECT_EQ(used_jpeg_output, jpeg_codestreams[i].size());
+            EXPECT_EQ(0, memcmp(jpeg_output.data(), jpeg_codestreams[i].data(),
+                                used_jpeg_output));
+          } else {
+            EXPECT_EQ(pixels, pixels2);
+          }
+        } else if (status == JXL_DEC_SUCCESS) {
+          EXPECT_TRUE(seen_full_image);
+          break;
+        } else {
+          // We do not expect any other events or errors
+          ADD_FAILURE();
+          break;
+        }
+      }
+
+      // Ensure the decoder emitted the basic info and full image events
+      EXPECT_TRUE(seen_basic_info);
+      EXPECT_TRUE(seen_full_image);
+
+      JxlDecoderDestroy(dec);
+    }
+  }
+}
+
+// Pixel decoding of an incomplete file must report JXL_DEC_NEED_MORE_INPUT
+// rather than an error. Runs the incremental-input scenario without JPEG
+// reconstruction.
+TEST(DecodeTest, PixelPartialTest) {
+  TestPartialStream(/*reconstructible_jpeg=*/false);
+}
+
+// JPEG-reconstruction counterpart of PixelPartialTest: checks the return
+// status while decoding JPEG bytes from an incomplete file.
+// NOTE(review): TEST_LIBJPEG_SUPPORT() presumably bails out when libjpeg is
+// unavailable - confirm against its definition.
+TEST(DecodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGPartialTest)) {
+  TEST_LIBJPEG_SUPPORT();
+  TestPartialStream(/*reconstructible_jpeg=*/true);
+}
+
+// The DC event is deprecated: it still exists, but is no longer implemented.
+// With only basic info subscribed, decoding a tiny image must go straight
+// from JXL_DEC_BASIC_INFO to JXL_DEC_SUCCESS.
+TEST(DecodeTest, DCNotGettableTest) {
+  // 1x1 pixel JXL image
+  const std::string codestream(
+      "\377\n\0\20\260\23\0H\200("
+      "\0\334\0U\17\0\0\250P\31e\334\340\345\\\317\227\37:,"
+      "\246m\\gh\253m\vK\22E\306\261I\252C&pH\22\353 "
+      "\363\6\22\bp\0\200\237\34\231W2d\255$\1",
+      68);
+  const uint8_t* input = reinterpret_cast<const uint8_t*>(codestream.data());
+
+  JxlDecoder* decoder = JxlDecoderCreate(nullptr);
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(decoder, JXL_DEC_BASIC_INFO));
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetInput(decoder, input, codestream.size()));
+
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(decoder));
+
+  // Since the image is only 1x1 pixel, there is only 1 group, the decoder is
+  // unable to get DC size from this, and will not return the DC at all. Since
+  // no full image is requested either, it is expected to return success.
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(decoder));
+
+  JxlDecoderDestroy(decoder);
+}
+
+// Encodes an image with a small and with a big preview, decodes only the
+// preview through the API, and compares it against a reference preview
+// generated from the original pixels.
+TEST(DecodeTest, PreviewTest) {
+  size_t xsize = 77, ysize = 120;
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+  JxlPixelFormat format_orig = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  for (jxl::PreviewMode mode : {jxl::kSmallPreview, jxl::kBigPreview}) {
+    jxl::TestCodestreamParams params;
+    params.preview_mode = mode;
+
+    jxl::PaddedBytes encoded = jxl::CreateTestJXLCodestream(
+        jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 3,
+        params);
+
+    // Pixel format requested for the decoded preview.
+    JxlPixelFormat format = {3, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+
+    JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSubscribeEvents(
+                  dec, JXL_DEC_BASIC_INFO | JXL_DEC_PREVIEW_IMAGE));
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSetInput(dec, encoded.data(), encoded.size()));
+
+    EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+    JxlBasicInfo info;
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+    size_t preview_bytes;
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderPreviewOutBufferSize(dec, &format, &preview_bytes));
+
+    // Build the reference preview from the original 16-bit pixels.
+    jxl::ColorEncoding c_srgb = jxl::ColorEncoding::SRGB(false);
+    jxl::CodecInOut reference;
+    EXPECT_TRUE(jxl::ConvertFromExternal(
+        jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+        c_srgb, /*bits_per_sample=*/16, format_orig, /*pool=*/nullptr,
+        &reference.Main()));
+    GeneratePreview(params.preview_mode, &reference.Main());
+
+    size_t xsize_preview = reference.Main().xsize();
+    size_t ysize_preview = reference.Main().ysize();
+    EXPECT_EQ(xsize_preview, info.preview.xsize);
+    EXPECT_EQ(ysize_preview, info.preview.ysize);
+    // 3 channels of one byte each.
+    EXPECT_EQ(xsize_preview * ysize_preview * 3, preview_bytes);
+
+    EXPECT_EQ(JXL_DEC_NEED_PREVIEW_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+    std::vector<uint8_t> preview(preview_bytes);
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSetPreviewOutBuffer(dec, &format, preview.data(),
+                                            preview.size()));
+
+    EXPECT_EQ(JXL_DEC_PREVIEW_IMAGE, JxlDecoderProcessInput(dec));
+
+    jxl::CodecInOut decoded;
+    EXPECT_TRUE(jxl::ConvertFromExternal(
+        jxl::Span<const uint8_t>(preview.data(), preview.size()), xsize_preview,
+        ysize_preview, c_srgb,
+        /*bits_per_sample=*/8, format,
+        /*pool=*/nullptr, &decoded.Main()));
+
+    jxl::ButteraugliParams ba;
+    // Allowed distance depends on the preview size.
+    const float max_distance = mode == jxl::kSmallPreview ? 0.7f : 1.2f;
+    // TODO(lode): this ButteraugliDistance silently returns 0 (dangerous for
+    // tests) if xsize or ysize is < 8, no matter how different the images, a
+    // tiny size that could happen for a preview. ButteraugliDiffmap does
+    // support smaller than 8x8, but jxl's ButteraugliDistance does not. Perhaps
+    // move butteraugli's <8x8 handling from ButteraugliDiffmap to
+    // ButteraugliComparator::Diffmap in butteraugli.cc.
+    EXPECT_LE(ButteraugliDistance(reference.frames, decoded.frames, ba,
+                                  jxl::GetJxlCms(),
+                                  /*distmap=*/nullptr, nullptr),
+              max_distance);
+
+    JxlDecoderDestroy(dec);
+  }
+}
+
+// Decodes a lossless image into 8-bit RGB rows padded to a 17-byte
+// alignment, once through the output buffer and once through the callback,
+// and checks both the padded buffer size and the pixel values.
+TEST(DecodeTest, AlignTest) {
+  size_t xsize = 123, ysize = 77;
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+  JxlPixelFormat format_orig = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  jxl::TestCodestreamParams params;
+  // Lossless to verify pixels exactly after roundtrip.
+  params.cparams.SetLossless();
+  params.cparams.speed_tier = jxl::SpeedTier::kThunder;
+  jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 4,
+      params);
+
+  size_t align = 17;
+  JxlPixelFormat format = {3, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, align};
+  // On purpose not using jxl::RoundUpTo to test it independently.
+  // One byte per sample, three samples per pixel, rounded up to `align`.
+  size_t unpadded_line_bytes = 1 * 3 * xsize;
+  size_t expected_line_bytes = (unpadded_line_bytes + align - 1) / align * align;
+
+  for (bool use_callback : {false, true}) {
+    std::vector<uint8_t> decoded = jxl::DecodeWithAPI(
+        jxl::Span<const uint8_t>(compressed.data(), compressed.size()), format,
+        use_callback, /*set_buffer_early=*/false,
+        /*use_resizable_runner=*/false, /*require_boxes=*/false,
+        /*expect_success=*/true);
+    EXPECT_EQ(expected_line_bytes * ysize, decoded.size());
+    EXPECT_EQ(0u, jxl::test::ComparePixels(pixels.data(), decoded.data(), xsize,
+                                           ysize, format_orig, format));
+  }
+}
+
+// Encodes a two-frame lossless animation and decodes it with the whole
+// input available, checking for each frame the frame header (duration,
+// empty name, is_last) and the decoded pixels.
+TEST(DecodeTest, AnimationTest) {
+  size_t xsize = 123, ysize = 77;
+  static const size_t num_frames = 2;
+  std::vector<uint8_t> frames[num_frames];
+  frames[0] = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+  frames[1] = jxl::test::GetSomeTestImage(xsize, ysize, 3, 1);
+  JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  jxl::CodecInOut io;
+  io.SetSize(xsize, ysize);
+  io.metadata.m.SetUintSamples(16);
+  io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false);
+  io.metadata.m.have_animation = true;
+  io.frames.clear();
+  io.frames.reserve(num_frames);
+  io.SetSize(xsize, ysize);
+
+  // Frame i is shown for 5 + i ticks.
+  std::vector<uint32_t> frame_durations(num_frames);
+  for (size_t i = 0; i < num_frames; ++i) {
+    frame_durations[i] = 5 + i;
+
+    jxl::ImageBundle bundle(&io.metadata.m);
+    EXPECT_TRUE(ConvertFromExternal(
+        jxl::Span<const uint8_t>(frames[i].data(), frames[i].size()), xsize,
+        ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+        /*bits_per_sample=*/16, format,
+        /*pool=*/nullptr, &bundle));
+    bundle.duration = frame_durations[i];
+    io.frames.push_back(std::move(bundle));
+  }
+
+  jxl::CompressParams cparams;
+  cparams.SetLossless();  // Lossless to verify pixels exactly after roundtrip.
+  cparams.speed_tier = jxl::SpeedTier::kThunder;
+  jxl::AuxOut aux_out;
+  jxl::PaddedBytes compressed;
+  jxl::PassesEncoderState enc_state;
+  EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed,
+                              jxl::GetJxlCms(), &aux_out, nullptr));
+
+  // Decode and test the animation frames
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+  void* runner = JxlThreadParallelRunnerCreate(
+      nullptr, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetParallelRunner(dec, JxlThreadParallelRunner, runner));
+
+  const int subscribed_events =
+      JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, subscribed_events));
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetInput(dec, compressed.data(), compressed.size()));
+
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  size_t buffer_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+
+  for (size_t i = 0; i < num_frames; ++i) {
+    EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+    JxlFrameHeader frame_header;
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+    EXPECT_EQ(frame_durations[i], frame_header.duration);
+    EXPECT_EQ(0u, frame_header.name_length);
+    // For now, test with empty name, there's currently no easy way to encode
+    // a jxl file with a frame name because ImageBundle doesn't have a
+    // jxl::FrameHeader to set the name in. We can test the null termination
+    // character though.
+    char name;
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameName(dec, &name, 1));
+    EXPECT_EQ(0, name);
+
+    const bool is_final_frame = (i + 1 == num_frames);
+    EXPECT_EQ(is_final_frame, frame_header.is_last);
+
+    EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+    std::vector<uint8_t> decoded(buffer_size);
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSetImageOutBuffer(dec, &format, decoded.data(),
+                                          decoded.size()));
+
+    EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+    EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), decoded.data(),
+                                           xsize, ysize, format, format));
+  }
+
+  // After all frames were decoded, JxlDecoderProcessInput should return
+  // success to indicate all is done.
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+  JxlThreadParallelRunnerDestroy(runner);
+  JxlDecoderDestroy(dec);
+}
+
+// Encodes a two-frame lossless animation and decodes it while feeding the
+// input in 16-byte steps, checking that basic info, frame headers and full
+// images arrive in a consistent order and that the decoded frames equal the
+// originals.
+//
+// Failures inside the decode loop use ADD_FAILURE() followed by `break`
+// instead of FAIL(): FAIL() returns from the test body immediately, which
+// would skip the destruction of the decoder and the thread runner.
+TEST(DecodeTest, AnimationTestStreaming) {
+  size_t xsize = 123, ysize = 77;
+  static const size_t num_frames = 2;
+  std::vector<uint8_t> frames[2];
+  frames[0] = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+  frames[1] = jxl::test::GetSomeTestImage(xsize, ysize, 3, 1);
+  JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  jxl::CodecInOut io;
+  io.SetSize(xsize, ysize);
+  io.metadata.m.SetUintSamples(16);
+  io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false);
+  io.metadata.m.have_animation = true;
+  io.frames.clear();
+  io.frames.reserve(num_frames);
+  io.SetSize(xsize, ysize);
+
+  std::vector<uint32_t> frame_durations(num_frames);
+  for (size_t i = 0; i < num_frames; ++i) {
+    frame_durations[i] = 5 + i;
+  }
+
+  for (size_t i = 0; i < num_frames; ++i) {
+    jxl::ImageBundle bundle(&io.metadata.m);
+
+    EXPECT_TRUE(ConvertFromExternal(
+        jxl::Span<const uint8_t>(frames[i].data(), frames[i].size()), xsize,
+        ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+        /*bits_per_sample=*/16, format,
+        /*pool=*/nullptr, &bundle));
+    bundle.duration = frame_durations[i];
+    io.frames.push_back(std::move(bundle));
+  }
+
+  jxl::CompressParams cparams;
+  cparams.SetLossless();  // Lossless to verify pixels exactly after roundtrip.
+  cparams.speed_tier = jxl::SpeedTier::kThunder;
+  jxl::AuxOut aux_out;
+  jxl::PaddedBytes compressed;
+  jxl::PassesEncoderState enc_state;
+  EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed,
+                              jxl::GetJxlCms(), &aux_out, nullptr));
+
+  // Decode and test the animation frames
+
+  // Number of additional input bytes handed over per request.
+  const size_t step_size = 16;
+
+  JxlDecoder* dec = JxlDecoderCreate(NULL);
+  const uint8_t* next_in = compressed.data();
+  size_t avail_in = 0;
+  size_t frame_headers_seen = 0;
+  size_t frames_seen = 0;
+  bool seen_basic_info = false;
+
+  void* runner = JxlThreadParallelRunnerCreate(
+      NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetParallelRunner(dec, JxlThreadParallelRunner, runner));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(
+                dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+
+  // Output buffers, one per frame, sized like the originals.
+  std::vector<uint8_t> frames2[2];
+  for (size_t i = 0; i < num_frames; ++i) {
+    frames2[i].resize(frames[i].size());
+  }
+
+  // Number of bytes of `compressed` handed to the decoder so far.
+  size_t total_in = 0;
+  // Guards against a decoder that never makes progress.
+  size_t loop_count = 0;
+
+  for (;;) {
+    if (loop_count++ > compressed.size()) {
+      fprintf(stderr, "Too many loops\n");
+      ADD_FAILURE();
+      break;
+    }
+
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+    auto status = JxlDecoderProcessInput(dec);
+    size_t remaining = JxlDecoderReleaseInput(dec);
+    EXPECT_LE(remaining, avail_in);
+    // Advance past whatever the decoder consumed in this call.
+    next_in += avail_in - remaining;
+    avail_in = remaining;
+
+    if (status == JXL_DEC_SUCCESS) {
+      break;
+    } else if (status == JXL_DEC_ERROR) {
+      ADD_FAILURE();
+      break;
+    } else if (status == JXL_DEC_NEED_MORE_INPUT) {
+      if (total_in >= compressed.size()) {
+        fprintf(stderr, "Already gave all input data\n");
+        ADD_FAILURE();
+        break;
+      }
+      size_t amount = step_size;
+      if (total_in + amount > compressed.size()) {
+        amount = compressed.size() - total_in;
+      }
+      avail_in += amount;
+      total_in += amount;
+    } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+      if (frames_seen >= num_frames) {
+        // More frames than were encoded: indexing frames2 would be out of
+        // bounds.
+        ADD_FAILURE();
+        break;
+      }
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                     dec, &format, frames2[frames_seen].data(),
+                                     frames2[frames_seen].size()));
+    } else if (status == JXL_DEC_BASIC_INFO) {
+      EXPECT_EQ(false, seen_basic_info);
+      seen_basic_info = true;
+      JxlBasicInfo info;
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+      EXPECT_EQ(xsize, info.xsize);
+      EXPECT_EQ(ysize, info.ysize);
+    } else if (status == JXL_DEC_FRAME) {
+      EXPECT_EQ(true, seen_basic_info);
+      frame_headers_seen++;
+    } else if (status == JXL_DEC_FULL_IMAGE) {
+      frames_seen++;
+      // Every full image must have been preceded by its frame header.
+      EXPECT_EQ(frame_headers_seen, frames_seen);
+    } else {
+      fprintf(stderr, "Unexpected status: %d\n", (int)status);
+      ADD_FAILURE();
+      break;
+    }
+  }
+
+  EXPECT_EQ(true, seen_basic_info);
+  EXPECT_EQ(num_frames, frames_seen);
+  EXPECT_EQ(num_frames, frame_headers_seen);
+  for (size_t i = 0; i < num_frames; ++i) {
+    EXPECT_EQ(frames[i], frames2[i]);
+  }
+
+  JxlThreadParallelRunnerDestroy(runner);
+  JxlDecoderDestroy(dec);
+}
+
+// Decodes a lossless 4-channel image into an 8-bit RGB buffer plus a
+// separate extra-channel buffer, and checks that the colour pixels and the
+// extracted extra channel match the original image and its alpha samples.
+TEST(DecodeTest, ExtraChannelTest) {
+  size_t xsize = 55, ysize = 257;
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+  JxlPixelFormat format_orig = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  jxl::TestCodestreamParams params;
+  // Lossless to verify pixels exactly after roundtrip.
+  params.cparams.SetLossless();
+  params.cparams.speed_tier = jxl::SpeedTier::kThunder;
+  jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 4,
+      params);
+
+  size_t align = 17;
+  JxlPixelFormat format = {3, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, align};
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+  const int subscribed_events = JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, subscribed_events));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetInput(dec, compressed.data(), compressed.size()));
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+  EXPECT_EQ(1u, info.num_extra_channels);
+  EXPECT_EQ(JXL_FALSE, info.alpha_premultiplied);
+
+  JxlExtraChannelInfo extra_info;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetExtraChannelInfo(dec, 0, &extra_info));
+  // NOTE(review): type 0 is presumably the alpha channel type - confirm
+  // against the JxlExtraChannelType definition.
+  EXPECT_EQ(0, extra_info.type);
+
+  EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+  size_t image_bytes;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(dec, &format, &image_bytes));
+  size_t extra_bytes;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderExtraChannelBufferSize(dec, &format, &extra_bytes, 0));
+
+  std::vector<uint8_t> image(image_bytes);
+  std::vector<uint8_t> extra(extra_bytes);
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                 dec, &format, image.data(), image.size()));
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetExtraChannelBuffer(
+                                 dec, &format, extra.data(), extra.size(), 0));
+
+  EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+
+  // After the full image was output, JxlDecoderProcessInput should return
+  // success to indicate all is done.
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+  JxlDecoderDestroy(dec);
+
+  EXPECT_EQ(0u, jxl::test::ComparePixels(pixels.data(), image.data(), xsize,
+                                         ysize, format_orig, format));
+
+  // Compare the extracted extra channel with the original alpha channel
+
+  // Each source pixel occupies 8 bytes (4 samples of 2 bytes); the last two
+  // bytes of every pixel are copied out as its alpha sample.
+  std::vector<uint8_t> alpha(pixels.size() / 4);
+  for (size_t px = 0; px * 8 < pixels.size(); ++px) {
+    const size_t src = px * 8;
+    const size_t dst = px * 2;
+    alpha[dst + 0] = pixels[src + 6];
+    alpha[dst + 1] = pixels[src + 7];
+  }
+
+  // Single-channel variants of the two pixel formats.
+  JxlPixelFormat format_alpha = format;
+  format_alpha.num_channels = 1;
+  JxlPixelFormat format_orig_alpha = format_orig;
+  format_orig_alpha.num_channels = 1;
+
+  EXPECT_EQ(0u,
+            jxl::test::ComparePixels(alpha.data(), extra.data(), xsize, ysize,
+                                     format_orig_alpha, format_alpha));
+}
+
+// Encodes a seven-frame progressive animation and exercises
+// JxlDecoderSkipCurrentFrame at different points of frame decoding. The
+// call is expected to fail before the image output buffer has been set and
+// after the full image was delivered, and to succeed in between. Frames 2
+// to 5 are each skipped one progression step later than the previous one.
+TEST(DecodeTest, SkipCurrentFrameTest) {
+  size_t xsize = 90, ysize = 120;
+  constexpr size_t num_frames = 7;
+  std::vector<uint8_t> frames[num_frames];
+  for (size_t i = 0; i < num_frames; i++) {
+    frames[i] = jxl::test::GetSomeTestImage(xsize, ysize, 3, i);
+  }
+  JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  jxl::CodecInOut io;
+  io.SetSize(xsize, ysize);
+  io.metadata.m.SetUintSamples(16);
+  io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false);
+  io.metadata.m.have_animation = true;
+  io.frames.clear();
+  io.frames.reserve(num_frames);
+  io.SetSize(xsize, ysize);
+
+  // Frame i is shown for 5 + i ticks.
+  std::vector<uint32_t> frame_durations(num_frames);
+  for (size_t i = 0; i < num_frames; ++i) {
+    frame_durations[i] = 5 + i;
+
+    jxl::ImageBundle bundle(&io.metadata.m);
+    if (i % 2 == 1) {
+      // Mark some frames as referenceable, others not.
+      bundle.use_for_next_frame = true;
+    }
+
+    EXPECT_TRUE(ConvertFromExternal(
+        jxl::Span<const uint8_t>(frames[i].data(), frames[i].size()), xsize,
+        ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+        /*bits_per_sample=*/16, format,
+        /*pool=*/nullptr, &bundle));
+    bundle.duration = frame_durations[i];
+    io.frames.push_back(std::move(bundle));
+  }
+
+  jxl::CompressParams cparams;
+  cparams.speed_tier = jxl::SpeedTier::kThunder;
+  jxl::AuxOut aux_out;
+  jxl::PaddedBytes compressed;
+  jxl::PassesEncoderState enc_state;
+  jxl::PassDefinition passes[] = {{2, 0, 4}, {4, 0, 4}, {8, 2, 2}, {8, 0, 1}};
+  jxl::ProgressiveMode progressive_mode{passes};
+  enc_state.progressive_splitter.SetProgressiveMode(progressive_mode);
+  EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed,
+                              jxl::GetJxlCms(), &aux_out, nullptr));
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+  const int subscribed_events = JXL_DEC_BASIC_INFO | JXL_DEC_FRAME |
+                                JXL_DEC_FRAME_PROGRESSION | JXL_DEC_FULL_IMAGE;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, subscribed_events));
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetProgressiveDetail(dec, kLastPasses));
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetInput(dec, compressed.data(), compressed.size()));
+
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  size_t buffer_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+
+  for (size_t i = 0; i < num_frames; ++i) {
+    printf("Decoding frame %d\n", (int)i);
+    // Skipping is rejected before the frame header has been seen...
+    EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderSkipCurrentFrame(dec));
+    std::vector<uint8_t> frame_pixels(buffer_size);
+    EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+    // ...and still rejected right after the frame header event.
+    EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderSkipCurrentFrame(dec));
+    JxlFrameHeader frame_header;
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+    EXPECT_EQ(frame_durations[i], frame_header.duration);
+    const bool is_final_frame = (i + 1 == num_frames);
+    EXPECT_EQ(is_final_frame, frame_header.is_last);
+    EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSetImageOutBuffer(dec, &format, frame_pixels.data(),
+                                          frame_pixels.size()));
+    // Frame 2: skip as soon as the output buffer is set.
+    if (i == 2) {
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSkipCurrentFrame(dec));
+      continue;
+    }
+    EXPECT_EQ(JXL_DEC_FRAME_PROGRESSION, JxlDecoderProcessInput(dec));
+    EXPECT_EQ(8, JxlDecoderGetIntendedDownsamplingRatio(dec));
+    // Frame 3: skip after the first progression step.
+    if (i == 3) {
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSkipCurrentFrame(dec));
+      continue;
+    }
+    EXPECT_EQ(JXL_DEC_FRAME_PROGRESSION, JxlDecoderProcessInput(dec));
+    EXPECT_EQ(4, JxlDecoderGetIntendedDownsamplingRatio(dec));
+    // Frame 4: skip after the second progression step.
+    if (i == 4) {
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSkipCurrentFrame(dec));
+      continue;
+    }
+    EXPECT_EQ(JXL_DEC_FRAME_PROGRESSION, JxlDecoderProcessInput(dec));
+    EXPECT_EQ(2, JxlDecoderGetIntendedDownsamplingRatio(dec));
+    // Frame 5: skip after the third progression step.
+    if (i == 5) {
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSkipCurrentFrame(dec));
+      continue;
+    }
+    EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+    // Once the full image is out there is nothing left to skip.
+    EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderSkipCurrentFrame(dec));
+  }
+
+  // After all frames were decoded, JxlDecoderProcessInput should return
+  // success to indicate all is done.
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+  JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, SkipFrameTest) {  // Exercises JxlDecoderSkipFrames on a lossless 16-frame animation, before and after JxlDecoderRewind.
+  size_t xsize = 90, ysize = 120;
+  constexpr size_t num_frames = 16;
+  std::vector<uint8_t> frames[num_frames];  // Source pixels; decoder output is compared against these.
+  for (size_t i = 0; i < num_frames; i++) {
+    frames[i] = jxl::test::GetSomeTestImage(xsize, ysize, 3, i);
+  }
+  JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  jxl::CodecInOut io;
+  io.SetSize(xsize, ysize);
+  io.metadata.m.SetUintSamples(16);
+  io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false);
+  io.metadata.m.have_animation = true;
+  io.frames.clear();
+  io.frames.reserve(num_frames);
+  io.SetSize(xsize, ysize);
+
+  std::vector<uint32_t> frame_durations(num_frames);
+  for (size_t i = 0; i < num_frames; ++i) {
+    frame_durations[i] = 5 + i;  // Distinct per frame, so a header's duration identifies the frame.
+  }
+
+  for (size_t i = 0; i < num_frames; ++i) {
+    jxl::ImageBundle bundle(&io.metadata.m);
+    if (i & 1) {
+      // Mark some frames (the odd-indexed ones) as referenceable, others not.
+      bundle.use_for_next_frame = true;
+    }
+
+    EXPECT_TRUE(ConvertFromExternal(
+        jxl::Span<const uint8_t>(frames[i].data(), frames[i].size()), xsize,
+        ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+        /*bits_per_sample=*/16, format,
+        /*pool=*/nullptr, &bundle));
+    bundle.duration = frame_durations[i];
+    io.frames.push_back(std::move(bundle));
+  }
+
+  jxl::CompressParams cparams;
+  cparams.SetLossless();  // Lossless to verify pixels exactly after roundtrip.
+  cparams.speed_tier = jxl::SpeedTier::kThunder;
+  jxl::AuxOut aux_out;
+  jxl::PaddedBytes compressed;
+  jxl::PassesEncoderState enc_state;
+  EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed,
+                              jxl::GetJxlCms(), &aux_out, nullptr));
+
+  // Decode and test the animation frames; frames 3..7 are skipped in this pass.
+
+  JxlDecoder* dec = JxlDecoderCreate(NULL);
+  const uint8_t* next_in = compressed.data();
+  size_t avail_in = compressed.size();
+
+  void* runner = JxlThreadParallelRunnerCreate(
+      NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetParallelRunner(dec, JxlThreadParallelRunner, runner));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(
+                dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  size_t buffer_size;  // Queried once here and reused for every output buffer below.
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+
+  for (size_t i = 0; i < num_frames; ++i) {
+    if (i == 3) {
+      JxlDecoderSkipFrames(dec, 5);
+      i += 5;  // Keep i in step with the decoder: the next frame expected is frame 8.
+    }
+    std::vector<uint8_t> pixels(buffer_size);
+
+    EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+    JxlFrameHeader frame_header;
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+    EXPECT_EQ(frame_durations[i], frame_header.duration);
+
+    EXPECT_EQ(i + 1 == num_frames, frame_header.is_last);
+
+    EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                   dec, &format, pixels.data(), pixels.size()));
+
+    EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+    EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(),
+                                           xsize, ysize, format, format));
+  }
+
+  // After all frames were decoded, JxlDecoderProcessInput should return
+  // success to indicate all is done.
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+  // Test rewinding the decoder and skipping different frames (10..12).
+
+  JxlDecoderRewind(dec);  // The test subscribes to events and sets the input again right after rewinding.
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(dec, JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+
+  for (size_t i = 0; i < num_frames; ++i) {
+    int test_skipping = (i == 9) ? 3 : 0;
+    std::vector<uint8_t> pixels(buffer_size);
+
+    EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+    // Since this is after JXL_DEC_FRAME but before JXL_DEC_FULL_IMAGE, this
+    // should only skip the following frames, not the currently processed one.
+    if (test_skipping) JxlDecoderSkipFrames(dec, test_skipping);
+
+    JxlFrameHeader frame_header;
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+    EXPECT_EQ(frame_durations[i], frame_header.duration);
+
+    EXPECT_EQ(i + 1 == num_frames, frame_header.is_last);
+
+    EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                   dec, &format, pixels.data(), pixels.size()));
+
+    EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+    EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(),
+                                           xsize, ysize, format, format));
+
+    if (test_skipping) i += test_skipping;  // Advance past the skipped frames so i matches the next decoded frame.
+  }
+
+  JxlThreadParallelRunnerDestroy(runner);
+  JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, SkipFrameWithBlendingTest) {  // Skip/rewind checks on an animation whose kMul-blended frames depend on earlier frames.
+  size_t xsize = 90, ysize = 120;
+  constexpr size_t num_frames = 16;
+  std::vector<uint8_t> frames[num_frames];  // Filled by the reference decode below with the expected output.
+  JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  jxl::CodecInOut io;
+  io.SetSize(xsize, ysize);
+  io.metadata.m.SetUintSamples(16);
+  io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false);
+  io.metadata.m.have_animation = true;
+  io.frames.clear();
+  io.frames.reserve(num_frames);
+  io.SetSize(xsize, ysize);
+
+  std::vector<uint32_t> frame_durations(num_frames);
+
+  for (size_t i = 0; i < num_frames; ++i) {
+    if (i < 5) {
+      std::vector<uint8_t> frame_internal =
+          jxl::test::GetSomeTestImage(xsize, ysize, 3, i * 2 + 1);
+      // An internal frame with 0 duration, and use_for_next_frame, this is a
+      // frame that is not rendered and not output by the API, but on which the
+      // rendered frames depend
+      jxl::ImageBundle bundle_internal(&io.metadata.m);
+      EXPECT_TRUE(ConvertFromExternal(
+          jxl::Span<const uint8_t>(frame_internal.data(),
+                                   frame_internal.size()),
+          xsize, ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+          /*bits_per_sample=*/16, format,
+          /*pool=*/nullptr, &bundle_internal));
+      bundle_internal.duration = 0;
+      bundle_internal.use_for_next_frame = true;
+      io.frames.push_back(std::move(bundle_internal));
+    }
+
+    std::vector<uint8_t> frame =
+        jxl::test::GetSomeTestImage(xsize, ysize, 3, i * 2);
+    // Actual rendered frame
+    frame_durations[i] = 5 + i;
+    jxl::ImageBundle bundle(&io.metadata.m);
+    EXPECT_TRUE(ConvertFromExternal(
+        jxl::Span<const uint8_t>(frame.data(), frame.size()), xsize, ysize,
+        jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+        /*bits_per_sample=*/16, format,
+        /*pool=*/nullptr, &bundle));
+    bundle.duration = frame_durations[i];
+    // Create some variation in which frames depend on which.
+    if (i != 3 && i != 9 && i != 10) {
+      bundle.use_for_next_frame = true;
+    }
+    if (i != 12) {
+      bundle.blend = true;
+      // Choose a blend mode that depends on the pixels of the saved frame and
+      // doesn't use alpha
+      bundle.blendmode = jxl::BlendMode::kMul;
+    }
+    io.frames.push_back(std::move(bundle));
+  }
+
+  jxl::CompressParams cparams;
+  cparams.SetLossless();  // Lossless to verify pixels exactly after roundtrip.
+  cparams.speed_tier = jxl::SpeedTier::kThunder;
+  jxl::AuxOut aux_out;
+  jxl::PaddedBytes compressed;
+  jxl::PassesEncoderState enc_state;
+  EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed,
+                              jxl::GetJxlCms(), &aux_out, nullptr));
+
+  // Independently decode all frames without any skipping, to create the
+  // expected blended frames, for the actual tests below to compare with.
+  {
+    JxlDecoder* dec = JxlDecoderCreate(NULL);
+    const uint8_t* next_in = compressed.data();
+    size_t avail_in = compressed.size();
+
+    void* runner = JxlThreadParallelRunnerCreate(
+        NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetParallelRunner(
+                                   dec, JxlThreadParallelRunner, runner));
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSubscribeEvents(dec, JXL_DEC_FULL_IMAGE));
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+    for (size_t i = 0; i < num_frames; ++i) {
+      EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+      frames[i].resize(xsize * ysize * 6);  // 3 channels x 2 bytes per 16-bit sample.
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSetImageOutBuffer(dec, &format, frames[i].data(),
+                                            frames[i].size()));
+      EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+    }
+
+    // After all frames were decoded, JxlDecoderProcessInput should return
+    // success to indicate all is done.
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+    JxlThreadParallelRunnerDestroy(runner);
+    JxlDecoderDestroy(dec);
+  }
+
+  JxlDecoder* dec = JxlDecoderCreate(NULL);  // Decoder under test; reused across the three passes below.
+  const uint8_t* next_in = compressed.data();
+  size_t avail_in = compressed.size();
+
+  void* runner = JxlThreadParallelRunnerCreate(
+      NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetParallelRunner(dec, JxlThreadParallelRunner, runner));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(
+                dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  size_t buffer_size;  // Queried once here and reused for every output buffer below.
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+
+  for (size_t i = 0; i < num_frames; ++i) {  // Pass 1: no skipping, stops after frame 8.
+    std::vector<uint8_t> pixels(buffer_size);
+
+    EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+    JxlFrameHeader frame_header;
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+    EXPECT_EQ(frame_durations[i], frame_header.duration);
+
+    EXPECT_EQ(i + 1 == num_frames, frame_header.is_last);
+
+    EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                   dec, &format, pixels.data(), pixels.size()));
+
+    EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+    EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(),
+                                           xsize, ysize, format, format));
+
+    // Test rewinding mid-way, not decoding all frames.
+    if (i == 8) {
+      break;
+    }
+  }
+
+  JxlDecoderRewind(dec);  // The test subscribes to events and sets the input again right after rewinding.
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(dec, JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+
+  for (size_t i = 0; i < num_frames; ++i) {  // Pass 2: skips rendered frames 3..7.
+    if (i == 3) {
+      JxlDecoderSkipFrames(dec, 5);
+      i += 5;  // Keep i in step with the decoder: the next frame expected is frame 8.
+    }
+    std::vector<uint8_t> pixels(buffer_size);
+
+    EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+    JxlFrameHeader frame_header;
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+    EXPECT_EQ(frame_durations[i], frame_header.duration);
+
+    EXPECT_EQ(i + 1 == num_frames, frame_header.is_last);
+
+    EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                   dec, &format, pixels.data(), pixels.size()));
+
+    EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+    EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(),
+                                           xsize, ysize, format, format));
+  }
+
+  // After all frames were decoded, JxlDecoderProcessInput should return
+  // success to indicate all is done.
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+  // Test rewinding the decoder and skipping different frames (10..12).
+
+  JxlDecoderRewind(dec);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(dec, JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+
+  for (size_t i = 0; i < num_frames; ++i) {
+    int test_skipping = (i == 9) ? 3 : 0;
+    std::vector<uint8_t> pixels(buffer_size);
+
+    EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+    // Since this is after JXL_DEC_FRAME but before JXL_DEC_FULL_IMAGE, this
+    // should only skip the following frames, not the currently processed one.
+    if (test_skipping) JxlDecoderSkipFrames(dec, test_skipping);
+
+    JxlFrameHeader frame_header;
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+    EXPECT_EQ(frame_durations[i], frame_header.duration);
+
+    EXPECT_EQ(i + 1 == num_frames, frame_header.is_last);
+
+    EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                   dec, &format, pixels.data(), pixels.size()));
+
+    EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+    EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(),
+                                           xsize, ysize, format, format));
+
+    if (test_skipping) i += test_skipping;  // Advance past the skipped frames so i matches the next decoded frame.
+  }
+
+  JxlThreadParallelRunnerDestroy(runner);
+  JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, SkipFrameWithAlphaBlendingTest) {  // Skip/rewind checks on cropped, kBlend-blended 4-channel frames, with coalescing on and off.
+  size_t xsize = 90, ysize = 120;
+  constexpr size_t num_frames = 16;
+  std::vector<uint8_t> frames[num_frames + 5];  // +5: the internal frames are also output when coalescing is off.
+  JxlPixelFormat format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  jxl::CodecInOut io;
+  io.SetSize(xsize, ysize);
+  io.metadata.m.SetUintSamples(16);
+  io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false);
+  io.metadata.m.have_animation = true;
+  io.frames.clear();
+  io.frames.reserve(num_frames + 5);
+  io.SetSize(xsize, ysize);
+
+  std::vector<uint32_t> frame_durations_c;  // Expected durations with coalescing: rendered frames only.
+  std::vector<uint32_t> frame_durations_nc;  // Expected durations without coalescing: includes the 0-duration internal frames.
+  std::vector<uint32_t> frame_xsize, frame_ysize, frame_x0, frame_y0;  // Per-frame geometry, indexed like frame_durations_nc.
+
+  for (size_t i = 0; i < num_frames; ++i) {
+    size_t cropxsize = 1 + xsize * 2 / (i + 1);
+    size_t cropysize = 1 + ysize * 3 / (i + 2);
+    int cropx0 = i * 3 - 8;  // Origins start at (-8, -7) for i == 0.
+    int cropy0 = i * 4 - 7;
+    if (i < 5) {
+      std::vector<uint8_t> frame_internal =
+          jxl::test::GetSomeTestImage(xsize / 2, ysize / 2, 4, i * 2 + 1);
+      // An internal frame with 0 duration, and use_for_next_frame, this is a
+      // frame that is not rendered and not output by default by the API, but on
+      // which the rendered frames depend
+      jxl::ImageBundle bundle_internal(&io.metadata.m);
+      EXPECT_TRUE(ConvertFromExternal(
+          jxl::Span<const uint8_t>(frame_internal.data(),
+                                   frame_internal.size()),
+          xsize / 2, ysize / 2, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+          /*bits_per_sample=*/16, format,
+          /*pool=*/nullptr, &bundle_internal));
+      bundle_internal.duration = 0;
+      bundle_internal.use_for_next_frame = true;
+      bundle_internal.origin = {13, 17};
+      io.frames.push_back(std::move(bundle_internal));
+      frame_durations_nc.push_back(0);
+      frame_xsize.push_back(xsize / 2);
+      frame_ysize.push_back(ysize / 2);
+      frame_x0.push_back(13);
+      frame_y0.push_back(17);
+    }
+
+    std::vector<uint8_t> frame =
+        jxl::test::GetSomeTestImage(cropxsize, cropysize, 4, i * 2);
+    // Actual rendered frame
+    jxl::ImageBundle bundle(&io.metadata.m);
+    EXPECT_TRUE(ConvertFromExternal(
+        jxl::Span<const uint8_t>(frame.data(), frame.size()), cropxsize,
+        cropysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+        /*bits_per_sample=*/16, format,
+        /*pool=*/nullptr, &bundle));
+    bundle.duration = 5 + i;
+    frame_durations_nc.push_back(5 + i);
+    frame_durations_c.push_back(5 + i);
+    frame_xsize.push_back(cropxsize);
+    frame_ysize.push_back(cropysize);
+    frame_x0.push_back(cropx0);
+    frame_y0.push_back(cropy0);
+    bundle.origin = {cropx0, cropy0};
+    // Create some variation in which frames depend on which.
+    if (i != 3 && i != 9 && i != 10) {
+      bundle.use_for_next_frame = true;
+    }
+    if (i != 12) {
+      bundle.blend = true;
+      bundle.blendmode = jxl::BlendMode::kBlend;
+    }
+    io.frames.push_back(std::move(bundle));
+  }
+
+  jxl::CompressParams cparams;
+  cparams.SetLossless();  // Lossless to verify pixels exactly after roundtrip.
+  cparams.speed_tier = jxl::SpeedTier::kThunder;
+  jxl::AuxOut aux_out;
+  jxl::PaddedBytes compressed;
+  jxl::PassesEncoderState enc_state;
+  EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed,
+                              jxl::GetJxlCms(), &aux_out, nullptr));
+  // Try both with and without coalescing.
+  for (auto coalescing : {JXL_TRUE, JXL_FALSE}) {
+    // Independently decode all frames without any skipping, to create the
+    // expected blended frames, for the actual tests below to compare with.
+    {
+      JxlDecoder* dec = JxlDecoderCreate(NULL);
+      const uint8_t* next_in = compressed.data();
+      size_t avail_in = compressed.size();
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetCoalescing(dec, coalescing));
+      void* runner = JxlThreadParallelRunnerCreate(
+          NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetParallelRunner(
+                                     dec, JxlThreadParallelRunner, runner));
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSubscribeEvents(dec, JXL_DEC_FULL_IMAGE));
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+      for (size_t i = 0; i < num_frames + (coalescing ? 0 : 5); ++i) {
+        EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+        size_t buffer_size;
+        EXPECT_EQ(JXL_DEC_SUCCESS,
+                  JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+        if (coalescing) {
+          EXPECT_EQ(xsize * ysize * 8, buffer_size);  // 4 channels x 2 bytes per 16-bit sample.
+        } else {
+          EXPECT_EQ(frame_xsize[i] * frame_ysize[i] * 8, buffer_size);
+        }
+        frames[i].resize(buffer_size);
+        EXPECT_EQ(JXL_DEC_SUCCESS,
+                  JxlDecoderSetImageOutBuffer(dec, &format, frames[i].data(),
+                                              frames[i].size()));
+        EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+      }
+
+      // After all frames were decoded, JxlDecoderProcessInput should return
+      // success to indicate all is done.
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+      JxlThreadParallelRunnerDestroy(runner);
+      JxlDecoderDestroy(dec);
+    }
+
+    JxlDecoder* dec = JxlDecoderCreate(NULL);  // Decoder under test; reused across the three passes below.
+    const uint8_t* next_in = compressed.data();
+    size_t avail_in = compressed.size();
+
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetCoalescing(dec, coalescing));
+    void* runner = JxlThreadParallelRunnerCreate(
+        NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetParallelRunner(
+                                   dec, JxlThreadParallelRunner, runner));
+
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(
+                                   dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME |
+                                            JXL_DEC_FULL_IMAGE));
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+    EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+    JxlBasicInfo info;
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+
+    for (size_t i = 0; i < num_frames; ++i) {  // Pass 1: no skipping, stops after index 8.
+      EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+      size_t buffer_size;  // Queried per frame in this test.
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+      std::vector<uint8_t> pixels(buffer_size);
+
+      JxlFrameHeader frame_header;
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+      EXPECT_EQ((coalescing ? frame_durations_c[i] : frame_durations_nc[i]),
+                frame_header.duration);
+
+      EXPECT_EQ(i + 1 == num_frames, frame_header.is_last);
+
+      EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSetImageOutBuffer(dec, &format, pixels.data(),
+                                            pixels.size()));
+
+      EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+      if (coalescing) {
+        EXPECT_EQ(frame_header.layer_info.xsize, xsize);
+      } else {
+        EXPECT_EQ(frame_header.layer_info.xsize, frame_xsize[i]);
+      }
+      if (coalescing) {
+        EXPECT_EQ(frame_header.layer_info.ysize, ysize);
+      } else {
+        EXPECT_EQ(frame_header.layer_info.ysize, frame_ysize[i]);
+      }
+      EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(),
+                                             frame_header.layer_info.xsize,
+                                             frame_header.layer_info.ysize,
+                                             format, format));
+
+      // Test rewinding mid-way, not decoding all frames.
+      if (i == 8) {
+        break;
+      }
+    }
+
+    JxlDecoderRewind(dec);  // The test subscribes to events and sets the input again right after rewinding.
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(
+                                   dec, JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+
+    for (size_t i = 0; i < num_frames + (coalescing ? 0 : 5); ++i) {  // Pass 2: skips indices 3..7.
+      if (i == 3) {
+        JxlDecoderSkipFrames(dec, 5);
+        i += 5;  // Keep i in step with the decoder: the next frame expected is index 8.
+      }
+
+      EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+      size_t buffer_size;
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+      std::vector<uint8_t> pixels(buffer_size);
+
+      JxlFrameHeader frame_header;
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+      EXPECT_EQ((coalescing ? frame_durations_c[i] : frame_durations_nc[i]),
+                frame_header.duration);
+
+      EXPECT_EQ(i + 1 == num_frames + (coalescing ? 0 : 5),
+                frame_header.is_last);
+
+      EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSetImageOutBuffer(dec, &format, pixels.data(),
+                                            pixels.size()));
+
+      EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+      if (coalescing) {
+        EXPECT_EQ(frame_header.layer_info.xsize, xsize);
+        EXPECT_EQ(frame_header.layer_info.ysize, ysize);
+        EXPECT_EQ(frame_header.layer_info.crop_x0, 0);
+        EXPECT_EQ(frame_header.layer_info.crop_y0, 0);
+      } else {
+        EXPECT_EQ(frame_header.layer_info.xsize, frame_xsize[i]);
+        EXPECT_EQ(frame_header.layer_info.ysize, frame_ysize[i]);
+        EXPECT_EQ(frame_header.layer_info.crop_x0, frame_x0[i]);
+        EXPECT_EQ(frame_header.layer_info.crop_y0, frame_y0[i]);
+        EXPECT_EQ(frame_header.layer_info.blend_info.blendmode,
+                  i != 12 + 5 && frame_header.duration != 0
+                      ? 2
+                      : 0);  // kBlend, or the default kReplace (rendered frame 12 sits at index 12 + 5 here)
+      }
+      EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(),
+                                             frame_header.layer_info.xsize,
+                                             frame_header.layer_info.ysize,
+                                             format, format));
+    }
+
+    // After all frames were decoded, JxlDecoderProcessInput should return
+    // success to indicate all is done.
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+    // Test rewinding the decoder and skipping different frames (10..12).
+
+    JxlDecoderRewind(dec);
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(
+                                   dec, JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+
+    for (size_t i = 0; i < num_frames + (coalescing ? 0 : 5); ++i) {
+      int test_skipping = (i == 9) ? 3 : 0;
+
+      EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+      size_t buffer_size;
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+      std::vector<uint8_t> pixels(buffer_size);
+
+      // Since this is after JXL_DEC_FRAME but before JXL_DEC_FULL_IMAGE, this
+      // should only skip the following frames, not the currently processed one.
+      if (test_skipping) JxlDecoderSkipFrames(dec, test_skipping);
+
+      JxlFrameHeader frame_header;
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+      EXPECT_EQ((coalescing ? frame_durations_c[i] : frame_durations_nc[i]),
+                frame_header.duration);
+
+      EXPECT_EQ(i + 1 == num_frames + (coalescing ? 0 : 5),
+                frame_header.is_last);
+
+      EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSetImageOutBuffer(dec, &format, pixels.data(),
+                                            pixels.size()));
+
+      EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+      EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(),
+                                             frame_header.layer_info.xsize,
+                                             frame_header.layer_info.ysize,
+                                             format, format));
+
+      if (test_skipping) i += test_skipping;  // Advance past the skipped frames so i matches the next decoded frame.
+    }
+
+    JxlThreadParallelRunnerDestroy(runner);
+    JxlDecoderDestroy(dec);
+  }
+}
+
+TEST(DecodeTest, OrientedCroppedFrameTest) {
+  const auto test = [](bool keep_orientation, uint32_t orientation,
+                       uint32_t resampling) {
+    size_t xsize = 90, ysize = 120;
+    JxlPixelFormat format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+    size_t oxsize = (!keep_orientation && orientation > 4 ? ysize : xsize);
+    size_t oysize = (!keep_orientation && orientation > 4 ? xsize : ysize);
+    jxl::CodecInOut io;
+    io.SetSize(xsize, ysize);
+    io.metadata.m.SetUintSamples(16);
+    io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false);
+    io.metadata.m.orientation = orientation;
+    io.frames.clear();
+    io.SetSize(xsize, ysize);
+
+    for (size_t i = 0; i < 3; ++i) {
+      size_t cropxsize = 1 + xsize * 2 / (i + 1);
+      size_t cropysize = 1 + ysize * 3 / (i + 2);
+      int cropx0 = i * 3 - 8;
+      int cropy0 = i * 4 - 7;
+
+      std::vector<uint8_t> frame =
+          jxl::test::GetSomeTestImage(cropxsize, cropysize, 4, i * 2);
+      jxl::ImageBundle bundle(&io.metadata.m);
+      EXPECT_TRUE(ConvertFromExternal(
+          jxl::Span<const uint8_t>(frame.data(), frame.size()), cropxsize,
+          cropysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+          /*bits_per_sample=*/16, format,
+          /*pool=*/nullptr, &bundle));
+      bundle.origin = {cropx0, cropy0};
+      bundle.use_for_next_frame = true;
+      io.frames.push_back(std::move(bundle));
+    }
+
+    jxl::CompressParams cparams;
+    cparams
+        .SetLossless();  // Lossless to verify pixels exactly after roundtrip.
+    cparams.speed_tier = jxl::SpeedTier::kThunder;
+    cparams.resampling = resampling;
+    jxl::AuxOut aux_out;
+    jxl::PaddedBytes compressed;
+    jxl::PassesEncoderState enc_state;
+    EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed,
+                                jxl::GetJxlCms(), &aux_out, nullptr));
+
+    // 0 is merged frame as decoded with coalescing enabled (default)
+    // 1-3 are non-coalesced frames as decoded with coalescing disabled
+    // 4 is the manually merged frame
+    std::vector<uint8_t> frames[5];
+    frames[4].resize(xsize * ysize * 8, 0);
+
+    // try both with and without coalescing
+    for (auto coalescing : {JXL_TRUE, JXL_FALSE}) {
+      // Independently decode all frames without any skipping, to create the
+      // expected blended frames, for the actual tests below to compare with.
+      {
+        JxlDecoder* dec = JxlDecoderCreate(NULL);
+        const uint8_t* next_in = compressed.data();
+        size_t avail_in = compressed.size();
+        EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetCoalescing(dec, coalescing));
+        EXPECT_EQ(JXL_DEC_SUCCESS,
+                  JxlDecoderSetKeepOrientation(dec, keep_orientation));
+        void* runner = JxlThreadParallelRunnerCreate(
+            NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+        EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetParallelRunner(
+                                       dec, JxlThreadParallelRunner, runner));
+        EXPECT_EQ(JXL_DEC_SUCCESS,
+                  JxlDecoderSubscribeEvents(dec, JXL_DEC_FULL_IMAGE));
+        EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+        for (size_t i = (coalescing ? 0 : 1); i < (coalescing ? 1 : 4); ++i) {
+          EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+          JxlFrameHeader frame_header;
+          EXPECT_EQ(JXL_DEC_SUCCESS,
+                    JxlDecoderGetFrameHeader(dec, &frame_header));
+          size_t buffer_size;
+          EXPECT_EQ(JXL_DEC_SUCCESS,
+                    JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+          if (coalescing) {
+            EXPECT_EQ(xsize * ysize * 8, buffer_size);
+          } else {
+            EXPECT_EQ(frame_header.layer_info.xsize *
+                          frame_header.layer_info.ysize * 8,
+                      buffer_size);
+          }
+          frames[i].resize(buffer_size);
+          EXPECT_EQ(JXL_DEC_SUCCESS,
+                    JxlDecoderSetImageOutBuffer(dec, &format, frames[i].data(),
+                                                frames[i].size()));
+          EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+          EXPECT_EQ(frame_header.layer_info.blend_info.blendmode,
+                    JXL_BLEND_REPLACE);
+          if (coalescing) {
+            EXPECT_EQ(frame_header.layer_info.xsize, oxsize);
+            EXPECT_EQ(frame_header.layer_info.ysize, oysize);
+            EXPECT_EQ(frame_header.layer_info.crop_x0, 0);
+            EXPECT_EQ(frame_header.layer_info.crop_y0, 0);
+          } else {
+            // manually merge this layer
+            int x0 = frame_header.layer_info.crop_x0;
+            int y0 = frame_header.layer_info.crop_y0;
+            int w = frame_header.layer_info.xsize;
+            int h = frame_header.layer_info.ysize;
+            for (int y = 0; y < static_cast<int>(oysize); y++) {
+              if (y < y0 || y >= y0 + h) continue;
+              // pointers do whole 16-bit RGBA pixels at a time
+              uint64_t* row_merged = static_cast<uint64_t*>(
+                  (void*)(frames[4].data() + y * oxsize * 8));
+              uint64_t* row_layer = static_cast<uint64_t*>(
+                  (void*)(frames[i].data() + (y - y0) * w * 8));
+              for (int x = 0; x < static_cast<int>(oxsize); x++) {
+                if (x < x0 || x >= x0 + w) continue;
+                row_merged[x] = row_layer[x - x0];
+              }
+            }
+          }
+        }
+
+        // After all frames were decoded, JxlDecoderProcessInput should return
+        // success to indicate all is done.
+        EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+        JxlThreadParallelRunnerDestroy(runner);
+        JxlDecoderDestroy(dec);
+      }
+    }
+
+    EXPECT_EQ(0u, jxl::test::ComparePixels(frames[0].data(), frames[4].data(),
+                                           oxsize, oysize, format, format));
+  };
+
+  for (bool keep_orientation : {true, false}) {
+    for (uint32_t orientation = 1; orientation <= 8; orientation++) {
+      for (uint32_t resampling : {1, 2, 4, 8}) {
+        SCOPED_TRACE(testing::Message()
+                     << "keep_orientation: " << keep_orientation << ", "
+                     << "orientation: " << orientation << ", "
+                     << "resampling: " << resampling);
+        test(keep_orientation, orientation, resampling);
+      }
+    }
+  }
+}
+
+struct FramePositions {  // File offsets of one frame, see AnalyzeCodestream.
+  size_t frame_start;  // where the frame header begins
+  size_t header_end;   // end of the frame header, rounded up to a whole byte
+  size_t toc_end;      // end of the TOC, i.e. where the section data begins
+  std::vector<size_t> section_end;  // end of each section, one per TOC entry
+};
+
+struct StreamPositions {  // File offsets filled in by AnalyzeCodestream.
+  size_t codestream_start;  // file offset of the first codestream byte
+  size_t codestream_end;    // end of the codestream, as a file offset
+  size_t basic_info;        // end of the size header and image metadata
+  size_t jbrd_end = 0;      // end of the "jbrd" box; stays 0 if none was seen
+  std::vector<size_t> box_start;       // start offset of every box visited
+  std::vector<FramePositions> frames;  // one entry per frame, in stream order
+};
+
+void AnalyzeCodestream(const jxl::PaddedBytes& data,
+                       StreamPositions* streampos) {  // fills *streampos
+  // Unbox data to codestream and mark where it is broken up by boxes.
+  std::vector<uint8_t> codestream;
+  std::vector<std::pair<size_t, size_t>> breakpoints;  // {codestream pos, gap}
+  bool codestream_end = false;  // jxlc seen, or jxlp word had its top bit set
+  ASSERT_LE(2, data.size());
+  if (data[0] == 0xff && data[1] == 0x0a) {  // bare codestream, no boxes
+    codestream = std::vector<uint8_t>(data.begin(), data.end());
+    streampos->codestream_start = 0;
+  } else {
+    const uint8_t* in = data.data();
+    size_t pos = 0;
+    while (pos < data.size()) {
+      ASSERT_LE(pos + 8, data.size());  // room for box size + 4-char type
+      streampos->box_start.push_back(pos);
+      size_t box_size = LoadBE32(in + pos);
+      if (box_size == 0) box_size = data.size() - pos;  // 0: runs to the end
+      ASSERT_LE(pos + box_size, data.size());
+      if (memcmp(in + pos + 4, "jxlc", 4) == 0) {  // codestream in one box
+        EXPECT_TRUE(codestream.empty());
+        streampos->codestream_start = pos + 8;
+        codestream.insert(codestream.end(), in + pos + 8, in + pos + box_size);
+        codestream_end = true;
+      } else if (memcmp(in + pos + 4, "jxlp", 4) == 0) {  // codestream piece
+        codestream_end = (LoadBE32(in + pos + 8) & 0x80000000);
+        if (codestream.empty()) {
+          streampos->codestream_start = pos + 12;
+        } else if (box_size > 12 || !codestream_end) {
+          breakpoints.push_back({codestream.size(), 12});  // skip its header
+        }
+        codestream.insert(codestream.end(), in + pos + 12, in + pos + box_size);
+      } else if (memcmp(in + pos + 4, "jbrd", 4) == 0) {
+        EXPECT_TRUE(codestream.empty());
+        streampos->jbrd_end = pos + box_size;
+      } else if (!codestream.empty() && !codestream_end) {
+        breakpoints.push_back({codestream.size(), box_size});  // skip the box
+      }
+      pos += box_size;
+    }
+    ASSERT_EQ(pos, data.size());
+  }
+  // Translate codestream positions to boxed stream positions.
+  size_t offset = streampos->codestream_start;
+  size_t bp = 0;
+  auto add_offset = [&](size_t pos) {  // stateful: assumes pos never decreases
+    while (bp < breakpoints.size() && pos >= breakpoints[bp].first) {
+      offset += breakpoints[bp++].second;
+    }
+    return pos + offset;
+  };
+  // Analyze the unboxed codestream.
+  jxl::BitReader br(
+      jxl::Span<const uint8_t>(codestream.data(), codestream.size()));
+  ASSERT_EQ(br.ReadFixedBits<16>(), 0x0AFF);  // codestream signature (ff 0a)
+  jxl::CodecMetadata metadata;
+  ASSERT_TRUE(ReadSizeHeader(&br, &metadata.size));
+  ASSERT_TRUE(ReadImageMetadata(&br, &metadata.m));
+  streampos->basic_info =
+      add_offset(br.TotalBitsConsumed() / jxl::kBitsPerByte);
+  metadata.transform_data.nonserialized_xyb_encoded = metadata.m.xyb_encoded;
+  ASSERT_TRUE(jxl::Bundle::Read(&br, &metadata.transform_data));
+  if (metadata.m.color_encoding.WantICC()) {
+    jxl::PaddedBytes icc;
+    ASSERT_TRUE(jxl::ReadICC(&br, &icc));
+    ASSERT_TRUE(metadata.m.color_encoding.SetICCRaw(std::move(icc)));
+  }
+  ASSERT_TRUE(br.JumpToByteBoundary());
+  bool has_preview = metadata.m.have_preview;  // applies to the first frame
+  while (br.TotalBitsConsumed() < br.TotalBytes() * jxl::kBitsPerByte) {
+    FramePositions p;
+    p.frame_start = add_offset(br.TotalBitsConsumed() / jxl::kBitsPerByte);
+    jxl::FrameHeader frame_header(&metadata);
+    if (has_preview) {
+      frame_header.nonserialized_is_preview = true;
+      has_preview = false;
+    }
+    ASSERT_TRUE(ReadFrameHeader(&br, &frame_header));
+    p.header_end =
+        add_offset(jxl::DivCeil(br.TotalBitsConsumed(), jxl::kBitsPerByte));
+    jxl::FrameDimensions frame_dim = frame_header.ToFrameDimensions();
+    uint64_t groups_total_size;
+    const size_t toc_entries = jxl::NumTocEntries(
+        frame_dim.num_groups, frame_dim.num_dc_groups,
+        frame_header.passes.num_passes, /*has_ac_global=*/true);
+    std::vector<uint64_t> section_offsets;
+    std::vector<uint32_t> section_sizes;
+    ASSERT_TRUE(ReadGroupOffsets(toc_entries, &br, &section_offsets,
+                                 &section_sizes, &groups_total_size));
+    EXPECT_EQ(br.TotalBitsConsumed() % jxl::kBitsPerByte, 0);
+    size_t sections_start = br.TotalBitsConsumed() / jxl::kBitsPerByte;
+    p.toc_end = add_offset(sections_start);
+    for (size_t i = 0; i < toc_entries; ++i) {
+      size_t end = sections_start + section_offsets[i] + section_sizes[i];
+      p.section_end.push_back(add_offset(end));
+    }
+    br.SkipBits(groups_total_size * jxl::kBitsPerByte);  // skip section data
+    streampos->frames.push_back(p);
+  }
+  streampos->codestream_end = add_offset(codestream.size());
+  EXPECT_EQ(br.TotalBitsConsumed(), br.TotalBytes() * jxl::kBitsPerByte);
+  EXPECT_TRUE(br.Close());
+}
+
+enum ExpectedFlushState { NO_FLUSH, SAME_FLUSH, NEW_FLUSH };  // see Breakpoint
+struct Breakpoint {  // One cut point of the input fed to VerifyProgression.
+  size_t file_pos;  // the input handed to the decoder ends at this offset
+  ExpectedFlushState expect_flush;  // flush fails / same error / lower error
+};
+
+// Decodes `data` piecewise, checking the flush result at each breakpoint.
+void VerifyProgression(size_t xsize, size_t ysize, uint32_t num_channels,
+                       const std::vector<uint8_t>& pixels,
+                       const jxl::PaddedBytes& data,
+                       std::vector<Breakpoint> breakpoints) {
+  // Size large enough for multiple groups, required to have progressive stages.
+  ASSERT_LT(256, xsize);
+  ASSERT_LT(256, ysize);
+  ASSERT_FALSE(breakpoints.empty());  // breakpoints[0] is read right below
+  std::vector<uint8_t> pixels2(pixels.size());
+  JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+  const int events = JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events));
+  size_t bp = 0;  // index of the breakpoint at which the current input ends
+  const uint8_t* next_in = data.data();
+  size_t avail_in = breakpoints[bp].file_pos;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+  double prev_dist = 1.0;  // RMS distance measured after the previous flush
+  for (;;) {
+    JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+    printf("bp: %zu  status: 0x%x\n", bp, (int)status);
+    if (status == JXL_DEC_BASIC_INFO) {
+      JxlBasicInfo info;
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+      EXPECT_EQ(info.xsize, xsize);
+      EXPECT_EQ(info.ysize, ysize);
+      // Output buffer/callback not yet set
+      EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec));
+      size_t buffer_size;
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+      EXPECT_EQ(pixels2.size(), buffer_size);
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSetImageOutBuffer(dec, &format, pixels2.data(),
+                                            pixels2.size()));
+    } else if (status == JXL_DEC_FRAME) {
+      // Nothing to do.
+    } else if (status == JXL_DEC_SUCCESS) {
+      EXPECT_EQ(bp + 1, breakpoints.size());
+      break;
+    } else if (status == JXL_DEC_NEED_MORE_INPUT ||
+               status == JXL_DEC_FULL_IMAGE) {
+      if (breakpoints[bp].expect_flush == NO_FLUSH) {
+        EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec));
+      } else {
+        if (status != JXL_DEC_FULL_IMAGE) {
+          EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec));
+        }
+        double dist = jxl::test::DistanceRMS(pixels2.data(), pixels.data(),
+                                             xsize, ysize, format);
+        if (breakpoints[bp].expect_flush == NEW_FLUSH) {
+          EXPECT_LT(dist, prev_dist);
+          prev_dist = dist;
+        } else {
+          EXPECT_EQ(dist, prev_dist);
+        }
+      }
+      if (status == JXL_DEC_FULL_IMAGE) {
+        EXPECT_EQ(bp + 1, breakpoints.size());
+        continue;
+      }
+      if (++bp >= breakpoints.size()) { ADD_FAILURE() << "bp overrun"; break; }
+      next_in += avail_in - JxlDecoderReleaseInput(dec);
+      avail_in = breakpoints[bp].file_pos - (next_in - data.data());
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+    } else {
+      printf("Unexpected status: 0x%x\n", (int)status);
+      ADD_FAILURE(); break;  // not FAIL(): dec must still be destroyed below
+    }
+  }
+  JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, ProgressionTest) {  // flush quality at each section boundary
+  size_t xsize = 508, ysize = 470;
+  uint32_t num_channels = 3;
+  std::vector<uint8_t> pixels =
+      jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+  jxl::TestCodestreamParams params;
+  params.cparams.progressive_dc = 1;
+  params.preview_mode = jxl::kSmallPreview;
+  jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+      num_channels, params);
+  StreamPositions streampos;
+  ASSERT_NO_FATAL_FAILURE(AnalyzeCodestream(data, &streampos));
+  const std::vector<FramePositions>& fp = streampos.frames;
+  // Preview, dc frame and regular frame; fatal since fp[2] is indexed below.
+  ASSERT_EQ(3, fp.size());
+  ASSERT_EQ(7, fp[2].section_end.size());
+  EXPECT_EQ(data.size(), fp[2].section_end[6]);
+  std::vector<Breakpoint> breakpoints{
+      {fp[0].frame_start, NO_FLUSH},           // headers
+      {fp[1].frame_start, NO_FLUSH},           // preview
+      {fp[2].frame_start, NO_FLUSH},           // dc frame
+      {fp[2].section_end[0], NO_FLUSH},        // DC global
+      {fp[2].section_end[1] - 1, NO_FLUSH},    // partial DC group
+      {fp[2].section_end[1], NEW_FLUSH},       // DC group
+      {fp[2].section_end[2], SAME_FLUSH},      // AC global
+      {fp[2].section_end[3], NEW_FLUSH},       // AC group 0
+      {fp[2].section_end[4] - 1, SAME_FLUSH},  // partial AC group 1
+      {fp[2].section_end[4], NEW_FLUSH},       // AC group 1
+      {fp[2].section_end[5], NEW_FLUSH},       // AC group 2
+      {data.size() - 1, SAME_FLUSH},           // partial AC group 3
+      {data.size(), NEW_FLUSH}};               // full image
+  VerifyProgression(xsize, ysize, num_channels, pixels, data, breakpoints);
+}
+
+TEST(DecodeTest, ProgressionTestLosslessAlpha) {  // lossless RGBA variant
+  size_t xsize = 508, ysize = 470;
+  uint32_t num_channels = 4;
+  std::vector<uint8_t> pixels =
+      jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+  jxl::TestCodestreamParams params;
+  params.cparams.SetLossless();
+  params.cparams.speed_tier = jxl::SpeedTier::kThunder;
+  params.cparams.responsive = 1;
+  jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+      num_channels, params);
+  StreamPositions streampos;
+  ASSERT_NO_FATAL_FAILURE(AnalyzeCodestream(data, &streampos));
+  const std::vector<FramePositions>& fp = streampos.frames;
+  // A single frame is expected here; fatal since fp[0] is indexed below.
+  ASSERT_EQ(1, fp.size());
+  ASSERT_EQ(7, fp[0].section_end.size());
+  EXPECT_EQ(data.size(), fp[0].section_end[6]);
+  std::vector<Breakpoint> breakpoints{
+      {fp[0].frame_start, NO_FLUSH},           // headers
+      {fp[0].section_end[0] - 1, NO_FLUSH},    // partial DC global
+      {fp[0].section_end[0], NEW_FLUSH},       // DC global
+      {fp[0].section_end[1], SAME_FLUSH},      // DC group
+      {fp[0].section_end[2], SAME_FLUSH},      // AC global
+      {fp[0].section_end[3], NEW_FLUSH},       // AC group 0
+      {fp[0].section_end[4] - 1, SAME_FLUSH},  // partial AC group 1
+      {fp[0].section_end[4], NEW_FLUSH},       // AC group 1
+      {fp[0].section_end[5], NEW_FLUSH},       // AC group 2
+      {data.size() - 1, SAME_FLUSH},           // partial AC group 3
+      {data.size(), NEW_FLUSH}};               // full image
+  VerifyProgression(xsize, ysize, num_channels, pixels, data, breakpoints);
+}
+
+void VerifyFilePosition(size_t expected_pos, const jxl::PaddedBytes& data,
+                        JxlDecoder* dec) {  // checks bytes consumed so far
+  size_t remaining = JxlDecoderReleaseInput(dec);  // bytes left unread
+  size_t pos = data.size() - remaining;  // assumes input ran to data's end
+  EXPECT_EQ(expected_pos, pos);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetInput(dec, data.data() + pos, remaining));  // resume
+}
+
+TEST(DecodeTest, InputHandlingTestOneShot) {  // all input given up front
+  size_t xsize = 508, ysize = 470;
+  uint32_t num_channels = 3;
+  std::vector<uint8_t> pixels =
+      jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+  for (int i = 0; i < kCSBF_NUM_ENTRIES; ++i) {
+    printf("Testing with box format %d\n", i);
+    jxl::TestCodestreamParams params;
+    params.cparams.progressive_dc = 1;
+    params.preview_mode = jxl::kSmallPreview;
+    params.box_format = (CodeStreamBoxFormat)i;
+    jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+        jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+        num_channels, params);
+    JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+    StreamPositions streampos;
+    ASSERT_NO_FATAL_FAILURE(AnalyzeCodestream(data, &streampos));
+    const std::vector<FramePositions>& fp = streampos.frames;
+    // Preview, dc frame and regular frame; fatal since fp[0..2] are indexed.
+    ASSERT_EQ(3, fp.size());
+    ASSERT_LT(1u, fp[2].section_end.size());  // section_end[1] is read below
+    std::vector<uint8_t> pixels2;
+    pixels2.resize(pixels.size());
+
+    const int kNumEvents = 6;
+    int events[] = {
+        JXL_DEC_BASIC_INFO, JXL_DEC_COLOR_ENCODING, JXL_DEC_PREVIEW_IMAGE,
+        JXL_DEC_FRAME,      JXL_DEC_FULL_IMAGE,     JXL_DEC_FRAME_PROGRESSION,
+    };
+    size_t end_positions[] = {  // expected final position for events[0..j]
+        streampos.basic_info,     fp[0].frame_start,
+        fp[1].frame_start,        fp[2].toc_end,
+        streampos.codestream_end, streampos.codestream_end};
+    int events_wanted = 0;  // one more event is subscribed on every round
+    for (int j = 0; j < kNumEvents; ++j) {
+      events_wanted |= events[j];
+      size_t end_pos = end_positions[j];
+      JxlDecoder* dec = JxlDecoderCreate(nullptr);
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events_wanted));
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSetInput(dec, data.data(), data.size()));
+      EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+      VerifyFilePosition(streampos.basic_info, data, dec);
+      if (j >= 1) {
+        EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+        VerifyFilePosition(fp[0].frame_start, data, dec);
+      }
+      if (j >= 2) {
+        EXPECT_EQ(JXL_DEC_NEED_PREVIEW_OUT_BUFFER, JxlDecoderProcessInput(dec));
+        VerifyFilePosition(fp[0].toc_end, data, dec);
+        size_t buffer_size;
+        EXPECT_EQ(JXL_DEC_SUCCESS,
+                  JxlDecoderPreviewOutBufferSize(dec, &format, &buffer_size));
+        EXPECT_GE(pixels2.size(), buffer_size);
+        EXPECT_EQ(JXL_DEC_SUCCESS,
+                  JxlDecoderSetPreviewOutBuffer(dec, &format, pixels2.data(),
+                                                buffer_size));
+        EXPECT_EQ(JXL_DEC_PREVIEW_IMAGE, JxlDecoderProcessInput(dec));
+        VerifyFilePosition(fp[1].frame_start, data, dec);
+      }
+      if (j >= 3) {
+        EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+        VerifyFilePosition(fp[2].toc_end, data, dec);
+        if (j >= 5) {
+          EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetProgressiveDetail(dec, kDC));
+        }
+      }
+      if (j >= 4) {
+        EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+        VerifyFilePosition(fp[2].toc_end, data, dec);
+        size_t buffer_size;
+        EXPECT_EQ(JXL_DEC_SUCCESS,
+                  JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+        EXPECT_EQ(pixels2.size(), buffer_size);
+        EXPECT_EQ(JXL_DEC_SUCCESS,
+                  JxlDecoderSetImageOutBuffer(dec, &format, pixels2.data(),
+                                              pixels2.size()));
+        if (j >= 5) {
+          EXPECT_EQ(JXL_DEC_FRAME_PROGRESSION, JxlDecoderProcessInput(dec));
+          VerifyFilePosition(fp[2].section_end[1], data, dec);
+        }
+        EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+        VerifyFilePosition(streampos.codestream_end, data, dec);
+      }
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+      VerifyFilePosition(end_pos, data, dec);
+      JxlDecoderDestroy(dec);
+    }
+  }
+}
+
+// One-shot input handling for a stream carrying JPEG reconstruction data.
+TEST(DecodeTest, JXL_TRANSCODE_JPEG_TEST(InputHandlingTestJPEGOneshot)) {
+  TEST_LIBJPEG_SUPPORT();
+  size_t xsize = 123, ysize = 77;
+  size_t channels = 3;
+  std::vector<uint8_t> pixels =
+      jxl::test::GetSomeTestImage(xsize, ysize, channels, /*seed=*/0);
+  for (int i = 1; i < kCSBF_NUM_ENTRIES; ++i) {
+    printf("Testing with box format %d\n", i);
+    jxl::PaddedBytes jpeg_codestream;
+    jxl::TestCodestreamParams params;
+    params.cparams.color_transform = jxl::ColorTransform::kNone;
+    params.jpeg_codestream = &jpeg_codestream;
+    params.preview_mode = jxl::kSmallPreview;
+    params.box_format = (CodeStreamBoxFormat)i;
+    jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+        jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+        channels, params);
+    JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+    StreamPositions streampos;
+    ASSERT_NO_FATAL_FAILURE(AnalyzeCodestream(data, &streampos));
+    const std::vector<FramePositions>& fp = streampos.frames;
+    // Preview and regular frame; fatal since fp[0] and fp[1] are indexed.
+    ASSERT_EQ(2, fp.size());
+    EXPECT_LT(0, streampos.jbrd_end);
+
+    std::vector<uint8_t> pixels2;
+    pixels2.resize(pixels.size());
+
+    const int kNumEvents = 6;
+    int events[] = {JXL_DEC_BASIC_INFO,     JXL_DEC_JPEG_RECONSTRUCTION,
+                    JXL_DEC_COLOR_ENCODING, JXL_DEC_PREVIEW_IMAGE,
+                    JXL_DEC_FRAME,          JXL_DEC_FULL_IMAGE};
+    size_t end_positions[] = {streampos.basic_info, streampos.basic_info,
+                              fp[0].frame_start,    fp[1].frame_start,
+                              fp[1].toc_end,        streampos.codestream_end};
+    int events_wanted = 0;  // one more event is subscribed on every round
+    for (int j = 0; j < kNumEvents; ++j) {
+      printf("j = %d\n", j);
+      events_wanted |= events[j];
+      size_t end_pos = end_positions[j];
+      JxlDecoder* dec = JxlDecoderCreate(nullptr);
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events_wanted));
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSetInput(dec, data.data(), data.size()));
+      if (j >= 1) {
+        EXPECT_EQ(JXL_DEC_JPEG_RECONSTRUCTION, JxlDecoderProcessInput(dec));
+        VerifyFilePosition(streampos.jbrd_end, data, dec);
+      }
+      EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+      VerifyFilePosition(streampos.basic_info, data, dec);
+      if (j >= 2) {
+        EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+        VerifyFilePosition(fp[0].frame_start, data, dec);
+      }
+      if (j >= 3) {
+        EXPECT_EQ(JXL_DEC_NEED_PREVIEW_OUT_BUFFER, JxlDecoderProcessInput(dec));
+        VerifyFilePosition(fp[0].toc_end, data, dec);
+        size_t buffer_size;
+        EXPECT_EQ(JXL_DEC_SUCCESS,
+                  JxlDecoderPreviewOutBufferSize(dec, &format, &buffer_size));
+        EXPECT_GE(pixels2.size(), buffer_size);
+        EXPECT_EQ(JXL_DEC_SUCCESS,
+                  JxlDecoderSetPreviewOutBuffer(dec, &format, pixels2.data(),
+                                                buffer_size));
+        EXPECT_EQ(JXL_DEC_PREVIEW_IMAGE, JxlDecoderProcessInput(dec));
+        VerifyFilePosition(fp[1].frame_start, data, dec);
+      }
+      if (j >= 4) {
+        EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+        VerifyFilePosition(fp[1].toc_end, data, dec);
+      }
+      if (j >= 5) {
+        EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+        VerifyFilePosition(fp[1].toc_end, data, dec);
+        size_t buffer_size;
+        EXPECT_EQ(JXL_DEC_SUCCESS,
+                  JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+        EXPECT_EQ(pixels2.size(), buffer_size);
+        EXPECT_EQ(JXL_DEC_SUCCESS,
+                  JxlDecoderSetImageOutBuffer(dec, &format, pixels2.data(),
+                                              pixels2.size()));
+        EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+        VerifyFilePosition(streampos.codestream_end, data, dec);
+      }
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+      VerifyFilePosition(end_pos, data, dec);
+      JxlDecoderDestroy(dec);
+    }
+  }
+}
+
+TEST(DecodeTest, InputHandlingTestStreaming) {  // input grows in small steps
+  size_t xsize = 508, ysize = 470;
+  uint32_t num_channels = 3;
+  std::vector<uint8_t> pixels =
+      jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+  for (int i = 0; i < kCSBF_NUM_ENTRIES; ++i) {
+    printf("Testing with box format %d\n", i);
+    fflush(stdout);
+    jxl::TestCodestreamParams params;
+    params.cparams.progressive_dc = 1;
+    params.box_format = (CodeStreamBoxFormat)i;
+    params.preview_mode = jxl::kSmallPreview;
+    jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+        jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+        num_channels, params);
+    JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+    StreamPositions streampos;
+    ASSERT_NO_FATAL_FAILURE(AnalyzeCodestream(data, &streampos));
+    const std::vector<FramePositions>& fp = streampos.frames;
+    // Preview, dc frame and regular frame; fatal since frames[2] is indexed.
+    ASSERT_EQ(3, fp.size());
+    ASSERT_LT(1u, fp[2].section_end.size());  // section_end[1] is read below
+    std::vector<uint8_t> pixels2(pixels.size());
+    int events_wanted =
+        (JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING | JXL_DEC_PREVIEW_IMAGE |
+         JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE | JXL_DEC_FRAME_PROGRESSION |
+         JXL_DEC_BOX);
+    for (size_t increment : {1, 7, 27, 1024}) {
+      JxlDecoder* dec = JxlDecoderCreate(nullptr);
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events_wanted));
+      size_t file_pos = 0;   // bytes the decoder has consumed so far
+      size_t box_index = 0;  // number of JXL_DEC_BOX events seen so far
+      size_t avail_in = 0;   // size of the window offered at file_pos
+      for (;;) {
+        const uint8_t* next_in = data.data() + file_pos;
+        EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+        JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+        size_t remaining = JxlDecoderReleaseInput(dec);
+        size_t consumed = avail_in - remaining;
+        file_pos += consumed;
+        avail_in += increment;
+        avail_in = std::min<size_t>(avail_in, data.size() - file_pos);
+        if (status == JXL_DEC_BASIC_INFO) {
+          EXPECT_EQ(file_pos, streampos.basic_info);
+        } else if (status == JXL_DEC_COLOR_ENCODING) {
+          EXPECT_EQ(file_pos, streampos.frames[0].frame_start);
+        } else if (status == JXL_DEC_NEED_PREVIEW_OUT_BUFFER) {
+          EXPECT_EQ(file_pos, streampos.frames[0].toc_end);
+          size_t buffer_size;
+          EXPECT_EQ(JXL_DEC_SUCCESS,
+                    JxlDecoderPreviewOutBufferSize(dec, &format, &buffer_size));
+          EXPECT_GE(pixels2.size(), buffer_size);
+          EXPECT_EQ(JXL_DEC_SUCCESS,
+                    JxlDecoderSetPreviewOutBuffer(dec, &format, pixels2.data(),
+                                                  buffer_size));
+        } else if (status == JXL_DEC_PREVIEW_IMAGE) {
+          EXPECT_EQ(file_pos, streampos.frames[1].frame_start);
+        } else if (status == JXL_DEC_FRAME) {
+          EXPECT_EQ(file_pos, streampos.frames[2].toc_end);
+          EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetProgressiveDetail(dec, kDC));
+        } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+          EXPECT_EQ(file_pos, streampos.frames[2].toc_end);
+          size_t buffer_size;
+          EXPECT_EQ(JXL_DEC_SUCCESS,
+                    JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+          EXPECT_EQ(pixels2.size(), buffer_size);
+          EXPECT_EQ(JXL_DEC_SUCCESS,
+                    JxlDecoderSetImageOutBuffer(dec, &format, pixels2.data(),
+                                                pixels2.size()));
+        } else if (status == JXL_DEC_FRAME_PROGRESSION) {
+          EXPECT_EQ(file_pos, streampos.frames[2].section_end[1]);
+        } else if (status == JXL_DEC_FULL_IMAGE) {
+          EXPECT_EQ(file_pos, streampos.codestream_end);
+        } else if (status == JXL_DEC_SUCCESS) {
+          EXPECT_EQ(file_pos, streampos.codestream_end);
+          break;
+        } else if (status == JXL_DEC_NEED_MORE_INPUT) {
+          EXPECT_LT(remaining, 12);
+          if ((i == kCSBF_None && file_pos >= 2) ||
+              (box_index > 0 && box_index < streampos.box_start.size() &&
+               file_pos >= streampos.box_start[box_index - 1] + 12 &&
+               file_pos < streampos.box_start[box_index])) {
+            EXPECT_EQ(remaining, 0);
+          }
+          if (file_pos == data.size()) break;
+        } else if (status == JXL_DEC_BOX) {  // more events than boxes is a bug
+          if (box_index >= streampos.box_start.size()) { ADD_FAILURE(); break; }
+          EXPECT_EQ(file_pos, streampos.box_start[box_index++]);
+        } else {
+          printf("Unexpected status: 0x%x\n", (int)status);
+          ADD_FAILURE(); break;  // not FAIL(): dec must still be destroyed
+        }
+      }
+      JxlDecoderDestroy(dec);
+    }
+  }
+}
+
+TEST(DecodeTest, FlushTest) {
+  // Checks JxlDecoderFlushImage on input missing its last byte. The image is
+  // large enough for multiple groups, required to have progressive stages.
+  size_t xsize = 333, ysize = 300;
+  uint32_t num_channels = 3;
+  std::vector<uint8_t> pixels =
+      jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+  jxl::TestCodestreamParams params;
+  params.preview_mode = jxl::kSmallPreview;
+  jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+      num_channels, params);
+  JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  std::vector<uint8_t> pixels2;
+  pixels2.resize(pixels.size());
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(
+                dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+
+  // Ensure that the first part contains at least the full DC of the image,
+  // otherwise flush does not work.
+  size_t first_part = data.size() - 1;  // everything except the last byte
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), first_part));
+
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+  EXPECT_EQ(info.xsize, xsize);
+  EXPECT_EQ(info.ysize, ysize);
+
+  EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+  // Output buffer not yet set
+  EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec));
+
+  size_t buffer_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+  EXPECT_EQ(pixels2.size(), buffer_size);
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                 dec, &format, pixels2.data(), pixels2.size()));
+
+  // Must process input further until we get JXL_DEC_NEED_MORE_INPUT, even if
+  // data was already input before, since the processing of the frame only
+  // happens at the JxlDecoderProcessInput call after JXL_DEC_FRAME.
+  EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec));
+
+  // Crude test of actual pixel data: pixel threshold of about 4% (2560/65535).
+  // 29000 pixels can be above the threshold
+  EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize,
+                                     ysize, format, format, 2560.0),
+            29000u);
+
+  EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+
+  size_t consumed = first_part - JxlDecoderReleaseInput(dec);  // bytes used
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data() + consumed,
+                                                data.size() - consumed));
+  EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+  // Same threshold, but fewer pixels may exceed it in the final (lossy) image
+  EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize,
+                                     ysize, format, format, 2560.0),
+            11000u);
+
+  JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, FlushTestImageOutCallback) {
+  // Flush via the image-out callback. Size large enough for multiple groups,
+  // required to have progressive stages.
+  size_t xsize = 333, ysize = 300;
+  uint32_t num_channels = 3;
+  std::vector<uint8_t> pixels =
+      jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+  jxl::TestCodestreamParams params;
+  params.preview_mode = jxl::kSmallPreview;
+  jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+      num_channels, params);
+  JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  std::vector<uint8_t> pixels2;
+  pixels2.resize(pixels.size());
+
+  size_t bytes_per_pixel = format.num_channels * 2;  // 2 bytes per sample
+  size_t stride = bytes_per_pixel * xsize;  // byte offset between rows
+  auto callback = [&](size_t x, size_t y, size_t num_pixels,
+                      const void* pixels_row) {
+    memcpy(pixels2.data() + stride * y + bytes_per_pixel * x, pixels_row,
+           num_pixels * bytes_per_pixel);
+  };
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(
+                dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+
+  // Ensure that the first part contains at least the full DC of the image,
+  // otherwise flush does not work.
+  size_t first_part = data.size() - 1;  // everything except the last byte
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), first_part));
+
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+  EXPECT_EQ(info.xsize, xsize);
+  EXPECT_EQ(info.ysize, ysize);
+
+  EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+  // Output callback not yet set, so flushing is expected to fail.
+  EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutCallback(
+                                 dec, &format,
+                                 [](void* opaque, size_t x, size_t y,
+                                    size_t xsize, const void* pixels_row) {
+                                   auto cb =
+                                       static_cast<decltype(&callback)>(opaque);
+                                   (*cb)(x, y, xsize, pixels_row);
+                                 },
+                                 /*opaque=*/&callback));
+
+  // Must process input further until we get JXL_DEC_NEED_MORE_INPUT, even if
+  // data was already input before, since the processing of the frame only
+  // happens at the JxlDecoderProcessInput call after JXL_DEC_FRAME.
+  EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec));
+
+  // Crude test of actual pixel data: pixel threshold of about 4% (2560/65535).
+  // At most 29000 pixels may be above the threshold after the flush.
+  EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize,
+                                     ysize, format, format, 2560.0),
+            29000u);
+
+  EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+
+  size_t consumed = first_part - JxlDecoderReleaseInput(dec);
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data() + consumed,
+                                                data.size() - consumed));
+  EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+  // Same pixel threshold, lower allowed count for the final (still lossy) image
+  EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize,
+                                     ysize, format, format, 2560.0),
+            11000u);
+
+  JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, FlushTestLossyProgressiveAlpha) {
+  // 4-channel image. Size large enough for multiple groups, required to have
+  // progressive stages.
+  size_t xsize = 333, ysize = 300;
+  uint32_t num_channels = 4;
+  std::vector<uint8_t> pixels =
+      jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+  jxl::TestCodestreamParams params;
+  params.preview_mode = jxl::kSmallPreview;
+  jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+      num_channels, params);
+  JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  std::vector<uint8_t> pixels2;
+  pixels2.resize(pixels.size());
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(
+                dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+
+  // Ensure that the first part contains at least the full DC of the image,
+  // otherwise flush does not work.
+  size_t first_part = data.size() - 1;  // everything except the last byte
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), first_part));
+
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+  EXPECT_EQ(info.xsize, xsize);
+  EXPECT_EQ(info.ysize, ysize);
+
+  EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+  // Output buffer not yet set, so flushing is expected to fail.
+  EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec));
+
+  size_t buffer_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+  EXPECT_EQ(pixels2.size(), buffer_size);
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                 dec, &format, pixels2.data(), pixels2.size()));
+
+  // Must process input further until we get JXL_DEC_NEED_MORE_INPUT, even if
+  // data was already input before, since the processing of the frame only
+  // happens at the JxlDecoderProcessInput call after JXL_DEC_FRAME.
+  EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec));
+
+  EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize,
+                                     ysize, format, format, 2560.0),
+            30000u);  // allowed count after the intermediate flush
+
+  EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+
+  size_t consumed = first_part - JxlDecoderReleaseInput(dec);
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data() + consumed,
+                                                data.size() - consumed));
+
+  EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+  EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize,
+                                     ysize, format, format, 2560.0),
+            11000u);  // stricter count once the full image is decoded
+
+  JxlDecoderDestroy(dec);
+}
+TEST(DecodeTest, FlushTestLossyProgressiveAlphaUpsampling) {
+  size_t xsize = 533, ysize = 401;  // neither is a multiple of 2 or 4
+  uint32_t num_channels = 4;
+  std::vector<uint8_t> pixels =
+      jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+  jxl::TestCodestreamParams params;
+  params.cparams.resampling = 2;
+  params.cparams.ec_resampling = 4;
+  params.preview_mode = jxl::kSmallPreview;
+  jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+      num_channels, params);
+  JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  std::vector<uint8_t> pixels2;
+  pixels2.resize(pixels.size());
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(
+                dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+
+  // Ensure that the first part contains at least the full DC of the image,
+  // otherwise flush does not work.
+  size_t first_part = data.size() * 2 / 3;  // first two thirds of the file
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), first_part));
+
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+  EXPECT_EQ(info.xsize, xsize);
+  EXPECT_EQ(info.ysize, ysize);
+
+  EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+  // Output buffer not yet set, so flushing is expected to fail.
+  EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec));
+
+  size_t buffer_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+  EXPECT_EQ(pixels2.size(), buffer_size);
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                 dec, &format, pixels2.data(), pixels2.size()));
+
+  // Must process input further until we get JXL_DEC_NEED_MORE_INPUT, even if
+  // data was already input before, since the processing of the frame only
+  // happens at the JxlDecoderProcessInput call after JXL_DEC_FRAME.
+  EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec));
+
+  EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize,
+                                     ysize, format, format, 2560.0),
+            125000u);  // allowed count after the intermediate flush
+
+  EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+
+  size_t consumed = first_part - JxlDecoderReleaseInput(dec);
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data() + consumed,
+                                                data.size() - consumed));
+
+  EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+  EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize,
+                                     ysize, format, format, 2560.0),
+            70000u);  // stricter count once the full image is decoded
+
+  JxlDecoderDestroy(dec);
+}
+TEST(DecodeTest, FlushTestLosslessProgressiveAlpha) {
+  // Lossless 4-channel image. Size large enough for multiple groups, required
+  // to have progressive stages.
+  size_t xsize = 333, ysize = 300;
+  uint32_t num_channels = 4;
+  std::vector<uint8_t> pixels =
+      jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+  jxl::TestCodestreamParams params;
+  params.cparams.SetLossless();
+  params.cparams.speed_tier = jxl::SpeedTier::kThunder;
+  params.cparams.responsive = 1;
+  params.cparams.modular_group_size_shift = 1;
+  params.preview_mode = jxl::kSmallPreview;
+  jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+      num_channels, params);
+  JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  std::vector<uint8_t> pixels2;
+  pixels2.resize(pixels.size());
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(
+                dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+
+  // Ensure that the first part contains at least the full DC of the image,
+  // otherwise flush does not work.
+  size_t first_part = data.size() / 2;  // first half of the file
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), first_part));
+
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+  EXPECT_EQ(info.xsize, xsize);
+  EXPECT_EQ(info.ysize, ysize);
+
+  EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+  // Output buffer not yet set, so flushing is expected to fail.
+  EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec));
+
+  size_t buffer_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+  EXPECT_EQ(pixels2.size(), buffer_size);
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                 dec, &format, pixels2.data(), pixels2.size()));
+
+  // Must process input further until we get JXL_DEC_NEED_MORE_INPUT, even if
+  // data was already input before, since the processing of the frame only
+  // happens at the JxlDecoderProcessInput call after JXL_DEC_FRAME.
+  EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec));
+
+  EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize,
+                                     ysize, format, format, 2560.0),
+            2700u);  // allowed count after the intermediate flush
+
+  EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+
+  size_t consumed = first_part - JxlDecoderReleaseInput(dec);
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data() + consumed,
+                                                data.size() - consumed));
+
+  EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+  EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize,
+                                     ysize, format, format),
+            0u);  // final image: zero mismatching pixels allowed
+
+  JxlDecoderDestroy(dec);
+}
+
+class DecodeProgressiveTest : public ::testing::TestWithParam<int> {};
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(DecodeProgressiveTestInstantiation,
+                                   DecodeProgressiveTest,
+                                   ::testing::Range(0, 8));
+TEST_P(DecodeProgressiveTest, ProgressiveEventTest) {
+  const int params = GetParam();  // three option bits, decoded below
+  int single_group = params & 1;
+  int lossless = (params >> 1) & 1;
+  uint32_t num_channels = 3 + ((params >> 2) & 1);  // 3 or 4
+  std::set<JxlProgressiveDetail> progressive_details = {kDC, kLastPasses,
+                                                        kPasses};
+  for (auto prog_detail : progressive_details) {
+    // Only few combinations are expected to support outputting
+    // intermediate flushes for complete DC and complete passes.
+    // The test can be updated if more cases are expected to support it.
+    bool expect_flush = (num_channels & 1) && !lossless;  // 3 channels, lossy
+    size_t xsize, ysize;
+    if (single_group) {
+      // An image smaller than 256x256 ensures it contains only 1 group.
+      xsize = 99;
+      ysize = 100;
+    } else {
+      xsize = 277;
+      ysize = 280;
+    }
+    std::vector<uint8_t> pixels =
+        jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+    JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+    jxl::ColorEncoding color_encoding = jxl::ColorEncoding::SRGB(false);
+    jxl::CodecInOut io;
+    EXPECT_TRUE(jxl::ConvertFromExternal(
+        jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+        color_encoding,
+        /*bits_per_sample=*/16, format,
+        /*pool=*/nullptr, &io.Main()));
+    jxl::TestCodestreamParams params;  // NOTE: shadows the int `params` above
+    if (lossless) {
+      params.cparams.SetLossless();
+    } else {
+      params.cparams.butteraugli_distance = 0.5f;
+    }
+    jxl::PassDefinition passes[] = {  // NOTE: shadowed by a vector below
+        {2, 0, 4}, {4, 0, 4}, {8, 2, 2}, {8, 1, 2}, {8, 0, 1}};
+    const int kNumPasses = 5;  // number of entries in `passes` above
+    jxl::ProgressiveMode progressive_mode{passes};
+    params.progressive_mode = &progressive_mode;
+    jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+        jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+        num_channels, params);
+
+    for (size_t increment : {(size_t)1, data.size()}) {  // 1 byte, then all
+      printf(
+          "Testing with single_group=%d, lossless=%d, "
+          "num_channels=%d, prog_detail=%d, increment=%d\n",
+          single_group, lossless, (int)num_channels, (int)prog_detail,
+          (int)increment);
+      std::vector<std::vector<uint8_t>> passes(kNumPasses + 1);
+      for (int i = 0; i <= kNumPasses; ++i) {
+        passes[i].resize(pixels.size());
+      }
+
+      JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSubscribeEvents(
+                    dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME |
+                             JXL_DEC_FULL_IMAGE | JXL_DEC_FRAME_PROGRESSION));
+      EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderSetProgressiveDetail(dec, kFrames));
+      EXPECT_EQ(JXL_DEC_ERROR,
+                JxlDecoderSetProgressiveDetail(dec, kDCProgressive));
+      EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderSetProgressiveDetail(dec, kDCGroups));
+      EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderSetProgressiveDetail(dec, kGroups));
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSetProgressiveDetail(dec, prog_detail));
+
+      uint8_t* next_in = data.data();
+      size_t avail_in = 0;  // bytes at next_in currently offered to the decoder
+      size_t pos = 0;  // total bytes of `data` handed out so far
+
+      auto process_input = [&]() {
+        for (;;) {
+          EXPECT_EQ(JXL_DEC_SUCCESS,
+                    JxlDecoderSetInput(dec, next_in, avail_in));
+          JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+          size_t remaining = JxlDecoderReleaseInput(dec);
+          EXPECT_LE(remaining, avail_in);
+          next_in += avail_in - remaining;
+          avail_in = remaining;
+          if (status == JXL_DEC_NEED_MORE_INPUT && pos < data.size()) {
+            size_t chunk = std::min<size_t>(increment, data.size() - pos);
+            pos += chunk;
+            avail_in += chunk;
+            continue;
+          }
+          return status;
+        }
+      };
+
+      EXPECT_EQ(JXL_DEC_BASIC_INFO, process_input());
+      JxlBasicInfo info;
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+      EXPECT_EQ(info.xsize, xsize);
+      EXPECT_EQ(info.ysize, ysize);
+
+      EXPECT_EQ(JXL_DEC_FRAME, process_input());
+
+      size_t buffer_size;
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+      EXPECT_EQ(pixels.size(), buffer_size);
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                     dec, &format, passes[kNumPasses].data(),
+                                     passes[kNumPasses].size()));
+
+      auto next_pass = [&](int pass) {
+        if (prog_detail <= kDC) return kNumPasses;
+        if (prog_detail <= kLastPasses) {
+          return std::min(pass + 2, kNumPasses);
+        }
+        return pass + 1;
+      };
+
+      if (expect_flush) {
+        // Return a particular downsampling ratio only after the last
+        // pass for that downsampling was processed.
+        int expected_downsampling_ratios[] = {8, 8, 4, 4, 2};
+        for (int p = 0; p < kNumPasses; p = next_pass(p)) {
+          EXPECT_EQ(JXL_DEC_FRAME_PROGRESSION, process_input());
+          EXPECT_EQ(expected_downsampling_ratios[p],
+                    JxlDecoderGetIntendedDownsamplingRatio(dec));
+          EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec));
+          passes[p] = passes[kNumPasses];  // snapshot of the flushed buffer
+        }
+      }
+
+      EXPECT_EQ(JXL_DEC_FULL_IMAGE, process_input());
+      EXPECT_EQ(JXL_DEC_SUCCESS, process_input());
+
+      JxlDecoderDestroy(dec);
+
+      if (!expect_flush) {
+        continue;
+      }
+      jxl::ButteraugliParams ba;
+      std::vector<float> distances(kNumPasses + 1);
+      for (int p = 0;; p = next_pass(p)) {  // leaves via the break below
+        jxl::CodecInOut io1;
+        EXPECT_TRUE(jxl::ConvertFromExternal(
+            jxl::Span<const uint8_t>(passes[p].data(), passes[p].size()), xsize,
+            ysize, color_encoding,
+            /*bits_per_sample=*/16, format,
+            /*pool=*/nullptr, &io1.Main()));
+        distances[p] = ButteraugliDistance(io.frames, io1.frames, ba,
+                                           jxl::GetJxlCms(), nullptr, nullptr);
+        if (p == kNumPasses) break;
+      }
+      const float kMaxDistance[kNumPasses + 1] = {30.0f, 20.0f, 10.0f,
+                                                  5.0f,  3.0f,  2.0f};
+      EXPECT_LT(distances[kNumPasses], kMaxDistance[kNumPasses]);
+      for (int p = 0; p < kNumPasses;) {
+        int next_p = next_pass(p);
+        EXPECT_LT(distances[p], kMaxDistance[p]);
+        // Verify that the returned pass image is actually not the
+        // same as the next pass image, by checking that it has a bit
+        // worse butteraugli score.
+        EXPECT_LT(distances[next_p] * 1.1f, distances[p]);
+        p = next_p;
+      }
+    }
+  }
+}
+
+// Decodes `container` with JPEG reconstruction subscribed and checks that the
+// reconstructed bytes are identical to `jpeg_bytes`. The output buffer starts
+// at 128 bytes and is doubled every time the decoder reports
+// JXL_DEC_JPEG_NEED_MORE_OUTPUT.
+void VerifyJPEGReconstruction(const jxl::PaddedBytes& container,
+                              const jxl::PaddedBytes& jpeg_bytes) {
+  JxlDecoderPtr dec = JxlDecoderMake(nullptr);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(
+                dec.get(), JXL_DEC_JPEG_RECONSTRUCTION | JXL_DEC_FULL_IMAGE));
+  // Check the status like every other decoder call in this file, so a rejected
+  // input is reported here rather than as a mismatch on the next expectation.
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec.get(), container.data(),
+                                                container.size()));
+  EXPECT_EQ(JXL_DEC_JPEG_RECONSTRUCTION, JxlDecoderProcessInput(dec.get()));
+  std::vector<uint8_t> reconstructed_buffer(128);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetJPEGBuffer(dec.get(), reconstructed_buffer.data(),
+                                    reconstructed_buffer.size()));
+  size_t used = 0;
+  // Starts as NEED_MORE_OUTPUT so that the loop body, which contains the first
+  // JxlDecoderProcessInput call after the buffer was set, runs at least once.
+  JxlDecoderStatus process_result = JXL_DEC_JPEG_NEED_MORE_OUTPUT;
+  while (process_result == JXL_DEC_JPEG_NEED_MORE_OUTPUT) {
+    // Release the buffer before resizing: resize() may reallocate, so the
+    // decoder must not keep the old pointer. Decoding resumes at `used`.
+    used = reconstructed_buffer.size() - JxlDecoderReleaseJPEGBuffer(dec.get());
+    reconstructed_buffer.resize(reconstructed_buffer.size() * 2);
+    EXPECT_EQ(
+        JXL_DEC_SUCCESS,
+        JxlDecoderSetJPEGBuffer(dec.get(), reconstructed_buffer.data() + used,
+                                reconstructed_buffer.size() - used));
+    process_result = JxlDecoderProcessInput(dec.get());
+  }
+  ASSERT_EQ(JXL_DEC_FULL_IMAGE, process_result);
+  used = reconstructed_buffer.size() - JxlDecoderReleaseJPEGBuffer(dec.get());
+  // ASSERT (not EXPECT) on the size so the memcmp below never reads past the
+  // end of `jpeg_bytes`.
+  ASSERT_EQ(used, jpeg_bytes.size());
+  EXPECT_EQ(0, memcmp(reconstructed_buffer.data(), jpeg_bytes.data(), used));
+}
+
+TEST(DecodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGReconstructTestCodestream)) {
+  // Builds a test file with `params.jpeg_codestream` set, then verifies that
+  // the decoder's JPEG reconstruction matches the bytes stored there.
+  TEST_LIBJPEG_SUPPORT();
+  const size_t width = 123, height = 77, num_channels = 3;
+  const std::vector<uint8_t> image =
+      jxl::test::GetSomeTestImage(width, height, num_channels, /*seed=*/0);
+
+  jxl::PaddedBytes expected_jpeg;
+  jxl::TestCodestreamParams params;
+  params.jpeg_codestream = &expected_jpeg;
+  params.box_format = kCSBF_Single;
+  params.preview_mode = jxl::kSmallPreview;
+  params.cparams.color_transform = jxl::ColorTransform::kNone;
+
+  const jxl::Span<const uint8_t> image_span(image.data(), image.size());
+  const jxl::PaddedBytes container = jxl::CreateTestJXLCodestream(
+      image_span, width, height, num_channels, params);
+  VerifyJPEGReconstruction(container, expected_jpeg);
+}
+
+TEST(DecodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGReconstructionTest)) {
+  // Hand-assembles a container (container header, "jbrd" box, "jxlc" box) from
+  // a stored JPEG file and verifies that decoding reconstructs that file.
+  const jxl::PaddedBytes original_jpeg =
+      jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_420.jpg");
+  jxl::CodecInOut io;
+  ASSERT_TRUE(jxl::jpeg::DecodeImageJPG(
+      jxl::Span<const uint8_t>(original_jpeg), &io));
+  io.metadata.m.xyb_encoded = false;
+
+  // Codestream: headers, padding to a byte boundary, then the frame.
+  jxl::CompressParams cparams;
+  cparams.color_transform = jxl::ColorTransform::kNone;
+  jxl::BitWriter bits;
+  ASSERT_TRUE(WriteCodestreamHeaders(&io.metadata, &bits, nullptr));
+  bits.ZeroPadToByte();
+  jxl::PassesEncoderState encoder_state;
+  ASSERT_TRUE(jxl::EncodeFrame(cparams, jxl::FrameInfo{}, &io.metadata,
+                               io.Main(), &encoder_state, jxl::GetJxlCms(),
+                               /*pool=*/nullptr, &bits,
+                               /*aux_out=*/nullptr));
+
+  // Payload of the "jbrd" box.
+  jxl::PaddedBytes jbrd_payload;
+  ASSERT_TRUE(
+      EncodeJPEGData(*io.Main().jpeg_data.get(), &jbrd_payload, cparams));
+
+  jxl::PaddedBytes file;
+  file.append(jxl::kContainerHeader,
+              jxl::kContainerHeader + sizeof(jxl::kContainerHeader));
+  jxl::AppendBoxHeader(jxl::MakeBoxType("jbrd"), jbrd_payload.size(), false,
+                       &file);
+  file.append(jbrd_payload.data(), jbrd_payload.data() + jbrd_payload.size());
+  jxl::AppendBoxHeader(jxl::MakeBoxType("jxlc"), 0, true, &file);
+  jxl::PaddedBytes frame_bytes = std::move(bits).TakeBytes();
+  file.append(frame_bytes.data(), frame_bytes.data() + frame_bytes.size());
+
+  VerifyJPEGReconstruction(file, original_jpeg);
+}
+
+TEST(DecodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGReconstructionMetadataTest)) {
+  // The stored .jxl file must reconstruct to exactly the stored .jpg file.
+  const jxl::PaddedBytes expected_jpeg =
+      jxl::test::ReadTestData("jxl/jpeg_reconstruction/1x1_exif_xmp.jpg");
+  const jxl::PaddedBytes container =
+      jxl::test::ReadTestData("jxl/jpeg_reconstruction/1x1_exif_xmp.jxl");
+  VerifyJPEGReconstruction(container, expected_jpeg);
+}
+
+TEST(DecodeTest, ContinueFinalNonEssentialBoxTest) {
+  size_t xsize = 80, ysize = 90;
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+  jxl::TestCodestreamParams params;
+  params.box_format = kCSBF_Multi_Other_Terminated;
+  params.add_icc_profile = true;
+  jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 4,
+      params);
+  StreamPositions streampos;
+  AnalyzeCodestream(compressed, &streampos);
+
+  // The non-essential final box size including 8-byte header
+  size_t final_box_size = unk3_box_size + 8;
+  size_t last_box_begin = compressed.size() - final_box_size;
+  // Verify that the test is indeed set up correctly to be at the beginning of
+  // the 'unk3' box header.
+  ASSERT_EQ(compressed[last_box_begin + 3], final_box_size);
+  ASSERT_EQ(compressed[last_box_begin + 4], 'u');
+  ASSERT_EQ(compressed[last_box_begin + 5], 'n');
+  ASSERT_EQ(compressed[last_box_begin + 6], 'k');
+  ASSERT_EQ(compressed[last_box_begin + 7], '3');
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetInput(dec, compressed.data(), last_box_begin));
+
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+  // The decoder returns success despite not having seen the final unknown box
+  // yet. This is because calling JxlDecoderCloseInput is not mandatory for
+  // backwards compatibility, so it doesn't know more bytes follow, the current
+  // bytes ended at a perfectly valid place.
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+  size_t remaining = JxlDecoderReleaseInput(dec);
+  // Since the test was set up to end exactly at the boundary of the final
+  // codestream box, and the decoder returned success, all bytes are expected to
+  // be consumed until the end of the frame header (frames[0].toc_end).
+  EXPECT_EQ(remaining, last_box_begin - streampos.frames[0].toc_end);
+
+  // Now set the remaining non-codestream box as input.
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetInput(dec, compressed.data() + last_box_begin,
+                               compressed.size() - last_box_begin));
+  // Even though JxlDecoderProcessInput already returned JXL_DEC_SUCCESS before,
+  // when calling it again now after setting more input, success is expected, no
+  // event occurs but the box has been successfully skipped.
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+  JxlDecoderDestroy(dec);
+}
+
+namespace {
+// True iff `type_string` is exactly the four characters of box type `type`.
+bool BoxTypeEquals(const std::string& type_string, JxlBoxType type) {
+  if (type_string.size() != 4) return false;
+  return memcmp(type_string.data(), type, 4) == 0;
+}
+}  // namespace
+
+TEST(DecodeTest, ExtentedBoxSizeTest) {
+  // Walks the first three JXL_DEC_BOX events of a stored container file and
+  // checks the box type and raw box size reported for each of them.
+  const jxl::PaddedBytes file =
+      jxl::test::ReadTestData("jxl/boxes/square-extended-size-container.jxl");
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, JXL_DEC_BOX));
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, file.data(), file.size()));
+
+  struct ExpectedBox {
+    const char* type;
+    uint64_t raw_size;
+  };
+  const ExpectedBox kExpectedBoxes[] = {
+      {"JXL ", 12}, {"ftyp", 20}, {"jxlc", 72}};
+  for (const ExpectedBox& expected : kExpectedBoxes) {
+    JxlBoxType type;
+    uint64_t box_size;
+    EXPECT_EQ(JXL_DEC_BOX, JxlDecoderProcessInput(dec));
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec, type, JXL_FALSE));
+    EXPECT_TRUE(BoxTypeEquals(expected.type, type));
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxSizeRaw(dec, &box_size));
+    EXPECT_EQ(expected.raw_size, box_size);
+  }
+
+  JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, JXL_BOXES_TEST(BoxTest)) {
+  size_t xsize = 1, ysize = 1;
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+  jxl::TestCodestreamParams params;
+  params.box_format = kCSBF_Multi_Other_Terminated;
+  params.add_icc_profile = true;
+  jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 4,
+      params);
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, JXL_DEC_BOX));
+
+  std::vector<std::string> expected_box_types = {
+      "JXL ", "ftyp", "jxlp", "unk1", "unk2", "jxlp", "jxlp", "jxlp", "unk3"};
+
+  // Value 0 means to not test the size: codestream is not required to be a
+  // particular exact size.
+  std::vector<size_t> expected_box_sizes = {12, 20, 0, 34, 18, 0, 0, 0, 20};
+
+  JxlBoxType type;
+  uint64_t box_size;
+  std::vector<uint8_t> contents(50);  // output buffer for the 'unk*' boxes
+  size_t expected_release_size = 0;  // 0 means no box buffer is currently set
+
+  // Cannot get these when decoding didn't start yet
+  EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderGetBoxType(dec, type, JXL_FALSE));
+  EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderGetBoxSizeRaw(dec, &box_size));
+
+  uint8_t* next_in = compressed.data();
+  size_t avail_in = compressed.size();
+  for (size_t i = 0; i < expected_box_types.size(); i++) {
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+    EXPECT_EQ(JXL_DEC_BOX, JxlDecoderProcessInput(dec));
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec, type, JXL_FALSE));
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxSizeRaw(dec, &box_size));
+    EXPECT_TRUE(BoxTypeEquals(expected_box_types[i], type));
+    if (expected_box_sizes[i]) {
+      EXPECT_EQ(expected_box_sizes[i], box_size);
+    }
+
+    if (expected_release_size > 0) {  // buffer set for the previous box
+      EXPECT_EQ(expected_release_size, JxlDecoderReleaseBoxBuffer(dec));
+      expected_release_size = 0;
+    }
+
+    if (type[0] == 'u' && type[1] == 'n' && type[2] == 'k') {
+      JxlDecoderSetBoxBuffer(dec, contents.data(), contents.size());
+      size_t expected_box_contents_size =
+          type[3] == '1' ? unk1_box_size
+                         : (type[3] == '2' ? unk2_box_size : unk3_box_size);
+      expected_release_size = contents.size() - expected_box_contents_size;
+    }
+    size_t consumed = avail_in - JxlDecoderReleaseInput(dec);
+    next_in += consumed;
+    avail_in -= consumed;
+  }
+
+  // After the last DEC_BOX event, check that the input position is exactly at
+  // the start of the last box header.
+  EXPECT_EQ(avail_in, expected_box_sizes.back());
+
+  // Even though all input is given, the decoder cannot assume there aren't
+  // more boxes if the input was not closed.
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+  EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+  JxlDecoderCloseInput(dec);
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+  JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, JXL_BOXES_TEST(ExifBrobBoxTest)) {
+  size_t xsize = 1, ysize = 1;
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+  jxl::TestCodestreamParams params;
+  // Lossless encoding; this test only inspects the metadata boxes, not pixels.
+  params.cparams.SetLossless();
+  params.box_format = kCSBF_Brob_Exif;
+  params.add_icc_profile = true;
+  jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 4,
+      params);
+
+  // Test the raw brob box (no brotli decompression): one-shot, then streaming.
+  for (int streaming = 0; streaming < 2; ++streaming) {
+    JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, JXL_DEC_BOX));
+    if (!streaming) {
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSetInput(dec, compressed.data(), compressed.size()));
+      JxlDecoderCloseInput(dec);
+    }
+    // Streaming case only: input is handed over in step_size-byte slices.
+    const uint8_t* next_in = compressed.data();
+    size_t avail_in = 0;
+    size_t total_in = 0;
+    size_t step_size = 64;
+
+    std::vector<uint8_t> box_buffer;
+    size_t box_num_output;
+    bool seen_brob_begin = false;
+    bool seen_brob_end = false;
+
+    for (;;) {
+      JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+      if (status == JXL_DEC_NEED_MORE_INPUT) {
+        if (streaming) {
+          size_t remaining = JxlDecoderReleaseInput(dec);
+          EXPECT_LE(remaining, avail_in);
+          next_in += avail_in - remaining;
+          avail_in = remaining;
+          size_t amount = step_size;
+          if (total_in + amount > compressed.size()) {
+            amount = compressed.size() - total_in;
+          }
+          avail_in += amount;
+          total_in += amount;
+          EXPECT_EQ(JXL_DEC_SUCCESS,
+                    JxlDecoderSetInput(dec, next_in, avail_in));
+          if (total_in == compressed.size()) JxlDecoderCloseInput(dec);
+        } else {
+          FAIL();  // The complete input was already set and closed.
+          break;
+        }
+      } else if (status == JXL_DEC_BOX || status == JXL_DEC_SUCCESS) {
+        if (!box_buffer.empty()) {
+          EXPECT_EQ(false, seen_brob_end);
+          seen_brob_end = true;
+          size_t remaining = JxlDecoderReleaseBoxBuffer(dec);
+          box_num_output = box_buffer.size() - remaining;
+          EXPECT_EQ(box_num_output, box_brob_exif_size - 8);
+          EXPECT_EQ(
+              0, memcmp(box_buffer.data(), box_brob_exif + 8, box_num_output));
+          box_buffer.clear();
+        }
+        if (status == JXL_DEC_SUCCESS) break;
+        JxlBoxType type;
+        EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec, type, JXL_FALSE));
+        if (BoxTypeEquals("brob", type)) {
+          EXPECT_EQ(false, seen_brob_begin);
+          seen_brob_begin = true;
+          box_buffer.resize(8);  // Small on purpose; grown on demand below.
+          JxlDecoderSetBoxBuffer(dec, box_buffer.data(), box_buffer.size());
+        }
+      } else if (status == JXL_DEC_BOX_NEED_MORE_OUTPUT) {
+        size_t remaining = JxlDecoderReleaseBoxBuffer(dec);
+        box_num_output = box_buffer.size() - remaining;
+        box_buffer.resize(box_buffer.size() * 2);  // Keep bytes, add room.
+        JxlDecoderSetBoxBuffer(dec, box_buffer.data() + box_num_output,
+                               box_buffer.size() - box_num_output);
+      } else {
+        // We do not expect any other events or errors
+        FAIL();
+        break;
+      }
+    }
+
+    EXPECT_EQ(true, seen_brob_begin);
+    EXPECT_EQ(true, seen_brob_end);
+
+    JxlDecoderDestroy(dec);
+  }
+
+  // Test the brotli-decompressed brob box, which is then reported as Exif.
+  for (int streaming = 0; streaming < 2; ++streaming) {
+    JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, JXL_DEC_BOX));
+    if (!streaming) {
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSetInput(dec, compressed.data(), compressed.size()));
+      JxlDecoderCloseInput(dec);
+    }
+    // Streaming case only: input is handed over in step_size-byte slices.
+    const uint8_t* next_in = compressed.data();
+    size_t avail_in = 0;
+    size_t total_in = 0;
+    size_t step_size = 64;
+
+    std::vector<uint8_t> box_buffer;
+    size_t box_num_output;
+    bool seen_exif_begin = false;
+    bool seen_exif_end = false;
+
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetDecompressBoxes(dec, JXL_TRUE));
+
+    for (;;) {
+      JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+      if (status == JXL_DEC_NEED_MORE_INPUT) {
+        if (streaming) {
+          size_t remaining = JxlDecoderReleaseInput(dec);
+          EXPECT_LE(remaining, avail_in);
+          next_in += avail_in - remaining;
+          avail_in = remaining;
+          size_t amount = step_size;
+          if (total_in + amount > compressed.size()) {
+            amount = compressed.size() - total_in;
+          }
+          avail_in += amount;
+          total_in += amount;
+          EXPECT_EQ(JXL_DEC_SUCCESS,
+                    JxlDecoderSetInput(dec, next_in, avail_in));
+          if (total_in == compressed.size()) JxlDecoderCloseInput(dec);
+        } else {
+          FAIL();  // The complete input was already set and closed.
+          break;
+        }
+      } else if (status == JXL_DEC_BOX || status == JXL_DEC_SUCCESS) {
+        if (!box_buffer.empty()) {
+          EXPECT_EQ(false, seen_exif_end);
+          seen_exif_end = true;
+          size_t remaining = JxlDecoderReleaseBoxBuffer(dec);
+          box_num_output = box_buffer.size() - remaining;
+          // Expect that the output has the same size and contents as the
+          // uncompressed exif data. Only check contents if the sizes match to
+          // avoid comparing uninitialized memory in the test.
+          EXPECT_EQ(box_num_output, exif_uncompressed_size);
+          if (box_num_output == exif_uncompressed_size) {
+            EXPECT_EQ(0, memcmp(box_buffer.data(), exif_uncompressed,
+                                exif_uncompressed_size));
+          }
+          box_buffer.clear();
+        }
+        if (status == JXL_DEC_SUCCESS) break;
+        JxlBoxType type;
+        EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec, type, JXL_TRUE));
+        if (BoxTypeEquals("Exif", type)) {
+          EXPECT_EQ(false, seen_exif_begin);
+          seen_exif_begin = true;
+          box_buffer.resize(8);  // Small on purpose; grown on demand below.
+          JxlDecoderSetBoxBuffer(dec, box_buffer.data(), box_buffer.size());
+        }
+      } else if (status == JXL_DEC_BOX_NEED_MORE_OUTPUT) {
+        size_t remaining = JxlDecoderReleaseBoxBuffer(dec);
+        box_num_output = box_buffer.size() - remaining;
+        box_buffer.resize(box_buffer.size() * 2);  // Keep bytes, add room.
+        JxlDecoderSetBoxBuffer(dec, box_buffer.data() + box_num_output,
+                               box_buffer.size() - box_num_output);
+      } else {
+        // We do not expect any other events or errors
+        FAIL();
+        break;
+      }
+    }
+
+    EXPECT_EQ(true, seen_exif_begin);
+    EXPECT_EQ(true, seen_exif_end);
+
+    JxlDecoderDestroy(dec);
+  }
+}
+
+TEST(DecodeTest, JXL_BOXES_TEST(PartialCodestreamBoxTest)) {
+  size_t xsize = 23, ysize = 81;
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+  JxlPixelFormat format_orig = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  // Lossless to verify pixels exactly after roundtrip.
+  jxl::TestCodestreamParams params;
+  params.cparams.SetLossless();
+  params.cparams.speed_tier = jxl::SpeedTier::kThunder;
+  params.box_format = kCSBF_Multi;
+  params.add_icc_profile = true;
+  jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 4,
+      params);
+
+  std::vector<uint8_t> extracted_codestream;
+
+  {
+    JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSubscribeEvents(
+                  dec, JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE | JXL_DEC_BOX));
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSetInput(dec, compressed.data(), compressed.size()));
+    JxlDecoderCloseInput(dec);
+
+    size_t num_jxlp = 0;
+
+    std::vector<uint8_t> pixels2;
+    pixels2.resize(pixels.size());
+
+    std::vector<uint8_t> box_buffer;
+    size_t box_num_output;
+
+    for (;;) {
+      JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+      if (status == JXL_DEC_NEED_MORE_INPUT) {
+        FAIL();  // All input was set and closed up front.
+        break;
+      } else if (status == JXL_DEC_BASIC_INFO) {
+        JxlBasicInfo info;
+        EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+        EXPECT_EQ(info.xsize, xsize);
+        EXPECT_EQ(info.ysize, ysize);
+      } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+        EXPECT_EQ(JXL_DEC_SUCCESS,
+                  JxlDecoderSetImageOutBuffer(dec, &format_orig, pixels2.data(),
+                                              pixels2.size()));
+      } else if (status == JXL_DEC_FULL_IMAGE) {
+        continue;
+      } else if (status == JXL_DEC_BOX || status == JXL_DEC_SUCCESS) {
+        if (!box_buffer.empty()) {
+          size_t remaining = JxlDecoderReleaseBoxBuffer(dec);
+          box_num_output = box_buffer.size() - remaining;
+          EXPECT_GE(box_num_output, 4);
+          // Do not insert the first 4 bytes, which are not part of the
+          // codestream, but the partial codestream box index
+          extracted_codestream.insert(extracted_codestream.end(),
+                                      box_buffer.begin() + 4,
+                                      box_buffer.begin() + box_num_output);
+          box_buffer.clear();
+        }
+        if (status == JXL_DEC_SUCCESS) break;
+        JxlBoxType type;
+        EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec, type, JXL_FALSE));
+        if (BoxTypeEquals("jxlp", type)) {
+          num_jxlp++;
+          box_buffer.resize(8);  // Small on purpose; grown on demand below.
+          JxlDecoderSetBoxBuffer(dec, box_buffer.data(), box_buffer.size());
+        }
+      } else if (status == JXL_DEC_BOX_NEED_MORE_OUTPUT) {
+        size_t remaining = JxlDecoderReleaseBoxBuffer(dec);
+        box_num_output = box_buffer.size() - remaining;
+        box_buffer.resize(box_buffer.size() * 2);  // Keep bytes, add room.
+        JxlDecoderSetBoxBuffer(dec, box_buffer.data() + box_num_output,
+                               box_buffer.size() - box_num_output);
+      } else {
+        // We do not expect any other events or errors
+        FAIL();
+        break;
+      }
+    }
+
+    // The test file created with kCSBF_Multi is expected to have 4 jxlp boxes.
+    EXPECT_EQ(4, num_jxlp);
+
+    EXPECT_EQ(0u, jxl::test::ComparePixels(pixels.data(), pixels2.data(), xsize,
+                                           ysize, format_orig, format_orig));
+
+    JxlDecoderDestroy(dec);
+  }
+
+  // Now test whether the codestream extracted from the jxlp boxes can itself
+  // also be decoded and gives the same pixels
+  {
+    JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSubscribeEvents(
+                  dec, JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE | JXL_DEC_BOX));
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSetInput(dec, extracted_codestream.data(),
+                                 extracted_codestream.size()));
+    JxlDecoderCloseInput(dec);
+
+    size_t num_boxes = 0;
+
+    std::vector<uint8_t> pixels2;
+    pixels2.resize(pixels.size());
+
+    std::vector<uint8_t> box_buffer;
+    size_t box_num_output;
+
+    for (;;) {
+      JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+      if (status == JXL_DEC_NEED_MORE_INPUT) {
+        FAIL();  // All input was set and closed up front.
+        break;
+      } else if (status == JXL_DEC_BASIC_INFO) {
+        JxlBasicInfo info;
+        EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+        EXPECT_EQ(info.xsize, xsize);
+        EXPECT_EQ(info.ysize, ysize);
+      } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+        EXPECT_EQ(JXL_DEC_SUCCESS,
+                  JxlDecoderSetImageOutBuffer(dec, &format_orig, pixels2.data(),
+                                              pixels2.size()));
+      } else if (status == JXL_DEC_FULL_IMAGE) {
+        continue;
+      } else if (status == JXL_DEC_BOX) {
+        num_boxes++;  // Only counted; no box buffer is set for it.
+      } else if (status == JXL_DEC_BOX_NEED_MORE_OUTPUT) {
+        size_t remaining = JxlDecoderReleaseBoxBuffer(dec);
+        box_num_output = box_buffer.size() - remaining;
+        box_buffer.resize(box_buffer.size() * 2);
+        JxlDecoderSetBoxBuffer(dec, box_buffer.data() + box_num_output,
+                               box_buffer.size() - box_num_output);
+      } else if (status == JXL_DEC_SUCCESS) {
+        break;
+      } else {
+        // We do not expect any other events or errors
+        FAIL();
+        break;
+      }
+    }
+
+    EXPECT_EQ(0, num_boxes);  // The data does not use the container format.
+    EXPECT_EQ(0u, jxl::test::ComparePixels(pixels.data(), pixels2.data(), xsize,
+                                           ysize, format_orig, format_orig));
+
+    JxlDecoderDestroy(dec);
+  }
+}
+
+TEST(DecodeTest, SpotColorTest) {
+  jxl::ThreadPool* pool = nullptr;
+  jxl::CodecInOut io;
+  size_t xsize = 55, ysize = 257;
+  io.metadata.m.color_encoding = jxl::ColorEncoding::LinearSRGB();
+  jxl::Image3F main(xsize, ysize);
+  jxl::ImageF spot(xsize, ysize);
+  jxl::ZeroFillImage(&main);
+  jxl::ZeroFillImage(&spot);
+
+  for (size_t y = 0; y < ysize; y++) {
+    float* JXL_RESTRICT rowm = main.PlaneRow(1, y);  // Only plane 1 gets data.
+    float* JXL_RESTRICT rows = spot.Row(y);
+    for (size_t x = 0; x < xsize; x++) {
+      rowm[x] = (x + y) * (1.f / 255.f);
+      rows[x] = ((x ^ y) & 255) * (1.f / 255.f);
+    }
+  }
+  io.SetFromImage(std::move(main), jxl::ColorEncoding::LinearSRGB());
+  jxl::ExtraChannelInfo info;
+  info.bit_depth.bits_per_sample = 8;
+  info.dim_shift = 0;
+  info.type = jxl::ExtraChannel::kSpotColor;
+  info.spot_color[0] = 0.5f;
+  info.spot_color[1] = 0.2f;
+  info.spot_color[2] = 1.f;
+  info.spot_color[3] = 0.5f;
+
+  io.metadata.m.extra_channel_info.push_back(info);
+  std::vector<jxl::ImageF> ec;
+  ec.push_back(std::move(spot));
+  io.frames[0].SetExtraChannels(std::move(ec));
+
+  jxl::CompressParams cparams;
+  cparams.speed_tier = jxl::SpeedTier::kLightning;
+  cparams.modular_mode = true;
+  cparams.color_transform = jxl::ColorTransform::kNone;
+  cparams.butteraugli_distance = 0.f;
+
+  jxl::PaddedBytes compressed;
+  std::unique_ptr<jxl::PassesEncoderState> enc_state =
+      jxl::make_unique<jxl::PassesEncoderState>();
+  EXPECT_TRUE(jxl::EncodeFile(cparams, &io, enc_state.get(), &compressed,
+                              jxl::GetJxlCms(), nullptr, pool));
+
+  for (size_t render_spot = 0; render_spot < 2; render_spot++) {
+    JxlPixelFormat format = {3, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+
+    JxlDecoder* dec = JxlDecoderCreate(NULL);
+
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSubscribeEvents(
+                  dec, JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE));
+    if (!render_spot) {
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetRenderSpotcolors(dec, JXL_FALSE));
+    }
+
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSetInput(dec, compressed.data(), compressed.size()));
+    EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+    JxlBasicInfo binfo;
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &binfo));
+    EXPECT_EQ(1u, binfo.num_extra_channels);
+    EXPECT_EQ(xsize, binfo.xsize);
+    EXPECT_EQ(ysize, binfo.ysize);
+
+    JxlExtraChannelInfo extra_info;
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderGetExtraChannelInfo(dec, 0, &extra_info));
+    EXPECT_EQ((unsigned int)jxl::ExtraChannel::kSpotColor, extra_info.type);
+
+    EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+    size_t buffer_size;
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+    size_t extra_size;
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderExtraChannelBufferSize(dec, &format, &extra_size, 0));
+
+    std::vector<uint8_t> image(buffer_size);
+    std::vector<uint8_t> extra(extra_size);
+    size_t bytes_per_pixel = format.num_channels *
+                             jxl::test::GetDataBits(format.data_type) /
+                             jxl::kBitsPerByte;
+    size_t stride = bytes_per_pixel * binfo.xsize;
+
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                   dec, &format, image.data(), image.size()));
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSetExtraChannelBuffer(dec, &format, extra.data(),
+                                              extra.size(), 0));
+
+    EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+
+    // After the full image was output, JxlDecoderProcessInput should return
+    // success to indicate all is done.
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+    JxlDecoderDestroy(dec);
+
+    for (size_t y = 0; y < ysize; y++) {
+      uint8_t* JXL_RESTRICT rowm = image.data() + stride * y;
+      uint8_t* JXL_RESTRICT rows = extra.data() + xsize * y;
+      for (size_t x = 0; x < xsize; x++) {
+        if (!render_spot) {
+          // if spot color isn't rendered, main image should be as we made it
+          // (red and blue are all zeroes)
+
+          EXPECT_EQ(rowm[x * 3 + 0], 0);
+          EXPECT_EQ(rowm[x * 3 + 1], (x + y > 255 ? 255 : x + y));
+          EXPECT_EQ(rowm[x * 3 + 2], 0);
+        }
+        if (render_spot) {
+          // With the spot color rendered, red and blue track the spot channel.
+          // NOTE(review): unqualified abs() on a float; confirm the overload.
+          EXPECT_LT(abs(rowm[x * 3 + 0] - (rows[x] * 0.25f)), 1);
+          EXPECT_LT(abs(rowm[x * 3 + 2] - (rows[x] * 0.5f)), 1);
+        }
+        EXPECT_EQ(rows[x], ((x ^ y) & 255));
+      }
+    }
+  }
+}
+
+TEST(DecodeTest, CloseInput) {
+  // With a single byte the decoder must ask for more input until it is closed.
+  const std::vector<uint8_t> truncated = {0xff};
+  JxlDecoderPtr owner = JxlDecoderMake(nullptr);
+  JxlDecoder* const dec = owner.get();
+  const int wanted = JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, wanted));
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetInput(dec, truncated.data(), truncated.size()));
+  EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+  EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+  JxlDecoderCloseInput(dec);
+  EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderProcessInput(dec));
+}
diff --git a/third-party/libjxl/libjxl/lib/jxl/decode_to_jpeg.cc b/third-party/libjxl/libjxl/lib/jxl/decode_to_jpeg.cc
new file mode 100644
index 0000000000..40d8b1354d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/decode_to_jpeg.cc
@@ -0,0 +1,169 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/decode_to_jpeg.h"
+
+namespace jxl {
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+
+JxlDecoderStatus JxlToJpegDecoder::Process(const uint8_t** next_in,
+                                           size_t* avail_in) {
+  // Feeds bytes of the JPEG reconstruction box to the JPEGData parser.
+  // Bytes taken from the caller are either decoded directly or accumulated in
+  // buffer_ until the box is complete (or, for an unbounded box, until a
+  // decode attempt succeeds).
+  // Returns JXL_DEC_JPEG_RECONSTRUCTION once the data decoded, JXL_DEC_ERROR
+  // on a parse failure and JXL_DEC_NEED_MORE_INPUT otherwise; next_in and
+  // avail_in are advanced past the bytes that were consumed.
+  if (!inside_box_) {
+    JXL_UNREACHABLE(
+        "processing of JPEG reconstruction data outside JPEG reconstruction "
+        "box");
+  }
+
+  const bool bounded = !box_until_eof_;
+
+  // An unbounded box swallows everything that is available; a bounded box
+  // takes at most the number of bytes still missing from it.
+  const size_t take =
+      bounded ? std::min<size_t>(*avail_in, box_size_ - buffer_.size())
+              : *avail_in;
+  Span<const uint8_t> to_decode(*next_in, take);
+  *next_in += take;
+  *avail_in -= take;
+
+  // When earlier calls left bytes behind, decoding must see them together
+  // with the new ones, so the new bytes are appended and the span is pointed
+  // at the whole buffer.
+  const bool had_leftover = !buffer_.empty();
+  if (had_leftover) {
+    buffer_.insert(buffer_.end(), to_decode.data(),
+                   to_decode.data() + to_decode.size());
+    to_decode = Span<const uint8_t>(buffer_.data(), buffer_.size());
+  }
+  if (bounded && to_decode.size() > box_size_) {
+    JXL_UNREACHABLE("JPEG reconstruction data to decode larger than expected");
+  }
+
+  if (!bounded || to_decode.size() == box_size_) {
+    // Either the size is unknown or the full box is present: try to decode.
+    jpeg_data_ = make_unique<jpeg::JPEGData>();
+    const auto status = jpeg::DecodeJPEGData(to_decode, jpeg_data_.get());
+    if (status.IsFatalError()) return JXL_DEC_ERROR;
+    if (status) {
+      // Done with the box; leave box-parsing state before emitting the event.
+      inside_box_ = false;
+      return JXL_DEC_JPEG_RECONSTRUCTION;
+    }
+    // A complete bounded box that does not decode is an error; an unbounded
+    // one is assumed to be merely incomplete.
+    if (bounded) return JXL_DEC_ERROR;
+  }
+
+  // More input is required. Keep what was consumed, unless it already lives
+  // in buffer_.
+  if (!had_leftover) {
+    buffer_.insert(buffer_.end(), to_decode.data(),
+                   to_decode.data() + to_decode.size());
+  }
+  return JXL_DEC_NEED_MORE_INPUT;
+}
+
+size_t JxlToJpegDecoder::NumExifMarkers(const jpeg::JPEGData& jpeg_data) {
+  // Tally of app_data entries whose parallel app_marker_type is kExif.
+  size_t exif_count = 0;
+  for (size_t idx = 0, total = jpeg_data.app_data.size(); idx != total; ++idx) {
+    const auto kind = jpeg_data.app_marker_type[idx];
+    if (kind == jxl::jpeg::AppMarkerType::kExif) ++exif_count;
+  }
+  return exif_count;
+}
+
+size_t JxlToJpegDecoder::NumXmpMarkers(const jpeg::JPEGData& jpeg_data) {
+  // Tally of app_data entries whose parallel app_marker_type is kXMP.
+  size_t xmp_count = 0;
+  for (size_t idx = 0, total = jpeg_data.app_data.size(); idx != total; ++idx) {
+    const auto kind = jpeg_data.app_marker_type[idx];
+    if (kind == jxl::jpeg::AppMarkerType::kXMP) ++xmp_count;
+  }
+  return xmp_count;
+}
+
+JxlDecoderStatus JxlToJpegDecoder::ExifBoxContentSize(
+    const jpeg::JPEGData& jpeg_data, size_t* size) {
+  // App marker header: 3 leading bytes followed by the Exif tag.
+  constexpr size_t kHeaderSize = 3 + sizeof(jpeg::kExifTag);
+  for (size_t idx = 0; idx < jpeg_data.app_data.size(); ++idx) {
+    // Only the first Exif marker is looked at.
+    const auto kind = jpeg_data.app_marker_type[idx];
+    if (kind != jxl::jpeg::AppMarkerType::kExif) continue;
+    const size_t marker_size = jpeg_data.app_data[idx].size();
+    if (marker_size < kHeaderSize) return JXL_DEC_ERROR;  // Header cut short.
+    // The box contents start with a 4-byte TIFF header that the JPEG omits.
+    *size = marker_size + 4 - kHeaderSize;
+    return JXL_DEC_SUCCESS;
+  }
+  return JXL_DEC_ERROR;
+}
+
+JxlDecoderStatus JxlToJpegDecoder::XmlBoxContentSize(
+    const jpeg::JPEGData& jpeg_data, size_t* size) {
+  // App marker header: 3 leading bytes followed by the XMP tag.
+  constexpr size_t kHeaderSize = 3 + sizeof(jpeg::kXMPTag);
+  for (size_t idx = 0; idx < jpeg_data.app_data.size(); ++idx) {
+    const auto kind = jpeg_data.app_marker_type[idx];
+    if (kind != jxl::jpeg::AppMarkerType::kXMP) continue;
+    const size_t marker_size = jpeg_data.app_data[idx].size();
+    if (marker_size < kHeaderSize) return JXL_DEC_ERROR;  // Header cut short.
+    *size = marker_size - kHeaderSize;
+    return JXL_DEC_SUCCESS;
+  }
+  return JXL_DEC_ERROR;
+}
+
+JxlDecoderStatus JxlToJpegDecoder::SetExif(const uint8_t* data, size_t size,
+                                           jpeg::JPEGData* jpeg_data) {
+  // Copies Exif box contents into the first Exif app marker; fails when there
+  // is none or the sizes disagree. The box must hold at least its 4-byte TIFF
+  // header, otherwise `size - 4` below would wrap around.
+  if (size < 4) return JXL_DEC_ERROR;
+  constexpr size_t kHeaderSize = 3 + sizeof(jpeg::kExifTag);
+  for (size_t i = 0; i < jpeg_data->app_data.size(); ++i) {
+    const auto kind = jpeg_data->app_marker_type[i];
+    if (kind != jxl::jpeg::AppMarkerType::kExif) continue;
+    auto& marker = jpeg_data->app_data[i];
+    if (marker.size() != kHeaderSize + size - 4) return JXL_DEC_ERROR;
+    // Byte 0 holds the marker code; bytes 1 and 2 are already filled in.
+    marker[0] = 0xE1;
+    memcpy(marker.data() + 3, jpeg::kExifTag, sizeof(jpeg::kExifTag));
+    // Skip the TIFF header of the box: it is not part of the JPEG payload.
+    memcpy(marker.data() + kHeaderSize, data + 4, size - 4);
+    return JXL_DEC_SUCCESS;
+  }
+  return JXL_DEC_ERROR;
+}
+JxlDecoderStatus JxlToJpegDecoder::SetXmp(const uint8_t* data, size_t size,
+                                          jpeg::JPEGData* jpeg_data) {
+  // Copies XMP box contents into the first XMP app marker, whose size must be
+  // exactly the 3-byte prefix plus the XMP tag plus `size` payload bytes.
+  constexpr size_t kHeaderSize = 3 + sizeof(jpeg::kXMPTag);
+  for (size_t idx = 0; idx < jpeg_data->app_data.size(); ++idx) {
+    const auto kind = jpeg_data->app_marker_type[idx];
+    if (kind != jxl::jpeg::AppMarkerType::kXMP) continue;
+    auto& marker = jpeg_data->app_data[idx];
+    if (marker.size() != size + kHeaderSize) return JXL_DEC_ERROR;
+    // Byte 0 holds the marker code; bytes 1 and 2 are already filled in.
+    marker[0] = 0xE1;
+    memcpy(marker.data() + 3, jpeg::kXMPTag, sizeof(jpeg::kXMPTag));
+    memcpy(marker.data() + kHeaderSize, data, size);
+    return JXL_DEC_SUCCESS;
+  }
+  return JXL_DEC_ERROR;
+}
+
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/decode_to_jpeg.h b/third-party/libjxl/libjxl/lib/jxl/decode_to_jpeg.h
new file mode 100644
index 0000000000..a64ace27a2
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/decode_to_jpeg.h
@@ -0,0 +1,217 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DECODE_TO_JPEG_H_
+#define LIB_JXL_DECODE_TO_JPEG_H_
+
+// JPEG XL to JPEG bytes decoder logic. The JxlToJpegDecoder class keeps track
+// of the decoder state needed to parse the JPEG reconstruction box and provide
+// the reconstructed JPEG to the output buffer.
+
+#include <jxl/decode.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <memory>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"  // JPEGXL_ENABLE_TRANSCODE_JPEG
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/jpeg/dec_jpeg_data.h"
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+#include "lib/jxl/jpeg/dec_jpeg_data_writer.h"
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+namespace jxl {
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+
+class JxlToJpegDecoder {
+ public:
+  // Returns whether an output buffer is set.
+  bool IsOutputSet() const { return next_out_ != nullptr; }
+
+  // Returns whether the decoder is currently inside a JPEG reconstruction box.
+  bool IsParsingBox() const { return inside_box_; }
+
+  // Sets the JPEG output buffer; fails if one is already set.
+  JxlDecoderStatus SetOutputBuffer(uint8_t* data, size_t size) {
+    if (next_out_) return JXL_DEC_ERROR;
+    next_out_ = data;
+    avail_size_ = size;
+    return JXL_DEC_SUCCESS;
+  }
+
+  // Releases the output buffer and returns its number of unused bytes.
+  size_t ReleaseOutputBuffer() {
+    size_t result = avail_size_;
+    next_out_ = nullptr;
+    avail_size_ = 0;
+    return result;
+  }
+
+  // Begins parsing a JPEG reconstruction box; contents_size is only used when
+  // the box does not extend until EOF.
+  void StartBox(bool box_until_eof, size_t contents_size) {
+    // A new box implies that we clear the buffer.
+    buffer_.clear();
+    inside_box_ = true;
+    // Assign unconditionally: a stale `true` from an earlier box must not leak.
+    box_until_eof_ = box_until_eof;
+    if (!box_until_eof) box_size_ = contents_size;
+  }
+
+  // Consumes data from next_in/avail_in to reconstruct JPEG data.
+  // Uses box_size_, inside_box_ and box_until_eof_ to calculate how much to
+  // consume. Potentially stores unparsed data in buffer_.
+  // Potentially populates jpeg_data_. Potentially updates inside_box_.
+  // Returns JXL_DEC_JPEG_RECONSTRUCTION when finished, JXL_DEC_NEED_MORE_INPUT
+  // if more input is needed, JXL_DEC_ERROR on parsing error.
+  JxlDecoderStatus Process(const uint8_t** next_in, size_t* avail_in);
+
+  // Returns a non-owning pointer to the JPEGData. Only valid after Process
+  // finished and while the JPEGData was not yet moved to an image bundle with
+  // SetImageBundleJpegData.
+  jpeg::JPEGData* GetJpegData() { return jpeg_data_.get(); }
+
+  // Returns how many exif or xmp app markers are present in the JPEG data. A
+  // return value higher than 1 would require multiple exif boxes or multiple
+  // xmp boxes in the container format, and this is not supported by the API and
+  // considered an error. May only be called after Process returned success.
+  static size_t NumExifMarkers(const jpeg::JPEGData& jpeg_data);
+  static size_t NumXmpMarkers(const jpeg::JPEGData& jpeg_data);
+
+  // Returns box content size for metadata, using the known data from the app
+  // markers.
+  static JxlDecoderStatus ExifBoxContentSize(const jpeg::JPEGData& jpeg_data,
+                                             size_t* size);
+  static JxlDecoderStatus XmlBoxContentSize(const jpeg::JPEGData& jpeg_data,
+                                            size_t* size);
+
+  // Returns JXL_DEC_ERROR if there is no exif/XMP marker or the data size
+  // does not match, or this function is called before Process returned
+  // success, JXL_DEC_SUCCESS otherwise. As input, provide the full box contents
+  // but not the box header. In case of exif, this includes the 4-byte TIFF
+  // header, even though it won't be copied into the JPEG.
+  static JxlDecoderStatus SetExif(const uint8_t* data, size_t size,
+                                  jpeg::JPEGData* jpeg_data);
+  static JxlDecoderStatus SetXmp(const uint8_t* data, size_t size,
+                                 jpeg::JPEGData* jpeg_data);
+
+  // Sets the JpegData of the ImageBundle passed if there is anything to set.
+  // Releases the JpegData from this decoder if set.
+  Status SetImageBundleJpegData(ImageBundle* ib) {
+    if (IsOutputSet() && jpeg_data_ != nullptr) {
+      if (!jpeg::SetJPEGDataFromICC(ib->metadata()->color_encoding.ICC(),
+                                    jpeg_data_.get())) {
+        return false;
+      }
+      ib->jpeg_data.reset(jpeg_data_.release());
+    }
+    return true;
+  }
+
+  JxlDecoderStatus WriteOutput(const jpeg::JPEGData& jpeg_data) {
+    // Work on copies so that a failed write leaves next_out_/avail_size_ as is.
+    uint8_t* tmp_next_out = next_out_;
+    size_t tmp_avail_size = avail_size_;
+    auto write = [&tmp_next_out, &tmp_avail_size](const uint8_t* buf,
+                                                  size_t len) {
+      size_t to_write = std::min<size_t>(tmp_avail_size, len);
+      if (to_write != 0) memcpy(tmp_next_out, buf, to_write);
+      tmp_next_out += to_write;
+      tmp_avail_size -= to_write;
+      return to_write;
+    };
+    Status write_result = jpeg::WriteJpeg(jpeg_data, write);
+    if (!write_result) {
+      if (tmp_avail_size == 0) {
+        return JXL_DEC_JPEG_NEED_MORE_OUTPUT;
+      }
+      return JXL_DEC_ERROR;
+    }
+    next_out_ = tmp_next_out;
+    avail_size_ = tmp_avail_size;
+    return JXL_DEC_SUCCESS;
+  }
+
+ private:
+  // Content of the most recently parsed JPEG reconstruction box if any.
+  std::vector<uint8_t> buffer_;
+
+  // Decoded content of the most recently parsed JPEG reconstruction box is
+  // stored here.
+  std::unique_ptr<jpeg::JPEGData> jpeg_data_;
+
+  // True if the decoder is currently reading bytes inside a JPEG reconstruction
+  // box.
+  bool inside_box_ = false;
+
+  // True if the JPEG reconstruction box had undefined size (all remaining
+  // bytes).
+  bool box_until_eof_ = false;
+  // Size of most recently parsed JPEG reconstruction box contents.
+  size_t box_size_ = 0;
+
+  // Next bytes to write JPEG reconstruction to.
+  uint8_t* next_out_ = nullptr;
+  // Available bytes to write JPEG reconstruction to.
+  size_t avail_size_ = 0;
+};
+
+#else
+
+// Stub for builds without JPEG transcoding: operations fail or do nothing.
+class JxlToJpegDecoder {
+ public:
+  bool IsOutputSet() const { return false; }
+  bool IsParsingBox() const { return false; }
+
+  JxlDecoderStatus SetOutputBuffer(uint8_t* /* data */, size_t /* size */) {
+    return JXL_DEC_ERROR;
+  }
+  size_t ReleaseOutputBuffer() { return 0; }
+
+  void StartBox(bool /* box_until_eof */, size_t /* contents_size */) {}
+
+  JxlDecoderStatus Process(const uint8_t** /*next_in*/, size_t* /*avail_in*/) {
+    return JXL_DEC_ERROR;
+  }
+  jpeg::JPEGData* GetJpegData() { return nullptr; }
+
+  Status SetImageBundleJpegData(ImageBundle* /* ib */) { return true; }
+  // Metadata helpers; return types match the transcoding-enabled class.
+  static size_t NumExifMarkers(const jpeg::JPEGData& /*jpeg_data*/) {
+    return 0;
+  }
+  static size_t NumXmpMarkers(const jpeg::JPEGData& /*jpeg_data*/) { return 0; }
+  static JxlDecoderStatus ExifBoxContentSize(
+      const jpeg::JPEGData& /*jpeg_data*/, size_t* /*size*/) {
+    return JXL_DEC_ERROR;
+  }
+  static JxlDecoderStatus XmlBoxContentSize(
+      const jpeg::JPEGData& /*jpeg_data*/, size_t* /*size*/) {
+    return JXL_DEC_ERROR;
+  }
+  static JxlDecoderStatus SetExif(const uint8_t* /*data*/, size_t /*size*/,
+                                  jpeg::JPEGData* /*jpeg_data*/) {
+    return JXL_DEC_ERROR;
+  }
+  static JxlDecoderStatus SetXmp(const uint8_t* /*data*/, size_t /*size*/,
+                                 jpeg::JPEGData* /*jpeg_data*/) {
+    return JXL_DEC_ERROR;
+  }
+
+  JxlDecoderStatus WriteOutput(const jpeg::JPEGData& /* jpeg_data */) {
+    return JXL_DEC_SUCCESS;
+  }
+};
+
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DECODE_TO_JPEG_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_ac_strategy.cc b/third-party/libjxl/libjxl/lib/jxl/enc_ac_strategy.cc
new file mode 100644
index 0000000000..44e6f049eb
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_ac_strategy.cc
@@ -0,0 +1,1200 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_ac_strategy.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstdio>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_ac_strategy.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_debug_image.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_transforms-inl.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/fast_math-inl.h"
+
+// Some of the floating point constants in this file and in other
+// files in the libjxl project have been obtained using the
+// tools/optimizer/simplex_fork.py tool. It is a variation of
+// Nelder-Mead optimization, and we generally try to minimize
+// BPP * pnorm aggregate as reported by the benchmark_xl tool,
+// but occasionally the values are optimized by using additional
+// constraints such as maintaining a certain density, or ratio of
+// popularity of integral transforms. Jyrki visually reviews all
+// such changes and often makes manual changes to maintain good
+// visual quality to changes where butteraugli was not sufficiently
+// sensitive to some kind of degradation. Unfortunately image quality
+// is still more of an art than science.
+
+// Set JXL_DEBUG_AC_STRATEGY to 1 to enable debugging.
+#ifndef JXL_DEBUG_AC_STRATEGY
+#define JXL_DEBUG_AC_STRATEGY 0
+#endif
+
+// This must come before the begin/end_target, but HWY_ONCE is only true
+// after that, so use an "include guard".
+#ifndef LIB_JXL_ENC_AC_STRATEGY_
+#define LIB_JXL_ENC_AC_STRATEGY_
+// Parameters of the heuristic are marked with an OPTIMIZE comment.
+namespace jxl {
+namespace {
+
+// Debugging utilities.
+
+// Returns the 3-byte linear sRGB debug color of a valid raw AC strategy.
+const uint8_t* TypeColor(const uint8_t& raw_strategy) {
+  JXL_ASSERT(AcStrategy::IsRawStrategyValid(raw_strategy));
+  static_assert(AcStrategy::kNumValidStrategies == 27, "Change colors");
+  static constexpr uint8_t kColors[][3] = {
+      {0xFF, 0xFF, 0x00},  // DCT8
+      {0xFF, 0x80, 0x80},  // HORNUSS
+      {0xFF, 0x80, 0x80},  // DCT2x2
+      {0xFF, 0x80, 0x80},  // DCT4x4
+      {0x80, 0xFF, 0x00},  // DCT16x16
+      {0x00, 0xC0, 0x00},  // DCT32x32
+      {0xC0, 0xFF, 0x00},  // DCT16x8
+      {0xC0, 0xFF, 0x00},  // DCT8x16
+      {0x00, 0xFF, 0x00},  // DCT32x8
+      {0x00, 0xFF, 0x00},  // DCT8x32
+      {0x00, 0xFF, 0x00},  // DCT32x16
+      {0x00, 0xFF, 0x00},  // DCT16x32
+      {0xFF, 0x80, 0x00},  // DCT4x8
+      {0xFF, 0x80, 0x00},  // DCT8x4
+      {0xFF, 0xFF, 0x80},  // AFV0
+      {0xFF, 0xFF, 0x80},  // AFV1
+      {0xFF, 0xFF, 0x80},  // AFV2
+      {0xFF, 0xFF, 0x80},  // AFV3
+      {0x00, 0xC0, 0xFF},  // DCT64x64
+      {0x00, 0xFF, 0xFF},  // DCT64x32
+      {0x00, 0xFF, 0xFF},  // DCT32x64
+      {0x00, 0x40, 0xFF},  // DCT128x128
+      {0x00, 0x80, 0xFF},  // DCT128x64
+      {0x00, 0x80, 0xFF},  // DCT64x128
+      {0x00, 0x00, 0xC0},  // DCT256x256
+      {0x00, 0x00, 0xFF},  // DCT256x128
+      {0x00, 0x00, 0xFF},  // DCT128x256
+  };
+  return kColors[raw_strategy];  // points into the static table above
+}
+
+const uint8_t* TypeMask(const uint8_t& raw_strategy) {  // 64-entry debug mask
+  JXL_ASSERT(AcStrategy::IsRawStrategyValid(raw_strategy));
+  static_assert(AcStrategy::kNumValidStrategies == 27, "Add masks");
+  // First row and column are darkened separately; rows after AFV3 stay zero.
+  static constexpr uint8_t kMask[AcStrategy::kNumValidStrategies][64] = {
+      {
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+      },                           // DCT8
+      {
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 1, 0, 0, 1, 0, 0,  //
+          0, 0, 1, 0, 0, 1, 0, 0,  //
+          0, 0, 1, 1, 1, 1, 0, 0,  //
+          0, 0, 1, 0, 0, 1, 0, 0,  //
+          0, 0, 1, 0, 0, 1, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+      },                           // HORNUSS
+      {
+          1, 1, 1, 1, 1, 1, 1, 1,  //
+          1, 0, 1, 0, 1, 0, 1, 0,  //
+          1, 1, 1, 1, 1, 1, 1, 1,  //
+          1, 0, 1, 0, 1, 0, 1, 0,  //
+          1, 1, 1, 1, 1, 1, 1, 1,  //
+          1, 0, 1, 0, 1, 0, 1, 0,  //
+          1, 1, 1, 1, 1, 1, 1, 1,  //
+          1, 0, 1, 0, 1, 0, 1, 0,  //
+      },                           // 2x2
+      {
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          1, 1, 1, 1, 1, 1, 1, 1,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+      },                           // 4x4
+      {},                          // DCT16x16 (unused)
+      {},                          // DCT32x32 (unused)
+      {},                          // DCT16x8 (unused)
+      {},                          // DCT8x16 (unused)
+      {},                          // DCT32x8 (unused)
+      {},                          // DCT8x32 (unused)
+      {},                          // DCT32x16 (unused)
+      {},                          // DCT16x32 (unused)
+      {
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          1, 1, 1, 1, 1, 1, 1, 1,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+      },                           // DCT4x8
+      {
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+      },                           // DCT8x4
+      {
+          1, 1, 1, 1, 1, 0, 0, 0,  //
+          1, 1, 1, 1, 0, 0, 0, 0,  //
+          1, 1, 1, 0, 0, 0, 0, 0,  //
+          1, 1, 0, 0, 0, 0, 0, 0,  //
+          1, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+      },                           // AFV0
+      {
+          0, 0, 0, 0, 1, 1, 1, 1,  //
+          0, 0, 0, 0, 0, 1, 1, 1,  //
+          0, 0, 0, 0, 0, 0, 1, 1,  //
+          0, 0, 0, 0, 0, 0, 0, 1,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+      },                           // AFV1
+      {
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          1, 0, 0, 0, 0, 0, 0, 0,  //
+          1, 1, 0, 0, 0, 0, 0, 0,  //
+          1, 1, 1, 0, 0, 0, 0, 0,  //
+          1, 1, 1, 1, 0, 0, 0, 0,  //
+      },                           // AFV2
+      {
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 1,  //
+          0, 0, 0, 0, 0, 0, 1, 1,  //
+          0, 0, 0, 0, 0, 1, 1, 1,  //
+      },                           // AFV3
+  };
+  return kMask[raw_strategy];  // in bounds for every valid raw strategy
+}
+
+void DumpAcStrategy(const AcStrategyImage& ac_strategy, size_t xsize,
+                    size_t ysize, const char* tag, AuxOut* aux_out,
+                    const CompressParams& cparams) {
+  Image3F color_acs(xsize, ysize);
+  for (size_t y = 0; y < ysize; y++) {  // pass 1: fill with strategy colors
+    float* JXL_RESTRICT rows[3] = {
+        color_acs.PlaneRow(0, y),
+        color_acs.PlaneRow(1, y),
+        color_acs.PlaneRow(2, y),
+    };
+    const AcStrategyRow acs_row = ac_strategy.ConstRow(y / kBlockDim);
+    for (size_t x = 0; x < xsize; x++) {
+      AcStrategy acs = acs_row[x / kBlockDim];
+      const uint8_t* JXL_RESTRICT color = TypeColor(acs.RawStrategy());
+      for (size_t c = 0; c < 3; c++) {
+        rows[c][x] = color[c] / 255.f;
+      }
+    }
+  }
+  size_t stride = color_acs.PixelsPerRow();
+  for (size_t c = 0; c < 3; c++) {  // pass 2: overlay masks and block edges
+    for (size_t by = 0; by < DivCeil(ysize, kBlockDim); by++) {
+      float* JXL_RESTRICT row = color_acs.PlaneRow(c, by * kBlockDim);
+      const AcStrategyRow acs_row = ac_strategy.ConstRow(by);
+      for (size_t bx = 0; bx < DivCeil(xsize, kBlockDim); bx++) {
+        AcStrategy acs = acs_row[bx];
+        if (!acs.IsFirstBlock()) continue;  // decorate each transform once
+        const uint8_t* JXL_RESTRICT color = TypeColor(acs.RawStrategy());
+        const uint8_t* JXL_RESTRICT mask = TypeMask(acs.RawStrategy());
+        if (acs.covered_blocks_x() == 1 && acs.covered_blocks_y() == 1) {
+          for (size_t iy = 0; iy < kBlockDim && by * kBlockDim + iy < ysize;
+               iy++) {
+            for (size_t ix = 0; ix < kBlockDim && bx * kBlockDim + ix < xsize;
+                 ix++) {
+              if (mask[iy * kBlockDim + ix]) {
+                row[iy * stride + bx * kBlockDim + ix] = color[c] / 800.f;
+              }
+            }
+          }
+        }
+        // draw the top and left edges of the whole transform, clipped to image
+        for (size_t ix = 0; ix < kBlockDim * acs.covered_blocks_x() &&
+                            bx * kBlockDim + ix < xsize;
+             ix++) {
+          row[0 * stride + bx * kBlockDim + ix] = color[c] / 350.f;
+        }
+        for (size_t iy = 0; iy < kBlockDim * acs.covered_blocks_y() &&
+                            by * kBlockDim + iy < ysize;
+             iy++) {
+          row[iy * stride + bx * kBlockDim + 0] = color[c] / 350.f;
+        }
+      }
+    }
+  }
+  DumpImage(cparams, tag, color_acs);  // aux_out is not used in this function
+}
+
+}  // namespace
+}  // namespace jxl
+#endif  // LIB_JXL_ENC_AC_STRATEGY_
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::AbsDiff;
+using hwy::HWY_NAMESPACE::Eq;
+using hwy::HWY_NAMESPACE::IfThenElseZero;
+using hwy::HWY_NAMESPACE::IfThenZeroElse;
+using hwy::HWY_NAMESPACE::Round;
+using hwy::HWY_NAMESPACE::Sqrt;
+
+bool MultiBlockTransformCrossesHorizontalBoundary(
+    const AcStrategyImage& ac_strategy, size_t start_x, size_t y,
+    size_t end_x) {
+  if (start_x >= ac_strategy.xsize() || y >= ac_strategy.ysize()) {
+    return false;  // outside the strategy image: nothing to cross
+  }
+  if (y % 8 == 0) {
+    // Nothing crosses 64x64 boundaries, and the memory on the other side
+    // of the 64x64 block may still be uninitialized.
+    return false;
+  }
+  end_x = std::min(end_x, ac_strategy.xsize());
+  // The first multiblock might start before start_x, so step start_x back
+  // (no further than the enclosing multiple of 8) to the nearest block
+  // for which IsFirstBlock() == true.
+  AcStrategyRow row = ac_strategy.ConstRow(y);
+  const size_t start_x_limit = start_x & ~7;
+  while (start_x != start_x_limit && !row[start_x].IsFirstBlock()) {
+    --start_x;
+  }
+  for (size_t x = start_x; x < end_x;) {
+    if (row[x].IsFirstBlock()) {
+      x += row[x].covered_blocks_x();  // jump over the whole transform
+    } else {
+      return true;  // landed on a block that is not the start of a transform
+    }
+  }
+  return false;
+}
+
+bool MultiBlockTransformCrossesVerticalBoundary(
+    const AcStrategyImage& ac_strategy, size_t x, size_t start_y,
+    size_t end_y) {
+  if (x >= ac_strategy.xsize() || start_y >= ac_strategy.ysize()) {
+    return false;  // outside the strategy image: nothing to cross
+  }
+  if (x % 8 == 0) {
+    // Nothing crosses 64x64 boundaries, and the memory on the other side
+    // of the 64x64 block may still be uninitialized.
+    return false;
+  }
+  end_y = std::min(end_y, ac_strategy.ysize());
+  // The first multiblock might start before start_y, so step start_y back
+  // (no further than the enclosing multiple of 8) to the nearest block
+  // for which IsFirstBlock() == true.
+  const size_t start_y_limit = start_y & ~7;
+  while (start_y != start_y_limit &&
+         !ac_strategy.ConstRow(start_y)[x].IsFirstBlock()) {
+    --start_y;
+  }
+
+  for (size_t y = start_y; y < end_y;) {
+    AcStrategyRow row = ac_strategy.ConstRow(y);
+    if (row[x].IsFirstBlock()) {
+      y += row[x].covered_blocks_y();  // jump over the whole transform
+    } else {
+      return true;  // landed on a block that is not the start of a transform
+    }
+  }
+  return false;
+}
+
+static const float kChromaErrorWeight[AcStrategy::kNumValidStrategies] = {
+    0.95f,  // DCT = 0,
+    1.0f,   // IDENTITY = 1,
+    0.5f,   // DCT2X2 = 2,
+    1.0f,   // DCT4X4 = 3,
+    2.0f,   // DCT16X16 = 4,
+    2.0f,   // DCT32X32 = 5,
+    1.4f,   // DCT16X8 = 6,
+    1.4f,   // DCT8X16 = 7,
+    2.0f,   // DCT32X8 = 8,
+    2.0f,   // DCT8X32 = 9,
+    2.0f,   // DCT32X16 = 10,
+    2.0f,   // DCT16X32 = 11,
+    2.0f,   // DCT4X8 = 12,
+    2.0f,   // DCT8X4 = 13,
+    1.7f,   // AFV0 = 14,
+    1.7f,   // AFV1 = 15,
+    1.7f,   // AFV2 = 16,
+    1.7f,   // AFV3 = 17,
+    2.0f,   // DCT64X64 = 18,
+    2.0f,   // DCT64X32 = 19,
+    2.0f,   // DCT32X64 = 20,
+    2.0f,   // DCT128X128 = 21,
+    2.0f,   // DCT128X64 = 22,
+    2.0f,   // DCT64X128 = 23,
+    2.0f,   // DCT256X256 = 24,
+    2.0f,   // DCT256X128 = 25,
+    2.0f,   // DCT128X256 = 26,
+};  // indexed by raw strategy; scales the channel-0 entropy in EstimateEntropy
+
+// Per-strategy weight mixing the sum-of-abs and root-sum-of-squares loss
+// terms. For DCT the maximum error is roughly a sum of the values; for some
+// transforms, especially IDENTITY and DCT2X2, not all the coefficients affect
+// the maximum error. It would probably be better to do transforms back and
+// forth and look at the pixels, but that would slow the computation a lot.
+static const float kMixLossTable[AcStrategy::kNumValidStrategies] = {
+    1.0f,   // DCT = 0,
+    0.45f,  // IDENTITY = 1,
+    0.45f,  // DCT2X2 = 2,
+    0.7f,   // DCT4X4 = 3,
+    1.0f,   // DCT16X16 = 4,
+    1.0f,   // DCT32X32 = 5,
+    1.0f,   // DCT16X8 = 6,
+    1.0f,   // DCT8X16 = 7,
+    1.0f,   // DCT32X8 = 8,
+    1.0f,   // DCT8X32 = 9,
+    1.0f,   // DCT32X16 = 10,
+    1.0f,   // DCT16X32 = 11,
+    0.96f,  // DCT4X8 = 12,
+    0.96f,  // DCT8X4 = 13,
+    0.94f,  // AFV0 = 14,
+    0.94f,  // AFV1 = 15,
+    0.94f,  // AFV2 = 16,
+    0.94f,  // AFV3 = 17,
+    1.0f,   // DCT64X64 = 18,
+    1.0f,   // DCT64X32 = 19,
+    1.0f,   // DCT32X64 = 20,
+    1.0f,   // DCT128X128 = 21,
+    1.0f,   // DCT128X64 = 22,
+    1.0f,   // DCT64X128 = 23,
+    1.0f,   // DCT256X256 = 24,
+    1.0f,   // DCT256X128 = 25,
+    1.0f,   // DCT128X256 = 26,
+};
+
+float EstimateEntropy(const AcStrategy& acs, size_t x, size_t y,
+                      const ACSConfig& config,
+                      const float* JXL_RESTRICT cmap_factors, float* block,
+                      float* scratch_space, uint32_t* quantized) {  // unused
+  const size_t size = (1 << acs.log2_covered_blocks()) * kDCTBlockSize;
+
+  // Apply the candidate transform to each of the three channels.
+  for (size_t c = 0; c < 3; c++) {
+    float* JXL_RESTRICT block_c = block + size * c;
+    TransformFromPixels(acs.Strategy(), &config.Pixel(c, x, y),
+                        config.src_stride, block_c, scratch_space);
+  }
+  HWY_FULL(float) df;
+
+  const size_t num_blocks = acs.covered_blocks_x() * acs.covered_blocks_y();
+  // avoid large blocks when there is a lot going on in red-green.
+  float cmul[3] = {kChromaErrorWeight[acs.RawStrategy()], 1.0f, 1.0f};
+  float quant_norm8 = 0;
+  float masking = 0;
+  if (num_blocks == 1) {
+    // When it is only one 8x8, we don't need aggregation of values.
+    quant_norm8 = config.Quant(x / 8, y / 8);
+    masking = config.Masking(x / 8, y / 8);
+    // Make DCT2X2 more favored when area is exposed.
+    float kExposedMasking = 0.118f;
+    if (acs.RawStrategy() == 2 && masking >= kExposedMasking) {
+      masking = kExposedMasking + 0.56 * (masking - kExposedMasking);
+    }
+  } else if (num_blocks == 2) {
+    // Taking max instead of 8th norm seems to work
+    // better for smallest blocks up to 16x8. Jyrki couldn't get
+    // improvements in trying the same for 16x16 blocks.
+    if (acs.covered_blocks_y() == 2) {
+      quant_norm8 =
+          std::max(config.Quant(x / 8, y / 8), config.Quant(x / 8, y / 8 + 1));
+      masking = std::max(config.Masking(x / 8, y / 8),
+                         config.Masking(x / 8, y / 8 + 1));
+    } else {
+      quant_norm8 =
+          std::max(config.Quant(x / 8, y / 8), config.Quant(x / 8 + 1, y / 8));
+      masking = std::max(config.Masking(x / 8, y / 8),
+                         config.Masking(x / 8 + 1, y / 8));
+    }
+  } else {
+    float masking_norm2 = 0;
+    float masking_max = 0;
+    // Load QF value, calculate empirical heuristic on masking field
+    // for weighting the information loss. Information loss manifests
+    // itself as ringing, and masking could hide it.
+    for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+      for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+        float qval = config.Quant(x / 8 + ix, y / 8 + iy);
+        qval *= qval;
+        qval *= qval;
+        quant_norm8 += qval * qval;  // accumulates the 8th power of Quant
+        float maskval = config.Masking(x / 8 + ix, y / 8 + iy);
+        masking_max = std::max<float>(masking_max, maskval);
+        masking_norm2 += maskval * maskval;
+      }
+    }
+    quant_norm8 /= num_blocks;
+    quant_norm8 = FastPowf(quant_norm8, 1.0f / 8.0f);
+    masking_norm2 = sqrt(masking_norm2 / num_blocks);
+    // This is a highly empirical formula.
+    masking = 0.5 * (masking_norm2 + masking_max);
+  }
+  const auto q = Set(df, quant_norm8);
+
+  // Compute entropy and the rounding-error (information loss) accumulators.
+  float entropy = 0.0f;
+  auto info_loss = Zero(df);
+  auto info_loss2 = Zero(df);
+
+  for (size_t c = 0; c < 3; c++) {
+    const float* inv_matrix = config.dequant->InvMatrix(acs.RawStrategy(), c);
+    const auto cmap_factor = Set(df, cmap_factors[c]);
+
+    auto entropy_v = Zero(df);
+    auto nzeros_v = Zero(df);
+    for (size_t i = 0; i < num_blocks * kDCTBlockSize; i += Lanes(df)) {
+      const auto in = Load(df, block + c * size + i);
+      const auto in_y = Mul(Load(df, block + size + i), cmap_factor);
+      const auto im = Load(df, inv_matrix + i);
+      const auto val = Mul(Sub(in, in_y), Mul(im, q));
+      const auto rval = Round(val);
+      const auto diff = AbsDiff(val, rval);
+      info_loss = Add(info_loss, diff);
+      info_loss2 = MulAdd(diff, diff, info_loss2);
+      const auto q = Abs(rval);  // shadows the outer quantizer vector q
+      const auto q_is_zero = Eq(q, Zero(df));
+      // We used to have q * C here, but that cost model seems to
+      // be punishing large values more than necessary. Sqrt tries
+      // to avoid large values less aggressively.
+      entropy_v = Add(Sqrt(q), entropy_v);
+      nzeros_v = Add(nzeros_v, IfThenZeroElse(q_is_zero, Set(df, 1.0f)));
+    }
+    entropy += config.cost_delta * cmul[c] * GetLane(SumOfLanes(df, entropy_v));
+    size_t num_nzeros = GetLane(SumOfLanes(df, nzeros_v));
+    // Add #bit of num_nonzeros, as an estimate of the cost for encoding the
+    // number of non-zeros of the block.
+    size_t nbits = CeilLog2Nonzero(num_nzeros + 1) + 1;
+    // Also add #bit of #bit of num_nonzeros, to estimate the ANS cost, with a
+    // bias.
+    entropy += config.zeros_mul * (CeilLog2Nonzero(nbits + 17) + nbits);
+  }
+  const float kMixLoss = kMixLossTable[acs.RawStrategy()];
+  const float loss1 = GetLane(SumOfLanes(df, info_loss));
+  const float loss2 =
+      sqrt(GetLane(SumOfLanes(df, info_loss2)) * (num_blocks * 64));
+  const float loss = kMixLoss * (config.info_loss_multiplier * loss1) +
+                     (1.0 - kMixLoss) * (config.info_loss_multiplier2 * loss2);
+  const float kRegulateSurface = 11.5f;
+  float large_surface_error_mul =
+      (kRegulateSurface + sqrt(num_blocks)) * (1.0f / (kRegulateSurface + 1));
+  return entropy + large_surface_error_mul * masking * loss;
+}
+
+uint8_t FindBest8x8Transform(size_t x, size_t y, int encoding_speed_tier,
+                             const ACSConfig& config,
+                             const float* JXL_RESTRICT cmap_factors,
+                             AcStrategyImage* JXL_RESTRICT ac_strategy,
+                             float* block, float* scratch_space,
+                             uint32_t* quantized, float* entropy_out) {
+  struct TransformTry8x8 {
+    AcStrategy::Type type;
+    int encoding_speed_tier_max_limit;  // skipped when the tier exceeds this
+    float entropy_add;  // cost = entropy_add + entropy_mul * estimate
+    float entropy_mul;
+  };
+  static const TransformTry8x8 kTransforms8x8[] = {
+      {
+          AcStrategy::Type::DCT,
+          9,
+          3.0f,
+          0.785f,
+      },
+      {
+          AcStrategy::Type::DCT4X4,
+          5,
+          4.0f,
+          0.7f,
+      },
+      {
+          AcStrategy::Type::DCT2X2,
+          5,
+          0.0f,
+          0.685f,
+      },
+      {
+          AcStrategy::Type::DCT4X8,
+          4,
+          3.0f,
+          0.745f,
+      },
+      {
+          AcStrategy::Type::DCT8X4,
+          4,
+          3.0f,
+          0.745f,
+      },
+      {
+          AcStrategy::Type::IDENTITY,
+          5,
+          8.0f,
+          0.81217614513585534f,
+      },
+      {
+          AcStrategy::Type::AFV0,
+          4,
+          3.0f,
+          0.70086131125719425f,
+      },
+      {
+          AcStrategy::Type::AFV1,
+          4,
+          3.0f,
+          0.70086131125719425f,
+      },
+      {
+          AcStrategy::Type::AFV2,
+          4,
+          3.0f,
+          0.70086131125719425f,
+      },
+      {
+          AcStrategy::Type::AFV3,
+          4,
+          3.0f,
+          0.70086131125719425f,
+      },
+  };
+  double best = 1e30;  // sentinel above any candidate cost
+  uint8_t best_tx = kTransforms8x8[0].type;  // DCT when nothing beats `best`
+  for (auto tx : kTransforms8x8) {
+    if (tx.encoding_speed_tier_max_limit < encoding_speed_tier) {
+      continue;
+    }
+    AcStrategy acs = AcStrategy::FromRawStrategy(tx.type);
+    float entropy = EstimateEntropy(acs, x, y, config, cmap_factors, block,
+                                    scratch_space, quantized);
+    entropy = tx.entropy_add + tx.entropy_mul * entropy;
+    if (entropy < best) {  // strict: earlier table entries win ties
+      best_tx = tx.type;
+      best = entropy;
+    }
+  }
+  *entropy_out = best;  // ac_strategy is not touched by this function
+  return best_tx;
+}
+
+// Replaces the blocks under a candidate transform when it is estimated cheaper.
+// bx, by address the 64x64 block at 8x8 subresolution; cx, cy address the
+// left, upper 8x8 block position of the candidate transform inside it.
+void TryMergeAcs(AcStrategy::Type acs_raw, size_t bx, size_t by, size_t cx,
+                 size_t cy, const ACSConfig& config,
+                 const float* JXL_RESTRICT cmap_factors,
+                 AcStrategyImage* JXL_RESTRICT ac_strategy,
+                 const float entropy_mul, const uint8_t candidate_priority,
+                 uint8_t* priority, float* JXL_RESTRICT entropy_estimate,
+                 float* block, float* scratch_space, uint32_t* quantized) {
+  AcStrategy acs = AcStrategy::FromRawStrategy(acs_raw);
+  float entropy_current = 0;  // summed cost of the blocks currently in place
+  for (size_t iy = 0; iy < acs.covered_blocks_y(); ++iy) {
+    for (size_t ix = 0; ix < acs.covered_blocks_x(); ++ix) {
+      if (priority[(cy + iy) * 8 + (cx + ix)] >= candidate_priority) {
+        // Transform would reuse already allocated blocks and
+        // lead to invalid overlaps, for example DCT64X32 vs.
+        // DCT32X64.
+        return;
+      }
+      entropy_current += entropy_estimate[(cy + iy) * 8 + (cx + ix)];
+    }
+  }
+  float entropy_candidate =
+      entropy_mul * EstimateEntropy(acs, (bx + cx) * 8, (by + cy) * 8, config,
+                                    cmap_factors, block, scratch_space,
+                                    quantized);
+  if (entropy_candidate >= entropy_current) return;  // not cheaper: keep as is
+  // Accept the candidate: claim every covered block at the new priority.
+  for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+    for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+      entropy_estimate[(cy + iy) * 8 + cx + ix] = 0;
+      priority[(cy + iy) * 8 + cx + ix] = candidate_priority;
+    }
+  }
+  ac_strategy->Set(bx + cx, by + cy, acs_raw);
+  entropy_estimate[cy * 8 + cx] = entropy_candidate;  // cost kept on first block
+}
+
+static void SetEntropyForTransform(size_t cx, size_t cy,
+                                   const AcStrategy::Type acs_raw,
+                                   float entropy,
+                                   float* JXL_RESTRICT entropy_estimate) {
+  const AcStrategy acs = AcStrategy::FromRawStrategy(acs_raw);
+  for (size_t dy = 0; dy < acs.covered_blocks_y(); ++dy) {
+    for (size_t dx = 0; dx < acs.covered_blocks_x(); ++dx) {
+      entropy_estimate[(cy + dy) * 8 + cx + dx] = 0.0;  // clear covered blocks
+    }
+  }
+  entropy_estimate[cy * 8 + cx] = entropy;  // whole cost lives on first block
+}
+
+AcStrategy::Type AcsSquare(size_t blocks) {
+  // Square transform for a side of `blocks` 8x8 blocks; any value other than
+  // 2 or 4 selects the 64x64 transform.
+  switch (blocks) {
+    case 2: return AcStrategy::Type::DCT16X16;
+    case 4: return AcStrategy::Type::DCT32X32;
+    default: return AcStrategy::Type::DCT64X64;
+  }
+}
+
+AcStrategy::Type AcsVerticalSplit(size_t blocks) {
+  // Rectangle used for the left/right halves of a `blocks`-sided square; any
+  // value other than 2 or 4 selects DCT64X32.
+  switch (blocks) {
+    case 2: return AcStrategy::Type::DCT16X8;
+    case 4: return AcStrategy::Type::DCT32X16;
+    default: return AcStrategy::Type::DCT64X32;
+  }
+}
+
+AcStrategy::Type AcsHorizontalSplit(size_t blocks) {
+  // Rectangle used for the top/bottom halves of a `blocks`-sided square; any
+  // value other than 2 or 4 selects DCT32X64.
+  switch (blocks) {
+    case 2: return AcStrategy::Type::DCT8X16;
+    case 4: return AcStrategy::Type::DCT16X32;
+    default: return AcStrategy::Type::DCT32X64;
+  }
+}
+
+// Tries to merge the smaller transforms inside one square into either the
+// full square or the two rectangles produced by a single middle division
+// (horizontal or vertical), comparing those options fairly by estimated cost.
+//
+// Works for any square of blocks X blocks size, where a block is 8x8 pixels;
+// ac_strategy is indexed at (bx + cx, by + cy), entropy_estimate at (cx, cy).
+void FindBestFirstLevelDivisionForSquare(
+    size_t blocks, bool allow_square_transform, size_t bx, size_t by, size_t cx,
+    size_t cy, const ACSConfig& config, const float* JXL_RESTRICT cmap_factors,
+    AcStrategyImage* JXL_RESTRICT ac_strategy, const float entropy_mul_JXK,
+    const float entropy_mul_JXJ, float* JXL_RESTRICT entropy_estimate,
+    float* block, float* scratch_space, uint32_t* quantized) {
+  // We denote J for the larger dimension here, and K for the smaller.
+  // For example, for 32x32 block splitting, J would be 32, K 16.
+  const size_t blocks_half = blocks / 2;
+  const AcStrategy::Type acs_rawJXK = AcsVerticalSplit(blocks);
+  const AcStrategy::Type acs_rawKXJ = AcsHorizontalSplit(blocks);
+  const AcStrategy::Type acs_rawJXJ = AcsSquare(blocks);
+  const AcStrategy acsJXK = AcStrategy::FromRawStrategy(acs_rawJXK);
+  const AcStrategy acsKXJ = AcStrategy::FromRawStrategy(acs_rawKXJ);
+  const AcStrategy acsJXJ = AcStrategy::FromRawStrategy(acs_rawJXJ);
+  AcStrategyRow row0 = ac_strategy->ConstRow(by + cy + 0);
+  AcStrategyRow row1 = ac_strategy->ConstRow(by + cy + blocks_half);
+  // Let's check if we can consider a JXJ block here at all.
+  // This is not necessary in the basic use of hierarchically merging
+  // blocks in the simplest possible way, but is needed when we try other
+  // 'floating' options of merging, possibly after a simple hierarchical
+  // merge has been explored.
+  if (MultiBlockTransformCrossesHorizontalBoundary(*ac_strategy, bx + cx,
+                                                   by + cy, bx + cx + blocks) ||
+      MultiBlockTransformCrossesHorizontalBoundary(
+          *ac_strategy, bx + cx, by + cy + blocks, bx + cx + blocks) ||
+      MultiBlockTransformCrossesVerticalBoundary(*ac_strategy, bx + cx, by + cy,
+                                                 by + cy + blocks) ||
+      MultiBlockTransformCrossesVerticalBoundary(*ac_strategy, bx + cx + blocks,
+                                                 by + cy, by + cy + blocks)) {
+    return;  // not suitable for JxJ analysis, some transforms leak out.
+  }
+  // For floating transforms there may be
+  // already blocks selected that make either or both JXK and
+  // KXJ not feasible for this location.
+  const bool allow_JXK = !MultiBlockTransformCrossesVerticalBoundary(
+      *ac_strategy, bx + cx + blocks_half, by + cy, by + cy + blocks);
+  const bool allow_KXJ = !MultiBlockTransformCrossesHorizontalBoundary(
+      *ac_strategy, bx + cx, by + cy + blocks_half, bx + cx + blocks);
+  // Current entropies aggregated per quadrant: entropy[row half][column half].
+  float entropy[2][2] = {};
+  for (size_t dy = 0; dy < blocks; ++dy) {
+    for (size_t dx = 0; dx < blocks; ++dx) {
+      entropy[dy / blocks_half][dx / blocks_half] +=
+          entropy_estimate[(cy + dy) * 8 + (cx + dx)];
+    }
+  }
+  float entropy_JXK_left = std::numeric_limits<float>::max();
+  float entropy_JXK_right = std::numeric_limits<float>::max();
+  float entropy_KXJ_top = std::numeric_limits<float>::max();
+  float entropy_KXJ_bottom = std::numeric_limits<float>::max();
+  float entropy_JXJ = std::numeric_limits<float>::max();
+  if (allow_JXK) {
+    if (row0[bx + cx + 0].RawStrategy() != acs_rawJXK) {
+      entropy_JXK_left =
+          entropy_mul_JXK *
+          EstimateEntropy(acsJXK, (bx + cx + 0) * 8, (by + cy + 0) * 8, config,
+                          cmap_factors, block, scratch_space, quantized);
+    }
+    if (row0[bx + cx + blocks_half].RawStrategy() != acs_rawJXK) {
+      entropy_JXK_right =
+          entropy_mul_JXK * EstimateEntropy(acsJXK, (bx + cx + blocks_half) * 8,
+                                            (by + cy + 0) * 8, config,
+                                            cmap_factors, block, scratch_space,
+                                            quantized);
+    }
+  }
+  if (allow_KXJ) {
+    if (row0[bx + cx].RawStrategy() != acs_rawKXJ) {
+      entropy_KXJ_top =
+          entropy_mul_JXK *  // same multiplier for both rectangle orientations
+          EstimateEntropy(acsKXJ, (bx + cx + 0) * 8, (by + cy + 0) * 8, config,
+                          cmap_factors, block, scratch_space, quantized);
+    }
+    if (row1[bx + cx].RawStrategy() != acs_rawKXJ) {
+      entropy_KXJ_bottom =
+          entropy_mul_JXK * EstimateEntropy(acsKXJ, (bx + cx + 0) * 8,
+                                            (by + cy + blocks_half) * 8, config,
+                                            cmap_factors, block, scratch_space,
+                                            quantized);
+    }
+  }
+  if (allow_square_transform) {
+    // We control the exploration of the square transform separately so that
+    // we can turn it off at high decoding speeds for 32x32, but still allow
+    // exploring 16x32 and 32x16.
+    entropy_JXJ = entropy_mul_JXJ * EstimateEntropy(acsJXJ, (bx + cx + 0) * 8,
+                                                    (by + cy + 0) * 8, config,
+                                                    cmap_factors, block,
+                                                    scratch_space, quantized);
+  }
+
+  // Test if this block should have JXK or KXJ transforms,
+  // because it can have only one or the other.
+  float costJxN = std::min(entropy_JXK_left, entropy[0][0] + entropy[1][0]) +
+                  std::min(entropy_JXK_right, entropy[0][1] + entropy[1][1]);
+  float costNxJ = std::min(entropy_KXJ_top, entropy[0][0] + entropy[0][1]) +
+                  std::min(entropy_KXJ_bottom, entropy[1][0] + entropy[1][1]);
+  if (entropy_JXJ < costJxN && entropy_JXJ < costNxJ) {
+    ac_strategy->Set(bx + cx, by + cy, acs_rawJXJ);
+    SetEntropyForTransform(cx, cy, acs_rawJXJ, entropy_JXJ, entropy_estimate);
+  } else if (costJxN < costNxJ) {
+    if (entropy_JXK_left < entropy[0][0] + entropy[1][0]) {
+      ac_strategy->Set(bx + cx, by + cy, acs_rawJXK);
+      SetEntropyForTransform(cx, cy, acs_rawJXK, entropy_JXK_left,
+                             entropy_estimate);
+    }
+    if (entropy_JXK_right < entropy[0][1] + entropy[1][1]) {
+      ac_strategy->Set(bx + cx + blocks_half, by + cy, acs_rawJXK);
+      SetEntropyForTransform(cx + blocks_half, cy, acs_rawJXK,
+                             entropy_JXK_right, entropy_estimate);
+    }
+  } else {
+    if (entropy_KXJ_top < entropy[0][0] + entropy[0][1]) {
+      ac_strategy->Set(bx + cx, by + cy, acs_rawKXJ);
+      SetEntropyForTransform(cx, cy, acs_rawKXJ, entropy_KXJ_top,
+                             entropy_estimate);
+    }
+    if (entropy_KXJ_bottom < entropy[1][0] + entropy[1][1]) {
+      ac_strategy->Set(bx + cx, by + cy + blocks_half, acs_rawKXJ);
+      SetEntropyForTransform(cx, cy + blocks_half, acs_rawKXJ,
+                             entropy_KXJ_bottom, entropy_estimate);
+    }
+  }
+}
+
+void ProcessRectACS(PassesEncoderState* JXL_RESTRICT enc_state,
+                    const ACSConfig& config, const Rect& rect) {
+  // Chooses the AC strategy (transform type) of every 8x8 block in `rect`:
+  // 1. First find the best 8x8 transform for each block.
+  // 2. Then merge them into larger transforms where possible,
+  // starting from the smallest transforms (16x8 and 8x16).
+  // Additional complication: 16x8 and 8x16 are considered
+  // simultaneously and fairly against each other.
+  // We are looking at 64x64 squares since the YtoX and YtoB
+  // maps happen to be at that resolution, and having
+  // integral transforms cross these boundaries leads to
+  // additional complications.
+  const CompressParams& cparams = enc_state->cparams;
+  const float butteraugli_target = cparams.butteraugli_distance;
+  AcStrategyImage* ac_strategy = &enc_state->shared.ac_strategy;
+  // TODO(veluca): reuse allocations
+  auto mem = hwy::AllocateAligned<float>(5 * AcStrategy::kMaxCoeffArea);
+  auto qmem = hwy::AllocateAligned<uint32_t>(AcStrategy::kMaxCoeffArea);
+  uint32_t* JXL_RESTRICT quantized = qmem.get();
+  float* JXL_RESTRICT block = mem.get();
+  float* JXL_RESTRICT scratch_space = mem.get() + 3 * AcStrategy::kMaxCoeffArea;
+  size_t bx = rect.x0();
+  size_t by = rect.y0();
+  JXL_ASSERT(rect.xsize() <= 8);  // the per-rect tables below are 8x8
+  JXL_ASSERT(rect.ysize() <= 8);
+  size_t tx = bx / kColorTileDimInBlocks;
+  size_t ty = by / kColorTileDimInBlocks;
+  const float cmap_factors[3] = {
+      enc_state->shared.cmap.YtoXRatio(
+          enc_state->shared.cmap.ytox_map.ConstRow(ty)[tx]),
+      0.0f,
+      enc_state->shared.cmap.YtoBRatio(
+          enc_state->shared.cmap.ytob_map.ConstRow(ty)[tx]),
+  };
+  if (cparams.speed_tier > SpeedTier::kHare) return;
+  // First compute the best 8x8 transform for each square. Later, we do not
+  // experiment with different combinations, but only use the best of the 8x8s
+  // when DCT8X8 is specified in the tree search.
+  // 8x8 transforms have 10 variants, but every larger transform is just a DCT.
+  float entropy_estimate[64] = {};
+  // Favor all 8x8 transforms (against 16x8 and larger transforms) at
+  // low butteraugli_target distances.
+  static const float k8x8mul1 = -0.55;
+  static const float k8x8mul2 = 1.0;
+  static const float k8x8base = 1.4;
+  const float mul8x8 = k8x8mul2 + k8x8mul1 / (butteraugli_target + k8x8base);
+  for (size_t iy = 0; iy < rect.ysize(); iy++) {
+    for (size_t ix = 0; ix < rect.xsize(); ix++) {
+      float entropy = 0.0;
+      const uint8_t best_of_8x8s = FindBest8x8Transform(
+          8 * (bx + ix), 8 * (by + iy), static_cast<int>(cparams.speed_tier),
+          config, cmap_factors, ac_strategy, block, scratch_space, quantized,
+          &entropy);
+      ac_strategy->Set(bx + ix, by + iy,
+                       static_cast<AcStrategy::Type>(best_of_8x8s));
+      entropy_estimate[iy * 8 + ix] = entropy * mul8x8;
+    }
+  }
+  // Merge when a larger transform is better than the previously
+  // searched best combination of 8x8 transforms.
+  struct MergeTry {
+    AcStrategy::Type type;
+    uint8_t priority;  // forwarded to TryMergeAcs
+    uint8_t decoding_speed_tier_max_limit;  // entry skipped above this tier
+    uint8_t encoding_speed_tier_max_limit;  // not read in this function
+    float entropy_mul;
+  };
+  static const float k8X16mul1 = -0.55;
+  static const float k8X16mul2 = 0.885;
+  static const float k8X16base = 1.6;
+  const float entropy_mul16X8 =
+      k8X16mul2 + k8X16mul1 / (butteraugli_target + k8X16base);
+  //  const float entropy_mul16X8 = mul8X16 * 0.91195782912371126f;
+
+  static const float k16X16mul1 = -0.35;
+  static const float k16X16mul2 = 0.808;
+  static const float k16X16base = 2.0;
+  const float entropy_mul16X16 =
+      k16X16mul2 + k16X16mul1 / (butteraugli_target + k16X16base);
+  //  const float entropy_mul16X16 = mul16X16 * 0.83183417727960129f;
+
+  static const float k32X16mul1 = -0.1;
+  static const float k32X16mul2 = 0.854;
+  static const float k32X16base = 2.5;
+  const float entropy_mul16X32 =
+      k32X16mul2 + k32X16mul1 / (butteraugli_target + k32X16base);
+
+  const float entropy_mul32X32 = 0.93;
+  const float entropy_mul64X64 = 1.52f;
+  // TODO(jyrki): Consider this feedback in further changes:
+  // Also effectively when the multipliers for smaller blocks are
+  // below 1, this raises the bar for the bigger blocks even higher
+  // in that sense these constants are not independent (e.g. changing
+  // the constant for DCT16x32 by -5% (making it more likely) also
+  // means that DCT32x32 becomes harder to do when starting from
+  // two DCT16x32s). It might be better to make them more independent,
+  // e.g. by not applying the multiplier when storing the new entropy
+  // estimates in TryMergeToACSCandidate().
+  const MergeTry kTransformsForMerge[9] = {  // 6 set; other 3 zero-initialized
+      {AcStrategy::Type::DCT16X8, 2, 4, 5, entropy_mul16X8},
+      {AcStrategy::Type::DCT8X16, 2, 4, 5, entropy_mul16X8},
+      // FindBestFirstLevelDivisionForSquare looks for DCT16X16 and its
+      // subdivisions. {AcStrategy::Type::DCT16X16, 3, entropy_mul16X16},
+      {AcStrategy::Type::DCT16X32, 4, 4, 4, entropy_mul16X32},
+      {AcStrategy::Type::DCT32X16, 4, 4, 4, entropy_mul16X32},
+      // FindBestFirstLevelDivisionForSquare looks for DCT32X32 and its
+      // subdivisions. {AcStrategy::Type::DCT32X32, 5, 1, 5,
+      // 0.9822994906548809f},
+      {AcStrategy::Type::DCT64X32, 6, 1, 3, 1.29f},
+      {AcStrategy::Type::DCT32X64, 6, 1, 3, 1.29f},
+      // {AcStrategy::Type::DCT64X64, 8, 1, 3, 2.0846542128012948f},
+  };
+  /*
+  These sizes not yet included in merge heuristic:
+  set(AcStrategy::Type::DCT32X8, 0.0f, 2.261390410971102f);
+  set(AcStrategy::Type::DCT8X32, 0.0f, 2.261390410971102f);
+  set(AcStrategy::Type::DCT128X128, 0.0f, 1.0f);
+  set(AcStrategy::Type::DCT128X64, 0.0f, 0.73f);
+  set(AcStrategy::Type::DCT64X128, 0.0f, 0.73f);
+  set(AcStrategy::Type::DCT256X256, 0.0f, 1.0f);
+  set(AcStrategy::Type::DCT256X128, 0.0f, 0.73f);
+  set(AcStrategy::Type::DCT128X256, 0.0f, 0.73f);
+  */
+
+  // Priority is a tricky kludge to avoid collisions so that transforms
+  // don't overlap.
+  uint8_t priority[64] = {};
+  bool enable_32x32 = cparams.decoding_speed_tier < 4;
+  for (auto tx : kTransformsForMerge) {
+    if (tx.decoding_speed_tier_max_limit < cparams.decoding_speed_tier) {
+      continue;
+    }
+    AcStrategy acs = AcStrategy::FromRawStrategy(tx.type);
+
+    for (size_t cy = 0; cy + acs.covered_blocks_y() - 1 < rect.ysize();
+         cy += acs.covered_blocks_y()) {
+      for (size_t cx = 0; cx + acs.covered_blocks_x() - 1 < rect.xsize();
+           cx += acs.covered_blocks_x()) {
+        if (cy + 7 < rect.ysize() && cx + 7 < rect.xsize()) {
+          if (cparams.decoding_speed_tier < 4 &&
+              tx.type == AcStrategy::Type::DCT32X64) {
+            // One call explores the whole 8-block square and its halves.
+            if ((cy | cx) % 8 == 0) {
+              FindBestFirstLevelDivisionForSquare(
+                  8, true, bx, by, cx, cy, config, cmap_factors, ac_strategy,
+                  tx.entropy_mul, entropy_mul64X64, entropy_estimate, block,
+                  scratch_space, quantized);
+            }
+            continue;
+          } else if (tx.type == AcStrategy::Type::DCT32X16) {
+            // NOTE(review): the paired type above is DCT32X64, so DCT64X32
+            // looks like the intended test here; as written, DCT32X16 is
+            // skipped wherever a full 8x8-block square fits, and DCT64X32
+            // falls through to the checks below. Confirm against upstream.
+            continue;
+          }
+        }
+        if ((tx.type == AcStrategy::Type::DCT16X32 && cy % 4 != 0) ||
+            (tx.type == AcStrategy::Type::DCT32X16 && cx % 4 != 0)) {
+          // handled by the 4-block FindBestFirstLevelDivisionForSquare calls
+          continue;
+        }
+
+        if (cy + 3 < rect.ysize() && cx + 3 < rect.xsize()) {
+          if (tx.type == AcStrategy::Type::DCT16X32) {
+            // We handle both DCT16X32 and DCT32X16 at the same time.
+            if ((cy | cx) % 4 == 0) {
+              FindBestFirstLevelDivisionForSquare(
+                  4, enable_32x32, bx, by, cx, cy, config, cmap_factors,
+                  ac_strategy, tx.entropy_mul, entropy_mul32X32,
+                  entropy_estimate, block, scratch_space, quantized);
+            }
+            continue;
+          } else if (tx.type == AcStrategy::Type::DCT32X16) {
+            // We handled both DCT16X32 and DCT32X16 at the same time,
+            // and that is above. The last columns and last rows,
+            // when they do not fill a 4-block square,
+            // are still handled by TryMergeAcs.
+            continue;
+          }
+        }
+        if ((tx.type == AcStrategy::Type::DCT16X32 && cy % 4 != 0) ||
+            (tx.type == AcStrategy::Type::DCT32X16 && cx % 4 != 0)) {
+          // Redundant: the identical test above has already filtered these.
+          continue;
+        }
+        if (cy + 1 < rect.ysize() && cx + 1 < rect.xsize()) {
+          if (tx.type == AcStrategy::Type::DCT8X16) {
+            // We handle both DCT8X16 and DCT16X8 at the same time.
+            if ((cy | cx) % 2 == 0) {
+              FindBestFirstLevelDivisionForSquare(
+                  2, true, bx, by, cx, cy, config, cmap_factors, ac_strategy,
+                  tx.entropy_mul, entropy_mul16X16, entropy_estimate, block,
+                  scratch_space, quantized);
+            }
+            continue;
+          } else if (tx.type == AcStrategy::Type::DCT16X8) {
+            // We handled both DCT8X16 and DCT16X8 at the same time,
+            // and that is above. The last column and last row,
+            // when the last column or last row is odd numbered,
+            // are still handled by TryMergeAcs.
+            continue;
+          }
+        }
+        if ((tx.type == AcStrategy::Type::DCT8X16 && cy % 2 == 1) ||
+            (tx.type == AcStrategy::Type::DCT16X8 && cx % 2 == 1)) {
+          // already covered by FindBestFirstLevelDivisionForSquare
+          continue;
+        }
+        // All other merge sizes are handled here.
+        // Some of the DCT16X8s and DCT8X16s will still leak through here
+        // when there is an odd number of 8x8 blocks, then the last row
+        // and column will get their DCT16X8s and DCT8X16s through the
+        // normal integral transform merging process.
+        TryMergeAcs(tx.type, bx, by, cx, cy, config, cmap_factors, ac_strategy,
+                    tx.entropy_mul, tx.priority, &priority[0], entropy_estimate,
+                    block, scratch_space, quantized);
+      }
+    }
+  }
+  if (cparams.speed_tier >= SpeedTier::kHare) {
+    return;
+  }
+  // Here we still try to do some non-aligned matching, find a few more
+  // 16X8, 8X16 and 16X16s between the non-2-aligned blocks.
+  for (size_t cy = 0; cy + 1 < rect.ysize(); ++cy) {
+    for (size_t cx = 0; cx + 1 < rect.xsize(); ++cx) {
+      if ((cy | cx) % 2 != 0) {
+        FindBestFirstLevelDivisionForSquare(
+            2, true, bx, by, cx, cy, config, cmap_factors, ac_strategy,
+            entropy_mul16X8, entropy_mul16X16, entropy_estimate, block,
+            scratch_space, quantized);
+      }
+    }
+  }
+  // Non-aligned matching for 32X32, 16X32 and 32X16.
+  size_t step = cparams.speed_tier >= SpeedTier::kTortoise ? 2 : 1;
+  for (size_t cy = 0; cy + 3 < rect.ysize(); cy += step) {
+    for (size_t cx = 0; cx + 3 < rect.xsize(); cx += step) {
+      if ((cy | cx) % 4 == 0) {
+        continue;  // Already tried with loop above (DCT16X32 case).
+      }
+      FindBestFirstLevelDivisionForSquare(
+          4, enable_32x32, bx, by, cx, cy, config, cmap_factors, ac_strategy,
+          entropy_mul16X32, entropy_mul32X32, entropy_estimate, block,
+          scratch_space, quantized);
+    }
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(ProcessRectACS);
+
+void AcStrategyHeuristics::Init(const Image3F& src,
+                                PassesEncoderState* enc_state) {
+  this->enc_state = enc_state;
+  config.dequant = &enc_state->shared.matrices;
+  const CompressParams& cparams = enc_state->cparams;
+
+  if (cparams.speed_tier >= SpeedTier::kCheetah) {
+    JXL_CHECK(enc_state->shared.matrices.EnsureComputed(1));  // DCT8 only
+  } else {
+    uint32_t acs_mask = 0;
+    // All transforms up to 64x64 (one bit per enum value below DCT128X128).
+    for (size_t i = 0; i < AcStrategy::DCT128X128; i++) {
+      acs_mask |= (1 << i);
+    }
+    JXL_CHECK(enc_state->shared.matrices.EnsureComputed(acs_mask));
+  }
+
+  // Image row pointers and strides.
+  config.quant_field_row = enc_state->initial_quant_field.Row(0);
+  config.quant_field_stride = enc_state->initial_quant_field.PixelsPerRow();
+  auto& mask = enc_state->initial_quant_masking;
+  if (mask.xsize() > 0 && mask.ysize() > 0) {  // else masking_field_* stay unset
+    config.masking_field_row = mask.Row(0);
+    config.masking_field_stride = mask.PixelsPerRow();
+  }
+
+  config.src_rows[0] = src.ConstPlaneRow(0, 0);
+  config.src_rows[1] = src.ConstPlaneRow(1, 0);
+  config.src_rows[2] = src.ConstPlaneRow(2, 0);
+  config.src_stride = src.PixelsPerRow();
+
+  // Entropy estimate is composed of two factors:
+  //  - estimate of the number of bits that will be used by the block
+  //  - information loss due to quantization
+  // The following constants control the relative weights of these components.
+  config.info_loss_multiplier = 58.67516723857484f;
+  config.info_loss_multiplier2 = 43.0f;
+  config.zeros_mul = 2.55f;
+  config.cost_delta = 4.9425062806007478f;
+  JXL_ASSERT(enc_state->shared.ac_strategy.xsize() ==
+             enc_state->shared.frame_dim.xsize_blocks);
+  JXL_ASSERT(enc_state->shared.ac_strategy.ysize() ==
+             enc_state->shared.frame_dim.ysize_blocks);
+}
+
+void AcStrategyHeuristics::ProcessRect(const Rect& rect) {
+  // Speed tiers kCheetah and above skip the search and use DCT8 for the
+  // whole rect; all other tiers run the target-specific ProcessRectACS.
+  if (enc_state->cparams.speed_tier < SpeedTier::kCheetah) {
+    HWY_DYNAMIC_DISPATCH(ProcessRectACS)
+    (enc_state, config, rect);
+    return;
+  }
+  enc_state->shared.ac_strategy.FillDCT8(rect);
+}
+
+void AcStrategyHeuristics::Finalize(AuxOut* aux_out) {
+  const auto& ac_strategy = enc_state->shared.ac_strategy;
+  // Per-strategy block counts; only collected when aux_out is provided.
+  if (aux_out != nullptr) {
+    aux_out->num_small_blocks =
+        ac_strategy.CountBlocks(AcStrategy::Type::IDENTITY) +
+        ac_strategy.CountBlocks(AcStrategy::Type::DCT2X2) +
+        ac_strategy.CountBlocks(AcStrategy::Type::DCT4X4);
+    aux_out->num_dct4x8_blocks =
+        ac_strategy.CountBlocks(AcStrategy::Type::DCT4X8) +
+        ac_strategy.CountBlocks(AcStrategy::Type::DCT8X4);
+    aux_out->num_afv_blocks = ac_strategy.CountBlocks(AcStrategy::Type::AFV0) +
+                              ac_strategy.CountBlocks(AcStrategy::Type::AFV1) +
+                              ac_strategy.CountBlocks(AcStrategy::Type::AFV2) +
+                              ac_strategy.CountBlocks(AcStrategy::Type::AFV3);
+    aux_out->num_dct8_blocks = ac_strategy.CountBlocks(AcStrategy::Type::DCT);
+    aux_out->num_dct8x16_blocks =
+        ac_strategy.CountBlocks(AcStrategy::Type::DCT8X16) +
+        ac_strategy.CountBlocks(AcStrategy::Type::DCT16X8);
+    aux_out->num_dct8x32_blocks =
+        ac_strategy.CountBlocks(AcStrategy::Type::DCT8X32) +
+        ac_strategy.CountBlocks(AcStrategy::Type::DCT32X8);
+    aux_out->num_dct16_blocks =
+        ac_strategy.CountBlocks(AcStrategy::Type::DCT16X16);
+    aux_out->num_dct16x32_blocks =
+        ac_strategy.CountBlocks(AcStrategy::Type::DCT16X32) +
+        ac_strategy.CountBlocks(AcStrategy::Type::DCT32X16);
+    aux_out->num_dct32_blocks =
+        ac_strategy.CountBlocks(AcStrategy::Type::DCT32X32);
+    aux_out->num_dct32x64_blocks =
+        ac_strategy.CountBlocks(AcStrategy::Type::DCT32X64) +
+        ac_strategy.CountBlocks(AcStrategy::Type::DCT64X32);
+    aux_out->num_dct64_blocks =
+        ac_strategy.CountBlocks(AcStrategy::Type::DCT64X64);
+  }
+
+  // Debug dump of the chosen strategies; gated on cparams, not on aux_out.
+  if (JXL_DEBUG_AC_STRATEGY && WantDebugOutput(enc_state->cparams)) {
+    DumpAcStrategy(ac_strategy, enc_state->shared.frame_dim.xsize,
+                   enc_state->shared.frame_dim.ysize, "ac_strategy", aux_out,
+                   enc_state->cparams);
+  }
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_ac_strategy.h b/third-party/libjxl/libjxl/lib/jxl/enc_ac_strategy.h
new file mode 100644
index 0000000000..c53a79bb04
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_ac_strategy.h
@@ -0,0 +1,65 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_AC_STRATEGY_H_
+#define LIB_JXL_ENC_AC_STRATEGY_H_
+
+#include <stdint.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/quant_weights.h"
+
+// `FindBestAcStrategy` uses heuristics to choose which AC strategy should be
+// used in each block, as well as the initial quantization field.
+
+namespace jxl {
+
+struct AuxOut;
+
+// AC strategy selection: utility struct.
+
+struct ACSConfig {
+  const DequantMatrices* JXL_RESTRICT dequant;
+  float info_loss_multiplier;
+  float info_loss_multiplier2;
+  float* JXL_RESTRICT quant_field_row;  // base pointer indexed by Quant()
+  size_t quant_field_stride;  // elements per row of the quant field
+  float* JXL_RESTRICT masking_field_row;  // base pointer indexed by Masking()
+  size_t masking_field_stride;  // elements per row of the masking field
+  const float* JXL_RESTRICT src_rows[3];  // one base pointer per channel
+  size_t src_stride;  // elements per row, shared by all three channels
+  float cost_delta;
+  float zeros_mul;
+  const float& Pixel(size_t c, size_t x, size_t y) const {  // channel c at x,y
+    return src_rows[c][y * src_stride + x];
+  }
+  float Masking(size_t bx, size_t by) const {  // bx/by index the masking field
+    JXL_DASSERT(masking_field_row[by * masking_field_stride + bx] > 0);
+    return masking_field_row[by * masking_field_stride + bx];
+  }
+  float Quant(size_t bx, size_t by) const {  // bx/by index the quant field
+    JXL_DASSERT(quant_field_row[by * quant_field_stride + bx] > 0);
+    return quant_field_row[by * quant_field_stride + bx];
+  }
+};
+
+struct AcStrategyHeuristics {
+  void Init(const Image3F& src, PassesEncoderState* enc_state);  // sets members
+  void ProcessRect(const Rect& rect);  // reads config and enc_state
+  void Finalize(AuxOut* aux_out);  // aux_out may be nullptr
+  ACSConfig config;  // filled in by Init
+  PassesEncoderState* enc_state;  // stored by Init
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_AC_STRATEGY_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_adaptive_quantization.cc b/third-party/libjxl/libjxl/lib/jxl/enc_adaptive_quantization.cc
new file mode 100644
index 0000000000..fbd3f953c9
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_adaptive_quantization.cc
@@ -0,0 +1,1170 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_adaptive_quantization.h"
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <string>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_adaptive_quantization.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/butteraugli/butteraugli.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_group.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_debug_image.h"
+#include "lib/jxl/enc_group.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_transforms-inl.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/gauss_blur.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/quant_weights.h"
+
+// Set JXL_DEBUG_ADAPTIVE_QUANTIZATION to 1 to enable debugging.
+#ifndef JXL_DEBUG_ADAPTIVE_QUANTIZATION
+#define JXL_DEBUG_ADAPTIVE_QUANTIZATION 0
+#endif
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::AbsDiff;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::And;
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::Sqrt;
+using hwy::HWY_NAMESPACE::ZeroIfNegative;
+
+// The following functions modulate an exponent (out_val) and return the updated
+// value. Their descriptor is limited to 8 lanes for 8x8 blocks.
+
+// Hack for mask estimation. Eventually replace this code with butteraugli's
+// masking.
+float ComputeMaskForAcStrategyUse(const float out_val) {
+  // Offset reciprocal: 1 / (out_val + 0.001).
+  const float shifted = out_val + 0.001f;
+  return 1.0f / shifted;
+}
+
+template <class D, class V>
+V ComputeMask(const D d, const V out_val) {
+  const auto kBase = Set(d, -0.76471879237038032f);
+  const auto kMul4 = Set(d, 4.4585596705216615f);
+  const auto kMul2 = Set(d, 17.282053892620215f);
+  const auto kOffset2 = Set(d, 302.36961315317848f);
+  const auto kMul3 = Set(d, 7.0561261998705858f);
+  const auto kOffset3 = Set(d, 2.3179635626140773f);
+  const auto kOffset4 = Mul(Set(d, 0.25f), kOffset3);
+  const auto kMul0 = Set(d, 0.80061762862741759f);
+  const auto k1 = Set(d, 1.0f);
+
+  // Clamp the scaled input at 1e-3 from below so every divisor stays positive.
+  const auto v1 = Max(Mul(out_val, kMul0), Set(d, 1e-3f));
+  const auto v2 = Div(k1, Add(v1, kOffset2));
+  const auto v3 = Div(k1, MulAdd(v1, v1, kOffset3));
+  const auto v4 = Div(k1, MulAdd(v1, v1, kOffset4));
+  // TODO(jyrki):
+  // A log or two here could make sense. In butteraugli we have effectively
+  // log(log(x + C)) for this kind of use, as a single log is used in
+  // saturating visual masking and here the modulation values are exponential,
+  // another log would counter that.
+  return Add(kBase, MulAdd(kMul4, v4, MulAdd(kMul2, v2, Mul(kMul3, v3))));
+}
+
+// mul and mul2 represent a scaling difference between jxl and butteraugli.
+static const float kSGmul = 226.77216153508914f;
+static const float kSGmul2 = 1.0f / 73.377132366608819f;
+static const float kLog2 = 0.693147181f;  // natural logarithm of 2
+// Includes correction factor for std::log -> log2.
+static const float kSGRetMul = kSGmul2 * 18.6580932135f * kLog2;
+static const float kSGVOffset = 7.7825991679894591f;
+
+template <bool invert, typename D, typename V>
+V RatioOfDerivativesOfCubicRootToSimpleGamma(const D d, V v) {
+  // The opsin space in jxl is the cubic root of photons, i.e., v * v * v
+  // is related to the number of photons.
+  //
+  // SimpleGamma(v * v * v) is the psychovisual space in butteraugli.
+  // This ratio allows quantization to move from jxl's opsin space to
+  // butteraugli's log-gamma space.
+  float kEpsilon = 1e-2;  // added to both numerator and denominator offset
+  v = ZeroIfNegative(v);
+  const auto kNumMul = Set(d, kSGRetMul * 3 * kSGmul);
+  const auto kVOffset = Set(d, kSGVOffset * kLog2 + kEpsilon);
+  const auto kDenMul = Set(d, kLog2 * kSGmul);
+
+  const auto v2 = Mul(v, v);
+
+  const auto num = MulAdd(kNumMul, v2, Set(d, kEpsilon));  // kNumMul*v^2 + eps
+  const auto den = MulAdd(Mul(kDenMul, v), v2, kVOffset);  // kDenMul*v^3 + off
+  return invert ? Div(num, den) : Div(den, num);  // invert picks num / den
+}
+
+template <bool invert = false>
+static float RatioOfDerivativesOfCubicRootToSimpleGamma(float v) {
+  // Single-lane convenience wrapper around the vector overload.
+  const HWY_CAPPED(float, 1) d1;
+  const auto lane = Load(d1, &v);
+  return GetLane(RatioOfDerivativesOfCubicRootToSimpleGamma<invert>(d1, lane));
+}
+
+// TODO(veluca): this function computes an approximation of the derivative of
+// SimpleGamma with (f(x+eps)-f(x))/eps. Consider two-sided approximation or
+// exact derivatives. For reference, SimpleGamma was:
+/*
+template <typename D, typename V>
+V SimpleGamma(const D d, V v) {
+  // A simple HDR compatible gamma function.
+  const auto mul = Set(d, kSGmul);
+  const auto kRetMul = Set(d, kSGRetMul);
+  const auto kRetAdd = Set(d, kSGmul2 * -20.2789020414f);
+  const auto kVOffset = Set(d, kSGVOffset);
+
+  v *= mul;
+
+  // This should happen rarely, but may lead to a NaN, which is rather
+  // undesirable. Since negative photons don't exist we solve the NaNs by
+  // clamping here.
+  // TODO(veluca): with FastLog2f, this no longer leads to NaNs.
+  v = ZeroIfNegative(v);
+  return kRetMul * FastLog2f(d, v + kVOffset) + kRetAdd;
+}
+*/
+
+template <class D, class V>
+V GammaModulation(const D d, const size_t x, const size_t y,
+                  const ImageF& xyb_x, const ImageF& xyb_y, const V out_val) {
+  const float kBias = 0.16f;
+  JXL_DASSERT(kBias > kOpsinAbsorbanceBias[0]);
+  JXL_DASSERT(kBias > kOpsinAbsorbanceBias[1]);
+  JXL_DASSERT(kBias > kOpsinAbsorbanceBias[2]);
+  auto overall_ratio = Zero(d);  // per-lane running sum over the 8x8 block
+  auto bias = Set(d, kBias);
+  auto half = Set(d, 0.5f);
+  for (size_t dy = 0; dy < 8; ++dy) {
+    const float* const JXL_RESTRICT row_in_x = xyb_x.Row(y + dy);
+    const float* const JXL_RESTRICT row_in_y = xyb_y.Row(y + dy);
+    for (size_t dx = 0; dx < 8; dx += Lanes(d)) {
+      const auto iny = Add(Load(d, row_in_y + x + dx), bias);
+      const auto inx = Load(d, row_in_x + x + dx);
+      const auto r = Sub(iny, inx);  // (y + bias) - x
+      const auto g = Add(iny, inx);  // (y + bias) + x
+      const auto ratio_r =
+          RatioOfDerivativesOfCubicRootToSimpleGamma</*invert=*/true>(d, r);
+      const auto ratio_g =
+          RatioOfDerivativesOfCubicRootToSimpleGamma</*invert=*/true>(d, g);
+      const auto avg_ratio = Mul(half, Add(ratio_r, ratio_g));
+
+      overall_ratio = Add(overall_ratio, avg_ratio);
+    }
+  }
+  overall_ratio = Mul(SumOfLanes(d, overall_ratio), Set(d, 1.0f / 64));
+  // overall_ratio now holds the 64-pixel mean. Ideal exponent is -1.0, but
+  // likely optimal correction adds some entropy, so slightly less than that.
+  // ln(2) constant folded in because we want std::log but have FastLog2f.
+  const auto kGam = Set(d, -0.15526878023684174f * 0.693147180559945f);
+  return MulAdd(kGam, FastLog2f(d, overall_ratio), out_val);
+}
+
+template <class D, class V>
+V ColorModulation(const D d, const size_t x, const size_t y,
+                  const ImageF& xyb_x, const ImageF& xyb_y, const ImageF& xyb_b,
+                  const double butteraugli_target, V out_val) {
+  static const float kStrengthMul = 4.2456542701250122f;
+  static const float kRedRampStart = 0.18748564245760829f;
+  static const float kRedRampLength = 0.16701783842516479f;
+  static const float kBlueRampLength = 0.16117602661852037f;
+  static const float kBlueRampStart = 0.47897504338287333f;
+  const float strength = kStrengthMul * (1.0f - 0.15f * butteraugli_target);
+  if (strength < 0) {  // i.e. butteraugli_target > 1 / 0.15: no modulation
+    return out_val;
+  }
+  // x values are smaller than y and b values, need to take the difference into
+  // account.
+  const float red_strength = strength * 6.0f;
+  const float blue_strength = strength;
+  {
+    // Reduce some bits from areas not blue or red.
+    const float offset = strength * -0.007;  // 9174542291185913f;
+    out_val = Add(out_val, Set(d, offset));
+  }
+  // Calculate how much of the 8x8 block is covered with blue or red.
+  auto blue_coverage = Zero(d);
+  auto red_coverage = Zero(d);
+  auto bias_y = Set(d, 0.2f);
+  auto bias_y_add = Set(d, 0.1f);
+  for (size_t dy = 0; dy < 8; ++dy) {
+    const float* const JXL_RESTRICT row_in_x = xyb_x.Row(y + dy);
+    const float* const JXL_RESTRICT row_in_y = xyb_y.Row(y + dy);
+    const float* const JXL_RESTRICT row_in_b = xyb_b.Row(y + dy);
+    for (size_t dx = 0; dx < 8; dx += Lanes(d)) {
+      const auto pixel_y = Load(d, row_in_y + x + dx);
+      // Estimate redness-greenness relative to the intensity.
+      const auto pixel_xpy = Div(Abs(Load(d, row_in_x + x + dx)),
+                                 Max(Add(bias_y_add, pixel_y), bias_y));
+      const auto pixel_x =
+          Max(Set(d, 0.0f), Sub(pixel_xpy, Set(d, kRedRampStart)));
+      const auto pixel_b =
+          Max(Set(d, 0.0f), Sub(Load(d, row_in_b + x + dx),
+                                Add(pixel_y, Set(d, kBlueRampStart))));
+      const auto blue_slope = Min(pixel_b, Set(d, kBlueRampLength));
+      const auto red_slope = Min(pixel_x, Set(d, kRedRampLength));
+      red_coverage = Add(red_coverage, red_slope);
+      blue_coverage = Add(blue_coverage, blue_slope);
+    }
+  }
+
+  // Saturate when the high red or high blue coverage is above a level.
+  // The idea here is that if a certain fraction of the block is red or
+  // blue we consider as if it was fully red or blue.
+  static const float ratio = 28.0f;  // out of 64 pixels.
+
+  auto overall_red_coverage = SumOfLanes(d, red_coverage);
+  overall_red_coverage =
+      Min(overall_red_coverage, Set(d, ratio * kRedRampLength));
+  overall_red_coverage =
+      Mul(overall_red_coverage, Set(d, red_strength / ratio));
+
+  auto overall_blue_coverage = SumOfLanes(d, blue_coverage);
+  overall_blue_coverage =
+      Min(overall_blue_coverage, Set(d, ratio * kBlueRampLength));
+  overall_blue_coverage =
+      Mul(overall_blue_coverage, Set(d, blue_strength / ratio));
+
+  return Add(overall_red_coverage, Add(overall_blue_coverage, out_val));
+}
+
+// Change precision in 8x8 blocks that have high frequency content.
+template <class D, class V>
+V HfModulation(const D d, const size_t x, const size_t y, const ImageF& xyb,
+               const V out_val) {
+  // Zero out the invalid differences for the rightmost value per row.
+  const Rebind<uint32_t, D> du;
+  HWY_ALIGN constexpr uint32_t kMaskRight[kBlockDim] = {~0u, ~0u, ~0u, ~0u,
+                                                        ~0u, ~0u, ~0u, 0};
+
+  auto sum = Zero(d);  // sum of absolute differences with right and below
+
+  static const float valmin = 0.52489909479039587f;  // cap on each difference
+  auto valminv = Set(d, valmin);
+  for (size_t dy = 0; dy < 8; ++dy) {
+    const float* JXL_RESTRICT row_in = xyb.Row(y + dy) + x;
+    const float* JXL_RESTRICT row_in_next =
+        dy == 7 ? row_in : xyb.Row(y + dy + 1) + x;  // last row: diff is 0
+
+    // In SCALAR, there is no guarantee of having extra row padding.
+    // Hence, we need to ensure we don't access pixels outside the row itself.
+    // In SIMD modes, however, rows are padded, so it's safe to access one
+    // garbage value after the row. The vector then gets masked with kMaskRight
+    // to remove the influence of that value.
+#if HWY_TARGET != HWY_SCALAR
+    for (size_t dx = 0; dx < 8; dx += Lanes(d)) {
+#else
+    for (size_t dx = 0; dx < 7; dx += Lanes(d)) {
+#endif
+      const auto p = Load(d, row_in + dx);
+      const auto pr = LoadU(d, row_in + dx + 1);
+      const auto mask = BitCast(d, Load(du, kMaskRight + dx));
+      sum = Add(sum, And(mask, Min(valminv, AbsDiff(p, pr))));
+
+      const auto pd = Load(d, row_in_next + dx);
+      sum = Add(sum, Min(valminv, AbsDiff(p, pd)));
+    }
+#if HWY_TARGET == HWY_SCALAR
+    const auto p = Load(d, row_in + 7);  // column 7: only the below-difference
+    const auto pd = Load(d, row_in_next + 7);
+    sum = Add(sum, Min(valminv, AbsDiff(p, pd)));
+#endif
+  }
+  // more negative value gives more bpp
+  static const float kOffset = -2.6545897672771526;
+  static const float kMul = -0.049868161744916512;
+
+  sum = SumOfLanes(d, sum);
+  float scalar_sum = GetLane(sum);
+  static const float maxsum = 7.9076877647025947f;
+  static const float minsum = 0.53640540945659809f;
+  scalar_sum = std::min(maxsum, scalar_sum);  // clamp to [minsum, maxsum]
+  scalar_sum = std::max(minsum, scalar_sum);
+  scalar_sum += kOffset;
+  scalar_sum *= kMul;
+  return Add(Set(d, scalar_sum), out_val);
+}
+
+void PerBlockModulations(const float butteraugli_target, const ImageF& xyb_x,
+                         const ImageF& xyb_y, const ImageF& xyb_b,
+                         const float scale, const Rect& rect, ImageF* out) {  // rewrites *out in place over rect
+  JXL_ASSERT(SameSize(xyb_x, xyb_y));
+  JXL_ASSERT(DivCeil(xyb_x.xsize(), kBlockDim) == out->xsize());  // out holds one value per block
+  JXL_ASSERT(DivCeil(xyb_x.ysize(), kBlockDim) == out->ysize());
+
+  float base_level = 0.48f * scale;  // level the output is blended towards as dampen drops
+  float kDampenRampStart = 2.0f;
+  float kDampenRampEnd = 14.0f;
+  float dampen = 1.0f;
+  if (butteraugli_target >= kDampenRampStart) {  // linear ramp: 1 at RampStart down to 0 at RampEnd
+    dampen = 1.0f - ((butteraugli_target - kDampenRampStart) /
+                     (kDampenRampEnd - kDampenRampStart));
+    if (dampen < 0) {
+      dampen = 0;
+    }
+  }
+  const float mul = scale * dampen;
+  const float add = (1.0f - dampen) * base_level;
+  for (size_t iy = rect.y0(); iy < rect.y0() + rect.ysize(); iy++) {  // rect is in block units
+    const size_t y = iy * 8;  // block row -> pixel row
+    float* const JXL_RESTRICT row_out = out->Row(iy);
+    const HWY_CAPPED(float, kBlockDim) df;
+    for (size_t ix = rect.x0(); ix < rect.x0() + rect.xsize(); ix++) {
+      size_t x = ix * 8;  // block column -> pixel column
+      auto out_val = Set(df, row_out[ix]);  // start from the value already stored for this block
+      out_val = ComputeMask(df, out_val);
+      out_val = HfModulation(df, x, y, xyb_y, out_val);
+      out_val = ColorModulation(df, x, y, xyb_x, xyb_y, xyb_b,
+                                butteraugli_target, out_val);
+      out_val = GammaModulation(df, x, y, xyb_x, xyb_y, out_val);
+      // We want multiplicative quantization field, so everything
+      // until this point has been modulating the exponent.
+      row_out[ix] = FastPow2f(GetLane(out_val) * 1.442695041f) * mul + add;  // 1.442695041 ~= log2(e); presumably exp(out_val) — confirm FastPow2f
+    }
+  }
+}
+
+template <typename D, typename V>
+V MaskingSqrt(const D d, V v) {  // lane-wise: 0.25 * sqrt(v * sqrt(kMul * 1e8) + kLogOffset)
+  static const float kLogOffset = 27.97044946785558f;
+  static const float kMul = 211.53333281566171f;
+  const auto mul_v = Set(d, kMul * 1e8);  // product is formed in double, then narrowed by Set
+  const auto offset_v = Set(d, kLogOffset);
+  return Mul(Set(d, 0.25f), Sqrt(MulAdd(v, Sqrt(mul_v), offset_v)));  // MulAdd(a, b, c) = a * b + c
+}
+
+float MaskingSqrt(const float v) {
+  const HWY_CAPPED(float, 1) d1;  // one-lane descriptor: reuse the vector overload for a scalar
+  const auto one_lane = Load(d1, &v);
+  return GetLane(MaskingSqrt(d1, one_lane));
+}
+
+void StoreMin4(const float v, float& min0, float& min1, float& min2,
+               float& min3) {
+  // Inserts v into the ascending quadruple (min0..min3), dropping the largest.
+  if (!(v < min3)) return;  // v is not among the four smallest: nothing changes
+  if (v < min0) {
+    min3 = min2;
+    min2 = min1;
+    min1 = min0;
+    min0 = v;
+    return;
+  }
+  if (v < min1) {
+    min3 = min2;
+    min2 = min1;
+    min1 = v;
+    return;
+  }
+  min3 = (v < min2) ? min2 : v;  // either the old min2 shifts up, or v lands last
+  if (v < min2) min2 = v;
+}
+
+// Look for smooth areas near the area of degradation.
+// If the areas are generally smooth, don't do masking.
+// Output is downsampled 2x: each 2x2 cell of from_rect sums into one value of to_rect.
+void FuzzyErosion(const Rect& from_rect, const ImageF& from,
+                  const Rect& to_rect, ImageF* to) {
+  const size_t xsize = from.xsize();
+  const size_t ysize = from.ysize();
+  constexpr int kStep = 1;
+  static_assert(kStep == 1, "Step must be 1");
+  JXL_ASSERT(to_rect.xsize() * 2 == from_rect.xsize());
+  JXL_ASSERT(to_rect.ysize() * 2 == from_rect.ysize());
+  for (size_t fy = 0; fy < from_rect.ysize(); ++fy) {
+    size_t y = fy + from_rect.y0();  // absolute row in `from`
+    size_t ym1 = y >= kStep ? y - kStep : y;  // neighbour rows are clamped to the image
+    size_t yp1 = y + kStep < ysize ? y + kStep : y;
+    const float* rowt = from.Row(ym1);
+    const float* row = from.Row(y);
+    const float* rowb = from.Row(yp1);
+    float* row_out = to_rect.Row(to, fy / 2);  // two input rows share one output row
+    for (size_t fx = 0; fx < from_rect.xsize(); ++fx) {
+      size_t x = fx + from_rect.x0();  // absolute column in `from`
+      size_t xm1 = x >= kStep ? x - kStep : x;  // neighbour columns are clamped to the image
+      size_t xp1 = x + kStep < xsize ? x + kStep : x;
+      float min0 = row[x];
+      float min1 = row[xm1];
+      float min2 = row[xp1];
+      float min3 = rowt[xm1];
+      // Sort the first four values.
+      if (min0 > min1) std::swap(min0, min1);
+      if (min0 > min2) std::swap(min0, min2);
+      if (min0 > min3) std::swap(min0, min3);
+      if (min1 > min2) std::swap(min1, min2);
+      if (min1 > min3) std::swap(min1, min3);
+      if (min2 > min3) std::swap(min2, min3);
+      // The remaining five values of a 3x3 neighbourhood.
+      StoreMin4(rowt[x], min0, min1, min2, min3);
+      StoreMin4(rowt[xp1], min0, min1, min2, min3);
+      StoreMin4(rowb[xm1], min0, min1, min2, min3);
+      StoreMin4(rowb[x], min0, min1, min2, min3);
+      StoreMin4(rowb[xp1], min0, min1, min2, min3);
+      static const float kMul0 = 0.125f;
+      static const float kMul1 = 0.075f;
+      static const float kMul2 = 0.06f;
+      static const float kMul3 = 0.05f;
+      float v = kMul0 * min0 + kMul1 * min1 + kMul2 * min2 + kMul3 * min3;  // weighted sum of the four smallest
+      if (fx % 2 == 0 && fy % 2 == 0) {  // first sample of a 2x2 cell overwrites the output
+        row_out[fx / 2] = v;
+      } else {  // the other three samples of the cell accumulate
+        row_out[fx / 2] += v;
+      }
+    }
+  }
+}
+
+struct AdaptiveQuantizationImpl {  // state shared by the per-tile adaptive quantization computation
+  void Init(const Image3F& xyb) {
+    JXL_DASSERT(xyb.xsize() % kBlockDim == 0);
+    JXL_DASSERT(xyb.ysize() % kBlockDim == 0);
+    const size_t xsize = xyb.xsize();
+    const size_t ysize = xyb.ysize();
+    aq_map = ImageF(xsize / kBlockDim, ysize / kBlockDim);  // one value per block
+  }
+  void PrepareBuffers(size_t num_threads) {
+    diff_buffer = ImageF(kEncTileDim + 8, num_threads);  // one scratch row per thread
+    for (size_t i = pre_erosion.size(); i < num_threads; i++) {  // grow to one image per thread
+      pre_erosion.emplace_back(kEncTileDimInBlocks * 2 + 2,
+                               kEncTileDimInBlocks * 2 + 2);
+    }
+  }
+
+  void ComputeTile(float butteraugli_target, float scale, const Image3F& xyb,
+                   const Rect& rect, const int thread, ImageF* mask) {  // rect is in block units
+    const size_t xsize = xyb.xsize();
+    const size_t ysize = xyb.ysize();
+
+    // The XYB gamma is 3.0 to be able to decode faster with two muls.
+    // Butteraugli's gamma is matching the gamma of human eye, around 2.6.
+    // We approximate the gamma difference by adding one cubic root into
+    // the adaptive quantization. This gives us a total gamma of 2.6666
+    // for quantization uses.
+    const float match_gamma_offset = 0.019;
+
+    const HWY_FULL(float) df;
+
+    size_t y_start = rect.y0() * 8;  // tile bounds in pixels
+    size_t y_end = y_start + rect.ysize() * 8;
+
+    size_t x0 = rect.x0() * 8;
+    size_t x1 = x0 + rect.xsize() * 8;
+    if (x0 != 0) x0 -= 4;  // extend by 4 pixels on every side that is not an image edge
+    if (x1 != xyb.xsize()) x1 += 4;
+    if (y_start != 0) y_start -= 4;
+    if (y_end != xyb.ysize()) y_end += 4;
+    pre_erosion[thread].ShrinkTo((x1 - x0) / 4, (y_end - y_start) / 4);  // 4x subsampled extended tile
+
+    static const float limit = 0.2f;
+    // Computes image (padded to multiple of 8x8) of local pixel differences.
+    // Subsample both directions by 4.
+    for (size_t y = y_start; y < y_end; ++y) {
+      size_t y2 = y + 1 < ysize ? y + 1 : y;  // row below, clamped
+      size_t y1 = y > 0 ? y - 1 : y;  // row above, clamped
+
+      const float* row_in = xyb.PlaneRow(1, y);  // plane index 1 only
+      const float* row_in1 = xyb.PlaneRow(1, y1);
+      const float* row_in2 = xyb.PlaneRow(1, y2);
+      float* JXL_RESTRICT row_out = diff_buffer.Row(thread);
+
+      auto scalar_pixel = [&](size_t x) {  // edge-safe scalar version of the SIMD loop body below
+        const size_t x2 = x + 1 < xsize ? x + 1 : x;
+        const size_t x1 = x > 0 ? x - 1 : x;
+        const float base =
+            0.25f * (row_in2[x] + row_in1[x] + row_in[x1] + row_in[x2]);  // mean of the 4 neighbours
+        const float gammac = RatioOfDerivativesOfCubicRootToSimpleGamma(
+            row_in[x] + match_gamma_offset);
+        float diff = gammac * (row_in[x] - base);
+        diff *= diff;
+        if (diff >= limit) {
+          diff = limit;
+        }
+        diff = MaskingSqrt(diff);
+        if ((y % 4) != 0) {  // rows 1..3 of each group of four accumulate
+          row_out[x - x0] += diff;
+        } else {  // row 0 of the group overwrites
+          row_out[x - x0] = diff;
+        }
+      };
+
+      size_t x = x0;
+      // First pixel of the row.
+      if (x0 == 0) {
+        scalar_pixel(x0);
+        ++x;
+      }
+      // SIMD
+      const auto match_gamma_offset_v = Set(df, match_gamma_offset);
+      const auto quarter = Set(df, 0.25f);
+      for (; x + 1 + Lanes(df) < x1; x += Lanes(df)) {  // bound keeps the x + 1 load below x1
+        const auto in = LoadU(df, row_in + x);
+        const auto in_r = LoadU(df, row_in + x + 1);
+        const auto in_l = LoadU(df, row_in + x - 1);
+        const auto in_t = LoadU(df, row_in2 + x);  // note: row_in2 is row y + 1 (clamped)
+        const auto in_b = LoadU(df, row_in1 + x);  // note: row_in1 is row y - 1 (clamped)
+        auto base = Mul(quarter, Add(Add(in_r, in_l), Add(in_t, in_b)));
+        auto gammacv =
+            RatioOfDerivativesOfCubicRootToSimpleGamma</*invert=*/false>(
+                df, Add(in, match_gamma_offset_v));
+        auto diff = Mul(gammacv, Sub(in, base));
+        diff = Mul(diff, diff);
+        diff = Min(diff, Set(df, limit));
+        diff = MaskingSqrt(df, diff);
+        if ((y & 3) != 0) {  // same test as (y % 4) != 0 in scalar_pixel
+          diff = Add(diff, LoadU(df, row_out + x - x0));
+        }
+        StoreU(diff, df, row_out + x - x0);
+      }
+      // Scalar
+      for (; x < x1; ++x) {
+        scalar_pixel(x);
+      }
+      if (y % 4 == 3) {  // four rows accumulated: emit one subsampled row
+        float* row_dout = pre_erosion[thread].Row((y - y_start) / 4);
+        for (size_t x = 0; x < (x1 - x0) / 4; x++) {
+          row_dout[x] = (row_out[x * 4] + row_out[x * 4 + 1] +
+                         row_out[x * 4 + 2] + row_out[x * 4 + 3]) *
+                        0.25f;  // 4 columns of 4-row sums, times 0.25
+        }
+      }
+    }
+    Rect from_rect(x0 % 8 == 0 ? 0 : 1, y_start % 8 == 0 ? 0 : 1,  // skip the 1-sample border added by the 4 px extension
+                   rect.xsize() * 2, rect.ysize() * 2);
+    FuzzyErosion(from_rect, pre_erosion[thread], rect, &aq_map);  // 2x downsample: one value per block
+    for (size_t y = 0; y < rect.ysize(); ++y) {
+      const float* aq_map_row = rect.ConstRow(aq_map, y);
+      float* mask_row = rect.Row(mask, y);
+      for (size_t x = 0; x < rect.xsize(); ++x) {
+        mask_row[x] = ComputeMaskForAcStrategyUse(aq_map_row[x]);  // mask is taken before the modulations below
+      }
+    }
+    PerBlockModulations(butteraugli_target, xyb.Plane(0), xyb.Plane(1),
+                        xyb.Plane(2), scale, rect, &aq_map);  // rewrites aq_map in place for this rect
+  }
+  std::vector<ImageF> pre_erosion;  // per-thread input to FuzzyErosion
+  ImageF aq_map;  // result, one value per block
+  ImageF diff_buffer;  // one row per thread of accumulated differences
+};
+
+ImageF AdaptiveQuantizationMap(const float butteraugli_target,
+                               const Image3F& xyb,
+                               const FrameDimensions& frame_dim, float scale,
+                               ThreadPool* pool, ImageF* mask) {  // returns the per-block map; also fills *mask
+  AdaptiveQuantizationImpl impl;
+  impl.Init(xyb);
+  *mask = ImageF(frame_dim.xsize_blocks, frame_dim.ysize_blocks);  // replaces whatever *mask held
+  JXL_CHECK(RunOnPool(
+      pool, 0,
+      DivCeil(frame_dim.xsize_blocks, kEncTileDimInBlocks) *
+          DivCeil(frame_dim.ysize_blocks, kEncTileDimInBlocks),  // one task per tile
+      [&](const size_t num_threads) {
+        impl.PrepareBuffers(num_threads);  // per-thread scratch buffers
+        return true;
+      },
+      [&](const uint32_t tid, const size_t thread) {
+        size_t n_enc_tiles =
+            DivCeil(frame_dim.xsize_blocks, kEncTileDimInBlocks);  // tiles per row
+        size_t tx = tid % n_enc_tiles;  // tile column
+        size_t ty = tid / n_enc_tiles;  // tile row
+        size_t by0 = ty * kEncTileDimInBlocks;
+        size_t by1 =
+            std::min((ty + 1) * kEncTileDimInBlocks, frame_dim.ysize_blocks);  // clip last tile row
+        size_t bx0 = tx * kEncTileDimInBlocks;
+        size_t bx1 =
+            std::min((tx + 1) * kEncTileDimInBlocks, frame_dim.xsize_blocks);  // clip last tile column
+        Rect r(bx0, by0, bx1 - bx0, by1 - by0);  // tile rectangle in block units
+        impl.ComputeTile(butteraugli_target, scale, xyb, r, thread, mask);
+      },
+      "AQ DiffPrecompute"));
+
+  return std::move(impl).aq_map;  // move the result out of the temporary impl
+}
+
+}  // namespace
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(AdaptiveQuantizationMap);
+
+namespace {
+
+// If true, prints the quantization maps at each iteration.
+constexpr bool FLAGS_dump_quant_state = false;
+
+void DumpHeatmap(const CompressParams& cparams, const AuxOut* aux_out,
+                 const std::string& label, const ImageF& image,
+                 float good_threshold, float bad_threshold) {
+  if (JXL_DEBUG_ADAPTIVE_QUANTIZATION) {  // no-op unless this debug switch is set
+    Image3F heatmap = CreateHeatMapImage(image, good_threshold, bad_threshold);
+    char filename[200];  // snprintf below truncates names that do not fit
+    snprintf(filename, sizeof(filename), "%s%05d", label.c_str(),
+             aux_out != nullptr ? aux_out->num_butteraugli_iters : 0);  // aux_out may be null
+    DumpImage(cparams, filename, heatmap);  // name is label + 5-digit iteration counter
+  }
+}
+
+void DumpHeatmaps(const CompressParams& cparams, const AuxOut* aux_out,
+                  float ba_target, const ImageF& quant_field,
+                  const ImageF& tile_heatmap, const ImageF& bt_diffmap) {
+  if (JXL_DEBUG_ADAPTIVE_QUANTIZATION) {  // no-op unless this debug switch is set
+    if (!WantDebugOutput(cparams)) return;
+    ImageF inv_qmap(quant_field.xsize(), quant_field.ysize());  // reciprocal of quant_field
+    for (size_t y = 0; y < quant_field.ysize(); ++y) {
+      const float* JXL_RESTRICT row_q = quant_field.ConstRow(y);
+      float* JXL_RESTRICT row_inv_q = inv_qmap.Row(y);
+      for (size_t x = 0; x < quant_field.xsize(); ++x) {
+        row_inv_q[x] = 1.0f / row_q[x];  // never zero
+      }
+    }
+    DumpHeatmap(cparams, aux_out, "quant_heatmap", inv_qmap, 4.0f * ba_target,
+                6.0f * ba_target);  // good / bad thresholds scale with the target
+    DumpHeatmap(cparams, aux_out, "tile_heatmap", tile_heatmap, ba_target,
+                1.5f * ba_target);
+    // matches heat maps produced by the command line tool.
+    DumpHeatmap(cparams, aux_out, "bt_diffmap", bt_diffmap,
+                ButteraugliFuzzyInverse(1.5), ButteraugliFuzzyInverse(0.5));
+  }
+}
+
+ImageF TileDistMap(const ImageF& distmap, int tile_size, int margin,
+                   const AcStrategyImage& ac_strategy) {  // per-tile 16th-power norm of distmap
+  const int tile_xsize = (distmap.xsize() + tile_size - 1) / tile_size;  // ceiling division
+  const int tile_ysize = (distmap.ysize() + tile_size - 1) / tile_size;
+  ImageF tile_distmap(tile_xsize, tile_ysize);
+  size_t distmap_stride = tile_distmap.PixelsPerRow();  // row stride of the output image
+  for (int tile_y = 0; tile_y < tile_ysize; ++tile_y) {
+    AcStrategyRow ac_strategy_row = ac_strategy.ConstRow(tile_y);  // ac_strategy is indexed by tile
+    float* JXL_RESTRICT dist_row = tile_distmap.Row(tile_y);
+    for (int tile_x = 0; tile_x < tile_xsize; ++tile_x) {
+      AcStrategy acs = ac_strategy_row[tile_x];
+      if (!acs.IsFirstBlock()) continue;  // presumably: handle each multi-block transform once — confirm
+      int this_tile_xsize = acs.covered_blocks_x() * tile_size;
+      int this_tile_ysize = acs.covered_blocks_y() * tile_size;
+      int y_begin = std::max<int>(0, tile_size * tile_y - margin);  // window = covered area plus margin, clipped
+      int y_end = std::min<int>(distmap.ysize(),
+                                tile_size * tile_y + this_tile_ysize + margin);
+      int x_begin = std::max<int>(0, tile_size * tile_x - margin);
+      int x_end = std::min<int>(distmap.xsize(),
+                                tile_size * tile_x + this_tile_xsize + margin);
+      float dist_norm = 0.0;  // weighted sum of v^16
+      double pixels = 0;  // sum of the weights
+      for (int y = y_begin; y < y_end; ++y) {
+        float ymul = 1.0;
+        constexpr float kBorderMul = 0.98f;
+        constexpr float kCornerMul = 0.7f;
+        if (margin != 0 && (y == y_begin || y == y_end - 1)) {  // first or last window row
+          ymul = kBorderMul;
+        }
+        const float* const JXL_RESTRICT row = distmap.Row(y);
+        for (int x = x_begin; x < x_end; ++x) {
+          float xmul = ymul;
+          if (margin != 0 && (x == x_begin || x == x_end - 1)) {  // first or last window column
+            if (xmul == 1.0) {
+              xmul = kBorderMul;
+            } else {  // on a border row and a border column: corner weight
+              xmul = kCornerMul;
+            }
+          }
+          float v = row[x];
+          v *= v;  // v^2
+          v *= v;  // v^4
+          v *= v;  // v^8
+          v *= v;  // v^16
+          dist_norm += xmul * v;
+          pixels += xmul;
+        }
+      }
+      if (pixels == 0) pixels = 1;  // empty window: avoid dividing by zero
+      // 16th norm is less than the max norm, we reduce the difference
+      // with this normalization factor.
+      constexpr float kTileNorm = 1.2f;
+      const float tile_dist =
+          kTileNorm * std::pow(dist_norm / pixels, 1.0f / 16.0f);
+      dist_row[tile_x] = tile_dist;
+      for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {  // replicate over all covered blocks
+        for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+          dist_row[tile_x + distmap_stride * iy + ix] = tile_dist;
+        }
+      }
+    }
+  }
+  return tile_distmap;
+}
+
+static const float kDcQuantPow = 0.83;  // exponent used in InitialQuantDC
+static const float kDcQuant = 1.095924047623553f;  // numerator of the DC quant in InitialQuantDC
+static const float kAcQuant = 0.7635;  // numerator of the AC scale in InitialQuantField
+
+// Computes the decoded image for a given set of compression parameters.
+ImageBundle RoundtripImage(const Image3F& opsin, PassesEncoderState* enc_state,
+                           const JxlCmsInterface& cms, ThreadPool* pool) {
+  std::unique_ptr<PassesDecoderState> dec_state =
+      jxl::make_unique<PassesDecoderState>();
+  JXL_CHECK(dec_state->output_encoding_info.SetFromMetadata(
+      *enc_state->shared.metadata));
+  dec_state->shared = &enc_state->shared;  // decoder state points at the encoder's shared data
+  JXL_ASSERT(opsin.ysize() % kBlockDim == 0);
+
+  const size_t xsize_groups = DivCeil(opsin.xsize(), kGroupDim);
+  const size_t ysize_groups = DivCeil(opsin.ysize(), kGroupDim);
+  const size_t num_groups = xsize_groups * ysize_groups;  // one pool task per group
+
+  size_t num_special_frames = enc_state->special_frames.size();  // restored at the end
+
+  std::unique_ptr<ModularFrameEncoder> modular_frame_encoder =
+      jxl::make_unique<ModularFrameEncoder>(enc_state->shared.frame_header,
+                                            enc_state->cparams);
+  JXL_CHECK(InitializePassesEncoder(opsin, cms, pool, enc_state,
+                                    modular_frame_encoder.get(), nullptr));
+  JXL_CHECK(dec_state->Init());
+  JXL_CHECK(dec_state->InitForAC(pool));
+
+  ImageBundle decoded(&enc_state->shared.metadata->m);
+  decoded.origin = enc_state->shared.frame_header.frame_origin;
+  decoded.SetFromImage(Image3F(opsin.xsize(), opsin.ysize()),
+                       dec_state->output_encoding_info.color_encoding);  // output has the size of opsin
+
+  PassesDecoderState::PipelineOptions options;
+  options.use_slow_render_pipeline = false;
+  options.coalescing = false;
+  options.render_spotcolors = false;
+
+  // Same as dec_state->shared->frame_header.nonserialized_metadata->m
+  const ImageMetadata& metadata = *decoded.metadata();
+
+  JXL_CHECK(dec_state->PreparePipeline(&decoded, options));
+
+  hwy::AlignedUniquePtr<GroupDecCache[]> group_dec_caches;  // one cache per thread, set in allocate_storage
+  const auto allocate_storage = [&](const size_t num_threads) -> Status {
+    JXL_RETURN_IF_ERROR(
+        dec_state->render_pipeline->PrepareForThreads(num_threads,
+                                                      /*use_group_ids=*/false));
+    group_dec_caches = hwy::MakeUniqueAlignedArray<GroupDecCache>(num_threads);
+    return true;
+  };
+  const auto process_group = [&](const uint32_t group_index,
+                                 const size_t thread) {
+    if (dec_state->shared->frame_header.loop_filter.epf_iters > 0) {  // only when EPF iterations are configured
+      ComputeSigma(dec_state->shared->BlockGroupRect(group_index),
+                   dec_state.get());
+    }
+    RenderPipelineInput input =
+        dec_state->render_pipeline->GetInputBuffers(group_index, thread);
+    JXL_CHECK(DecodeGroupForRoundtrip(
+        enc_state->coeffs, group_index, dec_state.get(),
+        &group_dec_caches[thread], thread, input, &decoded, nullptr));
+    for (size_t c = 0; c < metadata.num_extra_channels; c++) {
+      std::pair<ImageF*, Rect> ri = input.GetBuffer(3 + c);  // buffers from index 3 up are the extra channels
+      FillPlane(0.0f, ri.first, ri.second);  // extra channels are zero-filled here
+    }
+    input.Done();
+  };
+  JXL_CHECK(RunOnPool(pool, 0, num_groups, allocate_storage, process_group,
+                      "AQ loop"));
+
+  // Ensure we don't create any new special frames.
+  enc_state->special_frames.resize(num_special_frames);
+
+  return decoded;
+}
+
+constexpr int kMaxButteraugliIters = 4;  // upper bound on adjustment rounds in the loops below
+
+void FindBestQuantization(const ImageBundle& linear, const Image3F& opsin,
+                          PassesEncoderState* enc_state,
+                          const JxlCmsInterface& cms, ThreadPool* pool,
+                          AuxOut* aux_out) {  // aux_out may be null (checked before the counter update)
+  const CompressParams& cparams = enc_state->cparams;
+  if (cparams.resampling > 1 &&
+      cparams.original_butteraugli_distance <= 4.0 * cparams.resampling) {
+    // For downsampled opsin image, the butteraugli based adaptive quantization
+    // loop would only make the size bigger without improving the distance much,
+    // so in this case we enable it only for very high butteraugli targets.
+    return;
+  }
+  Quantizer& quantizer = enc_state->shared.quantizer;
+  ImageI& raw_quant_field = enc_state->shared.raw_quant_field;
+  ImageF& quant_field = enc_state->initial_quant_field;  // adjusted in place by this function
+
+  // TODO(veluca): this should really be rather handled on the
+  // ButteraugliComparator side.
+  struct TemporaryShrink {  // scope guard: shrinks the bundle now, restores its size on destruction
+    TemporaryShrink(ImageBundle& bundle, size_t xsize, size_t ysize)
+        : bundle(bundle),
+          orig_xsize(bundle.xsize()),
+          orig_ysize(bundle.ysize()) {
+      bundle.ShrinkTo(xsize, ysize);
+    }
+    TemporaryShrink(const TemporaryShrink&) = delete;
+    TemporaryShrink(TemporaryShrink&&) = delete;
+
+    ~TemporaryShrink() { bundle.ShrinkTo(orig_xsize, orig_ysize); }
+
+    ImageBundle& bundle;
+    size_t orig_xsize;
+    size_t orig_ysize;
+  } t(const_cast<ImageBundle&>(linear),  // note: casts away const to resize `linear` temporarily
+      enc_state->shared.frame_header.frame_size.xsize,
+      enc_state->shared.frame_header.frame_size.ysize);
+
+  const float butteraugli_target = cparams.butteraugli_distance;
+  const float original_butteraugli = cparams.original_butteraugli_distance;
+  ButteraugliParams params;
+  params.intensity_target = linear.metadata()->IntensityTarget();
+  // Hack the default intensity target value to be 80.0, the intensity
+  // target of sRGB images and a more reasonable viewing default than
+  // JPEG XL file format's default.
+  if (fabs(params.intensity_target - 255.0f) < 1e-3) {
+    params.intensity_target = 80.0f;
+  }
+  JxlButteraugliComparator comparator(params, cms);
+  JXL_CHECK(comparator.SetReferenceImage(linear));
+  bool lower_is_better =
+      (comparator.GoodQualityScore() < comparator.BadQualityScore());
+  const float initial_quant_dc = InitialQuantDC(butteraugli_target);
+  AdjustQuantField(enc_state->shared.ac_strategy, Rect(quant_field),
+                   original_butteraugli, &quant_field);
+  ImageF tile_distmap;
+  ImageF initial_quant_field(quant_field.xsize(), quant_field.ysize());
+  CopyImageTo(quant_field, &initial_quant_field);  // snapshot used for the clamp in round 1
+
+  float initial_qf_min, initial_qf_max;
+  ImageMinMax(initial_quant_field, &initial_qf_min, &initial_qf_max);
+  float initial_qf_ratio = initial_qf_max / initial_qf_min;  // NOTE(review): no guard for initial_qf_min == 0
+  float qf_max_deviation_low = std::sqrt(250 / initial_qf_ratio);
+  float asymmetry = 2;
+  if (qf_max_deviation_low < asymmetry) asymmetry = qf_max_deviation_low;
+  float qf_lower = initial_qf_min / (asymmetry * qf_max_deviation_low);  // bounds every update is clamped to
+  float qf_higher = initial_qf_max * (qf_max_deviation_low / asymmetry);
+
+  JXL_ASSERT(qf_higher / qf_lower < 253);
+
+  constexpr int kOriginalComparisonRound = 1;
+  int iters = kMaxButteraugliIters;
+  if (cparams.speed_tier != SpeedTier::kTortoise) {  // only kTortoise runs the full iteration count
+    iters = 2;
+  }
+  for (int i = 0; i < iters + 1; ++i) {  // the last pass only measures (see the break below)
+    if (JXL_DEBUG_ADAPTIVE_QUANTIZATION) {
+      printf("\nQuantization field:\n");
+      for (size_t y = 0; y < quant_field.ysize(); ++y) {
+        for (size_t x = 0; x < quant_field.xsize(); ++x) {
+          printf(" %.5f", quant_field.Row(y)[x]);
+        }
+        printf("\n");
+      }
+    }
+    quantizer.SetQuantField(initial_quant_dc, quant_field, &raw_quant_field);
+    ImageBundle dec_linear = RoundtripImage(opsin, enc_state, cms, pool);  // encode + decode with the current field
+    float score;
+    ImageF diffmap;
+    JXL_CHECK(comparator.CompareWith(dec_linear, &diffmap, &score));
+    if (!lower_is_better) {  // flip signs so that lower is better from here on
+      score = -score;
+      ScaleImage(-1.0f, &diffmap);
+    }
+    tile_distmap = TileDistMap(diffmap, 8 * cparams.resampling, 0,
+                               enc_state->shared.ac_strategy);  // margin 0: no border weighting
+    if (JXL_DEBUG_ADAPTIVE_QUANTIZATION && WantDebugOutput(cparams)) {
+      DumpImage(cparams, ("dec" + ToString(i)).c_str(), *dec_linear.color());
+      DumpHeatmaps(cparams, aux_out, butteraugli_target, quant_field,
+                   tile_distmap, diffmap);  // NOTE(review): called before the aux_out null check below
+    }
+    if (aux_out != nullptr) ++aux_out->num_butteraugli_iters;
+    if (JXL_DEBUG_ADAPTIVE_QUANTIZATION) {
+      float minval, maxval;
+      ImageMinMax(quant_field, &minval, &maxval);
+      printf("\nButteraugli iter: %d/%d\n", i, kMaxButteraugliIters);  // NOTE(review): loop bound is iters, not kMaxButteraugliIters
+      printf("Butteraugli distance: %f  (target = %f)\n", score,
+             original_butteraugli);
+      printf("quant range: %f ... %f  DC quant: %f\n", minval, maxval,
+             initial_quant_dc);
+      if (FLAGS_dump_quant_state) {
+        quantizer.DumpQuantizationMap(raw_quant_field);
+      }
+    }
+
+    if (i == iters) break;  // no adjustment after the final measurement
+
+    double kPow[8] = {
+        0.2, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+    };
+    double kPowMod[8] = {
+        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+    };
+    if (i == kOriginalComparisonRound) {
+      // Don't allow optimization to make the quant field a lot worse than
+      // what the initial guess was. This allows the AC field to have enough
+      // precision to reduce the oscillations due to the dc reconstruction.
+      double kInitMul = 0.6;
+      const double kOneMinusInitMul = 1.0 - kInitMul;
+      for (size_t y = 0; y < quant_field.ysize(); ++y) {
+        float* const JXL_RESTRICT row_q = quant_field.Row(y);
+        const float* const JXL_RESTRICT row_init = initial_quant_field.Row(y);
+        for (size_t x = 0; x < quant_field.xsize(); ++x) {
+          double clamp = kOneMinusInitMul * row_q[x] + kInitMul * row_init[x];  // blend of current and initial value
+          if (row_q[x] < clamp) {  // only ever raises the value
+            row_q[x] = clamp;
+            if (row_q[x] > qf_higher) row_q[x] = qf_higher;
+            if (row_q[x] < qf_lower) row_q[x] = qf_lower;
+          }
+        }
+      }
+    }
+
+    double cur_pow = 0.0;
+    if (i < 7) {  // always true here: i < iters <= kMaxButteraugliIters
+      cur_pow = kPow[i] + (original_butteraugli - 1.0) * kPowMod[i];
+      if (cur_pow < 0) {
+        cur_pow = 0;
+      }
+    }
+    if (cur_pow == 0.0) {  // this branch only ever raises the field where the error exceeds the target
+      for (size_t y = 0; y < quant_field.ysize(); ++y) {
+        const float* const JXL_RESTRICT row_dist = tile_distmap.Row(y);
+        float* const JXL_RESTRICT row_q = quant_field.Row(y);
+        for (size_t x = 0; x < quant_field.xsize(); ++x) {
+          const float diff = row_dist[x] / original_butteraugli;  // > 1 means worse than the target
+          if (diff > 1.0f) {
+            float old = row_q[x];
+            row_q[x] *= diff;
+            int qf_old = old * quantizer.InvGlobalScale() + 0.5;
+            int qf_new = row_q[x] * quantizer.InvGlobalScale() + 0.5;
+            if (qf_old == qf_new) {  // rounded value unchanged: bump by quantizer.Scale() instead
+              row_q[x] = old + quantizer.Scale();
+            }
+          }
+          if (row_q[x] > qf_higher) row_q[x] = qf_higher;
+          if (row_q[x] < qf_lower) row_q[x] = qf_lower;
+        }
+      }
+    } else {  // this branch also lowers the field where the error is at or below the target
+      for (size_t y = 0; y < quant_field.ysize(); ++y) {
+        const float* const JXL_RESTRICT row_dist = tile_distmap.Row(y);
+        float* const JXL_RESTRICT row_q = quant_field.Row(y);
+        for (size_t x = 0; x < quant_field.xsize(); ++x) {
+          const float diff = row_dist[x] / original_butteraugli;
+          if (diff <= 1.0f) {
+            row_q[x] *= std::pow(diff, cur_pow);  // factor <= 1 for diff in [0, 1]
+          } else {
+            float old = row_q[x];
+            row_q[x] *= diff;
+            int qf_old = old * quantizer.InvGlobalScale() + 0.5;
+            int qf_new = row_q[x] * quantizer.InvGlobalScale() + 0.5;
+            if (qf_old == qf_new) {  // rounded value unchanged: bump by quantizer.Scale() instead
+              row_q[x] = old + quantizer.Scale();
+            }
+          }
+          if (row_q[x] > qf_higher) row_q[x] = qf_higher;
+          if (row_q[x] < qf_lower) row_q[x] = qf_lower;
+        }
+      }
+    }
+  }
+  quantizer.SetQuantField(initial_quant_dc, quant_field, &raw_quant_field);  // commit the final field
+}
+
+void FindBestQuantizationMaxError(const Image3F& opsin,
+                                  PassesEncoderState* enc_state,
+                                  const JxlCmsInterface& cms, ThreadPool* pool,
+                                  AuxOut* aux_out) {  // aux_out is only used to gate debug dumps
+  // TODO(szabadka): Make this work for non-opsin color spaces.
+  const CompressParams& cparams = enc_state->cparams;
+  Quantizer& quantizer = enc_state->shared.quantizer;
+  ImageI& raw_quant_field = enc_state->shared.raw_quant_field;
+  ImageF& quant_field = enc_state->initial_quant_field;  // adjusted in place by this function
+
+  // TODO(veluca): better choice of this value.
+  const float initial_quant_dc =
+      16 * std::sqrt(0.1f / cparams.butteraugli_distance);
+  AdjustQuantField(enc_state->shared.ac_strategy, Rect(quant_field),
+                   cparams.original_butteraugli_distance, &quant_field);
+
+  const float inv_max_err[3] = {1.0f / enc_state->cparams.max_error[0],  // NOTE(review): no guard for max_error[c] == 0
+                                1.0f / enc_state->cparams.max_error[1],
+                                1.0f / enc_state->cparams.max_error[2]};
+
+  for (int i = 0; i < kMaxButteraugliIters + 1; ++i) {
+    quantizer.SetQuantField(initial_quant_dc, quant_field, &raw_quant_field);
+    if (JXL_DEBUG_ADAPTIVE_QUANTIZATION && aux_out) {
+      DumpXybImage(cparams, ("ops" + ToString(i)).c_str(), opsin);
+    }
+    ImageBundle decoded = RoundtripImage(opsin, enc_state, cms, pool);  // encode + decode with the current field
+    if (JXL_DEBUG_ADAPTIVE_QUANTIZATION && aux_out) {
+      DumpXybImage(cparams, ("dec" + ToString(i)).c_str(), *decoded.color());
+    }
+    for (size_t by = 0; by < enc_state->shared.frame_dim.ysize_blocks; by++) {
+      AcStrategyRow ac_strategy_row =
+          enc_state->shared.ac_strategy.ConstRow(by);
+      for (size_t bx = 0; bx < enc_state->shared.frame_dim.xsize_blocks; bx++) {
+        AcStrategy acs = ac_strategy_row[bx];
+        if (!acs.IsFirstBlock()) continue;  // presumably: handle each multi-block transform once — confirm
+        float max_error = 0;  // largest |in - dec| / max_error[c] over the covered pixels
+        for (size_t c = 0; c < 3; c++) {
+          for (size_t y = by * kBlockDim;
+               y < (by + acs.covered_blocks_y()) * kBlockDim; y++) {
+            if (y >= decoded.ysize()) continue;  // row lies outside the decoded image
+            const float* JXL_RESTRICT in_row = opsin.ConstPlaneRow(c, y);
+            const float* JXL_RESTRICT dec_row =
+                decoded.color()->ConstPlaneRow(c, y);
+            for (size_t x = bx * kBlockDim;
+                 x < (bx + acs.covered_blocks_x()) * kBlockDim; x++) {
+              if (x >= decoded.xsize()) continue;  // column lies outside the decoded image
+              max_error = std::max(
+                  std::abs(in_row[x] - dec_row[x]) * inv_max_err[c], max_error);
+            }
+          }
+        }
+        // Target an error between max_error/2 and max_error.
+        // If the error in the varblock is above the target, increase the qf to
+        // compensate. If the error is below the target, decrease the qf.
+        // However, to avoid an excessive increase of the qf, only do so if the
+        // error is less than half the maximum allowed error.
+        const float qf_mul = (max_error < 0.5f)   ? max_error * 2.0f
+                             : (max_error > 1.0f) ? max_error
+                                                  : 1.0f;  // errors in [0.5, 1] leave the field unchanged
+        for (size_t qy = by; qy < by + acs.covered_blocks_y(); qy++) {
+          float* JXL_RESTRICT quant_field_row = quant_field.Row(qy);
+          for (size_t qx = bx; qx < bx + acs.covered_blocks_x(); qx++) {
+            quant_field_row[qx] *= qf_mul;  // same factor for every covered block
+          }
+        }
+      }
+    }
+  }
+  quantizer.SetQuantField(initial_quant_dc, quant_field, &raw_quant_field);  // commit the final field
+}
+
+}  // namespace
+
+void AdjustQuantField(const AcStrategyImage& ac_strategy, const Rect& rect,
+                      float butteraugli_target, ImageF* quant_field) {  // modifies *quant_field in place within rect
+  // For every multi-block transform, replace the quant_field values of all its
+  // 8x8 blocks with one shared value: their maximum, or a max/mean blend (see below).
+  size_t stride = quant_field->PixelsPerRow();
+
+  // At low distances it is great to use max, but mean works better
+  // at high distances. We interpolate between them for a distance
+  // range.
+  float mean_max_mixer = 1.0f;  // 1 = pure max, 0 = pure mean
+  {
+    static const float kLimit = 1.54138f;
+    static const float kMul = 0.56391f;
+    static const float kMin = 0.0f;
+    if (butteraugli_target > kLimit) {  // above kLimit the weight of max drops linearly, floored at kMin
+      mean_max_mixer -= (butteraugli_target - kLimit) * kMul;
+      if (mean_max_mixer < kMin) {
+        mean_max_mixer = kMin;
+      }
+    }
+  }
+  for (size_t y = 0; y < rect.ysize(); ++y) {
+    AcStrategyRow ac_strategy_row = ac_strategy.ConstRow(rect, y);
+    float* JXL_RESTRICT quant_row = rect.Row(quant_field, y);
+    for (size_t x = 0; x < rect.xsize(); ++x) {
+      AcStrategy acs = ac_strategy_row[x];
+      if (!acs.IsFirstBlock()) continue;  // presumably: handle each multi-block transform once — confirm
+      JXL_ASSERT(x + acs.covered_blocks_x() <= quant_field->xsize());  // NOTE(review): x, y are rect-relative here
+      JXL_ASSERT(y + acs.covered_blocks_y() <= quant_field->ysize());
+      float max = quant_row[x];
+      float mean = 0.0;
+      for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+        for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+          mean += quant_row[x + ix + iy * stride];  // rows below are reached through the stride
+          max = std::max(quant_row[x + ix + iy * stride], max);
+        }
+      }
+      mean /= acs.covered_blocks_y() * acs.covered_blocks_x();
+      if (acs.covered_blocks_y() * acs.covered_blocks_x() >= 4) {  // blending only for transforms of 4+ blocks
+        max *= mean_max_mixer;
+        max += (1.0f - mean_max_mixer) * mean;
+      }
+      for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+        for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+          quant_row[x + ix + iy * stride] = max;  // write the shared value to every covered block
+        }
+      }
+    }
+  }
+}
+
+float InitialQuantDC(float butteraugli_target) {  // returns kDcQuant / adjusted target, capped at 50
+  const float kDcMul = 0.3;  // Butteraugli target where non-linearity kicks in.
+  const float butteraugli_target_dc = std::max<float>(
+      0.5f * butteraugli_target,  // lower bound: never below half the target
+      std::min<float>(butteraugli_target,  // upper bound: never above the target
+                      kDcMul * std::pow((1.0f / kDcMul) * butteraugli_target,
+                                        kDcQuantPow)));
+  // We want the maximum DC value to be at most 2**15 * kInvDCQuant / quant_dc.
+  // The maximum DC value might not be in the kXybRange because of inverse
+  // gaborish, so we add some slack to the maximum theoretical quant obtained
+  // this way (64). NOTE(review): the clamp below uses 50, not 64 — confirm.
+  return std::min(kDcQuant / butteraugli_target_dc, 50.f);
+}
+
+ImageF InitialQuantField(const float butteraugli_target, const Image3F& opsin,
+                         const FrameDimensions& frame_dim, ThreadPool* pool,
+                         float rescale, ImageF* mask) {
+  const float ac_scale = (kAcQuant / butteraugli_target) * rescale;  // overall AC scale passed on
+  return HWY_DYNAMIC_DISPATCH(AdaptiveQuantizationMap)(
+      butteraugli_target, opsin, frame_dim, ac_scale, pool, mask);
+}
+
+void FindBestQuantizer(const ImageBundle* linear, const Image3F& opsin,
+                       PassesEncoderState* enc_state,
+                       const JxlCmsInterface& cms, ThreadPool* pool,
+                       AuxOut* aux_out, double rescale) {
+  const CompressParams& params = enc_state->cparams;  // `rescale` is not referenced in this function
+  if (params.max_error_mode) {
+    FindBestQuantizationMaxError(opsin, enc_state, cms, pool, aux_out);
+    return;
+  }  // otherwise: normal encoding to a butteraugli score, at tier kKitten or below only
+  if (params.speed_tier > SpeedTier::kKitten) return;
+  FindBestQuantization(*linear, opsin, enc_state, cms, pool, aux_out);
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_adaptive_quantization.h b/third-party/libjxl/libjxl/lib/jxl/enc_adaptive_quantization.h
new file mode 100644
index 0000000000..730cec6dcb
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_adaptive_quantization.h
@@ -0,0 +1,60 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_ADAPTIVE_QUANTIZATION_H_
+#define LIB_JXL_ENC_ADAPTIVE_QUANTIZATION_H_
+
+#include <stddef.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+
+// Heuristics to find a good quantizer for a given image. InitialQuantField
+// produces a quantization field (i.e. relative quantization amounts for each
+// block) out of an opsin-space image. `InitialQuantField` uses heuristics,
+// `FindBestQuantizer` (in non-fast mode) will run multiple encoding-decoding
+// steps and try to improve the given quant field.
+
+namespace jxl {
+
+struct AuxOut;
+
+// Returns an image subsampled by kBlockDim in each direction. A value greater
+// than 1.0 at pixel (x,y) means the corresponding block of the input image
+// should use more fine-grained quantization; a value less than 1.0 means less
+// fine-grained quantization should be enough. `rescale` multiplies the AC
+// scale (kAcQuant / butteraugli_target). A mask is produced too, which can
+// later be used to make better decisions about ac strategy.
+ImageF InitialQuantField(float butteraugli_target, const Image3F& opsin,
+                         const FrameDimensions& frame_dim, ThreadPool* pool,
+                         float rescale, ImageF* initial_quant_mask);
+
+float InitialQuantDC(float butteraugli_target);
+
+void AdjustQuantField(const AcStrategyImage& ac_strategy, const Rect& rect,
+                      float butteraugli_target, ImageF* quant_field);
+
+// Dispatches the quantizer search. With cparams.max_error_mode set it runs
+// FindBestQuantizationMaxError; otherwise, at SpeedTier::kKitten or slower, it
+// runs FindBestQuantization (encoding to a butteraugli score); at faster
+// tiers it does nothing. `linear` is only used in Kitten mode or slower.
+void FindBestQuantizer(const ImageBundle* linear, const Image3F& opsin,
+                       PassesEncoderState* enc_state,
+                       const JxlCmsInterface& cms, ThreadPool* pool,
+                       AuxOut* aux_out, double rescale = 1.0);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_ADAPTIVE_QUANTIZATION_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_ans.cc b/third-party/libjxl/libjxl/lib/jxl/enc_ans.cc
new file mode 100644
index 0000000000..564ceba71b
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_ans.cc
@@ -0,0 +1,1691 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_ans.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <limits>
+#include <numeric>
+#include <type_traits>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ans_common.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_cluster.h"
+#include "lib/jxl/enc_context_map.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/enc_huffman.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+
+namespace {
+
+#if !JXL_IS_DEBUG_BUILD
+constexpr
+#endif
+    bool ans_fuzzer_friendly_ = false;  // true: skip clustering, use one fixed histogram
+
+static const int kMaxNumSymbolsForSmallCode = 4;  // capacity of the `symbols` arrays filled by NormalizeCounts
+
+void ANSBuildInfoTable(const ANSHistBin* counts, const AliasTable::Entry* table,
+                       size_t alphabet_size, size_t log_alpha_size,
+                       ANSEncSymbolInfo* info) {  // Fills info[s].freq_ and info[s].reverse_map_ per symbol.
+  size_t log_entry_size = ANS_LOG_TAB_SIZE - log_alpha_size;
+  size_t entry_size_minus_1 = (1 << log_entry_size) - 1;
+  // The loop runs at least once: with an empty alphabet, symbol 0 gets the whole table.
+  for (size_t s = 0; s < std::max<size_t>(1, alphabet_size); ++s) {
+    const ANSHistBin freq = s == alphabet_size ? ANS_TAB_SIZE : counts[s];
+    info[s].freq_ = static_cast<uint16_t>(freq);
+#ifdef USE_MULT_BY_RECIPROCAL
+    if (freq != 0) {
+      info[s].ifreq_ =  // ceil(2^RECIPROCAL_PRECISION / freq_)
+          ((1ull << RECIPROCAL_PRECISION) + info[s].freq_ - 1) / info[s].freq_;
+    } else {
+      info[s].ifreq_ = 1;  // shouldn't matter (symbol shouldn't occur), but...
+    }
+#endif
+    info[s].reverse_map_.resize(freq);  // one slot per unit of frequency
+  }
+  for (int i = 0; i < ANS_TAB_SIZE; i++) {  // Invert the alias table lookup.
+    AliasTable::Symbol s =
+        AliasTable::Lookup(table, i, log_entry_size, entry_size_minus_1);
+    info[s.value].reverse_map_[s.offset] = i;  // (symbol, offset) -> table index
+  }
+}
+
+float EstimateDataBits(const ANSHistBin* histogram, const ANSHistBin* counts,
+                       size_t len) {  // Estimated bits to code `histogram` with the normalized `counts`.
+  float sum = 0.0f;
+  int total_histogram = 0;
+  int total_counts = 0;
+  for (size_t i = 0; i < len; ++i) {
+    total_histogram += histogram[i];
+    total_counts += counts[i];
+    if (histogram[i] > 0) {
+      JXL_ASSERT(counts[i] > 0);
+      // Adds histogram[i] * (ANS_LOG_TAB_SIZE - log2(counts[i])), clamped at 0 bits.
+      sum += histogram[i] *
+             std::max(0.0f, ANS_LOG_TAB_SIZE - FastLog2f(counts[i]));
+    }
+  }
+  if (total_histogram > 0) {
+    // total_counts is used only in the assert; the cast silences unused warnings.
+    (void)total_counts;
+    JXL_ASSERT(total_counts == ANS_TAB_SIZE);
+  }
+  return sum;
+}
+
+float EstimateDataBitsFlat(const ANSHistBin* histogram, size_t len) {
+  const float flat_bits = std::max(FastLog2f(len), 0.0f);  // log2(len) per symbol, never negative
+  float total_histogram = 0;  // sum of all occurrences
+  for (size_t i = 0; i < len; ++i) {
+    total_histogram += histogram[i];
+  }
+  return total_histogram * flat_bits;  // every occurrence costs the same
+}
+
+// Static Huffman code for encoding logcounts, indexed by logcount: bit lengths
+// and code bits. The last entry (ANS_LOG_TAB_SIZE + 1) is the RLE marker.
+static const uint8_t kLogCountBitLengths[ANS_LOG_TAB_SIZE + 2] = {
+    5, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 6, 7, 7,
+};
+static const uint8_t kLogCountSymbols[ANS_LOG_TAB_SIZE + 2] = {
+    17, 11, 15, 3, 9, 7, 4, 2, 5, 6, 0, 33, 1, 65,
+};
+
+// Returns the step between representable counts around `count` for the given
+// `shift`: a power of two, 1 << drop_bits, and 1 if drop_bits is negative.
+static int SmallestIncrement(uint32_t count, uint32_t shift) {
+  int bits = count == 0 ? -1 : FloorLog2Nonzero(count);  // -1 stands in for log2(0)
+  int drop_bits = bits - GetPopulationCountPrecision(bits, shift);
+  return drop_bits < 0 ? 1 : (1 << drop_bits);
+}
+
+template <bool minimize_error_of_sum>
+bool RebalanceHistogram(const float* targets, int max_symbol, int table_size,
+                        uint32_t shift, int* omit_pos, ANSHistBin* counts) {
+  int sum = 0;
+  float sum_nonrounded = 0.0;
+  int remainder_pos = 0;  // stays 0 if every symbol is handled in the first loop
+  int remainder_log = -1;
+  for (int n = 0; n < max_symbol; ++n) {  // Pass 1: targets in (0, 1) are pinned to a count of 1.
+    if (targets[n] > 0 && targets[n] < 1.0f) {
+      counts[n] = 1;
+      sum_nonrounded += targets[n];
+      sum += counts[n];
+    }
+  }
+  const float discount_ratio =  // shrinks the remaining targets to pay for the counts rounded up to 1
+      (table_size - sum) / (table_size - sum_nonrounded);
+  JXL_ASSERT(discount_ratio > 0);
+  JXL_ASSERT(discount_ratio <= 1.0f);
+  // Invariant for minimize_error_of_sum == true:
+  // abs(sum - sum_nonrounded)
+  //   <= SmallestIncrement(max(targets[])) + max_symbol
+  for (int n = 0; n < max_symbol; ++n) {  // Pass 2: targets >= 1.
+    if (targets[n] >= 1.0f) {
+      sum_nonrounded += targets[n];
+      counts[n] =
+          static_cast<ANSHistBin>(targets[n] * discount_ratio);  // truncate
+      if (counts[n] == 0) counts[n] = 1;
+      if (counts[n] == table_size) counts[n] = table_size - 1;
+      // Round the count to the closest nonzero multiple of SmallestIncrement
+      // (when minimize_error_of_sum is false) or one of two closest so as to
+      // keep the sum as close as possible to sum_nonrounded.
+      int inc = SmallestIncrement(counts[n], shift);
+      counts[n] -= counts[n] & (inc - 1);  // round down to a multiple of inc
+      // TODO(robryk): Should we rescale targets[n]?
+      const float target =
+          minimize_error_of_sum ? (sum_nonrounded - sum) : targets[n];
+      if (counts[n] == 0 ||
+          (target > counts[n] + inc / 2 && counts[n] + inc < table_size)) {
+        counts[n] += inc;  // round up instead
+      }
+      sum += counts[n];
+      const int count_log = FloorLog2Nonzero(static_cast<uint32_t>(counts[n]));
+      if (count_log > remainder_log) {  // remember the first symbol with the largest floor(log2(count))
+        remainder_pos = n;
+        remainder_log = count_log;
+      }
+    }
+  }
+  JXL_ASSERT(remainder_pos != -1);
+  // NOTE: This is the only place where counts could go negative. We could
+  // detect that, return false and make ANSHistBin uint32_t.
+  counts[remainder_pos] -= sum - table_size;  // absorb the rounding error so the counted sum is table_size
+  *omit_pos = remainder_pos;
+  return counts[remainder_pos] > 0;  // false: the correction left a non-positive count
+}
+
+Status NormalizeCounts(ANSHistBin* counts, int* omit_pos, const int length,
+                       const int precision_bits, uint32_t shift,
+                       int* num_symbols, int* symbols) {
+  const int32_t table_size = 1 << precision_bits;  // target sum / table size
+  uint64_t total = 0;
+  int max_symbol = 0;
+  int symbol_count = 0;
+  for (int n = 0; n < length; ++n) {
+    total += counts[n];
+    if (counts[n] > 0) {
+      if (symbol_count < kMaxNumSymbolsForSmallCode) {  // only the first few symbols are recorded
+        symbols[symbol_count] = n;
+      }
+      ++symbol_count;
+      max_symbol = n + 1;  // one past the last nonzero entry
+    }
+  }
+  *num_symbols = symbol_count;
+  if (symbol_count == 0) {
+    return true;  // empty histogram: nothing to normalize
+  }
+  if (symbol_count == 1) {
+    counts[symbols[0]] = table_size;  // the single symbol gets the whole table
+    return true;
+  }
+  if (symbol_count > table_size)
+    return JXL_FAILURE("Too many entries in an ANS histogram");
+
+  const float norm = 1.f * table_size / total;  // scale factor from raw counts to table units
+  std::vector<float> targets(max_symbol);
+  for (size_t n = 0; n < targets.size(); ++n) {
+    targets[n] = norm * counts[n];
+  }
+  if (!RebalanceHistogram<false>(&targets[0], max_symbol, table_size, shift,
+                                 omit_pos, counts)) {
+    // Use an alternative rebalancing mechanism if the one above failed
+    // to create a histogram that is positive wherever the original one was.
+    if (!RebalanceHistogram<true>(&targets[0], max_symbol, table_size, shift,
+                                  omit_pos, counts)) {
+      return JXL_FAILURE("Logic error: couldn't rebalance a histogram");
+    }
+  }
+  return true;
+}
+
+struct SizeWriter {
+  size_t size = 0;  // running total of the first Write() argument (the bit count at the call sites here)
+  void Write(size_t num, size_t bits) { size += num; }  // `bits` is ignored
+};
+
+template <typename Writer>
+void StoreVarLenUint8(size_t n, Writer* writer) {
+  JXL_DASSERT(n <= 255);
+  if (n == 0) {
+    writer->Write(1, 0);  // a single 0 bit encodes n == 0
+  } else {
+    writer->Write(1, 1);  // nonzero flag
+    size_t nbits = FloorLog2Nonzero(n);
+    writer->Write(3, nbits);  // 3-bit field: floor(log2(n))
+    writer->Write(nbits, n - (1ULL << nbits));  // n without its leading 1 bit
+  }
+}
+
+template <typename Writer>
+void StoreVarLenUint16(size_t n, Writer* writer) {
+  JXL_DASSERT(n <= 65535);
+  if (n == 0) {
+    writer->Write(1, 0);  // a single 0 bit encodes n == 0
+  } else {
+    writer->Write(1, 1);  // nonzero flag
+    size_t nbits = FloorLog2Nonzero(n);
+    writer->Write(4, nbits);  // 4-bit field: floor(log2(n))
+    writer->Write(nbits, n - (1ULL << nbits));  // n without its leading 1 bit
+  }
+}
+
+template <typename Writer>
+bool EncodeCounts(const ANSHistBin* counts, const int alphabet_size,
+                  const int omit_pos, const int num_symbols, uint32_t shift,
+                  const int* symbols, Writer* writer) {
+  bool ok = true;  // cleared below if `length` does not fit the 8-bit length field
+  if (num_symbols <= 2) {
+    // Small tree marker to encode 1-2 symbols.
+    writer->Write(1, 1);
+    if (num_symbols == 0) {
+      writer->Write(1, 0);
+      StoreVarLenUint8(0, writer);
+    } else {
+      writer->Write(1, num_symbols - 1);
+      for (int i = 0; i < num_symbols; ++i) {
+        StoreVarLenUint8(symbols[i], writer);
+      }
+    }
+    if (num_symbols == 2) {
+      writer->Write(ANS_LOG_TAB_SIZE, counts[symbols[0]]);  // only the first symbol's count is written
+    }
+  } else {
+    // Mark non-small tree.
+    writer->Write(1, 0);
+    // Mark non-flat histogram.
+    writer->Write(1, 0);
+
+    // Precompute sequences for RLE encoding. Contains the number of identical
+    // values starting at a given index. Only contains the value at the first
+    // element of the series.
+    std::vector<uint32_t> same(alphabet_size, 0);
+    int last = 0;
+    for (int i = 1; i < alphabet_size; i++) {
+      // Store the sequence length once different symbol reached, or we're at
+      // the end, or the length is longer than we can encode, or we are at
+      // the omit_pos. We don't support including the omit_pos in an RLE
+      // sequence because this value may use a different amount of log2 bits
+      // than standard, it is too complex to handle in the decoder.
+      if (counts[i] != counts[last] || i + 1 == alphabet_size ||
+          (i - last) >= 255 || i == omit_pos || i == omit_pos + 1) {
+        same[last] = (i - last);
+        last = i + 1;
+      }
+    }
+
+    int length = 0;  // one past the last index that must be encoded
+    std::vector<int> logcounts(alphabet_size);
+    int omit_log = 0;
+    for (int i = 0; i < alphabet_size; ++i) {
+      JXL_ASSERT(counts[i] <= ANS_TAB_SIZE);
+      JXL_ASSERT(counts[i] >= 0);
+      if (i == omit_pos) {
+        length = i + 1;
+      } else if (counts[i] > 0) {
+        logcounts[i] = FloorLog2Nonzero(static_cast<uint32_t>(counts[i])) + 1;
+        length = i + 1;
+        if (i < omit_pos) {
+          omit_log = std::max(omit_log, logcounts[i] + 1);
+        } else {
+          omit_log = std::max(omit_log, logcounts[i]);
+        }
+      }
+    }
+    logcounts[omit_pos] = omit_log;  // >= every other logcount, and > those before omit_pos
+
+    // Elias gamma-like code for shift. Only difference is that if the number
+    // of bits to be encoded is equal to FloorLog2(ANS_LOG_TAB_SIZE+1), we skip
+    // the terminating 0 in unary coding.
+    int upper_bound_log = FloorLog2Nonzero(ANS_LOG_TAB_SIZE + 1);
+    int log = FloorLog2Nonzero(shift + 1);
+    writer->Write(log, (1 << log) - 1);
+    if (log != upper_bound_log) writer->Write(1, 0);
+    writer->Write(log, ((1 << log) - 1) & (shift + 1));
+
+    // Since num_symbols >= 3, we know that length >= 3, therefore we encode
+    // length - 3.
+    if (length - 3 > 255) {
+      // Pretend that everything is OK, but complain about correctness later.
+      StoreVarLenUint8(255, writer);
+      ok = false;
+    } else {
+      StoreVarLenUint8(length - 3, writer);
+    }
+
+    // The logcount values are encoded with a static Huffman code.
+    static const size_t kMinReps = 4;
+    size_t rep = ANS_LOG_TAB_SIZE + 1;  // index of the RLE marker in the kLogCount tables
+    for (int i = 0; i < length; ++i) {
+      if (i > 0 && same[i - 1] > kMinReps) {
+        // Encode the RLE symbol and skip the repeated ones.
+        writer->Write(kLogCountBitLengths[rep], kLogCountSymbols[rep]);
+        StoreVarLenUint8(same[i - 1] - kMinReps - 1, writer);
+        i += same[i - 1] - 2;  // with the loop's ++i, resumes right after the run
+        continue;
+      }
+      writer->Write(kLogCountBitLengths[logcounts[i]],
+                    kLogCountSymbols[logcounts[i]]);
+    }
+    for (int i = 0; i < length; ++i) {
+      if (i > 0 && same[i - 1] > kMinReps) {
+        // Skip symbols encoded by RLE.
+        i += same[i - 1] - 2;
+        continue;
+      }
+      if (logcounts[i] > 1 && i != omit_pos) {  // no extra bits are written for omit_pos
+        int bitcount = GetPopulationCountPrecision(logcounts[i] - 1, shift);
+        int drop_bits = logcounts[i] - 1 - bitcount;
+        JXL_CHECK((counts[i] & ((1 << drop_bits) - 1)) == 0);
+        writer->Write(bitcount, (counts[i] >> drop_bits) - (1 << bitcount));
+      }
+    }
+  }
+  return ok;
+}
+
+void EncodeFlatHistogram(const int alphabet_size, BitWriter* writer) {
+  // Mark non-small tree.
+  writer->Write(1, 0);
+  // Mark uniform histogram.
+  writer->Write(1, 1);
+  JXL_ASSERT(alphabet_size > 0);
+  // Encode alphabet size minus one.
+  StoreVarLenUint8(alphabet_size - 1, writer);
+}
+
+float ComputeHistoAndDataCost(const ANSHistBin* histogram, size_t alphabet_size,
+                              uint32_t method) {
+  if (method == 0) {  // Flat code
+    return ANS_LOG_TAB_SIZE + 2 +  // constant header term + flat data bits
+           EstimateDataBitsFlat(histogram, alphabet_size);
+  }
+  // Non-flat: shift = method-1.
+  uint32_t shift = method - 1;
+  std::vector<ANSHistBin> counts(histogram, histogram + alphabet_size);  // working copy; the input stays untouched
+  int omit_pos = 0;
+  int num_symbols;
+  int symbols[kMaxNumSymbolsForSmallCode] = {};
+  JXL_CHECK(NormalizeCounts(counts.data(), &omit_pos, alphabet_size,
+                            ANS_LOG_TAB_SIZE, shift, &num_symbols, symbols));
+  SizeWriter writer;
+  // Ignore the correctness, no real encoding happens at this stage.
+  (void)EncodeCounts(counts.data(), alphabet_size, omit_pos, num_symbols, shift,
+                     symbols, &writer);
+  return writer.size +  // header size tallied by SizeWriter + estimated data bits
+         EstimateDataBits(histogram, counts.data(), alphabet_size);
+}
+
+uint32_t ComputeBestMethod(
+    const ANSHistBin* histogram, size_t alphabet_size, float* cost,
+    HistogramParams::ANSHistogramStrategy ans_histogram_strategy) {
+  size_t method = 0;  // 0 = flat code; k > 0 = non-flat with shift k - 1
+  float fcost = ComputeHistoAndDataCost(histogram, alphabet_size, 0);
+  auto try_shift = [&](size_t shift) {  // adopt `shift` only if strictly cheaper
+    float c = ComputeHistoAndDataCost(histogram, alphabet_size, shift + 1);
+    if (c < fcost) {
+      method = shift + 1;
+      fcost = c;
+    }
+  };
+  switch (ans_histogram_strategy) {
+    case HistogramParams::ANSHistogramStrategy::kPrecise: {
+      for (uint32_t shift = 0; shift <= ANS_LOG_TAB_SIZE; shift++) {  // every shift
+        try_shift(shift);
+      }
+      break;
+    }
+    case HistogramParams::ANSHistogramStrategy::kApproximate: {
+      for (uint32_t shift = 0; shift <= ANS_LOG_TAB_SIZE; shift += 2) {  // every other shift
+        try_shift(shift);
+      }
+      break;
+    }
+    case HistogramParams::ANSHistogramStrategy::kFast: {  // three probes: smallest, middle, largest shift
+      try_shift(0);
+      try_shift(ANS_LOG_TAB_SIZE / 2);
+      try_shift(ANS_LOG_TAB_SIZE);
+      break;
+    }
+  };
+  *cost = fcost;
+  return method;
+}
+
+}  // namespace
+
+// Builds the encoder tables in `info` for `histogram` and, when `writer` is
+// non-null, stores the code; returns the estimated bit cost of code plus data.
+size_t BuildAndStoreANSEncodingData(
+    HistogramParams::ANSHistogramStrategy ans_histogram_strategy,
+    const ANSHistBin* histogram, size_t alphabet_size, size_t log_alpha_size,
+    bool use_prefix_code, ANSEncSymbolInfo* info, BitWriter* writer) {
+  if (use_prefix_code) {
+    if (alphabet_size <= 1) return 0;  // nothing is stored and `info` is left untouched
+    std::vector<uint32_t> histo(alphabet_size);
+    for (size_t i = 0; i < alphabet_size; i++) {
+      histo[i] = histogram[i];
+      JXL_CHECK(histogram[i] >= 0);
+    }
+    size_t cost = 0;
+    {
+      std::vector<uint8_t> depths(alphabet_size);
+      std::vector<uint16_t> bits(alphabet_size);
+      if (writer == nullptr) {  // estimate only: write into a scratch BitWriter
+        BitWriter tmp_writer;
+        BitWriter::Allotment allotment(
+            &tmp_writer, 8 * alphabet_size + 8);  // safe upper bound
+        BuildAndStoreHuffmanTree(histo.data(), alphabet_size, depths.data(),
+                                 bits.data(), &tmp_writer);
+        allotment.ReclaimAndCharge(&tmp_writer, 0, /*aux_out=*/nullptr);
+        cost = tmp_writer.BitsWritten();
+      } else {
+        size_t start = writer->BitsWritten();
+        BuildAndStoreHuffmanTree(histo.data(), alphabet_size, depths.data(),
+                                 bits.data(), writer);
+        cost = writer->BitsWritten() - start;  // bits spent on the tree itself
+      }
+      for (size_t i = 0; i < alphabet_size; i++) {
+        info[i].bits = depths[i] == 0 ? 0 : bits[i];  // depth-0 symbols get zeroed code bits
+        info[i].depth = depths[i];
+      }
+    }
+    // Estimate data cost: occurrences times code length.
+    for (size_t i = 0; i < alphabet_size; i++) {
+      cost += histogram[i] * info[i].depth;
+    }
+    return cost;
+  }
+  JXL_ASSERT(alphabet_size <= ANS_TAB_SIZE);
+  // Ensure we ignore trailing zeros in the histogram.
+  if (alphabet_size != 0) {
+    size_t largest_symbol = 0;
+    for (size_t i = 0; i < alphabet_size; i++) {
+      if (histogram[i] != 0) largest_symbol = i;
+    }
+    alphabet_size = largest_symbol + 1;
+  }
+  float cost;
+  uint32_t method = ComputeBestMethod(histogram, alphabet_size, &cost,
+                                      ans_histogram_strategy);
+  JXL_ASSERT(cost >= 0);
+  int num_symbols;
+  int symbols[kMaxNumSymbolsForSmallCode] = {};
+  std::vector<ANSHistBin> counts(histogram, histogram + alphabet_size);
+  if (!counts.empty()) {
+    size_t sum = 0;
+    for (size_t i = 0; i < counts.size(); i++) {
+      sum += counts[i];
+    }
+    if (sum == 0) {
+      counts[0] = ANS_TAB_SIZE;  // all-zero histogram: symbol 0 takes the whole table
+    }
+  }
+  if (method == 0) {  // flat histogram
+    counts = CreateFlatHistogram(alphabet_size, ANS_TAB_SIZE);
+    AliasTable::Entry a[ANS_MAX_ALPHABET_SIZE];
+    InitAliasTable(counts, ANS_TAB_SIZE, log_alpha_size, a);
+    ANSBuildInfoTable(counts.data(), a, alphabet_size, log_alpha_size, info);
+    if (writer != nullptr) {
+      EncodeFlatHistogram(alphabet_size, writer);
+    }
+    return cost;  // float estimate, implicitly converted to size_t
+  }
+  int omit_pos = 0;
+  uint32_t shift = method - 1;  // same method -> shift mapping as ComputeHistoAndDataCost
+  JXL_CHECK(NormalizeCounts(counts.data(), &omit_pos, alphabet_size,
+                            ANS_LOG_TAB_SIZE, shift, &num_symbols, symbols));
+  AliasTable::Entry a[ANS_MAX_ALPHABET_SIZE];
+  InitAliasTable(counts, ANS_TAB_SIZE, log_alpha_size, a);
+  ANSBuildInfoTable(counts.data(), a, alphabet_size, log_alpha_size, info);
+  if (writer != nullptr) {
+    bool ok = EncodeCounts(counts.data(), alphabet_size, omit_pos, num_symbols,
+                           shift, symbols, writer);
+    (void)ok;  // only checked by the debug assert below
+    JXL_DASSERT(ok);
+  }
+  return cost;  // float estimate, implicitly converted to size_t
+}
+
+float ANSPopulationCost(const ANSHistBin* data, size_t alphabet_size) {
+  float c;  // receives the best cost; the chosen method itself is discarded
+  ComputeBestMethod(data, alphabet_size, &c,
+                    HistogramParams::ANSHistogramStrategy::kFast);
+  return c;
+}
+
+template <typename Writer>
+void EncodeUintConfig(const HybridUintConfig uint_config, Writer* writer,
+                      size_t log_alpha_size) {
+  writer->Write(CeilLog2Nonzero(log_alpha_size + 1),  // field width for split_exponent
+                uint_config.split_exponent);
+  if (uint_config.split_exponent == log_alpha_size) {
+    return;  // msb/lsb don't matter.
+  }
+  size_t nbits = CeilLog2Nonzero(uint_config.split_exponent + 1);  // width of the msb_in_token field
+  writer->Write(nbits, uint_config.msb_in_token);
+  nbits = CeilLog2Nonzero(uint_config.split_exponent -  // width of the lsb_in_token field
+                          uint_config.msb_in_token + 1);
+  writer->Write(nbits, uint_config.lsb_in_token);
+}
+template <typename Writer>
+void EncodeUintConfigs(const std::vector<HybridUintConfig>& uint_config,
+                       Writer* writer, size_t log_alpha_size) {
+  // TODO(veluca): RLE?
+  for (size_t i = 0; i < uint_config.size(); i++) {  // configs are written back to back, in order
+    EncodeUintConfig(uint_config[i], writer, log_alpha_size);
+  }
+}
+template void EncodeUintConfigs(const std::vector<HybridUintConfig>&,  // explicit instantiation for BitWriter
+                                BitWriter*, size_t);
+
+namespace {
+
+void ChooseUintConfigs(const HistogramParams& params,
+                       const std::vector<std::vector<Token>>& tokens,
+                       const std::vector<uint8_t>& context_map,
+                       std::vector<Histogram>* clustered_histograms,
+                       EntropyEncodingData* codes, size_t* log_alpha_size) {
+  codes->uint_config.resize(clustered_histograms->size());
+
+  if (params.uint_method == HistogramParams::HybridUintMethod::kNone) return;  // keep the configs as resized above
+  if (params.uint_method == HistogramParams::HybridUintMethod::k000) {
+    codes->uint_config.clear();
+    codes->uint_config.resize(clustered_histograms->size(),
+                              HybridUintConfig(0, 0, 0));
+    return;
+  }
+  if (params.uint_method == HistogramParams::HybridUintMethod::kContextMap) {
+    codes->uint_config.clear();
+    codes->uint_config.resize(clustered_histograms->size(),
+                              HybridUintConfig(2, 0, 1));
+    return;
+  }
+
+  // Brute-force method that tries a few options.
+  std::vector<HybridUintConfig> configs;
+  if (params.uint_method == HistogramParams::HybridUintMethod::kBest) {
+    configs = {
+        HybridUintConfig(4, 2, 0),  // default
+        HybridUintConfig(4, 1, 0),  // less precise
+        HybridUintConfig(4, 2, 1),  // add sign
+        HybridUintConfig(4, 2, 2),  // add sign+parity
+        HybridUintConfig(4, 1, 2),  // add parity but less msb
+        // Same as above, but more direct coding.
+        HybridUintConfig(5, 2, 0), HybridUintConfig(5, 1, 0),
+        HybridUintConfig(5, 2, 1), HybridUintConfig(5, 2, 2),
+        HybridUintConfig(5, 1, 2),
+        // Same as above, but less direct coding.
+        HybridUintConfig(3, 2, 0), HybridUintConfig(3, 1, 0),
+        HybridUintConfig(3, 2, 1), HybridUintConfig(3, 1, 2),
+        // For near-lossless.
+        HybridUintConfig(4, 1, 3), HybridUintConfig(5, 1, 4),
+        HybridUintConfig(5, 2, 3), HybridUintConfig(6, 1, 5),
+        HybridUintConfig(6, 2, 4), HybridUintConfig(6, 0, 0),
+        // Other
+        HybridUintConfig(0, 0, 0),   // varlenuint
+        HybridUintConfig(2, 0, 1),   // works well for ctx map
+        HybridUintConfig(7, 0, 0),   // direct coding
+        HybridUintConfig(8, 0, 0),   // direct coding
+        HybridUintConfig(9, 0, 0),   // direct coding
+        HybridUintConfig(10, 0, 0),  // direct coding
+        HybridUintConfig(11, 0, 0),  // direct coding
+        HybridUintConfig(12, 0, 0),  // direct coding
+    };
+  } else if (params.uint_method == HistogramParams::HybridUintMethod::kFast) {
+    configs = {
+        HybridUintConfig(4, 2, 0),  // default
+        HybridUintConfig(4, 1, 2),  // add parity but less msb
+        HybridUintConfig(0, 0, 0),  // smallest histograms
+        HybridUintConfig(2, 0, 1),  // works well for ctx map
+    };
+  }
+
+  std::vector<float> costs(clustered_histograms->size(),
+                           std::numeric_limits<float>::max());  // best cost so far, per histogram
+  std::vector<uint32_t> extra_bits(clustered_histograms->size());
+  std::vector<uint8_t> is_valid(clustered_histograms->size());  // per histogram: can the current cfg be used?
+  size_t max_alpha =
+      codes->use_prefix_code ? PREFIX_MAX_ALPHABET_SIZE : ANS_MAX_ALPHABET_SIZE;
+  for (HybridUintConfig cfg : configs) {
+    std::fill(is_valid.begin(), is_valid.end(), true);
+    std::fill(extra_bits.begin(), extra_bits.end(), 0);
+
+    for (size_t i = 0; i < clustered_histograms->size(); i++) {
+      (*clustered_histograms)[i].Clear();
+    }
+    for (size_t i = 0; i < tokens.size(); ++i) {
+      for (size_t j = 0; j < tokens[i].size(); ++j) {
+        const Token token = tokens[i][j];
+        // TODO(veluca): do not ignore lz77 commands.
+        if (token.is_lz77_length) continue;
+        size_t histo = context_map[token.context];
+        uint32_t tok, nbits, bits;
+        cfg.Encode(token.value, &tok, &nbits, &bits);
+        if (tok >= max_alpha ||
+            (codes->lz77.enabled && tok >= codes->lz77.min_symbol)) {
+          is_valid[histo] = false;  // this cfg is rejected for the whole histogram
+          continue;
+        }
+        extra_bits[histo] += nbits;
+        (*clustered_histograms)[histo].Add(tok);
+      }
+    }
+
+    for (size_t i = 0; i < clustered_histograms->size(); i++) {
+      if (!is_valid[i]) continue;
+      float cost = (*clustered_histograms)[i].PopulationCost() + extra_bits[i];
+      // add signaling cost of the hybriduintconfig itself (same widths as EncodeUintConfig's msb/lsb fields)
+      cost += CeilLog2Nonzero(cfg.split_exponent + 1);
+      cost += CeilLog2Nonzero(cfg.split_exponent - cfg.msb_in_token + 1);
+      if (cost < costs[i]) {
+        codes->uint_config[i] = cfg;
+        costs[i] = cost;
+      }
+    }
+  }
+
+  // Rebuild histograms with the chosen configs; lz77 length tokens are included this time.
+  for (size_t i = 0; i < clustered_histograms->size(); i++) {
+    (*clustered_histograms)[i].Clear();
+  }
+  *log_alpha_size = 4;  // grown below until every token value fits
+  for (size_t i = 0; i < tokens.size(); ++i) {
+    for (size_t j = 0; j < tokens[i].size(); ++j) {
+      const Token token = tokens[i][j];
+      uint32_t tok, nbits, bits;
+      size_t histo = context_map[token.context];
+      (token.is_lz77_length ? codes->lz77.length_uint_config
+                            : codes->uint_config[histo])
+          .Encode(token.value, &tok, &nbits, &bits);
+      tok += token.is_lz77_length ? codes->lz77.min_symbol : 0;  // lz77 lengths are offset by min_symbol
+      (*clustered_histograms)[histo].Add(tok);
+      while (tok >= (1u << *log_alpha_size)) (*log_alpha_size)++;
+    }
+  }
+#if JXL_ENABLE_ASSERT
+  size_t max_log_alpha_size = codes->use_prefix_code ? PREFIX_MAX_BITS : 8;
+  JXL_ASSERT(*log_alpha_size <= max_log_alpha_size);
+#endif
+}
+
+class HistogramBuilder {
+ public:
+  explicit HistogramBuilder(const size_t num_contexts)
+      : histograms_(num_contexts) {}
+
+  void VisitSymbol(int symbol, size_t histo_idx) {
+    JXL_DASSERT(histo_idx < histograms_.size());
+    histograms_[histo_idx].Add(symbol);
+  }
+
+  // NOTE: `layer` is only for clustered_entropy; caller does ReclaimAndCharge.
+  size_t BuildAndStoreEntropyCodes(
+      const HistogramParams& params,
+      const std::vector<std::vector<Token>>& tokens, EntropyEncodingData* codes,
+      std::vector<uint8_t>* context_map, bool use_prefix_code,
+      BitWriter* writer, size_t layer, AuxOut* aux_out) const {
+    size_t cost = 0;
+    codes->encoding_info.clear();
+    std::vector<Histogram> clustered_histograms(histograms_);
+    context_map->resize(histograms_.size());
+    if (histograms_.size() > 1) {
+      if (!ans_fuzzer_friendly_) {
+        std::vector<uint32_t> histogram_symbols;
+        ClusterHistograms(params, histograms_, kClustersLimit,
+                          &clustered_histograms, &histogram_symbols);
+        for (size_t c = 0; c < histograms_.size(); ++c) {
+          (*context_map)[c] = static_cast<uint8_t>(histogram_symbols[c]);
+        }
+      } else {
+        fill(context_map->begin(), context_map->end(), 0);
+        size_t max_symbol = 0;
+        for (const Histogram& h : histograms_) {
+          max_symbol = std::max(h.data_.size(), max_symbol);
+        }
+        size_t num_symbols = 1 << CeilLog2Nonzero(max_symbol + 1);
+        clustered_histograms.resize(1);
+        clustered_histograms[0].Clear();
+        for (size_t i = 0; i < num_symbols; i++) {
+          clustered_histograms[0].Add(i);
+        }
+      }
+      if (writer != nullptr) {
+        EncodeContextMap(*context_map, clustered_histograms.size(), writer,
+                         layer, aux_out);
+      }
+    }
+    if (aux_out != nullptr) {
+      for (size_t i = 0; i < clustered_histograms.size(); ++i) {
+        aux_out->layers[layer].clustered_entropy +=
+            clustered_histograms[i].ShannonEntropy();
+      }
+    }
+    codes->use_prefix_code = use_prefix_code;
+    size_t log_alpha_size = codes->lz77.enabled ? 8 : 7;  // Sane default.
+    if (ans_fuzzer_friendly_) {
+      codes->uint_config.clear();
+      codes->uint_config.resize(1, HybridUintConfig(7, 0, 0));
+    } else {
+      ChooseUintConfigs(params, tokens, *context_map, &clustered_histograms,
+                        codes, &log_alpha_size);
+    }
+    if (log_alpha_size < 5) log_alpha_size = 5;
+    SizeWriter size_writer;  // Used if writer == nullptr to estimate costs.
+    cost += 1;
+    if (writer) writer->Write(1, use_prefix_code);
+
+    if (use_prefix_code) {
+      log_alpha_size = PREFIX_MAX_BITS;
+    } else {
+      cost += 2;
+    }
+    if (writer == nullptr) {
+      EncodeUintConfigs(codes->uint_config, &size_writer, log_alpha_size);
+    } else {
+      if (!use_prefix_code) writer->Write(2, log_alpha_size - 5);
+      EncodeUintConfigs(codes->uint_config, writer, log_alpha_size);
+    }
+    if (use_prefix_code) {
+      for (size_t c = 0; c < clustered_histograms.size(); ++c) {
+        size_t num_symbol = 1;
+        for (size_t i = 0; i < clustered_histograms[c].data_.size(); i++) {
+          if (clustered_histograms[c].data_[i]) num_symbol = i + 1;
+        }
+        if (writer) {
+          StoreVarLenUint16(num_symbol - 1, writer);
+        } else {
+          StoreVarLenUint16(num_symbol - 1, &size_writer);
+        }
+      }
+    }
+    cost += size_writer.size;
+    for (size_t c = 0; c < clustered_histograms.size(); ++c) {
+      size_t num_symbol = 1;
+      for (size_t i = 0; i < clustered_histograms[c].data_.size(); i++) {
+        if (clustered_histograms[c].data_[i]) num_symbol = i + 1;
+      }
+      codes->encoding_info.emplace_back();
+      codes->encoding_info.back().resize(std::max<size_t>(1, num_symbol));
+
+      BitWriter::Allotment allotment(writer, 256 + num_symbol * 24);
+      cost += BuildAndStoreANSEncodingData(
+          params.ans_histogram_strategy, clustered_histograms[c].data_.data(),
+          num_symbol, log_alpha_size, use_prefix_code,
+          codes->encoding_info.back().data(), writer);
+      allotment.FinishedHistogram(writer);
+      allotment.ReclaimAndCharge(writer, layer, aux_out);
+    }
+    return cost;
+  }
+
+  // Read-only access to the histogram stored at index `i`. The index is not
+  // range-checked.
+  const Histogram& Histo(size_t i) const {
+    return histograms_[i];
+  }
+
+ private:
+  std::vector<Histogram> histograms_;
+};
+
+// Estimates, per context, how many bits each symbol costs under the empirical
+// distribution of a set of token streams. The LZ77 heuristics use it to weigh
+// literals against length/distance pairs.
+class SymbolCostEstimator {
+ public:
+  // Builds one histogram per context from `tokens` and derives the cost
+  // tables. Tokens flagged as LZ77 lengths are split with
+  // `lz77.length_uint_config` and offset by `lz77.min_symbol`; every other
+  // token is split with a default-constructed HybridUintConfig. With
+  // `force_huffman`, the cost of a symbol is rounded up to whole bits.
+  SymbolCostEstimator(size_t num_contexts, bool force_huffman,
+                      const std::vector<std::vector<Token>>& tokens,
+                      const LZ77Params& lz77) {
+    HistogramBuilder builder(num_contexts);
+    // Build histograms for estimating lz77 savings.
+    HybridUintConfig literal_config;
+    for (const std::vector<Token>& stream : tokens) {
+      for (const Token& token : stream) {
+        const HybridUintConfig& config =
+            token.is_lz77_length ? lz77.length_uint_config : literal_config;
+        uint32_t tok, nbits, bits;
+        config.Encode(token.value, &tok, &nbits, &bits);
+        if (token.is_lz77_length) tok += lz77.min_symbol;
+        builder.VisitSymbol(tok, token.context);
+      }
+    }
+
+    // All contexts share one row stride: the largest alphabet seen.
+    max_alphabet_size_ = 0;
+    for (size_t ctx = 0; ctx < num_contexts; ctx++) {
+      max_alphabet_size_ =
+          std::max(max_alphabet_size_, builder.Histo(ctx).data_.size());
+    }
+    bits_.resize(num_contexts * max_alphabet_size_);
+    add_symbol_cost_.resize(num_contexts);
+
+    // TODO(veluca): SIMD?
+    for (size_t ctx = 0; ctx < num_contexts; ctx++) {
+      const auto& histo = builder.Histo(ctx);
+      const float inv_total = 1.0f / (histo.total_count_ + 1e-8f);
+      float weighted_cost = 0;
+      for (size_t sym = 0; sym < histo.data_.size(); sym++) {
+        const size_t count = histo.data_[sym];
+        // A symbol that accounts for the whole histogram costs nothing.
+        float sym_bits = 0;
+        if (count == 0) {
+          sym_bits = ANS_LOG_TAB_SIZE;  // Highest possible cost.
+        } else if (count != histo.total_count_) {
+          sym_bits = -FastLog2f(count * inv_total);
+          if (force_huffman) sym_bits = std::ceil(sym_bits);
+        }
+        bits_[ctx * max_alphabet_size_ + sym] = sym_bits;
+        weighted_cost += sym_bits * histo.data_[sym];
+      }
+      // Penalty for adding a lz77 symbol to this context (only used for static
+      // cost model). Higher penalty for contexts that have a very low
+      // per-symbol entropy.
+      add_symbol_cost_[ctx] =
+          std::max(0.0f, 6.0f - weighted_cost * inv_total);
+    }
+  }
+
+  // Estimated cost in bits of symbol `sym` in context `ctx`. No bounds
+  // checking is performed on either argument.
+  float Bits(size_t ctx, size_t sym) const {
+    const size_t index = ctx * max_alphabet_size_ + sym;
+    return bits_[index];
+  }
+
+  // Estimated cost of an LZ77 length `len` emitted in context `ctx`: raw bits
+  // of the hybrid-uint split plus the cost of the shifted length symbol.
+  float LenCost(size_t ctx, size_t len, const LZ77Params& lz77) const {
+    uint32_t symbol, extra_nbits, extra_bits;
+    lz77.length_uint_config.Encode(len, &symbol, &extra_nbits, &extra_bits);
+    symbol += lz77.min_symbol;
+    return extra_nbits + Bits(ctx, symbol);
+  }
+
+  // Estimated cost of the distance value passed as `len`, coded with a
+  // default-constructed HybridUintConfig in the LZ77 distance context.
+  float DistCost(size_t len, const LZ77Params& lz77) const {
+    uint32_t symbol, extra_nbits, extra_bits;
+    HybridUintConfig().Encode(len, &symbol, &extra_nbits, &extra_bits);
+    return extra_nbits + Bits(lz77.nonserialized_distance_context, symbol);
+  }
+
+  // Penalty computed at construction time for context `idx`.
+  float AddSymbolCost(size_t idx) const {
+    return add_symbol_cost_[idx];
+  }
+
+ private:
+  // Row stride of `bits_`.
+  size_t max_alphabet_size_;
+  // Row-major [context][symbol] table of estimated bit costs.
+  std::vector<float> bits_;
+  // One LZ77-symbol penalty per context.
+  std::vector<float> add_symbol_cost_;
+};
+
+// Run-length-only flavour of LZ77. A run of tokens whose value equals that of
+// the token just before the run is replaced by a (length, distance) token pair
+// whenever the run is at least `lz77.min_length` long and the estimated cost
+// of the literals exceeds the estimated cost of the pair. The transformed
+// streams go to `tokens_lz77`; `lz77.enabled` is set when the accumulated
+// estimated saving exceeds a threshold that grows with the token count.
+void ApplyLZ77_RLE(const HistogramParams& params, size_t num_contexts,
+                   const std::vector<std::vector<Token>>& tokens,
+                   LZ77Params& lz77,
+                   std::vector<std::vector<Token>>& tokens_lz77) {
+  // TODO(veluca): tune heuristics here.
+  SymbolCostEstimator sce(num_contexts, params.force_huffman, tokens, lz77);
+  float bit_decrease = 0;
+  size_t total_symbols = 0;
+  tokens_lz77.resize(tokens.size());
+  std::vector<float> sym_cost;
+  HybridUintConfig uint_config;
+  for (size_t stream = 0; stream < tokens.size(); stream++) {
+    const auto& in = tokens[stream];
+    auto& out = tokens_lz77[stream];
+    const size_t num_tokens = in.size();
+    total_symbols += num_tokens;
+
+    // With a non-zero distance multiplier the special distance codes are in
+    // use, and the distance-1 copy is signalled with symbol 1 instead of 0.
+    size_t distance_symbol = 0;
+    if (stream < params.image_widths.size() &&
+        params.image_widths[stream] != 0) {
+      distance_symbol = 1;
+      JXL_DASSERT(kSpecialDistances[1][0] == 1);
+      JXL_DASSERT(kSpecialDistances[1][1] == 0);
+    }
+
+    // Cumulative sum of bit costs.
+    sym_cost.resize(num_tokens + 1);
+    for (size_t k = 0; k < num_tokens; k++) {
+      uint32_t tok, nbits, unused_bits;
+      uint_config.Encode(in[k].value, &tok, &nbits, &unused_bits);
+      sym_cost[k + 1] = sce.Bits(in[k].context, tok) + nbits + sym_cost[k];
+    }
+
+    out.reserve(num_tokens);
+    size_t pos = 0;
+    while (pos < num_tokens) {
+      // Length of the run starting at `pos` that repeats the previous value.
+      size_t run = 0;
+      if (pos > 0) {
+        const auto repeated = in[pos - 1].value;
+        while (pos + run < num_tokens && in[pos + run].value == repeated) {
+          run++;
+        }
+      }
+      if (run == 0) {
+        out.push_back(in[pos]);
+        pos++;
+        continue;
+      }
+
+      const float literal_cost = sym_cost[pos + run] - sym_cost[pos];
+      const bool long_enough = run >= lz77.min_length;
+      // This subtraction might overflow, but that's OK: the value is only
+      // used when the run is long enough.
+      const size_t lz77_len = run - lz77.min_length;
+      const float lz77_cost =
+          long_enough ? CeilLog2Nonzero(lz77_len + 1) + 1 : 0;
+
+      if (!long_enough || literal_cost <= lz77_cost) {
+        // Not worth it: keep the run as literals.
+        out.insert(out.end(), in.begin() + pos, in.begin() + pos + run);
+      } else {
+        // Output the LZ77 length, in the context of the first replaced token.
+        out.emplace_back(in[pos].context, lz77_len);
+        out.back().is_lz77_length = true;
+        bit_decrease += literal_cost - lz77_cost;
+        // Output the LZ77 copy distance.
+        out.emplace_back(lz77.nonserialized_distance_context,
+                         distance_symbol);
+      }
+      pos += run;
+    }
+  }
+
+  if (bit_decrease > total_symbols * 0.2 + 16) {
+    lz77.enabled = true;
+  }
+}
+
+// Hash chain for LZ77 matching.
+//
+// Keeps a copy of the token values of one stream and, for every position of a
+// sliding window, a link to the previously inserted position that had the same
+// hash of three consecutive values. A second set of chains links positions by
+// the length of the zero run recorded for them, to speed up repetitions of
+// zero. Positions must be inserted with Update() before they can be found.
+struct HashChain {
+  // Number of values in `data_`.
+  size_t size_;
+  // Copy of the `value` field of every input token.
+  std::vector<uint32_t> data_;
+
+  // Number of hash buckets, the mask that reduces a hash to a bucket index,
+  // and the per-value shift used when mixing three values in GetHash().
+  unsigned hash_num_values_ = 32768;
+  unsigned hash_mask_ = hash_num_values_ - 1;
+  unsigned hash_shift_ = 5;
+
+  // head[h]: window position most recently inserted with hash h, or -1.
+  std::vector<int> head;
+  // chain[wpos]: window position inserted before wpos with the same hash.
+  std::vector<uint32_t> chain;
+  // val[wpos]: hash that was stored for window position wpos, or -1.
+  std::vector<int> val;
+
+  // Speed up repetitions of zero
+  // headz[n]: window position most recently inserted with zero-run length n.
+  std::vector<int> headz;
+  // chainz[wpos]: previously inserted window position with the same run length.
+  std::vector<uint32_t> chainz;
+  // zeros[wpos]: zero-run length recorded for window position wpos.
+  std::vector<uint32_t> zeros;
+  // Zero-run length computed by the most recent Update() call.
+  uint32_t numzeros = 0;
+
+  size_t window_size_;
+  // window_size_ - 1; used to wrap positions into the window, so callers are
+  // expected to pass a power-of-two window size.
+  size_t window_mask_;
+  size_t min_length_;
+  size_t max_length_;
+
+  // Map of special distance codes.
+  std::unordered_map<int, int> special_dist_table_;
+  size_t num_special_distances_ = 0;
+
+  uint32_t maxchainlength = 256;  // window_size_ to allow all
+
+  // Copies the values of `size` tokens from `data` and initializes all tables
+  // to their "empty" state. When `distance_multiplier` is non-zero, the table
+  // translating plain distances to special distance codes is filled as well.
+  HashChain(const Token* data, size_t size, size_t window_size,
+            size_t min_length, size_t max_length, size_t distance_multiplier)
+      : size_(size),
+        window_size_(window_size),
+        window_mask_(window_size - 1),
+        min_length_(min_length),
+        max_length_(max_length) {
+    data_.resize(size);
+    for (size_t i = 0; i < size; i++) {
+      data_[i] = data[i].value;
+    }
+
+    head.resize(hash_num_values_, -1);
+    val.resize(window_size_, -1);
+    chain.resize(window_size_);
+    for (uint32_t i = 0; i < window_size_; ++i) {
+      chain[i] = i;  // same value as index indicates uninitialized
+    }
+
+    zeros.resize(window_size_);
+    headz.resize(window_size_ + 1, -1);
+    chainz.resize(window_size_);
+    for (uint32_t i = 0; i < window_size_; ++i) {
+      chainz[i] = i;
+    }
+    // Translate distance to special distance code.
+    if (distance_multiplier) {
+      // Count down, so if due to small distance multiplier multiple distances
+      // map to the same code, the smallest code will be used in the end.
+      for (int i = kNumSpecialDistances - 1; i >= 0; --i) {
+        int xi = kSpecialDistances[i][0];
+        int yi = kSpecialDistances[i][1];
+        int distance = yi * distance_multiplier + xi;
+        // Ensure that we map distance 1 to the lowest symbols.
+        if (distance < 1) distance = 1;
+        special_dist_table_[distance] = i;
+      }
+      num_special_distances_ = kNumSpecialDistances;
+    }
+  }
+
+  // Hash of the three values starting at `pos`, reduced with `hash_mask_`.
+  // Returns 0 when fewer than three values remain.
+  uint32_t GetHash(size_t pos) const {
+    uint32_t result = 0;
+    if (pos + 2 < size_) {
+      // TODO(lode): take the MSB's of the uint32_t values into account as well,
+      // given that the hash code itself is less than 32 bits.
+      result ^= (uint32_t)(data_[pos + 0] << 0u);
+      result ^= (uint32_t)(data_[pos + 1] << hash_shift_);
+      result ^= (uint32_t)(data_[pos + 2] << (hash_shift_ * 2));
+    } else {
+      // No need to compute hash of last 2 bytes, the length 2 is too short.
+      return 0;
+    }
+    return result & hash_mask_;
+  }
+
+  // Zero-run length for position `pos`. With `prevzeros` == 0 the zeros
+  // starting at `pos` are counted directly, looking at most `window_size_`
+  // values ahead. Otherwise the result is derived from `prevzeros` (the count
+  // for the preceding position) without rescanning.
+  uint32_t CountZeros(size_t pos, uint32_t prevzeros) const {
+    size_t end = pos + window_size_;
+    if (end > size_) end = size_;
+    if (prevzeros > 0) {
+      if (prevzeros >= window_mask_ && data_[end - 1] == 0 &&
+          end == pos + window_size_) {
+        return prevzeros;
+      } else {
+        return prevzeros - 1;
+      }
+    }
+    uint32_t num = 0;
+    while (pos + num < end && data_[pos + num] == 0) num++;
+    return num;
+  }
+
+  // Inserts position `pos` into the hash chain and into the zero-run chain.
+  void Update(size_t pos) {
+    uint32_t hashval = GetHash(pos);
+    uint32_t wpos = pos & window_mask_;
+
+    val[wpos] = (int)hashval;
+    if (head[hashval] != -1) chain[wpos] = head[hashval];
+    head[hashval] = wpos;
+
+    // A change of value invalidates the count carried over from pos - 1.
+    if (pos > 0 && data_[pos] != data_[pos - 1]) numzeros = 0;
+    numzeros = CountZeros(pos, numzeros);
+
+    zeros[wpos] = numzeros;
+    if (headz[numzeros] != -1) chainz[wpos] = headz[numzeros];
+    headz[numzeros] = wpos;
+  }
+
+  // Inserts the `len` consecutive positions starting at `pos`.
+  void Update(size_t pos, size_t len) {
+    for (size_t i = 0; i < len; i++) {
+      Update(pos + i);
+    }
+  }
+
+  // Walks the chain of earlier window positions linked from `pos` and calls
+  // `found_match(len, dist_symbol)` for each candidate whose match length is
+  // at least `min_length_` and no more than 2 shorter than the best length
+  // reported so far. The walk ends after `maxchainlength` candidates, when a
+  // chain entry links to itself or is outdated, or when the distance stops
+  // growing. `max_dist` is not referenced by the body.
+  template <typename CB>
+  void FindMatches(size_t pos, int max_dist, const CB& found_match) const {
+    uint32_t wpos = pos & window_mask_;
+    uint32_t hashval = GetHash(pos);
+    uint32_t hashpos = chain[wpos];
+
+    int prev_dist = 0;
+    int end = std::min<int>(pos + max_length_, size_);
+    uint32_t chainlength = 0;
+    uint32_t best_len = 0;
+    for (;;) {
+      // Distance from the candidate to `pos`, accounting for window wrap.
+      int dist = (hashpos <= wpos) ? (wpos - hashpos)
+                                   : (wpos - hashpos + window_mask_ + 1);
+      if (dist < prev_dist) break;
+      prev_dist = dist;
+      uint32_t len = 0;
+      if (dist > 0) {
+        int i = pos;
+        int j = pos - dist;
+        if (numzeros > 3) {
+          // NOTE(review): presumably skips the leading zeros that both
+          // positions are already known to share -- confirm.
+          int r = std::min<int>(numzeros - 1, zeros[hashpos]);
+          if (i + r >= end) r = end - i - 1;
+          i += r;
+          j += r;
+        }
+        while (i < end && data_[i] == data_[j]) {
+          i++;
+          j++;
+        }
+        len = i - pos;
+        // This can trigger even if the new length is slightly smaller than the
+        // best length, because it is possible for a slightly cheaper distance
+        // symbol to occur.
+        if (len >= min_length_ && len + 2 >= best_len) {
+          // Distances without a special code are placed after the special
+          // codes.
+          auto it = special_dist_table_.find(dist);
+          int dist_symbol = (it == special_dist_table_.end())
+                                ? (num_special_distances_ + dist - 1)
+                                : it->second;
+          found_match(len, dist_symbol);
+          if (len > best_len) best_len = len;
+        }
+      }
+
+      chainlength++;
+      if (chainlength >= maxchainlength) break;
+
+      // Follow the zero-run chain when the match extends past the zero run at
+      // `pos`; otherwise follow the ordinary hash chain.
+      if (numzeros >= 3 && len > numzeros) {
+        if (hashpos == chainz[hashpos]) break;
+        hashpos = chainz[hashpos];
+        if (zeros[hashpos] != numzeros) break;
+      } else {
+        if (hashpos == chain[hashpos]) break;
+        hashpos = chain[hashpos];
+        if (val[hashpos] != (int)hashval) break;  // outdated hash value
+      }
+    }
+  }
+  // Reports the longest match found by FindMatches(); among matches of equal
+  // length the one with the smaller distance symbol wins. When nothing is
+  // found, `*result_len` is 1 and `*result_dist_symbol` is 0.
+  void FindMatch(size_t pos, int max_dist, size_t* result_dist_symbol,
+                 size_t* result_len) const {
+    *result_dist_symbol = 0;
+    *result_len = 1;
+    FindMatches(pos, max_dist, [&](size_t len, size_t dist_symbol) {
+      if (len > *result_len ||
+          (len == *result_len && *result_dist_symbol > dist_symbol)) {
+        *result_len = len;
+        *result_dist_symbol = dist_symbol;
+      }
+    });
+  }
+};
+
+// Static estimate, in bits, of coding the LZ77 length value `len`: the raw
+// bits of a HybridUintConfig(1, 0, 0) split plus a fixed per-symbol cost.
+// Symbols beyond the end of the table are charged like the last entry.
+float LenCost(size_t len) {
+  constexpr float kCostTable[] = {
+      2.797667318563126,  3.213177690381199,  2.5706009246743737,
+      2.408392498667534,  2.829649191872326,  3.3923087753324577,
+      4.029267451554331,  4.415576699706408,  4.509357574741465,
+      9.21481543803004,   10.020590190114898, 11.858671627804766,
+      12.45853300490526,  11.713105831990857, 12.561996324849314,
+      13.775477692278367, 13.174027068768641,
+  };
+  constexpr size_t kTableSize = sizeof(kCostTable) / sizeof(kCostTable[0]);
+
+  uint32_t symbol, extra_nbits, extra_bits;
+  HybridUintConfig(1, 0, 0).Encode(len, &symbol, &extra_nbits, &extra_bits);
+  const size_t index = symbol < kTableSize ? symbol : kTableSize - 1;
+  return kCostTable[index] + extra_nbits;
+}
+
+// Static estimate, in bits, of coding the LZ77 distance symbol `dist`: the raw
+// bits of a HybridUintConfig(7, 0, 0) split plus a fixed per-symbol cost.
+// Symbols beyond the end of the table are charged like the last entry.
+// TODO(veluca): this does not take into account usage or non-usage of distance
+// multipliers.
+float DistCost(size_t dist) {
+  constexpr float kCostTable[] = {
+      6.368282626312716,  5.680793277090298,  8.347404197105247,
+      7.641619201599141,  6.914328374119438,  7.959808291537444,
+      8.70023120759855,   8.71378518934703,   9.379132523982769,
+      9.110472749092708,  9.159029569270908,  9.430936766731973,
+      7.278284055315169,  7.8278514904267755, 10.026641158289236,
+      9.976049229827066,  9.64351607048908,   9.563403863480442,
+      10.171474111762747, 10.45950155077234,  9.994813912104219,
+      10.322524683741156, 8.465808729388186,  8.756254166066853,
+      10.160930174662234, 10.247329273413435, 10.04090403724809,
+      10.129398517544082, 9.342311691539546,  9.07608009102374,
+      10.104799540677513, 10.378079384990906, 10.165828974075072,
+      10.337595322341553, 7.940557464567944,  10.575665823319431,
+      11.023344321751955, 10.736144698831827, 11.118277044595054,
+      7.468468230648442,  10.738305230932939, 10.906980780216568,
+      10.163468216353817, 10.17805759656433,  11.167283670483565,
+      11.147050200274544, 10.517921919244333, 10.651764778156886,
+      10.17074446448919,  11.217636876224745, 11.261630721139484,
+      11.403140815247259, 10.892472096873417, 11.1859607804481,
+      8.017346947551262,  7.895143720278828,  11.036577113822025,
+      11.170562110315794, 10.326988722591086, 10.40872184751056,
+      11.213498225466386, 11.30580635516863,  10.672272515665442,
+      10.768069466228063, 11.145257364153565, 11.64668307145549,
+      10.593156194627339, 11.207499484844943, 10.767517766396908,
+      10.826629811407042, 10.737764794499988, 10.6200448518045,
+      10.191315385198092, 8.468384171390085,  11.731295299170432,
+      11.824619886654398, 10.41518844301179,  10.16310536548649,
+      10.539423685097576, 10.495136599328031, 10.469112847728267,
+      11.72057686174922,  10.910326337834674, 11.378921834673758,
+      11.847759036098536, 11.92071647623854,  10.810628276345282,
+      11.008601085273893, 11.910326337834674, 11.949212023423133,
+      11.298614839104337, 11.611603659010392, 10.472930394619985,
+      11.835564720850282, 11.523267392285337, 12.01055816679611,
+      8.413029688994023,  11.895784139536406, 11.984679534970505,
+      11.220654278717394, 11.716311684833672, 10.61036646226114,
+      10.89849965960364,  10.203762898863669, 10.997560826267238,
+      11.484217379438984, 11.792836176993665, 12.24310468755171,
+      11.464858097919262, 12.212747017409377, 11.425595666074955,
+      11.572048533398757, 12.742093965163013, 11.381874288645637,
+      12.191870445817015, 11.683156920035426, 11.152442115262197,
+      11.90303691580457,  11.653292787169159, 11.938615382266098,
+      16.970641701570223, 16.853602280380002, 17.26240782594733,
+      16.644655390108507, 17.14310889757499,  16.910935455445955,
+      17.505678976959697, 17.213498225466388, 2.4162310293553024,
+      3.494587244462329,  3.5258600986408344, 3.4959806589517095,
+      3.098390886949687,  3.343454654302911,  3.588847442290287,
+      4.14614790111827,   5.152948641990529,  7.433696808092598,
+      9.716311684833672,
+  };
+  constexpr size_t kTableSize = sizeof(kCostTable) / sizeof(kCostTable[0]);
+
+  uint32_t symbol, extra_nbits, extra_bits;
+  HybridUintConfig(7, 0, 0).Encode(dist, &symbol, &extra_nbits, &extra_bits);
+  const size_t index = symbol < kTableSize ? symbol : kTableSize - 1;
+  return kCostTable[index] + extra_nbits;
+}
+
+// Greedy LZ77 with one step of lazy matching. For every stream, matches are
+// searched with a HashChain; a match replaces the corresponding literals by a
+// (length, distance) token pair when its estimated cost is not higher than
+// that of the literals. The transformed streams are written to `tokens_lz77`,
+// and `lz77.enabled` is set when the accumulated estimated saving exceeds a
+// threshold that grows with the token count.
+void ApplyLZ77_LZ77(const HistogramParams& params, size_t num_contexts,
+                    const std::vector<std::vector<Token>>& tokens,
+                    LZ77Params& lz77,
+                    std::vector<std::vector<Token>>& tokens_lz77) {
+  // TODO(veluca): tune heuristics here.
+  SymbolCostEstimator sce(num_contexts, params.force_huffman, tokens, lz77);
+  float bit_decrease = 0;
+  size_t total_symbols = 0;
+  tokens_lz77.resize(tokens.size());
+  HybridUintConfig uint_config;
+  std::vector<float> sym_cost;
+  for (size_t stream = 0; stream < tokens.size(); stream++) {
+    // Image width for this stream if one was provided, otherwise 0 (which
+    // disables the special distance codes in HashChain).
+    size_t distance_multiplier =
+        params.image_widths.size() > stream ? params.image_widths[stream] : 0;
+    const auto& in = tokens[stream];
+    auto& out = tokens_lz77[stream];
+    total_symbols += in.size();
+    // Cumulative sum of bit costs.
+    sym_cost.resize(in.size() + 1);
+    for (size_t i = 0; i < in.size(); i++) {
+      uint32_t tok, nbits, unused_bits;
+      uint_config.Encode(in[i].value, &tok, &nbits, &unused_bits);
+      sym_cost[i + 1] = sce.Bits(in[i].context, tok) + nbits + sym_cost[i];
+    }
+
+    out.reserve(in.size());
+    size_t max_distance = in.size();
+    size_t min_length = lz77.min_length;
+    JXL_ASSERT(min_length >= 3);
+    size_t max_length = in.size();
+
+    // Use next power of two as window size.
+    size_t window_size = 1;
+    while (window_size < max_distance && window_size < kWindowSize) {
+      window_size <<= 1;
+    }
+
+    HashChain chain(in.data(), in.size(), window_size, min_length, max_length,
+                    distance_multiplier);
+    size_t len, dist_symbol;
+
+    const size_t max_lazy_match_len = 256;  // 0 to disable lazy matching
+
+    // Whether the next symbol was already updated (to test lazy matching)
+    bool already_updated = false;
+    for (size_t i = 0; i < in.size(); i++) {
+      // The token is pushed as a literal first; if a match is accepted below,
+      // this entry is rewritten in place into the length token.
+      out.push_back(in[i]);
+      if (!already_updated) chain.Update(i);
+      already_updated = false;
+      chain.FindMatch(i, max_distance, &dist_symbol, &len);
+      if (len >= min_length) {
+        if (len < max_lazy_match_len && i + 1 < in.size()) {
+          // Try length at next symbol lazy matching
+          chain.Update(i + 1);
+          already_updated = true;
+          size_t len2, dist_symbol2;
+          chain.FindMatch(i + 1, max_distance, &dist_symbol2, &len2);
+          if (len2 > len) {
+            // Use the lazy match. Add literal, and use the next length starting
+            // from the next byte.
+            ++i;
+            already_updated = false;
+            len = len2;
+            dist_symbol = dist_symbol2;
+            out.push_back(in[i]);
+          }
+        }
+
+        // Estimated cost of the literals covered by the match versus the
+        // estimated cost of the length/distance pair.
+        float cost = sym_cost[i + len] - sym_cost[i];
+        size_t lz77_len = len - lz77.min_length;
+        float lz77_cost = LenCost(lz77_len) + DistCost(dist_symbol) +
+                          sce.AddSymbolCost(out.back().context);
+
+        if (lz77_cost <= cost) {
+          out.back().value = len - min_length;
+          out.back().is_lz77_length = true;
+          out.emplace_back(lz77.nonserialized_distance_context, dist_symbol);
+          bit_decrease += cost - lz77_cost;
+        } else {
+          // LZ77 match ignored, and symbol already pushed. Push all other
+          // symbols and skip.
+          for (size_t j = 1; j < len; j++) {
+            out.push_back(in[i + j]);
+          }
+        }
+
+        // Insert the positions covered by the match that are not yet in the
+        // hash chain, then jump past the match.
+        if (already_updated) {
+          chain.Update(i + 2, len - 2);
+          already_updated = false;
+        } else {
+          chain.Update(i + 1, len - 1);
+        }
+        i += len - 1;
+      } else {
+        // Literal, already pushed
+      }
+    }
+  }
+
+  if (bit_decrease > total_symbols * 0.2 + 16) {
+    lz77.enabled = true;
+  }
+}
+
+// LZ77 parse chosen by dynamic programming. First runs ApplyLZ77_LZ77 to
+// decide whether LZ77 is worthwhile and to obtain tokens for the cost model;
+// if that pass does not enable LZ77 the function returns without writing to
+// `tokens_lz77`. Otherwise, for every stream, the cheapest known way to encode
+// each prefix is computed from literal and match candidates, and the chosen
+// tokens are recovered by walking back from the end of the stream.
+void ApplyLZ77_Optimal(const HistogramParams& params, size_t num_contexts,
+                       const std::vector<std::vector<Token>>& tokens,
+                       LZ77Params& lz77,
+                       std::vector<std::vector<Token>>& tokens_lz77) {
+  std::vector<std::vector<Token>> tokens_for_cost_estimate;
+  ApplyLZ77_LZ77(params, num_contexts, tokens, lz77, tokens_for_cost_estimate);
+  // If greedy-LZ77 does not give better compression than no-lz77, no reason to
+  // run the optimal matching.
+  if (!lz77.enabled) return;
+  // One extra context: the greedy output also contains distance tokens.
+  SymbolCostEstimator sce(num_contexts + 1, params.force_huffman,
+                          tokens_for_cost_estimate, lz77);
+  tokens_lz77.resize(tokens.size());
+  HybridUintConfig uint_config;
+  std::vector<float> sym_cost;
+  std::vector<uint32_t> dist_symbols;
+  for (size_t stream = 0; stream < tokens.size(); stream++) {
+    size_t distance_multiplier =
+        params.image_widths.size() > stream ? params.image_widths[stream] : 0;
+    const auto& in = tokens[stream];
+    auto& out = tokens_lz77[stream];
+    // Cumulative sum of bit costs.
+    sym_cost.resize(in.size() + 1);
+    for (size_t i = 0; i < in.size(); i++) {
+      uint32_t tok, nbits, unused_bits;
+      uint_config.Encode(in[i].value, &tok, &nbits, &unused_bits);
+      sym_cost[i + 1] = sce.Bits(in[i].context, tok) + nbits + sym_cost[i];
+    }
+
+    out.reserve(in.size());
+    size_t max_distance = in.size();
+    size_t min_length = lz77.min_length;
+    JXL_ASSERT(min_length >= 3);
+    size_t max_length = in.size();
+
+    // Use next power of two as window size.
+    size_t window_size = 1;
+    while (window_size < max_distance && window_size < kWindowSize) {
+      window_size <<= 1;
+    }
+
+    HashChain chain(in.data(), in.size(), window_size, min_length, max_length,
+                    distance_multiplier);
+
+    // Best known way to reach a given prefix length: the length of the last
+    // step (1 for a literal), its distance symbol plus one (0 marks a
+    // literal), the context of the step's first token, and the total cost.
+    struct MatchInfo {
+      uint32_t len;
+      uint32_t dist_symbol;
+      uint32_t ctx;
+      float total_cost = std::numeric_limits<float>::max();
+    };
+    // Total cost to encode the first N symbols.
+    std::vector<MatchInfo> prefix_costs(in.size() + 1);
+    prefix_costs[0].total_cost = 0;
+
+    size_t rle_length = 0;
+    size_t skip_lz77 = 0;
+    for (size_t i = 0; i < in.size(); i++) {
+      chain.Update(i);
+      // Option 1: encode symbol i as a literal.
+      float lit_cost =
+          prefix_costs[i].total_cost + sym_cost[i + 1] - sym_cost[i];
+      if (prefix_costs[i + 1].total_cost > lit_cost) {
+        prefix_costs[i + 1].dist_symbol = 0;
+        prefix_costs[i + 1].len = 1;
+        prefix_costs[i + 1].ctx = in[i].context;
+        prefix_costs[i + 1].total_cost = lit_cost;
+      }
+      if (skip_lz77 > 0) {
+        skip_lz77--;
+        continue;
+      }
+      // Option 2: start a match at i. Collect, per match length, the smallest
+      // distance symbol reported by the hash chain.
+      dist_symbols.clear();
+      chain.FindMatches(i, max_distance,
+                        [&dist_symbols](size_t len, size_t dist_symbol) {
+                          if (dist_symbols.size() <= len) {
+                            dist_symbols.resize(len + 1, dist_symbol);
+                          }
+                          if (dist_symbol < dist_symbols[len]) {
+                            dist_symbols[len] = dist_symbol;
+                          }
+                        });
+      if (dist_symbols.size() <= min_length) continue;
+      {
+        // Propagate the smallest distance symbol from longer lengths down to
+        // shorter ones.
+        size_t best_cost = dist_symbols.back();
+        for (size_t j = dist_symbols.size() - 1; j >= min_length; j--) {
+          if (dist_symbols[j] < best_cost) {
+            best_cost = dist_symbols[j];
+          }
+          dist_symbols[j] = best_cost;
+        }
+      }
+      for (size_t j = min_length; j < dist_symbols.size(); j++) {
+        // Cost model that uses results from lazy LZ77.
+        float lz77_cost = sce.LenCost(in[i].context, j - min_length, lz77) +
+                          sce.DistCost(dist_symbols[j], lz77);
+        float cost = prefix_costs[i].total_cost + lz77_cost;
+        if (prefix_costs[i + j].total_cost > cost) {
+          prefix_costs[i + j].len = j;
+          // Stored with a +1 offset so that 0 can mean "literal".
+          prefix_costs[i + j].dist_symbol = dist_symbols[j] + 1;
+          prefix_costs[i + j].ctx = in[i].context;
+          prefix_costs[i + j].total_cost = cost;
+        }
+      }
+      // We are in a RLE sequence: skip all the symbols except the first 8 and
+      // the last 8. This avoids quadratic costs for sequences with long runs of
+      // the same symbol.
+      if ((dist_symbols.back() == 0 && distance_multiplier == 0) ||
+          (dist_symbols.back() == 1 && distance_multiplier != 0)) {
+        rle_length++;
+      } else {
+        rle_length = 0;
+      }
+      if (rle_length >= 8 && dist_symbols.size() > 9) {
+        skip_lz77 = dist_symbols.size() - 10;
+        rle_length = 0;
+      }
+    }
+    // Walk back from the end along the recorded steps. Tokens are emitted in
+    // reverse (distance before length) and the stream is reversed afterwards.
+    size_t pos = in.size();
+    while (pos > 0) {
+      bool is_lz77_length = prefix_costs[pos].dist_symbol != 0;
+      if (is_lz77_length) {
+        size_t dist_symbol = prefix_costs[pos].dist_symbol - 1;
+        out.emplace_back(lz77.nonserialized_distance_context, dist_symbol);
+      }
+      size_t val = is_lz77_length ? prefix_costs[pos].len - min_length
+                                  : in[pos - 1].value;
+      out.emplace_back(prefix_costs[pos].ctx, val);
+      out.back().is_lz77_length = is_lz77_length;
+      pos -= prefix_costs[pos].len;
+    }
+    std::reverse(out.begin(), out.end());
+  }
+}
+
+// Resets the LZ77 state in `lz77`, picks the first symbol value reserved for
+// LZ77 lengths, and runs the transform selected by `params.lz77_method`. The
+// selected transform fills `tokens_lz77` and decides whether to set
+// `lz77.enabled`; with kNone nothing else happens.
+void ApplyLZ77(const HistogramParams& params, size_t num_contexts,
+               const std::vector<std::vector<Token>>& tokens, LZ77Params& lz77,
+               std::vector<std::vector<Token>>& tokens_lz77) {
+  lz77.enabled = false;
+  lz77.min_symbol = params.force_huffman
+                        ? std::min(PREFIX_MAX_ALPHABET_SIZE - 32, 512)
+                        : 224;
+
+  switch (params.lz77_method) {
+    case HistogramParams::LZ77Method::kNone:
+      break;
+    case HistogramParams::LZ77Method::kRLE:
+      ApplyLZ77_RLE(params, num_contexts, tokens, lz77, tokens_lz77);
+      break;
+    case HistogramParams::LZ77Method::kLZ77:
+      ApplyLZ77_LZ77(params, num_contexts, tokens, lz77, tokens_lz77);
+      break;
+    case HistogramParams::LZ77Method::kOptimal:
+      ApplyLZ77_Optimal(params, num_contexts, tokens, lz77, tokens_lz77);
+      break;
+    default:
+      JXL_UNREACHABLE("Not implemented");
+  }
+}
+}  // namespace
+
+// Applies the configured LZ77 transform to `tokens`, builds one histogram per
+// context from the resulting tokens, and encodes the LZ77 parameters and the
+// entropy codes. When `writer` is null nothing is written and sizes are only
+// estimated.
+//
+// `tokens` is replaced by the LZ77-transformed streams when LZ77 ends up
+// enabled. `codes` and `context_map` are filled for later use by WriteTokens.
+// Returns the accumulated bit count.
+// NOTE(review): when `writer` is non-null the bits of the serialized LZ77
+// bundle are not added to the returned total -- confirm whether intended.
+size_t BuildAndEncodeHistograms(const HistogramParams& params,
+                                size_t num_contexts,
+                                std::vector<std::vector<Token>>& tokens,
+                                EntropyEncodingData* codes,
+                                std::vector<uint8_t>* context_map,
+                                BitWriter* writer, size_t layer,
+                                AuxOut* aux_out) {
+  size_t total_bits = 0;
+  // LZ77 distances get their own context, placed after the regular ones.
+  codes->lz77.nonserialized_distance_context = num_contexts;
+  std::vector<std::vector<Token>> tokens_lz77;
+  ApplyLZ77(params, num_contexts, tokens, codes->lz77, tokens_lz77);
+  if (ans_fuzzer_friendly_) {
+    codes->lz77.length_uint_config = HybridUintConfig(10, 0, 0);
+    codes->lz77.min_symbol = 2048;
+  }
+
+  const size_t max_contexts = std::min(num_contexts, kClustersLimit);
+  BitWriter::Allotment allotment(writer,
+                                 128 + num_contexts * 40 + max_contexts * 96);
+  if (writer) {
+    JXL_CHECK(Bundle::Write(codes->lz77, writer, layer, aux_out));
+  } else {
+    size_t ebits, bits;
+    JXL_CHECK(Bundle::CanEncode(codes->lz77, &ebits, &bits));
+    total_bits += bits;
+  }
+  if (codes->lz77.enabled) {
+    if (writer) {
+      size_t b = writer->BitsWritten();
+      EncodeUintConfig(codes->lz77.length_uint_config, writer,
+                       /*log_alpha_size=*/8);
+      total_bits += writer->BitsWritten() - b;
+    } else {
+      SizeWriter size_writer;
+      EncodeUintConfig(codes->lz77.length_uint_config, &size_writer,
+                       /*log_alpha_size=*/8);
+      total_bits += size_writer.size;
+    }
+    // Account for the distance context and switch to the transformed tokens.
+    num_contexts += 1;
+    tokens = std::move(tokens_lz77);
+  }
+  size_t total_tokens = 0;
+  // Build histograms.
+  HistogramBuilder builder(num_contexts);
+  HybridUintConfig uint_config;  //  Default config for clustering.
+  // Unless we are using the kContextMap histogram option.
+  if (params.uint_method == HistogramParams::HybridUintMethod::kContextMap) {
+    uint_config = HybridUintConfig(2, 0, 1);
+  }
+  if (params.uint_method == HistogramParams::HybridUintMethod::k000) {
+    uint_config = HybridUintConfig(0, 0, 0);
+  }
+  if (ans_fuzzer_friendly_) {
+    uint_config = HybridUintConfig(10, 0, 0);
+  }
+  // The three loops below differ only in how the symbol and the context are
+  // chosen: LZ77 length tokens use their own config and symbol offset, and a
+  // single-context stream always counts into context 0.
+  for (size_t i = 0; i < tokens.size(); ++i) {
+    if (codes->lz77.enabled) {
+      for (size_t j = 0; j < tokens[i].size(); ++j) {
+        const Token& token = tokens[i][j];
+        total_tokens++;
+        uint32_t tok, nbits, bits;
+        (token.is_lz77_length ? codes->lz77.length_uint_config : uint_config)
+            .Encode(token.value, &tok, &nbits, &bits);
+        tok += token.is_lz77_length ? codes->lz77.min_symbol : 0;
+        builder.VisitSymbol(tok, token.context);
+      }
+    } else if (num_contexts == 1) {
+      for (size_t j = 0; j < tokens[i].size(); ++j) {
+        const Token& token = tokens[i][j];
+        total_tokens++;
+        uint32_t tok, nbits, bits;
+        uint_config.Encode(token.value, &tok, &nbits, &bits);
+        builder.VisitSymbol(tok, /*token.context=*/0);
+      }
+    } else {
+      for (size_t j = 0; j < tokens[i].size(); ++j) {
+        const Token& token = tokens[i][j];
+        total_tokens++;
+        uint32_t tok, nbits, bits;
+        uint_config.Encode(token.value, &tok, &nbits, &bits);
+        builder.VisitSymbol(tok, token.context);
+      }
+    }
+  }
+
+  // Prefix codes are used when forced, for very short inputs, for the fastest
+  // clustering setting, in fuzzer-friendly mode, or (below) when every
+  // histogram has essentially zero entropy.
+  bool use_prefix_code =
+      params.force_huffman || total_tokens < 100 ||
+      params.clustering == HistogramParams::ClusteringType::kFastest ||
+      ans_fuzzer_friendly_;
+  if (!use_prefix_code) {
+    bool all_singleton = true;
+    for (size_t i = 0; i < num_contexts; i++) {
+      if (builder.Histo(i).ShannonEntropy() >= 1e-5) {
+        all_singleton = false;
+      }
+    }
+    if (all_singleton) {
+      use_prefix_code = true;
+    }
+  }
+
+  // Encode histograms.
+  total_bits += builder.BuildAndStoreEntropyCodes(params, tokens, codes,
+                                                  context_map, use_prefix_code,
+                                                  writer, layer, aux_out);
+  allotment.FinishedHistogram(writer);
+  allotment.ReclaimAndCharge(writer, layer, aux_out);
+
+  if (aux_out != nullptr) {
+    aux_out->layers[layer].num_clustered_histograms +=
+        codes->encoding_info.size();
+  }
+  return total_bits;
+}
+
+// Entropy-codes `tokens` into `writer` using the previously built `codes` and
+// `context_map`, and returns the number of raw ("extra") bits written next to
+// the entropy-coded symbols.
+//
+// With prefix codes, tokens are written in order, each as its prefix code
+// followed by its extra bits. With ANS, tokens are processed back to front and
+// the produced bit groups are buffered; the 32-bit final ANS state is written
+// first, followed by the buffered groups in reverse order of production.
+size_t WriteTokens(const std::vector<Token>& tokens,
+                   const EntropyEncodingData& codes,
+                   const std::vector<uint8_t>& context_map, BitWriter* writer) {
+  size_t num_extra_bits = 0;
+  if (codes.use_prefix_code) {
+    for (size_t i = 0; i < tokens.size(); i++) {
+      uint32_t tok, nbits, bits;
+      const Token& token = tokens[i];
+      size_t histo = context_map[token.context];
+      (token.is_lz77_length ? codes.lz77.length_uint_config
+                            : codes.uint_config[histo])
+          .Encode(token.value, &tok, &nbits, &bits);
+      tok += token.is_lz77_length ? codes.lz77.min_symbol : 0;
+      const ANSEncSymbolInfo& info = codes.encoding_info[histo][tok];
+      // Combine two calls to the BitWriter. Equivalent to:
+      // writer->Write(info.depth, info.bits);
+      // writer->Write(nbits, bits);
+      // `bits` is 32 bits wide, so it must be widened before the shift:
+      // shifting in 32-bit arithmetic would drop the high extra bits whenever
+      // depth + nbits exceeds 32.
+      uint64_t data = info.bits;
+      data |= static_cast<uint64_t>(bits) << info.depth;
+      writer->Write(info.depth + nbits, data);
+      num_extra_bits += nbits;
+    }
+    return num_extra_bits;
+  }
+  std::vector<uint64_t> out;
+  std::vector<uint8_t> out_nbits;
+  out.reserve(tokens.size());
+  out_nbits.reserve(tokens.size());
+  uint64_t allbits = 0;
+  size_t numallbits = 0;
+  // Writes in *reversed* order.
+  // Bits are packed into `allbits`; once another group would not fit into a
+  // single BitWriter call, the accumulated word is moved to `out`.
+  auto addbits = [&](size_t bits, size_t nbits) {
+    if (JXL_UNLIKELY(nbits)) {
+      JXL_DASSERT(bits >> nbits == 0);
+      if (JXL_UNLIKELY(numallbits + nbits > BitWriter::kMaxBitsPerCall)) {
+        out.push_back(allbits);
+        out_nbits.push_back(numallbits);
+        numallbits = allbits = 0;
+      }
+      allbits <<= nbits;
+      allbits |= bits;
+      numallbits += nbits;
+    }
+  };
+  ANSCoder ans;
+  // The reverse loops count down with size_t so that the token count is never
+  // narrowed to int.
+  if (codes.lz77.enabled || context_map.size() > 1) {
+    for (size_t i = tokens.size(); i-- > 0;) {
+      const Token token = tokens[i];
+      const uint8_t histo = context_map[token.context];
+      uint32_t tok, nbits, bits;
+      (token.is_lz77_length ? codes.lz77.length_uint_config
+                            : codes.uint_config[histo])
+          .Encode(tokens[i].value, &tok, &nbits, &bits);
+      tok += token.is_lz77_length ? codes.lz77.min_symbol : 0;
+      const ANSEncSymbolInfo& info = codes.encoding_info[histo][tok];
+      // Extra bits first as this is reversed.
+      addbits(bits, nbits);
+      num_extra_bits += nbits;
+      uint8_t ans_nbits = 0;
+      uint32_t ans_bits = ans.PutSymbol(info, &ans_nbits);
+      addbits(ans_bits, ans_nbits);
+    }
+  } else {
+    // Single histogram and no LZ77: skip the context-map lookup.
+    for (size_t i = tokens.size(); i-- > 0;) {
+      uint32_t tok, nbits, bits;
+      codes.uint_config[0].Encode(tokens[i].value, &tok, &nbits, &bits);
+      const ANSEncSymbolInfo& info = codes.encoding_info[0][tok];
+      // Extra bits first as this is reversed.
+      addbits(bits, nbits);
+      num_extra_bits += nbits;
+      uint8_t ans_nbits = 0;
+      uint32_t ans_bits = ans.PutSymbol(info, &ans_nbits);
+      addbits(ans_bits, ans_nbits);
+    }
+  }
+  const uint32_t state = ans.GetState();
+  writer->Write(32, state);
+  writer->Write(numallbits, allbits);
+  for (size_t i = out.size(); i > 0; --i) {
+    writer->Write(out_nbits[i - 1], out[i - 1]);
+  }
+  return num_extra_bits;
+}
+
+// Convenience overload: reserves writer space for the tokens, writes them with
+// the four-argument WriteTokens, charges the bits used to `layer`, and, when
+// `aux_out` is provided, adds the number of extra bits to that layer's stats.
+void WriteTokens(const std::vector<Token>& tokens,
+                 const EntropyEncodingData& codes,
+                 const std::vector<uint8_t>& context_map, BitWriter* writer,
+                 size_t layer, AuxOut* aux_out) {
+  const size_t max_bits = 32 * tokens.size() + 32 * 1024 * 4;
+  BitWriter::Allotment allotment(writer, max_bits);
+  const size_t extra_bits_written =
+      WriteTokens(tokens, codes, context_map, writer);
+  allotment.ReclaimAndCharge(writer, layer, aux_out);
+  if (aux_out == nullptr) return;
+  aux_out->layers[layer].extra_bits += extra_bits_written;
+}
+
+// Sets the file-level fuzzer-friendly flag. Only debug builds honour the
+// request; in every other build the call is a no-op.
+void SetANSFuzzerFriendly(bool ans_fuzzer_friendly) {
+#if JXL_IS_DEBUG_BUILD  // Guard against accidental / malicious changes.
+  ans_fuzzer_friendly_ = ans_fuzzer_friendly;
+#else
+  (void)ans_fuzzer_friendly;
+#endif
+}
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_ans.h b/third-party/libjxl/libjxl/lib/jxl/enc_ans.h
new file mode 100644
index 0000000000..a4afb19b4e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_ans.h
@@ -0,0 +1,143 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_ANS_H_
+#define LIB_JXL_ENC_ANS_H_
+
+// Library to encode the ANS population counts to the bit-stream and encode
+// symbols based on the respective distributions.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "lib/jxl/ans_common.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/enc_ans_params.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/huffman_table.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+#define USE_MULT_BY_RECIPROCAL
+
+// precision must be equal to:  #bits(state_) + #bits(freq)
+#define RECIPROCAL_PRECISION (32 + ANS_LOG_TAB_SIZE)
+
+// Data structure representing one element of the encoding table built
+// from a distribution.
+// TODO(veluca): split this up, or use an union.
+struct ANSEncSymbolInfo {
+  // ANS
+  // Frequency of the symbol; ANSCoder::PutSymbol divides the coder state by
+  // this value.
+  uint16_t freq_;
+  // Lookup table indexed by ANSCoder::PutSymbol with the remainder of the
+  // state divided by freq_; the entry is added to the shifted quotient to
+  // form the new state.
+  std::vector<uint16_t> reverse_map_;
+#ifdef USE_MULT_BY_RECIPROCAL
+  // Multiplier used by ANSCoder::PutSymbol in place of a division by freq_:
+  // the quotient is (state * ifreq_) >> RECIPROCAL_PRECISION.
+  uint64_t ifreq_;
+#endif
+  // Prefix coding.
+  // NOTE(review): presumably the code length (`depth`) and the code word
+  // (`bits`) of the symbol -- confirm against the prefix-code writer.
+  uint8_t depth;
+  uint16_t bits;
+};
+
+// Encoder-side ANS state machine. PutSymbol() folds one symbol into the
+// 32-bit state and reports any bits that had to be moved out of the state
+// first; GetState() exposes the current state value.
+class ANSCoder {
+ public:
+  // The initial state is ANS_SIGNATURE shifted left by 16 bits.
+  ANSCoder() : state_(ANS_SIGNATURE << 16) {}
+
+  // Encodes the symbol described by `t`.
+  // Returns the bits the caller has to emit and stores their count in
+  // `*nbits`: either 0, or 16 (the low 16 bits of the state) when the state
+  // was too large relative to the symbol frequency.
+  uint32_t PutSymbol(const ANSEncSymbolInfo& t, uint8_t* nbits) {
+    uint32_t bits = 0;
+    *nbits = 0;
+    // Renormalize: move the low 16 bits out of the state when its top
+    // ANS_LOG_TAB_SIZE bits are not below the symbol frequency.
+    if ((state_ >> (32 - ANS_LOG_TAB_SIZE)) >= t.freq_) {
+      bits = state_ & 0xffff;
+      state_ >>= 16;
+      *nbits = 16;
+    }
+#ifdef USE_MULT_BY_RECIPROCAL
+    // We use mult-by-reciprocal trick, but that requires 64b calc.
+    // `v` takes the role of state_ / freq_ and `state_ - v * freq_` the role
+    // of state_ % freq_ from the #else branch below.
+    const uint32_t v = (state_ * t.ifreq_) >> RECIPROCAL_PRECISION;
+    const uint32_t offset = t.reverse_map_[state_ - v * t.freq_];
+    state_ = (v << ANS_LOG_TAB_SIZE) + offset;
+#else
+    state_ = ((state_ / t.freq_) << ANS_LOG_TAB_SIZE) +
+             t.reverse_map_[state_ % t.freq_];
+#endif
+    return bits;
+  }
+
+  // Current value of the coder state.
+  uint32_t GetState() const { return state_; }
+
+ private:
+  uint32_t state_;
+};
+
+// RebalanceHistogram requires a signed type.
+using ANSHistBin = int32_t;
+
+// Bundle of everything the token writer needs to entropy-code a stream.
+struct EntropyEncodingData {
+  // Per-symbol encoding tables; WriteTokens indexes this as
+  // encoding_info[histogram][token].
+  std::vector<std::vector<ANSEncSymbolInfo>> encoding_info;
+  // NOTE(review): presumably selects prefix (Huffman) coding instead of ANS;
+  // it has no default initializer, so it must be set before it is read.
+  bool use_prefix_code;
+  // Hybrid-uint configurations; WriteTokens calls uint_config[i].Encode() to
+  // split a value into a token plus extra bits.
+  std::vector<HybridUintConfig> uint_config;
+  // LZ77 settings; WriteTokens adds lz77.min_symbol to the token of LZ77
+  // length tokens.
+  LZ77Params lz77;
+};
+
+// Integer to be encoded by an entropy coder, either ANS or Huffman.
+struct Token {
+  // Leaves every field uninitialized.
+  Token() {}
+  // Creates a regular (non-LZ77-length) token for context `c`.
+  Token(uint32_t c, uint32_t value)
+      : is_lz77_length(false), context(c), value(value) {}
+  // Flag and context share one 32-bit word: 1 bit for the flag, 31 bits for
+  // the context.
+  uint32_t is_lz77_length : 1;
+  uint32_t context : 31;
+  // The integer to encode.
+  uint32_t value;
+};
+
+// Returns an estimate of the number of bits required to encode the given
+// histogram (header bits plus data bits).
+float ANSPopulationCost(const ANSHistBin* data, size_t alphabet_size);
+
+// Apply context clustering, compute histograms and encode them. Returns an
+// estimate of the total bits used for encoding the stream. If `writer` ==
+// nullptr, the bit estimate will not take into account the context map (which
+// does not get written if `num_contexts` == 1).
+size_t BuildAndEncodeHistograms(const HistogramParams& params,
+                                size_t num_contexts,
+                                std::vector<std::vector<Token>>& tokens,
+                                EntropyEncodingData* codes,
+                                std::vector<uint8_t>* context_map,
+                                BitWriter* writer, size_t layer,
+                                AuxOut* aux_out);
+
+// Writes the tokens to `writer`, charging the bits used to `layer` of
+// `aux_out`.
+void WriteTokens(const std::vector<Token>& tokens,
+                 const EntropyEncodingData& codes,
+                 const std::vector<uint8_t>& context_map, BitWriter* writer,
+                 size_t layer, AuxOut* aux_out);
+
+// Same as above, but assumes allotment created by caller.
+size_t WriteTokens(const std::vector<Token>& tokens,
+                   const EntropyEncodingData& codes,
+                   const std::vector<uint8_t>& context_map, BitWriter* writer);
+
+// Exposed for tests; to be used with Writer=BitWriter only.
+template <typename Writer>
+void EncodeUintConfigs(const std::vector<HybridUintConfig>& uint_config,
+                       Writer* writer, size_t log_alpha_size);
+extern template void EncodeUintConfigs(const std::vector<HybridUintConfig>&,
+                                       BitWriter*, size_t);
+
+// Globally set the option to create fuzzer-friendly ANS streams. Negatively
+// impacts compression. Not thread-safe.
+void SetANSFuzzerFriendly(bool ans_fuzzer_friendly);
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_ANS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_ans_params.h b/third-party/libjxl/libjxl/lib/jxl/enc_ans_params.h
new file mode 100644
index 0000000000..50ca31dc03
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_ans_params.h
@@ -0,0 +1,76 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_ANS_PARAMS_H_
+#define LIB_JXL_ENC_ANS_PARAMS_H_
+
+// Encoder-only parameter needed for ANS entropy encoding methods.
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "lib/jxl/enc_params.h"
+
+namespace jxl {
+
+// Encoder-side settings that control how histograms are clustered and which
+// search strategies are tried when building the entropy codes.
+struct HistogramParams {
+  // How contexts are clustered into histograms.
+  enum class ClusteringType {
+    kFastest,  // At most 4 clusters.
+    kFast,
+    kBest,
+  };
+
+  // How the hybrid-uint configuration is chosen.
+  enum class HybridUintMethod {
+    kNone,        // Always kHybridUint420Config.
+    k000,         // Force the fastest option.
+    kFast,        // Try only a couple of options.
+    kContextMap,  // Fast choice for the context map.
+    kBest,
+  };
+
+  // How much LZ77 searching is done.
+  enum class LZ77Method {
+    kNone,     // No LZ77 at all.
+    kRLE,      // Run-length encoding only.
+    kLZ77,     // LZ77 with backward references.
+    kOptimal,  // Optimal-matching LZ77 parsing.
+  };
+
+  // How many histogram encoding methods are evaluated.
+  enum class ANSHistogramStrategy {
+    kFast,         // Some methods only, with early exit.
+    kApproximate,  // Some methods only.
+    kPrecise,      // All methods.
+  };
+
+  HistogramParams() = default;
+
+  // Derives the settings from the encoder speed tier, starting from the
+  // member defaults below. `num_ctx` is currently not used.
+  HistogramParams(SpeedTier tier, size_t num_ctx) {
+    const bool above_tortoise = tier > SpeedTier::kTortoise;
+    if (tier > SpeedTier::kFalcon) {
+      lz77_method = LZ77Method::kNone;
+      clustering = ClusteringType::kFastest;
+    } else {
+      clustering =
+          above_tortoise ? ClusteringType::kFast : ClusteringType::kBest;
+    }
+    if (above_tortoise) {
+      uint_method = HybridUintMethod::kNone;
+    }
+    if (tier >= SpeedTier::kSquirrel) {
+      ans_histogram_strategy = ANSHistogramStrategy::kApproximate;
+    }
+  }
+
+  // Defaults used by the default constructor.
+  ClusteringType clustering = ClusteringType::kBest;
+  HybridUintMethod uint_method = HybridUintMethod::kBest;
+  LZ77Method lz77_method = LZ77Method::kRLE;
+  ANSHistogramStrategy ans_histogram_strategy = ANSHistogramStrategy::kPrecise;
+  std::vector<size_t> image_widths;
+  // All bits set, i.e. no limit by default.
+  size_t max_histograms = ~0;
+  bool force_huffman = false;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_ANS_PARAMS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_ar_control_field.cc b/third-party/libjxl/libjxl/lib/jxl/enc_ar_control_field.cc
new file mode 100644
index 0000000000..9030430e2b
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_ar_control_field.cc
@@ -0,0 +1,325 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_ar_control_field.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <algorithm>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_ar_control_field.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::GetLane;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Sqrt;
+
+// Computes the per-block EPF sharpness values (stored in
+// `enc_state->shared.epf_sharpness`) for the blocks covered by `rect`.
+//
+// `opsin` is the input image, `rect` is given in block units (its coordinates
+// are multiplied by kBlockDim to obtain pixel positions) and `temp_image`
+// provides the scratch images used for intermediate results.
+//
+// Every written value is either 0 or 4. The decision is based on a weighted
+// 3x3 Laplacian of `opsin`, aggregated over 4x4 pixel cells, and on the
+// initial quant field.
+void ProcessTile(const Image3F& opsin, PassesEncoderState* enc_state,
+                 const Rect& rect,
+                 ArControlFieldHeuristics::TempImages* temp_image) {
+  constexpr size_t N = kBlockDim;
+  ImageB* JXL_RESTRICT epf_sharpness = &enc_state->shared.epf_sharpness;
+  ImageF* JXL_RESTRICT quant = &enc_state->initial_quant_field;
+  JXL_ASSERT(
+      epf_sharpness->xsize() == enc_state->shared.frame_dim.xsize_blocks &&
+      epf_sharpness->ysize() == enc_state->shared.frame_dim.ysize_blocks);
+
+  // Heuristics disabled: fill the whole rect with the constant 4 and stop.
+  if (enc_state->cparams.butteraugli_distance < kMinButteraugliForDynamicAR ||
+      enc_state->cparams.speed_tier > SpeedTier::kWombat ||
+      enc_state->shared.frame_header.loop_filter.epf_iters == 0) {
+    FillPlane(static_cast<uint8_t>(4), epf_sharpness, rect);
+    return;
+  }
+
+  // Likely better to have a higher X weight, like:
+  // const float kChannelWeights[3] = {47.0f, 4.35f, 0.287f};
+  const float kChannelWeights[3] = {4.35f, 4.35f, 0.287f};
+  // Weight applied to the sum of the eight neighbours: -1/8 of the centre
+  // weight.
+  const float kChannelWeightsLapNeg[3] = {-0.125f * kChannelWeights[0],
+                                          -0.125f * kChannelWeights[1],
+                                          -0.125f * kChannelWeights[2]};
+  const size_t sharpness_stride =
+      static_cast<size_t>(epf_sharpness->PixelsPerRow());
+
+  // Bounds of the tile in block units.
+  size_t by0 = rect.y0();
+  size_t by1 = rect.y0() + rect.ysize();
+  size_t bx0 = rect.x0();
+  size_t bx1 = rect.x0() + rect.xsize();
+  temp_image->InitOnce();
+  ImageF& laplacian_sqrsum = temp_image->laplacian_sqrsum;
+  // Calculate the L2 of the 3x3 Laplacian in an integral transform
+  // (for example 32x32 dct). This relates to transforms ability
+  // to propagate artefacts.
+  // Coordinates in laplacian_sqrsum are offset by 2 relative to the tile
+  // origin (see `cy` / `cx` below); y0/x0 skip the two border rows/columns
+  // that would lie before the start of the image and y1/x1 are clamped so at
+  // most two rows/columns past the tile are computed, never past the image.
+  size_t y0 = by0 == 0 ? 2 : 0;
+  size_t y1 = by1 * N + 4 <= opsin.ysize() + 2 ? (by1 - by0) * N + 4
+                                               : opsin.ysize() + 2 - by0 * N;
+  size_t x0 = bx0 == 0 ? 2 : 0;
+  size_t x1 = bx1 * N + 4 <= opsin.xsize() + 2 ? (bx1 - bx0) * N + 4
+                                               : opsin.xsize() + 2 - bx0 * N;
+  HWY_FULL(float) df;
+  for (size_t y = y0; y < y1; y++) {
+    float* JXL_RESTRICT laplacian_sqrsum_row = laplacian_sqrsum.Row(y);
+    // Row of `opsin` corresponding to row `y` of the scratch image.
+    size_t cy = y + by0 * N - 2;
+    const float* JXL_RESTRICT in_row_t[3];
+    const float* JXL_RESTRICT in_row[3];
+    const float* JXL_RESTRICT in_row_b[3];
+    for (size_t c = 0; c < 3; c++) {
+      // At the first / last image row the centre row is reused as the
+      // top / bottom neighbour.
+      in_row_t[c] = opsin.PlaneRow(c, cy > 0 ? cy - 1 : cy);
+      in_row[c] = opsin.PlaneRow(c, cy);
+      in_row_b[c] = opsin.PlaneRow(c, cy + 1 < opsin.ysize() ? cy + 1 : cy);
+    }
+    // Scalar version, used at the left/right ends of the row; it reuses the
+    // centre column when a horizontal neighbour would be outside the image.
+    auto compute_laplacian_scalar = [&](size_t x) {
+      size_t cx = x + bx0 * N - 2;
+      const size_t prevX = cx >= 1 ? cx - 1 : cx;
+      const size_t nextX = cx + 1 < opsin.xsize() ? cx + 1 : cx;
+      float sumsqr = 0;
+      for (size_t c = 0; c < 3; c++) {
+        float laplacian =
+            kChannelWeights[c] * in_row[c][cx] +
+            kChannelWeightsLapNeg[c] *
+                (in_row[c][prevX] + in_row[c][nextX] + in_row_b[c][prevX] +
+                 in_row_b[c][cx] + in_row_b[c][nextX] + in_row_t[c][prevX] +
+                 in_row_t[c][cx] + in_row_t[c][nextX]);
+        sumsqr += laplacian * laplacian;
+      }
+      laplacian_sqrsum_row[x] = sumsqr;
+    };
+    size_t x = x0;
+    // Leading pixels up to and including image column 0 go through the scalar
+    // path, so the vector loads at cx - 1 below never read before the row.
+    for (; x + bx0 * N < 3; x++) {
+      compute_laplacian_scalar(x);
+    }
+    // Interior. One extra pixel of border as the last pixel is special.
+    for (; x + Lanes(df) <= x1 && x + Lanes(df) + bx0 * N - 1 <= opsin.xsize();
+         x += Lanes(df)) {
+      size_t cx = x + bx0 * N - 2;
+      auto sumsqr = Zero(df);
+      for (size_t c = 0; c < 3; c++) {
+        auto laplacian =
+            Mul(LoadU(df, in_row[c] + cx), Set(df, kChannelWeights[c]));
+        // Sum of the eight neighbours, accumulated pairwise.
+        auto sum_oth0 = LoadU(df, in_row[c] + cx - 1);
+        auto sum_oth1 = LoadU(df, in_row[c] + cx + 1);
+        auto sum_oth2 = LoadU(df, in_row_t[c] + cx - 1);
+        auto sum_oth3 = LoadU(df, in_row_t[c] + cx);
+        sum_oth0 = Add(sum_oth0, LoadU(df, in_row_t[c] + cx + 1));
+        sum_oth1 = Add(sum_oth1, LoadU(df, in_row_b[c] + cx - 1));
+        sum_oth2 = Add(sum_oth2, LoadU(df, in_row_b[c] + cx));
+        sum_oth3 = Add(sum_oth3, LoadU(df, in_row_b[c] + cx + 1));
+        sum_oth0 = Add(sum_oth0, sum_oth1);
+        sum_oth2 = Add(sum_oth2, sum_oth3);
+        sum_oth0 = Add(sum_oth0, sum_oth2);
+        laplacian =
+            MulAdd(Set(df, kChannelWeightsLapNeg[c]), sum_oth0, laplacian);
+        sumsqr = MulAdd(laplacian, laplacian, sumsqr);
+      }
+      StoreU(sumsqr, df, laplacian_sqrsum_row + x);
+    }
+    // Remaining pixels at the end of the row.
+    for (; x < x1; x++) {
+      compute_laplacian_scalar(x);
+    }
+  }
+  HWY_CAPPED(float, 4) df4;
+  // Calculate the L2 of the 3x3 Laplacian in 4x4 blocks within the area
+  // of the integral transform. Sample them within the integral transform
+  // with two offsets (0,0) and (-2, -2) pixels (sqrsum_00 and sqrsum_22,
+  //  respectively).
+  ImageF& sqrsum_00 = temp_image->sqrsum_00;
+  size_t sqrsum_00_stride = sqrsum_00.PixelsPerRow();
+  float* JXL_RESTRICT sqrsum_00_row = sqrsum_00.Row(0);
+  // Two 4x4 cells per block in each direction; the +2 skips the border of
+  // laplacian_sqrsum.
+  for (size_t y = 0; y < (by1 - by0) * 2; y++) {
+    const float* JXL_RESTRICT rows_in[4];
+    for (size_t iy = 0; iy < 4; iy++) {
+      rows_in[iy] = laplacian_sqrsum.ConstRow(y * 4 + iy + 2);
+    }
+    float* JXL_RESTRICT row_out = sqrsum_00_row + y * sqrsum_00_stride;
+    for (size_t x = 0; x < (bx1 - bx0) * 2; x++) {
+      auto sum = Zero(df4);
+      for (size_t iy = 0; iy < 4; iy++) {
+        for (size_t ix = 0; ix < 4; ix += Lanes(df4)) {
+          sum = Add(sum, LoadU(df4, rows_in[iy] + x * 4 + ix + 2));
+        }
+      }
+      // sqrt(sum) / 4, i.e. sqrt(sum / 16): root of the mean over 16 pixels.
+      row_out[x] = GetLane(Sqrt(SumOfLanes(df4, sum))) * (1.0f / 4.0f);
+    }
+  }
+  // Indexing iy and ix is a bit tricky as we include a 2 pixel border
+  // around the block for evenness calculations. This is similar to what
+  // we did in guetzli for the observability of artefacts, except there
+  // the element is a sliding 5x5, not sparsely sampled 4x4 box like here.
+  ImageF& sqrsum_22 = temp_image->sqrsum_22;
+  size_t sqrsum_22_stride = sqrsum_22.PixelsPerRow();
+  float* JXL_RESTRICT sqrsum_22_row = sqrsum_22.Row(0);
+  // One more cell than sqrsum_00 in each direction because of the shift.
+  for (size_t y = 0; y < (by1 - by0) * 2 + 1; y++) {
+    const float* JXL_RESTRICT rows_in[4];
+    for (size_t iy = 0; iy < 4; iy++) {
+      rows_in[iy] = laplacian_sqrsum.ConstRow(y * 4 + iy);
+    }
+    float* JXL_RESTRICT row_out = sqrsum_22_row + y * sqrsum_22_stride;
+    // ignore pixels outside the image.
+    // Y coordinates are relative to by0*8+y*4.
+    size_t sy = y * 4 + by0 * 8 > 0 ? 0 : 2;
+    size_t ey = y * 4 + by0 * 8 + 4 <= opsin.ysize() + 2
+                    ? 4
+                    : opsin.ysize() - y * 4 - by0 * 8 + 2;
+    for (size_t x = 0; x < (bx1 - bx0) * 2 + 1; x++) {
+      // ignore pixels outside the image.
+      // X coordinates are relative to bx0*8.
+      size_t sx = x * 4 + bx0 * 8 > 0 ? x * 4 : x * 4 + 2;
+      size_t ex = x * 4 + bx0 * 8 + 4 <= opsin.xsize() + 2
+                      ? x * 4 + 4
+                      : opsin.xsize() - bx0 * 8 + 2;
+      if (ex - sx == 4 && ey - sy == 4) {
+        // Complete 4x4 cell: vector path.
+        auto sum = Zero(df4);
+        for (size_t iy = 0; iy < 4; iy++) {
+          for (size_t ix = 0; ix < 4; ix += Lanes(df4)) {
+            sum = Add(sum, Load(df4, rows_in[iy] + sx + ix));
+          }
+        }
+        row_out[x] = GetLane(Sqrt(SumOfLanes(df4, sum))) * (1.0f / 4.0f);
+      } else {
+        // Cell clipped by the image edge: average over the pixels that exist.
+        float sum = 0;
+        for (size_t iy = sy; iy < ey; iy++) {
+          for (size_t ix = sx; ix < ex; ix++) {
+            sum += rows_in[iy][ix];
+          }
+        }
+        row_out[x] = std::sqrt(sum / ((ex - sx) * (ey - sy)));
+      }
+    }
+  }
+  // Decide the sharpness value of every block, one AC strategy (transform)
+  // at a time.
+  for (size_t by = by0; by < by1; by++) {
+    AcStrategyRow acs_row = enc_state->shared.ac_strategy.ConstRow(by);
+    uint8_t* JXL_RESTRICT out_row = epf_sharpness->Row(by);
+    float* JXL_RESTRICT quant_row = quant->Row(by);
+    for (size_t bx = bx0; bx < bx1; bx++) {
+      AcStrategy acs = acs_row[bx];
+      // All blocks covered by a transform are handled when its first block is
+      // visited.
+      if (!acs.IsFirstBlock()) continue;
+      // The errors are going to be linear to the quantization value in this
+      // locality. We only have access to the initial quant field here.
+      float quant_val = 1.0f / quant_row[bx];
+
+      // Accessors for the 4x4-cell statistics, relative to the current block.
+      const auto sq00 = [&](size_t y, size_t x) {
+        return sqrsum_00_row[((by - by0) * 2 + y) * sqrsum_00_stride +
+                             (bx - bx0) * 2 + x];
+      };
+      const auto sq22 = [&](size_t y, size_t x) {
+        return sqrsum_22_row[((by - by0) * 2 + y) * sqrsum_22_stride +
+                             (bx - bx0) * 2 + x];
+      };
+      // Root mean square of the sq00 values over the whole transform.
+      float sqrsum_integral_transform = 0;
+      for (size_t iy = 0; iy < acs.covered_blocks_y() * 2; iy++) {
+        for (size_t ix = 0; ix < acs.covered_blocks_x() * 2; ix++) {
+          sqrsum_integral_transform += sq00(iy, ix) * sq00(iy, ix);
+        }
+      }
+      sqrsum_integral_transform /=
+          4 * acs.covered_blocks_x() * acs.covered_blocks_y();
+      sqrsum_integral_transform = std::sqrt(sqrsum_integral_transform);
+      // If masking is high or amplitude of the artefacts is low, then no
+      // smoothing is needed.
+      for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+        for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+          // Five 4x4 blocks for masking estimation, all within the
+          // 8x8 area.
+          float minval_1 = std::min(sq00(2 * iy + 0, 2 * ix + 0),
+                                    sq00(2 * iy + 0, 2 * ix + 1));
+          float minval_2 = std::min(sq00(2 * iy + 1, 2 * ix + 0),
+                                    sq00(2 * iy + 1, 2 * ix + 1));
+          float minval = std::min(minval_1, minval_2);
+          minval = std::min(minval, sq22(2 * iy + 1, 2 * ix + 1));
+          // Nine more 4x4 blocks for masking estimation, includes
+          // the 2 pixel area around the 8x8 block being controlled.
+          float minval2_1 = std::min(sq22(2 * iy + 0, 2 * ix + 0),
+                                     sq22(2 * iy + 0, 2 * ix + 1));
+          float minval2_2 = std::min(sq22(2 * iy + 0, 2 * ix + 2),
+                                     sq22(2 * iy + 1, 2 * ix + 0));
+          float minval2_3 = std::min(sq22(2 * iy + 1, 2 * ix + 1),
+                                     sq22(2 * iy + 1, 2 * ix + 2));
+          float minval2_4 = std::min(sq22(2 * iy + 2, 2 * ix + 0),
+                                     sq22(2 * iy + 2, 2 * ix + 1));
+          float minval2_5 = std::min(minval2_1, minval2_2);
+          float minval2_6 = std::min(minval2_3, minval2_4);
+          float minval2 = std::min(minval2_5, minval2_6);
+          minval2 = std::min(minval2, sq22(2 * iy + 2, 2 * ix + 2));
+          float minval3 = std::min(minval, minval2);
+          // Blend the different minima into a single masking estimate.
+          minval *= 0.125f;
+          minval += 0.625f * minval3;
+          minval +=
+              0.125f * std::min(1.5f * minval3, sq22(2 * iy + 1, 2 * ix + 1));
+          minval += 0.125f * minval2;
+          // Larger kBias, less smoothing for low intensity changes.
+          float kDeltaLimit = 3.2;
+          float bias = 0.0625f * quant_val;
+          float delta =
+              (sqrsum_integral_transform + (kDeltaLimit + 0.05) * bias) /
+              (minval + bias);
+          int out = 4;
+          if (delta > kDeltaLimit) {
+            out = 4;  // smooth
+          } else {
+            out = 0;
+          }
+          // 'threshold' is separate from 'bias' for easier tuning of these
+          // heuristics.
+          float threshold = 0.0625f * quant_val;
+          const float kSmoothLimit = 0.085f;
+          float smooth = 0.20f * (sq00(2 * iy + 0, 2 * ix + 0) +
+                                  sq00(2 * iy + 0, 2 * ix + 1) +
+                                  sq00(2 * iy + 1, 2 * ix + 0) +
+                                  sq00(2 * iy + 1, 2 * ix + 1) + minval);
+          // Very low activity overrides the decision above and selects 4.
+          if (smooth < kSmoothLimit * threshold) {
+            out = 4;
+          }
+          out_row[bx + sharpness_stride * iy + ix] = out;
+        }
+      }
+    }
+  }
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(ProcessTile);
+
+// Runs the best available ProcessTile implementation on `block_rect`, using
+// the scratch images at index `thread` of `temp_images`.
+void ArControlFieldHeuristics::RunRect(const Rect& block_rect,
+                                       const Image3F& opsin,
+                                       PassesEncoderState* enc_state,
+                                       size_t thread) {
+  TempImages* const scratch = &temp_images[thread];
+  HWY_DYNAMIC_DISPATCH(ProcessTile)(opsin, enc_state, block_rect, scratch);
+}
+
+}  // namespace jxl
+
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_ar_control_field.h b/third-party/libjxl/libjxl/lib/jxl/enc_ar_control_field.h
new file mode 100644
index 0000000000..aabe71f46f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_ar_control_field.h
@@ -0,0 +1,49 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_AR_CONTROL_FIELD_H_
+#define LIB_JXL_ENC_AR_CONTROL_FIELD_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+struct PassesEncoderState;
+
+// Driver for the per-tile heuristic implemented by ProcessTile in
+// enc_ar_control_field.cc. Call PrepareForThreads() first, then RunRect()
+// with a `thread` index below the prepared count.
+struct ArControlFieldHeuristics {
+  // Scratch images for one thread; allocated on first use.
+  struct TempImages {
+    // Allocates the three scratch images unless that has already happened
+    // (detected through a non-zero width of `laplacian_sqrsum`).
+    void InitOnce() {
+      const bool already_allocated = laplacian_sqrsum.xsize() != 0;
+      if (!already_allocated) {
+        laplacian_sqrsum = ImageF(kEncTileDim + 4, kEncTileDim + 4);
+        sqrsum_00 = ImageF(kEncTileDim / 4, kEncTileDim / 4);
+        sqrsum_22 = ImageF(kEncTileDim / 4 + 1, kEncTileDim / 4 + 1);
+      }
+    }
+
+    // Tile-sized buffer with 4 extra rows and columns.
+    ImageF laplacian_sqrsum;
+    // One value per 4x4 pixel cell of the tile.
+    ImageF sqrsum_00;
+    // Same, with one additional row and column.
+    ImageF sqrsum_22;
+  };
+
+  // Creates one TempImages slot per thread.
+  void PrepareForThreads(size_t num_threads) {
+    temp_images.resize(num_threads);
+  }
+
+  // Processes the blocks of `block_rect` with the scratch slot of `thread`.
+  void RunRect(const Rect& block_rect, const Image3F& opsin,
+               PassesEncoderState* enc_state, size_t thread);
+
+  std::vector<TempImages> temp_images;
+  // NOTE(review): the three members below have no initializers and are not
+  // referenced by the code visible in this header -- confirm whether they are
+  // still needed.
+  ImageB* epf_sharpness;
+  ImageF* quant;
+  bool all_default;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_AR_CONTROL_FIELD_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_aux_out.cc b/third-party/libjxl/libjxl/lib/jxl/enc_aux_out.cc
new file mode 100644
index 0000000000..5d784c43f6
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_aux_out.cc
@@ -0,0 +1,127 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_aux_out.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <numeric>  // accumulate
+#include <sstream>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Maps a layer index (one of the kLayer* constants) to its display name.
+// Any other value is reported through JXL_UNREACHABLE.
+const char* LayerName(size_t layer) {
+  const char* name = nullptr;
+  switch (layer) {
+    case kLayerHeader:
+      name = "Headers";
+      break;
+    case kLayerTOC:
+      name = "TOC";
+      break;
+    case kLayerDictionary:
+      name = "Patches";
+      break;
+    case kLayerSplines:
+      name = "Splines";
+      break;
+    case kLayerNoise:
+      name = "Noise";
+      break;
+    case kLayerQuant:
+      name = "Quantizer";
+      break;
+    case kLayerModularTree:
+      name = "ModularTree";
+      break;
+    case kLayerModularGlobal:
+      name = "ModularGlobal";
+      break;
+    case kLayerDC:
+      name = "DC";
+      break;
+    case kLayerModularDcGroup:
+      name = "ModularDcGroup";
+      break;
+    case kLayerControlFields:
+      name = "ControlFields";
+      break;
+    case kLayerOrder:
+      name = "CoeffOrder";
+      break;
+    case kLayerAC:
+      name = "ACHistograms";
+      break;
+    case kLayerACTokens:
+      name = "ACTokens";
+      break;
+    case kLayerModularAcGroup:
+      name = "ModularAcGroup";
+      break;
+    default:
+      JXL_UNREACHABLE("Invalid layer %d\n", static_cast<int>(layer));
+  }
+  return name;
+}
+
+// Prints one line with the totals of this layer to stdout; histogram details
+// are appended only when histogram bits were recorded. Prints nothing unless
+// JXL_DEBUG_V_LEVEL > 0. `num_inputs` is the divisor of the clusters-per-input
+// average.
+void AuxOut::LayerTotals::Print(size_t num_inputs) const {
+  if (JXL_DEBUG_V_LEVEL <= 0) return;
+
+  printf("%10" PRId64, static_cast<int64_t>(total_bits));
+  if (histogram_bits != 0) {
+    const double clusters_per_input =
+        num_clustered_histograms * 1.0 / num_inputs;
+    const int64_t histogram_bytes = static_cast<int64_t>(histogram_bits >> 3);
+    const int64_t extra_bytes = static_cast<int64_t>(extra_bits >> 3);
+    const double combined_bytes =
+        (histogram_bits + clustered_entropy + extra_bits) / 8.0;
+    printf("   [c/i:%6.2f | hst:%8" PRId64 " | ex:%8" PRId64
+           " | h+c+e:%12.3f",
+           clusters_per_input, histogram_bytes, extra_bytes, combined_bytes);
+    printf("]");
+  }
+  printf("\n");
+}
+
+// Adds all statistics of `victim` (per-layer totals and block counters) to
+// this object.
+void AuxOut::Assimilate(const AuxOut& victim) {
+  size_t idx = 0;
+  for (LayerTotals& mine : layers) {
+    mine.Assimilate(victim.layers[idx]);
+    ++idx;
+  }
+
+  // Block counters.
+  num_blocks += victim.num_blocks;
+  num_small_blocks += victim.num_small_blocks;
+  num_dct4x8_blocks += victim.num_dct4x8_blocks;
+  num_afv_blocks += victim.num_afv_blocks;
+  num_dct8_blocks += victim.num_dct8_blocks;
+  num_dct8x16_blocks += victim.num_dct8x16_blocks;
+  num_dct8x32_blocks += victim.num_dct8x32_blocks;
+  num_dct16_blocks += victim.num_dct16_blocks;
+  num_dct16x32_blocks += victim.num_dct16x32_blocks;
+  num_dct32_blocks += victim.num_dct32_blocks;
+  num_dct32x64_blocks += victim.num_dct32x64_blocks;
+  num_dct64_blocks += victim.num_dct64_blocks;
+
+  // Iteration counter.
+  num_butteraugli_iters += victim.num_butteraugli_iters;
+}
+
+// Prints the collected statistics to stdout: the average number of
+// butteraugli iterations, one line per layer that has a non-zero bit count
+// (with its share of the total), and a final line for the whole image.
+//
+// Prints nothing unless JXL_DEBUG_V_LEVEL > 0, and nothing when `num_inputs`
+// is zero. `num_inputs` is the divisor used for the per-input averages.
+void AuxOut::Print(size_t num_inputs) const {
+  if (JXL_DEBUG_V_LEVEL > 0) {
+    if (num_inputs == 0) return;
+
+    // Sum of all layers, used for the percentages and the final line.
+    LayerTotals all_layers;
+    for (size_t i = 0; i < layers.size(); ++i) {
+      all_layers.Assimilate(layers[i]);
+    }
+
+    printf("Average butteraugli iters: %10.2f\n",
+           num_butteraugli_iters * 1.0 / num_inputs);
+
+    for (size_t i = 0; i < layers.size(); ++i) {
+      // A non-zero layer implies a non-zero total, so the division is safe.
+      if (layers[i].total_bits != 0) {
+        printf("Total layer bits %-10s\t", LayerName(i));
+        printf("%10f%%", 100.0 * layers[i].total_bits / all_layers.total_bits);
+        layers[i].Print(num_inputs);
+      }
+    }
+    printf("Total image size           ");
+    all_layers.Print(num_inputs);
+    // No "Blocks/Position" table is printed: its two counters were local
+    // variables that were always zero, so that output could never appear.
+  }
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_aux_out.h b/third-party/libjxl/libjxl/lib/jxl/enc_aux_out.h
new file mode 100644
index 0000000000..545711af83
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_aux_out.h
@@ -0,0 +1,102 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_AUX_OUT_H_
+#define LIB_JXL_AUX_OUT_H_
+
+// Optional output information for debugging and analyzing size usage.
+
+#include <stddef.h>
+
+#include <array>
+#include <functional>
+#include <string>
+
+namespace jxl {
+
+struct ColorEncoding;
+
+// For LayerName and AuxOut::layers[] index. Order does not matter.
+// kNumImageLayers must remain the last enumerator: it is used as the number
+// of elements of AuxOut::layers.
+enum {
+  kLayerHeader = 0,
+  kLayerTOC,
+  kLayerDictionary,
+  kLayerSplines,
+  kLayerNoise,
+  kLayerQuant,
+  kLayerModularTree,
+  kLayerModularGlobal,
+  kLayerDC,
+  kLayerModularDcGroup,
+  kLayerControlFields,
+  kLayerOrder,
+  kLayerAC,
+  kLayerACTokens,
+  kLayerModularAcGroup,
+  kNumImageLayers
+};
+
+const char* LayerName(size_t layer);
+
+// Optional statistics collected while compressing or decompressing: bit
+// counts per layer plus counters for the block types that were used.
+struct AuxOut {
+ private:
+  // Bit accounting of a single layer.
+  struct LayerTotals {
+    // Adds every counter of `victim` to this object.
+    void Assimilate(const LayerTotals& victim) {
+      total_bits += victim.total_bits;
+      histogram_bits += victim.histogram_bits;
+      extra_bits += victim.extra_bits;
+      num_clustered_histograms += victim.num_clustered_histograms;
+      clustered_entropy += victim.clustered_entropy;
+    }
+    void Print(size_t num_inputs) const;
+
+    size_t num_clustered_histograms = 0;
+    size_t extra_bits = 0;
+
+    // NOTE(review): these two are updated outside this header (presumably by
+    // the bit writer's allotment accounting) -- confirm.
+    size_t histogram_bits = 0;
+    size_t total_bits = 0;
+
+    double clustered_entropy = 0.0;
+  };
+
+ public:
+  AuxOut() = default;
+  AuxOut(const AuxOut&) = default;
+
+  // Adds all statistics of `victim` to this object.
+  void Assimilate(const AuxOut& victim);
+
+  // Prints the statistics; `num_inputs` is the divisor for averages.
+  void Print(size_t num_inputs) const;
+
+  // Sum of `total_bits` over all layers.
+  size_t TotalBits() const {
+    size_t sum = 0;
+    for (size_t i = 0; i < layers.size(); ++i) {
+      sum += layers[i].total_bits;
+    }
+    return sum;
+  }
+
+  // Indexed by the kLayer* constants.
+  std::array<LayerTotals, kNumImageLayers> layers;
+  size_t num_blocks = 0;
+
+  // Block counters per transform type (set by ac_strategy).
+  size_t num_small_blocks = 0;
+  size_t num_dct4x8_blocks = 0;
+  size_t num_afv_blocks = 0;
+  size_t num_dct8_blocks = 0;
+  size_t num_dct8x16_blocks = 0;
+  size_t num_dct8x32_blocks = 0;
+  size_t num_dct16_blocks = 0;
+  size_t num_dct16x32_blocks = 0;
+  size_t num_dct32_blocks = 0;
+  size_t num_dct32x64_blocks = 0;
+  size_t num_dct64_blocks = 0;
+
+  int num_butteraugli_iters = 0;
+};
+}  // namespace jxl
+
+#endif  // LIB_JXL_AUX_OUT_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_bit_writer.cc b/third-party/libjxl/libjxl/lib/jxl/enc_bit_writer.cc
new file mode 100644
index 0000000000..662aaa5416
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_bit_writer.cc
@@ -0,0 +1,201 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_bit_writer.h"
+
+#include <string.h>  // memcpy
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_aux_out.h"
+
+namespace jxl {
+
+BitWriter::Allotment::Allotment(BitWriter* JXL_RESTRICT writer, size_t max_bits)
+    : max_bits_(max_bits) {
+  if (writer == nullptr) return;
+  prev_bits_written_ = writer->BitsWritten();
+  const size_t prev_bytes = writer->storage_.size();
+  const size_t next_bytes = DivCeil(max_bits, kBitsPerByte);
+  writer->storage_.resize(prev_bytes + next_bytes);
+  parent_ = writer->current_allotment_;
+  writer->current_allotment_ = this;
+}
+
+BitWriter::Allotment::~Allotment() {
+  if (!called_) {
+    // Not reclaiming is a bug - the unused storage would never be released.
+    JXL_UNREACHABLE("Did not call Allotment::ReclaimAndCharge");
+  }
+}
+
+void BitWriter::Allotment::FinishedHistogram(BitWriter* JXL_RESTRICT writer) {
+  if (writer == nullptr) return;
+  JXL_ASSERT(!called_);              // Call before ReclaimUnused
+  JXL_ASSERT(histogram_bits_ == 0);  // Do not call twice
+  JXL_ASSERT(writer->BitsWritten() >= prev_bits_written_);
+  histogram_bits_ = writer->BitsWritten() - prev_bits_written_;
+}
+
+void BitWriter::Allotment::ReclaimAndCharge(BitWriter* JXL_RESTRICT writer,
+                                            size_t layer,
+                                            AuxOut* JXL_RESTRICT aux_out) {
+  // PrivateReclaim returns early for a null writer without storing results,
+  // so start from zero instead of charging indeterminate values below.
+  size_t used_bits = 0, unused_bits = 0;
+  PrivateReclaim(writer, &used_bits, &unused_bits);
+#if 0
+  printf("Layer %s bits: max %" PRIuS " used %" PRIuS " unused %" PRIuS "\n",
+         LayerName(layer), MaxBits(), used_bits, unused_bits);
+#endif
+
+  // aux_out may be null (e.g. in a nested call); then nothing is charged.
+  if (aux_out != nullptr) {
+    aux_out->layers[layer].total_bits += used_bits;
+    aux_out->layers[layer].histogram_bits += HistogramBits();
+  }
+}
+
+void BitWriter::Allotment::PrivateReclaim(BitWriter* JXL_RESTRICT writer,
+                                          size_t* JXL_RESTRICT used_bits,
+                                          size_t* JXL_RESTRICT unused_bits) {
+  JXL_ASSERT(!called_);  // Do not call twice
+  called_ = true;
+  if (writer == nullptr) return;
+
+  JXL_ASSERT(writer->BitsWritten() >= prev_bits_written_);
+  *used_bits = writer->BitsWritten() - prev_bits_written_;
+  JXL_ASSERT(*used_bits <= max_bits_);
+  *unused_bits = max_bits_ - *used_bits;
+
+  // Reclaim unused whole bytes from writer's allotment.
+  const size_t unused_bytes = *unused_bits / kBitsPerByte;  // truncate
+  JXL_ASSERT(writer->storage_.size() >= unused_bytes);
+  writer->storage_.resize(writer->storage_.size() - unused_bytes);
+  writer->current_allotment_ = parent_;
+  // Ensure we don't also charge the parent for these bits.
+  auto parent = parent_;
+  while (parent != nullptr) {
+    parent->prev_bits_written_ += *used_bits;
+    parent = parent->parent_;
+  }
+}
+
+void BitWriter::AppendByteAligned(const Span<const uint8_t>& span) {
+  if (span.empty()) return;
+  storage_.resize(storage_.size() + span.size() + 1);  // extra zero padding
+
+  // Concatenate by copying bytes because both source and destination are bytes.
+  JXL_ASSERT(BitsWritten() % kBitsPerByte == 0);
+  size_t pos = BitsWritten() / kBitsPerByte;
+  memcpy(storage_.data() + pos, span.data(), span.size());
+  pos += span.size();
+  storage_[pos++] = 0;  // for next Write
+  JXL_ASSERT(pos <= storage_.size());
+  bits_written_ += span.size() * kBitsPerByte;
+}
+
+void BitWriter::AppendByteAligned(const BitWriter& other) {
+  JXL_ASSERT(other.BitsWritten() % kBitsPerByte == 0);
+  JXL_ASSERT(other.BitsWritten() / kBitsPerByte != 0);
+
+  AppendByteAligned(other.GetSpan());
+}
+
+void BitWriter::AppendByteAligned(const std::vector<BitWriter>& others) {
+  // Total size to add so we can preallocate
+  size_t other_bytes = 0;
+  for (const BitWriter& writer : others) {
+    JXL_ASSERT(writer.BitsWritten() % kBitsPerByte == 0);
+    other_bytes += writer.BitsWritten() / kBitsPerByte;
+  }
+  if (other_bytes == 0) {
+    // No bytes to append: this happens for example when creating per-group
+    // storage for groups, but not writing anything in them for e.g. lossless
+    // images with no alpha. Do nothing.
+    return;
+  }
+  storage_.resize(storage_.size() + other_bytes + 1);  // extra zero padding
+
+  // Concatenate by copying bytes because both source and destination are bytes.
+  JXL_ASSERT(BitsWritten() % kBitsPerByte == 0);
+  size_t pos = BitsWritten() / kBitsPerByte;
+  for (const BitWriter& writer : others) {
+    const Span<const uint8_t> span = writer.GetSpan();
+    if (!span.empty()) {
+      memcpy(storage_.data() + pos, span.data(), span.size());
+      pos += span.size();
+    }
+  }
+  storage_[pos++] = 0;  // for next Write
+  JXL_ASSERT(pos <= storage_.size());
+  bits_written_ += other_bytes * kBitsPerByte;
+}
+
+// TODO(lode): avoid code duplication
+void BitWriter::AppendByteAligned(
+    const std::vector<std::unique_ptr<BitWriter>>& others) {
+  // Total size to add so we can preallocate
+  size_t other_bytes = 0;
+  for (const auto& writer : others) {
+    JXL_ASSERT(writer->BitsWritten() % kBitsPerByte == 0);
+    other_bytes += writer->BitsWritten() / kBitsPerByte;
+  }
+  if (other_bytes == 0) {
+    // No bytes to append, e.g. per-group storage that was created but never
+    // written to (lossless images with no alpha). Do nothing.
+    return;
+  }
+  storage_.resize(storage_.size() + other_bytes + 1);  // extra zero padding
+
+  // Concatenate by copying bytes because both source and destination are bytes.
+  JXL_ASSERT(BitsWritten() % kBitsPerByte == 0);
+  size_t pos = BitsWritten() / kBitsPerByte;
+  for (const auto& writer : others) {
+    const Span<const uint8_t> span = writer->GetSpan();
+    // Skip empty spans, as the vector<BitWriter> overload does (null-safe).
+    if (!span.empty()) memcpy(storage_.data() + pos, span.data(), span.size());
+    pos += span.size();
+  }
+  storage_[pos++] = 0;  // for next Write
+  JXL_ASSERT(pos <= storage_.size());
+  bits_written_ += other_bytes * kBitsPerByte;
+}
+
+// Example: let's assume that 3 bits (Rs below) have been written already:
+// BYTE+0       BYTE+1       BYTE+2
+// 0000 0RRR    ???? ????    ???? ????
+//
+// Now, we could write up to 5 bits by just shifting them left by 3 bits and
+// OR'ing to BYTE+0.
+//
+// For n > 5 bits, we write the lowest 5 bits as above, then write the next
+// lowest bits into BYTE+1 starting from its lower bits and so on.
+void BitWriter::Write(size_t n_bits, uint64_t bits) {
+  JXL_DASSERT((bits >> n_bits) == 0);
+  JXL_DASSERT(n_bits <= kMaxBitsPerCall);
+  uint8_t* p = &storage_[bits_written_ / kBitsPerByte];
+  const size_t bits_in_first_byte = bits_written_ % kBitsPerByte;
+  bits <<= bits_in_first_byte;
+#if JXL_BYTE_ORDER_LITTLE
+  uint64_t v = *p;
+  // Last (partial) or next byte to write must be zero-initialized!
+  // PaddedBytes initializes the first, and Write/Append maintain this.
+  JXL_DASSERT(v >> bits_in_first_byte == 0);
+  v |= bits;
+  memcpy(p, &v, sizeof(v));  // Write bytes: possibly more than n_bits/8
+#else
+  *p++ |= static_cast<uint8_t>(bits & 0xFF);
+  for (size_t bits_left_to_write = n_bits + bits_in_first_byte;
+       bits_left_to_write >= 9; bits_left_to_write -= 8) {
+    bits >>= 8;
+    *p++ = static_cast<uint8_t>(bits & 0xFF);
+  }
+  *p = 0;
+#endif
+  bits_written_ += n_bits;
+}
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_bit_writer.h b/third-party/libjxl/libjxl/lib/jxl/enc_bit_writer.h
new file mode 100644
index 0000000000..d3fac15a68
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_bit_writer.h
@@ -0,0 +1,129 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_BIT_WRITER_H_
+#define LIB_JXL_ENC_BIT_WRITER_H_
+
+// BitWriter class: unbuffered writes using unaligned 64-bit stores.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+struct BitWriter {
+  // Upper bound on `n_bits` in each call to Write. We shift a 64-bit word by
+  // 7 bits (max already valid bits in the last byte) and at least 1 bit is
+  // needed to zero-initialize the bit-stream ahead (i.e. if 7 bits are valid
+  // and we write 57 bits, then the next write will access a byte that was not
+  // yet zero-initialized).
+  static constexpr size_t kMaxBitsPerCall = 56;
+
+  BitWriter() : bits_written_(0) {}
+
+  // Disallow copying - may lead to bugs.
+  BitWriter(const BitWriter&) = delete;
+  BitWriter& operator=(const BitWriter&) = delete;
+  BitWriter(BitWriter&&) = default;
+  BitWriter& operator=(BitWriter&&) = default;
+
+  size_t BitsWritten() const { return bits_written_; }
+
+  Span<const uint8_t> GetSpan() const {
+    // Callers must ensure byte alignment to avoid uninitialized bits.
+    JXL_ASSERT(bits_written_ % kBitsPerByte == 0);
+    return Span<const uint8_t>(storage_.data(), bits_written_ / kBitsPerByte);
+  }
+
+  // Example usage: bytes = std::move(writer).TakeBytes(); Useful for the
+  // top-level encoder which returns PaddedBytes, not a BitWriter.
+  // *this must be an rvalue reference and is invalid afterwards.
+  PaddedBytes&& TakeBytes() && {
+    // Callers must ensure byte alignment to avoid uninitialized bits.
+    JXL_ASSERT(bits_written_ % kBitsPerByte == 0);
+    storage_.resize(bits_written_ / kBitsPerByte);
+    return std::move(storage_);
+  }
+
+ private:
+  // Must be byte-aligned before calling.
+  void AppendByteAligned(const Span<const uint8_t>& span);
+
+ public:
+  // NOTE: no allotment needed, the other BitWriters have already been charged.
+  void AppendByteAligned(const BitWriter& other);
+  void AppendByteAligned(const std::vector<std::unique_ptr<BitWriter>>& others);
+  void AppendByteAligned(const std::vector<BitWriter>& others);
+
+  class Allotment {
+   public:
+    // Expands a BitWriter's storage. Must happen before calling Write or
+    // ZeroPadToByte. Must call ReclaimAndCharge after writing to reclaim the
+    // unused storage so that BitWriter memory use remains tightly bounded.
+    Allotment(BitWriter* JXL_RESTRICT writer, size_t max_bits);
+    ~Allotment();
+
+    size_t MaxBits() const { return max_bits_; }
+
+    // Call after writing a histogram, but before ReclaimAndCharge.
+    void FinishedHistogram(BitWriter* JXL_RESTRICT writer);
+
+    size_t HistogramBits() const {
+      JXL_ASSERT(called_);
+      return histogram_bits_;
+    }
+
+    void ReclaimAndCharge(BitWriter* JXL_RESTRICT writer, size_t layer,
+                          AuxOut* JXL_RESTRICT aux_out);
+
+   private:
+    void PrivateReclaim(BitWriter* JXL_RESTRICT writer,
+                        size_t* JXL_RESTRICT used_bits,
+                        size_t* JXL_RESTRICT unused_bits);
+
+    size_t prev_bits_written_;
+    const size_t max_bits_;
+    size_t histogram_bits_ = 0;
+    bool called_ = false;
+    Allotment* parent_;
+  };
+
+  // Writes bits into bytes in increasing addresses, and within a byte
+  // least-significant-bit first.
+  //
+  // The function can write up to 56 bits in one go.
+  void Write(size_t n_bits, uint64_t bits);
+
+  // This should only rarely be used - e.g. when the current location will be
+  // referenced via byte offset (TOCs point to groups), or byte-aligned reading
+  // is required for speed.
+  void ZeroPadToByte() {
+    const size_t remainder_bits =
+        RoundUpBitsToByteMultiple(bits_written_) - bits_written_;
+    if (remainder_bits == 0) return;
+    Write(remainder_bits, 0);
+    JXL_ASSERT(bits_written_ % kBitsPerByte == 0);
+  }
+
+ private:
+  size_t bits_written_;
+  PaddedBytes storage_;
+  Allotment* current_allotment_ = nullptr;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_BIT_WRITER_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_butteraugli_comparator.cc b/third-party/libjxl/libjxl/lib/jxl/enc_butteraugli_comparator.cc
new file mode 100644
index 0000000000..d378fd2e23
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_butteraugli_comparator.cc
@@ -0,0 +1,100 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_butteraugli_comparator.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/enc_image_bundle.h"
+
+namespace jxl {
+
+JxlButteraugliComparator::JxlButteraugliComparator(
+    const ButteraugliParams& params, const JxlCmsInterface& cms)
+    : params_(params), cms_(cms) {}
+
+Status JxlButteraugliComparator::SetReferenceImage(const ImageBundle& ref) {
+  const ImageBundle* ref_linear_srgb;
+  ImageMetadata metadata = *ref.metadata();
+  ImageBundle store(&metadata);
+  if (!TransformIfNeeded(ref, ColorEncoding::LinearSRGB(ref.IsGray()), cms_,
+                         /*pool=*/nullptr, &store, &ref_linear_srgb)) {
+    return false;
+  }
+
+  comparator_.reset(
+      new ButteraugliComparator(ref_linear_srgb->color(), params_));
+  xsize_ = ref.xsize();
+  ysize_ = ref.ysize();
+  return true;
+}
+
+Status JxlButteraugliComparator::CompareWith(const ImageBundle& actual,
+                                             ImageF* diffmap, float* score) {
+  if (!comparator_) {
+    return JXL_FAILURE("Must set reference image first");
+  }
+  if (xsize_ != actual.xsize() || ysize_ != actual.ysize()) {
+    return JXL_FAILURE("Images must have same size");
+  }
+
+  const ImageBundle* actual_linear_srgb;
+  ImageMetadata metadata = *actual.metadata();
+  ImageBundle store(&metadata);
+  if (!TransformIfNeeded(actual, ColorEncoding::LinearSRGB(actual.IsGray()),
+                         cms_,
+                         /*pool=*/nullptr, &store, &actual_linear_srgb)) {
+    return false;
+  }
+
+  ImageF temp_diffmap(xsize_, ysize_);
+  comparator_->Diffmap(actual_linear_srgb->color(), temp_diffmap);
+
+  if (score != nullptr) {
+    *score = ButteraugliScoreFromDiffmap(temp_diffmap, &params_);
+  }
+  if (diffmap != nullptr) {
+    diffmap->Swap(temp_diffmap);
+  }
+
+  return true;
+}
+
+float JxlButteraugliComparator::GoodQualityScore() const {
+  return ButteraugliFuzzyInverse(1.5);
+}
+
+float JxlButteraugliComparator::BadQualityScore() const {
+  return ButteraugliFuzzyInverse(0.5);
+}
+
+float ButteraugliDistance(const ImageBundle& rgb0, const ImageBundle& rgb1,
+                          const ButteraugliParams& params,
+                          const JxlCmsInterface& cms, ImageF* distmap,
+                          ThreadPool* pool, bool ignore_alpha) {
+  JxlButteraugliComparator comparator(params, cms);
+  return ComputeScore(rgb0, rgb1, &comparator, cms, distmap, pool,
+                      ignore_alpha);
+}
+
+float ButteraugliDistance(const std::vector<ImageBundle>& frames0,
+                          const std::vector<ImageBundle>& frames1,
+                          const ButteraugliParams& params,
+                          const JxlCmsInterface& cms, ImageF* distmap,
+                          ThreadPool* pool) {
+  JxlButteraugliComparator comparator(params, cms);
+  JXL_ASSERT(frames0.size() == frames1.size());
+  float max_dist = 0.0f;
+  for (size_t i = 0; i < frames0.size(); ++i) {
+    max_dist = std::max(
+        max_dist,
+        ComputeScore(frames0[i], frames1[i], &comparator, cms, distmap, pool));
+  }
+  return max_dist;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_butteraugli_comparator.h b/third-party/libjxl/libjxl/lib/jxl/enc_butteraugli_comparator.h
new file mode 100644
index 0000000000..28d9faa2b7
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_butteraugli_comparator.h
@@ -0,0 +1,60 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_BUTTERAUGLI_COMPARATOR_H_
+#define LIB_JXL_ENC_BUTTERAUGLI_COMPARATOR_H_
+
+#include <jxl/cms_interface.h>
+#include <stddef.h>
+
+#include <memory>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/butteraugli/butteraugli.h"
+#include "lib/jxl/enc_comparator.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+class JxlButteraugliComparator : public Comparator {
+ public:
+  explicit JxlButteraugliComparator(const ButteraugliParams& params,
+                                    const JxlCmsInterface& cms);
+
+  Status SetReferenceImage(const ImageBundle& ref) override;
+
+  Status CompareWith(const ImageBundle& actual, ImageF* diffmap,
+                     float* score) override;
+
+  float GoodQualityScore() const override;
+  float BadQualityScore() const override;
+
+ private:
+  ButteraugliParams params_;
+  JxlCmsInterface cms_;
+  std::unique_ptr<ButteraugliComparator> comparator_;
+  size_t xsize_ = 0;
+  size_t ysize_ = 0;
+};
+
+// Returns the butteraugli distance between rgb0 and rgb1.
+// If distmap is not null, it must be the same size as rgb0 and rgb1.
+float ButteraugliDistance(const ImageBundle& rgb0, const ImageBundle& rgb1,
+                          const ButteraugliParams& params,
+                          const JxlCmsInterface& cms, ImageF* distmap = nullptr,
+                          ThreadPool* pool = nullptr,
+                          bool ignore_alpha = false);
+
+float ButteraugliDistance(const std::vector<ImageBundle>& frames0,
+                          const std::vector<ImageBundle>& frames1,
+                          const ButteraugliParams& params,
+                          const JxlCmsInterface& cms, ImageF* distmap = nullptr,
+                          ThreadPool* pool = nullptr);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_BUTTERAUGLI_COMPARATOR_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_cache.cc b/third-party/libjxl/libjxl/lib/jxl/enc_cache.cc
new file mode 100644
index 0000000000..97d88e0fca
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_cache.cc
@@ -0,0 +1,208 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_cache.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <type_traits>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/dec_frame.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_frame.h"
+#include "lib/jxl/enc_group.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/passes_state.h"
+#include "lib/jxl/quantizer.h"
+
+namespace jxl {
+
+Status InitializePassesEncoder(const Image3F& opsin, const JxlCmsInterface& cms,
+                               ThreadPool* pool, PassesEncoderState* enc_state,
+                               ModularFrameEncoder* modular_frame_encoder,
+                               AuxOut* aux_out) {
+  PassesSharedState& JXL_RESTRICT shared = enc_state->shared;
+
+  enc_state->histogram_idx.resize(shared.frame_dim.num_groups);
+
+  enc_state->x_qm_multiplier =
+      std::pow(1.25f, shared.frame_header.x_qm_scale - 2.0f);
+  enc_state->b_qm_multiplier =
+      std::pow(1.25f, shared.frame_header.b_qm_scale - 2.0f);
+
+  if (enc_state->coeffs.size() < shared.frame_header.passes.num_passes) {
+    enc_state->coeffs.reserve(shared.frame_header.passes.num_passes);
+    for (size_t i = enc_state->coeffs.size();
+         i < shared.frame_header.passes.num_passes; i++) {
+      // Allocate enough coefficients for each group on every row.
+      enc_state->coeffs.emplace_back(make_unique<ACImageT<int32_t>>(
+          kGroupDim * kGroupDim, shared.frame_dim.num_groups));
+    }
+  }
+  while (enc_state->coeffs.size() > shared.frame_header.passes.num_passes) {
+    enc_state->coeffs.pop_back();
+  }
+
+  float scale =
+      shared.quantizer.ScaleGlobalScale(enc_state->cparams.quant_ac_rescale);
+  DequantMatricesScaleDC(&shared.matrices, scale);
+  shared.quantizer.RecomputeFromGlobalScale();
+
+  Image3F dc(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks);
+  JXL_RETURN_IF_ERROR(RunOnPool(
+      pool, 0, shared.frame_dim.num_groups, ThreadPool::NoInit,
+      [&](size_t group_idx, size_t _) {
+        ComputeCoefficients(group_idx, enc_state, opsin, &dc);
+      },
+      "Compute coeffs"));
+
+  if (shared.frame_header.flags & FrameHeader::kUseDcFrame) {
+    CompressParams cparams = enc_state->cparams;
+    cparams.dots = Override::kOff;
+    cparams.noise = Override::kOff;
+    cparams.patches = Override::kOff;
+    cparams.gaborish = Override::kOff;
+    cparams.epf = 0;
+    cparams.resampling = 1;
+    cparams.ec_resampling = 1;
+    // The DC frame will have alpha=0. Don't erase its contents.
+    cparams.keep_invisible = Override::kOn;
+    JXL_ASSERT(cparams.progressive_dc > 0);
+    cparams.progressive_dc--;
+    // Use kVarDCT in max_error_mode for intermediate progressive DC,
+    // and kModular for the smallest DC (first in the bitstream)
+    if (cparams.progressive_dc == 0) {
+      cparams.modular_mode = true;
+      cparams.speed_tier =
+          SpeedTier(std::max(static_cast<int>(SpeedTier::kTortoise),
+                             static_cast<int>(cparams.speed_tier) - 1));
+      cparams.butteraugli_distance =
+          std::max(kMinButteraugliDistance,
+                   enc_state->cparams.butteraugli_distance * 0.02f);
+    } else {
+      cparams.max_error_mode = true;
+      for (size_t c = 0; c < 3; c++) {
+        cparams.max_error[c] = shared.quantizer.MulDC()[c];
+      }
+      // Guess a distance that produces good initial results.
+      cparams.butteraugli_distance =
+          std::max(kMinButteraugliDistance,
+                   enc_state->cparams.butteraugli_distance * 0.1f);
+    }
+    ImageBundle ib(&shared.metadata->m);
+    // This is a lie - dc is in XYB
+    // (but EncodeFrame will skip RGB->XYB conversion anyway)
+    ib.SetFromImage(
+        std::move(dc),
+        ColorEncoding::LinearSRGB(shared.metadata->m.color_encoding.IsGray()));
+    if (!ib.metadata()->extra_channel_info.empty()) {
+      // Add dummy extra channels to the patch image: dc_level frames do not yet
+      // support extra channels, but the codec expects that the amount of extra
+      // channels in frames matches that in the metadata of the codestream.
+      std::vector<ImageF> extra_channels;
+      extra_channels.reserve(ib.metadata()->extra_channel_info.size());
+      for (size_t i = 0; i < ib.metadata()->extra_channel_info.size(); i++) {
+        extra_channels.emplace_back(ib.xsize(), ib.ysize());
+        // Must initialize the image with data to not affect blending with
+        // uninitialized memory.
+        // TODO(lode): dc_level must copy and use the real extra channels
+        // instead.
+        ZeroFillImage(&extra_channels.back());
+      }
+      ib.SetExtraChannels(std::move(extra_channels));
+    }
+    std::unique_ptr<PassesEncoderState> state =
+        jxl::make_unique<PassesEncoderState>();
+
+    auto special_frame = std::unique_ptr<BitWriter>(new BitWriter());
+    FrameInfo dc_frame_info;
+    dc_frame_info.frame_type = FrameType::kDCFrame;
+    dc_frame_info.dc_level = shared.frame_header.dc_level + 1;
+    dc_frame_info.ib_needs_color_transform = false;
+    dc_frame_info.save_before_color_transform = true;  // Implicitly true
+    AuxOut dc_aux_out;
+    JXL_CHECK(EncodeFrame(cparams, dc_frame_info, shared.metadata, ib,
+                          state.get(), cms, pool, special_frame.get(),
+                          aux_out ? &dc_aux_out : nullptr));
+    if (aux_out) {
+      for (const auto& l : dc_aux_out.layers) {
+        aux_out->layers[kLayerDC].Assimilate(l);
+      }
+    }
+    const Span<const uint8_t> encoded = special_frame->GetSpan();
+    enc_state->special_frames.emplace_back(std::move(special_frame));
+
+    ImageBundle decoded(&shared.metadata->m);
+    std::unique_ptr<PassesDecoderState> dec_state =
+        jxl::make_unique<PassesDecoderState>();
+    JXL_CHECK(
+        dec_state->output_encoding_info.SetFromMetadata(*shared.metadata));
+    const uint8_t* frame_start = encoded.data();
+    size_t encoded_size = encoded.size();
+    for (int i = 0; i <= cparams.progressive_dc; ++i) {
+      JXL_CHECK(DecodeFrame(dec_state.get(), pool, frame_start, encoded_size,
+                            &decoded, *shared.metadata));
+      frame_start += decoded.decoded_bytes();
+      encoded_size -= decoded.decoded_bytes();
+    }
+    // TODO(lode): shared.frame_header.dc_level should be equal to
+    // dec_state.shared->frame_header.dc_level - 1 here, since above we set
+    // dc_frame_info.dc_level = shared.frame_header.dc_level + 1, and
+    // dc_frame_info.dc_level is used by EncodeFrame. However, if EncodeFrame
+    // outputs multiple frames, this assumption could be wrong.
+    const Image3F& dc_frame =
+        dec_state->shared->dc_frames[shared.frame_header.dc_level];
+    shared.dc_storage = Image3F(dc_frame.xsize(), dc_frame.ysize());
+    CopyImageTo(dc_frame, &shared.dc_storage);
+    ZeroFillImage(&shared.quant_dc);
+    shared.dc = &shared.dc_storage;
+    JXL_CHECK(encoded_size == 0);
+  } else {
+    auto compute_dc_coeffs = [&](int group_index, int /* thread */) {
+      modular_frame_encoder->AddVarDCTDC(
+          dc, group_index, enc_state->cparams.speed_tier < SpeedTier::kFalcon,
+          enc_state, /*jpeg_transcode=*/false);
+    };
+    JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, shared.frame_dim.num_dc_groups,
+                                  ThreadPool::NoInit, compute_dc_coeffs,
+                                  "Compute DC coeffs"));
+    // TODO(veluca): this is only useful in tests and if inspection is enabled.
+    if (!(shared.frame_header.flags & FrameHeader::kSkipAdaptiveDCSmoothing)) {
+      AdaptiveDCSmoothing(shared.quantizer.MulDC(), &shared.dc_storage, pool);
+    }
+  }
+  auto compute_ac_meta = [&](int group_index, int /* thread */) {
+    modular_frame_encoder->AddACMetadata(group_index, /*jpeg_transcode=*/false,
+                                         enc_state);
+  };
+  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, shared.frame_dim.num_dc_groups,
+                                ThreadPool::NoInit, compute_ac_meta,
+                                "Compute AC Metadata"));
+
+  return true;
+}
+
+void EncCache::InitOnce() {
+  if (num_nzeroes.xsize() == 0) {
+    num_nzeroes = Image3I(kGroupDimInBlocks, kGroupDimInBlocks);
+  }
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_cache.h b/third-party/libjxl/libjxl/lib/jxl/enc_cache.h
new file mode 100644
index 0000000000..6c7870ba00
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_cache.h
@@ -0,0 +1,93 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_CACHE_H_
+#define LIB_JXL_ENC_CACHE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_heuristics.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_progressive_split.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/passes_state.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// Contains encoder state.
+struct PassesEncoderState {
+  PassesSharedState shared;
+
+  ImageF initial_quant_field;    // Invalid in Falcon mode.
+  ImageF initial_quant_masking;  // Invalid in Falcon mode.
+
+  // Per-pass DCT coefficients for the image. One row per group.
+  std::vector<std::unique_ptr<ACImage>> coeffs;
+
+  // Raw data for special (reference+DC) frames.
+  std::vector<std::unique_ptr<BitWriter>> special_frames;
+
+  // For splitting into passes.
+  ProgressiveSplitter progressive_splitter;
+
+  CompressParams cparams;
+
+  struct PassData {
+    std::vector<std::vector<Token>> ac_tokens;
+    std::vector<uint8_t> context_map;
+    EntropyEncodingData codes;
+  };
+
+  std::vector<PassData> passes;
+  std::vector<uint8_t> histogram_idx;
+
+  // Coefficient orders that are non-default.
+  std::vector<uint32_t> used_orders;
+
+  // Multiplier to be applied to the quant matrices of the x channel.
+  float x_qm_multiplier = 1.0f;
+  float b_qm_multiplier = 1.0f;
+
+  // Heuristics to be used by the encoder.
+  std::unique_ptr<EncoderHeuristics> heuristics =
+      make_unique<DefaultEncoderHeuristics>();
+};
+
+// Initialize per-frame information.
+class ModularFrameEncoder;
+Status InitializePassesEncoder(const Image3F& opsin, const JxlCmsInterface& cms,
+                               ThreadPool* pool,
+                               PassesEncoderState* passes_enc_state,
+                               ModularFrameEncoder* modular_frame_encoder,
+                               AuxOut* aux_out);
+
+// Working area for ComputeCoefficients (per-group!)
+struct EncCache {
+  // Allocates num_nzeroes if it is still empty; otherwise does nothing.
+  void InitOnce();
+
+  // TokenizeCoefficients
+  Image3I num_nzeroes;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_CACHE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_chroma_from_luma.cc b/third-party/libjxl/libjxl/lib/jxl/enc_chroma_from_luma.cc
new file mode 100644
index 0000000000..4ed85be536
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_chroma_from_luma.cc
@@ -0,0 +1,408 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_chroma_from_luma.h"
+
+#include <float.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_chroma_from_luma.cc"
+#include <hwy/aligned_allocator.h>
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_transforms-inl.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_transforms-inl.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/quantizer.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Ge;
+using hwy::HWY_NAMESPACE::GetLane;
+using hwy::HWY_NAMESPACE::IfThenElse;
+using hwy::HWY_NAMESPACE::Lt;
+
+static HWY_FULL(float) df;
+
+struct CFLFunction {
+  static constexpr float kCoeff = 1.f / 3;  // Weight of the residual term.
+  static constexpr float kThres = 100.0f;  // |residual| >= this: no gradient.
+  static constexpr float kInvColorFactor = 1.0f / kDefaultColorFactor;
+  CFLFunction(const float* values_m, const float* values_s, size_t num,
+              float base, float distance_mul)
+      : values_m(values_m),
+        values_s(values_s),
+        num(num),
+        base(base),
+        distance_mul(distance_mul) {}
+
+  // Returns f'(x) for f = 1/3 * sum((|color residual| + 1)^2 - 1) +
+  // distance_mul * x^2 * num; also writes f'(x +/- eps) to *fpeps / *fmeps.
+  float Compute(float x, float eps, float* fpeps, float* fmeps) const {
+    float first_derivative = 2 * distance_mul * num * x;
+    float first_derivative_peps = 2 * distance_mul * num * (x + eps);
+    float first_derivative_meps = 2 * distance_mul * num * (x - eps);
+
+    const auto inv_color_factor = Set(df, kInvColorFactor);
+    const auto thres = Set(df, kThres);
+    const auto coeffx2 = Set(df, kCoeff * 2.0f);
+    const auto one = Set(df, 1.0f);
+    const auto zero = Set(df, 0.0f);
+    const auto base_v = Set(df, base);
+    const auto x_v = Set(df, x);
+    const auto xpe_v = Set(df, x + eps);
+    const auto xme_v = Set(df, x - eps);
+    auto fd_v = Zero(df);
+    auto fdpe_v = Zero(df);
+    auto fdme_v = Zero(df);
+    JXL_ASSERT(num % Lanes(df) == 0);  // The loop loads whole vectors.
+
+    for (size_t i = 0; i < num; i += Lanes(df)) {
+      // color residual = ax + b, with a and b derived from luma and chroma.
+      const auto a = Mul(inv_color_factor, Load(df, values_m + i));
+      const auto b =
+          Sub(Mul(base_v, Load(df, values_m + i)), Load(df, values_s + i));
+      const auto v = MulAdd(a, x_v, b);
+      const auto vpe = MulAdd(a, xpe_v, b);
+      const auto vme = MulAdd(a, xme_v, b);
+      const auto av = Abs(v);
+      const auto avpe = Abs(vpe);
+      const auto avme = Abs(vme);
+      const auto acoeffx2 = Mul(coeffx2, a);
+      auto d = Mul(acoeffx2, Add(av, one));
+      auto dpe = Mul(acoeffx2, Add(avpe, one));
+      auto dme = Mul(acoeffx2, Add(avme, one));
+      d = IfThenElse(Lt(v, zero), Sub(zero, d), d);  // Negate where v < 0.
+      dpe = IfThenElse(Lt(vpe, zero), Sub(zero, dpe), dpe);
+      dme = IfThenElse(Lt(vme, zero), Sub(zero, dme), dme);
+      const auto above = Ge(av, thres);  // From v only; reused for vpe, vme.
+      // TODO(eustas): use IfThenElseZero
+      fd_v = Add(fd_v, IfThenElse(above, zero, d));
+      fdpe_v = Add(fdpe_v, IfThenElse(above, zero, dpe));
+      fdme_v = Add(fdme_v, IfThenElse(above, zero, dme));
+    }
+
+    *fpeps = first_derivative_peps + GetLane(SumOfLanes(df, fdpe_v));
+    *fmeps = first_derivative_meps + GetLane(SumOfLanes(df, fdme_v));
+    return first_derivative + GetLane(SumOfLanes(df, fd_v));
+  }
+
+  const float* JXL_RESTRICT values_m;
+  const float* JXL_RESTRICT values_s;
+  size_t num;
+  float base;
+  float distance_mul;
+};
+
+// CfL search (values_m: luma, values_s: chroma); result is in [-128, 127].
+int32_t FindBestMultiplier(const float* values_m, const float* values_s,
+                           size_t num, float base, float distance_mul,
+                           bool fast) {
+  if (num == 0) {
+    return 0;
+  }
+  float x;
+  if (fast) {
+    static constexpr float kInvColorFactor = 1.0f / kDefaultColorFactor;
+    auto ca = Zero(df);
+    auto cb = Zero(df);
+    const auto inv_color_factor = Set(df, kInvColorFactor);
+    const auto base_v = Set(df, base);
+    for (size_t i = 0; i < num; i += Lanes(df)) {
+      // color residual = ax + b; accumulate sum(a * a) and sum(a * b).
+      const auto a = Mul(inv_color_factor, Load(df, values_m + i));
+      const auto b =
+          Sub(Mul(base_v, Load(df, values_m + i)), Load(df, values_s + i));
+      ca = MulAdd(a, a, ca);
+      cb = MulAdd(a, b, cb);
+    }
+    // Closed form; extra denominator term is from + distance_mul * x^2 * num
+    x = -GetLane(SumOfLanes(df, cb)) /
+        (GetLane(SumOfLanes(df, ca)) + num * distance_mul * 0.5f);
+  } else {
+    constexpr float eps = 100;  // Offset for the finite difference below.
+    constexpr float kClamp = 20.0f;  // Max change of x per iteration.
+    CFLFunction fn(values_m, values_s, num, base, distance_mul);
+    x = 0;
+    // Up to 20 Newton iterations, with approximate derivatives.
+    // Derivatives are approximate due to the high amount of noise in the exact
+    // derivatives.
+    for (size_t i = 0; i < 20; i++) {
+      float dfpeps, dfmeps;
+      float df = fn.Compute(x, eps, &dfpeps, &dfmeps);  // Shadows SIMD tag df.
+      float ddf = (dfpeps - dfmeps) / (2 * eps);
+      float kExperimentalInsignificantStabilizer = 0.85;
+      float step = df / (ddf + kExperimentalInsignificantStabilizer);
+      x -= std::min(kClamp, std::max(-kClamp, step));
+      if (std::abs(step) < 3e-3) break;  // Unclamped step is small enough.
+    }
+  }
+  // CFL seems to be tricky for larger transforms for HF components
+  // close to zero. This heuristic brings the solutions closer to zero
+  // and reduces red-green oscillations.
+  float towards_zero = 2.6;
+  if (x >= towards_zero) {
+    x -= towards_zero;
+  } else if (x <= -towards_zero) {
+    x += towards_zero;
+  } else {
+    x = 0;
+  }
+  return std::max(-128.0f, std::min(127.0f, roundf(x)));
+}
+
+void InitDCStorage(size_t num_blocks, ImageF* dc_values) {
+  // Row 0: Y channel, scaled like X (paired with row 1)
+  // Row 1: X channel
+  // Row 2: Y channel, scaled like B (paired with row 3)
+  // Row 3: B channel
+  *dc_values = ImageF(RoundUpTo(num_blocks, Lanes(df)), 4);
+
+  JXL_ASSERT(dc_values->xsize() != 0);
+  // Zero the last Lanes(df) values per row; this covers the round-up padding.
+  for (size_t y = 0; y < 4; y++) {
+    for (size_t x = dc_values->xsize() - Lanes(df); x < dc_values->xsize();
+         x++) {
+      dc_values->Row(y)[x] = 0;
+    }
+  }
+}
+
+void ComputeDC(const ImageF& dc_values, bool fast, int32_t* dc_x,
+               int32_t* dc_b) {
+  constexpr float kDistanceMultiplierDC = 1e-5f;  // Weight of x^2 penalty.
+  const float* JXL_RESTRICT dc_values_yx = dc_values.Row(0);  // Y, for X.
+  const float* JXL_RESTRICT dc_values_x = dc_values.Row(1);
+  const float* JXL_RESTRICT dc_values_yb = dc_values.Row(2);  // Y, for B.
+  const float* JXL_RESTRICT dc_values_b = dc_values.Row(3);
+  *dc_x = FindBestMultiplier(dc_values_yx, dc_values_x, dc_values.xsize(), 0.0f,
+                             kDistanceMultiplierDC, fast);
+  *dc_b = FindBestMultiplier(dc_values_yb, dc_values_b, dc_values.xsize(),
+                             kYToBRatio, kDistanceMultiplierDC, fast);
+}
+
+void ComputeTile(const Image3F& opsin, const DequantMatrices& dequant,
+                 const AcStrategyImage* ac_strategy,
+                 const ImageI* raw_quant_field, const Quantizer* quantizer,
+                 const Rect& r, bool fast, bool use_dct8, ImageSB* map_x,
+                 ImageSB* map_b, ImageF* dc_values, float* mem) {
+  static_assert(kEncTileDimInBlocks == kColorTileDimInBlocks,
+                "Invalid color tile dim");
+  size_t xsize_blocks = opsin.xsize() / kBlockDim;
+  constexpr float kDistanceMultiplierAC = 1e-9f;
+
+  const size_t y0 = r.y0();
+  const size_t x0 = r.x0();
+  const size_t x1 = r.x0() + r.xsize();
+  const size_t y1 = r.y0() + r.ysize();
+
+  int ty = y0 / kColorTileDimInBlocks;  // Tile row in map_x / map_b.
+  int tx = x0 / kColorTileDimInBlocks;  // Tile column in map_x / map_b.
+
+  int8_t* JXL_RESTRICT row_out_x = map_x->Row(ty);
+  int8_t* JXL_RESTRICT row_out_b = map_b->Row(ty);
+
+  float* JXL_RESTRICT dc_values_yx = dc_values->Row(0);
+  float* JXL_RESTRICT dc_values_x = dc_values->Row(1);
+  float* JXL_RESTRICT dc_values_yb = dc_values->Row(2);
+  float* JXL_RESTRICT dc_values_b = dc_values->Row(3);
+
+  // Sub-buffers carved out of `mem`. All are aligned.
+  float* HWY_RESTRICT block_y = mem;
+  float* HWY_RESTRICT block_x = block_y + AcStrategy::kMaxCoeffArea;
+  float* HWY_RESTRICT block_b = block_x + AcStrategy::kMaxCoeffArea;
+  float* HWY_RESTRICT coeffs_yx = block_b + AcStrategy::kMaxCoeffArea;
+  float* HWY_RESTRICT coeffs_x = coeffs_yx + kColorTileDim * kColorTileDim;
+  float* HWY_RESTRICT coeffs_yb = coeffs_x + kColorTileDim * kColorTileDim;
+  float* HWY_RESTRICT coeffs_b = coeffs_yb + kColorTileDim * kColorTileDim;
+  float* HWY_RESTRICT scratch_space = coeffs_b + kColorTileDim * kColorTileDim;
+  JXL_DASSERT(scratch_space + 2 * AcStrategy::kMaxCoeffArea ==
+              block_y + CfLHeuristics::kItemsPerThread);
+
+  // Small (~256 bytes each)
+  HWY_ALIGN_MAX float
+      dc_y[AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks] = {};
+  HWY_ALIGN_MAX float
+      dc_x[AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks] = {};
+  HWY_ALIGN_MAX float
+      dc_b[AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks] = {};
+  size_t num_ac = 0;  // Number of AC values stored in each coeffs_* so far.
+
+  for (size_t y = y0; y < y1; ++y) {
+    const float* JXL_RESTRICT row_y = opsin.ConstPlaneRow(1, y * kBlockDim);
+    const float* JXL_RESTRICT row_x = opsin.ConstPlaneRow(0, y * kBlockDim);
+    const float* JXL_RESTRICT row_b = opsin.ConstPlaneRow(2, y * kBlockDim);
+    size_t stride = opsin.PixelsPerRow();
+
+    for (size_t x = x0; x < x1; x++) {
+      AcStrategy acs = use_dct8
+                           ? AcStrategy::FromRawStrategy(AcStrategy::Type::DCT)
+                           : ac_strategy->ConstRow(y)[x];
+      if (!acs.IsFirstBlock()) continue;
+      size_t xs = acs.covered_blocks_x();
+      TransformFromPixels(acs.Strategy(), row_y + x * kBlockDim, stride,
+                          block_y, scratch_space);
+      DCFromLowestFrequencies(acs.Strategy(), block_y, dc_y, xs);
+      TransformFromPixels(acs.Strategy(), row_x + x * kBlockDim, stride,
+                          block_x, scratch_space);
+      DCFromLowestFrequencies(acs.Strategy(), block_x, dc_x, xs);
+      TransformFromPixels(acs.Strategy(), row_b + x * kBlockDim, stride,
+                          block_b, scratch_space);
+      DCFromLowestFrequencies(acs.Strategy(), block_b, dc_b, xs);
+      const float* const JXL_RESTRICT qm_x =
+          dequant.InvMatrix(acs.Strategy(), 0);
+      const float* const JXL_RESTRICT qm_b =
+          dequant.InvMatrix(acs.Strategy(), 2);
+      float q_dc_x = use_dct8 ? 1 : 1.0f / quantizer->GetInvDcStep(0);
+      float q_dc_b = use_dct8 ? 1 : 1.0f / quantizer->GetInvDcStep(2);
+
+      // Copy DCs into dc_values, scaled by q_dc_x (rows 0-1) / q_dc_b (2-3).
+      for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+        for (size_t ix = 0; ix < xs; ix++) {
+          dc_values_yx[(iy + y) * xsize_blocks + ix + x] =
+              dc_y[iy * xs + ix] * q_dc_x;
+          dc_values_x[(iy + y) * xsize_blocks + ix + x] =
+              dc_x[iy * xs + ix] * q_dc_x;
+          dc_values_yb[(iy + y) * xsize_blocks + ix + x] =
+              dc_y[iy * xs + ix] * q_dc_b;
+          dc_values_b[(iy + y) * xsize_blocks + ix + x] =
+              dc_b[iy * xs + ix] * q_dc_b;
+        }
+      }
+
+      // Do not use transforms larger than the rect `r` for computing AC CfL.
+      if (acs.covered_blocks_x() + x0 > x1 ||
+          acs.covered_blocks_y() + y0 > y1) {
+        continue;
+      }
+
+      // Copy AC coefficients in the local block. The order in which
+      // coefficients get stored does not matter.
+      size_t cx = acs.covered_blocks_x();
+      size_t cy = acs.covered_blocks_y();
+      CoefficientLayout(&cy, &cx);
+      // Zero out LFs. This introduces terms in the optimization loop that
+      // don't affect the result, as they are all 0, but allow for simpler
+      // SIMDfication.
+      for (size_t iy = 0; iy < cy; iy++) {
+        for (size_t ix = 0; ix < cx; ix++) {
+          block_y[cx * kBlockDim * iy + ix] = 0;
+          block_x[cx * kBlockDim * iy + ix] = 0;
+          block_b[cx * kBlockDim * iy + ix] = 0;
+        }
+      }
+      // Unclear why this is like it is. (This works slightly better
+      // than the previous approach which was also a hack.)
+      const float qq =
+          (raw_quant_field == nullptr) ? 1.0f : raw_quant_field->Row(y)[x];
+      // Experimentally values 128-130 seem best -- I don't know why we
+      // need this multiplier.
+      const float kStrangeMultiplier = 128;
+      float q = use_dct8 ? 1 : quantizer->Scale() * kStrangeMultiplier * qq;
+      const auto qv = Set(df, q);
+      for (size_t i = 0; i < cx * cy * 64; i += Lanes(df)) {
+        const auto b_y = Load(df, block_y + i);
+        const auto b_x = Load(df, block_x + i);
+        const auto b_b = Load(df, block_b + i);
+        const auto qqm_x = Mul(qv, Load(df, qm_x + i));
+        const auto qqm_b = Mul(qv, Load(df, qm_b + i));
+        Store(Mul(b_y, qqm_x), df, coeffs_yx + num_ac);
+        Store(Mul(b_x, qqm_x), df, coeffs_x + num_ac);
+        Store(Mul(b_y, qqm_b), df, coeffs_yb + num_ac);
+        Store(Mul(b_b, qqm_b), df, coeffs_b + num_ac);
+        num_ac += Lanes(df);
+      }
+    }
+  }
+  JXL_CHECK(num_ac % Lanes(df) == 0);
+  row_out_x[tx] = FindBestMultiplier(coeffs_yx, coeffs_x, num_ac, 0.0f,
+                                     kDistanceMultiplierAC, fast);
+  row_out_b[tx] = FindBestMultiplier(coeffs_yb, coeffs_b, num_ac, kYToBRatio,
+                                     kDistanceMultiplierAC, fast);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(InitDCStorage);
+HWY_EXPORT(ComputeDC);
+HWY_EXPORT(ComputeTile);
+
+void CfLHeuristics::Init(const Image3F& opsin) {
+  size_t xsize_blocks = opsin.xsize() / kBlockDim;
+  size_t ysize_blocks = opsin.ysize() / kBlockDim;
+  HWY_DYNAMIC_DISPATCH(InitDCStorage)
+  (xsize_blocks * ysize_blocks, &dc_values);  // One DC slot per block, per row.
+}
+
+void CfLHeuristics::ComputeTile(const Rect& r, const Image3F& opsin,
+                                const DequantMatrices& dequant,
+                                const AcStrategyImage* ac_strategy,
+                                const ImageI* raw_quant_field,
+                                const Quantizer* quantizer, bool fast,
+                                size_t thread, ColorCorrelationMap* cmap) {
+  bool use_dct8 = ac_strategy == nullptr;  // No strategy image: use DCT8.
+  HWY_DYNAMIC_DISPATCH(ComputeTile)
+  (opsin, dequant, ac_strategy, raw_quant_field, quantizer, r, fast, use_dct8,
+   &cmap->ytox_map, &cmap->ytob_map, &dc_values,
+   mem.get() + thread * kItemsPerThread);  // This thread's slice of `mem`.
+}
+
+void CfLHeuristics::ComputeDC(bool fast, ColorCorrelationMap* cmap) {
+  int32_t ytob_dc = 0;
+  int32_t ytox_dc = 0;
+  HWY_DYNAMIC_DISPATCH(ComputeDC)(dc_values, fast, &ytox_dc, &ytob_dc);  // X, B.
+  cmap->SetYToBDC(ytob_dc);
+  cmap->SetYToXDC(ytox_dc);
+}
+
+void ColorCorrelationMapEncodeDC(ColorCorrelationMap* map, BitWriter* writer,
+                                 size_t layer, AuxOut* aux_out) {
+  float color_factor = map->GetColorFactor();
+  float base_correlation_x = map->GetBaseCorrelationX();
+  float base_correlation_b = map->GetBaseCorrelationB();
+  int32_t ytox_dc = map->GetYToXDC();
+  int32_t ytob_dc = map->GetYToBDC();
+
+  BitWriter::Allotment allotment(writer, 1 + 2 * kBitsPerByte + 12 + 32);
+  if (ytox_dc == 0 && ytob_dc == 0 && color_factor == kDefaultColorFactor &&
+      base_correlation_x == 0.0f && base_correlation_b == kYToBRatio) {
+    writer->Write(1, 1);  // Single bit: every field has its default value.
+    allotment.ReclaimAndCharge(writer, layer, aux_out);
+    return;
+  }
+  writer->Write(1, 0);  // Not all-default: explicit fields follow.
+  JXL_CHECK(U32Coder::Write(kColorFactorDist, color_factor, writer));
+  JXL_CHECK(F16Coder::Write(base_correlation_x, writer));
+  JXL_CHECK(F16Coder::Write(base_correlation_b, writer));
+  writer->Write(kBitsPerByte, ytox_dc - std::numeric_limits<int8_t>::min());
+  writer->Write(kBitsPerByte, ytob_dc - std::numeric_limits<int8_t>::min());
+  allotment.ReclaimAndCharge(writer, layer, aux_out);
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_chroma_from_luma.h b/third-party/libjxl/libjxl/lib/jxl/enc_chroma_from_luma.h
new file mode 100644
index 0000000000..899b91b041
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_chroma_from_luma.h
@@ -0,0 +1,68 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_CHROMA_FROM_LUMA_H_
+#define LIB_JXL_ENC_CHROMA_FROM_LUMA_H_
+
+// Chroma-from-luma, computed using heuristics to determine the best linear
+// model for the X and B channels from the Y channel.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/quant_weights.h"
+
+namespace jxl {
+
+struct AuxOut;
+class Quantizer;
+
+void ColorCorrelationMapEncodeDC(ColorCorrelationMap* map, BitWriter* writer,
+                                 size_t layer, AuxOut* aux_out);
+
+struct CfLHeuristics {
+  void Init(const Image3F& opsin);  // Allocates dc_values.
+
+  void PrepareForThreads(size_t num_threads) {  // Allocates `mem`.
+    mem = hwy::AllocateAligned<float>(num_threads * kItemsPerThread);
+  }
+
+  void ComputeTile(const Rect& r, const Image3F& opsin,
+                   const DequantMatrices& dequant,
+                   const AcStrategyImage* ac_strategy,
+                   const ImageI* raw_quant_field, const Quantizer* quantizer,
+                   bool fast, size_t thread, ColorCorrelationMap* cmap);
+
+  void ComputeDC(bool fast, ColorCorrelationMap* cmap);
+
+  ImageF dc_values;  // Four rows of per-block DC values (Y, X, Y, B).
+  hwy::AlignedFreeUniquePtr<float[]> mem;  // kItemsPerThread floats per thread.
+
+  // Working set is too large for stack; allocate dynamically.
+  constexpr static size_t kItemsPerThread =
+      AcStrategy::kMaxCoeffArea * 3        // Blocks
+      + kColorTileDim * kColorTileDim * 4  // AC coeff storage
+      + AcStrategy::kMaxCoeffArea * 2;     // Scratch space
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_CHROMA_FROM_LUMA_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_cluster.cc b/third-party/libjxl/libjxl/lib/jxl/enc_cluster.cc
new file mode 100644
index 0000000000..e8acb240c9
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_cluster.cc
@@ -0,0 +1,293 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_cluster.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <map>
+#include <memory>
+#include <numeric>
+#include <queue>
+#include <tuple>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_cluster.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/fast_math-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Eq;
+using hwy::HWY_NAMESPACE::IfThenZeroElse;
+
+template <class V>  // Lane-wise -count * log2(count * inv_total).
+V Entropy(V count, V inv_total, V total) {
+  const HWY_CAPPED(float, Histogram::kRounding) d;
+  const auto zero = Set(d, 0.0f);
+  // TODO(eustas): why (0 - x) instead of Neg(x)?
+  return IfThenZeroElse(
+      Eq(count, total),  // Lanes with count == total yield exactly 0.
+      Sub(zero, Mul(count, FastLog2f(d, Mul(inv_total, count)))));
+}
+
+void HistogramEntropy(const Histogram& a) {  // Writes a.entropy_ (mutable).
+  a.entropy_ = 0.0f;
+  if (a.total_count_ == 0) return;  // Empty histogram: entropy stays 0.
+
+  const HWY_CAPPED(float, Histogram::kRounding) df;
+  const HWY_CAPPED(int32_t, Histogram::kRounding) di;
+
+  const auto inv_tot = Set(df, 1.0f / a.total_count_);
+  auto entropy_lanes = Zero(df);
+  auto total = Set(df, a.total_count_);
+
+  for (size_t i = 0; i < a.data_.size(); i += Lanes(di)) {
+    const auto counts = LoadU(di, &a.data_[i]);  // Loads Lanes(di) bins.
+    entropy_lanes =
+        Add(entropy_lanes, Entropy(ConvertTo(df, counts), inv_tot, total));
+  }
+  a.entropy_ += GetLane(SumOfLanes(df, entropy_lanes));
+}
+
+float HistogramDistance(const Histogram& a, const Histogram& b) {
+  if (a.total_count_ == 0 || b.total_count_ == 0) return 0;  // Nothing to add.
+
+  const HWY_CAPPED(float, Histogram::kRounding) df;
+  const HWY_CAPPED(int32_t, Histogram::kRounding) di;
+
+  const auto inv_tot = Set(df, 1.0f / (a.total_count_ + b.total_count_));
+  auto distance_lanes = Zero(df);
+  auto total = Set(df, a.total_count_ + b.total_count_);
+
+  for (size_t i = 0; i < std::max(a.data_.size(), b.data_.size());
+       i += Lanes(di)) {
+    const auto a_counts =
+        a.data_.size() > i ? LoadU(di, &a.data_[i]) : Zero(di);
+    const auto b_counts =
+        b.data_.size() > i ? LoadU(di, &b.data_[i]) : Zero(di);
+    const auto counts = ConvertTo(df, Add(a_counts, b_counts));
+    distance_lanes = Add(distance_lanes, Entropy(counts, inv_tot, total));
+  }
+  const float total_distance = GetLane(SumOfLanes(df, distance_lanes));
+  return total_distance - a.entropy_ - b.entropy_;  // Uses cached entropy_.
+}
+
+// k-means-like first step: seed clusters, then assign by HistogramDistance.
+void FastClusterHistograms(const std::vector<Histogram>& in,
+                           size_t max_histograms, std::vector<Histogram>* out,
+                           std::vector<uint32_t>* histogram_symbols) {
+  out->clear();
+  out->reserve(max_histograms);
+  histogram_symbols->clear();
+  histogram_symbols->resize(in.size(), max_histograms);  // Marks "unassigned".
+
+  std::vector<float> dists(in.size(), std::numeric_limits<float>::max());
+  size_t largest_idx = 0;
+  for (size_t i = 0; i < in.size(); i++) {
+    if (in[i].total_count_ == 0) {
+      (*histogram_symbols)[i] = 0;  // Empty inputs go to cluster 0.
+      dists[i] = 0.0f;
+      continue;
+    }
+    HistogramEntropy(in[i]);
+    if (in[i].total_count_ > in[largest_idx].total_count_) {
+      largest_idx = i;
+    }
+  }
+
+  constexpr float kMinDistanceForDistinct = 48.0f;  // Seeding stops below this.
+  while (out->size() < max_histograms) {
+    (*histogram_symbols)[largest_idx] = out->size();
+    out->push_back(in[largest_idx]);
+    dists[largest_idx] = 0.0f;
+    largest_idx = 0;
+    for (size_t i = 0; i < in.size(); i++) {
+      if (dists[i] == 0.0f) continue;
+      dists[i] = std::min(HistogramDistance(in[i], out->back()), dists[i]);
+      if (dists[i] > dists[largest_idx]) largest_idx = i;
+    }
+    if (dists[largest_idx] < kMinDistanceForDistinct) break;
+  }
+
+  for (size_t i = 0; i < in.size(); i++) {
+    if ((*histogram_symbols)[i] != max_histograms) continue;  // Already set.
+    size_t best = 0;
+    float best_dist = HistogramDistance(in[i], (*out)[best]);
+    for (size_t j = 1; j < out->size(); j++) {
+      float dist = HistogramDistance(in[i], (*out)[j]);
+      if (dist < best_dist) {
+        best = j;
+        best_dist = dist;
+      }
+    }
+    (*out)[best].AddHistogram(in[i]);
+    HistogramEntropy((*out)[best]);  // Refresh entropy_ after the merge.
+    (*histogram_symbols)[i] = best;
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(FastClusterHistograms);  // Local function
+HWY_EXPORT(HistogramEntropy);       // Local function
+
+float Histogram::ShannonEntropy() const {
+  HWY_DYNAMIC_DISPATCH(HistogramEntropy)(*this);  // Recomputes entropy_.
+  return entropy_;
+}
+
+namespace {
+// -----------------------------------------------------------------------------
+// Histogram refinement
+
+// Reorder histograms in *out so that symbols in *symbols are numbered in
+// order of first appearance; histograms never referenced are dropped.
+void HistogramReindex(std::vector<Histogram>* out,
+                      std::vector<uint32_t>* symbols) {
+  std::vector<Histogram> tmp(*out);
+  std::map<int, int> new_index;  // Old symbol -> new symbol.
+  int next_index = 0;
+  for (uint32_t symbol : *symbols) {
+    if (new_index.find(symbol) == new_index.end()) {
+      new_index[symbol] = next_index;
+      (*out)[next_index] = tmp[symbol];
+      ++next_index;
+    }
+  }
+  out->resize(next_index);
+  for (uint32_t& symbol : *symbols) {
+    symbol = new_index[symbol];
+  }
+}
+
+}  // namespace
+
+// Clusters similar histograms in 'in' together, the selected histograms are
+// placed in 'out', and for each index in 'in', *histogram_symbols will
+// indicate which of the 'out' histograms is the best approximation.
+void ClusterHistograms(const HistogramParams params,
+                       const std::vector<Histogram>& in, size_t max_histograms,
+                       std::vector<Histogram>* out,
+                       std::vector<uint32_t>* histogram_symbols) {
+  max_histograms = std::min(max_histograms, params.max_histograms);
+  max_histograms = std::min(max_histograms, in.size());
+  if (params.clustering == HistogramParams::ClusteringType::kFastest) {
+    max_histograms = std::min(max_histograms, static_cast<size_t>(4));
+  }
+
+  HWY_DYNAMIC_DISPATCH(FastClusterHistograms)
+  (in, max_histograms, out, histogram_symbols);
+
+  if (params.clustering == HistogramParams::ClusteringType::kBest) {
+    for (size_t i = 0; i < out->size(); i++) {
+      (*out)[i].entropy_ =
+          ANSPopulationCost((*out)[i].data_.data(), (*out)[i].data_.size());
+    }
+    uint32_t next_version = 2;  // Version 0 marks a cluster as merged away.
+    std::vector<uint32_t> version(out->size(), 1);
+    std::vector<uint32_t> renumbering(out->size());
+    std::iota(renumbering.begin(), renumbering.end(), 0);
+
+    // Try to pair up clusters if doing so reduces the total cost.
+
+    struct HistogramPair {
+      // validity of a pair: p.version == max(version[i], version[j])
+      float cost;
+      uint32_t first;
+      uint32_t second;
+      uint32_t version;
+      // We use > because priority queues sort in *decreasing* order, but we
+      // want lower cost elements to appear first.
+      bool operator<(const HistogramPair& other) const {
+        return std::make_tuple(cost, first, second, version) >
+               std::make_tuple(other.cost, other.first, other.second,
+                               other.version);
+      }
+    };
+
+    // Enqueue all pairs whose merge lowers the cost; the cheapest pops first.
+    std::priority_queue<HistogramPair> pairs_to_merge;
+    for (uint32_t i = 0; i < out->size(); i++) {
+      for (uint32_t j = i + 1; j < out->size(); j++) {
+        Histogram histo;
+        histo.AddHistogram((*out)[i]);
+        histo.AddHistogram((*out)[j]);
+        float cost = ANSPopulationCost(histo.data_.data(), histo.data_.size()) -
+                     (*out)[i].entropy_ - (*out)[j].entropy_;
+        // Avoid enqueueing pairs that are not advantageous to merge.
+        if (cost >= 0) continue;
+        pairs_to_merge.push(
+            HistogramPair{cost, i, j, std::max(version[i], version[j])});
+      }
+    }
+
+    // Merge the best pair to merge, add new pairs that get formed as a
+    // consequence.
+    while (!pairs_to_merge.empty()) {
+      uint32_t first = pairs_to_merge.top().first;
+      uint32_t second = pairs_to_merge.top().second;
+      uint32_t ver = pairs_to_merge.top().version;
+      pairs_to_merge.pop();
+      if (ver != std::max(version[first], version[second]) ||
+          version[first] == 0 || version[second] == 0) {
+        continue;  // Stale entry: one side changed since it was enqueued.
+      }
+      (*out)[first].AddHistogram((*out)[second]);
+      (*out)[first].entropy_ = ANSPopulationCost((*out)[first].data_.data(),
+                                                 (*out)[first].data_.size());
+      for (size_t i = 0; i < renumbering.size(); i++) {
+        if (renumbering[i] == second) {
+          renumbering[i] = first;
+        }
+      }
+      version[second] = 0;
+      version[first] = next_version++;
+      for (uint32_t j = 0; j < out->size(); j++) {
+        if (j == first) continue;
+        if (version[j] == 0) continue;
+        Histogram histo;
+        histo.AddHistogram((*out)[first]);
+        histo.AddHistogram((*out)[j]);
+        float cost = ANSPopulationCost(histo.data_.data(), histo.data_.size()) -
+                     (*out)[first].entropy_ - (*out)[j].entropy_;
+        // Avoid enqueueing pairs that are not advantageous to merge.
+        if (cost >= 0) continue;
+        pairs_to_merge.push(
+            HistogramPair{cost, std::min(first, j), std::max(first, j),
+                          std::max(version[first], version[j])});
+      }
+    }
+    std::vector<uint32_t> reverse_renumbering(out->size(), -1);  // -1 wraps.
+    size_t num_alive = 0;
+    for (size_t i = 0; i < out->size(); i++) {
+      if (version[i] == 0) continue;
+      (*out)[num_alive++] = (*out)[i];
+      reverse_renumbering[i] = num_alive - 1;
+    }
+    out->resize(num_alive);
+    for (size_t i = 0; i < histogram_symbols->size(); i++) {
+      (*histogram_symbols)[i] =
+          reverse_renumbering[renumbering[(*histogram_symbols)[i]]];
+    }
+  }
+
+  // Convert the context map to a canonical form.
+  HistogramReindex(out, histogram_symbols);
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_cluster.h b/third-party/libjxl/libjxl/lib/jxl/enc_cluster.h
new file mode 100644
index 0000000000..4b062e820c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_cluster.h
@@ -0,0 +1,63 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Functions for clustering similar histograms together.
+
+#ifndef LIB_JXL_ENC_CLUSTER_H_
+#define LIB_JXL_ENC_CLUSTER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/enc_ans.h"
+
+namespace jxl {
+
+struct Histogram {
+  Histogram() {
+    total_count_ = 0;
+    entropy_ = 0.0;
+  }
+  void Clear() {  // Note: leaves entropy_ untouched.
+    data_.clear();
+    total_count_ = 0;
+  }
+  void Add(size_t symbol) {
+    if (data_.size() <= symbol) {
+      data_.resize(DivCeil(symbol + 1, kRounding) * kRounding);  // Pad size.
+    }
+    ++data_[symbol];
+    ++total_count_;
+  }
+  void AddHistogram(const Histogram& other) {  // Does not update entropy_.
+    if (other.data_.size() > data_.size()) {
+      data_.resize(other.data_.size());
+    }
+    for (size_t i = 0; i < other.data_.size(); ++i) {
+      data_[i] += other.data_[i];
+    }
+    total_count_ += other.total_count_;
+  }
+  float PopulationCost() const {
+    return ANSPopulationCost(data_.data(), data_.size());
+  }
+  float ShannonEntropy() const;
+
+  std::vector<ANSHistBin> data_;
+  size_t total_count_;
+  mutable float entropy_;  // WARNING: not kept up-to-date.
+  static constexpr size_t kRounding = 8;  // Add() pads data_ to this multiple.
+};
+
+void ClusterHistograms(HistogramParams params, const std::vector<Histogram>& in,
+                       size_t max_histograms, std::vector<Histogram>* out,
+                       std::vector<uint32_t>* histogram_symbols);  // in -> out.
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_CLUSTER_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_coeff_order.cc b/third-party/libjxl/libjxl/lib/jxl/enc_coeff_order.cc
new file mode 100644
index 0000000000..e994952731
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_coeff_order.cc
@@ -0,0 +1,290 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <hwy/aligned_allocator.h>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/lehmer_code.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+std::pair<uint32_t, uint32_t> ComputeUsedOrders(
+    const SpeedTier speed, const AcStrategyImage& ac_strategy,
+    const Rect& rect) {
+  // Only uses DCT8 = 0 at kFalcon and faster, so both bitfields are 1.
+  if (speed >= SpeedTier::kFalcon) return {1, 1};
+
+  // Bit `ord` of `used` is set for every order found in `rect`; `custom` only
+  // gets orders up to 6 (no customized orders for blocks bigger than 32x32).
+  uint32_t used = 0;
+  uint32_t custom = 0;
+  const size_t num_bx = rect.xsize();
+  const size_t num_by = rect.ysize();
+  // TODO(veluca): precompute when doing DCT.
+  for (size_t by = 0; by < num_by; ++by) {
+    AcStrategyRow row = ac_strategy.ConstRow(rect, by);
+    for (size_t bx = 0; bx < num_bx; ++bx) {
+      const int ord = kStrategyOrder[row[bx].RawStrategy()];
+      const uint32_t bit = 1u << ord;
+      used |= bit;
+      if (ord <= 6) custom |= bit;
+    }
+  }
+  // Use default orders for small images (strategy image under 5x5).
+  const bool tiny = ac_strategy.xsize() < 5 && ac_strategy.ysize() < 5;
+  if (tiny) return {used, 0};
+  return {used, custom};
+}
+
+void ComputeCoeffOrder(SpeedTier speed, const ACImage& acs,
+                       const AcStrategyImage& ac_strategy,
+                       const FrameDimensions& frame_dim, uint32_t& used_orders,
+                       uint16_t used_acs, coeff_order_t* JXL_RESTRICT order) {
+  std::vector<int32_t> num_zeros(kCoeffOrderMaxSize);  // zeros seen per coefficient slot
+  // If compressing at high speed and only using 8x8 DCTs, only consider a
+  // subset of blocks.
+  double block_fraction = 1.0f;
+  // TODO(veluca): figure out why sampling blocks if non-8x8s are used makes
+  // encoding significantly less dense.
+  if (speed >= SpeedTier::kSquirrel && used_orders == 1) {
+    block_fraction = 0.5f;
+  }
+  // No need to compute number of zero coefficients if all orders are the
+  // default.
+  if (used_orders != 0) {
+    uint64_t threshold =
+        (std::numeric_limits<uint64_t>::max() >> 32) * block_fraction;  // block_fraction of the 32-bit range
+    uint64_t s[2] = {static_cast<uint64_t>(0x94D049BB133111EBull),
+                     static_cast<uint64_t>(0xBF58476D1CE4E5B9ull)};  // fixed seed: identical draws on every call
+    // Xorshift128+ adapted from xorshift128+-inl.h
+    auto use_sample = [&]() {
+      auto s1 = s[0];
+      const auto s0 = s[1];
+      const auto bits = s1 + s0;  // b, c
+      s[0] = s0;
+      s1 ^= s1 << 23;
+      s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
+      s[1] = s1;
+      return (bits >> 32) <= threshold;  // sample when the top 32 bits do not exceed threshold
+    };
+
+    // Count number of zero coefficients, separately for each DCT band.
+    // TODO(veluca): precompute when doing DCT.
+    for (size_t group_index = 0; group_index < frame_dim.num_groups;
+         group_index++) {
+      const size_t gx = group_index % frame_dim.xsize_groups;
+      const size_t gy = group_index / frame_dim.xsize_groups;
+      const Rect rect(gx * kGroupDimInBlocks, gy * kGroupDimInBlocks,
+                      kGroupDimInBlocks, kGroupDimInBlocks,
+                      frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+      ConstACPtr rows[3];
+      ACType type = acs.Type();
+      for (size_t c = 0; c < 3; c++) {
+        rows[c] = acs.PlaneRow(c, group_index, 0);
+      }
+      size_t ac_offset = 0;  // read position inside this group's coefficient rows
+
+      // TODO(veluca): SIMDfy.
+      for (size_t by = 0; by < rect.ysize(); ++by) {
+        AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
+        for (size_t bx = 0; bx < rect.xsize(); ++bx) {
+          AcStrategy acs = acs_row[bx];  // shadows the ACImage parameter `acs` from here on
+          if (!acs.IsFirstBlock()) continue;
+          if (!use_sample()) continue;  // NOTE(review): a skipped block also skips `ac_offset += size` below
+          size_t size = kDCTBlockSize << acs.log2_covered_blocks();
+          for (size_t c = 0; c < 3; ++c) {
+            const size_t order_offset =
+                CoeffOrderOffset(kStrategyOrder[acs.RawStrategy()], c);
+            if (type == ACType::k16) {
+              for (size_t k = 0; k < size; k++) {
+                bool is_zero = rows[c].ptr16[ac_offset + k] == 0;
+                num_zeros[order_offset + k] += is_zero ? 1 : 0;
+              }
+            } else {
+              for (size_t k = 0; k < size; k++) {
+                bool is_zero = rows[c].ptr32[ac_offset + k] == 0;
+                num_zeros[order_offset + k] += is_zero ? 1 : 0;
+              }
+            }
+            // Ensure LLFs are first in the order.
+            size_t cx = acs.covered_blocks_x();
+            size_t cy = acs.covered_blocks_y();
+            CoefficientLayout(&cy, &cx);
+            for (size_t iy = 0; iy < cy; iy++) {
+              for (size_t ix = 0; ix < cx; ix++) {
+                num_zeros[order_offset + iy * kBlockDim * cx + ix] = -1;
+              }
+            }
+          }
+          ac_offset += size;
+        }
+      }
+    }
+  }
+  struct PosAndCount {
+    uint32_t pos;
+    uint32_t count;
+  };
+  auto mem = hwy::AllocateAligned<PosAndCount>(AcStrategy::kMaxCoeffArea);  // sort scratch reused for every order
+
+  std::vector<coeff_order_t> natural_order_buffer;
+
+  uint16_t computed = 0;  // bitmask of orders already handled by the loop below
+  for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+    uint8_t ord = kStrategyOrder[o];
+    if (computed & (1 << ord)) continue;
+    computed |= 1 << ord;
+    AcStrategy acs = AcStrategy::FromRawStrategy(o);
+    size_t sz = kDCTBlockSize * acs.covered_blocks_x() * acs.covered_blocks_y();
+
+    // Do nothing for transforms that don't appear.
+    if ((1 << ord) & ~used_acs) continue;
+
+    if (natural_order_buffer.size() < sz) natural_order_buffer.resize(sz);
+    acs.ComputeNaturalCoeffOrder(natural_order_buffer.data());
+
+    // Ensure natural coefficient order is not permuted if the order is
+    // not transmitted.
+    if ((1 << ord) & ~used_orders) {
+      for (size_t c = 0; c < 3; c++) {
+        size_t offset = CoeffOrderOffset(ord, c);
+        JXL_DASSERT(CoeffOrderOffset(ord, c + 1) - offset == sz);
+        memcpy(&order[offset], natural_order_buffer.data(),
+               sz * sizeof(*order));
+      }
+      continue;
+    }
+
+    bool is_nondefault = false;  // set once any channel's sorted order differs from natural
+    for (uint8_t c = 0; c < 3; c++) {
+      // Apply zig-zag order.
+      PosAndCount* pos_and_val = mem.get();
+      size_t offset = CoeffOrderOffset(ord, c);
+      JXL_DASSERT(CoeffOrderOffset(ord, c + 1) - offset == sz);
+      float inv_sqrt_sz = 1.0f / std::sqrt(sz);
+      for (size_t i = 0; i < sz; ++i) {
+        size_t pos = natural_order_buffer[i];
+        pos_and_val[i].pos = pos;
+        // We don't care for the exact number -> quantize number of zeros,
+        // to get less permuted order.
+        pos_and_val[i].count = num_zeros[offset + pos] * inv_sqrt_sz + 0.1f;
+      }
+
+      // Stable-sort -> elements with same number of zeros will preserve their
+      // order.
+      auto comparator = [](const PosAndCount& a, const PosAndCount& b) -> bool {
+        return a.count < b.count;
+      };
+      std::stable_sort(pos_and_val, pos_and_val + sz, comparator);
+
+      // Grab indices.
+      for (size_t i = 0; i < sz; ++i) {
+        order[offset + i] = pos_and_val[i].pos;
+        is_nondefault |= natural_order_buffer[i] != pos_and_val[i].pos;
+      }
+    }
+    if (!is_nondefault) {
+      used_orders &= ~(1 << ord);  // caller's mask: this order turned out to be the natural one
+    }
+  }
+}
+
+namespace {
+
+// Appends to `tokens` the Lehmer code of `order` without its first `skip`
+// entries and trailing zeros, preceded by the count of entries that follow.
+void TokenizePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
+                         size_t size, std::vector<Token>* tokens) {
+  std::vector<uint32_t> scratch(size + 1);
+  std::vector<LehmerT> code(size);
+  ComputeLehmerCode(order, scratch.data(), size, code.data());
+  size_t stop = size;
+  for (; stop > skip && code[stop - 1] == 0; --stop) {
+  }
+  tokens->emplace_back(CoeffOrderContext(size), stop - skip);
+  uint32_t prev = 0;
+  for (size_t i = skip; i < stop; prev = code[i], ++i) {
+    tokens->emplace_back(CoeffOrderContext(prev), code[i]);
+  }
+}
+
+}  // namespace
+
+void EncodePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
+                       size_t size, BitWriter* writer, int layer,
+                       AuxOut* aux_out) {
+  std::vector<std::vector<Token>> streams(1);  // single token stream
+  TokenizePermutation(order, skip, size, &streams.front());
+  EntropyEncodingData entropy_codes;
+  std::vector<uint8_t> ctx_map;
+  BuildAndEncodeHistograms(HistogramParams(), kPermutationContexts, streams,
+                           &entropy_codes, &ctx_map, writer, layer, aux_out);
+  WriteTokens(streams.front(), entropy_codes, ctx_map, writer, layer, aux_out);
+}
+
+namespace {
+void EncodeCoeffOrder(const coeff_order_t* JXL_RESTRICT order, AcStrategy acs,
+                      std::vector<Token>* tokens, coeff_order_t* order_zigzag,
+                      std::vector<coeff_order_t>& natural_order_lut) {
+  // The covered-block count is also the number of leading entries skipped.
+  const size_t skip = acs.covered_blocks_x() * acs.covered_blocks_y();
+  const size_t n = kDCTBlockSize * skip;
+  // Translate `order` through the lookup table, then tokenize the result.
+  for (size_t k = 0; k < n; ++k) order_zigzag[k] = natural_order_lut[order[k]];
+  TokenizePermutation(order_zigzag, skip, n, tokens);
+}
+}  // namespace
+
+void EncodeCoeffOrders(uint16_t used_orders,
+                       const coeff_order_t* JXL_RESTRICT order,
+                       BitWriter* writer, size_t layer,
+                       AuxOut* JXL_RESTRICT aux_out) {
+  // Do not write anything if no order is used.
+  if (used_orders == 0) return;
+  auto scratch = hwy::AllocateAligned<coeff_order_t>(AcStrategy::kMaxCoeffArea);
+  std::vector<coeff_order_t> lut;
+  std::vector<std::vector<Token>> streams(1);
+  uint16_t seen = 0;  // bitmask of orders already visited
+  for (uint8_t raw = 0; raw < AcStrategy::kNumValidStrategies; ++raw) {
+    const uint8_t ord = kStrategyOrder[raw];
+    const int bit = 1 << ord;
+    const bool repeated = (seen & bit) != 0;
+    seen |= bit;
+    if (repeated || (used_orders & bit) == 0) continue;
+    AcStrategy acs = AcStrategy::FromRawStrategy(raw);
+    const size_t num_coeffs =
+        kDCTBlockSize * (acs.covered_blocks_x() * acs.covered_blocks_y());
+    if (lut.size() < num_coeffs) lut.resize(num_coeffs);
+    acs.ComputeNaturalCoeffOrderLut(lut.data());
+    for (size_t c = 0; c < 3; ++c) {
+      const coeff_order_t* channel_order = &order[CoeffOrderOffset(ord, c)];
+      EncodeCoeffOrder(channel_order, acs, &streams[0], scratch.get(), lut);
+    }
+  }
+  EntropyEncodingData codes;
+  std::vector<uint8_t> context_map;
+  BuildAndEncodeHistograms(HistogramParams(), kPermutationContexts, streams,
+                           &codes, &context_map, writer, layer, aux_out);
+  WriteTokens(streams[0], codes, context_map, writer, layer, aux_out);
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_coeff_order.h b/third-party/libjxl/libjxl/lib/jxl/enc_coeff_order.h
new file mode 100644
index 0000000000..3a43f4f986
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_coeff_order.h
@@ -0,0 +1,54 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_COEFF_ORDER_H_
+#define LIB_JXL_ENC_COEFF_ORDER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_params.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// Orders that are actually used in part of the image, as bitmasks with one bit
+// per order. `rect` is in block units. Returns {used, may be made non-default}.
+std::pair<uint32_t, uint32_t> ComputeUsedOrders(
+    SpeedTier speed, const AcStrategyImage& ac_strategy, const Rect& rect);
+
+// Modify zig-zag order, so that DCT bands with more zeros go later. Bands with
+// the same number of zeros keep their relative order, so the permutation is
+// cheaper to encode. Clears the bit in `used_orders` of any unchanged order.
+void ComputeCoeffOrder(SpeedTier speed, const ACImage& acs,
+                       const AcStrategyImage& ac_strategy,
+                       const FrameDimensions& frame_dim, uint32_t& used_orders,
+                       uint16_t used_acs, coeff_order_t* JXL_RESTRICT order);
+
+void EncodeCoeffOrders(uint16_t used_orders,
+                       const coeff_order_t* JXL_RESTRICT order,
+                       BitWriter* writer, size_t layer,
+                       AuxOut* JXL_RESTRICT aux_out);  // Writes nothing when used_orders == 0.
+
+// Encoding of a single permutation. `size`: number of elements in the
+// permutation. `skip`: number of elements to skip from the *beginning* of the
+// permutation.
+void EncodePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
+                       size_t size, BitWriter* writer, int layer,
+                       AuxOut* aux_out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_COEFF_ORDER_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_color_management.cc b/third-party/libjxl/libjxl/lib/jxl/enc_color_management.cc
new file mode 100644
index 0000000000..0cb9188bc6
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_color_management.cc
@@ -0,0 +1,1297 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_color_management.h"
+
+#ifndef JPEGXL_ENABLE_SKCMS
+#define JPEGXL_ENABLE_SKCMS 0
+#endif
+
+#include <math.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <memory>
+#include <string>
+#include <utility>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_color_management.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/matrix_ops.h"
+#include "lib/jxl/transfer_functions-inl.h"
+#if JPEGXL_ENABLE_SKCMS
+#include "lib/jxl/enc_jxl_skcms.h"
+#else  // JPEGXL_ENABLE_SKCMS
+#include "lcms2.h"
+#include "lcms2_plugin.h"
+#endif  // JPEGXL_ENABLE_SKCMS
+
+#define JXL_CMS_VERBOSE 0
+
+// Define these only once. We can't use HWY_ONCE here because it is defined as
+// 1 only on the last pass.
+#ifndef LIB_JXL_ENC_COLOR_MANAGEMENT_CC_
+#define LIB_JXL_ENC_COLOR_MANAGEMENT_CC_
+
+namespace jxl {
+namespace {
+struct JxlCms {
+#if JPEGXL_ENABLE_SKCMS
+  PaddedBytes icc_src, icc_dst;
+  skcms_ICCProfile profile_src, profile_dst;  // passed to skcms_Transform
+#else
+  void* lcms_transform;  // handed to cmsDoTransform
+#endif
+
+  // These fields are used when the HLG OOTF or inverse OOTF must be applied.
+  bool apply_hlg_ootf;
+  size_t hlg_ootf_num_channels;
+  // Y component of the primaries.
+  std::array<float, 3> hlg_ootf_luminances;
+
+  size_t channels_src;  // floats per source pixel
+  size_t channels_dst;  // floats per destination pixel
+  ImageF buf_src;  // row `thread` serves as writable scratch for the input
+  ImageF buf_dst;
+  float intensity_target;  // PQ values are rescaled when this is not 10000
+  bool skip_lcms = false;  // when set, pixels are copied instead of transformed
+  ExtraTF preprocess = ExtraTF::kNone;  // decoding step run before the transform
+  ExtraTF postprocess = ExtraTF::kNone;  // encoding step run after the transform
+};
+
+Status ApplyHlgOotf(JxlCms* t, float* JXL_RESTRICT buf, size_t xsize,
+                    bool forward);  // forward=true from BeforeTransform, false from AfterTransform
+}  // namespace
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_COLOR_MANAGEMENT_CC_
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+#if JXL_CMS_VERBOSE >= 2
+const size_t kX = 0;  // pixel index, multiplied by 3 for RGB
+#endif
+
+// xform_src = UndoGammaCompression(buf_src): decodes `buf_size` samples with
+// the transfer function selected by t->preprocess.
+Status BeforeTransform(JxlCms* t, const float* buf_src, float* xform_src,
+                       size_t buf_size) {
+  const ExtraTF mode = t->preprocess;
+  if (mode == ExtraTF::kNone) {
+    // DoColorSpaceTransform only calls this for a non-kNone preprocess.
+    JXL_DASSERT(false);  // unreachable
+  } else if (mode == ExtraTF::kPQ) {
+    // By default, PQ content has an intensity target of 10000, stored
+    // exactly; for any other target the decoded values are rescaled.
+    HWY_FULL(float) d;
+    const float target = t->intensity_target;
+    const float factor = (target == 10000.f) ? 1.0f : 10000.f / target;
+    const auto scale = Set(d, factor);
+    // Whole vectors are processed, so the last one may extend past buf_size.
+    for (size_t pos = 0; pos < buf_size; pos += Lanes(d)) {
+      const auto encoded = Load(d, buf_src + pos);
+      const auto linear = TF_PQ().DisplayFromEncoded(d, encoded);
+      Store(Mul(scale, linear), d, xform_src + pos);
+    }
+#if JXL_CMS_VERBOSE >= 2
+    printf("pre in %.4f %.4f %.4f undoPQ %.4f %.4f %.4f\n", buf_src[3 * kX],
+           buf_src[3 * kX + 1], buf_src[3 * kX + 2], xform_src[3 * kX],
+           xform_src[3 * kX + 1], xform_src[3 * kX + 2]);
+#endif
+  } else if (mode == ExtraTF::kHLG) {
+    // HLG is decoded one sample at a time through a double-precision call.
+    for (size_t pos = 0; pos < buf_size; ++pos) {
+      const double encoded = static_cast<double>(buf_src[pos]);
+      xform_src[pos] = static_cast<float>(TF_HLG().DisplayFromEncoded(encoded));
+    }
+    // The OOTF, when requested, runs in the forward direction after decoding.
+    if (t->apply_hlg_ootf) {
+      JXL_RETURN_IF_ERROR(
+          ApplyHlgOotf(t, xform_src, buf_size, /*forward=*/true));
+    }
+#if JXL_CMS_VERBOSE >= 2
+    printf("pre in %.4f %.4f %.4f undoHLG %.4f %.4f %.4f\n", buf_src[3 * kX],
+           buf_src[3 * kX + 1], buf_src[3 * kX + 2], xform_src[3 * kX],
+           xform_src[3 * kX + 1], xform_src[3 * kX + 2]);
+#endif
+  } else if (mode == ExtraTF::kSRGB) {
+    HWY_FULL(float) d;
+    // Whole vectors are processed, so the last one may extend past buf_size.
+    for (size_t pos = 0; pos < buf_size; pos += Lanes(d)) {
+      const auto encoded = Load(d, buf_src + pos);
+      const auto linear = TF_SRGB().DisplayFromEncoded(encoded);
+      Store(linear, d, xform_src + pos);
+    }
+#if JXL_CMS_VERBOSE >= 2
+    printf("pre in %.4f %.4f %.4f undoSRGB %.4f %.4f %.4f\n", buf_src[3 * kX],
+           buf_src[3 * kX + 1], buf_src[3 * kX + 2], xform_src[3 * kX],
+           xform_src[3 * kX + 1], xform_src[3 * kX + 2]);
+#endif
+  }
+  // Any other ExtraTF value leaves xform_src untouched.
+  return true;
+}
+
+// Applies gamma compression in-place: re-encodes the first `buf_size` samples
+// of buf_dst with the transfer function selected by t->postprocess.
+Status AfterTransform(JxlCms* t, float* JXL_RESTRICT buf_dst, size_t buf_size) {
+  const ExtraTF mode = t->postprocess;
+  if (mode == ExtraTF::kNone) {
+    // DoColorSpaceTransform only calls this for a non-kNone postprocess.
+    JXL_DASSERT(false);  // unreachable
+  } else if (mode == ExtraTF::kPQ) {
+    // Scale by intensity_target * 1e-4 (skipped when the target is exactly
+    // 10000), then PQ-encode.
+    HWY_FULL(float) d;
+    const float target = t->intensity_target;
+    const float factor = (target == 10000.f) ? 1.0f : target * 1e-4f;
+    const auto scale = Set(d, factor);
+    // Whole vectors are processed, so the last one may extend past buf_size.
+    for (size_t pos = 0; pos < buf_size; pos += Lanes(d)) {
+      const auto display = Mul(scale, Load(d, buf_dst + pos));
+      Store(TF_PQ().EncodedFromDisplay(d, display), d, buf_dst + pos);
+    }
+#if JXL_CMS_VERBOSE >= 2
+    printf("after PQ enc %.4f %.4f %.4f\n", buf_dst[3 * kX],
+           buf_dst[3 * kX + 1], buf_dst[3 * kX + 2]);
+#endif
+  } else if (mode == ExtraTF::kHLG) {
+    // The inverse OOTF (forward=false) runs before the HLG encoding.
+    if (t->apply_hlg_ootf) {
+      JXL_RETURN_IF_ERROR(
+          ApplyHlgOotf(t, buf_dst, buf_size, /*forward=*/false));
+    }
+    for (size_t pos = 0; pos < buf_size; ++pos) {
+      const double display = static_cast<double>(buf_dst[pos]);
+      buf_dst[pos] = static_cast<float>(TF_HLG().EncodedFromDisplay(display));
+    }
+#if JXL_CMS_VERBOSE >= 2
+    printf("after HLG enc %.4f %.4f %.4f\n", buf_dst[3 * kX],
+           buf_dst[3 * kX + 1], buf_dst[3 * kX + 2]);
+#endif
+  } else if (mode == ExtraTF::kSRGB) {
+    HWY_FULL(float) d;
+    // Whole vectors are processed, so the last one may extend past buf_size.
+    for (size_t pos = 0; pos < buf_size; pos += Lanes(d)) {
+      const auto px = Load(d, buf_dst + pos);
+      const auto enc = TF_SRGB().EncodedFromDisplay(HWY_FULL(float)(), px);
+      Store(enc, d, buf_dst + pos);
+    }
+#if JXL_CMS_VERBOSE >= 2
+    printf("after SRGB enc %.4f %.4f %.4f\n", buf_dst[3 * kX],
+           buf_dst[3 * kX + 1], buf_dst[3 * kX + 2]);
+#endif
+  }
+  // Any other ExtraTF value leaves buf_dst untouched.
+  return true;
+}
+
+Status DoColorSpaceTransform(void* cms_data, const size_t thread,
+                             const float* buf_src, float* buf_dst,
+                             size_t xsize) {
+  // No lock needed. Steps: BeforeTransform, CMS transform or copy, AfterTransform.
+  JxlCms* t = reinterpret_cast<JxlCms*>(cms_data);
+
+  const float* xform_src = buf_src;  // Read-only.
+  if (t->preprocess != ExtraTF::kNone) {
+    float* mutable_xform_src = t->buf_src.Row(thread);  // Writable buffer.
+    JXL_RETURN_IF_ERROR(BeforeTransform(t, buf_src, mutable_xform_src,
+                                        xsize * t->channels_src));
+    xform_src = mutable_xform_src;
+  }
+
+#if JPEGXL_ENABLE_SKCMS
+  if (t->channels_src == 1 && !t->skip_lcms) {
+    // Expand from 1 to 3 channels, starting from the end in case
+    // xform_src == t->buf_src.Row(thread).
+    float* mutable_xform_src = t->buf_src.Row(thread);
+    for (size_t i = 0; i < xsize; ++i) {
+      const size_t x = xsize - i - 1;
+      mutable_xform_src[x * 3] = mutable_xform_src[x * 3 + 1] =
+          mutable_xform_src[x * 3 + 2] = xform_src[x];
+    }
+    xform_src = mutable_xform_src;
+  }
+#else
+  if (t->channels_src == 4 && !t->skip_lcms) {
+    // LCMS does CMYK in a weird way: 0 = white, 100 = max ink
+    float* mutable_xform_src = t->buf_src.Row(thread);
+    for (size_t x = 0; x < xsize * 4; ++x) {
+      mutable_xform_src[x] = 100.f - 100.f * xform_src[x];  // read the real input: it need not be the scratch row
+    }
+    xform_src = mutable_xform_src;
+  }
+#endif
+
+#if JXL_CMS_VERBOSE >= 2
+  // Save inputs for printing before in-place transforms overwrite them.
+  const float in0 = xform_src[3 * kX + 0];
+  const float in1 = xform_src[3 * kX + 1];
+  const float in2 = xform_src[3 * kX + 2];
+#endif
+
+  if (t->skip_lcms) {
+    if (buf_dst != xform_src) {
+      memcpy(buf_dst, xform_src, xsize * t->channels_src * sizeof(*buf_dst));
+    }  // else: in-place, no need to copy
+  } else {
+#if JPEGXL_ENABLE_SKCMS
+    JXL_CHECK(
+        skcms_Transform(xform_src,
+                        (t->channels_src == 4 ? skcms_PixelFormat_RGBA_ffff
+                                              : skcms_PixelFormat_RGB_fff),
+                        skcms_AlphaFormat_Opaque, &t->profile_src, buf_dst,
+                        skcms_PixelFormat_RGB_fff, skcms_AlphaFormat_Opaque,
+                        &t->profile_dst, xsize));
+#else   // JPEGXL_ENABLE_SKCMS
+    cmsDoTransform(t->lcms_transform, xform_src, buf_dst,
+                   static_cast<cmsUInt32Number>(xsize));
+#endif  // JPEGXL_ENABLE_SKCMS
+  }
+#if JXL_CMS_VERBOSE >= 2
+  printf("xform skip%d: %.4f %.4f %.4f (%p) -> (%p) %.4f %.4f %.4f\n",
+         t->skip_lcms, in0, in1, in2, xform_src, buf_dst, buf_dst[3 * kX],
+         buf_dst[3 * kX + 1], buf_dst[3 * kX + 2]);
+#endif
+
+#if JPEGXL_ENABLE_SKCMS
+  if (t->channels_dst == 1 && !t->skip_lcms) {
+    // Contract back from 3 to 1 channel, this time forward.
+    float* grayscale_buf_dst = t->buf_dst.Row(thread);
+    for (size_t x = 0; x < xsize; ++x) {
+      grayscale_buf_dst[x] = buf_dst[x * 3];
+    }
+    buf_dst = grayscale_buf_dst;  // later steps act on the scratch row, not the caller's pointer
+  }
+#endif
+
+  if (t->postprocess != ExtraTF::kNone) {
+    JXL_RETURN_IF_ERROR(AfterTransform(t, buf_dst, xsize * t->channels_dst));
+  }
+  return true;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+namespace {
+
+HWY_EXPORT(DoColorSpaceTransform);  // makes the per-target versions above reachable via dynamic dispatch
+int DoColorSpaceTransform(void* t, size_t thread, const float* buf_src,
+                          float* buf_dst, size_t xsize) {
+  return HWY_DYNAMIC_DISPATCH(DoColorSpaceTransform)(t, thread, buf_src,  // Status result is returned as int
+                                                     buf_dst, xsize);
+}
+
+// Define to 1 on OS X as a workaround for older LCMS lacking MD5.
+#define JXL_CMS_OLD_VERSION 0
+
+#if JPEGXL_ENABLE_SKCMS
+
+JXL_MUST_USE_RESULT CIExy CIExyFromXYZ(const float XYZ[3]) {
+  const float inv_sum = 1.f / (XYZ[0] + XYZ[1] + XYZ[2]);  // inf if the sum is 0
+  CIExy chromaticity;
+  chromaticity.x = XYZ[0] * inv_sum;
+  chromaticity.y = XYZ[1] * inv_sum;
+  return chromaticity;
+}
+
+#else  // JPEGXL_ENABLE_SKCMS
+// (LCMS interface requires xyY but we omit the Y for white points/primaries.)
+
+JXL_MUST_USE_RESULT CIExy CIExyFromxyY(const cmsCIExyY& xyY) {
+  CIExy chromaticity;  // only x and y are copied; Y is dropped
+  chromaticity.y = xyY.y;
+  chromaticity.x = xyY.x;
+  return chromaticity;
+}
+
+JXL_MUST_USE_RESULT CIExy CIExyFromXYZ(const cmsCIEXYZ& XYZ) {
+  cmsCIExyY converted;  // filled in by cmsXYZ2xyY
+  cmsXYZ2xyY(/*Dest=*/&converted, /*Source=*/&XYZ);
+  return CIExyFromxyY(converted);
+}
+
+JXL_MUST_USE_RESULT cmsCIEXYZ D50_XYZ() {
+  // Quantized D50 as stored in ICC profiles (aggregate order: X, Y, Z).
+  return {0.96420288, 1.0, 0.82490540};
+}
+
+// RAII: each unique_ptr alias below releases its LCMS handle through the
+// matching deleter (cmsCloseProfile, cmsDeleteTransform, cmsFreeToneCurve).
+struct ProfileDeleter {
+  void operator()(void* p) { cmsCloseProfile(p); }
+};
+using Profile = std::unique_ptr<void, ProfileDeleter>;
+
+struct TransformDeleter {
+  void operator()(void* p) { cmsDeleteTransform(p); }
+};
+using Transform = std::unique_ptr<void, TransformDeleter>;
+
+struct CurveDeleter {
+  void operator()(cmsToneCurve* p) { cmsFreeToneCurve(p); }
+};
+using Curve = std::unique_ptr<cmsToneCurve, CurveDeleter>;
+
+Status CreateProfileXYZ(const cmsContext context,
+                        Profile* JXL_RESTRICT profile) {
+  profile->reset(cmsCreateXYZProfileTHR(context));  // *profile owns the handle
+  if (!profile->get()) return JXL_FAILURE("Failed to create XYZ");
+  return true;
+}
+
+#endif  // !JPEGXL_ENABLE_SKCMS
+
+#if JPEGXL_ENABLE_SKCMS
+// IMPORTANT: icc must outlive profile.
+// Parses `size` bytes at `icc` into *profile via skcms_Parse.
+Status DecodeProfile(const uint8_t* icc, size_t size,
+                     skcms_ICCProfile* const profile) {
+  const bool parsed = skcms_Parse(icc, size, profile);
+  if (parsed) return true;
+  return JXL_FAILURE("Failed to parse ICC profile with %" PRIuS " bytes",
+                     size);
+}
+#else  // JPEGXL_ENABLE_SKCMS
+Status DecodeProfile(const cmsContext context, Span<const uint8_t> icc,
+                     Profile* profile) {
+  // Hand the freshly opened handle (or nullptr) to the owning pointer.
+  void* handle = cmsOpenProfileFromMemTHR(context, icc.data(), icc.size());
+  profile->reset(handle);
+  if (handle == nullptr) return JXL_FAILURE("Failed to decode profile");
+
+  // WARNING: due to the LCMS MD5 issue mentioned above, many existing
+  // profiles have incorrect MD5, so do not even bother checking them nor
+  // generating warning clutter.
+
+  return true;
+}
+#endif  // JPEGXL_ENABLE_SKCMS
+
+#if JPEGXL_ENABLE_SKCMS
+
+ColorSpace ColorSpaceFromProfile(const skcms_ICCProfile& profile) {
+  const auto signature = profile.data_color_space;
+  // spec says CMYK is encoded as RGB (the kBlack extra channel signals that
+  // it is actually CMYK)
+  if (signature == skcms_Signature_RGB || signature == skcms_Signature_CMYK) {
+    return ColorSpace::kRGB;
+  }
+  if (signature == skcms_Signature_Gray) {
+    return ColorSpace::kGray;
+  }
+  // Every other data color space is reported as unknown.
+  return ColorSpace::kUnknown;
+}
+
+// vector_out := matmul(matrix, vector_in); the two vectors must not alias.
+void MatrixProduct(const skcms_Matrix3x3& matrix, const float vector_in[3],
+                   float vector_out[3]) {
+  for (int row = 0; row < 3; ++row) {
+    vector_out[row] = 0;
+    for (int col = 0; col < 3; ++col) {
+      vector_out[row] += matrix.vals[row][col] * vector_in[col];
+    }
+  }
+}
+
+// Returns white point that was specified when creating the profile.
+JXL_MUST_USE_RESULT Status UnadaptedWhitePoint(const skcms_ICCProfile& profile,
+                                               CIExy* out) {
+  float wtpt_XYZ[3];
+  if (!skcms_GetWTPT(&profile, wtpt_XYZ)) {
+    return JXL_FAILURE("ICC profile does not contain WhitePoint tag");
+  }
+  skcms_Matrix3x3 chad;
+  const bool adapted = skcms_GetCHAD(&profile, &chad);
+  if (adapted) {
+    // The stored white point was adapted to the PCS white point using the
+    // chromatic adaptation matrix, so that adaptation has to be undone.
+    skcms_Matrix3x3 chad_inv;
+    if (!skcms_Matrix3x3_invert(&chad, &chad_inv)) {
+      return JXL_FAILURE("Non-invertible ChromaticAdaptation matrix");
+    }
+    float original_XYZ[3];
+    MatrixProduct(chad_inv, wtpt_XYZ, original_XYZ);
+    *out = CIExyFromXYZ(original_XYZ);
+  } else {
+    // No chromatic adaptation matrix: the white point is already unadapted.
+    *out = CIExyFromXYZ(wtpt_XYZ);
+  }
+  return true;
+}
+
+Status IdentifyPrimaries(const skcms_ICCProfile& profile,
+                         const CIExy& wp_unadapted, ColorEncoding* c) {
+  if (!c->HasPrimaries()) return true;  // nothing to identify
+
+  skcms_Matrix3x3 CHAD, inverse_CHAD;
+  if (skcms_GetCHAD(&profile, &CHAD)) {
+    JXL_RETURN_IF_ERROR(skcms_Matrix3x3_invert(&CHAD, &inverse_CHAD));
+  } else {  // no CHAD tag: derive the matrix from the D50 and unadapted whites
+    static constexpr skcms_Matrix3x3 kLMSFromXYZ = {
+        {{0.8951, 0.2664, -0.1614},
+         {-0.7502, 1.7135, 0.0367},
+         {0.0389, -0.0685, 1.0296}}};
+    static constexpr skcms_Matrix3x3 kXYZFromLMS = {
+        {{0.9869929, -0.1470543, 0.1599627},
+         {0.4323053, 0.5183603, 0.0492912},
+         {-0.0085287, 0.0400428, 0.9684867}}};
+    static constexpr float kWpD50XYZ[3] = {0.96420288, 1.0, 0.82490540};
+    float wp_unadapted_XYZ[3], wp_D50_LMS[3], wp_unadapted_LMS[3];
+    JXL_RETURN_IF_ERROR(CIEXYZFromWhiteCIExy(wp_unadapted, wp_unadapted_XYZ));
+    MatrixProduct(kLMSFromXYZ, kWpD50XYZ, wp_D50_LMS);
+    MatrixProduct(kLMSFromXYZ, wp_unadapted_XYZ, wp_unadapted_LMS);
+    inverse_CHAD = {{{wp_unadapted_LMS[0] / wp_D50_LMS[0], 0, 0},
+                     {0, wp_unadapted_LMS[1] / wp_D50_LMS[1], 0},
+                     {0, 0, wp_unadapted_LMS[2] / wp_D50_LMS[2]}}};
+    inverse_CHAD = skcms_Matrix3x3_concat(&kXYZFromLMS, &inverse_CHAD);
+    inverse_CHAD = skcms_Matrix3x3_concat(&inverse_CHAD, &kLMSFromXYZ);
+  }
+
+  float XYZ[3], unadapted_XYZ[3];
+  PrimariesCIExy primaries;
+  CIExy* const chromaticities[] = {&primaries.r, &primaries.g, &primaries.b};
+  for (int i = 0; i < 3; ++i) {
+    float RGB[3] = {};
+    RGB[i] = 1;  // unit vector for primary i
+    JXL_RETURN_IF_ERROR(skcms_Transform(  // a failure would leave XYZ unset
+        RGB, skcms_PixelFormat_RGB_fff, skcms_AlphaFormat_Opaque, &profile, XYZ,
+        skcms_PixelFormat_RGB_fff, skcms_AlphaFormat_Opaque,
+        skcms_XYZD50_profile(), 1));
+    MatrixProduct(inverse_CHAD, XYZ, unadapted_XYZ);
+    *chromaticities[i] = CIExyFromXYZ(unadapted_XYZ);
+  }
+  return c->SetPrimaries(primaries);
+}
+
+void DetectTransferFunction(const skcms_ICCProfile& profile,
+                            ColorEncoding* JXL_RESTRICT c) {
+  if (c->tf.SetImplicit()) return;
+
+  // True when the profile synthesized from the current *c is approximately
+  // equal to `profile` according to skcms.
+  const auto matches_profile = [&]() {
+    skcms_ICCProfile profile_test;
+    PaddedBytes bytes;
+    return MaybeCreateProfile(*c, &bytes) &&
+           DecodeProfile(bytes.data(), bytes.size(), &profile_test) &&
+           skcms_ApproximatelyEqualProfiles(&profile, &profile_test);
+  };
+
+  // First try a pure power law: gamma[i] stays 0 for a channel whose curve is
+  // neither a parametric gamma nor approximable by one.
+  float gamma[3] = {};
+  if (profile.has_trc) {
+    const auto IsGamma = [](const skcms_TransferFunction& tf) {
+      return tf.a == 1 && tf.b == 0 &&
+             /* if b and d are zero, it is fine for c not to be */ tf.d == 0 &&
+             tf.e == 0 && tf.f == 0;
+    };
+    for (int i = 0; i < 3; ++i) {
+      // Inspect channel i's own curve (not just the first channel's).
+      if (profile.trc[i].table_entries == 0 &&
+          IsGamma(profile.trc[i].parametric)) {
+        gamma[i] = 1.f / profile.trc[i].parametric.g;
+      } else {
+        skcms_TransferFunction approximate_tf;
+        float max_error;
+        if (skcms_ApproximateCurve(&profile.trc[i], &approximate_tf,
+                                   &max_error)) {
+          if (IsGamma(approximate_tf)) {
+            gamma[i] = 1.f / approximate_tf.g;
+          }
+        }
+      }
+    }
+  }
+  // Accept it only if all channels agree and the profile round-trips.
+  if (gamma[0] != 0 && std::abs(gamma[0] - gamma[1]) < 1e-4f &&
+      std::abs(gamma[1] - gamma[2]) < 1e-4f && c->tf.SetGamma(gamma[0]) &&
+      matches_profile()) {
+    return;
+  }
+
+  // Otherwise probe every known transfer function in turn.
+  for (TransferFunction tf : Values<TransferFunction>()) {
+    // Can only create profile from known transfer function.
+    if (tf == TransferFunction::kUnknown) continue;
+    c->tf.SetTransferFunction(tf);
+    if (matches_profile()) return;
+  }
+
+  c->tf.SetTransferFunction(TransferFunction::kUnknown);
+}
+
+#else  // JPEGXL_ENABLE_SKCMS
+
+uint32_t Type32(const ColorEncoding& c, bool cmyk) {
+  if (cmyk) return TYPE_CMYK_FLT;  // The cmyk flag is checked before c.
+  if (c.IsGray()) return TYPE_GRAY_FLT;
+  return TYPE_RGB_FLT;  // Anything that is neither CMYK nor gray.
+}
+
+uint32_t Type64(const ColorEncoding& c) {
+  if (c.IsGray()) return TYPE_GRAY_DBL;
+  return TYPE_RGB_DBL;
+}
+
+ColorSpace ColorSpaceFromProfile(const Profile& profile) {
+  switch (cmsGetColorSpace(profile.get())) {
+    case cmsSigRgbData:
+    case cmsSigCmykData:  // CMYK data is also reported as kRGB here.
+      return ColorSpace::kRGB;
+    case cmsSigGrayData:
+      return ColorSpace::kGray;
+    default:  // Every other data color space maps to kUnknown.
+      return ColorSpace::kUnknown;
+  }
+}
+
+// "profile1" is pre-decoded to save time in DetectTransferFunction.
+Status ProfileEquivalentToICC(const cmsContext context, const Profile& profile1,
+                              const PaddedBytes& icc, const ColorEncoding& c) {
+  const uint32_t type_src = Type64(c);
+
+  Profile profile2;
+  JXL_RETURN_IF_ERROR(
+      DecodeProfile(context, Span<const uint8_t>(icc), &profile2));
+
+  Profile profile_xyz;
+  JXL_RETURN_IF_ERROR(CreateProfileXYZ(context, &profile_xyz));
+
+  const uint32_t intent = INTENT_RELATIVE_COLORIMETRIC;
+  const uint32_t flags = cmsFLAGS_NOOPTIMIZE | cmsFLAGS_BLACKPOINTCOMPENSATION |
+                         cmsFLAGS_HIGHRESPRECALC;
+  Transform xform1(cmsCreateTransformTHR(context, profile1.get(), type_src,
+                                         profile_xyz.get(), TYPE_XYZ_DBL,
+                                         intent, flags));
+  Transform xform2(cmsCreateTransformTHR(context, profile2.get(), type_src,
+                                         profile_xyz.get(), TYPE_XYZ_DBL,
+                                         intent, flags));
+  if (xform1 == nullptr || xform2 == nullptr) {
+    return JXL_FAILURE("Failed to create transform");
+  }
+
+  double in[3];
+  double out1[3];
+  double out2[3];
+
+  // Uniformly spaced samples from very dark to almost fully bright.
+  const double init = 1E-3;
+  const double step = 0.2;
+
+  if (c.IsGray()) {
+    // Finer sampling and replicate each component.
+    for (in[0] = init; in[0] < 1.0; in[0] += step / 8) {
+      cmsDoTransform(xform1.get(), in, out1, 1);
+      cmsDoTransform(xform2.get(), in, out2, 1);
+      if (!ApproxEq(out1[0], out2[0], 2E-4)) {
+        return false;
+      }
+    }
+  } else {
+    for (in[0] = init; in[0] < 1.0; in[0] += step) {
+      for (in[1] = init; in[1] < 1.0; in[1] += step) {
+        for (in[2] = init; in[2] < 1.0; in[2] += step) {
+          cmsDoTransform(xform1.get(), in, out1, 1);
+          cmsDoTransform(xform2.get(), in, out2, 1);
+          for (size_t i = 0; i < 3; ++i) {
+            if (!ApproxEq(out1[i], out2[i], 2E-4)) {
+              return false;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  return true;
+}
+
+// Returns white point that was specified when creating the profile.
+// NOTE: we can't just use cmsSigMediaWhitePointTag because its interpretation
+// differs between ICC versions.
+JXL_MUST_USE_RESULT cmsCIEXYZ UnadaptedWhitePoint(const cmsContext context,
+                                                  const Profile& profile,
+                                                  const ColorEncoding& c) {
+  const cmsCIEXYZ* white_point = static_cast<const cmsCIEXYZ*>(
+      cmsReadTag(profile.get(), cmsSigMediaWhitePointTag));
+  if (white_point != nullptr &&
+      cmsReadTag(profile.get(), cmsSigChromaticAdaptationTag) == nullptr) {
+    // No chromatic adaptation matrix: the white point is already unadapted.
+    return *white_point;
+  }
+
+  cmsCIEXYZ XYZ = {1.0, 1.0, 1.0};
+  Profile profile_xyz;
+  if (!CreateProfileXYZ(context, &profile_xyz)) return XYZ;
+  // Array arguments are one per profile.
+  cmsHPROFILE profiles[2] = {profile.get(), profile_xyz.get()};
+  // Leave white point unchanged - that is what we're trying to extract.
+  cmsUInt32Number intents[2] = {INTENT_ABSOLUTE_COLORIMETRIC,
+                                INTENT_ABSOLUTE_COLORIMETRIC};
+  cmsBool black_compensation[2] = {0, 0};
+  cmsFloat64Number adaption[2] = {0.0, 0.0};
+  // Only transforming a single pixel, so skip expensive optimizations.
+  cmsUInt32Number flags = cmsFLAGS_NOOPTIMIZE | cmsFLAGS_HIGHRESPRECALC;
+  Transform xform(cmsCreateExtendedTransform(
+      context, 2, profiles, black_compensation, intents, adaption, nullptr, 0,
+      Type64(c), TYPE_XYZ_DBL, flags));
+  if (!xform) return XYZ;  // TODO(lode): return error
+
+  // xy are relative, so magnitude does not matter if we ignore output Y.
+  const cmsFloat64Number in[3] = {1.0, 1.0, 1.0};
+  cmsDoTransform(xform.get(), in, &XYZ.X, 1);
+  return XYZ;
+}
+
+Status IdentifyPrimaries(const cmsContext context, const Profile& profile,
+                         const cmsCIEXYZ& wp_unadapted, ColorEncoding* c) {
+  if (!c->HasPrimaries()) return true;
+  if (ColorSpaceFromProfile(profile) == ColorSpace::kUnknown) return true;
+
+  // These were adapted to the profile illuminant before storing in the profile.
+  const cmsCIEXYZ* adapted_r = static_cast<const cmsCIEXYZ*>(
+      cmsReadTag(profile.get(), cmsSigRedColorantTag));
+  const cmsCIEXYZ* adapted_g = static_cast<const cmsCIEXYZ*>(
+      cmsReadTag(profile.get(), cmsSigGreenColorantTag));
+  const cmsCIEXYZ* adapted_b = static_cast<const cmsCIEXYZ*>(
+      cmsReadTag(profile.get(), cmsSigBlueColorantTag));
+
+  cmsCIEXYZ converted_rgb[3];
+  if (adapted_r == nullptr || adapted_g == nullptr || adapted_b == nullptr) {
+    // No colorant tag, determine the XYZ coordinates of the primaries by
+    // converting from the colorspace.
+    Profile profile_xyz;
+    if (!CreateProfileXYZ(context, &profile_xyz)) {
+      return JXL_FAILURE("Failed to retrieve colorants");
+    }
+    // Array arguments are one per profile.
+    cmsHPROFILE profiles[2] = {profile.get(), profile_xyz.get()};
+    cmsUInt32Number intents[2] = {INTENT_RELATIVE_COLORIMETRIC,
+                                  INTENT_RELATIVE_COLORIMETRIC};
+    cmsBool black_compensation[2] = {0, 0};
+    cmsFloat64Number adaption[2] = {0.0, 0.0};
+    // Only transforming three pixels, so skip expensive optimizations.
+    cmsUInt32Number flags = cmsFLAGS_NOOPTIMIZE | cmsFLAGS_HIGHRESPRECALC;
+    Transform xform(cmsCreateExtendedTransform(
+        context, 2, profiles, black_compensation, intents, adaption, nullptr, 0,
+        Type64(*c), TYPE_XYZ_DBL, flags));
+    if (!xform) return JXL_FAILURE("Failed to retrieve colorants");
+
+    const cmsFloat64Number in[9] = {1.0, 0.0, 0.0, 0.0, 1.0,
+                                    0.0, 0.0, 0.0, 1.0};
+    cmsDoTransform(xform.get(), in, &converted_rgb->X, 3);
+    adapted_r = &converted_rgb[0];
+    adapted_g = &converted_rgb[1];
+    adapted_b = &converted_rgb[2];
+  }
+
+  // TODO(janwas): no longer assume Bradford and D50.
+  // Undo the chromatic adaptation.
+  const cmsCIEXYZ d50 = D50_XYZ();
+
+  cmsCIEXYZ r, g, b;
+  cmsAdaptToIlluminant(&r, &d50, &wp_unadapted, adapted_r);
+  cmsAdaptToIlluminant(&g, &d50, &wp_unadapted, adapted_g);
+  cmsAdaptToIlluminant(&b, &d50, &wp_unadapted, adapted_b);
+
+  const PrimariesCIExy rgb = {CIExyFromXYZ(r), CIExyFromXYZ(g),
+                              CIExyFromXYZ(b)};
+  return c->SetPrimaries(rgb);
+}
+
+void DetectTransferFunction(const cmsContext context, const Profile& profile,
+                            ColorEncoding* JXL_RESTRICT c) {
+  if (c->tf.SetImplicit()) return;
+
+  float gamma = 0;
+  if (const auto* gray_trc = reinterpret_cast<const cmsToneCurve*>(
+          cmsReadTag(profile.get(), cmsSigGrayTRCTag))) {
+    const double estimated_gamma =
+        cmsEstimateGamma(gray_trc, /*precision=*/1e-4);
+    if (estimated_gamma > 0) {
+      gamma = 1. / estimated_gamma;
+    }
+  } else {
+    float rgb_gamma[3] = {};
+    int i = 0;
+    for (const auto tag :
+         {cmsSigRedTRCTag, cmsSigGreenTRCTag, cmsSigBlueTRCTag}) {
+      if (const auto* trc = reinterpret_cast<const cmsToneCurve*>(
+              cmsReadTag(profile.get(), tag))) {
+        const double estimated_gamma =
+            cmsEstimateGamma(trc, /*precision=*/1e-4);
+        if (estimated_gamma > 0) {
+          rgb_gamma[i] = 1. / estimated_gamma;
+        }
+      }
+      ++i;
+    }
+    if (rgb_gamma[0] != 0 && std::abs(rgb_gamma[0] - rgb_gamma[1]) < 1e-4f &&
+        std::abs(rgb_gamma[1] - rgb_gamma[2]) < 1e-4f) {
+      gamma = rgb_gamma[0];
+    }
+  }
+
+  if (gamma != 0 && c->tf.SetGamma(gamma)) {
+    PaddedBytes icc_test;
+    if (MaybeCreateProfile(*c, &icc_test) &&
+        ProfileEquivalentToICC(context, profile, icc_test, *c)) {
+      return;
+    }
+  }
+
+  for (TransferFunction tf : Values<TransferFunction>()) {
+    // Can only create profile from known transfer function.
+    if (tf == TransferFunction::kUnknown) continue;
+
+    c->tf.SetTransferFunction(tf);
+
+    PaddedBytes icc_test;
+    if (MaybeCreateProfile(*c, &icc_test) &&
+        ProfileEquivalentToICC(context, profile, icc_test, *c)) {
+      return;
+    }
+  }
+
+  c->tf.SetTransferFunction(TransferFunction::kUnknown);
+}
+
+void ErrorHandler(cmsContext context, cmsUInt32Number code, const char* text) {
+  JXL_WARNING("LCMS error %u: %s", code, text);
+}
+
+// Returns a context for the current thread, creating it if necessary.
+cmsContext GetContext() {
+  static thread_local void* context_;  // One slot per thread.
+  if (context_ == nullptr) {  // Not yet created on this thread.
+    context_ = cmsCreateContext(nullptr, nullptr);
+    JXL_ASSERT(context_ != nullptr);
+
+    cmsSetLogErrorHandlerTHR(static_cast<cmsContext>(context_), &ErrorHandler);
+  }
+  return static_cast<cmsContext>(context_);
+}
+
+#endif  // JPEGXL_ENABLE_SKCMS
+
+Status GetPrimariesLuminances(const ColorEncoding& encoding,
+                              float luminances[3]) {
+  // Explanation:
+  // We know that the three primaries must sum to white:
+  //
+  // [Xr, Xg, Xb;     [1;     [Xw;
+  //  Yr, Yg, Yb;  ×   1;  =   Yw;
+  //  Zr, Zg, Zb]      1]      Zw]
+  //
+  // By noting that X = x·(X+Y+Z), Y = y·(X+Y+Z) and Z = z·(X+Y+Z) (note the
+  // lower case indicating chromaticity), and factoring the totals (X+Y+Z) out
+  // of the left matrix and into the all-ones vector, we get:
+  //
+  // [xr, xg, xb;     [Xr + Yr + Zr;     [Xw;
+  //  yr, yg, yb;  ×   Xg + Yg + Zg;  =   Yw;
+  //  zr, zg, zb]      Xb + Yb + Zb]      Zw]
+  //
+  // Which makes it apparent that we can compute those totals as:
+  //
+  //                  [Xr + Yr + Zr;     inv([xr, xg, xb;      [Xw;
+  //                   Xg + Yg + Zg;  =       yr, yg, yb;   ×   Yw;
+  //                   Xb + Yb + Zb]          zr, zg, zb])      Zw]
+  //
+  // From there, by multiplying each total by its corresponding y, we get Y for
+  // that primary.
+
+  float white_XYZ[3];
+  JXL_RETURN_IF_ERROR(
+      CIEXYZFromWhiteCIExy(encoding.GetWhitePoint(), white_XYZ));
+
+  const PrimariesCIExy primaries = encoding.GetPrimaries();
+  double chromaticities[3][3] = {
+      {primaries.r.x, primaries.g.x, primaries.b.x},
+      {primaries.r.y, primaries.g.y, primaries.b.y},
+      {1 - primaries.r.x - primaries.r.y, 1 - primaries.g.x - primaries.g.y,
+       1 - primaries.b.x - primaries.b.y}};
+  JXL_RETURN_IF_ERROR(Inv3x3Matrix(&chromaticities[0][0]));
+  const double ys[3] = {primaries.r.y, primaries.g.y, primaries.b.y};
+  for (size_t i = 0; i < 3; ++i) {
+    luminances[i] = ys[i] * (chromaticities[i][0] * white_XYZ[0] +
+                             chromaticities[i][1] * white_XYZ[1] +
+                             chromaticities[i][2] * white_XYZ[2]);
+  }
+  return true;
+}
+
+Status ApplyHlgOotf(JxlCms* t, float* JXL_RESTRICT buf, size_t xsize,
+                    bool forward) {
+  if (295 <= t->intensity_target && t->intensity_target <= 305) {
+    // The gamma is approximately 1 so this can essentially be skipped.
+    return true;
+  }
+  float gamma = 1.2f * std::pow(1.111f, std::log2(t->intensity_target * 1e-3f));
+  if (!forward) gamma = 1.f / gamma;  // Inverse direction: reciprocal.
+
+  switch (t->hlg_ootf_num_channels) {
+    case 1:
+      for (size_t x = 0; x < xsize; ++x) {
+        buf[x] = std::pow(buf[x], gamma);
+      }
+      break;
+
+    case 3:
+      for (size_t x = 0; x < xsize; x += 3) {  // TODO confirm: xsize in floats?
+        const float luminance = buf[x] * t->hlg_ootf_luminances[0] +
+                                buf[x + 1] * t->hlg_ootf_luminances[1] +
+                                buf[x + 2] * t->hlg_ootf_luminances[2];
+        const float ratio = std::pow(luminance, gamma - 1);
+        if (std::isfinite(ratio)) {  // Otherwise the pixel is left as is.
+          buf[x] *= ratio;
+          buf[x + 1] *= ratio;
+          buf[x + 2] *= ratio;
+          if (forward && gamma < 1) {
+            // If gamma < 1, the ratio above will be > 1 which can push bright
+            // saturated highlights out of gamut. There are several possible
+            // ways to bring them back in-gamut; this one preserves hue and
+            // saturation at the slight expense of luminance. If !forward, the
+            // previously-applied forward OOTF with gamma > 1 already pushed
+            // those highlights down and we are simply putting them back where
+            // they were so this is not necessary.
+            const float maximum =
+                std::max(buf[x], std::max(buf[x + 1], buf[x + 2]));
+            if (maximum > 1) {
+              const float normalizer = 1.f / maximum;
+              buf[x] *= normalizer;
+              buf[x + 1] *= normalizer;
+              buf[x + 2] *= normalizer;
+            }
+          }
+        }
+      }
+      break;
+
+    default:
+      return JXL_FAILURE("HLG OOTF not implemented for %" PRIuS " channels",
+                         t->hlg_ootf_num_channels);
+  }
+  return true;
+}
+
+bool ApplyCICP(const uint8_t color_primaries,
+               const uint8_t transfer_characteristics,
+               const uint8_t matrix_coefficients, const uint8_t full_range,
+               ColorEncoding* JXL_RESTRICT c) {
+  if (matrix_coefficients != 0) return false;  // Only value 0 is accepted.
+  if (full_range != 1) return false;  // Only full range is accepted.
+
+  const auto primaries = static_cast<Primaries>(color_primaries);
+  const auto tf = static_cast<TransferFunction>(transfer_characteristics);
+  if (tf == TransferFunction::kUnknown || !EnumValid(tf)) return false;
+  if (primaries == Primaries::kCustom ||
+      !(color_primaries == 12 || EnumValid(primaries))) {
+    return false;
+  }
+  c->SetColorSpace(ColorSpace::kRGB);  // *c is only modified past this point.
+  c->tf.SetTransferFunction(tf);
+  if (primaries == Primaries::kP3) {  // Paired with the DCI white point.
+    c->white_point = WhitePoint::kDCI;
+    c->primaries = Primaries::kP3;
+  } else if (color_primaries == 12) {  // Code 12: P3 with a D65 white point.
+    c->white_point = WhitePoint::kD65;
+    c->primaries = Primaries::kP3;
+  } else {  // All remaining accepted primaries use D65.
+    c->white_point = WhitePoint::kD65;
+    c->primaries = primaries;
+  }
+  return true;
+}
+
+JXL_BOOL JxlCmsSetFieldsFromICC(void* user_data, const uint8_t* icc_data,
+                                size_t icc_size, JxlColorEncoding* c,
+                                JXL_BOOL* cmyk) {
+  if (c == nullptr) return JXL_FALSE;
+  if (cmyk == nullptr) return JXL_FALSE;
+
+  *cmyk = JXL_FALSE;
+
+  // In case parsing fails, mark the ColorEncoding as invalid.
+  c->color_space = JXL_COLOR_SPACE_UNKNOWN;
+  c->transfer_function = JXL_TRANSFER_FUNCTION_UNKNOWN;
+
+  if (icc_size == 0) return JXL_FAILURE("Empty ICC profile");
+
+  ColorEncoding c_enc;
+
+#if JPEGXL_ENABLE_SKCMS
+  if (icc_size < 128) {  // Bytes 64..67 are read directly below.
+    return JXL_FAILURE("ICC file too small");
+  }
+
+  skcms_ICCProfile profile;
+  JXL_RETURN_IF_ERROR(skcms_Parse(icc_data, icc_size, &profile));
+
+  // skcms does not return the rendering intent, so get it from the file. It
+  // is encoded as big-endian 32-bit integer in bytes 64..67.
+  uint32_t rendering_intent32 = icc_data[67];
+  if (rendering_intent32 > 3 || icc_data[64] != 0 || icc_data[65] != 0 ||
+      icc_data[66] != 0) {
+    return JXL_FAILURE("Invalid rendering intent %u\n", rendering_intent32);
+  }
+  // ICC and RenderingIntent have the same values (0..3).
+  c_enc.rendering_intent = static_cast<RenderingIntent>(rendering_intent32);
+
+  if (profile.has_CICP &&
+      ApplyCICP(profile.CICP.color_primaries,
+                profile.CICP.transfer_characteristics,
+                profile.CICP.matrix_coefficients,
+                profile.CICP.video_full_range_flag, &c_enc)) {
+    ConvertInternalToExternalColorEncoding(c_enc, c);
+    return true;
+  }
+
+  c_enc.SetColorSpace(ColorSpaceFromProfile(profile));
+  *cmyk = (profile.data_color_space == skcms_Signature_CMYK);
+
+  CIExy wp_unadapted;
+  JXL_RETURN_IF_ERROR(UnadaptedWhitePoint(profile, &wp_unadapted));
+  JXL_RETURN_IF_ERROR(c_enc.SetWhitePoint(wp_unadapted));
+
+  // Relies on color_space.
+  JXL_RETURN_IF_ERROR(IdentifyPrimaries(profile, wp_unadapted, &c_enc));
+
+  // Relies on color_space/white point/primaries being set already.
+  DetectTransferFunction(profile, &c_enc);
+#else  // JPEGXL_ENABLE_SKCMS
+
+  const cmsContext context = GetContext();
+
+  Profile profile;
+  JXL_RETURN_IF_ERROR(DecodeProfile(
+      context, Span<const uint8_t>(icc_data, icc_size), &profile));
+
+  const cmsUInt32Number rendering_intent32 =
+      cmsGetHeaderRenderingIntent(profile.get());
+  if (rendering_intent32 > 3) {
+    return JXL_FAILURE("Invalid rendering intent %u\n", rendering_intent32);
+  }
+  // ICC and RenderingIntent have the same values (0..3).
+  c_enc.rendering_intent = static_cast<RenderingIntent>(rendering_intent32);
+
+  static constexpr size_t kCICPSize = 12;
+  static constexpr auto kCICPSignature =
+      static_cast<cmsTagSignature>(0x63696370);  // ASCII "cicp".
+  uint8_t cicp_buffer[kCICPSize];
+  if (cmsReadRawTag(profile.get(), kCICPSignature, cicp_buffer, kCICPSize) ==
+          kCICPSize &&
+      ApplyCICP(cicp_buffer[8], cicp_buffer[9], cicp_buffer[10],
+                cicp_buffer[11], &c_enc)) {
+    ConvertInternalToExternalColorEncoding(c_enc, c);
+    return true;
+  }
+
+  c_enc.SetColorSpace(ColorSpaceFromProfile(profile));
+  if (cmsGetColorSpace(profile.get()) == cmsSigCmykData) {
+    *cmyk = JXL_TRUE;
+    ConvertInternalToExternalColorEncoding(c_enc, c);
+    return true;
+  }
+
+  const cmsCIEXYZ wp_unadapted = UnadaptedWhitePoint(context, profile, c_enc);
+  JXL_RETURN_IF_ERROR(c_enc.SetWhitePoint(CIExyFromXYZ(wp_unadapted)));
+
+  // Relies on color_space.
+  JXL_RETURN_IF_ERROR(
+      IdentifyPrimaries(context, profile, wp_unadapted, &c_enc));
+
+  // Relies on color_space/white point/primaries being set already.
+  DetectTransferFunction(context, profile, &c_enc);
+
+#endif  // JPEGXL_ENABLE_SKCMS
+
+  ConvertInternalToExternalColorEncoding(c_enc, c);
+  return true;
+}
+
+}  // namespace
+
+namespace {
+
+void JxlCmsDestroy(void* cms_data) {
+  if (cms_data == nullptr) return;  // Null is accepted and ignored.
+  JxlCms* t = reinterpret_cast<JxlCms*>(cms_data);
+#if !JPEGXL_ENABLE_SKCMS
+  TransformDeleter()(t->lcms_transform);  // Only in the non-skcms build.
+#endif
+  delete t;
+}
+
+void* JxlCmsInit(void* init_data, size_t num_threads, size_t xsize,
+                 const JxlColorProfile* input, const JxlColorProfile* output,
+                 float intensity_target) {
+  auto cms = static_cast<const JxlCmsInterface*>(init_data);
+  auto t = jxl::make_unique<JxlCms>();  // Released to the caller on success.
+  PaddedBytes icc_src, icc_dst;
+  icc_src.assign(input->icc.data, input->icc.data + input->icc.size);
+  ColorEncoding c_src;
+  if (!c_src.SetICC(std::move(icc_src), cms)) {
+    JXL_NOTIFY_ERROR("JxlCmsInit: failed to parse input ICC");
+    return nullptr;
+  }
+  icc_dst.assign(output->icc.data, output->icc.data + output->icc.size);
+  ColorEncoding c_dst;
+  if (!c_dst.SetICC(std::move(icc_dst), cms)) {
+    JXL_NOTIFY_ERROR("JxlCmsInit: failed to parse output ICC");
+    return nullptr;
+  }
+#if JXL_CMS_VERBOSE
+  printf("%s -> %s\n", Description(c_src).c_str(), Description(c_dst).c_str());
+#endif
+
+#if JPEGXL_ENABLE_SKCMS
+  if (!DecodeProfile(input->icc.data, input->icc.size, &t->profile_src)) {
+    JXL_NOTIFY_ERROR("JxlCmsInit: skcms failed to parse input ICC");
+    return nullptr;
+  }
+  if (!DecodeProfile(output->icc.data, output->icc.size, &t->profile_dst)) {
+    JXL_NOTIFY_ERROR("JxlCmsInit: skcms failed to parse output ICC");
+    return nullptr;
+  }
+#else   // JPEGXL_ENABLE_SKCMS
+  const cmsContext context = GetContext();
+  Profile profile_src, profile_dst;
+  if (!DecodeProfile(context, Span<const uint8_t>(c_src.ICC()), &profile_src)) {
+    JXL_NOTIFY_ERROR("JxlCmsInit: lcms failed to parse input ICC");
+    return nullptr;
+  }
+  if (!DecodeProfile(context, Span<const uint8_t>(c_dst.ICC()), &profile_dst)) {
+    JXL_NOTIFY_ERROR("JxlCmsInit: lcms failed to parse output ICC");
+    return nullptr;
+  }
+#endif  // JPEGXL_ENABLE_SKCMS
+
+  t->skip_lcms = false;
+  if (c_src.SameColorEncoding(c_dst)) {
+    t->skip_lcms = true;
+#if JXL_CMS_VERBOSE
+    printf("Skip CMS\n");
+#endif
+  }
+
+  t->apply_hlg_ootf = c_src.tf.IsHLG() != c_dst.tf.IsHLG();
+  if (t->apply_hlg_ootf) {
+    const ColorEncoding* c_hlg = c_src.tf.IsHLG() ? &c_src : &c_dst;
+    t->hlg_ootf_num_channels = c_hlg->Channels();
+    if (t->hlg_ootf_num_channels == 3 &&
+        !GetPrimariesLuminances(*c_hlg, t->hlg_ootf_luminances.data())) {
+      JXL_NOTIFY_ERROR(
+          "JxlCmsInit: failed to compute the luminances of primaries");
+      return nullptr;
+    }
+  }
+
+  // Special-case SRGB <=> linear if the primaries / white point are the same,
+  // or any conversion where PQ or HLG is involved:
+  bool src_linear = c_src.tf.IsLinear();
+  const bool dst_linear = c_dst.tf.IsLinear();
+
+  if (c_src.tf.IsPQ() || c_src.tf.IsHLG() ||
+      (c_src.tf.IsSRGB() && dst_linear && c_src.SameColorSpace(c_dst))) {
+    // Construct new profile as if the data were already/still linear.
+    ColorEncoding c_linear_src = c_src;
+    c_linear_src.tf.SetTransferFunction(TransferFunction::kLinear);
+#if JPEGXL_ENABLE_SKCMS
+    skcms_ICCProfile new_src;
+#else  // JPEGXL_ENABLE_SKCMS
+    Profile new_src;
+#endif  // JPEGXL_ENABLE_SKCMS
+    // Only enable ExtraTF if profile creation succeeded.
+    if (MaybeCreateProfile(c_linear_src, &icc_src) &&
+#if JPEGXL_ENABLE_SKCMS
+        DecodeProfile(icc_src.data(), icc_src.size(), &new_src)) {
+#else   // JPEGXL_ENABLE_SKCMS
+        DecodeProfile(context, Span<const uint8_t>(icc_src), &new_src)) {
+#endif  // JPEGXL_ENABLE_SKCMS
+#if JXL_CMS_VERBOSE
+      printf("Special HLG/PQ/sRGB -> linear\n");
+#endif
+#if JPEGXL_ENABLE_SKCMS
+      t->icc_src = std::move(icc_src);
+      t->profile_src = new_src;
+#else   // JPEGXL_ENABLE_SKCMS
+      profile_src.swap(new_src);
+#endif  // JPEGXL_ENABLE_SKCMS
+      t->preprocess = c_src.tf.IsSRGB()
+                          ? ExtraTF::kSRGB
+                          : (c_src.tf.IsPQ() ? ExtraTF::kPQ : ExtraTF::kHLG);
+      c_src = c_linear_src;
+      src_linear = true;
+    } else {
+      if (t->apply_hlg_ootf) {
+        JXL_NOTIFY_ERROR(
+            "Failed to create extra linear source profile, and HLG OOTF "
+            "required");
+        return nullptr;
+      }
+      JXL_WARNING("Failed to create extra linear source profile");
+    }
+  }
+
+  if (c_dst.tf.IsPQ() || c_dst.tf.IsHLG() ||
+      (c_dst.tf.IsSRGB() && src_linear && c_src.SameColorSpace(c_dst))) {
+    ColorEncoding c_linear_dst = c_dst;
+    c_linear_dst.tf.SetTransferFunction(TransferFunction::kLinear);
+#if JPEGXL_ENABLE_SKCMS
+    skcms_ICCProfile new_dst;
+#else   // JPEGXL_ENABLE_SKCMS
+    Profile new_dst;
+#endif  // JPEGXL_ENABLE_SKCMS
+    // Only enable ExtraTF if profile creation succeeded.
+    if (MaybeCreateProfile(c_linear_dst, &icc_dst) &&
+#if JPEGXL_ENABLE_SKCMS
+        DecodeProfile(icc_dst.data(), icc_dst.size(), &new_dst)) {
+#else   // JPEGXL_ENABLE_SKCMS
+        DecodeProfile(context, Span<const uint8_t>(icc_dst), &new_dst)) {
+#endif  // JPEGXL_ENABLE_SKCMS
+#if JXL_CMS_VERBOSE
+      printf("Special linear -> HLG/PQ/sRGB\n");
+#endif
+#if JPEGXL_ENABLE_SKCMS
+      t->icc_dst = std::move(icc_dst);
+      t->profile_dst = new_dst;
+#else   // JPEGXL_ENABLE_SKCMS
+      profile_dst.swap(new_dst);
+#endif  // JPEGXL_ENABLE_SKCMS
+      t->postprocess = c_dst.tf.IsSRGB()
+                           ? ExtraTF::kSRGB
+                           : (c_dst.tf.IsPQ() ? ExtraTF::kPQ : ExtraTF::kHLG);
+      c_dst = c_linear_dst;
+    } else {
+      if (t->apply_hlg_ootf) {
+        JXL_NOTIFY_ERROR(
+            "Failed to create extra linear destination profile, and inverse "
+            "HLG OOTF required");
+        return nullptr;
+      }
+      JXL_WARNING("Failed to create extra linear destination profile");
+    }
+  }
+
+  if (c_src.SameColorEncoding(c_dst)) {
+#if JXL_CMS_VERBOSE
+    printf("Same intermediary linear profiles, skipping CMS\n");
+#endif
+    t->skip_lcms = true;
+  }
+
+#if JPEGXL_ENABLE_SKCMS
+  if (!skcms_MakeUsableAsDestination(&t->profile_dst)) {
+    JXL_NOTIFY_ERROR(
+        "Failed to make %s usable as a color transform destination",
+        Description(c_dst).c_str());
+    return nullptr;
+  }
+#endif  // JPEGXL_ENABLE_SKCMS
+
+  // Not including alpha channel (copied separately).
+  const size_t channels_src = (c_src.IsCMYK() ? 4 : c_src.Channels());
+  const size_t channels_dst = c_dst.Channels();
+  JXL_CHECK(channels_src == channels_dst ||
+            (channels_src == 4 && channels_dst == 3));
+#if JXL_CMS_VERBOSE
+  printf("Channels: %" PRIuS "; Threads: %" PRIuS "\n", channels_src,
+         num_threads);
+#endif
+
+#if !JPEGXL_ENABLE_SKCMS
+  // Type includes color space (XYZ vs RGB), so can be different.
+  const uint32_t type_src = Type32(c_src, channels_src == 4);
+  const uint32_t type_dst = Type32(c_dst, false);
+  const uint32_t intent = static_cast<uint32_t>(c_dst.rendering_intent);
+  // Use cmsFLAGS_NOCACHE to disable the 1-pixel cache and make calling
+  // cmsDoTransform() thread-safe.
+  const uint32_t flags = cmsFLAGS_NOCACHE | cmsFLAGS_BLACKPOINTCOMPENSATION |
+                         cmsFLAGS_HIGHRESPRECALC;
+  t->lcms_transform =
+      cmsCreateTransformTHR(context, profile_src.get(), type_src,
+                            profile_dst.get(), type_dst, intent, flags);
+  if (t->lcms_transform == nullptr) {
+    JXL_NOTIFY_ERROR("Failed to create transform");
+    return nullptr;
+  }
+#endif  // !JPEGXL_ENABLE_SKCMS
+
+  // Ideally LCMS would convert directly from External to Image3. However,
+  // cmsDoTransformLineStride only accepts 32-bit BytesPerPlaneIn, whereas our
+  // planes can be more than 4 GiB apart. Hence, transform inputs/outputs must
+  // be interleaved. Calling cmsDoTransform for each pixel is expensive
+  // (indirect call). We therefore transform rows, which requires per-thread
+  // buffers. To avoid separate allocations, we use the rows of an image.
+  // Because LCMS apparently also cannot handle <= 16 bit inputs and 32-bit
+  // outputs (or vice versa), we use floating point input/output.
+  t->channels_src = channels_src;
+  t->channels_dst = channels_dst;
+#if JPEGXL_ENABLE_SKCMS
+  // SkiaCMS doesn't support grayscale float buffers, so we create space for RGB
+  // float buffers anyway.
+  t->buf_src = ImageF(xsize * (channels_src == 4 ? 4 : 3), num_threads);
+  t->buf_dst = ImageF(xsize * 3, num_threads);
+#else
+  t->buf_src = ImageF(xsize * channels_src, num_threads);
+  t->buf_dst = ImageF(xsize * channels_dst, num_threads);
+#endif
+  t->intensity_target = intensity_target;
+  return t.release();
+}
+
+float* JxlCmsGetSrcBuf(void* cms_data, size_t thread) {
+  JxlCms* t = reinterpret_cast<JxlCms*>(cms_data);
+  return t->buf_src.Row(thread);
+}
+
+float* JxlCmsGetDstBuf(void* cms_data, size_t thread) {
+  JxlCms* t = reinterpret_cast<JxlCms*>(cms_data);
+  return t->buf_dst.Row(thread);
+}
+
+}  // namespace
+
+const JxlCmsInterface& GetJxlCms() {
+  static constexpr JxlCmsInterface kInterface = {
+      /*set_fields_data=*/nullptr,
+      /*set_fields_from_icc=*/&JxlCmsSetFieldsFromICC,
+      /*init_data=*/const_cast<void*>(static_cast<const void*>(&kInterface)),
+      /*init=*/&JxlCmsInit,
+      /*get_src_buf=*/&JxlCmsGetSrcBuf,
+      /*get_dst_buf=*/&JxlCmsGetDstBuf,
+      /*run=*/&DoColorSpaceTransform,
+      /*destroy=*/&JxlCmsDestroy};
+  return kInterface;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_color_management.h b/third-party/libjxl/libjxl/lib/jxl/enc_color_management.h
new file mode 100644
index 0000000000..6f6e9023a6
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_color_management.h
@@ -0,0 +1,90 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_COLOR_MANAGEMENT_H_
+#define LIB_JXL_ENC_COLOR_MANAGEMENT_H_
+
+// ICC profiles and color space conversions.
+
+#include <jxl/cms_interface.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+// Internal C++ wrapper for a JxlCmsInterface.
+class ColorSpaceTransform {
+ public:
+  explicit ColorSpaceTransform(const JxlCmsInterface& cms) : cms_(cms) {}
+  ~ColorSpaceTransform() {
+    if (cms_data_ != nullptr) {  // Only set once Init() has succeeded.
+      cms_.destroy(cms_data_);
+    }
+  }
+
+  // Cannot copy.
+  ColorSpaceTransform(const ColorSpaceTransform&) = delete;
+  ColorSpaceTransform& operator=(const ColorSpaceTransform&) = delete;
+
+  Status Init(const ColorEncoding& c_src, const ColorEncoding& c_dst,
+              float intensity_target, size_t xsize, size_t num_threads) {
+    xsize_ = xsize;
+    JxlColorProfile input_profile;
+    icc_src_ = c_src.ICC();  // Member copy; the profile points into it.
+    input_profile.icc.data = icc_src_.data();
+    input_profile.icc.size = icc_src_.size();
+    ConvertInternalToExternalColorEncoding(c_src,
+                                           &input_profile.color_encoding);
+    input_profile.num_channels = c_src.IsCMYK() ? 4 : c_src.Channels();
+    JxlColorProfile output_profile;
+    icc_dst_ = c_dst.ICC();  // Member copy; the profile points into it.
+    output_profile.icc.data = icc_dst_.data();
+    output_profile.icc.size = icc_dst_.size();
+    ConvertInternalToExternalColorEncoding(c_dst,
+                                           &output_profile.color_encoding);
+    if (c_dst.IsCMYK())
+      return JXL_FAILURE("Conversion to CMYK is not supported");
+    output_profile.num_channels = c_dst.Channels();
+    cms_data_ = cms_.init(cms_.init_data, num_threads, xsize, &input_profile,
+                          &output_profile, intensity_target);
+    JXL_RETURN_IF_ERROR(cms_data_ != nullptr);
+    return true;
+  }
+
+  float* BufSrc(const size_t thread) const {
+    return cms_.get_src_buf(cms_data_, thread);
+  }
+
+  float* BufDst(const size_t thread) const {
+    return cms_.get_dst_buf(cms_data_, thread);
+  }
+
+  Status Run(const size_t thread, const float* buf_src, float* buf_dst) {
+    return cms_.run(cms_data_, thread, buf_src, buf_dst, xsize_);
+  }
+
+ private:
+  JxlCmsInterface cms_;
+  void* cms_data_ = nullptr;
+  // The interface may retain pointers into these.
+  PaddedBytes icc_src_;
+  PaddedBytes icc_dst_;
+  size_t xsize_ = 0;  // Overwritten by Init(); defined even before that.
+};
+
+const JxlCmsInterface& GetJxlCms();
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_COLOR_MANAGEMENT_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_comparator.cc b/third-party/libjxl/libjxl/lib/jxl/enc_comparator.cc
new file mode 100644
index 0000000000..79989e2db4
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_comparator.cc
@@ -0,0 +1,128 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_comparator.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/enc_gamma_correct.h"
+#include "lib/jxl/enc_image_bundle.h"
+
+namespace jxl {
+namespace {
+
+// Blends plane `c` of `in` over a uniform background and writes plane `c` of
+// `out`. Colors are linear; the mix itself is done in gamma-compressed (sRGB8)
+// space. `alpha` is used as the foreground weight on a [0, 1] scale. `in` and
+// `out` may be the same image, so their rows must not be JXL_RESTRICT.
+void AlphaBlend(const Image3F& in, const size_t c, float background_linear,
+                const ImageF& alpha, Image3F* out) {
+  const float background = LinearToSrgb8Direct(background_linear);
+
+  for (size_t y = 0; y < out->ysize(); ++y) {
+    const float* JXL_RESTRICT row_a = alpha.ConstRow(y);
+    const float* row_i = in.ConstPlaneRow(c, y);
+    float* row_o = out->PlaneRow(c, y);
+    for (size_t x = 0; x < out->xsize(); ++x) {
+      const float a = row_a[x];
+      if (a <= 0.f) {
+        row_o[x] = background_linear;  // fully transparent
+      } else if (a >= 1.f) {
+        row_o[x] = row_i[x];  // fully opaque
+      } else {
+        const float w_fg = a;
+        const float w_bg = 1.0f - w_fg;
+        const float fg = w_fg * LinearToSrgb8Direct(row_i[x]);
+        const float bg = w_bg * background;
+        row_o[x] = Srgb8ToLinearDirect(fg + bg);
+      }
+    }
+  }
+}
+
+void AlphaBlend(float background_linear, ImageBundle* io_linear_srgb) {
+  // Bundles without an alpha channel are fully opaque: leave them untouched.
+  if (!io_linear_srgb->HasAlpha()) return;
+  Image3F* const color = io_linear_srgb->color();
+  const ImageF& alpha = *io_linear_srgb->alpha();
+  for (size_t plane = 0; plane < 3; ++plane) {
+    AlphaBlend(*color, plane, background_linear, alpha, color);
+  }
+}
+
+float ComputeScoreImpl(const ImageBundle& rgb0, const ImageBundle& rgb1,
+                       Comparator* comparator, ImageF* distmap) {
+  float result;
+  JXL_CHECK(comparator->SetReferenceImage(rgb0));  // rgb0 is the reference
+  JXL_CHECK(comparator->CompareWith(rgb1, distmap, &result));
+  return result;
+}
+
+}  // namespace
+
+float ComputeScore(const ImageBundle& rgb0, const ImageBundle& rgb1,
+                   Comparator* comparator, const JxlCmsInterface& cms,
+                   ImageF* diffmap, ThreadPool* pool, bool ignore_alpha) {
+  // Both inputs are compared in linear sRGB (gray variant for gray inputs).
+  ImageMetadata meta0 = *rgb0.metadata();
+  ImageBundle converted0(&meta0);
+  const ImageBundle* lin0;
+  JXL_CHECK(TransformIfNeeded(rgb0, ColorEncoding::LinearSRGB(rgb0.IsGray()),
+                              cms, pool, &converted0, &lin0));
+  ImageMetadata meta1 = *rgb1.metadata();
+  ImageBundle converted1(&meta1);
+  const ImageBundle* lin1;
+  JXL_CHECK(TransformIfNeeded(rgb1, ColorEncoding::LinearSRGB(rgb1.IsGray()),
+                              cms, pool, &converted1, &lin1));
+
+  // With no alpha to account for, a single comparison is all that is needed.
+  const bool any_alpha = rgb0.HasAlpha() || rgb1.HasAlpha();
+  if (ignore_alpha || !any_alpha) {
+    return ComputeScoreImpl(*lin0, *lin1, comparator, diffmap);
+  }
+
+  // Otherwise composite both images over black and over white, score each
+  // pairing, and report the larger of the two results.
+  const float black_level = 0.0f;
+  ImageBundle on_black0 = lin0->Copy();
+  ImageBundle on_black1 = lin1->Copy();
+  AlphaBlend(black_level, &on_black0);
+  AlphaBlend(black_level, &on_black1);
+
+  const float white_level = 1.0f;
+  ImageBundle on_white0 = lin0->Copy();
+  ImageBundle on_white1 = lin1->Copy();
+  AlphaBlend(white_level, &on_white0);
+  AlphaBlend(white_level, &on_white1);
+
+  ImageF map_black, map_white;
+  const float score_black =
+      ComputeScoreImpl(on_black0, on_black1, comparator, &map_black);
+  const float score_white =
+      ComputeScoreImpl(on_white0, on_white1, comparator, &map_white);
+
+  // The optional output map is the per-pixel maximum of the two maps.
+  if (diffmap != nullptr) {
+    const size_t xsize = rgb0.xsize();
+    const size_t ysize = rgb0.ysize();
+    *diffmap = ImageF(xsize, ysize);
+    for (size_t y = 0; y < ysize; ++y) {
+      const float* JXL_RESTRICT in_black = map_black.ConstRow(y);
+      const float* JXL_RESTRICT in_white = map_white.ConstRow(y);
+      float* JXL_RESTRICT out = diffmap->Row(y);
+      for (size_t x = 0; x < xsize; ++x) {
+        out[x] = std::max(in_black[x], in_white[x]);
+      }
+    }
+  }
+  return std::max(score_black, score_white);
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_comparator.h b/third-party/libjxl/libjxl/lib/jxl/enc_comparator.h
new file mode 100644
index 0000000000..c545ea6111
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_comparator.h
@@ -0,0 +1,53 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_COMPARATOR_H_
+#define LIB_JXL_ENC_COMPARATOR_H_
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+class Comparator {
+ public:
+  virtual ~Comparator() = default;
+
+  // Sets the reference image, i.e. the first of the two images to compare.
+  // The image must be in linear sRGB (gamma expanded), with 0.0f-1.0f
+  // spanning standard black point to standard white point; values outside
+  // that range are permitted.
+  virtual Status SetReferenceImage(const ImageBundle& ref) = 0;
+
+  // Compares the actual (lossy) image, i.e. the second one, to the reference.
+  // The image must be in linear sRGB (gamma expanded), with 0.0f-1.0f
+  // spanning standard black point to standard white point; values outside
+  // that range are permitted.
+  // `diffmap` receives the local score per pixel and `score` a single overall
+  // score. Either one may be nullptr to skip computing it.
+  virtual Status CompareWith(const ImageBundle& actual, ImageF* diffmap,
+                             float* score) = 0;
+
+  // Quality thresholds for diffmap and score values.
+  // The good score must be a value at which the images are considered
+  // perceptually indistinguishable (but not identical).
+  // The bad score must be larger than the good one to indicate "lower means
+  // better", and smaller than the good one to indicate "higher means better".
+  virtual float GoodQualityScore() const = 0;
+  virtual float BadQualityScore() const = 0;
+};
+
+// Computes the score given images in any RGB color model, optionally with
+// alpha channel.
+float ComputeScore(const ImageBundle& rgb0, const ImageBundle& rgb1,
+                   Comparator* comparator, const JxlCmsInterface& cms,
+                   ImageF* diffmap = nullptr, ThreadPool* pool = nullptr,
+                   bool ignore_alpha = false);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_COMPARATOR_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_context_map.cc b/third-party/libjxl/libjxl/lib/jxl/enc_context_map.cc
new file mode 100644
index 0000000000..842dd12423
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_context_map.cc
@@ -0,0 +1,141 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Library to encode the context map.
+
+#include "lib/jxl/enc_context_map.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <cstddef>
+#include <vector>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/entropy_coder.h"
+
+namespace jxl {
+
+namespace {
+
+// Returns the position of the first element of `v` equal to `value`, or
+// v.size() when there is no such element.
+size_t IndexOf(const std::vector<uint8_t>& v, uint8_t value) {
+  const auto hit = std::find(v.begin(), v.end(), value);
+  // For a miss `hit` is v.end(), whose distance from begin() is v.size().
+  return static_cast<size_t>(hit - v.begin());
+}
+
+// Moves (*v)[index] to the front, shifting elements 0..index-1 up by one.
+void MoveToFront(std::vector<uint8_t>* v, size_t index) {
+  const auto first = v->begin();
+  // A left rotation of [0, index] that makes `index` the new first element
+  // is exactly that shift; for index == 0 it leaves the vector untouched.
+  std::rotate(first, first + index, first + index + 1);
+}
+
+// Move-to-front coding over the list 0..max(v): emits each symbol's rank.
+std::vector<uint8_t> MoveToFrontTransform(const std::vector<uint8_t>& v) {
+  if (v.empty()) return v;
+  std::vector<uint8_t> order(*std::max_element(v.begin(), v.end()) + 1);
+  for (size_t s = 0; s < order.size(); ++s) order[s] = static_cast<uint8_t>(s);
+  std::vector<uint8_t> ranks;
+  for (const uint8_t symbol : v) {
+    const size_t rank = IndexOf(order, symbol);
+    JXL_ASSERT(rank < order.size());
+    ranks.push_back(static_cast<uint8_t>(rank));
+    MoveToFront(&order, rank);
+  }
+  return ranks;
+}
+
+}  // namespace
+
+void EncodeContextMap(const std::vector<uint8_t>& context_map,
+                      size_t num_histograms, BitWriter* writer, size_t layer,
+                      AuxOut* aux_out) {
+  if (num_histograms == 1) {
+    // One histogram only: the "simple" flag followed by a 2-bit entry width
+    // of zero, so no per-entry bits follow.
+    writer->Write(1, 1);
+    writer->Write(2, 0);
+    return;
+  }
+
+  // Cost two entropy-coded candidates with a null writer: the raw map and
+  // its move-to-front transform.
+  const std::vector<uint8_t> mtf_map = MoveToFrontTransform(context_map);
+  std::vector<std::vector<Token>> tokens(1), mtf_tokens(1);
+  EntropyEncodingData codes;
+  std::vector<uint8_t> dummy_context_map;
+  for (const uint8_t entry : context_map) tokens[0].emplace_back(0, entry);
+  for (const uint8_t entry : mtf_map) mtf_tokens[0].emplace_back(0, entry);
+  HistogramParams params;
+  params.uint_method = HistogramParams::HybridUintMethod::kContextMap;
+  const size_t ans_cost = BuildAndEncodeHistograms(
+      params, 1, tokens, &codes, &dummy_context_map, nullptr, 0, nullptr);
+  const size_t mtf_cost = BuildAndEncodeHistograms(
+      params, 1, mtf_tokens, &codes, &dummy_context_map, nullptr, 0, nullptr);
+  const bool use_mtf = mtf_cost < ans_cost;
+
+  // Third candidate: fixed-width entries, taken only when the width is below
+  // 4 bits and the total is strictly cheaper than both coded candidates.
+  const size_t entry_bits = CeilLog2Nonzero(num_histograms);
+  const size_t simple_cost = entry_bits * context_map.size();
+  if (entry_bits < 4 && simple_cost < ans_cost && simple_cost < mtf_cost) {
+    writer->Write(1, 1);
+    writer->Write(2, entry_bits);
+    for (const uint8_t entry : context_map) {
+      writer->Write(entry_bits, entry);
+    }
+    return;
+  }
+
+  // Entropy-coded form: flag 0, one bit for MTF on/off, histograms, tokens.
+  const std::vector<uint8_t>& chosen = use_mtf ? mtf_map : context_map;
+  tokens[0].clear();
+  for (const uint8_t entry : chosen) tokens[0].emplace_back(0, entry);
+  writer->Write(1, 0);
+  writer->Write(1, use_mtf);
+  BuildAndEncodeHistograms(params, 1, tokens, &codes, &dummy_context_map,
+                           writer, layer, aux_out);
+  WriteTokens(tokens[0], codes, dummy_context_map, writer);
+}
+
+void EncodeBlockCtxMap(const BlockCtxMap& block_ctx_map, BitWriter* writer,
+                       AuxOut* aux_out) {
+  const auto& dc_thr = block_ctx_map.dc_thresholds;
+  const auto& qf_thr = block_ctx_map.qf_thresholds;
+  const auto& ctx_map = block_ctx_map.ctx_map;
+  const size_t num_thresholds =
+      dc_thr[0].size() + dc_thr[1].size() + dc_thr[2].size() + qf_thr.size();
+  BitWriter::Allotment allotment(  // settled by ReclaimAndCharge on each exit
+      writer, num_thresholds * 34 + 1 + 4 + 4 + ctx_map.size() * 10 + 1024);
+  if (num_thresholds == 0 && ctx_map.size() == 21 &&
+      std::equal(ctx_map.begin(), ctx_map.end(), BlockCtxMap::kDefaultCtxMap)) {
+    writer->Write(1, 1);  // default
+    allotment.ReclaimAndCharge(writer, kLayerAC, aux_out);
+    return;
+  }
+  // Explicit form: three DC threshold lists, the QF thresholds, then the map.
+  writer->Write(1, 0);
+  for (size_t c = 0; c < 3; ++c) {
+    writer->Write(4, dc_thr[c].size());
+    for (const int thr : dc_thr[c]) {
+      JXL_CHECK(U32Coder::Write(kDCThresholdDist, PackSigned(thr), writer));
+    }
+  }
+  writer->Write(4, qf_thr.size());
+  for (const uint32_t thr : qf_thr) {
+    JXL_CHECK(U32Coder::Write(kQFThresholdDist, thr - 1, writer));
+  }
+  EncodeContextMap(ctx_map, block_ctx_map.num_ctxs, writer, kLayerAC, aux_out);
+  allotment.ReclaimAndCharge(writer, kLayerAC, aux_out);
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_context_map.h b/third-party/libjxl/libjxl/lib/jxl/enc_context_map.h
new file mode 100644
index 0000000000..041e71de7a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_context_map.h
@@ -0,0 +1,35 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_CONTEXT_MAP_H_
+#define LIB_JXL_ENC_CONTEXT_MAP_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/enc_bit_writer.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// Max limit is 255 because encoding assumes numbers < 255
+// More clusters can help compression, but makes encode/decode somewhat slower
+static const size_t kClustersLimit = 128;
+
+// Encodes the given context map to the bit stream. The number of different
+// histogram ids is given by num_histograms.
+void EncodeContextMap(const std::vector<uint8_t>& context_map,
+                      size_t num_histograms, BitWriter* writer, size_t layer,
+                      AuxOut* aux_out);
+
+void EncodeBlockCtxMap(const BlockCtxMap& block_ctx_map, BitWriter* writer,
+                       AuxOut* aux_out);
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_CONTEXT_MAP_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_debug_image.cc b/third-party/libjxl/libjxl/lib/jxl/enc_debug_image.cc
new file mode 100644
index 0000000000..706bcd69ba
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_debug_image.cc
@@ -0,0 +1,95 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_debug_image.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+namespace {
+template <typename T>
+void DumpImageT(const CompressParams& cparams, const char* label,
+                const ColorEncoding& color_encoding, const Image3<T>& image) {
+  if (!cparams.debug_image) return;  // no debug callback installed
+  const size_t xsize = image.xsize();
+  Image3F as_float = ConvertToFloat(image);
+  const ImageF* planes[3] = {&as_float.Plane(0), &as_float.Plane(1),
+                             &as_float.Plane(2)};
+  JxlColorEncoding external;
+  ConvertInternalToExternalColorEncoding(color_encoding, &external);
+  // 3 channels x 16 bits, big endian: each row takes 6 bytes per pixel.
+  const size_t num_samples = 3 * xsize * image.ysize();
+  std::vector<uint16_t> samples(num_samples);
+  JXL_CHECK(ConvertChannelsToExternal(
+      planes, 3, 16, false, JXL_BIG_ENDIAN, 6 * xsize, nullptr, &samples[0],
+      2 * num_samples, PixelCallback(), Orientation::kIdentity));
+  (*cparams.debug_image)(cparams.debug_image_opaque, label, xsize,
+                         image.ysize(), &external, &samples[0]);
+}
+
+template <typename T>
+void DumpPlaneNormalizedT(const CompressParams& cparams, const char* label,
+                          const Plane<T>& image) {
+  if (!cparams.debug_image) return;  // DumpImageT would discard the result
+  T min, max;
+  ImageMinMax(image, &min, &max);
+  const float mul = min == max ? 0 : (255.0f / (max - min));  // -> [0, 255]
+  Image3B normalized(image.xsize(), image.ysize());
+  for (size_t c = 0; c < 3; ++c) {  // same gray values in all three planes
+    for (size_t y = 0; y < image.ysize(); ++y) {
+      const T* JXL_RESTRICT row_in = image.ConstRow(y);
+      uint8_t* JXL_RESTRICT row_out = normalized.PlaneRow(c, y);
+      for (size_t x = 0; x < image.xsize(); ++x) {
+        row_out[x] = static_cast<uint8_t>((row_in[x] - min) * mul);
+      }
+    }
+  }
+  DumpImageT(cparams, label, ColorEncoding::SRGB(), normalized);
+}
+
+}  // namespace
+
+void DumpImage(const CompressParams& cparams, const char* label,
+               const Image3<float>& image) {
+  DumpImageT(cparams, label, ColorEncoding::SRGB(), image);  // tagged as sRGB
+}
+
+void DumpImage(const CompressParams& cparams, const char* label,
+               const Image3<uint8_t>& image) {
+  DumpImageT(cparams, label, ColorEncoding::SRGB(), image);  // tagged as sRGB
+}
+
+void DumpXybImage(const CompressParams& cparams, const char* label,
+                  const Image3F& image) {
+  if (!WantDebugOutput(cparams)) return;
+  // Undo the opsin (XYB) transform at the default intensity target, then dump
+  // the result tagged as linear sRGB.
+  OpsinParams params;
+  params.Init(kDefaultIntensityTarget);
+  Image3F rgb(image.xsize(), image.ysize());
+  OpsinToLinear(image, Rect(rgb), nullptr, &rgb, params);
+  DumpImageT(cparams, label, ColorEncoding::LinearSRGB(), rgb);
+}
+
+void DumpPlaneNormalized(const CompressParams& cparams, const char* label,
+                         const Plane<float>& image) {
+  DumpPlaneNormalizedT(cparams, label, image);  // min/max scaled to 0..255
+}
+
+void DumpPlaneNormalized(const CompressParams& cparams, const char* label,
+                         const Plane<uint8_t>& image) {
+  DumpPlaneNormalizedT(cparams, label, image);  // min/max scaled to 0..255
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_debug_image.h b/third-party/libjxl/libjxl/lib/jxl/enc_debug_image.h
new file mode 100644
index 0000000000..33799a5f7f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_debug_image.h
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_DEBUG_IMAGE_H_
+#define LIB_JXL_ENC_DEBUG_IMAGE_H_
+
+// Optional output images for debugging.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+void DumpImage(const CompressParams& cparams, const char* label,
+               const Image3<float>& image);
+void DumpImage(const CompressParams& cparams, const char* label,
+               const Image3<uint8_t>& image);
+void DumpXybImage(const CompressParams& cparams, const char* label,
+                  const Image3<float>& image);
+void DumpPlaneNormalized(const CompressParams& cparams, const char* label,
+                         const Plane<float>& image);
+void DumpPlaneNormalized(const CompressParams& cparams, const char* label,
+                         const Plane<uint8_t>& image);
+
+// True when a debug_image callback is set; lets callers skip building images.
+static inline bool WantDebugOutput(const CompressParams& cparams) {
+  return cparams.debug_image != nullptr;
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_DEBUG_IMAGE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_detect_dots.cc b/third-party/libjxl/libjxl/lib/jxl/enc_detect_dots.cc
new file mode 100644
index 0000000000..eaea1db7b8
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_detect_dots.cc
@@ -0,0 +1,587 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_detect_dots.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <cstdio>
+#include <utility>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_detect_dots.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/enc_linalg.h"
+#include "lib/jxl/enc_optimize.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+
+// Set JXL_DEBUG_DOT_DETECT to 1 to enable debugging.
+#ifndef JXL_DEBUG_DOT_DETECT
+#define JXL_DEBUG_DOT_DETECT 0
+#endif
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::Sub;
+
+// Per-pixel weighted sum of squared channel differences between `forig` and
+// `smooth`; with the weights below only channel 1 has a non-zero weight.
+ImageF SumOfSquareDifferences(const Image3F& forig, const Image3F& smooth,
+                              ThreadPool* pool) {
+  const HWY_FULL(float) d;
+  const auto weight0 = Set(d, 0.0f);
+  const auto weight1 = Set(d, 10.0f);
+  const auto weight2 = Set(d, 0.0f);
+
+  ImageF energy(forig.xsize(), forig.ysize());
+  JXL_CHECK(RunOnPool(
+      pool, 0, forig.ysize(), ThreadPool::NoInit,
+      [&](const uint32_t task, size_t thread) {
+        const size_t y = static_cast<size_t>(task);
+        const float* JXL_RESTRICT a0 = forig.Plane(0).ConstRow(y);
+        const float* JXL_RESTRICT a1 = forig.Plane(1).ConstRow(y);
+        const float* JXL_RESTRICT a2 = forig.Plane(2).ConstRow(y);
+        const float* JXL_RESTRICT b0 = smooth.Plane(0).ConstRow(y);
+        const float* JXL_RESTRICT b1 = smooth.Plane(1).ConstRow(y);
+        const float* JXL_RESTRICT b2 = smooth.Plane(2).ConstRow(y);
+        float* JXL_RESTRICT out = energy.Row(y);
+
+        for (size_t x = 0; x < forig.xsize(); x += Lanes(d)) {
+          const auto d0 = Sub(Load(d, a0 + x), Load(d, b0 + x));
+          const auto d1 = Sub(Load(d, a1 + x), Load(d, b1 + x));
+          const auto d2 = Sub(Load(d, a2 + x), Load(d, b2 + x));
+          const auto e0 = Mul(Mul(d0, d0), weight0);
+          const auto e1 = Mul(Mul(d1, d1), weight1);
+          const auto e2 = Mul(Mul(d2, d2), weight2);
+          Store(Add(e0, Add(e1, e2)), d, out + x);
+        }
+      },
+      "ComputeEnergyImage"));
+  return energy;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(SumOfSquareDifferences);  // Local function
+
+const int kEllipseWindowSize = 5;
+
+namespace {
+struct GaussianEllipse {
+  double x;                         // center position in x
+  double y;                         // center position in y
+  double sigma_x;                   // scale in x (divides the squared offset)
+  double sigma_y;                   // scale in y (divides the squared offset)
+  double angle;                     // ellipse rotation in radians
+  std::array<double, 3> intensity;  // peak intensity in each channel
+
+  // The following variables do not need to be encoded
+  double l2_loss;  // mean squared error after the Gaussian was fit
+  double l1_loss;  // mean absolute error after the Gaussian was fit
+  double ridge_loss;              // the l2_loss plus regularization term
+  double custom_loss;             // experimental custom loss
+  std::array<double, 3> bgColor;  // best background color
+  size_t neg_pixels;  // samples where the dot exceeds the image value
+  std::array<double, 3> neg_value;  // per-channel sum of those excesses
+};
+// Value at offset (dx, dy) of a rotated 2D Gaussian bump: (ct, st) are the
+// cosine/sine of the rotation and sigma_* divide the squared coordinates.
+double DotGaussianModel(double dx, double dy, double ct, double st,
+                        double sigma_x, double sigma_y, double intensity) {
+  const double u = ct * dx + st * dy;
+  const double v = -st * dx + ct * dy;
+  return intensity * exp(-0.5 * ((u * u / sigma_x) + (v * v / sigma_y)));
+}
+
+constexpr bool kOptimizeBackground = true;
+
+// Gaussian that smooths noise but preserves dots (w0 + 2*w1 + 2*w2 ~= 1).
+const WeightsSeparable5& WeightsSeparable5Gaussian0_65() {
+  constexpr float w0 = 0.558311f;  // presumably the center tap - TODO confirm
+  constexpr float w1 = 0.210395f;
+  constexpr float w2 = 0.010449f;
+  static constexpr WeightsSeparable5 weights = {
+      {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)},
+      {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)}};  // same weights in both rows
+  return weights;
+}
+
+// (Iterated) Gaussian that removes dots (w0 + 2*w1 + 2*w2 ~= 1).
+const WeightsSeparable5& WeightsSeparable5Gaussian3() {
+  constexpr float w0 = 0.222338f;  // presumably the center tap - TODO confirm
+  constexpr float w1 = 0.210431f;
+  constexpr float w2 = 0.1784f;
+  static constexpr WeightsSeparable5 weights = {
+      {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)},
+      {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)}};  // same weights in both rows
+  return weights;
+}
+
+// Returns the energy map used for dot selection: the weighted squared
+// difference between a dot-preserving blur of `orig` and `*smooth`, which is
+// set to `orig` passed twice through the dot-removing blur.
+ImageF ComputeEnergyImage(const Image3F& orig, Image3F* smooth,
+                          ThreadPool* pool) {
+  Rect full(orig);
+  const WeightsSeparable5& keep_dots = WeightsSeparable5Gaussian0_65();
+  const WeightsSeparable5& erase_dots = WeightsSeparable5Gaussian3();
+  Image3F filtered(orig.xsize(), orig.ysize());
+  *smooth = Image3F(orig.xsize(), orig.ysize());
+  for (size_t c = 0; c < 3; ++c) {
+    // `filtered` first holds the intermediate result of the first erase pass
+    // and is then overwritten with the dot-preserving blur.
+    Separable5(orig.Plane(c), full, erase_dots, pool, &filtered.Plane(c));
+    Separable5(filtered.Plane(c), full, erase_dots, pool, &smooth->Plane(c));
+    Separable5(orig.Plane(c), full, keep_dots, pool, &filtered.Plane(c));
+  }
+  return HWY_DYNAMIC_DISPATCH(SumOfSquareDifferences)(filtered, *smooth, pool);
+}
+
+struct Pixel {
+  int x;  // column index (may be negative for neighbor offsets)
+  int y;  // row index (may be negative for neighbor offsets)
+};
+
+Pixel operator+(const Pixel& a, const Pixel& b) {
+  return {a.x + b.x, a.y + b.y};  // component-wise sum
+}
+
+// Maximum area, in pixels, of a connected component (candidate ellipse).
+const size_t kMaxCCSize = 1000;
+
+// Flood-fills the 8-connected pixels above `threshold` reachable from `seed`:
+// each is zeroed in `img` and appended to `pixels`. False if > kMaxCCSize.
+bool ExtractComponent(ImageF* img, std::vector<Pixel>* pixels,
+                      const Pixel& seed, double threshold) {
+  static const std::vector<Pixel> neighbors{
+      {1, -1}, {1, 0}, {1, 1}, {0, -1}, {0, 1}, {-1, -1}, {-1, 1}, {-1, 0}};
+  std::vector<Pixel> q{seed};
+  while (!q.empty()) {
+    Pixel current = q.back();
+    q.pop_back();
+    pixels->push_back(current);
+    if (pixels->size() > kMaxCCSize) return false;  // component too large
+    for (const Pixel& delta : neighbors) {
+      Pixel child = current + delta;
+      if (child.x >= 0 && static_cast<size_t>(child.x) < img->xsize() &&
+          child.y >= 0 && static_cast<size_t>(child.y) < img->ysize()) {
+        float* value = &img->Row(child.y)[child.x];
+        if (*value > threshold) {
+          *value = 0.0;  // zeroing doubles as the "already queued" mark
+          q.push_back(child);
+        }
+      }
+    }
+  }
+  return true;
+}
+
+inline bool PointInRect(const Rect& r, const Pixel& p) {
+  const size_t px = static_cast<size_t>(p.x);  // negatives wrap to huge values
+  const size_t py = static_cast<size_t>(p.y);
+  return px >= r.x0() && px < r.x0() + r.xsize() && py >= r.y0() &&
+         py < r.y0() + r.ysize();
+}
+
+struct ConnectedComponent {
+  ConnectedComponent(const Rect& bounds, std::vector<Pixel> pixels)
+      : bounds(bounds), pixels(std::move(pixels)) {}  // no copy for rvalues
+  Rect bounds;
+  std::vector<Pixel> pixels;
+  float maxEnergy;  // this and the fields below are filled in by CompStats()
+  float meanEnergy;
+  float varEnergy;
+  float meanBg;
+  float varBg;
+  float score;
+  Pixel mode;  // position of maxEnergy
+
+  void CompStats(const ImageF& energy, int extra) {
+    maxEnergy = 0.0;
+    meanEnergy = 0.0;
+    varEnergy = 0.0;
+    meanBg = 0.0;
+    varBg = 0.0;
+    int nIn = 0;
+    int nOut = 0;
+    mode.x = 0;
+    mode.y = 0;
+    for (int sy = -extra; sy < (static_cast<int>(bounds.ysize()) + extra);
+         sy++) {
+      int y = sy + static_cast<int>(bounds.y0());
+      if (y < 0 || static_cast<size_t>(y) >= energy.ysize()) continue;
+      const float* JXL_RESTRICT erow = energy.ConstRow(y);
+      for (int sx = -extra; sx < (static_cast<int>(bounds.xsize()) + extra);
+           sx++) {
+        int x = sx + static_cast<int>(bounds.x0());
+        if (x < 0 || static_cast<size_t>(x) >= energy.xsize()) continue;
+        if (erow[x] > maxEnergy) {
+          maxEnergy = erow[x];
+          mode.x = x;
+          mode.y = y;
+        }
+        if (PointInRect(bounds, Pixel{x, y})) {  // inside the bounding box
+          meanEnergy += erow[x];
+          varEnergy += erow[x] * erow[x];
+          nIn++;
+        } else {  // in the `extra`-wide margin around it: background
+          meanBg += erow[x];
+          varBg += erow[x] * erow[x];
+          nOut++;
+        }
+      }
+    }
+    meanEnergy = meanEnergy / nIn;
+    meanBg = meanBg / nOut;  // NaN when no margin pixel lies inside `energy`
+    varEnergy = (varEnergy / nIn) - meanEnergy * meanEnergy;
+    varBg = (varBg / nOut) - meanBg * meanBg;
+    score = (meanEnergy - meanBg) / std::sqrt(varBg);  // not finite if varBg==0
+  }
+};
+
+// Smallest Rect containing every pixel of the non-empty list `pixels`.
+Rect BoundingRectangle(const std::vector<Pixel>& pixels) {
+  JXL_ASSERT(!pixels.empty());
+  Pixel lo = pixels[0];
+  Pixel hi = pixels[0];
+  for (const Pixel& p : pixels) {
+    if (p.x < lo.x) lo.x = p.x;
+    if (p.x > hi.x) hi.x = p.x;
+    if (p.y < lo.y) lo.y = p.y;
+    if (p.y > hi.y) hi.y = p.y;
+  }
+  return Rect(lo.x, lo.y, hi.x - lo.x + 1, hi.y - lo.y + 1);
+}
+
+// Returns the components of `energy` seeded above t_high and grown above
+// t_low whose bounds are below maxWindow and whose score is not < minScore.
+std::vector<ConnectedComponent> FindCC(const ImageF& energy, double t_low,
+                                       double t_high, uint32_t maxWindow,
+                                       double minScore) {
+  const int kExtraRect = 4;
+  ImageF img(energy.xsize(), energy.ysize());
+  CopyImageTo(energy, &img);
+  std::vector<ConnectedComponent> ans;
+  for (size_t y = 0; y < img.ysize(); y++) {
+    float* row = img.Row(y);  // not restrict: ExtractComponent writes img too
+    for (size_t x = 0; x < img.xsize(); x++) {
+      if (row[x] > t_high) {
+        std::vector<Pixel> pixels;
+        row[x] = 0.0;
+        const Pixel seed{static_cast<int>(x), static_cast<int>(y)};
+        if (!ExtractComponent(&img, &pixels, seed, t_low)) continue;
+#if JXL_DEBUG_DOT_DETECT
+        for (size_t i = 0; i < pixels.size(); i++) {
+          fprintf(stderr, "(%d,%d) ", pixels[i].x, pixels[i].y);
+        }
+        fprintf(stderr, "\n");
+#endif  // JXL_DEBUG_DOT_DETECT
+        Rect bounds = BoundingRectangle(pixels);
+        if (bounds.xsize() < maxWindow && bounds.ysize() < maxWindow) {
+          ConnectedComponent cc{bounds, std::move(pixels)};
+          cc.CompStats(energy, kExtraRect);
+          if (cc.score < minScore) continue;
+          JXL_DEBUG(JXL_DEBUG_DOT_DETECT,
+                    "cc mode: (%d,%d), max: %f, bgMean: %f bgVar: "
+                    "%f bound:(%" PRIuS ",%" PRIuS ",%" PRIuS ",%" PRIuS ")\n",
+                    cc.mode.x, cc.mode.y, cc.maxEnergy, cc.meanBg,
+                    cc.varBg, cc.bounds.x0(), cc.bounds.y0(),
+                    cc.bounds.xsize(), cc.bounds.ysize());
+          ans.push_back(std::move(cc));  // cc is not used afterwards
+        }
+      }
+    }
+  }
+  return ans;
+}
+
+// TODO (sggonzalez): Adapt this function for the different color spaces or
+// remove it if the color space with the best performance does not need it
+void ComputeDotLosses(GaussianEllipse* ellipse, const ConnectedComponent& cc,
+                      const Image3F& img, const Image3F& background) {
+  const int rectBounds = 2;  // margin added around cc.bounds on every side
+  const double kIntensityR = 0.0;   // 0.015;
+  const double kSigmaR = 0.0;       // 0.01;
+  const double kZeroEpsilon = 0.1;  // Tolerance to consider a value negative
+  double ct = cos(ellipse->angle), st = sin(ellipse->angle);
+  const std::array<double, 3> channelGains{{1.0, 1.0, 1.0}};
+  int N = 0;  // samples visited, summed over all three channels
+  ellipse->l1_loss = 0.0;
+  ellipse->l2_loss = 0.0;
+  ellipse->neg_pixels = 0;
+  ellipse->neg_value.fill(0.0);
+  // Squared distance between the ellipse center and the component's mode.
+  double distMeanModeSq = (cc.mode.x - ellipse->x) * (cc.mode.x - ellipse->x) +
+                          (cc.mode.y - ellipse->y) * (cc.mode.y - ellipse->y);
+  ellipse->custom_loss = 0.0;
+  for (int c = 0; c < 3; c++) {
+    for (int sy = -rectBounds;
+         sy < (static_cast<int>(cc.bounds.ysize()) + rectBounds); sy++) {
+      int y = sy + cc.bounds.y0();
+      if (y < 0 || static_cast<size_t>(y) >= img.ysize()) continue;
+      const float* JXL_RESTRICT row = img.ConstPlaneRow(c, y);
+      // bgrow is only used if kOptimizeBackground is false.
+      // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores)
+      const float* JXL_RESTRICT bgrow = background.ConstPlaneRow(c, y);
+      for (int sx = -rectBounds;
+           sx < (static_cast<int>(cc.bounds.xsize()) + rectBounds); sx++) {
+        int x = sx + cc.bounds.x0();
+        if (x < 0 || static_cast<size_t>(x) >= img.xsize()) continue;
+        double target = row[x];
+        double dotDelta = DotGaussianModel(
+            x - ellipse->x, y - ellipse->y, ct, st, ellipse->sigma_x,
+            ellipse->sigma_y, ellipse->intensity[c]);
+        if (dotDelta > target + kZeroEpsilon) {  // dot exceeds the image value
+          ellipse->neg_pixels++;
+          ellipse->neg_value[c] += dotDelta - target;
+        }
+        double bkg = kOptimizeBackground ? ellipse->bgColor[c] : bgrow[x];
+        double pred = bkg + dotDelta;
+        double diff = target - pred;
+        double l2 = channelGains[c] * diff * diff;
+        double l1 = channelGains[c] * std::fabs(diff);
+        ellipse->l2_loss += l2;
+        ellipse->l1_loss += l1;
+        // Unrotated Gaussian weight centered on the component's mode.
+        double w = DotGaussianModel(x - cc.mode.x, y - cc.mode.y, 1.0, 0.0,
+                                    1.0 + ellipse->sigma_x,
+                                    1.0 + ellipse->sigma_y, 1.0);
+        ellipse->custom_loss += w * l2;
+        N++;
+      }
+    }
+  }
+  ellipse->l2_loss /= N;  // the three sums become means over visited samples
+  ellipse->custom_loss /= N;
+  ellipse->custom_loss += 20.0 * distMeanModeSq + ellipse->neg_value[1];
+  ellipse->l1_loss /= N;
+  double ridgeTerm = kSigmaR * ellipse->sigma_x + kSigmaR * ellipse->sigma_y;
+  for (int c = 0; c < 3; c++) {
+    ridgeTerm += kIntensityR * ellipse->intensity[c] * ellipse->intensity[c];
+  }
+  ellipse->ridge_loss = ellipse->l2_loss + ridgeTerm;
+}
+// Fits an ellipse to cc from weighted moments around cc.mode, then scores it.
+GaussianEllipse FitGaussianFast(const ConnectedComponent& cc,
+                                const ImageF& energy, const Image3F& img,
+                                const Image3F& background) {
+  constexpr bool leastSqIntensity = true;
+  constexpr double kEpsilon = 1e-6;  // lower clamp for the per-pixel weight
+  GaussianEllipse ans;
+  constexpr int kRectBounds = (kEllipseWindowSize >> 1);  // half the window size
+
+  // Compute the 1st and 2nd moments of the CC
+  double sum = 0.0;  // total weight
+  int N = 0;         // number of in-image pixels visited
+  std::array<double, 3> m1{{0.0, 0.0, 0.0}};  // first moments: x, y (entry 2 unused)
+  std::array<double, 3> m2{{0.0, 0.0, 0.0}};  // second moments: xx, xy, yy
+  std::array<double, 3> color{{0.0, 0.0, 0.0}};
+  std::array<double, 3> bgColor{{0.0, 0.0, 0.0}};
+
+  JXL_DEBUG(JXL_DEBUG_DOT_DETECT,
+            "%" PRIuS " %" PRIuS " %" PRIuS " %" PRIuS "\n", cc.bounds.x0(),
+            cc.bounds.y0(), cc.bounds.xsize(), cc.bounds.ysize());
+  for (int c = 0; c < 3; c++) {
+    color[c] = img.ConstPlaneRow(c, cc.mode.y)[cc.mode.x] -
+               background.ConstPlaneRow(c, cc.mode.y)[cc.mode.x];
+  }
+  double sign = (color[1] > 0) ? 1 : -1;  // polarity of channel 1 at the mode
+  for (int sy = -kRectBounds; sy <= kRectBounds; sy++) {
+    int y = sy + cc.mode.y;
+    if (y < 0 || static_cast<size_t>(y) >= energy.ysize()) continue;  // energy is only used for its size
+    const float* JXL_RESTRICT row = img.ConstPlaneRow(1, y);
+    const float* JXL_RESTRICT bgrow = background.ConstPlaneRow(1, y);
+    for (int sx = -kRectBounds; sx <= kRectBounds; sx++) {
+      int x = sx + cc.mode.x;
+      if (x < 0 || static_cast<size_t>(x) >= energy.xsize()) continue;
+      double w = std::max(kEpsilon, sign * (row[x] - bgrow[x]));  // always >= kEpsilon
+      sum += w;
+
+      m1[0] += w * x;
+      m1[1] += w * y;
+      m2[0] += w * x * x;
+      m2[1] += w * x * y;
+      m2[2] += w * y * y;
+      for (int c = 0; c < 3; c++) {
+        bgColor[c] += background.ConstPlaneRow(c, y)[x];
+      }
+      N++;
+    }
+  }
+  JXL_CHECK(N > 0);
+
+  for (int i = 0; i < 3; i++) {
+    m1[i] /= sum;  // m1[2] is never accumulated and stays 0
+    m2[i] /= sum;
+    bgColor[i] /= N;  // unweighted mean background over the window
+  }
+
+  // Some magic constants
+  constexpr double kSigmaMult = 1.0;
+  constexpr std::array<double, 3> kScaleMult{{1.1, 1.1, 1.1}};
+
+  // Now set the parameters of the Gaussian
+  ans.x = m1[0];
+  ans.y = m1[1];
+  for (int j = 0; j < 3; j++) {
+    ans.intensity[j] = kScaleMult[j] * color[j];  // overwritten below when leastSqIntensity is true
+  }
+
+  ImageD Sigma(2, 2), D(1, 2), U(2, 2);
+  Sigma.Row(0)[0] = m2[0] - m1[0] * m1[0];
+  Sigma.Row(1)[1] = m2[2] - m1[1] * m1[1];
+  Sigma.Row(0)[1] = Sigma.Row(1)[0] = m2[1] - m1[0] * m1[1];
+  ConvertToDiagonal(Sigma, &D, &U);  // presumably an eigen-decomposition of Sigma; confirm
+  const double* JXL_RESTRICT d = D.ConstRow(0);
+  const double* JXL_RESTRICT u = U.ConstRow(1);
+  int p1 = 0, p2 = 1;
+  if (d[0] < d[1]) std::swap(p1, p2);  // p1 indexes the larger entry of d
+  ans.sigma_x = kSigmaMult * d[p1];
+  ans.sigma_y = kSigmaMult * d[p2];
+  ans.angle = std::atan2(u[p1], u[p2]);
+  ans.l2_loss = 0.0;
+  ans.bgColor = bgColor;
+  if (leastSqIntensity) {
+    GaussianEllipse* ellipse = &ans;
+    double ct = cos(ans.angle), st = sin(ans.angle);
+    // Estimate intensity with least squares (fixed background)
+    for (int c = 0; c < 3; c++) {
+      double gg = 0.0;  // sum of gaussian^2
+      double gd = 0.0;  // sum of gaussian * target
+      int yc = static_cast<int>(cc.mode.y);
+      int xc = static_cast<int>(cc.mode.x);
+      for (int y = yc - kRectBounds; y <= yc + kRectBounds; y++) {
+        if (y < 0 || static_cast<size_t>(y) >= img.ysize()) continue;
+        const float* JXL_RESTRICT row = img.ConstPlaneRow(c, y);
+        const float* JXL_RESTRICT bgrow = background.ConstPlaneRow(c, y);
+        for (int x = xc - kRectBounds; x <= xc + kRectBounds; x++) {
+          if (x < 0 || static_cast<size_t>(x) >= img.xsize()) continue;
+          double target = row[x] - bgrow[x];
+          double gaussian =
+              DotGaussianModel(x - ellipse->x, y - ellipse->y, ct, st,
+                               ellipse->sigma_x, ellipse->sigma_y, 1.0);
+          gg += gaussian * gaussian;
+          gd += gaussian * target;
+        }
+      }
+      ans.intensity[c] = gd / (gg + 1e-6);  // Regularized least squares
+    }
+  }
+  ComputeDotLosses(&ans, cc, img, background);  // fills the loss fields of ans
+  return ans;
+}
+// Runs FitGaussianFast, then normalizes so sigma_x >= sigma_y and 0 <= angle < kPi.
+GaussianEllipse FitGaussian(const ConnectedComponent& cc, const ImageF& energy,
+                            const Image3F& img, const Image3F& background) {
+  auto ellipse = FitGaussianFast(cc, energy, img, background);
+  if (ellipse.sigma_x < ellipse.sigma_y) {
+    std::swap(ellipse.sigma_x, ellipse.sigma_y);
+    ellipse.angle += kPi / 2.0;  // exchanging the axes turns the ellipse a quarter turn
+  }
+  ellipse.angle -= kPi * std::floor(ellipse.angle / kPi);  // wrap into [0, kPi)
+  if (fabs(ellipse.angle - kPi) < 1e-6 || fabs(ellipse.angle) < 1e-6) {
+    ellipse.angle = 0.0;  // snap angles within 1e-6 of 0 or kPi to exactly 0
+  }
+  JXL_CHECK(ellipse.angle >= 0 && ellipse.angle <= kPi &&
+            ellipse.sigma_x >= ellipse.sigma_y);
+  JXL_DEBUG(JXL_DEBUG_DOT_DETECT,
+            "Ellipse mu=(%lf,%lf) sigma=(%lf,%lf) angle=%lf "
+            "intensity=(%lf,%lf,%lf) bg=(%lf,%lf,%lf) l2_loss=%lf "
+            "custom_loss=%lf, neg_pix=%" PRIuS ", neg_v=(%lf,%lf,%lf)\n",
+            ellipse.x, ellipse.y, ellipse.sigma_x, ellipse.sigma_y,
+            ellipse.angle, ellipse.intensity[0], ellipse.intensity[1],
+            ellipse.intensity[2], ellipse.bgColor[0], ellipse.bgColor[1],
+            ellipse.bgColor[2], ellipse.l2_loss, ellipse.custom_loss,
+            ellipse.neg_pixels, ellipse.neg_value[0], ellipse.neg_value[1],
+            ellipse.neg_value[2]);
+  return ellipse;
+}
+
+}  // namespace
+// Returns an (opsin - smooth) patch for every component whose fit passes params.
+std::vector<PatchInfo> DetectGaussianEllipses(
+    const Image3F& opsin, const GaussianDetectParams& params,
+    const EllipseQuantParams& qParams, ThreadPool* pool) {  // qParams is not used in this body
+  std::vector<PatchInfo> dots;
+  Image3F smooth(opsin.xsize(), opsin.ysize());  // output argument of ComputeEnergyImage
+  ImageF energy = ComputeEnergyImage(opsin, &smooth, pool);
+  std::vector<ConnectedComponent> components = FindCC(
+      energy, params.t_low, params.t_high, params.maxWinSize, params.minScore);
+  size_t numCC =
+      std::min(params.maxCC, (components.size() * params.percCC) / 100);
+  if (components.size() > numCC) {  // too many candidates: keep the best numCC
+    std::sort(
+        components.begin(), components.end(),
+        [](const ConnectedComponent& a, const ConnectedComponent& b) -> bool {
+          return a.score > b.score;  // highest score first
+        });
+    components.erase(components.begin() + numCC, components.end());
+  }
+  for (const auto& cc : components) {
+    GaussianEllipse ellipse = FitGaussian(cc, energy, opsin, smooth);
+    if (ellipse.x < 0.0 ||  // reject fits whose center is not inside the image
+        std::ceil(ellipse.x) >= static_cast<double>(opsin.xsize()) ||
+        ellipse.y < 0.0 ||
+        std::ceil(ellipse.y) >= static_cast<double>(opsin.ysize())) {
+      continue;
+    }
+    if (ellipse.neg_pixels > params.maxNegPixels) continue;
+    double intensity = 0.21 * ellipse.intensity[0] +  // fixed per-channel weights
+                       0.72 * ellipse.intensity[1] +
+                       0.07 * ellipse.intensity[2];
+    double intensitySq = intensity * intensity;
+    // for (int c = 0; c < 3; c++) {
+    //  intensitySq += ellipse.intensity[c] * ellipse.intensity[c];
+    //}
+    double sqDistMeanMode = (ellipse.x - cc.mode.x) * (ellipse.x - cc.mode.x) +
+                            (ellipse.y - cc.mode.y) * (ellipse.y - cc.mode.y);
+    if (ellipse.l2_loss < params.maxL2Loss &&  // all four thresholds must pass
+        ellipse.custom_loss < params.maxCustomLoss &&
+        intensitySq > (params.minIntensity * params.minIntensity) &&
+        sqDistMeanMode < params.maxDistMeanMode * params.maxDistMeanMode) {
+      size_t x0 = cc.bounds.x0();
+      size_t y0 = cc.bounds.y0();
+      dots.emplace_back();
+      dots.back().second.emplace_back(x0, y0);  // single position: top-left of cc.bounds
+      QuantizedPatch& patch = dots.back().first;
+      patch.xsize = cc.bounds.xsize();
+      patch.ysize = cc.bounds.ysize();
+      for (size_t y = 0; y < patch.ysize; y++) {
+        for (size_t x = 0; x < patch.xsize; x++) {
+          for (size_t c = 0; c < 3; c++) {
+            patch.fpixels[c][y * patch.xsize + x] =  // row-major within the patch
+                opsin.ConstPlaneRow(c, y0 + y)[x0 + x] -
+                smooth.ConstPlaneRow(c, y0 + y)[x0 + x];
+          }
+        }
+      }
+    }
+  }
+  return dots;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_detect_dots.h b/third-party/libjxl/libjxl/lib/jxl/enc_detect_dots.h
new file mode 100644
index 0000000000..c3071d9a2f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_detect_dots.h
@@ -0,0 +1,67 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// We attempt to remove dots, or speckle, from images using Gaussian blur.
+#ifndef LIB_JXL_ENC_DETECT_DOTS_H_
+#define LIB_JXL_ENC_DETECT_DOTS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <array>
+#include <vector>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+// Thresholds used by DetectGaussianEllipses to find and filter candidate dots.
+struct GaussianDetectParams {
+  double t_high = 0;  // at least one pixel must have larger energy than t_high
+  double t_low = 0;   // all pixels must have a larger energy than t_low
+  uint32_t maxWinSize = 0;  // discard dots larger than this containing window
+  double maxL2Loss = 0;      // keep a dot only if its l2_loss is below this
+  double maxCustomLoss = 0;  // keep a dot only if its custom_loss is below this
+  double minIntensity = 0;     // If the intensity is too low, discard it
+  double maxDistMeanMode = 0;  // The mean and the mode must be close
+  size_t maxNegPixels = 0;     // Maximum number of negative pixels
+  size_t minScore = 0;  // forwarded to FindCC; presumably a minimum component score (confirm)
+  size_t maxCC = 50;   // Maximum number of CC to keep
+  size_t percCC = 15;  // Percentage in [0,100] of CC to keep
+};
+
+// Ellipse quantization params. Members have no defaults; keep their order stable.
+struct EllipseQuantParams {
+  size_t xsize;      // Image size in x
+  size_t ysize;      // Image size in y
+  size_t qPosition;  // Position quantization delta
+  // Quantization for the Gaussian sigma parameters
+  double minSigma;
+  double maxSigma;
+  size_t qSigma;  // number of quantization levels
+  // Quantization for the rotation angle (between -pi and pi)
+  size_t qAngle;
+  // Quantization for the intensity
+  std::array<double, 3> minIntensity;  // one entry per channel
+  std::array<double, 3> maxIntensity;  // one entry per channel
+  std::array<size_t, 3> qIntensity;  // number of quantization levels
+  // Extra parameters for the encoding
+  bool subtractQuantized;  // Should we subtract quantized or detected dots?
+  float ytox;  // presumably the Y-to-X chroma-from-luma ratio (confirm)
+  float ytob;  // presumably the Y-to-B chroma-from-luma ratio (confirm)
+
+  void QuantPositionSize(size_t* xsize, size_t* ysize) const;  // defined outside this header
+};
+
+// Detects dots in the XYB image `opsin`; returns one PatchInfo per accepted dot.
+std::vector<PatchInfo> DetectGaussianEllipses(
+    const Image3F& opsin, const GaussianDetectParams& params,
+    const EllipseQuantParams& qParams, ThreadPool* pool);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_DETECT_DOTS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_dot_dictionary.cc b/third-party/libjxl/libjxl/lib/jxl/enc_dot_dictionary.cc
new file mode 100644
index 0000000000..2d22c1edb8
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_dot_dictionary.cc
@@ -0,0 +1,71 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_dot_dictionary.h"
+
+#include <stddef.h>
+#include <string.h>
+
+#include <array>
+#include <utility>
+
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_detect_dots.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+// Private implementation of Dictionary Encode/Decode
+namespace {
+
+/* Quantization constants for Ellipse dots */
+const size_t kEllipsePosQ = 2;        // Quantization level for the position
+const double kEllipseMinSigma = 0.1;  // Minimum sigma value
+const double kEllipseMaxSigma = 3.1;  // Maximum Sigma value
+const size_t kEllipseSigmaQ = 16;     // Number of quantization levels for sigma
+const size_t kEllipseAngleQ = 8;      // Quantization level for the angle
+// Per-channel intensity ranges and level counts. TODO: fix these values.
+const std::array<double, 3> kEllipseMinIntensity{{-0.05, 0.0, -0.5}};
+const std::array<double, 3> kEllipseMaxIntensity{{0.05, 1.0, 0.4}};
+const std::array<size_t, 3> kEllipseIntensityQ{{10, 36, 10}};
+}  // namespace
+// Runs Gaussian dot detection on `opsin` when enabled; otherwise returns {}.
+std::vector<PatchInfo> FindDotDictionary(const CompressParams& cparams,
+                                         const Image3F& opsin,
+                                         const ColorCorrelationMap& cmap,
+                                         ThreadPool* pool) {
+  if (!ApplyOverride(cparams.dots,
+                     cparams.butteraugli_distance >= kMinButteraugliForDots)) {
+    return {};  // dot detection is switched off for these parameters
+  }
+  GaussianDetectParams detect_params;
+  detect_params.t_high = 0.04;
+  detect_params.t_low = 0.02;
+  detect_params.maxWinSize = 5;
+  detect_params.maxL2Loss = 0.005;
+  detect_params.maxCustomLoss = 300;
+  detect_params.minIntensity = 0.12;
+  detect_params.maxDistMeanMode = 1.0;
+  detect_params.maxNegPixels = 0;
+  detect_params.minScore = 12.0;
+  detect_params.maxCC = 100;
+  detect_params.percCC = 100;
+  EllipseQuantParams quant_params{
+      opsin.xsize(),      opsin.ysize(),        kEllipsePosQ,
+      kEllipseMinSigma,   kEllipseMaxSigma,     kEllipseSigmaQ,
+      kEllipseAngleQ,     kEllipseMinIntensity, kEllipseMaxIntensity,
+      kEllipseIntensityQ, kEllipsePosQ <= 5,    cmap.YtoXRatio(0),
+      cmap.YtoBRatio(0)};
+
+  return DetectGaussianEllipses(opsin, detect_params, quant_params, pool);
+}
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_dot_dictionary.h b/third-party/libjxl/libjxl/lib/jxl/enc_dot_dictionary.h
new file mode 100644
index 0000000000..2ba4393f30
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_dot_dictionary.h
@@ -0,0 +1,34 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_DOT_DICTIONARY_H_
+#define LIB_JXL_ENC_DOT_DICTIONARY_H_
+
+// Dots are stored in a dictionary to avoid storing similar dots multiple
+// times.
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+// Returns the dot patches found in `opsin`, or an empty vector if dots are off.
+std::vector<PatchInfo> FindDotDictionary(const CompressParams& cparams,
+                                         const Image3F& opsin,
+                                         const ColorCorrelationMap& cmap,
+                                         ThreadPool* pool);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_DOT_DICTIONARY_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_entropy_coder.cc b/third-party/libjxl/libjxl/lib/jxl/enc_entropy_coder.cc
new file mode 100644
index 0000000000..0c293b91d5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_entropy_coder.cc
@@ -0,0 +1,273 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_entropy_coder.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_entropy_coder.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_context_map.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::AndNot;
+using hwy::HWY_NAMESPACE::Eq;
+using hwy::HWY_NAMESPACE::GetLane;
+
+// Returns the number of non-zero coefficients in block[], excluding the LLF.
+// NOTE(review): block[] is already int32; the old "truncate to integer" remark looks stale.
+// Also writes ceil(nzeros / covered_blocks) to each covered 8x8 block at nzeros_pos.
+int32_t NumNonZeroExceptLLF(const size_t cx, const size_t cy,
+                            const AcStrategy acs, const size_t covered_blocks,
+                            const size_t log2_covered_blocks,
+                            const int32_t* JXL_RESTRICT block,
+                            const size_t nzeros_stride,
+                            int32_t* JXL_RESTRICT nzeros_pos) {
+  const HWY_CAPPED(int32_t, kBlockDim) di;
+
+  const auto zero = Zero(di);
+  // Add FF..FF for every zero coefficient, negate to get #zeros.
+  auto neg_sum_zero = zero;
+
+  {
+    // Mask sufficient for one row of coefficients.
+    HWY_ALIGN const int32_t
+        llf_mask_lanes[AcStrategy::kMaxCoeffBlocks * (1 + kBlockDim)] = {
+            -1, -1, -1, -1};
+    // Intended: first cx lanes FF..FF, rest 0. NOTE(review): only 4 lanes are -1, so this holds only if kMaxCoeffBlocks == 4; confirm.
+    const int32_t* llf_mask_pos =
+        llf_mask_lanes + AcStrategy::kMaxCoeffBlocks - cx;
+
+    // Rows with LLF: mask out the LLF
+    for (size_t y = 0; y < cy; y++) {
+      for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
+        const auto llf_mask = LoadU(di, llf_mask_pos + x);  // unaligned: offset depends on cx
+
+        // LLF counts as zero so we don't include it in nzeros.
+        const auto coef =
+            AndNot(llf_mask, Load(di, &block[y * cx * kBlockDim + x]));
+
+        neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
+      }
+    }
+  }
+
+  // Remaining rows: no mask
+  for (size_t y = cy; y < cy * kBlockDim; y++) {
+    for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
+      const auto coef = Load(di, &block[y * cx * kBlockDim + x]);
+      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
+    }
+  }
+
+  // We want area - sum_zero, add because neg_sum_zero is already negated.
+  const int32_t nzeros =
+      int32_t(cx * cy * kDCTBlockSize) + GetLane(SumOfLanes(di, neg_sum_zero));
+
+  const int32_t shifted_nzeros = static_cast<int32_t>(
+      (nzeros + covered_blocks - 1) >> log2_covered_blocks);  // rounds up
+  // Need non-canonicalized dimensions!
+  for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
+    for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
+      nzeros_pos[x + y * nzeros_stride] = shifted_nzeros;
+    }
+  }
+
+  return nzeros;
+}
+
+// Specialization for 8x8, where only top-left is LLF/DC; stores the count in *nzeros_pos.
+// About 1% overall speedup vs. NumNonZeroExceptLLF.
+int32_t NumNonZero8x8ExceptDC(const int32_t* JXL_RESTRICT block,
+                              int32_t* JXL_RESTRICT nzeros_pos) {
+  const HWY_CAPPED(int32_t, kBlockDim) di;
+
+  const auto zero = Zero(di);
+  // Add FF..FF for every zero coefficient, negate to get #zeros.
+  auto neg_sum_zero = zero;
+
+  {
+    // First row has DC, so mask
+    const size_t y = 0;
+    HWY_ALIGN const int32_t dc_mask_lanes[kBlockDim] = {-1};  // only lane 0 is FF..FF
+
+    for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
+      const auto dc_mask = Load(di, dc_mask_lanes + x);
+
+      // DC counts as zero so we don't include it in nzeros.
+      const auto coef = AndNot(dc_mask, Load(di, &block[y * kBlockDim + x]));
+
+      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
+    }
+  }
+
+  // Remaining rows: no mask
+  for (size_t y = 1; y < kBlockDim; y++) {
+    for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
+      const auto coef = Load(di, &block[y * kBlockDim + x]);
+      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
+    }
+  }
+
+  // We want 64 - sum_zero, add because neg_sum_zero is already negated.
+  const int32_t nzeros =
+      int32_t(kDCTBlockSize) + GetLane(SumOfLanes(di, neg_sum_zero));
+
+  *nzeros_pos = nzeros;  // unlike NumNonZeroExceptLLF, exactly one entry is written
+
+  return nzeros;
+}
+
+// The number of nonzeros of each block is predicted from the top and the left
+// blocks, with appropriate scaling to take into account the number of blocks of
+// each strategy.  The predicted number of nonzeros divided by two is used as a
+// context; if this number is above 63, a specific context is used.  If the
+// number of nonzeros of a strategy is above 63, it is written directly using a
+// fixed number of bits (that depends on the size of the strategy).
+void TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
+                          const Rect& rect,
+                          const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
+                          const AcStrategyImage& ac_strategy,
+                          YCbCrChromaSubsampling cs,
+                          Image3I* JXL_RESTRICT tmp_num_nzeroes,
+                          std::vector<Token>* JXL_RESTRICT output,
+                          const ImageB& qdc, const ImageI& qf,
+                          const BlockCtxMap& block_ctx_map) {
+  const size_t xsize_blocks = rect.xsize();
+  const size_t ysize_blocks = rect.ysize();
+
+  // TODO(user): update the estimate: usually less coefficients are used.
+  output->reserve(output->size() +
+                  3 * xsize_blocks * ysize_blocks * kDCTBlockSize);
+
+  size_t offset[3] = {};  // per-channel read position within ac_rows[c]
+  const size_t nzeros_stride = tmp_num_nzeroes->PixelsPerRow();
+  for (size_t by = 0; by < ysize_blocks; ++by) {
+    size_t sby[3] = {by >> cs.VShift(0), by >> cs.VShift(1),
+                     by >> cs.VShift(2)};
+    int32_t* JXL_RESTRICT row_nzeros[3] = {
+        tmp_num_nzeroes->PlaneRow(0, sby[0]),
+        tmp_num_nzeroes->PlaneRow(1, sby[1]),
+        tmp_num_nzeroes->PlaneRow(2, sby[2]),
+    };
+    const int32_t* JXL_RESTRICT row_nzeros_top[3] = {  // nullptr on the first row
+        sby[0] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(0, sby[0] - 1),
+        sby[1] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(1, sby[1] - 1),
+        sby[2] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(2, sby[2] - 1),
+    };
+    const uint8_t* JXL_RESTRICT row_qdc =
+        qdc.ConstRow(rect.y0() + by) + rect.x0();
+    const int32_t* JXL_RESTRICT row_qf = rect.ConstRow(qf, by);
+    AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
+    for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+      AcStrategy acs = acs_row[bx];
+      if (!acs.IsFirstBlock()) continue;  // only positions flagged as a first block are tokenized
+      size_t sbx[3] = {bx >> cs.HShift(0), bx >> cs.HShift(1),
+                       bx >> cs.HShift(2)};
+      size_t cx = acs.covered_blocks_x();
+      size_t cy = acs.covered_blocks_y();
+      const size_t covered_blocks = cx * cy;  // = #LLF coefficients
+      const size_t log2_covered_blocks =
+          Num0BitsBelowLS1Bit_Nonzero(covered_blocks);
+      const size_t size = covered_blocks * kDCTBlockSize;
+
+      CoefficientLayout(&cy, &cx);  // swap cx/cy to canonical order
+
+      for (int c : {1, 0, 2}) {  // channel 1 is tokenized first
+        if (sbx[c] << cs.HShift(c) != bx) continue;  // not on this channel's subsampled grid
+        if (sby[c] << cs.VShift(c) != by) continue;
+        const int32_t* JXL_RESTRICT block = ac_rows[c] + offset[c];
+
+        int32_t nzeros =
+            (covered_blocks == 1)
+                ? NumNonZero8x8ExceptDC(block, row_nzeros[c] + sbx[c])
+                : NumNonZeroExceptLLF(cx, cy, acs, covered_blocks,
+                                      log2_covered_blocks, block, nzeros_stride,
+                                      row_nzeros[c] + sbx[c]);
+
+        int ord = kStrategyOrder[acs.RawStrategy()];
+        const coeff_order_t* JXL_RESTRICT order =
+            &orders[CoeffOrderOffset(ord, c)];
+
+        int32_t predicted_nzeros =
+            PredictFromTopAndLeft(row_nzeros_top[c], row_nzeros[c], sbx[c], 32);
+        size_t block_ctx =
+            block_ctx_map.Context(row_qdc[bx], row_qf[sbx[c]], ord, c);
+        const int32_t nzero_ctx =
+            block_ctx_map.NonZeroContext(predicted_nzeros, block_ctx);
+
+        output->emplace_back(nzero_ctx, nzeros);  // first token: the non-zero count
+        const size_t histo_offset =
+            block_ctx_map.ZeroDensityContextsOffset(block_ctx);
+        // Skip LLF.
+        size_t prev = (nzeros > static_cast<ssize_t>(size / 16) ? 0 : 1);
+        for (size_t k = covered_blocks; k < size && nzeros != 0; ++k) {
+          int32_t coeff = block[order[k]];
+          size_t ctx =
+              histo_offset + ZeroDensityContext(nzeros, k, covered_blocks,
+                                                log2_covered_blocks, prev);
+          uint32_t u_coeff = PackSigned(coeff);
+          output->emplace_back(ctx, u_coeff);
+          prev = coeff != 0;
+          nzeros -= prev;  // loop ends once every non-zero has been emitted
+        }
+        JXL_DASSERT(nzeros == 0);
+        offset[c] += size;  // advance to this channel's next coefficient block
+      }
+    }
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(TokenizeCoefficients);
+void TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
+                          const Rect& rect,
+                          const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
+                          const AcStrategyImage& ac_strategy,
+                          YCbCrChromaSubsampling cs,
+                          Image3I* JXL_RESTRICT tmp_num_nzeroes,
+                          std::vector<Token>* JXL_RESTRICT output,
+                          const ImageB& qdc, const ImageI& qf,
+                          const BlockCtxMap& block_ctx_map) {
+  HWY_DYNAMIC_DISPATCH(TokenizeCoefficients)  // forwards to the dispatched impl
+  (orders, rect, ac_rows, ac_strategy, cs, tmp_num_nzeroes, output, qdc, qf,
+   block_ctx_map);
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_entropy_coder.h b/third-party/libjxl/libjxl/lib/jxl/enc_entropy_coder.h
new file mode 100644
index 0000000000..7dfc71c726
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_entropy_coder.h
@@ -0,0 +1,46 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_ENTROPY_CODER_H_
+#define LIB_JXL_ENC_ENTROPY_CODER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ac_context.h"  // BlockCtxMap
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/frame_header.h"  // YCbCrChromaSubsampling
+#include "lib/jxl/image.h"
+
+// Entropy coding and context modeling of DC and AC coefficients, as well as AC
+// strategy and quantization field.
+
+namespace jxl {
+
+// Appends to *output the tokens for the quantized AC values of DCT NxN blocks.
+// Only the subset "rect" [in units of blocks] within all images is processed.
+// See also DecodeACVarBlock.
+void TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
+                          const Rect& rect,
+                          const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
+                          const AcStrategyImage& ac_strategy,
+                          YCbCrChromaSubsampling cs,
+                          Image3I* JXL_RESTRICT tmp_num_nzeroes,
+                          std::vector<Token>* JXL_RESTRICT output,
+                          const ImageB& qdc, const ImageI& qf,
+                          const BlockCtxMap& block_ctx_map);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_ENTROPY_CODER_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_external_image.cc b/third-party/libjxl/libjxl/lib/jxl/enc_external_image.cc
new file mode 100644
index 0000000000..473e71adda
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_external_image.cc
@@ -0,0 +1,180 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_external_image.h"
+
+#include <jxl/types.h>
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/float.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+
+namespace jxl {
+namespace {
+
+// Returns the number of bytes that one sample of `data_type` occupies in an
+// interleaved pixel buffer. Types that are not handled here yield 0.
+size_t JxlDataTypeBytes(JxlDataType data_type) {
+  // 8-bit integer samples.
+  if (data_type == JXL_TYPE_UINT8) return 1;
+
+  // Both 16-bit sample types, integer and half-float, take two bytes.
+  if (data_type == JXL_TYPE_UINT16 || data_type == JXL_TYPE_FLOAT16) return 2;
+
+  // 32-bit float samples.
+  if (data_type == JXL_TYPE_FLOAT) return 4;
+
+  return 0;
+}
+
+}  // namespace
+
+// Converts channel `c` of the interleaved buffer `bytes` into the float plane
+// `channel`, which must already be xsize x ysize. Fails on an unsupported
+// format, an empty image, or a buffer that is too small or too large.
+Status ConvertFromExternal(Span<const uint8_t> bytes, size_t xsize,
+                           size_t ysize, size_t bits_per_sample,
+                           JxlPixelFormat format, size_t c, ThreadPool* pool,
+                           ImageF* channel) {
+  if (format.data_type == JXL_TYPE_UINT8) {
+    JXL_RETURN_IF_ERROR(bits_per_sample > 0 && bits_per_sample <= 8);
+  } else if (format.data_type == JXL_TYPE_UINT16) {
+    JXL_RETURN_IF_ERROR(bits_per_sample > 8 && bits_per_sample <= 16);
+  } else if (format.data_type != JXL_TYPE_FLOAT16 &&
+             format.data_type != JXL_TYPE_FLOAT) {
+    // Must be returned: a bare JXL_FAILURE(...) statement does not exit.
+    return JXL_FAILURE("unsupported pixel format data type %d",
+                       format.data_type);
+  }
+  if (xsize == 0 || ysize == 0) return JXL_FAILURE("Empty image");
+  size_t bytes_per_channel = JxlDataTypeBytes(format.data_type);
+  size_t bytes_per_pixel = format.num_channels * bytes_per_channel;
+  size_t pixel_offset = c * bytes_per_channel;
+  // Only for uint8/16.
+  float scale = 1. / ((1ull << bits_per_sample) - 1);
+
+  const size_t last_row_size = xsize * bytes_per_pixel;
+  const size_t align = format.align;
+  const size_t row_size =
+      (align > 1 ? jxl::DivCeil(last_row_size, align) * align : last_row_size);
+  const size_t bytes_to_read = row_size * (ysize - 1) + last_row_size;
+  if (bytes.size() < bytes_to_read) {
+    return JXL_FAILURE("Buffer size is too small, expected: %" PRIuS
+                       " got: %" PRIuS " (Image: %" PRIuS "x%" PRIuS
+                       "x%u, bytes_per_channel: %" PRIuS ")",
+                       bytes_to_read, bytes.size(), xsize, ysize,
+                       format.num_channels, bytes_per_channel);
+  }
+  JXL_ASSERT(channel->xsize() == xsize);
+  JXL_ASSERT(channel->ysize() == ysize);
+  // Too large buffer is likely an application bug, so also fail for that.
+  // Do allow padding to stride in last row though.
+  if (bytes.size() > row_size * ysize) {
+    return JXL_FAILURE("Buffer size is too large");
+  }
+
+  const bool little_endian =
+      format.endianness == JXL_LITTLE_ENDIAN ||
+      (format.endianness == JXL_NATIVE_ENDIAN && IsLittleEndian());
+  const uint8_t* const in = bytes.data();
+  std::atomic<size_t> error_count = {0};
+
+  const auto convert_row = [&](const uint32_t task, size_t /*thread*/) {
+    const size_t y = task;
+    size_t offset = row_size * task + pixel_offset;
+    float* JXL_RESTRICT row_out = channel->Row(y);
+    const auto save_value = [&](size_t index, float value) {
+      row_out[index] = value;
+    };
+    if (!LoadFloatRow(in + offset, xsize, bytes_per_pixel, format.data_type,
+                      little_endian, scale, save_value)) {
+      error_count++;
+    }
+  };
+  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize),
+                                ThreadPool::NoInit, convert_row,
+                                "ConvertExtraChannel"));
+  // A row that could not be loaded fails the whole conversion.
+  if (error_count) return JXL_FAILURE("unsupported pixel format data type");
+  return true;
+}
+// Converts the interleaved buffer `bytes` into `ib`: first the color channels
+// of `c_current`, then alpha when `ib` has an alpha channel.
+Status ConvertFromExternal(Span<const uint8_t> bytes, size_t xsize,
+                           size_t ysize, const ColorEncoding& c_current,
+                           size_t bits_per_sample, JxlPixelFormat format,
+                           ThreadPool* pool, ImageBundle* ib) {
+  const size_t num_color = c_current.Channels();
+  if (format.num_channels < num_color) {
+    return JXL_FAILURE("Expected %" PRIuS
+                       " color channels, received only %u channels",
+                       num_color, format.num_channels);
+  }
+  // With 2 or 4 interleaved channels, the last one is alpha.
+  const bool alpha_in = format.num_channels == 2 || format.num_channels == 4;
+
+  Image3F color(xsize, ysize);
+  for (size_t plane = 0; plane < num_color; ++plane) {
+    JXL_RETURN_IF_ERROR(ConvertFromExternal(bytes, xsize, ysize,
+                                            bits_per_sample, format, plane,
+                                            pool, &color.Plane(plane)));
+  }
+  if (num_color == 1) {
+    // Grayscale input: replicate the single plane into the other two.
+    for (size_t plane = 1; plane < 3; ++plane) {
+      CopyImageTo(color.Plane(0), &color.Plane(plane));
+    }
+  }
+  ib->SetFromImage(std::move(color), c_current);
+
+  // Alpha in the buffer is simply discarded when `ib` has no alpha channel.
+  if (!ib->HasAlpha()) return true;
+  ImageF alpha(xsize, ysize);
+  if (alpha_in) {
+    JXL_RETURN_IF_ERROR(
+        ConvertFromExternal(bytes, xsize, ysize, bits_per_sample, format,
+                            format.num_channels - 1, pool, &alpha));
+  } else {
+    FillImage(1.0f, &alpha);  // No alpha supplied: assume fully opaque.
+  }
+  ib->SetAlpha(std::move(alpha));
+  return true;
+}
+
+Status BufferToImageF(const JxlPixelFormat& pixel_format, size_t xsize,
+                      size_t ysize, const void* buffer, size_t size,
+                      ThreadPool* pool, ImageF* channel) {
+  const size_t bits = JxlDataTypeBytes(pixel_format.data_type) * kBitsPerByte;
+  const Span<const uint8_t> bytes(static_cast<const uint8_t*>(buffer), size);
+  return ConvertFromExternal(bytes, xsize, ysize, bits, pixel_format,
+                             /*c=*/0, pool, channel);  // Channel 0 only.
+}
+
+Status BufferToImageBundle(const JxlPixelFormat& pixel_format, uint32_t xsize,
+                           uint32_t ysize, const void* buffer, size_t size,
+                           jxl::ThreadPool* pool,
+                           const jxl::ColorEncoding& c_current,
+                           jxl::ImageBundle* ib) {
+  // Samples are taken to use the full width of their storage type.
+  const size_t bits = JxlDataTypeBytes(pixel_format.data_type) * kBitsPerByte;
+  const Span<const uint8_t> bytes(static_cast<const uint8_t*>(buffer), size);
+  JXL_RETURN_IF_ERROR(ConvertFromExternal(bytes, xsize, ysize, c_current, bits,
+                                          pixel_format, pool, ib));
+  ib->VerifyMetadata();
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_external_image.h b/third-party/libjxl/libjxl/lib/jxl/enc_external_image.h
new file mode 100644
index 0000000000..3b2b295076
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_external_image.h
@@ -0,0 +1,45 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_EXTERNAL_IMAGE_H_
+#define LIB_JXL_ENC_EXTERNAL_IMAGE_H_
+
+// Interleaved image for color transforms and Codec.
+
+#include <jxl/types.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+Status ConvertFromExternal(Span<const uint8_t> bytes, size_t xsize,
+                           size_t ysize, size_t bits_per_sample,
+                           JxlPixelFormat format, size_t c, ThreadPool* pool,
+                           ImageF* channel);  // Converts channel `c` only.
+
+// Converts an interleaved pixel buffer to the internal ImageBundle
+// representation. This is the opposite of ConvertToExternal().
+Status ConvertFromExternal(Span<const uint8_t> bytes, size_t xsize,
+                           size_t ysize, const ColorEncoding& c_current,
+                           size_t bits_per_sample, JxlPixelFormat format,
+                           ThreadPool* pool, ImageBundle* ib);
+Status BufferToImageF(const JxlPixelFormat& pixel_format, size_t xsize,
+                      size_t ysize, const void* buffer, size_t size,
+                      ThreadPool* pool, ImageF* channel);  // Channel 0 only.
+Status BufferToImageBundle(const JxlPixelFormat& pixel_format, uint32_t xsize,
+                           uint32_t ysize, const void* buffer, size_t size,
+                           jxl::ThreadPool* pool,
+                           const jxl::ColorEncoding& c_current,
+                           jxl::ImageBundle* ib);  // Also verifies metadata.
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_EXTERNAL_IMAGE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_external_image_gbench.cc b/third-party/libjxl/libjxl/lib/jxl/enc_external_image_gbench.cc
new file mode 100644
index 0000000000..4b7147817a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_external_image_gbench.cc
@@ -0,0 +1,46 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "benchmark/benchmark.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+namespace {
+
+// Encoder case: deinterleaves an RGBA8 buffer into an ImageBundle.
+void BM_EncExternalImage_ConvertImageRGBA(benchmark::State& state) {
+  const size_t kNumIter = 5;
+  // Square image whose side is the benchmark's range argument.
+  const size_t xsize = state.range();
+  const size_t ysize = state.range();
+  std::vector<uint8_t> interleaved(xsize * ysize * 4);
+  JxlPixelFormat format = {4, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0};
+
+  ImageMetadata im;
+  im.SetAlphaBits(8);
+  ImageBundle ib(&im);
+
+  for (auto _ : state) {
+    for (size_t iter = 0; iter < kNumIter; ++iter) {
+      JXL_CHECK(ConvertFromExternal(
+          Span<const uint8_t>(interleaved.data(), interleaved.size()), xsize,
+          ysize, /*c_current=*/ColorEncoding::SRGB(), /*bits_per_sample=*/8,
+          format, /*pool=*/nullptr, &ib));
+    }
+  }
+
+  // Throughput counters: pixels and input bytes over all conversions.
+  const size_t conversions = kNumIter * state.iterations();
+  state.SetItemsProcessed(conversions * xsize * ysize);
+  state.SetBytesProcessed(conversions * interleaved.size());
+}
+
+BENCHMARK(BM_EncExternalImage_ConvertImageRGBA)
+    ->RangeMultiplier(2)
+    ->Range(256, 2048);
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_external_image_test.cc b/third-party/libjxl/libjxl/lib/jxl/enc_external_image_test.cc
new file mode 100644
index 0000000000..7be8d45f2d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_external_image_test.cc
@@ -0,0 +1,79 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_external_image.h"
+
+#include <array>
+#include <new>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+#if !defined(JXL_CRASH_ON_ERROR)
+// Too-small buffers must be rejected; an exact-size buffer must convert.
+TEST(ExternalImageTest, InvalidSize) {
+  ImageMetadata im;
+  im.SetAlphaBits(8);
+  ImageBundle ib(&im);
+
+  // 10x100 pixels, 4 channels of 16 bits: sizeof(buf) bytes are required.
+  JxlPixelFormat format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  const uint8_t buf[10 * 100 * 8] = {};
+  const auto convert = [&](size_t num_bytes) {
+    return ConvertFromExternal(Span<const uint8_t>(buf, num_bytes),
+                               /*xsize=*/10, /*ysize=*/100,
+                               /*c_current=*/ColorEncoding::SRGB(),
+                               /*bits_per_sample=*/16, format, nullptr, &ib);
+  };
+
+  EXPECT_FALSE(convert(10));               // Far too small.
+  EXPECT_FALSE(convert(sizeof(buf) - 1));  // One byte short.
+  EXPECT_TRUE(convert(sizeof(buf)));       // Exactly right.
+}
+#endif
+
+TEST(ExternalImageTest, AlphaMissing) {
+  const size_t xsize = 10;
+  const size_t ysize = 20;
+  const uint8_t buf[xsize * ysize * 4] = {};
+  const Span<const uint8_t> rgba(buf, sizeof(buf));
+  JxlPixelFormat format = {4, JXL_TYPE_UINT8, JXL_BIG_ENDIAN, 0};
+
+  ImageMetadata im;
+  im.SetAlphaBits(0);  // The bundle itself carries no alpha.
+  ImageBundle ib(&im);
+
+  // The four-channel buffer supplies alpha that the bundle cannot hold; the
+  // conversion should succeed and simply drop it.
+  EXPECT_TRUE(ConvertFromExternal(rgba, xsize, ysize,
+                                  /*c_current=*/ColorEncoding::SRGB(),
+                                  /*bits_per_sample=*/8, format, nullptr, &ib));
+  EXPECT_FALSE(ib.HasAlpha());
+}
+
+TEST(ExternalImageTest, AlphaPremultiplied) {
+  const size_t xsize = 10;
+  const size_t ysize = 20;
+  // Four 16-bit channels: eight bytes per pixel.
+  const uint8_t buf[xsize * ysize * 8] = {};
+  JxlPixelFormat format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  ImageMetadata im;
+  im.SetAlphaBits(8, true);
+  ImageBundle ib(&im);
+
+  EXPECT_TRUE(BufferToImageBundle(format, xsize, ysize, buf, sizeof(buf),
+                                  nullptr, ColorEncoding::SRGB(), &ib));
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_fast_lossless.cc b/third-party/libjxl/libjxl/lib/jxl/enc_fast_lossless.cc
new file mode 100644
index 0000000000..62acfbcf65
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_fast_lossless.cc
@@ -0,0 +1,3870 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef FJXL_SELF_INCLUDE
+
+#include "lib/jxl/enc_fast_lossless.h"
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <limits>
+#include <memory>
+#include <vector>
+
+// Enable NEON and AVX2/AVX512 if not asked to do otherwise and the compilers
+// support it.
+#if defined(__aarch64__) || defined(_M_ARM64)
+#include <arm_neon.h>
+
+#ifndef FJXL_ENABLE_NEON
+#define FJXL_ENABLE_NEON 1
+#endif
+
+#elif (defined(__x86_64__) || defined(_M_X64)) && !defined(_MSC_VER)
+#include <immintrin.h>
+
+// manually add _mm512_cvtsi512_si32 definition if missing
+// (e.g. with Xcode on macOS Mojave)
+// copied from gcc 11.1.0 include/avx512fintrin.h line 14367-14373
+#if defined(__clang__) &&                                           \
+    ((!defined(__apple_build_version__) && __clang_major__ < 10) || \
+     (defined(__apple_build_version__) && __apple_build_version__ < 12000032))
+inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtsi512_si32(__m512i __A) {  // Fallback for compilers lacking it.
+  __v16si __B = (__v16si)__A;  // Reinterpret the vector as 32-bit int lanes.
+  return __B[0];  // The lowest lane.
+}
+#endif
+
+// TODO(veluca): MSVC support for dynamic dispatch.
+#if defined(__clang__) || defined(__GNUC__)
+
+#ifndef FJXL_ENABLE_AVX2
+#define FJXL_ENABLE_AVX2 1
+#endif
+
+#ifndef FJXL_ENABLE_AVX512
+// On clang-7 or earlier, and gcc-10 or earlier, AVX512 seems broken.
+#if (defined(__clang__) &&                                             \
+         (!defined(__apple_build_version__) && __clang_major__ > 7) || \
+     (defined(__apple_build_version__) &&                              \
+      __apple_build_version__ > 10010046)) ||                          \
+    (defined(__GNUC__) && __GNUC__ > 10)
+#define FJXL_ENABLE_AVX512 1
+#endif
+#endif
+
+#endif
+
+#endif
+
+#ifndef FJXL_ENABLE_NEON
+#define FJXL_ENABLE_NEON 0
+#endif
+
+#ifndef FJXL_ENABLE_AVX2
+#define FJXL_ENABLE_AVX2 0
+#endif
+
+#ifndef FJXL_ENABLE_AVX512
+#define FJXL_ENABLE_AVX512 0
+#endif
+
+namespace {
+#if defined(_MSC_VER) && !defined(__clang__)
+#define FJXL_INLINE __forceinline
+FJXL_INLINE uint32_t FloorLog2(uint32_t v) {  // floor(log2(v)); 0 for v == 0.
+  unsigned long index;
+  // On v == 0 the intrinsic returns 0 and leaves `index` undefined.
+  return _BitScanReverse(&index, v) ? index : 0;
+}
+FJXL_INLINE uint32_t CtzNonZero(uint64_t v) {  // Lowest set bit; v != 0.
+  unsigned long index, low = static_cast<uint32_t>(v);  // Mask is 32-bit,
+  _BitScanForward(&index, low ? low : static_cast<uint32_t>(v >> 32));
+  return low ? index : index + 32;  // so scan the half with the lowest bit.
+}
+#else
+#define FJXL_INLINE inline __attribute__((always_inline))
+FJXL_INLINE uint32_t FloorLog2(uint32_t v) {  // Highest set bit; 0 if v == 0.
+  return v == 0 ? 0 : 31 - __builtin_clz(v);
+}  // CtzNonZero below: index of the lowest set bit of a nonzero value.
+FJXL_INLINE uint32_t CtzNonZero(uint64_t v) { return __builtin_ctzll(v); }
+#endif
+
+// Stores `data` at `tgt` in little-endian byte order (memcpy when native LE).
+FJXL_INLINE void StoreLE64(uint8_t* tgt, uint64_t data) {
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+  memcpy(tgt, &data, 8);
+#else
+  for (int shift = 0; shift < 64; shift += 8) {
+    *tgt++ = (data >> shift) & 0xFF;
+  }
+#endif
+}
+
+FJXL_INLINE size_t AddBits(uint32_t count, uint64_t bits, uint8_t* data_buf,
+                           size_t& bits_in_buffer, uint64_t& bit_buffer) {
+  bit_buffer |= bits << bits_in_buffer;  // Queue above the pending bits.
+  bits_in_buffer += count;
+  StoreLE64(data_buf, bit_buffer);  // Always stores all 8 bytes.
+  const size_t whole_bytes = bits_in_buffer / 8;  // Bytes now complete.
+  bits_in_buffer %= 8;  // The sub-byte remainder stays pending.
+  bit_buffer >>= whole_bytes * 8;
+  return whole_bytes;
+}
+
+struct BitWriter {  // A malloc'ed byte buffer plus a 64-bit bit accumulator.
+  void Allocate(size_t maximum_bit_size) {  // Must be called only once.
+    assert(data == nullptr);
+    // Leave some padding.
+    data.reset(static_cast<uint8_t*>(malloc(maximum_bit_size / 8 + 64)));
+  }
+
+  void Write(uint32_t count, uint64_t bits) {  // Appends `count` bits.
+    bytes_written += AddBits(count, bits, data.get() + bytes_written,
+                             bits_in_buffer, buffer);
+  }
+
+  void ZeroPadToByte() {  // Pads with zero bits up to the next byte boundary.
+    if (bits_in_buffer != 0) {
+      Write(8 - bits_in_buffer, 0);
+    }
+  }
+
+  FJXL_INLINE void WriteMultiple(const uint64_t* nbits, const uint64_t* bits,
+                                 size_t n) {
+    // Necessary because Write() is only guaranteed to work with <=56 bits.
+    // Trying to SIMD-fy this code results in lower speed (and definitely less
+    // clarity).
+    {
+      for (size_t i = 0; i < n; i++) {
+        this->buffer |= bits[i] << this->bits_in_buffer;
+        memcpy(this->data.get() + this->bytes_written, &this->buffer, 8);
+        uint64_t shift = 64 - this->bits_in_buffer;  // Room left before adding.
+        this->bits_in_buffer += nbits[i];
+        // This `if` seems to be faster than using ternaries.
+        if (this->bits_in_buffer >= 64) {  // A full 64-bit word was stored.
+          uint64_t next_buffer = bits[i] >> shift;  // Bits that did not fit.
+          this->buffer = next_buffer;
+          this->bits_in_buffer -= 64;
+          this->bytes_written += 8;
+        }
+      }
+      memcpy(this->data.get() + this->bytes_written, &this->buffer, 8);
+      size_t bytes_in_buffer = this->bits_in_buffer / 8;  // Whole bytes pending.
+      this->bits_in_buffer -= bytes_in_buffer * 8;
+      this->buffer >>= bytes_in_buffer * 8;
+      this->bytes_written += bytes_in_buffer;
+    }
+  }
+
+  std::unique_ptr<uint8_t[], void (*)(void*)> data = {nullptr, free};
+  size_t bytes_written = 0;   // Whole bytes already counted as written.
+  size_t bits_in_buffer = 0;  // Number of pending bits held in `buffer`.
+  uint64_t buffer = 0;        // Pending bits, filled from the lowest bit up.
+};
+
+}  // namespace
+
+extern "C" {
+
+struct JxlFastLosslessFrameState {
+  size_t width;
+  size_t height;
+  size_t nb_chans;  // Writers used per group; 2 or 4 implies an alpha channel.
+  size_t bitdepth;
+  BitWriter header;  // Filled in by JxlFastLosslessPrepareHeader.
+  std::vector<std::array<BitWriter, 4>> group_data;  // Indexed [group][chan].
+  size_t current_bit_writer = 0;   // WriteOutput resume: 0 = header, then groups.
+  size_t bit_writer_byte_pos = 0;  // WriteOutput resume: bytes already copied.
+  size_t bits_in_buffer = 0;       // WriteOutput resume: pending output bits.
+  uint64_t bit_buffer = 0;         // WriteOutput resume: the pending bits.
+};
+
+size_t JxlFastLosslessOutputSize(const JxlFastLosslessFrameState* frame) {
+  // Header bytes plus, per group, the total bits of its channel writers
+  // rounded up to whole bytes.
+  size_t total_bytes = frame->header.bytes_written;
+  for (const auto& group : frame->group_data) {
+    size_t group_bits = 0;
+    for (size_t c = 0; c < frame->nb_chans; c++) {
+      group_bits += group[c].bytes_written * 8 + group[c].bits_in_buffer;
+    }
+    total_bytes += (group_bits + 7) / 8;
+  }
+  return total_bytes;
+}
+
+size_t JxlFastLosslessMaxRequiredOutput(
+    const JxlFastLosslessFrameState* frame) {
+  return 32 + JxlFastLosslessOutputSize(frame);  // Exact size plus headroom.
+}
+
+void JxlFastLosslessPrepareHeader(JxlFastLosslessFrameState* frame,
+                                  int add_image_header, int is_last) {
+  BitWriter* output = &frame->header;
+  output->Allocate(1000 + frame->group_data.size() * 32);  // Size is in bits.
+
+  std::vector<size_t> group_sizes(frame->group_data.size());  // In bytes.
+  for (size_t i = 0; i < frame->group_data.size(); i++) {
+    size_t sz = 0;
+    for (size_t j = 0; j < frame->nb_chans; j++) {
+      const auto& writer = frame->group_data[i][j];
+      sz += writer.bytes_written * 8 + writer.bits_in_buffer;
+    }
+    sz = (sz + 7) / 8;  // Bits to bytes, rounded up.
+    group_sizes[i] = sz;
+  }
+
+  bool have_alpha = (frame->nb_chans == 2 || frame->nb_chans == 4);
+
+#if FJXL_STANDALONE
+  if (add_image_header) {
+    // Signature
+    output->Write(16, 0x0AFF);
+
+    // Size header, hand-crafted.
+    // Not small
+    output->Write(1, 0);
+
+    auto wsz = [output](size_t size) {  // 2-bit selector, then size - 1.
+      if (size - 1 < (1 << 9)) {
+        output->Write(2, 0b00);
+        output->Write(9, size - 1);
+      } else if (size - 1 < (1 << 13)) {
+        output->Write(2, 0b01);
+        output->Write(13, size - 1);
+      } else if (size - 1 < (1 << 18)) {
+        output->Write(2, 0b10);
+        output->Write(18, size - 1);
+      } else {
+        output->Write(2, 0b11);
+        output->Write(30, size - 1);
+      }
+    };
+
+    wsz(frame->height);
+
+    // No special ratio.
+    output->Write(3, 0);
+
+    wsz(frame->width);
+
+    // Hand-crafted ImageMetadata.
+    output->Write(1, 0);  // all_default
+    output->Write(1, 0);  // extra_fields
+    output->Write(1, 0);  // bit_depth.floating_point_sample
+    if (frame->bitdepth == 8) {
+      output->Write(2, 0b00);  // bit_depth.bits_per_sample = 8
+    } else if (frame->bitdepth == 10) {
+      output->Write(2, 0b01);  // bit_depth.bits_per_sample = 10
+    } else if (frame->bitdepth == 12) {
+      output->Write(2, 0b10);  // bit_depth.bits_per_sample = 12
+    } else {
+      output->Write(2, 0b11);  // 1 + u(6)
+      output->Write(6, frame->bitdepth - 1);
+    }
+    if (frame->bitdepth <= 14) {
+      output->Write(1, 1);  // 16-bit-buffer sufficient
+    } else {
+      output->Write(1, 0);  // 16-bit-buffer NOT sufficient
+    }
+    if (have_alpha) {
+      output->Write(2, 0b01);  // One extra channel
+      output->Write(1, 1);     // ... all_default (ie. 8-bit alpha)
+    } else {
+      output->Write(2, 0b00);  // No extra channel
+    }
+    output->Write(1, 0);  // Not XYB
+    if (frame->nb_chans > 2) {
+      output->Write(1, 1);  // color_encoding.all_default (sRGB)
+    } else {
+      output->Write(1, 0);     // color_encoding.all_default false
+      output->Write(1, 0);     // color_encoding.want_icc false
+      output->Write(2, 1);     // grayscale
+      output->Write(2, 1);     // D65
+      output->Write(1, 0);     // no gamma transfer function
+      output->Write(2, 0b10);  // tf: 2 + u(4)
+      output->Write(4, 11);    // tf of sRGB
+      output->Write(2, 1);     // relative rendering intent
+    }
+    output->Write(2, 0b00);  // No extensions.
+
+    output->Write(1, 1);  // all_default transform data
+
+    // No ICC, no preview. Frame should start at byte boundary.
+    output->ZeroPadToByte();
+  }
+#else
+  assert(!add_image_header);
+#endif
+
+  // Handcrafted frame header.
+  output->Write(1, 0);     // all_default
+  output->Write(2, 0b00);  // regular frame
+  output->Write(1, 1);     // modular
+  output->Write(2, 0b00);  // default flags
+  output->Write(1, 0);     // not YCbCr
+  output->Write(2, 0b00);  // no upsampling
+  if (have_alpha) {
+    output->Write(2, 0b00);  // no alpha upsampling
+  }
+  output->Write(2, 0b01);  // default group size
+  output->Write(2, 0b00);  // exactly one pass
+  output->Write(1, 0);     // no custom size or origin
+  output->Write(2, 0b00);  // kReplace blending mode
+  if (have_alpha) {
+    output->Write(2, 0b00);  // kReplace blending mode for alpha channel
+  }
+  output->Write(1, is_last);  // is_last
+  output->Write(2, 0b00);     // a frame has no name
+  output->Write(1, 0);        // loop filter is not all_default
+  output->Write(1, 0);        // no gaborish
+  output->Write(2, 0);        // 0 EPF iters
+  output->Write(2, 0b00);     // No LF extensions
+  output->Write(2, 0b00);     // No FH extensions
+
+  output->Write(1, 0);      // No TOC permutation
+  output->ZeroPadToByte();  // TOC is byte-aligned.
+  for (size_t i = 0; i < frame->group_data.size(); i++) {  // One entry each.
+    size_t sz = group_sizes[i];
+    if (sz < (1 << 10)) {
+      output->Write(2, 0b00);
+      output->Write(10, sz);
+    } else if (sz - 1024 < (1 << 14)) {
+      output->Write(2, 0b01);
+      output->Write(14, sz - 1024);
+    } else if (sz - 17408 < (1 << 22)) {
+      output->Write(2, 0b10);
+      output->Write(22, sz - 17408);
+    } else {
+      output->Write(2, 0b11);
+      output->Write(30, sz - 4211712);
+    }
+  }
+  output->ZeroPadToByte();  // Groups are byte-aligned.
+}
+
+#if FJXL_ENABLE_AVX512
+__attribute__((target("avx512vbmi2"))) static size_t AppendBytesWithBitOffset(
+    const uint8_t* data, size_t n, size_t bit_buffer_nbits,
+    unsigned char* output, uint64_t& bit_buffer) {
+  if (n < 128) {  // Short inputs are left entirely to the caller.
+    return 0;
+  }
+
+  size_t i = 0;
+  __m512i shift = _mm512_set1_epi64(64 - bit_buffer_nbits);
+  __m512i carry = _mm512_set1_epi64(bit_buffer << (64 - bit_buffer_nbits));
+
+  for (; i + 64 <= n; i += 64) {  // One 64-byte vector per iteration.
+    __m512i current = _mm512_loadu_si512(data + i);
+    __m512i previous_u64 = _mm512_alignr_epi64(current, carry, 7);
+    carry = current;
+    __m512i out = _mm512_shrdv_epi64(previous_u64, current, shift);
+    _mm512_storeu_si512(output + i, out);
+  }
+
+  bit_buffer = data[i - 1] >> (8 - bit_buffer_nbits);  // Leftover top bits.
+
+  return i;  // Bytes consumed; the caller handles the remainder.
+}
+#endif
+
+size_t JxlFastLosslessWriteOutput(JxlFastLosslessFrameState* frame,
+                                  unsigned char* output, size_t output_size) {
+  assert(output_size >= 32);  // Callers must offer at least 32 bytes.
+  unsigned char* initial_output = output;  // To compute the bytes written.
+  size_t (*append_bytes_with_bit_offset)(const uint8_t*, size_t, size_t,
+                                         unsigned char*, uint64_t&) = nullptr;
+
+#if FJXL_ENABLE_AVX512
+  if (__builtin_cpu_supports("avx512vbmi2")) {
+    append_bytes_with_bit_offset = AppendBytesWithBitOffset;
+  }
+#endif
+
+  while (true) {
+    size_t& cur = frame->current_bit_writer;
+    size_t& bw_pos = frame->bit_writer_byte_pos;
+    if (cur >= 1 + frame->group_data.size() * frame->nb_chans) {  // All done.
+      return output - initial_output;
+    }
+    if (output_size <= 8) {  // Stores below write 8 bytes at a time.
+      return output - initial_output;
+    }
+    size_t nbc = frame->nb_chans;
+    const BitWriter& writer =
+        cur == 0 ? frame->header  // Writer 0 is the header.
+                 : frame->group_data[(cur - 1) / nbc][(cur - 1) % nbc];
+    size_t full_byte_count =
+        std::min(output_size - 8, writer.bytes_written - bw_pos);
+    if (frame->bits_in_buffer == 0) {  // Output is byte-aligned: plain copy.
+      memcpy(output, writer.data.get() + bw_pos, full_byte_count);
+    } else {
+      size_t i = 0;
+      if (append_bytes_with_bit_offset) {
+        i += append_bytes_with_bit_offset(
+            writer.data.get() + bw_pos, full_byte_count, frame->bits_in_buffer,
+            output, frame->bit_buffer);
+      }
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+      // Copy 8 bytes at a time until we reach the border.
+      for (; i + 8 < full_byte_count; i += 8) {
+        uint64_t chunk;
+        memcpy(&chunk, writer.data.get() + bw_pos + i, 8);
+        uint64_t out = frame->bit_buffer | (chunk << frame->bits_in_buffer);
+        memcpy(output + i, &out, 8);
+        frame->bit_buffer = chunk >> (64 - frame->bits_in_buffer);
+      }
+#endif
+      for (; i < full_byte_count; i++) {  // Remaining bytes, one at a time.
+        AddBits(8, writer.data.get()[bw_pos + i], output + i,
+                frame->bits_in_buffer, frame->bit_buffer);
+      }
+    }
+    output += full_byte_count;
+    output_size -= full_byte_count;
+    bw_pos += full_byte_count;
+    if (bw_pos == writer.bytes_written) {  // This writer's bytes are all out.
+      auto write = [&](size_t num, uint64_t bits) {
+        size_t n = AddBits(num, bits, output, frame->bits_in_buffer,
+                           frame->bit_buffer);
+        output += n;
+        output_size -= n;
+      };
+      if (writer.bits_in_buffer) {  // Flush the writer's trailing partial byte.
+        write(writer.bits_in_buffer, writer.buffer);
+      }
+      bw_pos = 0;
+      cur++;
+      if ((cur - 1) % nbc == 0 && frame->bits_in_buffer != 0) {
+        write(8 - frame->bits_in_buffer, 0);  // Byte-align after header/group.
+      }
+    }
+  }
+}
+
+void JxlFastLosslessFreeFrameState(JxlFastLosslessFrameState* frame) {
+  delete frame;  // Member destructors release the writers' buffers.
+}
+
+}  // extern "C"
+
+#endif
+
+#ifdef FJXL_SELF_INCLUDE
+
+namespace {
+
+constexpr size_t kNumRawSymbols = 19;
+constexpr size_t kNumLZ77 = 33;
+constexpr size_t kLZ77CacheSize = 32;
+
+constexpr size_t kLZ77Offset = 224;
+constexpr size_t kLZ77MinLength = 7;
+
+void EncodeHybridUintLZ77(uint32_t value, uint32_t* token, uint32_t* nbits,
+                          uint32_t* bits) {
+  // 400 config: values below 16 are their own token with no extra bits.
+  const bool direct = value < 16;
+  *token = direct ? value : 12 + FloorLog2(value);
+  *nbits = direct ? 0 : FloorLog2(value);
+  *bits = direct ? 0 : value - (1 << *nbits);
+}
+
+struct PrefixCode {
+  uint8_t raw_nbits[kNumRawSymbols] = {};
+  uint8_t raw_bits[kNumRawSymbols] = {};
+
+  alignas(64) uint8_t raw_nbits_simd[16] = {};
+  alignas(64) uint8_t raw_bits_simd[16] = {};
+
+  uint8_t lz77_nbits[kNumLZ77] = {};
+  uint16_t lz77_bits[kNumLZ77] = {};
+
+  uint64_t lz77_cache_bits[kLZ77CacheSize] = {};
+  uint8_t lz77_cache_nbits[kLZ77CacheSize] = {};
+
+  static uint16_t BitReverse(size_t nbits, uint16_t bits) {
+    constexpr uint16_t kNibbleLookup[16] = {
+        0b0000, 0b1000, 0b0100, 0b1100, 0b0010, 0b1010, 0b0110, 0b1110,
+        0b0001, 0b1001, 0b0101, 0b1101, 0b0011, 0b1011, 0b0111, 0b1111,
+    };
+    uint16_t rev16 = (kNibbleLookup[bits & 0xF] << 12) |
+                     (kNibbleLookup[(bits >> 4) & 0xF] << 8) |
+                     (kNibbleLookup[(bits >> 8) & 0xF] << 4) |
+                     (kNibbleLookup[bits >> 12]);
+    return rev16 >> (16 - nbits);
+  }
+
+  // Create the prefix codes given the code lengths, storing each codeword
+  // after passing it through BitReverse.
+  //
+  // Supports the code lengths being split into two halves that together form
+  // a single alphabet, first chunk first. Codewords of the first chunk are
+  // stored in 8 bits, so its lengths must be in [1, 8]; the second chunk is
+  // stored in 16 bits and accepts any length in [0, kMaxCodeLength]. A chunk
+  // of size 0 is never dereferenced, so its pointers may be null.
+  static void ComputeCanonicalCode(const uint8_t* first_chunk_nbits,
+                                   uint8_t* first_chunk_bits,
+                                   size_t first_chunk_size,
+                                   const uint8_t* second_chunk_nbits,
+                                   uint16_t* second_chunk_bits,
+                                   size_t second_chunk_size) {
+    constexpr size_t kMaxCodeLength = 15;
+    uint8_t code_length_counts[kMaxCodeLength + 1] = {};
+    for (size_t i = 0; i < first_chunk_size; i++) {
+      // Validate before indexing, so that an out-of-range length trips the
+      // assert instead of first writing past the end of code_length_counts.
+      assert(first_chunk_nbits[i] > 0);
+      assert(first_chunk_nbits[i] <= 8);
+      assert(first_chunk_nbits[i] <= kMaxCodeLength);
+      code_length_counts[first_chunk_nbits[i]]++;
+    }
+    for (size_t i = 0; i < second_chunk_size; i++) {
+      assert(second_chunk_nbits[i] <= kMaxCodeLength);
+      code_length_counts[second_chunk_nbits[i]]++;
+    }
+
+    // next_code[len] is the next codeword to hand out for length `len`.
+    // Zero-length (unused) symbols are counted in code_length_counts[0]; that
+    // only adds a multiple of 2^len to next_code[len], and BitReverse keeps
+    // just the low `len` bits of the value it is given.
+    uint16_t next_code[kMaxCodeLength + 1] = {};
+
+    uint16_t code = 0;
+    for (size_t i = 1; i < kMaxCodeLength + 1; i++) {
+      code = (code + code_length_counts[i - 1]) << 1;
+      next_code[i] = code;
+    }
+
+    // Codewords are assigned in symbol order, first chunk before second.
+    for (size_t i = 0; i < first_chunk_size; i++) {
+      first_chunk_bits[i] =
+          BitReverse(first_chunk_nbits[i], next_code[first_chunk_nbits[i]]++);
+    }
+    for (size_t i = 0; i < second_chunk_size; i++) {
+      second_chunk_bits[i] =
+          BitReverse(second_chunk_nbits[i], next_code[second_chunk_nbits[i]]++);
+    }
+  }
+
+  // Dynamic program behind ComputeCodeLengthsNonZero.
+  //
+  // Each symbol choosing length `len` consumes 2^(precision - len) units of
+  // a budget of 2^precision units. cost(s, off) is the cheapest total of
+  // freqs[i] * len over the first `s` symbols that consumes exactly `off`
+  // units, or `infty` if no such assignment exists. The lengths are then
+  // recovered by walking back from cost(n, 2^precision) and written to nbits.
+  //
+  // T is the integer type used for costs; `infty` must exceed every
+  // reachable cost.
+  template <typename T>
+  static void ComputeCodeLengthsNonZeroImpl(const uint64_t* freqs, size_t n,
+                                            size_t precision, T infty,
+                                            uint8_t* min_limit,
+                                            uint8_t* max_limit,
+                                            uint8_t* nbits) {
+    const size_t budget = 1U << precision;
+    const size_t row_len = budget + 1;
+    std::vector<T> table(row_len * (n + 1), infty);
+    auto cost = [&](size_t s, size_t used) -> T& {
+      return table[s * row_len + used];
+    };
+    cost(0, 0) = 0;
+
+    // Forward pass: extend every reachable state by each allowed length.
+    for (size_t s = 0; s < n; ++s) {
+      const T freq = static_cast<T>(freqs[s]);
+      for (T len = min_limit[s]; len <= max_limit[s]; ++len) {
+        const size_t step = 1U << (precision - len);
+        const T added = freq * len;
+        for (size_t used = 0; used + step <= budget; ++used) {
+          T& best = cost(s + 1, used + step);
+          const T candidate = cost(s, used) + added;
+          if (candidate < best) best = candidate;
+        }
+      }
+    }
+
+    size_t remaining = budget;
+    assert(cost(n, remaining) != infty);
+
+    // Backward pass: for each symbol, last to first, take the shortest
+    // length that reproduces the optimal cost.
+    for (size_t s = n; s-- > 0;) {
+      assert(remaining > 0);
+      for (size_t len = min_limit[s]; len <= max_limit[s]; ++len) {
+        const size_t step = 1U << (precision - len);
+        if (step > remaining) continue;
+        if (cost(s + 1, remaining) ==
+            cost(s, remaining - step) + freqs[s] * len) {
+          remaining -= step;
+          nbits[s] = len;
+          break;
+        }
+      }
+    }
+  }
+
+  // Computes nbits[i] for i < n, subject to min_limit[i] <= nbits[i] <=
+  // max_limit[i] and sum 2**-nbits[i] == 1, so as to minimize
+  // sum(nbits[i] * freqs[i]).
+  //
+  // Every freqs[i] must be non-zero. Entries of min_limit that are 0 are
+  // raised to 1 in place. With n == 0 there is nothing to compute and the
+  // function returns without touching nbits.
+  static void ComputeCodeLengthsNonZero(const uint64_t* freqs, size_t n,
+                                        uint8_t* min_limit, uint8_t* max_limit,
+                                        uint8_t* nbits) {
+    // An empty alphabet would leave shortest_length at its 255 sentinel and
+    // make the precision adjustment below wrap around.
+    if (n == 0) return;
+
+    size_t precision = 0;
+    size_t shortest_length = 255;
+    uint64_t freqsum = 0;
+    for (size_t i = 0; i < n; i++) {
+      assert(freqs[i] != 0);
+      freqsum += freqs[i];
+      if (min_limit[i] < 1) min_limit[i] = 1;
+      assert(min_limit[i] <= max_limit[i]);
+      precision = std::max<size_t>(max_limit[i], precision);
+      shortest_length = std::min<size_t>(min_limit[i], shortest_length);
+    }
+    // If all the minimum limits are greater than 1, shift precision so that we
+    // behave as if the shortest was 1.
+    // NOTE(review): ComputeCodeLengthsNonZeroImpl still iterates over the
+    // unshifted limits, so when shortest_length > 1 the largest max_limit
+    // exceeds the shifted precision there. Confirm that callers always leave
+    // at least one min_limit at 0 or 1.
+    precision -= shortest_length - 1;
+    // Upper bound on any achievable cost, used as the "unreachable" marker.
+    uint64_t infty = freqsum * precision;
+    // Use 32-bit costs when they cannot overflow; fall back to 64-bit.
+    if (infty < std::numeric_limits<uint32_t>::max() / 2) {
+      ComputeCodeLengthsNonZeroImpl(freqs, n, precision,
+                                    static_cast<uint32_t>(infty), min_limit,
+                                    max_limit, nbits);
+    } else {
+      ComputeCodeLengthsNonZeroImpl(freqs, n, precision, infty, min_limit,
+                                    max_limit, nbits);
+    }
+  }
+
+  // Largest alphabet ComputeCodeLengths accepts: the bigger of the LZ77
+  // alphabet and the raw alphabet plus one extra symbol.
+  static constexpr size_t kMaxNumSymbols =
+      kNumLZ77 > kNumRawSymbols + 1 ? kNumLZ77 : kNumRawSymbols + 1;
+
+  // Computes code lengths for an alphabet that may contain unused symbols.
+  // Symbols with freqs[i] == 0 get nbits[i] == 0; the remaining ones are
+  // compacted, handed to ComputeCodeLengthsNonZero together with their
+  // limits, and the resulting lengths are scattered back to their positions.
+  // The caller's limit arrays are copied and never modified.
+  static void ComputeCodeLengths(const uint64_t* freqs, size_t n,
+                                 const uint8_t* min_limit_in,
+                                 const uint8_t* max_limit_in, uint8_t* nbits) {
+    assert(n <= kMaxNumSymbols);
+    uint64_t used_freqs[kMaxNumSymbols];
+    uint8_t used_min[kMaxNumSymbols];
+    uint8_t used_max[kMaxNumSymbols];
+    size_t num_used = 0;
+    for (size_t i = 0; i < n; i++) {
+      if (freqs[i] == 0) continue;
+      used_freqs[num_used] = freqs[i];
+      used_min[num_used] = min_limit_in[i];
+      used_max[num_used] = max_limit_in[i];
+      num_used++;
+    }
+
+    uint8_t used_nbits[kMaxNumSymbols] = {};
+    ComputeCodeLengthsNonZero(used_freqs, num_used, used_min, used_max,
+                              used_nbits);
+
+    size_t next_used = 0;
+    for (size_t i = 0; i < n; i++) {
+      nbits[i] = freqs[i] ? used_nbits[next_used++] : 0;
+    }
+  }
+
+  // Invalid code, used to construct arrays.
+  // The body is empty: no code lengths or codewords are computed here. A
+  // usable code is produced by the constructor that takes symbol counts.
+  PrefixCode() {}
+
+  // Builds the prefix code from symbol histograms.
+  //
+  // raw_counts holds kNumRawSymbols entries and lz77_counts holds kNumLZ77
+  // entries; neither array is modified. The code is built in two levels: a
+  // first code over the raw symbols plus one symbol standing for "any LZ77
+  // symbol", and a second code over the LZ77 symbols whose lengths are added
+  // to the length of that stand-in symbol. BitDepth supplies the per-symbol
+  // length limits and the SIMD table preparation.
+  template <typename BitDepth>
+  PrefixCode(BitDepth, uint64_t* raw_counts, uint64_t* lz77_counts) {
+    // Level 1 histogram: raw symbols with trailing zero counts dropped,
+    // followed by a single slot holding the sum of all LZ77 counts.
+    uint64_t level1_counts[kNumRawSymbols + 1];
+    memcpy(level1_counts, raw_counts, kNumRawSymbols * sizeof(uint64_t));
+    size_t numraw = kNumRawSymbols;
+    while (numraw > 0 && level1_counts[numraw - 1] == 0) {
+      numraw--;
+    }
+
+    uint64_t lz77_total = 0;
+    for (size_t i = 0; i < kNumLZ77; i++) {
+      lz77_total += lz77_counts[i];
+    }
+    level1_counts[numraw] = lz77_total;
+
+    uint8_t level1_nbits[kNumRawSymbols + 1] = {};
+    ComputeCodeLengths(level1_counts, numraw + 1, BitDepth::kMinRawLength,
+                       BitDepth::kMaxRawLength, level1_nbits);
+    const uint8_t lz77_prefix_nbits = level1_nbits[numraw];
+
+    // Level 2: LZ77 symbols may use whatever is left of the 15-bit maximum
+    // once the stand-in symbol's length is accounted for.
+    uint8_t level2_min[kNumLZ77] = {};
+    uint8_t level2_max[kNumLZ77];
+    const uint8_t level2_cap = 15 - lz77_prefix_nbits;
+    for (uint8_t& limit : level2_max) {
+      limit = level2_cap;
+    }
+    size_t num_lz77 = kNumLZ77;
+    while (num_lz77 > 0 && lz77_counts[num_lz77 - 1] == 0) {
+      num_lz77--;
+    }
+    uint8_t level2_nbits[kNumLZ77] = {};
+    ComputeCodeLengths(lz77_counts, num_lz77, level2_min, level2_max,
+                       level2_nbits);
+
+    for (size_t i = 0; i < numraw; i++) {
+      raw_nbits[i] = level1_nbits[i];
+    }
+    for (size_t i = 0; i < num_lz77; i++) {
+      if (level2_nbits[i]) {
+        lz77_nbits[i] = lz77_prefix_nbits + level2_nbits[i];
+      } else {
+        lz77_nbits[i] = 0;
+      }
+    }
+
+    ComputeCanonicalCode(raw_nbits, raw_bits, numraw, lz77_nbits, lz77_bits,
+                         kNumLZ77);
+    BitDepth::PrepareForSimd(raw_nbits, raw_bits, numraw, raw_nbits_simd,
+                             raw_bits_simd);
+
+    // Prepare lz77 cache: for every cached count, the raw symbol 0 codeword
+    // in the low bits, then the LZ77 token codeword, then its extra bits.
+    for (size_t count = 0; count < kLZ77CacheSize; count++) {
+      unsigned token, nbits, bits;
+      EncodeHybridUintLZ77(count, &token, &nbits, &bits);
+      const unsigned token_and_extra =
+          (bits << lz77_nbits[token]) | lz77_bits[token];
+      const unsigned with_raw0 =
+          (token_and_extra << raw_nbits[0]) | raw_bits[0];
+      lz77_cache_nbits[count] = lz77_nbits[token] + nbits + raw_nbits[0];
+      lz77_cache_bits[count] = with_raw0;
+    }
+  }
+
+  // Writes the description of this prefix code to `writer`: first the code
+  // used for the code lengths themselves, then the code lengths of the raw
+  // symbols, a run of zeros up to kLZ77Offset, and finally the code lengths
+  // of the LZ77 symbols.
+  void WriteTo(BitWriter* writer) const {
+    // Histogram over the code-length alphabet. Entries 0..15 count literal
+    // code lengths; entry 17 is the symbol emitted below for runs of zeros
+    // and is seeded with a fixed count.
+    uint64_t code_length_counts[18] = {};
+    code_length_counts[17] = 3 + 2 * (kNumLZ77 - 1);
+    for (size_t i = 0; i < kNumRawSymbols; i++) {
+      code_length_counts[raw_nbits[i]]++;
+    }
+    for (size_t i = 0; i < kNumLZ77; i++) {
+      code_length_counts[lz77_nbits[i]]++;
+    }
+    uint8_t code_length_nbits[18] = {};
+    uint8_t code_length_nbits_min[18] = {};
+    uint8_t code_length_nbits_max[18] = {
+        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    };
+    ComputeCodeLengths(code_length_counts, 18, code_length_nbits_min,
+                       code_length_nbits_max, code_length_nbits);
+    writer->Write(2, 0b00);  // HSKIP = 0, i.e. don't skip code lengths.
+
+    // As per Brotli RFC.
+    uint8_t code_length_order[18] = {1, 2, 3, 4,  0,  5,  17, 6,  16,
+                                     7, 8, 9, 10, 11, 12, 13, 14, 15};
+    // Fixed code used for the lengths (0..5) of the code-length code.
+    uint8_t code_length_length_nbits[] = {2, 4, 3, 2, 2, 4};
+    uint8_t code_length_length_bits[] = {0, 7, 3, 2, 1, 15};
+
+    // Encode lengths of code lengths, dropping trailing zeros (in
+    // code_length_order). The lower bound keeps an all-zero table from
+    // indexing code_length_order[-1].
+    size_t num_code_lengths = 18;
+    while (num_code_lengths > 0 &&
+           code_length_nbits[code_length_order[num_code_lengths - 1]] == 0) {
+      num_code_lengths--;
+    }
+    for (size_t i = 0; i < num_code_lengths; i++) {
+      int symbol = code_length_nbits[code_length_order[i]];
+      writer->Write(code_length_length_nbits[symbol],
+                    code_length_length_bits[symbol]);
+    }
+
+    // Compute the canonical codes for the codes that represent the lengths of
+    // the actual codes for data.
+    uint16_t code_length_bits[18] = {};
+    ComputeCanonicalCode(nullptr, nullptr, 0, code_length_nbits,
+                         code_length_bits, 18);
+    // Encode raw bit code lengths.
+    for (size_t i = 0; i < kNumRawSymbols; i++) {
+      writer->Write(code_length_nbits[raw_nbits[i]],
+                    code_length_bits[raw_nbits[i]]);
+    }
+    // Trailing LZ77 symbols with length 0 are not written. As in the
+    // constructor, stop at 0 so an all-zero table cannot read lz77_nbits[-1].
+    size_t num_lz77 = kNumLZ77;
+    while (num_lz77 > 0 && lz77_nbits[num_lz77 - 1] == 0) {
+      num_lz77--;
+    }
+    // Encode 0s until 224 (start of LZ77 symbols). This is in total 224-19 =
+    // 205.
+    static_assert(kLZ77Offset == 224, "");
+    static_assert(kNumRawSymbols == 19, "");
+    writer->Write(code_length_nbits[17], code_length_bits[17]);
+    writer->Write(3, 0b010);  // 5
+    writer->Write(code_length_nbits[17], code_length_bits[17]);
+    writer->Write(3, 0b000);  // (5-2)*8 + 3 = 27
+    writer->Write(code_length_nbits[17], code_length_bits[17]);
+    writer->Write(3, 0b010);  // (27-2)*8 + 5 = 205
+    // Encode LZ77 symbols, with values 224+i.
+    for (size_t i = 0; i < num_lz77; i++) {
+      writer->Write(code_length_nbits[lz77_nbits[i]],
+                    code_length_bits[lz77_nbits[i]]);
+    }
+  }
+};
+
+// A pair of vectors of the same type, returned by operations whose result
+// does not fit in a single vector. It is brace-initialized by its users, so
+// the member order (low first, then hi) is significant.
+template <typename T>
+struct VecPair {
+  T low;
+  T hi;
+};
+
+#ifdef FJXL_GENERIC_SIMD
+#undef FJXL_GENERIC_SIMD
+#endif
+
+#ifdef FJXL_AVX512
+#define FJXL_GENERIC_SIMD
+struct SIMDVec32;
+// Per-lane predicate for the AVX-512 SIMDVec32: one bit of `mask` per lane.
+struct Mask32 {
+  __mmask16 mask;
+  // Lane-wise select between two vectors; defined once SIMDVec32 is complete.
+  SIMDVec32 IfThenElse(const SIMDVec32& if_true, const SIMDVec32& if_false);
+  // Passes the complemented mask bits to CtzNonZero (presumably a
+  // count-trailing-zeros helper, which would make this the number of leading
+  // lanes whose bit is set). Widening to 64 bits before complementing keeps
+  // the argument non-zero even when all 16 bits are set.
+  size_t CountPrefix() const {
+    const uint64_t lane_bits = _cvtmask16_u32(mask);
+    return CtzNonZero(~lane_bits);
+  }
+};
+
+// Sixteen 32-bit lanes held in one 512-bit register (AVX-512 build).
+struct SIMDVec32 {
+  __m512i vec;
+
+  static constexpr size_t kLanes = 16;
+
+  // Unaligned load of kLanes values.
+  FJXL_INLINE static SIMDVec32 Load(const uint32_t* data) {
+    const __m512i loaded =
+        _mm512_loadu_si512(reinterpret_cast<const __m512i*>(data));
+    return SIMDVec32{loaded};
+  }
+  // Unaligned store of kLanes values.
+  FJXL_INLINE void Store(uint32_t* data) {
+    _mm512_storeu_si512(reinterpret_cast<__m512i*>(data), vec);
+  }
+  // Broadcasts v to every lane.
+  FJXL_INLINE static SIMDVec32 Val(uint32_t v) {
+    return SIMDVec32{_mm512_set1_epi32(v)};
+  }
+  // Per lane: 32 minus the number of leading zero bits.
+  FJXL_INLINE SIMDVec32 ValToToken() const {
+    const __m512i leading_zeros = _mm512_lzcnt_epi32(vec);
+    const __m512i width = _mm512_set1_epi32(32);
+    return SIMDVec32{_mm512_sub_epi32(width, leading_zeros)};
+  }
+  // Unsigned saturating subtraction, computed as max(vec, x) - x.
+  FJXL_INLINE SIMDVec32 SatSubU(const SIMDVec32& to_subtract) const {
+    const __m512i at_least_x = _mm512_max_epu32(vec, to_subtract.vec);
+    return SIMDVec32{_mm512_sub_epi32(at_least_x, to_subtract.vec)};
+  }
+  // Wrapping lane-wise subtraction.
+  FJXL_INLINE SIMDVec32 Sub(const SIMDVec32& to_subtract) const {
+    const __m512i diff = _mm512_sub_epi32(vec, to_subtract.vec);
+    return SIMDVec32{diff};
+  }
+  // Wrapping lane-wise addition.
+  FJXL_INLINE SIMDVec32 Add(const SIMDVec32& oth) const {
+    const __m512i sum = _mm512_add_epi32(vec, oth.vec);
+    return SIMDVec32{sum};
+  }
+  // Bitwise exclusive or.
+  FJXL_INLINE SIMDVec32 Xor(const SIMDVec32& oth) const {
+    const __m512i mixed = _mm512_xor_epi32(vec, oth.vec);
+    return SIMDVec32{mixed};
+  }
+  // Lane-wise equality.
+  FJXL_INLINE Mask32 Eq(const SIMDVec32& oth) const {
+    const __mmask16 equal = _mm512_cmpeq_epi32_mask(vec, oth.vec);
+    return Mask32{equal};
+  }
+  // Lane-wise greater-than, comparing the lanes as signed integers.
+  FJXL_INLINE Mask32 Gt(const SIMDVec32& oth) const {
+    const __mmask16 greater = _mm512_cmpgt_epi32_mask(vec, oth.vec);
+    return Mask32{greater};
+  }
+  // Per lane: 1 shifted left by the lane's value.
+  FJXL_INLINE SIMDVec32 Pow2() const {
+    const __m512i one = _mm512_set1_epi32(1);
+    return SIMDVec32{_mm512_sllv_epi32(one, vec)};
+  }
+  // Arithmetic (sign-propagating) right shift by the compile-time amount i.
+  template <size_t i>
+  FJXL_INLINE SIMDVec32 SignedShiftRight() const {
+    const __m512i shifted = _mm512_srai_epi32(vec, i);
+    return SIMDVec32{shifted};
+  }
+};
+
+struct SIMDVec16;
+
+// Per-lane predicate for the AVX-512 SIMDVec16: one bit of `mask` per lane.
+struct Mask16 {
+  __mmask32 mask;
+  // Lane-wise select between two vectors; defined once SIMDVec16 is complete.
+  SIMDVec16 IfThenElse(const SIMDVec16& if_true, const SIMDVec16& if_false);
+  // Lanes set in both masks.
+  Mask16 And(const Mask16& oth) const {
+    const __mmask32 both = _kand_mask32(mask, oth.mask);
+    return Mask16{both};
+  }
+  // Passes the complemented mask bits to CtzNonZero (presumably a
+  // count-trailing-zeros helper, which would make this the number of leading
+  // lanes whose bit is set). Widening to 64 bits before complementing keeps
+  // the argument non-zero even when all 32 bits are set.
+  size_t CountPrefix() const {
+    const uint64_t lane_bits = _cvtmask32_u32(mask);
+    return CtzNonZero(~lane_bits);
+  }
+};
+
+// Thirty-two 16-bit lanes held in one 512-bit register (AVX-512 build).
+// Besides lane-wise arithmetic, it provides the Load* helpers that
+// de-interleave kLanes pixels of packed image data into one vector per
+// channel.
+struct SIMDVec16 {
+  __m512i vec;
+
+  static constexpr size_t kLanes = 32;
+
+  // Unaligned load of kLanes values.
+  FJXL_INLINE static SIMDVec16 Load(const uint16_t* data) {
+    return SIMDVec16{_mm512_loadu_si512((__m512i*)data)};
+  }
+  // Unaligned store of kLanes values.
+  FJXL_INLINE void Store(uint16_t* data) {
+    _mm512_storeu_si512((__m512i*)data, vec);
+  }
+  // Broadcasts v to every lane.
+  FJXL_INLINE static SIMDVec16 Val(uint16_t v) {
+    return SIMDVec16{_mm512_set1_epi16(v)};
+  }
+  // Narrows two 32-bit vectors to one 16-bit vector with unsigned
+  // saturation. The pack works on each 128-bit block separately, so the
+  // 64-bit permute afterwards regroups the result: all lanes of `lo` first,
+  // then all lanes of `hi`.
+  FJXL_INLINE static SIMDVec16 FromTwo32(const SIMDVec32& lo,
+                                         const SIMDVec32& hi) {
+    auto tmp = _mm512_packus_epi32(lo.vec, hi.vec);
+    alignas(64) uint64_t perm[8] = {0, 2, 4, 6, 1, 3, 5, 7};
+    return SIMDVec16{
+        _mm512_permutex2var_epi64(tmp, _mm512_load_si512((__m512i*)perm), tmp)};
+  }
+
+  // Per 16-bit lane: 16 minus the number of leading zero bits. Only a
+  // 32-bit lzcnt is used, so the two 16-bit halves of every 32-bit element
+  // are handled separately and recombined at the end.
+  FJXL_INLINE SIMDVec16 ValToToken() const {
+    auto c16 = _mm512_set1_epi32(16);
+    auto c32 = _mm512_set1_epi32(32);
+    auto low16bit = _mm512_set1_epi32(0x0000FFFF);
+    // Upper half: lzcnt of the whole element, capped at 16 (upper half zero).
+    auto lzhi =
+        _mm512_sub_epi32(c16, _mm512_min_epu32(c16, _mm512_lzcnt_epi32(vec)));
+    // Lower half: mask off the upper half first, so lzcnt is at least 16.
+    auto lzlo = _mm512_sub_epi32(
+        c32, _mm512_lzcnt_epi32(_mm512_and_si512(low16bit, vec)));
+    return SIMDVec16{_mm512_or_si512(lzlo, _mm512_slli_epi32(lzhi, 16))};
+  }
+
+  // Unsigned saturating subtraction.
+  FJXL_INLINE SIMDVec16 SatSubU(const SIMDVec16& to_subtract) const {
+    return SIMDVec16{_mm512_subs_epu16(vec, to_subtract.vec)};
+  }
+  // Wrapping lane-wise subtraction.
+  FJXL_INLINE SIMDVec16 Sub(const SIMDVec16& to_subtract) const {
+    return SIMDVec16{_mm512_sub_epi16(vec, to_subtract.vec)};
+  }
+  // Wrapping lane-wise addition.
+  FJXL_INLINE SIMDVec16 Add(const SIMDVec16& oth) const {
+    return SIMDVec16{_mm512_add_epi16(vec, oth.vec)};
+  }
+  // Lane-wise minimum, comparing the lanes as unsigned integers.
+  FJXL_INLINE SIMDVec16 Min(const SIMDVec16& oth) const {
+    return SIMDVec16{_mm512_min_epu16(vec, oth.vec)};
+  }
+  // Lane-wise equality.
+  FJXL_INLINE Mask16 Eq(const SIMDVec16& oth) const {
+    return Mask16{_mm512_cmpeq_epi16_mask(vec, oth.vec)};
+  }
+  // Lane-wise greater-than, comparing the lanes as signed integers.
+  FJXL_INLINE Mask16 Gt(const SIMDVec16& oth) const {
+    return Mask16{_mm512_cmpgt_epi16_mask(vec, oth.vec)};
+  }
+  // Per lane: 1 shifted left by the lane's value.
+  FJXL_INLINE SIMDVec16 Pow2() const {
+    return SIMDVec16{_mm512_sllv_epi16(_mm512_set1_epi16(1), vec)};
+  }
+  // Bitwise or / xor / and.
+  FJXL_INLINE SIMDVec16 Or(const SIMDVec16& oth) const {
+    return SIMDVec16{_mm512_or_si512(vec, oth.vec)};
+  }
+  FJXL_INLINE SIMDVec16 Xor(const SIMDVec16& oth) const {
+    return SIMDVec16{_mm512_xor_si512(vec, oth.vec)};
+  }
+  FJXL_INLINE SIMDVec16 And(const SIMDVec16& oth) const {
+    return SIMDVec16{_mm512_and_si512(vec, oth.vec)};
+  }
+  // Wrapping 16-bit sum of the two vectors, arithmetically shifted right by
+  // one.
+  FJXL_INLINE SIMDVec16 HAdd(const SIMDVec16& oth) const {
+    return SIMDVec16{_mm512_srai_epi16(_mm512_add_epi16(vec, oth.vec), 1)};
+  }
+  // Sets the upper byte of every lane to 0xFF and leaves the lower byte
+  // untouched. A byte shuffle returns 0 for an index byte with its top bit
+  // set, so a following U8Lookup yields a zero upper byte.
+  FJXL_INLINE SIMDVec16 PrepareForU8Lookup() const {
+    return SIMDVec16{_mm512_or_si512(vec, _mm512_set1_epi16(0xFF00))};
+  }
+  // Uses the bytes of this vector as shuffle indices into the 16 bytes read
+  // from `table` (replicated to every 128-bit block).
+  FJXL_INLINE SIMDVec16 U8Lookup(const uint8_t* table) const {
+    return SIMDVec16{_mm512_shuffle_epi8(
+        _mm512_broadcast_i32x4(_mm_loadu_si128((__m128i*)table)), vec)};
+  }
+  // Interleaves the lanes of `low` with the lanes of this vector: `low`
+  // lands in the lower 16 bits of each 32-bit pair, this vector in the upper
+  // 16. The unpack instructions work per 128-bit block, so the permutes
+  // restore lane order: `.low` covers source lanes 0-15, `.hi` lanes 16-31.
+  FJXL_INLINE VecPair<SIMDVec16> Interleave(const SIMDVec16& low) const {
+    auto lo = _mm512_unpacklo_epi16(low.vec, vec);
+    auto hi = _mm512_unpackhi_epi16(low.vec, vec);
+    alignas(64) uint64_t perm1[8] = {0, 1, 8, 9, 2, 3, 10, 11};
+    alignas(64) uint64_t perm2[8] = {4, 5, 12, 13, 6, 7, 14, 15};
+    return {SIMDVec16{_mm512_permutex2var_epi64(
+                lo, _mm512_load_si512((__m512i*)perm1), hi)},
+            SIMDVec16{_mm512_permutex2var_epi64(
+                lo, _mm512_load_si512((__m512i*)perm2), hi)}};
+  }
+  // Zero-extends every lane to 32 bits (interleave with zero), using the
+  // same lane reordering as Interleave: `.low` holds source lanes 0-15,
+  // `.hi` lanes 16-31.
+  FJXL_INLINE VecPair<SIMDVec32> Upcast() const {
+    auto lo = _mm512_unpacklo_epi16(vec, _mm512_setzero_si512());
+    auto hi = _mm512_unpackhi_epi16(vec, _mm512_setzero_si512());
+    alignas(64) uint64_t perm1[8] = {0, 1, 8, 9, 2, 3, 10, 11};
+    alignas(64) uint64_t perm2[8] = {4, 5, 12, 13, 6, 7, 14, 15};
+    return {SIMDVec32{_mm512_permutex2var_epi64(
+                lo, _mm512_load_si512((__m512i*)perm1), hi)},
+            SIMDVec32{_mm512_permutex2var_epi64(
+                lo, _mm512_load_si512((__m512i*)perm2), hi)}};
+  }
+  // Arithmetic (sign-propagating) right shift by the compile-time amount i.
+  template <size_t i>
+  FJXL_INLINE SIMDVec16 SignedShiftRight() const {
+    return SIMDVec16{_mm512_srai_epi16(vec, i)};
+  }
+
+  // The Load* functions below read kLanes pixels and return one vector per
+  // channel. The number of bytes read is noted on each; callers must make
+  // that many bytes readable.
+
+  // One 8-bit channel: reads 32 bytes and zero-extends them to 16 bits.
+  static std::array<SIMDVec16, 1> LoadG8(const unsigned char* data) {
+    __m256i bytes = _mm256_loadu_si256((__m256i*)data);
+    return {SIMDVec16{_mm512_cvtepu8_epi16(bytes)}};
+  }
+  // One 16-bit channel: reads 64 bytes as they are.
+  static std::array<SIMDVec16, 1> LoadG16(const unsigned char* data) {
+    return {Load((const uint16_t*)data)};
+  }
+
+  // Two 8-bit channels: reads 64 bytes. The low byte of each 16-bit pair is
+  // the first channel, the high byte the second.
+  static std::array<SIMDVec16, 2> LoadGA8(const unsigned char* data) {
+    __m512i bytes = _mm512_loadu_si512((__m512i*)data);
+    __m512i gray = _mm512_and_si512(bytes, _mm512_set1_epi16(0xFF));
+    __m512i alpha = _mm512_srli_epi16(bytes, 8);
+    return {SIMDVec16{gray}, SIMDVec16{alpha}};
+  }
+  // Two 16-bit channels: reads 128 bytes. The low half of each 32-bit pair
+  // is the first channel, the high half the second; the 64-bit permute
+  // undoes the per-128-bit-block ordering of the pack.
+  static std::array<SIMDVec16, 2> LoadGA16(const unsigned char* data) {
+    __m512i bytes1 = _mm512_loadu_si512((__m512i*)data);
+    __m512i bytes2 = _mm512_loadu_si512((__m512i*)(data + 64));
+    __m512i g_mask = _mm512_set1_epi32(0xFFFF);
+    __m512i permuteidx = _mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0);
+    __m512i g = _mm512_permutexvar_epi64(
+        permuteidx, _mm512_packus_epi32(_mm512_and_si512(bytes1, g_mask),
+                                        _mm512_and_si512(bytes2, g_mask)));
+    __m512i a = _mm512_permutexvar_epi64(
+        permuteidx, _mm512_packus_epi32(_mm512_srli_epi32(bytes1, 16),
+                                        _mm512_srli_epi32(bytes2, 16)));
+    return {SIMDVec16{g}, SIMDVec16{a}};
+  }
+
+  // Three 8-bit channels: reads 96 bytes (64 + 32; the second load is
+  // zero-extended to 512 bits). A two-source byte permute picks every third
+  // byte into the low byte of each lane.
+  static std::array<SIMDVec16, 3> LoadRGB8(const unsigned char* data) {
+    __m512i bytes0 = _mm512_loadu_si512((__m512i*)data);
+    __m512i bytes1 =
+        _mm512_zextsi256_si512(_mm256_loadu_si256((__m256i*)(data + 64)));
+
+    // 0x7A = element of upper half of second vector = 0 after lookup; still in
+    // the upper half once we add 1 or 2.
+    uint8_t z = 0x7A;
+    __m512i ridx =
+        _mm512_set_epi8(z, 93, z, 90, z, 87, z, 84, z, 81, z, 78, z, 75, z, 72,
+                        z, 69, z, 66, z, 63, z, 60, z, 57, z, 54, z, 51, z, 48,
+                        z, 45, z, 42, z, 39, z, 36, z, 33, z, 30, z, 27, z, 24,
+                        z, 21, z, 18, z, 15, z, 12, z, 9, z, 6, z, 3, z, 0);
+    // The second and third channels sit one and two bytes after the first.
+    __m512i gidx = _mm512_add_epi8(ridx, _mm512_set1_epi8(1));
+    __m512i bidx = _mm512_add_epi8(gidx, _mm512_set1_epi8(1));
+    __m512i r = _mm512_permutex2var_epi8(bytes0, ridx, bytes1);
+    __m512i g = _mm512_permutex2var_epi8(bytes0, gidx, bytes1);
+    __m512i b = _mm512_permutex2var_epi8(bytes0, bidx, bytes1);
+    return {SIMDVec16{r}, SIMDVec16{g}, SIMDVec16{b}};
+  }
+  // Three 16-bit channels: reads 192 bytes. Most lanes come from a
+  // two-source permute of the first two loads; the remaining upper lanes are
+  // then filled in from the third load with a masked permute.
+  static std::array<SIMDVec16, 3> LoadRGB16(const unsigned char* data) {
+    __m512i bytes0 = _mm512_loadu_si512((__m512i*)data);
+    __m512i bytes1 = _mm512_loadu_si512((__m512i*)(data + 64));
+    __m512i bytes2 = _mm512_loadu_si512((__m512i*)(data + 128));
+
+    __m512i ridx_lo = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 60, 57,
+                                       54, 51, 48, 45, 42, 39, 36, 33, 30, 27,
+                                       24, 21, 18, 15, 12, 9, 6, 3, 0);
+    // -1 is such that when adding 1 or 2, we get the correct index for
+    // green/blue.
+    __m512i ridx_hi =
+        _mm512_set_epi16(29, 26, 23, 20, 17, 14, 11, 8, 5, 2, -1, 0, 0, 0, 0, 0,
+                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+    __m512i gidx_lo = _mm512_add_epi16(ridx_lo, _mm512_set1_epi16(1));
+    __m512i gidx_hi = _mm512_add_epi16(ridx_hi, _mm512_set1_epi16(1));
+    __m512i bidx_lo = _mm512_add_epi16(gidx_lo, _mm512_set1_epi16(1));
+    __m512i bidx_hi = _mm512_add_epi16(gidx_hi, _mm512_set1_epi16(1));
+
+    // Lanes taken from bytes2: the top 10 for the first channel, the top 11
+    // for the other two.
+    __mmask32 rmask = _cvtu32_mask32(0b11111111110000000000000000000000);
+    __mmask32 gbmask = _cvtu32_mask32(0b11111111111000000000000000000000);
+
+    __m512i rlo = _mm512_permutex2var_epi16(bytes0, ridx_lo, bytes1);
+    __m512i glo = _mm512_permutex2var_epi16(bytes0, gidx_lo, bytes1);
+    __m512i blo = _mm512_permutex2var_epi16(bytes0, bidx_lo, bytes1);
+    __m512i r = _mm512_mask_permutexvar_epi16(rlo, rmask, ridx_hi, bytes2);
+    __m512i g = _mm512_mask_permutexvar_epi16(glo, gbmask, gidx_hi, bytes2);
+    __m512i b = _mm512_mask_permutexvar_epi16(blo, gbmask, bidx_hi, bytes2);
+    return {SIMDVec16{r}, SIMDVec16{g}, SIMDVec16{b}};
+  }
+
+  // Four 8-bit channels: reads 128 bytes. The 32-bit pixels are first split
+  // into their low and high 16-bit halves (two channels each), then each
+  // half into its low and high byte.
+  static std::array<SIMDVec16, 4> LoadRGBA8(const unsigned char* data) {
+    __m512i bytes1 = _mm512_loadu_si512((__m512i*)data);
+    __m512i bytes2 = _mm512_loadu_si512((__m512i*)(data + 64));
+    __m512i rg_mask = _mm512_set1_epi32(0xFFFF);
+    __m512i permuteidx = _mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0);
+    __m512i rg = _mm512_permutexvar_epi64(
+        permuteidx, _mm512_packus_epi32(_mm512_and_si512(bytes1, rg_mask),
+                                        _mm512_and_si512(bytes2, rg_mask)));
+    __m512i ba = _mm512_permutexvar_epi64(
+        permuteidx, _mm512_packus_epi32(_mm512_srli_epi32(bytes1, 16),
+                                        _mm512_srli_epi32(bytes2, 16)));
+    __m512i r = _mm512_and_si512(rg, _mm512_set1_epi16(0xFF));
+    __m512i g = _mm512_srli_epi16(rg, 8);
+    __m512i b = _mm512_and_si512(ba, _mm512_set1_epi16(0xFF));
+    __m512i a = _mm512_srli_epi16(ba, 8);
+    return {SIMDVec16{r}, SIMDVec16{g}, SIMDVec16{b}, SIMDVec16{a}};
+  }
+  // Four 16-bit channels: reads 256 bytes. Two rounds of "keep the low / high
+  // 16 bits of every 32-bit element and pack" separate the channels.
+  static std::array<SIMDVec16, 4> LoadRGBA16(const unsigned char* data) {
+    __m512i bytes0 = _mm512_loadu_si512((__m512i*)data);
+    __m512i bytes1 = _mm512_loadu_si512((__m512i*)(data + 64));
+    __m512i bytes2 = _mm512_loadu_si512((__m512i*)(data + 128));
+    __m512i bytes3 = _mm512_loadu_si512((__m512i*)(data + 192));
+
+    // Packs two vectors of 32-bit elements into 16-bit lanes, `a` first.
+    auto pack32 = [](__m512i a, __m512i b) {
+      __m512i permuteidx = _mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0);
+      return _mm512_permutexvar_epi64(permuteidx, _mm512_packus_epi32(a, b));
+    };
+    // Same, keeping the low 16 bits of every 32-bit element.
+    auto packlow32 = [&pack32](__m512i a, __m512i b) {
+      __m512i mask = _mm512_set1_epi32(0xFFFF);
+      return pack32(_mm512_and_si512(a, mask), _mm512_and_si512(b, mask));
+    };
+    // Same, keeping the high 16 bits of every 32-bit element.
+    auto packhi32 = [&pack32](__m512i a, __m512i b) {
+      return pack32(_mm512_srli_epi32(a, 16), _mm512_srli_epi32(b, 16));
+    };
+
+    // First round: channels 0 and 2 end up interleaved in rb*, channels 1
+    // and 3 in ga*.
+    __m512i rb0 = packlow32(bytes0, bytes1);
+    __m512i rb1 = packlow32(bytes2, bytes3);
+    __m512i ga0 = packhi32(bytes0, bytes1);
+    __m512i ga1 = packhi32(bytes2, bytes3);
+
+    // Second round: split each interleaved pair into single channels.
+    __m512i r = packlow32(rb0, rb1);
+    __m512i g = packlow32(ga0, ga1);
+    __m512i b = packhi32(rb0, rb1);
+    __m512i a = packhi32(ga0, ga1);
+    return {SIMDVec16{r}, SIMDVec16{g}, SIMDVec16{b}, SIMDVec16{a}};
+  }
+
+  // Swaps the two bytes of every 16-bit lane in place.
+  void SwapEndian() {
+    auto indices = _mm512_broadcast_i32x4(
+        _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14));
+    vec = _mm512_shuffle_epi8(vec, indices);
+  }
+};
+
+// Lane-wise select: a lane comes from if_true where its mask bit is set and
+// from if_false otherwise.
+SIMDVec16 Mask16::IfThenElse(const SIMDVec16& if_true,
+                             const SIMDVec16& if_false) {
+  const __m512i selected =
+      _mm512_mask_blend_epi16(mask, if_false.vec, if_true.vec);
+  return SIMDVec16{selected};
+}
+
+// Lane-wise select: a lane comes from if_true where its mask bit is set and
+// from if_false otherwise.
+SIMDVec32 Mask32::IfThenElse(const SIMDVec32& if_true,
+                             const SIMDVec32& if_false) {
+  const __m512i selected =
+      _mm512_mask_blend_epi32(mask, if_false.vec, if_true.vec);
+  return SIMDVec32{selected};
+}
+
+// Eight (bit count, bit pattern) pairs, one per 64-bit lane.
+struct Bits64 {
+  static constexpr size_t kLanes = 8;
+
+  __m512i nbits;
+  __m512i bits;
+
+  // Unaligned store of the kLanes bit counts to nbits_out and of the kLanes
+  // bit patterns to bits_out.
+  FJXL_INLINE void Store(uint64_t* nbits_out, uint64_t* bits_out) {
+    __m512i* const nbits_dst = reinterpret_cast<__m512i*>(nbits_out);
+    __m512i* const bits_dst = reinterpret_cast<__m512i*>(bits_out);
+    _mm512_storeu_si512(nbits_dst, nbits);
+    _mm512_storeu_si512(bits_dst, bits);
+  }
+};
+
+// Sixteen (bit count, bit pattern) pairs, one per 32-bit lane.
+struct Bits32 {
+  __m512i nbits;
+  __m512i bits;
+
+  // Wraps already-computed per-lane bit counts and bit patterns.
+  static Bits32 FromRaw(SIMDVec32 nbits, SIMDVec32 bits) {
+    Bits32 wrapped;
+    wrapped.nbits = nbits.vec;
+    wrapped.bits = bits.vec;
+    return wrapped;
+  }
+
+  // Fuses each pair of adjacent 32-bit lanes into one 64-bit lane: the
+  // even lane's bits stay in the low positions, the odd lane's bits are
+  // placed directly above them, and the bit counts are summed.
+  Bits64 Merge() const {
+    const __m512i low_half = _mm512_set1_epi64(0xFFFFFFFF);
+    const __m512i lower_nbits = _mm512_and_si512(nbits, low_half);
+    const __m512i lower_bits = _mm512_and_si512(bits, low_half);
+    const __m512i upper_nbits = _mm512_srli_epi64(nbits, 32);
+    const __m512i upper_bits = _mm512_srli_epi64(bits, 32);
+
+    const __m512i shifted_upper = _mm512_sllv_epi64(upper_bits, lower_nbits);
+    return Bits64{_mm512_add_epi64(upper_nbits, lower_nbits),
+                  _mm512_or_si512(shifted_upper, lower_bits)};
+  }
+
+  // Lane-wise: puts `low`'s bits below this object's bits and adds the bit
+  // counts.
+  void Interleave(const Bits32& low) {
+    const __m512i moved_up = _mm512_sllv_epi32(bits, low.nbits);
+    bits = _mm512_or_si512(moved_up, low.bits);
+    nbits = _mm512_add_epi32(nbits, low.nbits);
+  }
+
+  // Keeps the first min(n, 16) lanes and zeroes the rest.
+  void ClipTo(size_t n) {
+    const size_t kept = std::min<size_t>(n, 16);
+    // Sixteen all-ones lanes followed by sixteen zero lanes; loading at
+    // offset 16 - kept yields `kept` leading all-ones lanes.
+    constexpr uint32_t kMask[32] = {
+        ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u,
+        ~0u, ~0u, ~0u, ~0u, ~0u, 0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+    };
+    const __m512i lane_mask = _mm512_loadu_si512(
+        reinterpret_cast<const __m512i*>(kMask + 16 - kept));
+    nbits = _mm512_and_si512(lane_mask, nbits);
+    bits = _mm512_and_si512(lane_mask, bits);
+  }
+  // Zeroes the first min(n, 16) lanes and keeps the rest.
+  void Skip(size_t n) {
+    const size_t dropped = std::min<size_t>(n, 16);
+    // Sixteen zero lanes followed by sixteen all-ones lanes; loading at
+    // offset 16 - dropped yields `dropped` leading zero lanes.
+    constexpr uint32_t kMask[32] = {
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   ~0u, ~0u, ~0u, ~0u, ~0u, ~0u,
+        ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u,
+    };
+    const __m512i lane_mask = _mm512_loadu_si512(
+        reinterpret_cast<const __m512i*>(kMask + 16 - dropped));
+    nbits = _mm512_and_si512(lane_mask, nbits);
+    bits = _mm512_and_si512(lane_mask, bits);
+  }
+};
+
+// Thirty-two (bit count, bit pattern) pairs, one per 16-bit lane.
+struct Bits16 {
+  __m512i nbits;
+  __m512i bits;
+
+  // Wraps already-computed per-lane bit counts and bit patterns.
+  static Bits16 FromRaw(SIMDVec16 nbits, SIMDVec16 bits) {
+    Bits16 wrapped;
+    wrapped.nbits = nbits.vec;
+    wrapped.bits = bits.vec;
+    return wrapped;
+  }
+
+  // Fuses each pair of adjacent 16-bit lanes into one 32-bit lane: the
+  // even lane's bits stay in the low positions, the odd lane's bits are
+  // placed directly above them, and the bit counts are summed.
+  Bits32 Merge() const {
+    const __m512i low_half = _mm512_set1_epi32(0xFFFF);
+    const __m512i lower_nbits = _mm512_and_si512(nbits, low_half);
+    const __m512i lower_bits = _mm512_and_si512(bits, low_half);
+    const __m512i upper_nbits = _mm512_srli_epi32(nbits, 16);
+    const __m512i upper_bits = _mm512_srli_epi32(bits, 16);
+
+    const __m512i shifted_upper = _mm512_sllv_epi32(upper_bits, lower_nbits);
+    return Bits32{_mm512_add_epi32(upper_nbits, lower_nbits),
+                  _mm512_or_si512(shifted_upper, lower_bits)};
+  }
+
+  // Lane-wise: puts `low`'s bits below this object's bits and adds the bit
+  // counts.
+  void Interleave(const Bits16& low) {
+    const __m512i moved_up = _mm512_sllv_epi16(bits, low.nbits);
+    bits = _mm512_or_si512(moved_up, low.bits);
+    nbits = _mm512_add_epi16(nbits, low.nbits);
+  }
+
+  // Keeps the first min(n, 32) lanes and zeroes the rest.
+  void ClipTo(size_t n) {
+    const size_t kept = std::min<size_t>(n, 32);
+    // Thirty-two all-ones lanes followed by thirty-two zero lanes; loading
+    // at offset 32 - kept yields `kept` leading all-ones lanes.
+    constexpr uint16_t kMask[64] = {
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+        0,      0,      0,      0,      0,      0,      0,      0,
+        0,      0,      0,      0,      0,      0,      0,      0,
+        0,      0,      0,      0,      0,      0,      0,      0,
+        0,      0,      0,      0,      0,      0,      0,      0,
+    };
+    const __m512i lane_mask = _mm512_loadu_si512(
+        reinterpret_cast<const __m512i*>(kMask + 32 - kept));
+    nbits = _mm512_and_si512(lane_mask, nbits);
+    bits = _mm512_and_si512(lane_mask, bits);
+  }
+  // Zeroes the first min(n, 32) lanes and keeps the rest.
+  void Skip(size_t n) {
+    const size_t dropped = std::min<size_t>(n, 32);
+    // Thirty-two zero lanes followed by thirty-two all-ones lanes; loading
+    // at offset 32 - dropped yields `dropped` leading zero lanes.
+    constexpr uint16_t kMask[64] = {
+        0,      0,      0,      0,      0,      0,      0,      0,
+        0,      0,      0,      0,      0,      0,      0,      0,
+        0,      0,      0,      0,      0,      0,      0,      0,
+        0,      0,      0,      0,      0,      0,      0,      0,
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+    };
+    const __m512i lane_mask = _mm512_loadu_si512(
+        reinterpret_cast<const __m512i*>(kMask + 32 - dropped));
+    nbits = _mm512_and_si512(lane_mask, nbits);
+    bits = _mm512_and_si512(lane_mask, bits);
+  }
+};
+
+#endif
+
+#ifdef FJXL_AVX2
+#define FJXL_GENERIC_SIMD
+
+struct SIMDVec32;
+
+// Per-lane predicate for the AVX2 SIMDVec32, stored as a full vector as
+// produced by the vector compare instructions.
+struct Mask32 {
+  __m256i mask;
+  // Lane-wise select between two vectors; defined once SIMDVec32 is complete.
+  SIMDVec32 IfThenElse(const SIMDVec32& if_true, const SIMDVec32& if_false);
+  // Collects the top bit of each of the 8 lanes into an 8-bit value and
+  // passes its complement to CtzNonZero (presumably a count-trailing-zeros
+  // helper, which would make this the number of leading lanes that are
+  // set). Widening to 64 bits before complementing keeps the argument
+  // non-zero even when all lanes are set.
+  size_t CountPrefix() const {
+    const int sign_bits = _mm256_movemask_ps(_mm256_castsi256_ps(mask));
+    const uint64_t lane_bits = static_cast<uint8_t>(sign_bits);
+    return CtzNonZero(~lane_bits);
+  }
+};
+
+// Eight 32-bit lanes held in one 256-bit register (AVX2 build).
+struct SIMDVec32 {
+  __m256i vec;
+
+  static constexpr size_t kLanes = 8;
+
+  // Unaligned load of kLanes values.
+  FJXL_INLINE static SIMDVec32 Load(const uint32_t* data) {
+    const __m256i loaded =
+        _mm256_loadu_si256(reinterpret_cast<const __m256i*>(data));
+    return SIMDVec32{loaded};
+  }
+  // Unaligned store of kLanes values.
+  FJXL_INLINE void Store(uint32_t* data) {
+    _mm256_storeu_si256(reinterpret_cast<__m256i*>(data), vec);
+  }
+  // Broadcasts v to every lane.
+  FJXL_INLINE static SIMDVec32 Val(uint32_t v) {
+    return SIMDVec32{_mm256_set1_epi32(v)};
+  }
+  // Per lane: the position of the highest set bit plus one (0 for a zero
+  // lane), found by looking up each nibble in a small table and taking the
+  // maximum.
+  FJXL_INLINE SIMDVec32 ValToToken() const {
+    // we know that each value has at most 20 bits, so we just need 5 nibbles
+    // and don't need to mask the fifth. However we do need to set the higher
+    // bytes to 0xFF, which will make table lookups return 0.
+    const __m256i nibble_mask = _mm256_set1_epi32(0xF);
+    const __m256i upper_bytes_ff = _mm256_set1_epi32(0xFFFFFF00);
+
+    const __m256i idx0 =
+        _mm256_or_si256(_mm256_and_si256(vec, nibble_mask), upper_bytes_ff);
+    const __m256i idx1 = _mm256_or_si256(
+        _mm256_and_si256(_mm256_srli_epi32(vec, 4), nibble_mask),
+        upper_bytes_ff);
+    const __m256i idx2 = _mm256_or_si256(
+        _mm256_and_si256(_mm256_srli_epi32(vec, 8), nibble_mask),
+        upper_bytes_ff);
+    const __m256i idx3 = _mm256_or_si256(
+        _mm256_and_si256(_mm256_srli_epi32(vec, 12), nibble_mask),
+        upper_bytes_ff);
+    const __m256i idx4 =
+        _mm256_or_si256(_mm256_srli_epi32(vec, 16), upper_bytes_ff);
+
+    // Table k maps a nibble value to its bit length offset by 4 * k; a zero
+    // nibble maps to 0 in every table.
+    const __m256i table0 = _mm256_broadcastsi128_si256(
+        _mm_setr_epi8(0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4));
+    const __m256i table1 = _mm256_broadcastsi128_si256(
+        _mm_setr_epi8(0, 5, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8));
+    const __m256i table2 = _mm256_broadcastsi128_si256(_mm_setr_epi8(
+        0, 9, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12));
+    const __m256i table3 = _mm256_broadcastsi128_si256(_mm_setr_epi8(
+        0, 13, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16));
+    const __m256i table4 = _mm256_broadcastsi128_si256(_mm_setr_epi8(
+        0, 17, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20));
+
+    const __m256i len0 = _mm256_shuffle_epi8(table0, idx0);
+    const __m256i len1 = _mm256_shuffle_epi8(table1, idx1);
+    const __m256i len2 = _mm256_shuffle_epi8(table2, idx2);
+    const __m256i len3 = _mm256_shuffle_epi8(table3, idx3);
+    const __m256i len4 = _mm256_shuffle_epi8(table4, idx4);
+
+    // The highest non-zero nibble determines the result.
+    const __m256i max01 = _mm256_max_epi32(len0, len1);
+    const __m256i max23 = _mm256_max_epi32(len2, len3);
+    const __m256i max0123 = _mm256_max_epi32(max01, max23);
+    return SIMDVec32{_mm256_max_epi32(max0123, len4)};
+  }
+  // Unsigned saturating subtraction, computed as max(vec, x) - x.
+  FJXL_INLINE SIMDVec32 SatSubU(const SIMDVec32& to_subtract) const {
+    const __m256i at_least_x = _mm256_max_epu32(vec, to_subtract.vec);
+    return SIMDVec32{_mm256_sub_epi32(at_least_x, to_subtract.vec)};
+  }
+  // Wrapping lane-wise subtraction.
+  FJXL_INLINE SIMDVec32 Sub(const SIMDVec32& to_subtract) const {
+    const __m256i diff = _mm256_sub_epi32(vec, to_subtract.vec);
+    return SIMDVec32{diff};
+  }
+  // Wrapping lane-wise addition.
+  FJXL_INLINE SIMDVec32 Add(const SIMDVec32& oth) const {
+    const __m256i sum = _mm256_add_epi32(vec, oth.vec);
+    return SIMDVec32{sum};
+  }
+  // Bitwise exclusive or.
+  FJXL_INLINE SIMDVec32 Xor(const SIMDVec32& oth) const {
+    const __m256i mixed = _mm256_xor_si256(vec, oth.vec);
+    return SIMDVec32{mixed};
+  }
+  // Per lane: 1 shifted left by the lane's value.
+  FJXL_INLINE SIMDVec32 Pow2() const {
+    const __m256i one = _mm256_set1_epi32(1);
+    return SIMDVec32{_mm256_sllv_epi32(one, vec)};
+  }
+  // Lane-wise equality.
+  FJXL_INLINE Mask32 Eq(const SIMDVec32& oth) const {
+    const __m256i equal = _mm256_cmpeq_epi32(vec, oth.vec);
+    return Mask32{equal};
+  }
+  // Lane-wise greater-than, comparing the lanes as signed integers.
+  FJXL_INLINE Mask32 Gt(const SIMDVec32& oth) const {
+    const __m256i greater = _mm256_cmpgt_epi32(vec, oth.vec);
+    return Mask32{greater};
+  }
+  // Arithmetic (sign-propagating) right shift by the compile-time amount i.
+  template <size_t i>
+  FJXL_INLINE SIMDVec32 SignedShiftRight() const {
+    const __m256i shifted = _mm256_srai_epi32(vec, i);
+    return SIMDVec32{shifted};
+  }
+};
+
+struct SIMDVec16;
+
+// Per-lane predicate for the AVX2 SIMDVec16, stored as a full vector as
+// produced by the vector compare instructions.
+struct Mask16 {
+  __m256i mask;
+  // Lane-wise select between two vectors; defined once SIMDVec16 is complete.
+  SIMDVec16 IfThenElse(const SIMDVec16& if_true, const SIMDVec16& if_false);
+  // Bitwise and of the two masks.
+  Mask16 And(const Mask16& oth) const {
+    const __m256i both = _mm256_and_si256(mask, oth.mask);
+    return Mask16{both};
+  }
+  // Collects the top bit of every byte (two bytes per 16-bit lane) and passes
+  // the complement to CtzNonZero (presumably a count-trailing-zeros helper);
+  // halving converts the byte index into a lane index. Widening to 64 bits
+  // before complementing keeps the argument non-zero even when all lanes are
+  // set.
+  size_t CountPrefix() const {
+    const uint32_t byte_bits =
+        static_cast<uint32_t>(_mm256_movemask_epi8(mask));
+    const uint64_t widened = byte_bits;
+    return CtzNonZero(~widened) / 2;
+  }
+};
+
+// AVX2 vector of 16 lanes of 16 bits each, stored in one 256-bit register.
+// Most AVX2 byte/word shuffles operate independently on each 128-bit half,
+// which is why several methods finish with a cross-half permute.
+struct SIMDVec16 {
+  __m256i vec;
+
+  static constexpr size_t kLanes = 16;
+
+  // Unaligned load of kLanes consecutive uint16_t values.
+  FJXL_INLINE static SIMDVec16 Load(const uint16_t* data) {
+    return SIMDVec16{_mm256_loadu_si256((__m256i*)data)};
+  }
+  // Unaligned store of all kLanes lanes to `data`.
+  FJXL_INLINE void Store(uint16_t* data) {
+    _mm256_storeu_si256((__m256i*)data, vec);
+  }
+  // Broadcasts `v` to every lane.
+  FJXL_INLINE static SIMDVec16 Val(uint16_t v) {
+    return SIMDVec16{_mm256_set1_epi16(v)};
+  }
+  // Narrows two 32-bit vectors into one 16-bit vector (`lo` lanes first, then
+  // `hi` lanes), using unsigned saturation.
+  FJXL_INLINE static SIMDVec16 FromTwo32(const SIMDVec32& lo,
+                                         const SIMDVec32& hi) {
+    // packus works per 128-bit half, producing lo[0:4], hi[0:4], lo[4:8],
+    // hi[4:8]; the 64-bit permute (order 0, 2, 1, 3) restores linear order.
+    auto tmp = _mm256_packus_epi32(lo.vec, hi.vec);
+    return SIMDVec16{_mm256_permute4x64_epi64(tmp, 0b11011000)};
+  }
+
+  // Maps every lane to its bit length (0 for 0, 16 for values >= 0x8000),
+  // computed with four 16-entry nibble lookup tables.
+  FJXL_INLINE SIMDVec16 ValToToken() const {
+    // Isolate each of the four nibbles in the low byte of its lane. The high
+    // byte is forced to 0xFF so that the byte shuffle below outputs zero
+    // there (shuffle indices with the top bit set select zero).
+    auto nibble0 =
+        _mm256_or_si256(_mm256_and_si256(vec, _mm256_set1_epi16(0xF)),
+                        _mm256_set1_epi16(0xFF00));
+    auto nibble1 = _mm256_or_si256(
+        _mm256_and_si256(_mm256_srli_epi16(vec, 4), _mm256_set1_epi16(0xF)),
+        _mm256_set1_epi16(0xFF00));
+    auto nibble2 = _mm256_or_si256(
+        _mm256_and_si256(_mm256_srli_epi16(vec, 8), _mm256_set1_epi16(0xF)),
+        _mm256_set1_epi16(0xFF00));
+    auto nibble3 =
+        _mm256_or_si256(_mm256_srli_epi16(vec, 12), _mm256_set1_epi16(0xFF00));
+
+    // lutK[x] is the bit length of nibble x offset by 4 * K, or 0 when x == 0.
+    auto lut0 = _mm256_broadcastsi128_si256(
+        _mm_setr_epi8(0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4));
+    auto lut1 = _mm256_broadcastsi128_si256(
+        _mm_setr_epi8(0, 5, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8));
+    auto lut2 = _mm256_broadcastsi128_si256(_mm_setr_epi8(
+        0, 9, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12));
+    auto lut3 = _mm256_broadcastsi128_si256(_mm_setr_epi8(
+        0, 13, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16));
+
+    auto token0 = _mm256_shuffle_epi8(lut0, nibble0);
+    auto token1 = _mm256_shuffle_epi8(lut1, nibble1);
+    auto token2 = _mm256_shuffle_epi8(lut2, nibble2);
+    auto token3 = _mm256_shuffle_epi8(lut3, nibble3);
+
+    // The highest non-zero nibble contributes the largest candidate.
+    auto token = _mm256_max_epi16(_mm256_max_epi16(token0, token1),
+                                  _mm256_max_epi16(token2, token3));
+    return SIMDVec16{token};
+  }
+
+  // Unsigned saturating subtraction: lanes clamp at 0 instead of wrapping.
+  FJXL_INLINE SIMDVec16 SatSubU(const SIMDVec16& to_subtract) const {
+    return SIMDVec16{_mm256_subs_epu16(vec, to_subtract.vec)};
+  }
+  // Wrapping lane-wise subtraction.
+  FJXL_INLINE SIMDVec16 Sub(const SIMDVec16& to_subtract) const {
+    return SIMDVec16{_mm256_sub_epi16(vec, to_subtract.vec)};
+  }
+  // Wrapping lane-wise addition.
+  FJXL_INLINE SIMDVec16 Add(const SIMDVec16& oth) const {
+    return SIMDVec16{_mm256_add_epi16(vec, oth.vec)};
+  }
+  // Lane-wise unsigned minimum.
+  FJXL_INLINE SIMDVec16 Min(const SIMDVec16& oth) const {
+    return SIMDVec16{_mm256_min_epu16(vec, oth.vec)};
+  }
+  // Lane-wise equality; matching lanes are all-ones in the mask.
+  FJXL_INLINE Mask16 Eq(const SIMDVec16& oth) const {
+    return Mask16{_mm256_cmpeq_epi16(vec, oth.vec)};
+  }
+  // Lane-wise `vec > oth.vec`, comparing lanes as *signed* 16-bit integers.
+  FJXL_INLINE Mask16 Gt(const SIMDVec16& oth) const {
+    return Mask16{_mm256_cmpgt_epi16(vec, oth.vec)};
+  }
+  // Computes `1 << lane` via byte lookup tables (AVX2 has no per-lane
+  // variable shift for 16-bit lanes). The tables cover exponents 0..15.
+  FJXL_INLINE SIMDVec16 Pow2() const {
+    // Low result byte: non-zero only for exponents 0..7.
+    auto pow2_lo_lut = _mm256_broadcastsi128_si256(
+        _mm_setr_epi8(1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6,
+                      1u << 7, 0, 0, 0, 0, 0, 0, 0, 0));
+    // High result byte: non-zero only for exponents 8..15.
+    auto pow2_hi_lut = _mm256_broadcastsi128_si256(
+        _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1 << 0, 1 << 1, 1 << 2, 1 << 3,
+                      1 << 4, 1 << 5, 1 << 6, 1u << 7));
+
+    // Force the high byte of each lane to 0xFF so the shuffle yields zero
+    // there; only the low byte acts as a table index.
+    auto masked = _mm256_or_si256(vec, _mm256_set1_epi16(0xFF00));
+
+    auto pow2_lo = _mm256_shuffle_epi8(pow2_lo_lut, masked);
+    auto pow2_hi = _mm256_shuffle_epi8(pow2_hi_lut, masked);
+
+    auto pow2 = _mm256_or_si256(_mm256_slli_epi16(pow2_hi, 8), pow2_lo);
+    return SIMDVec16{pow2};
+  }
+  // Bitwise OR.
+  FJXL_INLINE SIMDVec16 Or(const SIMDVec16& oth) const {
+    return SIMDVec16{_mm256_or_si256(vec, oth.vec)};
+  }
+  // Bitwise XOR.
+  FJXL_INLINE SIMDVec16 Xor(const SIMDVec16& oth) const {
+    return SIMDVec16{_mm256_xor_si256(vec, oth.vec)};
+  }
+  // Bitwise AND.
+  FJXL_INLINE SIMDVec16 And(const SIMDVec16& oth) const {
+    return SIMDVec16{_mm256_and_si256(vec, oth.vec)};
+  }
+  // Halved sum `(vec + oth.vec) >> 1`. The sum is a wrapping 16-bit add and
+  // the shift is arithmetic, so this is only a true average while the sum
+  // stays within the signed 16-bit range.
+  FJXL_INLINE SIMDVec16 HAdd(const SIMDVec16& oth) const {
+    return SIMDVec16{_mm256_srai_epi16(_mm256_add_epi16(vec, oth.vec), 1)};
+  }
+  // Sets the high byte of every lane to 0xFF so that a following U8Lookup
+  // produces a zero high byte (indices with the top bit set select zero).
+  FJXL_INLINE SIMDVec16 PrepareForU8Lookup() const {
+    return SIMDVec16{_mm256_or_si256(vec, _mm256_set1_epi16(0xFF00))};
+  }
+  // Byte-wise lookup into a 16-entry table: reads 16 bytes from `table`,
+  // replicates them into both 128-bit halves and shuffles with the bytes of
+  // `vec` as indices.
+  FJXL_INLINE SIMDVec16 U8Lookup(const uint8_t* table) const {
+    return SIMDVec16{_mm256_shuffle_epi8(
+        _mm256_broadcastsi128_si256(_mm_loadu_si128((__m128i*)table)), vec)};
+  }
+  // Interleaves lanes as low[0], this[0], low[1], this[1], ...; the first
+  // returned vector holds pairs 0..7 and the second pairs 8..15.
+  FJXL_INLINE VecPair<SIMDVec16> Interleave(const SIMDVec16& low) const {
+    // unpacklo/unpackhi act per 128-bit half; permute2x128 regroups the
+    // halves into linear order.
+    auto v02 = _mm256_unpacklo_epi16(low.vec, vec);
+    auto v13 = _mm256_unpackhi_epi16(low.vec, vec);
+    return {SIMDVec16{_mm256_permute2x128_si256(v02, v13, 0x20)},
+            SIMDVec16{_mm256_permute2x128_si256(v02, v13, 0x31)}};
+  }
+  // Zero-extends the 16 lanes to 32 bits: lanes 0..7 go to the first returned
+  // vector, lanes 8..15 to the second.
+  FJXL_INLINE VecPair<SIMDVec32> Upcast() const {
+    auto v02 = _mm256_unpacklo_epi16(vec, _mm256_setzero_si256());
+    auto v13 = _mm256_unpackhi_epi16(vec, _mm256_setzero_si256());
+    return {SIMDVec32{_mm256_permute2x128_si256(v02, v13, 0x20)},
+            SIMDVec32{_mm256_permute2x128_si256(v02, v13, 0x31)}};
+  }
+  // Arithmetic (sign-propagating) right shift of every lane by `i` bits.
+  template <size_t i>
+  FJXL_INLINE SIMDVec16 SignedShiftRight() const {
+    return SIMDVec16{_mm256_srai_epi16(vec, i)};
+  }
+
+  // Loads 16 single-channel 8-bit samples (16 bytes) and zero-extends them.
+  static std::array<SIMDVec16, 1> LoadG8(const unsigned char* data) {
+    __m128i bytes = _mm_loadu_si128((__m128i*)data);
+    return {SIMDVec16{_mm256_cvtepu8_epi16(bytes)}};
+  }
+  // Loads 16 single-channel 16-bit samples (32 bytes) in memory byte order.
+  static std::array<SIMDVec16, 1> LoadG16(const unsigned char* data) {
+    return {Load((const uint16_t*)data)};
+  }
+
+  // Loads 16 interleaved two-channel 8-bit pixels (32 bytes) and splits them
+  // into {gray, alpha}.
+  static std::array<SIMDVec16, 2> LoadGA8(const unsigned char* data) {
+    __m256i bytes = _mm256_loadu_si256((__m256i*)data);
+    // Each 16-bit lane holds one pixel: first byte in the low 8 bits, second
+    // byte in the high 8 bits.
+    __m256i gray = _mm256_and_si256(bytes, _mm256_set1_epi16(0xFF));
+    __m256i alpha = _mm256_srli_epi16(bytes, 8);
+    return {SIMDVec16{gray}, SIMDVec16{alpha}};
+  }
+  // Loads 16 interleaved two-channel 16-bit pixels (64 bytes) and splits them
+  // into {gray, alpha}.
+  static std::array<SIMDVec16, 2> LoadGA16(const unsigned char* data) {
+    __m256i bytes1 = _mm256_loadu_si256((__m256i*)data);
+    __m256i bytes2 = _mm256_loadu_si256((__m256i*)(data + 32));
+    // Each 32-bit element holds one pixel: first sample in the low 16 bits,
+    // second sample in the high 16 bits.
+    __m256i g_mask = _mm256_set1_epi32(0xFFFF);
+    __m256i g = _mm256_permute4x64_epi64(
+        _mm256_packus_epi32(_mm256_and_si256(bytes1, g_mask),
+                            _mm256_and_si256(bytes2, g_mask)),
+        0b11011000);
+    __m256i a = _mm256_permute4x64_epi64(
+        _mm256_packus_epi32(_mm256_srli_epi32(bytes1, 16),
+                            _mm256_srli_epi32(bytes2, 16)),
+        0b11011000);
+    return {SIMDVec16{g}, SIMDVec16{a}};
+  }
+
+  // Loads 16 interleaved three-channel 8-bit pixels (48 bytes) and splits
+  // them into {r, g, b}, each zero-extended to 16 bits.
+  static std::array<SIMDVec16, 3> LoadRGB8(const unsigned char* data) {
+    __m128i bytes0 = _mm_loadu_si128((__m128i*)data);
+    __m128i bytes1 = _mm_loadu_si128((__m128i*)(data + 16));
+    __m128i bytes2 = _mm_loadu_si128((__m128i*)(data + 32));
+
+    // Gathers every third byte: positions 0, 3, 6, ... first, then 2, 5, ...
+    // and finally 1, 4, ..., grouping the bytes of each channel together.
+    __m128i idx =
+        _mm_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13);
+
+    // Names list the channel and count of each group, e.g. r6b5g5 = six R
+    // bytes, then five B bytes, then five G bytes.
+    __m128i r6b5g5_0 = _mm_shuffle_epi8(bytes0, idx);
+    __m128i g6r5b5_1 = _mm_shuffle_epi8(bytes1, idx);
+    __m128i b6g5r5_2 = _mm_shuffle_epi8(bytes2, idx);
+
+    // Byte masks selecting the middle (bytes 6..10) and last (bytes 11..15)
+    // group of a shuffled register.
+    __m128i mask010 = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF,
+                                    0xFF, 0, 0, 0, 0, 0);
+    __m128i mask001 = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF,
+                                    0xFF, 0xFF, 0xFF);
+
+    // Blend the three registers so that each result holds a single channel.
+    __m128i b2g2b1 = _mm_blendv_epi8(b6g5r5_2, g6r5b5_1, mask001);
+    __m128i b2b0b1 = _mm_blendv_epi8(b2g2b1, r6b5g5_0, mask010);
+
+    __m128i r0r1b1 = _mm_blendv_epi8(r6b5g5_0, g6r5b5_1, mask010);
+    __m128i r0r1r2 = _mm_blendv_epi8(r0r1b1, b6g5r5_2, mask001);
+
+    __m128i g1r1g0 = _mm_blendv_epi8(g6r5b5_1, r6b5g5_0, mask001);
+    __m128i g1g2g0 = _mm_blendv_epi8(g1r1g0, b6g5r5_2, mask010);
+
+    // G and B groups are out of order; rotate the bytes to restore pixel
+    // order.
+    __m128i g0g1g2 = _mm_alignr_epi8(g1g2g0, g1g2g0, 11);
+    __m128i b0b1b2 = _mm_alignr_epi8(b2b0b1, b2b0b1, 6);
+
+    return {SIMDVec16{_mm256_cvtepu8_epi16(r0r1r2)},
+            SIMDVec16{_mm256_cvtepu8_epi16(g0g1g2)},
+            SIMDVec16{_mm256_cvtepu8_epi16(b0b1b2)}};
+  }
+  // Loads 16 interleaved three-channel 16-bit pixels (96 bytes) and splits
+  // them into {r, g, b}. The samples are separated into low and high bytes,
+  // de-interleaved with the same byte pattern as LoadRGB8 in each 128-bit
+  // half, and then recombined.
+  static std::array<SIMDVec16, 3> LoadRGB16(const unsigned char* data) {
+    // Returns the 16 low bytes of the 32 loaded bytes in the lower half and
+    // the 16 high bytes in the upper half.
+    auto load_and_split_lohi = [](const unsigned char* data) {
+      // LHLHLH...
+      __m256i bytes = _mm256_loadu_si256((__m256i*)data);
+      // L0L0L0...
+      __m256i lo = _mm256_and_si256(bytes, _mm256_set1_epi16(0xFF));
+      // H0H0H0...
+      __m256i hi = _mm256_srli_epi16(bytes, 8);
+      // LLLLLLLLHHHHHHHHLLLLLLLLHHHHHHHH
+      __m256i packed = _mm256_packus_epi16(lo, hi);
+      return _mm256_permute4x64_epi64(packed, 0b11011000);
+    };
+    __m256i bytes0 = load_and_split_lohi(data);
+    __m256i bytes1 = load_and_split_lohi(data + 32);
+    __m256i bytes2 = load_and_split_lohi(data + 64);
+
+    // Same every-third-byte gather as in LoadRGB8, applied to both halves.
+    __m256i idx = _mm256_broadcastsi128_si256(
+        _mm_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13));
+
+    __m256i r6b5g5_0 = _mm256_shuffle_epi8(bytes0, idx);
+    __m256i g6r5b5_1 = _mm256_shuffle_epi8(bytes1, idx);
+    __m256i b6g5r5_2 = _mm256_shuffle_epi8(bytes2, idx);
+
+    __m256i mask010 = _mm256_broadcastsi128_si256(_mm_setr_epi8(
+        0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 0, 0, 0, 0));
+    __m256i mask001 = _mm256_broadcastsi128_si256(_mm_setr_epi8(
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF));
+
+    __m256i b2g2b1 = _mm256_blendv_epi8(b6g5r5_2, g6r5b5_1, mask001);
+    __m256i b2b0b1 = _mm256_blendv_epi8(b2g2b1, r6b5g5_0, mask010);
+
+    __m256i r0r1b1 = _mm256_blendv_epi8(r6b5g5_0, g6r5b5_1, mask010);
+    __m256i r0r1r2 = _mm256_blendv_epi8(r0r1b1, b6g5r5_2, mask001);
+
+    __m256i g1r1g0 = _mm256_blendv_epi8(g6r5b5_1, r6b5g5_0, mask001);
+    __m256i g1g2g0 = _mm256_blendv_epi8(g1r1g0, b6g5r5_2, mask010);
+
+    __m256i g0g1g2 = _mm256_alignr_epi8(g1g2g0, g1g2g0, 11);
+    __m256i b0b1b2 = _mm256_alignr_epi8(b2b0b1, b2b0b1, 6);
+
+    // Now r0r1r2, g0g1g2, b0b1b2 have the low bytes of the RGB pixels in their
+    // lower half, and the high bytes in their upper half.
+
+    // Rebuilds 16-bit samples as (high byte << 8) | low byte.
+    auto combine_low_hi = [](__m256i v) {
+      __m128i low = _mm256_extracti128_si256(v, 0);
+      __m128i hi = _mm256_extracti128_si256(v, 1);
+      __m256i low16 = _mm256_cvtepu8_epi16(low);
+      __m256i hi16 = _mm256_cvtepu8_epi16(hi);
+      return _mm256_or_si256(_mm256_slli_epi16(hi16, 8), low16);
+    };
+
+    return {SIMDVec16{combine_low_hi(r0r1r2)},
+            SIMDVec16{combine_low_hi(g0g1g2)},
+            SIMDVec16{combine_low_hi(b0b1b2)}};
+  }
+
+  // Loads 16 interleaved four-channel 8-bit pixels (64 bytes) and splits them
+  // into {r, g, b, a}.
+  static std::array<SIMDVec16, 4> LoadRGBA8(const unsigned char* data) {
+    __m256i bytes1 = _mm256_loadu_si256((__m256i*)data);
+    __m256i bytes2 = _mm256_loadu_si256((__m256i*)(data + 32));
+    // Each 32-bit element is one pixel: its low 16 bits hold the first two
+    // channel bytes, its high 16 bits the last two.
+    __m256i rg_mask = _mm256_set1_epi32(0xFFFF);
+    __m256i rg = _mm256_permute4x64_epi64(
+        _mm256_packus_epi32(_mm256_and_si256(bytes1, rg_mask),
+                            _mm256_and_si256(bytes2, rg_mask)),
+        0b11011000);
+    __m256i ba = _mm256_permute4x64_epi64(
+        _mm256_packus_epi32(_mm256_srli_epi32(bytes1, 16),
+                            _mm256_srli_epi32(bytes2, 16)),
+        0b11011000);
+    // Split each 16-bit pair into its low and high byte.
+    __m256i r = _mm256_and_si256(rg, _mm256_set1_epi16(0xFF));
+    __m256i g = _mm256_srli_epi16(rg, 8);
+    __m256i b = _mm256_and_si256(ba, _mm256_set1_epi16(0xFF));
+    __m256i a = _mm256_srli_epi16(ba, 8);
+    return {SIMDVec16{r}, SIMDVec16{g}, SIMDVec16{b}, SIMDVec16{a}};
+  }
+  // Loads 16 interleaved four-channel 16-bit pixels (128 bytes) and splits
+  // them into {r, g, b, a} with two rounds of 32-to-16-bit packing.
+  static std::array<SIMDVec16, 4> LoadRGBA16(const unsigned char* data) {
+    __m256i bytes0 = _mm256_loadu_si256((__m256i*)data);
+    __m256i bytes1 = _mm256_loadu_si256((__m256i*)(data + 32));
+    __m256i bytes2 = _mm256_loadu_si256((__m256i*)(data + 64));
+    __m256i bytes3 = _mm256_loadu_si256((__m256i*)(data + 96));
+
+    // Narrows two vectors of 32-bit elements to one vector of 16-bit
+    // elements, keeping `a`'s elements before `b`'s.
+    auto pack32 = [](__m256i a, __m256i b) {
+      return _mm256_permute4x64_epi64(_mm256_packus_epi32(a, b), 0b11011000);
+    };
+    // Keeps the low 16 bits of every 32-bit element.
+    auto packlow32 = [&pack32](__m256i a, __m256i b) {
+      __m256i mask = _mm256_set1_epi32(0xFFFF);
+      return pack32(_mm256_and_si256(a, mask), _mm256_and_si256(b, mask));
+    };
+    // Keeps the high 16 bits of every 32-bit element.
+    auto packhi32 = [&pack32](__m256i a, __m256i b) {
+      return pack32(_mm256_srli_epi32(a, 16), _mm256_srli_epi32(b, 16));
+    };
+
+    // First round: even 16-bit samples (R, B alternating) and odd samples
+    // (G, A alternating).
+    __m256i rb0 = packlow32(bytes0, bytes1);
+    __m256i rb1 = packlow32(bytes2, bytes3);
+    __m256i ga0 = packhi32(bytes0, bytes1);
+    __m256i ga1 = packhi32(bytes2, bytes3);
+
+    // Second round: separate the alternating pairs into single channels.
+    __m256i r = packlow32(rb0, rb1);
+    __m256i g = packlow32(ga0, ga1);
+    __m256i b = packhi32(rb0, rb1);
+    __m256i a = packhi32(ga0, ga1);
+    return {SIMDVec16{r}, SIMDVec16{g}, SIMDVec16{b}, SIMDVec16{a}};
+  }
+
+  // Swaps the two bytes of every 16-bit lane in place.
+  void SwapEndian() {
+    auto indices = _mm256_broadcastsi128_si256(
+        _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14));
+    vec = _mm256_shuffle_epi8(vec, indices);
+  }
+};
+
+// Byte-wise select driven by the mask: bytes whose mask top bit is set come
+// from `if_true`, the rest from `if_false`.
+SIMDVec16 Mask16::IfThenElse(const SIMDVec16& if_true,
+                             const SIMDVec16& if_false) {
+  const __m256i selected =
+      _mm256_blendv_epi8(if_false.vec, if_true.vec, mask);
+  return SIMDVec16{selected};
+}
+
+// Byte-wise select driven by the mask: bytes whose mask top bit is set come
+// from `if_true`, the rest from `if_false`.
+SIMDVec32 Mask32::IfThenElse(const SIMDVec32& if_true,
+                             const SIMDVec32& if_false) {
+  const __m256i selected =
+      _mm256_blendv_epi8(if_false.vec, if_true.vec, mask);
+  return SIMDVec32{selected};
+}
+
+// Four (bit count, bit value) pairs, one per 64-bit lane of a 256-bit
+// register.
+struct Bits64 {
+  static constexpr size_t kLanes = 4;
+
+  __m256i nbits;
+  __m256i bits;
+
+  // Writes kLanes bit counts to `nbits_out` and then kLanes bit values to
+  // `bits_out`, both as unaligned stores.
+  FJXL_INLINE void Store(uint64_t* nbits_out, uint64_t* bits_out) {
+    __m256i* const counts_dst = reinterpret_cast<__m256i*>(nbits_out);
+    __m256i* const values_dst = reinterpret_cast<__m256i*>(bits_out);
+    _mm256_storeu_si256(counts_dst, nbits);
+    _mm256_storeu_si256(values_dst, bits);
+  }
+};
+
+// Eight (bit count, bit value) pairs, one per 32-bit lane of a 256-bit
+// register.
+struct Bits32 {
+  __m256i nbits;
+  __m256i bits;
+
+  // Wraps already-computed lane vectors without modifying them.
+  static Bits32 FromRaw(SIMDVec32 nbits, SIMDVec32 bits) {
+    return Bits32{nbits.vec, bits.vec};
+  }
+
+  // Fuses each adjacent pair of 32-bit lanes into one 64-bit lane: the odd
+  // lane's bits are placed above the even lane's bits and the two counts are
+  // summed.
+  Bits64 Merge() const {
+    const __m256i low_half = _mm256_set1_epi64x(0xFFFFFFFF);
+    const __m256i count_lo = _mm256_and_si256(nbits, low_half);
+    const __m256i count_hi = _mm256_srli_epi64(nbits, 32);
+    const __m256i value_lo = _mm256_and_si256(bits, low_half);
+    const __m256i value_hi = _mm256_srli_epi64(bits, 32);
+
+    const __m256i merged_count = _mm256_add_epi64(count_hi, count_lo);
+    const __m256i merged_bits =
+        _mm256_or_si256(_mm256_sllv_epi64(value_hi, count_lo), value_lo);
+    return Bits64{merged_count, merged_bits};
+  }
+
+  // Prepends `low` to every lane: this lane's bits are shifted up by
+  // low.nbits, OR-ed with low.bits, and the counts are added.
+  void Interleave(const Bits32& low) {
+    const __m256i shifted = _mm256_sllv_epi32(bits, low.nbits);
+    bits = _mm256_or_si256(shifted, low.bits);
+    nbits = _mm256_add_epi32(nbits, low.nbits);
+  }
+
+  // Keeps the first min(n, 8) lanes and zeroes all the others.
+  void ClipTo(size_t n) {
+    const size_t kept = std::min<size_t>(n, 8);
+    // Sliding an 8-lane window over "8 ones then 8 zeros" gives a mask whose
+    // first `kept` lanes are all-ones.
+    constexpr uint32_t kMask[16] = {
+        ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, 0, 0, 0, 0, 0, 0, 0, 0,
+    };
+    const __m256i window = _mm256_loadu_si256(
+        reinterpret_cast<const __m256i*>(kMask + (8 - kept)));
+    nbits = _mm256_and_si256(window, nbits);
+    bits = _mm256_and_si256(window, bits);
+  }
+
+  // Zeroes the first min(n, 8) lanes and keeps all the others.
+  void Skip(size_t n) {
+    const size_t dropped = std::min<size_t>(n, 8);
+    // Sliding an 8-lane window over "8 zeros then 8 ones" gives a mask whose
+    // first `dropped` lanes are zero.
+    constexpr uint32_t kMask[16] = {
+        0, 0, 0, 0, 0, 0, 0, 0, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u,
+    };
+    const __m256i window = _mm256_loadu_si256(
+        reinterpret_cast<const __m256i*>(kMask + (8 - dropped)));
+    nbits = _mm256_and_si256(window, nbits);
+    bits = _mm256_and_si256(window, bits);
+  }
+};
+
+// Sixteen (bit count, bit value) pairs, one per 16-bit lane of a 256-bit
+// register.
+struct Bits16 {
+  __m256i nbits;
+  __m256i bits;
+
+  // Wraps already-computed lane vectors without modifying them.
+  static Bits16 FromRaw(SIMDVec16 nbits, SIMDVec16 bits) {
+    return Bits16{nbits.vec, bits.vec};
+  }
+
+  // Fuses each adjacent pair of 16-bit lanes into one 32-bit lane: the odd
+  // lane's bits are placed above the even lane's bits and the two counts are
+  // summed.
+  Bits32 Merge() const {
+    const __m256i low_half = _mm256_set1_epi32(0xFFFF);
+    const __m256i count_lo = _mm256_and_si256(nbits, low_half);
+    const __m256i count_hi = _mm256_srli_epi32(nbits, 16);
+    const __m256i value_lo = _mm256_and_si256(bits, low_half);
+    const __m256i value_hi = _mm256_srli_epi32(bits, 16);
+
+    const __m256i merged_count = _mm256_add_epi32(count_hi, count_lo);
+    const __m256i merged_bits =
+        _mm256_or_si256(_mm256_sllv_epi32(value_hi, count_lo), value_lo);
+    return Bits32{merged_count, merged_bits};
+  }
+
+  // Prepends `low` to every lane. The left shift by low.nbits is done as a
+  // multiplication by 2^low.nbits read from a byte table, which only has
+  // non-zero entries for shift amounts 0..7.
+  void Interleave(const Bits16& low) {
+    const __m256i pow2_table = _mm256_broadcastsi128_si256(
+        _mm_setr_epi8(1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6,
+                      1u << 7, 0, 0, 0, 0, 0, 0, 0, 0));
+    // 0xFF in the high byte makes the shuffle output zero there, so each
+    // lane ends up holding just the table entry for its low byte.
+    const __m256i table_index =
+        _mm256_or_si256(low.nbits, _mm256_set1_epi16(0xFF00));
+    const __m256i multiplier = _mm256_shuffle_epi8(pow2_table, table_index);
+    const __m256i shifted = _mm256_mullo_epi16(bits, multiplier);
+
+    nbits = _mm256_add_epi16(nbits, low.nbits);
+    bits = _mm256_or_si256(shifted, low.bits);
+  }
+
+  // Keeps the first min(n, 16) lanes and zeroes all the others.
+  void ClipTo(size_t n) {
+    const size_t kept = std::min<size_t>(n, 16);
+    // Sliding a 16-lane window over "16 ones then 16 zeros" gives a mask
+    // whose first `kept` lanes are all-ones.
+    constexpr uint16_t kMask[32] = {
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+        0,      0,      0,      0,      0,      0,      0,      0,
+        0,      0,      0,      0,      0,      0,      0,      0,
+    };
+    const __m256i window = _mm256_loadu_si256(
+        reinterpret_cast<const __m256i*>(kMask + (16 - kept)));
+    nbits = _mm256_and_si256(window, nbits);
+    bits = _mm256_and_si256(window, bits);
+  }
+
+  // Zeroes the first min(n, 16) lanes and keeps all the others.
+  void Skip(size_t n) {
+    const size_t dropped = std::min<size_t>(n, 16);
+    // Sliding a 16-lane window over "16 zeros then 16 ones" gives a mask
+    // whose first `dropped` lanes are zero.
+    constexpr uint16_t kMask[32] = {
+        0,      0,      0,      0,      0,      0,      0,      0,
+        0,      0,      0,      0,      0,      0,      0,      0,
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+    };
+    const __m256i window = _mm256_loadu_si256(
+        reinterpret_cast<const __m256i*>(kMask + (16 - dropped)));
+    nbits = _mm256_and_si256(window, nbits);
+    bits = _mm256_and_si256(window, bits);
+  }
+};
+
+#endif
+
+#ifdef FJXL_NEON
+#define FJXL_GENERIC_SIMD
+
+struct SIMDVec32;
+
+// Per-lane boolean mask matching the 4 x 32-bit lanes of the NEON SIMDVec32.
+struct Mask32 {
+  uint32x4_t mask;
+
+  // Selects `if_true` bits where the mask is set and `if_false` bits
+  // elsewhere (defined after SIMDVec32).
+  SIMDVec32 IfThenElse(const SIMDVec32& if_true, const SIMDVec32& if_false);
+
+  // Bitwise conjunction of two masks.
+  Mask32 And(const Mask32& oth) const {
+    const uint32x4_t both = vandq_u32(mask, oth.mask);
+    return Mask32{both};
+  }
+
+  // Number of leading lanes (starting at lane 0) whose mask is set.
+  size_t CountPrefix() const {
+    // Set lanes contribute 4 and clear lanes their own index, so the minimum
+    // is the index of the first clear lane (or 4 if there is none).
+    const uint32_t lane_index[4] = {0, 1, 2, 3};
+    const uint32x4_t when_clear = vld1q_u32(lane_index);
+    const uint32x4_t when_set = vdupq_n_u32(4);
+    const uint32x4_t candidates = vbslq_u32(mask, when_set, when_clear);
+    return vminvq_u32(candidates);
+  }
+};
+
+// NEON vector of 4 lanes of 32 bits each.
+struct SIMDVec32 {
+  uint32x4_t vec;
+
+  static constexpr size_t kLanes = 4;
+
+  // Loads kLanes consecutive uint32_t values.
+  FJXL_INLINE static SIMDVec32 Load(const uint32_t* data) {
+    const uint32x4_t loaded = vld1q_u32(data);
+    return SIMDVec32{loaded};
+  }
+  // Stores all kLanes lanes to `data`.
+  FJXL_INLINE void Store(uint32_t* data) {
+    vst1q_u32(data, vec);
+  }
+  // Broadcasts `v` to every lane.
+  FJXL_INLINE static SIMDVec32 Val(uint32_t v) {
+    const uint32x4_t splat = vdupq_n_u32(v);
+    return SIMDVec32{splat};
+  }
+  // Maps every lane to its bit length, i.e. 32 minus its leading zero count.
+  FJXL_INLINE SIMDVec32 ValToToken() const {
+    const uint32x4_t leading_zeros = vclzq_u32(vec);
+    return SIMDVec32{vsubq_u32(vdupq_n_u32(32), leading_zeros)};
+  }
+  // Unsigned saturating subtraction: lanes clamp at 0 instead of wrapping.
+  FJXL_INLINE SIMDVec32 SatSubU(const SIMDVec32& to_subtract) const {
+    const uint32x4_t clamped = vqsubq_u32(vec, to_subtract.vec);
+    return SIMDVec32{clamped};
+  }
+  // Wrapping lane-wise subtraction.
+  FJXL_INLINE SIMDVec32 Sub(const SIMDVec32& to_subtract) const {
+    const uint32x4_t difference = vsubq_u32(vec, to_subtract.vec);
+    return SIMDVec32{difference};
+  }
+  // Wrapping lane-wise addition.
+  FJXL_INLINE SIMDVec32 Add(const SIMDVec32& oth) const {
+    const uint32x4_t sum = vaddq_u32(vec, oth.vec);
+    return SIMDVec32{sum};
+  }
+  // Bitwise XOR.
+  FJXL_INLINE SIMDVec32 Xor(const SIMDVec32& oth) const {
+    const uint32x4_t flipped = veorq_u32(vec, oth.vec);
+    return SIMDVec32{flipped};
+  }
+  // Computes `1 << lane` for every lane; the lane value is reinterpreted as
+  // the signed shift count expected by the intrinsic.
+  FJXL_INLINE SIMDVec32 Pow2() const {
+    const int32x4_t shift = vreinterpretq_s32_u32(vec);
+    return SIMDVec32{vshlq_u32(vdupq_n_u32(1), shift)};
+  }
+  // Lane-wise equality; matching lanes are all-ones in the mask.
+  FJXL_INLINE Mask32 Eq(const SIMDVec32& oth) const {
+    const uint32x4_t equal_lanes = vceqq_u32(vec, oth.vec);
+    return Mask32{equal_lanes};
+  }
+  // Lane-wise `vec > oth.vec`, comparing lanes as *signed* 32-bit integers.
+  FJXL_INLINE Mask32 Gt(const SIMDVec32& oth) const {
+    const int32x4_t lhs = vreinterpretq_s32_u32(vec);
+    const int32x4_t rhs = vreinterpretq_s32_u32(oth.vec);
+    return Mask32{vcgtq_s32(lhs, rhs)};
+  }
+  // Arithmetic (sign-propagating) right shift of every lane by `i` bits.
+  template <size_t i>
+  FJXL_INLINE SIMDVec32 SignedShiftRight() const {
+    const int32x4_t as_signed = vreinterpretq_s32_u32(vec);
+    return SIMDVec32{vreinterpretq_u32_s32(vshrq_n_s32(as_signed, i))};
+  }
+};
+
+struct SIMDVec16;
+
+// Per-lane boolean mask matching the 8 x 16-bit lanes of the NEON SIMDVec16.
+struct Mask16 {
+  uint16x8_t mask;
+
+  // Selects `if_true` bits where the mask is set and `if_false` bits
+  // elsewhere (defined after SIMDVec16).
+  SIMDVec16 IfThenElse(const SIMDVec16& if_true, const SIMDVec16& if_false);
+
+  // Bitwise conjunction of two masks.
+  Mask16 And(const Mask16& oth) const {
+    const uint16x8_t both = vandq_u16(mask, oth.mask);
+    return Mask16{both};
+  }
+
+  // Number of leading lanes (starting at lane 0) whose mask is set.
+  size_t CountPrefix() const {
+    // Set lanes contribute 8 and clear lanes their own index, so the minimum
+    // is the index of the first clear lane (or 8 if there is none).
+    const uint16_t lane_index[8] = {0, 1, 2, 3, 4, 5, 6, 7};
+    const uint16x8_t when_clear = vld1q_u16(lane_index);
+    const uint16x8_t when_set = vdupq_n_u16(8);
+    const uint16x8_t candidates = vbslq_u16(mask, when_set, when_clear);
+    return vminvq_u16(candidates);
+  }
+};
+
+// NEON vector of 8 lanes of 16 bits each.
+struct SIMDVec16 {
+  uint16x8_t vec;
+
+  static constexpr size_t kLanes = 8;
+
+  // Loads kLanes consecutive uint16_t values.
+  FJXL_INLINE static SIMDVec16 Load(const uint16_t* data) {
+    return SIMDVec16{vld1q_u16(data)};
+  }
+  // Stores all kLanes lanes to `data`.
+  FJXL_INLINE void Store(uint16_t* data) { vst1q_u16(data, vec); }
+  // Broadcasts `v` to every lane.
+  FJXL_INLINE static SIMDVec16 Val(uint16_t v) {
+    return SIMDVec16{vdupq_n_u16(v)};
+  }
+  // Narrows two 32-bit vectors into one 16-bit vector (`lo` lanes first, then
+  // `hi` lanes). The narrowing keeps the low 16 bits of each lane; it does
+  // not saturate.
+  FJXL_INLINE static SIMDVec16 FromTwo32(const SIMDVec32& lo,
+                                         const SIMDVec32& hi) {
+    return SIMDVec16{vmovn_high_u32(vmovn_u32(lo.vec), hi.vec)};
+  }
+
+  // Maps every lane to its bit length, i.e. 16 minus its leading zero count.
+  FJXL_INLINE SIMDVec16 ValToToken() const {
+    return SIMDVec16{vsubq_u16(vdupq_n_u16(16), vclzq_u16(vec))};
+  }
+  // Unsigned saturating subtraction: lanes clamp at 0 instead of wrapping.
+  FJXL_INLINE SIMDVec16 SatSubU(const SIMDVec16& to_subtract) const {
+    return SIMDVec16{vqsubq_u16(vec, to_subtract.vec)};
+  }
+  // Wrapping lane-wise subtraction.
+  FJXL_INLINE SIMDVec16 Sub(const SIMDVec16& to_subtract) const {
+    return SIMDVec16{vsubq_u16(vec, to_subtract.vec)};
+  }
+  // Wrapping lane-wise addition.
+  FJXL_INLINE SIMDVec16 Add(const SIMDVec16& oth) const {
+    return SIMDVec16{vaddq_u16(vec, oth.vec)};
+  }
+  // Lane-wise unsigned minimum.
+  FJXL_INLINE SIMDVec16 Min(const SIMDVec16& oth) const {
+    return SIMDVec16{vminq_u16(vec, oth.vec)};
+  }
+  // Lane-wise equality; matching lanes are all-ones in the mask.
+  FJXL_INLINE Mask16 Eq(const SIMDVec16& oth) const {
+    return Mask16{vceqq_u16(vec, oth.vec)};
+  }
+  // Lane-wise `vec > oth.vec`, comparing lanes as *signed* 16-bit integers.
+  FJXL_INLINE Mask16 Gt(const SIMDVec16& oth) const {
+    return Mask16{
+        vcgtq_s16(vreinterpretq_s16_u16(vec), vreinterpretq_s16_u16(oth.vec))};
+  }
+  // Computes `1 << lane` for every lane; the lane value is reinterpreted as
+  // the signed shift count expected by the intrinsic.
+  FJXL_INLINE SIMDVec16 Pow2() const {
+    return SIMDVec16{vshlq_u16(vdupq_n_u16(1), vreinterpretq_s16_u16(vec))};
+  }
+  // Bitwise OR.
+  FJXL_INLINE SIMDVec16 Or(const SIMDVec16& oth) const {
+    return SIMDVec16{vorrq_u16(vec, oth.vec)};
+  }
+  // Bitwise XOR.
+  FJXL_INLINE SIMDVec16 Xor(const SIMDVec16& oth) const {
+    return SIMDVec16{veorq_u16(vec, oth.vec)};
+  }
+  // Bitwise AND.
+  FJXL_INLINE SIMDVec16 And(const SIMDVec16& oth) const {
+    return SIMDVec16{vandq_u16(vec, oth.vec)};
+  }
+  // Unsigned halving add: `(vec + oth.vec) >> 1` computed without
+  // intermediate overflow.
+  FJXL_INLINE SIMDVec16 HAdd(const SIMDVec16& oth) const {
+    return SIMDVec16{vhaddq_u16(vec, oth.vec)};
+  }
+  // Sets the high byte of every lane to 0xFF so that a following U8Lookup
+  // produces a zero high byte (out-of-range table indices select zero).
+  FJXL_INLINE SIMDVec16 PrepareForU8Lookup() const {
+    return SIMDVec16{vorrq_u16(vec, vdupq_n_u16(0xFF00))};
+  }
+  // Byte-wise lookup into a 16-entry table: reads 16 bytes from `table` and
+  // uses the bytes of `vec` as indices.
+  FJXL_INLINE SIMDVec16 U8Lookup(const uint8_t* table) const {
+    uint8x16_t tbl = vld1q_u8(table);
+    uint8x16_t indices = vreinterpretq_u8_u16(vec);
+    return SIMDVec16{vreinterpretq_u16_u8(vqtbl1q_u8(tbl, indices))};
+  }
+  // Interleaves lanes as low[0], this[0], low[1], this[1], ...; the first
+  // returned vector holds pairs 0..3 and the second pairs 4..7.
+  FJXL_INLINE VecPair<SIMDVec16> Interleave(const SIMDVec16& low) const {
+    return {SIMDVec16{vzip1q_u16(low.vec, vec)},
+            SIMDVec16{vzip2q_u16(low.vec, vec)}};
+  }
+  // Zero-extends the 8 lanes to 32 bits: lanes 0..3 go to the first returned
+  // vector, lanes 4..7 to the second.
+  FJXL_INLINE VecPair<SIMDVec32> Upcast() const {
+    uint32x4_t lo = vmovl_u16(vget_low_u16(vec));
+    uint32x4_t hi = vmovl_high_u16(vec);
+    return {SIMDVec32{lo}, SIMDVec32{hi}};
+  }
+  // Arithmetic (sign-propagating) right shift of every lane by `i` bits.
+  template <size_t i>
+  FJXL_INLINE SIMDVec16 SignedShiftRight() const {
+    return SIMDVec16{
+        vreinterpretq_u16_s16(vshrq_n_s16(vreinterpretq_s16_u16(vec), i))};
+  }
+
+  // Loads 8 single-channel 8-bit samples (8 bytes) and zero-extends them.
+  static std::array<SIMDVec16, 1> LoadG8(const unsigned char* data) {
+    uint8x8_t v = vld1_u8(data);
+    return {SIMDVec16{vmovl_u8(v)}};
+  }
+  // Loads 8 single-channel 16-bit samples (16 bytes) in memory byte order.
+  static std::array<SIMDVec16, 1> LoadG16(const unsigned char* data) {
+    return {Load((const uint16_t*)data)};
+  }
+
+  // Loads 8 interleaved two-channel 8-bit pixels (16 bytes); the
+  // de-interleaving load splits them into one vector per channel.
+  static std::array<SIMDVec16, 2> LoadGA8(const unsigned char* data) {
+    uint8x8x2_t v = vld2_u8(data);
+    return {SIMDVec16{vmovl_u8(v.val[0])}, SIMDVec16{vmovl_u8(v.val[1])}};
+  }
+  // Loads 8 interleaved two-channel 16-bit pixels (32 bytes), one vector per
+  // channel.
+  static std::array<SIMDVec16, 2> LoadGA16(const unsigned char* data) {
+    uint16x8x2_t v = vld2q_u16((const uint16_t*)data);
+    return {SIMDVec16{v.val[0]}, SIMDVec16{v.val[1]}};
+  }
+
+  // Loads 8 interleaved three-channel 8-bit pixels (24 bytes), one
+  // zero-extended vector per channel.
+  static std::array<SIMDVec16, 3> LoadRGB8(const unsigned char* data) {
+    uint8x8x3_t v = vld3_u8(data);
+    return {SIMDVec16{vmovl_u8(v.val[0])}, SIMDVec16{vmovl_u8(v.val[1])},
+            SIMDVec16{vmovl_u8(v.val[2])}};
+  }
+  // Loads 8 interleaved three-channel 16-bit pixels (48 bytes), one vector
+  // per channel.
+  static std::array<SIMDVec16, 3> LoadRGB16(const unsigned char* data) {
+    uint16x8x3_t v = vld3q_u16((const uint16_t*)data);
+    return {SIMDVec16{v.val[0]}, SIMDVec16{v.val[1]}, SIMDVec16{v.val[2]}};
+  }
+
+  // Loads 8 interleaved four-channel 8-bit pixels (32 bytes), one
+  // zero-extended vector per channel.
+  static std::array<SIMDVec16, 4> LoadRGBA8(const unsigned char* data) {
+    uint8x8x4_t v = vld4_u8(data);
+    return {SIMDVec16{vmovl_u8(v.val[0])}, SIMDVec16{vmovl_u8(v.val[1])},
+            SIMDVec16{vmovl_u8(v.val[2])}, SIMDVec16{vmovl_u8(v.val[3])}};
+  }
+  // Loads 8 interleaved four-channel 16-bit pixels (64 bytes), one vector per
+  // channel.
+  static std::array<SIMDVec16, 4> LoadRGBA16(const unsigned char* data) {
+    uint16x8x4_t v = vld4q_u16((const uint16_t*)data);
+    return {SIMDVec16{v.val[0]}, SIMDVec16{v.val[1]}, SIMDVec16{v.val[2]},
+            SIMDVec16{v.val[3]}};
+  }
+
+  // Swaps the two bytes of every 16-bit lane in place.
+  void SwapEndian() {
+    vec = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(vec)));
+  }
+};
+
+// Bitwise select driven by the mask: set bits take `if_true`, clear bits take
+// `if_false`.
+SIMDVec16 Mask16::IfThenElse(const SIMDVec16& if_true,
+                             const SIMDVec16& if_false) {
+  const uint16x8_t selected = vbslq_u16(mask, if_true.vec, if_false.vec);
+  return SIMDVec16{selected};
+}
+
+// Bitwise select driven by the mask: set bits take `if_true`, clear bits take
+// `if_false`.
+SIMDVec32 Mask32::IfThenElse(const SIMDVec32& if_true,
+                             const SIMDVec32& if_false) {
+  const uint32x4_t selected = vbslq_u32(mask, if_true.vec, if_false.vec);
+  return SIMDVec32{selected};
+}
+
+// Two (bit count, bit value) pairs, one per 64-bit lane of a NEON register.
+struct Bits64 {
+  uint64x2_t nbits;
+  uint64x2_t bits;
+
+  static constexpr size_t kLanes = 2;
+
+  // Writes kLanes bit counts to `nbits_out` and then kLanes bit values to
+  // `bits_out`.
+  FJXL_INLINE void Store(uint64_t* nbits_out, uint64_t* bits_out) {
+    uint64_t* const counts_dst = nbits_out;
+    uint64_t* const values_dst = bits_out;
+    vst1q_u64(counts_dst, nbits);
+    vst1q_u64(values_dst, bits);
+  }
+};
+
+// Four (bit count, bit value) pairs, one per 32-bit lane of a NEON register.
+struct Bits32 {
+  uint32x4_t nbits;
+  uint32x4_t bits;
+
+  // Wraps already-computed lane vectors without modifying them.
+  static Bits32 FromRaw(SIMDVec32 nbits, SIMDVec32 bits) {
+    return Bits32{nbits.vec, bits.vec};
+  }
+
+  // Fuses each adjacent pair of 32-bit lanes into one 64-bit lane: the odd
+  // lane's bits are placed above the even lane's bits and the two counts are
+  // summed.
+  Bits64 Merge() const {
+    // TODO(veluca): can probably be optimized.
+    const uint64x2_t wide_nbits = vreinterpretq_u64_u32(nbits);
+    const uint64x2_t wide_bits = vreinterpretq_u64_u32(bits);
+    const uint64x2_t low_half = vdupq_n_u64(0xFFFFFFFF);
+
+    const uint64x2_t count_lo = vandq_u64(wide_nbits, low_half);
+    const uint64x2_t value_lo = vandq_u64(wide_bits, low_half);
+    const uint64x2_t value_hi = vshlq_u64(vshrq_n_u64(wide_bits, 32),
+                                          vreinterpretq_s64_u64(count_lo));
+
+    // Shift-right-and-accumulate adds the odd lane's count to the even one.
+    const uint64x2_t merged_count = vsraq_n_u64(count_lo, wide_nbits, 32);
+    const uint64x2_t merged_bits = vorrq_u64(value_hi, value_lo);
+    return Bits64{merged_count, merged_bits};
+  }
+
+  // Prepends `low` to every lane: this lane's bits are shifted up by
+  // low.nbits, OR-ed with low.bits, and the counts are added.
+  void Interleave(const Bits32& low) {
+    const int32x4_t shift = vreinterpretq_s32_u32(low.nbits);
+    bits = vorrq_u32(vshlq_u32(bits, shift), low.bits);
+    nbits = vaddq_u32(nbits, low.nbits);
+  }
+
+  // Keeps the first min(n, 4) lanes and zeroes all the others.
+  void ClipTo(size_t n) {
+    const size_t kept = std::min<size_t>(n, 4);
+    // Sliding a 4-lane window over "4 ones then 4 zeros" gives a mask whose
+    // first `kept` lanes are all-ones.
+    constexpr uint32_t kMask[8] = {
+        ~0u, ~0u, ~0u, ~0u, 0, 0, 0, 0,
+    };
+    const uint32x4_t window = vld1q_u32(kMask + (4 - kept));
+    nbits = vandq_u32(window, nbits);
+    bits = vandq_u32(window, bits);
+  }
+
+  // Zeroes the first min(n, 4) lanes and keeps all the others.
+  void Skip(size_t n) {
+    const size_t dropped = std::min<size_t>(n, 4);
+    // Sliding a 4-lane window over "4 zeros then 4 ones" gives a mask whose
+    // first `dropped` lanes are zero.
+    constexpr uint32_t kMask[8] = {
+        0, 0, 0, 0, ~0u, ~0u, ~0u, ~0u,
+    };
+    const uint32x4_t window = vld1q_u32(kMask + (4 - dropped));
+    nbits = vandq_u32(window, nbits);
+    bits = vandq_u32(window, bits);
+  }
+};
+
+// Eight (bit count, bit value) pairs, one per 16-bit lane of a NEON register.
+struct Bits16 {
+  uint16x8_t nbits;
+  uint16x8_t bits;
+
+  // Wraps already-computed lane vectors without modifying them.
+  static Bits16 FromRaw(SIMDVec16 nbits, SIMDVec16 bits) {
+    return Bits16{nbits.vec, bits.vec};
+  }
+
+  // Fuses each adjacent pair of 16-bit lanes into one 32-bit lane: the odd
+  // lane's bits are placed above the even lane's bits and the two counts are
+  // summed.
+  Bits32 Merge() const {
+    // TODO(veluca): can probably be optimized.
+    const uint32x4_t wide_nbits = vreinterpretq_u32_u16(nbits);
+    const uint32x4_t wide_bits = vreinterpretq_u32_u16(bits);
+    const uint32x4_t low_half = vdupq_n_u32(0xFFFF);
+
+    const uint32x4_t count_lo = vandq_u32(wide_nbits, low_half);
+    const uint32x4_t value_lo = vandq_u32(wide_bits, low_half);
+    const uint32x4_t value_hi = vshlq_u32(vshrq_n_u32(wide_bits, 16),
+                                          vreinterpretq_s32_u32(count_lo));
+
+    // Shift-right-and-accumulate adds the odd lane's count to the even one.
+    const uint32x4_t merged_count = vsraq_n_u32(count_lo, wide_nbits, 16);
+    const uint32x4_t merged_bits = vorrq_u32(value_hi, value_lo);
+    return Bits32{merged_count, merged_bits};
+  }
+
+  // Prepends `low` to every lane: this lane's bits are shifted up by
+  // low.nbits, OR-ed with low.bits, and the counts are added.
+  void Interleave(const Bits16& low) {
+    const int16x8_t shift = vreinterpretq_s16_u16(low.nbits);
+    bits = vorrq_u16(vshlq_u16(bits, shift), low.bits);
+    nbits = vaddq_u16(nbits, low.nbits);
+  }
+
+  // Keeps the first min(n, 8) lanes and zeroes all the others.
+  void ClipTo(size_t n) {
+    const size_t kept = std::min<size_t>(n, 8);
+    // Sliding an 8-lane window over "8 ones then 8 zeros" gives a mask whose
+    // first `kept` lanes are all-ones.
+    constexpr uint16_t kMask[16] = {
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+        0,      0,      0,      0,      0,      0,      0,      0,
+    };
+    const uint16x8_t window = vld1q_u16(kMask + (8 - kept));
+    nbits = vandq_u16(window, nbits);
+    bits = vandq_u16(window, bits);
+  }
+
+  // Zeroes the first min(n, 8) lanes and keeps all the others.
+  void Skip(size_t n) {
+    const size_t dropped = std::min<size_t>(n, 8);
+    // Sliding an 8-lane window over "8 zeros then 8 ones" gives a mask whose
+    // first `dropped` lanes are zero.
+    constexpr uint16_t kMask[16] = {
+        0,      0,      0,      0,      0,      0,      0,      0,
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+    };
+    const uint16x8_t window = vld1q_u16(kMask + (8 - dropped));
+    nbits = vandq_u16(window, nbits);
+    bits = vandq_u16(window, bits);
+  }
+};
+
+#endif
+
+#ifdef FJXL_GENERIC_SIMD
+// Out-of-class definitions of the static constexpr lane counts. Before C++17
+// these are required when kLanes is odr-used, e.g. when it is bound to the
+// const-reference parameters of std::max.
+constexpr size_t SIMDVec32::kLanes;
+constexpr size_t SIMDVec16::kLanes;
+
+// Each of these functions processes SIMDVec16::kLanes values per call.
+
+// Splits SIMDVec16::kLanes 16-bit residuals into a token plus raw bits.
+//   token = ValToToken(residual)
+//   nbits = token - 1, saturating at 0
+//   bits  = residual - 2^nbits, saturating at 0
+// All three results are written as kLanes uint16_t values.
+FJXL_INLINE void TokenizeSIMD(const uint16_t* residuals, uint16_t* token_out,
+                              uint16_t* nbits_out, uint16_t* bits_out) {
+  const SIMDVec16 values = SIMDVec16::Load(residuals);
+  const SIMDVec16 one = SIMDVec16::Val(1);
+
+  SIMDVec16 tokens = values.ValToToken();
+  SIMDVec16 extra_count = tokens.SatSubU(one);
+  SIMDVec16 extra_bits = values.SatSubU(extra_count.Pow2());
+
+  tokens.Store(token_out);
+  extra_count.Store(nbits_out);
+  extra_bits.Store(bits_out);
+}
+
+// 32-bit variant of the tokenizer: handles SIMDVec16::kLanes residuals as two
+// SIMDVec32 halves. Tokens are narrowed to uint16_t; the bit counts and raw
+// bits stay 32 bits wide.
+FJXL_INLINE void TokenizeSIMD(const uint32_t* residuals, uint16_t* token_out,
+                              uint32_t* nbits_out, uint32_t* bits_out) {
+  static_assert(SIMDVec16::kLanes == 2 * SIMDVec32::kLanes, "");
+  constexpr size_t kHalf = SIMDVec32::kLanes;
+  const SIMDVec32 one = SIMDVec32::Val(1);
+
+  // First half of the lanes.
+  const SIMDVec32 values_a = SIMDVec32::Load(residuals);
+  const SIMDVec32 tokens_a = values_a.ValToToken();
+  SIMDVec32 count_a = tokens_a.SatSubU(one);
+  SIMDVec32 extra_a = values_a.SatSubU(count_a.Pow2());
+
+  // Second half of the lanes.
+  const SIMDVec32 values_b = SIMDVec32::Load(residuals + kHalf);
+  const SIMDVec32 tokens_b = values_b.ValToToken();
+  SIMDVec32 count_b = tokens_b.SatSubU(one);
+  SIMDVec32 extra_b = values_b.SatSubU(count_b.Pow2());
+
+  SIMDVec16 tokens = SIMDVec16::FromTwo32(tokens_a, tokens_b);
+  tokens.Store(token_out);
+  count_a.Store(nbits_out);
+  count_b.Store(nbits_out + kHalf);
+  extra_a.Store(bits_out);
+  extra_b.Store(bits_out + kHalf);
+}
+
+// Looks up the prefix-code length and bits of SIMDVec16::kLanes tokens
+// directly in the 16-entry byte tables of `code`.
+FJXL_INLINE void HuffmanSIMDUpTo13(const uint16_t* tokens,
+                                   const PrefixCode& code, uint16_t* nbits_out,
+                                   uint16_t* bits_out) {
+  const SIMDVec16 index = SIMDVec16::Load(tokens).PrepareForU8Lookup();
+
+  SIMDVec16 code_lengths = index.U8Lookup(code.raw_nbits_simd);
+  code_lengths.Store(nbits_out);
+
+  SIMDVec16 code_bits = index.U8Lookup(code.raw_bits_simd);
+  code_bits.Store(bits_out);
+}
+
+// Prefix-code lookup for SIMDVec16::kLanes tokens when tokens up to 16 can
+// occur. Tokens are clamped to 15 for the table lookup.
+FJXL_INLINE void HuffmanSIMD14(const uint16_t* tokens, const PrefixCode& code,
+                               uint16_t* nbits_out, uint16_t* bits_out) {
+  const SIMDVec16 raw_tokens = SIMDVec16::Load(tokens);
+  const SIMDVec16 index =
+      raw_tokens.Min(SIMDVec16::Val(15)).PrepareForU8Lookup();
+  const SIMDVec16 table_bits = index.U8Lookup(code.raw_bits_simd);
+
+  // Token 16 shares its code with token 15 except for the highest bit (the
+  // Huffman code is constructed that way), so that bit is set here.
+  Mask16 is_sixteen = raw_tokens.Eq(SIMDVec16::Val(16));
+  const SIMDVec16 with_high_bit = table_bits.Or(SIMDVec16::Val(128));
+  SIMDVec16 code_bits = is_sixteen.IfThenElse(with_high_bit, table_bits);
+  code_bits.Store(bits_out);
+
+  SIMDVec16 code_lengths = index.U8Lookup(code.raw_nbits_simd);
+  code_lengths.Store(nbits_out);
+}
+
+// Prefix-code lookup for SIMDVec16::kLanes tokens when tokens above 16 can
+// occur. Tokens greater than 12 are folded pairwise onto table entries:
+//   13, 14 -> 13
+//   15, 16 -> 14
+//   17, 18 -> 15
+// The even member of each pair (14, 16, 18) then gets the highest bit set.
+FJXL_INLINE void HuffmanSIMDAbove14(const uint16_t* tokens,
+                                    const PrefixCode& code, uint16_t* nbits_out,
+                                    uint16_t* bits_out) {
+  const SIMDVec16 raw_tokens = SIMDVec16::Load(tokens);
+  // The comparison is signed: tokens are assumed to fit in a *signed* 16-bit
+  // integer.
+  Mask16 is_folded = raw_tokens.Gt(SIMDVec16::Val(12));
+  const SIMDVec16 folded = raw_tokens.HAdd(SIMDVec16::Val(13));
+  const SIMDVec16 index =
+      is_folded.IfThenElse(folded, raw_tokens).PrepareForU8Lookup();
+  const SIMDVec16 table_bits = index.U8Lookup(code.raw_bits_simd);
+
+  // A token is even exactly when clearing its lowest bit leaves it unchanged.
+  const Mask16 is_even =
+      raw_tokens.Eq(raw_tokens.And(SIMDVec16::Val(0xFFFE)));
+  Mask16 wants_high_bit = is_folded.And(is_even);
+  const SIMDVec16 with_high_bit = table_bits.Or(SIMDVec16::Val(128));
+  SIMDVec16 code_bits = wants_high_bit.IfThenElse(with_high_bit, table_bits);
+  code_bits.Store(bits_out);
+
+  SIMDVec16 code_lengths = index.U8Lookup(code.raw_nbits_simd);
+  code_lengths.Store(nbits_out);
+}
+
+// Combines the raw bits and the Huffman bits of SIMDVec16::kLanes symbols
+// within 16-bit lanes (Huffman bits lowest), keeps only lanes in
+// [skip, n), and writes the pairwise-merged result to bits_out[0].
+FJXL_INLINE void StoreSIMDUpTo8(const uint16_t* nbits_tok,
+                                const uint16_t* bits_tok,
+                                const uint16_t* nbits_huff,
+                                const uint16_t* bits_huff, size_t n,
+                                size_t skip, Bits32* bits_out) {
+  const SIMDVec16 raw_counts = SIMDVec16::Load(nbits_tok);
+  const SIMDVec16 raw_values = SIMDVec16::Load(bits_tok);
+  Bits16 combined = Bits16::FromRaw(raw_counts, raw_values);
+
+  const SIMDVec16 huff_counts = SIMDVec16::Load(nbits_huff);
+  const SIMDVec16 huff_values = SIMDVec16::Load(bits_huff);
+  const Bits16 prefix = Bits16::FromRaw(huff_counts, huff_values);
+
+  combined.Interleave(prefix);
+  combined.ClipTo(n);
+  combined.Skip(skip);
+  *bits_out = combined.Merge();
+}
+
+// Huffman and raw bits don't necessarily fit in a single u16 here, so each
+// symbol occupies two adjacent 16-bit lanes (Huffman lane first, raw lane
+// second) that are merged into one 32-bit lane. Lane limits are therefore
+// doubled, and the output spans bits_out[0] and bits_out[1].
+FJXL_INLINE void StoreSIMDUpTo14(const uint16_t* nbits_tok,
+                                 const uint16_t* bits_tok,
+                                 const uint16_t* nbits_huff,
+                                 const uint16_t* bits_huff, size_t n,
+                                 size_t skip, Bits32* bits_out) {
+  constexpr size_t kHalf = SIMDVec16::kLanes;
+  const size_t clip_lanes = 2 * n;
+  const size_t skip_lanes = 2 * skip;
+
+  const SIMDVec16 huff_values = SIMDVec16::Load(bits_huff);
+  VecPair<SIMDVec16> values =
+      SIMDVec16::Load(bits_tok).Interleave(huff_values);
+  const SIMDVec16 huff_counts = SIMDVec16::Load(nbits_huff);
+  VecPair<SIMDVec16> counts =
+      SIMDVec16::Load(nbits_tok).Interleave(huff_counts);
+
+  Bits16 first = Bits16::FromRaw(counts.low, values.low);
+  Bits16 second = Bits16::FromRaw(counts.hi, values.hi);
+
+  first.ClipTo(clip_lanes);
+  first.Skip(skip_lanes);
+  // The second vector covers lanes kHalf and up, so its limits are the
+  // doubled limits minus kHalf, floored at zero.
+  second.ClipTo(std::max(clip_lanes, kHalf) - kHalf);
+  second.Skip(std::max(skip_lanes, kHalf) - kHalf);
+
+  bits_out[0] = first.Merge();
+  bits_out[1] = second.Merge();
+}
+
+// Variant for more than 14 bits per sample: raw bits are already 32-bit wide
+// (two SIMDVec32 per SIMDVec16 worth of samples), while Huffman bits arrive as
+// 16-bit lanes and are widened before being interleaved with the raw bits.
+// Produces two Bits32 values, one per half of the samples.
+FJXL_INLINE void StoreSIMDAbove14(const uint32_t* nbits_tok,
+                                  const uint32_t* bits_tok,
+                                  const uint16_t* nbits_huff,
+                                  const uint16_t* bits_huff, size_t n,
+                                  size_t skip, Bits32* bits_out) {
+  static_assert(SIMDVec16::kLanes == 2 * SIMDVec32::kLanes, "");
+  const size_t half = SIMDVec32::kLanes;
+
+  // Widen the Huffman codes to 32-bit lanes.
+  VecPair<SIMDVec32> prefix_vals = SIMDVec16::Load(bits_huff).Upcast();
+  VecPair<SIMDVec32> prefix_lens = SIMDVec16::Load(nbits_huff).Upcast();
+
+  // First half of the samples.
+  Bits32 first = Bits32::FromRaw(SIMDVec32::Load(nbits_tok),
+                                 SIMDVec32::Load(bits_tok));
+  first.Interleave(Bits32::FromRaw(prefix_lens.low, prefix_vals.low));
+  first.ClipTo(n);
+  first.Skip(skip);
+  bits_out[0] = first;
+
+  // Second half: limits are reduced by the lanes the first half consumed.
+  Bits32 second = Bits32::FromRaw(SIMDVec32::Load(nbits_tok + half),
+                                  SIMDVec32::Load(bits_tok + half));
+  second.Interleave(Bits32::FromRaw(prefix_lens.hi, prefix_vals.hi));
+  second.ClipTo(n > half ? n - half : 0);
+  second.Skip(skip > half ? skip - half : 0);
+  bits_out[1] = second;
+}
+
+#ifdef FJXL_AVX512
+// Appends the variable-length bit chunks held in `bits32` (one chunk per
+// 32-bit lane, with per-lane lengths in `bits32.nbits`) to `output`, doing the
+// packing with AVX-512 instructions. The writer's pending partial byte is
+// folded in as the first chunk; the top lane of the input is handled
+// separately at the end with scalar code. Updates `output.bytes_written`,
+// `output.buffer` and `output.bits_in_buffer`.
+FJXL_INLINE void StoreToWriterAVX512(const Bits32& bits32, BitWriter& output) {
+  __m512i bits = bits32.bits;
+  __m512i nbits = bits32.nbits;
+
+  // Insert the leftover bits from the bit buffer at the bottom of the vector
+  // and extract the top of the vector.
+  // (Rotating by 15 lanes brings lane 15 down to lane 0, which is what
+  // _mm512_cvtsi512_si32 reads.)
+  uint64_t trail_bits =
+      _mm512_cvtsi512_si32(_mm512_alignr_epi32(bits, bits, 15));
+  uint64_t trail_nbits =
+      _mm512_cvtsi512_si32(_mm512_alignr_epi32(nbits, nbits, 15));
+  __m512i lead_bits = _mm512_set1_epi32(output.buffer);
+  __m512i lead_nbits = _mm512_set1_epi32(output.bits_in_buffer);
+  bits = _mm512_alignr_epi32(bits, lead_bits, 15);
+  nbits = _mm512_alignr_epi32(nbits, lead_nbits, 15);
+
+  // Merge 32 -> 64 bits.
+  Bits32 b{nbits, bits};
+  Bits64 b64 = b.Merge();
+  bits = b64.bits;
+  nbits = b64.nbits;
+
+  __m512i zero = _mm512_setzero_si512();
+
+  // shN moves every 64-bit lane up by N lanes, filling the bottom with zeros.
+  auto sh1 = [zero](__m512i vec) { return _mm512_alignr_epi64(vec, zero, 7); };
+  auto sh2 = [zero](__m512i vec) { return _mm512_alignr_epi64(vec, zero, 6); };
+  auto sh4 = [zero](__m512i vec) { return _mm512_alignr_epi64(vec, zero, 4); };
+
+  // Compute first-past-end-bit-position.
+  // (Inclusive prefix sum of the lane lengths, built in log2(8) = 3 steps.)
+  __m512i end_interm0 = _mm512_add_epi64(nbits, sh1(nbits));
+  __m512i end_interm1 = _mm512_add_epi64(end_interm0, sh2(end_interm0));
+  __m512i end = _mm512_add_epi64(end_interm1, sh4(end_interm1));
+
+  // Total number of bits held by the vector: the prefix sum's top lane.
+  uint64_t simd_nbits = _mm512_cvtsi512_si32(_mm512_alignr_epi64(end, end, 7));
+
+  // Compute begin-bit-position.
+  __m512i begin = _mm512_sub_epi64(end, nbits);
+
+  // Index of the last bit in the chunk, or the end bit if nbits==0.
+  __m512i last = _mm512_mask_sub_epi64(
+      end, _mm512_cmpneq_epi64_mask(nbits, zero), end, _mm512_set1_epi64(1));
+
+  __m512i lane_offset_mask = _mm512_set1_epi64(63);
+
+  // Starting position of the chunk that each lane will ultimately belong to.
+  __m512i chunk_start = _mm512_andnot_si512(lane_offset_mask, last);
+
+  // For all lanes that contain bits belonging to two different 64-bit chunks,
+  // compute the number of bits that belong to the first chunk.
+  // total # of bits fit in a u16, so we can satsub_u16 here.
+  __m512i first_chunk_nbits = _mm512_subs_epu16(chunk_start, begin);
+
+  // Move all the previous-chunk-bits to the previous lane.
+  __m512i negnbits = _mm512_sub_epi64(_mm512_set1_epi64(64), first_chunk_nbits);
+  __m512i first_chunk_bits =
+      _mm512_srlv_epi64(_mm512_sllv_epi64(bits, negnbits), negnbits);
+  // Shifting by two 32-bit lanes moves each 64-bit lane down by one.
+  __m512i first_chunk_bits_down =
+      _mm512_alignr_epi32(zero, first_chunk_bits, 2);
+  bits = _mm512_srlv_epi64(bits, first_chunk_nbits);
+  nbits = _mm512_sub_epi64(nbits, first_chunk_nbits);
+  bits = _mm512_or_si512(bits, _mm512_sllv_epi64(first_chunk_bits_down, nbits));
+  begin = _mm512_add_epi64(begin, first_chunk_nbits);
+
+  // We now know that every lane should give bits to only one chunk. We can
+  // shift the bits and then horizontally-or-reduce them within the same chunk.
+  __m512i offset = _mm512_and_si512(begin, lane_offset_mask);
+  __m512i aligned_bits = _mm512_sllv_epi64(bits, offset);
+  // h-or-reduce within same chunk
+  __m512i red0 = _mm512_mask_or_epi64(
+      aligned_bits, _mm512_cmpeq_epi64_mask(sh1(chunk_start), chunk_start),
+      sh1(aligned_bits), aligned_bits);
+  __m512i red1 = _mm512_mask_or_epi64(
+      red0, _mm512_cmpeq_epi64_mask(sh2(chunk_start), chunk_start), sh2(red0),
+      red0);
+  __m512i reduced = _mm512_mask_or_epi64(
+      red1, _mm512_cmpeq_epi64_mask(sh4(chunk_start), chunk_start), sh4(red1),
+      red1);
+  // Extract the highest lane that belongs to each chunk (the lane that ends up
+  // with the OR-ed value of all the other lanes of that chunk).
+  __m512i next_chunk_start =
+      _mm512_alignr_epi32(_mm512_set1_epi64(~0), chunk_start, 2);
+  __m512i result = _mm512_maskz_compress_epi64(
+      _mm512_cmpneq_epi64_mask(chunk_start, next_chunk_start), reduced);
+
+  // Unaligned 64-byte store at the current write position.
+  _mm512_storeu_si512((__m512i*)(output.data.get() + output.bytes_written),
+                      result);
+
+  // Update the bit writer and add the last 32-bit lane.
+  // Note that since trail_nbits was at most 32 to begin with, operating on
+  // trail_bits does not risk overflowing.
+  output.bytes_written += simd_nbits / 8;
+  // Here we are implicitly relying on the fact that simd_nbits < 512 to know
+  // that the byte of bit writer data we access is initialized. This is
+  // guaranteed because the remaining bits in the bit writer buffer are at most
+  // 7, so simd_nbits <= 505 always.
+  trail_bits = (trail_bits << (simd_nbits % 8)) +
+               output.data.get()[output.bytes_written];
+  trail_nbits += simd_nbits % 8;
+  StoreLE64(output.data.get() + output.bytes_written, trail_bits);
+  // Whole bytes of the trailing lane are committed; the remainder stays in
+  // the writer's bit buffer for the next write.
+  size_t trail_bytes = trail_nbits / 8;
+  output.bits_in_buffer = trail_nbits % 8;
+  output.buffer = trail_bits >> (trail_bytes * 8);
+  output.bytes_written += trail_bytes;
+}
+
+#endif
+
+// Flushes `n` vectors of packed bit chunks (`bits[0..n)`) to `output`.
+// With AVX-512 each vector is written by StoreToWriterAVX512; otherwise the
+// chunks are widened to 64 bits, spilled to aligned scratch arrays and handed
+// to BitWriter::WriteMultiple in one call.
+template <size_t n>
+FJXL_INLINE void StoreToWriter(const Bits32* bits, BitWriter& output) {
+#ifdef FJXL_AVX512
+  static_assert(n <= 2, "");
+  StoreToWriterAVX512(bits[0], output);
+  if (n > 1) {
+    StoreToWriterAVX512(bits[1], output);
+  }
+  return;
+#endif
+  static_assert(n <= 4, "");
+  alignas(64) uint64_t nbits64[Bits64::kLanes * n];
+  alignas(64) uint64_t bits64[Bits64::kLanes * n];
+  // The first vector is always present; the rest depend on `n`.
+  bits[0].Merge().Store(nbits64, bits64);
+  for (size_t v = 1; v < n; v++) {
+    const size_t offset = v * Bits64::kLanes;
+    bits[v].Merge().Store(nbits64 + offset, bits64 + offset);
+  }
+  output.WriteMultiple(nbits64, bits64, Bits64::kLanes * n);
+}
+
+namespace detail {
+
+// Maps a SIMD vector type to the signed/unsigned scalar types of its lanes.
+template <typename T>
+struct IntegerTypes;
+
+template <>
+struct IntegerTypes<SIMDVec16> {
+  typedef int16_t signed_;
+  typedef uint16_t unsigned_;
+};
+
+template <>
+struct IntegerTypes<SIMDVec32> {
+  typedef int32_t signed_;
+  typedef uint32_t unsigned_;
+};
+
+// Inverse mapping: signed scalar pixel type -> SIMD vector type.
+template <typename T>
+struct SIMDType;
+
+template <>
+struct SIMDType<int16_t> {
+  typedef SIMDVec16 type;
+};
+
+template <>
+struct SIMDType<int32_t> {
+  typedef SIMDVec32 type;
+};
+
+}  // namespace detail
+
+// Signed lane type of SIMD vector type T.
+template <typename T>
+using signed_t = typename detail::IntegerTypes<T>::signed_;
+
+// Unsigned lane type of SIMD vector type T.
+template <typename T>
+using unsigned_t = typename detail::IntegerTypes<T>::unsigned_;
+
+// SIMD vector type whose lanes hold the signed scalar type T.
+template <typename T>
+using simd_t = typename detail::SIMDType<T>::type;
+
+// Processes exactly one vector worth of pixels: predicts each pixel from its
+// left, top and top-left neighbours, stores the sign-folded residuals
+// (non-negative r -> 2r, negative r -> -2r - 1) and returns the number of
+// leading residuals that are zero.
+template <typename T>
+size_t PredictPixels(const signed_t<T>* pixels, const signed_t<T>* pixels_left,
+                     const signed_t<T>* pixels_top,
+                     const signed_t<T>* pixels_topleft,
+                     unsigned_t<T>* residuals) {
+  using lane_t = unsigned_t<T>;
+  T nil = T::Val(0);
+  T cur = T::Load((lane_t*)pixels);
+  T west = T::Load((lane_t*)pixels_left);
+  T north = T::Load((lane_t*)pixels_top);
+  T northwest = T::Load((lane_t*)pixels_topleft);
+
+  T west_minus_nw = west.Sub(northwest);
+  T west_minus_north = west.Sub(north);
+  T north_minus_nw = north.Sub(northwest);
+
+  // Gradient candidate: west + north - northwest.
+  T gradient = west_minus_nw.Add(north);
+  // Fallback candidate: north or west, chosen by the sign of the XOR below.
+  T fallback_sel = west_minus_north.Xor(north_minus_nw);
+  T fallback = nil.Gt(fallback_sel).IfThenElse(north, west);
+  // The gradient is used only where this XOR is negative.
+  T gradient_sel = west_minus_nw.Xor(north_minus_nw);
+  T prediction = nil.Gt(gradient_sel).IfThenElse(gradient, fallback);
+
+  T diff = cur.Sub(prediction);
+  T twice = diff.Add(diff);
+  T folded = nil.Gt(diff).IfThenElse(T::Val(-1).Sub(twice), twice);
+  folded.Store(residuals);
+  return folded.Eq(T::Val(0)).CountPrefix();
+}
+
+#endif
+
+// Splits `value` into a token plus raw bits: zero maps to token 0 with no
+// extra bits; any other value maps to token floor(log2(value)) + 1, followed
+// by the floor(log2(value)) bits below its leading one.
+void EncodeHybridUint000(uint32_t value, uint32_t* token, uint32_t* nbits,
+                         uint32_t* bits) {
+  const uint32_t msb = FloorLog2(value);
+  if (value == 0) {
+    *token = 0;
+    *nbits = 0;
+    *bits = 0;
+    return;
+  }
+  *token = msb + 1;
+  *nbits = msb;
+  *bits = value - (1 << msb);
+}
+
+#ifdef FJXL_AVX512
+constexpr static size_t kLogChunkSize = 5;
+#elif defined(FJXL_AVX2) || defined(FJXL_NEON)
+// Even if NEON only has 128-bit lanes, it is still significantly (~1.3x) faster
+// to process two vectors at a time.
+constexpr static size_t kLogChunkSize = 4;
+#else
+constexpr static size_t kLogChunkSize = 3;
+#endif
+
+constexpr static size_t kChunkSize = 1 << kLogChunkSize;
+
+// Scalar encoder: for every residual in [skip, n) writes its prefix code
+// followed by its raw bits, using a single BitWriter::Write per residual.
+template <typename Residual>
+void GenericEncodeChunk(const Residual* residuals, size_t n, size_t skip,
+                        const PrefixCode& code, BitWriter& output) {
+  size_t pos = skip;
+  while (pos < n) {
+    unsigned token, nbits, bits;
+    EncodeHybridUint000(residuals[pos], &token, &nbits, &bits);
+    const auto prefix_len = code.raw_nbits[token];
+    const auto prefix_val = code.raw_bits[token];
+    // The prefix code occupies the low bits; raw bits go right above it.
+    output.Write(prefix_len + nbits, prefix_val | bits << prefix_len);
+    pos++;
+  }
+}
+
+// Bit-depth policy for samples of at most 8 bits.
+struct UpTo8Bits {
+  size_t bitdepth;
+  explicit UpTo8Bits(size_t bitdepth) : bitdepth(bitdepth) {
+    assert(bitdepth <= 8);
+  }
+  // Code-length limits per raw symbol. Raw symbols are capped at 7 Huffman
+  // bits so that Huffman bits plus extra bits fit in a u16 and because the
+  // AVX2 bit-merging logic assumes no Huffman length of 8 or more. The last
+  // entry is the one used for LZ77 lengths; its only constraint is that 32
+  // symbols in total must be representable.
+  static constexpr uint8_t kMinRawLength[12] = {};
+  static constexpr uint8_t kMaxRawLength[12] = {
+      7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10,
+  };
+  static size_t MaxEncodedBitsPerSample() { return 16; }
+  static constexpr size_t kInputBytes = 1;
+  using pixel_t = int16_t;
+  using upixel_t = uint16_t;
+
+  // Copies the first 16 code lengths/values into the SIMD lookup tables.
+  static void PrepareForSimd(const uint8_t* nbits, const uint8_t* bits,
+                             size_t n, uint8_t* nbits_simd,
+                             uint8_t* bits_simd) {
+    assert(n <= 16);
+    memcpy(nbits_simd, nbits, 16);
+    memcpy(bits_simd, bits, 16);
+  }
+
+  // Encodes residuals [skip, n) of one chunk, vectorized when available.
+  static void EncodeChunk(upixel_t* residuals, size_t n, size_t skip,
+                          const PrefixCode& code, BitWriter& output) {
+#ifdef FJXL_GENERIC_SIMD
+    constexpr size_t kLanes = SIMDVec16::kLanes;
+    constexpr size_t kVectors = kChunkSize / kLanes;
+    Bits32 packed[kVectors];
+    alignas(64) uint16_t raw_bits[kLanes];
+    alignas(64) uint16_t raw_nbits[kLanes];
+    alignas(64) uint16_t huff_bits[kLanes];
+    alignas(64) uint16_t huff_nbits[kLanes];
+    alignas(64) uint16_t tokens[kLanes];
+    for (size_t v = 0; v < kVectors; v++) {
+      const size_t base = v * kLanes;
+      // Limits relative to this vector, clamped at zero.
+      const size_t n_here = n > base ? n - base : 0;
+      const size_t skip_here = skip > base ? skip - base : 0;
+      TokenizeSIMD(residuals + base, tokens, raw_nbits, raw_bits);
+      HuffmanSIMDUpTo13(tokens, code, huff_nbits, huff_bits);
+      StoreSIMDUpTo8(raw_nbits, raw_bits, huff_nbits, huff_bits, n_here,
+                     skip_here, packed + v);
+    }
+    StoreToWriter<kVectors>(packed, output);
+    return;
+#endif
+    GenericEncodeChunk(residuals, n, skip, code, output);
+  }
+
+  // Number of raw symbols: 1 + effective residual bit depth, where residuals
+  // gain one bit from prediction and one more when YCoCg is applied.
+  size_t NumSymbols(bool doing_ycocg) const {
+    return bitdepth + (doing_ycocg ? 3 : 2);
+  }
+};
+constexpr uint8_t UpTo8Bits::kMinRawLength[];
+constexpr uint8_t UpTo8Bits::kMaxRawLength[];
+
+// Bit-depth policy for samples of 9 to 13 bits.
+struct From9To13Bits {
+  size_t bitdepth;
+  explicit From9To13Bits(size_t bitdepth) : bitdepth(bitdepth) {
+    assert(bitdepth <= 13 && bitdepth >= 9);
+  }
+  // Code-length limits per raw symbol. Huffman plus raw bits cannot all fit in
+  // a u16 here, so raw symbols may simply use up to 8 bits. There are at most
+  // 16 raw symbols, so the SIMD Huffman lookup needs no special tricks. The
+  // last entry is the one used for LZ77 lengths; its only constraint is that
+  // 32 symbols in total must be representable.
+  static constexpr uint8_t kMinRawLength[17] = {};
+  static constexpr uint8_t kMaxRawLength[17] = {
+      8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 10,
+  };
+  static size_t MaxEncodedBitsPerSample() { return 21; }
+  static constexpr size_t kInputBytes = 2;
+  using pixel_t = int16_t;
+  using upixel_t = uint16_t;
+
+  // Copies the first 16 code lengths/values into the SIMD lookup tables.
+  static void PrepareForSimd(const uint8_t* nbits, const uint8_t* bits,
+                             size_t n, uint8_t* nbits_simd,
+                             uint8_t* bits_simd) {
+    assert(n <= 16);
+    memcpy(nbits_simd, nbits, 16);
+    memcpy(bits_simd, bits, 16);
+  }
+
+  // Encodes residuals [skip, n) of one chunk, vectorized when available.
+  static void EncodeChunk(upixel_t* residuals, size_t n, size_t skip,
+                          const PrefixCode& code, BitWriter& output) {
+#ifdef FJXL_GENERIC_SIMD
+    constexpr size_t kLanes = SIMDVec16::kLanes;
+    // Every input vector produces two Bits32 outputs.
+    constexpr size_t kOutputs = 2 * kChunkSize / kLanes;
+    Bits32 packed[kOutputs];
+    alignas(64) uint16_t raw_bits[kLanes];
+    alignas(64) uint16_t raw_nbits[kLanes];
+    alignas(64) uint16_t huff_bits[kLanes];
+    alignas(64) uint16_t huff_nbits[kLanes];
+    alignas(64) uint16_t tokens[kLanes];
+    for (size_t base = 0, v = 0; base < kChunkSize; base += kLanes, v++) {
+      // Limits relative to this vector, clamped at zero.
+      const size_t n_here = n > base ? n - base : 0;
+      const size_t skip_here = skip > base ? skip - base : 0;
+      TokenizeSIMD(residuals + base, tokens, raw_nbits, raw_bits);
+      HuffmanSIMDUpTo13(tokens, code, huff_nbits, huff_bits);
+      StoreSIMDUpTo14(raw_nbits, raw_bits, huff_nbits, huff_bits, n_here,
+                      skip_here, packed + 2 * v);
+    }
+    StoreToWriter<kOutputs>(packed, output);
+    return;
+#endif
+    GenericEncodeChunk(residuals, n, skip, code, output);
+  }
+
+  // Number of raw symbols: 1 + effective residual bit depth, where residuals
+  // gain one bit from prediction and one more when YCoCg is applied.
+  size_t NumSymbols(bool doing_ycocg) const {
+    return bitdepth + (doing_ycocg ? 3 : 2);
+  }
+};
+constexpr uint8_t From9To13Bits::kMinRawLength[];
+constexpr uint8_t From9To13Bits::kMaxRawLength[];
+
+// Debug check for a pair of symbols that share one SIMD table slot: both codes
+// must be 8 bits long and the second must equal the first with bit 7 set.
+void CheckHuffmanBitsSIMD(int bits1, int nbits1, int bits2, int nbits2) {
+  // Keep the parameters "used" when assert() compiles to nothing.
+  (void)bits1;
+  (void)nbits1;
+  (void)bits2;
+  (void)nbits2;
+  assert(nbits1 == 8);
+  assert(nbits2 == 8);
+  assert(bits2 == (bits1 | 128));
+}
+
+// Bit-depth policy for samples of exactly 14 bits.
+struct Exactly14Bits {
+  explicit Exactly14Bits(size_t bitdepth) { assert(bitdepth == 14); }
+  // Code-length limits per raw symbol. Raw symbols 15 and 16 are forced to
+  // exactly 8 bits, LZ77 symbols to at least 8, and every other symbol to
+  // fewer than 8. This guarantees that the codes for 15 and 16 are identical
+  // up to one bit.
+  static constexpr uint8_t kMinRawLength[18] = {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 7,
+  };
+  static constexpr uint8_t kMaxRawLength[18] = {
+      7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 10,
+  };
+  static constexpr size_t bitdepth = 14;
+  static size_t MaxEncodedBitsPerSample() { return 22; }
+  static constexpr size_t kInputBytes = 2;
+  using pixel_t = int16_t;
+  using upixel_t = uint16_t;
+
+  // Verifies the 15/16 code pairing, then copies the first 16 code
+  // lengths/values into the SIMD lookup tables.
+  static void PrepareForSimd(const uint8_t* nbits, const uint8_t* bits,
+                             size_t n, uint8_t* nbits_simd,
+                             uint8_t* bits_simd) {
+    assert(n == 17);
+    CheckHuffmanBitsSIMD(bits[15], nbits[15], bits[16], nbits[16]);
+    memcpy(nbits_simd, nbits, 16);
+    memcpy(bits_simd, bits, 16);
+  }
+
+  // Encodes residuals [skip, n) of one chunk, vectorized when available.
+  static void EncodeChunk(upixel_t* residuals, size_t n, size_t skip,
+                          const PrefixCode& code, BitWriter& output) {
+#ifdef FJXL_GENERIC_SIMD
+    constexpr size_t kLanes = SIMDVec16::kLanes;
+    // Every input vector produces two Bits32 outputs.
+    constexpr size_t kOutputs = 2 * kChunkSize / kLanes;
+    Bits32 packed[kOutputs];
+    alignas(64) uint16_t raw_bits[kLanes];
+    alignas(64) uint16_t raw_nbits[kLanes];
+    alignas(64) uint16_t huff_bits[kLanes];
+    alignas(64) uint16_t huff_nbits[kLanes];
+    alignas(64) uint16_t tokens[kLanes];
+    for (size_t base = 0, v = 0; base < kChunkSize; base += kLanes, v++) {
+      // Limits relative to this vector, clamped at zero.
+      const size_t n_here = n > base ? n - base : 0;
+      const size_t skip_here = skip > base ? skip - base : 0;
+      TokenizeSIMD(residuals + base, tokens, raw_nbits, raw_bits);
+      HuffmanSIMD14(tokens, code, huff_nbits, huff_bits);
+      StoreSIMDUpTo14(raw_nbits, raw_bits, huff_nbits, huff_bits, n_here,
+                      skip_here, packed + 2 * v);
+    }
+    StoreToWriter<kOutputs>(packed, output);
+    return;
+#endif
+    GenericEncodeChunk(residuals, n, skip, code, output);
+  }
+
+  // The raw alphabet has a fixed size at this bit depth.
+  size_t NumSymbols(bool) const { return 17; }
+};
+constexpr uint8_t Exactly14Bits::kMinRawLength[];
+constexpr uint8_t Exactly14Bits::kMaxRawLength[];
+
+// Bit-depth policy for samples of 15 or 16 bits.
+struct MoreThan14Bits {
+  size_t bitdepth;
+  explicit MoreThan14Bits(size_t bitdepth) : bitdepth(bitdepth) {
+    assert(bitdepth > 14);
+    assert(bitdepth <= 16);
+  }
+  // Code-length limits per raw symbol. Raw symbols 13 to 18 are forced to
+  // exactly 8 bits, LZ77 symbols to at least 8, and every other symbol to
+  // fewer than 8. This guarantees that the codes within each of the pairs
+  // (13, 14), (15, 16), (17, 18) are identical up to one bit.
+  static constexpr uint8_t kMinRawLength[20] = {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 7,
+  };
+  static constexpr uint8_t kMaxRawLength[20] = {
+      7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 10,
+  };
+  static size_t MaxEncodedBitsPerSample() { return 24; }
+  static constexpr size_t kInputBytes = 2;
+  using pixel_t = int32_t;
+  using upixel_t = uint32_t;
+
+  // Verifies the pairing of codes (13,14), (15,16), (17,18) and fills the
+  // 16-entry SIMD tables: entries 0..13 are copied as-is, entry 14 takes
+  // symbol 15 and entry 15 takes symbol 17.
+  static void PrepareForSimd(const uint8_t* nbits, const uint8_t* bits,
+                             size_t n, uint8_t* nbits_simd,
+                             uint8_t* bits_simd) {
+    assert(n == 19);
+    for (size_t first = 13; first <= 17; first += 2) {
+      CheckHuffmanBitsSIMD(bits[first], nbits[first], bits[first + 1],
+                           nbits[first + 1]);
+    }
+    // Source symbol for each SIMD table entry.
+    static const uint8_t kSourceSymbol[16] = {
+        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 17,
+    };
+    for (size_t slot = 0; slot < 16; slot++) {
+      const size_t symbol = kSourceSymbol[slot];
+      nbits_simd[slot] = nbits[symbol];
+      bits_simd[slot] = bits[symbol];
+    }
+  }
+
+  // Encodes residuals [skip, n) of one chunk, vectorized when available.
+  static void EncodeChunk(upixel_t* residuals, size_t n, size_t skip,
+                          const PrefixCode& code, BitWriter& output) {
+#ifdef FJXL_GENERIC_SIMD
+    constexpr size_t kLanes = SIMDVec16::kLanes;
+    // Every group of kLanes samples produces two Bits32 outputs.
+    constexpr size_t kOutputs = 2 * kChunkSize / kLanes;
+    Bits32 packed[kOutputs];
+    alignas(64) uint32_t raw_bits[kLanes];
+    alignas(64) uint32_t raw_nbits[kLanes];
+    alignas(64) uint16_t huff_bits[kLanes];
+    alignas(64) uint16_t huff_nbits[kLanes];
+    alignas(64) uint16_t tokens[kLanes];
+    for (size_t base = 0, v = 0; base < kChunkSize; base += kLanes, v++) {
+      // Limits relative to this group, clamped at zero.
+      const size_t n_here = n > base ? n - base : 0;
+      const size_t skip_here = skip > base ? skip - base : 0;
+      TokenizeSIMD(residuals + base, tokens, raw_nbits, raw_bits);
+      HuffmanSIMDAbove14(tokens, code, huff_nbits, huff_bits);
+      StoreSIMDAbove14(raw_nbits, raw_bits, huff_nbits, huff_bits, n_here,
+                       skip_here, packed + 2 * v);
+    }
+    StoreToWriter<kOutputs>(packed, output);
+    return;
+#endif
+    GenericEncodeChunk(residuals, n, skip, code, output);
+  }
+
+  // The raw alphabet has a fixed size at these bit depths.
+  size_t NumSymbols(bool) const { return 19; }
+};
+constexpr uint8_t MoreThan14Bits::kMinRawLength[];
+constexpr uint8_t MoreThan14Bits::kMaxRawLength[];
+
+// Allocates the output buffer and writes the part of the DC-global section
+// that does not depend on the channel count: the fixed modular tree, the LZ77
+// and context-map configuration, the four symbol prefix codes in `code`, and
+// the group header of the global modular image.
+//
+// is_single_group: when true, room for the pixel data (16 bytes per pixel) is
+//   reserved in addition to the fixed 100000-byte allowance.
+// width, height: image dimensions, used only to size the allocation.
+// code: the four prefix codes, written in index order.
+// output: bit writer that receives the header; allocated here.
+void PrepareDCGlobalCommon(bool is_single_group, size_t width, size_t height,
+                           const PrefixCode code[4], BitWriter* output) {
+  output->Allocate(100000 + (is_single_group ? width * height * 16 : 0));
+  // No patches, spline or noise.
+  output->Write(1, 1);  // default DC dequantization factors (?)
+  output->Write(1, 1);  // use global tree / histograms
+  output->Write(1, 0);  // no lz77 for the tree
+
+  output->Write(1, 1);         // simple code for the tree's context map
+  output->Write(2, 0);         // all contexts clustered together
+  output->Write(1, 1);         // use prefix code for tree
+  output->Write(4, 0);         // 000 hybrid uint
+  output->Write(6, 0b100011);  // Alphabet size is 4 (var16)
+  output->Write(2, 1);         // simple prefix code
+  output->Write(2, 3);         // with 4 symbols
+  output->Write(2, 0);
+  output->Write(2, 1);
+  output->Write(2, 2);
+  output->Write(2, 3);
+  output->Write(1, 0);  // First tree encoding option
+  // Huffman table + extra bits for the tree.
+  uint8_t symbol_bits[6] = {0b00, 0b10, 0b001, 0b101, 0b0011, 0b0111};
+  uint8_t symbol_nbits[6] = {2, 2, 3, 3, 4, 4};
+  // Write a tree with a leaf per channel, and gradient predictor for every
+  // leaf.
+  for (auto v : {1, 2, 1, 4, 1, 0, 0, 5, 0, 0, 0, 0, 5,
+                 0, 0, 0, 0, 5, 0, 0, 0, 0, 5, 0, 0, 0}) {
+    output->Write(symbol_nbits[v], symbol_bits[v]);
+  }
+
+  output->Write(1, 1);     // Enable lz77 for the main bitstream
+  output->Write(2, 0b00);  // lz77 offset 224
+  static_assert(kLZ77Offset == 224, "");
+  output->Write(4, 0b1010);  // lz77 min length 7
+  // 400 hybrid uint config for lz77
+  output->Write(4, 4);
+  output->Write(3, 0);
+  output->Write(3, 0);
+
+  output->Write(1, 1);  // simple code for the context map
+  output->Write(2, 3);  // 3 bits per entry
+  output->Write(3, 4);  // channel 3
+  output->Write(3, 3);  // channel 2
+  output->Write(3, 2);  // channel 1
+  output->Write(3, 1);  // channel 0
+  output->Write(3, 0);  // distance histogram first
+
+  output->Write(1, 1);  // use prefix codes
+  output->Write(4, 0);  // 000 hybrid uint config for distances (only need 0)
+  for (size_t i = 0; i < 4; i++) {
+    output->Write(4, 0);  // 000 hybrid uint config for symbols (only <= 10)
+  }
+
+  // Distance alphabet size:
+  output->Write(5, 0b00001);  // 2: just need 1 for RLE (i.e. distance 1)
+  // Symbol + LZ77 alphabet size:
+  for (size_t i = 0; i < 4; i++) {
+    output->Write(1, 1);  // > 1
+    output->Write(4, 8);  // 8 payload bits follow
+    // This used to be Write(8, 256), but 256 does not fit in 8 bits: its low
+    // 8 bits are all zero and the extra set bit fell onto the next bit
+    // written, which is always a 1 here (the next iteration's "> 1" flag, or
+    // the low bit of the "simple prefix code" marker below). Writing 0 keeps
+    // the field within its declared width.
+    // NOTE(review): the old comments claimed an alphabet size of 512; confirm
+    // the size an all-zero 8-bit payload denotes against the JPEG XL spec.
+    output->Write(8, 0);
+  }
+
+  // Distance histogram:
+  output->Write(2, 1);  // simple prefix code
+  output->Write(2, 0);  // with one symbol
+  output->Write(1, 1);  // 1
+
+  // Symbol + lz77 histogram:
+  for (size_t i = 0; i < 4; i++) {
+    code[i].WriteTo(output);
+  }
+
+  // Group header for global modular image.
+  output->Write(1, 1);  // Global tree
+  output->Write(1, 1);  // All default wp
+}
+
+// Writes the complete DC-global section: the common header followed by the
+// transform list (a single YCoCg RCT starting at channel 0 when there are
+// more than two channels, none otherwise). For multi-group images the
+// section is then zero-padded to a byte boundary.
+void PrepareDCGlobal(bool is_single_group, size_t width, size_t height,
+                     size_t nb_chans, const PrefixCode code[4],
+                     BitWriter* output) {
+  PrepareDCGlobalCommon(is_single_group, width, height, code, output);
+
+  const bool has_color = nb_chans > 2;
+  if (!has_color) {
+    output->Write(2, 0b00);  // no transforms
+  } else {
+    output->Write(2, 0b01);     // 1 transform
+    output->Write(2, 0b00);     // RCT
+    output->Write(5, 0b00000);  // Starting from ch 0
+    output->Write(2, 0b00);     // YCoCg
+  }
+
+  if (is_single_group) return;
+  output->ZeroPadToByte();
+}
+
+// Chunk sink that writes the entropy-coded representation of zero runs and
+// residual chunks to a BitWriter.
+template <typename BitDepth>
+struct ChunkEncoder {
+  // Emits a run of `count` zero residuals; a count of 0 emits nothing.
+  // Callers must pass either 0 or a count greater than kLZ77MinLength.
+  FJXL_INLINE static void EncodeRle(size_t count, const PrefixCode& code,
+                                    BitWriter& output) {
+    if (count == 0) return;
+    count -= kLZ77MinLength + 1;
+    // Short runs have their full bit pattern precomputed.
+    if (count < kLZ77CacheSize) {
+      output.Write(code.lz77_cache_nbits[count], code.lz77_cache_bits[count]);
+      return;
+    }
+    unsigned token, nbits, bits;
+    EncodeHybridUintLZ77(count, &token, &nbits, &bits);
+    const auto length_code_nbits = code.lz77_nbits[token];
+    const auto zero_code_nbits = code.raw_nbits[0];
+    // Lowest bits first: code of raw symbol 0, then the LZ77 length token,
+    // then the token's extra bits.
+    uint64_t pattern = bits;
+    pattern = (pattern << length_code_nbits) | code.lz77_bits[token];
+    pattern = (pattern << zero_code_nbits) | code.raw_bits[0];
+    output.Write(length_code_nbits + nbits + zero_code_nbits, pattern);
+  }
+
+  // Emits the pending zero run, then residuals [skip, n) of the chunk.
+  FJXL_INLINE void Chunk(size_t run, typename BitDepth::upixel_t* residuals,
+                         size_t skip, size_t n) {
+    EncodeRle(run, *code, *output);
+    BitDepth::EncodeChunk(residuals, n, skip, *code, *output);
+  }
+
+  // Emits the zero run still pending at the end of the data.
+  inline void Finalize(size_t run) { EncodeRle(run, *code, *output); }
+
+  const PrefixCode* code;
+  BitWriter* output;
+};
+
+// Chunk sink that only gathers symbol statistics: how often each raw token
+// and each LZ77 length token would be emitted.
+template <typename BitDepth>
+struct ChunkSampleCollector {
+  // Accounts for one zero run of `count` residuals (0 means no run): one raw
+  // symbol 0 plus the LZ77 token of the adjusted length.
+  FJXL_INLINE void Rle(size_t count, uint64_t* lz77_counts) {
+    if (count == 0) return;
+    ++raw_counts[0];
+    count -= kLZ77MinLength + 1;
+    unsigned token, nbits, bits;
+    EncodeHybridUintLZ77(count, &token, &nbits, &bits);
+    ++lz77_counts[token];
+  }
+
+  // Accounts for the run that just ended and for residuals [skip, n).
+  FJXL_INLINE void Chunk(size_t run, typename BitDepth::upixel_t* residuals,
+                         size_t skip, size_t n) {
+    Rle(run, lz77_counts);
+    size_t pos = skip;
+    while (pos < n) {
+      unsigned token, nbits, bits;
+      EncodeHybridUint000(residuals[pos], &token, &nbits, &bits);
+      ++raw_counts[token];
+      ++pos;
+    }
+  }
+
+  // The final run is deliberately not counted: its real length is unknown.
+  void Finalize(size_t run) {}
+
+  uint64_t* raw_counts;
+  uint64_t* lz77_counts;
+};
+
+// Folds a signed value into an unsigned one so that small magnitudes stay
+// small: non-negative v becomes 2v, negative v becomes -2v - 1.
+constexpr uint32_t PackSigned(int32_t value) {
+  // The second operand is all ones for negative inputs and zero otherwise.
+  return (static_cast<uint32_t>(value) << 1) ^
+         (uint32_t{0} - (static_cast<uint32_t>(value) >> 31));
+}
+
+// Walks the rows of one channel chunk by chunk, computes prediction residuals
+// and forwards them to the sink `t`, collapsing stretches of zero residuals
+// into runs. `T` is the sink type (it must provide Chunk and Finalize).
+template <typename T, typename BitDepth>
+struct ChannelRowProcessor {
+  using upixel_t = typename BitDepth::upixel_t;
+  using pixel_t = typename BitDepth::pixel_t;
+  T* t;
+
+  // Handles one chunk. All four pointers must be readable for kChunkSize
+  // elements; only the first `n` residuals are meaningful.
+  void ProcessChunk(const pixel_t* row, const pixel_t* row_left,
+                    const pixel_t* row_top, const pixel_t* row_topleft,
+                    size_t n) {
+    alignas(64) upixel_t residuals[kChunkSize] = {};
+    // Number of leading zero residuals found so far; it only keeps growing
+    // while it still equals the number of residuals examined.
+    size_t zero_prefix = 0;
+#ifdef FJXL_GENERIC_SIMD
+    constexpr size_t kStep =
+        sizeof(pixel_t) == 2 ? SIMDVec16::kLanes : SIMDVec32::kLanes;
+    for (size_t ix = 0; ix < kChunkSize; ix += kStep) {
+      const size_t zeros_here =
+          PredictPixels<simd_t<pixel_t>>(row + ix, row_left + ix, row_top + ix,
+                                         row_topleft + ix, residuals + ix);
+      zero_prefix += (zero_prefix == ix) ? zeros_here : 0;
+    }
+#else
+    for (size_t ix = 0; ix < kChunkSize; ix++) {
+      const pixel_t cur = row[ix];
+      const pixel_t west = row_left[ix];
+      const pixel_t north = row_top[ix];
+      const pixel_t northwest = row_topleft[ix];
+      const pixel_t west_minus_nw = west - northwest;
+      const pixel_t west_minus_north = west - north;
+      const pixel_t north_minus_nw = north - northwest;
+      // Unsigned arithmetic keeps the wrap-around well defined.
+      const pixel_t gradient =
+          static_cast<pixel_t>(static_cast<upixel_t>(west_minus_nw) +
+                               static_cast<upixel_t>(north));
+      const pixel_t fallback_sel = west_minus_north ^ north_minus_nw;
+      const pixel_t fallback = fallback_sel < 0 ? north : west;
+      const pixel_t gradient_sel = west_minus_nw ^ north_minus_nw;
+      const pixel_t prediction = gradient_sel < 0 ? gradient : fallback;
+      residuals[ix] = PackSigned(cur - prediction);
+      zero_prefix += (zero_prefix == ix) ? (residuals[ix] == 0) : 0;
+    }
+#endif
+    // Zeros beyond the valid part of the chunk do not count.
+    zero_prefix = std::min(n, zero_prefix);
+
+    const bool all_zero = zero_prefix == n;
+    if (all_zero && (run > 0 || zero_prefix > kLZ77MinLength)) {
+      // The run continues (or starts); nothing is emitted yet.
+      run += zero_prefix;
+      return;
+    }
+    if (zero_prefix + run > kLZ77MinLength) {
+      // The run ends inside this chunk: emit it, then the remaining residuals.
+      t->Chunk(run + zero_prefix, residuals, zero_prefix, n);
+      run = 0;
+      return;
+    }
+    // No run worth encoding: emit every residual as-is.
+    t->Chunk(0, residuals, 0, n);
+  }
+
+  // Processes `xs` pixels of a row in steps of kChunkSize.
+  void ProcessRow(const pixel_t* row, const pixel_t* row_left,
+                  const pixel_t* row_top, const pixel_t* row_topleft,
+                  size_t xs) {
+    size_t x = 0;
+    while (x < xs) {
+      const size_t valid = std::min(kChunkSize, xs - x);
+      ProcessChunk(row + x, row_left + x, row_top + x, row_topleft + x, valid);
+      x += kChunkSize;
+    }
+  }
+
+  // Hands the run still pending to the sink.
+  void Finalize() { t->Finalize(run); }
+  // Invariant: run == 0 or run > kLZ77MinLength.
+  size_t run = 0;
+};
+
+// Reads a 16-bit little-endian value from two consecutive bytes.
+uint16_t LoadLE16(const unsigned char* ptr) {
+  const unsigned low_byte = ptr[0];
+  const unsigned high_byte = ptr[1];
+  return static_cast<uint16_t>(low_byte | (high_byte << 8));
+}
+
+// Swaps the two bytes of a 16-bit value.
+uint16_t SwapEndian(uint16_t in) {
+  return static_cast<uint16_t>((in << 8) | (in >> 8));
+}
+
+#ifdef FJXL_GENERIC_SIMD
+// Stores one vector of 16-bit samples into a 16-bit pixel buffer.
+void StorePixels(SIMDVec16 p, int16_t* dest) {
+  p.Store(reinterpret_cast<uint16_t*>(dest));
+}
+
+// Stores one vector of 16-bit samples into a 32-bit pixel buffer, widening
+// each sample; the low half is written first, the high half right after it.
+void StorePixels(SIMDVec16 p, int32_t* dest) {
+  uint32_t* out = reinterpret_cast<uint32_t*>(dest);
+  VecPair<SIMDVec32> widened = p.Upcast();
+  widened.low.Store(out);
+  widened.hi.Store(out + SIMDVec32::kLanes);
+}
+#endif
+
+// Copies `oxs` 8-bit grayscale samples from `rgba` into the `luma` plane.
+template <typename pixel_t>
+void FillRowG8(const unsigned char* rgba, size_t oxs, pixel_t* luma) {
+  size_t pos = 0;
+#ifdef FJXL_GENERIC_SIMD
+  // Whole vectors first.
+  while (pos + SIMDVec16::kLanes <= oxs) {
+    auto planes = SIMDVec16::LoadG8(rgba + pos);
+    StorePixels(planes[0], luma + pos);
+    pos += SIMDVec16::kLanes;
+  }
+#endif
+  // Scalar remainder (the whole row when SIMD is unavailable).
+  while (pos < oxs) {
+    luma[pos] = rgba[pos];
+    pos++;
+  }
+}
+
+// Copies `oxs` 16-bit grayscale samples (two bytes each) from `rgba` into the
+// `luma` plane, byte-swapping every sample when `big_endian` is set.
+template <bool big_endian, typename pixel_t>
+void FillRowG16(const unsigned char* rgba, size_t oxs, pixel_t* luma) {
+  size_t pos = 0;
+#ifdef FJXL_GENERIC_SIMD
+  // Whole vectors first.
+  while (pos + SIMDVec16::kLanes <= oxs) {
+    auto planes = SIMDVec16::LoadG16(rgba + 2 * pos);
+    if (big_endian) {
+      planes[0].SwapEndian();
+    }
+    StorePixels(planes[0], luma + pos);
+    pos += SIMDVec16::kLanes;
+  }
+#endif
+  // Scalar remainder (the whole row when SIMD is unavailable).
+  while (pos < oxs) {
+    const uint16_t sample = LoadLE16(rgba + 2 * pos);
+    luma[pos] = big_endian ? SwapEndian(sample) : sample;
+    pos++;
+  }
+}
+
+// Splits `oxs` interleaved 8-bit gray+alpha pixels from `rgba` into the
+// separate `luma` and `alpha` planes.
+template <typename pixel_t>
+void FillRowGA8(const unsigned char* rgba, size_t oxs, pixel_t* luma,
+                pixel_t* alpha) {
+  size_t pos = 0;
+#ifdef FJXL_GENERIC_SIMD
+  // Whole vectors first.
+  while (pos + SIMDVec16::kLanes <= oxs) {
+    auto planes = SIMDVec16::LoadGA8(rgba + 2 * pos);
+    StorePixels(planes[0], luma + pos);
+    StorePixels(planes[1], alpha + pos);
+    pos += SIMDVec16::kLanes;
+  }
+#endif
+  // Scalar remainder (the whole row when SIMD is unavailable).
+  while (pos < oxs) {
+    const unsigned char* px = rgba + 2 * pos;
+    luma[pos] = px[0];
+    alpha[pos] = px[1];
+    pos++;
+  }
+}
+
+// Splits `oxs` interleaved 16-bit gray+alpha pixels (four bytes each) from
+// `rgba` into the `luma` and `alpha` planes, byte-swapping every sample when
+// `big_endian` is set.
+template <bool big_endian, typename pixel_t>
+void FillRowGA16(const unsigned char* rgba, size_t oxs, pixel_t* luma,
+                 pixel_t* alpha) {
+  size_t pos = 0;
+#ifdef FJXL_GENERIC_SIMD
+  // Whole vectors first.
+  while (pos + SIMDVec16::kLanes <= oxs) {
+    auto planes = SIMDVec16::LoadGA16(rgba + 4 * pos);
+    if (big_endian) {
+      planes[0].SwapEndian();
+      planes[1].SwapEndian();
+    }
+    StorePixels(planes[0], luma + pos);
+    StorePixels(planes[1], alpha + pos);
+    pos += SIMDVec16::kLanes;
+  }
+#endif
+  // Scalar remainder (the whole row when SIMD is unavailable).
+  while (pos < oxs) {
+    const unsigned char* px = rgba + 4 * pos;
+    const uint16_t gray = LoadLE16(px);
+    const uint16_t opacity = LoadLE16(px + 2);
+    luma[pos] = big_endian ? SwapEndian(gray) : gray;
+    alpha[pos] = big_endian ? SwapEndian(opacity) : opacity;
+    pos++;
+  }
+}
+
+template <typename pixel_t>  // Converts one (r, g, b) sample to Y/Co/Cg and stores it.
+void StoreYCoCg(pixel_t r, pixel_t g, pixel_t b, pixel_t* y, pixel_t* co,
+                pixel_t* cg) {
+  *co = r - b;  // Co = R - B
+  pixel_t tmp = b + (*co >> 1);  // tmp = B + Co / 2 (shift, not division)
+  *cg = g - tmp;  // Cg = G - tmp
+  *y = tmp + (*cg >> 1);  // Y = tmp + Cg / 2; same steps as the SIMD overloads
+}
+
+#ifdef FJXL_GENERIC_SIMD
+void StoreYCoCg(SIMDVec16 r, SIMDVec16 g, SIMDVec16 b, int16_t* y, int16_t* co,
+                int16_t* cg) {  // SIMD overload for 16-bit output planes
+  SIMDVec16 co_v = r.Sub(b);  // Co = R - B
+  SIMDVec16 tmp = b.Add(co_v.SignedShiftRight<1>());  // tmp = B + (Co >> 1)
+  SIMDVec16 cg_v = g.Sub(tmp);  // Cg = G - tmp
+  SIMDVec16 y_v = tmp.Add(cg_v.SignedShiftRight<1>());  // Y = tmp + (Cg >> 1)
+  y_v.Store((uint16_t*)y);  // Store takes uint16_t*, hence the casts
+  co_v.Store((uint16_t*)co);
+  cg_v.Store((uint16_t*)cg);
+}
+
+void StoreYCoCg(SIMDVec16 r, SIMDVec16 g, SIMDVec16 b, int32_t* y, int32_t* co,
+                int32_t* cg) {  // SIMD overload for 32-bit output planes
+  VecPair<SIMDVec32> r_up = r.Upcast();  // widen each input into a low/hi pair
+  VecPair<SIMDVec32> g_up = g.Upcast();
+  VecPair<SIMDVec32> b_up = b.Upcast();
+  SIMDVec32 co_lo_v = r_up.low.Sub(b_up.low);  // low half: Co = R - B
+  SIMDVec32 tmp_lo = b_up.low.Add(co_lo_v.SignedShiftRight<1>());
+  SIMDVec32 cg_lo_v = g_up.low.Sub(tmp_lo);
+  SIMDVec32 y_lo_v = tmp_lo.Add(cg_lo_v.SignedShiftRight<1>());
+  SIMDVec32 co_hi_v = r_up.hi.Sub(b_up.hi);  // hi half: same four steps
+  SIMDVec32 tmp_hi = b_up.hi.Add(co_hi_v.SignedShiftRight<1>());
+  SIMDVec32 cg_hi_v = g_up.hi.Sub(tmp_hi);
+  SIMDVec32 y_hi_v = tmp_hi.Add(cg_hi_v.SignedShiftRight<1>());
+  y_lo_v.Store((uint32_t*)y);  // low half goes to the first kLanes outputs
+  co_lo_v.Store((uint32_t*)co);
+  cg_lo_v.Store((uint32_t*)cg);
+  y_hi_v.Store((uint32_t*)y + SIMDVec32::kLanes);  // hi half follows directly after
+  co_hi_v.Store((uint32_t*)co + SIMDVec32::kLanes);
+  cg_hi_v.Store((uint32_t*)cg + SIMDVec32::kLanes);
+}
+#endif
+
+template <typename pixel_t>  // Converts one row of interleaved 8-bit RGB to Y/Co/Cg planes.
+void FillRowRGB8(const unsigned char* rgba, size_t oxs, pixel_t* y, pixel_t* co,
+                 pixel_t* cg) {
+  size_t x = 0;
+#ifdef FJXL_GENERIC_SIMD
+  for (; x + SIMDVec16::kLanes <= oxs; x += SIMDVec16::kLanes) {
+    auto rgb = SIMDVec16::LoadRGB8(rgba + 3 * x);  // 3 input bytes per pixel
+    StoreYCoCg(rgb[0], rgb[1], rgb[2], y + x, co + x, cg + x);
+  }
+#endif
+  for (; x < oxs; x++) {  // scalar tail
+    uint16_t r = rgba[3 * x];
+    uint16_t g = rgba[3 * x + 1];
+    uint16_t b = rgba[3 * x + 2];
+    StoreYCoCg<pixel_t>(r, g, b, y + x, co + x, cg + x);  // args convert to pixel_t
+  }
+}
+
+template <bool big_endian, typename pixel_t>  // Converts one row of 16-bit RGB to Y/Co/Cg planes.
+void FillRowRGB16(const unsigned char* rgba, size_t oxs, pixel_t* y,
+                  pixel_t* co, pixel_t* cg) {
+  size_t x = 0;
+#ifdef FJXL_GENERIC_SIMD
+  for (; x + SIMDVec16::kLanes <= oxs; x += SIMDVec16::kLanes) {
+    auto rgb = SIMDVec16::LoadRGB16(rgba + 6 * x);  // 6 input bytes per pixel
+    if (big_endian) {
+      rgb[0].SwapEndian();
+      rgb[1].SwapEndian();
+      rgb[2].SwapEndian();
+    }
+    StoreYCoCg(rgb[0], rgb[1], rgb[2], y + x, co + x, cg + x);
+  }
+#endif
+  for (; x < oxs; x++) {  // scalar tail
+    uint16_t r = LoadLE16(rgba + 6 * x);
+    uint16_t g = LoadLE16(rgba + 6 * x + 2);
+    uint16_t b = LoadLE16(rgba + 6 * x + 4);
+    if (big_endian) {  // samples were read as little-endian; swap them back
+      r = SwapEndian(r);
+      g = SwapEndian(g);
+      b = SwapEndian(b);
+    }
+    StoreYCoCg<pixel_t>(r, g, b, y + x, co + x, cg + x);
+  }
+}
+
+template <typename pixel_t>  // Converts one row of 8-bit RGBA to Y/Co/Cg planes plus alpha.
+void FillRowRGBA8(const unsigned char* rgba, size_t oxs, pixel_t* y,
+                  pixel_t* co, pixel_t* cg, pixel_t* alpha) {
+  size_t x = 0;
+#ifdef FJXL_GENERIC_SIMD
+  for (; x + SIMDVec16::kLanes <= oxs; x += SIMDVec16::kLanes) {
+    auto rgb = SIMDVec16::LoadRGBA8(rgba + 4 * x);  // 4 input bytes per pixel
+    StoreYCoCg(rgb[0], rgb[1], rgb[2], y + x, co + x, cg + x);
+    StorePixels(rgb[3], alpha + x);  // alpha is stored without any transform
+  }
+#endif
+  for (; x < oxs; x++) {  // scalar tail
+    uint16_t r = rgba[4 * x];
+    uint16_t g = rgba[4 * x + 1];
+    uint16_t b = rgba[4 * x + 2];
+    uint16_t a = rgba[4 * x + 3];
+    StoreYCoCg<pixel_t>(r, g, b, y + x, co + x, cg + x);
+    alpha[x] = a;
+  }
+}
+
+template <bool big_endian, typename pixel_t>  // Converts one row of 16-bit RGBA to Y/Co/Cg + alpha.
+void FillRowRGBA16(const unsigned char* rgba, size_t oxs, pixel_t* y,
+                   pixel_t* co, pixel_t* cg, pixel_t* alpha) {
+  size_t x = 0;
+#ifdef FJXL_GENERIC_SIMD
+  for (; x + SIMDVec16::kLanes <= oxs; x += SIMDVec16::kLanes) {
+    auto rgb = SIMDVec16::LoadRGBA16(rgba + 8 * x);  // 8 input bytes per pixel
+    if (big_endian) {
+      rgb[0].SwapEndian();
+      rgb[1].SwapEndian();
+      rgb[2].SwapEndian();
+      rgb[3].SwapEndian();
+    }
+    StoreYCoCg(rgb[0], rgb[1], rgb[2], y + x, co + x, cg + x);
+    StorePixels(rgb[3], alpha + x);  // alpha is stored without any transform
+  }
+#endif
+  for (; x < oxs; x++) {  // scalar tail
+    uint16_t r = LoadLE16(rgba + 8 * x);
+    uint16_t g = LoadLE16(rgba + 8 * x + 2);
+    uint16_t b = LoadLE16(rgba + 8 * x + 4);
+    uint16_t a = LoadLE16(rgba + 8 * x + 6);
+    if (big_endian) {  // samples were read as little-endian; swap them back
+      r = SwapEndian(r);
+      g = SwapEndian(g);
+      b = SwapEndian(b);
+      a = SwapEndian(a);
+    }
+    StoreYCoCg<pixel_t>(r, g, b, y + x, co + x, cg + x);
+    alpha[x] = a;
+  }
+}
+
+template <typename Processor, typename BitDepth>  // Feeds an image rectangle, row by row, to per-channel processors.
+void ProcessImageArea(const unsigned char* rgba, size_t x0, size_t y0,
+                      size_t xs, size_t yskip, size_t ys, size_t row_stride,
+                      BitDepth bitdepth, size_t nb_chans, bool big_endian,
+                      Processor* processors) {
+  constexpr size_t kPadding = 32;  // spare pixels before each row, so row[-1] is addressable
+
+  using pixel_t = typename BitDepth::pixel_t;
+
+  constexpr size_t kAlign = 64;
+  constexpr size_t kAlignPixels = kAlign / sizeof(pixel_t);
+
+  auto align = [=](pixel_t* ptr) {  // rounds ptr up to the next kAlign-byte boundary
+    size_t offset = reinterpret_cast<uintptr_t>(ptr) % kAlign;
+    if (offset) {
+      ptr += (kAlign - offset) / sizeof(pixel_t);  // distance to the boundary; kNumPx reserves it
+    }
+    return ptr;
+  };
+
+  constexpr size_t kNumPx =
+      (256 + kPadding * 2 + kAlignPixels + kAlignPixels - 1) / kAlignPixels *
+      kAlignPixels;
+
+  std::vector<std::array<std::array<pixel_t, kNumPx>, 2>> group_data(nb_chans);  // 2 rows per channel
+
+  for (size_t y = 0; y < ys; y++) {
+    const auto rgba_row =
+        rgba + row_stride * (y0 + y) + x0 * nb_chans * BitDepth::kInputBytes;
+    pixel_t* crow[4] = {};  // current row, per channel
+    pixel_t* prow[4] = {};  // previous row, per channel
+    for (size_t i = 0; i < nb_chans; i++) {
+      crow[i] = align(&group_data[i][y & 1][kPadding]);  // the two buffers alternate by row parity
+      prow[i] = align(&group_data[i][(y - 1) & 1][kPadding]);
+    }
+
+    // Pre-fill rows with YCoCg converted pixels.
+    if (nb_chans == 1) {
+      if (BitDepth::kInputBytes == 1) {
+        FillRowG8(rgba_row, xs, crow[0]);
+      } else if (big_endian) {
+        FillRowG16</*big_endian=*/true>(rgba_row, xs, crow[0]);
+      } else {
+        FillRowG16</*big_endian=*/false>(rgba_row, xs, crow[0]);
+      }
+    } else if (nb_chans == 2) {
+      if (BitDepth::kInputBytes == 1) {
+        FillRowGA8(rgba_row, xs, crow[0], crow[1]);
+      } else if (big_endian) {
+        FillRowGA16</*big_endian=*/true>(rgba_row, xs, crow[0], crow[1]);
+      } else {
+        FillRowGA16</*big_endian=*/false>(rgba_row, xs, crow[0], crow[1]);
+      }
+    } else if (nb_chans == 3) {
+      if (BitDepth::kInputBytes == 1) {
+        FillRowRGB8(rgba_row, xs, crow[0], crow[1], crow[2]);
+      } else if (big_endian) {
+        FillRowRGB16</*big_endian=*/true>(rgba_row, xs, crow[0], crow[1],
+                                          crow[2]);
+      } else {
+        FillRowRGB16</*big_endian=*/false>(rgba_row, xs, crow[0], crow[1],
+                                           crow[2]);
+      }
+    } else {
+      if (BitDepth::kInputBytes == 1) {
+        FillRowRGBA8(rgba_row, xs, crow[0], crow[1], crow[2], crow[3]);
+      } else if (big_endian) {
+        FillRowRGBA16</*big_endian=*/true>(rgba_row, xs, crow[0], crow[1],
+                                           crow[2], crow[3]);
+      } else {
+        FillRowRGBA16</*big_endian=*/false>(rgba_row, xs, crow[0], crow[1],
+                                            crow[2], crow[3]);
+      }
+    }
+    // Deal with x == 0.
+    for (size_t c = 0; c < nb_chans; c++) {
+      *(crow[c] - 1) = y > 0 ? *(prow[c]) : 0;  // "left" of pixel 0 is the pixel above it
+      // Fix topleft.
+      *(prow[c] - 1) = y > 0 ? *(prow[c]) : 0;
+    }
+    if (y < yskip) continue;  // the first yskip rows are filled but not processed
+    for (size_t c = 0; c < nb_chans; c++) {
+      // Get pointers to px/left/top/topleft data to speedup loop.
+      const pixel_t* row = crow[c];
+      const pixel_t* row_left = crow[c] - 1;
+      const pixel_t* row_top = y == 0 ? row_left : prow[c];  // first row has no row above
+      const pixel_t* row_topleft = y == 0 ? row_left : prow[c] - 1;
+
+      processors[c].ProcessRow(row, row_left, row_top, row_topleft, xs);
+    }
+  }
+  for (size_t c = 0; c < nb_chans; c++) {
+    processors[c].Finalize();
+  }
+}
+
+template <typename BitDepth>  // Encodes one group's pixels, one BitWriter per channel.
+void WriteACSection(const unsigned char* rgba, size_t x0, size_t y0, size_t xs,
+                    size_t ys, size_t row_stride, bool is_single_group,
+                    BitDepth bitdepth, size_t nb_chans, bool big_endian,
+                    const PrefixCode code[4],
+                    std::array<BitWriter, 4>& output) {
+  for (size_t i = 0; i < nb_chans; i++) {
+    if (is_single_group && i == 0) continue;  // presumably allocated by the global section writer - verify
+    output[i].Allocate(xs * ys * bitdepth.MaxEncodedBitsPerSample() + 4);
+  }
+  if (!is_single_group) {
+    // Group header for modular image.
+    // When the image is single-group, the global modular image is the one
+    // that contains the pixel data, and there is no group header.
+    output[0].Write(1, 1);     // Global tree
+    output[0].Write(1, 1);     // All default wp
+    output[0].Write(2, 0b00);  // 0 transforms
+  }
+
+  ChunkEncoder<BitDepth> encoders[4];
+  ChannelRowProcessor<ChunkEncoder<BitDepth>, BitDepth> row_encoders[4];
+  for (size_t c = 0; c < nb_chans; c++) {  // channel c gets its own writer and prefix code
+    row_encoders[c].t = &encoders[c];
+    encoders[c].output = &output[c];
+    encoders[c].code = &code[c];
+  }
+  ProcessImageArea<ChannelRowProcessor<ChunkEncoder<BitDepth>, BitDepth>>(
+      rgba, x0, y0, xs, 0, ys, row_stride, bitdepth, nb_chans, big_endian,
+      row_encoders);  // yskip == 0: every row is encoded
+}
+
+constexpr int kHashExp = 16;  // log2 of the hash table size
+constexpr uint32_t kHashSize = 1 << kHashExp;  // number of buckets
+constexpr uint32_t kHashMultiplier = 2654435761;  // odd multiplier used by pixel_hash
+constexpr int kMaxColors = 512;  // LLEnc gives up on palette mode above this many colors
+
+// Maps a packed pixel value to a bucket. Any function returning a value in
+// 0 .. kHashSize-1 works, as long as it maps 0 to 0.
+inline uint32_t pixel_hash(uint32_t p) {
+  return (p * kHashMultiplier) >> (32 - kHashExp);  // top kHashExp bits of the 32-bit product
+}
+
+template <size_t nb_chans>  // Replaces each pixel of a row by its palette index.
+void FillRowPalette(const unsigned char* inrow, size_t xs,
+                    const int16_t* lookup, int16_t* out) {
+  for (size_t x = 0; x < xs; x++) {
+    uint32_t p = 0;  // bytes above nb_chans stay zero
+    memcpy(&p, inrow + x * nb_chans, nb_chans);  // pack the pixel's bytes into one integer
+    out[x] = lookup[pixel_hash(p)];  // lookup is indexed by hash bucket
+  }
+}
+
+template <typename Processor>  // Palette variant: feeds rows of palette indices to processors[0].
+void ProcessImageAreaPalette(const unsigned char* rgba, size_t x0, size_t y0,
+                             size_t xs, size_t yskip, size_t ys,
+                             size_t row_stride, const int16_t* lookup,
+                             size_t nb_chans, Processor* processors) {  // NOTE(review): yskip is never read here
+  constexpr size_t kPadding = 32;  // spare entries before each row, so row[-1] is addressable
+
+  std::vector<std::array<int16_t, 256 + kPadding * 2>> group_data(2);  // current + previous row
+  Processor& row_encoder = processors[0];  // the index plane is the only channel processed
+
+  for (size_t y = 0; y < ys; y++) {
+    // Pre-fill rows with palette converted pixels.
+    const unsigned char* inrow = rgba + row_stride * (y0 + y) + x0 * nb_chans;
+    int16_t* outrow = &group_data[y & 1][kPadding];
+    if (nb_chans == 1) {
+      FillRowPalette<1>(inrow, xs, lookup, outrow);
+    } else if (nb_chans == 2) {
+      FillRowPalette<2>(inrow, xs, lookup, outrow);
+    } else if (nb_chans == 3) {
+      FillRowPalette<3>(inrow, xs, lookup, outrow);
+    } else if (nb_chans == 4) {
+      FillRowPalette<4>(inrow, xs, lookup, outrow);
+    }
+    // Deal with x == 0.
+    group_data[y & 1][kPadding - 1] =
+        y > 0 ? group_data[(y - 1) & 1][kPadding] : 0;
+    // Fix topleft.
+    group_data[(y - 1) & 1][kPadding - 1] =
+        y > 0 ? group_data[(y - 1) & 1][kPadding] : 0;
+    // Get pointers to px/left/top/topleft data to speedup loop.
+    const int16_t* row = &group_data[y & 1][kPadding];
+    const int16_t* row_left = &group_data[y & 1][kPadding - 1];
+    const int16_t* row_top =
+        y == 0 ? row_left : &group_data[(y - 1) & 1][kPadding];  // first row has no row above
+    const int16_t* row_topleft =
+        y == 0 ? row_left : &group_data[(y - 1) & 1][kPadding - 1];
+
+    row_encoder.ProcessRow(row, row_left, row_top, row_topleft, xs);
+  }
+  row_encoder.Finalize();
+}
+
+void WriteACSectionPalette(const unsigned char* rgba, size_t x0, size_t y0,
+                           size_t xs, size_t ys, size_t row_stride,
+                           bool is_single_group, const PrefixCode code[4],
+                           const int16_t* lookup, size_t nb_chans,
+                           BitWriter& output) {  // Encodes one group's palette indices into output.
+  if (!is_single_group) {
+    output.Allocate(16 * xs * ys + 4);
+    // Group header for modular image.
+    // When the image is single-group, the global modular image is the one
+    // that contains the pixel data, and there is no group header.
+    output.Write(1, 1);     // Global tree
+    output.Write(1, 1);     // All default wp
+    output.Write(2, 0b00);  // 0 transforms
+  }
+
+  ChunkEncoder<UpTo8Bits> encoder;
+  ChannelRowProcessor<ChunkEncoder<UpTo8Bits>, UpTo8Bits> row_encoder;
+
+  row_encoder.t = &encoder;
+  encoder.output = &output;
+  encoder.code = &code[is_single_group ? 1 : 0];  // same slot CollectSamples fills for palette mode
+  ProcessImageAreaPalette<
+      ChannelRowProcessor<ChunkEncoder<UpTo8Bits>, UpTo8Bits>>(
+      rgba, x0, y0, xs, 0, ys, row_stride, lookup, nb_chans, &row_encoder);
+}
+
+template <typename BitDepth>  // Gathers symbol statistics from a band of rows for building prefix codes.
+void CollectSamples(const unsigned char* rgba, size_t x0, size_t y0, size_t xs,
+                    size_t row_stride, size_t row_count,
+                    uint64_t raw_counts[4][kNumRawSymbols],
+                    uint64_t lz77_counts[4][kNumLZ77], bool is_single_group,
+                    bool palette, BitDepth bitdepth, size_t nb_chans,
+                    bool big_endian, const int16_t* lookup) {
+  if (palette) {
+    ChunkSampleCollector<UpTo8Bits> sample_collectors[4];
+    ChannelRowProcessor<ChunkSampleCollector<UpTo8Bits>, UpTo8Bits>
+        row_sample_collectors[4];
+    for (size_t c = 0; c < nb_chans; c++) {  // every collector shares one count slot in palette mode
+      row_sample_collectors[c].t = &sample_collectors[c];
+      sample_collectors[c].raw_counts = raw_counts[is_single_group ? 1 : 0];
+      sample_collectors[c].lz77_counts = lz77_counts[is_single_group ? 1 : 0];
+    }
+    ProcessImageAreaPalette<
+        ChannelRowProcessor<ChunkSampleCollector<UpTo8Bits>, UpTo8Bits>>(
+        rgba, x0, y0, xs, 1, 1 + row_count, row_stride, lookup, nb_chans,
+        row_sample_collectors);
+  } else {
+    ChunkSampleCollector<BitDepth> sample_collectors[4];
+    ChannelRowProcessor<ChunkSampleCollector<BitDepth>, BitDepth>
+        row_sample_collectors[4];
+    for (size_t c = 0; c < nb_chans; c++) {  // channel c accumulates into its own count slot
+      row_sample_collectors[c].t = &sample_collectors[c];
+      sample_collectors[c].raw_counts = raw_counts[c];
+      sample_collectors[c].lz77_counts = lz77_counts[c];
+    }
+    ProcessImageArea<
+        ChannelRowProcessor<ChunkSampleCollector<BitDepth>, BitDepth>>(
+        rgba, x0, y0, xs, 1, 1 + row_count, row_stride, bitdepth, nb_chans,
+        big_endian, row_sample_collectors);  // yskip == 1: row y0 is loaded but not sampled
+  }
+}
+
+void PrepareDCGlobalPalette(bool is_single_group, size_t width, size_t height,
+                            size_t nb_chans, const PrefixCode code[4],
+                            const std::vector<uint32_t>& palette,
+                            size_t pcolors, BitWriter* output) {  // Writes the global section for palette mode.
+  PrepareDCGlobalCommon(is_single_group, width, height, code, output);
+  output->Write(2, 0b01);     // 1 transform
+  output->Write(2, 0b01);     // Palette
+  output->Write(5, 0b00000);  // Starting from ch 0
+  if (nb_chans == 1) {
+    output->Write(2, 0b00);  // 1-channel palette (Gray)
+  } else if (nb_chans == 3) {
+    output->Write(2, 0b01);  // 3-channel palette (RGB)
+  } else if (nb_chans == 4) {
+    output->Write(2, 0b10);  // 4-channel palette (RGBA)
+  } else {
+    output->Write(2, 0b11);
+    output->Write(13, nb_chans - 1);
+  }
+  // pcolors <= kMaxColors + kChunkSize - 1
+  static_assert(kMaxColors + kChunkSize < 1281,
+                "add code to signal larger palette sizes");
+  if (pcolors < 256) {
+    output->Write(2, 0b00);
+    output->Write(8, pcolors);
+  } else {
+    output->Write(2, 0b01);
+    output->Write(10, pcolors - 256);
+  }
+
+  output->Write(2, 0b00);  // nb_deltas == 0
+  output->Write(4, 0);     // Zero predictor for delta palette
+  // Encode palette
+  ChunkEncoder<UpTo8Bits> encoder;
+  ChannelRowProcessor<ChunkEncoder<UpTo8Bits>, UpTo8Bits> row_encoder;
+  row_encoder.t = &encoder;
+  encoder.output = output;
+  encoder.code = &code[0];
+  int16_t p[4][32 + 1024] = {};  // one row per color component, 16 entries of lead-in
+  uint8_t prgba[4];
+  // Entry 0 is always the all-zero color: LLEnc numbers real colors from 1, so
+  // every color shifts up one slot. palette[pcolors - 1] is not a usable flag.
+  size_t have_zero = 1;
+  for (size_t i = 0; i < pcolors; i++) {
+    memcpy(prgba, &palette[i], 4);  // unpack the 4 component bytes of color i
+    p[0][16 + i + have_zero] = prgba[0];
+    p[1][16 + i + have_zero] = prgba[1];
+    p[2][16 + i + have_zero] = prgba[2];
+    p[3][16 + i + have_zero] = prgba[3];
+  }
+  p[0][15] = 0;  // left/top context of the first row is zero
+  row_encoder.ProcessRow(p[0] + 16, p[0] + 15, p[0] + 15, p[0] + 15, pcolors);
+  p[1][15] = p[0][16];  // from here on, each component row uses the previous one as "top"
+  p[0][15] = p[0][16];
+  row_encoder.ProcessRow(p[1] + 16, p[1] + 15, p[0] + 16, p[0] + 15, pcolors);
+  p[2][15] = p[1][16];
+  p[1][15] = p[1][16];
+  row_encoder.ProcessRow(p[2] + 16, p[2] + 15, p[1] + 16, p[1] + 15, pcolors);
+  p[3][15] = p[2][16];
+  p[2][15] = p[2][16];
+  row_encoder.ProcessRow(p[3] + 16, p[3] + 15, p[2] + 16, p[2] + 15, pcolors);
+  row_encoder.Finalize();
+
+  if (!is_single_group) {
+    output->ZeroPadToByte();
+  }
+}
+
+template <size_t nb_chans>  // Adds one row's pixels to the hash table; true if two colors share a bucket.
+bool detect_palette(const unsigned char* r, size_t width,
+                    std::vector<uint32_t>& palette) {
+  size_t x = 0;
+  bool collided = false;
+  // Unrolled next loop; loads are 4 bytes wide, so the last must end in-row.
+  for (; x + 7 + (nb_chans == 1 ? 3 : nb_chans < 4 ? 1 : 0) < width; x += 8) {
+    uint32_t p[8] = {}, index[8];
+    for (int i = 0; i < 8; i++) memcpy(&p[i], r + (x + i) * nb_chans, 4);
+    for (int i = 0; i < 8; i++) p[i] &= ((1llu << (8 * nb_chans)) - 1);  // drop the neighbour's bytes
+    for (int i = 0; i < 8; i++) index[i] = pixel_hash(p[i]);
+    for (int i = 0; i < 8; i++) {
+      collided |= (palette[index[i]] != 0 && p[i] != palette[index[i]]);
+      palette[index[i]] = p[i];  // store right away so clashes inside this batch are seen too
+    }
+  }
+  for (; x < width; x++) {  // remaining pixels, loaded with exactly nb_chans bytes each
+    uint32_t p = 0;
+    memcpy(&p, r + x * nb_chans, nb_chans);
+    uint32_t index = pixel_hash(p);
+    collided |= (palette[index] != 0 && p != palette[index]);
+    palette[index] = p;
+  }
+  return collided;
+}
+
+template <typename BitDepth>  // Encodes a whole frame; the caller owns the returned state.
+JxlFastLosslessFrameState* LLEnc(const unsigned char* rgba, size_t width,
+                                 size_t stride, size_t height,
+                                 BitDepth bitdepth, size_t nb_chans,
+                                 bool big_endian, int effort,
+                                 void* runner_opaque,
+                                 FJxlParallelRunner runner) {
+  assert(width != 0);
+  assert(height != 0);
+  assert(stride >= nb_chans * BitDepth::kInputBytes * width);
+
+  // Count colors to try palette
+  std::vector<uint32_t> palette(kHashSize);  // hash bucket -> packed pixel, 0 means empty
+  std::vector<int16_t> lookup(kHashSize);  // hash bucket -> palette index
+  lookup[0] = 0;
+  int pcolors = 0;
+  bool collided = effort < 2 || bitdepth.bitdepth != 8;  // palette is only tried at effort >= 2 on 8-bit input
+  for (size_t y = 0; y < height && !collided; y++) {
+    const unsigned char* r = rgba + stride * y;
+    if (nb_chans == 1) collided = detect_palette<1>(r, width, palette);
+    if (nb_chans == 2) collided = detect_palette<2>(r, width, palette);
+    if (nb_chans == 3) collided = detect_palette<3>(r, width, palette);
+    if (nb_chans == 4) collided = detect_palette<4>(r, width, palette);
+  }
+
+  int nb_entries = 0;
+  if (!collided) {
+    pcolors = 1;  // always have all-zero as a palette color
+    bool have_color = false;
+    uint8_t minG = 255, maxG = 0;
+    for (uint32_t k = 0; k < kHashSize; k++) {
+      if (palette[k] == 0) continue;
+      uint8_t p[4];
+      memcpy(p, &palette[k], 4);
+      // move entries to front so sort has less work
+      palette[nb_entries] = palette[k];
+      if (p[0] != p[1] || p[0] != p[2]) have_color = true;
+      if (p[1] < minG) minG = p[1];
+      if (p[1] > maxG) maxG = p[1];
+      nb_entries++;
+      // don't do palette if too many colors are needed
+      if (nb_entries + pcolors > kMaxColors) {
+        collided = true;
+        break;
+      }
+    }
+    if (!have_color) {
+      // don't do palette if it's just grayscale without many holes
+      if (maxG - minG < nb_entries * 1.4f) collided = true;
+    }
+  }
+  if (!collided) {
+    std::sort(
+        palette.begin(), palette.begin() + nb_entries,
+        [&nb_chans](uint32_t ap, uint32_t bp) {
+          if (ap == 0) return false;  // zero entries order after everything else
+          if (bp == 0) return true;
+          uint8_t a[4], b[4];
+          memcpy(a, &ap, 4);
+          memcpy(b, &bp, 4);
+          float ay, by;
+          if (nb_chans == 4) {
+            ay = (0.299f * a[0] + 0.587f * a[1] + 0.114f * a[2] + 0.01f) * a[3];
+            by = (0.299f * b[0] + 0.587f * b[1] + 0.114f * b[2] + 0.01f) * b[3];
+          } else {
+            ay = (0.299f * a[0] + 0.587f * a[1] + 0.114f * a[2] + 0.01f);
+            by = (0.299f * b[0] + 0.587f * b[1] + 0.114f * b[2] + 0.01f);
+          }
+          return ay < by;  // sort on alpha*luma
+        });
+    for (int k = 0; k < nb_entries; k++) {
+      if (palette[k] == 0) break;
+      lookup[pixel_hash(palette[k])] = pcolors++;  // real colors get indices 1, 2, ...
+    }
+  }
+
+  size_t num_groups_x = (width + 255) / 256;  // groups are 256x256 pixels
+  size_t num_groups_y = (height + 255) / 256;
+  size_t num_dc_groups_x = (width + 2047) / 2048;  // DC groups are 2048x2048 pixels
+  size_t num_dc_groups_y = (height + 2047) / 2048;
+
+  uint64_t raw_counts[4][kNumRawSymbols] = {};
+  uint64_t lz77_counts[4][kNumLZ77] = {};
+
+  bool onegroup = num_groups_x == 1 && num_groups_y == 1;
+
+  // sample the middle (effort * 2) rows of every group
+  for (size_t g = 0; g < num_groups_y * num_groups_x; g++) {
+    size_t xg = g % num_groups_x;
+    size_t yg = g / num_groups_x;
+    int y_offset = yg * 256;
+    int y_max = std::min<size_t>(height - yg * 256, 256);  // rows actually present in this group
+    int y_begin = y_offset + std::max<int>(0, y_max - 2 * effort) / 2;
+    int y_count =
+        std::min<int>(2 * effort * y_max / 256, y_offset + y_max - y_begin - 1);
+    int x_max =
+        std::min<size_t>(width - xg * 256, 256) / kChunkSize * kChunkSize;  // whole chunks only
+    CollectSamples(rgba, xg * 256, y_begin, x_max, stride, y_count, raw_counts,
+                   lz77_counts, onegroup, !collided, bitdepth, nb_chans,
+                   big_endian, lookup.data());
+  }
+
+  // TODO(veluca): can probably improve this and make it bitdepth-dependent.
+  uint64_t base_raw_counts[kNumRawSymbols] = {
+      3843, 852, 1270, 1214, 1014, 727, 481, 300, 159, 51,
+      5,    1,   1,    1,    1,    1,   1,   1,   1};
+
+  bool doing_ycocg = nb_chans > 2 && collided;  // color input that is not palettized
+  for (size_t i = bitdepth.NumSymbols(doing_ycocg); i < kNumRawSymbols; i++) {
+    base_raw_counts[i] = 0;
+  }
+
+  for (size_t c = 0; c < 4; c++) {
+    for (size_t i = 0; i < kNumRawSymbols; i++) {
+      raw_counts[c][i] = (raw_counts[c][i] << 8) + base_raw_counts[i];  // sampled counts weigh 256x the prior
+    }
+  }
+
+  if (!collided) {
+    unsigned token, nbits, bits;
+    EncodeHybridUint000(PackSigned(pcolors - 1), &token, &nbits, &bits);
+    // ensure all palette indices can actually be encoded
+    for (size_t i = 0; i < token + 1; i++)
+      raw_counts[0][i] = std::max<uint64_t>(raw_counts[0][i], 1);
+    // these tokens are only used for the palette itself so they can get a bad
+    // code
+    for (size_t i = token + 1; i < 10; i++) raw_counts[0][i] = 1;
+  }
+
+  uint64_t base_lz77_counts[kNumLZ77] = {
+      29, 27, 25,  23, 21, 21, 19, 18, 21, 17, 16, 15, 15, 14,
+      13, 13, 137, 98, 61, 34, 1,  1,  1,  1,  1,  1,  1,  1,
+  };
+
+  for (size_t c = 0; c < 4; c++) {
+    for (size_t i = 0; i < kNumLZ77; i++) {
+      lz77_counts[c][i] = (lz77_counts[c][i] << 8) + base_lz77_counts[i];
+    }
+  }
+
+  alignas(64) PrefixCode hcode[4];
+  for (size_t i = 0; i < 4; i++) {
+    hcode[i] = PrefixCode(bitdepth, raw_counts[i], lz77_counts[i]);
+  }
+
+  size_t num_groups = onegroup ? 1
+                               : (2 + num_dc_groups_x * num_dc_groups_y +
+                                  num_groups_x * num_groups_y);  // 2 extra sections + DC groups + groups
+
+  JxlFastLosslessFrameState* frame_state = new JxlFastLosslessFrameState();
+
+  frame_state->width = width;
+  frame_state->height = height;
+  frame_state->nb_chans = nb_chans;
+  frame_state->bitdepth = bitdepth.bitdepth;
+
+  frame_state->group_data = std::vector<std::array<BitWriter, 4>>(num_groups);
+  if (collided) {
+    PrepareDCGlobal(onegroup, width, height, nb_chans, hcode,
+                    &frame_state->group_data[0][0]);
+  } else {
+    PrepareDCGlobalPalette(onegroup, width, height, nb_chans, hcode, palette,
+                           pcolors, &frame_state->group_data[0][0]);
+  }
+
+  auto run_one = [&](size_t g) {  // encodes group g into its own group_data slot
+    size_t xg = g % num_groups_x;
+    size_t yg = g / num_groups_x;
+    size_t group_id =
+        onegroup ? 0 : (2 + num_dc_groups_x * num_dc_groups_y + g);
+    size_t xs = std::min<size_t>(width - xg * 256, 256);
+    size_t ys = std::min<size_t>(height - yg * 256, 256);
+    size_t x0 = xg * 256;
+    size_t y0 = yg * 256;
+    auto& gd = frame_state->group_data[group_id];
+    if (collided) {
+      WriteACSection(rgba, x0, y0, xs, ys, stride, onegroup, bitdepth, nb_chans,
+                     big_endian, hcode, gd);
+
+    } else {
+      WriteACSectionPalette(rgba, x0, y0, xs, ys, stride, onegroup, hcode,
+                            lookup.data(), nb_chans, gd[0]);
+    }
+  };
+
+  runner(
+      runner_opaque, &run_one,
+      +[](void* r, size_t i) { (*reinterpret_cast<decltype(&run_one)>(r))(i); },
+      num_groups_x * num_groups_y);  // one task per group
+
+  return frame_state;
+}
+
+JxlFastLosslessFrameState* JxlFastLosslessEncodeImpl(
+    const unsigned char* rgba, size_t width, size_t stride, size_t height,
+    size_t nb_chans, size_t bitdepth, bool big_endian, int effort,
+    void* runner_opaque, FJxlParallelRunner runner) {  // Picks the LLEnc instantiation for this bit depth.
+  assert(bitdepth > 0);
+  assert(nb_chans <= 4);
+  assert(nb_chans != 0);
+  if (bitdepth <= 8) {  // 1..8 bits
+    return LLEnc(rgba, width, stride, height, UpTo8Bits(bitdepth), nb_chans,
+                 big_endian, effort, runner_opaque, runner);
+  }
+  if (bitdepth <= 13) {  // 9..13 bits
+    return LLEnc(rgba, width, stride, height, From9To13Bits(bitdepth), nb_chans,
+                 big_endian, effort, runner_opaque, runner);
+  }
+  if (bitdepth == 14) {
+    return LLEnc(rgba, width, stride, height, Exactly14Bits(bitdepth), nb_chans,
+                 big_endian, effort, runner_opaque, runner);
+  }
+  return LLEnc(rgba, width, stride, height, MoreThan14Bits(bitdepth), nb_chans,
+               big_endian, effort, runner_opaque, runner);  // 15 bits and up
+}
+
+}  // namespace
+
+#endif  // FJXL_SELF_INCLUDE
+
+#ifndef FJXL_SELF_INCLUDE  // outer pass only: the nested includes below skip this section
+
+#define FJXL_SELF_INCLUDE
+
+// If we have NEON enabled, it is the default target.
+#if FJXL_ENABLE_NEON
+
+namespace default_implementation {
+#define FJXL_NEON
+#include "lib/jxl/enc_fast_lossless.cc"
+#undef FJXL_NEON
+}  // namespace default_implementation
+
+#else  // FJXL_ENABLE_NEON
+
+namespace default_implementation {
+#include "lib/jxl/enc_fast_lossless.cc"
+}  // namespace default_implementation
+
+#if FJXL_ENABLE_AVX2
+#ifdef __clang__
+#pragma clang attribute push(__attribute__((target("avx,avx2"))), \
+                             apply_to = function)
+// Causes spurious warnings on clang5.
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wmissing-braces"
+#elif defined(__GNUC__)
+#pragma GCC push_options
+// Seems to cause spurious errors on GCC8.
+#pragma GCC diagnostic ignored "-Wpsabi"
+#pragma GCC target "avx,avx2"
+#endif
+
+namespace AVX2 {
+#define FJXL_AVX2
+#include "lib/jxl/enc_fast_lossless.cc"
+#undef FJXL_AVX2
+}  // namespace AVX2
+
+#ifdef __clang__
+#pragma clang attribute pop
+#pragma clang diagnostic pop
+#elif defined(__GNUC__)
+#pragma GCC pop_options
+#endif
+#endif  // FJXL_ENABLE_AVX2
+
+#if FJXL_ENABLE_AVX512
+#ifdef __clang__
+#pragma clang attribute push(                                                 \
+    __attribute__((target("avx512cd,avx512bw,avx512vl,avx512f,avx512vbmi"))), \
+    apply_to = function)
+#elif defined(__GNUC__)
+#pragma GCC push_options
+#pragma GCC target "avx512cd,avx512bw,avx512vl,avx512f,avx512vbmi"
+#endif
+
+namespace AVX512 {
+#define FJXL_AVX512
+#include "lib/jxl/enc_fast_lossless.cc"
+#undef FJXL_AVX512
+}  // namespace AVX512
+
+#ifdef __clang__
+#pragma clang attribute pop
+#elif defined(__GNUC__)
+#pragma GCC pop_options
+#endif
+#endif  // FJXL_ENABLE_AVX512
+
+#endif  // FJXL_ENABLE_NEON
+
+extern "C" {
+
+#if FJXL_STANDALONE
+size_t JxlFastLosslessEncode(const unsigned char* rgba, size_t width,
+                             size_t row_stride, size_t height, size_t nb_chans,
+                             size_t bitdepth, int big_endian, int effort,
+                             unsigned char** output, void* runner_opaque,
+                             FJxlParallelRunner runner) {  // One-shot encode into a malloc'ed buffer.
+  auto frame_state = JxlFastLosslessPrepareFrame(
+      rgba, width, row_stride, height, nb_chans, bitdepth, big_endian, effort,
+      runner_opaque, runner);
+  JxlFastLosslessPrepareHeader(frame_state, /*add_image_header=*/1,
+                               /*is_last=*/1);
+  size_t output_size = JxlFastLosslessMaxRequiredOutput(frame_state);
+  *output = (unsigned char*)malloc(output_size);  // caller owns *output and must free() it
+  size_t written = 0, total = 0;
+  while ((written = JxlFastLosslessWriteOutput(frame_state, *output + total,
+                                               output_size - total)) != 0) {
+    total += written;  // keep draining until the writer reports nothing left
+  }
+  JxlFastLosslessFreeFrameState(frame_state);  // the state was previously leaked here
+  return total;
+}
+#endif
+
+JxlFastLosslessFrameState* JxlFastLosslessPrepareFrame(
+    const unsigned char* rgba, size_t width, size_t row_stride, size_t height,
+    size_t nb_chans, size_t bitdepth, int big_endian, int effort,
+    void* runner_opaque, FJxlParallelRunner runner) {  // Public entry: picks a SIMD build at runtime.
+  auto trivial_runner =
+      +[](void*, void* opaque, void fun(void*, size_t), size_t count) {
+        for (size_t i = 0; i < count; i++) {  // sequential fallback: runs the tasks in order
+          fun(opaque, i);
+        }
+      };
+
+  if (runner == nullptr) {  // no runner supplied: encode on the calling thread
+    runner = trivial_runner;
+  }
+
+#if FJXL_ENABLE_AVX512
+  if (__builtin_cpu_supports("avx512cd") &&
+      __builtin_cpu_supports("avx512vbmi") &&
+      __builtin_cpu_supports("avx512bw") && __builtin_cpu_supports("avx512f") &&
+      __builtin_cpu_supports("avx512vl")) {  // same feature set the AVX512 namespace is compiled for
+    return AVX512::JxlFastLosslessEncodeImpl(rgba, width, row_stride, height,
+                                             nb_chans, bitdepth, big_endian,
+                                             effort, runner_opaque, runner);
+  }
+#endif
+#if FJXL_ENABLE_AVX2
+  if (__builtin_cpu_supports("avx2")) {
+    return AVX2::JxlFastLosslessEncodeImpl(rgba, width, row_stride, height,
+                                           nb_chans, bitdepth, big_endian,
+                                           effort, runner_opaque, runner);
+  }
+#endif
+
+  return default_implementation::JxlFastLosslessEncodeImpl(
+      rgba, width, row_stride, height, nb_chans, bitdepth, big_endian, effort,
+      runner_opaque, runner);  // NEON when enabled, otherwise the build without a target macro
+}
+
+}  // extern "C"
+
+#endif  // FJXL_SELF_INCLUDE
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_fast_lossless.h b/third-party/libjxl/libjxl/lib/jxl/enc_fast_lossless.h
new file mode 100644
index 0000000000..f0bcd72cbf
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_fast_lossless.h
@@ -0,0 +1,85 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_FAST_LOSSLESS_H_
+#define LIB_JXL_ENC_FAST_LOSSLESS_H_
+#include <stdlib.h>
+
+// FJXL_STANDALONE=1 for a stand-alone jxl encoder
+// FJXL_STANDALONE=0 for use in libjxl to encode frames (but no image header)
+#ifndef FJXL_STANDALONE
+#ifdef JPEGXL_MAJOR_VERSION
+#define FJXL_STANDALONE 0
+#else
+#define FJXL_STANDALONE 1
+#endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Simple encoding API.
+
+// A FJxlParallelRunner must call fun(opaque, i) for all i with 0 <= i < count.
+// It may do so in parallel.
+typedef void(FJxlParallelRunner)(void* runner_opaque, void* opaque,
+                                 void fun(void*, size_t), size_t count);
+
+#if FJXL_STANDALONE
+// You may pass `nullptr` as a runner: encoding will be sequential.
+size_t JxlFastLosslessEncode(const unsigned char* rgba, size_t width,
+                             size_t row_stride, size_t height, size_t nb_chans,
+                             size_t bitdepth, int big_endian, int effort,
+                             unsigned char** output, void* runner_opaque,
+                             FJxlParallelRunner runner);
+#endif
+
+// More complex API for cases in which you may want to allocate your own buffer
+// and other advanced use cases.
+
+// Opaque intermediate encoder state; the typedef makes the name usable from C.
+typedef struct JxlFastLosslessFrameState JxlFastLosslessFrameState;
+
+// Returned JxlFastLosslessFrameState must be freed by calling
+// JxlFastLosslessFreeFrameState.
+JxlFastLosslessFrameState* JxlFastLosslessPrepareFrame(
+    const unsigned char* rgba, size_t width, size_t row_stride, size_t height,
+    size_t nb_chans, size_t bitdepth, int big_endian, int effort,
+    void* runner_opaque, FJxlParallelRunner runner);
+
+// Prepare the (image/frame) header. You may encode animations by concatenating
+// the output of multiple frames, of which the first one has add_image_header =
+// 1 and subsequent ones have add_image_header = 0, and all frames but the last
+// one have is_last = 0.
+// (when FJXL_STANDALONE=0, add_image_header has to be 0)
+void JxlFastLosslessPrepareHeader(JxlFastLosslessFrameState* frame,
+                                  int add_image_header, int is_last);
+
+// Upper bound on the required output size, including any padding that may be
+// required by JxlFastLosslessWriteOutput. Cannot be called before
+// JxlFastLosslessPrepareHeader.
+size_t JxlFastLosslessMaxRequiredOutput(const JxlFastLosslessFrameState* frame);
+
+// Actual size of the frame once it is encoded. This is not identical to
+// JxlFastLosslessMaxRequiredOutput because JxlFastLosslessWriteOutput may
+// require extra padding.
+size_t JxlFastLosslessOutputSize(const JxlFastLosslessFrameState* frame);
+
+// Writes the frame to the given output buffer. Returns the number of bytes that
+// were written, which is at least 1 unless the entire output has been written
+// already. It is required that `output_size >= 32` when calling this function.
+// This function must be called repeatedly until it returns 0.
+size_t JxlFastLosslessWriteOutput(JxlFastLosslessFrameState* frame,
+                                  unsigned char* output, size_t output_size);
+
+// Frees the provided frame state.
+void JxlFastLosslessFreeFrameState(JxlFastLosslessFrameState* frame);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // LIB_JXL_ENC_FAST_LOSSLESS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_fields.cc b/third-party/libjxl/libjxl/lib/jxl/enc_fields.cc
new file mode 100644
index 0000000000..22c763e13f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_fields.cc
@@ -0,0 +1,239 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_fields.h"
+
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+
+namespace {
+using ::jxl::fields_internal::VisitorBase;
+class WriteVisitor : public VisitorBase {
+ public:
+  WriteVisitor(const size_t extension_bits, BitWriter* JXL_RESTRICT writer)
+      : extension_bits_(extension_bits), writer_(writer) {}
+  // Field callbacks write via writer_; any failure is latched into ok_.
+  Status Bits(const size_t bits, const uint32_t /*default_value*/,
+              uint32_t* JXL_RESTRICT value) override {
+    ok_ &= BitsCoder::Write(bits, *value, writer_);
+    return true;
+  }
+  Status U32(const U32Enc enc, const uint32_t /*default_value*/,
+             uint32_t* JXL_RESTRICT value) override {
+    ok_ &= U32Coder::Write(enc, *value, writer_);
+    return true;
+  }
+
+  Status U64(const uint64_t /*default_value*/,
+             uint64_t* JXL_RESTRICT value) override {
+    ok_ &= U64Coder::Write(*value, writer_);
+    return true;
+  }
+
+  Status F16(const float /*default_value*/,
+             float* JXL_RESTRICT value) override {
+    ok_ &= F16Coder::Write(*value, writer_);
+    return true;
+  }
+
+  Status BeginExtensions(uint64_t* JXL_RESTRICT extensions) override {
+    JXL_QUIET_RETURN_IF_ERROR(VisitorBase::BeginExtensions(extensions));
+    if (*extensions == 0) {
+      JXL_ASSERT(extension_bits_ == 0);
+      return true;
+    }
+    // TODO(janwas): extend API to pass in array of extension_bits, one per
+    // extension. We currently ascribe all bits to the first extension, but
+    // this is only an encoder limitation. NOTE: extension_bits_ can be zero
+    // if an extension does not require any additional fields.
+    ok_ &= U64Coder::Write(extension_bits_, writer_);
+    // For each nonzero bit except the lowest/first (already written):
+    for (uint64_t remaining_extensions = *extensions & (*extensions - 1);
+         remaining_extensions != 0;
+         remaining_extensions &= remaining_extensions - 1) {
+      ok_ &= U64Coder::Write(0, writer_);
+    }
+    return true;
+  }
+  // EndExtensions = default.
+  // True unless one of the writes above has failed.
+  Status OK() const { return ok_; }
+
+ private:
+  const size_t extension_bits_;  // written as the first extension's bit count
+  BitWriter* JXL_RESTRICT writer_;
+  bool ok_ = true;  // cleared by the first failing write
+};
+}  // namespace
+// Serializes `fields` to `writer`, charging the bits used to `layer`.
+Status Bundle::Write(const Fields& fields, BitWriter* writer, size_t layer,
+                     AuxOut* aux_out) {
+  size_t extension_bits, total_bits;
+  JXL_RETURN_IF_ERROR(Bundle::CanEncode(fields, &extension_bits, &total_bits));
+  // Allot the total_bits reported by CanEncode, then let the visitor write.
+  BitWriter::Allotment allotment(writer, total_bits);
+  WriteVisitor visitor(extension_bits, writer);
+  JXL_RETURN_IF_ERROR(visitor.VisitConst(fields));
+  JXL_RETURN_IF_ERROR(visitor.OK());  // fails if any field write failed
+  allotment.ReclaimAndCharge(writer, layer, aux_out);  // not reached on error
+  return true;
+}
+
+// Writes `value` in exactly `bits` bits; returns false if it does not fit.
+Status BitsCoder::Write(const size_t bits, const uint32_t value,
+                        BitWriter* JXL_RESTRICT writer) {
+  if (value >= (1ULL << bits)) {
+    return JXL_FAILURE("Value %" PRIu32 " too large to encode in %" PRIu64
+                       " bits", value, static_cast<uint64_t>(bits));
+  }
+  writer->Write(bits, value);
+  return true;
+}
+
+// Writes selector + payload; returns false if `value` is too large to encode.
+Status U32Coder::Write(const U32Enc enc, const uint32_t value,
+                       BitWriter* JXL_RESTRICT writer) {
+  uint32_t selector;
+  size_t total_bits;
+  JXL_RETURN_IF_ERROR(ChooseSelector(enc, value, &selector, &total_bits));
+  writer->Write(2, selector);
+
+  const U32Distr distr = enc.GetDistr(selector);
+  // A direct distribution carries no payload beyond the selector.
+  if (distr.IsDirect()) return true;
+  // The payload is the distance from the distribution's offset, written in
+  // total_bits minus the 2 selector bits already emitted above.
+  const uint32_t offset = distr.Offset();
+  JXL_ASSERT(value >= offset);
+  writer->Write(total_bits - 2, value - offset);
+  return true;
+}
+
+// Writes `value` in one of four selector-tagged forms. Every uint64_t fits
+// one of them, so this always returns true.
+Status U64Coder::Write(uint64_t value, BitWriter* JXL_RESTRICT writer) {
+  if (value == 0) {
+    writer->Write(2, 0);  // selector 0: no payload
+    return true;
+  }
+  if (value <= 16) {
+    writer->Write(2, 1);  // selector 1: 4 bits hold value 1..16
+    writer->Write(4, value - 1);
+    return true;
+  }
+  if (value <= 272) {
+    writer->Write(2, 2);  // selector 2: 8 bits hold value 17..272
+    writer->Write(8, value - 17);
+    return true;
+  }
+
+  // Selector 3: varint, first a 12-bit group, after that per 8-bit group.
+  writer->Write(2, 3);
+  writer->Write(12, value & 0xFFF);
+  value >>= 12;
+  // `consumed` counts the value bits emitted so far; 8-bit groups stop at 60.
+  for (int consumed = 12; value != 0 && consumed < 60; consumed += 8) {
+    writer->Write(1, 1);  // continuation bit: another group follows
+    writer->Write(8, value & 0xFF);
+    value >>= 8;
+  }
+  if (value == 0) {
+    writer->Write(1, 0);  // stop bit: end of varint
+    return true;
+  }
+
+  // Bits 60..63 are still pending: send them as a final 4-bit group. The
+  // sequence is then implicitly closed, no extra stop bit is required.
+  writer->Write(1, 1);
+  writer->Write(4, value & 0xF);
+  return true;
+}
+
+// Writes `value` as 16 half-float bits; fails if its exponent exceeds 15.
+Status F16Coder::Write(float value, BitWriter* JXL_RESTRICT writer) {
+  // Split the 32-bit float representation into its three fields.
+  uint32_t raw;
+  memcpy(&raw, &value, sizeof(raw));
+  const uint32_t sign = raw >> 31;
+  const uint32_t exp_field32 = (raw >> 23) & 0xFF;
+  const uint32_t mantissa32 = raw & 0x7FFFFF;
+  const int32_t exp = static_cast<int32_t>(exp_field32) - 127;
+
+  if (JXL_UNLIKELY(exp > 15)) {
+    return JXL_FAILURE("Too big to encode, CanEncode should return false");
+  }
+  if (exp < -24) {
+    // Tiny or zero => zero (all 16 bits are 0, so the sign is not kept).
+    writer->Write(16, 0);
+    return true;
+  }
+
+  uint32_t biased_exp16 = 0;
+  uint32_t mantissa16;
+  if (JXL_UNLIKELY(exp < -14)) {
+    // exp = [-24, -15] => subnormal: exponent field 0, and the implicit
+    // leading one becomes an explicit mantissa bit.
+    const uint32_t sub_exp = static_cast<uint32_t>(-14 - exp);
+    JXL_ASSERT(1 <= sub_exp && sub_exp < 11);
+    mantissa16 = (1 << (10 - sub_exp)) + (mantissa32 >> (13 + sub_exp));
+  } else {
+    // exp = [-14, 15] => normal: rebias by 15, keep the top 10 mantissa bits.
+    biased_exp16 = static_cast<uint32_t>(exp + 15);
+    JXL_ASSERT(1 <= biased_exp16 && biased_exp16 < 31);
+    mantissa16 = mantissa32 >> 13;
+  }
+  JXL_ASSERT(mantissa16 < 1024);
+  const uint32_t bits16 = (sign << 15) | (biased_exp16 << 10) | mantissa16;
+  JXL_ASSERT(bits16 < 0x10000);
+  writer->Write(16, bits16);
+  return true;
+}
+// Writes the signature, size header, image metadata and transform data.
+Status WriteCodestreamHeaders(CodecMetadata* metadata, BitWriter* writer,
+                              AuxOut* aux_out) {
+  // Marker/signature: the two bytes 0xFF and kCodestreamMarker.
+  BitWriter::Allotment allotment(writer, 16);
+  writer->Write(8, 0xFF);
+  writer->Write(8, kCodestreamMarker);
+  allotment.ReclaimAndCharge(writer, kLayerHeader, aux_out);
+
+  JXL_RETURN_IF_ERROR(
+      WriteSizeHeader(metadata->size, writer, kLayerHeader, aux_out));
+
+  JXL_RETURN_IF_ERROR(
+      WriteImageMetadata(metadata->m, writer, kLayerHeader, aux_out));
+  // Mirror xyb_encoded into transform_data before serializing it.
+  metadata->transform_data.nonserialized_xyb_encoded = metadata->m.xyb_encoded;
+  JXL_RETURN_IF_ERROR(
+      Bundle::Write(metadata->transform_data, writer, kLayerHeader, aux_out));
+
+  return true;
+}
+// Writes the frame header bundle, charged to kLayerHeader.
+Status WriteFrameHeader(const FrameHeader& frame,
+                        BitWriter* JXL_RESTRICT writer, AuxOut* aux_out) {
+  return Bundle::Write(frame, writer, kLayerHeader, aux_out);
+}
+// Writes the image metadata bundle, charged to `layer`.
+Status WriteImageMetadata(const ImageMetadata& metadata,
+                          BitWriter* JXL_RESTRICT writer, size_t layer,
+                          AuxOut* aux_out) {
+  return Bundle::Write(metadata, writer, layer, aux_out);
+}
+// Writes the quantizer params bundle, charged to `layer`.
+Status WriteQuantizerParams(const QuantizerParams& params,
+                            BitWriter* JXL_RESTRICT writer, size_t layer,
+                            AuxOut* aux_out) {
+  return Bundle::Write(params, writer, layer, aux_out);
+}
+// Writes the size header bundle, charged to `layer`.
+Status WriteSizeHeader(const SizeHeader& size, BitWriter* JXL_RESTRICT writer,
+                       size_t layer, AuxOut* aux_out) {
+  return Bundle::Write(size, writer, layer, aux_out);
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_fields.h b/third-party/libjxl/libjxl/lib/jxl/enc_fields.h
new file mode 100644
index 0000000000..5bb179a719
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_fields.h
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_FIELDS_H_
+#define LIB_JXL_ENC_FIELDS_H_
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/image_metadata.h"
+#include "lib/jxl/quantizer.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// Write headers from the CodecMetadata. Also may modify nonserialized_...
+// fields of the metadata.
+Status WriteCodestreamHeaders(CodecMetadata* metadata, BitWriter* writer,
+                              AuxOut* aux_out);
+
+Status WriteFrameHeader(const FrameHeader& frame,
+                        BitWriter* JXL_RESTRICT writer, AuxOut* aux_out);
+
+Status WriteQuantizerParams(const QuantizerParams& params,
+                            BitWriter* JXL_RESTRICT writer, size_t layer,
+                            AuxOut* aux_out);
+
+Status WriteSizeHeader(const SizeHeader& size, BitWriter* JXL_RESTRICT writer,
+                       size_t layer, AuxOut* aux_out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_FIELDS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_file.cc b/third-party/libjxl/libjxl/lib/jxl/enc_file.cc
new file mode 100644
index 0000000000..ef5e21414e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_file.cc
@@ -0,0 +1,141 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_file.h"
+
+#include <stddef.h>
+
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/enc_frame.h"
+#include "lib/jxl/enc_icc_codec.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+namespace {
+
+// Fills `metadata` from `io` and `cparams`: copies the image metadata, sets
+// the (possibly upsampled) size and decides on ICC and XYB signalling.
+Status PrepareCodecMetadataFromIO(const CompressParams& cparams,
+                                  const CodecInOut* io,
+                                  CodecMetadata* metadata) {
+  *metadata = io->metadata;
+  // Input that was already downsampled is signalled at its upsampled size.
+  const size_t ups = cparams.already_downsampled ? cparams.resampling : 1;
+  JXL_RETURN_IF_ERROR(metadata->size.Set(io->xsize() * ups, io->ysize() * ups));
+
+  // Lossless modes keep the ICC profile since a reconstructed one may differ
+  // slightly (quantization); JPEG reconstruction needs byte-exact profiles.
+  const bool may_drop_icc = !cparams.IsLossless() && !io->Main().IsJPEG();
+  if (may_drop_icc && cparams.cms_set) {
+    metadata->m.color_encoding.DecideIfWantICC(cparams.cms);
+  }
+
+  metadata->m.xyb_encoded = cparams.color_transform == ColorTransform::kXYB;
+
+  // TODO(firsching): move this EncodeFile to test_utils / re-implement this
+  // using API functions
+  return true;
+}
+
+}  // namespace
+// Encodes `ib` (if it has color) as a preview frame appended to `writer`.
+Status EncodePreview(const CompressParams& cparams, const ImageBundle& ib,
+                     const CodecMetadata* metadata, const JxlCmsInterface& cms,
+                     ThreadPool* pool, BitWriter* JXL_RESTRICT writer) {
+  BitWriter preview_writer;
+  // TODO(janwas): also support generating preview by downsampling
+  if (ib.HasColor()) {
+    AuxOut aux_out;
+    PassesEncoderState passes_enc_state;
+    // TODO(lode): check if we want all extra channels and matching xyb_encoded
+    // for the preview, such that using the main ImageMetadata object for
+    // encoding this frame is warranted.
+    FrameInfo frame_info;
+    frame_info.is_preview = true;
+    JXL_RETURN_IF_ERROR(EncodeFrame(cparams, frame_info, metadata, ib,
+                                    &passes_enc_state, cms, pool,
+                                    &preview_writer, &aux_out));
+    preview_writer.ZeroPadToByte();
+  }
+  // Nothing is appended when no preview frame was written above.
+  if (preview_writer.BitsWritten() != 0) {
+    writer->ZeroPadToByte();
+    writer->AppendByteAligned(preview_writer);
+  }
+
+  return true;
+}
+// Encodes `io` (headers, optional ICC/preview, all frames) into `compressed`.
+Status EncodeFile(const CompressParams& params, const CodecInOut* io,
+                  PassesEncoderState* passes_enc_state, PaddedBytes* compressed,
+                  const JxlCmsInterface& cms, AuxOut* aux_out,
+                  ThreadPool* pool) {
+  io->CheckMetadata();
+  BitWriter writer;
+  // Adjust a private copy of the params; `params` itself is const.
+  CompressParams cparams = params;
+  if (io->Main().color_transform != ColorTransform::kNone) {
+    // Set the color transform to YCbCr or XYB if the original image is such.
+    cparams.color_transform = io->Main().color_transform;
+  }
+
+  JXL_RETURN_IF_ERROR(ParamsPostInit(&cparams));
+
+  std::unique_ptr<CodecMetadata> metadata = jxl::make_unique<CodecMetadata>();
+  JXL_RETURN_IF_ERROR(PrepareCodecMetadataFromIO(cparams, io, metadata.get()));
+  JXL_RETURN_IF_ERROR(WriteCodestreamHeaders(metadata.get(), &writer, aux_out));
+
+  // Only send ICC (at least several hundred bytes) if fields aren't enough.
+  if (metadata->m.color_encoding.WantICC()) {
+    JXL_RETURN_IF_ERROR(WriteICC(metadata->m.color_encoding.ICC(), &writer,
+                                 kLayerHeader, aux_out));
+  }
+
+  if (metadata->m.have_preview) {
+    JXL_RETURN_IF_ERROR(EncodePreview(cparams, io->preview_frame,
+                                      metadata.get(), cms, pool, &writer));
+  }
+
+  // Each frame should start on byte boundaries.
+  BitWriter::Allotment allotment(&writer, 8);
+  writer.ZeroPadToByte();
+  allotment.ReclaimAndCharge(&writer, kLayerHeader, aux_out);
+  // Encode the frames in order; only the final one has is_last set.
+  for (size_t i = 0; i < io->frames.size(); i++) {
+    FrameInfo info;
+    info.is_last = i == io->frames.size() - 1;
+    if (io->frames[i].use_for_next_frame) {
+      info.save_as_reference = 1;
+    }
+    JXL_RETURN_IF_ERROR(EncodeFrame(cparams, info, metadata.get(),
+                                    io->frames[i], passes_enc_state, cms, pool,
+                                    &writer, aux_out));
+  }
+
+  // Clean up passes_enc_state in case it gets reused.
+  for (size_t i = 0; i < 4; i++) {
+    passes_enc_state->shared.dc_frames[i] = Image3F();
+    passes_enc_state->shared.reference_frames[i].frame = ImageBundle();
+  }
+  // Move the written bytes into the caller's buffer.
+  *compressed = std::move(writer).TakeBytes();
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_file.h b/third-party/libjxl/libjxl/lib/jxl/enc_file.h
new file mode 100644
index 0000000000..ff3ad1233d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_file.h
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_FILE_H_
+#define LIB_JXL_ENC_FILE_H_
+
+// Facade for JXL encoding.
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+
+namespace jxl {
+
+struct AuxOut;
+class CodecInOut;
+
+// Compresses pixels from `io` (given in any ColorEncoding).
+// `io->metadata.m.original` must be set.
+Status EncodeFile(const CompressParams& params, const CodecInOut* io,
+                  PassesEncoderState* passes_enc_state, PaddedBytes* compressed,
+                  const JxlCmsInterface& cms, AuxOut* aux_out = nullptr,
+                  ThreadPool* pool = nullptr);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_FILE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_frame.cc b/third-party/libjxl/libjxl/lib/jxl/enc_frame.cc
new file mode 100644
index 0000000000..5fe2b6a712
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_frame.cc
@@ -0,0 +1,1558 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_frame.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cmath>
+#include <limits>
+#include <numeric>
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_chroma_from_luma.h"
+#include "lib/jxl/enc_coeff_order.h"
+#include "lib/jxl/enc_context_map.h"
+#include "lib/jxl/enc_entropy_coder.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/enc_gaborish.h"
+#include "lib/jxl/enc_group.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_noise.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/enc_photon_noise.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/enc_splines.h"
+#include "lib/jxl/enc_toc.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/modular/options.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+#include "lib/jxl/toc.h"
+
+namespace jxl {
+namespace {
+
+PassDefinition progressive_passes_dc_vlf_lf_full_ac[] = {  // three passes
+    {/*num_coefficients=*/2, /*shift=*/0,
+     /*suitable_for_downsampling_of_at_least=*/4},
+    {/*num_coefficients=*/3, /*shift=*/0,
+     /*suitable_for_downsampling_of_at_least=*/2},
+    {/*num_coefficients=*/8, /*shift=*/0,
+     /*suitable_for_downsampling_of_at_least=*/0},
+};
+
+PassDefinition progressive_passes_dc_quant_ac_full_ac[] = {  // two passes
+    {/*num_coefficients=*/8, /*shift=*/1,
+     /*suitable_for_downsampling_of_at_least=*/2},
+    {/*num_coefficients=*/8, /*shift=*/0,
+     /*suitable_for_downsampling_of_at_least=*/0},
+};
+
+// Computes the frame header flags (kNoise, kUseDcFrame) implied by `cparams`.
+uint64_t FrameFlagsFromParams(const CompressParams& cparams) {
+  const float dist = cparams.butteraugli_distance;
+
+  // We don't add noise at low butteraugli distances because the original
+  // noise is stored within the compressed image and adding noise makes things
+  // worse.
+  const bool wants_noise =
+      ApplyOverride(cparams.noise, dist >= kMinButteraugliForNoise) ||
+      cparams.photon_noise_iso > 0 ||
+      cparams.manual_noise.size() == NoiseParams::kNumNoisePoints;
+  // The DC-frame flag is only set for non-modular encodes.
+  const bool wants_dc_frame =
+      cparams.progressive_dc > 0 && !cparams.modular_mode;
+
+  uint64_t flags = 0;
+  if (wants_noise) flags |= FrameHeader::kNoise;
+  if (wants_dc_frame) flags |= FrameHeader::kUseDcFrame;
+  return flags;
+}
+
+// Configures the frame's loop filter (Gaborish and EPF) from `cparams`.
+Status LoopFilterFromParams(const CompressParams& cparams,
+                            FrameHeader* JXL_RESTRICT frame_header) {
+  LoopFilter& lf = frame_header->loop_filter;
+  const bool is_modular = frame_header->encoding == FrameEncoding::kModular;
+
+  // Gaborish defaults to enabled in Hare or slower.
+  const bool gab_default = cparams.speed_tier <= SpeedTier::kHare &&
+                           frame_header->encoding == FrameEncoding::kVarDCT &&
+                           cparams.decoding_speed_tier < 4;
+  lf.gab = ApplyOverride(cparams.gaborish, gab_default);
+
+  if (cparams.epf != -1) {
+    // An explicit iteration count was requested.
+    lf.epf_iters = cparams.epf;
+  } else {
+    // Default: zero for modular frames and for decoding speed tier 3 and up;
+    // otherwise one iteration per distance threshold that is reached, where
+    // decoding speed tier 2 skips the lowest threshold.
+    lf.epf_iters = 0;
+    if (!is_modular && cparams.decoding_speed_tier < 3) {
+      constexpr float kThresholds[3] = {0.7, 1.5, 4.0};
+      const size_t first = cparams.decoding_speed_tier == 2 ? 1 : 0;
+      for (size_t i = first; i < 3; i++) {
+        if (cparams.butteraugli_distance >= kThresholds[i]) lf.epf_iters++;
+      }
+    }
+  }
+
+  if (is_modular) {
+    // Strength of EPF in modular mode.
+    if (!cparams.IsLossless()) {
+      // TODO(veluca): this formula is nonsense.
+      lf.epf_sigma_for_modular = cparams.butteraugli_distance;
+    }
+    if (cparams.lossy_palette) lf.epf_sigma_for_modular = 1.0f;
+  }
+
+  return true;
+}
+// Fills *frame_header from cparams, frame_info and ib; fails on invalid setups.
+Status MakeFrameHeader(const CompressParams& cparams,
+                       const ProgressiveSplitter& progressive_splitter,
+                       const FrameInfo& frame_info, const ImageBundle& ib,
+                       FrameHeader* JXL_RESTRICT frame_header) {
+  frame_header->nonserialized_is_preview = frame_info.is_preview;
+  frame_header->is_last = frame_info.is_last;
+  frame_header->save_before_color_transform =
+      frame_info.save_before_color_transform;
+  frame_header->frame_type = frame_info.frame_type;
+  frame_header->name = ib.name;
+
+  progressive_splitter.InitPasses(&frame_header->passes);
+  // Modular frames also need a group size: explicit, or chosen by image size.
+  if (cparams.modular_mode) {
+    frame_header->encoding = FrameEncoding::kModular;
+    if (cparams.modular_group_size_shift == -1) {
+      frame_header->group_size_shift = 1;
+      // no point using groups when only one group is full and the others are
+      // less than half full: multithreading will not really help much, while
+      // compression does suffer
+      if (ib.xsize() <= 400 && ib.ysize() <= 400) {
+        frame_header->group_size_shift = 2;
+      }
+    } else {
+      frame_header->group_size_shift = cparams.modular_group_size_shift;
+    }
+  }
+
+  frame_header->chroma_subsampling = ib.chroma_subsampling;
+  if (ib.IsJPEG()) {
+    // we are transcoding a JPEG, so we don't get to choose
+    frame_header->encoding = FrameEncoding::kVarDCT;
+    frame_header->color_transform = ib.color_transform;
+  } else {
+    frame_header->color_transform = cparams.color_transform;
+    if (!cparams.modular_mode &&
+        (frame_header->chroma_subsampling.MaxHShift() != 0 ||
+         frame_header->chroma_subsampling.MaxVShift() != 0)) {
+      return JXL_FAILURE(
+          "Chroma subsampling is not supported in VarDCT mode when not "
+          "recompressing JPEGs");
+    }
+  }
+  if (frame_header->color_transform != ColorTransform::kYCbCr &&
+      (frame_header->chroma_subsampling.MaxHShift() != 0 ||
+       frame_header->chroma_subsampling.MaxVShift() != 0)) {
+    return JXL_FAILURE(
+        "Chroma subsampling is not supported when color transform is not "
+        "YCbCr");
+  }
+
+  frame_header->flags = FrameFlagsFromParams(cparams);
+  // Non-photon noise is not supported in the Modular encoder for now.
+  if (frame_header->encoding != FrameEncoding::kVarDCT &&
+      cparams.photon_noise_iso == 0 && cparams.manual_noise.empty()) {
+    frame_header->UpdateFlag(false, FrameHeader::Flags::kNoise);
+  }
+
+  JXL_RETURN_IF_ERROR(LoopFilterFromParams(cparams, frame_header));
+
+  frame_header->dc_level = frame_info.dc_level;
+  if (frame_header->dc_level > 2) {
+    // With 3 or more progressive_dc frames, the implementation does not yet
+    // work, see enc_cache.cc.
+    return JXL_FAILURE("progressive_dc > 2 is not yet supported");
+  }
+  if (cparams.progressive_dc > 0 &&
+      (cparams.ec_resampling != 1 || cparams.resampling != 1)) {
+    return JXL_FAILURE("Resampling not supported with DC frames");
+  }
+  if (cparams.resampling != 1 && cparams.resampling != 2 &&
+      cparams.resampling != 4 && cparams.resampling != 8) {
+    return JXL_FAILURE("Invalid resampling factor");
+  }
+  if (cparams.ec_resampling != 1 && cparams.ec_resampling != 2 &&
+      cparams.ec_resampling != 4 && cparams.ec_resampling != 8) {
+    return JXL_FAILURE("Invalid ec_resampling factor");
+  }
+  // Frame size and origin (not set for DC frames).
+  if (frame_info.frame_type != FrameType::kDCFrame) {
+    frame_header->frame_origin = ib.origin;
+    size_t ups = 1;
+    if (cparams.already_downsampled) ups = cparams.resampling;
+
+    // TODO(lode): this is not correct in case of odd original image sizes in
+    // combination with cparams.already_downsampled. Likely these values should
+    // be set to respectively frame_header->default_xsize() and
+    // frame_header->default_ysize() instead, the original (non downsampled)
+    // intended decoded image dimensions. But it may be more subtle than that
+    // if combined with crop. This issue causes custom_size_or_origin to be
+    // incorrectly set to true in case of already_downsampled with odd output
+    // image size when no cropping is used.
+    frame_header->frame_size.xsize = ib.xsize() * ups;
+    frame_header->frame_size.ysize = ib.ysize() * ups;
+    if (ib.origin.x0 != 0 || ib.origin.y0 != 0 ||
+        frame_header->frame_size.xsize != frame_header->default_xsize() ||
+        frame_header->frame_size.ysize != frame_header->default_ysize()) {
+      frame_header->custom_size_or_origin = true;
+    }
+  }
+  // Upsampling.
+  frame_header->upsampling = cparams.resampling;
+  const std::vector<ExtraChannelInfo>& extra_channels =
+      frame_header->nonserialized_metadata->m.extra_channel_info;
+  frame_header->extra_channel_upsampling.clear();
+  frame_header->extra_channel_upsampling.resize(extra_channels.size(),
+                                                cparams.ec_resampling);
+  frame_header->save_as_reference = frame_info.save_as_reference;
+
+  // Set blending-related information.
+  if (ib.blend || frame_header->custom_size_or_origin) {
+    // Set blend_channel to the first alpha channel. These values are only
+    // encoded in case a blend mode involving alpha is used and there is more
+    // than one extra channel.
+    size_t index = 0;
+    if (frame_info.alpha_channel == -1) {
+      if (extra_channels.size() > 1) {
+        for (size_t i = 0; i < extra_channels.size(); i++) {
+          if (extra_channels[i].type == ExtraChannel::kAlpha) {
+            index = i;
+            break;
+          }
+        }
+      }
+    } else {
+      index = static_cast<size_t>(frame_info.alpha_channel);
+      JXL_ASSERT(index == 0 || index < extra_channels.size());
+    }
+    frame_header->blending_info.alpha_channel = index;
+    frame_header->blending_info.mode =
+        ib.blend ? ib.blendmode : BlendMode::kReplace;
+    frame_header->blending_info.source = frame_info.source;
+    frame_header->blending_info.clamp = frame_info.clamp;
+    const auto& extra_channel_info = frame_info.extra_channel_blending_info;
+    for (size_t i = 0; i < extra_channels.size(); i++) {
+      if (i < extra_channel_info.size()) {
+        frame_header->extra_channel_blending_info[i] = extra_channel_info[i];
+      } else {
+        frame_header->extra_channel_blending_info[i].alpha_channel = index;
+        BlendMode default_blend = ib.blendmode;
+        if (extra_channels[i].type != ExtraChannel::kBlack && i != index) {
+          // K needs to be blended, spot colors and other stuff gets added
+          default_blend = BlendMode::kAdd;
+        }
+        frame_header->extra_channel_blending_info[i].mode =
+            ib.blend ? default_blend : BlendMode::kReplace;
+        frame_header->extra_channel_blending_info[i].source = 1;
+      }
+    }
+  }
+
+  frame_header->animation_frame.duration = ib.duration;
+  frame_header->animation_frame.timecode = ib.timecode;
+
+  return true;
+}
+
+// Fully transparent (alpha == 0) pixels have no visual impact, yet in
+// optimized PNGs they tend to hold messy values. This pass overwrites them
+// with something that compresses better and reduces artifacts near edges.
+// When `lossless` is set they are simply zeroed. Otherwise each invisible
+// pixel becomes a weighted average of its surroundings: the left and the
+// top-right neighbours always count with weight 1, a visible (alpha > 0) left
+// neighbour counts once more, and the visible right, top-right, bottom-right,
+// top and bottom neighbours count with weight 2 each.
+// Pixels to the left and above have already been processed when a pixel is
+// visited, so colors smear downwards; upwards there is only a 1px edge
+// duplication. Smearing in all directions would probably be better, but that
+// requires an alpha-weighted convolution with a large enough kernel, which
+// might be overkill.
+void SimplifyInvisible(Image3F* image, const ImageF& alpha, bool lossless) {
+  const size_t xsize = image->xsize();
+  const size_t ysize = image->ysize();
+  for (size_t c = 0; c < 3; ++c) {
+    for (size_t y = 0; y < ysize; ++y) {
+      const bool has_above = y > 0;
+      const bool has_below = y + 1 < ysize;
+      float* JXL_RESTRICT cur = image->PlaneRow(c, y);
+      const float* JXL_RESTRICT above =
+          (has_above ? image->PlaneRow(c, y - 1) : nullptr);
+      const float* JXL_RESTRICT below =
+          (has_below ? image->PlaneRow(c, y + 1) : nullptr);
+      const float* JXL_RESTRICT a_cur = alpha.Row(y);
+      const float* JXL_RESTRICT a_above =
+          (has_above ? alpha.Row(y - 1) : nullptr);
+      const float* JXL_RESTRICT a_below =
+          (has_below ? alpha.Row(y + 1) : nullptr);
+      for (size_t x = 0; x < xsize; ++x) {
+        // Visible pixels are left untouched.
+        if (!(a_cur[x] == 0)) continue;
+        if (lossless) {
+          cur[x] = 0;
+          continue;
+        }
+        // Accumulate the weighted sum and the total weight locally; the
+        // additions happen in the same order as the neighbours are listed.
+        float sum = 0;
+        float weight = 0.f;
+        if (x > 0) {
+          // The left pixel may itself be an already-simplified one.
+          sum += cur[x - 1];
+          weight += 1.f;
+          if (a_cur[x - 1] > 0.f) {
+            sum += cur[x - 1];
+            weight += 1.f;
+          }
+        }
+        if (x + 1 < xsize) {
+          if (has_above) {
+            sum += above[x + 1];
+            weight += 1.f;
+          }
+          if (a_cur[x + 1] > 0.f) {
+            sum += 2.f * cur[x + 1];
+            weight += 2.f;
+          }
+          if (has_above && a_above[x + 1] > 0.f) {
+            sum += 2.f * above[x + 1];
+            weight += 2.f;
+          }
+          if (has_below && a_below[x + 1] > 0.f) {
+            sum += 2.f * below[x + 1];
+            weight += 2.f;
+          }
+        }
+        if (has_above && a_above[x] > 0.f) {
+          sum += 2.f * above[x];
+          weight += 2.f;
+        }
+        if (has_below && a_below[x] > 0.f) {
+          sum += 2.f * below[x];
+          weight += 2.f;
+        }
+        // A total weight of 0 or 1 needs no normalization.
+        cur[x] = (weight > 1.f) ? sum / weight : sum;
+      }
+    }
+  }
+}
+
+// Worst-case neighbouring-pixel statistics of an opsin image. Calc() fills
+// the three fields; the HowMuchIs*Pixelized() helpers map them to small
+// integer levels by comparing against fixed thresholds.
+struct PixelStatsForChromacityAdjustment {
+  // Largest neighbour difference found in plane 0.
+  float dx = 0;
+  // Largest neighbour difference of (plane 2 - plane 1).
+  float db = 0;
+  // Largest "exposed blue" score, see CalcExposedBlue().
+  float exposed_blue = 0;
+
+  // Returns the largest absolute difference between a pixel and its left or
+  // upper neighbour. Row 0 and column 0 only ever act as neighbours.
+  float CalcPlane(const ImageF* JXL_RESTRICT plane) const {
+    const size_t xsize = plane->xsize();
+    const size_t ysize = plane->ysize();
+    float max_horz = 0;
+    float max_vert = 0;
+    for (size_t y = 1; y < ysize; ++y) {
+      const float* row = plane->Row(y);
+      const float* row_above = plane->Row(y - 1);
+      for (size_t x = 1; x < xsize; ++x) {
+        const float value = row[x];
+        const float above = row_above[x];
+        const float left = row[x - 1];
+        max_horz = std::max(max_horz, std::abs(value - left));
+        max_vert = std::max(max_vert, std::abs(value - above));
+      }
+    }
+    return std::max(max_horz, max_vert);
+  }
+
+  // Sets `db` to the largest left/up neighbour difference of (b - y), and
+  // `exposed_blue` to the largest value of (b - 1.2 * y), where non-negative,
+  // multiplied by the sum of the absolute left and up differences of b.
+  void CalcExposedBlue(const ImageF* JXL_RESTRICT plane_y,
+                       const ImageF* JXL_RESTRICT plane_b) {
+    const size_t xsize = plane_y->xsize();
+    const size_t ysize = plane_y->ysize();
+    float max_exposed = 0;
+    float max_horz = 0;
+    float max_vert = 0;
+    for (size_t y = 1; y < ysize; ++y) {
+      const float* row_y = plane_y->Row(y);
+      const float* row_y_above = plane_y->Row(y - 1);
+      const float* row_b = plane_b->Row(y);
+      const float* row_b_above = plane_b->Row(y - 1);
+      for (size_t x = 1; x < xsize; ++x) {
+        const float cur_y = row_y[x];
+        const float cur_b = row_b[x];
+        const float b_above = row_b_above[x];
+        const float b_left = row_b[x - 1];
+        const float diff_cur = cur_b - cur_y;
+        const float diff_above = b_above - row_y_above[x];
+        const float diff_left = b_left - row_y[x - 1];
+        max_horz = std::max(max_horz, std::abs(diff_cur - diff_left));
+        max_vert = std::max(max_vert, std::abs(diff_cur - diff_above));
+        float exposed_b = cur_b - cur_y * 1.2;
+        if (exposed_b >= 0) {
+          exposed_b *= fabs(cur_b - b_left) + fabs(cur_b - b_above);
+          max_exposed = std::max(max_exposed, exposed_b);
+        }
+      }
+    }
+    exposed_blue = max_exposed;
+    db = std::max(max_horz, max_vert);
+  }
+
+  // Computes all statistics: plane 0 feeds `dx`, planes 1 and 2 feed `db` and
+  // `exposed_blue`.
+  void Calc(const Image3F* JXL_RESTRICT opsin) {
+    dx = CalcPlane(&opsin->Plane(0));
+    CalcExposedBlue(&opsin->Plane(1), &opsin->Plane(2));
+  }
+
+  // Level 0..2 derived from `dx`.
+  int HowMuchIsXChannelPixelized() {
+    if (dx >= 0.03) return 2;
+    return dx >= 0.017 ? 1 : 0;
+  }
+
+  // Level 0..3 derived from `db`; a large `exposed_blue` adds one level, but
+  // only once `db` exceeds the lowest threshold.
+  int HowMuchIsBChannelPixelized() {
+    if (!(db > 0.28)) return 0;
+    const int base = db > 0.38 ? 2 : (db > 0.33 ? 1 : 0);
+    const int add = exposed_blue >= 0.13 ? 1 : 0;
+    return base + add;
+  }
+};
+
+}  // namespace
+
+class LossyFrameEncoder {
+ public:
+  // Binds the encoder to `enc_state` and remembers the color management
+  // interface, thread pool and aux output for later calls.
+  // The shared passes state is initialized from `frame_header` (checked with
+  // JXL_CHECK), a copy of `cparams` is stored in the encoder state, and any
+  // existing per-pass data is cleared.
+  LossyFrameEncoder(const CompressParams& cparams,
+                    const FrameHeader& frame_header,
+                    PassesEncoderState* JXL_RESTRICT enc_state,
+                    const JxlCmsInterface& cms, ThreadPool* pool,
+                    AuxOut* aux_out)
+      : enc_state_(enc_state), cms_(cms), pool_(pool), aux_out_(aux_out) {
+    JXL_CHECK(InitializePassesSharedState(frame_header, &enc_state_->shared,
+                                          /*encoder=*/true));
+    enc_state_->cparams = cparams;
+    enc_state_->passes.clear();
+  }
+
+  // Prepares the lossy encoding data for one frame from the `opsin` image.
+  // Steps: choose the X/B quantization-matrix scales (unless max_error_mode is
+  // set), run the lossy frame heuristics, initialize the passes encoder,
+  // compute the coefficient orders and tokenize the AC coefficients of every
+  // group. On success the resulting shared frame header is copied to
+  // `*frame_header`.
+  // The dimensions of `opsin` must be multiples of kBlockDim.
+  // NOTE(review): the `pool` argument is not referenced here (the member
+  // pool_ is used instead), and `cms` is only forwarded to
+  // InitializePassesEncoder while the heuristics receive the member cms_.
+  Status ComputeEncodingData(const ImageBundle* linear,
+                             Image3F* JXL_RESTRICT opsin,
+                             const JxlCmsInterface& cms, ThreadPool* pool,
+                             ModularFrameEncoder* modular_frame_encoder,
+                             FrameHeader* frame_header) {
+    JXL_ASSERT((opsin->xsize() % kBlockDim) == 0 &&
+               (opsin->ysize() % kBlockDim) == 0);
+    PassesSharedState& shared = enc_state_->shared;
+
+    if (!enc_state_->cparams.max_error_mode) {
+      // Compute chromacity adjustments using two approaches.
+      // 1) Distance based approach for chromacity adjustment:
+      // start at 2 and add one for every step the original distance exceeds.
+      float x_qm_scale_steps[4] = {1.25f, 7.0f, 15.0f, 24.0f};
+      shared.frame_header.x_qm_scale = 2;
+      for (float x_qm_scale_step : x_qm_scale_steps) {
+        if (enc_state_->cparams.original_butteraugli_distance >
+            x_qm_scale_step) {
+          shared.frame_header.x_qm_scale++;
+        }
+      }
+      if (enc_state_->cparams.butteraugli_distance < 0.299f) {
+        // Favor chromacity preservation for making images appear more
+        // faithful to original even with extreme (5-10x) zooming.
+        shared.frame_header.x_qm_scale++;
+      }
+      // 2) Pixel-based approach for chromacity adjustment:
+      // look at the individual pixels and make a guess how difficult
+      // the image would be based on the worst case pixel.
+      // For speed tiers above kWombat the statistics are not computed and
+      // keep their zero defaults, so both HowMuchIs*Pixelized() return 0.
+      PixelStatsForChromacityAdjustment pixel_stats;
+      if (enc_state_->cparams.speed_tier <= SpeedTier::kWombat) {
+        pixel_stats.Calc(opsin);
+      }
+      // For X take the most severe adjustment.
+      shared.frame_header.x_qm_scale =
+          std::max<int>(shared.frame_header.x_qm_scale,
+                        2 + pixel_stats.HowMuchIsXChannelPixelized());
+      // B is only adjusted by the pixel-based approach.
+      shared.frame_header.b_qm_scale =
+          2 + pixel_stats.HowMuchIsBChannelPixelized();
+    }
+
+    JXL_RETURN_IF_ERROR(enc_state_->heuristics->LossyFrameHeuristics(
+        enc_state_, modular_frame_encoder, linear, opsin, cms_, pool_,
+        aux_out_));
+
+    JXL_RETURN_IF_ERROR(InitializePassesEncoder(
+        *opsin, cms, pool_, enc_state_, modular_frame_encoder, aux_out_));
+
+    // One token vector per group for each pass.
+    enc_state_->passes.resize(enc_state_->progressive_splitter.GetNumPasses());
+    for (PassesEncoderState::PassData& pass : enc_state_->passes) {
+      pass.ac_tokens.resize(shared.frame_dim.num_groups);
+    }
+
+    ComputeAllCoeffOrders(shared.frame_dim);
+    shared.num_histograms = 1;
+
+    // Called once by RunOnPool with the number of worker threads: one cache
+    // per thread.
+    const auto tokenize_group_init = [&](const size_t num_threads) {
+      group_caches_.resize(num_threads);
+      return true;
+    };
+    const auto tokenize_group = [&](const uint32_t group_index,
+                                    const size_t thread) {
+      // Tokenize coefficients.
+      const Rect rect = shared.BlockGroupRect(group_index);
+      for (size_t idx_pass = 0; idx_pass < enc_state_->passes.size();
+           idx_pass++) {
+        JXL_ASSERT(enc_state_->coeffs[idx_pass]->Type() == ACType::k32);
+        const int32_t* JXL_RESTRICT ac_rows[3] = {
+            enc_state_->coeffs[idx_pass]->PlaneRow(0, group_index, 0).ptr32,
+            enc_state_->coeffs[idx_pass]->PlaneRow(1, group_index, 0).ptr32,
+            enc_state_->coeffs[idx_pass]->PlaneRow(2, group_index, 0).ptr32,
+        };
+        // Ensure group cache is initialized.
+        group_caches_[thread].InitOnce();
+        TokenizeCoefficients(
+            &shared.coeff_orders[idx_pass * shared.coeff_order_size], rect,
+            ac_rows, shared.ac_strategy, frame_header->chroma_subsampling,
+            &group_caches_[thread].num_nzeroes,
+            &enc_state_->passes[idx_pass].ac_tokens[group_index],
+            enc_state_->shared.quant_dc, enc_state_->shared.raw_quant_field,
+            enc_state_->shared.block_ctx_map);
+      }
+    };
+    JXL_RETURN_IF_ERROR(RunOnPool(pool_, 0, shared.frame_dim.num_groups,
+                                  tokenize_group_init, tokenize_group,
+                                  "TokenizeGroup"));
+
+    // Publish the header as modified above and by the heuristics.
+    *frame_header = shared.frame_header;
+    return true;
+  }
+
+  // Fills the encoder state directly from the quantized coefficients in
+  // `jpeg_data`, for recompressing an existing JPEG.
+  // Steps: install the JPEG quantization tables as custom dequantization
+  // matrices, optionally derive a per-tile chroma-from-luma map, copy the
+  // (transposed) coefficients and DC values into the encoder state, derive DC
+  // thresholds and the block context map from DC histograms, hand DC and AC
+  // metadata to `modular_frame_encoder`, and tokenize all groups.
+  // On success `*frame_header` is overwritten with the shared frame header and
+  // `doing_jpeg_recompression` is set.
+  Status ComputeJPEGTranscodingData(const jpeg::JPEGData& jpeg_data,
+                                    ModularFrameEncoder* modular_frame_encoder,
+                                    FrameHeader* frame_header) {
+    PassesSharedState& shared = enc_state_->shared;
+
+    frame_header->x_qm_scale = 2;
+    frame_header->b_qm_scale = 2;
+
+    FrameDimensions frame_dim = frame_header->ToFrameDimensions();
+
+    const size_t xsize = frame_dim.xsize_padded;
+    const size_t ysize = frame_dim.ysize_padded;
+    const size_t xsize_blocks = frame_dim.xsize_blocks;
+    const size_t ysize_blocks = frame_dim.ysize_blocks;
+
+    // no-op chroma from luma
+    shared.cmap = ColorCorrelationMap(xsize, ysize, false);
+    shared.ac_strategy.FillDCT8();
+    FillImage(uint8_t(0), &shared.epf_sharpness);
+
+    enc_state_->passes.resize(enc_state_->progressive_splitter.GetNumPasses());
+    for (PassesEncoderState::PassData& pass : enc_state_->passes) {
+      pass.ac_tokens.resize(shared.frame_dim.num_groups);
+    }
+
+    // One 32-bit coefficient image per pass.
+    enc_state_->coeffs.clear();
+    while (enc_state_->coeffs.size() < enc_state_->passes.size()) {
+      enc_state_->coeffs.emplace_back(make_unique<ACImageT<int32_t>>(
+          kGroupDim * kGroupDim, frame_dim.num_groups));
+    }
+
+    // convert JPEG quantization table to a Quantizer object
+    float dcquantization[3];
+    std::vector<QuantEncoding> qe(DequantMatrices::kNum,
+                                  QuantEncoding::Library(0));
+
+    auto jpeg_c_map = JpegOrder(frame_header->color_transform,
+                                jpeg_data.components.size() == 1);
+
+    // qt holds three 64-entry tables (one per channel c), stored transposed
+    // with respect to the JPEG table.
+    std::vector<int> qt(192);
+    for (size_t c = 0; c < 3; c++) {
+      size_t jpeg_c = jpeg_c_map[c];
+      const int32_t* quant =
+          jpeg_data.quant[jpeg_data.components[jpeg_c].quant_idx].values.data();
+
+      dcquantization[c] = 255 * 8.0f / quant[0];
+      for (size_t y = 0; y < 8; y++) {
+        for (size_t x = 0; x < 8; x++) {
+          // JPEG XL transposes the DCT, JPEG doesn't.
+          qt[c * 64 + 8 * x + y] = quant[8 * y + x];
+        }
+      }
+    }
+    DequantMatricesSetCustomDC(&shared.matrices, dcquantization);
+    float dcquantization_r[3] = {1.0f / dcquantization[0],
+                                 1.0f / dcquantization[1],
+                                 1.0f / dcquantization[2]};
+
+    qe[AcStrategy::Type::DCT] = QuantEncoding::RAW(qt);
+    DequantMatricesSetCustom(&shared.matrices, qe, modular_frame_encoder);
+
+    // Ensure that InvGlobalScale() is 1.
+    shared.quantizer = Quantizer(&shared.matrices, 1, kGlobalScaleDenom);
+    // Recompute MulDC() and InvMulDC().
+    shared.quantizer.RecomputeFromGlobalScale();
+
+    // Per-block dequant scaling should be 1.
+    FillImage(static_cast<int32_t>(shared.quantizer.InvGlobalScale()),
+              &shared.raw_quant_field);
+
+    // Fixed-point ratio (kCFLFixedPointPrecision fractional bits) of the
+    // channel-1 table entry to the entry of each channel's own table.
+    std::vector<int32_t> scaled_qtable(192);
+    for (size_t c = 0; c < 3; c++) {
+      for (size_t i = 0; i < 64; i++) {
+        scaled_qtable[64 * c + i] =
+            (1 << kCFLFixedPointPrecision) * qt[64 + i] / qt[64 * c + i];
+      }
+    }
+
+    // Pointer to block row `y` of the coefficients of the JPEG component that
+    // is mapped to channel `c`.
+    auto jpeg_row = [&](size_t c, size_t y) {
+      return jpeg_data.components[jpeg_c_map[c]].coeffs.data() +
+             jpeg_data.components[jpeg_c_map[c]].width_in_blocks *
+                 kDCTBlockSize * y;
+    };
+
+    Image3F dc = Image3F(xsize_blocks, ysize_blocks);
+    // When false, a per-channel offset of 1024 / qt[c * 64] is added to every
+    // DC value below.
+    bool DCzero =
+        (shared.frame_header.color_transform == ColorTransform::kYCbCr);
+    // Compute chroma-from-luma for AC (doesn't seem to be useful for DC)
+    if (frame_header->chroma_subsampling.Is444() &&
+        enc_state_->cparams.force_cfl_jpeg_recompression &&
+        jpeg_data.components.size() == 3) {
+      for (size_t c : {0, 2}) {
+        ImageSB* map = (c == 0 ? &shared.cmap.ytox_map : &shared.cmap.ytob_map);
+        const float kScale = kDefaultColorFactor;
+        const int kOffset = 127;
+        const float kBase =
+            c == 0 ? shared.cmap.YtoXRatio(0) : shared.cmap.YtoBRatio(0);
+        const float kZeroThresh =
+            kScale * kZeroBiasDefault[c] *
+            0.9999f;  // just epsilon less for better rounding
+
+        // Computes one row of the correlation map; each entry covers a tile
+        // of kColorTileDimInBlocks x kColorTileDimInBlocks blocks.
+        auto process_row = [&](const uint32_t task, const size_t thread) {
+          size_t ty = task;
+          int8_t* JXL_RESTRICT row_out = map->Row(ty);
+          for (size_t tx = 0; tx < map->xsize(); ++tx) {
+            const size_t y0 = ty * kColorTileDimInBlocks;
+            const size_t x0 = tx * kColorTileDimInBlocks;
+            const size_t y1 = std::min(frame_dim.ysize_blocks,
+                                       (ty + 1) * kColorTileDimInBlocks);
+            const size_t x1 = std::min(frame_dim.xsize_blocks,
+                                       (tx + 1) * kColorTileDimInBlocks);
+            // Difference array over the candidate values 0..255: every AC
+            // coefficient adds +1 at the start of its [from, to] range and -1
+            // just past its end, so prefix sums count the covering ranges.
+            // NOTE(review): presumably [from, to] is the range of candidates
+            // for which the chroma residual falls within kZeroThresh - confirm.
+            int32_t d_num_zeros[257] = {0};
+            // TODO(veluca): this needs SIMD + fixed point adaptation, and/or
+            // conversion to the new CfL algorithm.
+            for (size_t y = y0; y < y1; ++y) {
+              const int16_t* JXL_RESTRICT row_m = jpeg_row(1, y);
+              const int16_t* JXL_RESTRICT row_s = jpeg_row(c, y);
+              for (size_t x = x0; x < x1; ++x) {
+                // coeffpos starts at 1: the DC coefficient is skipped.
+                for (size_t coeffpos = 1; coeffpos < kDCTBlockSize;
+                     coeffpos++) {
+                  const float scaled_m =
+                      row_m[x * kDCTBlockSize + coeffpos] *
+                      scaled_qtable[64 * c + coeffpos] *
+                      (1.0f / (1 << kCFLFixedPointPrecision));
+                  const float scaled_s =
+                      kScale * row_s[x * kDCTBlockSize + coeffpos] +
+                      (kOffset - kBase * kScale) * scaled_m;
+                  if (std::abs(scaled_m) > 1e-8f) {
+                    float from, to;
+                    if (scaled_m > 0) {
+                      from = (scaled_s - kZeroThresh) / scaled_m;
+                      to = (scaled_s + kZeroThresh) / scaled_m;
+                    } else {
+                      from = (scaled_s + kZeroThresh) / scaled_m;
+                      to = (scaled_s - kZeroThresh) / scaled_m;
+                    }
+                    if (from < 0.0f) {
+                      from = 0.0f;
+                    }
+                    if (to > 255.0f) {
+                      to = 255.0f;
+                    }
+                    // Instead of clamping the both values
+                    // we just check that range is sane.
+                    if (from <= to) {
+                      d_num_zeros[static_cast<int>(std::ceil(from))]++;
+                      d_num_zeros[static_cast<int>(std::floor(to + 1))]--;
+                    }
+                  }
+                }
+              }
+            }
+            int best = 0;
+            int32_t best_sum = 0;
+            FindIndexOfSumMaximum(d_num_zeros, 256, &best, &best_sum);
+            // Prefix sum at index kOffset, i.e. the count for a map value of 0.
+            int32_t offset_sum = 0;
+            for (int i = 0; i < 256; ++i) {
+              if (i <= kOffset) {
+                offset_sum += d_num_zeros[i];
+              }
+            }
+            // Only deviate from 0 when the best candidate is better by more
+            // than one.
+            row_out[tx] = 0;
+            if (best_sum > offset_sum + 1) {
+              row_out[tx] = best - kOffset;
+            }
+          }
+        };
+
+        JXL_RETURN_IF_ERROR(RunOnPool(pool_, 0, map->ysize(),
+                                      ThreadPool::NoInit, process_row,
+                                      "FindCorrelation"));
+      }
+    }
+
+    // With subsampling not every block position is written below, so start
+    // from all-zero DC and coefficients.
+    if (!frame_header->chroma_subsampling.Is444()) {
+      ZeroFillImage(&dc);
+      for (auto& coeff : enc_state_->coeffs) {
+        coeff->ZeroFill();
+      }
+    }
+    // JPEG DC is from -1024 to 1023.
+    // Per-channel histogram of DC values, indexed by idc + 1024.
+    std::vector<size_t> dc_counts[3] = {};
+    dc_counts[0].resize(2048);
+    dc_counts[1].resize(2048);
+    dc_counts[2].resize(2048);
+    size_t total_dc[3] = {};
+    for (size_t c : {1, 0, 2}) {
+      // Single-component input: channels other than 1 are all zero.
+      if (jpeg_data.components.size() == 1 && c != 1) {
+        for (auto& coeff : enc_state_->coeffs) {
+          coeff->ZeroFillPlane(c);
+        }
+        ZeroFillImage(&dc.Plane(c));
+        // Ensure no division by 0.
+        dc_counts[c][1024] = 1;
+        total_dc[c] = 1;
+        continue;
+      }
+      size_t hshift = frame_header->chroma_subsampling.HShift(c);
+      size_t vshift = frame_header->chroma_subsampling.VShift(c);
+      ImageSB& map = (c == 0 ? shared.cmap.ytox_map : shared.cmap.ytob_map);
+      for (size_t group_index = 0; group_index < frame_dim.num_groups;
+           group_index++) {
+        const size_t gx = group_index % frame_dim.xsize_groups;
+        const size_t gy = group_index / frame_dim.xsize_groups;
+        // Per-pass write cursors into this group's coefficients for channel c.
+        int32_t* coeffs[kMaxNumPasses];
+        for (size_t i = 0; i < enc_state_->coeffs.size(); i++) {
+          coeffs[i] = enc_state_->coeffs[i]->PlaneRow(c, group_index, 0).ptr32;
+        }
+        int32_t block[64];
+        for (size_t by = gy * kGroupDimInBlocks;
+             by < ysize_blocks && by < (gy + 1) * kGroupDimInBlocks; ++by) {
+          // Skip block rows that are not aligned to the vertical subsampling.
+          if ((by >> vshift) << vshift != by) continue;
+          const int16_t* JXL_RESTRICT inputjpeg = jpeg_row(c, by >> vshift);
+          const int16_t* JXL_RESTRICT inputjpegY = jpeg_row(1, by);
+          float* JXL_RESTRICT fdc = dc.PlaneRow(c, by >> vshift);
+          const int8_t* JXL_RESTRICT cm =
+              map.ConstRow(by / kColorTileDimInBlocks);
+          for (size_t bx = gx * kGroupDimInBlocks;
+               bx < xsize_blocks && bx < (gx + 1) * kGroupDimInBlocks; ++bx) {
+            // Skip block columns that are not aligned to the horizontal
+            // subsampling.
+            if ((bx >> hshift) << hshift != bx) continue;
+            size_t base = (bx >> hshift) * kDCTBlockSize;
+            int idc;
+            if (DCzero) {
+              idc = inputjpeg[base];
+            } else {
+              idc = inputjpeg[base] + 1024 / qt[c * 64];
+            }
+            // Histogram bin, clamped to the last entry.
+            dc_counts[c][std::min(static_cast<uint32_t>(idc + 1024),
+                                  uint32_t(2047))]++;
+            total_dc[c]++;
+            fdc[bx >> hshift] = idc * dcquantization_r[c];
+            if (c == 1 || !enc_state_->cparams.force_cfl_jpeg_recompression ||
+                !frame_header->chroma_subsampling.Is444()) {
+              // Plain transposed copy of the block.
+              for (size_t y = 0; y < 8; y++) {
+                for (size_t x = 0; x < 8; x++) {
+                  block[y * 8 + x] = inputjpeg[base + x * 8 + y];
+                }
+              }
+            } else {
+              // Transposed copy with the chroma-from-luma prediction
+              // subtracted.
+              const int32_t scale =
+                  shared.cmap.RatioJPEG(cm[bx / kColorTileDimInBlocks]);
+
+              for (size_t y = 0; y < 8; y++) {
+                for (size_t x = 0; x < 8; x++) {
+                  int Y = inputjpegY[kDCTBlockSize * bx + x * 8 + y];
+                  int QChroma = inputjpeg[kDCTBlockSize * bx + x * 8 + y];
+                  // Fixed-point multiply of CfL scale with quant table ratio
+                  // first, and Y value second.
+                  int coeff_scale = (scale * scaled_qtable[64 * c + y * 8 + x] +
+                                     (1 << (kCFLFixedPointPrecision - 1))) >>
+                                    kCFLFixedPointPrecision;
+                  int cfl_factor = (Y * coeff_scale +
+                                    (1 << (kCFLFixedPointPrecision - 1))) >>
+                                   kCFLFixedPointPrecision;
+                  int QCR = QChroma - cfl_factor;
+                  block[y * 8 + x] = QCR;
+                }
+              }
+            }
+            enc_state_->progressive_splitter.SplitACCoefficients(
+                block, AcStrategy::FromRawStrategy(AcStrategy::Type::DCT), bx,
+                by, coeffs);
+            // Advance every pass cursor to the next block.
+            for (size_t i = 0; i < enc_state_->coeffs.size(); i++) {
+              coeffs[i] += kDCTBlockSize;
+            }
+          }
+        }
+      }
+    }
+
+    // Derive the DC thresholds per channel from the histograms: walk the
+    // cumulative counts and emit a threshold whenever the next cut is passed.
+    auto& dct = enc_state_->shared.block_ctx_map.dc_thresholds;
+    auto& num_dc_ctxs = enc_state_->shared.block_ctx_map.num_dc_ctxs;
+    num_dc_ctxs = 1;
+    for (size_t i = 0; i < 3; i++) {
+      dct[i].clear();
+      int num_thresholds = (CeilLog2Nonzero(total_dc[i]) - 12) / 2;
+      // up to 3 buckets per channel:
+      // dark/medium/bright, yellow/unsat/blue, green/unsat/red
+      num_thresholds = std::min(std::max(num_thresholds, 0), 2);
+      size_t cumsum = 0;
+      size_t cut = total_dc[i] / (num_thresholds + 1);
+      for (int j = 0; j < 2048; j++) {
+        cumsum += dc_counts[i][j];
+        if (cumsum > cut) {
+          dct[i].push_back(j - 1025);
+          cut = total_dc[i] * (dct[i].size() + 1) / (num_thresholds + 1);
+        }
+      }
+      num_dc_ctxs *= dct[i].size() + 1;
+    }
+
+    auto& ctx_map = enc_state_->shared.block_ctx_map.ctx_map;
+    ctx_map.clear();
+    ctx_map.resize(3 * kNumOrders * num_dc_ctxs, 0);
+
+    // Only the first entry of each channel's kNumOrders * num_dc_ctxs section
+    // range [0, num_dc_ctxs) is assigned; everything else stays 0.
+    int lbuckets = (dct[1].size() + 1);
+    for (size_t i = 0; i < num_dc_ctxs; i++) {
+      // up to 9 contexts for luma
+      ctx_map[i] = i / lbuckets;
+      // up to 3 contexts for chroma
+      ctx_map[kNumOrders * num_dc_ctxs + i] =
+          ctx_map[2 * kNumOrders * num_dc_ctxs + i] =
+              num_dc_ctxs / lbuckets + (i % lbuckets);
+    }
+    enc_state_->shared.block_ctx_map.num_ctxs =
+        *std::max_element(ctx_map.begin(), ctx_map.end()) + 1;
+
+    enc_state_->histogram_idx.resize(shared.frame_dim.num_groups);
+
+    // disable DC frame for now
+    shared.frame_header.UpdateFlag(false, FrameHeader::kUseDcFrame);
+    auto compute_dc_coeffs = [&](const uint32_t group_index,
+                                 size_t /* thread */) {
+      modular_frame_encoder->AddVarDCTDC(dc, group_index, /*nl_dc=*/false,
+                                         enc_state_, /*jpeg_transcode=*/true);
+      modular_frame_encoder->AddACMetadata(group_index, /*jpeg_transcode=*/true,
+                                           enc_state_);
+    };
+    JXL_RETURN_IF_ERROR(RunOnPool(pool_, 0, shared.frame_dim.num_dc_groups,
+                                  ThreadPool::NoInit, compute_dc_coeffs,
+                                  "Compute DC coeffs"));
+
+    // Must happen before WriteFrameHeader!
+    shared.frame_header.UpdateFlag(true, FrameHeader::kSkipAdaptiveDCSmoothing);
+
+    ComputeAllCoeffOrders(frame_dim);
+    shared.num_histograms = 1;
+
+    // Called once by RunOnPool with the number of worker threads: one cache
+    // per thread.
+    const auto tokenize_group_init = [&](const size_t num_threads) {
+      group_caches_.resize(num_threads);
+      return true;
+    };
+    const auto tokenize_group = [&](const uint32_t group_index,
+                                    const size_t thread) {
+      // Tokenize coefficients.
+      const Rect rect = shared.BlockGroupRect(group_index);
+      for (size_t idx_pass = 0; idx_pass < enc_state_->passes.size();
+           idx_pass++) {
+        JXL_ASSERT(enc_state_->coeffs[idx_pass]->Type() == ACType::k32);
+        const int32_t* JXL_RESTRICT ac_rows[3] = {
+            enc_state_->coeffs[idx_pass]->PlaneRow(0, group_index, 0).ptr32,
+            enc_state_->coeffs[idx_pass]->PlaneRow(1, group_index, 0).ptr32,
+            enc_state_->coeffs[idx_pass]->PlaneRow(2, group_index, 0).ptr32,
+        };
+        // Ensure group cache is initialized.
+        group_caches_[thread].InitOnce();
+        TokenizeCoefficients(
+            &shared.coeff_orders[idx_pass * shared.coeff_order_size], rect,
+            ac_rows, shared.ac_strategy, frame_header->chroma_subsampling,
+            &group_caches_[thread].num_nzeroes,
+            &enc_state_->passes[idx_pass].ac_tokens[group_index],
+            enc_state_->shared.quant_dc, enc_state_->shared.raw_quant_field,
+            enc_state_->shared.block_ctx_map);
+      }
+    };
+    JXL_RETURN_IF_ERROR(RunOnPool(pool_, 0, shared.frame_dim.num_groups,
+                                  tokenize_group_init, tokenize_group,
+                                  "TokenizeGroup"));
+    // Publish the header including the flag updates made above.
+    *frame_header = shared.frame_header;
+    doing_jpeg_recompression = true;
+    return true;
+  }
+
+  // Writes the global DC information to `writer`: the quantizer parameters
+  // (quantizer DC and global scale), the block context map and the DC part of
+  // the color correlation map. `frame_header` is not used.
+  Status EncodeGlobalDCInfo(const FrameHeader& frame_header,
+                            BitWriter* writer) const {
+    PassesSharedState& shared = enc_state_->shared;
+    QuantizerParams quantizer_params = shared.quantizer.GetParams();
+    JXL_RETURN_IF_ERROR(WriteQuantizerParams(quantizer_params, writer,
+                                             kLayerQuant, aux_out_));
+    EncodeBlockCtxMap(shared.block_ctx_map, writer, aux_out_);
+    ColorCorrelationMapEncodeDC(&shared.cmap, writer, kLayerDC, aux_out_);
+    return true;
+  }
+
+  // Writes the global AC information to `writer`: the dequantization
+  // matrices, the number of histograms (only when that field needs at least
+  // one bit) and, for every pass, the used coefficient orders followed by the
+  // histograms built from that pass's AC tokens.
+  Status EncodeGlobalACInfo(BitWriter* writer,
+                            ModularFrameEncoder* modular_frame_encoder) {
+    PassesSharedState& shared = enc_state_->shared;
+    const CompressParams& cparams = enc_state_->cparams;
+
+    JXL_RETURN_IF_ERROR(DequantMatricesEncode(&shared.matrices, writer,
+                                              kLayerQuant, aux_out_,
+                                              modular_frame_encoder));
+
+    const size_t num_histo_bits = CeilLog2Nonzero(shared.frame_dim.num_groups);
+    if (num_histo_bits != 0) {
+      BitWriter::Allotment allotment(writer, num_histo_bits);
+      writer->Write(num_histo_bits, shared.num_histograms - 1);
+      allotment.ReclaimAndCharge(writer, kLayerAC, aux_out_);
+    }
+
+    const size_t num_passes = enc_state_->progressive_splitter.GetNumPasses();
+    for (size_t pass = 0; pass < num_passes; ++pass) {
+      // Encode coefficient orders.
+      const auto& used_orders = enc_state_->used_orders[pass];
+      size_t order_bits = 0;
+      JXL_RETURN_IF_ERROR(
+          U32Coder::CanEncode(kOrderEnc, used_orders, &order_bits));
+      BitWriter::Allotment allotment(writer, order_bits);
+      JXL_CHECK(U32Coder::Write(kOrderEnc, used_orders, writer));
+      allotment.ReclaimAndCharge(writer, kLayerOrder, aux_out_);
+      auto* pass_orders = &shared.coeff_orders[pass * shared.coeff_order_size];
+      EncodeCoeffOrders(used_orders, pass_orders, writer, kLayerOrder,
+                        aux_out_);
+
+      // Encode histograms.
+      HistogramParams hist_params(cparams.speed_tier,
+                                  shared.block_ctx_map.NumACContexts());
+      if (cparams.speed_tier > SpeedTier::kTortoise) {
+        hist_params.lz77_method = HistogramParams::LZ77Method::kNone;
+      }
+      if (cparams.decoding_speed_tier >= 1) {
+        hist_params.max_histograms = 6;
+      }
+      PassesEncoderState::PassData& pass_data = enc_state_->passes[pass];
+      BuildAndEncodeHistograms(
+          hist_params,
+          shared.num_histograms * shared.block_ctx_map.NumACContexts(),
+          pass_data.ac_tokens, &pass_data.codes, &pass_data.context_map,
+          writer, kLayerAC, aux_out_);
+    }
+
+    return true;
+  }
+
+  // Encodes the tokenized AC coefficients of group `group_index` for pass
+  // `pass` into `group_code`, using the histogram index stored for that group.
+  Status EncodeACGroup(size_t pass, size_t group_index, BitWriter* group_code,
+                       AuxOut* local_aux_out) {
+    const auto histogram_idx = enc_state_->histogram_idx[group_index];
+    return EncodeGroupTokenizedCoefficients(group_index, pass, histogram_idx,
+                                            *enc_state_, group_code,
+                                            local_aux_out);
+  }
+
+  // Returns the encoder state this object was constructed with (not owned).
+  PassesEncoderState* State() { return enc_state_; }
+
+ private:
+  // Determines which coefficient orders are in use and, for every pass,
+  // computes the coefficient order table from that pass's coefficients.
+  // All passes start from the same set of used orders.
+  void ComputeAllCoeffOrders(const FrameDimensions& frame_dim) {
+    PassesSharedState& shared = enc_state_->shared;
+    // No coefficient reordering in Falcon or faster.
+    auto orders_info =
+        ComputeUsedOrders(enc_state_->cparams.speed_tier, shared.ac_strategy,
+                          Rect(shared.raw_quant_field));
+    const size_t num_passes = enc_state_->progressive_splitter.GetNumPasses();
+    enc_state_->used_orders.assign(num_passes, orders_info.second);
+    for (size_t pass = 0; pass < num_passes; ++pass) {
+      auto* pass_orders = &shared.coeff_orders[pass * shared.coeff_order_size];
+      ComputeCoeffOrder(enc_state_->cparams.speed_tier,
+                        *enc_state_->coeffs[pass], shared.ac_strategy,
+                        frame_dim, enc_state_->used_orders[pass],
+                        orders_info.first, pass_orders);
+    }
+  }
+
+  // Scans the prefix sums of array[0..len) and stores the largest one in
+  // `*sum` and the first index at which it is reached in `*idx`.
+  // Only strictly positive prefix sums are considered: when none exceeds
+  // zero, both outputs are 0.
+  template <typename V, typename R>
+  static inline void FindIndexOfSumMaximum(const V* array, const size_t len,
+                                           R* idx, V* sum) {
+    JXL_ASSERT(len > 0);
+    V running = 0;
+    V best = 0;
+    R best_pos = 0;
+    for (size_t pos = 0; pos < len; ++pos) {
+      running += array[pos];
+      if (!(running > best)) continue;
+      best = running;
+      best_pos = pos;
+    }
+    *sum = best;
+    *idx = best_pos;
+  }
+
+  PassesEncoderState* JXL_RESTRICT enc_state_;
+  JxlCmsInterface cms_;
+  ThreadPool* pool_;
+  AuxOut* aux_out_;
+  std::vector<EncCache> group_caches_;
+  bool doing_jpeg_recompression = false;
+};
+
+// Validates `*p` and fills in the parameters that are derived from others.
+// Fails when a manual noise LUT or manual XYB factor list is present but has
+// the wrong number of entries. Otherwise:
+//  - a distance of 0 outside modular mode is raised to
+//    kMinButteraugliDistance;
+//  - original_butteraugli_distance, if still -1, records the distance at this
+//    point (before any resampling adjustment);
+//  - a non-positive `resampling` becomes 1, or 2 with a rescaled distance for
+//    distances >= 20 on input that is not already downsampled;
+//  - a non-positive `ec_resampling` follows `resampling`.
+Status ParamsPostInit(CompressParams* p) {
+  const bool bad_noise_lut =
+      !p->manual_noise.empty() &&
+      p->manual_noise.size() != NoiseParams::kNumNoisePoints;
+  if (bad_noise_lut) {
+    return JXL_FAILURE("Invalid number of noise lut entries");
+  }
+  const bool bad_xyb_factors =
+      !p->manual_xyb_factors.empty() && p->manual_xyb_factors.size() != 3;
+  if (bad_xyb_factors) {
+    return JXL_FAILURE("Invalid number of XYB quantization factors");
+  }
+
+  if (p->butteraugli_distance == 0.0 && !p->modular_mode) {
+    p->butteraugli_distance = kMinButteraugliDistance;
+  }
+  if (p->original_butteraugli_distance == -1.0) {
+    p->original_butteraugli_distance = p->butteraugli_distance;
+  }
+
+  if (p->resampling <= 0) {
+    // For very low bit rates, using 2x2 resampling gives better results on
+    // most photographic images, with an adjusted butteraugli score chosen to
+    // give roughly the same amount of bits per pixel.
+    const bool very_low_bitrate =
+        !p->already_downsampled && p->butteraugli_distance >= 20;
+    if (very_low_bitrate) {
+      p->resampling = 2;
+      p->butteraugli_distance = 6 + ((p->butteraugli_distance - 20) * 0.25);
+    } else {
+      p->resampling = 1;
+    }
+  }
+  if (p->ec_resampling <= 0) {
+    p->ec_resampling = p->resampling;
+  }
+  return true;
+}
+
+Status EncodeFrame(const CompressParams& cparams_orig,
+                   const FrameInfo& frame_info, const CodecMetadata* metadata,
+                   const ImageBundle& ib, PassesEncoderState* passes_enc_state,
+                   const JxlCmsInterface& cms, ThreadPool* pool,
+                   BitWriter* writer, AuxOut* aux_out) {
+  CompressParams cparams = cparams_orig;  // Working copy; adjusted below.
+  if (cparams.speed_tier == SpeedTier::kGlacier && !cparams.IsLossless()) {
+    cparams.speed_tier = SpeedTier::kTortoise;
+  }
+  if (cparams.speed_tier == SpeedTier::kGlacier) {
+    std::vector<CompressParams> all_params;
+    std::vector<size_t> size;
+    // All attempts derive from the original params at Tortoise speed.
+    CompressParams cparams_attempt = cparams_orig;
+    cparams_attempt.speed_tier = SpeedTier::kTortoise;
+    cparams_attempt.options.max_properties = 4;
+    // Build the cross product of the candidate settings listed below.
+    for (float x : {0.0f, 80.f}) {
+      cparams_attempt.channel_colors_percent = x;
+      for (float y : {0.0f, 95.0f}) {
+        cparams_attempt.channel_colors_pre_transform_percent = y;
+        // 70000 ensures that the number of palette colors is representable in
+        // modular headers.
+        for (int K : {0, 1 << 10, 70000}) {
+          cparams_attempt.palette_colors = K;
+          for (int tree_mode : {-1, (int)ModularOptions::TreeMode::kNoWP,
+                                (int)ModularOptions::TreeMode::kDefault}) {
+            if (tree_mode == -1) {
+              // LZ77 only
+              cparams_attempt.options.nb_repeats = 0;
+            } else {
+              cparams_attempt.options.nb_repeats = 1;
+              cparams_attempt.options.wp_tree_mode =
+                  static_cast<ModularOptions::TreeMode>(tree_mode);
+            }
+            for (Predictor pred : {Predictor::Zero, Predictor::Variable}) {
+              cparams_attempt.options.predictor = pred;
+              for (int g : {0, -1, 3}) {
+                cparams_attempt.modular_group_size_shift = g;
+                for (Override patches : {Override::kDefault, Override::kOff}) {
+                  cparams_attempt.patches = patches;
+                  all_params.push_back(cparams_attempt);
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+
+    size.resize(all_params.size());
+
+    std::atomic<int> num_errors{0};
+    // Encode each candidate into a scratch writer and record its bit count.
+    JXL_RETURN_IF_ERROR(RunOnPool(
+        pool, 0, all_params.size(), ThreadPool::NoInit,
+        [&](size_t task, size_t) {
+          BitWriter w;
+          PassesEncoderState state;
+          if (!EncodeFrame(all_params[task], frame_info, metadata, ib, &state,
+                           cms, nullptr, &w, aux_out)) {
+            num_errors.fetch_add(1, std::memory_order_relaxed);
+            return;
+          }
+          size[task] = w.BitsWritten();
+        },
+        "Compress kGlacier"));
+    JXL_RETURN_IF_ERROR(num_errors.load(std::memory_order_relaxed) == 0);
+    // Pick the smallest output; on ties the earliest candidate wins.
+    size_t best_idx = 0;
+    for (size_t i = 1; i < all_params.size(); i++) {
+      if (size[best_idx] > size[i]) {
+        best_idx = i;
+      }
+    }
+    cparams = all_params[best_idx];
+  }
+
+  ib.VerifyMetadata();
+
+  passes_enc_state->special_frames.clear();
+
+  if (cparams.qprogressive_mode) {
+    passes_enc_state->progressive_splitter.SetProgressiveMode(
+        ProgressiveMode{progressive_passes_dc_quant_ac_full_ac});
+  } else if (cparams.progressive_mode) {
+    passes_enc_state->progressive_splitter.SetProgressiveMode(
+        ProgressiveMode{progressive_passes_dc_vlf_lf_full_ac});
+  }
+
+  JXL_RETURN_IF_ERROR(ParamsPostInit(&cparams));
+
+  if (cparams.progressive_dc < 0) {
+    if (cparams.progressive_dc != -1) {
+      return JXL_FAILURE("Invalid progressive DC setting value (%d)",
+                         cparams.progressive_dc);
+    }
+    cparams.progressive_dc = 0;
+  }
+  if (cparams.ec_resampling < cparams.resampling) {
+    cparams.ec_resampling = cparams.resampling;
+  }
+  if (cparams.resampling > 1 || frame_info.is_preview) {
+    cparams.progressive_dc = 0;
+  }
+
+  if (frame_info.dc_level + cparams.progressive_dc > 4) {
+    return JXL_FAILURE("Too many levels of progressive DC");
+  }
+
+  if (cparams.butteraugli_distance != 0 &&
+      cparams.butteraugli_distance < kMinButteraugliDistance) {
+    return JXL_FAILURE("Butteraugli distance is too low (%f)",
+                       cparams.butteraugli_distance);
+  }
+  // For JPEG input, turn off gaborish, EPF and modular mode.
+  if (ib.IsJPEG()) {
+    cparams.gaborish = Override::kOff;
+    cparams.epf = 0;
+    cparams.modular_mode = false;
+  }
+
+  if (ib.xsize() == 0 || ib.ysize() == 0) return JXL_FAILURE("Empty image");
+
+  // Assert that this metadata is correctly set up for the compression params,
+  // this should have been done by enc_file.cc
+  JXL_ASSERT(metadata->m.xyb_encoded ==
+             (cparams.color_transform == ColorTransform::kXYB));
+  std::unique_ptr<FrameHeader> frame_header =
+      jxl::make_unique<FrameHeader>(metadata);
+  JXL_RETURN_IF_ERROR(MakeFrameHeader(cparams,
+                                      passes_enc_state->progressive_splitter,
+                                      frame_info, ib, frame_header.get()));
+  // Check that if the codestream header says xyb_encoded, the color_transform
+  // matches the requirement. This is checked from the cparams here, even though
+  // optimally we'd be able to check this against what has actually been written
+  // in the main codestream header, but since ib is a const object and the data
+  // written to the main codestream header is (in modified form) in ib, the
+  // encoder cannot indicate this fact in the ib's metadata.
+  if (cparams_orig.color_transform == ColorTransform::kXYB) {
+    if (frame_header->color_transform != ColorTransform::kXYB) {
+      return JXL_FAILURE(
+          "The color transform of frames must be xyb if the codestream is xyb "
+          "encoded");
+    }
+  } else {
+    if (frame_header->color_transform == ColorTransform::kXYB) {
+      return JXL_FAILURE(
+          "The color transform of frames cannot be xyb if the codestream is "
+          "not xyb encoded");
+    }
+  }
+
+  FrameDimensions frame_dim = frame_header->ToFrameDimensions();
+
+  const size_t num_groups = frame_dim.num_groups;
+
+  Image3F opsin;
+  const ColorEncoding& c_linear = ColorEncoding::LinearSRGB(ib.IsGray());
+  std::unique_ptr<ImageMetadata> metadata_linear =
+      jxl::make_unique<ImageMetadata>();
+  metadata_linear->xyb_encoded =
+      (cparams.color_transform == ColorTransform::kXYB);
+  metadata_linear->color_encoding = c_linear;
+  ImageBundle linear_storage(metadata_linear.get());
+
+  std::vector<AuxOut> aux_outs;
+  // LossyFrameEncoder stores a reference to a std::function<Status(size_t)>
+  // so we need to keep the std::function<Status(size_t)> being referenced
+  // alive while lossy_frame_encoder is used. We could make resize_aux_outs a
+  // lambda type by making LossyFrameEncoder a template instead, but this is
+  // simpler.
+  const std::function<Status(size_t)> resize_aux_outs =
+      [&aux_outs, aux_out](const size_t num_threads) -> Status {
+    if (aux_out != nullptr) {
+      size_t old_size = aux_outs.size();
+      for (size_t i = num_threads; i < old_size; i++) {
+        aux_out->Assimilate(aux_outs[i]);
+      }
+      aux_outs.resize(num_threads);
+    }
+    return true;
+  };
+
+  LossyFrameEncoder lossy_frame_encoder(cparams, *frame_header,
+                                        passes_enc_state, cms, pool, aux_out);
+  std::unique_ptr<ModularFrameEncoder> modular_frame_encoder =
+      jxl::make_unique<ModularFrameEncoder>(*frame_header, cparams);
+
+  const std::vector<ImageF>* extra_channels = &ib.extra_channels();
+  std::vector<ImageF> extra_channels_storage;
+  // Clear patches
+  passes_enc_state->shared.image_features.patches = PatchDictionary();
+  passes_enc_state->shared.image_features.patches.SetPassesSharedState(
+      &passes_enc_state->shared);
+
+  if (ib.IsJPEG()) {
+    JXL_RETURN_IF_ERROR(lossy_frame_encoder.ComputeJPEGTranscodingData(
+        *ib.jpeg_data, modular_frame_encoder.get(), frame_header.get()));
+  } else if (!lossy_frame_encoder.State()->heuristics->HandlesColorConversion(
+                 cparams, ib) ||
+             frame_header->encoding != FrameEncoding::kVarDCT) {
+    // Allocating a large enough image avoids a copy when padding.
+    opsin =
+        Image3F(RoundUpToBlockDim(ib.xsize()), RoundUpToBlockDim(ib.ysize()));
+    opsin.ShrinkTo(ib.xsize(), ib.ysize());
+
+    const bool want_linear = frame_header->encoding == FrameEncoding::kVarDCT &&
+                             cparams.speed_tier <= SpeedTier::kKitten;
+    const ImageBundle* JXL_RESTRICT ib_or_linear = &ib;
+
+    if (frame_header->color_transform == ColorTransform::kXYB &&
+        frame_info.ib_needs_color_transform) {
+      // linear_storage would only be used by the Butteraugli loop (passing
+      // linear sRGB avoids a color conversion there). Otherwise, don't
+      // fill it to reduce memory usage.
+      ib_or_linear =
+          ToXYB(ib, pool, &opsin, cms, want_linear ? &linear_storage : nullptr);
+    } else {  // RGB or YCbCr: don't do anything (forward YCbCr is not
+              // implemented, this is only used when the input is already in
+              // YCbCr)
+              // If encoding a special DC or reference frame, don't do anything:
+              // input is already in XYB.
+      CopyImageTo(ib.color(), &opsin);
+    }
+    bool lossless = cparams.IsLossless();
+    if (ib.HasAlpha() && !ib.AlphaIsPremultiplied() &&
+        frame_header->frame_type == FrameType::kRegularFrame &&
+        !ApplyOverride(cparams.keep_invisible, lossless) &&
+        cparams.ec_resampling == cparams.resampling) {
+      // simplify invisible pixels
+      SimplifyInvisible(&opsin, ib.alpha(), lossless);
+      if (want_linear) {
+        SimplifyInvisible(const_cast<Image3F*>(&ib_or_linear->color()),
+                          ib.alpha(), lossless);
+      }
+    }
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+      PadImageToBlockMultipleInPlace(&opsin);
+      JXL_RETURN_IF_ERROR(lossy_frame_encoder.ComputeEncodingData(
+          ib_or_linear, &opsin, cms, pool, modular_frame_encoder.get(),
+          frame_header.get()));
+    } else if (frame_header->upsampling != 1 && !cparams.already_downsampled) {
+      // In VarDCT mode, LossyFrameHeuristics takes care of running downsampling
+      // after noise, if necessary.
+      DownsampleImage(&opsin, frame_header->upsampling);
+    }
+  } else {
+    JXL_RETURN_IF_ERROR(lossy_frame_encoder.ComputeEncodingData(
+        &ib, &opsin, cms, pool, modular_frame_encoder.get(),
+        frame_header.get()));
+  }
+  if (cparams.ec_resampling != 1 && !cparams.already_downsampled) {
+    extra_channels = &extra_channels_storage;
+    for (const ImageF& ec : ib.extra_channels()) {
+      ImageF d_ec(ec.xsize(), ec.ysize());
+      CopyImageTo(ec, &d_ec);
+      DownsampleImage(&d_ec, cparams.ec_resampling);
+      extra_channels_storage.emplace_back(std::move(d_ec));
+    }
+  }
+  // needs to happen *AFTER* VarDCT-ComputeEncodingData.
+  JXL_RETURN_IF_ERROR(modular_frame_encoder->ComputeEncodingData(
+      *frame_header, *ib.metadata(), &opsin, *extra_channels,
+      lossy_frame_encoder.State(), cms, pool, aux_out,
+      /* do_color=*/frame_header->encoding == FrameEncoding::kModular));
+
+  writer->AppendByteAligned(lossy_frame_encoder.State()->special_frames);
+  frame_header->UpdateFlag(
+      lossy_frame_encoder.State()->shared.image_features.patches.HasAny(),
+      FrameHeader::kPatches);
+  frame_header->UpdateFlag(
+      lossy_frame_encoder.State()->shared.image_features.splines.HasAny(),
+      FrameHeader::kSplines);
+  JXL_RETURN_IF_ERROR(WriteFrameHeader(*frame_header, writer, aux_out));
+
+  const size_t num_passes =
+      passes_enc_state->progressive_splitter.GetNumPasses();
+
+  // DC global info + DC groups + AC global info + AC groups *
+  // num_passes.
+  const bool has_ac_global = true;
+  std::vector<BitWriter> group_codes(NumTocEntries(frame_dim.num_groups,
+                                                   frame_dim.num_dc_groups,
+                                                   num_passes, has_ac_global));
+  const size_t global_ac_index = frame_dim.num_dc_groups + 1;
+  const bool is_small_image = frame_dim.num_groups == 1 && num_passes == 1;
+  const auto get_output = [&](const size_t index) {
+    return &group_codes[is_small_image ? 0 : index];
+  };
+  auto ac_group_code = [&](size_t pass, size_t group) {
+    return get_output(AcGroupIndex(pass, group, frame_dim.num_groups,
+                                   frame_dim.num_dc_groups, has_ac_global));
+  };
+  // Everything up to the global modular stream is written to output 0.
+  if (frame_header->flags & FrameHeader::kPatches) {
+    PatchDictionaryEncoder::Encode(
+        lossy_frame_encoder.State()->shared.image_features.patches,
+        get_output(0), kLayerDictionary, aux_out);
+  }
+
+  if (frame_header->flags & FrameHeader::kSplines) {
+    EncodeSplines(lossy_frame_encoder.State()->shared.image_features.splines,
+                  get_output(0), kLayerSplines, HistogramParams(), aux_out);
+  }
+
+  if (cparams.photon_noise_iso > 0) {
+    lossy_frame_encoder.State()->shared.image_features.noise_params =
+        SimulatePhotonNoise(ib.xsize(), ib.ysize(), cparams.photon_noise_iso);
+  }
+  if (cparams.manual_noise.size() == NoiseParams::kNumNoisePoints) {
+    for (size_t i = 0; i < NoiseParams::kNumNoisePoints; i++) {
+      lossy_frame_encoder.State()->shared.image_features.noise_params.lut[i] =
+          cparams.manual_noise[i];
+    }
+  }
+  if (frame_header->flags & FrameHeader::kNoise) {
+    EncodeNoise(lossy_frame_encoder.State()->shared.image_features.noise_params,
+                get_output(0), kLayerNoise, aux_out);
+  }
+
+  JXL_RETURN_IF_ERROR(
+      DequantMatricesEncodeDC(&lossy_frame_encoder.State()->shared.matrices,
+                              get_output(0), kLayerQuant, aux_out));
+  if (frame_header->encoding == FrameEncoding::kVarDCT) {
+    JXL_RETURN_IF_ERROR(
+        lossy_frame_encoder.EncodeGlobalDCInfo(*frame_header, get_output(0)));
+  }
+  JXL_RETURN_IF_ERROR(
+      modular_frame_encoder->EncodeGlobalInfo(get_output(0), aux_out));
+  JXL_RETURN_IF_ERROR(modular_frame_encoder->EncodeStream(
+      get_output(0), aux_out, kLayerModularGlobal, ModularStreamId::Global()));
+  // DC group `g` is written to output g + 1.
+  const auto process_dc_group = [&](const uint32_t group_index,
+                                    const size_t thread) {
+    AuxOut* my_aux_out = aux_out ? &aux_outs[thread] : nullptr;
+    BitWriter* output = get_output(group_index + 1);
+    if (frame_header->encoding == FrameEncoding::kVarDCT &&
+        !(frame_header->flags & FrameHeader::kUseDcFrame)) {
+      BitWriter::Allotment allotment(output, 2);
+      output->Write(2, modular_frame_encoder->extra_dc_precision[group_index]);
+      allotment.ReclaimAndCharge(output, kLayerDC, my_aux_out);
+      JXL_CHECK(modular_frame_encoder->EncodeStream(
+          output, my_aux_out, kLayerDC,
+          ModularStreamId::VarDCTDC(group_index)));
+    }
+    JXL_CHECK(modular_frame_encoder->EncodeStream(
+        output, my_aux_out, kLayerModularDcGroup,
+        ModularStreamId::ModularDC(group_index)));
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+      const Rect& rect =
+          lossy_frame_encoder.State()->shared.DCGroupRect(group_index);
+      size_t nb_bits = CeilLog2Nonzero(rect.xsize() * rect.ysize());
+      if (nb_bits != 0) {
+        BitWriter::Allotment allotment(output, nb_bits);
+        output->Write(nb_bits,
+                      modular_frame_encoder->ac_metadata_size[group_index] - 1);
+        allotment.ReclaimAndCharge(output, kLayerControlFields, my_aux_out);
+      }
+      JXL_CHECK(modular_frame_encoder->EncodeStream(
+          output, my_aux_out, kLayerControlFields,
+          ModularStreamId::ACMetadata(group_index)));
+    }
+  };
+  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, frame_dim.num_dc_groups,
+                                resize_aux_outs, process_dc_group,
+                                "EncodeDCGroup"));
+
+  if (frame_header->encoding == FrameEncoding::kVarDCT) {
+    JXL_RETURN_IF_ERROR(lossy_frame_encoder.EncodeGlobalACInfo(
+        get_output(global_ac_index), modular_frame_encoder.get()));
+  }
+  // AC group failures are counted here and checked after the pool has run.
+  std::atomic<int> num_errors{0};
+  const auto process_group = [&](const uint32_t group_index,
+                                 const size_t thread) {
+    AuxOut* my_aux_out = aux_out ? &aux_outs[thread] : nullptr;
+
+    for (size_t i = 0; i < num_passes; i++) {
+      if (frame_header->encoding == FrameEncoding::kVarDCT) {
+        if (!lossy_frame_encoder.EncodeACGroup(
+                i, group_index, ac_group_code(i, group_index), my_aux_out)) {
+          num_errors.fetch_add(1, std::memory_order_relaxed);
+          return;
+        }
+      }
+      // Write all modular encoded data (color?, alpha, depth, extra channels)
+      if (!modular_frame_encoder->EncodeStream(
+              ac_group_code(i, group_index), my_aux_out, kLayerModularAcGroup,
+              ModularStreamId::ModularAC(group_index, i))) {
+        num_errors.fetch_add(1, std::memory_order_relaxed);
+        return;
+      }
+    }
+  };
+  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, num_groups, resize_aux_outs,
+                                process_group, "EncodeGroupCoefficients"));
+
+  // Resizing aux_outs to 0 also Assimilates the array.
+  static_cast<void>(resize_aux_outs(0));
+  JXL_RETURN_IF_ERROR(num_errors.load(std::memory_order_relaxed) == 0);
+
+  for (BitWriter& bw : group_codes) {
+    BitWriter::Allotment allotment(&bw, 8);
+    bw.ZeroPadToByte();  // end of group.
+    allotment.ReclaimAndCharge(&bw, kLayerAC, aux_out);
+  }
+  // Optional center-first ordering of the AC group sections.
+  std::vector<coeff_order_t>* permutation_ptr = nullptr;
+  std::vector<coeff_order_t> permutation;
+  if (cparams.centerfirst && !(num_passes == 1 && num_groups == 1)) {
+    permutation_ptr = &permutation;
+    // Don't permute global DC/AC or DC.
+    permutation.resize(global_ac_index + 1);
+    std::iota(permutation.begin(), permutation.end(), 0);
+    std::vector<coeff_order_t> ac_group_order(num_groups);
+    std::iota(ac_group_order.begin(), ac_group_order.end(), 0);
+    size_t group_dim = frame_dim.group_dim;
+
+    // The center of the image is either given by parameters or chosen
+    // to be the middle of the image by default if center_x, center_y resp.
+    // are not provided.
+
+    int64_t imag_cx;
+    if (cparams.center_x != static_cast<size_t>(-1)) {
+      JXL_RETURN_IF_ERROR(cparams.center_x < ib.xsize());
+      imag_cx = cparams.center_x;
+    } else {
+      imag_cx = ib.xsize() / 2;
+    }
+
+    int64_t imag_cy;
+    if (cparams.center_y != static_cast<size_t>(-1)) {
+      JXL_RETURN_IF_ERROR(cparams.center_y < ib.ysize());
+      imag_cy = cparams.center_y;
+    } else {
+      imag_cy = ib.ysize() / 2;
+    }
+
+    // The center of the group containing the center of the image.
+    int64_t cx = (imag_cx / group_dim) * group_dim + group_dim / 2;
+    int64_t cy = (imag_cy / group_dim) * group_dim + group_dim / 2;
+    // This identifies in what area of the central group the center of the image
+    // lies in.
+    double direction = -std::atan2(imag_cy - cy, imag_cx - cx);
+    // This identifies the side of the central group the center of the image
+    // lies closest to. This can take values 0, 1, 2, 3 corresponding to left,
+    // bottom, right, top.
+    int64_t side = std::fmod((direction + 5 * kPi / 4), 2 * kPi) * 2 / kPi;
+    auto get_distance_from_center = [&](size_t gid) {
+      Rect r = passes_enc_state->shared.GroupRect(gid);
+      int64_t gcx = r.x0() + group_dim / 2;
+      int64_t gcy = r.y0() + group_dim / 2;
+      int64_t dx = gcx - cx;
+      int64_t dy = gcy - cy;
+      // The angle is determined by taking atan2 and adding an appropriate
+      // starting point depending on the side we want to start on.
+      double angle = std::remainder(
+          std::atan2(dy, dx) + kPi / 4 + side * (kPi / 2), 2 * kPi);
+      // Concentric squares in clockwise order.
+      return std::make_pair(std::max(std::abs(dx), std::abs(dy)), angle);
+    };
+    std::sort(ac_group_order.begin(), ac_group_order.end(),
+              [&](coeff_order_t a, coeff_order_t b) {
+                return get_distance_from_center(a) <
+                       get_distance_from_center(b);
+              });
+    std::vector<coeff_order_t> inv_ac_group_order(ac_group_order.size(), 0);
+    for (size_t i = 0; i < ac_group_order.size(); i++) {
+      inv_ac_group_order[ac_group_order[i]] = i;
+    }
+    for (size_t i = 0; i < num_passes; i++) {
+      size_t pass_start = permutation.size();
+      for (coeff_order_t v : inv_ac_group_order) {
+        permutation.push_back(pass_start + v);
+      }
+    }
+    std::vector<BitWriter> new_group_codes(group_codes.size());
+    for (size_t i = 0; i < permutation.size(); i++) {
+      new_group_codes[permutation[i]] = std::move(group_codes[i]);
+    }
+    group_codes = std::move(new_group_codes);
+  }
+  // Write the group offsets (with the permutation, if any), then the data.
+  JXL_RETURN_IF_ERROR(
+      WriteGroupOffsets(group_codes, permutation_ptr, writer, aux_out));
+  writer->AppendByteAligned(group_codes);
+
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_frame.h b/third-party/libjxl/libjxl/lib/jxl/enc_frame.h
new file mode 100644
index 0000000000..b1dc637eb0
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_frame.h
@@ -0,0 +1,78 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_FRAME_H_
+#define LIB_JXL_ENC_FRAME_H_
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// Per-frame encoding inputs that are not contained elsewhere and do not belong
+// to `cparams`. Every field has a default.
+// TODO(lode): if possible, it might be better to replace FrameInfo and several
+// fields from ImageBundle (such as frame name and duration) by direct usage of
+// jxl::FrameHeader itself.
+struct FrameInfo {
+  // TODO(veluca): consider adding more parameters, such as custom patches.
+  bool save_before_color_transform = false;
+  // False if the input image bundle is already in the codestream colorspace
+  // (as deduced by cparams), i.e. EncodeFrame must not convert it again.
+  // TODO(veluca): this is a hack - ImageBundle doesn't have a simple way to say
+  // "this is already in XYB".
+  bool ib_needs_color_transform = true;
+  FrameType frame_type = FrameType::kRegularFrame;
+  size_t dc_level = 0;  // EncodeFrame fails if dc_level + progressive_dc > 4.
+  // Only used for kRegularFrame.
+  bool is_last = true;
+  bool is_preview = false;  // EncodeFrame forces progressive_dc to 0 if set.
+  // Information for storing this frame for future use (only for non-DC frames).
+  size_t save_as_reference = 0;
+  // The source frame for blending of a next frame, matching the
+  // save_as_reference value of a previous frame. Animated frames can use
+  // save_as_reference values 1, 2 and 3, while composite still frames can use
+  // save_as_reference values 0, 1, 2 and 3. The current C++ encoder
+  // implementation assumes and uses 1 for all frames of animations, so that is
+  // the default value here.
+  // Corresponds to BlendingInfo::source from the FrameHeader.
+  size_t source = 1;
+  // Corresponds to BlendingInfo::clamp from the FrameHeader.
+  size_t clamp = 1;
+  // Corresponds to BlendingInfo::alpha_channel from the FrameHeader, or set to
+  // -1 to automatically choose it as the index of the first extra channel of
+  // type alpha.
+  int alpha_channel = -1;
+
+  // If non-empty, uses this blending info for the extra channels, otherwise
+  // automatically chooses it. The encoder API will fill this vector with the
+  // extra channel info and allows more options. The non-API cjxl leaves it
+  // empty and relies on the default behavior.
+  std::vector<BlendingInfo> extra_channel_blending_info;
+};
+
+// Validates and adjusts CompressParams once they are all initialized.
+Status ParamsPostInit(CompressParams* p);
+
+// Encodes a single frame (including its header) and appends it to `writer`.
+// Groups may be processed in parallel by `pool`. `metadata` is the
+// codestream-level CodecMetadata and must be used for the FrameHeader; do not
+// use ib.metadata() for that.
+Status EncodeFrame(const CompressParams& cparams_orig,
+                   const FrameInfo& frame_info, const CodecMetadata* metadata,
+                   const ImageBundle& ib, PassesEncoderState* passes_enc_state,
+                   const JxlCmsInterface& cms, ThreadPool* pool,
+                   BitWriter* writer, AuxOut* aux_out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_FRAME_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_gaborish.cc b/third-party/libjxl/libjxl/lib/jxl/enc_gaborish.cc
new file mode 100644
index 0000000000..32914a0dff
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_gaborish.cc
@@ -0,0 +1,62 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_gaborish.h"
+
+#include <stddef.h>
+
+#include <hwy/base.h>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+// Filters each channel of `in_out` in place with a 5x5 symmetric kernel whose
+// off-center taps are scaled by mul[c]; the taps are divided by their total.
+void GaborishInverse(Image3F* in_out, float mul[3], ThreadPool* pool) {
+  // Only an approximation of the inverse. One or even two 3x3 kernels, or a
+  // rank-1 (separable) 5x5, are insufficient. These numbers were obtained by
+  // butteraugli-based optimization of the whole system, so the errors they
+  // produce likely favor good rate-distortion compromises over what a purely
+  // mathematical search for the inverse would give.
+  static const float kGaborish[5] = {
+      -0.090881924078487886f, -0.043663953593472138f, 0.01392497846646211f,
+      0.0036189602184591141f, 0.0030557936884763499f};
+  WeightsSymmetric5 weights[3];
+  for (int c = 0; c < 3; ++c) {
+    // Total kernel weight, floored at 1e-5 before taking the reciprocal.
+    const double raw_total = 1.0 + mul[c] * 4 *
+                                       (kGaborish[0] + kGaborish[1] +
+                                        kGaborish[2] + kGaborish[4] +
+                                        2 * kGaborish[3]);
+    const double total = raw_total < 1e-5 ? 1e-5 : raw_total;
+    const float center = static_cast<float>(1.0 / total);
+    const float scale = mul[c] * center;
+    weights[c] = WeightsSymmetric5{{HWY_REP4(center)},
+                                   {HWY_REP4(scale * kGaborish[0])},
+                                   {HWY_REP4(scale * kGaborish[2])},
+                                   {HWY_REP4(scale * kGaborish[1])},
+                                   {HWY_REP4(scale * kGaborish[4])},
+                                   {HWY_REP4(scale * kGaborish[3])}};
+  }
+  // To limit memory use only one extra plane is allocated (tiling would be
+  // better still). A newly allocated plane cannot be swapped into the Image3F,
+  // since that might leave it with planes of different stride. Instead, plane
+  // 2 is copied to a temporary and every filter output is written into one of
+  // the existing planes of the in/out image.
+  ImageF saved_plane2(in_out->Plane(2).xsize(), in_out->Plane(2).ysize());
+  CopyImageTo(in_out->Plane(2), &saved_plane2);
+  const Rect full_rect(*in_out);
+  Symmetric5(in_out->Plane(0), full_rect, weights[0], pool, &in_out->Plane(2));
+  Symmetric5(in_out->Plane(1), full_rect, weights[1], pool, &in_out->Plane(0));
+  Symmetric5(saved_plane2, full_rect, weights[2], pool, &in_out->Plane(1));
+  // Planes now hold filtered channels 1, 2, 0; two swaps restore 0, 1, 2.
+  in_out->Plane(0).Swap(in_out->Plane(1));  // 2, 1, 0
+  in_out->Plane(0).Swap(in_out->Plane(2));  // 0, 1, 2
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_gaborish.h b/third-party/libjxl/libjxl/lib/jxl/enc_gaborish.h
new file mode 100644
index 0000000000..102064f9a2
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_gaborish.h
@@ -0,0 +1,26 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_GABORISH_H_
+#define LIB_JXL_ENC_GABORISH_H_
+
+// Encoder-side approximate inverse of the decoder's Gaborish smoothing.
+
+#include <stdint.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+// Used in the encoder to reduce the impact of the decoder's smoothing. Not
+// exact. Filters `in_out` in place to reduce memory use; `mul[c]` scales the
+// kernel taps of channel c. The input is typically in XYB space.
+void GaborishInverse(Image3F* in_out, float mul[3], ThreadPool* pool);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_GABORISH_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_gaborish_test.cc b/third-party/libjxl/libjxl/lib/jxl/enc_gaborish_test.cc
new file mode 100644
index 0000000000..57a18e3338
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_gaborish_test.cc
@@ -0,0 +1,77 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_gaborish.h"
+
+#include <hwy/base.h>
+
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+// Returns the weights for Symmetric3 built from a unit center weight and the
+// two given weights, scaled so that center + 4 * (weight1 + weight2) is one.
+// weight1 and weight2 need not be normalized.
+WeightsSymmetric3 GaborishKernel(float weight1, float weight2) {
+  constexpr float kCenterWeight = 1.0f;
+  const float scale = 1.0f / (kCenterWeight + 4 * (weight1 + weight2));
+  const float c0 = kCenterWeight * scale;
+  const float c1 = weight1 * scale;
+  const float c2 = weight2 * scale;
+  return WeightsSymmetric3{{HWY_REP4(c0)}, {HWY_REP4(c1)}, {HWY_REP4(c2)}};
+}
+
+// Runs Symmetric3 with GaborishKernel(weight1, weight2) over all of `in`,
+// writing to `out`, which must have the same size as `in`.
+void ConvolveGaborish(const ImageF& in, float weight1, float weight2,
+                      ThreadPool* pool, ImageF* JXL_RESTRICT out) {
+  JXL_CHECK(SameSize(in, *out));
+  const WeightsSymmetric3 kernel = GaborishKernel(weight1, weight2);
+  Symmetric3(in, Rect(in), kernel, pool, out);
+}
+
+// Convolves every plane of `in` with GaborishKernel(0, 0), applies
+// GaborishInverse to the result, and checks it against `in` with
+// VerifyRelativeError using `max_l1`.
+void TestRoundTrip(const Image3F& in, float max_l1) {
+  ThreadPool* null_pool = nullptr;
+  Image3F fwd(in.xsize(), in.ysize());
+  for (size_t c = 0; c < 3; ++c) {
+    ConvolveGaborish(in.Plane(c), 0, 0, null_pool, &fwd.Plane(c));
+  }
+  const float w = 0.92718927264540152f;
+  float weights[3] = {w, w, w};
+  GaborishInverse(&fwd, weights, null_pool);
+  JXL_ASSERT_OK(VerifyRelativeError(in, fwd, max_l1, 1E-4f, _));
+}
+
+TEST(GaborishTest, TestZero) {
+  Image3F zeros(20, 20);
+  ZeroFillImage(&zeros);
+  TestRoundTrip(zeros, 0.0f);
+}
+
+// Disabled: large difference.
+#if 0
+TEST(GaborishTest, TestDirac) {
+  Image3F in(20, 20);
+  ZeroFillImage(&in);
+  in.PlaneRow(1, 10)[10] = 10.0f;
+  TestRoundTrip(in, 0.26f);
+}
+#endif
+
+TEST(GaborishTest, TestFlat) {
+  Image3F flat(20, 20);
+  FillImage(1.0f, &flat);
+  TestRoundTrip(flat, 1E-5f);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_gamma_correct.h b/third-party/libjxl/libjxl/lib/jxl/enc_gamma_correct.h
new file mode 100644
index 0000000000..0db7012bbe
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_gamma_correct.h
@@ -0,0 +1,36 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_GAMMA_CORRECT_H_
+#define LIB_JXL_ENC_GAMMA_CORRECT_H_
+
+// Deprecated: sRGB transfer function. Use color_management.h instead.
+
+#include <cmath>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+namespace jxl {
+
+// sRGB-encoded value to linear light. Inputs outside [0, 1] are clamped.
+static JXL_INLINE double Srgb8ToLinearDirect(double srgb) {
+  // Linear segment near black; non-positive inputs map to zero.
+  if (srgb <= 0.04045) return srgb <= 0.0 ? 0.0 : srgb / 12.92;
+  if (srgb >= 1.0) return 1.0;
+  return std::pow((srgb + 0.055) / 1.055, 2.4);
+}
+
+// Linear light to sRGB-encoded value. Inputs outside [0, 1] are clamped.
+static JXL_INLINE double LinearToSrgb8Direct(double linear) {
+  if (linear >= 1.0) return 1.0;
+  // Linear segment near black; non-positive inputs map to zero.
+  if (linear <= 0.0031308) return linear <= 0.0 ? 0.0 : linear * 12.92;
+  return std::pow(linear, 1.0 / 2.4) * 1.055 - 0.055;
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_GAMMA_CORRECT_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_group.cc b/third-party/libjxl/libjxl/lib/jxl/enc_group.cc
new file mode 100644
index 0000000000..9ff5f5526f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_group.cc
@@ -0,0 +1,515 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_group.h"
+
+#include <hwy/aligned_allocator.h>
+#include <utility>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_group.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/dec_transforms-inl.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_transforms-inl.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/quantizer-inl.h"
+#include "lib/jxl/quantizer.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Ge;
+using hwy::HWY_NAMESPACE::IfThenElse;
+using hwy::HWY_NAMESPACE::IfThenElseZero;
+using hwy::HWY_NAMESPACE::MaskFromVec;
+using hwy::HWY_NAMESPACE::Round;
+
+// Quantizes block_in into block_out. NOTE: caller extracts quant from the rect of RawQuantField.
+void QuantizeBlockAC(const Quantizer& quantizer, const bool error_diffusion,
+                     size_t c, float qm_multiplier, size_t quant_kind,
+                     size_t xsize, size_t ysize, float* thresholds,
+                     const float* JXL_RESTRICT block_in, int32_t* quant,
+                     int32_t* JXL_RESTRICT block_out) {  // error_diffusion is not read in this body
+  const float* JXL_RESTRICT qm = quantizer.InvDequantMatrix(quant_kind, c);
+  float qac = quantizer.Scale() * (*quant);  // multiplied into every coefficient below
+  // Lowers the caller's thresholds in place (floored at 0.5). Not SIMD-ified for now.
+  if (c != 1 && xsize * ysize >= 4) {
+    for (int i = 0; i < 4; ++i) {
+      thresholds[i] -= 0.00744f * xsize * ysize;
+      if (thresholds[i] < 0.5) {
+        thresholds[i] = 0.5;
+      }
+    }
+  }
+  HWY_CAPPED(float, kBlockDim) df;
+  HWY_CAPPED(int32_t, kBlockDim) di;
+  HWY_CAPPED(uint32_t, kBlockDim) du;
+  const auto quantv = Set(df, qac * qm_multiplier);
+  for (size_t y = 0; y < ysize * kBlockDim; y++) {
+    size_t yfix = static_cast<size_t>(y >= ysize * kBlockDim / 2) * 2;  // 2 for the lower half of the rows, else 0
+    const size_t off = y * kBlockDim * xsize;
+    for (size_t x = 0; x < xsize * kBlockDim; x += Lanes(df)) {
+      auto thr = Zero(df);
+      if (xsize == 1) {  // columns 0-3 use thresholds[yfix], columns 4-7 thresholds[yfix + 1]
+        HWY_ALIGN uint32_t kMask[kBlockDim] = {0, 0, 0, 0, ~0u, ~0u, ~0u, ~0u};
+        const auto mask = MaskFromVec(BitCast(df, Load(du, kMask + x)));
+        thr = IfThenElse(mask, Set(df, thresholds[yfix + 1]),
+                         Set(df, thresholds[yfix]));
+      } else {
+        // Same for all lanes in the vector.
+        thr = Set(
+            df,
+            thresholds[yfix + static_cast<size_t>(x >= xsize * kBlockDim / 2)]);
+      }
+      const auto q = Mul(Load(df, qm + off + x), quantv);
+      const auto in = Load(df, block_in + off + x);
+      const auto val = Mul(q, in);
+      const auto nzero_mask = Ge(Abs(val), thr);  // |val| below thr is stored as 0
+      const auto v = ConvertTo(di, IfThenElseZero(nzero_mask, Round(val)));
+      Store(v, di, block_out + off + x);
+    }
+  }
+}
+
+void AdjustQuantBlockAC(const Quantizer& quantizer, size_t c,
+                        float qm_multiplier, size_t quant_kind, size_t xsize,
+                        size_t ysize, float* thresholds,
+                        const float* JXL_RESTRICT block_in, int32_t* quant) {
+  // Heuristically adjusts *quant and thresholds[0..3] in place for one block.
+  // No quantization adjusting for the small block kinds listed below.
+  // Quantization adjusting attempts to fix some known issues with larger blocks
+  // and the 8x8 DCT's emerging 8x8 blockiness when there are few non-zeros.
+  constexpr size_t kPartialBlockKinds =
+      (1 << AcStrategy::Type::IDENTITY) | (1 << AcStrategy::Type::DCT2X2) |
+      (1 << AcStrategy::Type::DCT4X4) | (1 << AcStrategy::Type::DCT4X8) |
+      (1 << AcStrategy::Type::DCT8X4) | (1 << AcStrategy::Type::AFV0) |
+      (1 << AcStrategy::Type::AFV1) | (1 << AcStrategy::Type::AFV2) |
+      (1 << AcStrategy::Type::AFV3);
+  if ((1 << quant_kind) & kPartialBlockKinds) {
+    return;
+  }
+
+  const float* JXL_RESTRICT qm = quantizer.InvDequantMatrix(quant_kind, c);
+  float qac = quantizer.Scale() * (*quant);
+  if (xsize > 1 || ysize > 1) {
+    for (int i = 0; i < 4; ++i) {
+      thresholds[i] -= Clamp1(0.003f * xsize * ysize, 0.f, 0.08f);
+      if (thresholds[i] < 0.54) {
+        thresholds[i] = 0.54;
+      }
+    }
+  }
+  float sum_of_highest_freq_row_and_column = 0;
+  float sum_of_error = 0;
+  float sum_of_vals = 0;
+  float hfNonZeros[4] = {};  // per quadrant: sum of |v| over non-zero quantized values
+  float hfMaxError[4] = {};  // per quadrant: largest error of a zeroed value (c == 1 only)
+  // Trial-quantize every coefficient except the top-left xsize x ysize ones.
+  for (size_t y = 0; y < ysize * kBlockDim; y++) {
+    for (size_t x = 0; x < xsize * kBlockDim; x++) {
+      const size_t pos = y * kBlockDim * xsize + x;
+      if (x < xsize && y < ysize) {
+        continue;
+      }
+      const size_t hfix = (static_cast<size_t>(y >= ysize * kBlockDim / 2) * 2 +
+                           static_cast<size_t>(x >= xsize * kBlockDim / 2));  // quadrant index 0..3
+      const float val = block_in[pos] * (qm[pos] * qac * qm_multiplier);
+      const float v = (std::abs(val) < thresholds[hfix]) ? 0 : rintf(val);
+      const float error = std::abs(val - v);
+      sum_of_error += error;
+      sum_of_vals += std::abs(v);
+      if (c == 1 && v == 0) {
+        if (hfMaxError[hfix] < error) {
+          hfMaxError[hfix] = error;
+        }
+      }
+      if (v != 0.0f) {
+        hfNonZeros[hfix] += std::abs(v);
+        bool in_corner = y >= 7 * ysize && x >= 7 * xsize;
+        bool on_border =
+            y == ysize * kBlockDim - 1 || x == xsize * kBlockDim - 1;
+        bool in_larger_corner = x >= 4 * xsize && y >= 4 * ysize;
+        if (in_corner || (on_border && in_larger_corner)) {
+          sum_of_highest_freq_row_and_column += std::abs(val);
+        }
+      }
+    }
+  }
+  if (c == 1 && sum_of_vals < std::max(xsize, ysize)) {
+    static const double kLimit[4] = {
+        0.46,
+        0.46,
+        0.46,
+        0.46,
+    };
+    static const double kMul[4] = {
+        0.9999,
+        0.9999,
+        0.9999,
+        0.9999,
+    };
+    const int32_t orig_quant = *quant;
+    int32_t new_quant = *quant;
+    for (int i = 1; i < 4; ++i) {  // quadrant 0 is not considered for the quant bump
+      if (hfNonZeros[i] == 0.0 && hfMaxError[i] > kLimit[i]) {
+        new_quant = orig_quant + 1;
+        break;
+      }
+    }
+    *quant = new_quant;
+    if (hfNonZeros[3] == 0.0 && hfMaxError[3] > kLimit[3]) {
+      thresholds[3] = kMul[3] * hfMaxError[3] * new_quant / orig_quant;
+    } else if ((hfNonZeros[1] == 0.0 && hfMaxError[1] > kLimit[1]) ||
+               (hfNonZeros[2] == 0.0 && hfMaxError[2] > kLimit[2])) {
+      thresholds[1] = kMul[1] * std::max(hfMaxError[1], hfMaxError[2]) *
+                      new_quant / orig_quant;
+      thresholds[2] = thresholds[1];
+    } else if (hfNonZeros[0] == 0.0 && hfMaxError[0] > kLimit[0]) {
+      thresholds[0] = kMul[0] * hfMaxError[0] * new_quant / orig_quant;
+    }
+  }
+  // Heuristic for improving accuracy of high-frequency patterns
+  // occurring in an environment with no medium-frequency masking
+  // patterns.
+  {
+    float all =
+        hfNonZeros[0] + hfNonZeros[1] + hfNonZeros[2] + hfNonZeros[3] + 1;
+    float mul[3] = {70, 30, 60};  // indexed by channel c
+    if (mul[c] * sum_of_highest_freq_row_and_column >= all) {
+      *quant += mul[c] * sum_of_highest_freq_row_and_column / all;
+      if (*quant >= Quantizer::kQuantMax) {
+        *quant = Quantizer::kQuantMax - 1;
+      }
+    }
+  }
+  if (quant_kind == AcStrategy::Type::DCT) {
+    // If this 8x8 block is too flat, increase the adaptive quantization level
+    // a bit to reduce visible block boundaries and requantize the block.
+    if (hfNonZeros[0] + hfNonZeros[1] + hfNonZeros[2] + hfNonZeros[3] < 11) {
+      *quant += 1;
+      if (*quant >= Quantizer::kQuantMax) {
+        *quant = Quantizer::kQuantMax - 1;
+      }
+    }
+  }
+  {
+    static const double kMul1[3][3] = {
+        {
+            0.13289977307244785,
+            0.13991489841351781,
+            0.083900681804010419,
+        },
+        {
+            0.69938583107168562,
+            0.19612117586770869,
+            0.15307492924107463,
+        },
+        {
+            0.099160801461836312,
+            0.16684944507307059,
+            0.16608517854968413,
+        },
+    };
+    static const double kMul2[3][3] = {
+        {
+            0.24773711435293466,
+            0.65189637683223112,
+            1.0,
+        },
+        {
+            0.46465181913392556,
+            0.3142440606068525,
+            0.30128806880068809,
+        },
+        {
+            0.45203398366713637,
+            0.15063329382779103,
+            0.067846407329923752,
+        },
+    };
+    const float kQuantNormalizer = 2.8261379721245263;
+    sum_of_error *= kQuantNormalizer;
+    sum_of_vals *= kQuantNormalizer;
+    if (quant_kind >= AcStrategy::Type::DCT16X16) {
+      int ix = 2;  // row of kMul1/kMul2: 0 for 16x16, 1 for 32x16/16x32, 2 otherwise
+      if (quant_kind == AcStrategy::Type::DCT32X16 ||
+          quant_kind == AcStrategy::Type::DCT16X32) {
+        ix = 1;
+      } else if (quant_kind == AcStrategy::Type::DCT16X16) {
+        ix = 0;
+      }
+      int step =
+          sum_of_error / (kMul1[ix][c] * xsize * ysize * kBlockDim * kBlockDim +
+                          kMul2[ix][c] * sum_of_vals);
+      if (step >= 2) {
+        step = 2;
+      }
+      if (sum_of_error > kMul1[ix][c] * xsize * ysize * kBlockDim * kBlockDim +
+                             kMul2[ix][c] * sum_of_vals) {
+        *quant += step;
+        if (*quant >= Quantizer::kQuantMax) {
+          *quant = Quantizer::kQuantMax - 1;
+        }
+      }
+    }
+  }
+  {
+    // Reduce quant in highly active areas.
+    int32_t div = (xsize + ysize) / 2;
+    int32_t activity = (hfNonZeros[0] + div / 2) / div;
+    int32_t orig_qp_limit = std::max(4, *quant / 2);  // lower bound for the reduced quant
+    for (int i = 1; i < 4; ++i) {
+      activity = std::min<int32_t>(activity, (hfNonZeros[i] + div / 2) / div);
+    }
+    if (activity >= 15) {
+      activity = 15;
+    }
+    int32_t qp = *quant - activity;
+    if (c == 1) {
+      for (int i = 1; i < 4; ++i) {
+        thresholds[i] += 0.01 * activity;
+      }
+    }
+    if (qp < orig_qp_limit) {
+      qp = orig_qp_limit;
+    }
+    *quant = qp;
+  }
+}
+
+// NOTE: caller takes care of extracting quant from rect of RawQuantField.
+void QuantizeRoundtripYBlockAC(PassesEncoderState* enc_state, const size_t size,
+                               const Quantizer& quantizer,
+                               const bool error_diffusion, size_t quant_kind,
+                               size_t xsize, size_t ysize,
+                               const float* JXL_RESTRICT biases, int32_t* quant,
+                               float* JXL_RESTRICT inout,
+                               int32_t* JXL_RESTRICT quantized) {
+  float thres_y[4] = {0.58f, 0.64f, 0.64f, 0.64f};
+  {
+    int32_t max_quant = 0;
+    int quant_orig = *quant;
+    float val[3] = {enc_state->x_qm_multiplier, 1.0f,
+                    enc_state->b_qm_multiplier};  // qm multiplier per channel
+    int clut[3] = {1, 0, 2};  // order in which the channels are visited
+    for (int ii = 0; ii < 3; ++ii) {
+      float thres[4] = {0.58f, 0.64f, 0.64f, 0.64f};
+      int c = clut[ii];
+      *quant = quant_orig;  // each channel starts from the caller's quant
+      AdjustQuantBlockAC(quantizer, c, val[c], quant_kind, xsize, ysize,
+                         &thres[0], inout + c * size, quant);
+      // Dead zone adjustment: only channel 1's adjusted thresholds are kept.
+      if (c == 1) {
+        for (int k = 0; k < 4; ++k) {
+          thres_y[k] = thres[k];
+        }
+      }
+      max_quant = std::max(*quant, max_quant);
+    }
+    *quant = max_quant;  // largest adjusted quant over the three channels
+  }
+  // Quantize channel 1 (stored at offset `size`) with the adjusted values.
+  QuantizeBlockAC(quantizer, error_diffusion, 1, 1.0f, quant_kind, xsize, ysize,
+                  &thres_y[0], inout + size, quant, quantized + size);
+  // Dequantize channel 1 back into `inout`, overwriting the original values.
+  const float* JXL_RESTRICT dequant_matrix =
+      quantizer.DequantMatrix(quant_kind, 1);
+
+  HWY_CAPPED(float, kDCTBlockSize) df;
+  HWY_CAPPED(int32_t, kDCTBlockSize) di;
+  const auto inv_qac = Set(df, quantizer.inv_quant_ac(*quant));
+  for (size_t k = 0; k < kDCTBlockSize * xsize * ysize; k += Lanes(df)) {
+    const auto quant = Load(di, quantized + size + k);  // shadows the `quant` parameter
+    const auto adj_quant = AdjustQuantBias(di, 1, quant, biases);
+    const auto dequantm = Load(df, dequant_matrix + k);
+    Store(Mul(Mul(adj_quant, dequantm), inv_qac), df, inout + size + k);
+  }
+}
+
+void ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
+                         const Image3F& opsin, Image3F* dc) {  // fills dc and enc_state->coeffs for one group
+  const Rect block_group_rect = enc_state->shared.BlockGroupRect(group_idx);
+  const Rect group_rect = enc_state->shared.GroupRect(group_idx);
+  const Rect cmap_rect(
+      block_group_rect.x0() / kColorTileDimInBlocks,
+      block_group_rect.y0() / kColorTileDimInBlocks,
+      DivCeil(block_group_rect.xsize(), kColorTileDimInBlocks),
+      DivCeil(block_group_rect.ysize(), kColorTileDimInBlocks));
+
+  const size_t xsize_blocks = block_group_rect.xsize();
+  const size_t ysize_blocks = block_group_rect.ysize();
+
+  const size_t dc_stride = static_cast<size_t>(dc->PixelsPerRow());
+  const size_t opsin_stride = static_cast<size_t>(opsin.PixelsPerRow());
+
+  ImageI& full_quant_field = enc_state->shared.raw_quant_field;
+  const CompressParams& cparams = enc_state->cparams;
+
+  // TODO(veluca): consider strategies to reduce this memory.
+  auto mem = hwy::AllocateAligned<int32_t>(3 * AcStrategy::kMaxCoeffArea);
+  auto fmem = hwy::AllocateAligned<float>(5 * AcStrategy::kMaxCoeffArea);
+  float* JXL_RESTRICT scratch_space =
+      fmem.get() + 3 * AcStrategy::kMaxCoeffArea;  // the floats before this are used as coeffs_in
+  {
+    // Only use error diffusion in Squirrel mode or slower.
+    const bool error_diffusion = cparams.speed_tier <= SpeedTier::kSquirrel;
+    constexpr HWY_CAPPED(float, kDCTBlockSize) d;
+
+    int32_t* JXL_RESTRICT coeffs[3][kMaxNumPasses] = {};  // output cursor per channel and pass
+    size_t num_passes = enc_state->progressive_splitter.GetNumPasses();
+    JXL_DASSERT(num_passes > 0);
+    for (size_t i = 0; i < num_passes; i++) {
+      // TODO(veluca): 16-bit quantized coeffs are not implemented yet.
+      JXL_ASSERT(enc_state->coeffs[i]->Type() == ACType::k32);
+      for (size_t c = 0; c < 3; c++) {
+        coeffs[c][i] = enc_state->coeffs[i]->PlaneRow(c, group_idx, 0).ptr32;
+      }
+    }
+
+    HWY_ALIGN float* coeffs_in = fmem.get();
+    HWY_ALIGN int32_t* quantized = mem.get();
+
+    for (size_t by = 0; by < ysize_blocks; ++by) {
+      int32_t* JXL_RESTRICT row_quant_ac =
+          block_group_rect.Row(&full_quant_field, by);
+      size_t ty = by / kColorTileDimInBlocks;
+      const int8_t* JXL_RESTRICT row_cmap[3] = {
+          cmap_rect.ConstRow(enc_state->shared.cmap.ytox_map, ty),
+          nullptr,  // index 1 is never read below
+          cmap_rect.ConstRow(enc_state->shared.cmap.ytob_map, ty),
+      };
+      const float* JXL_RESTRICT opsin_rows[3] = {
+          group_rect.ConstPlaneRow(opsin, 0, by * kBlockDim),
+          group_rect.ConstPlaneRow(opsin, 1, by * kBlockDim),
+          group_rect.ConstPlaneRow(opsin, 2, by * kBlockDim),
+      };
+      float* JXL_RESTRICT dc_rows[3] = {
+          block_group_rect.PlaneRow(dc, 0, by),
+          block_group_rect.PlaneRow(dc, 1, by),
+          block_group_rect.PlaneRow(dc, 2, by),
+      };
+      AcStrategyRow ac_strategy_row =
+          enc_state->shared.ac_strategy.ConstRow(block_group_rect, by);
+      for (size_t tx = 0; tx < DivCeil(xsize_blocks, kColorTileDimInBlocks);
+           tx++) {
+        const auto x_factor =
+            Set(d, enc_state->shared.cmap.YtoXRatio(row_cmap[0][tx]));
+        const auto b_factor =
+            Set(d, enc_state->shared.cmap.YtoBRatio(row_cmap[2][tx]));
+        for (size_t bx = tx * kColorTileDimInBlocks;
+             bx < xsize_blocks && bx < (tx + 1) * kColorTileDimInBlocks; ++bx) {
+          const AcStrategy acs = ac_strategy_row[bx];
+          if (!acs.IsFirstBlock()) continue;  // each transform is processed once, at its first block
+
+          size_t xblocks = acs.covered_blocks_x();
+          size_t yblocks = acs.covered_blocks_y();
+
+          CoefficientLayout(&yblocks, &xblocks);
+
+          size_t size = kDCTBlockSize * xblocks * yblocks;
+
+          // Transform all three channels, roundtrip-quantize Y and set its DC.
+          int32_t quant_ac = row_quant_ac[bx];
+          for (size_t c : {0, 1, 2}) {
+            TransformFromPixels(acs.Strategy(), opsin_rows[c] + bx * kBlockDim,
+                                opsin_stride, coeffs_in + c * size,
+                                scratch_space);
+          }
+          DCFromLowestFrequencies(acs.Strategy(), coeffs_in + size,
+                                  dc_rows[1] + bx, dc_stride);
+
+          QuantizeRoundtripYBlockAC(
+              enc_state, size, enc_state->shared.quantizer, error_diffusion,
+              acs.RawStrategy(), xblocks, yblocks, kDefaultQuantBias, &quant_ac,
+              coeffs_in, quantized);
+
+          // Unapply color correlation
+          for (size_t k = 0; k < size; k += Lanes(d)) {
+            const auto in_x = Load(d, coeffs_in + k);
+            const auto in_y = Load(d, coeffs_in + size + k);
+            const auto in_b = Load(d, coeffs_in + 2 * size + k);
+            const auto out_x = NegMulAdd(x_factor, in_y, in_x);
+            const auto out_b = NegMulAdd(b_factor, in_y, in_b);
+            Store(out_x, d, coeffs_in + k);
+            Store(out_b, d, coeffs_in + 2 * size + k);
+          }
+
+          // Quantize X and B channels and set DC.
+          for (size_t c : {0, 2}) {
+            float thres[4] = {0.58f, 0.62f, 0.62f, 0.62f};
+            QuantizeBlockAC(enc_state->shared.quantizer, error_diffusion, c,
+                            c == 0 ? enc_state->x_qm_multiplier
+                                   : enc_state->b_qm_multiplier,
+                            acs.RawStrategy(), xblocks, yblocks, &thres[0],
+                            coeffs_in + c * size, &quant_ac,
+                            quantized + c * size);
+            DCFromLowestFrequencies(acs.Strategy(), coeffs_in + c * size,
+                                    dc_rows[c] + bx, dc_stride);
+          }
+          row_quant_ac[bx] = quant_ac;  // write the possibly adjusted quant back to the field
+          for (size_t c = 0; c < 3; c++) {
+            enc_state->progressive_splitter.SplitACCoefficients(
+                quantized + c * size, acs, bx, by, coeffs[c]);
+            for (size_t p = 0; p < num_passes; p++) {
+              coeffs[c][p] += size;  // advance past this block's coefficients
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(ComputeCoefficients);
+void ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
+                         const Image3F& opsin, Image3F* dc) {
+  // Forward to the per-target implementation selected by Highway's dispatch.
+  HWY_DYNAMIC_DISPATCH(ComputeCoefficients)(group_idx, enc_state, opsin, dc);
+}
+
+Status EncodeGroupTokenizedCoefficients(size_t group_idx, size_t pass_idx,
+                                        size_t histogram_idx,
+                                        const PassesEncoderState& enc_state,
+                                        BitWriter* writer, AuxOut* aux_out) {
+  // Writes the histogram selector (if one is needed), then the group's tokens.
+  const size_t num_histograms = enc_state.shared.num_histograms;
+  // num_histograms is 0 only for lossless. CeilLog2Nonzero must not be given 0,
+  // so that case explicitly gets zero selector bits.
+  JXL_ASSERT(num_histograms == 0 || histogram_idx < num_histograms);
+  const size_t histo_selector_bits =
+      num_histograms == 0 ? 0 : CeilLog2Nonzero(num_histograms);
+  if (histo_selector_bits != 0) {
+    BitWriter::Allotment allotment(writer, histo_selector_bits);
+    writer->Write(histo_selector_bits, histogram_idx);
+    allotment.ReclaimAndCharge(writer, kLayerAC, aux_out);
+  }
+  WriteTokens(enc_state.passes[pass_idx].ac_tokens[group_idx],
+              enc_state.passes[pass_idx].codes,
+              enc_state.passes[pass_idx].context_map, writer, kLayerACTokens,
+              aux_out);
+  return true;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_group.h b/third-party/libjxl/libjxl/lib/jxl/enc_group.h
new file mode 100644
index 0000000000..0caf408a03
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_group.h
@@ -0,0 +1,32 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_GROUP_H_
+#define LIB_JXL_ENC_GROUP_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+struct AuxOut;
+struct PassesEncoderState;
+
+// Transforms and quantizes the blocks of group `group_idx`; also fills `dc`.
+void ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
+                         const Image3F& opsin, Image3F* dc);
+// Writes the histogram selector (if needed) and the AC tokens of one group.
+Status EncodeGroupTokenizedCoefficients(size_t group_idx, size_t pass_idx,
+                                        size_t histogram_idx,
+                                        const PassesEncoderState& enc_state,
+                                        BitWriter* writer, AuxOut* aux_out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_GROUP_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_heuristics.cc b/third-party/libjxl/libjxl/lib/jxl/enc_heuristics.cc
new file mode 100644
index 0000000000..b5a8cdfa73
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_heuristics.cc
@@ -0,0 +1,947 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_heuristics.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <numeric>
+#include <string>
+
+#include "lib/jxl/enc_ac_strategy.h"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_ar_control_field.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_chroma_from_luma.h"
+#include "lib/jxl/enc_gaborish.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_noise.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/enc_photon_noise.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/enc_splines.h"
+#include "lib/jxl/enc_xyb.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+namespace {
+void FindBestBlockEntropyModel(PassesEncoderState& enc_state) {  // updates enc_state.shared.block_ctx_map
+  if (enc_state.cparams.decoding_speed_tier >= 1) {
+    static constexpr uint8_t kSimpleCtxMap[] = {
+        // Cluster all blocks together
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  //
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  //
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  //
+    };
+    static_assert(
+        3 * kNumOrders == sizeof(kSimpleCtxMap) / sizeof *kSimpleCtxMap,
+        "Update simple context map");
+    // Bind by reference: writing to a copy would discard the simple map.
+    auto& bcm = enc_state.shared.block_ctx_map;
+    bcm.ctx_map.assign(std::begin(kSimpleCtxMap), std::end(kSimpleCtxMap));
+    bcm.num_ctxs = 2;
+    bcm.num_dc_ctxs = 1;
+    return;
+  }
+  if (enc_state.cparams.speed_tier >= SpeedTier::kFalcon) {
+    return;
+  }
+  const ImageI& rqf = enc_state.shared.raw_quant_field;
+  // No need to change context modeling for small images.
+  size_t tot = rqf.xsize() * rqf.ysize();
+  size_t size_for_ctx_model =
+      (1 << 10) * enc_state.cparams.butteraugli_distance;
+  if (tot < size_for_ctx_model) return;
+
+  struct OccCounters {
+    // count the occurrences of each qf value and each strategy type.
+    OccCounters(const ImageI& rqf, const AcStrategyImage& ac_strategy) {
+      for (size_t y = 0; y < rqf.ysize(); y++) {
+        const int32_t* qf_row = rqf.Row(y);
+        AcStrategyRow acs_row = ac_strategy.ConstRow(y);
+        for (size_t x = 0; x < rqf.xsize(); x++) {
+          int ord = kStrategyOrder[acs_row[x].RawStrategy()];
+          int qf = qf_row[x] - 1;  // assumes quant field values are in [1, 256] - TODO confirm
+          qf_counts[qf]++;
+          qf_ord_counts[ord][qf]++;
+          ord_counts[ord]++;
+        }
+      }
+    }
+
+    size_t qf_counts[256] = {};
+    size_t qf_ord_counts[kNumOrders][256] = {};
+    size_t ord_counts[kNumOrders] = {};
+  };
+  // The OccCounters struct is too big to allocate on the stack.
+  std::unique_ptr<OccCounters> counters(
+      new OccCounters(rqf, enc_state.shared.ac_strategy));
+
+  // Splitting the context model according to the quantization field seems to
+  // mostly benefit only large images.
+  size_t size_for_qf_split = (1 << 13) * enc_state.cparams.butteraugli_distance;
+  size_t num_qf_segments = tot < size_for_qf_split ? 1 : 2;
+  std::vector<uint32_t>& qft = enc_state.shared.block_ctx_map.qf_thresholds;
+  qft.clear();
+  // Divide the quant field in up to num_qf_segments segments.
+  size_t cumsum = 0;
+  size_t next = 1;
+  size_t last_cut = 256;
+  size_t cut = tot * next / num_qf_segments;
+  for (uint32_t j = 0; j < 256; j++) {
+    cumsum += counters->qf_counts[j];
+    if (cumsum > cut) {
+      if (j != 0) {
+        qft.push_back(j);
+      }
+      last_cut = j;
+      while (cumsum > cut) {
+        next++;
+        cut = tot * next / num_qf_segments;
+      }
+    } else if (next > qft.size() + 1) {
+      if (j - 1 == last_cut && j != 0) {
+        qft.push_back(j);
+      }
+    }
+  }
+
+  // Count the occurrences of each segment.
+  std::vector<size_t> counts(kNumOrders * (qft.size() + 1));
+  size_t qft_pos = 0;
+  for (size_t j = 0; j < 256; j++) {
+    if (qft_pos < qft.size() && j == qft[qft_pos]) {
+      qft_pos++;
+    }
+    for (size_t i = 0; i < kNumOrders; i++) {
+      counts[qft_pos + i * (qft.size() + 1)] += counters->qf_ord_counts[i][j];
+    }
+  }
+
+  // Repeatedly merge the lowest-count pair.
+  std::vector<uint8_t> remap((qft.size() + 1) * kNumOrders);
+  std::iota(remap.begin(), remap.end(), 0);
+  std::vector<uint8_t> clusters(remap);
+  size_t nb_clusters = Clamp1((int)(tot / size_for_ctx_model / 2), 2, 9);
+  size_t nb_clusters_chroma = Clamp1((int)(tot / size_for_ctx_model / 3), 1, 5);
+  // This is O(n^2 log n), but n is small.
+  while (clusters.size() > nb_clusters) {
+    std::sort(clusters.begin(), clusters.end(),
+              [&](int a, int b) { return counts[a] > counts[b]; });
+    counts[clusters[clusters.size() - 2]] += counts[clusters.back()];
+    counts[clusters.back()] = 0;
+    remap[clusters.back()] = clusters[clusters.size() - 2];
+    clusters.pop_back();
+  }
+  for (size_t i = 0; i < remap.size(); i++) {
+    while (remap[remap[i]] != remap[i]) {
+      remap[i] = remap[remap[i]];
+    }
+  }
+  // Relabel starting from 0.
+  std::vector<uint8_t> remap_remap(remap.size(), remap.size());
+  size_t num = 0;
+  for (size_t i = 0; i < remap.size(); i++) {
+    if (remap_remap[remap[i]] == remap.size()) {
+      remap_remap[remap[i]] = num++;
+    }
+    remap[i] = remap_remap[remap[i]];
+  }
+  // Write the block context map.
+  auto& ctx_map = enc_state.shared.block_ctx_map.ctx_map;
+  ctx_map = remap;
+  ctx_map.resize(remap.size() * 3);
+  // for chroma, only use up to nb_clusters_chroma separate block contexts
+  // (those for the biggest clusters)
+  for (size_t i = remap.size(); i < remap.size() * 3; i++) {
+    ctx_map[i] = num + Clamp1((int)remap[i % remap.size()], 0,
+                              (int)nb_clusters_chroma - 1);
+  }
+  enc_state.shared.block_ctx_map.num_ctxs =
+      *std::max_element(ctx_map.begin(), ctx_map.end()) + 1;
+}
+
+}  // namespace
+
+void FindBestDequantMatrices(const CompressParams& cparams,
+                             const Image3F& opsin,
+                             ModularFrameEncoder* modular_frame_encoder,
+                             DequantMatrices* dequant_matrices) {
+  // TODO(veluca): quant matrices for no-gaborish.
+  // TODO(veluca): heuristics for in-bitstream quant tables.
+  *dequant_matrices = DequantMatrices();
+  if (!cparams.max_error_mode) return;
+  // One reciprocal per channel, shared by the AC numerators and the DC weights.
+  float inv_max_error[3];
+  for (size_t ch = 0; ch < 3; ch++) {
+    inv_max_error[ch] = 1.0f / cparams.max_error[ch];
+  }
+  // Set numerators of all quantization matrices to constant values.
+  float ac_numerators[3][1] = {
+      {inv_max_error[0]}, {inv_max_error[1]}, {inv_max_error[2]}};
+  DctQuantWeightParams ac_params(ac_numerators);
+  std::vector<QuantEncoding> all_encodings(DequantMatrices::kNum,
+                                           QuantEncoding::DCT(ac_params));
+  DequantMatricesSetCustom(dequant_matrices, all_encodings,
+                           modular_frame_encoder);
+  DequantMatricesSetCustomDC(dequant_matrices, inv_max_error);
+}
+
+bool DefaultEncoderHeuristics::HandlesColorConversion(
+    const CompressParams& cparams, const ImageBundle& ib) {
+  return !(cparams.noise == Override::kOn || cparams.patches == Override::kOn ||
+           cparams.speed_tier < SpeedTier::kWombat || cparams.resampling != 1 ||
+           cparams.color_transform != ColorTransform::kXYB ||
+           cparams.modular_mode || ib.HasAlpha());
+}
+
+namespace {
+
+void StoreMin2(const float v, float& min1, float& min2) {
+  // Tracks the two smallest values: min1 is the smallest, min2 the runner-up.
+  if (!(v < min2)) return;  // v displaces neither tracked value
+  if (v < min1) {
+    min2 = min1;
+    min1 = v;
+    return;
+  }
+  min2 = v;
+}
+
+void CreateMask(const ImageF& image, ImageF& mask) {
+  // mask(x, y) is the second-smallest absolute difference between a pixel and
+  // its W/E/N/S neighbours; a missing neighbour is replaced by the pixel itself.
+  const size_t width = image.xsize();
+  const size_t height = image.ysize();
+  for (size_t y = 0; y < height; y++) {
+    auto* above = image.Row(y > 0 ? y - 1 : y);
+    auto* current = image.Row(y);
+    auto* below = image.Row(y + 1 < height ? y + 1 : y);
+    auto* dst = mask.Row(y);
+    for (size_t x = 0; x < width; x++) {
+      const size_t x_west = x > 0 ? x - 1 : x;
+      const size_t x_east = x + 1 < width ? x + 1 : x;
+      const float center = current[x];
+      const float diffs[4] = {
+          std::abs(center - current[x_west]),
+          std::abs(center - current[x_east]),
+          std::abs(center - above[x]),
+          std::abs(center - below[x]),
+      };
+      float smallest = std::numeric_limits<float>::max();
+      float runner_up = std::numeric_limits<float>::max();
+      for (const float d : diffs) StoreMin2(d, smallest, runner_up);
+      dst[x] = runner_up;
+    }
+  }
+}
+
+// Downsamples the image by a factor of 2 with a kernel that's sharper than
+// the standard 2x2 box kernel used by DownsampleImage.
+// The kernel is optimized against the result of the 2x2 upsampling kernel used
+// by the decoder. Ringing is slightly reduced by clamping the values of the
+// resulting pixels within certain bounds of a small region in the original
+// image.
+//
+// `input` is the full-resolution plane. `output` is not resized here: one
+// value is written for every pixel of its current dimensions, which are
+// presumably those of the 2x2 box downsample of `input` (the mask built from
+// that box downsample is indexed with output coordinates). Source coordinates
+// that fall outside `input` are clamped to the nearest edge pixel.
+void DownsampleImage2_Sharper(const ImageF& input, ImageF* output) {
+  const int64_t kernelx = 12;
+  const int64_t kernely = 12;
+
+  static const float kernel[144] = {
+      -0.000314256996835, -0.000314256996835, -0.000897597057705,
+      -0.000562751488849, -0.000176807273646, 0.001864627368902,
+      0.001864627368902,  -0.000176807273646, -0.000562751488849,
+      -0.000897597057705, -0.000314256996835, -0.000314256996835,
+      -0.000314256996835, -0.001527942804748, -0.000121760530512,
+      0.000191123989093,  0.010193185932466,  0.058637519197110,
+      0.058637519197110,  0.010193185932466,  0.000191123989093,
+      -0.000121760530512, -0.001527942804748, -0.000314256996835,
+      -0.000897597057705, -0.000121760530512, 0.000946363683751,
+      0.007113577630288,  0.000437956841058,  -0.000372823835211,
+      -0.000372823835211, 0.000437956841058,  0.007113577630288,
+      0.000946363683751,  -0.000121760530512, -0.000897597057705,
+      -0.000562751488849, 0.000191123989093,  0.007113577630288,
+      0.044592622228814,  0.000222278879007,  -0.162864473015945,
+      -0.162864473015945, 0.000222278879007,  0.044592622228814,
+      0.007113577630288,  0.000191123989093,  -0.000562751488849,
+      -0.000176807273646, 0.010193185932466,  0.000437956841058,
+      0.000222278879007,  -0.000913092543974, -0.017071696107902,
+      -0.017071696107902, -0.000913092543974, 0.000222278879007,
+      0.000437956841058,  0.010193185932466,  -0.000176807273646,
+      0.001864627368902,  0.058637519197110,  -0.000372823835211,
+      -0.162864473015945, -0.017071696107902, 0.414660099370354,
+      0.414660099370354,  -0.017071696107902, -0.162864473015945,
+      -0.000372823835211, 0.058637519197110,  0.001864627368902,
+      0.001864627368902,  0.058637519197110,  -0.000372823835211,
+      -0.162864473015945, -0.017071696107902, 0.414660099370354,
+      0.414660099370354,  -0.017071696107902, -0.162864473015945,
+      -0.000372823835211, 0.058637519197110,  0.001864627368902,
+      -0.000176807273646, 0.010193185932466,  0.000437956841058,
+      0.000222278879007,  -0.000913092543974, -0.017071696107902,
+      -0.017071696107902, -0.000913092543974, 0.000222278879007,
+      0.000437956841058,  0.010193185932466,  -0.000176807273646,
+      -0.000562751488849, 0.000191123989093,  0.007113577630288,
+      0.044592622228814,  0.000222278879007,  -0.162864473015945,
+      -0.162864473015945, 0.000222278879007,  0.044592622228814,
+      0.007113577630288,  0.000191123989093,  -0.000562751488849,
+      -0.000897597057705, -0.000121760530512, 0.000946363683751,
+      0.007113577630288,  0.000437956841058,  -0.000372823835211,
+      -0.000372823835211, 0.000437956841058,  0.007113577630288,
+      0.000946363683751,  -0.000121760530512, -0.000897597057705,
+      -0.000314256996835, -0.001527942804748, -0.000121760530512,
+      0.000191123989093,  0.010193185932466,  0.058637519197110,
+      0.058637519197110,  0.010193185932466,  0.000191123989093,
+      -0.000121760530512, -0.001527942804748, -0.000314256996835,
+      -0.000314256996835, -0.000314256996835, -0.000897597057705,
+      -0.000562751488849, -0.000176807273646, 0.001864627368902,
+      0.001864627368902,  -0.000176807273646, -0.000562751488849,
+      -0.000897597057705, -0.000314256996835, -0.000314256996835};
+
+  int64_t xsize = input.xsize();
+  int64_t ysize = input.ysize();
+
+  // The box-downsampled copy is only used to derive the anti-ringing mask.
+  ImageF box_downsample(xsize, ysize);
+  CopyImageTo(input, &box_downsample);
+  DownsampleImage(&box_downsample, 2);
+
+  ImageF mask(box_downsample.xsize(), box_downsample.ysize());
+  CreateMask(box_downsample, mask);
+
+  // Signed loop counters keep the source-coordinate arithmetic below in
+  // int64_t, so negative coordinates near the top/left border are represented
+  // directly instead of through unsigned wraparound.
+  const int64_t out_xsize = output->xsize();
+  const int64_t out_ysize = output->ysize();
+  for (int64_t y = 0; y < out_ysize; y++) {
+    float* row_out = output->Row(y);
+    const float* row_in[kernely];
+    const float* row_mask = mask.Row(y);
+    // get the rows in the support
+    for (int64_t ky = 0; ky < kernely; ky++) {
+      int64_t iy = y * 2 + ky - (kernely - 1) / 2;
+      if (iy < 0) iy = 0;
+      if (iy >= ysize) iy = ysize - 1;
+      row_in[ky] = input.Row(iy);
+    }
+
+    for (int64_t x = 0; x < out_xsize; x++) {
+      // get min and max values of the original image in the support.
+      // The maximum must start at lowest(): numeric_limits<float>::min() is
+      // the smallest positive normalized float, which would remain the
+      // "maximum" whenever every sample in the support is negative.
+      float min = std::numeric_limits<float>::max();
+      float max = std::numeric_limits<float>::lowest();
+      // kernelx - R and kernely - R are the radius of a rectangular region in
+      // which the values of a pixel are bounded to reduce ringing.
+      static constexpr int64_t R = 5;
+      for (int64_t ky = R; ky + R < kernely; ky++) {
+        for (int64_t kx = R; kx + R < kernelx; kx++) {
+          int64_t ix = x * 2 + kx - (kernelx - 1) / 2;
+          if (ix < 0) ix = 0;
+          if (ix >= xsize) ix = xsize - 1;
+          min = std::min<float>(min, row_in[ky][ix]);
+          max = std::max<float>(max, row_in[ky][ix]);
+        }
+      }
+
+      // Convolve the full 12x12 support with the sharpening kernel.
+      float sum = 0;
+      for (int64_t ky = 0; ky < kernely; ky++) {
+        for (int64_t kx = 0; kx < kernelx; kx++) {
+          int64_t ix = x * 2 + kx - (kernelx - 1) / 2;
+          if (ix < 0) ix = 0;
+          if (ix >= xsize) ix = xsize - 1;
+          sum += row_in[ky][ix] * kernel[ky * kernelx + kx];
+        }
+      }
+
+      row_out[x] = sum;
+
+      // Clamp the pixel within the value of a small area to prevent ringing.
+      // The mask determines how much to clamp, clamp more to reduce more
+      // ringing in smooth areas, clamp less in noisy areas to get more
+      // sharpness. Higher mask_multiplier gives less clamping, so less
+      // ringing reduction.
+      const constexpr float mask_multiplier = 1;
+      float a = row_mask[x] * mask_multiplier;
+      float clip_min = min - a;
+      float clip_max = max + a;
+      if (row_out[x] < clip_min) {
+        row_out[x] = clip_min;
+      } else if (row_out[x] > clip_max) {
+        row_out[x] = clip_max;
+      }
+    }
+  }
+}
+
+// Replaces `*opsin` with its half-resolution (rounded up) version, applying
+// the sharper single-plane downsampler to each of the three planes.
+void DownsampleImage2_Sharper(Image3F* opsin) {
+  const auto half_xsize = DivCeil(opsin->xsize(), 2);
+  const auto half_ysize = DivCeil(opsin->ysize(), 2);
+
+  // Over-allocate by kBlockDim in each dimension, then shrink back to the
+  // real size, so that later padding does not need a reallocation.
+  Image3F half(half_xsize + kBlockDim, half_ysize + kBlockDim);
+  half.ShrinkTo(half_xsize, half_ysize);
+
+  for (size_t plane = 0; plane < 3; ++plane) {
+    DownsampleImage2_Sharper(opsin->Plane(plane), &half.Plane(plane));
+  }
+
+  *opsin = std::move(half);
+}
+
+// The default upsampling kernels used by Upsampler in the decoder.
+// kSize is the side length of each square kernel; every table below stores
+// kSize * kSize = 25 taps in row-major order (index = ky * kSize + kx).
+// UpsampleImage and UpsamplerDeriv pick the table from the parity of the
+// output coordinates: kernelXY is used when (x & 1) == X and (y & 1) == Y.
+static const constexpr int64_t kSize = 5;
+
+// Output pixels with even x and even y.
+static const float kernel00[25] = {
+    -0.01716200f, -0.03452303f, -0.04022174f, -0.02921014f, -0.00624645f,
+    -0.03452303f, 0.14111091f,  0.28896755f,  0.00278718f,  -0.01610267f,
+    -0.04022174f, 0.28896755f,  0.56661550f,  0.03777607f,  -0.01986694f,
+    -0.02921014f, 0.00278718f,  0.03777607f,  -0.03144731f, -0.01185068f,
+    -0.00624645f, -0.01610267f, -0.01986694f, -0.01185068f, -0.00213539f,
+};
+// Output pixels with even x and odd y.
+static const float kernel01[25] = {
+    -0.00624645f, -0.01610267f, -0.01986694f, -0.01185068f, -0.00213539f,
+    -0.02921014f, 0.00278718f,  0.03777607f,  -0.03144731f, -0.01185068f,
+    -0.04022174f, 0.28896755f,  0.56661550f,  0.03777607f,  -0.01986694f,
+    -0.03452303f, 0.14111091f,  0.28896755f,  0.00278718f,  -0.01610267f,
+    -0.01716200f, -0.03452303f, -0.04022174f, -0.02921014f, -0.00624645f,
+};
+// Output pixels with odd x and even y.
+static const float kernel10[25] = {
+    -0.00624645f, -0.02921014f, -0.04022174f, -0.03452303f, -0.01716200f,
+    -0.01610267f, 0.00278718f,  0.28896755f,  0.14111091f,  -0.03452303f,
+    -0.01986694f, 0.03777607f,  0.56661550f,  0.28896755f,  -0.04022174f,
+    -0.01185068f, -0.03144731f, 0.03777607f,  0.00278718f,  -0.02921014f,
+    -0.00213539f, -0.01185068f, -0.01986694f, -0.01610267f, -0.00624645f,
+};
+// Output pixels with odd x and odd y.
+static const float kernel11[25] = {
+    -0.00213539f, -0.01185068f, -0.01986694f, -0.01610267f, -0.00624645f,
+    -0.01185068f, -0.03144731f, 0.03777607f,  0.00278718f,  -0.02921014f,
+    -0.01986694f, 0.03777607f,  0.56661550f,  0.28896755f,  -0.04022174f,
+    -0.01610267f, 0.00278718f,  0.28896755f,  0.14111091f,  -0.03452303f,
+    -0.00624645f, -0.02921014f, -0.04022174f, -0.03452303f, -0.01716200f,
+};
+
+// Does exactly the same as the Upsampler in dec_upsampler for 2x2 pixels, with
+// default CustomTransformData.
+// TODO(lode): use Upsampler instead. However, it requires pre-initialization
+// and padding on the left side of the image which requires refactoring the
+// other code using this.
+//
+// Every pixel of `output` (at its current size) is computed from the
+// kSize x kSize neighborhood of `input` centered on (x / 2, y / 2);
+// neighborhood coordinates outside `input` are clamped to the nearest edge
+// pixel. The kernel is chosen by the parity of the output coordinates, and the
+// result is clamped to the [min, max] range of the samples in that
+// neighborhood.
+static void UpsampleImage(const ImageF& input, ImageF* output) {
+  int64_t xsize = input.xsize();
+  int64_t ysize = input.ysize();
+  int64_t xsize2 = output->xsize();
+  int64_t ysize2 = output->ysize();
+  for (int64_t y = 0; y < ysize2; y++) {
+    float* row_out = output->Row(y);
+    for (int64_t x = 0; x < xsize2; x++) {
+      auto kernel = kernel00;
+      if ((x & 1) && (y & 1)) {
+        kernel = kernel11;
+      } else if (x & 1) {
+        kernel = kernel10;
+      } else if (y & 1) {
+        kernel = kernel01;
+      }
+      int64_t x2 = x / 2;
+      int64_t y2 = y / 2;
+
+      // Min and max values of the input samples in the support. The maximum
+      // must start at lowest(): numeric_limits<float>::min() is the smallest
+      // positive normalized float, so with an all-negative neighborhood it
+      // would never be replaced and the upper clamp would have no effect.
+      float min = std::numeric_limits<float>::max();
+      float max = std::numeric_limits<float>::lowest();
+      float sum = 0;
+
+      // A single pass over the support gathers the range and the weighted
+      // sum; the taps are accumulated in row-major (ky, kx) order.
+      for (int64_t ky = 0; ky < kSize; ky++) {
+        int64_t yi = y2 - kSize / 2 + ky;
+        if (yi < 0) yi = 0;
+        if (yi >= ysize) yi = ysize - 1;
+        const float* row_in = input.Row(yi);
+        for (int64_t kx = 0; kx < kSize; kx++) {
+          int64_t xi = x2 - kSize / 2 + kx;
+          if (xi < 0) xi = 0;
+          if (xi >= xsize) xi = xsize - 1;
+          const float v = row_in[xi];
+          min = std::min<float>(min, v);
+          max = std::max<float>(max, v);
+          sum += v * kernel[ky * kSize + kx];
+        }
+      }
+
+      // Clamp to the local range to suppress over/undershoot of the kernel.
+      if (sum < min) sum = min;
+      if (sum > max) sum = max;
+      row_out[x] = sum;
+    }
+  }
+}
+
+// Partial derivative of upsampled pixel (x, y) with respect to low-resolution
+// pixel (x2, y2), i.e. the kernel tap that links the two. Clamping done by the
+// upsampler is ignored. Yields 0 when (x2, y2) is outside the kernel support
+// of (x, y).
+float UpsamplerDeriv(int64_t x2, int64_t y2, int64_t x, int64_t y) {
+  // The kernel depends only on the parity of the output coordinates.
+  const bool odd_x = (x & 1) != 0;
+  const bool odd_y = (y & 1) != 0;
+  const float* taps = odd_x ? (odd_y ? kernel11 : kernel10)
+                            : (odd_y ? kernel01 : kernel00);
+
+  // Position of (x2, y2) inside the kernel window centered on (x / 2, y / 2).
+  const int64_t col = x2 - x / 2 + kSize / 2;
+  const int64_t row = y2 - y / 2 + kSize / 2;
+
+  // Not expected to happen, but stay safe for out-of-support queries.
+  const bool in_support = col >= 0 && col < kSize && row >= 0 && row < kSize;
+  if (!in_support) return 0;
+
+  return taps[row * kSize + col];
+}
+
+// Applies the derivative (transpose) of the Upsampler to `input`, reversing
+// the effect of its coefficients. `d` is the 2x2 times smaller result image;
+// each of its pixels gathers the kernel-weighted contributions of the
+// high-resolution pixels around it.
+void AntiUpsample(const ImageF& input, ImageF* d) {
+  const int64_t in_xsize = input.xsize();
+  const int64_t in_ysize = input.ysize();
+  const int64_t out_xsize = d->xsize();
+  const int64_t out_ysize = d->ysize();
+  // Extent of the scanned high-resolution window relative to 2 * (x2, y2):
+  // [-before, +after). UpsamplerDeriv yields 0 for pixels in the window that
+  // are not actually coupled to (x2, y2).
+  const int64_t before = kSize - 1;
+  const int64_t after = kSize + 1;
+
+  for (int64_t oy = 0; oy < out_ysize; ++oy) {
+    float* out_row = d->Row(oy);
+    const int64_t y_begin = std::max<int64_t>(0, oy * 2 - before);
+    const int64_t y_end = std::min<int64_t>(in_ysize, oy * 2 + after);
+
+    for (int64_t ox = 0; ox < out_xsize; ++ox) {
+      const int64_t x_begin = std::max<int64_t>(0, ox * 2 - before);
+      const int64_t x_end = std::min<int64_t>(in_xsize, ox * 2 + after);
+
+      float acc = 0;
+      for (int64_t iy = y_begin; iy < y_end; ++iy) {
+        const float* in_row = input.Row(iy);
+        for (int64_t ix = x_begin; ix < x_end; ++ix) {
+          // The product is formed in double before being folded into the
+          // float accumulator.
+          const double weight = UpsamplerDeriv(ox, oy, ix, iy);
+          acc += weight * in_row[ix];
+        }
+      }
+      out_row[ox] = acc;
+    }
+  }
+}
+
+// Element-wise multiplies two images: out(x, y) = image1(x, y) * image2(x, y).
+// All three planes must have identical dimensions (checked with JXL_CHECK).
+// `out` may be the same plane as `image1` or `image2`: this file calls the
+// function in place, so the row pointers are deliberately not
+// restrict-qualified (a restrict pointer that aliases another pointer used
+// for the same object is undefined behavior).
+template <typename T>
+void ElwiseMul(const Plane<T>& image1, const Plane<T>& image2, Plane<T>* out) {
+  const size_t xsize = image1.xsize();
+  const size_t ysize = image1.ysize();
+  JXL_CHECK(xsize == image2.xsize());
+  JXL_CHECK(ysize == image2.ysize());
+  JXL_CHECK(xsize == out->xsize());
+  JXL_CHECK(ysize == out->ysize());
+  for (size_t y = 0; y < ysize; ++y) {
+    const T* const row1 = image1.Row(y);
+    const T* const row2 = image2.Row(y);
+    T* const row_out = out->Row(y);
+    for (size_t x = 0; x < xsize; ++x) {
+      // Both operands are read before the store, so in-place use is safe.
+      row_out[x] = row1[x] * row2[x];
+    }
+  }
+}
+
+// Element-wise divides two images: out(x, y) = image1(x, y) / image2(x, y).
+// All three planes must have identical dimensions (checked with JXL_CHECK).
+// No check is made for zero divisors. `out` may be the same plane as `image1`
+// or `image2`: this file calls the function in place, so the row pointers are
+// deliberately not restrict-qualified (a restrict pointer that aliases
+// another pointer used for the same object is undefined behavior).
+template <typename T>
+void ElwiseDiv(const Plane<T>& image1, const Plane<T>& image2, Plane<T>* out) {
+  const size_t xsize = image1.xsize();
+  const size_t ysize = image1.ysize();
+  JXL_CHECK(xsize == image2.xsize());
+  JXL_CHECK(ysize == image2.ysize());
+  JXL_CHECK(xsize == out->xsize());
+  JXL_CHECK(ysize == out->ysize());
+  for (size_t y = 0; y < ysize; ++y) {
+    const T* const row1 = image1.Row(y);
+    const T* const row2 = image2.Row(y);
+    T* const row_out = out->Row(y);
+    for (size_t x = 0; x < xsize; ++x) {
+      // Both operands are read before the store, so in-place use is safe.
+      row_out[x] = row1[x] / row2[x];
+    }
+  }
+}
+
+// Clamps every pixel of `down` to the value range found in the 3x3
+// neighborhood of the same position in `initial`, widened on both sides by a
+// mask-dependent margin. Neighbors outside the bounds of `down` are ignored.
+void ReduceRinging(const ImageF& initial, const ImageF& mask, ImageF& down) {
+  const int64_t width = down.xsize();
+  const int64_t height = down.ysize();
+
+  // The mask determines how much to clamp: clamp more to reduce more ringing
+  // in smooth areas, clamp less in noisy areas to get more sharpness. A higher
+  // multiplier gives less clamping, so less ringing reduction.
+  const constexpr float kMaskMultiplier = 2;
+
+  for (int64_t cy = 0; cy < height; ++cy) {
+    const float* mask_row = mask.Row(cy);
+    float* down_row = down.Row(cy);
+    // Vertical extent of the 3x3 window, cut off at the image border.
+    const int64_t ny_begin = std::max<int64_t>(cy - 1, 0);
+    const int64_t ny_end = std::min<int64_t>(cy + 2, height);
+
+    for (int64_t cx = 0; cx < width; ++cx) {
+      const int64_t nx_begin = std::max<int64_t>(cx - 1, 0);
+      const int64_t nx_end = std::min<int64_t>(cx + 2, width);
+
+      // Value range of `initial` over the window, seeded with the center.
+      float lowest = initial.Row(cy)[cx];
+      float highest = lowest;
+      for (int64_t ny = ny_begin; ny < ny_end; ++ny) {
+        const float* initial_row = initial.Row(ny);
+        for (int64_t nx = nx_begin; nx < nx_end; ++nx) {
+          lowest = std::min<float>(lowest, initial_row[nx]);
+          highest = std::max<float>(highest, initial_row[nx]);
+        }
+      }
+
+      // Clamp the pixel to the widened range to prevent ringing.
+      const float margin = mask_row[cx] * kMaskMultiplier;
+      const float lower_bound = lowest - margin;
+      const float upper_bound = highest + margin;
+      float value = down_row[cx];
+      if (value < lower_bound) value = lower_bound;
+      if (value > upper_bound) value = upper_bound;
+      down_row[cx] = value;
+    }
+  }
+}
+
+// TODO(lode): move this to a separate file enc_downsample.cc
+//
+// Downsamples `orig` by a factor of 2 (dimensions rounded up) into `output`.
+// Starts from the result of DownsampleImage2_Sharper and refines it for a
+// fixed number of iterations: the current estimate is upsampled with
+// UpsampleImage, the difference to `orig` is mapped back to low resolution
+// with AntiUpsample, normalized, and added to the estimate. The refined image
+// is finally clamped by ReduceRinging and copied into `output`, which is
+// shrunk to the downsampled size.
+void DownsampleImage2_Iterative(const ImageF& orig, ImageF* output) {
+  int64_t xsize = orig.xsize();
+  int64_t ysize = orig.ysize();
+  int64_t xsize2 = DivCeil(orig.xsize(), 2);
+  int64_t ysize2 = DivCeil(orig.ysize(), 2);
+
+  // The 2x2 box downsample is only used to build the mask that controls the
+  // strength of the final anti-ringing clamp.
+  ImageF box_downsample(xsize, ysize);
+  CopyImageTo(orig, &box_downsample);
+  DownsampleImage(&box_downsample, 2);
+  ImageF mask(box_downsample.xsize(), box_downsample.ysize());
+  CreateMask(box_downsample, mask);
+
+  output->ShrinkTo(xsize2, ysize2);
+
+  // Initial result image using the sharper downsampling.
+  // Allocate extra space to avoid a reallocation when padding.
+  ImageF initial(DivCeil(orig.xsize(), 2) + kBlockDim,
+                 DivCeil(orig.ysize(), 2) + kBlockDim);
+  initial.ShrinkTo(initial.xsize() - kBlockDim, initial.ysize() - kBlockDim);
+  DownsampleImage2_Sharper(orig, &initial);
+
+  // Working buffers: `down` is the current low-resolution estimate, `up` its
+  // upsampled version, `corr` the full-resolution error and `corr2` that error
+  // mapped back to low resolution.
+  ImageF down(initial.xsize(), initial.ysize());
+  CopyImageTo(initial, &down);
+  ImageF up(xsize, ysize);
+  ImageF corr(xsize, ysize);
+  ImageF corr2(xsize2, ysize2);
+
+  // In the weights map, relatively higher values will allow less ringing but
+  // also less sharpness. With all constant values, it optimizes equally
+  // everywhere. Even in this case, the weights2 computed from
+  // this is still used and differs at the borders of the image.
+  // TODO(lode): Make use of the weights field for anti-ringing and clamping,
+  // the values are all set to 1 for now, but it is intended to be used for
+  // reducing ringing based on the mask, and taking clamping into account.
+  ImageF weights(xsize, ysize);
+  for (size_t y = 0; y < weights.ysize(); y++) {
+    auto* row = weights.Row(y);
+    for (size_t x = 0; x < weights.xsize(); x++) {
+      row[x] = 1;
+    }
+  }
+  // Low-resolution normalization factors: the weights pushed through the same
+  // AntiUpsample operator that is applied to the error below.
+  ImageF weights2(xsize2, ysize2);
+  AntiUpsample(weights, &weights2);
+
+  const size_t num_it = 3;
+  for (size_t it = 0; it < num_it; ++it) {
+    // corr = weights * (orig - Upsample(down)), at full resolution.
+    UpsampleImage(down, &up);
+    corr = LinComb<float>(1, orig, -1, up);
+    ElwiseMul(corr, weights, &corr);
+    // corr2 = AntiUpsample(corr) / weights2, at low resolution.
+    AntiUpsample(corr, &corr2);
+    ElwiseDiv(corr2, weights2, &corr2);
+
+    // Apply the correction to the current estimate.
+    down = LinComb<float>(1, down, 1, corr2);
+  }
+
+  // Limit the refined result to the neighborhood range of the initial
+  // (sharper) downsample, widened according to the mask.
+  ReduceRinging(initial, mask, down);
+
+  // can't just use CopyImage, because the output image was prepared with
+  // padding.
+  for (size_t y = 0; y < down.ysize(); y++) {
+    for (size_t x = 0; x < down.xsize(); x++) {
+      float v = down.Row(y)[x];
+      output->Row(y)[x] = v;
+    }
+  }
+}
+
+// Replaces `*opsin` with its half-resolution (rounded up) version, running the
+// iterative single-plane downsampler independently on each of the three
+// planes.
+void DownsampleImage2_Iterative(Image3F* opsin) {
+  // Allocate extra space to avoid a reallocation when padding.
+  Image3F downsampled(DivCeil(opsin->xsize(), 2) + kBlockDim,
+                      DivCeil(opsin->ysize(), 2) + kBlockDim);
+  downsampled.ShrinkTo(downsampled.xsize() - kBlockDim,
+                       downsampled.ysize() - kBlockDim);
+
+  // NOTE: this function used to convert `opsin` to linear RGB and build a
+  // Butteraugli mask (plus an unused `mask_fuzzy` plane) at this point, but
+  // none of those results were ever read. That work only cost time and
+  // memory, so it is not performed; the per-plane downsampler derives its own
+  // mask from the plane it is given.
+  for (size_t c = 0; c < 3; c++) {
+    DownsampleImage2_Iterative(opsin->Plane(c), &downsampled.Plane(c));
+  }
+  *opsin = std::move(downsampled);
+}
+}  // namespace
+
+// Default heuristics for a lossy frame. In order, this:
+//  - estimates noise parameters when the frame header requests noise,
+//  - downsamples and re-pads `opsin` when the frame uses upsampling,
+//  - finds and subtracts splines and patches (depending on speed tier),
+//  - computes the XYB image itself if `opsin` arrives empty,
+//  - computes the initial quant field, applies inverse Gaborish and selects
+//    dequantization matrices,
+//  - runs the per-tile CfL / AC strategy / AR control field / quant field
+//    steps on `pool`,
+//  - refines the quantizer and picks the block entropy model.
+// Returns a failure if the butteraugli distance is negative, or if
+// initializing the spline draw cache or running the thread pool fails.
+Status DefaultEncoderHeuristics::LossyFrameHeuristics(
+    PassesEncoderState* enc_state, ModularFrameEncoder* modular_frame_encoder,
+    const ImageBundle* original_pixels, Image3F* opsin,
+    const JxlCmsInterface& cms, ThreadPool* pool, AuxOut* aux_out) {
+  CompressParams& cparams = enc_state->cparams;
+  PassesSharedState& shared = enc_state->shared;
+
+  // Compute parameters for noise synthesis.
+  if (shared.frame_header.flags & FrameHeader::kNoise) {
+    if (cparams.photon_noise_iso == 0) {
+      // Don't start at zero amplitude since adding noise is expensive -- it
+      // significantly slows down decoding, and this is unlikely to
+      // completely go away even with advanced optimizations. After the
+      // kNoiseModelingRampUpDistanceRange we have reached the full level,
+      // i.e. noise is no longer represented by the compressed image, so we
+      // can add full noise by the noise modeling itself.
+      static const float kNoiseModelingRampUpDistanceRange = 0.6;
+      static const float kNoiseLevelAtStartOfRampUp = 0.25;
+      static const float kNoiseRampupStart = 1.0;
+      // TODO(user) test and properly select quality_coef with smooth
+      // filter
+      float quality_coef = 1.0f;
+      const float rampup = (cparams.butteraugli_distance - kNoiseRampupStart) /
+                           kNoiseModelingRampUpDistanceRange;
+      if (rampup < 1.0f) {
+        quality_coef = kNoiseLevelAtStartOfRampUp +
+                       (1.0f - kNoiseLevelAtStartOfRampUp) * rampup;
+      }
+      if (rampup < 0.0f) {
+        quality_coef = kNoiseRampupStart;
+      }
+      // Drop the noise flag again if no noise parameters could be estimated.
+      if (!GetNoiseParameter(*opsin, &shared.image_features.noise_params,
+                             quality_coef)) {
+        shared.frame_header.flags &= ~FrameHeader::kNoise;
+      }
+    }
+  }
+  if (enc_state->shared.frame_header.upsampling != 1 &&
+      !cparams.already_downsampled) {
+    // In VarDCT mode, LossyFrameHeuristics takes care of running downsampling
+    // after noise, if necessary.
+    if (cparams.resampling == 2) {
+      // TODO(lode): use the regular DownsampleImage, or adapt to the custom
+      // coefficients, if there are custom upscaling coefficients in
+      // CustomTransformData
+      if (cparams.speed_tier <= SpeedTier::kSquirrel) {
+        // TODO(lode): DownsampleImage2_Iterative is currently too slow to
+        // be used for squirrel, make it faster, and / or enable it only for
+        // kitten.
+        DownsampleImage2_Iterative(opsin);
+      } else {
+        DownsampleImage2_Sharper(opsin);
+      }
+    } else {
+      DownsampleImage(opsin, cparams.resampling);
+    }
+    PadImageToBlockMultipleInPlace(opsin);
+  }
+
+  if (cparams.butteraugli_distance < 0) {
+    return JXL_FAILURE("Expected non-negative distance");
+  }
+
+  // Find and subtract splines.
+  if (cparams.speed_tier <= SpeedTier::kSquirrel) {
+    // If we do already have them, they were passed upstream to EncodeFile.
+    if (!shared.image_features.splines.HasAny()) {
+      shared.image_features.splines = FindSplines(*opsin);
+    }
+    JXL_RETURN_IF_ERROR(shared.image_features.splines.InitializeDrawCache(
+        opsin->xsize(), opsin->ysize(), shared.cmap));
+    shared.image_features.splines.SubtractFrom(opsin);
+  }
+
+  // Find and subtract patches/dots.
+  if (ApplyOverride(cparams.patches,
+                    cparams.speed_tier <= SpeedTier::kSquirrel)) {
+    FindBestPatchDictionary(*opsin, enc_state, cms, pool, aux_out);
+    PatchDictionaryEncoder::SubtractFrom(shared.image_features.patches, opsin);
+  }
+
+  static const float kAcQuant = 0.79f;
+  const float quant_dc = InitialQuantDC(cparams.butteraugli_distance);
+  Quantizer& quantizer = enc_state->shared.quantizer;
+  // We don't know the quant field yet, but for computing the global scale
+  // assuming that it will be the same as for Falcon mode is good enough.
+  quantizer.ComputeGlobalScaleAndQuant(
+      quant_dc, kAcQuant / cparams.butteraugli_distance, 0);
+
+  // TODO(veluca): we can now run all the code from here to FindBestQuantizer
+  // (excluded) one rect at a time. Do that.
+
+  // Dependency graph:
+  //
+  // input: either XYB or input image
+  //
+  // input image -> XYB [optional]
+  // XYB -> initial quant field
+  // XYB -> Gaborished XYB
+  // Gaborished XYB -> CfL1
+  // initial quant field, Gaborished XYB, CfL1 -> ACS
+  // initial quant field, ACS, Gaborished XYB -> EPF control field
+  // initial quant field -> adjusted initial quant field
+  // adjusted initial quant field, ACS -> raw quant field
+  // raw quant field, ACS, Gaborished XYB -> CfL2
+  //
+  // output: Gaborished XYB, CfL, ACS, raw quant field, EPF control field.
+
+  ArControlFieldHeuristics ar_heuristics;
+  AcStrategyHeuristics acs_heuristics;
+  CfLHeuristics cfl_heuristics;
+
+  // An empty `opsin` means the color conversion was left to this function:
+  // allocate it at block-rounded size and convert the original pixels to XYB.
+  if (!opsin->xsize()) {
+    JXL_ASSERT(HandlesColorConversion(cparams, *original_pixels));
+    *opsin = Image3F(RoundUpToBlockDim(original_pixels->xsize()),
+                     RoundUpToBlockDim(original_pixels->ysize()));
+    opsin->ShrinkTo(original_pixels->xsize(), original_pixels->ysize());
+    ToXYB(*original_pixels, pool, opsin, cms, /*linear=*/nullptr);
+    PadImageToBlockMultipleInPlace(opsin);
+  }
+
+  // Compute an initial estimate of the quantization field.
+  // Call InitialQuantField only in Hare mode or slower. Otherwise, rely
+  // on simple heuristics in FindBestAcStrategy, or set a constant for Falcon
+  // mode.
+  if (cparams.speed_tier > SpeedTier::kHare) {
+    enc_state->initial_quant_field =
+        ImageF(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks);
+    enc_state->initial_quant_masking =
+        ImageF(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks);
+    float q = kAcQuant / cparams.butteraugli_distance;
+    FillImage(q, &enc_state->initial_quant_field);
+    FillImage(1.0f / (q + 0.001f), &enc_state->initial_quant_masking);
+  } else {
+    // Call this here, as it relies on pre-gaborish values.
+    float butteraugli_distance_for_iqf = cparams.butteraugli_distance;
+    if (!shared.frame_header.loop_filter.gab) {
+      butteraugli_distance_for_iqf *= 0.73f;
+    }
+    enc_state->initial_quant_field = InitialQuantField(
+        butteraugli_distance_for_iqf, *opsin, shared.frame_dim, pool, 1.0f,
+        &enc_state->initial_quant_masking);
+    quantizer.SetQuantField(quant_dc, enc_state->initial_quant_field, nullptr);
+  }
+
+  // TODO(veluca): do something about animations.
+
+  // Apply inverse-gaborish.
+  if (shared.frame_header.loop_filter.gab) {
+    // Unsure why better to do some more gaborish on X and B than Y.
+    float weight[3] = {
+        1.0036278514398933f,
+        0.99406123118127299f,
+        0.99719338015886894f,
+    };
+    GaborishInverse(opsin, weight, pool);
+  }
+
+  FindBestDequantMatrices(cparams, *opsin, modular_frame_encoder,
+                          &enc_state->shared.matrices);
+
+  cfl_heuristics.Init(*opsin);
+  acs_heuristics.Init(*opsin, enc_state);
+
+  // Per-tile work item for the thread pool. `tid` is the linear index of an
+  // encoder tile (kEncTileDimInBlocks blocks on a side), `thread` the index of
+  // the worker running it.
+  auto process_tile = [&](const uint32_t tid, const size_t thread) {
+    size_t n_enc_tiles =
+        DivCeil(enc_state->shared.frame_dim.xsize_blocks, kEncTileDimInBlocks);
+    size_t tx = tid % n_enc_tiles;
+    size_t ty = tid / n_enc_tiles;
+    size_t by0 = ty * kEncTileDimInBlocks;
+    size_t by1 = std::min((ty + 1) * kEncTileDimInBlocks,
+                          enc_state->shared.frame_dim.ysize_blocks);
+    size_t bx0 = tx * kEncTileDimInBlocks;
+    size_t bx1 = std::min((tx + 1) * kEncTileDimInBlocks,
+                          enc_state->shared.frame_dim.xsize_blocks);
+    // Tile rectangle in block units, clipped to the frame.
+    Rect r(bx0, by0, bx1 - bx0, by1 - by0);
+
+    // For speeds up to Wombat, we only compute the color correlation map
+    // once we know the transform type and the quantization map.
+    if (cparams.speed_tier <= SpeedTier::kSquirrel) {
+      cfl_heuristics.ComputeTile(r, *opsin, enc_state->shared.matrices,
+                                 /*ac_strategy=*/nullptr,
+                                 /*raw_quant_field=*/nullptr,
+                                 /*quantizer=*/nullptr, /*fast=*/false, thread,
+                                 &enc_state->shared.cmap);
+    }
+
+    // Choose block sizes.
+    acs_heuristics.ProcessRect(r);
+
+    // Choose amount of post-processing smoothing.
+    // TODO(veluca): should this go *after* AdjustQuantField?
+    ar_heuristics.RunRect(r, *opsin, enc_state, thread);
+
+    // Always set the initial quant field, so we can compute the CfL map with
+    // more accuracy. The initial quant field might change in slower modes, but
+    // adjusting the quant field with butteraugli when all the other encoding
+    // parameters are fixed is likely a more reliable choice anyway.
+    AdjustQuantField(enc_state->shared.ac_strategy, r,
+                     cparams.butteraugli_distance,
+                     &enc_state->initial_quant_field);
+    quantizer.SetQuantFieldRect(enc_state->initial_quant_field, r,
+                                &enc_state->shared.raw_quant_field);
+
+    // Compute a non-default CfL map if we are at Hare speed, or slower.
+    if (cparams.speed_tier <= SpeedTier::kHare) {
+      cfl_heuristics.ComputeTile(
+          r, *opsin, enc_state->shared.matrices, &enc_state->shared.ac_strategy,
+          &enc_state->shared.raw_quant_field, &enc_state->shared.quantizer,
+          /*fast=*/cparams.speed_tier >= SpeedTier::kWombat, thread,
+          &enc_state->shared.cmap);
+    }
+  };
+  // One task per encoder tile; the init callback sizes the per-thread state
+  // of the AR and CfL heuristics before any tile runs.
+  JXL_RETURN_IF_ERROR(RunOnPool(
+      pool, 0,
+      DivCeil(enc_state->shared.frame_dim.xsize_blocks, kEncTileDimInBlocks) *
+          DivCeil(enc_state->shared.frame_dim.ysize_blocks,
+                  kEncTileDimInBlocks),
+      [&](const size_t num_threads) {
+        ar_heuristics.PrepareForThreads(num_threads);
+        cfl_heuristics.PrepareForThreads(num_threads);
+        return true;
+      },
+      process_tile, "Enc Heuristics"));
+
+  acs_heuristics.Finalize(aux_out);
+  if (cparams.speed_tier <= SpeedTier::kHare) {
+    cfl_heuristics.ComputeDC(/*fast=*/cparams.speed_tier >= SpeedTier::kWombat,
+                             &enc_state->shared.cmap);
+  }
+
+  // Refine quantization levels.
+  FindBestQuantizer(original_pixels, *opsin, enc_state, cms, pool, aux_out);
+
+  // Choose a context model that depends on the amount of quantization for AC.
+  if (cparams.speed_tier < SpeedTier::kFalcon) {
+    FindBestBlockEntropyModel(*enc_state);
+  }
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_heuristics.h b/third-party/libjxl/libjxl/lib/jxl/enc_heuristics.h
new file mode 100644
index 0000000000..3cb9b506a6
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_heuristics.h
@@ -0,0 +1,81 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_HEURISTICS_H_
+#define LIB_JXL_ENC_HEURISTICS_H_
+
+// Hook for custom encoder heuristics (VarDCT only for now).
+
+#include <jxl/cms_interface.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/modular/encoding/enc_ma.h"
+
+namespace jxl {
+
+struct AuxOut;
+struct PassesEncoderState;
+class DequantMatrices;
+class ImageBundle;
+class ModularFrameEncoder;
+
+// Interface for pluggable encoder heuristics. LossyFrameHeuristics is pure
+// virtual and must be implemented; the other hooks have default
+// implementations that return false.
+class EncoderHeuristics {
+ public:
+  virtual ~EncoderHeuristics() = default;
+  // Initializes encoder structures in `enc_state` using the original image data
+  // in `original_pixels`, and the XYB image data in `opsin`. Also modifies the
+  // `opsin` image by applying Gaborish, and doing other modifications if
+  // necessary. `pool` is used for running the computations on multiple threads.
+  // `aux_out` collects statistics and can be used to print debug images.
+  virtual Status LossyFrameHeuristics(
+      PassesEncoderState* enc_state, ModularFrameEncoder* modular_frame_encoder,
+      const ImageBundle* original_pixels, Image3F* opsin,
+      const JxlCmsInterface& cms, ThreadPool* pool, AuxOut* aux_out) = 0;
+
+  // Custom fixed tree for lossless mode. Must set `tree` to a valid tree if
+  // the function returns true. The default implementation returns false and
+  // leaves `tree` untouched.
+  virtual bool CustomFixedTreeLossless(const FrameDimensions& frame_dim,
+                                       Tree* tree) {
+    return false;
+  }
+
+  // If this method returns `true`, the `opsin` parameter to
+  // LossyFrameHeuristics will not be initialized, and should be initialized
+  // during the call. Moreover, `original_pixels` may not be in a linear
+  // colorspace (but will be the same as the `ib` value passed to this
+  // function). The default implementation returns false.
+  virtual bool HandlesColorConversion(const CompressParams& cparams,
+                                      const ImageBundle& ib) {
+    return false;
+  }
+};
+
+// The EncoderHeuristics implementation declared by this header. It overrides
+// LossyFrameHeuristics and HandlesColorConversion; CustomFixedTreeLossless is
+// inherited from the base class.
+class DefaultEncoderHeuristics : public EncoderHeuristics {
+ public:
+  // See EncoderHeuristics::LossyFrameHeuristics.
+  Status LossyFrameHeuristics(PassesEncoderState* enc_state,
+                              ModularFrameEncoder* modular_frame_encoder,
+                              const ImageBundle* original_pixels,
+                              Image3F* opsin, const JxlCmsInterface& cms,
+                              ThreadPool* pool, AuxOut* aux_out) override;
+  // See EncoderHeuristics::HandlesColorConversion.
+  bool HandlesColorConversion(const CompressParams& cparams,
+                              const ImageBundle& ib) override;
+};
+
+// Exposed here since it may be used by other EncoderHeuristics implementations
+// outside this project.
+void FindBestDequantMatrices(const CompressParams& cparams,
+                             const Image3F& opsin,
+                             ModularFrameEncoder* modular_frame_encoder,
+                             DequantMatrices* dequant_matrices);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_HEURISTICS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_huffman.cc b/third-party/libjxl/libjxl/lib/jxl/enc_huffman.cc
new file mode 100644
index 0000000000..3eab2c218a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_huffman.cc
@@ -0,0 +1,214 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_huffman.h"
+
+#include <algorithm>
+#include <memory>
+
+#include "lib/jxl/enc_huffman_tree.h"
+
+namespace jxl {
+
+namespace {
+
+constexpr int kCodeLengthCodes = 18;
+
+void StoreHuffmanTreeOfHuffmanTreeToBitMask(const int num_codes,
+                                            const uint8_t* code_length_bitdepth,
+                                            BitWriter* writer) {
+  // Writes the depths of the code-length code: a 2-bit count of skipped
+  // leading entries, then one fixed prefix code per remaining entry.
+  //
+  // Order in which the 18 code-length-code depths are visited.
+  static const uint8_t kStorageOrder[kCodeLengthCodes] = {
+      1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+  // Every depth (0..5) is itself sent with the following static Huffman code:
+  //   Depth    Code
+  //   -----    ----
+  //   0          00
+  //   1        1110
+  //   2         110
+  //   3          01
+  //   4          10
+  //   5        1111
+  static const uint8_t kDepthCodeBits[6] = {0, 7, 3, 2, 1, 15};
+  static const uint8_t kDepthCodeLengths[6] = {2, 4, 3, 2, 2, 4};
+
+  // Depth of the entry found at position `pos` of the storage order.
+  const auto depth_at = [code_length_bitdepth](size_t pos) -> size_t {
+    return code_length_bitdepth[kStorageOrder[pos]];
+  };
+
+  // Trailing zero depths are dropped, but only if more than one code is used.
+  size_t end_pos = kCodeLengthCodes;
+  if (num_codes > 1) {
+    while (end_pos > 0 && depth_at(end_pos - 1) == 0) --end_pos;
+  }
+
+  // Two or three leading zero depths can be skipped (never exactly one).
+  size_t begin_pos = 0;
+  if (depth_at(0) == 0 && depth_at(1) == 0) {
+    begin_pos = (depth_at(2) == 0) ? 3 : 2;
+  }
+  writer->Write(2, begin_pos);
+
+  for (size_t pos = begin_pos; pos < end_pos; ++pos) {
+    const size_t d = depth_at(pos);
+    writer->Write(kDepthCodeLengths[d], kDepthCodeBits[d]);
+  }
+}
+
+void StoreHuffmanTreeToBitMask(const size_t huffman_tree_size,
+                               const uint8_t* huffman_tree,
+                               const uint8_t* huffman_tree_extra_bits,
+                               const uint8_t* code_length_bitdepth,
+                               const uint16_t* code_length_bitdepth_symbols,
+                               BitWriter* writer) {
+  // Emits every entry of the compact tree using the code-length code.
+  for (size_t pos = 0; pos < huffman_tree_size; ++pos) {
+    const size_t entry = huffman_tree[pos];
+    writer->Write(code_length_bitdepth[entry],
+                  code_length_bitdepth_symbols[entry]);
+    // Entries 16 and 17 are followed by 2 and 3 raw extra bits
+    // respectively; all other entries carry none.
+    if (entry == 16) {
+      writer->Write(2, huffman_tree_extra_bits[pos]);
+    } else if (entry == 17) {
+      writer->Write(3, huffman_tree_extra_bits[pos]);
+    }
+  }
+}
+
+void StoreSimpleHuffmanTree(const uint8_t* depths, size_t symbols[4],
+                            size_t num_symbols, size_t max_bits,
+                            BitWriter* writer) {
+  // Header: the 2-bit value 1 marks a simple Huffman code; it is followed
+  // by NSYM - 1, also in 2 bits.
+  writer->Write(2, 1);
+  writer->Write(2, num_symbols - 1);
+
+  // Order the symbols by increasing depth with an in-place exchange sort.
+  // Note that this permutes the caller's `symbols` array.
+  for (size_t lo = 0; lo < num_symbols; ++lo) {
+    for (size_t hi = lo + 1; hi < num_symbols; ++hi) {
+      if (depths[symbols[hi]] < depths[symbols[lo]]) {
+        std::swap(symbols[hi], symbols[lo]);
+      }
+    }
+  }
+
+  // Two and three symbols are written as they are. Every other count takes
+  // the four-symbol form, which is terminated by a tree-select bit.
+  const bool four_symbol_form = (num_symbols != 2) && (num_symbols != 3);
+  const size_t num_to_write = four_symbol_form ? 4 : num_symbols;
+  for (size_t k = 0; k < num_to_write; ++k) {
+    // Each symbol index is written with `max_bits` bits.
+    writer->Write(max_bits, symbols[k]);
+  }
+
+  if (four_symbol_form) {
+    // tree-select: 1 if the shallowest symbol has depth 1, else 0.
+    writer->Write(1, depths[symbols[0]] == 1 ? 1 : 0);
+  }
+}
+
+// num = alphabet size
+// depths = symbol depths
+void StoreHuffmanTree(const uint8_t* depths, size_t num, BitWriter* writer) {
+  // Serializes the code given by `depths` (one depth per symbol).
+  // Step 1: turn the depths into the compact (run-length) representation.
+  // One allocation backs both output arrays, `num` bytes each.
+  std::unique_ptr<uint8_t[]> storage(new uint8_t[2 * num]);
+  uint8_t* const compact_tree = storage.get();
+  uint8_t* const compact_extra_bits = storage.get() + num;
+  size_t compact_size = 0;
+  WriteHuffmanTree(depths, num, &compact_size, compact_tree,
+                   compact_extra_bits);
+
+  // Step 2: histogram of the entries of the compact representation.
+  uint32_t entry_histogram[kCodeLengthCodes] = {0};
+  for (size_t pos = 0; pos < compact_size; ++pos) {
+    ++entry_histogram[compact_tree[pos]];
+  }
+
+  // Count the distinct entries in use, saturating at two, and remember the
+  // first one: a single used entry is special-cased further down.
+  int num_codes = 0;
+  int code = 0;
+  for (int i = 0; i < kCodeLengthCodes && num_codes < 2; ++i) {
+    if (entry_histogram[i] == 0) continue;
+    if (num_codes == 0) code = i;
+    ++num_codes;
+  }
+
+  // Step 3: build a second Huffman code (depth limit 5) that is used to
+  // compress the compact representation itself.
+  uint8_t code_length_bitdepth[kCodeLengthCodes] = {0};
+  uint16_t code_length_bitdepth_symbols[kCodeLengthCodes] = {0};
+  CreateHuffmanTree(entry_histogram, kCodeLengthCodes, 5,
+                    code_length_bitdepth);
+  ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes,
+                            code_length_bitdepth_symbols);
+
+  // Step 4: emit the depths of that second code ...
+  StoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth,
+                                         writer);
+
+  // With only one entry in use, its depth is reset to 0 after the header
+  // has been written, so the Write calls below use a bit count of 0 for it.
+  if (num_codes == 1) {
+    code_length_bitdepth[code] = 0;
+  }
+
+  // ... and finally the compact tree itself, coded with the second code.
+  StoreHuffmanTreeToBitMask(compact_size, compact_tree, compact_extra_bits,
+                            code_length_bitdepth,
+                            code_length_bitdepth_symbols, writer);
+}
+
+}  // namespace
+
+void BuildAndStoreHuffmanTree(const uint32_t* histogram, const size_t length,
+                              uint8_t* depth, uint16_t* bits,
+                              BitWriter* writer) {
+  // Collect the used symbols. The first four are remembered; counting stops
+  // at five, which already rules out the simple encoding.
+  size_t num_used = 0;
+  size_t first_used[4] = {0};
+  for (size_t sym = 0; sym < length && num_used <= 4; ++sym) {
+    if (histogram[sym] == 0) continue;
+    if (num_used < 4) first_used[num_used] = sym;
+    ++num_used;
+  }
+
+  // Number of bits needed to represent the largest symbol, `length - 1`.
+  size_t max_bits = 0;
+  for (size_t v = length - 1; v != 0; v >>= 1) {
+    ++max_bits;
+  }
+
+  if (num_used <= 1) {
+    // At most one symbol is used. `depth` and `bits` are expected to be
+    // zero-initialized by the caller already, so nothing is stored in them;
+    // only a 4-bit header and the first used symbol (or 0) are written.
+    writer->Write(4, 1);
+    writer->Write(max_bits, first_used[0]);
+    return;
+  }
+
+  CreateHuffmanTree(histogram, length, 15, depth);
+  ConvertBitDepthsToSymbols(depth, length, bits);
+
+  // Up to four used symbols fit the simple encoding; anything larger needs
+  // the full tree representation.
+  if (num_used > 4) {
+    StoreHuffmanTree(depth, length, writer);
+  } else {
+    StoreSimpleHuffmanTree(depth, first_used, num_used, max_bits, writer);
+  }
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_huffman.h b/third-party/libjxl/libjxl/lib/jxl/enc_huffman.h
new file mode 100644
index 0000000000..d7a66584e8
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_huffman.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_HUFFMAN_H_
+#define LIB_JXL_ENC_HUFFMAN_H_
+
+#include "lib/jxl/enc_bit_writer.h"
+
+namespace jxl {
+
+// Builds a Huffman tree for the given histogram, and encodes it into writer
+// in a format that can be read by HuffmanDecodingData::ReadFromBitstream.
+// An allotment for `writer` must already have been created by the caller.
+void BuildAndStoreHuffmanTree(const uint32_t* histogram, size_t length,
+                              uint8_t* depth, uint16_t* bits,
+                              BitWriter* writer);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_HUFFMAN_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_huffman_tree.cc b/third-party/libjxl/libjxl/lib/jxl/enc_huffman_tree.cc
new file mode 100644
index 0000000000..5c40dea770
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_huffman_tree.cc
@@ -0,0 +1,328 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_huffman_tree.h"
+
+#include <algorithm>
+#include <limits>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth,
+              uint8_t level) {
+  if (p.index_left < 0) {  // Leaf: the value field holds the symbol.
+    depth[p.index_right_or_value] = level;
+    return;
+  }
+  ++level;  // Inner node: both children live one level further down.
+  SetDepth(pool[p.index_left], pool, depth, level);
+  SetDepth(pool[p.index_right_or_value], pool, depth, level);
+}
+
+// Sort the root nodes, least popular first.
+static JXL_INLINE bool Compare(const HuffmanTree& v0, const HuffmanTree& v1) {
+  return v1.total_count > v0.total_count;  // strict ordering by count
+}
+
+// This function will create a Huffman tree.
+//
+// The catch here is that the tree cannot be arbitrarily deep.
+// Brotli specifies a maximum depth of 15 bits for "code trees"
+// and 7 bits for "code length code trees."
+//
+// count_limit is the value that is to be faked as the minimum value
+// and this minimum value is raised until the tree matches the
+// maximum length requirement.
+//
+// This algorithm is not of excellent performance for very long data blocks,
+// especially when population counts are longer than 2**tree_limit, but
+// we are not planning to use this with extremely long blocks.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
+void CreateHuffmanTree(const uint32_t* data, const size_t length,
+                       const int tree_limit, uint8_t* depth) {
+  // Each pass builds an unrestricted Huffman tree. If it is deeper than
+  // tree_limit, small counts are raised to `count_limit - 1` (count_limit
+  // doubles per pass) and the tree is rebuilt. For block sizes below 64 kB
+  // a single pass is always enough.
+  for (uint32_t count_limit = 1;; count_limit *= 2) {
+    std::vector<HuffmanTree> tree;
+    tree.reserve(2 * length + 1);
+
+    for (size_t i = length; i != 0;) {  // Leaves, from last symbol to first.
+      --i;
+      if (data[i]) {
+        const uint32_t count = std::max(data[i], count_limit - 1);
+        tree.emplace_back(count, -1, static_cast<int16_t>(i));
+      }
+    }
+
+    const size_t n = tree.size();  // Number of symbols with non-zero count.
+    if (n <= 1) {
+      // n == 1: fake depth, fixed on upper level. n == 0: nothing to build.
+      if (n == 1) depth[tree[0].index_right_or_value] = 1;
+      break;  // Also keeps `n - 1` below from wrapping around when n == 0.
+    }
+
+    std::stable_sort(tree.begin(), tree.end(), Compare);
+
+    // The nodes are:
+    // [0, n): the sorted leaf nodes that we start with.
+    // [n]: we add a sentinel here.
+    // [n + 1, 2n): new parent nodes are added here, starting from
+    //              (n+1). These are naturally in ascending order.
+    // [2n]: we add a sentinel at the end as well.
+    // There will be (2n+1) elements at the end.
+    const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
+    tree.push_back(sentinel);
+    tree.push_back(sentinel);
+
+    size_t i = 0;      // Points to the next leaf node.
+    size_t j = n + 1;  // Points to the next non-leaf node.
+    for (size_t k = n - 1; k != 0; --k) {  // n leaves need n - 1 merges.
+      size_t left, right;
+      if (tree[i].total_count <= tree[j].total_count) {
+        left = i;
+        ++i;
+      } else {
+        left = j;
+        ++j;
+      }
+      if (tree[i].total_count <= tree[j].total_count) {
+        right = i;
+        ++i;
+      } else {
+        right = j;
+        ++j;
+      }
+
+      // The sentinel node becomes the parent node.
+      size_t j_end = tree.size() - 1;
+      tree[j_end].total_count =
+          tree[left].total_count + tree[right].total_count;
+      tree[j_end].index_left = static_cast<int16_t>(left);
+      tree[j_end].index_right_or_value = static_cast<int16_t>(right);
+
+      // Add back the last sentinel node.
+      tree.push_back(sentinel);
+    }
+    JXL_DASSERT(tree.size() == 2 * n + 1);
+    SetDepth(tree[2 * n - 1], &tree[0], depth, 0);  // Root is at 2n - 1.
+
+    // We need to pack the Huffman tree in tree_limit bits.
+    // If this was not successful, add fake entities to the lowest values
+    // and retry. Entries of unused symbols are read as the caller left them.
+    if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
+      break;
+    }
+  }
+}
+
+void Reverse(uint8_t* v, size_t start, size_t end) {
+  // Reverses the half-open range v[start, end) in place.
+  //
+  // An empty or inverted range (start >= end) is left untouched. This also
+  // covers end == 0, where decrementing the unsigned `end` would wrap around
+  // and send a hand-written swap loop far out of bounds.
+  if (start >= end) return;
+
+  std::reverse(v + start, v + end);
+}
+
+void WriteHuffmanTreeRepetitions(const uint8_t previous_value,
+                                 const uint8_t value, size_t repetitions,
+                                 size_t* tree_size, uint8_t* tree,
+                                 uint8_t* extra_bits_data) {
+  JXL_DASSERT(repetitions > 0);
+  // Appends one entry (symbol plus its extra bits) to the output arrays.
+  const auto append = [&](uint8_t symbol, uint8_t extra) {
+    tree[*tree_size] = symbol;
+    extra_bits_data[*tree_size] = extra;
+    ++(*tree_size);
+  };
+
+  // A depth that differs from the previous one is first written literally
+  // once, which uses up one repetition.
+  if (value != previous_value) {
+    append(value, 0);
+    --repetitions;
+  }
+  // A remaining count of exactly 7 is reduced to 6 by one more literal.
+  if (repetitions == 7) {
+    append(value, 0);
+    --repetitions;
+  }
+  if (repetitions < 3) {
+    // Too short for a repeat entry: write the depth out literally.
+    while (repetitions-- != 0) append(value, 0);
+    return;
+  }
+  // Split `repetitions - 3` into 2-bit groups, lowest group first; each
+  // group becomes the extra bits of one entry 16. The entries are then
+  // reversed so that the group generated last comes first in the output.
+  const size_t first_repeat = *tree_size;
+  for (size_t rest = repetitions - 3;; --rest) {
+    append(16, static_cast<uint8_t>(rest & 0x3));
+    rest >>= 2;
+    if (rest == 0) break;
+  }
+  Reverse(tree, first_repeat, *tree_size);
+  Reverse(extra_bits_data, first_repeat, *tree_size);
+}
+
+void WriteHuffmanTreeRepetitionsZeros(size_t repetitions, size_t* tree_size,
+                                      uint8_t* tree, uint8_t* extra_bits_data) {
+  // Appends one entry (symbol plus its extra bits) to the output arrays.
+  const auto append = [&](uint8_t symbol, uint8_t extra) {
+    tree[*tree_size] = symbol;
+    extra_bits_data[*tree_size] = extra;
+    ++(*tree_size);
+  };
+
+  // A run of exactly 11 zeros is reduced to 10 by one literal zero.
+  if (repetitions == 11) {
+    append(0, 0);
+    --repetitions;
+  }
+  if (repetitions < 3) {
+    // Too short for a repeat entry: write the zeros out literally.
+    while (repetitions-- != 0) append(0, 0);
+    return;
+  }
+  // Split `repetitions - 3` into 3-bit groups, lowest group first; each
+  // group becomes the extra bits of one entry 17. The entries are then
+  // reversed so that the group generated last comes first in the output.
+  const size_t first_repeat = *tree_size;
+  for (size_t rest = repetitions - 3;; --rest) {
+    append(17, static_cast<uint8_t>(rest & 0x7));
+    rest >>= 3;
+    if (rest == 0) break;
+  }
+  Reverse(tree, first_repeat, *tree_size);
+  Reverse(extra_bits_data, first_repeat, *tree_size);
+}
+
+static void DecideOverRleUse(const uint8_t* depth, const size_t length,
+                             bool* use_rle_for_non_zero,
+                             bool* use_rle_for_zero) {
+  // Total length and number (starting at 1) of the qualifying runs.
+  size_t zero_total = 0, zero_runs = 1;
+  size_t non_zero_total = 0, non_zero_runs = 1;
+  size_t run_begin = 0;
+  while (run_begin < length) {
+    const uint8_t value = depth[run_begin];
+    size_t run_end = run_begin + 1;
+    while (run_end < length && depth[run_end] == value) ++run_end;
+    const size_t run_length = run_end - run_begin;
+    // Zero runs qualify from length 3, non-zero runs from length 4.
+    if (value == 0 && run_length >= 3) {
+      zero_total += run_length;
+      ++zero_runs;
+    } else if (value != 0 && run_length >= 4) {
+      non_zero_total += run_length;
+      ++non_zero_runs;
+    }
+    run_begin = run_end;
+  }
+  // RLE is enabled when the total exceeds twice the run counter.
+  *use_rle_for_non_zero = non_zero_total > 2 * non_zero_runs;
+  *use_rle_for_zero = zero_total > 2 * zero_runs;
+}
+
+void WriteHuffmanTree(const uint8_t* depth, size_t length, size_t* tree_size,
+                      uint8_t* tree, uint8_t* extra_bits_data) {
+  // Trailing zero depths are not encoded at all.
+  size_t used_length = length;
+  while (used_length > 0 && depth[used_length - 1] == 0) {
+    --used_length;
+  }
+
+  // Decide, separately for zero and non-zero depths, whether runs are
+  // collapsed into repeat entries. Short alphabets never use RLE; note that
+  // the threshold is applied to the untrimmed `length`.
+  bool rle_non_zero = false;
+  bool rle_zero = false;
+  if (length > 50) {
+    DecideOverRleUse(depth, used_length, &rle_non_zero, &rle_zero);
+  }
+
+  // Initial "previous depth" seen by the first non-zero run.
+  uint8_t last_non_zero = 8;
+  size_t pos = 0;
+  while (pos < used_length) {
+    const uint8_t value = depth[pos];
+    const bool use_rle = (value == 0) ? rle_zero : rle_non_zero;
+
+    // Without RLE every depth is emitted as a run of length one.
+    size_t run_length = 1;
+    if (use_rle) {
+      while (pos + run_length < used_length &&
+             depth[pos + run_length] == value) {
+        ++run_length;
+      }
+    }
+
+    if (value == 0) {
+      WriteHuffmanTreeRepetitionsZeros(run_length, tree_size, tree,
+                                       extra_bits_data);
+    } else {
+      WriteHuffmanTreeRepetitions(last_non_zero, value, run_length, tree_size,
+                                  tree, extra_bits_data);
+      last_non_zero = value;
+    }
+    pos += run_length;
+  }
+}
+
+namespace {
+
+uint16_t ReverseBits(int num_bits, uint16_t bits) {
+  // kNibbleReversed[x] is the 4-bit value x with its bit order reversed.
+  static const size_t kNibbleReversed[16] = {0x0, 0x8, 0x4, 0xc, 0x2, 0xa,
+                                             0x6, 0xe, 0x1, 0x9, 0x5, 0xd,
+                                             0x3, 0xb, 0x7, 0xf};
+  size_t reversed = kNibbleReversed[bits & 0xf];
+  for (int done = 4; done < num_bits; done += 4) {
+    bits = static_cast<uint16_t>(bits >> 4);
+    reversed = (reversed << 4) | kNibbleReversed[bits & 0xf];
+  }
+  // Drop the surplus low bits of a partially used last nibble.
+  return static_cast<uint16_t>(reversed >> (-num_bits & 0x3));
+}
+
+}  // namespace
+
+void ConvertBitDepthsToSymbols(const uint8_t* depth, size_t len,
+                               uint16_t* bits) {
+  // In Brotli, all bit depths are [1..15]
+  // 0 bit depth means that the symbol does not exist.
+  const int kMaxBits = 16;  // 0..15 are values for bits
+  // Number of symbols per bit depth; depth 0 (unused symbols) is ignored.
+  uint16_t bl_count[kMaxBits] = {0};
+  for (size_t sym = 0; sym < len; ++sym) {
+    ++bl_count[depth[sym]];
+  }
+  bl_count[0] = 0;
+  // First code of every bit depth, assigned in increasing depth order.
+  uint16_t next_code[kMaxBits];
+  next_code[0] = 0;
+  int code = 0;
+  for (int num_bits = 1; num_bits < kMaxBits; ++num_bits) {
+    code = (code + bl_count[num_bits - 1]) << 1;
+    next_code[num_bits] = static_cast<uint16_t>(code);
+  }
+  // Symbols of equal depth receive consecutive codes in symbol order. The
+  // codes are stored bit-reversed; entries of unused symbols are not written.
+  for (size_t sym = 0; sym < len; ++sym) {
+    const uint8_t num_bits = depth[sym];
+    if (num_bits == 0) continue;
+    bits[sym] = ReverseBits(num_bits, next_code[num_bits]++);
+  }
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_huffman_tree.h b/third-party/libjxl/libjxl/lib/jxl/enc_huffman_tree.h
new file mode 100644
index 0000000000..7d716cd3b5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_huffman_tree.h
@@ -0,0 +1,52 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Library for creating Huffman codes from population counts.
+
+#ifndef LIB_JXL_HUFFMAN_TREE_H_
+#define LIB_JXL_HUFFMAN_TREE_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+namespace jxl {
+
+// A node of a Huffman tree.
+struct HuffmanTree {
+  HuffmanTree(uint32_t count, int16_t left, int16_t right)
+      : total_count(count), index_left(left), index_right_or_value(right) {}
+  uint32_t total_count;  // Population count of the node (sum for parents).
+  int16_t index_left;  // Index of the left child; negative for a leaf.
+  int16_t index_right_or_value;  // Right child index, or a leaf's symbol.
+};
+
+void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth,
+              uint8_t level);
+
+// This function will create a Huffman tree.
+//
+// The (data,length) contains the population counts.
+// The tree_limit is the maximum bit depth of the Huffman codes.
+//
+// The depth contains the tree, i.e., how many bits are used for
+// the symbol.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
+void CreateHuffmanTree(const uint32_t* data, size_t length, int tree_limit,
+                       uint8_t* depth);
+
+// Write a Huffman tree from bit depths into the bitstream representation
+// of a Huffman tree. The generated Huffman tree is to be compressed once
+// more using a Huffman tree
+void WriteHuffmanTree(const uint8_t* depth, size_t length, size_t* tree_size,
+                      uint8_t* tree, uint8_t* extra_bits_data);
+
+// Get the actual bit values for a tree of bit depths.
+void ConvertBitDepthsToSymbols(const uint8_t* depth, size_t len,
+                               uint16_t* bits);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_HUFFMAN_TREE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_icc_codec.cc b/third-party/libjxl/libjxl/lib/jxl/enc_icc_codec.cc
new file mode 100644
index 0000000000..a6782f6a45
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_icc_codec.cc
@@ -0,0 +1,406 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_icc_codec.h"
+
+#include <stdint.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/icc_codec_common.h"
+
+namespace jxl {
+namespace {
+
+// Unshuffles or de-interleaves bytes, for example with width 2, turns
+// "AaBbCcDd" into "ABCDabcd", this for example de-interleaves UTF-16 bytes into
+// first all the high order bytes, then all the low order bytes.
+// Transposes a matrix of width columns and ceil(size / width) rows. There are
+// size elements, size may be < width * height, if so the
+// last elements of the bottom row are missing, the missing spots are
+// transposed along with the filled spots, and the result has the missing
+// elements at the bottom of the rightmost column. The input is the input matrix
+// in scanline order, the output is the result matrix in scanline order, with
+// missing elements skipped over (this may occur at multiple positions).
+void Unshuffle(uint8_t* data, size_t size, size_t width) {
+  const size_t num_rows = (size + width - 1) / width;  // rows of the input
+  PaddedBytes transposed(size);
+  // Scatter pass: consecutive input bytes land `num_rows` apart in the
+  // output. Once the target runs past the end, it restarts one further in.
+  size_t restart = 0;
+  size_t out = 0;
+  for (size_t in = 0; in < size; ++in) {
+    transposed[out] = data[in];
+    out += num_rows;
+    if (out >= size) out = ++restart;
+  }
+  // Copy the result back over the input buffer.
+  for (size_t k = 0; k < size; ++k) data[k] = transposed[k];
+}
+
+// This is performed by the encoder, the encoder must be able to encode any
+// random byte stream (not just byte streams that are a valid ICC profile), so
+// an error returned by this function is an implementation error.
+Status PredictAndShuffle(size_t stride, size_t width, int order, size_t num,
+                         const uint8_t* data, size_t size, size_t* pos,
+                         PaddedBytes* result) {
+  JXL_RETURN_IF_ERROR(CheckOutOfBounds(*pos, num, size));
+  const size_t begin = *pos;
+  if (begin == 0 || ((begin - 1u) >> 2u) < stride) {
+    return JXL_FAILURE("Invalid stride");  // spec: stride * 4 < *pos
+  }
+  if (begin < stride * 4) return JXL_FAILURE("Too large stride");
+  const size_t residuals_begin = result->size();
+  for (size_t k = 0; k < num; ++k) {
+    const uint8_t guess =
+        LinearPredictICCValue(data, begin, k, stride, width, order);
+    result->push_back(data[begin + k] - guess);  // prediction residual
+  }
+  *pos = begin + num;
+  if (width > 1) Unshuffle(result->data() + residuals_begin, num, width);
+  return true;
+}
+}  // namespace
+
+// Outputs a transformed form of the given icc profile. The result itself is
+// not particularly smaller than the input data in bytes, but it will be in a
+// form that is easier to compress (more zeroes, ...) and will compress better
+// with brotli.
+Status PredictICC(const uint8_t* icc, size_t size, PaddedBytes* result) {
+  PaddedBytes commands;
+  PaddedBytes data;
+
+  EncodeVarInt(size, result);
+
+  // Header
+  PaddedBytes header = ICCInitialHeaderPrediction();
+  EncodeUint32(0, size, &header);
+  for (size_t i = 0; i < kICCHeaderSize && i < size; i++) {
+    ICCPredictHeader(icc, size, header.data(), i);
+    data.push_back(icc[i] - header[i]);
+  }
+  if (size <= kICCHeaderSize) {
+    EncodeVarInt(0, result);  // 0 commands
+    for (size_t i = 0; i < data.size(); i++) {
+      result->push_back(data[i]);
+    }
+    return true;
+  }
+
+  std::vector<Tag> tags;
+  std::vector<size_t> tagstarts;
+  std::vector<size_t> tagsizes;
+  std::map<size_t, size_t> tagmap;
+
+  // Tag list
+  size_t pos = kICCHeaderSize;
+  if (pos + 4 <= size) {
+    uint64_t numtags = DecodeUint32(icc, size, pos);
+    pos += 4;
+    EncodeVarInt(numtags + 1, &commands);
+    uint64_t prevtagstart = kICCHeaderSize + numtags * 12;
+    uint32_t prevtagsize = 0;
+    for (size_t i = 0; i < numtags; i++) {
+      if (pos + 12 > size) break;
+
+      Tag tag = DecodeKeyword(icc, size, pos + 0);
+      uint32_t tagstart = DecodeUint32(icc, size, pos + 4);
+      uint32_t tagsize = DecodeUint32(icc, size, pos + 8);
+      pos += 12;
+
+      tags.push_back(tag);
+      tagstarts.push_back(tagstart);
+      tagsizes.push_back(tagsize);
+      tagmap[tagstart] = tags.size() - 1;
+
+      uint8_t tagcode = kCommandTagUnknown;
+      for (size_t j = 0; j < kNumTagStrings; j++) {
+        if (tag == *kTagStrings[j]) {
+          tagcode = j + kCommandTagStringFirst;
+          break;
+        }
+      }
+
+      if (tag == kRtrcTag && pos + 24 < size) {
+        bool ok = true;
+        ok &= DecodeKeyword(icc, size, pos + 0) == kGtrcTag;
+        ok &= DecodeKeyword(icc, size, pos + 12) == kBtrcTag;
+        if (ok) {
+          for (size_t kk = 0; kk < 8; kk++) {
+            if (icc[pos - 8 + kk] != icc[pos + 4 + kk]) ok = false;
+            if (icc[pos - 8 + kk] != icc[pos + 16 + kk]) ok = false;
+          }
+        }
+        if (ok) {
+          tagcode = kCommandTagTRC;
+          pos += 24;
+          i += 2;
+        }
+      }
+
+      if (tag == kRxyzTag && pos + 24 < size) {
+        bool ok = true;
+        ok &= DecodeKeyword(icc, size, pos + 0) == kGxyzTag;
+        ok &= DecodeKeyword(icc, size, pos + 12) == kBxyzTag;
+        uint32_t offsetr = tagstart;
+        uint32_t offsetg = DecodeUint32(icc, size, pos + 4);
+        uint32_t offsetb = DecodeUint32(icc, size, pos + 16);
+        uint32_t sizer = tagsize;
+        uint32_t sizeg = DecodeUint32(icc, size, pos + 8);
+        uint32_t sizeb = DecodeUint32(icc, size, pos + 20);
+        ok &= sizer == 20;
+        ok &= sizeg == 20;
+        ok &= sizeb == 20;
+        ok &= (offsetg == offsetr + 20);
+        ok &= (offsetb == offsetr + 40);
+        if (ok) {
+          tagcode = kCommandTagXYZ;
+          pos += 24;
+          i += 2;
+        }
+      }
+
+      uint8_t command = tagcode;
+      uint64_t predicted_tagstart = prevtagstart + prevtagsize;
+      if (predicted_tagstart != tagstart) command |= kFlagBitOffset;
+      size_t predicted_tagsize = prevtagsize;
+      if (tag == kRxyzTag || tag == kGxyzTag || tag == kBxyzTag ||
+          tag == kKxyzTag || tag == kWtptTag || tag == kBkptTag ||
+          tag == kLumiTag) {
+        predicted_tagsize = 20;
+      }
+      if (predicted_tagsize != tagsize) command |= kFlagBitSize;
+      commands.push_back(command);
+      if (tagcode == 1) {
+        AppendKeyword(tag, &data);
+      }
+      if (command & kFlagBitOffset) EncodeVarInt(tagstart, &commands);
+      if (command & kFlagBitSize) EncodeVarInt(tagsize, &commands);
+
+      prevtagstart = tagstart;
+      prevtagsize = tagsize;
+    }
+  }
+  // Indicate end of tag list or varint indicating there's none
+  commands.push_back(0);
+
+  // Main content
+  // The main content in a valid ICC profile contains tagged elements, with the
+  // tag types (4 letter names) given by the tag list above, and the tag list
+  // pointing to the start and indicating the size of each tagged element. It is
+  // allowed for tagged elements to overlap, e.g. the curve for R, G and B could
+  // all point to the same one.
+  Tag tag;
+  size_t tagstart = 0, tagsize = 0, clutstart = 0;
+
+  size_t last0 = pos;
+  // This loop appends commands to the output, processing some sub-section of a
+  // current tagged element each time. We need to keep track of the tagtype of
+  // the current element, and update it when we encounter the boundary of a
+  // next one.
+  // It is not required that the input data is a valid ICC profile, if the
+  // encoder does not recognize the data it will still be able to output bytes
+  // but will not predict as well.
+  while (pos <= size) {
+    size_t last1 = pos;
+    PaddedBytes commands_add;
+    PaddedBytes data_add;
+
+    // This means the loop brought the position beyond the tag end.
+    if (pos > tagstart + tagsize) {
+      tag = {{0, 0, 0, 0}};  // nonsensical value
+    }
+
+    if (commands_add.empty() && data_add.empty() && tagmap.count(pos) &&
+        pos + 4 <= size) {
+      size_t index = tagmap[pos];
+      tag = DecodeKeyword(icc, size, pos);
+      tagstart = tagstarts[index];
+      tagsize = tagsizes[index];
+
+      if (tag == kMlucTag && pos + tagsize <= size && tagsize > 8 &&
+          icc[pos + 4] == 0 && icc[pos + 5] == 0 && icc[pos + 6] == 0 &&
+          icc[pos + 7] == 0) {
+        size_t num = tagsize - 8;
+        commands_add.push_back(kCommandTypeStartFirst + 3);
+        pos += 8;
+        commands_add.push_back(kCommandShuffle2);
+        EncodeVarInt(num, &commands_add);
+        size_t start = data_add.size();
+        for (size_t i = 0; i < num; i++) {
+          data_add.push_back(icc[pos]);
+          pos++;
+        }
+        Unshuffle(data_add.data() + start, num, 2);
+      }
+
+      if (tag == kCurvTag && pos + tagsize <= size && tagsize > 8 &&
+          icc[pos + 4] == 0 && icc[pos + 5] == 0 && icc[pos + 6] == 0 &&
+          icc[pos + 7] == 0) {
+        size_t num = tagsize - 8;
+        if (num > 16 && num < (1 << 28) && pos + num <= size && pos > 0) {
+          commands_add.push_back(kCommandTypeStartFirst + 5);
+          pos += 8;
+          commands_add.push_back(kCommandPredict);
+          int order = 1, width = 2, stride = width;
+          commands_add.push_back((order << 2) | (width - 1));
+          EncodeVarInt(num, &commands_add);
+          JXL_RETURN_IF_ERROR(PredictAndShuffle(stride, width, order, num, icc,
+                                                size, &pos, &data_add));
+        }
+      }
+    }
+
+    if (tag == kMab_Tag || tag == kMba_Tag) {
+      Tag subTag = DecodeKeyword(icc, size, pos);
+      if (pos + 12 < size && (subTag == kCurvTag || subTag == kVcgtTag) &&
+          DecodeUint32(icc, size, pos + 4) == 0) {
+        uint32_t num = DecodeUint32(icc, size, pos + 8) * 2;
+        if (num > 16 && num < (1 << 28) && pos + 12 + num <= size) {
+          pos += 12;
+          last1 = pos;
+          commands_add.push_back(kCommandPredict);
+          int order = 1, width = 2, stride = width;
+          commands_add.push_back((order << 2) | (width - 1));
+          EncodeVarInt(num, &commands_add);
+          JXL_RETURN_IF_ERROR(PredictAndShuffle(stride, width, order, num, icc,
+                                                size, &pos, &data_add));
+        }
+      }
+
+      if (pos == tagstart + 24 && pos + 4 < size) {
+        // Note that this value can be remembered for next iterations of the
+        // loop, so the "pos == clutstart" if below can trigger during a later
+        // iteration.
+        clutstart = tagstart + DecodeUint32(icc, size, pos);
+      }
+
+      if (pos == clutstart && clutstart + 16 < size) {
+        size_t numi = icc[tagstart + 8];
+        size_t numo = icc[tagstart + 9];
+        size_t width = icc[clutstart + 16];
+        size_t stride = width * numo;
+        size_t num = width * numo;
+        for (size_t i = 0; i < numi && clutstart + i < size; i++) {
+          num *= icc[clutstart + i];
+        }
+        if ((width == 1 || width == 2) && num > 64 && num < (1 << 28) &&
+            pos + num <= size && pos > stride * 4) {
+          commands_add.push_back(kCommandPredict);
+          int order = 1;
+          uint8_t flags =
+              (order << 2) | (width - 1) | (stride == width ? 0 : 16);
+          commands_add.push_back(flags);
+          if (flags & 16) EncodeVarInt(stride, &commands_add);
+          EncodeVarInt(num, &commands_add);
+          JXL_RETURN_IF_ERROR(PredictAndShuffle(stride, width, order, num, icc,
+                                                size, &pos, &data_add));
+        }
+      }
+    }
+
+    if (commands_add.empty() && data_add.empty() && tag == kGbd_Tag &&
+        pos == tagstart + 8 && pos + tagsize - 8 <= size && pos > 16 &&
+        tagsize > 8) {
+      size_t width = 4, order = 0, stride = width;
+      size_t num = tagsize - 8;
+      uint8_t flags = (order << 2) | (width - 1) | (stride == width ? 0 : 16);
+      commands_add.push_back(kCommandPredict);
+      commands_add.push_back(flags);
+      if (flags & 16) EncodeVarInt(stride, &commands_add);
+      EncodeVarInt(num, &commands_add);
+      JXL_RETURN_IF_ERROR(PredictAndShuffle(stride, width, order, num, icc,
+                                            size, &pos, &data_add));
+    }
+
+    if (commands_add.empty() && data_add.empty() && pos + 20 <= size) {
+      Tag subTag = DecodeKeyword(icc, size, pos);
+      if (subTag == kXyz_Tag && DecodeUint32(icc, size, pos + 4) == 0) {
+        commands_add.push_back(kCommandXYZ);
+        pos += 8;
+        for (size_t j = 0; j < 12; j++) data_add.push_back(icc[pos++]);
+      }
+    }
+
+    if (commands_add.empty() && data_add.empty() && pos + 8 <= size) {
+      if (DecodeUint32(icc, size, pos + 4) == 0) {
+        Tag subTag = DecodeKeyword(icc, size, pos);
+        for (size_t i = 0; i < kNumTypeStrings; i++) {
+          if (subTag == *kTypeStrings[i]) {
+            commands_add.push_back(kCommandTypeStartFirst + i);
+            pos += 8;
+            break;
+          }
+        }
+      }
+    }
+
+    if (!(commands_add.empty() && data_add.empty()) || pos == size) {
+      if (last0 < last1) {
+        commands.push_back(kCommandInsert);
+        EncodeVarInt(last1 - last0, &commands);
+        while (last0 < last1) {
+          data.push_back(icc[last0++]);
+        }
+      }
+      for (size_t i = 0; i < commands_add.size(); i++) {
+        commands.push_back(commands_add[i]);
+      }
+      for (size_t i = 0; i < data_add.size(); i++) {
+        data.push_back(data_add[i]);
+      }
+      last0 = pos;
+    }
+    if (commands_add.empty() && data_add.empty()) {
+      pos++;
+    }
+  }
+
+  EncodeVarInt(commands.size(), result);
+  for (size_t i = 0; i < commands.size(); i++) {
+    result->push_back(commands[i]);
+  }
+  for (size_t i = 0; i < data.size(); i++) {
+    result->push_back(data[i]);
+  }
+
+  return true;
+}
+
+// Entropy-codes the ICC profile `icc` into `writer`.
+//
+// The profile is first passed through PredictICC. The size of the resulting
+// byte stream is written with U64Coder, followed by the histograms and one
+// token per byte; each token's context is derived from the byte position and
+// the two preceding bytes. Huffman coding is forced, and the LZ77 method
+// depends on whether the stream is shorter than 4096 bytes.
+// Fails if `icc` is empty, or if PredictICC or the size write fails.
+Status WriteICC(const PaddedBytes& icc, BitWriter* JXL_RESTRICT writer,
+                size_t layer, AuxOut* JXL_RESTRICT aux_out) {
+  if (icc.empty()) {
+    return JXL_FAILURE("ICC must be non-empty");
+  }
+
+  PaddedBytes predicted;
+  JXL_RETURN_IF_ERROR(PredictICC(icc.data(), icc.size(), &predicted));
+  const size_t num_bytes = predicted.size();
+
+  // Size header, charged to `layer` via its own allotment.
+  BitWriter::Allotment allotment(writer, 128);
+  JXL_RETURN_IF_ERROR(U64Coder::Write(num_bytes, writer));
+  allotment.ReclaimAndCharge(writer, layer, aux_out);
+
+  // One token per predicted byte; bytes before the start count as 0.
+  std::vector<std::vector<Token>> tokens(1);
+  std::vector<Token>& stream = tokens[0];
+  for (size_t i = 0; i < num_bytes; i++) {
+    const int prev1 = i > 0 ? predicted[i - 1] : 0;
+    const int prev2 = i > 1 ? predicted[i - 2] : 0;
+    stream.emplace_back(ICCANSContext(i, prev1, prev2), predicted[i]);
+  }
+
+  HistogramParams params;
+  params.force_huffman = true;
+  if (num_bytes < 4096) {
+    params.lz77_method = HistogramParams::LZ77Method::kOptimal;
+  } else {
+    params.lz77_method = HistogramParams::LZ77Method::kLZ77;
+  }
+
+  EntropyEncodingData code;
+  std::vector<uint8_t> context_map;
+  BuildAndEncodeHistograms(params, kNumICCContexts, tokens, &code, &context_map,
+                           writer, layer, aux_out);
+  WriteTokens(tokens[0], code, context_map, writer, layer, aux_out);
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_icc_codec.h b/third-party/libjxl/libjxl/lib/jxl/enc_icc_codec.h
new file mode 100644
index 0000000000..c22cf5994e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_icc_codec.h
@@ -0,0 +1,33 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_ICC_CODEC_H_
+#define LIB_JXL_ENC_ICC_CODEC_H_
+
+// Compressed representation of ICC profiles.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_bit_writer.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// Writes the compressed representation of `icc` to `writer`.
+// NOTE: the definition in enc_icc_codec.cc rejects an empty profile with
+// JXL_FAILURE("ICC must be non-empty"), so passing an empty `icc` yields an
+// error rather than a minimal encoding.
+Status WriteICC(const PaddedBytes& icc, BitWriter* JXL_RESTRICT writer,
+                size_t layer, AuxOut* JXL_RESTRICT aux_out);
+
+// Exposed only for testing
+// Produces in `*result` the intermediate stream that WriteICC entropy-codes.
+// The output ends with a varint command count, the command bytes, and then
+// the data bytes.
+Status PredictICC(const uint8_t* icc, size_t size, PaddedBytes* result);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_ICC_CODEC_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_image_bundle.cc b/third-party/libjxl/libjxl/lib/jxl/enc_image_bundle.cc
new file mode 100644
index 0000000000..0eab1c3e08
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_image_bundle.cc
@@ -0,0 +1,155 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_image_bundle.h"
+
+#include <jxl/cms_interface.h>
+
+#include <atomic>
+#include <limits>
+#include <utility>
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+namespace {
+
+// Copies ib:rect, converts, and copies into out.
+//
+// Converts the pixels of `ib` that lie inside `rect` from ib->c_current() to
+// `c_desired` using `cms`, processing rows through RunOnPool, and stores the
+// three resulting planes in `out`. `out` is reallocated when it is smaller
+// than `rect` and shrunk to the rect size otherwise.
+// Returns an error if the pool run fails, if a CMYK source has no black
+// channel, or if the transform of any row fails.
+Status CopyToT(const ImageMetadata* metadata, const ImageBundle* ib,
+               const Rect& rect, const ColorEncoding& c_desired,
+               const JxlCmsInterface& cms, ThreadPool* pool, Image3F* out) {
+  ColorSpaceTransform c_transform(cms);
+  // Changing IsGray is probably a bug.
+  JXL_CHECK(ib->IsGray() == c_desired.IsGray());
+  bool is_gray = ib->IsGray();
+  if (out->xsize() < rect.xsize() || out->ysize() < rect.ysize()) {
+    *out = Image3F(rect.xsize(), rect.ysize());
+  } else {
+    out->ShrinkTo(rect.xsize(), rect.ysize());
+  }
+  // The per-row callback returns void, so row failures are reported through
+  // this flag and turned into the return value at the end.
+  std::atomic<bool> ok{true};
+  JXL_RETURN_IF_ERROR(RunOnPool(
+      pool, 0, rect.ysize(),
+      // Init callback: sets up the transform, passing the row width and the
+      // thread count.
+      [&](const size_t num_threads) {
+        return c_transform.Init(ib->c_current(), c_desired,
+                                metadata->IntensityTarget(), rect.xsize(),
+                                num_threads);
+      },
+      // Row callback: converts row `y` using the buffers of `thread`.
+      [&](const uint32_t y, const size_t thread) {
+        float* mutable_src_buf = c_transform.BufSrc(thread);
+        const float* src_buf = mutable_src_buf;
+        // Interleave input.
+        if (is_gray) {
+          // Single channel: plane 0 is fed to the transform directly, no
+          // copy into the source buffer is needed.
+          src_buf = rect.ConstPlaneRow(ib->color(), 0, y);
+        } else if (ib->c_current().IsCMYK()) {
+          // CMYK needs the black channel as the fourth input component.
+          if (!ib->HasBlack()) {
+            ok.store(false);
+            return;
+          }
+          const float* JXL_RESTRICT row_in0 =
+              rect.ConstPlaneRow(ib->color(), 0, y);
+          const float* JXL_RESTRICT row_in1 =
+              rect.ConstPlaneRow(ib->color(), 1, y);
+          const float* JXL_RESTRICT row_in2 =
+              rect.ConstPlaneRow(ib->color(), 2, y);
+          const float* JXL_RESTRICT row_in3 = rect.ConstRow(ib->black(), y);
+          for (size_t x = 0; x < rect.xsize(); x++) {
+            // CMYK convention in JXL: 0 = max ink, 1 = white
+            mutable_src_buf[4 * x + 0] = row_in0[x];
+            mutable_src_buf[4 * x + 1] = row_in1[x];
+            mutable_src_buf[4 * x + 2] = row_in2[x];
+            mutable_src_buf[4 * x + 3] = row_in3[x];
+          }
+        } else {
+          // Three-channel color: interleave the three planes.
+          const float* JXL_RESTRICT row_in0 =
+              rect.ConstPlaneRow(ib->color(), 0, y);
+          const float* JXL_RESTRICT row_in1 =
+              rect.ConstPlaneRow(ib->color(), 1, y);
+          const float* JXL_RESTRICT row_in2 =
+              rect.ConstPlaneRow(ib->color(), 2, y);
+          for (size_t x = 0; x < rect.xsize(); x++) {
+            mutable_src_buf[3 * x + 0] = row_in0[x];
+            mutable_src_buf[3 * x + 1] = row_in1[x];
+            mutable_src_buf[3 * x + 2] = row_in2[x];
+          }
+        }
+        float* JXL_RESTRICT dst_buf = c_transform.BufDst(thread);
+        if (!c_transform.Run(thread, src_buf, dst_buf)) {
+          ok.store(false);
+          return;
+        }
+        float* JXL_RESTRICT row_out0 = out->PlaneRow(0, y);
+        float* JXL_RESTRICT row_out1 = out->PlaneRow(1, y);
+        float* JXL_RESTRICT row_out2 = out->PlaneRow(2, y);
+        // De-interleave output and convert type.
+        if (is_gray) {
+          // Gray output has one value per pixel; replicate it into all three
+          // output planes.
+          for (size_t x = 0; x < rect.xsize(); x++) {
+            row_out0[x] = dst_buf[x];
+            row_out1[x] = dst_buf[x];
+            row_out2[x] = dst_buf[x];
+          }
+        } else {
+          for (size_t x = 0; x < rect.xsize(); x++) {
+            row_out0[x] = dst_buf[3 * x + 0];
+            row_out1[x] = dst_buf[3 * x + 1];
+            row_out2[x] = dst_buf[3 * x + 2];
+          }
+        }
+      },
+      "Colorspace transform"));
+  return ok.load();
+}
+
+}  // namespace
+
+// Converts the color planes of this bundle in place to `c_desired` by calling
+// CopyTo with `color_` as both source rect and destination. `c_current_` is
+// only updated after CopyTo has succeeded; a CopyTo error is returned as is.
+Status ImageBundle::TransformTo(const ColorEncoding& c_desired,
+                                const JxlCmsInterface& cms, ThreadPool* pool) {
+  JXL_RETURN_IF_ERROR(CopyTo(Rect(color_), c_desired, cms, &color_, pool));
+  c_current_ = c_desired;
+  return true;
+}
+// Converts the pixels inside `rect` to `c_desired` and stores them in `out`.
+// Forwards to CopyToT with this bundle and its metadata; note that the
+// (out, pool) argument order here differs from CopyToT's (pool, out).
+Status ImageBundle::CopyTo(const Rect& rect, const ColorEncoding& c_desired,
+                           const JxlCmsInterface& cms, Image3F* out,
+                           ThreadPool* pool) const {
+  return CopyToT(metadata_, this, rect, c_desired, cms, pool, out);
+}
+// Makes `in` available in the color encoding `c_desired` through `*out`.
+//
+// If `in` already has the desired encoding and has no black channel, nothing
+// is converted: `*out` is set to `&in` and `store` is left untouched.
+// Otherwise the color planes and any extra channels of `in` are copied into
+// `store`, `store` is transformed to `c_desired`, and `*out` is set to
+// `store`.
+// On a transform failure the failing Status is returned and `*out` is not
+// set; `store` has already been overwritten with the copy at that point.
+Status TransformIfNeeded(const ImageBundle& in, const ColorEncoding& c_desired,
+                         const JxlCmsInterface& cms, ThreadPool* pool,
+                         ImageBundle* store, const ImageBundle** out) {
+  if (in.c_current().SameColorEncoding(c_desired) && !in.HasBlack()) {
+    *out = &in;
+    return true;
+  }
+  // TODO(janwas): avoid copying via createExternal+copyBackToIO
+  // instead of copy+createExternal+copyBackToIO
+  Image3F color(in.color().xsize(), in.color().ysize());
+  CopyImageTo(in.color(), &color);
+  store->SetFromImage(std::move(color), in.c_current());
+
+  // Must at least copy the alpha channel for use by external_image.
+  if (in.HasExtraChannels()) {
+    std::vector<ImageF> extra_channels;
+    for (const ImageF& extra_channel : in.extra_channels()) {
+      ImageF ec(extra_channel.xsize(), extra_channel.ysize());
+      CopyImageTo(extra_channel, &ec);
+      extra_channels.emplace_back(std::move(ec));
+    }
+    store->SetExtraChannels(std::move(extra_channels));
+  }
+
+  // Propagate the transform's own Status, as TransformTo does for CopyTo,
+  // instead of collapsing every failure to a bare `false`.
+  JXL_RETURN_IF_ERROR(store->TransformTo(c_desired, cms, pool));
+  *out = store;
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_image_bundle.h b/third-party/libjxl/libjxl/lib/jxl/enc_image_bundle.h
new file mode 100644
index 0000000000..85f8e14e1c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_image_bundle.h
@@ -0,0 +1,25 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_IMAGE_BUNDLE_H_
+#define LIB_JXL_ENC_IMAGE_BUNDLE_H_
+
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+// Does color transformation from in.c_current() to c_desired if the color
+// encodings are different or if `in` has a black channel, or nothing if the
+// encodings are already the same and there is no black channel.
+// If color transformation is done, stores the transformed values (together
+// with copies of the extra channels of `in`) into store and sets the out
+// pointer to store, else leaves store untouched and sets the out pointer to
+// &in.
+// Returns false if color transform fails.
+Status TransformIfNeeded(const ImageBundle& in, const ColorEncoding& c_desired,
+                         const JxlCmsInterface& cms, ThreadPool* pool,
+                         ImageBundle* store, const ImageBundle** out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_IMAGE_BUNDLE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_jxl_skcms.h b/third-party/libjxl/libjxl/lib/jxl/enc_jxl_skcms.h
new file mode 100644
index 0000000000..3c364e883d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_jxl_skcms.h
@@ -0,0 +1,54 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_JXL_SKCMS_H_
+#define LIB_JXL_ENC_JXL_SKCMS_H_
+
+// skcms wrapper to rename the skcms symbols to avoid conflicting names with
+// other projects using skcms as well. When using JPEGXL_BUNDLE_SKCMS the
+// bundled functions will be renamed from skcms_ to jxl_skcms_
+
+#ifdef SKCMS_API
+#error "Must include enc_jxl_skcms.h and not skcms.h directly"
+#endif  // SKCMS_API
+
+#if JPEGXL_BUNDLE_SKCMS
+
+#define skcms_252_random_bytes jxl_skcms_252_random_bytes
+#define skcms_AdaptToXYZD50 jxl_skcms_AdaptToXYZD50
+#define skcms_ApproximateCurve jxl_skcms_ApproximateCurve
+#define skcms_ApproximatelyEqualProfiles jxl_skcms_ApproximatelyEqualProfiles
+#define skcms_AreApproximateInverses jxl_skcms_AreApproximateInverses
+#define skcms_GetCHAD jxl_skcms_GetCHAD
+#define skcms_GetTagByIndex jxl_skcms_GetTagByIndex
+#define skcms_GetTagBySignature jxl_skcms_GetTagBySignature
+#define skcms_GetWTPT jxl_skcms_GetWTPT
+#define skcms_Identity_TransferFunction jxl_skcms_Identity_TransferFunction
+#define skcms_MakeUsableAsDestination jxl_skcms_MakeUsableAsDestination
+#define skcms_MakeUsableAsDestinationWithSingleCurve \
+  jxl_skcms_MakeUsableAsDestinationWithSingleCurve
+#define skcms_Matrix3x3_concat jxl_skcms_Matrix3x3_concat
+#define skcms_Matrix3x3_invert jxl_skcms_Matrix3x3_invert
+#define skcms_MaxRoundtripError jxl_skcms_MaxRoundtripError
+#define skcms_Parse jxl_skcms_Parse
+#define skcms_PrimariesToXYZD50 jxl_skcms_PrimariesToXYZD50
+#define skcms_sRGB_Inverse_TransferFunction \
+  jxl_skcms_sRGB_Inverse_TransferFunction
+#define skcms_sRGB_profile jxl_skcms_sRGB_profile
+#define skcms_sRGB_TransferFunction jxl_skcms_sRGB_TransferFunction
+#define skcms_TransferFunction_eval jxl_skcms_TransferFunction_eval
+#define skcms_TransferFunction_invert jxl_skcms_TransferFunction_invert
+#define skcms_TransferFunction_makeHLGish jxl_skcms_TransferFunction_makeHLGish
+#define skcms_TransferFunction_makePQish jxl_skcms_TransferFunction_makePQish
+#define skcms_Transform jxl_skcms_Transform
+#define skcms_TransformWithPalette jxl_skcms_TransformWithPalette
+#define skcms_TRCs_AreApproximateInverse jxl_skcms_TRCs_AreApproximateInverse
+#define skcms_XYZD50_profile jxl_skcms_XYZD50_profile
+
+#endif  // JPEGXL_BUNDLE_SKCMS
+
+#include "skcms.h"
+
+#endif  // LIB_JXL_ENC_JXL_SKCMS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_linalg.cc b/third-party/libjxl/libjxl/lib/jxl/enc_linalg.cc
new file mode 100644
index 0000000000..fe2090a909
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_linalg.cc
@@ -0,0 +1,52 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_linalg.h"
+
+#include <cmath>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Diagonalizes the symmetric 2x2 matrix `A`.
+//
+// Writes the two eigenvalues to the first row of `diag` and fills the 2x2
+// matrix `U`. When the off-diagonal entry is (almost) zero, the diagonal of
+// `A` is copied and `U` is the identity; otherwise the eigenvalues are the
+// roots of the characteristic polynomial and `U` is built from the
+// normalized vector (A00 - l1, A10).
+void ConvertToDiagonal(const ImageD& A, ImageD* const JXL_RESTRICT diag,
+                       ImageD* const JXL_RESTRICT U) {
+#if JXL_ENABLE_ASSERT
+  JXL_ASSERT(A.xsize() == 2);
+  JXL_ASSERT(A.ysize() == 2);
+  JXL_ASSERT(std::abs(A.Row(0)[1] - A.Row(1)[0]) < 1e-15);
+#endif
+
+  const double a00 = A.ConstRow(0)[0];
+  const double a01 = A.ConstRow(0)[1];
+  const double a10 = A.ConstRow(1)[0];
+  const double a11 = A.ConstRow(1)[1];
+  double* const eigenvalues = diag->Row(0);
+  double* const u_row0 = U->Row(0);
+  double* const u_row1 = U->Row(1);
+
+  if (std::abs(a01) < 1e-15) {
+    // Already diagonal.
+    eigenvalues[0] = a00;
+    eigenvalues[1] = a11;
+    u_row0[0] = 1.0;
+    u_row0[1] = 0.0;
+    u_row1[0] = 0.0;
+    u_row1[1] = 1.0;
+  } else {
+    // Roots of lambda^2 + b * lambda + c, where b = -trace and c = det
+    // (using A01 == A10).
+    const double b = -(a00 + a11);
+    const double c = a00 * a11 - a01 * a01;
+    const double d = b * b - 4.0 * c;
+    const double sqd = std::sqrt(d);
+    const double l1 = (-b - sqd) * 0.5;
+    const double l2 = (-b + sqd) * 0.5;
+
+    const double raw_x = a00 - l1;
+    const double raw_y = a10;
+    const double inv_norm = 1.0 / std::hypot(raw_x, raw_y);
+    const double vx = raw_x * inv_norm;
+    const double vy = raw_y * inv_norm;
+
+    eigenvalues[0] = l1;
+    eigenvalues[1] = l2;
+
+    u_row0[0] = vy;
+    u_row0[1] = -vx;
+    u_row1[0] = vx;
+    u_row1[1] = vy;
+  }
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_linalg.h b/third-party/libjxl/libjxl/lib/jxl/enc_linalg.h
new file mode 100644
index 0000000000..791770d5d4
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_linalg.h
@@ -0,0 +1,24 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_LINALG_H_
+#define LIB_JXL_LINALG_H_
+
+// Linear algebra.
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+using ImageD = Plane<double>;
+
+// A is symmetric, U is orthogonal, and A = U * Diagonal(diag) * Transpose(U).
+// The implementation in enc_linalg.cc only handles a 2x2 `A` (checked when
+// asserts are enabled). It writes the two eigenvalues to diag->Row(0)[0..1]
+// and fills the 2x2 `U`.
+void ConvertToDiagonal(const ImageD& A, ImageD* JXL_RESTRICT diag,
+                       ImageD* JXL_RESTRICT U);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_LINALG_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_linalg_test.cc b/third-party/libjxl/libjxl/lib/jxl/enc_linalg_test.cc
new file mode 100644
index 0000000000..967b9a3afb
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_linalg_test.cc
@@ -0,0 +1,118 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_linalg.h"
+
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+// Returns the N x N identity matrix.
+ImageD Identity(const size_t N) {
+  ImageD eye(N, N);
+  for (size_t y = 0; y < N; ++y) {
+    double* JXL_RESTRICT row = eye.Row(y);
+    for (size_t x = 0; x < N; ++x) {
+      row[x] = (x == y) ? 1.0 : 0.0;
+    }
+  }
+  return eye;
+}
+
+// Expands the single-row image `d` into a square matrix that has the entries
+// of `d` on its diagonal and zeros everywhere else.
+ImageD Diagonal(const ImageD& d) {
+  JXL_ASSERT(d.ysize() == 1);
+  const size_t n = d.xsize();
+  ImageD result(n, n);
+  const double* JXL_RESTRICT values = d.Row(0);
+  for (size_t y = 0; y < n; ++y) {
+    double* JXL_RESTRICT row = result.Row(y);
+    for (size_t x = 0; x < n; ++x) {
+      row[x] = (x == y) ? values[y] : 0.0;
+    }
+  }
+  return result;
+}
+
+// Returns the product image with
+//   out.Row(y)[x] = sum over k of A.Row(k)[x] * B.Row(y)[k].
+// Requires A.ysize() == B.xsize().
+ImageD MatMul(const ImageD& A, const ImageD& B) {
+  JXL_ASSERT(A.ysize() == B.xsize());
+  const size_t out_xsize = A.xsize();
+  const size_t out_ysize = B.ysize();
+  const size_t inner = B.xsize();
+  ImageD product(out_xsize, out_ysize);
+  for (size_t y = 0; y < out_ysize; ++y) {
+    const double* const JXL_RESTRICT b_row = B.Row(y);
+    double* const JXL_RESTRICT dst = product.Row(y);
+    for (size_t x = 0; x < out_xsize; ++x) {
+      double sum = 0.0;
+      for (size_t k = 0; k < inner; ++k) {
+        sum += A.Row(k)[x] * b_row[k];
+      }
+      dst[x] = sum;
+    }
+  }
+  return product;
+}
+
+// Returns the transpose of `A`: result.Row(x)[y] == A.Row(y)[x].
+ImageD Transpose(const ImageD& A) {
+  const size_t src_xsize = A.xsize();
+  const size_t src_ysize = A.ysize();
+  ImageD flipped(src_ysize, src_xsize);
+  for (size_t y = 0; y < src_ysize; ++y) {
+    for (size_t x = 0; x < src_xsize; ++x) {
+      flipped.Row(x)[y] = A.Row(y)[x];
+    }
+  }
+  return flipped;
+}
+
+// Fills an N x N image via GenerateImage(rng, ..., vmin, vmax) and then makes
+// it symmetric by overwriting every entry Row(col)[row] with Row(row)[col]
+// for col < row.
+ImageD RandomSymmetricMatrix(const size_t N, Rng& rng, const double vmin,
+                             const double vmax) {
+  ImageD matrix(N, N);
+  GenerateImage(rng, &matrix, vmin, vmax);
+  for (size_t col = 0; col < N; ++col) {
+    for (size_t row = col + 1; row < N; ++row) {
+      matrix.Row(col)[row] = matrix.Row(row)[col];
+    }
+  }
+  return matrix;
+}
+
+// Asserts that `A` and `B` have the same dimensions and that every pair of
+// corresponding entries differs by at most `eps`. Uses fatal gtest
+// assertions, so the function returns at the first mismatch.
+void VerifyMatrixEqual(const ImageD& A, const ImageD& B, const double eps) {
+  ASSERT_EQ(A.xsize(), B.xsize());
+  ASSERT_EQ(A.ysize(), B.ysize());
+  for (size_t y = 0; y < A.ysize(); ++y) {
+    for (size_t x = 0; x < A.xsize(); ++x) {
+      ASSERT_NEAR(A.Row(y)[x], B.Row(y)[x], eps);
+    }
+  }
+}
+
+// Checks that MatMul(Transpose(A), A) equals the identity within `eps`.
+void VerifyOrthogonal(const ImageD& A, const double eps) {
+  const ImageD expected = Identity(A.xsize());
+  const ImageD gram = MatMul(Transpose(A), A);
+  VerifyMatrixEqual(expected, gram, eps);
+}
+
+TEST(LinAlgTest, ConvertToDiagonal) {
+  // Decomposes `A` and checks that U is orthogonal and that
+  // U * Diagonal(d) * Transpose(U) reproduces `A`.
+  const auto check_decomposition = [](const ImageD& A) {
+    ImageD U(2, 2), d(2, 1);
+    ConvertToDiagonal(A, &d, &U);
+    VerifyOrthogonal(U, 1e-12);
+    VerifyMatrixEqual(A, MatMul(U, MatMul(Diagonal(d), Transpose(U))), 1e-12);
+  };
+
+  // Identity input: U must be the identity and both eigenvalues 1.
+  {
+    ImageD I = Identity(2);
+    ImageD U(2, 2), d(2, 1);
+    ConvertToDiagonal(I, &d, &U);
+    VerifyMatrixEqual(I, U, 1e-15);
+    for (size_t k = 0; k < 2; ++k) {
+      ASSERT_NEAR(d.Row(0)[k], 1.0, 1e-15);
+    }
+  }
+
+  // Fixed symmetric matrix with non-zero off-diagonal entries.
+  {
+    ImageD A = Identity(2);
+    A.Row(0)[1] = A.Row(1)[0] = 2.0;
+    check_decomposition(A);
+  }
+
+  // Random symmetric matrices with entries generated from [-1, 1].
+  Rng rng(0);
+  for (size_t i = 0; i < 100; ++i) {
+    check_decomposition(RandomSymmetricMatrix(2, rng, -1.0, 1.0));
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_modular.cc b/third-party/libjxl/libjxl/lib/jxl/enc_modular.cc
new file mode 100644
index 0000000000..3c323404b7
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_modular.cc
@@ -0,0 +1,1747 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_modular.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <array>
+#include <atomic>
+#include <limits>
+#include <queue>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cluster.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/enc_gaborish.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/encoding/enc_debug_tree.h"
+#include "lib/jxl/modular/encoding/enc_encoding.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/encoding/ma_common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/options.h"
+#include "lib/jxl/modular/transform/enc_transform.h"
+#include "lib/jxl/toc.h"
+
+namespace jxl {
+
+namespace {
+// constexpr bool kPrintTree = false;
+
+// Squeeze default quantization factors
+// these quantization factors are for -Q 50  (other qualities simply scale the
+// factors; things are rounded down and obviously cannot get below 1)
+static const float squeeze_quality_factor =
+    0.35;  // for easy tweaking of the quality range (decrease this number for
+           // higher quality)
+static const float squeeze_luma_factor =
+    1.1;  // for easy tweaking of the balance between luma (or anything
+          // non-chroma) and chroma (decrease this number for higher quality
+          // luma)
+// Quality factor and per-channel tables for XYB input, going by the names
+// (NOTE(review): their use is outside this part of the file - confirm).
+static const float squeeze_quality_factor_xyb = 2.4f;
+// One row of 16 quantizers per channel, in the order named by the trailing
+// comments. The Y row holds the same values as squeeze_luma_qtable and the X
+// row the same values as squeeze_chroma_qtable.
+static const float squeeze_xyb_qtable[3][16] = {
+    {163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 2.56, 1.28, 0.64, 0.32, 0.16,
+     0.08, 0.04, 0.02, 0.01, 0.005},  // Y
+    {1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5,
+     0.5},  // X
+    {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5,
+     0.5},  // B-Y
+};
+
+// Each entry is half of the previous one.
+static const float squeeze_luma_qtable[16] = {
+    163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 2.56, 1.28,
+    0.64,   0.32,  0.16,  0.08,  0.04,  0.02, 0.01, 0.005};
+// for 8-bit input, the range of YCoCg chroma is -255..255 so basically this
+// does 4:2:0 subsampling (two most fine grained layers get quantized away)
+static const float squeeze_chroma_qtable[16] = {
+    1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5, 0.5};
+
+// `cutoffs` must be sorted.
+//
+// Builds a decision tree on `property` by repeatedly splitting the cutoff
+// list at its midpoint, breadth first. Every leaf uses predictor `pred`.
+// `num_pixels` limits the depth: ranges that are not longer than `min_gap`
+// cutoffs are left as leaves.
+Tree MakeFixedTree(int property, const std::vector<int32_t>& cutoffs,
+                   Predictor pred, size_t num_pixels) {
+  // Reduce fixed tree height when encoding small images.
+  const size_t log_px = CeilLog2Nonzero(num_pixels);
+  const size_t min_gap = log_px < 14 ? 8 * (14 - log_px) : 0;
+
+  // A range [begin, end) of cutoffs that may still be split, together with
+  // the index of the tree node that currently represents it.
+  struct PendingRange {
+    size_t begin, end, node;
+  };
+
+  Tree tree;
+  std::queue<PendingRange> pending;
+  // Leaf IDs will be set by roundtrip decoding the tree.
+  tree.push_back(PropertyDecisionNode::Leaf(pred));
+  pending.push(PendingRange{0, cutoffs.size(), 0});
+
+  while (!pending.empty()) {
+    const PendingRange cur = pending.front();
+    pending.pop();
+    if (cur.begin + min_gap < cur.end) {
+      const uint32_t split = (cur.begin + cur.end) / 2;
+      // The two children are appended next to each other; the first one
+      // takes the cutoffs above the split, the second one those below it.
+      const size_t first_child = tree.size();
+      tree[cur.node] =
+          PropertyDecisionNode::Split(property, cutoffs[split], first_child);
+      tree.push_back(PropertyDecisionNode::Leaf(pred));
+      tree.push_back(PropertyDecisionNode::Leaf(pred));
+      pending.push(PendingRange{split + 1, cur.end, first_child});
+      pending.push(PendingRange{cur.begin, split, first_child + 1});
+    }
+  }
+  return tree;
+}
+
+// Returns the hard-coded tree for `tree_kind`:
+//  - kJpegTranscodeACMeta, kTrivialTreeNoPredictor: single Zero leaf.
+//  - kFalconACMeta: single Left leaf.
+//  - kACMeta: single Left leaf below 1024 pixels, otherwise the fixed
+//    27-node tree spelled out below.
+//  - kWPFixedDC / kGradientFixedDC: MakeFixedTree over a fixed cutoff list
+//    with the Weighted / Gradient predictor.
+// Any other kind reaches JXL_UNREACHABLE.
+Tree PredefinedTree(ModularOptions::TreeKind tree_kind, size_t total_pixels) {
+  if (tree_kind == ModularOptions::TreeKind::kJpegTranscodeACMeta ||
+      tree_kind == ModularOptions::TreeKind::kTrivialTreeNoPredictor) {
+    // All the data is 0, so no need for a fancy tree.
+    return {PropertyDecisionNode::Leaf(Predictor::Zero)};
+  }
+  if (tree_kind == ModularOptions::TreeKind::kFalconACMeta) {
+    // All the data is 0 except the quant field. TODO(veluca): make that 0 too.
+    return {PropertyDecisionNode::Leaf(Predictor::Left)};
+  }
+  if (tree_kind == ModularOptions::TreeKind::kACMeta) {
+    // Small image.
+    if (total_pixels < 1024) {
+      return {PropertyDecisionNode::Leaf(Predictor::Left)};
+    }
+    // The number in front of each comment below is the node's index in
+    // `tree`. NOTE(review): the third Split argument appears to be the index
+    // of the first of two adjacent children - confirm against
+    // PropertyDecisionNode::Split.
+    Tree tree;
+    // 0: c > 1
+    tree.push_back(PropertyDecisionNode::Split(0, 1, 1));
+    // 1: c > 2
+    tree.push_back(PropertyDecisionNode::Split(0, 2, 3));
+    // 2: c > 0
+    tree.push_back(PropertyDecisionNode::Split(0, 0, 5));
+    // 3: EPF control field (all 0 or 4), top > 0
+    tree.push_back(PropertyDecisionNode::Split(6, 0, 21));
+    // 4: ACS+QF, y > 0
+    tree.push_back(PropertyDecisionNode::Split(2, 0, 7));
+    // 5: CfL x
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Gradient));
+    // 6: CfL b
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Gradient));
+    // 7: QF: split according to the left quant value.
+    tree.push_back(PropertyDecisionNode::Split(7, 5, 9));
+    // 8: ACS: split in 4 segments (8x8 from 0 to 3, large square 4-5, large
+    // rectangular 6-11, 8x8 12+), according to previous ACS value.
+    tree.push_back(PropertyDecisionNode::Split(7, 5, 15));
+    // QF
+    tree.push_back(PropertyDecisionNode::Split(7, 11, 11));
+    tree.push_back(PropertyDecisionNode::Split(7, 3, 13));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Left));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Left));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Left));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Left));
+    // ACS
+    tree.push_back(PropertyDecisionNode::Split(7, 11, 17));
+    tree.push_back(PropertyDecisionNode::Split(7, 3, 19));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+    // EPF, left > 0
+    tree.push_back(PropertyDecisionNode::Split(7, 0, 23));
+    tree.push_back(PropertyDecisionNode::Split(7, 0, 25));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+    return tree;
+  }
+  // The two fixed-DC kinds share the same cutoff list and differ only in the
+  // property that is split on and in the predictor.
+  if (tree_kind == ModularOptions::TreeKind::kWPFixedDC) {
+    std::vector<int32_t> cutoffs = {
+        -500, -392, -255, -191, -127, -95, -63, -47, -31, -23, -15,
+        -11,  -7,   -4,   -3,   -1,   0,   1,   3,   5,   7,   11,
+        15,   23,   31,   47,   63,   95,  127, 191, 255, 392, 500};
+    return MakeFixedTree(kWPProp, cutoffs, Predictor::Weighted, total_pixels);
+  }
+  if (tree_kind == ModularOptions::TreeKind::kGradientFixedDC) {
+    std::vector<int32_t> cutoffs = {
+        -500, -392, -255, -191, -127, -95, -63, -47, -31, -23, -15,
+        -11,  -7,   -4,   -3,   -1,   0,   1,   3,   5,   7,   11,
+        15,   23,   31,   47,   63,   95,  127, 191, 255, 392, 500};
+    return MakeFixedTree(kGradientProp, cutoffs, Predictor::Gradient,
+                         total_pixels);
+  }
+  JXL_UNREACHABLE("Unreachable");
+  return {};
+}
+
+// Merges the trees in `trees` using nodes that decide on stream_id, as defined
+// by `tree_splits`.
+//
+// Appends to `*tree` a subtree covering trees[begin, end): a single tree is
+// copied with its child indices shifted, a longer range gets a decision node
+// on tree_splits[mid] - 1 followed by the subtree for [mid, end) and then the
+// one for [begin, mid).
+void MergeTrees(const std::vector<Tree>& trees,
+                const std::vector<size_t>& tree_splits, size_t begin,
+                size_t end, Tree* tree) {
+  JXL_ASSERT(trees.size() + 1 == tree_splits.size());
+  JXL_ASSERT(end > begin);
+  JXL_ASSERT(end <= trees.size());
+
+  if (end != begin + 1) {
+    const size_t mid = (begin + end) / 2;
+    size_t splitval = tree_splits[mid] - 1;
+    const size_t decision = tree->size();
+    tree->emplace_back(1 /*stream_id*/, splitval, 0, 0, Predictor::Zero, 0, 1);
+    // Each child index is known only once the preceding subtree has been
+    // appended, so it is patched in right before recursing.
+    (*tree)[decision].lchild = tree->size();
+    MergeTrees(trees, tree_splits, mid, end, tree);
+    (*tree)[decision].rchild = tree->size();
+    MergeTrees(trees, tree_splits, begin, mid, tree);
+    return;
+  }
+
+  // Insert the tree, adding the opportune offset to all child nodes.
+  // This will make the leaf IDs wrong, but subsequent roundtripping will fix
+  // them.
+  const size_t offset = tree->size();
+  for (const auto& node : trees[begin]) {
+    tree->push_back(node);
+    tree->back().lchild += offset;
+    tree->back().rchild += offset;
+  }
+}
+
+// Replaces every sample of `ch` by a multiple of `q`: q / 2 is added to the
+// magnitude, the result is divided by `q` with integer division and
+// multiplied by `q` again, and the sign is restored. For positive `q` this
+// rounds to the nearest multiple. q == 1 leaves the channel unchanged.
+void QuantizeChannel(Channel& ch, const int q) {
+  if (q == 1) return;
+  const int half = q / 2;
+  const size_t ysize = ch.plane.ysize();
+  const size_t xsize = ch.plane.xsize();
+  for (size_t y = 0; y < ysize; y++) {
+    pixel_type* row = ch.plane.Row(y);
+    for (size_t x = 0; x < xsize; x++) {
+      const pixel_type v = row[x];
+      row[x] = (v < 0) ? -((-v + half) / q) * q : ((v + half) / q) * q;
+    }
+  }
+}
+
+// convert binary32 float that corresponds to custom [bits]-bit float (with
+// [exp_bits] exponent bits) to a [bits]-bit integer representation that should
+// fit in pixel_type
+//
+// Three modes:
+//  - !fp: samples are scaled by `dfactor` and rounded to integers.
+//  - fp with bits == 32: the binary32 bit patterns are copied unchanged.
+//  - fp otherwise: each binary32 value is repacked into sign, `exp_bits`
+//    exponent bits and (bits - exp_bits - 1) mantissa bits.
+// Returns an error for Inf/NaN input and for values that the target format
+// cannot hold exactly (exponent out of range or mantissa bits lost).
+Status float_to_int(const float* const row_in, pixel_type* const row_out,
+                    size_t xsize, unsigned int bits, unsigned int exp_bits,
+                    bool fp, double dfactor) {
+  JXL_ASSERT(sizeof(pixel_type) * 8 >= bits);
+  if (!fp) {
+    // Integer samples: add +-0.5 before the truncating float-to-integer
+    // conversion. Above 22 bits the arithmetic is done in double, otherwise
+    // in float.
+    if (bits > 22) {
+      for (size_t x = 0; x < xsize; ++x) {
+        row_out[x] = row_in[x] * dfactor + (row_in[x] < 0 ? -0.5 : 0.5);
+      }
+    } else {
+      float factor = dfactor;
+      for (size_t x = 0; x < xsize; ++x) {
+        row_out[x] = row_in[x] * factor + (row_in[x] < 0 ? -0.5f : 0.5f);
+      }
+    }
+    return true;
+  }
+  if (bits == 32 && fp) {
+    // Target format is binary32 itself: copy the raw 4-byte patterns.
+    JXL_ASSERT(exp_bits == 8);
+    memcpy((void*)row_out, (const void*)row_in, 4 * xsize);
+    return true;
+  }
+
+  // Parameters of the target format.
+  int exp_bias = (1 << (exp_bits - 1)) - 1;
+  int max_exp = (1 << exp_bits) - 1;
+  uint32_t sign = (1u << (bits - 1));
+  int mant_bits = bits - exp_bits - 1;
+  // Number of low binary32 mantissa bits (out of 23) that have no room in the
+  // target mantissa and therefore must be zero.
+  int mant_shift = 23 - mant_bits;
+  for (size_t x = 0; x < xsize; ++x) {
+    uint32_t f;
+    memcpy(&f, &row_in[x], 4);
+    int signbit = (f >> 31);
+    f &= 0x7fffffff;
+    if (f == 0) {
+      // +0 / -0: only the sign bit (if any) is stored.
+      row_out[x] = (signbit ? sign : 0);
+      continue;
+    }
+    // Unbiased binary32 exponent; 128 is the all-ones exponent field.
+    int exp = (f >> 23) - 127;
+    if (exp == 128) return JXL_FAILURE("Inf/NaN not allowed");
+    int mantissa = (f & 0x007fffff);
+    // broke up the binary32 into its parts, now reassemble into
+    // arbitrary float
+    exp += exp_bias;
+    if (exp < 0) {  // will become a subnormal number
+      // add implicit leading 1 to mantissa
+      mantissa |= 0x00800000;
+      if (exp < -mant_bits) {
+        return JXL_FAILURE(
+            "Invalid float number: %g cannot be represented with %i "
+            "exp_bits and %i mant_bits (exp %i)",
+            row_in[x], exp_bits, mant_bits, exp);
+      }
+      // Shift the now explicit leading 1 down into the mantissa field and
+      // store a zero exponent.
+      mantissa >>= 1 - exp;
+      exp = 0;
+    }
+    // exp should be representable in exp_bits, otherwise input was
+    // invalid
+    if (exp > max_exp) return JXL_FAILURE("Invalid float exponent");
+    // Any set bit below the target mantissa would be dropped by the shift
+    // below, so treat it as an error.
+    if (mantissa & ((1 << mant_shift) - 1)) {
+      return JXL_FAILURE("%g is losing precision (mant: %x)", row_in[x],
+                         mantissa);
+    }
+    mantissa >>= mant_shift;
+    // Pack sign | exponent | mantissa into the low `bits` bits.
+    f = (signbit ? sign : 0);
+    f |= (exp << mant_bits);
+    f |= mantissa;
+    row_out[x] = (pixel_type)f;
+  }
+  return true;
+}
+}  // namespace
+
+// Builds the encoder state for one frame. Copies `cparams_orig` into
+// `cparams_` and then fills in the modular options that depend on the speed
+// tier, the decoding speed tier and the responsive/lossless settings; records
+// stream-id boundaries in `tree_splits_`; and finally gives each of the
+// `num_streams` streams a copy of the resulting options.
+ModularFrameEncoder::ModularFrameEncoder(const FrameHeader& frame_header,
+                                         const CompressParams& cparams_orig)
+    : frame_dim_(frame_header.ToFrameDimensions()), cparams_(cparams_orig) {
+  size_t num_streams =
+      ModularStreamId::Num(frame_dim_, frame_header.passes.num_passes);
+  // Lossless only: higher decoding speed tiers restrict the tree mode and/or
+  // the predictor; tiers >= 3 also set nb_repeats to 0.
+  if (cparams_.ModularPartIsLossless()) {
+    switch (cparams_.decoding_speed_tier) {
+      case 0:
+        break;
+      case 1:
+        cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kWPOnly;
+        break;
+      case 2: {
+        cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kGradientOnly;
+        cparams_.options.predictor = Predictor::Gradient;
+        break;
+      }
+      case 3: {  // LZ77, no Gradient.
+        cparams_.options.nb_repeats = 0;
+        cparams_.options.predictor = Predictor::Gradient;
+        break;
+      }
+      default: {  // LZ77, no predictor.
+        cparams_.options.nb_repeats = 0;
+        cparams_.options.predictor = Predictor::Zero;
+        break;
+      }
+    }
+  }
+  // NOTE(review): `responsive` can still be negative (unset) at this point;
+  // its default is only resolved further down, and a negative value is truthy
+  // in this condition. Confirm that this ordering is intended.
+  if (cparams_.decoding_speed_tier >= 1 && cparams_.responsive &&
+      cparams_.ModularPartIsLossless()) {
+    cparams_.options.tree_kind =
+        ModularOptions::TreeKind::kTrivialTreeNoPredictor;
+    cparams_.options.nb_repeats = 0;
+  }
+  stream_images_.resize(num_streams);
+
+  // use a sensible default if nothing explicit is specified:
+  // Squeeze for lossy, no squeeze for lossless
+  if (cparams_.responsive < 0) {
+    if (cparams_.ModularPartIsLossless()) {
+      cparams_.responsive = 0;
+    } else {
+      cparams_.responsive = 1;
+    }
+  }
+
+  // The node threshold grows with the numeric value of the speed tier.
+  cparams_.options.splitting_heuristics_node_threshold =
+      82 + 14 * static_cast<int>(cparams_.speed_tier);
+
+  {
+    // Set properties.
+    std::vector<uint32_t> prop_order;
+    if (cparams_.responsive) {
+      // Properties in order of their likelihood of being useful for Squeeze
+      // residuals.
+      prop_order = {0, 1, 4, 5, 6, 7, 8, 15, 9, 10, 11, 12, 13, 14, 2, 3};
+    } else {
+      // Same, but for the non-Squeeze case.
+      prop_order = {0, 1, 15, 9, 10, 11, 12, 13, 14, 2, 3, 4, 5, 6, 7, 8};
+      // if few groups, don't use group as a property
+      if (num_streams < 30 && cparams_.speed_tier > SpeedTier::kTortoise) {
+        prop_order.erase(prop_order.begin() + 1);
+      }
+    }
+    // Use a prefix of prop_order whose length depends on the speed tier
+    // (kTortoise uses the whole list), and cap the number of distinct
+    // property values accordingly.
+    switch (cparams_.speed_tier) {
+      case SpeedTier::kHare:
+        cparams_.options.splitting_heuristics_properties.assign(
+            prop_order.begin(), prop_order.begin() + 4);
+        cparams_.options.max_property_values = 24;
+        break;
+      case SpeedTier::kWombat:
+        cparams_.options.splitting_heuristics_properties.assign(
+            prop_order.begin(), prop_order.begin() + 5);
+        cparams_.options.max_property_values = 32;
+        break;
+      case SpeedTier::kSquirrel:
+        cparams_.options.splitting_heuristics_properties.assign(
+            prop_order.begin(), prop_order.begin() + 7);
+        cparams_.options.max_property_values = 48;
+        break;
+      case SpeedTier::kKitten:
+        cparams_.options.splitting_heuristics_properties.assign(
+            prop_order.begin(), prop_order.begin() + 10);
+        cparams_.options.max_property_values = 96;
+        break;
+      case SpeedTier::kTortoise:
+        cparams_.options.splitting_heuristics_properties = prop_order;
+        cparams_.options.max_property_values = 256;
+        break;
+      default:
+        cparams_.options.splitting_heuristics_properties.assign(
+            prop_order.begin(), prop_order.begin() + 3);
+        cparams_.options.max_property_values = 16;
+        break;
+    }
+    if (cparams_.speed_tier > SpeedTier::kTortoise) {
+      // Gradient in previous channels.
+      for (int i = 0; i < cparams_.options.max_properties; i++) {
+        cparams_.options.splitting_heuristics_properties.push_back(
+            kNumNonrefProperties + i * 4 + 3);
+      }
+    } else {
+      // All the extra properties in Tortoise mode.
+      for (int i = 0; i < cparams_.options.max_properties * 4; i++) {
+        cparams_.options.splitting_heuristics_properties.push_back(
+            kNumNonrefProperties + i);
+      }
+    }
+  }
+
+  // A predictor equal to static_cast<Predictor>(-1) is treated as "not set".
+  if (cparams_.options.predictor == static_cast<Predictor>(-1)) {
+    // no explicit predictor(s) given, set a good default
+    if ((cparams_.speed_tier <= SpeedTier::kTortoise ||
+         cparams_.modular_mode == false) &&
+        cparams_.IsLossless() && cparams_.responsive == false) {
+      // TODO(veluca): allow all predictors that don't break residual
+      // multipliers in lossy mode.
+      cparams_.options.predictor = Predictor::Variable;
+    } else if (cparams_.responsive || cparams_.lossy_palette) {
+      // zero predictor for Squeeze residues and lossy palette
+      cparams_.options.predictor = Predictor::Zero;
+    } else if (!cparams_.IsLossless()) {
+      // If not responsive and lossy. TODO(veluca): use near_lossless instead?
+      cparams_.options.predictor = Predictor::Gradient;
+    } else if (cparams_.speed_tier < SpeedTier::kFalcon) {
+      // try median and weighted predictor for anything else
+      cparams_.options.predictor = Predictor::Best;
+    } else if (cparams_.speed_tier == SpeedTier::kFalcon) {
+      // just weighted predictor in falcon mode
+      cparams_.options.predictor = Predictor::Weighted;
+    } else if (cparams_.speed_tier > SpeedTier::kFalcon) {
+      // just gradient predictor in thunder mode
+      cparams_.options.predictor = Predictor::Gradient;
+    }
+  } else {
+    // Keep the explicitly requested predictor in delta_pred_; with a lossy
+    // palette the stream predictor itself is forced to Zero.
+    delta_pred_ = cparams_.options.predictor;
+    if (cparams_.lossy_palette) cparams_.options.predictor = Predictor::Zero;
+  }
+  // When the modular part is lossy, Weighted/Variable/Best are replaced by
+  // the Zero predictor.
+  if (!cparams_.ModularPartIsLossless()) {
+    if (cparams_.options.predictor == Predictor::Weighted ||
+        cparams_.options.predictor == Predictor::Variable ||
+        cparams_.options.predictor == Predictor::Best)
+      cparams_.options.predictor = Predictor::Zero;
+  }
+  // tree_splits_ collects stream-id boundaries: 0, then (when not in modular
+  // mode) the ids of the first VarDCTDC, ModularDC, ACMetadata, QuantTable
+  // and ModularAC streams, then num_streams.
+  tree_splits_.push_back(0);
+  if (cparams_.modular_mode == false) {
+    cparams_.options.fast_decode_multiplier = 1.0f;
+    tree_splits_.push_back(ModularStreamId::VarDCTDC(0).ID(frame_dim_));
+    tree_splits_.push_back(ModularStreamId::ModularDC(0).ID(frame_dim_));
+    tree_splits_.push_back(ModularStreamId::ACMetadata(0).ID(frame_dim_));
+    tree_splits_.push_back(ModularStreamId::QuantTable(0).ID(frame_dim_));
+    tree_splits_.push_back(ModularStreamId::ModularAC(0, 0).ID(frame_dim_));
+    ac_metadata_size.resize(frame_dim_.num_dc_groups);
+    extra_dc_precision.resize(frame_dim_.num_dc_groups);
+  }
+  tree_splits_.push_back(num_streams);
+  cparams_.options.max_chan_size = frame_dim_.group_dim;
+  cparams_.options.group_dim = frame_dim_.group_dim;
+
+  // TODO(veluca): figure out how to use different predictor sets per channel.
+  stream_options_.resize(num_streams, cparams_.options);
+}
+
+// Applies a copy of `tr` to `image` and, on success, appends that copy to
+// `image.transform`. With `force_jxlart` set, only MetaApply() is run on the
+// copy; otherwise TransformForward() is run with `wp_header` and `pool`.
+// Returns true iff the transform was applied and recorded.
+bool do_transform(Image& image, const Transform& tr,
+                  const weighted::Header& wp_header,
+                  jxl::ThreadPool* pool = nullptr, bool force_jxlart = false) {
+  Transform applied = tr;
+  if (force_jxlart) {
+    if (!applied.MetaApply(image)) return false;
+  } else if (!TransformForward(applied, image, wp_header, pool)) {
+    return false;
+  }
+  image.transform.push_back(applied);
+  return true;
+}
+
+// Prepares all modular data for one frame. In order, it:
+//  - optionally applies the inverse Gaborish filter and subtracts patches
+//    from `color`;
+//  - converts the color planes (when `do_color`) and `extra_channels` to
+//    integer channels of stream_images_[0];
+//  - applies the global transforms (palettes, RCT, Squeeze) that the
+//    parameters and the available bit depth allow;
+//  - quantizes the channels when the modular part is lossy;
+//  - prepares the per-group streams and the quantization multiplier info;
+//  - finally hands over to PrepareEncoding().
+// Returns a failure for unsupported bit depths, for float-to-integer
+// conversion errors, for nb_repeats == 0 in lossy mode, and for whatever the
+// called helpers report.
+Status ModularFrameEncoder::ComputeEncodingData(
+    const FrameHeader& frame_header, const ImageMetadata& metadata,
+    Image3F* JXL_RESTRICT color, const std::vector<ImageF>& extra_channels,
+    PassesEncoderState* JXL_RESTRICT enc_state, const JxlCmsInterface& cms,
+    ThreadPool* pool, AuxOut* aux_out, bool do_color) {
+  JXL_DEBUG_V(6, "Computing modular encoding data for frame %s",
+              frame_header.DebugString().c_str());
+
+  // If the frame header enables Gaborish, pre-apply its inverse to the color
+  // planes, using the same weight for all three channels.
+  if (do_color && frame_header.loop_filter.gab) {
+    float w = 0.9908511000000001f;
+    float weights[3] = {w, w, w};
+    GaborishInverse(color, weights, pool);
+  }
+
+  // Patches are only searched for at <= 16 bits per sample, below the
+  // kCheetah speed tier and below decoding speed tier 2; found patches are
+  // subtracted from the color planes.
+  if (do_color && metadata.bit_depth.bits_per_sample <= 16 &&
+      cparams_.speed_tier < SpeedTier::kCheetah &&
+      cparams_.decoding_speed_tier < 2) {
+    FindBestPatchDictionary(*color, enc_state, cms, nullptr, aux_out,
+                            cparams_.color_transform == ColorTransform::kXYB);
+    PatchDictionaryEncoder::SubtractFrom(
+        enc_state->shared.image_features.patches, color);
+  }
+
+  // Convert ImageBundle to modular Image object
+  const size_t xsize = frame_dim_.xsize;
+  const size_t ysize = frame_dim_.ysize;
+
+  // Channel count: 3 color channels (1 for gray without color transform,
+  // 0 when color is not encoded here) plus all extra channels.
+  int nb_chans = 3;
+  if (metadata.color_encoding.IsGray() &&
+      cparams_.color_transform == ColorTransform::kNone) {
+    nb_chans = 1;
+  }
+  if (!do_color) nb_chans = 0;
+
+  nb_chans += extra_channels.size();
+
+  bool fp = metadata.bit_depth.floating_point_sample &&
+            cparams_.color_transform != ColorTransform::kXYB;
+
+  // bits_per_sample is just metadata for XYB images.
+  if (metadata.bit_depth.bits_per_sample >= 32 && do_color &&
+      cparams_.color_transform != ColorTransform::kXYB) {
+    if (metadata.bit_depth.bits_per_sample == 32 && fp == false) {
+      return JXL_FAILURE("uint32_t not supported in enc_modular");
+    } else if (metadata.bit_depth.bits_per_sample > 32) {
+      return JXL_FAILURE("bits_per_sample > 32 not supported");
+    }
+  }
+
+  // in the non-float case, there is an implicit 0 sign bit
+  int max_bitdepth =
+      do_color ? metadata.bit_depth.bits_per_sample + (fp ? 0 : 1) : 0;
+  Image& gi = stream_images_[0];
+  gi = Image(xsize, ysize, metadata.bit_depth.bits_per_sample, nb_chans);
+  int c = 0;
+  // Modular XYB: install the DC quantization factors that are later used to
+  // scale the XYB planes to integers.
+  if (cparams_.color_transform == ColorTransform::kXYB &&
+      cparams_.modular_mode == true) {
+    float enc_factors[3] = {32768.0f, 2048.0f, 2048.0f};
+    if (cparams_.butteraugli_distance > 0 && !cparams_.responsive) {
+      // quantize XYB here and then treat it as a lossless image
+      enc_factors[0] *= 1.f / (1.f + 23.f * cparams_.butteraugli_distance);
+      enc_factors[1] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance);
+      enc_factors[2] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance);
+      cparams_.butteraugli_distance = 0;
+    }
+    if (cparams_.manual_xyb_factors.size() == 3) {
+      DequantMatricesSetCustomDC(&enc_state->shared.matrices,
+                                 cparams_.manual_xyb_factors.data());
+      // TODO(jon): update max_bitdepth in this case
+    } else {
+      DequantMatricesSetCustomDC(&enc_state->shared.matrices, enc_factors);
+      max_bitdepth = 12;
+    }
+  }
+  // Largest sample value for the image bit depth; 0 is used as a placeholder
+  // when the bit depth is 32 or more.
+  pixel_type maxval = gi.bitdepth < 32 ? (1u << gi.bitdepth) - 1 : 0;
+  if (do_color) {
+    for (; c < 3; c++) {
+      // For gray images without color transform only c == 0 is converted
+      // (color_transform is kNone inside this condition, so the ternary
+      // below always yields 0).
+      if (metadata.color_encoding.IsGray() &&
+          cparams_.color_transform == ColorTransform::kNone &&
+          c != (cparams_.color_transform == ColorTransform::kXYB ? 1 : 0))
+        continue;
+      int c_out = c;
+      // XYB is encoded as YX(B-Y)
+      if (cparams_.color_transform == ColorTransform::kXYB && c < 2)
+        c_out = 1 - c_out;
+      double factor = maxval;
+      if (cparams_.color_transform == ColorTransform::kXYB)
+        factor = enc_state->shared.matrices.InvDCQuant(c);
+      if (c == 2 && cparams_.color_transform == ColorTransform::kXYB) {
+        JXL_ASSERT(!fp);
+        // B is stored relative to the already converted Y channel (output
+        // channel 0) and forced to an even value.
+        for (size_t y = 0; y < ysize; ++y) {
+          const float* const JXL_RESTRICT row_in = color->PlaneRow(c, y);
+          pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y);
+          pixel_type* const JXL_RESTRICT row_Y = gi.channel[0].Row(y);
+          for (size_t x = 0; x < xsize; ++x) {
+            row_out[x] = row_in[x] * factor + 0.5f;
+            row_out[x] -= row_Y[x];
+            // zero the lsb of B
+            row_out[x] = row_out[x] / 2 * 2;
+          }
+        }
+      } else {
+        int bits = metadata.bit_depth.bits_per_sample;
+        int exp_bits = metadata.bit_depth.exponent_bits_per_sample;
+        // Shrink the channel according to the frame's chroma subsampling
+        // shifts before filling it.
+        gi.channel[c_out].hshift =
+            enc_state->shared.frame_header.chroma_subsampling.HShift(c);
+        gi.channel[c_out].vshift =
+            enc_state->shared.frame_header.chroma_subsampling.VShift(c);
+        size_t xsize_shifted = DivCeil(xsize, 1 << gi.channel[c_out].hshift);
+        size_t ysize_shifted = DivCeil(ysize, 1 << gi.channel[c_out].vshift);
+        gi.channel[c_out].shrink(xsize_shifted, ysize_shifted);
+        // Rows are converted as independent pool tasks; a failure in any row
+        // is recorded in has_error and reported after the pool has finished.
+        std::atomic<bool> has_error{false};
+        JXL_RETURN_IF_ERROR(RunOnPool(
+            pool, 0, ysize_shifted, ThreadPool::NoInit,
+            [&](const int task, const int thread) {
+              const size_t y = task;
+              const float* const JXL_RESTRICT row_in = color->PlaneRow(c, y);
+              pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y);
+              if (!float_to_int(row_in, row_out, xsize_shifted, bits, exp_bits,
+                                fp, factor)) {
+                has_error = true;
+              };
+            },
+            "float2int"));
+        if (has_error) {
+          return JXL_FAILURE("Error in float to integer conversion");
+        }
+      }
+    }
+    // Gray without color transform filled only one channel, so the extra
+    // channels start at index 1.
+    if (metadata.color_encoding.IsGray() &&
+        cparams_.color_transform == ColorTransform::kNone)
+      c = 1;
+  }
+
+  // Extra channels: each one uses its own bit depth and its own upsampling
+  // factor, and may raise max_bitdepth.
+  for (size_t ec = 0; ec < extra_channels.size(); ec++, c++) {
+    const ExtraChannelInfo& eci = metadata.extra_channel_info[ec];
+    size_t ecups = frame_header.extra_channel_upsampling[ec];
+    gi.channel[c].shrink(DivCeil(frame_dim_.xsize_upsampled, ecups),
+                         DivCeil(frame_dim_.ysize_upsampled, ecups));
+    gi.channel[c].hshift = gi.channel[c].vshift =
+        CeilLog2Nonzero(ecups) - CeilLog2Nonzero(frame_header.upsampling);
+
+    int bits = eci.bit_depth.bits_per_sample;
+    int exp_bits = eci.bit_depth.exponent_bits_per_sample;
+    bool fp = eci.bit_depth.floating_point_sample;
+    double factor = (fp ? 1 : ((1u << eci.bit_depth.bits_per_sample) - 1));
+    if (bits + (fp ? 0 : 1) > max_bitdepth) max_bitdepth = bits + (fp ? 0 : 1);
+    std::atomic<bool> has_error{false};
+    JXL_RETURN_IF_ERROR(RunOnPool(
+        pool, 0, gi.channel[c].plane.ysize(), ThreadPool::NoInit,
+        [&](const int task, const int thread) {
+          const size_t y = task;
+          const float* const JXL_RESTRICT row_in = extra_channels[ec].Row(y);
+          pixel_type* const JXL_RESTRICT row_out = gi.channel[c].Row(y);
+          if (!float_to_int(row_in, row_out, gi.channel[c].plane.xsize(), bits,
+                            exp_bits, fp, factor)) {
+            has_error = true;
+          };
+        },
+        "float2int"));
+    if (has_error) return JXL_FAILURE("Error in float to integer conversion");
+  }
+  JXL_ASSERT(c == nb_chans);
+
+  // Level 5 allows at most 16 bits here; any other level value allows 32.
+  int level_max_bitdepth = (cparams_.level == 5 ? 16 : 32);
+  if (max_bitdepth > level_max_bitdepth)
+    return JXL_FAILURE(
+        "Bitdepth too high for level %i (need %i bits, have only %i in this "
+        "level)",
+        cparams_.level, max_bitdepth, level_max_bitdepth);
+
+  // Set options and apply transformations
+  // A lossy modular part disables all palette transforms.
+  if (!cparams_.ModularPartIsLossless()) {
+    if (cparams_.palette_colors != 0) {
+      JXL_DEBUG_V(3, "Lossy encode, not doing palette transforms");
+    }
+    if (cparams_.color_transform == ColorTransform::kXYB) {
+      cparams_.channel_colors_pre_transform_percent = 0;
+    }
+    cparams_.channel_colors_percent = 0;
+    cparams_.palette_colors = 0;
+    cparams_.lossy_palette = false;
+  }
+
+  // Global palette
+  if (cparams_.palette_colors != 0 || cparams_.lossy_palette) {
+    // all-channel palette (e.g. RGBA)
+    if (gi.channel.size() - gi.nb_meta_channels > 1) {
+      Transform maybe_palette(TransformId::kPalette);
+      maybe_palette.begin_c = gi.nb_meta_channels;
+      maybe_palette.num_c = gi.channel.size() - gi.nb_meta_channels;
+      // The sign of palette_colors selects ordered (>= 0) vs. unordered
+      // palettes; its magnitude caps the number of colors.
+      maybe_palette.nb_colors =
+          std::min((int)(xsize * ysize / 2), std::abs(cparams_.palette_colors));
+      maybe_palette.ordered_palette = cparams_.palette_colors >= 0;
+      maybe_palette.lossy_palette =
+          (cparams_.lossy_palette && maybe_palette.num_c == 3);
+      if (maybe_palette.lossy_palette) {
+        maybe_palette.predictor = delta_pred_;
+      }
+      // TODO(veluca): use a custom weighted header if using the weighted
+      // predictor.
+      do_transform(gi, maybe_palette, weighted::Header(), pool,
+                   cparams_.options.zero_tokens);
+    }
+    // all-minus-one-channel palette (RGB with separate alpha, or CMY with
+    // separate K)
+    if (gi.channel.size() - gi.nb_meta_channels > 3) {
+      Transform maybe_palette_3(TransformId::kPalette);
+      maybe_palette_3.begin_c = gi.nb_meta_channels;
+      maybe_palette_3.num_c = gi.channel.size() - gi.nb_meta_channels - 1;
+      maybe_palette_3.nb_colors =
+          std::min((int)(xsize * ysize / 3), std::abs(cparams_.palette_colors));
+      maybe_palette_3.ordered_palette = cparams_.palette_colors >= 0;
+      maybe_palette_3.lossy_palette = cparams_.lossy_palette;
+      if (maybe_palette_3.lossy_palette) {
+        maybe_palette_3.predictor = delta_pred_;
+      }
+      do_transform(gi, maybe_palette_3, weighted::Header(), pool,
+                   cparams_.options.zero_tokens);
+    }
+  }
+
+  // Global channel palette
+  if (cparams_.channel_colors_pre_transform_percent > 0 &&
+      !cparams_.lossy_palette &&
+      (cparams_.speed_tier <= SpeedTier::kThunder ||
+       (do_color && metadata.bit_depth.bits_per_sample > 8))) {
+    // single channel palette (like FLIF's ChannelCompact)
+    size_t nb_channels = gi.channel.size() - gi.nb_meta_channels;
+    // max_bitdepth is recomputed from the compacted channels; it falls back
+    // to the original value as soon as one channel is not compacted.
+    int orig_bitdepth = max_bitdepth;
+    max_bitdepth = 0;
+    for (size_t i = 0; i < nb_channels; i++) {
+      int32_t min, max;
+      compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max);
+      int64_t colors = (int64_t)max - min + 1;
+      JXL_DEBUG_V(10, "Channel %" PRIuS ": range=%i..%i", i, min, max);
+      Transform maybe_palette_1(TransformId::kPalette);
+      maybe_palette_1.begin_c = i + gi.nb_meta_channels;
+      maybe_palette_1.num_c = 1;
+      // simple heuristic: if less than X percent of the values in the range
+      // actually occur, it is probably worth it to do a compaction
+      // (but only if the channel palette is less than 6% the size of the
+      // image itself)
+      maybe_palette_1.nb_colors = std::min(
+          (int)(xsize * ysize / 16),
+          (int)(cparams_.channel_colors_pre_transform_percent / 100. * colors));
+      if (do_transform(gi, maybe_palette_1, weighted::Header(), pool)) {
+        // effective bit depth is lower, adjust quantization accordingly
+        compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max);
+        if (max < maxval) maxval = max;
+        int ch_bitdepth =
+            (max > 0 ? CeilLog2Nonzero(static_cast<uint32_t>(max)) : 0);
+        if (ch_bitdepth > max_bitdepth) max_bitdepth = ch_bitdepth;
+      } else
+        max_bitdepth = orig_bitdepth;
+    }
+  }
+
+  // don't do an RCT if we're short on bits
+  if (cparams_.color_transform == ColorTransform::kNone && do_color &&
+      gi.channel.size() - gi.nb_meta_channels >= 3 &&
+      max_bitdepth + 1 < level_max_bitdepth) {
+    // A negative colorspace selects RCT type 6 (variable name: ycocg) under
+    // the condition below; a positive one is used directly as the RCT type.
+    // Either way one extra bit is accounted for.
+    if (cparams_.colorspace < 0 && (!cparams_.ModularPartIsLossless() ||
+                                    cparams_.speed_tier > SpeedTier::kHare)) {
+      Transform ycocg{TransformId::kRCT};
+      ycocg.rct_type = 6;
+      ycocg.begin_c = gi.nb_meta_channels;
+      do_transform(gi, ycocg, weighted::Header(), pool);
+      max_bitdepth++;
+    } else if (cparams_.colorspace > 0) {
+      Transform sg(TransformId::kRCT);
+      sg.begin_c = gi.nb_meta_channels;
+      sg.rct_type = cparams_.colorspace;
+      do_transform(gi, sg, weighted::Header(), pool);
+      max_bitdepth++;
+    }
+  }
+
+  // don't do squeeze if we don't have some spare bits
+  if (cparams_.responsive && !gi.channel.empty() &&
+      max_bitdepth + 2 < level_max_bitdepth) {
+    Transform t(TransformId::kSqueeze);
+    t.squeezes = cparams_.squeezes;
+    do_transform(gi, t, weighted::Header(), pool);
+    max_bitdepth += 2;
+  }
+
+  if (max_bitdepth + 1 > level_max_bitdepth) {
+    // force no group RCTs if we don't have a spare bit
+    cparams_.colorspace = 0;
+  }
+  JXL_ASSERT(max_bitdepth <= level_max_bitdepth);
+
+  // Per-channel quantization steps; stays empty when the modular part is
+  // lossless.
+  std::vector<uint32_t> quants;
+
+  if (!cparams_.ModularPartIsLossless()) {
+    quants.resize(gi.channel.size(), 1);
+    float quantizer = 0.25f;
+    if (!cparams_.responsive) {
+      JXL_DEBUG_V(1,
+                  "Warning: lossy compression without Squeeze "
+                  "transform is just color quantization.");
+      quantizer *= 0.1f;
+    }
+    float bitdepth_correction = 1.f;
+    if (cparams_.color_transform != ColorTransform::kXYB) {
+      bitdepth_correction = maxval / 255.f;
+    }
+    // quantizers[0..2] belong to the color components, the following entries
+    // to the extra channels in order.
+    std::vector<float> quantizers;
+    float dist = cparams_.butteraugli_distance;
+    for (size_t i = 0; i < 3; i++) {
+      quantizers.push_back(quantizer * dist * bitdepth_correction);
+    }
+    for (size_t i = 0; i < extra_channels.size(); i++) {
+      int ec_bitdepth =
+          metadata.extra_channel_info[i].bit_depth.bits_per_sample;
+      pixel_type ec_maxval = ec_bitdepth < 32 ? (1u << ec_bitdepth) - 1 : 0;
+      bitdepth_correction = ec_maxval / 255.f;
+      // A per-channel distance overrides the previous one; a negative value
+      // falls back to the global butteraugli distance.
+      if (i < cparams_.ec_distance.size()) dist = cparams_.ec_distance[i];
+      if (dist < 0) dist = cparams_.butteraugli_distance;
+      quantizers.push_back(quantizer * dist * bitdepth_correction);
+    }
+    if (cparams_.options.nb_repeats == 0) {
+      return JXL_FAILURE("nb_repeats = 0 not supported with modular lossy!");
+    }
+    for (uint32_t i = gi.nb_meta_channels; i < gi.channel.size(); i++) {
+      Channel& ch = gi.channel[i];
+      // The quantization tables are indexed by the clamped shift.
+      int shift = ch.hshift + ch.vshift;  // number of pixel halvings
+      if (shift > 16) shift = 16;
+      if (shift > 0) shift--;
+      int q;
+      // assuming default Squeeze here
+      int component =
+          (do_color ? 0 : 3) + ((i - gi.nb_meta_channels) % nb_chans);
+      // last 4 channels are final chroma residuals
+      if (nb_chans > 2 && i >= gi.channel.size() - 4 && cparams_.responsive) {
+        component = 1;
+      }
+      if (cparams_.color_transform == ColorTransform::kXYB && component < 3) {
+        q = quantizers[component] * squeeze_quality_factor_xyb *
+            squeeze_xyb_qtable[component][shift];
+      } else {
+        if (cparams_.colorspace != 0 && component > 0 && component < 3) {
+          q = quantizers[component] * squeeze_quality_factor *
+              squeeze_chroma_qtable[shift];
+        } else {
+          q = quantizers[component] * squeeze_quality_factor *
+              squeeze_luma_factor * squeeze_luma_qtable[shift];
+        }
+      }
+      // A step below 1 is clamped to 1.
+      if (q < 1) q = 1;
+      QuantizeChannel(gi.channel[i], q);
+      quants[i] = q;
+    }
+  }
+
+  // Fill other groups.
+  struct GroupParams {
+    Rect rect;
+    int minShift;
+    int maxShift;
+    ModularStreamId id;
+  };
+  std::vector<GroupParams> stream_params;
+
+  stream_options_[0] = cparams_.options;
+
+  // DC
+  for (size_t group_id = 0; group_id < frame_dim_.num_dc_groups; group_id++) {
+    const size_t gx = group_id % frame_dim_.xsize_dc_groups;
+    const size_t gy = group_id / frame_dim_.xsize_dc_groups;
+    const Rect rect(gx * frame_dim_.dc_group_dim, gy * frame_dim_.dc_group_dim,
+                    frame_dim_.dc_group_dim, frame_dim_.dc_group_dim);
+    // minShift==3 because (frame_dim.dc_group_dim >> 3) == frame_dim.group_dim
+    // maxShift==1000 is infinity
+    stream_params.push_back(
+        GroupParams{rect, 3, 1000, ModularStreamId::ModularDC(group_id)});
+  }
+  // AC global -> nothing.
+  // AC
+  // One entry per (group, pass); the shift bracket of each pass comes from
+  // the frame header.
+  for (size_t group_id = 0; group_id < frame_dim_.num_groups; group_id++) {
+    const size_t gx = group_id % frame_dim_.xsize_groups;
+    const size_t gy = group_id / frame_dim_.xsize_groups;
+    const Rect mrect(gx * frame_dim_.group_dim, gy * frame_dim_.group_dim,
+                     frame_dim_.group_dim, frame_dim_.group_dim);
+    for (size_t i = 0; i < enc_state->progressive_splitter.GetNumPasses();
+         i++) {
+      int maxShift, minShift;
+      frame_header.passes.GetDownsamplingBracket(i, minShift, maxShift);
+      stream_params.push_back(GroupParams{
+          mrect, minShift, maxShift, ModularStreamId::ModularAC(group_id, i)});
+    }
+  }
+  // if there's only one group, everything ends up in GlobalModular
+  // in that case, also try RCTs/WP params for the one group
+  if (stream_params.size() == 2) {
+    stream_params.push_back(GroupParams{Rect(0, 0, xsize, ysize), 0, 1000,
+                                        ModularStreamId::Global()});
+  }
+  gi_channel_.resize(stream_images_.size());
+
+  // Prepare every stream on the pool. Each task first resets the options of
+  // its own stream; a failing PrepareStreamParams aborts via JXL_CHECK.
+  JXL_RETURN_IF_ERROR(RunOnPool(
+      pool, 0, stream_params.size(), ThreadPool::NoInit,
+      [&](const uint32_t i, size_t /* thread */) {
+        stream_options_[stream_params[i].id.ID(frame_dim_)] = cparams_.options;
+        JXL_CHECK(PrepareStreamParams(
+            stream_params[i].rect, cparams_, stream_params[i].minShift,
+            stream_params[i].maxShift, stream_params[i].id, do_color));
+      },
+      "ChooseParams"));
+  {
+    // Clear out channels that have been copied to groups.
+    // The first loop skips the leading channels that fit into a single group
+    // in both dimensions; every channel from the first larger one onwards has
+    // its plane replaced by an empty one.
+    Image& full_image = stream_images_[0];
+    size_t c = full_image.nb_meta_channels;
+    for (; c < full_image.channel.size(); c++) {
+      Channel& fc = full_image.channel[c];
+      if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break;
+    }
+    for (; c < full_image.channel.size(); c++) {
+      full_image.channel[c].plane = ImageI();
+    }
+  }
+
+  // Lossy only: record for each stream which channel ranges use which
+  // quantization step.
+  if (!quants.empty()) {
+    for (uint32_t stream_id = 0; stream_id < stream_images_.size();
+         stream_id++) {
+      // skip non-modular stream_ids
+      if (stream_id > 0 && gi_channel_[stream_id].empty()) continue;
+      const Image& image = stream_images_[stream_id];
+      const ModularOptions& options = stream_options_[stream_id];
+      for (uint32_t i = image.nb_meta_channels; i < image.channel.size(); i++) {
+        // Channels larger than max_chan_size are skipped here.
+        if (i >= image.nb_meta_channels &&
+            (image.channel[i].w > options.max_chan_size ||
+             image.channel[i].h > options.max_chan_size)) {
+          continue;
+        }
+        if (stream_id > 0 && gi_channel_[stream_id].empty()) continue;
+        // Map the channel of this stream back to its index in the full image
+        // to look up its quantization step.
+        size_t ch_id = stream_id == 0
+                           ? i
+                           : gi_channel_[stream_id][i - image.nb_meta_channels];
+        uint32_t q = quants[ch_id];
+        // Inform the tree splitting heuristics that each channel in each group
+        // used this quantization factor. This will produce a tree with the
+        // given multipliers.
+        if (multiplier_info_.empty() ||
+            multiplier_info_.back().range[1][0] != stream_id ||
+            multiplier_info_.back().multiplier != q) {
+          StaticPropRange range;
+          range[0] = {{i, i + 1}};
+          range[1] = {{stream_id, stream_id + 1}};
+          multiplier_info_.push_back({range, (uint32_t)q});
+        } else {
+          // Previous channel in the same group had the same quantization
+          // factor. Don't provide two different ranges, as that creates
+          // unnecessary nodes.
+          multiplier_info_.back().range[0][1] = i + 1;
+        }
+      }
+    }
+    // Merge group+channel settings that have the same channels and quantization
+    // factors, to avoid unnecessary nodes.
+    std::sort(multiplier_info_.begin(), multiplier_info_.end(),
+              [](ModularMultiplierInfo a, ModularMultiplierInfo b) {
+                return std::make_tuple(a.range, a.multiplier) <
+                       std::make_tuple(b.range, b.multiplier);
+              });
+    // In-place compaction: an entry is folded into the previously kept one
+    // when channel range and multiplier match and its stream range starts
+    // where the previous one ends.
+    size_t new_num = 1;
+    for (size_t i = 1; i < multiplier_info_.size(); i++) {
+      ModularMultiplierInfo& prev = multiplier_info_[new_num - 1];
+      ModularMultiplierInfo& cur = multiplier_info_[i];
+      if (prev.range[0] == cur.range[0] && prev.multiplier == cur.multiplier &&
+          prev.range[1][1] == cur.range[1][0]) {
+        prev.range[1][1] = cur.range[1][1];
+      } else {
+        multiplier_info_[new_num++] = multiplier_info_[i];
+      }
+    }
+    multiplier_info_.resize(new_num);
+  }
+
+  JXL_RETURN_IF_ERROR(ValidateChannelDimensions(gi, stream_options_[0]));
+
+  return PrepareEncoding(frame_header, pool, enc_state->heuristics.get(),
+                         aux_out);
+}
+
+Status ModularFrameEncoder::PrepareEncoding(const FrameHeader& frame_header,
+                                            ThreadPool* pool,
+                                            EncoderHeuristics* heuristics,
+                                            AuxOut* aux_out) {
+  if (!tree_.empty()) return true;
+  // Nothing to do above if tree_ is already set. Otherwise build the tree
+  // (custom, learned or predefined), then tokenize every stream against it.
+  size_t num_streams = stream_images_.size();
+  stream_headers_.resize(num_streams);
+  tokens_.resize(num_streams);
+
+  if (heuristics->CustomFixedTreeLossless(frame_dim_, &tree_)) {
+    // Using a fixed tree provided by the heuristics.
+  } else if (cparams_.speed_tier < SpeedTier::kFalcon ||
+             !cparams_.modular_mode) {
+    // Avoid creating a tree with leaves that don't correspond to any pixels.
+    std::vector<size_t> useful_splits;
+    useful_splits.reserve(tree_splits_.size());
+    for (size_t chunk = 0; chunk < tree_splits_.size() - 1; chunk++) {
+      bool has_pixels = false;
+      size_t start = tree_splits_[chunk];
+      size_t stop = tree_splits_[chunk + 1];
+      for (size_t i = start; i < stop; i++) {
+        if (!stream_images_[i].empty()) has_pixels = true;
+      }
+      if (has_pixels) {
+        useful_splits.push_back(tree_splits_[chunk]);
+      }
+    }
+    // Don't do anything if modular mode does not have any pixels in this image
+    if (useful_splits.empty()) return true;
+    useful_splits.push_back(tree_splits_.back());  // End of the last range.
+    // Raised by workers on a rejected predictor/property setup; read below.
+    std::atomic_flag invalid_force_wp = ATOMIC_FLAG_INIT;
+
+    std::vector<Tree> trees(useful_splits.size() - 1);
+    JXL_RETURN_IF_ERROR(RunOnPool(
+        pool, 0, useful_splits.size() - 1, ThreadPool::NoInit,
+        [&](const uint32_t chunk, size_t /* thread */) {
+          // TODO(veluca): parallelize more.
+          size_t total_pixels = 0;
+          uint32_t start = useful_splits[chunk];
+          uint32_t stop = useful_splits[chunk + 1];
+          while (start < stop && stream_images_[start].empty()) ++start;
+          while (start < stop && stream_images_[stop - 1].empty()) --stop;
+          uint32_t max_c = 0;
+          if (stream_options_[start].tree_kind !=
+              ModularOptions::TreeKind::kLearn) {
+            for (size_t i = start; i < stop; i++) {
+              for (const Channel& ch : stream_images_[i].channel) {
+                total_pixels += ch.w * ch.h;
+              }
+            }
+            trees[chunk] =
+                PredefinedTree(stream_options_[start].tree_kind, total_pixels);
+            return;
+          }
+          TreeSamples tree_samples;
+          if (!tree_samples.SetPredictor(stream_options_[start].predictor,
+                                         stream_options_[start].wp_tree_mode)) {
+            invalid_force_wp.test_and_set(std::memory_order_acq_rel);
+            return;
+          }
+          if (!tree_samples.SetProperties(
+                  stream_options_[start].splitting_heuristics_properties,
+                  stream_options_[start].wp_tree_mode)) {
+            invalid_force_wp.test_and_set(std::memory_order_acq_rel);
+            return;
+          }
+          std::vector<pixel_type> pixel_samples;
+          std::vector<pixel_type> diff_samples;
+          std::vector<uint32_t> group_pixel_count;
+          std::vector<uint32_t> channel_pixel_count;
+          for (size_t i = start; i < stop; i++) {
+            max_c = std::max<uint32_t>(stream_images_[i].channel.size(), max_c);
+            CollectPixelSamples(stream_images_[i], stream_options_[i], i,
+                                group_pixel_count, channel_pixel_count,
+                                pixel_samples, diff_samples);
+          }
+          StaticPropRange range;
+          range[0] = {{0, max_c}};
+          range[1] = {{start, stop}};
+          auto local_multiplier_info = multiplier_info_;  // Chunk-local copy.
+
+          tree_samples.PreQuantizeProperties(
+              range, local_multiplier_info, group_pixel_count,
+              channel_pixel_count, pixel_samples, diff_samples,
+              stream_options_[start].max_property_values);
+          for (size_t i = start; i < stop; i++) {
+            JXL_CHECK(ModularGenericCompress(
+                stream_images_[i], stream_options_[i], /*writer=*/nullptr,
+                /*aux_out=*/nullptr, 0, i, &tree_samples, &total_pixels));
+          }
+
+          // TODO(veluca): parallelize more.
+          trees[chunk] =
+              LearnTree(std::move(tree_samples), total_pixels,
+                        stream_options_[start], local_multiplier_info, range);
+        },
+        "LearnTrees"));
+    if (invalid_force_wp.test_and_set(std::memory_order_acq_rel)) {
+      return JXL_FAILURE("PrepareEncoding: force_no_wp with {Weighted}");
+    }
+    tree_.clear();
+    MergeTrees(trees, useful_splits, 0, useful_splits.size() - 1, &tree_);
+  } else {
+    // Fixed tree, selected by speed tier.
+    size_t total_pixels = 0;
+    for (const Image& img : stream_images_) {
+      for (const Channel& ch : img.channel) {
+        total_pixels += ch.w * ch.h;
+      }
+    }
+    if (cparams_.speed_tier <= SpeedTier::kFalcon) {
+      tree_ =
+          PredefinedTree(ModularOptions::TreeKind::kWPFixedDC, total_pixels);
+    } else if (cparams_.speed_tier <= SpeedTier::kThunder) {
+      tree_ = PredefinedTree(ModularOptions::TreeKind::kGradientFixedDC,
+                             total_pixels);
+    } else {
+      tree_ = {PropertyDecisionNode::Leaf(Predictor::Gradient)};
+    }
+  }
+  tree_tokens_.resize(1);
+  tree_tokens_[0].clear();
+  Tree decoded_tree;
+  TokenizeTree(tree_, &tree_tokens_[0], &decoded_tree);
+  JXL_ASSERT(tree_.size() == decoded_tree.size());
+  tree_ = std::move(decoded_tree);  // Keep the tree TokenizeTree produced.
+
+  /* TODO(szabadka) Add text output callback to cparams
+  if (kPrintTree && WantDebugOutput(aux_out)) {
+    if (frame_header.dc_level > 0) {
+      PrintTree(tree_, aux_out->debug_prefix + "/dc_frame_level" +
+                           std::to_string(frame_header.dc_level) + "_tree");
+    } else {
+      PrintTree(tree_, aux_out->debug_prefix + "/global_tree");
+    }
+  } */
+  // Tokenize each stream against the final tree; output goes to tokens_.
+  image_widths_.resize(num_streams);
+  JXL_RETURN_IF_ERROR(RunOnPool(
+      pool, 0, num_streams, ThreadPool::NoInit,
+      [&](const uint32_t stream_id, size_t /* thread */) {
+        AuxOut my_aux_out;  // Local scratch, dropped when the task ends.
+        tokens_[stream_id].clear();
+        JXL_CHECK(ModularGenericCompress(
+            stream_images_[stream_id], stream_options_[stream_id],
+            /*writer=*/nullptr, &my_aux_out, 0, stream_id,
+            /*tree_samples=*/nullptr,
+            /*total_pixels=*/nullptr,
+            /*tree=*/&tree_, /*header=*/&stream_headers_[stream_id],
+            /*tokens=*/&tokens_[stream_id],
+            /*widths=*/&image_widths_[stream_id]));
+      },
+      "ComputeTokens"));
+  return true;
+}
+
+Status ModularFrameEncoder::EncodeGlobalInfo(BitWriter* writer,
+                                             AuxOut* aux_out) {
+  BitWriter::Allotment allotment(writer, 1);  // One bit: is a tree written?
+  // No tree tokens: if we are using brotli, or not using modular mode.
+  if (tree_tokens_.empty() || tree_tokens_[0].empty()) {
+    writer->Write(1, 0);
+    allotment.ReclaimAndCharge(writer, kLayerModularTree, aux_out);
+    return true;
+  }
+  writer->Write(1, 1);
+  allotment.ReclaimAndCharge(writer, kLayerModularTree, aux_out);
+
+  // Write tree. Histogram parameters depend on speed and decoding tiers.
+  HistogramParams params;
+  if (cparams_.speed_tier > SpeedTier::kKitten) {
+    params.clustering = HistogramParams::ClusteringType::kFast;
+    params.ans_histogram_strategy =
+        cparams_.speed_tier > SpeedTier::kThunder
+            ? HistogramParams::ANSHistogramStrategy::kFast
+            : HistogramParams::ANSHistogramStrategy::kApproximate;
+    params.lz77_method =
+        cparams_.decoding_speed_tier >= 3 && cparams_.modular_mode
+            ? (cparams_.speed_tier >= SpeedTier::kFalcon
+                   ? HistogramParams::LZ77Method::kRLE
+                   : HistogramParams::LZ77Method::kLZ77)
+            : HistogramParams::LZ77Method::kNone;
+    // Near-lossless DC, as well as modular mode, require choosing hybrid uint
+    // more carefully.
+    if ((!extra_dc_precision.empty() && extra_dc_precision[0] != 0) ||
+        (cparams_.modular_mode && cparams_.speed_tier < SpeedTier::kCheetah)) {
+      params.uint_method = HistogramParams::HybridUintMethod::kFast;
+    } else {
+      params.uint_method = HistogramParams::HybridUintMethod::kNone;
+    }
+  } else if (cparams_.speed_tier <= SpeedTier::kTortoise) {
+    params.lz77_method = HistogramParams::LZ77Method::kOptimal;
+  } else {
+    params.lz77_method = HistogramParams::LZ77Method::kLZ77;
+  }
+  if (cparams_.decoding_speed_tier >= 1) {  // From here on: overrides.
+    params.max_histograms = 12;
+  }
+  if (cparams_.decoding_speed_tier >= 1 && cparams_.responsive) {
+    params.lz77_method = cparams_.speed_tier >= SpeedTier::kCheetah
+                             ? HistogramParams::LZ77Method::kRLE
+                         : cparams_.speed_tier >= SpeedTier::kKitten
+                             ? HistogramParams::LZ77Method::kLZ77
+                             : HistogramParams::LZ77Method::kOptimal;
+  }
+  if (cparams_.decoding_speed_tier >= 2 && cparams_.responsive) {
+    params.uint_method = HistogramParams::HybridUintMethod::k000;
+    params.force_huffman = true;
+  }
+  BuildAndEncodeHistograms(params, kNumTreeContexts, tree_tokens_, &code_,
+                           &context_map_, writer, kLayerModularTree, aux_out);
+  WriteTokens(tree_tokens_[0], code_, context_map_, writer, kLayerModularTree,
+              aux_out);
+  params.image_widths = image_widths_;  // Only set for the stream tokens.
+  // Write histograms of the stream tokens, reusing `params` from above.
+  BuildAndEncodeHistograms(params, (tree_.size() + 1) / 2, tokens_, &code_,
+                           &context_map_, writer, kLayerModularGlobal, aux_out);
+  return true;
+}
+
+// Emits the header and the precomputed tokens of `stream` into `writer`.
+Status ModularFrameEncoder::EncodeStream(BitWriter* writer, AuxOut* aux_out,
+                                         size_t layer,
+                                         const ModularStreamId& stream) {
+  const size_t id = stream.ID(frame_dim_);
+  // Image with no channels, header never gets decoded.
+  if (stream_images_[id].channel.empty()) return true;
+  const auto& header = stream_headers_[id];
+  JXL_RETURN_IF_ERROR(Bundle::Write(header, writer, layer, aux_out));
+  WriteTokens(tokens_[id], code_, context_map_, writer, layer, aux_out);
+  return true;
+}
+
+namespace {
+float EstimateWPCost(const Image& img, size_t i) {
+  // Cost of `img` under the weighted predictor configured by PredictorMode(i):
+  // raw token bits plus token entropy, bucketed by the first WP property.
+  const int32_t cutoffs[] = {-500, -392, -255, -191, -127, -95, -63, -47, -31,
+                             -23,  -15,  -11,  -7,   -4,   -3,  -1,  0,   1,
+                             3,    5,    7,    11,   15,   23,  31,  47,  63,
+                             95,   127,  191,  255,  392,  500};
+  constexpr size_t kNumContexts = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
+  Histogram histograms[kNumContexts] = {};
+  HybridUintConfig uint_config;
+  size_t raw_bits = 0;
+  float entropy = 0;
+  weighted::Header wp_header;
+  PredictorMode(i, &wp_header);
+  for (const Channel& ch : img.channel) {
+    const intptr_t stride = ch.plane.PixelsPerRow();
+    weighted::State wp_state(wp_header, ch.w, ch.h);
+    Properties properties(1);
+    for (size_t y = 0; y < ch.h; y++) {
+      const pixel_type* JXL_RESTRICT row = ch.Row(y);
+      for (size_t x = 0; x < ch.w; x++) {
+        size_t offset = 0;
+        pixel_type_w left = (x ? row[x - 1] : y ? *(row + x - stride) : 0);
+        pixel_type_w top = (y ? *(row + x - stride) : left);
+        pixel_type_w topleft = (x && y ? *(row + x - 1 - stride) : left);
+        pixel_type_w topright =
+            (x + 1 < ch.w && y ? *(row + x + 1 - stride) : top);
+        pixel_type_w toptop = (y > 1 ? *(row + x - stride - stride) : top);
+        pixel_type guess = wp_state.Predict</*compute_properties=*/true>(
+            x, y, ch.w, top, left, topright, topleft, toptop, &properties,
+            offset);
+        size_t ctx = 0;
+        for (int cutoff : cutoffs) ctx += cutoff >= properties[0];
+        pixel_type residual = row[x] - guess;
+        uint32_t token, nbits, bits;
+        uint_config.Encode(PackSigned(residual), &token, &nbits, &bits);
+        histograms[ctx].Add(token);
+        raw_bits += nbits;
+        wp_state.UpdateErrors(row[x], x, y, ch.w);
+      }
+    }
+    for (Histogram& h : histograms) {
+      entropy += h.ShannonEntropy();
+      h.Clear();
+    }
+  }
+  return entropy + raw_bits;
+}
+
+float EstimateCost(const Image& img) {
+  // TODO(veluca): consider SIMDfication of this code.
+  // Cost: raw token bits plus per-context token entropy, summed per channel.
+  const uint32_t cutoffs[] = {0,  1,  3,  5,   7,   11,  15,  23, 31,
+                              47, 63, 95, 127, 191, 255, 392, 500};
+  constexpr size_t kNumContexts = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
+  Histogram histograms[kNumContexts] = {};
+  HybridUintConfig uint_config;
+  size_t raw_bits = 0;
+  float entropy = 0;
+  for (const Channel& ch : img.channel) {
+    const intptr_t stride = ch.plane.PixelsPerRow();
+    for (size_t y = 0; y < ch.h; y++) {
+      const pixel_type* JXL_RESTRICT row = ch.Row(y);
+      for (size_t x = 0; x < ch.w; x++) {
+        pixel_type_w left = (x ? row[x - 1] : y ? *(row + x - stride) : 0);
+        pixel_type_w top = (y ? *(row + x - stride) : left);
+        pixel_type_w topleft = (x && y ? *(row + x - 1 - stride) : left);
+        const size_t spread = std::max({left, top, topleft}) -
+                              std::min({left, top, topleft});
+        size_t ctx = 0;
+        for (uint32_t cutoff : cutoffs) ctx += cutoff > spread;
+        pixel_type residual = row[x] - ClampedGradient(top, left, topleft);
+        uint32_t token, nbits, bits;
+        uint_config.Encode(PackSigned(residual), &token, &nbits, &bits);
+        histograms[ctx].Add(token);
+        raw_bits += nbits;
+      }
+    }
+    // Histograms are read out and cleared after every channel.
+    for (Histogram& h : histograms) {
+      entropy += h.ShannonEntropy();
+      h.Clear();
+    }
+  }
+  return entropy + raw_bits;
+}
+
+}  // namespace
+
+Status ModularFrameEncoder::PrepareStreamParams(const Rect& rect,
+                                                const CompressParams& cparams_,
+                                                int minShift, int maxShift,
+                                                const ModularStreamId& stream,
+                                                bool do_color) {
+  size_t stream_id = stream.ID(frame_dim_);
+  Image& full_image = stream_images_[0];
+  const size_t xsize = rect.xsize();
+  const size_t ysize = rect.ysize();
+  Image& gi = stream_images_[stream_id];
+  if (stream_id > 0) {  // Group stream: cut its rect out of full_image.
+    gi = Image(xsize, ysize, full_image.bitdepth, 0);
+    // start at the first bigger-than-frame_dim.group_dim non-metachannel
+    size_t c = full_image.nb_meta_channels;
+    for (; c < full_image.channel.size(); c++) {
+      Channel& fc = full_image.channel[c];
+      if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break;
+    }
+    for (; c < full_image.channel.size(); c++) {
+      Channel& fc = full_image.channel[c];
+      int shift = std::min(fc.hshift, fc.vshift);
+      if (shift > maxShift) continue;
+      if (shift < minShift) continue;
+      Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift,
+             rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h);
+      if (r.xsize() == 0 || r.ysize() == 0) continue;
+      gi_channel_[stream_id].push_back(c);
+      Channel gc(r.xsize(), r.ysize());
+      gc.hshift = fc.hshift;
+      gc.vshift = fc.vshift;
+      for (size_t y = 0; y < r.ysize(); ++y) {
+        memcpy(gc.Row(y), r.ConstRow(fc.plane, y),
+               r.xsize() * sizeof(pixel_type));
+      }
+      gi.channel.emplace_back(std::move(gc));
+    }
+
+    if (gi.channel.empty()) return true;
+    // Do some per-group transforms
+    // (`cparams_` is the function parameter here; it shadows the member.)
+    // Local palette
+    // TODO(veluca): make this work with quantize-after-prediction in lossy
+    // mode.
+    if (cparams_.butteraugli_distance == 0.f && cparams_.palette_colors != 0 &&
+        cparams_.speed_tier < SpeedTier::kCheetah) {
+      // all-channel palette (e.g. RGBA)
+      if (gi.channel.size() - gi.nb_meta_channels > 1) {
+        Transform maybe_palette(TransformId::kPalette);
+        maybe_palette.begin_c = gi.nb_meta_channels;
+        maybe_palette.num_c = gi.channel.size() - gi.nb_meta_channels;
+        maybe_palette.nb_colors = std::abs(cparams_.palette_colors);
+        maybe_palette.ordered_palette = cparams_.palette_colors >= 0;
+        do_transform(gi, maybe_palette, weighted::Header());
+      }
+      // all-minus-one-channel palette (RGB with separate alpha, or CMY with
+      // separate K)
+      if (gi.channel.size() - gi.nb_meta_channels > 3) {
+        Transform maybe_palette_3(TransformId::kPalette);
+        maybe_palette_3.begin_c = gi.nb_meta_channels;
+        maybe_palette_3.num_c = gi.channel.size() - gi.nb_meta_channels - 1;
+        maybe_palette_3.nb_colors = std::abs(cparams_.palette_colors);
+        maybe_palette_3.ordered_palette = cparams_.palette_colors >= 0;
+        maybe_palette_3.lossy_palette = cparams_.lossy_palette;
+        if (maybe_palette_3.lossy_palette) {
+          maybe_palette_3.predictor = Predictor::Weighted;
+        }
+        do_transform(gi, maybe_palette_3, weighted::Header());
+      }
+    }
+
+    // Local channel palette
+    if (cparams_.channel_colors_percent > 0 &&
+        cparams_.butteraugli_distance == 0.f && !cparams_.lossy_palette &&
+        cparams_.speed_tier < SpeedTier::kCheetah &&
+        !(cparams_.responsive && cparams_.decoding_speed_tier >= 1)) {
+      // single channel palette (like FLIF's ChannelCompact)
+      size_t nb_channels = gi.channel.size() - gi.nb_meta_channels;
+      for (size_t i = 0; i < nb_channels; i++) {
+        int32_t min, max;
+        compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max);
+        int64_t colors = (int64_t)max - min + 1;
+        JXL_DEBUG_V(10, "Channel %" PRIuS ": range=%i..%i", i, min, max);
+        Transform maybe_palette_1(TransformId::kPalette);
+        maybe_palette_1.begin_c = i + gi.nb_meta_channels;
+        maybe_palette_1.num_c = 1;
+        // simple heuristic: if less than X percent of the values in the range
+        // actually occur, it is probably worth it to do a compaction
+        // (but only if the channel palette is less than 80% the size of the
+        // image itself)
+        maybe_palette_1.nb_colors =
+            std::min((int)(xsize * ysize * 0.8),
+                     (int)(cparams_.channel_colors_percent / 100. * colors));
+        do_transform(gi, maybe_palette_1, weighted::Header());
+      }
+    }
+  }
+
+  // lossless and no specific color transform specified: try Nothing, YCoCg,
+  // and 17 RCTs
+  if (cparams_.color_transform == ColorTransform::kNone &&
+      cparams_.IsLossless() && cparams_.colorspace < 0 &&
+      gi.channel.size() - gi.nb_meta_channels >= 3 &&
+      cparams_.responsive == false && do_color &&
+      cparams_.speed_tier <= SpeedTier::kHare) {
+    Transform sg(TransformId::kRCT);
+    sg.begin_c = gi.nb_meta_channels;
+    size_t nb_rcts_to_try = 0;
+    switch (cparams_.speed_tier) {
+      case SpeedTier::kLightning:
+      case SpeedTier::kThunder:
+      case SpeedTier::kFalcon:
+      case SpeedTier::kCheetah:
+        nb_rcts_to_try = 0;  // Just do global YCoCg
+        break;
+      case SpeedTier::kHare:
+        nb_rcts_to_try = 4;
+        break;
+      case SpeedTier::kWombat:
+        nb_rcts_to_try = 5;
+        break;
+      case SpeedTier::kSquirrel:
+        nb_rcts_to_try = 7;
+        break;
+      case SpeedTier::kKitten:
+        nb_rcts_to_try = 9;
+        break;
+      case SpeedTier::kGlacier:
+      case SpeedTier::kTortoise:
+        nb_rcts_to_try = 19;
+        break;
+    }
+    float best_cost = std::numeric_limits<float>::max();
+    size_t best_rct = 0;
+    // These should be 19 actually different transforms; the remaining ones
+    // are equivalent to one of these (note that the first two are do-nothing
+    // and YCoCg) modulo channel reordering (which only matters in the case of
+    // MA-with-prev-channels-properties) and/or sign (e.g. RmG vs GmR)
+    for (int i : {0 * 7 + 0, 0 * 7 + 6, 0 * 7 + 5, 1 * 7 + 3, 3 * 7 + 5,
+                  5 * 7 + 5, 1 * 7 + 5, 2 * 7 + 5, 1 * 7 + 1, 0 * 7 + 4,
+                  1 * 7 + 2, 2 * 7 + 1, 2 * 7 + 2, 2 * 7 + 3, 4 * 7 + 4,
+                  4 * 7 + 5, 0 * 7 + 2, 0 * 7 + 1, 0 * 7 + 3}) {
+      if (nb_rcts_to_try == 0) break;
+      sg.rct_type = i;
+      nb_rcts_to_try--;
+      if (do_transform(gi, sg, weighted::Header())) {
+        float cost = EstimateCost(gi);
+        if (cost < best_cost) {
+          best_rct = i;
+          best_cost = cost;
+        }
+        Transform t = gi.transform.back();
+        JXL_RETURN_IF_ERROR(t.Inverse(gi, weighted::Header(), nullptr));
+        gi.transform.pop_back();  // Trial undone; only the cost is kept.
+      }
+    }
+    // Apply the best RCT to the image for future encoding.
+    sg.rct_type = best_rct;
+    do_transform(gi, sg, weighted::Header());
+  } else {
+    // No need to try anything, just use the default options.
+  }
+  size_t nb_wp_modes = 1;  // Number of weighted-predictor modes to compare.
+  if (cparams_.speed_tier <= SpeedTier::kTortoise) {
+    nb_wp_modes = 5;
+  } else if (cparams_.speed_tier <= SpeedTier::kKitten) {
+    nb_wp_modes = 2;
+  }
+  if (nb_wp_modes > 1 &&
+      (stream_options_[stream_id].predictor == Predictor::Weighted ||
+       stream_options_[stream_id].predictor == Predictor::Best ||
+       stream_options_[stream_id].predictor == Predictor::Variable)) {
+    float best_cost = std::numeric_limits<float>::max();
+    stream_options_[stream_id].wp_mode = 0;
+    for (size_t i = 0; i < nb_wp_modes; i++) {
+      float cost = EstimateWPCost(gi, i);
+      if (cost < best_cost) {
+        best_cost = cost;
+        stream_options_[stream_id].wp_mode = i;
+      }
+    }
+  }
+  return true;
+}
+
+constexpr float q_deadzone = 0.62f;  // |residual| below this is snapped to 0
+int QuantizeWP(const int32_t* qrow, size_t onerow, size_t c, size_t x, size_t y,
+               size_t w, weighted::State* wp_state, float value,
+               float inv_factor) {
+  const PredictionResult prediction =
+      PredictNoTreeWP(w, qrow + x, onerow, x, y, Predictor::Weighted, wp_state);
+  float scaled = value * inv_factor;
+  scaled -= prediction.guess;  // residual against the weighted prediction
+  if (scaled > -q_deadzone && scaled < q_deadzone) scaled = 0;
+  int rounded = roundf(scaled);
+  if (rounded < -2 || rounded > 2) rounded = roundf(scaled * 0.5) * 2;  // even
+  return rounded + prediction.guess;
+}
+
+int QuantizeGradient(const int32_t* qrow, size_t onerow, size_t c, size_t x,
+                     size_t y, size_t w, float value, float inv_factor) {
+  const PredictionResult prediction =
+      PredictNoTreeNoWP(w, qrow + x, onerow, x, y, Predictor::Gradient);
+  float scaled = value * inv_factor;
+  scaled -= prediction.guess;  // residual against the gradient prediction
+  if (scaled > -q_deadzone && scaled < q_deadzone) scaled = 0;  // dead zone
+  int rounded = roundf(scaled);
+  if (rounded < -2 || rounded > 2) rounded = roundf(scaled * 0.5) * 2;  // even
+  return rounded + prediction.guess;
+}
+
+void ModularFrameEncoder::AddVarDCTDC(const Image3F& dc, size_t group_index,
+                                      bool nl_dc, PassesEncoderState* enc_state,
+                                      bool jpeg_transcode) {  // Not read below.
+  const Rect r = enc_state->shared.DCGroupRect(group_index);
+  extra_dc_precision[group_index] = nl_dc ? 1 : 0;
+  float mul = 1 << extra_dc_precision[group_index];  // 2 if nl_dc, else 1.
+  // Choose predictor and tree settings for this group's DC stream.
+  size_t stream_id = ModularStreamId::VarDCTDC(group_index).ID(frame_dim_);
+  stream_options_[stream_id].max_chan_size = 0xFFFFFF;
+  stream_options_[stream_id].predictor = Predictor::Weighted;
+  stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kWPOnly;
+  if (cparams_.speed_tier >= SpeedTier::kSquirrel) {
+    stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kWPFixedDC;
+  }
+  if (cparams_.speed_tier < SpeedTier::kSquirrel && !nl_dc) {
+    stream_options_[stream_id].predictor =
+        (cparams_.speed_tier < SpeedTier::kKitten ? Predictor::Variable
+                                                  : Predictor::Best);
+    stream_options_[stream_id].wp_tree_mode =
+        ModularOptions::TreeMode::kDefault;
+    stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn;
+  }
+  if (cparams_.decoding_speed_tier >= 1) {
+    stream_options_[stream_id].tree_kind =
+        ModularOptions::TreeKind::kGradientFixedDC;
+  }
+  // Quantized DC is stored in 3 channels; planes 0 and 1 swap places (c ^ 1).
+  stream_images_[stream_id] = Image(r.xsize(), r.ysize(), 8, 3);
+  if (nl_dc && stream_options_[stream_id].tree_kind ==
+                   ModularOptions::TreeKind::kGradientFixedDC) {
+    JXL_ASSERT(enc_state->shared.frame_header.chroma_subsampling.Is444());
+    for (size_t c : {1, 0, 2}) {  // Plane 1 first: planes 0 and 2 read it.
+      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
+      float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
+      float cfl_factor = enc_state->shared.cmap.DCFactors()[c];
+      for (size_t y = 0; y < r.ysize(); y++) {
+        int32_t* quant_row =
+            stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
+        size_t stride = stream_images_[stream_id]
+                            .channel[c < 2 ? c ^ 1 : c]
+                            .plane.PixelsPerRow();
+        const float* row = r.ConstPlaneRow(dc, c, y);
+        if (c == 1) {
+          for (size_t x = 0; x < r.xsize(); x++) {
+            quant_row[x] = QuantizeGradient(quant_row, stride, c, x, y,
+                                            r.xsize(), row[x], inv_factor);
+          }
+        } else {
+          int32_t* quant_row_y =
+              stream_images_[stream_id].channel[0].plane.Row(y);
+          for (size_t x = 0; x < r.xsize(); x++) {
+            quant_row[x] = QuantizeGradient(
+                quant_row, stride, c, x, y, r.xsize(),
+                row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor);
+          }
+        }
+      }
+    }
+  } else if (nl_dc) {
+    JXL_ASSERT(enc_state->shared.frame_header.chroma_subsampling.Is444());
+    for (size_t c : {1, 0, 2}) {
+      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
+      float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
+      float cfl_factor = enc_state->shared.cmap.DCFactors()[c];
+      weighted::Header header;
+      weighted::State wp_state(header, r.xsize(), r.ysize());
+      for (size_t y = 0; y < r.ysize(); y++) {
+        int32_t* quant_row =
+            stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
+        size_t stride = stream_images_[stream_id]
+                            .channel[c < 2 ? c ^ 1 : c]
+                            .plane.PixelsPerRow();
+        const float* row = r.ConstPlaneRow(dc, c, y);
+        if (c == 1) {
+          for (size_t x = 0; x < r.xsize(); x++) {
+            quant_row[x] = QuantizeWP(quant_row, stride, c, x, y, r.xsize(),
+                                      &wp_state, row[x], inv_factor);
+            wp_state.UpdateErrors(quant_row[x], x, y, r.xsize());
+          }
+        } else {
+          int32_t* quant_row_y =
+              stream_images_[stream_id].channel[0].plane.Row(y);
+          for (size_t x = 0; x < r.xsize(); x++) {
+            quant_row[x] = QuantizeWP(
+                quant_row, stride, c, x, y, r.xsize(), &wp_state,
+                row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor);
+            wp_state.UpdateErrors(quant_row[x], x, y, r.xsize());
+          }
+        }
+      }
+    }
+  } else if (enc_state->shared.frame_header.chroma_subsampling.Is444()) {
+    for (size_t c : {1, 0, 2}) {
+      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
+      float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
+      float cfl_factor = enc_state->shared.cmap.DCFactors()[c];
+      for (size_t y = 0; y < r.ysize(); y++) {
+        int32_t* quant_row =
+            stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
+        const float* row = r.ConstPlaneRow(dc, c, y);
+        if (c == 1) {
+          for (size_t x = 0; x < r.xsize(); x++) {
+            quant_row[x] = roundf(row[x] * inv_factor);
+          }
+        } else {
+          int32_t* quant_row_y =
+              stream_images_[stream_id].channel[0].plane.Row(y);
+          for (size_t x = 0; x < r.xsize(); x++) {
+            quant_row[x] =
+                roundf((row[x] - quant_row_y[x] * (y_factor * cfl_factor)) *
+                       inv_factor);
+          }
+        }
+      }
+    }
+  } else {
+    for (size_t c : {1, 0, 2}) {
+      Rect rect(
+          r.x0() >> enc_state->shared.frame_header.chroma_subsampling.HShift(c),
+          r.y0() >> enc_state->shared.frame_header.chroma_subsampling.VShift(c),
+          r.xsize() >>
+              enc_state->shared.frame_header.chroma_subsampling.HShift(c),
+          r.ysize() >>
+              enc_state->shared.frame_header.chroma_subsampling.VShift(c));
+      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
+      size_t ys = rect.ysize();
+      size_t xs = rect.xsize();
+      Channel& ch = stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c];
+      ch.w = xs;
+      ch.h = ys;
+      ch.shrink();  // Presumably resizes the plane to the w/h set above.
+      for (size_t y = 0; y < ys; y++) {
+        int32_t* quant_row = ch.plane.Row(y);
+        const float* row = rect.ConstPlaneRow(dc, c, y);
+        for (size_t x = 0; x < xs; x++) {
+          quant_row[x] = roundf(row[x] * inv_factor);
+        }
+      }
+    }
+  }
+  // Hand the quantized image to DequantDC together with the shared DC buffers.
+  DequantDC(r, &enc_state->shared.dc_storage, &enc_state->shared.quant_dc,
+            stream_images_[stream_id], enc_state->shared.quantizer.MulDC(),
+            1.0 / mul, enc_state->shared.cmap.DCFactors(),
+            enc_state->shared.frame_header.chroma_subsampling,
+            enc_state->shared.block_ctx_map);
+}
+
+// Fills the ACMetadata stream of DC group `group_index` with four channels:
+// YToX map, YToB map, (AC strategy, quant field - 1) pairs, EPF sharpness.
+void ModularFrameEncoder::AddACMetadata(size_t group_index, bool jpeg_transcode,
+                                        PassesEncoderState* enc_state) {
+  const Rect r = enc_state->shared.DCGroupRect(group_index);
+  size_t stream_id = ModularStreamId::ACMetadata(group_index).ID(frame_dim_);
+  auto& options = stream_options_[stream_id];
+  options.max_chan_size = 0xFFFFFF;
+  options.wp_tree_mode = ModularOptions::TreeMode::kNoWP;
+  if (jpeg_transcode) {
+    options.tree_kind = ModularOptions::TreeKind::kJpegTranscodeACMeta;
+  } else if (cparams_.speed_tier >= SpeedTier::kFalcon) {
+    options.tree_kind = ModularOptions::TreeKind::kFalconACMeta;
+  } else if (cparams_.speed_tier > SpeedTier::kKitten) {
+    options.tree_kind = ModularOptions::TreeKind::kACMeta;
+  }
+  // If we are using a non-constant CfL field, and are in a slow enough mode,
+  // re-enable tree computation for it.
+  if (cparams_.speed_tier < SpeedTier::kSquirrel &&
+      cparams_.force_cfl_jpeg_recompression) {
+    options.tree_kind = ModularOptions::TreeKind::kLearn;
+  }
+  // Channel layout: 0 = YToX, 1 = YToB, 2 = ACS row + QF row, 3 = EPF.
+  Image& meta = stream_images_[stream_id];
+  meta = Image(r.xsize(), r.ysize(), 8, 4);
+  static_assert(kColorTileDimInBlocks == 8, "Color tile size changed");
+  Rect cr(r.x0() >> 3, r.y0() >> 3, (r.xsize() + 7) >> 3, (r.ysize() + 7) >> 3);
+  meta.channel[0] = Channel(cr.xsize(), cr.ysize(), 3, 3);
+  meta.channel[1] = Channel(cr.xsize(), cr.ysize(), 3, 3);
+  meta.channel[2] = Channel(r.xsize() * r.ysize(), 2, 0, 0);
+  ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytox_map,
+                       Rect(meta.channel[0].plane), &meta.channel[0].plane);
+  ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytob_map,
+                       Rect(meta.channel[1].plane), &meta.channel[1].plane);
+  size_t num_blocks = 0;
+  for (size_t y = 0; y < r.ysize(); y++) {
+    AcStrategyRow acs = enc_state->shared.ac_strategy.ConstRow(r, y);
+    const int32_t* qf = r.ConstRow(enc_state->shared.raw_quant_field, y);
+    const uint8_t* epf = r.ConstRow(enc_state->shared.epf_sharpness, y);
+    int32_t* out_acs = meta.channel[2].plane.Row(0);
+    int32_t* out_qf = meta.channel[2].plane.Row(1);
+    int32_t* out_epf = meta.channel[3].plane.Row(y);
+    for (size_t x = 0; x < r.xsize(); x++) {
+      out_epf[x] = epf[x];
+      if (!acs[x].IsFirstBlock()) continue;
+      out_acs[num_blocks] = acs[x].RawStrategy();
+      out_qf[num_blocks++] = qf[x] - 1;
+    }
+  }
+  meta.channel[2].w = num_blocks;
+  ac_metadata_size[group_index] = num_blocks;
+}
+
+// Writes the denominator, then the table (prepared stream or inline image).
+void ModularFrameEncoder::EncodeQuantTable(
+    size_t size_x, size_t size_y, BitWriter* writer,
+    const QuantEncoding& encoding, size_t idx,
+    ModularFrameEncoder* modular_frame_encoder) {
+  JXL_ASSERT(encoding.qraw.qtable != nullptr);
+  JXL_ASSERT(size_x * size_y * 3 == encoding.qraw.qtable->size());
+  JXL_CHECK(F16Coder::Write(encoding.qraw.qtable_den, writer));
+  if (modular_frame_encoder != nullptr) {
+    const ModularStreamId stream = ModularStreamId::QuantTable(idx);
+    JXL_CHECK(modular_frame_encoder->EncodeStream(writer, nullptr, 0, stream));
+    return;
+  }
+  Image table_image(size_x, size_y, 8, 3);
+  size_t i = 0;  // The table is stored channel by channel, row by row.
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t y = 0; y < size_y; y++) {
+      int32_t* JXL_RESTRICT row = table_image.channel[c].Row(y);
+      for (size_t x = 0; x < size_x; x++) row[x] = (*encoding.qraw.qtable)[i++];
+    }
+  }
+  ModularOptions cfopts;
+  JXL_CHECK(ModularGenericCompress(table_image, cfopts, writer));
+}
+
+// Stores the raw quant table as the 3-channel image of stream QuantTable(idx).
+void ModularFrameEncoder::AddQuantTable(size_t size_x, size_t size_y,
+                                        const QuantEncoding& encoding,
+                                        size_t idx) {
+  const size_t stream_id = ModularStreamId::QuantTable(idx).ID(frame_dim_);
+  JXL_ASSERT(encoding.qraw.qtable != nullptr);
+  JXL_ASSERT(size_x * size_y * 3 == encoding.qraw.qtable->size());
+  Image& dst = stream_images_[stream_id];
+  dst = Image(size_x, size_y, 8, 3);
+  size_t i = 0;  // The table is stored channel by channel, row by row.
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t y = 0; y < size_y; y++) {
+      int32_t* JXL_RESTRICT row = dst.channel[c].Row(y);
+      for (size_t x = 0; x < size_x; x++) row[x] = (*encoding.qraw.qtable)[i++];
+    }
+  }
+}
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_modular.h b/third-party/libjxl/libjxl/lib/jxl/enc_modular.h
new file mode 100644
index 0000000000..2af66e951f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_modular.h
@@ -0,0 +1,92 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_MODULAR_H_
+#define LIB_JXL_ENC_MODULAR_H_
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_modular.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+class ModularFrameEncoder {
+ public:
+  ModularFrameEncoder(const FrameHeader& frame_header,
+                      const CompressParams& cparams_orig);
+  Status ComputeEncodingData(const FrameHeader& frame_header,
+                             const ImageMetadata& metadata,
+                             Image3F* JXL_RESTRICT color,
+                             const std::vector<ImageF>& extra_channels,
+                             PassesEncoderState* JXL_RESTRICT enc_state,
+                             const JxlCmsInterface& cms, ThreadPool* pool,
+                             AuxOut* aux_out, bool do_color);
+  // Encodes global info (tree + histograms) in the `writer`.
+  Status EncodeGlobalInfo(BitWriter* writer, AuxOut* aux_out);
+  // Encodes a specific modular image (identified by `stream`) in the `writer`,
+  // assigning bits to the provided `layer`.
+  Status EncodeStream(BitWriter* writer, AuxOut* aux_out, size_t layer,
+                      const ModularStreamId& stream);
+  // Creates a modular image for a given DC group of VarDCT mode. `dc` is the
+  // input DC image, not quantized; the group is specified by `group_index`, and
+  // `nl_dc` decides whether to apply near-lossless processing to the DC or
+  // not.
+  void AddVarDCTDC(const Image3F& dc, size_t group_index, bool nl_dc,
+                   PassesEncoderState* enc_state, bool jpeg_transcode);
+  // Creates a modular image for the AC metadata of the given group
+  // (`group_index`).
+  void AddACMetadata(size_t group_index, bool jpeg_transcode,
+                     PassesEncoderState* enc_state);
+  // Encodes a RAW quantization table in `writer`. If `modular_frame_encoder` is
+  // null, the quantization table in `encoding` is used, with dimensions `size_x
+  // x size_y`. Otherwise, the table with ID `idx` is encoded from the given
+  // `modular_frame_encoder`.
+  static void EncodeQuantTable(size_t size_x, size_t size_y, BitWriter* writer,
+                               const QuantEncoding& encoding, size_t idx,
+                               ModularFrameEncoder* modular_frame_encoder);
+  // Stores quantization table `idx` for future usage with `EncodeQuantTable`.
+  void AddQuantTable(size_t size_x, size_t size_y,
+                     const QuantEncoding& encoding, size_t idx);
+
+  std::vector<size_t> ac_metadata_size;  // indexed by group index
+  std::vector<uint8_t> extra_dc_precision;
+
+ private:
+  Status PrepareEncoding(const FrameHeader& frame_header, ThreadPool* pool,
+                         EncoderHeuristics* heuristics,
+                         AuxOut* aux_out = nullptr);
+  Status PrepareStreamParams(const Rect& rect, const CompressParams& cparams,
+                             int minShift, int maxShift,
+                             const ModularStreamId& stream, bool do_color);
+  std::vector<Image> stream_images_;  // indexed by ModularStreamId::ID()
+  std::vector<ModularOptions> stream_options_;
+
+  Tree tree_;
+  std::vector<std::vector<Token>> tree_tokens_;
+  std::vector<GroupHeader> stream_headers_;
+  std::vector<std::vector<Token>> tokens_;
+  EntropyEncodingData code_;
+  std::vector<uint8_t> context_map_;
+  FrameDimensions frame_dim_;
+  CompressParams cparams_;
+  std::vector<size_t> tree_splits_;
+  std::vector<ModularMultiplierInfo> multiplier_info_;
+  std::vector<std::vector<uint32_t>> gi_channel_;
+  std::vector<size_t> image_widths_;
+  Predictor delta_pred_ = Predictor::Average4;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_MODULAR_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_noise.cc b/third-party/libjxl/libjxl/lib/jxl/enc_noise.cc
new file mode 100644
index 0000000000..54bb4482e8
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_noise.cc
@@ -0,0 +1,374 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_noise.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <numeric>
+#include <utility>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_optimize.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/opsin_params.h"
+
+namespace jxl {
+namespace {
+
+using OptimizeArray = optimize::Array<double, NoiseParams::kNumNoisePoints>;
+
+float GetScoreSumsOfAbsoluteDifferences(const Image3F& opsin, const int x,
+                                        const int y, const int block_size) {
+  const int small_bl_size_x = 3;  // patch width
+  const int small_bl_size_y = 4;  // patch height
+  const int kNumSAD =
+      (block_size - small_bl_size_x) * (block_size - small_bl_size_y);
+  // Scores the block_size x block_size block of reference pixels at (x, y).
+  int counter = 0;
+  const int offset = 2;  // top-left corner of the fixed center patch
+  // One SAD value per patch position inside the block.
+  std::vector<float> sad(kNumSAD, 0);
+  for (int y_bl = 0; y_bl + small_bl_size_y < block_size; ++y_bl) {
+    for (int x_bl = 0; x_bl + small_bl_size_x < block_size; ++x_bl) {
+      float sad_sum = 0;
+      // Compare the patch at (x_bl, y_bl) with the center patch, pixel by
+      // pixel, on the average of planes 0 and 1.
+      for (int cy = 0; cy < small_bl_size_y; ++cy) {
+        for (int cx = 0; cx < small_bl_size_x; ++cx) {
+          float wnd = 0.5f * (opsin.PlaneRow(1, y + y_bl + cy)[x + x_bl + cx] +
+                              opsin.PlaneRow(0, y + y_bl + cy)[x + x_bl + cx]);
+          float center =
+              0.5f * (opsin.PlaneRow(1, y + offset + cy)[x + offset + cx] +
+                      opsin.PlaneRow(0, y + offset + cy)[x + offset + cx]);
+          sad_sum += std::abs(center - wnd);
+        }
+      }
+      sad[counter++] = sad_sum;
+    }
+  }
+  const int kSamples = (kNumSAD) / 2;
+  // As with ROAD (rank order absolute distance), we keep the smallest half of
+  // the values in SAD (we use here the more robust patch SAD instead of
+  // absolute single-pixel differences).
+  std::sort(sad.begin(), sad.end());
+  const float total_sad_sum =
+      std::accumulate(sad.begin(), sad.begin() + kSamples, 0.0f);
+  return total_sad_sum / kSamples;  // mean of the kept half
+}
+
+class NoiseHistogram {
+ public:
+  static constexpr int kBins = 256;
+  // Counts samples in kBins bins; out-of-range values land in the end bins.
+  NoiseHistogram() { std::fill(bins, bins + kBins, 0); }
+
+  void Increment(const float x) { bins[Index(x)] += 1; }
+  int Get(const float x) const { return bins[Index(x)]; }
+  int Bin(const size_t bin) const { return bins[bin]; }
+  // Index of the fullest bin; the lowest index wins ties.
+  int Mode() const {
+    size_t max_idx = 0;
+    for (size_t i = 0; i < kBins; i++) {
+      if (bins[i] > bins[max_idx]) max_idx = i;
+    }
+    return max_idx;
+  }
+  // Bin position of quantile q01. NOTE(review): `total` is seeded with 1.
+  double Quantile(double q01) const {
+    const int64_t total = std::accumulate(bins, bins + kBins, int64_t{1});
+    const int64_t target = static_cast<int64_t>(q01 * total);
+    // Until sum >= target:
+    int64_t sum = 0;
+    size_t i = 0;
+    for (; i < kBins; ++i) {
+      sum += bins[i];
+      // Exact match: assume middle of bin i
+      if (sum == target) {
+        return i + 0.5;
+      }
+      if (sum > target) break;
+    }
+
+    // Next non-empty bin (in case histogram is sparsely filled)
+    size_t next = i + 1;
+    while (next < kBins && bins[next] == 0) {
+      ++next;
+    }
+
+    // Linear interpolation according to how far into next we went
+    // NOTE(review): `excess` is negative after the `break` above - confirm.
+    const double excess = target - sum;
+    const double weight_next = bins[Index(next)] / excess;
+    return ClampX(next * weight_next + i * (1.0 - weight_next));
+  }
+
+  // Inter-quartile range
+  double IQR() const { return Quantile(0.75) - Quantile(0.25); }
+
+ private:
+  template <typename T>
+  T ClampX(const T x) const {  // clamps x to [0, kBins - 1]
+    return std::min(std::max(T(0), x), T(kBins - 1));
+  }
+  size_t Index(const float x) const { return ClampX(static_cast<int>(x)); }
+
+  uint32_t bins[kBins];
+};
+
+std::vector<float> GetSADScoresForPatches(const Image3F& opsin,
+                                          const size_t block_s,
+                                          const size_t num_bin,
+                                          NoiseHistogram* sad_histogram) {
+  std::vector<float> sad_scores(
+      (opsin.ysize() / block_s) * (opsin.xsize() / block_s), 0.0f);
+  // One score per whole block in raster order; partial blocks are skipped.
+  int block_index = 0;
+  // Each score is also counted in `sad_histogram`, scaled by `num_bin`.
+  for (size_t y = 0; y + block_s <= opsin.ysize(); y += block_s) {
+    for (size_t x = 0; x + block_s <= opsin.xsize(); x += block_s) {
+      float sad_sc = GetScoreSumsOfAbsoluteDifferences(opsin, x, y, block_s);
+      sad_scores[block_index++] = sad_sc;
+      sad_histogram->Increment(sad_sc * num_bin);
+    }
+  }
+  return sad_scores;
+}
+
+float GetSADThreshold(const NoiseHistogram& histogram, const int num_bin) {
+  // We assume that the most common SAD value (the histogram mode) belongs to
+  // "flat" patches. Images with regular texture may produce a second strong
+  // peak. `num_bin` must be the scale that was used to fill `histogram`.
+  // TODO(user) handle bimodal and heavy-tailed case
+  const int mode = histogram.Mode();
+  return static_cast<float>(mode) / num_bin;
+}
+
+// loss = sum asym * (F(x) - nl)^2 + kReg * num_points * sum (w[i] - w[i+1])^2
+// where asym = 1 if F(x) < nl, kAsym if F(x) > nl.
+struct LossFunction {
+  explicit LossFunction(std::vector<NoiseLevel> nl0) : nl(std::move(nl0)) {}
+  // Returns the loss at `w`; writes -0.5 * gradient (factor 2 dropped) to *df.
+  double Compute(const OptimizeArray& w, OptimizeArray* df,
+                 bool skip_regularization = false) const {
+    constexpr double kReg = 0.005;
+    constexpr double kAsym = 1.1;
+    double loss_function = 0;
+    for (size_t i = 0; i < w.size(); i++) {
+      (*df)[i] = 0;
+    }
+    for (auto ind : nl) {
+      std::pair<int, float> pos = IndexAndFrac(ind.intensity);
+      JXL_DASSERT(pos.first >= 0 && static_cast<size_t>(pos.first) <
+                                        NoiseParams::kNumNoisePoints - 1);
+      double low = w[pos.first];
+      double hi = w[pos.first + 1];
+      double val = low * (1.0f - pos.second) + hi * pos.second;  // F(x)
+      double dist = val - ind.noise_level;
+      if (dist > 0) {  // the fitted value is above the sample: weight by kAsym
+        loss_function += kAsym * dist * dist;
+        (*df)[pos.first] -= kAsym * (1.0f - pos.second) * dist;
+        (*df)[pos.first + 1] -= kAsym * pos.second * dist;
+      } else {
+        loss_function += dist * dist;
+        (*df)[pos.first] -= (1.0f - pos.second) * dist;
+        (*df)[pos.first + 1] -= pos.second * dist;
+      }
+    }
+    if (skip_regularization) return loss_function;
+    for (size_t i = 0; i + 1 < w.size(); i++) {  // smoothness penalty
+      double diff = w[i] - w[i + 1];
+      loss_function += kReg * nl.size() * diff * diff;
+      (*df)[i] -= kReg * diff * nl.size();
+      (*df)[i + 1] += kReg * diff * nl.size();
+    }
+    return loss_function;
+  }
+
+  std::vector<NoiseLevel> nl;  // samples the curve is fitted to
+};
+
+void OptimizeNoiseParameters(const std::vector<NoiseLevel>& noise_level,
+                             NoiseParams* noise_params) {
+  // Fits `noise_params->lut` to the samples; clears it when the fit fails.
+  constexpr double kMaxError = 1e-3;
+  static const double kPrecision = 1e-8;
+  static const int kMaxIter = 40;
+
+  // No samples: the mean below would be 0/0 and NaN would reach the LUT.
+  if (noise_level.empty()) {
+    noise_params->Clear();
+    return;
+  }
+  // Start the optimization from a flat curve at the mean noise level.
+  float avg = 0;
+  for (const NoiseLevel& nl : noise_level) avg += nl.noise_level;
+  avg /= noise_level.size();
+  LossFunction loss_function(noise_level);
+  OptimizeArray parameter_vector(static_cast<double>(avg));
+  parameter_vector = optimize::OptimizeWithScaledConjugateGradientMethod(
+      loss_function, parameter_vector, kPrecision, kMaxIter);
+
+  // Mean data-term loss per sample (regularization excluded).
+  OptimizeArray df = parameter_vector;
+  float loss = loss_function.Compute(parameter_vector, &df,
+                                     /*skip_regularization=*/true) /
+               noise_level.size();
+
+  // Approximation went too badly (or is NaN): escape with no noise at all.
+  if (!(loss <= kMaxError)) {
+    noise_params->Clear();
+    return;
+  }
+  for (size_t i = 0; i < parameter_vector.size(); i++) {
+    noise_params->lut[i] = std::max(parameter_vector[i], 0.0);
+  }
+}
+
+std::vector<NoiseLevel> GetNoiseLevel(
+    const Image3F& opsin, const std::vector<float>& texture_strength,
+    const float threshold, const size_t block_s) {
+  std::vector<NoiseLevel> noise_level_per_intensity;
+  // 3x3 kernel whose weights sum to zero.
+  const int filt_size = 1;
+  static const float kLaplFilter[filt_size * 2 + 1][filt_size * 2 + 1] = {
+      {-0.25f, -1.0f, -0.25f},
+      {-1.0f, 5.0f, -1.0f},
+      {-0.25f, -1.0f, -0.25f},
+  };
+
+  // The noise model is built based on channel 0.5 * (X+Y) as we notice that it
+  // is similar to the model 0.5 * (Y-X)
+  size_t patch_index = 0;
+  // Only patches whose texture strength is within `threshold` contribute.
+  for (size_t y = 0; y + block_s <= opsin.ysize(); y += block_s) {
+    for (size_t x = 0; x + block_s <= opsin.xsize(); x += block_s) {
+      if (texture_strength[patch_index] <= threshold) {
+        // Calculate mean value of 0.5 * (plane 0 + plane 1) over the patch
+        float mean_int = 0;
+        for (size_t y_bl = 0; y_bl < block_s; ++y_bl) {
+          for (size_t x_bl = 0; x_bl < block_s; ++x_bl) {
+            mean_int += 0.5f * (opsin.PlaneRow(1, y + y_bl)[x + x_bl] +
+                                opsin.PlaneRow(0, y + y_bl)[x + x_bl]);
+          }
+        }
+        mean_int /= block_s * block_s;
+
+        // Noise level: mean |filter response|; taps outside the patch are
+        // mirrored about the current pixel (offset sign flipped).
+        float noise_level = 0;
+        size_t count = 0;
+        for (size_t y_bl = 0; y_bl < block_s; ++y_bl) {
+          for (size_t x_bl = 0; x_bl < block_s; ++x_bl) {
+            float filtered_value = 0;
+            for (int y_f = -1 * filt_size; y_f <= filt_size; ++y_f) {
+              if ((static_cast<ssize_t>(y_bl) + y_f) >= 0 &&
+                  (y_bl + y_f) < block_s) {
+                for (int x_f = -1 * filt_size; x_f <= filt_size; ++x_f) {
+                  if ((static_cast<ssize_t>(x_bl) + x_f) >= 0 &&
+                      (x_bl + x_f) < block_s) {
+                    filtered_value +=
+                        0.5f *
+                        (opsin.PlaneRow(1, y + y_bl + y_f)[x + x_bl + x_f] +
+                         opsin.PlaneRow(0, y + y_bl + y_f)[x + x_bl + x_f]) *
+                        kLaplFilter[y_f + filt_size][x_f + filt_size];
+                  } else {
+                    filtered_value +=
+                        0.5f *
+                        (opsin.PlaneRow(1, y + y_bl + y_f)[x + x_bl - x_f] +
+                         opsin.PlaneRow(0, y + y_bl + y_f)[x + x_bl - x_f]) *
+                        kLaplFilter[y_f + filt_size][x_f + filt_size];
+                  }
+                }
+              } else {
+                for (int x_f = -1 * filt_size; x_f <= filt_size; ++x_f) {
+                  if ((static_cast<ssize_t>(x_bl) + x_f) >= 0 &&
+                      (x_bl + x_f) < block_s) {
+                    filtered_value +=
+                        0.5f *
+                        (opsin.PlaneRow(1, y + y_bl - y_f)[x + x_bl + x_f] +
+                         opsin.PlaneRow(0, y + y_bl - y_f)[x + x_bl + x_f]) *
+                        kLaplFilter[y_f + filt_size][x_f + filt_size];
+                  } else {
+                    filtered_value +=
+                        0.5f *
+                        (opsin.PlaneRow(1, y + y_bl - y_f)[x + x_bl - x_f] +
+                         opsin.PlaneRow(0, y + y_bl - y_f)[x + x_bl - x_f]) *
+                        kLaplFilter[y_f + filt_size][x_f + filt_size];
+                  }
+                }
+              }
+            }
+            noise_level += std::abs(filtered_value);
+            ++count;
+          }
+        }
+        noise_level /= count;
+        NoiseLevel nl;
+        nl.intensity = mean_int;
+        nl.noise_level = noise_level;
+        noise_level_per_intensity.push_back(nl);
+      }
+      ++patch_index;  // advances for skipped patches too
+    }
+  }
+  return noise_level_per_intensity;
+}
+
+void EncodeFloatParam(float val, float precision, BitWriter* writer) {
+  JXL_ASSERT(val >= 0);
+  const int absval_quant = static_cast<int>(val * precision + 0.5f);  // round
+  JXL_ASSERT(absval_quant < (1 << 10));  // must fit the 10-bit field below
+  writer->Write(10, absval_quant);
+}
+
+}  // namespace
+
+Status GetNoiseParameter(const Image3F& opsin, NoiseParams* noise_params,
+                         float quality_coef) {
+  // The size of a patch in decoder might be different from encoder's patch
+  // size.
+  // For encoder: the patch size should be big enough to estimate
+  //              noise level, but, at the same time, it should be not too big
+  //              to be able to estimate intensity value of the patch
+  const size_t block_s = 8;
+  const size_t kNumBin = 256;
+  NoiseHistogram sad_histogram;
+  std::vector<float> sad_scores =
+      GetSADScoresForPatches(opsin, block_s, kNumBin, &sad_histogram);
+  float sad_threshold = GetSADThreshold(sad_histogram, kNumBin);
+  // If threshold is too large, the image has a strong pattern. This pattern
+  // fools our model and it will add too much noise. Therefore, we do not add
+  // noise for such images (nor when the threshold is not positive).
+  if (sad_threshold > 0.15f || sad_threshold <= 0.0f) {
+    noise_params->Clear();
+    return false;
+  }
+  std::vector<NoiseLevel> nl =
+      GetNoiseLevel(opsin, sad_scores, sad_threshold, block_s);
+
+  OptimizeNoiseParameters(nl, noise_params);
+  for (float& i : noise_params->lut) {
+    i *= quality_coef * 1.4;  // scale every LUT entry
+  }
+  return noise_params->HasAny();
+}
+
+void EncodeNoise(const NoiseParams& noise_params, BitWriter* writer,
+                 size_t layer, AuxOut* aux_out) {
+  JXL_ASSERT(noise_params.HasAny());
+  // Reserve 16 bits per LUT entry; each entry is written with 10 bits.
+  BitWriter::Allotment allotment(writer, NoiseParams::kNumNoisePoints * 16);
+  for (float i : noise_params.lut) {
+    EncodeFloatParam(i, kNoisePrecision, writer);
+  }
+  allotment.ReclaimAndCharge(writer, layer, aux_out);
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_noise.h b/third-party/libjxl/libjxl/lib/jxl/enc_noise.h
new file mode 100644
index 0000000000..851fdd12db
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_noise.h
@@ -0,0 +1,34 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_NOISE_H_
+#define LIB_JXL_ENC_NOISE_H_
+
+// Noise parameter estimation.
+
+#include <stddef.h>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/noise.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// Estimates the parameters of the NoiseParams model from `opsin`.
+// Returns whether a valid noise model (with HasAny()) is set.
+Status GetNoiseParameter(const Image3F& opsin, NoiseParams* noise_params,
+                         float quality_coef);
+
+// `noise_params` must not be empty (HasAny() is asserted). The caller must
+// set FrameHeader.flags.kNoise.
+void EncodeNoise(const NoiseParams& noise_params, BitWriter* writer,
+                 size_t layer, AuxOut* aux_out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_NOISE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_optimize.cc b/third-party/libjxl/libjxl/lib/jxl/enc_optimize.cc
new file mode 100644
index 0000000000..6865ff67df
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_optimize.cc
@@ -0,0 +1,163 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_optimize.h"
+
+#include <algorithm>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+namespace optimize {
+
+namespace {
+
+// Centroid of all vertices but the last; simplex must be sorted by score [0].
+std::vector<double> Midpoint(const std::vector<std::vector<double>>& simplex) {
+  JXL_CHECK(!simplex.empty());
+  JXL_CHECK(simplex.size() == simplex[0].size());
+  int dim = simplex.size() - 1;
+  std::vector<double> result(dim + 1, 0);  // result[0] (the score) stays 0
+  for (int i = 0; i < dim; i++) {
+    for (int k = 0; k < dim; k++) {  // k < dim skips the last vertex
+      result[i + 1] += simplex[k][i + 1];
+    }
+    result[i + 1] /= dim;
+  }
+  return result;
+}
+
+// Returns a - b for every coordinate; slot 0 (the score) is reset to zero.
+std::vector<double> Subtract(const std::vector<double>& a,
+                             const std::vector<double>& b) {
+  JXL_CHECK(a.size() == b.size());
+  const size_t count = a.size();
+  std::vector<double> difference(count);
+  difference[0] = 0;
+  // Coordinates start at index 1.
+  for (size_t k = 1; k < count; ++k) difference[k] = a[k] - b[k];
+  return difference;
+}
+
+// Returns a + b for every coordinate; slot 0 (the score) is reset to zero.
+std::vector<double> Add(const std::vector<double>& a,
+                        const std::vector<double>& b) {
+  JXL_CHECK(a.size() == b.size());
+  const size_t count = a.size();
+  std::vector<double> total(count);
+  total[0] = 0;
+  // Coordinates start at index 1.
+  for (size_t k = 1; k < count; ++k) total[k] = a[k] + b[k];
+  return total;
+}
+
+// Returns the midpoint of a and b per coordinate; slot 0 is reset to zero.
+std::vector<double> Average(const std::vector<double>& a,
+                            const std::vector<double>& b) {
+  JXL_CHECK(a.size() == b.size());
+  const size_t count = a.size();
+  std::vector<double> middle(count);
+  middle[0] = 0;
+  // Coordinates start at index 1.
+  for (size_t k = 1; k < count; ++k) middle[k] = 0.5 * (a[k] + b[k]);
+  return middle;
+}
+
+// vec: slot [0] receives the objective value; slots [1:] hold the point at
+//   which the objective is evaluated.
+// fun: the objective function.
+void Eval(std::vector<double>* vec,
+          const std::function<double(const std::vector<double>&)>& fun) {
+  const std::vector<double> point(vec->begin() + 1, vec->end());
+  vec->front() = fun(point);
+}
+
+void Sort(std::vector<std::vector<double>>* simplex) {
+  std::sort(simplex->begin(), simplex->end());  // orders by score [0] first
+}
+
+// One Nelder-Mead like step: sorts the simplex and replaces its worst vertex.
+void Reflect(std::vector<std::vector<double>>* simplex,
+             const std::function<double(const std::vector<double>&)>& fun) {
+  Sort(simplex);  // best vertex first, worst vertex last
+  const std::vector<double>& last = simplex->back();  // worst vertex
+  std::vector<double> mid = Midpoint(*simplex);  // centroid without `last`
+  std::vector<double> diff = Subtract(mid, last);
+  std::vector<double> mirrored = Add(mid, diff);  // `last` reflected via `mid`
+  Eval(&mirrored, fun);
+  if (mirrored[0] > (*simplex)[simplex->size() - 2][0]) {
+    // Still the worst, shrink towards the best.
+    std::vector<double> shrinking = Average(simplex->back(), (*simplex)[0]);
+    Eval(&shrinking, fun);
+    simplex->back() = shrinking;
+  } else if (mirrored[0] < (*simplex)[0][0]) {
+    // New best: also try one more `diff` step and keep the better of the two.
+    std::vector<double> even_further = Add(mirrored, diff);
+    Eval(&even_further, fun);
+    if (even_further[0] < mirrored[0]) {
+      mirrored = even_further;
+    }
+    simplex->back() = mirrored;
+  } else {
+    // not a best, not a worst point
+    simplex->back() = mirrored;
+  }
+}
+
+// Builds the initial simplex around `init`, stepping `amount` along each axis.
+std::vector<std::vector<double>> InitialSimplex(
+    int dim, double amount, const std::vector<double>& init,
+    const std::function<double(const std::vector<double>&)>& fun) {
+  std::vector<double> best(1 + dim, 0);  // [0] is the score, [1:] the point
+  std::copy(init.begin(), init.end(), best.begin() + 1);
+  Eval(&best, fun);
+  std::vector<std::vector<double>> result{best};
+  for (int i = 0; i < dim; i++) {
+    best = result[0];  // current best vertex (result is kept sorted)
+    best[i + 1] += amount;
+    Eval(&best, fun);
+    result.push_back(best);
+    Sort(&result);
+  }
+  return result;
+}
+
+// For comparing the same with the python tool
+/*void RunSimplexExternal(
+    int dim, double amount, int max_iterations,
+    const std::function<double((const vector<double>&))>& fun) {
+  vector<double> vars;
+  for (int i = 0; i < dim; i++) {
+    vars.push_back(atof(getenv(StrCat("VAR", i).c_str())));
+  }
+  double result = fun(vars);
+  std::cout << "Result=" << result;
+}*/
+
+}  // namespace
+
+// Runs `max_iterations` Reflect steps from the simplex built around `init` and
+// returns the best vertex: [0] is the objective value, [1:] the argument.
+std::vector<double> RunSimplex(
+    int dim, double amount, int max_iterations, const std::vector<double>& init,
+    const std::function<double(const std::vector<double>&)>& fun) {
+  std::vector<std::vector<double>> simplex =
+      InitialSimplex(dim, amount, init, fun);
+  for (int i = 0; i < max_iterations; i++) Reflect(&simplex, fun);
+  Sort(&simplex);  // Reflect sorts on entry only; a new best may be at the back
+  return simplex[0];
+}
+
+std::vector<double> RunSimplex(
+    int dim, double amount, int max_iterations,
+    const std::function<double(const std::vector<double>&)>& fun) {
+  return RunSimplex(dim, amount, max_iterations,
+                    std::vector<double>(dim, 0.0), fun);  // start at the origin
+}
+
+}  // namespace optimize
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_optimize.h b/third-party/libjxl/libjxl/lib/jxl/enc_optimize.h
new file mode 100644
index 0000000000..0a60198214
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_optimize.h
@@ -0,0 +1,218 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Utility functions for optimizing multi-dimensional nonlinear functions.
+
+#ifndef LIB_JXL_OPTIMIZE_H_
+#define LIB_JXL_OPTIMIZE_H_
+
+#include <stdio.h>
+
+#include <cmath>
+#include <cstdio>
+#include <functional>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace optimize {
+
+// An array type of numeric values that supports math operations with operator-,
+// operator+, etc.
+template <typename T, size_t N>
+class Array {
+ public:
+  Array() = default;  // does not set the elements
+  explicit Array(T v) {  // every element is set to `v`
+    for (size_t i = 0; i < N; i++) v_[i] = v;
+  }
+
+  size_t size() const { return N; }
+
+  T& operator[](size_t index) {
+    JXL_DASSERT(index < N);
+    return v_[index];
+  }
+  T operator[](size_t index) const {
+    JXL_DASSERT(index < N);
+    return v_[index];
+  }
+
+ private:
+  // The values used by this Array.
+  T v_[N];
+};
+
+// Element-wise sum of `x` and `y`.
+template <typename T, size_t N>
+Array<T, N> operator+(const Array<T, N>& x, const Array<T, N>& y) {
+  Array<T, N> sum;
+  // Every slot is assigned below.
+  for (size_t k = 0; k < N; ++k) sum[k] = x[k] + y[k];
+  return sum;
+}
+
+// Element-wise difference `x` - `y`.
+template <typename T, size_t N>
+Array<T, N> operator-(const Array<T, N>& x, const Array<T, N>& y) {
+  Array<T, N> difference;
+  // Every slot is assigned below.
+  for (size_t k = 0; k < N; ++k) difference[k] = x[k] - y[k];
+  return difference;
+}
+
+// Scales every element of `x` by the scalar `v`.
+template <typename T, size_t N>
+Array<T, N> operator*(T v, const Array<T, N>& x) {
+  Array<T, N> scaled;
+  // Every slot is assigned below.
+  for (size_t k = 0; k < N; ++k) scaled[k] = v * x[k];
+  return scaled;
+}
+
+// Dot product of `x` and `y`.
+template <typename T, size_t N>
+T operator*(const Array<T, N>& x, const Array<T, N>& y) {
+  T dot = 0.0;
+  // Accumulate in index order.
+  for (size_t k = 0; k < N; ++k) dot += x[k] * y[k];
+  return dot;
+}
+
+// Runs Nelder-Mead like optimization for max_iterations iterations.
+// fun gets called with a vector of size dim as argument, and returns the score
+// based on those parameters (lower is better). Returns a vector of dim+1
+// dimensions, where the first value is the best function value found and
+// the rest is the corresponding argument. Use init to pass an initial guess
+// of where the optimum is; without it the search starts at the origin.
+//
+// Usage example:
+//
+// RunSimplex(2, 0.1, 100, [](const std::vector<double>& v) {
+//   return (v[0] - 5) * (v[0] - 5) + (v[1] - 7) * (v[1] - 7);
+// });
+//
+// Returns approximately (0.0, 5, 7)
+std::vector<double> RunSimplex(
+    int dim, double amount, int max_iterations,
+    const std::function<double(const std::vector<double>&)>& fun);
+std::vector<double> RunSimplex(
+    int dim, double amount, int max_iterations, const std::vector<double>& init,
+    const std::function<double(const std::vector<double>&)>& fun);
+
+// Implementation of the Scaled Conjugate Gradient method described in the
+// following paper:
+//   Moller, M. "A Scaled Conjugate Gradient Algorithm for Fast Supervised
+//   Learning", Neural Networks, Vol. 6. pp. 525-533, 1993
+//   http://sci2s.ugr.es/keel/pdf/algorithm/articulo/moller1990.pdf
+//
+// The Function template parameter is a class that has the following method:
+//
+//   // Returns the value of the function at point w and sets *df to be the
+//   // negative gradient vector of the function at point w.
+//   double Compute(const optimize::Array<T, N>& w,
+//                  optimize::Array<T, N>* df) const;
+//
+// Returns w once |df(w)| <= grad_norm_threshold or max_iters steps were run.
+template <typename T, size_t N, typename Function>
+Array<T, N> OptimizeWithScaledConjugateGradientMethod(
+    const Function& f, const Array<T, N>& w0, const T grad_norm_threshold,
+    size_t max_iters) {
+  const size_t n = w0.size();
+  const T rsq_threshold = grad_norm_threshold * grad_norm_threshold;
+  const T sigma0 = static_cast<T>(0.0001);
+  const T l_min = static_cast<T>(1.0e-15);
+  const T l_max = static_cast<T>(1.0e15);
+
+  Array<T, N> w = w0;
+  Array<T, N> wp;
+  Array<T, N> r;
+  Array<T, N> rt;
+  Array<T, N> e;
+  Array<T, N> p;
+  T psq;
+  T fp;
+  T D;
+  T d;
+  T m;
+  T a;
+  T b;
+  T s;
+  T t;
+
+  T fw = f.Compute(w, &r);  // r: what Compute stores in *df at w
+  T rsq = r * r;
+  e = r;
+  p = r;
+  T l = static_cast<T>(1.0);
+  bool success = true;
+  size_t n_success = 0;
+  size_t k = 0;
+
+  while (k++ < max_iters) {
+    if (success) {
+      m = -(p * r);
+      if (m >= 0) {
+        p = r;
+        m = -(p * r);
+      }
+      psq = p * p;
+      s = sigma0 / std::sqrt(psq);
+      f.Compute(w + (s * p), &rt);
+      t = (p * (r - rt)) / s;
+    }
+
+    d = t + l * psq;
+    if (d <= 0) {
+      d = l * psq;
+      l = l - t / psq;
+    }
+
+    a = -m / d;
+    wp = w + a * p;  // candidate point
+    fp = f.Compute(wp, &rt);
+
+    D = 2.0 * (fp - fw) / (a * m);
+    if (D >= 0.0) {  // accept the candidate
+      success = true;
+      n_success++;
+      w = wp;
+    } else {
+      success = false;
+    }
+
+    if (success) {
+      e = r;
+      r = rt;
+      rsq = r * r;
+      fw = fp;
+      if (rsq <= rsq_threshold) {  // |r| is within the threshold: stop
+        break;
+      }
+    }
+
+    if (D < 0.25) {
+      l = std::min(4.0 * l, l_max);
+    } else if (D > 0.75) {
+      l = std::max(0.25 * l, l_min);
+    }
+
+    if ((n_success % n) == 0) {  // restart the direction from r
+      p = r;
+      l = 1.0;
+    } else if (success) {
+      b = ((e - r) * r) / m;
+      p = b * p + r;
+    }
+  }
+
+  return w;
+}
+
+}  // namespace optimize
+}  // namespace jxl
+
+#endif  // LIB_JXL_OPTIMIZE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_optimize_test.cc b/third-party/libjxl/libjxl/lib/jxl/enc_optimize_test.cc
new file mode 100644
index 0000000000..1c6699f99e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_optimize_test.cc
@@ -0,0 +1,109 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_optimize.h"
+
+#include <stdio.h>
+
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace optimize {
+namespace {
+
+// The maximum number of iterations for the test.
+static const size_t kMaxTestIter = 100000;
+
+// F(w) = (w - w_min)^2.
+struct SimpleQuadraticFunction {
+  using ArrayType = Array<double, 2>;
+  explicit SimpleQuadraticFunction(const ArrayType& target) : w_min(target) {}
+  // Returns F(w); *df receives -2 (w - w_min), i.e. the negated gradient.
+  double Compute(const ArrayType& w, ArrayType* df) const {
+    ArrayType delta = w - w_min;
+    *df = -2.0 * delta;
+    return delta * delta;
+  }
+  // Location of the minimum of F.
+  ArrayType w_min;
+};
+
+// F(w | x, y) = \sum_i (y_i - (w[0] * x_i^w[1] + w[2]))^2.
+struct PowerFunction {
+  using ArrayType = Array<double, 3>;
+  explicit PowerFunction(const std::vector<double>& x0,
+                         const std::vector<double>& y0)
+      : x(x0), y(y0) {}
+  // Returns the squared-error loss; *df receives its negated gradient in w.
+  double Compute(const ArrayType& w, ArrayType* df) const {
+    double loss = 0;
+    (*df)[0] = 0;
+    (*df)[1] = 0;
+    (*df)[2] = 0;
+    for (size_t i = 0; i < y.size(); ++i) {
+      if (x[i] == 0) continue;  // Samples at x == 0 are ignored.
+      const double x_pow = pow(x[i], w[1]);
+      const double residual = y[i] - (w[0] * x_pow + w[2]);
+      (*df)[0] += 2.0 * residual * x_pow;
+      (*df)[1] += 2.0 * residual * w[0] * x_pow * log(x[i]);
+      (*df)[2] += 2.0 * residual;
+      loss += residual * residual;
+    }
+    return loss;
+  }
+  // Sample inputs and the matching observed outputs.
+  std::vector<double> x;
+  std::vector<double> y;
+};
+
+TEST(OptimizeTest, SimpleQuadraticFunction) {
+  static const double kPrecision = 1e-8;
+  SimpleQuadraticFunction::ArrayType expected;
+  expected[0] = 1.0;
+  expected[1] = 2.0;
+  SimpleQuadraticFunction f(expected);
+  SimpleQuadraticFunction::ArrayType w(0.);
+  w = optimize::OptimizeWithScaledConjugateGradientMethod(f, w, kPrecision,
+                                                          kMaxTestIter);
+  EXPECT_NEAR(w[0], 1.0, kPrecision);  // Must land on the known minimum.
+  EXPECT_NEAR(w[1], 2.0, kPrecision);
+}
+
+TEST(OptimizeTest, PowerFunction) {
+  static const double kPrecision = 0.01;
+  // Samples of y = 2 x^3 + 5 at x = 0, 1, ..., 9.
+  std::vector<double> x(10);
+  std::vector<double> y(10);
+  for (size_t i = 0; i < x.size(); ++i) {
+    x[i] = 1. * i;
+    y[i] = 2. * pow(x[i], 3) + 5.;
+  }
+  PowerFunction f(x, y);
+  PowerFunction::ArrayType w(0.);
+  w = optimize::OptimizeWithScaledConjugateGradientMethod(f, w, kPrecision,
+                                                          kMaxTestIter);
+  EXPECT_NEAR(w[0], 2.0, kPrecision);
+  EXPECT_NEAR(w[1], 3.0, kPrecision);
+  EXPECT_NEAR(w[2], 5.0, kPrecision);
+}
+
+TEST(OptimizeTest, SimplexOptTest) {
+  static const double kPrecision = 0.01;
+  // Paraboloid with minimum value 2 at (1, -1.5).
+  auto paraboloid = [](const std::vector<double>& x) -> double {
+    const double dx = x[0] - 1.0;
+    const double dy = x[1] + 1.5;
+    return 2.0 + dx * dx + dy * dy;
+  };
+  auto opt = RunSimplex(2, 0.01, 100, paraboloid);
+  EXPECT_EQ(opt.size(), 3u);  // Minimum value first, then its coordinates.
+  EXPECT_NEAR(opt[0], 2.0, kPrecision);
+  EXPECT_NEAR(opt[1], 1.0, kPrecision);
+  EXPECT_NEAR(opt[2], -1.5, kPrecision);
+}
+
+}  // namespace
+}  // namespace optimize
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_params.h b/third-party/libjxl/libjxl/lib/jxl/enc_params.h
new file mode 100644
index 0000000000..bce640ba5d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_params.h
@@ -0,0 +1,220 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_PARAMS_H_
+#define LIB_JXL_ENC_PARAMS_H_
+
+// Parameters and flags that govern JXL compression.
+
+#include <jxl/encode.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/butteraugli/butteraugli.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/modular/options.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+// Encoder effort tiers: smaller values enable more (slower) encoder features.
+enum class SpeedTier {
+  // Try multiple combinations of Tortoise flags for modular mode. Otherwise
+  // like kTortoise.
+  kGlacier = 0,
+  // Turns on FindBestQuantizationHQ loop. Equivalent to "guetzli" mode.
+  kTortoise = 1,
+  // Turns on FindBestQuantization butteraugli loop.
+  kKitten = 2,
+  // Turns on dots, patches, and spline detection by default, as well as full
+  // context clustering. Default.
+  kSquirrel = 3,
+  // Turns on error diffusion and full AC strategy heuristics. Equivalent to
+  // "fast" mode.
+  kWombat = 4,
+  // Turns on gaborish by default, non-default cmap, initial quant field.
+  kHare = 5,
+  // Turns on simple heuristics for AC strategy, quant field, and clustering;
+  // also enables coefficient reordering.
+  kCheetah = 6,
+  // Turns off most encoder features. Does context clustering.
+  // Modular: uses fixed tree with Weighted predictor.
+  kFalcon = 7,
+  // Currently fastest possible setting for VarDCT.
+  // Modular: uses fixed tree with Gradient predictor.
+  kThunder = 8,
+  // VarDCT: same as kThunder.
+  // Modular: no tree, Gradient predictor, fast histograms.
+  kLightning = 9
+};
+
+// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
+struct CompressParams {
+  float butteraugli_distance = 1.0f;  // 0 is what SetLossless() selects.
+
+  // explicit distances for extra channels (defaults to butteraugli_distance
+  // when not set; value of -1 can be used to represent 'default')
+  std::vector<float> ec_distance;
+
+  // Try to achieve a maximum pixel-by-pixel error on each channel.
+  bool max_error_mode = false;
+  float max_error[3] = {0.0, 0.0, 0.0};
+
+  SpeedTier speed_tier = SpeedTier::kSquirrel;
+  int brotli_effort = -1;
+
+  // 0 = default.
+  // 1 = slightly worse quality.
+  // 4 = fastest speed, lowest quality
+  size_t decoding_speed_tier = 0;
+
+  ColorTransform color_transform = ColorTransform::kXYB;
+
+  // If true, the "modular mode options" members below are used.
+  bool modular_mode = false;
+
+  // Change group size in modular mode (0=128, 1=256, 2=512, 3=1024, -1=encoder
+  // chooses).
+  int modular_group_size_shift = -1;
+
+  Override preview = Override::kDefault;
+  Override noise = Override::kDefault;
+  Override dots = Override::kDefault;
+  Override patches = Override::kDefault;
+  Override gaborish = Override::kDefault;
+  int epf = -1;
+
+  // Progressive mode.
+  bool progressive_mode = false;
+
+  // Quantized-progressive mode.
+  bool qprogressive_mode = false;
+
+  // Put center groups first in the bitstream.
+  bool centerfirst = false;
+
+  // Pixel coordinates of the center. First group will contain that center.
+  size_t center_x = static_cast<size_t>(-1);
+  size_t center_y = static_cast<size_t>(-1);
+
+  int progressive_dc = -1;
+
+  // If on: preserve color of invisible pixels (if off: don't care)
+  // Default: on for lossless, off for lossy
+  Override keep_invisible = Override::kDefault;
+
+  JxlCmsInterface cms;  // Has no initializer; filled in by SetCms().
+  bool cms_set = false;
+  void SetCms(const JxlCmsInterface& cms) {
+    this->cms = cms;
+    cms_set = true;
+  }
+
+  // Force usage of CfL when doing JPEG recompression. This can have unexpected
+  // effects on the decoded pixels, while still being JPEG-compliant and
+  // allowing reconstruction of the original JPEG.
+  bool force_cfl_jpeg_recompression = true;
+
+  // Use brotli compression for any boxes derived from a JPEG frame.
+  bool jpeg_compress_boxes = true;
+
+  // Preserve this metadata when doing JPEG recompression.
+  bool jpeg_keep_exif = true;
+  bool jpeg_keep_xmp = true;
+  bool jpeg_keep_jumbf = true;
+
+  // Set the noise to what it would approximately be if shooting at the nominal
+  // exposure for a given ISO setting on a 35mm camera.
+  float photon_noise_iso = 0;
+
+  // modular mode options below
+  ModularOptions options;
+  int responsive = -1;
+  // empty for default squeeze
+  std::vector<SqueezeParams> squeezes;
+  int colorspace = -1;
+  // Use Global channel palette if #colors < this percentage of range
+  float channel_colors_pre_transform_percent = 95.f;
+  // Use Local channel palette if #colors < this percentage of range
+  float channel_colors_percent = 80.f;
+  int palette_colors = 1 << 10;  // up to 10-bit palette is probably worthwhile
+  bool lossy_palette = false;
+
+  // Returns whether these params are lossless as defined by SetLossless().
+  bool IsLossless() const { return modular_mode && ModularPartIsLossless(); }
+
+  bool ModularPartIsLossless() const {
+    if (modular_mode) {
+      // YCbCr is also considered lossless here since it's intended for
+      // source material that is already YCbCr (we don't do the fwd transform)
+      if (butteraugli_distance != 0 ||
+          color_transform == jxl::ColorTransform::kXYB)
+        return false;
+    }
+    for (float f : ec_distance) {
+      if (f > 0) return false;
+      if (f < 0 && butteraugli_distance != 0) return false;
+    }
+    // if no explicit ec_distance given, and using vardct, then the modular part
+    // is empty or not lossless
+    if (!modular_mode && ec_distance.empty()) return false;
+    // all modular channels are encoded at distance 0
+    return true;
+  }
+
+  // Sets the parameters required to make the codec lossless.
+  void SetLossless() {
+    modular_mode = true;
+    butteraugli_distance = 0.0f;
+    for (float& f : ec_distance) f = 0.0f;
+    color_transform = jxl::ColorTransform::kNone;
+  }
+
+  // Down/upsample the image before encoding / after decoding by this factor.
+  // The resampling value can also be set to <= 0 to automatically choose based
+  // on distance, however EncodeFrame doesn't support this, so it is
+  // required to call PostInit() to set a valid positive resampling
+  // value and altered butteraugli score if this is used.
+  int resampling = -1;
+  int ec_resampling = -1;
+  // Skip the downsampling before encoding if this is true.
+  bool already_downsampled = false;
+  // Butteraugli target distance on the original full size image, this can be
+  // different from butteraugli_distance if resampling was used.
+  float original_butteraugli_distance = -1.0f;
+
+  float quant_ac_rescale = 1.0;
+
+  // Codestream level to conform to.
+  // -1: don't care
+  int level = -1;
+
+  std::vector<float> manual_noise;
+  std::vector<float> manual_xyb_factors;
+
+  JxlDebugImageCallback debug_image = nullptr;
+  void* debug_image_opaque = nullptr;  // Initialized so copies read no garbage.
+};
+// Butteraugli-distance thresholds, e.g. the default minimum for trying dots.
+static constexpr float kMinButteraugliForDynamicAR = 0.5f;
+static constexpr float kMinButteraugliForDots = 3.0f;
+static constexpr float kMinButteraugliToSubtractOriginalPatches = 3.0f;
+
+// Always off. NOTE(review): presumably 99 is above any distance in use.
+static constexpr float kMinButteraugliForNoise = 99.0f;
+
+// Minimum butteraugli distance the encoder accepts.
+static constexpr float kMinButteraugliDistance = 0.001f;
+
+// Tile size for encoder-side processing. Must be equal to color tile dim in the
+// current implementation.
+static constexpr size_t kEncTileDim = 64;
+static constexpr size_t kEncTileDimInBlocks = kEncTileDim / kBlockDim;
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_PARAMS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_patch_dictionary.cc b/third-party/libjxl/libjxl/lib/jxl/enc_patch_dictionary.cc
new file mode 100644
index 0000000000..800cfc56f8
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_patch_dictionary.cc
@@ -0,0 +1,816 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_patch_dictionary.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <atomic>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_frame.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_debug_image.h"
+#include "lib/jxl/enc_dot_dictionary.h"
+#include "lib/jxl/enc_frame.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/patch_dictionary_internal.h"
+
+namespace jxl {
+
+static constexpr size_t kPatchFrameReferenceId = 3;
+
+// static. Serializes |pdic| into |writer| as one entropy-coded token stream.
+void PatchDictionaryEncoder::Encode(const PatchDictionary& pdic,
+                                    BitWriter* writer, size_t layer,
+                                    AuxOut* aux_out) {
+  JXL_ASSERT(pdic.HasAny());
+  std::vector<std::vector<Token>> tokens(1);
+  size_t num_ec = pdic.shared_->metadata->m.num_extra_channels;
+
+  auto add_num = [&](int context, size_t num) {
+    tokens[0].emplace_back(context, num);
+  };
+  size_t num_ref_patch = 0;  // Number of runs sharing one ref_pos_idx.
+  for (size_t i = 0; i < pdic.positions_.size();) {
+    size_t ref_pos_idx = pdic.positions_[i].ref_pos_idx;
+    while (i < pdic.positions_.size() &&
+           pdic.positions_[i].ref_pos_idx == ref_pos_idx) {
+      i++;
+    }
+    num_ref_patch++;
+  }
+  add_num(kNumRefPatchContext, num_ref_patch);
+  size_t blend_pos = 0;  // blendings_ holds num_ec + 1 entries per position.
+  for (size_t i = 0; i < pdic.positions_.size();) {
+    size_t i_start = i;
+    size_t ref_pos_idx = pdic.positions_[i].ref_pos_idx;
+    const auto& ref_pos = pdic.ref_positions_[ref_pos_idx];
+    while (i < pdic.positions_.size() &&
+           pdic.positions_[i].ref_pos_idx == ref_pos_idx) {
+      i++;
+    }
+    size_t num = i - i_start;
+    JXL_ASSERT(num > 0);
+    add_num(kReferenceFrameContext, ref_pos.ref);
+    add_num(kPatchReferencePositionContext, ref_pos.x0);
+    add_num(kPatchReferencePositionContext, ref_pos.y0);
+    add_num(kPatchSizeContext, ref_pos.xsize - 1);  // Sizes are coded minus 1.
+    add_num(kPatchSizeContext, ref_pos.ysize - 1);
+    add_num(kPatchCountContext, num - 1);
+    for (size_t j = i_start; j < i; j++) {
+      const PatchPosition& pos = pdic.positions_[j];
+      if (j == i_start) {
+        add_num(kPatchPositionContext, pos.x);
+        add_num(kPatchPositionContext, pos.y);
+      } else {
+        add_num(kPatchOffsetContext,
+                PackSigned(pos.x - pdic.positions_[j - 1].x));
+        add_num(kPatchOffsetContext,
+                PackSigned(pos.y - pdic.positions_[j - 1].y));
+      }
+      for (size_t ch = 0; ch < num_ec + 1; ++ch, ++blend_pos) {
+        const PatchBlending& info = pdic.blendings_[blend_pos];
+        add_num(kPatchBlendModeContext, static_cast<uint32_t>(info.mode));
+        if (UsesAlpha(info.mode) &&
+            pdic.shared_->metadata->m.extra_channel_info.size() > 1) {
+          add_num(kPatchAlphaChannelContext, info.alpha_channel);
+        }
+        if (UsesClamp(info.mode)) {
+          add_num(kPatchClampContext, info.clamp);
+        }
+      }
+    }
+  }
+
+  EntropyEncodingData codes;
+  std::vector<uint8_t> context_map;
+  BuildAndEncodeHistograms(HistogramParams(), kNumPatchDictionaryContexts,
+                           tokens, &codes, &context_map, writer, layer,
+                           aux_out);
+  WriteTokens(tokens[0], codes, context_map, writer, layer, aux_out);
+}
+
+// static
+void PatchDictionaryEncoder::SubtractFrom(const PatchDictionary& pdic,
+                                          Image3F* opsin) {
+  const size_t num_ec = pdic.shared_->metadata->m.num_extra_channels;
+  // TODO(veluca): this can likely be optimized knowing it runs on full images.
+  for (size_t y = 0; y < opsin->ysize(); y++) {
+    float* JXL_RESTRICT rows[3] = {
+        opsin->PlaneRow(0, y),
+        opsin->PlaneRow(1, y),
+        opsin->PlaneRow(2, y),
+    };
+    for (size_t pos_idx : pdic.GetPatchesForRow(y)) {
+      const PatchPosition& pos = pdic.positions_[pos_idx];
+      const PatchReferencePosition& ref_pos =
+          pdic.ref_positions_[pos.ref_pos_idx];
+      const PatchBlendMode mode = pdic.blendings_[pos_idx * (num_ec + 1)].mode;
+      const size_t patch_y0 = pos.y;
+      const size_t patch_x0 = pos.x;
+      const size_t patch_width = ref_pos.xsize;
+      JXL_DASSERT(y >= patch_y0);
+      JXL_DASSERT(y < patch_y0 + ref_pos.ysize);
+      // Row of the reference frame that lines up with image row y.
+      const size_t ref_y = ref_pos.y0 + (y - patch_y0);
+      const auto& ref_color =
+          pdic.shared_->reference_frames[ref_pos.ref].frame.color();
+      // Reference rows, already offset to the patch's first column.
+      const float* JXL_RESTRICT ref_rows[3] = {
+          ref_color.ConstPlaneRow(0, ref_y) + ref_pos.x0,
+          ref_color.ConstPlaneRow(1, ref_y) + ref_pos.x0,
+          ref_color.ConstPlaneRow(2, ref_y) + ref_pos.x0,
+      };
+      // kAdd patches are subtracted back out; kReplace areas are zeroed.
+      for (size_t ix = 0; ix < patch_width; ix++) {
+        for (size_t c = 0; c < 3; c++) {
+          switch (mode) {
+            case PatchBlendMode::kAdd:
+              rows[c][patch_x0 + ix] -= ref_rows[c][ix];
+              break;
+            case PatchBlendMode::kReplace:
+              rows[c][patch_x0 + ix] = 0;
+              break;
+            case PatchBlendMode::kNone:
+              break;  // Nothing to do.
+            default:
+              JXL_UNREACHABLE("Blending mode %u not yet implemented",
+                              (uint32_t)mode);
+          }
+        }
+      }
+    }
+  }
+}
+
+namespace {
+// Per-channel quantization steps and distance weights for patch detection.
+struct PatchColorspaceInfo {
+  float kChannelDequant[3];
+  float kChannelWeights[3];
+
+  explicit PatchColorspaceInfo(bool is_xyb) {
+    if (is_xyb) {
+      kChannelDequant[0] = 0.01615;
+      kChannelDequant[1] = 0.08875;
+      kChannelDequant[2] = 0.1922;
+      kChannelWeights[0] = 30.0;
+      kChannelWeights[1] = 3.0;
+      kChannelWeights[2] = 1.0;
+    } else {
+      kChannelDequant[0] = 20.0f / 255;
+      kChannelDequant[1] = 22.0f / 255;
+      kChannelDequant[2] = 20.0f / 255;
+      kChannelWeights[0] = 0.017 * 255;
+      kChannelWeights[1] = 0.02 * 255;
+      kChannelWeights[2] = 0.017 * 255;
+    }
+  }
+  // Scale: |val| in channel-c quantization steps; Quantize truncates it to int.
+  float ScaleForQuantization(float val, size_t c) const {
+    return val / kChannelDequant[c];
+  }
+  int Quantize(float val, size_t c) const {
+    return truncf(ScaleForQuantization(val, c));
+  }
+  // True if the channel-weighted L1 distance of v1 and v2 is <= |threshold|.
+  bool is_similar_v(const float v1[3], const float v2[3],
+                    float threshold) const {
+    float distance = 0;
+    for (size_t c = 0; c < 3; c++) {
+      distance += std::fabs(v1[c] - v2[c]) * kChannelWeights[c];
+    }
+    return distance <= threshold;
+  }
+};
+// Returns small components on flat backgrounds that occur at least twice.
+std::vector<PatchInfo> FindTextLikePatches(
+    const CompressParams& cparams, const Image3F& opsin,
+    const PassesEncoderState* JXL_RESTRICT state, ThreadPool* pool,
+    AuxOut* aux_out, bool is_xyb) {
+  if (state->cparams.patches == Override::kOff) return {};
+
+  PatchColorspaceInfo pci(is_xyb);
+  float kSimilarThreshold = 0.8f;  // Bound for is_similar().
+
+  auto is_similar_impl = [&pci](std::pair<uint32_t, uint32_t> p1,
+                                std::pair<uint32_t, uint32_t> p2,
+                                const float* JXL_RESTRICT rows[3],
+                                size_t stride, float threshold) {
+    float v1[3], v2[3];
+    for (size_t c = 0; c < 3; c++) {
+      v1[c] = rows[c][p1.second * stride + p1.first];
+      v2[c] = rows[c][p2.second * stride + p2.first];
+    }
+    return pci.is_similar_v(v1, v2, threshold);
+  };
+
+  std::atomic<bool> has_screenshot_areas{false};
+  const size_t opsin_stride = opsin.PixelsPerRow();
+  const float* JXL_RESTRICT opsin_rows[3] = {opsin.ConstPlaneRow(0, 0),
+                                             opsin.ConstPlaneRow(1, 0),
+                                             opsin.ConstPlaneRow(2, 0)};
+
+  auto is_same = [&opsin_rows, opsin_stride](std::pair<uint32_t, uint32_t> p1,
+                                             std::pair<uint32_t, uint32_t> p2) {
+    for (size_t c = 0; c < 3; c++) {
+      float v1 = opsin_rows[c][p1.second * opsin_stride + p1.first];
+      float v2 = opsin_rows[c][p2.second * opsin_stride + p2.first];
+      if (std::fabs(v1 - v2) > 1e-4) {
+        return false;
+      }
+    }
+    return true;
+  };
+
+  auto is_similar = [&](std::pair<uint32_t, uint32_t> p1,
+                        std::pair<uint32_t, uint32_t> p2) {
+    return is_similar_impl(p1, p2, opsin_rows, opsin_stride, kSimilarThreshold);
+  };
+
+  constexpr int64_t kPatchSide = 4;
+  constexpr int64_t kExtraSide = 4;
+
+  // Look for kPatchSide size squares, naturally aligned, that all have the same
+  // pixel values.
+  ImageB is_screenshot_like(DivCeil(opsin.xsize(), kPatchSide),
+                            DivCeil(opsin.ysize(), kPatchSide));
+  ZeroFillImage(&is_screenshot_like);
+  uint8_t* JXL_RESTRICT screenshot_row = is_screenshot_like.Row(0);
+  const size_t screenshot_stride = is_screenshot_like.PixelsPerRow();
+  const auto process_row = [&](const uint32_t y, size_t /* thread */) {
+    for (uint64_t x = 0; x < opsin.xsize() / kPatchSide; x++) {
+      bool all_same = true;
+      const size_t side = static_cast<size_t>(kPatchSide);
+      // Compare every pixel of the block with its top-left pixel; both loops
+      // stop as soon as one mismatch is found.
+      for (size_t iy = 0; all_same && iy < side; iy++) {
+        for (size_t ix = 0; all_same && ix < side; ix++) {
+          size_t cx = x * kPatchSide + ix;
+          size_t cy = y * kPatchSide + iy;
+          all_same = is_same({cx, cy}, {x * kPatchSide, y * kPatchSide});
+        }
+      }
+      if (!all_same) continue;
+      size_t num = 0;
+      size_t num_same = 0;
+      for (int64_t iy = -kExtraSide; iy < kExtraSide + kPatchSide; iy++) {
+        for (int64_t ix = -kExtraSide; ix < kExtraSide + kPatchSide; ix++) {
+          int64_t cx = x * kPatchSide + ix;
+          int64_t cy = y * kPatchSide + iy;
+          if (cx < 0 || static_cast<uint64_t>(cx) >= opsin.xsize() ||  //
+              cy < 0 || static_cast<uint64_t>(cy) >= opsin.ysize()) {
+            continue;
+          }
+          num++;
+          if (is_same({cx, cy}, {x * kPatchSide, y * kPatchSide})) num_same++;
+        }
+      }
+      // Too few equal pixels nearby.
+      if (num_same * 8 < num * 7) continue;
+      screenshot_row[y * screenshot_stride + x] = 1;
+      has_screenshot_areas = true;
+    }
+  };
+  JXL_CHECK(RunOnPool(pool, 0, opsin.ysize() / kPatchSide, ThreadPool::NoInit,
+                      process_row, "IsScreenshotLike"));
+
+  // TODO(veluca): also parallelize the rest of this function.
+  if (WantDebugOutput(cparams)) {
+    DumpPlaneNormalized(cparams, "screenshot_like", is_screenshot_like);
+  }
+
+  constexpr int kSearchRadius = 1;
+
+  if (!ApplyOverride(state->cparams.patches, has_screenshot_areas)) {
+    return {};
+  }
+
+  // Search for "similar enough" pixels near the screenshot-like areas.
+  ImageB is_background(opsin.xsize(), opsin.ysize());
+  ZeroFillImage(&is_background);
+  Image3F background(opsin.xsize(), opsin.ysize());
+  ZeroFillImage(&background);
+  constexpr size_t kDistanceLimit = 50;  // Max L1 distance to the seed.
+  float* JXL_RESTRICT background_rows[3] = {
+      background.PlaneRow(0, 0),
+      background.PlaneRow(1, 0),
+      background.PlaneRow(2, 0),
+  };
+  const size_t background_stride = background.PixelsPerRow();
+  uint8_t* JXL_RESTRICT is_background_row = is_background.Row(0);
+  const size_t is_background_stride = is_background.PixelsPerRow();
+  std::vector<
+      std::pair<std::pair<uint32_t, uint32_t>, std::pair<uint32_t, uint32_t>>>
+      queue;  // (pixel, seed it was reached from) pairs.
+  size_t queue_front = 0;
+  for (size_t y = 0; y < opsin.ysize(); y++) {
+    for (size_t x = 0; x < opsin.xsize(); x++) {
+      if (!screenshot_row[screenshot_stride * (y / kPatchSide) +
+                          (x / kPatchSide)])
+        continue;
+      queue.push_back({{x, y}, {x, y}});
+    }
+  }
+  while (queue.size() != queue_front) {
+    std::pair<uint32_t, uint32_t> cur = queue[queue_front].first;
+    std::pair<uint32_t, uint32_t> src = queue[queue_front].second;
+    queue_front++;
+    if (is_background_row[cur.second * is_background_stride + cur.first])
+      continue;
+    is_background_row[cur.second * is_background_stride + cur.first] = 1;
+    for (size_t c = 0; c < 3; c++) {
+      background_rows[c][cur.second * background_stride + cur.first] =
+          opsin_rows[c][src.second * opsin_stride + src.first];
+    }
+    for (int dx = -kSearchRadius; dx <= kSearchRadius; dx++) {
+      for (int dy = -kSearchRadius; dy <= kSearchRadius; dy++) {
+        if (dx == 0 && dy == 0) continue;
+        int next_first = cur.first + dx;
+        int next_second = cur.second + dy;
+        if (next_first < 0 || next_second < 0 ||
+            static_cast<uint32_t>(next_first) >= opsin.xsize() ||
+            static_cast<uint32_t>(next_second) >= opsin.ysize()) {
+          continue;
+        }
+        if (static_cast<uint32_t>(
+                std::abs(next_first - static_cast<int>(src.first)) +
+                std::abs(next_second - static_cast<int>(src.second))) >
+            kDistanceLimit) {
+          continue;
+        }
+        std::pair<uint32_t, uint32_t> next{next_first, next_second};
+        if (is_similar(src, next)) {
+          if (!screenshot_row[next.second / kPatchSide * screenshot_stride +
+                              next.first / kPatchSide] ||
+              is_same(src, next)) {
+            if (!is_background_row[next.second * is_background_stride +
+                                   next.first])
+              queue.emplace_back(next, src);
+          }
+        }
+      }
+    }
+  }
+  queue.clear();
+
+  ImageF ccs;
+  Rng rng(0);
+  bool paint_ccs = false;
+  if (WantDebugOutput(cparams)) {
+    DumpPlaneNormalized(cparams, "is_background", is_background);
+    if (is_xyb) {
+      DumpXybImage(cparams, "background", background);
+    } else {
+      DumpImage(cparams, "background", background);
+    }
+    ccs = ImageF(opsin.xsize(), opsin.ysize());
+    ZeroFillImage(&ccs);
+    paint_ccs = true;
+  }
+
+  constexpr float kVerySimilarThreshold = 0.03f;
+  constexpr float kHasSimilarThreshold = 0.03f;
+
+  const float* JXL_RESTRICT const_background_rows[3] = {
+      background_rows[0], background_rows[1], background_rows[2]};
+  auto is_similar_b = [&](std::pair<int, int> p1, std::pair<int, int> p2) {
+    return is_similar_impl(p1, p2, const_background_rows, background_stride,
+                           kVerySimilarThreshold);
+  };
+
+  constexpr int kMinPeak = 2;
+  constexpr int kHasSimilarRadius = 2;
+
+  std::vector<PatchInfo> info;
+
+  // Find small CC outside the "similar enough" areas, compute bounding boxes,
+  // and run heuristics to exclude some patches.
+  ImageB visited(opsin.xsize(), opsin.ysize());
+  ZeroFillImage(&visited);
+  uint8_t* JXL_RESTRICT visited_row = visited.Row(0);
+  const size_t visited_stride = visited.PixelsPerRow();
+  std::vector<std::pair<uint32_t, uint32_t>> cc;
+  std::vector<std::pair<uint32_t, uint32_t>> stack;
+  for (size_t y = 0; y < opsin.ysize(); y++) {
+    for (size_t x = 0; x < opsin.xsize(); x++) {
+      if (is_background_row[y * is_background_stride + x]) continue;
+      cc.clear();
+      stack.clear();
+      stack.emplace_back(x, y);
+      size_t min_x = x;
+      size_t max_x = x;
+      size_t min_y = y;
+      size_t max_y = y;
+      std::pair<uint32_t, uint32_t> reference;  // First background neighbor.
+      bool found_border = false;
+      bool all_similar = true;
+      while (!stack.empty()) {
+        std::pair<uint32_t, uint32_t> cur = stack.back();
+        stack.pop_back();
+        if (visited_row[cur.second * visited_stride + cur.first]) continue;
+        visited_row[cur.second * visited_stride + cur.first] = 1;
+        if (cur.first < min_x) min_x = cur.first;
+        if (cur.first > max_x) max_x = cur.first;
+        if (cur.second < min_y) min_y = cur.second;
+        if (cur.second > max_y) max_y = cur.second;
+        if (paint_ccs) {
+          cc.push_back(cur);
+        }
+        for (int dx = -kSearchRadius; dx <= kSearchRadius; dx++) {
+          for (int dy = -kSearchRadius; dy <= kSearchRadius; dy++) {
+            if (dx == 0 && dy == 0) continue;
+            int next_first = static_cast<int32_t>(cur.first) + dx;
+            int next_second = static_cast<int32_t>(cur.second) + dy;
+            if (next_first < 0 || next_second < 0 ||
+                static_cast<uint32_t>(next_first) >= opsin.xsize() ||
+                static_cast<uint32_t>(next_second) >= opsin.ysize()) {
+              continue;
+            }
+            std::pair<uint32_t, uint32_t> next{next_first, next_second};
+            if (!is_background_row[next.second * is_background_stride +
+                                   next.first]) {
+              stack.push_back(next);
+            } else {
+              if (!found_border) {
+                reference = next;
+                found_border = true;
+              } else {
+                if (!is_similar_b(next, reference)) all_similar = false;
+              }
+            }
+          }
+        }
+      }
+      if (!found_border || !all_similar || max_x - min_x >= kMaxPatchSize ||
+          max_y - min_y >= kMaxPatchSize) {
+        continue;
+      }
+      size_t bpos = background_stride * reference.second + reference.first;
+      float ref[3] = {background_rows[0][bpos], background_rows[1][bpos],
+                      background_rows[2][bpos]};
+      bool has_similar = false;
+      for (size_t iy = std::max<int>(
+               static_cast<int32_t>(min_y) - kHasSimilarRadius, 0);
+           iy < std::min(max_y + kHasSimilarRadius + 1, opsin.ysize()); iy++) {
+        for (size_t ix = std::max<int>(
+                 static_cast<int32_t>(min_x) - kHasSimilarRadius, 0);
+             ix < std::min(max_x + kHasSimilarRadius + 1, opsin.xsize());
+             ix++) {
+          size_t opos = opsin_stride * iy + ix;
+          float px[3] = {opsin_rows[0][opos], opsin_rows[1][opos],
+                         opsin_rows[2][opos]};
+          if (pci.is_similar_v(ref, px, kHasSimilarThreshold)) {
+            has_similar = true;
+          }
+        }
+      }
+      if (!has_similar) continue;
+      info.emplace_back();
+      info.back().second.emplace_back(min_x, min_y);
+      QuantizedPatch& patch = info.back().first;
+      patch.xsize = max_x - min_x + 1;
+      patch.ysize = max_y - min_y + 1;
+      int max_value = 0;
+      for (size_t c : {1, 0, 2}) {
+        for (size_t iy = min_y; iy <= max_y; iy++) {
+          for (size_t ix = min_x; ix <= max_x; ix++) {
+            size_t offset = (iy - min_y) * patch.xsize + ix - min_x;
+            patch.fpixels[c][offset] =
+                opsin_rows[c][iy * opsin_stride + ix] - ref[c];
+            int val = pci.Quantize(patch.fpixels[c][offset], c);
+            patch.pixels[c][offset] = val;
+            if (std::abs(val) > max_value) max_value = std::abs(val);
+          }
+        }
+      }
+      if (max_value < kMinPeak) {
+        info.pop_back();
+        continue;
+      }
+      if (paint_ccs) {
+        float cc_color = rng.UniformF(0.5, 1.0);
+        for (std::pair<uint32_t, uint32_t> p : cc) {
+          ccs.Row(p.second)[p.first] = cc_color;
+        }
+      }
+    }
+  }
+
+  if (paint_ccs) {
+    JXL_ASSERT(WantDebugOutput(cparams));
+    DumpPlaneNormalized(cparams, "ccs", ccs);
+  }
+  if (info.empty()) {
+    return {};
+  }
+
+  // Merge duplicates; keep patches seen at least kMinPatchOccurrences times.
+  constexpr size_t kMinPatchOccurrences = 2;
+  std::sort(info.begin(), info.end());
+  size_t unique = 0;
+  for (size_t i = 1; i < info.size(); i++) {
+    if (info[i].first == info[unique].first) {
+      info[unique].second.insert(info[unique].second.end(),
+                                 info[i].second.begin(), info[i].second.end());
+    } else {
+      if (info[unique].second.size() >= kMinPatchOccurrences) {
+        unique++;
+      }
+      info[unique] = info[i];
+    }
+  }
+  if (info[unique].second.size() >= kMinPatchOccurrences) {
+    unique++;
+  }
+  info.resize(unique);
+
+  size_t max_patch_size = 0;
+
+  for (size_t i = 0; i < info.size(); i++) {
+    size_t pixels = info[i].first.xsize * info[i].first.ysize;
+    if (pixels > max_patch_size) max_patch_size = pixels;
+  }
+
+  // don't use patches if all patches are smaller than this
+  constexpr size_t kMinMaxPatchSize = 20;
+  if (max_patch_size < kMinMaxPatchSize) return {};
+
+  return info;
+}
+
+}  // namespace
+
+void FindBestPatchDictionary(const Image3F& opsin,  // Finds patches (or dots), packs them into a reference frame and records their positions in `state`.
+                             PassesEncoderState* JXL_RESTRICT state,
+                             const JxlCmsInterface& cms, ThreadPool* pool,
+                             AuxOut* aux_out, bool is_xyb) {
+  std::vector<PatchInfo> info =
+      FindTextLikePatches(state->cparams, opsin, state, pool, aux_out, is_xyb);
+
+  // TODO(veluca): this doesn't work if both dots and patches are enabled.
+  // For now, since dots and patches are not likely to occur in the same kind of
+  // images, disable dots if some patches were found.
+  if (info.empty() &&
+      ApplyOverride(
+          state->cparams.dots,
+          state->cparams.speed_tier <= SpeedTier::kSquirrel &&
+              state->cparams.butteraugli_distance >= kMinButteraugliForDots)) {
+    info = FindDotDictionary(state->cparams, opsin, state->shared.cmap, pool);
+  }
+
+  if (info.empty()) return;  // Nothing found: no patch dictionary is produced.
+
+  std::sort(  // Largest-area patches first, so the first-fit packing below places them first.
+      info.begin(), info.end(), [&](const PatchInfo& a, const PatchInfo& b) {
+        return a.first.xsize * a.first.ysize > b.first.xsize * b.first.ysize;
+      });
+
+  size_t max_x_size = 0;  // Widest patch.
+  size_t max_y_size = 0;  // Tallest patch.
+  size_t total_pixels = 0;  // Sum of the areas of all patches.
+
+  for (size_t i = 0; i < info.size(); i++) {
+    size_t pixels = info[i].first.xsize * info[i].first.ysize;
+    if (max_x_size < info[i].first.xsize) max_x_size = info[i].first.xsize;
+    if (max_y_size < info[i].first.ysize) max_y_size = info[i].first.ysize;
+    total_pixels += pixels;
+  }
+
+  // Bin-packing & conversion of patches. The initial frame side is at least the largest patch side and at least sqrt(total area).
+  constexpr float kBinPackingSlackness = 1.05f;
+  size_t ref_xsize = std::max<float>(max_x_size, std::sqrt(total_pixels));
+  size_t ref_ysize = std::max<float>(max_y_size, std::sqrt(total_pixels));
+  std::vector<std::pair<size_t, size_t>> ref_positions(info.size());  // (x0, y0) of each patch inside the reference frame.
+  // TODO(veluca): allow partial overlaps of patches that have the same pixels.
+  size_t max_y = 0;  // One past the lowest row used by any placed patch.
+  do {  // Retry with a larger frame until every patch has been placed.
+    max_y = 0;
+    // Increase packed image size. This also happens before the very first attempt.
+    ref_xsize = ref_xsize * kBinPackingSlackness + 1;
+    ref_ysize = ref_ysize * kBinPackingSlackness + 1;
+
+    ImageB occupied(ref_xsize, ref_ysize);  // Non-zero where a patch has already been placed.
+    ZeroFillImage(&occupied);
+    uint8_t* JXL_RESTRICT occupied_rows = occupied.Row(0);
+    size_t occupied_stride = occupied.PixelsPerRow();
+
+    bool success = true;
+    // For every patch...
+    for (size_t patch = 0; patch < info.size(); patch++) {
+      size_t x0 = 0;
+      size_t y0 = 0;
+      size_t xsize = info[patch].first.xsize;
+      size_t ysize = info[patch].first.ysize;
+      bool found = false;
+      // For every possible start position ... (scanned row by row, left to right; the first free spot wins)
+      for (; y0 + ysize <= ref_ysize; y0++) {
+        x0 = 0;
+        for (; x0 + xsize <= ref_xsize; x0++) {
+          bool has_occupied_pixel = false;
+          size_t x = x0;
+          // Check if it is possible to place the patch in this position in the
+          // reference frame.
+          for (size_t y = y0; y < y0 + ysize; y++) {
+            x = x0;
+            for (; x < x0 + xsize; x++) {
+              if (occupied_rows[y * occupied_stride + x]) {
+                has_occupied_pixel = true;
+                break;
+              }
+            }
+          }  // end of positioning check
+          if (!has_occupied_pixel) {
+            found = true;
+            break;
+          }
+          x0 = x;  // Jump to next pixel after the occupied one. NOTE(review): the `break` above only leaves the x loop, so `x` comes from the last row scanned - confirm this is intended.
+        }
+        if (found) break;
+      }  // end of start position checking
+
+      // We didn't find a possible position: repeat from the beginning with a
+      // larger reference frame size.
+      if (!found) {
+        success = false;
+        break;
+      }
+
+      // We found a position: mark the corresponding positions in the reference
+      // image as used.
+      ref_positions[patch] = {x0, y0};
+      for (size_t y = y0; y < y0 + ysize; y++) {
+        for (size_t x = x0; x < x0 + xsize; x++) {
+          occupied_rows[y * occupied_stride + x] = true;
+        }
+      }
+      max_y = std::max(max_y, y0 + ysize);
+    }
+
+    if (success) break;
+  } while (true);
+
+  JXL_ASSERT(ref_ysize >= max_y);
+
+  ref_ysize = max_y;  // Trim the unused rows at the bottom of the packed frame.
+
+  Image3F reference_frame(ref_xsize, ref_ysize);
+  // TODO(veluca): figure out a better way to fill the image.
+  ZeroFillImage(&reference_frame);
+  std::vector<PatchPosition> positions;  // One entry per occurrence of a patch in the image.
+  std::vector<PatchReferencePosition> pref_positions;  // One entry per distinct patch in the reference frame.
+  std::vector<PatchBlending> blendings;  // (1 + num_ec) entries per occurrence.
+  float* JXL_RESTRICT ref_rows[3] = {
+      reference_frame.PlaneRow(0, 0),
+      reference_frame.PlaneRow(1, 0),
+      reference_frame.PlaneRow(2, 0),
+  };
+  size_t ref_stride = reference_frame.PixelsPerRow();
+  size_t num_ec = state->shared.metadata->m.num_extra_channels;
+
+  for (size_t i = 0; i < info.size(); i++) {
+    PatchReferencePosition ref_pos;
+    ref_pos.xsize = info[i].first.xsize;
+    ref_pos.ysize = info[i].first.ysize;
+    ref_pos.x0 = ref_positions[i].first;
+    ref_pos.y0 = ref_positions[i].second;
+    ref_pos.ref = kPatchFrameReferenceId;
+    for (size_t y = 0; y < ref_pos.ysize; y++) {  // Copy the unquantized patch pixels into the reference frame.
+      for (size_t x = 0; x < ref_pos.xsize; x++) {
+        for (size_t c = 0; c < 3; c++) {
+          ref_rows[c][(y + ref_pos.y0) * ref_stride + x + ref_pos.x0] =
+              info[i].first.fpixels[c][y * ref_pos.xsize + x];
+        }
+      }
+    }
+    for (const auto& pos : info[i].second) {
+      positions.emplace_back(
+          PatchPosition{pos.first, pos.second, pref_positions.size()});  // size() is the index ref_pos will get when appended below.
+      // Add blending for color channels, ignore other channels.
+      blendings.push_back({PatchBlendMode::kAdd, 0, false});
+      for (size_t j = 0; j < num_ec; ++j) {
+        blendings.push_back({PatchBlendMode::kNone, 0, false});
+      }
+    }
+    pref_positions.emplace_back(std::move(ref_pos));
+  }
+
+  CompressParams cparams = state->cparams;  // Local copy: RoundtripPatchFrame modifies the params it is given.
+  // Recursive application of patches could create very weird issues.
+  cparams.patches = Override::kOff;
+
+  RoundtripPatchFrame(&reference_frame, state, kPatchFrameReferenceId, cparams,
+                      cms, pool, aux_out, /*subtract=*/true);
+
+  // TODO(veluca): this assumes that applying patches is commutative, which is
+  // not true for all blending modes. This code only produces kAdd patches, so
+  // this works out.
+  PatchDictionaryEncoder::SetPositions(
+      &state->shared.image_features.patches, std::move(positions),
+      std::move(pref_positions), std::move(blendings));
+}
+
+void RoundtripPatchFrame(Image3F* reference_frame,  // Encodes `*reference_frame` as a reference-only modular frame and installs it in reference slot `idx`.
+                         PassesEncoderState* JXL_RESTRICT state, int idx,
+                         CompressParams& cparams, const JxlCmsInterface& cms,
+                         ThreadPool* pool, AuxOut* aux_out, bool subtract) {
+  FrameInfo patch_frame_info;
+  cparams.resampling = 1;  // NOTE: `cparams` is a non-const reference, so these overrides are visible to the caller.
+  cparams.ec_resampling = 1;
+  cparams.dots = Override::kOff;
+  cparams.noise = Override::kOff;
+  cparams.modular_mode = true;
+  cparams.responsive = 0;
+  cparams.progressive_dc = 0;
+  cparams.progressive_mode = false;
+  cparams.qprogressive_mode = false;
+  // Use gradient predictor and not Predictor::Best.
+  cparams.options.predictor = Predictor::Gradient;
+  patch_frame_info.save_as_reference = idx;  // always saved.
+  patch_frame_info.frame_type = FrameType::kReferenceOnly;
+  patch_frame_info.save_before_color_transform = true;
+  ImageBundle ib(&state->shared.metadata->m);
+  // TODO(veluca): metadata.color_encoding is a lie: ib is in XYB, but there is
+  // no simple way to express that yet.
+  patch_frame_info.ib_needs_color_transform = false;
+  ib.SetFromImage(std::move(*reference_frame),  // The caller's image is moved from here.
+                  state->shared.metadata->m.color_encoding);
+  if (!ib.metadata()->extra_channel_info.empty()) {
+    // Add dummy extra channels to the patch image: patch encoding does not yet
+    // support extra channels, but the codec expects that the amount of extra
+    // channels in frames matches that in the metadata of the codestream.
+    std::vector<ImageF> extra_channels;
+    extra_channels.reserve(ib.metadata()->extra_channel_info.size());
+    for (size_t i = 0; i < ib.metadata()->extra_channel_info.size(); i++) {
+      extra_channels.emplace_back(ib.xsize(), ib.ysize());
+      // Must initialize the image with data to not affect blending with
+      // uninitialized memory.
+      // TODO(lode): patches must copy and use the real extra channels instead.
+      ZeroFillImage(&extra_channels.back());
+    }
+    ib.SetExtraChannels(std::move(extra_channels));
+  }
+  PassesEncoderState roundtrip_state;  // Separate encoder state used only for encoding this frame.
+  auto special_frame = std::unique_ptr<BitWriter>(new BitWriter());
+  AuxOut patch_aux_out;
+  JXL_CHECK(EncodeFrame(cparams, patch_frame_info, state->shared.metadata, ib,
+                        &roundtrip_state, cms, pool, special_frame.get(),
+                        aux_out ? &patch_aux_out : nullptr));
+  if (aux_out) {  // Fold the statistics of every layer of the patch frame into the dictionary layer.
+    for (const auto& l : patch_aux_out.layers) {
+      aux_out->layers[kLayerDictionary].Assimilate(l);
+    }
+  }
+  const Span<const uint8_t> encoded = special_frame->GetSpan();  // Taken before the writer is handed over to `state` below.
+  state->special_frames.emplace_back(std::move(special_frame));
+  if (subtract) {  // Decode what was just encoded and keep the decoded frame as the reference.
+    ImageBundle decoded(&state->shared.metadata->m);
+    PassesDecoderState dec_state;
+    JXL_CHECK(dec_state.output_encoding_info.SetFromMetadata(
+        *state->shared.metadata));
+    const uint8_t* frame_start = encoded.data();
+    size_t encoded_size = encoded.size();
+    JXL_CHECK(DecodeFrame(&dec_state, pool, frame_start, encoded_size, &decoded,
+                          *state->shared.metadata));
+    frame_start += decoded.decoded_bytes();
+    encoded_size -= decoded.decoded_bytes();
+    size_t ref_xsize =
+        dec_state.shared_storage.reference_frames[idx].frame.color()->xsize();
+    // if the frame itself uses patches, we need to decode another frame. NOTE(review): encoded_size is not reduced after this second decode, yet it is checked against 0 below - confirm.
+    if (!ref_xsize) {
+      JXL_CHECK(DecodeFrame(&dec_state, pool, frame_start, encoded_size,
+                            &decoded, *state->shared.metadata));
+    }
+    JXL_CHECK(encoded_size == 0);
+    state->shared.reference_frames[idx] =
+        std::move(dec_state.shared_storage.reference_frames[idx]);
+  } else {  // Keep the original (not round-tripped) image as the reference.
+    state->shared.reference_frames[idx].frame = std::move(ib);
+  }
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_patch_dictionary.h b/third-party/libjxl/libjxl/lib/jxl/enc_patch_dictionary.h
new file mode 100644
index 0000000000..f30881b232
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_patch_dictionary.h
@@ -0,0 +1,109 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_PATCH_DICTIONARY_H_
+#define LIB_JXL_ENC_PATCH_DICTIONARY_H_
+
+// Chooses reference patches, and avoids encoding them once per occurrence.
+
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include <tuple>
+#include <vector>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/opsin_params.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+constexpr size_t kMaxPatchSize = 32;
+
+struct QuantizedPatch {  // A candidate patch: quantized pixels (used for comparison) plus the original float pixels.
+  size_t xsize;  // Not initialized by the constructor; must be set before comparing.
+  size_t ysize;  // Not initialized by the constructor; must be set before comparing.
+  QuantizedPatch() {  // Allocates every channel buffer at the maximum patch size, whatever the actual size turns out to be.
+    for (size_t i = 0; i < 3; i++) {
+      pixels[i].resize(kMaxPatchSize * kMaxPatchSize);
+      fpixels[i].resize(kMaxPatchSize * kMaxPatchSize);
+    }
+  }
+  std::vector<int8_t> pixels[3] = {};  // Quantized values, one buffer per channel; only the first xsize * ysize entries are compared.
+  // Not compared. Used only to retrieve original pixels to construct the
+  // reference image.
+  std::vector<float> fpixels[3] = {};
+  bool operator==(const QuantizedPatch& other) const {  // Equal when the sizes match and the first xsize * ysize quantized values of every channel match.
+    if (xsize != other.xsize) return false;
+    if (ysize != other.ysize) return false;
+    for (size_t c = 0; c < 3; c++) {
+      if (memcmp(pixels[c].data(), other.pixels[c].data(),
+                 sizeof(int8_t) * xsize * ysize) != 0)
+        return false;
+    }
+    return true;
+  }
+
+  bool operator<(const QuantizedPatch& other) const {  // Orders by xsize, then ysize, then the memcmp order of channels 0, 1, 2.
+    if (xsize != other.xsize) return xsize < other.xsize;
+    if (ysize != other.ysize) return ysize < other.ysize;
+    for (size_t c = 0; c < 3; c++) {
+      int cmp = memcmp(pixels[c].data(), other.pixels[c].data(),
+                       sizeof(int8_t) * xsize * ysize);
+      if (cmp > 0) return false;
+      if (cmp < 0) return true;
+    }
+    return false;  // All compared fields are equal.
+  }
+};
+
+// Pair (patch, vector of occurrences).
+using PatchInfo =
+    std::pair<QuantizedPatch, std::vector<std::pair<uint32_t, uint32_t>>>;
+
+// Friend class of PatchDictionary. Holds the encoder-side operations that write to its private members.
+class PatchDictionaryEncoder {
+ public:
+  // Only call if HasAny().
+  static void Encode(const PatchDictionary& pdic, BitWriter* writer,
+                     size_t layer, AuxOut* aux_out);
+
+  static void SetPositions(PatchDictionary* pdic,  // Replaces the dictionary contents with the given vectors, then rebuilds its patch tree.
+                           std::vector<PatchPosition> positions,
+                           std::vector<PatchReferencePosition> ref_positions,
+                           std::vector<PatchBlending> blendings) {
+    pdic->positions_ = std::move(positions);
+    pdic->ref_positions_ = std::move(ref_positions);
+    pdic->blendings_ = std::move(blendings);
+    pdic->ComputePatchTree();  // Must run after all three vectors have been installed.
+  }
+
+  static void SubtractFrom(const PatchDictionary& pdic, Image3F* opsin);  // Defined elsewhere; presumably removes the patch contribution from `opsin` - confirm.
+};
+
+void FindBestPatchDictionary(const Image3F& opsin,
+                             PassesEncoderState* JXL_RESTRICT state,
+                             const JxlCmsInterface& cms, ThreadPool* pool,
+                             AuxOut* aux_out, bool is_xyb = true);
+
+void RoundtripPatchFrame(Image3F* reference_frame,
+                         PassesEncoderState* JXL_RESTRICT state, int idx,
+                         CompressParams& cparams, const JxlCmsInterface& cms,
+                         ThreadPool* pool, AuxOut* aux_out, bool subtract);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_PATCH_DICTIONARY_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_photon_noise.cc b/third-party/libjxl/libjxl/lib/jxl/enc_photon_noise.cc
new file mode 100644
index 0000000000..3786ef5cf5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_photon_noise.cc
@@ -0,0 +1,89 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_photon_noise.h"
+
+namespace jxl {
+
+namespace {
+
+// Assumes a daylight-like spectrum.
+// https://www.strollswithmydog.com/effective-quantum-efficiency-of-sensor/#:~:text=11%2C260%20photons/um%5E2/lx-s
+constexpr float kPhotonsPerLxSPerUm2 = 11260;
+
+// Order of magnitude for cameras in the 2010-2020 decade, taking the CFA into
+// account.
+constexpr float kEffectiveQuantumEfficiency = 0.20;
+
+// TODO(sboukortt): reevaluate whether these are good defaults, notably whether
+// it would be worth making read noise higher at lower ISO settings.
+constexpr float kPhotoResponseNonUniformity = 0.005;
+constexpr float kInputReferredReadNoise = 3;
+
+// Assumes a 35mm sensor.
+constexpr float kSensorAreaUm2 = 36000.f * 24000;
+
+template <typename T>
+inline constexpr T Square(const T x) {  // Returns x * x.
+  return x * x;
+}
+template <typename T>
+inline constexpr T Cube(const T x) {  // Returns x * x * x.
+  return x * x * x;
+}
+
+}  // namespace
+
+NoiseParams SimulatePhotonNoise(const size_t xsize, const size_t ysize,  // Fills a noise LUT from a simple sensor model for an image of xsize * ysize pixels shot at the given ISO.
+                                const float iso) {
+  const float kOpsinAbsorbanceBiasCbrt = std::cbrt(kOpsinAbsorbanceBias[1]);
+
+  // Focal plane exposure for 18% of kDefaultIntensityTarget, in lx·s.
+  // (ISO = 10 lx·s ÷ H)
+  const float h_18 = 10 / iso;
+
+  const float pixel_area_um2 = kSensorAreaUm2 / (xsize * ysize);  // The fixed sensor area is split evenly among all pixels.
+
+  const float electrons_per_pixel_18 = kEffectiveQuantumEfficiency *  // Electrons collected per pixel at the 18% exposure.
+                                       kPhotonsPerLxSPerUm2 * h_18 *
+                                       pixel_area_um2;
+
+  NoiseParams params;
+
+  for (size_t i = 0; i < NoiseParams::kNumNoisePoints; ++i) {  // One LUT entry per noise point.
+    const float scaled_index = i / (NoiseParams::kNumNoisePoints - 2.f);
+    // scaled_index is used for XYB = (0, 2·scaled_index, 2·scaled_index)
+    const float y = 2 * scaled_index;
+    // 1 = default intensity target
+    const float linear = std::max(  // Maps y back to linear light by undoing the cube root and bias, clamped at 0.
+        0.f, Cube(y - kOpsinAbsorbanceBiasCbrt) + kOpsinAbsorbanceBias[1]);
+    const float electrons_per_pixel = electrons_per_pixel_18 * (linear / 0.18f);
+    // Quadrature sum of read noise, photon shot noise (sqrt(S) so simply not
+    // squared here) and photo response non-uniformity.
+    // https://doi.org/10.1117/3.725073
+    // Units are electrons rms.
+    const float noise =
+        std::sqrt(Square(kInputReferredReadNoise) + electrons_per_pixel +
+                  Square(kPhotoResponseNonUniformity * electrons_per_pixel));
+    const float linear_noise = noise * (0.18f / electrons_per_pixel_18);  // Electrons converted back to linear units.
+    const float opsin_derivative =  // Slope of the cube root, (1/3) / cbrt(v)^2, at v = linear - bias.
+        (1.f / 3) / Square(std::cbrt(linear - kOpsinAbsorbanceBias[1]));
+    const float opsin_noise = linear_noise * opsin_derivative;
+
+    // TODO(sboukortt): verify more thoroughly whether the denominator is
+    // correct.
+    params.lut[i] =
+        Clamp1(opsin_noise /
+                   (0.22f             // norm_const
+                    * std::sqrt(2.f)  // red_noise + green_noise
+                    * 1.13f  // standard deviation of a plane of generated noise
+                    ),
+               0.f, 1.f);  // LUT entries are limited to [0, 1].
+  }
+
+  return params;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_photon_noise.h b/third-party/libjxl/libjxl/lib/jxl/enc_photon_noise.h
new file mode 100644
index 0000000000..f43e14d560
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_photon_noise.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_PHOTON_NOISE_H_
+#define LIB_JXL_ENC_PHOTON_NOISE_H_
+
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/noise.h"
+
+namespace jxl {
+
+// Constructs a NoiseParams representing the noise that would be seen at the
+// selected nominal exposure on a last-decade (as of 2021) color camera with a
+// 36×24mm sensor (“35mm format”).
+NoiseParams SimulatePhotonNoise(size_t xsize, size_t ysize, float iso);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_PHOTON_NOISE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_photon_noise_test.cc b/third-party/libjxl/libjxl/lib/jxl/enc_photon_noise_test.cc
new file mode 100644
index 0000000000..be11b465ad
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_photon_noise_test.cc
@@ -0,0 +1,51 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_photon_noise.h"
+
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+using ::testing::FloatNear;
+using ::testing::Pointwise;
+
+MATCHER(AreApproximatelyEqual, "") {  // Pairwise matcher: `arg` is an (actual, expected) tuple.
+  constexpr float kTolerance = 1e-6;  // Absolute tolerance handed to FloatNear.
+  const float actual = std::get<0>(arg);
+  const float expected = std::get<1>(arg);
+  return testing::ExplainMatchResult(FloatNear(expected, kTolerance), actual,
+                                     result_listener);
+}
+
+TEST(EncPhotonNoiseTest, LUTs) {  // Pins the LUT produced by SimulatePhotonNoise for several ISO values and image sizes.
+  EXPECT_THAT(
+      SimulatePhotonNoise(/*xsize=*/6000, /*ysize=*/4000, /*iso=*/100).lut,
+      Pointwise(AreApproximatelyEqual(),
+                {0.00259652, 0.0139648, 0.00681551, 0.00632582, 0.00694917,
+                 0.00803922, 0.00934574, 0.0107607}));
+  EXPECT_THAT(
+      SimulatePhotonNoise(/*xsize=*/6000, /*ysize=*/4000, /*iso=*/800).lut,
+      Pointwise(AreApproximatelyEqual(),
+                {0.02077220, 0.0420923, 0.01820690, 0.01439020, 0.01293670,
+                 0.01254030, 0.01277390, 0.0134161}));
+  EXPECT_THAT(
+      SimulatePhotonNoise(/*xsize=*/6000, /*ysize=*/4000, /*iso=*/6400).lut,
+      Pointwise(AreApproximatelyEqual(),
+                {0.1661770, 0.1691120, 0.05309080, 0.03963960, 0.03357410,
+                 0.03001650, 0.02776740, 0.0263478}));
+
+  // Lower when measured on a per-pixel basis as there are fewer of them (4000x3000 here versus 6000x4000 above, same ISO).
+  EXPECT_THAT(
+      SimulatePhotonNoise(/*xsize=*/4000, /*ysize=*/3000, /*iso=*/6400).lut,
+      Pointwise(AreApproximatelyEqual(),
+                {0.0830886, 0.1008720, 0.0367748, 0.0280305, 0.0240236,
+                 0.0218040, 0.0205771, 0.0200058}));
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_progressive_split.cc b/third-party/libjxl/libjxl/lib/jxl/enc_progressive_split.cc
new file mode 100644
index 0000000000..b65319f3fd
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_progressive_split.cc
@@ -0,0 +1,82 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_progressive_split.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <memory>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+template <typename T>
+void ProgressiveSplitter::SplitACCoefficients(  // Splits one block of coefficients into per-pass output blocks according to mode_.
+    const T* JXL_RESTRICT block, const AcStrategy& acs, size_t bx, size_t by,  // bx and by are not used in this body.
+    T* JXL_RESTRICT output[kMaxNumPasses]) {
+  size_t size = acs.covered_blocks_x() * acs.covered_blocks_y() * kDCTBlockSize;
+  auto shift_right_round0 = [&](T v, int shift) {  // Right shift that rounds toward zero: negative v gets (1 << shift) - 1 added first.
+    T one_if_negative = static_cast<uint32_t>(v) >> 31;
+    T add = (one_if_negative << shift) - one_if_negative;
+    return (v + add) >> shift;
+  };
+  // Early quit for the simple case of only one pass.
+  if (mode_.num_passes == 1) {
+    memcpy(output[0], block, sizeof(T) * size);
+    return;
+  }
+  size_t ncoeffs_all_done_from_earlier_passes = 1;  // Starts at 1, so the x < xsize, y < ysize corner is never emitted by any pass.
+
+  int previous_pass_shift = 0;
+  for (size_t num_pass = 0; num_pass < mode_.num_passes; num_pass++) {  // pass
+    // Zero out output block.
+    memset(output[num_pass], 0, size * sizeof(T));
+    const int pass_shift = mode_.passes[num_pass].shift;
+    size_t frame_ncoeffs = mode_.passes[num_pass].num_coefficients;
+    size_t xsize = acs.covered_blocks_x();
+    size_t ysize = acs.covered_blocks_y();
+    CoefficientLayout(&ysize, &xsize);  // Helper defined elsewhere; presumably maps block counts to the coefficient storage layout - confirm.
+    for (size_t y = 0; y < ysize * frame_ncoeffs; y++) {    // superblk-y
+      for (size_t x = 0; x < xsize * frame_ncoeffs; x++) {  // superblk-x
+        size_t pos = y * xsize * kBlockDim + x;
+        if (x < xsize * ncoeffs_all_done_from_earlier_passes &&
+            y < ysize * ncoeffs_all_done_from_earlier_passes) {
+          // This coefficient was already included in an earlier pass,
+          // which included a genuinely smaller set of coefficients.
+          continue;
+        }
+        T v = block[pos];
+        // Previous pass discarded some bits: do not encode them again. Only the remainder left by the previous shift is kept.
+        if (previous_pass_shift != 0) {
+          T previous_v = shift_right_round0(v, previous_pass_shift) *
+                         (1 << previous_pass_shift);
+          v -= previous_v;
+        }
+        output[num_pass][pos] = shift_right_round0(v, pass_shift);
+      }  // superblk-x
+    }    // superblk-y
+    // We just finished a pass.
+    // Hence, we are now guaranteed to have included all coeffs up to
+    // frame_ncoeffs in every block, unless the current pass is shifted.
+    if (mode_.passes[num_pass].shift == 0) {
+      ncoeffs_all_done_from_earlier_passes = frame_ncoeffs;
+    }
+    previous_pass_shift = mode_.passes[num_pass].shift;
+  }  // num_pass
+}
+
+template void ProgressiveSplitter::SplitACCoefficients<int32_t>(
+    const int32_t* JXL_RESTRICT, const AcStrategy&, size_t, size_t,
+    int32_t* JXL_RESTRICT[kMaxNumPasses]);
+
+template void ProgressiveSplitter::SplitACCoefficients<int16_t>(
+    const int16_t* JXL_RESTRICT, const AcStrategy&, size_t, size_t,
+    int16_t* JXL_RESTRICT[kMaxNumPasses]);
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_progressive_split.h b/third-party/libjxl/libjxl/lib/jxl/enc_progressive_split.h
new file mode 100644
index 0000000000..ef25944bb7
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_progressive_split.h
@@ -0,0 +1,131 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_PROGRESSIVE_SPLIT_H_
+#define LIB_JXL_PROGRESSIVE_SPLIT_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <limits>
+#include <memory>
+#include <vector>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/splines.h"
+
+// Functions to split DCT coefficients in multiple passes. All the passes of a
+// single frame are added together.
+
+namespace jxl {
+
+constexpr size_t kNoDownsamplingFactor = std::numeric_limits<size_t>::max();
+
+struct PassDefinition {  // Describes one progressive pass: which coefficients it carries and at what precision.
+  // Side of the square of the coefficients that should be kept in each 8x8
+  // block. Must be greater than 1, and at most 8. Should be in non-decreasing
+  // order.
+  size_t num_coefficients;
+
+  // How much to shift the encoded values by, with rounding. A shift of 0 means the values are stored unshifted.
+  size_t shift;
+
+  // If specified, this indicates that if the requested downsampling factor is
+  // sufficiently high, then it is fine to stop decoding after this pass.
+  // By default, passes are not marked as being suitable for any downsampling. kNoDownsamplingFactor marks "not specified".
+  size_t suitable_for_downsampling_of_at_least;
+};
+
+struct ProgressiveMode {  // An ordered list of passes; the default is a single pass with all 8x8 coefficients and no shift.
+  size_t num_passes = 1;
+  PassDefinition passes[kMaxNumPasses] = {
+      PassDefinition{/*num_coefficients=*/8, /*shift=*/0,
+                     /*suitable_for_downsampling_of_at_least=*/1}};
+
+  ProgressiveMode() = default;
+
+  template <size_t nump>
+  explicit ProgressiveMode(const PassDefinition (&p)[nump]) {  // Copies `p` into `passes`, asserting that the sequence is well ordered.
+    JXL_ASSERT(nump <= kMaxNumPasses);
+    num_passes = nump;
+    PassDefinition previous_pass{  // Sentinel: forces the first real pass to have num_coefficients > 1.
+        /*num_coefficients=*/1, /*shift=*/0,
+        /*suitable_for_downsampling_of_at_least=*/kNoDownsamplingFactor};
+    size_t last_downsampling_factor = kNoDownsamplingFactor;
+    for (size_t i = 0; i < nump; i++) {
+      JXL_ASSERT(p[i].num_coefficients > previous_pass.num_coefficients ||  // Each pass must add coefficients, or keep the same set with a smaller shift.
+                 (p[i].num_coefficients == previous_pass.num_coefficients &&
+                  p[i].shift < previous_pass.shift));
+      JXL_ASSERT(p[i].suitable_for_downsampling_of_at_least ==  // Specified downsampling factors must not increase from pass to pass.
+                     kNoDownsamplingFactor ||
+                 p[i].suitable_for_downsampling_of_at_least <=
+                     last_downsampling_factor);
+      // Only used inside assert.
+      (void)last_downsampling_factor;
+      if (p[i].suitable_for_downsampling_of_at_least != kNoDownsamplingFactor) {
+        last_downsampling_factor = p[i].suitable_for_downsampling_of_at_least;
+      }
+      previous_pass = passes[i] = p[i];
+    }
+  }
+};
+
+class ProgressiveSplitter {  // Holds a ProgressiveMode and splits coefficients into passes according to it.
+ public:
+  void SetProgressiveMode(ProgressiveMode mode) { mode_ = mode; }
+
+  size_t GetNumPasses() const { return mode_.num_passes; }
+
+  void InitPasses(Passes* JXL_RESTRICT passes) const {  // Fills `passes` (pass count, shifts, downsampling info) from mode_.
+    passes->num_passes = static_cast<uint32_t>(GetNumPasses());
+    passes->num_downsample = 0;
+    JXL_ASSERT(passes->num_passes != 0);
+    passes->shift[passes->num_passes - 1] = 0;  // The last pass is always recorded with shift 0.
+    if (passes->num_passes == 1) return;  // Done, arrays are empty
+
+    for (uint32_t i = 0; i < mode_.num_passes - 1; ++i) {  // Every pass except the last.
+      const size_t min_downsampling_factor =
+          mode_.passes[i].suitable_for_downsampling_of_at_least;
+      passes->shift[i] = mode_.passes[i].shift;
+      if (1 < min_downsampling_factor &&
+          min_downsampling_factor != kNoDownsamplingFactor) {
+        passes->downsample[passes->num_downsample] = min_downsampling_factor;
+        passes->last_pass[passes->num_downsample] = i;
+        if (mode_.passes[i + 1].suitable_for_downsampling_of_at_least <  // Move to the next slot only when the next pass has a smaller factor; otherwise this slot is overwritten by a later pass.
+            min_downsampling_factor) {
+          passes->num_downsample += 1;
+        }
+      }
+    }
+  }
+
+  template <typename T>
+  void SplitACCoefficients(const T* JXL_RESTRICT block, const AcStrategy& acs,  // Defined in the .cc file; instantiated there for int32_t and int16_t.
+                           size_t bx, size_t by,
+                           T* JXL_RESTRICT output[kMaxNumPasses]);
+
+ private:
+  ProgressiveMode mode_;  // Default-constructed to the single-pass mode.
+};
+
+extern template void ProgressiveSplitter::SplitACCoefficients<int32_t>(
+    const int32_t* JXL_RESTRICT, const AcStrategy&, size_t, size_t,
+    int32_t* JXL_RESTRICT[kMaxNumPasses]);
+
+extern template void ProgressiveSplitter::SplitACCoefficients<int16_t>(
+    const int16_t* JXL_RESTRICT, const AcStrategy&, size_t, size_t,
+    int16_t* JXL_RESTRICT[kMaxNumPasses]);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_PROGRESSIVE_SPLIT_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_quant_weights.cc b/third-party/libjxl/libjxl/lib/jxl/enc_quant_weights.cc
new file mode 100644
index 0000000000..848310e75d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_quant_weights.cc
@@ -0,0 +1,214 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_quant_weights.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <utility>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+namespace {
+
+// Serializes `params`: first the number of distance bands minus one, then,
+// for each of the three channels, every distance band as an F16 value. The
+// first band of each channel is written multiplied by 1/64.
+Status EncodeDctParams(const DctQuantWeightParams& params, BitWriter* writer) {
+  JXL_ASSERT(params.num_distance_bands >= 1);
+  const auto num_bands = params.num_distance_bands;
+  writer->Write(DctQuantWeightParams::kLog2MaxDistanceBands, num_bands - 1);
+  for (size_t channel = 0; channel < 3; ++channel) {
+    for (size_t band = 0; band < num_bands; ++band) {
+      const float scale = (band == 0) ? (1 / 64.0f) : 1.0f;
+      JXL_RETURN_IF_ERROR(F16Coder::Write(
+          params.distance_bands[channel][band] * scale, writer));
+    }
+  }
+  return true;
+}
+
+// Writes a single quantization-table encoding to `writer`: the mode tag
+// first, then the payload belonging to that mode. `size_x` and `size_y` are
+// multiplied by kBlockDim; the resulting dimensions, `idx` and
+// `modular_frame_encoder` are only used for raw tables.
+Status EncodeQuant(const QuantEncoding& encoding, size_t idx, size_t size_x,
+                   size_t size_y, BitWriter* writer,
+                   ModularFrameEncoder* modular_frame_encoder) {
+  writer->Write(kLog2NumQuantModes, encoding.mode);
+  switch (encoding.mode) {
+    case QuantEncoding::kQuantModeLibrary: {
+      // Only the index of the predefined table is stored.
+      writer->Write(kCeilLog2NumPredefinedTables, encoding.predefined);
+      break;
+    }
+    case QuantEncoding::kQuantModeID: {
+      // Three weights per channel, each multiplied by 1/64.
+      for (size_t ch = 0; ch < 3; ++ch) {
+        for (size_t k = 0; k < 3; ++k) {
+          JXL_RETURN_IF_ERROR(F16Coder::Write(
+              encoding.idweights[ch][k] * (1.0f / 64), writer));
+        }
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT2: {
+      // Six weights per channel, each multiplied by 1/64.
+      for (size_t ch = 0; ch < 3; ++ch) {
+        for (size_t k = 0; k < 6; ++k) {
+          JXL_RETURN_IF_ERROR(F16Coder::Write(
+              encoding.dct2weights[ch][k] * (1.0f / 64), writer));
+        }
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT4X8: {
+      // One multiplier per channel, followed by the DCT parameters.
+      for (size_t ch = 0; ch < 3; ++ch) {
+        JXL_RETURN_IF_ERROR(
+            F16Coder::Write(encoding.dct4x8multipliers[ch], writer));
+      }
+      JXL_RETURN_IF_ERROR(EncodeDctParams(encoding.dct_params, writer));
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT4: {
+      // Two multipliers per channel, followed by the DCT parameters.
+      for (size_t ch = 0; ch < 3; ++ch) {
+        for (size_t k = 0; k < 2; ++k) {
+          JXL_RETURN_IF_ERROR(
+              F16Coder::Write(encoding.dct4multipliers[ch][k], writer));
+        }
+      }
+      JXL_RETURN_IF_ERROR(EncodeDctParams(encoding.dct_params, writer));
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT: {
+      JXL_RETURN_IF_ERROR(EncodeDctParams(encoding.dct_params, writer));
+      break;
+    }
+    case QuantEncoding::kQuantModeRAW: {
+      // Raw tables are handed to the modular encoder with the scaled size.
+      ModularFrameEncoder::EncodeQuantTable(
+          size_x * kBlockDim, size_y * kBlockDim, writer, encoding, idx,
+          modular_frame_encoder);
+      break;
+    }
+    case QuantEncoding::kQuantModeAFV: {
+      // Nine weights per channel; only the first six are multiplied by 1/64.
+      for (size_t ch = 0; ch < 3; ++ch) {
+        for (size_t k = 0; k < 9; ++k) {
+          const float scale = (k < 6) ? 1.0f / 64 : 1.0f;
+          JXL_RETURN_IF_ERROR(
+              F16Coder::Write(encoding.afv_weights[ch][k] * scale, writer));
+        }
+      }
+      JXL_RETURN_IF_ERROR(EncodeDctParams(encoding.dct_params, writer));
+      JXL_RETURN_IF_ERROR(EncodeDctParams(encoding.dct_params_afv_4x4, writer));
+      break;
+    }
+  }
+  return true;
+}
+
+}  // namespace
+
+// Serializes the quantization-table encodings of `matrices` into `writer`.
+// A single 1 bit is written when every table uses the library mode with
+// predefined index 0; otherwise a 0 bit is followed by each table, written
+// through EncodeQuant. The allotment is then reclaimed with `layer` and
+// `aux_out`.
+Status DequantMatricesEncode(const DequantMatrices* matrices, BitWriter* writer,
+                             size_t layer, AuxOut* aux_out,
+                             ModularFrameEncoder* modular_frame_encoder) {
+  const std::vector<QuantEncoding>& encodings = matrices->encodings();
+  const bool all_default = std::all_of(
+      encodings.begin(), encodings.end(), [](const QuantEncoding& encoding) {
+        return encoding.mode == QuantEncoding::kQuantModeLibrary &&
+               encoding.predefined == 0;
+      });
+
+  // TODO(janwas): better bound
+  BitWriter::Allotment allotment(writer, 512 * 1024);
+  writer->Write(1, all_default);
+  if (!all_default) {
+    for (size_t table = 0; table < encodings.size(); ++table) {
+      JXL_RETURN_IF_ERROR(
+          EncodeQuant(encodings[table], table,
+                      DequantMatrices::required_size_x[table],
+                      DequantMatrices::required_size_y[table], writer,
+                      modular_frame_encoder));
+    }
+  }
+  allotment.ReclaimAndCharge(writer, layer, aux_out);
+  return true;
+}
+
+// Serializes the three DC quantizers of `matrices` into `writer`. A single
+// 1 bit is written when all of them equal kDCQuant; otherwise a 0 bit is
+// followed by each value multiplied by 128 and stored as F16. The allotment
+// is then reclaimed with `layer` and `aux_out`.
+Status DequantMatricesEncodeDC(const DequantMatrices* matrices,
+                               BitWriter* writer, size_t layer,
+                               AuxOut* aux_out) {
+  const float* dc_quant = matrices->DCQuants();
+  bool all_default = true;
+  for (size_t c = 0; all_default && c < 3; ++c) {
+    all_default = (dc_quant[c] == kDCQuant[c]);
+  }
+
+  BitWriter::Allotment allotment(writer, 1 + sizeof(float) * kBitsPerByte * 3);
+  writer->Write(1, all_default);
+  if (!all_default) {
+    for (size_t c = 0; c < 3; ++c) {
+      const float scaled = dc_quant[c] * 128.0f;
+      JXL_RETURN_IF_ERROR(F16Coder::Write(scaled, writer));
+    }
+  }
+  allotment.ReclaimAndCharge(writer, layer, aux_out);
+  return true;
+}
+
+// Stores the DC quantizers `dc` in `matrices`, then encodes them into a
+// temporary bitstream and decodes them back into `matrices`. Any failure of
+// the encode, decode or reader close aborts through JXL_CHECK.
+void DequantMatricesSetCustomDC(DequantMatrices* matrices, const float* dc) {
+  matrices->SetDCQuant(dc);
+  // Roundtrip encode/decode DC to ensure same values as decoder.
+  BitWriter writer;
+  // Layer 0 and a null AuxOut are passed for this throwaway encode.
+  JXL_CHECK(DequantMatricesEncodeDC(matrices, &writer, 0, nullptr));
+  // Pad to a whole byte before handing the span to the reader.
+  writer.ZeroPadToByte();
+  BitReader br(writer.GetSpan());
+  // Called only in the encoder: should fail only for programmer errors.
+  JXL_CHECK(matrices->DecodeDC(&br));
+  JXL_CHECK(br.Close());
+}
+
+// Multiplies each of the three InvDCQuant values of `matrices` by 1 / scale
+// and installs the results through DequantMatricesSetCustomDC.
+void DequantMatricesScaleDC(DequantMatrices* matrices, const float scale) {
+  const float inv_scale = 1.0f / scale;
+  const float scaled_dc[3] = {
+      matrices->InvDCQuant(0) * inv_scale,
+      matrices->InvDCQuant(1) * inv_scale,
+      matrices->InvDCQuant(2) * inv_scale,
+  };
+  DequantMatricesSetCustomDC(matrices, scaled_dc);
+}
+
+// Encodes the quantization tables of `matrices` into a temporary bitstream
+// and decodes them back into `matrices`. Any failure of the encode, decode
+// or reader close aborts through JXL_CHECK.
+void DequantMatricesRoundtrip(DequantMatrices* matrices) {
+  // Do not pass modular en/decoder, as they only change entropy and not
+  // values.
+  BitWriter writer;
+  // Layer 0 and a null AuxOut are passed for this throwaway encode.
+  JXL_CHECK(DequantMatricesEncode(matrices, &writer, 0, nullptr));
+  // Pad to a whole byte before handing the span to the reader.
+  writer.ZeroPadToByte();
+  BitReader br(writer.GetSpan());
+  // Called only in the encoder: should fail only for programmer errors.
+  JXL_CHECK(matrices->Decode(&br));
+  JXL_CHECK(br.Close());
+}
+
+// Installs `encodings` (exactly DequantMatrices::kNum entries) in `matrices`,
+// registers every raw-mode table with `encoder`, and finally round-trips the
+// matrices through DequantMatricesRoundtrip.
+void DequantMatricesSetCustom(DequantMatrices* matrices,
+                              const std::vector<QuantEncoding>& encodings,
+                              ModularFrameEncoder* encoder) {
+  JXL_ASSERT(encodings.size() == DequantMatrices::kNum);
+  matrices->SetEncodings(encodings);
+  for (size_t table = 0; table < encodings.size(); ++table) {
+    const QuantEncoding& encoding = encodings[table];
+    if (encoding.mode != QuantEncodingInternal::kQuantModeRAW) continue;
+    // Raw tables are passed to the modular encoder with kBlockDim-scaled
+    // dimensions.
+    const size_t size_x = DequantMatrices::required_size_x[table] * kBlockDim;
+    const size_t size_y = DequantMatrices::required_size_y[table] * kBlockDim;
+    encoder->AddQuantTable(size_x, size_y, encoding, table);
+  }
+  DequantMatricesRoundtrip(matrices);
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_quant_weights.h b/third-party/libjxl/libjxl/lib/jxl/enc_quant_weights.h
new file mode 100644
index 0000000000..e0a387fed5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_quant_weights.h
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_QUANT_WEIGHTS_H_
+#define LIB_JXL_ENC_QUANT_WEIGHTS_H_
+
+#include "lib/jxl/quant_weights.h"
+
+namespace jxl {
+
+struct AuxOut;
+struct BitWriter;
+
+// Writes the quantization-table encodings of `matrices` to `writer`.
+// `modular_frame_encoder` is optional (defaults to nullptr) and is forwarded
+// to the per-table encoder.
+Status DequantMatricesEncode(
+    const DequantMatrices* matrices, BitWriter* writer, size_t layer,
+    AuxOut* aux_out, ModularFrameEncoder* modular_frame_encoder = nullptr);
+// Writes the DC quantizers of `matrices` to `writer`.
+Status DequantMatricesEncodeDC(const DequantMatrices* matrices,
+                               BitWriter* writer, size_t layer,
+                               AuxOut* aux_out);
+// For consistency with QuantEncoding, higher values correspond to more
+// precision.
+void DequantMatricesSetCustomDC(DequantMatrices* matrices, const float* dc);
+
+// Rescales the DC quantizers of `matrices` using `scale` and installs the
+// result via DequantMatricesSetCustomDC.
+void DequantMatricesScaleDC(DequantMatrices* matrices, float scale);
+
+// Installs `encodings` in `matrices`; `encoder` receives the raw-mode tables.
+void DequantMatricesSetCustom(DequantMatrices* matrices,
+                              const std::vector<QuantEncoding>& encodings,
+                              ModularFrameEncoder* encoder);
+
+// Roundtrip encode/decode the matrices to ensure same values as decoder.
+void DequantMatricesRoundtrip(DequantMatrices* matrices);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_QUANT_WEIGHTS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_splines.cc b/third-party/libjxl/libjxl/lib/jxl/enc_splines.cc
new file mode 100644
index 0000000000..ddcd78a748
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_splines.cc
@@ -0,0 +1,98 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <algorithm>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/splines.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// Converts a single QuantizedSpline into entropy-coder tokens.
+class QuantizedSplineEncoder {
+ public:
+  // Only call if HasAny().
+  // Appends to `tokens`: the control-point count, both components of every
+  // control point, then 32 coefficients for each of the three color DCTs and
+  // for the sigma DCT.
+  static void Tokenize(const QuantizedSpline& spline,
+                       std::vector<Token>* const tokens) {
+    const auto& control_points = spline.control_points_;
+    tokens->emplace_back(kNumControlPointsContext, control_points.size());
+    for (const auto& control_point : control_points) {
+      tokens->emplace_back(kControlPointsContext,
+                           PackSigned(control_point.first));
+      tokens->emplace_back(kControlPointsContext,
+                           PackSigned(control_point.second));
+    }
+
+    // Color channels first, sigma last.
+    const int* const dct_arrays[] = {spline.color_dct_[0], spline.color_dct_[1],
+                                     spline.color_dct_[2], spline.sigma_dct_};
+    for (const int* dct : dct_arrays) {
+      for (int k = 0; k < 32; ++k) {
+        tokens->emplace_back(kDCTContext, PackSigned(dct[k]));
+      }
+    }
+  }
+};
+
+namespace {
+
+// Appends the rounded starting points to `tokens`. The first point is stored
+// as-is; every later point is stored as the PackSigned difference to the
+// previous rounded point.
+void EncodeAllStartingPoints(const std::vector<Spline::Point>& points,
+                             std::vector<Token>* tokens) {
+  int64_t prev_x = 0;
+  int64_t prev_y = 0;
+  bool is_first = true;
+  for (const Spline::Point& point : points) {
+    const int64_t x = lroundf(point.x);
+    const int64_t y = lroundf(point.y);
+    if (is_first) {
+      tokens->emplace_back(kStartingPositionContext, x);
+      tokens->emplace_back(kStartingPositionContext, y);
+      is_first = false;
+    } else {
+      tokens->emplace_back(kStartingPositionContext, PackSigned(x - prev_x));
+      tokens->emplace_back(kStartingPositionContext, PackSigned(y - prev_y));
+    }
+    prev_x = x;
+    prev_y = y;
+  }
+}
+
+}  // namespace
+
+// Tokenizes `splines` (count minus one, starting points, quantization
+// adjustment, then every quantized spline) into a single token stream, builds
+// and writes the histograms for it, and writes the tokens to `writer`.
+// Must only be called when splines.HasAny().
+void EncodeSplines(const Splines& splines, BitWriter* writer,
+                   const size_t layer, const HistogramParams& histogram_params,
+                   AuxOut* aux_out) {
+  JXL_ASSERT(splines.HasAny());
+
+  const std::vector<QuantizedSpline>& quantized_splines =
+      splines.QuantizedSplines();
+
+  // BuildAndEncodeHistograms takes a vector of streams; only one is used.
+  std::vector<std::vector<Token>> tokens(1);
+  std::vector<Token>& stream = tokens[0];
+
+  stream.emplace_back(kNumSplinesContext, quantized_splines.size() - 1);
+  EncodeAllStartingPoints(splines.StartingPoints(), &stream);
+  stream.emplace_back(kQuantizationAdjustmentContext,
+                      PackSigned(splines.GetQuantizationAdjustment()));
+  for (const QuantizedSpline& quantized : quantized_splines) {
+    QuantizedSplineEncoder::Tokenize(quantized, &stream);
+  }
+
+  EntropyEncodingData codes;
+  std::vector<uint8_t> context_map;
+  BuildAndEncodeHistograms(histogram_params, kNumSplineContexts, tokens, &codes,
+                           &context_map, writer, layer, aux_out);
+  WriteTokens(stream, codes, context_map, writer, layer, aux_out);
+}
+
+// Placeholder: spline detection is not implemented yet, so `opsin` is not
+// read and an empty-initialized Splines object is returned.
+Splines FindSplines(const Image3F& opsin) {
+  // TODO: implement spline detection.
+  return {};
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_splines.h b/third-party/libjxl/libjxl/lib/jxl/enc_splines.h
new file mode 100644
index 0000000000..be700dba75
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_splines.h
@@ -0,0 +1,38 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_SPLINES_H_
+#define LIB_JXL_ENC_SPLINES_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/splines.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// Only call if splines.HasAny().
+// Tokenizes `splines` and writes the resulting histograms and tokens to
+// `writer`; `layer`, `histogram_params` and `aux_out` are forwarded to the
+// entropy-coding helpers.
+void EncodeSplines(const Splines& splines, BitWriter* writer, size_t layer,
+                   const HistogramParams& histogram_params, AuxOut* aux_out);
+
+// Intended to detect splines in `opsin`.
+// NOTE(review): the implementation in enc_splines.cc is currently a stub
+// that returns an empty result.
+Splines FindSplines(const Image3F& opsin);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_SPLINES_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_toc.cc b/third-party/libjxl/libjxl/lib/jxl/enc_toc.cc
new file mode 100644
index 0000000000..dc75fdd9ba
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_toc.cc
@@ -0,0 +1,45 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_toc.h"
+
+#include <stdint.h>
+
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_coeff_order.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/toc.h"
+
+namespace jxl {
+// Writes the table of contents for `group_codes` to `writer`: a one-bit
+// permutation flag (followed by the encoded permutation when one is supplied
+// and there is at least one group), padding to a byte boundary, the byte size
+// of every group, and a final padding to a byte boundary.
+Status WriteGroupOffsets(const std::vector<BitWriter>& group_codes,
+                         const std::vector<coeff_order_t>* permutation,
+                         BitWriter* JXL_RESTRICT writer, AuxOut* aux_out) {
+  BitWriter::Allotment allotment(writer, MaxBits(group_codes.size()));
+
+  // Don't write a permutation at all for an empty group_codes.
+  const bool write_permutation = permutation && !group_codes.empty();
+  if (!write_permutation) {
+    writer->Write(1, 0);  // no permutation
+  } else {
+    writer->Write(1, 1);  // permutation
+    JXL_DASSERT(permutation->size() == group_codes.size());
+    EncodePermutation(permutation->data(), /*skip=*/0, permutation->size(),
+                      writer, /* layer= */ 0, aux_out);
+  }
+  writer->ZeroPadToByte();  // before TOC entries
+
+  // Every group must hold a whole number of bytes; its size is stored in
+  // bytes.
+  for (size_t i = 0; i < group_codes.size(); i++) {
+    JXL_ASSERT(group_codes[i].BitsWritten() % kBitsPerByte == 0);
+    const size_t num_bytes = group_codes[i].BitsWritten() / kBitsPerByte;
+    JXL_RETURN_IF_ERROR(U32Coder::Write(kTocDist, num_bytes, writer));
+  }
+  writer->ZeroPadToByte();  // before first group
+
+  allotment.ReclaimAndCharge(writer, kLayerTOC, aux_out);
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_toc.h b/third-party/libjxl/libjxl/lib/jxl/enc_toc.h
new file mode 100644
index 0000000000..242b3efccb
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_toc.h
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_TOC_H_
+#define LIB_JXL_ENC_TOC_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/enc_bit_writer.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// Writes the group offsets. If the permutation vector is nullptr, the identity
+// permutation will be used.
+// The byte size of every entry of `group_codes` is written to `writer`; a
+// permutation is only emitted when `permutation` is non-null and
+// `group_codes` is non-empty.
+Status WriteGroupOffsets(const std::vector<BitWriter>& group_codes,
+                         const std::vector<coeff_order_t>* permutation,
+                         BitWriter* JXL_RESTRICT writer, AuxOut* aux_out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_TOC_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_transforms-inl.h b/third-party/libjxl/libjxl/lib/jxl/enc_transforms-inl.h
new file mode 100644
index 0000000000..7459a16305
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_transforms-inl.h
@@ -0,0 +1,800 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
+#undef LIB_JXL_ENC_TRANSFORMS_INL_H_
+#else
+#define LIB_JXL_ENC_TRANSFORMS_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dct-inl.h"
+#include "lib/jxl/dct_scales.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// Inverse of ReinterpretingDCT.
+// Copies the low-frequency corner of `input` into a zero-initialized
+// ROWS x COLS block, multiplying every coefficient by the resample scales of
+// its two coordinates, then runs a scaled ROWS x COLS IDCT into `output`.
+template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
+          size_t ROWS, size_t COLS>
+HWY_INLINE void ReinterpretingIDCT(const float* input,
+                                   const size_t input_stride, float* output,
+                                   const size_t output_stride) {
+  HWY_ALIGN float block[ROWS * COLS] = {};
+  if (ROWS < COLS) {
+    for (size_t row = 0; row < LF_ROWS; ++row) {
+      const auto row_scale = DCTTotalResampleScale<DCT_ROWS, ROWS>(row);
+      for (size_t col = 0; col < LF_COLS; ++col) {
+        block[row * COLS + col] = input[row * input_stride + col] * row_scale *
+                                  DCTTotalResampleScale<DCT_COLS, COLS>(col);
+      }
+    }
+  } else {
+    // Here the roles of the two dimensions are swapped: the outer index runs
+    // over LF_COLS and the block is addressed with a stride of ROWS.
+    for (size_t outer = 0; outer < LF_COLS; ++outer) {
+      const auto outer_scale = DCTTotalResampleScale<DCT_COLS, COLS>(outer);
+      for (size_t inner = 0; inner < LF_ROWS; ++inner) {
+        block[outer * ROWS + inner] =
+            input[outer * input_stride + inner] * outer_scale *
+            DCTTotalResampleScale<DCT_ROWS, ROWS>(inner);
+      }
+    }
+  }
+
+  // ROWS, COLS <= 8, so we can put scratch space on the stack.
+  HWY_ALIGN float scratch_space[ROWS * COLS];
+  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
+                                  scratch_space);
+}
+
+// Applies one 2x2 sum/difference step to the top-left S x S corner of `block`
+// (row stride `stride`). For every 2x2 group the four scaled combinations are
+// stored in the four S/2 x S/2 quadrants of the corner, and the resulting
+// S x S corner is written to `out` with a row stride of kBlockDim.
+template <size_t S>
+void DCT2TopBlock(const float* block, size_t stride, float* out) {
+  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
+  static_assert(S % 2 == 0, "S should be even");
+  constexpr size_t kHalf = S / 2;
+  // Staging buffer, so that the result is complete before `out` is touched.
+  float staged[kDCTBlockSize];
+  for (size_t gy = 0; gy < kHalf; ++gy) {
+    for (size_t gx = 0; gx < kHalf; ++gx) {
+      const float c00 = block[gy * 2 * stride + gx * 2];
+      const float c01 = block[gy * 2 * stride + gx * 2 + 1];
+      const float c10 = block[(gy * 2 + 1) * stride + gx * 2];
+      const float c11 = block[(gy * 2 + 1) * stride + gx * 2 + 1];
+      // Top-left: average; the other quadrants: signed combinations.
+      staged[gy * kBlockDim + gx] = (c00 + c01 + c10 + c11) * 0.25f;
+      staged[gy * kBlockDim + kHalf + gx] = (c00 + c01 - c10 - c11) * 0.25f;
+      staged[(gy + kHalf) * kBlockDim + gx] = (c00 - c01 + c10 - c11) * 0.25f;
+      staged[(gy + kHalf) * kBlockDim + kHalf + gx] =
+          (c00 - c01 - c10 + c11) * 0.25f;
+    }
+  }
+  for (size_t row = 0; row < S; ++row) {
+    const size_t row_offset = row * kBlockDim;
+    for (size_t col = 0; col < S; ++col) {
+      out[row_offset + col] = staged[row_offset + col];
+    }
+  }
+}
+
+// Forward AFV transform of a 4x4 block: multiplies the 16 values in `pixels`
+// by the 16x16 basis table below, i.e.
+//   coeffs[i] = sum over j of pixels[j] * k4x4AFVBasisTranspose[j][i].
+// Both `pixels` and `coeffs` hold 16 floats.
+void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
+  // Row j holds the contribution of pixel j to each of the 16 coefficients.
+  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
+      {
+          0.2500000000000000,
+          0.8769029297991420f,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          -0.4105377591765233f,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+      },
+      {
+          0.2500000000000000,
+          0.2206518106944235f,
+          0.0000000000000000,
+          0.0000000000000000,
+          -0.7071067811865474f,
+          0.6235485373547691f,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375376f,
+          0.4067007583026075f,
+          -0.2125574805828875f,
+          0.0000000000000000,
+          -0.0643507165794627f,
+          -0.4517556589999482f,
+          -0.3046847507248690f,
+          0.3017929516615495f,
+          0.4082482904638627f,
+          0.1747866975480809f,
+          -0.2110560104933578f,
+          -0.1426608480880726f,
+          -0.1381354035075859f,
+          -0.1743760259965107f,
+          0.1135498731499434f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375375f,
+          0.4444481661973445f,
+          0.3085497062849767f,
+          0.0000000000000000f,
+          -0.0643507165794627f,
+          0.1585450355184006f,
+          0.5112616136591823f,
+          0.2579236279634118f,
+          0.0000000000000000,
+          0.0812611176717539f,
+          0.1856718091610980f,
+          -0.3416446842253372f,
+          0.3302282550303788f,
+          0.0702790691196284f,
+          -0.0741750459581035f,
+      },
+      {
+          0.2500000000000000,
+          0.2206518106944236f,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.7071067811865476f,
+          0.6235485373547694f,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375378f,
+          0.0000000000000000,
+          0.4706702258572536f,
+          0.0000000000000000,
+          -0.0643507165794628f,
+          -0.0403851516082220f,
+          0.0000000000000000,
+          0.1627234014286620f,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.7367497537172237f,
+          0.0875511500058708f,
+          -0.2921026642334881f,
+          0.1940289303259434f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375377f,
+          0.1957439937204294f,
+          -0.1621205195722993f,
+          0.0000000000000000,
+          -0.0643507165794628f,
+          0.0074182263792424f,
+          -0.2904801297289980f,
+          0.0952002265347504f,
+          0.0000000000000000,
+          -0.3675398009862027f,
+          0.4921585901373873f,
+          0.2462710772207515f,
+          -0.0794670660590957f,
+          0.3623817333531167f,
+          -0.4351904965232280f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375376f,
+          0.2929100136981264f,
+          0.0000000000000000,
+          0.0000000000000000,
+          -0.0643507165794627f,
+          0.3935103426921017f,
+          -0.0657870154914280f,
+          0.0000000000000000,
+          -0.4082482904638628f,
+          -0.3078822139579090f,
+          -0.3852501370925192f,
+          -0.0857401903551931f,
+          -0.4613374887461511f,
+          0.0000000000000000,
+          0.2191868483885747f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375376f,
+          -0.4067007583026072f,
+          -0.2125574805828705f,
+          0.0000000000000000,
+          -0.0643507165794627f,
+          -0.4517556589999464f,
+          0.3046847507248840f,
+          0.3017929516615503f,
+          -0.4082482904638635f,
+          -0.1747866975480813f,
+          0.2110560104933581f,
+          -0.1426608480880734f,
+          -0.1381354035075829f,
+          -0.1743760259965108f,
+          0.1135498731499426f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375377f,
+          -0.1957439937204287f,
+          -0.1621205195722833f,
+          0.0000000000000000,
+          -0.0643507165794628f,
+          0.0074182263792444f,
+          0.2904801297290076f,
+          0.0952002265347505f,
+          0.0000000000000000,
+          0.3675398009862011f,
+          -0.4921585901373891f,
+          0.2462710772207514f,
+          -0.0794670660591026f,
+          0.3623817333531165f,
+          -0.4351904965232251f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375375f,
+          0.0000000000000000,
+          -0.4706702258572528f,
+          0.0000000000000000,
+          -0.0643507165794627f,
+          0.1107416575309343f,
+          0.0000000000000000,
+          -0.1627234014286617f,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.1488339922711357f,
+          0.4972464710953509f,
+          0.2921026642334879f,
+          0.5550443808910661f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375377f,
+          0.1137907446044809f,
+          -0.1464291867126764f,
+          0.0000000000000000,
+          -0.0643507165794628f,
+          0.0829816309488205f,
+          -0.2388977352334460f,
+          -0.3531238544981630f,
+          -0.4082482904638630f,
+          0.4826689115059883f,
+          0.1741941265991622f,
+          -0.0476868035022925f,
+          0.1253805944856366f,
+          -0.4326608024727445f,
+          -0.2546827712406646f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375377f,
+          -0.4444481661973438f,
+          0.3085497062849487f,
+          0.0000000000000000,
+          -0.0643507165794628f,
+          0.1585450355183970f,
+          -0.5112616136592012f,
+          0.2579236279634129f,
+          0.0000000000000000,
+          -0.0812611176717504f,
+          -0.1856718091610990f,
+          -0.3416446842253373f,
+          0.3302282550303805f,
+          0.0702790691196282f,
+          -0.0741750459581023f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375376f,
+          -0.2929100136981264f,
+          0.0000000000000000,
+          0.0000000000000000,
+          -0.0643507165794627f,
+          0.3935103426921022f,
+          0.0657870154914254f,
+          0.0000000000000000,
+          0.4082482904638634f,
+          0.3078822139579031f,
+          0.3852501370925211f,
+          -0.0857401903551927f,
+          -0.4613374887461554f,
+          0.0000000000000000,
+          0.2191868483885728f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375376f,
+          -0.1137907446044814f,
+          -0.1464291867126654f,
+          0.0000000000000000,
+          -0.0643507165794627f,
+          0.0829816309488214f,
+          0.2388977352334547f,
+          -0.3531238544981624f,
+          0.4082482904638630f,
+          -0.4826689115059858f,
+          -0.1741941265991621f,
+          -0.0476868035022928f,
+          0.1253805944856431f,
+          -0.4326608024727457f,
+          -0.2546827712406641f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375374f,
+          0.0000000000000000,
+          0.4251149611657548f,
+          0.0000000000000000,
+          -0.0643507165794626f,
+          -0.4517556589999480f,
+          0.0000000000000000,
+          -0.6035859033230976f,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          -0.1426608480880724f,
+          -0.1381354035075845f,
+          0.3487520519930227f,
+          0.1135498731499429f,
+      },
+  };
+
+  // Process Lanes(d) output coefficients per outer iteration (d is capped at
+  // 16 lanes, the total number of coefficients).
+  const HWY_CAPPED(float, 16) d;
+  for (size_t i = 0; i < 16; i += Lanes(d)) {
+    auto scalar = Zero(d);
+    // Accumulate the contribution of each input pixel: broadcast pixel j and
+    // multiply-add it with the matching slice of basis row j.
+    for (size_t j = 0; j < 16; j++) {
+      auto px = Set(d, pixels[j]);
+      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
+      scalar = MulAdd(px, basis, scalar);
+    }
+    Store(scalar, d, coeffs + i);
+  }
+}
+
+// Computes the AFV transform of one 8x8 pixel block.
+//
+// `afv_kind` selects the 4x4 corner that gets the AFV basis: bit 0 (afv_x)
+// picks the horizontal half and afv_kind / 2 (afv_y) the vertical half. The
+// other 4x4 block in the same row band gets a 4x4 DCT and the remaining 4x8
+// half gets a 4x8 DCT. `pixels` is read with a row stride of `pixels_stride`;
+// `coefficients` receives 64 values with a row stride of 8.
+//
+// Coefficient layout:
+//  - (even, even) positions hold AFV coefficients
+//  - (odd, even) positions hold DCT4x4 coefficients
+//  - (any, odd) positions hold DCT4x8 coefficients
+template <size_t afv_kind>
+void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
+                            size_t pixels_stride,
+                            float* JXL_RESTRICT coefficients) {
+  HWY_ALIGN float scratch_space[4 * 8 * 2];
+  size_t afv_x = afv_kind & 1;
+  size_t afv_y = afv_kind / 2;
+  HWY_ALIGN float block[4 * 8];
+  // Gather the selected 4x4 corner, mirrored along each axis for which the
+  // corresponding afv_* flag is set.
+  for (size_t iy = 0; iy < 4; iy++) {
+    for (size_t ix = 0; ix < 4; ix++) {
+      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
+          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
+    }
+  }
+  // AFV coefficients in (even, even) positions.
+  HWY_ALIGN float coeff[4 * 4];
+  AFVDCT4x4(block, coeff);
+  for (size_t iy = 0; iy < 4; iy++) {
+    for (size_t ix = 0; ix < 4; ix++) {
+      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
+    }
+  }
+  // 4x4 DCT of the block with same y and different x.
+  ComputeScaledDCT<4, 4>()(
+      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
+              pixels_stride),
+      block, scratch_space);
+  // ... in (odd, even) positions.
+  // Only the 4x4 result (ix < 4) is copied. Iterating ix up to 8 would read
+  // block[16..19], which nothing has written at this point, and would only
+  // store values into odd rows that the 4x8 pass below overwrites anyway.
+  for (size_t iy = 0; iy < 4; iy++) {
+    for (size_t ix = 0; ix < 4; ix++) {
+      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
+    }
+  }
+  // 4x8 DCT of the other half of the block.
+  ComputeScaledDCT<4, 8>()(
+      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
+      block, scratch_space);
+  // ... fills every odd row completely.
+  for (size_t iy = 0; iy < 4; iy++) {
+    for (size_t ix = 0; ix < 8; ix++) {
+      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
+    }
+  }
+  // Mix the lowest-frequency terms of the three sub-transforms (positions 0,
+  // 1 and 8) into the values stored at those positions.
+  float block00 = coefficients[0] * 0.25f;
+  float block01 = coefficients[1];
+  float block10 = coefficients[8];
+  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
+  coefficients[1] = (block00 - block01) * 0.5f;
+  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
+}
+
+// Forward transform of one block: reads `pixels` (row pitch `pixels_stride`)
+// and writes the coefficients for `strategy` into `coefficients`.
+//
+// - Plain DCT strategies forward directly to ComputeScaledDCT<ROWS, COLS>.
+// - IDENTITY, DCT8X4, DCT4X8 and DCT4X4 process an 8x8 block as smaller
+//   sub-blocks, interleave the sub-block outputs inside the 8x8 coefficient
+//   block, and then replace the per-sub-block values found at indices
+//   0, 1, 8, 9 (or 0 and 8) by scaled sums and differences.
+// - DCT2X2 and AFV* delegate to DCT2TopBlock / AFVTransformFromPixels.
+//
+// `scratch_space` is only handed to ComputeScaledDCT; the IDENTITY, DCT2X2 and
+// AFV* cases do not pass it on from here.
+HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategy::Type strategy,
+                                          const float* JXL_RESTRICT pixels,
+                                          size_t pixels_stride,
+                                          float* JXL_RESTRICT coefficients,
+                                          float* JXL_RESTRICT scratch_space) {
+  using Type = AcStrategy::Type;
+  switch (strategy) {
+    case Type::IDENTITY: {
+      // Each of the four 4x4 quadrants (y, x) is handled independently.
+      for (size_t y = 0; y < 2; y++) {
+        for (size_t x = 0; x < 2; x++) {
+          // Mean of the 16 pixels of this quadrant.
+          float block_dc = 0;
+          for (size_t iy = 0; iy < 4; iy++) {
+            for (size_t ix = 0; ix < 4; ix++) {
+              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
+            }
+          }
+          block_dc *= 1.0f / 16;
+          // Store every pixel as a difference from the quadrant's pixel
+          // (1, 1), interleaving the quadrants: element (iy, ix) of quadrant
+          // (y, x) lands at row y + 2 * iy, column x + 2 * ix. The (1, 1)
+          // element is skipped because its difference would be zero.
+          for (size_t iy = 0; iy < 4; iy++) {
+            for (size_t ix = 0; ix < 4; ix++) {
+              if (ix == 1 && iy == 1) continue;
+              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
+                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
+                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
+            }
+          }
+          // Move the (0, 0) difference into the unused (1, 1) slot, then put
+          // the quadrant mean into the (0, 0) slot. Order matters here.
+          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
+          coefficients[y * 8 + x] = block_dc;
+        }
+      }
+      // Combine the four quadrant means (indices 0, 1, 8, 9) into scaled
+      // sums and differences.
+      float block00 = coefficients[0];
+      float block01 = coefficients[1];
+      float block10 = coefficients[8];
+      float block11 = coefficients[9];
+      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
+      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
+      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
+      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
+      break;
+    }
+    case Type::DCT8X4: {
+      // Two ComputeScaledDCT<8, 4> calls, one per 4-pixel-wide column half
+      // (x selects the half). Their outputs are interleaved row by row:
+      // output row iy of half x goes to coefficient row x + 2 * iy.
+      for (size_t x = 0; x < 2; x++) {
+        HWY_ALIGN float block[4 * 8];
+        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
+                                 scratch_space);
+        for (size_t iy = 0; iy < 4; iy++) {
+          for (size_t ix = 0; ix < 8; ix++) {
+            // Store transposed.
+            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
+          }
+        }
+      }
+      // Replace the first value of each half (indices 0 and 8) by their
+      // half-sum and half-difference.
+      float block0 = coefficients[0];
+      float block1 = coefficients[8];
+      coefficients[0] = (block0 + block1) * 0.5f;
+      coefficients[8] = (block0 - block1) * 0.5f;
+      break;
+    }
+    case Type::DCT4X8: {
+      // Same layout as DCT8X4, but the two halves are the upper and lower
+      // four pixel rows (y selects the half).
+      for (size_t y = 0; y < 2; y++) {
+        HWY_ALIGN float block[4 * 8];
+        ComputeScaledDCT<4, 8>()(
+            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
+            scratch_space);
+        for (size_t iy = 0; iy < 4; iy++) {
+          for (size_t ix = 0; ix < 8; ix++) {
+            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
+          }
+        }
+      }
+      // Half-sum / half-difference of the first value of each half.
+      float block0 = coefficients[0];
+      float block1 = coefficients[8];
+      coefficients[0] = (block0 + block1) * 0.5f;
+      coefficients[8] = (block0 - block1) * 0.5f;
+      break;
+    }
+    case Type::DCT4X4: {
+      // One ComputeScaledDCT<4, 4> per quadrant; element (iy, ix) of quadrant
+      // (y, x) is stored at row y + 2 * iy, column x + 2 * ix.
+      for (size_t y = 0; y < 2; y++) {
+        for (size_t x = 0; x < 2; x++) {
+          HWY_ALIGN float block[4 * 4];
+          ComputeScaledDCT<4, 4>()(
+              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
+              block, scratch_space);
+          for (size_t iy = 0; iy < 4; iy++) {
+            for (size_t ix = 0; ix < 4; ix++) {
+              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
+            }
+          }
+        }
+      }
+      // Combine the first value of each quadrant (indices 0, 1, 8, 9) into
+      // scaled sums and differences, exactly as in the IDENTITY case.
+      float block00 = coefficients[0];
+      float block01 = coefficients[1];
+      float block10 = coefficients[8];
+      float block11 = coefficients[9];
+      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
+      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
+      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
+      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
+      break;
+    }
+    case Type::DCT2X2: {
+      // Three DCT2TopBlock passes of size 8, 4 and 2. The first reads the
+      // pixels; the next two run in place on `coefficients` with row pitch
+      // kBlockDim.
+      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
+      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
+      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
+      break;
+    }
+    // All remaining DCT cases instantiate ComputeScaledDCT with the template
+    // arguments spelled in the strategy name and write straight to
+    // `coefficients`.
+    case Type::DCT16X16: {
+      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                 scratch_space);
+      break;
+    }
+    case Type::DCT16X8: {
+      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                scratch_space);
+      break;
+    }
+    case Type::DCT8X16: {
+      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                scratch_space);
+      break;
+    }
+    case Type::DCT32X8: {
+      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                scratch_space);
+      break;
+    }
+    case Type::DCT8X32: {
+      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                scratch_space);
+      break;
+    }
+    case Type::DCT32X16: {
+      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                 scratch_space);
+      break;
+    }
+    case Type::DCT16X32: {
+      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                 scratch_space);
+      break;
+    }
+    case Type::DCT32X32: {
+      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                 scratch_space);
+      break;
+    }
+    case Type::DCT: {
+      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
+                               scratch_space);
+      break;
+    }
+    // AFV variants: the template argument is the variant index 0..3.
+    case Type::AFV0: {
+      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
+      break;
+    }
+    case Type::AFV1: {
+      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
+      break;
+    }
+    case Type::AFV2: {
+      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
+      break;
+    }
+    case Type::AFV3: {
+      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
+      break;
+    }
+    case Type::DCT64X64: {
+      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                 scratch_space);
+      break;
+    }
+    case Type::DCT64X32: {
+      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                 scratch_space);
+      break;
+    }
+    case Type::DCT32X64: {
+      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                 scratch_space);
+      break;
+    }
+    case Type::DCT128X128: {
+      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                   scratch_space);
+      break;
+    }
+    case Type::DCT128X64: {
+      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                  scratch_space);
+      break;
+    }
+    case Type::DCT64X128: {
+      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                  scratch_space);
+      break;
+    }
+    case Type::DCT256X256: {
+      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                   scratch_space);
+      break;
+    }
+    case Type::DCT256X128: {
+      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                   scratch_space);
+      break;
+    }
+    case Type::DCT128X256: {
+      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                   scratch_space);
+      break;
+    }
+    // Sentinel enumerator, not a real strategy.
+    case Type::kNumValidStrategies:
+      JXL_UNREACHABLE("Invalid strategy");
+  }
+}
+
+// Writes the DC-image values for one block of `strategy` into `dc` (row pitch
+// `dc_stride`), derived from the coefficients in `block`.
+//
+// The strategies grouped at the bottom (DCT, DCT2X2, DCT4X4, DCT4X8, DCT8X4,
+// AFV*, IDENTITY) produce a single value copied from block[0]. Every other
+// strategy calls ReinterpretingIDCT with LF_ROWS x LF_COLS and ROWS x COLS
+// both equal to (DCT_ROWS / kBlockDim) x (DCT_COLS / kBlockDim). The stride
+// passed for `block` is always the larger of DCT_ROWS and DCT_COLS.
+// NOTE(review): presumably ReinterpretingIDCT turns the lowest-frequency
+// corner of the large DCT into one DC value per kBlockDim x kBlockDim block;
+// confirm against its definition.
+HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategy::Type strategy,
+                                              const float* block, float* dc,
+                                              size_t dc_stride) {
+  using Type = AcStrategy::Type;
+  switch (strategy) {
+    case Type::DCT16X8: {
+      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
+                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
+          block, 2 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT8X16: {
+      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
+                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
+          block, 2 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT16X16: {
+      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
+                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
+          block, 2 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT32X8: {
+      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
+                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
+          block, 4 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT8X32: {
+      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
+          block, 4 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT32X16: {
+      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
+                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
+          block, 4 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT16X32: {
+      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
+          block, 4 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT32X32: {
+      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
+          block, 4 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT64X32: {
+      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
+          block, 8 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT32X64: {
+      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
+                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
+          block, 8 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT64X64: {
+      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
+                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
+          block, 8 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT128X64: {
+      ReinterpretingIDCT<
+          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
+          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
+          block, 16 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT64X128: {
+      ReinterpretingIDCT<
+          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
+          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
+          block, 16 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT128X128: {
+      ReinterpretingIDCT<
+          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
+          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
+          block, 16 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT256X128: {
+      ReinterpretingIDCT<
+          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
+          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
+          block, 32 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT128X256: {
+      ReinterpretingIDCT<
+          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
+          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
+          block, 32 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT256X256: {
+      ReinterpretingIDCT<
+          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
+          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
+          block, 32 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    // Strategies that yield exactly one DC value: it is the first coefficient.
+    case Type::DCT:
+    case Type::DCT2X2:
+    case Type::DCT4X4:
+    case Type::DCT4X8:
+    case Type::DCT8X4:
+    case Type::AFV0:
+    case Type::AFV1:
+    case Type::AFV2:
+    case Type::AFV3:
+    case Type::IDENTITY:
+      dc[0] = block[0];
+      break;
+    // Sentinel enumerator, not a real strategy.
+    case Type::kNumValidStrategies:
+      JXL_UNREACHABLE("Invalid strategy");
+  }
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_ENC_TRANSFORMS_INL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_transforms.cc b/third-party/libjxl/libjxl/lib/jxl/enc_transforms.cc
new file mode 100644
index 0000000000..8978ba1dcb
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_transforms.cc
@@ -0,0 +1,41 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_transforms.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_transforms.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/enc_transforms-inl.h"
+
+namespace jxl {
+
+#if HWY_ONCE
+HWY_EXPORT(TransformFromPixels);
+// Public, non-inlined entry point declared in enc_transforms.h. Forwards all
+// arguments unchanged to the per-target TransformFromPixels implementation
+// chosen by Highway's dynamic dispatch.
+//
+// The parameter qualifiers mirror the header declaration and the per-target
+// implementation: `pixels`, `coefficients` and `scratch_space` are all
+// JXL_RESTRICT, i.e. callers must pass non-overlapping buffers.
+void TransformFromPixels(const AcStrategy::Type strategy,
+                         const float* JXL_RESTRICT pixels, size_t pixels_stride,
+                         float* JXL_RESTRICT coefficients,
+                         float* JXL_RESTRICT scratch_space) {
+  return HWY_DYNAMIC_DISPATCH(TransformFromPixels)(
+      strategy, pixels, pixels_stride, coefficients, scratch_space);
+}
+
+HWY_EXPORT(DCFromLowestFrequencies);
+// Public, non-inlined entry point declared in enc_transforms.h. Hands its
+// arguments, unchanged, to the per-target DCFromLowestFrequencies
+// implementation selected by Highway's dynamic dispatch.
+void DCFromLowestFrequencies(AcStrategy::Type strategy, const float* block,
+                             float* dc, size_t dc_stride) {
+  HWY_DYNAMIC_DISPATCH(DCFromLowestFrequencies)
+  (strategy, block, dc, dc_stride);
+}
+
+HWY_EXPORT(AFVDCT4x4);
+// Public, non-inlined entry point declared in enc_transforms.h. Hands both
+// pointers, unchanged, to the per-target AFVDCT4x4 implementation selected by
+// Highway's dynamic dispatch.
+void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
+  HWY_DYNAMIC_DISPATCH(AFVDCT4x4)(pixels, coeffs);
+}
+#endif  // HWY_ONCE
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_transforms.h b/third-party/libjxl/libjxl/lib/jxl/enc_transforms.h
new file mode 100644
index 0000000000..039ccc3893
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_transforms.h
@@ -0,0 +1,32 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_TRANSFORMS_H_
+#define LIB_JXL_ENC_TRANSFORMS_H_
+
+// Facade for (non-inlined) integral transforms.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+// Forward transform for one block of the given `strategy`: reads `pixels`
+// (row pitch `pixels_stride`) and writes the result to `coefficients`.
+// `scratch_space` is working memory passed down to the DCT implementation.
+// All three buffers are JXL_RESTRICT and therefore must not overlap.
+// The call is routed at runtime to a target-specific implementation.
+void TransformFromPixels(const AcStrategy::Type strategy,
+                         const float* JXL_RESTRICT pixels, size_t pixels_stride,
+                         float* JXL_RESTRICT coefficients,
+                         float* JXL_RESTRICT scratch_space);
+
+// Equivalent of the above for DC image.
+// Reads the coefficients in `block` and writes the DC value(s) for `strategy`
+// to `dc` (row pitch `dc_stride`). Strategies that cover a single 8x8 block
+// write only dc[0].
+void DCFromLowestFrequencies(AcStrategy::Type strategy, const float* block,
+                             float* dc, size_t dc_stride);
+
+// Runtime-dispatched 4x4 transform used by the AFV strategies.
+// NOTE(review): callers in enc_transforms-inl.h read a 4x4 (16-element)
+// result from `coeffs`; confirm the required size of `pixels` against the
+// implementation.
+void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_TRANSFORMS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_xyb.cc b/third-party/libjxl/libjxl/lib/jxl/enc_xyb.cc
new file mode 100644
index 0000000000..a0a5e48e1c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_xyb.cc
@@ -0,0 +1,517 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_xyb.h"
+
+#include <algorithm>
+#include <atomic>
+#include <cstdlib>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_xyb.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_image_bundle.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Sub;
+using hwy::HWY_NAMESPACE::ZeroIfNegative;
+
+// Affine map of one (r, g, b) vector triple: each output is the dot product of
+// one row of the pre-broadcast 3x3 matrix in `premul_absorb` with (r, g, b),
+// plus the matching kOpsinAbsorbanceBias entry.
+//
+// `premul_absorb` stores one full vector per matrix entry in row-major order,
+// so entry i starts at offset i * Lanes(d).
+template <class V>
+JXL_INLINE void OpsinAbsorbance(const V r, const V g, const V b,
+                                const float* JXL_RESTRICT premul_absorb,
+                                V* JXL_RESTRICT mixed0, V* JXL_RESTRICT mixed1,
+                                V* JXL_RESTRICT mixed2) {
+  const HWY_FULL(float) d;
+  const size_t lanes = Lanes(d);
+
+  // Start of each matrix row inside the pre-broadcast table.
+  const float* const row0 = premul_absorb;
+  const float* const row1 = premul_absorb + 3 * lanes;
+  const float* const row2 = premul_absorb + 6 * lanes;
+
+  // Accumulate innermost-first: bias + m_b * b, then + m_g * g, then + m_r * r.
+  auto acc0 = MulAdd(Load(d, row0 + 2 * lanes), b,
+                     Set(d, kOpsinAbsorbanceBias[0]));
+  acc0 = MulAdd(Load(d, row0 + 1 * lanes), g, acc0);
+  *mixed0 = MulAdd(Load(d, row0), r, acc0);
+
+  auto acc1 = MulAdd(Load(d, row1 + 2 * lanes), b,
+                     Set(d, kOpsinAbsorbanceBias[1]));
+  acc1 = MulAdd(Load(d, row1 + 1 * lanes), g, acc1);
+  *mixed1 = MulAdd(Load(d, row1), r, acc1);
+
+  auto acc2 = MulAdd(Load(d, row2 + 2 * lanes), b,
+                     Set(d, kOpsinAbsorbanceBias[2]));
+  acc2 = MulAdd(Load(d, row2 + 1 * lanes), g, acc2);
+  *mixed2 = MulAdd(Load(d, row2), r, acc2);
+}
+
+// Stores one vector of XYB samples computed from the three input channels:
+//   valx <- 0.5 * (r - g),  valy <- 0.5 * (r + g),  valz <- b.
+template <class V>
+void StoreXYB(const V r, V g, const V b, float* JXL_RESTRICT valx,
+              float* JXL_RESTRICT valy, float* JXL_RESTRICT valz) {
+  const HWY_FULL(float) d;
+  const V half = Set(d, 0.5f);
+  const auto x_channel = Mul(half, Sub(r, g));
+  const auto y_channel = Mul(half, Add(r, g));
+  Store(x_channel, d, valx);
+  Store(y_channel, d, valy);
+  Store(b, d, valz);
+}
+
+// Converts one vector of linear RGB samples to XYB and stores the result at
+// `valx`, `valy`, `valz`.
+//
+// Steps: OpsinAbsorbance mixes the channels, negative results are clamped to
+// zero, CubeRootAndAdd is applied with the vectors stored at entries 9..11 of
+// `premul_absorb`, and StoreXYB writes the final values.
+template <class V>
+void LinearRGBToXYB(const V r, const V g, const V b,
+                    const float* JXL_RESTRICT premul_absorb,
+                    float* JXL_RESTRICT valx, float* JXL_RESTRICT valy,
+                    float* JXL_RESTRICT valz) {
+  const HWY_FULL(float) d;
+  const size_t lanes = Lanes(d);
+  // Entries 9, 10, 11 of the table are the addends for CubeRootAndAdd.
+  const float* const addends = premul_absorb + 9 * lanes;
+
+  V absorbed0, absorbed1, absorbed2;
+  OpsinAbsorbance(r, g, b, premul_absorb, &absorbed0, &absorbed1, &absorbed2);
+
+  // The mixed channels should be non-negative even for wide-gamut input, so
+  // anything below zero is clamped before taking the cube root.
+  const V out0 =
+      CubeRootAndAdd(ZeroIfNegative(absorbed0), Load(d, addends + 0 * lanes));
+  const V out1 =
+      CubeRootAndAdd(ZeroIfNegative(absorbed1), Load(d, addends + 1 * lanes));
+  const V out2 =
+      CubeRootAndAdd(ZeroIfNegative(absorbed2), Load(d, addends + 2 * lanes));
+
+  // Note: for wide-gamut inputs, r/g/b and valx (but not valy/valz) are often
+  // negative.
+  StoreXYB(out0, out1, out2, valx, valy, valz);
+}
+
+// Converts one row of linear RGB to XYB in place: row0/row1/row2 hold R/G/B on
+// entry and X/Y/B on return.
+//
+// The row is processed in whole vectors of Lanes(d) floats, so the last
+// iteration may read and write up to Lanes(d) - 1 elements beyond `xsize`;
+// the rows are assumed to be padded accordingly.
+void LinearRGBRowToXYB(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1,
+                       float* JXL_RESTRICT row2,
+                       const float* JXL_RESTRICT premul_absorb, size_t xsize) {
+  const HWY_FULL(float) d;
+  const size_t step = Lanes(d);
+  size_t x = 0;
+  while (x < xsize) {
+    float* const p0 = row0 + x;
+    float* const p1 = row1 + x;
+    float* const p2 = row2 + x;
+    // All three loads complete before the callee stores anything.
+    LinearRGBToXYB(Load(d, p0), Load(d, p1), Load(d, p2), premul_absorb, p0,
+                   p1, p2);
+    x += step;
+  }
+}
+
+// Maps one vector of sRGB-encoded samples to linear values through
+// TF_SRGB::DisplayFromEncoded. Input and output use the codec.h scaling
+// (nominally 0-1 when in gamut).
+template <class V>
+V LinearFromSRGB(V encoded) {
+  TF_SRGB srgb_tf;
+  return srgb_tf.DisplayFromEncoded(encoded);
+}
+
+// Converts the linear sRGB image `linear` to XYB, writing into `xyb`.
+// One pool task is issued per image row; each row is processed in whole
+// vectors. Returns the status reported by RunOnPool.
+Status LinearSRGBToXYB(const Image3F& linear,
+                       const float* JXL_RESTRICT premul_absorb,
+                       ThreadPool* pool, Image3F* JXL_RESTRICT xyb) {
+  const HWY_FULL(float) d;
+  const size_t width = linear.xsize();
+
+  const auto convert_row = [&](const uint32_t task, size_t /*thread*/) {
+    const size_t y = static_cast<size_t>(task);
+    const float* JXL_RESTRICT src_r = linear.ConstPlaneRow(0, y);
+    const float* JXL_RESTRICT src_g = linear.ConstPlaneRow(1, y);
+    const float* JXL_RESTRICT src_b = linear.ConstPlaneRow(2, y);
+    float* JXL_RESTRICT dst_x = xyb->PlaneRow(0, y);
+    float* JXL_RESTRICT dst_y = xyb->PlaneRow(1, y);
+    float* JXL_RESTRICT dst_b = xyb->PlaneRow(2, y);
+
+    for (size_t x = 0; x < width; x += Lanes(d)) {
+      LinearRGBToXYB(Load(d, src_r + x), Load(d, src_g + x),
+                     Load(d, src_b + x), premul_absorb, dst_x + x, dst_y + x,
+                     dst_b + x);
+    }
+  };
+
+  return RunOnPool(pool, 0, static_cast<uint32_t>(linear.ysize()),
+                   ThreadPool::NoInit, convert_row, "LinearToXYB");
+}
+
+// Converts the sRGB-encoded image `srgb` to XYB, writing into `xyb`.
+// Each sample is first linearized with LinearFromSRGB and then passed through
+// LinearRGBToXYB. One pool task is issued per image row. Returns the status
+// reported by RunOnPool.
+Status SRGBToXYB(const Image3F& srgb, const float* JXL_RESTRICT premul_absorb,
+                 ThreadPool* pool, Image3F* JXL_RESTRICT xyb) {
+  const HWY_FULL(float) d;
+  const size_t width = srgb.xsize();
+
+  const auto convert_row = [&](const uint32_t task, size_t /*thread*/) {
+    const size_t y = static_cast<size_t>(task);
+    const float* JXL_RESTRICT src_r = srgb.ConstPlaneRow(0, y);
+    const float* JXL_RESTRICT src_g = srgb.ConstPlaneRow(1, y);
+    const float* JXL_RESTRICT src_b = srgb.ConstPlaneRow(2, y);
+    float* JXL_RESTRICT dst_x = xyb->PlaneRow(0, y);
+    float* JXL_RESTRICT dst_y = xyb->PlaneRow(1, y);
+    float* JXL_RESTRICT dst_b = xyb->PlaneRow(2, y);
+
+    for (size_t x = 0; x < width; x += Lanes(d)) {
+      const auto lin_r = LinearFromSRGB(Load(d, src_r + x));
+      const auto lin_g = LinearFromSRGB(Load(d, src_g + x));
+      const auto lin_b = LinearFromSRGB(Load(d, src_b + x));
+      LinearRGBToXYB(lin_r, lin_g, lin_b, premul_absorb, dst_x + x, dst_y + x,
+                     dst_b + x);
+    }
+  };
+
+  return RunOnPool(pool, 0, static_cast<uint32_t>(srgb.ysize()),
+                   ThreadPool::NoInit, convert_row, "SRGBToXYB");
+}
+
+// Same conversion as SRGBToXYB, but additionally stores the linearized RGB
+// samples into `linear`. One pool task is issued per image row. Returns the
+// status reported by RunOnPool.
+Status SRGBToXYBAndLinear(const Image3F& srgb,
+                          const float* JXL_RESTRICT premul_absorb,
+                          ThreadPool* pool, Image3F* JXL_RESTRICT xyb,
+                          Image3F* JXL_RESTRICT linear) {
+  const HWY_FULL(float) d;
+  const size_t width = srgb.xsize();
+
+  const auto convert_row = [&](const uint32_t task, size_t /*thread*/) {
+    const size_t y = static_cast<size_t>(task);
+
+    // Inputs: sRGB-encoded planes.
+    const float* JXL_RESTRICT src_r = srgb.ConstPlaneRow(0, y);
+    const float* JXL_RESTRICT src_g = srgb.ConstPlaneRow(1, y);
+    const float* JXL_RESTRICT src_b = srgb.ConstPlaneRow(2, y);
+
+    // Output 1: linear RGB planes.
+    float* JXL_RESTRICT lin_row_r = linear->PlaneRow(0, y);
+    float* JXL_RESTRICT lin_row_g = linear->PlaneRow(1, y);
+    float* JXL_RESTRICT lin_row_b = linear->PlaneRow(2, y);
+
+    // Output 2: XYB planes.
+    float* JXL_RESTRICT dst_x = xyb->PlaneRow(0, y);
+    float* JXL_RESTRICT dst_y = xyb->PlaneRow(1, y);
+    float* JXL_RESTRICT dst_b = xyb->PlaneRow(2, y);
+
+    for (size_t x = 0; x < width; x += Lanes(d)) {
+      const auto lin_r = LinearFromSRGB(Load(d, src_r + x));
+      const auto lin_g = LinearFromSRGB(Load(d, src_g + x));
+      const auto lin_b = LinearFromSRGB(Load(d, src_b + x));
+
+      Store(lin_r, d, lin_row_r + x);
+      Store(lin_g, d, lin_row_g + x);
+      Store(lin_b, d, lin_row_b + x);
+
+      LinearRGBToXYB(lin_r, lin_g, lin_b, premul_absorb, dst_x + x, dst_y + x,
+                     dst_b + x);
+    }
+  };
+
+  return RunOnPool(pool, 0, static_cast<uint32_t>(srgb.ysize()),
+                   ThreadPool::NoInit, convert_row, "SRGBToXYBAndLinear");
+}
+
+// Fills `premul_absorb` with 12 pre-broadcast vectors of Lanes(d) floats each:
+//   entries 0..8:  kOpsinAbsorbanceMatrix[i] * (intensity_target / 255),
+//   entries 9..11: -cbrtf(kOpsinAbsorbanceBias[i]).
+// The buffer must therefore hold at least 12 * Lanes(d) floats and satisfy the
+// alignment required by Store.
+void ComputePremulAbsorb(float intensity_target, float* premul_absorb) {
+  const HWY_FULL(float) d;
+  const size_t lanes = Lanes(d);
+  const float scale = intensity_target / 255.0f;
+
+  float* out = premul_absorb;
+  for (size_t i = 0; i < 9; ++i, out += lanes) {
+    Store(Set(d, kOpsinAbsorbanceMatrix[i] * scale), d, out);
+  }
+  // `out` now points at entry 9.
+  for (size_t i = 0; i < 3; ++i, out += lanes) {
+    Store(Set(d, -cbrtf(kOpsinAbsorbanceBias[i])), d, out);
+  }
+}
+
+// Returns a copy of `in` converted from `color_encoding` to linear sRGB
+// (gray or color, matching the input) using the color management interface
+// `cms`. Rows are transformed in parallel on `pool`.
+//
+// Aborts via JXL_CHECK if the pool run fails or if any row transform reports
+// failure.
+Image3F TransformToLinearRGB(const Image3F& in,
+                             const ColorEncoding& color_encoding,
+                             float intensity_target, const JxlCmsInterface& cms,
+                             ThreadPool* pool) {
+  ColorSpaceTransform c_transform(cms);
+  const bool is_gray = color_encoding.IsGray();
+  const ColorEncoding& c_desired = ColorEncoding::LinearSRGB(is_gray);
+  Image3F out(in.xsize(), in.ysize());
+  std::atomic<bool> ok{true};
+
+  // Called once by the pool before any row is processed.
+  const auto init = [&](const size_t num_threads) {
+    return c_transform.Init(color_encoding, c_desired, intensity_target,
+                            in.xsize(), num_threads);
+  };
+
+  const auto transform_row = [&](const uint32_t y, const size_t thread) {
+    float* interleaved = c_transform.BufSrc(thread);
+    const float* src_buf;
+    if (is_gray) {
+      // Gray input is read straight from plane 0; no interleaving needed.
+      src_buf = in.ConstPlaneRow(0, y);
+    } else {
+      // Pack the three planes into RGBRGB... order in the per-thread buffer.
+      const float* JXL_RESTRICT plane0 = in.ConstPlaneRow(0, y);
+      const float* JXL_RESTRICT plane1 = in.ConstPlaneRow(1, y);
+      const float* JXL_RESTRICT plane2 = in.ConstPlaneRow(2, y);
+      for (size_t x = 0; x < in.xsize(); x++) {
+        float* px = interleaved + 3 * x;
+        px[0] = plane0[x];
+        px[1] = plane1[x];
+        px[2] = plane2[x];
+      }
+      src_buf = interleaved;
+    }
+
+    float* JXL_RESTRICT dst_buf = c_transform.BufDst(thread);
+    if (!c_transform.Run(thread, src_buf, dst_buf)) {
+      ok.store(false);
+      return;
+    }
+
+    // Unpack the result. Gray output has one sample per pixel, which is
+    // replicated into all three planes; color output has three.
+    const size_t pitch = is_gray ? 1 : 3;
+    const size_t offset1 = is_gray ? 0 : 1;
+    const size_t offset2 = is_gray ? 0 : 2;
+    float* JXL_RESTRICT out0 = out.PlaneRow(0, y);
+    float* JXL_RESTRICT out1 = out.PlaneRow(1, y);
+    float* JXL_RESTRICT out2 = out.PlaneRow(2, y);
+    for (size_t x = 0; x < in.xsize(); x++) {
+      out0[x] = dst_buf[pitch * x];
+      out1[x] = dst_buf[pitch * x + offset1];
+      out2[x] = dst_buf[pitch * x + offset2];
+    }
+  };
+
+  JXL_CHECK(RunOnPool(pool, 0, in.ysize(), init, transform_row,
+                      "Colorspace transform"));
+  JXL_CHECK(ok.load());
+  return out;
+}
+
+// Converts `in`, whose samples are encoded as `color_encoding`, to XYB in
+// `xyb` (which must have the same size as `in`).
+//
+// Three paths, cheapest first: input already linear sRGB, input sRGB-encoded,
+// or any other encoding (converted to linear sRGB through `cms` first).
+// Failures abort via JXL_CHECK.
+void Image3FToXYB(const Image3F& in, const ColorEncoding& color_encoding,
+                  float intensity_target, ThreadPool* pool,
+                  Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms) {
+  JXL_ASSERT(SameSize(in, *xyb));
+
+  // Table of 12 pre-broadcast constant vectors used by the conversion.
+  const HWY_FULL(float) d;
+  HWY_ALIGN float premul_absorb[MaxLanes(d) * 12];
+  ComputePremulAbsorb(intensity_target, premul_absorb);
+
+  const bool is_gray = color_encoding.IsGray();
+  const ColorEncoding& c_linear_srgb = ColorEncoding::LinearSRGB(is_gray);
+
+  if (c_linear_srgb.SameColorEncoding(color_encoding)) {
+    JXL_CHECK(LinearSRGBToXYB(in, premul_absorb, pool, xyb));
+    return;
+  }
+  if (color_encoding.IsSRGB()) {
+    JXL_CHECK(SRGBToXYB(in, premul_absorb, pool, xyb));
+    return;
+  }
+
+  // General case: go through an explicit linear sRGB intermediate.
+  Image3F linear =
+      TransformToLinearRGB(in, color_encoding, intensity_target, cms, pool);
+  JXL_CHECK(LinearSRGBToXYB(linear, premul_absorb, pool, xyb));
+}
+
+// Converts the color image of `in` to XYB in `xyb` (same size as `in`).
+//
+// If `linear` is non-null it additionally receives a linear sRGB version of
+// the image and is returned; otherwise `&in` is returned. Failures abort via
+// JXL_CHECK.
+//
+// Unlike Butteraugli's OpsinDynamicsImage(), no sensitivity multiplier based
+// on the blurred image is applied.
+const ImageBundle* ToXYB(const ImageBundle& in, ThreadPool* pool,
+                         Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms,
+                         ImageBundle* const JXL_RESTRICT linear) {
+  JXL_ASSERT(SameSize(in, *xyb));
+
+  // Table of 12 pre-broadcast constant vectors used by the conversion.
+  const HWY_FULL(float) d;
+  HWY_ALIGN float premul_absorb[MaxLanes(d) * 12];
+  ComputePremulAbsorb(in.metadata()->IntensityTarget(), premul_absorb);
+
+  const bool want_linear = (linear != nullptr);
+  const ColorEncoding& c_linear_srgb = ColorEncoding::LinearSRGB(in.IsGray());
+
+  // Path 1: input is already linear sRGB. Rare, but useful for the fastest
+  // encoders, where undoing the sRGB transfer function would dominate.
+  if (c_linear_srgb.SameColorEncoding(in.c_current())) {
+    JXL_CHECK(LinearSRGBToXYB(in.color(), premul_absorb, pool, xyb));
+    if (!want_linear) return &in;
+    // Only requested at kitten effort or slower; the encoder cost dwarfs this
+    // copy, so moving the bundle is not worth the complexity.
+    *linear = in.Copy();
+    return linear;
+  }
+
+  // Path 2 (common): input is sRGB, so no color management is needed.
+  if (in.IsSRGB()) {
+    if (want_linear) {
+      // Slow encoders also want linear sRGB; produce both in one pass.
+      linear->SetFromImage(Image3F(in.xsize(), in.ysize()), c_linear_srgb);
+      JXL_CHECK(SRGBToXYBAndLinear(in.color(), premul_absorb, pool, xyb,
+                                   linear->color()));
+      return linear;
+    }
+    // Most common: nothing to allocate or copy.
+    JXL_CHECK(SRGBToXYB(in.color(), premul_absorb, pool, xyb));
+    return &in;
+  }
+
+  // Path 3: arbitrary encoding, requires a color transform to linear sRGB.
+  // The transform writes into the caller's bundle when one was supplied and
+  // into a function-local bundle otherwise.
+  ImageBundle local_linear;
+  if (!want_linear) {
+    // Reusing the input's metadata is fine, it will not be changed.
+    local_linear = ImageBundle(const_cast<ImageMetadata*>(in.metadata()));
+  }
+  ImageBundle* const target = want_linear ? linear : &local_linear;
+
+  const ImageBundle* transformed;
+  JXL_CHECK(
+      TransformIfNeeded(in, c_linear_srgb, cms, pool, target, &transformed));
+  // Had no transform been necessary, one of the earlier paths would have been
+  // taken.
+  JXL_ASSERT(transformed == target);
+
+  JXL_CHECK(LinearSRGBToXYB(*target->color(), premul_absorb, pool, xyb));
+  if (want_linear) return linear;
+  return &in;
+}
+
+// Converts R, G, B planes to YCbCr planes (full-range BT.601, see below).
+// The stored Y is the luma minus 128/255.
+//
+// Every vector is loaded from all three inputs before anything is stored at
+// the same offset, so the conversion could be performed in place (i.e. Y, Cb
+// and Cr could alias R, G and B).
+//
+// Rows are processed in whole vectors and grouped into stripes of roughly
+// kGroupDim * kGroupDim pixels, one pool task per stripe. Returns true
+// immediately for empty images, otherwise the status reported by RunOnPool.
+Status RgbToYcbcr(const ImageF& r_plane, const ImageF& g_plane,
+                  const ImageF& b_plane, ImageF* y_plane, ImageF* cb_plane,
+                  ImageF* cr_plane, ThreadPool* pool) {
+  const size_t xsize = r_plane.xsize();
+  const size_t ysize = r_plane.ysize();
+  if (xsize == 0 || ysize == 0) return true;
+
+  const HWY_FULL(float) df;
+  const size_t step = Lanes(df);
+
+  // Full-range BT.601 as defined by JFIF Clause 7:
+  // https://www.itu.int/rec/T-REC-T.871-201105-I/en
+  const auto luma_offset = Set(df, 128.0f / 255);
+  const auto weight_r = Set(df, 0.299f);  // NTSC luma
+  const auto weight_g = Set(df, 0.587f);
+  const auto weight_b = Set(df, 0.114f);
+  const auto amp_r = Set(df, 0.701f);
+  const auto amp_b = Set(df, 0.886f);
+  const auto diff_r = Add(amp_r, weight_r);
+  const auto diff_b = Add(amp_b, weight_b);
+  const auto norm_r = Div(Set(df, 1.0f), (Add(amp_r, Add(weight_g, weight_b))));
+  const auto norm_b = Div(Set(df, 1.0f), (Add(weight_r, Add(weight_g, amp_b))));
+
+  // Split the image into stripes of whole rows.
+  constexpr size_t kGroupArea = kGroupDim * kGroupDim;
+  const size_t rows_per_stripe = DivCeil(kGroupArea, xsize);
+  const size_t num_stripes = DivCeil(ysize, rows_per_stripe);
+
+  const auto convert_stripe = [&](int idx, int /* thread*/) {
+    const size_t y_begin = idx * rows_per_stripe;
+    const size_t y_end = std::min<size_t>(y_begin + rows_per_stripe, ysize);
+    for (size_t y = y_begin; y < y_end; ++y) {
+      const float* src_r = r_plane.ConstRow(y);
+      const float* src_g = g_plane.ConstRow(y);
+      const float* src_b = b_plane.ConstRow(y);
+      float* dst_y = y_plane->Row(y);
+      float* dst_cb = cb_plane->Row(y);
+      float* dst_cr = cr_plane->Row(y);
+      for (size_t x = 0; x < xsize; x += step) {
+        // Load everything first so that aliased outputs are safe.
+        const auto r = Load(df, src_r + x);
+        const auto g = Load(df, src_g + x);
+        const auto b = Load(df, src_b + x);
+        const auto luma =
+            Add(Mul(r, weight_r), Add(Mul(g, weight_g), Mul(b, weight_b)));
+        const auto y_out = Sub(luma, luma_offset);
+        const auto cb_out = Mul(Sub(Mul(b, diff_b), luma), norm_b);
+        const auto cr_out = Mul(Sub(Mul(r, diff_r), luma), norm_r);
+        Store(y_out, df, dst_y + x);
+        Store(cb_out, df, dst_cb + x);
+        Store(cr_out, df, dst_cr + x);
+      }
+    }
+  };
+
+  return RunOnPool(pool, 0, static_cast<int>(num_stripes), ThreadPool::NoInit,
+                   convert_stripe, "RgbToYcbCr");
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(ToXYB);  // Per-target table used by HWY_DYNAMIC_DISPATCH below.
+const ImageBundle* ToXYB(const ImageBundle& in, ThreadPool* pool,
+                         Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms,
+                         ImageBundle* JXL_RESTRICT linear_storage) {
+  return HWY_DYNAMIC_DISPATCH(ToXYB)(in, pool, xyb, cms, linear_storage);
+}
+
+HWY_EXPORT(LinearRGBRowToXYB);  // Per-target table for the dispatch below.
+void LinearRGBRowToXYB(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1,
+                       float* JXL_RESTRICT row2,
+                       const float* JXL_RESTRICT premul_absorb, size_t xsize) {
+  HWY_DYNAMIC_DISPATCH(LinearRGBRowToXYB)
+  (row0, row1, row2, premul_absorb, xsize);
+}
+
+HWY_EXPORT(ComputePremulAbsorb);  // Per-target table for the dispatch below.
+void ComputePremulAbsorb(float intensity_target, float* premul_absorb) {
+  HWY_DYNAMIC_DISPATCH(ComputePremulAbsorb)(intensity_target, premul_absorb);
+}
+
+void ScaleXYBRow(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1,
+                 float* JXL_RESTRICT row2, size_t xsize) {
+  for (size_t i = 0; i < xsize; ++i) {  // row2 must read row1 before rescaling.
+    row0[i] = (row0[i] + kScaledXYBOffset[0]) * kScaledXYBScale[0];
+    row2[i] = (row2[i] - row1[i] + kScaledXYBOffset[2]) * kScaledXYBScale[2];
+    row1[i] = (row1[i] + kScaledXYBOffset[1]) * kScaledXYBScale[1];
+  }
+}
+
+void ScaleXYB(Image3F* opsin) {
+  // Rescales every row of the three planes in place.
+  const size_t width = opsin->xsize();
+  for (size_t row = 0; row < opsin->ysize(); ++row) {
+    ScaleXYBRow(opsin->PlaneRow(0, row), opsin->PlaneRow(1, row),
+                opsin->PlaneRow(2, row), width);
+  }
+}
+
+HWY_EXPORT(Image3FToXYB);
+void Image3FToXYB(const Image3F& in, const ColorEncoding& color_encoding,
+                  float intensity_target, ThreadPool* pool,
+                  Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms) {
+  HWY_DYNAMIC_DISPATCH(Image3FToXYB)  // Forwards to the per-target version.
+  (in, color_encoding, intensity_target, pool, xyb, cms);
+}
+
+HWY_EXPORT(RgbToYcbcr);  // Per-target table used by HWY_DYNAMIC_DISPATCH below.
+Status RgbToYcbcr(const ImageF& r_plane, const ImageF& g_plane,
+                  const ImageF& b_plane, ImageF* y_plane, ImageF* cb_plane,
+                  ImageF* cr_plane, ThreadPool* pool) {
+  return HWY_DYNAMIC_DISPATCH(RgbToYcbcr)(r_plane, g_plane, b_plane, y_plane,
+                                          cb_plane, cr_plane, pool);
+}
+
+// DEPRECATED
+Image3F OpsinDynamicsImage(const Image3B& srgb8, const JxlCmsInterface& cms) {
+  // Wrap the 8-bit sRGB input in a bundle and convert it to linear sRGB.
+  ImageMetadata meta;
+  meta.SetUintSamples(8);
+  meta.color_encoding = ColorEncoding::SRGB();
+  ImageBundle bundle(&meta);
+  bundle.SetFromImage(ConvertToFloat(srgb8), meta.color_encoding);
+  JXL_CHECK(
+      bundle.TransformTo(ColorEncoding::LinearSRGB(bundle.IsGray()), cms));
+  Image3F opsin(srgb8.xsize(), srgb8.ysize());
+  ImageBundle scratch(&meta);  // Linear storage for ToXYB; unused afterwards.
+  (void)ToXYB(bundle, /*pool=*/nullptr, &opsin, cms, &scratch);
+  return opsin;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_xyb.h b/third-party/libjxl/libjxl/lib/jxl/enc_xyb.h
new file mode 100644
index 0000000000..fc902848ee
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/enc_xyb.h
@@ -0,0 +1,56 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_XYB_H_
+#define LIB_JXL_ENC_XYB_H_
+
+// Converts to XYB color space.
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+// Converts `in` (in any color space) to XYB, stored in `xyb`. If `linear` is
+// not null, returns `linear` after filling it with a linear sRGB copy of `in`.
+// Otherwise, returns `&in`.
+// NOTE: this return value can avoid an extra color conversion if `in` would
+// later be passed to JxlButteraugliComparator.
+const ImageBundle* ToXYB(const ImageBundle& in, ThreadPool* pool,
+                         Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms,
+                         ImageBundle* JXL_RESTRICT linear = nullptr);
+
+void Image3FToXYB(const Image3F& in, const ColorEncoding& color_encoding,
+                  float intensity_target, ThreadPool* pool,
+                  Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms);
+
+void LinearRGBRowToXYB(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1,
+                       float* JXL_RESTRICT row2,
+                       const float* JXL_RESTRICT premul_absorb, size_t xsize);
+
+void ComputePremulAbsorb(float intensity_target, float* premul_absorb);
+
+// Transforms each color component of the given XYB image (ScaleXYBRow: of one
+// row per plane) into the [0.0, 1.0] interval with an affine transform.
+void ScaleXYB(Image3F* opsin);
+void ScaleXYBRow(float* row0, float* row1, float* row2, size_t xsize);
+
+// BT.601 to match JPEG/JFIF. Outputs _signed_ YCbCr values suitable for DCT,
+// see F.1.1.3 of T.81 (because our data type is float, there is no need to add
+// a bias to make the values unsigned).
+Status RgbToYcbcr(const ImageF& r_plane, const ImageF& g_plane,
+                  const ImageF& b_plane, ImageF* y_plane, ImageF* cb_plane,
+                  ImageF* cr_plane, ThreadPool* pool);
+
+// DEPRECATED, used by opsin_image_wrapper. Converts 8-bit sRGB to XYB.
+Image3F OpsinDynamicsImage(const Image3B& srgb8, const JxlCmsInterface& cms);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_XYB_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/encode.cc b/third-party/libjxl/libjxl/lib/jxl/encode.cc
new file mode 100644
index 0000000000..7c23847ca8
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/encode.cc
@@ -0,0 +1,2258 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <brotli/encode.h>
+#include <jxl/codestream_header.h>
+#include <jxl/encode.h>
+#include <jxl/types.h>
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/enc_fast_lossless.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_icc_codec.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/encode_internal.h"
+#include "lib/jxl/exif.h"
+#include "lib/jxl/jpeg/enc_jpeg_data.h"
+#include "lib/jxl/luminance.h"
+#include "lib/jxl/sanitizers.h"
+
+// Failure macros (cf. JXL_FAILURE) that debug-print and yield JXL_ENC_ERROR.
+// JXL_API_ERROR also stores error_code in enc->error; the _NOSET form does not.
+#ifdef JXL_CRASH_ON_ERROR
+#define JXL_API_ERROR(enc, error_code, format, ...)                          \
+  (enc->error = error_code,                                                  \
+   ::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__), \
+   ::jxl::Abort(), JXL_ENC_ERROR)
+#define JXL_API_ERROR_NOSET(format, ...)                                     \
+  (::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__), \
+   ::jxl::Abort(), JXL_ENC_ERROR)
+#else  // JXL_CRASH_ON_ERROR
+#define JXL_API_ERROR(enc, error_code, format, ...)                            \
+  (enc->error = error_code,                                                    \
+   ((JXL_DEBUG_ON_ERROR) &&                                                    \
+    ::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__)), \
+   JXL_ENC_ERROR)
+#define JXL_API_ERROR_NOSET(format, ...)                                     \
+  (::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__), \
+   JXL_ENC_ERROR)
+#endif  // JXL_CRASH_ON_ERROR
+
+namespace jxl {}  // namespace jxl
+
+uint32_t JxlEncoderVersion(void) {  // major * 1e6 + minor * 1e3 + patch.
+  return JPEGXL_PATCH_VERSION + 1000 * JPEGXL_MINOR_VERSION +
+         1000000 * JPEGXL_MAJOR_VERSION;
+}
+
+namespace {
+template <typename T>
+void AppendJxlpBoxCounter(uint32_t counter, bool last, T* output) {
+  // Appends the index as 4 big-endian bytes; bit 31 is set when `last`.
+  const uint32_t value = last ? (counter | 0x80000000) : counter;
+  for (int shift = 24; shift >= 0; shift -= 8)
+    output->push_back((value >> shift) & 0xff);
+}
+
+void QueueFrame(
+    const JxlEncoderFrameSettings* frame_settings,
+    jxl::MemoryManagerUniquePtr<jxl::JxlEncoderQueuedFrame>& frame) {
+  // Moves `frame` into a new entry at the back of the encoder's input queue.
+  auto* const enc = frame_settings->enc;
+  const bool lossless = frame_settings->values.lossless;
+  if (lossless) frame->option_values.cparams.SetLossless();
+  jxl::JxlEncoderQueuedInput item(enc->memory_manager);
+  item.frame = std::move(frame);
+  enc->input_queue.emplace_back(std::move(item));
+  ++enc->num_queued_frames;
+}
+
+void QueueFastLosslessFrame(const JxlEncoderFrameSettings* frame_settings,
+                            JxlFastLosslessFrameState* fast_lossless_frame) {
+  jxl::JxlEncoderQueuedInput item(frame_settings->enc->memory_manager);
+  item.fast_lossless_frame.reset(fast_lossless_frame);  // Item now holds it.
+  frame_settings->enc->input_queue.emplace_back(std::move(item));
+  ++frame_settings->enc->num_queued_frames;
+}
+
+void QueueBox(JxlEncoder* enc,
+              jxl::MemoryManagerUniquePtr<jxl::JxlEncoderQueuedBox>& box) {
+  jxl::JxlEncoderQueuedInput item(enc->memory_manager);
+  item.box = std::move(box);  // The caller's `box` is moved from.
+  enc->input_queue.emplace_back(std::move(item));
+  ++enc->num_queued_boxes;
+}
+
+// TODO(lode): share this code and the Brotli compression code in enc_jpeg_data
+JxlEncoderStatus BrotliCompress(int quality, const uint8_t* in, size_t in_size,
+                                jxl::PaddedBytes* out) {  // Appends to *out.
+  std::unique_ptr<BrotliEncoderState, decltype(BrotliEncoderDestroyInstance)*>
+      enc(BrotliEncoderCreateInstance(nullptr, nullptr, nullptr),
+          BrotliEncoderDestroyInstance);  // Destroyed on every return path.
+  if (!enc) return JXL_API_ERROR_NOSET("BrotliEncoderCreateInstance failed");
+
+  BrotliEncoderSetParameter(enc.get(), BROTLI_PARAM_QUALITY, quality);
+  BrotliEncoderSetParameter(enc.get(), BROTLI_PARAM_SIZE_HINT, in_size);
+
+  constexpr size_t kBufferSize = 128 * 1024;  // Size of one output chunk.
+  jxl::PaddedBytes temp_buffer(kBufferSize);
+
+  size_t avail_in = in_size;
+  const uint8_t* next_in = in;
+
+  size_t total_out = 0;
+
+  for (;;) {  // Drain the encoder one chunk at a time until it is finished.
+    size_t avail_out = kBufferSize;
+    uint8_t* next_out = temp_buffer.data();
+    jxl::msan::MemoryIsInitialized(next_in, avail_in);
+    if (!BrotliEncoderCompressStream(enc.get(), BROTLI_OPERATION_FINISH,
+                                     &avail_in, &next_in, &avail_out, &next_out,
+                                     &total_out)) {
+      return JXL_API_ERROR_NOSET("Brotli compression failed");
+    }
+    size_t out_size = next_out - temp_buffer.data();  // Bytes produced now.
+    jxl::msan::UnpoisonMemory(next_out - out_size, out_size);
+    out->resize(out->size() + out_size);
+    memcpy(out->data() + out->size() - out_size, temp_buffer.data(), out_size);
+    if (BrotliEncoderIsFinished(enc.get())) break;
+  }
+
+  return JXL_ENC_SUCCESS;
+}
+
+// The JXL codestream can have level 5 or level 10. Levels have certain
+// restrictions such as max allowed image dimensions. This function returns the
+// lowest level (5 or 10) that supports the current encoder settings, or -1 if
+// not even level 10 does. Whenever the result is not 5, the reason is stored
+// in debug_string, which is intended for developer API error messages and may
+// be set to nullptr.
+int VerifyLevelSettings(const JxlEncoder* enc, std::string* debug_string) {
+  // Stores `reason` if a message was requested, then yields `level`.
+  const auto fail = [debug_string](const char* reason, int level) {
+    if (debug_string) *debug_string = reason;
+    return level;
+  };
+
+  const auto& meta = enc->metadata.m;
+  const uint64_t width = enc->metadata.size.xsize();
+  const uint64_t height = enc->metadata.size.ysize();
+
+  // The uncompressed ICC size, if it is used.
+  const size_t icc_bytes =
+      meta.color_encoding.WantICC() ? meta.color_encoding.ICC().size() : 0;
+
+  // Level 10 checks
+  // Failing any of these means that no level supports the settings (-1).
+
+  if (width > (1ull << 30ull) || height > (1ull << 30ull) ||
+      width * height > (1ull << 40ull)) {
+    return fail("Too large image dimensions", -1);
+  }
+  if (icc_bytes > (1ull << 28)) {
+    return fail("Too large ICC profile size", -1);
+  }
+  if (meta.num_extra_channels > 256) {
+    return fail("Too many extra channels", -1);
+  }
+
+  // Level 5 checks
+  // Failing any of these only means that level 10 is required.
+
+  if (!meta.modular_16_bit_buffer_sufficient) {
+    return fail("Too high modular bit depth", 10);
+  }
+  if (width > (1ull << 18ull) || height > (1ull << 18ull) ||
+      width * height > (1ull << 28ull)) {
+    return fail("Too large image dimensions", 10);
+  }
+  if (icc_bytes > (1ull << 22)) {
+    return fail("Too large ICC profile", 10);
+  }
+  if (meta.num_extra_channels > 4) {
+    return fail("Too many extra channels", 10);
+  }
+  // A black (K) extra channel is rejected at level 5.
+  for (const auto& channel : meta.extra_channel_info) {
+    if (channel.type == jxl::ExtraChannel::kBlack) {
+      return fail("CMYK channel not allowed", 10);
+    }
+  }
+
+  // TODO(lode): also need to check if consecutive composite-still frames total
+  // pixel amount doesn't exceed 2**28 in the case of level 5. This should be
+  // done when adding frame and requires ability to add composite still frames
+  // to be added first.
+
+  // TODO(lode): also need to check animation duration of a frame. This should
+  // be done when adding frame, but first requires implementing setting the
+  // JxlFrameHeader for a frame.
+
+  // TODO(lode): also need to check properties such as num_splines, num_patches,
+  // modular_16bit_buffers and multiple properties of modular trees. However
+  // these are not user-set properties so cannot be checked here, but decisions
+  // the C++ encoder should be able to make based on the level.
+
+  // All level 5 checks passed, so can return the more compatible level 5
+  return 5;
+}
+
+size_t BitsPerChannel(JxlDataType data_type) {
+  // 16-bit formats: unsigned integer and half-precision float.
+  if (data_type == JXL_TYPE_UINT16 || data_type == JXL_TYPE_FLOAT16) {
+    return 16;
+  }
+  if (data_type == JXL_TYPE_UINT8) {
+    return 8;
+  }
+  if (data_type == JXL_TYPE_FLOAT) {
+    return 32;
+  }
+  // Signals an unhandled JxlDataType.
+  return 0;
+}
+
+template <typename T>
+uint32_t GetBitDepth(JxlBitDepth bit_depth, const T& metadata,
+                     JxlPixelFormat format) {
+  switch (bit_depth.type) {
+    case JXL_BIT_DEPTH_FROM_PIXEL_FORMAT:  // Implied by the sample data type.
+      return BitsPerChannel(format.data_type);
+    case JXL_BIT_DEPTH_FROM_CODESTREAM:  // Taken from the given metadata.
+      return metadata.bit_depth.bits_per_sample;
+    case JXL_BIT_DEPTH_CUSTOM:  // Explicit value carried by `bit_depth`.
+      return bit_depth.bits_per_sample;
+  }
+  return 0;  // Unrecognized bit depth type.
+}
+
+JxlEncoderStatus CheckValidBitdepth(uint32_t bits_per_sample,
+                                    uint32_t exponent_bits_per_sample) {
+  const uint32_t exp_bits = exponent_bits_per_sample;
+  if (exp_bits == 0) {
+    // Integer samples. The spec allows up to 31 for bits_per_sample here,
+    // but the code does not (yet) support it.
+    if (bits_per_sample >= 1 && bits_per_sample <= 24) return JXL_ENC_SUCCESS;
+    return JXL_API_ERROR_NOSET("Invalid value for bits_per_sample");
+  }
+  // Float samples: at most 8 exponent bits, plus 3 to 24 remaining bits.
+  const bool valid = exp_bits <= 8 && bits_per_sample <= 24 + exp_bits &&
+                     bits_per_sample >= 3 + exp_bits;
+  if (!valid) return JXL_API_ERROR_NOSET("Invalid float description");
+  return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus VerifyInputBitDepth(JxlBitDepth bit_depth,
+                                     JxlPixelFormat format) {
+  return JXL_ENC_SUCCESS;  // NOTE(review): no check is done; args are unused.
+}
+
+bool EncodeFrameIndexBox(const jxl::JxlEncoderFrameIndexBox& frame_index_box,
+                         jxl::BitWriter& writer) {
+  bool ok = true;  // Accumulates the result of every EncodeVarInt call.
+  int NF = 0;  // Entry 0 plus each entry with to_be_indexed set.
+  for (size_t i = 0; i < frame_index_box.entries.size(); ++i) {
+    if (i == 0 || frame_index_box.entries[i].to_be_indexed) {
+      ++NF;
+    }
+  }
+  // Box layout: varint NF, then TNUM and TDEN (4 bytes each, big-endian),
+  // followed by NF records of three varints each (OFFi, Ti, Fi).
+  // varint max length is 10 for 64 bit numbers, and these numbers
+  // are limited to 63 bits.
+  static const int kVarintMaxLength = 10;
+  static const int kFrameIndexBoxHeaderLength = kVarintMaxLength + 8;
+  static const int kFrameIndexBoxElementLength = 3 * kVarintMaxLength;
+  const int buffer_size =
+      kFrameIndexBoxHeaderLength + NF * kFrameIndexBoxElementLength;
+  std::vector<uint8_t> buffer_vec(buffer_size);
+  uint8_t* buffer = buffer_vec.data();
+  size_t output_pos = 0;
+  ok &= jxl::EncodeVarInt(NF, buffer_vec.size(), &output_pos, buffer);
+  StoreBE32(frame_index_box.TNUM, &buffer[output_pos]);
+  output_pos += 4;
+  StoreBE32(frame_index_box.TDEN, &buffer[output_pos]);
+  output_pos += 4;
+  // When we record a frame in the index, the record needs to know
+  // how many frames until the next indexed frame. That is why
+  // we store the 'prev' record. That 'prev' record needs to store
+  // the offset byte position to previously recorded indexed frame,
+  // that's why we also trace previous to the previous frame.
+  int prev_prev_ix = -1;  // For position offset (OFFi) delta coding.
+  int prev_ix = 0;
+  int T_prev = 0;
+  int T = 0;
+  for (size_t i = 1; i < frame_index_box.entries.size(); ++i) {
+    if (frame_index_box.entries[i].to_be_indexed) {
+      // Now we can record the previous entry, since we need to store
+      // there how many frames until the next one.
+      int64_t OFFi = frame_index_box.entries[prev_ix].OFFi;
+      if (prev_prev_ix != -1) {
+        // OFFi needs to be offset of start byte of this frame compared to start
+        // byte of previous frame from this index in the JPEG XL codestream. For
+        // the first frame, this is the offset from the first byte of the JPEG
+        // XL codestream.
+        OFFi -= frame_index_box.entries[prev_prev_ix].OFFi;
+      }
+      int32_t Ti = T_prev;
+      int32_t Fi = i - prev_ix;
+      ok &= jxl::EncodeVarInt(OFFi, buffer_vec.size(), &output_pos, buffer);
+      ok &= jxl::EncodeVarInt(Ti, buffer_vec.size(), &output_pos, buffer);
+      ok &= jxl::EncodeVarInt(Fi, buffer_vec.size(), &output_pos, buffer);
+      prev_prev_ix = prev_ix;
+      prev_ix = i;
+      T_prev = T;
+      T += frame_index_box.entries[i].duration;
+    }
+  }
+  {
+    // Last frame. NOTE(review): assumes `entries` is non-empty - confirm.
+    size_t i = frame_index_box.entries.size();
+    int64_t OFFi = frame_index_box.entries[prev_ix].OFFi;
+    if (prev_prev_ix != -1) {
+      OFFi -= frame_index_box.entries[prev_prev_ix].OFFi;
+    }
+    int32_t Ti = T_prev;
+    int32_t Fi = i - prev_ix;
+    ok &= jxl::EncodeVarInt(OFFi, buffer_vec.size(), &output_pos, buffer);
+    ok &= jxl::EncodeVarInt(Ti, buffer_vec.size(), &output_pos, buffer);
+    ok &= jxl::EncodeVarInt(Fi, buffer_vec.size(), &output_pos, buffer);
+  }
+  // The buffer is sized for the worst case, so encoding should never fail.
+  // NOTE(review): `buffer_vec` is never copied into `writer`, which is unused.
+  JXL_ASSERT(ok);
+  return ok;
+}
+
+}  // namespace
+
+JxlEncoderStatus JxlEncoderStruct::RefillOutputByteQueue() {
+  jxl::PaddedBytes bytes;
+
+  jxl::JxlEncoderQueuedInput& input = input_queue[0];
+
+  // TODO(lode): split this into 3 functions: for adding the signature and other
+  // initial headers (jbrd, ...), one for adding frame, and one for adding user
+  // box.
+
+  if (!wrote_bytes) {
+    // First time encoding any data, verify the level 5 vs level 10 settings
+    std::string level_message;
+    int required_level = VerifyLevelSettings(this, &level_message);
+    // Only level 5 and 10 are defined, and the function can return -1 to
+    // indicate full incompatibility.
+    JXL_ASSERT(required_level == -1 || required_level == 5 ||
+               required_level == 10);
+    // codestream_level == -1 means auto-set to the required level
+    if (codestream_level == -1) codestream_level = required_level;
+    if (codestream_level == 5 && required_level != 5) {
+      // If the required level is 10, return error rather than automatically
+      // setting the level to 10, to avoid inadvertently creating a level 10
+      // JXL file while intending to target a level 5 decoder.
+      return JXL_API_ERROR(
+          this, JXL_ENC_ERR_API_USAGE, "%s",
+          ("Codestream level verification for level 5 failed: " + level_message)
+              .c_str());
+    }
+    if (required_level == -1) {
+      return JXL_API_ERROR(
+          this, JXL_ENC_ERR_API_USAGE, "%s",
+          ("Codestream level verification for level 10 failed: " +
+           level_message)
+              .c_str());
+    }
+    jxl::AuxOut* aux_out =
+        input.frame ? input.frame->option_values.aux_out : nullptr;
+    jxl::BitWriter writer;
+    if (!WriteCodestreamHeaders(&metadata, &writer, aux_out)) {
+      return JXL_API_ERROR(this, JXL_ENC_ERR_GENERIC,
+                           "Failed to write codestream header");
+    }
+    // Only send ICC (at least several hundred bytes) if fields aren't enough.
+    if (metadata.m.color_encoding.WantICC()) {
+      if (!jxl::WriteICC(metadata.m.color_encoding.ICC(), &writer,
+                         jxl::kLayerHeader, aux_out)) {
+        return JXL_API_ERROR(this, JXL_ENC_ERR_GENERIC,
+                             "Failed to write ICC profile");
+      }
+    }
+    // TODO(lode): preview should be added here if a preview image is added
+
+    jxl::BitWriter::Allotment allotment(&writer, 8);
+    writer.ZeroPadToByte();
+    allotment.ReclaimAndCharge(&writer, jxl::kLayerHeader, aux_out);
+
+    // Not actually the end of frame, but the end of metadata/ICC, but helps
+    // the next frame to start here for indexing purposes.
+    codestream_bytes_written_end_of_frame +=
+        jxl::DivCeil(writer.BitsWritten(), 8);
+
+    bytes = std::move(writer).TakeBytes();
+
+    if (MustUseContainer()) {
+      // Add "JXL " and ftyp box.
+      output_byte_queue.insert(
+          output_byte_queue.end(), jxl::kContainerHeader,
+          jxl::kContainerHeader + sizeof(jxl::kContainerHeader));
+      if (codestream_level != 5) {
+        // Add jxll box directly after the ftyp box to indicate the codestream
+        // level.
+        output_byte_queue.insert(
+            output_byte_queue.end(), jxl::kLevelBoxHeader,
+            jxl::kLevelBoxHeader + sizeof(jxl::kLevelBoxHeader));
+        output_byte_queue.push_back(codestream_level);
+      }
+
+      // Whether to write the basic info and color profile header of the
+      // codestream into an early separate jxlp box, so that it comes before
+      // metadata or jpeg reconstruction boxes. In theory this could simply
+      // always be done, but there's no reason to add an extra box with box
+      // header overhead if the codestream will already come immediately after
+      // the signature and level boxes.
+      bool partial_header =
+          store_jpeg_metadata ||
+          (use_boxes && (!input.frame && !input.fast_lossless_frame));
+
+      if (partial_header) {
+        jxl::AppendBoxHeader(jxl::MakeBoxType("jxlp"), bytes.size() + 4,
+                             /*unbounded=*/false, &output_byte_queue);
+        AppendJxlpBoxCounter(jxlp_counter++, /*last=*/false,
+                             &output_byte_queue);
+        output_byte_queue.insert(output_byte_queue.end(), bytes.data(),
+                                 bytes.data() + bytes.size());
+        bytes.clear();
+      }
+
+      if (store_jpeg_metadata && !jpeg_metadata.empty()) {
+        jxl::AppendBoxHeader(jxl::MakeBoxType("jbrd"), jpeg_metadata.size(),
+                             false, &output_byte_queue);
+        output_byte_queue.insert(output_byte_queue.end(), jpeg_metadata.begin(),
+                                 jpeg_metadata.end());
+      }
+    }
+    wrote_bytes = true;
+  }
+
+  // Choose frame or box processing: exactly one of the two unique pointers (box
+  // or frame) in the input queue item is non-null.
+  if (input.frame || input.fast_lossless_frame) {
+    jxl::MemoryManagerUniquePtr<jxl::JxlEncoderQueuedFrame> input_frame =
+        std::move(input.frame);
+    if (input.fast_lossless_frame) {
+      output_fast_frame_queue.push_back(std::move(input.fast_lossless_frame));
+    }
+    input_queue.erase(input_queue.begin());
+    num_queued_frames--;
+    if (input_frame) {
+      for (unsigned idx = 0; idx < input_frame->ec_initialized.size(); idx++) {
+        if (!input_frame->ec_initialized[idx]) {
+          return JXL_API_ERROR(this, JXL_ENC_ERR_API_USAGE,
+                               "Extra channel %u is not initialized", idx);
+        }
+      }
+
+      // TODO(zond): If the input queue is empty and the frames_closed is true,
+      // then mark this frame as the last.
+
+      // TODO(zond): Handle progressive mode like EncodeFile does it.
+      // TODO(zond): Handle animation like EncodeFile does it, by checking if
+      //             JxlEncoderCloseFrames has been called and if the frame
+      //             queue is empty (to see if it's the last animation frame).
+
+      if (metadata.m.xyb_encoded) {
+        input_frame->option_values.cparams.color_transform =
+            jxl::ColorTransform::kXYB;
+      } else {
+        // TODO(zond): Figure out when to use kYCbCr instead.
+        input_frame->option_values.cparams.color_transform =
+            jxl::ColorTransform::kNone;
+      }
+    }
+
+    uint32_t duration;
+    uint32_t timecode;
+    if (input_frame && metadata.m.have_animation) {
+      duration = input_frame->option_values.header.duration;
+      timecode = input_frame->option_values.header.timecode;
+    } else {
+      // If have_animation is false, the encoder should ignore the duration and
+      // timecode values. However, assigning them to ib will cause the encoder
+      // to write an invalid frame header that can't be decoded so ensure
+      // they're the default value of 0 here.
+      duration = 0;
+      timecode = 0;
+    }
+
+    bool last_frame = frames_closed && !num_queued_frames;
+
+    size_t codestream_byte_size = 0;
+
+    jxl::BitWriter writer;
+
+    if (input_frame) {
+      jxl::PassesEncoderState enc_state;
+
+      frame_index_box.AddFrame(codestream_bytes_written_end_of_frame, duration,
+                               input_frame->option_values.frame_index_box);
+
+      // EncodeFrame creates jxl::FrameHeader object internally based on the
+      // FrameInfo, imagebundle, cparams and metadata. Copy the information to
+      // these.
+      jxl::ImageBundle& ib = input_frame->frame;
+      ib.duration = duration;
+      ib.timecode = timecode;
+      ib.name = input_frame->option_values.frame_name;
+      ib.blendmode = static_cast<jxl::BlendMode>(
+          input_frame->option_values.header.layer_info.blend_info.blendmode);
+      ib.blend =
+          input_frame->option_values.header.layer_info.blend_info.blendmode !=
+          JXL_BLEND_REPLACE;
+
+      size_t save_as_reference =
+          input_frame->option_values.header.layer_info.save_as_reference;
+      if (save_as_reference >= 3) {
+        return JXL_API_ERROR(
+            this, JXL_ENC_ERR_API_USAGE,
+            "Cannot use save_as_reference values >=3 (found: %d)",
+            (int)save_as_reference);
+      }
+      ib.use_for_next_frame = !!save_as_reference;
+
+      jxl::FrameInfo frame_info;
+      frame_info.is_last = last_frame;
+      frame_info.save_as_reference = save_as_reference;
+      frame_info.source =
+          input_frame->option_values.header.layer_info.blend_info.source;
+      frame_info.clamp =
+          input_frame->option_values.header.layer_info.blend_info.clamp;
+      frame_info.alpha_channel =
+          input_frame->option_values.header.layer_info.blend_info.alpha;
+      frame_info.extra_channel_blending_info.resize(
+          metadata.m.num_extra_channels);
+      // If extra channel blend info has not been set, use the blend mode from
+      // the layer_info.
+      JxlBlendInfo default_blend_info =
+          input_frame->option_values.header.layer_info.blend_info;
+      for (size_t i = 0; i < metadata.m.num_extra_channels; ++i) {
+        auto& to = frame_info.extra_channel_blending_info[i];
+        const auto& from =
+            i < input_frame->option_values.extra_channel_blend_info.size()
+                ? input_frame->option_values.extra_channel_blend_info[i]
+                : default_blend_info;
+        to.mode = static_cast<jxl::BlendMode>(from.blendmode);
+        to.source = from.source;
+        to.alpha_channel = from.alpha;
+        to.clamp = (from.clamp != 0);
+      }
+
+      if (input_frame->option_values.header.layer_info.have_crop) {
+        ib.origin.x0 = input_frame->option_values.header.layer_info.crop_x0;
+        ib.origin.y0 = input_frame->option_values.header.layer_info.crop_y0;
+      }
+      JXL_ASSERT(writer.BitsWritten() == 0);
+      if (!jxl::EncodeFrame(input_frame->option_values.cparams, frame_info,
+                            &metadata, input_frame->frame, &enc_state, cms,
+                            thread_pool.get(), &writer,
+                            input_frame->option_values.aux_out)) {
+        return JXL_API_ERROR(this, JXL_ENC_ERR_GENERIC,
+                             "Failed to encode frame");
+      }
+      codestream_bytes_written_beginning_of_frame =
+          codestream_bytes_written_end_of_frame;
+      codestream_bytes_written_end_of_frame +=
+          jxl::DivCeil(writer.BitsWritten(), 8);
+
+      // Possibly bytes already contains the codestream header: in case this is
+      // the first frame, and the codestream header was not encoded as jxlp
+      // above.
+      bytes.append(std::move(writer).TakeBytes());
+      codestream_byte_size = bytes.size();
+    } else {
+      JXL_CHECK(!output_fast_frame_queue.empty());
+      JxlFastLosslessPrepareHeader(output_fast_frame_queue.front().get(),
+                                   /*add_image_header=*/0, last_frame);
+      codestream_byte_size =
+          JxlFastLosslessOutputSize(output_fast_frame_queue.front().get()) +
+          bytes.size();
+    }
+
+    if (MustUseContainer()) {
+      if (last_frame && jxlp_counter == 0) {
+        // If this is the last frame and no jxlp boxes were used yet, it's
+        // slightly more efficient to write a jxlc box since it has 4 bytes
+        // less overhead.
+        jxl::AppendBoxHeader(jxl::MakeBoxType("jxlc"), codestream_byte_size,
+                             /*unbounded=*/false, &output_byte_queue);
+      } else {
+        jxl::AppendBoxHeader(jxl::MakeBoxType("jxlp"), codestream_byte_size + 4,
+                             /*unbounded=*/false, &output_byte_queue);
+        AppendJxlpBoxCounter(jxlp_counter++, last_frame, &output_byte_queue);
+      }
+    }
+
+    output_byte_queue.insert(output_byte_queue.end(), bytes.data(),
+                             bytes.data() + bytes.size());
+
+    if (input_frame) {
+      last_used_cparams = input_frame->option_values.cparams;
+    }
+    if (last_frame && frame_index_box.StoreFrameIndexBox()) {
+      bytes.clear();
+      EncodeFrameIndexBox(frame_index_box, writer);
+      jxl::AppendBoxHeader(jxl::MakeBoxType("jxli"), bytes.size(),
+                           /*unbounded=*/false, &output_byte_queue);
+    }
+  } else {
+    // Not a frame, so is a box instead
+    jxl::MemoryManagerUniquePtr<jxl::JxlEncoderQueuedBox> box =
+        std::move(input.box);
+    input_queue.erase(input_queue.begin());
+    num_queued_boxes--;
+
+    if (box->compress_box) {
+      jxl::PaddedBytes compressed(4);
+      // Prepend the original box type in the brob box contents
+      for (size_t i = 0; i < 4; i++) {
+        compressed[i] = static_cast<uint8_t>(box->type[i]);
+      }
+      if (JXL_ENC_SUCCESS !=
+          BrotliCompress((brotli_effort >= 0 ? brotli_effort : 4),
+                         box->contents.data(), box->contents.size(),
+                         &compressed)) {
+        return JXL_API_ERROR(this, JXL_ENC_ERR_GENERIC,
+                             "Brotli compression for brob box failed");
+      }
+      jxl::AppendBoxHeader(jxl::MakeBoxType("brob"), compressed.size(), false,
+                           &output_byte_queue);
+      output_byte_queue.insert(output_byte_queue.end(), compressed.data(),
+                               compressed.data() + compressed.size());
+    } else {
+      jxl::AppendBoxHeader(box->type, box->contents.size(), false,
+                           &output_byte_queue);
+      output_byte_queue.insert(output_byte_queue.end(), box->contents.data(),
+                               box->contents.data() + box->contents.size());
+    }
+  }
+
+  return JXL_ENC_SUCCESS;
+}
+
+// Converts the external colour description `color` into the encoder's image
+// metadata. Basic info must already be set and no colour encoding / ICC
+// profile may have been set before. The colour space has to agree with the
+// number of colour channels from the basic info (1 for gray, 3 otherwise).
+JxlEncoderStatus JxlEncoderSetColorEncoding(JxlEncoder* enc,
+                                            const JxlColorEncoding* color) {
+  if (!enc->basic_info_set) {
+    return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, "Basic info not yet set");
+  }
+  if (enc->color_encoding_set) {
+    return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+                         "Color encoding is already set");
+  }
+
+  auto& internal_encoding = enc->metadata.m.color_encoding;
+  if (!jxl::ConvertExternalToInternalColorEncoding(*color,
+                                                   &internal_encoding)) {
+    return JXL_API_ERROR(enc, JXL_ENC_ERR_GENERIC, "Error in color conversion");
+  }
+
+  const bool is_gray =
+      internal_encoding.GetColorSpace() == jxl::ColorSpace::kGray;
+  if (is_gray && enc->basic_info.num_color_channels != 1) {
+    return JXL_API_ERROR(
+        enc, JXL_ENC_ERR_API_USAGE,
+        "Cannot use grayscale color encoding with num_color_channels != 1");
+  }
+  if (!is_gray && enc->basic_info.num_color_channels != 3) {
+    return JXL_API_ERROR(
+        enc, JXL_ENC_ERR_API_USAGE,
+        "Cannot use RGB color encoding with num_color_channels != 3");
+  }
+
+  enc->color_encoding_set = true;
+  // Only derive an intensity target when none has been recorded as set.
+  if (!enc->intensity_target_set) {
+    jxl::SetIntensityTarget(&enc->metadata.m);
+  }
+  return JXL_ENC_SUCCESS;
+}
+
+// Installs the `size` bytes at `icc_profile` as the image's ICC profile.
+// Basic info must already be set and no colour encoding / ICC profile may have
+// been set before. The profile's colour space has to agree with the number of
+// colour channels from the basic info (1 for gray, 3 otherwise).
+JxlEncoderStatus JxlEncoderSetICCProfile(JxlEncoder* enc,
+                                         const uint8_t* icc_profile,
+                                         size_t size) {
+  if (!enc->basic_info_set) {
+    return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, "Basic info not yet set");
+  }
+  if (enc->color_encoding_set) {
+    return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+                         "ICC profile is already set");
+  }
+
+  auto& color_encoding = enc->metadata.m.color_encoding;
+
+  // Copy the caller's bytes; the copy is handed over to the colour encoding.
+  jxl::PaddedBytes profile_copy;
+  profile_copy.assign(icc_profile, icc_profile + size);
+  // The CMS is only passed along when one has been set on the encoder.
+  auto* cms_or_null = enc->cms_set ? &enc->cms : nullptr;
+  if (!color_encoding.SetICC(std::move(profile_copy), cms_or_null)) {
+    return JXL_API_ERROR(enc, JXL_ENC_ERR_BAD_INPUT,
+                         "ICC profile could not be set");
+  }
+
+  const bool is_gray = color_encoding.GetColorSpace() == jxl::ColorSpace::kGray;
+  if (is_gray && enc->basic_info.num_color_channels != 1) {
+    return JXL_API_ERROR(
+        enc, JXL_ENC_ERR_BAD_INPUT,
+        "Cannot use grayscale ICC profile with num_color_channels != 1");
+  }
+  if (!is_gray && enc->basic_info.num_color_channels != 3) {
+    return JXL_API_ERROR(
+        enc, JXL_ENC_ERR_BAD_INPUT,
+        "Cannot use RGB ICC profile with num_color_channels != 3");
+  }
+  // TODO(jon): also check that a kBlack extra channel is provided in the CMYK
+  // case
+
+  enc->color_encoding_set = true;
+  if (!enc->intensity_target_set) {
+    jxl::SetIntensityTarget(&enc->metadata.m);
+  }
+
+  const bool xyb_with_cms = !enc->basic_info.uses_original_profile && enc->cms_set;
+  if (xyb_with_cms) {
+    color_encoding.DecideIfWantICC(enc->cms);
+  }
+
+  return JXL_ENC_SUCCESS;
+}
+
+// Fills every field of `info` that this encoder reads with its default:
+// an 8-bit integer, 3-colour-channel still image of size 0x0 without alpha,
+// preview, container or animation.
+void JxlEncoderInitBasicInfo(JxlBasicInfo* info) {
+  // Container / feature flags.
+  info->have_container = JXL_FALSE;
+  info->have_preview = JXL_FALSE;
+  info->have_animation = JXL_FALSE;
+  info->uses_original_profile = JXL_FALSE;
+
+  // Geometry.
+  info->xsize = 0;
+  info->ysize = 0;
+  info->intrinsic_xsize = 0;
+  info->intrinsic_ysize = 0;
+  info->preview.xsize = 0;
+  info->preview.ysize = 0;
+  info->orientation = JXL_ORIENT_IDENTITY;
+
+  // Sample format of the colour channels.
+  info->bits_per_sample = 8;
+  info->exponent_bits_per_sample = 0;
+  info->num_color_channels = 3;
+
+  // Extra channels and alpha.
+  info->num_extra_channels = 0;
+  info->alpha_bits = 0;
+  info->alpha_exponent_bits = 0;
+  info->alpha_premultiplied = JXL_FALSE;
+
+  // Tone mapping.
+  info->intensity_target = 0.f;
+  info->min_nits = 0.f;
+  info->relative_to_max_display = JXL_FALSE;
+  info->linear_below = 0.f;
+
+  // Animation timing (10/1 ticks per second).
+  info->animation.tps_numerator = 10;
+  info->animation.tps_denominator = 1;
+  info->animation.num_loops = 0;
+  info->animation.have_timecodes = JXL_FALSE;
+}
+
+// Resets `frame_header` to the defaults of the specification. Callers that
+// encode an animation or a composite (blended) still are expected to adjust
+// the relevant fields themselves afterwards.
+void JxlEncoderInitFrameHeader(JxlFrameHeader* frame_header) {
+  auto& layer = frame_header->layer_info;
+
+  // Blending and reference-slot defaults.
+  JxlEncoderInitBlendInfo(&layer.blend_info);
+  layer.save_as_reference = 0;
+
+  // No crop by default. The crop size must be filled in by the user after
+  // enabling have_crop (which is not yet implemented); until then both the
+  // origin and the dimensions stay 0.
+  layer.have_crop = JXL_FALSE;
+  layer.crop_x0 = 0;
+  layer.crop_y0 = 0;
+  layer.xsize = 0;
+  layer.ysize = 0;
+
+  // Timing and naming.
+  frame_header->duration = 0;
+  frame_header->timecode = 0;
+  frame_header->name_length = 0;
+
+  // The specification defaults is_last to !frame_type, and the default
+  // frame_type (kRegularFrame) is 0, hence true. The encoder itself ignores
+  // this value (the field exists for the decoder to set): the last frame is
+  // determined by usage of JxlEncoderCloseFrames instead.
+  frame_header->is_last = JXL_TRUE;
+}
+
+// Resets `blend_info` to the specification default: replace blending with
+// source, alpha and clamp all 0.
+void JxlEncoderInitBlendInfo(JxlBlendInfo* blend_info) {
+  // Replace-blending together with a duration is not useful by itself, but
+  // that is acceptable for a default: the user has to set the duration
+  // manually for animations, or change the blend mode manually for composite
+  // stills, anyway.
+  blend_info->clamp = 0;
+  blend_info->alpha = 0;
+  blend_info->source = 0;
+  blend_info->blendmode = JXL_BLEND_REPLACE;
+}
+
+// Validates `info` and copies it into the encoder's image metadata.
+//
+// Checks, in order: image dimensions, bit depth, intrinsic dimensions, alpha /
+// extra channel consistency, orientation, number of colour channels, animation
+// timing and finally the codestream level. Returns JXL_ENC_SUCCESS when all of
+// them pass; every failure is reported through JXL_API_ERROR.
+//
+// The animation timing is validated before `enc->basic_info` and
+// `enc->basic_info_set` are written, so a call rejected because of an invalid
+// tps_numerator / tps_denominator does not leave the encoder marked as having
+// basic info.
+JxlEncoderStatus JxlEncoderSetBasicInfo(JxlEncoder* enc,
+                                        const JxlBasicInfo* info) {
+  if (!enc->metadata.size.Set(info->xsize, info->ysize)) {
+    return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, "Invalid dimensions");
+  }
+  if (JXL_ENC_SUCCESS != CheckValidBitdepth(info->bits_per_sample,
+                                            info->exponent_bits_per_sample)) {
+    return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, "Invalid bit depth");
+  }
+
+  enc->metadata.m.bit_depth.bits_per_sample = info->bits_per_sample;
+  enc->metadata.m.bit_depth.exponent_bits_per_sample =
+      info->exponent_bits_per_sample;
+  enc->metadata.m.bit_depth.floating_point_sample =
+      (info->exponent_bits_per_sample != 0u);
+  enc->metadata.m.modular_16_bit_buffer_sufficient =
+      (!info->uses_original_profile || info->bits_per_sample <= 12) &&
+      info->alpha_bits <= 12;
+  // An intrinsic size is only stored when it is given and differs from the
+  // actual image size.
+  if ((info->intrinsic_xsize > 0 || info->intrinsic_ysize > 0) &&
+      (info->intrinsic_xsize != info->xsize ||
+       info->intrinsic_ysize != info->ysize)) {
+    if (info->intrinsic_xsize > (1ull << 30ull) ||
+        info->intrinsic_ysize > (1ull << 30ull) ||
+        !enc->metadata.m.intrinsic_size.Set(info->intrinsic_xsize,
+                                            info->intrinsic_ysize)) {
+      return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+                           "Invalid intrinsic dimensions");
+    }
+    enc->metadata.m.have_intrinsic_size = true;
+  }
+
+  // The number of extra channels includes the alpha channel, so for example an
+  // RGBA image with no other extra channels has exactly num_extra_channels == 1
+  enc->metadata.m.num_extra_channels = info->num_extra_channels;
+  enc->metadata.m.extra_channel_info.resize(enc->metadata.m.num_extra_channels);
+  if (info->num_extra_channels == 0 && info->alpha_bits) {
+    return JXL_API_ERROR(
+        enc, JXL_ENC_ERR_API_USAGE,
+        "when alpha_bits is non-zero, the number of channels must be at least "
+        "1");
+  }
+  // If the user provides non-zero alpha_bits, we make the channel info at index
+  // zero the appropriate alpha channel.
+  if (info->alpha_bits) {
+    JxlExtraChannelInfo channel_info;
+    JxlEncoderInitExtraChannelInfo(JXL_CHANNEL_ALPHA, &channel_info);
+    channel_info.bits_per_sample = info->alpha_bits;
+    channel_info.exponent_bits_per_sample = info->alpha_exponent_bits;
+    if (JXL_ENC_SUCCESS !=
+        JxlEncoderSetExtraChannelInfo(enc, 0, &channel_info)) {
+      return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+                           "Problem setting extra channel info for alpha");
+    }
+  }
+
+  enc->metadata.m.xyb_encoded = !info->uses_original_profile;
+  if (info->orientation > 0 && info->orientation <= 8) {
+    enc->metadata.m.orientation = info->orientation;
+  } else {
+    return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+                         "Invalid value for orientation field");
+  }
+  if (info->num_color_channels != 1 && info->num_color_channels != 3) {
+    return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+                         "Invalid number of color channels");
+  }
+  // Reject invalid animation timing here, i.e. before the basic info is
+  // committed below. The checks only read `info`, so doing them early does not
+  // change which error is reported for a given input.
+  if (info->have_animation) {
+    if (info->animation.tps_denominator < 1) {
+      return JXL_API_ERROR(
+          enc, JXL_ENC_ERR_API_USAGE,
+          "If animation is used, tps_denominator must be >= 1");
+    }
+    if (info->animation.tps_numerator < 1) {
+      return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+                           "If animation is used, tps_numerator must be >= 1");
+    }
+  }
+
+  if (info->intensity_target != 0) {
+    enc->metadata.m.SetIntensityTarget(info->intensity_target);
+    enc->intensity_target_set = true;
+  } else if (enc->color_encoding_set) {
+    // If this is false, JxlEncoderSetColorEncoding will be called later and we
+    // will get one more chance to call jxl::SetIntensityTarget, after the color
+    // encoding is indeed set.
+    jxl::SetIntensityTarget(&enc->metadata.m);
+    enc->intensity_target_set = true;
+  }
+  enc->metadata.m.tone_mapping.min_nits = info->min_nits;
+  enc->metadata.m.tone_mapping.relative_to_max_display =
+      info->relative_to_max_display;
+  enc->metadata.m.tone_mapping.linear_below = info->linear_below;
+  enc->basic_info = *info;
+  enc->basic_info_set = true;
+
+  enc->metadata.m.have_animation = info->have_animation;
+  if (info->have_animation) {
+    // Already validated above.
+    enc->metadata.m.animation.tps_numerator = info->animation.tps_numerator;
+    enc->metadata.m.animation.tps_denominator = info->animation.tps_denominator;
+    enc->metadata.m.animation.num_loops = info->animation.num_loops;
+    enc->metadata.m.animation.have_timecodes = info->animation.have_timecodes;
+  }
+  // NOTE(review): the level check still runs after basic_info_set was written,
+  // as before; confirm whether a level failure should also leave it unset.
+  std::string level_message;
+  int required_level = VerifyLevelSettings(enc, &level_message);
+  if (required_level == -1 ||
+      (static_cast<int>(enc->codestream_level) < required_level &&
+       enc->codestream_level != -1)) {
+    return JXL_API_ERROR(
+        enc, JXL_ENC_ERR_API_USAGE, "%s",
+        ("Codestream level verification for level " +
+         std::to_string(enc->codestream_level) + " failed: " + level_message)
+            .c_str());
+  }
+  return JXL_ENC_SUCCESS;
+}
+
+// Initialises `info` as an unnamed, full-resolution (dim_shift 0), 8-bit
+// integer extra channel of the given `type`, with a zeroed spot colour and
+// CFA channel and non-premultiplied alpha.
+void JxlEncoderInitExtraChannelInfo(JxlExtraChannelType type,
+                                    JxlExtraChannelInfo* info) {
+  info->type = type;
+
+  // Sample format.
+  info->bits_per_sample = 8;
+  info->exponent_bits_per_sample = 0;
+
+  // Layout and naming.
+  info->dim_shift = 0;
+  info->name_length = 0;
+
+  // Type-specific payloads.
+  info->alpha_premultiplied = JXL_FALSE;
+  info->cfa_channel = 0;
+  for (int component = 0; component < 4; ++component) {
+    info->spot_color[component] = 0;
+  }
+}
+
+// Chooses the upsampling weights signalled for the given `factor` (2, 4 or 8).
+//   mode -1: default upsampling, no custom weights are signalled;
+//   mode  0: nearest-neighbor weights;
+//   mode  1: 'pixel dots' (nearest-neighbor with cut corners).
+// A factor of 1 is accepted as a no-op. Other factors are API usage errors,
+// modes below -1 are invalid and modes above 1 are reported as unsupported.
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetUpsamplingMode(JxlEncoder* enc,
+                                                        const int64_t factor,
+                                                        const int64_t mode) {
+  // for convenience, allow calling this with factor 1 and just make it a no-op
+  if (factor == 1) return JXL_ENC_SUCCESS;
+  const bool factor_supported = (factor == 2 || factor == 4 || factor == 8);
+  if (!factor_supported) {
+    return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+                         "Invalid upsampling factor");
+  }
+  if (mode < -1) {
+    return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, "Invalid upsampling mode");
+  }
+  if (mode > 1) {
+    return JXL_API_ERROR(enc, JXL_ENC_ERR_NOT_SUPPORTED,
+                         "Unsupported upsampling mode");
+  }
+
+  auto& td = enc->metadata.transform_data;
+  // The mask bit for a factor is factor >> 1.
+  const auto mask_bit = factor >> 1;
+
+  if (mode == -1) {
+    // Default fancy upsampling: don't signal custom weights
+    td.custom_weights_mask &= ~mask_bit;
+    return JXL_ENC_SUCCESS;
+  }
+
+  // Modes 0 and 1 both start from the nearest-neighbor weight table.
+  float* weights = td.upsampling8_weights;
+  size_t count = 210;
+  if (factor == 2) {
+    weights = td.upsampling2_weights;
+    count = 15;
+  } else if (factor == 4) {
+    weights = td.upsampling4_weights;
+    count = 55;
+  }
+
+  // Nearest neighbor upsampling
+  td.custom_weights_mask |= mask_bit;
+  memset(weights, 0, sizeof(float) * count);
+  switch (factor) {
+    case 2:
+      weights[9] = 1.f;
+      break;
+    case 4:
+      for (int i : {19, 24, 49}) weights[i] = 1.f;
+      break;
+    default:  // factor == 8
+      for (int i : {39, 44, 49, 54, 119, 124, 129, 174, 179, 204}) {
+        weights[i] = 1.f;
+      }
+      break;
+  }
+
+  if (mode == 1) {
+    // 'Pixel dots' upsampling (nearest-neighbor with cut corners)
+    if (factor == 4) {
+      weights[19] = 0.f;
+      weights[24] = 0.5f;
+    } else if (factor == 8) {
+      for (int i : {39, 44, 49, 119}) weights[i] = 0.f;
+      for (int i : {54, 124}) weights[i] = 0.5f;
+    }
+  }
+  return JXL_ENC_SUCCESS;
+}
+
+// Copies the description `info` into extra channel `index` of the image
+// metadata (the channel name is reset to empty) and then re-verifies the
+// codestream level. Fails if `index` is not below the configured number of
+// extra channels, if the bit depth is invalid, or if the level check fails.
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelInfo(
+    JxlEncoder* enc, size_t index, const JxlExtraChannelInfo* info) {
+  auto& image_metadata = enc->metadata.m;
+  if (index >= image_metadata.num_extra_channels) {
+    return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+                         "Invalid value for the index of extra channel");
+  }
+  if (JXL_ENC_SUCCESS != CheckValidBitdepth(info->bits_per_sample,
+                                            info->exponent_bits_per_sample)) {
+    return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, "Invalid bit depth");
+  }
+
+  // A channel with more than 12 bits per sample rules out the 16-bit buffer.
+  image_metadata.modular_16_bit_buffer_sufficient &=
+      info->bits_per_sample <= 12;
+
+  jxl::ExtraChannelInfo& target = image_metadata.extra_channel_info[index];
+  target.type = static_cast<jxl::ExtraChannel>(info->type);
+  target.name.clear();
+  target.dim_shift = info->dim_shift;
+  target.bit_depth.bits_per_sample = info->bits_per_sample;
+  target.bit_depth.exponent_bits_per_sample = info->exponent_bits_per_sample;
+  target.bit_depth.floating_point_sample = info->exponent_bits_per_sample != 0;
+  target.alpha_associated = (info->alpha_premultiplied != 0);
+  target.cfa_channel = info->cfa_channel;
+  for (size_t component = 0; component < 4; ++component) {
+    target.spot_color[component] = info->spot_color[component];
+  }
+
+  // The level is acceptable when verification succeeds and the configured
+  // level is either -1 or at least the required one.
+  std::string level_message;
+  const int required_level = VerifyLevelSettings(enc, &level_message);
+  const bool level_ok =
+      required_level != -1 &&
+      (static_cast<int>(enc->codestream_level) >= required_level ||
+       enc->codestream_level == -1);
+  if (!level_ok) {
+    return JXL_API_ERROR(
+        enc, JXL_ENC_ERR_API_USAGE, "%s",
+        ("Codestream level verification for level " +
+         std::to_string(enc->codestream_level) + " failed: " + level_message)
+            .c_str());
+  }
+  return JXL_ENC_SUCCESS;
+}
+
+// Stores the `size` bytes starting at `name` as the name of extra channel
+// `index`. Fails when `index` is not below the configured number of extra
+// channels.
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelName(JxlEncoder* enc,
+                                                          size_t index,
+                                                          const char* name,
+                                                          size_t size) {
+  auto& image_metadata = enc->metadata.m;
+  const bool index_in_range = index < image_metadata.num_extra_channels;
+  if (!index_in_range) {
+    return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+                         "Invalid value for the index of extra channel");
+  }
+  const char* const name_end = name + size;
+  image_metadata.extra_channel_info[index].name = std::string(name, name_end);
+  return JXL_ENC_SUCCESS;
+}
+
+// Allocates a new frame settings object through the encoder's memory manager,
+// hands ownership to `enc->encoder_options` and returns a raw pointer to it
+// (nullptr if the allocation failed). When `source` is given its values are
+// copied; otherwise only `lossless` is explicitly reset to false. In both
+// cases the codestream level and the per-extra-channel distance list are then
+// taken from the encoder.
+JxlEncoderFrameSettings* JxlEncoderFrameSettingsCreate(
+    JxlEncoder* enc, const JxlEncoderFrameSettings* source) {
+  auto settings = jxl::MemoryManagerMakeUnique<JxlEncoderFrameSettings>(
+      &enc->memory_manager);
+  if (!settings) return nullptr;
+
+  settings->enc = enc;
+  if (source == nullptr) {
+    settings->values.lossless = false;
+  } else {
+    settings->values = source->values;
+  }
+
+  // These always follow the encoder, even when copying from `source`.
+  auto& cparams = settings->values.cparams;
+  cparams.level = enc->codestream_level;
+  cparams.ec_distance.resize(enc->metadata.m.num_extra_channels, -1);
+
+  // Grab the raw pointer before ownership moves into the encoder's list.
+  JxlEncoderFrameSettings* const created = settings.get();
+  enc->encoder_options.emplace_back(std::move(settings));
+  return created;
+}
+
+// Switches lossless mode for frames encoded with `frame_settings`. Enabling
+// it is refused when basic info has been set with XYB encoding, i.e. with
+// uses_original_profile == false.
+JxlEncoderStatus JxlEncoderSetFrameLossless(
+    JxlEncoderFrameSettings* frame_settings, const JXL_BOOL lossless) {
+  if (lossless) {
+    JxlEncoder* const enc = frame_settings->enc;
+    if (enc->basic_info_set && enc->metadata.m.xyb_encoded) {
+      return JXL_API_ERROR(
+          enc, JXL_ENC_ERR_API_USAGE,
+          "Set uses_original_profile=true for lossless encoding");
+    }
+  }
+  frame_settings->values.lossless = lossless;
+  return JXL_ENC_SUCCESS;
+}
+
+// Sets the butteraugli distance used for frames encoded with
+// `frame_settings`.
+//
+// `distance` must lie in [0.0, 25.0]; anything else, including NaN, is
+// rejected with JXL_ENC_ERR_API_USAGE. Positive values below 0.01 are raised
+// to 0.01, while exactly 0 is stored unchanged.
+JxlEncoderStatus JxlEncoderSetFrameDistance(
+    JxlEncoderFrameSettings* frame_settings, float distance) {
+  // Written as a negated in-range test so that NaN, for which every ordered
+  // comparison is false, is rejected instead of slipping through.
+  if (!(distance >= 0.f && distance <= 25.f)) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                         "Distance has to be in [0.0..25.0] (corresponding to "
+                         "quality in [0.0..100.0])");
+  }
+  if (distance > 0.f && distance < 0.01f) {
+    distance = 0.01f;
+  }
+  frame_settings->values.cparams.butteraugli_distance = distance;
+  return JXL_ENC_SUCCESS;
+}
+
+// Sets the distance used for extra channel `index` in frames encoded with
+// `frame_settings`.
+//
+// `index` must be below the encoder's number of extra channels. `distance`
+// must be exactly -1 or lie in [0.0, 25.0]; anything else, including NaN, is
+// rejected with JXL_ENC_ERR_API_USAGE. Positive values below 0.01 are raised
+// to 0.01.
+JxlEncoderStatus JxlEncoderSetExtraChannelDistance(
+    JxlEncoderFrameSettings* frame_settings, size_t index, float distance) {
+  if (index >= frame_settings->enc->metadata.m.num_extra_channels) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                         "Invalid value for the index of extra channel");
+  }
+  // Written as a negated in-range test so that NaN, for which every ordered
+  // comparison is false, is rejected instead of slipping through.
+  if (distance != -1.f && !(distance >= 0.f && distance <= 25.f)) {
+    return JXL_API_ERROR(
+        frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+        "Distance has to be -1 or in [0.0..25.0] (corresponding to "
+        "quality in [0.0..100.0])");
+  }
+  if (distance > 0.f && distance < 0.01f) {
+    distance = 0.01f;
+  }
+
+  if (index >= frame_settings->values.cparams.ec_distance.size()) {
+    // This can only happen if JxlEncoderFrameSettingsCreate() was called before
+    // JxlEncoderSetBasicInfo().
+    frame_settings->values.cparams.ec_distance.resize(
+        frame_settings->enc->metadata.m.num_extra_channels, -1);
+  }
+
+  frame_settings->values.cparams.ec_distance[index] = distance;
+  return JXL_ENC_SUCCESS;
+}
+
+// Sets the integer-valued frame setting `option` to `value`.
+//
+// Works in two passes: the first switch range-checks the options whose value
+// must be -1 (default), 0 (off) or 1 (on); the second switch validates any
+// option-specific range and stores the value in the frame's compression
+// parameters. Returns JXL_ENC_SUCCESS when the value was accepted. Every
+// rejection goes through JXL_API_ERROR so that an error code is recorded on
+// the encoder; unknown options are reported as JXL_ENC_ERR_NOT_SUPPORTED.
+JxlEncoderStatus JxlEncoderFrameSettingsSetOption(
+    JxlEncoderFrameSettings* frame_settings, JxlEncoderFrameSettingId option,
+    int64_t value) {
+  // check if value is -1, 0 or 1 for Override-type options
+  switch (option) {
+    case JXL_ENC_FRAME_SETTING_NOISE:
+    case JXL_ENC_FRAME_SETTING_DOTS:
+    case JXL_ENC_FRAME_SETTING_PATCHES:
+    case JXL_ENC_FRAME_SETTING_GABORISH:
+    case JXL_ENC_FRAME_SETTING_MODULAR:
+    case JXL_ENC_FRAME_SETTING_KEEP_INVISIBLE:
+    case JXL_ENC_FRAME_SETTING_GROUP_ORDER:
+    case JXL_ENC_FRAME_SETTING_RESPONSIVE:
+    case JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC:
+    case JXL_ENC_FRAME_SETTING_QPROGRESSIVE_AC:
+    case JXL_ENC_FRAME_SETTING_LOSSY_PALETTE:
+    case JXL_ENC_FRAME_SETTING_JPEG_RECON_CFL:
+    case JXL_ENC_FRAME_SETTING_JPEG_COMPRESS_BOXES:
+      if (value < -1 || value > 1) {
+        return JXL_API_ERROR(
+            frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+            "Option value has to be -1 (default), 0 (off) or 1 (on)");
+      }
+      break;
+    default:
+      break;
+  }
+
+  switch (option) {
+    case JXL_ENC_FRAME_SETTING_EFFORT:
+      // Effort 10 is only accepted when expert options are allowed.
+      if (frame_settings->enc->allow_expert_options) {
+        if (value < 1 || value > 10) {
+          return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED,
+                               "Encode effort has to be in [1..10]");
+        }
+      } else {
+        if (value < 1 || value > 9) {
+          return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED,
+                               "Encode effort has to be in [1..9]");
+        }
+      }
+      // Speed tier runs opposite to effort: tier = 10 - effort.
+      frame_settings->values.cparams.speed_tier =
+          static_cast<jxl::SpeedTier>(10 - value);
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_BROTLI_EFFORT:
+      if (value < -1 || value > 11) {
+        return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                             "Brotli effort has to be in [-1..11]");
+      }
+      // set cparams for brotli use in JPEG frames
+      frame_settings->values.cparams.brotli_effort = value;
+      // set enc option for brotli use in brob boxes
+      frame_settings->enc->brotli_effort = value;
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_DECODING_SPEED:
+      if (value < 0 || value > 4) {
+        return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED,
+                             "Decoding speed has to be in [0..4]");
+      }
+      frame_settings->values.cparams.decoding_speed_tier = value;
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_RESAMPLING:
+      // -1 is accepted in addition to the factors named in the message.
+      if (value != -1 && value != 1 && value != 2 && value != 4 && value != 8) {
+        return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                             "Resampling factor has to be 1, 2, 4 or 8");
+      }
+      frame_settings->values.cparams.resampling = value;
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_EXTRA_CHANNEL_RESAMPLING:
+      // TODO(lode): the jxl codestream allows choosing a different resampling
+      // factor for each extra channel, independently per frame. Move this
+      // option to a JxlEncoderFrameSettings-option that can be set per extra
+      // channel, so needs its own function rather than
+      // JxlEncoderFrameSettingsSetOption due to the extra channel index
+      // argument required.
+      if (value != -1 && value != 1 && value != 2 && value != 4 && value != 8) {
+        return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                             "Resampling factor has to be 1, 2, 4 or 8");
+      }
+      frame_settings->values.cparams.ec_resampling = value;
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_ALREADY_DOWNSAMPLED:
+      // Reported through JXL_API_ERROR like every other rejection in this
+      // function, so that an error code is recorded on the encoder.
+      if (value < 0 || value > 1) {
+        return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                             "Already downsampled has to be 0 or 1");
+      }
+      frame_settings->values.cparams.already_downsampled = (value == 1);
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_NOISE:
+      frame_settings->values.cparams.noise = static_cast<jxl::Override>(value);
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_DOTS:
+      frame_settings->values.cparams.dots = static_cast<jxl::Override>(value);
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_PATCHES:
+      frame_settings->values.cparams.patches =
+          static_cast<jxl::Override>(value);
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_EPF:
+      if (value < -1 || value > 3) {
+        return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                             "EPF value has to be in [-1..3]");
+      }
+      frame_settings->values.cparams.epf = static_cast<int>(value);
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_GABORISH:
+      frame_settings->values.cparams.gaborish =
+          static_cast<jxl::Override>(value);
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_MODULAR:
+      // Both -1 (default) and 0 leave modular mode off.
+      frame_settings->values.cparams.modular_mode = (value == 1);
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_KEEP_INVISIBLE:
+      frame_settings->values.cparams.keep_invisible =
+          static_cast<jxl::Override>(value);
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_GROUP_ORDER:
+      frame_settings->values.cparams.centerfirst = (value == 1);
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_X:
+      if (value < -1) {
+        return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                             "Center x coordinate has to be -1 or positive");
+      }
+      // -1 is stored as the maximum size_t value by this cast.
+      frame_settings->values.cparams.center_x = static_cast<size_t>(value);
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_Y:
+      if (value < -1) {
+        return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                             "Center y coordinate has to be -1 or positive");
+      }
+      // -1 is stored as the maximum size_t value by this cast.
+      frame_settings->values.cparams.center_y = static_cast<size_t>(value);
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_RESPONSIVE:
+      frame_settings->values.cparams.responsive = value;
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC:
+      frame_settings->values.cparams.progressive_mode = value;
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_QPROGRESSIVE_AC:
+      frame_settings->values.cparams.qprogressive_mode = value;
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC:
+      if (value < -1 || value > 2) {
+        return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                             "Progressive DC has to be in [-1..2]");
+      }
+      frame_settings->values.cparams.progressive_dc = value;
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_PALETTE_COLORS:
+      if (value < -1 || value > 70913) {
+        return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                             "Option value has to be in [-1..70913]");
+      }
+      if (value == -1) {
+        // Default: 1024 palette colors.
+        frame_settings->values.cparams.palette_colors = 1 << 10;
+      } else {
+        frame_settings->values.cparams.palette_colors = value;
+      }
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_LOSSY_PALETTE:
+      // TODO(lode): the defaults of some palette settings depend on others.
+      // See the logic in cjxl. Similar for other settings. This should be
+      // handled in the encoder during JxlEncoderProcessOutput (or,
+      // alternatively, in the cjxl binary like now)
+      frame_settings->values.cparams.lossy_palette = (value == 1);
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_COLOR_TRANSFORM:
+      if (value < -1 || value > 2) {
+        return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                             "Option value has to be in [-1..2]");
+      }
+      if (value == -1) {
+        // Default: XYB.
+        frame_settings->values.cparams.color_transform =
+            jxl::ColorTransform::kXYB;
+      } else {
+        frame_settings->values.cparams.color_transform =
+            static_cast<jxl::ColorTransform>(value);
+      }
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_MODULAR_COLOR_SPACE:
+      if (value < -1 || value > 41) {
+        return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                             "Option value has to be in [-1..41]");
+      }
+      frame_settings->values.cparams.colorspace = value;
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_MODULAR_GROUP_SIZE:
+      if (value < -1 || value > 3) {
+        return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                             "Option value has to be in [-1..3]");
+      }
+      frame_settings->values.cparams.modular_group_size_shift = value;
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR:
+      if (value < -1 || value > 15) {
+        return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                             "Option value has to be in [-1..15]");
+      }
+      frame_settings->values.cparams.options.predictor =
+          static_cast<jxl::Predictor>(value);
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_MODULAR_NB_PREV_CHANNELS:
+      // The max allowed value can in theory be higher. However, it depends on
+      // the effort setting. 11 is the highest safe value that doesn't cause
+      // tree_samples to be >= 64 in the encoder. The specification may allow
+      // more than this. With more fine tuning higher values could be allowed.
+      // For N-channel images, the largest useful value is N-1.
+      if (value < -1 || value > 11) {
+        return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                             "Option value has to be in [-1..11]");
+      }
+      if (value == -1) {
+        frame_settings->values.cparams.options.max_properties = 0;
+      } else {
+        frame_settings->values.cparams.options.max_properties = value;
+      }
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_JPEG_RECON_CFL:
+      // The default (-1) is treated as enabled.
+      if (value == -1) {
+        frame_settings->values.cparams.force_cfl_jpeg_recompression = true;
+      } else {
+        frame_settings->values.cparams.force_cfl_jpeg_recompression = value;
+      }
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_INDEX_BOX:
+      // NOTE(review): the box is enabled regardless of `value`; confirm
+      // whether 0 is meant to switch it off.
+      frame_settings->values.frame_index_box = true;
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_PHOTON_NOISE:
+      return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED,
+                           "Float option, try setting it with "
+                           "JxlEncoderFrameSettingsSetFloatOption");
+    case JXL_ENC_FRAME_SETTING_JPEG_COMPRESS_BOXES:
+      frame_settings->values.cparams.jpeg_compress_boxes = value;
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_BUFFERING:
+      // The value is only range-checked here; it is not stored anywhere.
+      if (value < 0 || value > 3) {
+        return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED,
+                             "Buffering has to be in [0..3]");
+      }
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_JPEG_KEEP_EXIF:
+      frame_settings->values.cparams.jpeg_keep_exif = value;
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_JPEG_KEEP_XMP:
+      frame_settings->values.cparams.jpeg_keep_xmp = value;
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_JPEG_KEEP_JUMBF:
+      frame_settings->values.cparams.jpeg_keep_jumbf = value;
+      return JXL_ENC_SUCCESS;
+
+    default:
+      return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED,
+                           "Unknown option");
+  }
+}
+
+// Sets the floating-point frame setting `option` to `value`.
+//
+// Returns JXL_ENC_SUCCESS when the value was stored in
+// `frame_settings->values.cparams`. Otherwise an error is returned and the
+// error code is recorded on the encoder through JXL_API_ERROR:
+//  - JXL_ENC_ERR_API_USAGE when `value` is outside the accepted range,
+//  - JXL_ENC_ERR_NOT_SUPPORTED when `option` is an integer option (those are
+//    set with JxlEncoderFrameSettingsSetOption) or is not known at all.
+//
+// Range checks are written in the "not inside the range" form so that a NaN
+// value is rejected too (every ordered comparison with NaN is false).
+JxlEncoderStatus JxlEncoderFrameSettingsSetFloatOption(
+    JxlEncoderFrameSettings* frame_settings, JxlEncoderFrameSettingId option,
+    float value) {
+  switch (option) {
+    case JXL_ENC_FRAME_SETTING_PHOTON_NOISE:
+      if (!(value >= 0)) {
+        // Report through JXL_API_ERROR like every other branch so that the
+        // error code is available from JxlEncoderGetError.
+        return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                             "Option value has to be non-negative");
+      }
+      // TODO(lode): add encoder setting to set the 8 floating point values of
+      // the noise synthesis parameters per frame for more fine grained control.
+      frame_settings->values.cparams.photon_noise_iso = value;
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_MODULAR_MA_TREE_LEARNING_PERCENT:
+      if (!(value >= -1.f && value <= 100.f)) {
+        return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                             "Option value has to be in [-1..100]");
+      }
+      // This value is called "iterations" or "nb_repeats" in cjxl, but is in
+      // fact a fraction in range 0.0-1.0, with the default value 0.5.
+      // Convert from floating point percentage to floating point fraction here.
+      if (value < -.5f) {
+        // TODO(lode): for this and many other settings (also in
+        // JxlEncoderFrameSettingsSetOption), avoid duplicating the default
+        // values here and in enc_params.h and options.h, have one location
+        // where the defaults are specified.
+        frame_settings->values.cparams.options.nb_repeats = 0.5f;
+      } else {
+        frame_settings->values.cparams.options.nb_repeats = value * 0.01f;
+      }
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GLOBAL_PERCENT:
+      if (!(value >= -1.f && value <= 100.f)) {
+        return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                             "Option value has to be in [-1..100]");
+      }
+      // Values below -0.5 (i.e. "-1") select the default of 95 percent.
+      if (value < -.5f) {
+        frame_settings->values.cparams.channel_colors_pre_transform_percent =
+            95.0f;
+      } else {
+        frame_settings->values.cparams.channel_colors_pre_transform_percent =
+            value;
+      }
+      return JXL_ENC_SUCCESS;
+    case JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GROUP_PERCENT:
+      if (!(value >= -1.f && value <= 100.f)) {
+        return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                             "Option value has to be in [-1..100]");
+      }
+      // Values below -0.5 (i.e. "-1") select the default of 80 percent.
+      if (value < -.5f) {
+        frame_settings->values.cparams.channel_colors_percent = 80.0f;
+      } else {
+        frame_settings->values.cparams.channel_colors_percent = value;
+      }
+      return JXL_ENC_SUCCESS;
+    // Everything below is an integer option; point the caller at the right
+    // setter instead of silently truncating the float.
+    case JXL_ENC_FRAME_SETTING_EFFORT:
+    case JXL_ENC_FRAME_SETTING_DECODING_SPEED:
+    case JXL_ENC_FRAME_SETTING_RESAMPLING:
+    case JXL_ENC_FRAME_SETTING_EXTRA_CHANNEL_RESAMPLING:
+    case JXL_ENC_FRAME_SETTING_ALREADY_DOWNSAMPLED:
+    case JXL_ENC_FRAME_SETTING_NOISE:
+    case JXL_ENC_FRAME_SETTING_DOTS:
+    case JXL_ENC_FRAME_SETTING_PATCHES:
+    case JXL_ENC_FRAME_SETTING_EPF:
+    case JXL_ENC_FRAME_SETTING_GABORISH:
+    case JXL_ENC_FRAME_SETTING_MODULAR:
+    case JXL_ENC_FRAME_SETTING_KEEP_INVISIBLE:
+    case JXL_ENC_FRAME_SETTING_GROUP_ORDER:
+    case JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_X:
+    case JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_Y:
+    case JXL_ENC_FRAME_SETTING_RESPONSIVE:
+    case JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC:
+    case JXL_ENC_FRAME_SETTING_QPROGRESSIVE_AC:
+    case JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC:
+    case JXL_ENC_FRAME_SETTING_PALETTE_COLORS:
+    case JXL_ENC_FRAME_SETTING_LOSSY_PALETTE:
+    case JXL_ENC_FRAME_SETTING_COLOR_TRANSFORM:
+    case JXL_ENC_FRAME_SETTING_MODULAR_COLOR_SPACE:
+    case JXL_ENC_FRAME_SETTING_MODULAR_GROUP_SIZE:
+    case JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR:
+    case JXL_ENC_FRAME_SETTING_MODULAR_NB_PREV_CHANNELS:
+    case JXL_ENC_FRAME_SETTING_JPEG_RECON_CFL:
+    case JXL_ENC_FRAME_INDEX_BOX:
+    case JXL_ENC_FRAME_SETTING_BROTLI_EFFORT:
+    case JXL_ENC_FRAME_SETTING_FILL_ENUM:
+    case JXL_ENC_FRAME_SETTING_JPEG_COMPRESS_BOXES:
+    case JXL_ENC_FRAME_SETTING_BUFFERING:
+    case JXL_ENC_FRAME_SETTING_JPEG_KEEP_EXIF:
+    case JXL_ENC_FRAME_SETTING_JPEG_KEEP_XMP:
+    case JXL_ENC_FRAME_SETTING_JPEG_KEEP_JUMBF:
+      return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED,
+                           "Int option, try setting it with "
+                           "JxlEncoderFrameSettingsSetOption");
+    default:
+      return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED,
+                           "Unknown option");
+  }
+}
+// Creates an encoder whose storage comes from `memory_manager`.
+//
+// Returns nullptr when the memory manager cannot be initialized or when the
+// allocation fails. On success the encoder has its CMS set to
+// jxl::GetJxlCms() and all remaining fields initialized by JxlEncoderReset.
+JxlEncoder* JxlEncoderCreate(const JxlMemoryManager* memory_manager) {
+  JxlMemoryManager manager;
+  const bool manager_ok = jxl::MemoryManagerInit(&manager, memory_manager);
+  if (!manager_ok) return nullptr;
+
+  void* storage = jxl::MemoryManagerAlloc(&manager, sizeof(JxlEncoder));
+  if (storage == nullptr) return nullptr;
+
+  // Placement-new: the object lives in the block obtained above.
+  JxlEncoder* encoder = new (storage) JxlEncoder();
+  encoder->memory_manager = manager;
+  // TODO(sboukortt): add an API function to set this.
+  encoder->cms = jxl::GetJxlCms();
+  encoder->cms_set = true;
+
+  // Initialize all the field values.
+  JxlEncoderReset(encoder);
+  return encoder;
+}
+
+// Puts `enc` back into its initial state so it can encode another image.
+//
+// Clears the thread pool, all queues, counters, metadata, and every
+// "already set"/"closed" flag, and re-initializes the basic info. The
+// `memory_manager` and `cms` fields are not touched here.
+void JxlEncoderReset(JxlEncoder* enc) {
+  enc->thread_pool.reset();
+  enc->input_queue.clear();
+  enc->num_queued_frames = 0;
+  enc->num_queued_boxes = 0;
+  enc->encoder_options.clear();
+  enc->output_byte_queue.clear();
+  enc->output_fast_frame_queue.clear();
+  enc->codestream_bytes_written_beginning_of_frame = 0;
+  enc->codestream_bytes_written_end_of_frame = 0;
+  enc->wrote_bytes = false;
+  enc->jxlp_counter = 0;
+  enc->metadata = jxl::CodecMetadata();
+  enc->last_used_cparams = jxl::CompressParams();
+  enc->frames_closed = false;
+  enc->boxes_closed = false;
+  enc->basic_info_set = false;
+  enc->color_encoding_set = false;
+  enc->intensity_target_set = false;
+  enc->use_container = false;
+  enc->use_boxes = false;
+  // JPEG reconstruction state is per image: without clearing it, a reused
+  // encoder would keep the flag set by JxlEncoderStoreJPEGMetadata and the
+  // reconstruction bytes stored by JxlEncoderAddJPEGFrame for the previous
+  // image.
+  enc->store_jpeg_metadata = false;
+  enc->jpeg_metadata = std::vector<uint8_t>();
+  enc->codestream_level = -1;
+  JxlEncoderInitBasicInfo(&enc->basic_info);
+}
+
+// Destroys `enc` and releases its storage; a null pointer is ignored.
+void JxlEncoderDestroy(JxlEncoder* enc) {
+  if (enc == nullptr) return;
+
+  // Copy the manager out first: it is stored inside the object that is about
+  // to be destroyed, and it is still needed to free the storage afterwards.
+  JxlMemoryManager manager_copy = enc->memory_manager;
+  // Call destructor directly since custom free function is used.
+  enc->~JxlEncoder();
+  jxl::MemoryManagerFree(&manager_copy, enc);
+}
+
+JxlEncoderError JxlEncoderGetError(JxlEncoder* enc) { return enc->error; }
+
+// Stores `use_container` on the encoder.
+//
+// Fails with JXL_ENC_ERR_API_USAGE once `enc->wrote_bytes` is set, because
+// the setting can only be changed at the beginning.
+JxlEncoderStatus JxlEncoderUseContainer(JxlEncoder* enc,
+                                        JXL_BOOL use_container) {
+  if (!enc->wrote_bytes) {
+    enc->use_container = (use_container != 0);
+    return JXL_ENC_SUCCESS;
+  }
+  return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+                       "this setting can only be set at the beginning");
+}
+
+// Stores `store_jpeg_metadata` on the encoder.
+//
+// Fails with JXL_ENC_ERR_API_USAGE once `enc->wrote_bytes` is set, because
+// the setting can only be changed at the beginning.
+JxlEncoderStatus JxlEncoderStoreJPEGMetadata(JxlEncoder* enc,
+                                             JXL_BOOL store_jpeg_metadata) {
+  if (!enc->wrote_bytes) {
+    enc->store_jpeg_metadata = (store_jpeg_metadata != 0);
+    return JXL_ENC_SUCCESS;
+  }
+  return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+                       "this setting can only be set at the beginning");
+}
+
+// Stores the requested codestream level on the encoder.
+//
+// Only -1, 5 and 10 are accepted; anything else yields
+// JXL_ENC_ERR_NOT_SUPPORTED. A valid level is still rejected with
+// JXL_ENC_ERR_API_USAGE once `enc->wrote_bytes` is set.
+JxlEncoderStatus JxlEncoderSetCodestreamLevel(JxlEncoder* enc, int level) {
+  switch (level) {
+    case -1:
+    case 5:
+    case 10:
+      break;
+    default:
+      return JXL_API_ERROR(enc, JXL_ENC_ERR_NOT_SUPPORTED, "invalid level");
+  }
+  if (enc->wrote_bytes) {
+    return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+                         "this setting can only be set at the beginning");
+  }
+  enc->codestream_level = level;
+  return JXL_ENC_SUCCESS;
+}
+
+// Returns whatever VerifyLevelSettings reports for `enc` when called with a
+// null second argument.
+int JxlEncoderGetRequiredCodestreamLevel(const JxlEncoder* enc) {
+  const int required_level = VerifyLevelSettings(enc, nullptr);
+  return required_level;
+}
+
+// Installs `cms` as the encoder's color management interface and marks it as
+// set.
+void JxlEncoderSetCms(JxlEncoder* enc, JxlCmsInterface cms) {
+  // NOTE(review): presumably a MemorySanitizer annotation covering the whole
+  // by-value struct — confirm against jxl::msan.
+  jxl::msan::MemoryIsInitialized(&cms, sizeof(cms));
+  enc->cms_set = true;
+  enc->cms = cms;
+}
+
+// Creates the encoder's thread pool from the given runner callback and its
+// opaque argument.
+//
+// Fails with JXL_ENC_ERR_API_USAGE when a pool already exists, and with
+// JXL_ENC_ERR_GENERIC when the pool could not be created.
+JxlEncoderStatus JxlEncoderSetParallelRunner(JxlEncoder* enc,
+                                             JxlParallelRunner parallel_runner,
+                                             void* parallel_runner_opaque) {
+  auto& pool = enc->thread_pool;
+  if (pool) {
+    return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+                         "parallel runner already set");
+  }
+
+  pool = jxl::MemoryManagerMakeUnique<jxl::ThreadPool>(
+      &enc->memory_manager, parallel_runner, parallel_runner_opaque);
+  if (pool) return JXL_ENC_SUCCESS;
+
+  return JXL_API_ERROR(enc, JXL_ENC_ERR_GENERIC,
+                       "error setting parallel runner");
+}
+
+namespace {
+// Writes the pixel dimensions expected for the next frame into `xsize` and
+// `ysize`.
+//
+// Starts from the image dimensions in the encoder metadata, replaces them by
+// the layer size when the frame header has a crop, and divides (rounding up)
+// by the resampling factor when the input is flagged as already downsampled.
+// A result with a zero dimension is reported as JXL_ENC_ERR_API_USAGE; the
+// output parameters are written in that case as well.
+JxlEncoderStatus GetCurrentDimensions(
+    const JxlEncoderFrameSettings* frame_settings, size_t& xsize,
+    size_t& ysize) {
+  const auto& settings = frame_settings->values;
+  const auto& layer = settings.header.layer_info;
+
+  xsize = frame_settings->enc->metadata.xsize();
+  ysize = frame_settings->enc->metadata.ysize();
+  if (layer.have_crop) {
+    xsize = layer.xsize;
+    ysize = layer.ysize;
+  }
+
+  if (settings.cparams.already_downsampled) {
+    size_t divisor = settings.cparams.resampling;
+    xsize = jxl::DivCeil(xsize, divisor);
+    ysize = jxl::DivCeil(ysize, divisor);
+  }
+
+  const bool is_empty = (xsize == 0) || (ysize == 0);
+  if (is_empty) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                         "zero-sized frame is not allowed");
+  }
+  return JXL_ENC_SUCCESS;
+}
+}  // namespace
+
+// Decodes the JPEG bitstream in `buffer` (`size` bytes) and queues it as a
+// frame on the encoder that `frame_settings` belongs to.
+//
+// Steps, in order:
+//  1. Reject the call when frame input is already closed.
+//  2. Decode the JPEG into a local CodecInOut.
+//  3. If no color encoding was set on the encoder, derive it from the JPEG.
+//  4. If no basic info was set, set it from the JPEG width/height with
+//     uses_original_profile = true.
+//  5. Reject the call when the metadata says the image is XYB encoded.
+//  6. Take the orientation from the Exif blob, and add Exif / XMP / JUMBF
+//     boxes when the corresponding jpeg_keep_* setting is enabled.
+//  7. When `store_jpeg_metadata` is enabled, encode the JPEG reconstruction
+//     data and store it on the encoder.
+//  8. Build the queued frame, verify its dimensions and queue it.
+//
+// Returns JXL_ENC_SUCCESS once the frame is queued, an error status
+// otherwise. Note that several steps modify the encoder before later steps
+// can still fail (color encoding, basic info, orientation, queued boxes).
+JxlEncoderStatus JxlEncoderAddJPEGFrame(
+    const JxlEncoderFrameSettings* frame_settings, const uint8_t* buffer,
+    size_t size) {
+  if (frame_settings->enc->frames_closed) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                         "Frame input is already closed");
+  }
+
+  jxl::CodecInOut io;
+  if (!jxl::jpeg::DecodeImageJPG(jxl::Span<const uint8_t>(buffer, size), &io)) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_BAD_INPUT,
+                         "Error during decode of input JPEG");
+  }
+
+  // The color encoding is written straight into the encoder metadata; the
+  // color_encoding_set flag itself is not changed here.
+  if (!frame_settings->enc->color_encoding_set) {
+    if (!SetColorEncodingFromJpegData(
+            *io.Main().jpeg_data,
+            &frame_settings->enc->metadata.m.color_encoding)) {
+      return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_BAD_INPUT,
+                           "Error in input JPEG color space");
+    }
+  }
+
+  if (!frame_settings->enc->basic_info_set) {
+    JxlBasicInfo basic_info;
+    JxlEncoderInitBasicInfo(&basic_info);
+    basic_info.xsize = io.Main().jpeg_data->width;
+    basic_info.ysize = io.Main().jpeg_data->height;
+    basic_info.uses_original_profile = true;
+    if (JxlEncoderSetBasicInfo(frame_settings->enc, &basic_info) !=
+        JXL_ENC_SUCCESS) {
+      return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC,
+                           "Error setting basic info");
+    }
+  }
+
+  // Only reachable with xyb_encoded set when the caller provided basic info
+  // themselves; the basic info built above uses the original profile.
+  if (frame_settings->enc->metadata.m.xyb_encoded) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                         "Can't XYB encode a lossless JPEG");
+  }
+  // The orientation is read from Exif regardless of jpeg_keep_exif.
+  if (!io.blobs.exif.empty()) {
+    JxlOrientation orientation = static_cast<JxlOrientation>(
+        frame_settings->enc->metadata.m.orientation);
+    jxl::InterpretExif(io.blobs.exif, &orientation);
+    frame_settings->enc->metadata.m.orientation = orientation;
+  }
+  // NOTE(review): in the three box sections below the return values of
+  // JxlEncoderUseBoxes and JxlEncoderAddBox are ignored, so a failure to add
+  // a box does not fail this call — confirm this best-effort is intended.
+  if (!io.blobs.exif.empty() && frame_settings->values.cparams.jpeg_keep_exif) {
+    size_t exif_size = io.blobs.exif.size();
+    // Exif data in JPEG is limited to 64k
+    if (exif_size > 0xFFFF) {
+      return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC,
+                           "Exif larger than possible in JPEG?");
+    }
+    exif_size += 4;  // prefix 4 zero bytes for tiff offset
+    // The vector is value-initialized, so the 4 prefix bytes are zero.
+    std::vector<uint8_t> exif(exif_size);
+    memcpy(exif.data() + 4, io.blobs.exif.data(), io.blobs.exif.size());
+    JxlEncoderUseBoxes(frame_settings->enc);
+    JxlEncoderAddBox(frame_settings->enc, "Exif", exif.data(), exif_size,
+                     frame_settings->values.cparams.jpeg_compress_boxes);
+  }
+  if (!io.blobs.xmp.empty() && frame_settings->values.cparams.jpeg_keep_xmp) {
+    JxlEncoderUseBoxes(frame_settings->enc);
+    JxlEncoderAddBox(frame_settings->enc, "xml ", io.blobs.xmp.data(),
+                     io.blobs.xmp.size(),
+                     frame_settings->values.cparams.jpeg_compress_boxes);
+  }
+  if (!io.blobs.jumbf.empty() &&
+      frame_settings->values.cparams.jpeg_keep_jumbf) {
+    JxlEncoderUseBoxes(frame_settings->enc);
+    JxlEncoderAddBox(frame_settings->enc, "jumb", io.blobs.jumbf.data(),
+                     io.blobs.jumbf.size(),
+                     frame_settings->values.cparams.jpeg_compress_boxes);
+  }
+  // This check runs after the box sections above, so boxes may already have
+  // been queued when it fails.
+  if (frame_settings->enc->store_jpeg_metadata) {
+    if (!frame_settings->values.cparams.jpeg_keep_exif ||
+        !frame_settings->values.cparams.jpeg_keep_xmp) {
+      return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                           "Need to preserve EXIF and XMP to allow JPEG "
+                           "bitstream reconstruction");
+    }
+    // Works on a copy of the decoded JPEG data; the original is moved into
+    // the queued frame further down.
+    jxl::jpeg::JPEGData data_in = *io.Main().jpeg_data;
+    jxl::PaddedBytes jpeg_data;
+    if (!jxl::jpeg::EncodeJPEGData(data_in, &jpeg_data,
+                                   frame_settings->values.cparams)) {
+      return JXL_API_ERROR(
+          frame_settings->enc, JXL_ENC_ERR_JBRD,
+          "JPEG bitstream reconstruction data cannot be encoded");
+    }
+    frame_settings->enc->jpeg_metadata = std::vector<uint8_t>(
+        jpeg_data.data(), jpeg_data.data() + jpeg_data.size());
+  }
+
+  auto queued_frame = jxl::MemoryManagerMakeUnique<jxl::JxlEncoderQueuedFrame>(
+      &frame_settings->enc->memory_manager,
+      // JxlEncoderQueuedFrame is a struct with no constructors, so we use the
+      // default move constructor there.
+      jxl::JxlEncoderQueuedFrame{
+          frame_settings->values,
+          jxl::ImageBundle(&frame_settings->enc->metadata.m),
+          {}});
+  if (!queued_frame) {
+    // TODO(jon): when can this happen? is this an API usage error?
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC,
+                         "No frame queued?");
+  }
+  queued_frame->frame.SetFromImage(std::move(*io.Main().color()),
+                                   io.Main().c_current());
+  size_t xsize, ysize;
+  // The status set by GetCurrentDimensions is replaced by a generic one here.
+  if (GetCurrentDimensions(frame_settings, xsize, ysize) != JXL_ENC_SUCCESS) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC,
+                         "bad dimensions");
+  }
+  if (xsize != static_cast<size_t>(io.Main().jpeg_data->width) ||
+      ysize != static_cast<size_t>(io.Main().jpeg_data->height)) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC,
+                         "JPEG dimensions don't match frame dimensions");
+  }
+  // One frame-sized plane per declared extra channel, each marked as not yet
+  // initialized (0) in ec_initialized.
+  std::vector<jxl::ImageF> extra_channels(
+      frame_settings->enc->metadata.m.num_extra_channels);
+  for (auto& extra_channel : extra_channels) {
+    extra_channel = jxl::ImageF(xsize, ysize);
+    queued_frame->ec_initialized.push_back(0);
+  }
+  queued_frame->frame.SetExtraChannels(std::move(extra_channels));
+  // jpeg_data is moved out of `io` here; it must not be read through `io`
+  // after this line.
+  queued_frame->frame.jpeg_data = std::move(io.Main().jpeg_data);
+  queued_frame->frame.color_transform = io.Main().color_transform;
+  queued_frame->frame.chroma_subsampling = io.Main().chroma_subsampling;
+
+  QueueFrame(frame_settings, queued_frame);
+  return JXL_ENC_SUCCESS;
+}
+
+// Tells whether a frame with these settings and this pixel format may be
+// queued through the fast-lossless path.
+//
+// Every condition below is a veto; the frame qualifies only if none applies.
+static bool CanDoFastLossless(const JxlEncoderFrameSettings* frame_settings,
+                              const JxlPixelFormat* pixel_format,
+                              bool has_alpha) {
+  const auto& settings = frame_settings->values;
+  const auto& image_meta = frame_settings->enc->metadata.m;
+
+  if (!settings.lossless) return false;
+
+  // TODO(veluca): many of the following options could be made to work, but are
+  // just not implemented in FJXL's frame header handling yet.
+  if (settings.frame_index_box) return false;
+  if (settings.header.layer_info.have_crop) return false;
+  if (image_meta.have_animation) return false;
+  if (settings.cparams.speed_tier != jxl::SpeedTier::kLightning) return false;
+
+  // A custom input bit depth must agree with the bit depth in the metadata.
+  const auto& input_depth = settings.image_bit_depth;
+  if (input_depth.type == JxlBitDepthType::JXL_BIT_DEPTH_CUSTOM &&
+      input_depth.bits_per_sample != image_meta.bit_depth.bits_per_sample) {
+    return false;
+  }
+  // TODO(veluca): implement support for LSB-padded input in fast_lossless.
+  if (input_depth.type == JxlBitDepthType::JXL_BIT_DEPTH_FROM_PIXEL_FORMAT &&
+      input_depth.bits_per_sample % 8 != 0) {
+    return false;
+  }
+
+  if (!settings.frame_name.empty()) return false;
+
+  // No extra channels other than alpha.
+  const bool alpha_is_only_extra =
+      has_alpha && image_meta.num_extra_channels == 1;
+  if (image_meta.num_extra_channels != 0 && !alpha_is_only_extra) {
+    return false;
+  }
+
+  if (image_meta.bit_depth.bits_per_sample > 16) return false;
+
+  // Only 8-bit and 16-bit sample types, and the sample width has to be on the
+  // same side of the 8-bit boundary as the image bit depth.
+  const bool samples_are_8bit =
+      pixel_format->data_type == JxlDataType::JXL_TYPE_UINT8;
+  const bool samples_are_16bit =
+      pixel_format->data_type == JxlDataType::JXL_TYPE_UINT16 ||
+      pixel_format->data_type == JxlDataType::JXL_TYPE_FLOAT16;
+  if (!samples_are_8bit && !samples_are_16bit) return false;
+  if ((image_meta.bit_depth.bits_per_sample > 8) != samples_are_16bit) {
+    return false;
+  }
+
+  // The channel count has to match the presence of alpha: 1 or 3 channels
+  // without alpha, 2 or 4 channels with alpha.
+  const bool layout_without_alpha =
+      pixel_format->num_channels == 1 || pixel_format->num_channels == 3;
+  const bool layout_with_alpha =
+      pixel_format->num_channels == 2 || pixel_format->num_channels == 4;
+  return has_alpha ? layout_with_alpha : layout_without_alpha;
+}
+
+// Queues one frame of raw pixels on the encoder that `frame_settings`
+// belongs to.
+//
+// `buffer` holds `size` bytes laid out as described by `pixel_format`.
+// Basic info must already be set, and either the color encoding must be set
+// or the image must be XYB encoded; frame input must not be closed.
+//
+// There are two ways the frame gets queued:
+//  - the fast-lossless path, taken when CanDoFastLossless approves, which
+//    hands the caller's buffer to JxlFastLosslessPrepareFrame and returns
+//    early;
+//  - the regular path, which converts the buffer into an ImageBundle with
+//    jxl::ConvertFromExternal and queues that.
+//
+// Returns JXL_ENC_SUCCESS once the frame is queued, an error status
+// otherwise.
+JxlEncoderStatus JxlEncoderAddImageFrame(
+    const JxlEncoderFrameSettings* frame_settings,
+    const JxlPixelFormat* pixel_format, const void* buffer, size_t size) {
+  if (!frame_settings->enc->basic_info_set ||
+      (!frame_settings->enc->color_encoding_set &&
+       !frame_settings->enc->metadata.m.xyb_encoded)) {
+    // Basic Info must be set, and color encoding must be set directly,
+    // or set to XYB via JxlBasicInfo.uses_original_profile = JXL_FALSE
+    // Otherwise, this is an API misuse.
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                         "Basic info or color encoding not set yet");
+  }
+
+  if (frame_settings->enc->frames_closed) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                         "Frame input already closed");
+  }
+  // Fewer than 3 channels is treated as grayscale (optionally with alpha),
+  // 3 or more as RGB; this has to agree with the basic info.
+  if (pixel_format->num_channels < 3) {
+    if (frame_settings->enc->basic_info.num_color_channels != 1) {
+      return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                           "Grayscale pixel format input for an RGB image");
+    }
+  } else {
+    if (frame_settings->enc->basic_info.num_color_channels != 3) {
+      return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                           "RGB pixel format input for a grayscale image");
+    }
+  }
+
+  bool has_alpha = frame_settings->enc->metadata.m.HasAlpha();
+
+  size_t xsize, ysize;
+  // The status set by GetCurrentDimensions is replaced by a generic one here.
+  if (GetCurrentDimensions(frame_settings, xsize, ysize) != JXL_ENC_SUCCESS) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC,
+                         "bad dimensions");
+  }
+
+  // All required conditions to do fast-lossless.
+  // NOTE(review): this branch returns before the "lossless with xyb_encoded"
+  // check near the end of the function — confirm that combination cannot
+  // reach the fast path.
+  if (CanDoFastLossless(frame_settings, pixel_format, has_alpha)) {
+    // CanDoFastLossless only admits UINT8, UINT16 and FLOAT16 samples, so a
+    // sample is either 1 or 2 bytes wide here.
+    const size_t bytes_per_pixel =
+        pixel_format->data_type == JxlDataType::JXL_TYPE_UINT8
+            ? pixel_format->num_channels
+            : pixel_format->num_channels * 2;
+    const size_t last_row_size = xsize * bytes_per_pixel;
+    const size_t align = pixel_format->align;
+    // Row stride: the row size rounded up to a multiple of `align`.
+    const size_t row_size =
+        (align > 1 ? jxl::DivCeil(last_row_size, align) * align
+                   : last_row_size);
+    // The last row does not need its alignment padding to be present.
+    const size_t bytes_to_read = row_size * (ysize - 1) + last_row_size;
+    if (bytes_to_read > size) {
+      return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                           "provided image buffer too small");
+    }
+    const bool big_endian =
+        pixel_format->endianness == JXL_BIG_ENDIAN ||
+        (pixel_format->endianness == JXL_NATIVE_ENDIAN && !IsLittleEndian());
+
+    // Adapter with a plain function-pointer signature (the unary + converts
+    // the capture-less lambda) that runs `fun(opaque, i)` for i in
+    // [0, count) on the encoder's thread pool.
+    auto runner = +[](void* void_pool, void* opaque, void fun(void*, size_t),
+                      size_t count) {
+      auto* pool = reinterpret_cast<jxl::ThreadPool*>(void_pool);
+      JXL_CHECK(jxl::RunOnPool(
+          pool, 0, count, jxl::ThreadPool::NoInit,
+          [&](size_t i, size_t) { fun(opaque, i); }, "Encode fast lossless"));
+    };
+    QueueFastLosslessFrame(
+        frame_settings,
+        JxlFastLosslessPrepareFrame(
+            reinterpret_cast<const unsigned char*>(buffer), xsize, row_size,
+            ysize, pixel_format->num_channels,
+            frame_settings->enc->metadata.m.bit_depth.bits_per_sample,
+            big_endian, /*effort=*/2, frame_settings->enc->thread_pool.get(),
+            runner));
+    return JXL_ENC_SUCCESS;
+  }
+
+  auto queued_frame = jxl::MemoryManagerMakeUnique<jxl::JxlEncoderQueuedFrame>(
+      &frame_settings->enc->memory_manager,
+      // JxlEncoderQueuedFrame is a struct with no constructors, so we use the
+      // default move constructor there.
+      jxl::JxlEncoderQueuedFrame{
+          frame_settings->values,
+          jxl::ImageBundle(&frame_settings->enc->metadata.m),
+          {}});
+
+  if (!queued_frame) {
+    // TODO(jon): when can this happen? is this an API usage error?
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC,
+                         "No frame queued?");
+  }
+
+  // Color encoding of the incoming pixels. Without an explicit encoding,
+  // float input is taken as linear sRGB and integer input as sRGB; the
+  // boolean argument selects the grayscale variant.
+  jxl::ColorEncoding c_current;
+  if (!frame_settings->enc->color_encoding_set) {
+    if ((pixel_format->data_type == JXL_TYPE_FLOAT) ||
+        (pixel_format->data_type == JXL_TYPE_FLOAT16)) {
+      c_current =
+          jxl::ColorEncoding::LinearSRGB(pixel_format->num_channels < 3);
+    } else {
+      c_current = jxl::ColorEncoding::SRGB(pixel_format->num_channels < 3);
+    }
+  } else {
+    c_current = frame_settings->enc->metadata.m.color_encoding;
+  }
+  uint32_t num_channels = pixel_format->num_channels;
+  // 1 when the pixel format carries an interleaved alpha channel (2 or 4
+  // channels), 0 otherwise.
+  size_t has_interleaved_alpha =
+      static_cast<size_t>(num_channels == 2 || num_channels == 4);
+  if (has_interleaved_alpha >
+      frame_settings->enc->metadata.m.num_extra_channels) {
+    return JXL_API_ERROR(
+        frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+        "number of extra channels mismatch (need 1 extra channel for alpha)");
+  }
+  // One frame-sized plane per declared extra channel.
+  std::vector<jxl::ImageF> extra_channels(
+      frame_settings->enc->metadata.m.num_extra_channels);
+  for (auto& extra_channel : extra_channels) {
+    extra_channel = jxl::ImageF(xsize, ysize);
+  }
+  queued_frame->frame.SetExtraChannels(std::move(extra_channels));
+  // Record, per extra channel, whether its pixels come with this call (1) or
+  // still have to be provided (0).
+  for (auto& ec_info : frame_settings->enc->metadata.m.extra_channel_info) {
+    if (has_interleaved_alpha && ec_info.type == jxl::ExtraChannel::kAlpha) {
+      queued_frame->ec_initialized.push_back(1);
+      has_interleaved_alpha = 0;  // only first Alpha is initialized
+    } else {
+      queued_frame->ec_initialized.push_back(0);
+    }
+  }
+  // Copy the layer placement and blending settings from the frame header.
+  queued_frame->frame.origin.x0 =
+      frame_settings->values.header.layer_info.crop_x0;
+  queued_frame->frame.origin.y0 =
+      frame_settings->values.header.layer_info.crop_y0;
+  queued_frame->frame.use_for_next_frame =
+      (frame_settings->values.header.layer_info.save_as_reference != 0u);
+  // Every blend mode other than JXL_BLEND_REPLACE maps to kBlend here.
+  queued_frame->frame.blendmode =
+      frame_settings->values.header.layer_info.blend_info.blendmode ==
+              JXL_BLEND_REPLACE
+          ? jxl::BlendMode::kReplace
+          : jxl::BlendMode::kBlend;
+  queued_frame->frame.blend =
+      frame_settings->values.header.layer_info.blend_info.source > 0;
+
+  // NOTE(review): unlike the other error returns, this one uses
+  // JXL_API_ERROR_NOSET, which presumably leaves enc->error unchanged —
+  // confirm.
+  if (JXL_ENC_SUCCESS !=
+      VerifyInputBitDepth(frame_settings->values.image_bit_depth,
+                          *pixel_format)) {
+    return JXL_API_ERROR_NOSET("Invalid input bit depth");
+  }
+  size_t bits_per_sample =
+      GetBitDepth(frame_settings->values.image_bit_depth,
+                  frame_settings->enc->metadata.m, *pixel_format);
+  const uint8_t* uint8_buffer = reinterpret_cast<const uint8_t*>(buffer);
+  if (!jxl::ConvertFromExternal(
+          jxl::Span<const uint8_t>(uint8_buffer, size), xsize, ysize, c_current,
+          bits_per_sample, *pixel_format,
+          frame_settings->enc->thread_pool.get(), &(queued_frame->frame))) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                         "Invalid input buffer");
+  }
+  // This check runs only after the pixel conversion above has been done.
+  if (frame_settings->values.lossless &&
+      frame_settings->enc->metadata.m.xyb_encoded) {
+    return JXL_API_ERROR(
+        frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+        "Set uses_original_profile=true for lossless encoding");
+  }
+  queued_frame->option_values.cparams.level =
+      frame_settings->enc->codestream_level;
+
+  QueueFrame(frame_settings, queued_frame);
+  return JXL_ENC_SUCCESS;
+}
+
+// Sets `enc->use_boxes`, which JxlEncoderAddBox requires before it accepts a
+// box.
+//
+// Fails with JXL_ENC_ERR_API_USAGE once `enc->wrote_bytes` is set.
+JxlEncoderStatus JxlEncoderUseBoxes(JxlEncoder* enc) {
+  if (!enc->wrote_bytes) {
+    enc->use_boxes = true;
+    return JXL_ENC_SUCCESS;
+  }
+  return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+                       "this setting can only be set at the beginning");
+}
+
+// Queues a metadata box of the given 4-character `type` whose payload is a
+// copy of the `size` bytes at `contents`.
+//
+// Requirements checked here:
+//  - JxlEncoderUseBoxes must have been called (`enc->use_boxes`),
+//  - box input must not have been closed (`enc->boxes_closed`),
+//  - when `compress_box` is set, the type may not start with "jxl" and may
+//    not be "jbrd" or "brob".
+//
+// Returns JXL_ENC_SUCCESS once the box is queued; otherwise an error status
+// with JXL_ENC_ERR_API_USAGE (bad call) or JXL_ENC_ERR_GENERIC (the box
+// object could not be allocated).
+JxlEncoderStatus JxlEncoderAddBox(JxlEncoder* enc, const JxlBoxType type,
+                                  const uint8_t* contents, size_t size,
+                                  JXL_BOOL compress_box) {
+  if (!enc->use_boxes) {
+    return JXL_API_ERROR(
+        enc, JXL_ENC_ERR_API_USAGE,
+        "must set JxlEncoderUseBoxes at the beginning to add boxes");
+  }
+  // Mirrors the frames_closed check done when adding frames: once the caller
+  // declared that no more boxes follow, accepting one is an API misuse.
+  if (enc->boxes_closed) {
+    return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+                         "Box input already closed");
+  }
+  if (compress_box) {
+    if (memcmp("jxl", type, 3) == 0) {
+      return JXL_API_ERROR(
+          enc, JXL_ENC_ERR_API_USAGE,
+          "brob box may not contain a type starting with \"jxl\"");
+    }
+    if (memcmp("jbrd", type, 4) == 0) {
+      return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+                           "jbrd box may not be brob compressed");
+    }
+    if (memcmp("brob", type, 4) == 0) {
+      // The compress_box will compress an existing non-brob box into a brob
+      // box. If already giving a valid brotli-compressed brob box, set
+      // compress_box to false since it is already compressed.
+      return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+                           "a brob box cannot contain another brob box");
+    }
+  }
+
+  auto box = jxl::MemoryManagerMakeUnique<jxl::JxlEncoderQueuedBox>(
+      &enc->memory_manager);
+  // The frame paths check the result of MemoryManagerMakeUnique before use;
+  // do the same here instead of dereferencing a possibly empty pointer.
+  if (!box) {
+    return JXL_API_ERROR(enc, JXL_ENC_ERR_GENERIC,
+                         "Failed to allocate box");
+  }
+
+  box->type = jxl::MakeBoxType(type);
+  box->contents.assign(contents, contents + size);
+  box->compress_box = !!compress_box;
+  QueueBox(enc, box);
+  return JXL_ENC_SUCCESS;
+}
+
+// Fills extra channel `index` of the most recently queued frame from
+// `buffer` (`size` bytes).
+//
+// `pixel_format` describes the buffer, except that the channel count is
+// forced to 1. Basic info and color encoding must be set, a frame must
+// already be queued, and frame input must not be closed. On success the
+// channel is marked as initialized in the queued frame.
+//
+// Returns JXL_ENC_SUCCESS on success, an error status otherwise.
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelBuffer(
+    const JxlEncoderFrameSettings* frame_settings,
+    const JxlPixelFormat* pixel_format, const void* buffer, size_t size,
+    uint32_t index) {
+  if (index >= frame_settings->enc->metadata.m.num_extra_channels) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                         "Invalid value for the index of extra channel");
+  }
+  if (!frame_settings->enc->basic_info_set ||
+      !frame_settings->enc->color_encoding_set) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                         "Basic info has to be set first");
+  }
+  if (frame_settings->enc->input_queue.empty()) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                         "First add image frame, then extra channels");
+  }
+  if (frame_settings->enc->frames_closed) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                         "Frame input already closed");
+  }
+  size_t xsize, ysize;
+  if (GetCurrentDimensions(frame_settings, xsize, ysize) != JXL_ENC_SUCCESS) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC,
+                         "bad dimensions");
+  }
+  // An extra channel is a single plane, whatever the caller's format says.
+  JxlPixelFormat ec_format = *pixel_format;
+  ec_format.num_channels = 1;
+  if (JXL_ENC_SUCCESS !=
+      VerifyInputBitDepth(frame_settings->values.image_bit_depth, ec_format)) {
+    return JXL_API_ERROR_NOSET("Invalid input bit depth");
+  }
+  size_t bits_per_sample = GetBitDepth(
+      frame_settings->values.image_bit_depth,
+      frame_settings->enc->metadata.m.extra_channel_info[index], ec_format);
+  const uint8_t* uint8_buffer = reinterpret_cast<const uint8_t*>(buffer);
+  auto queued_frame = frame_settings->enc->input_queue.back().frame.get();
+  // A non-empty queue does not guarantee that its last entry holds a regular
+  // frame. NOTE(review): presumably the queue can also end with a box or a
+  // fast-lossless frame, for which `frame` is empty — confirm.
+  if (queued_frame == nullptr) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                         "First add image frame, then extra channels");
+  }
+  if (!jxl::ConvertFromExternal(jxl::Span<const uint8_t>(uint8_buffer, size),
+                                xsize, ysize, bits_per_sample, ec_format, 0,
+                                frame_settings->enc->thread_pool.get(),
+                                &queued_frame->frame.extra_channels()[index])) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                         "Failed to set buffer for extra channel");
+  }
+  queued_frame->ec_initialized[index] = 1;
+
+  return JXL_ENC_SUCCESS;
+}
+
+// Marks frame input as closed; the frame-adding functions reject further
+// frames once this flag is set.
+void JxlEncoderCloseFrames(JxlEncoder* enc) {
+  enc->frames_closed = true;
+}
+
+// Marks box input as closed by setting `enc->boxes_closed`.
+void JxlEncoderCloseBoxes(JxlEncoder* enc) {
+  enc->boxes_closed = true;
+}
+
+// Closes both kinds of input at once: sets the frames-closed and the
+// boxes-closed flags.
+void JxlEncoderCloseInput(JxlEncoder* enc) {
+  enc->frames_closed = true;
+  enc->boxes_closed = true;
+}
+// Writes as much pending output as fits into the caller's buffer.
+//
+// `*next_out` and `*avail_out` describe the remaining output space and are
+// advanced/decreased by the number of bytes written. Work is only attempted
+// while at least 32 bytes of space remain. Sources are drained in priority
+// order: already encoded bytes, then fast-lossless frames, and only then is
+// more input encoded via RefillOutputByteQueue.
+//
+// Returns JXL_ENC_SUCCESS when nothing is left to emit,
+// JXL_ENC_NEED_MORE_OUTPUT when something is still pending, and
+// JXL_ENC_ERROR when refilling the byte queue fails.
+JxlEncoderStatus JxlEncoderProcessOutput(JxlEncoder* enc, uint8_t** next_out,
+                                         size_t* avail_out) {
+  // True while any of the three queues still holds something.
+  const auto has_pending = [enc]() {
+    return !enc->output_byte_queue.empty() ||
+           !enc->output_fast_frame_queue.empty() || !enc->input_queue.empty();
+  };
+
+  while (*avail_out >= 32 && has_pending()) {
+    if (!enc->output_byte_queue.empty()) {
+      auto& ready_bytes = enc->output_byte_queue;
+      const size_t chunk = std::min(*avail_out, ready_bytes.size());
+      std::copy_n(ready_bytes.begin(), chunk, *next_out);
+      ready_bytes.erase(ready_bytes.begin(), ready_bytes.begin() + chunk);
+      *next_out += chunk;
+      *avail_out -= chunk;
+      continue;
+    }
+
+    if (!enc->output_fast_frame_queue.empty()) {
+      const size_t written = JxlFastLosslessWriteOutput(
+          enc->output_fast_frame_queue.front().get(), *next_out, *avail_out);
+      *next_out += written;
+      *avail_out -= written;
+      // A write of zero bytes means the front frame is used up.
+      if (written == 0) {
+        enc->output_fast_frame_queue.pop_front();
+      }
+      continue;
+    }
+
+    if (!enc->input_queue.empty()) {
+      const JxlEncoderStatus refill_status = enc->RefillOutputByteQueue();
+      if (refill_status != JXL_ENC_SUCCESS) {
+        return JXL_ENC_ERROR;
+      }
+    }
+  }
+
+  return has_pending() ? JXL_ENC_NEED_MORE_OUTPUT : JXL_ENC_SUCCESS;
+}
+
+// Copies `*frame_header` into the frame settings after validating its
+// blending fields, and clears the frame name.
+//
+// Fails with JXL_ENC_ERR_API_USAGE when the blend source index is above 3,
+// or when a non-zero alpha channel index is not smaller than the number of
+// extra channels.
+JxlEncoderStatus JxlEncoderSetFrameHeader(
+    JxlEncoderFrameSettings* frame_settings,
+    const JxlFrameHeader* frame_header) {
+  const auto& blend = frame_header->layer_info.blend_info;
+  if (blend.source > 3) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                         "invalid blending source index");
+  }
+
+  // If there are no extra channels, it's ok for the value to be 0.
+  const auto extra_channel_count =
+      frame_settings->enc->metadata.m.extra_channel_info.size();
+  const bool alpha_out_of_bounds =
+      blend.alpha != 0 && blend.alpha >= extra_channel_count;
+  if (alpha_out_of_bounds) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                         "alpha blend channel index out of bounds");
+  }
+
+  auto& settings = frame_settings->values;
+  settings.header = *frame_header;
+  // Setting the frame header resets the frame name, it must be set again with
+  // JxlEncoderSetFrameName if desired.
+  settings.frame_name = "";
+
+  return JXL_ENC_SUCCESS;
+}
+
+// Stores `*blend_info` as the blend info of extra channel `index`.
+//
+// Fails with JXL_ENC_ERR_API_USAGE when `index` is not smaller than the number
+// of extra channels in the encoder metadata. If the per-channel list does not
+// yet have exactly that many entries it is resized first, filling new entries
+// with the defaults from JxlEncoderInitBlendInfo.
+JxlEncoderStatus JxlEncoderSetExtraChannelBlendInfo(
+    JxlEncoderFrameSettings* frame_settings, size_t index,
+    const JxlBlendInfo* blend_info) {
+  const auto num_extra_channels =
+      frame_settings->enc->metadata.m.num_extra_channels;
+  if (index >= num_extra_channels) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                         "Invalid value for the index of extra channel");
+  }
+
+  auto& per_channel = frame_settings->values.extra_channel_blend_info;
+  if (per_channel.size() != num_extra_channels) {
+    JxlBlendInfo defaults;
+    JxlEncoderInitBlendInfo(&defaults);
+    per_channel.resize(num_extra_channels, defaults);
+  }
+  per_channel[index] = *blend_info;
+  return JXL_ENC_SUCCESS;
+}
+
+// Sets the frame name and the matching name_length in the stored header.
+//
+// A null `frame_name` is treated as the empty string. Names longer than 1071
+// bytes are rejected with JXL_ENC_ERR_API_USAGE and leave the settings
+// untouched.
+JxlEncoderStatus JxlEncoderSetFrameName(JxlEncoderFrameSettings* frame_settings,
+                                        const char* frame_name) {
+  const std::string name(frame_name != nullptr ? frame_name : "");
+  if (name.size() > 1071) {
+    return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+                         "frame name can be max 1071 bytes long");
+  }
+  frame_settings->values.header.name_length = name.size();
+  frame_settings->values.frame_name = name;
+  return JXL_ENC_SUCCESS;
+}
+
+// Stores `*bit_depth` as the bit depth description of the input buffers.
+//
+// Only the types JXL_BIT_DEPTH_FROM_PIXEL_FORMAT and
+// JXL_BIT_DEPTH_FROM_CODESTREAM are accepted; any other type is rejected via
+// JXL_API_ERROR_NOSET without changing the settings.
+JxlEncoderStatus JxlEncoderSetFrameBitDepth(
+    JxlEncoderFrameSettings* frame_settings, const JxlBitDepth* bit_depth) {
+  const bool supported_type =
+      bit_depth->type == JXL_BIT_DEPTH_FROM_PIXEL_FORMAT ||
+      bit_depth->type == JXL_BIT_DEPTH_FROM_CODESTREAM;
+  if (!supported_type) {
+    return JXL_API_ERROR_NOSET(
+        "Only JXL_BIT_DEPTH_FROM_PIXEL_FORMAT and "
+        "JXL_BIT_DEPTH_FROM_CODESTREAM is implemented "
+        "for input buffers.");
+  }
+  frame_settings->values.image_bit_depth = *bit_depth;
+  return JXL_ENC_SUCCESS;
+}
+
+// Fills `*color_encoding` with the external form of jxl::ColorEncoding::SRGB,
+// passing `is_gray` through to select the variant.
+void JxlColorEncodingSetToSRGB(JxlColorEncoding* color_encoding,
+                               JXL_BOOL is_gray) {
+  const auto& srgb = jxl::ColorEncoding::SRGB(is_gray);
+  ConvertInternalToExternalColorEncoding(srgb, color_encoding);
+}
+
+// Fills `*color_encoding` with the external form of
+// jxl::ColorEncoding::LinearSRGB, passing `is_gray` through to select the
+// variant.
+void JxlColorEncodingSetToLinearSRGB(JxlColorEncoding* color_encoding,
+                                     JXL_BOOL is_gray) {
+  const auto& linear_srgb = jxl::ColorEncoding::LinearSRGB(is_gray);
+  ConvertInternalToExternalColorEncoding(linear_srgb, color_encoding);
+}
+
+// Turns on the allow_expert_options flag of `enc`. This function never turns
+// it off again.
+void JxlEncoderAllowExpertOptions(JxlEncoder* enc) {
+  JxlEncoder& encoder = *enc;
+  encoder.allow_expert_options = true;
+}
+
+// Stores the debug image callback and its opaque pointer in the compression
+// parameters of `frame_settings`. Neither argument is checked.
+JXL_EXPORT void JxlEncoderSetDebugImageCallback(
+    JxlEncoderFrameSettings* frame_settings, JxlDebugImageCallback callback,
+    void* opaque) {
+  auto& cparams = frame_settings->values.cparams;
+  cparams.debug_image_opaque = opaque;
+  cparams.debug_image = callback;
+}
+
+// Allocates a value-initialized JxlEncoderStats with `new`. The matching
+// release function is JxlEncoderStatsDestroy.
+JXL_EXPORT JxlEncoderStats* JxlEncoderStatsCreate() {
+  JxlEncoderStats* const stats = new JxlEncoderStats();
+  return stats;
+}
+
+// Releases a stats object with `delete`. Passing a null pointer is allowed.
+JXL_EXPORT void JxlEncoderStatsDestroy(JxlEncoderStats* stats) {
+  // `delete` on a null pointer is a no-op, so no explicit check is needed.
+  delete stats;
+}
+
+// Points the aux_out of `frame_settings` at the AuxOut owned by `stats`.
+// A null `stats` leaves the settings unchanged.
+JXL_EXPORT void JxlEncoderCollectStats(JxlEncoderFrameSettings* frame_settings,
+                                       JxlEncoderStats* stats) {
+  if (stats != nullptr) {
+    frame_settings->values.aux_out = &stats->aux_out;
+  }
+}
+
+// Returns the statistic selected by `key` from `stats`.
+//
+// The *_BITS keys return the total_bits of the corresponding AuxOut layer and
+// the NUM_* keys return the corresponding AuxOut counter. Returns 0 when
+// `stats` is null or `key` is not one of the handled values.
+JXL_EXPORT size_t JxlEncoderStatsGet(const JxlEncoderStats* stats,
+                                     JxlEncoderStatsKey key) {
+  if (!stats) return 0;
+  const jxl::AuxOut& aux_out = stats->aux_out;
+  switch (key) {
+    case JXL_ENC_STAT_HEADER_BITS:
+      return aux_out.layers[jxl::kLayerHeader].total_bits;
+    case JXL_ENC_STAT_TOC_BITS:
+      return aux_out.layers[jxl::kLayerTOC].total_bits;
+    case JXL_ENC_STAT_DICTIONARY_BITS:
+      return aux_out.layers[jxl::kLayerDictionary].total_bits;
+    case JXL_ENC_STAT_SPLINES_BITS:
+      return aux_out.layers[jxl::kLayerSplines].total_bits;
+    case JXL_ENC_STAT_NOISE_BITS:
+      return aux_out.layers[jxl::kLayerNoise].total_bits;
+    case JXL_ENC_STAT_QUANT_BITS:
+      return aux_out.layers[jxl::kLayerQuant].total_bits;
+    case JXL_ENC_STAT_MODULAR_TREE_BITS:
+      return aux_out.layers[jxl::kLayerModularTree].total_bits;
+    case JXL_ENC_STAT_MODULAR_GLOBAL_BITS:
+      return aux_out.layers[jxl::kLayerModularGlobal].total_bits;
+    case JXL_ENC_STAT_DC_BITS:
+      return aux_out.layers[jxl::kLayerDC].total_bits;
+    case JXL_ENC_STAT_MODULAR_DC_GROUP_BITS:
+      return aux_out.layers[jxl::kLayerModularDcGroup].total_bits;
+    case JXL_ENC_STAT_CONTROL_FIELDS_BITS:
+      return aux_out.layers[jxl::kLayerControlFields].total_bits;
+    case JXL_ENC_STAT_COEF_ORDER_BITS:
+      return aux_out.layers[jxl::kLayerOrder].total_bits;
+    case JXL_ENC_STAT_AC_HISTOGRAM_BITS:
+      return aux_out.layers[jxl::kLayerAC].total_bits;
+    case JXL_ENC_STAT_AC_BITS:
+      return aux_out.layers[jxl::kLayerACTokens].total_bits;
+    case JXL_ENC_STAT_MODULAR_AC_GROUP_BITS:
+      return aux_out.layers[jxl::kLayerModularAcGroup].total_bits;
+    case JXL_ENC_STAT_NUM_SMALL_BLOCKS:
+      return aux_out.num_small_blocks;
+    case JXL_ENC_STAT_NUM_DCT4X8_BLOCKS:
+      return aux_out.num_dct4x8_blocks;
+    case JXL_ENC_STAT_NUM_AFV_BLOCKS:
+      return aux_out.num_afv_blocks;
+    case JXL_ENC_STAT_NUM_DCT8_BLOCKS:
+      return aux_out.num_dct8_blocks;
+    // Each block-count key below returns the counter of the same name.
+    // NOTE(review): num_dct8x32_blocks is assumed to be a member of
+    // jxl::AuxOut alongside the other counters; confirm in enc_aux_out.h.
+    case JXL_ENC_STAT_NUM_DCT8X32_BLOCKS:
+      return aux_out.num_dct8x32_blocks;
+    case JXL_ENC_STAT_NUM_DCT16_BLOCKS:
+      return aux_out.num_dct16_blocks;
+    case JXL_ENC_STAT_NUM_DCT16X32_BLOCKS:
+      return aux_out.num_dct16x32_blocks;
+    case JXL_ENC_STAT_NUM_DCT32_BLOCKS:
+      return aux_out.num_dct32_blocks;
+    case JXL_ENC_STAT_NUM_DCT32X64_BLOCKS:
+      return aux_out.num_dct32x64_blocks;
+    case JXL_ENC_STAT_NUM_DCT64_BLOCKS:
+      return aux_out.num_dct64_blocks;
+    case JXL_ENC_STAT_NUM_BUTTERAUGLI_ITERS:
+      return aux_out.num_butteraugli_iters;
+    default:
+      return 0;
+  }
+}
+
+// Folds the AuxOut of `other` into the AuxOut of `stats` via Assimilate().
+// Does nothing when either pointer is null.
+JXL_EXPORT void JxlEncoderStatsMerge(JxlEncoderStats* stats,
+                                     const JxlEncoderStats* other) {
+  if (stats == nullptr || other == nullptr) {
+    return;
+  }
+  stats->aux_out.Assimilate(other->aux_out);
+}
diff --git a/third-party/libjxl/libjxl/lib/jxl/encode_internal.h b/third-party/libjxl/libjxl/lib/jxl/encode_internal.h
new file mode 100644
index 0000000000..62096345d7
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/encode_internal.h
@@ -0,0 +1,282 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+#ifndef LIB_JXL_ENCODE_INTERNAL_H_
+#define LIB_JXL_ENCODE_INTERNAL_H_
+
+#include <jxl/encode.h>
+#include <jxl/memory_manager.h>
+#include <jxl/parallel_runner.h>
+#include <jxl/types.h>
+
+#include <deque>
+#include <vector>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_fast_lossless.h"
+#include "lib/jxl/enc_frame.h"
+#include "lib/jxl/memory_manager_internal.h"
+
+namespace jxl {
+
+/* The frame index box 'jxli' starts with the following fields:
+
+NF: has type Varint(): number of frames listed in the index.
+TNUM: has type u32: numerator of tick unit.
+TDEN: has type u32: denominator of tick unit. Value 0 means the file is
+ill-formed.
+
+Then, per frame i listed:
+
+OFFi: has type Varint(): offset of start byte of this frame compared to start
+byte of previous frame from this index in the JPEG XL codestream. For the first
+frame, this is the offset from the first byte of the JPEG XL codestream.
+Ti: has type Varint(): duration in ticks between the start of this frame and
+the start of the next frame in the index. If this is the last frame in the
+index, this is the duration in ticks between the start of this frame and the
+end of the stream. A tick lasts TNUM / TDEN seconds.
+Fi: has type Varint(): amount of frames the next frame in the index occurs
+after this frame. If this is the last frame in the index, this is the amount of
+frames after this frame in the remainder of the stream. Only frames that are
+presented by the decoder are counted for this purpose; this excludes frames
+that are not intended for display but for compositing with other frames, such
+as frames that aren't the last frame with a duration of 0 ticks.
+
+All the frames listed in jxli are keyframes and the first frame is
+present in the list.
+There shall be either zero or one Frame Index boxes in a JPEG XL file.
+The offsets OFFi per frame are given as bytes in the codestream, not as
+bytes in the file format using the box structure. This means that if JPEG XL
+Partial Codestream boxes are used, the offset is counted within the
+concatenated codestream; bytes from box headers or non-codestream boxes are not
+counted.
+*/
+
+// One recorded frame, as filled in by JxlEncoderFrameIndexBox::AddFrame.
+typedef struct JxlEncoderFrameIndexBoxEntryStruct {
+  // Checked by JxlEncoderFrameIndexBox::StoreFrameIndexBox.
+  bool to_be_indexed;
+  // NOTE(review): presumably measured in ticks (see TNUM/TDEN) — confirm.
+  uint32_t duration;
+  // NOTE(review): presumably the OFFi codestream offset described in the
+  // frame index box comment — confirm against the caller of AddFrame.
+  uint64_t OFFi;
+} JxlEncoderFrameIndexBoxEntry;
+
+// Bookkeeping for the frame index box: one entry per frame passed to
+// AddFrame, plus the tick unit and whether the box was requested.
+typedef struct JxlEncoderFrameIndexBoxStruct {
+  // The first frame entry is always recorded, so its presence alone does not
+  // tell whether storing the index was actually requested.
+  bool index_box_requested_through_api = false;
+
+  int32_t TNUM = 1;
+  int32_t TDEN = 1000;
+
+  std::vector<JxlEncoderFrameIndexBoxEntry> entries;
+
+  // Number of recorded entries.
+  int64_t NF() const { return static_cast<int64_t>(entries.size()); }
+
+  // Returns true if at least one recorded entry is marked to_be_indexed.
+  bool StoreFrameIndexBox() {
+    for (const auto& entry : entries) {
+      if (entry.to_be_indexed) return true;
+    }
+    return false;
+  }
+
+  // Records one frame. AddFrame is called for every frame:
+  //  - recording the first frame is required by the standard;
+  //  - the last frame is needed because the last indexed frame has to know
+  //    how many frames remain until the end;
+  //  - recording everything in between lets the number of frames between two
+  //    index entries be derived.
+  // This also guarantees that every index box contains the first frame. When
+  // the API user marks the first frame as indexed, AddFrame is called again
+  // for the same offset and the earlier entry is replaced.
+  void AddFrame(uint64_t OFFi, uint32_t duration, bool to_be_indexed) {
+    const bool repeats_first_frame =
+        entries.size() == 1 && entries[0].OFFi == OFFi;
+    if (repeats_first_frame) {
+      // API use for the first frame: drop the already recorded first frame.
+      entries.clear();
+    }
+
+    JxlEncoderFrameIndexBoxEntry entry;
+    entry.OFFi = OFFi;
+    entry.duration = duration;
+    entry.to_be_indexed = to_be_indexed;
+    entries.push_back(entry);
+  }
+} JxlEncoderFrameIndexBox;
+
+// The encoder options (such as quality, compression speed, ...) for a single
+// frame, but not encoder-wide options such as box-related options.
+typedef struct JxlEncoderFrameSettingsValuesStruct {
+  // lossless is a separate setting from cparams because it is a combination
+  // setting that overrides multiple settings inside of cparams.
+  bool lossless;
+  CompressParams cparams;
+  // Copy of the header passed to JxlEncoderSetFrameHeader; its name_length is
+  // also updated by JxlEncoderSetFrameName.
+  JxlFrameHeader header;
+  // Written by JxlEncoderSetExtraChannelBlendInfo, which resizes it to the
+  // number of extra channels on first use.
+  std::vector<JxlBlendInfo> extra_channel_blend_info;
+  // Set by JxlEncoderSetFrameName and cleared by JxlEncoderSetFrameHeader.
+  std::string frame_name;
+  // Set by JxlEncoderSetFrameBitDepth.
+  JxlBitDepth image_bit_depth;
+  bool frame_index_box = false;
+  // Not owned here: JxlEncoderCollectStats points it at the AuxOut inside a
+  // JxlEncoderStats object. Null when no stats collection was requested.
+  jxl::AuxOut* aux_out = nullptr;
+} JxlEncoderFrameSettingsValues;
+
+// Four-byte box type code.
+typedef std::array<uint8_t, 4> BoxType;
+
+// Builds a BoxType from the first four characters of `type`. Only those four
+// characters are read, so a string literal of exactly four characters works
+// and anything after the fourth character is ignored.
+constexpr BoxType MakeBoxType(const char* type) {
+  return BoxType{{static_cast<uint8_t>(type[0]),
+                  static_cast<uint8_t>(type[1]),
+                  static_cast<uint8_t>(type[2]),
+                  static_cast<uint8_t>(type[3])}};
+}
+
+// 32 constant bytes made of two complete boxes: a 12-byte box (size 0xc) of
+// type 'JXL ' with payload 0xd 0xa 0x87 0xa, followed by a 20-byte box
+// (size 0x14) of type 'ftyp' whose payload is 'jxl ', four zero bytes and
+// 'jxl ' again.
+// NOTE(review): presumably written at the very start of a container file —
+// confirm at the use site.
+constexpr unsigned char kContainerHeader[] = {
+    0,   0,   0, 0xc, 'J',  'X', 'L', ' ', 0xd, 0xa, 0x87,
+    0xa, 0,   0, 0,   0x14, 'f', 't', 'y', 'p', 'j', 'x',
+    'l', ' ', 0, 0,   0,    0,   'j', 'x', 'l', ' '};
+
+// Header only (size field 0x9, type 'jxll'): the declared size is one byte
+// larger than these 8 bytes, so a single payload byte has to follow.
+// NOTE(review): presumably that byte is the codestream level — confirm.
+constexpr unsigned char kLevelBoxHeader[] = {0, 0, 0, 0x9, 'j', 'x', 'l', 'l'};
+
+// A frame waiting in the encoder input queue together with the settings it
+// is to be encoded with.
+struct JxlEncoderQueuedFrame {
+  JxlEncoderFrameSettingsValues option_values;
+  ImageBundle frame;
+  // One flag per extra channel; an entry is set to 1 once a buffer has been
+  // set for that extra channel.
+  std::vector<uint8_t> ec_initialized;
+};
+
+// A box waiting in the encoder input queue: its four-byte type, its raw
+// contents and a flag for compression.
+struct JxlEncoderQueuedBox {
+  BoxType type;
+  std::vector<uint8_t> contents;
+  // NOTE(review): presumably requests that the contents be stored compressed
+  // — confirm where the box is written out.
+  bool compress_box;
+};
+
+// Owning pointer to a fast-lossless frame state whose deleter has the type of
+// JxlFastLosslessFreeFrameState. The deleter is a function pointer, so it has
+// to be supplied whenever such a pointer is constructed.
+using FJXLFrameUniquePtr =
+    std::unique_ptr<JxlFastLosslessFrameState,
+                    decltype(&JxlFastLosslessFreeFrameState)>;
+
+// Either a frame, or a box, not both.
+// Can also be a FJXL frame.
+struct JxlEncoderQueuedInput {
+  // Starts with both `frame` and `box` null; their deleters are built from
+  // `memory_manager`.
+  explicit JxlEncoderQueuedInput(const JxlMemoryManager& memory_manager)
+      : frame(nullptr, jxl::MemoryManagerDeleteHelper(&memory_manager)),
+        box(nullptr, jxl::MemoryManagerDeleteHelper(&memory_manager)) {}
+  MemoryManagerUniquePtr<JxlEncoderQueuedFrame> frame;
+  MemoryManagerUniquePtr<JxlEncoderQueuedBox> box;
+  // Null by default; released with JxlFastLosslessFreeFrameState.
+  FJXLFrameUniquePtr fast_lossless_frame = {nullptr,
+                                            JxlFastLosslessFreeFrameState};
+};
+
+// Appends a JXL container box header with the given type to `output`.
+//
+// `size` is the size of the box contents, not counting the header. When
+// `unbounded` is true, `size` is ignored and the size field is written as 0.
+// Otherwise the 8-byte header (32-bit size, then type) is used as long as
+// size + 8 fits in 32 bits; larger boxes get a 32-bit size field of 1, the
+// type, and then the real size as a 64-bit value. All integers are written
+// most significant byte first.
+//
+// `T` only needs a push_back() that accepts a byte value.
+template <typename T>
+void AppendBoxHeader(const jxl::BoxType& type, size_t size, bool unbounded,
+                     T* output) {
+  constexpr uint64_t kSmallHeaderSize = 8;   // 32-bit size + 4-byte type
+  constexpr uint64_t kLargeHeaderSize = 16;  // the above + 64-bit size
+
+  uint64_t box_size = 0;
+  bool large_size = false;
+  if (!unbounded) {
+    // Widen first so the sum cannot wrap where size_t is only 32 bits wide.
+    const uint64_t content_size = size;
+    box_size = content_size + kSmallHeaderSize;
+    if (box_size >= 0x100000000ull) {
+      large_size = true;
+      // The stored size covers the whole box including its header, and the
+      // header of a large box is 16 bytes long rather than 8.
+      box_size = content_size + kLargeHeaderSize;
+    }
+  }
+
+  {
+    // A 32-bit size of 1 signals that the 64-bit size follows the type.
+    const uint64_t store = large_size ? 1 : box_size;
+    for (size_t i = 0; i < 4; i++) {
+      output->push_back(store >> (8 * (3 - i)) & 0xff);
+    }
+  }
+  for (size_t i = 0; i < 4; i++) {
+    output->push_back(type[i]);
+  }
+
+  if (large_size) {
+    for (size_t i = 0; i < 8; i++) {
+      output->push_back(box_size >> (8 * (7 - i)) & 0xff);
+    }
+  }
+}
+
+}  // namespace jxl
+
+// Internal use only struct, can only be initialized correctly by
+// JxlEncoderCreate.
+struct JxlEncoderStruct {
+  // Note: many members below have no default member initializer in this
+  // declaration (for example cms_set, num_queued_frames, wrote_bytes and the
+  // *_closed / *_set flags), so they rely on being assigned elsewhere.
+  JxlEncoderError error = JxlEncoderError::JXL_ENC_ERR_OK;
+  JxlMemoryManager memory_manager;
+  // Starts out null. Declared after memory_manager because its deleter is
+  // built from a pointer to that member.
+  jxl::MemoryManagerUniquePtr<jxl::ThreadPool> thread_pool{
+      nullptr, jxl::MemoryManagerDeleteHelper(&memory_manager)};
+  JxlCmsInterface cms;
+  bool cms_set;
+  // NOTE(review): presumably every JxlEncoderFrameSettings created for this
+  // encoder is owned here — confirm in JxlEncoderFrameSettingsCreate.
+  std::vector<jxl::MemoryManagerUniquePtr<JxlEncoderFrameSettings>>
+      encoder_options;
+
+  size_t num_queued_frames;
+  size_t num_queued_boxes;
+  // Frames and boxes that still have to be encoded.
+  std::vector<jxl::JxlEncoderQueuedInput> input_queue;
+  // Encoded bytes that JxlEncoderProcessOutput has not copied out yet.
+  std::deque<uint8_t> output_byte_queue;
+  // Fast-lossless frames that JxlEncoderProcessOutput writes out directly.
+  std::deque<jxl::FJXLFrameUniquePtr> output_fast_frame_queue;
+
+  // How many codestream bytes have been written, i.e.,
+  // content of jxlc and jxlp boxes. Frame index box jxli
+  // requires position indices to point to codestream bytes,
+  // so we need to keep track of the total of flushed or queue
+  // codestream bytes. These bytes may be in a single jxlc box
+  // or across multiple jxlp boxes.
+  size_t codestream_bytes_written_beginning_of_frame;
+  size_t codestream_bytes_written_end_of_frame;
+  jxl::JxlEncoderFrameIndexBox frame_index_box;
+
+  // Force using the container even if not needed
+  bool use_container;
+  // User declared they will add metadata boxes
+  bool use_boxes;
+
+  // TODO(lode): move level into jxl::CompressParams since some C++
+  // implementation decisions should be based on it: level 10 allows more
+  // features to be used.
+  int32_t codestream_level;
+  bool store_jpeg_metadata;
+  jxl::CodecMetadata metadata;
+  std::vector<uint8_t> jpeg_metadata;
+
+  // Wrote any output at all, so wrote the data before the first user added
+  // frame or box, such as signature, basic info, ICC profile or jpeg
+  // reconstruction box.
+  bool wrote_bytes;
+  jxl::CompressParams last_used_cparams;
+  JxlBasicInfo basic_info;
+
+  // Encoder wrote a jxlp (partial codestream) box, so any next codestream
+  // parts must also be written in jxlp boxes, a single jxlc box cannot be
+  // used. The counter is used for the 4-byte jxlp box index header.
+  size_t jxlp_counter;
+
+  // Set by JxlEncoderCloseFrames / JxlEncoderCloseBoxes respectively.
+  bool frames_closed;
+  bool boxes_closed;
+  bool basic_info_set;
+  bool color_encoding_set;
+  bool intensity_target_set;
+  // Set by JxlEncoderAllowExpertOptions.
+  bool allow_expert_options = false;
+  int brotli_effort = -1;
+
+  // Takes the first frame in the input_queue, encodes it, and appends
+  // the bytes to the output_byte_queue.
+  JxlEncoderStatus RefillOutputByteQueue();
+
+  // True when the container was forced, when codestream_level is neither 5
+  // nor -1, when JPEG metadata is stored, or when boxes were announced.
+  bool MustUseContainer() const {
+    return use_container || (codestream_level != 5 && codestream_level != -1) ||
+           store_jpeg_metadata || use_boxes;
+  }
+
+  // Appends the bytes of a JXL box header with the provided type and size to
+  // the end of the output_byte_queue. If unbounded is true, the size won't be
+  // added to the header and the box will be assumed to continue until EOF.
+  void AppendBoxHeader(const jxl::BoxType& type, size_t size, bool unbounded);
+};
+
+// Per-frame settings handle: the option values plus a pointer back to the
+// encoder they belong to.
+struct JxlEncoderFrameSettingsStruct {
+  // Encoder these settings belong to; the setters use it for error reporting
+  // and for reading the encoder metadata.
+  JxlEncoder* enc;
+  jxl::JxlEncoderFrameSettingsValues values;
+};
+
+// Statistics object behind the JxlEncoderStats* API: a wrapper around one
+// jxl::AuxOut, which JxlEncoderStatsGet reads and JxlEncoderStatsMerge
+// combines.
+struct JxlEncoderStatsStruct {
+  jxl::AuxOut aux_out;
+};
+
+#endif  // LIB_JXL_ENCODE_INTERNAL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/encode_test.cc b/third-party/libjxl/libjxl/lib/jxl/encode_test.cc
new file mode 100644
index 0000000000..8aac853321
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/encode_test.cc
@@ -0,0 +1,1406 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/decode.h>
+#include <jxl/decode_cxx.h>
+#include <jxl/encode.h>
+#include <jxl/encode_cxx.h>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/dec/jxl.h"
+#include "lib/extras/metrics.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/encode_internal.h"
+#include "lib/jxl/jpeg/dec_jpeg_data.h"
+#include "lib/jxl/jpeg/dec_jpeg_data_writer.h"
+#include "lib/jxl/test_image.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+// Once the input has been closed, adding an image frame must fail, even
+// though codestream level, basic info and color encoding can still be set.
+TEST(EncodeTest, AddFrameAfterCloseInputTest) {
+  JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+  EXPECT_NE(nullptr, enc.get());
+
+  JxlEncoderCloseInput(enc.get());
+
+  size_t xsize = 64;
+  size_t ysize = 64;
+  JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+
+  JxlBasicInfo basic_info;
+  jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+  basic_info.xsize = xsize;
+  basic_info.ysize = ysize;
+  basic_info.uses_original_profile = false;
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10));
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+  JxlColorEncoding color_encoding;
+  JxlColorEncodingSetToSRGB(&color_encoding,
+                            /*is_gray=*/pixel_format.num_channels < 3);
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderSetColorEncoding(enc.get(), &color_encoding));
+  JxlEncoderFrameSettings* frame_settings =
+      JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+  // The frame itself is well-formed; it is rejected only because the input
+  // was closed at the top of the test.
+  EXPECT_EQ(JXL_ENC_ERROR,
+            JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+                                    pixels.data(), pixels.size()));
+}
+
+// Once the input has been closed, adding a JPEG frame must fail.
+TEST(EncodeTest, AddJPEGAfterCloseTest) {
+  JxlEncoderPtr encoder = JxlEncoderMake(nullptr);
+  EXPECT_NE(nullptr, encoder.get());
+
+  JxlEncoderCloseInput(encoder.get());
+
+  const std::string jpeg_path = "jxl/flower/flower.png.im_q85_420.jpg";
+  const jxl::PaddedBytes jpeg_bytes = jxl::test::ReadTestData(jpeg_path);
+
+  JxlEncoderFrameSettings* settings =
+      JxlEncoderFrameSettingsCreate(encoder.get(), NULL);
+
+  const JxlEncoderStatus status =
+      JxlEncoderAddJPEGFrame(settings, jpeg_bytes.data(), jpeg_bytes.size());
+  EXPECT_EQ(JXL_ENC_ERROR, status);
+}
+
+// Adding an image frame must fail when basic info has been set but no color
+// encoding has been set.
+TEST(EncodeTest, AddFrameBeforeColorEncodingTest) {
+  JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+  EXPECT_NE(nullptr, enc.get());
+
+  size_t xsize = 64;
+  size_t ysize = 64;
+  JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+
+  JxlBasicInfo basic_info;
+  jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+  basic_info.xsize = xsize;
+  basic_info.ysize = ysize;
+  basic_info.uses_original_profile = true;
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10));
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+  JxlEncoderFrameSettings* frame_settings =
+      JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+  // No JxlEncoderSetColorEncoding call was made, so the frame is rejected.
+  EXPECT_EQ(JXL_ENC_ERROR,
+            JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+                                    pixels.data(), pixels.size()));
+}
+
+// Without basic info, both setting the color encoding and adding an image
+// frame must fail.
+TEST(EncodeTest, AddFrameBeforeBasicInfoTest) {
+  JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+  EXPECT_NE(nullptr, enc.get());
+
+  size_t xsize = 64;
+  size_t ysize = 64;
+  JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+
+  JxlColorEncoding color_encoding;
+  JxlColorEncodingSetToSRGB(&color_encoding,
+                            /*is_gray=*/pixel_format.num_channels < 3);
+  EXPECT_EQ(JXL_ENC_ERROR,
+            JxlEncoderSetColorEncoding(enc.get(), &color_encoding));
+  JxlEncoderFrameSettings* frame_settings =
+      JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+  EXPECT_EQ(JXL_ENC_ERROR,
+            JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+                                    pixels.data(), pixels.size()));
+}
+
+// Creating an encoder with a null memory manager yields a non-null encoder,
+// which is then destroyed explicitly.
+TEST(EncodeTest, DefaultAllocTest) {
+  JxlEncoder* const encoder = JxlEncoderCreate(nullptr);
+  EXPECT_NE(nullptr, encoder);
+  JxlEncoderDestroy(encoder);
+}
+
+// An encoder created with a custom memory manager must call its alloc hook at
+// least once on creation, must not free anything while alive, and must call
+// the free hook at least once when it goes out of scope.
+TEST(EncodeTest, CustomAllocTest) {
+  struct CalledCounters {
+    int allocs = 0;
+    int frees = 0;
+  } call_counts;
+
+  JxlMemoryManager manager;
+  manager.opaque = &call_counts;
+  manager.alloc = [](void* opaque, size_t size) {
+    CalledCounters* counts = reinterpret_cast<CalledCounters*>(opaque);
+    counts->allocs++;
+    return malloc(size);
+  };
+  manager.free = [](void* opaque, void* address) {
+    CalledCounters* counts = reinterpret_cast<CalledCounters*>(opaque);
+    counts->frees++;
+    free(address);
+  };
+
+  {
+    // Scope bounds the encoder lifetime so destruction can be observed.
+    JxlEncoderPtr encoder = JxlEncoderMake(&manager);
+    EXPECT_NE(nullptr, encoder.get());
+    EXPECT_LE(1, call_counts.allocs);
+    EXPECT_EQ(0, call_counts.frees);
+  }
+  EXPECT_LE(1, call_counts.frees);
+}
+
+// Setting a null parallel runner with a null opaque pointer must succeed.
+TEST(EncodeTest, DefaultParallelRunnerTest) {
+  JxlEncoderPtr encoder = JxlEncoderMake(nullptr);
+  EXPECT_NE(nullptr, encoder.get());
+  const JxlEncoderStatus status =
+      JxlEncoderSetParallelRunner(encoder.get(), nullptr, nullptr);
+  EXPECT_EQ(JXL_ENC_SUCCESS, status);
+}
+
+// Encodes one xsize x ysize 4-channel 16-bit test image with `enc` and
+// `frame_settings`, then checks the result.
+//
+// Along the way it also checks that a gray color encoding and a 1-channel
+// pixel format are rejected for this image. The compressed output must be at
+// most `max_compressed_size` bytes, must decode, and the decoded image must
+// stay within a fixed distance of the input. `lossy_use_original_profile`
+// forces uses_original_profile even when the settings are not lossless.
+void VerifyFrameEncoding(size_t xsize, size_t ysize, JxlEncoder* enc,
+                         const JxlEncoderFrameSettings* frame_settings,
+                         size_t max_compressed_size,
+                         bool lossy_use_original_profile) {
+  JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+
+  // Reference image for the distance comparison at the end.
+  jxl::CodecInOut input_io =
+      jxl::test::SomeTestImageToCodecInOut(pixels, 4, xsize, ysize);
+
+  JxlBasicInfo basic_info;
+  jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+  basic_info.xsize = xsize;
+  basic_info.ysize = ysize;
+  if (frame_settings->values.lossless || lossy_use_original_profile) {
+    basic_info.uses_original_profile = true;
+  } else {
+    basic_info.uses_original_profile = false;
+  }
+  // 16-bit alpha means this requires level 10
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc, 10));
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc, &basic_info));
+  JxlColorEncoding color_encoding;
+  // A gray color encoding is expected to be rejected for this image; the
+  // non-gray one must then be accepted.
+  JxlColorEncodingSetToSRGB(&color_encoding, true);
+  EXPECT_EQ(JXL_ENC_ERROR, JxlEncoderSetColorEncoding(enc, &color_encoding));
+  JxlColorEncodingSetToSRGB(&color_encoding, false);
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetColorEncoding(enc, &color_encoding));
+  // Likewise a 1-channel pixel format is expected to be rejected before the
+  // real 4-channel frame is added.
+  pixel_format.num_channels = 1;
+  EXPECT_EQ(JXL_ENC_ERROR,
+            JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+                                    pixels.data(), pixels.size()));
+  pixel_format.num_channels = 4;
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+                                    pixels.data(), pixels.size()));
+  JxlEncoderCloseInput(enc);
+
+  // Drain the encoder into a buffer that starts at 64 bytes and doubles each
+  // time more output space is requested. next_out is re-derived after every
+  // resize because the vector may have reallocated.
+  std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+  uint8_t* next_out = compressed.data();
+  size_t avail_out = compressed.size() - (next_out - compressed.data());
+  JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+  while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+    process_result = JxlEncoderProcessOutput(enc, &next_out, &avail_out);
+    if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+      size_t offset = next_out - compressed.data();
+      compressed.resize(compressed.size() * 2);
+      next_out = compressed.data() + offset;
+      avail_out = compressed.size() - offset;
+    }
+  }
+  // Trim the buffer to the bytes actually written.
+  compressed.resize(next_out - compressed.data());
+  EXPECT_LE(compressed.size(), max_compressed_size);
+  EXPECT_EQ(JXL_ENC_SUCCESS, process_result);
+  jxl::CodecInOut decoded_io;
+  EXPECT_TRUE(jxl::test::DecodeFile(
+      {}, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+      &decoded_io));
+
+  // The allowed distance depends on the JXL_HIGH_PRECISION build setting.
+  EXPECT_LE(
+      ComputeDistance2(input_io.Main(), decoded_io.Main(), jxl::GetJxlCms()),
+#if JXL_HIGH_PRECISION
+      1.84);
+#else
+      8.7);
+#endif
+}
+
+// Convenience overload: verifies a 63x129 image with a size limit of 2700
+// bytes and without forcing the original profile.
+void VerifyFrameEncoding(JxlEncoder* enc,
+                         const JxlEncoderFrameSettings* frame_settings) {
+  const size_t default_xsize = 63;
+  const size_t default_ysize = 129;
+  const size_t default_max_compressed_size = 2700;
+  VerifyFrameEncoding(default_xsize, default_ysize, enc, frame_settings,
+                      default_max_compressed_size,
+                      /*lossy_use_original_profile=*/false);
+}
+
+// Encodes and verifies one frame using freshly created default settings.
+TEST(EncodeTest, FrameEncodingTest) {
+  JxlEncoderPtr encoder = JxlEncoderMake(nullptr);
+  EXPECT_NE(nullptr, encoder.get());
+  JxlEncoderFrameSettings* default_settings =
+      JxlEncoderFrameSettingsCreate(encoder.get(), nullptr);
+  VerifyFrameEncoding(encoder.get(), default_settings);
+}
+
+// After JxlEncoderReset the same encoder must be usable for a completely new
+// image, here one with different dimensions than the first.
+TEST(EncodeTest, EncoderResetTest) {
+  JxlEncoderPtr encoder = JxlEncoderMake(nullptr);
+  EXPECT_NE(nullptr, encoder.get());
+
+  JxlEncoderFrameSettings* first_settings =
+      JxlEncoderFrameSettingsCreate(encoder.get(), nullptr);
+  VerifyFrameEncoding(50, 200, encoder.get(), first_settings, 4300, false);
+
+  JxlEncoderReset(encoder.get());
+
+  // Settings are created anew after the reset rather than reused.
+  JxlEncoderFrameSettings* second_settings =
+      JxlEncoderFrameSettingsCreate(encoder.get(), nullptr);
+  VerifyFrameEncoding(157, 77, encoder.get(), second_settings, 2300, false);
+}
+
+// Checks that a CMS installed with JxlEncoderSetCms is actually used: the
+// init hook of the default CMS is wrapped so that it records being called,
+// and the flag must be set after a lossy encode.
+TEST(EncodeTest, CmsTest) {
+  JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+  EXPECT_NE(nullptr, enc.get());
+  bool cms_called = false;
+  JxlCmsInterface cms = jxl::GetJxlCms();
+  // Carries the original init hook and its data, plus the flag to set, through
+  // the opaque init_data pointer.
+  struct InitData {
+    void* original_init_data;
+    jpegxl_cms_init_func original_init;
+    bool* cms_called;
+  };
+  InitData init_data = {/*original_init_data=*/cms.init_data,
+                        /*original_init=*/cms.init,
+                        /*cms_called=*/&cms_called};
+  cms.init_data = &init_data;
+  // The unary + turns the capture-less lambda into a plain function pointer.
+  cms.init = +[](void* raw_init_data, size_t num_threads,
+                 size_t pixels_per_thread, const JxlColorProfile* input_profile,
+                 const JxlColorProfile* output_profile,
+                 float intensity_target) {
+    const InitData* init_data = static_cast<const InitData*>(raw_init_data);
+    *init_data->cms_called = true;
+    // Forward to the original hook so the encode itself behaves normally.
+    return init_data->original_init(init_data->original_init_data, num_threads,
+                                    pixels_per_thread, input_profile,
+                                    output_profile, intensity_target);
+  };
+  JxlEncoderSetCms(enc.get(), cms);
+  JxlEncoderFrameSettings* frame_settings =
+      JxlEncoderFrameSettingsCreate(enc.get(), nullptr);
+  JxlEncoderSetFrameLossless(frame_settings, false);
+  ASSERT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderFrameSettingsSetOption(frame_settings,
+                                             JXL_ENC_FRAME_SETTING_EFFORT, 8));
+  VerifyFrameEncoding(enc.get(), frame_settings);
+  EXPECT_TRUE(cms_called);
+}
+
+TEST(EncodeTest, frame_settingsTest) {
+  {
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    EXPECT_NE(nullptr, enc.get());
+    JxlEncoderFrameSettings* frame_settings =
+        JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderFrameSettingsSetOption(
+                  frame_settings, JXL_ENC_FRAME_SETTING_EFFORT, 5));
+    VerifyFrameEncoding(enc.get(), frame_settings);
+    EXPECT_EQ(jxl::SpeedTier::kHare, enc->last_used_cparams.speed_tier);
+  }
+
+  {
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    EXPECT_NE(nullptr, enc.get());
+    JxlEncoderFrameSettings* frame_settings =
+        JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+    // Lower than currently supported values
+    EXPECT_EQ(JXL_ENC_ERROR,
+              JxlEncoderFrameSettingsSetOption(
+                  frame_settings, JXL_ENC_FRAME_SETTING_EFFORT, 0));
+    // Higher than currently supported values
+    EXPECT_EQ(JXL_ENC_ERROR,
+              JxlEncoderFrameSettingsSetOption(
+                  frame_settings, JXL_ENC_FRAME_SETTING_EFFORT, 11));
+  }
+
+  {
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    EXPECT_NE(nullptr, enc.get());
+    JxlEncoderFrameSettings* frame_settings =
+        JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderSetFrameLossless(frame_settings, JXL_TRUE));
+    VerifyFrameEncoding(63, 129, enc.get(), frame_settings, 3000, false);
+    EXPECT_EQ(true, enc->last_used_cparams.IsLossless());
+  }
+
+  {
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    EXPECT_NE(nullptr, enc.get());
+    JxlEncoderFrameSettings* frame_settings =
+        JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+    EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetFrameDistance(frame_settings, 0.5));
+    VerifyFrameEncoding(63, 129, enc.get(), frame_settings, 3030, false);
+    EXPECT_EQ(0.5, enc->last_used_cparams.butteraugli_distance);
+  }
+
+  {
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    JxlEncoderFrameSettings* frame_settings =
+        JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+    // Disallowed negative distance
+    EXPECT_EQ(JXL_ENC_ERROR, JxlEncoderSetFrameDistance(frame_settings, -1));
+  }
+
+  {
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    EXPECT_NE(nullptr, enc.get());
+    JxlEncoderFrameSettings* frame_settings =
+        JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderFrameSettingsSetOption(
+                  frame_settings, JXL_ENC_FRAME_SETTING_DECODING_SPEED, 2));
+    VerifyFrameEncoding(enc.get(), frame_settings);
+    EXPECT_EQ(2u, enc->last_used_cparams.decoding_speed_tier);
+  }
+
+  {
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    EXPECT_NE(nullptr, enc.get());
+    JxlEncoderFrameSettings* frame_settings =
+        JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+    EXPECT_EQ(JXL_ENC_ERROR,
+              JxlEncoderFrameSettingsSetOption(
+                  frame_settings, JXL_ENC_FRAME_SETTING_GROUP_ORDER, 100));
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderFrameSettingsSetOption(
+                  frame_settings, JXL_ENC_FRAME_SETTING_GROUP_ORDER, 1));
+    EXPECT_EQ(
+        JXL_ENC_SUCCESS,
+        JxlEncoderFrameSettingsSetOption(
+            frame_settings, JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_X, 5));
+    VerifyFrameEncoding(enc.get(), frame_settings);
+    EXPECT_EQ(true, enc->last_used_cparams.centerfirst);
+    EXPECT_EQ(5, enc->last_used_cparams.center_x);
+  }
+
+  {
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    EXPECT_NE(nullptr, enc.get());
+    JxlEncoderFrameSettings* frame_settings =
+        JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderFrameSettingsSetOption(
+                  frame_settings, JXL_ENC_FRAME_SETTING_RESPONSIVE, 0));
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderFrameSettingsSetOption(
+                  frame_settings, JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC, 1));
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderFrameSettingsSetOption(
+                  frame_settings, JXL_ENC_FRAME_SETTING_QPROGRESSIVE_AC, -1));
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderFrameSettingsSetOption(
+                  frame_settings, JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC, 2));
+    VerifyFrameEncoding(63, 129, enc.get(), frame_settings, 2830,
+                        /*lossy_use_original_profile=*/false);
+    EXPECT_EQ(false, enc->last_used_cparams.responsive);
+    EXPECT_EQ(true, enc->last_used_cparams.progressive_mode);
+    EXPECT_EQ(2, enc->last_used_cparams.progressive_dc);
+  }
+
+  {
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    EXPECT_NE(nullptr, enc.get());
+    JxlEncoderFrameSettings* frame_settings =
+        JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+    EXPECT_EQ(
+        JXL_ENC_SUCCESS,
+        JxlEncoderFrameSettingsSetFloatOption(
+            frame_settings, JXL_ENC_FRAME_SETTING_PHOTON_NOISE, 1777.777));
+    VerifyFrameEncoding(enc.get(), frame_settings);
+    EXPECT_NEAR(1777.777f, enc->last_used_cparams.photon_noise_iso, 1E-4);
+  }
+
+  {
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    EXPECT_NE(nullptr, enc.get());
+    JxlEncoderFrameSettings* frame_settings =
+        JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderFrameSettingsSetFloatOption(
+                  frame_settings,
+                  JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GLOBAL_PERCENT, 55.0f));
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderFrameSettingsSetFloatOption(
+                  frame_settings,
+                  JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GROUP_PERCENT, 25.0f));
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderFrameSettingsSetOption(
+                  frame_settings, JXL_ENC_FRAME_SETTING_PALETTE_COLORS, 70000));
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderFrameSettingsSetOption(
+                  frame_settings, JXL_ENC_FRAME_SETTING_LOSSY_PALETTE, 1));
+    VerifyFrameEncoding(enc.get(), frame_settings);
+    EXPECT_NEAR(55.0f,
+                enc->last_used_cparams.channel_colors_pre_transform_percent,
+                1E-6);
+    EXPECT_NEAR(25.0f, enc->last_used_cparams.channel_colors_percent, 1E-6);
+    EXPECT_EQ(70000, enc->last_used_cparams.palette_colors);
+    EXPECT_EQ(true, enc->last_used_cparams.lossy_palette);
+  }
+
+  {
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    EXPECT_NE(nullptr, enc.get());
+    JxlEncoderFrameSettings* frame_settings =
+        JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+    EXPECT_EQ(
+        JXL_ENC_SUCCESS,
+        JxlEncoderFrameSettingsSetOption(
+            frame_settings, JXL_ENC_FRAME_SETTING_MODULAR_COLOR_SPACE, 30));
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderFrameSettingsSetOption(
+                  frame_settings, JXL_ENC_FRAME_SETTING_MODULAR_GROUP_SIZE, 2));
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderFrameSettingsSetOption(
+                  frame_settings, JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR, 14));
+    EXPECT_EQ(
+        JXL_ENC_SUCCESS,
+        JxlEncoderFrameSettingsSetFloatOption(
+            frame_settings,
+            JXL_ENC_FRAME_SETTING_MODULAR_MA_TREE_LEARNING_PERCENT, 77.0f));
+    EXPECT_EQ(
+        JXL_ENC_SUCCESS,
+        JxlEncoderFrameSettingsSetOption(
+            frame_settings, JXL_ENC_FRAME_SETTING_MODULAR_NB_PREV_CHANNELS, 7));
+    VerifyFrameEncoding(enc.get(), frame_settings);
+    EXPECT_EQ(30, enc->last_used_cparams.colorspace);
+    EXPECT_EQ(2, enc->last_used_cparams.modular_group_size_shift);
+    EXPECT_EQ(jxl::Predictor::Best, enc->last_used_cparams.options.predictor);
+    EXPECT_NEAR(0.77f, enc->last_used_cparams.options.nb_repeats, 1E-6);
+    EXPECT_EQ(7, enc->last_used_cparams.options.max_properties);
+  }
+
+  {
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    EXPECT_NE(nullptr, enc.get());
+    JxlEncoderFrameSettings* frame_settings =
+        JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderFrameSettingsSetOption(
+                  frame_settings, JXL_ENC_FRAME_SETTING_JPEG_RECON_CFL, 0));
+    VerifyFrameEncoding(enc.get(), frame_settings);
+    EXPECT_EQ(false, enc->last_used_cparams.force_cfl_jpeg_recompression);
+  }
+
+  {
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    EXPECT_NE(nullptr, enc.get());
+    JxlEncoderFrameSettings* frame_settings =
+        JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderFrameSettingsSetOption(
+                  frame_settings, JXL_ENC_FRAME_SETTING_JPEG_RECON_CFL, 1));
+    VerifyFrameEncoding(enc.get(), frame_settings);
+    EXPECT_EQ(true, enc->last_used_cparams.force_cfl_jpeg_recompression);
+  }
+}
+
+TEST(EncodeTest, LossyEncoderUseOriginalProfileTest) {
+  // Three encoder configurations are pushed through VerifyFrameEncoding with
+  // the same 63x129 dimensions and with the trailing flag set to true. Each
+  // configuration passes its own numeric limit as the fifth argument
+  // (presumably a bound on the compressed size -- confirm against
+  // VerifyFrameEncoding).
+  constexpr size_t kXsize = 63;
+  constexpr size_t kYsize = 129;
+  constexpr bool kLossyUseOriginalProfile = true;
+
+  // Configuration 1: frame settings left exactly as created.
+  {
+    JxlEncoderPtr encoder = JxlEncoderMake(nullptr);
+    ASSERT_NE(nullptr, encoder.get());
+    JxlEncoderFrameSettings* settings =
+        JxlEncoderFrameSettingsCreate(encoder.get(), NULL);
+    VerifyFrameEncoding(kXsize, kYsize, encoder.get(), settings, 7897,
+                        kLossyUseOriginalProfile);
+  }
+
+  // Configuration 2: JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC set to 2.
+  {
+    JxlEncoderPtr encoder = JxlEncoderMake(nullptr);
+    ASSERT_NE(nullptr, encoder.get());
+    JxlEncoderFrameSettings* settings =
+        JxlEncoderFrameSettingsCreate(encoder.get(), NULL);
+    const JxlEncoderStatus option_status = JxlEncoderFrameSettingsSetOption(
+        settings, JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC, 2);
+    EXPECT_EQ(JXL_ENC_SUCCESS, option_status);
+    VerifyFrameEncoding(kXsize, kYsize, encoder.get(), settings, 8310,
+                        kLossyUseOriginalProfile);
+  }
+
+  // Configuration 3: JXL_ENC_FRAME_SETTING_EFFORT set to 8. A failure to set
+  // the option aborts the test here.
+  {
+    JxlEncoderPtr encoder = JxlEncoderMake(nullptr);
+    ASSERT_NE(nullptr, encoder.get());
+    JxlEncoderFrameSettings* settings =
+        JxlEncoderFrameSettingsCreate(encoder.get(), NULL);
+    const JxlEncoderStatus option_status = JxlEncoderFrameSettingsSetOption(
+        settings, JXL_ENC_FRAME_SETTING_EFFORT, 8);
+    ASSERT_EQ(JXL_ENC_SUCCESS, option_status);
+    VerifyFrameEncoding(kXsize, kYsize, encoder.get(), settings, 7228,
+                        kLossyUseOriginalProfile);
+  }
+}
+
+namespace {
+// Returns a copy of the `size` bytes of `buf` that start at `offset`. If that
+// range does not lie entirely inside `buf` (including the case where
+// `offset + size` would overflow size_t), returns a vector of `size` zero
+// bytes instead, so the result always holds exactly `size` bytes.
+std::vector<uint8_t> SliceSpan(const jxl::Span<const uint8_t>& buf,
+                               size_t offset, size_t size) {
+  // Compare against the remaining length rather than forming `offset + size`,
+  // so the check itself cannot wrap around. A slice that ends exactly at the
+  // end of `buf` is in bounds and is returned as real data.
+  if (offset > buf.size() || size > buf.size() - offset) {
+    return std::vector<uint8_t>(size, 0);
+  }
+  return std::vector<uint8_t>(buf.data() + offset, buf.data() + offset + size);
+}
+
+// Minimal parser for a single box of the container format, used by the tests
+// below to inspect the encoder's output.
+struct Box {
+  // The type of the box.
+  // If "uuid", use extended_type instead
+  char type[4] = {0, 0, 0, 0};
+
+  // The extended_type is only used when type == "uuid".
+  // Extended types are not used in JXL. However, the box format itself
+  // supports this so they are handled correctly.
+  char extended_type[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+  // Box data.
+  jxl::Span<const uint8_t> data = jxl::Span<const uint8_t>(nullptr, 0);
+
+  // If the size is not given, the datasize extends to the end of the file.
+  // If this field is false, the size field is not encoded when the box is
+  // serialized.
+  bool data_size_given = true;
+
+  // If successful, returns true and sets `in` to be the rest data (if any).
+  // If `in` contains a box with a size larger than `in.size()`, or holds
+  // fewer bytes than the header of an unbounded box, will not modify `in`,
+  // and will return true but the data `Span<uint8_t>` will remain set to
+  // nullptr.
+  // If unsuccessful, returns error and doesn't modify `in`.
+  // Note that `type` / `extended_type` are overwritten in every case.
+  jxl::Status Decode(jxl::Span<const uint8_t>* in) {
+    // Total box_size including this header itself. SliceSpan yields zero
+    // bytes for any field that is not fully present in `in`.
+    uint64_t box_size = LoadBE32(SliceSpan(*in, 0, 4).data());
+    size_t pos = 4;
+
+    memcpy(type, SliceSpan(*in, pos, 4).data(), 4);
+    pos += 4;
+
+    if (box_size == 1) {
+      // If the size is 1, it indicates extended size read from 64-bit integer.
+      box_size = LoadBE64(SliceSpan(*in, pos, 8).data());
+      pos += 8;
+    }
+
+    if (!memcmp("uuid", type, 4)) {
+      memcpy(extended_type, SliceSpan(*in, pos, 16).data(), 16);
+      pos += 16;
+    }
+
+    // This is the end of the box header, the box data begins here. Handle
+    // the data size now.
+    const size_t header_size = pos;
+
+    if (box_size != 0) {
+      if (box_size < header_size) {
+        return JXL_FAILURE("Invalid box size");
+      }
+      if (box_size > in->size()) {
+        // The box is fine, but the input is too short.
+        return true;
+      }
+      data_size_given = true;
+      data = jxl::Span<const uint8_t>(in->data() + header_size,
+                                      box_size - header_size);
+    } else {
+      if (in->size() < header_size) {
+        // Not even a complete header is available. Treat this like any other
+        // too-short input instead of letting `in->size() - header_size` wrap
+        // around to a huge length.
+        return true;
+      }
+      data_size_given = false;
+      data = jxl::Span<const uint8_t>(in->data() + header_size,
+                                      in->size() - header_size);
+    }
+
+    // Advance `in` past the header and payload that were just consumed.
+    *in = jxl::Span<const uint8_t>(in->data() + header_size + data.size(),
+                                   in->size() - header_size - data.size());
+    return true;
+  }
+};
+
+struct Container {
+  // Boxes decoded after the signature and ftyp boxes, in decoding order.
+  std::vector<Box> boxes;
+
+  // Parses `in` as a sequence of boxes: a "JXL " signature box, an "ftyp"
+  // box, then any further boxes, which are collected in `boxes`.
+  // `in` is advanced past every box that was decoded. Returns true once the
+  // input is exhausted, or as soon as a box is met whose data is not fully
+  // available (that box is left in `in`). Returns an error if a box fails to
+  // decode or if the signature / ftyp box does not have the expected content.
+  jxl::Status Decode(jxl::Span<const uint8_t>* in) {
+    boxes.clear();
+
+    // Signature box: type "JXL " with the 4 payload bytes 0x0d 0x0a 0x87 0x0a.
+    static const uint8_t kSignaturePayload[4] = {0xd, 0xa, 0x87, 0xa};
+    Box signature_box;
+    JXL_RETURN_IF_ERROR(signature_box.Decode(in));
+    const bool signature_ok =
+        memcmp("JXL ", signature_box.type, 4) == 0 &&
+        signature_box.data.size() == 4 &&
+        memcmp(kSignaturePayload, signature_box.data.data(), 4) == 0;
+    if (!signature_ok) {
+      return JXL_FAILURE("Invalid magic signature");
+    }
+
+    // File type box: type "ftyp" with a fixed 12-byte payload.
+    static const char kFtypPayload[] = "jxl \0\0\0\0jxl ";
+    Box ftyp_box;
+    JXL_RETURN_IF_ERROR(ftyp_box.Decode(in));
+    const bool ftyp_ok = memcmp("ftyp", ftyp_box.type, 4) == 0 &&
+                         ftyp_box.data.size() == 12 &&
+                         memcmp(kFtypPayload, ftyp_box.data.data(), 12) == 0;
+    if (!ftyp_ok) {
+      return JXL_FAILURE("Invalid ftyp");
+    }
+
+    // Everything that follows is collected as-is.
+    while (!in->empty()) {
+      Box next_box = {};
+      JXL_RETURN_IF_ERROR(next_box.Decode(in));
+      const bool incomplete = next_box.data.data() == nullptr;
+      if (incomplete) {
+        // A box header was found, but its data is not available yet.
+        return true;
+      }
+      boxes.push_back(next_box);
+    }
+
+    return true;
+  }
+};
+
+}  // namespace
+
+// Encodes a single frame with the container format enabled and checks that
+// the codestream is stored in a jxlc box or in jxlp boxes, and that every box
+// after the signature and ftyp boxes carries an explicit size.
+TEST(EncodeTest, SingleFrameBoundedJXLCTest) {
+  JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+  // Fatal check: every statement below dereferences the encoder.
+  ASSERT_NE(nullptr, enc.get());
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderUseContainer(enc.get(), true));
+  JxlEncoderFrameSettings* frame_settings =
+      JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+
+  size_t xsize = 71;
+  size_t ysize = 23;
+  JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+
+  JxlBasicInfo basic_info;
+  jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+  basic_info.xsize = xsize;
+  basic_info.ysize = ysize;
+  basic_info.uses_original_profile = false;
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10));
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+  JxlColorEncoding color_encoding;
+  JxlColorEncodingSetToSRGB(&color_encoding,
+                            /*is_gray=*/false);
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderSetColorEncoding(enc.get(), &color_encoding));
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+                                    pixels.data(), pixels.size()));
+  JxlEncoderCloseInput(enc.get());
+
+  // Drain the encoder, doubling the output buffer whenever it runs full.
+  std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+  uint8_t* next_out = compressed.data();
+  size_t avail_out = compressed.size();
+  JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+  while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+    process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out);
+    if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+      // resize() may reallocate: keep the write position as an offset.
+      size_t offset = next_out - compressed.data();
+      compressed.resize(compressed.size() * 2);
+      next_out = compressed.data() + offset;
+      avail_out = compressed.size() - offset;
+    }
+  }
+  compressed.resize(next_out - compressed.data());
+  EXPECT_EQ(JXL_ENC_SUCCESS, process_result);
+
+  // Parse the output as a box container; all bytes must be consumed.
+  Container container = {};
+  jxl::Span<const uint8_t> encoded_span =
+      jxl::Span<const uint8_t>(compressed.data(), compressed.size());
+  EXPECT_TRUE(container.Decode(&encoded_span));
+  EXPECT_EQ(0u, encoded_span.size());
+  bool found_jxlc = false;
+  bool found_jxlp = false;
+  // The encoder is allowed to either emit a jxlc or one or more jxlp.
+  for (size_t i = 0; i < container.boxes.size(); ++i) {
+    if (memcmp("jxlc", container.boxes[i].type, 4) == 0) {
+      EXPECT_EQ(false, found_jxlc);  // Max 1 jxlc
+      EXPECT_EQ(false, found_jxlp);  // Can't mix jxlc and jxlp
+      found_jxlc = true;
+    }
+    if (memcmp("jxlp", container.boxes[i].type, 4) == 0) {
+      EXPECT_EQ(false, found_jxlc);  // Can't mix jxlc and jxlp
+      found_jxlp = true;
+    }
+    // The encoder shouldn't create an unbounded box in this case, with the
+    // single frame it knows the full size in time, so can help make decoding
+    // more efficient by giving the full box size of the final box.
+    EXPECT_EQ(true, container.boxes[i].data_size_given);
+  }
+  EXPECT_EQ(true, found_jxlc || found_jxlp);
+}
+
+// Requests codestream level 10 and checks that the first box after the
+// signature and ftyp boxes of the encoded output is a "jxll" box.
+TEST(EncodeTest, CodestreamLevelTest) {
+  size_t xsize = 64;
+  size_t ysize = 64;
+  JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+
+  // NOTE(review): input_io is not read anywhere below; kept in case the
+  // conversion is exercised on purpose -- confirm before removing.
+  jxl::CodecInOut input_io =
+      jxl::test::SomeTestImageToCodecInOut(pixels, 4, xsize, ysize);
+
+  JxlBasicInfo basic_info;
+  jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+  basic_info.xsize = xsize;
+  basic_info.ysize = ysize;
+  basic_info.uses_original_profile = false;
+
+  JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+  // Fatal check: every statement below dereferences the encoder.
+  ASSERT_NE(nullptr, enc.get());
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10));
+  JxlEncoderFrameSettings* frame_settings =
+      JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+  JxlColorEncoding color_encoding;
+  JxlColorEncodingSetToSRGB(&color_encoding,
+                            /*is_gray=*/pixel_format.num_channels < 3);
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderSetColorEncoding(enc.get(), &color_encoding));
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+                                    pixels.data(), pixels.size()));
+  JxlEncoderCloseInput(enc.get());
+
+  // Drain the encoder, doubling the output buffer whenever it runs full.
+  std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+  uint8_t* next_out = compressed.data();
+  size_t avail_out = compressed.size();
+  JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+  while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+    process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out);
+    if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+      // resize() may reallocate: keep the write position as an offset.
+      size_t offset = next_out - compressed.data();
+      compressed.resize(compressed.size() * 2);
+      next_out = compressed.data() + offset;
+      avail_out = compressed.size() - offset;
+    }
+  }
+  compressed.resize(next_out - compressed.data());
+  EXPECT_EQ(JXL_ENC_SUCCESS, process_result);
+
+  Container container = {};
+  jxl::Span<const uint8_t> encoded_span =
+      jxl::Span<const uint8_t>(compressed.data(), compressed.size());
+  EXPECT_TRUE(container.Decode(&encoded_span));
+  EXPECT_EQ(0u, encoded_span.size());
+  // Fatal check: indexing boxes[0] on an empty vector is undefined behavior.
+  ASSERT_FALSE(container.boxes.empty());
+  EXPECT_EQ(0, memcmp("jxll", container.boxes[0].type, 4));
+}
+
+TEST(EncodeTest, CodestreamLevelVerificationTest) {
+  // Checks which codestream level the encoder reports as required, and which
+  // basic infos it accepts, for image widths of 64, 2^30 and 2^31.
+  JxlPixelFormat format = {4, JXL_TYPE_UINT8, JXL_BIG_ENDIAN, 0};
+
+  JxlBasicInfo info;
+  jxl::test::JxlBasicInfoSetFromPixelFormat(&info, &format);
+  info.xsize = 64;
+  info.ysize = 64;
+  info.uses_original_profile = false;
+
+  JxlEncoderPtr encoder = JxlEncoderMake(nullptr);
+  JxlEncoder* const raw_encoder = encoder.get();
+
+  // A 64x64 image is accepted and is reported to need level 5.
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(raw_encoder, &info));
+  EXPECT_EQ(5, JxlEncoderGetRequiredCodestreamLevel(raw_encoder));
+
+  // A width of 2^30 is rejected while level 5 is selected, and accepted once
+  // level 10 is selected; the required level is then reported as 10.
+  info.xsize = 1ull << 30ull;
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(raw_encoder, 5));
+  EXPECT_EQ(JXL_ENC_ERROR, JxlEncoderSetBasicInfo(raw_encoder, &info));
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(raw_encoder, 10));
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(raw_encoder, &info));
+  EXPECT_EQ(10, JxlEncoderGetRequiredCodestreamLevel(raw_encoder));
+
+  // A width of 2^31 is rejected even with level 10 selected.
+  info.xsize = 1ull << 31ull;
+  EXPECT_EQ(JXL_ENC_ERROR, JxlEncoderSetBasicInfo(raw_encoder, &info));
+}
+
+// Transcodes a JPEG file with JPEG metadata storage enabled, decodes the
+// result again and checks that the reconstructed JPEG bytes are identical to
+// the original file.
+TEST(EncodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGReconstructionTest)) {
+  const std::string jpeg_path = "jxl/flower/flower.png.im_q85_420.jpg";
+  const jxl::PaddedBytes orig = jxl::test::ReadTestData(jpeg_path);
+
+  JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+  JxlEncoderFrameSettings* frame_settings =
+      JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderStoreJPEGMetadata(enc.get(), JXL_TRUE));
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderAddJPEGFrame(frame_settings, orig.data(), orig.size()));
+  JxlEncoderCloseInput(enc.get());
+
+  // Drain the encoder, doubling the output buffer whenever it runs full.
+  std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+  uint8_t* next_out = compressed.data();
+  size_t avail_out = compressed.size();
+  JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+  while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+    process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out);
+    if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+      // resize() may reallocate: keep the write position as an offset.
+      size_t offset = next_out - compressed.data();
+      compressed.resize(compressed.size() * 2);
+      next_out = compressed.data() + offset;
+      avail_out = compressed.size() - offset;
+    }
+  }
+  compressed.resize(next_out - compressed.data());
+  EXPECT_EQ(JXL_ENC_SUCCESS, process_result);
+
+  jxl::extras::JXLDecompressParams dparams;
+  jxl::test::DefaultAcceptedFormats(dparams);
+  std::vector<uint8_t> decoded_jpeg_bytes;
+  jxl::extras::PackedPixelFile ppf;
+  EXPECT_TRUE(DecodeImageJXL(compressed.data(), compressed.size(), dparams,
+                             nullptr, &ppf, &decoded_jpeg_bytes));
+
+  // Fatal check: the memcmp below reads orig.size() bytes from both buffers,
+  // which would run past the end of a shorter decoded buffer.
+  ASSERT_EQ(decoded_jpeg_bytes.size(), orig.size());
+  EXPECT_EQ(0, memcmp(decoded_jpeg_bytes.data(), orig.data(), orig.size()));
+}
+
+// Same round trip as JPEGReconstructionTest, but with progressive_mode
+// switched on directly in the frame settings' compression parameters.
+TEST(EncodeTest, JXL_TRANSCODE_JPEG_TEST(ProgressiveJPEGReconstructionTest)) {
+  const std::string jpeg_path = "jxl/flower/flower.png.im_q85_420.jpg";
+  const jxl::PaddedBytes orig = jxl::test::ReadTestData(jpeg_path);
+
+  JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+  JxlEncoderFrameSettings* frame_settings =
+      JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+
+  // Set through the internal struct rather than through the public option
+  // API.
+  frame_settings->values.cparams.progressive_mode = true;
+
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderStoreJPEGMetadata(enc.get(), JXL_TRUE));
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderAddJPEGFrame(frame_settings, orig.data(), orig.size()));
+  JxlEncoderCloseInput(enc.get());
+
+  // Drain the encoder, doubling the output buffer whenever it runs full.
+  std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+  uint8_t* next_out = compressed.data();
+  size_t avail_out = compressed.size();
+  JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+  while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+    process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out);
+    if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+      // resize() may reallocate: keep the write position as an offset.
+      size_t offset = next_out - compressed.data();
+      compressed.resize(compressed.size() * 2);
+      next_out = compressed.data() + offset;
+      avail_out = compressed.size() - offset;
+    }
+  }
+  compressed.resize(next_out - compressed.data());
+  EXPECT_EQ(JXL_ENC_SUCCESS, process_result);
+
+  jxl::extras::JXLDecompressParams dparams;
+  jxl::test::DefaultAcceptedFormats(dparams);
+  std::vector<uint8_t> decoded_jpeg_bytes;
+  jxl::extras::PackedPixelFile ppf;
+  EXPECT_TRUE(DecodeImageJXL(compressed.data(), compressed.size(), dparams,
+                             nullptr, &ppf, &decoded_jpeg_bytes));
+
+  // Fatal check: the memcmp below reads orig.size() bytes from both buffers,
+  // which would run past the end of a shorter decoded buffer.
+  ASSERT_EQ(decoded_jpeg_bytes.size(), orig.size());
+  EXPECT_EQ(0, memcmp(decoded_jpeg_bytes.data(), orig.data(), orig.size()));
+}
+
+// Drains `enc` into `compressed` by calling JxlEncoderProcessOutput until it
+// stops returning JXL_ENC_NEED_MORE_OUTPUT, growing the buffer as needed.
+// `next_out` is expected to point into `compressed`; its offset from
+// `compressed.data()` is used as the number of bytes written so far.
+// On return `compressed` is trimmed to the bytes written, `next_out` points
+// at its end and `avail_out` is 0. Records a non-fatal test failure if the
+// final status is not JXL_ENC_SUCCESS.
+static void ProcessEncoder(JxlEncoder* enc, std::vector<uint8_t>& compressed,
+                           uint8_t*& next_out, size_t& avail_out) {
+  // Size used when the buffer is empty: doubling a size of 0 would never make
+  // room, and the loop below would spin forever.
+  constexpr size_t kInitialCapacity = 64;
+  JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+  while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+    process_result = JxlEncoderProcessOutput(enc, &next_out, &avail_out);
+    if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+      // resize() may reallocate, so remember the write position as an offset
+      // and rebuild the pointer afterwards.
+      const size_t offset = next_out - compressed.data();
+      compressed.resize(compressed.empty() ? kInitialCapacity
+                                           : compressed.size() * 2);
+      next_out = compressed.data() + offset;
+      avail_out = compressed.size() - offset;
+    }
+  }
+  // Trim the unused tail and re-derive the output cursor from the new size.
+  const size_t written = next_out - compressed.data();
+  compressed.resize(written);
+  next_out = compressed.data() + written;
+  avail_out = compressed.size() - written;
+  EXPECT_EQ(JXL_ENC_SUCCESS, process_result);
+}
+
+// Sets a number of non-default JxlBasicInfo fields on the encoder, encodes a
+// 1x1 frame and checks that the decoder reports the same basic info.
+TEST(EncodeTest, BasicInfoTest) {
+  JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+  // Fatal check: every statement below dereferences the encoder.
+  ASSERT_NE(nullptr, enc.get());
+
+  JxlEncoderFrameSettings* frame_settings =
+      JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+  size_t xsize = 1;
+  size_t ysize = 1;
+  JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+  JxlBasicInfo basic_info;
+  jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+  basic_info.xsize = xsize;
+  basic_info.ysize = ysize;
+  basic_info.uses_original_profile = false;
+  basic_info.have_animation = true;
+  basic_info.intensity_target = 123.4;
+  basic_info.min_nits = 5.0;
+  basic_info.linear_below = 12.7;
+  basic_info.orientation = JXL_ORIENT_ROTATE_90_CW;
+  basic_info.intrinsic_xsize = 88;
+  basic_info.intrinsic_ysize = 99;
+  basic_info.animation.tps_numerator = 55;
+  basic_info.animation.tps_denominator = 77;
+  basic_info.animation.num_loops = 10;
+  basic_info.animation.have_timecodes = JXL_TRUE;
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10));
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+  JxlColorEncoding color_encoding;
+  JxlColorEncodingSetToSRGB(&color_encoding, /*is_gray=*/false);
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderSetColorEncoding(enc.get(), &color_encoding));
+
+  std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+  uint8_t* next_out = compressed.data();
+  size_t avail_out = compressed.size();
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+                                    pixels.data(), pixels.size()));
+  JxlEncoderCloseFrames(enc.get());
+  ProcessEncoder(enc.get(), compressed, next_out, avail_out);
+
+  // Decode to verify the basic info; only the JXL_DEC_BASIC_INFO event is
+  // subscribed to, no pixels are requested.
+  JxlDecoderPtr dec = JxlDecoderMake(nullptr);
+  // Fatal check: every statement below dereferences the decoder.
+  ASSERT_NE(nullptr, dec.get());
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(dec.get(), JXL_DEC_BASIC_INFO));
+  // Allow testing the orientation field, without this setting it will be
+  // overridden to identity.
+  JxlDecoderSetKeepOrientation(dec.get(), JXL_TRUE);
+  JxlDecoderSetInput(dec.get(), compressed.data(), compressed.size());
+  JxlDecoderCloseInput(dec.get());
+
+  // Guards against the comparisons below being skipped entirely.
+  bool seen_basic_info = false;
+
+  for (;;) {
+    JxlDecoderStatus status = JxlDecoderProcessInput(dec.get());
+    if (status == JXL_DEC_ERROR) {
+      FAIL();
+    } else if (status == JXL_DEC_SUCCESS) {
+      break;
+    } else if (status == JXL_DEC_BASIC_INFO) {
+      seen_basic_info = true;
+      JxlBasicInfo basic_info2;
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderGetBasicInfo(dec.get(), &basic_info2));
+      EXPECT_EQ(basic_info.xsize, basic_info2.xsize);
+      EXPECT_EQ(basic_info.ysize, basic_info2.ysize);
+      EXPECT_EQ(basic_info.bits_per_sample, basic_info2.bits_per_sample);
+      EXPECT_EQ(basic_info.exponent_bits_per_sample,
+                basic_info2.exponent_bits_per_sample);
+      // The floating point fields are only compared to within 0.5.
+      EXPECT_NEAR(basic_info.intensity_target, basic_info2.intensity_target,
+                  0.5);
+      EXPECT_NEAR(basic_info.min_nits, basic_info2.min_nits, 0.5);
+      EXPECT_NEAR(basic_info.linear_below, basic_info2.linear_below, 0.5);
+      EXPECT_EQ(basic_info.relative_to_max_display,
+                basic_info2.relative_to_max_display);
+      EXPECT_EQ(basic_info.uses_original_profile,
+                basic_info2.uses_original_profile);
+      EXPECT_EQ(basic_info.orientation, basic_info2.orientation);
+      EXPECT_EQ(basic_info.intrinsic_xsize, basic_info2.intrinsic_xsize);
+      EXPECT_EQ(basic_info.intrinsic_ysize, basic_info2.intrinsic_ysize);
+      EXPECT_EQ(basic_info.num_color_channels, basic_info2.num_color_channels);
+      // TODO(lode): also test num_extra_channels, but currently there may be a
+      // mismatch between 0 and 1 if there is alpha, until encoder support for
+      // extra channels is fully implemented.
+      EXPECT_EQ(basic_info.alpha_bits, basic_info2.alpha_bits);
+      EXPECT_EQ(basic_info.alpha_exponent_bits,
+                basic_info2.alpha_exponent_bits);
+      EXPECT_EQ(basic_info.alpha_premultiplied,
+                basic_info2.alpha_premultiplied);
+
+      EXPECT_EQ(basic_info.have_preview, basic_info2.have_preview);
+      if (basic_info.have_preview) {
+        EXPECT_EQ(basic_info.preview.xsize, basic_info2.preview.xsize);
+        EXPECT_EQ(basic_info.preview.ysize, basic_info2.preview.ysize);
+      }
+
+      EXPECT_EQ(basic_info.have_animation, basic_info2.have_animation);
+      if (basic_info.have_animation) {
+        EXPECT_EQ(basic_info.animation.tps_numerator,
+                  basic_info2.animation.tps_numerator);
+        EXPECT_EQ(basic_info.animation.tps_denominator,
+                  basic_info2.animation.tps_denominator);
+        EXPECT_EQ(basic_info.animation.num_loops,
+                  basic_info2.animation.num_loops);
+        EXPECT_EQ(basic_info.animation.have_timecodes,
+                  basic_info2.animation.have_timecodes);
+      }
+    } else {
+      FAIL();  // unexpected status
+    }
+  }
+
+  EXPECT_EQ(true, seen_basic_info);
+}
+
+// Sets a frame header (duration, timecode, blend info), an extra channel
+// blend info and a frame name on the encoder, encodes a 1x1 frame and checks
+// that the decoder reports the same values for that frame.
+TEST(EncodeTest, AnimationHeaderTest) {
+  JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+  // Fatal check: every statement below dereferences the encoder.
+  ASSERT_NE(nullptr, enc.get());
+
+  JxlEncoderFrameSettings* frame_settings =
+      JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+  size_t xsize = 1;
+  size_t ysize = 1;
+  JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+  JxlBasicInfo basic_info;
+  jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+  basic_info.xsize = xsize;
+  basic_info.ysize = ysize;
+  basic_info.have_animation = true;
+  basic_info.animation.tps_numerator = 1000;
+  basic_info.animation.tps_denominator = 1;
+  basic_info.animation.have_timecodes = JXL_TRUE;
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10));
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+  JxlColorEncoding color_encoding;
+  JxlColorEncodingSetToSRGB(&color_encoding, /*is_gray=*/false);
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderSetColorEncoding(enc.get(), &color_encoding));
+
+  std::string frame_name = "test frame";
+  JxlFrameHeader header;
+  JxlEncoderInitFrameHeader(&header);
+  header.duration = 50;
+  header.timecode = 800;
+  header.layer_info.blend_info.blendmode = JXL_BLEND_BLEND;
+  header.layer_info.blend_info.source = 2;
+  header.layer_info.blend_info.clamp = 1;
+  JxlBlendInfo extra_channel_blend_info;
+  JxlEncoderInitBlendInfo(&extra_channel_blend_info);
+  extra_channel_blend_info.blendmode = JXL_BLEND_MULADD;
+  // A rejected setter would otherwise only show up later as a confusing
+  // mismatch on the decoder side, so check each status right away.
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetFrameHeader(frame_settings, &header));
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderSetExtraChannelBlendInfo(frame_settings, 0,
+                                               &extra_channel_blend_info));
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderSetFrameName(frame_settings, frame_name.c_str()));
+
+  std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+  uint8_t* next_out = compressed.data();
+  size_t avail_out = compressed.size();
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+                                    pixels.data(), pixels.size()));
+  JxlEncoderCloseFrames(enc.get());
+  ProcessEncoder(enc.get(), compressed, next_out, avail_out);
+
+  // Decode to verify the frame header; only the JXL_DEC_FRAME event is
+  // subscribed to, no pixels are requested.
+  JxlDecoderPtr dec = JxlDecoderMake(nullptr);
+  // Fatal check: every statement below dereferences the decoder.
+  ASSERT_NE(nullptr, dec.get());
+
+  // To test the blend_info fields, coalescing must be set to false in the
+  // decoder.
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetCoalescing(dec.get(), JXL_FALSE));
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(dec.get(), JXL_DEC_FRAME));
+  JxlDecoderSetInput(dec.get(), compressed.data(), compressed.size());
+  JxlDecoderCloseInput(dec.get());
+
+  // Guards against the comparisons below being skipped entirely.
+  bool seen_frame = false;
+
+  for (;;) {
+    JxlDecoderStatus status = JxlDecoderProcessInput(dec.get());
+    if (status == JXL_DEC_ERROR) {
+      FAIL();
+    } else if (status == JXL_DEC_SUCCESS) {
+      break;
+    } else if (status == JXL_DEC_FRAME) {
+      seen_frame = true;
+      JxlFrameHeader header2;
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec.get(), &header2));
+      EXPECT_EQ(header.duration, header2.duration);
+      EXPECT_EQ(header.timecode, header2.timecode);
+      EXPECT_EQ(header.layer_info.blend_info.blendmode,
+                header2.layer_info.blend_info.blendmode);
+      EXPECT_EQ(header.layer_info.blend_info.clamp,
+                header2.layer_info.blend_info.clamp);
+      EXPECT_EQ(header.layer_info.blend_info.source,
+                header2.layer_info.blend_info.source);
+      EXPECT_EQ(frame_name.size(), header2.name_length);
+      JxlBlendInfo extra_channel_blend_info2;
+      // Checked so that a failed query is reported as such instead of as a
+      // comparison against an uninitialized struct.
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderGetExtraChannelBlendInfo(
+                    dec.get(), 0, &extra_channel_blend_info2));
+      EXPECT_EQ(extra_channel_blend_info.blendmode,
+                extra_channel_blend_info2.blendmode);
+      if (header2.name_length > 0) {
+        // One extra byte is passed to the decoder beyond name_length; it is
+        // dropped again by the resize() below before comparing.
+        std::string frame_name2(header2.name_length + 1, '\0');
+        EXPECT_EQ(JXL_DEC_SUCCESS,
+                  JxlDecoderGetFrameName(dec.get(), &frame_name2.front(),
+                                         frame_name2.size()));
+        frame_name2.resize(header2.name_length);
+        EXPECT_EQ(frame_name, frame_name2);
+      }
+    } else {
+      FAIL();  // unexpected status
+    }
+  }
+
+  EXPECT_EQ(true, seen_frame);
+}
+TEST(EncodeTest, CroppedFrameTest) {  // frame larger than the image canvas
+  JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+  EXPECT_NE(nullptr, enc.get());
+
+  JxlEncoderFrameSettings* frame_settings =
+      JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+  size_t xsize = 300;  // frame size, not image size
+  size_t ysize = 300;
+  JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+  std::vector<uint8_t> pixels2(pixels.size());  // decoder output buffer
+  JxlBasicInfo basic_info;
+  jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+  // Encoding a 300x300 frame in an image that is only 100x100
+  basic_info.xsize = 100;
+  basic_info.ysize = 100;
+  basic_info.uses_original_profile = JXL_TRUE;
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10));
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+  JxlColorEncoding color_encoding;
+  JxlColorEncodingSetToSRGB(&color_encoding, /*is_gray=*/false);
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderSetColorEncoding(enc.get(), &color_encoding));
+
+  JxlFrameHeader header;
+  JxlEncoderInitFrameHeader(&header);
+  header.layer_info.have_crop = JXL_TRUE;
+  header.layer_info.xsize = xsize;
+  header.layer_info.ysize = ysize;
+  header.layer_info.crop_x0 = -50;   // negative: origin outside the image
+  header.layer_info.crop_y0 = -250;  // (presumably left of / above it)
+  JxlEncoderSetFrameLossless(frame_settings, JXL_TRUE);
+  JxlEncoderSetFrameHeader(frame_settings, &header);
+  JxlEncoderFrameSettingsSetOption(frame_settings, JXL_ENC_FRAME_SETTING_EFFORT,
+                                   1);
+
+  std::vector<uint8_t> compressed = std::vector<uint8_t>(100);
+  uint8_t* next_out = compressed.data();
+  size_t avail_out = compressed.size() - (next_out - compressed.data());
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+                                    pixels.data(), pixels.size()));
+  JxlEncoderCloseFrames(enc.get());
+  ProcessEncoder(enc.get(), compressed, next_out, avail_out);
+
+  JxlDecoderPtr dec = JxlDecoderMake(nullptr);
+  EXPECT_NE(nullptr, dec.get());
+  // Non-coalesced decoding so we can get the full uncropped frame
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetCoalescing(dec.get(), JXL_FALSE));
+  EXPECT_EQ(
+      JXL_DEC_SUCCESS,
+      JxlDecoderSubscribeEvents(dec.get(), JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+  JxlDecoderSetInput(dec.get(), compressed.data(), compressed.size());
+  JxlDecoderCloseInput(dec.get());
+
+  bool seen_frame = false;     // a JXL_DEC_FRAME event was delivered
+  bool checked_frame = false;  // the decoded pixels were compared
+  for (;;) {
+    JxlDecoderStatus status = JxlDecoderProcessInput(dec.get());
+    if (status == JXL_DEC_ERROR) {
+      FAIL();
+    } else if (status == JXL_DEC_SUCCESS) {
+      break;
+    } else if (status == JXL_DEC_FRAME) {
+      seen_frame = true;
+      JxlFrameHeader header2;  // header as reported by the decoder
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec.get(), &header2));
+      EXPECT_EQ(header.layer_info.xsize, header2.layer_info.xsize);
+      EXPECT_EQ(header.layer_info.ysize, header2.layer_info.ysize);
+      EXPECT_EQ(header.layer_info.crop_x0, header2.layer_info.crop_x0);
+      EXPECT_EQ(header.layer_info.crop_y0, header2.layer_info.crop_y0);
+    } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSetImageOutBuffer(dec.get(), &pixel_format,
+                                            pixels2.data(), pixels2.size()));
+    } else if (status == JXL_DEC_FULL_IMAGE) {  // lossless: exact match
+      EXPECT_EQ(0, memcmp(pixels.data(), pixels2.data(), pixels.size()));
+      checked_frame = true;
+    } else {
+      FAIL();  // unexpected status
+    }
+  }
+  EXPECT_EQ(true, checked_frame);
+  EXPECT_EQ(true, seen_frame);
+}
+
+TEST(EncodeTest, JXL_BOXES_TEST(BoxTest)) {
+  // Test with uncompressed boxes and with brob boxes
+  for (int compress_box = 0; compress_box <= 1; ++compress_box) {
+    // Tests adding two metadata boxes with the encoder: an exif box before the
+    // image frame, and an xml box after the image frame. Then verifies the
+    // decoder can decode them, they are in the expected place, and have the
+    // correct content after decoding.
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    EXPECT_NE(nullptr, enc.get());
+
+    EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderUseBoxes(enc.get()));
+
+    JxlEncoderFrameSettings* frame_settings =
+        JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+    size_t xsize = 50;
+    size_t ysize = 17;
+    JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+    std::vector<uint8_t> pixels =
+        jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+    JxlBasicInfo basic_info;
+    jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+    basic_info.xsize = xsize;
+    basic_info.ysize = ysize;
+    basic_info.uses_original_profile = false;
+    EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10));
+    EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+    JxlColorEncoding color_encoding;
+    JxlColorEncodingSetToSRGB(&color_encoding,
+                              /*is_gray=*/false);
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderSetColorEncoding(enc.get(), &color_encoding));
+
+    std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+    uint8_t* next_out = compressed.data();
+    size_t avail_out = compressed.size() - (next_out - compressed.data());
+
+    // Add an early metadata box. Also add a valid 4-byte TIFF offset header
+    // before the fake exif data of these box contents.
+    constexpr const char* exif_test_string = "\0\0\0\0exif test data";
+    const uint8_t* exif_data =
+        reinterpret_cast<const uint8_t*>(exif_test_string);
+    // Skip the 4 zeroes for strlen
+    const size_t exif_size = 4 + strlen(exif_test_string + 4);
+    JxlEncoderAddBox(enc.get(), "Exif", exif_data, exif_size, compress_box);
+
+    // Write to output
+    ProcessEncoder(enc.get(), compressed, next_out, avail_out);
+
+    // Add image frame
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+                                      pixels.data(), pixels.size()));
+    // Indicate this is the last frame
+    JxlEncoderCloseFrames(enc.get());
+
+    // Write to output
+    ProcessEncoder(enc.get(), compressed, next_out, avail_out);
+
+    // Add a late metadata box
+    constexpr const char* xml_test_string = "<some random xml data>";
+    const uint8_t* xml_data = reinterpret_cast<const uint8_t*>(xml_test_string);
+    size_t xml_size = strlen(xml_test_string);
+    JxlEncoderAddBox(enc.get(), "XML ", xml_data, xml_size, compress_box);
+
+    // Indicate this is the last box
+    JxlEncoderCloseBoxes(enc.get());
+
+    // Write to output
+    ProcessEncoder(enc.get(), compressed, next_out, avail_out);
+
+    // Decode to verify the boxes, we don't decode to pixels, only the boxes.
+    JxlDecoderPtr dec = JxlDecoderMake(nullptr);
+    EXPECT_NE(nullptr, dec.get());
+
+    if (compress_box) {  // boxes were added compressed: ask for decompression
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSetDecompressBoxes(dec.get(), JXL_TRUE));
+    }
+
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(
+                                   dec.get(), JXL_DEC_FRAME | JXL_DEC_BOX));
+
+    JxlDecoderSetInput(dec.get(), compressed.data(), compressed.size());
+    JxlDecoderCloseInput(dec.get());
+
+    std::vector<uint8_t> dec_exif_box(exif_size);  // exact-size output buffers
+    std::vector<uint8_t> dec_xml_box(xml_size);
+
+    for (bool post_frame = false;;) {  // post_frame: frame event already seen
+      JxlDecoderStatus status = JxlDecoderProcessInput(dec.get());
+      if (status == JXL_DEC_ERROR) {
+        FAIL();
+      } else if (status == JXL_DEC_SUCCESS) {
+        EXPECT_EQ(0, JxlDecoderReleaseBoxBuffer(dec.get()));
+        break;
+      } else if (status == JXL_DEC_FRAME) {
+        post_frame = true;
+      } else if (status == JXL_DEC_BOX) {
+        // Since we gave the exif/xml box output buffer of the exact known
+        // correct size, 0 bytes should be released. Same when no buffer was
+        // set.
+        EXPECT_EQ(0, JxlDecoderReleaseBoxBuffer(dec.get()));
+        JxlBoxType type;
+        EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec.get(), type, true));
+        if (!memcmp(type, "Exif", 4)) {
+          // This box should have been encoded before the image frame
+          EXPECT_EQ(false, post_frame);
+          JxlDecoderSetBoxBuffer(dec.get(), dec_exif_box.data(),
+                                 dec_exif_box.size());
+        } else if (!memcmp(type, "XML ", 4)) {
+          // This box should have been encoded after the image frame
+          EXPECT_EQ(true, post_frame);
+          JxlDecoderSetBoxBuffer(dec.get(), dec_xml_box.data(),
+                                 dec_xml_box.size());
+        }  // boxes of any other type get no output buffer
+      } else {
+        FAIL();  // unexpected status
+      }
+    }
+
+    EXPECT_EQ(0, memcmp(exif_data, dec_exif_box.data(), exif_size));
+    EXPECT_EQ(0, memcmp(xml_data, dec_xml_box.data(), xml_size));
+  }
+}
+
+TEST(EncodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGFrameTest)) {
+  TEST_LIBJPEG_SUPPORT();  // defined elsewhere; presumably guards on libjpeg
+  for (int skip_basic_info = 0; skip_basic_info < 2; skip_basic_info++) {
+    for (int skip_color_encoding = 0; skip_color_encoding < 2;
+         skip_color_encoding++) {
+      // cannot set color encoding if basic info is not set
+      if (skip_basic_info && !skip_color_encoding) continue;
+      const std::string jpeg_path = "jxl/flower/flower_cropped.jpg";
+      const jxl::PaddedBytes orig = jxl::test::ReadTestData(jpeg_path);
+      jxl::CodecInOut orig_io;  // reference pixels decoded from the JPEG
+      ASSERT_TRUE(SetFromBytes(jxl::Span<const uint8_t>(orig), &orig_io,
+                               /*pool=*/nullptr));
+
+      JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+      JxlEncoderFrameSettings* frame_settings =
+          JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+      JxlEncoderFrameSettingsSetOption(frame_settings,
+                                       JXL_ENC_FRAME_SETTING_EFFORT, 1);
+      if (!skip_basic_info) {
+        JxlBasicInfo basic_info;
+        JxlEncoderInitBasicInfo(&basic_info);
+        basic_info.xsize = orig_io.xsize();
+        basic_info.ysize = orig_io.ysize();
+        basic_info.uses_original_profile = true;
+        EXPECT_EQ(JXL_ENC_SUCCESS,
+                  JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+      }
+      if (!skip_color_encoding) {
+        JxlColorEncoding color_encoding;
+        JxlColorEncodingSetToSRGB(&color_encoding, /*is_gray=*/false);
+        EXPECT_EQ(JXL_ENC_SUCCESS,
+                  JxlEncoderSetColorEncoding(enc.get(), &color_encoding));
+      }
+      EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderAddJPEGFrame(
+                                     frame_settings, orig.data(), orig.size()));
+      JxlEncoderCloseInput(enc.get());
+
+      std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+      uint8_t* next_out = compressed.data();
+      size_t avail_out = compressed.size() - (next_out - compressed.data());
+      JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+      while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {  // grow and retry
+        process_result =
+            JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out);
+        if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+          size_t offset = next_out - compressed.data();  // bytes written so far
+          compressed.resize(compressed.size() * 2);  // may reallocate
+          next_out = compressed.data() + offset;  // re-derive from new storage
+          avail_out = compressed.size() - offset;
+        }
+      }
+      compressed.resize(next_out - compressed.data());  // trim to bytes written
+      EXPECT_EQ(JXL_ENC_SUCCESS, process_result);
+
+      jxl::CodecInOut decoded_io;
+      EXPECT_TRUE(jxl::test::DecodeFile(
+          {}, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+          &decoded_io));
+
+      EXPECT_LE(
+          ComputeDistance2(orig_io.Main(), decoded_io.Main(), jxl::GetJxlCms()),
+          3.5);  // upper bound on the distance between original and decoded
+    }
+  }
+}
diff --git a/third-party/libjxl/libjxl/lib/jxl/entropy_coder.cc b/third-party/libjxl/libjxl/lib/jxl/entropy_coder.cc
new file mode 100644
index 0000000000..62ca1becf8
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/entropy_coder.cc
@@ -0,0 +1,69 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/entropy_coder.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_context_map.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+Status DecodeBlockCtxMap(BitReader* br, BlockCtxMap* block_ctx_map) {
+  // One bit selects the default map; nothing else is read in that case.
+  if (br->ReadFixedBits<1>()) {
+    *block_ctx_map = BlockCtxMap();
+    return true;
+  }
+  auto& dc_thresholds = block_ctx_map->dc_thresholds;
+  auto& qf_thresholds = block_ctx_map->qf_thresholds;
+  // Three threshold lists: each is a 4-bit count followed by signed values.
+  block_ctx_map->num_dc_ctxs = 1;
+  for (int c = 0; c < 3; ++c) {
+    dc_thresholds[c].resize(br->ReadFixedBits<4>());
+    block_ctx_map->num_dc_ctxs *= dc_thresholds[c].size() + 1;
+    for (int& threshold : dc_thresholds[c]) {
+      threshold = UnpackSigned(U32Coder::Read(kDCThresholdDist, br));
+    }
+  }
+  qf_thresholds.resize(br->ReadFixedBits<4>());
+  for (uint32_t& threshold : qf_thresholds) {
+    threshold = U32Coder::Read(kQFThresholdDist, br) + 1;  // stored minus one
+  }
+  const size_t num_block_ctxs =
+      block_ctx_map->num_dc_ctxs * (qf_thresholds.size() + 1);
+  if (num_block_ctxs > 64) {
+    return JXL_FAILURE("Invalid block context map: too big");
+  }
+  block_ctx_map->ctx_map.resize(3 * kNumOrders * num_block_ctxs);
+  JXL_RETURN_IF_ERROR(DecodeContextMap(&block_ctx_map->ctx_map,
+                                       &block_ctx_map->num_ctxs, br));
+  if (block_ctx_map->num_ctxs > 16) {
+    return JXL_FAILURE("Invalid block context map: too many distinct contexts");
+  }
+  return true;
+}
+
+constexpr uint8_t BlockCtxMap::kDefaultCtxMap[];  // from ac_context.h
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/entropy_coder.h b/third-party/libjxl/libjxl/lib/jxl/entropy_coder.h
new file mode 100644
index 0000000000..e4afa7a631
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/entropy_coder.h
@@ -0,0 +1,45 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENTROPY_CODER_H_
+#define LIB_JXL_ENTROPY_CODER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/field_encodings.h"
+
+// Entropy coding and context modeling of DC and AC coefficients, as well as AC
+// strategy and quantization field.
+
+namespace jxl {
+
+static JXL_INLINE int32_t PredictFromTopAndLeft(
+    const int32_t* const JXL_RESTRICT row_top,
+    const int32_t* const JXL_RESTRICT row, size_t x, int32_t default_val) {
+  const bool has_top = row_top != nullptr;
+  const bool has_left = x != 0;
+  // Both neighbours present: their sum plus one, halved by integer division.
+  if (has_top && has_left) return (row_top[x] + row[x - 1] + 1) / 2;
+  // Otherwise use whichever neighbour exists, or default_val if neither does.
+  if (has_top) return row_top[x];
+  return has_left ? row[x - 1] : default_val;
+}
+
+static constexpr U32Enc kDCThresholdDist(Bits(4), BitsOffset(8, 16),
+                                         BitsOffset(16, 272),
+                                         BitsOffset(32, 65808));
+
+static constexpr U32Enc kQFThresholdDist(Bits(2), BitsOffset(3, 4),
+                                         BitsOffset(5, 12), BitsOffset(8, 44));
+
+Status DecodeBlockCtxMap(BitReader* br, BlockCtxMap* block_ctx_map);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENTROPY_CODER_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/entropy_coder_test.cc b/third-party/libjxl/libjxl/lib/jxl/entropy_coder_test.cc
new file mode 100644
index 0000000000..9dbeb137af
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/entropy_coder_test.cc
@@ -0,0 +1,68 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// TODO(deymo): Move these tests to dec_ans.h and common.h
+
+#include <stdint.h>
+
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+TEST(EntropyCoderTest, PackUnpack) {
+  for (int32_t value = -31; value <= 31; ++value) {
+    const uint32_t encoded = PackSigned(value);
+    EXPECT_LT(encoded, 63u);  // the 63 inputs must all pack into [0, 63)
+    const int32_t decoded = UnpackSigned(encoded);
+    EXPECT_EQ(value, decoded);
+  }
+}
+
+struct DummyBitReader {  // stand-in reader handed to ReadHybridUintConfig
+  uint32_t nbits, bits;  // expected peek width, and the value PeekBits returns
+  void Consume(uint32_t nbits) {}  // no-op: this reader keeps no position
+  uint32_t PeekBits(uint32_t n) {
+    EXPECT_EQ(n, nbits);  // the requested width must equal the stored nbits
+    return bits;
+  }
+};
+
+void HybridUintRoundtrip(HybridUintConfig config, size_t limit = 1 << 24) {
+  // Encodes a fixed-seed sample drawn with rng.UniformU(0, limit + 1), then
+  // checks that decoding each (token, nbits, bits) triple restores the value.
+  constexpr size_t kSampleCount = 1 << 20;
+  std::vector<uint32_t> values(kSampleCount), tokens(kSampleCount);
+  std::vector<uint32_t> extra_nbits(kSampleCount), extra_bits(kSampleCount);
+  Rng rng(0);
+  for (size_t i = 0; i < kSampleCount; i++) {
+    values[i] = rng.UniformU(0, limit + 1);
+    config.Encode(values[i], &tokens[i], &extra_nbits[i], &extra_bits[i]);
+  }
+  for (size_t i = 0; i < kSampleCount; i++) {
+    DummyBitReader br{extra_nbits[i], extra_bits[i]};
+    EXPECT_EQ(values[i],
+              ANSSymbolReader::ReadHybridUintConfig(config, tokens[i], &br));
+  }
+}
+
+TEST(HybridUintTest, Test000) {  // config {0, 0, 0}, default limit 1 << 24
+  HybridUintRoundtrip(HybridUintConfig{0, 0, 0});
+}
+TEST(HybridUintTest, Test411) {  // config {4, 1, 1}, default limit
+  HybridUintRoundtrip(HybridUintConfig{4, 1, 1});
+}
+TEST(HybridUintTest, Test420) {  // config {4, 2, 0}, default limit
+  HybridUintRoundtrip(HybridUintConfig{4, 2, 0});
+}
+TEST(HybridUintTest, Test421) {  // config {4, 2, 1}, limit lowered to 256
+  HybridUintRoundtrip(HybridUintConfig{4, 2, 1}, 256);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/epf.cc b/third-party/libjxl/libjxl/lib/jxl/epf.cc
new file mode 100644
index 0000000000..7288ed9ca6
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/epf.cc
@@ -0,0 +1,146 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Edge-preserving smoothing: weighted average based on L1 patch similarity.
+
+#include "lib/jxl/epf.h"
+
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <atomic>
+#include <numeric>  // std::accumulate
+#include <vector>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+
+namespace jxl {
+
+// Mirror n floats starting at *p and store them before p.
+JXL_INLINE void LeftMirror(float* p, size_t n) {
+  // Source [p, p + n) and destination [p - n, p) are disjoint, so a reversed
+  // copy writes p[-1 - i] = p[i] for every i < n.
+  std::reverse_copy(p, p + n, p - n);
+}
+
+// Mirror n floats starting at *(p - n) and store them at *p.
+JXL_INLINE void RightMirror(float* p, size_t n) {
+  // Source [p - n, p) and destination [p, p + n) are disjoint, so a reversed
+  // copy writes p[i] = p[-1 - i] for every i < n.
+  std::reverse_copy(p - n, p, p);
+}
+
+void ComputeSigma(const Rect& block_rect, PassesDecoderState* state) {
+  const LoopFilter& lf = state->shared->frame_header.loop_filter;
+  JXL_CHECK(lf.epf_iters > 0);
+  const AcStrategyImage& ac_strategy = state->shared->ac_strategy;
+  const float quant_scale = state->shared->quantizer.Scale();
+
+  const size_t sigma_stride = state->sigma.PixelsPerRow();
+  const size_t sharpness_stride = state->shared->epf_sharpness.PixelsPerRow();
+
+  for (size_t by = 0; by < block_rect.ysize(); ++by) {  // block rows
+    float* JXL_RESTRICT sigma_row = block_rect.Row(&state->sigma, by);
+    const uint8_t* JXL_RESTRICT sharpness_row =
+        block_rect.ConstRow(state->shared->epf_sharpness, by);
+    AcStrategyRow acs_row = ac_strategy.ConstRow(block_rect, by);
+    const int32_t* const JXL_RESTRICT row_quant =
+        block_rect.ConstRow(state->shared->raw_quant_field, by);
+
+    for (size_t bx = 0; bx < block_rect.xsize(); bx++) {  // block columns
+      AcStrategy acs = acs_row[bx];
+      size_t llf_x = acs.covered_blocks_x();
+      if (!acs.IsFirstBlock()) continue;  // covered blocks are filled below
+      // quant_scale is smaller for low quality.
+      // quant_scale is roughly 0.08 / butteraugli score.
+      //
+      // row_quant is smaller for low quality.
+      // row_quant is a quantization multiplier of form 1.0 /
+      // row_quant[bx]
+      //
+      // lf.epf_quant_mul is a loop-filter parameter from the frame header;
+      // kInvSigmaNum is a constant defined in epf.h.
+      float sigma_quant =
+          lf.epf_quant_mul / (quant_scale * row_quant[bx] * kInvSigmaNum);
+      for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+        for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+          float sigma =
+              sigma_quant *
+              lf.epf_sharp_lut[sharpness_row[bx + ix + iy * sharpness_stride]];
+          // Keep sigma <= -1e-4 so that 1.0f / sigma below stays finite.
+          sigma = std::min(-1e-4f, sigma);  // TODO(veluca): remove this.
+          sigma_row[bx + ix + kSigmaPadding +
+                    (iy + kSigmaPadding) * sigma_stride] = 1.0f / sigma;
+        }
+      }
+      // TODO(veluca): remove this padding.
+      // Left padding with mirroring.
+      if (bx + block_rect.x0() == 0) {  // block sits in the frame's first column
+        for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+          LeftMirror(
+              sigma_row + kSigmaPadding + (iy + kSigmaPadding) * sigma_stride,
+              kSigmaBorder);
+        }
+      }
+      // Right padding with mirroring.
+      if (bx + block_rect.x0() + llf_x ==
+          state->shared->frame_dim.xsize_blocks) {  // reaches the last column
+        for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+          RightMirror(sigma_row + kSigmaPadding + bx + llf_x +
+                          (iy + kSigmaPadding) * sigma_stride,
+                      kSigmaBorder);
+        }
+      }
+      // Offsets for row copying, in blocks.
+      size_t offset_before = bx + block_rect.x0() == 0 ? 1 : bx + kSigmaPadding;
+      size_t offset_after =
+          bx + block_rect.x0() + llf_x == state->shared->frame_dim.xsize_blocks
+              ? kSigmaPadding + llf_x + bx + kSigmaBorder
+              : kSigmaPadding + llf_x + bx;
+      size_t num = offset_after - offset_before;  // floats copied per row
+      // Above
+      if (by + block_rect.y0() == 0) {  // block sits in the frame's first row
+        for (size_t iy = 0; iy < kSigmaBorder; iy++) {
+          memcpy(
+              sigma_row + offset_before +
+                  (kSigmaPadding - 1 - iy) * sigma_stride,
+              sigma_row + offset_before + (kSigmaPadding + iy) * sigma_stride,
+              num * sizeof(*sigma_row));
+        }
+      }
+      // Below
+      if (by + block_rect.y0() + acs.covered_blocks_y() ==
+          state->shared->frame_dim.ysize_blocks) {  // reaches the last row
+        for (size_t iy = 0; iy < kSigmaBorder; iy++) {
+          memcpy(
+              sigma_row + offset_before +
+                  sigma_stride * (acs.covered_blocks_y() + kSigmaPadding + iy),
+              sigma_row + offset_before +
+                  sigma_stride *
+                      (acs.covered_blocks_y() + kSigmaPadding - 1 - iy),
+              num * sizeof(*sigma_row));
+        }
+      }
+    }
+  }
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/epf.h b/third-party/libjxl/libjxl/lib/jxl/epf.h
new file mode 100644
index 0000000000..7a0834ed97
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/epf.h
@@ -0,0 +1,33 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_EPF_H_
+#define LIB_JXL_EPF_H_
+
+// Fast SIMD "in-loop" edge preserving filter (adaptive, nonlinear).
+
+#include <stddef.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/passes_state.h"
+
+namespace jxl {
+
+// 4 * (sqrt(0.5)-1), so that Weight(sigma) = 0.5.
+static constexpr float kInvSigmaNum = -1.1715728752538099024f;
+
+// kInvSigmaNum / 0.3
+constexpr float kMinSigma = -3.90524291751269967465540850526868f;
+
+// Fills the `state->sigma` image with the precomputed 1 / sigma values in the
+// area inside `block_rect`. Accesses the AC strategy, quant field and
+// epf_sharpness fields in the corresponding positions.
+void ComputeSigma(const Rect& block_rect, PassesDecoderState* state);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_EPF_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/exif.h b/third-party/libjxl/libjxl/lib/jxl/exif.h
new file mode 100644
index 0000000000..0cf493fc71
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/exif.h
@@ -0,0 +1,87 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_EXIF_H_
+#define LIB_JXL_EXIF_H_
+
+// Basic parsing of Exif (just enough for the render-impacting things
+// like orientation)
+
+#include <jxl/codestream_header.h>
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/image_metadata.h"
+
+namespace jxl {
+
+constexpr uint16_t kExifOrientationTag = 274;
+
+// Checks if a blob looks like Exif, and if so, sets bigendian
+// according to the tiff endianness
+inline bool IsExif(const std::vector<uint8_t>& exif, bool* bigendian) {
+  if (exif.size() < 12) return false;  // not enough bytes for a valid exif blob
+  // The first four bytes hold the TIFF byte-order mark and the magic number 42.
+  // Loaded as little-endian they are "MM",0x00,0x2A (big-endian TIFF) or
+  // "II",0x2A,0x00 (little-endian TIFF).
+  const auto signature = LoadLE32(exif.data());
+  const bool is_big = signature == 0x2A004D4D;
+  const bool is_little = signature == 0x002A4949;
+  if (!is_big && !is_little) return false;  // not a valid tiff header
+  *bigendian = is_big;
+  return true;
+}
+
+// Returns the offset just past the id of Exif tag `tagname`, or 0 if absent.
+inline size_t FindExifTagPosition(const std::vector<uint8_t>& exif,
+                                  uint16_t tagname) {
+  bool bigendian;
+  if (!IsExif(exif, &bigendian)) return 0;
+  const uint8_t* const data = exif.data();
+  // Bytes 4..7 hold the offset of the tag directory. It must lie past the
+  // 8-byte header and leave room for its 2-byte count (so offset + 2 < size).
+  const uint64_t offset = bigendian ? LoadBE32(data + 4) : LoadLE32(data + 4);
+  if (exif.size() < 12 + offset + 2 || offset < 8) return 0;
+  // Walk the 12-byte entries by index. Unlike comparing `t + 12` against the
+  // end pointer, this never forms a pointer past the end of the buffer.
+  size_t pos = static_cast<size_t>(offset);
+  uint16_t nb_tags = bigendian ? LoadBE16(data + pos) : LoadLE16(data + pos);
+  pos += 2;
+  for (; nb_tags > 0; nb_tags--, pos += 12) {
+    if (pos + 12 >= exif.size()) return 0;  // not enough bytes left
+    uint16_t tag = bigendian ? LoadBE16(data + pos) : LoadLE16(data + pos);
+    if (tag == tagname) return pos + 2;  // position just past the tag id
+  }
+  return 0;
+}
+
+// TODO (jon): tag 1 can be used to represent Adobe RGB 1998 if it has value
+// "R03"
+// TODO (jon): set intrinsic dimensions according to
+// https://discourse.wicg.io/t/proposal-exif-image-resolution-auto-and-from-image/4326/24
+// Parses the Exif data just enough to extract any render-impacting info.
+// If the Exif data is invalid or could not be parsed, then it is treated
+// as a no-op.
+inline void InterpretExif(const std::vector<uint8_t>& exif,
+                          JxlOrientation* orientation) {
+  bool bigendian;
+  if (!IsExif(exif, &bigendian)) return;
+  const size_t o_pos = FindExifTagPosition(exif, kExifOrientationTag);
+  if (o_pos == 0) return;  // no orientation tag found
+  // o_pos is the position just past the tag id. What follows is read as a
+  // 16-bit type, a 32-bit count and a value field of which only the first 16
+  // bits are used.
+  const uint8_t* field = exif.data() + o_pos;
+  const uint16_t type = bigendian ? LoadBE16(field) : LoadLE16(field);
+  const uint32_t count = bigendian ? LoadBE32(field + 2) : LoadLE32(field + 2);
+  const uint16_t value = bigendian ? LoadBE16(field + 6) : LoadLE16(field + 6);
+  // Only type 3 with a single value in 1..8 updates *orientation.
+  if (type == 3 && count == 1 && value >= 1 && value <= 8) {
+    *orientation = static_cast<JxlOrientation>(value);
+  }
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_EXIF_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/fake_parallel_runner_testonly.h b/third-party/libjxl/libjxl/lib/jxl/fake_parallel_runner_testonly.h
new file mode 100644
index 0000000000..508d808cc5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/fake_parallel_runner_testonly.h
@@ -0,0 +1,79 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_FAKE_PARALLEL_RUNNER_TESTONLY_H_
+#define LIB_JXL_FAKE_PARALLEL_RUNNER_TESTONLY_H_
+
+#include <jxl/parallel_runner.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/random.h"
+
+namespace jxl {
+
+// A parallel runner implementation that runs all the jobs in a single thread
+// (the caller thread) but runs them pretending to use multiple threads and
+// potentially out of order. This is useful for testing conditions that only
+// occur under heavy load where the order of operations is different.
+class FakeParallelRunner {
+ public:
+  // A thread count below one is treated as one.
+  FakeParallelRunner(uint32_t order_seed, uint32_t num_threads)
+      : order_seed_(order_seed),
+        rng_(order_seed),
+        num_threads_(num_threads < 1 ? 1 : num_threads) {}
+
+  // Calls `init`, then `func` once per task id in [start, end), all on the
+  // calling thread. Returns the non-zero `init` result without running any
+  // task, otherwise 0.
+  JxlParallelRetCode Run(void* jxl_opaque, JxlParallelRunInit init,
+                         JxlParallelRunFunction func, uint32_t start,
+                         uint32_t end) {
+    const JxlParallelRetCode init_result = init(jxl_opaque, num_threads_);
+    if (init_result != 0) return init_result;
+
+    if (order_seed_ == 0) {
+      // Ascending order; the reported thread id cycles over the fake threads.
+      for (uint32_t task = start; task < end; task++) {
+        func(jxl_opaque, task, task % num_threads_);
+      }
+      return init_result;
+    }
+    // Shuffled order: list every task id, permute the list, then run them.
+    std::vector<uint32_t> order(end - start);
+    for (uint32_t task = start; task < end; task++) order[task - start] = task;
+    rng_.Shuffle(order.data(), order.size());
+    for (uint32_t i = start; i < end; i++) {
+      func(jxl_opaque, order[i - start], i % num_threads_);
+    }
+    return init_result;
+  }
+
+ private:
+  // Seed for the execution order; 0 means sequential order.
+  uint32_t order_seed_;
+  // PRNG seeded with order_seed_; only used when the seed is not 0.
+  Rng rng_;
+  // Number of fake threads; every task runs on one thread but is handed a
+  // thread_id value below this count.
+  uint32_t num_threads_;
+};
+
+}  // namespace jxl
+
+extern "C" {
+// Parallel-runner callback; runner_opaque must be a jxl::FakeParallelRunner*.
+JXL_INLINE JxlParallelRetCode JxlFakeParallelRunner(
+    void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+    JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range) {
+  auto* runner = static_cast<jxl::FakeParallelRunner*>(runner_opaque);
+  return runner->Run(jpegxl_opaque, init, func, start_range, end_range);
+}
+}
+
+#endif  // LIB_JXL_FAKE_PARALLEL_RUNNER_TESTONLY_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_dct-inl.h b/third-party/libjxl/libjxl/lib/jxl/fast_dct-inl.h
new file mode 100644
index 0000000000..e359c6ab71
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/fast_dct-inl.h
@@ -0,0 +1,237 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JXL_FAST_DCT_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_FAST_DCT_INL_H_
+#undef LIB_JXL_FAST_DCT_INL_H_
+#else
+#define LIB_JXL_FAST_DCT_INL_H_
+#endif
+
+#include <cmath>
+#include <hwy/aligned_allocator.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/status.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+#if HWY_TARGET == HWY_NEON
+HWY_NOINLINE void FastTransposeBlock(const int16_t* JXL_RESTRICT data_in,
+                                     size_t stride_in, size_t N, size_t M,
+                                     int16_t* JXL_RESTRICT data_out,
+                                     size_t stride_out) {
+  JXL_DASSERT(N % 8 == 0);  // N input rows, walked 8 at a time via i
+  JXL_DASSERT(M % 8 == 0);  // M input columns, walked 8 at a time via j
+  for (size_t i = 0; i < N; i += 8) {
+    for (size_t j = 0; j < M; j += 8) {
+      // Transposes one 8x8 int16_t tile per iteration: rows i..i+7, columns
+      // j..j+7 of data_in become rows j..j+7, columns i..i+7 of data_out.
+      // TODO(veluca): one could optimize the M==8, stride_in==8 case further
+      // with vld4. This code is about 40% faster for N == M == stride_in ==
+      // stride_out == 8. Using loads + stores to reshuffle things to be able
+      // to use vld4 doesn't help.
+      /*
+      auto a0 = vld4q_s16(data_in); auto a1 = vld4q_s16(data_in + 32);
+      int16x8x4_t out0;
+      int16x8x4_t out1;
+      out0.val[0] = vuzp1q_s16(a0.val[0], a1.val[0]);
+      out0.val[1] = vuzp1q_s16(a0.val[1], a1.val[1]);
+      out0.val[2] = vuzp1q_s16(a0.val[2], a1.val[2]);
+      out0.val[3] = vuzp1q_s16(a0.val[3], a1.val[3]);
+      out1.val[0] = vuzp2q_s16(a0.val[0], a1.val[0]);
+      out1.val[1] = vuzp2q_s16(a0.val[1], a1.val[1]);
+      out1.val[2] = vuzp2q_s16(a0.val[2], a1.val[2]);
+      out1.val[3] = vuzp2q_s16(a0.val[3], a1.val[3]);
+      vst1q_s16_x4(data_out, out0);
+      vst1q_s16_x4(data_out + 32, out1);
+      */
+      auto a0 = vld1q_s16(data_in + i * stride_in + j);
+      auto a1 = vld1q_s16(data_in + (i + 1) * stride_in + j);
+      auto a2 = vld1q_s16(data_in + (i + 2) * stride_in + j);
+      auto a3 = vld1q_s16(data_in + (i + 3) * stride_in + j);
+      // Two rounds of 2x2 lane transposes (16-bit, then 32-bit), rows i..i+3.
+      auto a01 = vtrnq_s16(a0, a1);
+      auto a23 = vtrnq_s16(a2, a3);
+
+      auto four0 = vtrnq_s32(vreinterpretq_s32_s16(a01.val[0]),
+                             vreinterpretq_s32_s16(a23.val[0]));
+      auto four1 = vtrnq_s32(vreinterpretq_s32_s16(a01.val[1]),
+                             vreinterpretq_s32_s16(a23.val[1]));
+      // Same two rounds for rows i+4..i+7.
+      auto a4 = vld1q_s16(data_in + (i + 4) * stride_in + j);
+      auto a5 = vld1q_s16(data_in + (i + 5) * stride_in + j);
+      auto a6 = vld1q_s16(data_in + (i + 6) * stride_in + j);
+      auto a7 = vld1q_s16(data_in + (i + 7) * stride_in + j);
+
+      auto a45 = vtrnq_s16(a4, a5);
+      auto a67 = vtrnq_s16(a6, a7);
+
+      auto four2 = vtrnq_s32(vreinterpretq_s32_s16(a45.val[0]),
+                             vreinterpretq_s32_s16(a67.val[0]));
+      auto four3 = vtrnq_s32(vreinterpretq_s32_s16(a45.val[1]),
+                             vreinterpretq_s32_s16(a67.val[1]));
+      // Join the two halves: outK holds input column j+K for rows i..i+7.
+      auto out0 =
+          vcombine_s32(vget_low_s32(four0.val[0]), vget_low_s32(four2.val[0]));
+      auto out1 =
+          vcombine_s32(vget_low_s32(four1.val[0]), vget_low_s32(four3.val[0]));
+      auto out2 =
+          vcombine_s32(vget_low_s32(four0.val[1]), vget_low_s32(four2.val[1]));
+      auto out3 =
+          vcombine_s32(vget_low_s32(four1.val[1]), vget_low_s32(four3.val[1]));
+      auto out4 = vcombine_s32(vget_high_s32(four0.val[0]),
+                               vget_high_s32(four2.val[0]));
+      auto out5 = vcombine_s32(vget_high_s32(four1.val[0]),
+                               vget_high_s32(four3.val[0]));
+      auto out6 = vcombine_s32(vget_high_s32(four0.val[1]),
+                               vget_high_s32(four2.val[1]));
+      auto out7 = vcombine_s32(vget_high_s32(four1.val[1]),
+                               vget_high_s32(four3.val[1]));
+      vst1q_s16(data_out + j * stride_out + i, vreinterpretq_s16_s32(out0));
+      vst1q_s16(data_out + (j + 1) * stride_out + i,
+                vreinterpretq_s16_s32(out1));
+      vst1q_s16(data_out + (j + 2) * stride_out + i,
+                vreinterpretq_s16_s32(out2));
+      vst1q_s16(data_out + (j + 3) * stride_out + i,
+                vreinterpretq_s16_s32(out3));
+      vst1q_s16(data_out + (j + 4) * stride_out + i,
+                vreinterpretq_s16_s32(out4));
+      vst1q_s16(data_out + (j + 5) * stride_out + i,
+                vreinterpretq_s16_s32(out5));
+      vst1q_s16(data_out + (j + 6) * stride_out + i,
+                vreinterpretq_s16_s32(out6));
+      vst1q_s16(data_out + (j + 7) * stride_out + i,
+                vreinterpretq_s16_s32(out7));
+    }
+  }
+}
+
+template <size_t N>
+struct FastDCTTag {};  // Empty tag type; N picks the FastIDCT overload.
+
+#include "lib/jxl/fast_dct128-inl.h"
+#include "lib/jxl/fast_dct16-inl.h"
+#include "lib/jxl/fast_dct256-inl.h"
+#include "lib/jxl/fast_dct32-inl.h"
+#include "lib/jxl/fast_dct64-inl.h"
+#include "lib/jxl/fast_dct8-inl.h"
+
+template <size_t ROWS, size_t COLS>
+struct ComputeFastScaledIDCT {
+  // scratch_space must be aligned, and should have space for ROWS*COLS
+  // int16_ts. Both `from` and scratch_space are overwritten by the call.
+  HWY_MAYBE_UNUSED void operator()(int16_t* JXL_RESTRICT from, int16_t* to,
+                                   size_t to_stride,
+                                   int16_t* JXL_RESTRICT scratch_space) {
+    // Reverse the steps done in ComputeScaledDCT; the result lands in `to`.
+    if (ROWS < COLS) {
+      FastTransposeBlock(from, COLS, ROWS, COLS, scratch_space, ROWS);
+      FastIDCT(FastDCTTag<COLS>(), scratch_space, ROWS, from, ROWS, ROWS);
+      FastTransposeBlock(from, ROWS, COLS, ROWS, scratch_space, COLS);
+      FastIDCT(FastDCTTag<ROWS>(), scratch_space, COLS, to, to_stride, COLS);
+    } else {
+      FastIDCT(FastDCTTag<COLS>(), from, ROWS, scratch_space, ROWS, ROWS);
+      FastTransposeBlock(scratch_space, ROWS, COLS, ROWS, from, COLS);
+      FastIDCT(FastDCTTag<ROWS>(), from, COLS, to, to_stride, COLS);
+    }
+  }
+};
+#endif
+
+template <size_t N, size_t M>
+HWY_NOINLINE void TestFastIDCT() {
+#if HWY_TARGET == HWY_NEON
+  auto pixels_mem = hwy::AllocateAligned<float>(N * M);
+  float* pixels = pixels_mem.get();
+  auto dct_mem = hwy::AllocateAligned<float>(N * M);
+  float* dct = dct_mem.get();
+  auto dct_i_mem = hwy::AllocateAligned<int16_t>(N * M);
+  int16_t* dct_i = dct_i_mem.get();
+  auto dct_in_mem = hwy::AllocateAligned<int16_t>(N * M);
+  int16_t* dct_in = dct_in_mem.get();
+  auto idct_mem = hwy::AllocateAligned<int16_t>(N * M);
+  int16_t* idct = idct_mem.get();
+
+  auto scratch_space_mem = hwy::AllocateAligned<float>(N * M * 2);
+  float* scratch_space = scratch_space_mem.get();
+  auto scratch_space_i_mem = hwy::AllocateAligned<int16_t>(N * M * 2);
+  int16_t* scratch_space_i = scratch_space_i_mem.get();
+
+  Rng rng(0);
+  for (size_t i = 0; i < N * M; i++) {
+    pixels[i] = rng.UniformF(-1, 1);
+  }
+  ComputeScaledDCT<M, N>()(DCTFrom(pixels, N), dct, scratch_space);
+  size_t integer_bits = std::max(FastIDCTIntegerBits(FastDCTTag<N>()),
+                                 FastIDCTIntegerBits(FastDCTTag<M>()));
+  // Enough range for [-2, 2] output values.
+  JXL_ASSERT(integer_bits <= 14);
+  float scale = (1 << (14 - integer_bits));
+  for (size_t i = 0; i < N * M; i++) {
+    dct_i[i] = std::round(dct[i] * scale);
+  }
+
+  for (size_t j = 0; j < 40000000 / (M * N); j++) {
+    memcpy(dct_in, dct_i, sizeof(*dct_i) * N * M);
+    ComputeFastScaledIDCT<M, N>()(dct_in, idct, N, scratch_space_i);
+  }
+  float max_error = 0;
+  for (size_t i = 0; i < M * N; i++) {
+    float err = std::abs(idct[i] * (1.0f / scale) - pixels[i]);
+    if (std::abs(err) > max_error) {
+      max_error = std::abs(err);
+    }
+  }
+  printf("max error: %f mantissa bits: %d\n", max_error,
+         14 - (int)integer_bits);
+#endif
+}
+
+// Runs the float IDCT repeatedly on the DCT of random pixels and prints the
+// largest absolute difference between the reconstruction and those pixels.
+template <size_t N, size_t M>
+HWY_NOINLINE void TestFloatIDCT() {
+  auto pixels_mem = hwy::AllocateAligned<float>(N * M);
+  float* pixels = pixels_mem.get();
+  auto dct_mem = hwy::AllocateAligned<float>(N * M);
+  float* dct = dct_mem.get();
+  auto idct_mem = hwy::AllocateAligned<float>(N * M);
+  float* idct = idct_mem.get();
+
+  // Working copy of the coefficients. It must be its own buffer (not an alias
+  // of `dct`) so each repetition below restarts from the unmodified input.
+  auto dct_in_mem = hwy::AllocateAligned<float>(N * M);
+  float* dct_in = dct_in_mem.get();
+
+  auto scratch_space_mem = hwy::AllocateAligned<float>(N * M * 2);
+  float* scratch_space = scratch_space_mem.get();
+
+  Rng rng(0);
+  for (size_t i = 0; i < N * M; i++) pixels[i] = rng.UniformF(-1, 1);
+  ComputeScaledDCT<M, N>()(DCTFrom(pixels, N), dct, scratch_space);
+
+  for (size_t j = 0; j < 40000000 / (M * N); j++) {
+    memcpy(dct_in, dct, sizeof(*dct) * N * M);
+    ComputeScaledIDCT<M, N>()(dct_in, DCTTo(idct, N), scratch_space);
+  }
+  float max_error = 0;
+  for (size_t i = 0; i < M * N; i++) {
+    // err is already non-negative, so no second std::abs is needed.
+    float err = std::abs(idct[i] - pixels[i]);
+    if (err > max_error) max_error = err;
+  }
+  printf("max error: %e\n", max_error);
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_FAST_DCT_INL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_dct.cc b/third-party/libjxl/libjxl/lib/jxl/fast_dct.cc
new file mode 100644
index 0000000000..d796018fd0
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/fast_dct.cc
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/fast_dct.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/dct-inl.h"
+#include "lib/jxl/fast_dct-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {  // Per-target benchmark bodies, exported via HWY_EXPORT below.
+void BenchmarkFloatIDCT32x32() { TestFloatIDCT<32, 32>(); }
+void BenchmarkFastIDCT32x32() { TestFastIDCT<32, 32>(); }
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(BenchmarkFloatIDCT32x32);  // Highway dispatch table, one per name
+HWY_EXPORT(BenchmarkFastIDCT32x32);
+void BenchmarkFloatIDCT32x32() {  // public entry point, see fast_dct.h
+  HWY_DYNAMIC_DISPATCH(BenchmarkFloatIDCT32x32)();
+}
+void BenchmarkFastIDCT32x32() {  // public entry point, see fast_dct.h
+  HWY_DYNAMIC_DISPATCH(BenchmarkFastIDCT32x32)();
+}
+}  // namespace jxl
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_dct.h b/third-party/libjxl/libjxl/lib/jxl/fast_dct.h
new file mode 100644
index 0000000000..641933d8a0
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/fast_dct.h
@@ -0,0 +1,9 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+namespace jxl {
+void BenchmarkFloatIDCT32x32();  // float IDCT test on a 32x32 block
+void BenchmarkFastIDCT32x32();   // int16_t IDCT test on a 32x32 block
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_dct128-inl.h b/third-party/libjxl/libjxl/lib/jxl/fast_dct128-inl.h
new file mode 100644
index 0000000000..1a94d3ee92
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/fast_dct128-inl.h
@@ -0,0 +1,2137 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/* This file is automatically generated. Do not modify it directly. */
+#if HWY_TARGET != HWY_NEON
+#error "only include this file from fast_dct-inl.h"
+#endif
+
+constexpr size_t FastIDCTIntegerBits(FastDCTTag<128>) { return 2; }
+
+void FastIDCT(FastDCTTag<128>, const int16_t* in, size_t in_stride,
+              int16_t* out, size_t out_stride, size_t count) {
+  JXL_ASSERT(count % 8 == 0);
+  for (size_t i = 0; i < count; i += 8) {
+    int16x8_t v0 = vld1q_s16(in + in_stride * 0 + i);
+    int16x8_t v1 = vld1q_s16(in + in_stride * 64 + i);
+    int16x8_t v2 = vaddq_s16(v0, v1);
+    int16x8_t v3 = vld1q_s16(in + in_stride * 32 + i);
+    int16x8_t v4_tmp = vqrdmulhq_n_s16(v3, 13573);
+    int16x8_t v4 = vaddq_s16(v4_tmp, v3);
+    int16x8_t v5 = vld1q_s16(in + in_stride * 96 + i);
+    int16x8_t v6 = vaddq_s16(v5, v3);
+    int16x8_t v7 = vaddq_s16(v4, v6);
+    int16x8_t v8 = vqrdmulhq_n_s16(v7, 17734);
+    int16x8_t v9 = vaddq_s16(v2, v8);
+    int16x8_t v10 = vld1q_s16(in + in_stride * 16 + i);
+    int16x8_t v11_tmp = vqrdmulhq_n_s16(v10, 13573);
+    int16x8_t v11 = vaddq_s16(v11_tmp, v10);
+    int16x8_t v12 = vld1q_s16(in + in_stride * 80 + i);
+    int16x8_t v13 = vld1q_s16(in + in_stride * 48 + i);
+    int16x8_t v14 = vaddq_s16(v12, v13);
+    int16x8_t v15 = vaddq_s16(v11, v14);
+    int16x8_t v16 = vaddq_s16(v13, v10);
+    int16x8_t v17_tmp = vqrdmulhq_n_s16(v16, 13573);
+    int16x8_t v17 = vaddq_s16(v17_tmp, v16);
+    int16x8_t v18 = vld1q_s16(in + in_stride * 112 + i);
+    int16x8_t v19 = vaddq_s16(v18, v12);
+    int16x8_t v20 = vaddq_s16(v19, v16);
+    int16x8_t v21 = vaddq_s16(v17, v20);
+    int16x8_t v22 = vqrdmulhq_n_s16(v21, 17734);
+    int16x8_t v23 = vaddq_s16(v15, v22);
+    int16x8_t v24 = vqrdmulhq_n_s16(v23, 16705);
+    int16x8_t v25 = vaddq_s16(v9, v24);
+    int16x8_t v26 = vld1q_s16(in + in_stride * 8 + i);
+    int16x8_t v27_tmp = vqrdmulhq_n_s16(v26, 13573);
+    int16x8_t v27 = vaddq_s16(v27_tmp, v26);
+    int16x8_t v28 = vld1q_s16(in + in_stride * 72 + i);
+    int16x8_t v29 = vld1q_s16(in + in_stride * 56 + i);
+    int16x8_t v30 = vaddq_s16(v28, v29);
+    int16x8_t v31 = vaddq_s16(v27, v30);
+    int16x8_t v32 = vld1q_s16(in + in_stride * 40 + i);
+    int16x8_t v33 = vld1q_s16(in + in_stride * 24 + i);
+    int16x8_t v34 = vaddq_s16(v32, v33);
+    int16x8_t v35_tmp = vqrdmulhq_n_s16(v34, 13573);
+    int16x8_t v35 = vaddq_s16(v35_tmp, v34);
+    int16x8_t v36 = vld1q_s16(in + in_stride * 104 + i);
+    int16x8_t v37 = vld1q_s16(in + in_stride * 88 + i);
+    int16x8_t v38 = vaddq_s16(v36, v37);
+    int16x8_t v39 = vaddq_s16(v38, v34);
+    int16x8_t v40 = vaddq_s16(v35, v39);
+    int16x8_t v41 = vqrdmulhq_n_s16(v40, 17734);
+    int16x8_t v42 = vaddq_s16(v31, v41);
+    int16x8_t v43 = vaddq_s16(v33, v26);
+    int16x8_t v44_tmp = vqrdmulhq_n_s16(v43, 13573);
+    int16x8_t v44 = vaddq_s16(v44_tmp, v43);
+    int16x8_t v45 = vaddq_s16(v37, v28);
+    int16x8_t v46 = vaddq_s16(v29, v32);
+    int16x8_t v47 = vaddq_s16(v45, v46);
+    int16x8_t v48 = vaddq_s16(v44, v47);
+    int16x8_t v49 = vaddq_s16(v46, v43);
+    int16x8_t v50_tmp = vqrdmulhq_n_s16(v49, 13573);
+    int16x8_t v50 = vaddq_s16(v50_tmp, v49);
+    int16x8_t v51 = vld1q_s16(in + in_stride * 120 + i);
+    int16x8_t v52 = vaddq_s16(v51, v36);
+    int16x8_t v53 = vaddq_s16(v52, v45);
+    int16x8_t v54 = vaddq_s16(v53, v49);
+    int16x8_t v55 = vaddq_s16(v50, v54);
+    int16x8_t v56 = vqrdmulhq_n_s16(v55, 17734);
+    int16x8_t v57 = vaddq_s16(v48, v56);
+    int16x8_t v58 = vqrdmulhq_n_s16(v57, 16705);
+    int16x8_t v59 = vaddq_s16(v42, v58);
+    int16x8_t v60 = vqrdmulhq_n_s16(v59, 16463);
+    int16x8_t v61 = vaddq_s16(v25, v60);
+    int16x8_t v62 = vld1q_s16(in + in_stride * 4 + i);
+    int16x8_t v63_tmp = vqrdmulhq_n_s16(v62, 13573);
+    int16x8_t v63 = vaddq_s16(v63_tmp, v62);
+    int16x8_t v64 = vld1q_s16(in + in_stride * 68 + i);
+    int16x8_t v65 = vld1q_s16(in + in_stride * 60 + i);
+    int16x8_t v66 = vaddq_s16(v64, v65);
+    int16x8_t v67 = vaddq_s16(v63, v66);
+    int16x8_t v68 = vld1q_s16(in + in_stride * 36 + i);
+    int16x8_t v69 = vld1q_s16(in + in_stride * 28 + i);
+    int16x8_t v70 = vaddq_s16(v68, v69);
+    int16x8_t v71_tmp = vqrdmulhq_n_s16(v70, 13573);
+    int16x8_t v71 = vaddq_s16(v71_tmp, v70);
+    int16x8_t v72 = vld1q_s16(in + in_stride * 100 + i);
+    int16x8_t v73 = vld1q_s16(in + in_stride * 92 + i);
+    int16x8_t v74 = vaddq_s16(v72, v73);
+    int16x8_t v75 = vaddq_s16(v74, v70);
+    int16x8_t v76 = vaddq_s16(v71, v75);
+    int16x8_t v77 = vqrdmulhq_n_s16(v76, 17734);
+    int16x8_t v78 = vaddq_s16(v67, v77);
+    int16x8_t v79 = vld1q_s16(in + in_stride * 20 + i);
+    int16x8_t v80 = vld1q_s16(in + in_stride * 12 + i);
+    int16x8_t v81 = vaddq_s16(v79, v80);
+    int16x8_t v82_tmp = vqrdmulhq_n_s16(v81, 13573);
+    int16x8_t v82 = vaddq_s16(v82_tmp, v81);
+    int16x8_t v83 = vld1q_s16(in + in_stride * 84 + i);
+    int16x8_t v84 = vld1q_s16(in + in_stride * 76 + i);
+    int16x8_t v85 = vaddq_s16(v83, v84);
+    int16x8_t v86 = vld1q_s16(in + in_stride * 52 + i);
+    int16x8_t v87 = vld1q_s16(in + in_stride * 44 + i);
+    int16x8_t v88 = vaddq_s16(v86, v87);
+    int16x8_t v89 = vaddq_s16(v85, v88);
+    int16x8_t v90 = vaddq_s16(v82, v89);
+    int16x8_t v91 = vaddq_s16(v88, v81);
+    int16x8_t v92_tmp = vqrdmulhq_n_s16(v91, 13573);
+    int16x8_t v92 = vaddq_s16(v92_tmp, v91);
+    int16x8_t v93 = vld1q_s16(in + in_stride * 116 + i);
+    int16x8_t v94 = vld1q_s16(in + in_stride * 108 + i);
+    int16x8_t v95 = vaddq_s16(v93, v94);
+    int16x8_t v96 = vaddq_s16(v95, v85);
+    int16x8_t v97 = vaddq_s16(v96, v91);
+    int16x8_t v98 = vaddq_s16(v92, v97);
+    int16x8_t v99 = vqrdmulhq_n_s16(v98, 17734);
+    int16x8_t v100 = vaddq_s16(v90, v99);
+    int16x8_t v101 = vqrdmulhq_n_s16(v100, 16705);
+    int16x8_t v102 = vaddq_s16(v78, v101);
+    int16x8_t v103 = vaddq_s16(v80, v62);
+    int16x8_t v104_tmp = vqrdmulhq_n_s16(v103, 13573);
+    int16x8_t v104 = vaddq_s16(v104_tmp, v103);
+    int16x8_t v105 = vaddq_s16(v84, v64);
+    int16x8_t v106 = vaddq_s16(v65, v86);
+    int16x8_t v107 = vaddq_s16(v105, v106);
+    int16x8_t v108 = vaddq_s16(v104, v107);
+    int16x8_t v109 = vaddq_s16(v87, v68);
+    int16x8_t v110 = vaddq_s16(v69, v79);
+    int16x8_t v111 = vaddq_s16(v109, v110);
+    int16x8_t v112_tmp = vqrdmulhq_n_s16(v111, 13573);
+    int16x8_t v112 = vaddq_s16(v112_tmp, v111);
+    int16x8_t v113 = vaddq_s16(v94, v72);
+    int16x8_t v114 = vaddq_s16(v73, v83);
+    int16x8_t v115 = vaddq_s16(v113, v114);
+    int16x8_t v116 = vaddq_s16(v115, v111);
+    int16x8_t v117 = vaddq_s16(v112, v116);
+    int16x8_t v118 = vqrdmulhq_n_s16(v117, 17734);
+    int16x8_t v119 = vaddq_s16(v108, v118);
+    int16x8_t v120 = vaddq_s16(v110, v103);
+    int16x8_t v121_tmp = vqrdmulhq_n_s16(v120, 13573);
+    int16x8_t v121 = vaddq_s16(v121_tmp, v120);
+    int16x8_t v122 = vaddq_s16(v114, v105);
+    int16x8_t v123 = vaddq_s16(v106, v109);
+    int16x8_t v124 = vaddq_s16(v122, v123);
+    int16x8_t v125 = vaddq_s16(v121, v124);
+    int16x8_t v126 = vaddq_s16(v123, v120);
+    int16x8_t v127_tmp = vqrdmulhq_n_s16(v126, 13573);
+    int16x8_t v127 = vaddq_s16(v127_tmp, v126);
+    int16x8_t v128 = vld1q_s16(in + in_stride * 124 + i);
+    int16x8_t v129 = vaddq_s16(v128, v93);
+    int16x8_t v130 = vaddq_s16(v129, v113);
+    int16x8_t v131 = vaddq_s16(v130, v122);
+    int16x8_t v132 = vaddq_s16(v131, v126);
+    int16x8_t v133 = vaddq_s16(v127, v132);
+    int16x8_t v134 = vqrdmulhq_n_s16(v133, 17734);
+    int16x8_t v135 = vaddq_s16(v125, v134);
+    int16x8_t v136 = vqrdmulhq_n_s16(v135, 16705);
+    int16x8_t v137 = vaddq_s16(v119, v136);
+    int16x8_t v138 = vqrdmulhq_n_s16(v137, 16463);
+    int16x8_t v139 = vaddq_s16(v102, v138);
+    int16x8_t v140 = vqrdmulhq_n_s16(v139, 16404);
+    int16x8_t v141 = vaddq_s16(v61, v140);
+    int16x8_t v142 = vld1q_s16(in + in_stride * 2 + i);
+    int16x8_t v143_tmp = vqrdmulhq_n_s16(v142, 13573);
+    int16x8_t v143 = vaddq_s16(v143_tmp, v142);
+    int16x8_t v144 = vld1q_s16(in + in_stride * 66 + i);
+    int16x8_t v145 = vld1q_s16(in + in_stride * 62 + i);
+    int16x8_t v146 = vaddq_s16(v144, v145);
+    int16x8_t v147 = vaddq_s16(v143, v146);
+    int16x8_t v148 = vld1q_s16(in + in_stride * 34 + i);
+    int16x8_t v149 = vld1q_s16(in + in_stride * 30 + i);
+    int16x8_t v150 = vaddq_s16(v148, v149);
+    int16x8_t v151_tmp = vqrdmulhq_n_s16(v150, 13573);
+    int16x8_t v151 = vaddq_s16(v151_tmp, v150);
+    int16x8_t v152 = vld1q_s16(in + in_stride * 98 + i);
+    int16x8_t v153 = vld1q_s16(in + in_stride * 94 + i);
+    int16x8_t v154 = vaddq_s16(v152, v153);
+    int16x8_t v155 = vaddq_s16(v154, v150);
+    int16x8_t v156 = vaddq_s16(v151, v155);
+    int16x8_t v157 = vqrdmulhq_n_s16(v156, 17734);
+    int16x8_t v158 = vaddq_s16(v147, v157);
+    int16x8_t v159 = vld1q_s16(in + in_stride * 18 + i);
+    int16x8_t v160 = vld1q_s16(in + in_stride * 14 + i);
+    int16x8_t v161 = vaddq_s16(v159, v160);
+    int16x8_t v162_tmp = vqrdmulhq_n_s16(v161, 13573);
+    int16x8_t v162 = vaddq_s16(v162_tmp, v161);
+    int16x8_t v163 = vld1q_s16(in + in_stride * 82 + i);
+    int16x8_t v164 = vld1q_s16(in + in_stride * 78 + i);
+    int16x8_t v165 = vaddq_s16(v163, v164);
+    int16x8_t v166 = vld1q_s16(in + in_stride * 50 + i);
+    int16x8_t v167 = vld1q_s16(in + in_stride * 46 + i);
+    int16x8_t v168 = vaddq_s16(v166, v167);
+    int16x8_t v169 = vaddq_s16(v165, v168);
+    int16x8_t v170 = vaddq_s16(v162, v169);
+    int16x8_t v171 = vaddq_s16(v168, v161);
+    int16x8_t v172_tmp = vqrdmulhq_n_s16(v171, 13573);
+    int16x8_t v172 = vaddq_s16(v172_tmp, v171);
+    int16x8_t v173 = vld1q_s16(in + in_stride * 114 + i);
+    int16x8_t v174 = vld1q_s16(in + in_stride * 110 + i);
+    int16x8_t v175 = vaddq_s16(v173, v174);
+    int16x8_t v176 = vaddq_s16(v175, v165);
+    int16x8_t v177 = vaddq_s16(v176, v171);
+    int16x8_t v178 = vaddq_s16(v172, v177);
+    int16x8_t v179 = vqrdmulhq_n_s16(v178, 17734);
+    int16x8_t v180 = vaddq_s16(v170, v179);
+    int16x8_t v181 = vqrdmulhq_n_s16(v180, 16705);
+    int16x8_t v182 = vaddq_s16(v158, v181);
+    int16x8_t v183 = vld1q_s16(in + in_stride * 10 + i);
+    int16x8_t v184 = vld1q_s16(in + in_stride * 6 + i);
+    int16x8_t v185 = vaddq_s16(v183, v184);
+    int16x8_t v186_tmp = vqrdmulhq_n_s16(v185, 13573);
+    int16x8_t v186 = vaddq_s16(v186_tmp, v185);
+    int16x8_t v187 = vld1q_s16(in + in_stride * 74 + i);
+    int16x8_t v188 = vld1q_s16(in + in_stride * 70 + i);
+    int16x8_t v189 = vaddq_s16(v187, v188);
+    int16x8_t v190 = vld1q_s16(in + in_stride * 58 + i);
+    int16x8_t v191 = vld1q_s16(in + in_stride * 54 + i);
+    int16x8_t v192 = vaddq_s16(v190, v191);
+    int16x8_t v193 = vaddq_s16(v189, v192);
+    int16x8_t v194 = vaddq_s16(v186, v193);
+    int16x8_t v195 = vld1q_s16(in + in_stride * 42 + i);
+    int16x8_t v196 = vld1q_s16(in + in_stride * 38 + i);
+    int16x8_t v197 = vaddq_s16(v195, v196);
+    int16x8_t v198 = vld1q_s16(in + in_stride * 26 + i);
+    int16x8_t v199 = vld1q_s16(in + in_stride * 22 + i);
+    int16x8_t v200 = vaddq_s16(v198, v199);
+    int16x8_t v201 = vaddq_s16(v197, v200);
+    int16x8_t v202_tmp = vqrdmulhq_n_s16(v201, 13573);
+    int16x8_t v202 = vaddq_s16(v202_tmp, v201);
+    int16x8_t v203 = vld1q_s16(in + in_stride * 106 + i);
+    int16x8_t v204 = vld1q_s16(in + in_stride * 102 + i);
+    int16x8_t v205 = vaddq_s16(v203, v204);
+    int16x8_t v206 = vld1q_s16(in + in_stride * 90 + i);
+    int16x8_t v207 = vld1q_s16(in + in_stride * 86 + i);
+    int16x8_t v208 = vaddq_s16(v206, v207);
+    int16x8_t v209 = vaddq_s16(v205, v208);
+    int16x8_t v210 = vaddq_s16(v209, v201);
+    int16x8_t v211 = vaddq_s16(v202, v210);
+    int16x8_t v212 = vqrdmulhq_n_s16(v211, 17734);
+    int16x8_t v213 = vaddq_s16(v194, v212);
+    int16x8_t v214 = vaddq_s16(v200, v185);
+    int16x8_t v215_tmp = vqrdmulhq_n_s16(v214, 13573);
+    int16x8_t v215 = vaddq_s16(v215_tmp, v214);
+    int16x8_t v216 = vaddq_s16(v208, v189);
+    int16x8_t v217 = vaddq_s16(v192, v197);
+    int16x8_t v218 = vaddq_s16(v216, v217);
+    int16x8_t v219 = vaddq_s16(v215, v218);
+    int16x8_t v220 = vaddq_s16(v217, v214);
+    int16x8_t v221_tmp = vqrdmulhq_n_s16(v220, 13573);
+    int16x8_t v221 = vaddq_s16(v221_tmp, v220);
+    int16x8_t v222 = vld1q_s16(in + in_stride * 122 + i);
+    int16x8_t v223 = vld1q_s16(in + in_stride * 118 + i);
+    int16x8_t v224 = vaddq_s16(v222, v223);
+    int16x8_t v225 = vaddq_s16(v224, v205);
+    int16x8_t v226 = vaddq_s16(v225, v216);
+    int16x8_t v227 = vaddq_s16(v226, v220);
+    int16x8_t v228 = vaddq_s16(v221, v227);
+    int16x8_t v229 = vqrdmulhq_n_s16(v228, 17734);
+    int16x8_t v230 = vaddq_s16(v219, v229);
+    int16x8_t v231 = vqrdmulhq_n_s16(v230, 16705);
+    int16x8_t v232 = vaddq_s16(v213, v231);
+    int16x8_t v233 = vqrdmulhq_n_s16(v232, 16463);
+    int16x8_t v234 = vaddq_s16(v182, v233);
+    int16x8_t v235 = vaddq_s16(v184, v142);
+    int16x8_t v236_tmp = vqrdmulhq_n_s16(v235, 13573);
+    int16x8_t v236 = vaddq_s16(v236_tmp, v235);
+    int16x8_t v237 = vaddq_s16(v188, v144);
+    int16x8_t v238 = vaddq_s16(v145, v190);
+    int16x8_t v239 = vaddq_s16(v237, v238);
+    int16x8_t v240 = vaddq_s16(v236, v239);
+    int16x8_t v241 = vaddq_s16(v196, v148);
+    int16x8_t v242 = vaddq_s16(v149, v198);
+    int16x8_t v243 = vaddq_s16(v241, v242);
+    int16x8_t v244_tmp = vqrdmulhq_n_s16(v243, 13573);
+    int16x8_t v244 = vaddq_s16(v244_tmp, v243);
+    int16x8_t v245 = vaddq_s16(v204, v152);
+    int16x8_t v246 = vaddq_s16(v153, v206);
+    int16x8_t v247 = vaddq_s16(v245, v246);
+    int16x8_t v248 = vaddq_s16(v247, v243);
+    int16x8_t v249 = vaddq_s16(v244, v248);
+    int16x8_t v250 = vqrdmulhq_n_s16(v249, 17734);
+    int16x8_t v251 = vaddq_s16(v240, v250);
+    int16x8_t v252 = vaddq_s16(v199, v159);
+    int16x8_t v253 = vaddq_s16(v160, v183);
+    int16x8_t v254 = vaddq_s16(v252, v253);
+    int16x8_t v255_tmp = vqrdmulhq_n_s16(v254, 13573);
+    int16x8_t v255 = vaddq_s16(v255_tmp, v254);
+    int16x8_t v256 = vaddq_s16(v207, v163);
+    int16x8_t v257 = vaddq_s16(v164, v187);
+    int16x8_t v258 = vaddq_s16(v256, v257);
+    int16x8_t v259 = vaddq_s16(v191, v166);
+    int16x8_t v260 = vaddq_s16(v167, v195);
+    int16x8_t v261 = vaddq_s16(v259, v260);
+    int16x8_t v262 = vaddq_s16(v258, v261);
+    int16x8_t v263 = vaddq_s16(v255, v262);
+    int16x8_t v264 = vaddq_s16(v261, v254);
+    int16x8_t v265_tmp = vqrdmulhq_n_s16(v264, 13573);
+    int16x8_t v265 = vaddq_s16(v265_tmp, v264);
+    int16x8_t v266 = vaddq_s16(v223, v173);
+    int16x8_t v267 = vaddq_s16(v174, v203);
+    int16x8_t v268 = vaddq_s16(v266, v267);
+    int16x8_t v269 = vaddq_s16(v268, v258);
+    int16x8_t v270 = vaddq_s16(v269, v264);
+    int16x8_t v271 = vaddq_s16(v265, v270);
+    int16x8_t v272 = vqrdmulhq_n_s16(v271, 17734);
+    int16x8_t v273 = vaddq_s16(v263, v272);
+    int16x8_t v274 = vqrdmulhq_n_s16(v273, 16705);
+    int16x8_t v275 = vaddq_s16(v251, v274);
+    int16x8_t v276 = vaddq_s16(v253, v235);
+    int16x8_t v277_tmp = vqrdmulhq_n_s16(v276, 13573);
+    int16x8_t v277 = vaddq_s16(v277_tmp, v276);
+    int16x8_t v278 = vaddq_s16(v257, v237);
+    int16x8_t v279 = vaddq_s16(v238, v259);
+    int16x8_t v280 = vaddq_s16(v278, v279);
+    int16x8_t v281 = vaddq_s16(v277, v280);
+    int16x8_t v282 = vaddq_s16(v260, v241);
+    int16x8_t v283 = vaddq_s16(v242, v252);
+    int16x8_t v284 = vaddq_s16(v282, v283);
+    int16x8_t v285_tmp = vqrdmulhq_n_s16(v284, 13573);
+    int16x8_t v285 = vaddq_s16(v285_tmp, v284);
+    int16x8_t v286 = vaddq_s16(v267, v245);
+    int16x8_t v287 = vaddq_s16(v246, v256);
+    int16x8_t v288 = vaddq_s16(v286, v287);
+    int16x8_t v289 = vaddq_s16(v288, v284);
+    int16x8_t v290 = vaddq_s16(v285, v289);
+    int16x8_t v291 = vqrdmulhq_n_s16(v290, 17734);
+    int16x8_t v292 = vaddq_s16(v281, v291);
+    int16x8_t v293 = vaddq_s16(v283, v276);
+    int16x8_t v294_tmp = vqrdmulhq_n_s16(v293, 13573);
+    int16x8_t v294 = vaddq_s16(v294_tmp, v293);
+    int16x8_t v295 = vaddq_s16(v287, v278);
+    int16x8_t v296 = vaddq_s16(v279, v282);
+    int16x8_t v297 = vaddq_s16(v295, v296);
+    int16x8_t v298 = vaddq_s16(v294, v297);
+    int16x8_t v299 = vaddq_s16(v296, v293);
+    int16x8_t v300_tmp = vqrdmulhq_n_s16(v299, 13573);
+    int16x8_t v300 = vaddq_s16(v300_tmp, v299);
+    int16x8_t v301 = vld1q_s16(in + in_stride * 126 + i);
+    int16x8_t v302 = vaddq_s16(v301, v222);
+    int16x8_t v303 = vaddq_s16(v302, v266);
+    int16x8_t v304 = vaddq_s16(v303, v286);
+    int16x8_t v305 = vaddq_s16(v304, v295);
+    int16x8_t v306 = vaddq_s16(v305, v299);
+    int16x8_t v307 = vaddq_s16(v300, v306);
+    int16x8_t v308 = vqrdmulhq_n_s16(v307, 17734);
+    int16x8_t v309 = vaddq_s16(v298, v308);
+    int16x8_t v310 = vqrdmulhq_n_s16(v309, 16705);
+    int16x8_t v311 = vaddq_s16(v292, v310);
+    int16x8_t v312 = vqrdmulhq_n_s16(v311, 16463);
+    int16x8_t v313 = vaddq_s16(v275, v312);
+    int16x8_t v314 = vqrdmulhq_n_s16(v313, 16404);
+    int16x8_t v315 = vaddq_s16(v234, v314);
+    int16x8_t v316 = vqrdmulhq_n_s16(v315, 16389);
+    int16x8_t v317 = vaddq_s16(v141, v316);
+    int16x8_t v318 = vld1q_s16(in + in_stride * 1 + i);
+    int16x8_t v319_tmp = vqrdmulhq_n_s16(v318, 13573);
+    int16x8_t v319 = vaddq_s16(v319_tmp, v318);
+    int16x8_t v320 = vld1q_s16(in + in_stride * 65 + i);
+    int16x8_t v321 = vld1q_s16(in + in_stride * 63 + i);
+    int16x8_t v322 = vaddq_s16(v320, v321);
+    int16x8_t v323 = vaddq_s16(v319, v322);
+    int16x8_t v324 = vld1q_s16(in + in_stride * 33 + i);
+    int16x8_t v325 = vld1q_s16(in + in_stride * 31 + i);
+    int16x8_t v326 = vaddq_s16(v324, v325);
+    int16x8_t v327_tmp = vqrdmulhq_n_s16(v326, 13573);
+    int16x8_t v327 = vaddq_s16(v327_tmp, v326);
+    int16x8_t v328 = vld1q_s16(in + in_stride * 97 + i);
+    int16x8_t v329 = vld1q_s16(in + in_stride * 95 + i);
+    int16x8_t v330 = vaddq_s16(v328, v329);
+    int16x8_t v331 = vaddq_s16(v330, v326);
+    int16x8_t v332 = vaddq_s16(v327, v331);
+    int16x8_t v333 = vqrdmulhq_n_s16(v332, 17734);
+    int16x8_t v334 = vaddq_s16(v323, v333);
+    int16x8_t v335 = vld1q_s16(in + in_stride * 17 + i);
+    int16x8_t v336 = vld1q_s16(in + in_stride * 15 + i);
+    int16x8_t v337 = vaddq_s16(v335, v336);
+    int16x8_t v338_tmp = vqrdmulhq_n_s16(v337, 13573);
+    int16x8_t v338 = vaddq_s16(v338_tmp, v337);
+    int16x8_t v339 = vld1q_s16(in + in_stride * 81 + i);
+    int16x8_t v340 = vld1q_s16(in + in_stride * 79 + i);
+    int16x8_t v341 = vaddq_s16(v339, v340);
+    int16x8_t v342 = vld1q_s16(in + in_stride * 49 + i);
+    int16x8_t v343 = vld1q_s16(in + in_stride * 47 + i);
+    int16x8_t v344 = vaddq_s16(v342, v343);
+    int16x8_t v345 = vaddq_s16(v341, v344);
+    int16x8_t v346 = vaddq_s16(v338, v345);
+    int16x8_t v347 = vaddq_s16(v344, v337);
+    int16x8_t v348_tmp = vqrdmulhq_n_s16(v347, 13573);
+    int16x8_t v348 = vaddq_s16(v348_tmp, v347);
+    int16x8_t v349 = vld1q_s16(in + in_stride * 113 + i);
+    int16x8_t v350 = vld1q_s16(in + in_stride * 111 + i);
+    int16x8_t v351 = vaddq_s16(v349, v350);
+    int16x8_t v352 = vaddq_s16(v351, v341);
+    int16x8_t v353 = vaddq_s16(v352, v347);
+    int16x8_t v354 = vaddq_s16(v348, v353);
+    int16x8_t v355 = vqrdmulhq_n_s16(v354, 17734);
+    int16x8_t v356 = vaddq_s16(v346, v355);
+    int16x8_t v357 = vqrdmulhq_n_s16(v356, 16705);
+    int16x8_t v358 = vaddq_s16(v334, v357);
+    int16x8_t v359 = vld1q_s16(in + in_stride * 9 + i);
+    int16x8_t v360 = vld1q_s16(in + in_stride * 7 + i);
+    int16x8_t v361 = vaddq_s16(v359, v360);
+    int16x8_t v362_tmp = vqrdmulhq_n_s16(v361, 13573);
+    int16x8_t v362 = vaddq_s16(v362_tmp, v361);
+    int16x8_t v363 = vld1q_s16(in + in_stride * 73 + i);
+    int16x8_t v364 = vld1q_s16(in + in_stride * 71 + i);
+    int16x8_t v365 = vaddq_s16(v363, v364);
+    int16x8_t v366 = vld1q_s16(in + in_stride * 57 + i);
+    int16x8_t v367 = vld1q_s16(in + in_stride * 55 + i);
+    int16x8_t v368 = vaddq_s16(v366, v367);
+    int16x8_t v369 = vaddq_s16(v365, v368);
+    int16x8_t v370 = vaddq_s16(v362, v369);
+    int16x8_t v371 = vld1q_s16(in + in_stride * 41 + i);
+    int16x8_t v372 = vld1q_s16(in + in_stride * 39 + i);
+    int16x8_t v373 = vaddq_s16(v371, v372);
+    int16x8_t v374 = vld1q_s16(in + in_stride * 25 + i);
+    int16x8_t v375 = vld1q_s16(in + in_stride * 23 + i);
+    int16x8_t v376 = vaddq_s16(v374, v375);
+    int16x8_t v377 = vaddq_s16(v373, v376);
+    int16x8_t v378_tmp = vqrdmulhq_n_s16(v377, 13573);
+    int16x8_t v378 = vaddq_s16(v378_tmp, v377);
+    int16x8_t v379 = vld1q_s16(in + in_stride * 105 + i);
+    int16x8_t v380 = vld1q_s16(in + in_stride * 103 + i);
+    int16x8_t v381 = vaddq_s16(v379, v380);
+    int16x8_t v382 = vld1q_s16(in + in_stride * 89 + i);
+    int16x8_t v383 = vld1q_s16(in + in_stride * 87 + i);
+    int16x8_t v384 = vaddq_s16(v382, v383);
+    int16x8_t v385 = vaddq_s16(v381, v384);
+    int16x8_t v386 = vaddq_s16(v385, v377);
+    int16x8_t v387 = vaddq_s16(v378, v386);
+    int16x8_t v388 = vqrdmulhq_n_s16(v387, 17734);
+    int16x8_t v389 = vaddq_s16(v370, v388);
+    int16x8_t v390 = vaddq_s16(v376, v361);
+    int16x8_t v391_tmp = vqrdmulhq_n_s16(v390, 13573);
+    int16x8_t v391 = vaddq_s16(v391_tmp, v390);
+    int16x8_t v392 = vaddq_s16(v384, v365);
+    int16x8_t v393 = vaddq_s16(v368, v373);
+    int16x8_t v394 = vaddq_s16(v392, v393);
+    int16x8_t v395 = vaddq_s16(v391, v394);
+    int16x8_t v396 = vaddq_s16(v393, v390);
+    int16x8_t v397_tmp = vqrdmulhq_n_s16(v396, 13573);
+    int16x8_t v397 = vaddq_s16(v397_tmp, v396);
+    int16x8_t v398 = vld1q_s16(in + in_stride * 121 + i);
+    int16x8_t v399 = vld1q_s16(in + in_stride * 119 + i);
+    int16x8_t v400 = vaddq_s16(v398, v399);
+    int16x8_t v401 = vaddq_s16(v400, v381);
+    int16x8_t v402 = vaddq_s16(v401, v392);
+    int16x8_t v403 = vaddq_s16(v402, v396);
+    int16x8_t v404 = vaddq_s16(v397, v403);
+    int16x8_t v405 = vqrdmulhq_n_s16(v404, 17734);
+    int16x8_t v406 = vaddq_s16(v395, v405);
+    int16x8_t v407 = vqrdmulhq_n_s16(v406, 16705);
+    int16x8_t v408 = vaddq_s16(v389, v407);
+    int16x8_t v409 = vqrdmulhq_n_s16(v408, 16463);
+    int16x8_t v410 = vaddq_s16(v358, v409);
+    int16x8_t v411 = vld1q_s16(in + in_stride * 5 + i);
+    int16x8_t v412 = vld1q_s16(in + in_stride * 3 + i);
+    int16x8_t v413 = vaddq_s16(v411, v412);
+    int16x8_t v414_tmp = vqrdmulhq_n_s16(v413, 13573);
+    int16x8_t v414 = vaddq_s16(v414_tmp, v413);
+    int16x8_t v415 = vld1q_s16(in + in_stride * 69 + i);
+    int16x8_t v416 = vld1q_s16(in + in_stride * 67 + i);
+    int16x8_t v417 = vaddq_s16(v415, v416);
+    int16x8_t v418 = vld1q_s16(in + in_stride * 61 + i);
+    int16x8_t v419 = vld1q_s16(in + in_stride * 59 + i);
+    int16x8_t v420 = vaddq_s16(v418, v419);
+    int16x8_t v421 = vaddq_s16(v417, v420);
+    int16x8_t v422 = vaddq_s16(v414, v421);
+    int16x8_t v423 = vld1q_s16(in + in_stride * 37 + i);
+    int16x8_t v424 = vld1q_s16(in + in_stride * 35 + i);
+    int16x8_t v425 = vaddq_s16(v423, v424);
+    int16x8_t v426 = vld1q_s16(in + in_stride * 29 + i);
+    int16x8_t v427 = vld1q_s16(in + in_stride * 27 + i);
+    int16x8_t v428 = vaddq_s16(v426, v427);
+    int16x8_t v429 = vaddq_s16(v425, v428);
+    int16x8_t v430_tmp = vqrdmulhq_n_s16(v429, 13573);
+    int16x8_t v430 = vaddq_s16(v430_tmp, v429);
+    int16x8_t v431 = vld1q_s16(in + in_stride * 101 + i);
+    int16x8_t v432 = vld1q_s16(in + in_stride * 99 + i);
+    int16x8_t v433 = vaddq_s16(v431, v432);
+    int16x8_t v434 = vld1q_s16(in + in_stride * 93 + i);
+    int16x8_t v435 = vld1q_s16(in + in_stride * 91 + i);
+    int16x8_t v436 = vaddq_s16(v434, v435);
+    int16x8_t v437 = vaddq_s16(v433, v436);
+    int16x8_t v438 = vaddq_s16(v437, v429);
+    int16x8_t v439 = vaddq_s16(v430, v438);
+    int16x8_t v440 = vqrdmulhq_n_s16(v439, 17734);
+    int16x8_t v441 = vaddq_s16(v422, v440);
+    int16x8_t v442 = vld1q_s16(in + in_stride * 21 + i);
+    int16x8_t v443 = vld1q_s16(in + in_stride * 19 + i);
+    int16x8_t v444 = vaddq_s16(v442, v443);
+    int16x8_t v445 = vld1q_s16(in + in_stride * 13 + i);
+    int16x8_t v446 = vld1q_s16(in + in_stride * 11 + i);
+    int16x8_t v447 = vaddq_s16(v445, v446);
+    int16x8_t v448 = vaddq_s16(v444, v447);
+    int16x8_t v449_tmp = vqrdmulhq_n_s16(v448, 13573);
+    int16x8_t v449 = vaddq_s16(v449_tmp, v448);
+    int16x8_t v450 = vld1q_s16(in + in_stride * 85 + i);
+    int16x8_t v451 = vld1q_s16(in + in_stride * 83 + i);
+    int16x8_t v452 = vaddq_s16(v450, v451);
+    int16x8_t v453 = vld1q_s16(in + in_stride * 77 + i);
+    int16x8_t v454 = vld1q_s16(in + in_stride * 75 + i);
+    int16x8_t v455 = vaddq_s16(v453, v454);
+    int16x8_t v456 = vaddq_s16(v452, v455);
+    int16x8_t v457 = vld1q_s16(in + in_stride * 53 + i);
+    int16x8_t v458 = vld1q_s16(in + in_stride * 51 + i);
+    int16x8_t v459 = vaddq_s16(v457, v458);
+    int16x8_t v460 = vld1q_s16(in + in_stride * 45 + i);
+    int16x8_t v461 = vld1q_s16(in + in_stride * 43 + i);
+    int16x8_t v462 = vaddq_s16(v460, v461);
+    int16x8_t v463 = vaddq_s16(v459, v462);
+    int16x8_t v464 = vaddq_s16(v456, v463);
+    int16x8_t v465 = vaddq_s16(v449, v464);
+    int16x8_t v466 = vaddq_s16(v463, v448);
+    int16x8_t v467_tmp = vqrdmulhq_n_s16(v466, 13573);
+    int16x8_t v467 = vaddq_s16(v467_tmp, v466);
+    int16x8_t v468 = vld1q_s16(in + in_stride * 117 + i);
+    int16x8_t v469 = vld1q_s16(in + in_stride * 115 + i);
+    int16x8_t v470 = vaddq_s16(v468, v469);
+    int16x8_t v471 = vld1q_s16(in + in_stride * 109 + i);
+    int16x8_t v472 = vld1q_s16(in + in_stride * 107 + i);
+    int16x8_t v473 = vaddq_s16(v471, v472);
+    int16x8_t v474 = vaddq_s16(v470, v473);
+    int16x8_t v475 = vaddq_s16(v474, v456);
+    int16x8_t v476 = vaddq_s16(v475, v466);
+    int16x8_t v477 = vaddq_s16(v467, v476);
+    int16x8_t v478 = vqrdmulhq_n_s16(v477, 17734);
+    int16x8_t v479 = vaddq_s16(v465, v478);
+    int16x8_t v480 = vqrdmulhq_n_s16(v479, 16705);
+    int16x8_t v481 = vaddq_s16(v441, v480);
+    int16x8_t v482 = vaddq_s16(v447, v413);
+    int16x8_t v483_tmp = vqrdmulhq_n_s16(v482, 13573);
+    int16x8_t v483 = vaddq_s16(v483_tmp, v482);
+    int16x8_t v484 = vaddq_s16(v455, v417);
+    int16x8_t v485 = vaddq_s16(v420, v459);
+    int16x8_t v486 = vaddq_s16(v484, v485);
+    int16x8_t v487 = vaddq_s16(v483, v486);
+    int16x8_t v488 = vaddq_s16(v462, v425);
+    int16x8_t v489 = vaddq_s16(v428, v444);
+    int16x8_t v490 = vaddq_s16(v488, v489);
+    int16x8_t v491_tmp = vqrdmulhq_n_s16(v490, 13573);
+    int16x8_t v491 = vaddq_s16(v491_tmp, v490);
+    int16x8_t v492 = vaddq_s16(v473, v433);
+    int16x8_t v493 = vaddq_s16(v436, v452);
+    int16x8_t v494 = vaddq_s16(v492, v493);
+    int16x8_t v495 = vaddq_s16(v494, v490);
+    int16x8_t v496 = vaddq_s16(v491, v495);
+    int16x8_t v497 = vqrdmulhq_n_s16(v496, 17734);
+    int16x8_t v498 = vaddq_s16(v487, v497);
+    int16x8_t v499 = vaddq_s16(v489, v482);
+    int16x8_t v500_tmp = vqrdmulhq_n_s16(v499, 13573);
+    int16x8_t v500 = vaddq_s16(v500_tmp, v499);
+    int16x8_t v501 = vaddq_s16(v493, v484);
+    int16x8_t v502 = vaddq_s16(v485, v488);
+    int16x8_t v503 = vaddq_s16(v501, v502);
+    int16x8_t v504 = vaddq_s16(v500, v503);
+    int16x8_t v505 = vaddq_s16(v502, v499);
+    int16x8_t v506_tmp = vqrdmulhq_n_s16(v505, 13573);
+    int16x8_t v506 = vaddq_s16(v506_tmp, v505);
+    int16x8_t v507 = vld1q_s16(in + in_stride * 125 + i);
+    int16x8_t v508 = vld1q_s16(in + in_stride * 123 + i);
+    int16x8_t v509 = vaddq_s16(v507, v508);
+    int16x8_t v510 = vaddq_s16(v509, v470);
+    int16x8_t v511 = vaddq_s16(v510, v492);
+    int16x8_t v512 = vaddq_s16(v511, v501);
+    int16x8_t v513 = vaddq_s16(v512, v505);
+    int16x8_t v514 = vaddq_s16(v506, v513);
+    int16x8_t v515 = vqrdmulhq_n_s16(v514, 17734);
+    int16x8_t v516 = vaddq_s16(v504, v515);
+    int16x8_t v517 = vqrdmulhq_n_s16(v516, 16705);
+    int16x8_t v518 = vaddq_s16(v498, v517);
+    int16x8_t v519 = vqrdmulhq_n_s16(v518, 16463);
+    int16x8_t v520 = vaddq_s16(v481, v519);
+    int16x8_t v521 = vqrdmulhq_n_s16(v520, 16404);
+    int16x8_t v522 = vaddq_s16(v410, v521);
+    int16x8_t v523 = vaddq_s16(v412, v318);
+    int16x8_t v524_tmp = vqrdmulhq_n_s16(v523, 13573);
+    int16x8_t v524 = vaddq_s16(v524_tmp, v523);
+    int16x8_t v525 = vaddq_s16(v416, v320);
+    int16x8_t v526 = vaddq_s16(v321, v418);
+    int16x8_t v527 = vaddq_s16(v525, v526);
+    int16x8_t v528 = vaddq_s16(v524, v527);
+    int16x8_t v529 = vaddq_s16(v424, v324);
+    int16x8_t v530 = vaddq_s16(v325, v426);
+    int16x8_t v531 = vaddq_s16(v529, v530);
+    int16x8_t v532_tmp = vqrdmulhq_n_s16(v531, 13573);
+    int16x8_t v532 = vaddq_s16(v532_tmp, v531);
+    int16x8_t v533 = vaddq_s16(v432, v328);
+    int16x8_t v534 = vaddq_s16(v329, v434);
+    int16x8_t v535 = vaddq_s16(v533, v534);
+    int16x8_t v536 = vaddq_s16(v535, v531);
+    int16x8_t v537 = vaddq_s16(v532, v536);
+    int16x8_t v538 = vqrdmulhq_n_s16(v537, 17734);
+    int16x8_t v539 = vaddq_s16(v528, v538);
+    int16x8_t v540 = vaddq_s16(v443, v335);
+    int16x8_t v541 = vaddq_s16(v336, v445);
+    int16x8_t v542 = vaddq_s16(v540, v541);
+    int16x8_t v543_tmp = vqrdmulhq_n_s16(v542, 13573);
+    int16x8_t v543 = vaddq_s16(v543_tmp, v542);
+    int16x8_t v544 = vaddq_s16(v451, v339);
+    int16x8_t v545 = vaddq_s16(v340, v453);
+    int16x8_t v546 = vaddq_s16(v544, v545);
+    int16x8_t v547 = vaddq_s16(v458, v342);
+    int16x8_t v548 = vaddq_s16(v343, v460);
+    int16x8_t v549 = vaddq_s16(v547, v548);
+    int16x8_t v550 = vaddq_s16(v546, v549);
+    int16x8_t v551 = vaddq_s16(v543, v550);
+    int16x8_t v552 = vaddq_s16(v549, v542);
+    int16x8_t v553_tmp = vqrdmulhq_n_s16(v552, 13573);
+    int16x8_t v553 = vaddq_s16(v553_tmp, v552);
+    int16x8_t v554 = vaddq_s16(v469, v349);
+    int16x8_t v555 = vaddq_s16(v350, v471);
+    int16x8_t v556 = vaddq_s16(v554, v555);
+    int16x8_t v557 = vaddq_s16(v556, v546);
+    int16x8_t v558 = vaddq_s16(v557, v552);
+    int16x8_t v559 = vaddq_s16(v553, v558);
+    int16x8_t v560 = vqrdmulhq_n_s16(v559, 17734);
+    int16x8_t v561 = vaddq_s16(v551, v560);
+    int16x8_t v562 = vqrdmulhq_n_s16(v561, 16705);
+    int16x8_t v563 = vaddq_s16(v539, v562);
+    int16x8_t v564 = vaddq_s16(v446, v359);
+    int16x8_t v565 = vaddq_s16(v360, v411);
+    int16x8_t v566 = vaddq_s16(v564, v565);
+    int16x8_t v567_tmp = vqrdmulhq_n_s16(v566, 13573);
+    int16x8_t v567 = vaddq_s16(v567_tmp, v566);
+    int16x8_t v568 = vaddq_s16(v454, v363);
+    int16x8_t v569 = vaddq_s16(v364, v415);
+    int16x8_t v570 = vaddq_s16(v568, v569);
+    int16x8_t v571 = vaddq_s16(v419, v366);
+    int16x8_t v572 = vaddq_s16(v367, v457);
+    int16x8_t v573 = vaddq_s16(v571, v572);
+    int16x8_t v574 = vaddq_s16(v570, v573);
+    int16x8_t v575 = vaddq_s16(v567, v574);
+    int16x8_t v576 = vaddq_s16(v461, v371);
+    int16x8_t v577 = vaddq_s16(v372, v423);
+    int16x8_t v578 = vaddq_s16(v576, v577);
+    int16x8_t v579 = vaddq_s16(v427, v374);
+    int16x8_t v580 = vaddq_s16(v375, v442);
+    int16x8_t v581 = vaddq_s16(v579, v580);
+    int16x8_t v582 = vaddq_s16(v578, v581);
+    int16x8_t v583_tmp = vqrdmulhq_n_s16(v582, 13573);
+    int16x8_t v583 = vaddq_s16(v583_tmp, v582);
+    int16x8_t v584 = vaddq_s16(v472, v379);
+    int16x8_t v585 = vaddq_s16(v380, v431);
+    int16x8_t v586 = vaddq_s16(v584, v585);
+    int16x8_t v587 = vaddq_s16(v435, v382);
+    int16x8_t v588 = vaddq_s16(v383, v450);
+    int16x8_t v589 = vaddq_s16(v587, v588);
+    int16x8_t v590 = vaddq_s16(v586, v589);
+    int16x8_t v591 = vaddq_s16(v590, v582);
+    int16x8_t v592 = vaddq_s16(v583, v591);
+    int16x8_t v593 = vqrdmulhq_n_s16(v592, 17734);
+    int16x8_t v594 = vaddq_s16(v575, v593);
+    int16x8_t v595 = vaddq_s16(v581, v566);
+    int16x8_t v596_tmp = vqrdmulhq_n_s16(v595, 13573);
+    int16x8_t v596 = vaddq_s16(v596_tmp, v595);
+    int16x8_t v597 = vaddq_s16(v589, v570);
+    int16x8_t v598 = vaddq_s16(v573, v578);
+    int16x8_t v599 = vaddq_s16(v597, v598);
+    int16x8_t v600 = vaddq_s16(v596, v599);
+    int16x8_t v601 = vaddq_s16(v598, v595);
+    int16x8_t v602_tmp = vqrdmulhq_n_s16(v601, 13573);
+    int16x8_t v602 = vaddq_s16(v602_tmp, v601);
+    int16x8_t v603 = vaddq_s16(v508, v398);
+    int16x8_t v604 = vaddq_s16(v399, v468);
+    int16x8_t v605 = vaddq_s16(v603, v604);
+    int16x8_t v606 = vaddq_s16(v605, v586);
+    int16x8_t v607 = vaddq_s16(v606, v597);
+    int16x8_t v608 = vaddq_s16(v607, v601);
+    int16x8_t v609 = vaddq_s16(v602, v608);
+    int16x8_t v610 = vqrdmulhq_n_s16(v609, 17734);
+    int16x8_t v611 = vaddq_s16(v600, v610);
+    int16x8_t v612 = vqrdmulhq_n_s16(v611, 16705);
+    int16x8_t v613 = vaddq_s16(v594, v612);
+    int16x8_t v614 = vqrdmulhq_n_s16(v613, 16463);
+    int16x8_t v615 = vaddq_s16(v563, v614);
+    int16x8_t v616 = vaddq_s16(v565, v523);
+    int16x8_t v617_tmp = vqrdmulhq_n_s16(v616, 13573);
+    int16x8_t v617 = vaddq_s16(v617_tmp, v616);
+    int16x8_t v618 = vaddq_s16(v569, v525);
+    int16x8_t v619 = vaddq_s16(v526, v571);
+    int16x8_t v620 = vaddq_s16(v618, v619);
+    int16x8_t v621 = vaddq_s16(v617, v620);
+    int16x8_t v622 = vaddq_s16(v577, v529);
+    int16x8_t v623 = vaddq_s16(v530, v579);
+    int16x8_t v624 = vaddq_s16(v622, v623);
+    int16x8_t v625_tmp = vqrdmulhq_n_s16(v624, 13573);
+    int16x8_t v625 = vaddq_s16(v625_tmp, v624);
+    int16x8_t v626 = vaddq_s16(v585, v533);
+    int16x8_t v627 = vaddq_s16(v534, v587);
+    int16x8_t v628 = vaddq_s16(v626, v627);
+    int16x8_t v629 = vaddq_s16(v628, v624);
+    int16x8_t v630 = vaddq_s16(v625, v629);
+    int16x8_t v631 = vqrdmulhq_n_s16(v630, 17734);
+    int16x8_t v632 = vaddq_s16(v621, v631);
+    int16x8_t v633 = vaddq_s16(v580, v540);
+    int16x8_t v634 = vaddq_s16(v541, v564);
+    int16x8_t v635 = vaddq_s16(v633, v634);
+    int16x8_t v636_tmp = vqrdmulhq_n_s16(v635, 13573);
+    int16x8_t v636 = vaddq_s16(v636_tmp, v635);
+    int16x8_t v637 = vaddq_s16(v588, v544);
+    int16x8_t v638 = vaddq_s16(v545, v568);
+    int16x8_t v639 = vaddq_s16(v637, v638);
+    int16x8_t v640 = vaddq_s16(v572, v547);
+    int16x8_t v641 = vaddq_s16(v548, v576);
+    int16x8_t v642 = vaddq_s16(v640, v641);
+    int16x8_t v643 = vaddq_s16(v639, v642);
+    int16x8_t v644 = vaddq_s16(v636, v643);
+    int16x8_t v645 = vaddq_s16(v642, v635);
+    int16x8_t v646_tmp = vqrdmulhq_n_s16(v645, 13573);
+    int16x8_t v646 = vaddq_s16(v646_tmp, v645);
+    int16x8_t v647 = vaddq_s16(v604, v554);
+    int16x8_t v648 = vaddq_s16(v555, v584);
+    int16x8_t v649 = vaddq_s16(v647, v648);
+    int16x8_t v650 = vaddq_s16(v649, v639);
+    int16x8_t v651 = vaddq_s16(v650, v645);
+    int16x8_t v652 = vaddq_s16(v646, v651);
+    int16x8_t v653 = vqrdmulhq_n_s16(v652, 17734);
+    int16x8_t v654 = vaddq_s16(v644, v653);
+    int16x8_t v655 = vqrdmulhq_n_s16(v654, 16705);
+    int16x8_t v656 = vaddq_s16(v632, v655);
+    int16x8_t v657 = vaddq_s16(v634, v616);
+    int16x8_t v658_tmp = vqrdmulhq_n_s16(v657, 13573);
+    int16x8_t v658 = vaddq_s16(v658_tmp, v657);
+    int16x8_t v659 = vaddq_s16(v638, v618);
+    int16x8_t v660 = vaddq_s16(v619, v640);
+    int16x8_t v661 = vaddq_s16(v659, v660);
+    int16x8_t v662 = vaddq_s16(v658, v661);
+    int16x8_t v663 = vaddq_s16(v641, v622);
+    int16x8_t v664 = vaddq_s16(v623, v633);
+    int16x8_t v665 = vaddq_s16(v663, v664);
+    int16x8_t v666_tmp = vqrdmulhq_n_s16(v665, 13573);
+    int16x8_t v666 = vaddq_s16(v666_tmp, v665);
+    int16x8_t v667 = vaddq_s16(v648, v626);
+    int16x8_t v668 = vaddq_s16(v627, v637);
+    int16x8_t v669 = vaddq_s16(v667, v668);
+    int16x8_t v670 = vaddq_s16(v669, v665);
+    int16x8_t v671 = vaddq_s16(v666, v670);
+    int16x8_t v672 = vqrdmulhq_n_s16(v671, 17734);
+    int16x8_t v673 = vaddq_s16(v662, v672);
+    int16x8_t v674 = vaddq_s16(v664, v657);
+    int16x8_t v675_tmp = vqrdmulhq_n_s16(v674, 13573);
+    int16x8_t v675 = vaddq_s16(v675_tmp, v674);
+    int16x8_t v676 = vaddq_s16(v668, v659);
+    int16x8_t v677 = vaddq_s16(v660, v663);
+    int16x8_t v678 = vaddq_s16(v676, v677);
+    int16x8_t v679 = vaddq_s16(v675, v678);
+    int16x8_t v680 = vaddq_s16(v677, v674);
+    int16x8_t v681_tmp = vqrdmulhq_n_s16(v680, 13573);
+    int16x8_t v681 = vaddq_s16(v681_tmp, v680);
+    int16x8_t v682 = vld1q_s16(in + in_stride * 127 + i);
+    int16x8_t v683 = vaddq_s16(v682, v507);
+    int16x8_t v684 = vaddq_s16(v683, v603);
+    int16x8_t v685 = vaddq_s16(v684, v647);
+    int16x8_t v686 = vaddq_s16(v685, v667);
+    int16x8_t v687 = vaddq_s16(v686, v676);
+    int16x8_t v688 = vaddq_s16(v687, v680);
+    int16x8_t v689 = vaddq_s16(v681, v688);
+    int16x8_t v690 = vqrdmulhq_n_s16(v689, 17734);
+    int16x8_t v691 = vaddq_s16(v679, v690);
+    int16x8_t v692 = vqrdmulhq_n_s16(v691, 16705);
+    int16x8_t v693 = vaddq_s16(v673, v692);
+    int16x8_t v694 = vqrdmulhq_n_s16(v693, 16463);
+    int16x8_t v695 = vaddq_s16(v656, v694);
+    int16x8_t v696 = vqrdmulhq_n_s16(v695, 16404);
+    int16x8_t v697 = vaddq_s16(v615, v696);
+    int16x8_t v698 = vqrdmulhq_n_s16(v697, 16389);
+    int16x8_t v699 = vaddq_s16(v522, v698);
+    int16x8_t v700 = vqrdmulhq_n_s16(v699, 16385);
+    int16x8_t v701 = vaddq_s16(v317, v700);
+    int16x8_t v702 = vsubq_s16(v0, v1);
+    int16x8_t v703 = vsubq_s16(v4, v6);
+    int16x8_t v704_tmp = vqrdmulhq_n_s16(v703, 10045);
+    int16x8_t v704 = vaddq_s16(v704_tmp, v703);
+    int16x8_t v705 = vaddq_s16(v702, v704);
+    int16x8_t v706 = vsubq_s16(v11, v14);
+    int16x8_t v707 = vsubq_s16(v17, v20);
+    int16x8_t v708_tmp = vqrdmulhq_n_s16(v707, 10045);
+    int16x8_t v708 = vaddq_s16(v708_tmp, v707);
+    int16x8_t v709 = vaddq_s16(v706, v708);
+    int16x8_t v710 = vqrdmulhq_n_s16(v709, 19705);
+    int16x8_t v711 = vaddq_s16(v705, v710);
+    int16x8_t v712 = vsubq_s16(v27, v30);
+    int16x8_t v713 = vsubq_s16(v35, v39);
+    int16x8_t v714_tmp = vqrdmulhq_n_s16(v713, 10045);
+    int16x8_t v714 = vaddq_s16(v714_tmp, v713);
+    int16x8_t v715 = vaddq_s16(v712, v714);
+    int16x8_t v716 = vsubq_s16(v44, v47);
+    int16x8_t v717 = vsubq_s16(v50, v54);
+    int16x8_t v718_tmp = vqrdmulhq_n_s16(v717, 10045);
+    int16x8_t v718 = vaddq_s16(v718_tmp, v717);
+    int16x8_t v719 = vaddq_s16(v716, v718);
+    int16x8_t v720 = vqrdmulhq_n_s16(v719, 19705);
+    int16x8_t v721 = vaddq_s16(v715, v720);
+    int16x8_t v722 = vqrdmulhq_n_s16(v721, 17121);
+    int16x8_t v723 = vaddq_s16(v711, v722);
+    int16x8_t v724 = vsubq_s16(v63, v66);
+    int16x8_t v725 = vsubq_s16(v71, v75);
+    int16x8_t v726_tmp = vqrdmulhq_n_s16(v725, 10045);
+    int16x8_t v726 = vaddq_s16(v726_tmp, v725);
+    int16x8_t v727 = vaddq_s16(v724, v726);
+    int16x8_t v728 = vsubq_s16(v82, v89);
+    int16x8_t v729 = vsubq_s16(v92, v97);
+    int16x8_t v730_tmp = vqrdmulhq_n_s16(v729, 10045);
+    int16x8_t v730 = vaddq_s16(v730_tmp, v729);
+    int16x8_t v731 = vaddq_s16(v728, v730);
+    int16x8_t v732 = vqrdmulhq_n_s16(v731, 19705);
+    int16x8_t v733 = vaddq_s16(v727, v732);
+    int16x8_t v734 = vsubq_s16(v104, v107);
+    int16x8_t v735 = vsubq_s16(v112, v116);
+    int16x8_t v736_tmp = vqrdmulhq_n_s16(v735, 10045);
+    int16x8_t v736 = vaddq_s16(v736_tmp, v735);
+    int16x8_t v737 = vaddq_s16(v734, v736);
+    int16x8_t v738 = vsubq_s16(v121, v124);
+    int16x8_t v739 = vsubq_s16(v127, v132);
+    int16x8_t v740_tmp = vqrdmulhq_n_s16(v739, 10045);
+    int16x8_t v740 = vaddq_s16(v740_tmp, v739);
+    int16x8_t v741 = vaddq_s16(v738, v740);
+    int16x8_t v742 = vqrdmulhq_n_s16(v741, 19705);
+    int16x8_t v743 = vaddq_s16(v737, v742);
+    int16x8_t v744 = vqrdmulhq_n_s16(v743, 17121);
+    int16x8_t v745 = vaddq_s16(v733, v744);
+    int16x8_t v746 = vqrdmulhq_n_s16(v745, 16563);
+    int16x8_t v747 = vaddq_s16(v723, v746);
+    int16x8_t v748 = vsubq_s16(v143, v146);
+    int16x8_t v749 = vsubq_s16(v151, v155);
+    int16x8_t v750_tmp = vqrdmulhq_n_s16(v749, 10045);
+    int16x8_t v750 = vaddq_s16(v750_tmp, v749);
+    int16x8_t v751 = vaddq_s16(v748, v750);
+    int16x8_t v752 = vsubq_s16(v162, v169);
+    int16x8_t v753 = vqrdmulhq_n_s16(v752, 19705);
+    int16x8_t v754 = vsubq_s16(v172, v177);
+    int16x8_t v755 = vqrdmulhq_n_s16(v754, 25746);
+    int16x8_t v756 = vaddq_s16(v753, v755);
+    int16x8_t v757 = vaddq_s16(v751, v756);
+    int16x8_t v758 = vsubq_s16(v186, v193);
+    int16x8_t v759 = vsubq_s16(v202, v210);
+    int16x8_t v760_tmp = vqrdmulhq_n_s16(v759, 10045);
+    int16x8_t v760 = vaddq_s16(v760_tmp, v759);
+    int16x8_t v761 = vaddq_s16(v758, v760);
+    int16x8_t v762 = vsubq_s16(v215, v218);
+    int16x8_t v763 = vsubq_s16(v221, v227);
+    int16x8_t v764_tmp = vqrdmulhq_n_s16(v763, 10045);
+    int16x8_t v764 = vaddq_s16(v764_tmp, v763);
+    int16x8_t v765 = vaddq_s16(v762, v764);
+    int16x8_t v766 = vqrdmulhq_n_s16(v765, 19705);
+    int16x8_t v767 = vaddq_s16(v761, v766);
+    int16x8_t v768 = vqrdmulhq_n_s16(v767, 17121);
+    int16x8_t v769 = vaddq_s16(v757, v768);
+    int16x8_t v770 = vsubq_s16(v236, v239);
+    int16x8_t v771 = vsubq_s16(v244, v248);
+    int16x8_t v772_tmp = vqrdmulhq_n_s16(v771, 10045);
+    int16x8_t v772 = vaddq_s16(v772_tmp, v771);
+    int16x8_t v773 = vaddq_s16(v770, v772);
+    int16x8_t v774 = vsubq_s16(v255, v262);
+    int16x8_t v775 = vsubq_s16(v265, v270);
+    int16x8_t v776_tmp = vqrdmulhq_n_s16(v775, 10045);
+    int16x8_t v776 = vaddq_s16(v776_tmp, v775);
+    int16x8_t v777 = vaddq_s16(v774, v776);
+    int16x8_t v778 = vqrdmulhq_n_s16(v777, 19705);
+    int16x8_t v779 = vaddq_s16(v773, v778);
+    int16x8_t v780 = vsubq_s16(v277, v280);
+    int16x8_t v781 = vsubq_s16(v285, v289);
+    int16x8_t v782_tmp = vqrdmulhq_n_s16(v781, 10045);
+    int16x8_t v782 = vaddq_s16(v782_tmp, v781);
+    int16x8_t v783 = vaddq_s16(v780, v782);
+    int16x8_t v784 = vsubq_s16(v294, v297);
+    int16x8_t v785 = vsubq_s16(v300, v306);
+    int16x8_t v786_tmp = vqrdmulhq_n_s16(v785, 10045);
+    int16x8_t v786 = vaddq_s16(v786_tmp, v785);
+    int16x8_t v787 = vaddq_s16(v784, v786);
+    int16x8_t v788 = vqrdmulhq_n_s16(v787, 19705);
+    int16x8_t v789 = vaddq_s16(v783, v788);
+    int16x8_t v790 = vqrdmulhq_n_s16(v789, 17121);
+    int16x8_t v791 = vaddq_s16(v779, v790);
+    int16x8_t v792 = vqrdmulhq_n_s16(v791, 16563);
+    int16x8_t v793 = vaddq_s16(v769, v792);
+    int16x8_t v794 = vqrdmulhq_n_s16(v793, 16429);
+    int16x8_t v795 = vaddq_s16(v747, v794);
+    int16x8_t v796 = vsubq_s16(v319, v322);
+    int16x8_t v797 = vsubq_s16(v327, v331);
+    int16x8_t v798_tmp = vqrdmulhq_n_s16(v797, 10045);
+    int16x8_t v798 = vaddq_s16(v798_tmp, v797);
+    int16x8_t v799 = vaddq_s16(v796, v798);
+    int16x8_t v800 = vsubq_s16(v338, v345);
+    int16x8_t v801 = vsubq_s16(v348, v353);
+    int16x8_t v802_tmp = vqrdmulhq_n_s16(v801, 10045);
+    int16x8_t v802 = vaddq_s16(v802_tmp, v801);
+    int16x8_t v803 = vaddq_s16(v800, v802);
+    int16x8_t v804 = vqrdmulhq_n_s16(v803, 19705);
+    int16x8_t v805 = vaddq_s16(v799, v804);
+    int16x8_t v806 = vsubq_s16(v362, v369);
+    int16x8_t v807 = vsubq_s16(v378, v386);
+    int16x8_t v808_tmp = vqrdmulhq_n_s16(v807, 10045);
+    int16x8_t v808 = vaddq_s16(v808_tmp, v807);
+    int16x8_t v809 = vaddq_s16(v806, v808);
+    int16x8_t v810 = vsubq_s16(v391, v394);
+    int16x8_t v811 = vsubq_s16(v397, v403);
+    int16x8_t v812_tmp = vqrdmulhq_n_s16(v811, 10045);
+    int16x8_t v812 = vaddq_s16(v812_tmp, v811);
+    int16x8_t v813 = vaddq_s16(v810, v812);
+    int16x8_t v814 = vqrdmulhq_n_s16(v813, 19705);
+    int16x8_t v815 = vaddq_s16(v809, v814);
+    int16x8_t v816 = vqrdmulhq_n_s16(v815, 17121);
+    int16x8_t v817 = vaddq_s16(v805, v816);
+    int16x8_t v818 = vsubq_s16(v414, v421);
+    int16x8_t v819 = vsubq_s16(v430, v438);
+    int16x8_t v820_tmp = vqrdmulhq_n_s16(v819, 10045);
+    int16x8_t v820 = vaddq_s16(v820_tmp, v819);
+    int16x8_t v821 = vaddq_s16(v818, v820);
+    int16x8_t v822 = vsubq_s16(v449, v464);
+    int16x8_t v823 = vsubq_s16(v467, v476);
+    int16x8_t v824_tmp = vqrdmulhq_n_s16(v823, 10045);
+    int16x8_t v824 = vaddq_s16(v824_tmp, v823);
+    int16x8_t v825 = vaddq_s16(v822, v824);
+    int16x8_t v826 = vqrdmulhq_n_s16(v825, 19705);
+    int16x8_t v827 = vaddq_s16(v821, v826);
+    int16x8_t v828 = vsubq_s16(v483, v486);
+    int16x8_t v829 = vsubq_s16(v491, v495);
+    int16x8_t v830_tmp = vqrdmulhq_n_s16(v829, 10045);
+    int16x8_t v830 = vaddq_s16(v830_tmp, v829);
+    int16x8_t v831 = vaddq_s16(v828, v830);
+    int16x8_t v832 = vsubq_s16(v500, v503);
+    int16x8_t v833 = vsubq_s16(v506, v513);
+    int16x8_t v834_tmp = vqrdmulhq_n_s16(v833, 10045);
+    int16x8_t v834 = vaddq_s16(v834_tmp, v833);
+    int16x8_t v835 = vaddq_s16(v832, v834);
+    int16x8_t v836 = vqrdmulhq_n_s16(v835, 19705);
+    int16x8_t v837 = vaddq_s16(v831, v836);
+    int16x8_t v838 = vqrdmulhq_n_s16(v837, 17121);
+    int16x8_t v839 = vaddq_s16(v827, v838);
+    int16x8_t v840 = vqrdmulhq_n_s16(v839, 16563);
+    int16x8_t v841 = vaddq_s16(v817, v840);
+    int16x8_t v842 = vsubq_s16(v524, v527);
+    int16x8_t v843 = vsubq_s16(v532, v536);
+    int16x8_t v844_tmp = vqrdmulhq_n_s16(v843, 10045);
+    int16x8_t v844 = vaddq_s16(v844_tmp, v843);
+    int16x8_t v845 = vaddq_s16(v842, v844);
+    int16x8_t v846 = vsubq_s16(v543, v550);
+    int16x8_t v847 = vsubq_s16(v553, v558);
+    int16x8_t v848_tmp = vqrdmulhq_n_s16(v847, 10045);
+    int16x8_t v848 = vaddq_s16(v848_tmp, v847);
+    int16x8_t v849 = vaddq_s16(v846, v848);
+    int16x8_t v850 = vqrdmulhq_n_s16(v849, 19705);
+    int16x8_t v851 = vaddq_s16(v845, v850);
+    int16x8_t v852 = vsubq_s16(v567, v574);
+    int16x8_t v853 = vsubq_s16(v583, v591);
+    int16x8_t v854_tmp = vqrdmulhq_n_s16(v853, 10045);
+    int16x8_t v854 = vaddq_s16(v854_tmp, v853);
+    int16x8_t v855 = vaddq_s16(v852, v854);
+    int16x8_t v856 = vsubq_s16(v596, v599);
+    int16x8_t v857 = vsubq_s16(v602, v608);
+    int16x8_t v858_tmp = vqrdmulhq_n_s16(v857, 10045);
+    int16x8_t v858 = vaddq_s16(v858_tmp, v857);
+    int16x8_t v859 = vaddq_s16(v856, v858);
+    int16x8_t v860 = vqrdmulhq_n_s16(v859, 19705);
+    int16x8_t v861 = vaddq_s16(v855, v860);
+    int16x8_t v862 = vqrdmulhq_n_s16(v861, 17121);
+    int16x8_t v863 = vaddq_s16(v851, v862);
+    int16x8_t v864 = vsubq_s16(v617, v620);
+    int16x8_t v865 = vsubq_s16(v625, v629);
+    int16x8_t v866_tmp = vqrdmulhq_n_s16(v865, 10045);
+    int16x8_t v866 = vaddq_s16(v866_tmp, v865);
+    int16x8_t v867 = vaddq_s16(v864, v866);
+    int16x8_t v868 = vsubq_s16(v636, v643);
+    int16x8_t v869 = vsubq_s16(v646, v651);
+    int16x8_t v870_tmp = vqrdmulhq_n_s16(v869, 10045);
+    int16x8_t v870 = vaddq_s16(v870_tmp, v869);
+    int16x8_t v871 = vaddq_s16(v868, v870);
+    int16x8_t v872 = vqrdmulhq_n_s16(v871, 19705);
+    int16x8_t v873 = vaddq_s16(v867, v872);
+    int16x8_t v874 = vsubq_s16(v658, v661);
+    int16x8_t v875 = vsubq_s16(v666, v670);
+    int16x8_t v876_tmp = vqrdmulhq_n_s16(v875, 10045);
+    int16x8_t v876 = vaddq_s16(v876_tmp, v875);
+    int16x8_t v877 = vaddq_s16(v874, v876);
+    int16x8_t v878 = vsubq_s16(v675, v678);
+    int16x8_t v879 = vsubq_s16(v681, v688);
+    int16x8_t v880_tmp = vqrdmulhq_n_s16(v879, 10045);
+    int16x8_t v880 = vaddq_s16(v880_tmp, v879);
+    int16x8_t v881 = vaddq_s16(v878, v880);
+    int16x8_t v882 = vqrdmulhq_n_s16(v881, 19705);
+    int16x8_t v883 = vaddq_s16(v877, v882);
+    int16x8_t v884 = vqrdmulhq_n_s16(v883, 17121);
+    int16x8_t v885 = vaddq_s16(v873, v884);
+    int16x8_t v886 = vqrdmulhq_n_s16(v885, 16563);
+    int16x8_t v887 = vaddq_s16(v863, v886);
+    int16x8_t v888 = vqrdmulhq_n_s16(v887, 16429);
+    int16x8_t v889 = vaddq_s16(v841, v888);
+    int16x8_t v890 = vqrdmulhq_n_s16(v889, 16395);
+    int16x8_t v891 = vaddq_s16(v795, v890);
+    int16x8_t v892 = vsubq_s16(v702, v704);
+    int16x8_t v893 = vsubq_s16(v706, v708);
+    int16x8_t v894 = vqrdmulhq_n_s16(v893, 29490);
+    int16x8_t v895 = vaddq_s16(v892, v894);
+    int16x8_t v896 = vsubq_s16(v712, v714);
+    int16x8_t v897 = vsubq_s16(v716, v718);
+    int16x8_t v898 = vqrdmulhq_n_s16(v897, 29490);
+    int16x8_t v899 = vaddq_s16(v896, v898);
+    int16x8_t v900 = vqrdmulhq_n_s16(v899, 18578);
+    int16x8_t v901 = vaddq_s16(v895, v900);
+    int16x8_t v902 = vsubq_s16(v724, v726);
+    int16x8_t v903 = vsubq_s16(v728, v730);
+    int16x8_t v904 = vqrdmulhq_n_s16(v903, 29490);
+    int16x8_t v905 = vaddq_s16(v902, v904);
+    int16x8_t v906 = vsubq_s16(v734, v736);
+    int16x8_t v907 = vsubq_s16(v738, v740);
+    int16x8_t v908 = vqrdmulhq_n_s16(v907, 29490);
+    int16x8_t v909 = vaddq_s16(v906, v908);
+    int16x8_t v910 = vqrdmulhq_n_s16(v909, 18578);
+    int16x8_t v911 = vaddq_s16(v905, v910);
+    int16x8_t v912 = vqrdmulhq_n_s16(v911, 16890);
+    int16x8_t v913 = vaddq_s16(v901, v912);
+    int16x8_t v914 = vsubq_s16(v748, v750);
+    int16x8_t v915_tmp = vqrdmulhq_n_s16(v754, 10045);
+    int16x8_t v915 = vaddq_s16(v915_tmp, v754);
+    int16x8_t v916 = vsubq_s16(v752, v915);
+    int16x8_t v917 = vqrdmulhq_n_s16(v916, 29490);
+    int16x8_t v918 = vaddq_s16(v914, v917);
+    int16x8_t v919 = vsubq_s16(v758, v760);
+    int16x8_t v920 = vsubq_s16(v762, v764);
+    int16x8_t v921 = vqrdmulhq_n_s16(v920, 29490);
+    int16x8_t v922 = vaddq_s16(v919, v921);
+    int16x8_t v923 = vqrdmulhq_n_s16(v922, 18578);
+    int16x8_t v924 = vaddq_s16(v918, v923);
+    int16x8_t v925 = vsubq_s16(v770, v772);
+    int16x8_t v926 = vsubq_s16(v774, v776);
+    int16x8_t v927 = vqrdmulhq_n_s16(v926, 29490);
+    int16x8_t v928 = vaddq_s16(v925, v927);
+    int16x8_t v929 = vsubq_s16(v780, v782);
+    int16x8_t v930 = vsubq_s16(v784, v786);
+    int16x8_t v931 = vqrdmulhq_n_s16(v930, 29490);
+    int16x8_t v932 = vaddq_s16(v929, v931);
+    int16x8_t v933 = vqrdmulhq_n_s16(v932, 18578);
+    int16x8_t v934 = vaddq_s16(v928, v933);
+    int16x8_t v935 = vqrdmulhq_n_s16(v934, 16890);
+    int16x8_t v936 = vaddq_s16(v924, v935);
+    int16x8_t v937 = vqrdmulhq_n_s16(v936, 16508);
+    int16x8_t v938 = vaddq_s16(v913, v937);
+    int16x8_t v939 = vsubq_s16(v796, v798);
+    int16x8_t v940 = vsubq_s16(v800, v802);
+    int16x8_t v941 = vqrdmulhq_n_s16(v940, 29490);
+    int16x8_t v942 = vaddq_s16(v939, v941);
+    int16x8_t v943 = vsubq_s16(v806, v808);
+    int16x8_t v944 = vsubq_s16(v810, v812);
+    int16x8_t v945 = vqrdmulhq_n_s16(v944, 29490);
+    int16x8_t v946 = vaddq_s16(v943, v945);
+    int16x8_t v947 = vqrdmulhq_n_s16(v946, 18578);
+    int16x8_t v948 = vaddq_s16(v942, v947);
+    int16x8_t v949 = vsubq_s16(v818, v820);
+    int16x8_t v950 = vsubq_s16(v822, v824);
+    int16x8_t v951 = vqrdmulhq_n_s16(v950, 29490);
+    int16x8_t v952 = vaddq_s16(v949, v951);
+    int16x8_t v953 = vsubq_s16(v828, v830);
+    int16x8_t v954 = vsubq_s16(v832, v834);
+    int16x8_t v955 = vqrdmulhq_n_s16(v954, 29490);
+    int16x8_t v956 = vaddq_s16(v953, v955);
+    int16x8_t v957 = vqrdmulhq_n_s16(v956, 18578);
+    int16x8_t v958 = vaddq_s16(v952, v957);
+    int16x8_t v959 = vqrdmulhq_n_s16(v958, 16890);
+    int16x8_t v960 = vaddq_s16(v948, v959);
+    int16x8_t v961 = vsubq_s16(v842, v844);
+    int16x8_t v962 = vsubq_s16(v846, v848);
+    int16x8_t v963 = vqrdmulhq_n_s16(v962, 29490);
+    int16x8_t v964 = vaddq_s16(v961, v963);
+    int16x8_t v965 = vsubq_s16(v852, v854);
+    int16x8_t v966 = vsubq_s16(v856, v858);
+    int16x8_t v967 = vqrdmulhq_n_s16(v966, 29490);
+    int16x8_t v968 = vaddq_s16(v965, v967);
+    int16x8_t v969 = vqrdmulhq_n_s16(v968, 18578);
+    int16x8_t v970 = vaddq_s16(v964, v969);
+    int16x8_t v971 = vsubq_s16(v864, v866);
+    int16x8_t v972 = vsubq_s16(v868, v870);
+    int16x8_t v973 = vqrdmulhq_n_s16(v972, 29490);
+    int16x8_t v974 = vaddq_s16(v971, v973);
+    int16x8_t v975 = vsubq_s16(v874, v876);
+    int16x8_t v976 = vsubq_s16(v878, v880);
+    int16x8_t v977 = vqrdmulhq_n_s16(v976, 29490);
+    int16x8_t v978 = vaddq_s16(v975, v977);
+    int16x8_t v979 = vqrdmulhq_n_s16(v978, 18578);
+    int16x8_t v980 = vaddq_s16(v974, v979);
+    int16x8_t v981 = vqrdmulhq_n_s16(v980, 16890);
+    int16x8_t v982 = vaddq_s16(v970, v981);
+    int16x8_t v983 = vqrdmulhq_n_s16(v982, 16508);
+    int16x8_t v984 = vaddq_s16(v960, v983);
+    int16x8_t v985 = vqrdmulhq_n_s16(v984, 16415);
+    int16x8_t v986 = vaddq_s16(v938, v985);
+    int16x8_t v987 = vsubq_s16(v2, v8);
+    int16x8_t v988 = vsubq_s16(v15, v22);
+    int16x8_t v989_tmp = vqrdmulhq_n_s16(v988, 18446);
+    int16x8_t v989 = vmlaq_n_s16(v989_tmp, v988, 2);
+    int16x8_t v990 = vaddq_s16(v987, v989);
+    int16x8_t v991 = vsubq_s16(v31, v41);
+    int16x8_t v992 = vsubq_s16(v48, v56);
+    int16x8_t v993_tmp = vqrdmulhq_n_s16(v992, 18446);
+    int16x8_t v993 = vmlaq_n_s16(v993_tmp, v992, 2);
+    int16x8_t v994 = vaddq_s16(v991, v993);
+    int16x8_t v995 = vqrdmulhq_n_s16(v994, 21195);
+    int16x8_t v996 = vaddq_s16(v990, v995);
+    int16x8_t v997 = vsubq_s16(v67, v77);
+    int16x8_t v998 = vsubq_s16(v90, v99);
+    int16x8_t v999_tmp = vqrdmulhq_n_s16(v998, 18446);
+    int16x8_t v999 = vmlaq_n_s16(v999_tmp, v998, 2);
+    int16x8_t v1000 = vaddq_s16(v997, v999);
+    int16x8_t v1001 = vsubq_s16(v108, v118);
+    int16x8_t v1002 = vsubq_s16(v125, v134);
+    int16x8_t v1003_tmp = vqrdmulhq_n_s16(v1002, 18446);
+    int16x8_t v1003 = vmlaq_n_s16(v1003_tmp, v1002, 2);
+    int16x8_t v1004 = vaddq_s16(v1001, v1003);
+    int16x8_t v1005 = vqrdmulhq_n_s16(v1004, 21195);
+    int16x8_t v1006 = vaddq_s16(v1000, v1005);
+    int16x8_t v1007 = vqrdmulhq_n_s16(v1006, 17401);
+    int16x8_t v1008 = vaddq_s16(v996, v1007);
+    int16x8_t v1009 = vsubq_s16(v147, v157);
+    int16x8_t v1010 = vsubq_s16(v170, v179);
+    int16x8_t v1011_tmp = vqrdmulhq_n_s16(v1010, 18446);
+    int16x8_t v1011 = vmlaq_n_s16(v1011_tmp, v1010, 2);
+    int16x8_t v1012 = vaddq_s16(v1009, v1011);
+    int16x8_t v1013 = vsubq_s16(v194, v212);
+    int16x8_t v1014 = vsubq_s16(v219, v229);
+    int16x8_t v1015_tmp = vqrdmulhq_n_s16(v1014, 18446);
+    int16x8_t v1015 = vmlaq_n_s16(v1015_tmp, v1014, 2);
+    int16x8_t v1016 = vaddq_s16(v1013, v1015);
+    int16x8_t v1017 = vqrdmulhq_n_s16(v1016, 21195);
+    int16x8_t v1018 = vaddq_s16(v1012, v1017);
+    int16x8_t v1019 = vsubq_s16(v240, v250);
+    int16x8_t v1020 = vsubq_s16(v263, v272);
+    int16x8_t v1021_tmp = vqrdmulhq_n_s16(v1020, 18446);
+    int16x8_t v1021 = vmlaq_n_s16(v1021_tmp, v1020, 2);
+    int16x8_t v1022 = vaddq_s16(v1019, v1021);
+    int16x8_t v1023 = vsubq_s16(v281, v291);
+    int16x8_t v1024 = vsubq_s16(v298, v308);
+    int16x8_t v1025_tmp = vqrdmulhq_n_s16(v1024, 18446);
+    int16x8_t v1025 = vmlaq_n_s16(v1025_tmp, v1024, 2);
+    int16x8_t v1026 = vaddq_s16(v1023, v1025);
+    int16x8_t v1027 = vqrdmulhq_n_s16(v1026, 21195);
+    int16x8_t v1028 = vaddq_s16(v1022, v1027);
+    int16x8_t v1029 = vqrdmulhq_n_s16(v1028, 17401);
+    int16x8_t v1030 = vaddq_s16(v1018, v1029);
+    int16x8_t v1031 = vqrdmulhq_n_s16(v1030, 16629);
+    int16x8_t v1032 = vaddq_s16(v1008, v1031);
+    int16x8_t v1033 = vsubq_s16(v323, v333);
+    int16x8_t v1034 = vsubq_s16(v346, v355);
+    int16x8_t v1035_tmp = vqrdmulhq_n_s16(v1034, 18446);
+    int16x8_t v1035 = vmlaq_n_s16(v1035_tmp, v1034, 2);
+    int16x8_t v1036 = vaddq_s16(v1033, v1035);
+    int16x8_t v1037 = vsubq_s16(v370, v388);
+    int16x8_t v1038 = vsubq_s16(v395, v405);
+    int16x8_t v1039_tmp = vqrdmulhq_n_s16(v1038, 18446);
+    int16x8_t v1039 = vmlaq_n_s16(v1039_tmp, v1038, 2);
+    int16x8_t v1040 = vaddq_s16(v1037, v1039);
+    int16x8_t v1041 = vqrdmulhq_n_s16(v1040, 21195);
+    int16x8_t v1042 = vaddq_s16(v1036, v1041);
+    int16x8_t v1043 = vsubq_s16(v422, v440);
+    int16x8_t v1044 = vsubq_s16(v465, v478);
+    int16x8_t v1045_tmp = vqrdmulhq_n_s16(v1044, 18446);
+    int16x8_t v1045 = vmlaq_n_s16(v1045_tmp, v1044, 2);
+    int16x8_t v1046 = vaddq_s16(v1043, v1045);
+    int16x8_t v1047 = vsubq_s16(v487, v497);
+    int16x8_t v1048 = vsubq_s16(v504, v515);
+    int16x8_t v1049_tmp = vqrdmulhq_n_s16(v1048, 18446);
+    int16x8_t v1049 = vmlaq_n_s16(v1049_tmp, v1048, 2);
+    int16x8_t v1050 = vaddq_s16(v1047, v1049);
+    int16x8_t v1051 = vqrdmulhq_n_s16(v1050, 21195);
+    int16x8_t v1052 = vaddq_s16(v1046, v1051);
+    int16x8_t v1053 = vqrdmulhq_n_s16(v1052, 17401);
+    int16x8_t v1054 = vaddq_s16(v1042, v1053);
+    int16x8_t v1055 = vsubq_s16(v528, v538);
+    int16x8_t v1056 = vsubq_s16(v551, v560);
+    int16x8_t v1057_tmp = vqrdmulhq_n_s16(v1056, 18446);
+    int16x8_t v1057 = vmlaq_n_s16(v1057_tmp, v1056, 2);
+    int16x8_t v1058 = vaddq_s16(v1055, v1057);
+    int16x8_t v1059 = vsubq_s16(v575, v593);
+    int16x8_t v1060 = vsubq_s16(v600, v610);
+    int16x8_t v1061_tmp = vqrdmulhq_n_s16(v1060, 18446);
+    int16x8_t v1061 = vmlaq_n_s16(v1061_tmp, v1060, 2);
+    int16x8_t v1062 = vaddq_s16(v1059, v1061);
+    int16x8_t v1063 = vqrdmulhq_n_s16(v1062, 21195);
+    int16x8_t v1064 = vaddq_s16(v1058, v1063);
+    int16x8_t v1065 = vsubq_s16(v621, v631);
+    int16x8_t v1066 = vsubq_s16(v644, v653);
+    int16x8_t v1067_tmp = vqrdmulhq_n_s16(v1066, 18446);
+    int16x8_t v1067 = vmlaq_n_s16(v1067_tmp, v1066, 2);
+    int16x8_t v1068 = vaddq_s16(v1065, v1067);
+    int16x8_t v1069 = vsubq_s16(v662, v672);
+    int16x8_t v1070 = vsubq_s16(v679, v690);
+    int16x8_t v1071_tmp = vqrdmulhq_n_s16(v1070, 18446);
+    int16x8_t v1071 = vmlaq_n_s16(v1071_tmp, v1070, 2);
+    int16x8_t v1072 = vaddq_s16(v1069, v1071);
+    int16x8_t v1073 = vqrdmulhq_n_s16(v1072, 21195);
+    int16x8_t v1074 = vaddq_s16(v1068, v1073);
+    int16x8_t v1075 = vqrdmulhq_n_s16(v1074, 17401);
+    int16x8_t v1076 = vaddq_s16(v1064, v1075);
+    int16x8_t v1077 = vqrdmulhq_n_s16(v1076, 16629);
+    int16x8_t v1078 = vaddq_s16(v1054, v1077);
+    int16x8_t v1079 = vqrdmulhq_n_s16(v1078, 16445);
+    int16x8_t v1080 = vaddq_s16(v1032, v1079);
+    int16x8_t v1081 = vsubq_s16(v987, v989);
+    int16x8_t v1082 = vsubq_s16(v991, v993);
+    int16x8_t v1083 = vqrdmulhq_n_s16(v1082, 25826);
+    int16x8_t v1084 = vaddq_s16(v1081, v1083);
+    int16x8_t v1085 = vsubq_s16(v997, v999);
+    int16x8_t v1086 = vsubq_s16(v1001, v1003);
+    int16x8_t v1087 = vqrdmulhq_n_s16(v1086, 25826);
+    int16x8_t v1088 = vaddq_s16(v1085, v1087);
+    int16x8_t v1089 = vqrdmulhq_n_s16(v1088, 18124);
+    int16x8_t v1090 = vaddq_s16(v1084, v1089);
+    int16x8_t v1091 = vsubq_s16(v1009, v1011);
+    int16x8_t v1092 = vsubq_s16(v1013, v1015);
+    int16x8_t v1093 = vqrdmulhq_n_s16(v1092, 25826);
+    int16x8_t v1094 = vaddq_s16(v1091, v1093);
+    int16x8_t v1095 = vsubq_s16(v1019, v1021);
+    int16x8_t v1096 = vsubq_s16(v1023, v1025);
+    int16x8_t v1097 = vqrdmulhq_n_s16(v1096, 25826);
+    int16x8_t v1098 = vaddq_s16(v1095, v1097);
+    int16x8_t v1099 = vqrdmulhq_n_s16(v1098, 18124);
+    int16x8_t v1100 = vaddq_s16(v1094, v1099);
+    int16x8_t v1101 = vqrdmulhq_n_s16(v1100, 16792);
+    int16x8_t v1102 = vaddq_s16(v1090, v1101);
+    int16x8_t v1103 = vsubq_s16(v1033, v1035);
+    int16x8_t v1104 = vsubq_s16(v1037, v1039);
+    int16x8_t v1105 = vqrdmulhq_n_s16(v1104, 25826);
+    int16x8_t v1106 = vaddq_s16(v1103, v1105);
+    int16x8_t v1107 = vsubq_s16(v1043, v1045);
+    int16x8_t v1108 = vsubq_s16(v1047, v1049);
+    int16x8_t v1109 = vqrdmulhq_n_s16(v1108, 25826);
+    int16x8_t v1110 = vaddq_s16(v1107, v1109);
+    int16x8_t v1111 = vqrdmulhq_n_s16(v1110, 18124);
+    int16x8_t v1112 = vaddq_s16(v1106, v1111);
+    int16x8_t v1113 = vsubq_s16(v1055, v1057);
+    int16x8_t v1114 = vsubq_s16(v1059, v1061);
+    int16x8_t v1115 = vqrdmulhq_n_s16(v1114, 25826);
+    int16x8_t v1116 = vaddq_s16(v1113, v1115);
+    int16x8_t v1117 = vsubq_s16(v1065, v1067);
+    int16x8_t v1118 = vsubq_s16(v1069, v1071);
+    int16x8_t v1119 = vqrdmulhq_n_s16(v1118, 25826);
+    int16x8_t v1120 = vaddq_s16(v1117, v1119);
+    int16x8_t v1121 = vqrdmulhq_n_s16(v1120, 18124);
+    int16x8_t v1122 = vaddq_s16(v1116, v1121);
+    int16x8_t v1123 = vqrdmulhq_n_s16(v1122, 16792);
+    int16x8_t v1124 = vaddq_s16(v1112, v1123);
+    int16x8_t v1125 = vqrdmulhq_n_s16(v1124, 16484);
+    int16x8_t v1126 = vaddq_s16(v1102, v1125);
+    int16x8_t v1127 = vsubq_s16(v892, v894);
+    int16x8_t v1128 = vsubq_s16(v896, v898);
+    int16x8_t v1129_tmp = vqrdmulhq_n_s16(v1128, 1988);
+    int16x8_t v1129 = vaddq_s16(v1129_tmp, v1128);
+    int16x8_t v1130 = vaddq_s16(v1127, v1129);
+    int16x8_t v1131 = vsubq_s16(v902, v904);
+    int16x8_t v1132 = vsubq_s16(v906, v908);
+    int16x8_t v1133_tmp = vqrdmulhq_n_s16(v1132, 1988);
+    int16x8_t v1133 = vaddq_s16(v1133_tmp, v1132);
+    int16x8_t v1134 = vaddq_s16(v1131, v1133);
+    int16x8_t v1135 = vqrdmulhq_n_s16(v1134, 19102);
+    int16x8_t v1136 = vaddq_s16(v1130, v1135);
+    int16x8_t v1137 = vsubq_s16(v914, v917);
+    int16x8_t v1138 = vsubq_s16(v919, v921);
+    int16x8_t v1139_tmp = vqrdmulhq_n_s16(v1138, 1988);
+    int16x8_t v1139 = vaddq_s16(v1139_tmp, v1138);
+    int16x8_t v1140 = vaddq_s16(v1137, v1139);
+    int16x8_t v1141 = vsubq_s16(v925, v927);
+    int16x8_t v1142 = vsubq_s16(v929, v931);
+    int16x8_t v1143_tmp = vqrdmulhq_n_s16(v1142, 1988);
+    int16x8_t v1143 = vaddq_s16(v1143_tmp, v1142);
+    int16x8_t v1144 = vaddq_s16(v1141, v1143);
+    int16x8_t v1145 = vqrdmulhq_n_s16(v1144, 19102);
+    int16x8_t v1146 = vaddq_s16(v1140, v1145);
+    int16x8_t v1147 = vqrdmulhq_n_s16(v1146, 17000);
+    int16x8_t v1148 = vaddq_s16(v1136, v1147);
+    int16x8_t v1149 = vsubq_s16(v939, v941);
+    int16x8_t v1150 = vsubq_s16(v943, v945);
+    int16x8_t v1151_tmp = vqrdmulhq_n_s16(v1150, 1988);
+    int16x8_t v1151 = vaddq_s16(v1151_tmp, v1150);
+    int16x8_t v1152 = vaddq_s16(v1149, v1151);
+    int16x8_t v1153 = vsubq_s16(v949, v951);
+    int16x8_t v1154 = vsubq_s16(v953, v955);
+    int16x8_t v1155_tmp = vqrdmulhq_n_s16(v1154, 1988);
+    int16x8_t v1155 = vaddq_s16(v1155_tmp, v1154);
+    int16x8_t v1156 = vaddq_s16(v1153, v1155);
+    int16x8_t v1157 = vqrdmulhq_n_s16(v1156, 19102);
+    int16x8_t v1158 = vaddq_s16(v1152, v1157);
+    int16x8_t v1159 = vsubq_s16(v961, v963);
+    int16x8_t v1160 = vsubq_s16(v965, v967);
+    int16x8_t v1161_tmp = vqrdmulhq_n_s16(v1160, 1988);
+    int16x8_t v1161 = vaddq_s16(v1161_tmp, v1160);
+    int16x8_t v1162 = vaddq_s16(v1159, v1161);
+    int16x8_t v1163 = vsubq_s16(v971, v973);
+    int16x8_t v1164 = vsubq_s16(v975, v977);
+    int16x8_t v1165_tmp = vqrdmulhq_n_s16(v1164, 1988);
+    int16x8_t v1165 = vaddq_s16(v1165_tmp, v1164);
+    int16x8_t v1166 = vaddq_s16(v1163, v1165);
+    int16x8_t v1167 = vqrdmulhq_n_s16(v1166, 19102);
+    int16x8_t v1168 = vaddq_s16(v1162, v1167);
+    int16x8_t v1169 = vqrdmulhq_n_s16(v1168, 17000);
+    int16x8_t v1170 = vaddq_s16(v1158, v1169);
+    int16x8_t v1171 = vqrdmulhq_n_s16(v1170, 16534);
+    int16x8_t v1172 = vaddq_s16(v1148, v1171);
+    int16x8_t v1173 = vsubq_s16(v705, v710);
+    int16x8_t v1174 = vsubq_s16(v715, v720);
+    int16x8_t v1175_tmp = vqrdmulhq_n_s16(v1174, 23673);
+    int16x8_t v1175 = vaddq_s16(v1175_tmp, v1174);
+    int16x8_t v1176 = vaddq_s16(v1173, v1175);
+    int16x8_t v1177 = vsubq_s16(v727, v732);
+    int16x8_t v1178 = vsubq_s16(v737, v742);
+    int16x8_t v1179_tmp = vqrdmulhq_n_s16(v1178, 23673);
+    int16x8_t v1179 = vaddq_s16(v1179_tmp, v1178);
+    int16x8_t v1180 = vaddq_s16(v1177, v1179);
+    int16x8_t v1181 = vqrdmulhq_n_s16(v1180, 20398);
+    int16x8_t v1182 = vaddq_s16(v1176, v1181);
+    int16x8_t v1183 = vsubq_s16(v751, v756);
+    int16x8_t v1184 = vsubq_s16(v761, v766);
+    int16x8_t v1185_tmp = vqrdmulhq_n_s16(v1184, 23673);
+    int16x8_t v1185 = vaddq_s16(v1185_tmp, v1184);
+    int16x8_t v1186 = vaddq_s16(v1183, v1185);
+    int16x8_t v1187 = vsubq_s16(v773, v778);
+    int16x8_t v1188 = vsubq_s16(v783, v788);
+    int16x8_t v1189_tmp = vqrdmulhq_n_s16(v1188, 23673);
+    int16x8_t v1189 = vaddq_s16(v1189_tmp, v1188);
+    int16x8_t v1190 = vaddq_s16(v1187, v1189);
+    int16x8_t v1191 = vqrdmulhq_n_s16(v1190, 20398);
+    int16x8_t v1192 = vaddq_s16(v1186, v1191);
+    int16x8_t v1193 = vqrdmulhq_n_s16(v1192, 17255);
+    int16x8_t v1194 = vaddq_s16(v1182, v1193);
+    int16x8_t v1195 = vsubq_s16(v799, v804);
+    int16x8_t v1196 = vsubq_s16(v809, v814);
+    int16x8_t v1197_tmp = vqrdmulhq_n_s16(v1196, 23673);
+    int16x8_t v1197 = vaddq_s16(v1197_tmp, v1196);
+    int16x8_t v1198 = vaddq_s16(v1195, v1197);
+    int16x8_t v1199 = vsubq_s16(v821, v826);
+    int16x8_t v1200 = vsubq_s16(v831, v836);
+    int16x8_t v1201_tmp = vqrdmulhq_n_s16(v1200, 23673);
+    int16x8_t v1201 = vaddq_s16(v1201_tmp, v1200);
+    int16x8_t v1202 = vaddq_s16(v1199, v1201);
+    int16x8_t v1203 = vqrdmulhq_n_s16(v1202, 20398);
+    int16x8_t v1204 = vaddq_s16(v1198, v1203);
+    int16x8_t v1205 = vsubq_s16(v845, v850);
+    int16x8_t v1206 = vsubq_s16(v855, v860);
+    int16x8_t v1207_tmp = vqrdmulhq_n_s16(v1206, 23673);
+    int16x8_t v1207 = vaddq_s16(v1207_tmp, v1206);
+    int16x8_t v1208 = vaddq_s16(v1205, v1207);
+    int16x8_t v1209 = vsubq_s16(v867, v872);
+    int16x8_t v1210 = vsubq_s16(v877, v882);
+    int16x8_t v1211_tmp = vqrdmulhq_n_s16(v1210, 23673);
+    int16x8_t v1211 = vaddq_s16(v1211_tmp, v1210);
+    int16x8_t v1212 = vaddq_s16(v1209, v1211);
+    int16x8_t v1213 = vqrdmulhq_n_s16(v1212, 20398);
+    int16x8_t v1214 = vaddq_s16(v1208, v1213);
+    int16x8_t v1215 = vqrdmulhq_n_s16(v1214, 17255);
+    int16x8_t v1216 = vaddq_s16(v1204, v1215);
+    int16x8_t v1217 = vqrdmulhq_n_s16(v1216, 16595);
+    int16x8_t v1218 = vaddq_s16(v1194, v1217);
+    int16x8_t v1219 = vsubq_s16(v9, v24);
+    int16x8_t v1220 = vsubq_s16(v42, v58);
+    int16x8_t v1221_tmp = vqrdmulhq_n_s16(v1220, 3314);
+    int16x8_t v1221 = vmlaq_n_s16(v1221_tmp, v1220, 5);
+    int16x8_t v1222 = vaddq_s16(v1219, v1221);
+    int16x8_t v1223 = vsubq_s16(v78, v101);
+    int16x8_t v1224 = vsubq_s16(v119, v136);
+    int16x8_t v1225_tmp = vqrdmulhq_n_s16(v1224, 3314);
+    int16x8_t v1225 = vmlaq_n_s16(v1225_tmp, v1224, 5);
+    int16x8_t v1226 = vaddq_s16(v1223, v1225);
+    int16x8_t v1227 = vqrdmulhq_n_s16(v1226, 22112);
+    int16x8_t v1228 = vaddq_s16(v1222, v1227);
+    int16x8_t v1229 = vsubq_s16(v158, v181);
+    int16x8_t v1230 = vsubq_s16(v213, v231);
+    int16x8_t v1231_tmp = vqrdmulhq_n_s16(v1230, 3314);
+    int16x8_t v1231 = vmlaq_n_s16(v1231_tmp, v1230, 5);
+    int16x8_t v1232 = vaddq_s16(v1229, v1231);
+    int16x8_t v1233 = vsubq_s16(v251, v274);
+    int16x8_t v1234 = vsubq_s16(v292, v310);
+    int16x8_t v1235_tmp = vqrdmulhq_n_s16(v1234, 3314);
+    int16x8_t v1235 = vmlaq_n_s16(v1235_tmp, v1234, 5);
+    int16x8_t v1236 = vaddq_s16(v1233, v1235);
+    int16x8_t v1237 = vqrdmulhq_n_s16(v1236, 22112);
+    int16x8_t v1238 = vaddq_s16(v1232, v1237);
+    int16x8_t v1239 = vqrdmulhq_n_s16(v1238, 17561);
+    int16x8_t v1240 = vaddq_s16(v1228, v1239);
+    int16x8_t v1241 = vsubq_s16(v334, v357);
+    int16x8_t v1242 = vsubq_s16(v389, v407);
+    int16x8_t v1243_tmp = vqrdmulhq_n_s16(v1242, 3314);
+    int16x8_t v1243 = vmlaq_n_s16(v1243_tmp, v1242, 5);
+    int16x8_t v1244 = vaddq_s16(v1241, v1243);
+    int16x8_t v1245 = vsubq_s16(v441, v480);
+    int16x8_t v1246 = vsubq_s16(v498, v517);
+    int16x8_t v1247_tmp = vqrdmulhq_n_s16(v1246, 3314);
+    int16x8_t v1247 = vmlaq_n_s16(v1247_tmp, v1246, 5);
+    int16x8_t v1248 = vaddq_s16(v1245, v1247);
+    int16x8_t v1249 = vqrdmulhq_n_s16(v1248, 22112);
+    int16x8_t v1250 = vaddq_s16(v1244, v1249);
+    int16x8_t v1251 = vsubq_s16(v539, v562);
+    int16x8_t v1252 = vsubq_s16(v594, v612);
+    int16x8_t v1253_tmp = vqrdmulhq_n_s16(v1252, 3314);
+    int16x8_t v1253 = vmlaq_n_s16(v1253_tmp, v1252, 5);
+    int16x8_t v1254 = vaddq_s16(v1251, v1253);
+    int16x8_t v1255 = vsubq_s16(v632, v655);
+    int16x8_t v1256 = vsubq_s16(v673, v692);
+    int16x8_t v1257_tmp = vqrdmulhq_n_s16(v1256, 3314);
+    int16x8_t v1257 = vmlaq_n_s16(v1257_tmp, v1256, 5);
+    int16x8_t v1258 = vaddq_s16(v1255, v1257);
+    int16x8_t v1259 = vqrdmulhq_n_s16(v1258, 22112);
+    int16x8_t v1260 = vaddq_s16(v1254, v1259);
+    int16x8_t v1261 = vqrdmulhq_n_s16(v1260, 17561);
+    int16x8_t v1262 = vaddq_s16(v1250, v1261);
+    int16x8_t v1263 = vqrdmulhq_n_s16(v1262, 16666);
+    int16x8_t v1264 = vaddq_s16(v1240, v1263);
+    int16x8_t v1265 = vsubq_s16(v1219, v1221);
+    int16x8_t v1266 = vsubq_s16(v1223, v1225);
+    int16x8_t v1267 = vqrdmulhq_n_s16(v1266, 24397);
+    int16x8_t v1268 = vaddq_s16(v1265, v1267);
+    int16x8_t v1269 = vsubq_s16(v1229, v1231);
+    int16x8_t v1270 = vsubq_s16(v1233, v1235);
+    int16x8_t v1271 = vqrdmulhq_n_s16(v1270, 24397);
+    int16x8_t v1272 = vaddq_s16(v1269, v1271);
+    int16x8_t v1273 = vqrdmulhq_n_s16(v1272, 17921);
+    int16x8_t v1274 = vaddq_s16(v1268, v1273);
+    int16x8_t v1275 = vsubq_s16(v1241, v1243);
+    int16x8_t v1276 = vsubq_s16(v1245, v1247);
+    int16x8_t v1277 = vqrdmulhq_n_s16(v1276, 24397);
+    int16x8_t v1278 = vaddq_s16(v1275, v1277);
+    int16x8_t v1279 = vsubq_s16(v1251, v1253);
+    int16x8_t v1280 = vsubq_s16(v1255, v1257);
+    int16x8_t v1281 = vqrdmulhq_n_s16(v1280, 24397);
+    int16x8_t v1282 = vaddq_s16(v1279, v1281);
+    int16x8_t v1283 = vqrdmulhq_n_s16(v1282, 17921);
+    int16x8_t v1284 = vaddq_s16(v1278, v1283);
+    int16x8_t v1285 = vqrdmulhq_n_s16(v1284, 16747);
+    int16x8_t v1286 = vaddq_s16(v1274, v1285);
+    int16x8_t v1287 = vsubq_s16(v1173, v1175);
+    int16x8_t v1288 = vsubq_s16(v1177, v1179);
+    int16x8_t v1289 = vqrdmulhq_n_s16(v1288, 27504);
+    int16x8_t v1290 = vaddq_s16(v1287, v1289);
+    int16x8_t v1291 = vsubq_s16(v1183, v1185);
+    int16x8_t v1292 = vsubq_s16(v1187, v1189);
+    int16x8_t v1293 = vqrdmulhq_n_s16(v1292, 27504);
+    int16x8_t v1294 = vaddq_s16(v1291, v1293);
+    int16x8_t v1295 = vqrdmulhq_n_s16(v1294, 18343);
+    int16x8_t v1296 = vaddq_s16(v1290, v1295);
+    int16x8_t v1297 = vsubq_s16(v1195, v1197);
+    int16x8_t v1298 = vsubq_s16(v1199, v1201);
+    int16x8_t v1299 = vqrdmulhq_n_s16(v1298, 27504);
+    int16x8_t v1300 = vaddq_s16(v1297, v1299);
+    int16x8_t v1301 = vsubq_s16(v1205, v1207);
+    int16x8_t v1302 = vsubq_s16(v1209, v1211);
+    int16x8_t v1303 = vqrdmulhq_n_s16(v1302, 27504);
+    int16x8_t v1304 = vaddq_s16(v1301, v1303);
+    int16x8_t v1305 = vqrdmulhq_n_s16(v1304, 18343);
+    int16x8_t v1306 = vaddq_s16(v1300, v1305);
+    int16x8_t v1307 = vqrdmulhq_n_s16(v1306, 16840);
+    int16x8_t v1308 = vaddq_s16(v1296, v1307);
+    int16x8_t v1309 = vsubq_s16(v1127, v1129);
+    int16x8_t v1310 = vsubq_s16(v1131, v1133);
+    int16x8_t v1311 = vqrdmulhq_n_s16(v1310, 31869);
+    int16x8_t v1312 = vaddq_s16(v1309, v1311);
+    int16x8_t v1313 = vsubq_s16(v1137, v1139);
+    int16x8_t v1314 = vsubq_s16(v1141, v1143);
+    int16x8_t v1315 = vqrdmulhq_n_s16(v1314, 31869);
+    int16x8_t v1316 = vaddq_s16(v1313, v1315);
+    int16x8_t v1317 = vqrdmulhq_n_s16(v1316, 18830);
+    int16x8_t v1318 = vaddq_s16(v1312, v1317);
+    int16x8_t v1319 = vsubq_s16(v1149, v1151);
+    int16x8_t v1320 = vsubq_s16(v1153, v1155);
+    int16x8_t v1321 = vqrdmulhq_n_s16(v1320, 31869);
+    int16x8_t v1322 = vaddq_s16(v1319, v1321);
+    int16x8_t v1323 = vsubq_s16(v1159, v1161);
+    int16x8_t v1324 = vsubq_s16(v1163, v1165);
+    int16x8_t v1325 = vqrdmulhq_n_s16(v1324, 31869);
+    int16x8_t v1326 = vaddq_s16(v1323, v1325);
+    int16x8_t v1327 = vqrdmulhq_n_s16(v1326, 18830);
+    int16x8_t v1328 = vaddq_s16(v1322, v1327);
+    int16x8_t v1329 = vqrdmulhq_n_s16(v1328, 16944);
+    int16x8_t v1330 = vaddq_s16(v1318, v1329);
+    int16x8_t v1331 = vsubq_s16(v1081, v1083);
+    int16x8_t v1332 = vsubq_s16(v1085, v1087);
+    int16x8_t v1333_tmp = vqrdmulhq_n_s16(v1332, 5552);
+    int16x8_t v1333 = vaddq_s16(v1333_tmp, v1332);
+    int16x8_t v1334 = vaddq_s16(v1331, v1333);
+    int16x8_t v1335 = vsubq_s16(v1091, v1093);
+    int16x8_t v1336 = vsubq_s16(v1095, v1097);
+    int16x8_t v1337_tmp = vqrdmulhq_n_s16(v1336, 5552);
+    int16x8_t v1337 = vaddq_s16(v1337_tmp, v1336);
+    int16x8_t v1338 = vaddq_s16(v1335, v1337);
+    int16x8_t v1339 = vqrdmulhq_n_s16(v1338, 19393);
+    int16x8_t v1340 = vaddq_s16(v1334, v1339);
+    int16x8_t v1341 = vsubq_s16(v1103, v1105);
+    int16x8_t v1342 = vsubq_s16(v1107, v1109);
+    int16x8_t v1343_tmp = vqrdmulhq_n_s16(v1342, 5552);
+    int16x8_t v1343 = vaddq_s16(v1343_tmp, v1342);
+    int16x8_t v1344 = vaddq_s16(v1341, v1343);
+    int16x8_t v1345 = vsubq_s16(v1113, v1115);
+    int16x8_t v1346 = vsubq_s16(v1117, v1119);
+    int16x8_t v1347_tmp = vqrdmulhq_n_s16(v1346, 5552);
+    int16x8_t v1347 = vaddq_s16(v1347_tmp, v1346);
+    int16x8_t v1348 = vaddq_s16(v1345, v1347);
+    int16x8_t v1349 = vqrdmulhq_n_s16(v1348, 19393);
+    int16x8_t v1350 = vaddq_s16(v1344, v1349);
+    int16x8_t v1351 = vqrdmulhq_n_s16(v1350, 17059);
+    int16x8_t v1352 = vaddq_s16(v1340, v1351);
+    int16x8_t v1353 = vsubq_s16(v990, v995);
+    int16x8_t v1354 = vsubq_s16(v1000, v1005);
+    int16x8_t v1355_tmp = vqrdmulhq_n_s16(v1354, 15865);
+    int16x8_t v1355 = vaddq_s16(v1355_tmp, v1354);
+    int16x8_t v1356 = vaddq_s16(v1353, v1355);
+    int16x8_t v1357 = vsubq_s16(v1012, v1017);
+    int16x8_t v1358 = vsubq_s16(v1022, v1027);
+    int16x8_t v1359_tmp = vqrdmulhq_n_s16(v1358, 15865);
+    int16x8_t v1359 = vaddq_s16(v1359_tmp, v1358);
+    int16x8_t v1360 = vaddq_s16(v1357, v1359);
+    int16x8_t v1361 = vqrdmulhq_n_s16(v1360, 20040);
+    int16x8_t v1362 = vaddq_s16(v1356, v1361);
+    int16x8_t v1363 = vsubq_s16(v1036, v1041);
+    int16x8_t v1364 = vsubq_s16(v1046, v1051);
+    int16x8_t v1365_tmp = vqrdmulhq_n_s16(v1364, 15865);
+    int16x8_t v1365 = vaddq_s16(v1365_tmp, v1364);
+    int16x8_t v1366 = vaddq_s16(v1363, v1365);
+    int16x8_t v1367 = vsubq_s16(v1058, v1063);
+    int16x8_t v1368 = vsubq_s16(v1068, v1073);
+    int16x8_t v1369_tmp = vqrdmulhq_n_s16(v1368, 15865);
+    int16x8_t v1369 = vaddq_s16(v1369_tmp, v1368);
+    int16x8_t v1370 = vaddq_s16(v1367, v1369);
+    int16x8_t v1371 = vqrdmulhq_n_s16(v1370, 20040);
+    int16x8_t v1372 = vaddq_s16(v1366, v1371);
+    int16x8_t v1373 = vqrdmulhq_n_s16(v1372, 17187);
+    int16x8_t v1374 = vaddq_s16(v1362, v1373);
+    int16x8_t v1375 = vsubq_s16(v895, v900);
+    int16x8_t v1376 = vsubq_s16(v905, v910);
+    int16x8_t v1377_tmp = vqrdmulhq_n_s16(v1376, 1893);
+    int16x8_t v1377 = vmlaq_n_s16(v1377_tmp, v1376, 2);
+    int16x8_t v1378 = vaddq_s16(v1375, v1377);
+    int16x8_t v1379 = vsubq_s16(v918, v923);
+    int16x8_t v1380 = vsubq_s16(v928, v933);
+    int16x8_t v1381_tmp = vqrdmulhq_n_s16(v1380, 1893);
+    int16x8_t v1381 = vmlaq_n_s16(v1381_tmp, v1380, 2);
+    int16x8_t v1382 = vaddq_s16(v1379, v1381);
+    int16x8_t v1383 = vqrdmulhq_n_s16(v1382, 20783);
+    int16x8_t v1384 = vaddq_s16(v1378, v1383);
+    int16x8_t v1385 = vsubq_s16(v942, v947);
+    int16x8_t v1386 = vsubq_s16(v952, v957);
+    int16x8_t v1387_tmp = vqrdmulhq_n_s16(v1386, 1893);
+    int16x8_t v1387 = vmlaq_n_s16(v1387_tmp, v1386, 2);
+    int16x8_t v1388 = vaddq_s16(v1385, v1387);
+    int16x8_t v1389 = vsubq_s16(v964, v969);
+    int16x8_t v1390 = vsubq_s16(v974, v979);
+    int16x8_t v1391_tmp = vqrdmulhq_n_s16(v1390, 1893);
+    int16x8_t v1391 = vmlaq_n_s16(v1391_tmp, v1390, 2);
+    int16x8_t v1392 = vaddq_s16(v1389, v1391);
+    int16x8_t v1393 = vqrdmulhq_n_s16(v1392, 20783);
+    int16x8_t v1394 = vaddq_s16(v1388, v1393);
+    int16x8_t v1395 = vqrdmulhq_n_s16(v1394, 17326);
+    int16x8_t v1396 = vaddq_s16(v1384, v1395);
+    int16x8_t v1397 = vsubq_s16(v711, v722);
+    int16x8_t v1398 = vsubq_s16(v733, v744);
+    int16x8_t v1399_tmp = vqrdmulhq_n_s16(v1398, 13357);
+    int16x8_t v1399 = vmlaq_n_s16(v1399_tmp, v1398, 3);
+    int16x8_t v1400 = vaddq_s16(v1397, v1399);
+    int16x8_t v1401 = vsubq_s16(v757, v768);
+    int16x8_t v1402 = vsubq_s16(v779, v790);
+    int16x8_t v1403_tmp = vqrdmulhq_n_s16(v1402, 13357);
+    int16x8_t v1403 = vmlaq_n_s16(v1403_tmp, v1402, 3);
+    int16x8_t v1404 = vaddq_s16(v1401, v1403);
+    int16x8_t v1405 = vqrdmulhq_n_s16(v1404, 21637);
+    int16x8_t v1406 = vaddq_s16(v1400, v1405);
+    int16x8_t v1407 = vsubq_s16(v805, v816);
+    int16x8_t v1408 = vsubq_s16(v827, v838);
+    int16x8_t v1409_tmp = vqrdmulhq_n_s16(v1408, 13357);
+    int16x8_t v1409 = vmlaq_n_s16(v1409_tmp, v1408, 3);
+    int16x8_t v1410 = vaddq_s16(v1407, v1409);
+    int16x8_t v1411 = vsubq_s16(v851, v862);
+    int16x8_t v1412 = vsubq_s16(v873, v884);
+    int16x8_t v1413_tmp = vqrdmulhq_n_s16(v1412, 13357);
+    int16x8_t v1413 = vmlaq_n_s16(v1413_tmp, v1412, 3);
+    int16x8_t v1414 = vaddq_s16(v1411, v1413);
+    int16x8_t v1415 = vqrdmulhq_n_s16(v1414, 21637);
+    int16x8_t v1416 = vaddq_s16(v1410, v1415);
+    int16x8_t v1417 = vqrdmulhq_n_s16(v1416, 17479);
+    int16x8_t v1418 = vaddq_s16(v1406, v1417);
+    int16x8_t v1419 = vsubq_s16(v25, v60);
+    int16x8_t v1420 = vsubq_s16(v102, v138);
+    int16x8_t v1421_tmp = vqrdmulhq_n_s16(v1420, 6226);
+    int16x8_t v1421 = vmlaq_n_s16(v1421_tmp, v1420, 10);
+    int16x8_t v1422 = vaddq_s16(v1419, v1421);
+    int16x8_t v1423 = vsubq_s16(v182, v233);
+    int16x8_t v1424 = vsubq_s16(v275, v312);
+    int16x8_t v1425_tmp = vqrdmulhq_n_s16(v1424, 6226);
+    int16x8_t v1425 = vmlaq_n_s16(v1425_tmp, v1424, 10);
+    int16x8_t v1426 = vaddq_s16(v1423, v1425);
+    int16x8_t v1427 = vqrdmulhq_n_s16(v1426, 22622);
+    int16x8_t v1428 = vaddq_s16(v1422, v1427);
+    int16x8_t v1429 = vsubq_s16(v358, v409);
+    int16x8_t v1430 = vsubq_s16(v481, v519);
+    int16x8_t v1431_tmp = vqrdmulhq_n_s16(v1430, 6226);
+    int16x8_t v1431 = vmlaq_n_s16(v1431_tmp, v1430, 10);
+    int16x8_t v1432 = vaddq_s16(v1429, v1431);
+    int16x8_t v1433 = vsubq_s16(v563, v614);
+    int16x8_t v1434 = vsubq_s16(v656, v694);
+    int16x8_t v1435_tmp = vqrdmulhq_n_s16(v1434, 6226);
+    int16x8_t v1435 = vmlaq_n_s16(v1435_tmp, v1434, 10);
+    int16x8_t v1436 = vaddq_s16(v1433, v1435);
+    int16x8_t v1437 = vqrdmulhq_n_s16(v1436, 22622);
+    int16x8_t v1438 = vaddq_s16(v1432, v1437);
+    int16x8_t v1439 = vqrdmulhq_n_s16(v1438, 17646);
+    int16x8_t v1440 = vaddq_s16(v1428, v1439);
+    int16x8_t v1441 = vsubq_s16(v1419, v1421);
+    int16x8_t v1442 = vsubq_s16(v1423, v1425);
+    int16x8_t v1443 = vqrdmulhq_n_s16(v1442, 23761);
+    int16x8_t v1444 = vaddq_s16(v1441, v1443);
+    int16x8_t v1445 = vsubq_s16(v1429, v1431);
+    int16x8_t v1446 = vsubq_s16(v1433, v1435);
+    int16x8_t v1447 = vqrdmulhq_n_s16(v1446, 23761);
+    int16x8_t v1448 = vaddq_s16(v1445, v1447);
+    int16x8_t v1449 = vqrdmulhq_n_s16(v1448, 17826);
+    int16x8_t v1450 = vaddq_s16(v1444, v1449);
+    int16x8_t v1451 = vsubq_s16(v1397, v1399);
+    int16x8_t v1452 = vsubq_s16(v1401, v1403);
+    int16x8_t v1453 = vqrdmulhq_n_s16(v1452, 25084);
+    int16x8_t v1454 = vaddq_s16(v1451, v1453);
+    int16x8_t v1455 = vsubq_s16(v1407, v1409);
+    int16x8_t v1456 = vsubq_s16(v1411, v1413);
+    int16x8_t v1457 = vqrdmulhq_n_s16(v1456, 25084);
+    int16x8_t v1458 = vaddq_s16(v1455, v1457);
+    int16x8_t v1459 = vqrdmulhq_n_s16(v1458, 18021);
+    int16x8_t v1460 = vaddq_s16(v1454, v1459);
+    int16x8_t v1461 = vsubq_s16(v1375, v1377);
+    int16x8_t v1462 = vsubq_s16(v1379, v1381);
+    int16x8_t v1463 = vqrdmulhq_n_s16(v1462, 26631);
+    int16x8_t v1464 = vaddq_s16(v1461, v1463);
+    int16x8_t v1465 = vsubq_s16(v1385, v1387);
+    int16x8_t v1466 = vsubq_s16(v1389, v1391);
+    int16x8_t v1467 = vqrdmulhq_n_s16(v1466, 26631);
+    int16x8_t v1468 = vaddq_s16(v1465, v1467);
+    int16x8_t v1469 = vqrdmulhq_n_s16(v1468, 18231);
+    int16x8_t v1470 = vaddq_s16(v1464, v1469);
+    int16x8_t v1471 = vsubq_s16(v1353, v1355);
+    int16x8_t v1472 = vsubq_s16(v1357, v1359);
+    int16x8_t v1473 = vqrdmulhq_n_s16(v1472, 28454);
+    int16x8_t v1474 = vaddq_s16(v1471, v1473);
+    int16x8_t v1475 = vsubq_s16(v1363, v1365);
+    int16x8_t v1476 = vsubq_s16(v1367, v1369);
+    int16x8_t v1477 = vqrdmulhq_n_s16(v1476, 28454);
+    int16x8_t v1478 = vaddq_s16(v1475, v1477);
+    int16x8_t v1479 = vqrdmulhq_n_s16(v1478, 18458);
+    int16x8_t v1480 = vaddq_s16(v1474, v1479);
+    int16x8_t v1481 = vsubq_s16(v1331, v1333);
+    int16x8_t v1482 = vsubq_s16(v1335, v1337);
+    int16x8_t v1483 = vqrdmulhq_n_s16(v1482, 30624);
+    int16x8_t v1484 = vaddq_s16(v1481, v1483);
+    int16x8_t v1485 = vsubq_s16(v1341, v1343);
+    int16x8_t v1486 = vsubq_s16(v1345, v1347);
+    int16x8_t v1487 = vqrdmulhq_n_s16(v1486, 30624);
+    int16x8_t v1488 = vaddq_s16(v1485, v1487);
+    int16x8_t v1489 = vqrdmulhq_n_s16(v1488, 18702);
+    int16x8_t v1490 = vaddq_s16(v1484, v1489);
+    int16x8_t v1491 = vsubq_s16(v1309, v1311);
+    int16x8_t v1492 = vsubq_s16(v1313, v1315);
+    int16x8_t v1493_tmp = vqrdmulhq_n_s16(v1492, 472);
+    int16x8_t v1493 = vaddq_s16(v1493_tmp, v1492);
+    int16x8_t v1494 = vaddq_s16(v1491, v1493);
+    int16x8_t v1495 = vsubq_s16(v1319, v1321);
+    int16x8_t v1496 = vsubq_s16(v1323, v1325);
+    int16x8_t v1497_tmp = vqrdmulhq_n_s16(v1496, 472);
+    int16x8_t v1497 = vaddq_s16(v1497_tmp, v1496);
+    int16x8_t v1498 = vaddq_s16(v1495, v1497);
+    int16x8_t v1499 = vqrdmulhq_n_s16(v1498, 18964);
+    int16x8_t v1500 = vaddq_s16(v1494, v1499);
+    int16x8_t v1501 = vsubq_s16(v1287, v1289);
+    int16x8_t v1502 = vsubq_s16(v1291, v1293);
+    int16x8_t v1503_tmp = vqrdmulhq_n_s16(v1502, 3672);
+    int16x8_t v1503 = vaddq_s16(v1503_tmp, v1502);
+    int16x8_t v1504 = vaddq_s16(v1501, v1503);
+    int16x8_t v1505 = vsubq_s16(v1297, v1299);
+    int16x8_t v1506 = vsubq_s16(v1301, v1303);
+    int16x8_t v1507_tmp = vqrdmulhq_n_s16(v1506, 3672);
+    int16x8_t v1507 = vaddq_s16(v1507_tmp, v1506);
+    int16x8_t v1508 = vaddq_s16(v1505, v1507);
+    int16x8_t v1509 = vqrdmulhq_n_s16(v1508, 19245);
+    int16x8_t v1510 = vaddq_s16(v1504, v1509);
+    int16x8_t v1511 = vsubq_s16(v1265, v1267);
+    int16x8_t v1512 = vsubq_s16(v1269, v1271);
+    int16x8_t v1513_tmp = vqrdmulhq_n_s16(v1512, 7662);
+    int16x8_t v1513 = vaddq_s16(v1513_tmp, v1512);
+    int16x8_t v1514 = vaddq_s16(v1511, v1513);
+    int16x8_t v1515 = vsubq_s16(v1275, v1277);
+    int16x8_t v1516 = vsubq_s16(v1279, v1281);
+    int16x8_t v1517_tmp = vqrdmulhq_n_s16(v1516, 7662);
+    int16x8_t v1517 = vaddq_s16(v1517_tmp, v1516);
+    int16x8_t v1518 = vaddq_s16(v1515, v1517);
+    int16x8_t v1519 = vqrdmulhq_n_s16(v1518, 19546);
+    int16x8_t v1520 = vaddq_s16(v1514, v1519);
+    int16x8_t v1521 = vsubq_s16(v1222, v1227);
+    int16x8_t v1522 = vsubq_s16(v1232, v1237);
+    int16x8_t v1523_tmp = vqrdmulhq_n_s16(v1522, 12756);
+    int16x8_t v1523 = vaddq_s16(v1523_tmp, v1522);
+    int16x8_t v1524 = vaddq_s16(v1521, v1523);
+    int16x8_t v1525 = vsubq_s16(v1244, v1249);
+    int16x8_t v1526 = vsubq_s16(v1254, v1259);
+    int16x8_t v1527_tmp = vqrdmulhq_n_s16(v1526, 12756);
+    int16x8_t v1527 = vaddq_s16(v1527_tmp, v1526);
+    int16x8_t v1528 = vaddq_s16(v1525, v1527);
+    int16x8_t v1529 = vqrdmulhq_n_s16(v1528, 19869);
+    int16x8_t v1530 = vaddq_s16(v1524, v1529);
+    int16x8_t v1531 = vsubq_s16(v1176, v1181);
+    int16x8_t v1532 = vsubq_s16(v1186, v1191);
+    int16x8_t v1533_tmp = vqrdmulhq_n_s16(v1532, 19463);
+    int16x8_t v1533 = vaddq_s16(v1533_tmp, v1532);
+    int16x8_t v1534 = vaddq_s16(v1531, v1533);
+    int16x8_t v1535 = vsubq_s16(v1198, v1203);
+    int16x8_t v1536 = vsubq_s16(v1208, v1213);
+    int16x8_t v1537_tmp = vqrdmulhq_n_s16(v1536, 19463);
+    int16x8_t v1537 = vaddq_s16(v1537_tmp, v1536);
+    int16x8_t v1538 = vaddq_s16(v1535, v1537);
+    int16x8_t v1539 = vqrdmulhq_n_s16(v1538, 20216);
+    int16x8_t v1540 = vaddq_s16(v1534, v1539);
+    int16x8_t v1541 = vsubq_s16(v1130, v1135);
+    int16x8_t v1542 = vsubq_s16(v1140, v1145);
+    int16x8_t v1543_tmp = vqrdmulhq_n_s16(v1542, 28661);
+    int16x8_t v1543 = vaddq_s16(v1543_tmp, v1542);
+    int16x8_t v1544 = vaddq_s16(v1541, v1543);
+    int16x8_t v1545 = vsubq_s16(v1152, v1157);
+    int16x8_t v1546 = vsubq_s16(v1162, v1167);
+    int16x8_t v1547_tmp = vqrdmulhq_n_s16(v1546, 28661);
+    int16x8_t v1547 = vaddq_s16(v1547_tmp, v1546);
+    int16x8_t v1548 = vaddq_s16(v1545, v1547);
+    int16x8_t v1549 = vqrdmulhq_n_s16(v1548, 20587);
+    int16x8_t v1550 = vaddq_s16(v1544, v1549);
+    int16x8_t v1551 = vsubq_s16(v1084, v1089);
+    int16x8_t v1552 = vsubq_s16(v1094, v1099);
+    int16x8_t v1553_tmp = vqrdmulhq_n_s16(v1552, 9242);
+    int16x8_t v1553 = vmlaq_n_s16(v1553_tmp, v1552, 2);
+    int16x8_t v1554 = vaddq_s16(v1551, v1553);
+    int16x8_t v1555 = vsubq_s16(v1106, v1111);
+    int16x8_t v1556 = vsubq_s16(v1116, v1121);
+    int16x8_t v1557_tmp = vqrdmulhq_n_s16(v1556, 9242);
+    int16x8_t v1557 = vmlaq_n_s16(v1557_tmp, v1556, 2);
+    int16x8_t v1558 = vaddq_s16(v1555, v1557);
+    int16x8_t v1559 = vqrdmulhq_n_s16(v1558, 20985);
+    int16x8_t v1560 = vaddq_s16(v1554, v1559);
+    int16x8_t v1561 = vsubq_s16(v996, v1007);
+    int16x8_t v1562 = vsubq_s16(v1018, v1029);
+    int16x8_t v1563_tmp = vqrdmulhq_n_s16(v1562, 30298);
+    int16x8_t v1563 = vmlaq_n_s16(v1563_tmp, v1562, 2);
+    int16x8_t v1564 = vaddq_s16(v1561, v1563);
+    int16x8_t v1565 = vsubq_s16(v1042, v1053);
+    int16x8_t v1566 = vsubq_s16(v1064, v1075);
+    int16x8_t v1567_tmp = vqrdmulhq_n_s16(v1566, 30298);
+    int16x8_t v1567 = vmlaq_n_s16(v1567_tmp, v1566, 2);
+    int16x8_t v1568 = vaddq_s16(v1565, v1567);
+    int16x8_t v1569 = vqrdmulhq_n_s16(v1568, 21412);
+    int16x8_t v1570 = vaddq_s16(v1564, v1569);
+    int16x8_t v1571 = vsubq_s16(v901, v912);
+    int16x8_t v1572 = vsubq_s16(v924, v935);
+    int16x8_t v1573_tmp = vqrdmulhq_n_s16(v1572, 2773);
+    int16x8_t v1573 = vmlaq_n_s16(v1573_tmp, v1572, 4);
+    int16x8_t v1574 = vaddq_s16(v1571, v1573);
+    int16x8_t v1575 = vsubq_s16(v948, v959);
+    int16x8_t v1576 = vsubq_s16(v970, v981);
+    int16x8_t v1577_tmp = vqrdmulhq_n_s16(v1576, 2773);
+    int16x8_t v1577 = vmlaq_n_s16(v1577_tmp, v1576, 4);
+    int16x8_t v1578 = vaddq_s16(v1575, v1577);
+    int16x8_t v1579 = vqrdmulhq_n_s16(v1578, 21871);
+    int16x8_t v1580 = vaddq_s16(v1574, v1579);
+    int16x8_t v1581 = vsubq_s16(v723, v746);
+    int16x8_t v1582 = vsubq_s16(v769, v792);
+    int16x8_t v1583_tmp = vqrdmulhq_n_s16(v1582, 26108);
+    int16x8_t v1583 = vmlaq_n_s16(v1583_tmp, v1582, 6);
+    int16x8_t v1584 = vaddq_s16(v1581, v1583);
+    int16x8_t v1585 = vsubq_s16(v817, v840);
+    int16x8_t v1586 = vsubq_s16(v863, v886);
+    int16x8_t v1587_tmp = vqrdmulhq_n_s16(v1586, 26108);
+    int16x8_t v1587 = vmlaq_n_s16(v1587_tmp, v1586, 6);
+    int16x8_t v1588 = vaddq_s16(v1585, v1587);
+    int16x8_t v1589 = vqrdmulhq_n_s16(v1588, 22363);
+    int16x8_t v1590 = vaddq_s16(v1584, v1589);
+    int16x8_t v1591 = vsubq_s16(v61, v140);
+    int16x8_t v1592 = vsubq_s16(v234, v314);
+    int16x8_t v1593_tmp = vqrdmulhq_n_s16(v1592, 12251);
+    int16x8_t v1593 = vmlaq_n_s16(v1593_tmp, v1592, 20);
+    int16x8_t v1594 = vaddq_s16(v1591, v1593);
+    int16x8_t v1595 = vsubq_s16(v410, v521);
+    int16x8_t v1596 = vsubq_s16(v615, v696);
+    int16x8_t v1597_tmp = vqrdmulhq_n_s16(v1596, 12251);
+    int16x8_t v1597 = vmlaq_n_s16(v1597_tmp, v1596, 20);
+    int16x8_t v1598 = vaddq_s16(v1595, v1597);
+    int16x8_t v1599 = vqrdmulhq_n_s16(v1598, 22891);
+    int16x8_t v1600 = vaddq_s16(v1594, v1599);
+    int16x8_t v1601 = vsubq_s16(v1591, v1593);
+    int16x8_t v1602 = vsubq_s16(v1595, v1597);
+    int16x8_t v1603 = vqrdmulhq_n_s16(v1602, 23460);
+    int16x8_t v1604 = vaddq_s16(v1601, v1603);
+    int16x8_t v1605 = vsubq_s16(v1581, v1583);
+    int16x8_t v1606 = vsubq_s16(v1585, v1587);
+    int16x8_t v1607 = vqrdmulhq_n_s16(v1606, 24073);
+    int16x8_t v1608 = vaddq_s16(v1605, v1607);
+    int16x8_t v1609 = vsubq_s16(v1571, v1573);
+    int16x8_t v1610 = vsubq_s16(v1575, v1577);
+    int16x8_t v1611 = vqrdmulhq_n_s16(v1610, 24734);
+    int16x8_t v1612 = vaddq_s16(v1609, v1611);
+    int16x8_t v1613 = vsubq_s16(v1561, v1563);
+    int16x8_t v1614 = vsubq_s16(v1565, v1567);
+    int16x8_t v1615 = vqrdmulhq_n_s16(v1614, 25448);
+    int16x8_t v1616 = vaddq_s16(v1613, v1615);
+    int16x8_t v1617 = vsubq_s16(v1551, v1553);
+    int16x8_t v1618 = vsubq_s16(v1555, v1557);
+    int16x8_t v1619 = vqrdmulhq_n_s16(v1618, 26220);
+    int16x8_t v1620 = vaddq_s16(v1617, v1619);
+    int16x8_t v1621 = vsubq_s16(v1541, v1543);
+    int16x8_t v1622 = vsubq_s16(v1545, v1547);
+    int16x8_t v1623 = vqrdmulhq_n_s16(v1622, 27058);
+    int16x8_t v1624 = vaddq_s16(v1621, v1623);
+    int16x8_t v1625 = vsubq_s16(v1531, v1533);
+    int16x8_t v1626 = vsubq_s16(v1535, v1537);
+    int16x8_t v1627 = vqrdmulhq_n_s16(v1626, 27969);
+    int16x8_t v1628 = vaddq_s16(v1625, v1627);
+    int16x8_t v1629 = vsubq_s16(v1521, v1523);
+    int16x8_t v1630 = vsubq_s16(v1525, v1527);
+    int16x8_t v1631 = vqrdmulhq_n_s16(v1630, 28961);
+    int16x8_t v1632 = vaddq_s16(v1629, v1631);
+    int16x8_t v1633 = vsubq_s16(v1511, v1513);
+    int16x8_t v1634 = vsubq_s16(v1515, v1517);
+    int16x8_t v1635 = vqrdmulhq_n_s16(v1634, 30044);
+    int16x8_t v1636 = vaddq_s16(v1633, v1635);
+    int16x8_t v1637 = vsubq_s16(v1501, v1503);
+    int16x8_t v1638 = vsubq_s16(v1505, v1507);
+    int16x8_t v1639 = vqrdmulhq_n_s16(v1638, 31232);
+    int16x8_t v1640 = vaddq_s16(v1637, v1639);
+    int16x8_t v1641 = vsubq_s16(v1491, v1493);
+    int16x8_t v1642 = vsubq_s16(v1495, v1497);
+    int16x8_t v1643 = vqrdmulhq_n_s16(v1642, 32538);
+    int16x8_t v1644 = vaddq_s16(v1641, v1643);
+    int16x8_t v1645 = vsubq_s16(v1481, v1483);
+    int16x8_t v1646 = vsubq_s16(v1485, v1487);
+    int16x8_t v1647_tmp = vqrdmulhq_n_s16(v1646, 1211);
+    int16x8_t v1647 = vaddq_s16(v1647_tmp, v1646);
+    int16x8_t v1648 = vaddq_s16(v1645, v1647);
+    int16x8_t v1649 = vsubq_s16(v1471, v1473);
+    int16x8_t v1650 = vsubq_s16(v1475, v1477);
+    int16x8_t v1651_tmp = vqrdmulhq_n_s16(v1650, 2808);
+    int16x8_t v1651 = vaddq_s16(v1651_tmp, v1650);
+    int16x8_t v1652 = vaddq_s16(v1649, v1651);
+    int16x8_t v1653 = vsubq_s16(v1461, v1463);
+    int16x8_t v1654 = vsubq_s16(v1465, v1467);
+    int16x8_t v1655_tmp = vqrdmulhq_n_s16(v1654, 4586);
+    int16x8_t v1655 = vaddq_s16(v1655_tmp, v1654);
+    int16x8_t v1656 = vaddq_s16(v1653, v1655);
+    int16x8_t v1657 = vsubq_s16(v1451, v1453);
+    int16x8_t v1658 = vsubq_s16(v1455, v1457);
+    int16x8_t v1659_tmp = vqrdmulhq_n_s16(v1658, 6576);
+    int16x8_t v1659 = vaddq_s16(v1659_tmp, v1658);
+    int16x8_t v1660 = vaddq_s16(v1657, v1659);
+    int16x8_t v1661 = vsubq_s16(v1441, v1443);
+    int16x8_t v1662 = vsubq_s16(v1445, v1447);
+    int16x8_t v1663_tmp = vqrdmulhq_n_s16(v1662, 8817);
+    int16x8_t v1663 = vaddq_s16(v1663_tmp, v1662);
+    int16x8_t v1664 = vaddq_s16(v1661, v1663);
+    int16x8_t v1665 = vsubq_s16(v1422, v1427);
+    int16x8_t v1666 = vsubq_s16(v1432, v1437);
+    int16x8_t v1667_tmp = vqrdmulhq_n_s16(v1666, 11356);
+    int16x8_t v1667 = vaddq_s16(v1667_tmp, v1666);
+    int16x8_t v1668 = vaddq_s16(v1665, v1667);
+    int16x8_t v1669 = vsubq_s16(v1400, v1405);
+    int16x8_t v1670 = vsubq_s16(v1410, v1415);
+    int16x8_t v1671_tmp = vqrdmulhq_n_s16(v1670, 14256);
+    int16x8_t v1671 = vaddq_s16(v1671_tmp, v1670);
+    int16x8_t v1672 = vaddq_s16(v1669, v1671);
+    int16x8_t v1673 = vsubq_s16(v1378, v1383);
+    int16x8_t v1674 = vsubq_s16(v1388, v1393);
+    int16x8_t v1675_tmp = vqrdmulhq_n_s16(v1674, 17596);
+    int16x8_t v1675 = vaddq_s16(v1675_tmp, v1674);
+    int16x8_t v1676 = vaddq_s16(v1673, v1675);
+    int16x8_t v1677 = vsubq_s16(v1356, v1361);
+    int16x8_t v1678 = vsubq_s16(v1366, v1371);
+    int16x8_t v1679_tmp = vqrdmulhq_n_s16(v1678, 21483);
+    int16x8_t v1679 = vaddq_s16(v1679_tmp, v1678);
+    int16x8_t v1680 = vaddq_s16(v1677, v1679);
+    int16x8_t v1681 = vsubq_s16(v1334, v1339);
+    int16x8_t v1682 = vsubq_s16(v1344, v1349);
+    int16x8_t v1683_tmp = vqrdmulhq_n_s16(v1682, 26057);
+    int16x8_t v1683 = vaddq_s16(v1683_tmp, v1682);
+    int16x8_t v1684 = vaddq_s16(v1681, v1683);
+    int16x8_t v1685 = vsubq_s16(v1312, v1317);
+    int16x8_t v1686 = vsubq_s16(v1322, v1327);
+    int16x8_t v1687_tmp = vqrdmulhq_n_s16(v1686, 31517);
+    int16x8_t v1687 = vaddq_s16(v1687_tmp, v1686);
+    int16x8_t v1688 = vaddq_s16(v1685, v1687);
+    int16x8_t v1689 = vsubq_s16(v1290, v1295);
+    int16x8_t v1690 = vsubq_s16(v1300, v1305);
+    int16x8_t v1691_tmp = vqrdmulhq_n_s16(v1690, 5373);
+    int16x8_t v1691 = vmlaq_n_s16(v1691_tmp, v1690, 2);
+    int16x8_t v1692 = vaddq_s16(v1689, v1691);
+    int16x8_t v1693 = vsubq_s16(v1268, v1273);
+    int16x8_t v1694 = vsubq_s16(v1278, v1283);
+    int16x8_t v1695_tmp = vqrdmulhq_n_s16(v1694, 13571);
+    int16x8_t v1695 = vmlaq_n_s16(v1695_tmp, v1694, 2);
+    int16x8_t v1696 = vaddq_s16(v1693, v1695);
+    int16x8_t v1697 = vsubq_s16(v1228, v1239);
+    int16x8_t v1698 = vsubq_s16(v1250, v1261);
+    int16x8_t v1699_tmp = vqrdmulhq_n_s16(v1698, 23975);
+    int16x8_t v1699 = vmlaq_n_s16(v1699_tmp, v1698, 2);
+    int16x8_t v1700 = vaddq_s16(v1697, v1699);
+    int16x8_t v1701 = vsubq_s16(v1182, v1193);
+    int16x8_t v1702 = vsubq_s16(v1204, v1215);
+    int16x8_t v1703_tmp = vqrdmulhq_n_s16(v1702, 4832);
+    int16x8_t v1703 = vmlaq_n_s16(v1703_tmp, v1702, 3);
+    int16x8_t v1704 = vaddq_s16(v1701, v1703);
+    int16x8_t v1705 = vsubq_s16(v1136, v1147);
+    int16x8_t v1706 = vsubq_s16(v1158, v1169);
+    int16x8_t v1707_tmp = vqrdmulhq_n_s16(v1706, 23437);
+    int16x8_t v1707 = vmlaq_n_s16(v1707_tmp, v1706, 3);
+    int16x8_t v1708 = vaddq_s16(v1705, v1707);
+    int16x8_t v1709 = vsubq_s16(v1090, v1101);
+    int16x8_t v1710 = vsubq_s16(v1112, v1123);
+    int16x8_t v1711_tmp = vqrdmulhq_n_s16(v1710, 17573);
+    int16x8_t v1711 = vmlaq_n_s16(v1711_tmp, v1710, 4);
+    int16x8_t v1712 = vaddq_s16(v1709, v1711);
+    int16x8_t v1713 = vsubq_s16(v1008, v1031);
+    int16x8_t v1714 = vsubq_s16(v1054, v1077);
+    int16x8_t v1715_tmp = vqrdmulhq_n_s16(v1714, 27122);
+    int16x8_t v1715 = vmlaq_n_s16(v1715_tmp, v1714, 5);
+    int16x8_t v1716 = vaddq_s16(v1713, v1715);
+    int16x8_t v1717 = vsubq_s16(v913, v937);
+    int16x8_t v1718 = vsubq_s16(v960, v983);
+    int16x8_t v1719_tmp = vqrdmulhq_n_s16(v1718, 5041);
+    int16x8_t v1719 = vmlaq_n_s16(v1719_tmp, v1718, 8);
+    int16x8_t v1720 = vaddq_s16(v1717, v1719);
+    int16x8_t v1721 = vsubq_s16(v747, v794);
+    int16x8_t v1722 = vsubq_s16(v841, v888);
+    int16x8_t v1723_tmp = vqrdmulhq_n_s16(v1722, 19146);
+    int16x8_t v1723 = vmlaq_n_s16(v1723_tmp, v1722, 13);
+    int16x8_t v1724 = vaddq_s16(v1721, v1723);
+    int16x8_t v1725 = vsubq_s16(v141, v316);
+    int16x8_t v1726 = vsubq_s16(v522, v698);
+    int16x8_t v1727_tmp = vqrdmulhq_n_s16(v1726, 24402);
+    int16x8_t v1727 = vmlaq_n_s16(v1727_tmp, v1726, 40);
+    int16x8_t v1728 = vaddq_s16(v1725, v1727);
+    int16x8_t v1729 = vsubq_s16(v1725, v1727);
+    int16x8_t v1730 = vsubq_s16(v1721, v1723);
+    int16x8_t v1731 = vsubq_s16(v1717, v1719);
+    int16x8_t v1732 = vsubq_s16(v1713, v1715);
+    int16x8_t v1733 = vsubq_s16(v1709, v1711);
+    int16x8_t v1734 = vsubq_s16(v1705, v1707);
+    int16x8_t v1735 = vsubq_s16(v1701, v1703);
+    int16x8_t v1736 = vsubq_s16(v1697, v1699);
+    int16x8_t v1737 = vsubq_s16(v1693, v1695);
+    int16x8_t v1738 = vsubq_s16(v1689, v1691);
+    int16x8_t v1739 = vsubq_s16(v1685, v1687);
+    int16x8_t v1740 = vsubq_s16(v1681, v1683);
+    int16x8_t v1741 = vsubq_s16(v1677, v1679);
+    int16x8_t v1742 = vsubq_s16(v1673, v1675);
+    int16x8_t v1743 = vsubq_s16(v1669, v1671);
+    int16x8_t v1744 = vsubq_s16(v1665, v1667);
+    int16x8_t v1745 = vsubq_s16(v1661, v1663);
+    int16x8_t v1746 = vsubq_s16(v1657, v1659);
+    int16x8_t v1747 = vsubq_s16(v1653, v1655);
+    int16x8_t v1748 = vsubq_s16(v1649, v1651);
+    int16x8_t v1749 = vsubq_s16(v1645, v1647);
+    int16x8_t v1750 = vsubq_s16(v1641, v1643);
+    int16x8_t v1751 = vsubq_s16(v1637, v1639);
+    int16x8_t v1752 = vsubq_s16(v1633, v1635);
+    int16x8_t v1753 = vsubq_s16(v1629, v1631);
+    int16x8_t v1754 = vsubq_s16(v1625, v1627);
+    int16x8_t v1755 = vsubq_s16(v1621, v1623);
+    int16x8_t v1756 = vsubq_s16(v1617, v1619);
+    int16x8_t v1757 = vsubq_s16(v1613, v1615);
+    int16x8_t v1758 = vsubq_s16(v1609, v1611);
+    int16x8_t v1759 = vsubq_s16(v1605, v1607);
+    int16x8_t v1760 = vsubq_s16(v1601, v1603);
+    int16x8_t v1761 = vsubq_s16(v1594, v1599);
+    int16x8_t v1762 = vsubq_s16(v1584, v1589);
+    int16x8_t v1763 = vsubq_s16(v1574, v1579);
+    int16x8_t v1764 = vsubq_s16(v1564, v1569);
+    int16x8_t v1765 = vsubq_s16(v1554, v1559);
+    int16x8_t v1766 = vsubq_s16(v1544, v1549);
+    int16x8_t v1767 = vsubq_s16(v1534, v1539);
+    int16x8_t v1768 = vsubq_s16(v1524, v1529);
+    int16x8_t v1769 = vsubq_s16(v1514, v1519);
+    int16x8_t v1770 = vsubq_s16(v1504, v1509);
+    int16x8_t v1771 = vsubq_s16(v1494, v1499);
+    int16x8_t v1772 = vsubq_s16(v1484, v1489);
+    int16x8_t v1773 = vsubq_s16(v1474, v1479);
+    int16x8_t v1774 = vsubq_s16(v1464, v1469);
+    int16x8_t v1775 = vsubq_s16(v1454, v1459);
+    int16x8_t v1776 = vsubq_s16(v1444, v1449);
+    int16x8_t v1777 = vsubq_s16(v1428, v1439);
+    int16x8_t v1778 = vsubq_s16(v1406, v1417);
+    int16x8_t v1779 = vsubq_s16(v1384, v1395);
+    int16x8_t v1780 = vsubq_s16(v1362, v1373);
+    int16x8_t v1781 = vsubq_s16(v1340, v1351);
+    int16x8_t v1782 = vsubq_s16(v1318, v1329);
+    int16x8_t v1783 = vsubq_s16(v1296, v1307);
+    int16x8_t v1784 = vsubq_s16(v1274, v1285);
+    int16x8_t v1785 = vsubq_s16(v1240, v1263);
+    int16x8_t v1786 = vsubq_s16(v1194, v1217);
+    int16x8_t v1787 = vsubq_s16(v1148, v1171);
+    int16x8_t v1788 = vsubq_s16(v1102, v1125);
+    int16x8_t v1789 = vsubq_s16(v1032, v1079);
+    int16x8_t v1790 = vsubq_s16(v938, v985);
+    int16x8_t v1791 = vsubq_s16(v795, v890);
+    int16x8_t v1792 = vsubq_s16(v317, v700);
+    vst1q_s16(out + out_stride * 0 + i, v701);
+    vst1q_s16(out + out_stride * 1 + i, v891);
+    vst1q_s16(out + out_stride * 2 + i, v986);
+    vst1q_s16(out + out_stride * 3 + i, v1080);
+    vst1q_s16(out + out_stride * 4 + i, v1126);
+    vst1q_s16(out + out_stride * 5 + i, v1172);
+    vst1q_s16(out + out_stride * 6 + i, v1218);
+    vst1q_s16(out + out_stride * 7 + i, v1264);
+    vst1q_s16(out + out_stride * 8 + i, v1286);
+    vst1q_s16(out + out_stride * 9 + i, v1308);
+    vst1q_s16(out + out_stride * 10 + i, v1330);
+    vst1q_s16(out + out_stride * 11 + i, v1352);
+    vst1q_s16(out + out_stride * 12 + i, v1374);
+    vst1q_s16(out + out_stride * 13 + i, v1396);
+    vst1q_s16(out + out_stride * 14 + i, v1418);
+    vst1q_s16(out + out_stride * 15 + i, v1440);
+    vst1q_s16(out + out_stride * 16 + i, v1450);
+    vst1q_s16(out + out_stride * 17 + i, v1460);
+    vst1q_s16(out + out_stride * 18 + i, v1470);
+    vst1q_s16(out + out_stride * 19 + i, v1480);
+    vst1q_s16(out + out_stride * 20 + i, v1490);
+    vst1q_s16(out + out_stride * 21 + i, v1500);
+    vst1q_s16(out + out_stride * 22 + i, v1510);
+    vst1q_s16(out + out_stride * 23 + i, v1520);
+    vst1q_s16(out + out_stride * 24 + i, v1530);
+    vst1q_s16(out + out_stride * 25 + i, v1540);
+    vst1q_s16(out + out_stride * 26 + i, v1550);
+    vst1q_s16(out + out_stride * 27 + i, v1560);
+    vst1q_s16(out + out_stride * 28 + i, v1570);
+    vst1q_s16(out + out_stride * 29 + i, v1580);
+    vst1q_s16(out + out_stride * 30 + i, v1590);
+    vst1q_s16(out + out_stride * 31 + i, v1600);
+    vst1q_s16(out + out_stride * 32 + i, v1604);
+    vst1q_s16(out + out_stride * 33 + i, v1608);
+    vst1q_s16(out + out_stride * 34 + i, v1612);
+    vst1q_s16(out + out_stride * 35 + i, v1616);
+    vst1q_s16(out + out_stride * 36 + i, v1620);
+    vst1q_s16(out + out_stride * 37 + i, v1624);
+    vst1q_s16(out + out_stride * 38 + i, v1628);
+    vst1q_s16(out + out_stride * 39 + i, v1632);
+    vst1q_s16(out + out_stride * 40 + i, v1636);
+    vst1q_s16(out + out_stride * 41 + i, v1640);
+    vst1q_s16(out + out_stride * 42 + i, v1644);
+    vst1q_s16(out + out_stride * 43 + i, v1648);
+    vst1q_s16(out + out_stride * 44 + i, v1652);
+    vst1q_s16(out + out_stride * 45 + i, v1656);
+    vst1q_s16(out + out_stride * 46 + i, v1660);
+    vst1q_s16(out + out_stride * 47 + i, v1664);
+    vst1q_s16(out + out_stride * 48 + i, v1668);
+    vst1q_s16(out + out_stride * 49 + i, v1672);
+    vst1q_s16(out + out_stride * 50 + i, v1676);
+    vst1q_s16(out + out_stride * 51 + i, v1680);
+    vst1q_s16(out + out_stride * 52 + i, v1684);
+    vst1q_s16(out + out_stride * 53 + i, v1688);
+    vst1q_s16(out + out_stride * 54 + i, v1692);
+    vst1q_s16(out + out_stride * 55 + i, v1696);
+    vst1q_s16(out + out_stride * 56 + i, v1700);
+    vst1q_s16(out + out_stride * 57 + i, v1704);
+    vst1q_s16(out + out_stride * 58 + i, v1708);
+    vst1q_s16(out + out_stride * 59 + i, v1712);
+    vst1q_s16(out + out_stride * 60 + i, v1716);
+    vst1q_s16(out + out_stride * 61 + i, v1720);
+    vst1q_s16(out + out_stride * 62 + i, v1724);
+    vst1q_s16(out + out_stride * 63 + i, v1728);
+    vst1q_s16(out + out_stride * 64 + i, v1729);
+    vst1q_s16(out + out_stride * 65 + i, v1730);
+    vst1q_s16(out + out_stride * 66 + i, v1731);
+    vst1q_s16(out + out_stride * 67 + i, v1732);
+    vst1q_s16(out + out_stride * 68 + i, v1733);
+    vst1q_s16(out + out_stride * 69 + i, v1734);
+    vst1q_s16(out + out_stride * 70 + i, v1735);
+    vst1q_s16(out + out_stride * 71 + i, v1736);
+    vst1q_s16(out + out_stride * 72 + i, v1737);
+    vst1q_s16(out + out_stride * 73 + i, v1738);
+    vst1q_s16(out + out_stride * 74 + i, v1739);
+    vst1q_s16(out + out_stride * 75 + i, v1740);
+    vst1q_s16(out + out_stride * 76 + i, v1741);
+    vst1q_s16(out + out_stride * 77 + i, v1742);
+    vst1q_s16(out + out_stride * 78 + i, v1743);
+    vst1q_s16(out + out_stride * 79 + i, v1744);
+    vst1q_s16(out + out_stride * 80 + i, v1745);
+    vst1q_s16(out + out_stride * 81 + i, v1746);
+    vst1q_s16(out + out_stride * 82 + i, v1747);
+    vst1q_s16(out + out_stride * 83 + i, v1748);
+    vst1q_s16(out + out_stride * 84 + i, v1749);
+    vst1q_s16(out + out_stride * 85 + i, v1750);
+    vst1q_s16(out + out_stride * 86 + i, v1751);
+    vst1q_s16(out + out_stride * 87 + i, v1752);
+    vst1q_s16(out + out_stride * 88 + i, v1753);
+    vst1q_s16(out + out_stride * 89 + i, v1754);
+    vst1q_s16(out + out_stride * 90 + i, v1755);
+    vst1q_s16(out + out_stride * 91 + i, v1756);
+    vst1q_s16(out + out_stride * 92 + i, v1757);
+    vst1q_s16(out + out_stride * 93 + i, v1758);
+    vst1q_s16(out + out_stride * 94 + i, v1759);
+    vst1q_s16(out + out_stride * 95 + i, v1760);
+    vst1q_s16(out + out_stride * 96 + i, v1761);
+    vst1q_s16(out + out_stride * 97 + i, v1762);
+    vst1q_s16(out + out_stride * 98 + i, v1763);
+    vst1q_s16(out + out_stride * 99 + i, v1764);
+    vst1q_s16(out + out_stride * 100 + i, v1765);
+    vst1q_s16(out + out_stride * 101 + i, v1766);
+    vst1q_s16(out + out_stride * 102 + i, v1767);
+    vst1q_s16(out + out_stride * 103 + i, v1768);
+    vst1q_s16(out + out_stride * 104 + i, v1769);
+    vst1q_s16(out + out_stride * 105 + i, v1770);
+    vst1q_s16(out + out_stride * 106 + i, v1771);
+    vst1q_s16(out + out_stride * 107 + i, v1772);
+    vst1q_s16(out + out_stride * 108 + i, v1773);
+    vst1q_s16(out + out_stride * 109 + i, v1774);
+    vst1q_s16(out + out_stride * 110 + i, v1775);
+    vst1q_s16(out + out_stride * 111 + i, v1776);
+    vst1q_s16(out + out_stride * 112 + i, v1777);
+    vst1q_s16(out + out_stride * 113 + i, v1778);
+    vst1q_s16(out + out_stride * 114 + i, v1779);
+    vst1q_s16(out + out_stride * 115 + i, v1780);
+    vst1q_s16(out + out_stride * 116 + i, v1781);
+    vst1q_s16(out + out_stride * 117 + i, v1782);
+    vst1q_s16(out + out_stride * 118 + i, v1783);
+    vst1q_s16(out + out_stride * 119 + i, v1784);
+    vst1q_s16(out + out_stride * 120 + i, v1785);
+    vst1q_s16(out + out_stride * 121 + i, v1786);
+    vst1q_s16(out + out_stride * 122 + i, v1787);
+    vst1q_s16(out + out_stride * 123 + i, v1788);
+    vst1q_s16(out + out_stride * 124 + i, v1789);
+    vst1q_s16(out + out_stride * 125 + i, v1790);
+    vst1q_s16(out + out_stride * 126 + i, v1791);
+    vst1q_s16(out + out_stride * 127 + i, v1792);
+  }
+}
diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_dct16-inl.h b/third-party/libjxl/libjxl/lib/jxl/fast_dct16-inl.h
new file mode 100644
index 0000000000..472ec20d42
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/fast_dct16-inl.h
@@ -0,0 +1,180 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/* This file is automatically generated. Do not modify it directly. */
+#if HWY_TARGET != HWY_NEON
+#error "only include this file from fast_dct-inl.h"
+#endif
+
+constexpr size_t FastIDCTIntegerBits(FastDCTTag<16>) { return 1; }  // Compile-time constant 1 for the N=16 overload. NOTE(review): presumably the integer (headroom) bit count of the fixed-point format; confirm at the call site in fast_dct-inl.h.
+
+void FastIDCT(FastDCTTag<16>, const int16_t* in, size_t in_stride, int16_t* out,  // FastIDCT overload for N=16: reads 16 int16 rows from `in`, writes 16 rows to `out`.
+              size_t out_stride, size_t count) {  // Row r starts at base + stride * r (strides in int16_t elements); `count` = number of columns.
+  JXL_ASSERT(count % 8 == 0);  // Each iteration consumes one full int16x8_t (8 columns); there is no tail loop.
+  for (size_t i = 0; i < count; i += 8) {  // i = first column of the current 8-lane strip.
+    int16x8_t v0 = vld1q_s16(in + in_stride * 0 + i);  // input row 0
+    int16x8_t v1 = vld1q_s16(in + in_stride * 8 + i);  // input row 8
+    int16x8_t v2 = vaddq_s16(v0, v1);
+    int16x8_t v3 = vld1q_s16(in + in_stride * 4 + i);  // input row 4
+    int16x8_t v4_tmp = vqrdmulhq_n_s16(v3, 13573);  // Q15 rounding multiply: v3 * 13573 / 32768.
+    int16x8_t v4 = vaddq_s16(v4_tmp, v3);  // `_tmp` + operand => scale by (1 + 13573/32768) ~= 1.41421; Q15 alone cannot hold a factor >= 1.
+    int16x8_t v5 = vld1q_s16(in + in_stride * 12 + i);  // input row 12
+    int16x8_t v6 = vaddq_s16(v5, v3);
+    int16x8_t v7 = vaddq_s16(v4, v6);
+    int16x8_t v8 = vqrdmulhq_n_s16(v7, 17734);  // Plain Q15 scale: v7 * 17734 / 32768 (~0.5412).
+    int16x8_t v9 = vaddq_s16(v2, v8);
+    int16x8_t v10 = vld1q_s16(in + in_stride * 2 + i);  // input row 2
+    int16x8_t v11_tmp = vqrdmulhq_n_s16(v10, 13573);
+    int16x8_t v11 = vaddq_s16(v11_tmp, v10);  // v10 * ~1.41421
+    int16x8_t v12 = vld1q_s16(in + in_stride * 10 + i);  // input row 10
+    int16x8_t v13 = vld1q_s16(in + in_stride * 6 + i);  // input row 6
+    int16x8_t v14 = vaddq_s16(v12, v13);
+    int16x8_t v15 = vaddq_s16(v11, v14);
+    int16x8_t v16 = vaddq_s16(v13, v10);
+    int16x8_t v17 = vqrdmulhq_n_s16(v16, 25080);
+    int16x8_t v18 = vld1q_s16(in + in_stride * 14 + i);  // input row 14
+    int16x8_t v19 = vaddq_s16(v18, v12);
+    int16x8_t v20 = vaddq_s16(v16, v19);
+    int16x8_t v21 = vqrdmulhq_n_s16(v20, 17734);
+    int16x8_t v22 = vaddq_s16(v17, v21);
+    int16x8_t v23 = vaddq_s16(v15, v22);
+    int16x8_t v24 = vqrdmulhq_n_s16(v23, 16705);
+    int16x8_t v25 = vaddq_s16(v9, v24);  // Built from even input rows only; combined with v60 for output rows 0 and 15.
+    int16x8_t v26 = vld1q_s16(in + in_stride * 15 + i);  // input row 15
+    int16x8_t v27 = vld1q_s16(in + in_stride * 13 + i);  // input row 13
+    int16x8_t v28 = vaddq_s16(v26, v27);
+    int16x8_t v29 = vld1q_s16(in + in_stride * 11 + i);  // input row 11
+    int16x8_t v30 = vld1q_s16(in + in_stride * 9 + i);  // input row 9
+    int16x8_t v31 = vaddq_s16(v29, v30);
+    int16x8_t v32 = vaddq_s16(v28, v31);
+    int16x8_t v33 = vqrdmulhq_n_s16(v32, 17734);
+    int16x8_t v34 = vld1q_s16(in + in_stride * 3 + i);  // input row 3
+    int16x8_t v35 = vld1q_s16(in + in_stride * 1 + i);  // input row 1
+    int16x8_t v36 = vaddq_s16(v34, v35);
+    int16x8_t v37 = vld1q_s16(in + in_stride * 7 + i);  // input row 7
+    int16x8_t v38 = vld1q_s16(in + in_stride * 5 + i);  // input row 5 (last load; all 16 rows are now in registers)
+    int16x8_t v39 = vaddq_s16(v37, v38);
+    int16x8_t v40 = vaddq_s16(v36, v39);
+    int16x8_t v41_tmp = vqrdmulhq_n_s16(v40, 10045);
+    int16x8_t v41 = vaddq_s16(v41_tmp, v40);  // v40 * (1 + 10045/32768) ~= 1.3066
+    int16x8_t v42 = vaddq_s16(v33, v41);
+    int16x8_t v43 = vqrdmulhq_n_s16(v42, 16705);
+    int16x8_t v44_tmp = vqrdmulhq_n_s16(v36, 13573);
+    int16x8_t v44 = vaddq_s16(v44_tmp, v36);
+    int16x8_t v45 = vaddq_s16(v39, v31);
+    int16x8_t v46 = vaddq_s16(v44, v45);
+    int16x8_t v47 = vqrdmulhq_n_s16(v46, 16705);
+    int16x8_t v48 = vaddq_s16(v43, v47);
+    int16x8_t v49_tmp = vqrdmulhq_n_s16(v35, 13573);
+    int16x8_t v49 = vaddq_s16(v49_tmp, v35);
+    int16x8_t v50 = vaddq_s16(v30, v37);
+    int16x8_t v51 = vaddq_s16(v49, v50);
+    int16x8_t v52 = vaddq_s16(v38, v34);
+    int16x8_t v53 = vaddq_s16(v27, v29);
+    int16x8_t v54 = vaddq_s16(v52, v53);
+    int16x8_t v55 = vqrdmulhq_n_s16(v54, 17734);
+    int16x8_t v56 = vqrdmulhq_n_s16(v52, 25080);
+    int16x8_t v57 = vaddq_s16(v55, v56);
+    int16x8_t v58 = vaddq_s16(v51, v57);
+    int16x8_t v59 = vaddq_s16(v48, v58);
+    int16x8_t v60 = vqrdmulhq_n_s16(v59, 16463);  // Built from odd input rows only.
+    int16x8_t v61 = vaddq_s16(v25, v60);  // output row 0 (row 15 is v25 - v60, see v131)
+    int16x8_t v62 = vsubq_s16(v0, v1);
+    int16x8_t v63 = vsubq_s16(v4, v6);
+    int16x8_t v64_tmp = vqrdmulhq_n_s16(v63, 10045);
+    int16x8_t v64 = vaddq_s16(v64_tmp, v63);
+    int16x8_t v65 = vaddq_s16(v62, v64);
+    int16x8_t v66 = vsubq_s16(v11, v14);
+    int16x8_t v67 = vqrdmulhq_n_s16(v16, 17734);
+    int16x8_t v68_tmp = vqrdmulhq_n_s16(v19, 10045);
+    int16x8_t v68 = vaddq_s16(v68_tmp, v19);
+    int16x8_t v69 = vsubq_s16(v67, v68);
+    int16x8_t v70 = vaddq_s16(v66, v69);
+    int16x8_t v71 = vqrdmulhq_n_s16(v70, 19705);
+    int16x8_t v72 = vaddq_s16(v65, v71);
+    int16x8_t v73 = vsubq_s16(v49, v50);
+    int16x8_t v74 = vqrdmulhq_n_s16(v52, 17734);
+    int16x8_t v75_tmp = vqrdmulhq_n_s16(v53, 10045);
+    int16x8_t v75 = vaddq_s16(v75_tmp, v53);
+    int16x8_t v76 = vsubq_s16(v74, v75);
+    int16x8_t v77 = vaddq_s16(v73, v76);
+    int16x8_t v78 = vsubq_s16(v44, v45);
+    int16x8_t v79 = vqrdmulhq_n_s16(v78, 19705);
+    int16x8_t v80 = vqrdmulhq_n_s16(v40, 13573);
+    int16x8_t v81 = vsubq_s16(v80, v32);
+    int16x8_t v82 = vqrdmulhq_n_s16(v81, 25746);
+    int16x8_t v83 = vaddq_s16(v79, v82);
+    int16x8_t v84 = vaddq_s16(v77, v83);
+    int16x8_t v85 = vqrdmulhq_n_s16(v84, 17121);
+    int16x8_t v86 = vaddq_s16(v72, v85);  // output row 1
+    int16x8_t v87 = vsubq_s16(v62, v64);
+    int16x8_t v88 = vsubq_s16(v66, v69);
+    int16x8_t v89 = vqrdmulhq_n_s16(v88, 29490);
+    int16x8_t v90 = vaddq_s16(v87, v89);
+    int16x8_t v91 = vsubq_s16(v73, v76);
+    int16x8_t v92 = vqrdmulhq_n_s16(v78, 29490);
+    int16x8_t v93_tmp = vqrdmulhq_n_s16(v81, 5763);
+    int16x8_t v93 = vaddq_s16(v93_tmp, v81);
+    int16x8_t v94 = vsubq_s16(v92, v93);
+    int16x8_t v95 = vaddq_s16(v91, v94);
+    int16x8_t v96 = vqrdmulhq_n_s16(v95, 18578);
+    int16x8_t v97 = vaddq_s16(v90, v96);  // output row 2
+    int16x8_t v98 = vsubq_s16(v46, v42);
+    int16x8_t v99_tmp = vqrdmulhq_n_s16(v98, 18446);
+    int16x8_t v99 = vmlaq_n_s16(v99_tmp, v98, 2);  // `_tmp` + 2 * operand => scale by (2 + 18446/32768) ~= 2.563.
+    int16x8_t v100 = vsubq_s16(v51, v57);
+    int16x8_t v101 = vaddq_s16(v99, v100);
+    int16x8_t v102 = vqrdmulhq_n_s16(v101, 21195);
+    int16x8_t v103 = vsubq_s16(v2, v8);
+    int16x8_t v104 = vsubq_s16(v15, v22);
+    int16x8_t v105_tmp = vqrdmulhq_n_s16(v104, 18446);
+    int16x8_t v105 = vmlaq_n_s16(v105_tmp, v104, 2);
+    int16x8_t v106 = vaddq_s16(v103, v105);
+    int16x8_t v107 = vaddq_s16(v102, v106);  // output row 3
+    int16x8_t v108 = vsubq_s16(v103, v105);
+    int16x8_t v109 = vsubq_s16(v100, v99);
+    int16x8_t v110 = vqrdmulhq_n_s16(v109, 25826);
+    int16x8_t v111 = vaddq_s16(v108, v110);  // output row 4
+    int16x8_t v112 = vsubq_s16(v87, v89);
+    int16x8_t v113 = vsubq_s16(v91, v94);
+    int16x8_t v114_tmp = vqrdmulhq_n_s16(v113, 1988);
+    int16x8_t v114 = vaddq_s16(v114_tmp, v113);
+    int16x8_t v115 = vaddq_s16(v112, v114);  // output row 5
+    int16x8_t v116 = vsubq_s16(v65, v71);
+    int16x8_t v117 = vsubq_s16(v77, v83);
+    int16x8_t v118_tmp = vqrdmulhq_n_s16(v117, 23673);
+    int16x8_t v118 = vaddq_s16(v118_tmp, v117);
+    int16x8_t v119 = vaddq_s16(v116, v118);  // output row 6
+    int16x8_t v120 = vsubq_s16(v58, v48);
+    int16x8_t v121_tmp = vqrdmulhq_n_s16(v120, 3314);
+    int16x8_t v121 = vmlaq_n_s16(v121_tmp, v120, 5);  // scale by (5 + 3314/32768) ~= 5.101
+    int16x8_t v122 = vsubq_s16(v9, v24);
+    int16x8_t v123 = vaddq_s16(v121, v122);  // output row 7
+    int16x8_t v124 = vsubq_s16(v122, v121);  // Rows 8..15 reuse the operand pairs of rows 7..0, subtracting instead of adding.
+    int16x8_t v125 = vsubq_s16(v116, v118);  // output row 9 (pairs with row 6)
+    int16x8_t v126 = vsubq_s16(v112, v114);  // output row 10 (pairs with row 5)
+    int16x8_t v127 = vsubq_s16(v108, v110);  // output row 11 (pairs with row 4)
+    int16x8_t v128 = vsubq_s16(v106, v102);  // output row 12 (pairs with row 3)
+    int16x8_t v129 = vsubq_s16(v90, v96);  // output row 13 (pairs with row 2)
+    int16x8_t v130 = vsubq_s16(v72, v85);  // output row 14 (pairs with row 1)
+    int16x8_t v131 = vsubq_s16(v25, v60);  // output row 15 (pairs with row 0)
+    vst1q_s16(out + out_stride * 0 + i, v61);  // Stores write output rows 0..15 in order, 8 columns each, at column offset i.
+    vst1q_s16(out + out_stride * 1 + i, v86);
+    vst1q_s16(out + out_stride * 2 + i, v97);
+    vst1q_s16(out + out_stride * 3 + i, v107);
+    vst1q_s16(out + out_stride * 4 + i, v111);
+    vst1q_s16(out + out_stride * 5 + i, v115);
+    vst1q_s16(out + out_stride * 6 + i, v119);
+    vst1q_s16(out + out_stride * 7 + i, v123);
+    vst1q_s16(out + out_stride * 8 + i, v124);
+    vst1q_s16(out + out_stride * 9 + i, v125);
+    vst1q_s16(out + out_stride * 10 + i, v126);
+    vst1q_s16(out + out_stride * 11 + i, v127);
+    vst1q_s16(out + out_stride * 12 + i, v128);
+    vst1q_s16(out + out_stride * 13 + i, v129);
+    vst1q_s16(out + out_stride * 14 + i, v130);
+    vst1q_s16(out + out_stride * 15 + i, v131);
+  }  // NOTE(review): vaddq/vsubq wrap on int16 overflow while vqrdmulhq saturates; presumably callers bound the input range -- confirm.
+}
diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_dct256-inl.h b/third-party/libjxl/libjxl/lib/jxl/fast_dct256-inl.h
new file mode 100644
index 0000000000..a823440af2
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/fast_dct256-inl.h
@@ -0,0 +1,4811 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/* This file is automatically generated. Do not modify it directly. */
+#if HWY_TARGET != HWY_NEON
+#error "only include this file from fast_dct-inl.h"
+#endif
+
+constexpr size_t FastIDCTIntegerBits(FastDCTTag<256>) { return 3; }  // Overload selected by the FastDCTTag<256> tag; always yields 3. NOTE(review): presumably the integer-bit count of the int16 fixed-point format for the 256-point IDCT - confirm in fast_dct-inl.h.
+
+void FastIDCT(FastDCTTag<256>, const int16_t* in, size_t in_stride,
+              int16_t* out, size_t out_stride, size_t count) {
+  JXL_ASSERT(count % 8 == 0);
+  for (size_t i = 0; i < count; i += 8) {
+    int16x8_t v0 = vld1q_s16(in + in_stride * 0 + i);
+    int16x8_t v1 = vld1q_s16(in + in_stride * 128 + i);
+    int16x8_t v2 = vaddq_s16(v0, v1);
+    int16x8_t v3 = vld1q_s16(in + in_stride * 64 + i);
+    int16x8_t v4_tmp = vqrdmulhq_n_s16(v3, 13573);
+    int16x8_t v4 = vaddq_s16(v4_tmp, v3);
+    int16x8_t v5 = vld1q_s16(in + in_stride * 192 + i);
+    int16x8_t v6 = vaddq_s16(v5, v3);
+    int16x8_t v7 = vaddq_s16(v4, v6);
+    int16x8_t v8 = vqrdmulhq_n_s16(v7, 17734);
+    int16x8_t v9 = vaddq_s16(v2, v8);
+    int16x8_t v10 = vld1q_s16(in + in_stride * 32 + i);
+    int16x8_t v11_tmp = vqrdmulhq_n_s16(v10, 13573);
+    int16x8_t v11 = vaddq_s16(v11_tmp, v10);
+    int16x8_t v12 = vld1q_s16(in + in_stride * 160 + i);
+    int16x8_t v13 = vld1q_s16(in + in_stride * 96 + i);
+    int16x8_t v14 = vaddq_s16(v12, v13);
+    int16x8_t v15 = vaddq_s16(v11, v14);
+    int16x8_t v16 = vaddq_s16(v13, v10);
+    int16x8_t v17_tmp = vqrdmulhq_n_s16(v16, 13573);
+    int16x8_t v17 = vaddq_s16(v17_tmp, v16);
+    int16x8_t v18 = vld1q_s16(in + in_stride * 224 + i);
+    int16x8_t v19 = vaddq_s16(v18, v12);
+    int16x8_t v20 = vaddq_s16(v19, v16);
+    int16x8_t v21 = vaddq_s16(v17, v20);
+    int16x8_t v22 = vqrdmulhq_n_s16(v21, 17734);
+    int16x8_t v23 = vaddq_s16(v15, v22);
+    int16x8_t v24 = vqrdmulhq_n_s16(v23, 16705);
+    int16x8_t v25 = vaddq_s16(v9, v24);
+    int16x8_t v26 = vld1q_s16(in + in_stride * 16 + i);
+    int16x8_t v27_tmp = vqrdmulhq_n_s16(v26, 13573);
+    int16x8_t v27 = vaddq_s16(v27_tmp, v26);
+    int16x8_t v28 = vld1q_s16(in + in_stride * 144 + i);
+    int16x8_t v29 = vld1q_s16(in + in_stride * 112 + i);
+    int16x8_t v30 = vaddq_s16(v28, v29);
+    int16x8_t v31 = vaddq_s16(v27, v30);
+    int16x8_t v32 = vld1q_s16(in + in_stride * 80 + i);
+    int16x8_t v33 = vld1q_s16(in + in_stride * 48 + i);
+    int16x8_t v34 = vaddq_s16(v32, v33);
+    int16x8_t v35_tmp = vqrdmulhq_n_s16(v34, 13573);
+    int16x8_t v35 = vaddq_s16(v35_tmp, v34);
+    int16x8_t v36 = vld1q_s16(in + in_stride * 208 + i);
+    int16x8_t v37 = vld1q_s16(in + in_stride * 176 + i);
+    int16x8_t v38 = vaddq_s16(v36, v37);
+    int16x8_t v39 = vaddq_s16(v38, v34);
+    int16x8_t v40 = vaddq_s16(v35, v39);
+    int16x8_t v41 = vqrdmulhq_n_s16(v40, 17734);
+    int16x8_t v42 = vaddq_s16(v31, v41);
+    int16x8_t v43 = vaddq_s16(v33, v26);
+    int16x8_t v44_tmp = vqrdmulhq_n_s16(v43, 13573);
+    int16x8_t v44 = vaddq_s16(v44_tmp, v43);
+    int16x8_t v45 = vaddq_s16(v37, v28);
+    int16x8_t v46 = vaddq_s16(v29, v32);
+    int16x8_t v47 = vaddq_s16(v45, v46);
+    int16x8_t v48 = vaddq_s16(v44, v47);
+    int16x8_t v49 = vaddq_s16(v46, v43);
+    int16x8_t v50_tmp = vqrdmulhq_n_s16(v49, 13573);
+    int16x8_t v50 = vaddq_s16(v50_tmp, v49);
+    int16x8_t v51 = vld1q_s16(in + in_stride * 240 + i);
+    int16x8_t v52 = vaddq_s16(v51, v36);
+    int16x8_t v53 = vaddq_s16(v52, v45);
+    int16x8_t v54 = vaddq_s16(v53, v49);
+    int16x8_t v55 = vaddq_s16(v50, v54);
+    int16x8_t v56 = vqrdmulhq_n_s16(v55, 17734);
+    int16x8_t v57 = vaddq_s16(v48, v56);
+    int16x8_t v58 = vqrdmulhq_n_s16(v57, 16705);
+    int16x8_t v59 = vaddq_s16(v42, v58);
+    int16x8_t v60 = vqrdmulhq_n_s16(v59, 16463);
+    int16x8_t v61 = vaddq_s16(v25, v60);
+    int16x8_t v62 = vld1q_s16(in + in_stride * 8 + i);
+    int16x8_t v63_tmp = vqrdmulhq_n_s16(v62, 13573);
+    int16x8_t v63 = vaddq_s16(v63_tmp, v62);
+    int16x8_t v64 = vld1q_s16(in + in_stride * 136 + i);
+    int16x8_t v65 = vld1q_s16(in + in_stride * 120 + i);
+    int16x8_t v66 = vaddq_s16(v64, v65);
+    int16x8_t v67 = vaddq_s16(v63, v66);
+    int16x8_t v68 = vld1q_s16(in + in_stride * 72 + i);
+    int16x8_t v69 = vld1q_s16(in + in_stride * 56 + i);
+    int16x8_t v70 = vaddq_s16(v68, v69);
+    int16x8_t v71_tmp = vqrdmulhq_n_s16(v70, 13573);
+    int16x8_t v71 = vaddq_s16(v71_tmp, v70);
+    int16x8_t v72 = vld1q_s16(in + in_stride * 200 + i);
+    int16x8_t v73 = vld1q_s16(in + in_stride * 184 + i);
+    int16x8_t v74 = vaddq_s16(v72, v73);
+    int16x8_t v75 = vaddq_s16(v74, v70);
+    int16x8_t v76 = vaddq_s16(v71, v75);
+    int16x8_t v77 = vqrdmulhq_n_s16(v76, 17734);
+    int16x8_t v78 = vaddq_s16(v67, v77);
+    int16x8_t v79 = vld1q_s16(in + in_stride * 40 + i);
+    int16x8_t v80 = vld1q_s16(in + in_stride * 24 + i);
+    int16x8_t v81 = vaddq_s16(v79, v80);
+    int16x8_t v82_tmp = vqrdmulhq_n_s16(v81, 13573);
+    int16x8_t v82 = vaddq_s16(v82_tmp, v81);
+    int16x8_t v83 = vld1q_s16(in + in_stride * 168 + i);
+    int16x8_t v84 = vld1q_s16(in + in_stride * 152 + i);
+    int16x8_t v85 = vaddq_s16(v83, v84);
+    int16x8_t v86 = vld1q_s16(in + in_stride * 104 + i);
+    int16x8_t v87 = vld1q_s16(in + in_stride * 88 + i);
+    int16x8_t v88 = vaddq_s16(v86, v87);
+    int16x8_t v89 = vaddq_s16(v85, v88);
+    int16x8_t v90 = vaddq_s16(v82, v89);
+    int16x8_t v91 = vaddq_s16(v88, v81);
+    int16x8_t v92_tmp = vqrdmulhq_n_s16(v91, 13573);
+    int16x8_t v92 = vaddq_s16(v92_tmp, v91);
+    int16x8_t v93 = vld1q_s16(in + in_stride * 232 + i);
+    int16x8_t v94 = vld1q_s16(in + in_stride * 216 + i);
+    int16x8_t v95 = vaddq_s16(v93, v94);
+    int16x8_t v96 = vaddq_s16(v95, v85);
+    int16x8_t v97 = vaddq_s16(v96, v91);
+    int16x8_t v98 = vaddq_s16(v92, v97);
+    int16x8_t v99 = vqrdmulhq_n_s16(v98, 17734);
+    int16x8_t v100 = vaddq_s16(v90, v99);
+    int16x8_t v101 = vqrdmulhq_n_s16(v100, 16705);
+    int16x8_t v102 = vaddq_s16(v78, v101);
+    int16x8_t v103 = vaddq_s16(v80, v62);
+    int16x8_t v104_tmp = vqrdmulhq_n_s16(v103, 13573);
+    int16x8_t v104 = vaddq_s16(v104_tmp, v103);
+    int16x8_t v105 = vaddq_s16(v84, v64);
+    int16x8_t v106 = vaddq_s16(v65, v86);
+    int16x8_t v107 = vaddq_s16(v105, v106);
+    int16x8_t v108 = vaddq_s16(v104, v107);
+    int16x8_t v109 = vaddq_s16(v87, v68);
+    int16x8_t v110 = vaddq_s16(v69, v79);
+    int16x8_t v111 = vaddq_s16(v109, v110);
+    int16x8_t v112_tmp = vqrdmulhq_n_s16(v111, 13573);
+    int16x8_t v112 = vaddq_s16(v112_tmp, v111);
+    int16x8_t v113 = vaddq_s16(v94, v72);
+    int16x8_t v114 = vaddq_s16(v73, v83);
+    int16x8_t v115 = vaddq_s16(v113, v114);
+    int16x8_t v116 = vaddq_s16(v115, v111);
+    int16x8_t v117 = vaddq_s16(v112, v116);
+    int16x8_t v118 = vqrdmulhq_n_s16(v117, 17734);
+    int16x8_t v119 = vaddq_s16(v108, v118);
+    int16x8_t v120 = vaddq_s16(v110, v103);
+    int16x8_t v121_tmp = vqrdmulhq_n_s16(v120, 13573);
+    int16x8_t v121 = vaddq_s16(v121_tmp, v120);
+    int16x8_t v122 = vaddq_s16(v114, v105);
+    int16x8_t v123 = vaddq_s16(v106, v109);
+    int16x8_t v124 = vaddq_s16(v122, v123);
+    int16x8_t v125 = vaddq_s16(v121, v124);
+    int16x8_t v126 = vaddq_s16(v123, v120);
+    int16x8_t v127_tmp = vqrdmulhq_n_s16(v126, 13573);
+    int16x8_t v127 = vaddq_s16(v127_tmp, v126);
+    int16x8_t v128 = vld1q_s16(in + in_stride * 248 + i);
+    int16x8_t v129 = vaddq_s16(v128, v93);
+    int16x8_t v130 = vaddq_s16(v129, v113);
+    int16x8_t v131 = vaddq_s16(v130, v122);
+    int16x8_t v132 = vaddq_s16(v131, v126);
+    int16x8_t v133 = vaddq_s16(v127, v132);
+    int16x8_t v134 = vqrdmulhq_n_s16(v133, 17734);
+    int16x8_t v135 = vaddq_s16(v125, v134);
+    int16x8_t v136 = vqrdmulhq_n_s16(v135, 16705);
+    int16x8_t v137 = vaddq_s16(v119, v136);
+    int16x8_t v138 = vqrdmulhq_n_s16(v137, 16463);
+    int16x8_t v139 = vaddq_s16(v102, v138);
+    int16x8_t v140 = vqrdmulhq_n_s16(v139, 16404);
+    int16x8_t v141 = vaddq_s16(v61, v140);
+    int16x8_t v142 = vld1q_s16(in + in_stride * 4 + i);
+    int16x8_t v143_tmp = vqrdmulhq_n_s16(v142, 13573);
+    int16x8_t v143 = vaddq_s16(v143_tmp, v142);
+    int16x8_t v144 = vld1q_s16(in + in_stride * 132 + i);
+    int16x8_t v145 = vld1q_s16(in + in_stride * 124 + i);
+    int16x8_t v146 = vaddq_s16(v144, v145);
+    int16x8_t v147 = vaddq_s16(v143, v146);
+    int16x8_t v148 = vld1q_s16(in + in_stride * 68 + i);
+    int16x8_t v149 = vld1q_s16(in + in_stride * 60 + i);
+    int16x8_t v150 = vaddq_s16(v148, v149);
+    int16x8_t v151_tmp = vqrdmulhq_n_s16(v150, 13573);
+    int16x8_t v151 = vaddq_s16(v151_tmp, v150);
+    int16x8_t v152 = vld1q_s16(in + in_stride * 196 + i);
+    int16x8_t v153 = vld1q_s16(in + in_stride * 188 + i);
+    int16x8_t v154 = vaddq_s16(v152, v153);
+    int16x8_t v155 = vaddq_s16(v154, v150);
+    int16x8_t v156 = vaddq_s16(v151, v155);
+    int16x8_t v157 = vqrdmulhq_n_s16(v156, 17734);
+    int16x8_t v158 = vaddq_s16(v147, v157);
+    int16x8_t v159 = vld1q_s16(in + in_stride * 36 + i);
+    int16x8_t v160 = vld1q_s16(in + in_stride * 28 + i);
+    int16x8_t v161 = vaddq_s16(v159, v160);
+    int16x8_t v162_tmp = vqrdmulhq_n_s16(v161, 13573);
+    int16x8_t v162 = vaddq_s16(v162_tmp, v161);
+    int16x8_t v163 = vld1q_s16(in + in_stride * 164 + i);
+    int16x8_t v164 = vld1q_s16(in + in_stride * 156 + i);
+    int16x8_t v165 = vaddq_s16(v163, v164);
+    int16x8_t v166 = vld1q_s16(in + in_stride * 100 + i);
+    int16x8_t v167 = vld1q_s16(in + in_stride * 92 + i);
+    int16x8_t v168 = vaddq_s16(v166, v167);
+    int16x8_t v169 = vaddq_s16(v165, v168);
+    int16x8_t v170 = vaddq_s16(v162, v169);
+    int16x8_t v171 = vaddq_s16(v168, v161);
+    int16x8_t v172_tmp = vqrdmulhq_n_s16(v171, 13573);
+    int16x8_t v172 = vaddq_s16(v172_tmp, v171);
+    int16x8_t v173 = vld1q_s16(in + in_stride * 228 + i);
+    int16x8_t v174 = vld1q_s16(in + in_stride * 220 + i);
+    int16x8_t v175 = vaddq_s16(v173, v174);
+    int16x8_t v176 = vaddq_s16(v175, v165);
+    int16x8_t v177 = vaddq_s16(v176, v171);
+    int16x8_t v178 = vaddq_s16(v172, v177);
+    int16x8_t v179 = vqrdmulhq_n_s16(v178, 17734);
+    int16x8_t v180 = vaddq_s16(v170, v179);
+    int16x8_t v181 = vqrdmulhq_n_s16(v180, 16705);
+    int16x8_t v182 = vaddq_s16(v158, v181);
+    int16x8_t v183 = vld1q_s16(in + in_stride * 20 + i);
+    int16x8_t v184 = vld1q_s16(in + in_stride * 12 + i);
+    int16x8_t v185 = vaddq_s16(v183, v184);
+    int16x8_t v186_tmp = vqrdmulhq_n_s16(v185, 13573);
+    int16x8_t v186 = vaddq_s16(v186_tmp, v185);
+    int16x8_t v187 = vld1q_s16(in + in_stride * 148 + i);
+    int16x8_t v188 = vld1q_s16(in + in_stride * 140 + i);
+    int16x8_t v189 = vaddq_s16(v187, v188);
+    int16x8_t v190 = vld1q_s16(in + in_stride * 116 + i);
+    int16x8_t v191 = vld1q_s16(in + in_stride * 108 + i);
+    int16x8_t v192 = vaddq_s16(v190, v191);
+    int16x8_t v193 = vaddq_s16(v189, v192);
+    int16x8_t v194 = vaddq_s16(v186, v193);
+    int16x8_t v195 = vld1q_s16(in + in_stride * 84 + i);
+    int16x8_t v196 = vld1q_s16(in + in_stride * 76 + i);
+    int16x8_t v197 = vaddq_s16(v195, v196);
+    int16x8_t v198 = vld1q_s16(in + in_stride * 52 + i);
+    int16x8_t v199 = vld1q_s16(in + in_stride * 44 + i);
+    int16x8_t v200 = vaddq_s16(v198, v199);
+    int16x8_t v201 = vaddq_s16(v197, v200);
+    int16x8_t v202_tmp = vqrdmulhq_n_s16(v201, 13573);
+    int16x8_t v202 = vaddq_s16(v202_tmp, v201);
+    int16x8_t v203 = vld1q_s16(in + in_stride * 212 + i);
+    int16x8_t v204 = vld1q_s16(in + in_stride * 204 + i);
+    int16x8_t v205 = vaddq_s16(v203, v204);
+    int16x8_t v206 = vld1q_s16(in + in_stride * 180 + i);
+    int16x8_t v207 = vld1q_s16(in + in_stride * 172 + i);
+    int16x8_t v208 = vaddq_s16(v206, v207);
+    int16x8_t v209 = vaddq_s16(v205, v208);
+    int16x8_t v210 = vaddq_s16(v209, v201);
+    int16x8_t v211 = vaddq_s16(v202, v210);
+    int16x8_t v212 = vqrdmulhq_n_s16(v211, 17734);
+    int16x8_t v213 = vaddq_s16(v194, v212);
+    int16x8_t v214 = vaddq_s16(v200, v185);
+    int16x8_t v215_tmp = vqrdmulhq_n_s16(v214, 13573);
+    int16x8_t v215 = vaddq_s16(v215_tmp, v214);
+    int16x8_t v216 = vaddq_s16(v208, v189);
+    int16x8_t v217 = vaddq_s16(v192, v197);
+    int16x8_t v218 = vaddq_s16(v216, v217);
+    int16x8_t v219 = vaddq_s16(v215, v218);
+    int16x8_t v220 = vaddq_s16(v217, v214);
+    int16x8_t v221_tmp = vqrdmulhq_n_s16(v220, 13573);
+    int16x8_t v221 = vaddq_s16(v221_tmp, v220);
+    int16x8_t v222 = vld1q_s16(in + in_stride * 244 + i);
+    int16x8_t v223 = vld1q_s16(in + in_stride * 236 + i);
+    int16x8_t v224 = vaddq_s16(v222, v223);
+    int16x8_t v225 = vaddq_s16(v224, v205);
+    int16x8_t v226 = vaddq_s16(v225, v216);
+    int16x8_t v227 = vaddq_s16(v226, v220);
+    int16x8_t v228 = vaddq_s16(v221, v227);
+    int16x8_t v229 = vqrdmulhq_n_s16(v228, 17734);
+    int16x8_t v230 = vaddq_s16(v219, v229);
+    int16x8_t v231 = vqrdmulhq_n_s16(v230, 16705);
+    int16x8_t v232 = vaddq_s16(v213, v231);
+    int16x8_t v233 = vqrdmulhq_n_s16(v232, 16463);
+    int16x8_t v234 = vaddq_s16(v182, v233);
+    int16x8_t v235 = vaddq_s16(v184, v142);
+    int16x8_t v236_tmp = vqrdmulhq_n_s16(v235, 13573);
+    int16x8_t v236 = vaddq_s16(v236_tmp, v235);
+    int16x8_t v237 = vaddq_s16(v188, v144);
+    int16x8_t v238 = vaddq_s16(v145, v190);
+    int16x8_t v239 = vaddq_s16(v237, v238);
+    int16x8_t v240 = vaddq_s16(v236, v239);
+    int16x8_t v241 = vaddq_s16(v196, v148);
+    int16x8_t v242 = vaddq_s16(v149, v198);
+    int16x8_t v243 = vaddq_s16(v241, v242);
+    int16x8_t v244_tmp = vqrdmulhq_n_s16(v243, 13573);
+    int16x8_t v244 = vaddq_s16(v244_tmp, v243);
+    int16x8_t v245 = vaddq_s16(v204, v152);
+    int16x8_t v246 = vaddq_s16(v153, v206);
+    int16x8_t v247 = vaddq_s16(v245, v246);
+    int16x8_t v248 = vaddq_s16(v247, v243);
+    int16x8_t v249 = vaddq_s16(v244, v248);
+    int16x8_t v250 = vqrdmulhq_n_s16(v249, 17734);
+    int16x8_t v251 = vaddq_s16(v240, v250);
+    int16x8_t v252 = vaddq_s16(v199, v159);
+    int16x8_t v253 = vaddq_s16(v160, v183);
+    int16x8_t v254 = vaddq_s16(v252, v253);
+    int16x8_t v255_tmp = vqrdmulhq_n_s16(v254, 13573);
+    int16x8_t v255 = vaddq_s16(v255_tmp, v254);
+    int16x8_t v256 = vaddq_s16(v207, v163);
+    int16x8_t v257 = vaddq_s16(v164, v187);
+    int16x8_t v258 = vaddq_s16(v256, v257);
+    int16x8_t v259 = vaddq_s16(v191, v166);
+    int16x8_t v260 = vaddq_s16(v167, v195);
+    int16x8_t v261 = vaddq_s16(v259, v260);
+    int16x8_t v262 = vaddq_s16(v258, v261);
+    int16x8_t v263 = vaddq_s16(v255, v262);
+    int16x8_t v264 = vaddq_s16(v261, v254);
+    int16x8_t v265_tmp = vqrdmulhq_n_s16(v264, 13573);
+    int16x8_t v265 = vaddq_s16(v265_tmp, v264);
+    int16x8_t v266 = vaddq_s16(v223, v173);
+    int16x8_t v267 = vaddq_s16(v174, v203);
+    int16x8_t v268 = vaddq_s16(v266, v267);
+    int16x8_t v269 = vaddq_s16(v268, v258);
+    int16x8_t v270 = vaddq_s16(v269, v264);
+    int16x8_t v271 = vaddq_s16(v265, v270);
+    int16x8_t v272 = vqrdmulhq_n_s16(v271, 17734);
+    int16x8_t v273 = vaddq_s16(v263, v272);
+    int16x8_t v274 = vqrdmulhq_n_s16(v273, 16705);
+    int16x8_t v275 = vaddq_s16(v251, v274);
+    int16x8_t v276 = vaddq_s16(v253, v235);
+    int16x8_t v277_tmp = vqrdmulhq_n_s16(v276, 13573);
+    int16x8_t v277 = vaddq_s16(v277_tmp, v276);
+    int16x8_t v278 = vaddq_s16(v257, v237);
+    int16x8_t v279 = vaddq_s16(v238, v259);
+    int16x8_t v280 = vaddq_s16(v278, v279);
+    int16x8_t v281 = vaddq_s16(v277, v280);
+    int16x8_t v282 = vaddq_s16(v260, v241);
+    int16x8_t v283 = vaddq_s16(v242, v252);
+    int16x8_t v284 = vaddq_s16(v282, v283);
+    int16x8_t v285_tmp = vqrdmulhq_n_s16(v284, 13573);
+    int16x8_t v285 = vaddq_s16(v285_tmp, v284);
+    int16x8_t v286 = vaddq_s16(v267, v245);
+    int16x8_t v287 = vaddq_s16(v246, v256);
+    int16x8_t v288 = vaddq_s16(v286, v287);
+    int16x8_t v289 = vaddq_s16(v288, v284);
+    int16x8_t v290 = vaddq_s16(v285, v289);
+    int16x8_t v291 = vqrdmulhq_n_s16(v290, 17734);
+    int16x8_t v292 = vaddq_s16(v281, v291);
+    int16x8_t v293 = vaddq_s16(v283, v276);
+    int16x8_t v294_tmp = vqrdmulhq_n_s16(v293, 13573);
+    int16x8_t v294 = vaddq_s16(v294_tmp, v293);
+    int16x8_t v295 = vaddq_s16(v287, v278);
+    int16x8_t v296 = vaddq_s16(v279, v282);
+    int16x8_t v297 = vaddq_s16(v295, v296);
+    int16x8_t v298 = vaddq_s16(v294, v297);
+    int16x8_t v299 = vaddq_s16(v296, v293);
+    int16x8_t v300_tmp = vqrdmulhq_n_s16(v299, 13573);
+    int16x8_t v300 = vaddq_s16(v300_tmp, v299);
+    int16x8_t v301 = vld1q_s16(in + in_stride * 252 + i);
+    int16x8_t v302 = vaddq_s16(v301, v222);
+    int16x8_t v303 = vaddq_s16(v302, v266);
+    int16x8_t v304 = vaddq_s16(v303, v286);
+    int16x8_t v305 = vaddq_s16(v304, v295);
+    int16x8_t v306 = vaddq_s16(v305, v299);
+    int16x8_t v307 = vaddq_s16(v300, v306);
+    int16x8_t v308 = vqrdmulhq_n_s16(v307, 17734);
+    int16x8_t v309 = vaddq_s16(v298, v308);
+    int16x8_t v310 = vqrdmulhq_n_s16(v309, 16705);
+    int16x8_t v311 = vaddq_s16(v292, v310);
+    int16x8_t v312 = vqrdmulhq_n_s16(v311, 16463);
+    int16x8_t v313 = vaddq_s16(v275, v312);
+    int16x8_t v314 = vqrdmulhq_n_s16(v313, 16404);
+    int16x8_t v315 = vaddq_s16(v234, v314);
+    int16x8_t v316 = vqrdmulhq_n_s16(v315, 16389);
+    int16x8_t v317 = vaddq_s16(v141, v316);
+    int16x8_t v318 = vld1q_s16(in + in_stride * 2 + i);
+    int16x8_t v319_tmp = vqrdmulhq_n_s16(v318, 13573);
+    int16x8_t v319 = vaddq_s16(v319_tmp, v318);
+    int16x8_t v320 = vld1q_s16(in + in_stride * 130 + i);
+    int16x8_t v321 = vld1q_s16(in + in_stride * 126 + i);
+    int16x8_t v322 = vaddq_s16(v320, v321);
+    int16x8_t v323 = vaddq_s16(v319, v322);
+    int16x8_t v324 = vld1q_s16(in + in_stride * 66 + i);
+    int16x8_t v325 = vld1q_s16(in + in_stride * 62 + i);
+    int16x8_t v326 = vaddq_s16(v324, v325);
+    int16x8_t v327_tmp = vqrdmulhq_n_s16(v326, 13573);
+    int16x8_t v327 = vaddq_s16(v327_tmp, v326);
+    int16x8_t v328 = vld1q_s16(in + in_stride * 194 + i);
+    int16x8_t v329 = vld1q_s16(in + in_stride * 190 + i);
+    int16x8_t v330 = vaddq_s16(v328, v329);
+    int16x8_t v331 = vaddq_s16(v330, v326);
+    int16x8_t v332 = vaddq_s16(v327, v331);
+    int16x8_t v333 = vqrdmulhq_n_s16(v332, 17734);
+    int16x8_t v334 = vaddq_s16(v323, v333);
+    int16x8_t v335 = vld1q_s16(in + in_stride * 34 + i);
+    int16x8_t v336 = vld1q_s16(in + in_stride * 30 + i);
+    int16x8_t v337 = vaddq_s16(v335, v336);
+    int16x8_t v338_tmp = vqrdmulhq_n_s16(v337, 13573);
+    int16x8_t v338 = vaddq_s16(v338_tmp, v337);
+    int16x8_t v339 = vld1q_s16(in + in_stride * 162 + i);
+    int16x8_t v340 = vld1q_s16(in + in_stride * 158 + i);
+    int16x8_t v341 = vaddq_s16(v339, v340);
+    int16x8_t v342 = vld1q_s16(in + in_stride * 98 + i);
+    int16x8_t v343 = vld1q_s16(in + in_stride * 94 + i);
+    int16x8_t v344 = vaddq_s16(v342, v343);
+    int16x8_t v345 = vaddq_s16(v341, v344);
+    int16x8_t v346 = vaddq_s16(v338, v345);
+    int16x8_t v347 = vaddq_s16(v344, v337);
+    int16x8_t v348_tmp = vqrdmulhq_n_s16(v347, 13573);
+    int16x8_t v348 = vaddq_s16(v348_tmp, v347);
+    int16x8_t v349 = vld1q_s16(in + in_stride * 226 + i);
+    int16x8_t v350 = vld1q_s16(in + in_stride * 222 + i);
+    int16x8_t v351 = vaddq_s16(v349, v350);
+    int16x8_t v352 = vaddq_s16(v351, v341);
+    int16x8_t v353 = vaddq_s16(v352, v347);
+    int16x8_t v354 = vaddq_s16(v348, v353);
+    int16x8_t v355 = vqrdmulhq_n_s16(v354, 17734);
+    int16x8_t v356 = vaddq_s16(v346, v355);
+    int16x8_t v357 = vqrdmulhq_n_s16(v356, 16705);
+    int16x8_t v358 = vaddq_s16(v334, v357);
+    int16x8_t v359 = vld1q_s16(in + in_stride * 18 + i);
+    int16x8_t v360 = vld1q_s16(in + in_stride * 14 + i);
+    int16x8_t v361 = vaddq_s16(v359, v360);
+    int16x8_t v362_tmp = vqrdmulhq_n_s16(v361, 13573);
+    int16x8_t v362 = vaddq_s16(v362_tmp, v361);
+    int16x8_t v363 = vld1q_s16(in + in_stride * 146 + i);
+    int16x8_t v364 = vld1q_s16(in + in_stride * 142 + i);
+    int16x8_t v365 = vaddq_s16(v363, v364);
+    int16x8_t v366 = vld1q_s16(in + in_stride * 114 + i);
+    int16x8_t v367 = vld1q_s16(in + in_stride * 110 + i);
+    int16x8_t v368 = vaddq_s16(v366, v367);
+    int16x8_t v369 = vaddq_s16(v365, v368);
+    int16x8_t v370 = vaddq_s16(v362, v369);
+    int16x8_t v371 = vld1q_s16(in + in_stride * 82 + i);
+    int16x8_t v372 = vld1q_s16(in + in_stride * 78 + i);
+    int16x8_t v373 = vaddq_s16(v371, v372);
+    int16x8_t v374 = vld1q_s16(in + in_stride * 50 + i);
+    int16x8_t v375 = vld1q_s16(in + in_stride * 46 + i);
+    int16x8_t v376 = vaddq_s16(v374, v375);
+    int16x8_t v377 = vaddq_s16(v373, v376);
+    int16x8_t v378_tmp = vqrdmulhq_n_s16(v377, 13573);
+    int16x8_t v378 = vaddq_s16(v378_tmp, v377);
+    int16x8_t v379 = vld1q_s16(in + in_stride * 210 + i);
+    int16x8_t v380 = vld1q_s16(in + in_stride * 206 + i);
+    int16x8_t v381 = vaddq_s16(v379, v380);
+    int16x8_t v382 = vld1q_s16(in + in_stride * 178 + i);
+    int16x8_t v383 = vld1q_s16(in + in_stride * 174 + i);
+    int16x8_t v384 = vaddq_s16(v382, v383);
+    int16x8_t v385 = vaddq_s16(v381, v384);
+    int16x8_t v386 = vaddq_s16(v385, v377);
+    int16x8_t v387 = vaddq_s16(v378, v386);
+    int16x8_t v388 = vqrdmulhq_n_s16(v387, 17734);
+    int16x8_t v389 = vaddq_s16(v370, v388);
+    int16x8_t v390 = vaddq_s16(v376, v361);
+    int16x8_t v391_tmp = vqrdmulhq_n_s16(v390, 13573);
+    int16x8_t v391 = vaddq_s16(v391_tmp, v390);
+    int16x8_t v392 = vaddq_s16(v384, v365);
+    int16x8_t v393 = vaddq_s16(v368, v373);
+    int16x8_t v394 = vaddq_s16(v392, v393);
+    int16x8_t v395 = vaddq_s16(v391, v394);
+    int16x8_t v396 = vaddq_s16(v393, v390);
+    int16x8_t v397_tmp = vqrdmulhq_n_s16(v396, 13573);
+    int16x8_t v397 = vaddq_s16(v397_tmp, v396);
+    int16x8_t v398 = vld1q_s16(in + in_stride * 242 + i);
+    int16x8_t v399 = vld1q_s16(in + in_stride * 238 + i);
+    int16x8_t v400 = vaddq_s16(v398, v399);
+    int16x8_t v401 = vaddq_s16(v400, v381);
+    int16x8_t v402 = vaddq_s16(v401, v392);
+    int16x8_t v403 = vaddq_s16(v402, v396);
+    int16x8_t v404 = vaddq_s16(v397, v403);
+    int16x8_t v405 = vqrdmulhq_n_s16(v404, 17734);
+    int16x8_t v406 = vaddq_s16(v395, v405);
+    int16x8_t v407 = vqrdmulhq_n_s16(v406, 16705);
+    int16x8_t v408 = vaddq_s16(v389, v407);
+    int16x8_t v409 = vqrdmulhq_n_s16(v408, 16463);
+    int16x8_t v410 = vaddq_s16(v358, v409);
+    int16x8_t v411 = vld1q_s16(in + in_stride * 10 + i);
+    int16x8_t v412 = vld1q_s16(in + in_stride * 6 + i);
+    int16x8_t v413 = vaddq_s16(v411, v412);
+    int16x8_t v414_tmp = vqrdmulhq_n_s16(v413, 13573);
+    int16x8_t v414 = vaddq_s16(v414_tmp, v413);
+    int16x8_t v415 = vld1q_s16(in + in_stride * 138 + i);
+    int16x8_t v416 = vld1q_s16(in + in_stride * 134 + i);
+    int16x8_t v417 = vaddq_s16(v415, v416);
+    int16x8_t v418 = vld1q_s16(in + in_stride * 122 + i);
+    int16x8_t v419 = vld1q_s16(in + in_stride * 118 + i);
+    int16x8_t v420 = vaddq_s16(v418, v419);
+    int16x8_t v421 = vaddq_s16(v417, v420);
+    int16x8_t v422 = vaddq_s16(v414, v421);
+    int16x8_t v423 = vld1q_s16(in + in_stride * 74 + i);
+    int16x8_t v424 = vld1q_s16(in + in_stride * 70 + i);
+    int16x8_t v425 = vaddq_s16(v423, v424);
+    int16x8_t v426 = vld1q_s16(in + in_stride * 58 + i);
+    int16x8_t v427 = vld1q_s16(in + in_stride * 54 + i);
+    int16x8_t v428 = vaddq_s16(v426, v427);
+    int16x8_t v429 = vaddq_s16(v425, v428);
+    int16x8_t v430_tmp = vqrdmulhq_n_s16(v429, 13573);
+    int16x8_t v430 = vaddq_s16(v430_tmp, v429);
+    int16x8_t v431 = vld1q_s16(in + in_stride * 202 + i);
+    int16x8_t v432 = vld1q_s16(in + in_stride * 198 + i);
+    int16x8_t v433 = vaddq_s16(v431, v432);
+    int16x8_t v434 = vld1q_s16(in + in_stride * 186 + i);
+    int16x8_t v435 = vld1q_s16(in + in_stride * 182 + i);
+    int16x8_t v436 = vaddq_s16(v434, v435);
+    int16x8_t v437 = vaddq_s16(v433, v436);
+    int16x8_t v438 = vaddq_s16(v437, v429);
+    int16x8_t v439 = vaddq_s16(v430, v438);
+    int16x8_t v440 = vqrdmulhq_n_s16(v439, 17734);
+    int16x8_t v441 = vaddq_s16(v422, v440);
+    int16x8_t v442 = vld1q_s16(in + in_stride * 42 + i);
+    int16x8_t v443 = vld1q_s16(in + in_stride * 38 + i);
+    int16x8_t v444 = vaddq_s16(v442, v443);
+    int16x8_t v445 = vld1q_s16(in + in_stride * 26 + i);
+    int16x8_t v446 = vld1q_s16(in + in_stride * 22 + i);
+    int16x8_t v447 = vaddq_s16(v445, v446);
+    int16x8_t v448 = vaddq_s16(v444, v447);
+    int16x8_t v449_tmp = vqrdmulhq_n_s16(v448, 13573);
+    int16x8_t v449 = vaddq_s16(v449_tmp, v448);
+    int16x8_t v450 = vld1q_s16(in + in_stride * 170 + i);
+    int16x8_t v451 = vld1q_s16(in + in_stride * 166 + i);
+    int16x8_t v452 = vaddq_s16(v450, v451);
+    int16x8_t v453 = vld1q_s16(in + in_stride * 154 + i);
+    int16x8_t v454 = vld1q_s16(in + in_stride * 150 + i);
+    int16x8_t v455 = vaddq_s16(v453, v454);
+    int16x8_t v456 = vaddq_s16(v452, v455);
+    int16x8_t v457 = vld1q_s16(in + in_stride * 106 + i);
+    int16x8_t v458 = vld1q_s16(in + in_stride * 102 + i);
+    int16x8_t v459 = vaddq_s16(v457, v458);
+    int16x8_t v460 = vld1q_s16(in + in_stride * 90 + i);
+    int16x8_t v461 = vld1q_s16(in + in_stride * 86 + i);
+    int16x8_t v462 = vaddq_s16(v460, v461);
+    int16x8_t v463 = vaddq_s16(v459, v462);
+    int16x8_t v464 = vaddq_s16(v456, v463);
+    int16x8_t v465 = vaddq_s16(v449, v464);
+    int16x8_t v466 = vaddq_s16(v463, v448);
+    int16x8_t v467_tmp = vqrdmulhq_n_s16(v466, 13573);
+    int16x8_t v467 = vaddq_s16(v467_tmp, v466);
+    int16x8_t v468 = vld1q_s16(in + in_stride * 234 + i);
+    int16x8_t v469 = vld1q_s16(in + in_stride * 230 + i);
+    int16x8_t v470 = vaddq_s16(v468, v469);
+    int16x8_t v471 = vld1q_s16(in + in_stride * 218 + i);
+    int16x8_t v472 = vld1q_s16(in + in_stride * 214 + i);
+    int16x8_t v473 = vaddq_s16(v471, v472);
+    int16x8_t v474 = vaddq_s16(v470, v473);
+    int16x8_t v475 = vaddq_s16(v474, v456);
+    int16x8_t v476 = vaddq_s16(v475, v466);
+    int16x8_t v477 = vaddq_s16(v467, v476);
+    int16x8_t v478 = vqrdmulhq_n_s16(v477, 17734);
+    int16x8_t v479 = vaddq_s16(v465, v478);
+    int16x8_t v480 = vqrdmulhq_n_s16(v479, 16705);
+    int16x8_t v481 = vaddq_s16(v441, v480);
+    int16x8_t v482 = vaddq_s16(v447, v413);
+    int16x8_t v483_tmp = vqrdmulhq_n_s16(v482, 13573);
+    int16x8_t v483 = vaddq_s16(v483_tmp, v482);
+    int16x8_t v484 = vaddq_s16(v455, v417);
+    int16x8_t v485 = vaddq_s16(v420, v459);
+    int16x8_t v486 = vaddq_s16(v484, v485);
+    int16x8_t v487 = vaddq_s16(v483, v486);
+    int16x8_t v488 = vaddq_s16(v462, v425);
+    int16x8_t v489 = vaddq_s16(v428, v444);
+    int16x8_t v490 = vaddq_s16(v488, v489);
+    int16x8_t v491_tmp = vqrdmulhq_n_s16(v490, 13573);
+    int16x8_t v491 = vaddq_s16(v491_tmp, v490);
+    int16x8_t v492 = vaddq_s16(v473, v433);
+    int16x8_t v493 = vaddq_s16(v436, v452);
+    int16x8_t v494 = vaddq_s16(v492, v493);
+    int16x8_t v495 = vaddq_s16(v494, v490);
+    int16x8_t v496 = vaddq_s16(v491, v495);
+    int16x8_t v497 = vqrdmulhq_n_s16(v496, 17734);
+    int16x8_t v498 = vaddq_s16(v487, v497);
+    int16x8_t v499 = vaddq_s16(v489, v482);
+    int16x8_t v500_tmp = vqrdmulhq_n_s16(v499, 13573);
+    int16x8_t v500 = vaddq_s16(v500_tmp, v499);
+    int16x8_t v501 = vaddq_s16(v493, v484);
+    int16x8_t v502 = vaddq_s16(v485, v488);
+    int16x8_t v503 = vaddq_s16(v501, v502);
+    int16x8_t v504 = vaddq_s16(v500, v503);
+    int16x8_t v505 = vaddq_s16(v502, v499);
+    int16x8_t v506_tmp = vqrdmulhq_n_s16(v505, 13573);
+    int16x8_t v506 = vaddq_s16(v506_tmp, v505);
+    int16x8_t v507 = vld1q_s16(in + in_stride * 250 + i);
+    int16x8_t v508 = vld1q_s16(in + in_stride * 246 + i);
+    int16x8_t v509 = vaddq_s16(v507, v508);
+    int16x8_t v510 = vaddq_s16(v509, v470);
+    int16x8_t v511 = vaddq_s16(v510, v492);
+    int16x8_t v512 = vaddq_s16(v511, v501);
+    int16x8_t v513 = vaddq_s16(v512, v505);
+    int16x8_t v514 = vaddq_s16(v506, v513);
+    int16x8_t v515 = vqrdmulhq_n_s16(v514, 17734);
+    int16x8_t v516 = vaddq_s16(v504, v515);
+    int16x8_t v517 = vqrdmulhq_n_s16(v516, 16705);
+    int16x8_t v518 = vaddq_s16(v498, v517);
+    int16x8_t v519 = vqrdmulhq_n_s16(v518, 16463);
+    int16x8_t v520 = vaddq_s16(v481, v519);
+    int16x8_t v521 = vqrdmulhq_n_s16(v520, 16404);
+    int16x8_t v522 = vaddq_s16(v410, v521);
+    int16x8_t v523 = vaddq_s16(v412, v318);
+    int16x8_t v524_tmp = vqrdmulhq_n_s16(v523, 13573);
+    int16x8_t v524 = vaddq_s16(v524_tmp, v523);
+    int16x8_t v525 = vaddq_s16(v416, v320);
+    int16x8_t v526 = vaddq_s16(v321, v418);
+    int16x8_t v527 = vaddq_s16(v525, v526);
+    int16x8_t v528 = vaddq_s16(v524, v527);
+    int16x8_t v529 = vaddq_s16(v424, v324);
+    int16x8_t v530 = vaddq_s16(v325, v426);
+    int16x8_t v531 = vaddq_s16(v529, v530);
+    int16x8_t v532_tmp = vqrdmulhq_n_s16(v531, 13573);
+    int16x8_t v532 = vaddq_s16(v532_tmp, v531);
+    int16x8_t v533 = vaddq_s16(v432, v328);
+    int16x8_t v534 = vaddq_s16(v329, v434);
+    int16x8_t v535 = vaddq_s16(v533, v534);
+    int16x8_t v536 = vaddq_s16(v535, v531);
+    int16x8_t v537 = vaddq_s16(v532, v536);
+    int16x8_t v538 = vqrdmulhq_n_s16(v537, 17734);
+    int16x8_t v539 = vaddq_s16(v528, v538);
+    int16x8_t v540 = vaddq_s16(v443, v335);
+    int16x8_t v541 = vaddq_s16(v336, v445);
+    int16x8_t v542 = vaddq_s16(v540, v541);
+    int16x8_t v543_tmp = vqrdmulhq_n_s16(v542, 13573);
+    int16x8_t v543 = vaddq_s16(v543_tmp, v542);
+    int16x8_t v544 = vaddq_s16(v451, v339);
+    int16x8_t v545 = vaddq_s16(v340, v453);
+    int16x8_t v546 = vaddq_s16(v544, v545);
+    int16x8_t v547 = vaddq_s16(v458, v342);
+    int16x8_t v548 = vaddq_s16(v343, v460);
+    int16x8_t v549 = vaddq_s16(v547, v548);
+    int16x8_t v550 = vaddq_s16(v546, v549);
+    int16x8_t v551 = vaddq_s16(v543, v550);
+    int16x8_t v552 = vaddq_s16(v549, v542);
+    int16x8_t v553_tmp = vqrdmulhq_n_s16(v552, 13573);
+    int16x8_t v553 = vaddq_s16(v553_tmp, v552);
+    int16x8_t v554 = vaddq_s16(v469, v349);
+    int16x8_t v555 = vaddq_s16(v350, v471);
+    int16x8_t v556 = vaddq_s16(v554, v555);
+    int16x8_t v557 = vaddq_s16(v556, v546);
+    int16x8_t v558 = vaddq_s16(v557, v552);
+    int16x8_t v559 = vaddq_s16(v553, v558);
+    int16x8_t v560 = vqrdmulhq_n_s16(v559, 17734);
+    int16x8_t v561 = vaddq_s16(v551, v560);
+    int16x8_t v562 = vqrdmulhq_n_s16(v561, 16705);
+    int16x8_t v563 = vaddq_s16(v539, v562);
+    int16x8_t v564 = vaddq_s16(v446, v359);
+    int16x8_t v565 = vaddq_s16(v360, v411);
+    int16x8_t v566 = vaddq_s16(v564, v565);
+    int16x8_t v567_tmp = vqrdmulhq_n_s16(v566, 13573);
+    int16x8_t v567 = vaddq_s16(v567_tmp, v566);
+    int16x8_t v568 = vaddq_s16(v454, v363);
+    int16x8_t v569 = vaddq_s16(v364, v415);
+    int16x8_t v570 = vaddq_s16(v568, v569);
+    int16x8_t v571 = vaddq_s16(v419, v366);
+    int16x8_t v572 = vaddq_s16(v367, v457);
+    int16x8_t v573 = vaddq_s16(v571, v572);
+    int16x8_t v574 = vaddq_s16(v570, v573);
+    int16x8_t v575 = vaddq_s16(v567, v574);
+    int16x8_t v576 = vaddq_s16(v461, v371);
+    int16x8_t v577 = vaddq_s16(v372, v423);
+    int16x8_t v578 = vaddq_s16(v576, v577);
+    int16x8_t v579 = vaddq_s16(v427, v374);
+    int16x8_t v580 = vaddq_s16(v375, v442);
+    int16x8_t v581 = vaddq_s16(v579, v580);
+    int16x8_t v582 = vaddq_s16(v578, v581);
+    int16x8_t v583_tmp = vqrdmulhq_n_s16(v582, 13573);
+    int16x8_t v583 = vaddq_s16(v583_tmp, v582);
+    int16x8_t v584 = vaddq_s16(v472, v379);
+    int16x8_t v585 = vaddq_s16(v380, v431);
+    int16x8_t v586 = vaddq_s16(v584, v585);
+    int16x8_t v587 = vaddq_s16(v435, v382);
+    int16x8_t v588 = vaddq_s16(v383, v450);
+    int16x8_t v589 = vaddq_s16(v587, v588);
+    int16x8_t v590 = vaddq_s16(v586, v589);
+    int16x8_t v591 = vaddq_s16(v590, v582);
+    int16x8_t v592 = vaddq_s16(v583, v591);
+    int16x8_t v593 = vqrdmulhq_n_s16(v592, 17734);
+    int16x8_t v594 = vaddq_s16(v575, v593);
+    int16x8_t v595 = vaddq_s16(v581, v566);
+    int16x8_t v596_tmp = vqrdmulhq_n_s16(v595, 13573);
+    int16x8_t v596 = vaddq_s16(v596_tmp, v595);
+    int16x8_t v597 = vaddq_s16(v589, v570);
+    int16x8_t v598 = vaddq_s16(v573, v578);
+    int16x8_t v599 = vaddq_s16(v597, v598);
+    int16x8_t v600 = vaddq_s16(v596, v599);
+    int16x8_t v601 = vaddq_s16(v598, v595);
+    int16x8_t v602_tmp = vqrdmulhq_n_s16(v601, 13573);
+    int16x8_t v602 = vaddq_s16(v602_tmp, v601);
+    int16x8_t v603 = vaddq_s16(v508, v398);
+    int16x8_t v604 = vaddq_s16(v399, v468);
+    int16x8_t v605 = vaddq_s16(v603, v604);
+    int16x8_t v606 = vaddq_s16(v605, v586);
+    int16x8_t v607 = vaddq_s16(v606, v597);
+    int16x8_t v608 = vaddq_s16(v607, v601);
+    int16x8_t v609 = vaddq_s16(v602, v608);
+    int16x8_t v610 = vqrdmulhq_n_s16(v609, 17734);
+    int16x8_t v611 = vaddq_s16(v600, v610);
+    int16x8_t v612 = vqrdmulhq_n_s16(v611, 16705);
+    int16x8_t v613 = vaddq_s16(v594, v612);
+    int16x8_t v614 = vqrdmulhq_n_s16(v613, 16463);
+    int16x8_t v615 = vaddq_s16(v563, v614);
+    int16x8_t v616 = vaddq_s16(v565, v523);
+    int16x8_t v617_tmp = vqrdmulhq_n_s16(v616, 13573);
+    int16x8_t v617 = vaddq_s16(v617_tmp, v616);
+    int16x8_t v618 = vaddq_s16(v569, v525);
+    int16x8_t v619 = vaddq_s16(v526, v571);
+    int16x8_t v620 = vaddq_s16(v618, v619);
+    int16x8_t v621 = vaddq_s16(v617, v620);
+    int16x8_t v622 = vaddq_s16(v577, v529);
+    int16x8_t v623 = vaddq_s16(v530, v579);
+    int16x8_t v624 = vaddq_s16(v622, v623);
+    int16x8_t v625_tmp = vqrdmulhq_n_s16(v624, 13573);
+    int16x8_t v625 = vaddq_s16(v625_tmp, v624);
+    int16x8_t v626 = vaddq_s16(v585, v533);
+    int16x8_t v627 = vaddq_s16(v534, v587);
+    int16x8_t v628 = vaddq_s16(v626, v627);
+    int16x8_t v629 = vaddq_s16(v628, v624);
+    int16x8_t v630 = vaddq_s16(v625, v629);
+    int16x8_t v631 = vqrdmulhq_n_s16(v630, 17734);
+    int16x8_t v632 = vaddq_s16(v621, v631);
+    int16x8_t v633 = vaddq_s16(v580, v540);
+    int16x8_t v634 = vaddq_s16(v541, v564);
+    int16x8_t v635 = vaddq_s16(v633, v634);
+    int16x8_t v636_tmp = vqrdmulhq_n_s16(v635, 13573);
+    int16x8_t v636 = vaddq_s16(v636_tmp, v635);
+    int16x8_t v637 = vaddq_s16(v588, v544);
+    int16x8_t v638 = vaddq_s16(v545, v568);
+    int16x8_t v639 = vaddq_s16(v637, v638);
+    int16x8_t v640 = vaddq_s16(v572, v547);
+    int16x8_t v641 = vaddq_s16(v548, v576);
+    int16x8_t v642 = vaddq_s16(v640, v641);
+    int16x8_t v643 = vaddq_s16(v639, v642);
+    int16x8_t v644 = vaddq_s16(v636, v643);
+    int16x8_t v645 = vaddq_s16(v642, v635);
+    int16x8_t v646_tmp = vqrdmulhq_n_s16(v645, 13573);
+    int16x8_t v646 = vaddq_s16(v646_tmp, v645);
+    int16x8_t v647 = vaddq_s16(v604, v554);
+    int16x8_t v648 = vaddq_s16(v555, v584);
+    int16x8_t v649 = vaddq_s16(v647, v648);
+    int16x8_t v650 = vaddq_s16(v649, v639);
+    int16x8_t v651 = vaddq_s16(v650, v645);
+    int16x8_t v652 = vaddq_s16(v646, v651);
+    int16x8_t v653 = vqrdmulhq_n_s16(v652, 17734);
+    int16x8_t v654 = vaddq_s16(v644, v653);
+    int16x8_t v655 = vqrdmulhq_n_s16(v654, 16705);
+    int16x8_t v656 = vaddq_s16(v632, v655);
+    int16x8_t v657 = vaddq_s16(v634, v616);
+    int16x8_t v658_tmp = vqrdmulhq_n_s16(v657, 13573);
+    int16x8_t v658 = vaddq_s16(v658_tmp, v657);
+    int16x8_t v659 = vaddq_s16(v638, v618);
+    int16x8_t v660 = vaddq_s16(v619, v640);
+    int16x8_t v661 = vaddq_s16(v659, v660);
+    int16x8_t v662 = vaddq_s16(v658, v661);
+    int16x8_t v663 = vaddq_s16(v641, v622);
+    int16x8_t v664 = vaddq_s16(v623, v633);
+    int16x8_t v665 = vaddq_s16(v663, v664);
+    int16x8_t v666_tmp = vqrdmulhq_n_s16(v665, 13573);
+    int16x8_t v666 = vaddq_s16(v666_tmp, v665);
+    int16x8_t v667 = vaddq_s16(v648, v626);
+    int16x8_t v668 = vaddq_s16(v627, v637);
+    int16x8_t v669 = vaddq_s16(v667, v668);
+    int16x8_t v670 = vaddq_s16(v669, v665);
+    int16x8_t v671 = vaddq_s16(v666, v670);
+    int16x8_t v672 = vqrdmulhq_n_s16(v671, 17734);
+    int16x8_t v673 = vaddq_s16(v662, v672);
+    int16x8_t v674 = vaddq_s16(v664, v657);
+    int16x8_t v675_tmp = vqrdmulhq_n_s16(v674, 13573);
+    int16x8_t v675 = vaddq_s16(v675_tmp, v674);
+    int16x8_t v676 = vaddq_s16(v668, v659);
+    int16x8_t v677 = vaddq_s16(v660, v663);
+    int16x8_t v678 = vaddq_s16(v676, v677);
+    int16x8_t v679 = vaddq_s16(v675, v678);
+    int16x8_t v680 = vaddq_s16(v677, v674);
+    int16x8_t v681_tmp = vqrdmulhq_n_s16(v680, 13573);
+    int16x8_t v681 = vaddq_s16(v681_tmp, v680);
+    int16x8_t v682 = vld1q_s16(in + in_stride * 254 + i);
+    int16x8_t v683 = vaddq_s16(v682, v507);
+    int16x8_t v684 = vaddq_s16(v683, v603);
+    int16x8_t v685 = vaddq_s16(v684, v647);
+    int16x8_t v686 = vaddq_s16(v685, v667);
+    int16x8_t v687 = vaddq_s16(v686, v676);
+    int16x8_t v688 = vaddq_s16(v687, v680);
+    int16x8_t v689 = vaddq_s16(v681, v688);
+    int16x8_t v690 = vqrdmulhq_n_s16(v689, 17734);
+    int16x8_t v691 = vaddq_s16(v679, v690);
+    int16x8_t v692 = vqrdmulhq_n_s16(v691, 16705);
+    int16x8_t v693 = vaddq_s16(v673, v692);
+    int16x8_t v694 = vqrdmulhq_n_s16(v693, 16463);
+    int16x8_t v695 = vaddq_s16(v656, v694);
+    int16x8_t v696 = vqrdmulhq_n_s16(v695, 16404);
+    int16x8_t v697 = vaddq_s16(v615, v696);
+    int16x8_t v698 = vqrdmulhq_n_s16(v697, 16389);
+    int16x8_t v699 = vaddq_s16(v522, v698);
+    int16x8_t v700 = vqrdmulhq_n_s16(v699, 16385);
+    int16x8_t v701 = vaddq_s16(v317, v700);
+    int16x8_t v702 = vld1q_s16(in + in_stride * 1 + i);
+    int16x8_t v703_tmp = vqrdmulhq_n_s16(v702, 13573);
+    int16x8_t v703 = vaddq_s16(v703_tmp, v702);
+    int16x8_t v704 = vld1q_s16(in + in_stride * 129 + i);
+    int16x8_t v705 = vld1q_s16(in + in_stride * 127 + i);
+    int16x8_t v706 = vaddq_s16(v704, v705);
+    int16x8_t v707 = vaddq_s16(v703, v706);
+    int16x8_t v708 = vld1q_s16(in + in_stride * 65 + i);
+    int16x8_t v709 = vld1q_s16(in + in_stride * 63 + i);
+    int16x8_t v710 = vaddq_s16(v708, v709);
+    int16x8_t v711_tmp = vqrdmulhq_n_s16(v710, 13573);
+    int16x8_t v711 = vaddq_s16(v711_tmp, v710);
+    int16x8_t v712 = vld1q_s16(in + in_stride * 193 + i);
+    int16x8_t v713 = vld1q_s16(in + in_stride * 191 + i);
+    int16x8_t v714 = vaddq_s16(v712, v713);
+    int16x8_t v715 = vaddq_s16(v714, v710);
+    int16x8_t v716 = vaddq_s16(v711, v715);
+    int16x8_t v717 = vqrdmulhq_n_s16(v716, 17734);
+    int16x8_t v718 = vaddq_s16(v707, v717);
+    int16x8_t v719 = vld1q_s16(in + in_stride * 33 + i);
+    int16x8_t v720 = vld1q_s16(in + in_stride * 31 + i);
+    int16x8_t v721 = vaddq_s16(v719, v720);
+    int16x8_t v722_tmp = vqrdmulhq_n_s16(v721, 13573);
+    int16x8_t v722 = vaddq_s16(v722_tmp, v721);
+    int16x8_t v723 = vld1q_s16(in + in_stride * 161 + i);
+    int16x8_t v724 = vld1q_s16(in + in_stride * 159 + i);
+    int16x8_t v725 = vaddq_s16(v723, v724);
+    int16x8_t v726 = vld1q_s16(in + in_stride * 97 + i);
+    int16x8_t v727 = vld1q_s16(in + in_stride * 95 + i);
+    int16x8_t v728 = vaddq_s16(v726, v727);
+    int16x8_t v729 = vaddq_s16(v725, v728);
+    int16x8_t v730 = vaddq_s16(v722, v729);
+    int16x8_t v731 = vaddq_s16(v728, v721);
+    int16x8_t v732_tmp = vqrdmulhq_n_s16(v731, 13573);
+    int16x8_t v732 = vaddq_s16(v732_tmp, v731);
+    int16x8_t v733 = vld1q_s16(in + in_stride * 225 + i);
+    int16x8_t v734 = vld1q_s16(in + in_stride * 223 + i);
+    int16x8_t v735 = vaddq_s16(v733, v734);
+    int16x8_t v736 = vaddq_s16(v735, v725);
+    int16x8_t v737 = vaddq_s16(v736, v731);
+    int16x8_t v738 = vaddq_s16(v732, v737);
+    int16x8_t v739 = vqrdmulhq_n_s16(v738, 17734);
+    int16x8_t v740 = vaddq_s16(v730, v739);
+    int16x8_t v741 = vqrdmulhq_n_s16(v740, 16705);
+    int16x8_t v742 = vaddq_s16(v718, v741);
+    int16x8_t v743 = vld1q_s16(in + in_stride * 17 + i);
+    int16x8_t v744 = vld1q_s16(in + in_stride * 15 + i);
+    int16x8_t v745 = vaddq_s16(v743, v744);
+    int16x8_t v746_tmp = vqrdmulhq_n_s16(v745, 13573);
+    int16x8_t v746 = vaddq_s16(v746_tmp, v745);
+    int16x8_t v747 = vld1q_s16(in + in_stride * 145 + i);
+    int16x8_t v748 = vld1q_s16(in + in_stride * 143 + i);
+    int16x8_t v749 = vaddq_s16(v747, v748);
+    int16x8_t v750 = vld1q_s16(in + in_stride * 113 + i);
+    int16x8_t v751 = vld1q_s16(in + in_stride * 111 + i);
+    int16x8_t v752 = vaddq_s16(v750, v751);
+    int16x8_t v753 = vaddq_s16(v749, v752);
+    int16x8_t v754 = vaddq_s16(v746, v753);
+    int16x8_t v755 = vld1q_s16(in + in_stride * 81 + i);
+    int16x8_t v756 = vld1q_s16(in + in_stride * 79 + i);
+    int16x8_t v757 = vaddq_s16(v755, v756);
+    int16x8_t v758 = vld1q_s16(in + in_stride * 49 + i);
+    int16x8_t v759 = vld1q_s16(in + in_stride * 47 + i);
+    int16x8_t v760 = vaddq_s16(v758, v759);
+    int16x8_t v761 = vaddq_s16(v757, v760);
+    int16x8_t v762_tmp = vqrdmulhq_n_s16(v761, 13573);
+    int16x8_t v762 = vaddq_s16(v762_tmp, v761);
+    int16x8_t v763 = vld1q_s16(in + in_stride * 209 + i);
+    int16x8_t v764 = vld1q_s16(in + in_stride * 207 + i);
+    int16x8_t v765 = vaddq_s16(v763, v764);
+    int16x8_t v766 = vld1q_s16(in + in_stride * 177 + i);
+    int16x8_t v767 = vld1q_s16(in + in_stride * 175 + i);
+    int16x8_t v768 = vaddq_s16(v766, v767);
+    int16x8_t v769 = vaddq_s16(v765, v768);
+    int16x8_t v770 = vaddq_s16(v769, v761);
+    int16x8_t v771 = vaddq_s16(v762, v770);
+    int16x8_t v772 = vqrdmulhq_n_s16(v771, 17734);
+    int16x8_t v773 = vaddq_s16(v754, v772);
+    int16x8_t v774 = vaddq_s16(v760, v745);
+    int16x8_t v775_tmp = vqrdmulhq_n_s16(v774, 13573);
+    int16x8_t v775 = vaddq_s16(v775_tmp, v774);
+    int16x8_t v776 = vaddq_s16(v768, v749);
+    int16x8_t v777 = vaddq_s16(v752, v757);
+    int16x8_t v778 = vaddq_s16(v776, v777);
+    int16x8_t v779 = vaddq_s16(v775, v778);
+    int16x8_t v780 = vaddq_s16(v777, v774);
+    int16x8_t v781_tmp = vqrdmulhq_n_s16(v780, 13573);
+    int16x8_t v781 = vaddq_s16(v781_tmp, v780);
+    int16x8_t v782 = vld1q_s16(in + in_stride * 241 + i);
+    int16x8_t v783 = vld1q_s16(in + in_stride * 239 + i);
+    int16x8_t v784 = vaddq_s16(v782, v783);
+    int16x8_t v785 = vaddq_s16(v784, v765);
+    int16x8_t v786 = vaddq_s16(v785, v776);
+    int16x8_t v787 = vaddq_s16(v786, v780);
+    int16x8_t v788 = vaddq_s16(v781, v787);
+    int16x8_t v789 = vqrdmulhq_n_s16(v788, 17734);
+    int16x8_t v790 = vaddq_s16(v779, v789);
+    int16x8_t v791 = vqrdmulhq_n_s16(v790, 16705);
+    int16x8_t v792 = vaddq_s16(v773, v791);
+    int16x8_t v793 = vqrdmulhq_n_s16(v792, 16463);
+    int16x8_t v794 = vaddq_s16(v742, v793);
+    int16x8_t v795 = vld1q_s16(in + in_stride * 9 + i);
+    int16x8_t v796 = vld1q_s16(in + in_stride * 7 + i);
+    int16x8_t v797 = vaddq_s16(v795, v796);
+    int16x8_t v798_tmp = vqrdmulhq_n_s16(v797, 13573);
+    int16x8_t v798 = vaddq_s16(v798_tmp, v797);
+    int16x8_t v799 = vld1q_s16(in + in_stride * 137 + i);
+    int16x8_t v800 = vld1q_s16(in + in_stride * 135 + i);
+    int16x8_t v801 = vaddq_s16(v799, v800);
+    int16x8_t v802 = vld1q_s16(in + in_stride * 121 + i);
+    int16x8_t v803 = vld1q_s16(in + in_stride * 119 + i);
+    int16x8_t v804 = vaddq_s16(v802, v803);
+    int16x8_t v805 = vaddq_s16(v801, v804);
+    int16x8_t v806 = vaddq_s16(v798, v805);
+    int16x8_t v807 = vld1q_s16(in + in_stride * 73 + i);
+    int16x8_t v808 = vld1q_s16(in + in_stride * 71 + i);
+    int16x8_t v809 = vaddq_s16(v807, v808);
+    int16x8_t v810 = vld1q_s16(in + in_stride * 57 + i);
+    int16x8_t v811 = vld1q_s16(in + in_stride * 55 + i);
+    int16x8_t v812 = vaddq_s16(v810, v811);
+    int16x8_t v813 = vaddq_s16(v809, v812);
+    int16x8_t v814_tmp = vqrdmulhq_n_s16(v813, 13573);
+    int16x8_t v814 = vaddq_s16(v814_tmp, v813);
+    int16x8_t v815 = vld1q_s16(in + in_stride * 201 + i);
+    int16x8_t v816 = vld1q_s16(in + in_stride * 199 + i);
+    int16x8_t v817 = vaddq_s16(v815, v816);
+    int16x8_t v818 = vld1q_s16(in + in_stride * 185 + i);
+    int16x8_t v819 = vld1q_s16(in + in_stride * 183 + i);
+    int16x8_t v820 = vaddq_s16(v818, v819);
+    int16x8_t v821 = vaddq_s16(v817, v820);
+    int16x8_t v822 = vaddq_s16(v821, v813);
+    int16x8_t v823 = vaddq_s16(v814, v822);
+    int16x8_t v824 = vqrdmulhq_n_s16(v823, 17734);
+    int16x8_t v825 = vaddq_s16(v806, v824);
+    int16x8_t v826 = vld1q_s16(in + in_stride * 41 + i);
+    int16x8_t v827 = vld1q_s16(in + in_stride * 39 + i);
+    int16x8_t v828 = vaddq_s16(v826, v827);
+    int16x8_t v829 = vld1q_s16(in + in_stride * 25 + i);
+    int16x8_t v830 = vld1q_s16(in + in_stride * 23 + i);
+    int16x8_t v831 = vaddq_s16(v829, v830);
+    int16x8_t v832 = vaddq_s16(v828, v831);
+    int16x8_t v833_tmp = vqrdmulhq_n_s16(v832, 13573);
+    int16x8_t v833 = vaddq_s16(v833_tmp, v832);
+    int16x8_t v834 = vld1q_s16(in + in_stride * 169 + i);
+    int16x8_t v835 = vld1q_s16(in + in_stride * 167 + i);
+    int16x8_t v836 = vaddq_s16(v834, v835);
+    int16x8_t v837 = vld1q_s16(in + in_stride * 153 + i);
+    int16x8_t v838 = vld1q_s16(in + in_stride * 151 + i);
+    int16x8_t v839 = vaddq_s16(v837, v838);
+    int16x8_t v840 = vaddq_s16(v836, v839);
+    int16x8_t v841 = vld1q_s16(in + in_stride * 105 + i);
+    int16x8_t v842 = vld1q_s16(in + in_stride * 103 + i);
+    int16x8_t v843 = vaddq_s16(v841, v842);
+    int16x8_t v844 = vld1q_s16(in + in_stride * 89 + i);
+    int16x8_t v845 = vld1q_s16(in + in_stride * 87 + i);
+    int16x8_t v846 = vaddq_s16(v844, v845);
+    int16x8_t v847 = vaddq_s16(v843, v846);
+    int16x8_t v848 = vaddq_s16(v840, v847);
+    int16x8_t v849 = vaddq_s16(v833, v848);
+    int16x8_t v850 = vaddq_s16(v847, v832);
+    int16x8_t v851_tmp = vqrdmulhq_n_s16(v850, 13573);
+    int16x8_t v851 = vaddq_s16(v851_tmp, v850);
+    int16x8_t v852 = vld1q_s16(in + in_stride * 233 + i);
+    int16x8_t v853 = vld1q_s16(in + in_stride * 231 + i);
+    int16x8_t v854 = vaddq_s16(v852, v853);
+    int16x8_t v855 = vld1q_s16(in + in_stride * 217 + i);
+    int16x8_t v856 = vld1q_s16(in + in_stride * 215 + i);
+    int16x8_t v857 = vaddq_s16(v855, v856);
+    int16x8_t v858 = vaddq_s16(v854, v857);
+    int16x8_t v859 = vaddq_s16(v858, v840);
+    int16x8_t v860 = vaddq_s16(v859, v850);
+    int16x8_t v861 = vaddq_s16(v851, v860);
+    int16x8_t v862 = vqrdmulhq_n_s16(v861, 17734);
+    int16x8_t v863 = vaddq_s16(v849, v862);
+    int16x8_t v864 = vqrdmulhq_n_s16(v863, 16705);
+    int16x8_t v865 = vaddq_s16(v825, v864);
+    int16x8_t v866 = vaddq_s16(v831, v797);
+    int16x8_t v867_tmp = vqrdmulhq_n_s16(v866, 13573);
+    int16x8_t v867 = vaddq_s16(v867_tmp, v866);
+    int16x8_t v868 = vaddq_s16(v839, v801);
+    int16x8_t v869 = vaddq_s16(v804, v843);
+    int16x8_t v870 = vaddq_s16(v868, v869);
+    int16x8_t v871 = vaddq_s16(v867, v870);
+    int16x8_t v872 = vaddq_s16(v846, v809);
+    int16x8_t v873 = vaddq_s16(v812, v828);
+    int16x8_t v874 = vaddq_s16(v872, v873);
+    int16x8_t v875_tmp = vqrdmulhq_n_s16(v874, 13573);
+    int16x8_t v875 = vaddq_s16(v875_tmp, v874);
+    int16x8_t v876 = vaddq_s16(v857, v817);
+    int16x8_t v877 = vaddq_s16(v820, v836);
+    int16x8_t v878 = vaddq_s16(v876, v877);
+    int16x8_t v879 = vaddq_s16(v878, v874);
+    int16x8_t v880 = vaddq_s16(v875, v879);
+    int16x8_t v881 = vqrdmulhq_n_s16(v880, 17734);
+    int16x8_t v882 = vaddq_s16(v871, v881);
+    int16x8_t v883 = vaddq_s16(v873, v866);
+    int16x8_t v884_tmp = vqrdmulhq_n_s16(v883, 13573);
+    int16x8_t v884 = vaddq_s16(v884_tmp, v883);
+    int16x8_t v885 = vaddq_s16(v877, v868);
+    int16x8_t v886 = vaddq_s16(v869, v872);
+    int16x8_t v887 = vaddq_s16(v885, v886);
+    int16x8_t v888 = vaddq_s16(v884, v887);
+    int16x8_t v889 = vaddq_s16(v886, v883);
+    int16x8_t v890_tmp = vqrdmulhq_n_s16(v889, 13573);
+    int16x8_t v890 = vaddq_s16(v890_tmp, v889);
+    int16x8_t v891 = vld1q_s16(in + in_stride * 249 + i);
+    int16x8_t v892 = vld1q_s16(in + in_stride * 247 + i);
+    int16x8_t v893 = vaddq_s16(v891, v892);
+    int16x8_t v894 = vaddq_s16(v893, v854);
+    int16x8_t v895 = vaddq_s16(v894, v876);
+    int16x8_t v896 = vaddq_s16(v895, v885);
+    int16x8_t v897 = vaddq_s16(v896, v889);
+    int16x8_t v898 = vaddq_s16(v890, v897);
+    int16x8_t v899 = vqrdmulhq_n_s16(v898, 17734);
+    int16x8_t v900 = vaddq_s16(v888, v899);
+    int16x8_t v901 = vqrdmulhq_n_s16(v900, 16705);
+    int16x8_t v902 = vaddq_s16(v882, v901);
+    int16x8_t v903 = vqrdmulhq_n_s16(v902, 16463);
+    int16x8_t v904 = vaddq_s16(v865, v903);
+    int16x8_t v905 = vqrdmulhq_n_s16(v904, 16404);
+    int16x8_t v906 = vaddq_s16(v794, v905);
+    int16x8_t v907 = vld1q_s16(in + in_stride * 5 + i);
+    int16x8_t v908 = vld1q_s16(in + in_stride * 3 + i);
+    int16x8_t v909 = vaddq_s16(v907, v908);
+    int16x8_t v910_tmp = vqrdmulhq_n_s16(v909, 13573);
+    int16x8_t v910 = vaddq_s16(v910_tmp, v909);
+    int16x8_t v911 = vld1q_s16(in + in_stride * 133 + i);
+    int16x8_t v912 = vld1q_s16(in + in_stride * 131 + i);
+    int16x8_t v913 = vaddq_s16(v911, v912);
+    int16x8_t v914 = vld1q_s16(in + in_stride * 125 + i);
+    int16x8_t v915 = vld1q_s16(in + in_stride * 123 + i);
+    int16x8_t v916 = vaddq_s16(v914, v915);
+    int16x8_t v917 = vaddq_s16(v913, v916);
+    int16x8_t v918 = vaddq_s16(v910, v917);
+    int16x8_t v919 = vld1q_s16(in + in_stride * 69 + i);
+    int16x8_t v920 = vld1q_s16(in + in_stride * 67 + i);
+    int16x8_t v921 = vaddq_s16(v919, v920);
+    int16x8_t v922 = vld1q_s16(in + in_stride * 61 + i);
+    int16x8_t v923 = vld1q_s16(in + in_stride * 59 + i);
+    int16x8_t v924 = vaddq_s16(v922, v923);
+    int16x8_t v925 = vaddq_s16(v921, v924);
+    int16x8_t v926_tmp = vqrdmulhq_n_s16(v925, 13573);
+    int16x8_t v926 = vaddq_s16(v926_tmp, v925);
+    int16x8_t v927 = vld1q_s16(in + in_stride * 197 + i);
+    int16x8_t v928 = vld1q_s16(in + in_stride * 195 + i);
+    int16x8_t v929 = vaddq_s16(v927, v928);
+    int16x8_t v930 = vld1q_s16(in + in_stride * 189 + i);
+    int16x8_t v931 = vld1q_s16(in + in_stride * 187 + i);
+    int16x8_t v932 = vaddq_s16(v930, v931);
+    int16x8_t v933 = vaddq_s16(v929, v932);
+    int16x8_t v934 = vaddq_s16(v933, v925);
+    int16x8_t v935 = vaddq_s16(v926, v934);
+    int16x8_t v936 = vqrdmulhq_n_s16(v935, 17734);
+    int16x8_t v937 = vaddq_s16(v918, v936);
+    int16x8_t v938 = vld1q_s16(in + in_stride * 37 + i);
+    int16x8_t v939 = vld1q_s16(in + in_stride * 35 + i);
+    int16x8_t v940 = vaddq_s16(v938, v939);
+    int16x8_t v941 = vld1q_s16(in + in_stride * 29 + i);
+    int16x8_t v942 = vld1q_s16(in + in_stride * 27 + i);
+    int16x8_t v943 = vaddq_s16(v941, v942);
+    int16x8_t v944 = vaddq_s16(v940, v943);
+    int16x8_t v945_tmp = vqrdmulhq_n_s16(v944, 13573);
+    int16x8_t v945 = vaddq_s16(v945_tmp, v944);
+    int16x8_t v946 = vld1q_s16(in + in_stride * 165 + i);
+    int16x8_t v947 = vld1q_s16(in + in_stride * 163 + i);
+    int16x8_t v948 = vaddq_s16(v946, v947);
+    int16x8_t v949 = vld1q_s16(in + in_stride * 157 + i);
+    int16x8_t v950 = vld1q_s16(in + in_stride * 155 + i);
+    int16x8_t v951 = vaddq_s16(v949, v950);
+    int16x8_t v952 = vaddq_s16(v948, v951);
+    int16x8_t v953 = vld1q_s16(in + in_stride * 101 + i);
+    int16x8_t v954 = vld1q_s16(in + in_stride * 99 + i);
+    int16x8_t v955 = vaddq_s16(v953, v954);
+    int16x8_t v956 = vld1q_s16(in + in_stride * 93 + i);
+    int16x8_t v957 = vld1q_s16(in + in_stride * 91 + i);
+    int16x8_t v958 = vaddq_s16(v956, v957);
+    int16x8_t v959 = vaddq_s16(v955, v958);
+    int16x8_t v960 = vaddq_s16(v952, v959);
+    int16x8_t v961 = vaddq_s16(v945, v960);
+    int16x8_t v962 = vaddq_s16(v959, v944);
+    int16x8_t v963_tmp = vqrdmulhq_n_s16(v962, 13573);
+    int16x8_t v963 = vaddq_s16(v963_tmp, v962);
+    int16x8_t v964 = vld1q_s16(in + in_stride * 229 + i);
+    int16x8_t v965 = vld1q_s16(in + in_stride * 227 + i);
+    int16x8_t v966 = vaddq_s16(v964, v965);
+    int16x8_t v967 = vld1q_s16(in + in_stride * 221 + i);
+    int16x8_t v968 = vld1q_s16(in + in_stride * 219 + i);
+    int16x8_t v969 = vaddq_s16(v967, v968);
+    int16x8_t v970 = vaddq_s16(v966, v969);
+    int16x8_t v971 = vaddq_s16(v970, v952);
+    int16x8_t v972 = vaddq_s16(v971, v962);
+    int16x8_t v973 = vaddq_s16(v963, v972);
+    int16x8_t v974 = vqrdmulhq_n_s16(v973, 17734);
+    int16x8_t v975 = vaddq_s16(v961, v974);
+    int16x8_t v976 = vqrdmulhq_n_s16(v975, 16705);
+    int16x8_t v977 = vaddq_s16(v937, v976);
+    int16x8_t v978 = vld1q_s16(in + in_stride * 21 + i);
+    int16x8_t v979 = vld1q_s16(in + in_stride * 19 + i);
+    int16x8_t v980 = vaddq_s16(v978, v979);
+    int16x8_t v981 = vld1q_s16(in + in_stride * 13 + i);
+    int16x8_t v982 = vld1q_s16(in + in_stride * 11 + i);
+    int16x8_t v983 = vaddq_s16(v981, v982);
+    int16x8_t v984 = vaddq_s16(v980, v983);
+    int16x8_t v985_tmp = vqrdmulhq_n_s16(v984, 13573);
+    int16x8_t v985 = vaddq_s16(v985_tmp, v984);
+    int16x8_t v986 = vld1q_s16(in + in_stride * 149 + i);
+    int16x8_t v987 = vld1q_s16(in + in_stride * 147 + i);
+    int16x8_t v988 = vaddq_s16(v986, v987);
+    int16x8_t v989 = vld1q_s16(in + in_stride * 141 + i);
+    int16x8_t v990 = vld1q_s16(in + in_stride * 139 + i);
+    int16x8_t v991 = vaddq_s16(v989, v990);
+    int16x8_t v992 = vaddq_s16(v988, v991);
+    int16x8_t v993 = vld1q_s16(in + in_stride * 117 + i);
+    int16x8_t v994 = vld1q_s16(in + in_stride * 115 + i);
+    int16x8_t v995 = vaddq_s16(v993, v994);
+    int16x8_t v996 = vld1q_s16(in + in_stride * 109 + i);
+    int16x8_t v997 = vld1q_s16(in + in_stride * 107 + i);
+    int16x8_t v998 = vaddq_s16(v996, v997);
+    int16x8_t v999 = vaddq_s16(v995, v998);
+    int16x8_t v1000 = vaddq_s16(v992, v999);
+    int16x8_t v1001 = vaddq_s16(v985, v1000);
+    int16x8_t v1002 = vld1q_s16(in + in_stride * 85 + i);
+    int16x8_t v1003 = vld1q_s16(in + in_stride * 83 + i);
+    int16x8_t v1004 = vaddq_s16(v1002, v1003);
+    int16x8_t v1005 = vld1q_s16(in + in_stride * 77 + i);
+    int16x8_t v1006 = vld1q_s16(in + in_stride * 75 + i);
+    int16x8_t v1007 = vaddq_s16(v1005, v1006);
+    int16x8_t v1008 = vaddq_s16(v1004, v1007);
+    int16x8_t v1009 = vld1q_s16(in + in_stride * 53 + i);
+    int16x8_t v1010 = vld1q_s16(in + in_stride * 51 + i);
+    int16x8_t v1011 = vaddq_s16(v1009, v1010);
+    int16x8_t v1012 = vld1q_s16(in + in_stride * 45 + i);
+    int16x8_t v1013 = vld1q_s16(in + in_stride * 43 + i);
+    int16x8_t v1014 = vaddq_s16(v1012, v1013);
+    int16x8_t v1015 = vaddq_s16(v1011, v1014);
+    int16x8_t v1016 = vaddq_s16(v1008, v1015);
+    int16x8_t v1017_tmp = vqrdmulhq_n_s16(v1016, 13573);
+    int16x8_t v1017 = vaddq_s16(v1017_tmp, v1016);
+    int16x8_t v1018 = vld1q_s16(in + in_stride * 213 + i);
+    int16x8_t v1019 = vld1q_s16(in + in_stride * 211 + i);
+    int16x8_t v1020 = vaddq_s16(v1018, v1019);
+    int16x8_t v1021 = vld1q_s16(in + in_stride * 205 + i);
+    int16x8_t v1022 = vld1q_s16(in + in_stride * 203 + i);
+    int16x8_t v1023 = vaddq_s16(v1021, v1022);
+    int16x8_t v1024 = vaddq_s16(v1020, v1023);
+    int16x8_t v1025 = vld1q_s16(in + in_stride * 181 + i);
+    int16x8_t v1026 = vld1q_s16(in + in_stride * 179 + i);
+    int16x8_t v1027 = vaddq_s16(v1025, v1026);
+    int16x8_t v1028 = vld1q_s16(in + in_stride * 173 + i);
+    int16x8_t v1029 = vld1q_s16(in + in_stride * 171 + i);
+    int16x8_t v1030 = vaddq_s16(v1028, v1029);
+    int16x8_t v1031 = vaddq_s16(v1027, v1030);
+    int16x8_t v1032 = vaddq_s16(v1024, v1031);
+    int16x8_t v1033 = vaddq_s16(v1032, v1016);
+    int16x8_t v1034 = vaddq_s16(v1017, v1033);
+    int16x8_t v1035 = vqrdmulhq_n_s16(v1034, 17734);
+    int16x8_t v1036 = vaddq_s16(v1001, v1035);
+    int16x8_t v1037 = vaddq_s16(v1015, v984);
+    int16x8_t v1038_tmp = vqrdmulhq_n_s16(v1037, 13573);
+    int16x8_t v1038 = vaddq_s16(v1038_tmp, v1037);
+    int16x8_t v1039 = vaddq_s16(v1031, v992);
+    int16x8_t v1040 = vaddq_s16(v999, v1008);
+    int16x8_t v1041 = vaddq_s16(v1039, v1040);
+    int16x8_t v1042 = vaddq_s16(v1038, v1041);
+    int16x8_t v1043 = vaddq_s16(v1040, v1037);
+    int16x8_t v1044_tmp = vqrdmulhq_n_s16(v1043, 13573);
+    int16x8_t v1044 = vaddq_s16(v1044_tmp, v1043);
+    int16x8_t v1045 = vld1q_s16(in + in_stride * 245 + i);
+    int16x8_t v1046 = vld1q_s16(in + in_stride * 243 + i);
+    int16x8_t v1047 = vaddq_s16(v1045, v1046);
+    int16x8_t v1048 = vld1q_s16(in + in_stride * 237 + i);
+    int16x8_t v1049 = vld1q_s16(in + in_stride * 235 + i);
+    int16x8_t v1050 = vaddq_s16(v1048, v1049);
+    int16x8_t v1051 = vaddq_s16(v1047, v1050);
+    int16x8_t v1052 = vaddq_s16(v1051, v1024);
+    int16x8_t v1053 = vaddq_s16(v1052, v1039);
+    int16x8_t v1054 = vaddq_s16(v1053, v1043);
+    int16x8_t v1055 = vaddq_s16(v1044, v1054);
+    int16x8_t v1056 = vqrdmulhq_n_s16(v1055, 17734);
+    int16x8_t v1057 = vaddq_s16(v1042, v1056);
+    int16x8_t v1058 = vqrdmulhq_n_s16(v1057, 16705);
+    int16x8_t v1059 = vaddq_s16(v1036, v1058);
+    int16x8_t v1060 = vqrdmulhq_n_s16(v1059, 16463);
+    int16x8_t v1061 = vaddq_s16(v977, v1060);
+    int16x8_t v1062 = vaddq_s16(v983, v909);
+    int16x8_t v1063_tmp = vqrdmulhq_n_s16(v1062, 13573);
+    int16x8_t v1063 = vaddq_s16(v1063_tmp, v1062);
+    int16x8_t v1064 = vaddq_s16(v991, v913);
+    int16x8_t v1065 = vaddq_s16(v916, v995);
+    int16x8_t v1066 = vaddq_s16(v1064, v1065);
+    int16x8_t v1067 = vaddq_s16(v1063, v1066);
+    int16x8_t v1068 = vaddq_s16(v1007, v921);
+    int16x8_t v1069 = vaddq_s16(v924, v1011);
+    int16x8_t v1070 = vaddq_s16(v1068, v1069);
+    int16x8_t v1071_tmp = vqrdmulhq_n_s16(v1070, 13573);
+    int16x8_t v1071 = vaddq_s16(v1071_tmp, v1070);
+    int16x8_t v1072 = vaddq_s16(v1023, v929);
+    int16x8_t v1073 = vaddq_s16(v932, v1027);
+    int16x8_t v1074 = vaddq_s16(v1072, v1073);
+    int16x8_t v1075 = vaddq_s16(v1074, v1070);
+    int16x8_t v1076 = vaddq_s16(v1071, v1075);
+    int16x8_t v1077 = vqrdmulhq_n_s16(v1076, 17734);
+    int16x8_t v1078 = vaddq_s16(v1067, v1077);
+    int16x8_t v1079 = vaddq_s16(v1014, v940);
+    int16x8_t v1080 = vaddq_s16(v943, v980);
+    int16x8_t v1081 = vaddq_s16(v1079, v1080);
+    int16x8_t v1082_tmp = vqrdmulhq_n_s16(v1081, 13573);
+    int16x8_t v1082 = vaddq_s16(v1082_tmp, v1081);
+    int16x8_t v1083 = vaddq_s16(v1030, v948);
+    int16x8_t v1084 = vaddq_s16(v951, v988);
+    int16x8_t v1085 = vaddq_s16(v1083, v1084);
+    int16x8_t v1086 = vaddq_s16(v998, v955);
+    int16x8_t v1087 = vaddq_s16(v958, v1004);
+    int16x8_t v1088 = vaddq_s16(v1086, v1087);
+    int16x8_t v1089 = vaddq_s16(v1085, v1088);
+    int16x8_t v1090 = vaddq_s16(v1082, v1089);
+    int16x8_t v1091 = vaddq_s16(v1088, v1081);
+    int16x8_t v1092_tmp = vqrdmulhq_n_s16(v1091, 13573);
+    int16x8_t v1092 = vaddq_s16(v1092_tmp, v1091);
+    int16x8_t v1093 = vaddq_s16(v1050, v966);
+    int16x8_t v1094 = vaddq_s16(v969, v1020);
+    int16x8_t v1095 = vaddq_s16(v1093, v1094);
+    int16x8_t v1096 = vaddq_s16(v1095, v1085);
+    int16x8_t v1097 = vaddq_s16(v1096, v1091);
+    int16x8_t v1098 = vaddq_s16(v1092, v1097);
+    int16x8_t v1099 = vqrdmulhq_n_s16(v1098, 17734);
+    int16x8_t v1100 = vaddq_s16(v1090, v1099);
+    int16x8_t v1101 = vqrdmulhq_n_s16(v1100, 16705);
+    int16x8_t v1102 = vaddq_s16(v1078, v1101);
+    int16x8_t v1103 = vaddq_s16(v1080, v1062);
+    int16x8_t v1104_tmp = vqrdmulhq_n_s16(v1103, 13573);
+    int16x8_t v1104 = vaddq_s16(v1104_tmp, v1103);
+    int16x8_t v1105 = vaddq_s16(v1084, v1064);
+    int16x8_t v1106 = vaddq_s16(v1065, v1086);
+    int16x8_t v1107 = vaddq_s16(v1105, v1106);
+    int16x8_t v1108 = vaddq_s16(v1104, v1107);
+    int16x8_t v1109 = vaddq_s16(v1087, v1068);
+    int16x8_t v1110 = vaddq_s16(v1069, v1079);
+    int16x8_t v1111 = vaddq_s16(v1109, v1110);
+    int16x8_t v1112_tmp = vqrdmulhq_n_s16(v1111, 13573);
+    int16x8_t v1112 = vaddq_s16(v1112_tmp, v1111);
+    int16x8_t v1113 = vaddq_s16(v1094, v1072);
+    int16x8_t v1114 = vaddq_s16(v1073, v1083);
+    int16x8_t v1115 = vaddq_s16(v1113, v1114);
+    int16x8_t v1116 = vaddq_s16(v1115, v1111);
+    int16x8_t v1117 = vaddq_s16(v1112, v1116);
+    int16x8_t v1118 = vqrdmulhq_n_s16(v1117, 17734);
+    int16x8_t v1119 = vaddq_s16(v1108, v1118);
+    int16x8_t v1120 = vaddq_s16(v1110, v1103);
+    int16x8_t v1121_tmp = vqrdmulhq_n_s16(v1120, 13573);
+    int16x8_t v1121 = vaddq_s16(v1121_tmp, v1120);
+    int16x8_t v1122 = vaddq_s16(v1114, v1105);
+    int16x8_t v1123 = vaddq_s16(v1106, v1109);
+    int16x8_t v1124 = vaddq_s16(v1122, v1123);
+    int16x8_t v1125 = vaddq_s16(v1121, v1124);
+    int16x8_t v1126 = vaddq_s16(v1123, v1120);
+    int16x8_t v1127_tmp = vqrdmulhq_n_s16(v1126, 13573);
+    int16x8_t v1127 = vaddq_s16(v1127_tmp, v1126);
+    int16x8_t v1128 = vld1q_s16(in + in_stride * 253 + i);
+    int16x8_t v1129 = vld1q_s16(in + in_stride * 251 + i);
+    int16x8_t v1130 = vaddq_s16(v1128, v1129);
+    int16x8_t v1131 = vaddq_s16(v1130, v1047);
+    int16x8_t v1132 = vaddq_s16(v1131, v1093);
+    int16x8_t v1133 = vaddq_s16(v1132, v1113);
+    int16x8_t v1134 = vaddq_s16(v1133, v1122);
+    int16x8_t v1135 = vaddq_s16(v1134, v1126);
+    int16x8_t v1136 = vaddq_s16(v1127, v1135);
+    int16x8_t v1137 = vqrdmulhq_n_s16(v1136, 17734);
+    int16x8_t v1138 = vaddq_s16(v1125, v1137);
+    int16x8_t v1139 = vqrdmulhq_n_s16(v1138, 16705);
+    int16x8_t v1140 = vaddq_s16(v1119, v1139);
+    int16x8_t v1141 = vqrdmulhq_n_s16(v1140, 16463);
+    int16x8_t v1142 = vaddq_s16(v1102, v1141);
+    int16x8_t v1143 = vqrdmulhq_n_s16(v1142, 16404);
+    int16x8_t v1144 = vaddq_s16(v1061, v1143);
+    int16x8_t v1145 = vqrdmulhq_n_s16(v1144, 16389);
+    int16x8_t v1146 = vaddq_s16(v906, v1145);
+    int16x8_t v1147 = vaddq_s16(v908, v702);
+    int16x8_t v1148_tmp = vqrdmulhq_n_s16(v1147, 13573);
+    int16x8_t v1148 = vaddq_s16(v1148_tmp, v1147);
+    int16x8_t v1149 = vaddq_s16(v912, v704);
+    int16x8_t v1150 = vaddq_s16(v705, v914);
+    int16x8_t v1151 = vaddq_s16(v1149, v1150);
+    int16x8_t v1152 = vaddq_s16(v1148, v1151);
+    int16x8_t v1153 = vaddq_s16(v920, v708);
+    int16x8_t v1154 = vaddq_s16(v709, v922);
+    int16x8_t v1155 = vaddq_s16(v1153, v1154);
+    int16x8_t v1156_tmp = vqrdmulhq_n_s16(v1155, 13573);
+    int16x8_t v1156 = vaddq_s16(v1156_tmp, v1155);
+    int16x8_t v1157 = vaddq_s16(v928, v712);
+    int16x8_t v1158 = vaddq_s16(v713, v930);
+    int16x8_t v1159 = vaddq_s16(v1157, v1158);
+    int16x8_t v1160 = vaddq_s16(v1159, v1155);
+    int16x8_t v1161 = vaddq_s16(v1156, v1160);
+    int16x8_t v1162 = vqrdmulhq_n_s16(v1161, 17734);
+    int16x8_t v1163 = vaddq_s16(v1152, v1162);
+    int16x8_t v1164 = vaddq_s16(v939, v719);
+    int16x8_t v1165 = vaddq_s16(v720, v941);
+    int16x8_t v1166 = vaddq_s16(v1164, v1165);
+    int16x8_t v1167_tmp = vqrdmulhq_n_s16(v1166, 13573);
+    int16x8_t v1167 = vaddq_s16(v1167_tmp, v1166);
+    int16x8_t v1168 = vaddq_s16(v947, v723);
+    int16x8_t v1169 = vaddq_s16(v724, v949);
+    int16x8_t v1170 = vaddq_s16(v1168, v1169);
+    int16x8_t v1171 = vaddq_s16(v954, v726);
+    int16x8_t v1172 = vaddq_s16(v727, v956);
+    int16x8_t v1173 = vaddq_s16(v1171, v1172);
+    int16x8_t v1174 = vaddq_s16(v1170, v1173);
+    int16x8_t v1175 = vaddq_s16(v1167, v1174);
+    int16x8_t v1176 = vaddq_s16(v1173, v1166);
+    int16x8_t v1177_tmp = vqrdmulhq_n_s16(v1176, 13573);
+    int16x8_t v1177 = vaddq_s16(v1177_tmp, v1176);
+    int16x8_t v1178 = vaddq_s16(v965, v733);
+    int16x8_t v1179 = vaddq_s16(v734, v967);
+    int16x8_t v1180 = vaddq_s16(v1178, v1179);
+    int16x8_t v1181 = vaddq_s16(v1180, v1170);
+    int16x8_t v1182 = vaddq_s16(v1181, v1176);
+    int16x8_t v1183 = vaddq_s16(v1177, v1182);
+    int16x8_t v1184 = vqrdmulhq_n_s16(v1183, 17734);
+    int16x8_t v1185 = vaddq_s16(v1175, v1184);
+    int16x8_t v1186 = vqrdmulhq_n_s16(v1185, 16705);
+    int16x8_t v1187 = vaddq_s16(v1163, v1186);
+    int16x8_t v1188 = vaddq_s16(v979, v743);
+    int16x8_t v1189 = vaddq_s16(v744, v981);
+    int16x8_t v1190 = vaddq_s16(v1188, v1189);
+    int16x8_t v1191_tmp = vqrdmulhq_n_s16(v1190, 13573);
+    int16x8_t v1191 = vaddq_s16(v1191_tmp, v1190);
+    int16x8_t v1192 = vaddq_s16(v987, v747);
+    int16x8_t v1193 = vaddq_s16(v748, v989);
+    int16x8_t v1194 = vaddq_s16(v1192, v1193);
+    int16x8_t v1195 = vaddq_s16(v994, v750);
+    int16x8_t v1196 = vaddq_s16(v751, v996);
+    int16x8_t v1197 = vaddq_s16(v1195, v1196);
+    int16x8_t v1198 = vaddq_s16(v1194, v1197);
+    int16x8_t v1199 = vaddq_s16(v1191, v1198);
+    int16x8_t v1200 = vaddq_s16(v1003, v755);
+    int16x8_t v1201 = vaddq_s16(v756, v1005);
+    int16x8_t v1202 = vaddq_s16(v1200, v1201);
+    int16x8_t v1203 = vaddq_s16(v1010, v758);
+    int16x8_t v1204 = vaddq_s16(v759, v1012);
+    int16x8_t v1205 = vaddq_s16(v1203, v1204);
+    int16x8_t v1206 = vaddq_s16(v1202, v1205);
+    int16x8_t v1207_tmp = vqrdmulhq_n_s16(v1206, 13573);
+    int16x8_t v1207 = vaddq_s16(v1207_tmp, v1206);
+    int16x8_t v1208 = vaddq_s16(v1019, v763);
+    int16x8_t v1209 = vaddq_s16(v764, v1021);
+    int16x8_t v1210 = vaddq_s16(v1208, v1209);
+    int16x8_t v1211 = vaddq_s16(v1026, v766);
+    int16x8_t v1212 = vaddq_s16(v767, v1028);
+    int16x8_t v1213 = vaddq_s16(v1211, v1212);
+    int16x8_t v1214 = vaddq_s16(v1210, v1213);
+    int16x8_t v1215 = vaddq_s16(v1214, v1206);
+    int16x8_t v1216 = vaddq_s16(v1207, v1215);
+    int16x8_t v1217 = vqrdmulhq_n_s16(v1216, 17734);
+    int16x8_t v1218 = vaddq_s16(v1199, v1217);
+    int16x8_t v1219 = vaddq_s16(v1205, v1190);
+    int16x8_t v1220_tmp = vqrdmulhq_n_s16(v1219, 13573);
+    int16x8_t v1220 = vaddq_s16(v1220_tmp, v1219);
+    int16x8_t v1221 = vaddq_s16(v1213, v1194);
+    int16x8_t v1222 = vaddq_s16(v1197, v1202);
+    int16x8_t v1223 = vaddq_s16(v1221, v1222);
+    int16x8_t v1224 = vaddq_s16(v1220, v1223);
+    int16x8_t v1225 = vaddq_s16(v1222, v1219);
+    int16x8_t v1226_tmp = vqrdmulhq_n_s16(v1225, 13573);
+    int16x8_t v1226 = vaddq_s16(v1226_tmp, v1225);
+    int16x8_t v1227 = vaddq_s16(v1046, v782);
+    int16x8_t v1228 = vaddq_s16(v783, v1048);
+    int16x8_t v1229 = vaddq_s16(v1227, v1228);
+    int16x8_t v1230 = vaddq_s16(v1229, v1210);
+    int16x8_t v1231 = vaddq_s16(v1230, v1221);
+    int16x8_t v1232 = vaddq_s16(v1231, v1225);
+    int16x8_t v1233 = vaddq_s16(v1226, v1232);
+    int16x8_t v1234 = vqrdmulhq_n_s16(v1233, 17734);
+    int16x8_t v1235 = vaddq_s16(v1224, v1234);
+    int16x8_t v1236 = vqrdmulhq_n_s16(v1235, 16705);
+    int16x8_t v1237 = vaddq_s16(v1218, v1236);
+    int16x8_t v1238 = vqrdmulhq_n_s16(v1237, 16463);
+    int16x8_t v1239 = vaddq_s16(v1187, v1238);
+    int16x8_t v1240 = vaddq_s16(v982, v795);
+    int16x8_t v1241 = vaddq_s16(v796, v907);
+    int16x8_t v1242 = vaddq_s16(v1240, v1241);
+    int16x8_t v1243_tmp = vqrdmulhq_n_s16(v1242, 13573);
+    int16x8_t v1243 = vaddq_s16(v1243_tmp, v1242);
+    int16x8_t v1244 = vaddq_s16(v990, v799);
+    int16x8_t v1245 = vaddq_s16(v800, v911);
+    int16x8_t v1246 = vaddq_s16(v1244, v1245);
+    int16x8_t v1247 = vaddq_s16(v915, v802);
+    int16x8_t v1248 = vaddq_s16(v803, v993);
+    int16x8_t v1249 = vaddq_s16(v1247, v1248);
+    int16x8_t v1250 = vaddq_s16(v1246, v1249);
+    int16x8_t v1251 = vaddq_s16(v1243, v1250);
+    int16x8_t v1252 = vaddq_s16(v1006, v807);
+    int16x8_t v1253 = vaddq_s16(v808, v919);
+    int16x8_t v1254 = vaddq_s16(v1252, v1253);
+    int16x8_t v1255 = vaddq_s16(v923, v810);
+    int16x8_t v1256 = vaddq_s16(v811, v1009);
+    int16x8_t v1257 = vaddq_s16(v1255, v1256);
+    int16x8_t v1258 = vaddq_s16(v1254, v1257);
+    int16x8_t v1259_tmp = vqrdmulhq_n_s16(v1258, 13573);
+    int16x8_t v1259 = vaddq_s16(v1259_tmp, v1258);
+    int16x8_t v1260 = vaddq_s16(v1022, v815);
+    int16x8_t v1261 = vaddq_s16(v816, v927);
+    int16x8_t v1262 = vaddq_s16(v1260, v1261);
+    int16x8_t v1263 = vaddq_s16(v931, v818);
+    int16x8_t v1264 = vaddq_s16(v819, v1025);
+    int16x8_t v1265 = vaddq_s16(v1263, v1264);
+    int16x8_t v1266 = vaddq_s16(v1262, v1265);
+    int16x8_t v1267 = vaddq_s16(v1266, v1258);
+    int16x8_t v1268 = vaddq_s16(v1259, v1267);
+    int16x8_t v1269 = vqrdmulhq_n_s16(v1268, 17734);
+    int16x8_t v1270 = vaddq_s16(v1251, v1269);
+    int16x8_t v1271 = vaddq_s16(v1013, v826);
+    int16x8_t v1272 = vaddq_s16(v827, v938);
+    int16x8_t v1273 = vaddq_s16(v1271, v1272);
+    int16x8_t v1274 = vaddq_s16(v942, v829);
+    int16x8_t v1275 = vaddq_s16(v830, v978);
+    int16x8_t v1276 = vaddq_s16(v1274, v1275);
+    int16x8_t v1277 = vaddq_s16(v1273, v1276);
+    int16x8_t v1278_tmp = vqrdmulhq_n_s16(v1277, 13573);
+    int16x8_t v1278 = vaddq_s16(v1278_tmp, v1277);
+    int16x8_t v1279 = vaddq_s16(v1029, v834);
+    int16x8_t v1280 = vaddq_s16(v835, v946);
+    int16x8_t v1281 = vaddq_s16(v1279, v1280);
+    int16x8_t v1282 = vaddq_s16(v950, v837);
+    int16x8_t v1283 = vaddq_s16(v838, v986);
+    int16x8_t v1284 = vaddq_s16(v1282, v1283);
+    int16x8_t v1285 = vaddq_s16(v1281, v1284);
+    int16x8_t v1286 = vaddq_s16(v997, v841);
+    int16x8_t v1287 = vaddq_s16(v842, v953);
+    int16x8_t v1288 = vaddq_s16(v1286, v1287);
+    int16x8_t v1289 = vaddq_s16(v957, v844);
+    int16x8_t v1290 = vaddq_s16(v845, v1002);
+    int16x8_t v1291 = vaddq_s16(v1289, v1290);
+    int16x8_t v1292 = vaddq_s16(v1288, v1291);
+    int16x8_t v1293 = vaddq_s16(v1285, v1292);
+    int16x8_t v1294 = vaddq_s16(v1278, v1293);
+    int16x8_t v1295 = vaddq_s16(v1292, v1277);
+    int16x8_t v1296_tmp = vqrdmulhq_n_s16(v1295, 13573);
+    int16x8_t v1296 = vaddq_s16(v1296_tmp, v1295);
+    int16x8_t v1297 = vaddq_s16(v1049, v852);
+    int16x8_t v1298 = vaddq_s16(v853, v964);
+    int16x8_t v1299 = vaddq_s16(v1297, v1298);
+    int16x8_t v1300 = vaddq_s16(v968, v855);
+    int16x8_t v1301 = vaddq_s16(v856, v1018);
+    int16x8_t v1302 = vaddq_s16(v1300, v1301);
+    int16x8_t v1303 = vaddq_s16(v1299, v1302);
+    int16x8_t v1304 = vaddq_s16(v1303, v1285);
+    int16x8_t v1305 = vaddq_s16(v1304, v1295);
+    int16x8_t v1306 = vaddq_s16(v1296, v1305);
+    int16x8_t v1307 = vqrdmulhq_n_s16(v1306, 17734);
+    int16x8_t v1308 = vaddq_s16(v1294, v1307);
+    int16x8_t v1309 = vqrdmulhq_n_s16(v1308, 16705);
+    int16x8_t v1310 = vaddq_s16(v1270, v1309);
+    int16x8_t v1311 = vaddq_s16(v1276, v1242);
+    int16x8_t v1312_tmp = vqrdmulhq_n_s16(v1311, 13573);
+    int16x8_t v1312 = vaddq_s16(v1312_tmp, v1311);
+    int16x8_t v1313 = vaddq_s16(v1284, v1246);
+    int16x8_t v1314 = vaddq_s16(v1249, v1288);
+    int16x8_t v1315 = vaddq_s16(v1313, v1314);
+    int16x8_t v1316 = vaddq_s16(v1312, v1315);
+    int16x8_t v1317 = vaddq_s16(v1291, v1254);
+    int16x8_t v1318 = vaddq_s16(v1257, v1273);
+    int16x8_t v1319 = vaddq_s16(v1317, v1318);
+    int16x8_t v1320_tmp = vqrdmulhq_n_s16(v1319, 13573);
+    int16x8_t v1320 = vaddq_s16(v1320_tmp, v1319);
+    int16x8_t v1321 = vaddq_s16(v1302, v1262);
+    int16x8_t v1322 = vaddq_s16(v1265, v1281);
+    int16x8_t v1323 = vaddq_s16(v1321, v1322);
+    int16x8_t v1324 = vaddq_s16(v1323, v1319);
+    int16x8_t v1325 = vaddq_s16(v1320, v1324);
+    int16x8_t v1326 = vqrdmulhq_n_s16(v1325, 17734);
+    int16x8_t v1327 = vaddq_s16(v1316, v1326);
+    int16x8_t v1328 = vaddq_s16(v1318, v1311);
+    int16x8_t v1329_tmp = vqrdmulhq_n_s16(v1328, 13573);
+    int16x8_t v1329 = vaddq_s16(v1329_tmp, v1328);
+    int16x8_t v1330 = vaddq_s16(v1322, v1313);
+    int16x8_t v1331 = vaddq_s16(v1314, v1317);
+    int16x8_t v1332 = vaddq_s16(v1330, v1331);
+    int16x8_t v1333 = vaddq_s16(v1329, v1332);
+    int16x8_t v1334 = vaddq_s16(v1331, v1328);
+    int16x8_t v1335_tmp = vqrdmulhq_n_s16(v1334, 13573);
+    int16x8_t v1335 = vaddq_s16(v1335_tmp, v1334);
+    int16x8_t v1336 = vaddq_s16(v1129, v891);
+    int16x8_t v1337 = vaddq_s16(v892, v1045);
+    int16x8_t v1338 = vaddq_s16(v1336, v1337);
+    int16x8_t v1339 = vaddq_s16(v1338, v1299);
+    int16x8_t v1340 = vaddq_s16(v1339, v1321);
+    int16x8_t v1341 = vaddq_s16(v1340, v1330);
+    int16x8_t v1342 = vaddq_s16(v1341, v1334);
+    int16x8_t v1343 = vaddq_s16(v1335, v1342);
+    int16x8_t v1344 = vqrdmulhq_n_s16(v1343, 17734);
+    int16x8_t v1345 = vaddq_s16(v1333, v1344);
+    int16x8_t v1346 = vqrdmulhq_n_s16(v1345, 16705);
+    int16x8_t v1347 = vaddq_s16(v1327, v1346);
+    int16x8_t v1348 = vqrdmulhq_n_s16(v1347, 16463);
+    int16x8_t v1349 = vaddq_s16(v1310, v1348);
+    int16x8_t v1350 = vqrdmulhq_n_s16(v1349, 16404);
+    int16x8_t v1351 = vaddq_s16(v1239, v1350);
+    int16x8_t v1352 = vaddq_s16(v1241, v1147);
+    int16x8_t v1353_tmp = vqrdmulhq_n_s16(v1352, 13573);
+    int16x8_t v1353 = vaddq_s16(v1353_tmp, v1352);
+    int16x8_t v1354 = vaddq_s16(v1245, v1149);
+    int16x8_t v1355 = vaddq_s16(v1150, v1247);
+    int16x8_t v1356 = vaddq_s16(v1354, v1355);
+    int16x8_t v1357 = vaddq_s16(v1353, v1356);
+    int16x8_t v1358 = vaddq_s16(v1253, v1153);
+    int16x8_t v1359 = vaddq_s16(v1154, v1255);
+    int16x8_t v1360 = vaddq_s16(v1358, v1359);
+    int16x8_t v1361_tmp = vqrdmulhq_n_s16(v1360, 13573);
+    int16x8_t v1361 = vaddq_s16(v1361_tmp, v1360);
+    int16x8_t v1362 = vaddq_s16(v1261, v1157);
+    int16x8_t v1363 = vaddq_s16(v1158, v1263);
+    int16x8_t v1364 = vaddq_s16(v1362, v1363);
+    int16x8_t v1365 = vaddq_s16(v1364, v1360);
+    int16x8_t v1366 = vaddq_s16(v1361, v1365);
+    int16x8_t v1367 = vqrdmulhq_n_s16(v1366, 17734);
+    int16x8_t v1368 = vaddq_s16(v1357, v1367);
+    int16x8_t v1369 = vaddq_s16(v1272, v1164);
+    int16x8_t v1370 = vaddq_s16(v1165, v1274);
+    int16x8_t v1371 = vaddq_s16(v1369, v1370);
+    int16x8_t v1372_tmp = vqrdmulhq_n_s16(v1371, 13573);
+    int16x8_t v1372 = vaddq_s16(v1372_tmp, v1371);
+    int16x8_t v1373 = vaddq_s16(v1280, v1168);
+    int16x8_t v1374 = vaddq_s16(v1169, v1282);
+    int16x8_t v1375 = vaddq_s16(v1373, v1374);
+    int16x8_t v1376 = vaddq_s16(v1287, v1171);
+    int16x8_t v1377 = vaddq_s16(v1172, v1289);
+    int16x8_t v1378 = vaddq_s16(v1376, v1377);
+    int16x8_t v1379 = vaddq_s16(v1375, v1378);
+    int16x8_t v1380 = vaddq_s16(v1372, v1379);
+    int16x8_t v1381 = vaddq_s16(v1378, v1371);
+    int16x8_t v1382_tmp = vqrdmulhq_n_s16(v1381, 13573);
+    int16x8_t v1382 = vaddq_s16(v1382_tmp, v1381);
+    int16x8_t v1383 = vaddq_s16(v1298, v1178);
+    int16x8_t v1384 = vaddq_s16(v1179, v1300);
+    int16x8_t v1385 = vaddq_s16(v1383, v1384);
+    int16x8_t v1386 = vaddq_s16(v1385, v1375);
+    int16x8_t v1387 = vaddq_s16(v1386, v1381);
+    int16x8_t v1388 = vaddq_s16(v1382, v1387);
+    int16x8_t v1389 = vqrdmulhq_n_s16(v1388, 17734);
+    int16x8_t v1390 = vaddq_s16(v1380, v1389);
+    int16x8_t v1391 = vqrdmulhq_n_s16(v1390, 16705);
+    int16x8_t v1392 = vaddq_s16(v1368, v1391);
+    int16x8_t v1393 = vaddq_s16(v1275, v1188);
+    int16x8_t v1394 = vaddq_s16(v1189, v1240);
+    int16x8_t v1395 = vaddq_s16(v1393, v1394);
+    int16x8_t v1396_tmp = vqrdmulhq_n_s16(v1395, 13573);
+    int16x8_t v1396 = vaddq_s16(v1396_tmp, v1395);
+    int16x8_t v1397 = vaddq_s16(v1283, v1192);
+    int16x8_t v1398 = vaddq_s16(v1193, v1244);
+    int16x8_t v1399 = vaddq_s16(v1397, v1398);
+    int16x8_t v1400 = vaddq_s16(v1248, v1195);
+    int16x8_t v1401 = vaddq_s16(v1196, v1286);
+    int16x8_t v1402 = vaddq_s16(v1400, v1401);
+    int16x8_t v1403 = vaddq_s16(v1399, v1402);
+    int16x8_t v1404 = vaddq_s16(v1396, v1403);
+    int16x8_t v1405 = vaddq_s16(v1290, v1200);
+    int16x8_t v1406 = vaddq_s16(v1201, v1252);
+    int16x8_t v1407 = vaddq_s16(v1405, v1406);
+    int16x8_t v1408 = vaddq_s16(v1256, v1203);
+    int16x8_t v1409 = vaddq_s16(v1204, v1271);
+    int16x8_t v1410 = vaddq_s16(v1408, v1409);
+    int16x8_t v1411 = vaddq_s16(v1407, v1410);
+    int16x8_t v1412_tmp = vqrdmulhq_n_s16(v1411, 13573);
+    int16x8_t v1412 = vaddq_s16(v1412_tmp, v1411);
+    int16x8_t v1413 = vaddq_s16(v1301, v1208);
+    int16x8_t v1414 = vaddq_s16(v1209, v1260);
+    int16x8_t v1415 = vaddq_s16(v1413, v1414);
+    int16x8_t v1416 = vaddq_s16(v1264, v1211);
+    int16x8_t v1417 = vaddq_s16(v1212, v1279);
+    int16x8_t v1418 = vaddq_s16(v1416, v1417);
+    int16x8_t v1419 = vaddq_s16(v1415, v1418);
+    int16x8_t v1420 = vaddq_s16(v1419, v1411);
+    int16x8_t v1421 = vaddq_s16(v1412, v1420);
+    int16x8_t v1422 = vqrdmulhq_n_s16(v1421, 17734);
+    int16x8_t v1423 = vaddq_s16(v1404, v1422);
+    int16x8_t v1424 = vaddq_s16(v1410, v1395);
+    int16x8_t v1425_tmp = vqrdmulhq_n_s16(v1424, 13573);
+    int16x8_t v1425 = vaddq_s16(v1425_tmp, v1424);
+    int16x8_t v1426 = vaddq_s16(v1418, v1399);
+    int16x8_t v1427 = vaddq_s16(v1402, v1407);
+    int16x8_t v1428 = vaddq_s16(v1426, v1427);
+    int16x8_t v1429 = vaddq_s16(v1425, v1428);
+    int16x8_t v1430 = vaddq_s16(v1427, v1424);
+    int16x8_t v1431_tmp = vqrdmulhq_n_s16(v1430, 13573);
+    int16x8_t v1431 = vaddq_s16(v1431_tmp, v1430);
+    int16x8_t v1432 = vaddq_s16(v1337, v1227);
+    int16x8_t v1433 = vaddq_s16(v1228, v1297);
+    int16x8_t v1434 = vaddq_s16(v1432, v1433);
+    int16x8_t v1435 = vaddq_s16(v1434, v1415);
+    int16x8_t v1436 = vaddq_s16(v1435, v1426);
+    int16x8_t v1437 = vaddq_s16(v1436, v1430);
+    int16x8_t v1438 = vaddq_s16(v1431, v1437);
+    int16x8_t v1439 = vqrdmulhq_n_s16(v1438, 17734);
+    int16x8_t v1440 = vaddq_s16(v1429, v1439);
+    int16x8_t v1441 = vqrdmulhq_n_s16(v1440, 16705);
+    int16x8_t v1442 = vaddq_s16(v1423, v1441);
+    int16x8_t v1443 = vqrdmulhq_n_s16(v1442, 16463);
+    int16x8_t v1444 = vaddq_s16(v1392, v1443);
+    int16x8_t v1445 = vaddq_s16(v1394, v1352);
+    int16x8_t v1446_tmp = vqrdmulhq_n_s16(v1445, 13573);
+    int16x8_t v1446 = vaddq_s16(v1446_tmp, v1445);
+    int16x8_t v1447 = vaddq_s16(v1398, v1354);
+    int16x8_t v1448 = vaddq_s16(v1355, v1400);
+    int16x8_t v1449 = vaddq_s16(v1447, v1448);
+    int16x8_t v1450 = vaddq_s16(v1446, v1449);
+    int16x8_t v1451 = vaddq_s16(v1406, v1358);
+    int16x8_t v1452 = vaddq_s16(v1359, v1408);
+    int16x8_t v1453 = vaddq_s16(v1451, v1452);
+    int16x8_t v1454_tmp = vqrdmulhq_n_s16(v1453, 13573);
+    int16x8_t v1454 = vaddq_s16(v1454_tmp, v1453);
+    int16x8_t v1455 = vaddq_s16(v1414, v1362);
+    int16x8_t v1456 = vaddq_s16(v1363, v1416);
+    int16x8_t v1457 = vaddq_s16(v1455, v1456);
+    int16x8_t v1458 = vaddq_s16(v1457, v1453);
+    int16x8_t v1459 = vaddq_s16(v1454, v1458);
+    int16x8_t v1460 = vqrdmulhq_n_s16(v1459, 17734);
+    int16x8_t v1461 = vaddq_s16(v1450, v1460);
+    int16x8_t v1462 = vaddq_s16(v1409, v1369);
+    int16x8_t v1463 = vaddq_s16(v1370, v1393);
+    int16x8_t v1464 = vaddq_s16(v1462, v1463);
+    int16x8_t v1465_tmp = vqrdmulhq_n_s16(v1464, 13573);
+    int16x8_t v1465 = vaddq_s16(v1465_tmp, v1464);
+    int16x8_t v1466 = vaddq_s16(v1417, v1373);
+    int16x8_t v1467 = vaddq_s16(v1374, v1397);
+    int16x8_t v1468 = vaddq_s16(v1466, v1467);
+    int16x8_t v1469 = vaddq_s16(v1401, v1376);
+    int16x8_t v1470 = vaddq_s16(v1377, v1405);
+    int16x8_t v1471 = vaddq_s16(v1469, v1470);
+    int16x8_t v1472 = vaddq_s16(v1468, v1471);
+    int16x8_t v1473 = vaddq_s16(v1465, v1472);
+    int16x8_t v1474 = vaddq_s16(v1471, v1464);
+    int16x8_t v1475_tmp = vqrdmulhq_n_s16(v1474, 13573);
+    int16x8_t v1475 = vaddq_s16(v1475_tmp, v1474);
+    int16x8_t v1476 = vaddq_s16(v1433, v1383);
+    int16x8_t v1477 = vaddq_s16(v1384, v1413);
+    int16x8_t v1478 = vaddq_s16(v1476, v1477);
+    int16x8_t v1479 = vaddq_s16(v1478, v1468);
+    int16x8_t v1480 = vaddq_s16(v1479, v1474);
+    int16x8_t v1481 = vaddq_s16(v1475, v1480);
+    int16x8_t v1482 = vqrdmulhq_n_s16(v1481, 17734);
+    int16x8_t v1483 = vaddq_s16(v1473, v1482);
+    int16x8_t v1484 = vqrdmulhq_n_s16(v1483, 16705);
+    int16x8_t v1485 = vaddq_s16(v1461, v1484);
+    int16x8_t v1486 = vaddq_s16(v1463, v1445);
+    int16x8_t v1487_tmp = vqrdmulhq_n_s16(v1486, 13573);
+    int16x8_t v1487 = vaddq_s16(v1487_tmp, v1486);
+    int16x8_t v1488 = vaddq_s16(v1467, v1447);
+    int16x8_t v1489 = vaddq_s16(v1448, v1469);
+    int16x8_t v1490 = vaddq_s16(v1488, v1489);
+    int16x8_t v1491 = vaddq_s16(v1487, v1490);
+    int16x8_t v1492 = vaddq_s16(v1470, v1451);
+    int16x8_t v1493 = vaddq_s16(v1452, v1462);
+    int16x8_t v1494 = vaddq_s16(v1492, v1493);
+    int16x8_t v1495_tmp = vqrdmulhq_n_s16(v1494, 13573);
+    int16x8_t v1495 = vaddq_s16(v1495_tmp, v1494);
+    int16x8_t v1496 = vaddq_s16(v1477, v1455);
+    int16x8_t v1497 = vaddq_s16(v1456, v1466);
+    int16x8_t v1498 = vaddq_s16(v1496, v1497);
+    int16x8_t v1499 = vaddq_s16(v1498, v1494);
+    int16x8_t v1500 = vaddq_s16(v1495, v1499);
+    int16x8_t v1501 = vqrdmulhq_n_s16(v1500, 17734);
+    int16x8_t v1502 = vaddq_s16(v1491, v1501);
+    int16x8_t v1503 = vaddq_s16(v1493, v1486);
+    int16x8_t v1504_tmp = vqrdmulhq_n_s16(v1503, 13573);
+    int16x8_t v1504 = vaddq_s16(v1504_tmp, v1503);
+    int16x8_t v1505 = vaddq_s16(v1497, v1488);
+    int16x8_t v1506 = vaddq_s16(v1489, v1492);
+    int16x8_t v1507 = vaddq_s16(v1505, v1506);
+    int16x8_t v1508 = vaddq_s16(v1504, v1507);
+    int16x8_t v1509 = vaddq_s16(v1506, v1503);
+    int16x8_t v1510_tmp = vqrdmulhq_n_s16(v1509, 13573);
+    int16x8_t v1510 = vaddq_s16(v1510_tmp, v1509);
+    int16x8_t v1511 = vld1q_s16(in + in_stride * 255 + i);
+    int16x8_t v1512 = vaddq_s16(v1511, v1128);
+    int16x8_t v1513 = vaddq_s16(v1512, v1336);
+    int16x8_t v1514 = vaddq_s16(v1513, v1432);
+    int16x8_t v1515 = vaddq_s16(v1514, v1476);
+    int16x8_t v1516 = vaddq_s16(v1515, v1496);
+    int16x8_t v1517 = vaddq_s16(v1516, v1505);
+    int16x8_t v1518 = vaddq_s16(v1517, v1509);
+    int16x8_t v1519 = vaddq_s16(v1510, v1518);
+    int16x8_t v1520 = vqrdmulhq_n_s16(v1519, 17734);
+    int16x8_t v1521 = vaddq_s16(v1508, v1520);
+    int16x8_t v1522 = vqrdmulhq_n_s16(v1521, 16705);
+    int16x8_t v1523 = vaddq_s16(v1502, v1522);
+    int16x8_t v1524 = vqrdmulhq_n_s16(v1523, 16463);
+    int16x8_t v1525 = vaddq_s16(v1485, v1524);
+    int16x8_t v1526 = vqrdmulhq_n_s16(v1525, 16404);
+    int16x8_t v1527 = vaddq_s16(v1444, v1526);
+    int16x8_t v1528 = vqrdmulhq_n_s16(v1527, 16389);
+    int16x8_t v1529 = vaddq_s16(v1351, v1528);
+    int16x8_t v1530 = vqrdmulhq_n_s16(v1529, 16385);
+    int16x8_t v1531 = vaddq_s16(v1146, v1530);
+    int16x8_t v1532 = vqrdmulhq_n_s16(v1531, 16384);
+    int16x8_t v1533 = vaddq_s16(v701, v1532);
+    int16x8_t v1534 = vsubq_s16(v0, v1);
+    int16x8_t v1535 = vsubq_s16(v4, v6);
+    int16x8_t v1536_tmp = vqrdmulhq_n_s16(v1535, 10045);
+    int16x8_t v1536 = vaddq_s16(v1536_tmp, v1535);
+    int16x8_t v1537 = vaddq_s16(v1534, v1536);
+    int16x8_t v1538 = vsubq_s16(v11, v14);
+    int16x8_t v1539 = vsubq_s16(v17, v20);
+    int16x8_t v1540_tmp = vqrdmulhq_n_s16(v1539, 10045);
+    int16x8_t v1540 = vaddq_s16(v1540_tmp, v1539);
+    int16x8_t v1541 = vaddq_s16(v1538, v1540);
+    int16x8_t v1542 = vqrdmulhq_n_s16(v1541, 19705);
+    int16x8_t v1543 = vaddq_s16(v1537, v1542);
+    int16x8_t v1544 = vsubq_s16(v27, v30);
+    int16x8_t v1545 = vsubq_s16(v35, v39);
+    int16x8_t v1546_tmp = vqrdmulhq_n_s16(v1545, 10045);
+    int16x8_t v1546 = vaddq_s16(v1546_tmp, v1545);
+    int16x8_t v1547 = vaddq_s16(v1544, v1546);
+    int16x8_t v1548 = vsubq_s16(v44, v47);
+    int16x8_t v1549 = vsubq_s16(v50, v54);
+    int16x8_t v1550_tmp = vqrdmulhq_n_s16(v1549, 10045);
+    int16x8_t v1550 = vaddq_s16(v1550_tmp, v1549);
+    int16x8_t v1551 = vaddq_s16(v1548, v1550);
+    int16x8_t v1552 = vqrdmulhq_n_s16(v1551, 19705);
+    int16x8_t v1553 = vaddq_s16(v1547, v1552);
+    int16x8_t v1554 = vqrdmulhq_n_s16(v1553, 17121);
+    int16x8_t v1555 = vaddq_s16(v1543, v1554);
+    int16x8_t v1556 = vsubq_s16(v63, v66);
+    int16x8_t v1557 = vsubq_s16(v71, v75);
+    int16x8_t v1558_tmp = vqrdmulhq_n_s16(v1557, 10045);
+    int16x8_t v1558 = vaddq_s16(v1558_tmp, v1557);
+    int16x8_t v1559 = vaddq_s16(v1556, v1558);
+    int16x8_t v1560 = vsubq_s16(v82, v89);
+    int16x8_t v1561 = vsubq_s16(v92, v97);
+    int16x8_t v1562_tmp = vqrdmulhq_n_s16(v1561, 10045);
+    int16x8_t v1562 = vaddq_s16(v1562_tmp, v1561);
+    int16x8_t v1563 = vaddq_s16(v1560, v1562);
+    int16x8_t v1564 = vqrdmulhq_n_s16(v1563, 19705);
+    int16x8_t v1565 = vaddq_s16(v1559, v1564);
+    int16x8_t v1566 = vsubq_s16(v104, v107);
+    int16x8_t v1567 = vsubq_s16(v112, v116);
+    int16x8_t v1568_tmp = vqrdmulhq_n_s16(v1567, 10045);
+    int16x8_t v1568 = vaddq_s16(v1568_tmp, v1567);
+    int16x8_t v1569 = vaddq_s16(v1566, v1568);
+    int16x8_t v1570 = vsubq_s16(v121, v124);
+    int16x8_t v1571 = vsubq_s16(v127, v132);
+    int16x8_t v1572_tmp = vqrdmulhq_n_s16(v1571, 10045);
+    int16x8_t v1572 = vaddq_s16(v1572_tmp, v1571);
+    int16x8_t v1573 = vaddq_s16(v1570, v1572);
+    int16x8_t v1574 = vqrdmulhq_n_s16(v1573, 19705);
+    int16x8_t v1575 = vaddq_s16(v1569, v1574);
+    int16x8_t v1576 = vqrdmulhq_n_s16(v1575, 17121);
+    int16x8_t v1577 = vaddq_s16(v1565, v1576);
+    int16x8_t v1578 = vqrdmulhq_n_s16(v1577, 16563);
+    int16x8_t v1579 = vaddq_s16(v1555, v1578);
+    int16x8_t v1580 = vsubq_s16(v143, v146);
+    int16x8_t v1581 = vsubq_s16(v151, v155);
+    int16x8_t v1582_tmp = vqrdmulhq_n_s16(v1581, 10045);
+    int16x8_t v1582 = vaddq_s16(v1582_tmp, v1581);
+    int16x8_t v1583 = vaddq_s16(v1580, v1582);
+    int16x8_t v1584 = vsubq_s16(v162, v169);
+    int16x8_t v1585 = vsubq_s16(v172, v177);
+    int16x8_t v1586_tmp = vqrdmulhq_n_s16(v1585, 10045);
+    int16x8_t v1586 = vaddq_s16(v1586_tmp, v1585);
+    int16x8_t v1587 = vaddq_s16(v1584, v1586);
+    int16x8_t v1588 = vqrdmulhq_n_s16(v1587, 19705);
+    int16x8_t v1589 = vaddq_s16(v1583, v1588);
+    int16x8_t v1590 = vsubq_s16(v186, v193);
+    int16x8_t v1591 = vsubq_s16(v202, v210);
+    int16x8_t v1592_tmp = vqrdmulhq_n_s16(v1591, 10045);
+    int16x8_t v1592 = vaddq_s16(v1592_tmp, v1591);
+    int16x8_t v1593 = vaddq_s16(v1590, v1592);
+    int16x8_t v1594 = vsubq_s16(v215, v218);
+    int16x8_t v1595 = vsubq_s16(v221, v227);
+    int16x8_t v1596_tmp = vqrdmulhq_n_s16(v1595, 10045);
+    int16x8_t v1596 = vaddq_s16(v1596_tmp, v1595);
+    int16x8_t v1597 = vaddq_s16(v1594, v1596);
+    int16x8_t v1598 = vqrdmulhq_n_s16(v1597, 19705);
+    int16x8_t v1599 = vaddq_s16(v1593, v1598);
+    int16x8_t v1600 = vqrdmulhq_n_s16(v1599, 17121);
+    int16x8_t v1601 = vaddq_s16(v1589, v1600);
+    int16x8_t v1602 = vsubq_s16(v236, v239);
+    int16x8_t v1603 = vsubq_s16(v244, v248);
+    int16x8_t v1604_tmp = vqrdmulhq_n_s16(v1603, 10045);
+    int16x8_t v1604 = vaddq_s16(v1604_tmp, v1603);
+    int16x8_t v1605 = vaddq_s16(v1602, v1604);
+    int16x8_t v1606 = vsubq_s16(v255, v262);
+    int16x8_t v1607 = vsubq_s16(v265, v270);
+    int16x8_t v1608_tmp = vqrdmulhq_n_s16(v1607, 10045);
+    int16x8_t v1608 = vaddq_s16(v1608_tmp, v1607);
+    int16x8_t v1609 = vaddq_s16(v1606, v1608);
+    int16x8_t v1610 = vqrdmulhq_n_s16(v1609, 19705);
+    int16x8_t v1611 = vaddq_s16(v1605, v1610);
+    int16x8_t v1612 = vsubq_s16(v277, v280);
+    int16x8_t v1613 = vsubq_s16(v285, v289);
+    int16x8_t v1614_tmp = vqrdmulhq_n_s16(v1613, 10045);
+    int16x8_t v1614 = vaddq_s16(v1614_tmp, v1613);
+    int16x8_t v1615 = vaddq_s16(v1612, v1614);
+    int16x8_t v1616 = vsubq_s16(v294, v297);
+    int16x8_t v1617 = vsubq_s16(v300, v306);
+    int16x8_t v1618_tmp = vqrdmulhq_n_s16(v1617, 10045);
+    int16x8_t v1618 = vaddq_s16(v1618_tmp, v1617);
+    int16x8_t v1619 = vaddq_s16(v1616, v1618);
+    int16x8_t v1620 = vqrdmulhq_n_s16(v1619, 19705);
+    int16x8_t v1621 = vaddq_s16(v1615, v1620);
+    int16x8_t v1622 = vqrdmulhq_n_s16(v1621, 17121);
+    int16x8_t v1623 = vaddq_s16(v1611, v1622);
+    int16x8_t v1624 = vqrdmulhq_n_s16(v1623, 16563);
+    int16x8_t v1625 = vaddq_s16(v1601, v1624);
+    int16x8_t v1626 = vqrdmulhq_n_s16(v1625, 16429);
+    int16x8_t v1627 = vaddq_s16(v1579, v1626);
+    int16x8_t v1628 = vsubq_s16(v319, v322);
+    int16x8_t v1629 = vsubq_s16(v327, v331);
+    int16x8_t v1630_tmp = vqrdmulhq_n_s16(v1629, 10045);
+    int16x8_t v1630 = vaddq_s16(v1630_tmp, v1629);
+    int16x8_t v1631 = vaddq_s16(v1628, v1630);
+    int16x8_t v1632 = vsubq_s16(v338, v345);
+    int16x8_t v1633 = vsubq_s16(v348, v353);
+    int16x8_t v1634_tmp = vqrdmulhq_n_s16(v1633, 10045);
+    int16x8_t v1634 = vaddq_s16(v1634_tmp, v1633);
+    int16x8_t v1635 = vaddq_s16(v1632, v1634);
+    int16x8_t v1636 = vqrdmulhq_n_s16(v1635, 19705);
+    int16x8_t v1637 = vaddq_s16(v1631, v1636);
+    int16x8_t v1638 = vsubq_s16(v362, v369);
+    int16x8_t v1639 = vsubq_s16(v378, v386);
+    int16x8_t v1640_tmp = vqrdmulhq_n_s16(v1639, 10045);
+    int16x8_t v1640 = vaddq_s16(v1640_tmp, v1639);
+    int16x8_t v1641 = vaddq_s16(v1638, v1640);
+    int16x8_t v1642 = vsubq_s16(v391, v394);
+    int16x8_t v1643 = vsubq_s16(v397, v403);
+    int16x8_t v1644_tmp = vqrdmulhq_n_s16(v1643, 10045);
+    int16x8_t v1644 = vaddq_s16(v1644_tmp, v1643);
+    int16x8_t v1645 = vaddq_s16(v1642, v1644);
+    int16x8_t v1646 = vqrdmulhq_n_s16(v1645, 19705);
+    int16x8_t v1647 = vaddq_s16(v1641, v1646);
+    int16x8_t v1648 = vqrdmulhq_n_s16(v1647, 17121);
+    int16x8_t v1649 = vaddq_s16(v1637, v1648);
+    int16x8_t v1650 = vsubq_s16(v414, v421);
+    int16x8_t v1651 = vsubq_s16(v430, v438);
+    int16x8_t v1652_tmp = vqrdmulhq_n_s16(v1651, 10045);
+    int16x8_t v1652 = vaddq_s16(v1652_tmp, v1651);
+    int16x8_t v1653 = vaddq_s16(v1650, v1652);
+    int16x8_t v1654 = vsubq_s16(v449, v464);
+    int16x8_t v1655 = vsubq_s16(v467, v476);
+    int16x8_t v1656_tmp = vqrdmulhq_n_s16(v1655, 10045);
+    int16x8_t v1656 = vaddq_s16(v1656_tmp, v1655);
+    int16x8_t v1657 = vaddq_s16(v1654, v1656);
+    int16x8_t v1658 = vqrdmulhq_n_s16(v1657, 19705);
+    int16x8_t v1659 = vaddq_s16(v1653, v1658);
+    int16x8_t v1660 = vsubq_s16(v483, v486);
+    int16x8_t v1661 = vsubq_s16(v491, v495);
+    int16x8_t v1662_tmp = vqrdmulhq_n_s16(v1661, 10045);
+    int16x8_t v1662 = vaddq_s16(v1662_tmp, v1661);
+    int16x8_t v1663 = vaddq_s16(v1660, v1662);
+    int16x8_t v1664 = vsubq_s16(v500, v503);
+    int16x8_t v1665 = vsubq_s16(v506, v513);
+    int16x8_t v1666_tmp = vqrdmulhq_n_s16(v1665, 10045);
+    int16x8_t v1666 = vaddq_s16(v1666_tmp, v1665);
+    int16x8_t v1667 = vaddq_s16(v1664, v1666);
+    int16x8_t v1668 = vqrdmulhq_n_s16(v1667, 19705);
+    int16x8_t v1669 = vaddq_s16(v1663, v1668);
+    int16x8_t v1670 = vqrdmulhq_n_s16(v1669, 17121);
+    int16x8_t v1671 = vaddq_s16(v1659, v1670);
+    int16x8_t v1672 = vqrdmulhq_n_s16(v1671, 16563);
+    int16x8_t v1673 = vaddq_s16(v1649, v1672);
+    int16x8_t v1674 = vsubq_s16(v524, v527);
+    int16x8_t v1675 = vsubq_s16(v532, v536);
+    int16x8_t v1676_tmp = vqrdmulhq_n_s16(v1675, 10045);
+    int16x8_t v1676 = vaddq_s16(v1676_tmp, v1675);
+    int16x8_t v1677 = vaddq_s16(v1674, v1676);
+    int16x8_t v1678 = vsubq_s16(v543, v550);
+    int16x8_t v1679 = vsubq_s16(v553, v558);
+    int16x8_t v1680_tmp = vqrdmulhq_n_s16(v1679, 10045);
+    int16x8_t v1680 = vaddq_s16(v1680_tmp, v1679);
+    int16x8_t v1681 = vaddq_s16(v1678, v1680);
+    int16x8_t v1682 = vqrdmulhq_n_s16(v1681, 19705);
+    int16x8_t v1683 = vaddq_s16(v1677, v1682);
+    int16x8_t v1684 = vsubq_s16(v567, v574);
+    int16x8_t v1685 = vsubq_s16(v583, v591);
+    int16x8_t v1686_tmp = vqrdmulhq_n_s16(v1685, 10045);
+    int16x8_t v1686 = vaddq_s16(v1686_tmp, v1685);
+    int16x8_t v1687 = vaddq_s16(v1684, v1686);
+    int16x8_t v1688 = vsubq_s16(v596, v599);
+    int16x8_t v1689 = vsubq_s16(v602, v608);
+    int16x8_t v1690_tmp = vqrdmulhq_n_s16(v1689, 10045);
+    int16x8_t v1690 = vaddq_s16(v1690_tmp, v1689);
+    int16x8_t v1691 = vaddq_s16(v1688, v1690);
+    int16x8_t v1692 = vqrdmulhq_n_s16(v1691, 19705);
+    int16x8_t v1693 = vaddq_s16(v1687, v1692);
+    int16x8_t v1694 = vqrdmulhq_n_s16(v1693, 17121);
+    int16x8_t v1695 = vaddq_s16(v1683, v1694);
+    int16x8_t v1696 = vsubq_s16(v617, v620);
+    int16x8_t v1697 = vsubq_s16(v625, v629);
+    int16x8_t v1698_tmp = vqrdmulhq_n_s16(v1697, 10045);
+    int16x8_t v1698 = vaddq_s16(v1698_tmp, v1697);
+    int16x8_t v1699 = vaddq_s16(v1696, v1698);
+    int16x8_t v1700 = vsubq_s16(v636, v643);
+    int16x8_t v1701 = vsubq_s16(v646, v651);
+    int16x8_t v1702_tmp = vqrdmulhq_n_s16(v1701, 10045);
+    int16x8_t v1702 = vaddq_s16(v1702_tmp, v1701);
+    int16x8_t v1703 = vaddq_s16(v1700, v1702);
+    int16x8_t v1704 = vqrdmulhq_n_s16(v1703, 19705);
+    int16x8_t v1705 = vaddq_s16(v1699, v1704);
+    int16x8_t v1706 = vsubq_s16(v658, v661);
+    int16x8_t v1707 = vsubq_s16(v666, v670);
+    int16x8_t v1708_tmp = vqrdmulhq_n_s16(v1707, 10045);
+    int16x8_t v1708 = vaddq_s16(v1708_tmp, v1707);
+    int16x8_t v1709 = vaddq_s16(v1706, v1708);
+    int16x8_t v1710 = vsubq_s16(v675, v678);
+    int16x8_t v1711 = vsubq_s16(v681, v688);
+    int16x8_t v1712_tmp = vqrdmulhq_n_s16(v1711, 10045);
+    int16x8_t v1712 = vaddq_s16(v1712_tmp, v1711);
+    int16x8_t v1713 = vaddq_s16(v1710, v1712);
+    int16x8_t v1714 = vqrdmulhq_n_s16(v1713, 19705);
+    int16x8_t v1715 = vaddq_s16(v1709, v1714);
+    int16x8_t v1716 = vqrdmulhq_n_s16(v1715, 17121);
+    int16x8_t v1717 = vaddq_s16(v1705, v1716);
+    int16x8_t v1718 = vqrdmulhq_n_s16(v1717, 16563);
+    int16x8_t v1719 = vaddq_s16(v1695, v1718);
+    int16x8_t v1720 = vqrdmulhq_n_s16(v1719, 16429);
+    int16x8_t v1721 = vaddq_s16(v1673, v1720);
+    int16x8_t v1722 = vqrdmulhq_n_s16(v1721, 16395);
+    int16x8_t v1723 = vaddq_s16(v1627, v1722);
+    int16x8_t v1724 = vsubq_s16(v703, v706);
+    int16x8_t v1725 = vsubq_s16(v711, v715);
+    int16x8_t v1726_tmp = vqrdmulhq_n_s16(v1725, 10045);
+    int16x8_t v1726 = vaddq_s16(v1726_tmp, v1725);
+    int16x8_t v1727 = vaddq_s16(v1724, v1726);
+    int16x8_t v1728 = vsubq_s16(v722, v729);
+    int16x8_t v1729 = vsubq_s16(v732, v737);
+    int16x8_t v1730_tmp = vqrdmulhq_n_s16(v1729, 10045);
+    int16x8_t v1730 = vaddq_s16(v1730_tmp, v1729);
+    int16x8_t v1731 = vaddq_s16(v1728, v1730);
+    int16x8_t v1732 = vqrdmulhq_n_s16(v1731, 19705);
+    int16x8_t v1733 = vaddq_s16(v1727, v1732);
+    int16x8_t v1734 = vsubq_s16(v746, v753);
+    int16x8_t v1735 = vsubq_s16(v762, v770);
+    int16x8_t v1736_tmp = vqrdmulhq_n_s16(v1735, 10045);
+    int16x8_t v1736 = vaddq_s16(v1736_tmp, v1735);
+    int16x8_t v1737 = vaddq_s16(v1734, v1736);
+    int16x8_t v1738 = vsubq_s16(v775, v778);
+    int16x8_t v1739 = vsubq_s16(v781, v787);
+    int16x8_t v1740_tmp = vqrdmulhq_n_s16(v1739, 10045);
+    int16x8_t v1740 = vaddq_s16(v1740_tmp, v1739);
+    int16x8_t v1741 = vaddq_s16(v1738, v1740);
+    int16x8_t v1742 = vqrdmulhq_n_s16(v1741, 19705);
+    int16x8_t v1743 = vaddq_s16(v1737, v1742);
+    int16x8_t v1744 = vqrdmulhq_n_s16(v1743, 17121);
+    int16x8_t v1745 = vaddq_s16(v1733, v1744);
+    int16x8_t v1746 = vsubq_s16(v798, v805);
+    int16x8_t v1747 = vsubq_s16(v814, v822);
+    int16x8_t v1748_tmp = vqrdmulhq_n_s16(v1747, 10045);
+    int16x8_t v1748 = vaddq_s16(v1748_tmp, v1747);
+    int16x8_t v1749 = vaddq_s16(v1746, v1748);
+    int16x8_t v1750 = vsubq_s16(v833, v848);
+    int16x8_t v1751 = vsubq_s16(v851, v860);
+    int16x8_t v1752_tmp = vqrdmulhq_n_s16(v1751, 10045);
+    int16x8_t v1752 = vaddq_s16(v1752_tmp, v1751);
+    int16x8_t v1753 = vaddq_s16(v1750, v1752);
+    int16x8_t v1754 = vqrdmulhq_n_s16(v1753, 19705);
+    int16x8_t v1755 = vaddq_s16(v1749, v1754);
+    int16x8_t v1756 = vsubq_s16(v867, v870);
+    int16x8_t v1757 = vsubq_s16(v875, v879);
+    int16x8_t v1758_tmp = vqrdmulhq_n_s16(v1757, 10045);
+    int16x8_t v1758 = vaddq_s16(v1758_tmp, v1757);
+    int16x8_t v1759 = vaddq_s16(v1756, v1758);
+    int16x8_t v1760 = vsubq_s16(v884, v887);
+    int16x8_t v1761 = vsubq_s16(v890, v897);
+    int16x8_t v1762_tmp = vqrdmulhq_n_s16(v1761, 10045);
+    int16x8_t v1762 = vaddq_s16(v1762_tmp, v1761);
+    int16x8_t v1763 = vaddq_s16(v1760, v1762);
+    int16x8_t v1764 = vqrdmulhq_n_s16(v1763, 19705);
+    int16x8_t v1765 = vaddq_s16(v1759, v1764);
+    int16x8_t v1766 = vqrdmulhq_n_s16(v1765, 17121);
+    int16x8_t v1767 = vaddq_s16(v1755, v1766);
+    int16x8_t v1768 = vqrdmulhq_n_s16(v1767, 16563);
+    int16x8_t v1769 = vaddq_s16(v1745, v1768);
+    int16x8_t v1770 = vsubq_s16(v910, v917);
+    int16x8_t v1771 = vsubq_s16(v926, v934);
+    int16x8_t v1772_tmp = vqrdmulhq_n_s16(v1771, 10045);
+    int16x8_t v1772 = vaddq_s16(v1772_tmp, v1771);
+    int16x8_t v1773 = vaddq_s16(v1770, v1772);
+    int16x8_t v1774 = vsubq_s16(v945, v960);
+    int16x8_t v1775 = vsubq_s16(v963, v972);
+    int16x8_t v1776_tmp = vqrdmulhq_n_s16(v1775, 10045);
+    int16x8_t v1776 = vaddq_s16(v1776_tmp, v1775);
+    int16x8_t v1777 = vaddq_s16(v1774, v1776);
+    int16x8_t v1778 = vqrdmulhq_n_s16(v1777, 19705);
+    int16x8_t v1779 = vaddq_s16(v1773, v1778);
+    int16x8_t v1780 = vsubq_s16(v985, v1000);
+    int16x8_t v1781 = vsubq_s16(v1017, v1033);
+    int16x8_t v1782_tmp = vqrdmulhq_n_s16(v1781, 10045);
+    int16x8_t v1782 = vaddq_s16(v1782_tmp, v1781);
+    int16x8_t v1783 = vaddq_s16(v1780, v1782);
+    int16x8_t v1784 = vsubq_s16(v1038, v1041);
+    int16x8_t v1785 = vsubq_s16(v1044, v1054);
+    int16x8_t v1786_tmp = vqrdmulhq_n_s16(v1785, 10045);
+    int16x8_t v1786 = vaddq_s16(v1786_tmp, v1785);
+    int16x8_t v1787 = vaddq_s16(v1784, v1786);
+    int16x8_t v1788 = vqrdmulhq_n_s16(v1787, 19705);
+    int16x8_t v1789 = vaddq_s16(v1783, v1788);
+    int16x8_t v1790 = vqrdmulhq_n_s16(v1789, 17121);
+    int16x8_t v1791 = vaddq_s16(v1779, v1790);
+    int16x8_t v1792 = vsubq_s16(v1063, v1066);
+    int16x8_t v1793 = vsubq_s16(v1071, v1075);
+    int16x8_t v1794_tmp = vqrdmulhq_n_s16(v1793, 10045);
+    int16x8_t v1794 = vaddq_s16(v1794_tmp, v1793);
+    int16x8_t v1795 = vaddq_s16(v1792, v1794);
+    int16x8_t v1796 = vsubq_s16(v1082, v1089);
+    int16x8_t v1797 = vsubq_s16(v1092, v1097);
+    int16x8_t v1798_tmp = vqrdmulhq_n_s16(v1797, 10045);
+    int16x8_t v1798 = vaddq_s16(v1798_tmp, v1797);
+    int16x8_t v1799 = vaddq_s16(v1796, v1798);
+    int16x8_t v1800 = vqrdmulhq_n_s16(v1799, 19705);
+    int16x8_t v1801 = vaddq_s16(v1795, v1800);
+    int16x8_t v1802 = vsubq_s16(v1104, v1107);
+    int16x8_t v1803 = vsubq_s16(v1112, v1116);
+    int16x8_t v1804_tmp = vqrdmulhq_n_s16(v1803, 10045);
+    int16x8_t v1804 = vaddq_s16(v1804_tmp, v1803);
+    int16x8_t v1805 = vaddq_s16(v1802, v1804);
+    int16x8_t v1806 = vsubq_s16(v1121, v1124);
+    int16x8_t v1807 = vsubq_s16(v1127, v1135);
+    int16x8_t v1808_tmp = vqrdmulhq_n_s16(v1807, 10045);
+    int16x8_t v1808 = vaddq_s16(v1808_tmp, v1807);
+    int16x8_t v1809 = vaddq_s16(v1806, v1808);
+    int16x8_t v1810 = vqrdmulhq_n_s16(v1809, 19705);
+    int16x8_t v1811 = vaddq_s16(v1805, v1810);
+    int16x8_t v1812 = vqrdmulhq_n_s16(v1811, 17121);
+    int16x8_t v1813 = vaddq_s16(v1801, v1812);
+    int16x8_t v1814 = vqrdmulhq_n_s16(v1813, 16563);
+    int16x8_t v1815 = vaddq_s16(v1791, v1814);
+    int16x8_t v1816 = vqrdmulhq_n_s16(v1815, 16429);
+    int16x8_t v1817 = vaddq_s16(v1769, v1816);
+    int16x8_t v1818 = vsubq_s16(v1148, v1151);
+    int16x8_t v1819 = vsubq_s16(v1156, v1160);
+    int16x8_t v1820_tmp = vqrdmulhq_n_s16(v1819, 10045);
+    int16x8_t v1820 = vaddq_s16(v1820_tmp, v1819);
+    int16x8_t v1821 = vaddq_s16(v1818, v1820);
+    int16x8_t v1822 = vsubq_s16(v1167, v1174);
+    int16x8_t v1823 = vsubq_s16(v1177, v1182);
+    int16x8_t v1824_tmp = vqrdmulhq_n_s16(v1823, 10045);
+    int16x8_t v1824 = vaddq_s16(v1824_tmp, v1823);
+    int16x8_t v1825 = vaddq_s16(v1822, v1824);
+    int16x8_t v1826 = vqrdmulhq_n_s16(v1825, 19705);
+    int16x8_t v1827 = vaddq_s16(v1821, v1826);
+    int16x8_t v1828 = vsubq_s16(v1191, v1198);
+    int16x8_t v1829 = vsubq_s16(v1207, v1215);
+    int16x8_t v1830_tmp = vqrdmulhq_n_s16(v1829, 10045);
+    int16x8_t v1830 = vaddq_s16(v1830_tmp, v1829);
+    int16x8_t v1831 = vaddq_s16(v1828, v1830);
+    int16x8_t v1832 = vsubq_s16(v1220, v1223);
+    int16x8_t v1833 = vsubq_s16(v1226, v1232);
+    int16x8_t v1834_tmp = vqrdmulhq_n_s16(v1833, 10045);
+    int16x8_t v1834 = vaddq_s16(v1834_tmp, v1833);
+    int16x8_t v1835 = vaddq_s16(v1832, v1834);
+    int16x8_t v1836 = vqrdmulhq_n_s16(v1835, 19705);
+    int16x8_t v1837 = vaddq_s16(v1831, v1836);
+    int16x8_t v1838 = vqrdmulhq_n_s16(v1837, 17121);
+    int16x8_t v1839 = vaddq_s16(v1827, v1838);
+    int16x8_t v1840 = vsubq_s16(v1243, v1250);
+    int16x8_t v1841 = vsubq_s16(v1259, v1267);
+    int16x8_t v1842_tmp = vqrdmulhq_n_s16(v1841, 10045);
+    int16x8_t v1842 = vaddq_s16(v1842_tmp, v1841);
+    int16x8_t v1843 = vaddq_s16(v1840, v1842);
+    int16x8_t v1844 = vsubq_s16(v1278, v1293);
+    int16x8_t v1845 = vsubq_s16(v1296, v1305);
+    int16x8_t v1846_tmp = vqrdmulhq_n_s16(v1845, 10045);
+    int16x8_t v1846 = vaddq_s16(v1846_tmp, v1845);
+    int16x8_t v1847 = vaddq_s16(v1844, v1846);
+    int16x8_t v1848 = vqrdmulhq_n_s16(v1847, 19705);
+    int16x8_t v1849 = vaddq_s16(v1843, v1848);
+    int16x8_t v1850 = vsubq_s16(v1312, v1315);
+    int16x8_t v1851 = vsubq_s16(v1320, v1324);
+    int16x8_t v1852_tmp = vqrdmulhq_n_s16(v1851, 10045);
+    int16x8_t v1852 = vaddq_s16(v1852_tmp, v1851);
+    int16x8_t v1853 = vaddq_s16(v1850, v1852);
+    int16x8_t v1854 = vsubq_s16(v1329, v1332);
+    int16x8_t v1855 = vsubq_s16(v1335, v1342);
+    int16x8_t v1856_tmp = vqrdmulhq_n_s16(v1855, 10045);
+    int16x8_t v1856 = vaddq_s16(v1856_tmp, v1855);
+    int16x8_t v1857 = vaddq_s16(v1854, v1856);
+    int16x8_t v1858 = vqrdmulhq_n_s16(v1857, 19705);
+    int16x8_t v1859 = vaddq_s16(v1853, v1858);
+    int16x8_t v1860 = vqrdmulhq_n_s16(v1859, 17121);
+    int16x8_t v1861 = vaddq_s16(v1849, v1860);
+    int16x8_t v1862 = vqrdmulhq_n_s16(v1861, 16563);
+    int16x8_t v1863 = vaddq_s16(v1839, v1862);
+    int16x8_t v1864 = vsubq_s16(v1353, v1356);
+    int16x8_t v1865 = vsubq_s16(v1361, v1365);
+    int16x8_t v1866_tmp = vqrdmulhq_n_s16(v1865, 10045);
+    int16x8_t v1866 = vaddq_s16(v1866_tmp, v1865);
+    int16x8_t v1867 = vaddq_s16(v1864, v1866);
+    int16x8_t v1868 = vsubq_s16(v1372, v1379);
+    int16x8_t v1869 = vsubq_s16(v1382, v1387);
+    int16x8_t v1870_tmp = vqrdmulhq_n_s16(v1869, 10045);
+    int16x8_t v1870 = vaddq_s16(v1870_tmp, v1869);
+    int16x8_t v1871 = vaddq_s16(v1868, v1870);
+    int16x8_t v1872 = vqrdmulhq_n_s16(v1871, 19705);
+    int16x8_t v1873 = vaddq_s16(v1867, v1872);
+    int16x8_t v1874 = vsubq_s16(v1396, v1403);
+    int16x8_t v1875 = vsubq_s16(v1412, v1420);
+    int16x8_t v1876_tmp = vqrdmulhq_n_s16(v1875, 10045);
+    int16x8_t v1876 = vaddq_s16(v1876_tmp, v1875);
+    int16x8_t v1877 = vaddq_s16(v1874, v1876);
+    int16x8_t v1878 = vsubq_s16(v1425, v1428);
+    int16x8_t v1879 = vsubq_s16(v1431, v1437);
+    int16x8_t v1880_tmp = vqrdmulhq_n_s16(v1879, 10045);
+    int16x8_t v1880 = vaddq_s16(v1880_tmp, v1879);
+    int16x8_t v1881 = vaddq_s16(v1878, v1880);
+    int16x8_t v1882 = vqrdmulhq_n_s16(v1881, 19705);
+    int16x8_t v1883 = vaddq_s16(v1877, v1882);
+    int16x8_t v1884 = vqrdmulhq_n_s16(v1883, 17121);
+    int16x8_t v1885 = vaddq_s16(v1873, v1884);
+    int16x8_t v1886 = vsubq_s16(v1446, v1449);
+    int16x8_t v1887 = vsubq_s16(v1454, v1458);
+    int16x8_t v1888_tmp = vqrdmulhq_n_s16(v1887, 10045);
+    int16x8_t v1888 = vaddq_s16(v1888_tmp, v1887);
+    int16x8_t v1889 = vaddq_s16(v1886, v1888);
+    int16x8_t v1890 = vsubq_s16(v1465, v1472);
+    int16x8_t v1891 = vsubq_s16(v1475, v1480);
+    int16x8_t v1892_tmp = vqrdmulhq_n_s16(v1891, 10045);
+    int16x8_t v1892 = vaddq_s16(v1892_tmp, v1891);
+    int16x8_t v1893 = vaddq_s16(v1890, v1892);
+    int16x8_t v1894 = vqrdmulhq_n_s16(v1893, 19705);
+    int16x8_t v1895 = vaddq_s16(v1889, v1894);
+    int16x8_t v1896 = vsubq_s16(v1487, v1490);
+    int16x8_t v1897 = vsubq_s16(v1495, v1499);
+    int16x8_t v1898_tmp = vqrdmulhq_n_s16(v1897, 10045);
+    int16x8_t v1898 = vaddq_s16(v1898_tmp, v1897);
+    int16x8_t v1899 = vaddq_s16(v1896, v1898);
+    int16x8_t v1900 = vsubq_s16(v1504, v1507);
+    int16x8_t v1901 = vsubq_s16(v1510, v1518);
+    int16x8_t v1902_tmp = vqrdmulhq_n_s16(v1901, 10045);
+    int16x8_t v1902 = vaddq_s16(v1902_tmp, v1901);
+    int16x8_t v1903 = vaddq_s16(v1900, v1902);
+    int16x8_t v1904 = vqrdmulhq_n_s16(v1903, 19705);
+    int16x8_t v1905 = vaddq_s16(v1899, v1904);
+    int16x8_t v1906 = vqrdmulhq_n_s16(v1905, 17121);
+    int16x8_t v1907 = vaddq_s16(v1895, v1906);
+    int16x8_t v1908 = vqrdmulhq_n_s16(v1907, 16563);
+    int16x8_t v1909 = vaddq_s16(v1885, v1908);
+    int16x8_t v1910 = vqrdmulhq_n_s16(v1909, 16429);
+    int16x8_t v1911 = vaddq_s16(v1863, v1910);
+    int16x8_t v1912 = vqrdmulhq_n_s16(v1911, 16395);
+    int16x8_t v1913 = vaddq_s16(v1817, v1912);
+    int16x8_t v1914 = vqrdmulhq_n_s16(v1913, 16387);
+    int16x8_t v1915 = vaddq_s16(v1723, v1914);
+    int16x8_t v1916 = vsubq_s16(v1534, v1536);
+    int16x8_t v1917 = vsubq_s16(v1538, v1540);
+    int16x8_t v1918 = vqrdmulhq_n_s16(v1917, 29490);
+    int16x8_t v1919 = vaddq_s16(v1916, v1918);
+    int16x8_t v1920 = vsubq_s16(v1544, v1546);
+    int16x8_t v1921 = vsubq_s16(v1548, v1550);
+    int16x8_t v1922 = vqrdmulhq_n_s16(v1921, 29490);
+    int16x8_t v1923 = vaddq_s16(v1920, v1922);
+    int16x8_t v1924 = vqrdmulhq_n_s16(v1923, 18578);
+    int16x8_t v1925 = vaddq_s16(v1919, v1924);
+    int16x8_t v1926 = vsubq_s16(v1556, v1558);
+    int16x8_t v1927 = vsubq_s16(v1560, v1562);
+    int16x8_t v1928 = vqrdmulhq_n_s16(v1927, 29490);
+    int16x8_t v1929 = vaddq_s16(v1926, v1928);
+    int16x8_t v1930 = vsubq_s16(v1566, v1568);
+    int16x8_t v1931 = vsubq_s16(v1570, v1572);
+    int16x8_t v1932 = vqrdmulhq_n_s16(v1931, 29490);
+    int16x8_t v1933 = vaddq_s16(v1930, v1932);
+    int16x8_t v1934 = vqrdmulhq_n_s16(v1933, 18578);
+    int16x8_t v1935 = vaddq_s16(v1929, v1934);
+    int16x8_t v1936 = vqrdmulhq_n_s16(v1935, 16890);
+    int16x8_t v1937 = vaddq_s16(v1925, v1936);
+    int16x8_t v1938 = vsubq_s16(v1580, v1582);
+    int16x8_t v1939 = vsubq_s16(v1584, v1586);
+    int16x8_t v1940 = vqrdmulhq_n_s16(v1939, 29490);
+    int16x8_t v1941 = vaddq_s16(v1938, v1940);
+    int16x8_t v1942 = vsubq_s16(v1590, v1592);
+    int16x8_t v1943 = vsubq_s16(v1594, v1596);
+    int16x8_t v1944 = vqrdmulhq_n_s16(v1943, 29490);
+    int16x8_t v1945 = vaddq_s16(v1942, v1944);
+    int16x8_t v1946 = vqrdmulhq_n_s16(v1945, 18578);
+    int16x8_t v1947 = vaddq_s16(v1941, v1946);
+    int16x8_t v1948 = vsubq_s16(v1602, v1604);
+    int16x8_t v1949 = vsubq_s16(v1606, v1608);
+    int16x8_t v1950 = vqrdmulhq_n_s16(v1949, 29490);
+    int16x8_t v1951 = vaddq_s16(v1948, v1950);
+    int16x8_t v1952 = vsubq_s16(v1612, v1614);
+    int16x8_t v1953 = vsubq_s16(v1616, v1618);
+    int16x8_t v1954 = vqrdmulhq_n_s16(v1953, 29490);
+    int16x8_t v1955 = vaddq_s16(v1952, v1954);
+    int16x8_t v1956 = vqrdmulhq_n_s16(v1955, 18578);
+    int16x8_t v1957 = vaddq_s16(v1951, v1956);
+    int16x8_t v1958 = vqrdmulhq_n_s16(v1957, 16890);
+    int16x8_t v1959 = vaddq_s16(v1947, v1958);
+    int16x8_t v1960 = vqrdmulhq_n_s16(v1959, 16508);
+    int16x8_t v1961 = vaddq_s16(v1937, v1960);
+    int16x8_t v1962 = vsubq_s16(v1628, v1630);
+    int16x8_t v1963 = vsubq_s16(v1632, v1634);
+    int16x8_t v1964 = vqrdmulhq_n_s16(v1963, 29490);
+    int16x8_t v1965 = vaddq_s16(v1962, v1964);
+    int16x8_t v1966 = vsubq_s16(v1638, v1640);
+    int16x8_t v1967 = vsubq_s16(v1642, v1644);
+    int16x8_t v1968 = vqrdmulhq_n_s16(v1967, 29490);
+    int16x8_t v1969 = vaddq_s16(v1966, v1968);
+    int16x8_t v1970 = vqrdmulhq_n_s16(v1969, 18578);
+    int16x8_t v1971 = vaddq_s16(v1965, v1970);
+    int16x8_t v1972 = vsubq_s16(v1650, v1652);
+    int16x8_t v1973 = vsubq_s16(v1654, v1656);
+    int16x8_t v1974 = vqrdmulhq_n_s16(v1973, 29490);
+    int16x8_t v1975 = vaddq_s16(v1972, v1974);
+    int16x8_t v1976 = vsubq_s16(v1660, v1662);
+    int16x8_t v1977 = vsubq_s16(v1664, v1666);
+    int16x8_t v1978 = vqrdmulhq_n_s16(v1977, 29490);
+    int16x8_t v1979 = vaddq_s16(v1976, v1978);
+    int16x8_t v1980 = vqrdmulhq_n_s16(v1979, 18578);
+    int16x8_t v1981 = vaddq_s16(v1975, v1980);
+    int16x8_t v1982 = vqrdmulhq_n_s16(v1981, 16890);
+    int16x8_t v1983 = vaddq_s16(v1971, v1982);
+    int16x8_t v1984 = vsubq_s16(v1674, v1676);
+    int16x8_t v1985 = vsubq_s16(v1678, v1680);
+    int16x8_t v1986 = vqrdmulhq_n_s16(v1985, 29490);
+    int16x8_t v1987 = vaddq_s16(v1984, v1986);
+    int16x8_t v1988 = vsubq_s16(v1684, v1686);
+    int16x8_t v1989 = vsubq_s16(v1688, v1690);
+    int16x8_t v1990 = vqrdmulhq_n_s16(v1989, 29490);
+    int16x8_t v1991 = vaddq_s16(v1988, v1990);
+    int16x8_t v1992 = vqrdmulhq_n_s16(v1991, 18578);
+    int16x8_t v1993 = vaddq_s16(v1987, v1992);
+    int16x8_t v1994 = vsubq_s16(v1696, v1698);
+    int16x8_t v1995 = vsubq_s16(v1700, v1702);
+    int16x8_t v1996 = vqrdmulhq_n_s16(v1995, 29490);
+    int16x8_t v1997 = vaddq_s16(v1994, v1996);
+    int16x8_t v1998 = vsubq_s16(v1706, v1708);
+    int16x8_t v1999 = vsubq_s16(v1710, v1712);
+    int16x8_t v2000 = vqrdmulhq_n_s16(v1999, 29490);
+    int16x8_t v2001 = vaddq_s16(v1998, v2000);
+    int16x8_t v2002 = vqrdmulhq_n_s16(v2001, 18578);
+    int16x8_t v2003 = vaddq_s16(v1997, v2002);
+    int16x8_t v2004 = vqrdmulhq_n_s16(v2003, 16890);
+    int16x8_t v2005 = vaddq_s16(v1993, v2004);
+    int16x8_t v2006 = vqrdmulhq_n_s16(v2005, 16508);
+    int16x8_t v2007 = vaddq_s16(v1983, v2006);
+    int16x8_t v2008 = vqrdmulhq_n_s16(v2007, 16415);
+    int16x8_t v2009 = vaddq_s16(v1961, v2008);
+    int16x8_t v2010 = vsubq_s16(v1724, v1726);
+    int16x8_t v2011 = vsubq_s16(v1728, v1730);
+    int16x8_t v2012 = vqrdmulhq_n_s16(v2011, 29490);
+    int16x8_t v2013 = vaddq_s16(v2010, v2012);
+    int16x8_t v2014 = vsubq_s16(v1734, v1736);
+    int16x8_t v2015 = vsubq_s16(v1738, v1740);
+    int16x8_t v2016 = vqrdmulhq_n_s16(v2015, 29490);
+    int16x8_t v2017 = vaddq_s16(v2014, v2016);
+    int16x8_t v2018 = vqrdmulhq_n_s16(v2017, 18578);
+    int16x8_t v2019 = vaddq_s16(v2013, v2018);
+    int16x8_t v2020 = vsubq_s16(v1746, v1748);
+    int16x8_t v2021 = vsubq_s16(v1750, v1752);
+    int16x8_t v2022 = vqrdmulhq_n_s16(v2021, 29490);
+    int16x8_t v2023 = vaddq_s16(v2020, v2022);
+    int16x8_t v2024 = vsubq_s16(v1756, v1758);
+    int16x8_t v2025 = vsubq_s16(v1760, v1762);
+    int16x8_t v2026 = vqrdmulhq_n_s16(v2025, 29490);
+    int16x8_t v2027 = vaddq_s16(v2024, v2026);
+    int16x8_t v2028 = vqrdmulhq_n_s16(v2027, 18578);
+    int16x8_t v2029 = vaddq_s16(v2023, v2028);
+    int16x8_t v2030 = vqrdmulhq_n_s16(v2029, 16890);
+    int16x8_t v2031 = vaddq_s16(v2019, v2030);
+    int16x8_t v2032 = vsubq_s16(v1770, v1772);
+    int16x8_t v2033 = vsubq_s16(v1774, v1776);
+    int16x8_t v2034 = vqrdmulhq_n_s16(v2033, 29490);
+    int16x8_t v2035 = vaddq_s16(v2032, v2034);
+    int16x8_t v2036 = vsubq_s16(v1780, v1782);
+    int16x8_t v2037 = vsubq_s16(v1784, v1786);
+    int16x8_t v2038 = vqrdmulhq_n_s16(v2037, 29490);
+    int16x8_t v2039 = vaddq_s16(v2036, v2038);
+    int16x8_t v2040 = vqrdmulhq_n_s16(v2039, 18578);
+    int16x8_t v2041 = vaddq_s16(v2035, v2040);
+    int16x8_t v2042 = vsubq_s16(v1792, v1794);
+    int16x8_t v2043 = vsubq_s16(v1796, v1798);
+    int16x8_t v2044 = vqrdmulhq_n_s16(v2043, 29490);
+    int16x8_t v2045 = vaddq_s16(v2042, v2044);
+    int16x8_t v2046 = vsubq_s16(v1802, v1804);
+    int16x8_t v2047 = vsubq_s16(v1806, v1808);
+    int16x8_t v2048 = vqrdmulhq_n_s16(v2047, 29490);
+    int16x8_t v2049 = vaddq_s16(v2046, v2048);
+    int16x8_t v2050 = vqrdmulhq_n_s16(v2049, 18578);
+    int16x8_t v2051 = vaddq_s16(v2045, v2050);
+    int16x8_t v2052 = vqrdmulhq_n_s16(v2051, 16890);
+    int16x8_t v2053 = vaddq_s16(v2041, v2052);
+    int16x8_t v2054 = vqrdmulhq_n_s16(v2053, 16508);
+    int16x8_t v2055 = vaddq_s16(v2031, v2054);
+    int16x8_t v2056 = vsubq_s16(v1818, v1820);
+    int16x8_t v2057 = vsubq_s16(v1822, v1824);
+    int16x8_t v2058 = vqrdmulhq_n_s16(v2057, 29490);
+    int16x8_t v2059 = vaddq_s16(v2056, v2058);
+    int16x8_t v2060 = vsubq_s16(v1828, v1830);
+    int16x8_t v2061 = vsubq_s16(v1832, v1834);
+    int16x8_t v2062 = vqrdmulhq_n_s16(v2061, 29490);
+    int16x8_t v2063 = vaddq_s16(v2060, v2062);
+    int16x8_t v2064 = vqrdmulhq_n_s16(v2063, 18578);
+    int16x8_t v2065 = vaddq_s16(v2059, v2064);
+    int16x8_t v2066 = vsubq_s16(v1840, v1842);
+    int16x8_t v2067 = vsubq_s16(v1844, v1846);
+    int16x8_t v2068 = vqrdmulhq_n_s16(v2067, 29490);
+    int16x8_t v2069 = vaddq_s16(v2066, v2068);
+    int16x8_t v2070 = vsubq_s16(v1850, v1852);
+    int16x8_t v2071 = vqrdmulhq_n_s16(v2070, 18578);
+    int16x8_t v2072 = vsubq_s16(v1854, v1856);
+    int16x8_t v2073 = vqrdmulhq_n_s16(v2072, 16719);
+    int16x8_t v2074 = vaddq_s16(v2071, v2073);
+    int16x8_t v2075 = vaddq_s16(v2069, v2074);
+    int16x8_t v2076 = vqrdmulhq_n_s16(v2075, 16890);
+    int16x8_t v2077 = vaddq_s16(v2065, v2076);
+    int16x8_t v2078 = vsubq_s16(v1864, v1866);
+    int16x8_t v2079 = vsubq_s16(v1868, v1870);
+    int16x8_t v2080 = vqrdmulhq_n_s16(v2079, 29490);
+    int16x8_t v2081 = vaddq_s16(v2078, v2080);
+    int16x8_t v2082 = vsubq_s16(v1874, v1876);
+    int16x8_t v2083 = vsubq_s16(v1878, v1880);
+    int16x8_t v2084 = vqrdmulhq_n_s16(v2083, 29490);
+    int16x8_t v2085 = vaddq_s16(v2082, v2084);
+    int16x8_t v2086 = vqrdmulhq_n_s16(v2085, 18578);
+    int16x8_t v2087 = vaddq_s16(v2081, v2086);
+    int16x8_t v2088 = vsubq_s16(v1886, v1888);
+    int16x8_t v2089 = vsubq_s16(v1890, v1892);
+    int16x8_t v2090 = vqrdmulhq_n_s16(v2089, 29490);
+    int16x8_t v2091 = vaddq_s16(v2088, v2090);
+    int16x8_t v2092 = vsubq_s16(v1896, v1898);
+    int16x8_t v2093 = vsubq_s16(v1900, v1902);
+    int16x8_t v2094 = vqrdmulhq_n_s16(v2093, 29490);
+    int16x8_t v2095 = vaddq_s16(v2092, v2094);
+    int16x8_t v2096 = vqrdmulhq_n_s16(v2095, 18578);
+    int16x8_t v2097 = vaddq_s16(v2091, v2096);
+    int16x8_t v2098 = vqrdmulhq_n_s16(v2097, 16890);
+    int16x8_t v2099 = vaddq_s16(v2087, v2098);
+    int16x8_t v2100 = vqrdmulhq_n_s16(v2099, 16508);
+    int16x8_t v2101 = vaddq_s16(v2077, v2100);
+    int16x8_t v2102 = vqrdmulhq_n_s16(v2101, 16415);
+    int16x8_t v2103 = vaddq_s16(v2055, v2102);
+    int16x8_t v2104 = vqrdmulhq_n_s16(v2103, 16392);
+    int16x8_t v2105 = vaddq_s16(v2009, v2104);
+    int16x8_t v2106 = vsubq_s16(v2, v8);
+    int16x8_t v2107 = vsubq_s16(v15, v22);
+    int16x8_t v2108_tmp = vqrdmulhq_n_s16(v2107, 18446);
+    int16x8_t v2108 = vmlaq_n_s16(v2108_tmp, v2107, 2);
+    int16x8_t v2109 = vaddq_s16(v2106, v2108);
+    int16x8_t v2110 = vsubq_s16(v31, v41);
+    int16x8_t v2111 = vsubq_s16(v48, v56);
+    int16x8_t v2112_tmp = vqrdmulhq_n_s16(v2111, 18446);
+    int16x8_t v2112 = vmlaq_n_s16(v2112_tmp, v2111, 2);
+    int16x8_t v2113 = vaddq_s16(v2110, v2112);
+    int16x8_t v2114 = vqrdmulhq_n_s16(v2113, 21195);
+    int16x8_t v2115 = vaddq_s16(v2109, v2114);
+    int16x8_t v2116 = vsubq_s16(v67, v77);
+    int16x8_t v2117 = vsubq_s16(v90, v99);
+    int16x8_t v2118_tmp = vqrdmulhq_n_s16(v2117, 18446);
+    int16x8_t v2118 = vmlaq_n_s16(v2118_tmp, v2117, 2);
+    int16x8_t v2119 = vaddq_s16(v2116, v2118);
+    int16x8_t v2120 = vsubq_s16(v108, v118);
+    int16x8_t v2121 = vsubq_s16(v125, v134);
+    int16x8_t v2122_tmp = vqrdmulhq_n_s16(v2121, 18446);
+    int16x8_t v2122 = vmlaq_n_s16(v2122_tmp, v2121, 2);
+    int16x8_t v2123 = vaddq_s16(v2120, v2122);
+    int16x8_t v2124 = vqrdmulhq_n_s16(v2123, 21195);
+    int16x8_t v2125 = vaddq_s16(v2119, v2124);
+    int16x8_t v2126 = vqrdmulhq_n_s16(v2125, 17401);
+    int16x8_t v2127 = vaddq_s16(v2115, v2126);
+    int16x8_t v2128 = vsubq_s16(v147, v157);
+    int16x8_t v2129 = vsubq_s16(v170, v179);
+    int16x8_t v2130_tmp = vqrdmulhq_n_s16(v2129, 18446);
+    int16x8_t v2130 = vmlaq_n_s16(v2130_tmp, v2129, 2);
+    int16x8_t v2131 = vaddq_s16(v2128, v2130);
+    int16x8_t v2132 = vsubq_s16(v194, v212);
+    int16x8_t v2133 = vsubq_s16(v219, v229);
+    int16x8_t v2134_tmp = vqrdmulhq_n_s16(v2133, 18446);
+    int16x8_t v2134 = vmlaq_n_s16(v2134_tmp, v2133, 2);
+    int16x8_t v2135 = vaddq_s16(v2132, v2134);
+    int16x8_t v2136 = vqrdmulhq_n_s16(v2135, 21195);
+    int16x8_t v2137 = vaddq_s16(v2131, v2136);
+    int16x8_t v2138 = vsubq_s16(v240, v250);
+    int16x8_t v2139 = vsubq_s16(v263, v272);
+    int16x8_t v2140_tmp = vqrdmulhq_n_s16(v2139, 18446);
+    int16x8_t v2140 = vmlaq_n_s16(v2140_tmp, v2139, 2);
+    int16x8_t v2141 = vaddq_s16(v2138, v2140);
+    int16x8_t v2142 = vsubq_s16(v281, v291);
+    int16x8_t v2143 = vsubq_s16(v298, v308);
+    int16x8_t v2144_tmp = vqrdmulhq_n_s16(v2143, 18446);
+    int16x8_t v2144 = vmlaq_n_s16(v2144_tmp, v2143, 2);
+    int16x8_t v2145 = vaddq_s16(v2142, v2144);
+    int16x8_t v2146 = vqrdmulhq_n_s16(v2145, 21195);
+    int16x8_t v2147 = vaddq_s16(v2141, v2146);
+    int16x8_t v2148 = vqrdmulhq_n_s16(v2147, 17401);
+    int16x8_t v2149 = vaddq_s16(v2137, v2148);
+    int16x8_t v2150 = vqrdmulhq_n_s16(v2149, 16629);
+    int16x8_t v2151 = vaddq_s16(v2127, v2150);
+    int16x8_t v2152 = vsubq_s16(v323, v333);
+    int16x8_t v2153 = vsubq_s16(v346, v355);
+    int16x8_t v2154_tmp = vqrdmulhq_n_s16(v2153, 18446);
+    int16x8_t v2154 = vmlaq_n_s16(v2154_tmp, v2153, 2);
+    int16x8_t v2155 = vaddq_s16(v2152, v2154);
+    int16x8_t v2156 = vsubq_s16(v370, v388);
+    int16x8_t v2157 = vsubq_s16(v395, v405);
+    int16x8_t v2158_tmp = vqrdmulhq_n_s16(v2157, 18446);
+    int16x8_t v2158 = vmlaq_n_s16(v2158_tmp, v2157, 2);
+    int16x8_t v2159 = vaddq_s16(v2156, v2158);
+    int16x8_t v2160 = vqrdmulhq_n_s16(v2159, 21195);
+    int16x8_t v2161 = vaddq_s16(v2155, v2160);
+    int16x8_t v2162 = vsubq_s16(v422, v440);
+    int16x8_t v2163 = vsubq_s16(v465, v478);
+    int16x8_t v2164_tmp = vqrdmulhq_n_s16(v2163, 18446);
+    int16x8_t v2164 = vmlaq_n_s16(v2164_tmp, v2163, 2);
+    int16x8_t v2165 = vaddq_s16(v2162, v2164);
+    int16x8_t v2166 = vsubq_s16(v487, v497);
+    int16x8_t v2167 = vsubq_s16(v504, v515);
+    int16x8_t v2168_tmp = vqrdmulhq_n_s16(v2167, 18446);
+    int16x8_t v2168 = vmlaq_n_s16(v2168_tmp, v2167, 2);
+    int16x8_t v2169 = vaddq_s16(v2166, v2168);
+    int16x8_t v2170 = vqrdmulhq_n_s16(v2169, 21195);
+    int16x8_t v2171 = vaddq_s16(v2165, v2170);
+    int16x8_t v2172 = vqrdmulhq_n_s16(v2171, 17401);
+    int16x8_t v2173 = vaddq_s16(v2161, v2172);
+    int16x8_t v2174 = vsubq_s16(v528, v538);
+    int16x8_t v2175 = vsubq_s16(v551, v560);
+    int16x8_t v2176_tmp = vqrdmulhq_n_s16(v2175, 18446);
+    int16x8_t v2176 = vmlaq_n_s16(v2176_tmp, v2175, 2);
+    int16x8_t v2177 = vaddq_s16(v2174, v2176);
+    int16x8_t v2178 = vsubq_s16(v575, v593);
+    int16x8_t v2179 = vsubq_s16(v600, v610);
+    int16x8_t v2180_tmp = vqrdmulhq_n_s16(v2179, 18446);
+    int16x8_t v2180 = vmlaq_n_s16(v2180_tmp, v2179, 2);
+    int16x8_t v2181 = vaddq_s16(v2178, v2180);
+    int16x8_t v2182 = vqrdmulhq_n_s16(v2181, 21195);
+    int16x8_t v2183 = vaddq_s16(v2177, v2182);
+    int16x8_t v2184 = vsubq_s16(v621, v631);
+    int16x8_t v2185 = vsubq_s16(v644, v653);
+    int16x8_t v2186_tmp = vqrdmulhq_n_s16(v2185, 18446);
+    int16x8_t v2186 = vmlaq_n_s16(v2186_tmp, v2185, 2);
+    int16x8_t v2187 = vaddq_s16(v2184, v2186);
+    int16x8_t v2188 = vsubq_s16(v662, v672);
+    int16x8_t v2189 = vsubq_s16(v679, v690);
+    int16x8_t v2190_tmp = vqrdmulhq_n_s16(v2189, 18446);
+    int16x8_t v2190 = vmlaq_n_s16(v2190_tmp, v2189, 2);
+    int16x8_t v2191 = vaddq_s16(v2188, v2190);
+    int16x8_t v2192 = vqrdmulhq_n_s16(v2191, 21195);
+    int16x8_t v2193 = vaddq_s16(v2187, v2192);
+    int16x8_t v2194 = vqrdmulhq_n_s16(v2193, 17401);
+    int16x8_t v2195 = vaddq_s16(v2183, v2194);
+    int16x8_t v2196 = vqrdmulhq_n_s16(v2195, 16629);
+    int16x8_t v2197 = vaddq_s16(v2173, v2196);
+    int16x8_t v2198 = vqrdmulhq_n_s16(v2197, 16445);
+    int16x8_t v2199 = vaddq_s16(v2151, v2198);
+    int16x8_t v2200 = vsubq_s16(v707, v717);
+    int16x8_t v2201 = vsubq_s16(v730, v739);
+    int16x8_t v2202_tmp = vqrdmulhq_n_s16(v2201, 18446);
+    int16x8_t v2202 = vmlaq_n_s16(v2202_tmp, v2201, 2);
+    int16x8_t v2203 = vaddq_s16(v2200, v2202);
+    int16x8_t v2204 = vsubq_s16(v754, v772);
+    int16x8_t v2205 = vsubq_s16(v779, v789);
+    int16x8_t v2206_tmp = vqrdmulhq_n_s16(v2205, 18446);
+    int16x8_t v2206 = vmlaq_n_s16(v2206_tmp, v2205, 2);
+    int16x8_t v2207 = vaddq_s16(v2204, v2206);
+    int16x8_t v2208 = vqrdmulhq_n_s16(v2207, 21195);
+    int16x8_t v2209 = vaddq_s16(v2203, v2208);
+    int16x8_t v2210 = vsubq_s16(v806, v824);
+    int16x8_t v2211 = vsubq_s16(v849, v862);
+    int16x8_t v2212_tmp = vqrdmulhq_n_s16(v2211, 18446);
+    int16x8_t v2212 = vmlaq_n_s16(v2212_tmp, v2211, 2);
+    int16x8_t v2213 = vaddq_s16(v2210, v2212);
+    int16x8_t v2214 = vsubq_s16(v871, v881);
+    int16x8_t v2215 = vsubq_s16(v888, v899);
+    int16x8_t v2216_tmp = vqrdmulhq_n_s16(v2215, 18446);
+    int16x8_t v2216 = vmlaq_n_s16(v2216_tmp, v2215, 2);
+    int16x8_t v2217 = vaddq_s16(v2214, v2216);
+    int16x8_t v2218 = vqrdmulhq_n_s16(v2217, 21195);
+    int16x8_t v2219 = vaddq_s16(v2213, v2218);
+    int16x8_t v2220 = vqrdmulhq_n_s16(v2219, 17401);
+    int16x8_t v2221 = vaddq_s16(v2209, v2220);
+    int16x8_t v2222 = vsubq_s16(v918, v936);
+    int16x8_t v2223 = vsubq_s16(v961, v974);
+    int16x8_t v2224_tmp = vqrdmulhq_n_s16(v2223, 18446);
+    int16x8_t v2224 = vmlaq_n_s16(v2224_tmp, v2223, 2);
+    int16x8_t v2225 = vaddq_s16(v2222, v2224);
+    int16x8_t v2226 = vsubq_s16(v1001, v1035);
+    int16x8_t v2227 = vsubq_s16(v1042, v1056);
+    int16x8_t v2228_tmp = vqrdmulhq_n_s16(v2227, 18446);
+    int16x8_t v2228 = vmlaq_n_s16(v2228_tmp, v2227, 2);
+    int16x8_t v2229 = vaddq_s16(v2226, v2228);
+    int16x8_t v2230 = vqrdmulhq_n_s16(v2229, 21195);
+    int16x8_t v2231 = vaddq_s16(v2225, v2230);
+    int16x8_t v2232 = vsubq_s16(v1067, v1077);
+    int16x8_t v2233 = vsubq_s16(v1090, v1099);
+    int16x8_t v2234_tmp = vqrdmulhq_n_s16(v2233, 18446);
+    int16x8_t v2234 = vmlaq_n_s16(v2234_tmp, v2233, 2);
+    int16x8_t v2235 = vaddq_s16(v2232, v2234);
+    int16x8_t v2236 = vsubq_s16(v1108, v1118);
+    int16x8_t v2237 = vsubq_s16(v1125, v1137);
+    int16x8_t v2238_tmp = vqrdmulhq_n_s16(v2237, 18446);
+    int16x8_t v2238 = vmlaq_n_s16(v2238_tmp, v2237, 2);
+    int16x8_t v2239 = vaddq_s16(v2236, v2238);
+    int16x8_t v2240 = vqrdmulhq_n_s16(v2239, 21195);
+    int16x8_t v2241 = vaddq_s16(v2235, v2240);
+    int16x8_t v2242 = vqrdmulhq_n_s16(v2241, 17401);
+    int16x8_t v2243 = vaddq_s16(v2231, v2242);
+    int16x8_t v2244 = vqrdmulhq_n_s16(v2243, 16629);
+    int16x8_t v2245 = vaddq_s16(v2221, v2244);
+    int16x8_t v2246 = vsubq_s16(v1152, v1162);
+    int16x8_t v2247 = vsubq_s16(v1175, v1184);
+    int16x8_t v2248_tmp = vqrdmulhq_n_s16(v2247, 18446);
+    int16x8_t v2248 = vmlaq_n_s16(v2248_tmp, v2247, 2);
+    int16x8_t v2249 = vaddq_s16(v2246, v2248);
+    int16x8_t v2250 = vsubq_s16(v1199, v1217);
+    int16x8_t v2251 = vsubq_s16(v1224, v1234);
+    int16x8_t v2252_tmp = vqrdmulhq_n_s16(v2251, 18446);
+    int16x8_t v2252 = vmlaq_n_s16(v2252_tmp, v2251, 2);
+    int16x8_t v2253 = vaddq_s16(v2250, v2252);
+    int16x8_t v2254 = vqrdmulhq_n_s16(v2253, 21195);
+    int16x8_t v2255 = vaddq_s16(v2249, v2254);
+    int16x8_t v2256 = vsubq_s16(v1251, v1269);
+    int16x8_t v2257 = vsubq_s16(v1294, v1307);
+    int16x8_t v2258_tmp = vqrdmulhq_n_s16(v2257, 18446);
+    int16x8_t v2258 = vmlaq_n_s16(v2258_tmp, v2257, 2);
+    int16x8_t v2259 = vaddq_s16(v2256, v2258);
+    int16x8_t v2260 = vsubq_s16(v1316, v1326);
+    int16x8_t v2261 = vsubq_s16(v1333, v1344);
+    int16x8_t v2262_tmp = vqrdmulhq_n_s16(v2261, 18446);
+    int16x8_t v2262 = vmlaq_n_s16(v2262_tmp, v2261, 2);
+    int16x8_t v2263 = vaddq_s16(v2260, v2262);
+    int16x8_t v2264 = vqrdmulhq_n_s16(v2263, 21195);
+    int16x8_t v2265 = vaddq_s16(v2259, v2264);
+    int16x8_t v2266 = vqrdmulhq_n_s16(v2265, 17401);
+    int16x8_t v2267 = vaddq_s16(v2255, v2266);
+    int16x8_t v2268 = vsubq_s16(v1357, v1367);
+    int16x8_t v2269 = vsubq_s16(v1380, v1389);
+    int16x8_t v2270_tmp = vqrdmulhq_n_s16(v2269, 18446);
+    int16x8_t v2270 = vmlaq_n_s16(v2270_tmp, v2269, 2);
+    int16x8_t v2271 = vaddq_s16(v2268, v2270);
+    int16x8_t v2272 = vsubq_s16(v1404, v1422);
+    int16x8_t v2273 = vsubq_s16(v1429, v1439);
+    int16x8_t v2274_tmp = vqrdmulhq_n_s16(v2273, 18446);
+    int16x8_t v2274 = vmlaq_n_s16(v2274_tmp, v2273, 2);
+    int16x8_t v2275 = vaddq_s16(v2272, v2274);
+    int16x8_t v2276 = vqrdmulhq_n_s16(v2275, 21195);
+    int16x8_t v2277 = vaddq_s16(v2271, v2276);
+    int16x8_t v2278 = vsubq_s16(v1450, v1460);
+    int16x8_t v2279 = vsubq_s16(v1473, v1482);
+    int16x8_t v2280_tmp = vqrdmulhq_n_s16(v2279, 18446);
+    int16x8_t v2280 = vmlaq_n_s16(v2280_tmp, v2279, 2);
+    int16x8_t v2281 = vaddq_s16(v2278, v2280);
+    int16x8_t v2282 = vsubq_s16(v1491, v1501);
+    int16x8_t v2283 = vsubq_s16(v1508, v1520);
+    int16x8_t v2284_tmp = vqrdmulhq_n_s16(v2283, 18446);
+    int16x8_t v2284 = vmlaq_n_s16(v2284_tmp, v2283, 2);
+    int16x8_t v2285 = vaddq_s16(v2282, v2284);
+    int16x8_t v2286 = vqrdmulhq_n_s16(v2285, 21195);
+    int16x8_t v2287 = vaddq_s16(v2281, v2286);
+    int16x8_t v2288 = vqrdmulhq_n_s16(v2287, 17401);
+    int16x8_t v2289 = vaddq_s16(v2277, v2288);
+    int16x8_t v2290 = vqrdmulhq_n_s16(v2289, 16629);
+    int16x8_t v2291 = vaddq_s16(v2267, v2290);
+    int16x8_t v2292 = vqrdmulhq_n_s16(v2291, 16445);
+    int16x8_t v2293 = vaddq_s16(v2245, v2292);
+    int16x8_t v2294 = vqrdmulhq_n_s16(v2293, 16399);
+    int16x8_t v2295 = vaddq_s16(v2199, v2294);
+    int16x8_t v2296 = vsubq_s16(v2106, v2108);
+    int16x8_t v2297 = vsubq_s16(v2110, v2112);
+    int16x8_t v2298 = vqrdmulhq_n_s16(v2297, 25826);
+    int16x8_t v2299 = vaddq_s16(v2296, v2298);
+    int16x8_t v2300 = vsubq_s16(v2116, v2118);
+    int16x8_t v2301 = vsubq_s16(v2120, v2122);
+    int16x8_t v2302 = vqrdmulhq_n_s16(v2301, 25826);
+    int16x8_t v2303 = vaddq_s16(v2300, v2302);
+    int16x8_t v2304 = vqrdmulhq_n_s16(v2303, 18124);
+    int16x8_t v2305 = vaddq_s16(v2299, v2304);
+    int16x8_t v2306 = vsubq_s16(v2128, v2130);
+    int16x8_t v2307 = vsubq_s16(v2132, v2134);
+    int16x8_t v2308 = vqrdmulhq_n_s16(v2307, 25826);
+    int16x8_t v2309 = vaddq_s16(v2306, v2308);
+    int16x8_t v2310 = vsubq_s16(v2138, v2140);
+    int16x8_t v2311 = vsubq_s16(v2142, v2144);
+    int16x8_t v2312 = vqrdmulhq_n_s16(v2311, 25826);
+    int16x8_t v2313 = vaddq_s16(v2310, v2312);
+    int16x8_t v2314 = vqrdmulhq_n_s16(v2313, 18124);
+    int16x8_t v2315 = vaddq_s16(v2309, v2314);
+    int16x8_t v2316 = vqrdmulhq_n_s16(v2315, 16792);
+    int16x8_t v2317 = vaddq_s16(v2305, v2316);
+    int16x8_t v2318 = vsubq_s16(v2152, v2154);
+    int16x8_t v2319 = vsubq_s16(v2156, v2158);
+    int16x8_t v2320 = vqrdmulhq_n_s16(v2319, 25826);
+    int16x8_t v2321 = vaddq_s16(v2318, v2320);
+    int16x8_t v2322 = vsubq_s16(v2162, v2164);
+    int16x8_t v2323 = vsubq_s16(v2166, v2168);
+    int16x8_t v2324 = vqrdmulhq_n_s16(v2323, 25826);
+    int16x8_t v2325 = vaddq_s16(v2322, v2324);
+    int16x8_t v2326 = vqrdmulhq_n_s16(v2325, 18124);
+    int16x8_t v2327 = vaddq_s16(v2321, v2326);
+    int16x8_t v2328 = vsubq_s16(v2174, v2176);
+    int16x8_t v2329 = vsubq_s16(v2178, v2180);
+    int16x8_t v2330 = vqrdmulhq_n_s16(v2329, 25826);
+    int16x8_t v2331 = vaddq_s16(v2328, v2330);
+    int16x8_t v2332 = vsubq_s16(v2184, v2186);
+    int16x8_t v2333 = vsubq_s16(v2188, v2190);
+    int16x8_t v2334 = vqrdmulhq_n_s16(v2333, 25826);
+    int16x8_t v2335 = vaddq_s16(v2332, v2334);
+    int16x8_t v2336 = vqrdmulhq_n_s16(v2335, 18124);
+    int16x8_t v2337 = vaddq_s16(v2331, v2336);
+    int16x8_t v2338 = vqrdmulhq_n_s16(v2337, 16792);
+    int16x8_t v2339 = vaddq_s16(v2327, v2338);
+    int16x8_t v2340 = vqrdmulhq_n_s16(v2339, 16484);
+    int16x8_t v2341 = vaddq_s16(v2317, v2340);
+    int16x8_t v2342 = vsubq_s16(v2200, v2202);
+    int16x8_t v2343 = vsubq_s16(v2204, v2206);
+    int16x8_t v2344 = vqrdmulhq_n_s16(v2343, 25826);
+    int16x8_t v2345 = vaddq_s16(v2342, v2344);
+    int16x8_t v2346 = vsubq_s16(v2210, v2212);
+    int16x8_t v2347 = vsubq_s16(v2214, v2216);
+    int16x8_t v2348 = vqrdmulhq_n_s16(v2347, 25826);
+    int16x8_t v2349 = vaddq_s16(v2346, v2348);
+    int16x8_t v2350 = vqrdmulhq_n_s16(v2349, 18124);
+    int16x8_t v2351 = vaddq_s16(v2345, v2350);
+    int16x8_t v2352 = vsubq_s16(v2222, v2224);
+    int16x8_t v2353 = vsubq_s16(v2226, v2228);
+    int16x8_t v2354 = vqrdmulhq_n_s16(v2353, 25826);
+    int16x8_t v2355 = vaddq_s16(v2352, v2354);
+    int16x8_t v2356 = vsubq_s16(v2232, v2234);
+    int16x8_t v2357 = vsubq_s16(v2236, v2238);
+    int16x8_t v2358 = vqrdmulhq_n_s16(v2357, 25826);
+    int16x8_t v2359 = vaddq_s16(v2356, v2358);
+    int16x8_t v2360 = vqrdmulhq_n_s16(v2359, 18124);
+    int16x8_t v2361 = vaddq_s16(v2355, v2360);
+    int16x8_t v2362 = vqrdmulhq_n_s16(v2361, 16792);
+    int16x8_t v2363 = vaddq_s16(v2351, v2362);
+    int16x8_t v2364 = vsubq_s16(v2246, v2248);
+    int16x8_t v2365 = vsubq_s16(v2250, v2252);
+    int16x8_t v2366 = vqrdmulhq_n_s16(v2365, 25826);
+    int16x8_t v2367 = vaddq_s16(v2364, v2366);
+    int16x8_t v2368 = vsubq_s16(v2256, v2258);
+    int16x8_t v2369 = vsubq_s16(v2260, v2262);
+    int16x8_t v2370 = vqrdmulhq_n_s16(v2369, 25826);
+    int16x8_t v2371 = vaddq_s16(v2368, v2370);
+    int16x8_t v2372 = vqrdmulhq_n_s16(v2371, 18124);
+    int16x8_t v2373 = vaddq_s16(v2367, v2372);
+    int16x8_t v2374 = vsubq_s16(v2268, v2270);
+    int16x8_t v2375 = vsubq_s16(v2272, v2274);
+    int16x8_t v2376 = vqrdmulhq_n_s16(v2375, 25826);
+    int16x8_t v2377 = vaddq_s16(v2374, v2376);
+    int16x8_t v2378 = vsubq_s16(v2278, v2280);
+    int16x8_t v2379 = vsubq_s16(v2282, v2284);
+    int16x8_t v2380 = vqrdmulhq_n_s16(v2379, 25826);
+    int16x8_t v2381 = vaddq_s16(v2378, v2380);
+    int16x8_t v2382 = vqrdmulhq_n_s16(v2381, 18124);
+    int16x8_t v2383 = vaddq_s16(v2377, v2382);
+    int16x8_t v2384 = vqrdmulhq_n_s16(v2383, 16792);
+    int16x8_t v2385 = vaddq_s16(v2373, v2384);
+    int16x8_t v2386 = vqrdmulhq_n_s16(v2385, 16484);
+    int16x8_t v2387 = vaddq_s16(v2363, v2386);
+    int16x8_t v2388 = vqrdmulhq_n_s16(v2387, 16409);
+    int16x8_t v2389 = vaddq_s16(v2341, v2388);
+    int16x8_t v2390 = vsubq_s16(v1916, v1918);
+    int16x8_t v2391 = vsubq_s16(v1920, v1922);
+    int16x8_t v2392_tmp = vqrdmulhq_n_s16(v2391, 1988);
+    int16x8_t v2392 = vaddq_s16(v2392_tmp, v2391);
+    int16x8_t v2393 = vaddq_s16(v2390, v2392);
+    int16x8_t v2394 = vsubq_s16(v1926, v1928);
+    int16x8_t v2395 = vsubq_s16(v1930, v1932);
+    int16x8_t v2396_tmp = vqrdmulhq_n_s16(v2395, 1988);
+    int16x8_t v2396 = vaddq_s16(v2396_tmp, v2395);
+    int16x8_t v2397 = vaddq_s16(v2394, v2396);
+    int16x8_t v2398 = vqrdmulhq_n_s16(v2397, 19102);
+    int16x8_t v2399 = vaddq_s16(v2393, v2398);
+    int16x8_t v2400 = vsubq_s16(v1938, v1940);
+    int16x8_t v2401 = vsubq_s16(v1942, v1944);
+    int16x8_t v2402_tmp = vqrdmulhq_n_s16(v2401, 1988);
+    int16x8_t v2402 = vaddq_s16(v2402_tmp, v2401);
+    int16x8_t v2403 = vaddq_s16(v2400, v2402);
+    int16x8_t v2404 = vsubq_s16(v1948, v1950);
+    int16x8_t v2405 = vsubq_s16(v1952, v1954);
+    int16x8_t v2406_tmp = vqrdmulhq_n_s16(v2405, 1988);
+    int16x8_t v2406 = vaddq_s16(v2406_tmp, v2405);
+    int16x8_t v2407 = vaddq_s16(v2404, v2406);
+    int16x8_t v2408 = vqrdmulhq_n_s16(v2407, 19102);
+    int16x8_t v2409 = vaddq_s16(v2403, v2408);
+    int16x8_t v2410 = vqrdmulhq_n_s16(v2409, 17000);
+    int16x8_t v2411 = vaddq_s16(v2399, v2410);
+    int16x8_t v2412 = vsubq_s16(v1962, v1964);
+    int16x8_t v2413 = vsubq_s16(v1966, v1968);
+    int16x8_t v2414_tmp = vqrdmulhq_n_s16(v2413, 1988);
+    int16x8_t v2414 = vaddq_s16(v2414_tmp, v2413);
+    int16x8_t v2415 = vaddq_s16(v2412, v2414);
+    int16x8_t v2416 = vsubq_s16(v1972, v1974);
+    int16x8_t v2417 = vsubq_s16(v1976, v1978);
+    int16x8_t v2418_tmp = vqrdmulhq_n_s16(v2417, 1988);
+    int16x8_t v2418 = vaddq_s16(v2418_tmp, v2417);
+    int16x8_t v2419 = vaddq_s16(v2416, v2418);
+    int16x8_t v2420 = vqrdmulhq_n_s16(v2419, 19102);
+    int16x8_t v2421 = vaddq_s16(v2415, v2420);
+    int16x8_t v2422 = vsubq_s16(v1984, v1986);
+    int16x8_t v2423 = vsubq_s16(v1988, v1990);
+    int16x8_t v2424_tmp = vqrdmulhq_n_s16(v2423, 1988);
+    int16x8_t v2424 = vaddq_s16(v2424_tmp, v2423);
+    int16x8_t v2425 = vaddq_s16(v2422, v2424);
+    int16x8_t v2426 = vsubq_s16(v1994, v1996);
+    int16x8_t v2427 = vsubq_s16(v1998, v2000);
+    int16x8_t v2428_tmp = vqrdmulhq_n_s16(v2427, 1988);
+    int16x8_t v2428 = vaddq_s16(v2428_tmp, v2427);
+    int16x8_t v2429 = vaddq_s16(v2426, v2428);
+    int16x8_t v2430 = vqrdmulhq_n_s16(v2429, 19102);
+    int16x8_t v2431 = vaddq_s16(v2425, v2430);
+    int16x8_t v2432 = vqrdmulhq_n_s16(v2431, 17000);
+    int16x8_t v2433 = vaddq_s16(v2421, v2432);
+    int16x8_t v2434 = vqrdmulhq_n_s16(v2433, 16534);
+    int16x8_t v2435 = vaddq_s16(v2411, v2434);
+    int16x8_t v2436 = vsubq_s16(v2010, v2012);
+    int16x8_t v2437 = vsubq_s16(v2014, v2016);
+    int16x8_t v2438_tmp = vqrdmulhq_n_s16(v2437, 1988);
+    int16x8_t v2438 = vaddq_s16(v2438_tmp, v2437);
+    int16x8_t v2439 = vaddq_s16(v2436, v2438);
+    int16x8_t v2440 = vsubq_s16(v2020, v2022);
+    int16x8_t v2441 = vsubq_s16(v2024, v2026);
+    int16x8_t v2442_tmp = vqrdmulhq_n_s16(v2441, 1988);
+    int16x8_t v2442 = vaddq_s16(v2442_tmp, v2441);
+    int16x8_t v2443 = vaddq_s16(v2440, v2442);
+    int16x8_t v2444 = vqrdmulhq_n_s16(v2443, 19102);
+    int16x8_t v2445 = vaddq_s16(v2439, v2444);
+    int16x8_t v2446 = vsubq_s16(v2032, v2034);
+    int16x8_t v2447 = vsubq_s16(v2036, v2038);
+    int16x8_t v2448_tmp = vqrdmulhq_n_s16(v2447, 1988);
+    int16x8_t v2448 = vaddq_s16(v2448_tmp, v2447);
+    int16x8_t v2449 = vaddq_s16(v2446, v2448);
+    int16x8_t v2450 = vsubq_s16(v2042, v2044);
+    int16x8_t v2451 = vsubq_s16(v2046, v2048);
+    int16x8_t v2452_tmp = vqrdmulhq_n_s16(v2451, 1988);
+    int16x8_t v2452 = vaddq_s16(v2452_tmp, v2451);
+    int16x8_t v2453 = vaddq_s16(v2450, v2452);
+    int16x8_t v2454 = vqrdmulhq_n_s16(v2453, 19102);
+    int16x8_t v2455 = vaddq_s16(v2449, v2454);
+    int16x8_t v2456 = vqrdmulhq_n_s16(v2455, 17000);
+    int16x8_t v2457 = vaddq_s16(v2445, v2456);
+    int16x8_t v2458 = vsubq_s16(v2056, v2058);
+    int16x8_t v2459 = vsubq_s16(v2060, v2062);
+    int16x8_t v2460_tmp = vqrdmulhq_n_s16(v2459, 1988);
+    int16x8_t v2460 = vaddq_s16(v2460_tmp, v2459);
+    int16x8_t v2461 = vaddq_s16(v2458, v2460);
+    int16x8_t v2462 = vsubq_s16(v2066, v2068);
+    int16x8_t v2463 = vqrdmulhq_n_s16(v2072, 29490);
+    int16x8_t v2464 = vsubq_s16(v2070, v2463);
+    int16x8_t v2465_tmp = vqrdmulhq_n_s16(v2464, 1988);
+    int16x8_t v2465 = vaddq_s16(v2465_tmp, v2464);
+    int16x8_t v2466 = vaddq_s16(v2462, v2465);
+    int16x8_t v2467 = vqrdmulhq_n_s16(v2466, 19102);
+    int16x8_t v2468 = vaddq_s16(v2461, v2467);
+    int16x8_t v2469 = vsubq_s16(v2078, v2080);
+    int16x8_t v2470 = vsubq_s16(v2082, v2084);
+    int16x8_t v2471_tmp = vqrdmulhq_n_s16(v2470, 1988);
+    int16x8_t v2471 = vaddq_s16(v2471_tmp, v2470);
+    int16x8_t v2472 = vaddq_s16(v2469, v2471);
+    int16x8_t v2473 = vsubq_s16(v2088, v2090);
+    int16x8_t v2474 = vsubq_s16(v2092, v2094);
+    int16x8_t v2475_tmp = vqrdmulhq_n_s16(v2474, 1988);
+    int16x8_t v2475 = vaddq_s16(v2475_tmp, v2474);
+    int16x8_t v2476 = vaddq_s16(v2473, v2475);
+    int16x8_t v2477 = vqrdmulhq_n_s16(v2476, 19102);
+    int16x8_t v2478 = vaddq_s16(v2472, v2477);
+    int16x8_t v2479 = vqrdmulhq_n_s16(v2478, 17000);
+    int16x8_t v2480 = vaddq_s16(v2468, v2479);
+    int16x8_t v2481 = vqrdmulhq_n_s16(v2480, 16534);
+    int16x8_t v2482 = vaddq_s16(v2457, v2481);
+    int16x8_t v2483 = vqrdmulhq_n_s16(v2482, 16421);
+    int16x8_t v2484 = vaddq_s16(v2435, v2483);
+    int16x8_t v2485 = vsubq_s16(v1537, v1542);
+    int16x8_t v2486 = vsubq_s16(v1547, v1552);
+    int16x8_t v2487_tmp = vqrdmulhq_n_s16(v2486, 23673);
+    int16x8_t v2487 = vaddq_s16(v2487_tmp, v2486);
+    int16x8_t v2488 = vaddq_s16(v2485, v2487);
+    int16x8_t v2489 = vsubq_s16(v1559, v1564);
+    int16x8_t v2490 = vsubq_s16(v1569, v1574);
+    int16x8_t v2491_tmp = vqrdmulhq_n_s16(v2490, 23673);
+    int16x8_t v2491 = vaddq_s16(v2491_tmp, v2490);
+    int16x8_t v2492 = vaddq_s16(v2489, v2491);
+    int16x8_t v2493 = vqrdmulhq_n_s16(v2492, 20398);
+    int16x8_t v2494 = vaddq_s16(v2488, v2493);
+    int16x8_t v2495 = vsubq_s16(v1583, v1588);
+    int16x8_t v2496 = vsubq_s16(v1593, v1598);
+    int16x8_t v2497_tmp = vqrdmulhq_n_s16(v2496, 23673);
+    int16x8_t v2497 = vaddq_s16(v2497_tmp, v2496);
+    int16x8_t v2498 = vaddq_s16(v2495, v2497);
+    int16x8_t v2499 = vsubq_s16(v1605, v1610);
+    int16x8_t v2500 = vsubq_s16(v1615, v1620);
+    int16x8_t v2501_tmp = vqrdmulhq_n_s16(v2500, 23673);
+    int16x8_t v2501 = vaddq_s16(v2501_tmp, v2500);
+    int16x8_t v2502 = vaddq_s16(v2499, v2501);
+    int16x8_t v2503 = vqrdmulhq_n_s16(v2502, 20398);
+    int16x8_t v2504 = vaddq_s16(v2498, v2503);
+    int16x8_t v2505 = vqrdmulhq_n_s16(v2504, 17255);
+    int16x8_t v2506 = vaddq_s16(v2494, v2505);
+    int16x8_t v2507 = vsubq_s16(v1631, v1636);
+    int16x8_t v2508 = vsubq_s16(v1641, v1646);
+    int16x8_t v2509_tmp = vqrdmulhq_n_s16(v2508, 23673);
+    int16x8_t v2509 = vaddq_s16(v2509_tmp, v2508);
+    int16x8_t v2510 = vaddq_s16(v2507, v2509);
+    int16x8_t v2511 = vsubq_s16(v1653, v1658);
+    int16x8_t v2512 = vsubq_s16(v1663, v1668);
+    int16x8_t v2513_tmp = vqrdmulhq_n_s16(v2512, 23673);
+    int16x8_t v2513 = vaddq_s16(v2513_tmp, v2512);
+    int16x8_t v2514 = vaddq_s16(v2511, v2513);
+    int16x8_t v2515 = vqrdmulhq_n_s16(v2514, 20398);
+    int16x8_t v2516 = vaddq_s16(v2510, v2515);
+    int16x8_t v2517 = vsubq_s16(v1677, v1682);
+    int16x8_t v2518 = vsubq_s16(v1687, v1692);
+    int16x8_t v2519_tmp = vqrdmulhq_n_s16(v2518, 23673);
+    int16x8_t v2519 = vaddq_s16(v2519_tmp, v2518);
+    int16x8_t v2520 = vaddq_s16(v2517, v2519);
+    int16x8_t v2521 = vsubq_s16(v1699, v1704);
+    int16x8_t v2522 = vsubq_s16(v1709, v1714);
+    int16x8_t v2523_tmp = vqrdmulhq_n_s16(v2522, 23673);
+    int16x8_t v2523 = vaddq_s16(v2523_tmp, v2522);
+    int16x8_t v2524 = vaddq_s16(v2521, v2523);
+    int16x8_t v2525 = vqrdmulhq_n_s16(v2524, 20398);
+    int16x8_t v2526 = vaddq_s16(v2520, v2525);
+    int16x8_t v2527 = vqrdmulhq_n_s16(v2526, 17255);
+    int16x8_t v2528 = vaddq_s16(v2516, v2527);
+    int16x8_t v2529 = vqrdmulhq_n_s16(v2528, 16595);
+    int16x8_t v2530 = vaddq_s16(v2506, v2529);
+    int16x8_t v2531 = vsubq_s16(v1727, v1732);
+    int16x8_t v2532 = vsubq_s16(v1737, v1742);
+    int16x8_t v2533_tmp = vqrdmulhq_n_s16(v2532, 23673);
+    int16x8_t v2533 = vaddq_s16(v2533_tmp, v2532);
+    int16x8_t v2534 = vaddq_s16(v2531, v2533);
+    int16x8_t v2535 = vsubq_s16(v1749, v1754);
+    int16x8_t v2536 = vsubq_s16(v1759, v1764);
+    int16x8_t v2537_tmp = vqrdmulhq_n_s16(v2536, 23673);
+    int16x8_t v2537 = vaddq_s16(v2537_tmp, v2536);
+    int16x8_t v2538 = vaddq_s16(v2535, v2537);
+    int16x8_t v2539 = vqrdmulhq_n_s16(v2538, 20398);
+    int16x8_t v2540 = vaddq_s16(v2534, v2539);
+    int16x8_t v2541 = vsubq_s16(v1773, v1778);
+    int16x8_t v2542 = vsubq_s16(v1783, v1788);
+    int16x8_t v2543_tmp = vqrdmulhq_n_s16(v2542, 23673);
+    int16x8_t v2543 = vaddq_s16(v2543_tmp, v2542);
+    int16x8_t v2544 = vaddq_s16(v2541, v2543);
+    int16x8_t v2545 = vsubq_s16(v1795, v1800);
+    int16x8_t v2546 = vsubq_s16(v1805, v1810);
+    int16x8_t v2547_tmp = vqrdmulhq_n_s16(v2546, 23673);
+    int16x8_t v2547 = vaddq_s16(v2547_tmp, v2546);
+    int16x8_t v2548 = vaddq_s16(v2545, v2547);
+    int16x8_t v2549 = vqrdmulhq_n_s16(v2548, 20398);
+    int16x8_t v2550 = vaddq_s16(v2544, v2549);
+    int16x8_t v2551 = vqrdmulhq_n_s16(v2550, 17255);
+    int16x8_t v2552 = vaddq_s16(v2540, v2551);
+    int16x8_t v2553 = vsubq_s16(v1821, v1826);
+    int16x8_t v2554 = vsubq_s16(v1831, v1836);
+    int16x8_t v2555_tmp = vqrdmulhq_n_s16(v2554, 23673);
+    int16x8_t v2555 = vaddq_s16(v2555_tmp, v2554);
+    int16x8_t v2556 = vaddq_s16(v2553, v2555);
+    int16x8_t v2557 = vsubq_s16(v1843, v1848);
+    int16x8_t v2558 = vsubq_s16(v1853, v1858);
+    int16x8_t v2559_tmp = vqrdmulhq_n_s16(v2558, 23673);
+    int16x8_t v2559 = vaddq_s16(v2559_tmp, v2558);
+    int16x8_t v2560 = vaddq_s16(v2557, v2559);
+    int16x8_t v2561 = vqrdmulhq_n_s16(v2560, 20398);
+    int16x8_t v2562 = vaddq_s16(v2556, v2561);
+    int16x8_t v2563 = vsubq_s16(v1867, v1872);
+    int16x8_t v2564 = vsubq_s16(v1877, v1882);
+    int16x8_t v2565_tmp = vqrdmulhq_n_s16(v2564, 23673);
+    int16x8_t v2565 = vaddq_s16(v2565_tmp, v2564);
+    int16x8_t v2566 = vaddq_s16(v2563, v2565);
+    int16x8_t v2567 = vsubq_s16(v1889, v1894);
+    int16x8_t v2568 = vsubq_s16(v1899, v1904);
+    int16x8_t v2569_tmp = vqrdmulhq_n_s16(v2568, 23673);
+    int16x8_t v2569 = vaddq_s16(v2569_tmp, v2568);
+    int16x8_t v2570 = vaddq_s16(v2567, v2569);
+    int16x8_t v2571 = vqrdmulhq_n_s16(v2570, 20398);
+    int16x8_t v2572 = vaddq_s16(v2566, v2571);
+    int16x8_t v2573 = vqrdmulhq_n_s16(v2572, 17255);
+    int16x8_t v2574 = vaddq_s16(v2562, v2573);
+    int16x8_t v2575 = vqrdmulhq_n_s16(v2574, 16595);
+    int16x8_t v2576 = vaddq_s16(v2552, v2575);
+    int16x8_t v2577 = vqrdmulhq_n_s16(v2576, 16436);
+    int16x8_t v2578 = vaddq_s16(v2530, v2577);
+    int16x8_t v2579 = vsubq_s16(v9, v24);
+    int16x8_t v2580 = vsubq_s16(v42, v58);
+    int16x8_t v2581_tmp = vqrdmulhq_n_s16(v2580, 3314);
+    int16x8_t v2581 = vmlaq_n_s16(v2581_tmp, v2580, 5);
+    int16x8_t v2582 = vaddq_s16(v2579, v2581);
+    int16x8_t v2583 = vsubq_s16(v78, v101);
+    int16x8_t v2584 = vsubq_s16(v119, v136);
+    int16x8_t v2585_tmp = vqrdmulhq_n_s16(v2584, 3314);
+    int16x8_t v2585 = vmlaq_n_s16(v2585_tmp, v2584, 5);
+    int16x8_t v2586 = vaddq_s16(v2583, v2585);
+    int16x8_t v2587 = vqrdmulhq_n_s16(v2586, 22112);
+    int16x8_t v2588 = vaddq_s16(v2582, v2587);
+    int16x8_t v2589 = vsubq_s16(v158, v181);
+    int16x8_t v2590 = vsubq_s16(v213, v231);
+    int16x8_t v2591_tmp = vqrdmulhq_n_s16(v2590, 3314);
+    int16x8_t v2591 = vmlaq_n_s16(v2591_tmp, v2590, 5);
+    int16x8_t v2592 = vaddq_s16(v2589, v2591);
+    int16x8_t v2593 = vsubq_s16(v251, v274);
+    int16x8_t v2594 = vsubq_s16(v292, v310);
+    int16x8_t v2595_tmp = vqrdmulhq_n_s16(v2594, 3314);
+    int16x8_t v2595 = vmlaq_n_s16(v2595_tmp, v2594, 5);
+    int16x8_t v2596 = vaddq_s16(v2593, v2595);
+    int16x8_t v2597 = vqrdmulhq_n_s16(v2596, 22112);
+    int16x8_t v2598 = vaddq_s16(v2592, v2597);
+    int16x8_t v2599 = vqrdmulhq_n_s16(v2598, 17561);
+    int16x8_t v2600 = vaddq_s16(v2588, v2599);
+    int16x8_t v2601 = vsubq_s16(v334, v357);
+    int16x8_t v2602 = vsubq_s16(v389, v407);
+    int16x8_t v2603_tmp = vqrdmulhq_n_s16(v2602, 3314);
+    int16x8_t v2603 = vmlaq_n_s16(v2603_tmp, v2602, 5);
+    int16x8_t v2604 = vaddq_s16(v2601, v2603);
+    int16x8_t v2605 = vsubq_s16(v441, v480);
+    int16x8_t v2606 = vsubq_s16(v498, v517);
+    int16x8_t v2607_tmp = vqrdmulhq_n_s16(v2606, 3314);
+    int16x8_t v2607 = vmlaq_n_s16(v2607_tmp, v2606, 5);
+    int16x8_t v2608 = vaddq_s16(v2605, v2607);
+    int16x8_t v2609 = vqrdmulhq_n_s16(v2608, 22112);
+    int16x8_t v2610 = vaddq_s16(v2604, v2609);
+    int16x8_t v2611 = vsubq_s16(v539, v562);
+    int16x8_t v2612 = vsubq_s16(v594, v612);
+    int16x8_t v2613_tmp = vqrdmulhq_n_s16(v2612, 3314);
+    int16x8_t v2613 = vmlaq_n_s16(v2613_tmp, v2612, 5);
+    int16x8_t v2614 = vaddq_s16(v2611, v2613);
+    int16x8_t v2615 = vsubq_s16(v632, v655);
+    int16x8_t v2616 = vsubq_s16(v673, v692);
+    int16x8_t v2617_tmp = vqrdmulhq_n_s16(v2616, 3314);
+    int16x8_t v2617 = vmlaq_n_s16(v2617_tmp, v2616, 5);
+    int16x8_t v2618 = vaddq_s16(v2615, v2617);
+    int16x8_t v2619 = vqrdmulhq_n_s16(v2618, 22112);
+    int16x8_t v2620 = vaddq_s16(v2614, v2619);
+    int16x8_t v2621 = vqrdmulhq_n_s16(v2620, 17561);
+    int16x8_t v2622 = vaddq_s16(v2610, v2621);
+    int16x8_t v2623 = vqrdmulhq_n_s16(v2622, 16666);
+    int16x8_t v2624 = vaddq_s16(v2600, v2623);
+    int16x8_t v2625 = vsubq_s16(v718, v741);
+    int16x8_t v2626 = vsubq_s16(v773, v791);
+    int16x8_t v2627_tmp = vqrdmulhq_n_s16(v2626, 3314);
+    int16x8_t v2627 = vmlaq_n_s16(v2627_tmp, v2626, 5);
+    int16x8_t v2628 = vaddq_s16(v2625, v2627);
+    int16x8_t v2629 = vsubq_s16(v825, v864);
+    int16x8_t v2630 = vsubq_s16(v882, v901);
+    int16x8_t v2631_tmp = vqrdmulhq_n_s16(v2630, 3314);
+    int16x8_t v2631 = vmlaq_n_s16(v2631_tmp, v2630, 5);
+    int16x8_t v2632 = vaddq_s16(v2629, v2631);
+    int16x8_t v2633 = vqrdmulhq_n_s16(v2632, 22112);
+    int16x8_t v2634 = vaddq_s16(v2628, v2633);
+    int16x8_t v2635 = vsubq_s16(v937, v976);
+    int16x8_t v2636 = vsubq_s16(v1036, v1058);
+    int16x8_t v2637_tmp = vqrdmulhq_n_s16(v2636, 3314);
+    int16x8_t v2637 = vmlaq_n_s16(v2637_tmp, v2636, 5);
+    int16x8_t v2638 = vaddq_s16(v2635, v2637);
+    int16x8_t v2639 = vsubq_s16(v1078, v1101);
+    int16x8_t v2640 = vsubq_s16(v1119, v1139);
+    int16x8_t v2641_tmp = vqrdmulhq_n_s16(v2640, 3314);
+    int16x8_t v2641 = vmlaq_n_s16(v2641_tmp, v2640, 5);
+    int16x8_t v2642 = vaddq_s16(v2639, v2641);
+    int16x8_t v2643 = vqrdmulhq_n_s16(v2642, 22112);
+    int16x8_t v2644 = vaddq_s16(v2638, v2643);
+    int16x8_t v2645 = vqrdmulhq_n_s16(v2644, 17561);
+    int16x8_t v2646 = vaddq_s16(v2634, v2645);
+    int16x8_t v2647 = vsubq_s16(v1163, v1186);
+    int16x8_t v2648 = vsubq_s16(v1218, v1236);
+    int16x8_t v2649_tmp = vqrdmulhq_n_s16(v2648, 3314);
+    int16x8_t v2649 = vmlaq_n_s16(v2649_tmp, v2648, 5);
+    int16x8_t v2650 = vaddq_s16(v2647, v2649);
+    int16x8_t v2651 = vsubq_s16(v1270, v1309);
+    int16x8_t v2652 = vsubq_s16(v1327, v1346);
+    int16x8_t v2653_tmp = vqrdmulhq_n_s16(v2652, 3314);
+    int16x8_t v2653 = vmlaq_n_s16(v2653_tmp, v2652, 5);
+    int16x8_t v2654 = vaddq_s16(v2651, v2653);
+    int16x8_t v2655 = vqrdmulhq_n_s16(v2654, 22112);
+    int16x8_t v2656 = vaddq_s16(v2650, v2655);
+    int16x8_t v2657 = vsubq_s16(v1368, v1391);
+    int16x8_t v2658 = vsubq_s16(v1423, v1441);
+    int16x8_t v2659_tmp = vqrdmulhq_n_s16(v2658, 3314);
+    int16x8_t v2659 = vmlaq_n_s16(v2659_tmp, v2658, 5);
+    int16x8_t v2660 = vaddq_s16(v2657, v2659);
+    int16x8_t v2661 = vsubq_s16(v1461, v1484);
+    int16x8_t v2662 = vsubq_s16(v1502, v1522);
+    int16x8_t v2663_tmp = vqrdmulhq_n_s16(v2662, 3314);
+    int16x8_t v2663 = vmlaq_n_s16(v2663_tmp, v2662, 5);
+    int16x8_t v2664 = vaddq_s16(v2661, v2663);
+    int16x8_t v2665 = vqrdmulhq_n_s16(v2664, 22112);
+    int16x8_t v2666 = vaddq_s16(v2660, v2665);
+    int16x8_t v2667 = vqrdmulhq_n_s16(v2666, 17561);
+    int16x8_t v2668 = vaddq_s16(v2656, v2667);
+    int16x8_t v2669 = vqrdmulhq_n_s16(v2668, 16666);
+    int16x8_t v2670 = vaddq_s16(v2646, v2669);
+    int16x8_t v2671 = vqrdmulhq_n_s16(v2670, 16454);
+    int16x8_t v2672 = vaddq_s16(v2624, v2671);
+    int16x8_t v2673 = vsubq_s16(v2579, v2581);
+    int16x8_t v2674 = vsubq_s16(v2583, v2585);
+    int16x8_t v2675 = vqrdmulhq_n_s16(v2674, 24397);
+    int16x8_t v2676 = vaddq_s16(v2673, v2675);
+    int16x8_t v2677 = vsubq_s16(v2589, v2591);
+    int16x8_t v2678 = vsubq_s16(v2593, v2595);
+    int16x8_t v2679 = vqrdmulhq_n_s16(v2678, 24397);
+    int16x8_t v2680 = vaddq_s16(v2677, v2679);
+    int16x8_t v2681 = vqrdmulhq_n_s16(v2680, 17921);
+    int16x8_t v2682 = vaddq_s16(v2676, v2681);
+    int16x8_t v2683 = vsubq_s16(v2601, v2603);
+    int16x8_t v2684 = vsubq_s16(v2605, v2607);
+    int16x8_t v2685 = vqrdmulhq_n_s16(v2684, 24397);
+    int16x8_t v2686 = vaddq_s16(v2683, v2685);
+    int16x8_t v2687 = vsubq_s16(v2611, v2613);
+    int16x8_t v2688 = vsubq_s16(v2615, v2617);
+    int16x8_t v2689 = vqrdmulhq_n_s16(v2688, 24397);
+    int16x8_t v2690 = vaddq_s16(v2687, v2689);
+    int16x8_t v2691 = vqrdmulhq_n_s16(v2690, 17921);
+    int16x8_t v2692 = vaddq_s16(v2686, v2691);
+    int16x8_t v2693 = vqrdmulhq_n_s16(v2692, 16747);
+    int16x8_t v2694 = vaddq_s16(v2682, v2693);
+    int16x8_t v2695 = vsubq_s16(v2625, v2627);
+    int16x8_t v2696 = vsubq_s16(v2629, v2631);
+    int16x8_t v2697 = vqrdmulhq_n_s16(v2696, 24397);
+    int16x8_t v2698 = vaddq_s16(v2695, v2697);
+    int16x8_t v2699 = vsubq_s16(v2635, v2637);
+    int16x8_t v2700 = vsubq_s16(v2639, v2641);
+    int16x8_t v2701 = vqrdmulhq_n_s16(v2700, 24397);
+    int16x8_t v2702 = vaddq_s16(v2699, v2701);
+    int16x8_t v2703 = vqrdmulhq_n_s16(v2702, 17921);
+    int16x8_t v2704 = vaddq_s16(v2698, v2703);
+    int16x8_t v2705 = vsubq_s16(v2647, v2649);
+    int16x8_t v2706 = vsubq_s16(v2651, v2653);
+    int16x8_t v2707 = vqrdmulhq_n_s16(v2706, 24397);
+    int16x8_t v2708 = vaddq_s16(v2705, v2707);
+    int16x8_t v2709 = vsubq_s16(v2657, v2659);
+    int16x8_t v2710 = vsubq_s16(v2661, v2663);
+    int16x8_t v2711 = vqrdmulhq_n_s16(v2710, 24397);
+    int16x8_t v2712 = vaddq_s16(v2709, v2711);
+    int16x8_t v2713 = vqrdmulhq_n_s16(v2712, 17921);
+    int16x8_t v2714 = vaddq_s16(v2708, v2713);
+    int16x8_t v2715 = vqrdmulhq_n_s16(v2714, 16747);
+    int16x8_t v2716 = vaddq_s16(v2704, v2715);
+    int16x8_t v2717 = vqrdmulhq_n_s16(v2716, 16474);
+    int16x8_t v2718 = vaddq_s16(v2694, v2717);
+    int16x8_t v2719 = vsubq_s16(v2485, v2487);
+    int16x8_t v2720 = vsubq_s16(v2489, v2491);
+    int16x8_t v2721 = vqrdmulhq_n_s16(v2720, 27504);
+    int16x8_t v2722 = vaddq_s16(v2719, v2721);
+    int16x8_t v2723 = vsubq_s16(v2495, v2497);
+    int16x8_t v2724 = vsubq_s16(v2499, v2501);
+    int16x8_t v2725 = vqrdmulhq_n_s16(v2724, 27504);
+    int16x8_t v2726 = vaddq_s16(v2723, v2725);
+    int16x8_t v2727 = vqrdmulhq_n_s16(v2726, 18343);
+    int16x8_t v2728 = vaddq_s16(v2722, v2727);
+    int16x8_t v2729 = vsubq_s16(v2507, v2509);
+    int16x8_t v2730 = vsubq_s16(v2511, v2513);
+    int16x8_t v2731 = vqrdmulhq_n_s16(v2730, 27504);
+    int16x8_t v2732 = vaddq_s16(v2729, v2731);
+    int16x8_t v2733 = vsubq_s16(v2517, v2519);
+    int16x8_t v2734 = vsubq_s16(v2521, v2523);
+    int16x8_t v2735 = vqrdmulhq_n_s16(v2734, 27504);
+    int16x8_t v2736 = vaddq_s16(v2733, v2735);
+    int16x8_t v2737 = vqrdmulhq_n_s16(v2736, 18343);
+    int16x8_t v2738 = vaddq_s16(v2732, v2737);
+    int16x8_t v2739 = vqrdmulhq_n_s16(v2738, 16840);
+    int16x8_t v2740 = vaddq_s16(v2728, v2739);
+    int16x8_t v2741 = vsubq_s16(v2531, v2533);
+    int16x8_t v2742 = vsubq_s16(v2535, v2537);
+    int16x8_t v2743 = vqrdmulhq_n_s16(v2742, 27504);
+    int16x8_t v2744 = vaddq_s16(v2741, v2743);
+    int16x8_t v2745 = vsubq_s16(v2541, v2543);
+    int16x8_t v2746 = vsubq_s16(v2545, v2547);
+    int16x8_t v2747 = vqrdmulhq_n_s16(v2746, 27504);
+    int16x8_t v2748 = vaddq_s16(v2745, v2747);
+    int16x8_t v2749 = vqrdmulhq_n_s16(v2748, 18343);
+    int16x8_t v2750 = vaddq_s16(v2744, v2749);
+    int16x8_t v2751 = vsubq_s16(v2553, v2555);
+    int16x8_t v2752 = vsubq_s16(v2557, v2559);
+    int16x8_t v2753 = vqrdmulhq_n_s16(v2752, 27504);
+    int16x8_t v2754 = vaddq_s16(v2751, v2753);
+    int16x8_t v2755 = vsubq_s16(v2563, v2565);
+    int16x8_t v2756 = vsubq_s16(v2567, v2569);
+    int16x8_t v2757 = vqrdmulhq_n_s16(v2756, 27504);
+    int16x8_t v2758 = vaddq_s16(v2755, v2757);
+    int16x8_t v2759 = vqrdmulhq_n_s16(v2758, 18343);
+    int16x8_t v2760 = vaddq_s16(v2754, v2759);
+    int16x8_t v2761 = vqrdmulhq_n_s16(v2760, 16840);
+    int16x8_t v2762 = vaddq_s16(v2750, v2761);
+    int16x8_t v2763 = vqrdmulhq_n_s16(v2762, 16496);
+    int16x8_t v2764 = vaddq_s16(v2740, v2763);
+    int16x8_t v2765 = vsubq_s16(v2390, v2392);
+    int16x8_t v2766 = vsubq_s16(v2394, v2396);
+    int16x8_t v2767 = vqrdmulhq_n_s16(v2766, 31869);
+    int16x8_t v2768 = vaddq_s16(v2765, v2767);
+    int16x8_t v2769 = vsubq_s16(v2400, v2402);
+    int16x8_t v2770 = vsubq_s16(v2404, v2406);
+    int16x8_t v2771 = vqrdmulhq_n_s16(v2770, 31869);
+    int16x8_t v2772 = vaddq_s16(v2769, v2771);
+    int16x8_t v2773 = vqrdmulhq_n_s16(v2772, 18830);
+    int16x8_t v2774 = vaddq_s16(v2768, v2773);
+    int16x8_t v2775 = vsubq_s16(v2412, v2414);
+    int16x8_t v2776 = vsubq_s16(v2416, v2418);
+    int16x8_t v2777 = vqrdmulhq_n_s16(v2776, 31869);
+    int16x8_t v2778 = vaddq_s16(v2775, v2777);
+    int16x8_t v2779 = vsubq_s16(v2422, v2424);
+    int16x8_t v2780 = vsubq_s16(v2426, v2428);
+    int16x8_t v2781 = vqrdmulhq_n_s16(v2780, 31869);
+    int16x8_t v2782 = vaddq_s16(v2779, v2781);
+    int16x8_t v2783 = vqrdmulhq_n_s16(v2782, 18830);
+    int16x8_t v2784 = vaddq_s16(v2778, v2783);
+    int16x8_t v2785 = vqrdmulhq_n_s16(v2784, 16944);
+    int16x8_t v2786 = vaddq_s16(v2774, v2785);
+    int16x8_t v2787 = vsubq_s16(v2436, v2438);
+    int16x8_t v2788 = vsubq_s16(v2440, v2442);
+    int16x8_t v2789 = vqrdmulhq_n_s16(v2788, 31869);
+    int16x8_t v2790 = vaddq_s16(v2787, v2789);
+    int16x8_t v2791 = vsubq_s16(v2446, v2448);
+    int16x8_t v2792 = vsubq_s16(v2450, v2452);
+    int16x8_t v2793 = vqrdmulhq_n_s16(v2792, 31869);
+    int16x8_t v2794 = vaddq_s16(v2791, v2793);
+    int16x8_t v2795 = vqrdmulhq_n_s16(v2794, 18830);
+    int16x8_t v2796 = vaddq_s16(v2790, v2795);
+    int16x8_t v2797 = vsubq_s16(v2458, v2460);
+    int16x8_t v2798 = vsubq_s16(v2462, v2465);
+    int16x8_t v2799 = vqrdmulhq_n_s16(v2798, 31869);
+    int16x8_t v2800 = vaddq_s16(v2797, v2799);
+    int16x8_t v2801 = vsubq_s16(v2469, v2471);
+    int16x8_t v2802 = vsubq_s16(v2473, v2475);
+    int16x8_t v2803 = vqrdmulhq_n_s16(v2802, 31869);
+    int16x8_t v2804 = vaddq_s16(v2801, v2803);
+    int16x8_t v2805 = vqrdmulhq_n_s16(v2804, 18830);
+    int16x8_t v2806 = vaddq_s16(v2800, v2805);
+    int16x8_t v2807 = vqrdmulhq_n_s16(v2806, 16944);
+    int16x8_t v2808 = vaddq_s16(v2796, v2807);
+    int16x8_t v2809 = vqrdmulhq_n_s16(v2808, 16521);
+    int16x8_t v2810 = vaddq_s16(v2786, v2809);
+    int16x8_t v2811 = vsubq_s16(v2296, v2298);
+    int16x8_t v2812 = vsubq_s16(v2300, v2302);
+    int16x8_t v2813_tmp = vqrdmulhq_n_s16(v2812, 5552);
+    int16x8_t v2813 = vaddq_s16(v2813_tmp, v2812);
+    int16x8_t v2814 = vaddq_s16(v2811, v2813);
+    int16x8_t v2815 = vsubq_s16(v2306, v2308);
+    int16x8_t v2816 = vsubq_s16(v2310, v2312);
+    int16x8_t v2817_tmp = vqrdmulhq_n_s16(v2816, 5552);
+    int16x8_t v2817 = vaddq_s16(v2817_tmp, v2816);
+    int16x8_t v2818 = vaddq_s16(v2815, v2817);
+    int16x8_t v2819 = vqrdmulhq_n_s16(v2818, 19393);
+    int16x8_t v2820 = vaddq_s16(v2814, v2819);
+    int16x8_t v2821 = vsubq_s16(v2318, v2320);
+    int16x8_t v2822 = vsubq_s16(v2322, v2324);
+    int16x8_t v2823_tmp = vqrdmulhq_n_s16(v2822, 5552);
+    int16x8_t v2823 = vaddq_s16(v2823_tmp, v2822);
+    int16x8_t v2824 = vaddq_s16(v2821, v2823);
+    int16x8_t v2825 = vsubq_s16(v2328, v2330);
+    int16x8_t v2826 = vsubq_s16(v2332, v2334);
+    int16x8_t v2827_tmp = vqrdmulhq_n_s16(v2826, 5552);
+    int16x8_t v2827 = vaddq_s16(v2827_tmp, v2826);
+    int16x8_t v2828 = vaddq_s16(v2825, v2827);
+    int16x8_t v2829 = vqrdmulhq_n_s16(v2828, 19393);
+    int16x8_t v2830 = vaddq_s16(v2824, v2829);
+    int16x8_t v2831 = vqrdmulhq_n_s16(v2830, 17059);
+    int16x8_t v2832 = vaddq_s16(v2820, v2831);
+    int16x8_t v2833 = vsubq_s16(v2342, v2344);
+    int16x8_t v2834 = vsubq_s16(v2346, v2348);
+    int16x8_t v2835_tmp = vqrdmulhq_n_s16(v2834, 5552);
+    int16x8_t v2835 = vaddq_s16(v2835_tmp, v2834);
+    int16x8_t v2836 = vaddq_s16(v2833, v2835);
+    int16x8_t v2837 = vsubq_s16(v2352, v2354);
+    int16x8_t v2838 = vsubq_s16(v2356, v2358);
+    int16x8_t v2839_tmp = vqrdmulhq_n_s16(v2838, 5552);
+    int16x8_t v2839 = vaddq_s16(v2839_tmp, v2838);
+    int16x8_t v2840 = vaddq_s16(v2837, v2839);
+    int16x8_t v2841 = vqrdmulhq_n_s16(v2840, 19393);
+    int16x8_t v2842 = vaddq_s16(v2836, v2841);
+    int16x8_t v2843 = vsubq_s16(v2364, v2366);
+    int16x8_t v2844 = vsubq_s16(v2368, v2370);
+    int16x8_t v2845_tmp = vqrdmulhq_n_s16(v2844, 5552);
+    int16x8_t v2845 = vaddq_s16(v2845_tmp, v2844);
+    int16x8_t v2846 = vaddq_s16(v2843, v2845);
+    int16x8_t v2847 = vsubq_s16(v2374, v2376);
+    int16x8_t v2848 = vsubq_s16(v2378, v2380);
+    int16x8_t v2849_tmp = vqrdmulhq_n_s16(v2848, 5552);
+    int16x8_t v2849 = vaddq_s16(v2849_tmp, v2848);
+    int16x8_t v2850 = vaddq_s16(v2847, v2849);
+    int16x8_t v2851 = vqrdmulhq_n_s16(v2850, 19393);
+    int16x8_t v2852 = vaddq_s16(v2846, v2851);
+    int16x8_t v2853 = vqrdmulhq_n_s16(v2852, 17059);
+    int16x8_t v2854 = vaddq_s16(v2842, v2853);
+    int16x8_t v2855 = vqrdmulhq_n_s16(v2854, 16549);
+    int16x8_t v2856 = vaddq_s16(v2832, v2855);
+    int16x8_t v2857 = vsubq_s16(v2109, v2114);
+    int16x8_t v2858 = vsubq_s16(v2119, v2124);
+    int16x8_t v2859_tmp = vqrdmulhq_n_s16(v2858, 15865);
+    int16x8_t v2859 = vaddq_s16(v2859_tmp, v2858);
+    int16x8_t v2860 = vaddq_s16(v2857, v2859);
+    int16x8_t v2861 = vsubq_s16(v2131, v2136);
+    int16x8_t v2862 = vsubq_s16(v2141, v2146);
+    int16x8_t v2863_tmp = vqrdmulhq_n_s16(v2862, 15865);
+    int16x8_t v2863 = vaddq_s16(v2863_tmp, v2862);
+    int16x8_t v2864 = vaddq_s16(v2861, v2863);
+    int16x8_t v2865 = vqrdmulhq_n_s16(v2864, 20040);
+    int16x8_t v2866 = vaddq_s16(v2860, v2865);
+    int16x8_t v2867 = vsubq_s16(v2155, v2160);
+    int16x8_t v2868 = vsubq_s16(v2165, v2170);
+    int16x8_t v2869_tmp = vqrdmulhq_n_s16(v2868, 15865);
+    int16x8_t v2869 = vaddq_s16(v2869_tmp, v2868);
+    int16x8_t v2870 = vaddq_s16(v2867, v2869);
+    int16x8_t v2871 = vsubq_s16(v2177, v2182);
+    int16x8_t v2872 = vsubq_s16(v2187, v2192);
+    int16x8_t v2873_tmp = vqrdmulhq_n_s16(v2872, 15865);
+    int16x8_t v2873 = vaddq_s16(v2873_tmp, v2872);
+    int16x8_t v2874 = vaddq_s16(v2871, v2873);
+    int16x8_t v2875 = vqrdmulhq_n_s16(v2874, 20040);
+    int16x8_t v2876 = vaddq_s16(v2870, v2875);
+    int16x8_t v2877 = vqrdmulhq_n_s16(v2876, 17187);
+    int16x8_t v2878 = vaddq_s16(v2866, v2877);
+    int16x8_t v2879 = vsubq_s16(v2203, v2208);
+    int16x8_t v2880 = vsubq_s16(v2213, v2218);
+    int16x8_t v2881_tmp = vqrdmulhq_n_s16(v2880, 15865);
+    int16x8_t v2881 = vaddq_s16(v2881_tmp, v2880);
+    int16x8_t v2882 = vaddq_s16(v2879, v2881);
+    int16x8_t v2883 = vsubq_s16(v2225, v2230);
+    int16x8_t v2884 = vsubq_s16(v2235, v2240);
+    int16x8_t v2885_tmp = vqrdmulhq_n_s16(v2884, 15865);
+    int16x8_t v2885 = vaddq_s16(v2885_tmp, v2884);
+    int16x8_t v2886 = vaddq_s16(v2883, v2885);
+    int16x8_t v2887 = vqrdmulhq_n_s16(v2886, 20040);
+    int16x8_t v2888 = vaddq_s16(v2882, v2887);
+    int16x8_t v2889 = vsubq_s16(v2249, v2254);
+    int16x8_t v2890 = vsubq_s16(v2259, v2264);
+    int16x8_t v2891_tmp = vqrdmulhq_n_s16(v2890, 15865);
+    int16x8_t v2891 = vaddq_s16(v2891_tmp, v2890);
+    int16x8_t v2892 = vaddq_s16(v2889, v2891);
+    int16x8_t v2893 = vsubq_s16(v2271, v2276);
+    int16x8_t v2894 = vsubq_s16(v2281, v2286);
+    int16x8_t v2895_tmp = vqrdmulhq_n_s16(v2894, 15865);
+    int16x8_t v2895 = vaddq_s16(v2895_tmp, v2894);
+    int16x8_t v2896 = vaddq_s16(v2893, v2895);
+    int16x8_t v2897 = vqrdmulhq_n_s16(v2896, 20040);
+    int16x8_t v2898 = vaddq_s16(v2892, v2897);
+    int16x8_t v2899 = vqrdmulhq_n_s16(v2898, 17187);
+    int16x8_t v2900 = vaddq_s16(v2888, v2899);
+    int16x8_t v2901 = vqrdmulhq_n_s16(v2900, 16579);
+    int16x8_t v2902 = vaddq_s16(v2878, v2901);
+    int16x8_t v2903 = vsubq_s16(v1919, v1924);
+    int16x8_t v2904 = vsubq_s16(v1929, v1934);
+    int16x8_t v2905_tmp = vqrdmulhq_n_s16(v2904, 1893);
+    int16x8_t v2905 = vmlaq_n_s16(v2905_tmp, v2904, 2);
+    int16x8_t v2906 = vaddq_s16(v2903, v2905);
+    int16x8_t v2907 = vsubq_s16(v1941, v1946);
+    int16x8_t v2908 = vsubq_s16(v1951, v1956);
+    int16x8_t v2909_tmp = vqrdmulhq_n_s16(v2908, 1893);
+    int16x8_t v2909 = vmlaq_n_s16(v2909_tmp, v2908, 2);
+    int16x8_t v2910 = vaddq_s16(v2907, v2909);
+    int16x8_t v2911 = vqrdmulhq_n_s16(v2910, 20783);
+    int16x8_t v2912 = vaddq_s16(v2906, v2911);
+    int16x8_t v2913 = vsubq_s16(v1965, v1970);
+    int16x8_t v2914 = vsubq_s16(v1975, v1980);
+    int16x8_t v2915_tmp = vqrdmulhq_n_s16(v2914, 1893);
+    int16x8_t v2915 = vmlaq_n_s16(v2915_tmp, v2914, 2);
+    int16x8_t v2916 = vaddq_s16(v2913, v2915);
+    int16x8_t v2917 = vsubq_s16(v1987, v1992);
+    int16x8_t v2918 = vsubq_s16(v1997, v2002);
+    int16x8_t v2919_tmp = vqrdmulhq_n_s16(v2918, 1893);
+    int16x8_t v2919 = vmlaq_n_s16(v2919_tmp, v2918, 2);
+    int16x8_t v2920 = vaddq_s16(v2917, v2919);
+    int16x8_t v2921 = vqrdmulhq_n_s16(v2920, 20783);
+    int16x8_t v2922 = vaddq_s16(v2916, v2921);
+    int16x8_t v2923 = vqrdmulhq_n_s16(v2922, 17326);
+    int16x8_t v2924 = vaddq_s16(v2912, v2923);
+    int16x8_t v2925 = vsubq_s16(v2013, v2018);
+    int16x8_t v2926 = vsubq_s16(v2023, v2028);
+    int16x8_t v2927_tmp = vqrdmulhq_n_s16(v2926, 1893);
+    int16x8_t v2927 = vmlaq_n_s16(v2927_tmp, v2926, 2);
+    int16x8_t v2928 = vaddq_s16(v2925, v2927);
+    int16x8_t v2929 = vsubq_s16(v2035, v2040);
+    int16x8_t v2930 = vsubq_s16(v2045, v2050);
+    int16x8_t v2931_tmp = vqrdmulhq_n_s16(v2930, 1893);
+    int16x8_t v2931 = vmlaq_n_s16(v2931_tmp, v2930, 2);
+    int16x8_t v2932 = vaddq_s16(v2929, v2931);
+    int16x8_t v2933 = vqrdmulhq_n_s16(v2932, 20783);
+    int16x8_t v2934 = vaddq_s16(v2928, v2933);
+    int16x8_t v2935 = vsubq_s16(v2059, v2064);
+    int16x8_t v2936 = vsubq_s16(v2069, v2074);
+    int16x8_t v2937_tmp = vqrdmulhq_n_s16(v2936, 1893);
+    int16x8_t v2937 = vmlaq_n_s16(v2937_tmp, v2936, 2);
+    int16x8_t v2938 = vaddq_s16(v2935, v2937);
+    int16x8_t v2939 = vsubq_s16(v2081, v2086);
+    int16x8_t v2940 = vsubq_s16(v2091, v2096);
+    int16x8_t v2941_tmp = vqrdmulhq_n_s16(v2940, 1893);
+    int16x8_t v2941 = vmlaq_n_s16(v2941_tmp, v2940, 2);
+    int16x8_t v2942 = vaddq_s16(v2939, v2941);
+    int16x8_t v2943 = vqrdmulhq_n_s16(v2942, 20783);
+    int16x8_t v2944 = vaddq_s16(v2938, v2943);
+    int16x8_t v2945 = vqrdmulhq_n_s16(v2944, 17326);
+    int16x8_t v2946 = vaddq_s16(v2934, v2945);
+    int16x8_t v2947 = vqrdmulhq_n_s16(v2946, 16611);
+    int16x8_t v2948 = vaddq_s16(v2924, v2947);
+    int16x8_t v2949 = vsubq_s16(v1543, v1554);
+    int16x8_t v2950 = vsubq_s16(v1565, v1576);
+    int16x8_t v2951_tmp = vqrdmulhq_n_s16(v2950, 13357);
+    int16x8_t v2951 = vmlaq_n_s16(v2951_tmp, v2950, 3);
+    int16x8_t v2952 = vaddq_s16(v2949, v2951);
+    int16x8_t v2953 = vsubq_s16(v1589, v1600);
+    int16x8_t v2954 = vsubq_s16(v1611, v1622);
+    int16x8_t v2955_tmp = vqrdmulhq_n_s16(v2954, 13357);
+    int16x8_t v2955 = vmlaq_n_s16(v2955_tmp, v2954, 3);
+    int16x8_t v2956 = vaddq_s16(v2953, v2955);
+    int16x8_t v2957 = vqrdmulhq_n_s16(v2956, 21637);
+    int16x8_t v2958 = vaddq_s16(v2952, v2957);
+    int16x8_t v2959 = vsubq_s16(v1637, v1648);
+    int16x8_t v2960 = vsubq_s16(v1659, v1670);
+    int16x8_t v2961_tmp = vqrdmulhq_n_s16(v2960, 13357);
+    int16x8_t v2961 = vmlaq_n_s16(v2961_tmp, v2960, 3);
+    int16x8_t v2962 = vaddq_s16(v2959, v2961);
+    int16x8_t v2963 = vsubq_s16(v1683, v1694);
+    int16x8_t v2964 = vsubq_s16(v1705, v1716);
+    int16x8_t v2965_tmp = vqrdmulhq_n_s16(v2964, 13357);
+    int16x8_t v2965 = vmlaq_n_s16(v2965_tmp, v2964, 3);
+    int16x8_t v2966 = vaddq_s16(v2963, v2965);
+    int16x8_t v2967 = vqrdmulhq_n_s16(v2966, 21637);
+    int16x8_t v2968 = vaddq_s16(v2962, v2967);
+    int16x8_t v2969 = vqrdmulhq_n_s16(v2968, 17479);
+    int16x8_t v2970 = vaddq_s16(v2958, v2969);
+    int16x8_t v2971 = vsubq_s16(v1733, v1744);
+    int16x8_t v2972 = vsubq_s16(v1755, v1766);
+    int16x8_t v2973_tmp = vqrdmulhq_n_s16(v2972, 13357);
+    int16x8_t v2973 = vmlaq_n_s16(v2973_tmp, v2972, 3);
+    int16x8_t v2974 = vaddq_s16(v2971, v2973);
+    int16x8_t v2975 = vsubq_s16(v1779, v1790);
+    int16x8_t v2976 = vsubq_s16(v1801, v1812);
+    int16x8_t v2977_tmp = vqrdmulhq_n_s16(v2976, 13357);
+    int16x8_t v2977 = vmlaq_n_s16(v2977_tmp, v2976, 3);
+    int16x8_t v2978 = vaddq_s16(v2975, v2977);
+    int16x8_t v2979 = vqrdmulhq_n_s16(v2978, 21637);
+    int16x8_t v2980 = vaddq_s16(v2974, v2979);
+    int16x8_t v2981 = vsubq_s16(v1827, v1838);
+    int16x8_t v2982 = vsubq_s16(v1849, v1860);
+    int16x8_t v2983_tmp = vqrdmulhq_n_s16(v2982, 13357);
+    int16x8_t v2983 = vmlaq_n_s16(v2983_tmp, v2982, 3);
+    int16x8_t v2984 = vaddq_s16(v2981, v2983);
+    int16x8_t v2985 = vsubq_s16(v1873, v1884);
+    int16x8_t v2986 = vsubq_s16(v1895, v1906);
+    int16x8_t v2987_tmp = vqrdmulhq_n_s16(v2986, 13357);
+    int16x8_t v2987 = vmlaq_n_s16(v2987_tmp, v2986, 3);
+    int16x8_t v2988 = vaddq_s16(v2985, v2987);
+    int16x8_t v2989 = vqrdmulhq_n_s16(v2988, 21637);
+    int16x8_t v2990 = vaddq_s16(v2984, v2989);
+    int16x8_t v2991 = vqrdmulhq_n_s16(v2990, 17479);
+    int16x8_t v2992 = vaddq_s16(v2980, v2991);
+    int16x8_t v2993 = vqrdmulhq_n_s16(v2992, 16647);
+    int16x8_t v2994 = vaddq_s16(v2970, v2993);
+    int16x8_t v2995 = vsubq_s16(v25, v60);
+    int16x8_t v2996 = vsubq_s16(v102, v138);
+    int16x8_t v2997_tmp = vqrdmulhq_n_s16(v2996, 6226);
+    int16x8_t v2997 = vmlaq_n_s16(v2997_tmp, v2996, 10);
+    int16x8_t v2998 = vaddq_s16(v2995, v2997);
+    int16x8_t v2999 = vsubq_s16(v182, v233);
+    int16x8_t v3000 = vsubq_s16(v275, v312);
+    int16x8_t v3001_tmp = vqrdmulhq_n_s16(v3000, 6226);
+    int16x8_t v3001 = vmlaq_n_s16(v3001_tmp, v3000, 10);
+    int16x8_t v3002 = vaddq_s16(v2999, v3001);
+    int16x8_t v3003 = vqrdmulhq_n_s16(v3002, 22622);
+    int16x8_t v3004 = vaddq_s16(v2998, v3003);
+    int16x8_t v3005 = vsubq_s16(v358, v409);
+    int16x8_t v3006 = vsubq_s16(v481, v519);
+    int16x8_t v3007_tmp = vqrdmulhq_n_s16(v3006, 6226);
+    int16x8_t v3007 = vmlaq_n_s16(v3007_tmp, v3006, 10);
+    int16x8_t v3008 = vaddq_s16(v3005, v3007);
+    int16x8_t v3009 = vsubq_s16(v563, v614);
+    int16x8_t v3010 = vsubq_s16(v656, v694);
+    int16x8_t v3011_tmp = vqrdmulhq_n_s16(v3010, 6226);
+    int16x8_t v3011 = vmlaq_n_s16(v3011_tmp, v3010, 10);
+    int16x8_t v3012 = vaddq_s16(v3009, v3011);
+    int16x8_t v3013 = vqrdmulhq_n_s16(v3012, 22622);
+    int16x8_t v3014 = vaddq_s16(v3008, v3013);
+    int16x8_t v3015 = vqrdmulhq_n_s16(v3014, 17646);
+    int16x8_t v3016 = vaddq_s16(v3004, v3015);
+    int16x8_t v3017 = vsubq_s16(v742, v793);
+    int16x8_t v3018 = vsubq_s16(v865, v903);
+    int16x8_t v3019_tmp = vqrdmulhq_n_s16(v3018, 6226);
+    int16x8_t v3019 = vmlaq_n_s16(v3019_tmp, v3018, 10);
+    int16x8_t v3020 = vaddq_s16(v3017, v3019);
+    int16x8_t v3021 = vsubq_s16(v977, v1060);
+    int16x8_t v3022 = vsubq_s16(v1102, v1141);
+    int16x8_t v3023_tmp = vqrdmulhq_n_s16(v3022, 6226);
+    int16x8_t v3023 = vmlaq_n_s16(v3023_tmp, v3022, 10);
+    int16x8_t v3024 = vaddq_s16(v3021, v3023);
+    int16x8_t v3025 = vqrdmulhq_n_s16(v3024, 22622);
+    int16x8_t v3026 = vaddq_s16(v3020, v3025);
+    int16x8_t v3027 = vsubq_s16(v1187, v1238);
+    int16x8_t v3028 = vsubq_s16(v1310, v1348);
+    int16x8_t v3029_tmp = vqrdmulhq_n_s16(v3028, 6226);
+    int16x8_t v3029 = vmlaq_n_s16(v3029_tmp, v3028, 10);
+    int16x8_t v3030 = vaddq_s16(v3027, v3029);
+    int16x8_t v3031 = vsubq_s16(v1392, v1443);
+    int16x8_t v3032 = vsubq_s16(v1485, v1524);
+    int16x8_t v3033_tmp = vqrdmulhq_n_s16(v3032, 6226);
+    int16x8_t v3033 = vmlaq_n_s16(v3033_tmp, v3032, 10);
+    int16x8_t v3034 = vaddq_s16(v3031, v3033);
+    int16x8_t v3035 = vqrdmulhq_n_s16(v3034, 22622);
+    int16x8_t v3036 = vaddq_s16(v3030, v3035);
+    int16x8_t v3037 = vqrdmulhq_n_s16(v3036, 17646);
+    int16x8_t v3038 = vaddq_s16(v3026, v3037);
+    int16x8_t v3039 = vqrdmulhq_n_s16(v3038, 16685);
+    int16x8_t v3040 = vaddq_s16(v3016, v3039);
+    int16x8_t v3041 = vsubq_s16(v2995, v2997);
+    int16x8_t v3042 = vsubq_s16(v2999, v3001);
+    int16x8_t v3043 = vqrdmulhq_n_s16(v3042, 23761);
+    int16x8_t v3044 = vaddq_s16(v3041, v3043);
+    int16x8_t v3045 = vsubq_s16(v3005, v3007);
+    int16x8_t v3046 = vsubq_s16(v3009, v3011);
+    int16x8_t v3047 = vqrdmulhq_n_s16(v3046, 23761);
+    int16x8_t v3048 = vaddq_s16(v3045, v3047);
+    int16x8_t v3049 = vqrdmulhq_n_s16(v3048, 17826);
+    int16x8_t v3050 = vaddq_s16(v3044, v3049);
+    int16x8_t v3051 = vsubq_s16(v3017, v3019);
+    int16x8_t v3052 = vsubq_s16(v3021, v3023);
+    int16x8_t v3053 = vqrdmulhq_n_s16(v3052, 23761);
+    int16x8_t v3054 = vaddq_s16(v3051, v3053);
+    int16x8_t v3055 = vsubq_s16(v3027, v3029);
+    int16x8_t v3056 = vsubq_s16(v3031, v3033);
+    int16x8_t v3057 = vqrdmulhq_n_s16(v3056, 23761);
+    int16x8_t v3058 = vaddq_s16(v3055, v3057);
+    int16x8_t v3059 = vqrdmulhq_n_s16(v3058, 17826);
+    int16x8_t v3060 = vaddq_s16(v3054, v3059);
+    int16x8_t v3061 = vqrdmulhq_n_s16(v3060, 16726);
+    int16x8_t v3062 = vaddq_s16(v3050, v3061);
+    int16x8_t v3063 = vsubq_s16(v2949, v2951);
+    int16x8_t v3064 = vsubq_s16(v2953, v2955);
+    int16x8_t v3065 = vqrdmulhq_n_s16(v3064, 25084);
+    int16x8_t v3066 = vaddq_s16(v3063, v3065);
+    int16x8_t v3067 = vsubq_s16(v2959, v2961);
+    int16x8_t v3068 = vsubq_s16(v2963, v2965);
+    int16x8_t v3069 = vqrdmulhq_n_s16(v3068, 25084);
+    int16x8_t v3070 = vaddq_s16(v3067, v3069);
+    int16x8_t v3071 = vqrdmulhq_n_s16(v3070, 18021);
+    int16x8_t v3072 = vaddq_s16(v3066, v3071);
+    int16x8_t v3073 = vsubq_s16(v2971, v2973);
+    int16x8_t v3074 = vsubq_s16(v2975, v2977);
+    int16x8_t v3075 = vqrdmulhq_n_s16(v3074, 25084);
+    int16x8_t v3076 = vaddq_s16(v3073, v3075);
+    int16x8_t v3077 = vsubq_s16(v2981, v2983);
+    int16x8_t v3078 = vsubq_s16(v2985, v2987);
+    int16x8_t v3079 = vqrdmulhq_n_s16(v3078, 25084);
+    int16x8_t v3080 = vaddq_s16(v3077, v3079);
+    int16x8_t v3081 = vqrdmulhq_n_s16(v3080, 18021);
+    int16x8_t v3082 = vaddq_s16(v3076, v3081);
+    int16x8_t v3083 = vqrdmulhq_n_s16(v3082, 16769);
+    int16x8_t v3084 = vaddq_s16(v3072, v3083);
+    int16x8_t v3085 = vsubq_s16(v2903, v2905);
+    int16x8_t v3086 = vsubq_s16(v2907, v2909);
+    int16x8_t v3087 = vqrdmulhq_n_s16(v3086, 26631);
+    int16x8_t v3088 = vaddq_s16(v3085, v3087);
+    int16x8_t v3089 = vsubq_s16(v2913, v2915);
+    int16x8_t v3090 = vsubq_s16(v2917, v2919);
+    int16x8_t v3091 = vqrdmulhq_n_s16(v3090, 26631);
+    int16x8_t v3092 = vaddq_s16(v3089, v3091);
+    int16x8_t v3093 = vqrdmulhq_n_s16(v3092, 18231);
+    int16x8_t v3094 = vaddq_s16(v3088, v3093);
+    int16x8_t v3095 = vsubq_s16(v2925, v2927);
+    int16x8_t v3096 = vsubq_s16(v2929, v2931);
+    int16x8_t v3097 = vqrdmulhq_n_s16(v3096, 26631);
+    int16x8_t v3098 = vaddq_s16(v3095, v3097);
+    int16x8_t v3099 = vsubq_s16(v2935, v2937);
+    int16x8_t v3100 = vsubq_s16(v2939, v2941);
+    int16x8_t v3101 = vqrdmulhq_n_s16(v3100, 26631);
+    int16x8_t v3102 = vaddq_s16(v3099, v3101);
+    int16x8_t v3103 = vqrdmulhq_n_s16(v3102, 18231);
+    int16x8_t v3104 = vaddq_s16(v3098, v3103);
+    int16x8_t v3105 = vqrdmulhq_n_s16(v3104, 16815);
+    int16x8_t v3106 = vaddq_s16(v3094, v3105);
+    int16x8_t v3107 = vsubq_s16(v2857, v2859);
+    int16x8_t v3108 = vsubq_s16(v2861, v2863);
+    int16x8_t v3109 = vqrdmulhq_n_s16(v3108, 28454);
+    int16x8_t v3110 = vaddq_s16(v3107, v3109);
+    int16x8_t v3111 = vsubq_s16(v2867, v2869);
+    int16x8_t v3112 = vsubq_s16(v2871, v2873);
+    int16x8_t v3113 = vqrdmulhq_n_s16(v3112, 28454);
+    int16x8_t v3114 = vaddq_s16(v3111, v3113);
+    int16x8_t v3115 = vqrdmulhq_n_s16(v3114, 18458);
+    int16x8_t v3116 = vaddq_s16(v3110, v3115);
+    int16x8_t v3117 = vsubq_s16(v2879, v2881);
+    int16x8_t v3118 = vsubq_s16(v2883, v2885);
+    int16x8_t v3119 = vqrdmulhq_n_s16(v3118, 28454);
+    int16x8_t v3120 = vaddq_s16(v3117, v3119);
+    int16x8_t v3121 = vsubq_s16(v2889, v2891);
+    int16x8_t v3122 = vsubq_s16(v2893, v2895);
+    int16x8_t v3123 = vqrdmulhq_n_s16(v3122, 28454);
+    int16x8_t v3124 = vaddq_s16(v3121, v3123);
+    int16x8_t v3125 = vqrdmulhq_n_s16(v3124, 18458);
+    int16x8_t v3126 = vaddq_s16(v3120, v3125);
+    int16x8_t v3127 = vqrdmulhq_n_s16(v3126, 16865);
+    int16x8_t v3128 = vaddq_s16(v3116, v3127);
+    int16x8_t v3129 = vsubq_s16(v2811, v2813);
+    int16x8_t v3130 = vsubq_s16(v2815, v2817);
+    int16x8_t v3131 = vqrdmulhq_n_s16(v3130, 30624);
+    int16x8_t v3132 = vaddq_s16(v3129, v3131);
+    int16x8_t v3133 = vsubq_s16(v2821, v2823);
+    int16x8_t v3134 = vsubq_s16(v2825, v2827);
+    int16x8_t v3135 = vqrdmulhq_n_s16(v3134, 30624);
+    int16x8_t v3136 = vaddq_s16(v3133, v3135);
+    int16x8_t v3137 = vqrdmulhq_n_s16(v3136, 18702);
+    int16x8_t v3138 = vaddq_s16(v3132, v3137);
+    int16x8_t v3139 = vsubq_s16(v2833, v2835);
+    int16x8_t v3140 = vsubq_s16(v2837, v2839);
+    int16x8_t v3141 = vqrdmulhq_n_s16(v3140, 30624);
+    int16x8_t v3142 = vaddq_s16(v3139, v3141);
+    int16x8_t v3143 = vsubq_s16(v2843, v2845);
+    int16x8_t v3144 = vsubq_s16(v2847, v2849);
+    int16x8_t v3145 = vqrdmulhq_n_s16(v3144, 30624);
+    int16x8_t v3146 = vaddq_s16(v3143, v3145);
+    int16x8_t v3147 = vqrdmulhq_n_s16(v3146, 18702);
+    int16x8_t v3148 = vaddq_s16(v3142, v3147);
+    int16x8_t v3149 = vqrdmulhq_n_s16(v3148, 16916);
+    int16x8_t v3150 = vaddq_s16(v3138, v3149);
+    int16x8_t v3151 = vsubq_s16(v2765, v2767);
+    int16x8_t v3152 = vsubq_s16(v2769, v2771);
+    int16x8_t v3153_tmp = vqrdmulhq_n_s16(v3152, 472);
+    int16x8_t v3153 = vaddq_s16(v3153_tmp, v3152);
+    int16x8_t v3154 = vaddq_s16(v3151, v3153);
+    int16x8_t v3155 = vsubq_s16(v2775, v2777);
+    int16x8_t v3156 = vsubq_s16(v2779, v2781);
+    int16x8_t v3157_tmp = vqrdmulhq_n_s16(v3156, 472);
+    int16x8_t v3157 = vaddq_s16(v3157_tmp, v3156);
+    int16x8_t v3158 = vaddq_s16(v3155, v3157);
+    int16x8_t v3159 = vqrdmulhq_n_s16(v3158, 18964);
+    int16x8_t v3160 = vaddq_s16(v3154, v3159);
+    int16x8_t v3161 = vsubq_s16(v2787, v2789);
+    int16x8_t v3162 = vsubq_s16(v2791, v2793);
+    int16x8_t v3163_tmp = vqrdmulhq_n_s16(v3162, 472);
+    int16x8_t v3163 = vaddq_s16(v3163_tmp, v3162);
+    int16x8_t v3164 = vaddq_s16(v3161, v3163);
+    int16x8_t v3165 = vsubq_s16(v2797, v2799);
+    int16x8_t v3166 = vsubq_s16(v2801, v2803);
+    int16x8_t v3167_tmp = vqrdmulhq_n_s16(v3166, 472);
+    int16x8_t v3167 = vaddq_s16(v3167_tmp, v3166);
+    int16x8_t v3168 = vaddq_s16(v3165, v3167);
+    int16x8_t v3169 = vqrdmulhq_n_s16(v3168, 18964);
+    int16x8_t v3170 = vaddq_s16(v3164, v3169);
+    int16x8_t v3171 = vqrdmulhq_n_s16(v3170, 16971);
+    int16x8_t v3172 = vaddq_s16(v3160, v3171);
+    int16x8_t v3173 = vsubq_s16(v2719, v2721);
+    int16x8_t v3174 = vsubq_s16(v2723, v2725);
+    int16x8_t v3175_tmp = vqrdmulhq_n_s16(v3174, 3672);
+    int16x8_t v3175 = vaddq_s16(v3175_tmp, v3174);
+    int16x8_t v3176 = vaddq_s16(v3173, v3175);
+    int16x8_t v3177 = vsubq_s16(v2729, v2731);
+    int16x8_t v3178 = vsubq_s16(v2733, v2735);
+    int16x8_t v3179_tmp = vqrdmulhq_n_s16(v3178, 3672);
+    int16x8_t v3179 = vaddq_s16(v3179_tmp, v3178);
+    int16x8_t v3180 = vaddq_s16(v3177, v3179);
+    int16x8_t v3181 = vqrdmulhq_n_s16(v3180, 19245);
+    int16x8_t v3182 = vaddq_s16(v3176, v3181);
+    int16x8_t v3183 = vsubq_s16(v2741, v2743);
+    int16x8_t v3184 = vsubq_s16(v2745, v2747);
+    int16x8_t v3185_tmp = vqrdmulhq_n_s16(v3184, 3672);
+    int16x8_t v3185 = vaddq_s16(v3185_tmp, v3184);
+    int16x8_t v3186 = vaddq_s16(v3183, v3185);
+    int16x8_t v3187 = vsubq_s16(v2751, v2753);
+    int16x8_t v3188 = vsubq_s16(v2755, v2757);
+    int16x8_t v3189_tmp = vqrdmulhq_n_s16(v3188, 3672);
+    int16x8_t v3189 = vaddq_s16(v3189_tmp, v3188);
+    int16x8_t v3190 = vaddq_s16(v3187, v3189);
+    int16x8_t v3191 = vqrdmulhq_n_s16(v3190, 19245);
+    int16x8_t v3192 = vaddq_s16(v3186, v3191);
+    int16x8_t v3193 = vqrdmulhq_n_s16(v3192, 17029);
+    int16x8_t v3194 = vaddq_s16(v3182, v3193);
+    int16x8_t v3195 = vsubq_s16(v2673, v2675);
+    int16x8_t v3196 = vsubq_s16(v2677, v2679);
+    int16x8_t v3197_tmp = vqrdmulhq_n_s16(v3196, 7662);
+    int16x8_t v3197 = vaddq_s16(v3197_tmp, v3196);
+    int16x8_t v3198 = vaddq_s16(v3195, v3197);
+    int16x8_t v3199 = vsubq_s16(v2683, v2685);
+    int16x8_t v3200 = vsubq_s16(v2687, v2689);
+    int16x8_t v3201_tmp = vqrdmulhq_n_s16(v3200, 7662);
+    int16x8_t v3201 = vaddq_s16(v3201_tmp, v3200);
+    int16x8_t v3202 = vaddq_s16(v3199, v3201);
+    int16x8_t v3203 = vqrdmulhq_n_s16(v3202, 19546);
+    int16x8_t v3204 = vaddq_s16(v3198, v3203);
+    int16x8_t v3205 = vsubq_s16(v2695, v2697);
+    int16x8_t v3206 = vsubq_s16(v2699, v2701);
+    int16x8_t v3207_tmp = vqrdmulhq_n_s16(v3206, 7662);
+    int16x8_t v3207 = vaddq_s16(v3207_tmp, v3206);
+    int16x8_t v3208 = vaddq_s16(v3205, v3207);
+    int16x8_t v3209 = vsubq_s16(v2705, v2707);
+    int16x8_t v3210 = vsubq_s16(v2709, v2711);
+    int16x8_t v3211_tmp = vqrdmulhq_n_s16(v3210, 7662);
+    int16x8_t v3211 = vaddq_s16(v3211_tmp, v3210);
+    int16x8_t v3212 = vaddq_s16(v3209, v3211);
+    int16x8_t v3213 = vqrdmulhq_n_s16(v3212, 19546);
+    int16x8_t v3214 = vaddq_s16(v3208, v3213);
+    int16x8_t v3215 = vqrdmulhq_n_s16(v3214, 17090);
+    int16x8_t v3216 = vaddq_s16(v3204, v3215);
+    int16x8_t v3217 = vsubq_s16(v2582, v2587);
+    int16x8_t v3218 = vsubq_s16(v2592, v2597);
+    int16x8_t v3219_tmp = vqrdmulhq_n_s16(v3218, 12756);
+    int16x8_t v3219 = vaddq_s16(v3219_tmp, v3218);
+    int16x8_t v3220 = vaddq_s16(v3217, v3219);
+    int16x8_t v3221 = vsubq_s16(v2604, v2609);
+    int16x8_t v3222 = vsubq_s16(v2614, v2619);
+    int16x8_t v3223_tmp = vqrdmulhq_n_s16(v3222, 12756);
+    int16x8_t v3223 = vaddq_s16(v3223_tmp, v3222);
+    int16x8_t v3224 = vaddq_s16(v3221, v3223);
+    int16x8_t v3225 = vqrdmulhq_n_s16(v3224, 19869);
+    int16x8_t v3226 = vaddq_s16(v3220, v3225);
+    int16x8_t v3227 = vsubq_s16(v2628, v2633);
+    int16x8_t v3228 = vsubq_s16(v2638, v2643);
+    int16x8_t v3229_tmp = vqrdmulhq_n_s16(v3228, 12756);
+    int16x8_t v3229 = vaddq_s16(v3229_tmp, v3228);
+    int16x8_t v3230 = vaddq_s16(v3227, v3229);
+    int16x8_t v3231 = vsubq_s16(v2650, v2655);
+    int16x8_t v3232 = vsubq_s16(v2660, v2665);
+    int16x8_t v3233_tmp = vqrdmulhq_n_s16(v3232, 12756);
+    int16x8_t v3233 = vaddq_s16(v3233_tmp, v3232);
+    int16x8_t v3234 = vaddq_s16(v3231, v3233);
+    int16x8_t v3235 = vqrdmulhq_n_s16(v3234, 19869);
+    int16x8_t v3236 = vaddq_s16(v3230, v3235);
+    int16x8_t v3237 = vqrdmulhq_n_s16(v3236, 17153);
+    int16x8_t v3238 = vaddq_s16(v3226, v3237);
+    int16x8_t v3239 = vsubq_s16(v2488, v2493);
+    int16x8_t v3240 = vsubq_s16(v2498, v2503);
+    int16x8_t v3241_tmp = vqrdmulhq_n_s16(v3240, 19463);
+    int16x8_t v3241 = vaddq_s16(v3241_tmp, v3240);
+    int16x8_t v3242 = vaddq_s16(v3239, v3241);
+    int16x8_t v3243 = vsubq_s16(v2510, v2515);
+    int16x8_t v3244 = vsubq_s16(v2520, v2525);
+    int16x8_t v3245_tmp = vqrdmulhq_n_s16(v3244, 19463);
+    int16x8_t v3245 = vaddq_s16(v3245_tmp, v3244);
+    int16x8_t v3246 = vaddq_s16(v3243, v3245);
+    int16x8_t v3247 = vqrdmulhq_n_s16(v3246, 20216);
+    int16x8_t v3248 = vaddq_s16(v3242, v3247);
+    int16x8_t v3249 = vsubq_s16(v2534, v2539);
+    int16x8_t v3250 = vsubq_s16(v2544, v2549);
+    int16x8_t v3251_tmp = vqrdmulhq_n_s16(v3250, 19463);
+    int16x8_t v3251 = vaddq_s16(v3251_tmp, v3250);
+    int16x8_t v3252 = vaddq_s16(v3249, v3251);
+    int16x8_t v3253 = vsubq_s16(v2556, v2561);
+    int16x8_t v3254 = vsubq_s16(v2566, v2571);
+    int16x8_t v3255_tmp = vqrdmulhq_n_s16(v3254, 19463);
+    int16x8_t v3255 = vaddq_s16(v3255_tmp, v3254);
+    int16x8_t v3256 = vaddq_s16(v3253, v3255);
+    int16x8_t v3257 = vqrdmulhq_n_s16(v3256, 20216);
+    int16x8_t v3258 = vaddq_s16(v3252, v3257);
+    int16x8_t v3259 = vqrdmulhq_n_s16(v3258, 17220);
+    int16x8_t v3260 = vaddq_s16(v3248, v3259);
+    int16x8_t v3261 = vsubq_s16(v2393, v2398);
+    int16x8_t v3262 = vsubq_s16(v2403, v2408);
+    int16x8_t v3263_tmp = vqrdmulhq_n_s16(v3262, 28661);
+    int16x8_t v3263 = vaddq_s16(v3263_tmp, v3262);
+    int16x8_t v3264 = vaddq_s16(v3261, v3263);
+    int16x8_t v3265 = vsubq_s16(v2415, v2420);
+    int16x8_t v3266 = vsubq_s16(v2425, v2430);
+    int16x8_t v3267_tmp = vqrdmulhq_n_s16(v3266, 28661);
+    int16x8_t v3267 = vaddq_s16(v3267_tmp, v3266);
+    int16x8_t v3268 = vaddq_s16(v3265, v3267);
+    int16x8_t v3269 = vqrdmulhq_n_s16(v3268, 20587);
+    int16x8_t v3270 = vaddq_s16(v3264, v3269);
+    int16x8_t v3271 = vsubq_s16(v2439, v2444);
+    int16x8_t v3272 = vsubq_s16(v2449, v2454);
+    int16x8_t v3273_tmp = vqrdmulhq_n_s16(v3272, 28661);
+    int16x8_t v3273 = vaddq_s16(v3273_tmp, v3272);
+    int16x8_t v3274 = vaddq_s16(v3271, v3273);
+    int16x8_t v3275 = vsubq_s16(v2461, v2467);
+    int16x8_t v3276 = vsubq_s16(v2472, v2477);
+    int16x8_t v3277_tmp = vqrdmulhq_n_s16(v3276, 28661);
+    int16x8_t v3277 = vaddq_s16(v3277_tmp, v3276);
+    int16x8_t v3278 = vaddq_s16(v3275, v3277);
+    int16x8_t v3279 = vqrdmulhq_n_s16(v3278, 20587);
+    int16x8_t v3280 = vaddq_s16(v3274, v3279);
+    int16x8_t v3281 = vqrdmulhq_n_s16(v3280, 17290);
+    int16x8_t v3282 = vaddq_s16(v3270, v3281);
+    int16x8_t v3283 = vsubq_s16(v2299, v2304);
+    int16x8_t v3284 = vsubq_s16(v2309, v2314);
+    int16x8_t v3285_tmp = vqrdmulhq_n_s16(v3284, 9242);
+    int16x8_t v3285 = vmlaq_n_s16(v3285_tmp, v3284, 2);
+    int16x8_t v3286 = vaddq_s16(v3283, v3285);
+    int16x8_t v3287 = vsubq_s16(v2321, v2326);
+    int16x8_t v3288 = vsubq_s16(v2331, v2336);
+    int16x8_t v3289_tmp = vqrdmulhq_n_s16(v3288, 9242);
+    int16x8_t v3289 = vmlaq_n_s16(v3289_tmp, v3288, 2);
+    int16x8_t v3290 = vaddq_s16(v3287, v3289);
+    int16x8_t v3291 = vqrdmulhq_n_s16(v3290, 20985);
+    int16x8_t v3292 = vaddq_s16(v3286, v3291);
+    int16x8_t v3293 = vsubq_s16(v2345, v2350);
+    int16x8_t v3294 = vsubq_s16(v2355, v2360);
+    int16x8_t v3295_tmp = vqrdmulhq_n_s16(v3294, 9242);
+    int16x8_t v3295 = vmlaq_n_s16(v3295_tmp, v3294, 2);
+    int16x8_t v3296 = vaddq_s16(v3293, v3295);
+    int16x8_t v3297 = vsubq_s16(v2367, v2372);
+    int16x8_t v3298 = vsubq_s16(v2377, v2382);
+    int16x8_t v3299_tmp = vqrdmulhq_n_s16(v3298, 9242);
+    int16x8_t v3299 = vmlaq_n_s16(v3299_tmp, v3298, 2);
+    int16x8_t v3300 = vaddq_s16(v3297, v3299);
+    int16x8_t v3301 = vqrdmulhq_n_s16(v3300, 20985);
+    int16x8_t v3302 = vaddq_s16(v3296, v3301);
+    int16x8_t v3303 = vqrdmulhq_n_s16(v3302, 17363);
+    int16x8_t v3304 = vaddq_s16(v3292, v3303);
+    int16x8_t v3305 = vsubq_s16(v2115, v2126);
+    int16x8_t v3306 = vsubq_s16(v2137, v2148);
+    int16x8_t v3307_tmp = vqrdmulhq_n_s16(v3306, 30298);
+    int16x8_t v3307 = vmlaq_n_s16(v3307_tmp, v3306, 2);
+    int16x8_t v3308 = vaddq_s16(v3305, v3307);
+    int16x8_t v3309 = vsubq_s16(v2161, v2172);
+    int16x8_t v3310 = vsubq_s16(v2183, v2194);
+    int16x8_t v3311_tmp = vqrdmulhq_n_s16(v3310, 30298);
+    int16x8_t v3311 = vmlaq_n_s16(v3311_tmp, v3310, 2);
+    int16x8_t v3312 = vaddq_s16(v3309, v3311);
+    int16x8_t v3313 = vqrdmulhq_n_s16(v3312, 21412);
+    int16x8_t v3314 = vaddq_s16(v3308, v3313);
+    int16x8_t v3315 = vsubq_s16(v2209, v2220);
+    int16x8_t v3316 = vsubq_s16(v2231, v2242);
+    int16x8_t v3317_tmp = vqrdmulhq_n_s16(v3316, 30298);
+    int16x8_t v3317 = vmlaq_n_s16(v3317_tmp, v3316, 2);
+    int16x8_t v3318 = vaddq_s16(v3315, v3317);
+    int16x8_t v3319 = vsubq_s16(v2255, v2266);
+    int16x8_t v3320 = vsubq_s16(v2277, v2288);
+    int16x8_t v3321_tmp = vqrdmulhq_n_s16(v3320, 30298);
+    int16x8_t v3321 = vmlaq_n_s16(v3321_tmp, v3320, 2);
+    int16x8_t v3322 = vaddq_s16(v3319, v3321);
+    int16x8_t v3323 = vqrdmulhq_n_s16(v3322, 21412);
+    int16x8_t v3324 = vaddq_s16(v3318, v3323);
+    int16x8_t v3325 = vqrdmulhq_n_s16(v3324, 17440);
+    int16x8_t v3326 = vaddq_s16(v3314, v3325);
+    int16x8_t v3327 = vsubq_s16(v1925, v1936);
+    int16x8_t v3328 = vsubq_s16(v1947, v1958);
+    int16x8_t v3329_tmp = vqrdmulhq_n_s16(v3328, 2773);
+    int16x8_t v3329 = vmlaq_n_s16(v3329_tmp, v3328, 4);
+    int16x8_t v3330 = vaddq_s16(v3327, v3329);
+    int16x8_t v3331 = vsubq_s16(v1971, v1982);
+    int16x8_t v3332 = vsubq_s16(v1993, v2004);
+    int16x8_t v3333_tmp = vqrdmulhq_n_s16(v3332, 2773);
+    int16x8_t v3333 = vmlaq_n_s16(v3333_tmp, v3332, 4);
+    int16x8_t v3334 = vaddq_s16(v3331, v3333);
+    int16x8_t v3335 = vqrdmulhq_n_s16(v3334, 21871);
+    int16x8_t v3336 = vaddq_s16(v3330, v3335);
+    int16x8_t v3337 = vsubq_s16(v2019, v2030);
+    int16x8_t v3338 = vsubq_s16(v2041, v2052);
+    int16x8_t v3339_tmp = vqrdmulhq_n_s16(v3338, 2773);
+    int16x8_t v3339 = vmlaq_n_s16(v3339_tmp, v3338, 4);
+    int16x8_t v3340 = vaddq_s16(v3337, v3339);
+    int16x8_t v3341 = vsubq_s16(v2065, v2076);
+    int16x8_t v3342 = vsubq_s16(v2087, v2098);
+    int16x8_t v3343_tmp = vqrdmulhq_n_s16(v3342, 2773);
+    int16x8_t v3343 = vmlaq_n_s16(v3343_tmp, v3342, 4);
+    int16x8_t v3344 = vaddq_s16(v3341, v3343);
+    int16x8_t v3345 = vqrdmulhq_n_s16(v3344, 21871);
+    int16x8_t v3346 = vaddq_s16(v3340, v3345);
+    int16x8_t v3347 = vqrdmulhq_n_s16(v3346, 17520);
+    int16x8_t v3348 = vaddq_s16(v3336, v3347);
+    int16x8_t v3349 = vsubq_s16(v1555, v1578);
+    int16x8_t v3350 = vsubq_s16(v1601, v1624);
+    int16x8_t v3351_tmp = vqrdmulhq_n_s16(v3350, 26108);
+    int16x8_t v3351 = vmlaq_n_s16(v3351_tmp, v3350, 6);
+    int16x8_t v3352 = vaddq_s16(v3349, v3351);
+    int16x8_t v3353 = vsubq_s16(v1649, v1672);
+    int16x8_t v3354 = vsubq_s16(v1695, v1718);
+    int16x8_t v3355_tmp = vqrdmulhq_n_s16(v3354, 26108);
+    int16x8_t v3355 = vmlaq_n_s16(v3355_tmp, v3354, 6);
+    int16x8_t v3356 = vaddq_s16(v3353, v3355);
+    int16x8_t v3357 = vqrdmulhq_n_s16(v3356, 22363);
+    int16x8_t v3358 = vaddq_s16(v3352, v3357);
+    int16x8_t v3359 = vsubq_s16(v1745, v1768);
+    int16x8_t v3360 = vsubq_s16(v1791, v1814);
+    int16x8_t v3361_tmp = vqrdmulhq_n_s16(v3360, 26108);
+    int16x8_t v3361 = vmlaq_n_s16(v3361_tmp, v3360, 6);
+    int16x8_t v3362 = vaddq_s16(v3359, v3361);
+    int16x8_t v3363 = vsubq_s16(v1839, v1862);
+    int16x8_t v3364 = vsubq_s16(v1885, v1908);
+    int16x8_t v3365_tmp = vqrdmulhq_n_s16(v3364, 26108);
+    int16x8_t v3365 = vmlaq_n_s16(v3365_tmp, v3364, 6);
+    int16x8_t v3366 = vaddq_s16(v3363, v3365);
+    int16x8_t v3367 = vqrdmulhq_n_s16(v3366, 22363);
+    int16x8_t v3368 = vaddq_s16(v3362, v3367);
+    int16x8_t v3369 = vqrdmulhq_n_s16(v3368, 17603);
+    int16x8_t v3370 = vaddq_s16(v3358, v3369);
+    int16x8_t v3371 = vsubq_s16(v61, v140);
+    int16x8_t v3372 = vsubq_s16(v234, v314);
+    int16x8_t v3373_tmp = vqrdmulhq_n_s16(v3372, 12251);
+    int16x8_t v3373 = vmlaq_n_s16(v3373_tmp, v3372, 20);
+    int16x8_t v3374 = vaddq_s16(v3371, v3373);
+    int16x8_t v3375 = vsubq_s16(v410, v521);
+    int16x8_t v3376 = vsubq_s16(v615, v696);
+    int16x8_t v3377_tmp = vqrdmulhq_n_s16(v3376, 12251);
+    int16x8_t v3377 = vmlaq_n_s16(v3377_tmp, v3376, 20);
+    int16x8_t v3378 = vaddq_s16(v3375, v3377);
+    int16x8_t v3379 = vqrdmulhq_n_s16(v3378, 22891);
+    int16x8_t v3380 = vaddq_s16(v3374, v3379);
+    int16x8_t v3381 = vsubq_s16(v794, v905);
+    int16x8_t v3382 = vsubq_s16(v1061, v1143);
+    int16x8_t v3383_tmp = vqrdmulhq_n_s16(v3382, 12251);
+    int16x8_t v3383 = vmlaq_n_s16(v3383_tmp, v3382, 20);
+    int16x8_t v3384 = vaddq_s16(v3381, v3383);
+    int16x8_t v3385 = vsubq_s16(v1239, v1350);
+    int16x8_t v3386 = vsubq_s16(v1444, v1526);
+    int16x8_t v3387_tmp = vqrdmulhq_n_s16(v3386, 12251);
+    int16x8_t v3387 = vmlaq_n_s16(v3387_tmp, v3386, 20);
+    int16x8_t v3388 = vaddq_s16(v3385, v3387);
+    int16x8_t v3389 = vqrdmulhq_n_s16(v3388, 22891);
+    int16x8_t v3390 = vaddq_s16(v3384, v3389);
+    int16x8_t v3391 = vqrdmulhq_n_s16(v3390, 17689);
+    int16x8_t v3392 = vaddq_s16(v3380, v3391);
+    int16x8_t v3393 = vsubq_s16(v3371, v3373);
+    int16x8_t v3394 = vsubq_s16(v3375, v3377);
+    int16x8_t v3395 = vqrdmulhq_n_s16(v3394, 23460);
+    int16x8_t v3396 = vaddq_s16(v3393, v3395);
+    int16x8_t v3397 = vsubq_s16(v3381, v3383);
+    int16x8_t v3398 = vsubq_s16(v3385, v3387);
+    int16x8_t v3399 = vqrdmulhq_n_s16(v3398, 23460);
+    int16x8_t v3400 = vaddq_s16(v3397, v3399);
+    int16x8_t v3401 = vqrdmulhq_n_s16(v3400, 17779);
+    int16x8_t v3402 = vaddq_s16(v3396, v3401);
+    int16x8_t v3403 = vsubq_s16(v3349, v3351);
+    int16x8_t v3404 = vsubq_s16(v3353, v3355);
+    int16x8_t v3405 = vqrdmulhq_n_s16(v3404, 24073);
+    int16x8_t v3406 = vaddq_s16(v3403, v3405);
+    int16x8_t v3407 = vsubq_s16(v3359, v3361);
+    int16x8_t v3408 = vsubq_s16(v3363, v3365);
+    int16x8_t v3409 = vqrdmulhq_n_s16(v3408, 24073);
+    int16x8_t v3410 = vaddq_s16(v3407, v3409);
+    int16x8_t v3411 = vqrdmulhq_n_s16(v3410, 17873);
+    int16x8_t v3412 = vaddq_s16(v3406, v3411);
+    int16x8_t v3413 = vsubq_s16(v3327, v3329);
+    int16x8_t v3414 = vsubq_s16(v3331, v3333);
+    int16x8_t v3415 = vqrdmulhq_n_s16(v3414, 24734);
+    int16x8_t v3416 = vaddq_s16(v3413, v3415);
+    int16x8_t v3417 = vsubq_s16(v3337, v3339);
+    int16x8_t v3418 = vsubq_s16(v3341, v3343);
+    int16x8_t v3419 = vqrdmulhq_n_s16(v3418, 24734);
+    int16x8_t v3420 = vaddq_s16(v3417, v3419);
+    int16x8_t v3421 = vqrdmulhq_n_s16(v3420, 17971);
+    int16x8_t v3422 = vaddq_s16(v3416, v3421);
+    int16x8_t v3423 = vsubq_s16(v3305, v3307);
+    int16x8_t v3424 = vsubq_s16(v3309, v3311);
+    int16x8_t v3425 = vqrdmulhq_n_s16(v3424, 25448);
+    int16x8_t v3426 = vaddq_s16(v3423, v3425);
+    int16x8_t v3427 = vsubq_s16(v3315, v3317);
+    int16x8_t v3428 = vsubq_s16(v3319, v3321);
+    int16x8_t v3429 = vqrdmulhq_n_s16(v3428, 25448);
+    int16x8_t v3430 = vaddq_s16(v3427, v3429);
+    int16x8_t v3431 = vqrdmulhq_n_s16(v3430, 18072);
+    int16x8_t v3432 = vaddq_s16(v3426, v3431);
+    int16x8_t v3433 = vsubq_s16(v3283, v3285);
+    int16x8_t v3434 = vsubq_s16(v3287, v3289);
+    int16x8_t v3435 = vqrdmulhq_n_s16(v3434, 26220);
+    int16x8_t v3436 = vaddq_s16(v3433, v3435);
+    int16x8_t v3437 = vsubq_s16(v3293, v3295);
+    int16x8_t v3438 = vsubq_s16(v3297, v3299);
+    int16x8_t v3439 = vqrdmulhq_n_s16(v3438, 26220);
+    int16x8_t v3440 = vaddq_s16(v3437, v3439);
+    int16x8_t v3441 = vqrdmulhq_n_s16(v3440, 18177);
+    int16x8_t v3442 = vaddq_s16(v3436, v3441);
+    int16x8_t v3443 = vsubq_s16(v3261, v3263);
+    int16x8_t v3444 = vsubq_s16(v3265, v3267);
+    int16x8_t v3445 = vqrdmulhq_n_s16(v3444, 27058);
+    int16x8_t v3446 = vaddq_s16(v3443, v3445);
+    int16x8_t v3447 = vsubq_s16(v3271, v3273);
+    int16x8_t v3448 = vsubq_s16(v3275, v3277);
+    int16x8_t v3449 = vqrdmulhq_n_s16(v3448, 27058);
+    int16x8_t v3450 = vaddq_s16(v3447, v3449);
+    int16x8_t v3451 = vqrdmulhq_n_s16(v3450, 18286);
+    int16x8_t v3452 = vaddq_s16(v3446, v3451);
+    int16x8_t v3453 = vsubq_s16(v3239, v3241);
+    int16x8_t v3454 = vsubq_s16(v3243, v3245);
+    int16x8_t v3455 = vqrdmulhq_n_s16(v3454, 27969);
+    int16x8_t v3456 = vaddq_s16(v3453, v3455);
+    int16x8_t v3457 = vsubq_s16(v3249, v3251);
+    int16x8_t v3458 = vsubq_s16(v3253, v3255);
+    int16x8_t v3459 = vqrdmulhq_n_s16(v3458, 27969);
+    int16x8_t v3460 = vaddq_s16(v3457, v3459);
+    int16x8_t v3461 = vqrdmulhq_n_s16(v3460, 18400);
+    int16x8_t v3462 = vaddq_s16(v3456, v3461);
+    int16x8_t v3463 = vsubq_s16(v3217, v3219);
+    int16x8_t v3464 = vsubq_s16(v3221, v3223);
+    int16x8_t v3465 = vqrdmulhq_n_s16(v3464, 28961);
+    int16x8_t v3466 = vaddq_s16(v3463, v3465);
+    int16x8_t v3467 = vsubq_s16(v3227, v3229);
+    int16x8_t v3468 = vsubq_s16(v3231, v3233);
+    int16x8_t v3469 = vqrdmulhq_n_s16(v3468, 28961);
+    int16x8_t v3470 = vaddq_s16(v3467, v3469);
+    int16x8_t v3471 = vqrdmulhq_n_s16(v3470, 18517);
+    int16x8_t v3472 = vaddq_s16(v3466, v3471);
+    int16x8_t v3473 = vsubq_s16(v3195, v3197);
+    int16x8_t v3474 = vsubq_s16(v3199, v3201);
+    int16x8_t v3475 = vqrdmulhq_n_s16(v3474, 30044);
+    int16x8_t v3476 = vaddq_s16(v3473, v3475);
+    int16x8_t v3477 = vsubq_s16(v3205, v3207);
+    int16x8_t v3478 = vsubq_s16(v3209, v3211);
+    int16x8_t v3479 = vqrdmulhq_n_s16(v3478, 30044);
+    int16x8_t v3480 = vaddq_s16(v3477, v3479);
+    int16x8_t v3481 = vqrdmulhq_n_s16(v3480, 18639);
+    int16x8_t v3482 = vaddq_s16(v3476, v3481);
+    int16x8_t v3483 = vsubq_s16(v3173, v3175);
+    int16x8_t v3484 = vsubq_s16(v3177, v3179);
+    int16x8_t v3485 = vqrdmulhq_n_s16(v3484, 31232);
+    int16x8_t v3486 = vaddq_s16(v3483, v3485);
+    int16x8_t v3487 = vsubq_s16(v3183, v3185);
+    int16x8_t v3488 = vsubq_s16(v3187, v3189);
+    int16x8_t v3489 = vqrdmulhq_n_s16(v3488, 31232);
+    int16x8_t v3490 = vaddq_s16(v3487, v3489);
+    int16x8_t v3491 = vqrdmulhq_n_s16(v3490, 18765);
+    int16x8_t v3492 = vaddq_s16(v3486, v3491);
+    int16x8_t v3493 = vsubq_s16(v3151, v3153);
+    int16x8_t v3494 = vsubq_s16(v3155, v3157);
+    int16x8_t v3495 = vqrdmulhq_n_s16(v3494, 32538);
+    int16x8_t v3496 = vaddq_s16(v3493, v3495);
+    int16x8_t v3497 = vsubq_s16(v3161, v3163);
+    int16x8_t v3498 = vsubq_s16(v3165, v3167);
+    int16x8_t v3499 = vqrdmulhq_n_s16(v3498, 32538);
+    int16x8_t v3500 = vaddq_s16(v3497, v3499);
+    int16x8_t v3501 = vqrdmulhq_n_s16(v3500, 18896);
+    int16x8_t v3502 = vaddq_s16(v3496, v3501);
+    int16x8_t v3503 = vsubq_s16(v3129, v3131);
+    int16x8_t v3504 = vsubq_s16(v3133, v3135);
+    int16x8_t v3505_tmp = vqrdmulhq_n_s16(v3504, 1211);
+    int16x8_t v3505 = vaddq_s16(v3505_tmp, v3504);
+    int16x8_t v3506 = vaddq_s16(v3503, v3505);
+    int16x8_t v3507 = vsubq_s16(v3139, v3141);
+    int16x8_t v3508 = vsubq_s16(v3143, v3145);
+    int16x8_t v3509_tmp = vqrdmulhq_n_s16(v3508, 1211);
+    int16x8_t v3509 = vaddq_s16(v3509_tmp, v3508);
+    int16x8_t v3510 = vaddq_s16(v3507, v3509);
+    int16x8_t v3511 = vqrdmulhq_n_s16(v3510, 19032);
+    int16x8_t v3512 = vaddq_s16(v3506, v3511);
+    int16x8_t v3513 = vsubq_s16(v3107, v3109);
+    int16x8_t v3514 = vsubq_s16(v3111, v3113);
+    int16x8_t v3515_tmp = vqrdmulhq_n_s16(v3514, 2808);
+    int16x8_t v3515 = vaddq_s16(v3515_tmp, v3514);
+    int16x8_t v3516 = vaddq_s16(v3513, v3515);
+    int16x8_t v3517 = vsubq_s16(v3117, v3119);
+    int16x8_t v3518 = vsubq_s16(v3121, v3123);
+    int16x8_t v3519_tmp = vqrdmulhq_n_s16(v3518, 2808);
+    int16x8_t v3519 = vaddq_s16(v3519_tmp, v3518);
+    int16x8_t v3520 = vaddq_s16(v3517, v3519);
+    int16x8_t v3521 = vqrdmulhq_n_s16(v3520, 19172);
+    int16x8_t v3522 = vaddq_s16(v3516, v3521);
+    int16x8_t v3523 = vsubq_s16(v3085, v3087);
+    int16x8_t v3524 = vsubq_s16(v3089, v3091);
+    int16x8_t v3525_tmp = vqrdmulhq_n_s16(v3524, 4586);
+    int16x8_t v3525 = vaddq_s16(v3525_tmp, v3524);
+    int16x8_t v3526 = vaddq_s16(v3523, v3525);
+    int16x8_t v3527 = vsubq_s16(v3095, v3097);
+    int16x8_t v3528 = vsubq_s16(v3099, v3101);
+    int16x8_t v3529_tmp = vqrdmulhq_n_s16(v3528, 4586);
+    int16x8_t v3529 = vaddq_s16(v3529_tmp, v3528);
+    int16x8_t v3530 = vaddq_s16(v3527, v3529);
+    int16x8_t v3531 = vqrdmulhq_n_s16(v3530, 19318);
+    int16x8_t v3532 = vaddq_s16(v3526, v3531);
+    int16x8_t v3533 = vsubq_s16(v3063, v3065);
+    int16x8_t v3534 = vsubq_s16(v3067, v3069);
+    int16x8_t v3535_tmp = vqrdmulhq_n_s16(v3534, 6576);
+    int16x8_t v3535 = vaddq_s16(v3535_tmp, v3534);
+    int16x8_t v3536 = vaddq_s16(v3533, v3535);
+    int16x8_t v3537 = vsubq_s16(v3073, v3075);
+    int16x8_t v3538 = vsubq_s16(v3077, v3079);
+    int16x8_t v3539_tmp = vqrdmulhq_n_s16(v3538, 6576);
+    int16x8_t v3539 = vaddq_s16(v3539_tmp, v3538);
+    int16x8_t v3540 = vaddq_s16(v3537, v3539);
+    int16x8_t v3541 = vqrdmulhq_n_s16(v3540, 19469);
+    int16x8_t v3542 = vaddq_s16(v3536, v3541);
+    int16x8_t v3543 = vsubq_s16(v3041, v3043);
+    int16x8_t v3544 = vsubq_s16(v3045, v3047);
+    int16x8_t v3545_tmp = vqrdmulhq_n_s16(v3544, 8817);
+    int16x8_t v3545 = vaddq_s16(v3545_tmp, v3544);
+    int16x8_t v3546 = vaddq_s16(v3543, v3545);
+    int16x8_t v3547 = vsubq_s16(v3051, v3053);
+    int16x8_t v3548 = vsubq_s16(v3055, v3057);
+    int16x8_t v3549_tmp = vqrdmulhq_n_s16(v3548, 8817);
+    int16x8_t v3549 = vaddq_s16(v3549_tmp, v3548);
+    int16x8_t v3550 = vaddq_s16(v3547, v3549);
+    int16x8_t v3551 = vqrdmulhq_n_s16(v3550, 19625);
+    int16x8_t v3552 = vaddq_s16(v3546, v3551);
+    int16x8_t v3553 = vsubq_s16(v2998, v3003);
+    int16x8_t v3554 = vsubq_s16(v3008, v3013);
+    int16x8_t v3555_tmp = vqrdmulhq_n_s16(v3554, 11356);
+    int16x8_t v3555 = vaddq_s16(v3555_tmp, v3554);
+    int16x8_t v3556 = vaddq_s16(v3553, v3555);
+    int16x8_t v3557 = vsubq_s16(v3020, v3025);
+    int16x8_t v3558 = vsubq_s16(v3030, v3035);
+    int16x8_t v3559_tmp = vqrdmulhq_n_s16(v3558, 11356);
+    int16x8_t v3559 = vaddq_s16(v3559_tmp, v3558);
+    int16x8_t v3560 = vaddq_s16(v3557, v3559);
+    int16x8_t v3561 = vqrdmulhq_n_s16(v3560, 19786);
+    int16x8_t v3562 = vaddq_s16(v3556, v3561);
+    int16x8_t v3563 = vsubq_s16(v2952, v2957);
+    int16x8_t v3564 = vsubq_s16(v2962, v2967);
+    int16x8_t v3565_tmp = vqrdmulhq_n_s16(v3564, 14256);
+    int16x8_t v3565 = vaddq_s16(v3565_tmp, v3564);
+    int16x8_t v3566 = vaddq_s16(v3563, v3565);
+    int16x8_t v3567 = vsubq_s16(v2974, v2979);
+    int16x8_t v3568 = vsubq_s16(v2984, v2989);
+    int16x8_t v3569_tmp = vqrdmulhq_n_s16(v3568, 14256);
+    int16x8_t v3569 = vaddq_s16(v3569_tmp, v3568);
+    int16x8_t v3570 = vaddq_s16(v3567, v3569);
+    int16x8_t v3571 = vqrdmulhq_n_s16(v3570, 19954);
+    int16x8_t v3572 = vaddq_s16(v3566, v3571);
+    int16x8_t v3573 = vsubq_s16(v2906, v2911);
+    int16x8_t v3574 = vsubq_s16(v2916, v2921);
+    int16x8_t v3575_tmp = vqrdmulhq_n_s16(v3574, 17596);
+    int16x8_t v3575 = vaddq_s16(v3575_tmp, v3574);
+    int16x8_t v3576 = vaddq_s16(v3573, v3575);
+    int16x8_t v3577 = vsubq_s16(v2928, v2933);
+    int16x8_t v3578 = vsubq_s16(v2938, v2943);
+    int16x8_t v3579_tmp = vqrdmulhq_n_s16(v3578, 17596);
+    int16x8_t v3579 = vaddq_s16(v3579_tmp, v3578);
+    int16x8_t v3580 = vaddq_s16(v3577, v3579);
+    int16x8_t v3581 = vqrdmulhq_n_s16(v3580, 20127);
+    int16x8_t v3582 = vaddq_s16(v3576, v3581);
+    int16x8_t v3583 = vsubq_s16(v2860, v2865);
+    int16x8_t v3584 = vsubq_s16(v2870, v2875);
+    int16x8_t v3585_tmp = vqrdmulhq_n_s16(v3584, 21483);
+    int16x8_t v3585 = vaddq_s16(v3585_tmp, v3584);
+    int16x8_t v3586 = vaddq_s16(v3583, v3585);
+    int16x8_t v3587 = vsubq_s16(v2882, v2887);
+    int16x8_t v3588 = vsubq_s16(v2892, v2897);
+    int16x8_t v3589_tmp = vqrdmulhq_n_s16(v3588, 21483);
+    int16x8_t v3589 = vaddq_s16(v3589_tmp, v3588);
+    int16x8_t v3590 = vaddq_s16(v3587, v3589);
+    int16x8_t v3591 = vqrdmulhq_n_s16(v3590, 20306);
+    int16x8_t v3592 = vaddq_s16(v3586, v3591);
+    int16x8_t v3593 = vsubq_s16(v2814, v2819);
+    int16x8_t v3594 = vsubq_s16(v2824, v2829);
+    int16x8_t v3595_tmp = vqrdmulhq_n_s16(v3594, 26057);
+    int16x8_t v3595 = vaddq_s16(v3595_tmp, v3594);
+    int16x8_t v3596 = vaddq_s16(v3593, v3595);
+    int16x8_t v3597 = vsubq_s16(v2836, v2841);
+    int16x8_t v3598 = vsubq_s16(v2846, v2851);
+    int16x8_t v3599_tmp = vqrdmulhq_n_s16(v3598, 26057);
+    int16x8_t v3599 = vaddq_s16(v3599_tmp, v3598);
+    int16x8_t v3600 = vaddq_s16(v3597, v3599);
+    int16x8_t v3601 = vqrdmulhq_n_s16(v3600, 20492);
+    int16x8_t v3602 = vaddq_s16(v3596, v3601);
+    int16x8_t v3603 = vsubq_s16(v2768, v2773);
+    int16x8_t v3604 = vsubq_s16(v2778, v2783);
+    int16x8_t v3605_tmp = vqrdmulhq_n_s16(v3604, 31517);
+    int16x8_t v3605 = vaddq_s16(v3605_tmp, v3604);
+    int16x8_t v3606 = vaddq_s16(v3603, v3605);
+    int16x8_t v3607 = vsubq_s16(v2790, v2795);
+    int16x8_t v3608 = vsubq_s16(v2800, v2805);
+    int16x8_t v3609_tmp = vqrdmulhq_n_s16(v3608, 31517);
+    int16x8_t v3609 = vaddq_s16(v3609_tmp, v3608);
+    int16x8_t v3610 = vaddq_s16(v3607, v3609);
+    int16x8_t v3611 = vqrdmulhq_n_s16(v3610, 20684);
+    int16x8_t v3612 = vaddq_s16(v3606, v3611);
+    int16x8_t v3613 = vsubq_s16(v2722, v2727);
+    int16x8_t v3614 = vsubq_s16(v2732, v2737);
+    int16x8_t v3615_tmp = vqrdmulhq_n_s16(v3614, 5373);
+    int16x8_t v3615 = vmlaq_n_s16(v3615_tmp, v3614, 2);
+    int16x8_t v3616 = vaddq_s16(v3613, v3615);
+    int16x8_t v3617 = vsubq_s16(v2744, v2749);
+    int16x8_t v3618 = vsubq_s16(v2754, v2759);
+    int16x8_t v3619_tmp = vqrdmulhq_n_s16(v3618, 5373);
+    int16x8_t v3619 = vmlaq_n_s16(v3619_tmp, v3618, 2);
+    int16x8_t v3620 = vaddq_s16(v3617, v3619);
+    int16x8_t v3621 = vqrdmulhq_n_s16(v3620, 20883);
+    int16x8_t v3622 = vaddq_s16(v3616, v3621);
+    int16x8_t v3623 = vsubq_s16(v2676, v2681);
+    int16x8_t v3624 = vsubq_s16(v2686, v2691);
+    int16x8_t v3625_tmp = vqrdmulhq_n_s16(v3624, 13571);
+    int16x8_t v3625 = vmlaq_n_s16(v3625_tmp, v3624, 2);
+    int16x8_t v3626 = vaddq_s16(v3623, v3625);
+    int16x8_t v3627 = vsubq_s16(v2698, v2703);
+    int16x8_t v3628 = vsubq_s16(v2708, v2713);
+    int16x8_t v3629_tmp = vqrdmulhq_n_s16(v3628, 13571);
+    int16x8_t v3629 = vmlaq_n_s16(v3629_tmp, v3628, 2);
+    int16x8_t v3630 = vaddq_s16(v3627, v3629);
+    int16x8_t v3631 = vqrdmulhq_n_s16(v3630, 21089);
+    int16x8_t v3632 = vaddq_s16(v3626, v3631);
+    int16x8_t v3633 = vsubq_s16(v2588, v2599);
+    int16x8_t v3634 = vsubq_s16(v2610, v2621);
+    int16x8_t v3635_tmp = vqrdmulhq_n_s16(v3634, 23975);
+    int16x8_t v3635 = vmlaq_n_s16(v3635_tmp, v3634, 2);
+    int16x8_t v3636 = vaddq_s16(v3633, v3635);
+    int16x8_t v3637 = vsubq_s16(v2634, v2645);
+    int16x8_t v3638 = vsubq_s16(v2656, v2667);
+    int16x8_t v3639_tmp = vqrdmulhq_n_s16(v3638, 23975);
+    int16x8_t v3639 = vmlaq_n_s16(v3639_tmp, v3638, 2);
+    int16x8_t v3640 = vaddq_s16(v3637, v3639);
+    int16x8_t v3641 = vqrdmulhq_n_s16(v3640, 21303);
+    int16x8_t v3642 = vaddq_s16(v3636, v3641);
+    int16x8_t v3643 = vsubq_s16(v2494, v2505);
+    int16x8_t v3644 = vsubq_s16(v2516, v2527);
+    int16x8_t v3645_tmp = vqrdmulhq_n_s16(v3644, 4832);
+    int16x8_t v3645 = vmlaq_n_s16(v3645_tmp, v3644, 3);
+    int16x8_t v3646 = vaddq_s16(v3643, v3645);
+    int16x8_t v3647 = vsubq_s16(v2540, v2551);
+    int16x8_t v3648 = vsubq_s16(v2562, v2573);
+    int16x8_t v3649_tmp = vqrdmulhq_n_s16(v3648, 4832);
+    int16x8_t v3649 = vmlaq_n_s16(v3649_tmp, v3648, 3);
+    int16x8_t v3650 = vaddq_s16(v3647, v3649);
+    int16x8_t v3651 = vqrdmulhq_n_s16(v3650, 21524);
+    int16x8_t v3652 = vaddq_s16(v3646, v3651);
+    int16x8_t v3653 = vsubq_s16(v2399, v2410);
+    int16x8_t v3654 = vsubq_s16(v2421, v2432);
+    int16x8_t v3655_tmp = vqrdmulhq_n_s16(v3654, 23437);
+    int16x8_t v3655 = vmlaq_n_s16(v3655_tmp, v3654, 3);
+    int16x8_t v3656 = vaddq_s16(v3653, v3655);
+    int16x8_t v3657 = vsubq_s16(v2445, v2456);
+    int16x8_t v3658 = vsubq_s16(v2468, v2479);
+    int16x8_t v3659_tmp = vqrdmulhq_n_s16(v3658, 23437);
+    int16x8_t v3659 = vmlaq_n_s16(v3659_tmp, v3658, 3);
+    int16x8_t v3660 = vaddq_s16(v3657, v3659);
+    int16x8_t v3661 = vqrdmulhq_n_s16(v3660, 21753);
+    int16x8_t v3662 = vaddq_s16(v3656, v3661);
+    int16x8_t v3663 = vsubq_s16(v2305, v2316);
+    int16x8_t v3664 = vsubq_s16(v2327, v2338);
+    int16x8_t v3665_tmp = vqrdmulhq_n_s16(v3664, 17573);
+    int16x8_t v3665 = vmlaq_n_s16(v3665_tmp, v3664, 4);
+    int16x8_t v3666 = vaddq_s16(v3663, v3665);
+    int16x8_t v3667 = vsubq_s16(v2351, v2362);
+    int16x8_t v3668 = vsubq_s16(v2373, v2384);
+    int16x8_t v3669_tmp = vqrdmulhq_n_s16(v3668, 17573);
+    int16x8_t v3669 = vmlaq_n_s16(v3669_tmp, v3668, 4);
+    int16x8_t v3670 = vaddq_s16(v3667, v3669);
+    int16x8_t v3671 = vqrdmulhq_n_s16(v3670, 21990);
+    int16x8_t v3672 = vaddq_s16(v3666, v3671);
+    int16x8_t v3673 = vsubq_s16(v2127, v2150);
+    int16x8_t v3674 = vsubq_s16(v2173, v2196);
+    int16x8_t v3675_tmp = vqrdmulhq_n_s16(v3674, 27122);
+    int16x8_t v3675 = vmlaq_n_s16(v3675_tmp, v3674, 5);
+    int16x8_t v3676 = vaddq_s16(v3673, v3675);
+    int16x8_t v3677 = vsubq_s16(v2221, v2244);
+    int16x8_t v3678 = vsubq_s16(v2267, v2290);
+    int16x8_t v3679_tmp = vqrdmulhq_n_s16(v3678, 27122);
+    int16x8_t v3679 = vmlaq_n_s16(v3679_tmp, v3678, 5);
+    int16x8_t v3680 = vaddq_s16(v3677, v3679);
+    int16x8_t v3681 = vqrdmulhq_n_s16(v3680, 22236);
+    int16x8_t v3682 = vaddq_s16(v3676, v3681);
+    int16x8_t v3683 = vsubq_s16(v1937, v1960);
+    int16x8_t v3684 = vsubq_s16(v1983, v2006);
+    int16x8_t v3685_tmp = vqrdmulhq_n_s16(v3684, 5041);
+    int16x8_t v3685 = vmlaq_n_s16(v3685_tmp, v3684, 8);
+    int16x8_t v3686 = vaddq_s16(v3683, v3685);
+    int16x8_t v3687 = vsubq_s16(v2031, v2054);
+    int16x8_t v3688 = vsubq_s16(v2077, v2100);
+    int16x8_t v3689_tmp = vqrdmulhq_n_s16(v3688, 5041);
+    int16x8_t v3689 = vmlaq_n_s16(v3689_tmp, v3688, 8);
+    int16x8_t v3690 = vaddq_s16(v3687, v3689);
+    int16x8_t v3691 = vqrdmulhq_n_s16(v3690, 22491);
+    int16x8_t v3692 = vaddq_s16(v3686, v3691);
+    int16x8_t v3693 = vsubq_s16(v1579, v1626);
+    int16x8_t v3694 = vsubq_s16(v1673, v1720);
+    int16x8_t v3695_tmp = vqrdmulhq_n_s16(v3694, 19146);
+    int16x8_t v3695 = vmlaq_n_s16(v3695_tmp, v3694, 13);
+    int16x8_t v3696 = vaddq_s16(v3693, v3695);
+    int16x8_t v3697 = vsubq_s16(v1769, v1816);
+    int16x8_t v3698 = vsubq_s16(v1863, v1910);
+    int16x8_t v3699_tmp = vqrdmulhq_n_s16(v3698, 19146);
+    int16x8_t v3699 = vmlaq_n_s16(v3699_tmp, v3698, 13);
+    int16x8_t v3700 = vaddq_s16(v3697, v3699);
+    int16x8_t v3701 = vqrdmulhq_n_s16(v3700, 22755);
+    int16x8_t v3702 = vaddq_s16(v3696, v3701);
+    int16x8_t v3703 = vsubq_s16(v141, v316);
+    int16x8_t v3704 = vsubq_s16(v522, v698);
+    int16x8_t v3705_tmp = vqrdmulhq_n_s16(v3704, 24402);
+    int16x8_t v3705 = vmlaq_n_s16(v3705_tmp, v3704, 40);
+    int16x8_t v3706 = vaddq_s16(v3703, v3705);
+    int16x8_t v3707 = vsubq_s16(v906, v1145);
+    int16x8_t v3708 = vsubq_s16(v1351, v1528);
+    int16x8_t v3709_tmp = vqrdmulhq_n_s16(v3708, 24402);
+    int16x8_t v3709 = vmlaq_n_s16(v3709_tmp, v3708, 40);
+    int16x8_t v3710 = vaddq_s16(v3707, v3709);
+    int16x8_t v3711 = vqrdmulhq_n_s16(v3710, 23030);
+    int16x8_t v3712 = vaddq_s16(v3706, v3711);
+    int16x8_t v3713 = vsubq_s16(v3703, v3705);
+    int16x8_t v3714 = vsubq_s16(v3707, v3709);
+    int16x8_t v3715 = vqrdmulhq_n_s16(v3714, 23314);
+    int16x8_t v3716 = vaddq_s16(v3713, v3715);
+    int16x8_t v3717 = vsubq_s16(v3693, v3695);
+    int16x8_t v3718 = vsubq_s16(v3697, v3699);
+    int16x8_t v3719 = vqrdmulhq_n_s16(v3718, 23609);
+    int16x8_t v3720 = vaddq_s16(v3717, v3719);
+    int16x8_t v3721 = vsubq_s16(v3683, v3685);
+    int16x8_t v3722 = vsubq_s16(v3687, v3689);
+    int16x8_t v3723 = vqrdmulhq_n_s16(v3722, 23915);
+    int16x8_t v3724 = vaddq_s16(v3721, v3723);
+    int16x8_t v3725 = vsubq_s16(v3673, v3675);
+    int16x8_t v3726 = vsubq_s16(v3677, v3679);
+    int16x8_t v3727 = vqrdmulhq_n_s16(v3726, 24233);
+    int16x8_t v3728 = vaddq_s16(v3725, v3727);
+    int16x8_t v3729 = vsubq_s16(v3663, v3665);
+    int16x8_t v3730 = vsubq_s16(v3667, v3669);
+    int16x8_t v3731 = vqrdmulhq_n_s16(v3730, 24564);
+    int16x8_t v3732 = vaddq_s16(v3729, v3731);
+    int16x8_t v3733 = vsubq_s16(v3653, v3655);
+    int16x8_t v3734 = vsubq_s16(v3657, v3659);
+    int16x8_t v3735 = vqrdmulhq_n_s16(v3734, 24907);
+    int16x8_t v3736 = vaddq_s16(v3733, v3735);
+    int16x8_t v3737 = vsubq_s16(v3643, v3645);
+    int16x8_t v3738 = vsubq_s16(v3647, v3649);
+    int16x8_t v3739 = vqrdmulhq_n_s16(v3738, 25264);
+    int16x8_t v3740 = vaddq_s16(v3737, v3739);
+    int16x8_t v3741 = vsubq_s16(v3633, v3635);
+    int16x8_t v3742 = vsubq_s16(v3637, v3639);
+    int16x8_t v3743 = vqrdmulhq_n_s16(v3742, 25635);
+    int16x8_t v3744 = vaddq_s16(v3741, v3743);
+    int16x8_t v3745 = vsubq_s16(v3623, v3625);
+    int16x8_t v3746 = vsubq_s16(v3627, v3629);
+    int16x8_t v3747 = vqrdmulhq_n_s16(v3746, 26021);
+    int16x8_t v3748 = vaddq_s16(v3745, v3747);
+    int16x8_t v3749 = vsubq_s16(v3613, v3615);
+    int16x8_t v3750 = vsubq_s16(v3617, v3619);
+    int16x8_t v3751 = vqrdmulhq_n_s16(v3750, 26423);
+    int16x8_t v3752 = vaddq_s16(v3749, v3751);
+    int16x8_t v3753 = vsubq_s16(v3603, v3605);
+    int16x8_t v3754 = vsubq_s16(v3607, v3609);
+    int16x8_t v3755 = vqrdmulhq_n_s16(v3754, 26842);
+    int16x8_t v3756 = vaddq_s16(v3753, v3755);
+    int16x8_t v3757 = vsubq_s16(v3593, v3595);
+    int16x8_t v3758 = vsubq_s16(v3597, v3599);
+    int16x8_t v3759 = vqrdmulhq_n_s16(v3758, 27279);
+    int16x8_t v3760 = vaddq_s16(v3757, v3759);
+    int16x8_t v3761 = vsubq_s16(v3583, v3585);
+    int16x8_t v3762 = vsubq_s16(v3587, v3589);
+    int16x8_t v3763 = vqrdmulhq_n_s16(v3762, 27734);
+    int16x8_t v3764 = vaddq_s16(v3761, v3763);
+    int16x8_t v3765 = vsubq_s16(v3573, v3575);
+    int16x8_t v3766 = vsubq_s16(v3577, v3579);
+    int16x8_t v3767 = vqrdmulhq_n_s16(v3766, 28209);
+    int16x8_t v3768 = vaddq_s16(v3765, v3767);
+    int16x8_t v3769 = vsubq_s16(v3563, v3565);
+    int16x8_t v3770 = vsubq_s16(v3567, v3569);
+    int16x8_t v3771 = vqrdmulhq_n_s16(v3770, 28705);
+    int16x8_t v3772 = vaddq_s16(v3769, v3771);
+    int16x8_t v3773 = vsubq_s16(v3553, v3555);
+    int16x8_t v3774 = vsubq_s16(v3557, v3559);
+    int16x8_t v3775 = vqrdmulhq_n_s16(v3774, 29223);
+    int16x8_t v3776 = vaddq_s16(v3773, v3775);
+    int16x8_t v3777 = vsubq_s16(v3543, v3545);
+    int16x8_t v3778 = vsubq_s16(v3547, v3549);
+    int16x8_t v3779 = vqrdmulhq_n_s16(v3778, 29764);
+    int16x8_t v3780 = vaddq_s16(v3777, v3779);
+    int16x8_t v3781 = vsubq_s16(v3533, v3535);
+    int16x8_t v3782 = vsubq_s16(v3537, v3539);
+    int16x8_t v3783 = vqrdmulhq_n_s16(v3782, 30331);
+    int16x8_t v3784 = vaddq_s16(v3781, v3783);
+    int16x8_t v3785 = vsubq_s16(v3523, v3525);
+    int16x8_t v3786 = vsubq_s16(v3527, v3529);
+    int16x8_t v3787 = vqrdmulhq_n_s16(v3786, 30925);
+    int16x8_t v3788 = vaddq_s16(v3785, v3787);
+    int16x8_t v3789 = vsubq_s16(v3513, v3515);
+    int16x8_t v3790 = vsubq_s16(v3517, v3519);
+    int16x8_t v3791 = vqrdmulhq_n_s16(v3790, 31547);
+    int16x8_t v3792 = vaddq_s16(v3789, v3791);
+    int16x8_t v3793 = vsubq_s16(v3503, v3505);
+    int16x8_t v3794 = vsubq_s16(v3507, v3509);
+    int16x8_t v3795 = vqrdmulhq_n_s16(v3794, 32199);
+    int16x8_t v3796 = vaddq_s16(v3793, v3795);
+    int16x8_t v3797 = vsubq_s16(v3493, v3495);
+    int16x8_t v3798 = vsubq_s16(v3497, v3499);
+    int16x8_t v3799_tmp = vqrdmulhq_n_s16(v3798, 117);
+    int16x8_t v3799 = vaddq_s16(v3799_tmp, v3798);
+    int16x8_t v3800 = vaddq_s16(v3797, v3799);
+    int16x8_t v3801 = vsubq_s16(v3483, v3485);
+    int16x8_t v3802 = vsubq_s16(v3487, v3489);
+    int16x8_t v3803_tmp = vqrdmulhq_n_s16(v3802, 837);
+    int16x8_t v3803 = vaddq_s16(v3803_tmp, v3802);
+    int16x8_t v3804 = vaddq_s16(v3801, v3803);
+    int16x8_t v3805 = vsubq_s16(v3473, v3475);
+    int16x8_t v3806 = vsubq_s16(v3477, v3479);
+    int16x8_t v3807_tmp = vqrdmulhq_n_s16(v3806, 1594);
+    int16x8_t v3807 = vaddq_s16(v3807_tmp, v3806);
+    int16x8_t v3808 = vaddq_s16(v3805, v3807);
+    int16x8_t v3809 = vsubq_s16(v3463, v3465);
+    int16x8_t v3810 = vsubq_s16(v3467, v3469);
+    int16x8_t v3811_tmp = vqrdmulhq_n_s16(v3810, 2393);
+    int16x8_t v3811 = vaddq_s16(v3811_tmp, v3810);
+    int16x8_t v3812 = vaddq_s16(v3809, v3811);
+    int16x8_t v3813 = vsubq_s16(v3453, v3455);
+    int16x8_t v3814 = vsubq_s16(v3457, v3459);
+    int16x8_t v3815_tmp = vqrdmulhq_n_s16(v3814, 3234);
+    int16x8_t v3815 = vaddq_s16(v3815_tmp, v3814);
+    int16x8_t v3816 = vaddq_s16(v3813, v3815);
+    int16x8_t v3817 = vsubq_s16(v3443, v3445);
+    int16x8_t v3818 = vsubq_s16(v3447, v3449);
+    int16x8_t v3819_tmp = vqrdmulhq_n_s16(v3818, 4123);
+    int16x8_t v3819 = vaddq_s16(v3819_tmp, v3818);
+    int16x8_t v3820 = vaddq_s16(v3817, v3819);
+    int16x8_t v3821 = vsubq_s16(v3433, v3435);
+    int16x8_t v3822 = vsubq_s16(v3437, v3439);
+    int16x8_t v3823_tmp = vqrdmulhq_n_s16(v3822, 5062);
+    int16x8_t v3823 = vaddq_s16(v3823_tmp, v3822);
+    int16x8_t v3824 = vaddq_s16(v3821, v3823);
+    int16x8_t v3825 = vsubq_s16(v3423, v3425);
+    int16x8_t v3826 = vsubq_s16(v3427, v3429);
+    int16x8_t v3827_tmp = vqrdmulhq_n_s16(v3826, 6057);
+    int16x8_t v3827 = vaddq_s16(v3827_tmp, v3826);
+    int16x8_t v3828 = vaddq_s16(v3825, v3827);
+    int16x8_t v3829 = vsubq_s16(v3413, v3415);
+    int16x8_t v3830 = vsubq_s16(v3417, v3419);
+    int16x8_t v3831_tmp = vqrdmulhq_n_s16(v3830, 7111);
+    int16x8_t v3831 = vaddq_s16(v3831_tmp, v3830);
+    int16x8_t v3832 = vaddq_s16(v3829, v3831);
+    int16x8_t v3833 = vsubq_s16(v3403, v3405);
+    int16x8_t v3834 = vsubq_s16(v3407, v3409);
+    int16x8_t v3835_tmp = vqrdmulhq_n_s16(v3834, 8231);
+    int16x8_t v3835 = vaddq_s16(v3835_tmp, v3834);
+    int16x8_t v3836 = vaddq_s16(v3833, v3835);
+    int16x8_t v3837 = vsubq_s16(v3393, v3395);
+    int16x8_t v3838 = vsubq_s16(v3397, v3399);
+    int16x8_t v3839_tmp = vqrdmulhq_n_s16(v3838, 9421);
+    int16x8_t v3839 = vaddq_s16(v3839_tmp, v3838);
+    int16x8_t v3840 = vaddq_s16(v3837, v3839);
+    int16x8_t v3841 = vsubq_s16(v3374, v3379);
+    int16x8_t v3842 = vsubq_s16(v3384, v3389);
+    int16x8_t v3843_tmp = vqrdmulhq_n_s16(v3842, 10690);
+    int16x8_t v3843 = vaddq_s16(v3843_tmp, v3842);
+    int16x8_t v3844 = vaddq_s16(v3841, v3843);
+    int16x8_t v3845 = vsubq_s16(v3352, v3357);
+    int16x8_t v3846 = vsubq_s16(v3362, v3367);
+    int16x8_t v3847_tmp = vqrdmulhq_n_s16(v3846, 12044);
+    int16x8_t v3847 = vaddq_s16(v3847_tmp, v3846);
+    int16x8_t v3848 = vaddq_s16(v3845, v3847);
+    int16x8_t v3849 = vsubq_s16(v3330, v3335);
+    int16x8_t v3850 = vsubq_s16(v3340, v3345);
+    int16x8_t v3851_tmp = vqrdmulhq_n_s16(v3850, 13493);
+    int16x8_t v3851 = vaddq_s16(v3851_tmp, v3850);
+    int16x8_t v3852 = vaddq_s16(v3849, v3851);
+    int16x8_t v3853 = vsubq_s16(v3308, v3313);
+    int16x8_t v3854 = vsubq_s16(v3318, v3323);
+    int16x8_t v3855_tmp = vqrdmulhq_n_s16(v3854, 15046);
+    int16x8_t v3855 = vaddq_s16(v3855_tmp, v3854);
+    int16x8_t v3856 = vaddq_s16(v3853, v3855);
+    int16x8_t v3857 = vsubq_s16(v3286, v3291);
+    int16x8_t v3858 = vsubq_s16(v3296, v3301);
+    int16x8_t v3859_tmp = vqrdmulhq_n_s16(v3858, 16715);
+    int16x8_t v3859 = vaddq_s16(v3859_tmp, v3858);
+    int16x8_t v3860 = vaddq_s16(v3857, v3859);
+    int16x8_t v3861 = vsubq_s16(v3264, v3269);
+    int16x8_t v3862 = vsubq_s16(v3274, v3279);
+    int16x8_t v3863_tmp = vqrdmulhq_n_s16(v3862, 18512);
+    int16x8_t v3863 = vaddq_s16(v3863_tmp, v3862);
+    int16x8_t v3864 = vaddq_s16(v3861, v3863);
+    int16x8_t v3865 = vsubq_s16(v3242, v3247);
+    int16x8_t v3866 = vsubq_s16(v3252, v3257);
+    int16x8_t v3867_tmp = vqrdmulhq_n_s16(v3866, 20453);
+    int16x8_t v3867 = vaddq_s16(v3867_tmp, v3866);
+    int16x8_t v3868 = vaddq_s16(v3865, v3867);
+    int16x8_t v3869 = vsubq_s16(v3220, v3225);
+    int16x8_t v3870 = vsubq_s16(v3230, v3235);
+    int16x8_t v3871_tmp = vqrdmulhq_n_s16(v3870, 22555);
+    int16x8_t v3871 = vaddq_s16(v3871_tmp, v3870);
+    int16x8_t v3872 = vaddq_s16(v3869, v3871);
+    int16x8_t v3873 = vsubq_s16(v3198, v3203);
+    int16x8_t v3874 = vsubq_s16(v3208, v3213);
+    int16x8_t v3875_tmp = vqrdmulhq_n_s16(v3874, 24839);
+    int16x8_t v3875 = vaddq_s16(v3875_tmp, v3874);
+    int16x8_t v3876 = vaddq_s16(v3873, v3875);
+    int16x8_t v3877 = vsubq_s16(v3176, v3181);
+    int16x8_t v3878 = vsubq_s16(v3186, v3191);
+    int16x8_t v3879_tmp = vqrdmulhq_n_s16(v3878, 27330);
+    int16x8_t v3879 = vaddq_s16(v3879_tmp, v3878);
+    int16x8_t v3880 = vaddq_s16(v3877, v3879);
+    int16x8_t v3881 = vsubq_s16(v3154, v3159);
+    int16x8_t v3882 = vsubq_s16(v3164, v3169);
+    int16x8_t v3883_tmp = vqrdmulhq_n_s16(v3882, 30056);
+    int16x8_t v3883 = vaddq_s16(v3883_tmp, v3882);
+    int16x8_t v3884 = vaddq_s16(v3881, v3883);
+    int16x8_t v3885 = vsubq_s16(v3132, v3137);
+    int16x8_t v3886 = vsubq_s16(v3142, v3147);
+    int16x8_t v3887_tmp = vqrdmulhq_n_s16(v3886, 282);
+    int16x8_t v3887 = vmlaq_n_s16(v3887_tmp, v3886, 2);
+    int16x8_t v3888 = vaddq_s16(v3885, v3887);
+    int16x8_t v3889 = vsubq_s16(v3110, v3115);
+    int16x8_t v3890 = vsubq_s16(v3120, v3125);
+    int16x8_t v3891_tmp = vqrdmulhq_n_s16(v3890, 3588);
+    int16x8_t v3891 = vmlaq_n_s16(v3891_tmp, v3890, 2);
+    int16x8_t v3892 = vaddq_s16(v3889, v3891);
+    int16x8_t v3893 = vsubq_s16(v3088, v3093);
+    int16x8_t v3894 = vsubq_s16(v3098, v3103);
+    int16x8_t v3895_tmp = vqrdmulhq_n_s16(v3894, 7255);
+    int16x8_t v3895 = vmlaq_n_s16(v3895_tmp, v3894, 2);
+    int16x8_t v3896 = vaddq_s16(v3893, v3895);
+    int16x8_t v3897 = vsubq_s16(v3066, v3071);
+    int16x8_t v3898 = vsubq_s16(v3076, v3081);
+    int16x8_t v3899_tmp = vqrdmulhq_n_s16(v3898, 11344);
+    int16x8_t v3899 = vmlaq_n_s16(v3899_tmp, v3898, 2);
+    int16x8_t v3900 = vaddq_s16(v3897, v3899);
+    int16x8_t v3901 = vsubq_s16(v3044, v3049);
+    int16x8_t v3902 = vsubq_s16(v3054, v3059);
+    int16x8_t v3903_tmp = vqrdmulhq_n_s16(v3902, 15934);
+    int16x8_t v3903 = vmlaq_n_s16(v3903_tmp, v3902, 2);
+    int16x8_t v3904 = vaddq_s16(v3901, v3903);
+    int16x8_t v3905 = vsubq_s16(v3004, v3015);
+    int16x8_t v3906 = vsubq_s16(v3026, v3037);
+    int16x8_t v3907_tmp = vqrdmulhq_n_s16(v3906, 21120);
+    int16x8_t v3907 = vmlaq_n_s16(v3907_tmp, v3906, 2);
+    int16x8_t v3908 = vaddq_s16(v3905, v3907);
+    int16x8_t v3909 = vsubq_s16(v2958, v2969);
+    int16x8_t v3910 = vsubq_s16(v2980, v2991);
+    int16x8_t v3911_tmp = vqrdmulhq_n_s16(v3910, 27027);
+    int16x8_t v3911 = vmlaq_n_s16(v3911_tmp, v3910, 2);
+    int16x8_t v3912 = vaddq_s16(v3909, v3911);
+    int16x8_t v3913 = vsubq_s16(v2912, v2923);
+    int16x8_t v3914 = vsubq_s16(v2934, v2945);
+    int16x8_t v3915_tmp = vqrdmulhq_n_s16(v3914, 1045);
+    int16x8_t v3915 = vmlaq_n_s16(v3915_tmp, v3914, 3);
+    int16x8_t v3916 = vaddq_s16(v3913, v3915);
+    int16x8_t v3917 = vsubq_s16(v2866, v2877);
+    int16x8_t v3918 = vsubq_s16(v2888, v2899);
+    int16x8_t v3919_tmp = vqrdmulhq_n_s16(v3918, 8923);
+    int16x8_t v3919 = vmlaq_n_s16(v3919_tmp, v3918, 3);
+    int16x8_t v3920 = vaddq_s16(v3917, v3919);
+    int16x8_t v3921 = vsubq_s16(v2820, v2831);
+    int16x8_t v3922 = vsubq_s16(v2842, v2853);
+    int16x8_t v3923_tmp = vqrdmulhq_n_s16(v3922, 18177);
+    int16x8_t v3923 = vmlaq_n_s16(v3923_tmp, v3922, 3);
+    int16x8_t v3924 = vaddq_s16(v3921, v3923);
+    int16x8_t v3925 = vsubq_s16(v2774, v2785);
+    int16x8_t v3926 = vsubq_s16(v2796, v2807);
+    int16x8_t v3927_tmp = vqrdmulhq_n_s16(v3926, 29200);
+    int16x8_t v3927 = vmlaq_n_s16(v3927_tmp, v3926, 3);
+    int16x8_t v3928 = vaddq_s16(v3925, v3927);
+    int16x8_t v3929 = vsubq_s16(v2728, v2739);
+    int16x8_t v3930 = vsubq_s16(v2750, v2761);
+    int16x8_t v3931_tmp = vqrdmulhq_n_s16(v3930, 9782);
+    int16x8_t v3931 = vmlaq_n_s16(v3931_tmp, v3930, 4);
+    int16x8_t v3932 = vaddq_s16(v3929, v3931);
+    int16x8_t v3933 = vsubq_s16(v2682, v2693);
+    int16x8_t v3934 = vsubq_s16(v2704, v2715);
+    int16x8_t v3935_tmp = vqrdmulhq_n_s16(v3934, 26282);
+    int16x8_t v3935 = vmlaq_n_s16(v3935_tmp, v3934, 4);
+    int16x8_t v3936 = vaddq_s16(v3933, v3935);
+    int16x8_t v3937 = vsubq_s16(v2600, v2623);
+    int16x8_t v3938 = vsubq_s16(v2646, v2669);
+    int16x8_t v3939_tmp = vqrdmulhq_n_s16(v3938, 14423);
+    int16x8_t v3939 = vmlaq_n_s16(v3939_tmp, v3938, 5);
+    int16x8_t v3940 = vaddq_s16(v3937, v3939);
+    int16x8_t v3941 = vsubq_s16(v2506, v2529);
+    int16x8_t v3942 = vsubq_s16(v2552, v2575);
+    int16x8_t v3943_tmp = vqrdmulhq_n_s16(v3942, 9008);
+    int16x8_t v3943 = vmlaq_n_s16(v3943_tmp, v3942, 6);
+    int16x8_t v3944 = vaddq_s16(v3941, v3943);
+    int16x8_t v3945 = vsubq_s16(v2411, v2434);
+    int16x8_t v3946 = vsubq_s16(v2457, v2481);
+    int16x8_t v3947_tmp = vqrdmulhq_n_s16(v3946, 13552);
+    int16x8_t v3947 = vmlaq_n_s16(v3947_tmp, v3946, 7);
+    int16x8_t v3948 = vaddq_s16(v3945, v3947);
+    int16x8_t v3949 = vsubq_s16(v2317, v2340);
+    int16x8_t v3950 = vsubq_s16(v2363, v2386);
+    int16x8_t v3951_tmp = vqrdmulhq_n_s16(v3950, 1925);
+    int16x8_t v3951 = vmlaq_n_s16(v3951_tmp, v3950, 9);
+    int16x8_t v3952 = vaddq_s16(v3949, v3951);
+    int16x8_t v3953 = vsubq_s16(v2151, v2198);
+    int16x8_t v3954 = vsubq_s16(v2245, v2292);
+    int16x8_t v3955_tmp = vqrdmulhq_n_s16(v3954, 21123);
+    int16x8_t v3955 = vmlaq_n_s16(v3955_tmp, v3954, 11);
+    int16x8_t v3956 = vaddq_s16(v3953, v3955);
+    int16x8_t v3957 = vsubq_s16(v1961, v2008);
+    int16x8_t v3958 = vsubq_s16(v2055, v2102);
+    int16x8_t v3959_tmp = vqrdmulhq_n_s16(v3958, 9831);
+    int16x8_t v3959 = vmlaq_n_s16(v3959_tmp, v3958, 16);
+    int16x8_t v3960 = vaddq_s16(v3957, v3959);
+    int16x8_t v3961 = vsubq_s16(v1627, v1722);
+    int16x8_t v3962 = vsubq_s16(v1817, v1912);
+    int16x8_t v3963_tmp = vqrdmulhq_n_s16(v3962, 5373);
+    int16x8_t v3963 = vmlaq_n_s16(v3963_tmp, v3962, 27);
+    int16x8_t v3964 = vaddq_s16(v3961, v3963);
+    int16x8_t v3965 = vsubq_s16(v317, v700);
+    int16x8_t v3966 = vsubq_s16(v1146, v1530);
+    int16x8_t v3967_tmp = vqrdmulhq_n_s16(v3966, 15986);
+    int16x8_t v3967 = vmlaq_n_s16(v3967_tmp, v3966, 81);
+    int16x8_t v3968 = vaddq_s16(v3965, v3967);
+    int16x8_t v3969 = vsubq_s16(v3965, v3967);
+    int16x8_t v3970 = vsubq_s16(v3961, v3963);
+    int16x8_t v3971 = vsubq_s16(v3957, v3959);
+    int16x8_t v3972 = vsubq_s16(v3953, v3955);
+    int16x8_t v3973 = vsubq_s16(v3949, v3951);
+    int16x8_t v3974 = vsubq_s16(v3945, v3947);
+    int16x8_t v3975 = vsubq_s16(v3941, v3943);
+    int16x8_t v3976 = vsubq_s16(v3937, v3939);
+    int16x8_t v3977 = vsubq_s16(v3933, v3935);
+    int16x8_t v3978 = vsubq_s16(v3929, v3931);
+    int16x8_t v3979 = vsubq_s16(v3925, v3927);
+    int16x8_t v3980 = vsubq_s16(v3921, v3923);
+    int16x8_t v3981 = vsubq_s16(v3917, v3919);
+    int16x8_t v3982 = vsubq_s16(v3913, v3915);
+    int16x8_t v3983 = vsubq_s16(v3909, v3911);
+    int16x8_t v3984 = vsubq_s16(v3905, v3907);
+    int16x8_t v3985 = vsubq_s16(v3901, v3903);
+    int16x8_t v3986 = vsubq_s16(v3897, v3899);
+    int16x8_t v3987 = vsubq_s16(v3893, v3895);
+    int16x8_t v3988 = vsubq_s16(v3889, v3891);
+    int16x8_t v3989 = vsubq_s16(v3885, v3887);
+    int16x8_t v3990 = vsubq_s16(v3881, v3883);
+    int16x8_t v3991 = vsubq_s16(v3877, v3879);
+    int16x8_t v3992 = vsubq_s16(v3873, v3875);
+    int16x8_t v3993 = vsubq_s16(v3869, v3871);
+    int16x8_t v3994 = vsubq_s16(v3865, v3867);
+    int16x8_t v3995 = vsubq_s16(v3861, v3863);
+    int16x8_t v3996 = vsubq_s16(v3857, v3859);
+    int16x8_t v3997 = vsubq_s16(v3853, v3855);
+    int16x8_t v3998 = vsubq_s16(v3849, v3851);
+    int16x8_t v3999 = vsubq_s16(v3845, v3847);
+    int16x8_t v4000 = vsubq_s16(v3841, v3843);
+    int16x8_t v4001 = vsubq_s16(v3837, v3839);
+    int16x8_t v4002 = vsubq_s16(v3833, v3835);
+    int16x8_t v4003 = vsubq_s16(v3829, v3831);
+    int16x8_t v4004 = vsubq_s16(v3825, v3827);
+    int16x8_t v4005 = vsubq_s16(v3821, v3823);
+    int16x8_t v4006 = vsubq_s16(v3817, v3819);
+    int16x8_t v4007 = vsubq_s16(v3813, v3815);
+    int16x8_t v4008 = vsubq_s16(v3809, v3811);
+    int16x8_t v4009 = vsubq_s16(v3805, v3807);
+    int16x8_t v4010 = vsubq_s16(v3801, v3803);
+    int16x8_t v4011 = vsubq_s16(v3797, v3799);
+    int16x8_t v4012 = vsubq_s16(v3793, v3795);
+    int16x8_t v4013 = vsubq_s16(v3789, v3791);
+    int16x8_t v4014 = vsubq_s16(v3785, v3787);
+    int16x8_t v4015 = vsubq_s16(v3781, v3783);
+    int16x8_t v4016 = vsubq_s16(v3777, v3779);
+    int16x8_t v4017 = vsubq_s16(v3773, v3775);
+    int16x8_t v4018 = vsubq_s16(v3769, v3771);
+    int16x8_t v4019 = vsubq_s16(v3765, v3767);
+    int16x8_t v4020 = vsubq_s16(v3761, v3763);
+    int16x8_t v4021 = vsubq_s16(v3757, v3759);
+    int16x8_t v4022 = vsubq_s16(v3753, v3755);
+    int16x8_t v4023 = vsubq_s16(v3749, v3751);
+    int16x8_t v4024 = vsubq_s16(v3745, v3747);
+    int16x8_t v4025 = vsubq_s16(v3741, v3743);
+    int16x8_t v4026 = vsubq_s16(v3737, v3739);
+    int16x8_t v4027 = vsubq_s16(v3733, v3735);
+    int16x8_t v4028 = vsubq_s16(v3729, v3731);
+    int16x8_t v4029 = vsubq_s16(v3725, v3727);
+    int16x8_t v4030 = vsubq_s16(v3721, v3723);
+    int16x8_t v4031 = vsubq_s16(v3717, v3719);
+    int16x8_t v4032 = vsubq_s16(v3713, v3715);
+    int16x8_t v4033 = vsubq_s16(v3706, v3711);
+    int16x8_t v4034 = vsubq_s16(v3696, v3701);
+    int16x8_t v4035 = vsubq_s16(v3686, v3691);
+    int16x8_t v4036 = vsubq_s16(v3676, v3681);
+    int16x8_t v4037 = vsubq_s16(v3666, v3671);
+    int16x8_t v4038 = vsubq_s16(v3656, v3661);
+    int16x8_t v4039 = vsubq_s16(v3646, v3651);
+    int16x8_t v4040 = vsubq_s16(v3636, v3641);
+    int16x8_t v4041 = vsubq_s16(v3626, v3631);
+    int16x8_t v4042 = vsubq_s16(v3616, v3621);
+    int16x8_t v4043 = vsubq_s16(v3606, v3611);
+    int16x8_t v4044 = vsubq_s16(v3596, v3601);
+    int16x8_t v4045 = vsubq_s16(v3586, v3591);
+    int16x8_t v4046 = vsubq_s16(v3576, v3581);
+    int16x8_t v4047 = vsubq_s16(v3566, v3571);
+    int16x8_t v4048 = vsubq_s16(v3556, v3561);
+    int16x8_t v4049 = vsubq_s16(v3546, v3551);
+    int16x8_t v4050 = vsubq_s16(v3536, v3541);
+    int16x8_t v4051 = vsubq_s16(v3526, v3531);
+    int16x8_t v4052 = vsubq_s16(v3516, v3521);
+    int16x8_t v4053 = vsubq_s16(v3506, v3511);
+    int16x8_t v4054 = vsubq_s16(v3496, v3501);
+    int16x8_t v4055 = vsubq_s16(v3486, v3491);
+    int16x8_t v4056 = vsubq_s16(v3476, v3481);
+    int16x8_t v4057 = vsubq_s16(v3466, v3471);
+    int16x8_t v4058 = vsubq_s16(v3456, v3461);
+    int16x8_t v4059 = vsubq_s16(v3446, v3451);
+    int16x8_t v4060 = vsubq_s16(v3436, v3441);
+    int16x8_t v4061 = vsubq_s16(v3426, v3431);
+    int16x8_t v4062 = vsubq_s16(v3416, v3421);
+    int16x8_t v4063 = vsubq_s16(v3406, v3411);
+    int16x8_t v4064 = vsubq_s16(v3396, v3401);
+    int16x8_t v4065 = vsubq_s16(v3380, v3391);
+    int16x8_t v4066 = vsubq_s16(v3358, v3369);
+    int16x8_t v4067 = vsubq_s16(v3336, v3347);
+    int16x8_t v4068 = vsubq_s16(v3314, v3325);
+    int16x8_t v4069 = vsubq_s16(v3292, v3303);
+    int16x8_t v4070 = vsubq_s16(v3270, v3281);
+    int16x8_t v4071 = vsubq_s16(v3248, v3259);
+    int16x8_t v4072 = vsubq_s16(v3226, v3237);
+    int16x8_t v4073 = vsubq_s16(v3204, v3215);
+    int16x8_t v4074 = vsubq_s16(v3182, v3193);
+    int16x8_t v4075 = vsubq_s16(v3160, v3171);
+    int16x8_t v4076 = vsubq_s16(v3138, v3149);
+    int16x8_t v4077 = vsubq_s16(v3116, v3127);
+    int16x8_t v4078 = vsubq_s16(v3094, v3105);
+    int16x8_t v4079 = vsubq_s16(v3072, v3083);
+    int16x8_t v4080 = vsubq_s16(v3050, v3061);
+    int16x8_t v4081 = vsubq_s16(v3016, v3039);
+    int16x8_t v4082 = vsubq_s16(v2970, v2993);
+    int16x8_t v4083 = vsubq_s16(v2924, v2947);
+    int16x8_t v4084 = vsubq_s16(v2878, v2901);
+    int16x8_t v4085 = vsubq_s16(v2832, v2855);
+    int16x8_t v4086 = vsubq_s16(v2786, v2809);
+    int16x8_t v4087 = vsubq_s16(v2740, v2763);
+    int16x8_t v4088 = vsubq_s16(v2694, v2717);
+    int16x8_t v4089 = vsubq_s16(v2624, v2671);
+    int16x8_t v4090 = vsubq_s16(v2530, v2577);
+    int16x8_t v4091 = vsubq_s16(v2435, v2483);
+    int16x8_t v4092 = vsubq_s16(v2341, v2388);
+    int16x8_t v4093 = vsubq_s16(v2199, v2294);
+    int16x8_t v4094 = vsubq_s16(v2009, v2104);
+    int16x8_t v4095 = vsubq_s16(v1723, v1914);
+    int16x8_t v4096 = vsubq_s16(v701, v1532);
+    vst1q_s16(out + out_stride * 0 + i, v1533);
+    vst1q_s16(out + out_stride * 1 + i, v1915);
+    vst1q_s16(out + out_stride * 2 + i, v2105);
+    vst1q_s16(out + out_stride * 3 + i, v2295);
+    vst1q_s16(out + out_stride * 4 + i, v2389);
+    vst1q_s16(out + out_stride * 5 + i, v2484);
+    vst1q_s16(out + out_stride * 6 + i, v2578);
+    vst1q_s16(out + out_stride * 7 + i, v2672);
+    vst1q_s16(out + out_stride * 8 + i, v2718);
+    vst1q_s16(out + out_stride * 9 + i, v2764);
+    vst1q_s16(out + out_stride * 10 + i, v2810);
+    vst1q_s16(out + out_stride * 11 + i, v2856);
+    vst1q_s16(out + out_stride * 12 + i, v2902);
+    vst1q_s16(out + out_stride * 13 + i, v2948);
+    vst1q_s16(out + out_stride * 14 + i, v2994);
+    vst1q_s16(out + out_stride * 15 + i, v3040);
+    vst1q_s16(out + out_stride * 16 + i, v3062);
+    vst1q_s16(out + out_stride * 17 + i, v3084);
+    vst1q_s16(out + out_stride * 18 + i, v3106);
+    vst1q_s16(out + out_stride * 19 + i, v3128);
+    vst1q_s16(out + out_stride * 20 + i, v3150);
+    vst1q_s16(out + out_stride * 21 + i, v3172);
+    vst1q_s16(out + out_stride * 22 + i, v3194);
+    vst1q_s16(out + out_stride * 23 + i, v3216);
+    vst1q_s16(out + out_stride * 24 + i, v3238);
+    vst1q_s16(out + out_stride * 25 + i, v3260);
+    vst1q_s16(out + out_stride * 26 + i, v3282);
+    vst1q_s16(out + out_stride * 27 + i, v3304);
+    vst1q_s16(out + out_stride * 28 + i, v3326);
+    vst1q_s16(out + out_stride * 29 + i, v3348);
+    vst1q_s16(out + out_stride * 30 + i, v3370);
+    vst1q_s16(out + out_stride * 31 + i, v3392);
+    vst1q_s16(out + out_stride * 32 + i, v3402);
+    vst1q_s16(out + out_stride * 33 + i, v3412);
+    vst1q_s16(out + out_stride * 34 + i, v3422);
+    vst1q_s16(out + out_stride * 35 + i, v3432);
+    vst1q_s16(out + out_stride * 36 + i, v3442);
+    vst1q_s16(out + out_stride * 37 + i, v3452);
+    vst1q_s16(out + out_stride * 38 + i, v3462);
+    vst1q_s16(out + out_stride * 39 + i, v3472);
+    vst1q_s16(out + out_stride * 40 + i, v3482);
+    vst1q_s16(out + out_stride * 41 + i, v3492);
+    vst1q_s16(out + out_stride * 42 + i, v3502);
+    vst1q_s16(out + out_stride * 43 + i, v3512);
+    vst1q_s16(out + out_stride * 44 + i, v3522);
+    vst1q_s16(out + out_stride * 45 + i, v3532);
+    vst1q_s16(out + out_stride * 46 + i, v3542);
+    vst1q_s16(out + out_stride * 47 + i, v3552);
+    vst1q_s16(out + out_stride * 48 + i, v3562);
+    vst1q_s16(out + out_stride * 49 + i, v3572);
+    vst1q_s16(out + out_stride * 50 + i, v3582);
+    vst1q_s16(out + out_stride * 51 + i, v3592);
+    vst1q_s16(out + out_stride * 52 + i, v3602);
+    vst1q_s16(out + out_stride * 53 + i, v3612);
+    vst1q_s16(out + out_stride * 54 + i, v3622);
+    vst1q_s16(out + out_stride * 55 + i, v3632);
+    vst1q_s16(out + out_stride * 56 + i, v3642);
+    vst1q_s16(out + out_stride * 57 + i, v3652);
+    vst1q_s16(out + out_stride * 58 + i, v3662);
+    vst1q_s16(out + out_stride * 59 + i, v3672);
+    vst1q_s16(out + out_stride * 60 + i, v3682);
+    vst1q_s16(out + out_stride * 61 + i, v3692);
+    vst1q_s16(out + out_stride * 62 + i, v3702);
+    vst1q_s16(out + out_stride * 63 + i, v3712);
+    vst1q_s16(out + out_stride * 64 + i, v3716);
+    vst1q_s16(out + out_stride * 65 + i, v3720);
+    vst1q_s16(out + out_stride * 66 + i, v3724);
+    vst1q_s16(out + out_stride * 67 + i, v3728);
+    vst1q_s16(out + out_stride * 68 + i, v3732);
+    vst1q_s16(out + out_stride * 69 + i, v3736);
+    vst1q_s16(out + out_stride * 70 + i, v3740);
+    vst1q_s16(out + out_stride * 71 + i, v3744);
+    vst1q_s16(out + out_stride * 72 + i, v3748);
+    vst1q_s16(out + out_stride * 73 + i, v3752);
+    vst1q_s16(out + out_stride * 74 + i, v3756);
+    vst1q_s16(out + out_stride * 75 + i, v3760);
+    vst1q_s16(out + out_stride * 76 + i, v3764);
+    vst1q_s16(out + out_stride * 77 + i, v3768);
+    vst1q_s16(out + out_stride * 78 + i, v3772);
+    vst1q_s16(out + out_stride * 79 + i, v3776);
+    vst1q_s16(out + out_stride * 80 + i, v3780);
+    vst1q_s16(out + out_stride * 81 + i, v3784);
+    vst1q_s16(out + out_stride * 82 + i, v3788);
+    vst1q_s16(out + out_stride * 83 + i, v3792);
+    vst1q_s16(out + out_stride * 84 + i, v3796);
+    vst1q_s16(out + out_stride * 85 + i, v3800);
+    vst1q_s16(out + out_stride * 86 + i, v3804);
+    vst1q_s16(out + out_stride * 87 + i, v3808);
+    vst1q_s16(out + out_stride * 88 + i, v3812);
+    vst1q_s16(out + out_stride * 89 + i, v3816);
+    vst1q_s16(out + out_stride * 90 + i, v3820);
+    vst1q_s16(out + out_stride * 91 + i, v3824);
+    vst1q_s16(out + out_stride * 92 + i, v3828);
+    vst1q_s16(out + out_stride * 93 + i, v3832);
+    vst1q_s16(out + out_stride * 94 + i, v3836);
+    vst1q_s16(out + out_stride * 95 + i, v3840);
+    vst1q_s16(out + out_stride * 96 + i, v3844);
+    vst1q_s16(out + out_stride * 97 + i, v3848);
+    vst1q_s16(out + out_stride * 98 + i, v3852);
+    vst1q_s16(out + out_stride * 99 + i, v3856);
+    vst1q_s16(out + out_stride * 100 + i, v3860);
+    vst1q_s16(out + out_stride * 101 + i, v3864);
+    vst1q_s16(out + out_stride * 102 + i, v3868);
+    vst1q_s16(out + out_stride * 103 + i, v3872);
+    vst1q_s16(out + out_stride * 104 + i, v3876);
+    vst1q_s16(out + out_stride * 105 + i, v3880);
+    vst1q_s16(out + out_stride * 106 + i, v3884);
+    vst1q_s16(out + out_stride * 107 + i, v3888);
+    vst1q_s16(out + out_stride * 108 + i, v3892);
+    vst1q_s16(out + out_stride * 109 + i, v3896);
+    vst1q_s16(out + out_stride * 110 + i, v3900);
+    vst1q_s16(out + out_stride * 111 + i, v3904);
+    vst1q_s16(out + out_stride * 112 + i, v3908);
+    vst1q_s16(out + out_stride * 113 + i, v3912);
+    vst1q_s16(out + out_stride * 114 + i, v3916);
+    vst1q_s16(out + out_stride * 115 + i, v3920);
+    vst1q_s16(out + out_stride * 116 + i, v3924);
+    vst1q_s16(out + out_stride * 117 + i, v3928);
+    vst1q_s16(out + out_stride * 118 + i, v3932);
+    vst1q_s16(out + out_stride * 119 + i, v3936);
+    vst1q_s16(out + out_stride * 120 + i, v3940);
+    vst1q_s16(out + out_stride * 121 + i, v3944);
+    vst1q_s16(out + out_stride * 122 + i, v3948);
+    vst1q_s16(out + out_stride * 123 + i, v3952);
+    vst1q_s16(out + out_stride * 124 + i, v3956);
+    vst1q_s16(out + out_stride * 125 + i, v3960);
+    vst1q_s16(out + out_stride * 126 + i, v3964);
+    vst1q_s16(out + out_stride * 127 + i, v3968);
+    vst1q_s16(out + out_stride * 128 + i, v3969);
+    vst1q_s16(out + out_stride * 129 + i, v3970);
+    vst1q_s16(out + out_stride * 130 + i, v3971);
+    vst1q_s16(out + out_stride * 131 + i, v3972);
+    vst1q_s16(out + out_stride * 132 + i, v3973);
+    vst1q_s16(out + out_stride * 133 + i, v3974);
+    vst1q_s16(out + out_stride * 134 + i, v3975);
+    vst1q_s16(out + out_stride * 135 + i, v3976);
+    vst1q_s16(out + out_stride * 136 + i, v3977);
+    vst1q_s16(out + out_stride * 137 + i, v3978);
+    vst1q_s16(out + out_stride * 138 + i, v3979);
+    vst1q_s16(out + out_stride * 139 + i, v3980);
+    vst1q_s16(out + out_stride * 140 + i, v3981);
+    vst1q_s16(out + out_stride * 141 + i, v3982);
+    vst1q_s16(out + out_stride * 142 + i, v3983);
+    vst1q_s16(out + out_stride * 143 + i, v3984);
+    vst1q_s16(out + out_stride * 144 + i, v3985);
+    vst1q_s16(out + out_stride * 145 + i, v3986);
+    vst1q_s16(out + out_stride * 146 + i, v3987);
+    vst1q_s16(out + out_stride * 147 + i, v3988);
+    vst1q_s16(out + out_stride * 148 + i, v3989);
+    vst1q_s16(out + out_stride * 149 + i, v3990);
+    vst1q_s16(out + out_stride * 150 + i, v3991);
+    vst1q_s16(out + out_stride * 151 + i, v3992);
+    vst1q_s16(out + out_stride * 152 + i, v3993);
+    vst1q_s16(out + out_stride * 153 + i, v3994);
+    vst1q_s16(out + out_stride * 154 + i, v3995);
+    vst1q_s16(out + out_stride * 155 + i, v3996);
+    vst1q_s16(out + out_stride * 156 + i, v3997);
+    vst1q_s16(out + out_stride * 157 + i, v3998);
+    vst1q_s16(out + out_stride * 158 + i, v3999);
+    vst1q_s16(out + out_stride * 159 + i, v4000);
+    vst1q_s16(out + out_stride * 160 + i, v4001);
+    vst1q_s16(out + out_stride * 161 + i, v4002);
+    vst1q_s16(out + out_stride * 162 + i, v4003);
+    vst1q_s16(out + out_stride * 163 + i, v4004);
+    vst1q_s16(out + out_stride * 164 + i, v4005);
+    vst1q_s16(out + out_stride * 165 + i, v4006);
+    vst1q_s16(out + out_stride * 166 + i, v4007);
+    vst1q_s16(out + out_stride * 167 + i, v4008);
+    vst1q_s16(out + out_stride * 168 + i, v4009);
+    vst1q_s16(out + out_stride * 169 + i, v4010);
+    vst1q_s16(out + out_stride * 170 + i, v4011);
+    vst1q_s16(out + out_stride * 171 + i, v4012);
+    vst1q_s16(out + out_stride * 172 + i, v4013);
+    vst1q_s16(out + out_stride * 173 + i, v4014);
+    vst1q_s16(out + out_stride * 174 + i, v4015);
+    vst1q_s16(out + out_stride * 175 + i, v4016);
+    vst1q_s16(out + out_stride * 176 + i, v4017);
+    vst1q_s16(out + out_stride * 177 + i, v4018);
+    vst1q_s16(out + out_stride * 178 + i, v4019);
+    vst1q_s16(out + out_stride * 179 + i, v4020);
+    vst1q_s16(out + out_stride * 180 + i, v4021);
+    vst1q_s16(out + out_stride * 181 + i, v4022);
+    vst1q_s16(out + out_stride * 182 + i, v4023);
+    vst1q_s16(out + out_stride * 183 + i, v4024);
+    vst1q_s16(out + out_stride * 184 + i, v4025);
+    vst1q_s16(out + out_stride * 185 + i, v4026);
+    vst1q_s16(out + out_stride * 186 + i, v4027);
+    vst1q_s16(out + out_stride * 187 + i, v4028);
+    vst1q_s16(out + out_stride * 188 + i, v4029);
+    vst1q_s16(out + out_stride * 189 + i, v4030);
+    vst1q_s16(out + out_stride * 190 + i, v4031);
+    vst1q_s16(out + out_stride * 191 + i, v4032);
+    vst1q_s16(out + out_stride * 192 + i, v4033);
+    vst1q_s16(out + out_stride * 193 + i, v4034);
+    vst1q_s16(out + out_stride * 194 + i, v4035);
+    vst1q_s16(out + out_stride * 195 + i, v4036);
+    vst1q_s16(out + out_stride * 196 + i, v4037);
+    vst1q_s16(out + out_stride * 197 + i, v4038);
+    vst1q_s16(out + out_stride * 198 + i, v4039);
+    vst1q_s16(out + out_stride * 199 + i, v4040);
+    vst1q_s16(out + out_stride * 200 + i, v4041);
+    vst1q_s16(out + out_stride * 201 + i, v4042);
+    vst1q_s16(out + out_stride * 202 + i, v4043);
+    vst1q_s16(out + out_stride * 203 + i, v4044);
+    vst1q_s16(out + out_stride * 204 + i, v4045);
+    vst1q_s16(out + out_stride * 205 + i, v4046);
+    vst1q_s16(out + out_stride * 206 + i, v4047);
+    vst1q_s16(out + out_stride * 207 + i, v4048);
+    vst1q_s16(out + out_stride * 208 + i, v4049);
+    vst1q_s16(out + out_stride * 209 + i, v4050);
+    vst1q_s16(out + out_stride * 210 + i, v4051);
+    vst1q_s16(out + out_stride * 211 + i, v4052);
+    vst1q_s16(out + out_stride * 212 + i, v4053);
+    vst1q_s16(out + out_stride * 213 + i, v4054);
+    vst1q_s16(out + out_stride * 214 + i, v4055);
+    vst1q_s16(out + out_stride * 215 + i, v4056);
+    vst1q_s16(out + out_stride * 216 + i, v4057);
+    vst1q_s16(out + out_stride * 217 + i, v4058);
+    vst1q_s16(out + out_stride * 218 + i, v4059);
+    vst1q_s16(out + out_stride * 219 + i, v4060);
+    vst1q_s16(out + out_stride * 220 + i, v4061);
+    vst1q_s16(out + out_stride * 221 + i, v4062);
+    vst1q_s16(out + out_stride * 222 + i, v4063);
+    vst1q_s16(out + out_stride * 223 + i, v4064);
+    vst1q_s16(out + out_stride * 224 + i, v4065);
+    vst1q_s16(out + out_stride * 225 + i, v4066);
+    vst1q_s16(out + out_stride * 226 + i, v4067);
+    vst1q_s16(out + out_stride * 227 + i, v4068);
+    vst1q_s16(out + out_stride * 228 + i, v4069);
+    vst1q_s16(out + out_stride * 229 + i, v4070);
+    vst1q_s16(out + out_stride * 230 + i, v4071);
+    vst1q_s16(out + out_stride * 231 + i, v4072);
+    vst1q_s16(out + out_stride * 232 + i, v4073);
+    vst1q_s16(out + out_stride * 233 + i, v4074);
+    vst1q_s16(out + out_stride * 234 + i, v4075);
+    vst1q_s16(out + out_stride * 235 + i, v4076);
+    vst1q_s16(out + out_stride * 236 + i, v4077);
+    vst1q_s16(out + out_stride * 237 + i, v4078);
+    vst1q_s16(out + out_stride * 238 + i, v4079);
+    vst1q_s16(out + out_stride * 239 + i, v4080);
+    vst1q_s16(out + out_stride * 240 + i, v4081);
+    vst1q_s16(out + out_stride * 241 + i, v4082);
+    vst1q_s16(out + out_stride * 242 + i, v4083);
+    vst1q_s16(out + out_stride * 243 + i, v4084);
+    vst1q_s16(out + out_stride * 244 + i, v4085);
+    vst1q_s16(out + out_stride * 245 + i, v4086);
+    vst1q_s16(out + out_stride * 246 + i, v4087);
+    vst1q_s16(out + out_stride * 247 + i, v4088);
+    vst1q_s16(out + out_stride * 248 + i, v4089);
+    vst1q_s16(out + out_stride * 249 + i, v4090);
+    vst1q_s16(out + out_stride * 250 + i, v4091);
+    vst1q_s16(out + out_stride * 251 + i, v4092);
+    vst1q_s16(out + out_stride * 252 + i, v4093);
+    vst1q_s16(out + out_stride * 253 + i, v4094);
+    vst1q_s16(out + out_stride * 254 + i, v4095);
+    vst1q_s16(out + out_stride * 255 + i, v4096);
+  }
+}
diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_dct32-inl.h b/third-party/libjxl/libjxl/lib/jxl/fast_dct32-inl.h
new file mode 100644
index 0000000000..0f3b31cfea
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/fast_dct32-inl.h
@@ -0,0 +1,419 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/* This file is automatically generated. Do not modify it directly. */
+#if HWY_TARGET != HWY_NEON
+#error "only include this file from fast_dct-inl.h"
+#endif
+
+constexpr size_t FastIDCTIntegerBits(FastDCTTag<32>) { return 1; }  // Constant 1 for the 32-point tag; presumably the integer-bit count paired with FastIDCT<32> -- TODO confirm in fast_dct-inl.h.
+
+void FastIDCT(FastDCTTag<32>, const int16_t* in, size_t in_stride, int16_t* out,
+              size_t out_stride, size_t count) {
+  JXL_ASSERT(count % 8 == 0);
+  for (size_t i = 0; i < count; i += 8) {
+    int16x8_t v0 = vld1q_s16(in + in_stride * 0 + i);
+    int16x8_t v1 = vld1q_s16(in + in_stride * 16 + i);
+    int16x8_t v2 = vaddq_s16(v0, v1);
+    int16x8_t v3 = vld1q_s16(in + in_stride * 8 + i);
+    int16x8_t v4_tmp = vqrdmulhq_n_s16(v3, 13573);
+    int16x8_t v4 = vaddq_s16(v4_tmp, v3);
+    int16x8_t v5 = vld1q_s16(in + in_stride * 24 + i);
+    int16x8_t v6 = vaddq_s16(v5, v3);
+    int16x8_t v7 = vaddq_s16(v4, v6);
+    int16x8_t v8 = vqrdmulhq_n_s16(v7, 17734);
+    int16x8_t v9 = vaddq_s16(v2, v8);
+    int16x8_t v10 = vld1q_s16(in + in_stride * 4 + i);
+    int16x8_t v11_tmp = vqrdmulhq_n_s16(v10, 13573);
+    int16x8_t v11 = vaddq_s16(v11_tmp, v10);
+    int16x8_t v12 = vld1q_s16(in + in_stride * 20 + i);
+    int16x8_t v13 = vld1q_s16(in + in_stride * 12 + i);
+    int16x8_t v14 = vaddq_s16(v12, v13);
+    int16x8_t v15 = vaddq_s16(v11, v14);
+    int16x8_t v16 = vld1q_s16(in + in_stride * 28 + i);
+    int16x8_t v17 = vaddq_s16(v16, v12);
+    int16x8_t v18 = vaddq_s16(v13, v10);
+    int16x8_t v19 = vaddq_s16(v17, v18);
+    int16x8_t v20 = vqrdmulhq_n_s16(v19, 17734);
+    int16x8_t v21 = vqrdmulhq_n_s16(v18, 25080);
+    int16x8_t v22 = vaddq_s16(v20, v21);
+    int16x8_t v23 = vaddq_s16(v15, v22);
+    int16x8_t v24 = vqrdmulhq_n_s16(v23, 16705);
+    int16x8_t v25 = vaddq_s16(v9, v24);
+    int16x8_t v26 = vld1q_s16(in + in_stride * 2 + i);
+    int16x8_t v27_tmp = vqrdmulhq_n_s16(v26, 13573);
+    int16x8_t v27 = vaddq_s16(v27_tmp, v26);
+    int16x8_t v28 = vld1q_s16(in + in_stride * 18 + i);
+    int16x8_t v29 = vld1q_s16(in + in_stride * 14 + i);
+    int16x8_t v30 = vaddq_s16(v28, v29);
+    int16x8_t v31 = vaddq_s16(v27, v30);
+    int16x8_t v32 = vld1q_s16(in + in_stride * 10 + i);
+    int16x8_t v33 = vld1q_s16(in + in_stride * 6 + i);
+    int16x8_t v34 = vaddq_s16(v32, v33);
+    int16x8_t v35 = vqrdmulhq_n_s16(v34, 25080);
+    int16x8_t v36 = vld1q_s16(in + in_stride * 26 + i);
+    int16x8_t v37 = vld1q_s16(in + in_stride * 22 + i);
+    int16x8_t v38 = vaddq_s16(v36, v37);
+    int16x8_t v39 = vaddq_s16(v38, v34);
+    int16x8_t v40 = vqrdmulhq_n_s16(v39, 17734);
+    int16x8_t v41 = vaddq_s16(v35, v40);
+    int16x8_t v42 = vaddq_s16(v31, v41);
+    int16x8_t v43 = vaddq_s16(v33, v26);
+    int16x8_t v44_tmp = vqrdmulhq_n_s16(v43, 13573);
+    int16x8_t v44 = vaddq_s16(v44_tmp, v43);
+    int16x8_t v45 = vaddq_s16(v29, v32);
+    int16x8_t v46 = vaddq_s16(v37, v28);
+    int16x8_t v47 = vaddq_s16(v45, v46);
+    int16x8_t v48 = vaddq_s16(v44, v47);
+    int16x8_t v49 = vqrdmulhq_n_s16(v48, 16705);
+    int16x8_t v50 = vld1q_s16(in + in_stride * 30 + i);
+    int16x8_t v51 = vaddq_s16(v50, v36);
+    int16x8_t v52 = vaddq_s16(v51, v46);
+    int16x8_t v53 = vqrdmulhq_n_s16(v52, 17734);
+    int16x8_t v54 = vaddq_s16(v45, v43);
+    int16x8_t v55_tmp = vqrdmulhq_n_s16(v54, 10045);
+    int16x8_t v55 = vaddq_s16(v55_tmp, v54);
+    int16x8_t v56 = vaddq_s16(v53, v55);
+    int16x8_t v57 = vqrdmulhq_n_s16(v56, 16705);
+    int16x8_t v58 = vaddq_s16(v49, v57);
+    int16x8_t v59 = vaddq_s16(v42, v58);
+    int16x8_t v60 = vqrdmulhq_n_s16(v59, 16463);
+    int16x8_t v61 = vaddq_s16(v25, v60);
+    int16x8_t v62 = vld1q_s16(in + in_stride * 13 + i);
+    int16x8_t v63 = vld1q_s16(in + in_stride * 11 + i);
+    int16x8_t v64 = vaddq_s16(v62, v63);
+    int16x8_t v65 = vld1q_s16(in + in_stride * 5 + i);
+    int16x8_t v66 = vld1q_s16(in + in_stride * 3 + i);
+    int16x8_t v67 = vaddq_s16(v65, v66);
+    int16x8_t v68 = vaddq_s16(v64, v67);
+    int16x8_t v69_tmp = vqrdmulhq_n_s16(v68, 10045);
+    int16x8_t v69 = vaddq_s16(v69_tmp, v68);
+    int16x8_t v70 = vld1q_s16(in + in_stride * 21 + i);
+    int16x8_t v71 = vld1q_s16(in + in_stride * 19 + i);
+    int16x8_t v72 = vaddq_s16(v70, v71);
+    int16x8_t v73 = vld1q_s16(in + in_stride * 29 + i);
+    int16x8_t v74 = vld1q_s16(in + in_stride * 27 + i);
+    int16x8_t v75 = vaddq_s16(v73, v74);
+    int16x8_t v76 = vaddq_s16(v72, v75);
+    int16x8_t v77 = vqrdmulhq_n_s16(v76, 17734);
+    int16x8_t v78 = vaddq_s16(v69, v77);
+    int16x8_t v79 = vqrdmulhq_n_s16(v78, 16705);
+    int16x8_t v80_tmp = vqrdmulhq_n_s16(v67, 13573);
+    int16x8_t v80 = vaddq_s16(v80_tmp, v67);
+    int16x8_t v81 = vaddq_s16(v64, v72);
+    int16x8_t v82 = vaddq_s16(v80, v81);
+    int16x8_t v83 = vqrdmulhq_n_s16(v82, 16705);
+    int16x8_t v84 = vaddq_s16(v79, v83);
+    int16x8_t v85 = vld1q_s16(in + in_stride * 1 + i);
+    int16x8_t v86_tmp = vqrdmulhq_n_s16(v85, 13573);
+    int16x8_t v86 = vaddq_s16(v86_tmp, v85);
+    int16x8_t v87 = vld1q_s16(in + in_stride * 17 + i);
+    int16x8_t v88 = vld1q_s16(in + in_stride * 15 + i);
+    int16x8_t v89 = vaddq_s16(v87, v88);
+    int16x8_t v90 = vaddq_s16(v86, v89);
+    int16x8_t v91 = vld1q_s16(in + in_stride * 9 + i);
+    int16x8_t v92 = vld1q_s16(in + in_stride * 7 + i);
+    int16x8_t v93 = vaddq_s16(v91, v92);
+    int16x8_t v94 = vqrdmulhq_n_s16(v93, 25080);
+    int16x8_t v95 = vld1q_s16(in + in_stride * 25 + i);
+    int16x8_t v96 = vld1q_s16(in + in_stride * 23 + i);
+    int16x8_t v97 = vaddq_s16(v95, v96);
+    int16x8_t v98 = vaddq_s16(v97, v93);
+    int16x8_t v99 = vqrdmulhq_n_s16(v98, 17734);
+    int16x8_t v100 = vaddq_s16(v94, v99);
+    int16x8_t v101 = vaddq_s16(v90, v100);
+    int16x8_t v102 = vaddq_s16(v84, v101);
+    int16x8_t v103 = vaddq_s16(v92, v65);
+    int16x8_t v104 = vaddq_s16(v66, v85);
+    int16x8_t v105 = vaddq_s16(v103, v104);
+    int16x8_t v106_tmp = vqrdmulhq_n_s16(v105, 13573);
+    int16x8_t v106 = vaddq_s16(v106_tmp, v105);
+    int16x8_t v107 = vaddq_s16(v96, v70);
+    int16x8_t v108 = vaddq_s16(v71, v87);
+    int16x8_t v109 = vaddq_s16(v107, v108);
+    int16x8_t v110 = vaddq_s16(v63, v91);
+    int16x8_t v111 = vaddq_s16(v88, v62);
+    int16x8_t v112 = vaddq_s16(v110, v111);
+    int16x8_t v113 = vaddq_s16(v109, v112);
+    int16x8_t v114 = vaddq_s16(v106, v113);
+    int16x8_t v115 = vqrdmulhq_n_s16(v114, 16705);
+    int16x8_t v116 = vaddq_s16(v112, v105);
+    int16x8_t v117 = vqrdmulhq_n_s16(v116, 25080);
+    int16x8_t v118 = vqrdmulhq_n_s16(v116, 17734);
+    int16x8_t v119 = vaddq_s16(v74, v95);
+    int16x8_t v120 = vld1q_s16(in + in_stride * 31 + i);
+    int16x8_t v121 = vaddq_s16(v120, v73);
+    int16x8_t v122 = vaddq_s16(v119, v121);
+    int16x8_t v123 = vaddq_s16(v122, v109);
+    int16x8_t v124 = vqrdmulhq_n_s16(v123, 17734);
+    int16x8_t v125 = vaddq_s16(v118, v124);
+    int16x8_t v126 = vaddq_s16(v117, v125);
+    int16x8_t v127 = vqrdmulhq_n_s16(v126, 16705);
+    int16x8_t v128 = vaddq_s16(v115, v127);
+    int16x8_t v129 = vqrdmulhq_n_s16(v128, 16463);
+    int16x8_t v130_tmp = vqrdmulhq_n_s16(v104, 13573);
+    int16x8_t v130 = vaddq_s16(v130_tmp, v104);
+    int16x8_t v131 = vaddq_s16(v108, v111);
+    int16x8_t v132 = vaddq_s16(v130, v131);
+    int16x8_t v133 = vaddq_s16(v119, v107);
+    int16x8_t v134 = vqrdmulhq_n_s16(v133, 17734);
+    int16x8_t v135 = vaddq_s16(v110, v103);
+    int16x8_t v136_tmp = vqrdmulhq_n_s16(v135, 10045);
+    int16x8_t v136 = vaddq_s16(v136_tmp, v135);
+    int16x8_t v137 = vaddq_s16(v134, v136);
+    int16x8_t v138 = vaddq_s16(v132, v137);
+    int16x8_t v139 = vqrdmulhq_n_s16(v138, 16463);
+    int16x8_t v140 = vaddq_s16(v129, v139);
+    int16x8_t v141 = vaddq_s16(v102, v140);
+    int16x8_t v142 = vqrdmulhq_n_s16(v141, 16404);
+    int16x8_t v143 = vaddq_s16(v61, v142);
+    int16x8_t v144 = vsubq_s16(v0, v1);
+    int16x8_t v145 = vsubq_s16(v4, v6);
+    int16x8_t v146_tmp = vqrdmulhq_n_s16(v145, 10045);
+    int16x8_t v146 = vaddq_s16(v146_tmp, v145);
+    int16x8_t v147 = vaddq_s16(v144, v146);
+    int16x8_t v148 = vsubq_s16(v11, v14);
+    int16x8_t v149 = vqrdmulhq_n_s16(v18, 17734);
+    int16x8_t v150_tmp = vqrdmulhq_n_s16(v17, 10045);
+    int16x8_t v150 = vaddq_s16(v150_tmp, v17);
+    int16x8_t v151 = vsubq_s16(v149, v150);
+    int16x8_t v152 = vaddq_s16(v148, v151);
+    int16x8_t v153 = vqrdmulhq_n_s16(v152, 19705);
+    int16x8_t v154 = vaddq_s16(v147, v153);
+    int16x8_t v155 = vsubq_s16(v27, v30);
+    int16x8_t v156 = vqrdmulhq_n_s16(v34, 17734);
+    int16x8_t v157_tmp = vqrdmulhq_n_s16(v38, 10045);
+    int16x8_t v157 = vaddq_s16(v157_tmp, v38);
+    int16x8_t v158 = vsubq_s16(v156, v157);
+    int16x8_t v159 = vaddq_s16(v155, v158);
+    int16x8_t v160 = vqrdmulhq_n_s16(v54, 13573);
+    int16x8_t v161 = vsubq_s16(v160, v52);
+    int16x8_t v162 = vqrdmulhq_n_s16(v161, 25746);
+    int16x8_t v163 = vsubq_s16(v44, v47);
+    int16x8_t v164 = vqrdmulhq_n_s16(v163, 19705);
+    int16x8_t v165 = vaddq_s16(v162, v164);
+    int16x8_t v166 = vaddq_s16(v159, v165);
+    int16x8_t v167 = vqrdmulhq_n_s16(v166, 17121);
+    int16x8_t v168 = vaddq_s16(v154, v167);
+    int16x8_t v169 = vsubq_s16(v86, v89);
+    int16x8_t v170 = vqrdmulhq_n_s16(v93, 17734);
+    int16x8_t v171_tmp = vqrdmulhq_n_s16(v97, 10045);
+    int16x8_t v171 = vaddq_s16(v171_tmp, v97);
+    int16x8_t v172 = vsubq_s16(v170, v171);
+    int16x8_t v173 = vaddq_s16(v169, v172);
+    int16x8_t v174 = vsubq_s16(v80, v81);
+    int16x8_t v175 = vqrdmulhq_n_s16(v174, 19705);
+    int16x8_t v176 = vqrdmulhq_n_s16(v68, 13573);
+    int16x8_t v177 = vsubq_s16(v176, v76);
+    int16x8_t v178 = vqrdmulhq_n_s16(v177, 25746);
+    int16x8_t v179 = vaddq_s16(v175, v178);
+    int16x8_t v180 = vaddq_s16(v173, v179);
+    int16x8_t v181 = vsubq_s16(v130, v131);
+    int16x8_t v182 = vqrdmulhq_n_s16(v135, 13573);
+    int16x8_t v183 = vsubq_s16(v182, v133);
+    int16x8_t v184_tmp = vqrdmulhq_n_s16(v183, 10045);
+    int16x8_t v184 = vaddq_s16(v184_tmp, v183);
+    int16x8_t v185 = vaddq_s16(v181, v184);
+    int16x8_t v186 = vqrdmulhq_n_s16(v185, 17121);
+    int16x8_t v187 = vqrdmulhq_n_s16(v105, 27867);
+    int16x8_t v188 = vqrdmulhq_n_s16(v113, 19705);
+    int16x8_t v189 = vsubq_s16(v187, v188);
+    int16x8_t v190 = vqrdmulhq_n_s16(v116, 13573);
+    int16x8_t v191 = vsubq_s16(v190, v123);
+    int16x8_t v192 = vqrdmulhq_n_s16(v191, 25746);
+    int16x8_t v193 = vaddq_s16(v189, v192);
+    int16x8_t v194 = vqrdmulhq_n_s16(v193, 17121);
+    int16x8_t v195 = vaddq_s16(v186, v194);
+    int16x8_t v196 = vaddq_s16(v180, v195);
+    int16x8_t v197 = vqrdmulhq_n_s16(v196, 16563);
+    int16x8_t v198 = vaddq_s16(v168, v197);
+    int16x8_t v199 = vsubq_s16(v144, v146);
+    int16x8_t v200 = vsubq_s16(v148, v151);
+    int16x8_t v201 = vqrdmulhq_n_s16(v200, 29490);
+    int16x8_t v202 = vaddq_s16(v199, v201);
+    int16x8_t v203 = vsubq_s16(v155, v158);
+    int16x8_t v204 = vqrdmulhq_n_s16(v163, 29490);
+    int16x8_t v205_tmp = vqrdmulhq_n_s16(v161, 5763);
+    int16x8_t v205 = vaddq_s16(v205_tmp, v161);
+    int16x8_t v206 = vsubq_s16(v204, v205);
+    int16x8_t v207 = vaddq_s16(v203, v206);
+    int16x8_t v208 = vqrdmulhq_n_s16(v207, 18578);
+    int16x8_t v209 = vaddq_s16(v202, v208);
+    int16x8_t v210 = vsubq_s16(v169, v172);
+    int16x8_t v211 = vqrdmulhq_n_s16(v174, 29490);
+    int16x8_t v212_tmp = vqrdmulhq_n_s16(v177, 5763);
+    int16x8_t v212 = vaddq_s16(v212_tmp, v177);
+    int16x8_t v213 = vsubq_s16(v211, v212);
+    int16x8_t v214 = vaddq_s16(v210, v213);
+    int16x8_t v215 = vsubq_s16(v181, v184);
+    int16x8_t v216 = vqrdmulhq_n_s16(v215, 18578);
+    int16x8_t v217 = vqrdmulhq_n_s16(v189, 27803);
+    int16x8_t v218 = vqrdmulhq_n_s16(v191, 21845);
+    int16x8_t v219 = vsubq_s16(v217, v218);
+    int16x8_t v220 = vaddq_s16(v216, v219);
+    int16x8_t v221 = vaddq_s16(v214, v220);
+    int16x8_t v222 = vqrdmulhq_n_s16(v221, 16890);
+    int16x8_t v223 = vaddq_s16(v209, v222);
+    int16x8_t v224 = vsubq_s16(v2, v8);
+    int16x8_t v225 = vsubq_s16(v15, v22);
+    int16x8_t v226_tmp = vqrdmulhq_n_s16(v225, 18446);
+    int16x8_t v226 = vmlaq_n_s16(v226_tmp, v225, 2);
+    int16x8_t v227 = vaddq_s16(v224, v226);
+    int16x8_t v228 = vsubq_s16(v31, v41);
+    int16x8_t v229 = vsubq_s16(v48, v56);
+    int16x8_t v230_tmp = vqrdmulhq_n_s16(v229, 18446);
+    int16x8_t v230 = vmlaq_n_s16(v230_tmp, v229, 2);
+    int16x8_t v231 = vaddq_s16(v228, v230);
+    int16x8_t v232 = vqrdmulhq_n_s16(v231, 21195);
+    int16x8_t v233 = vaddq_s16(v227, v232);
+    int16x8_t v234 = vsubq_s16(v82, v78);
+    int16x8_t v235_tmp = vqrdmulhq_n_s16(v234, 18446);
+    int16x8_t v235 = vmlaq_n_s16(v235_tmp, v234, 2);
+    int16x8_t v236 = vsubq_s16(v90, v100);
+    int16x8_t v237 = vaddq_s16(v235, v236);
+    int16x8_t v238 = vsubq_s16(v132, v137);
+    int16x8_t v239 = vsubq_s16(v114, v126);
+    int16x8_t v240_tmp = vqrdmulhq_n_s16(v239, 18446);
+    int16x8_t v240 = vmlaq_n_s16(v240_tmp, v239, 2);
+    int16x8_t v241 = vaddq_s16(v238, v240);
+    int16x8_t v242 = vqrdmulhq_n_s16(v241, 21195);
+    int16x8_t v243 = vaddq_s16(v237, v242);
+    int16x8_t v244 = vqrdmulhq_n_s16(v243, 17401);
+    int16x8_t v245 = vaddq_s16(v233, v244);
+    int16x8_t v246 = vsubq_s16(v228, v230);
+    int16x8_t v247 = vqrdmulhq_n_s16(v246, 25826);
+    int16x8_t v248 = vsubq_s16(v224, v226);
+    int16x8_t v249 = vaddq_s16(v247, v248);
+    int16x8_t v250 = vsubq_s16(v238, v240);
+    int16x8_t v251 = vqrdmulhq_n_s16(v250, 25826);
+    int16x8_t v252 = vsubq_s16(v236, v235);
+    int16x8_t v253 = vaddq_s16(v251, v252);
+    int16x8_t v254 = vqrdmulhq_n_s16(v253, 18124);
+    int16x8_t v255 = vaddq_s16(v249, v254);
+    int16x8_t v256 = vsubq_s16(v199, v201);
+    int16x8_t v257 = vsubq_s16(v203, v206);
+    int16x8_t v258_tmp = vqrdmulhq_n_s16(v257, 1988);
+    int16x8_t v258 = vaddq_s16(v258_tmp, v257);
+    int16x8_t v259 = vaddq_s16(v256, v258);
+    int16x8_t v260 = vsubq_s16(v210, v213);
+    int16x8_t v261_tmp = vqrdmulhq_n_s16(v219, 25030);
+    int16x8_t v261 = vaddq_s16(v261_tmp, v219);
+    int16x8_t v262 = vsubq_s16(v215, v261);
+    int16x8_t v263_tmp = vqrdmulhq_n_s16(v262, 1988);
+    int16x8_t v263 = vaddq_s16(v263_tmp, v262);
+    int16x8_t v264 = vaddq_s16(v260, v263);
+    int16x8_t v265 = vqrdmulhq_n_s16(v264, 19102);
+    int16x8_t v266 = vaddq_s16(v259, v265);
+    int16x8_t v267 = vsubq_s16(v147, v153);
+    int16x8_t v268 = vsubq_s16(v159, v165);
+    int16x8_t v269_tmp = vqrdmulhq_n_s16(v268, 23673);
+    int16x8_t v269 = vaddq_s16(v269_tmp, v268);
+    int16x8_t v270 = vaddq_s16(v267, v269);
+    int16x8_t v271 = vsubq_s16(v173, v179);
+    int16x8_t v272 = vsubq_s16(v185, v193);
+    int16x8_t v273_tmp = vqrdmulhq_n_s16(v272, 23673);
+    int16x8_t v273 = vaddq_s16(v273_tmp, v272);
+    int16x8_t v274 = vaddq_s16(v271, v273);
+    int16x8_t v275 = vqrdmulhq_n_s16(v274, 20398);
+    int16x8_t v276 = vaddq_s16(v270, v275);
+    int16x8_t v277 = vsubq_s16(v9, v24);
+    int16x8_t v278 = vsubq_s16(v42, v58);
+    int16x8_t v279_tmp = vqrdmulhq_n_s16(v278, 3314);
+    int16x8_t v279 = vmlaq_n_s16(v279_tmp, v278, 5);
+    int16x8_t v280 = vaddq_s16(v277, v279);
+    int16x8_t v281 = vsubq_s16(v138, v128);
+    int16x8_t v282_tmp = vqrdmulhq_n_s16(v281, 3314);
+    int16x8_t v282 = vmlaq_n_s16(v282_tmp, v281, 5);
+    int16x8_t v283 = vsubq_s16(v101, v84);
+    int16x8_t v284 = vaddq_s16(v282, v283);
+    int16x8_t v285 = vqrdmulhq_n_s16(v284, 22112);
+    int16x8_t v286 = vaddq_s16(v280, v285);
+    int16x8_t v287 = vsubq_s16(v277, v279);
+    int16x8_t v288 = vsubq_s16(v283, v282);
+    int16x8_t v289 = vqrdmulhq_n_s16(v288, 24397);
+    int16x8_t v290 = vaddq_s16(v287, v289);
+    int16x8_t v291 = vsubq_s16(v267, v269);
+    int16x8_t v292 = vsubq_s16(v271, v273);
+    int16x8_t v293 = vqrdmulhq_n_s16(v292, 27504);
+    int16x8_t v294 = vaddq_s16(v291, v293);
+    int16x8_t v295 = vsubq_s16(v260, v263);
+    int16x8_t v296 = vqrdmulhq_n_s16(v295, 31869);
+    int16x8_t v297 = vsubq_s16(v256, v258);
+    int16x8_t v298 = vaddq_s16(v296, v297);
+    int16x8_t v299 = vsubq_s16(v248, v247);
+    int16x8_t v300 = vsubq_s16(v252, v251);
+    int16x8_t v301_tmp = vqrdmulhq_n_s16(v300, 5552);
+    int16x8_t v301 = vaddq_s16(v301_tmp, v300);
+    int16x8_t v302 = vaddq_s16(v299, v301);
+    int16x8_t v303 = vsubq_s16(v227, v232);
+    int16x8_t v304 = vsubq_s16(v237, v242);
+    int16x8_t v305_tmp = vqrdmulhq_n_s16(v304, 15865);
+    int16x8_t v305 = vaddq_s16(v305_tmp, v304);
+    int16x8_t v306 = vaddq_s16(v303, v305);
+    int16x8_t v307 = vsubq_s16(v202, v208);
+    int16x8_t v308 = vsubq_s16(v214, v220);
+    int16x8_t v309_tmp = vqrdmulhq_n_s16(v308, 1893);
+    int16x8_t v309 = vmlaq_n_s16(v309_tmp, v308, 2);
+    int16x8_t v310 = vaddq_s16(v307, v309);
+    int16x8_t v311 = vsubq_s16(v154, v167);
+    int16x8_t v312 = vsubq_s16(v180, v195);
+    int16x8_t v313_tmp = vqrdmulhq_n_s16(v312, 13357);
+    int16x8_t v313 = vmlaq_n_s16(v313_tmp, v312, 3);
+    int16x8_t v314 = vaddq_s16(v311, v313);
+    int16x8_t v315 = vsubq_s16(v102, v140);
+    int16x8_t v316_tmp = vqrdmulhq_n_s16(v315, 6226);
+    int16x8_t v316 = vmlaq_n_s16(v316_tmp, v315, 10);
+    int16x8_t v317 = vsubq_s16(v25, v60);
+    int16x8_t v318 = vaddq_s16(v316, v317);
+    int16x8_t v319 = vsubq_s16(v317, v316);
+    int16x8_t v320 = vsubq_s16(v311, v313);
+    int16x8_t v321 = vsubq_s16(v307, v309);
+    int16x8_t v322 = vsubq_s16(v303, v305);
+    int16x8_t v323 = vsubq_s16(v299, v301);
+    int16x8_t v324 = vsubq_s16(v297, v296);
+    int16x8_t v325 = vsubq_s16(v291, v293);
+    int16x8_t v326 = vsubq_s16(v287, v289);
+    int16x8_t v327 = vsubq_s16(v280, v285);
+    int16x8_t v328 = vsubq_s16(v270, v275);
+    int16x8_t v329 = vsubq_s16(v259, v265);
+    int16x8_t v330 = vsubq_s16(v249, v254);
+    int16x8_t v331 = vsubq_s16(v233, v244);
+    int16x8_t v332 = vsubq_s16(v209, v222);
+    int16x8_t v333 = vsubq_s16(v168, v197);
+    int16x8_t v334 = vsubq_s16(v61, v142);
+    vst1q_s16(out + out_stride * 0 + i, v143);
+    vst1q_s16(out + out_stride * 1 + i, v198);
+    vst1q_s16(out + out_stride * 2 + i, v223);
+    vst1q_s16(out + out_stride * 3 + i, v245);
+    vst1q_s16(out + out_stride * 4 + i, v255);
+    vst1q_s16(out + out_stride * 5 + i, v266);
+    vst1q_s16(out + out_stride * 6 + i, v276);
+    vst1q_s16(out + out_stride * 7 + i, v286);
+    vst1q_s16(out + out_stride * 8 + i, v290);
+    vst1q_s16(out + out_stride * 9 + i, v294);
+    vst1q_s16(out + out_stride * 10 + i, v298);
+    vst1q_s16(out + out_stride * 11 + i, v302);
+    vst1q_s16(out + out_stride * 12 + i, v306);
+    vst1q_s16(out + out_stride * 13 + i, v310);
+    vst1q_s16(out + out_stride * 14 + i, v314);
+    vst1q_s16(out + out_stride * 15 + i, v318);
+    vst1q_s16(out + out_stride * 16 + i, v319);
+    vst1q_s16(out + out_stride * 17 + i, v320);
+    vst1q_s16(out + out_stride * 18 + i, v321);
+    vst1q_s16(out + out_stride * 19 + i, v322);
+    vst1q_s16(out + out_stride * 20 + i, v323);
+    vst1q_s16(out + out_stride * 21 + i, v324);
+    vst1q_s16(out + out_stride * 22 + i, v325);
+    vst1q_s16(out + out_stride * 23 + i, v326);
+    vst1q_s16(out + out_stride * 24 + i, v327);
+    vst1q_s16(out + out_stride * 25 + i, v328);
+    vst1q_s16(out + out_stride * 26 + i, v329);
+    vst1q_s16(out + out_stride * 27 + i, v330);
+    vst1q_s16(out + out_stride * 28 + i, v331);
+    vst1q_s16(out + out_stride * 29 + i, v332);
+    vst1q_s16(out + out_stride * 30 + i, v333);
+    vst1q_s16(out + out_stride * 31 + i, v334);
+  }
+}
diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_dct64-inl.h b/third-party/libjxl/libjxl/lib/jxl/fast_dct64-inl.h
new file mode 100644
index 0000000000..400da1a9de
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/fast_dct64-inl.h
@@ -0,0 +1,985 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/* This file is automatically generated. Do not modify it directly. */
+#if HWY_TARGET != HWY_NEON
+#error "only include this file from fast_dct-inl.h"
+#endif
+
+constexpr size_t FastIDCTIntegerBits(FastDCTTag<64>) { return 1; }  // Compile-time constant 1 for the 64-point tag. NOTE(review): presumably the integer-bit count of the int16 fixed-point format used by FastIDCT<64> below - confirm in fast_dct-inl.h.
+
+void FastIDCT(FastDCTTag<64>, const int16_t* in, size_t in_stride, int16_t* out,
+              size_t out_stride, size_t count) {
+  JXL_ASSERT(count % 8 == 0);
+  for (size_t i = 0; i < count; i += 8) {
+    int16x8_t v0 = vld1q_s16(in + in_stride * 0 + i);
+    int16x8_t v1 = vld1q_s16(in + in_stride * 32 + i);
+    int16x8_t v2 = vaddq_s16(v0, v1);
+    int16x8_t v3 = vld1q_s16(in + in_stride * 16 + i);
+    int16x8_t v4_tmp = vqrdmulhq_n_s16(v3, 13573);
+    int16x8_t v4 = vaddq_s16(v4_tmp, v3);
+    int16x8_t v5 = vld1q_s16(in + in_stride * 48 + i);
+    int16x8_t v6 = vaddq_s16(v5, v3);
+    int16x8_t v7 = vaddq_s16(v4, v6);
+    int16x8_t v8 = vqrdmulhq_n_s16(v7, 17734);
+    int16x8_t v9 = vaddq_s16(v2, v8);
+    int16x8_t v10 = vld1q_s16(in + in_stride * 8 + i);
+    int16x8_t v11_tmp = vqrdmulhq_n_s16(v10, 13573);
+    int16x8_t v11 = vaddq_s16(v11_tmp, v10);
+    int16x8_t v12 = vld1q_s16(in + in_stride * 40 + i);
+    int16x8_t v13 = vld1q_s16(in + in_stride * 24 + i);
+    int16x8_t v14 = vaddq_s16(v12, v13);
+    int16x8_t v15 = vaddq_s16(v11, v14);
+    int16x8_t v16 = vld1q_s16(in + in_stride * 56 + i);
+    int16x8_t v17 = vaddq_s16(v16, v12);
+    int16x8_t v18 = vaddq_s16(v13, v10);
+    int16x8_t v19 = vaddq_s16(v17, v18);
+    int16x8_t v20 = vqrdmulhq_n_s16(v19, 17734);
+    int16x8_t v21 = vqrdmulhq_n_s16(v18, 25080);
+    int16x8_t v22 = vaddq_s16(v20, v21);
+    int16x8_t v23 = vaddq_s16(v15, v22);
+    int16x8_t v24 = vqrdmulhq_n_s16(v23, 16705);
+    int16x8_t v25 = vaddq_s16(v9, v24);
+    int16x8_t v26 = vld1q_s16(in + in_stride * 4 + i);
+    int16x8_t v27_tmp = vqrdmulhq_n_s16(v26, 13573);
+    int16x8_t v27 = vaddq_s16(v27_tmp, v26);
+    int16x8_t v28 = vld1q_s16(in + in_stride * 36 + i);
+    int16x8_t v29 = vld1q_s16(in + in_stride * 28 + i);
+    int16x8_t v30 = vaddq_s16(v28, v29);
+    int16x8_t v31 = vaddq_s16(v27, v30);
+    int16x8_t v32 = vld1q_s16(in + in_stride * 20 + i);
+    int16x8_t v33 = vld1q_s16(in + in_stride * 12 + i);
+    int16x8_t v34 = vaddq_s16(v32, v33);
+    int16x8_t v35 = vqrdmulhq_n_s16(v34, 25080);
+    int16x8_t v36 = vld1q_s16(in + in_stride * 52 + i);
+    int16x8_t v37 = vld1q_s16(in + in_stride * 44 + i);
+    int16x8_t v38 = vaddq_s16(v36, v37);
+    int16x8_t v39 = vaddq_s16(v38, v34);
+    int16x8_t v40 = vqrdmulhq_n_s16(v39, 17734);
+    int16x8_t v41 = vaddq_s16(v35, v40);
+    int16x8_t v42 = vaddq_s16(v31, v41);
+    int16x8_t v43 = vaddq_s16(v33, v26);
+    int16x8_t v44_tmp = vqrdmulhq_n_s16(v43, 13573);
+    int16x8_t v44 = vaddq_s16(v44_tmp, v43);
+    int16x8_t v45 = vaddq_s16(v37, v28);
+    int16x8_t v46 = vaddq_s16(v29, v32);
+    int16x8_t v47 = vaddq_s16(v45, v46);
+    int16x8_t v48 = vaddq_s16(v44, v47);
+    int16x8_t v49 = vqrdmulhq_n_s16(v48, 16705);
+    int16x8_t v50 = vaddq_s16(v46, v43);
+    int16x8_t v51_tmp = vqrdmulhq_n_s16(v50, 10045);
+    int16x8_t v51 = vaddq_s16(v51_tmp, v50);
+    int16x8_t v52 = vld1q_s16(in + in_stride * 60 + i);
+    int16x8_t v53 = vaddq_s16(v52, v36);
+    int16x8_t v54 = vaddq_s16(v53, v45);
+    int16x8_t v55 = vqrdmulhq_n_s16(v54, 17734);
+    int16x8_t v56 = vaddq_s16(v51, v55);
+    int16x8_t v57 = vqrdmulhq_n_s16(v56, 16705);
+    int16x8_t v58 = vaddq_s16(v49, v57);
+    int16x8_t v59 = vaddq_s16(v42, v58);
+    int16x8_t v60 = vqrdmulhq_n_s16(v59, 16463);
+    int16x8_t v61 = vaddq_s16(v25, v60);
+    int16x8_t v62 = vld1q_s16(in + in_stride * 2 + i);
+    int16x8_t v63_tmp = vqrdmulhq_n_s16(v62, 13573);
+    int16x8_t v63 = vaddq_s16(v63_tmp, v62);
+    int16x8_t v64 = vld1q_s16(in + in_stride * 34 + i);
+    int16x8_t v65 = vld1q_s16(in + in_stride * 30 + i);
+    int16x8_t v66 = vaddq_s16(v64, v65);
+    int16x8_t v67 = vaddq_s16(v63, v66);
+    int16x8_t v68 = vld1q_s16(in + in_stride * 18 + i);
+    int16x8_t v69 = vld1q_s16(in + in_stride * 14 + i);
+    int16x8_t v70 = vaddq_s16(v68, v69);
+    int16x8_t v71 = vqrdmulhq_n_s16(v70, 25080);
+    int16x8_t v72 = vld1q_s16(in + in_stride * 50 + i);
+    int16x8_t v73 = vld1q_s16(in + in_stride * 46 + i);
+    int16x8_t v74 = vaddq_s16(v72, v73);
+    int16x8_t v75 = vaddq_s16(v74, v70);
+    int16x8_t v76 = vqrdmulhq_n_s16(v75, 17734);
+    int16x8_t v77 = vaddq_s16(v71, v76);
+    int16x8_t v78 = vaddq_s16(v67, v77);
+    int16x8_t v79 = vld1q_s16(in + in_stride * 10 + i);
+    int16x8_t v80 = vld1q_s16(in + in_stride * 6 + i);
+    int16x8_t v81 = vaddq_s16(v79, v80);
+    int16x8_t v82_tmp = vqrdmulhq_n_s16(v81, 13573);
+    int16x8_t v82 = vaddq_s16(v82_tmp, v81);
+    int16x8_t v83 = vld1q_s16(in + in_stride * 42 + i);
+    int16x8_t v84 = vld1q_s16(in + in_stride * 38 + i);
+    int16x8_t v85 = vaddq_s16(v83, v84);
+    int16x8_t v86 = vld1q_s16(in + in_stride * 26 + i);
+    int16x8_t v87 = vld1q_s16(in + in_stride * 22 + i);
+    int16x8_t v88 = vaddq_s16(v86, v87);
+    int16x8_t v89 = vaddq_s16(v85, v88);
+    int16x8_t v90 = vaddq_s16(v82, v89);
+    int16x8_t v91 = vqrdmulhq_n_s16(v90, 16705);
+    int16x8_t v92 = vaddq_s16(v88, v81);
+    int16x8_t v93_tmp = vqrdmulhq_n_s16(v92, 10045);
+    int16x8_t v93 = vaddq_s16(v93_tmp, v92);
+    int16x8_t v94 = vld1q_s16(in + in_stride * 58 + i);
+    int16x8_t v95 = vld1q_s16(in + in_stride * 54 + i);
+    int16x8_t v96 = vaddq_s16(v94, v95);
+    int16x8_t v97 = vaddq_s16(v96, v85);
+    int16x8_t v98 = vqrdmulhq_n_s16(v97, 17734);
+    int16x8_t v99 = vaddq_s16(v93, v98);
+    int16x8_t v100 = vqrdmulhq_n_s16(v99, 16705);
+    int16x8_t v101 = vaddq_s16(v91, v100);
+    int16x8_t v102 = vaddq_s16(v78, v101);
+    int16x8_t v103 = vaddq_s16(v69, v79);
+    int16x8_t v104 = vaddq_s16(v80, v62);
+    int16x8_t v105 = vaddq_s16(v103, v104);
+    int16x8_t v106_tmp = vqrdmulhq_n_s16(v105, 13573);
+    int16x8_t v106 = vaddq_s16(v106_tmp, v105);
+    int16x8_t v107 = vaddq_s16(v73, v83);
+    int16x8_t v108 = vaddq_s16(v84, v64);
+    int16x8_t v109 = vaddq_s16(v107, v108);
+    int16x8_t v110 = vaddq_s16(v65, v86);
+    int16x8_t v111 = vaddq_s16(v87, v68);
+    int16x8_t v112 = vaddq_s16(v110, v111);
+    int16x8_t v113 = vaddq_s16(v109, v112);
+    int16x8_t v114 = vaddq_s16(v106, v113);
+    int16x8_t v115 = vqrdmulhq_n_s16(v114, 16705);
+    int16x8_t v116 = vaddq_s16(v112, v105);
+    int16x8_t v117 = vqrdmulhq_n_s16(v116, 25080);
+    int16x8_t v118 = vqrdmulhq_n_s16(v116, 17734);
+    int16x8_t v119 = vld1q_s16(in + in_stride * 62 + i);
+    int16x8_t v120 = vaddq_s16(v119, v94);
+    int16x8_t v121 = vaddq_s16(v95, v72);
+    int16x8_t v122 = vaddq_s16(v120, v121);
+    int16x8_t v123 = vaddq_s16(v122, v109);
+    int16x8_t v124 = vqrdmulhq_n_s16(v123, 17734);
+    int16x8_t v125 = vaddq_s16(v118, v124);
+    int16x8_t v126 = vaddq_s16(v117, v125);
+    int16x8_t v127 = vqrdmulhq_n_s16(v126, 16705);
+    int16x8_t v128 = vaddq_s16(v115, v127);
+    int16x8_t v129 = vqrdmulhq_n_s16(v128, 16463);
+    int16x8_t v130_tmp = vqrdmulhq_n_s16(v104, 13573);
+    int16x8_t v130 = vaddq_s16(v130_tmp, v104);
+    int16x8_t v131 = vaddq_s16(v108, v110);
+    int16x8_t v132 = vaddq_s16(v130, v131);
+    int16x8_t v133 = vaddq_s16(v111, v103);
+    int16x8_t v134_tmp = vqrdmulhq_n_s16(v133, 10045);
+    int16x8_t v134 = vaddq_s16(v134_tmp, v133);
+    int16x8_t v135 = vaddq_s16(v121, v107);
+    int16x8_t v136 = vqrdmulhq_n_s16(v135, 17734);
+    int16x8_t v137 = vaddq_s16(v134, v136);
+    int16x8_t v138 = vaddq_s16(v132, v137);
+    int16x8_t v139 = vqrdmulhq_n_s16(v138, 16463);
+    int16x8_t v140 = vaddq_s16(v129, v139);
+    int16x8_t v141 = vaddq_s16(v102, v140);
+    int16x8_t v142 = vqrdmulhq_n_s16(v141, 16404);
+    int16x8_t v143 = vaddq_s16(v61, v142);
+    int16x8_t v144 = vld1q_s16(in + in_stride * 1 + i);
+    int16x8_t v145_tmp = vqrdmulhq_n_s16(v144, 13573);
+    int16x8_t v145 = vaddq_s16(v145_tmp, v144);
+    int16x8_t v146 = vld1q_s16(in + in_stride * 33 + i);
+    int16x8_t v147 = vld1q_s16(in + in_stride * 31 + i);
+    int16x8_t v148 = vaddq_s16(v146, v147);
+    int16x8_t v149 = vaddq_s16(v145, v148);
+    int16x8_t v150 = vld1q_s16(in + in_stride * 17 + i);
+    int16x8_t v151 = vld1q_s16(in + in_stride * 15 + i);
+    int16x8_t v152 = vaddq_s16(v150, v151);
+    int16x8_t v153 = vqrdmulhq_n_s16(v152, 25080);
+    int16x8_t v154 = vld1q_s16(in + in_stride * 49 + i);
+    int16x8_t v155 = vld1q_s16(in + in_stride * 47 + i);
+    int16x8_t v156 = vaddq_s16(v154, v155);
+    int16x8_t v157 = vaddq_s16(v156, v152);
+    int16x8_t v158 = vqrdmulhq_n_s16(v157, 17734);
+    int16x8_t v159 = vaddq_s16(v153, v158);
+    int16x8_t v160 = vaddq_s16(v149, v159);
+    int16x8_t v161 = vld1q_s16(in + in_stride * 9 + i);
+    int16x8_t v162 = vld1q_s16(in + in_stride * 7 + i);
+    int16x8_t v163 = vaddq_s16(v161, v162);
+    int16x8_t v164_tmp = vqrdmulhq_n_s16(v163, 13573);
+    int16x8_t v164 = vaddq_s16(v164_tmp, v163);
+    int16x8_t v165 = vld1q_s16(in + in_stride * 41 + i);
+    int16x8_t v166 = vld1q_s16(in + in_stride * 39 + i);
+    int16x8_t v167 = vaddq_s16(v165, v166);
+    int16x8_t v168 = vld1q_s16(in + in_stride * 25 + i);
+    int16x8_t v169 = vld1q_s16(in + in_stride * 23 + i);
+    int16x8_t v170 = vaddq_s16(v168, v169);
+    int16x8_t v171 = vaddq_s16(v167, v170);
+    int16x8_t v172 = vaddq_s16(v164, v171);
+    int16x8_t v173 = vqrdmulhq_n_s16(v172, 16705);
+    int16x8_t v174 = vaddq_s16(v170, v163);
+    int16x8_t v175_tmp = vqrdmulhq_n_s16(v174, 10045);
+    int16x8_t v175 = vaddq_s16(v175_tmp, v174);
+    int16x8_t v176 = vld1q_s16(in + in_stride * 57 + i);
+    int16x8_t v177 = vld1q_s16(in + in_stride * 55 + i);
+    int16x8_t v178 = vaddq_s16(v176, v177);
+    int16x8_t v179 = vaddq_s16(v178, v167);
+    int16x8_t v180 = vqrdmulhq_n_s16(v179, 17734);
+    int16x8_t v181 = vaddq_s16(v175, v180);
+    int16x8_t v182 = vqrdmulhq_n_s16(v181, 16705);
+    int16x8_t v183 = vaddq_s16(v173, v182);
+    int16x8_t v184 = vaddq_s16(v160, v183);
+    int16x8_t v185 = vld1q_s16(in + in_stride * 37 + i);
+    int16x8_t v186 = vld1q_s16(in + in_stride * 35 + i);
+    int16x8_t v187 = vaddq_s16(v185, v186);
+    int16x8_t v188 = vld1q_s16(in + in_stride * 45 + i);
+    int16x8_t v189 = vld1q_s16(in + in_stride * 43 + i);
+    int16x8_t v190 = vaddq_s16(v188, v189);
+    int16x8_t v191 = vaddq_s16(v187, v190);
+    int16x8_t v192 = vld1q_s16(in + in_stride * 29 + i);
+    int16x8_t v193 = vld1q_s16(in + in_stride * 27 + i);
+    int16x8_t v194 = vaddq_s16(v192, v193);
+    int16x8_t v195 = vld1q_s16(in + in_stride * 21 + i);
+    int16x8_t v196 = vld1q_s16(in + in_stride * 19 + i);
+    int16x8_t v197 = vaddq_s16(v195, v196);
+    int16x8_t v198 = vaddq_s16(v194, v197);
+    int16x8_t v199 = vaddq_s16(v191, v198);
+    int16x8_t v200 = vld1q_s16(in + in_stride * 5 + i);
+    int16x8_t v201 = vld1q_s16(in + in_stride * 3 + i);
+    int16x8_t v202 = vaddq_s16(v200, v201);
+    int16x8_t v203 = vld1q_s16(in + in_stride * 13 + i);
+    int16x8_t v204 = vld1q_s16(in + in_stride * 11 + i);
+    int16x8_t v205 = vaddq_s16(v203, v204);
+    int16x8_t v206 = vaddq_s16(v202, v205);
+    int16x8_t v207_tmp = vqrdmulhq_n_s16(v206, 13573);
+    int16x8_t v207 = vaddq_s16(v207_tmp, v206);
+    int16x8_t v208 = vaddq_s16(v199, v207);
+    int16x8_t v209 = vqrdmulhq_n_s16(v208, 16705);
+    int16x8_t v210 = vaddq_s16(v198, v206);
+    int16x8_t v211 = vqrdmulhq_n_s16(v210, 25080);
+    int16x8_t v212 = vqrdmulhq_n_s16(v210, 17734);
+    int16x8_t v213 = vld1q_s16(in + in_stride * 53 + i);
+    int16x8_t v214 = vld1q_s16(in + in_stride * 51 + i);
+    int16x8_t v215 = vaddq_s16(v213, v214);
+    int16x8_t v216 = vld1q_s16(in + in_stride * 61 + i);
+    int16x8_t v217 = vld1q_s16(in + in_stride * 59 + i);
+    int16x8_t v218 = vaddq_s16(v216, v217);
+    int16x8_t v219 = vaddq_s16(v215, v218);
+    int16x8_t v220 = vaddq_s16(v219, v191);
+    int16x8_t v221 = vqrdmulhq_n_s16(v220, 17734);
+    int16x8_t v222 = vaddq_s16(v212, v221);
+    int16x8_t v223 = vaddq_s16(v211, v222);
+    int16x8_t v224 = vqrdmulhq_n_s16(v223, 16705);
+    int16x8_t v225 = vaddq_s16(v209, v224);
+    int16x8_t v226 = vqrdmulhq_n_s16(v225, 16463);
+    int16x8_t v227_tmp = vqrdmulhq_n_s16(v202, 13573);
+    int16x8_t v227 = vaddq_s16(v227_tmp, v202);
+    int16x8_t v228 = vaddq_s16(v187, v194);
+    int16x8_t v229 = vaddq_s16(v227, v228);
+    int16x8_t v230 = vaddq_s16(v215, v190);
+    int16x8_t v231 = vqrdmulhq_n_s16(v230, 17734);
+    int16x8_t v232 = vaddq_s16(v197, v205);
+    int16x8_t v233_tmp = vqrdmulhq_n_s16(v232, 10045);
+    int16x8_t v233 = vaddq_s16(v233_tmp, v232);
+    int16x8_t v234 = vaddq_s16(v231, v233);
+    int16x8_t v235 = vaddq_s16(v229, v234);
+    int16x8_t v236 = vqrdmulhq_n_s16(v235, 16463);
+    int16x8_t v237 = vaddq_s16(v226, v236);
+    int16x8_t v238 = vaddq_s16(v184, v237);
+    int16x8_t v239 = vaddq_s16(v201, v144);
+    int16x8_t v240_tmp = vqrdmulhq_n_s16(v239, 13573);
+    int16x8_t v240 = vaddq_s16(v240_tmp, v239);
+    int16x8_t v241 = vaddq_s16(v186, v146);
+    int16x8_t v242 = vaddq_s16(v147, v192);
+    int16x8_t v243 = vaddq_s16(v241, v242);
+    int16x8_t v244 = vaddq_s16(v240, v243);
+    int16x8_t v245 = vaddq_s16(v196, v150);
+    int16x8_t v246 = vaddq_s16(v151, v203);
+    int16x8_t v247 = vaddq_s16(v245, v246);
+    int16x8_t v248_tmp = vqrdmulhq_n_s16(v247, 10045);
+    int16x8_t v248 = vaddq_s16(v248_tmp, v247);
+    int16x8_t v249 = vaddq_s16(v155, v188);
+    int16x8_t v250 = vaddq_s16(v214, v154);
+    int16x8_t v251 = vaddq_s16(v249, v250);
+    int16x8_t v252 = vqrdmulhq_n_s16(v251, 17734);
+    int16x8_t v253 = vaddq_s16(v248, v252);
+    int16x8_t v254 = vaddq_s16(v244, v253);
+    int16x8_t v255 = vaddq_s16(v204, v161);
+    int16x8_t v256 = vaddq_s16(v162, v200);
+    int16x8_t v257 = vaddq_s16(v255, v256);
+    int16x8_t v258_tmp = vqrdmulhq_n_s16(v257, 13573);
+    int16x8_t v258 = vaddq_s16(v258_tmp, v257);
+    int16x8_t v259 = vaddq_s16(v189, v165);
+    int16x8_t v260 = vaddq_s16(v166, v185);
+    int16x8_t v261 = vaddq_s16(v259, v260);
+    int16x8_t v262 = vaddq_s16(v169, v195);
+    int16x8_t v263 = vaddq_s16(v193, v168);
+    int16x8_t v264 = vaddq_s16(v262, v263);
+    int16x8_t v265 = vaddq_s16(v261, v264);
+    int16x8_t v266 = vaddq_s16(v258, v265);
+    int16x8_t v267 = vqrdmulhq_n_s16(v266, 16705);
+    int16x8_t v268 = vaddq_s16(v264, v257);
+    int16x8_t v269 = vqrdmulhq_n_s16(v268, 25080);
+    int16x8_t v270 = vaddq_s16(v217, v176);
+    int16x8_t v271 = vaddq_s16(v177, v213);
+    int16x8_t v272 = vaddq_s16(v270, v271);
+    int16x8_t v273 = vaddq_s16(v272, v261);
+    int16x8_t v274 = vqrdmulhq_n_s16(v273, 17734);
+    int16x8_t v275 = vqrdmulhq_n_s16(v268, 17734);
+    int16x8_t v276 = vaddq_s16(v274, v275);
+    int16x8_t v277 = vaddq_s16(v269, v276);
+    int16x8_t v278 = vqrdmulhq_n_s16(v277, 16705);
+    int16x8_t v279 = vaddq_s16(v267, v278);
+    int16x8_t v280 = vaddq_s16(v254, v279);
+    int16x8_t v281 = vqrdmulhq_n_s16(v280, 16404);
+    int16x8_t v282 = vaddq_s16(v256, v239);
+    int16x8_t v283_tmp = vqrdmulhq_n_s16(v282, 13573);
+    int16x8_t v283 = vaddq_s16(v283_tmp, v282);
+    int16x8_t v284 = vaddq_s16(v260, v241);
+    int16x8_t v285 = vaddq_s16(v242, v263);
+    int16x8_t v286 = vaddq_s16(v284, v285);
+    int16x8_t v287 = vaddq_s16(v283, v286);
+    int16x8_t v288 = vaddq_s16(v262, v245);
+    int16x8_t v289 = vaddq_s16(v246, v255);
+    int16x8_t v290 = vaddq_s16(v288, v289);
+    int16x8_t v291 = vqrdmulhq_n_s16(v290, 25080);
+    int16x8_t v292 = vqrdmulhq_n_s16(v290, 17734);
+    int16x8_t v293 = vaddq_s16(v271, v250);
+    int16x8_t v294 = vaddq_s16(v249, v259);
+    int16x8_t v295 = vaddq_s16(v293, v294);
+    int16x8_t v296 = vqrdmulhq_n_s16(v295, 17734);
+    int16x8_t v297 = vaddq_s16(v292, v296);
+    int16x8_t v298 = vaddq_s16(v291, v297);
+    int16x8_t v299 = vaddq_s16(v287, v298);
+    int16x8_t v300 = vqrdmulhq_n_s16(v299, 16463);
+    int16x8_t v301 = vaddq_s16(v289, v282);
+    int16x8_t v302 = vqrdmulhq_n_s16(v301, 23624);
+    int16x8_t v303 = vaddq_s16(v294, v284);
+    int16x8_t v304 = vqrdmulhq_n_s16(v303, 19705);
+    int16x8_t v305 = vaddq_s16(v285, v288);
+    int16x8_t v306 = vqrdmulhq_n_s16(v305, 19705);
+    int16x8_t v307 = vaddq_s16(v304, v306);
+    int16x8_t v308 = vqrdmulhq_n_s16(v307, 27779);
+    int16x8_t v309 = vaddq_s16(v302, v308);
+    int16x8_t v310 = vaddq_s16(v305, v301);
+    int16x8_t v311 = vqrdmulhq_n_s16(v310, 25080);
+    int16x8_t v312 = vqrdmulhq_n_s16(v310, 17734);
+    int16x8_t v313 = vld1q_s16(in + in_stride * 63 + i);
+    int16x8_t v314 = vaddq_s16(v313, v216);
+    int16x8_t v315 = vaddq_s16(v314, v270);
+    int16x8_t v316 = vaddq_s16(v315, v293);
+    int16x8_t v317 = vqrdmulhq_n_s16(v316, 25746);
+    int16x8_t v318 = vqrdmulhq_n_s16(v303, 25746);
+    int16x8_t v319 = vaddq_s16(v317, v318);
+    int16x8_t v320 = vqrdmulhq_n_s16(v319, 22571);
+    int16x8_t v321 = vaddq_s16(v312, v320);
+    int16x8_t v322 = vaddq_s16(v311, v321);
+    int16x8_t v323 = vqrdmulhq_n_s16(v322, 16705);
+    int16x8_t v324 = vaddq_s16(v309, v323);
+    int16x8_t v325 = vqrdmulhq_n_s16(v324, 16463);
+    int16x8_t v326 = vaddq_s16(v300, v325);
+    int16x8_t v327 = vqrdmulhq_n_s16(v326, 16404);
+    int16x8_t v328 = vaddq_s16(v281, v327);
+    int16x8_t v329 = vaddq_s16(v238, v328);
+    int16x8_t v330 = vqrdmulhq_n_s16(v329, 16389);
+    int16x8_t v331 = vaddq_s16(v143, v330);
+    int16x8_t v332 = vsubq_s16(v82, v89);
+    int16x8_t v333 = vqrdmulhq_n_s16(v332, 19705);
+    int16x8_t v334 = vqrdmulhq_n_s16(v92, 13573);
+    int16x8_t v335 = vsubq_s16(v334, v97);
+    int16x8_t v336 = vqrdmulhq_n_s16(v335, 25746);
+    int16x8_t v337 = vaddq_s16(v333, v336);
+    int16x8_t v338 = vsubq_s16(v63, v66);
+    int16x8_t v339 = vqrdmulhq_n_s16(v70, 17734);
+    int16x8_t v340_tmp = vqrdmulhq_n_s16(v74, 10045);
+    int16x8_t v340 = vaddq_s16(v340_tmp, v74);
+    int16x8_t v341 = vsubq_s16(v339, v340);
+    int16x8_t v342 = vaddq_s16(v338, v341);
+    int16x8_t v343 = vaddq_s16(v337, v342);
+    int16x8_t v344 = vsubq_s16(v130, v131);
+    int16x8_t v345 = vqrdmulhq_n_s16(v133, 13573);
+    int16x8_t v346 = vsubq_s16(v345, v135);
+    int16x8_t v347_tmp = vqrdmulhq_n_s16(v346, 10045);
+    int16x8_t v347 = vaddq_s16(v347_tmp, v346);
+    int16x8_t v348 = vaddq_s16(v344, v347);
+    int16x8_t v349 = vqrdmulhq_n_s16(v348, 17121);
+    int16x8_t v350 = vqrdmulhq_n_s16(v105, 27867);
+    int16x8_t v351 = vqrdmulhq_n_s16(v113, 19705);
+    int16x8_t v352 = vsubq_s16(v350, v351);
+    int16x8_t v353 = vqrdmulhq_n_s16(v116, 13573);
+    int16x8_t v354 = vsubq_s16(v353, v123);
+    int16x8_t v355 = vqrdmulhq_n_s16(v354, 25746);
+    int16x8_t v356 = vaddq_s16(v352, v355);
+    int16x8_t v357 = vqrdmulhq_n_s16(v356, 17121);
+    int16x8_t v358 = vaddq_s16(v349, v357);
+    int16x8_t v359 = vaddq_s16(v343, v358);
+    int16x8_t v360 = vqrdmulhq_n_s16(v359, 16563);
+    int16x8_t v361 = vsubq_s16(v27, v30);
+    int16x8_t v362 = vqrdmulhq_n_s16(v34, 17734);
+    int16x8_t v363_tmp = vqrdmulhq_n_s16(v38, 10045);
+    int16x8_t v363 = vaddq_s16(v363_tmp, v38);
+    int16x8_t v364 = vsubq_s16(v362, v363);
+    int16x8_t v365 = vaddq_s16(v361, v364);
+    int16x8_t v366 = vsubq_s16(v44, v47);
+    int16x8_t v367 = vqrdmulhq_n_s16(v366, 19705);
+    int16x8_t v368 = vqrdmulhq_n_s16(v50, 13573);
+    int16x8_t v369 = vsubq_s16(v368, v54);
+    int16x8_t v370 = vqrdmulhq_n_s16(v369, 25746);
+    int16x8_t v371 = vaddq_s16(v367, v370);
+    int16x8_t v372 = vaddq_s16(v365, v371);
+    int16x8_t v373 = vqrdmulhq_n_s16(v372, 17121);
+    int16x8_t v374 = vsubq_s16(v0, v1);
+    int16x8_t v375 = vsubq_s16(v4, v6);
+    int16x8_t v376_tmp = vqrdmulhq_n_s16(v375, 10045);
+    int16x8_t v376 = vaddq_s16(v376_tmp, v375);
+    int16x8_t v377 = vaddq_s16(v374, v376);
+    int16x8_t v378 = vsubq_s16(v11, v14);
+    int16x8_t v379 = vqrdmulhq_n_s16(v18, 17734);
+    int16x8_t v380_tmp = vqrdmulhq_n_s16(v17, 10045);
+    int16x8_t v380 = vaddq_s16(v380_tmp, v17);
+    int16x8_t v381 = vsubq_s16(v379, v380);
+    int16x8_t v382 = vaddq_s16(v378, v381);
+    int16x8_t v383 = vqrdmulhq_n_s16(v382, 19705);
+    int16x8_t v384 = vaddq_s16(v377, v383);
+    int16x8_t v385 = vaddq_s16(v373, v384);
+    int16x8_t v386 = vaddq_s16(v360, v385);
+    int16x8_t v387 = vsubq_s16(v145, v148);
+    int16x8_t v388 = vqrdmulhq_n_s16(v152, 17734);
+    int16x8_t v389_tmp = vqrdmulhq_n_s16(v156, 10045);
+    int16x8_t v389 = vaddq_s16(v389_tmp, v156);
+    int16x8_t v390 = vsubq_s16(v388, v389);
+    int16x8_t v391 = vaddq_s16(v387, v390);
+    int16x8_t v392 = vsubq_s16(v164, v171);
+    int16x8_t v393 = vqrdmulhq_n_s16(v392, 19705);
+    int16x8_t v394 = vqrdmulhq_n_s16(v174, 13573);
+    int16x8_t v395 = vsubq_s16(v394, v179);
+    int16x8_t v396 = vqrdmulhq_n_s16(v395, 25746);
+    int16x8_t v397 = vaddq_s16(v393, v396);
+    int16x8_t v398 = vaddq_s16(v391, v397);
+    int16x8_t v399 = vsubq_s16(v227, v228);
+    int16x8_t v400 = vqrdmulhq_n_s16(v232, 13573);
+    int16x8_t v401 = vsubq_s16(v400, v230);
+    int16x8_t v402_tmp = vqrdmulhq_n_s16(v401, 10045);
+    int16x8_t v402 = vaddq_s16(v402_tmp, v401);
+    int16x8_t v403 = vaddq_s16(v399, v402);
+    int16x8_t v404 = vqrdmulhq_n_s16(v403, 17121);
+    int16x8_t v405 = vqrdmulhq_n_s16(v206, 27867);
+    int16x8_t v406 = vqrdmulhq_n_s16(v199, 19705);
+    int16x8_t v407 = vsubq_s16(v405, v406);
+    int16x8_t v408 = vqrdmulhq_n_s16(v210, 13573);
+    int16x8_t v409 = vsubq_s16(v408, v220);
+    int16x8_t v410 = vqrdmulhq_n_s16(v409, 25746);
+    int16x8_t v411 = vaddq_s16(v407, v410);
+    int16x8_t v412 = vqrdmulhq_n_s16(v411, 17121);
+    int16x8_t v413 = vaddq_s16(v404, v412);
+    int16x8_t v414 = vaddq_s16(v398, v413);
+    int16x8_t v415 = vsubq_s16(v240, v243);
+    int16x8_t v416 = vqrdmulhq_n_s16(v247, 13573);
+    int16x8_t v417 = vsubq_s16(v416, v251);
+    int16x8_t v418_tmp = vqrdmulhq_n_s16(v417, 10045);
+    int16x8_t v418 = vaddq_s16(v418_tmp, v417);
+    int16x8_t v419 = vaddq_s16(v415, v418);
+    int16x8_t v420 = vqrdmulhq_n_s16(v257, 27867);
+    int16x8_t v421 = vqrdmulhq_n_s16(v265, 19705);
+    int16x8_t v422 = vsubq_s16(v420, v421);
+    int16x8_t v423 = vqrdmulhq_n_s16(v268, 13573);
+    int16x8_t v424 = vsubq_s16(v423, v273);
+    int16x8_t v425 = vqrdmulhq_n_s16(v424, 25746);
+    int16x8_t v426 = vaddq_s16(v422, v425);
+    int16x8_t v427 = vaddq_s16(v419, v426);
+    int16x8_t v428 = vqrdmulhq_n_s16(v427, 16563);
+    int16x8_t v429 = vqrdmulhq_n_s16(v301, 27867);
+    int16x8_t v430 = vsubq_s16(v429, v307);
+    int16x8_t v431 = vqrdmulhq_n_s16(v310, 10664);
+    int16x8_t v432 = vsubq_s16(v431, v319);
+    int16x8_t v433 = vaddq_s16(v430, v432);
+    int16x8_t v434 = vqrdmulhq_n_s16(v433, 17121);
+    int16x8_t v435 = vsubq_s16(v283, v286);
+    int16x8_t v436 = vqrdmulhq_n_s16(v290, 13573);
+    int16x8_t v437 = vsubq_s16(v436, v295);
+    int16x8_t v438_tmp = vqrdmulhq_n_s16(v437, 10045);
+    int16x8_t v438 = vaddq_s16(v438_tmp, v437);
+    int16x8_t v439 = vaddq_s16(v435, v438);
+    int16x8_t v440 = vqrdmulhq_n_s16(v439, 17121);
+    int16x8_t v441 = vaddq_s16(v434, v440);
+    int16x8_t v442 = vqrdmulhq_n_s16(v441, 16563);
+    int16x8_t v443 = vaddq_s16(v428, v442);
+    int16x8_t v444 = vaddq_s16(v414, v443);
+    int16x8_t v445 = vqrdmulhq_n_s16(v444, 16429);
+    int16x8_t v446 = vaddq_s16(v386, v445);
+    int16x8_t v447 = vsubq_s16(v374, v376);
+    int16x8_t v448 = vsubq_s16(v378, v381);
+    int16x8_t v449 = vqrdmulhq_n_s16(v448, 29490);
+    int16x8_t v450 = vaddq_s16(v447, v449);
+    int16x8_t v451 = vsubq_s16(v361, v364);
+    int16x8_t v452 = vqrdmulhq_n_s16(v366, 29490);
+    int16x8_t v453_tmp = vqrdmulhq_n_s16(v369, 5763);
+    int16x8_t v453 = vaddq_s16(v453_tmp, v369);
+    int16x8_t v454 = vsubq_s16(v452, v453);
+    int16x8_t v455 = vaddq_s16(v451, v454);
+    int16x8_t v456 = vqrdmulhq_n_s16(v455, 18578);
+    int16x8_t v457 = vaddq_s16(v450, v456);
+    int16x8_t v458 = vsubq_s16(v338, v341);
+    int16x8_t v459 = vqrdmulhq_n_s16(v332, 29490);
+    int16x8_t v460_tmp = vqrdmulhq_n_s16(v335, 5763);
+    int16x8_t v460 = vaddq_s16(v460_tmp, v335);
+    int16x8_t v461 = vsubq_s16(v459, v460);
+    int16x8_t v462 = vaddq_s16(v458, v461);
+    int16x8_t v463 = vqrdmulhq_n_s16(v352, 27803);
+    int16x8_t v464 = vqrdmulhq_n_s16(v354, 21845);
+    int16x8_t v465 = vsubq_s16(v463, v464);
+    int16x8_t v466 = vsubq_s16(v344, v347);
+    int16x8_t v467 = vqrdmulhq_n_s16(v466, 18578);
+    int16x8_t v468 = vaddq_s16(v465, v467);
+    int16x8_t v469 = vaddq_s16(v462, v468);
+    int16x8_t v470 = vqrdmulhq_n_s16(v469, 16890);
+    int16x8_t v471 = vaddq_s16(v457, v470);
+    int16x8_t v472 = vsubq_s16(v415, v418);
+    int16x8_t v473_tmp = vqrdmulhq_n_s16(v422, 16273);
+    int16x8_t v473 = vaddq_s16(v473_tmp, v422);
+    int16x8_t v474_tmp = vqrdmulhq_n_s16(v424, 5763);
+    int16x8_t v474 = vaddq_s16(v474_tmp, v424);
+    int16x8_t v475 = vsubq_s16(v473, v474);
+    int16x8_t v476 = vaddq_s16(v472, v475);
+    int16x8_t v477 = vqrdmulhq_n_s16(v476, 16890);
+    int16x8_t v478 = vqrdmulhq_n_s16(v435, 20261);
+    int16x8_t v479 = vqrdmulhq_n_s16(v437, 26472);
+    int16x8_t v480 = vsubq_s16(v478, v479);
+    int16x8_t v481 = vqrdmulhq_n_s16(v480, 30046);
+    int16x8_t v482 = vqrdmulhq_n_s16(v430, 30322);
+    int16x8_t v483 = vqrdmulhq_n_s16(v432, 30322);
+    int16x8_t v484 = vsubq_s16(v482, v483);
+    int16x8_t v485 = vqrdmulhq_n_s16(v484, 30046);
+    int16x8_t v486 = vaddq_s16(v481, v485);
+    int16x8_t v487 = vqrdmulhq_n_s16(v486, 16890);
+    int16x8_t v488 = vaddq_s16(v477, v487);
+    int16x8_t v489 = vsubq_s16(v387, v390);
+    int16x8_t v490 = vqrdmulhq_n_s16(v392, 29490);
+    int16x8_t v491_tmp = vqrdmulhq_n_s16(v395, 5763);
+    int16x8_t v491 = vaddq_s16(v491_tmp, v395);
+    int16x8_t v492 = vsubq_s16(v490, v491);
+    int16x8_t v493 = vaddq_s16(v489, v492);
+    int16x8_t v494 = vsubq_s16(v399, v402);
+    int16x8_t v495 = vqrdmulhq_n_s16(v494, 18578);
+    int16x8_t v496 = vqrdmulhq_n_s16(v407, 27803);
+    int16x8_t v497 = vqrdmulhq_n_s16(v409, 21845);
+    int16x8_t v498 = vsubq_s16(v496, v497);
+    int16x8_t v499 = vaddq_s16(v495, v498);
+    int16x8_t v500 = vaddq_s16(v493, v499);
+    int16x8_t v501 = vaddq_s16(v488, v500);
+    int16x8_t v502 = vqrdmulhq_n_s16(v501, 16508);
+    int16x8_t v503 = vaddq_s16(v471, v502);
+    int16x8_t v504 = vsubq_s16(v2, v8);
+    int16x8_t v505 = vsubq_s16(v15, v22);
+    int16x8_t v506_tmp = vqrdmulhq_n_s16(v505, 18446);
+    int16x8_t v506 = vmlaq_n_s16(v506_tmp, v505, 2);
+    int16x8_t v507 = vaddq_s16(v504, v506);
+    int16x8_t v508 = vsubq_s16(v31, v41);
+    int16x8_t v509 = vsubq_s16(v48, v56);
+    int16x8_t v510_tmp = vqrdmulhq_n_s16(v509, 18446);
+    int16x8_t v510 = vmlaq_n_s16(v510_tmp, v509, 2);
+    int16x8_t v511 = vaddq_s16(v508, v510);
+    int16x8_t v512 = vqrdmulhq_n_s16(v511, 21195);
+    int16x8_t v513 = vaddq_s16(v507, v512);
+    int16x8_t v514 = vsubq_s16(v67, v77);
+    int16x8_t v515 = vsubq_s16(v90, v99);
+    int16x8_t v516_tmp = vqrdmulhq_n_s16(v515, 18446);
+    int16x8_t v516 = vmlaq_n_s16(v516_tmp, v515, 2);
+    int16x8_t v517 = vaddq_s16(v514, v516);
+    int16x8_t v518 = vsubq_s16(v114, v126);
+    int16x8_t v519_tmp = vqrdmulhq_n_s16(v518, 18446);
+    int16x8_t v519 = vmlaq_n_s16(v519_tmp, v518, 2);
+    int16x8_t v520 = vsubq_s16(v132, v137);
+    int16x8_t v521 = vaddq_s16(v519, v520);
+    int16x8_t v522 = vqrdmulhq_n_s16(v521, 21195);
+    int16x8_t v523 = vaddq_s16(v517, v522);
+    int16x8_t v524 = vqrdmulhq_n_s16(v523, 17401);
+    int16x8_t v525 = vaddq_s16(v513, v524);
+    int16x8_t v526 = vsubq_s16(v172, v181);
+    int16x8_t v527_tmp = vqrdmulhq_n_s16(v526, 18446);
+    int16x8_t v527 = vmlaq_n_s16(v527_tmp, v526, 2);
+    int16x8_t v528 = vsubq_s16(v149, v159);
+    int16x8_t v529 = vaddq_s16(v527, v528);
+    int16x8_t v530 = vsubq_s16(v229, v234);
+    int16x8_t v531 = vsubq_s16(v208, v223);
+    int16x8_t v532_tmp = vqrdmulhq_n_s16(v531, 18446);
+    int16x8_t v532 = vmlaq_n_s16(v532_tmp, v531, 2);
+    int16x8_t v533 = vaddq_s16(v530, v532);
+    int16x8_t v534 = vqrdmulhq_n_s16(v533, 21195);
+    int16x8_t v535 = vaddq_s16(v529, v534);
+    int16x8_t v536 = vsubq_s16(v244, v253);
+    int16x8_t v537 = vsubq_s16(v266, v277);
+    int16x8_t v538_tmp = vqrdmulhq_n_s16(v537, 18446);
+    int16x8_t v538 = vmlaq_n_s16(v538_tmp, v537, 2);
+    int16x8_t v539 = vaddq_s16(v536, v538);
+    int16x8_t v540 = vqrdmulhq_n_s16(v539, 17401);
+    int16x8_t v541 = vqrdmulhq_n_s16(v287, 25826);
+    int16x8_t v542 = vqrdmulhq_n_s16(v298, 25826);
+    int16x8_t v543 = vsubq_s16(v541, v542);
+    int16x8_t v544 = vqrdmulhq_n_s16(v543, 14281);
+    int16x8_t v545_tmp = vqrdmulhq_n_s16(v309, 31509);
+    int16x8_t v545 = vaddq_s16(v545_tmp, v309);
+    int16x8_t v546 = vsubq_s16(v545, v322);
+    int16x8_t v547 = vqrdmulhq_n_s16(v546, 28847);
+    int16x8_t v548 = vaddq_s16(v544, v547);
+    int16x8_t v549 = vaddq_s16(v540, v548);
+    int16x8_t v550 = vaddq_s16(v535, v549);
+    int16x8_t v551 = vqrdmulhq_n_s16(v550, 16629);
+    int16x8_t v552 = vaddq_s16(v525, v551);
+    int16x8_t v553 = vsubq_s16(v504, v506);
+    int16x8_t v554 = vsubq_s16(v508, v510);
+    int16x8_t v555 = vqrdmulhq_n_s16(v554, 25826);
+    int16x8_t v556 = vaddq_s16(v553, v555);
+    int16x8_t v557 = vsubq_s16(v514, v516);
+    int16x8_t v558 = vsubq_s16(v520, v519);
+    int16x8_t v559 = vqrdmulhq_n_s16(v558, 25826);
+    int16x8_t v560 = vaddq_s16(v557, v559);
+    int16x8_t v561 = vqrdmulhq_n_s16(v560, 18124);
+    int16x8_t v562 = vaddq_s16(v556, v561);
+    int16x8_t v563 = vsubq_s16(v528, v527);
+    int16x8_t v564 = vsubq_s16(v530, v532);
+    int16x8_t v565 = vqrdmulhq_n_s16(v564, 25826);
+    int16x8_t v566 = vaddq_s16(v563, v565);
+    int16x8_t v567 = vsubq_s16(v536, v538);
+    int16x8_t v568 = vqrdmulhq_n_s16(v567, 18124);
+    int16x8_t v569_tmp = vqrdmulhq_n_s16(v546, 654);
+    int16x8_t v569 = vmlaq_n_s16(v569_tmp, v546, 2);
+    int16x8_t v570 = vsubq_s16(v543, v569);
+    int16x8_t v571 = vqrdmulhq_n_s16(v570, 18124);
+    int16x8_t v572 = vaddq_s16(v568, v571);
+    int16x8_t v573 = vaddq_s16(v566, v572);
+    int16x8_t v574 = vqrdmulhq_n_s16(v573, 16792);
+    int16x8_t v575 = vaddq_s16(v562, v574);
+    int16x8_t v576 = vsubq_s16(v458, v461);
+    int16x8_t v577_tmp = vqrdmulhq_n_s16(v465, 25030);
+    int16x8_t v577 = vaddq_s16(v577_tmp, v465);
+    int16x8_t v578 = vsubq_s16(v466, v577);
+    int16x8_t v579_tmp = vqrdmulhq_n_s16(v578, 1988);
+    int16x8_t v579 = vaddq_s16(v579_tmp, v578);
+    int16x8_t v580 = vaddq_s16(v576, v579);
+    int16x8_t v581 = vqrdmulhq_n_s16(v580, 19102);
+    int16x8_t v582 = vsubq_s16(v447, v449);
+    int16x8_t v583 = vsubq_s16(v451, v454);
+    int16x8_t v584_tmp = vqrdmulhq_n_s16(v583, 1988);
+    int16x8_t v584 = vaddq_s16(v584_tmp, v583);
+    int16x8_t v585 = vaddq_s16(v582, v584);
+    int16x8_t v586 = vaddq_s16(v581, v585);
+    int16x8_t v587 = vsubq_s16(v489, v492);
+    int16x8_t v588_tmp = vqrdmulhq_n_s16(v498, 25030);
+    int16x8_t v588 = vaddq_s16(v588_tmp, v498);
+    int16x8_t v589 = vsubq_s16(v494, v588);
+    int16x8_t v590_tmp = vqrdmulhq_n_s16(v589, 1988);
+    int16x8_t v590 = vaddq_s16(v590_tmp, v589);
+    int16x8_t v591 = vaddq_s16(v587, v590);
+    int16x8_t v592 = vsubq_s16(v472, v475);
+    int16x8_t v593 = vqrdmulhq_n_s16(v592, 19102);
+    int16x8_t v594 = vsubq_s16(v480, v484);
+    int16x8_t v595 = vaddq_s16(v593, v594);
+    int16x8_t v596 = vaddq_s16(v591, v595);
+    int16x8_t v597 = vqrdmulhq_n_s16(v596, 17000);
+    int16x8_t v598 = vaddq_s16(v586, v597);
+    int16x8_t v599 = vsubq_s16(v365, v371);
+    int16x8_t v600_tmp = vqrdmulhq_n_s16(v599, 23673);
+    int16x8_t v600 = vaddq_s16(v600_tmp, v599);
+    int16x8_t v601 = vsubq_s16(v377, v383);
+    int16x8_t v602 = vaddq_s16(v600, v601);
+    int16x8_t v603 = vsubq_s16(v348, v356);
+    int16x8_t v604_tmp = vqrdmulhq_n_s16(v603, 23673);
+    int16x8_t v604 = vaddq_s16(v604_tmp, v603);
+    int16x8_t v605 = vsubq_s16(v342, v337);
+    int16x8_t v606 = vaddq_s16(v604, v605);
+    int16x8_t v607 = vqrdmulhq_n_s16(v606, 20398);
+    int16x8_t v608 = vaddq_s16(v602, v607);
+    int16x8_t v609 = vsubq_s16(v391, v397);
+    int16x8_t v610 = vsubq_s16(v403, v411);
+    int16x8_t v611_tmp = vqrdmulhq_n_s16(v610, 23673);
+    int16x8_t v611 = vaddq_s16(v611_tmp, v610);
+    int16x8_t v612 = vaddq_s16(v609, v611);
+    int16x8_t v613 = vsubq_s16(v419, v426);
+    int16x8_t v614 = vqrdmulhq_n_s16(v613, 20398);
+    int16x8_t v615 = vsubq_s16(v439, v433);
+    int16x8_t v616_tmp = vqrdmulhq_n_s16(v615, 2367);
+    int16x8_t v616 = vaddq_s16(v616_tmp, v615);
+    int16x8_t v617 = vaddq_s16(v614, v616);
+    int16x8_t v618 = vaddq_s16(v612, v617);
+    int16x8_t v619 = vqrdmulhq_n_s16(v618, 17255);
+    int16x8_t v620 = vaddq_s16(v608, v619);
+    int16x8_t v621 = vsubq_s16(v160, v183);
+    int16x8_t v622 = vsubq_s16(v235, v225);
+    int16x8_t v623_tmp = vqrdmulhq_n_s16(v622, 3314);
+    int16x8_t v623 = vmlaq_n_s16(v623_tmp, v622, 5);
+    int16x8_t v624 = vaddq_s16(v621, v623);
+    int16x8_t v625 = vsubq_s16(v254, v279);
+    int16x8_t v626 = vsubq_s16(v299, v324);
+    int16x8_t v627_tmp = vqrdmulhq_n_s16(v626, 3314);
+    int16x8_t v627 = vmlaq_n_s16(v627_tmp, v626, 5);
+    int16x8_t v628 = vaddq_s16(v625, v627);
+    int16x8_t v629 = vqrdmulhq_n_s16(v628, 22112);
+    int16x8_t v630 = vaddq_s16(v624, v629);
+    int16x8_t v631 = vqrdmulhq_n_s16(v630, 17561);
+    int16x8_t v632 = vsubq_s16(v9, v24);
+    int16x8_t v633 = vsubq_s16(v42, v58);
+    int16x8_t v634_tmp = vqrdmulhq_n_s16(v633, 3314);
+    int16x8_t v634 = vmlaq_n_s16(v634_tmp, v633, 5);
+    int16x8_t v635 = vaddq_s16(v632, v634);
+    int16x8_t v636 = vsubq_s16(v78, v101);
+    int16x8_t v637 = vsubq_s16(v138, v128);
+    int16x8_t v638_tmp = vqrdmulhq_n_s16(v637, 3314);
+    int16x8_t v638 = vmlaq_n_s16(v638_tmp, v637, 5);
+    int16x8_t v639 = vaddq_s16(v636, v638);
+    int16x8_t v640 = vqrdmulhq_n_s16(v639, 22112);
+    int16x8_t v641 = vaddq_s16(v635, v640);
+    int16x8_t v642 = vaddq_s16(v631, v641);
+    int16x8_t v643 = vsubq_s16(v632, v634);
+    int16x8_t v644 = vsubq_s16(v636, v638);
+    int16x8_t v645 = vqrdmulhq_n_s16(v644, 24397);
+    int16x8_t v646 = vaddq_s16(v643, v645);
+    int16x8_t v647 = vsubq_s16(v621, v623);
+    int16x8_t v648 = vsubq_s16(v625, v627);
+    int16x8_t v649 = vqrdmulhq_n_s16(v648, 24397);
+    int16x8_t v650 = vaddq_s16(v647, v649);
+    int16x8_t v651 = vqrdmulhq_n_s16(v650, 17921);
+    int16x8_t v652 = vaddq_s16(v646, v651);
+    int16x8_t v653 = vsubq_s16(v601, v600);
+    int16x8_t v654 = vsubq_s16(v605, v604);
+    int16x8_t v655 = vqrdmulhq_n_s16(v654, 27504);
+    int16x8_t v656 = vaddq_s16(v653, v655);
+    int16x8_t v657 = vsubq_s16(v609, v611);
+    int16x8_t v658 = vqrdmulhq_n_s16(v613, 27504);
+    int16x8_t v659_tmp = vqrdmulhq_n_s16(v615, 14606);
+    int16x8_t v659 = vaddq_s16(v659_tmp, v615);
+    int16x8_t v660 = vsubq_s16(v658, v659);
+    int16x8_t v661 = vaddq_s16(v657, v660);
+    int16x8_t v662 = vqrdmulhq_n_s16(v661, 18343);
+    int16x8_t v663 = vaddq_s16(v656, v662);
+    int16x8_t v664 = vsubq_s16(v582, v584);
+    int16x8_t v665 = vsubq_s16(v576, v579);
+    int16x8_t v666 = vqrdmulhq_n_s16(v665, 31869);
+    int16x8_t v667 = vaddq_s16(v664, v666);
+    int16x8_t v668 = vsubq_s16(v587, v590);
+    int16x8_t v669_tmp = vqrdmulhq_n_s16(v594, 23444);
+    int16x8_t v669 = vaddq_s16(v669_tmp, v594);
+    int16x8_t v670 = vsubq_s16(v592, v669);
+    int16x8_t v671 = vqrdmulhq_n_s16(v670, 31869);
+    int16x8_t v672 = vaddq_s16(v668, v671);
+    int16x8_t v673 = vqrdmulhq_n_s16(v672, 18830);
+    int16x8_t v674 = vaddq_s16(v667, v673);
+    int16x8_t v675 = vsubq_s16(v553, v555);
+    int16x8_t v676 = vsubq_s16(v557, v559);
+    int16x8_t v677_tmp = vqrdmulhq_n_s16(v676, 5552);
+    int16x8_t v677 = vaddq_s16(v677_tmp, v676);
+    int16x8_t v678 = vaddq_s16(v675, v677);
+    int16x8_t v679 = vsubq_s16(v563, v565);
+    int16x8_t v680 = vsubq_s16(v567, v570);
+    int16x8_t v681_tmp = vqrdmulhq_n_s16(v680, 5552);
+    int16x8_t v681 = vaddq_s16(v681_tmp, v680);
+    int16x8_t v682 = vaddq_s16(v679, v681);
+    int16x8_t v683 = vqrdmulhq_n_s16(v682, 19393);
+    int16x8_t v684 = vaddq_s16(v678, v683);
+    int16x8_t v685 = vsubq_s16(v507, v512);
+    int16x8_t v686 = vsubq_s16(v517, v522);
+    int16x8_t v687_tmp = vqrdmulhq_n_s16(v686, 15865);
+    int16x8_t v687 = vaddq_s16(v687_tmp, v686);
+    int16x8_t v688 = vaddq_s16(v685, v687);
+    int16x8_t v689 = vsubq_s16(v529, v534);
+    int16x8_t v690_tmp = vqrdmulhq_n_s16(v548, 28937);
+    int16x8_t v690 = vaddq_s16(v690_tmp, v548);
+    int16x8_t v691 = vsubq_s16(v539, v690);
+    int16x8_t v692_tmp = vqrdmulhq_n_s16(v691, 15865);
+    int16x8_t v692 = vaddq_s16(v692_tmp, v691);
+    int16x8_t v693 = vaddq_s16(v689, v692);
+    int16x8_t v694 = vqrdmulhq_n_s16(v693, 20040);
+    int16x8_t v695 = vaddq_s16(v688, v694);
+    int16x8_t v696 = vsubq_s16(v476, v486);
+    int16x8_t v697_tmp = vqrdmulhq_n_s16(v696, 1893);
+    int16x8_t v697 = vmlaq_n_s16(v697_tmp, v696, 2);
+    int16x8_t v698 = vsubq_s16(v493, v499);
+    int16x8_t v699 = vaddq_s16(v697, v698);
+    int16x8_t v700 = vqrdmulhq_n_s16(v699, 20783);
+    int16x8_t v701 = vsubq_s16(v450, v456);
+    int16x8_t v702 = vsubq_s16(v462, v468);
+    int16x8_t v703_tmp = vqrdmulhq_n_s16(v702, 1893);
+    int16x8_t v703 = vmlaq_n_s16(v703_tmp, v702, 2);
+    int16x8_t v704 = vaddq_s16(v701, v703);
+    int16x8_t v705 = vaddq_s16(v700, v704);
+    int16x8_t v706 = vsubq_s16(v384, v373);
+    int16x8_t v707 = vsubq_s16(v343, v358);
+    int16x8_t v708_tmp = vqrdmulhq_n_s16(v707, 13357);
+    int16x8_t v708 = vmlaq_n_s16(v708_tmp, v707, 3);
+    int16x8_t v709 = vaddq_s16(v706, v708);
+    int16x8_t v710 = vsubq_s16(v398, v413);
+    int16x8_t v711 = vsubq_s16(v427, v441);
+    int16x8_t v712_tmp = vqrdmulhq_n_s16(v711, 13357);
+    int16x8_t v712 = vmlaq_n_s16(v712_tmp, v711, 3);
+    int16x8_t v713 = vaddq_s16(v710, v712);
+    int16x8_t v714 = vqrdmulhq_n_s16(v713, 21637);
+    int16x8_t v715 = vaddq_s16(v709, v714);
+    int16x8_t v716 = vsubq_s16(v25, v60);
+    int16x8_t v717 = vsubq_s16(v102, v140);
+    int16x8_t v718_tmp = vqrdmulhq_n_s16(v717, 6226);
+    int16x8_t v718 = vmlaq_n_s16(v718_tmp, v717, 10);
+    int16x8_t v719 = vaddq_s16(v716, v718);
+    int16x8_t v720 = vsubq_s16(v280, v326);
+    int16x8_t v721_tmp = vqrdmulhq_n_s16(v720, 6226);
+    int16x8_t v721 = vmlaq_n_s16(v721_tmp, v720, 10);
+    int16x8_t v722 = vsubq_s16(v184, v237);
+    int16x8_t v723 = vaddq_s16(v721, v722);
+    int16x8_t v724 = vqrdmulhq_n_s16(v723, 22622);
+    int16x8_t v725 = vaddq_s16(v719, v724);
+    int16x8_t v726 = vsubq_s16(v716, v718);
+    int16x8_t v727 = vsubq_s16(v722, v721);
+    int16x8_t v728 = vqrdmulhq_n_s16(v727, 23761);
+    int16x8_t v729 = vaddq_s16(v726, v728);
+    int16x8_t v730 = vsubq_s16(v706, v708);
+    int16x8_t v731 = vsubq_s16(v710, v712);
+    int16x8_t v732 = vqrdmulhq_n_s16(v731, 25084);
+    int16x8_t v733 = vaddq_s16(v730, v732);
+    int16x8_t v734 = vsubq_s16(v701, v703);
+    int16x8_t v735 = vsubq_s16(v698, v697);
+    int16x8_t v736 = vqrdmulhq_n_s16(v735, 26631);
+    int16x8_t v737 = vaddq_s16(v734, v736);
+    int16x8_t v738 = vsubq_s16(v685, v687);
+    int16x8_t v739 = vsubq_s16(v689, v692);
+    int16x8_t v740 = vqrdmulhq_n_s16(v739, 28454);
+    int16x8_t v741 = vaddq_s16(v738, v740);
+    int16x8_t v742 = vsubq_s16(v675, v677);
+    int16x8_t v743 = vsubq_s16(v679, v681);
+    int16x8_t v744 = vqrdmulhq_n_s16(v743, 30624);
+    int16x8_t v745 = vaddq_s16(v742, v744);
+    int16x8_t v746 = vsubq_s16(v664, v666);
+    int16x8_t v747 = vsubq_s16(v668, v671);
+    int16x8_t v748_tmp = vqrdmulhq_n_s16(v747, 472);
+    int16x8_t v748 = vaddq_s16(v748_tmp, v747);
+    int16x8_t v749 = vaddq_s16(v746, v748);
+    int16x8_t v750 = vsubq_s16(v653, v655);
+    int16x8_t v751 = vsubq_s16(v657, v660);
+    int16x8_t v752_tmp = vqrdmulhq_n_s16(v751, 3672);
+    int16x8_t v752 = vaddq_s16(v752_tmp, v751);
+    int16x8_t v753 = vaddq_s16(v750, v752);
+    int16x8_t v754 = vsubq_s16(v643, v645);
+    int16x8_t v755 = vsubq_s16(v647, v649);
+    int16x8_t v756_tmp = vqrdmulhq_n_s16(v755, 7662);
+    int16x8_t v756 = vaddq_s16(v756_tmp, v755);
+    int16x8_t v757 = vaddq_s16(v754, v756);
+    int16x8_t v758 = vsubq_s16(v635, v640);
+    int16x8_t v759 = vsubq_s16(v624, v629);
+    int16x8_t v760_tmp = vqrdmulhq_n_s16(v759, 12756);
+    int16x8_t v760 = vaddq_s16(v760_tmp, v759);
+    int16x8_t v761 = vaddq_s16(v758, v760);
+    int16x8_t v762 = vsubq_s16(v602, v607);
+    int16x8_t v763 = vsubq_s16(v612, v617);
+    int16x8_t v764_tmp = vqrdmulhq_n_s16(v763, 19463);
+    int16x8_t v764 = vaddq_s16(v764_tmp, v763);
+    int16x8_t v765 = vaddq_s16(v762, v764);
+    int16x8_t v766 = vsubq_s16(v585, v581);
+    int16x8_t v767 = vsubq_s16(v591, v595);
+    int16x8_t v768_tmp = vqrdmulhq_n_s16(v767, 28661);
+    int16x8_t v768 = vaddq_s16(v768_tmp, v767);
+    int16x8_t v769 = vaddq_s16(v766, v768);
+    int16x8_t v770 = vsubq_s16(v556, v561);
+    int16x8_t v771 = vsubq_s16(v566, v572);
+    int16x8_t v772_tmp = vqrdmulhq_n_s16(v771, 9242);
+    int16x8_t v772 = vmlaq_n_s16(v772_tmp, v771, 2);
+    int16x8_t v773 = vaddq_s16(v770, v772);
+    int16x8_t v774 = vsubq_s16(v513, v524);
+    int16x8_t v775 = vsubq_s16(v535, v549);
+    int16x8_t v776_tmp = vqrdmulhq_n_s16(v775, 30298);
+    int16x8_t v776 = vmlaq_n_s16(v776_tmp, v775, 2);
+    int16x8_t v777 = vaddq_s16(v774, v776);
+    int16x8_t v778 = vsubq_s16(v457, v470);
+    int16x8_t v779 = vsubq_s16(v500, v488);
+    int16x8_t v780_tmp = vqrdmulhq_n_s16(v779, 2773);
+    int16x8_t v780 = vmlaq_n_s16(v780_tmp, v779, 4);
+    int16x8_t v781 = vaddq_s16(v778, v780);
+    int16x8_t v782 = vsubq_s16(v385, v360);
+    int16x8_t v783 = vsubq_s16(v414, v443);
+    int16x8_t v784_tmp = vqrdmulhq_n_s16(v783, 26108);
+    int16x8_t v784 = vmlaq_n_s16(v784_tmp, v783, 6);
+    int16x8_t v785 = vaddq_s16(v782, v784);
+    int16x8_t v786 = vsubq_s16(v61, v142);
+    int16x8_t v787 = vsubq_s16(v238, v328);
+    int16x8_t v788_tmp = vqrdmulhq_n_s16(v787, 12251);
+    int16x8_t v788 = vmlaq_n_s16(v788_tmp, v787, 20);
+    int16x8_t v789 = vaddq_s16(v786, v788);
+    int16x8_t v790 = vsubq_s16(v786, v788);
+    int16x8_t v791 = vsubq_s16(v782, v784);
+    int16x8_t v792 = vsubq_s16(v778, v780);
+    int16x8_t v793 = vsubq_s16(v774, v776);
+    int16x8_t v794 = vsubq_s16(v770, v772);
+    int16x8_t v795 = vsubq_s16(v766, v768);
+    int16x8_t v796 = vsubq_s16(v762, v764);
+    int16x8_t v797 = vsubq_s16(v758, v760);
+    int16x8_t v798 = vsubq_s16(v754, v756);
+    int16x8_t v799 = vsubq_s16(v750, v752);
+    int16x8_t v800 = vsubq_s16(v746, v748);
+    int16x8_t v801 = vsubq_s16(v742, v744);
+    int16x8_t v802 = vsubq_s16(v738, v740);
+    int16x8_t v803 = vsubq_s16(v734, v736);
+    int16x8_t v804 = vsubq_s16(v730, v732);
+    int16x8_t v805 = vsubq_s16(v726, v728);
+    int16x8_t v806 = vsubq_s16(v719, v724);
+    int16x8_t v807 = vsubq_s16(v709, v714);
+    int16x8_t v808 = vsubq_s16(v704, v700);
+    int16x8_t v809 = vsubq_s16(v688, v694);
+    int16x8_t v810 = vsubq_s16(v678, v683);
+    int16x8_t v811 = vsubq_s16(v667, v673);
+    int16x8_t v812 = vsubq_s16(v656, v662);
+    int16x8_t v813 = vsubq_s16(v646, v651);
+    int16x8_t v814 = vsubq_s16(v641, v631);
+    int16x8_t v815 = vsubq_s16(v608, v619);
+    int16x8_t v816 = vsubq_s16(v586, v597);
+    int16x8_t v817 = vsubq_s16(v562, v574);
+    int16x8_t v818 = vsubq_s16(v525, v551);
+    int16x8_t v819 = vsubq_s16(v471, v502);
+    int16x8_t v820 = vsubq_s16(v386, v445);
+    int16x8_t v821 = vsubq_s16(v143, v330);
+    vst1q_s16(out + out_stride * 0 + i, v331);
+    vst1q_s16(out + out_stride * 1 + i, v446);
+    vst1q_s16(out + out_stride * 2 + i, v503);
+    vst1q_s16(out + out_stride * 3 + i, v552);
+    vst1q_s16(out + out_stride * 4 + i, v575);
+    vst1q_s16(out + out_stride * 5 + i, v598);
+    vst1q_s16(out + out_stride * 6 + i, v620);
+    vst1q_s16(out + out_stride * 7 + i, v642);
+    vst1q_s16(out + out_stride * 8 + i, v652);
+    vst1q_s16(out + out_stride * 9 + i, v663);
+    vst1q_s16(out + out_stride * 10 + i, v674);
+    vst1q_s16(out + out_stride * 11 + i, v684);
+    vst1q_s16(out + out_stride * 12 + i, v695);
+    vst1q_s16(out + out_stride * 13 + i, v705);
+    vst1q_s16(out + out_stride * 14 + i, v715);
+    vst1q_s16(out + out_stride * 15 + i, v725);
+    vst1q_s16(out + out_stride * 16 + i, v729);
+    vst1q_s16(out + out_stride * 17 + i, v733);
+    vst1q_s16(out + out_stride * 18 + i, v737);
+    vst1q_s16(out + out_stride * 19 + i, v741);
+    vst1q_s16(out + out_stride * 20 + i, v745);
+    vst1q_s16(out + out_stride * 21 + i, v749);
+    vst1q_s16(out + out_stride * 22 + i, v753);
+    vst1q_s16(out + out_stride * 23 + i, v757);
+    vst1q_s16(out + out_stride * 24 + i, v761);
+    vst1q_s16(out + out_stride * 25 + i, v765);
+    vst1q_s16(out + out_stride * 26 + i, v769);
+    vst1q_s16(out + out_stride * 27 + i, v773);
+    vst1q_s16(out + out_stride * 28 + i, v777);
+    vst1q_s16(out + out_stride * 29 + i, v781);
+    vst1q_s16(out + out_stride * 30 + i, v785);
+    vst1q_s16(out + out_stride * 31 + i, v789);
+    vst1q_s16(out + out_stride * 32 + i, v790);
+    vst1q_s16(out + out_stride * 33 + i, v791);
+    vst1q_s16(out + out_stride * 34 + i, v792);
+    vst1q_s16(out + out_stride * 35 + i, v793);
+    vst1q_s16(out + out_stride * 36 + i, v794);
+    vst1q_s16(out + out_stride * 37 + i, v795);
+    vst1q_s16(out + out_stride * 38 + i, v796);
+    vst1q_s16(out + out_stride * 39 + i, v797);
+    vst1q_s16(out + out_stride * 40 + i, v798);
+    vst1q_s16(out + out_stride * 41 + i, v799);
+    vst1q_s16(out + out_stride * 42 + i, v800);
+    vst1q_s16(out + out_stride * 43 + i, v801);
+    vst1q_s16(out + out_stride * 44 + i, v802);
+    vst1q_s16(out + out_stride * 45 + i, v803);
+    vst1q_s16(out + out_stride * 46 + i, v804);
+    vst1q_s16(out + out_stride * 47 + i, v805);
+    vst1q_s16(out + out_stride * 48 + i, v806);
+    vst1q_s16(out + out_stride * 49 + i, v807);
+    vst1q_s16(out + out_stride * 50 + i, v808);
+    vst1q_s16(out + out_stride * 51 + i, v809);
+    vst1q_s16(out + out_stride * 52 + i, v810);
+    vst1q_s16(out + out_stride * 53 + i, v811);
+    vst1q_s16(out + out_stride * 54 + i, v812);
+    vst1q_s16(out + out_stride * 55 + i, v813);
+    vst1q_s16(out + out_stride * 56 + i, v814);
+    vst1q_s16(out + out_stride * 57 + i, v815);
+    vst1q_s16(out + out_stride * 58 + i, v816);
+    vst1q_s16(out + out_stride * 59 + i, v817);
+    vst1q_s16(out + out_stride * 60 + i, v818);
+    vst1q_s16(out + out_stride * 61 + i, v819);
+    vst1q_s16(out + out_stride * 62 + i, v820);
+    vst1q_s16(out + out_stride * 63 + i, v821);
+  }
+}
diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_dct8-inl.h b/third-party/libjxl/libjxl/lib/jxl/fast_dct8-inl.h
new file mode 100644
index 0000000000..946ace4a0c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/fast_dct8-inl.h
@@ -0,0 +1,80 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/* This file is automatically generated. Do not modify it directly. */
+#if HWY_TARGET != HWY_NEON
+#error "only include this file from fast_dct-inl.h"
+#endif
+
+constexpr size_t FastIDCTIntegerBits(FastDCTTag<8>) { return 1; }  // compile-time constant 1 for the 8-point tag; NOTE(review): how callers interpret "integer bits" is defined outside this file - confirm in fast_dct-inl.h
+
+void FastIDCT(FastDCTTag<8>, const int16_t* in, size_t in_stride, int16_t* out,  // FastIDCT kernel for the 8-point tag: reads rows in + in_stride * r (r = 0..7), writes rows out + out_stride * r
+              size_t out_stride, size_t count) {  // count = number of int16 columns to process; strides are in int16 elements
+  JXL_ASSERT(count % 8 == 0);  // every iteration handles one 8-lane int16x8_t vector, so count must be a multiple of 8
+  for (size_t i = 0; i < count; i += 8) {  // i = column offset of the current 8-lane group
+    int16x8_t v0 = vld1q_s16(in + in_stride * 0 + i);  // input row 0
+    int16x8_t v1 = vld1q_s16(in + in_stride * 4 + i);  // input row 4
+    int16x8_t v2 = vaddq_s16(v0, v1);  // plain (non-saturating) int16 add
+    int16x8_t v3 = vld1q_s16(in + in_stride * 2 + i);  // input row 2
+    int16x8_t v4_tmp = vqrdmulhq_n_s16(v3, 13573);  // saturating rounding doubling multiply-high: ~ v3 * 13573 / 32768
+    int16x8_t v4 = vaddq_s16(v4_tmp, v3);  // v4 ~ v3 * (1 + 13573/32768) ~ v3 * 1.41421; factors >= 1 are split this way because the scalar multiplier must fit in int16
+    int16x8_t v5 = vld1q_s16(in + in_stride * 6 + i);  // input row 6
+    int16x8_t v6 = vaddq_s16(v5, v3);
+    int16x8_t v7 = vaddq_s16(v4, v6);
+    int16x8_t v8 = vqrdmulhq_n_s16(v7, 17734);  // ~ v7 * 0.54120
+    int16x8_t v9 = vaddq_s16(v2, v8);
+    int16x8_t v10 = vld1q_s16(in + in_stride * 1 + i);  // input row 1
+    int16x8_t v11_tmp = vqrdmulhq_n_s16(v10, 13573);
+    int16x8_t v11 = vaddq_s16(v11_tmp, v10);  // v11 ~ v10 * 1.41421
+    int16x8_t v12 = vld1q_s16(in + in_stride * 5 + i);  // input row 5
+    int16x8_t v13 = vld1q_s16(in + in_stride * 3 + i);  // input row 3
+    int16x8_t v14 = vaddq_s16(v12, v13);
+    int16x8_t v15 = vaddq_s16(v11, v14);
+    int16x8_t v16 = vaddq_s16(v13, v10);
+    int16x8_t v17 = vqrdmulhq_n_s16(v16, 25080);  // ~ v16 * 0.76538
+    int16x8_t v18 = vld1q_s16(in + in_stride * 7 + i);  // input row 7
+    int16x8_t v19 = vaddq_s16(v18, v12);
+    int16x8_t v20 = vaddq_s16(v16, v19);
+    int16x8_t v21 = vqrdmulhq_n_s16(v20, 17734);
+    int16x8_t v22 = vaddq_s16(v17, v21);
+    int16x8_t v23 = vaddq_s16(v15, v22);
+    int16x8_t v24 = vqrdmulhq_n_s16(v23, 16705);  // ~ v23 * 0.50980
+    int16x8_t v25 = vaddq_s16(v9, v24);  // output row 0 (sum); row 7 is the matching difference v9 - v24
+    int16x8_t v26 = vsubq_s16(v0, v1);
+    int16x8_t v27 = vsubq_s16(v4, v6);
+    int16x8_t v28_tmp = vqrdmulhq_n_s16(v27, 10045);
+    int16x8_t v28 = vaddq_s16(v28_tmp, v27);  // v28 ~ v27 * (1 + 10045/32768) ~ v27 * 1.30655
+    int16x8_t v29 = vaddq_s16(v26, v28);
+    int16x8_t v30 = vsubq_s16(v11, v14);
+    int16x8_t v31 = vqrdmulhq_n_s16(v16, 17734);
+    int16x8_t v32_tmp = vqrdmulhq_n_s16(v19, 10045);
+    int16x8_t v32 = vaddq_s16(v32_tmp, v19);  // v32 ~ v19 * 1.30655
+    int16x8_t v33 = vsubq_s16(v31, v32);
+    int16x8_t v34 = vaddq_s16(v30, v33);
+    int16x8_t v35 = vqrdmulhq_n_s16(v34, 19705);  // ~ v34 * 0.60135
+    int16x8_t v36 = vaddq_s16(v29, v35);  // output row 1 (sum); row 6 is v29 - v35
+    int16x8_t v37 = vsubq_s16(v26, v28);
+    int16x8_t v38 = vsubq_s16(v30, v33);
+    int16x8_t v39 = vqrdmulhq_n_s16(v38, 29490);  // ~ v38 * 0.89996
+    int16x8_t v40 = vaddq_s16(v37, v39);  // output row 2 (sum); row 5 is v37 - v39
+    int16x8_t v41 = vsubq_s16(v2, v8);
+    int16x8_t v42 = vsubq_s16(v15, v22);
+    int16x8_t v43_tmp = vqrdmulhq_n_s16(v42, 18446);
+    int16x8_t v43 = vmlaq_n_s16(v43_tmp, v42, 2);  // v43 = v43_tmp + 2 * v42 ~ v42 * (2 + 18446/32768) ~ v42 * 2.56293
+    int16x8_t v44 = vaddq_s16(v41, v43);  // output row 3 (sum); row 4 is v41 - v43
+    int16x8_t v45 = vsubq_s16(v41, v43);  // output row 4
+    int16x8_t v46 = vsubq_s16(v37, v39);  // output row 5
+    int16x8_t v47 = vsubq_s16(v29, v35);  // output row 6
+    int16x8_t v48 = vsubq_s16(v9, v24);  // output row 7
+    vst1q_s16(out + out_stride * 0 + i, v25);  // store the 8 result rows for this column group
+    vst1q_s16(out + out_stride * 1 + i, v36);
+    vst1q_s16(out + out_stride * 2 + i, v40);
+    vst1q_s16(out + out_stride * 3 + i, v44);
+    vst1q_s16(out + out_stride * 4 + i, v45);
+    vst1q_s16(out + out_stride * 5 + i, v46);
+    vst1q_s16(out + out_stride * 6 + i, v47);
+    vst1q_s16(out + out_stride * 7 + i, v48);
+  }
+}
diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_dct_test.cc b/third-party/libjxl/libjxl/lib/jxl/fast_dct_test.cc
new file mode 100644
index 0000000000..5bb1a79cc5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/fast_dct_test.cc
@@ -0,0 +1,378 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include <numeric>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/fast_dct_test.cc"
+#include <hwy/foreach_target.h>
+
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/dct-inl.h"
+#include "lib/jxl/fast_dct-inl.h"
+#include "lib/jxl/fast_dct.h"
+#include "lib/jxl/transpose-inl.h"
+
+// Test utils
+#include <hwy/highway.h>
+#include <hwy/tests/test_util-inl.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+template <size_t N, size_t M>  // source is indexed [j * M + i] (j < N, i < M), destination [i * N + j]
+HWY_NOINLINE void TestFastTranspose() {  // checks FastTransposeBlock on an int16 buffer of N * M elements
+#if HWY_TARGET == HWY_NEON  // body is compiled only when HWY_TARGET is HWY_NEON; otherwise the function is empty
+  auto array_mem = hwy::AllocateAligned<int16_t>(N * M);
+  int16_t* array = array_mem.get();
+  auto transposed_mem = hwy::AllocateAligned<int16_t>(N * M);
+  int16_t* transposed = transposed_mem.get();
+  std::iota(array, array + N * M, 0);  // fill the source with ascending values starting at 0
+  for (size_t j = 0; j < 100000000 / (N * M); j++) {  // repeats the same transpose so roughly 1e8 elements are moved in total; NOTE(review): looks like a timing loop, the result is identical every pass - confirm intent
+    FastTransposeBlock(array, M, N, M, transposed, N);  // NOTE(review): parameter meaning (strides / dimensions) is declared outside this file - confirm
+  }
+  for (size_t i = 0; i < M; i++) {
+    for (size_t j = 0; j < N; j++) {
+      EXPECT_EQ(array[j * M + i], transposed[i * N + j]);  // source element (j, i) must appear at (i, j) in the output
+    }
+  }
+#endif
+}
+
+template <size_t N, size_t M>  // source is indexed [j * M + i] (j < N, i < M), destination [i * N + j]
+HWY_NOINLINE void TestFloatTranspose() {  // checks Transpose<N, M>::Run on a float buffer of N * M elements (compiled for every target)
+  auto array_mem = hwy::AllocateAligned<float>(N * M);
+  float* array = array_mem.get();
+  auto transposed_mem = hwy::AllocateAligned<float>(N * M);
+  float* transposed = transposed_mem.get();
+  std::iota(array, array + N * M, 0);  // fill the source with ascending values starting at 0
+  for (size_t j = 0; j < 100000000 / (N * M); j++) {  // repeats the same transpose so roughly 1e8 elements are moved in total; NOTE(review): looks like a timing loop - confirm intent
+    Transpose<N, M>::Run(DCTFrom(array, M), DCTTo(transposed, N));  // source wrapped with stride M, destination with stride N
+  }
+  for (size_t i = 0; i < M; i++) {
+    for (size_t j = 0; j < N; j++) {
+      EXPECT_EQ(array[j * M + i], transposed[i * N + j]);  // source element (j, i) must appear at (i, j) in the output
+    }
+  }
+}
+
+// TODO(sboukortt): re-enable the FloatIDCT tests once we find out why they fail
+// in ASAN mode in the CI runners and seemingly not locally.
+
+HWY_NOINLINE void TestFastTranspose8x8() { TestFastTranspose<8, 8>(); }  // non-template entry points for the <8, 8> instantiations
+HWY_NOINLINE void TestFloatTranspose8x8() { TestFloatTranspose<8, 8>(); }
+HWY_NOINLINE void TestFastIDCT8x8() { TestFastIDCT<8, 8>(); }
+HWY_NOINLINE void TestFloatIDCT8x8() {
+#if HWY_TARGET == HWY_SCALAR && \
+    (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+  GTEST_SKIP();  // skipped on the scalar target when ADDRESS_SANITIZER or MEMORY_SANITIZER is defined
+#else
+  TestFloatIDCT<8, 8>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose8x16() { TestFastTranspose<8, 16>(); }  // non-template entry points for the <8, 16> instantiations
+HWY_NOINLINE void TestFloatTranspose8x16() { TestFloatTranspose<8, 16>(); }
+HWY_NOINLINE void TestFastIDCT8x16() { TestFastIDCT<8, 16>(); }
+HWY_NOINLINE void TestFloatIDCT8x16() {
+#if HWY_TARGET == HWY_SCALAR && \
+    (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+  GTEST_SKIP();  // skipped on the scalar target when ADDRESS_SANITIZER or MEMORY_SANITIZER is defined
+#else
+  TestFloatIDCT<8, 16>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose8x32() { TestFastTranspose<8, 32>(); }  // non-template entry points for the <8, 32> instantiations
+HWY_NOINLINE void TestFloatTranspose8x32() { TestFloatTranspose<8, 32>(); }
+HWY_NOINLINE void TestFastIDCT8x32() { TestFastIDCT<8, 32>(); }
+HWY_NOINLINE void TestFloatIDCT8x32() {
+#if HWY_TARGET == HWY_SCALAR && \
+    (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+  GTEST_SKIP();  // skipped on the scalar target when ADDRESS_SANITIZER or MEMORY_SANITIZER is defined
+#else
+  TestFloatIDCT<8, 32>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose16x8() { TestFastTranspose<16, 8>(); }  // non-template entry points for the <16, 8> instantiations
+HWY_NOINLINE void TestFloatTranspose16x8() { TestFloatTranspose<16, 8>(); }
+HWY_NOINLINE void TestFastIDCT16x8() { TestFastIDCT<16, 8>(); }
+HWY_NOINLINE void TestFloatIDCT16x8() {
+#if HWY_TARGET == HWY_SCALAR && \
+    (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+  GTEST_SKIP();  // skipped on the scalar target when ADDRESS_SANITIZER or MEMORY_SANITIZER is defined
+#else
+  TestFloatIDCT<16, 8>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose16x16() { TestFastTranspose<16, 16>(); }  // non-template entry points for the <16, 16> instantiations
+HWY_NOINLINE void TestFloatTranspose16x16() { TestFloatTranspose<16, 16>(); }
+HWY_NOINLINE void TestFastIDCT16x16() { TestFastIDCT<16, 16>(); }
+HWY_NOINLINE void TestFloatIDCT16x16() {
+#if HWY_TARGET == HWY_SCALAR && \
+    (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+  GTEST_SKIP();  // skipped on the scalar target when ADDRESS_SANITIZER or MEMORY_SANITIZER is defined
+#else
+  TestFloatIDCT<16, 16>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose16x32() { TestFastTranspose<16, 32>(); }  // non-template entry points for the <16, 32> instantiations
+HWY_NOINLINE void TestFloatTranspose16x32() { TestFloatTranspose<16, 32>(); }
+HWY_NOINLINE void TestFastIDCT16x32() { TestFastIDCT<16, 32>(); }
+HWY_NOINLINE void TestFloatIDCT16x32() {
+#if HWY_TARGET == HWY_SCALAR && \
+    (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+  GTEST_SKIP();  // skipped on the scalar target when ADDRESS_SANITIZER or MEMORY_SANITIZER is defined
+#else
+  TestFloatIDCT<16, 32>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose32x8() { TestFastTranspose<32, 8>(); }  // non-template entry points for the <32, 8> instantiations
+HWY_NOINLINE void TestFloatTranspose32x8() { TestFloatTranspose<32, 8>(); }
+HWY_NOINLINE void TestFastIDCT32x8() { TestFastIDCT<32, 8>(); }
+HWY_NOINLINE void TestFloatIDCT32x8() {
+#if HWY_TARGET == HWY_SCALAR && \
+    (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+  GTEST_SKIP();  // skipped on the scalar target when ADDRESS_SANITIZER or MEMORY_SANITIZER is defined
+#else
+  TestFloatIDCT<32, 8>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose32x16() { TestFastTranspose<32, 16>(); }  // non-template entry points for the <32, 16> instantiations
+HWY_NOINLINE void TestFloatTranspose32x16() { TestFloatTranspose<32, 16>(); }
+HWY_NOINLINE void TestFastIDCT32x16() { TestFastIDCT<32, 16>(); }
+HWY_NOINLINE void TestFloatIDCT32x16() {
+#if HWY_TARGET == HWY_SCALAR && \
+    (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+  GTEST_SKIP();  // skipped on the scalar target when ADDRESS_SANITIZER or MEMORY_SANITIZER is defined
+#else
+  TestFloatIDCT<32, 16>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose32x32() { TestFastTranspose<32, 32>(); }  // non-template entry points for the <32, 32> instantiations
+HWY_NOINLINE void TestFloatTranspose32x32() { TestFloatTranspose<32, 32>(); }
+HWY_NOINLINE void TestFastIDCT32x32() { TestFastIDCT<32, 32>(); }
+HWY_NOINLINE void TestFloatIDCT32x32() {
+#if HWY_TARGET == HWY_SCALAR && \
+    (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+  GTEST_SKIP();  // skipped on the scalar target when ADDRESS_SANITIZER or MEMORY_SANITIZER is defined
+#else
+  TestFloatIDCT<32, 32>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose32x64() { TestFastTranspose<32, 64>(); }  // non-template entry points for the <32, 64> instantiations
+HWY_NOINLINE void TestFloatTranspose32x64() { TestFloatTranspose<32, 64>(); }
+HWY_NOINLINE void TestFastIDCT32x64() { TestFastIDCT<32, 64>(); }
+HWY_NOINLINE void TestFloatIDCT32x64() {
+#if HWY_TARGET == HWY_SCALAR && \
+    (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+  GTEST_SKIP();  // skipped on the scalar target when ADDRESS_SANITIZER or MEMORY_SANITIZER is defined
+#else
+  TestFloatIDCT<32, 64>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose64x32() { TestFastTranspose<64, 32>(); }  // non-template entry points for the <64, 32> instantiations
+HWY_NOINLINE void TestFloatTranspose64x32() { TestFloatTranspose<64, 32>(); }
+HWY_NOINLINE void TestFastIDCT64x32() { TestFastIDCT<64, 32>(); }
+HWY_NOINLINE void TestFloatIDCT64x32() {
+#if HWY_TARGET == HWY_SCALAR && \
+    (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+  GTEST_SKIP();  // skipped on the scalar target when ADDRESS_SANITIZER or MEMORY_SANITIZER is defined
+#else
+  TestFloatIDCT<64, 32>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose64x64() { TestFastTranspose<64, 64>(); }  // non-template entry points for the <64, 64> instantiations
+HWY_NOINLINE void TestFloatTranspose64x64() { TestFloatTranspose<64, 64>(); }
+HWY_NOINLINE void TestFastIDCT64x64() { TestFastIDCT<64, 64>(); }
+HWY_NOINLINE void TestFloatIDCT64x64() {
+#if HWY_TARGET == HWY_SCALAR && \
+    (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+  GTEST_SKIP();  // skipped on the scalar target when ADDRESS_SANITIZER or MEMORY_SANITIZER is defined
+#else
+  TestFloatIDCT<64, 64>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose64x128() { TestFastTranspose<64, 128>(); }  // only the transpose entry points are active for <64, 128>
+HWY_NOINLINE void TestFloatTranspose64x128() { TestFloatTranspose<64, 128>(); }
+/* Disabled: the 64x128 IDCT entry points below are commented out; the reason is not recorded here.
+HWY_NOINLINE void TestFastIDCT64x128() { TestFastIDCT<64, 128>(); }
+HWY_NOINLINE void TestFloatIDCT64x128() {
+#if HWY_TARGET == HWY_SCALAR && \
+    (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+  GTEST_SKIP();
+#else
+  TestFloatIDCT<64, 128>();
+#endif
+}
+*/
+HWY_NOINLINE void TestFastTranspose128x64() { TestFastTranspose<128, 64>(); }
+HWY_NOINLINE void TestFloatTranspose128x64() { TestFloatTranspose<128, 64>(); }
+/* 128x64 IDCT checks are compiled out; their registrations are disabled too.
+HWY_NOINLINE void TestFastIDCT128x64() { TestFastIDCT<128, 64>(); }
+HWY_NOINLINE void TestFloatIDCT128x64() {
+#if HWY_TARGET == HWY_SCALAR && \
+    (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+  GTEST_SKIP();
+#else
+  TestFloatIDCT<128, 64>();
+#endif
+}
+*/
+HWY_NOINLINE void TestFastTranspose128x128() { TestFastTranspose<128, 128>(); }
+HWY_NOINLINE void TestFloatTranspose128x128() {
+  TestFloatTranspose<128, 128>();
+}
+/* 128x128 IDCT checks are compiled out; their registrations are disabled too.
+HWY_NOINLINE void TestFastIDCT128x128() { TestFastIDCT<128, 128>(); }
+HWY_NOINLINE void TestFloatIDCT128x128() {
+#if HWY_TARGET == HWY_SCALAR && \
+    (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+  GTEST_SKIP();
+#else
+  TestFloatIDCT<128, 128>();
+#endif
+}
+*/
+HWY_NOINLINE void TestFastTranspose128x256() { TestFastTranspose<128, 256>(); }
+HWY_NOINLINE void TestFloatTranspose128x256() {
+  TestFloatTranspose<128, 256>();
+}
+/* 128x256 IDCT checks are compiled out; their registrations are disabled too.
+HWY_NOINLINE void TestFastIDCT128x256() { TestFastIDCT<128, 256>(); }
+HWY_NOINLINE void TestFloatIDCT128x256() {
+#if HWY_TARGET == HWY_SCALAR && \
+    (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+  GTEST_SKIP();
+#else
+  TestFloatIDCT<128, 256>();
+#endif
+}
+*/
+HWY_NOINLINE void TestFastTranspose256x128() { TestFastTranspose<256, 128>(); }
+HWY_NOINLINE void TestFloatTranspose256x128() {
+  TestFloatTranspose<256, 128>();
+}
+/* 256x128 IDCT checks are compiled out; their registrations are disabled too.
+HWY_NOINLINE void TestFastIDCT256x128() { TestFastIDCT<256, 128>(); }
+HWY_NOINLINE void TestFloatIDCT256x128() {
+#if HWY_TARGET == HWY_SCALAR && \
+    (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+  GTEST_SKIP();
+#else
+  TestFloatIDCT<256, 128>();
+#endif
+}
+*/
+HWY_NOINLINE void TestFastTranspose256x256() { TestFastTranspose<256, 256>(); }
+HWY_NOINLINE void TestFloatTranspose256x256() {
+  TestFloatTranspose<256, 256>();
+}
+/* 256x256 IDCT checks are compiled out; their registrations are disabled too.
+HWY_NOINLINE void TestFastIDCT256x256() { TestFastIDCT<256, 256>(); }
+HWY_NOINLINE void TestFloatIDCT256x256() {
+#if HWY_TARGET == HWY_SCALAR && \
+    (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+  GTEST_SKIP();
+#else
+  TestFloatIDCT<256, 256>();
+#endif
+}
+*/
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+class FastDCTTargetTest : public hwy::TestWithParamTarget {};  // Test fixture.
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(FastDCTTargetTest);
+
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose8x8);  // Transposes.
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose8x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose8x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose8x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose8x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose8x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose16x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose16x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose16x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose16x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose16x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose16x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose32x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose32x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose32x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose32x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose32x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose32x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose32x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose32x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose64x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose64x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose64x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose64x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose64x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose64x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose128x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose128x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose128x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose128x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose128x256);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose128x256);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose256x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose256x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose256x256);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose256x256);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT8x8);  // IDCTs up to 64x64.
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT8x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT8x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT8x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT8x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT8x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT16x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT16x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT16x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT16x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT16x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT16x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT32x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT32x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT32x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT32x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT32x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT32x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT32x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT32x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT64x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT64x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT64x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT64x64);
+/*
+ * Disabled: for DCT sizes of 128 and above, rounding the inputs alone
+ * already produces very large errors.
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT64x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT64x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT128x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT128x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT128x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT128x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT128x256);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT128x256);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT256x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT256x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT256x256);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT256x256);
+*/
+
+TEST(FastDCTTest, TestWrapperFloat) { BenchmarkFloatIDCT32x32(); }  // Plain TEST.
+TEST(FastDCTTest, TestWrapperFast) { BenchmarkFastIDCT32x32(); }  // Plain TEST.
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_math-inl.h b/third-party/libjxl/libjxl/lib/jxl/fast_math-inl.h
new file mode 100644
index 0000000000..5c48034290
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/fast_math-inl.h
@@ -0,0 +1,236 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Fast SIMD math ops (log2, encoder only, cos, erf for splines)
+
+#if defined(LIB_JXL_FAST_MATH_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_FAST_MATH_INL_H_
+#undef LIB_JXL_FAST_MATH_INL_H_
+#else
+#define LIB_JXL_FAST_MATH_INL_H_
+#endif
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/common.h"
+#include "lib/jxl/rational_polynomial-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Eq;
+using hwy::HWY_NAMESPACE::Floor;
+using hwy::HWY_NAMESPACE::Ge;
+using hwy::HWY_NAMESPACE::GetLane;
+using hwy::HWY_NAMESPACE::IfThenElse;
+using hwy::HWY_NAMESPACE::IfThenZeroElse;
+using hwy::HWY_NAMESPACE::Le;
+using hwy::HWY_NAMESPACE::Min;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::NegMulAdd;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::ShiftLeft;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Sub;
+using hwy::HWY_NAMESPACE::Xor;
+
+// Computes base-2 logarithm like std::log2. Undefined if negative / NaN.
+// L1 error ~3.9E-6. `df` is the float tag of `x`; an int32 tag is rebound from it.
+template <class DF, class V>
+V FastLog2f(const DF df, V x) {
+  // 2,2 rational polynomial approximation of std::log1p(x) / std::log(2).
+  HWY_ALIGN const float p[4 * (2 + 1)] = {HWY_REP4(-1.8503833400518310E-06f),
+                                          HWY_REP4(1.4287160470083755E+00f),
+                                          HWY_REP4(7.4245873327820566E-01f)};
+  HWY_ALIGN const float q[4 * (2 + 1)] = {HWY_REP4(9.9032814277590719E-01f),
+                                          HWY_REP4(1.0096718572241148E+00f),
+                                          HWY_REP4(1.7409343003366853E-01f)};
+
+  const Rebind<int32_t, DF> di;
+  const auto x_bits = BitCast(di, x);  // Raw IEEE-754 bits of x.
+
+  // Range reduction to [-1/3, 1/3] - 3 integer, 2 float ops
+  const auto exp_bits = Sub(x_bits, Set(di, 0x3f2aaaab));  // = 2/3
+  // Shifted exponent = log2; also used to clear mantissa.
+  const auto exp_shifted = ShiftRight<23>(exp_bits);
+  const auto mantissa = BitCast(df, Sub(x_bits, ShiftLeft<23>(exp_shifted)));
+  const auto exp_val = ConvertTo(df, exp_shifted);
+  return Add(EvalRationalPolynomial(df, Sub(mantissa, Set(df, 1.0f)), p, q),
+             exp_val);  // rational(mantissa - 1) + integer exponent
+}
+
+// Computes 2**x per lane; max relative error ~3e-7
+template <class DF, class V>
+V FastPow2f(const DF df, V x) {
+  const Rebind<int32_t, DF> di;
+  auto floorx = Floor(x);
+  auto exp =  // 2**floor(x): biased exponent (floor(x) + 127) moved to bit 23.
+      BitCast(df, ShiftLeft<23>(Add(ConvertTo(di, floorx), Set(di, 127))));
+  auto frac = Sub(x, floorx);  // Fractional part, in [0, 1).
+  auto num = Add(frac, Set(df, 1.01749063e+01));
+  num = MulAdd(num, frac, Set(df, 4.88687798e+01));
+  num = MulAdd(num, frac, Set(df, 9.85506591e+01));
+  num = Mul(num, exp);  // Fold 2**floor(x) into the numerator.
+  auto den = MulAdd(frac, Set(df, 2.10242958e-01), Set(df, -2.22328856e-02));
+  den = MulAdd(den, frac, Set(df, -1.94414990e+01));
+  den = MulAdd(den, frac, Set(df, 9.85506633e+01));
+  return Div(num, den);  // Rational approximation of 2**frac, times 2**floor(x).
+}
+
+// base**exponent computed as 2**(exponent * log2(base)); max relative error ~3e-5
+template <class DF, class V>
+V FastPowf(const DF df, V base, V exponent) {
+  return FastPow2f(df, Mul(FastLog2f(df, base), exponent));
+}
+
+// Computes cosine like std::cos.
+// L1 error 7e-5.
+template <class DF, class V>
+V FastCosf(const DF df, V x) {
+  // Step 1: range reduction to [0, 2pi): x - 2pi * floor(x / 2pi)
+  const auto pi2 = Set(df, kPi * 2.0f);
+  const auto pi2_inv = Set(df, 0.5f / kPi);
+  const auto npi2 = Mul(Floor(Mul(x, pi2_inv)), pi2);
+  const auto xmodpi2 = Sub(x, npi2);
+  // Step 2: range reduction to [0, pi], using cos(t) == cos(2pi - t)
+  const auto x_pi = Min(xmodpi2, Sub(pi2, xmodpi2));
+  // Step 3: range reduction to [0, pi/2]; the sign is restored in step 6
+  const auto above_pihalf = Ge(x_pi, Set(df, kPi / 2.0f));
+  const auto x_pihalf = IfThenElse(above_pihalf, Sub(Set(df, kPi), x_pi), x_pi);
+  // Step 4: Taylor-like approximation, scaled by 2**0.75 to make angle
+  // duplication steps faster, on x/4.
+  const auto xs = Mul(x_pihalf, Set(df, 0.25f));
+  const auto x2 = Mul(xs, xs);
+  const auto x4 = Mul(x2, x2);
+  const auto cosx_prescaling =
+      MulAdd(x4, Set(df, 0.06960438),
+             MulAdd(x2, Set(df, -0.84087373), Set(df, 1.68179268)));
+  // Step 5: angle duplication, twice: sqrt(2) * cos(x/2), then cos(x).
+  const auto cosx_scale1 =
+      MulAdd(cosx_prescaling, cosx_prescaling, Set(df, -1.414213562));
+  const auto cosx_scale2 = MulAdd(cosx_scale1, cosx_scale1, Set(df, -1));
+  // Step 6: change sign if needed (XOR bit 31 in lanes where above_pihalf).
+  const Rebind<uint32_t, DF> du;
+  auto signbit = ShiftLeft<31>(BitCast(du, VecFromMask(df, above_pihalf)));
+  return BitCast(df, Xor(signbit, BitCast(du, cosx_scale2)));
+}
+
+// Computes the error function like std::erf.
+// L1 error 7e-4.
+template <class DF, class V>
+V FastErff(const DF df, V x) {
+  // Formula from
+  // https://en.wikipedia.org/wiki/Error_function#Numerical_approximations
+  // but constants have been recomputed.
+  const auto xle0 = Le(x, Zero(df));  // Lanes whose result sign is flipped below.
+  const auto absx = Abs(x);
+  // Compute 1 - 1 / ((((x * a + b) * x + c) * x + d) * x + 1)**4
+  const auto denom1 =
+      MulAdd(absx, Set(df, 7.77394369e-02), Set(df, 2.05260015e-04));
+  const auto denom2 = MulAdd(denom1, absx, Set(df, 2.32120216e-01));
+  const auto denom3 = MulAdd(denom2, absx, Set(df, 2.77820801e-01));
+  const auto denom4 = MulAdd(denom3, absx, Set(df, 1.0f));
+  const auto denom5 = Mul(denom4, denom4);  // denom4**2
+  const auto inv_denom5 = Div(Set(df, 1.0f), denom5);
+  const auto result = NegMulAdd(inv_denom5, inv_denom5, Set(df, 1.0f));
+  // Change sign if needed: XOR bit 31 in lanes where x <= 0 (|x| was used above).
+  const Rebind<uint32_t, DF> du;
+  auto signbit = ShiftLeft<31>(BitCast(du, VecFromMask(df, xle0)));
+  return BitCast(df, Xor(signbit, BitCast(du, result)));
+}
+
+inline float FastLog2f(float f) {  // Scalar overload: one-lane vector call.
+  const HWY_CAPPED(float, 1) d1;
+  return GetLane(FastLog2f(d1, Set(d1, f)));
+}
+
+inline float FastPow2f(float f) {  // Scalar overload: one-lane vector call.
+  const HWY_CAPPED(float, 1) d1;
+  return GetLane(FastPow2f(d1, Set(d1, f)));
+}
+
+inline float FastPowf(float b, float e) {  // Scalar overload: b raised to e.
+  const HWY_CAPPED(float, 1) d1;
+  return GetLane(FastPowf(d1, Set(d1, b), Set(d1, e)));
+}
+
+inline float FastCosf(float f) {  // Scalar overload: one-lane vector call.
+  const HWY_CAPPED(float, 1) d1;
+  return GetLane(FastCosf(d1, Set(d1, f)));
+}
+
+inline float FastErff(float f) {  // Scalar overload: one-lane vector call.
+  const HWY_CAPPED(float, 1) d1;
+  return GetLane(FastErff(d1, Set(d1, f)));
+}
+
+// Returns cbrt(x) + add with 6 ulp max error.
+// Modified from vectormath_exp.h, Apache 2 license.
+// https://www.agner.org/optimize/vectorclass.zip
+template <class V>
+V CubeRootAndAdd(const V x, const V add) {
+  const HWY_FULL(float) df;  // NOTE(review): presumably V must match this tag.
+  const HWY_FULL(int32_t) di;
+
+  const auto kExpBias = Set(di, 0x54800000);  // cast(1.) + cast(1.) / 3
+  const auto kExpMul = Set(di, 0x002AAAAA);   // shifted 1/3
+  const auto k1_3 = Set(df, 1.0f / 3);
+  const auto k4_3 = Set(df, 4.0f / 3);
+
+  const auto xa = x;  // assume inputs never negative
+  const auto xa_3 = Mul(k1_3, xa);
+
+  // Multiply exponent by -1/3 (initial bit-level guess for x**(-1/3))
+  const auto m1 = BitCast(di, xa);
+  // Special case for 0. 0 is represented with an exponent of 0, so the
+  // "kExpBias - 1/3 * exp" below gives the wrong result. The IfThenZeroElse()
+  // sets those values as 0, which prevents having NaNs in the computations
+  // below.
+  // TODO(eustas): use fused op
+  const auto m2 = IfThenZeroElse(
+      Eq(m1, Zero(di)), Sub(kExpBias, Mul((ShiftRight<23>(m1)), kExpMul)));
+  auto r = BitCast(df, m2);
+
+  // Newton-Raphson iterations: r <- (4/3) * r - (x/3) * r**4
+  for (int i = 0; i < 3; i++) {
+    const auto r2 = Mul(r, r);
+    r = NegMulAdd(xa_3, Mul(r2, r2), Mul(k4_3, r));
+  }
+  // Final iteration: r <- r + (r - x * r**4) / 3, then result = r**2 * x + add
+  auto r2 = Mul(r, r);
+  r = MulAdd(k1_3, NegMulAdd(xa, Mul(r2, r2), r), r);
+  r2 = Mul(r, r);
+  r = MulAdd(r2, x, add);
+
+  return r;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_FAST_MATH_INL_H_
+
+#if HWY_ONCE
+#ifndef FAST_MATH_ONCE
+#define FAST_MATH_ONCE
+
+namespace jxl {  // Scalar entry points, each forwarding via HWY_STATIC_DISPATCH.
+inline float FastLog2f(float f) { return HWY_STATIC_DISPATCH(FastLog2f)(f); }
+inline float FastPow2f(float f) { return HWY_STATIC_DISPATCH(FastPow2f)(f); }
+inline float FastPowf(float b, float e) {
+  return HWY_STATIC_DISPATCH(FastPowf)(b, e);
+}
+inline float FastCosf(float f) { return HWY_STATIC_DISPATCH(FastCosf)(f); }
+inline float FastErff(float f) { return HWY_STATIC_DISPATCH(FastErff)(f); }
+}  // namespace jxl
+
+#endif  // FAST_MATH_ONCE
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_math_test.cc b/third-party/libjxl/libjxl/lib/jxl/fast_math_test.cc
new file mode 100644
index 0000000000..897aadc120
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/fast_math_test.cc
@@ -0,0 +1,288 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/fast_math_test.cc"
+#include <hwy/foreach_target.h>
+
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/dec_xyb-inl.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+// Test utils
+#include <hwy/highway.h>
+#include <hwy/tests/test_util-inl.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+HWY_NOINLINE void TestFastLog2() {  // FastLog2f vs std::log2 on [1e-7, 1e3].
+  const HWY_FULL(float) df;
+  constexpr size_t kSamples = 1 << 23;
+  float worst = 0.0f;
+  Rng rng(1);
+  for (size_t trial = 0; trial < kSamples; ++trial) {
+    const float x = rng.UniformF(1e-7f, 1e3f);
+    const float reference = std::log2(x);
+    const float got = GetLane(FastLog2f(df, Set(df, x)));
+    const float abs_err = std::abs(reference - got);
+    EXPECT_LT(abs_err, 3.1E-6) << "f = " << x;
+    worst = std::max(worst, abs_err);
+  }
+  printf("max abs err %e\n", static_cast<double>(worst));
+}
+
+HWY_NOINLINE void TestFastPow2() {
+  // Compares FastPow2f with std::pow(2, x) for x drawn from [-100, 100].
+  const HWY_FULL(float) df;
+  constexpr size_t kSamples = 1 << 23;
+  float worst = 0.0f;
+  Rng rng(1);
+  for (size_t trial = 0; trial < kSamples; ++trial) {
+    const float x = rng.UniformF(-100, 100);
+    const float expected = std::pow(2, x);
+    const float got = GetLane(FastPow2f(df, Set(df, x)));
+    const float rel_err = std::abs(expected - got) / expected;
+    EXPECT_LT(rel_err, 3.1E-6) << "f = " << x;
+    worst = std::max(worst, rel_err);
+  }
+  printf("max rel err %e\n", static_cast<double>(worst));
+}
+
+HWY_NOINLINE void TestFastPow() {
+  // Compares FastPowf with std::pow for bases in [1e-3, 1e3], powers in [-10, 10].
+  const HWY_FULL(float) df;
+  constexpr size_t kSamples = 1 << 23;
+  float worst = 0.0f;
+  Rng rng(1);
+  for (size_t trial = 0; trial < kSamples; ++trial) {
+    const float base = rng.UniformF(1e-3f, 1e3f);
+    const float power = rng.UniformF(-10, 10);
+    const float expected = std::pow(base, power);
+    const float got = GetLane(FastPowf(df, Set(df, base), Set(df, power)));
+    const float rel_err = std::abs(expected - got) / expected;
+    EXPECT_LT(rel_err, 3E-5) << "b = " << base << " e = " << power;
+    worst = std::max(worst, rel_err);
+  }
+  printf("max rel err %e\n", static_cast<double>(worst));
+}
+
+HWY_NOINLINE void TestFastCos() {  // FastCosf vs std::cos on [-1e3, 1e3].
+  const HWY_FULL(float) df;
+  constexpr size_t kSamples = 1 << 23;
+  float worst = 0.0f;
+  Rng rng(1);
+  for (size_t trial = 0; trial < kSamples; ++trial) {
+    const float x = rng.UniformF(-1e3f, 1e3f);
+    const float reference = std::cos(x);
+    const float got = GetLane(FastCosf(df, Set(df, x)));
+    const float abs_err = std::abs(reference - got);
+    EXPECT_LT(abs_err, 7E-5) << "f = " << x;
+    worst = std::max(worst, abs_err);
+  }
+  printf("max abs err %e\n", static_cast<double>(worst));
+}
+
+HWY_NOINLINE void TestFastErf() {  // FastErff vs std::erf on [-5, 5].
+  const HWY_FULL(float) df;
+  constexpr size_t kSamples = 1 << 23;
+  float worst = 0.0f;
+  Rng rng(1);
+  for (size_t trial = 0; trial < kSamples; ++trial) {
+    const float x = rng.UniformF(-5.f, 5.f);
+    const float reference = std::erf(x);
+    const float got = GetLane(FastErff(df, Set(df, x)));
+    const float abs_err = std::abs(reference - got);
+    EXPECT_LT(abs_err, 7E-4) << "f = " << x;
+    worst = std::max(worst, abs_err);
+  }
+  printf("max abs err %e\n", static_cast<double>(worst));
+}
+
+HWY_NOINLINE void TestCubeRoot() {  // CubeRootAndAdd(x, 0) vs cbrtf on [0, 20).
+  const HWY_FULL(float) df;
+  const size_t num_lanes = Lanes(df);
+  for (uint64_t step = 0; step < 2000000; ++step) {
+    const float x = step * 1E-5f;
+    HWY_ALIGN float approx[MaxLanes(df)];
+    Store(CubeRootAndAdd(Set(df, x), Zero(df)), df, approx);
+    // Every lane was fed the same input, so all lanes must agree.
+    for (size_t i = 1; i < num_lanes; ++i) {
+      EXPECT_NEAR(approx[0], approx[i], 5E-7f);
+    }
+    const float expected = cbrtf(x);
+    EXPECT_NEAR(approx[0], expected, 8E-7f);
+  }
+}
+
+HWY_NOINLINE void TestFastSRGB() {
+  // Compares FastLinearToSRGB with TF_SRGB().EncodedFromDisplay on [0, 1].
+  const HWY_FULL(float) df;
+  constexpr size_t kSamples = 1 << 23;
+  float worst = 0.0f;
+  Rng rng(1);
+  for (size_t trial = 0; trial < kSamples; ++trial) {
+    const float x = rng.UniformF(0.0f, 1.0f);
+    const float got = GetLane(FastLinearToSRGB(df, Set(df, x)));
+    const float expected = GetLane(TF_SRGB().EncodedFromDisplay(df, Set(df, x)));
+    const float abs_err = std::abs(expected - got);
+    EXPECT_LT(abs_err, 1.2E-4) << "f = " << x;
+    worst = std::max(worst, abs_err);
+  }
+  printf("max abs err %e\n", static_cast<double>(worst));
+}
+
+HWY_NOINLINE void TestFastPQEFD() {  // PQ EncodedFromDisplay: SIMD vs scalar.
+  const HWY_FULL(float) df;
+  constexpr size_t kSamples = 1 << 23;
+  float worst = 0.0f;
+  Rng rng(1);
+  for (size_t trial = 0; trial < kSamples; ++trial) {
+    const float x = rng.UniformF(0.0f, 1.0f);
+    const float got = GetLane(TF_PQ().EncodedFromDisplay(df, Set(df, x)));
+    const float expected = TF_PQ().EncodedFromDisplay(x);
+    const float abs_err = std::abs(expected - got);
+    EXPECT_LT(abs_err, 7e-7) << "f = " << x;
+    worst = std::max(worst, abs_err);
+  }
+  printf("max abs err %e\n", static_cast<double>(worst));
+}
+
+HWY_NOINLINE void TestFastHLGEFD() {  // HLG EncodedFromDisplay: SIMD vs scalar.
+  const HWY_FULL(float) df;
+  constexpr size_t kSamples = 1 << 23;
+  float worst = 0.0f;
+  Rng rng(1);
+  for (size_t trial = 0; trial < kSamples; ++trial) {
+    const float x = rng.UniformF(0.0f, 1.0f);
+    const float got = GetLane(TF_HLG().EncodedFromDisplay(df, Set(df, x)));
+    const float expected = TF_HLG().EncodedFromDisplay(x);
+    const float abs_err = std::abs(expected - got);
+    EXPECT_LT(abs_err, 5e-7) << "f = " << x;
+    worst = std::max(worst, abs_err);
+  }
+  printf("max abs err %e\n", static_cast<double>(worst));
+}
+
+HWY_NOINLINE void TestFast709EFD() {  // 709 EncodedFromDisplay: SIMD vs scalar.
+  const HWY_FULL(float) df;
+  constexpr size_t kSamples = 1 << 23;
+  float worst = 0.0f;
+  Rng rng(1);
+  for (size_t trial = 0; trial < kSamples; ++trial) {
+    const float x = rng.UniformF(0.0f, 1.0f);
+    const float got = GetLane(TF_709().EncodedFromDisplay(df, Set(df, x)));
+    const float expected = TF_709().EncodedFromDisplay(x);
+    const float abs_err = std::abs(expected - got);
+    EXPECT_LT(abs_err, 2e-6) << "f = " << x;
+    worst = std::max(worst, abs_err);
+  }
+  printf("max abs err %e\n", static_cast<double>(worst));
+}
+
+HWY_NOINLINE void TestFastPQDFE() {  // PQ DisplayFromEncoded: SIMD vs scalar.
+  const HWY_FULL(float) df;
+  constexpr size_t kSamples = 1 << 23;
+  float worst = 0.0f;
+  Rng rng(1);
+  for (size_t trial = 0; trial < kSamples; ++trial) {
+    const float x = rng.UniformF(0.0f, 1.0f);
+    const float got = GetLane(TF_PQ().DisplayFromEncoded(df, Set(df, x)));
+    const float expected = TF_PQ().DisplayFromEncoded(x);
+    const float abs_err = std::abs(expected - got);
+    EXPECT_LT(abs_err, 3E-6) << "f = " << x;
+    worst = std::max(worst, abs_err);
+  }
+  printf("max abs err %e\n", static_cast<double>(worst));
+}
+
+HWY_NOINLINE void TestFastXYB() {  // sRGB -> XYB -> FastXYBTosRGB8 roundtrip.
+  if (!HasFastXYBTosRGB8()) return;  // Nothing to check without the fast path.
+  ImageMetadata metadata;
+  ImageBundle bundle(&metadata);  // Not named `ib`: that is a loop index below.
+  const int scaling = 1;
+  const int n = 256 * scaling;
+  const float inv_scaling = 1.0f / scaling;
+  constexpr int kChunk = 32;
+  // The image is divided in chunks to reduce total memory usage.
+  for (int cr = 0; cr < n; cr += kChunk) {
+    for (int cg = 0; cg < n; cg += kChunk) {
+      for (int cb = 0; cb < n; cb += kChunk) {
+        Image3F chunk(kChunk * kChunk, kChunk);  // Row = ir, column = (ig, ib).
+        for (int ir = 0; ir < kChunk; ir++) {
+          for (int ig = 0; ig < kChunk; ig++) {
+            for (int ib = 0; ib < kChunk; ib++) {
+              float r = (cr + ir) * inv_scaling;
+              float g = (cg + ig) * inv_scaling;
+              float b = (cb + ib) * inv_scaling;
+              chunk.PlaneRow(0, ir)[ig * kChunk + ib] = r * (1.0f / 255);
+              chunk.PlaneRow(1, ir)[ig * kChunk + ib] = g * (1.0f / 255);
+              chunk.PlaneRow(2, ir)[ig * kChunk + ib] = b * (1.0f / 255);
+            }
+          }
+        }
+        bundle.SetFromImage(std::move(chunk), ColorEncoding::SRGB());
+        Image3F xyb(kChunk * kChunk, kChunk);
+        std::vector<uint8_t> roundtrip(kChunk * kChunk * kChunk * 3);
+        ToXYB(bundle, nullptr, &xyb, GetJxlCms());
+        for (int y = 0; y < kChunk; y++) {
+          const float* xyba[4] = {xyb.PlaneRow(0, y), xyb.PlaneRow(1, y),
+                                  xyb.PlaneRow(2, y), nullptr};
+          jxl::HWY_NAMESPACE::FastXYBTosRGB8(
+              xyba, roundtrip.data() + 3 * xyb.xsize() * y, false, xyb.xsize());
+        }
+        for (int ir = 0; ir < kChunk; ir++) {
+          for (int ig = 0; ig < kChunk; ig++) {
+            for (int ib = 0; ib < kChunk; ib++) {
+              float r = (cr + ir) * inv_scaling;
+              float g = (cg + ig) * inv_scaling;
+              float b = (cb + ib) * inv_scaling;
+              size_t idx = ir * kChunk * kChunk + ig * kChunk + ib;
+              int rr = roundtrip[3 * idx];  // Interleaved RGB, 3 bytes per pixel.
+              int rg = roundtrip[3 * idx + 1];
+              int rb = roundtrip[3 * idx + 2];
+              EXPECT_LT(std::abs(r - rr), 2) << "expected " << r << " got " << rr;
+              EXPECT_LT(std::abs(g - rg), 2) << "expected " << g << " got " << rg;
+              EXPECT_LT(std::abs(b - rb), 2) << "expected " << b << " got " << rb;
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+class FastMathTargetTest : public hwy::TestWithParamTarget {};  // Test fixture.
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(FastMathTargetTest);
+
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastLog2);  // Math approximations.
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastPow2);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastPow);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastCos);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastErf);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestCubeRoot);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastSRGB);  // Transfer functions.
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastPQDFE);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastPQEFD);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastHLGEFD);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFast709EFD);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastXYB);  // XYB roundtrip.
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/field_encodings.h b/third-party/libjxl/libjxl/lib/jxl/field_encodings.h
new file mode 100644
index 0000000000..613e8fad33
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/field_encodings.h
@@ -0,0 +1,134 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_FIELD_ENCODINGS_H_
+#define LIB_JXL_FIELD_ENCODINGS_H_
+
+// Constants needed to encode/decode fields; avoids including the full fields.h.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <hwy/base.h>
+#include <vector>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Macro to define the Fields' derived class Name when compiling with debug
+// names. Expands to a Name() override returning the stringized argument.
+#if JXL_IS_DEBUG_BUILD
+#define JXL_FIELDS_NAME(X) \
+  const char* Name() const override { return #X; }
+#else
+#define JXL_FIELDS_NAME(X)
+#endif  // JXL_IS_DEBUG_BUILD
+
+class Visitor;  // Forward declaration only; not defined in this header.
+class Fields {  // Abstract interface: subclasses implement VisitFields().
+ public:
+  virtual ~Fields() = default;
+#if JXL_IS_DEBUG_BUILD
+  virtual const char* Name() const = 0;  // Exists in debug builds only.
+#endif  // JXL_IS_DEBUG_BUILD
+  virtual Status VisitFields(Visitor* JXL_RESTRICT visitor) = 0;
+};
+
+// Distribution of U32 values for one particular selector. Represents either a
+// power of two-sized range, or a single value. A separate type ensures this is
+// only passed to the U32Enc ctor.
+struct U32Distr {
+  // No need to validate - all `d` are legitimate.
+  constexpr explicit U32Distr(uint32_t d) : d(d) {}
+
+  static constexpr uint32_t kDirect = 0x80000000u;  // Top bit marks "direct".
+
+  constexpr bool IsDirect() const { return (d & kDirect) != 0; }
+
+  // Only call if IsDirect(). Returns the low 31 bits.
+  constexpr uint32_t Direct() const { return d & (kDirect - 1); }
+
+  // Only call if !IsDirect(). Bits 0..4 hold (extra bits - 1), bits 5..30 offset.
+  constexpr size_t ExtraBits() const { return (d & 0x1F) + 1; }
+  constexpr uint32_t Offset() const { return (d >> 5) & 0x3FFFFFF; }
+
+  uint32_t d;  // Packed representation; built by Val() / BitsOffset() / Bits().
+};
+
+// A direct-coded 31-bit value occupying 2 bits in the bitstream.
+constexpr U32Distr Val(uint32_t value) {
+  return U32Distr(value | U32Distr::kDirect);
+}
+
+// Value - `offset` will be signaled in `bits` extra bits.
+constexpr U32Distr BitsOffset(uint32_t bits, uint32_t offset) {
+  return U32Distr(((bits - 1) & 0x1F) + ((offset & 0x3FFFFFF) << 5));
+}
+
+// Value will be signaled in `bits` extra bits.
+constexpr U32Distr Bits(uint32_t bits) { return BitsOffset(bits, 0); }
+
+// See U32Coder documentation in fields.h. Holds one U32Distr per selector.
+class U32Enc {
+ public:
+  constexpr U32Enc(const U32Distr d0, const U32Distr d1, const U32Distr d2,
+                   const U32Distr d3)
+      : d_{d0, d1, d2, d3} {}
+
+  // Returns the U32Distr at `selector` = 0..3, least-significant first.
+  U32Distr GetDistr(const uint32_t selector) const {
+    JXL_ASSERT(selector < 4);  // d_ has exactly four entries.
+    return d_[selector];
+  }
+
+ private:
+  U32Distr d_[4];  // Indexed by selector, in constructor argument order.
+};
+
+// Returns a 64-bit mask with only bit `index` set (0 = least significant).
+template <typename T>
+static inline constexpr uint64_t MakeBit(T index) {
+  return uint64_t{1} << static_cast<uint32_t>(index);
+}
+
+// Lists every valid value of `Enum`, lowest first. Each Enum must provide an
+// EnumBits() overload returning a 64-bit mask whose set bits are its values,
+// which implies values must be in [0, 64).
+template <typename Enum>
+std::vector<Enum> Values() {
+  const uint64_t all_bits = EnumBits(Enum());
+  std::vector<Enum> result;
+  result.reserve(hwy::PopCount(all_bits));
+
+  // Visit the set bits from least to most significant; the loop step
+  // `remaining &= remaining - 1` clears the lowest set bit.
+  for (uint64_t remaining = all_bits; remaining != 0;
+       remaining &= remaining - 1) {
+    const int index = Num0BitsBelowLS1Bit_Nonzero(remaining);
+    result.push_back(static_cast<Enum>(index));
+  }
+  return result;
+}
+
+// Returns true if value is one of Values<Enum>(); otherwise a JXL_FAILURE.
+template <class Enum>
+Status EnumValid(const Enum value) {
+  const uint32_t raw = static_cast<uint32_t>(value);
+  // Values of 64 and above cannot be represented in the EnumBits() mask.
+  if (raw >= 64) {
+    return JXL_FAILURE("Value %u too large for %s\n", raw, EnumName(Enum()));
+  }
+  const bool is_known = (EnumBits(Enum()) & MakeBit(value)) != 0;
+  if (!is_known) {
+    return JXL_FAILURE("Invalid value %u for %s\n", raw, EnumName(Enum()));
+  }
+  return true;
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_FIELD_ENCODINGS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/fields.cc b/third-party/libjxl/libjxl/lib/jxl/fields.cc
new file mode 100644
index 0000000000..47a75638c2
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/fields.cc
@@ -0,0 +1,656 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/fields.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <cmath>
+#include <hwy/base.h>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/printf_macros.h"
+
+namespace jxl {
+
+namespace {
+
+using ::jxl::fields_internal::VisitorBase;
+
+struct InitVisitor : public VisitorBase {
+  Status Bits(const size_t /*unused*/, const uint32_t default_value,
+              uint32_t* JXL_RESTRICT value) override {
+    *value = default_value;
+    return true;
+  }
+
+  Status U32(const U32Enc /*unused*/, const uint32_t default_value,
+             uint32_t* JXL_RESTRICT value) override {
+    *value = default_value;
+    return true;
+  }
+
+  Status U64(const uint64_t default_value,
+             uint64_t* JXL_RESTRICT value) override {
+    *value = default_value;
+    return true;
+  }
+
+  Status Bool(bool default_value, bool* JXL_RESTRICT value) override {
+    *value = default_value;
+    return true;
+  }
+
+  Status F16(const float default_value, float* JXL_RESTRICT value) override {
+    *value = default_value;
+    return true;
+  }
+
+  // Always visit conditional fields to ensure they are initialized.
+  Status Conditional(bool /*condition*/) override { return true; }
+
+  Status AllDefault(const Fields& /*fields*/,
+                    bool* JXL_RESTRICT all_default) override {
+    // Just initialize this field and don't skip initializing others.
+    JXL_RETURN_IF_ERROR(Bool(true, all_default));
+    return false;
+  }
+
+  Status VisitNested(Fields* /*fields*/) override {
+    // Avoid re-initializing nested bundles (their ctors already called
+    // Bundle::Init for their fields).
+    return true;
+  }
+};
+
+// Similar to InitVisitor, but also initializes nested fields.
+struct SetDefaultVisitor : public VisitorBase {
+  Status Bits(const size_t /*unused*/, const uint32_t default_value,
+              uint32_t* JXL_RESTRICT value) override {
+    *value = default_value;
+    return true;
+  }
+
+  Status U32(const U32Enc /*unused*/, const uint32_t default_value,
+             uint32_t* JXL_RESTRICT value) override {
+    *value = default_value;
+    return true;
+  }
+
+  Status U64(const uint64_t default_value,
+             uint64_t* JXL_RESTRICT value) override {
+    *value = default_value;
+    return true;
+  }
+
+  Status Bool(bool default_value, bool* JXL_RESTRICT value) override {
+    *value = default_value;
+    return true;
+  }
+
+  Status F16(const float default_value, float* JXL_RESTRICT value) override {
+    *value = default_value;
+    return true;
+  }
+
+  // Always visit conditional fields to ensure they are initialized.
+  Status Conditional(bool /*condition*/) override { return true; }
+
+  Status AllDefault(const Fields& /*fields*/,
+                    bool* JXL_RESTRICT all_default) override {
+    // Just initialize this field and don't skip initializing others.
+    JXL_RETURN_IF_ERROR(Bool(true, all_default));
+    return false;
+  }
+};
+
+class AllDefaultVisitor : public VisitorBase {
+ public:
+  explicit AllDefaultVisitor() : VisitorBase() {}
+
+  Status Bits(const size_t bits, const uint32_t default_value,
+              uint32_t* JXL_RESTRICT value) override {
+    all_default_ &= *value == default_value;
+    return true;
+  }
+
+  Status U32(const U32Enc /*unused*/, const uint32_t default_value,
+             uint32_t* JXL_RESTRICT value) override {
+    all_default_ &= *value == default_value;
+    return true;
+  }
+
+  Status U64(const uint64_t default_value,
+             uint64_t* JXL_RESTRICT value) override {
+    all_default_ &= *value == default_value;
+    return true;
+  }
+
+  Status F16(const float default_value, float* JXL_RESTRICT value) override {
+    all_default_ &= std::abs(*value - default_value) < 1E-6f;
+    return true;
+  }
+
+  Status AllDefault(const Fields& /*fields*/,
+                    bool* JXL_RESTRICT /*all_default*/) override {
+    // Visit all fields so we can compute the actual all_default_ value.
+    return false;
+  }
+
+  bool AllDefault() const { return all_default_; }
+
+ private:
+  bool all_default_ = true;
+};
+
+class ReadVisitor : public VisitorBase {
+ public:
+  explicit ReadVisitor(BitReader* reader) : VisitorBase(), reader_(reader) {}
+
+  Status Bits(const size_t bits, const uint32_t /*default_value*/,
+              uint32_t* JXL_RESTRICT value) override {
+    *value = BitsCoder::Read(bits, reader_);
+    if (!reader_->AllReadsWithinBounds()) {
+      return JXL_STATUS(StatusCode::kNotEnoughBytes,
+                        "Not enough bytes for header");
+    }
+    return true;
+  }
+
+  Status U32(const U32Enc dist, const uint32_t /*default_value*/,
+             uint32_t* JXL_RESTRICT value) override {
+    *value = U32Coder::Read(dist, reader_);
+    if (!reader_->AllReadsWithinBounds()) {
+      return JXL_STATUS(StatusCode::kNotEnoughBytes,
+                        "Not enough bytes for header");
+    }
+    return true;
+  }
+
+  Status U64(const uint64_t /*default_value*/,
+             uint64_t* JXL_RESTRICT value) override {
+    *value = U64Coder::Read(reader_);
+    if (!reader_->AllReadsWithinBounds()) {
+      return JXL_STATUS(StatusCode::kNotEnoughBytes,
+                        "Not enough bytes for header");
+    }
+    return true;
+  }
+
+  Status F16(const float /*default_value*/,
+             float* JXL_RESTRICT value) override {
+    ok_ &= F16Coder::Read(reader_, value);
+    if (!reader_->AllReadsWithinBounds()) {
+      return JXL_STATUS(StatusCode::kNotEnoughBytes,
+                        "Not enough bytes for header");
+    }
+    return true;
+  }
+
+  void SetDefault(Fields* fields) override { Bundle::SetDefault(fields); }
+
+  bool IsReading() const override { return true; }
+
+  // Reads one U64 bit-size per present extension. Fails if input runs out or
+  // the sizes overflow when summed; EndExtensions skips what stays unread.
+  Status BeginExtensions(uint64_t* JXL_RESTRICT extensions) override {
+    JXL_QUIET_RETURN_IF_ERROR(VisitorBase::BeginExtensions(extensions));
+    if (*extensions == 0) return true;
+
+    // For each nonzero bit, i.e. extension that is present:
+    for (uint64_t remaining_extensions = *extensions; remaining_extensions != 0;
+         remaining_extensions &= remaining_extensions - 1) {
+      const size_t idx_extension =
+          Num0BitsBelowLS1Bit_Nonzero(remaining_extensions);
+      // Read additional U64 (one per extension) indicating the number of bits
+      // (allows skipping individual extensions).
+      JXL_RETURN_IF_ERROR(U64(0, &extension_bits_[idx_extension]));
+      if (!SafeAdd(total_extension_bits_, extension_bits_[idx_extension],
+                   total_extension_bits_)) {
+        return JXL_FAILURE("Extension bits overflowed, invalid codestream");
+      }
+    }
+    // Used by EndExtensions to skip past any _remaining_ extensions.
+    pos_after_ext_size_ = reader_->TotalBitsConsumed();
+    JXL_ASSERT(pos_after_ext_size_ != 0);
+    return true;
+  }
+
+  Status EndExtensions() override {
+    JXL_QUIET_RETURN_IF_ERROR(VisitorBase::EndExtensions());
+    // Happens if extensions == 0: don't read size, done.
+    if (pos_after_ext_size_ == 0) return true;
+
+    // Not enough bytes as set by BeginExtensions or earlier. Do not return
+    // this as a JXL_FAILURE or false (which can also propagate to error
+    // through e.g. JXL_RETURN_IF_ERROR), since this may be used while
+    // silently checking whether there are enough bytes. If this case must be
+    // treated as an error, reader_->Close() will do this, just like is already
+    // done for non-extension fields.
+    if (!enough_bytes_) return true;
+
+    // Skip new fields this (old?) decoder didn't know about, if any.
+    const size_t bits_read = reader_->TotalBitsConsumed();
+    uint64_t end;
+    if (!SafeAdd(pos_after_ext_size_, total_extension_bits_, end)) {
+      return JXL_FAILURE("Invalid extension size, caused overflow");
+    }
+    if (bits_read > end) {
+      return JXL_FAILURE("Read more extension bits than budgeted");
+    }
+    const size_t remaining_bits = end - bits_read;
+    if (remaining_bits != 0) {
+      JXL_WARNING("Skipping %" PRIuS "-bit extension(s)", remaining_bits);
+      reader_->SkipBits(remaining_bits);
+      if (!reader_->AllReadsWithinBounds()) {
+        return JXL_STATUS(StatusCode::kNotEnoughBytes,
+                          "Not enough bytes for header");
+      }
+    }
+    return true;
+  }
+
+  Status OK() const { return ok_; }
+
+ private:
+  // Whether any error other than not enough bytes occurred.
+  bool ok_ = true;
+
+  // Whether there are enough input bytes to read from.
+  bool enough_bytes_ = true;
+  BitReader* const reader_;
+  // May be 0 even if the corresponding extension is present.
+  uint64_t extension_bits_[Bundle::kMaxExtensions] = {0};
+  uint64_t total_extension_bits_ = 0;
+  size_t pos_after_ext_size_ = 0;  // 0 iff extensions == 0.
+
+  friend Status jxl::CheckHasEnoughBits(Visitor*, size_t);
+};
+
+class MaxBitsVisitor : public VisitorBase {
+ public:
+  Status Bits(const size_t bits, const uint32_t /*default_value*/,
+              uint32_t* JXL_RESTRICT /*value*/) override {
+    max_bits_ += BitsCoder::MaxEncodedBits(bits);
+    return true;
+  }
+
+  Status U32(const U32Enc enc, const uint32_t /*default_value*/,
+             uint32_t* JXL_RESTRICT /*value*/) override {
+    max_bits_ += U32Coder::MaxEncodedBits(enc);
+    return true;
+  }
+
+  Status U64(const uint64_t /*default_value*/,
+             uint64_t* JXL_RESTRICT /*value*/) override {
+    max_bits_ += U64Coder::MaxEncodedBits();
+    return true;
+  }
+
+  Status F16(const float /*default_value*/,
+             float* JXL_RESTRICT /*value*/) override {
+    max_bits_ += F16Coder::MaxEncodedBits();
+    return true;
+  }
+
+  Status AllDefault(const Fields& /*fields*/,
+                    bool* JXL_RESTRICT all_default) override {
+    JXL_RETURN_IF_ERROR(Bool(true, all_default));
+    return false;  // For max bits, assume nothing is default
+  }
+
+  // Always visit conditional fields to get a (loose) upper bound.
+  Status Conditional(bool /*condition*/) override { return true; }
+
+  Status BeginExtensions(uint64_t* JXL_RESTRICT /*extensions*/) override {
+    // Skip - extensions are not included in "MaxBits" because their length
+    // is potentially unbounded.
+    return true;
+  }
+
+  Status EndExtensions() override { return true; }
+
+  size_t MaxBits() const { return max_bits_; }
+
+ private:
+  size_t max_bits_ = 0;
+};
+
+class CanEncodeVisitor : public VisitorBase {
+ public:
+  explicit CanEncodeVisitor() : VisitorBase() {}
+
+  Status Bits(const size_t bits, const uint32_t /*default_value*/,
+              uint32_t* JXL_RESTRICT value) override {
+    size_t encoded_bits = 0;
+    ok_ &= BitsCoder::CanEncode(bits, *value, &encoded_bits);
+    encoded_bits_ += encoded_bits;
+    return true;
+  }
+
+  Status U32(const U32Enc enc, const uint32_t /*default_value*/,
+             uint32_t* JXL_RESTRICT value) override {
+    size_t encoded_bits = 0;
+    ok_ &= U32Coder::CanEncode(enc, *value, &encoded_bits);
+    encoded_bits_ += encoded_bits;
+    return true;
+  }
+
+  Status U64(const uint64_t /*default_value*/,
+             uint64_t* JXL_RESTRICT value) override {
+    size_t encoded_bits = 0;
+    ok_ &= U64Coder::CanEncode(*value, &encoded_bits);
+    encoded_bits_ += encoded_bits;
+    return true;
+  }
+
+  Status F16(const float /*default_value*/,
+             float* JXL_RESTRICT value) override {
+    size_t encoded_bits = 0;
+    ok_ &= F16Coder::CanEncode(*value, &encoded_bits);
+    encoded_bits_ += encoded_bits;
+    return true;
+  }
+
+  Status AllDefault(const Fields& fields,
+                    bool* JXL_RESTRICT all_default) override {
+    *all_default = Bundle::AllDefault(fields);
+    JXL_RETURN_IF_ERROR(Bool(true, all_default));
+    return *all_default;
+  }
+
+  Status BeginExtensions(uint64_t* JXL_RESTRICT extensions) override {
+    JXL_QUIET_RETURN_IF_ERROR(VisitorBase::BeginExtensions(extensions));
+    extensions_ = *extensions;
+    if (*extensions != 0) {
+      JXL_ASSERT(pos_after_ext_ == 0);
+      pos_after_ext_ = encoded_bits_;
+      JXL_ASSERT(pos_after_ext_ != 0);  // visited "extensions"
+    }
+    return true;
+  }
+  // EndExtensions = default.
+
+  Status GetSizes(size_t* JXL_RESTRICT extension_bits,
+                  size_t* JXL_RESTRICT total_bits) {
+    JXL_RETURN_IF_ERROR(ok_);
+    *extension_bits = 0;
+    *total_bits = encoded_bits_;
+    // Only if extension field was nonzero will we encode their sizes.
+    if (pos_after_ext_ != 0) {
+      JXL_ASSERT(encoded_bits_ >= pos_after_ext_);
+      *extension_bits = encoded_bits_ - pos_after_ext_;
+      // Also need to encode *extension_bits and bill it to *total_bits.
+      size_t encoded_bits = 0;
+      ok_ &= U64Coder::CanEncode(*extension_bits, &encoded_bits);
+      *total_bits += encoded_bits;
+
+      // TODO(janwas): support encoding individual extension sizes. We
+      // currently ascribe all bits to the first and send zeros for the
+      // others.
+      for (size_t i = 1; i < hwy::PopCount(extensions_); ++i) {
+        encoded_bits = 0;
+        ok_ &= U64Coder::CanEncode(0, &encoded_bits);
+        *total_bits += encoded_bits;
+      }
+    }
+    return true;
+  }
+
+ private:
+  bool ok_ = true;
+  size_t encoded_bits_ = 0;
+  uint64_t extensions_ = 0;
+  // Snapshot of encoded_bits_ after visiting the extension field, but NOT
+  // including the hidden extension sizes.
+  uint64_t pos_after_ext_ = 0;
+};
+}  // namespace
+
+void Bundle::Init(Fields* fields) {
+  InitVisitor visitor;
+  if (!visitor.Visit(fields)) {
+    JXL_UNREACHABLE("Init should never fail");
+  }
+}
+void Bundle::SetDefault(Fields* fields) {
+  SetDefaultVisitor visitor;
+  if (!visitor.Visit(fields)) {
+    JXL_UNREACHABLE("SetDefault should never fail");
+  }
+}
+bool Bundle::AllDefault(const Fields& fields) {
+  AllDefaultVisitor visitor;
+  if (!visitor.VisitConst(fields)) {
+    JXL_UNREACHABLE("AllDefault should never fail");
+  }
+  return visitor.AllDefault();
+}
+size_t Bundle::MaxBits(const Fields& fields) {
+  MaxBitsVisitor visitor;
+#if JXL_ENABLE_ASSERT
+  Status ret =
+#else
+  (void)
+#endif  // JXL_ENABLE_ASSERT
+      visitor.VisitConst(fields);
+  JXL_ASSERT(ret);
+  return visitor.MaxBits();
+}
+Status Bundle::CanEncode(const Fields& fields, size_t* extension_bits,
+                         size_t* total_bits) {
+  CanEncodeVisitor visitor;
+  JXL_QUIET_RETURN_IF_ERROR(visitor.VisitConst(fields));
+  JXL_QUIET_RETURN_IF_ERROR(visitor.GetSizes(extension_bits, total_bits));
+  return true;
+}
+Status Bundle::Read(BitReader* reader, Fields* fields) {
+  ReadVisitor visitor(reader);
+  JXL_RETURN_IF_ERROR(visitor.Visit(fields));
+  return visitor.OK();
+}
+bool Bundle::CanRead(BitReader* reader, Fields* fields) {
+  ReadVisitor visitor(reader);
+  Status status = visitor.Visit(fields);
+  // We are only checking here whether there are enough bytes. We still return
+  // true for other errors because it means there are enough bytes to determine
+  // there's an error. Use Read() to determine which error it is.
+  return status.code() != StatusCode::kNotEnoughBytes;
+}
+
+size_t BitsCoder::MaxEncodedBits(const size_t bits) { return bits; }
+
+Status BitsCoder::CanEncode(const size_t bits, const uint32_t value,
+                            size_t* JXL_RESTRICT encoded_bits) {
+  *encoded_bits = bits;
+  if (value >= (1ULL << bits)) {
+    return JXL_FAILURE("Value %u too large for %" PRIu64 " bits", value,
+                       static_cast<uint64_t>(bits));
+  }
+  return true;
+}
+
+uint32_t BitsCoder::Read(const size_t bits, BitReader* JXL_RESTRICT reader) {
+  return reader->ReadBits(bits);
+}
+
+size_t U32Coder::MaxEncodedBits(const U32Enc enc) {
+  size_t extra_bits = 0;
+  for (uint32_t selector = 0; selector < 4; ++selector) {
+    const U32Distr d = enc.GetDistr(selector);
+    if (d.IsDirect()) {
+      continue;
+    } else {
+      extra_bits = std::max<size_t>(extra_bits, d.ExtraBits());
+    }
+  }
+  return 2 + extra_bits;
+}
+
+Status U32Coder::CanEncode(const U32Enc enc, const uint32_t value,
+                           size_t* JXL_RESTRICT encoded_bits) {
+  uint32_t selector;
+  size_t total_bits;
+  const Status ok = ChooseSelector(enc, value, &selector, &total_bits);
+  *encoded_bits = ok ? total_bits : 0;
+  return ok;
+}
+
+uint32_t U32Coder::Read(const U32Enc enc, BitReader* JXL_RESTRICT reader) {
+  const uint32_t selector = reader->ReadFixedBits<2>();
+  const U32Distr d = enc.GetDistr(selector);
+  if (d.IsDirect()) {
+    return d.Direct();
+  } else {
+    return reader->ReadBits(d.ExtraBits()) + d.Offset();
+  }
+}
+
+Status U32Coder::ChooseSelector(const U32Enc enc, const uint32_t value,
+                                uint32_t* JXL_RESTRICT selector,
+                                size_t* JXL_RESTRICT total_bits) {
+#if JXL_ENABLE_ASSERT
+  const size_t bits_required = 32 - Num0BitsAboveMS1Bit(value);
+#endif  // JXL_ENABLE_ASSERT
+  JXL_ASSERT(bits_required <= 32);
+
+  *selector = 0;
+  *total_bits = 0;
+
+  // It is difficult to verify whether Dist32Byte are sorted, so check all
+  // selectors and keep the one with the fewest total_bits.
+  *total_bits = 64;  // more than any valid encoding
+  for (uint32_t s = 0; s < 4; ++s) {
+    const U32Distr d = enc.GetDistr(s);
+    if (d.IsDirect()) {
+      if (d.Direct() == value) {
+        *selector = s;
+        *total_bits = 2;
+        return true;  // Done, direct is always the best possible.
+      }
+      continue;
+    }
+    const size_t extra_bits = d.ExtraBits();
+    const uint32_t offset = d.Offset();
+    if (value < offset || value >= offset + (1ULL << extra_bits)) continue;
+
+    // Better than prior encoding, remember it:
+    if (2 + extra_bits < *total_bits) {
+      *selector = s;
+      *total_bits = 2 + extra_bits;
+    }
+  }
+
+  if (*total_bits == 64) {
+    return JXL_FAILURE("No feasible selector for %u", value);
+  }
+
+  return true;
+}
+
+uint64_t U64Coder::Read(BitReader* JXL_RESTRICT reader) {
+  uint64_t selector = reader->ReadFixedBits<2>();
+  if (selector == 0) {
+    return 0;
+  }
+  if (selector == 1) {
+    return 1 + reader->ReadFixedBits<4>();  // 1..16
+  }
+  if (selector == 2) {
+    return 17 + reader->ReadFixedBits<8>();  // 17..272
+  }
+
+  // selector 3, varint, groups have first 12, then 8, and last 4 bits.
+  uint64_t result = reader->ReadFixedBits<12>();
+
+  uint64_t shift = 12;
+  while (reader->ReadFixedBits<1>()) {  // nonzero bit: another group follows
+    if (shift == 60) {  // only 4 bits of the 64-bit result remain
+      result |= static_cast<uint64_t>(reader->ReadFixedBits<4>()) << shift;
+      break;
+    }
+    result |= static_cast<uint64_t>(reader->ReadFixedBits<8>()) << shift;
+    shift += 8;
+  }
+
+  return result;
+}
+
+// Can always encode, but useful because it also returns bit size.
+Status U64Coder::CanEncode(uint64_t value, size_t* JXL_RESTRICT encoded_bits) {
+  if (value == 0) {
+    *encoded_bits = 2;  // 2 selector bits
+  } else if (value <= 16) {
+    *encoded_bits = 2 + 4;  // 2 selector bits + 4 payload bits
+  } else if (value <= 272) {
+    *encoded_bits = 2 + 8;  // 2 selector bits + 8 payload bits
+  } else {
+    *encoded_bits = 2 + 12;  // 2 selector bits + 12 payload bits
+    value >>= 12;
+    int shift = 12;
+    while (value > 0 && shift < 60) {
+      *encoded_bits += 1 + 8;  // 1 continuation bit + 8 payload bits
+      value >>= 8;
+      shift += 8;
+    }
+    if (value > 0) {
+      // Only reachable with shift == 60: the top 4 bits of the u64 remain.
+      *encoded_bits += 1 + 4;  // 1 continuation bit + 4 payload bits
+    } else {
+      *encoded_bits += 1;  // 1 stop bit
+    }
+  }
+
+  return true;
+}
+
+Status F16Coder::Read(BitReader* JXL_RESTRICT reader,
+                      float* JXL_RESTRICT value) {
+  const uint32_t bits16 = reader->ReadFixedBits<16>();
+  const uint32_t sign = bits16 >> 15;  // top bit
+  const uint32_t biased_exp = (bits16 >> 10) & 0x1F;  // next 5 bits
+  const uint32_t mantissa = bits16 & 0x3FF;  // low 10 bits
+
+  if (JXL_UNLIKELY(biased_exp == 31)) {
+    return JXL_FAILURE("F16 infinity or NaN are not supported");
+  }
+
+  // Subnormal or zero: value = mantissa * 2^-10 * 2^-14 (no implicit 1).
+  if (JXL_UNLIKELY(biased_exp == 0)) {
+    *value = (1.0f / 16384) * (mantissa * (1.0f / 1024));
+    if (sign) *value = -*value;
+    return true;
+  }
+
+  // Normalized: convert the representation directly (faster than ldexp/tables).
+  const uint32_t biased_exp32 = biased_exp + (127 - 15);  // rebias half->float
+  const uint32_t mantissa32 = mantissa << (23 - 10);  // widen 10 -> 23 bits
+  const uint32_t bits32 = (sign << 31) | (biased_exp32 << 23) | mantissa32;
+  memcpy(value, &bits32, sizeof(bits32));
+  return true;
+}
+
+Status F16Coder::CanEncode(float value, size_t* JXL_RESTRICT encoded_bits) {
+  *encoded_bits = MaxEncodedBits();  // fixed size, set even on failure
+  if (std::isnan(value) || std::isinf(value)) {
+    return JXL_FAILURE("Should not attempt to store NaN and infinity");
+  }
+  return std::abs(value) <= 65504.0f;  // 65504 = largest finite binary16
+}
+
+Status CheckHasEnoughBits(Visitor* visitor, size_t bits) {
+  if (!visitor->IsReading()) return false;
+  ReadVisitor* rv = static_cast<ReadVisitor*>(visitor);
+  size_t have_bits = rv->reader_->TotalBytes() * kBitsPerByte;
+  size_t want_bits = bits + rv->reader_->TotalBitsConsumed();
+  if (have_bits < want_bits) {
+    return JXL_STATUS(StatusCode::kNotEnoughBytes,
+                      "Not enough bytes for header");
+  }
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/fields.h b/third-party/libjxl/libjxl/lib/jxl/fields.h
new file mode 100644
index 0000000000..60fbbfcba0
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/fields.h
@@ -0,0 +1,379 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_FIELDS_H_
+#define LIB_JXL_FIELDS_H_
+
+// Forward/backward-compatible 'bundles' with auto-serialized 'fields'.
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cinttypes>
+#include <cmath>  // abs
+#include <cstdarg>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/field_encodings.h"
+
+namespace jxl {
+
+struct AuxOut;
+struct BitWriter;
+
+// Integer coders: BitsCoder (raw), U32Coder (table), U64Coder (varint).
+
+// Reads/writes a given (fixed) number of bits <= 32.
+namespace BitsCoder {
+size_t MaxEncodedBits(size_t bits);
+
+Status CanEncode(size_t bits, uint32_t value,
+                 size_t* JXL_RESTRICT encoded_bits);
+
+uint32_t Read(size_t bits, BitReader* JXL_RESTRICT reader);
+
+// Returns false if the value is too large to encode.
+Status Write(size_t bits, uint32_t value, BitWriter* JXL_RESTRICT writer);
+}  // namespace BitsCoder
+
+// Encodes u32 using a lookup table and/or extra bits, governed by a per-field
+// encoding `enc` which consists of four distributions `d` chosen via a 2-bit
+// selector (least significant = 0). Each d may have two modes:
+// - direct: if d.IsDirect(), the value is d.Direct();
+// - offset: the value is derived from d.ExtraBits() extra bits plus d.Offset();
+// This encoding is denser than Exp-Golomb or Gamma codes when both small and
+// large values occur.
+//
+// Examples:
+// Direct: U32Enc(Val(8), Val(16), Val(32), Bits(6)), value 32 => 10b.
+// Offset: U32Enc(Val(0), BitsOffset(1, 1), BitsOffset(2, 3), BitsOffset(8, 8))
+//   defines the following prefix code:
+//   00 -> 0
+//   01x -> 1..2
+//   10xx -> 3..7
+//   11xxxxxxxx -> 8..263
+namespace U32Coder {
+size_t MaxEncodedBits(U32Enc enc);
+Status CanEncode(U32Enc enc, uint32_t value, size_t* JXL_RESTRICT encoded_bits);
+uint32_t Read(U32Enc enc, BitReader* JXL_RESTRICT reader);
+
+// Returns false if the value is too large to encode.
+Status Write(U32Enc enc, uint32_t value, BitWriter* JXL_RESTRICT writer);
+
+// "private"
+Status ChooseSelector(U32Enc enc, uint32_t value,
+                      uint32_t* JXL_RESTRICT selector,
+                      size_t* JXL_RESTRICT total_bits);
+}  // namespace U32Coder
+
+// Encodes 64-bit unsigned integers with a fixed distribution, taking 2 bits
+// to encode 0, 6 bits to encode 1 to 16, 10 bits to encode 17 to 272, 15 bits
+// to encode up to 4095, and on the order of log2(value) * 1.125 bits for
+// larger values.
+namespace U64Coder {
+constexpr size_t MaxEncodedBits() { return 2 + 12 + 6 * (8 + 1) + (4 + 1); }
+
+uint64_t Read(BitReader* JXL_RESTRICT reader);
+
+// Returns false if the value is too large to encode.
+Status Write(uint64_t value, BitWriter* JXL_RESTRICT writer);
+
+// Can always encode, but useful because it also returns bit size.
+Status CanEncode(uint64_t value, size_t* JXL_RESTRICT encoded_bits);
+}  // namespace U64Coder
+
+// IEEE 754 half-precision (binary16). Refuses to read/write NaN/Inf.
+namespace F16Coder {
+constexpr size_t MaxEncodedBits() { return 16; }
+
+// Returns false if the bit representation is NaN or infinity
+Status Read(BitReader* JXL_RESTRICT reader, float* JXL_RESTRICT value);
+
+// Returns false if the value is too large to encode.
+Status Write(float value, BitWriter* JXL_RESTRICT writer);
+Status CanEncode(float value, size_t* JXL_RESTRICT encoded_bits);
+}  // namespace F16Coder
+
+// A "bundle" is a forward- and backward compatible collection of fields.
+// They are used for SizeHeader/FrameHeader/GroupHeader. Bundles can be
+// extended by appending(!) fields. Optional fields may be omitted from the
+// bitstream by conditionally visiting them. When reading new bitstreams with
+// old code, we skip unknown fields at the end of the bundle. This requires
+// storing the amount of extra appended bits, and that fields are visited in
+// chronological order of being added to the format, because old decoders
+// cannot skip some future fields and resume reading old fields. Similarly,
+// new readers query bits in an "extensions" field to skip (groups of) fields
+// not present in old bitstreams. Note that each bundle must include an
+// "extensions" field prior to freezing the format, otherwise it cannot be
+// extended.
+//
+// To ensure interoperability, there will be no opaque fields.
+//
+// HOWTO:
+// - basic usage: define a struct with member variables ("fields") and a
+//   VisitFields(v) member function that calls v->U32/Bool etc. for each
+//   field, specifying their default values. The ctor must call
+//   Bundle::Init(this).
+//
+// - print a trace of visitors: ensure each bundle has a static Name() member
+//   function, and change Bundle::Print* to return true.
+//
+// - optional fields: in VisitFields, add if (v->Conditional(your_condition))
+//   { v->Bool(default, &field); }. This prevents reading/writing field
+//   if !your_condition, which is typically computed from a prior field.
+//   WARNING: to ensure all fields are initialized, do not add an else branch;
+//   instead add another if (v->Conditional(!your_condition)).
+//
+// - repeated fields: for dynamic sizes, use e.g. std::vector and in
+//   VisitFields, if (v->IsReading()) field.resize(size) before accessing field.
+//   For static or bounded sizes, use an array or std::array. In all cases,
+//   simply visit each array element as if it were a normal field.
+//
+// - nested bundles: add a bundle as a normal field and in VisitFields call
+//   JXL_RETURN_IF_ERROR(v->VisitNested(&nested));
+//
+// - allow future extensions: define a "uint64_t extensions" field and call
+//   v->BeginExtensions(&extensions) after visiting all non-extension fields,
+//   and `return v->EndExtensions();` after the last extension field.
+//
+// - encode an entire bundle in one bit if ALL its fields equal their default
+//   values: add a "mutable bool all_default" field and as the first visitor:
+//   if (v->AllDefault(*this, &all_default)) {
+//     // Overwrite all serialized fields, but not any nonserialized_*.
+//     v->SetDefault(this);
+//     return true;
+//   }
+//   Note: if extensions are present, AllDefault() == false.
+
+namespace Bundle {
+constexpr size_t kMaxExtensions = 64;  // bits in u64
+
+// Initializes fields to the default values. It is not recursive to nested
+// fields, this function is intended to be called in the constructors so
+// each nested field will already Init itself.
+void Init(Fields* JXL_RESTRICT fields);
+
+// Similar to Init, but recursive to nested fields.
+void SetDefault(Fields* JXL_RESTRICT fields);
+
+// Returns whether ALL fields (including `extensions`, if present) are equal
+// to their default value.
+bool AllDefault(const Fields& fields);
+
+// Returns max number of bits required to encode a T.
+size_t MaxBits(const Fields& fields);
+
+// Returns whether a header's fields can all be encoded, i.e. they have a
+// valid representation. If so, "*total_bits" is the exact number of bits
+// required. Called by Write.
+Status CanEncode(const Fields& fields, size_t* JXL_RESTRICT extension_bits,
+                 size_t* JXL_RESTRICT total_bits);
+
+Status Read(BitReader* reader, Fields* JXL_RESTRICT fields);
+
+// Returns whether enough bits are available to fully read this bundle using
+// Read. Also returns true in case of a codestream error (other than not being
+// large enough): that means enough bits are available to determine there's an
+// error, use Read to get such error status.
+// NOTE: this advances the BitReader, a different one pointing back at the
+// original bit position in the codestream must be created to use Read after
+// this.
+bool CanRead(BitReader* reader, Fields* JXL_RESTRICT fields);
+
+Status Write(const Fields& fields, BitWriter* JXL_RESTRICT writer, size_t layer,
+             AuxOut* aux_out);
+}  // namespace Bundle
+
+// Different subclasses of Visitor are passed to implementations of Fields
+// throughout their lifetime. Templates used to be used for this but dynamic
+// polymorphism produces more compact executables than template reification did.
+class Visitor {
+ public:
+  virtual ~Visitor() = default;
+  virtual Status Visit(Fields* fields) = 0;
+
+  virtual Status Bool(bool default_value, bool* JXL_RESTRICT value) = 0;
+  virtual Status U32(U32Enc, uint32_t, uint32_t*) = 0;
+
+  // Helper to construct U32Enc from U32Distr.
+  Status U32(const U32Distr d0, const U32Distr d1, const U32Distr d2,
+             const U32Distr d3, const uint32_t default_value,
+             uint32_t* JXL_RESTRICT value) {
+    return U32(U32Enc(d0, d1, d2, d3), default_value, value);
+  }
+
+  template <typename EnumT>
+  Status Enum(const EnumT default_value, EnumT* JXL_RESTRICT value) {
+    uint32_t u32 = static_cast<uint32_t>(*value);
+    // 00 -> 0
+    // 01 -> 1
+    // 10xxxx -> 2..17
+    // 11yyyyyy -> 18..81
+    JXL_RETURN_IF_ERROR(U32(Val(0), Val(1), BitsOffset(4, 2), BitsOffset(6, 18),
+                            static_cast<uint32_t>(default_value), &u32));
+    *value = static_cast<EnumT>(u32);
+    return EnumValid(*value);
+  }
+
+  virtual Status Bits(size_t bits, uint32_t default_value,
+                      uint32_t* JXL_RESTRICT value) = 0;
+  virtual Status U64(uint64_t default_value, uint64_t* JXL_RESTRICT value) = 0;
+  virtual Status F16(float default_value, float* JXL_RESTRICT value) = 0;
+
+  // Returns whether VisitFields should visit some subsequent fields.
+  // "condition" is typically from prior fields, e.g. flags.
+  // Overridden by InitVisitor, SetDefaultVisitor and MaxBitsVisitor.
+  virtual Status Conditional(bool condition) { return condition; }
+
+  // Overridden by the Init/SetDefault/AllDefault/MaxBits/CanEncode visitors.
+  virtual Status AllDefault(const Fields& /*fields*/,
+                            bool* JXL_RESTRICT all_default) {
+    JXL_RETURN_IF_ERROR(Bool(true, all_default));
+    return *all_default;
+  }
+
+  virtual void SetDefault(Fields* /*fields*/) {
+    // Do nothing by default, this is overridden by ReadVisitor.
+  }
+
+  // Returns the result of visiting a nested Bundle.
+  // Overridden by InitVisitor.
+  virtual Status VisitNested(Fields* fields) { return Visit(fields); }
+
+  // Overridden by ReadVisitor. Enables dynamically-sized fields.
+  virtual bool IsReading() const { return false; }
+
+  virtual Status BeginExtensions(uint64_t* JXL_RESTRICT extensions) = 0;
+  virtual Status EndExtensions() = 0;
+};
+
+namespace fields_internal {
+// A bundle can be in one of three states concerning extensions: not-begun,
+// active, ended. Bundles may be nested, so we need a stack of states.
+class ExtensionStates {
+ public:
+  void Push() {
+    // Shifting in a zero bit makes the new top-of-stack state "not-begun".
+    begun_ = begun_ << 1;
+    ended_ = ended_ << 1;
+  }
+
+  // Discards the top-of-stack state; caller must check IsEnded beforehand.
+  void Pop() {
+    begun_ = begun_ >> 1;
+    ended_ = ended_ >> 1;
+  }
+
+  // True once Begin was called for the current state (active or ended).
+  Status IsBegun() const { return (begun_ & 1u) == 1u; }
+  // True once End was called for the current state (ended only).
+  Status IsEnded() const { return (ended_ & 1u) == 1u; }
+
+  void Begin() {
+    JXL_ASSERT(!IsBegun());
+    JXL_ASSERT(!IsEnded());
+    ++begun_;
+  }
+
+  void End() {
+    JXL_ASSERT(IsBegun());
+    JXL_ASSERT(!IsEnded());
+    ++ended_;
+  }
+
+ private:
+  // Bit stacks: the least-significant bit of each holds the current state.
+  uint64_t begun_ = 0;
+  uint64_t ended_ = 0;
+};
+
+// Visitors generate Init/AllDefault/Read/Write logic for all fields. Each
+// bundle's VisitFields member function calls visitor->U32 etc. We do not
+// overload operator() because a function name is easier to search for.
+
+class VisitorBase : public Visitor {
+ public:
+  explicit VisitorBase() {}
+  ~VisitorBase() override { JXL_ASSERT(depth_ == 0); }  // All visits returned.
+
+  // This is the only call site of Fields::VisitFields.
+  // Ensures EndExtensions was called.
+  Status Visit(Fields* fields) override {
+    depth_ += 1;
+    JXL_ASSERT(depth_ <= Bundle::kMaxExtensions);  // NOTE(review): nesting cap.
+    extension_states_.Push();  // Fresh "not-begun" state for this bundle.
+
+    const Status ok = fields->VisitFields(this);
+
+    if (ok) {
+      // If VisitFields called BeginExtensions, must also call
+      // EndExtensions.
+      JXL_ASSERT(!extension_states_.IsBegun() || extension_states_.IsEnded());
+    } else {
+      // Failed, undefined state: don't care whether EndExtensions was
+      // called.
+    }
+
+    extension_states_.Pop();  // Restore the enclosing bundle's state.
+    JXL_ASSERT(depth_ != 0);
+    depth_ -= 1;
+
+    return ok;
+  }
+
+  // For visitors that accept a const Fields, const-cast so we can call the
+  // non-const Fields::VisitFields. NOTE: `t` is not modified except for the
+  // `all_default` field, by CanEncodeVisitor.
+  Status VisitConst(const Fields& t) { return Visit(const_cast<Fields*>(&t)); }
+
+  // Derived types (overridden by InitVisitor because it is unsafe to read
+  // from *value there)
+
+  Status Bool(bool default_value, bool* JXL_RESTRICT value) override {
+    uint32_t bits = *value ? 1 : 0;  // A Bool is visited as a 1-bit field.
+    JXL_RETURN_IF_ERROR(Bits(1, static_cast<uint32_t>(default_value), &bits));
+    JXL_DASSERT(bits <= 1);
+    *value = bits == 1;
+    return true;
+  }
+
+  // Visits `extensions` as a U64 (default 0), then marks extensions as begun.
+  // Called before any conditional visit based on "extensions".
+  // Overridden by ReadVisitor, CanEncodeVisitor and WriteVisitor.
+  Status BeginExtensions(uint64_t* JXL_RESTRICT extensions) override {
+    JXL_RETURN_IF_ERROR(U64(0, extensions));
+
+    extension_states_.Begin();
+    return true;
+  }
+
+  // Called after all extension fields (if any). Although non-extension
+  // fields could be visited afterward, we prefer the convention that
+  // extension fields are always the last to be visited. Overridden by
+  // ReadVisitor.
+  Status EndExtensions() override {
+    extension_states_.End();
+    return true;
+  }
+
+ private:
+  size_t depth_ = 0;  // to check nesting
+  ExtensionStates extension_states_;  // One state per bundle being visited.
+};
+}  // namespace fields_internal
+
+Status CheckHasEnoughBits(Visitor* visitor, size_t bits);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_FIELDS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/fields_test.cc b/third-party/libjxl/libjxl/lib/jxl/fields_test.cc
new file mode 100644
index 0000000000..cf54c780ea
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/fields_test.cc
@@ -0,0 +1,429 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/fields.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <array>
+#include <utility>
+
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+// Ensures `value` round-trips and in exactly `expected_bits_written`.
+void TestU32Coder(const uint32_t value, const size_t expected_bits_written) {
+  const U32Enc encoding(Val(0), Bits(4), Val(0x7FFFFFFF), Bits(32));
+
+  BitWriter bit_writer;
+  BitWriter::Allotment budget(
+      &bit_writer,
+      RoundUpBitsToByteMultiple(U32Coder::MaxEncodedBits(encoding)));
+
+  // The size predicted by CanEncode must match what Write actually emits.
+  size_t predicted_bits;
+  EXPECT_TRUE(U32Coder::CanEncode(encoding, value, &predicted_bits));
+  EXPECT_EQ(expected_bits_written, predicted_bits);
+  EXPECT_TRUE(U32Coder::Write(encoding, value, &bit_writer));
+  EXPECT_EQ(expected_bits_written, bit_writer.BitsWritten());
+  bit_writer.ZeroPadToByte();
+  budget.ReclaimAndCharge(&bit_writer, 0, nullptr);
+  // Reading the padded stream back must reproduce `value`.
+  BitReader bit_reader(bit_writer.GetSpan());
+  EXPECT_EQ(value, U32Coder::Read(encoding, &bit_reader));
+  EXPECT_TRUE(bit_reader.Close());
+}
+
+TEST(FieldsTest, U32CoderTest) {
+  TestU32Coder(0, 2);  // Val(0): 2 selector bits only
+  TestU32Coder(1, 6);  // Bits(4): 2 selector bits + 4 value bits
+  TestU32Coder(15, 6);  // largest value that fits Bits(4)
+  TestU32Coder(0x7FFFFFFF, 2);  // Val(0x7FFFFFFF): 2 selector bits only
+  TestU32Coder(128, 34);  // Bits(32): 2 selector bits + 32 value bits
+  TestU32Coder(0x7FFFFFFEu, 34);  // just below the direct value
+  TestU32Coder(0x80000000u, 34);  // just above the direct value
+  TestU32Coder(0xFFFFFFFFu, 34);  // largest uint32_t
+}
+
+void TestU64Coder(const uint64_t value, const size_t expected_bits_written) {
+  BitWriter bit_writer;
+  BitWriter::Allotment budget(
+      &bit_writer, RoundUpBitsToByteMultiple(U64Coder::MaxEncodedBits()));
+
+  // The size predicted by CanEncode must match what Write actually emits.
+  size_t predicted_bits;
+  EXPECT_TRUE(U64Coder::CanEncode(value, &predicted_bits));
+  EXPECT_EQ(expected_bits_written, predicted_bits);
+  EXPECT_TRUE(U64Coder::Write(value, &bit_writer));
+  EXPECT_EQ(expected_bits_written, bit_writer.BitsWritten());
+
+  bit_writer.ZeroPadToByte();
+  budget.ReclaimAndCharge(&bit_writer, 0, nullptr);
+
+  // Reading the padded stream back must reproduce `value`.
+  BitReader bit_reader(bit_writer.GetSpan());
+  EXPECT_EQ(value, U64Coder::Read(&bit_reader));
+  EXPECT_TRUE(bit_reader.Close());
+}
+
+TEST(FieldsTest, U64CoderTest) {
+  // Values that should take 2 bits (selector 00): 0
+  TestU64Coder(0, 2);
+
+  // Values that should take 6 bits (2 for selector, 4 for value): 1..16
+  TestU64Coder(1, 6);
+  TestU64Coder(2, 6);
+  TestU64Coder(8, 6);
+  TestU64Coder(15, 6);
+  TestU64Coder(16, 6);
+
+  // Values that should take 10 bits (2 for selector, 8 for value): 17..272
+  TestU64Coder(17, 10);
+  TestU64Coder(18, 10);
+  TestU64Coder(100, 10);
+  TestU64Coder(271, 10);
+  TestU64Coder(272, 10);
+
+  // Values that should take 15 bits (2 for selector, 12 for value, 1 for varint
+  // end): 273..4095 (smaller values use the shorter forms above)
+  TestU64Coder(273, 15);
+  TestU64Coder(274, 15);
+  TestU64Coder(1000, 15);
+  TestU64Coder(4094, 15);
+  TestU64Coder(4095, 15);
+
+  // 24 bits (of which 20 carry the value): 4096..1048575
+  TestU64Coder(4096, 24);
+  TestU64Coder(4097, 24);
+  TestU64Coder(10000, 24);
+  TestU64Coder(1048574, 24);
+  TestU64Coder(1048575, 24);
+
+  // 33 bits (of which 28 carry the value): 1048576..268435455
+  TestU64Coder(1048576, 33);
+  TestU64Coder(1048577, 33);
+  TestU64Coder(10000000, 33);
+  TestU64Coder(268435454, 33);
+  TestU64Coder(268435455, 33);
+
+  // 42 bits (of which 36 carry the value): 268435456..68719476735
+  TestU64Coder(268435456ull, 42);
+  TestU64Coder(268435457ull, 42);
+  TestU64Coder(1000000000ull, 42);
+  TestU64Coder(68719476734ull, 42);
+  TestU64Coder(68719476735ull, 42);
+
+  // 51 bits (of which 44 carry the value): 68719476736..17592186044415
+  TestU64Coder(68719476736ull, 51);
+  TestU64Coder(68719476737ull, 51);
+  TestU64Coder(1000000000000ull, 51);
+  TestU64Coder(17592186044414ull, 51);
+  TestU64Coder(17592186044415ull, 51);
+
+  // 60 bits (of which 52 carry the value):
+  // 17592186044416..4503599627370495
+  TestU64Coder(17592186044416ull, 60);
+  TestU64Coder(17592186044417ull, 60);
+  TestU64Coder(100000000000000ull, 60);
+  TestU64Coder(4503599627370494ull, 60);
+  TestU64Coder(4503599627370495ull, 60);
+
+  // 69 bits (of which 60 carry the value):
+  // 4503599627370496..1152921504606846975
+  TestU64Coder(4503599627370496ull, 69);
+  TestU64Coder(4503599627370497ull, 69);
+  TestU64Coder(10000000000000000ull, 69);
+  TestU64Coder(1152921504606846974ull, 69);
+  TestU64Coder(1152921504606846975ull, 69);
+
+  // 73 bits (of which 64 carry the value):
+  // 1152921504606846976..18446744073709551615
+  TestU64Coder(1152921504606846976ull, 73);
+  TestU64Coder(1152921504606846977ull, 73);
+  TestU64Coder(10000000000000000000ull, 73);
+  TestU64Coder(18446744073709551614ull, 73);
+  TestU64Coder(18446744073709551615ull, 73);
+}
+
+Status TestF16Coder(const float value) {
+  // An unencodable value is reported to the caller rather than asserted on.
+  size_t bits_needed;
+  if (!F16Coder::CanEncode(value, &bits_needed)) {
+    return false;
+  }
+  EXPECT_EQ(F16Coder::MaxEncodedBits(), bits_needed);
+  BitWriter bit_writer;
+  BitWriter::Allotment budget(&bit_writer,
+                              RoundUpBitsToByteMultiple(bits_needed));
+  EXPECT_TRUE(F16Coder::Write(value, &bit_writer));
+  EXPECT_EQ(F16Coder::MaxEncodedBits(), bit_writer.BitsWritten());
+  bit_writer.ZeroPadToByte();
+  budget.ReclaimAndCharge(&bit_writer, 0, nullptr);
+
+  BitReader bit_reader(bit_writer.GetSpan());
+  float round_tripped;
+  EXPECT_TRUE(F16Coder::Read(&bit_reader, &round_tripped));
+  // Every value passed in by the tests is exactly representable.
+  EXPECT_EQ(value, round_tripped);
+  EXPECT_TRUE(bit_reader.Close());
+  return true;
+}
+
+TEST(FieldsTest, F16CoderTest) {
+  for (float sign : {-1.0f, 1.0f}) {
+    // (in IEEE binary16, magnitudes below 2^-14 = 1/16384 would be subnormal)
+    for (float mag : {0.0f, 0.5f, 1.0f, 2.0f, 2.5f, 16.015625f, 1.0f / 4096,
+                      1.0f / 16384, 65504.0f}) {
+      EXPECT_TRUE(TestF16Coder(sign * mag));
+    }
+  }
+
+  // Out of range: beyond +/-65504, the largest magnitude accepted above.
+  EXPECT_FALSE(TestF16Coder(65504.01f));
+  EXPECT_FALSE(TestF16Coder(-65505.0f));
+}
+
+// Ensures Read(Write()) returns the same fields.
+TEST(FieldsTest, TestRoundtripSize) {
+  for (int i = 0; i < 8; i++) {
+    SizeHeader size;
+    ASSERT_TRUE(size.Set(123 + 77 * i, 7 + i));  // eight distinct size pairs
+
+    size_t extension_bits = 999, total_bits = 999;  // Initialize as garbage.
+    ASSERT_TRUE(Bundle::CanEncode(size, &extension_bits, &total_bits));
+    EXPECT_EQ(0u, extension_bits);
+
+    BitWriter writer;
+    ASSERT_TRUE(WriteSizeHeader(size, &writer, 0, nullptr));
+    EXPECT_EQ(total_bits, writer.BitsWritten());  // prediction matches output
+    writer.ZeroPadToByte();
+
+    SizeHeader size2;
+    BitReader reader(writer.GetSpan());
+    ASSERT_TRUE(ReadSizeHeader(&reader, &size2));
+    EXPECT_EQ(total_bits, reader.TotalBitsConsumed());  // read == written
+    EXPECT_TRUE(reader.Close());
+
+    EXPECT_EQ(size.xsize(), size2.xsize());
+    EXPECT_EQ(size.ysize(), size2.ysize());
+  }
+}
+
+// Ensure all values can be reached by the encoding.
+TEST(FieldsTest, TestCropRect) {
+  CodecMetadata metadata;
+  for (int32_t i = -999; i < 19000; ++i) {  // negative and positive origins
+    FrameHeader f(&metadata);
+    f.custom_size_or_origin = true;
+    f.frame_origin.x0 = i;
+    f.frame_origin.y0 = i;
+    f.frame_size.xsize = 1000 + i;  // always >= 1 because i >= -999
+    f.frame_size.ysize = 1000 + i;
+    size_t extension_bits = 0, total_bits = 0;
+    ASSERT_TRUE(Bundle::CanEncode(f, &extension_bits, &total_bits));
+    EXPECT_EQ(0u, extension_bits);
+    EXPECT_GE(total_bits, 9u);  // lower bound only; exact size varies with i
+  }
+}
+TEST(FieldsTest, TestPreview) {
+  // (div8 cannot represent 4360, but !div8 can go a little higher)
+  for (uint32_t i = 1; i < 4360; ++i) {
+    PreviewHeader p;
+    ASSERT_TRUE(p.Set(i, i));  // square previews of every size below 4360
+    size_t extension_bits = 0, total_bits = 0;
+    ASSERT_TRUE(Bundle::CanEncode(p, &extension_bits, &total_bits));
+    EXPECT_EQ(0u, extension_bits);
+    EXPECT_GE(total_bits, 6u);  // lower bound only; exact size varies with i
+  }
+}
+
+// Ensures Read(Write()) returns the same fields.
+TEST(FieldsTest, TestRoundtripFrame) {
+  CodecMetadata metadata;
+  FrameHeader h(&metadata);
+  h.extensions = 0x800;  // non-default value that must survive the round trip
+
+  size_t extension_bits = 999, total_bits = 999;  // Initialize as garbage.
+  ASSERT_TRUE(Bundle::CanEncode(h, &extension_bits, &total_bits));
+  EXPECT_EQ(0u, extension_bits);  // the flag alone adds no extension payload
+  BitWriter writer;
+  ASSERT_TRUE(WriteFrameHeader(h, &writer, nullptr));
+  EXPECT_EQ(total_bits, writer.BitsWritten());
+  writer.ZeroPadToByte();
+
+  FrameHeader h2(&metadata);
+  BitReader reader(writer.GetSpan());
+  ASSERT_TRUE(ReadFrameHeader(&reader, &h2));
+  EXPECT_EQ(total_bits, reader.TotalBitsConsumed());
+  EXPECT_TRUE(reader.Close());
+
+  EXPECT_EQ(h.extensions, h2.extensions);
+  EXPECT_EQ(h.flags, h2.flags);
+}
+
+#ifndef JXL_CRASH_ON_ERROR
+// Ensure out-of-bounds values cause an error.
+TEST(FieldsTest, TestOutOfRange) {
+  SizeHeader h;
+  ASSERT_TRUE(h.Set(0xFFFFFFFFull, 0xFFFFFFFFull));  // Set itself accepts this
+  size_t extension_bits = 999, total_bits = 999;  // Initialize as garbage.
+  ASSERT_FALSE(Bundle::CanEncode(h, &extension_bits, &total_bits));
+}
+#endif
+
+struct OldBundle : public Fields {
+  OldBundle() { Bundle::Init(this); }
+  JXL_FIELDS_NAME(OldBundle)
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override {
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Val(1), Bits(2), Bits(3), Bits(4), 1, &old_small));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->F16(1.125f, &old_f));
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Bits(7), Bits(12), Bits(16), Bits(32), 0, &old_large));
+
+    JXL_QUIET_RETURN_IF_ERROR(visitor->BeginExtensions(&extensions));
+    return visitor->EndExtensions();  // This version has no extension fields.
+  }
+
+  uint32_t old_small;  // default 1
+  float old_f;  // default 1.125f
+  uint32_t old_large;  // default 0
+  uint64_t extensions;  // visited by BeginExtensions
+};
+
+struct NewBundle : public Fields {
+  NewBundle() { Bundle::Init(this); }
+  JXL_FIELDS_NAME(NewBundle)
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override {
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Val(1), Bits(2), Bits(3), Bits(4), 1, &old_small));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->F16(1.125f, &old_f));
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Bits(7), Bits(12), Bits(16), Bits(32), 0, &old_large));
+
+    JXL_QUIET_RETURN_IF_ERROR(visitor->BeginExtensions(&extensions));
+    if (visitor->Conditional(extensions & 1)) {  // first extension group
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->U32(Val(2), Bits(2), Bits(3), Bits(4), 2, &new_small));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(-2.0f, &new_f));
+    }
+    if (visitor->Conditional(extensions & 2)) {  // second extension group
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->U32(Bits(9), Bits(12), Bits(16), Bits(32), 0, &new_large));
+    }
+    return visitor->EndExtensions();
+  }
+
+  uint32_t old_small;  // same leading fields, in the same order, as OldBundle
+  float old_f;
+  uint32_t old_large;
+  uint64_t extensions;
+
+  // Present only if extensions & 1
+  uint32_t new_small = 2;
+  float new_f = -2.0f;
+  // Present only if extensions & 2
+  uint32_t new_large = 0;
+};
+
+TEST(FieldsTest, TestNewDecoderOldData) {
+  OldBundle old_bundle;
+  old_bundle.old_large = 123;
+  old_bundle.old_f = 3.75f;
+  old_bundle.extensions = 0;
+
+  // Write the old-format bundle to the bit stream
+  const size_t kMaxOutBytes = 999;
+  BitWriter writer;
+  // Make sure values are initialized by code under test.
+  size_t extension_bits = 12345, total_bits = 12345;
+  ASSERT_TRUE(Bundle::CanEncode(old_bundle, &extension_bits, &total_bits));
+  ASSERT_LE(total_bits, kMaxOutBytes * kBitsPerByte);
+  EXPECT_EQ(0u, extension_bits);
+  AuxOut aux_out;
+  ASSERT_TRUE(Bundle::Write(old_bundle, &writer, kLayerHeader, &aux_out));
+
+  BitWriter::Allotment allotment(&writer,
+                                 kMaxOutBytes * kBitsPerByte - total_bits);
+  writer.Write(20, 0xA55A);  // sentinel placed right after the bundle
+  writer.ZeroPadToByte();
+  allotment.ReclaimAndCharge(&writer, kLayerHeader, nullptr);
+
+  ASSERT_LE(writer.GetSpan().size(), kMaxOutBytes);
+  BitReader reader(writer.GetSpan());
+  NewBundle new_bundle;  // the newer layout reads the older stream
+  ASSERT_TRUE(Bundle::Read(&reader, &new_bundle));
+  EXPECT_EQ(reader.TotalBitsConsumed(),
+            aux_out.layers[kLayerHeader].total_bits);
+  EXPECT_EQ(reader.ReadBits(20), 0xA55Au);  // reader stopped at the sentinel
+  EXPECT_TRUE(reader.Close());
+
+  // Old fields are the same in both
+  EXPECT_EQ(old_bundle.extensions, new_bundle.extensions);
+  EXPECT_EQ(old_bundle.old_small, new_bundle.old_small);
+  EXPECT_EQ(old_bundle.old_f, new_bundle.old_f);
+  EXPECT_EQ(old_bundle.old_large, new_bundle.old_large);
+  // New fields match their defaults
+  EXPECT_EQ(2u, new_bundle.new_small);
+  EXPECT_EQ(-2.0f, new_bundle.new_f);
+  EXPECT_EQ(0u, new_bundle.new_large);
+}
+
+TEST(FieldsTest, TestOldDecoderNewData) {
+  NewBundle new_bundle;
+  new_bundle.old_large = 123;
+  new_bundle.extensions = 3;  // enable both extension groups
+  new_bundle.new_f = 999.0f;
+  new_bundle.new_large = 456;
+
+  // Write the new-format bundle to the bit stream
+  constexpr size_t kMaxOutBytes = 999;
+  BitWriter writer;
+  // Make sure values are initialized by code under test.
+  size_t extension_bits = 12345, total_bits = 12345;
+  ASSERT_TRUE(Bundle::CanEncode(new_bundle, &extension_bits, &total_bits));
+  EXPECT_NE(0u, extension_bits);  // the extension fields occupy space
+  AuxOut aux_out;
+  ASSERT_TRUE(Bundle::Write(new_bundle, &writer, kLayerHeader, &aux_out));
+  ASSERT_LE(aux_out.layers[kLayerHeader].total_bits,
+            kMaxOutBytes * kBitsPerByte);
+
+  BitWriter::Allotment allotment(
+      &writer,
+      kMaxOutBytes * kBitsPerByte - aux_out.layers[kLayerHeader].total_bits);
+  // Ensure Read skips the additional fields
+  writer.Write(20, 0xA55A);  // sentinel placed right after the bundle
+  writer.ZeroPadToByte();
+  allotment.ReclaimAndCharge(&writer, kLayerHeader, nullptr);
+
+  BitReader reader(writer.GetSpan());
+  OldBundle old_bundle;  // the older layout reads the newer stream
+  ASSERT_TRUE(Bundle::Read(&reader, &old_bundle));
+  EXPECT_EQ(reader.TotalBitsConsumed(),
+            aux_out.layers[kLayerHeader].total_bits);
+  EXPECT_EQ(reader.ReadBits(20), 0xA55Au);  // reader stopped at the sentinel
+  EXPECT_TRUE(reader.Close());
+
+  // Old fields are the same in both
+  EXPECT_EQ(new_bundle.extensions, old_bundle.extensions);
+  EXPECT_EQ(new_bundle.old_small, old_bundle.old_small);
+  EXPECT_EQ(new_bundle.old_f, old_bundle.old_f);
+  EXPECT_EQ(new_bundle.old_large, old_bundle.old_large);
+  // (Can't check new fields because old decoder doesn't know about them)
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/frame_header.cc b/third-party/libjxl/libjxl/lib/jxl/frame_header.cc
new file mode 100644
index 0000000000..6b4b145112
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/frame_header.cc
@@ -0,0 +1,500 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/frame_header.h"
+
+#include <sstream>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+
+constexpr uint8_t YCbCrChromaSubsampling::kHShift[] = {0, 1, 1, 0};  // NOTE(review): presumably horizontal shift per mode
+constexpr uint8_t YCbCrChromaSubsampling::kVShift[] = {0, 1, 0, 1};  // NOTE(review): presumably vertical shift per mode
+
+static Status VisitBlendMode(Visitor* JXL_RESTRICT visitor,
+                             BlendMode default_value, BlendMode* blend_mode) {
+  // The first three selectors name a mode directly. BitsOffset(2, 3) can yield
+  // 3..6, which is why the result is range-checked after the visit.
+  const U32Enc enc(Val(static_cast<uint32_t>(BlendMode::kReplace)),
+                   Val(static_cast<uint32_t>(BlendMode::kAdd)),
+                   Val(static_cast<uint32_t>(BlendMode::kBlend)),
+                   BitsOffset(2, 3));
+  uint32_t raw_mode = static_cast<uint32_t>(*blend_mode);
+  JXL_QUIET_RETURN_IF_ERROR(
+      visitor->U32(enc, static_cast<uint32_t>(default_value), &raw_mode));
+  if (raw_mode > 4) return JXL_FAILURE("Invalid blend_mode");
+  *blend_mode = static_cast<BlendMode>(raw_mode);
+  return true;
+}
+
+static Status VisitFrameType(Visitor* JXL_RESTRICT visitor,
+                             FrameType default_value, FrameType* frame_type) {
+  // Every selector is a direct FrameType value, so no range check follows.
+  const U32Enc enc(Val(static_cast<uint32_t>(FrameType::kRegularFrame)),
+                   Val(static_cast<uint32_t>(FrameType::kDCFrame)),
+                   Val(static_cast<uint32_t>(FrameType::kReferenceOnly)),
+                   Val(static_cast<uint32_t>(FrameType::kSkipProgressive)));
+  uint32_t raw_type = static_cast<uint32_t>(*frame_type);
+  JXL_QUIET_RETURN_IF_ERROR(
+      visitor->U32(enc, static_cast<uint32_t>(default_value), &raw_type));
+  *frame_type = static_cast<FrameType>(raw_type);
+  return true;
+}
+
+BlendingInfo::BlendingInfo() { Bundle::Init(this); }  // NOTE(review): Init presumably applies field defaults.
+
+Status BlendingInfo::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  JXL_QUIET_RETURN_IF_ERROR(
+      VisitBlendMode(visitor, BlendMode::kReplace, &mode));
+  // `mode` is not modified below, so these predicates are computed once.
+  const bool alpha_blended =
+      nonserialized_num_extra_channels > 0 &&
+      (mode == BlendMode::kBlend || mode == BlendMode::kAlphaWeightedAdd);
+  const bool may_clamp = alpha_blended || mode == BlendMode::kMul;
+  if (visitor->Conditional(alpha_blended)) {
+    // Up to 11 alpha channels for blending.
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+        Val(0), Val(1), Val(2), BitsOffset(3, 3), 0, &alpha_channel));
+    const bool out_of_range = alpha_channel >= nonserialized_num_extra_channels;
+    if (visitor->IsReading() && out_of_range) {
+      return JXL_FAILURE("Invalid alpha channel for blending");
+    }
+  }
+  if (visitor->Conditional(may_clamp)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &clamp));
+  }
+  // 'old' frame for blending. Only necessary if this is not a full frame, or
+  // blending is not kReplace.
+  if (visitor->Conditional(mode != BlendMode::kReplace ||
+                           nonserialized_is_partial_frame)) {
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Val(0), Val(1), Val(2), Val(3), 0, &source));
+  }
+  return true;
+}
+
+#if JXL_DEBUG_V_LEVEL >= 1
+std::string BlendingInfo::DebugString() const {
+  std::ostringstream os;
+  os << (mode == BlendMode::kReplace            ? "Replace"
+         : mode == BlendMode::kAdd              ? "Add"
+         : mode == BlendMode::kBlend            ? "Blend"
+         : mode == BlendMode::kAlphaWeightedAdd ? "AlphaWeightedAdd"
+                                                : "Mul");  // any other mode
+  if (nonserialized_num_extra_channels > 0 &&
+      (mode == BlendMode::kBlend || mode == BlendMode::kAlphaWeightedAdd)) {
+    os << ",alpha=" << alpha_channel << ",clamp=" << clamp;
+  } else if (mode == BlendMode::kMul) {  // kMul has clamp but no alpha channel
+    os << ",clamp=" << clamp;
+  }
+  if (mode != BlendMode::kReplace || nonserialized_is_partial_frame) {
+    os << ",source=" << source;  // same condition as in VisitFields
+  }
+  return os.str();
+}
+#endif
+
+AnimationFrame::AnimationFrame(const CodecMetadata* metadata)
+    : nonserialized_metadata(metadata) {  // may be nullptr; VisitFields checks
+  Bundle::Init(this);  // NOTE(review): Init presumably applies field defaults.
+}
+Status AnimationFrame::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  const bool has_meta = nonserialized_metadata != nullptr;
+  if (visitor->Conditional(has_meta &&
+                           nonserialized_metadata->m.have_animation)) {
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Val(0), Val(1), Bits(8), Bits(32), 0, &duration));
+  }
+  // The timecode is visited only when the animation declares timecodes.
+  if (visitor->Conditional(
+          has_meta && nonserialized_metadata->m.animation.have_timecodes)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(32, 0, &timecode));
+  }
+  return true;
+}
+
+YCbCrChromaSubsampling::YCbCrChromaSubsampling() { Bundle::Init(this); }  // NOTE(review): Init presumably applies field defaults.
+Passes::Passes() { Bundle::Init(this); }  // NOTE(review): same assumption as above.
+Status Passes::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  JXL_QUIET_RETURN_IF_ERROR(
+      visitor->U32(Val(1), Val(2), Val(3), BitsOffset(3, 4), 1, &num_passes));
+  JXL_ASSERT(num_passes <= kMaxNumPasses);  // Cannot happen when reading
+
+  if (visitor->Conditional(num_passes != 1)) {  // single pass: nothing else
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+        Val(0), Val(1), Val(2), BitsOffset(1, 3), 0, &num_downsample));
+    JXL_ASSERT(num_downsample <= 4);  // 1,2,4,8
+    if (num_downsample > num_passes) {
+      return JXL_FAILURE("num_downsample %u > num_passes %u", num_downsample,
+                         num_passes);
+    }
+
+    for (uint32_t i = 0; i < num_passes - 1; i++) {  // all but the last pass
+      JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(2, 0, &shift[i]));
+    }
+    shift[num_passes - 1] = 0;  // The last pass's shift is fixed, not visited.
+
+    for (uint32_t i = 0; i < num_downsample; ++i) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->U32(Val(1), Val(2), Val(4), Val(8), 1, &downsample[i]));
+      if (i > 0 && downsample[i] >= downsample[i - 1]) {  // strictly decreasing
+        return JXL_FAILURE("downsample sequence should be decreasing");
+      }
+    }
+    for (uint32_t i = 0; i < num_downsample; ++i) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->U32(Val(0), Val(1), Val(2), Bits(3), 0, &last_pass[i]));
+      if (i > 0 && last_pass[i] <= last_pass[i - 1]) {  // strictly increasing
+        return JXL_FAILURE("last_pass sequence should be increasing");
+      }
+      if (last_pass[i] >= num_passes) {  // must index an existing pass
+        return JXL_FAILURE("last_pass %u >= num_passes %u", last_pass[i],
+                           num_passes);
+      }
+    }
+  }
+
+  return true;
+}
+
+#if JXL_DEBUG_V_LEVEL >= 1
+std::string Passes::DebugString() const {
+  std::ostringstream os;
+  os << "p=" << num_passes;
+  if (num_downsample) {
+    os << ",ds=";  // entries are "last_pass:downsample", separated by ';'
+    for (uint32_t i = 0; i < num_downsample; ++i) {
+      os << last_pass[i] << ":" << downsample[i];
+      if (i + 1 < num_downsample) os << ";";
+    }
+  }
+  bool have_shifts = false;  // shifts are printed only if any is nonzero
+  for (uint32_t i = 0; i < num_passes; ++i) {
+    if (shift[i]) have_shifts = true;
+  }
+  if (have_shifts) {
+    os << ",shifts=";  // one entry per pass, separated by ';'
+    for (uint32_t i = 0; i < num_passes; ++i) {
+      os << shift[i];
+      if (i + 1 < num_passes) os << ";";
+    }
+  }
+  return os.str();
+}
+#endif
+
+FrameHeader::FrameHeader(const CodecMetadata* metadata)
+    : animation_frame(metadata), nonserialized_metadata(metadata) {
+  Bundle::Init(this);  // NOTE(review): Init presumably applies field defaults.
+}
+
+Status ReadFrameHeader(BitReader* JXL_RESTRICT reader,
+                       FrameHeader* JXL_RESTRICT frame) {
+  return Bundle::Read(reader, frame);  // Thin wrapper over the generic reader.
+}
+
+Status FrameHeader::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  if (visitor->AllDefault(*this, &all_default)) {
+    // Overwrite all serialized fields, but not any nonserialized_*.
+    visitor->SetDefault(this);
+    return true;
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(
+      VisitFrameType(visitor, FrameType::kRegularFrame, &frame_type));
+  if (visitor->IsReading() && nonserialized_is_preview &&
+      frame_type != kRegularFrame) {
+    return JXL_FAILURE("Only regular frame could be a preview");
+  }
+
+  // FrameEncoding.
+  bool is_modular = (encoding == FrameEncoding::kModular);
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &is_modular));
+  encoding = (is_modular ? FrameEncoding::kModular : FrameEncoding::kVarDCT);
+
+  // Flags
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U64(0, &flags));
+
+  // Color transform
+  bool xyb_encoded = nonserialized_metadata == nullptr ||
+                     nonserialized_metadata->m.xyb_encoded;
+
+  if (xyb_encoded) {
+    color_transform = ColorTransform::kXYB;
+  } else {
+    // Alternate if kYCbCr.
+    bool alternate = color_transform == ColorTransform::kYCbCr;
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &alternate));
+    color_transform =
+        (alternate ? ColorTransform::kYCbCr : ColorTransform::kNone);
+  }
+
+  // Chroma subsampling for YCbCr, if no DC frame is used.
+  if (visitor->Conditional(color_transform == ColorTransform::kYCbCr &&
+                           ((flags & kUseDcFrame) == 0))) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&chroma_subsampling));
+  }
+
+  size_t num_extra_channels =
+      nonserialized_metadata != nullptr
+          ? nonserialized_metadata->m.extra_channel_info.size()
+          : 0;
+
+  // Upsampling
+  if (visitor->Conditional((flags & kUseDcFrame) == 0)) {
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Val(1), Val(2), Val(4), Val(8), 1, &upsampling));
+    if (nonserialized_metadata != nullptr &&
+        visitor->Conditional(num_extra_channels != 0)) {
+      const std::vector<ExtraChannelInfo>& extra_channels =
+          nonserialized_metadata->m.extra_channel_info;
+      extra_channel_upsampling.resize(extra_channels.size(), 1);
+      for (size_t i = 0; i < extra_channels.size(); ++i) {
+        uint32_t dim_shift =
+            nonserialized_metadata->m.extra_channel_info[i].dim_shift;
+        uint32_t& ec_upsampling = extra_channel_upsampling[i];
+        ec_upsampling >>= dim_shift;
+        JXL_QUIET_RETURN_IF_ERROR(
+            visitor->U32(Val(1), Val(2), Val(4), Val(8), 1, &ec_upsampling));
+        ec_upsampling <<= dim_shift;
+        if (ec_upsampling < upsampling) {
+          return JXL_FAILURE(
+              "EC upsampling (%u) < color upsampling (%u), which is invalid.",
+              ec_upsampling, upsampling);
+        }
+        if (ec_upsampling > 8) {
+          return JXL_FAILURE("EC upsampling too large (%u)", ec_upsampling);
+        }
+      }
+    } else {
+      extra_channel_upsampling.clear();
+    }
+  }
+
+  // Modular- or VarDCT-specific data.
+  if (visitor->Conditional(encoding == FrameEncoding::kModular)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(2, 1, &group_size_shift));
+  }
+  if (visitor->Conditional(encoding == FrameEncoding::kVarDCT &&
+                           color_transform == ColorTransform::kXYB)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 3, &x_qm_scale));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 2, &b_qm_scale));
+  } else {
+    x_qm_scale = b_qm_scale = 2;  // noop
+  }
+
+  // Not useful for kPatchSource
+  if (visitor->Conditional(frame_type != FrameType::kReferenceOnly)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&passes));
+  }
+
+  if (visitor->Conditional(frame_type == FrameType::kDCFrame)) {
+    // Up to 4 pyramid levels - for up to 16384x downsampling.
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Val(1), Val(2), Val(3), Val(4), 1, &dc_level));
+  }
+  if (frame_type != FrameType::kDCFrame) {
+    dc_level = 0;
+  }
+
+  bool is_partial_frame = false;
+  if (visitor->Conditional(frame_type != FrameType::kDCFrame)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &custom_size_or_origin));
+    if (visitor->Conditional(custom_size_or_origin)) {
+      const U32Enc enc(Bits(8), BitsOffset(11, 256), BitsOffset(14, 2304),
+                       BitsOffset(30, 18688));
+      // Frame offset, only if kRegularFrame or kSkipProgressive.
+      if (visitor->Conditional(frame_type == FrameType::kRegularFrame ||
+                               frame_type == FrameType::kSkipProgressive)) {
+        uint32_t ux0 = PackSigned(frame_origin.x0);
+        uint32_t uy0 = PackSigned(frame_origin.y0);
+        JXL_QUIET_RETURN_IF_ERROR(visitor->U32(enc, 0, &ux0));
+        JXL_QUIET_RETURN_IF_ERROR(visitor->U32(enc, 0, &uy0));
+        frame_origin.x0 = UnpackSigned(ux0);
+        frame_origin.y0 = UnpackSigned(uy0);
+      }
+      // Frame size
+      JXL_QUIET_RETURN_IF_ERROR(visitor->U32(enc, 0, &frame_size.xsize));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->U32(enc, 0, &frame_size.ysize));
+      if (custom_size_or_origin &&
+          (frame_size.xsize == 0 || frame_size.ysize == 0)) {
+        return JXL_FAILURE(
+            "Invalid crop dimensions for frame: zero width or height");
+      }
+      int32_t image_xsize = default_xsize();
+      int32_t image_ysize = default_ysize();
+      if (frame_type == FrameType::kRegularFrame ||
+          frame_type == FrameType::kSkipProgressive) {
+        is_partial_frame |= frame_origin.x0 > 0;
+        is_partial_frame |= frame_origin.y0 > 0;
+        is_partial_frame |= (static_cast<int32_t>(frame_size.xsize) +
+                             frame_origin.x0) < image_xsize;
+        is_partial_frame |= (static_cast<int32_t>(frame_size.ysize) +
+                             frame_origin.y0) < image_ysize;
+      }
+    }
+  }
+
+  // Blending info, animation info and whether this is the last frame or not.
+  if (visitor->Conditional(frame_type == FrameType::kRegularFrame ||
+                           frame_type == FrameType::kSkipProgressive)) {
+    blending_info.nonserialized_num_extra_channels = num_extra_channels;
+    blending_info.nonserialized_is_partial_frame = is_partial_frame;
+    JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&blending_info));
+    bool replace_all = (blending_info.mode == BlendMode::kReplace);
+    extra_channel_blending_info.resize(num_extra_channels);
+    for (size_t i = 0; i < num_extra_channels; i++) {
+      auto& ec_blending_info = extra_channel_blending_info[i];
+      ec_blending_info.nonserialized_is_partial_frame = is_partial_frame;
+      ec_blending_info.nonserialized_num_extra_channels = num_extra_channels;
+      JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&ec_blending_info));
+      replace_all &= (ec_blending_info.mode == BlendMode::kReplace);
+    }
+    if (visitor->IsReading() && nonserialized_is_preview) {
+      if (!replace_all || custom_size_or_origin) {
+        return JXL_FAILURE("Preview is not compatible with blending");
+      }
+    }
+    if (visitor->Conditional(nonserialized_metadata != nullptr &&
+                             nonserialized_metadata->m.have_animation)) {
+      animation_frame.nonserialized_metadata = nonserialized_metadata;
+      JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&animation_frame));
+    }
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(true, &is_last));
+  }
+  if (frame_type != FrameType::kRegularFrame) {
+    is_last = false;
+  }
+
+  // ID that can be used to refer to this frame. 0 for a non-zero-duration
+  // frame means that it will not be referenced. Not necessary for the last
+  // frame.
+  if (visitor->Conditional(frame_type != kDCFrame && !is_last)) {
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Val(0), Val(1), Val(2), Val(3), 0, &save_as_reference));
+  }
+
+  // If this frame is not blended on another frame post-color-transform, it may
+  // be stored for being referenced either before or after the color transform.
+  // If it is blended post-color-transform, it must be blended after. It must
+  // also be blended after if this is a kRegular frame that does not cover the
+  // full frame, as samples outside the partial region are from a
+  // post-color-transform frame.
+  if (frame_type != FrameType::kDCFrame) {
+    if (visitor->Conditional(CanBeReferenced() &&
+                             blending_info.mode == BlendMode::kReplace &&
+                             !is_partial_frame &&
+                             (frame_type == FrameType::kRegularFrame ||
+                              frame_type == FrameType::kSkipProgressive))) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->Bool(false, &save_before_color_transform));
+    } else if (visitor->Conditional(frame_type == FrameType::kReferenceOnly)) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->Bool(true, &save_before_color_transform));
+      if (!save_before_color_transform &&
+          (frame_size.xsize < nonserialized_metadata->xsize() ||
+           frame_size.ysize < nonserialized_metadata->ysize() ||
+           frame_origin.x0 != 0 || frame_origin.y0 != 0)) {
+        return JXL_FAILURE(
+            "non-patch reference frame with invalid crop: %" PRIuS "x%" PRIuS
+            "%+d%+d",
+            static_cast<size_t>(frame_size.xsize),
+            static_cast<size_t>(frame_size.ysize),
+            static_cast<int>(frame_origin.x0),
+            static_cast<int>(frame_origin.y0));
+      }
+    }
+  } else {
+    save_before_color_transform = true;
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(VisitNameString(visitor, &name));
+
+  loop_filter.nonserialized_is_modular = is_modular;
+  JXL_RETURN_IF_ERROR(visitor->VisitNested(&loop_filter));
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->BeginExtensions(&extensions));
+  // Extensions: in chronological order of being added to the format.
+  return visitor->EndExtensions();
+}
+
+#if JXL_DEBUG_V_LEVEL >= 1
+std::string FrameHeader::DebugString() const {  // Comma-separated summary.
+  std::ostringstream os;
+  os << (encoding == FrameEncoding::kVarDCT ? "VarDCT" : "Modular");
+  os << ",";
+  os << (frame_type == FrameType::kRegularFrame    ? "Regular"
+         : frame_type == FrameType::kDCFrame       ? "DC"
+         : frame_type == FrameType::kReferenceOnly ? "Reference"
+                                                   : "SkipProgressive");
+  if (frame_type == FrameType::kDCFrame) {
+    os << "(lv" << dc_level << ")";
+  }
+
+  if (flags) {
+    os << ",";
+    uint32_t remaining = flags;  // Set bits not printed yet; drives separator.
+
+#define TEST_FLAG(name)           \
+  if (flags & Flags::k##name) {   \
+    remaining &= ~Flags::k##name; \
+    os << #name;                  \
+    if (remaining) os << "|";     \
+  }
+    TEST_FLAG(Noise);
+    TEST_FLAG(Patches);
+    TEST_FLAG(Splines);
+    TEST_FLAG(UseDcFrame);
+    TEST_FLAG(SkipAdaptiveDCSmoothing);
+#undef TEST_FLAG
+  }
+
+  os << ",";
+  os << (color_transform == ColorTransform::kXYB     ? "XYB"
+         : color_transform == ColorTransform::kYCbCr ? "YCbCr"
+                                                     : "None");
+
+  if (encoding == FrameEncoding::kModular) {
+    os << ",shift=" << group_size_shift;
+  } else if (color_transform == ColorTransform::kXYB) {
+    os << ",qm=" << x_qm_scale << ";" << b_qm_scale;
+  }
+  if (frame_type != FrameType::kReferenceOnly) {  // Passes are coded.
+    os << "," << passes.DebugString();
+  }
+  if (custom_size_or_origin) {
+    os << ",xs=" << frame_size.xsize;
+    os << ",ys=" << frame_size.ysize;
+    if (frame_type == FrameType::kRegularFrame ||
+        frame_type == FrameType::kSkipProgressive) {  // Origin is coded.
+      os << ",x0=" << frame_origin.x0;
+      os << ",y0=" << frame_origin.y0;
+    }
+  }
+  if (upsampling > 1) os << ",up=" << upsampling;
+  if (loop_filter.gab) os << ",Gaborish";
+  if (loop_filter.epf_iters > 0) os << ",epf=" << loop_filter.epf_iters;
+  if (animation_frame.duration > 0) os << ",dur=" << animation_frame.duration;
+  if (frame_type == FrameType::kRegularFrame ||
+      frame_type == FrameType::kSkipProgressive) {  // Blending info is coded.
+    os << ",";
+    os << blending_info.DebugString();
+    for (size_t i = 0; i < extra_channel_blending_info.size(); ++i) {
+      os << (i == 0 ? "[" : ";");  // Bracketed, semicolon-separated list.
+      os << extra_channel_blending_info[i].DebugString();
+      if (i + 1 == extra_channel_blending_info.size()) os << "]";
+    }
+  }
+  if (save_as_reference > 0) os << ",ref=" << save_as_reference;
+  os << "," << (save_before_color_transform ? "before" : "after") << "_ct";
+  if (is_last) os << ",last";
+  return os.str();
+}
+#endif
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/frame_header.h b/third-party/libjxl/libjxl/lib/jxl/frame_header.h
new file mode 100644
index 0000000000..5580bcd6fe
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/frame_header.h
@@ -0,0 +1,503 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_FRAME_HEADER_H_
+#define LIB_JXL_FRAME_HEADER_H_
+
+// Frame header with backward and forward-compatible extension capability and
+// compressed integer fields.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image_metadata.h"
+#include "lib/jxl/loop_filter.h"
+
+namespace jxl {
+
+// TODO(eustas): move to proper place?
+// Also used by extra channel names.
+static inline Status VisitNameString(Visitor* JXL_RESTRICT visitor,
+                                     std::string* name) {
+  uint32_t name_length = static_cast<uint32_t>(name->length());
+  // Allows layer name lengths up to 1071 bytes (48 + 2^10 - 1).
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(0), Bits(4), BitsOffset(5, 16),
+                                         BitsOffset(10, 48), 0, &name_length));
+  if (visitor->IsReading()) {
+    name->resize(name_length);  // Make room for the bytes visited below.
+  }
+  for (size_t i = 0; i < name_length; i++) {
+    uint32_t c = static_cast<uint8_t>((*name)[i]);  // Widen as unsigned byte.
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(8, 0, &c));
+    (*name)[i] = static_cast<char>(c);  // Store back what the visitor left.
+  }
+  return true;
+}
+
+enum class FrameEncoding : uint32_t {
+  kVarDCT,
+  kModular,
+};
+
+enum class ColorTransform : uint32_t {
+  kXYB,    // Values are encoded with XYB. May only be used if
+           // ImageBundle::xyb_encoded.
+  kNone,   // Values are encoded according to the attached color profile. May
+           // only be used if !ImageBundle::xyb_encoded.
+  kYCbCr,  // Values are encoded according to the attached color profile, but
+           // transformed to YCbCr. May only be used if
+           // !ImageBundle::xyb_encoded.
+};
+
+inline std::array<int, 3> JpegOrder(ColorTransform ct, bool is_gray) {
+  if (is_gray) {
+    return {{0, 0, 0}};  // Grayscale: every entry refers to index 0.
+  }
+  JXL_ASSERT(ct != ColorTransform::kXYB);  // Only kNone/kYCbCr handled below.
+  if (ct == ColorTransform::kYCbCr) {
+    return {{1, 0, 2}};  // First two indices swapped, third unchanged.
+  } else {
+    return {{0, 1, 2}};  // Identity order.
+  }
+}
+
+struct YCbCrChromaSubsampling : public Fields {
+  YCbCrChromaSubsampling();
+  JXL_FIELDS_NAME(YCbCrChromaSubsampling)
+  size_t HShift(size_t c) const { return maxhs_ - kHShift[channel_mode_[c]]; }
+  size_t VShift(size_t c) const { return maxvs_ - kVShift[channel_mode_[c]]; }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override {
+    // TODO(veluca): consider allowing 4x downsamples
+    for (size_t i = 0; i < 3; i++) {  // Two bits of mode per channel.
+      JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(2, 0, &channel_mode_[i]));
+    }
+    Recompute();  // Refresh maxhs_/maxvs_ for the visited modes.
+    return true;
+  }
+
+  uint8_t MaxHShift() const { return maxhs_; }
+  uint8_t MaxVShift() const { return maxvs_; }
+
+  uint8_t RawHShift(size_t c) const { return kHShift[channel_mode_[c]]; }
+  uint8_t RawVShift(size_t c) const { return kVShift[channel_mode_[c]]; }
+
+  // Uses JPEG channel order (Y, Cb, Cr).
+  Status Set(const uint8_t* hsample, const uint8_t* vsample) {
+    for (size_t c = 0; c < 3; c++) {
+      size_t cjpeg = c < 2 ? c ^ 1 : c;  // Swap indices 0 and 1; keep 2.
+      size_t i = 0;  // Candidate channel mode, searched below.
+      for (; i < 4; i++) {
+        if (1 << kHShift[i] == hsample[cjpeg] &&
+            1 << kVShift[i] == vsample[cjpeg]) {
+          channel_mode_[c] = i;
+          break;
+        }
+      }
+      if (i == 4) {  // No mode matches this pair of sampling factors.
+        return JXL_FAILURE("Invalid subsample mode");
+      }
+    }
+    Recompute();  // Refresh maxhs_/maxvs_ for the new modes.
+    return true;
+  }
+
+  bool Is444() const {
+    return HShift(0) == 0 && VShift(0) == 0 &&  // Cb
+           HShift(2) == 0 && VShift(2) == 0 &&  // Cr
+           HShift(1) == 0 && VShift(1) == 0;    // Y
+  }
+
+  bool Is420() const {
+    return HShift(0) == 1 && VShift(0) == 1 &&  // Cb
+           HShift(2) == 1 && VShift(2) == 1 &&  // Cr
+           HShift(1) == 0 && VShift(1) == 0;    // Y
+  }
+
+  bool Is422() const {
+    return HShift(0) == 1 && VShift(0) == 0 &&  // Cb
+           HShift(2) == 1 && VShift(2) == 0 &&  // Cr
+           HShift(1) == 0 && VShift(1) == 0;    // Y
+  }
+
+  bool Is440() const {
+    return HShift(0) == 0 && VShift(0) == 1 &&  // Cb
+           HShift(2) == 0 && VShift(2) == 1 &&  // Cr
+           HShift(1) == 0 && VShift(1) == 0;    // Y
+  }
+
+  std::string DebugString() const {
+    if (Is444()) return "444";
+    if (Is420()) return "420";
+    if (Is422()) return "422";
+    if (Is440()) return "440";
+    return "cs" + std::to_string(channel_mode_[0]) +  // Fallback: raw modes.
+           std::to_string(channel_mode_[1]) + std::to_string(channel_mode_[2]);
+  }
+
+ private:
+  void Recompute() {  // Caches the largest raw shift over the three channels.
+    maxhs_ = 0;
+    maxvs_ = 0;
+    for (size_t i = 0; i < 3; i++) {
+      maxhs_ = std::max(maxhs_, kHShift[channel_mode_[i]]);
+      maxvs_ = std::max(maxvs_, kVShift[channel_mode_[i]]);
+    }
+  }
+  static const uint8_t kHShift[4];
+  static const uint8_t kVShift[4];
+  uint32_t channel_mode_[3];  // Per-channel index into kHShift/kVShift.
+  uint8_t maxhs_;  // Largest kHShift entry selected by any channel.
+  uint8_t maxvs_;  // Largest kVShift entry selected by any channel.
+};
+
+// Indicates how to combine the current frame with a previously-saved one. Can
+// be independently controlled for color and extra channels. Formulas are
+// indicative and treat alpha as if it is in range 0.0-1.0. In descriptions
+// below, alpha channel is the extra channel of type alpha used for blending
+// according to the blend_channel, or fully opaque if there is no alpha channel.
+// The blending specified here is used for performing blending *after* color
+// transforms - in linear sRGB if blending a XYB-encoded frame on another
+// XYB-encoded frame, in sRGB if blending a frame with kColorSpace == kSRGB, or
+// in the original colorspace otherwise. Blending in XYB or YCbCr is done by
+// using patches.
+enum class BlendMode {
+  // The new values (in the crop) replace the old ones: sample = new
+  kReplace = 0,
+  // The new values (in the crop) get added to the old ones: sample = old + new
+  kAdd = 1,
+  // The new values (in the crop) replace the old ones if alpha>0:
+  // For the alpha channel that is used as source:
+  // alpha = old + new * (1 - old)
+  // For other channels if !alpha_associated:
+  // sample = ((1 - new_alpha) * old * old_alpha + new_alpha * new) / alpha
+  // For other channels if alpha_associated:
+  // sample = (1 - new_alpha) * old + new
+  // The alpha formula applies to the alpha used for the division in the other
+  // channels formula, and applies to the alpha channel itself if its
+  // blend_channel value matches itself.
+  kBlend = 2,
+  // The new values (in the crop) are added to the old ones if alpha>0:
+  // For the alpha channel that is used as source:
+  // sample = old + new * (1 - old)
+  // For other channels: sample = old + alpha * new
+  kAlphaWeightedAdd = 3,
+  // The new values (in the crop) get multiplied by the old ones:
+  // sample = old * new
+  // The range of the new value matters for multiplication purposes, and its
+  // nominal range of 0..1 is computed the same way as this is done for the
+  // alpha values in kBlend and kAlphaWeightedAdd.
+  // If using kMul as a blend mode for color channels, no color transform is
+  // performed on the current frame.
+  kMul = 4,
+};
+
+struct BlendingInfo : public Fields {
+  BlendingInfo();
+  JXL_FIELDS_NAME(BlendingInfo)
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+  BlendMode mode;
+  // Which extra channel to use as alpha channel for blending, only encoded
+  // for blend modes that involve alpha and if there are more than 1 extra
+  // channels.
+  uint32_t alpha_channel;
+  // Clamp alpha or channel values to 0-1 range.
+  bool clamp;
+  // Frame ID to copy from (0-3). Only encoded if blend_mode is not kReplace.
+  uint32_t source;
+
+  std::string DebugString() const;
+
+  size_t nonserialized_num_extra_channels = 0;
+  bool nonserialized_is_partial_frame = false;
+};
+
+// Origin of the current frame. Only encoded for frames of type kRegularFrame
+// or kSkipProgressive.
+struct FrameOrigin {
+  int32_t x0, y0;  // can be negative.
+};
+
+// Size of the current frame.
+struct FrameSize {
+  uint32_t xsize, ysize;
+};
+
+// AnimationFrame defines duration of animation frames.
+struct AnimationFrame : public Fields {
+  explicit AnimationFrame(const CodecMetadata* metadata);
+  JXL_FIELDS_NAME(AnimationFrame)
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // How long to wait [in ticks, see Animation{}] after rendering.
+  // May be 0 if the current frame serves as a foundation for another frame.
+  uint32_t duration;
+
+  uint32_t timecode;  // 0xHHMMSSFF
+
+  // Must be set to the one ImageMetadata acting as the full codestream header,
+  // with correct xyb_encoded, list of extra channels, etc...
+  const CodecMetadata* nonserialized_metadata = nullptr;
+};
+
+// For decoding to lower resolutions. Only used for kRegular frames.
+struct Passes : public Fields {
+  Passes();
+  JXL_FIELDS_NAME(Passes)
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  void GetDownsamplingBracket(size_t pass, int& minShift, int& maxShift) const {
+    maxShift = 2;
+    minShift = 3;  // Starting values; refined pass by pass below.
+    for (size_t i = 0;; i++) {  // Exits through the return once i == pass.
+      for (uint32_t j = 0; j < num_downsample; ++j) {
+        if (i == last_pass[j]) {
+          if (downsample[j] == 8) minShift = 3;
+          if (downsample[j] == 4) minShift = 2;
+          if (downsample[j] == 2) minShift = 1;
+          if (downsample[j] == 1) minShift = 0;
+        }
+      }
+      if (i == num_passes - 1) minShift = 0;  // Final pass always reaches 0.
+      if (i == pass) return;
+      maxShift = minShift - 1;  // Next pass starts one below this minimum.
+    }
+  }
+
+  uint32_t GetDownsamplingTargetForCompletedPasses(uint32_t num_p) const {
+    if (num_p >= num_passes) return 1;  // Every pass is complete.
+    uint32_t retval = 8;  // Result when no listed pass is complete yet.
+    for (uint32_t i = 0; i < num_downsample; ++i) {
+      if (num_p > last_pass[i]) {  // Pass last_pass[i] is already complete.
+        retval = std::min(retval, downsample[i]);
+      }
+    }
+    return retval;
+  }
+
+  std::string DebugString() const;
+
+  uint32_t num_passes;      // <= kMaxNumPasses
+  uint32_t num_downsample;  // <= num_passes
+
+  // Array of num_downsample pairs. downsample=1/last_pass=num_passes-1 and
+  // downsample=8/last_pass=0 need not be specified; they are implicit.
+  uint32_t downsample[kMaxNumPasses];
+  uint32_t last_pass[kMaxNumPasses];
+  // Array of shift values for each pass. It is implicitly assumed to be 0 for
+  // the last pass.
+  uint32_t shift[kMaxNumPasses];
+};
+
+enum FrameType {
+  // A "regular" frame: might be a crop, and will be blended on a previous
+  // frame, if any, and displayed or blended in future frames.
+  kRegularFrame = 0,
+  // A DC frame: this frame is downsampled and will be *only* used as the DC of
+  // a future frame and, possibly, for previews. Cannot be cropped, blended, or
+  // referenced by patches or blending modes. Frames that *use* a DC frame
+  // cannot have non-default sizes either.
+  kDCFrame = 1,
+  // A PatchesSource frame: this frame will be only used as a source frame for
+  // taking patches. Can be cropped, but cannot have non-(0, 0) x0 and y0.
+  kReferenceOnly = 2,
+  // Same as kRegularFrame, but not used for progressive rendering. This also
+  // implies no early display of DC.
+  kSkipProgressive = 3,
+};
+
+// Image/frame := one or more of these, where the last has is_last = true.
+// Starts at a byte-aligned address "a"; the next pass starts at "a + size".
+struct FrameHeader : public Fields {
+  // Optional postprocessing steps. These flags are the source of truth;
+  // Override must set/clear them rather than change their meaning. Values
+  // chosen such that typical flags == 0 (encoded in only two bits).
+  enum Flags {
+    // Often but not always off => low bit value:
+
+    // Inject noise into decoded output.
+    kNoise = 1,
+
+    // Overlay patches.
+    kPatches = 2,
+
+    // 4, 8 = reserved for future sometimes-off
+
+    // Overlay splines.
+    kSplines = 16,
+
+    kUseDcFrame = 32,  // Implies kSkipAdaptiveDCSmoothing.
+
+    // 64 = reserved for future often-off
+
+    // Almost always on => negated:
+
+    kSkipAdaptiveDCSmoothing = 128,
+  };
+
+  explicit FrameHeader(const CodecMetadata* metadata);
+  JXL_FIELDS_NAME(FrameHeader)
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // Sets/clears `flag` based upon `condition`.
+  void UpdateFlag(const bool condition, const uint64_t flag) {
+    if (condition) {
+      flags |= flag;
+    } else {
+      flags &= ~flag;
+    }
+  }
+
+  // Returns true if this frame is supposed to be saved for future usage by
+  // other frames.
+  bool CanBeReferenced() const {
+    // DC frames cannot be referenced. The last frame cannot be referenced. A
+    // duration 0 frame makes little sense if it is not referenced. A
+    // non-duration 0 frame may or may not be referenced.
+    return !is_last && frame_type != FrameType::kDCFrame &&
+           (animation_frame.duration == 0 || save_as_reference != 0);
+  }
+
+  mutable bool all_default;
+
+  // Always present
+  FrameEncoding encoding;
+  // Some versions of UBSAN complain in VisitFrameType if not initialized.
+  FrameType frame_type = FrameType::kRegularFrame;
+
+  uint64_t flags;
+
+  ColorTransform color_transform;
+  YCbCrChromaSubsampling chroma_subsampling;
+
+  uint32_t group_size_shift;  // only if encoding == kModular;
+
+  uint32_t x_qm_scale;  // only if VarDCT and color_transform == kXYB
+  uint32_t b_qm_scale;  // only if VarDCT and color_transform == kXYB
+
+  std::string name;
+
+  // Skipped for kReferenceOnly.
+  Passes passes;
+
+  // Skipped for kDCFrame
+  bool custom_size_or_origin;
+  FrameSize frame_size;
+
+  // upsampling factors for color and extra channels.
+  // Upsampling is always performed before applying any inverse color transform.
+  // Skipped (1) if kUseDCFrame
+  uint32_t upsampling;
+  std::vector<uint32_t> extra_channel_upsampling;
+
+  // Only for kRegularFrame and kSkipProgressive frames.
+  FrameOrigin frame_origin;
+
+  BlendingInfo blending_info;
+  std::vector<BlendingInfo> extra_channel_blending_info;
+
+  // Animation info for this frame.
+  AnimationFrame animation_frame;
+
+  // This is the last frame.
+  bool is_last;
+
+  // ID to refer to this frame with. 0-3, not present if kDCFrame.
+  // 0 has a special meaning for kRegular frames of nonzero duration: it defines
+  // a frame that will not be referenced in the future.
+  uint32_t save_as_reference;
+
+  // Whether to save this frame before or after the color transform. A frame
+  // that is saved before the color transform can only be used for blending
+  // through patches. On the contrary, a frame that is saved after the color
+  // transform can only be used for blending through blending modes.
+  // Irrelevant for extra channel blending. Can only be true if
+  // blending_info.mode == kReplace and this is not a partial kRegularFrame; if
+  // this is a DC frame, it is always true.
+  bool save_before_color_transform;
+
+  uint32_t dc_level;  // 1-4 if kDCFrame (0 otherwise).
+
+  // Must be set to the one ImageMetadata acting as the full codestream header,
+  // with correct xyb_encoded, list of extra channels, etc...
+  const CodecMetadata* nonserialized_metadata = nullptr;
+
+  // NOTE: This is ignored by AllDefault.
+  LoopFilter loop_filter;
+
+  bool nonserialized_is_preview = false;
+
+  size_t default_xsize() const {
+    if (!nonserialized_metadata) return 0;  // No metadata attached.
+    if (nonserialized_is_preview) {
+      return nonserialized_metadata->m.preview_size.xsize();
+    }
+    return nonserialized_metadata->xsize();
+  }
+
+  size_t default_ysize() const {
+    if (!nonserialized_metadata) return 0;  // No metadata attached.
+    if (nonserialized_is_preview) {
+      return nonserialized_metadata->m.preview_size.ysize();
+    }
+    return nonserialized_metadata->ysize();
+  }
+
+  FrameDimensions ToFrameDimensions() const {
+    size_t xsize = default_xsize();
+    size_t ysize = default_ysize();
+
+    xsize = frame_size.xsize ? frame_size.xsize : xsize;  // 0 keeps default.
+    ysize = frame_size.ysize ? frame_size.ysize : ysize;  // 0 keeps default.
+
+    if (dc_level != 0) {
+      xsize = DivCeil(xsize, 1 << (3 * dc_level));  // Divide by 8^dc_level.
+      ysize = DivCeil(ysize, 1 << (3 * dc_level));  // Divide by 8^dc_level.
+    }
+
+    FrameDimensions frame_dim;
+    frame_dim.Set(xsize, ysize, group_size_shift,
+                  chroma_subsampling.MaxHShift(),
+                  chroma_subsampling.MaxVShift(),
+                  encoding == FrameEncoding::kModular, upsampling);
+    return frame_dim;
+  }
+
+  // True if a color transform should be applied to this frame.
+  bool needs_color_transform() const {
+    return !save_before_color_transform ||
+           frame_type == FrameType::kRegularFrame ||
+           frame_type == FrameType::kSkipProgressive;
+  }
+
+  std::string DebugString() const;
+
+  uint64_t extensions;
+};
+
+Status ReadFrameHeader(BitReader* JXL_RESTRICT reader,
+                       FrameHeader* JXL_RESTRICT frame);
+
+// Shared by enc/dec. 5F and 13 are by far the most common for d1/2/4/8, 0
+// ensures low overhead for small images.
+static constexpr U32Enc kOrderEnc =
+    U32Enc(Val(0x5F), Val(0x13), Val(0), Bits(kNumOrders));
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_FRAME_HEADER_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/gamma_correct_test.cc b/third-party/libjxl/libjxl/lib/jxl/gamma_correct_test.cc
new file mode 100644
index 0000000000..131ec4fa83
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/gamma_correct_test.cc
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdlib.h>
+
+#include <algorithm>
+
+#include "lib/jxl/enc_gamma_correct.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+TEST(GammaCorrectTest, TestLinearToSrgbEdgeCases) {
+  EXPECT_EQ(0, LinearToSrgb8Direct(0.0));
+  EXPECT_NEAR(0, LinearToSrgb8Direct(1E-6f), 2E-5);
+  EXPECT_EQ(0, LinearToSrgb8Direct(-1E-6f));  // Negatives give exactly 0.
+  EXPECT_EQ(0, LinearToSrgb8Direct(-1E6));
+  EXPECT_NEAR(1, LinearToSrgb8Direct(1 - 1E-6f), 1E-5);
+  EXPECT_EQ(1, LinearToSrgb8Direct(1 + 1E-6f));  // Above 1 gives exactly 1.
+  EXPECT_EQ(1, LinearToSrgb8Direct(1E6));
+}
+
+TEST(GammaCorrectTest, TestRoundTrip) {
+  // NOLINTNEXTLINE(clang-analyzer-security.FloatLoopCounter)
+  for (double linear = 0.0; linear <= 1.0; linear += 1E-7) {
+    const double srgb = LinearToSrgb8Direct(linear);
+    const double linear2 = Srgb8ToLinearDirect(srgb);  // Map back again.
+    ASSERT_LT(std::abs(linear - linear2), 2E-13)
+        << "linear = " << linear << ", linear2 = " << linear2;
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/gauss_blur.cc b/third-party/libjxl/libjxl/lib/jxl/gauss_blur.cc
new file mode 100644
index 0000000000..d9bc297d45
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/gauss_blur.cc
@@ -0,0 +1,619 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/gauss_blur.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <cmath>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/gauss_blur.cc"
+#include <hwy/cache_control.h>
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/matrix_ops.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Broadcast;
+using hwy::HWY_NAMESPACE::GetLane;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::NegMulSub;
+#if HWY_TARGET != HWY_SCALAR
+using hwy::HWY_NAMESPACE::ShiftLeftLanes;
+#endif
+using hwy::HWY_NAMESPACE::Vec;
+
+void FastGaussian1D(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+                    const float* JXL_RESTRICT in, intptr_t width,
+                    float* JXL_RESTRICT out) {
+  // Filters in[0, width) into out[0, width); samples outside count as zero.
+  // Although the current output depends on the previous two outputs, we can
+  // unroll up to 4x by precomputing up to fourth powers of the constants (more
+  // might hurt precision). Macro because #if tests it alongside HWY_TARGET.
+#define JXL_GAUSS_MAX_LANES 4
+  using D = HWY_CAPPED(float, JXL_GAUSS_MAX_LANES);
+  using V = Vec<D>;
+  const D d;
+  const V mul_in_1 = Load(d, rg->mul_in + 0 * 4);
+  const V mul_in_3 = Load(d, rg->mul_in + 1 * 4);
+  const V mul_in_5 = Load(d, rg->mul_in + 2 * 4);
+  const V mul_prev_1 = Load(d, rg->mul_prev + 0 * 4);
+  const V mul_prev_3 = Load(d, rg->mul_prev + 1 * 4);
+  const V mul_prev_5 = Load(d, rg->mul_prev + 2 * 4);
+  const V mul_prev2_1 = Load(d, rg->mul_prev2 + 0 * 4);
+  const V mul_prev2_3 = Load(d, rg->mul_prev2 + 1 * 4);
+  const V mul_prev2_5 = Load(d, rg->mul_prev2 + 2 * 4);
+  V prev_1 = Zero(d);
+  V prev_3 = Zero(d);
+  V prev_5 = Zero(d);
+  V prev2_1 = Zero(d);
+  V prev2_3 = Zero(d);
+  V prev2_5 = Zero(d);
+
+  const intptr_t N = rg->radius;
+
+  intptr_t n = -N + 1;  // negative n: warm-up steps that produce no output
+  // Left side with bounds checks and only write output after n >= 0.
+  const intptr_t first_aligned = RoundUpTo(N + 1, Lanes(d));
+  for (; n < std::min(first_aligned, width); ++n) {
+    const intptr_t left = n - N - 1;
+    const intptr_t right = n + N - 1;
+    const float left_val = left >= 0 ? in[left] : 0.0f;  // zero padding
+    const float right_val = right < width ? in[right] : 0.0f;  // zero padding
+    const V sum = Set(d, left_val + right_val);
+
+    // (Only processing a single lane here, no need to broadcast)
+    V out_1 = Mul(sum, mul_in_1);
+    V out_3 = Mul(sum, mul_in_3);
+    V out_5 = Mul(sum, mul_in_5);
+
+    out_1 = MulAdd(mul_prev2_1, prev2_1, out_1);
+    out_3 = MulAdd(mul_prev2_3, prev2_3, out_3);
+    out_5 = MulAdd(mul_prev2_5, prev2_5, out_5);
+    prev2_1 = prev_1;
+    prev2_3 = prev_3;
+    prev2_5 = prev_5;
+
+    out_1 = MulAdd(mul_prev_1, prev_1, out_1);
+    out_3 = MulAdd(mul_prev_3, prev_3, out_3);
+    out_5 = MulAdd(mul_prev_5, prev_5, out_5);
+    prev_1 = out_1;
+    prev_3 = out_3;
+    prev_5 = out_5;
+
+    if (n >= 0) {
+      out[n] = GetLane(Add(out_1, Add(out_3, out_5)));
+    }
+  }
+
+  // The above loop is effectively scalar but it is convenient to use the same
+  // prev/prev2 variables, so broadcast to each lane before the unrolled loop.
+#if HWY_TARGET != HWY_SCALAR && JXL_GAUSS_MAX_LANES > 1
+  prev2_1 = Broadcast<0>(prev2_1);
+  prev2_3 = Broadcast<0>(prev2_3);
+  prev2_5 = Broadcast<0>(prev2_5);
+  prev_1 = Broadcast<0>(prev_1);
+  prev_3 = Broadcast<0>(prev_3);
+  prev_5 = Broadcast<0>(prev_5);
+#endif
+
+  // Unrolled, no bounds checking needed.
+  for (; n < width - N + 1 - (JXL_GAUSS_MAX_LANES - 1); n += Lanes(d)) {
+    const V sum = Add(LoadU(d, in + n - N - 1), LoadU(d, in + n + N - 1));
+
+    // To get a vector of output(s), we multiply broadcasted vectors (of each
+    // input plus the two previous outputs) and add them all together.
+    // Incremental broadcasting and shifting is expected to be cheaper than
+    // horizontal adds or transposing 4x4 values because they run on a different
+    // port, concurrently with the FMA.
+    const V in0 = Broadcast<0>(sum);
+    V out_1 = Mul(in0, mul_in_1);
+    V out_3 = Mul(in0, mul_in_3);
+    V out_5 = Mul(in0, mul_in_5);
+
+#if HWY_TARGET != HWY_SCALAR && JXL_GAUSS_MAX_LANES >= 2
+    const V in1 = Broadcast<1>(sum);
+    out_1 = MulAdd(ShiftLeftLanes<1>(mul_in_1), in1, out_1);
+    out_3 = MulAdd(ShiftLeftLanes<1>(mul_in_3), in1, out_3);
+    out_5 = MulAdd(ShiftLeftLanes<1>(mul_in_5), in1, out_5);
+
+#if JXL_GAUSS_MAX_LANES >= 4
+    const V in2 = Broadcast<2>(sum);
+    out_1 = MulAdd(ShiftLeftLanes<2>(mul_in_1), in2, out_1);
+    out_3 = MulAdd(ShiftLeftLanes<2>(mul_in_3), in2, out_3);
+    out_5 = MulAdd(ShiftLeftLanes<2>(mul_in_5), in2, out_5);
+
+    const V in3 = Broadcast<3>(sum);
+    out_1 = MulAdd(ShiftLeftLanes<3>(mul_in_1), in3, out_1);
+    out_3 = MulAdd(ShiftLeftLanes<3>(mul_in_3), in3, out_3);
+    out_5 = MulAdd(ShiftLeftLanes<3>(mul_in_5), in3, out_5);
+#endif
+#endif
+
+    out_1 = MulAdd(mul_prev2_1, prev2_1, out_1);
+    out_3 = MulAdd(mul_prev2_3, prev2_3, out_3);
+    out_5 = MulAdd(mul_prev2_5, prev2_5, out_5);
+
+    out_1 = MulAdd(mul_prev_1, prev_1, out_1);
+    out_3 = MulAdd(mul_prev_3, prev_3, out_3);
+    out_5 = MulAdd(mul_prev_5, prev_5, out_5);
+#if HWY_TARGET == HWY_SCALAR || JXL_GAUSS_MAX_LANES == 1
+    prev2_1 = prev_1;
+    prev2_3 = prev_3;
+    prev2_5 = prev_5;
+    prev_1 = out_1;
+    prev_3 = out_3;
+    prev_5 = out_5;
+#else
+    prev2_1 = Broadcast<JXL_GAUSS_MAX_LANES - 2>(out_1);
+    prev2_3 = Broadcast<JXL_GAUSS_MAX_LANES - 2>(out_3);
+    prev2_5 = Broadcast<JXL_GAUSS_MAX_LANES - 2>(out_5);
+    prev_1 = Broadcast<JXL_GAUSS_MAX_LANES - 1>(out_1);
+    prev_3 = Broadcast<JXL_GAUSS_MAX_LANES - 1>(out_3);
+    prev_5 = Broadcast<JXL_GAUSS_MAX_LANES - 1>(out_5);
+#endif
+
+    Store(Add(out_1, Add(out_3, out_5)), d, out + n);
+  }
+
+  // Remainder handling with bounds checks (same scalar step as the left side).
+  for (; n < width; ++n) {
+    const intptr_t left = n - N - 1;
+    const intptr_t right = n + N - 1;
+    const float left_val = left >= 0 ? in[left] : 0.0f;
+    const float right_val = right < width ? in[right] : 0.0f;
+    const V sum = Set(d, left_val + right_val);
+
+    // (Only processing a single lane here, no need to broadcast)
+    V out_1 = Mul(sum, mul_in_1);
+    V out_3 = Mul(sum, mul_in_3);
+    V out_5 = Mul(sum, mul_in_5);
+
+    out_1 = MulAdd(mul_prev2_1, prev2_1, out_1);
+    out_3 = MulAdd(mul_prev2_3, prev2_3, out_3);
+    out_5 = MulAdd(mul_prev2_5, prev2_5, out_5);
+    prev2_1 = prev_1;
+    prev2_3 = prev_3;
+    prev2_5 = prev_5;
+
+    out_1 = MulAdd(mul_prev_1, prev_1, out_1);
+    out_3 = MulAdd(mul_prev_3, prev_3, out_3);
+    out_5 = MulAdd(mul_prev_5, prev_5, out_5);
+    prev_1 = out_1;
+    prev_3 = out_3;
+    prev_5 = out_5;
+
+    out[n] = GetLane(Add(out_1, Add(out_3, out_5)));
+  }
+}
+
+// Ring buffer is for n, n-1, n-2; round up to 4 for faster modulo.
+constexpr size_t kMod = 4;
+
+// Output policy for warmup rows: discards the result, avoiding a store.
+struct OutputNone {
+  template <typename V>
+  void operator()(const V& /*out*/, float* JXL_RESTRICT /*out_pos*/,
+                  ptrdiff_t /*offset*/) const {}
+};
+
+// Output policy for all rows after warmup: stores `out` at pos + offset.
+struct OutputStore {
+  template <typename V>
+  void operator()(const V& out, float* JXL_RESTRICT pos,
+                  ptrdiff_t offset) const {
+    // Regular Store: Stream helps large images but hurts ones that fit in cache.
+    Store(out, HWY_FULL(float)(), &pos[offset]);
+  }
+};
+
+// Input policy for border rows where only one source row contributes, so no
+// addition is needed. `pos` may point at an all-zero row when the source row
+// lies outside the input image.
+struct SingleInput {
+  explicit SingleInput(const float* pos) : pos_(pos) {}
+  Vec<HWY_FULL(float)> operator()(const size_t offset) const {
+    return Load(HWY_FULL(float)(), &pos_[offset]);
+  }
+  const float* pos_;
+};
+
+// Input policy for rows with two contributing source rows (one above, one
+// below): returns the lane-wise sum of both rows at the given offset.
+class TwoInputs {
+ public:
+  TwoInputs(const float* pos1, const float* pos2) : pos1_(pos1), pos2_(pos2) {}
+  Vec<HWY_FULL(float)> operator()(const size_t offset) const {
+    const HWY_FULL(float) d;
+    const auto first = Load(d, pos1_ + offset);
+    return Add(first, Load(d, pos2_ + offset));
+  }
+
+ private:
+  const float* pos1_;
+  const float* pos2_;
+};
+
+// Block := kVectors consecutive full vectors (one cache line except on the
+// right boundary, where we can only rely on having one vector). Advances the
+// recurrence by one row: bumps ctr, updates ring_buffer and calls `output`.
+template <size_t kVectors, class V, class Input, class Output>
+void VerticalBlock(const V& d1_1, const V& d1_3, const V& d1_5, const V& n2_1,
+                   const V& n2_3, const V& n2_5, const Input& input,
+                   size_t& ctr, float* ring_buffer, const Output output,
+                   float* JXL_RESTRICT out_pos) {
+  const HWY_FULL(float) d;
+  constexpr size_t kVN = MaxLanes(d);
+  // More cache-friendly to process an entire cache line at a time
+  constexpr size_t kLanes = kVectors * kVN;
+
+  float* JXL_RESTRICT y_1 = ring_buffer + 0 * kLanes * kMod;
+  float* JXL_RESTRICT y_3 = ring_buffer + 1 * kLanes * kMod;
+  float* JXL_RESTRICT y_5 = ring_buffer + 2 * kLanes * kMod;
+
+  const size_t n_0 = (++ctr) % kMod;  // ring slot written for this row
+  const size_t n_1 = (ctr - 1) % kMod;  // slot of the previous row
+  const size_t n_2 = (ctr - 2) % kMod;  // slot of the row before that
+
+  for (size_t idx_vec = 0; idx_vec < kVectors; ++idx_vec) {
+    const V sum = input(idx_vec * kVN);
+
+    const V y_n1_1 = Load(d, y_1 + kLanes * n_1 + idx_vec * kVN);
+    const V y_n1_3 = Load(d, y_3 + kLanes * n_1 + idx_vec * kVN);
+    const V y_n1_5 = Load(d, y_5 + kLanes * n_1 + idx_vec * kVN);
+    const V y_n2_1 = Load(d, y_1 + kLanes * n_2 + idx_vec * kVN);
+    const V y_n2_3 = Load(d, y_3 + kLanes * n_2 + idx_vec * kVN);
+    const V y_n2_5 = Load(d, y_5 + kLanes * n_2 + idx_vec * kVN);
+    // (35)
+    const V y1 = MulAdd(n2_1, sum, NegMulSub(d1_1, y_n1_1, y_n2_1));
+    const V y3 = MulAdd(n2_3, sum, NegMulSub(d1_3, y_n1_3, y_n2_3));
+    const V y5 = MulAdd(n2_5, sum, NegMulSub(d1_5, y_n1_5, y_n2_5));
+    Store(y1, d, y_1 + kLanes * n_0 + idx_vec * kVN);
+    Store(y3, d, y_3 + kLanes * n_0 + idx_vec * kVN);
+    Store(y5, d, y_5 + kLanes * n_0 + idx_vec * kVN);
+    output(Add(y1, Add(y3, y5)), out_pos, idx_vec * kVN);
+  }
+  // NOTE: flushing cache line out_pos hurts performance - less so with
+  // clflushopt than clflush but still a significant slowdown.
+}
+
+// Vertical pass over one strip: kVectors full vectors per row, from column x.
+template <size_t kVectors>
+void VerticalStrip(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+                   const ImageF& in, const size_t x, ImageF* JXL_RESTRICT out) {
+  // We're iterating vertically, so use multiple full-length vectors (each lane
+  // is one column of row n).
+  using D = HWY_FULL(float);
+  using V = Vec<D>;
+  const D d;
+  constexpr size_t kVN = MaxLanes(d);
+  // More cache-friendly to process an entire cache line at a time
+  constexpr size_t kLanes = kVectors * kVN;
+#if HWY_TARGET == HWY_SCALAR
+  const V d1_1 = Set(d, rg->d1[0 * 4]);
+  const V d1_3 = Set(d, rg->d1[1 * 4]);
+  const V d1_5 = Set(d, rg->d1[2 * 4]);
+  const V n2_1 = Set(d, rg->n2[0 * 4]);
+  const V n2_3 = Set(d, rg->n2[1 * 4]);
+  const V n2_5 = Set(d, rg->n2[2 * 4]);
+#else
+  const V d1_1 = LoadDup128(d, rg->d1 + 0 * 4);
+  const V d1_3 = LoadDup128(d, rg->d1 + 1 * 4);
+  const V d1_5 = LoadDup128(d, rg->d1 + 2 * 4);
+  const V n2_1 = LoadDup128(d, rg->n2 + 0 * 4);
+  const V n2_3 = LoadDup128(d, rg->n2 + 1 * 4);
+  const V n2_5 = LoadDup128(d, rg->n2 + 2 * 4);
+#endif
+
+  const size_t N = rg->radius;
+  const size_t ysize = in.ysize();
+
+  size_t ctr = 0;
+  HWY_ALIGN float ring_buffer[3 * kLanes * kMod] = {0};  // 3 terms x kMod rows
+  HWY_ALIGN static constexpr float zero[kLanes] = {0};  // out-of-image row
+
+  // Warmup: top is out of bounds (zero padded), bottom is usually in-bounds.
+  ssize_t n = -static_cast<ssize_t>(N) + 1;
+  for (; n < 0; ++n) {
+    // bottom is always non-negative since n is initialized in -N + 1.
+    const size_t bottom = n + N - 1;
+    VerticalBlock<kVectors>(
+        d1_1, d1_3, d1_5, n2_1, n2_3, n2_5,
+        SingleInput(bottom < ysize ? in.ConstRow(bottom) + x : zero), ctr,
+        ring_buffer, OutputNone(), nullptr);
+  }
+  JXL_DASSERT(n >= 0);
+
+  // Start producing output; top is still out of bounds.
+  for (; static_cast<size_t>(n) < std::min(N + 1, ysize); ++n) {
+    const size_t bottom = n + N - 1;
+    VerticalBlock<kVectors>(
+        d1_1, d1_3, d1_5, n2_1, n2_3, n2_5,
+        SingleInput(bottom < ysize ? in.ConstRow(bottom) + x : zero), ctr,
+        ring_buffer, OutputStore(), out->Row(n) + x);
+  }
+
+  // Interior outputs with prefetching and without bounds checks.
+  constexpr size_t kPrefetchRows = 8;
+  for (; n < static_cast<ssize_t>(ysize - N + 1 - kPrefetchRows); ++n) {
+    const size_t top = n - N - 1;
+    const size_t bottom = n + N - 1;
+    VerticalBlock<kVectors>(
+        d1_1, d1_3, d1_5, n2_1, n2_3, n2_5,
+        TwoInputs(in.ConstRow(top) + x, in.ConstRow(bottom) + x), ctr,
+        ring_buffer, OutputStore(), out->Row(n) + x);
+    hwy::Prefetch(in.ConstRow(top + kPrefetchRows) + x);
+    hwy::Prefetch(in.ConstRow(bottom + kPrefetchRows) + x);
+  }
+
+  // Bottom border without prefetching and with bounds checks.
+  for (; static_cast<size_t>(n) < ysize; ++n) {
+    const size_t top = n - N - 1;
+    const size_t bottom = n + N - 1;
+    VerticalBlock<kVectors>(
+        d1_1, d1_3, d1_5, n2_1, n2_3, n2_5,
+        TwoInputs(in.ConstRow(top) + x,
+                  bottom < ysize ? in.ConstRow(bottom) + x : zero),
+        ctr, ring_buffer, OutputStore(), out->Row(n) + x);
+  }
+}
+
+// Runs the vertical recursive pass over `in`, one column per vector lane,
+// writing to `out`. Not yet parallelized: the pool argument is ignored.
+void FastGaussianVertical(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+                          const ImageF& in, ThreadPool* /*pool*/,
+                          ImageF* JXL_RESTRICT out) {
+  JXL_CHECK(SameSize(in, *out));
+
+  constexpr size_t kVecLanes = MaxLanes(HWY_FULL(float)());
+  constexpr size_t kLineLanes = 64 / sizeof(float);
+  constexpr size_t kWideVectors =
+      (kVecLanes < kLineLanes) ? (kLineLanes / kVecLanes) : 4;
+  constexpr size_t kWideStep = kWideVectors * kVecLanes;
+
+  size_t x = 0;
+  // Wide strips first, then single-vector strips for the remaining columns.
+  while (x + kWideStep <= in.xsize()) {
+    VerticalStrip<kWideVectors>(rg, in, x, out);
+    x += kWideStep;
+  }
+  for (; x < in.xsize(); x += kVecLanes) VerticalStrip<1>(rg, in, x, out);
+}
+
+// TODO(veluca): consider replacing with FastGaussian.
+// Convolves each row with `kernel`, keeps every res-th x, writes transposed.
+ImageF ConvolveXSampleAndTranspose(const ImageF& in,
+                                   const std::vector<float>& kernel,
+                                   const size_t res) {
+  JXL_ASSERT(kernel.size() % 2 == 1);
+  JXL_ASSERT(in.xsize() % res == 0);
+  const size_t first_x = res / 2;
+  const size_t sampled_xsize = in.xsize() / res;
+  ImageF result(in.ysize(), sampled_xsize);
+  const int rad = kernel.size() / 2;
+  HWY_FULL(float) df;
+  // Row copy with `rad` border samples per side plus one vector of slack.
+  std::vector<float> padded_row(in.xsize() + 2 * rad + Lanes(df));
+  float* const JXL_RESTRICT row = &padded_row[rad];
+  // Kernel copy with one zeroed vector of slack for the unaligned loads below.
+  std::vector<float> padded_kernel = kernel;
+  padded_kernel.resize(padded_kernel.size() + Lanes(df));
+  const float* const taps = &padded_kernel[rad];
+  // Scalar path used at both row ends: tap positions clamped to [0, xsize].
+  const auto scalar_sample = [&](const size_t x) {
+    float acc = 0.0f;
+    for (int i = -rad; i <= rad; ++i) {
+      const int pos = std::min<int>(static_cast<int>(x) + i, in.xsize());
+      acc += row[std::max<int>(0, pos)] * taps[i];
+    }
+    return acc;
+  };
+  for (size_t y = 0; y < in.ysize(); ++y) {
+    ExtrapolateBorders(in.Row(y), row, in.xsize(), rad);
+    size_t x = first_x, ox = 0;
+    for (; x < static_cast<uint32_t>(rad) && x < in.xsize(); x += res, ++ox) {
+      result.Row(ox)[y] = scalar_sample(x);
+    }
+    for (; x + rad < in.xsize(); x += res, ++ox) {
+      auto acc = Zero(df);
+      for (int i = -rad; i <= rad; i += Lanes(df)) {
+        acc = MulAdd(LoadU(df, row + x + i), LoadU(df, taps + i), acc);
+      }
+      result.Row(ox)[y] = GetLane(SumOfLanes(df, acc));
+    }
+    for (; x < in.xsize(); x += res, ++ox) {
+      result.Row(ox)[y] = scalar_sample(x);
+    }
+  }
+  return result;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(FastGaussian1D);
+HWY_EXPORT(ConvolveXSampleAndTranspose);
+void FastGaussian1D(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+                    const float* JXL_RESTRICT in, intptr_t width,
+                    float* JXL_RESTRICT out) {  // public entry point
+  HWY_DYNAMIC_DISPATCH(FastGaussian1D)(rg, in, width, out);
+}
+
+HWY_EXPORT(FastGaussianVertical);  // Local function.
+
+void ExtrapolateBorders(const float* const JXL_RESTRICT row_in,
+                        float* const JXL_RESTRICT row_out, const int xsize,
+                        const int radius) {
+  // Copies the row and mirrors `radius` samples (clamped to the row) per side.
+  if (xsize <= 0) return;  // empty row: no sample to mirror, avoid row_in[-1]
+  const int lastcol = xsize - 1;
+  memcpy(row_out, row_in, xsize * sizeof(row_out[0]));
+  for (int x = 1; x <= radius; ++x) {
+    row_out[-x] = row_in[std::min(x, lastcol)];
+    row_out[lastcol + x] = row_in[std::max(0, lastcol - x)];
+  }
+}
+
+ImageF ConvolveXSampleAndTranspose(const ImageF& in,
+                                   const std::vector<float>& kernel,
+                                   const size_t res) {  // dispatching wrapper
+  return HWY_DYNAMIC_DISPATCH(ConvolveXSampleAndTranspose)(in, kernel, res);
+}
+
+Image3F ConvolveXSampleAndTranspose(const Image3F& in,
+                                    const std::vector<float>& kernel,
+                                    const size_t res) {  // same, per plane
+  return Image3F(ConvolveXSampleAndTranspose(in.Plane(0), kernel, res),
+                 ConvolveXSampleAndTranspose(in.Plane(1), kernel, res),
+                 ConvolveXSampleAndTranspose(in.Plane(2), kernel, res));
+}
+
+ImageF ConvolveAndSample(const ImageF& in, const std::vector<float>& kernel,
+                         const size_t res) {  // two transposing passes
+  return ConvolveXSampleAndTranspose(
+      ConvolveXSampleAndTranspose(in, kernel, res), kernel, res);
+}
+
+// Implements "Recursive Implementation of the Gaussian Filter Using Truncated
+// Cosine Functions" by Charalampidis [2016]. (NN) = equation in that paper.
+hwy::AlignedUniquePtr<RecursiveGaussian> CreateRecursiveGaussian(double sigma) {
+  auto rg = hwy::MakeUniqueAligned<RecursiveGaussian>();
+  constexpr double kPi = 3.141592653589793238;
+
+  const double radius = roundf(3.2795 * sigma + 0.2546);  // (57), "N"
+
+  // Table I, first row
+  const double pi_div_2r = kPi / (2.0 * radius);
+  const double omega[3] = {pi_div_2r, 3.0 * pi_div_2r, 5.0 * pi_div_2r};
+
+  // (37), k={1,3,5}
+  const double p_1 = +1.0 / std::tan(0.5 * omega[0]);
+  const double p_3 = -1.0 / std::tan(0.5 * omega[1]);
+  const double p_5 = +1.0 / std::tan(0.5 * omega[2]);
+
+  // (44), k={1,3,5}
+  const double r_1 = +p_1 * p_1 / std::sin(omega[0]);
+  const double r_3 = -p_3 * p_3 / std::sin(omega[1]);
+  const double r_5 = +p_5 * p_5 / std::sin(omega[2]);
+
+  // (50), k={1,3,5}
+  const double neg_half_sigma2 = -0.5 * sigma * sigma;
+  const double recip_radius = 1.0 / radius;
+  double rho[3];
+  for (size_t i = 0; i < 3; ++i) {
+    rho[i] = std::exp(neg_half_sigma2 * omega[i] * omega[i]) * recip_radius;
+  }
+
+  // second part of (52), k1,k2 = 1,3; 3,5; 5,1
+  const double D_13 = p_1 * r_3 - r_1 * p_3;
+  const double D_35 = p_3 * r_5 - r_3 * p_5;
+  const double D_51 = p_5 * r_1 - r_5 * p_1;
+
+  // (52), k=5
+  const double recip_d13 = 1.0 / D_13;
+  const double zeta_15 = D_35 * recip_d13;
+  const double zeta_35 = D_51 * recip_d13;
+
+  double A[9] = {p_1,     p_3,     p_5,  //
+                 r_1,     r_3,     r_5,  //  (56)
+                 zeta_15, zeta_35, 1};
+  JXL_CHECK(Inv3x3Matrix(A));
+  const double gamma[3] = {1, radius * radius - sigma * sigma,  // (55)
+                           zeta_15 * rho[0] + zeta_35 * rho[1] + rho[2]};
+  double beta[3];
+  Mul3x3Vector(A, gamma, beta);  // (53)
+
+  // Sanity check: correctly solved for beta (IIR filter weights are normalized)
+  const double sum = beta[0] * p_1 + beta[1] * p_3 + beta[2] * p_5;  // (39)
+  JXL_ASSERT(std::abs(sum - 1) < 1E-12);
+  (void)sum;  // avoids an unused-variable warning if the assert compiles out
+
+  rg->radius = static_cast<int>(radius);  // already integral (roundf above)
+
+  double n2[3];
+  double d1[3];
+  for (size_t i = 0; i < 3; ++i) {
+    n2[i] = -beta[i] * std::cos(omega[i] * (radius + 1.0));  // (33)
+    d1[i] = -2.0 * std::cos(omega[i]);                       // (33)
+
+    for (size_t lane = 0; lane < 4; ++lane) {  // replicated for LoadDup128
+      rg->n2[4 * i + lane] = static_cast<float>(n2[i]);
+      rg->d1[4 * i + lane] = static_cast<float>(d1[i]);
+    }
+
+    const double d_2 = d1[i] * d1[i];
+
+    // Obtained by expanding (35) for four consecutive outputs via sympy:
+    // n, d, p, pp = symbols('n d p pp')
+    // i0, i1, i2, i3 = symbols('i0 i1 i2 i3')
+    // o0, o1, o2, o3 = symbols('o0 o1 o2 o3')
+    // o0 = n*i0 - d*p - pp
+    // o1 = n*i1 - d*o0 - p
+    // o2 = n*i2 - d*o1 - o0
+    // o3 = n*i3 - d*o2 - o1
+    // Then expand(o3) and gather terms for p(prev), pp(prev2) etc.
+    rg->mul_prev[4 * i + 0] = -d1[i];
+    rg->mul_prev[4 * i + 1] = d_2 - 1.0;
+    rg->mul_prev[4 * i + 2] = -d_2 * d1[i] + 2.0 * d1[i];
+    rg->mul_prev[4 * i + 3] = d_2 * d_2 - 3.0 * d_2 + 1.0;
+    rg->mul_prev2[4 * i + 0] = -1.0;
+    rg->mul_prev2[4 * i + 1] = d1[i];
+    rg->mul_prev2[4 * i + 2] = -d_2 + 1.0;
+    rg->mul_prev2[4 * i + 3] = d_2 * d1[i] - 2.0 * d1[i];
+    rg->mul_in[4 * i + 0] = n2[i];
+    rg->mul_in[4 * i + 1] = -d1[i] * n2[i];
+    rg->mul_in[4 * i + 2] = d_2 * n2[i] - n2[i];
+    rg->mul_in[4 * i + 3] = -d_2 * d1[i] * n2[i] + 2.0 * d1[i] * n2[i];
+  }
+  return rg;
+}
+
+namespace {
+
+// Applies FastGaussian1D to each row of `in`, one RunOnPool task per row.
+void FastGaussianHorizontal(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+                            const ImageF& in, ThreadPool* pool,
+                            ImageF* JXL_RESTRICT out) {
+  JXL_CHECK(SameSize(in, *out));
+
+  const intptr_t xsize = in.xsize();
+  JXL_CHECK(RunOnPool(
+      pool, 0, in.ysize(), ThreadPool::NoInit,
+      [&](const uint32_t task, size_t /*thread*/) {
+        const size_t y = task;  // the task index is the row index
+        const float* row_in = in.ConstRow(y);
+        float* JXL_RESTRICT row_out = out->Row(y);
+        FastGaussian1D(rg, row_in, xsize, row_out);
+      },
+      "FastGaussianHorizontal"));
+}
+
+}  // namespace
+
+void FastGaussian(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+                  const ImageF& in, ThreadPool* pool, ImageF* JXL_RESTRICT temp,
+                  ImageF* JXL_RESTRICT out) {
+  FastGaussianHorizontal(rg, in, pool, temp);  // horizontal pass: in -> temp
+  HWY_DYNAMIC_DISPATCH(FastGaussianVertical)(rg, *temp, pool, out);  // -> out
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/gauss_blur.h b/third-party/libjxl/libjxl/lib/jxl/gauss_blur.h
new file mode 100644
index 0000000000..fb4741f03a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/gauss_blur.h
@@ -0,0 +1,94 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_GAUSS_BLUR_H_
+#define LIB_JXL_GAUSS_BLUR_H_
+
+#include <stddef.h>
+
+#include <cmath>
+#include <hwy/aligned_allocator.h>
+#include <vector>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+template <typename T>
+std::vector<T> GaussianKernel(int radius, T sigma) {
+  // Samples exp(-i^2 / (2 sigma^2)) for i in [-radius, radius], then divides by
+  // the sum so that the 2 * radius + 1 weights add up to one.
+  JXL_ASSERT(sigma > 0.0);
+  const T exponent_scale = -1.0 / (2 * sigma * sigma);
+  std::vector<T> weights(2 * radius + 1);
+  double total = 0.0;
+  for (int offset = -radius; offset <= radius; ++offset) {
+    const T weight = std::exp(exponent_scale * offset * offset);
+    weights[offset + radius] = weight;
+    total += weight;
+  }
+  for (T& weight : weights) weight /= total;
+  return weights;
+}
+
+// All convolution functions below apply mirroring of the input on the borders
+// in the following way:
+//
+//     input: [a0 a1 a2 ...  aN]
+//     mirrored input: [aR ... a1 | a0 a1 a2 .... aN | aN-1 ... aN-R]
+//
+// where R is the radius of the kernel (i.e. kernel size is 2*R+1).
+
+// REQUIRES: in.xsize() and in.ysize() are integer multiples of res.
+ImageF ConvolveAndSample(const ImageF& in, const std::vector<float>& kernel,
+                         const size_t res);
+
+// Private, used by test.
+void ExtrapolateBorders(const float* const JXL_RESTRICT row_in,
+                        float* const JXL_RESTRICT row_out, const int xsize,
+                        const int radius);
+
+// Filter constants; only for use by CreateRecursiveGaussian and FastGaussian*.
+#pragma pack(push, 1)
+struct RecursiveGaussian {
+  // For k={1,3,5} in that order, each broadcasted 4x for LoadDup128. Used only
+  // for vertical passes.
+  float n2[3 * 4];
+  float d1[3 * 4];
+
+  // We unroll horizontal passes 4x - one output per lane. These are each lane's
+  // multiplier for the previous output (relative to the first of the four
+  // outputs). Indexing: 4 * 0..2 (for {1,3,5}) + 0..3 for the lane index.
+  float mul_prev[3 * 4];
+  // Same layout, but multipliers for the output before the previous one.
+  float mul_prev2[3 * 4];
+
+  // We multiply a vector of inputs 0..3 by a vector shifted from this array.
+  // in=0 uses all 4 (nonzero) terms; for in=3, the lower three lanes are 0.
+  float mul_in[3 * 4];
+
+  size_t radius;  // kernel radius "N", set by CreateRecursiveGaussian
+};
+#pragma pack(pop)
+
+// Precomputation for FastGaussian*; users may use the same pointer/storage in
+// subsequent calls to FastGaussian* with the same sigma.
+hwy::AlignedUniquePtr<RecursiveGaussian> CreateRecursiveGaussian(double sigma);
+
+// 1D Gaussian with zero-pad boundary handling and runtime independent of sigma.
+void FastGaussian1D(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+                    const float* JXL_RESTRICT in, intptr_t width,
+                    float* JXL_RESTRICT out);
+
+// 2D Gaussian with zero-pad boundary handling and runtime independent of sigma.
+void FastGaussian(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+                  const ImageF& in, ThreadPool* pool, ImageF* JXL_RESTRICT temp,
+                  ImageF* JXL_RESTRICT out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_GAUSS_BLUR_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/gauss_blur_gbench.cc b/third-party/libjxl/libjxl/lib/jxl/gauss_blur_gbench.cc
new file mode 100644
index 0000000000..b1bb64abc5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/gauss_blur_gbench.cc
@@ -0,0 +1,126 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <hwy/targets.h>
+
+#include "benchmark/benchmark.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/gauss_blur.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+namespace {
+
+JXL_MAYBE_UNUSED ImageF Convolve(const ImageF& in,
+                                 const std::vector<float>& kernel) {
+  return ConvolveAndSample(in, kernel, 1);  // res = 1: keep every sample
+}
+
+void BM_GaussBlur1d(benchmark::State& state) {
+  // Uncomment to disable SIMD and force the scalar implementation
+  // hwy::DisableTargets(~HWY_SCALAR);
+  // Uncomment to run AVX2
+  // hwy::DisableTargets(HWY_AVX3);
+
+  const size_t length = state.range();
+  const double sigma = 7.0;  // (from Butteraugli application)
+  ImageF in(length, 1);
+  const float expected = length;
+  FillImage(expected, &in);
+
+  // Single-row output; the 1D pass writes it directly (no temporary image).
+  ImageF out(length, 1);
+  const auto rg = CreateRecursiveGaussian(sigma);
+  for (auto _ : state) {
+    FastGaussian1D(rg, in.Row(0), length, out.Row(0));
+    // Prevent optimizing out
+    JXL_ASSERT(std::abs(out.ConstRow(0)[length / 2] - expected) / expected <
+               9E-5);
+  }
+  state.SetItemsProcessed(length * state.iterations());
+}
+
+void BM_GaussBlur2d(benchmark::State& state) {
+  // See BM_GaussBlur1d for how to force a specific SIMD target.
+
+  const size_t xsize = state.range();
+  const size_t ysize = xsize;
+  const double sigma = 7.0;  // (from Butteraugli application)
+  ImageF in(xsize, ysize);
+  const float expected = xsize + ysize;
+  FillImage(expected, &in);
+
+  ImageF temp(xsize, ysize);  // receives the horizontal pass
+  ImageF out(xsize, ysize);
+  ThreadPool* null_pool = nullptr;
+  const auto rg = CreateRecursiveGaussian(sigma);
+  for (auto _ : state) {
+    FastGaussian(rg, in, null_pool, &temp, &out);
+    // Prevent optimizing out: the center pixel must still match the fill value.
+    JXL_ASSERT(std::abs(out.ConstRow(ysize / 2)[xsize / 2] - expected) /
+                   expected <
+               9E-5);
+  }
+  state.SetItemsProcessed(xsize * ysize * state.iterations());
+}
+
+void BM_GaussBlurFir(benchmark::State& state) {
+  // See BM_GaussBlur1d for how to force a specific SIMD target.
+
+  const size_t xsize = state.range();
+  const size_t ysize = xsize;
+  const double sigma = 7.0;  // (from Butteraugli application)
+  ImageF in(xsize, ysize);
+  const float expected = xsize + ysize;
+  FillImage(expected, &in);
+
+  // Convolve() returns a fresh image on every call, so no temp/out images are
+  // preallocated here.
+  const std::vector<float> kernel =
+      GaussianKernel(static_cast<int>(4 * sigma), static_cast<float>(sigma));
+  for (auto _ : state) {
+    // Prevent optimizing out
+    JXL_ASSERT(std::abs(Convolve(in, kernel).ConstRow(ysize / 2)[xsize / 2] -
+                        expected) /
+                   expected <
+               9E-5);
+  }
+  state.SetItemsProcessed(xsize * ysize * state.iterations());
+}
+
+void BM_GaussBlurSep7(benchmark::State& state) {
+  // See BM_GaussBlur1d for how to force a specific SIMD target.
+
+  const size_t xsize = state.range();
+  const size_t ysize = xsize;
+  ImageF in(xsize, ysize);
+  const float expected = xsize + ysize;
+  FillImage(expected, &in);
+
+  // Separable7 writes straight to `out`; no intermediate image is needed.
+  ImageF out(xsize, ysize);
+  ThreadPool* null_pool = nullptr;
+  // Gaussian with sigma 1
+  const WeightsSeparable7 weights = {{HWY_REP4(0.383103f), HWY_REP4(0.241843f),
+                                      HWY_REP4(0.060626f), HWY_REP4(0.00598f)},
+                                     {HWY_REP4(0.383103f), HWY_REP4(0.241843f),
+                                      HWY_REP4(0.060626f), HWY_REP4(0.00598f)}};
+  for (auto _ : state) {
+    Separable7(in, Rect(in), weights, null_pool, &out);
+    // Prevent optimizing out
+    JXL_ASSERT(std::abs(out.ConstRow(ysize / 2)[xsize / 2] - expected) /
+                   expected <
+               9E-5);
+  }
+  state.SetItemsProcessed(xsize * ysize * state.iterations());
+}
+
+BENCHMARK(BM_GaussBlur1d)->Range(1 << 8, 1 << 14);
+BENCHMARK(BM_GaussBlur2d)->Range(1 << 7, 1 << 10);
+BENCHMARK(BM_GaussBlurFir)->Range(1 << 7, 1 << 10);
+BENCHMARK(BM_GaussBlurSep7)->Range(1 << 7, 1 << 10);
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/gauss_blur_test.cc b/third-party/libjxl/libjxl/lib/jxl/gauss_blur_test.cc
new file mode 100644
index 0000000000..097c1aa8df
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/gauss_blur_test.cc
@@ -0,0 +1,453 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/gauss_blur.h"
+
+#include <cmath>
+#include <hwy/targets.h>
+#include <vector>
+
+#include "lib/extras/time.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+
+bool NearEdge(const int64_t width, const int64_t peak) {
+  // When around 3*sigma from the edge, there is negligible truncation.
+  return peak < 10 || peak > width - 10;  // within ~10 samples of an end
+}
+
+// Scans right from `peak`: values must descend and equal those at the same
+// offset to the left, within a tolerance that is looser near the edges.
+void VerifySymmetric(const int64_t width, const int64_t peak,
+                     const float* out) {
+  const double tolerance = NearEdge(width, peak) ? 0.015 : 6E-7;
+  for (int64_t i = 1;; ++i) {
+    // Stop once either side of `peak` would leave [0, width)
+    if (peak - i < 0 || peak + i >= width) break;
+    EXPECT_GT(out[peak + i - 1] + tolerance, out[peak + i]);  // descending
+    EXPECT_NEAR(out[peak - i], out[peak + i], tolerance);     // symmetric
+  }
+}
+
+void TestImpulseResponse(size_t width, size_t peak) {
+  const auto rg3 = CreateRecursiveGaussian(3.0);
+  const auto rg4 = CreateRecursiveGaussian(4.0);
+  const auto rg5 = CreateRecursiveGaussian(5.0);
+
+  // Extra padding for 4x unrolling
+  auto in = hwy::AllocateAligned<float>(width + 3);
+  memset(in.get(), 0, sizeof(float) * (width + 3));
+  in[peak] = 1.0f;  // unit impulse
+
+  auto out3 = hwy::AllocateAligned<float>(width + 3);
+  auto out4 = hwy::AllocateAligned<float>(width + 3);
+  auto out5 = hwy::AllocateAligned<float>(width + 3);
+  FastGaussian1D(rg3, in.get(), width, out3.get());
+  FastGaussian1D(rg4, out3.get(), width, out4.get());  // input is out3
+  FastGaussian1D(rg5, in.get(), width, out5.get());
+
+  VerifySymmetric(width, peak, out3.get());
+  VerifySymmetric(width, peak, out4.get());
+  VerifySymmetric(width, peak, out5.get());
+
+  // Wider kernel has flatter peak
+  EXPECT_LT(out5[peak] + 0.05, out3[peak]);
+
+  // Gauss3 o Gauss4 ~= Gauss5 (variances add: 3^2 + 4^2 = 5^2)
+  const double tolerance = NearEdge(width, peak) ? 0.04 : 0.01;
+  for (size_t i = 0; i < width; ++i) {
+    EXPECT_NEAR(out4[i], out5[i], tolerance);
+  }
+}
+
+void TestImpulseResponseForWidth(size_t width) {
+  for (size_t i = 0; i < width; ++i) {  // every possible peak position
+    TestImpulseResponse(width, i);
+  }
+}
+
+TEST(GaussBlurTest, ImpulseResponse) {  // impulse at every index, per width
+  TestImpulseResponseForWidth(10);  // tiny even
+  TestImpulseResponseForWidth(15);  // small odd
+  TestImpulseResponseForWidth(32);  // power of two
+  TestImpulseResponseForWidth(31);  // power of two - 1
+  TestImpulseResponseForWidth(33);  // power of two + 1
+}
+
+ImageF Convolve(const ImageF& in, const std::vector<float>& kernel) {
+  return ConvolveAndSample(in, kernel, 1);  // 1: presumably no subsampling
+}
+
+// Higher-precision version for accuracy test: double sums, transposed output.
+ImageF ConvolveAndTransposeF64(const ImageF& in,
+                               const std::vector<double>& kernel) {
+  JXL_ASSERT(kernel.size() % 2 == 1);  // odd length, so there is a center tap
+  ImageF out(in.ysize(), in.xsize());
+  const int r = kernel.size() / 2;  // kernel radius
+  std::vector<float> row_tmp(in.xsize() + 2 * r);
+  float* const JXL_RESTRICT rowp = &row_tmp[r];  // r samples before rowp[0]
+  const double* const kernelp = &kernel[r];      // indexed by -r..r
+  for (size_t y = 0; y < in.ysize(); ++y) {
+    ExtrapolateBorders(in.Row(y), rowp, in.xsize(), r);
+    for (size_t x = 0, ox = 0; x < in.xsize(); ++x, ++ox) {  // ox == x
+      double sum = 0.0;
+      for (int i = -r; i <= r; ++i) {
+        sum += rowp[std::max<int>(
+                   0, std::min<int>(static_cast<int>(x) + i, in.xsize()))] *
+               kernelp[i];  // NOTE(review): clamp is [0, xsize]; confirm bound
+      }
+      out.Row(ox)[y] = static_cast<float>(sum);  // transposed store
+    }
+  }
+  return out;
+}
+
+ImageF ConvolveF64(const ImageF& in, const std::vector<double>& kernel) {
+  ImageF tmp = ConvolveAndTransposeF64(in, kernel);  // rows, transposed
+  return ConvolveAndTransposeF64(tmp, kernel);  // columns; transposed back
+}
+
+void TestDirac2D(size_t xsize, size_t ysize, double sigma) {
+  ImageF in(xsize, ysize);
+  ZeroFillImage(&in);
+  // We anyway ignore the border below, so might as well choose the middle.
+  in.Row(ysize / 2)[xsize / 2] = 1.0f;  // single unit impulse
+
+  ImageF temp(xsize, ysize);
+  ImageF out(xsize, ysize);
+  const auto rg = CreateRecursiveGaussian(sigma);
+  ThreadPool* null_pool = nullptr;
+  FastGaussian(rg, in, null_pool, &temp, &out);  // implementation under test
+
+  const std::vector<float> kernel =
+      GaussianKernel(static_cast<int>(4 * sigma), static_cast<float>(sigma));
+  const ImageF expected = Convolve(in, kernel);  // FIR reference
+
+  const double max_l1 = sigma < 1.5 ? 5E-3 : 6E-4;  // looser for small sigma
+  const size_t border = 2 * sigma;                  // truncated to an integer
+
+  JXL_ASSERT_OK(VerifyRelativeError(expected, out, max_l1, 1E-8, _, border));
+}
+
+TEST(GaussBlurTest, Test2D) {  // every size pair, with each sigma
+  const std::vector<int> dimensions{6, 15, 17, 64, 50, 49};
+  for (int xsize : dimensions) {
+    for (int ysize : dimensions) {
+      for (double sigma : {1.0, 2.5, 3.6, 7.0}) {
+        TestDirac2D(static_cast<size_t>(xsize), static_cast<size_t>(ysize),
+                    sigma);
+      }
+    }
+  }
+}
+
+// Slow (44 sec). To run, remove the disabled prefix.
+TEST(GaussBlurTest, DISABLED_SlowTestDirac1D) {
+  const double sigma = 7.0;
+  const auto rg = CreateRecursiveGaussian(sigma);
+
+  // IPOL accuracy test uses 10^-15 tolerance, this is 2*10^-11.
+  const size_t radius = static_cast<size_t>(7 * sigma);
+  const std::vector<double> kernel = GaussianKernel(radius, sigma);
+
+  const size_t length = 16384;
+  ImageF inputs(length, 1);
+  ZeroFillImage(&inputs);
+
+  auto outputs = hwy::AllocateAligned<float>(length);
+
+  // One per output index, accumulated over all center positions
+  auto sum_abs_err = hwy::AllocateAligned<double>(length);
+  std::fill(sum_abs_err.get(), sum_abs_err.get() + length, 0.0);
+
+  for (size_t center = radius; center < length - radius; ++center) {
+    inputs.Row(0)[center - 1] = 0.0f;  // reset last peak, entire array now 0
+    inputs.Row(0)[center] = 1.0f;
+    FastGaussian1D(rg, inputs.Row(0), length, outputs.get());
+
+    const ImageF outputs_fir = ConvolveF64(inputs, kernel);  // reference
+
+    for (size_t i = 0; i < length; ++i) {
+      const float abs_err = std::abs(outputs[i] - outputs_fir.Row(0)[i]);
+      sum_abs_err[i] += static_cast<double>(abs_err);
+    }
+  }
+
+  const double max_abs_err =
+      *std::max_element(sum_abs_err.get(), sum_abs_err.get() + length);
+  printf("Max abs err: %.8e\n", max_abs_err);  // printed, not asserted
+}
+
+void TestRandom(size_t xsize, size_t ysize, float min, float max, double sigma,
+                double max_l1, double max_rel) {
+  printf("%4" PRIuS " x %4" PRIuS " %4.1f %4.1f sigma %.1f\n", xsize, ysize,
+         min, max, sigma);
+  ImageF in(xsize, ysize);
+  RandomFillImage(&in, min, max, 65537 + xsize * 129 + ysize);
+  // FastGaussian/Convolve handle borders differently, so keep those pixels 0.
+  const size_t border = 4 * sigma;  // truncated to an integer
+  SetBorder(border, 0.0f, &in);
+
+  ImageF temp(xsize, ysize);
+  ImageF out(xsize, ysize);
+  const auto rg = CreateRecursiveGaussian(sigma);
+  ThreadPool* null_pool = nullptr;
+  FastGaussian(rg, in, null_pool, &temp, &out);  // implementation under test
+
+  const std::vector<float> kernel =
+      GaussianKernel(static_cast<int>(4 * sigma), static_cast<float>(sigma));
+  const ImageF expected = Convolve(in, kernel);  // FIR reference
+
+  JXL_ASSERT_OK(VerifyRelativeError(expected, out, max_l1, max_rel, _, border));
+}
+
+void TestRandomForSizes(float min, float max, double sigma) {
+  double max_l1 = 6E-3;   // base tolerances, scaled up for larger images
+  double max_rel = 3E-3;
+  TestRandom(128, 1, min, max, sigma, max_l1, max_rel);  // single row
+  TestRandom(1, 128, min, max, sigma, max_l1, max_rel);  // single column
+  TestRandom(30, 201, min, max, sigma, max_l1 * 1.6, max_rel * 1.2);
+  TestRandom(201, 30, min, max, sigma, max_l1 * 1.6, max_rel * 1.2);
+  TestRandom(201, 201, min, max, sigma, max_l1 * 2.0, max_rel * 1.2);
+}
+
+TEST(GaussBlurTest, TestRandom) {  // arguments are (min, max, sigma)
+  // small non-negative
+  TestRandomForSizes(0.0f, 10.0f, 3.0f);
+  TestRandomForSizes(0.0f, 10.0f, 7.0f);
+
+  // small negative
+  TestRandomForSizes(-4.0f, -1.0f, 3.0f);
+  TestRandomForSizes(-4.0f, -1.0f, 7.0f);
+
+  // mixed positive/negative
+  TestRandomForSizes(-6.0f, 6.0f, 3.0f);
+  TestRandomForSizes(-6.0f, 6.0f, 7.0f);
+}
+
+TEST(GaussBlurTest, TestSign) {  // prints both results; asserts nothing
+  const size_t xsize = 500;
+  const size_t ysize = 606;
+  ImageF in(xsize, ysize);
+
+  ZeroFillImage(&in);
+  const float center[33 * 33] = {  // 33x33 patch, row-major
+      -0.128445f, -0.098473f, -0.121883f, -0.093601f, 0.095665f,  -0.271332f,
+      -0.705475f, -1.324005f, -2.020741f, -1.329464f, 1.834064f,  4.787300f,
+      5.834560f,  5.272720f,  3.967960f,  3.547935f,  3.432732f,  3.383015f,
+      3.239326f,  3.290806f,  3.298954f,  3.397808f,  3.359730f,  3.533844f,
+      3.511856f,  3.436787f,  3.428310f,  3.460209f,  3.550011f,  3.590942f,
+      3.593109f,  3.560005f,  3.443165f,  0.089741f,  0.179230f,  -0.032997f,
+      -0.182610f, 0.005669f,  -0.244759f, -0.395123f, -0.514961f, -1.003529f,
+      -1.798656f, -2.377975f, 0.222191f,  3.957664f,  5.946804f,  5.543129f,
+      4.290096f,  3.621010f,  3.407257f,  3.392494f,  3.345367f,  3.391903f,
+      3.441605f,  3.429260f,  3.444969f,  3.507130f,  3.518612f,  3.443111f,
+      3.475948f,  3.536148f,  3.470333f,  3.628311f,  3.600243f,  3.292892f,
+      -0.226730f, -0.573616f, -0.762165f, -0.398739f, -0.189842f, -0.275921f,
+      -0.446739f, -0.550037f, -0.461033f, -0.724792f, -1.448349f, -1.814064f,
+      -0.491032f, 2.817703f,  5.213242f,  5.675629f,  4.864548f,  3.876324f,
+      3.535587f,  3.530312f,  3.413765f,  3.386261f,  3.404854f,  3.383472f,
+      3.420830f,  3.326496f,  3.257877f,  3.362152f,  3.489609f,  3.619587f,
+      3.555805f,  3.423164f,  3.309708f,  -0.483940f, -0.502926f, -0.592983f,
+      -0.492527f, -0.413616f, -0.482555f, -0.475506f, -0.447990f, -0.338120f,
+      -0.189072f, -0.376427f, -0.910828f, -1.878044f, -1.937927f, 1.423218f,
+      4.871609f,  5.767548f,  5.103741f,  3.983868f,  3.633003f,  3.458263f,
+      3.507309f,  3.247021f,  3.220612f,  3.326061f,  3.352814f,  3.291061f,
+      3.322739f,  3.444302f,  3.506207f,  3.556839f,  3.529575f,  3.457024f,
+      -0.408161f, -0.431343f, -0.454369f, -0.356419f, -0.380924f, -0.399452f,
+      -0.439476f, -0.412189f, -0.306816f, -0.008213f, -0.325813f, -0.537842f,
+      -0.984100f, -1.805332f, -2.028198f, 0.773205f,  4.423046f,  5.604839f,
+      5.231617f,  4.080299f,  3.603008f,  3.498741f,  3.517010f,  3.333897f,
+      3.381336f,  3.342617f,  3.369686f,  3.434155f,  3.490452f,  3.607029f,
+      3.555298f,  3.702297f,  3.618679f,  -0.503609f, -0.578564f, -0.419014f,
+      -0.239883f, 0.269836f,  0.022984f,  -0.455067f, -0.621777f, -0.304176f,
+      -0.163792f, -0.490250f, -0.466637f, -0.391792f, -0.657940f, -1.498035f,
+      -1.895836f, 0.036537f,  3.462456f,  5.586445f,  5.658791f,  4.434784f,
+      3.423435f,  3.318848f,  3.202328f,  3.532764f,  3.436687f,  3.354881f,
+      3.356941f,  3.382645f,  3.503902f,  3.512867f,  3.632366f,  3.537312f,
+      -0.274734f, -0.658829f, -0.726532f, -0.281254f, 0.053196f,  -0.064991f,
+      -0.608517f, -0.720966f, -0.070602f, -0.111320f, -0.440956f, -0.492180f,
+      -0.488762f, -0.569283f, -1.012741f, -1.582779f, -2.101479f, -1.392380f,
+      2.451153f,  5.555855f,  6.096313f,  5.230045f,  4.068172f,  3.404274f,
+      3.392586f,  3.326065f,  3.156670f,  3.284828f,  3.347012f,  3.319252f,
+      3.352310f,  3.610790f,  3.499847f,  -0.150600f, -0.314445f, -0.093575f,
+      -0.057384f, 0.053688f,  -0.189255f, -0.263515f, -0.318653f, 0.053246f,
+      0.080627f,  -0.119553f, -0.152454f, -0.305420f, -0.404869f, -0.385944f,
+      -0.689949f, -1.204914f, -1.985748f, -1.711361f, 1.260658f,  4.626896f,
+      5.888351f,  5.450989f,  4.070587f,  3.539200f,  3.383492f,  3.296318f,
+      3.267334f,  3.436028f,  3.463005f,  3.502625f,  3.522282f,  3.403763f,
+      -0.348049f, -0.302303f, -0.137016f, -0.041737f, -0.164001f, -0.358849f,
+      -0.469627f, -0.428291f, -0.375797f, -0.246346f, -0.118950f, -0.084229f,
+      -0.205681f, -0.241199f, -0.391796f, -0.323151f, -0.241211f, -0.834137f,
+      -1.684219f, -1.972137f, 0.448399f,  4.019985f,  5.648144f,  5.647846f,
+      4.295094f,  3.641884f,  3.374790f,  3.197342f,  3.425545f,  3.507481f,
+      3.478065f,  3.430889f,  3.341900f,  -1.016304f, -0.959221f, -0.909466f,
+      -0.810715f, -0.590729f, -0.594467f, -0.646721f, -0.629364f, -0.528561f,
+      -0.551819f, -0.301086f, -0.149101f, -0.060146f, -0.162220f, -0.326210f,
+      -0.156548f, -0.036293f, -0.426098f, -1.145470f, -1.628998f, -2.003052f,
+      -1.142891f, 2.885162f,  5.652863f,  5.718426f,  4.911140f,  3.234222f,
+      3.473373f,  3.577183f,  3.271603f,  3.410435f,  3.505489f,  3.434032f,
+      -0.508911f, -0.438797f, -0.437450f, -0.627426f, -0.511745f, -0.304874f,
+      -0.274246f, -0.261841f, -0.228466f, -0.342491f, -0.528206f, -0.490082f,
+      -0.516350f, -0.361694f, -0.398514f, -0.276020f, -0.210369f, -0.355938f,
+      -0.402622f, -0.538864f, -1.249573f, -2.100105f, -0.996178f, 1.886410f,
+      4.929745f,  5.630871f,  5.444199f,  4.042740f,  3.739189f,  3.691399f,
+      3.391956f,  3.469696f,  3.431232f,  0.204849f,  0.205433f,  -0.131927f,
+      -0.367908f, -0.374378f, -0.126820f, -0.186951f, -0.228565f, -0.081776f,
+      -0.143143f, -0.379230f, -0.598701f, -0.458019f, -0.295586f, -0.407730f,
+      -0.245853f, -0.043140f, 0.024242f,  -0.038998f, -0.044151f, -0.425991f,
+      -1.240753f, -1.943146f, -2.174755f, 0.523415f,  4.376751f,  5.956558f,
+      5.850082f,  4.403152f,  3.517399f,  3.560753f,  3.554836f,  3.471985f,
+      -0.508503f, -0.109783f, 0.057747f,  0.190079f,  -0.257153f, -0.591980f,
+      -0.666771f, -0.525391f, -0.293060f, -0.489731f, -0.304855f, -0.259644f,
+      -0.367825f, -0.346977f, -0.292889f, -0.215652f, -0.120705f, -0.176010f,
+      -0.422905f, -0.114647f, -0.289749f, -0.374203f, -0.606754f, -1.127949f,
+      -1.994583f, -0.588058f, 3.415840f,  5.603470f,  5.811581f,  4.959423f,
+      3.721760f,  3.710499f,  3.785461f,  -0.554588f, -0.565517f, -0.434578f,
+      -0.012482f, -0.284660f, -0.699795f, -0.957535f, -0.755135f, -0.382034f,
+      -0.321552f, -0.287571f, -0.279537f, -0.314972f, -0.256287f, -0.372818f,
+      -0.316017f, -0.287975f, -0.365639f, -0.512589f, -0.420692f, -0.436485f,
+      -0.295353f, -0.451958f, -0.755459f, -1.272358f, -2.301353f, -1.776161f,
+      1.572483f,  4.826286f,  5.741898f,  5.162853f,  4.028049f,  3.686325f,
+      -0.495590f, -0.664413f, -0.760044f, -0.152634f, -0.286480f, -0.340462f,
+      0.076477f,  0.187706f,  -0.068787f, -0.293491f, -0.361145f, -0.292515f,
+      -0.140671f, -0.190723f, -0.333302f, -0.368168f, -0.192581f, -0.154499f,
+      -0.236544f, -0.124405f, -0.208321f, -0.465607f, -0.883080f, -1.104813f,
+      -1.210567f, -1.415665f, -1.924683f, -1.634758f, 0.601017f,  4.276672f,
+      5.501350f,  5.331257f,  3.809288f,  -0.727722f, -0.533619f, -0.511524f,
+      -0.470688f, -0.610710f, -0.575130f, -0.311115f, -0.090420f, -0.297676f,
+      -0.646118f, -0.742805f, -0.485050f, -0.330910f, -0.275417f, -0.357037f,
+      -0.425598f, -0.481876f, -0.488941f, -0.393551f, -0.051105f, -0.090755f,
+      -0.328674f, -0.536369f, -0.533684f, -0.336960f, -0.689194f, -1.187195f,
+      -1.860954f, -2.290253f, -0.424774f, 3.050060f,  5.083332f,  5.291920f,
+      -0.343605f, -0.190975f, -0.303692f, -0.456512f, -0.681820f, -0.690693f,
+      -0.416729f, -0.286446f, -0.442055f, -0.709148f, -0.569160f, -0.382423f,
+      -0.402321f, -0.383362f, -0.366413f, -0.290718f, -0.110069f, -0.220280f,
+      -0.279018f, -0.255424f, -0.262081f, -0.487556f, -0.444492f, -0.250500f,
+      -0.119583f, -0.291557f, -0.537781f, -1.104073f, -1.737091f, -1.697441f,
+      -0.323456f, 2.042049f,  4.605103f,  -0.310631f, -0.279568f, -0.012695f,
+      -0.160130f, -0.358746f, -0.421101f, -0.559677f, -0.474136f, -0.416565f,
+      -0.561817f, -0.534672f, -0.519157f, -0.767197f, -0.605831f, -0.186523f,
+      0.219872f,  0.264984f,  -0.193432f, -0.363182f, -0.467472f, -0.462009f,
+      -0.571053f, -0.522476f, -0.315903f, -0.237427f, -0.147320f, -0.100201f,
+      -0.237568f, -0.763435f, -1.242043f, -2.135159f, -1.409485f, 1.236370f,
+      -0.474247f, -0.517906f, -0.410217f, -0.542244f, -0.795986f, -0.590004f,
+      -0.388863f, -0.462921f, -0.810627f, -0.778637f, -0.512486f, -0.718025f,
+      -0.710854f, -0.482513f, -0.318233f, -0.194962f, -0.220116f, -0.421673f,
+      -0.534233f, -0.403339f, -0.389332f, -0.407303f, -0.437355f, -0.469730f,
+      -0.359600f, -0.352745f, -0.466755f, -0.414585f, -0.430756f, -0.656822f,
+      -1.237038f, -2.046097f, -1.574898f, -0.593815f, -0.582165f, -0.336098f,
+      -0.372612f, -0.554386f, -0.410603f, -0.428276f, -0.647644f, -0.640720f,
+      -0.582207f, -0.414112f, -0.435547f, -0.435505f, -0.332561f, -0.248116f,
+      -0.340221f, -0.277855f, -0.352699f, -0.377319f, -0.230850f, -0.313267f,
+      -0.446270f, -0.346237f, -0.420422f, -0.530781f, -0.400341f, -0.463661f,
+      -0.209091f, -0.056705f, -0.011772f, -0.169388f, -0.736275f, -1.463017f,
+      -0.752701f, -0.668865f, -0.329765f, -0.299347f, -0.245667f, -0.286999f,
+      -0.520420f, -0.675438f, -0.255753f, 0.141357f,  -0.079639f, -0.419476f,
+      -0.374069f, -0.046253f, 0.116116f,  -0.145847f, -0.380371f, -0.563412f,
+      -0.638634f, -0.310116f, -0.260914f, -0.508404f, -0.465508f, -0.527824f,
+      -0.370979f, -0.305595f, -0.244694f, -0.254490f, 0.009968f,  -0.050201f,
+      -0.331219f, -0.614960f, -0.788208f, -0.483242f, -0.367516f, -0.186951f,
+      -0.180031f, 0.129711f,  -0.127811f, -0.384750f, -0.499542f, -0.418613f,
+      -0.121635f, 0.203197f,  -0.167290f, -0.397270f, -0.355461f, -0.218746f,
+      -0.376785f, -0.521698f, -0.721581f, -0.845741f, -0.535439f, -0.220882f,
+      -0.309067f, -0.555248f, -0.690342f, -0.664948f, -0.390102f, 0.020355f,
+      -0.130447f, -0.173252f, -0.170059f, -0.633663f, -0.956001f, -0.621696f,
+      -0.388302f, -0.342262f, -0.244370f, -0.386948f, -0.401421f, -0.172979f,
+      -0.206163f, -0.450058f, -0.525789f, -0.549274f, -0.349251f, -0.474613f,
+      -0.667976f, -0.435600f, -0.175369f, -0.196877f, -0.202976f, -0.242481f,
+      -0.258369f, -0.189133f, -0.395397f, -0.765499f, -0.944016f, -0.850967f,
+      -0.631561f, -0.152493f, -0.046432f, -0.262066f, -0.195919f, 0.048218f,
+      0.084972f,  0.039902f,  0.000618f,  -0.404430f, -0.447456f, -0.418076f,
+      -0.631935f, -0.717415f, -0.502888f, -0.530514f, -0.747826f, -0.704041f,
+      -0.674969f, -0.516853f, -0.418446f, -0.327740f, -0.308815f, -0.481636f,
+      -0.440083f, -0.481720f, -0.341053f, -0.283897f, -0.324368f, -0.352829f,
+      -0.434349f, -0.545589f, -0.533104f, -0.472755f, -0.570496f, -0.557735f,
+      -0.708176f, -0.493332f, -0.194416f, -0.186249f, -0.256710f, -0.271835f,
+      -0.304752f, -0.431267f, -0.422398f, -0.646725f, -0.680801f, -0.249031f,
+      -0.058567f, -0.213890f, -0.383949f, -0.540291f, -0.549877f, -0.225567f,
+      -0.037174f, -0.499874f, -0.641010f, -0.628044f, -0.390549f, -0.311497f,
+      -0.542313f, -0.569565f, -0.473408f, -0.331245f, -0.357197f, -0.285599f,
+      -0.200157f, -0.201866f, -0.124428f, -0.346016f, -0.392311f, -0.264496f,
+      -0.285370f, -0.436974f, -0.523483f, -0.410461f, -0.267925f, -0.055016f,
+      -0.382458f, -0.319771f, -0.049927f, 0.124329f,  0.266102f,  -0.106606f,
+      -0.773647f, -0.973053f, -0.708206f, -0.486137f, -0.319923f, -0.493900f,
+      -0.490860f, -0.324986f, -0.147346f, -0.146088f, -0.161758f, -0.084396f,
+      -0.379494f, 0.041626f,  -0.113361f, -0.277767f, 0.083366f,  0.126476f,
+      0.139057f,  0.038040f,  0.038162f,  -0.242126f, -0.411736f, -0.370049f,
+      -0.455357f, -0.039257f, 0.264442f,  -0.271492f, -0.425346f, -0.514847f,
+      -0.448650f, -0.580399f, -0.652603f, -0.774803f, -0.692524f, -0.579578f,
+      -0.465206f, -0.386265f, -0.458012f, -0.446594f, -0.284893f, -0.345448f,
+      -0.350876f, -0.440350f, -0.360378f, -0.270428f, 0.237213f,  -0.063602f,
+      -0.364529f, -0.179867f, 0.078197f,  0.117947f,  -0.093410f, -0.359119f,
+      -0.480961f, -0.540638f, -0.436287f, -0.598576f, -0.253735f, -0.060093f,
+      -0.549145f, -0.808327f, -0.698593f, -0.595764f, -0.582508f, -0.497353f,
+      -0.480892f, -0.584240f, -0.665791f, -0.690903f, -0.743446f, -0.796677f,
+      -0.782391f, -0.649010f, -0.628139f, -0.880848f, -0.829361f, -0.373272f,
+      -0.223667f, 0.174572f,  -0.348743f, -0.798901f, -0.692307f, -0.607609f,
+      -0.401455f, -0.480919f, -0.450798f, -0.435413f, -0.322338f, -0.228382f,
+      -0.450466f, -0.504440f, -0.477402f, -0.662224f, -0.583397f, -0.217445f,
+      -0.157459f, -0.079584f, -0.226168f, -0.488720f, -0.669624f, -0.666878f,
+      -0.565311f, -0.549625f, -0.364601f, -0.497627f, -0.736897f, -0.763023f,
+      -0.741020f, -0.404503f, 0.184814f,  -0.075315f, -0.281513f, -0.532906f,
+      -0.405800f, -0.313438f, -0.536652f, -0.403381f, 0.011967f,  0.103310f,
+      -0.269848f, -0.508656f, -0.445923f, -0.644859f, -0.617870f, -0.500927f,
+      -0.371559f, -0.125580f, 0.028625f,  -0.154713f, -0.442024f, -0.492764f,
+      -0.199371f, 0.236305f,  0.225925f,  0.075577f,  -0.285812f, -0.437145f,
+      -0.374260f, -0.156693f, -0.129635f, -0.243206f, -0.123058f, 0.162148f,
+      -0.313152f, -0.337982f, -0.358421f, 0.040070f,  0.038925f,  -0.333313f,
+      -0.351662f, 0.023014f,  0.091362f,  -0.282890f, -0.373253f, -0.389050f,
+      -0.532707f, -0.423347f, -0.349968f, -0.287045f, -0.202442f, -0.308430f,
+      -0.222801f, -0.106323f, -0.056358f, 0.027222f,  0.390732f,  0.033558f,
+      -0.160088f, -0.382217f, -0.535282f, -0.515900f, -0.022736f, 0.165665f,
+      -0.111408f, -0.233784f, -0.312357f, -0.541885f, -0.480022f, -0.482513f,
+      -0.246254f, 0.132244f,  0.090134f,  0.234634f,  -0.089249f, -0.460854f,
+      -0.515457f, -0.450874f, -0.311031f, -0.387680f, -0.360554f, -0.179241f,
+      -0.283817f, -0.475815f, -0.246399f, -0.388958f, -0.551140f, -0.496239f,
+      -0.559879f, -0.379761f, -0.254288f, -0.395111f, -0.613018f, -0.459427f,
+      -0.263580f, -0.268929f, 0.080826f,  0.115616f,  -0.097324f, -0.325310f,
+      -0.480450f, -0.313286f, -0.310371f, -0.517361f, -0.288288f, -0.112679f,
+      -0.173241f, -0.221664f, -0.039452f, -0.107578f, -0.089630f, -0.483768f,
+      -0.571087f, -0.497108f, -0.321533f, -0.375492f, -0.540363f, -0.406815f,
+      -0.388512f, -0.514561f, -0.540192f, -0.402412f, -0.232246f, -0.304749f,
+      -0.383724f, -0.679596f, -0.685463f, -0.694538f, -0.642937f, -0.425789f,
+      0.103271f,  -0.194862f, -0.487999f, -0.717281f, -0.681850f, -0.709286f,
+      -0.615398f, -0.554245f, -0.254681f, -0.049950f, -0.002914f, -0.095383f,
+      -0.370911f, -0.564224f, -0.242714f};
+  const size_t xtest = xsize / 2;
+  const size_t ytest = ysize / 2;
+
+  for (intptr_t dy = -16; dy <= 16; ++dy) {
+    float* row = in.Row(ytest + dy);
+    for (intptr_t dx = -16; dx <= 16; ++dx)
+      row[xtest + dx] = center[(dy + 16) * 33 + (dx + 16)];
+  }
+
+  const double sigma = 7.155933;
+
+  ImageF temp(xsize, ysize);
+  ImageF out_rg(xsize, ysize);
+  const auto rg = CreateRecursiveGaussian(sigma);
+  ThreadPool* null_pool = nullptr;
+  FastGaussian(rg, in, null_pool, &temp, &out_rg);
+
+  ImageF out_old;
+  {
+    const std::vector<float> kernel =
+        GaussianKernel(static_cast<int>(4 * sigma), static_cast<float>(sigma));
+    printf("old kernel size %" PRIuS "\n", kernel.size());
+    out_old = Convolve(in, kernel);
+  }
+
+  printf("rg %.4f old %.4f\n", out_rg.Row(ytest)[xtest],
+         out_old.Row(ytest)[xtest]);
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/gradient_test.cc b/third-party/libjxl/libjxl/lib/jxl/gradient_test.cc
new file mode 100644
index 0000000000..282fe89f0a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/gradient_test.cc
@@ -0,0 +1,207 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <math.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <utility>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+namespace {
+
+// Returns the signed distance of point (x, y) to the line p0..p1, not
+// normalized: the value is the true distance times the length of p0..p1.
+double PointLineDist(double x0, double y0, double x1, double y1, double x,
+                     double y) {
+  return (y1 - y0) * x - (x1 - x0) * y + x1 * y0 - y1 * x0;
+}
+
+// Generates a test image with a gradient from one color to another.
+// Angle in degrees, colors can be given in hex as 0xRRGGBB. The angle is the
+// angle in which the change direction happens.
+Image3F GenerateTestGradient(uint32_t color0, uint32_t color1, double angle,
+                             size_t xsize, size_t ysize) {
+  Image3F image(xsize, ysize);
+
+  double x0 = xsize / 2;  // image center (integer division)
+  double y0 = ysize / 2;
+  double x1 = x0 + std::sin(angle / 360.0 * 2.0 * kPi);  // degrees -> radians
+  double y1 = y0 + std::cos(angle / 360.0 * 2.0 * kPi);
+
+  double maxdist =  // only corners (0, 0) and (xsize, 0) are evaluated
+      std::max<double>(fabs(PointLineDist(x0, y0, x1, y1, 0, 0)),
+                       fabs(PointLineDist(x0, y0, x1, y1, xsize, 0)));
+
+  for (size_t c = 0; c < 3; ++c) {
+    float c0 = ((color0 >> (8 * (2 - c))) & 255);  // c = 0: R, 1: G, 2: B
+    float c1 = ((color1 >> (8 * (2 - c))) & 255);
+    for (size_t y = 0; y < ysize; ++y) {
+      float* row = image.PlaneRow(c, y);
+      for (size_t x = 0; x < xsize; ++x) {
+        double dist = PointLineDist(x0, y0, x1, y1, x, y);
+        double v = ((dist / maxdist) + 1.0) / 2.0;  // [-max, max] -> [0, 1]
+        float color = c0 * (1.0 - v) + c1 * v;
+        row[x] = color;
+      }
+    }
+  }
+
+  return image;
+}
+
+// Computes the max of the horizontal and vertical second derivative for each
+// pixel, where second derivative means absolute value of difference of left
+// delta and right delta (top/bottom for vertical direction).
+// The radius over which the derivative is computed is only 1 pixel and it only
+// checks two angles (hor and ver), but this approximation works well enough.
+static ImageF Gradient2(const ImageF& image) {
+  size_t xsize = image.xsize();
+  size_t ysize = image.ysize();
+  ImageF image2(image.xsize(), image.ysize());
+  for (size_t y = 1; y + 1 < ysize; y++) {  // interior rows
+    const auto* JXL_RESTRICT row0 = image.Row(y - 1);
+    const auto* JXL_RESTRICT row1 = image.Row(y);
+    const auto* JXL_RESTRICT row2 = image.Row(y + 1);
+    auto* row_out = image2.Row(y);
+    for (size_t x = 1; x + 1 < xsize; x++) {  // interior columns
+      float ddx = (row1[x] - row1[x - 1]) - (row1[x + 1] - row1[x]);
+      float ddy = (row1[x] - row0[x]) - (row2[x] - row1[x]);
+      row_out[x] = std::max(fabsf(ddx), fabsf(ddy));
+    }
+  }
+  // Copy to the borders: top/bottom rows first, then left/right columns
+  if (ysize > 2) {
+    auto* JXL_RESTRICT row0 = image2.Row(0);
+    const auto* JXL_RESTRICT row1 = image2.Row(1);
+    const auto* JXL_RESTRICT row2 = image2.Row(ysize - 2);
+    auto* JXL_RESTRICT row3 = image2.Row(ysize - 1);
+    for (size_t x = 1; x + 1 < xsize; x++) {
+      row0[x] = row1[x];
+      row3[x] = row2[x];
+    }
+  } else {
+    const auto* row0_in = image.Row(0);
+    const auto* row1_in = image.Row(ysize - 1);  // wraps around if ysize == 0
+    auto* row0_out = image2.Row(0);
+    auto* row1_out = image2.Row(ysize - 1);
+    for (size_t x = 1; x + 1 < xsize; x++) {
+      // Image too short (ysize <= 2), take first derivative instead
+      row0_out[x] = row1_out[x] = fabsf(row0_in[x] - row1_in[x]);
+    }
+  }
+  if (xsize > 2) {
+    for (size_t y = 0; y < ysize; y++) {
+      auto* row = image2.Row(y);
+      row[0] = row[1];
+      row[xsize - 1] = row[xsize - 2];
+    }
+  } else {
+    for (size_t y = 0; y < ysize; y++) {
+      const auto* JXL_RESTRICT row_in = image.Row(y);
+      auto* row_out = image2.Row(y);
+      // Image too narrow (xsize <= 2), take first derivative instead
+      row_out[0] = row_out[xsize - 1] = fabsf(row_in[0] - row_in[xsize - 1]);
+    }
+  }
+  return image2;
+}
+
+static Image3F Gradient2(const Image3F& image) {  // applied to each plane
+  return Image3F(Gradient2(image.Plane(0)), Gradient2(image.Plane(1)),
+                 Gradient2(image.Plane(2)));
+}
+
+/*
+Tests if roundtrip with jxl on a gradient image doesn't cause banding.
+Only tests if use_gradient is true. Set to false for debugging to see the
+distance values.
+Angle in degrees, colors can be given in hex as 0xRRGGBB.
+*/
+void TestGradient(ThreadPool* pool, uint32_t color0, uint32_t color1,
+                  size_t xsize, size_t ysize, float angle, bool fast_mode,
+                  float butteraugli_distance, bool use_gradient = true) {
+  CompressParams cparams;
+  cparams.butteraugli_distance = butteraugli_distance;
+  if (fast_mode) {  // otherwise cparams keeps its default speed tier
+    cparams.speed_tier = SpeedTier::kSquirrel;
+  }
+  Image3F gradient = GenerateTestGradient(color0, color1, angle, xsize, ysize);
+
+  CodecInOut io;  // source image handed to the encoder
+  io.metadata.m.SetUintSamples(8);
+  io.metadata.m.color_encoding = ColorEncoding::SRGB();
+  io.SetFromImage(std::move(gradient), io.metadata.m.color_encoding);
+
+  CodecInOut io2;  // receives the decoded image
+
+  PaddedBytes compressed;
+  AuxOut* aux_out = nullptr;
+  PassesEncoderState enc_state;
+  EXPECT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(),
+                         aux_out, pool));
+  EXPECT_TRUE(
+      test::DecodeFile({}, Span<const uint8_t>(compressed), &io2, pool));
+  EXPECT_TRUE(
+      io2.Main().TransformTo(io2.metadata.m.color_encoding, GetJxlCms(), pool));
+
+  if (use_gradient) {
+    // Test that the gradient map worked. For that, we take a second derivative
+    // of the image with Gradient2 to measure how linear the change is in x and
+    // y direction. For a well handled gradient, we expect max values around
+    // 0.1, while if there is noticeable banding, which means the gradient map
+    // failed, the values are around 0.5-1.0 (regardless of
+    // butteraugli_distance).
+    Image3F gradient2 = Gradient2(*io2.Main().color());
+
+    std::array<float, 3> image_max;
+    Image3Max(gradient2, &image_max);
+
+    // TODO(jyrki): These values used to work with 0.2, 0.2, 0.2.
+    EXPECT_LE(image_max[0], 3.15);  // NOTE(review): far above the ~0.1 above
+    EXPECT_LE(image_max[1], 1.72);
+    EXPECT_LE(image_max[2], 5.05);
+  }
+}
+
+static constexpr bool fast_mode = true;  // passed to TestGradient below
+
+TEST(GradientTest, SteepGradient) {  // 512x512, angle 90, distance 3.0
+  test::ThreadPoolForTests pool(8);
+  // Relatively steep gradients, colors from the sky of stp.png
+  TestGradient(&pool, 0xd99d58, 0x889ab1, 512, 512, 90, fast_mode, 3.0);
+}
+
+TEST(GradientTest, SubtleGradient) {  // 512x512, angle 90, distance 4.0
+  test::ThreadPoolForTests pool(8);
+  // Very subtle gradient
+  TestGradient(&pool, 0xb89b7b, 0xa89b8d, 512, 512, 90, fast_mode, 4.0);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/headers.cc b/third-party/libjxl/libjxl/lib/jxl/headers.cc
new file mode 100644
index 0000000000..dc53726385
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/headers.cc
@@ -0,0 +1,194 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/headers.h"
+
+#include "lib/jxl/common.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+namespace {
+
+// A fraction num/den with 32-bit terms.
+struct Rational {
+  constexpr explicit Rational(uint32_t numerator, uint32_t denominator)
+      : num(numerator), den(denominator) {}
+
+  // Computes floor(multiplicand * num / den). The product is formed in 64 bits
+  // and the quotient is then narrowed to 32 bits.
+  constexpr uint32_t MulTruncate(uint32_t multiplicand) const {
+    return static_cast<uint32_t>(
+        (static_cast<uint64_t>(multiplicand) * num) / den);
+  }
+
+  uint32_t num;
+  uint32_t den;
+};
+
+// Looks up the fixed aspect ratio (xsize / ysize) for a ratio code in [1, 7].
+// Code 0 is not a valid argument.
+Rational FixedAspectRatios(uint32_t ratio) {
+  // Other candidates: 7/5, 14/9, 16/10, 5/3, 21/9, 12/5
+  constexpr Rational kByCode[7] = {
+      Rational(1, 1),    // code 1: square
+      Rational(12, 10),  // code 2
+      Rational(4, 3),    // code 3: camera
+      Rational(3, 2),    // code 4: mobile camera
+      Rational(16, 9),   // code 5: camera/display
+      Rational(5, 4),    // code 6
+      Rational(2, 1),    // code 7
+  };
+  JXL_ASSERT(ratio != 0 && ratio < 8);
+  const uint32_t index = ratio - 1;
+  return kByCode[index];
+}
+
+// Returns the first ratio code in [1, 7] whose fixed aspect ratio reproduces
+// xsize from ysize exactly, or 0 when no code does.
+uint32_t FindAspectRatio(uint32_t xsize, uint32_t ysize) {
+  uint32_t code = 1;
+  while (code < 8) {
+    const uint32_t derived_xsize = FixedAspectRatios(code).MulTruncate(ysize);
+    if (derived_xsize == xsize) return code;
+    ++code;
+  }
+  return 0;  // Must send xsize instead
+}
+
+}  // namespace
+
+// Width of the image: stored explicitly when ratio_ is 0, otherwise derived
+// from ysize() through the fixed aspect ratio selected by ratio_.
+size_t SizeHeader::xsize() const {
+  if (ratio_ == 0) {
+    if (small_) return (xsize_div8_minus_1_ + 1) * 8;
+    return xsize_;
+  }
+  const uint32_t height = static_cast<uint32_t>(ysize());
+  return FixedAspectRatios(ratio_).MulTruncate(height);
+}
+
+// Stores the image dimensions in their compact form. Fails when a dimension
+// exceeds 32 bits or is zero.
+Status SizeHeader::Set(size_t xsize64, size_t ysize64) {
+  const bool fits_in_32_bits =
+      xsize64 <= 0xFFFFFFFFull && ysize64 <= 0xFFFFFFFFull;
+  if (!fits_in_32_bits) {
+    return JXL_FAILURE("Image too large");
+  }
+  if (xsize64 == 0 || ysize64 == 0) return JXL_FAILURE("Empty image");
+
+  const uint32_t width = static_cast<uint32_t>(xsize64);
+  const uint32_t height = static_cast<uint32_t>(ysize64);
+  ratio_ = FindAspectRatio(width, height);
+
+  // The compact form needs a height that is a multiple of 8 up to 256, and a
+  // width that is either implied by the ratio or obeys the same constraint.
+  const bool height_is_small = ysize64 <= 256 && (ysize64 % kBlockDim) == 0;
+  const bool width_is_small = xsize64 <= 256 && (xsize64 % kBlockDim) == 0;
+  small_ = height_is_small && (ratio_ != 0 || width_is_small);
+
+  // The width is only stored when no fixed ratio describes it.
+  if (small_) {
+    ysize_div8_minus_1_ = height / 8 - 1;
+    if (ratio_ == 0) xsize_div8_minus_1_ = width / 8 - 1;
+  } else {
+    ysize_ = height;
+    if (ratio_ == 0) xsize_ = width;
+  }
+
+  // The accessors must reproduce exactly what was passed in.
+  JXL_ASSERT(xsize() == xsize64);
+  JXL_ASSERT(ysize() == ysize64);
+  return true;
+}
+
+// Stores the preview dimensions in their compact form.
+// Returns a failure when a dimension is zero or does not fit in 32 bits, which
+// matches the range check done by SizeHeader::Set.
+Status PreviewHeader::Set(size_t xsize64, size_t ysize64) {
+  if (xsize64 == 0 || ysize64 == 0) return JXL_FAILURE("Empty preview");
+  // The fields are 32-bit. Without this check a larger value would be silently
+  // truncated by the casts below and then fail the JXL_ASSERTs at the end
+  // instead of being reported to the caller.
+  if (xsize64 > 0xFFFFFFFFull || ysize64 > 0xFFFFFFFFull) {
+    return JXL_FAILURE("Preview too large");
+  }
+  const uint32_t xsize32 = static_cast<uint32_t>(xsize64);
+  const uint32_t ysize32 = static_cast<uint32_t>(ysize64);
+  // The divided-by-8 form is only usable when both dimensions are multiples
+  // of kBlockDim.
+  div8_ = (xsize64 % kBlockDim) == 0 && (ysize64 % kBlockDim) == 0;
+  if (div8_) {
+    ysize_div8_ = ysize32 / 8;
+  } else {
+    ysize_ = ysize32;
+  }
+
+  // The width is only stored when no fixed aspect ratio describes it.
+  ratio_ = FindAspectRatio(xsize32, ysize32);
+  if (ratio_ == 0) {
+    if (div8_) {
+      xsize_div8_ = xsize32 / 8;
+    } else {
+      xsize_ = xsize32;
+    }
+  }
+  // The accessors must reproduce exactly what was passed in.
+  JXL_ASSERT(xsize() == xsize64);
+  JXL_ASSERT(ysize() == ysize64);
+  return true;
+}
+
+// Width of the preview: stored explicitly when ratio_ is 0, otherwise derived
+// from ysize() through the fixed aspect ratio selected by ratio_.
+size_t PreviewHeader::xsize() const {
+  if (ratio_ == 0) {
+    if (div8_) return xsize_div8_ * 8;
+    return xsize_;
+  }
+  const uint32_t height = static_cast<uint32_t>(ysize());
+  return FixedAspectRatios(ratio_).MulTruncate(height);
+}
+
+SizeHeader::SizeHeader() { Bundle::Init(this); }  // Construction only calls Bundle::Init.
+Status SizeHeader::VisitFields(Visitor* JXL_RESTRICT visitor) {  // Visits small_, ysize, ratio_, then xsize.
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &small_));
+
+  if (visitor->Conditional(small_)) {  // Compact form: ysize is kept as ysize / 8 - 1.
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(5, 0, &ysize_div8_minus_1_));
+  }
+  if (visitor->Conditional(!small_)) {
+    // Explicit ysize. (Could still be small, but non-multiple of 8.)
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(BitsOffset(9, 1), BitsOffset(13, 1),
+                                           BitsOffset(18, 1), BitsOffset(30, 1),
+                                           1, &ysize_));
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 0, &ratio_));  // 0: xsize is visited explicitly below.
+  if (visitor->Conditional(ratio_ == 0 && small_)) {  // Non-zero ratio_: xsize() derives the width, nothing to visit.
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(5, 0, &xsize_div8_minus_1_));
+  }
+  if (visitor->Conditional(ratio_ == 0 && !small_)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(BitsOffset(9, 1), BitsOffset(13, 1),
+                                           BitsOffset(18, 1), BitsOffset(30, 1),
+                                           1, &xsize_));
+  }
+
+  return true;
+}
+
+PreviewHeader::PreviewHeader() { Bundle::Init(this); }  // Construction only calls Bundle::Init.
+Status PreviewHeader::VisitFields(Visitor* JXL_RESTRICT visitor) {  // Visits div8_, ysize, ratio_, then xsize.
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &div8_));
+
+  if (visitor->Conditional(div8_)) {  // Divided form: ysize is kept as ysize / 8.
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(16), Val(32), BitsOffset(5, 1),
+                                           BitsOffset(9, 33), 1, &ysize_div8_));
+  }
+  if (visitor->Conditional(!div8_)) {  // Explicit ysize.
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(BitsOffset(6, 1), BitsOffset(8, 65),
+                                           BitsOffset(10, 321),
+                                           BitsOffset(12, 1345), 1, &ysize_));
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 0, &ratio_));  // 0: xsize is visited explicitly below.
+  if (visitor->Conditional(ratio_ == 0 && div8_)) {  // Non-zero ratio_: xsize() derives the width, nothing to visit.
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(16), Val(32), BitsOffset(5, 1),
+                                           BitsOffset(9, 33), 1, &xsize_div8_));
+  }
+  if (visitor->Conditional(ratio_ == 0 && !div8_)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(BitsOffset(6, 1), BitsOffset(8, 65),
+                                           BitsOffset(10, 321),
+                                           BitsOffset(12, 1345), 1, &xsize_));
+  }
+
+  return true;
+}
+
+AnimationHeader::AnimationHeader() { Bundle::Init(this); }  // Construction only calls Bundle::Init.
+Status AnimationHeader::VisitFields(Visitor* JXL_RESTRICT visitor) {  // Visits the four fields in declaration order.
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(100), Val(1000), BitsOffset(10, 1),
+                                         BitsOffset(30, 1), 1, &tps_numerator));
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(1), Val(1001), BitsOffset(8, 1),
+                                         BitsOffset(10, 1), 1,
+                                         &tps_denominator));
+
+  JXL_QUIET_RETURN_IF_ERROR(
+      visitor->U32(Val(0), Bits(3), Bits(16), Bits(32), 0, &num_loops));  // num_loops == 0 means repeat infinitely (see headers.h).
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &have_timecodes));
+  return true;
+}
+
+Status ReadSizeHeader(BitReader* JXL_RESTRICT reader,
+                      SizeHeader* JXL_RESTRICT size) {
+  return Bundle::Read(reader, size);  // Thin wrapper: forwards both arguments to Bundle::Read.
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/headers.h b/third-party/libjxl/libjxl/lib/jxl/headers.h
new file mode 100644
index 0000000000..3cce84dabc
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/headers.h
@@ -0,0 +1,97 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_HEADERS_H_
+#define LIB_JXL_HEADERS_H_
+
+// Codestream headers.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/field_encodings.h"
+
+namespace jxl {
+
+// Value 0x0A (LF) is reserved by ISO/IEC 10918-1. LF causes files opened in
+// text mode to be rejected because the marker changes to 0x0D instead. The 0xFF
+// prefix also ensures there were no 7-bit transmission limitations.
+static constexpr uint8_t kCodestreamMarker = 0x0A;
+
+// Image dimensions in a compact form (best case: 9 bits), placed so decoders
+// can preallocate early.
+class SizeHeader : public Fields {
+ public:
+  SizeHeader();
+  JXL_FIELDS_NAME(SizeHeader)
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // Stores the given dimensions; fails for zero or more-than-32-bit values.
+  Status Set(size_t xsize, size_t ysize);
+
+  size_t xsize() const;
+  size_t ysize() const {
+    if (small_) return (ysize_div8_minus_1_ + 1) * 8;
+    return ysize_;
+  }
+
+ private:
+  // True when ysize is <= 256 and divisible by 8, and xsize either follows
+  // from ratio_ or is also <= 256 and divisible by 8.
+  bool small_;
+
+  uint32_t ysize_div8_minus_1_;  // used when small_
+  uint32_t ysize_;               // used when !small_
+
+  uint32_t ratio_;               // 0: xsize is stored; 1..7: fixed aspect ratio
+  uint32_t xsize_div8_minus_1_;  // used when ratio_ == 0 and small_
+  uint32_t xsize_;               // used when ratio_ == 0 and !small_
+};
+
+// Preview dimensions. Similar to SizeHeader, but with a different encoding
+// because previews are smaller.
+class PreviewHeader : public Fields {
+ public:
+  PreviewHeader();
+  JXL_FIELDS_NAME(PreviewHeader)
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // Stores the given dimensions; fails when either one is zero.
+  Status Set(size_t xsize, size_t ysize);
+
+  size_t xsize() const;
+  size_t ysize() const {
+    if (div8_) return ysize_div8_ * 8;
+    return ysize_;
+  }
+
+ private:
+  bool div8_;  // xsize and ysize divisible by 8.
+
+  uint32_t ysize_div8_;  // used when div8_
+  uint32_t ysize_;       // used when !div8_
+
+  uint32_t ratio_;       // 0: xsize is stored; 1..7: fixed aspect ratio
+  uint32_t xsize_div8_;  // used when ratio_ == 0 and div8_
+  uint32_t xsize_;       // used when ratio_ == 0 and !div8_
+};
+
+struct AnimationHeader : public Fields {
+  AnimationHeader();
+  JXL_FIELDS_NAME(AnimationHeader)
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // Ticks per second, expressed as the rational tps_numerator / tps_denominator to support NTSC.
+  uint32_t tps_numerator;
+  uint32_t tps_denominator;
+
+  uint32_t num_loops;  // 0 means to repeat infinitely.
+
+  bool have_timecodes;  // NOTE(review): meaning not visible in this file; presumably per-frame timecodes are present.
+};
+
+Status ReadSizeHeader(BitReader* JXL_RESTRICT reader,
+                      SizeHeader* JXL_RESTRICT size);  // Fills *size from reader; returns the resulting Status.
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_HEADERS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/huffman_table.cc b/third-party/libjxl/libjxl/lib/jxl/huffman_table.cc
new file mode 100644
index 0000000000..9ae7865af6
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/huffman_table.cc
@@ -0,0 +1,161 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/huffman_table.h"
+
+#include <cstring> /* for memcpy */
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/dec_huffman.h"
+
+namespace jxl {
+
+/* Advances a bit-reversed counter: returns reverse(reverse(key, len) + 1, len),
+   where reverse(key, len) is the bit-wise reversal of the len least
+   significant bits of key. */
+static inline int GetNextKey(int key, int len) {
+  /* Walk down from bit (len - 1) to the highest bit that is clear in key. */
+  int bit = 1u << (len - 1);
+  for (; (key & bit) != 0; bit >>= 1) {
+  }
+  /* Keep the bits below it, set it, and drop everything above it. */
+  return (key & (bit - 1)) + bit;
+}
+
+/* Writes code to table[end - step], table[end - 2 * step], ... and stops after
+   the first write at an index <= 0. */
+/* Assumes that end is a positive integer multiple of step, so that the last
+   write lands on table[0]. */
+static inline void ReplicateValue(HuffmanCode* table, int step, int end,
+                                  HuffmanCode code) {
+  int pos = end;
+  for (;;) {
+    pos -= step;
+    table[pos] = code;
+    if (pos <= 0) break;
+  }
+}
+
+/* Computes the bit width of the next 2nd level table. count is the histogram
+   of bit lengths for the symbols not yet placed, len is the code length of the
+   next symbol to be processed. */
+static inline size_t NextTableBitSize(const uint16_t* const count, size_t len,
+                                      int root_bits) {
+  /* Slots still unassigned at the current length. */
+  size_t free_slots = 1u << (len - root_bits);
+  /* Grow the table by one bit for as long as the codes of the current length
+     do not fill the remaining slots. */
+  for (; len < PREFIX_MAX_BITS && free_slots > count[len]; ++len) {
+    free_slots = (free_slots - count[len]) << 1;
+  }
+  return len - root_bits;
+}
+
+uint32_t BuildHuffmanTable(HuffmanCode* root_table, int root_bits,
+                           const uint8_t* const code_lengths,
+                           size_t code_lengths_size, uint16_t* count) {  /* count[] is decremented as codes are assigned */
+  HuffmanCode code;   /* current table entry */
+  HuffmanCode* table; /* next available space in table */
+  size_t len;         /* current code length */
+  size_t symbol;      /* symbol index in original or sorted table */
+  int key;            /* reversed prefix code */
+  int step;           /* step size to replicate values in current table */
+  int low;            /* low bits for current root entry */
+  int mask;           /* mask for low bits */
+  size_t table_bits;  /* key length of current table */
+  int table_size;     /* size of current table */
+  int total_size;     /* sum of root table size and 2nd level table sizes */
+  /* offsets in sorted table for each length */
+  uint16_t offset[PREFIX_MAX_BITS + 1];
+  size_t max_length = 1;
+
+  if (code_lengths_size > 1u << PREFIX_MAX_BITS) return 0;  /* too many symbols: 0 signals an error */
+
+  /* symbols sorted by code length */
+  std::vector<uint16_t> sorted_storage(code_lengths_size);
+  uint16_t* sorted = sorted_storage.data();
+
+  /* generate offsets into sorted symbol table by code length */
+  {
+    uint16_t sum = 0;
+    for (len = 1; len <= PREFIX_MAX_BITS; len++) {
+      offset[len] = sum;
+      if (count[len]) {
+        sum = static_cast<uint16_t>(sum + count[len]);
+        max_length = len;  /* ends up as the longest length that has any code */
+      }
+    }
+  }
+
+  /* sort symbols by length, by symbol order within each length */
+  for (symbol = 0; symbol < code_lengths_size; symbol++) {
+    if (code_lengths[symbol] != 0) {  /* length 0 means the symbol is unused */
+      sorted[offset[code_lengths[symbol]]++] = symbol;
+    }
+  }
+
+  table = root_table;
+  table_bits = root_bits;
+  table_size = 1u << table_bits;
+  total_size = table_size;
+
+  /* special case code with only one value */
+  if (offset[PREFIX_MAX_BITS] == 1) {  /* after sorting this is the used-symbol count, provided count[] matches code_lengths */
+    code.bits = 0;
+    code.value = static_cast<uint16_t>(sorted[0]);
+    for (key = 0; key < total_size; ++key) {
+      table[key] = code;
+    }
+    return total_size;
+  }
+
+  /* fill in root table */
+  /* let's reduce the table size to a smaller size if possible, and */
+  /* create the repetitions by memcpy if possible in the coming loop */
+  if (table_bits > max_length) {
+    table_bits = max_length;
+    table_size = 1u << table_bits;
+  }
+  key = 0;
+  symbol = 0;
+  code.bits = 1;
+  step = 2;
+  do {
+    for (; count[code.bits] != 0; --count[code.bits]) {
+      code.value = static_cast<uint16_t>(sorted[symbol++]);
+      ReplicateValue(&table[key], step, table_size, code);
+      key = GetNextKey(key, code.bits);
+    }
+    step <<= 1;
+  } while (++code.bits <= table_bits);
+
+  /* if root_bits != table_bits we only created one fraction of the */
+  /* table, and we need to replicate it now. */
+  while (total_size != table_size) {
+    memcpy(&table[table_size], &table[0], table_size * sizeof(table[0]));
+    table_size <<= 1;
+  }
+
+  /* fill in 2nd level tables and add pointers to root table */
+  mask = total_size - 1;
+  low = -1;
+  for (len = root_bits + 1, step = 2; len <= max_length; ++len, step <<= 1) {
+    for (; count[len] != 0; --count[len]) {
+      if ((key & mask) != low) {  /* new root prefix: start a fresh 2nd level table */
+        table += table_size;
+        table_bits = NextTableBitSize(count, len, root_bits);
+        table_size = 1u << table_bits;
+        total_size += table_size;
+        low = key & mask;
+        root_table[low].bits = static_cast<uint8_t>(table_bits + root_bits);
+        root_table[low].value =
+            static_cast<uint16_t>((table - root_table) - low);  /* offset from this root entry to its sub-table */
+      }
+      code.bits = static_cast<uint8_t>(len - root_bits);
+      code.value = static_cast<uint16_t>(sorted[symbol++]);
+      ReplicateValue(&table[key >> root_bits], step, table_size, code);
+      key = GetNextKey(key, len);
+    }
+  }
+
+  return total_size;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/huffman_table.h b/third-party/libjxl/libjxl/lib/jxl/huffman_table.h
new file mode 100644
index 0000000000..11cdb2fc45
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/huffman_table.h
@@ -0,0 +1,28 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_HUFFMAN_TABLE_H_
+#define LIB_JXL_HUFFMAN_TABLE_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+namespace jxl {
+
+struct HuffmanCode {
+  uint8_t bits;   /* number of bits used for this symbol (root link entries: root_bits + sub-table bits) */
+  uint16_t value; /* symbol value, or offset from a root entry to its 2nd level table */
+};
+
+/* Builds Huffman lookup table assuming code lengths are in symbol order. */
+/* Returns 0 in case of error (invalid tree or memory error), otherwise
+   populated size of table. The entries of count are modified by the call. */
+uint32_t BuildHuffmanTable(HuffmanCode* root_table, int root_bits,
+                           const uint8_t* code_lengths,
+                           size_t code_lengths_size, uint16_t* count);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_HUFFMAN_TABLE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/iaca_test.cc b/third-party/libjxl/libjxl/lib/jxl/iaca_test.cc
new file mode 100644
index 0000000000..e25d9316d5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/iaca_test.cc
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/iaca.h"
+
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+TEST(IacaTest, MarkersDefaultToDisabledAndDoNotCrash) {  // No assertions: passes when both calls return.
+  BeginIACA();
+  EndIACA();
+}
+
+TEST(IacaTest, ScopeDefaultToDisabledAndDoNotCrash) {
+  // Constructing and destroying the scope object is the whole test.
+  ScopeIACA scope;
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/icc_codec.cc b/third-party/libjxl/libjxl/lib/jxl/icc_codec.cc
new file mode 100644
index 0000000000..f367461c0f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/icc_codec.cc
@@ -0,0 +1,389 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/icc_codec.h"
+
+#include <stdint.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/icc_codec_common.h"
+
+namespace jxl {
+namespace {
+
+// Interleaves bytes in place; for example, width 2 turns "ABCDabcd" into
+// "AaBbCcDd". This transposes a matrix that has ceil(size / width) columns and
+// width rows. Only size elements exist, and size may be smaller than
+// width * height. In that case the last elements of the rightmost column are
+// missing; the gaps are transposed together with the filled cells and end up
+// at the end of the bottom row of the result. The input is the source matrix
+// in scanline order with the missing cells skipped (the gaps may fall in
+// several places). The output is the result matrix in scanline order, where
+// nothing needs skipping because the gaps lie past the end of the data.
+void Shuffle(uint8_t* data, size_t size, size_t width) {
+  const size_t rows = (size + width - 1) / width;  // rows of the output
+  PaddedBytes transposed(size);
+  // dst walks the output linearly; src jumps through the input by `rows` and
+  // restarts one position further each time it runs off the end.
+  size_t restart = 0;
+  size_t src = 0;
+  for (size_t dst = 0; dst < size; ++dst) {
+    transposed[dst] = data[src];
+    src += rows;
+    if (src >= size) {
+      ++restart;
+      src = restart;
+    }
+  }
+
+  // Copy the transposed bytes back over the caller's buffer.
+  for (size_t k = 0; k < size; ++k) {
+    data[k] = transposed[k];
+  }
+}
+
+// TODO(eustas): should be 20, or even 18, once DecodeVarInt is improved;
+//               currently DecodeVarInt does not signal the errors, and marks
+//               11 bytes as used even if only 10 are used (and 9 is enough for
+//               63-bit values).
+constexpr const size_t kPreambleSize = 22;  // enough for reading 2 VarInts (2 x 11 bytes, see the TODO above)
+
+}  // namespace
+
+// Quick validity check that repeats the first steps of UnpredictICC: it reads
+// the two leading VarInts (output size and commands size) and sanity-checks
+// them. At least kPreambleSize bytes of data should be valid at invocation
+// time.
+Status CheckPreamble(const PaddedBytes& data, size_t enc_size,
+                     size_t output_limit) {
+  const uint8_t* bytes = data.data();
+  size_t available = data.size();
+  size_t cursor = 0;
+
+  uint64_t output_size = DecodeVarInt(bytes, available, &cursor);
+  JXL_RETURN_IF_ERROR(CheckIs32Bit(output_size));
+  if (cursor >= available) return JXL_FAILURE("Out of bounds");
+
+  uint64_t commands_size = DecodeVarInt(bytes, available, &cursor);
+  JXL_RETURN_IF_ERROR(CheckIs32Bit(commands_size));
+  JXL_RETURN_IF_ERROR(CheckOutOfBounds(cursor, commands_size, available));
+
+  // We expect that UnpredictICC inflates input, not the other way round.
+  if (output_size + 65536 < enc_size) return JXL_FAILURE("Malformed ICC");
+
+  // An output_limit of 0 disables the size cap.
+  const bool over_limit = output_limit && output_size > output_limit;
+  if (over_limit) {
+    return JXL_FAILURE("Decoded ICC is too large");
+  }
+  return true;
+}
+
+// Decodes the result of PredictICC back to a valid ICC profile. `result` must be empty on entry.
+Status UnpredictICC(const uint8_t* enc, size_t size, PaddedBytes* result) {
+  if (!result->empty()) return JXL_FAILURE("result must be empty initially");
+  size_t pos = 0;
+  // TODO(lode): technically speaking we need to check that the entire varint
+  // decoding never goes out of bounds, not just the first byte. This requires
+  // a DecodeVarInt function that returns an error code. It is safe to use
+  // DecodeVarInt with out of bounds values, it silently returns, but the
+  // specification requires an error. Idem for all DecodeVarInt below.
+  if (pos >= size) return JXL_FAILURE("Out of bounds");
+  uint64_t osize = DecodeVarInt(enc, size, &pos);  // Output size
+  JXL_RETURN_IF_ERROR(CheckIs32Bit(osize));
+  if (pos >= size) return JXL_FAILURE("Out of bounds");
+  uint64_t csize = DecodeVarInt(enc, size, &pos);  // Commands size
+  // Every command is translated to at least one byte.
+  JXL_RETURN_IF_ERROR(CheckIs32Bit(csize));
+  size_t cpos = pos;  // pos in commands stream
+  JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, csize, size));
+  size_t commands_end = cpos + csize;
+  pos = commands_end;  // pos in data stream
+
+  // Header
+  PaddedBytes header = ICCInitialHeaderPrediction();
+  EncodeUint32(0, osize, &header);
+  for (size_t i = 0; i <= kICCHeaderSize; i++) {  // <= so the osize check also runs once after the last header byte
+    if (result->size() == osize) {
+      if (cpos != commands_end) return JXL_FAILURE("Not all commands used");
+      if (pos != size) return JXL_FAILURE("Not all data used");
+      return true;  // Valid end
+    }
+    if (i == kICCHeaderSize) break;  // Done
+    ICCPredictHeader(result->data(), result->size(), header.data(), i);
+    if (pos >= size) return JXL_FAILURE("Out of bounds");
+    result->push_back(enc[pos++] + header[i]);  // data byte plus header[i]
+  }
+  if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+
+  // Tag list
+  uint64_t numtags = DecodeVarInt(enc, size, &cpos);
+
+  if (numtags != 0) {  // 0 means there is no tag list at all
+    numtags--;  // the stored value is the tag count plus one
+    JXL_RETURN_IF_ERROR(CheckIs32Bit(numtags));
+    AppendUint32(numtags, result);
+    uint64_t prevtagstart = kICCHeaderSize + numtags * 12;
+    uint64_t prevtagsize = 0;
+    for (;;) {
+      if (result->size() > osize) return JXL_FAILURE("Invalid result size");
+      if (cpos > commands_end) return JXL_FAILURE("Out of bounds");
+      if (cpos == commands_end) break;  // Valid end
+      uint8_t command = enc[cpos++];
+      uint8_t tagcode = command & 63;  // low 6 bits select the tag; remaining bits are flags
+      Tag tag;
+      if (tagcode == 0) {
+        break;  // tagcode 0 ends the tag list
+      } else if (tagcode == kCommandTagUnknown) {
+        JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, 4, size));
+        tag = DecodeKeyword(enc, size, pos);
+        pos += 4;
+      } else if (tagcode == kCommandTagTRC) {
+        tag = kRtrcTag;
+      } else if (tagcode == kCommandTagXYZ) {
+        tag = kRxyzTag;
+      } else {
+        if (tagcode - kCommandTagStringFirst >= kNumTagStrings) {
+          return JXL_FAILURE("Unknown tagcode");
+        }
+        tag = *kTagStrings[tagcode - kCommandTagStringFirst];
+      }
+      AppendKeyword(tag, result);
+
+      uint64_t tagstart;
+      uint64_t tagsize = prevtagsize;  // default: same size as the previous tag
+      if (tag == kRxyzTag || tag == kGxyzTag || tag == kBxyzTag ||
+          tag == kKxyzTag || tag == kWtptTag || tag == kBkptTag ||
+          tag == kLumiTag) {
+        tagsize = 20;
+      }
+
+      if (command & kFlagBitOffset) {
+        if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+        tagstart = DecodeVarInt(enc, size, &cpos);
+      } else {
+        JXL_RETURN_IF_ERROR(CheckIs32Bit(prevtagstart));
+        tagstart = prevtagstart + prevtagsize;  // default: directly after the previous tag
+      }
+      JXL_RETURN_IF_ERROR(CheckIs32Bit(tagstart));
+      AppendUint32(tagstart, result);
+      if (command & kFlagBitSize) {
+        if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+        tagsize = DecodeVarInt(enc, size, &cpos);
+      }
+      JXL_RETURN_IF_ERROR(CheckIs32Bit(tagsize));
+      AppendUint32(tagsize, result);
+      prevtagstart = tagstart;
+      prevtagsize = tagsize;
+
+      if (tagcode == kCommandTagTRC) {  // also emit kGtrcTag and kBtrcTag with the same start and size
+        AppendKeyword(kGtrcTag, result);
+        AppendUint32(tagstart, result);
+        AppendUint32(tagsize, result);
+        AppendKeyword(kBtrcTag, result);
+        AppendUint32(tagstart, result);
+        AppendUint32(tagsize, result);
+      }
+
+      if (tagcode == kCommandTagXYZ) {  // also emit kGxyzTag and kBxyzTag, placed back to back after this tag
+        JXL_RETURN_IF_ERROR(CheckIs32Bit(tagstart + tagsize * 2));
+        AppendKeyword(kGxyzTag, result);
+        AppendUint32(tagstart + tagsize, result);
+        AppendUint32(tagsize, result);
+        AppendKeyword(kBxyzTag, result);
+        AppendUint32(tagstart + tagsize * 2, result);
+        AppendUint32(tagsize, result);
+      }
+    }
+  }
+
+  // Main Content
+  for (;;) {
+    if (result->size() > osize) return JXL_FAILURE("Invalid result size");
+    if (cpos > commands_end) return JXL_FAILURE("Out of bounds");
+    if (cpos == commands_end) break;  // Valid end
+    uint8_t command = enc[cpos++];
+    if (command == kCommandInsert) {  // copy num bytes verbatim from the data stream
+      if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+      uint64_t num = DecodeVarInt(enc, size, &cpos);
+      JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, num, size));
+      for (size_t i = 0; i < num; i++) {
+        result->push_back(enc[pos++]);
+      }
+    } else if (command == kCommandShuffle2 || command == kCommandShuffle4) {  // copy num bytes after Shuffle with width 2 or 4
+      if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+      uint64_t num = DecodeVarInt(enc, size, &cpos);
+      JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, num, size));
+      PaddedBytes shuffled(num);
+      for (size_t i = 0; i < num; i++) {
+        shuffled[i] = enc[pos + i];
+      }
+      if (command == kCommandShuffle2) {
+        Shuffle(shuffled.data(), num, 2);
+      } else if (command == kCommandShuffle4) {
+        Shuffle(shuffled.data(), num, 4);
+      }
+      for (size_t i = 0; i < num; i++) {
+        result->push_back(shuffled[i]);
+        pos++;
+      }
+    } else if (command == kCommandPredict) {
+      JXL_RETURN_IF_ERROR(CheckOutOfBounds(cpos, 2, commands_end));
+      uint8_t flags = enc[cpos++];
+
+      size_t width = (flags & 3) + 1;  // bits 0-1 of flags: 1, 2 or 4 after the check below
+      if (width == 3) return JXL_FAILURE("Invalid width");
+
+      int order = (flags & 12) >> 2;  // bits 2-3 of flags: 0, 1 or 2 after the check below
+      if (order == 3) return JXL_FAILURE("Invalid order");
+
+      uint64_t stride = width;
+      if (flags & 16) {  // bit 4 of flags: an explicit stride follows
+        if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+        stride = DecodeVarInt(enc, size, &cpos);
+        if (stride < width) {
+          return JXL_FAILURE("Invalid stride");
+        }
+      }
+      // If stride * 4 >= result->size(), return failure. The check
+      // "size == 0 || ((size - 1) >> 2) < stride" corresponds to
+      // "stride * 4 >= size", but does not suffer from integer overflow.
+      // This check is more strict than necessary but follows the specification
+      // and the encoder should ensure this is followed.
+      if (result->empty() || ((result->size() - 1u) >> 2u) < stride) {
+        return JXL_FAILURE("Invalid stride");
+      }
+
+      if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+      uint64_t num = DecodeVarInt(enc, size, &cpos);  // in bytes
+      JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, num, size));
+
+      PaddedBytes shuffled(num);
+      for (size_t i = 0; i < num; i++) {
+        shuffled[i] = enc[pos + i];
+      }
+      if (width > 1) Shuffle(shuffled.data(), num, width);
+
+      size_t start = result->size();
+      for (size_t i = 0; i < num; i++) {
+        uint8_t predicted = LinearPredictICCValue(result->data(), start, i,
+                                                  stride, width, order);
+        result->push_back(predicted + shuffled[i]);  // prediction plus the stored byte
+      }
+      pos += num;
+    } else if (command == kCommandXYZ) {  // emit kXyz_Tag, 4 zero bytes, then 12 data bytes
+      AppendKeyword(kXyz_Tag, result);
+      for (int i = 0; i < 4; i++) result->push_back(0);
+      JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, 12, size));
+      for (size_t i = 0; i < 12; i++) {
+        result->push_back(enc[pos++]);
+      }
+    } else if (command >= kCommandTypeStartFirst &&
+               command < kCommandTypeStartFirst + kNumTypeStrings) {  // emit a known type keyword plus 4 zero bytes
+      AppendKeyword(*kTypeStrings[command - kCommandTypeStartFirst], result);
+      for (size_t i = 0; i < 4; i++) {
+        result->push_back(0);
+      }
+    } else {
+      return JXL_FAILURE("Unknown command");
+    }
+  }
+
+  if (pos != size) return JXL_FAILURE("Not all data used");
+  if (result->size() != osize) return JXL_FAILURE("Invalid result size");
+
+  return true;
+}
+
+Status ICCReader::Init(BitReader* reader, size_t output_limit) {  // Reads the size, histograms and first bytes, or skips them if already consumed.
+  JXL_RETURN_IF_ERROR(CheckEOI(reader));
+  used_bits_base_ = reader->TotalBitsConsumed();
+  if (bits_to_skip_ == 0) {  // nothing recorded as consumed yet (initial state, or after Reset())
+    enc_size_ = U64Coder::Read(reader);
+    if (enc_size_ > 268435456) {  // 268435456 == 1 << 28
+      // Avoid too large memory allocation for invalid file.
+      return JXL_FAILURE("Too large encoded profile");
+    }
+    JXL_RETURN_IF_ERROR(
+        DecodeHistograms(reader, kNumICCContexts, &code_, &context_map_));
+    ans_reader_ = ANSSymbolReader(&code_, reader);
+    i_ = 0;
+    decompressed_.resize(std::min<size_t>(i_ + 0x400, enc_size_));
+    for (; i_ < std::min<size_t>(2, enc_size_); i_++) {  // first two bytes: missing predecessors count as 0
+      decompressed_[i_] = ans_reader_.ReadHybridUint(
+          ICCANSContext(i_, i_ > 0 ? decompressed_[i_ - 1] : 0,
+                        i_ > 1 ? decompressed_[i_ - 2] : 0),
+          reader, context_map_);
+    }
+    if (enc_size_ > kPreambleSize) {  // enough bytes exist to decode and validate the preamble early
+      for (; i_ < kPreambleSize; i_++) {
+        decompressed_[i_] = ans_reader_.ReadHybridUint(
+            ICCANSContext(i_, decompressed_[i_ - 1], decompressed_[i_ - 2]),
+            reader, context_map_);
+      }
+      JXL_RETURN_IF_ERROR(CheckEOI(reader));
+      JXL_RETURN_IF_ERROR(
+          CheckPreamble(decompressed_, enc_size_, output_limit));
+    }
+    bits_to_skip_ = reader->TotalBitsConsumed() - used_bits_base_;
+  } else {
+    reader->SkipBits(bits_to_skip_);  // skip the bits an earlier call recorded as consumed
+  }
+  return true;
+}
+
+Status ICCReader::Process(BitReader* reader, PaddedBytes* icc) {  // Decodes the remaining bytes, then runs UnpredictICC into *icc.
+  ANSSymbolReader::Checkpoint checkpoint;
+  size_t saved_i = 0;
+  auto save = [&]() {  // records ANS state, consumed-bit count and i_ as the rollback point
+    ans_reader_.Save(&checkpoint);
+    bits_to_skip_ = reader->TotalBitsConsumed() - used_bits_base_;
+    saved_i = i_;
+  };
+  save();
+  auto check_and_restore = [&]() {  // on a short read, rolls back to the last save() and returns the error
+    Status status = CheckEOI(reader);
+    if (!status) {
+      // not enough bytes.
+      ans_reader_.Restore(checkpoint);
+      i_ = saved_i;
+      return status;
+    }
+    return Status(true);
+  };
+  for (; i_ < enc_size_; i_++) {
+    if (i_ % ANSSymbolReader::kMaxCheckpointInterval == 0 && i_ > 0) {
+      JXL_RETURN_IF_ERROR(check_and_restore());
+      save();
+      if ((i_ > 0) && (((i_ & 0xFFFF) == 0))) {  // every 65536 decoded bytes
+        float used_bytes =
+            (reader->TotalBitsConsumed() - used_bits_base_) / 8.0f;
+        if (i_ > used_bytes * 256) return JXL_FAILURE("Corrupted stream");  // over 256 output bytes per input byte
+      }
+      decompressed_.resize(std::min<size_t>(i_ + 0x400, enc_size_));
+    }
+    JXL_DASSERT(i_ >= 2);  // the context below reads the two previous bytes
+    decompressed_[i_] = ans_reader_.ReadHybridUint(
+        ICCANSContext(i_, decompressed_[i_ - 1], decompressed_[i_ - 2]), reader,
+        context_map_);
+  }
+  JXL_RETURN_IF_ERROR(check_and_restore());
+  bits_to_skip_ = reader->TotalBitsConsumed() - used_bits_base_;
+  if (!ans_reader_.CheckANSFinalState()) {
+    return JXL_FAILURE("Corrupted ICC profile");
+  }
+
+  icc->clear();  // the output replaces any previous content of *icc
+  return UnpredictICC(decompressed_.data(), decompressed_.size(), icc);
+}
+
+// Reports kNotEnoughBytes when the reader has read past the end of its input.
+Status ICCReader::CheckEOI(BitReader* reader) {
+  if (!reader->AllReadsWithinBounds()) {
+    return JXL_STATUS(StatusCode::kNotEnoughBytes,
+                      "Not enough bytes for reading ICC profile");
+  }
+  return true;
+}
+
+// One-shot ICC decode: runs Init followed by Process on a fresh ICCReader and
+// leaves the decoded profile in `icc`.
+Status ReadICC(BitReader* JXL_RESTRICT reader, PaddedBytes* JXL_RESTRICT icc,
+               size_t output_limit) {
+  ICCReader decoder;
+  // Stage 1: encoded size, histograms and preamble check.
+  JXL_RETURN_IF_ERROR(decoder.Init(reader, output_limit));
+  // Stage 2: remaining bytes and reconstruction of the profile.
+  JXL_RETURN_IF_ERROR(decoder.Process(reader, icc));
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/icc_codec.h b/third-party/libjxl/libjxl/lib/jxl/icc_codec.h
new file mode 100644
index 0000000000..a6c7477c60
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/icc_codec.h
@@ -0,0 +1,57 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ICC_CODEC_H_
+#define LIB_JXL_ICC_CODEC_H_
+
+// Compressed representation of ICC profiles.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+
+namespace jxl {
+
+struct ICCReader {
+  Status Init(BitReader* reader, size_t output_limit);
+  Status Process(BitReader* reader, PaddedBytes* icc);
+  void Reset() {  // clears only these two members; the rest keep their values
+    bits_to_skip_ = 0;
+    decompressed_.clear();
+  }
+
+ private:
+  Status CheckEOI(BitReader* reader);
+  size_t i_ = 0;                      // index of the next byte to decode
+  size_t bits_to_skip_ = 0;           // bits consumed since used_bits_base_
+  size_t used_bits_base_ = 0;         // NOTE(review): presumably set in Init
+  uint64_t enc_size_ = 0;             // number of encoded bytes to decode
+  std::vector<uint8_t> context_map_;  // passed to ReadHybridUint
+  ANSCode code_;
+  ANSSymbolReader ans_reader_;        // symbol decoder; Process checkpoints it
+  PaddedBytes decompressed_;          // decoded stream, input to UnpredictICC
+};
+
+// `icc` may be empty afterwards - if so, call CreateProfile. Does not append,
+// clears any original data that was in icc.
+// If `output_limit` is not 0, then returns error if resulting profile would be
+// longer than `output_limit`
+Status ReadICC(BitReader* JXL_RESTRICT reader, PaddedBytes* JXL_RESTRICT icc,
+               size_t output_limit = 0);
+
+// Exposed only for testing
+Status PredictICC(const uint8_t* icc, size_t size, PaddedBytes* result);
+
+// Exposed only for testing
+Status UnpredictICC(const uint8_t* enc, size_t size, PaddedBytes* result);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ICC_CODEC_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/icc_codec_common.cc b/third-party/libjxl/libjxl/lib/jxl/icc_codec_common.cc
new file mode 100644
index 0000000000..212387e78f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/icc_codec_common.cc
@@ -0,0 +1,190 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/icc_codec_common.h"
+
+#include <stdint.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+namespace {
+static uint8_t ByteKind1(uint8_t b) {
+  const bool is_letter = ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z');
+  if (is_letter) return 0;
+  if (('0' <= b && b <= '9') || b == '.' || b == ',') return 1;
+  switch (b) {  // exact values that get a class of their own
+    case 0: return 2;
+    case 1: return 3;
+    case 255: return 6;
+  }
+  if (b < 16) return 4;
+  return b > 240 ? 5 : 7;
+}
+
+static uint8_t ByteKind2(uint8_t b) {
+  const bool is_letter = ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z');
+  const bool is_number_char = ('0' <= b && b <= '9') || b == '.' || b == ',';
+  // Coarser than ByteKind1: five classes, tested in priority order.
+  if (is_letter) return 0;
+  if (is_number_char) return 1;
+  if (b < 16) return 2;
+  return b > 240 ? 3 : 4;
+}
+
+template <typename T>
+T PredictValue(T p1, T p2, T p3, int order) {
+  // Order 0/1/2 extrapolates with a constant/line/parabola; else predicts 0.
+  if (order == 2) return 3 * p1 - 3 * p2 + p3;
+  if (order == 1) return 2 * p1 - p2;
+  return order == 0 ? p1 : 0;
+}
+}  // namespace
+
+uint32_t DecodeUint32(const uint8_t* data, size_t size, size_t pos) {
+  return (size < 4 || pos > size - 4) ? 0 : LoadBE32(data + pos);  // wrap-safe
+}
+
+void EncodeUint32(size_t pos, uint32_t value, PaddedBytes* data) {
+  if (data->size() < 4 || pos > data->size() - 4) return;  // wrap-safe bounds
+  StoreBE32(value, data->data() + pos);  // big-endian store at byte pos
+}
+
+void AppendUint32(uint32_t value, PaddedBytes* data) {
+  data->resize(data->size() + 4);  // make room for one more 32-bit value
+  EncodeUint32(data->size() - 4, value, data);  // write it into the new tail
+}
+
+typedef std::array<uint8_t, 4> Tag;
+
+Tag DecodeKeyword(const uint8_t* data, size_t size, size_t pos) {
+  if (size < 4 || pos > size - 4) return {{' ', ' ', ' ', ' '}};  // wrap-safe
+  return {{data[pos], data[pos + 1], data[pos + 2], data[pos + 3]}};
+}
+
+void EncodeKeyword(const Tag& keyword, uint8_t* data, size_t size, size_t pos) {
+  if (size < 4 || pos > size - 4) return;  // wrap-safe; a Tag is always 4 bytes
+  for (size_t i = 0; i < 4; ++i) data[pos + i] = keyword[i];
+}
+
+void AppendKeyword(const Tag& keyword, PaddedBytes* data) {
+  JXL_ASSERT(keyword.size() == 4);  // always holds: Tag is array<uint8_t, 4>
+  data->append(keyword);  // grows *data by the keyword's bytes
+}
+
+// Fails when a + b > size, or when computing a + b wraps around size_t.
+Status CheckOutOfBounds(size_t a, size_t b, size_t size) {
+  size_t pos = a + b;  // may wrap; caught by the second test below
+  if (pos > size) return JXL_FAILURE("Out of bounds");
+  if (pos < a) return JXL_FAILURE("Out of bounds");  // overflow happened
+  return true;
+}
+
+Status CheckIs32Bit(uint64_t v) {
+  // Any bit left after dropping the low 32 means v needs more than 32 bits.
+  if ((v >> 32) != 0) return JXL_FAILURE("32-bit value expected");
+  return true;
+}
+
+PaddedBytes ICCInitialHeaderPrediction() {
+  // Baseline guess for a kICCHeaderSize-byte header.
+  // Every byte starts out as zero; the stores below then overwrite the
+  // positions that have an explicit predicted value.
+  PaddedBytes header(kICCHeaderSize);
+  for (size_t pos = 0; pos < kICCHeaderSize; ++pos) header[pos] = 0;
+
+  header[8] = 4;
+
+  // Four-character keywords at their fixed offsets.
+  uint8_t* const bytes = header.data();
+  const size_t num_bytes = header.size();
+  EncodeKeyword(kMntrTag, bytes, num_bytes, 12);
+  EncodeKeyword(kRgb_Tag, bytes, num_bytes, 16);
+  EncodeKeyword(kXyz_Tag, bytes, num_bytes, 20);
+  EncodeKeyword(kAcspTag, bytes, num_bytes, 36);
+
+  // Bytes 68..79 as one table instead of twelve separate stores.
+  static const uint8_t kBytes68To79[12] = {0, 0, 246, 214, 0,   1,
+                                           0, 0, 0,   0,   211, 45};
+  for (size_t k = 0; k < 12; ++k) header[68 + k] = kBytes68To79[k];
+
+  return header;
+}
+
+void ICCPredictHeader(const uint8_t* icc, size_t size, uint8_t* header,
+                      size_t pos) {
+  // Refines the header prediction from bytes of the actual profile. Every
+  // rule is tied to one value of pos and only writes header[pos] or later.
+  if (pos == 8 && size >= 8) {
+    // Bytes 80..83 are predicted to repeat bytes 4..7.
+    for (size_t k = 0; k < 4; ++k) header[80 + k] = icc[4 + k];
+  }
+  if (pos == 41 && size >= 41) {
+    // Guess the rest of the keyword at 40..43 from its first letter.
+    const uint8_t first = icc[40];
+    if (first == 'A') {
+      header[41] = 'P';
+      header[42] = 'P';
+      header[43] = 'L';
+    } else if (first == 'M') {
+      header[41] = 'S';
+      header[42] = 'F';
+      header[43] = 'T';
+    }
+  }
+  if (pos == 42 && size >= 42 && icc[40] == 'S') {
+    // Same keyword, now with two known letters: "SG" or "SU".
+    const uint8_t second = icc[41];
+    const bool is_sg = second == 'G';
+    if (is_sg || second == 'U') {
+      header[42] = is_sg ? 'I' : 'N';
+      header[43] = is_sg ? ' ' : 'W';
+    }
+  }
+}
+
+// Predicts a value with linear prediction of given order (0-2), for integers
+// with width bytes and given stride in bytes between values.
+// The start position is at start + i, and the relevant modulus of i describes
+// which byte of the multi-byte integer is being handled.
+// The value start + i must be at least stride * 4.
+uint8_t LinearPredictICCValue(const uint8_t* data, size_t start, size_t i,
+                              size_t stride, size_t width, int order) {
+  size_t pos = start + i;  // absolute index of the byte being predicted
+  if (width == 1) {
+    uint8_t p1 = data[pos - stride];  // nearest previous value
+    uint8_t p2 = data[pos - stride * 2];
+    uint8_t p3 = data[pos - stride * 3];  // farthest previous value
+    return PredictValue(p1, p2, p3, order);
+  } else if (width == 2) {
+    size_t p = start + (i & ~1);  // first byte of the 16-bit value holding i
+    uint16_t p1 = (data[p - stride * 1] << 8) + data[p - stride * 1 + 1];
+    uint16_t p2 = (data[p - stride * 2] << 8) + data[p - stride * 2 + 1];
+    uint16_t p3 = (data[p - stride * 3] << 8) + data[p - stride * 3 + 1];
+    uint16_t pred = PredictValue(p1, p2, p3, order);
+    return (i & 1) ? (pred & 255) : ((pred >> 8) & 255);  // odd i: low byte
+  } else {  // every other width is handled as a 4-byte big-endian value
+    size_t p = start + (i & ~3);  // first byte of the 32-bit value holding i
+    uint32_t p1 = DecodeUint32(data, pos, p - stride);  // pos is the size bound
+    uint32_t p2 = DecodeUint32(data, pos, p - stride * 2);
+    uint32_t p3 = DecodeUint32(data, pos, p - stride * 3);
+    uint32_t pred = PredictValue(p1, p2, p3, order);
+    unsigned shiftbytes = 3 - (i & 3);  // byte 0 of the value is the top byte
+    return (pred >> (shiftbytes * 8)) & 255;
+  }
+}
+
+size_t ICCANSContext(size_t i, size_t b1, size_t b2) {
+  // Context 0 while i <= 128; afterwards 1..40, chosen from b1 and b2.
+  return i <= 128 ? 0 : 1 + ByteKind1(b1) + ByteKind2(b2) * 8;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/icc_codec_common.h b/third-party/libjxl/libjxl/lib/jxl/icc_codec_common.h
new file mode 100644
index 0000000000..e91e908669
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/icc_codec_common.h
@@ -0,0 +1,106 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ICC_CODEC_COMMON_H_
+#define LIB_JXL_ICC_CODEC_COMMON_H_
+
+// Compressed representation of ICC profiles.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <array>
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+static constexpr size_t kICCHeaderSize = 128;
+
+typedef std::array<uint8_t, 4> Tag;
+
+static const Tag kAcspTag = {{'a', 'c', 's', 'p'}};
+static const Tag kBkptTag = {{'b', 'k', 'p', 't'}};
+static const Tag kBtrcTag = {{'b', 'T', 'R', 'C'}};
+static const Tag kBxyzTag = {{'b', 'X', 'Y', 'Z'}};
+static const Tag kChadTag = {{'c', 'h', 'a', 'd'}};
+static const Tag kChrmTag = {{'c', 'h', 'r', 'm'}};
+static const Tag kCprtTag = {{'c', 'p', 'r', 't'}};
+static const Tag kCurvTag = {{'c', 'u', 'r', 'v'}};
+static const Tag kDescTag = {{'d', 'e', 's', 'c'}};
+static const Tag kDmddTag = {{'d', 'm', 'd', 'd'}};
+static const Tag kDmndTag = {{'d', 'm', 'n', 'd'}};
+static const Tag kGbd_Tag = {{'g', 'b', 'd', ' '}};
+static const Tag kGtrcTag = {{'g', 'T', 'R', 'C'}};
+static const Tag kGxyzTag = {{'g', 'X', 'Y', 'Z'}};
+static const Tag kKtrcTag = {{'k', 'T', 'R', 'C'}};
+static const Tag kKxyzTag = {{'k', 'X', 'Y', 'Z'}};
+static const Tag kLumiTag = {{'l', 'u', 'm', 'i'}};
+static const Tag kMab_Tag = {{'m', 'A', 'B', ' '}};
+static const Tag kMba_Tag = {{'m', 'B', 'A', ' '}};
+static const Tag kMlucTag = {{'m', 'l', 'u', 'c'}};
+static const Tag kMntrTag = {{'m', 'n', 't', 'r'}};
+static const Tag kParaTag = {{'p', 'a', 'r', 'a'}};
+static const Tag kRgb_Tag = {{'R', 'G', 'B', ' '}};
+static const Tag kRtrcTag = {{'r', 'T', 'R', 'C'}};
+static const Tag kRxyzTag = {{'r', 'X', 'Y', 'Z'}};
+static const Tag kSf32Tag = {{'s', 'f', '3', '2'}};
+static const Tag kTextTag = {{'t', 'e', 'x', 't'}};
+static const Tag kVcgtTag = {{'v', 'c', 'g', 't'}};
+static const Tag kWtptTag = {{'w', 't', 'p', 't'}};
+static const Tag kXyz_Tag = {{'X', 'Y', 'Z', ' '}};
+
+// Tag names focused on RGB and GRAY monitor profiles
+static constexpr size_t kNumTagStrings = 17;
+static constexpr const Tag* kTagStrings[kNumTagStrings] = {
+    &kCprtTag, &kWtptTag, &kBkptTag, &kRxyzTag, &kGxyzTag, &kBxyzTag,
+    &kKxyzTag, &kRtrcTag, &kGtrcTag, &kBtrcTag, &kKtrcTag, &kChadTag,
+    &kDescTag, &kChrmTag, &kDmndTag, &kDmddTag, &kLumiTag};
+
+static constexpr size_t kCommandTagUnknown = 1;
+static constexpr size_t kCommandTagTRC = 2;
+static constexpr size_t kCommandTagXYZ = 3;
+static constexpr size_t kCommandTagStringFirst = 4;
+
+// Tag types focused on RGB and GRAY monitor profiles
+static constexpr size_t kNumTypeStrings = 8;
+static constexpr const Tag* kTypeStrings[kNumTypeStrings] = {
+    &kXyz_Tag, &kDescTag, &kTextTag, &kMlucTag,
+    &kParaTag, &kCurvTag, &kSf32Tag, &kGbd_Tag};
+
+static constexpr size_t kCommandInsert = 1;
+static constexpr size_t kCommandShuffle2 = 2;
+static constexpr size_t kCommandShuffle4 = 3;
+static constexpr size_t kCommandPredict = 4;
+static constexpr size_t kCommandXYZ = 10;
+static constexpr size_t kCommandTypeStartFirst = 16;
+
+static constexpr size_t kFlagBitOffset = 64;
+static constexpr size_t kFlagBitSize = 128;
+
+static constexpr size_t kNumICCContexts = 41;
+
+uint32_t DecodeUint32(const uint8_t* data, size_t size, size_t pos);
+void EncodeUint32(size_t pos, uint32_t value, PaddedBytes* data);
+void AppendUint32(uint32_t value, PaddedBytes* data);
+Tag DecodeKeyword(const uint8_t* data, size_t size, size_t pos);
+void EncodeKeyword(const Tag& keyword, uint8_t* data, size_t size, size_t pos);
+void AppendKeyword(const Tag& keyword, PaddedBytes* data);
+
+// Checks if a + b > size, taking possible integer overflow into account.
+Status CheckOutOfBounds(size_t a, size_t b, size_t size);
+Status CheckIs32Bit(uint64_t v);
+
+PaddedBytes ICCInitialHeaderPrediction();
+void ICCPredictHeader(const uint8_t* icc, size_t size, uint8_t* header,
+                      size_t pos);
+uint8_t LinearPredictICCValue(const uint8_t* data, size_t start, size_t i,
+                              size_t stride, size_t width, int order);
+size_t ICCANSContext(size_t i, size_t b1, size_t b2);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ICC_CODEC_COMMON_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/icc_codec_test.cc b/third-party/libjxl/libjxl/lib/jxl/icc_codec_test.cc
new file mode 100644
index 0000000000..af02094e99
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/icc_codec_test.cc
@@ -0,0 +1,207 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/icc_codec.h"
+
+#include <string>
+
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/enc_icc_codec.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+void TestProfile(const PaddedBytes& icc) {
+  // Round trip: encode `icc`, decode the result, then compare byte by byte.
+  BitWriter writer;
+  ASSERT_TRUE(WriteICC(icc, &writer, 0, nullptr));
+  writer.ZeroPadToByte();
+  PaddedBytes dec;
+  BitReader reader(writer.GetSpan());
+  ASSERT_TRUE(ReadICC(&reader, &dec));
+  ASSERT_TRUE(reader.Close());
+  EXPECT_EQ(icc.size(), dec.size());
+  if (icc.size() != dec.size()) return;
+  for (size_t i = 0; i < icc.size(); i++) {
+    EXPECT_EQ(icc[i], dec[i]);
+    if (icc[i] != dec[i]) return;  // One output is enough
+  }
+}
+
+void TestProfile(const std::string& icc) {
+  // Copy the characters into a PaddedBytes and reuse the byte-based overload.
+  const size_t num_bytes = icc.size();
+  PaddedBytes bytes(num_bytes);
+  for (size_t pos = 0; pos < num_bytes; ++pos) bytes[pos] = icc[pos];
+  TestProfile(bytes);
+}
+
+// Valid profile from one of the images output by the decoder.
+static const unsigned char kTestProfile[] = {
+    0x00, 0x00, 0x03, 0x80, 0x6c, 0x63, 0x6d, 0x73, 0x04, 0x30, 0x00, 0x00,
+    0x6d, 0x6e, 0x74, 0x72, 0x52, 0x47, 0x42, 0x20, 0x58, 0x59, 0x5a, 0x20,
+    0x07, 0xe3, 0x00, 0x04, 0x00, 0x1d, 0x00, 0x0f, 0x00, 0x32, 0x00, 0x2e,
+    0x61, 0x63, 0x73, 0x70, 0x41, 0x50, 0x50, 0x4c, 0x00, 0x00, 0x00, 0x01,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0xf6, 0xd6,
+    0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d, 0x6c, 0x63, 0x6d, 0x73,
+    0x5f, 0x07, 0x0d, 0x3e, 0x4d, 0x32, 0xf2, 0x6e, 0x5d, 0x77, 0x26, 0xcc,
+    0x23, 0xb0, 0x6a, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d,
+    0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x42,
+    0x63, 0x70, 0x72, 0x74, 0x00, 0x00, 0x01, 0x64, 0x00, 0x00, 0x01, 0x00,
+    0x77, 0x74, 0x70, 0x74, 0x00, 0x00, 0x02, 0x64, 0x00, 0x00, 0x00, 0x14,
+    0x63, 0x68, 0x61, 0x64, 0x00, 0x00, 0x02, 0x78, 0x00, 0x00, 0x00, 0x2c,
+    0x72, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x02, 0xa4, 0x00, 0x00, 0x00, 0x14,
+    0x62, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x02, 0xb8, 0x00, 0x00, 0x00, 0x14,
+    0x67, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x02, 0xcc, 0x00, 0x00, 0x00, 0x14,
+    0x72, 0x54, 0x52, 0x43, 0x00, 0x00, 0x02, 0xe0, 0x00, 0x00, 0x00, 0x20,
+    0x67, 0x54, 0x52, 0x43, 0x00, 0x00, 0x02, 0xe0, 0x00, 0x00, 0x00, 0x20,
+    0x62, 0x54, 0x52, 0x43, 0x00, 0x00, 0x02, 0xe0, 0x00, 0x00, 0x00, 0x20,
+    0x63, 0x68, 0x72, 0x6d, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x24,
+    0x64, 0x6d, 0x6e, 0x64, 0x00, 0x00, 0x03, 0x24, 0x00, 0x00, 0x00, 0x28,
+    0x64, 0x6d, 0x64, 0x64, 0x00, 0x00, 0x03, 0x4c, 0x00, 0x00, 0x00, 0x32,
+    0x6d, 0x6c, 0x75, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+    0x00, 0x00, 0x00, 0x0c, 0x65, 0x6e, 0x55, 0x53, 0x00, 0x00, 0x00, 0x26,
+    0x00, 0x00, 0x00, 0x1c, 0x00, 0x52, 0x00, 0x47, 0x00, 0x42, 0x00, 0x5f,
+    0x00, 0x44, 0x00, 0x36, 0x00, 0x35, 0x00, 0x5f, 0x00, 0x53, 0x00, 0x52,
+    0x00, 0x47, 0x00, 0x5f, 0x00, 0x52, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x5f,
+    0x00, 0x37, 0x00, 0x30, 0x00, 0x39, 0x00, 0x00, 0x6d, 0x6c, 0x75, 0x63,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c,
+    0x65, 0x6e, 0x55, 0x53, 0x00, 0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x1c,
+    0x00, 0x43, 0x00, 0x6f, 0x00, 0x70, 0x00, 0x79, 0x00, 0x72, 0x00, 0x69,
+    0x00, 0x67, 0x00, 0x68, 0x00, 0x74, 0x00, 0x20, 0x00, 0x32, 0x00, 0x30,
+    0x00, 0x31, 0x00, 0x38, 0x00, 0x20, 0x00, 0x47, 0x00, 0x6f, 0x00, 0x6f,
+    0x00, 0x67, 0x00, 0x6c, 0x00, 0x65, 0x00, 0x20, 0x00, 0x4c, 0x00, 0x4c,
+    0x00, 0x43, 0x00, 0x2c, 0x00, 0x20, 0x00, 0x43, 0x00, 0x43, 0x00, 0x2d,
+    0x00, 0x42, 0x00, 0x59, 0x00, 0x2d, 0x00, 0x53, 0x00, 0x41, 0x00, 0x20,
+    0x00, 0x33, 0x00, 0x2e, 0x00, 0x30, 0x00, 0x20, 0x00, 0x55, 0x00, 0x6e,
+    0x00, 0x70, 0x00, 0x6f, 0x00, 0x72, 0x00, 0x74, 0x00, 0x65, 0x00, 0x64,
+    0x00, 0x20, 0x00, 0x6c, 0x00, 0x69, 0x00, 0x63, 0x00, 0x65, 0x00, 0x6e,
+    0x00, 0x73, 0x00, 0x65, 0x00, 0x28, 0x00, 0x68, 0x00, 0x74, 0x00, 0x74,
+    0x00, 0x70, 0x00, 0x73, 0x00, 0x3a, 0x00, 0x2f, 0x00, 0x2f, 0x00, 0x63,
+    0x00, 0x72, 0x00, 0x65, 0x00, 0x61, 0x00, 0x74, 0x00, 0x69, 0x00, 0x76,
+    0x00, 0x65, 0x00, 0x63, 0x00, 0x6f, 0x00, 0x6d, 0x00, 0x6d, 0x00, 0x6f,
+    0x00, 0x6e, 0x00, 0x73, 0x00, 0x2e, 0x00, 0x6f, 0x00, 0x72, 0x00, 0x67,
+    0x00, 0x2f, 0x00, 0x6c, 0x00, 0x69, 0x00, 0x63, 0x00, 0x65, 0x00, 0x6e,
+    0x00, 0x73, 0x00, 0x65, 0x00, 0x73, 0x00, 0x2f, 0x00, 0x62, 0x00, 0x79,
+    0x00, 0x2d, 0x00, 0x73, 0x00, 0x61, 0x00, 0x2f, 0x00, 0x33, 0x00, 0x2e,
+    0x00, 0x30, 0x00, 0x2f, 0x00, 0x6c, 0x00, 0x65, 0x00, 0x67, 0x00, 0x61,
+    0x00, 0x6c, 0x00, 0x63, 0x00, 0x6f, 0x00, 0x64, 0x00, 0x65, 0x00, 0x29,
+    0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf6, 0xd6,
+    0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d, 0x73, 0x66, 0x33, 0x32,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x0c, 0x42, 0x00, 0x00, 0x05, 0xde,
+    0xff, 0xff, 0xf3, 0x25, 0x00, 0x00, 0x07, 0x93, 0x00, 0x00, 0xfd, 0x90,
+    0xff, 0xff, 0xfb, 0xa1, 0xff, 0xff, 0xfd, 0xa2, 0x00, 0x00, 0x03, 0xdc,
+    0x00, 0x00, 0xc0, 0x6e, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x6f, 0xa0, 0x00, 0x00, 0x38, 0xf5, 0x00, 0x00, 0x03, 0x90,
+    0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x9f,
+    0x00, 0x00, 0x0f, 0x84, 0x00, 0x00, 0xb6, 0xc4, 0x58, 0x59, 0x5a, 0x20,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x62, 0x97, 0x00, 0x00, 0xb7, 0x87,
+    0x00, 0x00, 0x18, 0xd9, 0x70, 0x61, 0x72, 0x61, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x38, 0xe4, 0x00, 0x00, 0xe8, 0xf0,
+    0x00, 0x00, 0x17, 0x10, 0x00, 0x00, 0x38, 0xe4, 0x00, 0x00, 0x14, 0xbc,
+    0x63, 0x68, 0x72, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+    0x00, 0x00, 0xa3, 0xd7, 0x00, 0x00, 0x54, 0x7c, 0x00, 0x00, 0x4c, 0xcd,
+    0x00, 0x00, 0x99, 0x9a, 0x00, 0x00, 0x26, 0x67, 0x00, 0x00, 0x0f, 0x5c,
+    0x6d, 0x6c, 0x75, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+    0x00, 0x00, 0x00, 0x0c, 0x65, 0x6e, 0x55, 0x53, 0x00, 0x00, 0x00, 0x0c,
+    0x00, 0x00, 0x00, 0x1c, 0x00, 0x47, 0x00, 0x6f, 0x00, 0x6f, 0x00, 0x67,
+    0x00, 0x6c, 0x00, 0x65, 0x6d, 0x6c, 0x75, 0x63, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x65, 0x6e, 0x55, 0x53,
+    0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x49, 0x00, 0x6d,
+    0x00, 0x61, 0x00, 0x67, 0x00, 0x65, 0x00, 0x20, 0x00, 0x63, 0x00, 0x6f,
+    0x00, 0x64, 0x00, 0x65, 0x00, 0x63, 0x00, 0x00,
+};
+
+}  // namespace
+
+TEST(IccCodecTest, Icc) {
+  // The empty string cannot be tested: the encoder refuses to write it.
+  TestProfile("a");  // inputs much shorter than an ICC header
+  TestProfile("ab");
+  TestProfile("aaaa");
+
+  {
+    // Exactly the ICC header size (128 bytes), every byte zero.
+    PaddedBytes profile(128);
+    for (size_t i = 0; i < 128; i++) {
+      profile[i] = 0;
+    }
+    TestProfile(profile);
+  }
+
+  {
+    PaddedBytes profile;  // the complete kTestProfile
+    profile.append(kTestProfile, kTestProfile + sizeof(kTestProfile));
+    TestProfile(profile);
+  }
+
+  // Test every prefix of the full profile with a length of 1 through 257.
+  {
+    PaddedBytes profile;
+    for (size_t i = 0; i <= 256; i++) {
+      profile.push_back(kTestProfile[i]);  // extend the prefix by one byte
+      TestProfile(profile);
+    }
+  }
+}
+
+// kTestProfile after encoding with the ICC codec
+static const unsigned char kEncodedTestProfile[] = {
+    0x1f, 0x8b, 0x1,  0x13, 0x10, 0x0,  0x0,  0x0,  0x20, 0x4c, 0xcc, 0x3,
+    0xe7, 0xa0, 0xa5, 0xa2, 0x90, 0xa4, 0x27, 0xe8, 0x79, 0x1d, 0xe3, 0x26,
+    0x57, 0x54, 0xef, 0x0,  0xe8, 0x97, 0x2,  0xce, 0xa1, 0xd7, 0x85, 0x16,
+    0xb4, 0x29, 0x94, 0x58, 0xf2, 0x56, 0xc0, 0x76, 0xea, 0x23, 0xec, 0x7c,
+    0x73, 0x51, 0x41, 0x40, 0x23, 0x21, 0x95, 0x4,  0x75, 0x12, 0xc9, 0xcc,
+    0x16, 0xbd, 0xb6, 0x99, 0xad, 0xf8, 0x75, 0x35, 0xb6, 0x42, 0xae, 0xae,
+    0xae, 0x86, 0x56, 0xf8, 0xcc, 0x16, 0x30, 0xb3, 0x45, 0xad, 0xd,  0x40,
+    0xd6, 0xd1, 0xd6, 0x99, 0x40, 0xbe, 0xe2, 0xdc, 0x31, 0x7,  0xa6, 0xb9,
+    0x27, 0x92, 0x38, 0x0,  0x3,  0x5e, 0x2c, 0xbe, 0xe6, 0xfb, 0x19, 0xbf,
+    0xf3, 0x6d, 0xbc, 0x4d, 0x64, 0xe5, 0xba, 0x76, 0xde, 0x31, 0x65, 0x66,
+    0x14, 0xa6, 0x3a, 0xc5, 0x8f, 0xb1, 0xb4, 0xba, 0x1f, 0xb1, 0xb8, 0xd4,
+    0x75, 0xba, 0x18, 0x86, 0x95, 0x3c, 0x26, 0xf6, 0x25, 0x62, 0x53, 0xfd,
+    0x9c, 0x94, 0x76, 0xf6, 0x95, 0x2c, 0xb1, 0xfd, 0xdc, 0xc0, 0xe4, 0x3f,
+    0xb3, 0xff, 0x67, 0xde, 0xd5, 0x94, 0xcc, 0xb0, 0x83, 0x2f, 0x28, 0x93,
+    0x92, 0x3,  0xa1, 0x41, 0x64, 0x60, 0x62, 0x70, 0x80, 0x87, 0xaf, 0xe7,
+    0x60, 0x4a, 0x20, 0x23, 0xb3, 0x11, 0x7,  0x38, 0x38, 0xd4, 0xa,  0x66,
+    0xb5, 0x93, 0x41, 0x90, 0x19, 0x17, 0x18, 0x60, 0xa5, 0xb,  0x7a, 0x24,
+    0xaa, 0x20, 0x81, 0xac, 0xa9, 0xa1, 0x70, 0xa6, 0x12, 0x8a, 0x4a, 0xa3,
+    0xa0, 0xf9, 0x9a, 0x97, 0xe7, 0xa8, 0xac, 0x8,  0xa8, 0xc4, 0x2a, 0x86,
+    0xa7, 0x69, 0x1e, 0x67, 0xe6, 0xbe, 0xa4, 0xd3, 0xff, 0x91, 0x61, 0xf6,
+    0x8a, 0xe6, 0xb5, 0xb3, 0x61, 0x9f, 0x19, 0x17, 0x98, 0x27, 0x6b, 0xe9,
+    0x8,  0x98, 0xe1, 0x21, 0x4a, 0x9,  0xb5, 0xd7, 0xca, 0xfa, 0x94, 0xd0,
+    0x69, 0x1a, 0xeb, 0x52, 0x1,  0x4e, 0xf5, 0xf6, 0xdf, 0x7f, 0xe7, 0x29,
+    0x70, 0xee, 0x4,  0xda, 0x2f, 0xa4, 0xff, 0xfe, 0xbb, 0x6f, 0xa8, 0xff,
+    0xfe, 0xdb, 0xaf, 0x8,  0xf6, 0x72, 0xa1, 0x40, 0x5d, 0xf0, 0x2d, 0x8,
+    0x82, 0x5b, 0x87, 0xbd, 0x10, 0x8,  0xe9, 0x7,  0xee, 0x4b, 0x80, 0xda,
+    0x4a, 0x4,  0xc5, 0x5e, 0xa0, 0xb7, 0x1e, 0x60, 0xb0, 0x59, 0x76, 0x60,
+    0xb,  0x2e, 0x19, 0x8a, 0x2e, 0x1c, 0xe6, 0x6,  0x20, 0xb8, 0x64, 0x18,
+    0x2a, 0xcf, 0x51, 0x94, 0xd4, 0xee, 0xc3, 0xfe, 0x39, 0x74, 0xd4, 0x2b,
+    0x48, 0xc9, 0x83, 0x4c, 0x9b, 0xd0, 0x4c, 0x35, 0x10, 0xe3, 0x9,  0xf7,
+    0x72, 0xf0, 0x7a, 0xe,  0xbf, 0x7d, 0x36, 0x2e, 0x19, 0x7e, 0x3f, 0xc,
+    0xf7, 0x93, 0xe7, 0xf4, 0x1d, 0x32, 0xc6, 0xb0, 0x89, 0xad, 0xe0, 0x28,
+    0xc1, 0xa7, 0x59, 0xe3, 0x0,
+};
+
+// Tests that the decoded kEncodedTestProfile matches kTestProfile.
+TEST(IccCodecTest, EncodedIccProfile) {
+  const jxl::Span<const uint8_t> encoded(kEncodedTestProfile,
+                                         sizeof(kEncodedTestProfile));
+  jxl::BitReader reader(encoded);
+  jxl::PaddedBytes dec;
+  ASSERT_TRUE(ReadICC(&reader, &dec));
+  ASSERT_TRUE(reader.Close());
+  EXPECT_EQ(sizeof(kTestProfile), dec.size());
+  if (sizeof(kTestProfile) != dec.size()) return;  // sizes differ: stop here
+  for (size_t i = 0; i < dec.size(); i++) {
+    EXPECT_EQ(kTestProfile[i], dec[i]);
+    if (kTestProfile[i] != dec[i]) return;  // One output is enough
+  }
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/image.cc b/third-party/libjxl/libjxl/lib/jxl/image.cc
new file mode 100644
index 0000000000..088ff7699a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/image.cc
@@ -0,0 +1,204 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/image.h"
+
+#include <algorithm>  // swap
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/image.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/common.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/sanitizers.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+
+namespace HWY_NAMESPACE {
+size_t GetVectorSize() { return HWY_LANES(uint8_t); }  // uint8_t lanes = bytes
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+namespace {
+
+HWY_EXPORT(GetVectorSize);  // Local function.
+
+// Returns distance [bytes] between the start of two consecutive rows, a
+// multiple of vector/cache line size but NOT CacheAligned::kAlias - see below.
+size_t BytesPerRow(const size_t xsize, const size_t sizeof_t) {
+  const size_t vec_size = VectorSize();
+  size_t valid_bytes = xsize * sizeof_t;  // bytes holding actual pixels
+
+  // Allow unaligned accesses starting at the last valid value - this may raise
+  // msan errors unless the user calls InitializePaddingForUnalignedAccesses.
+  // Skip for the scalar case because no extra lanes will be loaded.
+  if (vec_size != 0) {
+    valid_bytes += vec_size - sizeof_t;  // NOTE(review): vec_size >= sizeof_t?
+  }
+
+  // Round up to vector and cache line size.
+  const size_t align = std::max(vec_size, CacheAligned::kAlignment);
+  size_t bytes_per_row = RoundUpTo(valid_bytes, align);
+
+  // During the lengthy window before writes are committed to memory, CPUs
+  // guard against read after write hazards by checking the address, but
+  // only the lower 11 bits. We avoid a false dependency between writes to
+  // consecutive rows by ensuring their sizes are not multiples of 2 KiB.
+  // Avoid2K prevents the same problem for the planes of an Image3.
+  if (bytes_per_row % CacheAligned::kAlias == 0) {
+    bytes_per_row += align;  // one extra unit keeps the result aligned
+  }
+
+  JXL_ASSERT(bytes_per_row % align == 0);
+  return bytes_per_row;
+}
+
+}  // namespace
+
+size_t VectorSize() {
+  static size_t bytes = HWY_DYNAMIC_DISPATCH(GetVectorSize)();  // queried once
+  return bytes;
+}
+
+PlaneBase::PlaneBase(const size_t xsize, const size_t ysize,
+                     const size_t sizeof_t)
+    : xsize_(static_cast<uint32_t>(xsize)),
+      ysize_(static_cast<uint32_t>(ysize)),
+      orig_xsize_(static_cast<uint32_t>(xsize)),
+      orig_ysize_(static_cast<uint32_t>(ysize)) {
+  JXL_CHECK(xsize == xsize_);  // the narrowing casts above must be lossless
+  JXL_CHECK(ysize == ysize_);
+
+  JXL_ASSERT(sizeof_t == 1 || sizeof_t == 2 || sizeof_t == 4 || sizeof_t == 8);
+
+  bytes_per_row_ = 0;
+  // Dimensions can be zero, e.g. for lazily-allocated images. Only allocate
+  // if nonzero, because "zero" bytes still have padding/bookkeeping overhead.
+  if (xsize != 0 && ysize != 0) {
+    bytes_per_row_ = BytesPerRow(xsize, sizeof_t);
+    bytes_ = AllocateArray(bytes_per_row_ * ysize);  // one block for all rows
+    JXL_CHECK(bytes_.get());  // a failed allocation is fatal here
+    InitializePadding(sizeof_t, Padding::kRoundUp);
+  }
+}
+
+void PlaneBase::InitializePadding(const size_t sizeof_t, Padding padding) {
+#if defined(MEMORY_SANITIZER) || HWY_IDE
+  if (xsize_ == 0 || ysize_ == 0) return;  // empty plane: nothing to mark
+
+  const size_t vec_size = VectorSize();
+  if (vec_size == 0) return;  // Scalar mode: no padding needed
+
+  const size_t valid_size = xsize_ * sizeof_t;  // bytes of real pixels per row
+  const size_t initialize_size = padding == Padding::kRoundUp
+                                     ? RoundUpTo(valid_size, vec_size)
+                                     : valid_size + vec_size - sizeof_t;
+  if (valid_size == initialize_size) return;  // rows have no padding bytes
+
+  for (size_t y = 0; y < ysize_; ++y) {
+    uint8_t* JXL_RESTRICT row = static_cast<uint8_t*>(VoidRow(y));
+#if defined(__clang__) &&                                           \
+    ((!defined(__apple_build_version__) && __clang_major__ <= 6) || \
+     (defined(__apple_build_version__) &&                           \
+      __apple_build_version__ <= 10001145))
+    // There's a bug in msan in clang-6 when handling AVX2 operations. This
+    // workaround allows tests to pass on msan, although it is slower and
+    // prevents msan warnings from uninitialized images.
+    std::fill(row, row + initialize_size, msan::kSanitizerSentinelByte);
+#else
+    memset(row + valid_size, msan::kSanitizerSentinelByte,
+           initialize_size - valid_size);
+#endif  // clang6
+  }
+#endif  // MEMORY_SANITIZER
+}
+
+void PlaneBase::Swap(PlaneBase& other) {
+  std::swap(bytes_, other.bytes_);  // pixel storage and its row pitch
+  std::swap(bytes_per_row_, other.bytes_per_row_);
+  std::swap(orig_ysize_, other.orig_ysize_);  // sizes set at construction
+  std::swap(orig_xsize_, other.orig_xsize_);
+  std::swap(ysize_, other.ysize_);  // current sizes
+  std::swap(xsize_, other.xsize_);
+}
+
+void PadImageToBlockMultipleInPlace(Image3F* JXL_RESTRICT in,
+                                    size_t block_dim) {
+  const size_t xsize_orig = in->xsize();
+  const size_t ysize_orig = in->ysize();
+  const size_t xsize = RoundUpTo(xsize_orig, block_dim);  // padded width
+  const size_t ysize = RoundUpTo(ysize_orig, block_dim);  // padded height
+  // Expands image size to the originally-allocated size.
+  in->ShrinkTo(xsize, ysize);
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t y = 0; y < ysize_orig; y++) {
+      float* JXL_RESTRICT row = in->PlaneRow(c, y);
+      for (size_t x = xsize_orig; x < xsize; x++) {
+        row[x] = row[xsize_orig - 1];  // replicate the last valid pixel
+      }
+    }
+    const float* JXL_RESTRICT row_src = in->ConstPlaneRow(c, ysize_orig - 1);
+    for (size_t y = ysize_orig; y < ysize; y++) {  // repeat the last valid row
+      memcpy(in->PlaneRow(c, y), row_src, xsize * sizeof(float));
+    }
+  }
+}
+
+static void DownsampleImage(const ImageF& input, size_t factor,
+                            ImageF* output) {
+  JXL_ASSERT(factor != 1);
+  output->ShrinkTo(DivCeil(input.xsize(), factor),
+                   DivCeil(input.ysize(), factor));
+  size_t in_stride = input.PixelsPerRow();  // used as the row pitch in floats
+  for (size_t y = 0; y < output->ysize(); y++) {
+    float* row_out = output->Row(y);
+    const float* row_in = input.Row(factor * y);  // top row of this block
+    for (size_t x = 0; x < output->xsize(); x++) {
+      size_t cnt = 0;  // input pixels of this block that lie inside the image
+      float sum = 0;
+      for (size_t iy = 0; iy < factor && iy + factor * y < input.ysize();
+           iy++) {
+        for (size_t ix = 0; ix < factor && ix + factor * x < input.xsize();
+             ix++) {
+          sum += row_in[iy * in_stride + x * factor + ix];
+          cnt++;
+        }
+      }
+      row_out[x] = sum / cnt;  // mean over the (possibly clipped) block
+    }
+  }
+}
+
+void DownsampleImage(ImageF* image, size_t factor) {
+  // Allocate extra space to avoid a reallocation when padding.
+  ImageF downsampled(DivCeil(image->xsize(), factor) + kBlockDim,
+                     DivCeil(image->ysize(), factor) + kBlockDim);
+  DownsampleImage(*image, factor, &downsampled);  // also shrinks to final size
+  *image = std::move(downsampled);  // the result replaces the input
+}
+
+void DownsampleImage(Image3F* opsin, size_t factor) {
+  JXL_ASSERT(factor != 1);
+  // Allocate extra space to avoid a reallocation when padding.
+  Image3F downsampled(DivCeil(opsin->xsize(), factor) + kBlockDim,
+                      DivCeil(opsin->ysize(), factor) + kBlockDim);
+  downsampled.ShrinkTo(downsampled.xsize() - kBlockDim,  // hide the slack
+                       downsampled.ysize() - kBlockDim);
+  for (size_t c = 0; c < 3; c++) {  // each of the three planes separately
+    DownsampleImage(opsin->Plane(c), factor, &downsampled.Plane(c));
+  }
+  *opsin = std::move(downsampled);  // the result replaces the input
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/image.h b/third-party/libjxl/libjxl/lib/jxl/image.h
new file mode 100644
index 0000000000..e66534220c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/image.h
@@ -0,0 +1,497 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_IMAGE_H_
+#define LIB_JXL_IMAGE_H_
+
+// SIMD/multicore-friendly planar image representation with row accessors.
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+#include <sstream>
+#include <utility>  // std::move
+
+#include "lib/jxl/base/cache_aligned.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+
+namespace jxl {
+
+// Helper function to create rows that are multiples of SIMD vector size.
+size_t VectorSize();
+
+// Type-independent parts of Plane<> - reduces code duplication and facilitates
+// moving member function implementations to cc file.
+struct PlaneBase {  // Untyped storage and dimensions shared by Plane<T>.
+  PlaneBase()
+      : xsize_(0),
+        ysize_(0),
+        orig_xsize_(0),
+        orig_ysize_(0),
+        bytes_per_row_(0),
+        bytes_(nullptr) {}
+  PlaneBase(size_t xsize, size_t ysize, size_t sizeof_t);
+
+  // Copy construction/assignment is forbidden to avoid inadvertent copies,
+  // which can be very expensive. Use CopyImageTo() instead.
+  PlaneBase(const PlaneBase& other) = delete;
+  PlaneBase& operator=(const PlaneBase& other) = delete;
+
+  // Move constructor (required for returning Image from function)
+  PlaneBase(PlaneBase&& other) noexcept = default;
+
+  // Move assignment (required for std::vector)
+  PlaneBase& operator=(PlaneBase&& other) noexcept = default;
+
+  void Swap(PlaneBase& other);
+
+  // Useful for pre-allocating image with some padding for alignment purposes
+  // and later reporting the actual valid dimensions. May also be used to
+  // un-shrink the image. xsize/ysize must be <= the original dimensions;
+  // this is enforced with JXL_CHECK.
+  void ShrinkTo(const size_t xsize, const size_t ysize) {
+    JXL_CHECK(xsize <= orig_xsize_);
+    JXL_CHECK(ysize <= orig_ysize_);
+    xsize_ = static_cast<uint32_t>(xsize);
+    ysize_ = static_cast<uint32_t>(ysize);
+    // NOTE: we can't recompute bytes_per_row for more compact storage and
+    // better locality because that would invalidate the image contents.
+  }
+
+  // How many pixels.
+  JXL_INLINE size_t xsize() const { return xsize_; }
+  JXL_INLINE size_t ysize() const { return ysize_; }
+
+  // NOTE: do not use this for copying rows - the valid xsize may be much less.
+  JXL_INLINE size_t bytes_per_row() const { return bytes_per_row_; }
+
+  // Raw access to byte contents, for interfacing with other libraries.
+  // Unsigned char instead of char to avoid surprises (sign extension).
+  JXL_INLINE uint8_t* bytes() {
+    void* p = bytes_.get();
+    return static_cast<uint8_t * JXL_RESTRICT>(JXL_ASSUME_ALIGNED(p, 64));
+  }
+  JXL_INLINE const uint8_t* bytes() const {
+    const void* p = bytes_.get();
+    return static_cast<const uint8_t * JXL_RESTRICT>(JXL_ASSUME_ALIGNED(p, 64));
+  }
+
+ protected:
+  // Returns pointer to the start of row y (bounds-checked under sanitizers).
+  JXL_INLINE void* VoidRow(const size_t y) const {
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+    defined(THREAD_SANITIZER)
+    if (y >= ysize_) {
+      JXL_ABORT("Row(%" PRIu64 ") in (%u x %u) image\n", (uint64_t)y, xsize_,
+                ysize_);
+    }
+#endif  // sanitizer builds only
+
+    void* row = bytes_.get() + y * bytes_per_row_;
+    return JXL_ASSUME_ALIGNED(row, 64);
+  }
+
+  enum class Padding {
+    // Allow Load(d, row + x) for x = 0; x < xsize(); x += Lanes(d). Default.
+    kRoundUp,
+    // Allow LoadU(d, row + x) for x = xsize() - 1. This requires an extra
+    // vector to be initialized. If done by default, this would suppress
+    // legitimate msan warnings. We therefore require users to explicitly call
+    // InitializePadding before using unaligned loads (e.g. convolution).
+    kUnaligned
+  };
+
+  // Initializes the minimum bytes required to suppress msan warnings from
+  // legitimate (according to Padding mode) vector loads/stores on the right
+  // border, where some lanes are uninitialized and assumed to be unused.
+  void InitializePadding(size_t sizeof_t, Padding padding);
+
+  // (Members are non-const to enable assignment during move-assignment.)
+  uint32_t xsize_;  // In valid pixels, not including any padding.
+  uint32_t ysize_;
+  uint32_t orig_xsize_;  // Upper bound for xsize_ accepted by ShrinkTo().
+  uint32_t orig_ysize_;  // Upper bound for ysize_ accepted by ShrinkTo().
+  size_t bytes_per_row_;  // Includes padding.
+  CacheAlignedUniquePtr bytes_;
+};
+
+// Single channel, aligned rows separated by padding. T must be POD.
+//
+// 'Single channel' (one 2D array per channel) simplifies vectorization
+// (repeating the same operation on multiple adjacent components) without the
+// complexity of a hybrid layout (8 R, 8 G, 8 B, ...). In particular, clients
+// can easily iterate over all components in a row and Image requires no
+// knowledge of the pixel format beyond the component type "T".
+//
+// 'Aligned' means each row is aligned to the L1 cache line size. This prevents
+// false sharing between two threads operating on adjacent rows.
+//
+// 'Padding' is still relevant because vectors could potentially be larger than
+// a cache line. By rounding up row sizes to the vector size, we allow
+// reading/writing ALIGNED vectors whose first lane is a valid sample. This
+// avoids needing a separate loop to handle remaining unaligned lanes.
+//
+// This image layout could also be achieved with a vector and a row accessor
+// function, but a class wrapper with support for "deleter" allows wrapping
+// existing memory allocated by clients without copying the pixels. It also
+// provides convenient accessors for xsize/ysize, which shortens function
+// argument lists. Supports move-construction so it can be stored in containers.
+template <typename ComponentType>
+class Plane : public PlaneBase {
+ public:
+  using T = ComponentType;
+  static constexpr size_t kNumPlanes = 1;
+
+  Plane() = default;
+  Plane(const size_t xsize, const size_t ysize)
+      : PlaneBase(xsize, ysize, sizeof(T)) {}
+
+  void InitializePaddingForUnalignedAccesses() {
+    InitializePadding(sizeof(T), Padding::kUnaligned);
+  }
+
+  JXL_INLINE T* Row(const size_t y) { return static_cast<T*>(VoidRow(y)); }
+
+  // Const overload of Row(): returns a pointer to const for row y.
+  JXL_INLINE const T* Row(const size_t y) const {
+    return static_cast<const T*>(VoidRow(y));
+  }
+
+  // Same as the const Row(); the name documents that the access is const.
+  JXL_INLINE const T* ConstRow(const size_t y) const {
+    return static_cast<const T*>(VoidRow(y));
+  }
+
+  // Returns number of pixels (some of which are padding) per row, i.e.
+  // bytes_per_row_ / sizeof(T). Useful for computing other rows via pointer
+  // arithmetic. WARNING: this must NOT be used to determine xsize.
+  JXL_INLINE intptr_t PixelsPerRow() const {
+    return static_cast<intptr_t>(bytes_per_row_ / sizeof(T));
+  }
+};
+
+using ImageSB = Plane<int8_t>;
+using ImageB = Plane<uint8_t>;
+using ImageS = Plane<int16_t>;  // signed integer or half-float
+using ImageU = Plane<uint16_t>;
+using ImageI = Plane<int32_t>;
+using ImageF = Plane<float>;
+using ImageD = Plane<double>;
+
+template <class Image1, class Image2>  // Image3 and mixed types work too.
+bool SameSize(const Image1& image1, const Image2& image2) {
+  if (image1.xsize() != image2.xsize()) return false;
+  return image1.ysize() == image2.ysize();
+}
+
+template <typename T>
+class Image3;
+
+// Rectangular region in image(s). Factoring this out of Image instead of
+// shifting the pointer by x0/y0 allows this to apply to multiple images with
+// different resolutions (e.g. color transform and quantization field).
+// Can compare using SameSize(rect1, rect2).
+template <typename T>
+class RectT {
+ public:
+  // Window of at most xsize_max x ysize_max starting at (xbegin, ybegin); the
+  // size is clamped so that the window does not extend past (xend, yend).
+  constexpr RectT(T xbegin, T ybegin, size_t xsize_max, size_t ysize_max,
+                  T xend, T yend)
+      : x0_(xbegin),
+        y0_(ybegin),
+        xsize_(ClampedSize(xbegin, xsize_max, xend)),
+        ysize_(ClampedSize(ybegin, ysize_max, yend)) {}
+
+  // Construct with origin and known size (typically from another Rect).
+  constexpr RectT(T xbegin, T ybegin, size_t xsize, size_t ysize)
+      : x0_(xbegin), y0_(ybegin), xsize_(xsize), ysize_(ysize) {}
+
+  // Construct a rect that covers a whole image/plane/ImageBundle etc.
+  template <typename ImageT>
+  explicit RectT(const ImageT& image)
+      : RectT(0, 0, image.xsize(), image.ysize()) {}
+
+  RectT() : RectT(0, 0, 0, 0) {}
+
+  RectT(const RectT&) = default;
+  RectT& operator=(const RectT&) = default;
+
+  // Construct a subrect that resides in an image/plane/ImageBundle etc.
+  template <typename ImageT>
+  RectT Crop(const ImageT& image) const {
+    return Intersection(RectT(image));
+  }
+
+  // Construct a subrect that resides in the [0, ysize) x [0, xsize) region of
+  // the current rect.
+  RectT Crop(size_t area_xsize, size_t area_ysize) const {
+    return Intersection(RectT(0, 0, area_xsize, area_ysize));
+  }
+
+  // Returns a rect that only contains `num` lines with offset `y` from `y0()`.
+  RectT Lines(size_t y, size_t num) const {
+    JXL_DASSERT(y + num <= ysize_);
+    return RectT(x0_, y0_ + y, xsize_, num);
+  }
+
+  RectT Line(size_t y) const { return Lines(y, 1); }
+
+  JXL_MUST_USE_RESULT RectT Intersection(const RectT& other) const {
+    return RectT(std::max(x0_, other.x0_), std::max(y0_, other.y0_), xsize_,
+                 ysize_, std::min(x1(), other.x1()),
+                 std::min(y1(), other.y1()));
+  }
+
+  JXL_MUST_USE_RESULT RectT Translate(int64_t x_offset,
+                                      int64_t y_offset) const {
+    return RectT(x0_ + x_offset, y0_ + y_offset, xsize_, ysize_);
+  }
+
+  template <typename V>
+  V* Row(Plane<V>* image, size_t y) const {
+    JXL_DASSERT(y + y0_ >= 0);  // NOTE: always true when T is unsigned.
+    return image->Row(y + y0_) + x0_;
+  }
+
+  template <typename V>
+  const V* Row(const Plane<V>* image, size_t y) const {
+    JXL_DASSERT(y + y0_ >= 0);
+    return image->Row(y + y0_) + x0_;
+  }
+
+  template <typename V>
+  V* PlaneRow(Image3<V>* image, const size_t c, size_t y) const {
+    JXL_DASSERT(y + y0_ >= 0);
+    return image->PlaneRow(c, y + y0_) + x0_;
+  }
+
+  template <typename V>
+  const V* ConstRow(const Plane<V>& image, size_t y) const {
+    JXL_DASSERT(y + y0_ >= 0);
+    return image.ConstRow(y + y0_) + x0_;
+  }
+
+  template <typename V>
+  const V* ConstPlaneRow(const Image3<V>& image, size_t c, size_t y) const {
+    JXL_DASSERT(y + y0_ >= 0);
+    return image.ConstPlaneRow(c, y + y0_) + x0_;
+  }
+
+  bool IsInside(const RectT& other) const {
+    return x0_ >= other.x0() && x1() <= other.x1() && y0_ >= other.y0() &&
+           y1() <= other.y1();
+  }
+
+  // Returns true if this Rect fully resides in the given image. ImageT could be
+  // Plane<T> or Image3<T>; however if ImageT is Rect, results are nonsensical.
+  template <class ImageT>
+  bool IsInside(const ImageT& image) const {
+    return IsInside(RectT(image));
+  }
+
+  T x0() const { return x0_; }
+  T y0() const { return y0_; }
+  size_t xsize() const { return xsize_; }
+  size_t ysize() const { return ysize_; }
+  T x1() const { return x0_ + xsize_; }  // One past the last column.
+  T y1() const { return y0_ + ysize_; }  // One past the last row.
+
+  RectT<T> ShiftLeft(size_t shiftx, size_t shifty) const {
+    return RectT<T>(x0_ * (1 << shiftx), y0_ * (1 << shifty), xsize_ << shiftx,
+                    ysize_ << shifty);
+  }
+  RectT<T> ShiftLeft(size_t shift) const { return ShiftLeft(shift, shift); }
+
+  // Requires x0(), y0() to be multiples of 1<<shiftx, 1<<shifty (asserted).
+  RectT<T> CeilShiftRight(size_t shiftx, size_t shifty) const {
+    JXL_ASSERT(x0_ % (1 << shiftx) == 0);
+    JXL_ASSERT(y0_ % (1 << shifty) == 0);
+    return RectT<T>(x0_ / (1 << shiftx), y0_ / (1 << shifty),
+                    DivCeil(xsize_, T{1} << shiftx),
+                    DivCeil(ysize_, T{1} << shifty));
+  }
+  RectT<T> CeilShiftRight(std::pair<size_t, size_t> shift) const {
+    return CeilShiftRight(shift.first, shift.second);
+  }
+  RectT<T> CeilShiftRight(size_t shift) const {
+    return CeilShiftRight(shift, shift);
+  }
+
+  template <typename U>
+  RectT<U> As() const {
+    return RectT<U>(U(x0_), U(y0_), U(xsize_), U(ysize_));
+  }
+
+ private:
+  // Returns size_max, or whatever is left in [begin, end) (0 if end <= begin).
+  static constexpr size_t ClampedSize(T begin, size_t size_max, T end) {
+    return (static_cast<T>(begin + size_max) <= end)
+               ? size_max
+               : (end > begin ? end - begin : 0);
+  }
+
+  T x0_;
+  T y0_;
+
+  size_t xsize_;
+  size_t ysize_;
+};
+
+template <typename T>
+std::string Description(RectT<T> r) {
+  std::ostringstream text;  // Yields [x0..x1)x[y0..y1).
+  text << "[" << r.x0() << ".." << r.x1() << ")x";
+  text << "[" << r.y0() << ".." << r.y1() << ")";
+  return text.str();
+}
+
+using Rect = RectT<size_t>;
+
+// Currently, we abuse Image to either refer to an image that owns its storage
+// or one that doesn't. In similar vein, we abuse Image* function parameters to
+// either mean "assign to me" or "fill the provided image with data".
+// Hopefully, the "assign to me" meaning will go away and most images in the
+// codebase will not be backed by own storage. When this happens we can redesign
+// Image to be a non-storage-holding view class and introduce BackedImage in
+// those places that actually need it.
+
+// NOTE: we can't use Image as a view because invariants are violated
+// (alignment and the presence of padding before/after each "row").
+
+// A bundle of 3 same-sized images. Typically constructed by moving from three
+// rvalue references to Image. To overwrite an existing Image3 using
+// single-channel producers, we also need access to Image*. Constructing
+// temporary non-owning Image pointing to one plane of an existing Image3 risks
+// dangling references, especially if the wrapper is moved. Therefore, we
+// store an array of Image (which are compact enough that size is not a concern)
+// and provide Plane+Row accessors.
+template <typename ComponentType>
+class Image3 {
+ public:
+  using T = ComponentType;
+  using PlaneT = jxl::Plane<T>;
+  static constexpr size_t kNumPlanes = 3;
+
+  Image3() : planes_{PlaneT(), PlaneT(), PlaneT()} {}
+
+  Image3(const size_t xsize, const size_t ysize)
+      : planes_{PlaneT(xsize, ysize), PlaneT(xsize, ysize),
+                PlaneT(xsize, ysize)} {}
+
+  Image3(Image3&& other) noexcept {
+    for (size_t i = 0; i < kNumPlanes; i++) {
+      planes_[i] = std::move(other.planes_[i]);
+    }
+  }
+
+  Image3(PlaneT&& plane0, PlaneT&& plane1, PlaneT&& plane2) {
+    JXL_CHECK(SameSize(plane0, plane1));
+    JXL_CHECK(SameSize(plane0, plane2));
+    planes_[0] = std::move(plane0);
+    planes_[1] = std::move(plane1);
+    planes_[2] = std::move(plane2);
+  }
+
+  // Copy construction/assignment is forbidden to avoid inadvertent copies,
+  // which can be very expensive. Use CopyImageTo instead.
+  Image3(const Image3& other) = delete;
+  Image3& operator=(const Image3& other) = delete;
+
+  Image3& operator=(Image3&& other) noexcept {
+    for (size_t i = 0; i < kNumPlanes; i++) {
+      planes_[i] = std::move(other.planes_[i]);
+    }
+    return *this;
+  }
+
+  // Returns row pointer; usage: PlaneRow(idx_plane, y)[x] = val.
+  JXL_INLINE T* PlaneRow(const size_t c, const size_t y) {
+    // Not planes_[c].Row(): using planes_[0]'s stride for every plane means
+    // only a single multiplication is needed for PlaneRow(0..2, y).
+    PlaneRowBoundsCheck(c, y);
+    const size_t row_offset = y * planes_[0].bytes_per_row();
+    void* row = planes_[c].bytes() + row_offset;
+    return static_cast<T * JXL_RESTRICT>(JXL_ASSUME_ALIGNED(row, 64));
+  }
+
+  // Returns const row pointer; usage: val = PlaneRow(idx_plane, y)[x].
+  JXL_INLINE const T* PlaneRow(const size_t c, const size_t y) const {
+    PlaneRowBoundsCheck(c, y);
+    const size_t row_offset = y * planes_[0].bytes_per_row();
+    const void* row = planes_[c].bytes() + row_offset;
+    return static_cast<const T * JXL_RESTRICT>(JXL_ASSUME_ALIGNED(row, 64));
+  }
+
+  // Returns const row pointer; forwards to the const PlaneRow overload.
+  JXL_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) const {
+    PlaneRowBoundsCheck(c, y);
+    return PlaneRow(c, y);
+  }
+
+  JXL_INLINE const PlaneT& Plane(size_t idx) const { return planes_[idx]; }
+
+  JXL_INLINE PlaneT& Plane(size_t idx) { return planes_[idx]; }
+
+  void Swap(Image3& other) {
+    for (size_t c = 0; c < 3; ++c) {
+      other.planes_[c].Swap(planes_[c]);
+    }
+  }
+
+  // Useful for pre-allocating image with some padding for alignment purposes
+  // and later reporting the actual valid dimensions. May also be used to
+  // un-shrink the image. Caller is responsible for ensuring xsize/ysize are <=
+  // the original dimensions. Applied to all three planes.
+  void ShrinkTo(const size_t xsize, const size_t ysize) {
+    for (PlaneT& plane : planes_) {
+      plane.ShrinkTo(xsize, ysize);
+    }
+  }
+
+  // Sizes of all three images are guaranteed to be equal.
+  JXL_INLINE size_t xsize() const { return planes_[0].xsize(); }
+  JXL_INLINE size_t ysize() const { return planes_[0].ysize(); }
+  // Returns offset [bytes] from one row to the next row of the same plane.
+  // WARNING: this must NOT be used to determine xsize, nor for copying rows -
+  // the valid xsize may be much less.
+  JXL_INLINE size_t bytes_per_row() const { return planes_[0].bytes_per_row(); }
+  // Returns number of pixels (some of which are padding) per row. Useful for
+  // computing other rows via pointer arithmetic. WARNING: this must NOT be used
+  // to determine xsize.
+  JXL_INLINE intptr_t PixelsPerRow() const { return planes_[0].PixelsPerRow(); }
+
+ private:
+  void PlaneRowBoundsCheck(const size_t c, const size_t y) const {
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+    defined(THREAD_SANITIZER)
+    if (c >= kNumPlanes || y >= ysize()) {
+      JXL_ABORT("PlaneRow(%" PRIu64 ", %" PRIu64 ") in (%" PRIu64 " x %" PRIu64
+                ") image\n",
+                static_cast<uint64_t>(c), static_cast<uint64_t>(y),
+                static_cast<uint64_t>(xsize()), static_cast<uint64_t>(ysize()));
+    }
+#endif  // sanitizer builds only; otherwise this function is a no-op
+  }
+
+ private:
+  PlaneT planes_[kNumPlanes];
+};
+
+using Image3B = Image3<uint8_t>;
+using Image3S = Image3<int16_t>;
+using Image3U = Image3<uint16_t>;
+using Image3I = Image3<int32_t>;
+using Image3F = Image3<float>;
+using Image3D = Image3<double>;
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_IMAGE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/image_bundle.cc b/third-party/libjxl/libjxl/lib/jxl/image_bundle.cc
new file mode 100644
index 0000000000..a9bb40cf7e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/image_bundle.cc
@@ -0,0 +1,125 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/image_bundle.h"
+
+#include <limits>
+#include <utility>
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+
+void ImageBundle::ShrinkTo(size_t xsize, size_t ysize) {
+  // Color planes may be absent; every extra channel is resized regardless.
+  if (HasColor()) color_.ShrinkTo(xsize, ysize);
+  const size_t num_ec = extra_channels_.size();
+  for (size_t i = 0; i < num_ec; ++i) extra_channels_[i].ShrinkTo(xsize, ysize);
+}
+
+// Called by all other SetFrom*. Takes ownership of `color`.
+void ImageBundle::SetFromImage(Image3F&& color,
+                               const ColorEncoding& c_current) {
+  JXL_CHECK(color.xsize() != 0 && color.ysize() != 0);  // No empty images.
+  JXL_CHECK(metadata_->color_encoding.IsGray() == c_current.IsGray());
+  color_ = std::move(color);
+  c_current_ = c_current;
+  VerifySizes();  // Extra channels (if any) must match the new dimensions.
+}
+
+void ImageBundle::VerifyMetadata() const {
+  JXL_CHECK(!c_current_.ICC().empty());  // Current encoding must have an ICC.
+  JXL_CHECK(metadata_->color_encoding.IsGray() == IsGray());
+
+  if (metadata_->HasAlpha() && alpha().xsize() == 0) {  // Declared but empty.
+    JXL_UNREACHABLE("MD alpha_bits %u IB alpha %" PRIuS " x %" PRIuS "\n",
+                    metadata_->GetAlphaBits(), alpha().xsize(),
+                    alpha().ysize());
+  }
+  const uint32_t alpha_bits = metadata_->GetAlphaBits();
+  JXL_CHECK(alpha_bits <= 32);
+
+  // metadata_->num_extra_channels may temporarily differ from
+  // extra_channels_.size(), e.g. after SetAlpha. They are synced by the next
+  // call to VisitFields. For that reason the two are not compared here.
+}
+
+void ImageBundle::VerifySizes() const {
+  // Only extra channels are cross-checked against the bundle dimensions.
+  if (!HasExtraChannels()) return;
+  const size_t xs = xsize();
+  const size_t ys = ysize();
+  JXL_CHECK(xs != 0 && ys != 0);
+  for (const ImageF& ec : extra_channels_) {
+    JXL_CHECK(ec.xsize() == xs);
+    JXL_CHECK(ec.ysize() == ys);
+  }
+  // Reaching this point means every extra channel is xs x ys.
+}
+
+size_t ImageBundle::DetectRealBitdepth() const {
+  return metadata_->bit_depth.bits_per_sample;  // Nominal depth; see TODO.
+
+  // TODO(lode): let this function return lower bit depth if possible, e.g.
+  // return 8 bits in case the original image came from a 16-bit PNG that
+  // was in fact representable as 8-bit PNG. Ensure that the implementation
+  // returns 16 if e.g. two consecutive 16-bit values appeared in the original
+  // image (such as 32768 and 32769), take into account that e.g. the values
+  // 3-bit can represent are not a superset of the values 2-bit can represent,
+  // and there may be slight imprecisions in the floating point image.
+}
+
+const ImageF& ImageBundle::black() const {
+  JXL_ASSERT(HasBlack());
+  const auto* first_info = metadata_->extra_channel_info.data();
+  const size_t ec = metadata_->Find(ExtraChannel::kBlack) - first_info;
+  JXL_ASSERT(ec < extra_channels_.size());
+  return extra_channels_[ec];  // Same index as in the metadata's info list.
+}
+const ImageF& ImageBundle::alpha() const {
+  JXL_ASSERT(HasAlpha());
+  const auto* first_info = metadata_->extra_channel_info.data();
+  const size_t ec = metadata_->Find(ExtraChannel::kAlpha) - first_info;
+  JXL_ASSERT(ec < extra_channels_.size());
+  return extra_channels_[ec];  // Same index as in the metadata's info list.
+}
+ImageF* ImageBundle::alpha() {
+  JXL_ASSERT(HasAlpha());
+  const auto* first_info = metadata_->extra_channel_info.data();
+  const size_t ec = metadata_->Find(ExtraChannel::kAlpha) - first_info;
+  JXL_ASSERT(ec < extra_channels_.size());
+  return &extra_channels_[ec];  // Same index as in the metadata's info list.
+}
+
+void ImageBundle::SetAlpha(ImageF&& alpha) {
+  const ExtraChannelInfo* eci = metadata_->Find(ExtraChannel::kAlpha);
+  // SetAlphaBits must be called first; otherwise the channel index is unknown.
+  JXL_CHECK(eci != nullptr);
+  JXL_CHECK(alpha.xsize() != 0 && alpha.ysize() != 0);
+  // Position of the alpha entry within the metadata's extra channel list.
+  const auto index = eci - metadata_->extra_channel_info.data();
+  if (extra_channels_.size() >= metadata_->extra_channel_info.size()) {
+    // Enough slots already exist: overwrite the one at `index`.
+    extra_channels_[index] = std::move(alpha);
+  } else {
+    // TODO(jon): get rid of this case
+    extra_channels_.insert(extra_channels_.begin() + index, std::move(alpha));
+  }
+  // The visitor sets num_extra_channels automatically.
+  VerifySizes();
+}
+
+void ImageBundle::SetExtraChannels(std::vector<ImageF>&& extra_channels) {
+  for (const ImageF& plane : extra_channels) {  // Reject empty planes.
+    JXL_CHECK(plane.xsize() != 0 && plane.ysize() != 0);
+  }
+  extra_channels_ = std::move(extra_channels);  // Replaces previous channels.
+  VerifySizes();
+}
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/image_bundle.h b/third-party/libjxl/libjxl/lib/jxl/image_bundle.h
new file mode 100644
index 0000000000..f64f8cfcd6
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/image_bundle.h
@@ -0,0 +1,257 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_IMAGE_BUNDLE_H_
+#define LIB_JXL_IMAGE_BUNDLE_H_
+
+// The main image or frame consists of a bundle of associated images.
+
+#include <jxl/cms_interface.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_metadata.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/quantizer.h"
+
+namespace jxl {
+
+// A bundle of color/alpha/depth/plane images.
+class ImageBundle {
+ public:
+  // Uninitialized state (null metadata) for use as output parameter.
+  ImageBundle() : metadata_(nullptr) {}
+  // Caller is responsible for setting metadata before calling Set*.
+  explicit ImageBundle(const ImageMetadata* metadata) : metadata_(metadata) {}
+
+  // Move-only (allows storing in std::vector).
+  ImageBundle(ImageBundle&&) = default;
+  ImageBundle& operator=(ImageBundle&&) = default;
+
+  ImageBundle Copy() const {  // Deep copy of pixels, encoding and JPEG data.
+    ImageBundle copy(metadata_);
+    copy.color_ = Image3F(color_.xsize(), color_.ysize());
+    CopyImageTo(color_, &copy.color_);
+    copy.c_current_ = c_current_;
+    copy.extra_channels_.reserve(extra_channels_.size());
+    for (const ImageF& plane : extra_channels_) {
+      ImageF ec(plane.xsize(), plane.ysize());
+      CopyImageTo(plane, &ec);
+      copy.extra_channels_.emplace_back(std::move(ec));
+    }
+
+    copy.jpeg_data =
+        jpeg_data ? make_unique<jpeg::JPEGData>(*jpeg_data) : nullptr;
+    copy.color_transform = color_transform;
+    copy.chroma_subsampling = chroma_subsampling;
+
+    return copy;  // NOTE: origin/duration/timecode/blend*/name are not copied.
+  }
+
+  // -- SIZE
+
+  size_t xsize() const {
+    if (IsJPEG()) return jpeg_data->width;
+    if (color_.xsize() != 0) return color_.xsize();
+    return extra_channels_.empty() ? 0 : extra_channels_[0].xsize();
+  }
+  size_t ysize() const {
+    if (IsJPEG()) return jpeg_data->height;
+    if (color_.ysize() != 0) return color_.ysize();
+    return extra_channels_.empty() ? 0 : extra_channels_[0].ysize();
+  }
+  void ShrinkTo(size_t xsize, size_t ysize);
+
+  // Sizes taking orientation into account: orientations > 4 swap x and y.
+  size_t oriented_xsize() const {
+    if (static_cast<uint32_t>(metadata_->GetOrientation()) > 4) {
+      return ysize();
+    } else {
+      return xsize();
+    }
+  }
+  size_t oriented_ysize() const {
+    if (static_cast<uint32_t>(metadata_->GetOrientation()) > 4) {
+      return xsize();
+    } else {
+      return ysize();
+    }
+  }
+
+  // -- COLOR
+
+  // Whether color() is valid/usable. Returns true in most cases. Even images
+  // with spot colors (one example of when HasExtraChannels()) typically have a
+  // part that can be converted to RGB.
+  bool HasColor() const { return color_.xsize() != 0; }
+
+  // For resetting the size when switching from a reference to main frame.
+  void RemoveColor() { color_ = Image3F(); }
+
+  // Do not use if !HasColor().
+  const Image3F& color() const {
+    // If this fails, Set* was not called - perhaps because decoding failed?
+    JXL_DASSERT(HasColor());
+    return color_;
+  }
+
+  // Do not use if !HasColor().
+  Image3F* color() {
+    JXL_DASSERT(HasColor());
+    return &color_;
+  }
+
+  // If c_current.IsGray(), all planes must be identical. NOTE: c_current is
+  // independent of metadata()->color_encoding, which is the original, whereas
+  // a decoder might return pixels in a different c_current.
+  // This only sets the color channels, you must also make extra channels
+  // match the amount that is in the metadata.
+  void SetFromImage(Image3F&& color, const ColorEncoding& c_current);
+
+  // -- COLOR ENCODING
+
+  const ColorEncoding& c_current() const { return c_current_; }
+
+  // Returns whether the color image has identical planes. Once established by
+  // Set*, remains unchanged until a subsequent Set* or TransformTo.
+  bool IsGray() const { return c_current_.IsGray(); }
+
+  bool IsSRGB() const { return c_current_.IsSRGB(); }
+  bool IsLinearSRGB() const {
+    return c_current_.white_point == WhitePoint::kD65 &&
+           c_current_.primaries == Primaries::kSRGB && c_current_.tf.IsLinear();
+  }
+
+  // Set the c_current profile without doing any transformation, e.g. if the
+  // transformation was already applied.
+  void OverrideProfile(const ColorEncoding& new_c_current) {
+    c_current_ = new_c_current;
+  }
+
+  // TODO(lode): TransformTo and CopyTo are implemented in enc_image_bundle.cc,
+  // move these functions out of this header file and class, to
+  // enc_image_bundle.h.
+
+  // Transforms color to c_desired and sets c_current to c_desired. Alpha and
+  // metadata remains unchanged.
+  Status TransformTo(const ColorEncoding& c_desired, const JxlCmsInterface& cms,
+                     ThreadPool* pool = nullptr);
+  // Copies this:rect, converts to c_desired, and allocates+fills out.
+  Status CopyTo(const Rect& rect, const ColorEncoding& c_desired,
+                const JxlCmsInterface& cms, Image3F* out,
+                ThreadPool* pool = nullptr) const;
+
+  // Detect 'real' bit depth, which can be lower than nominal bit depth
+  // (this is common in PNG), returns 'real' bit depth
+  size_t DetectRealBitdepth() const;
+
+  // -- ALPHA
+
+  void SetAlpha(ImageF&& alpha);
+  bool HasAlpha() const {
+    return metadata_->Find(ExtraChannel::kAlpha) != nullptr;
+  }
+  bool AlphaIsPremultiplied() const {
+    const ExtraChannelInfo* eci = metadata_->Find(ExtraChannel::kAlpha);
+    return (eci == nullptr) ? false : eci->alpha_associated;
+  }
+  const ImageF& alpha() const;
+  ImageF* alpha();
+
+  // -- EXTRA CHANNELS
+  bool HasBlack() const {
+    return metadata_->Find(ExtraChannel::kBlack) != nullptr;
+  }
+  const ImageF& black() const;
+
+  // Extra channels of unknown interpretation (e.g. spot colors).
+  void SetExtraChannels(std::vector<ImageF>&& extra_channels);
+  void ClearExtraChannels() { extra_channels_.clear(); }
+  bool HasExtraChannels() const { return !extra_channels_.empty(); }
+  const std::vector<ImageF>& extra_channels() const { return extra_channels_; }
+  std::vector<ImageF>& extra_channels() { return extra_channels_; }
+
+  const ImageMetadata* metadata() const { return metadata_; }
+
+  void VerifyMetadata() const;
+
+  void SetDecodedBytes(size_t decoded_bytes) { decoded_bytes_ = decoded_bytes; }
+  size_t decoded_bytes() const { return decoded_bytes_; }
+
+  // -- JPEG transcoding:
+
+  // Returns true if image does or will represent quantized DCT-8 coefficients,
+  // stored in 8x8 pixel regions.
+  bool IsJPEG() const {
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+    return jpeg_data != nullptr;
+#else   // JPEGXL_ENABLE_TRANSCODE_JPEG
+    return false;
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+  }
+
+  std::unique_ptr<jpeg::JPEGData> jpeg_data;
+  // these fields are used to signal the input JPEG color space
+  // NOTE: JPEG doesn't actually provide a way to determine whether YCbCr was
+  // applied or not.
+  ColorTransform color_transform = ColorTransform::kNone;
+  YCbCrChromaSubsampling chroma_subsampling;
+
+  FrameOrigin origin{0, 0};
+
+  // Animation-related information, corresponding to the timecode and duration
+  // fields of the jxl::AnimationFrame of the jxl::FrameHeader.
+  // TODO(lode): ImageBundle is used here to carry the information from
+  // jxl::FrameHeader, consider instead passing a jxl::FrameHeader directly to
+  // EncodeFrame or having a field of that type here.
+  uint32_t duration = 0;
+  uint32_t timecode = 0;
+
+  // TODO(lode): these fields do not match the JXL frame header, it should be
+  // possible to specify up to 4 (3 if nonzero duration) slots to save this
+  // frame as reference (see save_as_reference).
+  bool use_for_next_frame = false;
+  bool blend = false;
+  BlendMode blendmode = BlendMode::kBlend;
+
+  std::string name;
+
+ private:
+  // Called after any Set* to ensure their sizes are compatible.
+  void VerifySizes() const;
+
+  // Required for TransformTo so that an ImageBundle is self-sufficient. Always
+  // points to the same thing, but cannot be const-pointer because that prevents
+  // the compiler from generating move assignment.
+  const ImageMetadata* metadata_;
+
+  // Initialized by Set*:
+  Image3F color_;  // Empty iff !HasColor(); all planes equal if IsGray().
+  ColorEncoding c_current_;  // of color_
+
+  // Set by SetExtraChannels/SetAlpha; size = ImageMetadata.num_extra_channels
+  std::vector<ImageF> extra_channels_;
+
+  // How many bytes of the input were actually read.
+  size_t decoded_bytes_ = 0;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_IMAGE_BUNDLE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/image_bundle_test.cc b/third-party/libjxl/libjxl/lib/jxl/image_bundle_test.cc
new file mode 100644
index 0000000000..1a10598fe2
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/image_bundle_test.cc
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/image_bundle.h"
+
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+TEST(ImageBundleTest, ExtraChannelName) {  // An extra channel's name must survive a metadata write/read round trip.
+  AuxOut aux_out;
+  BitWriter writer;
+  BitWriter::Allotment allotment(&writer, 99);  // NOTE(review): 99 is presumably a bit budget that must cover the metadata written below — confirm.
+
+  ImageMetadata metadata;
+  ExtraChannelInfo eci;
+  eci.type = ExtraChannel::kBlack;
+  eci.name = "testK";
+  metadata.extra_channel_info.push_back(std::move(eci));
+  ASSERT_TRUE(WriteImageMetadata(metadata, &writer, /*layer=*/0, &aux_out));
+  writer.ZeroPadToByte();
+  allotment.ReclaimAndCharge(&writer, /*layer=*/0, &aux_out);
+
+  BitReader reader(writer.GetSpan());  // Parse back the bytes that were just written.
+  ImageMetadata metadata_out;
+  ASSERT_TRUE(ReadImageMetadata(&reader, &metadata_out));
+  EXPECT_TRUE(reader.Close());
+  EXPECT_EQ("testK", metadata_out.Find(ExtraChannel::kBlack)->name);  // The kBlack channel is looked up by type; its name must match.
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/image_metadata.cc b/third-party/libjxl/libjxl/lib/jxl/image_metadata.cc
new file mode 100644
index 0000000000..eef1f1f447
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/image_metadata.cc
@@ -0,0 +1,477 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/image_metadata.h"
+
+#include <limits>
+#include <utility>
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/quantizer.h"
+
+namespace jxl {
+BitDepth::BitDepth() { Bundle::Init(this); }  // NOTE(review): Bundle::Init presumably applies the VisitFields defaults — confirm.
+Status BitDepth::VisitFields(Visitor* JXL_RESTRICT visitor) {  // Visits floating_point_sample, bits_per_sample and exponent_bits_per_sample, then range-checks them.
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &floating_point_sample));
+  // The same fields (bits_per_sample and exponent_bits_per_sample) are read
+  // in a different way depending on floating_point_sample's value. It's still
+  // default-initialized correctly so using visitor->Conditional is not
+  // required.
+  if (!floating_point_sample) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+        Val(8), Val(10), Val(12), BitsOffset(6, 1), 8, &bits_per_sample));
+    exponent_bits_per_sample = 0;
+  } else {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+        Val(32), Val(16), Val(24), BitsOffset(6, 1), 32, &bits_per_sample));
+    // The encoded value is exponent_bits_per_sample - 1, visited as 4 bits (see the Bits(4, ...) call
+    // below), so the decoded value can be in range [1, 16]; the check further down narrows it to [2, 8].
+    const uint32_t offset = 1;
+    exponent_bits_per_sample -= offset;
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->Bits(4, 8 - offset, &exponent_bits_per_sample));
+    exponent_bits_per_sample += offset;
+  }
+
+  // Error-checking for floating point ranges (and the upper bound for integer samples).
+  if (floating_point_sample) {
+    if (exponent_bits_per_sample < 2 || exponent_bits_per_sample > 8) {
+      return JXL_FAILURE("Invalid exponent_bits_per_sample: %u",
+                         exponent_bits_per_sample);
+    }
+    int mantissa_bits =
+        static_cast<int>(bits_per_sample) - exponent_bits_per_sample - 1;  // -1 accounts for the sign bit
+    if (mantissa_bits < 2 || mantissa_bits > 23) {
+      return JXL_FAILURE("Invalid bits_per_sample: %u", bits_per_sample);
+    }
+  } else {
+    if (bits_per_sample > 31) {  // integer samples: at most 31 bits are accepted
+      return JXL_FAILURE("Invalid bits_per_sample: %u", bits_per_sample);
+    }
+  }
+  return true;
+}
+
+#if JXL_DEBUG_V_LEVEL >= 1
+std::string BitDepth::DebugString() const {
+  // Renders "U<bits>" (integer) or "F<bits>.<exponent bits>" (floating point).
+  std::ostringstream text;
+  text << (floating_point_sample ? "F" : "U") << bits_per_sample;
+  if (floating_point_sample) text << "." << exponent_bits_per_sample;
+  return text.str();
+}
+#endif
+
+CustomTransformData::CustomTransformData() { Bundle::Init(this); }  // NOTE(review): Bundle::Init presumably applies the VisitFields defaults — confirm.
+Status CustomTransformData::VisitFields(Visitor* JXL_RESTRICT visitor) {  // Visits the optional opsin inverse matrix and the optional custom 2x/4x/8x upsampling weights.
+  if (visitor->AllDefault(*this, &all_default)) {
+    // Overwrite all serialized fields, but not any nonserialized_*.
+    visitor->SetDefault(this);
+    return true;
+  }
+  if (visitor->Conditional(nonserialized_xyb_encoded)) {  // the matrix is only visited for XYB-encoded images
+    JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&opsin_inverse_matrix));
+  }
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 0, &custom_weights_mask));  // bit 0: 2x weights, bit 1: 4x weights, bit 2: 8x weights
+  if (visitor->Conditional((custom_weights_mask & 0x1) != 0)) {
+    // 4 5x5 kernels, but all of them can be obtained by symmetry from one,
+    // which is symmetric along its main diagonal. The top-left kernel is
+    // defined by
+    //
+    // 0  1  2  3  4
+    // 1  5  6  7  8
+    // 2  6  9 10 11
+    // 3  7 10 12 13
+    // 4  8 11 13 14
+    float constexpr kWeights2[15] = {  // per-entry defaults; 15 = number of distinct indices (0..14) in the diagram above
+        -0.01716200f, -0.03452303f, -0.04022174f, -0.02921014f, -0.00624645f,
+        0.14111091f,  0.28896755f,  0.00278718f,  -0.01610267f, 0.56661550f,
+        0.03777607f,  -0.01986694f, -0.03144731f, -0.01185068f, -0.00213539f};
+    for (size_t i = 0; i < 15; i++) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(kWeights2[i], &upsampling2_weights[i]));
+    }
+  }
+  if (visitor->Conditional((custom_weights_mask & 0x2) != 0)) {
+    // 16 5x5 kernels, but all of them can be obtained by symmetry from
+    // three, two of which are symmetric along their main diagonals. The top
+    // left 4 kernels are defined by
+    //
+    // 0  1  2  3  4   5  6  7  8  9
+    // 1 10 11 12 13  14 15 16 17 18
+    // 2 11 19 20 21  22 23 24 25 26
+    // 3 12 20 27 28  29 30 31 32 33
+    // 4 13 21 28 34  35 36 37 38 39
+    //
+    // 5 14 22 29 35  40 41 42 43 44
+    // 6 15 23 30 36  41 45 46 47 48
+    // 7 16 24 31 37  42 46 49 50 51
+    // 8 17 25 32 38  43 47 50 52 53
+    // 9 18 26 33 39  44 48 51 53 54
+    constexpr float kWeights4[55] = {  // per-entry defaults; 55 = number of distinct indices (0..54) in the diagram above
+        -0.02419067f, -0.03491987f, -0.03693351f, -0.03094285f, -0.00529785f,
+        -0.01663432f, -0.03556863f, -0.03888905f, -0.03516850f, -0.00989469f,
+        0.23651958f,  0.33392945f,  -0.01073543f, -0.01313181f, -0.03556694f,
+        0.13048175f,  0.40103025f,  0.03951150f,  -0.02077584f, 0.46914198f,
+        -0.00209270f, -0.01484589f, -0.04064806f, 0.18942530f,  0.56279892f,
+        0.06674400f,  -0.02335494f, -0.03551682f, -0.00754830f, -0.02267919f,
+        -0.02363578f, 0.00315804f,  -0.03399098f, -0.01359519f, -0.00091653f,
+        -0.00335467f, -0.01163294f, -0.01610294f, -0.00974088f, -0.00191622f,
+        -0.01095446f, -0.03198464f, -0.04455121f, -0.02799790f, -0.00645912f,
+        0.06390599f,  0.22963888f,  0.00630981f,  -0.01897349f, 0.67537268f,
+        0.08483369f,  -0.02534994f, -0.02205197f, -0.01667999f, -0.00384443f};
+    for (size_t i = 0; i < 55; i++) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(kWeights4[i], &upsampling4_weights[i]));
+    }
+  }
+  if (visitor->Conditional((custom_weights_mask & 0x4) != 0)) {
+    // 64 5x5 kernels, all of them can be obtained by symmetry from
+    // 10, 4 of which are symmetric along their main diagonals. The top
+    // left 16 kernels are defined by (indices below are hexadecimal)
+    //  0  1  2  3  4   5  6  7  8  9   a  b  c  d  e   f 10 11 12 13
+    //  1 14 15 16 17  18 19 1a 1b 1c  1d 1e 1f 20 21  22 23 24 25 26
+    //  2 15 27 28 29  2a 2b 2c 2d 2e  2f 30 31 32 33  34 35 36 37 38
+    //  3 16 28 39 3a  3b 3c 3d 3e 3f  40 41 42 43 44  45 46 47 48 49
+    //  4 17 29 3a 4a  4b 4c 4d 4e 4f  50 51 52 53 54  55 56 57 58 59
+
+    //  5 18 2a 3b 4b  5a 5b 5c 5d 5e  5f 60 61 62 63  64 65 66 67 68
+    //  6 19 2b 3c 4c  5b 69 6a 6b 6c  6d 6e 6f 70 71  72 73 74 75 76
+    //  7 1a 2c 3d 4d  5c 6a 77 78 79  7a 7b 7c 7d 7e  7f 80 81 82 83
+    //  8 1b 2d 3e 4e  5d 6b 78 84 85  86 87 88 89 8a  8b 8c 8d 8e 8f
+    //  9 1c 2e 3f 4f  5e 6c 79 85 90  91 92 93 94 95  96 97 98 99 9a
+
+    //  a 1d 2f 40 50  5f 6d 7a 86 91  9b 9c 9d 9e 9f  a0 a1 a2 a3 a4
+    //  b 1e 30 41 51  60 6e 7b 87 92  9c a5 a6 a7 a8  a9 aa ab ac ad
+    //  c 1f 31 42 52  61 6f 7c 88 93  9d a6 ae af b0  b1 b2 b3 b4 b5
+    //  d 20 32 43 53  62 70 7d 89 94  9e a7 af b6 b7  b8 b9 ba bb bc
+    //  e 21 33 44 54  63 71 7e 8a 95  9f a8 b0 b7 bd  be bf c0 c1 c2
+
+    //  f 22 34 45 55  64 72 7f 8b 96  a0 a9 b1 b8 be  c3 c4 c5 c6 c7
+    // 10 23 35 46 56  65 73 80 8c 97  a1 aa b2 b9 bf  c4 c8 c9 ca cb
+    // 11 24 36 47 57  66 74 81 8d 98  a2 ab b3 ba c0  c5 c9 cc cd ce
+    // 12 25 37 48 58  67 75 82 8e 99  a3 ac b4 bb c1  c6 ca cd cf d0
+    // 13 26 38 49 59  68 76 83 8f 9a  a4 ad b5 bc c2  c7 cb ce d0 d1
+    constexpr float kWeights8[210] = {  // per-entry defaults; 210 = number of distinct indices (0x0..0xd1) in the diagram above
+        -0.02928613f, -0.03706353f, -0.03783812f, -0.03324558f, -0.00447632f,
+        -0.02519406f, -0.03752601f, -0.03901508f, -0.03663285f, -0.00646649f,
+        -0.02066407f, -0.03838633f, -0.04002101f, -0.03900035f, -0.00901973f,
+        -0.01626393f, -0.03954148f, -0.04046620f, -0.03979621f, -0.01224485f,
+        0.29895328f,  0.35757708f,  -0.02447552f, -0.01081748f, -0.04314594f,
+        0.23903219f,  0.41119301f,  -0.00573046f, -0.01450239f, -0.04246845f,
+        0.17567618f,  0.45220643f,  0.02287757f,  -0.01936783f, -0.03583255f,
+        0.11572472f,  0.47416733f,  0.06284440f,  -0.02685066f, 0.42720050f,
+        -0.02248939f, -0.01155273f, -0.04562755f, 0.28689496f,  0.49093869f,
+        -0.00007891f, -0.01545926f, -0.04562659f, 0.21238920f,  0.53980934f,
+        0.03369474f,  -0.02070211f, -0.03866988f, 0.14229550f,  0.56593398f,
+        0.08045181f,  -0.02888298f, -0.03680918f, -0.00542229f, -0.02920477f,
+        -0.02788574f, -0.02118180f, -0.03942402f, -0.00775547f, -0.02433614f,
+        -0.03193943f, -0.02030828f, -0.04044014f, -0.01074016f, -0.01930822f,
+        -0.03620399f, -0.01974125f, -0.03919545f, -0.01456093f, -0.00045072f,
+        -0.00360110f, -0.01020207f, -0.01231907f, -0.00638988f, -0.00071592f,
+        -0.00279122f, -0.00957115f, -0.01288327f, -0.00730937f, -0.00107783f,
+        -0.00210156f, -0.00890705f, -0.01317668f, -0.00813895f, -0.00153491f,
+        -0.02128481f, -0.04173044f, -0.04831487f, -0.03293190f, -0.00525260f,
+        -0.01720322f, -0.04052736f, -0.05045706f, -0.03607317f, -0.00738030f,
+        -0.01341764f, -0.03965629f, -0.05151616f, -0.03814886f, -0.01005819f,
+        0.18968273f,  0.33063684f,  -0.01300105f, -0.01372950f, -0.04017465f,
+        0.13727832f,  0.36402234f,  0.01027890f,  -0.01832107f, -0.03365072f,
+        0.08734506f,  0.38194295f,  0.04338228f,  -0.02525993f, 0.56408126f,
+        0.00458352f,  -0.01648227f, -0.04887868f, 0.24585519f,  0.62026135f,
+        0.04314807f,  -0.02213737f, -0.04158014f, 0.16637289f,  0.65027023f,
+        0.09621636f,  -0.03101388f, -0.04082742f, -0.00904519f, -0.02790922f,
+        -0.02117818f, 0.00798662f,  -0.03995711f, -0.01243427f, -0.02231705f,
+        -0.02946266f, 0.00992055f,  -0.03600283f, -0.01684920f, -0.00111684f,
+        -0.00411204f, -0.01297130f, -0.01723725f, -0.01022545f, -0.00165306f,
+        -0.00313110f, -0.01218016f, -0.01763266f, -0.01125620f, -0.00231663f,
+        -0.01374149f, -0.03797620f, -0.05142937f, -0.03117307f, -0.00581914f,
+        -0.01064003f, -0.03608089f, -0.05272168f, -0.03375670f, -0.00795586f,
+        0.09628104f,  0.27129991f,  -0.00353779f, -0.01734151f, -0.03153981f,
+        0.05686230f,  0.28500998f,  0.02230594f,  -0.02374955f, 0.68214326f,
+        0.05018048f,  -0.02320852f, -0.04383616f, 0.18459474f,  0.71517975f,
+        0.10805613f,  -0.03263677f, -0.03637639f, -0.01394373f, -0.02511203f,
+        -0.01728636f, 0.05407331f,  -0.02867568f, -0.01893131f, -0.00240854f,
+        -0.00446511f, -0.01636187f, -0.02377053f, -0.01522848f, -0.00333334f,
+        -0.00819975f, -0.02964169f, -0.04499287f, -0.02745350f, -0.00612408f,
+        0.02727416f,  0.19446600f,  0.00159832f,  -0.02232473f, 0.74982506f,
+        0.11452620f,  -0.03348048f, -0.01605681f, -0.02070339f, -0.00458223f};
+    for (size_t i = 0; i < 210; i++) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(kWeights8[i], &upsampling8_weights[i]));
+    }
+  }
+  return true;
+}
+
+ExtraChannelInfo::ExtraChannelInfo() { Bundle::Init(this); }  // NOTE(review): Bundle::Init presumably applies the VisitFields defaults — confirm.
+Status ExtraChannelInfo::VisitFields(Visitor* JXL_RESTRICT visitor) {  // Visits one extra channel description and rejects unknown/reserved channel types.
+  if (visitor->AllDefault(*this, &all_default)) {
+    // Overwrite all serialized fields, but not any nonserialized_*.
+    visitor->SetDefault(this);
+    return true;
+  }
+
+  // General (fields visited for every channel type)
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Enum(ExtraChannel::kAlpha, &type));
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&bit_depth));
+
+  JXL_QUIET_RETURN_IF_ERROR(
+      visitor->U32(Val(0), Val(3), Val(4), BitsOffset(3, 1), 0, &dim_shift));
+  if ((1U << dim_shift) > 8) {  // i.e. dim_shift > 3: more than 8x downsampling per axis is rejected
+    return JXL_FAILURE("dim_shift %u too large", dim_shift);
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(VisitNameString(visitor, &name));
+
+  // Conditional (fields visited only for specific channel types)
+  if (visitor->Conditional(type == ExtraChannel::kAlpha)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &alpha_associated));
+  }
+  if (visitor->Conditional(type == ExtraChannel::kSpotColor)) {
+    for (float& c : spot_color) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0, &c));
+    }
+  }
+  if (visitor->Conditional(type == ExtraChannel::kCFA)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(1), Bits(2), BitsOffset(4, 3),
+                                           BitsOffset(8, 19), 1, &cfa_channel));
+  }
+
+  if (type == ExtraChannel::kUnknown ||  // kUnknown and kReserved0..kReserved7 are all treated as errors here
+      (int(ExtraChannel::kReserved0) <= int(type) &&
+       int(type) <= int(ExtraChannel::kReserved7))) {
+    return JXL_FAILURE("Unknown extra channel (bits %u, shift %u, name '%s')\n",
+                       bit_depth.bits_per_sample, dim_shift, name.c_str());
+  }
+  return true;
+}
+
+#if JXL_DEBUG_V_LEVEL >= 1
+std::string ExtraChannelInfo::DebugString() const {  // "<type>[(premul)] <bit depth> shift: <dim_shift>"
+  const char* type_name = "Unknown";  // used for every type without a label below
+  if (type == ExtraChannel::kAlpha) type_name = "Alpha";
+  if (type == ExtraChannel::kDepth) type_name = "Depth";
+  if (type == ExtraChannel::kSpotColor) type_name = "Spot";
+  if (type == ExtraChannel::kSelectionMask) type_name = "Mask";
+  if (type == ExtraChannel::kBlack) type_name = "Black";
+  if (type == ExtraChannel::kCFA) type_name = "CFA";
+  if (type == ExtraChannel::kThermal) type_name = "Thermal";
+  std::ostringstream out;
+  out << type_name;
+  if (type == ExtraChannel::kAlpha && alpha_associated) out << "(premul)";
+  out << " " << bit_depth.DebugString() << " shift: " << dim_shift;
+  return out.str();
+}
+#endif
+
+ImageMetadata::ImageMetadata() { Bundle::Init(this); }  // NOTE(review): Bundle::Init presumably applies the VisitFields defaults — confirm.
+Status ImageMetadata::VisitFields(Visitor* JXL_RESTRICT visitor) {  // Visits the image-level header fields in their serialized order.
+  if (visitor->AllDefault(*this, &all_default)) {
+    // Overwrite all serialized fields, but not any nonserialized_*.
+    visitor->SetDefault(this);
+    return true;
+  }
+
+  // Bundle::AllDefault does not allow usage when reading (it may abort the
+  // program when a codestream has invalid values), but when reading we
+  // overwrite the extra_fields value, so do not need to call AllDefault.
+  bool tone_mapping_default =
+      visitor->IsReading() ? false : Bundle::AllDefault(tone_mapping);
+
+  bool extra_fields = (orientation != 1 || have_preview || have_animation ||
+                       have_intrinsic_size || !tone_mapping_default);
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &extra_fields));
+  if (visitor->Conditional(extra_fields)) {
+    orientation--;  // visited as orientation - 1, so the values 1..8 fit in the 3 bits below
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 0, &orientation));
+    orientation++;
+    // (No need for bounds checking because we read exactly 3 bits)
+
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &have_intrinsic_size));
+    if (visitor->Conditional(have_intrinsic_size)) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&intrinsic_size));
+    }
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &have_preview));
+    if (visitor->Conditional(have_preview)) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&preview_size));
+    }
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &have_animation));
+    if (visitor->Conditional(have_animation)) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&animation));
+    }
+  } else {
+    orientation = 1;  // identity
+    have_intrinsic_size = false;
+    have_preview = false;
+    have_animation = false;
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&bit_depth));
+  JXL_QUIET_RETURN_IF_ERROR(
+      visitor->Bool(true, &modular_16_bit_buffer_sufficient));
+
+  num_extra_channels = extra_channel_info.size();  // seeded from the vector; when reading, the vector is resized to the visited count below
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(0), Val(1), BitsOffset(4, 2),
+                                         BitsOffset(12, 1), 0,
+                                         &num_extra_channels));
+
+  if (visitor->Conditional(num_extra_channels != 0)) {
+    if (visitor->IsReading()) {
+      extra_channel_info.resize(num_extra_channels);
+    }
+    for (ExtraChannelInfo& eci : extra_channel_info) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&eci));
+    }
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(true, &xyb_encoded));
+  JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&color_encoding));
+  if (visitor->Conditional(extra_fields)) {  // tone_mapping is only visited when extra_fields is set
+    JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&tone_mapping));
+  }
+
+  // Treat as if only the fields up to extra channels exist.
+  if (visitor->IsReading() && nonserialized_only_parse_basic_info) {
+    return true;
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->BeginExtensions(&extensions));
+  // Extensions: in chronological order of being added to the format.
+  return visitor->EndExtensions();
+}
+
+OpsinInverseMatrix::OpsinInverseMatrix() { Bundle::Init(this); }  // NOTE(review): Bundle::Init presumably applies the VisitFields defaults — confirm.
+Status OpsinInverseMatrix::VisitFields(Visitor* JXL_RESTRICT visitor) {  // Visits 9 matrix entries, 3 opsin biases and 4 quant biases as F16 values.
+  if (visitor->AllDefault(*this, &all_default)) {
+    // Overwrite all serialized fields, but not any nonserialized_*.
+    visitor->SetDefault(this);
+    return true;
+  }
+  for (int i = 0; i < 9; ++i) {  // NOTE(review): 9 entries, presumably a flattened 3x3 matrix — confirm layout
+    JXL_QUIET_RETURN_IF_ERROR(visitor->F16(
+        DefaultInverseOpsinAbsorbanceMatrix()[i], &inverse_matrix[i]));
+  }
+  for (int i = 0; i < 3; ++i) {  // one bias per entry of kNegOpsinAbsorbanceBiasRGB
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->F16(kNegOpsinAbsorbanceBiasRGB[i], &opsin_biases[i]));
+  }
+  for (int i = 0; i < 4; ++i) {  // defaults come from kDefaultQuantBias
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->F16(kDefaultQuantBias[i], &quant_biases[i]));
+  }
+  return true;
+}
+
+ToneMapping::ToneMapping() { Bundle::Init(this); }  // NOTE(review): Bundle::Init presumably applies the VisitFields defaults — confirm.
+Status ToneMapping::VisitFields(Visitor* JXL_RESTRICT visitor) {  // Visits the tone mapping fields and validates each one right after it is visited.
+  if (visitor->AllDefault(*this, &all_default)) {
+    // Overwrite all serialized fields, but not any nonserialized_*.
+    visitor->SetDefault(this);
+    return true;
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(
+      visitor->F16(kDefaultIntensityTarget, &intensity_target));
+  if (intensity_target <= 0.f) {  // must be strictly positive
+    return JXL_FAILURE("invalid intensity target");
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.0f, &min_nits));
+  if (min_nits < 0.f || min_nits > intensity_target) {  // must lie in [0, intensity_target]
+    return JXL_FAILURE("invalid min %f vs max %f", min_nits, intensity_target);
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &relative_to_max_display));
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.0f, &linear_below));
+  if (linear_below < 0 || (relative_to_max_display && linear_below > 1.0f)) {  // a relative value is a ratio and may not exceed 1
+    return JXL_FAILURE("invalid linear_below %f (%s)", linear_below,
+                       relative_to_max_display ? "relative" : "absolute");
+  }
+
+  return true;
+}
+
+Status ReadImageMetadata(BitReader* JXL_RESTRICT reader,  // Thin wrapper: forwards to Bundle::Read and returns its Status.
+                         ImageMetadata* JXL_RESTRICT metadata) {
+  return Bundle::Read(reader, metadata);
+}
+
+void ImageMetadata::SetAlphaBits(uint32_t bits, bool alpha_is_premultiplied) {
+  // bits == 0 removes every alpha channel. Otherwise the first alpha channel
+  // is updated in place, or a new one is inserted at the front if none exists.
+  ExtraChannelInfo* const first_alpha = Find(ExtraChannel::kAlpha);
+  const bool has_alpha = (first_alpha != nullptr);
+  if (bits == 0 && has_alpha) {
+    // In theory there may be several alpha channels; drop all of them so that
+    // a subsequent HasAlpha() returns false.
+    const auto new_end = std::remove_if(
+        extra_channel_info.begin(), extra_channel_info.end(),
+        [](const ExtraChannelInfo& candidate) {
+          return candidate.type == ExtraChannel::kAlpha;
+        });
+    extra_channel_info.erase(new_end, extra_channel_info.end());
+  } else if (bits != 0 && has_alpha) {
+    // Only the first alpha channel is modified; further ones are left alone.
+    BitDepth& depth = first_alpha->bit_depth;
+    depth.bits_per_sample = bits;
+    depth.floating_point_sample = false;
+    depth.exponent_bits_per_sample = 0;
+    first_alpha->alpha_associated = alpha_is_premultiplied;
+  } else if (bits != 0) {
+    // No alpha channel yet. The new one goes first so that it is listed ahead
+    // of any extra channels that are already present.
+    ExtraChannelInfo fresh;
+    fresh.type = ExtraChannel::kAlpha;
+    fresh.bit_depth.bits_per_sample = bits;
+    fresh.dim_shift = 0;
+    fresh.alpha_associated = alpha_is_premultiplied;
+    extra_channel_info.insert(extra_channel_info.begin(), fresh);
+  }
+  num_extra_channels = extra_channel_info.size();
+  // Above 12 bits the 16-bit modular buffer is no longer declared sufficient.
+  if (bits > 12) modular_16_bit_buffer_sufficient = false;
+}
+
+#if JXL_DEBUG_V_LEVEL >= 1
+std::string ImageMetadata::DebugString() const {
+  // One-line summary: bit depth, buffer flag, color handling, extra channels,
+  // preview size and any non-identity orientation.
+  std::ostringstream out;
+  out << bit_depth.DebugString();
+  if (modular_16_bit_buffer_sufficient) out << " (modular 16)";
+  const char* const color_mode = xyb_encoded ? " xyb encoded" : " orig profile";
+  out << color_mode << " " << Description(color_encoding);
+  if (num_extra_channels > 0) out << " extra channels:";
+  for (size_t idx = 0; idx < num_extra_channels; ++idx) {
+    // Entries are comma-separated, with no comma after the last one.
+    const bool is_last = (idx + 1 == num_extra_channels);
+    out << " (" << extra_channel_info[idx].DebugString() << ")";
+    if (!is_last) out << ",";
+  }
+  if (have_preview) {
+    out << " preview: " << preview_size.xsize() << "x" << preview_size.ysize();
+  }
+  if (orientation != 1) {
+    out << " orientation: " << orientation;
+  }
+  return out.str();
+}
+
+std::string CodecMetadata::DebugString() const {
+  // Renders "<xsize>x<ysize> <image metadata summary>".
+  std::ostringstream summary;
+  summary << size.xsize() << "x" << size.ysize() << " " << m.DebugString();
+  return summary.str();
+}
+#endif
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/image_metadata.h b/third-party/libjxl/libjxl/lib/jxl/image_metadata.h
new file mode 100644
index 0000000000..ca69eb3a3d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/image_metadata.h
@@ -0,0 +1,425 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Main codestream header bundles, the metadata that applies to all frames.
+// Enums must align with the C API definitions in codestream_header.h.
+
+#ifndef LIB_JXL_IMAGE_METADATA_H_
+#define LIB_JXL_IMAGE_METADATA_H_
+
+#include <jxl/codestream_header.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+#include "lib/jxl/opsin_params.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// EXIF orientation of the image. This field overrides any field present in
+// actual EXIF metadata. The value tells which transformation the decoder must
+// apply after decoding to display the image with the correct orientation.
+enum class Orientation : uint32_t {
+  // Values 1..8 match the EXIF definitions.
+  kIdentity = JXL_ORIENT_IDENTITY,
+  kFlipHorizontal = JXL_ORIENT_FLIP_HORIZONTAL,
+  kRotate180 = JXL_ORIENT_ROTATE_180,
+  kFlipVertical = JXL_ORIENT_FLIP_VERTICAL,
+  kTranspose = JXL_ORIENT_TRANSPOSE,
+  kRotate90 = JXL_ORIENT_ROTATE_90_CW,
+  kAntiTranspose = JXL_ORIENT_ANTI_TRANSPOSE,
+  kRotate270 = JXL_ORIENT_ROTATE_90_CCW,
+};
+// Don't need an EnumBits because Orientation is not read via Enum(); ImageMetadata::VisitFields visits it as (orientation - 1) in 3 bits.
+
+enum class ExtraChannel : uint32_t {  // Enumerator values come from the JXL_CHANNEL_* constants of the C API.
+  // First two enumerators (most common) are cheaper to encode
+  kAlpha = JXL_CHANNEL_ALPHA,
+  kDepth = JXL_CHANNEL_DEPTH,
+
+  kSpotColor = JXL_CHANNEL_SPOT_COLOR,
+  kSelectionMask = JXL_CHANNEL_SELECTION_MASK,
+  kBlack = JXL_CHANNEL_BLACK,  // for CMYK
+  kCFA = JXL_CHANNEL_CFA,      // Bayer channel
+  kThermal = JXL_CHANNEL_THERMAL,
+  kReserved0 = JXL_CHANNEL_RESERVED0,  // kReserved0..kReserved7 are rejected by ExtraChannelInfo::VisitFields
+  kReserved1 = JXL_CHANNEL_RESERVED1,
+  kReserved2 = JXL_CHANNEL_RESERVED2,
+  kReserved3 = JXL_CHANNEL_RESERVED3,
+  kReserved4 = JXL_CHANNEL_RESERVED4,
+  kReserved5 = JXL_CHANNEL_RESERVED5,
+  kReserved6 = JXL_CHANNEL_RESERVED6,
+  kReserved7 = JXL_CHANNEL_RESERVED7,
+  // disambiguated via name string, raise warning if unsupported (NOTE(review): ExtraChannelInfo::VisitFields currently fails on kUnknown)
+  kUnknown = JXL_CHANNEL_UNKNOWN,
+  // like kUnknown but can silently be ignored
+  kOptional = JXL_CHANNEL_OPTIONAL
+};
+static inline const char* EnumName(ExtraChannel /*unused*/) {  // Returns the type's name; the argument only selects the overload.
+  return "ExtraChannel";
+}
+static inline constexpr uint64_t EnumBits(ExtraChannel /*unused*/) {  // Bitmask built from MakeBit of selected enumerators; NOTE(review): presumably the set Enum() accepts — confirm.
+  using EC = ExtraChannel;
+  return MakeBit(EC::kAlpha) | MakeBit(EC::kDepth) | MakeBit(EC::kSpotColor) |
+         MakeBit(EC::kSelectionMask) | MakeBit(EC::kBlack) | MakeBit(EC::kCFA) |
+         MakeBit(EC::kThermal) | MakeBit(EC::kUnknown) | MakeBit(EC::kOptional);  // kReserved0..kReserved7 are not part of the mask
+}
+
+// Used in ImageMetadata and ExtraChannelInfo.
+struct BitDepth : public Fields {
+  BitDepth();
+  JXL_FIELDS_NAME(BitDepth)
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  std::string DebugString() const;  // defined in image_metadata.cc only when JXL_DEBUG_V_LEVEL >= 1
+
+  // Whether the original (uncompressed) samples are floating point or
+  // unsigned integer.
+  bool floating_point_sample;
+
+  // Bit depth of the original (uncompressed) image samples. Must be in the
+  // range [1, 32]. NOTE(review): VisitFields rejects unsigned integer depths above 31.
+  uint32_t bits_per_sample;
+
+  // Floating point exponent bits of the original (uncompressed) image samples,
+  // only used if floating_point_sample is true.
+  // If used, the samples are floating point with:
+  // - 1 sign bit
+  // - exponent_bits_per_sample exponent bits
+  // - (bits_per_sample - exponent_bits_per_sample - 1) mantissa bits
+  // If used, exponent_bits_per_sample must be in the range
+  // [2, 8] and amount of mantissa bits must be in the range [2, 23].
+  // NOTE: exponent_bits_per_sample is 8 for single precision binary32
+  // floating point, 5 for half precision binary16, 7 for fp24.
+  uint32_t exponent_bits_per_sample;
+};
+
+// Describes one extra channel.
+struct ExtraChannelInfo : public Fields {
+  ExtraChannelInfo();
+  JXL_FIELDS_NAME(ExtraChannelInfo)
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  std::string DebugString() const;  // defined in image_metadata.cc only when JXL_DEBUG_V_LEVEL >= 1
+
+  mutable bool all_default;  // passed to visitor->AllDefault() in VisitFields
+
+  ExtraChannel type;
+  BitDepth bit_depth;
+  uint32_t dim_shift;  // downsampled by 2^dim_shift on each axis; VisitFields rejects values above 3
+
+  std::string name;  // UTF-8
+
+  // Conditional: each field below is only visited for the matching channel type.
+  bool alpha_associated;  // i.e. premultiplied; visited only when type == kAlpha
+  float spot_color[4];    // spot color in linear RGBA; visited only when type == kSpotColor
+  uint32_t cfa_channel;  // visited only when type == kCFA
+};
+
+struct OpsinInverseMatrix : public Fields {  // Visited by CustomTransformData::VisitFields only when nonserialized_xyb_encoded is true.
+  OpsinInverseMatrix();
+  JXL_FIELDS_NAME(OpsinInverseMatrix)
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  mutable bool all_default;  // passed to visitor->AllDefault() in VisitFields
+
+  float inverse_matrix[9];  // defaults: DefaultInverseOpsinAbsorbanceMatrix(); NOTE(review): presumably a flattened 3x3 — confirm layout
+  float opsin_biases[3];  // defaults: kNegOpsinAbsorbanceBiasRGB
+  float quant_biases[4];  // defaults: kDefaultQuantBias
+};
+
+// Information useful for mapping HDR images to lower dynamic range displays.
+struct ToneMapping : public Fields {
+  ToneMapping();
+  JXL_FIELDS_NAME(ToneMapping)
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  mutable bool all_default;  // passed to visitor->AllDefault() in VisitFields
+
+  // Upper bound on the intensity level present in the image. For unsigned
+  // integer pixel encodings, this is the brightness of the largest
+  // representable value. The image does not necessarily contain a pixel
+  // actually this bright. An encoder is allowed to set 255 for SDR images
+  // without computing a histogram.
+  float intensity_target;  // [nits]; VisitFields requires a value > 0
+
+  // Lower bound on the intensity level present in the image. This may be
+  // loose, i.e. lower than the actual darkest pixel. When tone mapping, a
+  // decoder will map [min_nits, intensity_target] to the display range.
+  float min_nits;  // VisitFields requires 0 <= min_nits <= intensity_target
+
+  bool relative_to_max_display;  // see below
+  // The tone mapping will leave unchanged (linear mapping) any pixels whose
+  // brightness is strictly below this. The interpretation depends on
+  // relative_to_max_display. If true, this is a ratio [0, 1] of the maximum
+  // display brightness [nits], otherwise an absolute brightness [nits].
+  float linear_below;  // VisitFields requires >= 0, and <= 1 when relative_to_max_display is true
+};
+
+// Contains weights to customize some transforms - in particular, XYB and
+// upsampling.
+struct CustomTransformData : public Fields {
+  CustomTransformData();
+  JXL_FIELDS_NAME(CustomTransformData)
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // Must be set before calling VisitFields. Must equal xyb_encoded of
+  // ImageMetadata, should be set by ImageMetadata during VisitFields.
+  bool nonserialized_xyb_encoded = false;
+
+  mutable bool all_default;  // passed to visitor->AllDefault() in VisitFields
+
+  OpsinInverseMatrix opsin_inverse_matrix;  // visited only when nonserialized_xyb_encoded is true
+
+  uint32_t custom_weights_mask;  // 3 bits: bit 0 -> upsampling2_weights, bit 1 -> upsampling4_weights, bit 2 -> upsampling8_weights
+  float upsampling2_weights[15];
+  float upsampling4_weights[55];
+  float upsampling8_weights[210];
+};
+
+// Properties of the original image bundle. This enables Encode(Decode()) to
+// re-create an equivalent image without user input.
+struct ImageMetadata : public Fields {
+  ImageMetadata();
+  JXL_FIELDS_NAME(ImageMetadata)
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // Returns bit depth of the JPEG XL compressed alpha channel, or 0 if no alpha
+  // channel present. In the theoretical case that there are multiple alpha
+  // channels, returns the bit depth of the first.
+  uint32_t GetAlphaBits() const {
+    const ExtraChannelInfo* alpha = Find(ExtraChannel::kAlpha);
+    if (alpha == nullptr) return 0;
+    JXL_ASSERT(alpha->bit_depth.bits_per_sample != 0);
+    return alpha->bit_depth.bits_per_sample;
+  }
+
+  // Sets bit depth of alpha channel, adding extra channel if needed, or
+  // removing all alpha channels if bits is 0.
+  // Assumes integer alpha channel and not designed to support multiple
+  // alpha channels (it's possible to use those features by manipulating
+  // extra_channel_info directly).
+  //
+  // Callers must insert the actual channel image at the same index before any
+  // further modifications to extra_channel_info.
+  void SetAlphaBits(uint32_t bits, bool alpha_is_premultiplied = false);
+
+  bool HasAlpha() const { return GetAlphaBits() != 0; }
+
+  // Sets the original bit depth fields to indicate unsigned integer of the
+  // given bit depth.
+  // TODO(lode): move function to BitDepth
+  void SetUintSamples(uint32_t bits) {
+    bit_depth.bits_per_sample = bits;
+    bit_depth.exponent_bits_per_sample = 0;
+    bit_depth.floating_point_sample = false;
+    // RCT / Squeeze may add one bit each, and this is about int16_t,
+    // so uint13 should still be OK but limiting it to 12 seems safer.
+    // TODO(jon): figure out a better way to set this header field.
+    // (in particular, if modular mode is not used it doesn't matter,
+    // and if transforms are restricted, up to 15-bit could be done)
+    if (bits > 12) modular_16_bit_buffer_sufficient = false;
+  }
+  // Sets the original bit depth fields to indicate single precision floating
+  // point.
+  // TODO(lode): move function to BitDepth
+  void SetFloat32Samples() {
+    bit_depth.bits_per_sample = 32;
+    bit_depth.exponent_bits_per_sample = 8;
+    bit_depth.floating_point_sample = true;
+    modular_16_bit_buffer_sufficient = false;
+  }
+
+  void SetFloat16Samples() {
+    bit_depth.bits_per_sample = 16;
+    bit_depth.exponent_bits_per_sample = 5;
+    bit_depth.floating_point_sample = true;
+    modular_16_bit_buffer_sufficient = false;
+  }
+
+  void SetIntensityTarget(float intensity_target) {
+    tone_mapping.intensity_target = intensity_target;
+  }
+  // Returns tone_mapping.intensity_target; asserts that it is non-zero.
+  float IntensityTarget() const {
+    JXL_ASSERT(tone_mapping.intensity_target != 0);
+    return tone_mapping.intensity_target;
+  }
+
+  // Looks up the first entry of extra_channel_info whose type equals `type`.
+  // Returns a pointer to it, or nullptr when no entry matches.
+  const ExtraChannelInfo* Find(ExtraChannel type) const {
+    for (size_t i = 0; i < extra_channel_info.size(); ++i) {
+      const ExtraChannelInfo& candidate = extra_channel_info[i];
+      if (candidate.type == type) {
+        return &candidate;
+      }
+    }
+    return nullptr;
+  }
+
+  // Mutable overload: looks up the first entry of extra_channel_info whose
+  // type equals `type`. Returns a pointer to it, or nullptr when none matches.
+  ExtraChannelInfo* Find(ExtraChannel type) {
+    for (size_t i = 0; i < extra_channel_info.size(); ++i) {
+      ExtraChannelInfo& candidate = extra_channel_info[i];
+      if (candidate.type == type) {
+        return &candidate;
+      }
+    }
+    return nullptr;
+  }
+
+  // Returns the stored `orientation` integer cast to the Orientation enum.
+  Orientation GetOrientation() const {
+    return static_cast<Orientation>(orientation);
+  }
+
+  bool ExtraFieldsDefault() const;
+
+  std::string DebugString() const;
+
+  mutable bool all_default;
+
+  BitDepth bit_depth;
+  bool modular_16_bit_buffer_sufficient;  // otherwise 32 is.
+
+  // Whether the color values of the pixels of frames are encoded in the
+  // codestream using the absolute XYB color space, or using values that
+  // follow the color space defined by the ColorEncoding or ICC profile. This
+  // determines when or whether a CMS (Color Management System) is needed to get
+  // the pixels in a desired color space. In one case, the pixels have one known
+  // color space and a CMS is needed to convert them to the original image's
+  // color space, in the other case the pixels have the color space of the
+  // original image and a CMS is required if a different display space, or a
+  // single known consistent color space for multiple decoded images, is
+  // desired. In all cases, the color space of all frames from a single image is
+  // the same, both VarDCT and modular frames.
+  //
+  // If true: then frames can be decoded to XYB (which can also be converted to
+  // linear and non-linear sRGB with the built in conversion without CMS). The
+  // attached ColorEncoding or ICC profile has no effect on the meaning of the
+  // pixel's color values, but instead indicates what the color profile of the
+  // original image was, and what color profile one should convert to when
+  // decoding to integers to prevent clipping and precision loss. To do that
+  // conversion requires a CMS.
+  //
+  // If false: then the color values of decoded frames are in the space defined
+  // by the attached ColorEncoding or ICC profile. To instead get the pixels in
+  // a chosen known color space, such as sRGB, requires a CMS, since the
+  // attached ColorEncoding or ICC profile could be any arbitrary color space.
+  // This mode is typically used for lossless images encoded as integers.
+  // Frames can also use YCbCr encoding, some frames may and some may not, but
+  // this is not a different color space but a certain encoding of the RGB
+  // values.
+  //
+  // Note: if !xyb_encoded, but the attached color profile indicates XYB (which
+  // can happen either if it's a ColorEncoding with color_space_ ==
+  // ColorSpace::kXYB, or if it's an ICC Profile that has been crafted to
+  // represent XYB), then the frames still may not use ColorEncoding kXYB, they
+  // must still use kNone (or kYCbCr, which would mean applying the YCbCr
+  // transform to the 3-channel XYB data), since with !xyb_encoded, the 3
+  // channels are stored as-is, no matter what meaning the color profile assigns
+  // to them. To use ColorEncoding::kXYB, xyb_encoded must be true.
+  //
+  // This value is defined in image metadata because this is the global
+  // codestream header. This value does not affect the image itself, so is not
+  // image metadata per se, it only affects the encoding, and what color space
+  // the decoder can receive the pixels in without needing a CMS.
+  bool xyb_encoded;
+
+  ColorEncoding color_encoding;
+
+  // These values are initialized to defaults such that the 'extra_fields'
+  // condition in VisitFields uses correctly initialized values.
+  uint32_t orientation = 1;
+  bool have_preview = false;
+  bool have_animation = false;
+  bool have_intrinsic_size = false;
+
+  // If present, the stored image has the dimensions of the first SizeHeader,
+  // but decoders are advised to resample or display per `intrinsic_size`.
+  SizeHeader intrinsic_size;  // only if have_intrinsic_size
+
+  ToneMapping tone_mapping;
+
+  // When reading: deserialized. When writing: automatically set from vector.
+  uint32_t num_extra_channels;
+  std::vector<ExtraChannelInfo> extra_channel_info;
+
+  // Only present if m.have_preview.
+  PreviewHeader preview_size;
+  // Only present if m.have_animation.
+  AnimationHeader animation;
+
+  uint64_t extensions;
+
+  // Option to stop parsing after basic info, and treat as if the later
+  // fields do not participate. Use to parse only basic image information
+  // excluding the final larger or variable sized data.
+  bool nonserialized_only_parse_basic_info = false;
+};
+
+Status ReadImageMetadata(BitReader* JXL_RESTRICT reader,
+                         ImageMetadata* JXL_RESTRICT metadata);
+
+Status WriteImageMetadata(const ImageMetadata& metadata,
+                          BitWriter* JXL_RESTRICT writer, size_t layer,
+                          AuxOut* aux_out);
+
+// All metadata applicable to the entire codestream (dimensions, extra channels,
+// ...)
+struct CodecMetadata {
+  // TODO(lode): use the preview and animation fields too, in place of the
+  // nonserialized_ ones in ImageMetadata.
+  ImageMetadata m;
+  // The size of the codestream: this is the nominal size applicable to all
+  // frames, although some frames can have a different effective size through
+  // crop, dc_level or representing the preview.
+  SizeHeader size;
+  // Often default.
+  CustomTransformData transform_data;
+
+  size_t xsize() const { return size.xsize(); }
+  size_t ysize() const { return size.ysize(); }
+
+  // The oriented_* accessors below exchange width and height when the
+  // orientation value is greater than 4, unless `keep_orientation` is set.
+  size_t oriented_xsize(bool keep_orientation) const {
+    const bool swap_axes =
+        !keep_orientation && static_cast<uint32_t>(m.GetOrientation()) > 4;
+    return swap_axes ? ysize() : xsize();
+  }
+  size_t oriented_preview_xsize(bool keep_orientation) const {
+    const bool swap_axes =
+        !keep_orientation && static_cast<uint32_t>(m.GetOrientation()) > 4;
+    return swap_axes ? m.preview_size.ysize() : m.preview_size.xsize();
+  }
+  size_t oriented_ysize(bool keep_orientation) const {
+    const bool swap_axes =
+        !keep_orientation && static_cast<uint32_t>(m.GetOrientation()) > 4;
+    return swap_axes ? xsize() : ysize();
+  }
+  size_t oriented_preview_ysize(bool keep_orientation) const {
+    const bool swap_axes =
+        !keep_orientation && static_cast<uint32_t>(m.GetOrientation()) > 4;
+    return swap_axes ? m.preview_size.xsize() : m.preview_size.ysize();
+  }
+
+  std::string DebugString() const;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_IMAGE_METADATA_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/image_ops.h b/third-party/libjxl/libjxl/lib/jxl/image_ops.h
new file mode 100644
index 0000000000..574a6104d4
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/image_ops.h
@@ -0,0 +1,561 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_IMAGE_OPS_H_
+#define LIB_JXL_IMAGE_OPS_H_
+
+// Operations on images.
+
+#include <algorithm>
+#include <array>
+#include <limits>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+// Copies all rows of `from` into `to`. Both planes must have the same size;
+// nothing is copied when either dimension is zero.
+template <typename T>
+void CopyImageTo(const Plane<T>& from, Plane<T>* JXL_RESTRICT to) {
+  JXL_ASSERT(SameSize(from, *to));
+  const size_t num_rows = from.ysize();
+  const size_t row_bytes = from.xsize() * sizeof(T);
+  if (num_rows == 0 || row_bytes == 0) return;
+  for (size_t y = 0; y != num_rows; ++y) {
+    const T* JXL_RESTRICT src = from.ConstRow(y);
+    T* JXL_RESTRICT dst = to->Row(y);
+    memcpy(dst, src, row_bytes);
+  }
+}
+
+// Copies the `rect_from` region of `from` into the `rect_to` region of `to`,
+// row by row. Nothing is copied when the region has zero width.
+template <typename T>
+void CopyImageTo(const Rect& rect_from, const Plane<T>& from,
+                 const Rect& rect_to, Plane<T>* JXL_RESTRICT to) {
+  JXL_DASSERT(SameSize(rect_from, rect_to));
+  JXL_DASSERT(rect_from.IsInside(from));
+  JXL_DASSERT(rect_to.IsInside(*to));
+  const size_t row_bytes = rect_from.xsize() * sizeof(T);
+  if (row_bytes == 0) return;
+  const size_t num_rows = rect_from.ysize();
+  for (size_t y = 0; y != num_rows; ++y) {
+    const T* JXL_RESTRICT src = rect_from.ConstRow(from, y);
+    T* JXL_RESTRICT dst = rect_to.Row(to, y);
+    memcpy(dst, src, row_bytes);
+  }
+}
+
+// Three-plane variant: copies the `rect_from` region of each plane of `from`
+// into the `rect_to` region of the matching plane of `to`.
+template <typename T>
+void CopyImageTo(const Rect& rect_from, const Image3<T>& from,
+                 const Rect& rect_to, Image3<T>* JXL_RESTRICT to) {
+  JXL_ASSERT(SameSize(rect_from, rect_to));
+  size_t c = 0;
+  while (c < 3) {
+    CopyImageTo(rect_from, from.Plane(c), rect_to, &to->Plane(c));
+    ++c;
+  }
+}
+
+// Converts the `rect_from` region of `from` to element type U and stores it
+// in the `rect_to` region of `to`, clamping every value to the representable
+// range of U. The clamp is evaluated in M, the type of `T() + U()`.
+// Both rects must have the same size.
+template <typename T, typename U>
+void ConvertPlaneAndClamp(const Rect& rect_from, const Plane<T>& from,
+                          const Rect& rect_to, Plane<U>* JXL_RESTRICT to) {
+  JXL_ASSERT(SameSize(rect_from, rect_to));
+  using M = decltype(T() + U());
+  // lowest() rather than min(): for floating-point U, min() is the smallest
+  // positive value, so zero and negative inputs would be clamped upwards.
+  // For integer U the two are identical.
+  const M lower = std::numeric_limits<U>::lowest();
+  const M upper = std::numeric_limits<U>::max();
+  for (size_t y = 0; y < rect_to.ysize(); ++y) {
+    const T* JXL_RESTRICT row_from = rect_from.ConstRow(from, y);
+    U* JXL_RESTRICT row_to = rect_to.Row(to, y);
+    for (size_t x = 0; x < rect_to.xsize(); ++x) {
+      row_to[x] = std::min<M>(std::max<M>(row_from[x], lower), upper);
+    }
+  }
+}
+
+// Copies the whole of `from` into `to` by forwarding to the rect-based
+// overload with rects spanning each image.
+template <typename T>
+void CopyImageTo(const T& from, T* JXL_RESTRICT to) {
+  const Rect whole_from(from);
+  const Rect whole_to(*to);
+  CopyImageTo(whole_from, from, whole_to, to);
+}
+
+// Copies `from:from_rect` to `to:to_rect`, widened on every side by up to
+// `padding` pixels. The widening on each side is limited to the pixels that
+// actually exist in `from` beyond that side of `from_rect`.
+template <typename T>
+void CopyImageToWithPadding(const Rect& from_rect, const T& from,
+                            size_t padding, const Rect& to_rect, T* to) {
+  // Available border before (0) and after (1) the rect along each axis.
+  const size_t xextra0 = std::min(padding, from_rect.x0());
+  const size_t yextra0 = std::min(padding, from_rect.y0());
+  const size_t xextra1 =
+      std::min(padding, from.xsize() - from_rect.x0() - from_rect.xsize());
+  const size_t yextra1 =
+      std::min(padding, from.ysize() - from_rect.y0() - from_rect.ysize());
+  JXL_DASSERT(to_rect.x0() >= xextra0);
+  JXL_DASSERT(to_rect.y0() >= yextra0);
+
+  const size_t grow_x = xextra0 + xextra1;
+  const size_t grow_y = yextra0 + yextra1;
+  const Rect padded_from(from_rect.x0() - xextra0, from_rect.y0() - yextra0,
+                         from_rect.xsize() + grow_x,
+                         from_rect.ysize() + grow_y);
+  const Rect padded_to(to_rect.x0() - xextra0, to_rect.y0() - yextra0,
+                       to_rect.xsize() + grow_x, to_rect.ysize() + grow_y);
+  CopyImageTo(padded_from, from, padded_to, to);
+}
+
+// Writes the per-pixel difference `image1 - image2` into `out`. The two
+// inputs must have equal dimensions (checked).
+template <class ImageIn, class ImageOut>
+void Subtract(const ImageIn& image1, const ImageIn& image2, ImageOut* out) {
+  using T = typename ImageIn::T;
+  const size_t xsize = image1.xsize();
+  const size_t ysize = image1.ysize();
+  JXL_CHECK(xsize == image2.xsize());
+  JXL_CHECK(ysize == image2.ysize());
+
+  for (size_t y = 0; y != ysize; ++y) {
+    const T* const JXL_RESTRICT minuend = image1.Row(y);
+    const T* const JXL_RESTRICT subtrahend = image2.Row(y);
+    T* const JXL_RESTRICT difference = out->Row(y);
+    for (size_t x = 0; x != xsize; ++x) {
+      difference[x] = minuend[x] - subtrahend[x];
+    }
+  }
+}
+
+// In-place: subtracts each pixel of `what` from the pixel at the same
+// position in `to`. The loop bounds are taken from `what`.
+template <typename Tin, typename Tout>
+void SubtractFrom(const Plane<Tin>& what, Plane<Tout>* to) {
+  const size_t num_cols = what.xsize();
+  const size_t num_rows = what.ysize();
+  for (size_t y = 0; y != num_rows; ++y) {
+    const Tin* JXL_RESTRICT src = what.ConstRow(y);
+    Tout* JXL_RESTRICT dst = to->Row(y);
+    for (size_t x = 0; x != num_cols; ++x) {
+      dst[x] -= src[x];
+    }
+  }
+}
+
+// In-place: adds each pixel of `what` to the pixel at the same position in
+// `to`. The loop bounds are taken from `what`.
+template <typename Tin, typename Tout>
+void AddTo(const Plane<Tin>& what, Plane<Tout>* to) {
+  const size_t num_cols = what.xsize();
+  const size_t num_rows = what.ysize();
+  for (size_t y = 0; y != num_rows; ++y) {
+    const Tin* JXL_RESTRICT src = what.ConstRow(y);
+    Tout* JXL_RESTRICT dst = to->Row(y);
+    for (size_t x = 0; x != num_cols; ++x) {
+      dst[x] += src[x];
+    }
+  }
+}
+
+// Adds the `rectFrom` region of `what` to the `rectTo` region of `to`,
+// in place. Both rects must have the same size.
+template <typename Tin, typename Tout>
+void AddTo(Rect rectFrom, const Plane<Tin>& what, Rect rectTo,
+           Plane<Tout>* to) {
+  JXL_ASSERT(SameSize(rectFrom, rectTo));
+  const size_t num_cols = rectTo.xsize();
+  const size_t num_rows = rectTo.ysize();
+  for (size_t y = 0; y != num_rows; ++y) {
+    const Tin* JXL_RESTRICT src = rectFrom.ConstRow(what, y);
+    Tout* JXL_RESTRICT dst = rectTo.Row(to, y);
+    for (size_t x = 0; x != num_cols; ++x) {
+      dst[x] += src[x];
+    }
+  }
+}
+
+// Returns a new plane holding `lambda1 * image1 + lambda2 * image2`, computed
+// per pixel. The inputs must have equal dimensions (checked).
+template <typename T>
+Plane<T> LinComb(const T lambda1, const Plane<T>& image1, const T lambda2,
+                 const Plane<T>& image2) {
+  const size_t xsize = image1.xsize();
+  const size_t ysize = image1.ysize();
+  JXL_CHECK(xsize == image2.xsize());
+  JXL_CHECK(ysize == image2.ysize());
+  Plane<T> out(xsize, ysize);
+  for (size_t y = 0; y != ysize; ++y) {
+    const T* const JXL_RESTRICT first = image1.Row(y);
+    const T* const JXL_RESTRICT second = image2.Row(y);
+    T* const JXL_RESTRICT combined = out.Row(y);
+    for (size_t x = 0; x != xsize; ++x) {
+      combined[x] = lambda1 * first[x] + lambda2 * second[x];
+    }
+  }
+  return out;
+}
+
+// Scales every pixel of `image` by `lambda`, in place.
+template <typename T>
+void ScaleImage(const T lambda, Plane<T>* image) {
+  const size_t num_rows = image->ysize();
+  const size_t num_cols = image->xsize();
+  for (size_t y = 0; y != num_rows; ++y) {
+    T* const JXL_RESTRICT pixels = image->Row(y);
+    for (size_t x = 0; x != num_cols; ++x) {
+      pixels[x] = lambda * pixels[x];
+    }
+  }
+}
+
+// Scales all three planes of `image` by `lambda`, in place.
+template <typename T>
+void ScaleImage(const T lambda, Image3<T>* image) {
+  size_t c = 0;
+  while (c < 3) {
+    ScaleImage(lambda, &image->Plane(c));
+    ++c;
+  }
+}
+
+// Returns a new plane, sized like `a`, holding the per-pixel product of `a`
+// and `b`. The size of `b` is not checked here.
+template <typename T>
+Plane<T> Product(const Plane<T>& a, const Plane<T>& b) {
+  const size_t num_cols = a.xsize();
+  const size_t num_rows = a.ysize();
+  Plane<T> c(num_cols, num_rows);
+  for (size_t y = 0; y != num_rows; ++y) {
+    const T* const JXL_RESTRICT lhs = a.Row(y);
+    const T* const JXL_RESTRICT rhs = b.Row(y);
+    T* const JXL_RESTRICT out = c.Row(y);
+    for (size_t x = 0; x != num_cols; ++x) {
+      out[x] = lhs[x] * rhs[x];
+    }
+  }
+  return c;
+}
+
+// Sets every pixel of `image` to `value`.
+template <typename T>
+void FillImage(const T value, Plane<T>* image) {
+  const size_t num_rows = image->ysize();
+  const size_t num_cols = image->xsize();
+  for (size_t y = 0; y != num_rows; ++y) {
+    T* const JXL_RESTRICT row = image->Row(y);
+    std::fill(row, row + num_cols, value);
+  }
+}
+
+// Sets every byte of every row of `image` to zero. Returns immediately for
+// zero-width images.
+template <typename T>
+void ZeroFillImage(Plane<T>* image) {
+  const size_t row_bytes = image->xsize() * sizeof(T);
+  if (row_bytes == 0) return;
+  const size_t num_rows = image->ysize();
+  for (size_t y = 0; y != num_rows; ++y) {
+    memset(image->Row(y), 0, row_bytes);
+  }
+}
+
+// Reflects an out-of-range coordinate back into [0, xsize) and returns
+// in-range coordinates unchanged. The reflection repeats the border pixel
+// (the mirror lies outside the last column). The radius (distance outside
+// the image) is assumed to be small compared to the image size, otherwise
+// this might not terminate.
+static inline int64_t Mirror(int64_t x, const int64_t xsize) {
+  JXL_DASSERT(xsize != 0);
+
+  // TODO(janwas): replace with branchless version
+  for (;;) {
+    if (x < 0) {
+      x = -x - 1;
+    } else if (x >= xsize) {
+      x = 2 * xsize - 1 - x;
+    } else {
+      return x;
+    }
+  }
+}
+
+// Wrap modes for ensuring X/Y coordinates are in the valid range [0, size):
+
+// Mirrors (repeating the edge pixel once). Useful for convolutions.
+// Function object that forwards to Mirror(coord, size).
+struct WrapMirror {
+  JXL_INLINE int64_t operator()(const int64_t coord, const int64_t size) const {
+    return Mirror(coord, size);
+  }
+};
+
+// Returns the same coordinate: required for TFNode with Border(), or useful
+// when we know "coord" is already valid (e.g. interior of an image).
+// The size argument is ignored.
+struct WrapUnchanged {
+  JXL_INLINE int64_t operator()(const int64_t coord, int64_t /*size*/) const {
+    return coord;
+  }
+};
+
+// Similar to Wrap* but for row pointers (reduces Row() multiplications).
+
+// Maps a row pointer lying before the first row or after the last row of an
+// image to a mirrored row pointer; pointers between first_row_ and last_row_
+// (inclusive) are returned unchanged.
+class WrapRowMirror {
+ public:
+  // Captures pointers to row 0 and row `ysize - 1` of `image`.
+  template <class ImageOrView>
+  WrapRowMirror(const ImageOrView& image, size_t ysize)
+      : first_row_(image.ConstRow(0)), last_row_(image.ConstRow(ysize - 1)) {}
+
+  // NOTE(review): presumably `stride` is the distance between consecutive
+  // rows in float elements, and `row` is a whole number of strides away from
+  // the image rows - confirm with callers.
+  const float* operator()(const float* const JXL_RESTRICT row,
+                          const int64_t stride) const {
+    if (row < first_row_) {
+      const int64_t num_before = first_row_ - row;
+      // Mirrored; one row before => row 0, two before = row 1, ...
+      return first_row_ + num_before - stride;
+    }
+    if (row > last_row_) {
+      const int64_t num_after = row - last_row_;
+      // Mirrored; one row after => last row, two after = last - 1, ...
+      return last_row_ - num_after + stride;
+    }
+    return row;
+  }
+
+ private:
+  const float* const JXL_RESTRICT first_row_;
+  const float* const JXL_RESTRICT last_row_;
+};
+
+// Row-pointer counterpart of WrapUnchanged: returns `row` as-is and ignores
+// the stride argument.
+struct WrapRowUnchanged {
+  JXL_INLINE const float* operator()(const float* const JXL_RESTRICT row,
+                                     int64_t /*stride*/) const {
+    return row;
+  }
+};
+
+// Sets "thickness" pixels on each border to "value". This is faster than
+// initializing the entire image and overwriting valid/interior pixels.
+//
+// A thickness larger than the image width or height is clamped to that
+// dimension, so an over-thick border fills the whole image and never writes
+// outside a row.
+template <typename T>
+void SetBorder(const size_t thickness, const T value, Plane<T>* image) {
+  const size_t xsize = image->xsize();
+  const size_t ysize = image->ysize();
+  // Effective border extent along each axis; clamping prevents the
+  // `row + thickness` / `row + xsize - thickness` pointers below from leaving
+  // the row when thickness > xsize, and avoids unsigned wrap-around in the
+  // row ranges when thickness > ysize.
+  const size_t xthick = std::min(thickness, xsize);
+  const size_t ythick = std::min(thickness, ysize);
+
+  // Top: fill entire row
+  for (size_t y = 0; y < ythick; ++y) {
+    T* const JXL_RESTRICT row = image->Row(y);
+    std::fill(row, row + xsize, value);
+  }
+
+  // Bottom: fill entire row
+  for (size_t y = ysize - ythick; y < ysize; ++y) {
+    T* const JXL_RESTRICT row = image->Row(y);
+    std::fill(row, row + xsize, value);
+  }
+
+  // Left/right: fill the 'columns' on either side, but only for rows that
+  // lie strictly between the top and bottom bands. The loop is empty when
+  // the two bands already cover the whole image.
+  for (size_t y = ythick; y + ythick < ysize; ++y) {
+    T* const JXL_RESTRICT row = image->Row(y);
+    std::fill(row, row + xthick, value);
+    std::fill(row + xsize - xthick, row + xsize, value);
+  }
+}
+
+// Scans every pixel of `image` and stores the smallest value in `*min` and
+// the largest in `*max`. For an image without pixels, `*min` is left at
+// numeric_limits<T>::max() and `*max` at numeric_limits<T>::lowest().
+template <typename T>
+void ImageMinMax(const Plane<T>& image, T* const JXL_RESTRICT min,
+                 T* const JXL_RESTRICT max) {
+  T smallest = std::numeric_limits<T>::max();
+  T largest = std::numeric_limits<T>::lowest();
+  const size_t num_rows = image.ysize();
+  const size_t num_cols = image.xsize();
+  for (size_t y = 0; y != num_rows; ++y) {
+    const T* const JXL_RESTRICT row = image.Row(y);
+    for (size_t x = 0; x != num_cols; ++x) {
+      smallest = std::min(smallest, row[x]);
+      largest = std::max(largest, row[x]);
+    }
+  }
+  *min = smallest;
+  *max = largest;
+}
+
+// Copies pixels, scaling their value relative to the "from" min/max by
+// "to_range". Example: U8 [0, 255] := [0.0, 1.0], to_range = 1.0 =>
+// outputs [0.0, 1.0].
+//
+// `from` and `to` must have the same size. When all pixels of `from` share
+// one value (or `from` has no pixels) the value range is empty; the scale is
+// then taken as 0 so every output pixel is 0 instead of the result of a
+// division by zero.
+template <typename FromType, typename ToType>
+void ImageConvert(const Plane<FromType>& from, const float to_range,
+                  Plane<ToType>* const JXL_RESTRICT to) {
+  JXL_ASSERT(SameSize(from, *to));
+  FromType min_from, max_from;
+  ImageMinMax(from, &min_from, &max_from);
+  // Only form `max_from - min_from` when it is strictly positive: this skips
+  // the 0-range division and the sentinel values left by ImageMinMax for an
+  // empty image.
+  const float scale =
+      (max_from > min_from) ? to_range / (max_from - min_from) : 0.0f;
+  for (size_t y = 0; y < from.ysize(); ++y) {
+    const FromType* const JXL_RESTRICT row_from = from.Row(y);
+    ToType* const JXL_RESTRICT row_to = to->Row(y);
+    for (size_t x = 0; x < from.xsize(); ++x) {
+      row_to[x] = static_cast<ToType>((row_from[x] - min_from) * scale);
+    }
+  }
+}
+
+// Returns a float plane with the pixels of `from`. float and double inputs
+// are copied with factor 1; any other type is scaled by
+// 1 / numeric_limits<From>::max().
+template <typename From>
+Plane<float> ConvertToFloat(const Plane<From>& from) {
+  const bool already_floating =
+      std::is_same<From, double>::value || std::is_same<From, float>::value;
+  const float factor =
+      already_floating ? 1.0f : 1.0f / std::numeric_limits<From>::max();
+  const size_t num_cols = from.xsize();
+  const size_t num_rows = from.ysize();
+  Plane<float> to(num_cols, num_rows);
+  for (size_t y = 0; y != num_rows; ++y) {
+    const From* const JXL_RESTRICT src = from.Row(y);
+    float* const JXL_RESTRICT dst = to.Row(y);
+    for (size_t x = 0; x != num_cols; ++x) {
+      dst[x] = src[x] * factor;
+    }
+  }
+  return to;
+}
+
+// Builds an xsize x ysize plane from `packed`, which holds the rows back to
+// back (row y starts at index y * xsize).
+template <typename T>
+Plane<T> ImageFromPacked(const std::vector<T>& packed, const size_t xsize,
+                         const size_t ysize) {
+  const size_t row_bytes = xsize * sizeof(T);
+  Plane<T> out(xsize, ysize);
+  for (size_t y = 0; y != ysize; ++y) {
+    const T* const JXL_RESTRICT src = &packed[y * xsize];
+    T* const JXL_RESTRICT dst = out.Row(y);
+    memcpy(dst, src, row_bytes);
+  }
+  return out;
+}
+
+// Stores the largest pixel value of each of the three planes of `image` in
+// the matching element of `*out_max`. For an image without pixels each
+// element is numeric_limits<T>::lowest().
+template <typename T>
+void Image3Max(const Image3<T>& image, std::array<T, 3>* out_max) {
+  for (size_t c = 0; c < 3; ++c) {
+    // lowest() rather than min(): for floating-point T, min() is the smallest
+    // positive value, which would be reported as the maximum of a plane whose
+    // pixels are all zero or negative. For integer T the two are identical.
+    T max = std::numeric_limits<T>::lowest();
+    for (size_t y = 0; y < image.ysize(); ++y) {
+      const T* JXL_RESTRICT row = image.ConstPlaneRow(c, y);
+      for (size_t x = 0; x < image.xsize(); ++x) {
+        max = std::max(max, row[x]);
+      }
+    }
+    (*out_max)[c] = max;
+  }
+}
+
+// Returns the sum, accumulated in T, of the pixels of `image` inside `rect`.
+template <typename T>
+T ImageSum(const Plane<T>& image, const Rect& rect) {
+  const size_t num_cols = rect.xsize();
+  const size_t num_rows = rect.ysize();
+  T total = 0;
+  for (size_t y = 0; y != num_rows; ++y) {
+    const T* JXL_RESTRICT pixels = rect.ConstRow(image, y);
+    for (size_t x = 0; x != num_cols; ++x) {
+      total += pixels[x];
+    }
+  }
+  return total;
+}
+
+// Returns the pixels of `image` inside `rect` as one contiguous vector, rows
+// stored back to back (row y starts at index y * rect.xsize()).
+template <typename T>
+std::vector<T> PackedFromImage(const Plane<T>& image, const Rect& rect) {
+  const size_t xsize = rect.xsize();
+  const size_t ysize = rect.ysize();
+  const size_t row_bytes = xsize * sizeof(T);
+  std::vector<T> packed(xsize * ysize);
+  for (size_t y = 0; y != ysize; ++y) {
+    T* const dst = &packed[y * xsize];
+    memcpy(dst, rect.ConstRow(image, y), row_bytes);
+  }
+  return packed;
+}
+
+// Packs the whole of `image` by forwarding to the rect overload with
+// Rect(image).
+template <typename T>
+std::vector<T> PackedFromImage(const Plane<T>& image) {
+  return PackedFromImage(image, Rect(image));
+}
+
+// Three-plane variant: converts each plane of `from` with the single-plane
+// ConvertToFloat and assembles the results into an Image3F.
+template <typename From>
+Image3F ConvertToFloat(const Image3<From>& from) {
+  return Image3F(ConvertToFloat(from.Plane(0)), ConvertToFloat(from.Plane(1)),
+                 ConvertToFloat(from.Plane(2)));
+}
+
+// Writes the per-pixel difference `image1 - image2` of all three planes into
+// `out`. The two inputs must have equal dimensions (checked).
+template <typename Tin, typename Tout>
+void Subtract(const Image3<Tin>& image1, const Image3<Tin>& image2,
+              Image3<Tout>* out) {
+  const size_t xsize = image1.xsize();
+  const size_t ysize = image1.ysize();
+  JXL_CHECK(xsize == image2.xsize());
+  JXL_CHECK(ysize == image2.ysize());
+
+  for (size_t c = 0; c != 3; ++c) {
+    for (size_t y = 0; y != ysize; ++y) {
+      const Tin* const JXL_RESTRICT minuend = image1.ConstPlaneRow(c, y);
+      const Tin* const JXL_RESTRICT subtrahend = image2.ConstPlaneRow(c, y);
+      Tout* const JXL_RESTRICT difference = out->PlaneRow(c, y);
+      for (size_t x = 0; x != xsize; ++x) {
+        difference[x] = minuend[x] - subtrahend[x];
+      }
+    }
+  }
+}
+
+// Adds all three planes of `what` to the `rect` region of `to`, in place.
+// `what` must have exactly the dimensions of `rect`.
+template <typename Tin, typename Tout>
+void AddTo(const Rect& rect, const Image3<Tin>& what, Image3<Tout>* to) {
+  const size_t xsize = what.xsize();
+  const size_t ysize = what.ysize();
+  JXL_ASSERT(xsize == rect.xsize());
+  JXL_ASSERT(ysize == rect.ysize());
+  for (size_t c = 0; c != 3; ++c) {
+    for (size_t y = 0; y != ysize; ++y) {
+      const Tin* JXL_RESTRICT src = what.ConstPlaneRow(c, y);
+      Tout* JXL_RESTRICT dst = rect.PlaneRow(to, c, y);
+      for (size_t x = 0; x != xsize; ++x) {
+        dst[x] += src[x];
+      }
+    }
+  }
+}
+
+// Sets every pixel of all three planes of `image` to `value`.
+template <typename T>
+void FillImage(const T value, Image3<T>* image) {
+  const size_t num_rows = image->ysize();
+  const size_t num_cols = image->xsize();
+  for (size_t c = 0; c != 3; ++c) {
+    for (size_t y = 0; y != num_rows; ++y) {
+      T* JXL_RESTRICT row = image->PlaneRow(c, y);
+      std::fill(row, row + num_cols, value);
+    }
+  }
+}
+
+// Sets every pixel of `image` to `value`.
+template <typename T>
+void FillPlane(const T value, Plane<T>* image) {
+  const size_t num_rows = image->ysize();
+  const size_t num_cols = image->xsize();
+  for (size_t y = 0; y != num_rows; ++y) {
+    T* JXL_RESTRICT row = image->Row(y);
+    std::fill(row, row + num_cols, value);
+  }
+}
+
+// Sets the pixels inside `rect` of all three planes of `image` to `value`.
+template <typename T>
+void FillImage(const T value, Image3<T>* image, Rect rect) {
+  const size_t num_rows = rect.ysize();
+  const size_t num_cols = rect.xsize();
+  for (size_t c = 0; c != 3; ++c) {
+    for (size_t y = 0; y != num_rows; ++y) {
+      T* JXL_RESTRICT row = rect.PlaneRow(image, c, y);
+      std::fill(row, row + num_cols, value);
+    }
+  }
+}
+
+// Sets the pixels of `image` inside `rect` to `value`.
+template <typename T>
+void FillPlane(const T value, Plane<T>* image, Rect rect) {
+  const size_t num_rows = rect.ysize();
+  const size_t num_cols = rect.xsize();
+  for (size_t y = 0; y != num_rows; ++y) {
+    T* JXL_RESTRICT row = rect.Row(image, y);
+    std::fill(row, row + num_cols, value);
+  }
+}
+
+// Sets every byte of every row of all three planes of `image` to zero.
+// memset is skipped for zero-width images.
+template <typename T>
+void ZeroFillImage(Image3<T>* image) {
+  const size_t num_rows = image->ysize();
+  const size_t row_bytes = image->xsize() * sizeof(T);
+  for (size_t c = 0; c != 3; ++c) {
+    for (size_t y = 0; y != num_rows; ++y) {
+      T* JXL_RESTRICT row = image->PlaneRow(c, y);
+      if (row_bytes != 0) {
+        memset(row, 0, row_bytes);
+      }
+    }
+  }
+}
+
+// Sets every byte of the pixels of `image` inside `rect` to zero.
+// Returns immediately for a zero-width rect, matching the guard in
+// ZeroFillImage: memset requires a valid pointer even for a zero length.
+template <typename T>
+void ZeroFillPlane(Plane<T>* image, Rect rect) {
+  if (rect.xsize() == 0) return;
+  for (size_t y = 0; y < rect.ysize(); ++y) {
+    T* JXL_RESTRICT row = rect.Row(image, y);
+    memset(row, 0, rect.xsize() * sizeof(T));
+  }
+}
+
+// Same as above, but operates in-place. Assumes that the `in` image was
+// allocated large enough.
+void PadImageToBlockMultipleInPlace(Image3F* JXL_RESTRICT in,
+                                    size_t block_dim = kBlockDim);
+
+// Downsamples an image by a given factor.
+void DownsampleImage(Image3F* opsin, size_t factor);
+void DownsampleImage(ImageF* image, size_t factor);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_IMAGE_OPS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/image_ops_test.cc b/third-party/libjxl/libjxl/lib/jxl/image_ops_test.cc
new file mode 100644
index 0000000000..44c021513d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/image_ops_test.cc
@@ -0,0 +1,164 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/image_ops.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <utility>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+// Round-trips a randomly filled xsize x ysize plane through PackedFromImage
+// and ImageFromPacked and expects identical pixels afterwards.
+template <typename T>
+void TestPacked(const size_t xsize, const size_t ysize) {
+  Plane<T> image1(xsize, ysize);
+  RandomFillImage(&image1);
+  const std::vector<T> packed = PackedFromImage(image1);
+  const Plane<T> image2 = ImageFromPacked(packed, xsize, ysize);
+  JXL_EXPECT_OK(SamePixels(image1, image2, _));
+}
+
+// Runs TestPacked<T> for a single pixel, a single row and a single column.
+template <typename T>
+void TestPackedAllShapes() {
+  TestPacked<T>(1, 1);
+  TestPacked<T>(7, 1);
+  TestPacked<T>(1, 7);
+}
+
+TEST(ImageTest, TestPacked) {
+  TestPackedAllShapes<uint8_t>();
+  TestPackedAllShapes<int16_t>();
+  TestPackedAllShapes<uint16_t>();
+  TestPackedAllShapes<float>();
+}
+
+// Ensure entire payload is readable/writable for various size/offset combos.
+// Each allocation is checked for kAlign alignment, zeroed, probed at a random
+// position and freed again.
+TEST(ImageTest, TestAllocator) {
+  Rng rng(0);
+  const size_t k32 = 32;
+  const size_t kAlign = CacheAligned::kAlignment;
+  for (size_t size : {k32 * 1, k32 * 2, k32 * 3, k32 * 4, k32 * 5,
+                      CacheAligned::kAlias, 2 * CacheAligned::kAlias + 4}) {
+    // Offsets step in multiples of kAlign from 0 up to and including kAlias.
+    for (size_t offset = 0; offset <= CacheAligned::kAlias; offset += kAlign) {
+      uint8_t* bytes =
+          static_cast<uint8_t*>(CacheAligned::Allocate(size, offset));
+      JXL_CHECK(reinterpret_cast<uintptr_t>(bytes) % kAlign == 0);
+      // Ensure we can write/read the last byte. Use RNG to fool the compiler
+      // into thinking the write is necessary.
+      memset(bytes, 0, size);
+      bytes[size - 1] = 1;                       // greatest element
+      uint32_t pos = rng.UniformU(0, size - 1);  // random but != greatest
+      JXL_CHECK(bytes[pos] < bytes[size - 1]);
+
+      CacheAligned::Free(bytes);
+    }
+  }
+}
+
+// Checks FillImage and ZeroFillImage on `img`: after each fill every pixel
+// must hold the fill value. On the first mismatch a diagnostic containing
+// `layout` is printed and the process aborts.
+template <typename T>
+void TestFillImpl(Image3<T>* img, const char* layout) {
+  FillImage(T(1), img);
+  for (size_t y = 0; y < img->ysize(); ++y) {
+    for (size_t c = 0; c < 3; ++c) {
+      T* JXL_RESTRICT row = img->PlaneRow(c, y);
+      for (size_t x = 0; x < img->xsize(); ++x) {
+        if (row[x] != T(1)) {
+          printf("Not 1 at c=%" PRIuS " %" PRIuS ", %" PRIuS " (%" PRIuS
+                 " x %" PRIuS ") (%s)\n",
+                 c, x, y, img->xsize(), img->ysize(), layout);
+          abort();
+        }
+        // Overwrite the checked pixel so that reaching the same storage a
+        // second time would fail the check above.
+        row[x] = T(2);
+      }
+    }
+  }
+
+  // Same for ZeroFillImage and swapped c/y loop ordering.
+  ZeroFillImage(img);
+  for (size_t c = 0; c < 3; ++c) {
+    for (size_t y = 0; y < img->ysize(); ++y) {
+      T* JXL_RESTRICT row = img->PlaneRow(c, y);
+      for (size_t x = 0; x < img->xsize(); ++x) {
+        if (row[x] != T(0)) {
+          printf("Not 0 at c=%" PRIuS " %" PRIuS ", %" PRIuS " (%" PRIuS
+                 " x %" PRIuS ") (%s)\n",
+                 c, x, y, img->xsize(), img->ysize(), layout);
+          abort();
+        }
+        // Same sentinel trick as above, with a different value.
+        row[x] = T(3);
+      }
+    }
+  }
+}
+
+// Runs TestFillImpl for every width/height combination of the sizes below,
+// once on an Image3 built from dimensions and once on an Image3 assembled
+// from three separately constructed planes.
+template <typename T>
+void TestFillT() {
+  static const uint32_t kSizes[] = {0, 1, 15, 16, 31, 32};
+  for (const uint32_t xsize : kSizes) {
+    for (const uint32_t ysize : kSizes) {
+      Image3<T> image(xsize, ysize);
+      TestFillImpl(&image, "size ctor");
+
+      Image3<T> planar(Plane<T>(xsize, ysize), Plane<T>(xsize, ysize),
+                       Plane<T>(xsize, ysize));
+      TestFillImpl(&planar, "planar");
+    }
+  }
+}
+
+// Ensure y/c/x and c/y/x loops visit pixels no more than once.
+// Runs TestFillT for 8-bit, 16-bit, float and double pixel types.
+TEST(ImageTest, TestFill) {
+  TestFillT<uint8_t>();
+  TestFillT<int16_t>();
+  TestFillT<float>();
+  TestFillT<double>();
+}
+
+// Copies a rect with 2 pixels of padding from a coordinate-encoded source
+// into a zeroed destination and checks that exactly the padded destination
+// rect received the matching source pixels.
+TEST(ImageTest, CopyImageToWithPaddingTest) {
+  Plane<uint32_t> src(100, 61);
+  // Every source pixel encodes its own coordinates as x * 1000 + y.
+  for (size_t y = 0; y < src.ysize(); y++) {
+    for (size_t x = 0; x < src.xsize(); x++) {
+      src.Row(y)[x] = x * 1000 + y;
+    }
+  }
+  Rect src_rect(10, 20, 30, 40);
+  EXPECT_TRUE(src_rect.IsInside(src));
+
+  Plane<uint32_t> dst(60, 50);
+  FillImage(0u, &dst);
+  Rect dst_rect(20, 5, 30, 40);
+  EXPECT_TRUE(dst_rect.IsInside(dst));
+
+  CopyImageToWithPadding(src_rect, src, /*padding=*/2, dst_rect, &dst);
+
+  // ysize is + 3 instead of + 4 because we are at the y image boundary on the
+  // source image.
+  Rect padded_dst_rect(20 - 2, 5 - 2, 30 + 4, 40 + 3);
+  for (size_t y = 0; y < dst.ysize(); y++) {
+    for (size_t x = 0; x < dst.xsize(); x++) {
+      if (Rect(x, y, 1, 1).IsInside(padded_dst_rect)) {
+        // Inside the padded rect: expect the source pixel at the position
+        // shifted by the offset between the two rects.
+        EXPECT_EQ((x - dst_rect.x0() + src_rect.x0()) * 1000 +
+                      (y - dst_rect.y0() + src_rect.y0()),
+                  dst.Row(y)[x]);
+      } else {
+        // Outside: the destination must still hold its initial zero.
+        EXPECT_EQ(0u, dst.Row(y)[x]);
+      }
+    }
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/image_test_utils.h b/third-party/libjxl/libjxl/lib/jxl/image_test_utils.h
new file mode 100644
index 0000000000..e7d72285e6
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/image_test_utils.h
@@ -0,0 +1,257 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_IMAGE_TEST_UTILS_H_
+#define LIB_JXL_IMAGE_TEST_UTILS_H_
+
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS
+#endif
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <cmath>
+#include <limits>
+#include <sstream>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+// Returns true iff `image1` and `image2` hold identical pixels. Descriptions
+// of the first (at most five) mismatches are appended to `failures`.
+template <typename T>
+bool SamePixels(const Plane<T>& image1, const Plane<T>& image2,
+                std::stringstream& failures) {
+  const Rect rect(image1);
+  JXL_CHECK(SameSize(image1, image2));
+  size_t mismatches = 0;
+  for (size_t y = rect.y0(); y < rect.ysize(); ++y) {
+    const T* const JXL_RESTRICT row1 = image1.Row(y);
+    const T* const JXL_RESTRICT row2 = image2.Row(y);
+    for (size_t x = rect.x0(); x < rect.xsize(); ++x) {
+      if (row1[x] != row2[x]) {
+        failures << "pixel mismatch at " << x << ", " << y << ": "
+                 << double(row1[x]) << " != " << double(row2[x]) << "\n";
+        if (++mismatches > 4) return false;  // Cap the report length.
+      }
+    }
+  }
+  return mismatches == 0;
+}
+
+// Compares all three planes; stops at the first plane that differs.
+template <typename T>
+bool SamePixels(const Image3<T>& image1, const Image3<T>& image2,
+                std::stringstream& failures) {
+  JXL_CHECK(SameSize(image1, image2));
+  bool all_same = true;
+  for (size_t plane = 0; all_same && plane < 3; ++plane) {
+    all_same = SamePixels(image1.Plane(plane), image2.Plane(plane), failures);
+  }
+  return all_same;
+}
+
+// Use for floating-point images with fairly large numbers; tolerates small
+// absolute errors and/or small relative errors: a pixel fails only when it
+// exceeds threshold_l1 and, if |expected| >= 1E-10, also threshold_relative.
+// Skips `border` pixels on every side; `c` only labels the stderr output.
+template <typename T>
+bool VerifyRelativeError(const Plane<T>& expected, const Plane<T>& actual,
+                         const double threshold_l1,
+                         const double threshold_relative,
+                         std::stringstream& failures, const intptr_t border = 0,
+                         const size_t c = 0) {
+  JXL_CHECK(SameSize(expected, actual));
+  const intptr_t xsize = expected.xsize();
+  const intptr_t ysize = expected.ysize();
+
+  // Max over current scanline to give a better idea whether there are
+  // systematic errors or just one outlier. Invalid if negative.
+  double max_l1 = -1;
+  double max_relative = -1;
+  bool any_bad = false;
+  for (intptr_t y = border; y < ysize - border; ++y) {
+    const T* const JXL_RESTRICT row_expected = expected.Row(y);
+    const T* const JXL_RESTRICT row_actual = actual.Row(y);
+    for (intptr_t x = border; x < xsize - border; ++x) {
+      const double l1 = std::abs(row_expected[x] - row_actual[x]);
+
+      // Cannot compute relative, only check/update L1.
+      if (std::abs(row_expected[x]) < 1E-10) {
+        if (l1 > threshold_l1) {
+          any_bad = true;
+          max_l1 = std::max(max_l1, l1);
+        }
+      } else {
+        const double relative = l1 / std::abs(double(row_expected[x]));
+        if (l1 > threshold_l1 && relative > threshold_relative) {
+          // Fails both tolerances => will exit below, update max_*.
+          any_bad = true;
+          max_l1 = std::max(max_l1, l1);
+          max_relative = std::max(max_relative, relative);
+        }
+      }
+    }
+  }
+  if (!any_bad) return true;
+  // Never had a valid relative value, don't print it.
+  if (max_relative < 0) {
+    fprintf(stderr, "c=%" PRIu64 ": max +/- %E exceeds +/- %.2E\n",
+            static_cast<uint64_t>(c), max_l1, threshold_l1);
+  } else {
+    fprintf(stderr,
+            "c=%" PRIu64 ": max +/- %E, x %E exceeds +/- %.2E, x %.2E\n",
+            static_cast<uint64_t>(c), max_l1, max_relative, threshold_l1,
+            threshold_relative);
+  }
+  // Dump the expected image and actual image if the region is small enough.
+  const intptr_t kMaxTestDumpSize = 16;
+  if (xsize <= kMaxTestDumpSize + 2 * border &&
+      ysize <= kMaxTestDumpSize + 2 * border) {
+    fprintf(stderr, "Expected image:\n");
+    for (intptr_t y = border; y < ysize - border; ++y) {
+      const T* const JXL_RESTRICT row_expected = expected.Row(y);
+      for (intptr_t x = border; x < xsize - border; ++x) {
+        fprintf(stderr, "%10lf ", static_cast<double>(row_expected[x]));
+      }
+      fprintf(stderr, "\n");
+    }
+
+    fprintf(stderr, "Actual image:\n");
+    for (intptr_t y = border; y < ysize - border; ++y) {
+      const T* const JXL_RESTRICT row_expected = expected.Row(y);
+      const T* const JXL_RESTRICT row_actual = actual.Row(y);
+      for (intptr_t x = border; x < xsize - border; ++x) {
+        const double l1 = std::abs(row_expected[x] - row_actual[x]);
+
+        bool bad = l1 > threshold_l1;
+        if (std::abs(row_expected[x]) >= 1E-10) {  // Same test as above.
+          const double relative = l1 / std::abs(double(row_expected[x]));
+          bad &= relative > threshold_relative;
+        }
+        if (bad) {
+          fprintf(stderr, "%10lf ", static_cast<double>(row_actual[x]));
+        } else {
+          fprintf(stderr, "%10s ", "==");
+        }
+      }
+      fprintf(stderr, "\n");
+    }
+  }
+
+  // Find first failing x for further debugging.
+  for (intptr_t y = border; y < ysize - border; ++y) {
+    const T* const JXL_RESTRICT row_expected = expected.Row(y);
+    const T* const JXL_RESTRICT row_actual = actual.Row(y);
+
+    for (intptr_t x = border; x < xsize - border; ++x) {
+      const double l1 = std::abs(row_expected[x] - row_actual[x]);
+
+      bool bad = l1 > threshold_l1;
+      if (std::abs(row_expected[x]) >= 1E-10) {  // Same test as above.
+        const double relative = l1 / std::abs(double(row_expected[x]));
+        bad &= relative > threshold_relative;
+      }
+      if (bad) {
+        failures << x << ", " << y << " (" << expected.xsize() << " x "
+                 << expected.ysize() << ") expected "
+                 << static_cast<double>(row_expected[x]) << " actual "
+                 << static_cast<double>(row_actual[x]);
+        return false;
+      }
+    }
+  }
+  return false;
+}
+
+// Checks the three planes in order; stops at the first one out of tolerance.
+template <typename T>
+bool VerifyRelativeError(const Image3<T>& expected, const Image3<T>& actual,
+                         const float threshold_l1,
+                         const float threshold_relative,
+                         std::stringstream& failures,
+                         const intptr_t border = 0) {
+  size_t plane = 0;
+  while (plane < 3 &&
+         VerifyRelativeError(expected.Plane(plane), actual.Plane(plane),
+                             threshold_l1, threshold_relative, failures,
+                             border, plane)) {
+    ++plane;
+  }
+  return plane == 3;
+}
+
+template <typename T, typename U = T>  // U is the type of the range bounds.
+void GenerateImage(Rng& rng, Plane<T>* image, U begin, U end) {
+  for (size_t y = 0; y < image->ysize(); ++y) {
+    T* const JXL_RESTRICT row = image->Row(y);
+    for (size_t x = 0; x < image->xsize(); ++x) {
+      if (std::is_same<T, float>::value || std::is_same<T, double>::value) {
+        row[x] = rng.UniformF(begin, end);  // float / double samples
+      } else if (std::is_signed<T>::value) {
+        row[x] = rng.UniformI(begin, end);  // signed integer samples
+      } else {
+        row[x] = rng.UniformU(begin, end);  // remaining (unsigned) types
+      }
+    }
+  }
+}
+
+template <typename T>
+void RandomFillImage(Plane<T>* image, const T begin, const T end,
+                     const int seed = 129) {
+  Rng generator(seed);  // Generator is local to this call.
+  GenerateImage(generator, image, begin, end);
+}
+
+template <typename T>
+typename std::enable_if<std::is_integral<T>::value>::type RandomFillImage(
+    Plane<T>* image) {
+  const int64_t range_end = int64_t(std::numeric_limits<T>::max()) + 1;
+  Rng generator(129);
+  GenerateImage(generator, image, int64_t(0), range_end);
+}
+
+JXL_INLINE void RandomFillImage(Plane<float>* image) {
+  Rng generator(129);  // Fixed seed.
+  GenerateImage(generator, image, 0.0f, std::numeric_limits<float>::max());
+}
+
+template <typename T, typename U>
+void GenerateImage(Rng& rng, Image3<T>* image, U begin, U end) {
+  for (size_t plane = 0; plane < 3; ++plane) {  // Planes share one `rng`.
+    GenerateImage(rng, &image->Plane(plane), begin, end);
+  }
+}
+
+template <typename T>
+typename std::enable_if<std::is_integral<T>::value>::type RandomFillImage(
+    Image3<T>* image) {
+  const int64_t range_end = int64_t(std::numeric_limits<T>::max()) + 1;
+  Rng generator(129);
+  GenerateImage(generator, image, int64_t(0), range_end);
+}
+
+JXL_INLINE void RandomFillImage(Image3F* image) {
+  Rng generator(129);  // Fixed seed.
+  GenerateImage(generator, image, 0.0f, std::numeric_limits<float>::max());
+}
+
+template <typename T, typename U>
+void RandomFillImage(Image3<T>* image, const U begin, const U end,
+                     const int seed = 129) {
+  Rng generator(seed);  // Generator is local to this call.
+  GenerateImage(generator, image, begin, end);
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_IMAGE_TEST_UTILS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/inverse_mtf-inl.h b/third-party/libjxl/libjxl/lib/jxl/inverse_mtf-inl.h
new file mode 100644
index 0000000000..fcb01d7396
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/inverse_mtf-inl.h
@@ -0,0 +1,90 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// SIMDified inverse-move-to-front transform.
+
+#if defined(LIB_JXL_INVERSE_MTF_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_INVERSE_MTF_INL_H_
+#undef LIB_JXL_INVERSE_MTF_INL_H_
+#else
+#define LIB_JXL_INVERSE_MTF_INL_H_
+#endif
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/sanitizers.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::FirstN;
+using hwy::HWY_NAMESPACE::IfThenElse;
+using hwy::HWY_NAMESPACE::Load;
+using hwy::HWY_NAMESPACE::LoadU;
+using hwy::HWY_NAMESPACE::StoreU;
+
+inline void MoveToFront(uint8_t* v, uint8_t index) {
+  uint8_t value = v[index];  // Saved; written to v[0] after the shift below.
+  uint8_t i = index;
+  if (i < 4) {
+    for (; i; --i) v[i] = v[i - 1];  // Short case: scalar shift by one.
+  } else {
+    const HWY_CAPPED(uint8_t, 64) d;
+    int tail = i & (Lanes(d) - 1);  // i mod Lanes(d), if Lanes(d) is 2^k.
+    if (tail) {
+      i -= tail;
+      const auto vec = Load(d, v + i);  // Source data for the shifted lanes.
+      const auto prev = LoadU(d, v + i + 1);  // Old data, kept past `tail`.
+      StoreU(IfThenElse(FirstN(d, tail), vec, prev), d, v + i + 1);
+    }
+    while (i) {  // Remaining full vectors, highest offset first.
+      i -= Lanes(d);
+      const auto vec = Load(d, v + i);
+      StoreU(vec, d, v + i + 1);  // Same data, one byte further along.
+    }
+  }
+  v[0] = value;
+}
+
+inline void InverseMoveToFrontTransform(uint8_t* v, int v_len) {
+  HWY_ALIGN uint8_t mtf[256 + 64];  // 256 symbols plus vector-store slack.
+  for (int sym = 0; sym < 256; ++sym) {
+    mtf[sym] = static_cast<uint8_t>(sym);
+  }
+  // Under MSan, also initialize the slack lanes touched by vector loads.
+#if JXL_MEMORY_SANITIZER
+  const HWY_CAPPED(uint8_t, 64) d;
+  for (size_t j = 0; j < Lanes(d); ++j) {
+    mtf[256 + j] = 0;
+  }
+#endif  // JXL_MEMORY_SANITIZER
+  for (int pos = 0; pos < v_len; ++pos) {
+    const uint8_t index = v[pos];
+    v[pos] = mtf[index];
+    if (index != 0) MoveToFront(mtf, index);
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_INVERSE_MTF_INL_H_
+
+#if HWY_ONCE
+#ifndef INVERSE_MTF_ONCE
+#define INVERSE_MTF_ONCE
+
+namespace jxl {
+inline void InverseMoveToFrontTransform(uint8_t* v, int v_len) {
+  HWY_STATIC_DISPATCH(InverseMoveToFrontTransform)(v, v_len);  // static target
+}
+}  // namespace jxl
+
+#endif  // INVERSE_MTF_ONCE
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data.cc b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data.cc
new file mode 100644
index 0000000000..db49a1c215
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data.cc
@@ -0,0 +1,145 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/jpeg/dec_jpeg_data.h"
+
+#include <brotli/decode.h>
+
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+namespace jpeg {
+// Fills `jpeg_data` from `encoded`: a Bundle header, then one brotli stream.
+Status DecodeJPEGData(Span<const uint8_t> encoded, JPEGData* jpeg_data) {
+  Status ret = true;
+  const uint8_t* in = encoded.data();
+  size_t available_in = encoded.size();
+  {
+    BitReader br(encoded);
+    BitReaderScopedCloser br_closer(&br, &ret);
+    JXL_RETURN_IF_ERROR(Bundle::Read(&br, jpeg_data));
+    JXL_RETURN_IF_ERROR(br.JumpToByteBoundary());
+    in += br.TotalBitsConsumed() / 8;  // Brotli data starts after the header.
+    available_in -= br.TotalBitsConsumed() / 8;
+  }
+  JXL_RETURN_IF_ERROR(ret);
+
+  BrotliDecoderState* brotli_dec =
+      BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
+  if (!brotli_dec) return JXL_FAILURE("Failed to create brotli decoder");
+  struct BrotliDecDeleter {
+    BrotliDecoderState* brotli_dec;
+    ~BrotliDecDeleter() { BrotliDecoderDestroyInstance(brotli_dec); }
+  } brotli_dec_deleter{brotli_dec};
+  BrotliDecoderResult result =
+      BrotliDecoderResult::BROTLI_DECODER_RESULT_SUCCESS;
+  // Fills `data` completely from the brotli stream; fails if it ends early.
+  auto br_read = [&](std::vector<uint8_t>& data) -> Status {
+    size_t available_out = data.size();
+    uint8_t* out = data.data();
+    while (available_out != 0) {
+      if (BrotliDecoderIsFinished(brotli_dec)) {
+        return JXL_FAILURE("Not enough decompressed output");
+      }
+      uint8_t* next_out_before = out;
+      size_t avail_out_before = available_out;
+      msan::MemoryIsInitialized(in, available_in);
+      result = BrotliDecoderDecompressStream(brotli_dec, &available_in, &in,
+                                             &available_out, &out, nullptr);
+      if (result !=
+              BrotliDecoderResult::BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT &&
+          result != BrotliDecoderResult::BROTLI_DECODER_RESULT_SUCCESS) {
+        return JXL_FAILURE(
+            "Brotli decoding error: %s\n",
+            BrotliDecoderErrorString(BrotliDecoderGetErrorCode(brotli_dec)));
+      }
+      msan::UnpoisonMemory(next_out_before, avail_out_before - available_out);
+    }
+    return true;
+  };
+  size_t num_icc = 0;
+  for (size_t i = 0; i < jpeg_data->app_data.size(); i++) {
+    auto& marker = jpeg_data->app_data[i];
+    if (jpeg_data->app_marker_type[i] != AppMarkerType::kUnknown) {
+      // Set the size of the marker.
+      size_t size_minus_1 = marker.size() - 1;
+      marker[1] = size_minus_1 >> 8;
+      marker[2] = size_minus_1 & 0xFF;
+      if (jpeg_data->app_marker_type[i] == AppMarkerType::kICC) {
+        if (marker.size() < 17) {
+          return JXL_FAILURE("ICC markers must be at least 17 bytes");
+        }
+        marker[0] = 0xE2;
+        memcpy(&marker[3], kIccProfileTag, sizeof kIccProfileTag);
+        marker[15] = ++num_icc;
+      }
+    } else {
+      JXL_RETURN_IF_ERROR(br_read(marker));
+      if (marker[1] * 256u + marker[2] + 1u != marker.size()) {
+        return JXL_FAILURE("Incorrect marker size");
+      }
+    }
+  }
+  for (size_t i = 0; i < jpeg_data->app_data.size(); i++) {
+    auto& marker = jpeg_data->app_data[i];
+    if (jpeg_data->app_marker_type[i] == AppMarkerType::kICC) {
+      marker[16] = num_icc;
+    }
+    if (jpeg_data->app_marker_type[i] == AppMarkerType::kExif) {
+      marker[0] = 0xE1;
+      if (marker.size() < 3 + sizeof kExifTag) {
+        return JXL_FAILURE("Incorrect Exif marker size");
+      }
+      memcpy(&marker[3], kExifTag, sizeof kExifTag);
+    }
+    if (jpeg_data->app_marker_type[i] == AppMarkerType::kXMP) {
+      marker[0] = 0xE1;
+      if (marker.size() < 3 + sizeof kXMPTag) {
+        return JXL_FAILURE("Incorrect XMP marker size");
+      }
+      memcpy(&marker[3], kXMPTag, sizeof kXMPTag);
+    }
+  }
+  // TODO(eustas): actually inject ICC profile and check it fits perfectly.
+  for (size_t i = 0; i < jpeg_data->com_data.size(); i++) {
+    auto& marker = jpeg_data->com_data[i];
+    JXL_RETURN_IF_ERROR(br_read(marker));
+    if (marker[1] * 256u + marker[2] + 1u != marker.size()) {
+      return JXL_FAILURE("Incorrect marker size");
+    }
+  }
+  for (size_t i = 0; i < jpeg_data->inter_marker_data.size(); i++) {
+    JXL_RETURN_IF_ERROR(br_read(jpeg_data->inter_marker_data[i]));
+  }
+  JXL_RETURN_IF_ERROR(br_read(jpeg_data->tail_data));
+
+  // Check if there is more decompressed output.
+  size_t available_out = 1;
+  uint64_t dummy;
+  uint8_t* next_out = reinterpret_cast<uint8_t*>(&dummy);
+  result = BrotliDecoderDecompressStream(brotli_dec, &available_in, &in,
+                                         &available_out, &next_out, nullptr);
+  if (available_out == 0 ||
+      result == BrotliDecoderResult::BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
+    return JXL_FAILURE("Excess data in compressed stream");
+  }
+  if (result == BrotliDecoderResult::BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
+    return JXL_FAILURE("Incomplete brotli-stream");
+  }
+  if (!BrotliDecoderIsFinished(brotli_dec) ||
+      result != BrotliDecoderResult::BROTLI_DECODER_RESULT_SUCCESS) {
+    return JXL_FAILURE("Corrupted brotli-stream");
+  }
+  if (available_in != 0) {
+    return JXL_FAILURE("Unused data after brotli stream");
+  }
+
+  return true;
+}
+}  // namespace jpeg
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data.h b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data.h
new file mode 100644
index 0000000000..b9d50bf9f8
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data.h
@@ -0,0 +1,19 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_JPEG_DEC_JPEG_DATA_H_
+#define LIB_JXL_JPEG_DEC_JPEG_DATA_H_
+
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+Status DecodeJPEGData(Span<const uint8_t> encoded, JPEGData* jpeg_data);
+}
+}  // namespace jxl
+
+#endif  // LIB_JXL_JPEG_DEC_JPEG_DATA_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data_writer.cc b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data_writer.cc
new file mode 100644
index 0000000000..1714c2b4fd
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data_writer.cc
@@ -0,0 +1,1042 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/jpeg/dec_jpeg_data_writer.h"
+
+#include <stdlib.h>
+#include <string.h> /* for memset, memcpy */
+
+#include <deque>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/jpeg/dec_jpeg_serialization_state.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+
+namespace {
+
+enum struct SerializationStatus {
+  NEEDS_MORE_INPUT,
+  NEEDS_MORE_OUTPUT,
+  ERROR,
+  DONE
+};
+
+const int kJpegPrecision = 8;
+
+// JpegBitWriter: buffer size
+const size_t kJpegBitWriterChunkSize = 16384;
+
+// DCTCodingState: maximum number of correction bits to buffer
+const int kJPEGMaxCorrectionBits = 1u << 16;
+
+// Non-zero iff at least one of the eight bytes of x is zero (SWAR trick).
+static JXL_INLINE uint64_t HasZeroByte(uint64_t x) {
+  const uint64_t kOnes = 0x0101010101010101ULL, kHighs = 0x8080808080808080ULL;
+  return (x - kOnes) & ~x & kHighs;
+}
+
+void JpegBitWriterInit(JpegBitWriter* bw,
+                       std::deque<OutputChunk>* output_queue) {
+  bw->healthy = true;
+  bw->output = output_queue;
+  bw->put_buffer = 0;
+  bw->put_bits = 64;  // Number of still-unused bits in put_buffer.
+  bw->chunk = OutputChunk(kJpegBitWriterChunkSize);
+  bw->data = bw->chunk.buffer->data();
+  bw->pos = 0;
+}
+
+static JXL_NOINLINE void SwapBuffer(JpegBitWriter* bw) {
+  bw->chunk.len = bw->pos;  // Record how much of the chunk was filled.
+  bw->output->emplace_back(std::move(bw->chunk));
+  bw->pos = 0;
+  bw->chunk = OutputChunk(kJpegBitWriterChunkSize);
+  bw->data = bw->chunk.buffer->data();
+}
+
+static JXL_INLINE void Reserve(JpegBitWriter* bw, size_t n_bytes) {
+  // Start a fresh chunk when the current one cannot hold n_bytes more.
+  const bool fits = (bw->pos + n_bytes) <= kJpegBitWriterChunkSize;
+  if (JXL_UNLIKELY(!fits)) SwapBuffer(bw);
+}
+
+/**
+ * Stores `byte` in the output; a 0xFF byte is followed by a stuffed zero.
+ *
+ * Unchecked: the caller must have reserved room for the 2 bytes touched.
+ */
+static JXL_INLINE void EmitByte(JpegBitWriter* bw, int byte) {
+  auto* const out = bw->data + bw->pos;
+  out[0] = byte;
+  out[1] = 0;
+  bw->pos += (byte == 0xFF) ? 2 : 1;
+}
+
+static JXL_INLINE void DischargeBitBuffer(JpegBitWriter* bw, int nbits,
+                                          uint64_t bits) {
+  // At this point we are ready to emit the put_buffer to the output.
+  // The JPEG format requires that after every 0xff byte in the entropy
+  // coded section, there is a zero byte, therefore we first check if any of
+  // the 8 bytes of put_buffer is 0xFF.
+  bw->put_buffer |= (bits >> -bw->put_bits);  // put_bits < 0 (see WriteBits).
+  if (JXL_UNLIKELY(HasZeroByte(~bw->put_buffer))) {
+    // We have a 0xFF byte somewhere, examine each byte and append a zero
+    // byte if necessary.
+    EmitByte(bw, (bw->put_buffer >> 56) & 0xFF);
+    EmitByte(bw, (bw->put_buffer >> 48) & 0xFF);
+    EmitByte(bw, (bw->put_buffer >> 40) & 0xFF);
+    EmitByte(bw, (bw->put_buffer >> 32) & 0xFF);
+    EmitByte(bw, (bw->put_buffer >> 24) & 0xFF);
+    EmitByte(bw, (bw->put_buffer >> 16) & 0xFF);
+    EmitByte(bw, (bw->put_buffer >> 8) & 0xFF);
+    EmitByte(bw, (bw->put_buffer) & 0xFF);
+  } else {
+    // We don't have any 0xFF bytes, output all 8 bytes without checking.
+    StoreBE64(bw->put_buffer, bw->data + bw->pos);
+    bw->pos += 8;
+  }
+  // Restart the buffer with the low bits of `bits` that did not fit above.
+  bw->put_bits += 64;
+  bw->put_buffer = bits << bw->put_bits;
+}
+
+static JXL_INLINE void WriteBits(JpegBitWriter* bw, int nbits, uint64_t bits) {
+  JXL_DASSERT(nbits > 0);
+  bw->put_bits -= nbits;  // Claim nbits of the free space in put_buffer.
+  if (JXL_UNLIKELY(bw->put_bits < 0)) {  // Does not fit entirely.
+    if (JXL_UNLIKELY(nbits > 64)) {
+      bw->put_bits += nbits;  // Undo the claim; nothing is written.
+      bw->healthy = false;
+    } else {
+      DischargeBitBuffer(bw, nbits, bits);
+    }
+  } else {
+    bw->put_buffer |= (bits << bw->put_bits);
+  }
+}
+
+void EmitMarker(JpegBitWriter* bw, int marker) {
+  JXL_DASSERT(marker != 0xFF);
+  Reserve(bw, 2);  // Room for the 0xFF prefix and the marker code.
+  bw->data[bw->pos++] = 0xFF;
+  bw->data[bw->pos++] = marker;
+}
+
+bool JumpToByteBoundary(JpegBitWriter* bw, const uint8_t** pad_bits,
+                        const uint8_t* pad_bits_end) {
+  size_t n_bits = bw->put_bits & 7u;  // Bits needed to finish the last byte.
+  uint8_t pad_pattern;
+  if (*pad_bits == nullptr) {
+    pad_pattern = (1u << n_bits) - 1;  // No recorded padding: use all ones.
+  } else {
+    pad_pattern = 0;
+    const uint8_t* src = *pad_bits;
+    // TODO(eustas): bitwise reading looks insanely ineffective...
+    while (n_bits--) {
+      pad_pattern <<= 1;
+      if (src >= pad_bits_end) return false;  // Ran out of recorded bits.
+      // TODO(eustas): DCHECK *src == {0, 1}
+      pad_pattern |= !!*(src++);
+    }
+    *pad_bits = src;  // Report how far the padding input was consumed.
+  }
+  // Worst case below: 8 bytes, each followed by a stuffed zero.
+  Reserve(bw, 16);
+  // Emit every complete byte currently held in put_buffer.
+  while (bw->put_bits <= 56) {
+    int c = (bw->put_buffer >> 56) & 0xFF;
+    EmitByte(bw, c);
+    bw->put_buffer <<= 8;
+    bw->put_bits += 8;
+  }
+  if (bw->put_bits < 64) {  // A partial byte remains: merge in the padding.
+    int pad_mask = 0xFFu >> (64 - bw->put_bits);
+    int c = ((bw->put_buffer >> 56) & ~pad_mask) | pad_pattern;
+    EmitByte(bw, c);
+  }
+  bw->put_buffer = 0;
+  bw->put_bits = 64;
+
+  return true;
+}
+
+void JpegBitWriterFinish(JpegBitWriter* bw) {
+  if (bw->pos == 0) return;  // Nothing pending in the current chunk.
+  bw->chunk.len = bw->pos;
+  bw->output->emplace_back(std::move(bw->chunk));
+  bw->pos = 0;
+  bw->data = nullptr;
+  bw->chunk = OutputChunk(nullptr, 0);
+}
+
+void DCTCodingStateInit(DCTCodingState* s) {
+  s->cur_ac_huff_ = nullptr;
+  s->eob_run_ = 0;
+  s->refinement_bits_.clear();
+  s->refinement_bits_.reserve(kJPEGMaxCorrectionBits);  // Allocate up front.
+}
+
+static JXL_INLINE void WriteSymbol(int symbol, HuffmanCodeTable* table,
+                                   JpegBitWriter* bw) {
+  WriteBits(bw, table->depth[symbol], table->code[symbol]);  // length, code
+}
+
+static JXL_INLINE void WriteSymbolBits(int symbol, HuffmanCodeTable* table,
+                                       JpegBitWriter* bw, int nbits,
+                                       uint64_t bits) {
+  WriteBits(bw, nbits + table->depth[symbol],  // Code and extra bits at once.
+            bits | (table->code[symbol] << nbits));  // Code sits above bits.
+}
+
+// Writes out the pending end-of-band run (if any) followed by all buffered
+// refinement bits, then resets both.
+static JXL_INLINE void Flush(DCTCodingState* s, JpegBitWriter* bw) {
+  if (s->eob_run_ > 0) {
+    const int run_log2 = FloorLog2Nonzero<uint32_t>(s->eob_run_);
+    const int symbol = run_log2 << 4u;
+    WriteSymbol(symbol, s->cur_ac_huff_, bw);
+    if (run_log2 > 0) {
+      WriteBits(bw, run_log2, s->eob_run_ & ((1 << run_log2) - 1));
+    }
+    s->eob_run_ = 0;
+  }
+  for (const auto bit : s->refinement_bits_) {
+    WriteBits(bw, 1, bit);
+  }
+  s->refinement_bits_.clear();
+}
+
+// Accounts for one more end-of-band block: extends the current EOB run and
+// queues the block's refinement bits, flushing once either limit is reached.
+static JXL_INLINE void BufferEndOfBand(DCTCodingState* s,
+                                       HuffmanCodeTable* ac_huff,
+                                       const std::vector<int>* new_bits,
+                                       JpegBitWriter* bw) {
+  if (s->eob_run_ == 0) s->cur_ac_huff_ = ac_huff;
+  ++s->eob_run_;
+  if (new_bits != nullptr) {
+    auto& pending = s->refinement_bits_;
+    pending.insert(pending.end(), new_bits->begin(), new_bits->end());
+  }
+  const bool run_full = s->eob_run_ == 0x7FFF;
+  const bool bits_full =
+      s->refinement_bits_.size() > kJPEGMaxCorrectionBits - kDCTBlockSize + 1;
+  if (run_full || bits_full) {
+    Flush(s, bw);
+  }
+}
+
+bool BuildHuffmanCodeTable(const JPEGHuffmanCode& huff,
+                           HuffmanCodeTable* table) {
+  int huff_code[kJpegHuffmanAlphabetSize];
+  // +1 for a sentinel element.
+  uint32_t huff_size[kJpegHuffmanAlphabetSize + 1];
+  int p = 0;
+  for (size_t l = 1; l <= kJpegHuffmanMaxBitLength; ++l) {
+    int i = huff.counts[l];
+    if (p + i > kJpegHuffmanAlphabetSize + 1) {
+      return false;  // More entries than huff_size can hold.
+    }
+    while (i--) huff_size[p++] = l;  // One code-length entry per symbol.
+  }
+  // No symbols at all: succeed without touching `table`.
+  if (p == 0) {
+    return true;
+  }
+  // The last counted entry becomes the zero terminator and gets no code.
+  // Reuse sentinel element.
+  int last_p = p - 1;
+  huff_size[last_p] = 0;
+  // Assign consecutive codes per length; shift left when the length grows.
+  int code = 0;
+  uint32_t si = huff_size[0];
+  p = 0;
+  while (huff_size[p]) {
+    while ((huff_size[p]) == si) {
+      huff_code[p++] = code;
+      code++;
+    }
+    code <<= 1;
+    si++;
+  }
+  for (p = 0; p < last_p; p++) {
+    int i = huff.values[p];  // Symbol that owns the p-th code.
+    table->depth[i] = huff_size[p];
+    table->code[i] = huff_code[p];
+  }
+  return true;
+}
+
+bool EncodeSOI(SerializationState* state) {
+  state->output_queue.push_back(OutputChunk({0xFF, 0xD8}));  // 2-byte marker
+  return true;
+}
+
+bool EncodeEOI(const JPEGData& jpg, SerializationState* state) {
+  state->output_queue.push_back(OutputChunk({0xFF, 0xD9}));  // 2-byte marker
+  state->output_queue.emplace_back(jpg.tail_data);  // Bytes after the marker.
+  return true;
+}
+
+bool EncodeSOF(const JPEGData& jpg, uint8_t marker, SerializationState* state) {
+  if (marker <= 0xC2) state->is_progressive = (marker == 0xC2);
+  // Segment length: 8 fixed bytes plus 3 per component (excludes 0xFF+marker).
+  const size_t n_comps = jpg.components.size();
+  const size_t marker_len = 8 + 3 * n_comps;
+  state->output_queue.emplace_back(marker_len + 2);
+  uint8_t* data = state->output_queue.back().buffer->data();
+  size_t pos = 0;
+  data[pos++] = 0xFF;
+  data[pos++] = marker;
+  data[pos++] = marker_len >> 8u;  // 16-bit fields are written big-endian.
+  data[pos++] = marker_len & 0xFFu;
+  data[pos++] = kJpegPrecision;
+  data[pos++] = jpg.height >> 8u;
+  data[pos++] = jpg.height & 0xFFu;
+  data[pos++] = jpg.width >> 8u;
+  data[pos++] = jpg.width & 0xFFu;
+  data[pos++] = n_comps;
+  for (size_t i = 0; i < n_comps; ++i) {
+    data[pos++] = jpg.components[i].id;
+    data[pos++] = ((jpg.components[i].h_samp_factor << 4u) |
+                   (jpg.components[i].v_samp_factor));
+    const size_t quant_idx = jpg.components[i].quant_idx;
+    if (quant_idx >= jpg.quant.size()) return false;  // Bad table reference.
+    data[pos++] = jpg.quant[quant_idx].index;
+  }
+  return true;
+}
+
+bool EncodeSOS(const JPEGData& jpg, const JPEGScanInfo& scan_info,
+               SerializationState* state) {
+  const size_t n_scans = scan_info.num_components;
+  const size_t marker_len = 6 + 2 * n_scans;  // 6 fixed bytes + 2 per entry.
+  state->output_queue.emplace_back(marker_len + 2);  // +2: 0xFF and 0xDA.
+  uint8_t* data = state->output_queue.back().buffer->data();
+  size_t pos = 0;
+  data[pos++] = 0xFF;
+  data[pos++] = 0xDA;
+  data[pos++] = marker_len >> 8u;
+  data[pos++] = marker_len & 0xFFu;
+  data[pos++] = n_scans;
+  for (size_t i = 0; i < n_scans; ++i) {
+    const JPEGComponentScanInfo& si = scan_info.components[i];
+    if (si.comp_idx >= jpg.components.size()) return false;  // Bad reference.
+    data[pos++] = jpg.components[si.comp_idx].id;
+    data[pos++] = (si.dc_tbl_idx << 4u) + si.ac_tbl_idx;  // DC high, AC low.
+  }
+  data[pos++] = scan_info.Ss;
+  data[pos++] = scan_info.Se;
+  data[pos++] = ((scan_info.Ah << 4u) | (scan_info.Al));
+  return true;
+}
+
+bool EncodeDHT(const JPEGData& jpg, SerializationState* state) {
+  const std::vector<JPEGHuffmanCode>& huffman_code = jpg.huffman_code;
+  // First pass: size of the segment covering tables up to the is_last one.
+  size_t marker_len = 2;
+  for (size_t i = state->dht_index; i < huffman_code.size(); ++i) {
+    const JPEGHuffmanCode& huff = huffman_code[i];
+    marker_len += kJpegHuffmanMaxBitLength;
+    for (size_t j = 0; j < huff.counts.size(); ++j) {
+      marker_len += huff.counts[j];
+    }
+    if (huff.is_last) break;
+  }
+  state->output_queue.emplace_back(marker_len + 2);  // +2: 0xFF and 0xC4.
+  uint8_t* data = state->output_queue.back().buffer->data();
+  size_t pos = 0;
+  data[pos++] = 0xFF;
+  data[pos++] = 0xC4;
+  data[pos++] = marker_len >> 8u;
+  data[pos++] = marker_len & 0xFFu;
+  while (true) {  // Second pass: serialize the same run of tables.
+    const size_t huffman_code_index = state->dht_index++;
+    if (huffman_code_index >= huffman_code.size()) {
+      return false;
+    }
+    const JPEGHuffmanCode& huff = huffman_code[huffman_code_index];
+    size_t index = huff.slot_id;
+    HuffmanCodeTable* huff_table;
+    if (index & 0x10) {  // Bit 0x10 of slot_id selects the AC table set.
+      index -= 0x10;
+      huff_table = &state->ac_huff_table[index];
+    } else {
+      huff_table = &state->dc_huff_table[index];
+    }
+    // TODO(eustas): cache
+    huff_table->InitDepths(127);
+    if (!BuildHuffmanCodeTable(huff, huff_table)) {
+      return false;
+    }
+    huff_table->initialized = true;
+    size_t total_count = 0;
+    size_t max_length = 0;  // Largest index with a non-zero count.
+    for (size_t i = 0; i < huff.counts.size(); ++i) {
+      if (huff.counts[i] != 0) {
+        max_length = i;
+      }
+      total_count += huff.counts[i];
+    }
+    --total_count;  // One entry fewer is written than is counted.
+    data[pos++] = huff.slot_id;
+    for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) {
+      data[pos++] = (i == max_length ? huff.counts[i] - 1 : huff.counts[i]);
+    }
+    for (size_t i = 0; i < total_count; ++i) {
+      data[pos++] = huff.values[i];
+    }
+    if (huff.is_last) break;
+  }
+  return true;
+}
+
+bool EncodeDQT(const JPEGData& jpg, SerializationState* state) {
+  int marker_len = 2;  // The length field itself; table sizes added below.
+  for (size_t i = state->dqt_index; i < jpg.quant.size(); ++i) {
+    const JPEGQuantTable& table = jpg.quant[i];
+    marker_len += 1 + (table.precision ? 2 : 1) * kDCTBlockSize;
+    if (table.is_last) break;
+  }
+  state->output_queue.emplace_back(marker_len + 2);  // +2: 0xFF and 0xDB.
+  uint8_t* data = state->output_queue.back().buffer->data();
+  size_t pos = 0;
+  data[pos++] = 0xFF;
+  data[pos++] = 0xDB;
+  data[pos++] = marker_len >> 8u;
+  data[pos++] = marker_len & 0xFFu;
+  while (true) {  // Serialize tables up to and including the is_last one.
+    const size_t idx = state->dqt_index++;
+    if (idx >= jpg.quant.size()) {
+      return false;  // corrupt input
+    }
+    const JPEGQuantTable& table = jpg.quant[idx];
+    data[pos++] = (table.precision << 4u) + table.index;
+    for (size_t i = 0; i < kDCTBlockSize; ++i) {
+      int val_idx = kJPEGNaturalOrder[i];
+      int val = table.values[val_idx];
+      if (table.precision) {  // Non-zero precision: two bytes per value.
+        data[pos++] = val >> 8u;
+      }
+      data[pos++] = val & 0xFFu;
+    }
+    if (table.is_last) break;
+  }
+  return true;
+}
+
+// Queues the six-byte DRI (0xFFDD) segment: marker, the fixed length 4, then
+// jpg.restart_interval as two big-endian bytes. Also sets seen_dri_marker,
+// which DoEncodeScan checks before applying the restart interval.
+bool EncodeDRI(const JPEGData& jpg, SerializationState* state) {
+  const uint8_t hi = static_cast<uint8_t>(jpg.restart_interval >> 8);
+  const uint8_t lo = static_cast<uint8_t>(jpg.restart_interval & 0xFF);
+  state->seen_dri_marker = true;
+  state->output_queue.push_back(
+      OutputChunk({0xFF, 0xDD, 0, 4, hi, lo}));
+  return true;
+}
+// Queues a bare two-byte restart marker (0xFF, |marker|); cannot fail.
+bool EncodeRestart(uint8_t marker, SerializationState* state) {
+  state->output_queue.emplace_back(OutputChunk({0xFF, marker}));
+  return true;
+}
+
+// Queues the next not-yet-written APPn segment: a 0xFF byte, then a chunk
+// built from jpg.app_data[i]. Returns false when no app_data entry is left.
+bool EncodeAPP(const JPEGData& jpg, uint8_t marker, SerializationState* state) {
+  (void)marker;  // TODO(eustas): check that marker corresponds to payload?
+  const size_t cursor = state->app_index++;
+  if (cursor >= jpg.app_data.size()) return false;
+  state->output_queue.push_back(OutputChunk({0xFF}));
+  state->output_queue.emplace_back(jpg.app_data[cursor]);
+  return true;
+}
+// Queues the next comment segment: 0xFF plus a chunk built from com_data[i].
+bool EncodeCOM(const JPEGData& jpg, SerializationState* state) {
+  const size_t cursor = state->com_index++;
+  if (cursor >= jpg.com_data.size()) return false;  // more COMs than payloads
+  state->output_queue.push_back(OutputChunk({0xFF}));
+  state->output_queue.emplace_back(jpg.com_data[cursor]);
+  return true;
+}
+// Queues the next inter_marker_data entry verbatim (no 0xFF prefix is added).
+bool EncodeInterMarkerData(const JPEGData& jpg, SerializationState* state) {
+  const size_t cursor = state->data_index++;
+  if (cursor >= jpg.inter_marker_data.size()) return false;  // none left
+  state->output_queue.emplace_back(jpg.inter_marker_data[cursor]);
+  return true;
+}
+// Huffman-codes one 8x8 block of a sequential scan: DC delta, then AC terms.
+bool EncodeDCTBlockSequential(const coeff_t* coeffs, HuffmanCodeTable* dc_huff,
+                              HuffmanCodeTable* ac_huff, int num_zero_runs,
+                              coeff_t* last_dc_coeff, JpegBitWriter* bw) {
+  coeff_t temp2;
+  coeff_t temp;
+  coeff_t litmus = 0;  // ORs together every AC magnitude seen in the block
+  temp2 = coeffs[0];
+  temp = temp2 - *last_dc_coeff;  // DC is coded as a delta from the last DC
+  *last_dc_coeff = temp2;
+  temp2 = temp >> (8 * sizeof(coeff_t) - 1);  // sign mask (arithmetic shift)
+  temp += temp2;                              // negative delta becomes delta-1
+  temp2 ^= temp;                              // temp2 now holds |delta|
+
+  int dc_nbits = (temp2 == 0) ? 0 : (FloorLog2Nonzero<uint32_t>(temp2) + 1);
+  WriteSymbol(dc_nbits, dc_huff, bw);
+#if false
+  // If the input is corrupt, this could be triggered. Checking is
+  // costly though, so it makes more sense to avoid this branch.
+  // (producing a corrupt JPEG when the input is corrupt, instead
+  // of catching it and returning error)
+  if (dc_nbits >= 12) return false;
+#endif
+  if (dc_nbits) {
+    WriteBits(bw, dc_nbits, temp & ((1u << dc_nbits) - 1));
+  }
+  int16_t r = 0;  // length of the current run of zero AC coefficients
+
+  for (size_t i = 1; i < 64; i++) {
+    if ((temp = coeffs[kJPEGNaturalOrder[i]]) == 0) {
+      r++;
+    } else {
+      temp2 = temp >> (8 * sizeof(coeff_t) - 1);  // same sign trick as for DC
+      temp += temp2;
+      temp2 ^= temp;
+      if (JXL_UNLIKELY(r > 15)) {  // r <= 62 here, so three steps suffice
+        WriteSymbol(0xf0, ac_huff, bw);  // symbol 0xF0 accounts for 16 zeros
+        r -= 16;
+        if (r > 15) {
+          WriteSymbol(0xf0, ac_huff, bw);
+          r -= 16;
+        }
+        if (r > 15) {
+          WriteSymbol(0xf0, ac_huff, bw);
+          r -= 16;
+        }
+      }
+      litmus |= temp2;
+      int ac_nbits =
+          FloorLog2Nonzero<uint32_t>(static_cast<uint16_t>(temp2)) + 1;
+      int symbol = (r << 4u) + ac_nbits;  // zero run in high nibble, size in low
+      WriteSymbolBits(symbol, ac_huff, bw, ac_nbits,
+                      temp & ((1 << ac_nbits) - 1));
+      r = 0;
+    }
+  }
+
+  for (int i = 0; i < num_zero_runs; ++i) {  // caller-requested extra 0xF0s
+    WriteSymbol(0xf0, ac_huff, bw);
+    r -= 16;
+  }
+  if (r > 0) {  // zeros still pending after the last nonzero coefficient
+    WriteSymbol(0, ac_huff, bw);  // symbol 0 ends the block
+  }
+  return (litmus >= 0);  // false when some magnitude has its sign bit set
+}
+// Codes one block of a first-pass progressive scan over the band [Ss, Se].
+bool EncodeDCTBlockProgressive(const coeff_t* coeffs, HuffmanCodeTable* dc_huff,
+                               HuffmanCodeTable* ac_huff, int Ss, int Se,
+                               int Al, int num_zero_runs,
+                               DCTCodingState* coding_state,
+                               coeff_t* last_dc_coeff, JpegBitWriter* bw) {
+  bool eob_run_allowed = Ss > 0;  // bands that include DC flush EOB at once
+  coeff_t temp2;
+  coeff_t temp;
+  if (Ss == 0) {
+    temp2 = coeffs[0] >> Al;  // DC is coded after dropping the low Al bits
+    temp = temp2 - *last_dc_coeff;
+    *last_dc_coeff = temp2;
+    temp2 = temp;
+    if (temp < 0) {
+      temp = -temp;
+      if (temp < 0) return false;  // negation overflowed coeff_t
+      temp2--;  // negative deltas are sent as delta-1 (low nbits only)
+    }
+    int nbits = (temp == 0) ? 0 : (FloorLog2Nonzero<uint32_t>(temp) + 1);
+    WriteSymbol(nbits, dc_huff, bw);
+    if (nbits) {
+      WriteBits(bw, nbits, temp2 & ((1 << nbits) - 1));
+    }
+    ++Ss;  // the AC loop below starts after the DC coefficient
+  }
+  if (Ss > Se) {
+    return true;
+  }
+  int r = 0;  // length of the current run of zero coefficients
+  for (int k = Ss; k <= Se; ++k) {
+    if ((temp = coeffs[kJPEGNaturalOrder[k]]) == 0) {
+      r++;
+      continue;
+    }
+    if (temp < 0) {
+      temp = -temp;
+      if (temp < 0) return false;  // negation overflowed coeff_t
+      temp >>= Al;
+      temp2 = ~temp;  // bit pattern sent for a negative value
+    } else {
+      temp >>= Al;
+      temp2 = temp;
+    }
+    if (temp == 0) {  // magnitude vanished after the shift: counts as a zero
+      r++;
+      continue;
+    }
+    Flush(coding_state, bw);
+    while (r > 15) {
+      WriteSymbol(0xf0, ac_huff, bw);  // symbol 0xF0 accounts for 16 zeros
+      r -= 16;
+    }
+    int nbits = FloorLog2Nonzero<uint32_t>(temp) + 1;
+    int symbol = (r << 4u) + nbits;  // zero run in high nibble, size in low
+    WriteSymbol(symbol, ac_huff, bw);
+    WriteBits(bw, nbits, temp2 & ((1 << nbits) - 1));
+    r = 0;
+  }
+  if (num_zero_runs > 0) {
+    Flush(coding_state, bw);
+    for (int i = 0; i < num_zero_runs; ++i) {  // caller-requested extra 0xF0s
+      WriteSymbol(0xf0, ac_huff, bw);
+      r -= 16;
+    }
+  }
+  if (r > 0) {  // trailing zeros: hand an end-of-band to the coding state
+    BufferEndOfBand(coding_state, ac_huff, nullptr, bw);
+    if (!eob_run_allowed) {
+      Flush(coding_state, bw);
+    }
+  }
+  return true;
+}
+// Codes one block of a successive-approximation refinement scan (Ah != 0).
+bool EncodeRefinementBits(const coeff_t* coeffs, HuffmanCodeTable* ac_huff,
+                          int Ss, int Se, int Al, DCTCodingState* coding_state,
+                          JpegBitWriter* bw) {
+  bool eob_run_allowed = Ss > 0;
+  if (Ss == 0) {
+    // Emit next bit of DC component.
+    WriteBits(bw, 1, (coeffs[0] >> Al) & 1);
+    ++Ss;
+  }
+  if (Ss > Se) {
+    return true;
+  }
+  int abs_values[kDCTBlockSize];  // only entries Ss..Se are written and read
+  int eob = 0;  // last position whose shifted magnitude is exactly 1
+  for (int k = Ss; k <= Se; k++) {
+    const coeff_t abs_val = std::abs(coeffs[kJPEGNaturalOrder[k]]);
+    abs_values[k] = abs_val >> Al;
+    if (abs_values[k] == 1) {
+      eob = k;
+    }
+  }
+  int r = 0;  // length of the current run of zero coefficients
+  std::vector<int> refinement_bits;  // low bits queued for magnitudes > 1
+  refinement_bits.reserve(kDCTBlockSize);
+  for (int k = Ss; k <= Se; k++) {
+    if (abs_values[k] == 0) {
+      r++;
+      continue;
+    }
+    while (r > 15 && k <= eob) {  // 0xF0 only while a magnitude-1 term remains
+      Flush(coding_state, bw);
+      WriteSymbol(0xf0, ac_huff, bw);
+      r -= 16;
+      for (int bit : refinement_bits) {
+        WriteBits(bw, 1, bit);
+      }
+      refinement_bits.clear();
+    }
+    if (abs_values[k] > 1) {  // magnitude above 1: queue only its low bit
+      refinement_bits.push_back(abs_values[k] & 1u);
+      continue;
+    }
+    Flush(coding_state, bw);
+    int symbol = (r << 4u) + 1;  // zero run in high nibble, size fixed at 1
+    int new_non_zero_bit = (coeffs[kJPEGNaturalOrder[k]] < 0) ? 0 : 1;  // sign
+    WriteSymbol(symbol, ac_huff, bw);
+    WriteBits(bw, 1, new_non_zero_bit);
+    for (int bit : refinement_bits) {
+      WriteBits(bw, 1, bit);
+    }
+    refinement_bits.clear();
+    r = 0;
+  }
+  if (r > 0 || !refinement_bits.empty()) {
+    BufferEndOfBand(coding_state, ac_huff, &refinement_bits, bw);
+    if (!eob_run_allowed) {
+      Flush(coding_state, bw);
+    }
+  }
+  return true;
+}
+// Sums num_components over every scan in |jpg|.
+size_t NumHistograms(const JPEGData& jpg) {
+  size_t total = 0;
+  for (size_t s = 0; s < jpg.scan_info.size(); ++s) {
+    total += jpg.scan_info[s].num_components;
+  }
+  return total;
+}
+// Flat index of (scan, component): components of all earlier scans come first.
+size_t HistogramIndex(const JPEGData& jpg, size_t scan_index,
+                      size_t component_index) {
+  size_t flat = component_index;
+  for (size_t s = scan_index; s > 0; --s) {
+    flat += jpg.scan_info[s - 1].num_components;
+  }
+  return flat;
+}
+// Encodes one scan; kMode selects 0 sequential, 1 progressive, 2 refinement.
+template <int kMode>
+SerializationStatus JXL_NOINLINE DoEncodeScan(const JPEGData& jpg,
+                                              SerializationState* state) {
+  const JPEGScanInfo& scan_info = jpg.scan_info[state->scan_index];
+  EncodeScanState& ss = state->scan_state;
+
+  const int restart_interval =
+      state->seen_dri_marker ? jpg.restart_interval : 0;  // 0 disables restarts
+
+  const auto get_next_extra_zero_run_index = [&ss, &scan_info]() -> int {
+    if (ss.extra_zero_runs_pos < scan_info.extra_zero_runs.size()) {
+      return scan_info.extra_zero_runs[ss.extra_zero_runs_pos].block_idx;
+    } else {
+      return -1;  // no entries left; never equals a block index
+    }
+  };
+
+  const auto get_next_reset_point = [&ss, &scan_info]() -> int {
+    if (ss.next_reset_point_pos < scan_info.reset_points.size()) {
+      return scan_info.reset_points[ss.next_reset_point_pos++];  // advances
+    } else {
+      return -1;
+    }
+  };
+
+  if (ss.stage == EncodeScanState::HEAD) {  // first call for this scan
+    if (!EncodeSOS(jpg, scan_info, state)) return SerializationStatus::ERROR;
+    JpegBitWriterInit(&ss.bw, &state->output_queue);
+    DCTCodingStateInit(&ss.coding_state);
+    ss.restarts_to_go = restart_interval;
+    ss.next_restart_marker = 0;
+    ss.block_scan_index = 0;
+    ss.extra_zero_runs_pos = 0;
+    ss.next_extra_zero_run_index = get_next_extra_zero_run_index();
+    ss.next_reset_point_pos = 0;
+    ss.next_reset_point = get_next_reset_point();
+    ss.mcu_y = 0;
+    memset(ss.last_dc_coeff, 0, sizeof(ss.last_dc_coeff));
+    ss.stage = EncodeScanState::BODY;
+  }
+  JpegBitWriter* bw = &ss.bw;
+  DCTCodingState* coding_state = &ss.coding_state;
+
+  JXL_DASSERT(ss.stage == EncodeScanState::BODY);
+
+  // "Non-interleaved" means color data comes in separate scans, in other words
+  // each scan can contain only one color component.
+  const bool is_interleaved = (scan_info.num_components > 1);
+  int MCUs_per_row = 0;
+  int MCU_rows = 0;
+  jpg.CalculateMcuSize(scan_info, &MCUs_per_row, &MCU_rows);
+  const bool is_progressive = state->is_progressive;
+  const int Al = is_progressive ? scan_info.Al : 0;
+  const int Ss = is_progressive ? scan_info.Ss : 0;
+  const int Se = is_progressive ? scan_info.Se : 63;
+
+  // DC-only is defined by [0..0] spectral range.
+  const bool want_ac = ((Ss != 0) || (Se != 0));
+  const bool want_dc = (Ss == 0);
+  // TODO: support streaming decoding again.
+  const bool complete_ac = true;
+  const bool has_ac = true;
+  if (want_ac && !has_ac) return SerializationStatus::NEEDS_MORE_INPUT;
+
+  // |has_ac| implies |complete_dc| but not vice versa; for the sake of
+  // simplicity we pretend they are equal, because they are separated by just a
+  // few bytes of input.
+  const bool complete_dc = has_ac;
+  const bool complete = want_ac ? complete_ac : complete_dc;
+  // When "incomplete" |ac_dc| tracks information about current ("incomplete")
+  // band parsing progress.
+
+  // FIXME: Is this always complete?
+  // const int last_mcu_y =
+  //     complete ? MCU_rows : parsing_state.internal->ac_dc.next_mcu_y *
+  //     v_group;
+  (void)complete;
+  const int last_mcu_y = complete ? MCU_rows : 0;
+
+  for (; ss.mcu_y < last_mcu_y; ++ss.mcu_y) {
+    for (int mcu_x = 0; mcu_x < MCUs_per_row; ++mcu_x) {
+      // Possibly emit a restart marker.
+      if (restart_interval > 0 && ss.restarts_to_go == 0) {
+        Flush(coding_state, bw);
+        if (!JumpToByteBoundary(bw, &state->pad_bits, state->pad_bits_end)) {
+          return SerializationStatus::ERROR;
+        }
+        EmitMarker(bw, 0xD0 + ss.next_restart_marker);
+        ss.next_restart_marker += 1;
+        ss.next_restart_marker &= 0x7;  // restart markers cycle 0xD0..0xD7
+        ss.restarts_to_go = restart_interval;
+        memset(ss.last_dc_coeff, 0, sizeof(ss.last_dc_coeff));  // reset DC
+      }
+
+      // Encode one MCU
+      for (size_t i = 0; i < scan_info.num_components; ++i) {
+        const JPEGComponentScanInfo& si = scan_info.components[i];
+        const JPEGComponent& c = jpg.components[si.comp_idx];
+        size_t dc_tbl_idx = si.dc_tbl_idx;
+        size_t ac_tbl_idx = si.ac_tbl_idx;
+        HuffmanCodeTable* dc_huff = &state->dc_huff_table[dc_tbl_idx];
+        HuffmanCodeTable* ac_huff = &state->ac_huff_table[ac_tbl_idx];
+        if (want_dc && !dc_huff->initialized) {
+          return SerializationStatus::ERROR;
+        }
+        if (want_ac && !ac_huff->initialized) {
+          return SerializationStatus::ERROR;
+        }
+        int n_blocks_y = is_interleaved ? c.v_samp_factor : 1;
+        int n_blocks_x = is_interleaved ? c.h_samp_factor : 1;
+        // compressed size per block cannot be more than 512 bytes per component
+        Reserve(bw, 512 * n_blocks_y * n_blocks_x);
+        for (int iy = 0; iy < n_blocks_y; ++iy) {
+          for (int ix = 0; ix < n_blocks_x; ++ix) {
+            int block_y = ss.mcu_y * n_blocks_y + iy;
+            int block_x = mcu_x * n_blocks_x + ix;
+            int block_idx = block_y * c.width_in_blocks + block_x;
+            if (ss.block_scan_index == ss.next_reset_point) {
+              Flush(coding_state, bw);
+              ss.next_reset_point = get_next_reset_point();
+            }
+            int num_zero_runs = 0;
+            if (ss.block_scan_index == ss.next_extra_zero_run_index) {
+              num_zero_runs = scan_info.extra_zero_runs[ss.extra_zero_runs_pos]
+                                  .num_extra_zero_runs;
+              ++ss.extra_zero_runs_pos;
+              ss.next_extra_zero_run_index = get_next_extra_zero_run_index();
+            }
+            const coeff_t* coeffs = &c.coeffs[block_idx << 6];  // 64 per block
+            bool ok;
+            if (kMode == 0) {
+              ok = EncodeDCTBlockSequential(coeffs, dc_huff, ac_huff,
+                                            num_zero_runs,
+                                            ss.last_dc_coeff + si.comp_idx, bw);
+            } else if (kMode == 1) {
+              ok = EncodeDCTBlockProgressive(
+                  coeffs, dc_huff, ac_huff, Ss, Se, Al, num_zero_runs,
+                  coding_state, ss.last_dc_coeff + si.comp_idx, bw);
+            } else {
+              ok = EncodeRefinementBits(coeffs, ac_huff, Ss, Se, Al,
+                                        coding_state, bw);
+            }
+            if (!ok) return SerializationStatus::ERROR;
+            ++ss.block_scan_index;
+          }
+        }
+      }
+      --ss.restarts_to_go;  // counted in MCUs
+    }
+  }
+  if (ss.mcu_y < MCU_rows) {
+    if (!bw->healthy) return SerializationStatus::ERROR;
+    return SerializationStatus::NEEDS_MORE_INPUT;
+  }
+  Flush(coding_state, bw);
+  if (!JumpToByteBoundary(bw, &state->pad_bits, state->pad_bits_end)) {
+    return SerializationStatus::ERROR;
+  }
+  JpegBitWriterFinish(bw);
+  ss.stage = EncodeScanState::HEAD;  // the next scan starts from scratch
+  state->scan_index++;
+  if (!bw->healthy) return SerializationStatus::ERROR;
+
+  return SerializationStatus::DONE;
+}
+
+static SerializationStatus JXL_INLINE EncodeScan(const JPEGData& jpg,
+                                                 SerializationState* state) {
+  // A 0xDA marker without a matching scan_info entry means corrupt input;
+  // fail like the other Encode* helpers instead of indexing out of bounds.
+  if (state->scan_index < 0 ||
+      static_cast<size_t>(state->scan_index) >= jpg.scan_info.size()) {
+    return SerializationStatus::ERROR;
+  }
+  const JPEGScanInfo& scan_info = jpg.scan_info[state->scan_index];
+  // Mode 0: sequential coding (non-progressive, or a progressive scan that
+  // covers the whole spectrum with Ah == Al == 0). Mode 1: progressive first
+  // pass (Ah == 0). Mode 2: refinement pass (Ah != 0).
+  const bool whole_block = scan_info.Ah == 0 && scan_info.Al == 0 &&
+                           scan_info.Ss == 0 && scan_info.Se == 63;
+  if (!state->is_progressive || whole_block) return DoEncodeScan<0>(jpg, state);
+  if (scan_info.Ah == 0) return DoEncodeScan<1>(jpg, state);
+  return DoEncodeScan<2>(jpg, state);
+}
+// Routes one marker_order entry to its encoder and normalizes the result.
+SerializationStatus SerializeSection(uint8_t marker, SerializationState* state,
+                                     const JPEGData& jpg) {
+  const auto to_status = [](bool result) {
+    return result ? SerializationStatus::DONE : SerializationStatus::ERROR;
+  };
+  // TODO(eustas): add and use marker enum
+  switch (marker) {
+    case 0xC0:
+    case 0xC1:
+    case 0xC2:
+    case 0xC9:
+    case 0xCA:  // start-of-frame variants
+      return to_status(EncodeSOF(jpg, marker, state));
+
+    case 0xC4:  // Huffman tables
+      return to_status(EncodeDHT(jpg, state));
+
+    case 0xD0:
+    case 0xD1:
+    case 0xD2:
+    case 0xD3:
+    case 0xD4:
+    case 0xD5:
+    case 0xD6:
+    case 0xD7:  // standalone restart markers
+      return to_status(EncodeRestart(marker, state));
+
+    case 0xD9:  // end of image
+      return to_status(EncodeEOI(jpg, state));
+
+    case 0xDA:  // scan; the only section that may report NEEDS_MORE_INPUT
+      return EncodeScan(jpg, state);
+
+    case 0xDB:  // quantization tables
+      return to_status(EncodeDQT(jpg, state));
+
+    case 0xDD:  // restart interval
+      return to_status(EncodeDRI(jpg, state));
+
+    case 0xE0:
+    case 0xE1:
+    case 0xE2:
+    case 0xE3:
+    case 0xE4:
+    case 0xE5:
+    case 0xE6:
+    case 0xE7:
+    case 0xE8:
+    case 0xE9:
+    case 0xEA:
+    case 0xEB:
+    case 0xEC:
+    case 0xED:
+    case 0xEE:
+    case 0xEF:  // application segments
+      return to_status(EncodeAPP(jpg, marker, state));
+
+    case 0xFE:  // comment
+      return to_status(EncodeCOM(jpg, state));
+
+    case 0xFF:  // raw bytes recorded between markers
+      return to_status(EncodeInterMarkerData(jpg, state));
+
+    default:  // any other marker value is rejected
+      return SerializationStatus::ERROR;
+  }
+}
+// Serializes |jpg| section by section, draining queued chunks into |out|.
+// TODO(veluca): add streaming support again.
+Status WriteJpegInternal(const JPEGData& jpg, const JPEGOutput& out,
+                         SerializationState* ss) {
+  const auto maybe_push_output = [&]() -> Status {
+    if (ss->stage != SerializationState::STAGE_ERROR) {
+      while (!ss->output_queue.empty()) {
+        auto& chunk = ss->output_queue.front();
+        size_t num_written = out(chunk.next, chunk.len);
+        if (num_written == 0 && chunk.len > 0) {
+          return StatusMessage(Status(StatusCode::kNotEnoughBytes),
+                               "Failed to write output");
+        }
+        // Skip the bytes the sink took so a partial write is not re-sent.
+        chunk.next += num_written;
+        chunk.len -= num_written;
+        if (chunk.len == 0) ss->output_queue.pop_front();
+      }
+    }
+    return true;
+  };
+
+  while (true) {
+    switch (ss->stage) {
+      case SerializationState::STAGE_INIT: {
+        // A valid stream needs at least the 0xD9 (EOI) marker. An empty
+        // marker list means a corrupted stream or unconditioned JPEGData.
+        // TODO(eustas): check that D9 is the only one and is the last one.
+        if (jpg.marker_order.empty()) {
+          ss->stage = SerializationState::STAGE_ERROR;
+          break;
+        }
+        ss->dc_huff_table.resize(kMaxHuffmanTables);
+        ss->ac_huff_table.resize(kMaxHuffmanTables);
+        if (jpg.has_zero_padding_bit) {
+          ss->pad_bits = jpg.padding_bits.data();
+          ss->pad_bits_end = ss->pad_bits + jpg.padding_bits.size();
+        }
+
+        EncodeSOI(ss);
+        JXL_QUIET_RETURN_IF_ERROR(maybe_push_output());
+        ss->stage = SerializationState::STAGE_SERIALIZE_SECTION;
+        break;
+      }
+
+      case SerializationState::STAGE_SERIALIZE_SECTION: {
+        if (ss->section_index >= jpg.marker_order.size()) {
+          ss->stage = SerializationState::STAGE_DONE;  // every marker handled
+          break;
+        }
+        uint8_t marker = jpg.marker_order[ss->section_index];
+        SerializationStatus status = SerializeSection(marker, ss, jpg);
+        if (status == SerializationStatus::ERROR) {
+          JXL_WARNING("Failed to encode marker 0x%.2x", marker);
+          ss->stage = SerializationState::STAGE_ERROR;
+          break;
+        }
+        JXL_QUIET_RETURN_IF_ERROR(maybe_push_output());
+        if (status == SerializationStatus::NEEDS_MORE_INPUT) {
+          return JXL_FAILURE("Incomplete serialization data");
+        } else if (status != SerializationStatus::DONE) {
+          JXL_DASSERT(false);
+          ss->stage = SerializationState::STAGE_ERROR;
+          break;
+        }
+        ++ss->section_index;
+        break;
+      }
+
+      case SerializationState::STAGE_DONE:
+        JXL_ASSERT(ss->output_queue.empty());
+        if (ss->pad_bits != nullptr && ss->pad_bits != ss->pad_bits_end) {
+          return JXL_FAILURE("Invalid number of padding bits.");  // leftovers
+        }
+        return true;
+
+      case SerializationState::STAGE_ERROR:
+        return JXL_FAILURE("JPEG serialization error");
+    }
+  }
+}
+
+}  // namespace
+// Serializes |jpg| into |out| using a freshly allocated SerializationState.
+Status WriteJpeg(const JPEGData& jpg, const JPEGOutput& out) {
+  const auto fresh_state = jxl::make_unique<SerializationState>();
+  return WriteJpegInternal(jpg, out, fresh_state.get());
+}
+
+}  // namespace jpeg
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data_writer.h b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data_writer.h
new file mode 100644
index 0000000000..c6f70ff8b1
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data_writer.h
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Functions for writing a JPEGData object into a jpeg byte stream.
+
+#ifndef LIB_JXL_JPEG_DEC_JPEG_DATA_WRITER_H_
+#define LIB_JXL_JPEG_DEC_JPEG_DATA_WRITER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <functional>
+
+#include "lib/jxl/jpeg/dec_jpeg_serialization_state.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+
+// Sink callback: receives |len| bytes starting at |buf| and returns how many
+// of them it consumed; returning 0 for a non-empty buffer fails WriteJpeg.
+using JPEGOutput = std::function<size_t(const uint8_t* buf, size_t len)>;
+// Serializes |jpg| as a JPEG byte stream, delivering the bytes through |out|.
+Status WriteJpeg(const JPEGData& jpg, const JPEGOutput& out);
+
+}  // namespace jpeg
+}  // namespace jxl
+
+#endif  // LIB_JXL_JPEG_DEC_JPEG_DATA_WRITER_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_output_chunk.h b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_output_chunk.h
new file mode 100644
index 0000000000..e003c04952
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_output_chunk.h
@@ -0,0 +1,72 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_JPEG_DEC_JPEG_OUTPUT_CHUNK_H_
+#define LIB_JXL_JPEG_DEC_JPEG_OUTPUT_CHUNK_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <initializer_list>
+#include <memory>
+#include <vector>
+
+namespace jxl {
+namespace jpeg {
+
+/**
+ * A chunk of output data.
+ *
+ * Data producer creates OutputChunks and adds them to the end of the output
+ * queue. Once control flow leaves the producer code, the chunk of data is
+ * considered final and cannot be changed; to underline this fact |next| is a
+ * pointer to const data.
+ *
+ * Data consumer removes OutputChunks from the beginning of the output queue.
+ * It is possible to consume OutputChunks partially, by updating |next| and
+ * |len|.
+ *
+ * There are 2 types of output chunks:
+ *  - owning: actual data is stored in |buffer| field; producer fills data after
+ *    the instance is created; it is legal to reduce |len| to show that not all
+ *    the capacity of |buffer| is used
+ *  - non-owning: represents the data stored (owned) somewhere else
+ */
+struct OutputChunk {
+  // Non-owning view of a container; |len| is bytes.size() (the element count).
+  template <typename Bytes>
+  explicit OutputChunk(Bytes& bytes) : len(bytes.size()) {
+    // Deal both with const qualifier and data type.
+    const void* src = bytes.data();
+    next = reinterpret_cast<const uint8_t*>(src);
+  }
+
+  // Non-owning view of |size| bytes starting at |data|.
+  OutputChunk(const uint8_t* data, size_t size) : next(data), len(size) {}
+
+  // Owning: allocates |size| value-initialized bytes for the producer to fill.
+  explicit OutputChunk(size_t size = 0) {
+    buffer.reset(new std::vector<uint8_t>(size));
+    next = buffer->data();
+    len = size;
+  }
+
+  // Owning: stores a private copy of |bytes|.
+  OutputChunk(std::initializer_list<uint8_t> bytes) {
+    buffer.reset(new std::vector<uint8_t>(bytes));
+    next = buffer->data();
+    len = bytes.size();
+  }
+
+  const uint8_t* next;  // first byte that has not been consumed yet
+  size_t len;           // number of bytes still available at |next|
+  // TODO(veluca): consider removing the unique_ptr.
+  std::unique_ptr<std::vector<uint8_t>> buffer;  // set only by owning ctors
+};
+
+}  // namespace jpeg
+}  // namespace jxl
+
+#endif  // LIB_JXL_JPEG_DEC_JPEG_OUTPUT_CHUNK_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_serialization_state.h b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_serialization_state.h
new file mode 100644
index 0000000000..4fca3ed643
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_serialization_state.h
@@ -0,0 +1,99 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_JPEG_DEC_JPEG_SERIALIZATION_STATE_H_
+#define LIB_JXL_JPEG_DEC_JPEG_SERIALIZATION_STATE_H_
+
+#include <deque>
+#include <vector>
+
+#include "lib/jxl/jpeg/dec_jpeg_output_chunk.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+
+struct HuffmanCodeTable {
+  int8_t depth[256];   // one entry per symbol; presumably code length in bits
+  uint16_t code[256];  // one entry per symbol; presumably the code bits
+  bool initialized = false;  // DoEncodeScan refuses tables where this is false
+  void InitDepths(int value = 0) {  // overwrites every depth[] slot with |value|
+    std::fill(std::begin(depth), std::end(depth), value);
+  }
+};
+
+// Handles the packing of bits into output bytes.
+struct JpegBitWriter {  // no member has a default; see JpegBitWriterInit()
+  bool healthy;  // DoEncodeScan returns ERROR when this is false
+  std::deque<OutputChunk>* output;  // presumably the queue passed to Init
+  OutputChunk chunk;
+  uint8_t* data;
+  size_t pos;
+  uint64_t put_buffer;  // presumably bits not yet emitted - TODO confirm
+  int put_bits;  // presumably the number of bits held in |put_buffer|
+};
+
+// Holds data that is buffered between 8x8 blocks in progressive mode.
+struct DCTCodingState {  // DoEncodeScan calls DCTCodingStateInit() per scan
+  // The run length of end-of-band symbols in a progressive scan.
+  int eob_run_;
+  // The huffman table to be used when flushing the state.
+  HuffmanCodeTable* cur_ac_huff_;
+  // The sequence of currently buffered refinement bits for a successive
+  // approximation scan (one where Ah > 0).
+  std::vector<int> refinement_bits_;
+};
+
+struct EncodeScanState {
+  enum Stage { HEAD, BODY };  // HEAD: SOS not yet written; BODY: coding MCUs
+
+  Stage stage = HEAD;
+
+  int mcu_y;  // next MCU row to encode; the fields below are reset in HEAD
+  JpegBitWriter bw;
+  coeff_t last_dc_coeff[kMaxComponents] = {0};  // DC predictor per component
+  int restarts_to_go;  // MCUs left before the next restart marker
+  int next_restart_marker;  // 0..7, added to 0xD0 when the marker is emitted
+  int block_scan_index;  // number of blocks encoded so far in this scan
+  DCTCodingState coding_state;
+  size_t extra_zero_runs_pos;  // cursor into scan_info.extra_zero_runs
+  int next_extra_zero_run_index;  // block index of that entry, or -1 if none
+  size_t next_reset_point_pos;  // cursor into scan_info.reset_points
+  int next_reset_point;  // block index of the next reset point, or -1 if none
+};
+
+struct SerializationState {
+  enum Stage {
+    STAGE_INIT,
+    STAGE_SERIALIZE_SECTION,
+    STAGE_DONE,
+    STAGE_ERROR,
+  };
+
+  Stage stage = STAGE_INIT;
+
+  std::deque<OutputChunk> output_queue;  // chunks waiting to be handed to out
+
+  size_t section_index = 0;  // cursor into jpg.marker_order
+  int dht_index = 0;
+  int dqt_index = 0;   // next jpg.quant entry for EncodeDQT
+  int app_index = 0;   // next jpg.app_data entry for EncodeAPP
+  int com_index = 0;   // next jpg.com_data entry for EncodeCOM
+  int data_index = 0;  // next jpg.inter_marker_data entry
+  int scan_index = 0;  // next jpg.scan_info entry; bumped when a scan is done
+  std::vector<HuffmanCodeTable> dc_huff_table;  // resized in STAGE_INIT
+  std::vector<HuffmanCodeTable> ac_huff_table;  // resized in STAGE_INIT
+  const uint8_t* pad_bits = nullptr;  // set only if jpg.has_zero_padding_bit
+  const uint8_t* pad_bits_end = nullptr;  // end of jpg.padding_bits
+  bool seen_dri_marker = false;  // set by EncodeDRI; gates restart_interval
+  bool is_progressive = false;
+
+  EncodeScanState scan_state;  // resumable state of the scan being encoded
+};
+
+}  // namespace jpeg
+}  // namespace jxl
+
+#endif  // LIB_JXL_JPEG_DEC_JPEG_SERIALIZATION_STATE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data.cc b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data.cc
new file mode 100644
index 0000000000..460fc2f812
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data.cc
@@ -0,0 +1,384 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/jpeg/enc_jpeg_data.h"
+
+#include <brotli/encode.h>
+#include <stdio.h>
+
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/jpeg/enc_jpeg_data_reader.h"
+#include "lib/jxl/luminance.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+namespace jpeg {
+
+namespace {
+
+constexpr int BITS_IN_JSAMPLE = 8;
+using ByteSpan = Span<const uint8_t>;
+
+// TODO(eustas): move to jpeg_data, to use from codec_jpg as well.
+// See if there is a canonically chunked ICC profile and mark corresponding
+// app-tags with AppMarkerType::kICC.
+//
+// An APP2 segment is accepted as the next ICC chunk only when it carries the
+// ICC signature, its 1-based chunk id is exactly one more than the number of
+// chunks accepted so far, and it declares the same total chunk count as the
+// first accepted chunk. Segments that are too short to hold the signature
+// plus the chunk id and chunk count bytes are skipped instead of being read
+// past their end.
+//
+// Returns a failure when the number of accepted chunks differs from the
+// declared total; returns true otherwise, including when there is no ICC
+// chunk at all.
+Status DetectIccProfile(JPEGData& jpeg_data) {
+  JXL_DASSERT(jpeg_data.app_data.size() == jpeg_data.app_marker_type.size());
+  // Stored segment layout: marker byte, 2-byte big-endian length, signature,
+  // chunk id, chunk count.
+  constexpr size_t kHeaderSize = 3;
+  constexpr size_t kMinIccSegmentSize =
+      kHeaderSize + sizeof kIccProfileTag + 2;
+  size_t num_icc = 0;
+  size_t num_icc_jpeg = 0;
+  for (size_t i = 0; i < jpeg_data.app_data.size(); i++) {
+    const auto& app = jpeg_data.app_data[i];
+    if (app.empty() || app[0] != kApp2) continue;
+    // At least APPn + size; otherwise it should be intermarker-data.
+    JXL_DASSERT(app.size() >= kHeaderSize);
+    // The assert above vanishes in release builds; never read the length
+    // bytes of a segment that does not have them.
+    if (app.size() < kHeaderSize) continue;
+    size_t pos = 1;
+    size_t tag_length = (app[pos] << 8) + app[pos + 1];
+    pos += 2;
+    JXL_DASSERT(app.size() == tag_length + 1);
+    // Both the declared and the actual size must cover the signature and the
+    // two bytes (chunk id, chunk count) that are read below.
+    if (tag_length + 1 < kMinIccSegmentSize ||
+        app.size() < kMinIccSegmentSize) {
+      continue;
+    }
+
+    if (memcmp(&app[pos], kIccProfileTag, sizeof kIccProfileTag) != 0) continue;
+    pos += sizeof kIccProfileTag;
+    uint8_t chunk_id = app[pos++];
+    uint8_t num_chunks = app[pos++];
+    // Chunks must appear in order, starting from 1.
+    if (chunk_id != num_icc + 1) continue;
+    // The first accepted chunk fixes the expected total.
+    if (num_icc_jpeg == 0) num_icc_jpeg = num_chunks;
+    if (num_icc_jpeg != num_chunks) continue;
+    num_icc++;
+    jpeg_data.app_marker_type[i] = AppMarkerType::kICC;
+  }
+  if (num_icc != num_icc_jpeg) {
+    return JXL_FAILURE("Invalid ICC chunks");
+  }
+  return true;
+}
+
+// Extracts the payload of a stored marker segment, i.e. everything after the
+// marker byte and the 2-byte big-endian length. Returns false and leaves
+// *payload untouched when the segment is shorter than 3 bytes or when the
+// length it declares disagrees with `size`.
+bool GetMarkerPayload(const uint8_t* data, size_t size, ByteSpan* payload) {
+  // Second marker byte + two length bytes.
+  constexpr size_t kPrefixLen = 3;
+  if (size < kPrefixLen) return false;
+
+  // The declared length covers its own two bytes but not the marker byte.
+  const size_t declared = (static_cast<size_t>(data[1]) << 8u) | data[2];
+  if (declared + 1 != size) return false;
+
+  ByteSpan rest(data, size);
+  rest.remove_prefix(kPrefixLen);
+  *payload = rest;
+  return true;
+}
+
+// Marks the first APP1 segment whose payload starts with the Exif signature
+// as AppMarkerType::kExif and the first one whose payload starts with the XMP
+// signature as AppMarkerType::kXMP. APP1 segments with an inconsistent
+// length are ignored. Always returns true.
+Status DetectBlobs(jpeg::JPEGData& jpeg_data) {
+  JXL_DASSERT(jpeg_data.app_data.size() == jpeg_data.app_marker_type.size());
+  bool exif_found = false;
+  bool xmp_found = false;
+  const size_t num_segments = jpeg_data.app_data.size();
+  for (size_t idx = 0; idx < num_segments; ++idx) {
+    auto& segment = jpeg_data.app_data[idx];
+    const bool is_app1 = !segment.empty() && segment[0] == kApp1;
+    if (!is_app1) continue;
+
+    ByteSpan body;
+    // Something is wrong with this marker; does not care.
+    if (!GetMarkerPayload(segment.data(), segment.size(), &body)) continue;
+
+    if (!exif_found && body.size() >= sizeof kExifTag &&
+        memcmp(body.data(), kExifTag, sizeof kExifTag) == 0) {
+      jpeg_data.app_marker_type[idx] = AppMarkerType::kExif;
+      exif_found = true;
+    }
+    if (!xmp_found && body.size() >= sizeof kXMPTag &&
+        memcmp(body.data(), kXMPTag, sizeof kXMPTag) == 0) {
+      jpeg_data.app_marker_type[idx] = AppMarkerType::kXMP;
+      xmp_found = true;
+    }
+  }
+  return true;
+}
+
+// Reassembles a payload that is split over several marker segments into
+// `*output`.
+//
+// Only segments of src.app_data whose first byte equals `marker_type` and
+// whose payload starts with `tag` take part; segments rejected by
+// GetMarkerPayload are skipped silently. Right after the tag every segment
+// carries a 1-based chunk index byte followed by a total-chunk-count byte.
+//
+// Fails when a matching segment is too short for those two bytes, when the
+// total is zero or differs between segments, when an index is zero, larger
+// than the total or repeated, when a chunk is missing, or - unless
+// `allow_permutations` is set - when chunks do not appear in index order.
+// `*output` is cleared on entry; when no segment matches it stays empty and
+// the call succeeds.
+Status ParseChunkedMarker(const jpeg::JPEGData& src, uint8_t marker_type,
+                          const ByteSpan& tag, PaddedBytes* output,
+                          bool allow_permutations = false) {
+  output->clear();
+
+  std::vector<ByteSpan> chunks;
+  std::vector<bool> presence;
+  size_t expected_number_of_parts = 0;
+  bool is_first_chunk = true;
+  // Counts the matching segments seen so far (1 for the first one).
+  size_t ordinal = 0;
+  for (const auto& marker : src.app_data) {
+    if (marker.empty() || marker[0] != marker_type) {
+      continue;
+    }
+    ByteSpan payload;
+    if (!GetMarkerPayload(marker.data(), marker.size(), &payload)) {
+      // Something is wrong with this marker; does not care.
+      continue;
+    }
+    if ((payload.size() < tag.size()) ||
+        memcmp(payload.data(), tag.data(), tag.size()) != 0) {
+      continue;
+    }
+    payload.remove_prefix(tag.size());
+    if (payload.size() < 2) {
+      return JXL_FAILURE("Chunk is too small.");
+    }
+    uint8_t index = payload[0];
+    uint8_t total = payload[1];
+    ordinal++;
+    if (!allow_permutations) {
+      if (index != ordinal) return JXL_FAILURE("Invalid chunk order.");
+    }
+
+    // Drop the index / total bytes; what remains is the chunk body.
+    payload.remove_prefix(2);
+
+    JXL_RETURN_IF_ERROR(total != 0);
+    if (is_first_chunk) {
+      is_first_chunk = false;
+      expected_number_of_parts = total;
+      // 1-based indices; 0-th element is added for convenience.
+      chunks.resize(total + 1);
+      presence.resize(total + 1);
+    } else {
+      JXL_RETURN_IF_ERROR(expected_number_of_parts == total);
+    }
+
+    // From here on `total` equals the size the tables were allocated for, so
+    // this range check also keeps the indexing below in bounds.
+    if (index == 0 || index > total) {
+      return JXL_FAILURE("Invalid chunk index.");
+    }
+
+    if (presence[index]) {
+      return JXL_FAILURE("Duplicate chunk.");
+    }
+    presence[index] = true;
+    chunks[index] = payload;
+  }
+
+  // Concatenate the chunk bodies in index order.
+  for (size_t i = 0; i < expected_number_of_parts; ++i) {
+    // 0-th element is not used.
+    size_t index = i + 1;
+    if (!presence[index]) {
+      return JXL_FAILURE("Missing chunk.");
+    }
+    output->append(chunks[index]);
+  }
+
+  return true;
+}
+
+// Copies Exif and XMP payloads (with their signature stripped) from the APP1
+// segments of `jpeg_data` into blobs->exif and blobs->xmp. A blob is written
+// only while it is still empty; a further matching segment merely produces a
+// warning. APP1 segments with an inconsistent length are ignored. Always
+// returns true.
+Status SetBlobsFromJpegData(const jpeg::JPEGData& jpeg_data, Blobs* blobs) {
+  for (const auto& segment : jpeg_data.app_data) {
+    if (segment.empty() || segment[0] != kApp1) continue;
+
+    ByteSpan body;
+    // Something is wrong with this marker; does not care.
+    if (!GetMarkerPayload(segment.data(), segment.size(), &body)) continue;
+
+    const bool is_exif = body.size() >= sizeof kExifTag &&
+                         memcmp(body.data(), kExifTag, sizeof kExifTag) == 0;
+    if (is_exif) {
+      if (!blobs->exif.empty()) {
+        JXL_WARNING(
+            "ReJPEG: multiple Exif blobs, storing only first one in the JPEG "
+            "XL container\n");
+      } else {
+        const size_t exif_len = body.size() - sizeof kExifTag;
+        blobs->exif.resize(exif_len);
+        memcpy(blobs->exif.data(), body.data() + sizeof kExifTag, exif_len);
+      }
+    }
+
+    const bool is_xmp = body.size() >= sizeof kXMPTag &&
+                        memcmp(body.data(), kXMPTag, sizeof kXMPTag) == 0;
+    if (is_xmp) {
+      if (!blobs->xmp.empty()) {
+        JXL_WARNING(
+            "ReJPEG: multiple XMP blobs, storing only first one in the JPEG "
+            "XL container\n");
+      } else {
+        const size_t xmp_len = body.size() - sizeof kXMPTag;
+        blobs->xmp.resize(xmp_len);
+        memcpy(blobs->xmp.data(), body.data() + sizeof kXMPTag, xmp_len);
+      }
+    }
+  }
+  return true;
+}
+
+// Returns true when `bytes` begins with the two bytes 0xFF 0xD8.
+static inline bool IsJPG(const Span<const uint8_t> bytes) {
+  if (bytes.size() < 2) return false;
+  const bool first_ok = bytes[0] == 0xFF;
+  return first_ok && bytes[1] == 0xD8;
+}
+
+}  // namespace
+
+// Derives `*color_encoding` from the ICC profile chunks stored in the APP2
+// segments of `jpg`. A profile that cannot be reassembled only triggers a
+// warning and is then treated like a missing one: the encoding becomes sRGB,
+// in its gray variant when the image has exactly one component. Otherwise
+// the result of ColorEncoding::SetICC is returned.
+Status SetColorEncodingFromJpegData(const jpeg::JPEGData& jpg,
+                                    ColorEncoding* color_encoding) {
+  PaddedBytes icc;
+  if (!ParseChunkedMarker(jpg, kApp2, ByteSpan(kIccProfileTag), &icc)) {
+    JXL_WARNING("ReJPEG: corrupted ICC profile\n");
+    icc.clear();
+  }
+
+  if (!icc.empty()) {
+    return color_encoding->SetICC(std::move(icc), /*cms=*/nullptr);
+  }
+
+  const bool single_channel = jpg.components.size() == 1;
+  *color_encoding = ColorEncoding::SRGB(single_channel);
+  return true;
+}
+
+// Serializes `jpeg_data` into `*bytes`.
+//
+// The output consists of the Bundle-encoded fields, zero-padded to a byte
+// boundary, followed by a single Brotli stream that contains, in this order:
+// every APP segment whose type is still kUnknown after classification, every
+// COM segment, every inter-marker blob and finally the tail data.
+//
+// Side effect: jpeg_data.app_marker_type is resized to match app_data and
+// ICC / Exif / XMP segments are classified in it.
+//
+// Fails when ICC chunk detection or the Bundle write fails, or when the
+// Brotli encoder instance cannot be created. An error reported by the Brotli
+// streaming call itself is still treated as fatal (JXL_CHECK).
+Status EncodeJPEGData(JPEGData& jpeg_data, PaddedBytes* bytes,
+                      const CompressParams& cparams) {
+  jpeg_data.app_marker_type.resize(jpeg_data.app_data.size(),
+                                   AppMarkerType::kUnknown);
+  JXL_RETURN_IF_ERROR(DetectIccProfile(jpeg_data));
+  JXL_RETURN_IF_ERROR(DetectBlobs(jpeg_data));
+  BitWriter writer;
+  JXL_RETURN_IF_ERROR(Bundle::Write(jpeg_data, &writer, 0, nullptr));
+  writer.ZeroPadToByte();
+  *bytes = std::move(writer).TakeBytes();
+  BrotliEncoderState* brotli_enc =
+      BrotliEncoderCreateInstance(nullptr, nullptr, nullptr);
+  // Creation yields a null pointer when the instance cannot be allocated or
+  // initialized; every call below would dereference it.
+  if (brotli_enc == nullptr) {
+    return JXL_FAILURE("Failed to create Brotli encoder instance");
+  }
+  // A negative brotli_effort means "derive the quality from the speed tier".
+  int effort = cparams.brotli_effort;
+  if (effort < 0) effort = 11 - static_cast<int>(cparams.speed_tier);
+  BrotliEncoderSetParameter(brotli_enc, BROTLI_PARAM_QUALITY, effort);
+  // Total number of input bytes that will be fed to Brotli; used both as a
+  // size hint and to size the output buffer up front.
+  size_t total_data = 0;
+  for (size_t i = 0; i < jpeg_data.app_data.size(); i++) {
+    if (jpeg_data.app_marker_type[i] != AppMarkerType::kUnknown) {
+      continue;
+    }
+    total_data += jpeg_data.app_data[i].size();
+  }
+  for (size_t i = 0; i < jpeg_data.com_data.size(); i++) {
+    total_data += jpeg_data.com_data[i].size();
+  }
+  for (size_t i = 0; i < jpeg_data.inter_marker_data.size(); i++) {
+    total_data += jpeg_data.inter_marker_data[i].size();
+  }
+  total_data += jpeg_data.tail_data.size();
+  size_t initial_size = bytes->size();
+  size_t brotli_capacity = BrotliEncoderMaxCompressedSize(total_data);
+  BrotliEncoderSetParameter(brotli_enc, BROTLI_PARAM_SIZE_HINT, total_data);
+  bytes->resize(bytes->size() + brotli_capacity);
+  // Running total of compressed bytes written after `initial_size`.
+  size_t enc_size = 0;
+  // Feeds one buffer into the shared Brotli stream. `brotli_capacity` is
+  // passed as the remaining output space and shrinks as output is produced;
+  // `last` finishes the stream.
+  auto br_append = [&](const std::vector<uint8_t>& data, bool last) {
+    size_t available_in = data.size();
+    const uint8_t* in = data.data();
+    uint8_t* out = &(*bytes)[initial_size + enc_size];
+    do {
+      uint8_t* out_before = out;
+      msan::MemoryIsInitialized(in, available_in);
+      JXL_CHECK(BrotliEncoderCompressStream(
+          brotli_enc, last ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
+          &available_in, &in, &brotli_capacity, &out, &enc_size));
+      msan::UnpoisonMemory(out_before, out - out_before);
+    } while (BrotliEncoderHasMoreOutput(brotli_enc) || available_in > 0);
+  };
+
+  for (size_t i = 0; i < jpeg_data.app_data.size(); i++) {
+    if (jpeg_data.app_marker_type[i] != AppMarkerType::kUnknown) {
+      continue;
+    }
+    br_append(jpeg_data.app_data[i], /*last=*/false);
+  }
+  for (size_t i = 0; i < jpeg_data.com_data.size(); i++) {
+    br_append(jpeg_data.com_data[i], /*last=*/false);
+  }
+  for (size_t i = 0; i < jpeg_data.inter_marker_data.size(); i++) {
+    br_append(jpeg_data.inter_marker_data[i], /*last=*/false);
+  }
+  br_append(jpeg_data.tail_data, /*last=*/true);
+  BrotliEncoderDestroyInstance(brotli_enc);
+  // Trim the buffer down to what was actually produced.
+  bytes->resize(initial_size + enc_size);
+  return true;
+}
+
+// Reads a JPEG bitstream into io->Main().jpeg_data and fills in the related
+// metadata: color encoding (from the ICC profile, sRGB otherwise), Exif / XMP
+// blobs, chroma subsampling and the color transform. The frame's pixel image
+// is only allocated with the JPEG dimensions.
+//
+// Returns false without touching `io` when `bytes` does not start with the
+// JPEG signature. Fails when the stream cannot be read, when color encoding
+// or subsampling setup fails, or when the component count is neither 1 nor 3.
+// io->frames is reset before parsing, so `io` is modified on those failures.
+Status DecodeImageJPG(const Span<const uint8_t> bytes, CodecInOut* io) {
+  if (!IsJPG(bytes)) return false;
+  io->frames.clear();
+  io->frames.reserve(1);
+  io->frames.emplace_back(&io->metadata.m);
+  io->Main().jpeg_data = make_unique<jpeg::JPEGData>();
+  jpeg::JPEGData* jpeg_data = io->Main().jpeg_data.get();
+  if (!jpeg::ReadJpeg(bytes.data(), bytes.size(), jpeg::JpegReadMode::kReadAll,
+                      jpeg_data)) {
+    return JXL_FAILURE("Error reading JPEG");
+  }
+  JXL_RETURN_IF_ERROR(
+      SetColorEncodingFromJpegData(*jpeg_data, &io->metadata.m.color_encoding));
+  JXL_RETURN_IF_ERROR(SetBlobsFromJpegData(*jpeg_data, &io->blobs));
+  size_t nbcomp = jpeg_data->components.size();
+  if (nbcomp != 1 && nbcomp != 3) {
+    return JXL_FAILURE("Cannot recompress JPEGs with neither 1 nor 3 channels");
+  }
+  YCbCrChromaSubsampling cs;
+  if (nbcomp == 3) {
+    uint8_t hsample[3], vsample[3];
+    for (size_t i = 0; i < nbcomp; i++) {
+      hsample[i] = jpeg_data->components[i].h_samp_factor;
+      vsample[i] = jpeg_data->components[i].v_samp_factor;
+    }
+    JXL_RETURN_IF_ERROR(cs.Set(hsample, vsample));
+  } else if (nbcomp == 1) {
+    // Single component: its sampling factors are replicated for all three
+    // slots that cs.Set() expects.
+    uint8_t hsample[3], vsample[3];
+    for (size_t i = 0; i < 3; i++) {
+      hsample[i] = jpeg_data->components[0].h_samp_factor;
+      vsample[i] = jpeg_data->components[0].v_samp_factor;
+    }
+    JXL_RETURN_IF_ERROR(cs.Set(hsample, vsample));
+  }
+  bool is_rgb = false;
+  {
+    const auto& markers = jpeg_data->marker_order;
+    // If there is a JFIF marker, this is YCbCr. Otherwise...
+    if (std::find(markers.begin(), markers.end(), 0xE0) == markers.end()) {
+      // Try to find an 'Adobe' marker.
+      // `app_markers` counts the APPn markers passed so far, which is the
+      // index of the current one inside jpeg_data->app_data.
+      size_t app_markers = 0;
+      size_t i = 0;
+      for (; i < markers.size(); i++) {
+        // This is an APP marker.
+        if ((markers[i] & 0xF0) == 0xE0) {
+          JXL_CHECK(app_markers < jpeg_data->app_data.size());
+          // APP14 marker
+          if (markers[i] == 0xEE) {
+            const auto& data = jpeg_data->app_data[app_markers];
+            if (data.size() == 15 && data[3] == 'A' && data[4] == 'd' &&
+                data[5] == 'o' && data[6] == 'b' && data[7] == 'e') {
+              // 'Adobe' marker.
+              // The last byte of the segment decides: zero means the
+              // components are stored without a YCbCr transform.
+              is_rgb = data[14] == 0;
+              break;
+            }
+          }
+          app_markers++;
+        }
+      }
+
+      // The loop ran to completion only if it never hit the `break` above.
+      if (i == markers.size()) {
+        // No 'Adobe' marker, guess from component IDs.
+        is_rgb = nbcomp == 3 && jpeg_data->components[0].id == 'R' &&
+                 jpeg_data->components[1].id == 'G' &&
+                 jpeg_data->components[2].id == 'B';
+      }
+    }
+  }
+
+  io->Main().chroma_subsampling = cs;
+  // Single-component images always use the YCbCr transform setting.
+  io->Main().color_transform =
+      (!is_rgb || nbcomp == 1) ? ColorTransform::kYCbCr : ColorTransform::kNone;
+
+  io->metadata.m.SetIntensityTarget(kDefaultIntensityTarget);
+  io->metadata.m.SetUintSamples(BITS_IN_JSAMPLE);
+  io->SetFromImage(Image3F(jpeg_data->width, jpeg_data->height),
+                   io->metadata.m.color_encoding);
+  SetIntensityTarget(&io->metadata.m);
+  return true;
+}
+
+}  // namespace jpeg
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data.h b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data.h
new file mode 100644
index 0000000000..806128c465
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data.h
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_JPEG_ENC_JPEG_DATA_H_
+#define LIB_JXL_JPEG_ENC_JPEG_DATA_H_
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+/**
+ * Serializes jpeg_data into *bytes: the encoded fields followed by a Brotli
+ * stream of the unclassified APP segments, the COM segments, the inter-marker
+ * data and the tail data. Also fills jpeg_data.app_marker_type.
+ */
+Status EncodeJPEGData(JPEGData& jpeg_data, PaddedBytes* bytes,
+                      const CompressParams& cparams);
+
+/**
+ * Sets *color_encoding from the ICC profile carried in the APP2 segments of
+ * jpg; falls back to sRGB (gray when there is exactly one component) when no
+ * usable profile is present.
+ */
+Status SetColorEncodingFromJpegData(const jpeg::JPEGData& jpg,
+                                    ColorEncoding* color_encoding);
+
+/**
+ * Decodes bytes containing JPEG codestream into a CodecInOut as coefficients
+ * only, for lossless JPEG transcoding.
+ */
+Status DecodeImageJPG(Span<const uint8_t> bytes, CodecInOut* io);
+
+}  // namespace jpeg
+}  // namespace jxl
+
+#endif  // LIB_JXL_JPEG_ENC_JPEG_DATA_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data_reader.cc b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data_reader.cc
new file mode 100644
index 0000000000..f569b73363
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data_reader.cc
@@ -0,0 +1,1053 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/jpeg/enc_jpeg_data_reader.h"
+
+#include <inttypes.h>
+#include <string.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/jpeg/enc_jpeg_huffman_decode.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+
+namespace {
+static const int kBrunsliMaxSampling = 15;
+
+// Macros for commonly used error conditions.
+// Each one expands to an `if` statement that returns JXL_FAILURE(...) from
+// the enclosing function, and each one refers to local variables of that
+// function by name (listed per macro below).
+
+// Fails unless `n` more bytes are available at *pos.
+// Needs in scope: `pos` (size_t*), `len`.
+#define JXL_JPEG_VERIFY_LEN(n)                                \
+  if (*pos + (n) > len) {                                     \
+    return JXL_FAILURE("Unexpected end of input: pos=%" PRIuS \
+                       " need=%d len=%" PRIuS,                \
+                       *pos, static_cast<int>(n), len);       \
+  }
+
+// Fails unless low <= var <= high. The source text of `var` becomes part of
+// the error message; the `code` argument is not used by the expansion.
+#define JXL_JPEG_VERIFY_INPUT(var, low, high, code)                    \
+  if ((var) < (low) || (var) > (high)) {                               \
+    return JXL_FAILURE("Invalid " #var ": %d", static_cast<int>(var)); \
+  }
+
+// Fails unless exactly `marker_len` bytes were consumed since `start_pos`.
+// Needs in scope: `start_pos`, `marker_len`, `pos` (size_t*).
+#define JXL_JPEG_VERIFY_MARKER_END()                             \
+  if (start_pos + marker_len != *pos) {                          \
+    return JXL_FAILURE("Invalid marker length: declared=%" PRIuS \
+                       " actual=%" PRIuS,                        \
+                       marker_len, (*pos - start_pos));          \
+  }
+
+// Fails unless two bytes are available at `pos` and the first one is 0xff.
+// Unlike the macros above, `pos` is used here as a value, not as a pointer.
+// Needs in scope: `pos` (size_t), `len`, `data`.
+#define JXL_JPEG_EXPECT_MARKER()                                 \
+  if (pos + 2 > len || data[pos] != 0xff) {                      \
+    return JXL_FAILURE(                                          \
+        "Marker byte (0xff) expected, found: 0x%.2x pos=%" PRIuS \
+        " len=%" PRIuS,                                          \
+        (pos < len ? data[pos] : 0), pos, len);                  \
+  }
+
+// Returns the byte stored at data[*pos] and advances *pos by one.
+// Performs no bounds check; the caller must have verified the length.
+inline int ReadUint8(const uint8_t* data, size_t* pos) {
+  const size_t at = *pos;
+  *pos = at + 1;
+  return data[at];
+}
+
+// Returns the big-endian 16-bit value stored at data[*pos] and advances *pos
+// by two. Performs no bounds check; the caller must have verified the length.
+inline int ReadUint16(const uint8_t* data, size_t* pos) {
+  const uint8_t* cursor = data + *pos;
+  *pos += 2;
+  const int high = cursor[0];
+  const int low = cursor[1];
+  return (high << 8) | low;
+}
+
+// Reads the Start of Frame (SOF) marker segment and fills in *jpg with the
+// parsed data.
+//
+// On entry *pos points at the 2-byte segment length; on success it points
+// just past the segment. Fails on a second SOF (recognized by a non-zero
+// jpg->width), on truncated input, on a precision other than 8, on
+// out-of-range dimensions, component count or sampling factors, on duplicate
+// component ids, on non-integral subsampling ratios and when the declared
+// segment length does not match the bytes consumed. In kReadAll mode the
+// coefficient storage of every component is sized as well.
+bool ProcessSOF(const uint8_t* data, const size_t len, JpegReadMode mode,
+                size_t* pos, JPEGData* jpg) {
+  if (jpg->width != 0) {
+    return JXL_FAILURE("Duplicate SOF marker.");
+  }
+  const size_t start_pos = *pos;
+  // length (2) + precision (1) + height (2) + width (2) + component count (1)
+  JXL_JPEG_VERIFY_LEN(8);
+  size_t marker_len = ReadUint16(data, pos);
+  int precision = ReadUint8(data, pos);
+  int height = ReadUint16(data, pos);
+  int width = ReadUint16(data, pos);
+  int num_components = ReadUint8(data, pos);
+  // 'jbrd' is hardcoded for 8bits:
+  JXL_JPEG_VERIFY_INPUT(precision, 8, 8, PRECISION);
+  JXL_JPEG_VERIFY_INPUT(height, 1, kMaxDimPixels, HEIGHT);
+  JXL_JPEG_VERIFY_INPUT(width, 1, kMaxDimPixels, WIDTH);
+  JXL_JPEG_VERIFY_INPUT(num_components, 1, kMaxComponents, NUMCOMP);
+  // Each component entry is 3 bytes: id, sampling factors, quant table index.
+  JXL_JPEG_VERIFY_LEN(3 * num_components);
+  jpg->height = height;
+  jpg->width = width;
+  jpg->components.resize(num_components);
+
+  // Read sampling factors and quant table index for each component.
+  std::vector<bool> ids_seen(256, false);
+  int max_h_samp_factor = 1;
+  int max_v_samp_factor = 1;
+  for (size_t i = 0; i < jpg->components.size(); ++i) {
+    const int id = ReadUint8(data, pos);
+    if (ids_seen[id]) {  // (cf. section B.2.2, syntax of Ci)
+      return JXL_FAILURE("Duplicate ID %d in SOF.", id);
+    }
+    ids_seen[id] = true;
+    jpg->components[i].id = id;
+    // High nibble: horizontal factor; low nibble: vertical factor.
+    int factor = ReadUint8(data, pos);
+    int h_samp_factor = factor >> 4;
+    int v_samp_factor = factor & 0xf;
+    JXL_JPEG_VERIFY_INPUT(h_samp_factor, 1, kBrunsliMaxSampling, SAMP_FACTOR);
+    JXL_JPEG_VERIFY_INPUT(v_samp_factor, 1, kBrunsliMaxSampling, SAMP_FACTOR);
+    jpg->components[i].h_samp_factor = h_samp_factor;
+    jpg->components[i].v_samp_factor = v_samp_factor;
+    jpg->components[i].quant_idx = ReadUint8(data, pos);
+    max_h_samp_factor = std::max(max_h_samp_factor, h_samp_factor);
+    max_v_samp_factor = std::max(max_v_samp_factor, v_samp_factor);
+  }
+
+  // We have checked above that none of the sampling factors are 0, so the max
+  // sampling factors can not be 0.
+  int MCU_rows = DivCeil(jpg->height, max_v_samp_factor * 8);
+  int MCU_cols = DivCeil(jpg->width, max_h_samp_factor * 8);
+  // Compute the block dimensions for each component.
+  for (size_t i = 0; i < jpg->components.size(); ++i) {
+    JPEGComponent* c = &jpg->components[i];
+    if (max_h_samp_factor % c->h_samp_factor != 0 ||
+        max_v_samp_factor % c->v_samp_factor != 0) {
+      return JXL_FAILURE("Non-integral subsampling ratios.");
+    }
+    c->width_in_blocks = MCU_cols * c->h_samp_factor;
+    c->height_in_blocks = MCU_rows * c->v_samp_factor;
+    // Widened to 64 bits before multiplying.
+    const uint64_t num_blocks =
+        static_cast<uint64_t>(c->width_in_blocks) * c->height_in_blocks;
+    if (mode == JpegReadMode::kReadAll) {
+      c->coeffs.resize(num_blocks * kDCTBlockSize);
+    }
+  }
+  JXL_JPEG_VERIFY_MARKER_END();
+  return true;
+}
+
+// Reads the Start of Scan (SOS) marker segment and appends the parsed scan
+// description to jpg->scan_info.
+//
+// On entry *pos points at the 2-byte segment length; on success it points
+// just past the segment. Fails on truncated input, on a component count
+// outside [1, number of frame components], on duplicate or unknown component
+// ids, on Huffman table indices above 3, on invalid spectral selection
+// bounds, when a required Huffman table has not been defined, and when the
+// declared length does not match the bytes consumed. Inconsistent successive
+// approximation parameters only produce a warning.
+bool ProcessSOS(const uint8_t* data, const size_t len, size_t* pos,
+                JPEGData* jpg) {
+  const size_t start_pos = *pos;
+  JXL_JPEG_VERIFY_LEN(3);
+  size_t marker_len = ReadUint16(data, pos);
+  size_t comps_in_scan = ReadUint8(data, pos);
+  JXL_JPEG_VERIFY_INPUT(comps_in_scan, 1, jpg->components.size(),
+                        COMPS_IN_SCAN);
+
+  JPEGScanInfo scan_info;
+  scan_info.num_components = comps_in_scan;
+  // Two bytes per component: id and packed DC / AC table indices.
+  JXL_JPEG_VERIFY_LEN(2 * comps_in_scan);
+  std::vector<bool> ids_seen(256, false);
+  for (size_t i = 0; i < comps_in_scan; ++i) {
+    uint32_t id = ReadUint8(data, pos);
+    if (ids_seen[id]) {  // (cf. section B.2.3, regarding CSj)
+      return JXL_FAILURE("Duplicate ID %d in SOS.", id);
+    }
+    ids_seen[id] = true;
+    // Map the component id to its position in the frame's component list.
+    bool found_index = false;
+    for (size_t j = 0; j < jpg->components.size(); ++j) {
+      if (jpg->components[j].id == id) {
+        scan_info.components[i].comp_idx = j;
+        found_index = true;
+      }
+    }
+    if (!found_index) {
+      return JXL_FAILURE("SOS marker: Could not find component with id %d", id);
+    }
+    // High nibble: DC table index; low nibble: AC table index.
+    int c = ReadUint8(data, pos);
+    int dc_tbl_idx = c >> 4;
+    int ac_tbl_idx = c & 0xf;
+    JXL_JPEG_VERIFY_INPUT(dc_tbl_idx, 0, 3, HUFFMAN_INDEX);
+    JXL_JPEG_VERIFY_INPUT(ac_tbl_idx, 0, 3, HUFFMAN_INDEX);
+    scan_info.components[i].dc_tbl_idx = dc_tbl_idx;
+    scan_info.components[i].ac_tbl_idx = ac_tbl_idx;
+  }
+  // Ss, Se and the packed Ah / Al byte.
+  JXL_JPEG_VERIFY_LEN(3);
+  scan_info.Ss = ReadUint8(data, pos);
+  scan_info.Se = ReadUint8(data, pos);
+  JXL_JPEG_VERIFY_INPUT(static_cast<int>(scan_info.Ss), 0, 63, START_OF_SCAN);
+  JXL_JPEG_VERIFY_INPUT(scan_info.Se, scan_info.Ss, 63, END_OF_SCAN);
+  int c = ReadUint8(data, pos);
+  scan_info.Ah = c >> 4;
+  scan_info.Al = c & 0xf;
+  if (scan_info.Ah != 0 && scan_info.Al != scan_info.Ah - 1) {
+    // section G.1.1.1.2 : Successive approximation control only improves
+    // by one bit at a time. But it's not always respected, so we just issue
+    // a warning.
+    JXL_WARNING("Invalid progressive parameters: Al=%d Ah=%d", scan_info.Al,
+                scan_info.Ah);
+  }
+  // Check that all the Huffman tables needed for this scan are defined.
+  for (size_t i = 0; i < comps_in_scan; ++i) {
+    bool found_dc_table = false;
+    bool found_ac_table = false;
+    for (size_t j = 0; j < jpg->huffman_code.size(); ++j) {
+      // AC tables are stored with slot ids offset by 16.
+      uint32_t slot_id = jpg->huffman_code[j].slot_id;
+      if (slot_id == scan_info.components[i].dc_tbl_idx) {
+        found_dc_table = true;
+      } else if (slot_id == scan_info.components[i].ac_tbl_idx + 16) {
+        found_ac_table = true;
+      }
+    }
+    // A DC table is required only when the scan covers coefficient 0, an AC
+    // table only when it reaches beyond coefficient 0.
+    if (scan_info.Ss == 0 && !found_dc_table) {
+      return JXL_FAILURE(
+          "SOS marker: Could not find DC Huffman table with index %d",
+          scan_info.components[i].dc_tbl_idx);
+    }
+    if (scan_info.Se > 0 && !found_ac_table) {
+      return JXL_FAILURE(
+          "SOS marker: Could not find AC Huffman table with index %d",
+          scan_info.components[i].ac_tbl_idx);
+    }
+  }
+  // Note: the scan is recorded before the final length check below.
+  jpg->scan_info.push_back(scan_info);
+  JXL_JPEG_VERIFY_MARKER_END();
+  return true;
+}
+
+// Reads the Define Huffman Table (DHT) marker segment and fills in *jpg with
+// the parsed data. Builds the Huffman decoding table in either dc_huff_lut or
+// ac_huff_lut, depending on the type and slot_id of Huffman code being read.
+//
+// A segment may define several tables; each one is appended to
+// jpg->huffman_code. The lookup table itself is built only in kReadAll mode.
+// Fails on an empty segment, truncated input, a table index above 3, too
+// many or duplicate code values, code lengths that oversubscribe the code
+// space, and when the declared length does not match the bytes consumed.
+bool ProcessDHT(const uint8_t* data, const size_t len, JpegReadMode mode,
+                std::vector<HuffmanTableEntry>* dc_huff_lut,
+                std::vector<HuffmanTableEntry>* ac_huff_lut, size_t* pos,
+                JPEGData* jpg) {
+  const size_t start_pos = *pos;
+  JXL_JPEG_VERIFY_LEN(2);
+  size_t marker_len = ReadUint16(data, pos);
+  if (marker_len == 2) {
+    return JXL_FAILURE("DHT marker: no Huffman table found");
+  }
+  while (*pos < start_pos + marker_len) {
+    // Slot byte plus one count byte per code length.
+    JXL_JPEG_VERIFY_LEN(1 + kJpegHuffmanMaxBitLength);
+    JPEGHuffmanCode huff;
+    huff.slot_id = ReadUint8(data, pos);
+    int huffman_index = huff.slot_id;
+    // Bit 0x10 of the slot id selects an AC table.
+    int is_ac_table = (huff.slot_id & 0x10) != 0;
+    HuffmanTableEntry* huff_lut;
+    if (is_ac_table) {
+      huffman_index -= 0x10;
+      JXL_JPEG_VERIFY_INPUT(huffman_index, 0, 3, HUFFMAN_INDEX);
+      huff_lut = &(*ac_huff_lut)[huffman_index * kJpegHuffmanLutSize];
+    } else {
+      JXL_JPEG_VERIFY_INPUT(huffman_index, 0, 3, HUFFMAN_INDEX);
+      huff_lut = &(*dc_huff_lut)[huffman_index * kJpegHuffmanLutSize];
+    }
+    huff.counts[0] = 0;
+    int total_count = 0;
+    // Remaining code space, in units of the longest code length; it goes
+    // negative when the lengths describe more codes than can exist.
+    int space = 1 << kJpegHuffmanMaxBitLength;
+    int max_depth = 1;
+    for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) {
+      int count = ReadUint8(data, pos);
+      if (count != 0) {
+        max_depth = i;
+      }
+      huff.counts[i] = count;
+      total_count += count;
+      space -= count * (1 << (kJpegHuffmanMaxBitLength - i));
+    }
+    if (is_ac_table) {
+      JXL_JPEG_VERIFY_INPUT(total_count, 0, kJpegHuffmanAlphabetSize,
+                            HUFFMAN_CODE);
+    } else {
+      JXL_JPEG_VERIFY_INPUT(total_count, 0, kJpegDCAlphabetSize, HUFFMAN_CODE);
+    }
+    JXL_JPEG_VERIFY_LEN(total_count);
+    std::vector<bool> values_seen(256, false);
+    for (int i = 0; i < total_count; ++i) {
+      int value = ReadUint8(data, pos);
+      if (!is_ac_table) {
+        JXL_JPEG_VERIFY_INPUT(value, 0, kJpegDCAlphabetSize - 1, HUFFMAN_CODE);
+      }
+      if (values_seen[value]) {
+        return JXL_FAILURE("Duplicate Huffman code value %d", value);
+      }
+      values_seen[value] = true;
+      huff.values[i] = value;
+    }
+    // Add an invalid symbol that will have the all 1 code.
+    ++huff.counts[max_depth];
+    huff.values[total_count] = kJpegHuffmanAlphabetSize;
+    space -= (1 << (kJpegHuffmanMaxBitLength - max_depth));
+    if (space < 0) {
+      return JXL_FAILURE("Invalid Huffman code lengths.");
+    } else if (space > 0 && huff_lut[0].value != 0xffff) {
+      // Re-initialize the values to an invalid symbol so that we can recognize
+      // it when reading the bit stream using a Huffman code with space > 0.
+      for (int i = 0; i < kJpegHuffmanLutSize; ++i) {
+        huff_lut[i].bits = 0;
+        huff_lut[i].value = 0xffff;
+      }
+    }
+    // True for the table that ends exactly at the end of the segment.
+    huff.is_last = (*pos == start_pos + marker_len);
+    if (mode == JpegReadMode::kReadAll) {
+      BuildJpegHuffmanTable(&huff.counts[0], &huff.values[0], huff_lut);
+    }
+    jpg->huffman_code.push_back(huff);
+  }
+  JXL_JPEG_VERIFY_MARKER_END();
+  return true;
+}
+
+// Reads the Define Quantization Table (DQT) marker segment and fills in *jpg
+// with the parsed data.
+//
+// A segment may hold several tables; each one is appended to jpg->quant
+// until the segment ends or kMaxQuantTables tables are stored. Fails on an
+// empty segment, truncated input, a precision flag above 1, a table index
+// above 3, a zero quantization value, and when the declared length does not
+// match the bytes consumed.
+bool ProcessDQT(const uint8_t* data, const size_t len, size_t* pos,
+                JPEGData* jpg) {
+  const size_t start_pos = *pos;
+  JXL_JPEG_VERIFY_LEN(2);
+  size_t marker_len = ReadUint16(data, pos);
+  if (marker_len == 2) {
+    return JXL_FAILURE("DQT marker: no quantization table found");
+  }
+  while (*pos < start_pos + marker_len && jpg->quant.size() < kMaxQuantTables) {
+    JXL_JPEG_VERIFY_LEN(1);
+    // High nibble: precision flag; low nibble: table index.
+    int quant_table_index = ReadUint8(data, pos);
+    int quant_table_precision = quant_table_index >> 4;
+    JXL_JPEG_VERIFY_INPUT(quant_table_precision, 0, 1, QUANT_TBL_PRECISION);
+    quant_table_index &= 0xf;
+    JXL_JPEG_VERIFY_INPUT(quant_table_index, 0, 3, QUANT_TBL_INDEX);
+    // One byte per value for precision 0, two bytes for precision 1.
+    JXL_JPEG_VERIFY_LEN((quant_table_precision + 1) * kDCTBlockSize);
+    JPEGQuantTable table;
+    table.index = quant_table_index;
+    table.precision = quant_table_precision;
+    for (size_t i = 0; i < kDCTBlockSize; ++i) {
+      int quant_val =
+          quant_table_precision ? ReadUint16(data, pos) : ReadUint8(data, pos);
+      JXL_JPEG_VERIFY_INPUT(quant_val, 1, 65535, QUANT_VAL);
+      // The i-th value read is stored at position kJPEGNaturalOrder[i].
+      table.values[kJPEGNaturalOrder[i]] = quant_val;
+    }
+    // True for the table that ends exactly at the end of the segment.
+    table.is_last = (*pos == start_pos + marker_len);
+    jpg->quant.push_back(table);
+  }
+  JXL_JPEG_VERIFY_MARKER_END();
+  return true;
+}
+
+// Parses a DRI segment and stores its restart interval in *jpg.
+// *found_dri records that a DRI segment has been seen; a second one is an
+// error. The flag is raised before the segment body is validated. Also fails
+// on truncated input or when the declared length is not the 4 bytes read.
+bool ProcessDRI(const uint8_t* data, const size_t len, size_t* pos,
+                bool* found_dri, JPEGData* jpg) {
+  const bool already_seen = *found_dri;
+  if (already_seen) return JXL_FAILURE("Duplicate DRI marker.");
+  *found_dri = true;
+
+  const size_t start_pos = *pos;
+  JXL_JPEG_VERIFY_LEN(4);
+  size_t marker_len = ReadUint16(data, pos);
+  jpg->restart_interval = ReadUint16(data, pos);
+  JXL_JPEG_VERIFY_MARKER_END();
+  return true;
+}
+
+// Appends one APP segment to jpg->app_data. The stored bytes start with the
+// marker byte that precedes the length field, followed by the 2-byte length
+// and the payload, i.e. marker_len + 1 bytes in total. *pos is advanced past
+// the segment. Fails on truncated input or a declared length below 2.
+bool ProcessAPP(const uint8_t* data, const size_t len, size_t* pos,
+                JPEGData* jpg) {
+  JXL_JPEG_VERIFY_LEN(2);
+  const size_t marker_len = ReadUint16(data, pos);
+  JXL_JPEG_VERIFY_INPUT(marker_len, 2, 65535, MARKER_LEN);
+  const size_t payload_len = marker_len - 2;
+  JXL_JPEG_VERIFY_LEN(payload_len);
+  JXL_DASSERT(*pos >= 3);
+  // Save the marker type together with the app data: step back over the two
+  // length bytes just read and the marker byte in front of them.
+  const uint8_t* const first = data + *pos - 3;
+  const uint8_t* const last = first + marker_len + 1;
+  jpg->app_data.emplace_back(first, last);
+  *pos += payload_len;
+  return true;
+}
+
+// Appends one COM segment to jpg->com_data. The stored bytes start with the
+// marker byte that precedes the length field, followed by the 2-byte length
+// and the comment payload, i.e. marker_len + 1 bytes in total. *pos is
+// advanced past the segment. Fails on truncated input or a declared length
+// below 2.
+bool ProcessCOM(const uint8_t* data, const size_t len, size_t* pos,
+                JPEGData* jpg) {
+  JXL_JPEG_VERIFY_LEN(2);
+  const size_t marker_len = ReadUint16(data, pos);
+  JXL_JPEG_VERIFY_INPUT(marker_len, 2, 65535, MARKER_LEN);
+  const size_t payload_len = marker_len - 2;
+  JXL_JPEG_VERIFY_LEN(payload_len);
+  // Step back over the two length bytes just read and the marker byte.
+  const uint8_t* const first = data + *pos - 3;
+  const uint8_t* const last = first + marker_len + 1;
+  jpg->com_data.emplace_back(first, last);
+  *pos += payload_len;
+  return true;
+}
+
+// Helper structure to read bits from the entropy coded data segment.
+//
+// Bytes are pulled into a 64-bit window (`val_`), of which the low
+// `bits_left_` bits are still unread. Reading stops delivering real data at
+// `next_marker_pos_`; past that point zero bytes are supplied while `pos_`
+// keeps advancing, which lets FinishStream() detect that the scan ran out of
+// data.
+struct BitReaderState {
+  BitReaderState(const uint8_t* data, const size_t len, size_t pos)
+      : data_(data), len_(len) {
+    Reset(pos);
+  }
+
+  // Restarts reading at `pos` with an empty window and refills it.
+  void Reset(size_t pos) {
+    pos_ = pos;
+    val_ = 0;
+    bits_left_ = 0;
+    // NOTE(review): assumes len_ >= 2; the subtraction wraps around
+    // otherwise - confirm that callers guarantee this.
+    next_marker_pos_ = len_ - 2;
+    FillBitWindow();
+  }
+
+  // Returns the next byte and skips the 0xff/0x00 escape sequences.
+  // Once the marker position is reached, returns 0 but still advances pos_.
+  uint8_t GetNextByte() {
+    if (pos_ >= next_marker_pos_) {
+      ++pos_;
+      return 0;
+    }
+    uint8_t c = data_[pos_++];
+    if (c == 0xff) {
+      uint8_t escape = data_[pos_];
+      if (escape == 0) {
+        ++pos_;
+      } else {
+        // 0xff was followed by a non-zero byte, which means that we found the
+        // start of the next marker segment.
+        next_marker_pos_ = pos_ - 1;
+      }
+    }
+    return c;
+  }
+
+  // Tops the window up to more than 56 bits, but only once it has dropped to
+  // 16 bits or fewer.
+  void FillBitWindow() {
+    if (bits_left_ <= 16) {
+      while (bits_left_ <= 56) {
+        val_ <<= 8;
+        val_ |= (uint64_t)GetNextByte();
+        bits_left_ += 8;
+      }
+    }
+  }
+
+  // Returns the next `nbits` bits of the stream, most significant bit first,
+  // and consumes them.
+  int ReadBits(int nbits) {
+    FillBitWindow();
+    uint64_t val = (val_ >> (bits_left_ - nbits)) & ((1ULL << nbits) - 1);
+    bits_left_ -= nbits;
+    return val;
+  }
+
+  // Sets *pos to the next stream position where parsing should continue.
+  // Enqueue the padding bits seen (0 or 1).
+  // Returns false if there is inconsistent or invalid padding or the stream
+  // ended too early.
+  bool FinishStream(JPEGData* jpg, size_t* pos) {
+    // Bits left over in the current, partially consumed byte.
+    int npadbits = bits_left_ & 7;
+    if (npadbits > 0) {
+      uint64_t padmask = (1ULL << npadbits) - 1;
+      uint64_t padbits = (val_ >> (bits_left_ - npadbits)) & padmask;
+      if (padbits != padmask) {
+        jpg->has_zero_padding_bit = true;
+      }
+      for (int i = npadbits - 1; i >= 0; --i) {
+        jpg->padding_bits.push_back((padbits >> i) & 1);
+      }
+    }
+    // Give back some bytes that we did not use.
+    int unused_bytes_left = bits_left_ >> 3;
+    while (unused_bytes_left-- > 0) {
+      --pos_;
+      // If we give back a 0 byte, we need to check if it was a 0xff/0x00 escape
+      // sequence, and if yes, we need to give back one more byte.
+      if (pos_ < next_marker_pos_ && data_[pos_] == 0 &&
+          data_[pos_ - 1] == 0xff) {
+        --pos_;
+      }
+    }
+    if (pos_ > next_marker_pos_) {
+      // Data ran out before the scan was complete.
+      return JXL_FAILURE("Unexpected end of scan.");
+    }
+    *pos = pos_;
+    return true;
+  }
+
+  const uint8_t* data_;
+  const size_t len_;
+  // Index of the next byte to fetch from data_.
+  size_t pos_;
+  // Bit window; its low bits_left_ bits are the unread ones.
+  uint64_t val_;
+  int bits_left_;
+  // Position at which GetNextByte() stops returning real data.
+  size_t next_marker_pos_;
+};
+
+// Returns the next Huffman-coded symbol via a two-level lookup: 8-bit root index, then an optional sub-table.
+int ReadSymbol(const HuffmanTableEntry* table, BitReaderState* br) {
+  int nbits;
+  br->FillBitWindow();
+  int val = (br->val_ >> (br->bits_left_ - 8)) & 0xff;  // Peek at the next 8 bits without consuming them.
+  table += val;
+  nbits = table->bits - 8;
+  if (nbits > 0) {  // bits > 8: this root entry points to a sub-table indexed by nbits more bits.
+    br->bits_left_ -= 8;
+    table += table->value;  // value holds the offset from the root entry to the sub-table.
+    val = (br->val_ >> (br->bits_left_ - nbits)) & ((1 << nbits) - 1);
+    table += val;
+  }
+  br->bits_left_ -= table->bits;  // Consume only the bits of the matched code.
+  return table->value;
+}
+
+/**
+ * Returns the DC diff or AC value for extra bits value x and prefix code s.
+ *
+ * CCITT Rec. T.81 (1992 E)
+ * Table F.1 – Difference magnitude categories for DC coding
+ *  SSSS | DIFF values
+ * ------+--------------------------
+ *     0 | 0
+ *     1 | –1, 1
+ *     2 | –3, –2, 2, 3
+ *     3 | –7..–4, 4..7
+ * ......|..........................
+ *    11 | –2047..–1024, 1024..2047
+ *
+ * CCITT Rec. T.81 (1992 E)
+ * Table F.2 – Categories assigned to coefficient values
+ * [ Same as Table F.1, but does not include SSSS equal to 0 and 11]
+ *
+ *
+ * CCITT Rec. T.81 (1992 E)
+ * F.1.2.1.1 Structure of DC code table
+ * For each category,... additional bits... appended... to uniquely identify
+ * which difference... occurred... When DIFF is positive... SSSS... bits of DIFF
+ * are appended. When DIFF is negative... SSSS... bits of (DIFF – 1) are
+ * appended... Most significant bit... is 0 for negative differences and 1 for
+ * positive differences.
+ *
+ * In other words the upper half of extra bits range represents DIFF as is.
+ * The lower half represents the negative DIFFs with an offset.
+ */
+int HuffExtend(int x, int s) {
+  JXL_DASSERT(s >= 1);
+  int half = 1 << (s - 1);  // Smallest extra-bits value that is returned unchanged.
+  if (x >= half) {
+    JXL_DASSERT(x < (1 << s));
+    return x;
+  } else {
+    return x - (1 << s) + 1;  // Lower half: shift by 1 - 2^s, giving a negative result.
+  }
+}
+
+// Decodes one 8x8 block of DCT coefficients (band Ss..Se, values scaled by 1 << Al) from the bit stream.
+bool DecodeDCTBlock(const HuffmanTableEntry* dc_huff,
+                    const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al,
+                    int* eobrun, bool* reset_state, int* num_zero_runs,
+                    BitReaderState* br, JPEGData* jpg, coeff_t* last_dc_coeff,
+                    coeff_t* coeffs) {
+  // Nowadays multiplication is even faster than variable shift.
+  int Am = 1 << Al;
+  bool eobrun_allowed = Ss > 0;  // A multi-block end-of-block run is rejected when the band starts at DC.
+  if (Ss == 0) {  // Band includes DC: decode the DC difference first.
+    int s = ReadSymbol(dc_huff, br);
+    if (s >= kJpegDCAlphabetSize) {
+      return JXL_FAILURE("Invalid Huffman symbol %d  for DC coefficient.", s);
+    }
+    int diff = 0;
+    if (s > 0) {
+      int bits = br->ReadBits(s);
+      diff = HuffExtend(bits, s);
+    }
+    int coeff = diff + *last_dc_coeff;
+    const int dc_coeff = coeff * Am;
+    coeffs[0] = dc_coeff;
+    // TODO(eustas): is there a more elegant / explicit way to check this?
+    if (dc_coeff != coeffs[0]) {  // Detects a value that does not survive the conversion to coeff_t.
+      return JXL_FAILURE("Invalid DC coefficient %d", dc_coeff);
+    }
+    *last_dc_coeff = coeff;  // The predictor is kept unscaled (before multiplying by Am).
+    ++Ss;
+  }
+  if (Ss > Se) {
+    return true;
+  }
+  if (*eobrun > 0) {  // Inside a pending end-of-block run: no AC symbols are read for this block.
+    --(*eobrun);
+    return true;
+  }
+  *num_zero_runs = 0;
+  for (int k = Ss; k <= Se; k++) {
+    int sr = ReadSymbol(ac_huff, br);
+    if (sr >= kJpegHuffmanAlphabetSize) {
+      return JXL_FAILURE("Invalid Huffman symbol %d for AC coefficient %d", sr,
+                         k);
+    }
+    int r = sr >> 4;  // High nibble: run of zero coefficients to skip.
+    int s = sr & 15;  // Low nibble: number of extra bits of the coefficient.
+    if (s > 0) {
+      k += r;
+      if (k > Se) {
+        return JXL_FAILURE("Out-of-band coefficient %d band was %d-%d", k, Ss,
+                           Se);
+      }
+      if (s + Al >= kJpegDCAlphabetSize) {
+        return JXL_FAILURE(
+            "Out of range AC coefficient value: s = %d Al = %d k = %d", s, Al,
+            k);
+      }
+      int bits = br->ReadBits(s);
+      int coeff = HuffExtend(bits, s);
+      coeffs[kJPEGNaturalOrder[k]] = coeff * Am;
+      *num_zero_runs = 0;
+    } else if (r == 15) {  // Symbol 0xf0: skip 16 coefficients (15 here plus the loop's k++).
+      k += 15;
+      ++(*num_zero_runs);  // Counts consecutive 0xf0 symbols not yet followed by a coefficient.
+    } else {
+      if (eobrun_allowed && k == Ss && *eobrun == 0) {
+        // We have two end-of-block runs right after each other, so we signal
+        // the jpeg encoder to force a state reset at this point.
+        *reset_state = true;
+      }
+      *eobrun = 1 << r;
+      if (r > 0) {
+        if (!eobrun_allowed) {
+          return JXL_FAILURE("End-of-block run crossing DC coeff.");
+        }
+        *eobrun += br->ReadBits(r);
+      }
+      break;
+    }
+  }
+  --(*eobrun);  // Counts the current block; the value goes negative when no run is active.
+  return true;
+}
+
+bool RefineDCTBlock(const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al,
+                    int* eobrun, bool* reset_state, BitReaderState* br,
+                    JPEGData* jpg, coeff_t* coeffs) {  // Refines one block in place by one bit at position Al (used by ProcessScan when Ah != 0).
+  // Nowadays multiplication is even faster than variable shift.
+  int Am = 1 << Al;
+  bool eobrun_allowed = Ss > 0;
+  if (Ss == 0) {
+    int s = br->ReadBits(1);  // DC refinement is a single raw bit.
+    coeff_t dc_coeff = coeffs[0];
+    dc_coeff |= s * Am;
+    coeffs[0] = dc_coeff;
+    ++Ss;
+  }
+  if (Ss > Se) {
+    return true;
+  }
+  int p1 = Am;  // Value given to a newly nonzero coefficient with a 1 sign bit; also the correction step.
+  int m1 = -Am;  // Negative counterpart of p1.
+  int k = Ss;
+  int r;
+  int s;
+  bool in_zero_run = false;
+  if (*eobrun <= 0) {  // No end-of-block run pending: read symbols for this block.
+    for (; k <= Se; k++) {
+      s = ReadSymbol(ac_huff, br);
+      if (s >= kJpegHuffmanAlphabetSize) {
+        return JXL_FAILURE("Invalid Huffman symbol %d for AC coefficient %d", s,
+                           k);
+      }
+      r = s >> 4;
+      s &= 15;
+      if (s) {
+        if (s != 1) {  // Only a size of 1 is accepted for a new coefficient in a refinement scan.
+          return JXL_FAILURE("Invalid Huffman symbol %d for AC coefficient %d",
+                             s, k);
+        }
+        s = br->ReadBits(1) ? p1 : m1;  // From here on s holds the new coefficient value.
+        in_zero_run = false;
+      } else {
+        if (r != 15) {
+          if (eobrun_allowed && k == Ss && *eobrun == 0) {
+            // We have two end-of-block runs right after each other, so we
+            // signal the jpeg encoder to force a state reset at this point.
+            *reset_state = true;
+          }
+          *eobrun = 1 << r;
+          if (r > 0) {
+            if (!eobrun_allowed) {
+              return JXL_FAILURE("End-of-block run crossing DC coeff.");
+            }
+            *eobrun += br->ReadBits(r);
+          }
+          break;
+        }
+        in_zero_run = true;
+      }
+      do {  // Advance past r zero coefficients; each nonzero one met on the way reads a correction bit.
+        coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]];
+        if (thiscoef != 0) {
+          if (br->ReadBits(1)) {
+            if ((thiscoef & p1) == 0) {
+              if (thiscoef >= 0) {
+                thiscoef += p1;
+              } else {
+                thiscoef += m1;
+              }
+            }
+          }
+          coeffs[kJPEGNaturalOrder[k]] = thiscoef;
+        } else {
+          if (--r < 0) {
+            break;
+          }
+        }
+        k++;
+      } while (k <= Se);
+      if (s) {
+        if (k > Se) {
+          return JXL_FAILURE("Out-of-band coefficient %d band was %d-%d", k, Ss,
+                             Se);
+        }
+        coeffs[kJPEGNaturalOrder[k]] = s;
+      }
+    }
+  }
+  if (in_zero_run) {
+    return JXL_FAILURE("Extra zero run before end-of-block.");
+  }
+  if (*eobrun > 0) {  // Within an end-of-block run only correction bits of nonzero coefficients are read.
+    for (; k <= Se; k++) {
+      coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]];
+      if (thiscoef != 0) {
+        if (br->ReadBits(1)) {
+          if ((thiscoef & p1) == 0) {
+            if (thiscoef >= 0) {
+              thiscoef += p1;
+            } else {
+              thiscoef += m1;
+            }
+          }
+        }
+        coeffs[kJPEGNaturalOrder[k]] = thiscoef;
+      }
+    }
+  }
+  --(*eobrun);  // Counts the current block against the run.
+  return true;
+}
+
+bool ProcessRestart(const uint8_t* data, const size_t len,
+                    int* next_restart_marker, BitReaderState* br,
+                    JPEGData* jpg) {  // Finishes the current bit stream, checks for the expected restart marker and restarts br after it.
+  size_t pos = 0;
+  if (!br->FinishStream(jpg, &pos)) {
+    return JXL_FAILURE("Invalid scan");
+  }
+  int expected_marker = 0xd0 + *next_restart_marker;
+  JXL_JPEG_EXPECT_MARKER();  // NOTE(review): macro is defined outside this excerpt; presumably validates data[pos] using len.
+  int marker = data[pos + 1];
+  if (marker != expected_marker) {
+    return JXL_FAILURE("Did not find expected restart marker %d actual %d",
+                       expected_marker, marker);
+  }
+  br->Reset(pos + 2);  // Resume reading right after the two marker bytes.
+  *next_restart_marker += 1;
+  *next_restart_marker &= 0x7;  // The expected marker cycles through 0xd0..0xd7.
+  return true;
+}
+
+// Parses the SOS header at *pos (via ProcessSOS), then decodes the scan's
+// entropy-coded data into the coeffs of jpg->components and advances *pos.
+bool ProcessScan(const uint8_t* data, const size_t len,
+                 const std::vector<HuffmanTableEntry>& dc_huff_lut,
+                 const std::vector<HuffmanTableEntry>& ac_huff_lut,
+                 uint16_t scan_progression[kMaxComponents][kDCTBlockSize],
+                 bool is_progressive, size_t* pos, JPEGData* jpg) {
+  if (!ProcessSOS(data, len, pos, jpg)) {
+    return false;
+  }
+  JPEGScanInfo* scan_info = &jpg->scan_info.back();
+  bool is_interleaved = (scan_info->num_components > 1);
+  int max_h_samp_factor = 1;
+  int max_v_samp_factor = 1;
+  for (const JPEGComponent& comp : jpg->components) {
+    max_h_samp_factor = std::max(max_h_samp_factor, comp.h_samp_factor);
+    max_v_samp_factor = std::max(max_v_samp_factor, comp.v_samp_factor);
+  }
+
+  int MCU_rows = DivCeil(jpg->height, max_v_samp_factor * 8);
+  int MCUs_per_row = DivCeil(jpg->width, max_h_samp_factor * 8);
+  if (!is_interleaved) {  // Single-component scan: the grid is counted in that component's own blocks.
+    const JPEGComponent& c = jpg->components[scan_info->components[0].comp_idx];
+    MCUs_per_row = DivCeil(jpg->width * c.h_samp_factor, 8 * max_h_samp_factor);
+    MCU_rows = DivCeil(jpg->height * c.v_samp_factor, 8 * max_v_samp_factor);
+  }
+  coeff_t last_dc_coeff[kMaxComponents] = {0};
+  BitReaderState br(data, len, *pos);
+  int restarts_to_go = jpg->restart_interval;
+  int next_restart_marker = 0;
+  int eobrun = -1;
+  int block_scan_index = 0;
+  const int Al = is_progressive ? scan_info->Al : 0;
+  const int Ah = is_progressive ? scan_info->Ah : 0;
+  const int Ss = is_progressive ? scan_info->Ss : 0;
+  const int Se = is_progressive ? scan_info->Se : 63;
+  const uint16_t scan_bitmask = Ah == 0 ? (0xffff << Al) : (1u << Al);  // Bits this scan codes: Al and above for a first pass, only bit Al for a refinement.
+  const uint16_t refinement_bitmask = (1 << Al) - 1;  // Bits below Al; they must not have been coded yet.
+  for (size_t i = 0; i < scan_info->num_components; ++i) {
+    int comp_idx = scan_info->components[i].comp_idx;
+    for (int k = Ss; k <= Se; ++k) {
+      if (scan_progression[comp_idx][k] & scan_bitmask) {
+        return JXL_FAILURE(
+            "Overlapping scans: component=%d k=%d prev_mask: %u cur_mask %u",
+            comp_idx, k, scan_progression[comp_idx][k], scan_bitmask);
+      }
+      if (scan_progression[comp_idx][k] & refinement_bitmask) {
+        return JXL_FAILURE(
+            "Invalid scan order, a more refined scan was already done: "
+            "component=%d k=%d prev_mask=%u cur_mask=%u",
+            comp_idx, k, scan_progression[comp_idx][k], scan_bitmask);
+      }
+      scan_progression[comp_idx][k] |= scan_bitmask;
+    }
+  }
+  if (Al > 10) {
+    return JXL_FAILURE("Scan parameter Al=%d is not supported.", Al);
+  }
+  for (int mcu_y = 0; mcu_y < MCU_rows; ++mcu_y) {
+    for (int mcu_x = 0; mcu_x < MCUs_per_row; ++mcu_x) {
+      // Handle the restart intervals.
+      if (jpg->restart_interval > 0) {
+        if (restarts_to_go == 0) {
+          if (ProcessRestart(data, len, &next_restart_marker, &br, jpg)) {
+            restarts_to_go = jpg->restart_interval;
+            memset(static_cast<void*>(last_dc_coeff), 0, sizeof(last_dc_coeff));
+            if (eobrun > 0) {  // An end-of-block run may not extend across a restart.
+              return JXL_FAILURE("End-of-block run too long.");
+            }
+            eobrun = -1;  // fresh start
+          } else {
+            return JXL_FAILURE("Could not process restart.");
+          }
+        }
+        --restarts_to_go;
+      }
+      // Decode one MCU.
+      for (size_t i = 0; i < scan_info->num_components; ++i) {
+        JPEGComponentScanInfo* si = &scan_info->components[i];
+        JPEGComponent* c = &jpg->components[si->comp_idx];
+        const HuffmanTableEntry* dc_lut =
+            &dc_huff_lut[si->dc_tbl_idx * kJpegHuffmanLutSize];
+        const HuffmanTableEntry* ac_lut =
+            &ac_huff_lut[si->ac_tbl_idx * kJpegHuffmanLutSize];
+        int nblocks_y = is_interleaved ? c->v_samp_factor : 1;
+        int nblocks_x = is_interleaved ? c->h_samp_factor : 1;
+        for (int iy = 0; iy < nblocks_y; ++iy) {
+          for (int ix = 0; ix < nblocks_x; ++ix) {
+            int block_y = mcu_y * nblocks_y + iy;
+            int block_x = mcu_x * nblocks_x + ix;
+            int block_idx = block_y * c->width_in_blocks + block_x;
+            bool reset_state = false;
+            int num_zero_runs = 0;
+            coeff_t* coeffs = &c->coeffs[block_idx * kDCTBlockSize];
+            if (Ah == 0) {  // First pass over this band.
+              if (!DecodeDCTBlock(dc_lut, ac_lut, Ss, Se, Al, &eobrun,
+                                  &reset_state, &num_zero_runs, &br, jpg,
+                                  &last_dc_coeff[si->comp_idx], coeffs)) {
+                return false;
+              }
+            } else {  // Refinement pass over already decoded coefficients.
+              if (!RefineDCTBlock(ac_lut, Ss, Se, Al, &eobrun, &reset_state,
+                                  &br, jpg, coeffs)) {
+                return false;
+              }
+            }
+            if (reset_state) {
+              scan_info->reset_points.emplace_back(block_scan_index);
+            }
+            if (num_zero_runs > 0) {
+              JPEGScanInfo::ExtraZeroRunInfo info;
+              info.block_idx = block_scan_index;
+              info.num_extra_zero_runs = num_zero_runs;
+              scan_info->extra_zero_runs.push_back(info);
+            }
+            ++block_scan_index;
+          }
+        }
+      }
+    }
+  }
+  if (eobrun > 0) {
+    return JXL_FAILURE("End-of-block run too long.");
+  }
+  if (!br.FinishStream(jpg, pos)) {
+    return JXL_FAILURE("Invalid scan.");
+  }
+  if (*pos > len) {
+    return JXL_FAILURE("Unexpected end of file during scan. pos=%" PRIuS
+                       " len=%" PRIuS,
+                       *pos, len);
+  }
+  return true;
+}
+
+// Changes the quant_idx field of the components to refer to the index of the
+// quant table in the jpg->quant array. Returns failure if no table matches.
+bool FixupIndexes(JPEGData* jpg) {
+  for (size_t i = 0; i < jpg->components.size(); ++i) {
+    JPEGComponent* c = &jpg->components[i];
+    bool found_index = false;
+    for (size_t j = 0; j < jpg->quant.size(); ++j) {
+      if (jpg->quant[j].index == c->quant_idx) {  // Before the fix-up, quant_idx is compared against the table's index field.
+        c->quant_idx = j;  // Replace it with the position inside jpg->quant.
+        found_index = true;
+        break;  // The first matching table wins.
+      }
+    }
+    if (!found_index) {
+      return JXL_FAILURE("Quantization table with index %u not found",
+                         c->quant_idx);
+    }
+  }
+  return true;
+}
+
+size_t FindNextMarker(const uint8_t* data, const size_t len, size_t pos) {  // Returns the number of bytes to skip from pos to reach the next accepted marker.
+  // kIsValidMarker[i] == 1 means (0xc0 + i) is a valid marker.
+  static const uint8_t kIsValidMarker[] = {
+      1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
+      1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
+  };
+  size_t num_skipped = 0;
+  while (pos + 1 < len && (data[pos] != 0xff || data[pos + 1] < 0xc0 ||  // Stops when fewer than two bytes remain...
+                           !kIsValidMarker[data[pos + 1] - 0xc0])) {  // ...or at 0xff followed by an accepted marker byte.
+    ++pos;
+    ++num_skipped;
+  }
+  return num_skipped;
+}
+
+}  // namespace
+
+bool ReadJpeg(const uint8_t* data, const size_t len, JpegReadMode mode,
+              JPEGData* jpg) {  // Walks the marker segments of data[0 ... len) and records them in *jpg.
+  size_t pos = 0;
+  // Check SOI marker.
+  JXL_JPEG_EXPECT_MARKER();
+  int marker = data[pos + 1];
+  pos += 2;
+  if (marker != 0xd8) {
+    return JXL_FAILURE("Did not find expected SOI marker, actual=%d", marker);
+  }
+  int lut_size = kMaxHuffmanTables * kJpegHuffmanLutSize;
+  std::vector<HuffmanTableEntry> dc_huff_lut(lut_size);
+  std::vector<HuffmanTableEntry> ac_huff_lut(lut_size);
+  bool found_sof = false;
+  bool found_dri = false;
+  uint16_t scan_progression[kMaxComponents][kDCTBlockSize] = {{0}};  // Per component and coefficient: bit mask already covered by earlier scans.
+
+  jpg->padding_bits.resize(0);
+  bool is_progressive = false;  // default
+  do {
+    // Read next marker.
+    size_t num_skipped = FindNextMarker(data, len, pos);
+    if (num_skipped > 0) {
+      // Add a fake marker to indicate arbitrary in-between-markers data.
+      jpg->marker_order.push_back(0xff);
+      jpg->inter_marker_data.emplace_back(data + pos, data + pos + num_skipped);
+      pos += num_skipped;
+    }
+    JXL_JPEG_EXPECT_MARKER();
+    marker = data[pos + 1];
+    pos += 2;
+    bool ok = true;
+    switch (marker) {
+      case 0xc0:
+      case 0xc1:
+      case 0xc2:  // Frame headers; only 0xc2 selects progressive decoding.
+        is_progressive = (marker == 0xc2);
+        ok = ProcessSOF(data, len, mode, &pos, jpg);
+        found_sof = true;
+        break;
+      case 0xc4:  // Huffman table definition.
+        ok = ProcessDHT(data, len, mode, &dc_huff_lut, &ac_huff_lut, &pos, jpg);
+        break;
+      case 0xd0:
+      case 0xd1:
+      case 0xd2:
+      case 0xd3:
+      case 0xd4:
+      case 0xd5:
+      case 0xd6:
+      case 0xd7:
+        // RST markers do not have any data.
+        break;
+      case 0xd9:
+        // Found end marker.
+        break;
+      case 0xda:  // Start of scan; scan data is decoded only in kReadAll mode.
+        if (mode == JpegReadMode::kReadAll) {
+          ok = ProcessScan(data, len, dc_huff_lut, ac_huff_lut,
+                           scan_progression, is_progressive, &pos, jpg);
+        }
+        break;
+      case 0xdb:  // Quantization table definition.
+        ok = ProcessDQT(data, len, &pos, jpg);
+        break;
+      case 0xdd:  // Restart interval definition.
+        ok = ProcessDRI(data, len, &pos, &found_dri, jpg);
+        break;
+      case 0xe0:
+      case 0xe1:
+      case 0xe2:
+      case 0xe3:
+      case 0xe4:
+      case 0xe5:
+      case 0xe6:
+      case 0xe7:
+      case 0xe8:
+      case 0xe9:
+      case 0xea:
+      case 0xeb:
+      case 0xec:
+      case 0xed:
+      case 0xee:
+      case 0xef:  // Application segments; not parsed in kReadTables mode.
+        if (mode != JpegReadMode::kReadTables) {
+          ok = ProcessAPP(data, len, &pos, jpg);
+        }
+        break;
+      case 0xfe:  // Comment segment; not parsed in kReadTables mode.
+        if (mode != JpegReadMode::kReadTables) {
+          ok = ProcessCOM(data, len, &pos, jpg);
+        }
+        break;
+      default:
+        return JXL_FAILURE("Unsupported marker: %d pos=%" PRIuS " len=%" PRIuS,
+                           marker, pos, len);
+    }
+    if (!ok) {
+      return false;
+    }
+    jpg->marker_order.push_back(marker);
+    if (mode == JpegReadMode::kReadHeader && found_sof) {  // Header-only mode stops once a frame header was parsed.
+      break;
+    }
+  } while (marker != 0xd9);
+
+  if (!found_sof) {
+    return JXL_FAILURE("Missing SOF marker.");
+  }
+
+  // Supplemental checks.
+  if (mode == JpegReadMode::kReadAll) {
+    if (pos < len) {  // Keep whatever follows the end marker.
+      jpg->tail_data = std::vector<uint8_t>(data + pos, data + len);
+    }
+    if (!FixupIndexes(jpg)) {
+      return false;
+    }
+    if (jpg->huffman_code.empty()) {
+      // Section B.2.4.2: "If a table has never been defined for a particular
+      // destination, then when this destination is specified in a scan header,
+      // the results are unpredictable."
+      return JXL_FAILURE("Need at least one Huffman code table.");
+    }
+    if (jpg->huffman_code.size() >= kMaxDHTMarkers) {
+      return JXL_FAILURE("Too many Huffman tables.");
+    }
+  }
+  return true;
+}
+
+}  // namespace jpeg
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data_reader.h b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data_reader.h
new file mode 100644
index 0000000000..3fad820e9d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data_reader.h
@@ -0,0 +1,36 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Functions for reading a jpeg byte stream into a JPEGData object.
+
+#ifndef LIB_JXL_JPEG_ENC_JPEG_DATA_READER_H_
+#define LIB_JXL_JPEG_ENC_JPEG_DATA_READER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+
+enum class JpegReadMode {
+  kReadHeader,  // stop right after the first SOF marker has been parsed
+  kReadTables,  // read all markers, but do not parse APP/COM segments or scans
+  kReadAll,     // everything, including the entropy-coded scan data
+};
+
+// Parses the JPEG stream contained in data[0 ... len) and fills in *jpg with
+// the parsed information.
+// If mode is kReadHeader, parsing stops right after the first SOF marker.
+// Returns false if the data is not valid JPEG, or if it contains an unsupported
+// JPEG feature.
+bool ReadJpeg(const uint8_t* data, const size_t len, JpegReadMode mode,
+              JPEGData* jpg);
+
+}  // namespace jpeg
+}  // namespace jxl
+
+#endif  // LIB_JXL_JPEG_ENC_JPEG_DATA_READER_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc
new file mode 100644
index 0000000000..38282e640a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc
@@ -0,0 +1,103 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/jpeg/enc_jpeg_huffman_decode.h"
+
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+
+// Returns the table width of the next 2nd level table, count is the histogram
+// of bit lengths for the remaining symbols, len is the code length of the next
+// processed symbol. The returned width is relative to the 8-bit root table.
+static inline int NextTableBitSize(const int* count, int len) {
+  int left = 1 << (len - kJpegHuffmanRootTableBits);  // Free slots in a sub-table of the current width.
+  while (len < static_cast<int>(kJpegHuffmanMaxBitLength)) {
+    left -= count[len];  // Codes of this length take one slot each.
+    if (left <= 0) break;  // No slots left over: this width is enough.
+    ++len;
+    left <<= 1;  // One more index bit doubles the remaining slots.
+  }
+  return len - kJpegHuffmanRootTableBits;
+}
+
+void BuildJpegHuffmanTable(const uint32_t* count, const uint32_t* symbols,
+                           HuffmanTableEntry* lut) {  // Writes the 256-entry root table into lut, followed by the 2nd level tables.
+  HuffmanTableEntry code;    // current table entry
+  HuffmanTableEntry* table;  // next available space in table
+  int len;                   // current code length
+  int idx;                   // symbol index
+  int key;                   // prefix code
+  int reps;                  // number of replicate key values in current table
+  int low;                   // low bits for current root entry
+  int table_bits;            // key length of current table
+  int table_size;            // size of current table
+
+  // Make a local copy of the input bit length histogram.
+  int tmp_count[kJpegHuffmanMaxBitLength + 1] = {0};
+  int total_count = 0;
+  for (len = 1; len <= static_cast<int>(kJpegHuffmanMaxBitLength); ++len) {
+    tmp_count[len] = count[len];
+    total_count += tmp_count[len];
+  }
+
+  table = lut;
+  table_bits = kJpegHuffmanRootTableBits;
+  table_size = 1 << table_bits;
+
+  // Special case code with only one value.
+  if (total_count == 1) {
+    code.bits = 0;  // A lookup of this entry consumes no bits.
+    code.value = symbols[0];
+    for (key = 0; key < table_size; ++key) {
+      table[key] = code;
+    }
+    return;
+  }
+
+  // Fill in root table.
+  key = 0;
+  idx = 0;
+  for (len = 1; len <= kJpegHuffmanRootTableBits; ++len) {
+    for (; tmp_count[len] > 0; --tmp_count[len]) {
+      code.bits = len;
+      code.value = symbols[idx++];
+      reps = 1 << (kJpegHuffmanRootTableBits - len);  // A code of length len covers this many consecutive 8-bit keys.
+      while (reps--) {
+        table[key++] = code;
+      }
+    }
+  }
+
+  // Fill in 2nd level tables and add pointers to root table.
+  table += table_size;
+  table_size = 0;
+  low = 0;
+  for (len = kJpegHuffmanRootTableBits + 1;
+       len <= static_cast<int>(kJpegHuffmanMaxBitLength); ++len) {
+    for (; tmp_count[len] > 0; --tmp_count[len]) {
+      // Start a new sub-table if the previous one is full.
+      if (low >= table_size) {
+        table += table_size;
+        table_bits = NextTableBitSize(tmp_count, len);
+        table_size = 1 << table_bits;
+        low = 0;
+        lut[key].bits = table_bits + kJpegHuffmanRootTableBits;  // bits above the root width mark this root slot as a pointer.
+        lut[key].value = (table - lut) - key;  // Offset from this root slot to the start of the sub-table.
+        ++key;
+      }
+      code.bits = len - kJpegHuffmanRootTableBits;  // Bits consumed beyond the 8 root bits.
+      code.value = symbols[idx++];
+      reps = 1 << (table_bits - code.bits);
+      while (reps--) {
+        table[low++] = code;
+      }
+    }
+  }
+}
+
+}  // namespace jpeg
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_huffman_decode.h b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_huffman_decode.h
new file mode 100644
index 0000000000..b8a60e4107
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_huffman_decode.h
@@ -0,0 +1,41 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Utility function for building a Huffman lookup table for the jpeg decoder.
+
+#ifndef LIB_JXL_JPEG_ENC_JPEG_HUFFMAN_DECODE_H_
+#define LIB_JXL_JPEG_ENC_JPEG_HUFFMAN_DECODE_H_
+
+#include <stdint.h>
+
+namespace jxl {
+namespace jpeg {
+
+constexpr int kJpegHuffmanRootTableBits = 8;
+// Maximum huffman lookup table size.
+// According to zlib/examples/enough.c, 758 entries are always enough for
+// an alphabet of 257 symbols (256 + 1 special symbol for the all 1s code) and
+// max bit length 16 if the root table has 8 bits.
+constexpr int kJpegHuffmanLutSize = 758;
+
+struct HuffmanTableEntry {  // One slot of the Huffman lookup table filled by BuildJpegHuffmanTable.
+  // Initialize the value to an invalid symbol so that we can recognize it
+  // when reading the bit stream using a Huffman code with space > 0.
+  HuffmanTableEntry() : bits(0), value(0xffff) {}
+
+  uint8_t bits;    // number of bits used for this symbol
+  uint16_t value;  // symbol value or table offset
+};
+
+// Builds jpeg-style Huffman lookup table from the given symbols.
+// The symbols are in order of increasing bit lengths. The number of symbols
+// with bit length n is given in counts[n] for each n >= 1.
+void BuildJpegHuffmanTable(const uint32_t* counts, const uint32_t* symbols,
+                           HuffmanTableEntry* lut);
+
+}  // namespace jpeg
+}  // namespace jxl
+
+#endif  // LIB_JXL_JPEG_ENC_JPEG_HUFFMAN_DECODE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/jpeg_data.cc b/third-party/libjxl/libjxl/lib/jxl/jpeg/jpeg_data.cc
new file mode 100644
index 0000000000..9b4c778c9c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/jpeg_data.cc
@@ -0,0 +1,478 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace jpeg {
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+
+namespace {
+enum JPEGComponentType : uint32_t {
+  kGray = 0,    // one component with id 1
+  kYCbCr = 1,   // three components with ids 1, 2, 3
+  kRGB = 2,     // three components with ids 'R', 'G', 'B'
+  kCustom = 3,  // anything else; the component ids are stored explicitly
+};
+
+struct JPEGInfo {  // Tallies filled in by VisitMarker, one call per marker.
+  size_t num_app_markers = 0;  // markers in 0xe0..0xef
+  size_t num_com_markers = 0;  // 0xfe markers
+  size_t num_scans = 0;        // 0xda markers
+  size_t num_intermarker = 0;  // fake 0xff markers (inter-marker data)
+  bool has_dri = false;        // set once a 0xdd marker is seen
+};
+
+// Reads or writes one marker byte through `visitor` as a 6-bit offset from
+// 0xc0 (a decoded marker is therefore always in 0xc0..0xff) and records in
+// `info` which kind of marker it turned out to be.
+Status VisitMarker(uint8_t* marker, Visitor* visitor, JPEGInfo* info) {
+  uint32_t offset = *marker - 0xc0;
+  JXL_RETURN_IF_ERROR(visitor->Bits(6, 0x00, &offset));
+  *marker = offset + 0xc0;
+  const uint8_t code = *marker;
+  if ((code & 0xf0) == 0xe0) {
+    ++info->num_app_markers;
+  } else {
+    switch (code) {
+      case 0xfe: ++info->num_com_markers; break;
+      case 0xda: ++info->num_scans; break;
+      // We use a fake 0xff marker to signal intermarker data.
+      case 0xff: ++info->num_intermarker; break;
+      case 0xdd: info->has_dri = true; break;
+      default: break;
+    }
+  }
+  return true;
+}
+
+}  // namespace
+
+Status JPEGData::VisitFields(Visitor* visitor) {  // (De)serializes JPEG metadata.
+  bool is_gray = components.size() == 1;
+  JXL_RETURN_IF_ERROR(visitor->Bool(false, &is_gray));
+  if (visitor->IsReading()) {
+    components.resize(is_gray ? 1 : 3);
+  }
+  JPEGInfo info;
+  if (visitor->IsReading()) {
+    uint8_t marker = 0xc0;
+    do {
+      JXL_RETURN_IF_ERROR(VisitMarker(&marker, visitor, &info));
+      marker_order.push_back(marker);
+      if (marker_order.size() > 16384) {
+        return JXL_FAILURE("Too many markers: %" PRIuS "\n",
+                           marker_order.size());
+      }
+    } while (marker != 0xd9);
+  } else {
+    if (marker_order.size() > 16384) {
+      return JXL_FAILURE("Too many markers: %" PRIuS "\n", marker_order.size());
+    }
+    for (size_t i = 0; i < marker_order.size(); i++) {
+      JXL_RETURN_IF_ERROR(VisitMarker(&marker_order[i], visitor, &info));
+    }
+    if (!marker_order.empty()) {
+      // Last marker should always be EOI marker.
+      JXL_CHECK(marker_order.back() == 0xd9);
+    }
+  }
+
+  // Size of the APP and COM markers.
+  if (visitor->IsReading()) {
+    app_data.resize(info.num_app_markers);
+    app_marker_type.resize(info.num_app_markers);
+    com_data.resize(info.num_com_markers);
+    scan_info.resize(info.num_scans);
+  }
+  JXL_ASSERT(app_data.size() == info.num_app_markers);
+  JXL_ASSERT(app_marker_type.size() == info.num_app_markers);
+  JXL_ASSERT(com_data.size() == info.num_com_markers);
+  JXL_ASSERT(scan_info.size() == info.num_scans);
+  for (size_t i = 0; i < app_data.size(); i++) {
+    auto& app = app_data[i];
+    // Encodes up to 8 different values.
+    JXL_RETURN_IF_ERROR(
+        visitor->U32(Val(0), Val(1), BitsOffset(1, 2), BitsOffset(2, 4), 0,
+                     reinterpret_cast<uint32_t*>(&app_marker_type[i])));
+    if (app_marker_type[i] != AppMarkerType::kUnknown &&
+        app_marker_type[i] != AppMarkerType::kICC &&
+        app_marker_type[i] != AppMarkerType::kExif &&
+        app_marker_type[i] != AppMarkerType::kXMP) {
+      return JXL_FAILURE("Unknown app marker type %u",
+                         static_cast<uint32_t>(app_marker_type[i]));
+    }
+    uint32_t len = app.size() - 1;
+    JXL_RETURN_IF_ERROR(visitor->Bits(16, 0, &len));
+    if (visitor->IsReading()) app.resize(len + 1);
+    if (app.size() < 3) {
+      return JXL_FAILURE("Invalid marker size: %" PRIuS "\n", app.size());
+    }
+  }
+  for (auto& com : com_data) {
+    uint32_t len = com.size() - 1;
+    JXL_RETURN_IF_ERROR(visitor->Bits(16, 0, &len));
+    if (visitor->IsReading()) com.resize(len + 1);
+    if (com.size() < 3) {
+      return JXL_FAILURE("Invalid marker size: %" PRIuS "\n", com.size());
+    }
+  }
+  // Quant table headers only: precision, index and is_last are visited here.
+  uint32_t num_quant_tables = quant.size();
+  JXL_RETURN_IF_ERROR(
+      visitor->U32(Val(1), Val(2), Val(3), Val(4), 2, &num_quant_tables));
+  if (num_quant_tables == 4) {
+    return JXL_FAILURE("Invalid number of quant tables");
+  }
+  if (visitor->IsReading()) {
+    quant.resize(num_quant_tables);
+  }
+  for (size_t i = 0; i < num_quant_tables; i++) {
+    if (quant[i].precision > 1) {
+      return JXL_FAILURE(
+          "Quant tables with more than 16 bits are not supported");
+    }
+    JXL_RETURN_IF_ERROR(visitor->Bits(1, 0, &quant[i].precision));
+    JXL_RETURN_IF_ERROR(visitor->Bits(2, i, &quant[i].index));
+    JXL_RETURN_IF_ERROR(visitor->Bool(true, &quant[i].is_last));
+  }
+  // Component ids: implied by a 2-bit type, or stored explicitly for kCustom.
+  JPEGComponentType component_type =
+      components.size() == 1 && components[0].id == 1 ? JPEGComponentType::kGray
+      : components.size() == 3 && components[0].id == 1 &&
+              components[1].id == 2 && components[2].id == 3
+          ? JPEGComponentType::kYCbCr
+      : components.size() == 3 && components[0].id == 'R' &&
+              components[1].id == 'G' && components[2].id == 'B'
+          ? JPEGComponentType::kRGB
+          : JPEGComponentType::kCustom;
+  JXL_RETURN_IF_ERROR(
+      visitor->Bits(2, JPEGComponentType::kYCbCr,
+                    reinterpret_cast<uint32_t*>(&component_type)));
+  uint32_t num_components;
+  if (component_type == JPEGComponentType::kGray) {
+    num_components = 1;
+  } else if (component_type != JPEGComponentType::kCustom) {
+    num_components = 3;
+  } else {
+    num_components = components.size();
+    JXL_RETURN_IF_ERROR(
+        visitor->U32(Val(1), Val(2), Val(3), Val(4), 3, &num_components));
+    if (num_components != 1 && num_components != 3) {
+      return JXL_FAILURE("Invalid number of components: %u", num_components);
+    }
+  }
+  if (visitor->IsReading()) {
+    components.resize(num_components);
+  }
+  if (component_type == JPEGComponentType::kCustom) {
+    for (size_t i = 0; i < components.size(); i++) {
+      JXL_RETURN_IF_ERROR(visitor->Bits(8, 0, &components[i].id));
+    }
+  } else if (component_type == JPEGComponentType::kGray) {
+    components[0].id = 1;
+  } else if (component_type == JPEGComponentType::kRGB) {
+    components[0].id = 'R';
+    components[1].id = 'G';
+    components[2].id = 'B';
+  } else {
+    components[0].id = 1;
+    components[1].id = 2;
+    components[2].id = 3;
+  }
+  size_t used_tables = 0;  // bitmask of quant tables referenced by components
+  for (size_t i = 0; i < components.size(); i++) {
+    JXL_RETURN_IF_ERROR(visitor->Bits(2, 0, &components[i].quant_idx));
+    if (components[i].quant_idx >= quant.size()) {
+      return JXL_FAILURE("Invalid quant table for component %" PRIuS ": %u\n",
+                         i, components[i].quant_idx);
+    }
+    used_tables |= 1U << components[i].quant_idx;
+  }
+  for (size_t i = 0; i < quant.size(); i++) {
+    if (used_tables & (1 << i)) continue;
+    if (i == 0) return JXL_FAILURE("First quant table unused.");
+    // Unused quant table has to be set to copy of previous quant table
+    for (size_t j = 0; j < 64; j++) {
+      if (quant[i].values[j] != quant[i - 1].values[j]) {
+        return JXL_FAILURE("Non-trivial unused quant table");
+      }
+    }
+  }
+  // Huffman codes: slot id, is_last, bit-length histogram, then the symbols.
+  uint32_t num_huff = huffman_code.size();
+  JXL_RETURN_IF_ERROR(visitor->U32(Val(4), BitsOffset(3, 2), BitsOffset(4, 10),
+                                   BitsOffset(6, 26), 4, &num_huff));
+  if (visitor->IsReading()) {
+    huffman_code.resize(num_huff);
+  }
+  for (JPEGHuffmanCode& hc : huffman_code) {
+    bool is_ac = hc.slot_id >> 4;
+    uint32_t id = hc.slot_id & 0xF;
+    JXL_RETURN_IF_ERROR(visitor->Bool(false, &is_ac));
+    JXL_RETURN_IF_ERROR(visitor->Bits(2, 0, &id));
+    hc.slot_id = (static_cast<uint32_t>(is_ac) << 4) | id;
+    JXL_RETURN_IF_ERROR(visitor->Bool(true, &hc.is_last));
+    size_t num_symbols = 0;
+    for (size_t i = 0; i <= 16; i++) {
+      JXL_RETURN_IF_ERROR(visitor->U32(Val(0), Val(1), BitsOffset(3, 2),
+                                       Bits(8), 0, &hc.counts[i]));
+      num_symbols += hc.counts[i];
+    }
+    if (num_symbols < 1) {
+      // Actually, at least 2 symbols are required, since one of them is EOI.
+      return JXL_FAILURE("Empty Huffman table");
+    }
+    if (num_symbols > hc.values.size()) {
+      return JXL_FAILURE("Huffman code too large (%" PRIuS ")", num_symbols);
+    }
+    // Presence flags for 4 * 64 + 1 values.
+    uint64_t value_slots[5] = {};
+    for (size_t i = 0; i < num_symbols; i++) {
+      // Goes up to 256, included. Might have the same symbol appear twice...
+      JXL_RETURN_IF_ERROR(visitor->U32(Bits(2), BitsOffset(2, 4),
+                                       BitsOffset(4, 8), BitsOffset(8, 1), 0,
+                                       &hc.values[i]));
+      value_slots[hc.values[i] >> 6] |= (uint64_t)1 << (hc.values[i] & 0x3F);
+    }
+    if (hc.values[num_symbols - 1] != kJpegHuffmanAlphabetSize) {
+      return JXL_FAILURE("Missing EOI symbol");
+    }
+    // Last element, denoting EOI, has to be 1 after the loop.
+    JXL_ASSERT(value_slots[4] == 1);
+    size_t num_values = 1;
+    for (size_t i = 0; i < 4; ++i) num_values += hwy::PopCount(value_slots[i]);
+    if (num_values != num_symbols) {
+      return JXL_FAILURE("Duplicate Huffman symbols");
+    }
+    if (!is_ac) {
+      bool only_dc = ((value_slots[0] >> kJpegDCAlphabetSize) | value_slots[1] |
+                      value_slots[2] | value_slots[3]) == 0;
+      if (!only_dc) return JXL_FAILURE("Huffman symbols out of DC range");
+    }
+  }
+  // Scan headers: component count, Ss/Se, Al/Ah and per-component indices.
+  for (auto& scan : scan_info) {
+    JXL_RETURN_IF_ERROR(
+        visitor->U32(Val(1), Val(2), Val(3), Val(4), 1, &scan.num_components));
+    if (scan.num_components >= 4) {
+      return JXL_FAILURE("Invalid number of components in SOS marker");
+    }
+    JXL_RETURN_IF_ERROR(visitor->Bits(6, 0, &scan.Ss));
+    JXL_RETURN_IF_ERROR(visitor->Bits(6, 63, &scan.Se));
+    JXL_RETURN_IF_ERROR(visitor->Bits(4, 0, &scan.Al));
+    JXL_RETURN_IF_ERROR(visitor->Bits(4, 0, &scan.Ah));
+    for (size_t i = 0; i < scan.num_components; i++) {
+      JXL_RETURN_IF_ERROR(visitor->Bits(2, 0, &scan.components[i].comp_idx));
+      if (scan.components[i].comp_idx >= components.size()) {
+        return JXL_FAILURE("Invalid component idx in SOS marker");
+      }
+      JXL_RETURN_IF_ERROR(visitor->Bits(2, 0, &scan.components[i].ac_tbl_idx));
+      JXL_RETURN_IF_ERROR(visitor->Bits(2, 0, &scan.components[i].dc_tbl_idx));
+    }
+    // TODO(veluca): actually set and use this value.
+    JXL_RETURN_IF_ERROR(visitor->U32(Val(0), Val(1), Val(2), BitsOffset(3, 3),
+                                     kMaxNumPasses - 1,
+                                     &scan.last_needed_pass));
+  }
+
+  // From here on, this is data that is not strictly necessary to get a valid
+  // JPEG, but necessary for bit-exact JPEG reconstruction.
+  if (info.has_dri) {
+    JXL_RETURN_IF_ERROR(visitor->Bits(16, 0, &restart_interval));
+  }
+  // Per-scan reset points and extra zero runs; block indices are delta-coded.
+  for (auto& scan : scan_info) {
+    uint32_t num_reset_points = scan.reset_points.size();
+    JXL_RETURN_IF_ERROR(visitor->U32(Val(0), BitsOffset(2, 1), BitsOffset(4, 4),
+                                     BitsOffset(16, 20), 0, &num_reset_points));
+    if (visitor->IsReading()) {
+      scan.reset_points.resize(num_reset_points);
+    }
+    int last_block_idx = -1;
+    for (auto& block_idx : scan.reset_points) {
+      block_idx -= last_block_idx + 1;
+      JXL_RETURN_IF_ERROR(visitor->U32(Val(0), BitsOffset(3, 1),
+                                       BitsOffset(5, 9), BitsOffset(28, 41), 0,
+                                       &block_idx));
+      block_idx += last_block_idx + 1;
+      if (block_idx >= (3u << 26)) {
+        // At most 8K x 8K x num_channels blocks are possible in a JPEG.
+        // So valid block indices are below 3 * 2^26.
+        return JXL_FAILURE("Invalid block ID: %u", block_idx);
+      }
+      last_block_idx = block_idx;
+    }
+
+    uint32_t num_extra_zero_runs = scan.extra_zero_runs.size();
+    JXL_RETURN_IF_ERROR(visitor->U32(Val(0), BitsOffset(2, 1), BitsOffset(4, 4),
+                                     BitsOffset(16, 20), 0,
+                                     &num_extra_zero_runs));
+    if (visitor->IsReading()) {
+      scan.extra_zero_runs.resize(num_extra_zero_runs);
+    }
+    last_block_idx = -1;
+    for (size_t i = 0; i < scan.extra_zero_runs.size(); ++i) {
+      uint32_t& block_idx = scan.extra_zero_runs[i].block_idx;
+      JXL_RETURN_IF_ERROR(visitor->U32(
+          Val(1), BitsOffset(2, 2), BitsOffset(4, 5), BitsOffset(8, 20), 1,
+          &scan.extra_zero_runs[i].num_extra_zero_runs));
+      block_idx -= last_block_idx + 1;
+      JXL_RETURN_IF_ERROR(visitor->U32(Val(0), BitsOffset(3, 1),
+                                       BitsOffset(5, 9), BitsOffset(28, 41), 0,
+                                       &block_idx));
+      block_idx += last_block_idx + 1;
+      if (block_idx > (3u << 26)) {  // NOTE(review): reset points use >=; confirm
+        return JXL_FAILURE("Invalid block ID: %u", block_idx);
+      }
+      last_block_idx = block_idx;
+    }
+  }
+  std::vector<uint32_t> inter_marker_data_sizes;  // applied at the end on read
+  inter_marker_data_sizes.reserve(info.num_intermarker);
+  for (size_t i = 0; i < info.num_intermarker; ++i) {
+    uint32_t len = visitor->IsReading() ? 0 : inter_marker_data[i].size();
+    JXL_RETURN_IF_ERROR(visitor->Bits(16, 0, &len));
+    if (visitor->IsReading()) inter_marker_data_sizes.emplace_back(len);
+  }
+  uint32_t tail_data_len = tail_data.size();
+  if (!visitor->IsReading() && tail_data_len > 4260096) {
+    return JXL_FAILURE("Tail data too large (max size = 4260096, size = %u)",
+                       tail_data_len);
+  }
+  JXL_RETURN_IF_ERROR(visitor->U32(Val(0), BitsOffset(8, 1),
+                                   BitsOffset(16, 257), BitsOffset(22, 65793),
+                                   0, &tail_data_len));
+
+  JXL_RETURN_IF_ERROR(visitor->Bool(false, &has_zero_padding_bit));
+  if (has_zero_padding_bit) {
+    uint32_t nbit = padding_bits.size();
+    JXL_RETURN_IF_ERROR(visitor->Bits(24, 0, &nbit));
+    if (visitor->IsReading()) {
+      JXL_RETURN_IF_ERROR(CheckHasEnoughBits(visitor, nbit));
+      padding_bits.reserve(std::min<uint32_t>(1024u, nbit));
+      for (uint32_t i = 0; i < nbit; i++) {
+        bool bbit = false;
+        JXL_RETURN_IF_ERROR(visitor->Bool(false, &bbit));
+        padding_bits.push_back(bbit);
+      }
+    } else {
+      for (uint8_t& bit : padding_bits) {
+        bool bbit = bit;
+        JXL_RETURN_IF_ERROR(visitor->Bool(false, &bbit));
+        bit = bbit;
+      }
+    }
+  }
+  // A scan may only use Huffman slots defined by an earlier 0xC4 marker.
+  {
+    size_t dht_index = 0;
+    size_t scan_index = 0;
+    bool is_progressive = false;
+    bool ac_ok[kMaxHuffmanTables] = {false};
+    bool dc_ok[kMaxHuffmanTables] = {false};
+    for (uint8_t marker : marker_order) {
+      if (marker == 0xC2) {
+        is_progressive = true;
+      } else if (marker == 0xC4) {
+        for (; dht_index < huffman_code.size();) {
+          const JPEGHuffmanCode& huff = huffman_code[dht_index++];
+          size_t index = huff.slot_id;
+          if (index & 0x10) {
+            index -= 0x10;
+            ac_ok[index] = true;
+          } else {
+            dc_ok[index] = true;
+          }
+          if (huff.is_last) break;
+        }
+      } else if (marker == 0xDA) {
+        const JPEGScanInfo& si = scan_info[scan_index++];
+        for (size_t i = 0; i < si.num_components; ++i) {
+          const JPEGComponentScanInfo& csi = si.components[i];
+          size_t dc_tbl_idx = csi.dc_tbl_idx;
+          size_t ac_tbl_idx = csi.ac_tbl_idx;
+          bool want_dc = !is_progressive || (si.Ss == 0);
+          if (want_dc && !dc_ok[dc_tbl_idx]) {
+            return JXL_FAILURE("DC Huffman table used before defined");
+          }
+          bool want_ac = !is_progressive || (si.Ss != 0) || (si.Se != 0);
+          if (want_ac && !ac_ok[ac_tbl_idx]) {
+            return JXL_FAILURE("AC Huffman table used before defined");
+          }
+        }
+      }
+    }
+  }
+
+  // Apply postponed actions.
+  if (visitor->IsReading()) {
+    tail_data.resize(tail_data_len);
+    JXL_ASSERT(inter_marker_data_sizes.size() == info.num_intermarker);
+    inter_marker_data.reserve(info.num_intermarker);
+    for (size_t i = 0; i < info.num_intermarker; ++i) {
+      inter_marker_data.emplace_back(inter_marker_data_sizes[i]);
+    }
+  }
+
+  return true;
+}
+
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+void JPEGData::CalculateMcuSize(const JPEGScanInfo& scan, int* MCUs_per_row,
+                                int* MCU_rows) const {
+  // Block counts are multiplied by a "group" numerator before being divided by
+  // the MCU extent. Interleaved scans (more than one component) use 1, so an
+  // MCU spans max sampling factor blocks; otherwise the numerator is the
+  // sampling factor of the scan's first component, so an MCU is one block.
+  const JPEGComponent& first = components[scan.components[0].comp_idx];
+  const bool single = scan.num_components <= 1;
+  const int h_group = single ? first.h_samp_factor : 1;
+  const int v_group = single ? first.v_samp_factor : 1;
+  // Largest sampling factors over all components of the image.
+  int h_max = 1;
+  int v_max = 1;
+  for (const JPEGComponent& comp : components) {
+    if (comp.h_samp_factor > h_max) h_max = comp.h_samp_factor;
+    if (comp.v_samp_factor > v_max) v_max = comp.v_samp_factor;
+  }
+  *MCUs_per_row = DivCeil(width * h_group, 8 * h_max);
+  *MCU_rows = DivCeil(height * v_group, 8 * v_max);
+}
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+
+Status SetJPEGDataFromICC(const PaddedBytes& icc, jpeg::JPEGData* jpeg_data) {
+  // Copies consecutive pieces of `icc` into bytes 17.. of every APP marker
+  // tagged kICC; markers shorter than 17 bytes are rejected, not underflowed.
+  size_t icc_pos = 0;
+  for (size_t i = 0; i < jpeg_data->app_data.size(); i++) {
+    if (jpeg_data->app_marker_type[i] != jpeg::AppMarkerType::kICC) continue;
+    std::vector<uint8_t>& app = jpeg_data->app_data[i];
+    if (app.size() < 17) return JXL_FAILURE("ICC APP marker too short");
+    const size_t len = app.size() - 17;
+    if (len > icc.size() - icc_pos) {
+      return JXL_FAILURE("ICC length is less than APP markers: requested %"
+                         PRIuS " more bytes, %" PRIuS " available",
+                         len, icc.size() - icc_pos);
+    }
+    if (len != 0) memcpy(app.data() + 17, icc.data() + icc_pos, len);
+    icc_pos += len;
+  }
+  if (icc_pos != icc.size() && icc_pos != 0) {
+    return JXL_FAILURE("ICC length is more than APP markers");
+  }
+  return true;
+}
+
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+}  // namespace jpeg
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/jpeg_data.h b/third-party/libjxl/libjxl/lib/jxl/jpeg/jpeg_data.h
new file mode 100644
index 0000000000..a9c86ac139
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/jpeg_data.h
@@ -0,0 +1,216 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Data structures that represent the non-pixel contents of a jpeg file.
+
+#ifndef LIB_JXL_JPEG_JPEG_DATA_H_
+#define LIB_JXL_JPEG_JPEG_DATA_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <array>
+#include <vector>
+
+#include "lib/jxl/common.h"  // JPEGXL_ENABLE_TRANSCODE_JPEG
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+namespace jpeg {
+
+constexpr int kMaxComponents = 4;
+constexpr int kMaxQuantTables = 4;
+constexpr int kMaxHuffmanTables = 4;
+constexpr size_t kJpegHuffmanMaxBitLength = 16;
+constexpr int kJpegHuffmanAlphabetSize = 256;  // also the EOI symbol value
+constexpr int kJpegDCAlphabetSize = 12;  // DC symbols must be below this
+constexpr int kMaxDHTMarkers = 512;
+constexpr int kMaxDimPixels = 65535;
+constexpr uint8_t kApp1 = 0xE1;
+constexpr uint8_t kApp2 = 0xE2;
+const uint8_t kIccProfileTag[12] = "ICC_PROFILE";
+const uint8_t kExifTag[6] = "Exif\0";
+const uint8_t kXMPTag[29] = "http://ns.adobe.com/xap/1.0/";
+
+/* clang-format off */
+constexpr uint32_t kJPEGNaturalOrder[80] = {  // zig-zag position -> row-major index
+  0,   1,  8, 16,  9,  2,  3, 10,
+  17, 24, 32, 25, 18, 11,  4,  5,
+  12, 19, 26, 33, 40, 48, 41, 34,
+  27, 20, 13,  6,  7, 14, 21, 28,
+  35, 42, 49, 56, 57, 50, 43, 36,
+  29, 22, 15, 23, 30, 37, 44, 51,
+  58, 59, 52, 45, 38, 31, 39, 46,
+  53, 60, 61, 54, 47, 55, 62, 63,
+  // extra entries for safety in decoder
+  63, 63, 63, 63, 63, 63, 63, 63,
+  63, 63, 63, 63, 63, 63, 63, 63
+};
+
+constexpr uint32_t kJPEGZigZagOrder[64] = {  // row-major index -> zig-zag position
+  0,   1,  5,  6, 14, 15, 27, 28,
+  2,   4,  7, 13, 16, 26, 29, 42,
+  3,   8, 12, 17, 25, 30, 41, 43,
+  9,  11, 18, 24, 31, 40, 44, 53,
+  10, 19, 23, 32, 39, 45, 52, 54,
+  20, 22, 33, 38, 46, 51, 55, 60,
+  21, 34, 37, 47, 50, 56, 59, 61,
+  35, 36, 48, 49, 57, 58, 62, 63
+};
+/* clang-format on */
+
+// Quantization values for an 8x8 pixel block.
+struct JPEGQuantTable {
+  std::array<int32_t, kDCTBlockSize> values;
+  uint32_t precision = 0;  // 0 or 1; JPEGData::VisitFields rejects more
+  // The index of this quantization table as it was parsed from the input JPEG.
+  // Each DQT marker segment contains an 'index' field, and we save this index
+  // here. Valid values are 0 to 3.
+  uint32_t index = 0;
+  // Set to true if this table is the last one within its marker segment.
+  bool is_last = true;
+};
+
+// Huffman code and decoding lookup table used for DC and AC coefficients.
+struct JPEGHuffmanCode {
+  // Bit length histogram.
+  std::array<uint32_t, kJpegHuffmanMaxBitLength + 1> counts = {};
+  // Symbol values sorted by increasing bit lengths; EOI (256) comes last.
+  std::array<uint32_t, kJpegHuffmanAlphabetSize + 1> values = {};
+  // The index of the Huffman code in the current set of Huffman codes. For AC
+  // component Huffman codes, 0x10 is added to the index.
+  int slot_id = 0;
+  // Set to true if this Huffman code is the last one within its marker segment.
+  bool is_last = true;
+};
+
+// Huffman table indexes used for one component of one scan.
+struct JPEGComponentScanInfo {
+  uint32_t comp_idx;    // index into JPEGData::components
+  uint32_t dc_tbl_idx;  // DC Huffman slot, serialized in 2 bits
+  uint32_t ac_tbl_idx;  // AC Huffman slot, serialized in 2 bits
+};
+
+// Contains information that is used in one scan.
+struct JPEGScanInfo {
+  // Parameters used for progressive scans (named the same way as in the spec):
+  //   Ss : Start of spectral band in zig-zag sequence.
+  //   Se : End of spectral band in zig-zag sequence.
+  //   Ah : Successive approximation bit position, high.
+  //   Al : Successive approximation bit position, low.
+  uint32_t Ss;
+  uint32_t Se;
+  uint32_t Ah;
+  uint32_t Al;
+  uint32_t num_components = 0;  // 4 or more is rejected by JPEGData::VisitFields
+  std::array<JPEGComponentScanInfo, 4> components;
+  // Last codestream pass that is needed to write this scan.
+  uint32_t last_needed_pass = 0;
+
+  // Extra information required for bit-precise JPEG file reconstruction.
+
+  // Set of block indexes where the JPEG encoder has to flush the end-of-block
+  // runs and refinement bits.
+  std::vector<uint32_t> reset_points;
+  // The number of extra zero runs (Huffman symbol 0xf0) before the end of
+  // block (if nonzero), indexed by block index.
+  // All of these symbols can be omitted without changing the pixel values, but
+  // some jpeg encoders put these at the end of blocks.
+  typedef struct {
+    uint32_t block_idx;
+    uint32_t num_extra_zero_runs;
+  } ExtraZeroRunInfo;
+  std::vector<ExtraZeroRunInfo> extra_zero_runs;
+};
+
+typedef int16_t coeff_t;
+
+// Represents one component of a jpeg file.
+struct JPEGComponent {
+  JPEGComponent()  // id 0, 1x1 sampling, quant table 0, zero blocks
+      : id(0),
+        h_samp_factor(1),
+        v_samp_factor(1),
+        quant_idx(0),
+        width_in_blocks(0),
+        height_in_blocks(0) {}
+
+  // One-byte id of the component.
+  uint32_t id;
+  // Horizontal and vertical sampling factors.
+  // In interleaved mode, each minimal coded unit (MCU) has
+  // h_samp_factor x v_samp_factor DCT blocks from this component.
+  int h_samp_factor;
+  int v_samp_factor;
+  // The index of the quantization table used for this component.
+  uint32_t quant_idx;
+  // The dimensions of the component measured in 8x8 blocks.
+  uint32_t width_in_blocks;
+  uint32_t height_in_blocks;
+  // The DCT coefficients of this component, laid out block-by-block, divided
+  // through the quantization matrix values.
+  std::vector<coeff_t> coeffs;
+};
+
+enum class AppMarkerType : uint32_t {  // kind of each JPEGData::app_data entry
+  kUnknown = 0,
+  kICC = 1,  // bytes 17.. of the marker are filled by SetJPEGDataFromICC
+  kExif = 2,
+  kXMP = 3,
+};
+
+// Represents a parsed jpeg file.
+struct JPEGData : public Fields {
+  JPEGData()
+      : width(0), height(0), restart_interval(0), has_zero_padding_bit(false) {}
+
+  JXL_FIELDS_NAME(JPEGData)
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+  // Doesn't serialize everything - skips brotli-encoded data and what is
+  // already encoded in the codestream.
+  Status VisitFields(Visitor* visitor) override;
+#else
+  Status VisitFields(Visitor* /* visitor */) override {
+    JXL_UNREACHABLE("JPEG transcoding support not enabled");
+  }
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+  void CalculateMcuSize(const JPEGScanInfo& scan, int* MCUs_per_row,
+                        int* MCU_rows) const;
+
+  int width;
+  int height;
+  uint32_t restart_interval;
+  std::vector<std::vector<uint8_t>> app_data;
+  std::vector<AppMarkerType> app_marker_type;  // parallel to app_data
+  std::vector<std::vector<uint8_t>> com_data;
+  std::vector<JPEGQuantTable> quant;
+  std::vector<JPEGHuffmanCode> huffman_code;
+  std::vector<JPEGComponent> components;
+  std::vector<JPEGScanInfo> scan_info;  // one entry per 0xda in marker_order
+  std::vector<uint8_t> marker_order;  // marker bytes in order; ends with 0xd9
+  std::vector<std::vector<uint8_t>> inter_marker_data;  // one per fake 0xff
+  std::vector<uint8_t> tail_data;
+
+  // Extra information required for bit-precise JPEG file reconstruction.
+
+  bool has_zero_padding_bit;
+  std::vector<uint8_t> padding_bits;  // one 0/1 entry per padding bit
+};
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+// Copies `icc` piecewise into the kICC APP markers of jpeg_data (byte 17 on).
+Status SetJPEGDataFromICC(const PaddedBytes& icc, jpeg::JPEGData* jpeg_data);
+#else
+static JXL_INLINE Status SetJPEGDataFromICC(const PaddedBytes& /* icc */,
+                                            jpeg::JPEGData* /* jpeg_data */) {
+  JXL_UNREACHABLE("JPEG transcoding support not enabled");
+}
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+}  // namespace jpeg
+}  // namespace jxl
+
+#endif  // LIB_JXL_JPEG_JPEG_DATA_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/jxl.syms b/third-party/libjxl/libjxl/lib/jxl/jxl.syms
new file mode 100644
index 0000000000..0f398d7151
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/jxl.syms
@@ -0,0 +1,5 @@
+{
+  extern "C" {
+    jpegxl_*;
+  };
+};
diff --git a/third-party/libjxl/libjxl/lib/jxl/jxl.version b/third-party/libjxl/libjxl/lib/jxl/jxl.version
new file mode 100644
index 0000000000..26b0e9e54d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/jxl.version
@@ -0,0 +1,17 @@
+JXL_0 {
+  global:
+    Jxl*;
+
+  local:
+    # Hide all the std namespace symbols. std namespace is explicitly marked
+    # as visibility(default) and header-only functions or methods (such as those
+    # from templates) should be exposed in shared libraries as weak symbols but
+    # this is only needed when we expose those types in the shared library API
+    # in any way. We don't use C++ std types in the API and we also don't
+    # support exceptions in the library.
+    # See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=36022 for a discussion
+    # about this.
+    extern "C++" {
+      *std::*;
+    };
+};
diff --git a/third-party/libjxl/libjxl/lib/jxl/jxl_osx.syms b/third-party/libjxl/libjxl/lib/jxl/jxl_osx.syms
new file mode 100644
index 0000000000..96bc568025
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/jxl_osx.syms
@@ -0,0 +1 @@
+_Jxl*
diff --git a/third-party/libjxl/libjxl/lib/jxl/jxl_test.cc b/third-party/libjxl/libjxl/lib/jxl/jxl_test.cc
new file mode 100644
index 0000000000..c43942a939
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/jxl_test.cc
@@ -0,0 +1,1569 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/jxl.h"
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <array>
+#include <future>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/dec/decode.h"
+#include "lib/extras/enc/encode.h"
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/fake_parallel_runner_testonly.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/jpeg/dec_jpeg_data.h"
+#include "lib/jxl/jpeg/dec_jpeg_data_writer.h"
+#include "lib/jxl/jpeg/enc_jpeg_data.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+#include "lib/jxl/modular/options.h"
+#include "lib/jxl/test_image.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+#include "tools/box/box.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+namespace {
+using extras::JXLCompressParams;
+using extras::JXLDecompressParams;
+using extras::PackedPixelFile;
+using test::ButteraugliDistance;
+using test::ComputeDistance2;
+using test::Roundtrip;
+using test::TestImage;
+using test::ThreadPoolForTests;
+
+#define JXL_TEST_NL 0  // Disabled in code
+
+TEST(JxlTest, RoundtripSinglePixel) {
+  TestImage pixel;  // becomes a 1x1 image with one zero-filled frame
+  pixel.SetDimensions(1, 1).AddFrame().ZeroFill();
+  PackedPixelFile decoded;
+  EXPECT_EQ(Roundtrip(pixel.ppf(), {}, {}, nullptr, &decoded), 55);
+}
+
+TEST(JxlTest, RoundtripSinglePixelWithAlpha) {
+  TestImage pixel;  // becomes a 1x1, 4-channel image with one zeroed frame
+  pixel.SetDimensions(1, 1).SetChannels(4).AddFrame().ZeroFill();
+  PackedPixelFile decoded;
+  EXPECT_EQ(Roundtrip(pixel.ppf(), {}, {}, nullptr, &decoded), 59);
+}
+
+// Flipping either of the first two bytes (the signature) makes Decode fail.
+#ifndef JXL_CRASH_ON_ERROR
+TEST(JxlTest, RoundtripMarker) {
+  TestImage t;
+  t.SetDimensions(1, 1).AddFrame().ZeroFill();
+  for (size_t i = 0; i < 2; ++i) {
+    std::vector<uint8_t> compressed;
+    EXPECT_TRUE(extras::EncodeImageJXL({}, t.ppf(), /*jpeg_bytes=*/nullptr,
+                                       &compressed));
+    compressed[i] ^= 0xFF;  // invert every bit of byte i
+    PackedPixelFile ppf_out;
+    EXPECT_FALSE(extras::DecodeImageJXL(compressed.data(), compressed.size(),
+                                        {}, /*decoded_bytes=*/nullptr,
+                                        &ppf_out));
+  }
+}
+#endif  // JXL_CRASH_ON_ERROR
+
+TEST(JxlTest, RoundtripTinyFast) {
+  // 32x32 input, effort 7, distance 4, no thread pool.
+  const PaddedBytes png = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  TestImage image;
+  image.DecodeFromBytes(png).ClearMetadata().SetDimensions(32, 32);
+  JXLCompressParams params;
+  params.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7);
+  params.distance = 4.0f;
+  PackedPixelFile decoded;
+  const size_t compressed_size =
+      Roundtrip(image.ppf(), params, {}, /*pool=*/nullptr, &decoded);
+  EXPECT_NEAR(compressed_size, 181, 15);
+}
+
+TEST(JxlTest, RoundtripSmallD1) {
+  const PaddedBytes png = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  TestImage image;
+  image.DecodeFromBytes(png).ClearMetadata();
+  // Use one eighth of the decoded width and height.
+  const size_t small_xsize = image.ppf().info.xsize / 8;
+  const size_t small_ysize = image.ppf().info.ysize / 8;
+  image.SetDimensions(small_xsize, small_ysize);
+  ThreadPool* no_pool = nullptr;
+  {
+    PackedPixelFile decoded;
+    EXPECT_NEAR(Roundtrip(image.ppf(), {}, {}, no_pool, &decoded), 816, 40);
+    EXPECT_THAT(ButteraugliDistance(image.ppf(), decoded),
+                IsSlightlyBelow(0.888));
+  }
+
+  // A lower intensity target than the default should give a smaller bitrate.
+  image.ppf().info.intensity_target = 100.0f;
+  {
+    PackedPixelFile decoded;
+    EXPECT_NEAR(Roundtrip(image.ppf(), {}, {}, no_pool, &decoded), 659, 20);
+    EXPECT_THAT(ButteraugliDistance(image.ppf(), decoded),
+                IsSlightlyBelow(1.3));
+    EXPECT_EQ(decoded.info.intensity_target, image.ppf().info.intensity_target);
+  }
+}
+TEST(JxlTest, RoundtripResample2) {
+  // Resampling 2 at effort 3, without a thread pool.
+  const PaddedBytes png = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  TestImage image;
+  image.DecodeFromBytes(png).ClearMetadata();
+  JXLCompressParams params;
+  params.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 2);
+  params.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 3);  // kFalcon
+  PackedPixelFile decoded;
+  const size_t compressed_size =
+      Roundtrip(image.ppf(), params, {}, /*pool=*/nullptr, &decoded);
+  EXPECT_NEAR(compressed_size, 18500, 200);
+  EXPECT_THAT(ComputeDistance2(image.ppf(), decoded), IsSlightlyBelow(90));
+}
+
+TEST(JxlTest, RoundtripResample2Slow) {
+  // Resampling 2 at effort 9 and distance 10, without a thread pool.
+  const PaddedBytes png = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  TestImage image;
+  image.DecodeFromBytes(png).ClearMetadata();
+  JXLCompressParams params;
+  params.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 2);
+  params.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 9);  // kTortoise
+  params.distance = 10.0;
+  PackedPixelFile decoded;
+  const size_t compressed_size =
+      Roundtrip(image.ppf(), params, {}, /*pool=*/nullptr, &decoded);
+  EXPECT_NEAR(compressed_size, 3888, 200);
+  EXPECT_THAT(ComputeDistance2(image.ppf(), decoded), IsSlightlyBelow(250));
+}
+
+TEST(JxlTest, RoundtripResample2MT) {
+  ThreadPoolForTests workers(4);
+  // The image must be large enough to span multiple groups after downsampling.
+  const PaddedBytes png = jxl::test::ReadTestData("jxl/flower/flower.png");
+  TestImage image;
+  image.DecodeFromBytes(png).ClearMetadata();
+  JXLCompressParams params;
+  params.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 2);
+  params.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 3);  // kFalcon
+  PackedPixelFile decoded;
+  const size_t compressed_size =
+      Roundtrip(image.ppf(), params, {}, &workers, &decoded);
+  EXPECT_NEAR(compressed_size, 223310, 2000);
+  EXPECT_THAT(ComputeDistance2(image.ppf(), decoded), IsSlightlyBelow(340));
+}
+
+// Round-trips through a fake parallel runner that runs on a single thread but
+// visits the tasks in random order.
+TEST(JxlTest, RoundtripOutOfOrderProcessing) {
+  FakeParallelRunner shuffled_runner(/*order_seed=*/123, /*num_threads=*/8);
+  ThreadPool shuffled_pool(&JxlFakeParallelRunner, &shuffled_runner);
+  const PaddedBytes png = jxl::test::ReadTestData("jxl/flower/flower.png");
+  TestImage image;
+  image.DecodeFromBytes(png).ClearMetadata();
+  // The size is picked so that the block border needed exceeds the number of
+  // pixels available on the next block.
+  image.SetDimensions(513, 515);
+  JXLCompressParams params;
+  // EPF is forced so that a lot of border ends up being needed.
+  params.AddOption(JXL_ENC_FRAME_SETTING_EPF, 3);
+  PackedPixelFile decoded;
+  const size_t compressed_size =
+      Roundtrip(image.ppf(), params, {}, &shuffled_pool, &decoded);
+  EXPECT_NEAR(compressed_size, 22999, 400);
+  EXPECT_LE(ButteraugliDistance(image.ppf(), decoded), 1.35);
+}
+
+TEST(JxlTest, RoundtripOutOfOrderProcessingBorder) {
+  FakeParallelRunner shuffled_runner(/*order_seed=*/47, /*num_threads=*/8);
+  ThreadPool shuffled_pool(&JxlFakeParallelRunner, &shuffled_runner);
+  const PaddedBytes png = jxl::test::ReadTestData("jxl/flower/flower.png");
+  TestImage image;
+  image.DecodeFromBytes(png).ClearMetadata();
+  // The size is picked so that the block border needed exceeds the number of
+  // pixels available on the next block.
+  image.SetDimensions(513, 515);
+  JXLCompressParams params;
+  // EPF is forced so that a lot of border ends up being needed.
+  params.AddOption(JXL_ENC_FRAME_SETTING_EPF, 3);
+  params.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 2);
+  PackedPixelFile decoded;
+  const size_t compressed_size =
+      Roundtrip(image.ppf(), params, {}, &shuffled_pool, &decoded);
+  EXPECT_NEAR(compressed_size, 11015, 200);
+  EXPECT_LE(ButteraugliDistance(image.ppf(), decoded), 2.9);
+}
+
+TEST(JxlTest, RoundtripResample4) {
+  // Resampling 4 with otherwise default settings, without a thread pool.
+  const PaddedBytes png = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  TestImage image;
+  image.DecodeFromBytes(png).ClearMetadata();
+  JXLCompressParams params;
+  params.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 4);
+  PackedPixelFile decoded;
+  const size_t compressed_size =
+      Roundtrip(image.ppf(), params, {}, /*pool=*/nullptr, &decoded);
+  EXPECT_NEAR(compressed_size, 5758, 100);
+  EXPECT_THAT(ButteraugliDistance(image.ppf(), decoded), IsSlightlyBelow(22));
+}
+
+TEST(JxlTest, RoundtripResample8) {
+  // Resampling 8 with otherwise default settings, without a thread pool.
+  const PaddedBytes png = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  TestImage image;
+  image.DecodeFromBytes(png).ClearMetadata();
+  JXLCompressParams params;
+  params.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 8);
+  PackedPixelFile decoded;
+  const size_t compressed_size =
+      Roundtrip(image.ppf(), params, {}, /*pool=*/nullptr, &decoded);
+  EXPECT_NEAR(compressed_size, 2036, 50);
+  EXPECT_THAT(ButteraugliDistance(image.ppf(), decoded), IsSlightlyBelow(50));
+}
+
+TEST(JxlTest, RoundtripUnalignedD2) {
+  const PaddedBytes png = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  TestImage image;
+  image.DecodeFromBytes(png).ClearMetadata();
+  // Divide the decoded width by 12 and the decoded height by 7.
+  const size_t small_xsize = image.ppf().info.xsize / 12;
+  const size_t small_ysize = image.ppf().info.ysize / 7;
+  image.SetDimensions(small_xsize, small_ysize);
+  JXLCompressParams params;
+  params.distance = 2.0;
+  PackedPixelFile decoded;
+  const size_t compressed_size =
+      Roundtrip(image.ppf(), params, {}, /*pool=*/nullptr, &decoded);
+  EXPECT_NEAR(compressed_size, 506, 30);
+  EXPECT_THAT(ButteraugliDistance(image.ppf(), decoded), IsSlightlyBelow(1.72));
+}
+
+TEST(JxlTest, RoundtripMultiGroup) {
+  const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+  TestImage t;
+  t.DecodeFromBytes(orig).ClearMetadata().SetDimensions(600, 1024);
+  // Round-trips `t` with the given settings, then checks size and distance.
+  auto test = [&](jxl::SpeedTier speed_tier, float target_distance,
+                  size_t expected_size, float expected_distance) {
+    ThreadPoolForTests pool(4);
+    JXLCompressParams cparams;
+    int64_t effort = 10 - static_cast<int>(speed_tier);
+    cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, effort);
+    cparams.distance = target_distance;
+    // The compressed size may differ from `expected_size` by at most 700.
+    PackedPixelFile ppf_out;
+    EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), expected_size,
+                700);
+    EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out),
+                IsSlightlyBelow(expected_distance));
+  };
+  // Both runs execute concurrently; the std::async futures join on destruction.
+  auto run_kitten = std::async(std::launch::async, test, SpeedTier::kKitten,
+                               1.0f, 55602u, 11.7);
+  auto run_wombat = std::async(std::launch::async, test, SpeedTier::kWombat,
+                               2.0f, 33624u, 20.0);
+}
+
+TEST(JxlTest, RoundtripRGBToGrayscale) {
+  ThreadPoolForTests pool(4);
+  const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+  io.ShrinkTo(600, 1024);
+  // Encoder settings: butteraugli distance 1 at the kFalcon speed tier.
+  CompressParams cparams;
+  cparams.butteraugli_distance = 1.0f;
+  cparams.speed_tier = SpeedTier::kFalcon;
+  // Request a different output color space; the result must report IsGray().
+  JXLDecompressParams dparams;
+  dparams.color_space = "Gra_D65_Rel_SRG";
+  // The input is not gray before the roundtrip; the decoded output must be.
+  CodecInOut io2;
+  EXPECT_FALSE(io.Main().IsGray());
+  size_t compressed_size;
+  JXL_EXPECT_OK(
+      Roundtrip(&io, cparams, dparams, &io2, _, &compressed_size, &pool));
+  EXPECT_LE(compressed_size, 65000u);
+  EXPECT_TRUE(io2.Main().IsGray());
+
+  // Convert original to grayscale here, because TransformTo refuses to
+  // convert between grayscale and RGB.
+  ColorEncoding srgb_lin = ColorEncoding::LinearSRGB(/*is_gray=*/false);
+  ASSERT_TRUE(io.frames[0].TransformTo(srgb_lin, GetJxlCms()));
+  Image3F* color = io.Main().color();
+  for (size_t y = 0; y < color->ysize(); ++y) {
+    float* row_r = color->PlaneRow(0, y);
+    float* row_g = color->PlaneRow(1, y);
+    float* row_b = color->PlaneRow(2, y);
+    for (size_t x = 0; x < color->xsize(); ++x) {
+      float luma = 0.2126 * row_r[x] + 0.7152 * row_g[x] + 0.0722 * row_b[x];
+      row_r[x] = row_g[x] = row_b[x] = luma;  // same value in all three planes
+    }
+  }
+  ColorEncoding srgb_gamma = ColorEncoding::SRGB(/*is_gray=*/false);
+  ASSERT_TRUE(io.frames[0].TransformTo(srgb_gamma, GetJxlCms()));
+  io.metadata.m.color_encoding = io2.Main().c_current();
+  io.Main().OverrideProfile(io2.Main().c_current());
+  EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, ButteraugliParams(),
+                                  GetJxlCms(),
+                                  /*distmap=*/nullptr, &pool),
+              IsSlightlyBelow(1.36));
+}
+
+TEST(JxlTest, RoundtripLargeFast) {
+  ThreadPoolForTests workers(8);
+  const PaddedBytes png = jxl::test::ReadTestData("jxl/flower/flower.png");
+  TestImage image;
+  image.DecodeFromBytes(png).ClearMetadata();
+  JXLCompressParams params;
+  params.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7);  // kSquirrel
+  PackedPixelFile decoded;
+  const size_t compressed_size =
+      Roundtrip(image.ppf(), params, {}, &workers, &decoded);
+  EXPECT_NEAR(compressed_size, 445555, 5000);
+  EXPECT_THAT(ComputeDistance2(image.ppf(), decoded), IsSlightlyBelow(100));
+}
+
+TEST(JxlTest, RoundtripDotsForceEpf) {
+  ThreadPoolForTests workers(8);
+  const PaddedBytes png = jxl::test::ReadTestData(
+      "external/wesaturate/500px/cvo9xd_keong_macan_srgb8.png");
+  TestImage image;
+  image.DecodeFromBytes(png).ClearMetadata();
+  JXLCompressParams params;
+  params.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7);  // kSquirrel
+  params.AddOption(JXL_ENC_FRAME_SETTING_EPF, 2);
+  params.AddOption(JXL_ENC_FRAME_SETTING_DOTS, 1);
+  PackedPixelFile decoded;
+  const size_t compressed_size =
+      Roundtrip(image.ppf(), params, {}, &workers, &decoded);
+  EXPECT_NEAR(compressed_size, 41777, 300);
+  EXPECT_THAT(ComputeDistance2(image.ppf(), decoded), IsSlightlyBelow(18));
+}
+
+// Runs the same distance-2 roundtrip twice and expects identical size and
+// distance. Distance 2 involves additional processing, including adaptive
+// reconstruction, so a mismatch may reveal races or invalid memory accesses.
+TEST(JxlTest, RoundtripD2Consistent) {
+  ThreadPoolForTests workers(8);
+  const PaddedBytes png = jxl::test::ReadTestData("jxl/flower/flower.png");
+  TestImage image;
+  image.DecodeFromBytes(png).ClearMetadata();
+
+  JXLCompressParams params;
+  params.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7);  // kSquirrel
+  params.distance = 2.0;
+
+  // Cover each xsize mod kBlockDim so right-border handling is exercised.
+  for (size_t width = 48; width > 40; --width) {
+    image.SetDimensions(width, 15);
+
+    PackedPixelFile out_a;
+    const size_t size_a = Roundtrip(image.ppf(), params, {}, &workers, &out_a);
+
+    PackedPixelFile out_b;
+    const size_t size_b = Roundtrip(image.ppf(), params, {}, &workers, &out_b);
+
+    // The compressed sizes must match exactly.
+    EXPECT_EQ(size_a, size_b);
+
+    // The distances to the original must match exactly as well.
+    const float dist_a = ComputeDistance2(image.ppf(), out_a);
+    const float dist_b = ComputeDistance2(image.ppf(), out_b);
+    EXPECT_EQ(dist_a, dist_b);
+  }
+}
+
+// Consistency check on the full image, so that multiple groups are tested.
+TEST(JxlTest, RoundtripLargeConsistent) {
+  const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+  TestImage t;
+  t.DecodeFromBytes(orig).ClearMetadata();
+
+  JXLCompressParams cparams;
+  cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7);  // kSquirrel
+  cparams.distance = 2.0;
+  // One roundtrip with its own thread pool; returns (size, distance).
+  auto roundtrip_and_compare = [&]() {
+    ThreadPoolForTests pool(8);
+    PackedPixelFile ppf2;
+    size_t size = Roundtrip(t.ppf(), cparams, {}, &pool, &ppf2);
+    double dist = ComputeDistance2(t.ppf(), ppf2);
+    return std::tuple<size_t, double>(size, dist);
+  };
+
+  // Launch the identical roundtrip twice, each on its own asynchronous task.
+  auto future2 = std::async(std::launch::async, roundtrip_and_compare);
+  auto future3 = std::async(std::launch::async, roundtrip_and_compare);
+
+  const auto result2 = future2.get();
+  const auto result3 = future3.get();
+
+  // Exact same compressed size.
+  EXPECT_EQ(std::get<0>(result2), std::get<0>(result3));
+
+  // Exact same distance.
+  EXPECT_EQ(std::get<1>(result2), std::get<1>(result3));
+}
+
+TEST(JxlTest, RoundtripSmallNL) {
+  const PaddedBytes png = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  TestImage image;
+  image.DecodeFromBytes(png).ClearMetadata();
+  const size_t small_xsize = image.ppf().info.xsize / 8;
+  const size_t small_ysize = image.ppf().info.ysize / 8;
+  image.SetDimensions(small_xsize, small_ysize);
+  PackedPixelFile decoded;
+  const size_t compressed_size =
+      Roundtrip(image.ppf(), {}, {}, /*pool=*/nullptr, &decoded);
+  EXPECT_NEAR(compressed_size, 801, 45);
+  EXPECT_THAT(ButteraugliDistance(image.ppf(), decoded), IsSlightlyBelow(1.1));
+}
+
+TEST(JxlTest, RoundtripNoGaborishNoAR) {
+  // The EPF and gaborish frame settings are both set to 0.
+  const PaddedBytes png = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  TestImage image;
+  image.DecodeFromBytes(png).ClearMetadata();
+  JXLCompressParams params;
+  params.AddOption(JXL_ENC_FRAME_SETTING_EPF, 0);
+  params.AddOption(JXL_ENC_FRAME_SETTING_GABORISH, 0);
+  PackedPixelFile decoded;
+  const size_t compressed_size =
+      Roundtrip(image.ppf(), params, {}, /*pool=*/nullptr, &decoded);
+  EXPECT_NEAR(compressed_size, 38900, 200);
+  EXPECT_THAT(ButteraugliDistance(image.ppf(), decoded), IsSlightlyBelow(1.8));
+}
+
+TEST(JxlTest, RoundtripSmallNoGaborish) {
+  const PaddedBytes png = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  TestImage image;
+  image.DecodeFromBytes(png).ClearMetadata();
+  // Use one eighth of the decoded width and height.
+  const size_t small_xsize = image.ppf().info.xsize / 8;
+  const size_t small_ysize = image.ppf().info.ysize / 8;
+  image.SetDimensions(small_xsize, small_ysize);
+  JXLCompressParams params;
+  params.AddOption(JXL_ENC_FRAME_SETTING_GABORISH, 0);
+  PackedPixelFile decoded;
+  const size_t compressed_size =
+      Roundtrip(image.ppf(), params, {}, /*pool=*/nullptr, &decoded);
+  EXPECT_NEAR(compressed_size, 811, 20);
+  EXPECT_THAT(ButteraugliDistance(image.ppf(), decoded), IsSlightlyBelow(1.1));
+}
+
+TEST(JxlTest, RoundtripSmallPatchesAlpha) {
+  TestImage image;
+  image.SetDimensions(256, 256).SetChannels(4);
+  image.SetColorEncoding("RGB_D65_SRG_Rel_Lin");
+  TestImage::Frame frame = image.AddFrame();
+  frame.ZeroFill();
+  // Channel 1 is set on every 4th column of every 4th band of 32 rows; this
+  // pattern should be picked up by the patch detection heuristics.
+  for (size_t y = 0; y < image.ppf().info.ysize; ++y) {
+    const bool in_band = (y / 32) % 4 == 0;
+    for (size_t x = 0; x < image.ppf().info.xsize; ++x) {
+      if (in_band && x % 4 == 0) frame.SetValue(y, x, 1, 127.0f / 255.0f);
+      frame.SetValue(y, x, 3, 1.0f);  // channel 3 is 1.0 everywhere
+    }
+  }
+  JXLCompressParams params;
+  params.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7);  // kSquirrel
+  params.distance = 0.1f;
+  PackedPixelFile decoded;
+  const size_t compressed_size =
+      Roundtrip(image.ppf(), params, {}, /*pool=*/nullptr, &decoded);
+  EXPECT_NEAR(compressed_size, 597, 100);
+  EXPECT_THAT(ButteraugliDistance(image.ppf(), decoded),
+              IsSlightlyBelow(0.018f));
+}
+
+TEST(JxlTest, RoundtripSmallPatches) {
+  TestImage image;
+  image.SetDimensions(256, 256);
+  image.SetColorEncoding("RGB_D65_SRG_Rel_Lin");
+  TestImage::Frame frame = image.AddFrame();
+  frame.ZeroFill();
+  // Channel 1 is set on every 4th column of every 4th band of 32 rows; this
+  // pattern should be picked up by the patch detection heuristics.
+  for (size_t y = 0; y < image.ppf().info.ysize; ++y) {
+    const bool in_band = (y / 32) % 4 == 0;
+    for (size_t x = 0; x < image.ppf().info.xsize; ++x) {
+      if (in_band && x % 4 == 0) frame.SetValue(y, x, 1, 127.0f / 255.0f);
+    }
+  }
+  JXLCompressParams params;
+  params.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7);  // kSquirrel
+  params.distance = 0.1f;
+  PackedPixelFile decoded;
+  const size_t compressed_size =
+      Roundtrip(image.ppf(), params, {}, /*pool=*/nullptr, &decoded);
+  EXPECT_NEAR(compressed_size, 486, 100);
+  EXPECT_THAT(ButteraugliDistance(image.ppf(), decoded),
+              IsSlightlyBelow(0.018f));
+}
+
+// TODO(szabadka) Add encoder and decoder API functions that accept frame
+// buffers in arbitrary unsigned and floating point formats, and then roundtrip
+// test the lossless codepath to make sure the exact binary representations
+// are preserved.
+#if 0
+TEST(JxlTest, RoundtripImageBundleOriginalBits) {
+  // Image does not matter, only io.metadata.m and io2.metadata.m are tested.
+  Image3F image(1, 1);
+  ZeroFillImage(&image);
+  CodecInOut io;
+  io.metadata.m.color_encoding = ColorEncoding::LinearSRGB();
+  io.SetFromImage(std::move(image), ColorEncoding::LinearSRGB());
+  // Default-constructed compression parameters are used for every roundtrip.
+  CompressParams cparams;
+
+  // Test unsigned integers from 1 to 32 bits
+  for (uint32_t bit_depth = 1; bit_depth <= 32; bit_depth++) {
+    if (bit_depth == 32) {
+      // TODO(lode): allow testing 32, however the code below ends up in
+      // enc_modular which does not support 32. We only want to test the header
+      // encoding though, so try without modular.
+      break;
+    }
+
+    io.metadata.m.SetUintSamples(bit_depth);
+    CodecInOut io2;
+    JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _));
+
+    EXPECT_EQ(bit_depth, io2.metadata.m.bit_depth.bits_per_sample);
+    EXPECT_FALSE(io2.metadata.m.bit_depth.floating_point_sample);
+    EXPECT_EQ(0u, io2.metadata.m.bit_depth.exponent_bits_per_sample);
+    EXPECT_EQ(0u, io2.metadata.m.GetAlphaBits());
+  }
+
+  // Test various existing and non-existing floating point formats
+  for (uint32_t bit_depth = 8; bit_depth <= 32; bit_depth++) {
+    if (bit_depth != 32) {
+      // TODO: test other float types once they work
+      break;
+    }
+    // Pick the exponent width that goes with this total bit depth.
+    uint32_t exponent_bit_depth;
+    if (bit_depth < 10) {
+      exponent_bit_depth = 2;
+    } else if (bit_depth < 12) {
+      exponent_bit_depth = 3;
+    } else if (bit_depth < 16) {
+      exponent_bit_depth = 4;
+    } else if (bit_depth < 20) {
+      exponent_bit_depth = 5;
+    } else if (bit_depth < 24) {
+      exponent_bit_depth = 6;
+    } else if (bit_depth < 28) {
+      exponent_bit_depth = 7;
+    } else {
+      exponent_bit_depth = 8;
+    }
+
+    io.metadata.m.bit_depth.bits_per_sample = bit_depth;
+    io.metadata.m.bit_depth.floating_point_sample = true;
+    io.metadata.m.bit_depth.exponent_bits_per_sample = exponent_bit_depth;
+    // Pass `_` here as well, matching the unsigned-integer loop above.
+    CodecInOut io2;
+    JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _));
+
+    EXPECT_EQ(bit_depth, io2.metadata.m.bit_depth.bits_per_sample);
+    EXPECT_TRUE(io2.metadata.m.bit_depth.floating_point_sample);
+    EXPECT_EQ(exponent_bit_depth,
+              io2.metadata.m.bit_depth.exponent_bits_per_sample);
+    EXPECT_EQ(0u, io2.metadata.m.GetAlphaBits());
+  }
+}
+#endif
+
+TEST(JxlTest, RoundtripGrayscale) {
+  const PaddedBytes orig = jxl::test::ReadTestData(
+      "external/wesaturate/500px/cvo9xd_keong_macan_grayscale.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+  ASSERT_NE(io.xsize(), 0u);
+  io.ShrinkTo(128, 128);
+  EXPECT_TRUE(io.Main().IsGray());
+  EXPECT_EQ(8u, io.metadata.m.bit_depth.bits_per_sample);
+  EXPECT_FALSE(io.metadata.m.bit_depth.floating_point_sample);
+  EXPECT_EQ(0u, io.metadata.m.bit_depth.exponent_bits_per_sample);
+  EXPECT_TRUE(io.metadata.m.color_encoding.tf.IsSRGB());
+  // Encoder state and aux output are shared by the three encode calls below.
+  PassesEncoderState enc_state;
+  AuxOut* aux_out = nullptr;
+  // Case 1: distance 1.0 with default decode parameters; output stays gray.
+  {
+    CompressParams cparams;
+    cparams.butteraugli_distance = 1.0;
+
+    PaddedBytes compressed;
+    EXPECT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(),
+                           aux_out));
+    CodecInOut io2;
+    EXPECT_TRUE(test::DecodeFile({}, Span<const uint8_t>(compressed), &io2));
+    EXPECT_TRUE(io2.Main().IsGray());
+
+    EXPECT_LE(compressed.size(), 7000u);
+    EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, ButteraugliParams(),
+                                    GetJxlCms(),
+                                    /*distmap=*/nullptr),
+                IsSlightlyBelow(1.6));
+  }
+
+  // Test with larger butteraugli distance and other settings enabled so
+  // different jxl codepaths trigger.
+  {
+    CompressParams cparams;
+    cparams.butteraugli_distance = 8.0;
+
+    PaddedBytes compressed;
+    EXPECT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(),
+                           aux_out));
+    CodecInOut io2;
+    EXPECT_TRUE(test::DecodeFile({}, Span<const uint8_t>(compressed), &io2));
+    EXPECT_TRUE(io2.Main().IsGray());
+
+    EXPECT_LE(compressed.size(), 1300u);
+    EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, ButteraugliParams(),
+                                    GetJxlCms(),
+                                    /*distmap=*/nullptr),
+                IsSlightlyBelow(6.0));
+  }
+  // Case 3: distance 1.0 again, decoding into "RGB_D65_SRG_Rel_SRG" instead.
+  {
+    CompressParams cparams;
+    cparams.butteraugli_distance = 1.0;
+
+    PaddedBytes compressed;
+    EXPECT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(),
+                           aux_out));
+
+    CodecInOut io2;
+    JXLDecompressParams dparams;
+    dparams.color_space = "RGB_D65_SRG_Rel_SRG";
+    EXPECT_TRUE(
+        test::DecodeFile(dparams, Span<const uint8_t>(compressed), &io2));
+    EXPECT_FALSE(io2.Main().IsGray());  // decoded output is no longer gray
+
+    EXPECT_LE(compressed.size(), 7000u);
+    EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, ButteraugliParams(),
+                                    GetJxlCms(),
+                                    /*distmap=*/nullptr),
+                IsSlightlyBelow(1.6));
+  }
+}
+
+TEST(JxlTest, RoundtripAlpha) {
+  const PaddedBytes png = jxl::test::ReadTestData(
+      "external/wesaturate/500px/tmshre_riaphotographs_alpha.png");
+  CodecInOut input;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &input));
+  // The source must be a non-empty image that carries alpha.
+  ASSERT_NE(input.xsize(), 0u);
+  ASSERT_TRUE(input.metadata.m.HasAlpha());
+  ASSERT_TRUE(input.Main().HasAlpha());
+  input.ShrinkTo(300, 300);
+
+  CompressParams enc_params;
+  enc_params.butteraugli_distance = 1.0;
+  // Expect 8-bit integer samples with an sRGB transfer function.
+  EXPECT_EQ(8u, input.metadata.m.bit_depth.bits_per_sample);
+  EXPECT_FALSE(input.metadata.m.bit_depth.floating_point_sample);
+  EXPECT_EQ(0u, input.metadata.m.bit_depth.exponent_bits_per_sample);
+  EXPECT_TRUE(input.metadata.m.color_encoding.tf.IsSRGB());
+  PassesEncoderState enc_state;
+  AuxOut* no_aux_out = nullptr;
+  PaddedBytes encoded;
+  EXPECT_TRUE(EncodeFile(enc_params, &input, &enc_state, &encoded, GetJxlCms(),
+                         no_aux_out));
+  EXPECT_LE(encoded.size(), 10077u);
+
+  // Decode with every combination of output path and alpha handling.
+  for (bool with_callback : {false, true}) {
+    for (bool unpremultiply : {false, true}) {
+      CodecInOut output;
+      JXLDecompressParams dec_params;
+      dec_params.use_image_callback = with_callback;
+      dec_params.unpremultiply_alpha = unpremultiply;
+      EXPECT_TRUE(
+          test::DecodeFile(dec_params, Span<const uint8_t>(encoded), &output));
+      EXPECT_THAT(ButteraugliDistance(input.frames, output.frames,
+                                      ButteraugliParams(), GetJxlCms(),
+                                      /*distmap=*/nullptr),
+                  IsSlightlyBelow(1.15));
+    }
+  }
+}
+
+namespace {
+// Applies ::jxl::PremultiplyAlpha to the preview (if present) and every frame.
+// Returns false, changing nothing, when there is no alpha channel or the alpha
+// is already flagged as associated; otherwise flags it and returns true.
+bool PremultiplyAlpha(CodecInOut& io) {
+  const auto premultiply_bundle = [](ImageBundle& bundle) {
+    // Bundles lacking either alpha or color are left untouched.
+    if (!bundle.HasAlpha() || !bundle.HasColor()) return;
+    auto* color = bundle.color();
+    const auto* alpha = bundle.alpha();
+    JXL_CHECK(color->ysize() == alpha->ysize());
+    JXL_CHECK(color->xsize() == alpha->xsize());
+    for (size_t row = 0; row < color->ysize(); ++row) {
+      ::jxl::PremultiplyAlpha(color->PlaneRow(0, row), color->PlaneRow(1, row),
+                              color->PlaneRow(2, row), alpha->Row(row),
+                              color->xsize());
+    }
+  };
+
+  ExtraChannelInfo* alpha_info = io.metadata.m.Find(ExtraChannel::kAlpha);
+  if (alpha_info == nullptr || alpha_info->alpha_associated) return false;
+
+  if (io.metadata.m.have_preview) premultiply_bundle(io.preview_frame);
+  for (ImageBundle& frame : io.frames) premultiply_bundle(frame);
+  alpha_info->alpha_associated = true;
+  return true;
+}
+
+// Runs ::jxl::UnpremultiplyAlpha on the preview (if present) and every frame.
+// Returns false, changing nothing, when there is no alpha channel or the alpha
+// is not flagged as associated; otherwise clears the flag and returns true.
+bool UnpremultiplyAlpha(CodecInOut& io) {
+  const auto unpremultiply_bundle = [](ImageBundle& bundle) {
+    // Bundles lacking either alpha or color are left untouched.
+    if (!bundle.HasAlpha() || !bundle.HasColor()) return;
+    auto* color = bundle.color();
+    const auto* alpha = bundle.alpha();
+    JXL_CHECK(color->ysize() == alpha->ysize());
+    JXL_CHECK(color->xsize() == alpha->xsize());
+    for (size_t row = 0; row < color->ysize(); ++row) {
+      ::jxl::UnpremultiplyAlpha(color->PlaneRow(0, row),
+                                color->PlaneRow(1, row),
+                                color->PlaneRow(2, row), alpha->Row(row),
+                                color->xsize());
+    }
+  };
+  ExtraChannelInfo* alpha_info = io.metadata.m.Find(ExtraChannel::kAlpha);
+  if (alpha_info == nullptr || !alpha_info->alpha_associated) return false;
+  if (io.metadata.m.have_preview) unpremultiply_bundle(io.preview_frame);
+  for (ImageBundle& frame : io.frames) unpremultiply_bundle(frame);
+  alpha_info->alpha_associated = false;
+  return true;
+}
+}  // namespace
+
+TEST(JxlTest, RoundtripAlphaPremultiplied) {
+  const PaddedBytes orig = jxl::test::ReadTestData(
+      "external/wesaturate/500px/tmshre_riaphotographs_alpha.png");
+  CodecInOut io, io_nopremul;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io_nopremul));
+  // `io` must be a non-empty image with alpha before it is premultiplied.
+  ASSERT_NE(io.xsize(), 0u);
+  ASSERT_TRUE(io.metadata.m.HasAlpha());
+  ASSERT_TRUE(io.Main().HasAlpha());
+  io.ShrinkTo(300, 300);
+  io_nopremul.ShrinkTo(300, 300);
+
+  CompressParams cparams;
+  cparams.butteraugli_distance = 1.0;
+  cparams.SetCms(GetJxlCms());
+  // Premultiply `io` only; `io_nopremul` is kept as the unmodified reference.
+  EXPECT_FALSE(io.Main().AlphaIsPremultiplied());
+  EXPECT_TRUE(PremultiplyAlpha(io));
+  EXPECT_TRUE(io.Main().AlphaIsPremultiplied());
+
+  EXPECT_FALSE(io_nopremul.Main().AlphaIsPremultiplied());
+
+  PassesEncoderState enc_state;
+  AuxOut* aux_out = nullptr;
+  PaddedBytes compressed;
+  EXPECT_TRUE(
+      EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(), aux_out));
+  EXPECT_LE(compressed.size(), 10000u);
+  // Decode with every mix of output path, alpha handling and sample type.
+  for (bool use_image_callback : {false, true}) {
+    for (bool unpremul_alpha : {false, true}) {
+      for (bool use_uint8 : {false, true}) {
+        printf(
+            "Testing premultiplied alpha using %s %s requesting "
+            "%spremultiplied output.\n",
+            use_uint8 ? "uint8" : "float",
+            use_image_callback ? "image callback" : "image_buffer",
+            unpremul_alpha ? "un" : "");
+        CodecInOut io2;
+        JXLDecompressParams dparams;
+        dparams.use_image_callback = use_image_callback;
+        dparams.unpremultiply_alpha = unpremul_alpha;
+        if (use_uint8) {
+          dparams.accepted_formats = {
+              {4, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0}};
+        }
+        EXPECT_TRUE(
+            test::DecodeFile(dparams, Span<const uint8_t>(compressed), &io2));
+        // Output is premultiplied exactly when unpremultiplying was not asked.
+        EXPECT_EQ(unpremul_alpha, !io2.Main().AlphaIsPremultiplied());
+        if (!unpremul_alpha) {
+          EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames,
+                                          ButteraugliParams(), GetJxlCms(),
+                                          /*distmap=*/nullptr),
+                      IsSlightlyBelow(1.111));
+          EXPECT_TRUE(UnpremultiplyAlpha(io2));
+          EXPECT_FALSE(io2.Main().AlphaIsPremultiplied());
+        }
+        EXPECT_THAT(ButteraugliDistance(io_nopremul.frames, io2.frames,
+                                        ButteraugliParams(), GetJxlCms(),
+                                        /*distmap=*/nullptr),
+                    IsSlightlyBelow(1.55));
+      }
+    }
+  }
+}
+
+TEST(JxlTest, RoundtripAlphaResampling) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes orig = jxl::test::ReadTestData(
+      "external/wesaturate/500px/tmshre_riaphotographs_alpha.png");
+  TestImage t;
+  t.DecodeFromBytes(orig).ClearMetadata();
+  ASSERT_NE(t.ppf().info.xsize, 0u);  // unsigned literal, as elsewhere in file
+  ASSERT_GT(t.ppf().info.alpha_bits, 0u);
+  // Resampling 2 is set for the frame and for the extra channels.
+  JXLCompressParams cparams;
+  cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 5);  // kHare
+  cparams.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 2);
+  cparams.AddOption(JXL_ENC_FRAME_SETTING_EXTRA_CHANNEL_RESAMPLING, 2);
+
+  PackedPixelFile ppf_out;
+  EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 13155, 130);
+  EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(5.2));
+}
+
+// Same image as RoundtripAlphaResampling, but only the extra channel is
+// resampled (EXTRA_CHANNEL_RESAMPLING=2) and the effort is 3.
+TEST(JxlTest, RoundtripAlphaResamplingOnlyAlpha) {
+  ThreadPool* no_pool = nullptr;
+  const PaddedBytes png_bytes = jxl::test::ReadTestData(
+      "external/wesaturate/500px/tmshre_riaphotographs_alpha.png");
+  TestImage image;
+  image.DecodeFromBytes(png_bytes).ClearMetadata();
+  ASSERT_NE(image.ppf().info.xsize, 0);
+  ASSERT_TRUE(image.ppf().info.alpha_bits > 0);
+
+  JXLCompressParams enc;
+  enc.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 3);  // kFalcon
+  enc.AddOption(JXL_ENC_FRAME_SETTING_EXTRA_CHANNEL_RESAMPLING, 2);
+
+  PackedPixelFile decoded;
+  EXPECT_NEAR(Roundtrip(image.ppf(), enc, {}, no_pool, &decoded), 33571, 400);
+  EXPECT_THAT(ButteraugliDistance(image.ppf(), decoded),
+              IsSlightlyBelow(1.49));
+}
+
+// Roundtrips a 12x12 version of the alpha test image (dimensions that are not
+// a multiple of 8) with default compression and decompression parameters.
+TEST(JxlTest, RoundtripAlphaNonMultipleOf8) {
+  ThreadPool* no_pool = nullptr;
+  const PaddedBytes png_bytes = jxl::test::ReadTestData(
+      "external/wesaturate/500px/tmshre_riaphotographs_alpha.png");
+  TestImage image;
+  image.DecodeFromBytes(png_bytes).ClearMetadata().SetDimensions(12, 12);
+  ASSERT_NE(image.ppf().info.xsize, 0);
+  ASSERT_TRUE(image.ppf().info.alpha_bits > 0);
+  EXPECT_EQ(image.ppf().frames[0].color.format.data_type, JXL_TYPE_UINT8);
+
+  PackedPixelFile decoded;
+  EXPECT_NEAR(Roundtrip(image.ppf(), {}, {}, no_pool, &decoded), 107, 10);
+  EXPECT_THAT(ButteraugliDistance(image.ppf(), decoded),
+              IsSlightlyBelow(0.95));
+}
+
+// Roundtrips a synthetic 16-bit RGBA gradient at distance 0.5 and effort 6.
+TEST(JxlTest, RoundtripAlpha16) {
+  ThreadPoolForTests workers(4);
+  // Wider than 512 pixels so that more than one group is exercised.
+  size_t width = 1200, height = 160;
+  TestImage image;
+  image.SetDimensions(width, height).SetChannels(4).SetAllBitDepths(16);
+  TestImage::Frame pixels = image.AddFrame();
+  // Fill with a 16-bit pattern covering a range of colors and alpha values;
+  // samples are handed to SetValue() scaled by 1/65535.
+  const float to_unit = 1.0f / 65535;
+  for (size_t row = 0; row < height; row++) {
+    for (size_t col = 0; col < width; col++) {
+      uint16_t red = row * 65535 / height;
+      uint16_t green = col * 65535 / width;
+      uint16_t blue = (row + col) * 65535 / (width + height);
+      pixels.SetValue(row, col, 0, red * to_unit);
+      pixels.SetValue(row, col, 1, green * to_unit);
+      pixels.SetValue(row, col, 2, blue * to_unit);
+      // Alpha reuses the green ramp.
+      pixels.SetValue(row, col, 3, green * to_unit);
+    }
+  }
+
+  ASSERT_NE(image.ppf().info.xsize, 0);
+  ASSERT_EQ(image.ppf().info.alpha_bits, 16);
+
+  JXLCompressParams enc;
+  enc.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 6);  // kWombat
+  enc.distance = 0.5;
+
+  PackedPixelFile decoded;
+  // TODO(szabadka) Investigate big size difference on i686
+  // Still observed as of 2023-04-18.
+  EXPECT_NEAR(Roundtrip(image.ppf(), enc, {}, &workers, &decoded), 3466, 120);
+  EXPECT_THAT(ButteraugliDistance(image.ppf(), decoded),
+              IsSlightlyBelow(0.65));
+}
+
+namespace {
+// Builds the compression parameters shared by the lossless tests: modular
+// mode on, COLOR_TRANSFORM=1, MODULAR_PREDICTOR=6 and distance 0.
+JXLCompressParams CompressParamsForLossless() {
+  JXLCompressParams lossless;
+  lossless.AddOption(JXL_ENC_FRAME_SETTING_MODULAR, 1);
+  lossless.AddOption(JXL_ENC_FRAME_SETTING_COLOR_TRANSFORM, 1);
+  // Predictor 6 is the weighted predictor.
+  lossless.AddOption(JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR, 6);
+  lossless.distance = 0;
+  return lossless;
+}
+}  // namespace
+
+// Lossless roundtrip of an 8-bit sRGB photo: the Roundtrip() result must match
+// the recorded value exactly and the decoded pixels must be identical.
+TEST(JxlTest, JXL_SLOW_TEST(RoundtripLossless8)) {
+  ThreadPoolForTests workers(8);
+  const PaddedBytes png_bytes = jxl::test::ReadTestData(
+      "external/wesaturate/500px/tmshre_riaphotographs_srgb8.png");
+  TestImage image;
+  image.DecodeFromBytes(png_bytes).ClearMetadata();
+
+  JXLCompressParams enc = CompressParamsForLossless();
+
+  PackedPixelFile decoded;
+  EXPECT_EQ(Roundtrip(image.ppf(), enc, {}, &workers, &decoded), 223058);
+  EXPECT_EQ(ComputeDistance2(image.ppf(), decoded), 0.0);
+}
+
+// Lossless roundtrip at effort 2 with an extra MODULAR_PREDICTOR=5 option
+// added on top of the shared lossless parameters.
+TEST(JxlTest, JXL_SLOW_TEST(RoundtripLossless8ThunderGradient)) {
+  ThreadPoolForTests workers(8);
+  const PaddedBytes png_bytes = jxl::test::ReadTestData(
+      "external/wesaturate/500px/tmshre_riaphotographs_srgb8.png");
+  TestImage image;
+  image.DecodeFromBytes(png_bytes).ClearMetadata();
+
+  JXLCompressParams enc = CompressParamsForLossless();
+  enc.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 2);             // kThunder
+  enc.AddOption(JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR, 5);  // Gradient
+
+  PackedPixelFile decoded;
+  EXPECT_EQ(Roundtrip(image.ppf(), enc, {}, &workers, &decoded), 261684);
+  EXPECT_EQ(ComputeDistance2(image.ppf(), decoded), 0.0);
+}
+
+// Lossless roundtrip at effort 1; only an upper bound is checked on the
+// Roundtrip() result, while the pixels must still match exactly.
+TEST(JxlTest, JXL_SLOW_TEST(RoundtripLossless8LightningGradient)) {
+  ThreadPoolForTests workers(8);
+  const PaddedBytes png_bytes = jxl::test::ReadTestData(
+      "external/wesaturate/500px/tmshre_riaphotographs_srgb8.png");
+  TestImage image;
+  image.DecodeFromBytes(png_bytes).ClearMetadata();
+
+  JXLCompressParams enc = CompressParamsForLossless();
+  enc.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 1);  // kLightning
+
+  PackedPixelFile decoded;
+  // The bound is loose because different SIMD targets compress differently.
+  EXPECT_THAT(Roundtrip(image.ppf(), enc, {}, &workers, &decoded),
+              IsSlightlyBelow(286848u));
+  EXPECT_EQ(ComputeDistance2(image.ppf(), decoded), 0.0);
+}
+
+// Lossless roundtrip at effort 3 with an exact expected Roundtrip() result.
+TEST(JxlTest, JXL_SLOW_TEST(RoundtripLossless8Falcon)) {
+  ThreadPoolForTests workers(8);
+  const PaddedBytes png_bytes = jxl::test::ReadTestData(
+      "external/wesaturate/500px/tmshre_riaphotographs_srgb8.png");
+  TestImage image;
+  image.DecodeFromBytes(png_bytes).ClearMetadata();
+
+  JXLCompressParams enc = CompressParamsForLossless();
+  enc.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 3);  // kFalcon
+
+  PackedPixelFile decoded;
+  EXPECT_EQ(Roundtrip(image.ppf(), enc, {}, &workers, &decoded), 230766);
+  EXPECT_EQ(ComputeDistance2(image.ppf(), decoded), 0.0);
+}
+
+// Lossless roundtrip of an 8-bit RGBA image, decoding into the same pixel
+// format as the input; color and alpha must both survive unchanged.
+TEST(JxlTest, RoundtripLossless8Alpha) {
+  ThreadPool* no_pool = nullptr;
+  const PaddedBytes png_bytes = jxl::test::ReadTestData(
+      "external/wesaturate/500px/tmshre_riaphotographs_alpha.png");
+  TestImage image;
+  image.DecodeFromBytes(png_bytes).ClearMetadata();
+  ASSERT_EQ(image.ppf().info.alpha_bits, 8);
+  EXPECT_EQ(image.ppf().frames[0].color.format.data_type, JXL_TYPE_UINT8);
+
+  JXLCompressParams enc = CompressParamsForLossless();
+
+  // Only accept the input's own pixel format on decode.
+  JXLDecompressParams dec;
+  dec.accepted_formats.push_back(image.ppf().frames[0].color.format);
+
+  PackedPixelFile decoded;
+  EXPECT_EQ(Roundtrip(image.ppf(), enc, dec, no_pool, &decoded), 251470);
+  EXPECT_EQ(ComputeDistance2(image.ppf(), decoded), 0.0);
+  EXPECT_EQ(decoded.info.alpha_bits, 8);
+  EXPECT_TRUE(test::SameAlpha(image.ppf(), decoded));
+}
+
+// Lossless roundtrip of a synthetic 16-bit RGBA gradient.
+TEST(JxlTest, RoundtripLossless16Alpha) {
+  ThreadPool* no_pool = nullptr;
+  size_t width = 1200, height = 160;
+  TestImage image;
+  image.SetDimensions(width, height).SetChannels(4).SetAllBitDepths(16);
+  TestImage::Frame pixels = image.AddFrame();
+  // Fill with a 16-bit pattern covering a range of colors and alpha values;
+  // samples are handed to SetValue() scaled by 1/65535.
+  const float to_unit = 1.0f / 65535;
+  for (size_t row = 0; row < height; row++) {
+    for (size_t col = 0; col < width; col++) {
+      uint16_t red = row * 65535 / height;
+      uint16_t green = col * 65535 / width + 37;
+      uint16_t blue = (row + col) * 65535 / (width + height);
+      pixels.SetValue(row, col, 0, red * to_unit);
+      pixels.SetValue(row, col, 1, green * to_unit);
+      pixels.SetValue(row, col, 2, blue * to_unit);
+      // Alpha reuses the green ramp.
+      pixels.SetValue(row, col, 3, green * to_unit);
+    }
+  }
+  ASSERT_EQ(image.ppf().info.bits_per_sample, 16);
+  ASSERT_EQ(image.ppf().info.alpha_bits, 16);
+
+  JXLCompressParams enc = CompressParamsForLossless();
+
+  // Only accept the input's own pixel format on decode.
+  JXLDecompressParams dec;
+  dec.accepted_formats.push_back(image.ppf().frames[0].color.format);
+
+  PackedPixelFile decoded;
+  // TODO(szabadka) Investigate big size difference on i686
+  EXPECT_NEAR(Roundtrip(image.ppf(), enc, dec, no_pool, &decoded), 4884, 100);
+  EXPECT_EQ(ComputeDistance2(image.ppf(), decoded), 0.0);
+  EXPECT_EQ(decoded.info.alpha_bits, 16);
+  EXPECT_TRUE(test::SameAlpha(image.ppf(), decoded));
+}
+
+// Lossless roundtrip of a 16-bit RGBA image whose samples are all tiny, to
+// verify that the reported bit depth stays at 16 for color and alpha.
+TEST(JxlTest, RoundtripLossless16AlphaNotMisdetectedAs8Bit) {
+  ThreadPool* no_pool = nullptr;
+  size_t width = 128, height = 128;
+  TestImage image;
+  image.SetDimensions(width, height).SetChannels(4).SetAllBitDepths(16);
+  TestImage::Frame pixels = image.AddFrame();
+  // Every 16-bit value in this image (color and alpha) is very small, which
+  // would expose a code path that wrongly treats the image as 8-bit (or even
+  // 6-bit) rather than 16-bit.
+  const float to_unit = 1.0f / 65535;
+  for (size_t row = 0; row < height; row++) {
+    for (size_t col = 0; col < width; col++) {
+      uint16_t red = row * 64 / height;
+      uint16_t green = col * 64 / width + 37;
+      uint16_t blue = (row + col) * 64 / (width + height);
+      pixels.SetValue(row, col, 0, red * to_unit);
+      pixels.SetValue(row, col, 1, green * to_unit);
+      pixels.SetValue(row, col, 2, blue * to_unit);
+      // Alpha reuses the green ramp.
+      pixels.SetValue(row, col, 3, green * to_unit);
+    }
+  }
+  ASSERT_EQ(image.ppf().info.bits_per_sample, 16);
+  ASSERT_EQ(image.ppf().info.alpha_bits, 16);
+
+  JXLCompressParams enc = CompressParamsForLossless();
+
+  // Only accept the input's own pixel format on decode.
+  JXLDecompressParams dec;
+  dec.accepted_formats.push_back(image.ppf().frames[0].color.format);
+
+  PackedPixelFile decoded;
+  EXPECT_NEAR(Roundtrip(image.ppf(), enc, dec, no_pool, &decoded), 591, 50);
+  EXPECT_EQ(ComputeDistance2(image.ppf(), decoded), 0.0);
+  EXPECT_EQ(decoded.info.bits_per_sample, 16);
+  EXPECT_EQ(decoded.info.alpha_bits, 16);
+  EXPECT_TRUE(test::SameAlpha(image.ppf(), decoded));
+}
+
+// Roundtrips an 8-bit sRGB photo with the DOTS option enabled at effort 7 and
+// distance 0.04.
+TEST(JxlTest, RoundtripDots) {
+  ThreadPool* no_pool = nullptr;
+  const PaddedBytes png_bytes = jxl::test::ReadTestData(
+      "external/wesaturate/500px/cvo9xd_keong_macan_srgb8.png");
+  TestImage image;
+  image.DecodeFromBytes(png_bytes).ClearMetadata();
+  ASSERT_NE(image.ppf().info.xsize, 0);
+  EXPECT_EQ(image.ppf().info.bits_per_sample, 8);
+  EXPECT_EQ(image.ppf().color_encoding.transfer_function,
+            JXL_TRANSFER_FUNCTION_SRGB);
+
+  JXLCompressParams enc;
+  enc.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7);  // kSquirrel
+  enc.AddOption(JXL_ENC_FRAME_SETTING_DOTS, 1);
+  enc.distance = 0.04;
+
+  PackedPixelFile decoded;
+  EXPECT_NEAR(Roundtrip(image.ppf(), enc, {}, no_pool, &decoded), 273333,
+              4000);
+  EXPECT_THAT(ButteraugliDistance(image.ppf(), decoded),
+              IsSlightlyBelow(0.35));
+}
+
+// Roundtrips an 8-bit sRGB photo with the NOISE option enabled at effort 7.
+TEST(JxlTest, RoundtripNoise) {
+  ThreadPool* no_pool = nullptr;
+  const PaddedBytes png_bytes = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  TestImage image;
+  image.DecodeFromBytes(png_bytes).ClearMetadata();
+  ASSERT_NE(image.ppf().info.xsize, 0);
+  EXPECT_EQ(image.ppf().info.bits_per_sample, 8);
+  EXPECT_EQ(image.ppf().color_encoding.transfer_function,
+            JXL_TRANSFER_FUNCTION_SRGB);
+
+  JXLCompressParams enc;
+  enc.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7);  // kSquirrel
+  enc.AddOption(JXL_ENC_FRAME_SETTING_NOISE, 1);
+
+  PackedPixelFile decoded;
+  EXPECT_NEAR(Roundtrip(image.ppf(), enc, {}, no_pool, &decoded), 39261, 750);
+  EXPECT_THAT(ButteraugliDistance(image.ppf(), decoded),
+              IsSlightlyBelow(1.35));
+}
+
+// Lossless roundtrip of an 8-bit grayscale image; color space and bit depth
+// must be preserved in the decoded output.
+TEST(JxlTest, RoundtripLossless8Gray) {
+  ThreadPool* no_pool = nullptr;
+  const PaddedBytes png_bytes = jxl::test::ReadTestData(
+      "external/wesaturate/500px/cvo9xd_keong_macan_grayscale.png");
+  TestImage image;
+  image.SetColorEncoding("Gra_D65_Rel_SRG")
+      .DecodeFromBytes(png_bytes)
+      .ClearMetadata();
+  EXPECT_EQ(image.ppf().color_encoding.color_space, JXL_COLOR_SPACE_GRAY);
+  EXPECT_EQ(image.ppf().info.bits_per_sample, 8);
+
+  JXLCompressParams enc = CompressParamsForLossless();
+
+  // Only accept the input's own pixel format on decode.
+  JXLDecompressParams dec;
+  dec.accepted_formats.push_back(image.ppf().frames[0].color.format);
+
+  PackedPixelFile decoded;
+  EXPECT_EQ(Roundtrip(image.ppf(), enc, dec, no_pool, &decoded), 92185);
+  EXPECT_EQ(ComputeDistance2(image.ppf(), decoded), 0.0);
+  EXPECT_EQ(decoded.color_encoding.color_space, JXL_COLOR_SPACE_GRAY);
+  EXPECT_EQ(decoded.info.bits_per_sample, 8);
+}
+
+// Roundtrips a 4-frame GIF animation with default compression parameters and
+// compares the decoded frames against the coalesced source animation.
+// Skipped (with a message on stderr) when no GIF decoder is available.
+TEST(JxlTest, RoundtripAnimation) {
+  if (!jxl::extras::CanDecode(jxl::extras::Codec::kGIF)) {
+    fprintf(stderr, "Skipping test because of missing GIF decoder.\n");
+    return;
+  }
+  ThreadPool* pool = nullptr;
+  const PaddedBytes orig = jxl::test::ReadTestData("jxl/traffic_light.gif");
+  TestImage t;
+  t.DecodeFromBytes(orig).ClearMetadata();
+  EXPECT_EQ(4, t.ppf().frames.size());
+
+  // Only accept the input's own pixel format on decode.
+  JXLDecompressParams dparams;
+  dparams.accepted_formats.push_back(t.ppf().frames[0].color.format);
+
+  PackedPixelFile ppf_out;
+  EXPECT_THAT(Roundtrip(t.ppf(), {}, dparams, pool, &ppf_out),
+              IsSlightlyBelow(2600));
+
+  // Select the threshold outside the assertion: preprocessor directives
+  // inside the argument list of a function-like macro such as EXPECT_LE are
+  // undefined behavior.
+#if JXL_HIGH_PRECISION
+  const double max_distance = 1.55;
+#else
+  const double max_distance = 1.75;
+#endif
+
+  t.CoalesceGIFAnimationWithAlpha();
+  ASSERT_EQ(ppf_out.frames.size(), t.ppf().frames.size());
+  EXPECT_LE(ButteraugliDistance(t.ppf(), ppf_out), max_distance);
+}
+
+// Lossless roundtrip of a 4-frame GIF animation. Skipped (with a message on
+// stderr) when no GIF decoder is available.
+TEST(JxlTest, RoundtripLosslessAnimation) {
+  const bool have_gif = jxl::extras::CanDecode(jxl::extras::Codec::kGIF);
+  if (!have_gif) {
+    fprintf(stderr, "Skipping test because of missing GIF decoder.\n");
+    return;
+  }
+  ThreadPool* no_pool = nullptr;
+  const PaddedBytes gif_bytes =
+      jxl::test::ReadTestData("jxl/traffic_light.gif");
+  TestImage animation;
+  animation.DecodeFromBytes(gif_bytes).ClearMetadata();
+  EXPECT_EQ(4, animation.ppf().frames.size());
+
+  JXLCompressParams enc = CompressParamsForLossless();
+
+  // Only accept the input's own pixel format on decode.
+  JXLDecompressParams dec;
+  dec.accepted_formats.push_back(animation.ppf().frames[0].color.format);
+
+  PackedPixelFile decoded;
+  EXPECT_THAT(Roundtrip(animation.ppf(), enc, dec, no_pool, &decoded),
+              IsSlightlyBelow(958));
+
+  animation.CoalesceGIFAnimationWithAlpha();
+  ASSERT_EQ(decoded.frames.size(), animation.ppf().frames.size());
+  EXPECT_LE(ButteraugliDistance(animation.ppf(), decoded), 5e-4);
+}
+
+// Roundtrips a 2-frame GIF animation with the PATCHES option enabled.
+// Skipped (with a message on stderr) when no GIF decoder is available.
+TEST(JxlTest, RoundtripAnimationPatches) {
+  const bool have_gif = jxl::extras::CanDecode(jxl::extras::Codec::kGIF);
+  if (!have_gif) {
+    fprintf(stderr, "Skipping test because of missing GIF decoder.\n");
+    return;
+  }
+  ThreadPool* no_pool = nullptr;
+  const PaddedBytes gif_bytes =
+      jxl::test::ReadTestData("jxl/animation_patches.gif");
+
+  TestImage animation;
+  animation.DecodeFromBytes(gif_bytes).ClearMetadata();
+  ASSERT_EQ(2u, animation.ppf().frames.size());
+
+  JXLCompressParams enc;
+  enc.AddOption(JXL_ENC_FRAME_SETTING_PATCHES, 1);
+
+  // Only accept the input's own pixel format on decode.
+  JXLDecompressParams dec;
+  dec.accepted_formats.push_back(animation.ppf().frames[0].color.format);
+
+  PackedPixelFile decoded;
+  // Reference points: 40k without patches, 27k when patch frames are encoded
+  // multiple times.
+  EXPECT_THAT(Roundtrip(animation.ppf(), enc, dec, no_pool, &decoded),
+              IsSlightlyBelow(16789));
+  EXPECT_EQ(decoded.frames.size(), animation.ppf().frames.size());
+  // Broken patches push the distance above 10.
+  EXPECT_THAT(ButteraugliDistance(animation.ppf(), decoded),
+              IsSlightlyBelow(1.0999));
+}
+
+// Encodes the JPEG bytes in `jpeg_in` to JXL with extras::EncodeImageJXL,
+// decodes the result with DecodeImageJXL and expects the bytes written to
+// `out` to equal `jpeg_in` byte for byte.
+//
+// `pool` is installed as the decoder's parallel runner.
+// Returns the size in bytes of the compressed JXL stream. Reporting stops
+// after the fifth mismatching byte to keep the failure output short.
+size_t RoundtripJpeg(const PaddedBytes& jpeg_in, ThreadPool* pool) {
+  std::vector<uint8_t> jpeg_bytes(jpeg_in.data(),
+                                  jpeg_in.data() + jpeg_in.size());
+  std::vector<uint8_t> compressed;
+  EXPECT_TRUE(extras::EncodeImageJXL({}, extras::PackedPixelFile(), &jpeg_bytes,
+                                     &compressed));
+
+  jxl::JXLDecompressParams dparams;
+  test::DefaultAcceptedFormats(dparams);
+  test::SetThreadParallelRunner(dparams, pool);
+  std::vector<uint8_t> out;
+  jxl::PackedPixelFile ppf;
+  EXPECT_TRUE(DecodeImageJXL(compressed.data(), compressed.size(), dparams,
+                             nullptr, &ppf, &out));
+  EXPECT_EQ(out.size(), jpeg_in.size());
+  size_t failures = 0;
+  for (size_t i = 0; i < std::min(out.size(), jpeg_in.size()); i++) {
+    if (out[i] != jpeg_in[i]) {
+      // uint8_t streams as a character; cast so the message shows numbers.
+      EXPECT_EQ(out[i], jpeg_in[i])
+          << "byte mismatch " << i << " " << static_cast<int>(out[i])
+          << " != " << static_cast<int>(jpeg_in[i]);
+      if (++failures > 4) {
+        return compressed.size();
+      }
+    }
+  }
+  return compressed.size();
+}
+
+// Encodes the JPEG bytes in `jpeg_in` to JXL with extras::EncodeImageJXL and
+// decodes the result into `ppf_out`. `dparams` is taken by value and extended
+// with the default accepted formats and `pool` as the parallel runner.
+void RoundtripJpegToPixels(const PaddedBytes& jpeg_in,
+                           JXLDecompressParams dparams, ThreadPool* pool,
+                           PackedPixelFile* ppf_out) {
+  const uint8_t* const first = jpeg_in.data();
+  std::vector<uint8_t> jpeg_copy(first, first + jpeg_in.size());
+  std::vector<uint8_t> jxl_bytes;
+  EXPECT_TRUE(extras::EncodeImageJXL({}, extras::PackedPixelFile(), &jpeg_copy,
+                                     &jxl_bytes));
+
+  test::DefaultAcceptedFormats(dparams);
+  test::SetThreadParallelRunner(dparams, pool);
+  EXPECT_TRUE(DecodeImageJXL(jxl_bytes.data(), jxl_bytes.size(), dparams,
+                             nullptr, ppf_out, nullptr));
+}
+
+// Recompresses the 4:4:4 flower JPEG and checks the RoundtripJpeg() result.
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression444)) {
+  ThreadPoolForTests workers(8);
+  const PaddedBytes jpeg =
+      jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_444.jpg");
+  // The input JPEG is 696,659 bytes.
+  EXPECT_NEAR(RoundtripJpeg(jpeg, &workers), 568940u, 10);
+}
+
+// Transcodes the 4:4:4 flower JPEG to JXL, decodes to pixels and compares
+// them with the pixels obtained by decoding the JPEG directly.
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionToPixels)) {
+  TEST_LIBJPEG_SUPPORT();
+  ThreadPoolForTests workers(8);
+  const PaddedBytes jpeg =
+      jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_444.jpg");
+  TestImage reference;
+  reference.DecodeFromBytes(jpeg);
+
+  PackedPixelFile decoded;
+  RoundtripJpegToPixels(jpeg, {}, &workers, &decoded);
+  EXPECT_THAT(ComputeDistance2(reference.ppf(), decoded),
+              IsSlightlyBelow(12));
+}
+
+// Same as RoundtripJpegRecompressionToPixels, using the 4:2:0 flower JPEG.
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionToPixels420)) {
+  TEST_LIBJPEG_SUPPORT();
+  ThreadPoolForTests workers(8);
+  const PaddedBytes jpeg =
+      jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_420.jpg");
+  TestImage reference;
+  reference.DecodeFromBytes(jpeg);
+
+  PackedPixelFile decoded;
+  RoundtripJpegToPixels(jpeg, {}, &workers, &decoded);
+  EXPECT_THAT(ComputeDistance2(reference.ppf(), decoded),
+              IsSlightlyBelow(11));
+}
+
+// Transcodes the 4:2:0 flower JPEG and decodes it with max_downsampling set
+// to 8; the allowed pixel distance is correspondingly much larger.
+TEST(JxlTest,
+     JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionToPixels420EarlyFlush)) {
+  TEST_LIBJPEG_SUPPORT();
+  ThreadPoolForTests workers(8);
+  const PaddedBytes jpeg =
+      jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_420.jpg");
+  TestImage reference;
+  reference.DecodeFromBytes(jpeg);
+
+  JXLDecompressParams dec;
+  dec.max_downsampling = 8;
+
+  PackedPixelFile decoded;
+  RoundtripJpegToPixels(jpeg, dec, &workers, &decoded);
+  EXPECT_THAT(ComputeDistance2(reference.ppf(), decoded),
+              IsSlightlyBelow(4410));
+}
+
+// Transcodes the cropped flower JPEG to JXL, decodes to pixels and compares
+// them with the pixels obtained by decoding the JPEG directly.
+TEST(JxlTest,
+     JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionToPixels420Mul16)) {
+  TEST_LIBJPEG_SUPPORT();
+  ThreadPoolForTests workers(8);
+  const PaddedBytes jpeg =
+      jxl::test::ReadTestData("jxl/flower/flower_cropped.jpg");
+  TestImage reference;
+  reference.DecodeFromBytes(jpeg);
+
+  PackedPixelFile decoded;
+  RoundtripJpegToPixels(jpeg, {}, &workers, &decoded);
+  EXPECT_THAT(ComputeDistance2(reference.ppf(), decoded), IsSlightlyBelow(4));
+}
+
+// Transcodes the asymmetrically subsampled flower JPEG to JXL, decodes to
+// pixels and compares them with the pixels from decoding the JPEG directly.
+TEST(JxlTest,
+     JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionToPixels_asymmetric)) {
+  TEST_LIBJPEG_SUPPORT();
+  ThreadPoolForTests workers(8);
+  const PaddedBytes jpeg =
+      jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_asymmetric.jpg");
+  TestImage reference;
+  reference.DecodeFromBytes(jpeg);
+
+  PackedPixelFile decoded;
+  RoundtripJpegToPixels(jpeg, {}, &workers, &decoded);
+  EXPECT_THAT(ComputeDistance2(reference.ppf(), decoded),
+              IsSlightlyBelow(10));
+}
+
+// Recompresses the grayscale flower JPEG and checks the RoundtripJpeg()
+// result.
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionGray)) {
+  ThreadPoolForTests workers(8);
+  const PaddedBytes jpeg =
+      jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_gray.jpg");
+  // The input JPEG is 456,528 bytes.
+  EXPECT_NEAR(RoundtripJpeg(jpeg, &workers), 387496u, 200);
+}
+
+// Recompresses the 4:2:0 flower JPEG and checks the RoundtripJpeg() result.
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression420)) {
+  ThreadPoolForTests workers(8);
+  const PaddedBytes jpeg =
+      jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_420.jpg");
+  // The input JPEG is 546,797 bytes.
+  EXPECT_NEAR(RoundtripJpeg(jpeg, &workers), 455560u, 10);
+}
+
+// Recompresses the luma-subsampled flower JPEG and checks the RoundtripJpeg()
+// result.
+TEST(JxlTest,
+     JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression_luma_subsample)) {
+  ThreadPoolForTests workers(8);
+  const PaddedBytes jpeg = jxl::test::ReadTestData(
+      "jxl/flower/flower.png.im_q85_luma_subsample.jpg");
+  // The input JPEG is 400,724 bytes.
+  EXPECT_NEAR(RoundtripJpeg(jpeg, &workers), 325354u, 10);
+}
+
+// Recompresses a 4:4:4 JPEG with the unusual sampling factors
+// (1x2, 1x2, 1x2) and checks the RoundtripJpeg() result.
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression444_12)) {
+  ThreadPoolForTests workers(8);
+  const PaddedBytes jpeg =
+      jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_444_1x2.jpg");
+  // The input JPEG is 703,874 bytes.
+  EXPECT_NEAR(RoundtripJpeg(jpeg, &workers), 569679u, 10);
+}
+
+// Recompresses the 4:2:2 flower JPEG and checks the RoundtripJpeg() result.
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression422)) {
+  ThreadPoolForTests workers(8);
+  const PaddedBytes jpeg =
+      jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_422.jpg");
+  // The input JPEG is 522,057 bytes.
+  EXPECT_NEAR(RoundtripJpeg(jpeg, &workers), 499282u, 10);
+}
+
+// Recompresses the 4:4:0 flower JPEG and checks the RoundtripJpeg() result.
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression440)) {
+  ThreadPoolForTests workers(8);
+  const PaddedBytes jpeg =
+      jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_440.jpg");
+  // The input JPEG is 603,623 bytes.
+  EXPECT_NEAR(RoundtripJpeg(jpeg, &workers), 501151u, 10);
+}
+
+// Recompresses a JPEG in which one chroma channel is downsampled 2x
+// vertically and the other 2x horizontally, then checks the RoundtripJpeg()
+// result.
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression_asymmetric)) {
+  ThreadPoolForTests workers(8);
+  const PaddedBytes jpeg =
+      jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_asymmetric.jpg");
+  // The input JPEG is 604,601 bytes.
+  EXPECT_NEAR(RoundtripJpeg(jpeg, &workers), 500602u, 10);
+}
+
+// Recompresses the progressive 4:2:0 flower JPEG and checks the
+// RoundtripJpeg() result.
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression420Progr)) {
+  ThreadPoolForTests workers(8);
+  const PaddedBytes jpeg =
+      jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_420_progr.jpg");
+  // The input JPEG is 522,057 bytes.
+  EXPECT_NEAR(RoundtripJpeg(jpeg, &workers), 455499u, 10);
+}
+
+// Recompresses a 1x1 JPEG carrying Exif and XMP metadata and checks the
+// RoundtripJpeg() result.
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionMetadata)) {
+  ThreadPoolForTests workers(8);
+  const PaddedBytes jpeg =
+      jxl::test::ReadTestData("jxl/jpeg_reconstruction/1x1_exif_xmp.jpg");
+  // The input JPEG is 4290 bytes.
+  EXPECT_NEAR(RoundtripJpeg(jpeg, &workers), 1400u, 30);
+}
+
+// Recompresses the sideways_bench JPEG and checks the RoundtripJpeg() result
+// with a wide tolerance.
+TEST(JxlTest,
+     JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionOrientationICC)) {
+  ThreadPoolForTests workers(8);
+  const PaddedBytes jpeg =
+      jxl::test::ReadTestData("jxl/jpeg_reconstruction/sideways_bench.jpg");
+  // The input JPEG is 15252 bytes.
+  EXPECT_NEAR(RoundtripJpeg(jpeg, &workers), 12000u, 470);
+  // TODO(jon): investigate why 'Cross-compiling i686-linux-gnu' produces a
+  // larger result
+}
+
+// Roundtrips a 600x1024 version of the flower image with PROGRESSIVE_DC=1,
+// PROGRESSIVE_AC=1 and RESPONSIVE=1.
+TEST(JxlTest, RoundtripProgressive) {
+  ThreadPoolForTests workers(4);
+  const PaddedBytes png_bytes =
+      jxl::test::ReadTestData("jxl/flower/flower.png");
+  TestImage image;
+  image.DecodeFromBytes(png_bytes).ClearMetadata().SetDimensions(600, 1024);
+
+  JXLCompressParams enc;
+  enc.AddOption(JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC, 1);
+  enc.AddOption(JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC, 1);
+  enc.AddOption(JXL_ENC_FRAME_SETTING_RESPONSIVE, 1);
+
+  PackedPixelFile decoded;
+  EXPECT_NEAR(Roundtrip(image.ppf(), enc, {}, &workers, &decoded), 62160, 750);
+  EXPECT_THAT(ButteraugliDistance(image.ppf(), decoded),
+              IsSlightlyBelow(1.4));
+}
+
+// Roundtrips a 600x1024 version of the flower image at effort 9 with
+// PROGRESSIVE_DC=2, PROGRESSIVE_AC=1 and RESPONSIVE=1.
+TEST(JxlTest, RoundtripProgressiveLevel2Slow) {
+  ThreadPoolForTests workers(8);
+  const PaddedBytes png_bytes =
+      jxl::test::ReadTestData("jxl/flower/flower.png");
+  TestImage image;
+  image.DecodeFromBytes(png_bytes).ClearMetadata().SetDimensions(600, 1024);
+
+  JXLCompressParams enc;
+  enc.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 9);  // kTortoise
+  enc.AddOption(JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC, 2);
+  enc.AddOption(JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC, 1);
+  enc.AddOption(JXL_ENC_FRAME_SETTING_RESPONSIVE, 1);
+
+  PackedPixelFile decoded;
+  EXPECT_NEAR(Roundtrip(image.ppf(), enc, {}, &workers, &decoded), 71111,
+              1000);
+  EXPECT_THAT(ButteraugliDistance(image.ppf(), decoded),
+              IsSlightlyBelow(1.17));
+}
+
+// Lossless roundtrip of random 256x256 images for 1..5 channels, bit depths
+// 3..16 and both endiannesses; the decoded pixels must equal the input.
+TEST(JxlTest, RoundtripUnsignedCustomBitdepthLossless) {
+  ThreadPool* no_pool = nullptr;
+  for (uint32_t channel_count = 1; channel_count < 6; ++channel_count) {
+    for (JxlEndianness byte_order : {JXL_LITTLE_ENDIAN, JXL_BIG_ENDIAN}) {
+      for (uint32_t bits = 3; bits <= 16; ++bits) {
+        // Big endian is only exercised for depths above 8 bits.
+        if (bits <= 8 && byte_order == JXL_BIG_ENDIAN) continue;
+        const char* order_name =
+            byte_order == JXL_LITTLE_ENDIAN ? "little" : "big";
+        printf("Testing %u channel unsigned %u bit %s endian lossless.\n",
+               channel_count, bits, order_name);
+        TestImage image;
+        image.SetDimensions(256, 256).SetChannels(channel_count);
+        image.SetAllBitDepths(bits).SetEndianness(byte_order);
+        TestImage::Frame pixels = image.AddFrame();
+        pixels.RandomFill();
+
+        JXLCompressParams enc = CompressParamsForLossless();
+        enc.input_bitdepth.type = JXL_BIT_DEPTH_FROM_CODESTREAM;
+
+        // Decode into the input's own pixel format and bit depth.
+        JXLDecompressParams dec;
+        dec.accepted_formats.push_back(image.ppf().frames[0].color.format);
+        dec.output_bitdepth.type = JXL_BIT_DEPTH_FROM_CODESTREAM;
+
+        PackedPixelFile decoded;
+        Roundtrip(image.ppf(), enc, dec, no_pool, &decoded);
+
+        ASSERT_TRUE(test::SamePixels(image.ppf(), decoded));
+      }
+    }
+  }
+}
+
+// For every bit depth 1..16 and channel count 1..4, reads the matching
+// flower_small PNM/PAM file, roundtrips it losslessly through JXL, re-encodes
+// the decoded image to the same file format and expects the original bytes.
+TEST(JxlTest, LosslessPNMRoundtrip) {
+  // Both tables are indexed by channel count; index 0 is unused.
+  static const char* kChannels[] = {"", "g", "ga", "rgb", "rgba"};
+  static const char* kExtension[] = {"", ".pgm", ".pam", ".ppm", ".pam"};
+  for (size_t bit_depth = 1; bit_depth <= 16; ++bit_depth) {
+    for (size_t channels = 1; channels <= 4; ++channels) {
+      // The 1-bit variants are skipped for the 2- and 4-channel layouts.
+      const bool two_or_four = channels == 2 || channels == 4;
+      if (bit_depth == 1 && two_or_four) continue;
+      std::string extension(kExtension[channels]);
+      std::string filename = "jxl/flower/flower_small.";
+      filename += kChannels[channels];
+      filename += ".depth";
+      filename += std::to_string(bit_depth);
+      filename += extension;
+      const PaddedBytes file_bytes = jxl::test::ReadTestData(filename);
+      test::TestImage image;
+      if (channels < 3) image.SetColorEncoding("Gra_D65_Rel_SRG");
+      image.DecodeFromBytes(file_bytes);
+
+      JXLCompressParams enc = CompressParamsForLossless();
+      enc.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 1);  // kLightning
+      enc.input_bitdepth.type = JXL_BIT_DEPTH_FROM_CODESTREAM;
+
+      // Decode into the input's own pixel format and bit depth.
+      JXLDecompressParams dec;
+      dec.accepted_formats.push_back(image.ppf().frames[0].color.format);
+      dec.output_bitdepth.type = JXL_BIT_DEPTH_FROM_CODESTREAM;
+
+      PackedPixelFile decoded;
+      Roundtrip(image.ppf(), enc, dec, nullptr, &decoded);
+
+      // Re-encode with the encoder chosen by file extension and compare the
+      // single resulting bitstream with the original file contents.
+      extras::EncodedImage reencoded;
+      auto writer = extras::Encoder::FromExtension(extension);
+      ASSERT_TRUE(writer.get());
+      ASSERT_TRUE(writer->Encode(decoded, &reencoded, nullptr));
+      ASSERT_EQ(reencoded.bitstreams.size(), 1);
+      ASSERT_EQ(file_bytes.size(), reencoded.bitstreams[0].size());
+      EXPECT_EQ(0, memcmp(file_bytes.data(), reencoded.bitstreams[0].data(),
+                          file_bytes.size()));
+    }
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/lehmer_code.h b/third-party/libjxl/libjxl/lib/jxl/lehmer_code.h
new file mode 100644
index 0000000000..dd1d21c6f7
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/lehmer_code.h
@@ -0,0 +1,102 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_LEHMER_CODE_H_
+#define LIB_JXL_LEHMER_CODE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Permutation <=> factorial base representation (Lehmer code).
+
+using LehmerT = uint32_t;
+
+// Returns `value` with every bit cleared except its lowest set bit
+// (0 when `value` is 0).
+template <typename T>
+constexpr T ValueOfLowest1Bit(T value) {
+  return -value & value;
+}
+
+// Computes the Lehmer (factorial basis) code of permutation, an array of n
+// unique indices in [0..n), and stores it in code[0..n). N*logN time.
+// temp is scratch space of n + 1 elements; it is zeroed here, so callers need
+// not initialize it.
+template <typename PermutationT>
+void ComputeLehmerCode(const PermutationT* JXL_RESTRICT permutation,
+                       uint32_t* JXL_RESTRICT temp, const size_t n,
+                       LehmerT* JXL_RESTRICT code) {
+  for (size_t slot = 0; slot < n + 1; ++slot) temp[slot] = 0;
+
+  for (size_t pos = 0; pos < n; ++pos) {
+    const PermutationT value = permutation[pos];
+
+    // Fenwick-tree prefix sum, walking down by clearing the lowest set bit.
+    uint32_t smaller_seen = 0;
+    for (uint32_t node = value + 1; node != 0; node &= node - 1) {
+      smaller_seen += temp[node];
+    }
+    JXL_DASSERT(value >= smaller_seen);
+    code[pos] = value - smaller_seen;
+
+    // Fenwick-tree point update, walking up by adding the lowest set bit.
+    for (uint32_t node = value + 1; node < n + 1;
+         node += ValueOfLowest1Bit(node)) {
+      temp[node] += 1;
+    }
+  }
+}
+
+// Decodes the Lehmer code in code[0..n) into permutation[0..n).
+// temp is scratch space of 1 << CeilLog2(n) elements; it is fully overwritten
+// here, so callers need not initialize it.
+template <typename PermutationT>
+void DecodeLehmerCode(const LehmerT* JXL_RESTRICT code,
+                      uint32_t* JXL_RESTRICT temp, size_t n,
+                      PermutationT* JXL_RESTRICT permutation) {
+  JXL_DASSERT(n != 0);
+  const size_t levels = CeilLog2Nonzero(n);
+  const size_t capacity = 1ull << levels;
+
+  // Slot k (1-based) starts out holding the value of the lowest set bit of k.
+  for (size_t slot = 0; slot < capacity; slot++) {
+    const int32_t one_based = static_cast<int32_t>(slot + 1);
+    temp[slot] = static_cast<uint32_t>(ValueOfLowest1Bit(one_based));
+  }
+
+  for (size_t pos = 0; pos < n; pos++) {
+    JXL_DASSERT(code[pos] + pos < n);
+    uint32_t remaining = code[pos] + 1;
+
+    // Extract the element of that rank among the unused ones via the implicit
+    // order-statistics tree, trying steps from `capacity` down to 1.
+    size_t step = capacity;
+    size_t found = 0;
+    for (size_t level = 0; level <= levels; level++) {
+      const size_t candidate = found + step;
+      JXL_DASSERT(candidate >= 1);
+      step >>= 1;
+      const uint32_t weight = temp[candidate - 1];
+      if (weight < remaining) {
+        found = candidate;
+        remaining -= weight;
+      }
+    }
+
+    permutation[pos] = found;
+
+    // Mark the element as used by decrementing every slot on the upward path.
+    for (size_t node = found + 1; node <= capacity;
+         node += ValueOfLowest1Bit(node)) {
+      temp[node - 1] -= 1;
+    }
+  }
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_LEHMER_CODE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/lehmer_code_test.cc b/third-party/libjxl/libjxl/lib/jxl/lehmer_code_test.cc
new file mode 100644
index 0000000000..acda762545
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/lehmer_code_test.cc
@@ -0,0 +1,98 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/lehmer_code.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+#include <numeric>
+#include <vector>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+// Scratch buffers used by Roundtrip(), allocated once for the largest
+// permutation size (max_n) so that they can be reused for every smaller n.
+template <typename PermutationT>
+struct WorkingSet {
+  // temp is sized to 1 << CeilLog2Nonzero(max_n + 1) elements; the other
+  // buffers hold max_n elements each.
+  explicit WorkingSet(size_t max_n)
+      : padded_n(1ull << CeilLog2Nonzero(max_n + 1)),
+        permutation(max_n),
+        temp(padded_n),
+        lehmer(max_n),
+        decoded(max_n) {}
+
+  // Declared before temp because temp's initializer reads it.
+  size_t padded_n;
+  std::vector<PermutationT> permutation;  // input permutation
+  std::vector<uint32_t> temp;             // scratch for encode and decode
+  std::vector<LehmerT> lehmer;            // output of ComputeLehmerCode
+  std::vector<PermutationT> decoded;      // output of DecodeLehmerCode
+};
+
+// Encodes three random permutations of [0..n) to Lehmer codes and checks that
+// decoding each code reproduces the permutation. Buffers come from `ws`.
+template <typename PermutationT>
+void Roundtrip(size_t n, WorkingSet<PermutationT>* ws) {
+  JXL_ASSERT(n != 0);
+  const size_t decode_temp_len = 1ull << CeilLog2Nonzero(n);
+
+  // Seed derived from n.
+  Rng rng(n * 65537 + 13);
+
+  // Ensure indices fit into PermutationT
+  EXPECT_LE(n, 1ULL << (sizeof(PermutationT) * 8));
+
+  PermutationT* const perm = ws->permutation.data();
+  uint32_t* const scratch = ws->temp.data();
+  std::iota(ws->permutation.begin(), ws->permutation.begin() + n, 0);
+
+  // Try several random permutations.
+  size_t rounds_left = 3;
+  while (rounds_left-- != 0) {
+    rng.Shuffle(perm, n);
+
+    ComputeLehmerCode(perm, scratch, n, ws->lehmer.data());
+    // Clear the scratch region handed to the decoder (4-byte elements).
+    memset(scratch, 0, decode_temp_len * 4);
+    DecodeLehmerCode(ws->lehmer.data(), scratch, n, ws->decoded.data());
+
+    // The decoded permutation must equal the shuffled input.
+    for (size_t k = 0; k < n; ++k) {
+      EXPECT_EQ(ws->permutation[k], ws->decoded[k]);
+    }
+  }
+}
+
+// Runs Roundtrip() for every n in [begin, end) on `pool`, with one
+// preallocated WorkingSet per thread sized for the largest n (end - 1).
+template <typename PermutationT>
+void RoundtripSizeRange(ThreadPool* pool, uint32_t begin, uint32_t end) {
+  ASSERT_NE(0u, begin);  // n = 0 not allowed.
+  std::vector<WorkingSet<PermutationT>> working_sets;
+
+  // Init callback: create one working set per thread.
+  const auto allocate = [&working_sets, end](const size_t num_threads) {
+    size_t remaining = num_threads;
+    while (remaining-- != 0) {
+      working_sets.emplace_back(end - 1);
+    }
+    return true;
+  };
+  // Per-task callback: test size n with the calling thread's buffers.
+  const auto run_one = [&working_sets](const uint32_t n, const size_t thread) {
+    Roundtrip(n, &working_sets[thread]);
+  };
+
+  JXL_CHECK(RunOnPool(pool, begin, end, allocate, run_one, "lehmer test"));
+}
+
+// Roundtrips Lehmer codes for n in [1, 1026) with 16-bit indices and for n in
+// [65536, 65540) with 32-bit indices.
+TEST(LehmerCodeTest, TestRoundtrips) {
+  test::ThreadPoolForTests workers(8);
+
+  RoundtripSizeRange<uint16_t>(&workers, 1, 1026);
+
+  // Sizes whose indices need more than 16 bits.
+  RoundtripSizeRange<uint32_t>(&workers, 65536, 65540);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/libjxl.pc.in b/third-party/libjxl/libjxl/lib/jxl/libjxl.pc.in
new file mode 100644
index 0000000000..4a7af65b7c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/libjxl.pc.in
@@ -0,0 +1,13 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix}
+libdir=@PKGCONFIG_TARGET_LIBS@
+includedir=@PKGCONFIG_TARGET_INCLUDES@
+
+Name: libjxl
+Description: Loads and saves JPEG XL files
+Version: @JPEGXL_LIBRARY_VERSION@
+Requires.private: @JPEGXL_LIBRARY_REQUIRES@
+Libs: -L${libdir} -ljxl
+Libs.private: -lm
+Cflags: -I${includedir}
+Cflags.private: -DJXL_STATIC_DEFINE
diff --git a/third-party/libjxl/libjxl/lib/jxl/loop_filter.cc b/third-party/libjxl/libjxl/lib/jxl/loop_filter.cc
new file mode 100644
index 0000000000..5afe87617d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/loop_filter.cc
@@ -0,0 +1,98 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/loop_filter.h"
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+
+LoopFilter::LoopFilter() { Bundle::Init(this); }  // Presumably sets defaults.
+Status LoopFilter::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  // Visits the loop filter fields in a fixed order. Returns an error as soon
+  // as a visitor call fails or one of the value checks below rejects a field.
+  if (visitor->AllDefault(*this, &all_default)) {
+    // Overwrite all serialized fields, but not any nonserialized_*.
+    visitor->SetDefault(this);
+    return true;
+  }
+  // Gaborish: the six weights are visited only if both gab and gab_custom.
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(true, &gab));
+  if (visitor->Conditional(gab)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &gab_custom));
+    if (visitor->Conditional(gab_custom)) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(1.1 * 0.104699568f, &gab_x_weight1));
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(1.1 * 0.055680538f, &gab_x_weight2));
+      if (std::abs(1.0f + (gab_x_weight1 + gab_x_weight2) * 4) < 1e-8) {
+        return JXL_FAILURE(
+            "Gaborish x weights lead to near 0 unnormalized kernel");
+      }
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(1.1 * 0.104699568f, &gab_y_weight1));
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(1.1 * 0.055680538f, &gab_y_weight2));
+      if (std::abs(1.0f + (gab_y_weight1 + gab_y_weight2) * 4) < 1e-8) {
+        return JXL_FAILURE(
+            "Gaborish y weights lead to near 0 unnormalized kernel");
+      }
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(1.1 * 0.104699568f, &gab_b_weight1));
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(1.1 * 0.055680538f, &gab_b_weight2));
+      if (std::abs(1.0f + (gab_b_weight1 + gab_b_weight2) * 4) < 1e-8) {
+        return JXL_FAILURE(
+            "Gaborish b weights lead to near 0 unnormalized kernel");
+      }
+    }
+  }
+  // Edge-preserving filter: all remaining EPF fields require epf_iters > 0.
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(2, 2, &epf_iters));
+  if (visitor->Conditional(epf_iters > 0)) {
+    if (visitor->Conditional(!nonserialized_is_modular)) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &epf_sharp_custom));
+      if (visitor->Conditional(epf_sharp_custom)) {
+        for (size_t i = 0; i < kEpfSharpEntries; ++i) {
+          JXL_QUIET_RETURN_IF_ERROR(visitor->F16(
+              float(i) / float(kEpfSharpEntries - 1), &epf_sharp_lut[i]));
+        }
+      }
+    }
+    // Channel scales and zeroflush values, only if epf_weight_custom.
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &epf_weight_custom));
+    if (visitor->Conditional(epf_weight_custom)) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(40.0f, &epf_channel_scale[0]));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(5.0f, &epf_channel_scale[1]));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(3.5f, &epf_channel_scale[2]));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.45f, &epf_pass1_zeroflush));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.6f, &epf_pass2_zeroflush));
+    }
+    // Sigma parameters; epf_quant_mul is not visited in modular mode.
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &epf_sigma_custom));
+    if (visitor->Conditional(epf_sigma_custom)) {
+      if (visitor->Conditional(!nonserialized_is_modular)) {
+        JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.46f, &epf_quant_mul));
+      }
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.9f, &epf_pass0_sigma_scale));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(6.5f, &epf_pass2_sigma_scale));
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(0.6666666666666666f, &epf_border_sad_mul));
+    }
+    if (visitor->Conditional(nonserialized_is_modular)) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(1.0f, &epf_sigma_for_modular));
+      if (epf_sigma_for_modular < 1e-8) {
+        return JXL_FAILURE("EPF: sigma for modular is too small");
+      }
+    }
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->BeginExtensions(&extensions));
+  // Extensions: in chronological order of being added to the format.
+  return visitor->EndExtensions();
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/loop_filter.h b/third-party/libjxl/libjxl/lib/jxl/loop_filter.h
new file mode 100644
index 0000000000..e4b418ba2b
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/loop_filter.h
@@ -0,0 +1,76 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_LOOP_FILTER_H_
+#define LIB_JXL_LOOP_FILTER_H_
+
+// Parameters for loop filter(s), stored in each frame.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/field_encodings.h"
+
+namespace jxl {
+
+struct LoopFilter : public Fields {
+  LoopFilter();
+  JXL_FIELDS_NAME(LoopFilter)
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+  // Table lookup on epf_iters (must be at most 3), plus 1 if gab is set.
+  size_t Padding() const {
+    static const size_t padding_per_epf_iter[4] = {0, 2, 3, 6};
+    return padding_per_epf_iter[epf_iters] + (gab ? 1 : 0);
+  }
+
+  mutable bool all_default;  // Passed to visitor->AllDefault() in VisitFields.
+
+  // --- Gaborish convolution
+  bool gab;
+  // The six weights below are visited only if gab && gab_custom.
+  bool gab_custom;
+  float gab_x_weight1;
+  float gab_x_weight2;
+  float gab_y_weight1;
+  float gab_y_weight2;
+  float gab_b_weight1;
+  float gab_b_weight2;
+
+  // --- Edge-preserving filter
+
+  // Number of EPF stages to apply. 0 means EPF disabled. 1 applies only the
+  // first stage, 2 applies both stages and 3 applies the first stage twice and
+  // the second stage once.
+  uint32_t epf_iters;  // Visited as a 2-bit value.
+
+  bool epf_sharp_custom;  // Visited only if !nonserialized_is_modular.
+  enum { kEpfSharpEntries = 8 };
+  float epf_sharp_lut[kEpfSharpEntries];
+
+  bool epf_weight_custom;      // Custom weight params
+  float epf_channel_scale[3];  // Relative weight of each channel
+  float epf_pass1_zeroflush;   // Minimum weight for first pass
+  float epf_pass2_zeroflush;   // Minimum weight for second pass
+
+  bool epf_sigma_custom;        // Custom sigma parameters
+  float epf_quant_mul;          // Sigma is ~ this * quant
+  float epf_pass0_sigma_scale;  // Multiplier for sigma in pass 0
+  float epf_pass2_sigma_scale;  // Multiplier for sigma in the second pass
+  float epf_border_sad_mul;     // (inverse) multiplier for sigma on borders
+  // Visited only if nonserialized_is_modular; values below 1e-8 are rejected.
+  float epf_sigma_for_modular;
+
+  uint64_t extensions;  // Passed to visitor->BeginExtensions().
+  // Not serialized; selects which EPF fields VisitFields visits.
+  bool nonserialized_is_modular = false;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_LOOP_FILTER_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/luminance.cc b/third-party/libjxl/libjxl/lib/jxl/luminance.cc
new file mode 100644
index 0000000000..10151f4267
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/luminance.cc
@@ -0,0 +1,26 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/luminance.h"
+
+#include "lib/jxl/image_metadata.h"
+
+namespace jxl {
+
+void SetIntensityTarget(ImageMetadata* m) {
+  if (m->color_encoding.tf.IsPQ()) {
+    // PQ: peak luminance as defined by SMPTE ST 2084:2014.
+    m->SetIntensityTarget(10000);
+    return;
+  }
+  if (m->color_encoding.tf.IsHLG()) {
+    // HLG: nominal display peak luminance of Rec. ITU-R BT.2100-2.
+    m->SetIntensityTarget(1000);
+    return;
+  }
+  m->SetIntensityTarget(kDefaultIntensityTarget);  // Everything else: SDR.
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/luminance.h b/third-party/libjxl/libjxl/lib/jxl/luminance.h
new file mode 100644
index 0000000000..3181576823
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/luminance.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_LUMINANCE_H_
+#define LIB_JXL_LUMINANCE_H_
+
+namespace jxl {
+
+// Sets a default intensity target on the metadata, based on the transfer
+// function of the image, if known. For SDR images or images not known to be
+// HDR, uses kDefaultIntensityTarget; for images known to have a PQ or HLG
+// transfer function, uses a higher value.
+
+struct ImageMetadata;
+// TODO(eustas): rename
+void SetIntensityTarget(ImageMetadata* m);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_LUMINANCE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/matrix_ops.h b/third-party/libjxl/libjxl/lib/jxl/matrix_ops.h
new file mode 100644
index 0000000000..1a969bd4f0
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/matrix_ops.h
@@ -0,0 +1,84 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MATRIX_OPS_H_
+#define LIB_JXL_MATRIX_OPS_H_
+
+// 3x3 matrix operations.
+
+#include <cmath>  // abs
+#include <cstddef>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Computes C = A * B for row-major 3x3 matrices A, B and C.
+template <typename T>
+void Mul3x3Matrix(const T* a, const T* b, T* c) {
+  alignas(16) T column[3];  // Column `col` of B, copied before C is written.
+  for (size_t col = 0; col < 3; ++col) {
+    column[0] = b[col];
+    column[1] = b[3 + col];
+    column[2] = b[6 + col];
+    for (size_t row = 0; row < 3; ++row) {
+      double acc = 0;  // Products are formed in T and summed in double.
+      acc += a[row * 3 + 0] * column[0];
+      acc += a[row * 3 + 1] * column[1];
+      acc += a[row * 3 + 2] * column[2];
+      c[row * 3 + col] = acc;
+    }
+  }
+}
+
+// Computes C = A * B for a row-major 3x3 matrix A and 3-entry vectors B, C.
+template <typename T>
+void Mul3x3Vector(const T* a, const T* b, T* c) {
+  for (size_t row = 0; row < 3; ++row) {
+    double acc = 0;  // Products are formed in T and summed in double.
+    acc += a[row * 3 + 0] * b[0];
+    acc += a[row * 3 + 1] * b[1];
+    acc += a[row * 3 + 2] * b[2];
+    c[row] = acc;
+  }
+}
+
+// Inverts a 3x3 matrix in place. Fails, leaving the matrix unmodified, when
+// the absolute value of its determinant is below 1e-10.
+template <typename T>
+Status Inv3x3Matrix(T* matrix) {
+  // Widen every entry up front so that all intermediates are doubles.
+  double m[9];
+  for (size_t i = 0; i < 9; ++i) {
+    m[i] = static_cast<double>(matrix[i]);
+  }
+  // Transposed cofactors (the adjugate), stored row by row.
+  const double adj[9] = {
+      m[4] * m[8] - m[5] * m[7],
+      m[2] * m[7] - m[1] * m[8],
+      m[1] * m[5] - m[2] * m[4],
+      m[5] * m[6] - m[3] * m[8],
+      m[0] * m[8] - m[2] * m[6],
+      m[2] * m[3] - m[0] * m[5],
+      m[3] * m[7] - m[4] * m[6],
+      m[1] * m[6] - m[0] * m[7],
+      m[0] * m[4] - m[1] * m[3],
+  };
+  // Determinant by expansion along the first row.
+  const double det = m[0] * adj[0] + m[1] * adj[3] + m[2] * adj[6];
+  if (std::abs(det) < 1e-10) {
+    return JXL_FAILURE("Matrix determinant is too close to 0");
+  }
+  // inverse = adjugate / determinant, converted back to T on assignment.
+  const double inv_det = 1.0 / det;
+  for (size_t i = 0; i < 9; ++i) {
+    matrix[i] = adj[i] * inv_det;
+  }
+  return true;
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MATRIX_OPS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/memory_manager_internal.cc b/third-party/libjxl/libjxl/lib/jxl/memory_manager_internal.cc
new file mode 100644
index 0000000000..87727e75cd
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/memory_manager_internal.cc
@@ -0,0 +1,18 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/memory_manager_internal.h"
+
+#include <stdlib.h>
+
+namespace jxl {
+
+void* MemoryManagerDefaultAlloc(void* opaque, size_t size) {
+  return malloc(size);  // Plain malloc; |opaque| is ignored.
+}
+// Counterpart of MemoryManagerDefaultAlloc: plain free; |opaque| is ignored.
+void MemoryManagerDefaultFree(void* opaque, void* address) { free(address); }
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/memory_manager_internal.h b/third-party/libjxl/libjxl/lib/jxl/memory_manager_internal.h
new file mode 100644
index 0000000000..f8a5cd8d59
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/memory_manager_internal.h
@@ -0,0 +1,101 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MEMORY_MANAGER_INTERNAL_H_
+#define LIB_JXL_MEMORY_MANAGER_INTERNAL_H_
+
+// Memory allocator with support for alignment + misalignment.
+
+#include <jxl/memory_manager.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>  // memcpy
+
+#include <atomic>
+#include <memory>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Default alloc and free functions.
+void* MemoryManagerDefaultAlloc(void* opaque, size_t size);
+void MemoryManagerDefaultFree(void* opaque, void* address);
+
+// Copies |memory_manager| into |self|, or zero-fills |self| when
+// |memory_manager| is NULL, then substitutes the default alloc/free for NULL
+// callbacks. The callbacks must be both set or both NULL: if exactly one is
+// NULL this returns an error, with |self| already overwritten by the copy and
+// no defaults installed.
+static JXL_INLINE Status MemoryManagerInit(
+    JxlMemoryManager* self, const JxlMemoryManager* memory_manager) {
+  if (memory_manager == nullptr) {
+    memset(self, 0, sizeof(*self));
+  } else {
+    *self = *memory_manager;
+  }
+  const bool has_alloc = self->alloc != nullptr;
+  const bool has_free = self->free != nullptr;
+  if (has_alloc != has_free) return false;
+  // The mixed case was rejected above: both are set or both are NULL.
+  if (!has_alloc) self->alloc = jxl::MemoryManagerDefaultAlloc;
+  if (!has_free) self->free = jxl::MemoryManagerDefaultFree;
+  return true;
+}
+// Calls the alloc callback with the manager's opaque pointer; no NULL checks.
+static JXL_INLINE void* MemoryManagerAlloc(
+    const JxlMemoryManager* memory_manager, size_t size) {
+  return memory_manager->alloc(memory_manager->opaque, size);
+}
+// Calls the free callback with the manager's opaque pointer; no NULL checks.
+static JXL_INLINE void MemoryManagerFree(const JxlMemoryManager* memory_manager,
+                                         void* address) {
+  return memory_manager->free(memory_manager->opaque, address);
+}
+
+// Deleter for unique_ptr<T> that releases objects through a JxlMemoryManager.
+class MemoryManagerDeleteHelper {
+ public:
+  explicit MemoryManagerDeleteHelper(const JxlMemoryManager* memory_manager)
+      : memory_manager_(memory_manager) {}
+
+  // Runs the destructor of |address|, then hands its storage to the free
+  // callback of the memory manager. A null |address| is ignored.
+  template <typename T>
+  void operator()(T* address) const {
+    if (address != nullptr) {
+      address->~T();
+      memory_manager_->free(memory_manager_->opaque, address);
+    }
+  }
+
+ private:
+  const JxlMemoryManager* memory_manager_;
+};
+// unique_ptr whose deleter is a MemoryManagerDeleteHelper.
+template <typename T>
+using MemoryManagerUniquePtr = std::unique_ptr<T, MemoryManagerDeleteHelper>;
+
+// Allocates sizeof(T) bytes through |memory_manager| and constructs a T there
+// from |args|. Returns a null unique_ptr when the allocation fails.
+template <typename T, typename... Args>
+JXL_INLINE MemoryManagerUniquePtr<T> MemoryManagerMakeUnique(
+    const JxlMemoryManager* memory_manager, Args&&... args) {
+  void* storage = memory_manager->alloc(memory_manager->opaque, sizeof(T));
+  T* object = nullptr;
+  if (storage != nullptr) {
+    // Placement-construct in the storage obtained above.
+    object = new (storage) T(std::forward<Args>(args)...);
+  }
+  // The deleter releases the object through the same memory manager.
+  return MemoryManagerUniquePtr<T>(object,
+                                   MemoryManagerDeleteHelper(memory_manager));
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MEMORY_MANAGER_INTERNAL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/context_predict.h b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/context_predict.h
new file mode 100644
index 0000000000..4c3a33a52a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/context_predict.h
@@ -0,0 +1,672 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_CONTEXT_PREDICT_H_
+#define LIB_JXL_MODULAR_ENCODING_CONTEXT_PREDICT_H_
+
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+namespace weighted {
+constexpr static size_t kNumPredictors = 4;  // Sub-predictors kept by State.
+constexpr static int64_t kPredExtraBits = 3;  // Shift applied by AddBits.
+constexpr static int64_t kPredictionRound = ((1 << kPredExtraBits) >> 1) - 1;
+constexpr static size_t kNumProperties = 1;  // Written by State::Predict.
+
+struct Header : public Fields {
+  JXL_FIELDS_NAME(WeightedPredictorHeader)
+  // TODO(janwas): move to cc file, avoid including fields.h.
+  Header() { Bundle::Init(this); }
+  // Visits p1C..p3Ce as 5-bit values, then w[0..3] as 4-bit values.
+  Status VisitFields(Visitor *JXL_RESTRICT visitor) override {
+    if (visitor->AllDefault(*this, &all_default)) {
+      // Overwrite all serialized fields, but not any nonserialized_*.
+      visitor->SetDefault(this);
+      return true;
+    }
+    auto visit_p = [visitor](pixel_type val, pixel_type *p) {
+      uint32_t up = *p;  // Bits() takes a uint32_t*, so go through a copy.
+      JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(5, val, &up));
+      *p = up;
+      return Status(true);
+    };
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(16, &p1C));
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(10, &p2C));
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(7, &p3Ca));
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(7, &p3Cb));
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(7, &p3Cc));
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(0, &p3Cd));
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(0, &p3Ce));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(4, 0xd, &w[0]));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(4, 0xc, &w[1]));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(4, 0xc, &w[2]));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(4, 0xc, &w[3]));
+    return true;
+  }
+  // Passed to visitor->AllDefault() in VisitFields.
+  bool all_default;
+  pixel_type p1C = 0, p2C = 0, p3Ca = 0, p3Cb = 0, p3Cc = 0, p3Cd = 0, p3Ce = 0;
+  uint32_t w[kNumPredictors] = {};
+};
+
+struct State {
+  pixel_type_w prediction[kNumPredictors] = {};  // Set by Predict().
+  pixel_type_w pred = 0;  // *before* removing the added bits.
+  std::vector<uint32_t> pred_errors[kNumPredictors];  // Two rows each.
+  std::vector<int32_t> error;  // Two rows of pred - value (see UpdateErrors).
+  const Header header;
+
+  // Allows to approximate division by a number from 1 to 64.
+  //  for (int i = 0; i < 64; i++) divlookup[i] = (1 << 24) / (i + 1);
+  // I.e. entry i holds (1 << 24) / (i + 1), rounded down.
+  const uint32_t divlookup[64] = {
+      16777216, 8388608, 5592405, 4194304, 3355443, 2796202, 2396745, 2097152,
+      1864135,  1677721, 1525201, 1398101, 1290555, 1198372, 1118481, 1048576,
+      986895,   932067,  883011,  838860,  798915,  762600,  729444,  699050,
+      671088,   645277,  621378,  599186,  578524,  559240,  541200,  524288,
+      508400,   493447,  479349,  466033,  453438,  441505,  430185,  419430,
+      409200,   399457,  390167,  381300,  372827,  364722,  356962,  349525,
+      342392,   335544,  328965,  322638,  316551,  310689,  305040,  299593,
+      294337,   289262,  284359,  279620,  275036,  270600,  266305,  262144};
+  // Scales x by 2^kPredExtraBits; the shift itself is done on uint64_t.
+  constexpr static pixel_type_w AddBits(pixel_type_w x) {
+    return uint64_t(x) << kPredExtraBits;
+  }
+
+  State(Header header, size_t xsize, size_t ysize) : header(header) {
+    // Every buffer holds two rows of xsize + 2 entries; the extra margin
+    // avoids out-of-bounds writes. |ysize| is not used here.
+    for (size_t i = 0; i < 4; i++) {  // NOTE(review): 4 == kNumPredictors.
+      pred_errors[i].resize((xsize + 2) * 2);
+    }
+    error.resize((xsize + 2) * 2);
+  }
+
+  // Approximates 4+(maxweight<<24)/(x+1), avoiding division
+  JXL_INLINE uint32_t ErrorWeight(uint64_t x, uint32_t maxweight) const {
+    int shift = static_cast<int>(FloorLog2Nonzero(x + 1)) - 5;
+    if (shift < 0) shift = 0;  // x + 1 < 32: index divlookup with x directly.
+    return 4 + ((maxweight * divlookup[x >> shift]) >> shift);
+  }
+
+  // Approximates the weighted average of the input values with the given
+  // weights, avoiding division. Weights must sum to at least 16.
+  JXL_INLINE pixel_type_w
+  WeightedAverage(const pixel_type_w *JXL_RESTRICT p,
+                  std::array<uint32_t, kNumPredictors> w) const {
+    uint32_t weight_sum = 0;
+    for (size_t i = 0; i < kNumPredictors; i++) {
+      weight_sum += w[i];
+    }
+    JXL_DASSERT(weight_sum > 15);
+    uint32_t log_weight = FloorLog2Nonzero(weight_sum);  // at least 4.
+    weight_sum = 0;
+    for (size_t i = 0; i < kNumPredictors; i++) {
+      w[i] >>= log_weight - 4;  // The new sum is below 32.
+      weight_sum += w[i];
+    }
+    // for rounding.
+    pixel_type_w sum = (weight_sum >> 1) - 1;
+    for (size_t i = 0; i < kNumPredictors; i++) {
+      sum += p[i] * w[i];
+    }
+    return (sum * divlookup[weight_sum - 1]) >> 24;
+  }
+  // Predicts pixel (x, y); sets pred and prediction[], which UpdateErrors reads.
+  template <bool compute_properties>
+  JXL_INLINE pixel_type_w Predict(size_t x, size_t y, size_t xsize,
+                                  pixel_type_w N, pixel_type_w W,
+                                  pixel_type_w NE, pixel_type_w NW,
+                                  pixel_type_w NN, Properties *properties,
+                                  size_t offset) {
+    size_t cur_row = y & 1 ? 0 : (xsize + 2);  // The two rows swap with y.
+    size_t prev_row = y & 1 ? (xsize + 2) : 0;
+    size_t pos_N = prev_row + x;
+    size_t pos_NE = x < xsize - 1 ? pos_N + 1 : pos_N;  // Clamped at row end.
+    size_t pos_NW = x > 0 ? pos_N - 1 : pos_N;  // Clamped at row start.
+    std::array<uint32_t, kNumPredictors> weights;
+    for (size_t i = 0; i < kNumPredictors; i++) {
+      // pred_errors[pos_N] also contains the error of pixel W.
+      // pred_errors[pos_NW] also contains the error of pixel WW.
+      weights[i] = pred_errors[i][pos_N] + pred_errors[i][pos_NE] +
+                   pred_errors[i][pos_NW];
+      weights[i] = ErrorWeight(weights[i], header.w[i]);
+    }
+
+    N = AddBits(N);
+    W = AddBits(W);
+    NE = AddBits(NE);
+    NW = AddBits(NW);
+    NN = AddBits(NN);
+    // Errors (pred - value) stored earlier for the neighbouring pixels.
+    pixel_type_w teW = x == 0 ? 0 : error[cur_row + x - 1];
+    pixel_type_w teN = error[pos_N];
+    pixel_type_w teNW = error[pos_NW];
+    pixel_type_w sumWN = teN + teW;
+    pixel_type_w teNE = error[pos_NE];
+
+    if (compute_properties) {
+      pixel_type_w p = teW;  // Ends up as the error of largest magnitude.
+      if (std::abs(teN) > std::abs(p)) p = teN;
+      if (std::abs(teNW) > std::abs(p)) p = teNW;
+      if (std::abs(teNE) > std::abs(p)) p = teNE;
+      (*properties)[offset++] = p;
+    }
+    // The four sub-predictions, in the scaled (AddBits) domain.
+    prediction[0] = W + NE - N;
+    prediction[1] = N - (((sumWN + teNE) * header.p1C) >> 5);
+    prediction[2] = W - (((sumWN + teNW) * header.p2C) >> 5);
+    prediction[3] =
+        N - ((teNW * header.p3Ca + teN * header.p3Cb + teNE * header.p3Cc +
+              (NN - N) * header.p3Cd + (NW - W) * header.p3Ce) >>
+             5);
+
+    pred = WeightedAverage(prediction, weights);
+
+    // If all three have the same sign, skip clamping.
+    if (((teN ^ teW) | (teN ^ teNW)) > 0) {
+      return (pred + kPredictionRound) >> kPredExtraBits;
+    }
+
+    // Otherwise, clamp to min/max of neighbouring pixels (just W, NE, N).
+    pixel_type_w mx = std::max(W, std::max(NE, N));
+    pixel_type_w mn = std::min(W, std::min(NE, N));
+    pred = std::max(mn, std::min(mx, pred));
+    return (pred + kPredictionRound) >> kPredExtraBits;
+  }
+  // Stores the errors of pred and prediction[] against the actual value val.
+  JXL_INLINE void UpdateErrors(pixel_type_w val, size_t x, size_t y,
+                               size_t xsize) {
+    size_t cur_row = y & 1 ? 0 : (xsize + 2);
+    size_t prev_row = y & 1 ? (xsize + 2) : 0;
+    val = AddBits(val);
+    error[cur_row + x] = pred - val;
+    for (size_t i = 0; i < kNumPredictors; i++) {
+      pixel_type_w err =
+          (std::abs(prediction[i] - val) + kPredictionRound) >> kPredExtraBits;
+      // For predicting in the next row.
+      pred_errors[i][cur_row + x] = err;
+      // Add the error on this pixel to the error on the NE pixel. This has the
+      // effect of adding the error on this pixel to the E and EE pixels.
+      pred_errors[i][prev_row + x + 1] += err;
+    }
+  }
+};
+
+// Encoder helper function: overwrites all weights and coefficients of |header|
+// with preset number |i|. Any |i| other than 0..3 (including 4) selects the
+// last preset.
+inline void PredictorMode(int i, Header *header) {
+  if (i == 0) {
+    // ~ lossless16 predictor
+    // These match the default values that Header::VisitFields passes on.
+    header->p1C = 16;
+    header->p2C = 10;
+    header->p3Ca = 7;
+    header->p3Cb = 7;
+    header->p3Cc = 7;
+    header->p3Cd = 0;
+    header->p3Ce = 0;
+    header->w[0] = 0xd;
+    header->w[1] = 0xc;
+    header->w[2] = 0xc;
+    header->w[3] = 0xc;
+  } else if (i == 1) {
+    // Preset 1:
+    // ~ default lossless8 predictor
+    header->p1C = 8;
+    header->p2C = 8;
+    header->p3Ca = 4;
+    header->p3Cb = 0;
+    header->p3Cc = 3;
+    header->p3Cd = 23;
+    header->p3Ce = 2;
+    header->w[0] = 0xd;
+    header->w[1] = 0xc;
+    header->w[2] = 0xc;
+    header->w[3] = 0xb;
+  } else if (i == 2) {
+    // Preset 2:
+    // ~ west lossless8 predictor
+    header->p1C = 10;
+    header->p2C = 9;
+    header->p3Ca = 7;
+    header->p3Cb = 0;
+    header->p3Cc = 0;
+    header->p3Cd = 16;
+    header->p3Ce = 9;
+    header->w[0] = 0xd;
+    header->w[1] = 0xc;
+    header->w[2] = 0xd;
+    header->w[3] = 0xc;
+  } else if (i == 3) {
+    // Preset 3:
+    // ~ north lossless8 predictor
+    header->p1C = 16;
+    header->p2C = 8;
+    header->p3Ca = 0;
+    header->p3Cb = 16;
+    header->p3Cc = 0;
+    header->p3Cd = 23;
+    header->p3Ce = 0;
+    header->w[0] = 0xd;
+    header->w[1] = 0xd;
+    header->w[2] = 0xc;
+    header->w[3] = 0xc;
+  } else {
+    // Preset 4 and every other value of i:
+    // something else, because why not
+    header->p1C = 10;
+    header->p2C = 10;
+    header->p3Ca = 5;
+    header->p3Cb = 5;
+    header->p3Cc = 5;
+    header->p3Cd = 12;
+    header->p3Ce = 4;
+    header->w[0] = 0xd;
+    header->w[1] = 0xc;
+    header->w[2] = 0xc;
+    header->w[3] = 0xc;
+  }
+}
+}  // namespace weighted
+
+// Stores a node and its two children at the same time. This significantly
+// reduces the number of branches needed during decoding.
+struct FlatDecisionNode {
+  // Property + splitval of the top node.
+  int32_t property0;  // -1 if leaf.
+  union {
+    PropertyVal splitval0;  // Inner node: threshold for property0.
+    Predictor predictor;    // Leaf: predictor returned by the lookup.
+  };
+  // Property+splitval of the two child nodes.
+  union {
+    PropertyVal splitvals[2];  // Inner node: thresholds of the two children.
+    int32_t multiplier;        // Leaf: multiplier returned by the lookup.
+  };
+  uint32_t childID;  // Leaf: ctx id. Inner node: base index of next nodes.
+  union {
+    int16_t properties[2];     // Inner node: properties of the two children.
+    int32_t predictor_offset;  // Leaf: offset returned by the lookup.
+  };
+};
+using FlatTree = std::vector<FlatDecisionNode>;
+
+class MATreeLookup {
+ public:
+  explicit MATreeLookup(const FlatTree &tree) : nodes_(tree) {}
+  struct LookupResult {
+    uint32_t context;     // childID of the leaf.
+    Predictor predictor;  // predictor of the leaf.
+    int32_t offset;       // predictor_offset of the leaf.
+    int32_t multiplier;   // multiplier of the leaf.
+  };
+  JXL_INLINE LookupResult Lookup(const Properties &properties) const {
+    uint32_t pos = 0;  // Current node index; the walk starts at node 0.
+    while (true) {
+#define TRAVERSE_THE_TREE                                                      \
+  {                                                                            \
+    const FlatDecisionNode &node = nodes_[pos];                                \
+    if (node.property0 < 0) {                                                  \
+      return {node.childID, node.predictor, node.predictor_offset,             \
+              node.multiplier};                                                \
+    }                                                                          \
+    bool p0 = properties[node.property0] <= node.splitval0;                    \
+    uint32_t off0 = properties[node.properties[0]] <= node.splitvals[0];       \
+    uint32_t off1 = 2 | (properties[node.properties[1]] <= node.splitvals[1]); \
+    pos = node.childID + (p0 ? off1 : off0);                                   \
+  }
+      // The step is expanded twice per loop iteration; leaves return directly.
+      TRAVERSE_THE_TREE;
+      TRAVERSE_THE_TREE;
+    }
+  }
+
+ private:
+  const FlatTree &nodes_;  // Reference only: the tree must outlive this object.
+};
+
+static constexpr size_t kExtraPropsPerChannel = 4;  // See PrecomputeReferences.
+static constexpr size_t kNumNonrefProperties =
+    kNumStaticProperties + 13 + weighted::kNumProperties;
+// Index of the first weighted-predictor property: they come last.
+constexpr size_t kWPProp = kNumNonrefProperties - weighted::kNumProperties;
+constexpr size_t kGradientProp = 9;  // "local gradient" slot of InitPropsRow.
+
+// Gradient predictor: returns n + w - l, clamped to the range spanned by n
+// and w.
+static JXL_INLINE int32_t ClampedGradient(const int32_t n, const int32_t w,
+                                          const int32_t l) {
+  const int32_t lo = std::min(n, w);
+  const int32_t hi = std::max(n, w);
+  // n + w - l is evaluated with wrapping uint32_t arithmetic: whenever l lies
+  // within [lo, hi] the true result fits in int32_t, and every other case is
+  // replaced below by comparing l against lo and hi directly, so a wrapped
+  // value is never returned.
+  const uint32_t wrapped = static_cast<uint32_t>(n) +
+                           static_cast<uint32_t>(w) -
+                           static_cast<uint32_t>(l);
+  const int32_t gradient = static_cast<int32_t>(wrapped);
+  // Two separate selects, both always evaluated, let the compiler emit
+  // conditional moves (cmovl/cmovg on x86) instead of branches.
+  const int32_t upper_clamped = (l < lo) ? hi : gradient;
+  return (l > hi) ? lo : upper_clamped;
+}
+
+inline pixel_type_w Select(pixel_type_w a, pixel_type_w b, pixel_type_w c) {
+  const pixel_type_w estimate = a + b - c;
+  const bool a_closer = std::abs(estimate - a) < std::abs(estimate - b);
+  // Picks whichever of a and b is closer to a + b - c; ties go to b.
+  return a_closer ? a : b;
+}
+
+inline void PrecomputeReferences(const Channel &ch, size_t y,
+                                 const Image &image, uint32_t i,
+                                 Channel *references) {
+  ZeroFillImage(&references->plane);
+  const size_t num_extra_props = references->w;
+  const intptr_t onerow = references->plane.PixelsPerRow();
+  uint32_t offset = 0;
+  // Earlier channels, nearest first; only same size and shifts are used.
+  for (int32_t j = static_cast<int32_t>(i) - 1;
+       j >= 0 && offset < num_extra_props; j--) {
+    const auto &prev = image.channel[j];
+    const auto &cur = image.channel[i];
+    const bool same_layout = prev.w == cur.w && prev.h == cur.h &&
+                             prev.hshift == cur.hshift &&
+                             prev.vshift == cur.vshift;
+    if (!same_layout) continue;
+    pixel_type *JXL_RESTRICT rp = references->Row(0) + offset;
+    const pixel_type *JXL_RESTRICT rpp = prev.Row(y);
+    const pixel_type *JXL_RESTRICT rpprev = prev.Row(y ? y - 1 : 0);
+    for (size_t x = 0; x < ch.w; x++, rp += onerow) {
+      const pixel_type_w v = rpp[x];
+      rp[0] = std::abs(v);
+      rp[1] = v;
+      const pixel_type_w vleft = (x ? rpp[x - 1] : 0);
+      const pixel_type_w vtop = (y ? rpprev[x] : vleft);
+      const pixel_type_w vtopleft = (x && y ? rpprev[x - 1] : vleft);
+      const pixel_type_w residual = v - ClampedGradient(vleft, vtop, vtopleft);
+      rp[2] = std::abs(residual);
+      rp[3] = residual;
+    }
+    offset += kExtraPropsPerChannel;
+  }
+}
+
+struct PredictionResult {
+  int context = 0;
+  pixel_type_w guess = 0;
+  Predictor predictor;  // NOTE: no default initializer, unlike the above.
+  int32_t multiplier;   // NOTE: no default initializer, unlike the above.
+};
+
+inline void InitPropsRow(
+    Properties *p,
+    const std::array<pixel_type, kNumStaticProperties> &static_props,
+    const int y) {
+  Properties &props = *p;
+  // Copy the static properties, then store the row index at position 2.
+  for (size_t i = 0; i != static_props.size(); ++i) props[i] = static_props[i];
+  props[2] = y;
+  props[kGradientProp] = 0;  // local gradient.
+}
+
+namespace detail {
+enum PredictorMode {  // Bit flags, tested with & on Predict's |mode|.
+  kUseTree = 1,  // Makes Predict compute properties.
+  kUseWP = 2,
+  kForceComputeProperties = 4,  // Also makes Predict compute properties.
+  kAllPredictions = 8,
+  kNoEdgeCases = 16
+};
+
+// Evaluates a single predictor `p` from the already-fetched neighbourhood of
+// the current sample.
+//
+// `wp_pred` is the value of the weighted predictor, computed by the caller;
+// it is returned unchanged for Predictor::Weighted. Unknown predictor values
+// yield 0, exactly like Predictor::Zero. The averaging predictors use integer
+// division, which truncates toward zero.
+JXL_INLINE pixel_type_w PredictOne(Predictor p, pixel_type_w left,
+                                   pixel_type_w top, pixel_type_w toptop,
+                                   pixel_type_w topleft, pixel_type_w topright,
+                                   pixel_type_w leftleft,
+                                   pixel_type_w toprightright,
+                                   pixel_type_w wp_pred) {
+  switch (p) {
+    // Plain copies of one neighbour (or of the weighted prediction).
+    case Predictor::Left:
+      return left;
+    case Predictor::Top:
+      return top;
+    case Predictor::TopLeft:
+      return topleft;
+    case Predictor::TopRight:
+      return topright;
+    case Predictor::LeftLeft:
+      return leftleft;
+    case Predictor::Weighted:
+      return wp_pred;
+
+    // Gradient-based predictors.
+    case Predictor::Select:
+      return Select(left, top, topleft);
+    case Predictor::Gradient:
+      return pixel_type_w{ClampedGradient(left, top, topleft)};
+
+    // Two-neighbour averages.
+    case Predictor::Average0:
+      return (left + top) / 2;
+    case Predictor::Average1:
+      return (left + topleft) / 2;
+    case Predictor::Average2:
+      return (topleft + top) / 2;
+    case Predictor::Average3:
+      return (top + topright) / 2;
+
+    // Six-tap weighted average; the +8 rounds before dividing by 16.
+    case Predictor::Average4: {
+      const pixel_type_w weighted_sum = 6 * top - 2 * toptop + 7 * left +
+                                        1 * leftleft + 1 * toprightright +
+                                        3 * topright + 8;
+      return weighted_sum / 16;
+    }
+
+    case Predictor::Zero:
+    default:
+      return pixel_type_w{0};
+  }
+}
+
+// Core per-sample prediction routine. `mode` is a bitwise OR of PredictorMode
+// flags and selects, at compile time, which of the steps below are executed.
+//
+// Parameters:
+//   p           property vector; written only when properties are computed
+//               (kUseTree or kForceComputeProperties). The wrappers pass
+//               nullptr in the other modes.
+//   w           width of the row, used for the right-border checks.
+//   pp          pointer to the current sample; neighbours are read relative
+//               to it, with consecutive rows `onerow` elements apart.
+//   x, y        position of the current sample.
+//   predictor   predictor to evaluate; replaced by the tree's choice when
+//               kUseTree is set.
+//   lookup      MA tree lookup, dereferenced only with kUseTree.
+//   references  extra per-pixel properties (row x is read); dereferenced only
+//               when properties are computed and kNoEdgeCases is NOT set.
+//   wp_state    weighted-predictor state, dereferenced only with kUseWP.
+//   predictions output array of kNumModularPredictors entries, written only
+//               with kAllPredictions.
+//
+// Returns the prediction. `guess` is the tree leaf's offset (0 without a
+// tree) plus the selected predictor's value; `context` and `multiplier` are
+// assigned only when kUseTree is set.
+//
+// With kNoEdgeCases every neighbour (including pp[-2], pp[2 - onerow] and
+// pp[-2 * onerow]) is read unconditionally, so the caller must guarantee that
+// those samples exist.
+template <int mode>
+JXL_INLINE PredictionResult Predict(
+    Properties *p, size_t w, const pixel_type *JXL_RESTRICT pp,
+    const intptr_t onerow, const size_t x, const size_t y, Predictor predictor,
+    const MATreeLookup *lookup, const Channel *references,
+    weighted::State *wp_state, pixel_type_w *predictions) {
+  // We start in position 3 because of 2 static properties + y.
+  size_t offset = 3;
+  constexpr bool compute_properties =
+      mode & kUseTree || mode & kForceComputeProperties;
+  constexpr bool nec = mode & kNoEdgeCases;
+  // Neighbour fetch. Without kNoEdgeCases, a neighbour that falls outside the
+  // image is replaced by an already-available one (ultimately 0 at the
+  // top-left corner).
+  pixel_type_w left = (nec || x ? pp[-1] : (y ? pp[-onerow] : 0));
+  pixel_type_w top = (nec || y ? pp[-onerow] : left);
+  pixel_type_w topleft = (nec || (x && y) ? pp[-1 - onerow] : left);
+  pixel_type_w topright = (nec || (x + 1 < w && y) ? pp[1 - onerow] : top);
+  pixel_type_w leftleft = (nec || x > 1 ? pp[-2] : left);
+  pixel_type_w toptop = (nec || y > 1 ? pp[-onerow - onerow] : top);
+  pixel_type_w toprightright =
+      (nec || (x + 2 < w && y) ? pp[2 - onerow] : topright);
+
+  if (compute_properties) {
+    // location
+    (*p)[offset++] = x;
+    // neighbors
+    (*p)[offset++] = top > 0 ? top : -top;
+    (*p)[offset++] = left > 0 ? left : -left;
+    (*p)[offset++] = top;
+    (*p)[offset++] = left;
+
+    // local gradient
+    // The next slot still holds the `left + top - topleft` value stored by
+    // the previous call (or the 0 written by InitPropsRow), so it must be
+    // read here, before it is overwritten just below.
+    (*p)[offset] = left - (*p)[offset + 1];
+    offset++;
+    // local gradient
+    (*p)[offset++] = left + top - topleft;
+
+    // FFV1 context properties
+    (*p)[offset++] = left - topleft;
+    (*p)[offset++] = topleft - top;
+    (*p)[offset++] = top - topright;
+    (*p)[offset++] = top - toptop;
+    (*p)[offset++] = left - leftleft;
+  }
+
+  pixel_type_w wp_pred = 0;
+  if (mode & kUseWP) {
+    // NOTE(review): presumably this also stores the weighted-predictor
+    // property at (*p)[offset] when compute_properties is true; confirm in
+    // weighted::State::Predict.
+    wp_pred = wp_state->Predict<compute_properties>(
+        x, y, w, top, left, topright, topleft, toptop, p, offset);
+  }
+  if (!nec && compute_properties) {
+    // Skip the weighted-predictor slot(s), then append the reference-channel
+    // properties precomputed for this pixel.
+    offset += weighted::kNumProperties;
+    // Extra properties.
+    const pixel_type *JXL_RESTRICT rp = references->Row(x);
+    for (size_t i = 0; i < references->w; i++) {
+      (*p)[offset++] = rp[i];
+    }
+  }
+  PredictionResult result;
+  if (mode & kUseTree) {
+    // The tree decides the context, offset, multiplier and predictor.
+    MATreeLookup::LookupResult lr = lookup->Lookup(*p);
+    result.context = lr.context;
+    result.guess = lr.offset;
+    result.multiplier = lr.multiplier;
+    predictor = lr.predictor;
+  }
+  if (mode & kAllPredictions) {
+    for (size_t i = 0; i < kNumModularPredictors; i++) {
+      predictions[i] = PredictOne((Predictor)i, left, top, toptop, topleft,
+                                  topright, leftleft, toprightright, wp_pred);
+    }
+  }
+  result.guess += PredictOne(predictor, left, top, toptop, topleft, topright,
+                             leftleft, toprightright, wp_pred);
+  result.predictor = predictor;
+
+  return result;
+}
+}  // namespace detail
+
+// Predicts the sample at (x, y) with the caller-supplied `predictor`.
+// No MA tree lookup, no weighted predictor and no property computation take
+// place, so the result's tree-derived fields are not filled in from a tree.
+inline PredictionResult PredictNoTreeNoWP(size_t w,
+                                          const pixel_type *JXL_RESTRICT pp,
+                                          const intptr_t onerow, const int x,
+                                          const int y, Predictor predictor) {
+  constexpr int kMode = 0;
+  return detail::Predict<kMode>(/*p=*/nullptr, w, pp, onerow, x, y, predictor,
+                                /*lookup=*/nullptr, /*references=*/nullptr,
+                                /*wp_state=*/nullptr,
+                                /*predictions=*/nullptr);
+}
+
+// Predicts the sample at (x, y) with the caller-supplied `predictor`, running
+// the weighted predictor on `wp_state` so that Predictor::Weighted is
+// available. No MA tree is consulted and no properties are computed.
+inline PredictionResult PredictNoTreeWP(size_t w,
+                                        const pixel_type *JXL_RESTRICT pp,
+                                        const intptr_t onerow, const int x,
+                                        const int y, Predictor predictor,
+                                        weighted::State *wp_state) {
+  constexpr int kMode = detail::kUseWP;
+  return detail::Predict<kMode>(/*p=*/nullptr, w, pp, onerow, x, y, predictor,
+                                /*lookup=*/nullptr, /*references=*/nullptr,
+                                wp_state, /*predictions=*/nullptr);
+}
+
+// Computes the properties of the sample at (x, y) into `*p`, looks them up in
+// `tree_lookup` and predicts with the predictor chosen by the tree. The
+// weighted predictor is not run.
+inline PredictionResult PredictTreeNoWP(Properties *p, size_t w,
+                                        const pixel_type *JXL_RESTRICT pp,
+                                        const intptr_t onerow, const int x,
+                                        const int y,
+                                        const MATreeLookup &tree_lookup,
+                                        const Channel &references) {
+  constexpr int kMode = detail::kUseTree;
+  // The predictor argument is a placeholder: the tree lookup replaces it.
+  return detail::Predict<kMode>(p, w, pp, onerow, x, y, Predictor::Zero,
+                                &tree_lookup, &references,
+                                /*wp_state=*/nullptr,
+                                /*predictions=*/nullptr);
+}
+// Variant of PredictTreeNoWP without edge-case handling.
+// Only use for y > 1, x > 1, x < w-2, and empty references: all neighbours
+// are read unconditionally and `references` is not consulted.
+JXL_INLINE PredictionResult
+PredictTreeNoWPNEC(Properties *p, size_t w, const pixel_type *JXL_RESTRICT pp,
+                   const intptr_t onerow, const int x, const int y,
+                   const MATreeLookup &tree_lookup, const Channel &references) {
+  constexpr int kMode = detail::kUseTree | detail::kNoEdgeCases;
+  // The predictor argument is a placeholder: the tree lookup replaces it.
+  return detail::Predict<kMode>(p, w, pp, onerow, x, y, Predictor::Zero,
+                                &tree_lookup, &references,
+                                /*wp_state=*/nullptr,
+                                /*predictions=*/nullptr);
+}
+
+// Computes the properties of the sample at (x, y) into `*p`, runs the
+// weighted predictor on `wp_state`, looks the properties up in `tree_lookup`
+// and predicts with the predictor chosen by the tree.
+inline PredictionResult PredictTreeWP(Properties *p, size_t w,
+                                      const pixel_type *JXL_RESTRICT pp,
+                                      const intptr_t onerow, const int x,
+                                      const int y,
+                                      const MATreeLookup &tree_lookup,
+                                      const Channel &references,
+                                      weighted::State *wp_state) {
+  constexpr int kMode = detail::kUseTree | detail::kUseWP;
+  // The predictor argument is a placeholder: the tree lookup replaces it.
+  return detail::Predict<kMode>(p, w, pp, onerow, x, y, Predictor::Zero,
+                                &tree_lookup, &references, wp_state,
+                                /*predictions=*/nullptr);
+}
+// Variant of PredictTreeWP without edge-case handling: all neighbours of the
+// sample (up to two columns / rows away) are read unconditionally and
+// `references` is not consulted, so the caller must stay clear of the image
+// borders and must not rely on reference properties.
+JXL_INLINE PredictionResult PredictTreeWPNEC(Properties *p, size_t w,
+                                             const pixel_type *JXL_RESTRICT pp,
+                                             const intptr_t onerow, const int x,
+                                             const int y,
+                                             const MATreeLookup &tree_lookup,
+                                             const Channel &references,
+                                             weighted::State *wp_state) {
+  constexpr int kMode =
+      detail::kUseTree | detail::kUseWP | detail::kNoEdgeCases;
+  // The predictor argument is a placeholder: the tree lookup replaces it.
+  return detail::Predict<kMode>(p, w, pp, onerow, x, y, Predictor::Zero,
+                                &tree_lookup, &references, wp_state,
+                                /*predictions=*/nullptr);
+}
+
+// Tree-learning helper: computes the properties of the sample at (x, y) into
+// `*p`, runs the weighted predictor, and predicts with the caller-supplied
+// `predictor`. No tree lookup is performed.
+inline PredictionResult PredictLearn(Properties *p, size_t w,
+                                     const pixel_type *JXL_RESTRICT pp,
+                                     const intptr_t onerow, const int x,
+                                     const int y, Predictor predictor,
+                                     const Channel &references,
+                                     weighted::State *wp_state) {
+  constexpr int kMode = detail::kForceComputeProperties | detail::kUseWP;
+  return detail::Predict<kMode>(p, w, pp, onerow, x, y, predictor,
+                                /*lookup=*/nullptr, &references, wp_state,
+                                /*predictions=*/nullptr);
+}
+
+// Tree-learning helper: computes the properties of the sample at (x, y) into
+// `*p`, runs the weighted predictor, and stores the value of every predictor
+// in `predictions` (kNumModularPredictors entries). The PredictionResult of
+// the underlying call is discarded.
+inline void PredictLearnAll(Properties *p, size_t w,
+                            const pixel_type *JXL_RESTRICT pp,
+                            const intptr_t onerow, const int x, const int y,
+                            const Channel &references,
+                            weighted::State *wp_state,
+                            pixel_type_w *predictions) {
+  constexpr int kMode = detail::kForceComputeProperties | detail::kUseWP |
+                        detail::kAllPredictions;
+  detail::Predict<kMode>(p, w, pp, onerow, x, y, Predictor::Zero,
+                         /*lookup=*/nullptr, &references, wp_state,
+                         predictions);
+}
+// Variant of PredictLearn without edge-case handling: all neighbours of the
+// sample (up to two columns / rows away) are read unconditionally and
+// `references` is not consulted, so the caller must stay clear of the image
+// borders and must not rely on reference properties.
+inline PredictionResult PredictLearnNEC(Properties *p, size_t w,
+                                        const pixel_type *JXL_RESTRICT pp,
+                                        const intptr_t onerow, const int x,
+                                        const int y, Predictor predictor,
+                                        const Channel &references,
+                                        weighted::State *wp_state) {
+  constexpr int kMode = detail::kForceComputeProperties | detail::kUseWP |
+                        detail::kNoEdgeCases;
+  return detail::Predict<kMode>(p, w, pp, onerow, x, y, predictor,
+                                /*lookup=*/nullptr, &references, wp_state,
+                                /*predictions=*/nullptr);
+}
+
+// Variant of PredictLearnAll without edge-case handling: all neighbours of
+// the sample (up to two columns / rows away) are read unconditionally and
+// `references` is not consulted, so the caller must stay clear of the image
+// borders and must not rely on reference properties.
+inline void PredictLearnAllNEC(Properties *p, size_t w,
+                               const pixel_type *JXL_RESTRICT pp,
+                               const intptr_t onerow, const int x, const int y,
+                               const Channel &references,
+                               weighted::State *wp_state,
+                               pixel_type_w *predictions) {
+  constexpr int kMode = detail::kForceComputeProperties | detail::kUseWP |
+                        detail::kAllPredictions | detail::kNoEdgeCases;
+  detail::Predict<kMode>(p, w, pp, onerow, x, y, Predictor::Zero,
+                         /*lookup=*/nullptr, &references, wp_state,
+                         predictions);
+}
+
+// Stores the value of every predictor for the sample at (x, y) in
+// `predictions` (kNumModularPredictors entries). The weighted predictor is
+// not run, so its entry is computed from a weighted prediction of 0. No
+// properties are computed and no tree is consulted.
+inline void PredictAllNoWP(size_t w, const pixel_type *JXL_RESTRICT pp,
+                           const intptr_t onerow, const int x, const int y,
+                           pixel_type_w *predictions) {
+  constexpr int kMode = detail::kAllPredictions;
+  detail::Predict<kMode>(/*p=*/nullptr, w, pp, onerow, x, y, Predictor::Zero,
+                         /*lookup=*/nullptr, /*references=*/nullptr,
+                         /*wp_state=*/nullptr, predictions);
+}
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_ENCODING_CONTEXT_PREDICT_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/dec_ma.cc b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/dec_ma.cc
new file mode 100644
index 0000000000..66562f7dfd
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/dec_ma.cc
@@ -0,0 +1,107 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/encoding/dec_ma.h"
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/modular/encoding/ma_common.h"
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+namespace {
+
+// Checks that every split in the subtree rooted at `root` is meaningful.
+//
+// `prop_bounds[p]` is the inclusive [min, max] range that property p can
+// still take on entry. A decision node on property p with split value v is
+// valid only if min <= v < max; its left child is then validated with the
+// range [v + 1, max] and its right child with [min, v]. Leaves
+// (property == -1) are always valid.
+//
+// The walk is iterative with an explicit stack. The tree comes from the
+// bitstream, so its depth is attacker-controlled; a recursive walk that
+// copies the bounds vector at every level could exhaust the call stack and
+// allocate depth * prop_bounds.size() entries. Here a single bounds vector
+// is updated in place and restored when a subtree is finished.
+//
+// Returns true for a valid tree and JXL_FAILURE("Invalid tree") otherwise,
+// including when a node or property index is out of range.
+Status ValidateTree(
+    const Tree &tree,
+    const std::vector<std::pair<pixel_type, pixel_type>> &prop_bounds,
+    size_t root) {
+  using Bound = std::pair<pixel_type, pixel_type>;
+  struct Frame {
+    size_t node;  // index into `tree`
+    int stage;    // 0: not visited, 1: left subtree pending, 2: right pending
+    Bound saved;  // range of the node's property when the node was entered
+  };
+
+  std::vector<Bound> bounds = prop_bounds;
+  std::vector<Frame> pending;
+  pending.push_back(Frame{root, 0, Bound()});
+
+  while (!pending.empty()) {
+    // Frames are addressed by index: push_back may reallocate the stack.
+    const size_t top = pending.size() - 1;
+    const size_t idx = pending[top].node;
+    if (idx >= tree.size()) return JXL_FAILURE("Invalid tree");
+    const PropertyDecisionNode &node = tree[idx];
+    if (node.property == -1) {
+      pending.pop_back();
+      continue;
+    }
+    const size_t p = node.property;
+    if (p >= bounds.size()) return JXL_FAILURE("Invalid tree");
+    const int val = node.splitval;
+
+    switch (pending[top].stage) {
+      case 0:
+        if (bounds[p].first > val) return JXL_FAILURE("Invalid tree");
+        // Splitting at max value makes no sense: left range will be exactly
+        // same as parent, right range will be invalid (min > max).
+        if (bounds[p].second <= val) return JXL_FAILURE("Invalid tree");
+        pending[top].saved = bounds[p];
+        pending[top].stage = 1;
+        // val < max, so val + 1 cannot overflow.
+        bounds[p].first = val + 1;
+        pending.push_back(Frame{node.lchild, 0, Bound()});
+        break;
+      case 1:
+        bounds[p] = pending[top].saved;
+        bounds[p].second = val;
+        pending[top].stage = 2;
+        pending.push_back(Frame{node.rchild, 0, Bound()});
+        break;
+      default:
+        // Both subtrees are done: restore the range seen by the parent.
+        bounds[p] = pending[top].saved;
+        pending.pop_back();
+        break;
+    }
+  }
+  return true;
+}
+
+// Decodes an MA tree from `br` using the already-initialised ANS `reader` and
+// `context_map`, stores it in `*tree` and validates it.
+//
+// Nodes are stored in breadth-first order: `to_decode` counts the nodes that
+// are already referenced but not yet read, so the children of the node being
+// appended end up right after those pending nodes.
+//
+// For each node a property value is read first:
+//   * 0 denotes a leaf. Its predictor, offset and multiplier follow; the
+//     multiplier is (mul_bits + 1) << mul_log. Leaves are numbered
+//     consecutively and the number is stored in `lchild`.
+//   * k > 0 denotes a decision node on property k - 1, followed by its split
+//     value.
+//
+// Fails on a truncated bitstream, when the tree grows beyond
+// `tree_size_limit`, on out-of-range field values, or when ValidateTree
+// rejects the result.
+Status DecodeTree(BitReader *br, ANSSymbolReader *reader,
+                  const std::vector<uint8_t> &context_map, Tree *tree,
+                  size_t tree_size_limit) {
+  size_t leaf_id = 0;
+  size_t to_decode = 1;
+  tree->clear();
+  while (to_decode > 0) {
+    JXL_RETURN_IF_ERROR(br->AllReadsWithinBounds());
+    if (tree->size() > tree_size_limit) {
+      return JXL_FAILURE("Tree is too large: %" PRIuS " nodes vs %" PRIuS
+                         " max nodes",
+                         tree->size(), tree_size_limit);
+    }
+    to_decode--;
+    uint32_t prop1 = reader->ReadHybridUint(kPropertyContext, br, context_map);
+    // Property indices must fit the 256-entry bounds table built below.
+    if (prop1 > 256) return JXL_FAILURE("Invalid tree property value");
+    int property = prop1 - 1;
+    if (property == -1) {
+      size_t predictor =
+          reader->ReadHybridUint(kPredictorContext, br, context_map);
+      if (predictor >= kNumModularPredictors) {
+        return JXL_FAILURE("Invalid predictor");
+      }
+      int64_t predictor_offset =
+          UnpackSigned(reader->ReadHybridUint(kOffsetContext, br, context_map));
+      uint32_t mul_log =
+          reader->ReadHybridUint(kMultiplierLogContext, br, context_map);
+      if (mul_log >= 31) {
+        return JXL_FAILURE("Invalid multiplier logarithm");
+      }
+      uint32_t mul_bits =
+          reader->ReadHybridUint(kMultiplierBitsContext, br, context_map);
+      // Compare against (limit - 1) instead of computing mul_bits + 1:
+      // for mul_bits == UINT32_MAX the sum wraps to 0, which would pass the
+      // check and produce a multiplier of 0. mul_log < 31, so the limit is at
+      // least 2 and the subtraction cannot underflow.
+      if (mul_bits >= (1u << (31u - mul_log)) - 1u) {
+        return JXL_FAILURE("Invalid multiplier");
+      }
+      uint32_t multiplier = (mul_bits + 1U) << mul_log;
+      tree->emplace_back(-1, 0, leaf_id++, 0, static_cast<Predictor>(predictor),
+                         predictor_offset, multiplier);
+      continue;
+    }
+    int splitval =
+        UnpackSigned(reader->ReadHybridUint(kSplitValContext, br, context_map));
+    // tree->size() is the index of the node being appended; `to_decode`
+    // pending nodes follow it, then its two children.
+    tree->emplace_back(property, splitval, tree->size() + to_decode + 1,
+                       tree->size() + to_decode + 2, Predictor::Zero, 0, 1);
+    to_decode += 2;
+  }
+  // Every property starts with the full pixel_type range.
+  std::vector<std::pair<pixel_type, pixel_type>> prop_bounds;
+  prop_bounds.resize(256, {std::numeric_limits<pixel_type>::min(),
+                           std::numeric_limits<pixel_type>::max()});
+  return ValidateTree(*tree, prop_bounds, 0);
+}
+}  // namespace
+
+// Reads an MA tree from `br` into `*tree`.
+//
+// The entropy-code histograms for the tree are decoded first, then the tree
+// itself, with at most min(tree_size_limit, kMaxTreeSize) nodes allowed.
+// Fails if the histograms or the tree cannot be decoded, if the property
+// context is degenerate with a non-zero symbol (reported as "Infinite tree"),
+// or if the ANS final-state check fails.
+Status DecodeTree(BitReader *br, Tree *tree, size_t tree_size_limit) {
+  std::vector<uint8_t> context_map;
+  ANSCode code;
+  JXL_RETURN_IF_ERROR(
+      DecodeHistograms(br, kNumTreeContexts, &code, &context_map));
+
+  // TODO(eustas): investigate more infinite tree cases.
+  // NOTE(review): presumably a degenerate symbol > 0 means every node would
+  // be a decision node (a property value of 0 marks a leaf), so decoding
+  // would never terminate; confirm against ANSCode.
+  const auto property_histogram = context_map[kPropertyContext];
+  if (code.degenerate_symbols[property_histogram] > 0) {
+    return JXL_FAILURE("Infinite tree");
+  }
+
+  ANSSymbolReader reader(&code, br);
+  const size_t max_nodes = std::min(tree_size_limit, kMaxTreeSize);
+  JXL_RETURN_IF_ERROR(DecodeTree(br, &reader, context_map, tree, max_nodes));
+
+  if (reader.CheckANSFinalState()) {
+    return true;
+  }
+  return JXL_FAILURE("ANS decode final state failed");
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/dec_ma.h b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/dec_ma.h
new file mode 100644
index 0000000000..a910c4deb1
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/dec_ma.h
@@ -0,0 +1,66 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_DEC_MA_H_
+#define LIB_JXL_MODULAR_ENCODING_DEC_MA_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+// One node of an MA tree; the same struct represents both decision (inner)
+// nodes and leaves.
+//
+// Decision node: `property` >= 0 selects the property that is compared with
+// `splitval`; `lchild` / `rchild` are indices of the child nodes in the Tree
+// vector (the left child covers property values greater than `splitval`).
+// Leaf: `property` == -1; `predictor`, `predictor_offset` and `multiplier`
+// describe how samples are predicted.
+struct PropertyDecisionNode {
+  PropertyVal splitval;
+  int16_t property;  // -1: leaf node, lchild points to leaf node
+  uint32_t lchild;
+  uint32_t rchild;
+  Predictor predictor;
+  int64_t predictor_offset;
+  uint32_t multiplier;
+
+  // Fully explicit constructor; arguments follow the order
+  // (property, split value, left child, right child, predictor, offset,
+  // multiplier).
+  PropertyDecisionNode(int property_index, int split_value, int left,
+                       int right, Predictor leaf_predictor,
+                       int64_t leaf_offset, uint32_t leaf_multiplier)
+      : splitval(split_value),
+        property(property_index),
+        lchild(left),
+        rchild(right),
+        predictor(leaf_predictor),
+        predictor_offset(leaf_offset),
+        multiplier(leaf_multiplier) {}
+
+  // Default node: a leaf using Predictor::Zero, offset 0 and multiplier 1.
+  PropertyDecisionNode()
+      : PropertyDecisionNode(-1, 0, 0, 0, Predictor::Zero, 0, 1) {}
+
+  // Builds a leaf node with the given prediction parameters.
+  static PropertyDecisionNode Leaf(Predictor predictor, int64_t offset = 0,
+                                   uint32_t multiplier = 1) {
+    return PropertyDecisionNode(-1, 0, 0, 0, predictor, offset, multiplier);
+  }
+
+  // Builds a decision node on property `p`. When `rchild` is omitted (-1) the
+  // right child is assumed to directly follow the left one.
+  static PropertyDecisionNode Split(int p, int split_val, int lchild,
+                                    int rchild = -1) {
+    const int right = (rchild == -1) ? lchild + 1 : rchild;
+    return PropertyDecisionNode(p, split_val, lchild, right, Predictor::Zero,
+                                0, 1);
+  }
+};
+
+// An MA tree stored as a flat vector; child links are indices into it and
+// the root is element 0.
+using Tree = std::vector<PropertyDecisionNode>;
+
+// Decodes an MA tree from `br` into `*tree` and validates it. Decoding fails
+// if the tree grows beyond `tree_size_limit` nodes (the limit is additionally
+// capped by kMaxTreeSize).
+Status DecodeTree(BitReader *br, Tree *tree, size_t tree_size_limit);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_ENCODING_DEC_MA_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_debug_tree.cc b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_debug_tree.cc
new file mode 100644
index 0000000000..f2a1705e4b
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_debug_tree.cc
@@ -0,0 +1,124 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/encoding/enc_debug_tree.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "lib/jxl/base/os_macros.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/encoding/dec_ma.h"
+#include "lib/jxl/modular/options.h"
+
+// iOS lacks C89 system(), so rendering the tree with the `dot` tool is
+// disabled there and enabled everywhere else.
+#if JXL_OS_IOS
+#define JXL_ENABLE_DOT 0
+#else
+#define JXL_ENABLE_DOT 1
+#endif
+
+namespace jxl {
+
+// Returns a short, human-readable label for predictor `p`, or "INVALID" for
+// a value that is not a known predictor. The returned pointer refers to a
+// string literal.
+const char *PredictorName(Predictor p) {
+  switch (p) {
+    // Single-neighbour predictors.
+    case Predictor::Zero:
+      return "Zero";
+    case Predictor::Left:
+      return "Left";
+    case Predictor::Top:
+      return "Top";
+    case Predictor::TopLeft:
+      return "TopL";
+    case Predictor::TopRight:
+      return "TopR";
+    case Predictor::LeftLeft:
+      return "LL";
+
+    // Averaging predictors.
+    case Predictor::Average0:
+      return "Avg0";
+    case Predictor::Average1:
+      return "Avg1";
+    case Predictor::Average2:
+      return "Avg2";
+    case Predictor::Average3:
+      return "Avg3";
+    case Predictor::Average4:
+      return "Avg4";
+
+    // Adaptive predictors.
+    case Predictor::Select:
+      return "Sel";
+    case Predictor::Gradient:
+      return "Grd";
+    case Predictor::Weighted:
+      return "Wgh";
+
+    default:
+      break;
+  }
+  return "INVALID";
+}
+
+// Returns a human-readable label for property index `i`.
+//
+// The first 16 properties have fixed names (N / W / NW / NE / NN / WW denote
+// neighbours of the current sample); any higher index is rendered as
+// "ch[<15 - i>]".
+std::string PropertyName(size_t i) {
+  static_assert(kNumNonrefProperties == 16, "Update this function");
+  static const char *const kFixedNames[16] = {
+      "c",            // 0
+      "g",            // 1
+      "y",            // 2
+      "x",            // 3
+      "|N|",          // 4
+      "|W|",          // 5
+      "N",            // 6
+      "W",            // 7
+      "W-WW-NW+NWW",  // 8
+      "W+N-NW",       // 9
+      "W-NW",         // 10
+      "NW-N",         // 11
+      "N-NE",         // 12
+      "N-NN",         // 13
+      "W-WW",         // 14
+      "WGH",          // 15
+  };
+  if (i < 16) {
+    return kFixedNames[i];
+  }
+  return "ch[" + ToString(15 - (int)i) + "]";
+}
+
+// Debug helper: writes `tree` as a Graphviz graph to "<path>.dot" and, where
+// JXL_ENABLE_DOT is set, renders it to "<path>.svg" by running `dot`.
+//
+// Leaves are labelled with predictor, offset and multiplier; decision nodes
+// with "<property> > <splitval>" and an edge to each child. If the .dot file
+// cannot be opened, a message is printed to stderr and nothing else is done.
+void PrintTree(const Tree &tree, const std::string &path) {
+  const std::string dot_path = path + ".dot";
+  FILE *f = fopen(dot_path.c_str(), "w");
+  if (f == nullptr) {
+    // fopen() returns NULL on failure; writing through it would be undefined
+    // behaviour.
+    fprintf(stderr, "PrintTree: cannot open %s for writing\n",
+            dot_path.c_str());
+    return;
+  }
+  fprintf(f, "graph{\n");
+  for (size_t cur = 0; cur < tree.size(); cur++) {
+    if (tree[cur].property < 0) {
+      fprintf(f, "n%05" PRIuS " [label=\"%s%+" PRId64 " (x%u)\"];\n", cur,
+              PredictorName(tree[cur].predictor), tree[cur].predictor_offset,
+              tree[cur].multiplier);
+    } else {
+      fprintf(f, "n%05" PRIuS " [label=\"%s>%d\"];\n", cur,
+              PropertyName(tree[cur].property).c_str(), tree[cur].splitval);
+      // Child indices are uint32_t, so use an unsigned conversion.
+      fprintf(f, "n%05" PRIuS " -- n%05" PRIu32 ";\n", cur, tree[cur].lchild);
+      fprintf(f, "n%05" PRIuS " -- n%05" PRIu32 ";\n", cur, tree[cur].rchild);
+    }
+  }
+  fprintf(f, "}\n");
+  fclose(f);
+#if JXL_ENABLE_DOT
+  // SECURITY NOTE: `path` is spliced into a shell command without quoting.
+  // This is only acceptable because PrintTree is a developer debugging aid;
+  // never pass an untrusted path here.
+  JXL_ASSERT(
+      system(("dot " + path + ".dot -T svg -o " + path + ".svg").c_str()) == 0);
+#endif
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_debug_tree.h b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_debug_tree.h
new file mode 100644
index 0000000000..78deaab1b8
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_debug_tree.h
@@ -0,0 +1,27 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_ENC_DEBUG_TREE_H_
+#define LIB_JXL_MODULAR_ENCODING_ENC_DEBUG_TREE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/jxl/modular/encoding/dec_ma.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+// Short display name of predictor `p` ("INVALID" for unknown values).
+const char *PredictorName(Predictor p);
+// Display name of the tree property with index `i`.
+std::string PropertyName(size_t i);
+
+// Writes `tree` in Graphviz format to "<path>.dot" and, on platforms where it
+// is enabled, renders it to "<path>.svg" with the `dot` tool.
+void PrintTree(const Tree &tree, const std::string &path);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_ENCODING_ENC_DEBUG_TREE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_encoding.cc b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_encoding.cc
new file mode 100644
index 0000000000..12a9774b81
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_encoding.cc
@@ -0,0 +1,599 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <cinttypes>
+#include <limits>
+#include <numeric>
+#include <queue>
+#include <set>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/encoding/enc_debug_tree.h"
+#include "lib/jxl/modular/encoding/enc_ma.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/encoding/ma_common.h"
+#include "lib/jxl/modular/options.h"
+#include "lib/jxl/modular/transform/transform.h"
+#include "lib/jxl/toc.h"
+
+namespace jxl {
+
+namespace {
+// Plot tree (if enabled) and predictor usage map.
+// When true, EncodeModularChannelMAANS allocates a per-channel image
+// (`predictor_img`) sized like the channel being encoded.
+constexpr bool kWantDebug = true;
+// constexpr bool kPrintTree = false;
+
+// Maps a predictor to the RGB colour (0-255 per component) used to paint the
+// predictor usage map. Predictors without a dedicated colour are white.
+inline std::array<uint8_t, 3> PredictorColor(Predictor p) {
+  using Rgb = std::array<uint8_t, 3>;
+  switch (p) {
+    case Predictor::Zero:
+      return Rgb{{0, 0, 0}};
+    case Predictor::Left:
+      return Rgb{{255, 0, 0}};
+    case Predictor::Top:
+      return Rgb{{0, 255, 0}};
+    case Predictor::Average0:
+      return Rgb{{0, 0, 255}};
+    case Predictor::Average4:
+      return Rgb{{192, 128, 128}};
+    case Predictor::Select:
+      return Rgb{{255, 255, 0}};
+    case Predictor::Gradient:
+      return Rgb{{255, 0, 255}};
+    case Predictor::Weighted:
+      return Rgb{{0, 255, 255}};
+      // TODO
+    default:
+      break;
+  }
+  return Rgb{{255, 255, 255}};
+}
+
+}  // namespace
+
+// Collects training samples for tree learning from channel `chan` of `image`.
+//
+// Every pixel of the channel is visited in raster order: its properties and
+// predictions are computed, `*total_pixels` is incremented, and the weighted
+// predictor's error state is updated. A pseudo-randomly chosen subset of the
+// pixels is additionally added to `tree_samples`.
+//
+// Parameters:
+//   image, chan   source image and index of the channel to sample.
+//   group_id      stored as the second static property.
+//   wp_header     parameters for the weighted predictor state.
+//   options       `nb_repeats` gives the fraction of pixels to sample (capped
+//                 at 1); `max_properties` sizes the reference properties.
+//   tree_samples  receives the samples.
+//   total_pixels  incremented once per visited pixel (not reset here).
+void GatherTreeData(const Image &image, pixel_type chan, size_t group_id,
+                    const weighted::Header &wp_header,
+                    const ModularOptions &options, TreeSamples &tree_samples,
+                    size_t *total_pixels) {
+  const Channel &channel = image.channel[chan];
+
+  JXL_DEBUG_V(7, "Learning %" PRIuS "x%" PRIuS " channel %d", channel.w,
+              channel.h, chan);
+
+  std::array<pixel_type, kNumStaticProperties> static_props = {
+      {chan, (int)group_id}};
+  Properties properties(kNumNonrefProperties +
+                        kExtraPropsPerChannel * options.max_properties);
+  double pixel_fraction = std::min(1.0f, options.nb_repeats);
+  // a fraction of 0 is used to disable learning entirely.
+  if (pixel_fraction > 0) {
+    // Raise the fraction for small channels so that the expected number of
+    // samples is at least 1024 (or all pixels, if there are fewer).
+    pixel_fraction = std::max(pixel_fraction,
+                              std::min(1.0, 1024.0 / (channel.w * channel.h)));
+  }
+  // The sampling fraction scaled to the 32-bit range; compared against the
+  // upper 32 bits of each random draw below.
+  uint64_t threshold =
+      (std::numeric_limits<uint64_t>::max() >> 32) * pixel_fraction;
+  // Fixed seed: the set of sampled pixels is the same on every call.
+  uint64_t s[2] = {static_cast<uint64_t>(0x94D049BB133111EBull),
+                   static_cast<uint64_t>(0xBF58476D1CE4E5B9ull)};
+  // Xorshift128+ adapted from xorshift128+-inl.h
+  auto use_sample = [&]() {
+    auto s1 = s[0];
+    const auto s0 = s[1];
+    const auto bits = s1 + s0;  // b, c
+    s[0] = s0;
+    s1 ^= s1 << 23;
+    s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
+    s[1] = s1;
+    return (bits >> 32) <= threshold;
+  };
+
+  const intptr_t onerow = channel.plane.PixelsPerRow();
+  // One row per pixel of a channel row, one column per reference property
+  // (the layout PrecomputeReferences writes and Predict reads).
+  Channel references(properties.size() - kNumNonrefProperties, channel.w);
+  weighted::State wp_state(wp_header, channel.w, channel.h);
+  tree_samples.PrepareForSamples(pixel_fraction * channel.h * channel.w + 64);
+  const bool multiple_predictors = tree_samples.NumPredictors() != 1;
+  // Border-safe handling of one pixel: compute properties and predictions,
+  // count the pixel, optionally record a sample, then feed the true value
+  // back into the weighted predictor state.
+  auto compute_sample = [&](const pixel_type *p, size_t x, size_t y) {
+    pixel_type_w pred[kNumModularPredictors];
+    if (multiple_predictors) {
+      PredictLearnAll(&properties, channel.w, p + x, onerow, x, y, references,
+                      &wp_state, pred);
+    } else {
+      // Only the entry of the single predictor in use is filled in.
+      pred[static_cast<int>(tree_samples.PredictorFromIndex(0))] =
+          PredictLearn(&properties, channel.w, p + x, onerow, x, y,
+                       tree_samples.PredictorFromIndex(0), references,
+                       &wp_state)
+              .guess;
+    }
+    (*total_pixels)++;
+    if (use_sample()) {
+      tree_samples.AddSample(p[x], properties, pred);
+    }
+    wp_state.UpdateErrors(p[x], x, y, channel.w);
+  };
+
+  for (size_t y = 0; y < channel.h; y++) {
+    const pixel_type *JXL_RESTRICT p = channel.Row(y);
+    PrecomputeReferences(channel, y, image, chan, &references);
+    InitPropsRow(&properties, static_props, y);
+
+    // TODO(veluca): avoid computing WP if we don't use its property or
+    // predictions.
+    // Fast path: away from the top two rows, on sufficiently wide channels
+    // and without reference properties, the interior pixels use the
+    // no-edge-case (NEC) predictors; the first and last two columns still go
+    // through the border-safe path.
+    if (y > 1 && channel.w > 8 && references.w == 0) {
+      for (size_t x = 0; x < 2; x++) {
+        compute_sample(p, x, y);
+      }
+      for (size_t x = 2; x < channel.w - 2; x++) {
+        pixel_type_w pred[kNumModularPredictors];
+        if (multiple_predictors) {
+          PredictLearnAllNEC(&properties, channel.w, p + x, onerow, x, y,
+                             references, &wp_state, pred);
+        } else {
+          pred[static_cast<int>(tree_samples.PredictorFromIndex(0))] =
+              PredictLearnNEC(&properties, channel.w, p + x, onerow, x, y,
+                              tree_samples.PredictorFromIndex(0), references,
+                              &wp_state)
+                  .guess;
+        }
+        (*total_pixels)++;
+        if (use_sample()) {
+          tree_samples.AddSample(p[x], properties, pred);
+        }
+        wp_state.UpdateErrors(p[x], x, y, channel.w);
+      }
+      for (size_t x = channel.w - 2; x < channel.w; x++) {
+        compute_sample(p, x, y);
+      }
+    } else {
+      for (size_t x = 0; x < channel.w; x++) {
+        compute_sample(p, x, y);
+      }
+    }
+  }
+}
+
+// Builds an MA tree from the samples gathered in `tree_samples`.
+//
+// Without any samples the result is a single leaf that uses the first
+// predictor of `tree_samples` with offset 0 and multiplier 1. Otherwise
+// ComputeBestTree is run with a splitting threshold of
+// options.splitting_heuristics_node_threshold scaled by
+// (0.9 * sampled_fraction + 0.1), where sampled_fraction is the number of
+// samples divided by `total_pixels`.
+//
+// A static property range whose upper bound is 0 has that bound replaced by
+// the largest uint32_t value before learning.
+Tree LearnTree(TreeSamples &&tree_samples, size_t total_pixels,
+               const ModularOptions &options,
+               const std::vector<ModularMultiplierInfo> &multiplier_info = {},
+               StaticPropRange static_prop_range = {}) {
+  for (size_t prop = 0; prop < kNumStaticProperties; prop++) {
+    auto &upper_bound = static_prop_range[prop][1];
+    if (upper_bound == 0) {
+      upper_bound = std::numeric_limits<uint32_t>::max();
+    }
+  }
+
+  Tree tree;
+  if (!tree_samples.HasSamples()) {
+    // Nothing to learn from: emit a lone leaf.
+    tree.push_back(
+        PropertyDecisionNode::Leaf(tree_samples.PredictorFromIndex(0)));
+    return tree;
+  }
+
+  // The fraction must be taken before AllSamplesDone() is called.
+  const float pixel_fraction =
+      tree_samples.NumSamples() * 1.0f / total_pixels;
+  const float required_cost = pixel_fraction * 0.9 + 0.1;
+  tree_samples.AllSamplesDone();
+  ComputeBestTree(tree_samples,
+                  options.splitting_heuristics_node_threshold * required_cost,
+                  multiplier_info, static_prop_range,
+                  options.fast_decode_multiplier, &tree);
+  return tree;
+}
+
+Status EncodeModularChannelMAANS(const Image &image, pixel_type chan,
+                                 const weighted::Header &wp_header,
+                                 const Tree &global_tree, Token **tokenpp,
+                                 AuxOut *aux_out, size_t group_id,
+                                 bool skip_encoder_fast_path) {
+  const Channel &channel = image.channel[chan];
+  Token *tokenp = *tokenpp;
+  JXL_ASSERT(channel.w != 0 && channel.h != 0);
+
+  Image3F predictor_img;
+  if (kWantDebug) predictor_img = Image3F(channel.w, channel.h);
+
+  JXL_DEBUG_V(6,
+              "Encoding %" PRIuS "x%" PRIuS
+              " channel %d, "
+              "(shift=%i,%i)",
+              channel.w, channel.h, chan, channel.hshift, channel.vshift);
+
+  std::array<pixel_type, kNumStaticProperties> static_props = {
+      {chan, (int)group_id}};
+  bool use_wp, is_wp_only;
+  bool is_gradient_only;
+  size_t num_props;
+  FlatTree tree = FilterTree(global_tree, static_props, &num_props, &use_wp,
+                             &is_wp_only, &is_gradient_only);
+  Properties properties(num_props);
+  MATreeLookup tree_lookup(tree);
+  JXL_DEBUG_V(3, "Encoding using a MA tree with %" PRIuS " nodes", tree.size());
+
+  // Check if this tree is a WP-only tree with a small enough property value
+  // range.
+  // Initialized to avoid clang-tidy complaining.
+  auto tree_lut = jxl::make_unique<TreeLut<uint16_t, false>>();
+  if (is_wp_only) {
+    is_wp_only = TreeToLookupTable(tree, *tree_lut);
+  }
+  if (is_gradient_only) {
+    is_gradient_only = TreeToLookupTable(tree, *tree_lut);
+  }
+
+  if (is_wp_only && !skip_encoder_fast_path) {
+    for (size_t c = 0; c < 3; c++) {
+      FillImage(static_cast<float>(PredictorColor(Predictor::Weighted)[c]),
+                &predictor_img.Plane(c));
+    }
+    const intptr_t onerow = channel.plane.PixelsPerRow();
+    weighted::State wp_state(wp_header, channel.w, channel.h);
+    Properties properties(1);
+    for (size_t y = 0; y < channel.h; y++) {
+      const pixel_type *JXL_RESTRICT r = channel.Row(y);
+      for (size_t x = 0; x < channel.w; x++) {
+        size_t offset = 0;
+        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
+        pixel_type_w top = (y ? *(r + x - onerow) : left);
+        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
+        pixel_type_w topright =
+            (x + 1 < channel.w && y ? *(r + x + 1 - onerow) : top);
+        pixel_type_w toptop = (y > 1 ? *(r + x - onerow - onerow) : top);
+        int32_t guess = wp_state.Predict</*compute_properties=*/true>(
+            x, y, channel.w, top, left, topright, topleft, toptop, &properties,
+            offset);
+        uint32_t pos =
+            kPropRangeFast + std::min(std::max(-kPropRangeFast, properties[0]),
+                                      kPropRangeFast - 1);
+        uint32_t ctx_id = tree_lut->context_lookup[pos];
+        int32_t residual = r[x] - guess - tree_lut->offsets[pos];
+        *tokenp++ = Token(ctx_id, PackSigned(residual));
+        wp_state.UpdateErrors(r[x], x, y, channel.w);
+      }
+    }
+  } else if (tree.size() == 1 && tree[0].predictor == Predictor::Gradient &&
+             tree[0].multiplier == 1 && tree[0].predictor_offset == 0 &&
+             !skip_encoder_fast_path) {
+    for (size_t c = 0; c < 3; c++) {
+      FillImage(static_cast<float>(PredictorColor(Predictor::Gradient)[c]),
+                &predictor_img.Plane(c));
+    }
+    const intptr_t onerow = channel.plane.PixelsPerRow();
+    for (size_t y = 0; y < channel.h; y++) {
+      const pixel_type *JXL_RESTRICT r = channel.Row(y);
+      for (size_t x = 0; x < channel.w; x++) {
+        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
+        pixel_type_w top = (y ? *(r + x - onerow) : left);
+        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
+        int32_t guess = ClampedGradient(top, left, topleft);
+        int32_t residual = r[x] - guess;
+        *tokenp++ = Token(tree[0].childID, PackSigned(residual));
+      }
+    }
+  } else if (is_gradient_only && !skip_encoder_fast_path) {
+    for (size_t c = 0; c < 3; c++) {
+      FillImage(static_cast<float>(PredictorColor(Predictor::Gradient)[c]),
+                &predictor_img.Plane(c));
+    }
+    const intptr_t onerow = channel.plane.PixelsPerRow();
+    for (size_t y = 0; y < channel.h; y++) {
+      const pixel_type *JXL_RESTRICT r = channel.Row(y);
+      for (size_t x = 0; x < channel.w; x++) {
+        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
+        pixel_type_w top = (y ? *(r + x - onerow) : left);
+        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
+        int32_t guess = ClampedGradient(top, left, topleft);
+        uint32_t pos =
+            kPropRangeFast +
+            std::min<pixel_type_w>(
+                std::max<pixel_type_w>(-kPropRangeFast, top + left - topleft),
+                kPropRangeFast - 1);
+        uint32_t ctx_id = tree_lut->context_lookup[pos];
+        int32_t residual = r[x] - guess - tree_lut->offsets[pos];
+        *tokenp++ = Token(ctx_id, PackSigned(residual));
+      }
+    }
+  } else if (tree.size() == 1 && tree[0].predictor == Predictor::Zero &&
+             tree[0].multiplier == 1 && tree[0].predictor_offset == 0 &&
+             !skip_encoder_fast_path) {
+    for (size_t c = 0; c < 3; c++) {
+      FillImage(static_cast<float>(PredictorColor(Predictor::Zero)[c]),
+                &predictor_img.Plane(c));
+    }
+    for (size_t y = 0; y < channel.h; y++) {
+      const pixel_type *JXL_RESTRICT p = channel.Row(y);
+      for (size_t x = 0; x < channel.w; x++) {
+        *tokenp++ = Token(tree[0].childID, PackSigned(p[x]));
+      }
+    }
+  } else if (tree.size() == 1 && tree[0].predictor != Predictor::Weighted &&
+             (tree[0].multiplier & (tree[0].multiplier - 1)) == 0 &&
+             tree[0].predictor_offset == 0 && !skip_encoder_fast_path) {
+    // multiplier is a power of 2.
+    for (size_t c = 0; c < 3; c++) {
+      FillImage(static_cast<float>(PredictorColor(tree[0].predictor)[c]),
+                &predictor_img.Plane(c));
+    }
+    uint32_t mul_shift = FloorLog2Nonzero((uint32_t)tree[0].multiplier);
+    const intptr_t onerow = channel.plane.PixelsPerRow();
+    for (size_t y = 0; y < channel.h; y++) {
+      const pixel_type *JXL_RESTRICT r = channel.Row(y);
+      for (size_t x = 0; x < channel.w; x++) {
+        PredictionResult pred = PredictNoTreeNoWP(channel.w, r + x, onerow, x,
+                                                  y, tree[0].predictor);
+        pixel_type_w residual = r[x] - pred.guess;
+        JXL_DASSERT((residual >> mul_shift) * tree[0].multiplier == residual);
+        *tokenp++ = Token(tree[0].childID, PackSigned(residual >> mul_shift));
+      }
+    }
+
+  } else if (!use_wp && !skip_encoder_fast_path) {
+    const intptr_t onerow = channel.plane.PixelsPerRow();
+    Channel references(properties.size() - kNumNonrefProperties, channel.w);
+    for (size_t y = 0; y < channel.h; y++) {
+      const pixel_type *JXL_RESTRICT p = channel.Row(y);
+      PrecomputeReferences(channel, y, image, chan, &references);
+      float *pred_img_row[3];
+      if (kWantDebug) {
+        for (size_t c = 0; c < 3; c++) {
+          pred_img_row[c] = predictor_img.PlaneRow(c, y);
+        }
+      }
+      InitPropsRow(&properties, static_props, y);
+      for (size_t x = 0; x < channel.w; x++) {
+        PredictionResult res =
+            PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y,
+                            tree_lookup, references);
+        if (kWantDebug) {
+          for (size_t i = 0; i < 3; i++) {
+            pred_img_row[i][x] = PredictorColor(res.predictor)[i];
+          }
+        }
+        pixel_type_w residual = p[x] - res.guess;
+        JXL_DASSERT(residual % res.multiplier == 0);
+        *tokenp++ = Token(res.context, PackSigned(residual / res.multiplier));
+      }
+    }
+  } else {
+    const intptr_t onerow = channel.plane.PixelsPerRow();
+    Channel references(properties.size() - kNumNonrefProperties, channel.w);
+    weighted::State wp_state(wp_header, channel.w, channel.h);
+    for (size_t y = 0; y < channel.h; y++) {
+      const pixel_type *JXL_RESTRICT p = channel.Row(y);
+      PrecomputeReferences(channel, y, image, chan, &references);
+      float *pred_img_row[3];
+      if (kWantDebug) {
+        for (size_t c = 0; c < 3; c++) {
+          pred_img_row[c] = predictor_img.PlaneRow(c, y);
+        }
+      }
+      InitPropsRow(&properties, static_props, y);
+      for (size_t x = 0; x < channel.w; x++) {
+        PredictionResult res =
+            PredictTreeWP(&properties, channel.w, p + x, onerow, x, y,
+                          tree_lookup, references, &wp_state);
+        if (kWantDebug) {
+          for (size_t i = 0; i < 3; i++) {
+            pred_img_row[i][x] = PredictorColor(res.predictor)[i];
+          }
+        }
+        pixel_type_w residual = p[x] - res.guess;
+        JXL_DASSERT(residual % res.multiplier == 0);
+        *tokenp++ = Token(res.context, PackSigned(residual / res.multiplier));
+        wp_state.UpdateErrors(p[x], x, y, channel.w);
+      }
+    }
+  }
+  /* TODO(szabadka): Add cparams to the call stack here.
+  if (kWantDebug && WantDebugOutput(cparams)) {
+    DumpImage(
+        cparams,
+        ("pred_" + ToString(group_id) + "_" + ToString(chan)).c_str(),
+        predictor_img);
+  }
+  */
+  *tokenpp = tokenp;
+  return true;
+}
+
+// Encodes `image` with the modular (MA tree + entropy coded tokens) coder, or
+// performs one of the preparatory passes used for global-tree encoding.
+//
+// The optional pointer arguments select what happens:
+//  - `tree == nullptr` and `tree_samples != nullptr`: only gather tree
+//    learning data into `*tree_samples` / `*total_pixels`, then return.
+//    Nothing is written to `writer`.
+//  - `tree != nullptr`: tokenize every channel with the supplied tree and
+//    append the tokens to `*tokens` (which must then be non-null). The header
+//    is flagged as using the global tree, nothing is written to `writer`, and
+//    the widest tokenized channel is reported through `*width` if provided.
+//  - both null: stand-alone encode. The group header is written, a tree is
+//    learned from this image and written, and finally (when the header does
+//    not use a global tree) histograms and tokens are written.
+//
+// `header` and `total_pixels` may be null; local storage is used instead.
+// Returns an error for an image flagged with `image.error` and propagates
+// failures of the helpers wrapped in JXL_RETURN_IF_ERROR. An image without
+// channels is accepted and encodes to nothing.
+Status ModularEncode(const Image &image, const ModularOptions &options,
+                     BitWriter *writer, AuxOut *aux_out, size_t layer,
+                     size_t group_id, TreeSamples *tree_samples,
+                     size_t *total_pixels, const Tree *tree,
+                     GroupHeader *header, std::vector<Token> *tokens,
+                     size_t *width) {
+  if (image.error) return JXL_FAILURE("Invalid image");
+  size_t nb_channels = image.channel.size();
+  JXL_DEBUG_V(
+      2, "Encoding %" PRIuS "-channel, %i-bit, %" PRIuS "x%" PRIuS " image.",
+      nb_channels, image.bitdepth, image.w, image.h);
+
+  if (nb_channels < 1) {
+    return true;  // is there any use for a zero-channel image?
+  }
+
+  // encode transforms
+  GroupHeader header_storage;
+  if (header == nullptr) header = &header_storage;
+  Bundle::Init(header);
+  if (options.predictor == Predictor::Weighted) {
+    weighted::PredictorMode(options.wp_mode, &header->wp_header);
+  }
+  header->transforms = image.transform;
+  // This doesn't actually work
+  // NOTE(review): the remark above is kept from the original; it does not say
+  // which part is broken.
+  if (tree != nullptr) {
+    header->use_global_tree = true;
+  }
+  // The header is only serialized here for a stand-alone encode.
+  if (tree_samples == nullptr && tree == nullptr) {
+    JXL_RETURN_IF_ERROR(Bundle::Write(*header, writer, layer, aux_out));
+  }
+
+  TreeSamples tree_samples_storage;
+  size_t total_pixels_storage = 0;
+  if (!total_pixels) total_pixels = &total_pixels_storage;
+  // If there's no tree, compute one (or gather data to).
+  if (tree == nullptr) {
+    bool gather_data = tree_samples != nullptr;
+    if (tree_samples == nullptr) {
+      // Stand-alone encode: configure and pre-quantize the local sample store.
+      JXL_RETURN_IF_ERROR(tree_samples_storage.SetPredictor(
+          options.predictor, options.wp_tree_mode));
+      JXL_RETURN_IF_ERROR(tree_samples_storage.SetProperties(
+          options.splitting_heuristics_properties, options.wp_tree_mode));
+      std::vector<pixel_type> pixel_samples;
+      std::vector<pixel_type> diff_samples;
+      std::vector<uint32_t> group_pixel_count;
+      std::vector<uint32_t> channel_pixel_count;
+      CollectPixelSamples(image, options, 0, group_pixel_count,
+                          channel_pixel_count, pixel_samples, diff_samples);
+      std::vector<ModularMultiplierInfo> dummy_multiplier_info;
+      StaticPropRange range;
+      tree_samples_storage.PreQuantizeProperties(
+          range, dummy_multiplier_info, group_pixel_count, channel_pixel_count,
+          pixel_samples, diff_samples, options.max_property_values);
+    }
+    for (size_t i = 0; i < nb_channels; i++) {
+      if (!image.channel[i].w || !image.channel[i].h) {
+        continue;  // skip empty channels
+      }
+      // Stop at the first non-meta channel exceeding the size limit.
+      if (i >= image.nb_meta_channels &&
+          (image.channel[i].w > options.max_chan_size ||
+           image.channel[i].h > options.max_chan_size)) {
+        break;
+      }
+      GatherTreeData(image, i, group_id, header->wp_header, options,
+                     gather_data ? *tree_samples : tree_samples_storage,
+                     total_pixels);
+    }
+    // Data-gathering pass: the caller learns the tree later.
+    if (gather_data) return true;
+  }
+
+  // A caller-supplied tree and a caller-supplied token sink go together.
+  JXL_ASSERT((tree == nullptr) == (tokens == nullptr));
+
+  Tree tree_storage;
+  std::vector<std::vector<Token>> tokens_storage(1);
+  // Compute tree.
+  if (tree == nullptr) {
+    EntropyEncodingData code;
+    std::vector<uint8_t> context_map;
+
+    std::vector<std::vector<Token>> tree_tokens(1);
+    tree_storage =
+        LearnTree(std::move(tree_samples_storage), *total_pixels, options);
+    tree = &tree_storage;
+    tokens = &tokens_storage[0];
+
+    // Tokenizing also yields the tree in the form used from here on; it
+    // replaces the learned tree.
+    Tree decoded_tree;
+    TokenizeTree(*tree, &tree_tokens[0], &decoded_tree);
+    JXL_ASSERT(tree->size() == decoded_tree.size());
+    tree_storage = std::move(decoded_tree);
+
+    /* TODO(szabadka) Add text output callback
+    if (kWantDebug && kPrintTree && WantDebugOutput(aux_out)) {
+      PrintTree(*tree, aux_out->debug_prefix + "/tree_" + ToString(group_id));
+    } */
+
+    // Write tree
+    BuildAndEncodeHistograms(HistogramParams(), kNumTreeContexts, tree_tokens,
+                             &code, &context_map, writer, kLayerModularTree,
+                             aux_out);
+    WriteTokens(tree_tokens[0], code, context_map, writer, kLayerModularTree,
+                aux_out);
+  }
+
+  // Count the tokens (one per pixel) and find the widest channel that will be
+  // encoded. Empty channels contribute zero tokens.
+  size_t image_width = 0;
+  size_t total_tokens = 0;
+  for (size_t i = 0; i < nb_channels; i++) {
+    if (i >= image.nb_meta_channels &&
+        (image.channel[i].w > options.max_chan_size ||
+         image.channel[i].h > options.max_chan_size)) {
+      break;
+    }
+    if (image.channel[i].w > image_width) image_width = image.channel[i].w;
+    total_tokens += image.channel[i].w * image.channel[i].h;
+  }
+  if (options.zero_tokens) {
+    tokens->resize(tokens->size() + total_tokens, {0, 0});
+  } else {
+    // Do one big allocation for all the tokens we'll need,
+    // to avoid reallocs that might require copying.
+    size_t pos = tokens->size();
+    tokens->resize(pos + total_tokens);
+    Token *tokenp = tokens->data() + pos;
+    for (size_t i = 0; i < nb_channels; i++) {
+      if (!image.channel[i].w || !image.channel[i].h) {
+        continue;  // skip empty channels
+      }
+      if (i >= image.nb_meta_channels &&
+          (image.channel[i].w > options.max_chan_size ||
+           image.channel[i].h > options.max_chan_size)) {
+        break;
+      }
+      JXL_RETURN_IF_ERROR(EncodeModularChannelMAANS(
+          image, i, header->wp_header, *tree, &tokenp, aux_out, group_id,
+          options.skip_encoder_fast_path));
+    }
+    // Make sure we actually wrote all tokens
+    JXL_CHECK(tokenp == tokens->data() + tokens->size());
+  }
+
+  // Write data if not using a global tree/ANS stream.
+  if (!header->use_global_tree) {
+    EntropyEncodingData code;
+    std::vector<uint8_t> context_map;
+    HistogramParams histo_params;
+    histo_params.image_widths.push_back(image_width);
+    BuildAndEncodeHistograms(histo_params, (tree->size() + 1) / 2,
+                             tokens_storage, &code, &context_map, writer, layer,
+                             aux_out);
+    WriteTokens(tokens_storage[0], code, context_map, writer, layer, aux_out);
+  } else if (width != nullptr) {
+    // `width` is an optional output (ModularGenericCompress defaults it to
+    // nullptr), so only report the value when the caller asked for it.
+    *width = image_width;
+  }
+  return true;
+}
+
+// Public entry point of the modular encoder: normalizes the options and
+// forwards everything to ModularEncode().
+//
+// An image with a zero width or height is accepted and produces no output.
+// When `opts.predictor` holds the sentinel value -1, the Gradient predictor is
+// used instead. Errors from ModularEncode() are propagated.
+Status ModularGenericCompress(Image &image, const ModularOptions &opts,
+                              BitWriter *writer, AuxOut *aux_out, size_t layer,
+                              size_t group_id, TreeSamples *tree_samples,
+                              size_t *total_pixels, const Tree *tree,
+                              GroupHeader *header, std::vector<Token> *tokens,
+                              size_t *width) {
+  const bool empty_image = (image.w == 0) || (image.h == 0);
+  if (empty_image) return true;
+
+  // Work on a private copy so the caller's options stay untouched.
+  ModularOptions options = opts;
+  if (options.predictor == static_cast<Predictor>(-1)) {
+    options.predictor = Predictor::Gradient;
+  }
+
+  // Remember the writer position to report the encoded size afterwards.
+  const size_t bits_before = (writer != nullptr) ? writer->BitsWritten() : 0;
+  JXL_RETURN_IF_ERROR(ModularEncode(image, options, writer, aux_out, layer,
+                                    group_id, tree_samples, total_pixels, tree,
+                                    header, tokens, width));
+  const size_t bits_used =
+      (writer != nullptr) ? writer->BitsWritten() - bits_before : 0;
+  if (writer != nullptr) {
+    JXL_DEBUG_V(4,
+                "Modular-encoded a %" PRIuS "x%" PRIuS
+                " bitdepth=%i nbchans=%" PRIuS " image in %" PRIuS " bytes",
+                image.w, image.h, image.bitdepth, image.channel.size(),
+                bits_used / 8);
+  }
+  // Keeps the variable "used" when the debug macro compiles to nothing.
+  (void)bits_used;
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_encoding.h b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_encoding.h
new file mode 100644
index 0000000000..04df504750
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_encoding.h
@@ -0,0 +1,47 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_ENC_ENCODING_H_
+#define LIB_JXL_MODULAR_ENCODING_ENC_ENCODING_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/encoding/enc_ma.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/options.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+// Produces a Tree from previously gathered `tree_samples`, which are taken by
+// rvalue reference (callers pass them with std::move).
+// `multiplier_info` and `static_prop_range` are optional and default to empty
+// values.
+// NOTE(review): the definition is not part of this header; presumably
+// `total_pixels` is the pixel count accumulated while gathering the samples
+// -- confirm against the implementation.
+Tree LearnTree(TreeSamples &&tree_samples, size_t total_pixels,
+               const ModularOptions &options,
+               const std::vector<ModularMultiplierInfo> &multiplier_info = {},
+               StaticPropRange static_prop_range = {});
+
+// TODO(veluca): make cleaner interfaces.
+
+// Compresses `image` with the modular encoder.
+//
+// With only the first three arguments the call is a stand-alone encode into
+// `writer`. The optional pointers switch to the global-tree workflow:
+// `tree_samples` / `total_pixels` collect tree learning data, while `tree`,
+// `header`, `tokens` and `width` are used when encoding with a tree that was
+// learned elsewhere (`tokens` receives the tokens, `width` the widest encoded
+// channel).
+// An image with zero width or height is accepted and produces no output.
+Status ModularGenericCompress(
+    Image &image, const ModularOptions &opts, BitWriter *writer,
+    AuxOut *aux_out = nullptr, size_t layer = 0, size_t group_id = 0,
+    // For gathering data for producing a global tree.
+    TreeSamples *tree_samples = nullptr, size_t *total_pixels = nullptr,
+    // For encoding with global tree.
+    const Tree *tree = nullptr, GroupHeader *header = nullptr,
+    std::vector<Token> *tokens = nullptr, size_t *width = nullptr);
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_ENCODING_ENC_ENCODING_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_ma.cc b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_ma.cc
new file mode 100644
index 0000000000..72b027906d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_ma.cc
@@ -0,0 +1,1011 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/encoding/enc_ma.h"
+
+#include <algorithm>
+#include <limits>
+#include <numeric>
+#include <queue>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "lib/jxl/modular/encoding/ma_common.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/modular/encoding/enc_ma.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/options.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Eq;
+using hwy::HWY_NAMESPACE::IfThenElse;
+using hwy::HWY_NAMESPACE::Lt;
+using hwy::HWY_NAMESPACE::Max;
+
+// Vector descriptors (float and int32_t lanes) shared by the helpers below.
+const HWY_FULL(float) df;
+const HWY_FULL(int32_t) di;
+
+// Returns `x` passed through RoundUpTo() with the float vector lane count, so
+// that buffers of this size can be processed in whole vectors.
+size_t Padded(size_t x) {
+  const size_t lanes = Lanes(df);
+  return RoundUpTo(x, lanes);
+}
+
+// Compute entropy of the histogram, taking into account the minimum probability
+// for symbols with non-zero counts.
+// Estimates the number of bits needed to code a histogram: the sum over all
+// symbols of count * -log2(probability), where each probability is clamped
+// from below to 1 / ANS_TAB_SIZE. A symbol whose count equals the total
+// contributes zero bits.
+//
+// The histogram is read in whole vectors, so `counts` must be readable up to
+// the next multiple of the vector lane count; callers size it with Padded().
+float EstimateBits(const int32_t *counts, size_t num_symbols) {
+  const int32_t sum = std::accumulate(counts, counts + num_symbols, 0);
+  const auto zero_f = Zero(df);
+  const auto min_probability = Set(df, 1.0f / ANS_TAB_SIZE);
+  const auto reciprocal_sum = Set(df, 1.0f / sum);
+  const auto sum_lanes = Set(di, sum);
+  auto accumulated_bits = Zero(df);
+  const size_t step = Lanes(df);
+  for (size_t offset = 0; offset < num_symbols; offset += step) {
+    const auto count_i = LoadU(di, &counts[offset]);
+    const auto count_f = ConvertTo(df, count_i);
+    const auto clamped_prob =
+        Max(Mul(count_f, reciprocal_sum), min_probability);
+    // The selection is done on integer lanes (the mask comes from an integer
+    // comparison); the float values are only bit-cast, not converted.
+    const auto log2_prob_bits =
+        IfThenElse(Eq(count_i, sum_lanes), BitCast(di, zero_f),
+                   BitCast(di, FastLog2f(df, clamped_prob)));
+    accumulated_bits =
+        Sub(accumulated_bits, Mul(count_f, BitCast(df, log2_prob_bits)));
+  }
+  return GetLane(SumOfLanes(df, accumulated_bits));
+}
+
+// Turns node `pos` of `*tree` into a decision node on `property` with
+// threshold `splitval` and appends its two children as leaves.
+//
+// The leaf stored at `lchild` receives `rpred` / `roff` and the one stored at
+// `rchild` receives `lpred` / `loff`: the tree splits on *strictly greater*.
+// Both new leaves get a multiplier of 1.
+void MakeSplitNode(size_t pos, int property, int splitval, Predictor lpred,
+                   int64_t loff, Predictor rpred, int64_t roff, Tree *tree) {
+  const size_t first_child = tree->size();
+  {
+    // This reference must not outlive the block: appending below may
+    // reallocate the tree storage.
+    auto &parent = (*tree)[pos];
+    parent.lchild = first_child;
+    parent.rchild = first_child + 1;
+    parent.splitval = splitval;
+    parent.property = property;
+  }
+  // Appends one leaf (property == -1) with the given predictor and offset.
+  const auto append_leaf = [tree](Predictor predictor, int64_t offset) {
+    tree->emplace_back();
+    auto &leaf = tree->back();
+    leaf.property = -1;
+    leaf.predictor = predictor;
+    leaf.predictor_offset = offset;
+    leaf.multiplier = 1;
+  };
+  append_leaf(rpred, roff);
+  append_leaf(lpred, loff);
+}
+
+// Result of comparing two static-property boxes, see BoxIntersects().
+enum class IntersectionType { kNone, kPartial, kInside };
+
+// Classifies how the box `needle` relates to the box `haystack`, where each
+// box stores a [0] / [1] bound pair per static property.
+//
+// Returns kNone as soon as the boxes do not overlap on some axis, kInside when
+// `haystack` covers `needle` on every axis, and kPartial otherwise. For
+// kPartial, `partial_axis` is the last axis with a partial overlap and
+// `partial_val` is the haystack bound lying strictly inside the needle range
+// on that axis, minus one. Both outputs may already have been written when
+// kNone is returned because of a later axis.
+IntersectionType BoxIntersects(StaticPropRange needle, StaticPropRange haystack,
+                               uint32_t &partial_axis, uint32_t &partial_val) {
+  bool found_partial = false;
+  for (size_t axis = 0; axis < kNumStaticProperties; axis++) {
+    const auto needle_lo = needle[axis][0];
+    const auto needle_hi = needle[axis][1];
+    const auto hay_lo = haystack[axis][0];
+    const auto hay_hi = haystack[axis][1];
+
+    const bool disjoint = hay_lo >= needle_hi || hay_hi <= needle_lo;
+    if (disjoint) return IntersectionType::kNone;
+
+    const bool covers = hay_lo <= needle_lo && hay_hi >= needle_hi;
+    if (covers) continue;
+
+    found_partial = true;
+    partial_axis = axis;
+    const bool lower_bound_inside = hay_lo > needle_lo && hay_lo < needle_hi;
+    if (lower_bound_inside) {
+      partial_val = hay_lo - 1;
+    } else {
+      JXL_DASSERT(hay_hi > needle_lo && hay_hi < needle_hi);
+      partial_val = hay_hi - 1;
+    }
+  }
+  if (found_partial) return IntersectionType::kPartial;
+  return IntersectionType::kInside;
+}
+
+// Reorders the samples in [begin, end) of `tree_samples` in place around
+// position `pos`, comparing samples by their value of property `prop`.
+//
+// This is a selection by repeated three-way partitioning: each round picks a
+// pivot, groups the range into "less than", "equal to" and "greater than" the
+// pivot, and then continues only in the outer group that still contains `pos`.
+// It stops once `pos` lies within (or on the border of) the "equal" group.
+// The pivot choice uses an Rng constructed with seed 0.
+void SplitTreeSamples(TreeSamples &tree_samples, size_t begin, size_t pos,
+                      size_t end, size_t prop) {
+  // Three-way comparison of two samples: negative, zero or positive.
+  auto cmp = [&](size_t a, size_t b) {
+    return int32_t(tree_samples.Property(prop, a)) -
+           int32_t(tree_samples.Property(prop, b));
+  };
+  Rng rng(0);
+  while (end > begin + 1) {
+    {
+      // Move a randomly chosen pivot to the front of the range.
+      // NOTE(review): presumably UniformU(begin, end) excludes `end` --
+      // confirm against Rng.
+      size_t pivot = rng.UniformU(begin, end);
+      tree_samples.Swap(begin, pivot);
+    }
+    // [pivot_begin, pivot_end) holds the samples equal to the pivot.
+    size_t pivot_begin = begin;
+    size_t pivot_end = pivot_begin + 1;
+    for (size_t i = begin + 1; i < end; i++) {
+      JXL_DASSERT(i >= pivot_end);
+      JXL_DASSERT(pivot_end > pivot_begin);
+      int32_t cmp_result = cmp(i, pivot_begin);
+      if (cmp_result < 0) {  // i < pivot, move pivot forward and put i before
+                             // the pivot.
+        tree_samples.ThreeShuffle(pivot_begin, pivot_end, i);
+        pivot_begin++;
+        pivot_end++;
+      } else if (cmp_result == 0) {
+        // Equal to the pivot: grow the "equal" group by one.
+        tree_samples.Swap(pivot_end, i);
+        pivot_end++;
+      }
+      // Greater than the pivot: the sample is already in the right group.
+    }
+    // Debug-only verification of the three groups.
+    JXL_DASSERT(pivot_begin >= begin);
+    JXL_DASSERT(pivot_end > pivot_begin);
+    JXL_DASSERT(pivot_end <= end);
+    for (size_t i = begin; i < pivot_begin; i++) {
+      JXL_DASSERT(cmp(i, pivot_begin) < 0);
+    }
+    for (size_t i = pivot_end; i < end; i++) {
+      JXL_DASSERT(cmp(i, pivot_begin) > 0);
+    }
+    for (size_t i = pivot_begin; i < pivot_end; i++) {
+      JXL_DASSERT(cmp(i, pivot_begin) == 0);
+    }
+    // We now have that [begin, pivot_begin) is < pivot, [pivot_begin,
+    // pivot_end) is = pivot, and [pivot_end, end) is > pivot.
+    // If pos falls in the first or the last interval, we continue in that
+    // interval; otherwise, we are done.
+    if (pivot_begin > pos) {
+      end = pivot_begin;
+    } else if (pivot_end < pos) {
+      begin = pivot_end;
+    } else {
+      break;
+    }
+  }
+}
+
+void FindBestSplit(TreeSamples &tree_samples, float threshold,
+                   const std::vector<ModularMultiplierInfo> &mul_info,
+                   StaticPropRange initial_static_prop_range,
+                   float fast_decode_multiplier, Tree *tree) {
+  struct NodeInfo {
+    size_t pos;
+    size_t begin;
+    size_t end;
+    uint64_t used_properties;
+    StaticPropRange static_prop_range;
+  };
+  std::vector<NodeInfo> nodes;
+  nodes.push_back(NodeInfo{0, 0, tree_samples.NumDistinctSamples(), 0,
+                           initial_static_prop_range});
+
+  size_t num_predictors = tree_samples.NumPredictors();
+  size_t num_properties = tree_samples.NumProperties();
+
+  // TODO(veluca): consider parallelizing the search (processing multiple nodes
+  // at a time).
+  while (!nodes.empty()) {
+    size_t pos = nodes.back().pos;
+    size_t begin = nodes.back().begin;
+    size_t end = nodes.back().end;
+    uint64_t used_properties = nodes.back().used_properties;
+    StaticPropRange static_prop_range = nodes.back().static_prop_range;
+    nodes.pop_back();
+    if (begin == end) continue;
+
+    struct SplitInfo {
+      size_t prop = 0;
+      uint32_t val = 0;
+      size_t pos = 0;
+      float lcost = std::numeric_limits<float>::max();
+      float rcost = std::numeric_limits<float>::max();
+      Predictor lpred = Predictor::Zero;
+      Predictor rpred = Predictor::Zero;
+      float Cost() { return lcost + rcost; }
+    };
+
+    SplitInfo best_split_static_constant;
+    SplitInfo best_split_static;
+    SplitInfo best_split_nonstatic;
+    SplitInfo best_split_nowp;
+
+    JXL_DASSERT(begin <= end);
+    JXL_DASSERT(end <= tree_samples.NumDistinctSamples());
+
+    // Compute the maximum token in the range.
+    size_t max_symbols = 0;
+    for (size_t pred = 0; pred < num_predictors; pred++) {
+      for (size_t i = begin; i < end; i++) {
+        uint32_t tok = tree_samples.Token(pred, i);
+        max_symbols = max_symbols > tok + 1 ? max_symbols : tok + 1;
+      }
+    }
+    max_symbols = Padded(max_symbols);
+    std::vector<int32_t> counts(max_symbols * num_predictors);
+    std::vector<uint32_t> tot_extra_bits(num_predictors);
+    for (size_t pred = 0; pred < num_predictors; pred++) {
+      for (size_t i = begin; i < end; i++) {
+        counts[pred * max_symbols + tree_samples.Token(pred, i)] +=
+            tree_samples.Count(i);
+        tot_extra_bits[pred] +=
+            tree_samples.NBits(pred, i) * tree_samples.Count(i);
+      }
+    }
+
+    float base_bits;
+    {
+      size_t pred = tree_samples.PredictorIndex((*tree)[pos].predictor);
+      base_bits =
+          EstimateBits(counts.data() + pred * max_symbols, max_symbols) +
+          tot_extra_bits[pred];
+    }
+
+    SplitInfo *best = &best_split_nonstatic;
+
+    SplitInfo forced_split;
+    // The multiplier ranges cut halfway through the current ranges of static
+    // properties. We do this even if the current node is not a leaf, to
+    // minimize the number of nodes in the resulting tree.
+    for (size_t i = 0; i < mul_info.size(); i++) {
+      uint32_t axis, val;
+      IntersectionType t =
+          BoxIntersects(static_prop_range, mul_info[i].range, axis, val);
+      if (t == IntersectionType::kNone) continue;
+      if (t == IntersectionType::kInside) {
+        (*tree)[pos].multiplier = mul_info[i].multiplier;
+        break;
+      }
+      if (t == IntersectionType::kPartial) {
+        forced_split.val = tree_samples.QuantizeProperty(axis, val);
+        forced_split.prop = axis;
+        forced_split.lcost = forced_split.rcost = base_bits / 2 - threshold;
+        forced_split.lpred = forced_split.rpred = (*tree)[pos].predictor;
+        best = &forced_split;
+        best->pos = begin;
+        JXL_ASSERT(best->prop == tree_samples.PropertyFromIndex(best->prop));
+        for (size_t x = begin; x < end; x++) {
+          if (tree_samples.Property(best->prop, x) <= best->val) {
+            best->pos++;
+          }
+        }
+        break;
+      }
+    }
+
+    if (best != &forced_split) {
+      std::vector<int> prop_value_used_count;
+      std::vector<int> count_increase;
+      std::vector<size_t> extra_bits_increase;
+      // For each property, compute which of its values are used, and what
+      // tokens correspond to those usages. Then, iterate through the values,
+      // and compute the entropy of each side of the split (of the form `prop >
+      // threshold`). Finally, find the split that minimizes the cost.
+      struct CostInfo {
+        float cost = std::numeric_limits<float>::max();
+        float extra_cost = 0;
+        float Cost() const { return cost + extra_cost; }
+        Predictor pred;  // will be uninitialized in some cases, but never used.
+      };
+      std::vector<CostInfo> costs_l;
+      std::vector<CostInfo> costs_r;
+
+      std::vector<int32_t> counts_above(max_symbols);
+      std::vector<int32_t> counts_below(max_symbols);
+
+      // The lower the threshold, the higher the expected noisiness of the
+      // estimate. Thus, discourage changing predictors.
+      float change_pred_penalty = 800.0f / (100.0f + threshold);
+      for (size_t prop = 0; prop < num_properties && base_bits > threshold;
+           prop++) {
+        costs_l.clear();
+        costs_r.clear();
+        size_t prop_size = tree_samples.NumPropertyValues(prop);
+        if (extra_bits_increase.size() < prop_size) {
+          count_increase.resize(prop_size * max_symbols);
+          extra_bits_increase.resize(prop_size);
+        }
+        // Clear prop_value_used_count (which cannot be cleared "on the go")
+        prop_value_used_count.clear();
+        prop_value_used_count.resize(prop_size);
+
+        size_t first_used = prop_size;
+        size_t last_used = 0;
+
+        // TODO(veluca): consider finding multiple splits along a single
+        // property at the same time, possibly with a bottom-up approach.
+        for (size_t i = begin; i < end; i++) {
+          size_t p = tree_samples.Property(prop, i);
+          prop_value_used_count[p]++;
+          last_used = std::max(last_used, p);
+          first_used = std::min(first_used, p);
+        }
+        costs_l.resize(last_used - first_used);
+        costs_r.resize(last_used - first_used);
+        // For all predictors, compute the right and left costs of each split.
+        for (size_t pred = 0; pred < num_predictors; pred++) {
+          // Compute cost and histogram increments for each property value.
+          for (size_t i = begin; i < end; i++) {
+            size_t p = tree_samples.Property(prop, i);
+            size_t cnt = tree_samples.Count(i);
+            size_t sym = tree_samples.Token(pred, i);
+            count_increase[p * max_symbols + sym] += cnt;
+            extra_bits_increase[p] += tree_samples.NBits(pred, i) * cnt;
+          }
+          memcpy(counts_above.data(), counts.data() + pred * max_symbols,
+                 max_symbols * sizeof counts_above[0]);
+          memset(counts_below.data(), 0, max_symbols * sizeof counts_below[0]);
+          size_t extra_bits_below = 0;
+          // Exclude last used: this ensures neither counts_above nor
+          // counts_below is empty.
+          for (size_t i = first_used; i < last_used; i++) {
+            if (!prop_value_used_count[i]) continue;
+            extra_bits_below += extra_bits_increase[i];
+            // The increase for this property value has been used, and will not
+            // be used again: clear it. Also below.
+            extra_bits_increase[i] = 0;
+            for (size_t sym = 0; sym < max_symbols; sym++) {
+              counts_above[sym] -= count_increase[i * max_symbols + sym];
+              counts_below[sym] += count_increase[i * max_symbols + sym];
+              count_increase[i * max_symbols + sym] = 0;
+            }
+            float rcost = EstimateBits(counts_above.data(), max_symbols) +
+                          tot_extra_bits[pred] - extra_bits_below;
+            float lcost = EstimateBits(counts_below.data(), max_symbols) +
+                          extra_bits_below;
+            JXL_DASSERT(extra_bits_below <= tot_extra_bits[pred]);
+            float penalty = 0;
+            // Never discourage moving away from the Weighted predictor.
+            if (tree_samples.PredictorFromIndex(pred) !=
+                    (*tree)[pos].predictor &&
+                (*tree)[pos].predictor != Predictor::Weighted) {
+              penalty = change_pred_penalty;
+            }
+            // If everything else is equal, disfavour Weighted (slower) and
+            // favour Zero (faster if it's the only predictor used in a
+            // group+channel combination)
+            if (tree_samples.PredictorFromIndex(pred) == Predictor::Weighted) {
+              penalty += 1e-8;
+            }
+            if (tree_samples.PredictorFromIndex(pred) == Predictor::Zero) {
+              penalty -= 1e-8;
+            }
+            if (rcost + penalty < costs_r[i - first_used].Cost()) {
+              costs_r[i - first_used].cost = rcost;
+              costs_r[i - first_used].extra_cost = penalty;
+              costs_r[i - first_used].pred =
+                  tree_samples.PredictorFromIndex(pred);
+            }
+            if (lcost + penalty < costs_l[i - first_used].Cost()) {
+              costs_l[i - first_used].cost = lcost;
+              costs_l[i - first_used].extra_cost = penalty;
+              costs_l[i - first_used].pred =
+                  tree_samples.PredictorFromIndex(pred);
+            }
+          }
+        }
+        // Iterate through the possible splits and find the one with minimum sum
+        // of costs of the two sides.
+        size_t split = begin;
+        for (size_t i = first_used; i < last_used; i++) {
+          if (!prop_value_used_count[i]) continue;
+          split += prop_value_used_count[i];
+          float rcost = costs_r[i - first_used].cost;
+          float lcost = costs_l[i - first_used].cost;
+          // WP was not used + we would use the WP property or predictor
+          bool adds_wp =
+              (tree_samples.PropertyFromIndex(prop) == kWPProp &&
+               (used_properties & (1LU << prop)) == 0) ||
+              ((costs_l[i - first_used].pred == Predictor::Weighted ||
+                costs_r[i - first_used].pred == Predictor::Weighted) &&
+               (*tree)[pos].predictor != Predictor::Weighted);
+          bool zero_entropy_side = rcost == 0 || lcost == 0;
+
+          SplitInfo &best =
+              prop < kNumStaticProperties
+                  ? (zero_entropy_side ? best_split_static_constant
+                                       : best_split_static)
+                  : (adds_wp ? best_split_nonstatic : best_split_nowp);
+          if (lcost + rcost < best.Cost()) {
+            best.prop = prop;
+            best.val = i;
+            best.pos = split;
+            best.lcost = lcost;
+            best.lpred = costs_l[i - first_used].pred;
+            best.rcost = rcost;
+            best.rpred = costs_r[i - first_used].pred;
+          }
+        }
+        // Clear extra_bits_increase and cost_increase for last_used.
+        extra_bits_increase[last_used] = 0;
+        for (size_t sym = 0; sym < max_symbols; sym++) {
+          count_increase[last_used * max_symbols + sym] = 0;
+        }
+      }
+
+      // Try to avoid introducing WP.
+      if (best_split_nowp.Cost() + threshold < base_bits &&
+          best_split_nowp.Cost() <= fast_decode_multiplier * best->Cost()) {
+        best = &best_split_nowp;
+      }
+      // Split along static props if possible and not significantly more
+      // expensive.
+      if (best_split_static.Cost() + threshold < base_bits &&
+          best_split_static.Cost() <= fast_decode_multiplier * best->Cost()) {
+        best = &best_split_static;
+      }
+      // Split along static props to create constant nodes if possible.
+      if (best_split_static_constant.Cost() + threshold < base_bits) {
+        best = &best_split_static_constant;
+      }
+    }
+
+    if (best->Cost() + threshold < base_bits) {
+      uint32_t p = tree_samples.PropertyFromIndex(best->prop);
+      pixel_type dequant =
+          tree_samples.UnquantizeProperty(best->prop, best->val);
+      // Split node and try to split children.
+      MakeSplitNode(pos, p, dequant, best->lpred, 0, best->rpred, 0, tree);
+      // "Sort" according to winning property
+      SplitTreeSamples(tree_samples, begin, best->pos, end, best->prop);
+      if (p >= kNumStaticProperties) {
+        used_properties |= 1 << best->prop;
+      }
+      auto new_sp_range = static_prop_range;
+      if (p < kNumStaticProperties) {
+        JXL_ASSERT(static_cast<uint32_t>(dequant + 1) <= new_sp_range[p][1]);
+        new_sp_range[p][1] = dequant + 1;
+        JXL_ASSERT(new_sp_range[p][0] < new_sp_range[p][1]);
+      }
+      nodes.push_back(NodeInfo{(*tree)[pos].rchild, begin, best->pos,
+                               used_properties, new_sp_range});
+      new_sp_range = static_prop_range;
+      if (p < kNumStaticProperties) {
+        JXL_ASSERT(new_sp_range[p][0] <= static_cast<uint32_t>(dequant + 1));
+        new_sp_range[p][0] = dequant + 1;
+        JXL_ASSERT(new_sp_range[p][0] < new_sp_range[p][1]);
+      }
+      nodes.push_back(NodeInfo{(*tree)[pos].lchild, best->pos, end,
+                               used_properties, new_sp_range});
+    }
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(FindBestSplit);  // Local function.
+
+void ComputeBestTree(TreeSamples &tree_samples, float threshold,
+                     const std::vector<ModularMultiplierInfo> &mul_info,
+                     StaticPropRange static_prop_range,
+                     float fast_decode_multiplier, Tree *tree) {
+  // Seeds `tree` with a single root leaf, then lets FindBestSplit (invoked
+  // through HWY_DYNAMIC_DISPATCH) grow it from the collected samples.
+  // TODO(veluca): take into account that different contexts can have different
+  // uint configs.
+  tree->emplace_back();
+  auto &root = tree->back();
+  root.property = -1;  // -1 is the "leaf" marker (see TokenizeTree).
+  root.predictor = tree_samples.PredictorFromIndex(0);
+  root.predictor_offset = 0;
+  root.multiplier = 1;
+  JXL_ASSERT(tree_samples.NumProperties() < 64);
+  JXL_ASSERT(tree_samples.NumDistinctSamples() <=
+             std::numeric_limits<uint32_t>::max());
+  HWY_DYNAMIC_DISPATCH(FindBestSplit)
+  (tree_samples, threshold, mul_info, static_prop_range, fast_decode_multiplier,
+   tree);
+}
+
+constexpr int32_t TreeSamples::kPropertyRange;  // out-of-class definition
+constexpr uint32_t TreeSamples::kDedupEntryUnused;  // out-of-class definition
+
+Status TreeSamples::SetPredictor(Predictor predictor,
+                                 ModularOptions::TreeMode wp_tree_mode) {
+  // Chooses the candidate predictors and sizes `residuals` to match them.
+  if (wp_tree_mode == ModularOptions::TreeMode::kWPOnly) {
+    predictors = {Predictor::Weighted};
+    residuals.resize(1);
+    return true;
+  }
+  const bool no_wp = wp_tree_mode == ModularOptions::TreeMode::kNoWP;
+  if (no_wp && predictor == Predictor::Weighted) {
+    return JXL_FAILURE("Invalid predictor settings");
+  }
+  if (predictor == Predictor::Best) {
+    predictors = {Predictor::Weighted, Predictor::Gradient};
+  } else if (predictor != Predictor::Variable) {
+    predictors = {predictor};
+  } else {
+    // Variable: every modular predictor becomes a candidate.
+    for (size_t idx = 0; idx < kNumModularPredictors; ++idx) {
+      predictors.push_back(static_cast<Predictor>(idx));
+    }
+    std::swap(predictors[0], predictors[static_cast<int>(Predictor::Weighted)]);
+    std::swap(predictors[1], predictors[static_cast<int>(Predictor::Gradient)]);
+  }
+  if (no_wp) {
+    const auto weighted =
+        std::find(predictors.begin(), predictors.end(), Predictor::Weighted);
+    if (weighted != predictors.end()) predictors.erase(weighted);
+  }
+  residuals.resize(predictors.size());
+  return true;
+}
+
+Status TreeSamples::SetProperties(const std::vector<uint32_t> &properties,
+                                  ModularOptions::TreeMode wp_tree_mode) {
+  using Mode = ModularOptions::TreeMode;
+  // Start from the caller's list; the tree mode may replace or filter it.
+  props_to_use = properties;
+  if (wp_tree_mode == Mode::kWPOnly) {
+    props_to_use = {static_cast<uint32_t>(kWPProp)};
+  } else if (wp_tree_mode == Mode::kGradientOnly) {
+    props_to_use = {static_cast<uint32_t>(kGradientProp)};
+  } else if (wp_tree_mode == Mode::kNoWP) {
+    const auto wp_it =
+        std::find(props_to_use.begin(), props_to_use.end(), kWPProp);
+    if (wp_it != props_to_use.end()) props_to_use.erase(wp_it);
+  }
+  if (props_to_use.empty()) {
+    return JXL_FAILURE("Invalid property set configuration");
+  }
+  // One quantized-value column per selected property.
+  props.resize(props_to_use.size());
+  return true;
+}
+
+void TreeSamples::InitTable(size_t size) {
+  // `size` must be a power of two: the hashes mask with (size - 1).
+  JXL_DASSERT((size & (size - 1)) == 0);
+  if (dedup_table_.size() == size) return;
+  dedup_table_.resize(size, kDedupEntryUnused);
+  constexpr uint16_t kSaturated = std::numeric_limits<uint16_t>::max();
+  for (size_t idx = 0; idx < sample_counts.size(); ++idx) {
+    if (sample_counts[idx] != kSaturated) AddToTable(idx);  // skip saturated
+  }
+}
+
+bool TreeSamples::AddToTableAndMerge(size_t a) {
+  // Deduplicates sample `a` against the two-slot hash table.
+  //
+  // Each sample has two candidate slots, Hash1(a) and Hash2(a). If either
+  // slot holds the index of an identical sample (see IsSameSample), that
+  // sample's count is incremented and `true` is returned so the caller can
+  // discard `a`. Otherwise `a` is offered to the table via AddToTable and
+  // `false` is returned.
+  //
+  // Counts are 16-bit: an entry whose count reaches the maximum is removed
+  // from its slot, so that it stops absorbing further duplicates.
+  constexpr uint16_t kSaturated = std::numeric_limits<uint16_t>::max();
+  for (const size_t slot : {Hash1(a), Hash2(a)}) {
+    const uint32_t existing = dedup_table_[slot];
+    if (existing == kDedupEntryUnused) continue;
+    if (!IsSameSample(a, existing)) continue;
+    JXL_DASSERT(sample_counts[a] == 1);
+    sample_counts[existing]++;
+    // Remove from hash table samples that are saturated.
+    if (sample_counts[existing] == kSaturated) {
+      dedup_table_[slot] = kDedupEntryUnused;
+    }
+    return true;
+  }
+  // No identical sample found in either slot.
+  AddToTable(a);
+  return false;
+}
+
+void TreeSamples::AddToTable(size_t a) {
+  // Store `a` in the first free candidate slot (Hash1 first, then Hash2);
+  // if both are occupied, `a` is left out of the table.
+  for (const size_t slot : {Hash1(a), Hash2(a)}) {
+    if (dedup_table_[slot] != kDedupEntryUnused) continue;
+    dedup_table_[slot] = a;
+    return;
+  }
+}
+
+void TreeSamples::PrepareForSamples(size_t num_samples) {
+  // Grow every per-sample column's capacity by `num_samples`.
+  for (auto &column : residuals) column.reserve(column.size() + num_samples);
+  for (auto &column : props) column.reserve(column.size() + num_samples);
+  // (Re)build the dedup table with a power-of-two size computed from 1.5x
+  // the number of distinct samples held now plus the ones announced.
+  // InitTable returns early when the table already has that size.
+  const size_t expected = num_samples + sample_counts.size();
+  const size_t table_size = 1LLU << CeilLog2Nonzero(expected * 3 / 2);
+  InitTable(table_size);
+}
+
+size_t TreeSamples::Hash1(size_t a) const {
+  // Multiply-add hash over sample `a`: residual tokens first, then properties.
+  constexpr uint64_t kMul = 0x1e35a7bd;
+  uint64_t hash = kMul;
+  for (const auto &column : residuals) {
+    const ResidualToken &res = column[a];
+    hash = (hash * kMul + res.tok) * kMul + res.nbits;
+  }
+  for (const auto &column : props) hash = hash * kMul + column[a];
+  // The table size is a power of two, so the mask yields a valid slot.
+  return (hash >> 16) & (dedup_table_.size() - 1);
+}
+size_t TreeSamples::Hash2(size_t a) const {
+  // Second hash: multiply-xor, visiting properties first, then residuals.
+  constexpr uint64_t kMul = 0x1e35a7bd1e35a7bd;
+  uint64_t hash = kMul;
+  for (const auto &column : props) hash = (hash * kMul) ^ column[a];
+  for (const auto &column : residuals) {
+    const ResidualToken &res = column[a];
+    hash = (((hash * kMul) ^ res.tok) * kMul) ^ res.nbits;
+  }
+  // The table size is a power of two, so the mask yields a valid slot.
+  return (hash >> 16) & (dedup_table_.size() - 1);
+}
+
+bool TreeSamples::IsSameSample(size_t a, size_t b) const {
+  // Two samples are identical when, for every predictor, the residual token
+  // and extra-bit count agree, and every quantized property value agrees.
+  for (const auto &column : residuals) {
+    const ResidualToken &lhs = column[a];
+    const ResidualToken &rhs = column[b];
+    if (lhs.tok != rhs.tok || lhs.nbits != rhs.nbits) {
+      return false;
+    }
+  }
+  for (const auto &column : props) {
+    if (column[a] != column[b]) {
+      return false;
+    }
+  }
+  return true;
+}
+
+void TreeSamples::AddSample(pixel_type_w pixel, const Properties &properties,
+                            const pixel_type_w *predictions) {
+  // Append provisionally; the new entry is dropped again if it is a duplicate.
+  for (size_t p = 0; p < predictors.size(); ++p) {
+    pixel_type diff = pixel - predictions[static_cast<int>(predictors[p])];
+    uint32_t tok, nbits, bits;
+    HybridUintConfig(4, 1, 2).Encode(PackSigned(diff), &tok, &nbits, &bits);
+    JXL_DASSERT(tok < 256);
+    JXL_DASSERT(nbits < 256);
+    residuals[p].push_back(
+        {static_cast<uint8_t>(tok), static_cast<uint8_t>(nbits)});
+  }
+  for (size_t q = 0; q < props_to_use.size(); ++q) {
+    props[q].push_back(QuantizeProperty(q, properties[props_to_use[q]]));
+  }
+  sample_counts.push_back(1);
+  num_samples++;
+  if (!AddToTableAndMerge(sample_counts.size() - 1)) return;  // kept as new
+  for (auto &column : residuals) column.pop_back();  // merged: undo append
+  for (auto &column : props) column.pop_back();
+  sample_counts.pop_back();
+}
+
+void TreeSamples::Swap(size_t a, size_t b) {
+  // Exchanges samples `a` and `b` in every per-sample column (residuals,
+  // properties and counts).
+  if (a == b) return;
+  for (auto &column : residuals) std::swap(column[a], column[b]);
+  for (auto &column : props) std::swap(column[a], column[b]);
+  const uint16_t count_a = sample_counts[a];
+  sample_counts[a] = sample_counts[b];
+  sample_counts[b] = count_a;
+}
+
+void TreeSamples::ThreeShuffle(size_t a, size_t b, size_t c) {
+  // Rotates three samples: the one at `c` moves to `a`, `a` to `b`, and `b`
+  // to `c`. Done as two swaps per column: a<->c first, then c<->b.
+  if (b == c) {
+    // Only two distinct positions are involved.
+    Swap(a, b);
+    return;
+  }
+  for (auto &column : residuals) {
+    std::swap(column[a], column[c]);
+    std::swap(column[c], column[b]);
+  }
+  for (auto &column : props) {
+    std::swap(column[a], column[c]);
+    std::swap(column[c], column[b]);
+  }
+  std::swap(sample_counts[a], sample_counts[c]);
+  std::swap(sample_counts[c], sample_counts[b]);
+}
+
+namespace {
+// Picks bucket indices where the running total crosses k * sum / num_chunks.
+std::vector<int32_t> QuantizeHistogram(const std::vector<uint32_t> &histogram,
+                                       size_t num_chunks) {
+  if (histogram.empty() || num_chunks == 0) return {};  // avoids x / 0 below
+  // TODO(veluca): selecting distinct quantiles is likely not the best
+  // way to go about this.
+  std::vector<int32_t> thresholds;
+  // 0ULL seed: sum in at least 64 bits even where `unsigned long` is 32-bit.
+  uint64_t sum = std::accumulate(histogram.begin(), histogram.end(), 0ULL);
+  uint64_t cumsum = 0;
+  uint64_t threshold = 1;
+  for (size_t i = 0; i + 1 < histogram.size(); i++) {
+    cumsum += histogram[i];
+    if (cumsum >= threshold * sum / num_chunks) {
+      thresholds.push_back(i);
+      while (cumsum > threshold * sum / num_chunks) threshold++;
+    }
+  }
+  return thresholds;
+}
+
+// Histograms `samples` clamped to [-kRange, kRange] and quantizes the result.
+std::vector<int32_t> QuantizeSamples(const std::vector<int32_t> &samples,
+                                     size_t num_chunks) {
+  if (samples.empty()) return {};
+  int min = *std::min_element(samples.begin(), samples.end());
+  constexpr int kRange = 512;
+  min = std::min(std::max(min, -kRange), kRange);
+  std::vector<uint32_t> counts(2 * kRange + 1);
+  for (int s : samples) counts[std::min(std::max(s, -kRange), kRange) - min]++;
+  std::vector<int32_t> thresholds = QuantizeHistogram(counts, num_chunks);
+  for (auto &v : thresholds) v += min;
+  return thresholds;
+}
+}  // namespace
+
+void TreeSamples::PreQuantizeProperties(
+    const StaticPropRange &range,
+    const std::vector<ModularMultiplierInfo> &multiplier_info,
+    const std::vector<uint32_t> &group_pixel_count,
+    const std::vector<uint32_t> &channel_pixel_count,
+    std::vector<pixel_type> &pixel_samples,
+    std::vector<pixel_type> &diff_samples, size_t max_property_values) {
+  // Builds per-property thresholds (compact_properties) and lookup tables.
+  // Multiplier-forced splits, if any, dictate the channel/group thresholds.
+  std::vector<int32_t> group_multiplier_thresholds;
+  std::vector<int32_t> channel_multiplier_thresholds;
+  for (const auto &v : multiplier_info) {  // range[0]: channel, [1]: group
+    if (v.range[0][0] != range[0][0]) {
+      channel_multiplier_thresholds.push_back(v.range[0][0] - 1);
+    }
+    if (v.range[0][1] != range[0][1]) {
+      channel_multiplier_thresholds.push_back(v.range[0][1] - 1);
+    }
+    if (v.range[1][0] != range[1][0]) {
+      group_multiplier_thresholds.push_back(v.range[1][0] - 1);
+    }
+    if (v.range[1][1] != range[1][1]) {
+      group_multiplier_thresholds.push_back(v.range[1][1] - 1);
+    }
+  }
+  std::sort(channel_multiplier_thresholds.begin(),  // sort, then deduplicate
+            channel_multiplier_thresholds.end());
+  channel_multiplier_thresholds.resize(
+      std::unique(channel_multiplier_thresholds.begin(),
+                  channel_multiplier_thresholds.end()) -
+      channel_multiplier_thresholds.begin());
+  std::sort(group_multiplier_thresholds.begin(),  // same for the group list
+            group_multiplier_thresholds.end());
+  group_multiplier_thresholds.resize(
+      std::unique(group_multiplier_thresholds.begin(),
+                  group_multiplier_thresholds.end()) -
+      group_multiplier_thresholds.begin());
+
+  compact_properties.resize(props_to_use.size());
+  auto quantize_channel = [&]() {  // forced thresholds win over the histogram
+    if (!channel_multiplier_thresholds.empty()) {
+      return channel_multiplier_thresholds;
+    }
+    return QuantizeHistogram(channel_pixel_count, max_property_values);
+  };
+  auto quantize_group_id = [&]() {  // same policy, for the group id
+    if (!group_multiplier_thresholds.empty()) {
+      return group_multiplier_thresholds;
+    }
+    return QuantizeHistogram(group_pixel_count, max_property_values);
+  };
+  auto quantize_coordinate = [&]() {  // evenly spaced thresholds below 255
+    std::vector<int32_t> quantized;
+    quantized.reserve(max_property_values - 1);
+    for (size_t i = 0; i + 1 < max_property_values; i++) {
+      quantized.push_back((i + 1) * 256 / max_property_values - 1);
+    }
+    return quantized;
+  };
+  std::vector<int32_t> abs_pixel_thr;
+  std::vector<int32_t> pixel_thr;
+  auto quantize_pixel_property = [&]() {
+    if (pixel_thr.empty()) {  // computed on first use, then reused
+      pixel_thr = QuantizeSamples(pixel_samples, max_property_values);
+    }
+    return pixel_thr;
+  };
+  auto quantize_abs_pixel_property = [&]() {
+    if (abs_pixel_thr.empty()) {
+      quantize_pixel_property();  // Compute the non-abs thresholds.
+      for (auto &v : pixel_samples) v = std::abs(v);  // in place: signs lost
+      abs_pixel_thr = QuantizeSamples(pixel_samples, max_property_values);
+    }
+    return abs_pixel_thr;
+  };
+  std::vector<int32_t> abs_diff_thr;
+  std::vector<int32_t> diff_thr;
+  auto quantize_diff_property = [&]() {
+    if (diff_thr.empty()) {  // computed on first use, then reused
+      diff_thr = QuantizeSamples(diff_samples, max_property_values);
+    }
+    return diff_thr;
+  };
+  auto quantize_abs_diff_property = [&]() {
+    if (abs_diff_thr.empty()) {
+      quantize_diff_property();  // Compute the non-abs thresholds.
+      for (auto &v : diff_samples) v = std::abs(v);  // in place: signs lost
+      abs_diff_thr = QuantizeSamples(diff_samples, max_property_values);
+    }
+    return abs_diff_thr;
+  };
+  auto quantize_wp = [&]() {  // fixed tables, picked by max_property_values
+    if (max_property_values < 32) {
+      return std::vector<int32_t>{-127, -63, -31, -15, -7, -3, -1, 0,
+                                  1,    3,   7,   15,  31, 63, 127};
+    }
+    if (max_property_values < 64) {
+      return std::vector<int32_t>{-255, -191, -127, -95, -63, -47, -31, -23,
+                                  -15,  -11,  -7,   -5,  -3,  -1,  0,   1,
+                                  3,    5,    7,    11,  15,  23,  31,  47,
+                                  63,   95,   127,  191, 255};
+    }
+    return std::vector<int32_t>{
+        -255, -223, -191, -159, -127, -111, -95, -79, -63, -55, -47,
+        -39,  -31,  -27,  -23,  -19,  -15,  -13, -11, -9,  -7,  -6,
+        -5,   -4,   -3,   -2,   -1,   0,    1,   2,   3,   4,   5,
+        6,    7,    9,    11,   13,   15,   19,  23,  27,  31,  39,
+        47,   55,   63,   79,   95,   111,  127, 159, 191, 223, 255};
+  };
+
+  property_mapping.resize(props_to_use.size());
+  for (size_t i = 0; i < props_to_use.size(); i++) {
+    if (props_to_use[i] == 0) {
+      compact_properties[i] = quantize_channel();
+    } else if (props_to_use[i] == 1) {
+      compact_properties[i] = quantize_group_id();
+    } else if (props_to_use[i] == 2 || props_to_use[i] == 3) {
+      compact_properties[i] = quantize_coordinate();
+    } else if (props_to_use[i] == 6 || props_to_use[i] == 7 ||
+               props_to_use[i] == 8 ||
+               (props_to_use[i] >= kNumNonrefProperties &&
+                (props_to_use[i] - kNumNonrefProperties) % 4 == 1)) {
+      compact_properties[i] = quantize_pixel_property();
+    } else if (props_to_use[i] == 4 || props_to_use[i] == 5 ||
+               (props_to_use[i] >= kNumNonrefProperties &&
+                (props_to_use[i] - kNumNonrefProperties) % 4 == 0)) {
+      compact_properties[i] = quantize_abs_pixel_property();
+    } else if (props_to_use[i] >= kNumNonrefProperties &&
+               (props_to_use[i] - kNumNonrefProperties) % 4 == 2) {
+      compact_properties[i] = quantize_abs_diff_property();
+    } else if (props_to_use[i] == kWPProp) {
+      compact_properties[i] = quantize_wp();
+    } else {  // every remaining property uses the signed-difference thresholds
+      compact_properties[i] = quantize_diff_property();
+    }
+    property_mapping[i].resize(kPropertyRange * 2 + 1);  // per clamped value
+    size_t mapped = 0;
+    for (size_t j = 0; j < property_mapping[i].size(); j++) {
+      while (mapped < compact_properties[i].size() &&
+             static_cast<int>(j) - kPropertyRange >
+                 compact_properties[i][mapped]) {
+        mapped++;
+      }
+      // After the loop, `mapped` is the index of the first threshold that is
+      // >= V, where V = j - kPropertyRange is the property value of slot j
+      // (or the number of thresholds when V is larger than all of them).
+      // This is because the decision node in the tree splits on
+      // (property) > threshold, hence everything that is not > of a threshold
+      // should be clustered together.
+      property_mapping[i][j] = mapped;
+    }
+  }
+}
+
+void CollectPixelSamples(const Image &image, const ModularOptions &options,
+                         size_t group_id,
+                         std::vector<uint32_t> &group_pixel_count,
+                         std::vector<uint32_t> &channel_pixel_count,
+                         std::vector<pixel_type> &pixel_samples,
+                         std::vector<pixel_type> &diff_samples) {
+  if (options.nb_repeats == 0) return;  // the fraction below would be 0
+  if (group_pixel_count.size() <= group_id) {
+    group_pixel_count.resize(group_id + 1);
+  }
+  if (channel_pixel_count.size() < image.channel.size()) {
+    channel_pixel_count.resize(image.channel.size());
+  }
+  Rng rng(group_id);  // seeded with the group id
+  // Sample 10% of the final number of samples for property quantization.
+  float fraction = std::min(options.nb_repeats * 0.1, 0.99);
+  Rng::GeometricDistribution dist(fraction);
+  size_t total_pixels = 0;
+  std::vector<size_t> channel_ids;  // channels that will be sampled
+  for (size_t i = 0; i < image.channel.size(); i++) {
+    if (image.channel[i].w <= 1 || image.channel[i].h == 0) {
+      continue;  // skip empty or width-1 channels.
+    }
+    if (i >= image.nb_meta_channels &&
+        (image.channel[i].w > options.max_chan_size ||
+         image.channel[i].h > options.max_chan_size)) {
+      break;  // stop at the first oversized non-meta channel
+    }
+    channel_ids.push_back(i);
+    group_pixel_count[group_id] += image.channel[i].w * image.channel[i].h;
+    channel_pixel_count[i] += image.channel[i].w * image.channel[i].h;
+    total_pixels += image.channel[i].w * image.channel[i].h;
+  }
+  if (channel_ids.empty()) return;
+  pixel_samples.reserve(pixel_samples.size() + fraction * total_pixels);
+  diff_samples.reserve(diff_samples.size() + fraction * total_pixels);
+  size_t i = 0;  // index into channel_ids
+  size_t y = 0;  // current row
+  size_t x = 0;  // current column
+  auto advance = [&](size_t amount) {  // moves (i, y, x) forward by `amount`
+    x += amount;
+    // Detect row overflow (rare).
+    while (x >= image.channel[channel_ids[i]].w) {
+      x -= image.channel[channel_ids[i]].w;
+      y++;
+      // Detect end-of-channel (even rarer).
+      if (y == image.channel[channel_ids[i]].h) {
+        i++;
+        y = 0;
+        if (i >= channel_ids.size()) {
+          return;  // ran past the last channel; the caller's loop then ends
+        }
+      }
+    }
+  };
+  advance(rng.Geometric(dist));  // random initial offset
+  for (; i < channel_ids.size(); advance(rng.Geometric(dist) + 1)) {
+    const pixel_type *row = image.channel[channel_ids[i]].Row(y);
+    pixel_samples.push_back(row[x]);
+    size_t xp = x == 0 ? 1 : x - 1;  // left neighbour; right one at x == 0
+    diff_samples.push_back((int64_t)row[x] - row[xp]);
+  }
+}
+
+// TODO(veluca): very simple encoding scheme. This should be improved.
+void TokenizeTree(const Tree &tree, std::vector<Token> *tokens,
+                  Tree *decoder_tree) {
+  // Serializes `tree` breadth-first into `tokens` and fills `decoder_tree`
+  // with the same nodes laid out in that breadth-first order.
+  JXL_ASSERT(tree.size() <= kMaxTreeSize);
+  decoder_tree->clear();
+  size_t next_leaf = 0;
+  std::queue<int> pending;
+  pending.push(0);
+  while (!pending.empty()) {
+    const auto &node = tree[pending.front()];
+    pending.pop();
+    JXL_ASSERT(node.property >= -1);
+    // Properties are shifted by one so that 0 encodes a leaf.
+    tokens->emplace_back(kPropertyContext, node.property + 1);
+    if (node.property != -1) {
+      // Children land right after everything already emitted or queued.
+      size_t first_child = decoder_tree->size() + pending.size() + 1;
+      decoder_tree->emplace_back(node.property, node.splitval, first_child,
+                                 first_child + 1, Predictor::Zero, 0, 1);
+      pending.push(node.lchild);
+      pending.push(node.rchild);
+      tokens->emplace_back(kSplitValContext, PackSigned(node.splitval));
+      continue;
+    }
+    tokens->emplace_back(kPredictorContext, static_cast<int>(node.predictor));
+    tokens->emplace_back(kOffsetContext, PackSigned(node.predictor_offset));
+    uint32_t mul_log = Num0BitsBelowLS1Bit_Nonzero(node.multiplier);
+    uint32_t mul_bits = (node.multiplier >> mul_log) - 1;
+    tokens->emplace_back(kMultiplierLogContext, mul_log);
+    tokens->emplace_back(kMultiplierBitsContext, mul_bits);
+    JXL_ASSERT(node.predictor < Predictor::Best);
+    decoder_tree->emplace_back(-1, 0, next_leaf++, 0, node.predictor,
+                               node.predictor_offset, node.multiplier);
+  }
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_ma.h b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_ma.h
new file mode 100644
index 0000000000..ede37c8023
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_ma.h
@@ -0,0 +1,157 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_ENC_MA_H_
+#define LIB_JXL_MODULAR_ENCODING_ENC_MA_H_
+
+#include <numeric>
+
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/modular/encoding/dec_ma.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+// Collects deduplicated (residuals, properties) samples used to learn a tree.
+struct TreeSamples {
+  bool HasSamples() const {  // true once at least one sample is stored
+    return !residuals.empty() && !residuals[0].empty();
+  }
+  size_t NumDistinctSamples() const { return sample_counts.size(); }  // deduped
+  size_t NumSamples() const { return num_samples; }  // incl. duplicates
+  // Set the predictor to use. Must be called before adding any samples.
+  Status SetPredictor(Predictor predictor,
+                      ModularOptions::TreeMode wp_tree_mode);
+  // Set the properties to use. Must be called before adding any samples.
+  Status SetProperties(const std::vector<uint32_t> &properties,
+                       ModularOptions::TreeMode wp_tree_mode);
+
+  size_t Token(size_t pred, size_t i) const { return residuals[pred][i].tok; }
+  size_t NBits(size_t pred, size_t i) const { return residuals[pred][i].nbits; }
+  size_t Count(size_t i) const { return sample_counts[i]; }
+  size_t PredictorIndex(Predictor predictor) const {  // slot in `predictors`
+    const auto predictor_elem =
+        std::find(predictors.begin(), predictors.end(), predictor);
+    JXL_DASSERT(predictor_elem != predictors.end());
+    return predictor_elem - predictors.begin();
+  }
+  size_t PropertyIndex(size_t property) const {  // slot in `props_to_use`
+    const auto property_elem =
+        std::find(props_to_use.begin(), props_to_use.end(), property);
+    JXL_DASSERT(property_elem != props_to_use.end());
+    return property_elem - props_to_use.begin();
+  }
+  size_t NumPropertyValues(size_t property_index) const {  // thresholds + 1
+    return compact_properties[property_index].size() + 1;
+  }
+  // Returns the *quantized* property value.
+  size_t Property(size_t property_index, size_t i) const {
+    return props[property_index][i];
+  }
+  int UnquantizeProperty(size_t property_index, uint32_t quant) const {
+    JXL_ASSERT(quant < compact_properties[property_index].size());
+    return compact_properties[property_index][quant];
+  }
+
+  Predictor PredictorFromIndex(size_t index) const {
+    JXL_DASSERT(index < predictors.size());
+    return predictors[index];
+  }
+  size_t PropertyFromIndex(size_t index) const {
+    JXL_DASSERT(index < props_to_use.size());
+    return props_to_use[index];
+  }
+  size_t NumPredictors() const { return predictors.size(); }
+  size_t NumProperties() const { return props_to_use.size(); }
+
+  // Preallocate data for a given number of samples. MUST be called before
+  // adding any sample.
+  void PrepareForSamples(size_t num_samples);
+  // Add a sample.
+  void AddSample(pixel_type_w pixel, const Properties &properties,
+                 const pixel_type_w *predictions);
+  // Pre-cluster property values.
+  void PreQuantizeProperties(
+      const StaticPropRange &range,
+      const std::vector<ModularMultiplierInfo> &multiplier_info,
+      const std::vector<uint32_t> &group_pixel_count,
+      const std::vector<uint32_t> &channel_pixel_count,
+      std::vector<pixel_type> &pixel_samples,
+      std::vector<pixel_type> &diff_samples, size_t max_property_values);
+  // Releases the storage held by the deduplication table.
+  void AllSamplesDone() { dedup_table_ = std::vector<uint32_t>(); }
+  // Clamps `v` to [-kPropertyRange, kPropertyRange] and returns its bucket.
+  uint32_t QuantizeProperty(uint32_t prop, pixel_type v) const {
+    v = std::min(std::max(v, -kPropertyRange), kPropertyRange) + kPropertyRange;
+    return property_mapping[prop][v];
+  }
+
+  // Swaps samples in position a and b. Does nothing if a == b.
+  void Swap(size_t a, size_t b);
+
+  // Cycles samples: a -> b -> c -> a. We assume a <= b <= c, so that we can
+  // just call Swap(a, b) if b==c.
+  void ThreeShuffle(size_t a, size_t b, size_t c);
+
+ private:
+  // TODO(veluca): as the total number of properties and predictors are known
+  // before adding any samples, it might be better to interleave predictors,
+  // properties and counts in a single vector to improve locality.
+  // A first attempt at doing this actually results in much slower encoding,
+  // possibly because of the more complex addressing.
+  struct ResidualToken {
+    uint8_t tok;
+    uint8_t nbits;
+  };
+  // Residual information: token and number of extra bits, per predictor.
+  std::vector<std::vector<ResidualToken>> residuals;
+  // Number of occurrences of each sample.
+  std::vector<uint16_t> sample_counts;
+  // Property values, quantized to at most 256 distinct values.
+  std::vector<std::vector<uint8_t>> props;
+  // Decompactification info for `props`.
+  std::vector<std::vector<int32_t>> compact_properties;
+  // List of properties to use.
+  std::vector<uint32_t> props_to_use;
+  // List of predictors to use.
+  std::vector<Predictor> predictors;
+  // Mapping property value -> quantized property value.
+  static constexpr int32_t kPropertyRange = 511;  // values are clamped to it
+  std::vector<std::vector<uint8_t>> property_mapping;
+  // Number of samples seen.
+  size_t num_samples = 0;
+  // Table for deduplication: sample indices, or kDedupEntryUnused if free.
+  static constexpr uint32_t kDedupEntryUnused{static_cast<uint32_t>(-1)};
+  std::vector<uint32_t> dedup_table_;
+
+  // Functions for sample deduplication.
+  bool IsSameSample(size_t a, size_t b) const;
+  size_t Hash1(size_t a) const;
+  size_t Hash2(size_t a) const;
+  void InitTable(size_t size);
+  // Returns true if `a` was already present in the table.
+  bool AddToTableAndMerge(size_t a);
+  void AddToTable(size_t a);
+};
+
+void TokenizeTree(const Tree &tree, std::vector<Token> *tokens,
+                  Tree *decoder_tree);
+
+void CollectPixelSamples(const Image &image, const ModularOptions &options,
+                         size_t group_id,
+                         std::vector<uint32_t> &group_pixel_count,
+                         std::vector<uint32_t> &channel_pixel_count,
+                         std::vector<pixel_type> &pixel_samples,
+                         std::vector<pixel_type> &diff_samples);
+
+void ComputeBestTree(TreeSamples &tree_samples, float threshold,
+                     const std::vector<ModularMultiplierInfo> &mul_info,
+                     StaticPropRange static_prop_range,
+                     float fast_decode_multiplier, Tree *tree);
+
+}  // namespace jxl
+#endif  // LIB_JXL_MODULAR_ENCODING_ENC_MA_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/encoding.cc b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/encoding.cc
new file mode 100644
index 0000000000..09f21c0cce
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/encoding.cc
@@ -0,0 +1,684 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/encoding/encoding.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <queue>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/scope_guard.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+// Removes all nodes that use a static property (i.e. channel or group ID) from
+// the tree and collapses each node on even levels with its two children to
+// produce a flatter tree. Also computes whether the resulting tree requires
+// using the weighted predictor.
+FlatTree FilterTree(const Tree &global_tree,
+                    std::array<pixel_type, kNumStaticProperties> &static_props,
+                    size_t *num_props, bool *use_wp, bool *wp_only,
+                    bool *gradient_only) {
+  *num_props = 0;
+  bool has_wp = false;
+  bool has_non_wp = false;
+  *gradient_only = true;
+  const auto mark_property = [&](int32_t p) {
+    if (p == kWPProp) {
+      has_wp = true;
+    } else if (p >= kNumStaticProperties) {
+      has_non_wp = true;
+    }
+    if (p >= kNumStaticProperties && p != kGradientProp) {
+      *gradient_only = false;
+    }
+  };
+  FlatTree output;
+  std::queue<size_t> nodes;
+  nodes.push(0);
+  // Produces a trimmed and flattened tree by doing a BFS visit of the original
+  // tree, ignoring branches that are known to be false and proceeding two
+  // levels at a time to collapse nodes in a flatter tree; if an inner parent
+  // node has a leaf as a child, the leaf is duplicated and an implicit fake
+  // node is added. This reduces the number of branches taken when traversing
+  // the resulting flat tree.
+  while (!nodes.empty()) {
+    size_t cur = nodes.front();
+    nodes.pop();
+    // Skip nodes that we can decide now, by jumping directly to their children.
+    while (global_tree[cur].property < kNumStaticProperties &&
+           global_tree[cur].property != -1) {
+      if (static_props[global_tree[cur].property] > global_tree[cur].splitval) {
+        cur = global_tree[cur].lchild;
+      } else {
+        cur = global_tree[cur].rchild;
+      }
+    }
+    FlatDecisionNode flat;
+    if (global_tree[cur].property == -1) {
+      flat.property0 = -1;
+      flat.childID = global_tree[cur].lchild;
+      flat.predictor = global_tree[cur].predictor;
+      flat.predictor_offset = global_tree[cur].predictor_offset;
+      flat.multiplier = global_tree[cur].multiplier;
+      *gradient_only &= flat.predictor == Predictor::Gradient;
+      has_wp |= flat.predictor == Predictor::Weighted;
+      has_non_wp |= flat.predictor != Predictor::Weighted;
+      output.push_back(flat);
+      continue;
+    }
+    flat.childID = output.size() + nodes.size() + 1;
+
+    flat.property0 = global_tree[cur].property;
+    *num_props = std::max<size_t>(flat.property0 + 1, *num_props);
+    flat.splitval0 = global_tree[cur].splitval;
+
+    for (size_t i = 0; i < 2; i++) {
+      size_t cur_child =
+          i == 0 ? global_tree[cur].lchild : global_tree[cur].rchild;
+      // Skip nodes that we can decide now.
+      while (global_tree[cur_child].property < kNumStaticProperties &&
+             global_tree[cur_child].property != -1) {
+        if (static_props[global_tree[cur_child].property] >
+            global_tree[cur_child].splitval) {
+          cur_child = global_tree[cur_child].lchild;
+        } else {
+          cur_child = global_tree[cur_child].rchild;
+        }
+      }
+      // We ended up in a leaf, add a dummy decision and two copies of the leaf.
+      if (global_tree[cur_child].property == -1) {
+        flat.properties[i] = 0;
+        flat.splitvals[i] = 0;
+        nodes.push(cur_child);
+        nodes.push(cur_child);
+      } else {
+        flat.properties[i] = global_tree[cur_child].property;
+        flat.splitvals[i] = global_tree[cur_child].splitval;
+        nodes.push(global_tree[cur_child].lchild);
+        nodes.push(global_tree[cur_child].rchild);
+        *num_props = std::max<size_t>(flat.properties[i] + 1, *num_props);
+      }
+    }
+
+    for (size_t j = 0; j < 2; j++) mark_property(flat.properties[j]);
+    mark_property(flat.property0);
+    output.push_back(flat);
+  }
+  if (*num_props > kNumNonrefProperties) {
+    *num_props =
+        DivCeil(*num_props - kNumNonrefProperties, kExtraPropsPerChannel) *
+            kExtraPropsPerChannel +
+        kNumNonrefProperties;
+  } else {
+    *num_props = kNumNonrefProperties;
+  }
+  *use_wp = has_wp;
+  *wp_only = has_wp && !has_non_wp;
+
+  return output;
+}
+
+namespace detail {
+template <bool uses_lz77>
+Status DecodeModularChannelMAANS(BitReader *br, ANSSymbolReader *reader,
+                                 const std::vector<uint8_t> &context_map,
+                                 const Tree &global_tree,
+                                 const weighted::Header &wp_header,
+                                 pixel_type chan, size_t group_id,
+                                 TreeLut<uint8_t, true> &tree_lut,
+                                 Image *image) {
+  Channel &channel = image->channel[chan];
+
+  std::array<pixel_type, kNumStaticProperties> static_props = {
+      {chan, (int)group_id}};
+  // TODO(veluca): filter the tree according to static_props.
+
+  // A channel may have zero pixels; there is nothing to decode in that case.
+  if (channel.w == 0 || channel.h == 0) return true;
+
+  bool tree_has_wp_prop_or_pred = false;
+  bool is_wp_only = false;
+  bool is_gradient_only = false;
+  size_t num_props;
+  FlatTree tree =
+      FilterTree(global_tree, static_props, &num_props,
+                 &tree_has_wp_prop_or_pred, &is_wp_only, &is_gradient_only);
+
+  // From here on, tree lookup returns a *clustered* context ID.
+  // This avoids an extra memory lookup after tree traversal.
+  for (size_t i = 0; i < tree.size(); i++) {
+    if (tree[i].property0 == -1) {
+      tree[i].childID = context_map[tree[i].childID];
+    }
+  }
+
+  JXL_DEBUG_V(3, "Decoded MA tree with %" PRIuS " nodes", tree.size());
+
+  // MAANS decode
+  const auto make_pixel = [](uint64_t v, pixel_type multiplier,
+                             pixel_type_w offset) -> pixel_type {
+    JXL_DASSERT((v & 0xFFFFFFFF) == v);
+    pixel_type_w val = UnpackSigned(v);
+    // if it overflows, it overflows, and we have a problem anyway
+    return val * multiplier + offset;
+  };
+
+  if (tree.size() == 1) {
+    // special optimized case: no meta-adaptation, so no need
+    // to compute properties.
+    Predictor predictor = tree[0].predictor;
+    int64_t offset = tree[0].predictor_offset;
+    int32_t multiplier = tree[0].multiplier;
+    size_t ctx_id = tree[0].childID;
+    if (predictor == Predictor::Zero) {
+      uint32_t value;
+      if (reader->IsSingleValueAndAdvance(ctx_id, &value,
+                                          channel.w * channel.h)) {
+        // Special-case: histogram has a single symbol, with no extra bits, and
+        // we use ANS mode.
+        JXL_DEBUG_V(8, "Fastest track.");
+        pixel_type v = make_pixel(value, multiplier, offset);
+        for (size_t y = 0; y < channel.h; y++) {
+          pixel_type *JXL_RESTRICT r = channel.Row(y);
+          std::fill(r, r + channel.w, v);
+        }
+      } else {
+        JXL_DEBUG_V(8, "Fast track.");
+        if (multiplier == 1 && offset == 0) {
+          for (size_t y = 0; y < channel.h; y++) {
+            pixel_type *JXL_RESTRICT r = channel.Row(y);
+            for (size_t x = 0; x < channel.w; x++) {
+              uint32_t v =
+                  reader->ReadHybridUintClusteredInlined<uses_lz77>(ctx_id, br);
+              r[x] = UnpackSigned(v);
+            }
+          }
+        } else {
+          for (size_t y = 0; y < channel.h; y++) {
+            pixel_type *JXL_RESTRICT r = channel.Row(y);
+            for (size_t x = 0; x < channel.w; x++) {
+              uint32_t v =
+                  reader->ReadHybridUintClusteredMaybeInlined<uses_lz77>(ctx_id,
+                                                                         br);
+              r[x] = make_pixel(v, multiplier, offset);
+            }
+          }
+        }
+      }
+      return true;
+    } else if (uses_lz77 && predictor == Predictor::Gradient && offset == 0 &&
+               multiplier == 1 && reader->HuffRleOnly()) {
+      JXL_DEBUG_V(8, "Gradient RLE (fjxl) very fast track.");
+      uint32_t run = 0;
+      uint32_t v = 0;
+      pixel_type_w sv = 0;
+      for (size_t y = 0; y < channel.h; y++) {
+        pixel_type *JXL_RESTRICT r = channel.Row(y);
+        const pixel_type *JXL_RESTRICT rtop = (y ? channel.Row(y - 1) : r - 1);
+        const pixel_type *JXL_RESTRICT rtopleft =
+            (y ? channel.Row(y - 1) - 1 : r - 1);
+        pixel_type_w guess = (y ? rtop[0] : 0);
+        if (run == 0) {
+          reader->ReadHybridUintClusteredHuffRleOnly(ctx_id, br, &v, &run);
+          sv = UnpackSigned(v);
+        } else {
+          run--;
+        }
+        r[0] = sv + guess;
+        for (size_t x = 1; x < channel.w; x++) {
+          pixel_type left = r[x - 1];
+          pixel_type top = rtop[x];
+          pixel_type topleft = rtopleft[x];
+          pixel_type_w guess = ClampedGradient(top, left, topleft);
+          if (!run) {
+            reader->ReadHybridUintClusteredHuffRleOnly(ctx_id, br, &v, &run);
+            sv = UnpackSigned(v);
+          } else {
+            run--;
+          }
+          r[x] = sv + guess;
+        }
+      }
+      return true;
+    } else if (predictor == Predictor::Gradient && offset == 0 &&
+               multiplier == 1) {
+      JXL_DEBUG_V(8, "Gradient very fast track.");
+      const intptr_t onerow = channel.plane.PixelsPerRow();
+      for (size_t y = 0; y < channel.h; y++) {
+        pixel_type *JXL_RESTRICT r = channel.Row(y);
+        for (size_t x = 0; x < channel.w; x++) {
+          pixel_type left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
+          pixel_type top = (y ? *(r + x - onerow) : left);
+          pixel_type topleft = (x && y ? *(r + x - 1 - onerow) : left);
+          pixel_type guess = ClampedGradient(top, left, topleft);
+          uint64_t v = reader->ReadHybridUintClusteredMaybeInlined<uses_lz77>(
+              ctx_id, br);
+          r[x] = make_pixel(v, 1, guess);
+        }
+      }
+      return true;
+    }
+  }
+
+  // Check if this tree is a WP-only tree with a small enough property value
+  // range.
+  if (is_wp_only) {
+    is_wp_only = TreeToLookupTable(tree, tree_lut);
+  }
+  if (is_gradient_only) {
+    is_gradient_only = TreeToLookupTable(tree, tree_lut);
+  }
+
+  if (is_gradient_only) {
+    JXL_DEBUG_V(8, "Gradient fast track.");
+    const intptr_t onerow = channel.plane.PixelsPerRow();
+    for (size_t y = 0; y < channel.h; y++) {
+      pixel_type *JXL_RESTRICT r = channel.Row(y);
+      for (size_t x = 0; x < channel.w; x++) {
+        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
+        pixel_type_w top = (y ? *(r + x - onerow) : left);
+        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
+        int32_t guess = ClampedGradient(top, left, topleft);
+        uint32_t pos =
+            kPropRangeFast +
+            std::min<pixel_type_w>(
+                std::max<pixel_type_w>(-kPropRangeFast, top + left - topleft),
+                kPropRangeFast - 1);
+        uint32_t ctx_id = tree_lut.context_lookup[pos];
+        uint64_t v =
+            reader->ReadHybridUintClusteredMaybeInlined<uses_lz77>(ctx_id, br);
+        r[x] = make_pixel(
+            v, tree_lut.multipliers[pos],
+            static_cast<pixel_type_w>(tree_lut.offsets[pos]) + guess);
+      }
+    }
+  } else if (!uses_lz77 && is_wp_only && channel.w > 8) {
+    JXL_DEBUG_V(8, "WP fast track.");
+    weighted::State wp_state(wp_header, channel.w, channel.h);
+    Properties properties(1);
+    for (size_t y = 0; y < channel.h; y++) {
+      pixel_type *JXL_RESTRICT r = channel.Row(y);
+      const pixel_type *JXL_RESTRICT rtop = (y ? channel.Row(y - 1) : r - 1);
+      const pixel_type *JXL_RESTRICT rtoptop =
+          (y > 1 ? channel.Row(y - 2) : rtop);
+      const pixel_type *JXL_RESTRICT rtopleft =
+          (y ? channel.Row(y - 1) - 1 : r - 1);
+      const pixel_type *JXL_RESTRICT rtopright =
+          (y ? channel.Row(y - 1) + 1 : r - 1);
+      size_t x = 0;
+      {
+        size_t offset = 0;
+        pixel_type_w left = y ? rtop[x] : 0;
+        pixel_type_w toptop = y ? rtoptop[x] : 0;
+        pixel_type_w topright = (x + 1 < channel.w && y ? rtop[x + 1] : left);
+        int32_t guess = wp_state.Predict</*compute_properties=*/true>(
+            x, y, channel.w, left, left, topright, left, toptop, &properties,
+            offset);
+        uint32_t pos =
+            kPropRangeFast + std::min(std::max(-kPropRangeFast, properties[0]),
+                                      kPropRangeFast - 1);
+        uint32_t ctx_id = tree_lut.context_lookup[pos];
+        uint64_t v =
+            reader->ReadHybridUintClusteredInlined<uses_lz77>(ctx_id, br);
+        r[x] = make_pixel(
+            v, tree_lut.multipliers[pos],
+            static_cast<pixel_type_w>(tree_lut.offsets[pos]) + guess);
+        wp_state.UpdateErrors(r[x], x, y, channel.w);
+      }
+      for (x = 1; x + 1 < channel.w; x++) {
+        size_t offset = 0;
+        int32_t guess = wp_state.Predict</*compute_properties=*/true>(
+            x, y, channel.w, rtop[x], r[x - 1], rtopright[x], rtopleft[x],
+            rtoptop[x], &properties, offset);
+        uint32_t pos =
+            kPropRangeFast + std::min(std::max(-kPropRangeFast, properties[0]),
+                                      kPropRangeFast - 1);
+        uint32_t ctx_id = tree_lut.context_lookup[pos];
+        uint64_t v =
+            reader->ReadHybridUintClusteredInlined<uses_lz77>(ctx_id, br);
+        r[x] = make_pixel(
+            v, tree_lut.multipliers[pos],
+            static_cast<pixel_type_w>(tree_lut.offsets[pos]) + guess);
+        wp_state.UpdateErrors(r[x], x, y, channel.w);
+      }
+      {
+        size_t offset = 0;
+        int32_t guess = wp_state.Predict</*compute_properties=*/true>(
+            x, y, channel.w, rtop[x], r[x - 1], rtop[x], rtopleft[x],
+            rtoptop[x], &properties, offset);
+        uint32_t pos =
+            kPropRangeFast + std::min(std::max(-kPropRangeFast, properties[0]),
+                                      kPropRangeFast - 1);
+        uint32_t ctx_id = tree_lut.context_lookup[pos];
+        uint64_t v =
+            reader->ReadHybridUintClusteredInlined<uses_lz77>(ctx_id, br);
+        r[x] = make_pixel(
+            v, tree_lut.multipliers[pos],
+            static_cast<pixel_type_w>(tree_lut.offsets[pos]) + guess);
+        wp_state.UpdateErrors(r[x], x, y, channel.w);
+      }
+    }
+  } else if (!tree_has_wp_prop_or_pred) {
+    // special optimized case: the weighted predictor and its properties are not
+    // used, so no need to compute weights and properties.
+    JXL_DEBUG_V(8, "Slow track.");
+    MATreeLookup tree_lookup(tree);
+    Properties properties = Properties(num_props);
+    const intptr_t onerow = channel.plane.PixelsPerRow();
+    Channel references(properties.size() - kNumNonrefProperties, channel.w);
+    for (size_t y = 0; y < channel.h; y++) {
+      pixel_type *JXL_RESTRICT p = channel.Row(y);
+      PrecomputeReferences(channel, y, *image, chan, &references);
+      InitPropsRow(&properties, static_props, y);
+      if (y > 1 && channel.w > 8 && references.w == 0) {
+        for (size_t x = 0; x < 2; x++) {
+          PredictionResult res =
+              PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y,
+                              tree_lookup, references);
+          uint64_t v =
+              reader->ReadHybridUintClustered<uses_lz77>(res.context, br);
+          p[x] = make_pixel(v, res.multiplier, res.guess);
+        }
+        for (size_t x = 2; x < channel.w - 2; x++) {
+          PredictionResult res =
+              PredictTreeNoWPNEC(&properties, channel.w, p + x, onerow, x, y,
+                                 tree_lookup, references);
+          uint64_t v = reader->ReadHybridUintClusteredInlined<uses_lz77>(
+              res.context, br);
+          p[x] = make_pixel(v, res.multiplier, res.guess);
+        }
+        for (size_t x = channel.w - 2; x < channel.w; x++) {
+          PredictionResult res =
+              PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y,
+                              tree_lookup, references);
+          uint64_t v =
+              reader->ReadHybridUintClustered<uses_lz77>(res.context, br);
+          p[x] = make_pixel(v, res.multiplier, res.guess);
+        }
+      } else {
+        for (size_t x = 0; x < channel.w; x++) {
+          PredictionResult res =
+              PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y,
+                              tree_lookup, references);
+          uint64_t v = reader->ReadHybridUintClusteredMaybeInlined<uses_lz77>(
+              res.context, br);
+          p[x] = make_pixel(v, res.multiplier, res.guess);
+        }
+      }
+    }
+  } else {
+    JXL_DEBUG_V(8, "Slowest track.");
+    MATreeLookup tree_lookup(tree);
+    Properties properties = Properties(num_props);
+    const intptr_t onerow = channel.plane.PixelsPerRow();
+    Channel references(properties.size() - kNumNonrefProperties, channel.w);
+    weighted::State wp_state(wp_header, channel.w, channel.h);
+    for (size_t y = 0; y < channel.h; y++) {
+      pixel_type *JXL_RESTRICT p = channel.Row(y);
+      InitPropsRow(&properties, static_props, y);
+      PrecomputeReferences(channel, y, *image, chan, &references);
+      if (!uses_lz77 && y > 1 && channel.w > 8 && references.w == 0) {
+        for (size_t x = 0; x < 2; x++) {
+          PredictionResult res =
+              PredictTreeWP(&properties, channel.w, p + x, onerow, x, y,
+                            tree_lookup, references, &wp_state);
+          uint64_t v =
+              reader->ReadHybridUintClustered<uses_lz77>(res.context, br);
+          p[x] = make_pixel(v, res.multiplier, res.guess);
+          wp_state.UpdateErrors(p[x], x, y, channel.w);
+        }
+        for (size_t x = 2; x < channel.w - 2; x++) {
+          PredictionResult res =
+              PredictTreeWPNEC(&properties, channel.w, p + x, onerow, x, y,
+                               tree_lookup, references, &wp_state);
+          uint64_t v = reader->ReadHybridUintClusteredInlined<uses_lz77>(
+              res.context, br);
+          p[x] = make_pixel(v, res.multiplier, res.guess);
+          wp_state.UpdateErrors(p[x], x, y, channel.w);
+        }
+        for (size_t x = channel.w - 2; x < channel.w; x++) {
+          PredictionResult res =
+              PredictTreeWP(&properties, channel.w, p + x, onerow, x, y,
+                            tree_lookup, references, &wp_state);
+          uint64_t v =
+              reader->ReadHybridUintClustered<uses_lz77>(res.context, br);
+          p[x] = make_pixel(v, res.multiplier, res.guess);
+          wp_state.UpdateErrors(p[x], x, y, channel.w);
+        }
+      } else {
+        for (size_t x = 0; x < channel.w; x++) {
+          PredictionResult res =
+              PredictTreeWP(&properties, channel.w, p + x, onerow, x, y,
+                            tree_lookup, references, &wp_state);
+          uint64_t v =
+              reader->ReadHybridUintClustered<uses_lz77>(res.context, br);
+          p[x] = make_pixel(v, res.multiplier, res.guess);
+          wp_state.UpdateErrors(p[x], x, y, channel.w);
+        }
+      }
+    }
+  }
+  return true;
+}
+}  // namespace detail
+
+Status DecodeModularChannelMAANS(BitReader *br, ANSSymbolReader *reader,
+                                 const std::vector<uint8_t> &context_map,
+                                 const Tree &global_tree,
+                                 const weighted::Header &wp_header,
+                                 pixel_type chan, size_t group_id,
+                                 TreeLut<uint8_t, true> &tree_lut,
+                                 Image *image) {
+  if (reader->UsesLZ77()) {
+    return detail::DecodeModularChannelMAANS</*uses_lz77=*/true>(
+        br, reader, context_map, global_tree, wp_header, chan, group_id,
+        tree_lut, image);
+  } else {
+    return detail::DecodeModularChannelMAANS</*uses_lz77=*/false>(
+        br, reader, context_map, global_tree, wp_header, chan, group_id,
+        tree_lut, image);
+  }
+}
+
+GroupHeader::GroupHeader() { Bundle::Init(this); }
+
+Status ValidateChannelDimensions(const Image &image,
+                                 const ModularOptions &options) {
+  size_t nb_channels = image.channel.size();
+  for (bool is_dc : {true, false}) {
+    size_t group_dim = options.group_dim * (is_dc ? kBlockDim : 1);
+    size_t c = image.nb_meta_channels;
+    for (; c < nb_channels; c++) {
+      const Channel &ch = image.channel[c];
+      if (ch.w > options.group_dim || ch.h > options.group_dim) break;
+    }
+    for (; c < nb_channels; c++) {
+      const Channel &ch = image.channel[c];
+      if (ch.w == 0 || ch.h == 0) continue;  // skip empty
+      bool is_dc_channel = std::min(ch.hshift, ch.vshift) >= 3;
+      if (is_dc_channel != is_dc) continue;
+      size_t tile_dim = group_dim >> std::max(ch.hshift, ch.vshift);
+      if (tile_dim == 0) {
+        return JXL_FAILURE("Inconsistent transforms");
+      }
+    }
+  }
+  return true;
+}
+
+Status ModularDecode(BitReader *br, Image &image, GroupHeader &header,
+                     size_t group_id, ModularOptions *options,
+                     const Tree *global_tree, const ANSCode *global_code,
+                     const std::vector<uint8_t> *global_ctx_map,
+                     const bool allow_truncated_group) {
+  if (image.channel.empty()) return true;
+
+  // decode transforms
+  Status status = Bundle::Read(br, &header);
+  if (!allow_truncated_group) JXL_RETURN_IF_ERROR(status);
+  if (status.IsFatalError()) return status;
+  if (!br->AllReadsWithinBounds()) {
+    // Don't do/undo transforms if header is incomplete.
+    header.transforms.clear();
+    image.transform = header.transforms;
+    for (size_t c = 0; c < image.channel.size(); c++) {
+      ZeroFillImage(&image.channel[c].plane);
+    }
+    return Status(StatusCode::kNotEnoughBytes);
+  }
+
+  JXL_DEBUG_V(3, "Image data underwent %" PRIuS " transformations: ",
+              header.transforms.size());
+  image.transform = header.transforms;
+  for (Transform &transform : image.transform) {
+    JXL_RETURN_IF_ERROR(transform.MetaApply(image));
+  }
+  if (image.error) {
+    return JXL_FAILURE("Corrupt file. Aborting.");
+  }
+  JXL_RETURN_IF_ERROR(ValidateChannelDimensions(image, *options));
+
+  size_t nb_channels = image.channel.size();
+
+  size_t num_chans = 0;
+  size_t distance_multiplier = 0;
+  for (size_t i = 0; i < nb_channels; i++) {
+    Channel &channel = image.channel[i];
+    if (!channel.w || !channel.h) {
+      continue;  // skip empty channels
+    }
+    if (i >= image.nb_meta_channels && (channel.w > options->max_chan_size ||
+                                        channel.h > options->max_chan_size)) {
+      break;
+    }
+    if (channel.w > distance_multiplier) {
+      distance_multiplier = channel.w;
+    }
+    num_chans++;
+  }
+  if (num_chans == 0) return true;
+
+  size_t next_channel = 0;
+  auto scope_guard = MakeScopeGuard([&]() {
+    for (size_t c = next_channel; c < image.channel.size(); c++) {
+      ZeroFillImage(&image.channel[c].plane);
+    }
+  });
+  // Only zero-fill on early exit when truncated groups are allowed.
+  if (!allow_truncated_group) scope_guard.Disarm();
+
+  // Read tree.
+  Tree tree_storage;
+  std::vector<uint8_t> context_map_storage;
+  ANSCode code_storage;
+  const Tree *tree = &tree_storage;
+  const ANSCode *code = &code_storage;
+  const std::vector<uint8_t> *context_map = &context_map_storage;
+  if (!header.use_global_tree) {
+    uint64_t max_tree_size = 1024;
+    for (size_t i = 0; i < nb_channels; i++) {
+      Channel &channel = image.channel[i];
+      if (i >= image.nb_meta_channels && (channel.w > options->max_chan_size ||
+                                          channel.h > options->max_chan_size)) {
+        break;
+      }
+      uint64_t pixels = channel.w * channel.h;
+      max_tree_size += pixels;
+    }
+    max_tree_size = std::min(static_cast<uint64_t>(1 << 20), max_tree_size);
+    JXL_RETURN_IF_ERROR(DecodeTree(br, &tree_storage, max_tree_size));
+    JXL_RETURN_IF_ERROR(DecodeHistograms(br, (tree_storage.size() + 1) / 2,
+                                         &code_storage, &context_map_storage));
+  } else {
+    if (!global_tree || !global_code || !global_ctx_map ||
+        global_tree->empty()) {
+      return JXL_FAILURE("No global tree available but one was requested");
+    }
+    tree = global_tree;
+    code = global_code;
+    context_map = global_ctx_map;
+  }
+
+  // Read channels
+  ANSSymbolReader reader(code, br, distance_multiplier);
+  auto tree_lut = jxl::make_unique<TreeLut<uint8_t, true>>();
+  for (; next_channel < nb_channels; next_channel++) {
+    Channel &channel = image.channel[next_channel];
+    if (!channel.w || !channel.h) {
+      continue;  // skip empty channels
+    }
+    if (next_channel >= image.nb_meta_channels &&
+        (channel.w > options->max_chan_size ||
+         channel.h > options->max_chan_size)) {
+      break;
+    }
+    JXL_RETURN_IF_ERROR(DecodeModularChannelMAANS(
+        br, &reader, *context_map, *tree, header.wp_header, next_channel,
+        group_id, *tree_lut, &image));
+
+    // Truncated group.
+    if (!br->AllReadsWithinBounds()) {
+      if (!allow_truncated_group) return JXL_FAILURE("Truncated input");
+      return Status(StatusCode::kNotEnoughBytes);
+    }
+  }
+
+  // Make sure no zero-filling happens even if next_channel < nb_channels.
+  scope_guard.Disarm();
+
+  if (!reader.CheckANSFinalState()) {
+    return JXL_FAILURE("ANS decode final state failed");
+  }
+  return true;
+}
+
+Status ModularGenericDecompress(BitReader *br, Image &image,
+                                GroupHeader *header, size_t group_id,
+                                ModularOptions *options, bool undo_transforms,
+                                const Tree *tree, const ANSCode *code,
+                                const std::vector<uint8_t> *ctx_map,
+                                bool allow_truncated_group) {
+#ifdef JXL_ENABLE_ASSERT
+  std::vector<std::pair<uint32_t, uint32_t>> req_sizes(image.channel.size());
+  for (size_t c = 0; c < req_sizes.size(); c++) {
+    req_sizes[c] = {image.channel[c].w, image.channel[c].h};
+  }
+#endif
+  GroupHeader local_header;
+  if (header == nullptr) header = &local_header;
+  size_t bit_pos = br->TotalBitsConsumed();
+  auto dec_status = ModularDecode(br, image, *header, group_id, options, tree,
+                                  code, ctx_map, allow_truncated_group);
+  if (!allow_truncated_group) JXL_RETURN_IF_ERROR(dec_status);
+  if (dec_status.IsFatalError()) return dec_status;
+  if (undo_transforms) image.undo_transforms(header->wp_header);
+  if (image.error) return JXL_FAILURE("Corrupt file. Aborting.");
+  JXL_DEBUG_V(4,
+              "Modular-decoded a %" PRIuS "x%" PRIuS " nbchans=%" PRIuS
+              " image from %" PRIuS " bytes",
+              image.w, image.h, image.channel.size(),
+              (br->TotalBitsConsumed() - bit_pos) / 8);
+  JXL_DEBUG_V(5, "Modular image: %s", image.DebugString().c_str());
+  (void)bit_pos;
+#ifdef JXL_ENABLE_ASSERT
+  // Check that after applying all transforms we are back to the requested image
+  // sizes, otherwise there's a programming error with the transformations.
+  if (undo_transforms) {
+    JXL_ASSERT(image.channel.size() == req_sizes.size());
+    for (size_t c = 0; c < req_sizes.size(); c++) {
+      JXL_ASSERT(req_sizes[c].first == image.channel[c].w);
+      JXL_ASSERT(req_sizes[c].second == image.channel[c].h);
+    }
+  }
+#endif
+  return dec_status;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/encoding.h b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/encoding.h
new file mode 100644
index 0000000000..4004e27be4
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/encoding.h
@@ -0,0 +1,142 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_ENCODING_H_
+#define LIB_JXL_MODULAR_ENCODING_ENCODING_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <array>
+#include <vector>
+
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/encoding/dec_ma.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/options.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+// Valid range of properties for using lookup tables instead of trees.
+constexpr int32_t kPropRangeFast = 512;
+
+struct GroupHeader : public Fields {
+  GroupHeader();
+
+  JXL_FIELDS_NAME(GroupHeader)
+
+  Status VisitFields(Visitor *JXL_RESTRICT visitor) override {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &use_global_tree));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&wp_header));
+    uint32_t num_transforms = static_cast<uint32_t>(transforms.size());
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(0), Val(1), BitsOffset(4, 2),
+                                           BitsOffset(8, 18), 0,
+                                           &num_transforms));
+    if (visitor->IsReading()) transforms.resize(num_transforms);
+    for (size_t i = 0; i < num_transforms; i++) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&transforms[i]));
+    }
+    return true;
+  }
+
+  bool use_global_tree;
+  weighted::Header wp_header;
+
+  std::vector<Transform> transforms;
+};
+
+FlatTree FilterTree(const Tree &global_tree,
+                    std::array<pixel_type, kNumStaticProperties> &static_props,
+                    size_t *num_props, bool *use_wp, bool *wp_only,
+                    bool *gradient_only);
+
+template <typename T, bool HAS_MULTIPLIERS>
+struct TreeLut {
+  std::array<T, 2 * kPropRangeFast> context_lookup;
+  std::array<int8_t, 2 * kPropRangeFast> offsets;
+  std::array<int8_t, HAS_MULTIPLIERS ? (2 * kPropRangeFast) : 0> multipliers;
+};
+
+template <typename T, bool HAS_MULTIPLIERS>
+bool TreeToLookupTable(const FlatTree &tree, TreeLut<T, HAS_MULTIPLIERS> &lut) {
+  struct TreeRange {
+    // Begin *excluded*, end *included*. This works best with > vs <= decision
+    // nodes.
+    int begin, end;
+    size_t pos;
+  };
+  std::vector<TreeRange> ranges;
+  ranges.push_back(TreeRange{-kPropRangeFast - 1, kPropRangeFast - 1, 0});
+  while (!ranges.empty()) {
+    TreeRange cur = ranges.back();
+    ranges.pop_back();
+    if (cur.begin < -kPropRangeFast - 1 || cur.begin >= kPropRangeFast - 1 ||
+        cur.end > kPropRangeFast - 1) {
+      // Tree is outside the allowed range, exit.
+      return false;
+    }
+    auto &node = tree[cur.pos];
+    // Leaf.
+    if (node.property0 == -1) {
+      if (node.predictor_offset < std::numeric_limits<int8_t>::min() ||
+          node.predictor_offset > std::numeric_limits<int8_t>::max()) {
+        return false;
+      }
+      if (node.multiplier < std::numeric_limits<int8_t>::min() ||
+          node.multiplier > std::numeric_limits<int8_t>::max()) {
+        return false;
+      }
+      if (!HAS_MULTIPLIERS && node.multiplier != 1) {
+        return false;
+      }
+      for (int i = cur.begin + 1; i < cur.end + 1; i++) {
+        lut.context_lookup[i + kPropRangeFast] = node.childID;
+        if (HAS_MULTIPLIERS) {
+          lut.multipliers[i + kPropRangeFast] = node.multiplier;
+        }
+        lut.offsets[i + kPropRangeFast] = node.predictor_offset;
+      }
+      continue;
+    }
+    // > side of top node.
+    if (node.properties[0] >= kNumStaticProperties) {
+      ranges.push_back(TreeRange({node.splitvals[0], cur.end, node.childID}));
+      ranges.push_back(
+          TreeRange({node.splitval0, node.splitvals[0], node.childID + 1}));
+    } else {
+      ranges.push_back(TreeRange({node.splitval0, cur.end, node.childID}));
+    }
+    // <= side
+    if (node.properties[1] >= kNumStaticProperties) {
+      ranges.push_back(
+          TreeRange({node.splitvals[1], node.splitval0, node.childID + 2}));
+      ranges.push_back(
+          TreeRange({cur.begin, node.splitvals[1], node.childID + 3}));
+    } else {
+      ranges.push_back(
+          TreeRange({cur.begin, node.splitval0, node.childID + 2}));
+    }
+  }
+  return true;
+}
+// TODO(veluca): make cleaner interfaces.
+
+Status ValidateChannelDimensions(const Image &image,
+                                 const ModularOptions &options);
+
+Status ModularGenericDecompress(BitReader *br, Image &image,
+                                GroupHeader *header, size_t group_id,
+                                ModularOptions *options,
+                                bool undo_transforms = true,
+                                const Tree *tree = nullptr,
+                                const ANSCode *code = nullptr,
+                                const std::vector<uint8_t> *ctx_map = nullptr,
+                                bool allow_truncated_group = false);
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_ENCODING_ENCODING_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/ma_common.h b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/ma_common.h
new file mode 100644
index 0000000000..71b7847321
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/ma_common.h
@@ -0,0 +1,28 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_MA_COMMON_H_
+#define LIB_JXL_MODULAR_ENCODING_MA_COMMON_H_
+
+#include <stddef.h>
+
+namespace jxl {
+
+// Named context indices (underlying type size_t) used with MA trees.
+// NOTE(review): presumably each value selects the entropy-coding context for
+// one kind of tree symbol -- confirm against the tree encoder/decoder.
+enum MATreeContext : size_t {
+  kSplitValContext = 0,
+  kPropertyContext = 1,
+  kPredictorContext = 2,
+  kOffsetContext = 3,
+  kMultiplierLogContext = 4,
+  kMultiplierBitsContext = 5,
+
+  // Count of the contexts listed above; update it when adding an enumerator.
+  kNumTreeContexts = 6,
+};
+
+// 1 << 22 == 4194304.
+// NOTE(review): presumably the maximum number of nodes allowed in an MA tree
+// -- confirm at the use sites.
+static constexpr size_t kMaxTreeSize = 1 << 22;
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_ENCODING_MA_COMMON_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/modular_image.cc b/third-party/libjxl/libjxl/lib/jxl/modular/modular_image.cc
new file mode 100644
index 0000000000..55b4af1cb3
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/modular_image.cc
@@ -0,0 +1,79 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/modular_image.h"
+
+#include <sstream>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+// Undoes the recorded transforms in reverse order (last applied first).
+// Each transform is removed from `transform` only after its inverse
+// succeeded. On the first failure an error is reported, `error` is set and
+// the failing transform (plus all earlier ones) stays in the list.
+void Image::undo_transforms(const weighted::Header &wp_header,
+                            jxl::ThreadPool *pool) {
+  for (; !transform.empty(); transform.pop_back()) {
+    // Invert a copy of the last transform; Inverse() receives *this.
+    Transform last = transform.back();
+    JXL_DEBUG_V(4, "Undoing transform");
+    const Status inverted = last.Inverse(*this, wp_header, pool);
+    if (!inverted) {
+      JXL_NOTIFY_ERROR("Error while undoing transform.");
+      error = true;
+      return;
+    }
+    JXL_DEBUG_V(8, "Undoing transform: done");
+  }
+}
+
+// Creates an image of iw x ih with `nb_chans` channels, each allocated at the
+// full image size with zero shifts. No meta channels, no error.
+Image::Image(size_t iw, size_t ih, int bitdepth, int nb_chans)
+    : w(iw), h(ih), bitdepth(bitdepth), nb_meta_channels(0), error(false) {
+  for (int remaining = nb_chans; remaining > 0; --remaining) {
+    channel.emplace_back(iw, ih);
+  }
+}
+
+// Creates an empty 0x0 placeholder image (bitdepth 8, no channels). Note that
+// `error` starts out as true for a default-constructed image.
+Image::Image() : w(0), h(0), bitdepth(8), nb_meta_channels(0), error(true) {}
+
+// Move assignment: takes over the channel and transform vectors of `other`
+// and copies its scalar fields. The scalar fields of `other` are left as-is.
+Image &Image::operator=(Image &&other) noexcept {
+  // Heavy members are moved ...
+  channel = std::move(other.channel);
+  transform = std::move(other.transform);
+  // ... plain fields are copied.
+  error = other.error;
+  nb_meta_channels = other.nb_meta_channels;
+  bitdepth = other.bitdepth;
+  h = other.h;
+  w = other.w;
+  return *this;
+}
+
+Image Image::clone() {
+  Image c(w, h, bitdepth, 0);
+  c.nb_meta_channels = nb_meta_channels;
+  c.error = error;
+  c.transform = transform;
+  for (Channel &ch : channel) {
+    Channel a(ch.w, ch.h, ch.hshift, ch.vshift);
+    CopyImageTo(ch.plane, &a.plane);
+    c.channel.push_back(std::move(a));
+  }
+  return c;
+}
+
+#if JXL_DEBUG_V_LEVEL >= 1
+// Human-readable summary: "<w>x<h>, depth: <bitdepth>", followed (when there
+// are channels) by one " <w>x<h>(shift: <hshift>,<vshift>)" entry per
+// channel. Meta channels are marked with a trailing "*".
+std::string Image::DebugString() const {
+  std::ostringstream out;
+  out << w << "x" << h << ", depth: " << bitdepth;
+  if (channel.empty()) return out.str();
+
+  out << ", channels:";
+  size_t index = 0;
+  for (const Channel &ch : channel) {
+    out << " " << ch.w << "x" << ch.h << "(shift: " << ch.hshift << ","
+        << ch.vshift << ")";
+    if (index < nb_meta_channels) out << "*";
+    ++index;
+  }
+  return out.str();
+}
+#endif
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/modular_image.h b/third-party/libjxl/libjxl/lib/jxl/modular/modular_image.h
new file mode 100644
index 0000000000..3e9b5a8a08
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/modular_image.h
@@ -0,0 +1,118 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_MODULAR_IMAGE_H_
+#define LIB_JXL_MODULAR_MODULAR_IMAGE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+typedef int32_t pixel_type;  // can use int16_t if it's only for 8-bit images.
+                             // Need some wiggle room for YCoCg / Squeeze etc
+
+typedef int64_t pixel_type_w;
+
+namespace weighted {
+struct Header;
+}
+
+// One plane of pixel_type samples plus its logical size and subsampling
+// shifts. Move-only: copying is deleted.
+class Channel {
+ public:
+  // Pixel storage. Its allocated size can differ from (w, h) until shrink()
+  // is called.
+  jxl::Plane<pixel_type> plane;
+  // Logical channel dimensions.
+  size_t w, h;
+  int hshift, vshift;  // w ~= image.w >> hshift;  h ~= image.h >> vshift
+  // Allocates an iw x ih plane; both shifts default to 0.
+  Channel(size_t iw, size_t ih, int hsh = 0, int vsh = 0)
+      : plane(iw, ih), w(iw), h(ih), hshift(hsh), vshift(vsh) {}
+
+  Channel(const Channel& other) = delete;
+  Channel& operator=(const Channel& other) = delete;
+
+  // Move assignment
+  // Copies the scalar fields and takes over the plane of `other`.
+  Channel& operator=(Channel&& other) noexcept {
+    w = other.w;
+    h = other.h;
+    hshift = other.hshift;
+    vshift = other.vshift;
+    plane = std::move(other.plane);
+    return *this;
+  }
+
+  // Move constructor
+  Channel(Channel&& other) noexcept = default;
+
+  // Makes the plane match the logical size (w, h). If the sizes differ, the
+  // plane is replaced by a freshly constructed w x h plane; the old pixel
+  // contents are not copied over.
+  void shrink() {
+    if (plane.xsize() == w && plane.ysize() == h) return;
+    jxl::Plane<pixel_type> resizedplane(w, h);
+    plane = std::move(resizedplane);
+  }
+  // Sets the logical size to nw x nh, then reallocates as in shrink().
+  void shrink(int nw, int nh) {
+    w = nw;
+    h = nh;
+    shrink();
+  }
+
+  // Row accessors; forward to the underlying plane.
+  JXL_INLINE pixel_type* Row(const size_t y) { return plane.Row(y); }
+  JXL_INLINE const pixel_type* Row(const size_t y) const {
+    return plane.Row(y);
+  }
+};
+
+class Transform;
+
+// A modular image: a list of channels plus the list of transforms that have
+// been applied to it. Move-only: copying is deleted, use clone() for an
+// explicit deep copy.
+class Image {
+ public:
+  // image data, transforms can dramatically change the number of channels and
+  // their semantics
+  std::vector<Channel> channel;
+  // transforms that have been applied (and that have to be undone)
+  std::vector<Transform> transform;
+
+  // image dimensions (channels may have different dimensions due to transforms)
+  size_t w, h;
+  int bitdepth;
+  size_t nb_meta_channels;  // first few channels might contain palette(s)
+  bool error;               // true if a fatal error occurred, false otherwise
+
+  // Creates nb_chans channels of size iw x ih.
+  Image(size_t iw, size_t ih, int bitdepth, int nb_chans);
+  // Creates an image without channels.
+  Image();
+
+  Image(const Image& other) = delete;
+  Image& operator=(const Image& other) = delete;
+
+  Image& operator=(Image&& other) noexcept;
+  Image(Image&& other) noexcept = default;
+
+  // True when no channel has a non-zero area (this includes the case of
+  // having no channels at all).
+  bool empty() const {
+    for (const auto& ch : channel) {
+      if (ch.w && ch.h) return false;
+    }
+    return true;
+  }
+
+  // Returns a deep copy of this image.
+  Image clone();
+
+  // Undoes the transforms listed in `transform`, last one first.
+  void undo_transforms(const weighted::Header& wp_header,
+                       jxl::ThreadPool* pool = nullptr);
+
+  // NOTE(review): the definition in modular_image.cc is guarded by
+  // `#if JXL_DEBUG_V_LEVEL >= 1`; only call this from code under the same
+  // guard.
+  std::string DebugString() const;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_MODULAR_IMAGE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/options.h b/third-party/libjxl/libjxl/lib/jxl/modular/options.h
new file mode 100644
index 0000000000..ce6596b912
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/options.h
@@ -0,0 +1,117 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_OPTIONS_H_
+#define LIB_JXL_MODULAR_OPTIONS_H_
+
+#include <stdint.h>
+
+#include <array>
+#include <vector>
+
+namespace jxl {
+
+using PropertyVal = int32_t;
+using Properties = std::vector<PropertyVal>;
+
+// Identifiers of the modular predictors, with explicit numeric values
+// (underlying type uint32_t). Values 0..13 are regular predictors; 14 and 15
+// are encoder-only choices.
+// NOTE(review): the numeric values are presumably part of the bitstream, so
+// they should not be renumbered -- confirm.
+enum class Predictor : uint32_t {
+  Zero = 0,
+  Left = 1,
+  Top = 2,
+  Average0 = 3,
+  Select = 4,
+  Gradient = 5,
+  Weighted = 6,
+  TopRight = 7,
+  TopLeft = 8,
+  LeftLeft = 9,
+  Average1 = 10,
+  Average2 = 11,
+  Average3 = 12,
+  Average4 = 13,
+  // The following predictors are encoder-only.
+  Best = 14,  // Best of Gradient and Weighted
+  Variable =
+      15,  // Find the best decision tree for predictors/predictor per row
+};
+
+// Number of predictors up to and including Average4 (14).
+constexpr size_t kNumModularPredictors =
+    static_cast<size_t>(Predictor::Average4) + 1;
+// Number of predictors up to and including Variable (16).
+constexpr size_t kNumModularEncoderPredictors =
+    static_cast<size_t>(Predictor::Variable) + 1;
+
+static constexpr ssize_t kNumStaticProperties = 2;  // channel, group_id.
+
+// One pair of uint32_t per static property.
+// NOTE(review): presumably each pair is a [begin, end) range of property
+// values -- confirm at the use sites.
+using StaticPropRange =
+    std::array<std::array<uint32_t, 2>, kNumStaticProperties>;
+
+// Associates a multiplier with a range of static property values.
+struct ModularMultiplierInfo {
+  StaticPropRange range;
+  uint32_t multiplier;
+};
+
+// Options for modular encoding/decoding. Every field has a default, so a
+// default-constructed ModularOptions is usable as-is.
+struct ModularOptions {
+  /// Used in both encode and decode:
+
+  // Stop encoding/decoding when reaching a (non-meta) channel that has a
+  // dimension bigger than max_chan_size.
+  size_t max_chan_size = 0xFFFFFF;
+
+  // Used during decoding for validation of transforms (squeezing) scheme.
+  size_t group_dim = 0x1FFFFFFF;
+
+  /// Encode options:
+  // Fraction of pixels to look at to learn a MA tree
+  // Number of iterations to do to learn a MA tree
+  // (if zero there is no MA context model)
+  // NOTE(review): the two descriptions above disagree (fraction vs. iteration
+  // count); the float type and the .5f default suggest "fraction" -- confirm.
+  float nb_repeats = .5f;
+
+  // Maximum number of (previous channel) properties to use in the MA trees
+  int max_properties = 0;  // no previous channels
+
+  // Alternative heuristic tweaks.
+  // Properties default to channel, group, weighted, gradient residual, W-NW,
+  // NW-N, N-NE, N-NN
+  std::vector<uint32_t> splitting_heuristics_properties = {0,  1,  15, 9,
+                                                           10, 11, 12, 13};
+  float splitting_heuristics_node_threshold = 96;
+  size_t max_property_values = 32;
+
+  // Predictor to use for each channel.
+  // The default (-1 cast to Predictor) is not one of the named enumerators.
+  // NOTE(review): presumably a "not set" sentinel -- confirm at the use sites.
+  Predictor predictor = static_cast<Predictor>(-1);
+
+  int wp_mode = 0;
+
+  float fast_decode_multiplier = 1.01f;
+
+  // Forces the encoder to produce a tree that is compatible with the WP-only
+  // decode path (or with the no-wp path, or the gradient-only path).
+  enum class TreeMode { kGradientOnly, kWPOnly, kNoWP, kDefault };
+  TreeMode wp_tree_mode = TreeMode::kDefault;
+
+  // Skip fast paths in the encoder.
+  bool skip_encoder_fast_path = false;
+
+  // Kind of tree to use.
+  // TODO(veluca): add tree kinds for JPEG recompression with CfL enabled,
+  // general AC metadata, different DC qualities, and others.
+  enum class TreeKind {
+    kTrivialTreeNoPredictor,
+    kLearn,
+    kJpegTranscodeACMeta,
+    kFalconACMeta,
+    kACMeta,
+    kWPFixedDC,
+    kGradientFixedDC,
+  };
+  TreeKind tree_kind = TreeKind::kLearn;
+
+  // Ignore the image and just pretend all tokens are zeroes
+  bool zero_tokens = false;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_OPTIONS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_palette.cc b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_palette.cc
new file mode 100644
index 0000000000..e400e15e98
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_palette.cc
@@ -0,0 +1,595 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/enc_palette.h"
+
+#include <array>
+#include <map>
+#include <set>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/enc_transform.h"
+#include "lib/jxl/modular/transform/palette.h"
+
+namespace jxl {
+
+namespace palette_internal {
+
+// When true, the lossy palette search in this file additionally tries the
+// index produced by the high_quality variant of
+// QuantizeColorToImplicitPaletteIndex.
+static constexpr bool kEncodeToHighQualityImplicitPalette = true;
+
+// Inclusive.
+// Evaluates to -143; the lossy palette search in this file starts its index
+// scan at this value.
+static constexpr int kMinImplicitPaletteIndex = -(2 * 72 - 1);
+
+// Weighted squared distance between the float color `a` and the integer
+// color `b`. Both must have the same number of channels (asserted).
+// The result is 4 * (sum of per-channel weighted squared differences) plus
+// the squared difference of two weighted channel sums.
+float ColorDistance(const std::vector<float> &JXL_RESTRICT a,
+                    const std::vector<pixel_type> &JXL_RESTRICT b) {
+  JXL_ASSERT(a.size() == b.size());
+  const size_t num_channels = a.size();
+  // Extra weight for each of the first three channels when a[c] + b[c]
+  // reaches the threshold below.
+  const float kBrightBonus[3] = {
+      1.15,
+      1.15,
+      1.12,
+  };
+  // Threshold derived from the first three channels of both colors; stays 0
+  // when there are fewer than three channels.
+  float ave3 = 0;
+  if (num_channels >= 3) {
+    ave3 = (a[0] + b[0] + a[1] + b[1] + a[2] + b[2]) * (1.21f / 3.0f);
+  }
+  float distance = 0;
+  float sum_a = 0, sum_b = 0;
+  for (size_t c = 0; c < num_channels; ++c) {
+    // Base weights: channel 0 -> 3, channel 1 -> 5, all others -> 2 for the
+    // per-channel term and 1 for the channel sums.
+    float weight;
+    int sum_weight;
+    switch (c) {
+      case 0:
+        weight = 3;
+        sum_weight = 3;
+        break;
+      case 1:
+        weight = 5;
+        sum_weight = 5;
+        break;
+      default:
+        weight = 2;
+        sum_weight = 1;
+        break;
+    }
+    if (c < 3 && (a[c] + b[c] >= ave3)) {
+      weight += kBrightBonus[c];
+      if (c == 2 && ((a[2] + b[2]) < 1.22 * ave3)) {
+        weight -= 0.5;
+      }
+    }
+    const float difference =
+        static_cast<float>(a[c]) - static_cast<float>(b[c]);
+    distance += difference * difference * weight * weight;
+    sum_a += a[c] * sum_weight;
+    sum_b += b[c] * sum_weight;
+  }
+  distance *= 4;
+  float sum_difference = sum_a - sum_b;
+  distance += sum_difference * sum_difference;
+  return distance;
+}
+
+// Maps `color` to an index into the implicit (not stored) part of the
+// palette. Each channel is quantized to a coordinate, and the coordinates
+// are combined as digits in base kLargeCube (high_quality) or kSmallCube.
+// The result is offset by palette_size, and additionally by kLargeCubeOffset
+// in the high_quality case.
+static int QuantizeColorToImplicitPaletteIndex(
+    const std::vector<pixel_type> &color, const int palette_size,
+    const int bit_depth, bool high_quality) {
+  const int cube_side = high_quality ? kLargeCube : kSmallCube;
+  int index = 0;
+  int stride = 1;
+  for (const pixel_type component : color) {
+    int value = component;
+    if (!high_quality) {
+      // The small cube subtracts a bias first and clamps at zero.
+      value -= 1 << (std::max(0, bit_depth - 3));
+      value = std::max(0, value);
+    }
+    int quantized = ((kLargeCube - 1) * value + (1 << (bit_depth - 1))) /
+                    ((1 << bit_depth) - 1);
+    JXL_ASSERT((quantized % kLargeCube) == quantized);
+    if (!high_quality && quantized > kSmallCube - 1) {
+      quantized = kSmallCube - 1;
+    }
+    index += quantized * stride;
+    stride *= cube_side;
+  }
+  if (high_quality) {
+    return index + palette_size + kLargeCubeOffset;
+  }
+  return index + palette_size;
+}
+
+}  // namespace palette_internal
+
+// Divides `value` by `div`, rounding to the nearest integer with symmetric
+// rounding around 0: RoundInt(-v, d) == -RoundInt(v, d).
+// The computation is done in 64 bits and without recursion, so that
+// value == INT_MIN (whose negation does not fit in an int) and values close
+// to INT_MAX (where value + div / 2 would overflow) are handled.
+// `div` must be non-zero.
+int RoundInt(int value, int div) {
+  const bool negative = value < 0;
+  const int64_t magnitude =
+      negative ? -static_cast<int64_t>(value) : static_cast<int64_t>(value);
+  const int64_t quotient = (magnitude + div / 2) / div;
+  return static_cast<int>(negative ? -quotient : quotient);
+}
+
+// State shared between the two passes of the palette search. The non-final
+// pass records per-pixel residuals in `deltas` / `delta_distances`; the
+// final pass turns them into `frequent_deltas`.
+struct PaletteIterationData {
+  // Upper bound on the number of entries stored in `frequent_deltas`.
+  static constexpr int kMaxDeltas = 128;
+  bool final_run = false;
+  // Recorded residuals, one vector per channel (three channels).
+  std::vector<pixel_type> deltas[3];
+  // One distance per recorded residual, parallel to `deltas`.
+  std::vector<double> delta_distances;
+  // Selected deltas, one vector per channel (three channels).
+  std::vector<pixel_type> frequent_deltas[3];
+
+  // Populates `frequent_deltas` with items from `deltas` based on frequencies
+  // and color distances.
+  void FindFrequentColorDeltas(int num_pixels, int bitdepth) {
+    using Delta3 = std::array<pixel_type, 3>;
+    using ScoredDelta = std::pair<Delta3, double>;
+    pixel_type bucket_size = 3 << std::max(0, bitdepth - 8);
+
+    // Cluster the residuals into buckets and accumulate, per bucket, a score
+    // weighted by the distance recorded for each residual.
+    std::map<Delta3, double> bucket_scores;
+    const size_t num_samples = deltas[0].size();
+    for (size_t i = 0; i < num_samples; ++i) {
+      Delta3 bucket = {{RoundInt(deltas[0][i], bucket_size),
+                        RoundInt(deltas[1][i], bucket_size),
+                        RoundInt(deltas[2][i], bucket_size)}};
+      const bool is_zero = bucket[0] == 0 && bucket[1] == 0 && bucket[2] == 0;
+      if (is_zero) continue;
+      bucket_scores[bucket] += sqrt(sqrt(delta_distances[i]));
+    }
+
+    const float delta_distance_multiplier = 1.0f / num_pixels;
+
+    // Scale every score by the magnitude of its delta and normalize by the
+    // pixel count.
+    for (auto &entry : bucket_scores) {
+      const Delta3 &bucket = entry.first;
+      std::vector<pixel_type> current_delta = {bucket[0], bucket[1],
+                                               bucket[2]};
+      float delta_distance =
+          sqrt(palette_internal::ColorDistance({0, 0, 0}, current_delta)) + 1;
+      entry.second *= delta_distance * delta_distance_multiplier;
+    }
+
+    // Rank the buckets, highest score first.
+    std::vector<ScoredDelta> ranked(bucket_scores.begin(),
+                                    bucket_scores.end());
+    std::sort(ranked.begin(), ranked.end(),
+              [](const ScoredDelta &lhs, const ScoredDelta &rhs) {
+                return lhs.second > rhs.second;
+              });
+
+    // Keep the best ones, scaled back from bucket units to pixel units.
+    for (const ScoredDelta &candidate : ranked) {
+      if (frequent_deltas[0].size() >= kMaxDeltas) break;
+      // Number obtained by optimizing on jyrki31 corpus:
+      if (candidate.second < 17) break;
+      for (int c = 0; c < 3; ++c) {
+        frequent_deltas[c].push_back(candidate.first[c] * bucket_size);
+      }
+    }
+  }
+};
+
+// One pass of the forward palette transform over channels [begin_c, end_c]
+// (inclusive) of `input`.
+//
+// - Lossless with a single channel ("channel palette"): collects the distinct
+//   values of the channel; returns false when nb_colors is 0 or when there
+//   are more than nb_colors distinct values. Otherwise the pixels are
+//   replaced by palette indices, the palette is inserted as a new first
+//   channel, nb_meta_channels is incremented, `predictor` is set to Zero and
+//   nb_colors is set to the number of distinct values. This path modifies
+//   `input` regardless of palette_iteration_data.final_run.
+// - Otherwise: builds a candidate palette from the pixel colors. In lossless
+//   mode it returns false when more than nb_colors distinct colors occur. In
+//   lossy mode it picks, per pixel, the palette / delta / implicit index with
+//   the lowest cost and diffuses the remaining error to neighboring pixels.
+//   `input` is only modified when palette_iteration_data.final_run is set; a
+//   non-final lossy pass instead records residuals in palette_iteration_data.
+//
+// On success nb_colors holds the number of explicit (non-delta) palette
+// colors and, on the general path, nb_deltas the number of delta entries.
+// Returns an error if CheckEqualChannels fails for the channel range.
+Status FwdPaletteIteration(Image &input, uint32_t begin_c, uint32_t end_c,
+                           uint32_t &nb_colors, uint32_t &nb_deltas,
+                           bool ordered, bool lossy, Predictor &predictor,
+                           const weighted::Header &wp_header,
+                           PaletteIterationData &palette_iteration_data) {
+  JXL_QUIET_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, end_c));
+  JXL_ASSERT(begin_c >= input.nb_meta_channels);
+  // Number of channels covered by the palette.
+  uint32_t nb = end_c - begin_c + 1;
+
+  size_t w = input.channel[begin_c].w;
+  size_t h = input.channel[begin_c].h;
+
+  if (!lossy && nb == 1) {
+    // Channel palette special case
+    if (nb_colors == 0) return false;
+    std::vector<pixel_type> lookup;
+    pixel_type minval, maxval;
+    compute_minmax(input.channel[begin_c], &minval, &maxval);
+    // Computed in 64 bits so that maxval - minval cannot overflow.
+    size_t lookup_table_size =
+        static_cast<int64_t>(maxval) - static_cast<int64_t>(minval) + 1;
+    if (lookup_table_size > palette_internal::kMaxPaletteLookupTableSize) {
+      // a lookup table would use too much memory, instead use a slower approach
+      // with std::set
+      std::set<pixel_type> chpalette;
+      pixel_type idx = 0;
+      for (size_t y = 0; y < h; y++) {
+        const pixel_type *p = input.channel[begin_c].Row(y);
+        for (size_t x = 0; x < w; x++) {
+          const bool new_color = chpalette.insert(p[x]).second;
+          if (new_color) {
+            idx++;
+            if (idx > (int)nb_colors) return false;
+          }
+        }
+      }
+      JXL_DEBUG_V(6, "Channel %i uses only %i colors.", begin_c, idx);
+      // Palette channel: one row with `idx` entries.
+      Channel pch(idx, 1);
+      pch.hshift = -1;
+      pch.vshift = -1;
+      nb_colors = idx;
+      idx = 0;
+      pixel_type *JXL_RESTRICT p_palette = pch.Row(0);
+      // std::set iterates in ascending order, so the palette is sorted.
+      for (pixel_type p : chpalette) {
+        p_palette[idx++] = p;
+      }
+      for (size_t y = 0; y < h; y++) {
+        pixel_type *p = input.channel[begin_c].Row(y);
+        for (size_t x = 0; x < w; x++) {
+          // Linear search for the palette index of this pixel value.
+          // NOTE(review): p_palette[idx] is read before idx is compared with
+          // nb_colors; this stays in bounds only because every pixel value
+          // was inserted into chpalette above and is therefore found.
+          for (idx = 0; p[x] != p_palette[idx] && idx < (int)nb_colors; idx++) {
+          }
+          JXL_DASSERT(idx < (int)nb_colors);
+          p[x] = idx;
+        }
+      }
+      predictor = Predictor::Zero;
+      input.nb_meta_channels++;
+      input.channel.insert(input.channel.begin(), std::move(pch));
+
+      return true;
+    }
+    // Lookup-table variant: first mark which values occur ...
+    lookup.resize(lookup_table_size, 0);
+    pixel_type idx = 0;
+    for (size_t y = 0; y < h; y++) {
+      const pixel_type *p = input.channel[begin_c].Row(y);
+      for (size_t x = 0; x < w; x++) {
+        if (lookup[p[x] - minval] == 0) {
+          lookup[p[x] - minval] = 1;
+          idx++;
+          if (idx > (int)nb_colors) return false;
+        }
+      }
+    }
+    JXL_DEBUG_V(6, "Channel %i uses only %i colors.", begin_c, idx);
+    Channel pch(idx, 1);
+    pch.hshift = -1;
+    pch.vshift = -1;
+    nb_colors = idx;
+    idx = 0;
+    pixel_type *JXL_RESTRICT p_palette = pch.Row(0);
+    // ... then turn the marks into palette indices (ascending by value).
+    for (size_t i = 0; i < lookup_table_size; i++) {
+      if (lookup[i]) {
+        p_palette[idx] = i + minval;
+        lookup[i] = idx;
+        idx++;
+      }
+    }
+    for (size_t y = 0; y < h; y++) {
+      pixel_type *p = input.channel[begin_c].Row(y);
+      for (size_t x = 0; x < w; x++) p[x] = lookup[p[x] - minval];
+    }
+    predictor = Predictor::Zero;
+    input.nb_meta_channels++;
+    input.channel.insert(input.channel.begin(), std::move(pch));
+    return true;
+  }
+
+  // Lossy mode keeps a separate copy of the channels, which receives the
+  // quantized pixel values (p_quant below).
+  Image quantized_input;
+  if (lossy) {
+    quantized_input = Image(w, h, input.bitdepth, nb);
+    for (size_t c = 0; c < nb; c++) {
+      CopyImageTo(input.channel[begin_c + c].plane,
+                  &quantized_input.channel[c].plane);
+    }
+  }
+
+  JXL_DEBUG_V(
+      7, "Trying to represent channels %i-%i using at most a %i-color palette.",
+      begin_c, end_c, nb_colors);
+  nb_deltas = 0;
+  bool delta_used = false;
+  // Distinct colors seen so far; the second container keeps the same colors
+  // in insertion order.
+  std::set<std::vector<pixel_type>> candidate_palette;
+  std::vector<std::vector<pixel_type>> candidate_palette_imageorder;
+  std::vector<pixel_type> color(nb);
+  std::vector<float> color_with_error(nb);
+  std::vector<const pixel_type *> p_in(nb);
+  // Maps a color to its position among the explicit palette colors.
+  std::map<std::vector<pixel_type>, size_t> inv_palette;
+
+  if (lossy) {
+    palette_iteration_data.FindFrequentColorDeltas(w * h, input.bitdepth);
+    nb_deltas = palette_iteration_data.frequent_deltas[0].size();
+
+    // Count color frequency for colors that make a cross.
+    std::map<std::vector<pixel_type>, size_t> color_freq_map;
+    // Border pixels are skipped so that all four neighbors exist.
+    for (size_t y = 1; y + 1 < h; y++) {
+      for (uint32_t c = 0; c < nb; c++) {
+        p_in[c] = input.channel[begin_c + c].Row(y);
+      }
+      for (size_t x = 1; x + 1 < w; x++) {
+        for (uint32_t c = 0; c < nb; c++) {
+          color[c] = p_in[c][x];
+        }
+        int offsets[4][2] = {{1, 0}, {-1, 0}, {0, 1}, {0, -1}};
+        bool makes_cross = true;
+        for (int i = 0; i < 4 && makes_cross; ++i) {
+          int dx = offsets[i][0];
+          int dy = offsets[i][1];
+          for (uint32_t c = 0; c < nb && makes_cross; c++) {
+            if (input.channel[begin_c + c].Row(y + dy)[x + dx] != color[c]) {
+              makes_cross = false;
+            }
+          }
+        }
+        if (makes_cross) color_freq_map[color] += 1;
+      }
+    }
+    // Add colors satisfying frequency condition to the palette.
+    constexpr float kImageFraction = 0.01f;
+    size_t color_frequency_lower_bound = 5 + input.h * input.w * kImageFraction;
+    for (const auto &color_freq : color_freq_map) {
+      if (color_freq.second > color_frequency_lower_bound) {
+        candidate_palette.insert(color_freq.first);
+        candidate_palette_imageorder.push_back(color_freq.first);
+      }
+    }
+  }
+
+  // Collect colors in scan order.
+  for (size_t y = 0; y < h; y++) {
+    for (uint32_t c = 0; c < nb; c++) {
+      p_in[c] = input.channel[begin_c + c].Row(y);
+    }
+    for (size_t x = 0; x < w; x++) {
+      // Lossy: stop adding colors once the palette is full. This `break` only
+      // leaves the x loop; the condition is hit again at the start of every
+      // following row.
+      if (lossy && candidate_palette.size() >= nb_colors) break;
+      for (uint32_t c = 0; c < nb; c++) {
+        color[c] = p_in[c][x];
+      }
+      const bool new_color = candidate_palette.insert(color).second;
+      if (new_color) {
+        candidate_palette_imageorder.push_back(color);
+      }
+      if (candidate_palette.size() > nb_colors) {
+        return false;  // too many colors
+      }
+    }
+  }
+
+  // From here on nb_colors counts the delta entries plus the explicit colors;
+  // the deltas are subtracted again before returning.
+  nb_colors = nb_deltas + candidate_palette.size();
+  JXL_DEBUG_V(6, "Channels %i-%i can be represented using a %i-color palette.",
+              begin_c, end_c, nb_colors);
+
+  // Palette channel: nb rows (one per color channel) of nb_colors entries.
+  Channel pch(nb_colors, nb);
+  pch.hshift = -1;
+  pch.vshift = -1;
+  pixel_type *JXL_RESTRICT p_palette = pch.Row(0);
+  intptr_t onerow = pch.plane.PixelsPerRow();
+  intptr_t onerow_image = input.channel[begin_c].plane.PixelsPerRow();
+  const int bit_depth = std::min(input.bitdepth, 24);
+
+  if (lossy) {
+    // The delta entries occupy the first nb_deltas palette positions.
+    // NOTE(review): rows 0..2 are written regardless of nb, while pch has nb
+    // rows; this assumes nb >= 3 whenever nb_deltas > 0 -- confirm callers.
+    for (uint32_t i = 0; i < nb_deltas; i++) {
+      for (size_t c = 0; c < 3; c++) {
+        p_palette[c * onerow + i] =
+            palette_iteration_data.frequent_deltas[c][i];
+      }
+    }
+  }
+
+  int x = 0;
+  if (ordered && nb >= 3) {
+    JXL_DEBUG_V(7, "Palette of %i colors, using luma order", nb_colors);
+    // sort on luma (multiplied by alpha if available)
+    std::sort(candidate_palette_imageorder.begin(),
+              candidate_palette_imageorder.end(),
+              [](std::vector<pixel_type> ap, std::vector<pixel_type> bp) {
+                float ay, by;
+                ay = (0.299f * ap[0] + 0.587f * ap[1] + 0.114f * ap[2] + 0.1f);
+                if (ap.size() > 3) ay *= 1.f + ap[3];
+                by = (0.299f * bp[0] + 0.587f * bp[1] + 0.114f * bp[2] + 0.1f);
+                if (bp.size() > 3) by *= 1.f + bp[3];
+                return ay < by;
+              });
+  } else {
+    JXL_DEBUG_V(7, "Palette of %i colors, using image order", nb_colors);
+  }
+  // Store the explicit colors after the delta entries and remember the
+  // position of each color.
+  for (auto pcol : candidate_palette_imageorder) {
+    JXL_DEBUG_V(9, "  Color %i :  ", x);
+    for (size_t i = 0; i < nb; i++) {
+      p_palette[nb_deltas + i * onerow + x] = pcol[i];
+      JXL_DEBUG_V(9, "%i ", pcol[i]);
+    }
+    inv_palette[pcol] = x;
+    x++;
+  }
+  // One weighted-predictor state per channel.
+  std::vector<weighted::State> wp_states;
+  for (size_t c = 0; c < nb; c++) {
+    wp_states.emplace_back(wp_header, w, h);
+  }
+  std::vector<pixel_type *> p_quant(nb);
+  // Three rows of error for dithering: y to y + 2.
+  // Each row has two pixels of padding in the ends, which is
+  // beneficial for both precision and encoding speed.
+  std::vector<std::vector<float>> error_row[3];
+  if (lossy) {
+    for (int i = 0; i < 3; ++i) {
+      error_row[i].resize(nb);
+      for (size_t c = 0; c < nb; ++c) {
+        error_row[i][c].resize(w + 4);
+      }
+    }
+  }
+  // Main pass: choose a palette index for every pixel.
+  for (size_t y = 0; y < h; y++) {
+    for (size_t c = 0; c < nb; c++) {
+      p_in[c] = input.channel[begin_c + c].Row(y);
+      if (lossy) p_quant[c] = quantized_input.channel[c].Row(y);
+    }
+    // Indices are written into the first palette channel (final run only).
+    pixel_type *JXL_RESTRICT p = input.channel[begin_c].Row(y);
+    for (size_t x = 0; x < w; x++) {
+      int index;
+      if (!lossy) {
+        // Lossless: exact lookup of the pixel color.
+        for (size_t c = 0; c < nb; c++) color[c] = p_in[c][x];
+        index = inv_palette[color];
+      } else {
+        int best_index = 0;
+        bool best_is_delta = false;
+        float best_distance = std::numeric_limits<float>::infinity();
+        std::vector<pixel_type> best_val(nb, 0);
+        std::vector<pixel_type> ideal_residual(nb, 0);
+        std::vector<pixel_type> quantized_val(nb);
+        std::vector<pixel_type> predictions(nb);
+        static const double kDiffusionMultiplier[] = {0.55, 0.75};
+        // The candidates are evaluated twice, with two strengths of the
+        // diffused error added to the target color; the overall best wins.
+        for (int diffusion_index = 0; diffusion_index < 2; ++diffusion_index) {
+          for (size_t c = 0; c < nb; c++) {
+            // final_run is a bool: no error is added on the non-final pass.
+            color_with_error[c] =
+                p_in[c][x] + palette_iteration_data.final_run *
+                                 kDiffusionMultiplier[diffusion_index] *
+                                 error_row[0][c][x + 2];
+            color[c] = Clamp1(lroundf(color_with_error[c]), 0l,
+                              (1l << input.bitdepth) - 1);
+          }
+
+          for (size_t c = 0; c < nb; ++c) {
+            predictions[c] = PredictNoTreeWP(w, p_quant[c] + x, onerow_image, x,
+                                             y, predictor, &wp_states[c])
+                                 .guess;
+          }
+          // Scores one candidate index and keeps it if it beats the best
+          // candidate so far.
+          const auto TryIndex = [&](const int index) {
+            for (size_t c = 0; c < nb; c++) {
+              quantized_val[c] = palette_internal::GetPaletteValue(
+                  p_palette, index, /*c=*/c,
+                  /*palette_size=*/nb_colors,
+                  /*onerow=*/onerow, /*bit_depth=*/bit_depth);
+              // Delta entries are relative to the prediction.
+              if (index < static_cast<int>(nb_deltas)) {
+                quantized_val[c] += predictions[c];
+              }
+            }
+            const float color_distance =
+                32.0 / (1LL << std::max(0, 2 * (bit_depth - 8))) *
+                palette_internal::ColorDistance(color_with_error,
+                                                quantized_val);
+            // Fixed cost per kind of index, added to the color distance.
+            float index_penalty = 0;
+            if (index == -1) {
+              index_penalty = -124;
+            } else if (index < 0) {
+              index_penalty = -2 * index;
+            } else if (index < static_cast<int>(nb_deltas)) {
+              index_penalty = 250;
+            } else if (index < static_cast<int>(nb_colors)) {
+              index_penalty = 150;
+            } else if (index < static_cast<int>(nb_colors) +
+                                   palette_internal::kLargeCubeOffset) {
+              index_penalty = 70;
+            } else {
+              index_penalty = 256;
+            }
+            const float distance = color_distance + index_penalty;
+            if (distance < best_distance) {
+              best_distance = distance;
+              best_index = index;
+              best_is_delta = index < static_cast<int>(nb_deltas);
+              best_val.swap(quantized_val);
+              for (size_t c = 0; c < nb; ++c) {
+                ideal_residual[c] = color_with_error[c] - predictions[c];
+              }
+            }
+          };
+          // Try the negative indices, the deltas and the explicit colors ...
+          for (index = palette_internal::kMinImplicitPaletteIndex;
+               index < static_cast<int32_t>(nb_colors); index++) {
+            TryIndex(index);
+          }
+          // ... plus the implicit entries nearest to the target color.
+          TryIndex(palette_internal::QuantizeColorToImplicitPaletteIndex(
+              color, nb_colors, bit_depth,
+              /*high_quality=*/false));
+          if (palette_internal::kEncodeToHighQualityImplicitPalette) {
+            TryIndex(palette_internal::QuantizeColorToImplicitPaletteIndex(
+                color, nb_colors, bit_depth,
+                /*high_quality=*/true));
+          }
+        }
+        index = best_index;
+        delta_used |= best_is_delta;
+        if (!palette_iteration_data.final_run) {
+          // Record the residual of this pixel for FindFrequentColorDeltas.
+          // NOTE(review): reads ideal_residual[0..2], which has nb entries;
+          // this assumes nb >= 3 in lossy mode -- confirm callers.
+          for (size_t c = 0; c < 3; ++c) {
+            palette_iteration_data.deltas[c].push_back(ideal_residual[c]);
+          }
+          palette_iteration_data.delta_distances.push_back(best_distance);
+        }
+
+        for (size_t c = 0; c < nb; ++c) {
+          wp_states[c].UpdateErrors(best_val[c], x, y, w);
+          p_quant[c][x] = best_val[c];
+        }
+        // Length of the quantization error vector; errors longer than
+        // len_limit are scaled down (`modulate`) before being diffused.
+        float len_error = 0;
+        for (size_t c = 0; c < nb; ++c) {
+          float local_error = color_with_error[c] - best_val[c];
+          len_error += local_error * local_error;
+        }
+        len_error = sqrt(len_error);
+        float modulate = 1.0;
+        int len_limit = 38 << std::max(0, bit_depth - 8);
+        if (len_error > len_limit) {
+          modulate *= len_limit / len_error;
+        }
+        for (size_t c = 0; c < nb; ++c) {
+          float total_error = (color_with_error[c] - best_val[c]);
+
+          // If the neighboring pixels have some error in the opposite
+          // direction of total_error, cancel some or all of it out before
+          // spreading among them.
+          // Entries are {row, column offset} into error_row.
+          // NOTE(review): the loops below use only the first 11 of these 12
+          // entries ({2, 4} is never visited) -- confirm this is intended.
+          constexpr int offsets[12][2] = {{1, 2}, {0, 3}, {0, 4}, {1, 1},
+                                          {1, 3}, {2, 2}, {1, 0}, {1, 4},
+                                          {2, 1}, {2, 3}, {2, 0}, {2, 4}};
+          float total_available = 0;
+          for (int i = 0; i < 11; ++i) {
+            const int row = offsets[i][0];
+            const int col = offsets[i][1];
+            if (std::signbit(error_row[row][c][x + col]) !=
+                std::signbit(total_error)) {
+              total_available += error_row[row][c][x + col];
+            }
+          }
+          float weight =
+              std::abs(total_error) / (std::abs(total_available) + 1e-3);
+          weight = std::min(weight, 1.0f);
+          for (int i = 0; i < 11; ++i) {
+            const int row = offsets[i][0];
+            const int col = offsets[i][1];
+            if (std::signbit(error_row[row][c][x + col]) !=
+                std::signbit(total_error)) {
+              total_error += weight * error_row[row][c][x + col];
+              error_row[row][c][x + col] *= (1 - weight);
+            }
+          }
+          total_error *= modulate;
+          // Spread the remaining error in units of 1/14: 2 + 1 on the current
+          // row, 1 + 5 on the next row and 5 on the row after that.
+          const float remaining_error = (1.0f / 14.) * total_error;
+          error_row[0][c][x + 3] += 2 * remaining_error;
+          error_row[0][c][x + 4] += remaining_error;
+          // error_row[1][c][x + 0] receives a second share in the loop below.
+          error_row[1][c][x + 0] += remaining_error;
+          for (int i = 0; i < 5; ++i) {
+            error_row[1][c][x + i] += remaining_error;
+            error_row[2][c][x + i] += remaining_error;
+          }
+        }
+      }
+      if (palette_iteration_data.final_run) p[x] = index;
+    }
+    if (lossy) {
+      // Rotate the error rows (1 -> 0, 2 -> 1) and clear the new last row.
+      for (size_t c = 0; c < nb; ++c) {
+        error_row[0][c].swap(error_row[1][c]);
+        error_row[1][c].swap(error_row[2][c]);
+        std::fill(error_row[2][c].begin(), error_row[2][c].end(), 0.f);
+      }
+    }
+  }
+  if (!delta_used) {
+    predictor = Predictor::Zero;
+  }
+  if (palette_iteration_data.final_run) {
+    // Channel begin_c now holds the indices: drop the other palette-covered
+    // channels and insert the palette itself as the first (meta) channel.
+    input.nb_meta_channels++;
+    input.channel.erase(input.channel.begin() + begin_c + 1,
+                        input.channel.begin() + end_c + 1);
+    input.channel.insert(input.channel.begin(), std::move(pch));
+  }
+  // Report only the explicit colors; the deltas are reported via nb_deltas.
+  nb_colors -= nb_deltas;
+  return true;
+}
+
+// Applies the forward palette transform to channels [begin_c, end_c] of
+// `input`. For lossy palettes on images with a bitdepth of at least 8, a
+// non-final preprocessing iteration is run first, on scratch copies of
+// nb_colors / nb_deltas; it fills the iteration state that the final
+// iteration then uses. Errors of the preprocessing iteration are propagated.
+Status FwdPalette(Image &input, uint32_t begin_c, uint32_t end_c,
+                  uint32_t &nb_colors, uint32_t &nb_deltas, bool ordered,
+                  bool lossy, Predictor &predictor,
+                  const weighted::Header &wp_header) {
+  PaletteIterationData iteration_state;
+  const bool run_preprocessing_pass = lossy && input.bitdepth >= 8;
+  if (run_preprocessing_pass) {
+    // Scratch copies: the caller's counts are only updated by the final run.
+    uint32_t scratch_colors = nb_colors;
+    uint32_t scratch_deltas = nb_deltas;
+    JXL_RETURN_IF_ERROR(FwdPaletteIteration(
+        input, begin_c, end_c, scratch_colors, scratch_deltas, ordered, lossy,
+        predictor, wp_header, iteration_state));
+  }
+  iteration_state.final_run = true;
+  return FwdPaletteIteration(input, begin_c, end_c, nb_colors, nb_deltas,
+                             ordered, lossy, predictor, wp_header,
+                             iteration_state);
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_palette.h b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_palette.h
new file mode 100644
index 0000000000..0f3d66825b
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_palette.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_PALETTE_H_
+#define LIB_JXL_MODULAR_TRANSFORM_ENC_PALETTE_H_
+
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+Status FwdPalette(Image &input, uint32_t begin_c, uint32_t end_c,  // encoder-side palette transform over channels begin_c..end_c
+                  uint32_t &nb_colors, uint32_t &nb_deltas, bool ordered,  // both counts are passed by reference to the final iteration pass
+                  bool lossy, Predictor &predictor,  // lossy with input.bitdepth >= 8 triggers an extra preprocessing pass
+                  const weighted::Header &wp_header);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_ENC_PALETTE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_rct.cc b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_rct.cc
new file mode 100644
index 0000000000..050563a3c2
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_rct.cc
@@ -0,0 +1,73 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/enc_rct.h"
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"  // CheckEqualChannels
+
+namespace jxl {
+
+Status FwdRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) {  // forward RCT, in place on channels begin_c..begin_c+2
+  JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, begin_c + 2));
+  if (rct_type == 0) {  // noop
+    return false;  // NOTE(review): InvRCT returns true for type 0; false here presumably means "nothing applied" - confirm
+  }
+  // Permutation: 0=RGB, 1=GBR, 2=BRG, 3=RBG, 4=GRB, 5=BGR
+  int permutation = rct_type / 7;  // no range check here (InvRCT checks < 6); the % 3 below keeps indices inside the 3 channels
+  // 0-5 values have the low bit corresponding to Third and the high bits
+  // corresponding to Second. 6 corresponds to YCoCg.
+  //
+  // Second: 0=nop, 1=SubtractFirst, 2=SubtractAvgFirstThird
+  //
+  // Third: 0=nop, 1=SubtractFirst
+  int custom = rct_type % 7;
+  size_t m = begin_c;
+  size_t w = input.channel[m + 0].w;
+  size_t h = input.channel[m + 0].h;
+  int second = (custom % 7) >> 1;  // custom is already < 7, so the extra % 7 changes nothing
+  int third = (custom % 7) & 1;
+  const auto do_rct = [&](const int y, const int thread) {  // handles one row y; thread is unused
+    const pixel_type* in0 = input.channel[m + (permutation % 3)].Row(y);  // inputs are read through the permutation
+    const pixel_type* in1 =
+        input.channel[m + ((permutation + 1 + permutation / 3) % 3)].Row(y);
+    const pixel_type* in2 =
+        input.channel[m + ((permutation + 2 - permutation / 3) % 3)].Row(y);
+    pixel_type* out0 = input.channel[m].Row(y);  // outputs go to the same three channels in natural order,
+    pixel_type* out1 = input.channel[m + 1].Row(y);  // so in* and out* alias; each pixel's three inputs
+    pixel_type* out2 = input.channel[m + 2].Row(y);  // are loaded into locals before any output is stored
+    if (custom == 6) {
+      for (size_t x = 0; x < w; x++) {
+        pixel_type R = in0[x];
+        pixel_type G = in1[x];
+        pixel_type B = in2[x];
+        out1[x] = R - B;  // second output (named Co in InvRCTRow)
+        pixel_type tmp = B + (out1[x] >> 1);
+        out2[x] = G - tmp;  // third output (named Cg in InvRCTRow)
+        out0[x] = tmp + (out2[x] >> 1);  // first output (named Y in InvRCTRow)
+      }
+    } else {
+      for (size_t x = 0; x < w; x++) {
+        pixel_type First = in0[x];
+        pixel_type Second = in1[x];
+        pixel_type Third = in2[x];
+        if (second == 1) {
+          Second = Second - First;
+        } else if (second == 2) {
+          Second = Second - ((First + Third) >> 1);  // uses Third before it is reduced on the next statement
+        }
+        if (third) Third = Third - First;
+        out0[x] = First;
+        out1[x] = Second;
+        out2[x] = Third;
+      }
+    }
+  };
+  return RunOnPool(pool, 0, h, ThreadPool::NoInit, do_rct, "FwdRCT");  // one task per row index in [0, h)
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_rct.h b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_rct.h
new file mode 100644
index 0000000000..cb5a193c8d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_rct.h
@@ -0,0 +1,17 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_RCT_H_
+#define LIB_JXL_MODULAR_TRANSFORM_ENC_RCT_H_
+
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+Status FwdRCT(Image &input, size_t begin_c, size_t rct_type, ThreadPool *pool);  // forward RCT on channels begin_c..begin_c+2; rct_type = permutation * 7 + custom
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_ENC_RCT_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_squeeze.cc b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_squeeze.cc
new file mode 100644
index 0000000000..dfd90cde68
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_squeeze.cc
@@ -0,0 +1,141 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/enc_squeeze.h"
+
+#include <stdlib.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/squeeze.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+void FwdHSqueeze(Image &input, int c, int rc) {  // splits channel c horizontally into an average channel (stored at c) and a residual channel (inserted at rc)
+  const Channel &chin = input.channel[c];
+
+  JXL_DEBUG_V(4, "Doing horizontal squeeze of channel %i to new channel %i", c,
+              rc);
+
+  Channel chout((chin.w + 1) / 2, chin.h, chin.hshift + 1, chin.vshift);  // averages: half the width, rounded up
+  Channel chout_residual(chin.w - chout.w, chout.h, chin.hshift + 1,  // residuals: the remaining columns (half, rounded down)
+                         chin.vshift);
+
+  for (size_t y = 0; y < chout.h; y++) {
+    const pixel_type *JXL_RESTRICT p_in = chin.Row(y);
+    pixel_type *JXL_RESTRICT p_out = chout.Row(y);
+    pixel_type *JXL_RESTRICT p_res = chout_residual.Row(y);
+    for (size_t x = 0; x < chout_residual.w; x++) {  // one iteration per complete input pair (2x, 2x + 1)
+      pixel_type A = p_in[x * 2];
+      pixel_type B = p_in[x * 2 + 1];
+      pixel_type avg = (A + B + (A > B)) >> 1;  // the (A > B) term adds 1 before halving when A is the larger value
+      p_out[x] = avg;
+
+      pixel_type diff = A - B;
+
+      pixel_type next_avg = avg;  // default when there is nothing to the right of this pair
+      if (x + 1 < chout_residual.w) {
+        next_avg = (p_in[x * 2 + 2] + p_in[x * 2 + 3] +
+                    (p_in[x * 2 + 2] > p_in[x * 2 + 3])) >>
+                   1;  // which will be chout.value(y,x+1)
+      } else if (chin.w & 1)
+        next_avg = p_in[x * 2 + 2];  // odd width: the unpaired last pixel, copied verbatim to chout after this loop
+      pixel_type left = (x > 0 ? p_in[x * 2 - 1] : avg);  // input pixel just left of the pair, or avg in the first column
+      pixel_type tendency = SmoothTendency(left, avg, next_avg);
+
+      p_res[x] = diff - tendency;  // only the part of diff not covered by SmoothTendency's value is stored
+    }
+    if (chin.w & 1) {
+      int x = chout.w - 1;
+      p_out[x] = p_in[x * 2];  // odd width: last average column is the last input pixel unchanged
+    }
+  }
+  input.channel[c] = std::move(chout);  // chin aliases input.channel[c] and must not be read as the source after this
+  input.channel.insert(input.channel.begin() + rc, std::move(chout_residual));
+}
+
+void FwdVSqueeze(Image &input, int c, int rc) {  // splits channel c vertically into an average channel (stored at c) and a residual channel (inserted at rc)
+  const Channel &chin = input.channel[c];
+
+  JXL_DEBUG_V(4, "Doing vertical squeeze of channel %i to new channel %i", c,
+              rc);
+
+  Channel chout(chin.w, (chin.h + 1) / 2, chin.hshift, chin.vshift + 1);  // averages: half the height, rounded up
+  Channel chout_residual(chin.w, chin.h - chout.h, chin.hshift,  // residuals: the remaining rows (half, rounded down)
+                         chin.vshift + 1);
+  intptr_t onerow_in = chin.plane.PixelsPerRow();  // used below as the offset from a pixel to the one in the next input row
+  for (size_t y = 0; y < chout_residual.h; y++) {  // one iteration per complete input row pair (2y, 2y + 1)
+    const pixel_type *JXL_RESTRICT p_in = chin.Row(y * 2);
+    pixel_type *JXL_RESTRICT p_out = chout.Row(y);
+    pixel_type *JXL_RESTRICT p_res = chout_residual.Row(y);
+    for (size_t x = 0; x < chout.w; x++) {
+      pixel_type A = p_in[x];
+      pixel_type B = p_in[x + onerow_in];
+      pixel_type avg = (A + B + (A > B)) >> 1;  // the (A > B) term adds 1 before halving when A is the larger value
+      p_out[x] = avg;
+
+      pixel_type diff = A - B;
+
+      pixel_type next_avg = avg;  // default when there is no row below this pair
+      if (y + 1 < chout_residual.h) {
+        next_avg = (p_in[x + 2 * onerow_in] + p_in[x + 3 * onerow_in] +
+                    (p_in[x + 2 * onerow_in] > p_in[x + 3 * onerow_in])) >>
+                   1;  // which will be chout.value(y+1,x)
+      } else if (chin.h & 1) {
+        next_avg = p_in[x + 2 * onerow_in];  // odd height: the unpaired last row, copied verbatim to chout after the loop
+      }
+      pixel_type top =
+          (y > 0 ? p_in[static_cast<ssize_t>(x) - onerow_in] : avg);  // input pixel just above the pair, or avg in the first row
+      pixel_type tendency = SmoothTendency(top, avg, next_avg);
+
+      p_res[x] = diff - tendency;  // only the part of diff not covered by SmoothTendency's value is stored
+    }
+  }
+  if (chin.h & 1) {  // odd height: last average row is the last input row unchanged
+    size_t y = chout.h - 1;
+    const pixel_type *p_in = chin.Row(y * 2);
+    pixel_type *p_out = chout.Row(y);
+    for (size_t x = 0; x < chout.w; x++) {
+      p_out[x] = p_in[x];
+    }
+  }
+  input.channel[c] = std::move(chout);  // chin aliases input.channel[c] and must not be read as the source after this
+  input.channel.insert(input.channel.begin() + rc, std::move(chout_residual));
+}
+
+Status FwdSqueeze(Image &input, std::vector<SqueezeParams> parameters,  // parameters is taken by value, so filling in defaults stays local
+                  ThreadPool *pool) {  // pool is not used anywhere in this body
+  if (parameters.empty()) {
+    DefaultSqueezeParameters(&parameters, input);
+  }
+  // If there is still nothing to do, report false instead of applying a squeeze.
+  if (parameters.empty()) return false;
+  for (size_t i = 0; i < parameters.size(); i++) {  // steps run in order; each sees the channel list left by the previous one
+    JXL_RETURN_IF_ERROR(
+        CheckMetaSqueezeParams(parameters[i], input.channel.size()));
+    bool horizontal = parameters[i].horizontal;
+    bool in_place = parameters[i].in_place;
+    uint32_t beginc = parameters[i].begin_c;
+    uint32_t endc = parameters[i].begin_c + parameters[i].num_c - 1;  // inclusive last channel of the range
+    uint32_t offset;  // index at which the first residual channel is inserted
+    if (in_place) {
+      offset = endc + 1;  // residuals go directly after the squeezed range
+    } else {
+      offset = input.channel.size();  // residuals are appended at the end of the channel list
+    }
+    for (uint32_t c = beginc; c <= endc; c++) {
+      if (horizontal) {
+        FwdHSqueeze(input, c, offset + c - beginc);  // residual of channel c lands at offset + (c - beginc)
+      } else {
+        FwdVSqueeze(input, c, offset + c - beginc);
+      }
+    }
+  }
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_squeeze.h b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_squeeze.h
new file mode 100644
index 0000000000..39b001017b
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_squeeze.h
@@ -0,0 +1,20 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_SQUEEZE_H_
+#define LIB_JXL_MODULAR_TRANSFORM_ENC_SQUEEZE_H_
+
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+Status FwdSqueeze(Image &input, std::vector<SqueezeParams> parameters,  // empty parameters are replaced via DefaultSqueezeParameters
+                  ThreadPool *pool);  // returns false when there is nothing to squeeze; pool is unused by the definition
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_ENC_SQUEEZE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_transform.cc b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_transform.cc
new file mode 100644
index 0000000000..bdaaf9f87e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_transform.cc
@@ -0,0 +1,46 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/enc_transform.h"
+
+#include "lib/jxl/modular/transform/enc_palette.h"
+#include "lib/jxl/modular/transform/enc_rct.h"
+#include "lib/jxl/modular/transform/enc_squeeze.h"
+
+namespace jxl {
+
+Status TransformForward(Transform &t, Image &input,  // dispatches on t.id to the matching encoder-side Fwd* routine
+                        const weighted::Header &wp_header, ThreadPool *pool) {
+  switch (t.id) {
+    case TransformId::kRCT:
+      return FwdRCT(input, t.begin_c, t.rct_type, pool);
+    case TransformId::kSqueeze:
+      return FwdSqueeze(input, t.squeezes, pool);  // t.squeezes is copied: FwdSqueeze takes the vector by value
+    case TransformId::kPalette:
+      return FwdPalette(input, t.begin_c, t.begin_c + t.num_c - 1, t.nb_colors,  // end_c is inclusive; t.nb_colors, t.nb_deltas, t.predictor go by reference
+                        t.nb_deltas, t.ordered_palette, t.lossy_palette,
+                        t.predictor, wp_header);  // wp_header is only forwarded on this path
+    default:
+      return JXL_FAILURE("Unknown transformation (ID=%u)",
+                         static_cast<unsigned int>(t.id));
+  }
+}
+
+void compute_minmax(const Channel &ch, pixel_type *min, pixel_type *max) {  // scans every pixel of ch; min and max may each be null
+  pixel_type realmin = std::numeric_limits<pixel_type>::max();  // sentinels: an empty channel (w == 0 or h == 0)
+  pixel_type realmax = std::numeric_limits<pixel_type>::min();  // reports min > max
+  for (size_t y = 0; y < ch.h; y++) {
+    const pixel_type *JXL_RESTRICT p = ch.Row(y);
+    for (size_t x = 0; x < ch.w; x++) {
+      if (p[x] < realmin) realmin = p[x];
+      if (p[x] > realmax) realmax = p[x];
+    }
+  }
+
+  if (min) *min = realmin;  // a null output pointer is simply skipped
+  if (max) *max = realmax;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_transform.h b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_transform.h
new file mode 100644
index 0000000000..07659e1b0a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_transform.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_TRANSFORM_H_
+#define LIB_JXL_MODULAR_TRANSFORM_ENC_TRANSFORM_H_
+
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+Status TransformForward(Transform &t, Image &input,  // applies t to input via FwdRCT / FwdSqueeze / FwdPalette, chosen by t.id
+                        const weighted::Header &wp_header, ThreadPool *pool);  // an unknown t.id yields a JXL_FAILURE
+
+void compute_minmax(const Channel &ch, pixel_type *min, pixel_type *max);  // min / max may be null; an empty channel yields min > max
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_ENC_TRANSFORM_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/palette.cc b/third-party/libjxl/libjxl/lib/jxl/modular/transform/palette.cc
new file mode 100644
index 0000000000..bffbacf160
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/palette.cc
@@ -0,0 +1,177 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/palette.h"
+
+namespace jxl {
+
+Status InvPalette(Image &input, uint32_t begin_c, uint32_t nb_colors,  // undoes the palette transform; nb_colors is not read in this body
+                  uint32_t nb_deltas, Predictor predictor,
+                  const weighted::Header &wp_header, ThreadPool *pool) {
+  if (input.nb_meta_channels < 1) {
+    return JXL_FAILURE("Error: Palette transform without palette.");
+  }
+  std::atomic<int> num_errors{0};  // never incremented below, so the final return is always true
+  int nb = input.channel[0].h;  // number of output channels = number of rows in the palette channel
+  uint32_t c0 = begin_c + 1;  // index channel: begin_c shifted by one because the palette sits at channel 0
+  if (c0 >= input.channel.size()) {
+    return JXL_FAILURE("Channel is out of range.");
+  }
+  size_t w = input.channel[c0].w;
+  size_t h = input.channel[c0].h;
+  if (nb < 1) return JXL_FAILURE("Corrupted transforms");
+  for (int i = 1; i < nb; i++) {  // add nb - 1 channels after c0 so that c0 .. c0 + nb - 1 hold the result
+    input.channel.insert(
+        input.channel.begin() + c0 + 1,
+        Channel(w, h, input.channel[c0].hshift, input.channel[c0].vshift));
+  }
+  const Channel &palette = input.channel[0];  // bound only after the inserts above
+  const pixel_type *JXL_RESTRICT p_palette = input.channel[0].Row(0);
+  intptr_t onerow = input.channel[0].plane.PixelsPerRow();  // palette row stride, used by GetPaletteValue as c * onerow
+  intptr_t onerow_image = input.channel[c0].plane.PixelsPerRow();
+  const int bit_depth = std::min(input.bitdepth, 24);  // bit depth handed to GetPaletteValue is capped at 24
+
+  if (w == 0) {
+    // Nothing to do.
+    // Avoid touching "empty" channels with non-zero height.
+  } else if (nb_deltas == 0 && predictor == Predictor::Zero) {  // no delta entries: plain lookup, one task per row
+    if (nb == 1) {
+      JXL_RETURN_IF_ERROR(RunOnPool(
+          pool, 0, h, ThreadPool::NoInit,
+          [&](const uint32_t task, size_t /* thread */) {
+            const size_t y = task;
+            pixel_type *p = input.channel[c0].Row(y);  // indices are replaced by values in place
+            for (size_t x = 0; x < w; x++) {
+              const int index = Clamp1<int>(p[x], 0, (pixel_type)palette.w - 1);  // this path clamps into the explicit palette
+              p[x] = palette_internal::GetPaletteValue(
+                  p_palette, index, /*c=*/0,
+                  /*palette_size=*/palette.w,
+                  /*onerow=*/onerow, /*bit_depth=*/bit_depth);
+            }
+          },
+          "UndoChannelPalette"));
+    } else {
+      JXL_RETURN_IF_ERROR(RunOnPool(
+          pool, 0, h, ThreadPool::NoInit,
+          [&](const uint32_t task, size_t /* thread */) {
+            const size_t y = task;
+            std::vector<pixel_type *> p_out(nb);
+            const pixel_type *p_index = input.channel[c0].Row(y);  // aliases p_out[0]; see the c loop below
+            for (int c = 0; c < nb; c++)
+              p_out[c] = input.channel[c0 + c].Row(y);
+            for (size_t x = 0; x < w; x++) {
+              const int index = p_index[x];  // read before p_out[0][x] overwrites it; not clamped, GetPaletteValue handles out-of-palette values
+              for (int c = 0; c < nb; c++) {
+                p_out[c][x] = palette_internal::GetPaletteValue(
+                    p_palette, index, /*c=*/c,
+                    /*palette_size=*/palette.w,
+                    /*onerow=*/onerow, /*bit_depth=*/bit_depth);
+              }
+            }
+          },
+          "UndoPalette"));
+    }
+  } else {
+    // Parallelized per channel.
+    ImageI indices = std::move(input.channel[c0].plane);  // keep the indices aside: c0 becomes an output channel
+    input.channel[c0].plane = ImageI(indices.xsize(), indices.ysize());  // fresh plane of the same size for channel c0's output
+    if (predictor == Predictor::Weighted) {
+      JXL_RETURN_IF_ERROR(RunOnPool(
+          pool, 0, nb, ThreadPool::NoInit,
+          [&](const uint32_t c, size_t /* thread */) {
+            Channel &channel = input.channel[c0 + c];
+            weighted::State wp_state(wp_header, channel.w, channel.h);  // one predictor state per channel task
+            for (size_t y = 0; y < channel.h; y++) {
+              pixel_type *JXL_RESTRICT p = channel.Row(y);
+              const pixel_type *JXL_RESTRICT idx = indices.Row(y);
+              for (size_t x = 0; x < channel.w; x++) {
+                int index = idx[x];
+                pixel_type_w val = 0;
+                const pixel_type palette_entry =
+                    palette_internal::GetPaletteValue(
+                        p_palette, index, /*c=*/c,
+                        /*palette_size=*/palette.w, /*onerow=*/onerow,
+                        /*bit_depth=*/bit_depth);
+                if (index < static_cast<int32_t>(nb_deltas)) {  // delta entry (this also covers negative indices)
+                  PredictionResult pred =
+                      PredictNoTreeWP(channel.w, p + x, onerow_image, x, y,
+                                      predictor, &wp_state);
+                  val = pred.guess + palette_entry;  // the entry is an offset added to the prediction
+                } else {
+                  val = palette_entry;  // the entry is the value itself
+                }
+                p[x] = val;
+                wp_state.UpdateErrors(p[x], x, y, channel.w);  // called for every pixel, not only delta entries
+              }
+            }
+          },
+          "UndoDeltaPaletteWP"));
+    } else {
+      JXL_RETURN_IF_ERROR(RunOnPool(
+          pool, 0, nb, ThreadPool::NoInit,
+          [&](const uint32_t c, size_t /* thread */) {
+            Channel &channel = input.channel[c0 + c];
+            for (size_t y = 0; y < channel.h; y++) {
+              pixel_type *JXL_RESTRICT p = channel.Row(y);
+              const pixel_type *JXL_RESTRICT idx = indices.Row(y);
+              for (size_t x = 0; x < channel.w; x++) {
+                int index = idx[x];
+                pixel_type_w val = 0;
+                const pixel_type palette_entry =
+                    palette_internal::GetPaletteValue(
+                        p_palette, index, /*c=*/c,
+                        /*palette_size=*/palette.w,
+                        /*onerow=*/onerow, /*bit_depth=*/bit_depth);
+                if (index < static_cast<int32_t>(nb_deltas)) {  // delta entry (this also covers negative indices)
+                  PredictionResult pred = PredictNoTreeNoWP(
+                      channel.w, p + x, onerow_image, x, y, predictor);
+                  val = pred.guess + palette_entry;  // the entry is an offset added to the prediction
+                } else {
+                  val = palette_entry;  // the entry is the value itself
+                }
+                p[x] = val;
+              }
+            }
+          },
+          "UndoDeltaPaletteNoWP"));
+    }
+  }
+  if (c0 >= input.nb_meta_channels) {
+    // Palette was done on normal channels
+    input.nb_meta_channels--;  // only the palette channel stops being a meta channel
+  } else {
+    // Palette was done on metachannels
+    JXL_ASSERT(static_cast<int>(input.nb_meta_channels) >= 2 - nb);
+    input.nb_meta_channels -= 2 - nb;  // reverses the "+= 2 - nb" done by MetaPalette
+    JXL_ASSERT(begin_c + nb - 1 < input.nb_meta_channels);
+  }
+  input.channel.erase(input.channel.begin(), input.channel.begin() + 1);  // drop the palette channel (channel 0)
+  return num_errors.load(std::memory_order_relaxed) == 0;
+}
+
+Status MetaPalette(Image &input, uint32_t begin_c, uint32_t end_c,  // channel-list bookkeeping of the palette transform only
+                   uint32_t nb_colors, uint32_t nb_deltas, bool lossy) {  // lossy is not read in this body
+  JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, end_c));
+
+  size_t nb = end_c - begin_c + 1;  // number of channels covered by the palette (end_c is inclusive)
+  if (begin_c >= input.nb_meta_channels) {
+    // Palette was done on normal channels
+    input.nb_meta_channels++;  // the new palette channel is one extra meta channel
+  } else {
+    // Palette was done on metachannels
+    JXL_ASSERT(end_c < input.nb_meta_channels);
+    // we remove nb-1 metachannels and add one
+    input.nb_meta_channels += 2 - nb;
+  }
+  input.channel.erase(input.channel.begin() + begin_c + 1,  // channel begin_c is kept; begin_c + 1 .. end_c are removed
+                      input.channel.begin() + end_c + 1);
+  Channel pch(nb_colors + nb_deltas, nb);  // palette channel: one column per entry, one row per covered channel
+  pch.hshift = -1;
+  pch.vshift = -1;
+  input.channel.insert(input.channel.begin(), std::move(pch));  // the palette becomes channel 0
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/palette.h b/third-party/libjxl/libjxl/lib/jxl/modular/transform/palette.h
new file mode 100644
index 0000000000..cc0f67960b
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/palette.h
@@ -0,0 +1,129 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_PALETTE_H_
+#define LIB_JXL_MODULAR_TRANSFORM_PALETTE_H_
+
+#include <atomic>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"  // CheckEqualChannels
+
+namespace jxl {
+
+namespace palette_internal {
+
+static constexpr int kMaxPaletteLookupTableSize = 1 << 16;  // not referenced in this header; presumably an encoder-side limit - confirm
+
+static constexpr int kRgbChannels = 3;  // GetPaletteValue returns 0 for implicit entries when c >= kRgbChannels
+
+// 5x5x5 color cube for the larger cube.
+static constexpr int kLargeCube = 5;
+
+// Smaller interleaved color cube to fill the holes of the larger cube.
+static constexpr int kSmallCube = 4;
+static constexpr int kSmallCubeBits = 2;  // bits per channel coordinate in a small-cube index (kSmallCube == 1 << 2)
+// kSmallCube ** 3: number of small-cube entries; large-cube indices start at palette_size + this value.
+static constexpr int kLargeCubeOffset = kSmallCube * kSmallCube * kSmallCube;
+
+static inline pixel_type Scale(uint64_t value, uint64_t bit_depth,  // returns value * (2^bit_depth - 1) / 4, rounded down
+                               uint64_t denom) {
+  // return (value * ((static_cast<pixel_type_w>(1) << bit_depth) - 1)) / denom;
+  // We only call this function with kSmallCube or kLargeCube - 1 as denom,
+  // allowing us to avoid a division here.
+  JXL_ASSERT(denom == 4);  // both kSmallCube and kLargeCube - 1 equal 4, which the shift below hard-codes
+  return (value * ((static_cast<uint64_t>(1) << bit_depth) - 1)) >> 2;
+}
+
+// The purpose of this function is solely to extend the interpretation of
+// palette indices to implicit values. If index < nb_deltas, indicating that the
+// result is a delta palette entry, it is the responsibility of the caller to
+// treat it as such.
+static JXL_MAYBE_UNUSED pixel_type
+GetPaletteValue(const pixel_type *const palette, int index, const size_t c,  // value of palette entry `index` for channel c
+                const int palette_size, const int onerow, const int bit_depth) {  // explicit entries are read at palette[c * onerow + index]
+  if (index < 0) {  // negative index: implicit entry taken from the built-in delta table
+    static constexpr std::array<std::array<pixel_type, 3>, 72> kDeltaPalette = {
+        {
+            {{0, 0, 0}},       {{4, 4, 4}},       {{11, 0, 0}},
+            {{0, 0, -13}},     {{0, -12, 0}},     {{-10, -10, -10}},
+            {{-18, -18, -18}}, {{-27, -27, -27}}, {{-18, -18, 0}},
+            {{0, 0, -32}},     {{-32, 0, 0}},     {{-37, -37, -37}},
+            {{0, -32, -32}},   {{24, 24, 45}},    {{50, 50, 50}},
+            {{-45, -24, -24}}, {{-24, -45, -45}}, {{0, -24, -24}},
+            {{-34, -34, 0}},   {{-24, 0, -24}},   {{-45, -45, -24}},
+            {{64, 64, 64}},    {{-32, 0, -32}},   {{0, -32, 0}},
+            {{-32, 0, 32}},    {{-24, -45, -24}}, {{45, 24, 45}},
+            {{24, -24, -45}},  {{-45, -24, 24}},  {{80, 80, 80}},
+            {{64, 0, 0}},      {{0, 0, -64}},     {{0, -64, -64}},
+            {{-24, -24, 45}},  {{96, 96, 96}},    {{64, 64, 0}},
+            {{45, -24, -24}},  {{34, -34, 0}},    {{112, 112, 112}},
+            {{24, -45, -45}},  {{45, 45, -24}},   {{0, -32, 32}},
+            {{24, -24, 45}},   {{0, 96, 96}},     {{45, -24, 24}},
+            {{24, -45, -24}},  {{-24, -45, 24}},  {{0, -64, 0}},
+            {{96, 0, 0}},      {{128, 128, 128}}, {{64, 0, 64}},
+            {{144, 144, 144}}, {{96, 96, 0}},     {{-36, -36, 36}},
+            {{45, -24, -45}},  {{45, -45, -24}},  {{0, 0, -96}},
+            {{0, 128, 128}},   {{0, 96, 0}},      {{45, 24, -45}},
+            {{-128, 0, 0}},    {{24, -45, 24}},   {{-45, 24, -45}},
+            {{64, 0, -64}},    {{64, -64, -64}},  {{96, 0, 96}},
+            {{45, -45, 24}},   {{24, 45, -45}},   {{64, 64, -64}},
+            {{128, 128, 0}},   {{0, 0, -128}},    {{-24, 45, -45}},
+        }};
+    if (c >= kRgbChannels) {
+      return 0;  // implicit entries are defined only for the first three channels
+    }
+    // Do not open the brackets, otherwise INT32_MIN negation could overflow.
+    index = -(index + 1);  // maps -1, -2, -3, ... to 0, 1, 2, ...
+    index %= 1 + 2 * (kDeltaPalette.size() - 1);  // wraps modulo 143: row 0 once, every other row with both signs
+    static constexpr int kMultiplier[] = {-1, 1};
+    pixel_type result =
+        kDeltaPalette[((index + 1) >> 1)][c] * kMultiplier[index & 1];  // odd index: table row as is; even index: negated
+    if (bit_depth > 8) {
+      result *= static_cast<pixel_type>(1) << (bit_depth - 8);  // scales the table value by 2^(bit_depth - 8) above 8 bits
+    }
+    return result;
+  } else if (palette_size <= index && index < palette_size + kLargeCubeOffset) {  // first 64 indices past the explicit palette: small cube
+    if (c >= kRgbChannels) return 0;
+    index -= palette_size;
+    index >>= c * kSmallCubeBits;  // channel c's coordinate is the c-th 2-bit field
+    return Scale(index % kSmallCube, bit_depth, kSmallCube) +
+           (1 << (std::max(0, bit_depth - 3)));  // constant offset of 2^max(0, bit_depth - 3) on top of the scaled coordinate
+  } else if (palette_size + kLargeCubeOffset <= index) {  // all larger indices: large cube
+    if (c >= kRgbChannels) return 0;
+    index -= palette_size + kLargeCubeOffset;
+    // TODO(eustas): should we take care of ambiguity created by
+    //               index >= kLargeCube ** 3 ?
+    switch (c) {  // channel c's coordinate is the c-th base-5 digit
+      case 0:
+        break;
+      case 1:
+        index /= kLargeCube;
+        break;
+      case 2:
+        index /= kLargeCube * kLargeCube;
+        break;
+    }
+    return Scale(index % kLargeCube, bit_depth, kLargeCube - 1);
+  }
+  return palette[c * onerow + static_cast<size_t>(index)];  // 0 <= index < palette_size: explicit entry
+}
+
+}  // namespace palette_internal
+
+Status InvPalette(Image &input, uint32_t begin_c, uint32_t nb_colors,  // expands index channel begin_c + 1 into palette-height many channels
+                  uint32_t nb_deltas, Predictor predictor,  // indices below nb_deltas are offsets added to a prediction
+                  const weighted::Header &wp_header, ThreadPool *pool);  // the palette is expected at channel 0 and is removed afterwards
+
+Status MetaPalette(Image &input, uint32_t begin_c, uint32_t end_c,  // updates only the channel list and nb_meta_channels
+                   uint32_t nb_colors, uint32_t nb_deltas, bool lossy);  // inserts a (nb_colors + nb_deltas)-wide palette channel at index 0
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_PALETTE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/rct.cc b/third-party/libjxl/libjxl/lib/jxl/modular/transform/rct.cc
new file mode 100644
index 0000000000..f3002a5ac3
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/rct.cc
@@ -0,0 +1,153 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/rct.h"
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/modular/transform/rct.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Sub;
+
+template <int transform_type>  // the "custom" part of rct_type (0..6), fixed at compile time
+void InvRCTRow(const pixel_type* in0, const pixel_type* in1,  // inverts the RCT on one row of w pixels
+               const pixel_type* in2, pixel_type* out0, pixel_type* out1,
+               pixel_type* out2, size_t w) {
+  static_assert(transform_type >= 0 && transform_type < 7,
+                "Invalid transform type");
+  int second = transform_type >> 1;  // same decoding as in FwdRCT
+  int third = transform_type & 1;
+
+  size_t x = 0;
+  const HWY_FULL(pixel_type) d;
+  const size_t N = Lanes(d);
+  for (; x + N - 1 < w; x += N) {  // vector loop: only complete groups of N pixels
+    if (transform_type == 6) {
+      auto Y = Load(d, in0 + x);
+      auto Co = Load(d, in1 + x);
+      auto Cg = Load(d, in2 + x);
+      Y = Sub(Y, ShiftRight<1>(Cg));  // corresponds to tmp in the scalar loop below
+      auto G = Add(Cg, Y);
+      Y = Sub(Y, ShiftRight<1>(Co));  // Y now holds what the scalar loop calls B
+      auto R = Add(Y, Co);
+      Store(R, d, out0 + x);
+      Store(G, d, out1 + x);
+      Store(Y, d, out2 + x);
+    } else {
+      auto First = Load(d, in0 + x);
+      auto Second = Load(d, in1 + x);
+      auto Third = Load(d, in2 + x);
+      if (third) Third = Add(Third, First);  // Third is restored first: second == 2 needs the restored value
+      if (second == 1) {
+        Second = Add(Second, First);
+      } else if (second == 2) {
+        Second = Add(Second, ShiftRight<1>(Add(First, Third)));
+      }
+      Store(First, d, out0 + x);
+      Store(Second, d, out1 + x);
+      Store(Third, d, out2 + x);
+    }
+  }
+  for (; x < w; x++) {  // scalar loop for the remaining w % N pixels; PixelAdd is defined elsewhere (presumably matches the vector Add - confirm)
+    if (transform_type == 6) {
+      pixel_type Y = in0[x];
+      pixel_type Co = in1[x];
+      pixel_type Cg = in2[x];
+      pixel_type tmp = PixelAdd(Y, -(Cg >> 1));
+      pixel_type G = PixelAdd(Cg, tmp);
+      pixel_type B = PixelAdd(tmp, -(Co >> 1));
+      pixel_type R = PixelAdd(B, Co);
+      out0[x] = R;
+      out1[x] = G;
+      out2[x] = B;
+    } else {
+      pixel_type First = in0[x];
+      pixel_type Second = in1[x];
+      pixel_type Third = in2[x];
+      if (third) Third = PixelAdd(Third, First);
+      if (second == 1) {
+        Second = PixelAdd(Second, First);
+      } else if (second == 2) {
+        Second = PixelAdd(Second, (PixelAdd(First, Third) >> 1));
+      }
+      out0[x] = First;
+      out1[x] = Second;
+      out2[x] = Third;
+    }
+  }
+}
+
+Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) {  // per-target inverse RCT, in place on channels begin_c..begin_c+2
+  JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, begin_c + 2));
+  size_t m = begin_c;
+  Channel& c0 = input.channel[m + 0];
+  size_t w = c0.w;
+  size_t h = c0.h;
+  if (rct_type == 0) {  // noop
+    return true;
+  }
+  // Permutation: 0=RGB, 1=GBR, 2=BRG, 3=RBG, 4=GRB, 5=BGR
+  int permutation = rct_type / 7;
+  JXL_CHECK(permutation < 6);  // only the six permutations listed above are accepted
+  // 0-5 values have the low bit corresponding to Third and the high bits
+  // corresponding to Second. 6 corresponds to YCoCg.
+  //
+  // Second: 0=nop, 1=SubtractFirst, 2=SubtractAvgFirstThird
+  //
+  // Third: 0=nop, 1=SubtractFirst
+  int custom = rct_type % 7;
+  // Special case: permute-only. Swap channels around.
+  if (custom == 0) {
+    Channel ch0 = std::move(input.channel[m]);  // all three are moved out first so no slot is overwritten before it is read
+    Channel ch1 = std::move(input.channel[m + 1]);
+    Channel ch2 = std::move(input.channel[m + 2]);
+    input.channel[m + (permutation % 3)] = std::move(ch0);  // same index formulas FwdRCT uses to pick its inputs
+    input.channel[m + ((permutation + 1 + permutation / 3) % 3)] =
+        std::move(ch1);
+    input.channel[m + ((permutation + 2 - permutation / 3) % 3)] =
+        std::move(ch2);
+    return true;
+  }
+  constexpr decltype(&InvRCTRow<0>) inv_rct_row[] = {  // one row kernel per custom value, indexed by custom below
+      InvRCTRow<0>, InvRCTRow<1>, InvRCTRow<2>, InvRCTRow<3>,
+      InvRCTRow<4>, InvRCTRow<5>, InvRCTRow<6>};
+  JXL_RETURN_IF_ERROR(RunOnPool(
+      pool, 0, h, ThreadPool::NoInit,
+      [&](const uint32_t task, size_t /* thread */) {  // one task per row
+        const size_t y = task;
+        const pixel_type* in0 = input.channel[m].Row(y);  // inputs are read in natural channel order
+        const pixel_type* in1 = input.channel[m + 1].Row(y);
+        const pixel_type* in2 = input.channel[m + 2].Row(y);
+        pixel_type* out0 = input.channel[m + (permutation % 3)].Row(y);  // outputs are written through the permutation
+        pixel_type* out1 =
+            input.channel[m + ((permutation + 1 + permutation / 3) % 3)].Row(y);
+        pixel_type* out2 =
+            input.channel[m + ((permutation + 2 - permutation / 3) % 3)].Row(y);
+        inv_rct_row[custom](in0, in1, in2, out0, out1, out2, w);  // in* and out* alias the same three channels
+      },
+      "InvRCT"));
+  return true;
+}
+
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(InvRCT);  // makes the per-target HWY_NAMESPACE::InvRCT variants available to HWY_DYNAMIC_DISPATCH
+Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) {  // public entry point declared in rct.h
+  return HWY_DYNAMIC_DISPATCH(InvRCT)(input, begin_c, rct_type, pool);  // forwards all arguments unchanged to the variant Highway selects
+}
+
+}  // namespace jxl
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/rct.h b/third-party/libjxl/libjxl/lib/jxl/modular/transform/rct.h
new file mode 100644
index 0000000000..aef65621d5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/rct.h
@@ -0,0 +1,20 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_RCT_H_
+#define LIB_JXL_MODULAR_TRANSFORM_RCT_H_
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"  // CheckEqualChannels
+
+namespace jxl {
+
+Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_RCT_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/squeeze.cc b/third-party/libjxl/libjxl/lib/jxl/modular/transform/squeeze.cc
new file mode 100644
index 0000000000..8440d9e804
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/squeeze.cc
@@ -0,0 +1,478 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/squeeze.h"
+
+#include <stdlib.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/modular/transform/squeeze.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/simd_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::And;
+using hwy::HWY_NAMESPACE::Gt;
+using hwy::HWY_NAMESPACE::IfThenElse;
+using hwy::HWY_NAMESPACE::IfThenZeroElse;
+using hwy::HWY_NAMESPACE::Lt;
+using hwy::HWY_NAMESPACE::MulEven;
+using hwy::HWY_NAMESPACE::Ne;
+using hwy::HWY_NAMESPACE::Neg;
+using hwy::HWY_NAMESPACE::OddEven;
+using hwy::HWY_NAMESPACE::RebindToUnsigned;
+using hwy::HWY_NAMESPACE::ShiftLeft;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Sub;
+using hwy::HWY_NAMESPACE::Xor;
+
+#if HWY_TARGET != HWY_SCALAR
+// SIMD unsqueeze step for 8 pixels: writes p_out[0..7] and p_nout[0..7].
+JXL_INLINE void FastUnsqueeze(const pixel_type *JXL_RESTRICT p_residual,
+                              const pixel_type *JXL_RESTRICT p_avg,
+                              const pixel_type *JXL_RESTRICT p_navg,
+                              const pixel_type *p_pout,
+                              pixel_type *JXL_RESTRICT p_out,
+                              pixel_type *p_nout) {
+  const HWY_CAPPED(pixel_type, 8) d;
+  const RebindToUnsigned<decltype(d)> du;
+  const size_t N = Lanes(d);
+  auto onethird = Set(d, 0x55555556);  // ceil(2^32 / 3)
+  for (size_t x = 0; x < 8; x += N) {
+    auto avg = Load(d, p_avg + x);
+    auto next_avg = Load(d, p_navg + x);
+    auto top = Load(d, p_pout + x);
+    // Equivalent to SmoothTendency(top,avg,next_avg), but without branches
+    auto Ba = Sub(top, avg);
+    auto an = Sub(avg, next_avg);
+    auto nonmono = Xor(Ba, an);  // sign bit set iff Ba and an differ in sign
+    auto absBa = Abs(Ba);
+    auto absan = Abs(an);
+    auto absBn = Abs(Sub(top, next_avg));
+    // Compute a3 = absBa / 3 (upper 32 bits of the product with onethird)
+    auto a3e = BitCast(d, ShiftRight<32>(MulEven(absBa, onethird)));
+    auto a3oi = MulEven(Reverse(d, absBa), onethird);
+    auto a3o = BitCast(
+        d, Reverse(hwy::HWY_NAMESPACE::Repartition<pixel_type_w, decltype(d)>(),
+                   a3oi));
+    auto a3 = OddEven(a3o, a3e);
+    a3 = Add(a3, Add(absBn, Set(d, 2)));
+    auto absdiff = ShiftRight<2>(a3);
+    auto skipdiff = Ne(Ba, Zero(d));
+    skipdiff = And(skipdiff, Ne(an, Zero(d)));
+    skipdiff = And(skipdiff, Lt(nonmono, Zero(d)));
+    auto absBa2 = Add(ShiftLeft<1>(absBa), And(absdiff, Set(d, 1)));
+    absdiff = IfThenElse(Gt(absdiff, absBa2),
+                         Add(ShiftLeft<1>(absBa), Set(d, 1)), absdiff);
+    auto absan2 = ShiftLeft<1>(absan);
+    absdiff = IfThenElse(Gt(Add(absdiff, And(absdiff, Set(d, 1))), absan2),
+                         absan2, absdiff);
+    auto diff1 = IfThenElse(Lt(top, next_avg), Neg(absdiff), absdiff);
+    auto tendency = IfThenZeroElse(skipdiff, diff1);  // 0 if not monotone
+    // Rebuild the pair: out = avg + diff / 2 (toward zero), next = out - diff.
+    auto diff_minus_tendency = Load(d, p_residual + x);
+    auto diff = Add(diff_minus_tendency, tendency);
+    auto out =
+        Add(avg, ShiftRight<1>(
+                     Add(diff, BitCast(d, ShiftRight<31>(BitCast(du, diff))))));
+    Store(out, d, p_out + x);
+    Store(Sub(out, diff), d, p_nout + x);
+  }
+}
+
+#endif
+// Undoes one horizontal squeeze: channel c (averages) + rc (residuals) -> c.
+Status InvHSqueeze(Image &input, uint32_t c, uint32_t rc, ThreadPool *pool) {
+  JXL_ASSERT(c < input.channel.size());
+  JXL_ASSERT(rc < input.channel.size());
+  Channel &chin = input.channel[c];
+  const Channel &chin_residual = input.channel[rc];
+  // These must be valid since we ran MetaApply already.
+  JXL_ASSERT(chin.w == DivCeil(chin.w + chin_residual.w, 2));
+  JXL_ASSERT(chin.h == chin_residual.h);
+
+  if (chin_residual.w == 0) {
+    // Short-circuit: output channel has same dimensions as input.
+    input.channel[c].hshift--;
+    return true;
+  }
+
+  // Note: chin.w >= chin_residual.w and at most 1 different.
+  Channel chout(chin.w + chin_residual.w, chin.h, chin.hshift - 1, chin.vshift);
+  JXL_DEBUG_V(4,
+              "Undoing horizontal squeeze of channel %i using residuals in "
+              "channel %i (going from width %" PRIuS " to %" PRIuS ")",
+              c, rc, chin.w, chout.w);
+
+  if (chin_residual.h == 0) {
+    // Short-circuit: channel with no pixels.
+    input.channel[c] = std::move(chout);
+    return true;
+  }
+  auto unsqueeze_row = [&](size_t y, size_t x0) {  // scalar, columns >= x0
+    const pixel_type *JXL_RESTRICT p_residual = chin_residual.Row(y);
+    const pixel_type *JXL_RESTRICT p_avg = chin.Row(y);
+    pixel_type *JXL_RESTRICT p_out = chout.Row(y);
+    for (size_t x = x0; x < chin_residual.w; x++) {
+      pixel_type_w diff_minus_tendency = p_residual[x];
+      pixel_type_w avg = p_avg[x];
+      pixel_type_w next_avg = (x + 1 < chin.w ? p_avg[x + 1] : avg);
+      pixel_type_w left = (x ? p_out[(x << 1) - 1] : avg);  // previous output
+      pixel_type_w tendency = SmoothTendency(left, avg, next_avg);
+      pixel_type_w diff = diff_minus_tendency + tendency;
+      pixel_type_w A = avg + (diff / 2);
+      p_out[(x << 1)] = A;
+      pixel_type_w B = A - diff;
+      p_out[(x << 1) + 1] = B;
+    }
+    if (chout.w & 1) p_out[chout.w - 1] = p_avg[chin.w - 1];  // odd width tail
+  };
+
+  // somewhat complicated trickery just to be able to SIMD this.
+  // Horizontal unsqueeze has horizontal data dependencies, so we do
+  // 8 rows at a time and treat it as a vertical unsqueeze of a
+  // transposed 8x8 block (or 9x8 for one input).
+  static constexpr const size_t kRowsPerThread = 8;
+  const auto unsqueeze_span = [&](const uint32_t task, size_t /* thread */) {
+    const size_t y0 = task * kRowsPerThread;
+    const size_t rows = std::min(kRowsPerThread, chin.h - y0);
+    size_t x = 0;
+
+#if HWY_TARGET != HWY_SCALAR
+    intptr_t onerow_in = chin.plane.PixelsPerRow();
+    intptr_t onerow_inr = chin_residual.plane.PixelsPerRow();
+    intptr_t onerow_out = chout.plane.PixelsPerRow();
+    const pixel_type *JXL_RESTRICT p_residual = chin_residual.Row(y0);
+    const pixel_type *JXL_RESTRICT p_avg = chin.Row(y0);
+    pixel_type *JXL_RESTRICT p_out = chout.Row(y0);
+    HWY_ALIGN pixel_type b_p_avg[9 * kRowsPerThread];
+    HWY_ALIGN pixel_type b_p_residual[8 * kRowsPerThread];
+    HWY_ALIGN pixel_type b_p_out_even[8 * kRowsPerThread];
+    HWY_ALIGN pixel_type b_p_out_odd[8 * kRowsPerThread];
+    HWY_ALIGN pixel_type b_p_out_evenT[8 * kRowsPerThread];
+    HWY_ALIGN pixel_type b_p_out_oddT[8 * kRowsPerThread];
+    const HWY_CAPPED(pixel_type, 8) d;
+    const size_t N = Lanes(d);
+    if (chin_residual.w > 16 && rows == kRowsPerThread) {
+      for (; x < chin_residual.w - 9; x += 8) {  // x + 8 stays in range
+        Transpose8x8Block(p_residual + x, b_p_residual, onerow_inr);
+        Transpose8x8Block(p_avg + x, b_p_avg, onerow_in);
+        for (size_t y = 0; y < kRowsPerThread; y++) {
+          b_p_avg[8 * 8 + y] = p_avg[x + 8 + onerow_in * y];
+        }
+        for (size_t i = 0; i < 8; i++) {
+          FastUnsqueeze(
+              b_p_residual + 8 * i, b_p_avg + 8 * i, b_p_avg + 8 * (i + 1),
+              (x + i ? b_p_out_odd + 8 * ((x + i - 1) & 7) : b_p_avg + 8 * i),
+              b_p_out_even + 8 * i, b_p_out_odd + 8 * i);
+        }
+        // Transpose back, then interleave even/odd outputs into chout.
+        Transpose8x8Block(b_p_out_even, b_p_out_evenT, 8);
+        Transpose8x8Block(b_p_out_odd, b_p_out_oddT, 8);
+        for (size_t y = 0; y < kRowsPerThread; y++) {
+          for (size_t i = 0; i < kRowsPerThread; i += N) {
+            auto even = Load(d, b_p_out_evenT + 8 * y + i);
+            auto odd = Load(d, b_p_out_oddT + 8 * y + i);
+            StoreInterleaved(d, even, odd,
+                             p_out + ((x + i) << 1) + onerow_out * y);
+          }
+        }
+      }
+    }
+#endif
+    for (size_t y = 0; y < rows; y++) {  // scalar rest, from column x
+      unsqueeze_row(y0 + y, x);
+    }
+  };
+  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, DivCeil(chin.h, kRowsPerThread),
+                                ThreadPool::NoInit, unsqueeze_span,
+                                "InvHorizontalSqueeze"));
+  input.channel[c] = std::move(chout);
+  return true;
+}
+// Undoes one vertical squeeze: channel c (averages) + rc (residuals) -> c.
+Status InvVSqueeze(Image &input, uint32_t c, uint32_t rc, ThreadPool *pool) {
+  JXL_ASSERT(c < input.channel.size());
+  JXL_ASSERT(rc < input.channel.size());
+  const Channel &chin = input.channel[c];
+  const Channel &chin_residual = input.channel[rc];
+  // These must be valid since we ran MetaApply already.
+  JXL_ASSERT(chin.h == DivCeil(chin.h + chin_residual.h, 2));
+  JXL_ASSERT(chin.w == chin_residual.w);
+
+  if (chin_residual.h == 0) {
+    // Short-circuit: output channel has same dimensions as input.
+    input.channel[c].vshift--;
+    return true;
+  }
+
+  // Note: chin.h >= chin_residual.h and at most 1 different.
+  Channel chout(chin.w, chin.h + chin_residual.h, chin.hshift, chin.vshift - 1);
+  JXL_DEBUG_V(
+      4,
+      "Undoing vertical squeeze of channel %i using residuals in channel "
+      "%i (going from height %" PRIuS " to %" PRIuS ")",
+      c, rc, chin.h, chout.h);
+
+  if (chin_residual.w == 0) {
+    // Short-circuit: channel with no pixels.
+    input.channel[c] = std::move(chout);
+    return true;
+  }
+
+  static constexpr const int kColsPerThread = 64;
+  const auto unsqueeze_slice = [&](const uint32_t task, size_t /* thread */) {
+    const size_t x0 = task * kColsPerThread;
+    const size_t x1 = std::min((size_t)(task + 1) * kColsPerThread, chin.w);
+    const size_t w = x1 - x0;
+    // We only iterate up to std::min(chin_residual.h, chin.h) which is
+    // always chin_residual.h.
+    for (size_t y = 0; y < chin_residual.h; y++) {
+      const pixel_type *JXL_RESTRICT p_residual = chin_residual.Row(y) + x0;
+      const pixel_type *JXL_RESTRICT p_avg = chin.Row(y) + x0;
+      const pixel_type *JXL_RESTRICT p_navg =
+          chin.Row(y + 1 < chin.h ? y + 1 : y) + x0;
+      pixel_type *JXL_RESTRICT p_out = chout.Row(y << 1) + x0;
+      pixel_type *JXL_RESTRICT p_nout = chout.Row((y << 1) + 1) + x0;
+      const pixel_type *p_pout = y > 0 ? chout.Row((y << 1) - 1) + x0 : p_avg;
+      size_t x = 0;
+#if HWY_TARGET != HWY_SCALAR
+      for (; x + 7 < w; x += 8) {  // full groups of 8 columns
+        FastUnsqueeze(p_residual + x, p_avg + x, p_navg + x, p_pout + x,
+                      p_out + x, p_nout + x);
+      }
+#endif
+      for (; x < w; x++) {  // scalar remainder
+        pixel_type_w avg = p_avg[x];
+        pixel_type_w next_avg = p_navg[x];
+        pixel_type_w top = p_pout[x];
+        pixel_type_w tendency = SmoothTendency(top, avg, next_avg);
+        pixel_type_w diff_minus_tendency = p_residual[x];
+        pixel_type_w diff = diff_minus_tendency + tendency;
+        pixel_type_w out = avg + (diff / 2);
+        p_out[x] = out;
+        // If the chin_residual.h == chin.h, the output has an even number
+        // of rows so the next line is fine. Otherwise, this loop won't
+        // write to the last output row which is handled separately.
+        p_nout[x] = out - diff;
+      }
+    }
+  };
+  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, DivCeil(chin.w, kColsPerThread),
+                                ThreadPool::NoInit, unsqueeze_slice,
+                                "InvVertSqueeze"));
+  // Odd output height: the last row has no partner, so copy its average.
+  if (chout.h & 1) {
+    size_t y = chin.h - 1;
+    const pixel_type *p_avg = chin.Row(y);
+    pixel_type *p_out = chout.Row(y << 1);
+    for (size_t x = 0; x < chin.w; x++) {
+      p_out[x] = p_avg[x];
+    }
+  }
+  input.channel[c] = std::move(chout);
+  return true;
+}
+// Undoes all squeeze steps, last one first, erasing the residual channels.
+Status InvSqueeze(Image &input, std::vector<SqueezeParams> parameters,
+                  ThreadPool *pool) {
+  for (int i = parameters.size() - 1; i >= 0; i--) {
+    JXL_RETURN_IF_ERROR(
+        CheckMetaSqueezeParams(parameters[i], input.channel.size()));
+    bool horizontal = parameters[i].horizontal;
+    bool in_place = parameters[i].in_place;
+    uint32_t beginc = parameters[i].begin_c;
+    uint32_t endc = parameters[i].begin_c + parameters[i].num_c - 1;
+    uint32_t offset;
+    if (in_place) {
+      offset = endc + 1;  // residuals directly follow the range
+    } else {
+      offset = input.channel.size() + beginc - endc - 1;  // last num_c channels
+    }
+    if (beginc < input.nb_meta_channels) {
+      // This is checked in MetaSqueeze.
+      JXL_ASSERT(input.nb_meta_channels > parameters[i].num_c);
+      input.nb_meta_channels -= parameters[i].num_c;
+    }
+    // Each channel c pairs with residual channel rc = offset + (c - beginc).
+    for (uint32_t c = beginc; c <= endc; c++) {
+      uint32_t rc = offset + c - beginc;
+      // MetaApply should imply that `rc` is within range, otherwise there's a
+      // programming bug.
+      JXL_ASSERT(rc < input.channel.size());
+      if ((input.channel[c].w < input.channel[rc].w) ||
+          (input.channel[c].h < input.channel[rc].h)) {
+        return JXL_FAILURE("Corrupted squeeze transform");
+      }
+      if (horizontal) {
+        JXL_RETURN_IF_ERROR(InvHSqueeze(input, c, rc, pool));
+      } else {
+        JXL_RETURN_IF_ERROR(InvVSqueeze(input, c, rc, pool));
+      }
+    }
+    input.channel.erase(input.channel.begin() + offset,
+                        input.channel.begin() + offset + (endc - beginc + 1));
+  }
+  return true;
+}
+
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace jxl {
+
+HWY_EXPORT(InvSqueeze);  // Table of the per-target InvSqueeze builds.
+Status InvSqueeze(Image &input, std::vector<SqueezeParams> parameters,
+                  ThreadPool *pool) {
+  return HWY_DYNAMIC_DISPATCH(InvSqueeze)(input, parameters, pool);
+}
+// Fills *parameters with the default squeeze sequence for `image`.
+void DefaultSqueezeParameters(std::vector<SqueezeParams> *parameters,
+                              const Image &image) {
+  int nb_channels = image.channel.size() - image.nb_meta_channels;
+  parameters->clear();
+  if (nb_channels < 1) return;  // no non-meta channel to read w/h from
+  size_t w = image.channel[image.nb_meta_channels].w;
+  size_t h = image.channel[image.nb_meta_channels].h;
+  JXL_DEBUG_V(
+      7, "Default squeeze parameters for %" PRIuS "x%" PRIuS " image: ", w, h);
+
+  // do horizontal first on wide images; vertical first on tall images
+  bool wide = (w > h);
+
+  if (nb_channels > 2 && image.channel[image.nb_meta_channels + 1].w == w &&
+      image.channel[image.nb_meta_channels + 1].h == h) {
+    // assume channels 1 and 2 are chroma, and can be squeezed first for 4:2:0
+    // previews
+    JXL_DEBUG_V(7, "(4:2:0 chroma), %" PRIuS "x%" PRIuS " image", w, h);
+    SqueezeParams params;
+    // horizontal chroma squeeze
+    params.horizontal = true;
+    params.in_place = false;
+    params.begin_c = image.nb_meta_channels + 1;
+    params.num_c = 2;
+    parameters->push_back(params);
+    params.horizontal = false;
+    // vertical chroma squeeze
+    parameters->push_back(params);
+  }
+  SqueezeParams params;
+  params.begin_c = image.nb_meta_channels;
+  params.num_c = nb_channels;
+  params.in_place = true;
+  // Halve all channels in place until w, h <= JXL_MAX_FIRST_PREVIEW_SIZE.
+  if (!wide) {
+    if (h > JXL_MAX_FIRST_PREVIEW_SIZE) {
+      params.horizontal = false;
+      parameters->push_back(params);
+      h = (h + 1) / 2;
+      JXL_DEBUG_V(7, "Vertical (%" PRIuS "x%" PRIuS "), ", w, h);
+    }
+  }
+  while (w > JXL_MAX_FIRST_PREVIEW_SIZE || h > JXL_MAX_FIRST_PREVIEW_SIZE) {
+    if (w > JXL_MAX_FIRST_PREVIEW_SIZE) {
+      params.horizontal = true;
+      parameters->push_back(params);
+      w = (w + 1) / 2;
+      JXL_DEBUG_V(7, "Horizontal (%" PRIuS "x%" PRIuS "), ", w, h);
+    }
+    if (h > JXL_MAX_FIRST_PREVIEW_SIZE) {
+      params.horizontal = false;
+      parameters->push_back(params);
+      h = (h + 1) / 2;
+      JXL_DEBUG_V(7, "Vertical (%" PRIuS "x%" PRIuS "), ", w, h);
+    }
+  }
+  JXL_DEBUG_V(7, "that's it");
+}
+// Validates that [begin_c, begin_c + num_c - 1] lies within the channel list.
+Status CheckMetaSqueezeParams(const SqueezeParams &parameter,
+                              int num_channels) {
+  const int first = parameter.begin_c;
+  const int last = parameter.begin_c + parameter.num_c - 1;  // inclusive
+  const bool first_ok = first >= 0 && first < num_channels;
+  const bool last_ok = last >= first && last >= 0 && last < num_channels;
+  if (first_ok && last_ok) return true;
+  return JXL_FAILURE("Invalid channel range");
+}
+// Applies the squeezes to the channel layout only (sizes, shifts, counts).
+Status MetaSqueeze(Image &image, std::vector<SqueezeParams> *parameters) {
+  if (parameters->empty()) {
+    DefaultSqueezeParameters(parameters, image);
+  }
+
+  for (size_t i = 0; i < parameters->size(); i++) {
+    JXL_RETURN_IF_ERROR(
+        CheckMetaSqueezeParams((*parameters)[i], image.channel.size()));
+    bool horizontal = (*parameters)[i].horizontal;
+    bool in_place = (*parameters)[i].in_place;
+    uint32_t beginc = (*parameters)[i].begin_c;
+    uint32_t endc = (*parameters)[i].begin_c + (*parameters)[i].num_c - 1;
+
+    uint32_t offset;
+    if (beginc < image.nb_meta_channels) {
+      if (endc >= image.nb_meta_channels) {
+        return JXL_FAILURE("Invalid squeeze: mix of meta and nonmeta channels");
+      }
+      if (!in_place) {
+        return JXL_FAILURE(
+            "Invalid squeeze: meta channels require in-place residuals");
+      }
+      image.nb_meta_channels += (*parameters)[i].num_c;
+    }
+    if (in_place) {
+      offset = endc + 1;  // residuals directly follow the range
+    } else {
+      offset = image.channel.size();  // residuals are appended at the end
+    }
+    for (uint32_t c = beginc; c <= endc; c++) {
+      if (image.channel[c].hshift > 30 || image.channel[c].vshift > 30) {
+        return JXL_FAILURE("Too many squeezes: shift > 30");
+      }
+      size_t w = image.channel[c].w;
+      size_t h = image.channel[c].h;
+      if (w == 0 || h == 0) return JXL_FAILURE("Squeezing empty channel");
+      if (horizontal) {
+        image.channel[c].w = (w + 1) / 2;  // averages: ceil(w / 2)
+        if (image.channel[c].hshift >= 0) image.channel[c].hshift++;
+        w = w - (w + 1) / 2;  // residuals: floor(w / 2)
+      } else {
+        image.channel[c].h = (h + 1) / 2;  // averages: ceil(h / 2)
+        if (image.channel[c].vshift >= 0) image.channel[c].vshift++;
+        h = h - (h + 1) / 2;  // residuals: floor(h / 2)
+      }
+      image.channel[c].shrink();
+      Channel dummy(w, h);  // residual channel placeholder
+      dummy.hshift = image.channel[c].hshift;
+      dummy.vshift = image.channel[c].vshift;
+      // Insert the placeholder at the residual slot matching channel c.
+      image.channel.insert(image.channel.begin() + offset + (c - beginc),
+                           std::move(dummy));
+      JXL_DEBUG_V(8, "MetaSqueeze applied, current image: %s",
+                  image.DebugString().c_str());
+    }
+  }
+  return true;
+}
+
+}  // namespace jxl
+
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/squeeze.h b/third-party/libjxl/libjxl/lib/jxl/modular/transform/squeeze.h
new file mode 100644
index 0000000000..fb18710a6f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/squeeze.h
@@ -0,0 +1,90 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_SQUEEZE_H_
+#define LIB_JXL_MODULAR_TRANSFORM_SQUEEZE_H_
+
+// Haar-like transform: halves the resolution in one direction
+// A B   -> (A+B)>>1              in one channel (average)  -> same range as
+// original channel
+//          A-B - tendency        in a new channel ('residual' needed to make
+//          the transform reversible)
+//                                        -> theoretically range could be 2.5
+//                                        times larger (2 times without the
+//                                        'tendency'), but there should be lots
+//                                        of zeroes
+// Repeated application (alternating horizontal and vertical squeezes) results
+// in downscaling
+//
+// The default coefficient ordering is low-frequency to high-frequency, as in
+// M. Antonini, M. Barlaud, P. Mathieu and I. Daubechies, "Image coding using
+// wavelet transform", IEEE Transactions on Image Processing, vol. 1, no. 2, pp.
+// 205-220, April 1992, doi: 10.1109/83.136597.
+
+#include <stdlib.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+#define JXL_MAX_FIRST_PREVIEW_SIZE 8
+
+namespace jxl {
+
+/*
+        int avg=(A+B)>>1;
+        int diff=(A-B);
+        int rA=(diff+(avg<<1)+(diff&1))>>1;
+        int rB=rA-diff;
+
+*/
+//         |A B|C D|E F|
+//           p   a   n             p=avg(A,B), a=avg(C,D), n=avg(E,F)
+//
+// Goal: estimate C-D (avoiding ringing artifacts)
+// (ensuring that in smooth areas, a zero residual corresponds to a smooth
+// gradient)
+
+// best estimate for C: (B + 2*a)/3
+// best estimate for D: (n + 3*a)/4
+// best estimate for C-D:  (4*B - 3*n - a) / 12
+
+// avoid ringing by 1) only doing this if B <= a <= n  or  B >= a >= n
+// (otherwise, this is not a smooth area and we cannot really estimate C-D)
+//                  2) making sure that B <= C <= D <= n  or B >= C >= D >= n
+// Returns the estimate of C-D described above; 0 unless B, a, n are monotone.
+inline pixel_type_w SmoothTendency(pixel_type_w B, pixel_type_w a,
+                                   pixel_type_w n) {
+  pixel_type_w diff = 0;
+  if (B >= a && a >= n) {  // non-increasing
+    diff = (4 * B - 3 * n - a + 6) / 12;
+    //      2C = a<<1 + diff - diff&1 <= 2B  so diff - diff&1 <= 2B - 2a
+    //      2D = a<<1 - diff - diff&1 >= 2n  so diff + diff&1 <= 2a - 2n
+    if (diff - (diff & 1) > 2 * (B - a)) diff = 2 * (B - a) + 1;
+    if (diff + (diff & 1) > 2 * (a - n)) diff = 2 * (a - n);
+  } else if (B <= a && a <= n) {  // non-decreasing
+    diff = (4 * B - 3 * n - a - 6) / 12;
+    //      2C = a<<1 + diff + diff&1 >= 2B  so diff + diff&1 >= 2B - 2a
+    //      2D = a<<1 - diff + diff&1 <= 2n  so diff - diff&1 >= 2a - 2n
+    if (diff + (diff & 1) < 2 * (B - a)) diff = 2 * (B - a) - 1;
+    if (diff - (diff & 1) < 2 * (a - n)) diff = 2 * (a - n);
+  }
+  return diff;
+}
+
+void DefaultSqueezeParameters(std::vector<SqueezeParams> *parameters,
+                              const Image &image);
+// Fails unless the channel range of `parameter` fits in num_channels.
+Status CheckMetaSqueezeParams(const SqueezeParams &parameter, int num_channels);
+
+Status MetaSqueeze(Image &image, std::vector<SqueezeParams> *parameters);
+
+Status InvSqueeze(Image &input, std::vector<SqueezeParams> parameters,
+                  ThreadPool *pool);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_SQUEEZE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/transform.cc b/third-party/libjxl/libjxl/lib/jxl/modular/transform/transform.cc
new file mode 100644
index 0000000000..d9f2b435bf
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/transform.cc
@@ -0,0 +1,98 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/transform.h"
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/palette.h"
+#include "lib/jxl/modular/transform/rct.h"
+#include "lib/jxl/modular/transform/squeeze.h"
+
+namespace jxl {
+// Bundle::Init presumably applies the VisitFields defaults - TODO confirm.
+SqueezeParams::SqueezeParams() { Bundle::Init(this); }
+Transform::Transform(TransformId id) {
+  Bundle::Init(this);
+  this->id = id;  // assigned after Init so the requested id is what remains
+}
+// Runs the inverse routine that matches `id`; unknown ids are a failure.
+Status Transform::Inverse(Image &input, const weighted::Header &wp_header,
+                          ThreadPool *pool) {
+  JXL_DEBUG_V(6, "Input channels (%" PRIuS ", %" PRIuS " meta): ",
+              input.channel.size(), input.nb_meta_channels);
+  if (id == TransformId::kRCT) {
+    return InvRCT(input, begin_c, rct_type, pool);
+  }
+  if (id == TransformId::kSqueeze) {
+    return InvSqueeze(input, squeezes, pool);
+  }
+  if (id == TransformId::kPalette) {
+    return InvPalette(input, begin_c, nb_colors, nb_deltas, predictor,
+                      wp_header, pool);
+  }
+  return JXL_FAILURE("Unknown transformation (ID=%u)",
+                     static_cast<unsigned int>(id));
+}
+// Meta step per transform id; for RCT it only checks channels begin_c..+2.
+Status Transform::MetaApply(Image &input) {
+  JXL_DEBUG_V(6, "MetaApply input: %s", input.DebugString().c_str());
+  switch (id) {
+    case TransformId::kRCT:
+      JXL_DEBUG_V(2, "Transform: kRCT, rct_type=%" PRIu32, rct_type);
+      return CheckEqualChannels(input, begin_c, begin_c + 2);  // 3 channels
+    case TransformId::kSqueeze:
+      JXL_DEBUG_V(2, "Transform: kSqueeze:");
+#if JXL_DEBUG_V_LEVEL >= 2
+      {
+        auto squeezes_copy = squeezes;  // log defaults without storing them
+        if (squeezes_copy.empty()) {
+          DefaultSqueezeParameters(&squeezes_copy, input);
+        }
+        for (const auto &params : squeezes_copy) {
+          JXL_DEBUG_V(
+              2,
+              "  squeeze params: horizontal=%d, in_place=%d, begin_c=%" PRIu32
+              ", num_c=%" PRIu32,
+              params.horizontal, params.in_place, params.begin_c, params.num_c);
+        }
+      }
+#endif
+      return MetaSqueeze(input, &squeezes);
+    case TransformId::kPalette:
+      JXL_DEBUG_V(2,
+                  "Transform: kPalette, begin_c=%" PRIu32 ", num_c=%" PRIu32
+                  ", nb_colors=%" PRIu32 ", nb_deltas=%" PRIu32,
+                  begin_c, num_c, nb_colors, nb_deltas);
+      return MetaPalette(input, begin_c, begin_c + num_c - 1, nb_colors,
+                         nb_deltas, lossy_palette);
+    default:
+      return JXL_FAILURE("Unknown transformation (ID=%u)",
+                         static_cast<unsigned int>(id));
+  }
+}
+// Checks that channels c1..c2 (inclusive) share dimensions and shifts.
+Status CheckEqualChannels(const Image &image, uint32_t c1, uint32_t c2) {
+  const size_t num_channels = image.channel.size();
+  if (c1 > num_channels || c2 >= num_channels || c2 < c1) {
+    return JXL_FAILURE("Invalid channel range: %u..%u (there are only %" PRIuS
+                       " channels)",
+                       c1, c2, num_channels);
+  }
+  if (c1 < image.nb_meta_channels && c2 >= image.nb_meta_channels) {
+    return JXL_FAILURE("Invalid: transforming mix of meta and nonmeta");
+  }
+  const auto &ref = image.channel[c1];
+  for (size_t idx = c1 + 1; idx <= c2; idx++) {
+    const auto &ch = image.channel[idx];
+    const bool same_size = ref.w == ch.w && ref.h == ch.h;
+    const bool same_shift = ref.hshift == ch.hshift && ref.vshift == ch.vshift;
+    if (!same_size || !same_shift) return false;
+  }
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/transform.h b/third-party/libjxl/libjxl/lib/jxl/modular/transform/transform.h
new file mode 100644
index 0000000000..d5d3259f7a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/transform.h
@@ -0,0 +1,148 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_TRANSFORM_H_
+#define LIB_JXL_MODULAR_TRANSFORM_TRANSFORM_H_
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+enum class TransformId : uint32_t {
+  // G, R-G, B-G and variants (including YCoCg).
+  kRCT = 0,
+
+  // Color palette. Fields: begin_c, num_c, nb_colors, nb_deltas, predictor.
+  kPalette = 1,
+
+  // Squeezing (Haar-style)
+  kSqueeze = 2,
+
+  // Invalid for now; Transform::VisitFields rejects it.
+  kInvalid = 3,
+};
+// One squeeze step: direction, residual placement and channel range.
+struct SqueezeParams : public Fields {
+  JXL_FIELDS_NAME(SqueezeParams)
+  bool horizontal;  // true: halve the width, false: halve the height
+  bool in_place;  // residuals right after the range, else appended at the end
+  uint32_t begin_c;  // first channel of the range
+  uint32_t num_c;  // number of channels in the range
+  SqueezeParams();
+  Status VisitFields(Visitor *JXL_RESTRICT visitor) override {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &horizontal));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &in_place));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Bits(3), BitsOffset(6, 8),
+                                           BitsOffset(10, 72),
+                                           BitsOffset(13, 1096), 0, &begin_c));
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Val(1), Val(2), Val(3), BitsOffset(4, 4), 2, &num_c));
+    return true;
+  }
+};
+// A modular transform (RCT, palette or squeeze) and its parameters.
+class Transform : public Fields {
+ public:
+  TransformId id;
+  // for Palette and RCT.
+  uint32_t begin_c;
+  // for RCT. 42 possible values starting from 0.
+  uint32_t rct_type;
+  // Only for Palette and NearLossless.
+  uint32_t num_c;
+  // Only for Palette.
+  uint32_t nb_colors;
+  uint32_t nb_deltas;
+  // for Squeeze. Default squeeze if empty.
+  std::vector<SqueezeParams> squeezes;
+  // for NearLossless, not serialized. NOTE(review): not set in VisitFields.
+  int max_delta_error;
+  // Serialized for Palette.
+  Predictor predictor;
+  // for Palette, not serialized.
+  bool ordered_palette = true;
+  bool lossy_palette = false;
+
+  explicit Transform(TransformId id);
+  // default constructor for bundles.
+  Transform() : Transform(TransformId::kInvalid) {}
+
+  Status VisitFields(Visitor *JXL_RESTRICT visitor) override {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+        Val((uint32_t)TransformId::kRCT), Val((uint32_t)TransformId::kPalette),
+        Val((uint32_t)TransformId::kSqueeze),
+        Val((uint32_t)TransformId::kInvalid), (uint32_t)TransformId::kRCT,
+        reinterpret_cast<uint32_t *>(&id)));
+    if (id == TransformId::kInvalid) {
+      return JXL_FAILURE("Invalid transform ID");
+    }
+    if (visitor->Conditional(id == TransformId::kRCT ||
+                             id == TransformId::kPalette)) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->U32(Bits(3), BitsOffset(6, 8), BitsOffset(10, 72),
+                       BitsOffset(13, 1096), 0, &begin_c));
+    }
+    if (visitor->Conditional(id == TransformId::kRCT)) {
+      // 0-41, default YCoCg.
+      JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(6), Bits(2), BitsOffset(4, 2),
+                                             BitsOffset(6, 10), 6, &rct_type));
+      if (rct_type >= 42) {
+        return JXL_FAILURE("Invalid transform RCT type");
+      }
+    }
+    if (visitor->Conditional(id == TransformId::kPalette)) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->U32(Val(1), Val(3), Val(4), BitsOffset(13, 1), 3, &num_c));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+          BitsOffset(8, 0), BitsOffset(10, 256), BitsOffset(12, 1280),
+          BitsOffset(16, 5376), 256, &nb_colors));
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->U32(Val(0), BitsOffset(8, 1), BitsOffset(10, 257),
+                       BitsOffset(16, 1281), 0, &nb_deltas));
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->Bits(4, (uint32_t)Predictor::Zero,
+                        reinterpret_cast<uint32_t *>(&predictor)));
+      if (predictor >= Predictor::Best) {
+        return JXL_FAILURE("Invalid predictor");
+      }
+    }
+
+    if (visitor->Conditional(id == TransformId::kSqueeze)) {
+      uint32_t num_squeezes = static_cast<uint32_t>(squeezes.size());
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->U32(Val(0), BitsOffset(4, 1), BitsOffset(6, 9),
+                       BitsOffset(8, 41), 0, &num_squeezes));
+      if (visitor->IsReading()) squeezes.resize(num_squeezes);
+      for (size_t i = 0; i < num_squeezes; i++) {
+        JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&squeezes[i]));
+      }
+    }
+    return true;
+  }
+
+  JXL_FIELDS_NAME(Transform)
+  // Both are defined in transform.cc.
+  Status Inverse(Image &input, const weighted::Header &wp_header,
+                 ThreadPool *pool = nullptr);
+  Status MetaApply(Image &input);
+};
+
+Status CheckEqualChannels(const Image &image, uint32_t c1, uint32_t c2);
+
+static inline pixel_type PixelAdd(pixel_type a, pixel_type b) {
+  const uint32_t ua = static_cast<uint32_t>(a), ub = static_cast<uint32_t>(b);
+  return static_cast<pixel_type>(ua + ub);  // uint32_t sum wraps on overflow
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_TRANSFORM_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular_test.cc b/third-party/libjxl/libjxl/lib/jxl/modular_test.cc
new file mode 100644
index 0000000000..76f4a28425
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular_test.cc
@@ -0,0 +1,538 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <array>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/dec/jxl.h"
+#include "lib/extras/metrics.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_toc.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/modular/encoding/enc_encoding.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/encoding/ma_common.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+using test::Roundtrip;
+
+void TestLosslessGroups(size_t group_size_shift) {
+  // Lossless roundtrip of flower.png, shrunk to a quarter in each dimension.
+  const PaddedBytes png = jxl::test::ReadTestData("jxl/flower/flower.png");
+  CodecInOut output;
+  CodecInOut input;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &input));
+  input.ShrinkTo(input.xsize() / 4, input.ysize() / 4);
+
+  CompressParams params;
+  params.SetLossless();
+  params.modular_group_size_shift = group_size_shift;
+
+  size_t num_bytes;
+  JXL_EXPECT_OK(Roundtrip(&input, params, {}, &output, _, &num_bytes));
+  EXPECT_LE(num_bytes, 280000u);
+  JXL_EXPECT_OK(SamePixels(*input.Main().color(), *output.Main().color(), _));
+}
+
+TEST(ModularTest, RoundtripLosslessGroups128) { TestLosslessGroups(0); }
+
+TEST(ModularTest, JXL_TSAN_SLOW_TEST(RoundtripLosslessGroups512)) {
+  TestLosslessGroups(2);  // group_size_shift = 2 (512 = 128 << 2 per the name)
+}
+
+TEST(ModularTest, JXL_TSAN_SLOW_TEST(RoundtripLosslessGroups1024)) {
+  TestLosslessGroups(3);  // group_size_shift = 3 (1024 = 128 << 3 per the name)
+}
+
+TEST(ModularTest, RoundtripLosslessCustomWP_PermuteRCT) {
+  const PaddedBytes orig = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CompressParams cparams;
+  cparams.SetLossless();
+  // 9 = permute to GBR, to test the special case of permutation-only
+  cparams.colorspace = 9;
+  // slowest speed so different WP modes are tried
+  cparams.speed_tier = SpeedTier::kTortoise;
+  cparams.options.predictor = {Predictor::Weighted};
+  // io_out receives the decoded image; io holds the 100x100 source crop.
+  CodecInOut io_out;
+
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+  io.ShrinkTo(100, 100);
+  // Roundtrip, then check the size budget and exact pixel equality.
+  size_t compressed_size;
+  JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io_out, _, &compressed_size));
+  EXPECT_LE(compressed_size, 10169u);
+  JXL_EXPECT_OK(SamePixels(*io.Main().color(), *io_out.Main().color(), _));
+}
+
+TEST(ModularTest, RoundtripLossyDeltaPalette) {
+  const PaddedBytes orig = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CompressParams cparams;
+  cparams.modular_mode = true;
+  cparams.color_transform = jxl::ColorTransform::kNone;
+  cparams.lossy_palette = true;
+  cparams.palette_colors = 0;
+  // io_out receives the decoded image; io holds the 300x100 source crop.
+  CodecInOut io_out;
+
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+  io.ShrinkTo(300, 100);
+  // Lossy path: check the size budget and the Butteraugli distance bound.
+  size_t compressed_size;
+  JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io_out, _, &compressed_size));
+  EXPECT_LE(compressed_size, 6800u);
+  EXPECT_THAT(ButteraugliDistance(io.frames, io_out.frames, ButteraugliParams(),
+                                  GetJxlCms(),
+                                  /*distmap=*/nullptr),
+              IsSlightlyBelow(1.5));
+}
+TEST(ModularTest, RoundtripLossyDeltaPaletteWP) {
+  const PaddedBytes orig = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CompressParams cparams;
+  cparams.SetLossless();
+  cparams.lossy_palette = true;
+  cparams.palette_colors = 0;
+  cparams.options.predictor = jxl::Predictor::Weighted;
+  // Same as RoundtripLossyDeltaPalette but with the weighted predictor.
+  CodecInOut io_out;
+
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+  io.ShrinkTo(300, 100);
+  // Check the size budget and a (looser, 10.1) Butteraugli distance bound.
+  size_t compressed_size;
+  JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io_out, _, &compressed_size));
+  EXPECT_LE(compressed_size, 7000u);
+  EXPECT_THAT(ButteraugliDistance(io.frames, io_out.frames, ButteraugliParams(),
+                                  GetJxlCms(),
+                                  /*distmap=*/nullptr),
+              IsSlightlyBelow(10.1));
+}
+
+TEST(ModularTest, RoundtripLossy) {
+  // Lossy modular encode at distance 2: bounded size and Butteraugli score.
+  const PaddedBytes png = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut decoded;
+  CodecInOut input;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &input));
+
+  CompressParams params;
+  params.modular_mode = true;
+  params.butteraugli_distance = 2.f;
+  params.SetCms(GetJxlCms());
+
+  size_t num_bytes;
+  JXL_EXPECT_OK(Roundtrip(&input, params, {}, &decoded, _, &num_bytes));
+  EXPECT_LE(num_bytes, 30000u);
+  EXPECT_THAT(ButteraugliDistance(input.frames, decoded.frames,
+                                  ButteraugliParams(), GetJxlCms(),
+                                  /*distmap=*/nullptr),
+              IsSlightlyBelow(2.3));
+}
+
+TEST(ModularTest, RoundtripLossy16) {
+  const PaddedBytes orig = jxl::test::ReadTestData(
+      "external/raw.pixls/DJI-FC6310-16bit_709_v4_krita.png");
+  CompressParams cparams;
+  cparams.modular_mode = true;
+  cparams.butteraugli_distance = 2.f;
+  // io_out receives the decoded image; io holds the source.
+  CodecInOut io_out;
+
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+  JXL_CHECK(!io.metadata.m.have_preview);
+  JXL_CHECK(io.frames.size() == 1);
+  JXL_CHECK(io.frames[0].TransformTo(ColorEncoding::SRGB(), GetJxlCms()));
+  io.metadata.m.color_encoding = ColorEncoding::SRGB();
+  // The single frame was converted to sRGB above, and the metadata updated.
+  size_t compressed_size;
+  JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io_out, _, &compressed_size));
+  EXPECT_LE(compressed_size, 300u);
+  EXPECT_THAT(ButteraugliDistance(io.frames, io_out.frames, ButteraugliParams(),
+                                  GetJxlCms(),
+                                  /*distmap=*/nullptr),
+              IsSlightlyBelow(1.6));
+}
+
+TEST(ModularTest, RoundtripExtraProperties) {
+  constexpr size_t kSize = 250;
+  Image image(kSize, kSize, /*bitdepth=*/8, 3);
+  ModularOptions options;
+  options.max_properties = 4;
+  options.predictor = Predictor::Zero;
+  Rng rng(0);  // fixed seed, so the test image is reproducible
+  for (size_t y = 0; y < kSize; y++) {
+    for (size_t x = 0; x < kSize; x++) {
+      // Channels 0 and 2 carry the same random value at each pixel.
+      image.channel[0].plane.Row(y)[x] = image.channel[2].plane.Row(y)[x] =
+          rng.UniformU(0, 9);
+    }
+  }
+  ZeroFillImage(&image.channel[1].plane);  // channel 1 is all zeros
+  BitWriter writer;
+  ASSERT_TRUE(ModularGenericCompress(image, options, &writer));
+  writer.ZeroPadToByte();
+  Image decoded(kSize, kSize, /*bitdepth=*/8, image.channel.size());
+  for (size_t i = 0; i < image.channel.size(); i++) {
+    // Give each decoded channel the same dimensions and shifts as the source.
+    const Channel& ch = image.channel[i];
+    decoded.channel[i] = Channel(ch.w, ch.h, ch.hshift, ch.vshift);
+  }
+  Status status = true;
+  {
+    // The scoped closer reports into `status` when this scope ends.
+    BitReader reader(writer.GetSpan());
+    BitReaderScopedCloser closer(&reader, &status);
+    ASSERT_TRUE(ModularGenericDecompress(&reader, decoded, /*header=*/nullptr,
+                                         /*group_id=*/0, &options));
+  }
+  ASSERT_TRUE(status);
+  ASSERT_EQ(image.channel.size(), decoded.channel.size());
+  // Every sample of every channel must match exactly.
+  for (size_t c = 0; c < image.channel.size(); c++) {
+    for (size_t y = 0; y < image.channel[c].plane.ysize(); y++) {
+      for (size_t x = 0; x < image.channel[c].plane.xsize(); x++) {
+        EXPECT_EQ(image.channel[c].plane.Row(y)[x],
+                  decoded.channel[c].plane.Row(y)[x])
+            << "c = " << c << ", x = " << x << ",  y = " << y;
+      }
+    }
+  }
+}
+
+TEST(ModularTest, RoundtripLosslessCustomSqueeze) {
+  const PaddedBytes orig = jxl::test::ReadTestData(
+      "external/wesaturate/500px/tmshre_riaphotographs_srgb8.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+  // Lossless modular settings (distance 0) with responsive mode enabled.
+  CompressParams cparams;
+  cparams.modular_mode = true;
+  cparams.color_transform = jxl::ColorTransform::kNone;
+  cparams.butteraugli_distance = 0.f;
+  cparams.options.predictor = {Predictor::Zero};
+  cparams.speed_tier = SpeedTier::kThunder;
+  cparams.responsive = 1;
+  // Custom squeeze params, atm just for testing
+  SqueezeParams p;
+  p.horizontal = true;
+  p.in_place = false;
+  p.begin_c = 0;
+  p.num_c = 3;
+  cparams.squeezes.push_back(p);  // 1st: horizontal, not in place, from c=0
+  p.begin_c = 1;
+  p.in_place = true;
+  p.horizontal = false;
+  cparams.squeezes.push_back(p);  // 2nd: not horizontal, in place, from c=1
+  // num_c stays 3 for both entries; the roundtrip must be pixel-exact.
+  CodecInOut io2;
+  size_t compressed_size;
+  JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _, &compressed_size));
+  EXPECT_LE(compressed_size, 265000u);
+  JXL_EXPECT_OK(SamePixels(*io.Main().color(), *io2.Main().color(), _));
+}
+
+struct RoundtripLosslessConfig {  // parameter of the RoundtripLossless TEST_P
+  int bitdepth;    // bits per sample; the test passes it to SetUintSamples()
+  int responsive;  // 0 or 1; the test copies it into cparams.responsive
+};
+class ModularTestParam  // empty fixture; exists only to carry the parameter
+    : public ::testing::TestWithParam<RoundtripLosslessConfig> {};
+
+std::vector<RoundtripLosslessConfig> GenerateLosslessTests() {
+  // Bit depths 1..31 with responsive off, then 1..30 with responsive on.
+  std::vector<RoundtripLosslessConfig> configs;
+  for (int squeeze = 0; squeeze < 2; ++squeeze) {
+    const int max_bitdepth = squeeze ? 30 : 31;
+    for (int bits = 1; bits <= max_bitdepth; ++bits)
+      configs.push_back({bits, squeeze});
+  }
+  return configs;
+}
+std::string LosslessTestDescription(
+    const testing::TestParamInfo<ModularTestParam::ParamType>& info) {
+  // Name is "<bitdepth>bit" plus "Squeeze" when responsive; uses only <string>.
+  std::string name = std::to_string(info.param.bitdepth) + "bit";
+  if (info.param.responsive) name += "Squeeze";
+  return name;
+}
+
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(RoundtripLossless, ModularTestParam,
+                                   testing::ValuesIn(GenerateLosslessTests()),
+                                   LosslessTestDescription);  // names each case
+
+TEST_P(ModularTestParam, RoundtripLossless) {
+  RoundtripLosslessConfig config = GetParam();
+  int bitdepth = config.bitdepth;
+  int responsive = config.responsive;
+  // Load the reference photo; it is requantized to `bitdepth` bits below.
+  ThreadPool* pool = nullptr;
+  Rng generator(123);  // fixed seed, so the added noise is reproducible
+  const PaddedBytes orig = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut io1;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io1, pool));
+
+  // vary the dimensions a bit, in case of bugs related to
+  // even vs odd width or height.
+  size_t xsize = 423 + bitdepth;
+  size_t ysize = 467 + bitdepth;
+
+  CodecInOut io;
+  io.SetSize(xsize, ysize);
+  io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false);
+  io.metadata.m.SetUintSamples(bitdepth);
+  // factor is the largest sample value, 2^bitdepth - 1; ifactor its inverse.
+  double factor = ((1lu << bitdepth) - 1lu);
+  double ifactor = 1.0 / factor;
+  Image3F noise_added(xsize, ysize);
+
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t y = 0; y < ysize; y++) {
+      const float* in = io1.Main().color()->PlaneRow(c, y);
+      float* out = noise_added.PlaneRow(c, y);
+      for (size_t x = 0; x < xsize; x++) {
+        // make the least significant bits random
+        float f = in[x] + generator.UniformF(0.0f, 1.f / 255.f);
+        if (f > 1.f) f = 1.f;
+        // quantize to the bitdepth we're testing
+        unsigned int u = f * factor + 0.5;
+        out[x] = u * ifactor;
+      }
+    }
+  }
+  io.SetFromImage(std::move(noise_added), jxl::ColorEncoding::SRGB(false));
+  // Lossless modular settings (distance 0); responsive comes from the param.
+  CompressParams cparams;
+  cparams.modular_mode = true;
+  cparams.color_transform = jxl::ColorTransform::kNone;
+  cparams.butteraugli_distance = 0.f;
+  cparams.options.predictor = {Predictor::Zero};
+  cparams.speed_tier = SpeedTier::kThunder;
+  cparams.responsive = responsive;
+  CodecInOut io2;
+  size_t compressed_size;
+  JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _, &compressed_size));
+  EXPECT_LE(compressed_size, bitdepth * xsize * ysize / 3);
+  EXPECT_LE(0, ComputeDistance2(io.Main(), io2.Main(), GetJxlCms()));
+  size_t different = 0;  // number of samples whose integer value changed
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t y = 0; y < ysize; y++) {
+      const float* in = io.Main().color()->PlaneRow(c, y);
+      const float* out = io2.Main().color()->PlaneRow(c, y);
+      for (size_t x = 0; x < xsize; x++) {
+        uint32_t uin = in[x] * factor + 0.5;
+        uint32_t uout = out[x] * factor + 0.5;
+        // check that the integer values are identical
+        if (uin != uout) different++;
+      }
+    }
+  }
+  EXPECT_EQ(different, 0);
+}
+
+TEST(ModularTest, RoundtripLosslessCustomFloat) {
+  CodecInOut io;
+  size_t xsize = 100, ysize = 300;
+  io.SetSize(xsize, ysize);
+  // Custom float format: 18 bits per sample, 6 of them exponent bits.
+  io.metadata.m.bit_depth.bits_per_sample = 18;
+  io.metadata.m.bit_depth.exponent_bits_per_sample = 6;
+  io.metadata.m.bit_depth.floating_point_sample = true;
+  io.metadata.m.modular_16_bit_buffer_sufficient = false;
+  ColorEncoding color_encoding;
+  color_encoding.tf.SetTransferFunction(TransferFunction::kLinear);
+  color_encoding.SetColorSpace(ColorSpace::kRGB);
+  Image3F testimage(xsize, ysize);
+  float factor = 1.f / (1 << 14);
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t y = 0; y < ysize; y++) {
+      float* const JXL_RESTRICT row = testimage.PlaneRow(c, y);
+      for (size_t x = 0; x < xsize; x++) {
+        row[x] = factor * (x ^ y);  // XOR pattern scaled by 2^-14
+      }
+    }
+  }
+  io.SetFromImage(std::move(testimage), color_encoding);
+  io.metadata.m.color_encoding = color_encoding;
+  io.metadata.m.SetIntensityTarget(255);
+  // Lossless modular settings (distance 0) with decoding speed tier 2.
+  CompressParams cparams;
+  cparams.modular_mode = true;
+  cparams.color_transform = jxl::ColorTransform::kNone;
+  cparams.butteraugli_distance = 0.f;
+  cparams.options.predictor = {Predictor::Zero};
+  cparams.speed_tier = SpeedTier::kThunder;
+  cparams.decoding_speed_tier = 2;
+  // Roundtrip must fit the size budget and reproduce the pixels exactly.
+  CodecInOut io2;
+  size_t compressed_size;
+  JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _, &compressed_size));
+  EXPECT_LE(compressed_size, 23000u);
+  JXL_EXPECT_OK(SamePixels(*io.Main().color(), *io2.Main().color(), _));
+}
+
+void WriteHeaders(BitWriter* writer, size_t xsize, size_t ysize) {
+  BitWriter::Allotment allotment(writer, 16);  // room for the 2 bytes below
+  writer->Write(8, 0xFF);
+  writer->Write(8, kCodestreamMarker);
+  allotment.ReclaimAndCharge(writer, 0, nullptr);
+  CodecMetadata metadata;
+  EXPECT_TRUE(metadata.size.Set(xsize, ysize));
+  EXPECT_TRUE(WriteSizeHeader(metadata.size, writer, 0, nullptr));
+  metadata.m.color_encoding = ColorEncoding::LinearSRGB(/*is_gray=*/true);
+  metadata.m.xyb_encoded = false;
+  metadata.m.SetUintSamples(31);  // the overflow tests rely on this bit depth
+  EXPECT_TRUE(WriteImageMetadata(metadata.m, writer, 0, nullptr));
+  metadata.transform_data.nonserialized_xyb_encoded = metadata.m.xyb_encoded;
+  EXPECT_TRUE(Bundle::Write(metadata.transform_data, writer, 0, nullptr));
+  writer->ZeroPadToByte();
+  FrameHeader frame_header(&metadata);
+  frame_header.encoding = FrameEncoding::kModular;
+  frame_header.loop_filter.gab = false;      // loop filter: gab off
+  frame_header.loop_filter.epf_iters = 0;    // loop filter: no EPF iterations
+  EXPECT_TRUE(WriteFrameHeader(frame_header, writer, nullptr));
+}
+
+// Tree with a single node, zero predictor, offset 1 and multiplier 1; the
+// entropy code is a prefix tree with alphabet size 256 and all bit lengths 8.
+void WriteHistograms(BitWriter* writer) {  // fixed, hand-assembled bit sequence
+  writer->Write(1, 1);  // default DC quant
+  writer->Write(1, 1);  // has_tree
+  // tree histograms
+  writer->Write(1, 0);         // LZ77 disabled
+  writer->Write(3, 1);         // simple context map
+  writer->Write(1, 1);         // prefix code
+  writer->Write(7, 0x63);      // UnintConfig(3, 2, 1)
+  writer->Write(12, 0xfef);    // alphabet_size = 256
+  writer->Write(32, 0x10003);  // all bit lengths 8
+  // tree tokens
+  writer->Write(8, 0);   // tree leaf
+  writer->Write(8, 0);   // zero predictor
+  writer->Write(8, 64);  // offset = UnpackSigned(ReverseBits(64)) = 1
+  writer->Write(16, 0);  // multiplier = 1
+  // histograms (same five fields as the tree histograms, minus context map)
+  writer->Write(1, 0);         // LZ77 disabled
+  writer->Write(1, 1);         // prefix code
+  writer->Write(7, 0x63);      // UnintConfig(3, 2, 1)
+  writer->Write(12, 0xfef);    // alphabet_size = 256
+  writer->Write(32, 0x10003);  // all bit lengths 8
+}
+
+TEST(ModularTest, PredictorIntegerOverflow) {
+  const size_t xsize = 1;
+  const size_t ysize = 1;
+  BitWriter writer;
+  WriteHeaders(&writer, xsize, ysize);
+  std::vector<BitWriter> group_codes(1);  // the single group is built by hand
+  {
+    BitWriter* bw = &group_codes[0];
+    BitWriter::Allotment allotment(bw, 1 << 20);
+    WriteHistograms(bw);
+    GroupHeader header;
+    header.use_global_tree = true;
+    EXPECT_TRUE(Bundle::Write(header, bw, 0, nullptr));
+    // After UnpackSigned this becomes (1 << 31) - 1, the largest pixel_type,
+    // and after adding the offset we get -(1 << 31).
+    bw->Write(8, 119);
+    bw->Write(28, 0xfffffff);
+    bw->ZeroPadToByte();
+    allotment.ReclaimAndCharge(bw, 0, nullptr);
+  }
+  EXPECT_TRUE(WriteGroupOffsets(group_codes, nullptr, &writer, nullptr));
+  writer.AppendByteAligned(group_codes);
+  // Decode requesting float output and check the wrapped-around sample.
+  PaddedBytes compressed = std::move(writer).TakeBytes();
+  extras::PackedPixelFile ppf;
+  extras::JXLDecompressParams params;
+  params.accepted_formats.push_back({1, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0});
+  EXPECT_TRUE(DecodeImageJXL(compressed.data(), compressed.size(), params,
+                             nullptr, &ppf));
+  ASSERT_EQ(1, ppf.frames.size());
+  const auto& img = ppf.frames[0].color;
+  const auto pixels = reinterpret_cast<const float*>(img.pixels());
+  EXPECT_EQ(-1.0f, pixels[0]);  // the one pixel must decode to exactly -1.0
+}
+
+TEST(ModularTest, UnsqueezeIntegerOverflow) {
+  // Image width is 9 so we can test both the SIMD and non-vector code paths.
+  const size_t xsize = 9;
+  const size_t ysize = 2;
+  BitWriter writer;
+  WriteHeaders(&writer, xsize, ysize);
+  std::vector<BitWriter> group_codes(1);  // the single group is built by hand
+  {
+    BitWriter* bw = &group_codes[0];
+    BitWriter::Allotment allotment(bw, 1 << 20);
+    WriteHistograms(bw);
+    GroupHeader header;
+    header.use_global_tree = true;
+    header.transforms.emplace_back();
+    header.transforms[0].id = TransformId::kSqueeze;
+    SqueezeParams params;  // one in-place, non-horizontal squeeze of channel 0
+    params.horizontal = false;
+    params.in_place = true;
+    params.begin_c = 0;
+    params.num_c = 1;
+    header.transforms[0].squeezes.emplace_back(params);
+    EXPECT_TRUE(Bundle::Write(header, bw, 0, nullptr));
+    for (size_t i = 0; i < xsize * ysize; ++i) {
+      // After UnpackSigned and adding offset, this becomes (1 << 31) - 1, both
+      // in the image and in the residual channels, and unsqueeze makes them
+      // ~(3 << 30) and (1 << 30) (in pixel_type_w) and the first wraps around
+      // to about -(1 << 30).
+      bw->Write(8, 119);
+      bw->Write(28, 0xffffffe);
+    }
+    bw->ZeroPadToByte();
+    allotment.ReclaimAndCharge(bw, 0, nullptr);
+  }
+  EXPECT_TRUE(WriteGroupOffsets(group_codes, nullptr, &writer, nullptr));
+  writer.AppendByteAligned(group_codes);
+  // Decode requesting float output; row 0 must be -0.5 and row 1 must be 0.5.
+  PaddedBytes compressed = std::move(writer).TakeBytes();
+  extras::PackedPixelFile ppf;
+  extras::JXLDecompressParams params;
+  params.accepted_formats.push_back({1, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0});
+  EXPECT_TRUE(DecodeImageJXL(compressed.data(), compressed.size(), params,
+                             nullptr, &ppf));
+  ASSERT_EQ(1, ppf.frames.size());
+  const auto& img = ppf.frames[0].color;
+  const auto pixels = reinterpret_cast<const float*>(img.pixels());
+  for (size_t x = 0; x < xsize; ++x) {
+    EXPECT_NEAR(-0.5f, pixels[x], 1e-10);
+    EXPECT_NEAR(0.5f, pixels[xsize + x], 1e-10);
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/noise.h b/third-party/libjxl/libjxl/lib/jxl/noise.h
new file mode 100644
index 0000000000..d897ea3abe
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/noise.h
@@ -0,0 +1,60 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_NOISE_H_
+#define LIB_JXL_NOISE_H_
+
+// Noise parameters shared by encoder/decoder.
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <cmath>
+#include <utility>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+const float kNoisePrecision = 1 << 10;  // = 1024
+
+struct NoiseParams {
+  // LUT index is an intensity of pixel / mean intensity of patch
+  static constexpr size_t kNumNoisePoints = 8;
+  float lut[kNumNoisePoints];
+
+  // Resets every LUT entry to zero.
+  void Clear() { std::fill(lut, lut + kNumNoisePoints, 0.f); }
+  // True when any LUT entry has a magnitude above 1e-3.
+  bool HasAny() const {
+    for (size_t i = 0; i < kNumNoisePoints; ++i) {
+      if (std::abs(lut[i]) > 1e-3f) return true;
+    }
+    return false;
+  }
+};
+
+static inline std::pair<int, float> IndexAndFrac(float x) {
+  constexpr size_t kScaleNumerator = NoiseParams::kNumNoisePoints - 2;
+  // TODO: instead of 1, this should be a proper Y range.
+  constexpr float kScale = kScaleNumerator / 1;
+  const float scaled_x = std::max(0.f, x * kScale);
+  if (JXL_UNLIKELY(scaled_x >= kScaleNumerator + 1)) {
+    // Past the end: clamp to index kScaleNumerator with a fraction of 1.
+    return std::make_pair(static_cast<int>(kScaleNumerator), 1.f);
+  }
+  float floor_x;
+  const float frac_x = std::modf(scaled_x, &floor_x);
+  return std::make_pair(static_cast<int>(floor_x), frac_x);
+}
+
+struct NoiseLevel {  // NOTE(review): presumably one (intensity, noise) sample
+  float noise_level;
+  float intensity;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_NOISE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/opsin_image_test.cc b/third-party/libjxl/libjxl/lib/jxl/opsin_image_test.cc
new file mode 100644
index 0000000000..07fd824f14
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/opsin_image_test.cc
@@ -0,0 +1,123 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include <hwy/tests/test_util-inl.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/matrix_ops.h"
+#include "lib/jxl/opsin_params.h"
+
+namespace jxl {
+namespace {
+
+// Convert a single linear sRGB color to xyb, using the exact image conversion
+// procedure that jpeg xl uses.
+void LinearSrgbToOpsin(float rgb_r, float rgb_g, float rgb_b,
+                       float* JXL_RESTRICT xyb_x, float* JXL_RESTRICT xyb_y,
+                       float* JXL_RESTRICT xyb_b) {
+  Image3F linear(1, 1);  // 1x1 image holding the single input color
+  linear.PlaneRow(0, 0)[0] = rgb_r;
+  linear.PlaneRow(1, 0)[0] = rgb_g;
+  linear.PlaneRow(2, 0)[0] = rgb_b;
+  // Wrap the pixel in a float32 linear-sRGB bundle and convert it with ToXYB.
+  ImageMetadata metadata;
+  metadata.SetFloat32Samples();
+  metadata.color_encoding = ColorEncoding::LinearSRGB();
+  ImageBundle ib(&metadata);
+  ib.SetFromImage(std::move(linear), metadata.color_encoding);
+  Image3F opsin(1, 1);
+  (void)ToXYB(ib, /*pool=*/nullptr, &opsin, GetJxlCms());
+  // ToXYB's return value is deliberately discarded above.
+  *xyb_x = opsin.PlaneRow(0, 0)[0];
+  *xyb_y = opsin.PlaneRow(1, 0)[0];
+  *xyb_b = opsin.PlaneRow(2, 0)[0];
+}
+
+// Convert a single XYB color to linear sRGB, using the exact image conversion
+// procedure that jpeg xl uses.
+void OpsinToLinearSrgb(float xyb_x, float xyb_y, float xyb_b,
+                       float* JXL_RESTRICT rgb_r, float* JXL_RESTRICT rgb_g,
+                       float* JXL_RESTRICT rgb_b) {
+  Image3F opsin(1, 1);  // 1x1 image holding the single input color
+  opsin.PlaneRow(0, 0)[0] = xyb_x;
+  opsin.PlaneRow(1, 0)[0] = xyb_y;
+  opsin.PlaneRow(2, 0)[0] = xyb_b;
+  Image3F linear(1, 1);  // receives the converted pixel
+  OpsinParams opsin_params;
+  opsin_params.Init(/*intensity_target=*/255.0f);
+  OpsinToLinear(opsin, Rect(opsin), nullptr, &linear, opsin_params);
+  *rgb_r = linear.PlaneRow(0, 0)[0];
+  *rgb_g = linear.PlaneRow(1, 0)[0];
+  *rgb_b = linear.PlaneRow(2, 0)[0];
+}
+
+void OpsinRoundtripTestRGB(float r, float g, float b) {
+  // RGB -> XYB -> RGB through the helpers must return each input within 1e-3.
+  const float rgb_in[3] = {r, g, b};
+  float xyb[3], rgb_out[3];
+  LinearSrgbToOpsin(r, g, b, &xyb[0], &xyb[1], &xyb[2]);
+  OpsinToLinearSrgb(xyb[0], xyb[1], xyb[2], &rgb_out[0], &rgb_out[1],
+                    &rgb_out[2]);
+  for (int c = 0; c < 3; ++c) EXPECT_NEAR(rgb_in[c], rgb_out[c], 1e-3);
+}
+
+TEST(OpsinImageTest, VerifyOpsinAbsorbanceInverseMatrix) {
+  // Inverting the inverse matrix must give back the forward matrix.
+  const float* const inverse = GetOpsinAbsorbanceInverseMatrix();
+  float forward[9];  // writable copy, inverted in place below
+  for (int k = 0; k < 9; ++k) forward[k] = inverse[k];
+  EXPECT_TRUE(Inv3x3Matrix(forward));
+  for (int k = 0; k < 9; ++k) {
+    EXPECT_NEAR(forward[k], kOpsinAbsorbanceMatrix[k], 1e-6);
+  }
+}
+
+TEST(OpsinImageTest, OpsinRoundtrip) {
+  OpsinRoundtripTestRGB(0, 0, 0);  // grays: black, 1/255, 128/255, white
+  OpsinRoundtripTestRGB(1. / 255, 1. / 255, 1. / 255);
+  OpsinRoundtripTestRGB(128. / 255, 128. / 255, 128. / 255);
+  OpsinRoundtripTestRGB(1, 1, 1);
+  // Blue only.
+  OpsinRoundtripTestRGB(0, 0, 1. / 255);
+  OpsinRoundtripTestRGB(0, 0, 128. / 255);
+  OpsinRoundtripTestRGB(0, 0, 1);
+  // Green only.
+  OpsinRoundtripTestRGB(0, 1. / 255, 0);
+  OpsinRoundtripTestRGB(0, 128. / 255, 0);
+  OpsinRoundtripTestRGB(0, 1, 0);
+  // Red only.
+  OpsinRoundtripTestRGB(1. / 255, 0, 0);
+  OpsinRoundtripTestRGB(128. / 255, 0, 0);
+  OpsinRoundtripTestRGB(1, 0, 0);
+}
+
+TEST(OpsinImageTest, VerifyZero) {
+  // Test that black color (zero energy) is 0,0,0 in xyb.
+  float x, y, b;
+  LinearSrgbToOpsin(0, 0, 0, &x, &y, &b);
+  EXPECT_NEAR(0, x, 1e-9);  // x is held to a tighter tolerance than y and b
+  EXPECT_NEAR(0, y, 1e-7);
+  EXPECT_NEAR(0, b, 1e-7);
+}
+
+TEST(OpsinImageTest, VerifyGray) {
+  // Test that grayscale colors have a fixed y/b ratio and x==0.
+  for (size_t i = 1; i < 255; i++) {  // levels 1..254; 0 and 255 not covered
+    float x, y, b;
+    LinearSrgbToOpsin(i / 255., i / 255., i / 255., &x, &y, &b);
+    EXPECT_NEAR(0, x, 1e-6);
+    EXPECT_NEAR(kYToBRatio, b / y, 3e-5);
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/opsin_inverse_test.cc b/third-party/libjxl/libjxl/lib/jxl/opsin_inverse_test.cc
new file mode 100644
index 0000000000..a948693ac6
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/opsin_inverse_test.cc
@@ -0,0 +1,59 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+TEST(OpsinInverseTest, LinearInverseInverts) {
+  Image3F linear(128, 128);
+  RandomFillImage(&linear, 0.0f, 1.0f);
+  // Convert a copy to XYB; `linear` itself stays as the reference.
+  CodecInOut io;
+  io.metadata.m.SetFloat32Samples();
+  io.metadata.m.color_encoding = ColorEncoding::LinearSRGB();
+  Image3F linear2(128, 128);
+  CopyImageTo(linear, &linear2);
+  io.SetFromImage(std::move(linear2), io.metadata.m.color_encoding);
+  ThreadPool* null_pool = nullptr;
+  Image3F opsin(io.xsize(), io.ysize());
+  (void)ToXYB(io.Main(), null_pool, &opsin, GetJxlCms());
+  // Invert in place; `opsin` holds linear values again afterwards.
+  OpsinParams opsin_params;
+  opsin_params.Init(/*intensity_target=*/255.0f);
+  OpsinToLinearInplace(&opsin, /*pool=*/nullptr, opsin_params);
+
+  JXL_ASSERT_OK(VerifyRelativeError(linear, opsin, 3E-3, 2E-4, _));
+}
+
+TEST(OpsinInverseTest, YcbCrInverts) {
+  Image3F rgb(128, 128);
+  RandomFillImage(&rgb, 0.0f, 1.0f);
+  // NOTE(review): outputs go to planes 1, 0, 2 in that order - confirm layout.
+  ThreadPool* null_pool = nullptr;
+  Image3F ycbcr(rgb.xsize(), rgb.ysize());
+  EXPECT_TRUE(RgbToYcbcr(rgb.Plane(0), rgb.Plane(1), rgb.Plane(2),
+                         &ycbcr.Plane(1), &ycbcr.Plane(0), &ycbcr.Plane(2),
+                         null_pool));
+  // Convert back and compare against the original RGB image.
+  Image3F rgb2(rgb.xsize(), rgb.ysize());
+  YcbcrToRgb(ycbcr, &rgb2, Rect(rgb));
+
+  JXL_ASSERT_OK(VerifyRelativeError(rgb, rgb2, 4E-5, 4E-7, _));
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/opsin_params.cc b/third-party/libjxl/libjxl/lib/jxl/opsin_params.cc
new file mode 100644
index 0000000000..ec3db4ee76
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/opsin_params.cc
@@ -0,0 +1,44 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/opsin_params.h"
+
+#include <stdlib.h>
+
+#include "lib/jxl/matrix_ops.h"
+
+namespace jxl {
+
+#define INVERSE_OPSIN_FROM_SPEC 1
+
+const float* GetOpsinAbsorbanceInverseMatrix() {
+#if INVERSE_OPSIN_FROM_SPEC
+  return DefaultInverseOpsinAbsorbanceMatrix();  // hard-coded 9-entry table
+#else   // INVERSE_OPSIN_FROM_SPEC
+  // Compute the inverse opsin matrix from the forward matrix. Less precise
+  // than taking the values from the specification, but must be used if the
+  // forward transform is changed and the spec will require updating.
+  static const float* const kInverse = [] {
+    static float inverse[9];
+    for (int i = 0; i < 9; i++) {
+      inverse[i] = kOpsinAbsorbanceMatrix[i];
+    }
+    Inv3x3Matrix(inverse);  // NOTE(review): the return value is ignored here
+    return inverse;
+  }();
+  return kInverse;
+#endif  // INVERSE_OPSIN_FROM_SPEC
+}
+
+void InitSIMDInverseMatrix(const float* JXL_RESTRICT inverse,
+                           float* JXL_RESTRICT simd_inverse,
+                           float intensity_target) {
+  const float scale = 255.0f / intensity_target;
+  for (size_t i = 0; i < 9; ++i) {
+    for (size_t k = 0; k < 4; ++k) simd_inverse[4 * i + k] = inverse[i] * scale;
+  }
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/opsin_params.h b/third-party/libjxl/libjxl/lib/jxl/opsin_params.h
new file mode 100644
index 0000000000..3a7da97d8a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/opsin_params.h
@@ -0,0 +1,86 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_OPSIN_PARAMS_H_
+#define LIB_JXL_OPSIN_PARAMS_H_
+
+// Constants that define the XYB color space.
+
+#include <stdlib.h>
+
+#include <cmath>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+// Parameters for opsin absorbance.
+static const float kM02 = 0.078f;
+static const float kM00 = 0.30f;
+static const float kM01 = 1.0f - kM02 - kM00;  // kM00 + kM01 + kM02 == 1
+
+static const float kM12 = 0.078f;
+static const float kM10 = 0.23f;
+static const float kM11 = 1.0f - kM12 - kM10;  // kM10 + kM11 + kM12 == 1
+
+static const float kM20 = 0.24342268924547819f;
+static const float kM21 = 0.20476744424496821f;
+static const float kM22 = 1.0f - kM20 - kM21;  // kM20 + kM21 + kM22 == 1
+
+static const float kBScale = 1.0f;
+static const float kYToBRatio = 1.0f;  // works better with 0.50017729543783418
+static const float kBToYRatio = 1.0f / kYToBRatio;
+
+static const float kB0 = 0.0037930732552754493f;
+static const float kB1 = kB0;  // all three biases share one value
+static const float kB2 = kB0;
+
+// Opsin absorbance matrix is now frozen.
+static const float kOpsinAbsorbanceMatrix[9] = {
+    kM00, kM01, kM02, kM10, kM11, kM12, kM20, kM21, kM22,
+};
+
+// Must be the inverse matrix of kOpsinAbsorbanceMatrix and match the spec.
+static inline const float* DefaultInverseOpsinAbsorbanceMatrix() {
+  static const float kDefaultInverseOpsinAbsorbanceMatrix[9] = {  // read-only
+      11.031566901960783f,  -9.866943921568629f, -0.16462299647058826f,
+      -3.254147380392157f,  4.418770392156863f,  -0.16462299647058826f,
+      -3.6588512862745097f, 2.7129230470588235f, 1.9459282392156863f};
+  return kDefaultInverseOpsinAbsorbanceMatrix;
+}
+
+// Returns 3x3 row-major matrix inverse of kOpsinAbsorbanceMatrix.
+// opsin_image_test verifies this is actually the inverse.
+const float* GetOpsinAbsorbanceInverseMatrix();
+
+void InitSIMDInverseMatrix(const float* JXL_RESTRICT inverse,
+                           float* JXL_RESTRICT simd_inverse,
+                           float intensity_target);
+
+static const float kOpsinAbsorbanceBias[3] = {  // kB0..kB2, all equal
+    kB0,
+    kB1,
+    kB2,
+};
+
+static const float kNegOpsinAbsorbanceBiasRGB[4] = {  // negated biases, then 1
+    -kOpsinAbsorbanceBias[0], -kOpsinAbsorbanceBias[1],
+    -kOpsinAbsorbanceBias[2], 1.0f};
+
+static const float kScaledXYBOffset[3] = {  // presumably X, Y, B - confirm
+    0.015386134f,
+    0.0f,
+    0.27770459f,
+};
+
+static const float kScaledXYBScale[3] = {  // presumably X, Y, B - confirm
+    22.995788804f,
+    1.183000077f,
+    1.502141333f,
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_OPSIN_PARAMS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/padded_bytes_test.cc b/third-party/libjxl/libjxl/lib/jxl/padded_bytes_test.cc
new file mode 100644
index 0000000000..9ca7a22423
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/padded_bytes_test.cc
@@ -0,0 +1,126 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/padded_bytes.h"
+
+#include <numeric>  // iota
+#include <vector>
+
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+TEST(PaddedBytesTest, TestNonEmptyFirstByteZero) {
+  PaddedBytes bytes(1);
+  EXPECT_EQ(0, bytes[0]);
+  // Growing the logical size must leave the first byte at zero...
+  bytes.resize(20);
+  EXPECT_EQ(0, bytes[0]);
+  // ...and so must growing the capacity.
+  bytes.reserve(200);
+  EXPECT_EQ(0, bytes[0]);
+}
+
+TEST(PaddedBytesTest, TestEmptyFirstByteZero) {
+  PaddedBytes bytes(0);
+  // Nothing exists to copy on resize, yet the first byte must still read zero.
+  bytes.resize(20);
+  EXPECT_EQ(0, bytes[0]);
+}
+
+TEST(PaddedBytesTest, TestFillWithoutReserve) {
+  PaddedBytes bytes;  // no reserve() call before the pushes
+  for (size_t value = 0; value != 170u; value++) {
+    bytes.push_back(value);
+  }
+  EXPECT_EQ(170u, bytes.size());
+  EXPECT_GE(bytes.capacity(), 170u);
+}
+
+TEST(PaddedBytesTest, TestFillWithExactReserve) {
+  PaddedBytes bytes;
+  bytes.reserve(170);  // exactly as many bytes as are pushed below
+  for (size_t value = 0; value != 170u; value++) {
+    bytes.push_back(value);
+  }
+  EXPECT_EQ(170u, bytes.size());
+  EXPECT_EQ(bytes.capacity(), 170u);
+}
+
+TEST(PaddedBytesTest, TestFillWithMoreReserve) {
+  PaddedBytes bytes;
+  bytes.reserve(171);  // one byte more than is pushed below
+  for (size_t value = 0; value != 170u; value++) {
+    bytes.push_back(value);
+  }
+  EXPECT_EQ(170u, bytes.size());
+  EXPECT_GT(bytes.capacity(), 170u);
+}
+
+// assign() accepts a source range lying inside the container's own data.
+TEST(PaddedBytesTest, TestAssignFromWithin) {
+  PaddedBytes bytes;
+  bytes.reserve(256);
+  for (size_t value = 0; value != 256; value++) {
+    bytes.push_back(value);
+  }
+  bytes.assign(bytes.data() + 64, bytes.data() + 192);
+  EXPECT_EQ(128u, bytes.size());
+  for (size_t pos = 0; pos != 128; pos++) {
+    EXPECT_EQ(pos + 64, bytes[pos]);
+  }
+}
+
+// assign() may take a range spanning live bytes and still-allocated slack.
+TEST(PaddedBytesTest, TestAssignReclaim) {
+  PaddedBytes bytes;
+  bytes.reserve(256);
+  for (size_t value = 0; value != 256; value++) {
+    bytes.push_back(value);
+  }
+
+  const uint8_t* base = bytes.data();
+  bytes.resize(200);
+  // Shrinking must neither move the buffer nor change its capacity.
+  EXPECT_EQ(base, bytes.data());
+  EXPECT_EQ(256u, bytes.capacity());
+
+  // Source range runs past size() into the original allocation.
+  bytes.assign(bytes.data() + 100, bytes.data() + 240);
+  EXPECT_EQ(140u, bytes.size());
+
+  for (size_t pos = 0; pos != 140; pos++) {
+    EXPECT_EQ(pos + 100, bytes[pos]);
+  }
+}
+
+// assign() also accepts shorter and longer ranges from unrelated buffers.
+TEST(PaddedBytesTest, TestAssignOutside) {
+  PaddedBytes bytes;
+  bytes.resize(400);
+  std::iota(bytes.begin(), bytes.end(), 1);
+
+  std::vector<uint8_t> shorter(64);
+  std::iota(shorter.begin(), shorter.end(), 500);
+
+  bytes.assign(shorter.data(), shorter.data() + shorter.size());
+  EXPECT_EQ(64u, bytes.size());
+  for (size_t pos = 0; pos != 64; pos++) {
+    EXPECT_EQ((pos + 500) & 0xFF, bytes[pos]);
+  }
+
+  std::vector<uint8_t> longer(1000);
+  std::iota(longer.begin(), longer.end(), 600);
+
+  bytes.assign(longer.data(), longer.data() + longer.size());
+  EXPECT_EQ(1000u, bytes.size());
+  for (size_t pos = 0; pos != 1000; pos++) {
+    EXPECT_EQ((pos + 600) & 0xFF, bytes[pos]);
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/passes_state.cc b/third-party/libjxl/libjxl/lib/jxl/passes_state.cc
new file mode 100644
index 0000000000..2f287ec9b6
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/passes_state.cc
@@ -0,0 +1,70 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/passes_state.h"
+
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/common.h"
+
+namespace jxl {
+
+Status InitializePassesSharedState(const FrameHeader& frame_header,
+                                   PassesSharedState* JXL_RESTRICT shared,
+                                   bool encoder) {
+  // Fills `shared` for one frame: header, dimensions, per-block images, DC.
+  JXL_ASSERT(frame_header.nonserialized_metadata != nullptr);
+  shared->frame_header = frame_header;
+  shared->metadata = frame_header.nonserialized_metadata;
+  shared->frame_dim = frame_header.ToFrameDimensions();
+  shared->image_features.patches.SetPassesSharedState(shared);
+  const FrameDimensions& frame_dim = shared->frame_dim;
+
+  shared->ac_strategy =
+      AcStrategyImage(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+  shared->raw_quant_field =
+      ImageI(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+  shared->epf_sharpness =
+      ImageB(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+  shared->cmap = ColorCorrelationMap(frame_dim.xsize, frame_dim.ysize);
+
+  // In the decoder, we allocate coeff orders afterwards, when we know how many
+  // we will actually need.
+  shared->coeff_order_size = kCoeffOrderMaxSize;
+  if (encoder &&
+      shared->coeff_orders.size() <
+          frame_header.passes.num_passes * kCoeffOrderMaxSize &&
+      frame_header.encoding == FrameEncoding::kVarDCT) {
+    shared->coeff_orders.resize(frame_header.passes.num_passes *
+                                kCoeffOrderMaxSize);
+  }
+
+  shared->quant_dc = ImageB(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+
+  bool use_dc_frame = !!(frame_header.flags & FrameHeader::kUseDcFrame);
+  if (!encoder && use_dc_frame) {
+    if (frame_header.dc_level >= 4) {  // dc_frames has only 4 slots
+      return JXL_FAILURE("Invalid DC level for kUseDcFrame: %u",
+                         frame_header.dc_level);
+    }
+    shared->dc_storage = Image3F();  // own DC storage is unused on this path
+    shared->dc = &shared->dc_frames[frame_header.dc_level];
+    if (shared->dc->xsize() == 0) {
+      return JXL_FAILURE(
+          "kUseDcFrame specified for dc_level %u, but no frame was decoded "
+          "with level %u",
+          frame_header.dc_level, frame_header.dc_level + 1);
+    }
+    ZeroFillImage(&shared->quant_dc);
+  } else {  // encoder, or no DC frame requested: DC lives in dc_storage
+    shared->dc_storage =
+        Image3F(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+    shared->dc = &shared->dc_storage;
+  }
+
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/passes_state.h b/third-party/libjxl/libjxl/lib/jxl/passes_state.h
new file mode 100644
index 0000000000..8d648a8feb
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/passes_state.h
@@ -0,0 +1,133 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_PASSES_STATE_H_
+#define LIB_JXL_PASSES_STATE_H_
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/noise.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+
+// Structures that hold the (en/de)coder state for a JPEG XL kVarDCT
+// (en/de)coder.
+
+namespace jxl {
+
+struct ImageFeatures {  // noise parameters, patch dictionary and splines
+  NoiseParams noise_params;
+  PatchDictionary patches;
+  Splines splines;
+};
+
+// State common to both encoder and decoder.
+// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
+struct PassesSharedState {
+  PassesSharedState() : frame_header(nullptr) {}
+
+  // Headers and metadata.
+  const CodecMetadata* metadata = nullptr;  // null until initialized
+  FrameHeader frame_header;
+
+  FrameDimensions frame_dim;
+
+  // Control fields and parameters.
+  AcStrategyImage ac_strategy;
+
+  // Dequant matrices + quantizer.
+  DequantMatrices matrices;
+  Quantizer quantizer{&matrices};
+  ImageI raw_quant_field;
+
+  // Per-block side information for EPF detail preservation.
+  ImageB epf_sharpness;
+
+  ColorCorrelationMap cmap;
+
+  ImageFeatures image_features;
+
+  // Memory area for storing coefficient orders.
+  // `coeff_order_size` is the size used by *one* set of coefficient orders (at
+  // most kCoeffOrderMaxSize). A set of coefficient orders is present for each
+  // pass.
+  size_t coeff_order_size = 0;
+  std::vector<coeff_order_t> coeff_orders;
+
+  // Decoder-side DC and quantized DC.
+  ImageB quant_dc;
+  Image3F dc_storage;
+  const Image3F* JXL_RESTRICT dc = &dc_storage;  // defaults to own storage
+
+  BlockCtxMap block_ctx_map;
+
+  Image3F dc_frames[4];
+
+  struct {
+    ImageBundle frame;
+    // ImageBundle doesn't yet have a simple way to state it is in XYB.
+    bool ib_is_in_xyb = false;
+  } reference_frames[4] = {};
+
+  // Number of pre-clustered set of histograms (with the same ctx map), per
+  // pass. Encoded as num_histograms - 1.
+  size_t num_histograms = 0;
+
+  // Dereferences `metadata`, which must have been set beforehand.
+  bool IsGrayscale() const { return metadata->m.color_encoding.IsGray(); }
+
+  Rect GroupRect(size_t group_index) const {  // bounds: xsize, ysize
+    const size_t gx = group_index % frame_dim.xsize_groups;
+    const size_t gy = group_index / frame_dim.xsize_groups;
+    const Rect rect(gx * frame_dim.group_dim, gy * frame_dim.group_dim,
+                    frame_dim.group_dim, frame_dim.group_dim, frame_dim.xsize,
+                    frame_dim.ysize);
+    return rect;
+  }
+
+  Rect PaddedGroupRect(size_t group_index) const {  // bounds: padded sizes
+    const size_t gx = group_index % frame_dim.xsize_groups;
+    const size_t gy = group_index / frame_dim.xsize_groups;
+    const Rect rect(gx * frame_dim.group_dim, gy * frame_dim.group_dim,
+                    frame_dim.group_dim, frame_dim.group_dim,
+                    frame_dim.xsize_padded, frame_dim.ysize_padded);
+    return rect;
+  }
+
+  Rect BlockGroupRect(size_t group_index) const {  // group_dim / 8, in blocks
+    const size_t gx = group_index % frame_dim.xsize_groups;
+    const size_t gy = group_index / frame_dim.xsize_groups;
+    const Rect rect(gx * (frame_dim.group_dim >> 3),
+                    gy * (frame_dim.group_dim >> 3), frame_dim.group_dim >> 3,
+                    frame_dim.group_dim >> 3, frame_dim.xsize_blocks,
+                    frame_dim.ysize_blocks);
+    return rect;
+  }
+
+  Rect DCGroupRect(size_t group_index) const {  // bounds are in block units
+    const size_t gx = group_index % frame_dim.xsize_dc_groups;
+    const size_t gy = group_index / frame_dim.xsize_dc_groups;
+    const Rect rect(gx * frame_dim.group_dim, gy * frame_dim.group_dim,
+                    frame_dim.group_dim, frame_dim.group_dim,
+                    frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+    return rect;
+  }
+};
+
+// Initializes the state information that is shared between encoder and decoder.
+Status InitializePassesSharedState(const FrameHeader& frame_header,
+                                   PassesSharedState* JXL_RESTRICT shared,
+                                   bool encoder = false);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_PASSES_STATE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/passes_test.cc b/third-party/libjxl/libjxl/lib/jxl/passes_test.cc
new file mode 100644
index 0000000000..b1bc7fb314
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/passes_test.cc
@@ -0,0 +1,408 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+
+#include <future>
+#include <string>
+#include <utility>
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+
+using test::Roundtrip;
+using test::ThreadPoolForTests;
+
+namespace {
+
+TEST(PassesTest, RoundtripSmallPasses) {
+  const PaddedBytes orig = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+  io.ShrinkTo(io.xsize() / 8, io.ysize() / 8);  // 1/8 of width and height
+
+  CompressParams cparams;
+  cparams.butteraugli_distance = 1.0;
+  cparams.progressive_mode = true;
+  cparams.SetCms(GetJxlCms());
+
+  CodecInOut io2;
+  JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _));
+  EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, ButteraugliParams(),
+                                  GetJxlCms(),
+                                  /*distmap=*/nullptr),
+              IsSlightlyBelow(1.1));  // requested distance was 1.0
+}
+
+TEST(PassesTest, RoundtripUnalignedPasses) {
+  const PaddedBytes orig = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+  io.ShrinkTo(io.xsize() / 12, io.ysize() / 7);  // different x and y divisors
+
+  CompressParams cparams;
+  cparams.butteraugli_distance = 2.0;
+  cparams.progressive_mode = true;
+  cparams.SetCms(GetJxlCms());
+
+  CodecInOut io2;
+  JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _));
+  EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, ButteraugliParams(),
+                                  GetJxlCms(),
+                                  /*distmap=*/nullptr),
+              IsSlightlyBelow(1.72));  // requested distance was 2.0
+}
+
+TEST(PassesTest, RoundtripMultiGroupPasses) {
+  const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+  CodecInOut io;
+  {
+    ThreadPoolForTests pool(4);  // scoped so it is gone before the runs below
+    ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+  }
+  io.ShrinkTo(600, 1024);  // partial X, full Y group
+
+  auto test = [&](float target_distance, float threshold) {
+    ThreadPoolForTests pool(4);  // each invocation gets its own pool
+    CompressParams cparams;
+    cparams.butteraugli_distance = target_distance;
+    cparams.progressive_mode = true;
+    cparams.SetCms(GetJxlCms());
+    CodecInOut io2;
+    JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _,
+                            /* compressed_size */ nullptr, &pool));
+    EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, ButteraugliParams(),
+                                    GetJxlCms(),
+                                    /*distmap=*/nullptr, &pool),
+                IsSlightlyBelow(target_distance + threshold));
+  };  // launched twice below; std::async futures block in their destructors
+
+  auto run1 = std::async(std::launch::async, test, 1.0f, 0.5f);
+  auto run2 = std::async(std::launch::async, test, 2.0f, 0.0f);
+}
+
+TEST(PassesTest, RoundtripLargeFastPasses) {
+  ThreadPoolForTests pool(8);
+  const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+
+  CompressParams cparams;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  cparams.progressive_mode = true;
+  cparams.SetCms(GetJxlCms());
+
+  CodecInOut io2;  // only roundtrip success is checked, not the distance
+  JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _,
+                          /* compressed_size */ nullptr, &pool));
+}
+
+// Checks for differing size/distance in two consecutive runs of distance 2,
+// which involves additional processing including adaptive reconstruction.
+// Failing this may be a sign of race conditions or invalid memory accesses.
+TEST(PassesTest, RoundtripProgressiveConsistent) {
+  ThreadPoolForTests pool(8);
+  const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+
+  CompressParams cparams;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  cparams.progressive_mode = true;
+  cparams.butteraugli_distance = 2.0;
+  cparams.SetCms(GetJxlCms());
+
+  // Try each xsize mod kBlockDim to verify right border handling.
+  for (size_t xsize = 48; xsize > 40; --xsize) {
+    io.ShrinkTo(xsize, 15);  // widths 48 down to 41, height fixed at 15
+
+    CodecInOut io2;
+    size_t size2;
+    JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _, &size2, &pool));
+
+    CodecInOut io3;  // second run with identical inputs and parameters
+    size_t size3;
+    JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io3, _, &size3, &pool));
+
+    // Exact same compressed size.
+    EXPECT_EQ(size2, size3);
+
+    // Exact same distance.
+    const float dist2 = ButteraugliDistance(io.frames, io2.frames,
+                                            ButteraugliParams(), GetJxlCms(),
+                                            /*distmap=*/nullptr, &pool);
+    const float dist3 = ButteraugliDistance(io.frames, io3.frames,
+                                            ButteraugliParams(), GetJxlCms(),
+                                            /*distmap=*/nullptr, &pool);
+    EXPECT_EQ(dist2, dist3);
+  }
+}
+
+TEST(PassesTest, AllDownsampleFeasible) {
+  ThreadPoolForTests pool(8);
+  const PaddedBytes orig = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+
+  PaddedBytes compressed;
+  AuxOut aux;
+
+  CompressParams cparams;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  cparams.progressive_mode = true;
+  cparams.butteraugli_distance = 1.0;
+  PassesEncoderState enc_state;
+  ASSERT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(),
+                         &aux, &pool));
+
+  EXPECT_LE(compressed.size(), 240000u);
+  float target_butteraugli[9] = {};  // indexed by downsampling factor
+  target_butteraugli[1] = 2.5f;
+  target_butteraugli[2] = 16.0f;
+  target_butteraugli[4] = 20.0f;
+  target_butteraugli[8] = 80.0f;
+
+  // The default progressive encoding scheme should make all these downsampling
+  // factors achievable.
+  // TODO(veluca): re-enable downsampling 16.
+  std::vector<size_t> downsamplings = {1, 2, 4, 8};  //, 16};
+
+  auto check = [&](const uint32_t task, size_t /* thread */) -> void {
+    const size_t downsampling = downsamplings[task];  // one factor per task
+    extras::JXLDecompressParams dparams;
+    dparams.max_downsampling = downsampling;
+    CodecInOut output;
+    ASSERT_TRUE(
+        test::DecodeFile(dparams, Span<const uint8_t>(compressed), &output));
+    EXPECT_EQ(output.xsize(), io.xsize()) << "downsampling = " << downsampling;
+    EXPECT_EQ(output.ysize(), io.ysize()) << "downsampling = " << downsampling;
+    EXPECT_LE(ButteraugliDistance(io.frames, output.frames, ButteraugliParams(),
+                                  GetJxlCms(),
+                                  /*distmap=*/nullptr, nullptr),
+              target_butteraugli[downsampling])
+        << "downsampling: " << downsampling;
+  };
+  EXPECT_TRUE(RunOnPool(&pool, 0, downsamplings.size(), ThreadPool::NoInit,
+                        check, "TestDownsampling"));
+}
+
+TEST(PassesTest, AllDownsampleFeasibleQProgressive) {
+  ThreadPoolForTests pool(8);
+  const PaddedBytes orig = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+
+  PaddedBytes compressed;
+  AuxOut aux;
+
+  CompressParams cparams;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  cparams.qprogressive_mode = true;  // qprogressive instead of progressive
+  cparams.butteraugli_distance = 1.0;
+  PassesEncoderState enc_state;
+  ASSERT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(),
+                         &aux, &pool));
+
+  EXPECT_LE(compressed.size(), 220000u);
+
+  float target_butteraugli[9] = {};  // indexed by downsampling factor
+  target_butteraugli[1] = 3.0f;
+  target_butteraugli[2] = 6.0f;
+  target_butteraugli[4] = 10.0f;
+  target_butteraugli[8] = 80.0f;
+
+  // The default progressive encoding scheme should make all these downsampling
+  // factors achievable.
+  std::vector<size_t> downsamplings = {1, 2, 4, 8};
+
+  auto check = [&](const uint32_t task, size_t /* thread */) -> void {
+    const size_t downsampling = downsamplings[task];  // one factor per task
+    extras::JXLDecompressParams dparams;
+    dparams.max_downsampling = downsampling;
+    CodecInOut output;
+    ASSERT_TRUE(
+        test::DecodeFile(dparams, Span<const uint8_t>(compressed), &output));
+    EXPECT_EQ(output.xsize(), io.xsize()) << "downsampling = " << downsampling;
+    EXPECT_EQ(output.ysize(), io.ysize()) << "downsampling = " << downsampling;
+    EXPECT_LE(ButteraugliDistance(io.frames, output.frames, ButteraugliParams(),
+                                  GetJxlCms(),
+                                  /*distmap=*/nullptr),
+              target_butteraugli[downsampling])
+        << "downsampling: " << downsampling;
+  };
+  EXPECT_TRUE(RunOnPool(&pool, 0, downsamplings.size(), ThreadPool::NoInit,
+                        check, "TestQProgressive"));
+}
+
+TEST(PassesTest, ProgressiveDownsample2DegradesCorrectlyGrayscale) {
+  ThreadPoolForTests pool(8);
+  const PaddedBytes orig = jxl::test::ReadTestData(
+      "external/wesaturate/500px/cvo9xd_keong_macan_grayscale.png");
+  CodecInOut io_orig;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io_orig, &pool));
+  Rect rect(0, 0, io_orig.xsize(), 128);
+  // need 2 DC groups for the DC frame to actually be progressive.
+  Image3F large(4242, rect.ysize());
+  ZeroFillImage(&large);  // area outside the copied rect stays zero
+  CopyImageTo(rect, *io_orig.Main().color(), rect, &large);
+  CodecInOut io;
+  io.metadata = io_orig.metadata;
+  io.SetFromImage(std::move(large), io_orig.Main().c_current());
+
+  PaddedBytes compressed;
+  AuxOut aux;
+
+  CompressParams cparams;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  cparams.progressive_dc = 1;
+  cparams.responsive = true;
+  cparams.qprogressive_mode = true;
+  cparams.butteraugli_distance = 1.0;
+  PassesEncoderState enc_state;
+  ASSERT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(),
+                         &aux, &pool));
+
+  EXPECT_LE(compressed.size(), 10000u);
+
+  extras::JXLDecompressParams dparams;
+  dparams.max_downsampling = 1;  // reference decode
+  CodecInOut output;
+  ASSERT_TRUE(
+      test::DecodeFile(dparams, Span<const uint8_t>(compressed), &output));
+
+  dparams.max_downsampling = 2;  // same bytes, downsampling up to 2 allowed
+  CodecInOut output_d2;
+  ASSERT_TRUE(
+      test::DecodeFile(dparams, Span<const uint8_t>(compressed), &output_d2));
+
+  // 0 if reading all the passes, ~15 if skipping the 8x pass.
+  float butteraugli_distance_down2_full = ButteraugliDistance(
+      output.frames, output_d2.frames, ButteraugliParams(), GetJxlCms(),
+      /*distmap=*/nullptr);
+
+  EXPECT_LE(butteraugli_distance_down2_full, 3.2f);
+  EXPECT_GE(butteraugli_distance_down2_full, 1.0f);
+}
+
+TEST(PassesTest, ProgressiveDownsample2DegradesCorrectly) {
+  ThreadPoolForTests pool(8);
+  const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+  CodecInOut io_orig;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io_orig, &pool));
+  Rect rect(0, 0, io_orig.xsize(), 128);
+  // need 2 DC groups for the DC frame to actually be progressive.
+  Image3F large(4242, rect.ysize());
+  ZeroFillImage(&large);  // area outside the copied rect stays zero
+  CopyImageTo(rect, *io_orig.Main().color(), rect, &large);
+  CodecInOut io;
+  io.SetFromImage(std::move(large), io_orig.Main().c_current());
+
+  PaddedBytes compressed;
+  AuxOut aux;
+
+  CompressParams cparams;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  cparams.progressive_dc = 1;
+  cparams.responsive = true;
+  cparams.qprogressive_mode = true;
+  cparams.butteraugli_distance = 1.0;
+  PassesEncoderState enc_state;
+  ASSERT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(),
+                         &aux, &pool));
+
+  EXPECT_LE(compressed.size(), 220000u);
+
+  extras::JXLDecompressParams dparams;
+  dparams.max_downsampling = 1;  // reference decode
+  CodecInOut output;
+  ASSERT_TRUE(
+      test::DecodeFile(dparams, Span<const uint8_t>(compressed), &output));
+
+  dparams.max_downsampling = 2;  // same bytes, downsampling up to 2 allowed
+  CodecInOut output_d2;
+  ASSERT_TRUE(
+      test::DecodeFile(dparams, Span<const uint8_t>(compressed), &output_d2));
+
+  // 0 if reading all the passes, ~15 if skipping the 8x pass.
+  float butteraugli_distance_down2_full = ButteraugliDistance(
+      output.frames, output_d2.frames, ButteraugliParams(), GetJxlCms(),
+      /*distmap=*/nullptr);
+
+  EXPECT_LE(butteraugli_distance_down2_full, 3.0f);
+  EXPECT_GE(butteraugli_distance_down2_full, 1.0f);
+}
+
+TEST(PassesTest, NonProgressiveDCImage) {
+  ThreadPoolForTests pool(8);
+  const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+
+  PaddedBytes compressed;
+  AuxOut aux;
+
+  CompressParams cparams;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  cparams.progressive_mode = false;
+  cparams.butteraugli_distance = 2.0;
+  PassesEncoderState enc_state;
+  ASSERT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(),
+                         &aux, &pool));
+
+  // Even in non-progressive mode, it should be possible to return a DC-only
+  // image.
+  extras::JXLDecompressParams dparams;
+  dparams.max_downsampling = 100;
+  CodecInOut output;
+  ASSERT_TRUE(test::DecodeFile(dparams, Span<const uint8_t>(compressed),
+                               &output, &pool));
+  EXPECT_EQ(output.xsize(), io.xsize());  // output keeps the input dimensions
+  EXPECT_EQ(output.ysize(), io.ysize());
+}
+
+TEST(PassesTest, RoundtripSmallNoGaborishPasses) {
+  const PaddedBytes orig = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+  io.ShrinkTo(io.xsize() / 8, io.ysize() / 8);  // 1/8 of width and height
+
+  CompressParams cparams;
+  cparams.gaborish = Override::kOff;  // the setting this test varies
+  cparams.butteraugli_distance = 1.0;
+  cparams.progressive_mode = true;
+  cparams.SetCms(GetJxlCms());
+
+  CodecInOut io2;
+  JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _));
+  EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, ButteraugliParams(),
+                                  GetJxlCms(),
+                                  /*distmap=*/nullptr),
+              IsSlightlyBelow(1.2));  // requested distance was 1.0
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/patch_dictionary_internal.h b/third-party/libjxl/libjxl/lib/jxl/patch_dictionary_internal.h
new file mode 100644
index 0000000000..e4172f6db6
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/patch_dictionary_internal.h
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_PATCH_DICTIONARY_INTERNAL_H_
+#define LIB_JXL_PATCH_DICTIONARY_INTERNAL_H_
+
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/passes_state.h"  // for PassesSharedState
+
+namespace jxl {
+
+// Context numbers as specified in Section C.4.5, Listing C.2:
+enum Contexts {
+  kNumRefPatchContext = 0,
+  kReferenceFrameContext = 1,
+  kPatchSizeContext = 2,
+  kPatchReferencePositionContext = 3,
+  kPatchPositionContext = 4,
+  kPatchBlendModeContext = 5,
+  kPatchOffsetContext = 6,
+  kPatchCountContext = 7,
+  kPatchAlphaChannelContext = 8,
+  kPatchClampContext = 9,
+  kNumPatchDictionaryContexts  // implicitly 10: the count of contexts above
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_PATCH_DICTIONARY_INTERNAL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/patch_dictionary_test.cc b/third-party/libjxl/libjxl/lib/jxl/patch_dictionary_test.cc
new file mode 100644
index 0000000000..a2fe49bce2
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/patch_dictionary_test.cc
@@ -0,0 +1,58 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+using ::jxl::test::Roundtrip;
+
+TEST(PatchDictionaryTest, GrayscaleModular) {
+  const PaddedBytes orig = jxl::test::ReadTestData("jxl/grayscale_patches.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+
+  CompressParams cparams;
+  cparams.SetLossless();
+  cparams.patches = jxl::Override::kOn;  // force patches on
+
+  CodecInOut io2;
+  // Without patches: ~25k
+  size_t compressed_size;
+  JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _, &compressed_size));
+  EXPECT_LE(compressed_size, 8000u);
+  JXL_ASSERT_OK(VerifyRelativeError(*io.Main().color(), *io2.Main().color(),
+                                    1e-7f, 0, _));
+}
+
+TEST(PatchDictionaryTest, GrayscaleVarDCT) {
+  const PaddedBytes orig = jxl::test::ReadTestData("jxl/grayscale_patches.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+
+  CompressParams cparams;
+  cparams.patches = jxl::Override::kOn;  // force patches on
+
+  CodecInOut io2;
+  // Without patches: ~47k
+  size_t compressed_size;
+  JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _, &compressed_size));
+  EXPECT_LE(compressed_size, 14000u);
+  // Without patches: ~1.2
+  EXPECT_LE(ButteraugliDistance(io.frames, io2.frames, ButteraugliParams(),
+                                GetJxlCms(),
+                                /*distmap=*/nullptr),
+            1.1);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/preview_test.cc b/third-party/libjxl/libjxl/lib/jxl/preview_test.cc
new file mode 100644
index 0000000000..6c08821b94
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/preview_test.cc
@@ -0,0 +1,69 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+
+#include <string>
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+using test::Roundtrip;
+
+TEST(PreviewTest, RoundtripGivenPreview) {
+  const PaddedBytes orig = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+  io.ShrinkTo(io.xsize() / 8, io.ysize() / 8);  // 1/8 of width and height
+  // Same as main image
+  io.preview_frame = io.Main().Copy();
+  const size_t preview_xsize = 15;
+  const size_t preview_ysize = 27;
+  io.preview_frame.ShrinkTo(preview_xsize, preview_ysize);
+  io.metadata.m.have_preview = true;
+  ASSERT_TRUE(io.metadata.m.preview_size.Set(io.preview_frame.xsize(),
+                                             io.preview_frame.ysize()));
+
+  CompressParams cparams;
+  cparams.butteraugli_distance = 2.0;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  cparams.SetCms(GetJxlCms());
+
+  CodecInOut io2;
+  JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _));
+  EXPECT_EQ(preview_xsize, io2.metadata.m.preview_size.xsize());
+  EXPECT_EQ(preview_ysize, io2.metadata.m.preview_size.ysize());
+  EXPECT_EQ(preview_xsize, io2.preview_frame.xsize());
+  EXPECT_EQ(preview_ysize, io2.preview_frame.ysize());
+
+  EXPECT_LE(ButteraugliDistance(io.preview_frame, io2.preview_frame,
+                                ButteraugliParams(), GetJxlCms(),
+                                /*distmap=*/nullptr),
+            2.5);  // bound for the preview frame
+  EXPECT_LE(ButteraugliDistance(io.Main(), io2.Main(), ButteraugliParams(),
+                                GetJxlCms(),
+                                /*distmap=*/nullptr),
+            2.5);  // same bound for the main frame
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/quant_weights.cc b/third-party/libjxl/libjxl/lib/jxl/quant_weights.cc
new file mode 100644
index 0000000000..5e3f3424aa
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/quant_weights.cc
@@ -0,0 +1,1239 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+#include "lib/jxl/quant_weights.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <utility>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/dec_modular.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/quant_weights.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/fast_math-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Lt;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Sqrt;
+
+// kQuantWeights[N * N * c + N * y + x] is the relative weight of the (x, y)
+// coefficient in component c. Higher weights correspond to finer quantization
+// intervals and more bits spent in encoding.
+
+static constexpr const float kAlmostZero = 1e-8f;
+
+void GetQuantWeightsDCT2(const QuantEncoding::DCT2Weights& dct2weights,
+                         float* weights) {
+  // Every channel owns a row-major 8x8 table (64 entries). It is filled in
+  // nested regions: entries (0, 1), (1, 0) and (1, 1) first, then the three
+  // 2x2 blocks around that corner, then the three remaining 4x4 quadrants.
+  for (size_t c = 0; c < 3; c++) {
+    const auto& w = dct2weights[c];
+    float* chan = weights + c * 64;
+    chan[0] = 0xBAD;  // Sentinel value for entry (0, 0).
+    chan[1] = chan[8] = w[0];
+    chan[9] = w[1];
+    // 2x2 blocks: right of and below the corner share w[2], diagonal gets
+    // w[3].
+    for (size_t y = 0; y < 2; y++) {
+      for (size_t x = 0; x < 2; x++) {
+        chan[y * 8 + x + 2] = w[2];
+        chan[(y + 2) * 8 + x] = w[2];
+        chan[(y + 2) * 8 + x + 2] = w[3];
+      }
+    }
+    // 4x4 quadrants: top-right and bottom-left share w[4], bottom-right gets
+    // w[5].
+    for (size_t y = 0; y < 4; y++) {
+      for (size_t x = 0; x < 4; x++) {
+        chan[y * 8 + x + 4] = w[4];
+        chan[(y + 4) * 8 + x] = w[4];
+        chan[(y + 4) * 8 + x + 4] = w[5];
+      }
+    }
+  }
+}
+
+void GetQuantWeightsIdentity(const QuantEncoding::IdWeights& idweights,
+                             float* weights) {
+  // Per channel: fill the 64-entry table with weight 0, then override entries
+  // 1 and 8 with weight 1 and entry 9 with weight 2.
+  for (size_t c = 0; c < 3; c++) {
+    float* chan = weights + 64 * c;
+    std::fill(chan, chan + 64, idweights[c][0]);
+    chan[1] = chan[8] = idweights[c][1];
+    chan[9] = idweights[c][2];
+  }
+}
+
+float Interpolate(float pos, float max, const float* array, size_t len) {
+  // Rescale pos from max to len - 1, then blend the two neighbouring entries.
+  const float scaled = pos * (len - 1) / max;
+  const size_t lower = scaled;
+  JXL_DASSERT(lower + 1 < len);
+  const float* pair = array + lower;
+  return pair[0] * FastPowf(pair[1] / pair[0], scaled - lower);
+}
+
+float Mult(float v) {
+  // Positive v maps to 1 + v; every other value maps to 1 / (1 - v).
+  return v > 0.0f ? 1.0f + v : 1.0f / (1.0f - v);
+}
+
+using DF4 = HWY_CAPPED(float, 4);
+
+hwy::HWY_NAMESPACE::Vec<DF4> InterpolateVec(
+    hwy::HWY_NAMESPACE::Vec<DF4> scaled_pos, const float* array) {
+  HWY_CAPPED(int32_t, 4) di;
+  // Split each lane of scaled_pos into an integer index and a fraction.
+  auto idx = ConvertTo(di, scaled_pos);
+
+  auto frac = Sub(scaled_pos, ConvertTo(DF4(), idx));
+
+  // TODO(veluca): in theory, this could be done with 8 TableLookupBytes, but
+  // it's probably slower.
+  auto a = GatherIndex(DF4(), array, idx);
+  auto b = GatherIndex(DF4(), array + 1, idx);
+  // Same formula as the scalar Interpolate(): a * FastPowf(b / a, frac).
+  return Mul(a, FastPowf(DF4(), Div(b, a), frac));
+}
+
+// Computes quant weights for a COLS*ROWS-sized transform by interpolating
+// between num_bands distance bands at each coefficient's scaled distance from
+// (0, 0). Writes three consecutive ROWS*COLS planes (one per channel) to `out`
+// and fails if any band is smaller than kAlmostZero.
+Status GetQuantWeights(
+    size_t ROWS, size_t COLS,
+    const DctQuantWeightParams::DistanceBandsArray& distance_bands,
+    size_t num_bands, float* out) {
+  for (size_t c = 0; c < 3; c++) {
+    float bands[DctQuantWeightParams::kMaxDistanceBands] = {
+        distance_bands[c][0]};
+    if (bands[0] < kAlmostZero) return JXL_FAILURE("Invalid distance bands");
+    for (size_t i = 1; i < num_bands; i++) {  // bands are a running product
+      bands[i] = bands[i - 1] * Mult(distance_bands[c][i]);
+      if (bands[i] < kAlmostZero) return JXL_FAILURE("Invalid distance bands");
+    }
+    float scale = (num_bands - 1) / (kSqrt2 + 1e-6f);
+    float rcpcol = scale / (COLS - 1);
+    float rcprow = scale / (ROWS - 1);
+    JXL_ASSERT(COLS >= Lanes(DF4()));  // each row is stored in full vectors
+    HWY_ALIGN float l0123[4] = {0, 1, 2, 3};  // per-lane x offsets
+    for (uint32_t y = 0; y < ROWS; y++) {
+      float dy = y * rcprow;
+      float dy2 = dy * dy;
+      for (uint32_t x = 0; x < COLS; x += Lanes(DF4())) {
+        auto dx =
+            Mul(Add(Set(DF4(), x), Load(DF4(), l0123)), Set(DF4(), rcpcol));
+        auto scaled_distance = Sqrt(MulAdd(dx, dx, Set(DF4(), dy2)));
+        auto weight = num_bands == 1 ? Set(DF4(), bands[0])  // one band: flat
+                                     : InterpolateVec(scaled_distance, bands);
+        StoreU(weight, DF4(), out + c * COLS * ROWS + y * COLS + x);
+      }
+    }
+  }
+  return true;
+}
+
+// TODO(veluca): SIMD-fy. With 256x256, this is actually slow.
+Status ComputeQuantTable(const QuantEncoding& encoding,
+                         float* JXL_RESTRICT table,
+                         float* JXL_RESTRICT inv_table, size_t table_num,
+                         DequantMatrices::QuantTable kind, size_t* pos) {
+  constexpr size_t N = kBlockDim;
+  size_t wrows = 8 * DequantMatrices::required_size_x[kind],
+         wcols = 8 * DequantMatrices::required_size_y[kind];
+  size_t num = wrows * wcols;
+  // One plane of `num` weights per channel, filled according to the mode.
+  std::vector<float> weights(3 * num);
+
+  switch (encoding.mode) {
+    case QuantEncoding::kQuantModeLibrary: {
+      // Library and copy quant encoding should get replaced by the actual
+      // parameters by the caller.
+      JXL_ASSERT(false);
+      break;
+    }
+    case QuantEncoding::kQuantModeID: {
+      JXL_ASSERT(num == kDCTBlockSize);
+      GetQuantWeightsIdentity(encoding.idweights, weights.data());
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT2: {
+      JXL_ASSERT(num == kDCTBlockSize);
+      GetQuantWeightsDCT2(encoding.dct2weights, weights.data());
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT4: {
+      JXL_ASSERT(num == kDCTBlockSize);
+      float weights4x4[3 * 4 * 4];
+      // Always use 4x4 GetQuantWeights for DCT4 quantization tables.
+      JXL_RETURN_IF_ERROR(
+          GetQuantWeights(4, 4, encoding.dct_params.distance_bands,
+                          encoding.dct_params.num_distance_bands, weights4x4));
+      for (size_t c = 0; c < 3; c++) {
+        for (size_t y = 0; y < kBlockDim; y++) {
+          for (size_t x = 0; x < kBlockDim; x++) {
+            weights[c * num + y * kBlockDim + x] =
+                weights4x4[c * 16 + (y / 2) * 4 + (x / 2)];
+          }
+        }
+        weights[c * num + 1] /= encoding.dct4multipliers[c][0];
+        weights[c * num + N] /= encoding.dct4multipliers[c][0];
+        weights[c * num + N + 1] /= encoding.dct4multipliers[c][1];
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT4X8: {
+      JXL_ASSERT(num == kDCTBlockSize);
+      float weights4x8[3 * 4 * 8];
+      // Always use 4x8 GetQuantWeights for DCT4X8 quantization tables.
+      JXL_RETURN_IF_ERROR(
+          GetQuantWeights(4, 8, encoding.dct_params.distance_bands,
+                          encoding.dct_params.num_distance_bands, weights4x8));
+      for (size_t c = 0; c < 3; c++) {
+        for (size_t y = 0; y < kBlockDim; y++) {
+          for (size_t x = 0; x < kBlockDim; x++) {
+            weights[c * num + y * kBlockDim + x] =
+                weights4x8[c * 32 + (y / 2) * 8 + x];
+          }
+        }
+        weights[c * num + N] /= encoding.dct4x8multipliers[c];
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT: {
+      JXL_RETURN_IF_ERROR(GetQuantWeights(
+          wrows, wcols, encoding.dct_params.distance_bands,
+          encoding.dct_params.num_distance_bands, weights.data()));
+      break;
+    }
+    case QuantEncoding::kQuantModeRAW: {
+      if (!encoding.qraw.qtable || encoding.qraw.qtable->size() != 3 * num) {
+        return JXL_FAILURE("Invalid table encoding");
+      }
+      for (size_t i = 0; i < 3 * num; i++) {
+        weights[i] =
+            1.f / (encoding.qraw.qtable_den * (*encoding.qraw.qtable)[i]);
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeAFV: {
+      constexpr float kFreqs[] = {
+          0xBAD,
+          0xBAD,
+          0.8517778890324296,
+          5.37778436506804,
+          0xBAD,
+          0xBAD,
+          4.734747904497923,
+          5.449245381693219,
+          1.6598270267479331,
+          4,
+          7.275749096817861,
+          10.423227632456525,
+          2.662932286148962,
+          7.630657783650829,
+          8.962388608184032,
+          12.97166202570235,
+      };
+
+      float weights4x8[3 * 4 * 8];
+      JXL_RETURN_IF_ERROR((
+          GetQuantWeights(4, 8, encoding.dct_params.distance_bands,
+                          encoding.dct_params.num_distance_bands, weights4x8)));
+      float weights4x4[3 * 4 * 4];
+      JXL_RETURN_IF_ERROR((GetQuantWeights(
+          4, 4, encoding.dct_params_afv_4x4.distance_bands,
+          encoding.dct_params_afv_4x4.num_distance_bands, weights4x4)));
+
+      constexpr float lo = 0.8517778890324296;
+      constexpr float hi = 12.97166202570235f - lo + 1e-6f;
+      for (size_t c = 0; c < 3; c++) {
+        float bands[4];
+        bands[0] = encoding.afv_weights[c][5];
+        if (bands[0] < kAlmostZero) return JXL_FAILURE("Invalid AFV bands");
+        for (size_t i = 1; i < 4; i++) {
+          bands[i] = bands[i - 1] * Mult(encoding.afv_weights[c][i + 5]);
+          if (bands[i] < kAlmostZero) return JXL_FAILURE("Invalid AFV bands");
+        }
+        size_t start = c * 64;
+        auto set_weight = [&start, &weights](size_t x, size_t y, float val) {
+          weights[start + y * 8 + x] = val;
+        };
+        weights[start] = 1;  // Not used, but causes MSAN error otherwise.
+        // Weights for (0, 1) and (1, 0).
+        set_weight(0, 1, encoding.afv_weights[c][0]);
+        set_weight(1, 0, encoding.afv_weights[c][1]);
+        // AFV special weights for 3-pixel corner.
+        set_weight(0, 2, encoding.afv_weights[c][2]);
+        set_weight(2, 0, encoding.afv_weights[c][3]);
+        set_weight(2, 2, encoding.afv_weights[c][4]);
+
+        // All other AFV weights.
+        for (size_t y = 0; y < 4; y++) {
+          for (size_t x = 0; x < 4; x++) {
+            if (x < 2 && y < 2) continue;
+            float val = Interpolate(kFreqs[y * 4 + x] - lo, hi, bands, 4);
+            set_weight(2 * x, 2 * y, val);
+          }
+        }
+
+        // Put 4x8 weights in odd rows, except (1, 0).
+        for (size_t y = 0; y < kBlockDim / 2; y++) {
+          for (size_t x = 0; x < kBlockDim; x++) {
+            if (x == 0 && y == 0) continue;
+            weights[c * num + (2 * y + 1) * kBlockDim + x] =
+                weights4x8[c * 32 + y * 8 + x];
+          }
+        }
+        // Put 4x4 weights in even rows / odd columns, except (0, 1).
+        for (size_t y = 0; y < kBlockDim / 2; y++) {
+          for (size_t x = 0; x < kBlockDim / 2; x++) {
+            if (x == 0 && y == 0) continue;
+            weights[c * num + (2 * y) * kBlockDim + 2 * x + 1] =
+                weights4x4[c * 16 + y * 4 + x];
+          }
+        }
+      }
+      break;
+    }
+  }
+  size_t prev_pos = *pos;  // where this table starts in the outputs
+  HWY_CAPPED(float, 64) d;
+  for (size_t i = 0; i < num * 3; i += Lanes(d)) {
+    auto inv_val = LoadU(d, weights.data() + i);
+    if (JXL_UNLIKELY(!AllFalse(d, Ge(inv_val, Set(d, 1.0f / kAlmostZero))) ||
+                     !AllFalse(d, Lt(inv_val, Set(d, kAlmostZero))))) {
+      return JXL_FAILURE("Invalid quantization table");
+    }
+    auto val = Div(Set(d, 1.0f), inv_val);  // table = 1 / weight
+    StoreU(val, d, table + *pos + i);
+    StoreU(inv_val, d, inv_table + *pos + i);
+  }
+  (*pos) += 3 * num;  // advance past the three planes just written
+
+  // Ensure that the lowest frequencies have a 0 inverse table.
+  // This does not affect en/decoding, but allows AC strategy selection to be
+  // slightly simpler.
+  size_t xs = DequantMatrices::required_size_x[kind];
+  size_t ys = DequantMatrices::required_size_y[kind];
+  CoefficientLayout(&ys, &xs);
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t y = 0; y < ys; y++) {
+      for (size_t x = 0; x < xs; x++) {
+        inv_table[prev_pos + c * ys * xs * kDCTBlockSize + y * kBlockDim * xs +
+                  x] = 0;
+      }
+    }
+  }
+  return true;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace jxl {
+namespace {
+
+HWY_EXPORT(ComputeQuantTable);
+
+static constexpr const float kAlmostZero = 1e-8f;
+
+Status DecodeDctParams(BitReader* br, DctQuantWeightParams* params) {
+  params->num_distance_bands =  // coded as count - 1
+      br->ReadFixedBits<DctQuantWeightParams::kLog2MaxDistanceBands>() + 1;
+  for (size_t c = 0; c < 3; c++) {
+    auto& bands = params->distance_bands[c];  // bands[0] is the seed value
+    for (size_t i = 0; i != params->num_distance_bands; ++i) {
+      JXL_RETURN_IF_ERROR(F16Coder::Read(br, &bands[i]));
+    }
+    const bool seed_too_small = bands[0] < kAlmostZero;
+    if (seed_too_small) return JXL_FAILURE("Distance band seed is too small");
+    bands[0] *= 64.0f;
+  }
+  return true;
+}
+
+Status Decode(BitReader* br, QuantEncoding* encoding, size_t required_size_x,
+              size_t required_size_y, size_t idx,
+              ModularFrameDecoder* modular_frame_decoder) {
+  size_t required_size = required_size_x * required_size_y;  // in block units
+  required_size_x *= kBlockDim;
+  required_size_y *= kBlockDim;
+  int mode = br->ReadFixedBits<kLog2NumQuantModes>();  // decides what follows
+  switch (mode) {
+    case QuantEncoding::kQuantModeLibrary: {
+      encoding->predefined = br->ReadFixedBits<kCeilLog2NumPredefinedTables>();
+      if (encoding->predefined >= kNumPredefinedTables) {
+        return JXL_FAILURE("Invalid predefined table");
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeID: {
+      if (required_size != 1) return JXL_FAILURE("Invalid mode");
+      for (size_t c = 0; c < 3; c++) {
+        for (size_t i = 0; i < 3; i++) {
+          JXL_RETURN_IF_ERROR(F16Coder::Read(br, &encoding->idweights[c][i]));
+          if (std::abs(encoding->idweights[c][i]) < kAlmostZero) {
+            return JXL_FAILURE("ID Quantizer is too small");
+          }
+          encoding->idweights[c][i] *= 64;
+        }
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT2: {
+      if (required_size != 1) return JXL_FAILURE("Invalid mode");
+      for (size_t c = 0; c < 3; c++) {
+        for (size_t i = 0; i < 6; i++) {
+          JXL_RETURN_IF_ERROR(F16Coder::Read(br, &encoding->dct2weights[c][i]));
+          if (std::abs(encoding->dct2weights[c][i]) < kAlmostZero) {
+            return JXL_FAILURE("Quantizer is too small");
+          }
+          encoding->dct2weights[c][i] *= 64;
+        }
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT4X8: {
+      if (required_size != 1) return JXL_FAILURE("Invalid mode");
+      for (size_t c = 0; c < 3; c++) {
+        JXL_RETURN_IF_ERROR(
+            F16Coder::Read(br, &encoding->dct4x8multipliers[c]));
+        if (std::abs(encoding->dct4x8multipliers[c]) < kAlmostZero) {
+          return JXL_FAILURE("DCT4X8 multiplier is too small");
+        }
+      }
+      JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params));
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT4: {
+      if (required_size != 1) return JXL_FAILURE("Invalid mode");
+      for (size_t c = 0; c < 3; c++) {
+        for (size_t i = 0; i < 2; i++) {
+          JXL_RETURN_IF_ERROR(
+              F16Coder::Read(br, &encoding->dct4multipliers[c][i]));
+          if (std::abs(encoding->dct4multipliers[c][i]) < kAlmostZero) {
+            return JXL_FAILURE("DCT4 multiplier is too small");
+          }
+        }
+      }
+      JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params));
+      break;
+    }
+    case QuantEncoding::kQuantModeAFV: {
+      if (required_size != 1) return JXL_FAILURE("Invalid mode");
+      for (size_t c = 0; c < 3; c++) {
+        for (size_t i = 0; i < 9; i++) {
+          JXL_RETURN_IF_ERROR(F16Coder::Read(br, &encoding->afv_weights[c][i]));
+        }
+        for (size_t i = 0; i < 6; i++) {  // only the first 6 are scaled
+          encoding->afv_weights[c][i] *= 64;
+        }
+      }
+      JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params));
+      JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params_afv_4x4));
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT: {
+      JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params));
+      break;
+    }
+    case QuantEncoding::kQuantModeRAW: {
+      // Set mode early, to avoid mem-leak.
+      encoding->mode = QuantEncoding::kQuantModeRAW;
+      JXL_RETURN_IF_ERROR(ModularFrameDecoder::DecodeQuantTable(
+          required_size_x, required_size_y, br, encoding, idx,
+          modular_frame_decoder));
+      break;
+    }
+    default:
+      return JXL_FAILURE("Invalid quantization table encoding");
+  }
+  encoding->mode = QuantEncoding::Mode(mode);  // reached only on success
+  return true;
+}
+
+}  // namespace
+
+// These definitions are needed before C++17.
+constexpr size_t DequantMatrices::required_size_[];
+constexpr size_t DequantMatrices::required_size_x[];
+constexpr size_t DequantMatrices::required_size_y[];
+constexpr DequantMatrices::QuantTable DequantMatrices::kQuantTable[];
+
+Status DequantMatrices::Decode(BitReader* br,
+                               ModularFrameDecoder* modular_frame_decoder) {
+  const bool all_default = br->ReadBits(1) != 0;  // set: keep library tables
+  encodings_.clear();
+  encodings_.resize(kNum, QuantEncoding::Library(0));
+  const size_t num_tables = all_default ? 0 : static_cast<size_t>(kNum);
+  for (size_t idx = 0; idx < num_tables; idx++) {
+    JXL_RETURN_IF_ERROR(jxl::Decode(br, &encodings_[idx], required_size_x[idx],
+                                    required_size_y[idx], idx,
+                                    modular_frame_decoder));
+  }
+  computed_mask_ = 0;
+  return true;
+}
+
+Status DequantMatrices::DecodeDC(BitReader* br) {
+  const bool all_default = br->ReadBits(1);
+  if (!br->AllReadsWithinBounds()) return JXL_FAILURE("EOS during DecodeDC");
+  if (all_default) return true;  // Nothing else is read in this case.
+  for (size_t c = 0; c < 3; c++) {
+    auto& quant = dc_quant_[c];
+    JXL_RETURN_IF_ERROR(F16Coder::Read(br, &quant));
+    quant *= 1.0f / 128.0f;
+    // Reject negative and nearly-zero values before taking the reciprocal.
+    if (quant < kAlmostZero) {
+      return JXL_FAILURE("Invalid dc_quant: coefficient is too small.");
+    }
+    inv_dc_quant_[c] = 1.0f / quant;
+  }
+  return true;
+}
+
+constexpr float V(float v) { return v; }  // literals narrow to float at call
+
+namespace {
+struct DequantMatricesLibraryDef {
+  // DCT8
+  static constexpr QuantEncodingInternal DCT() {
+    return QuantEncodingInternal::DCT(DctQuantWeightParams({{{{
+                                                                 V(3150.0),
+                                                                 V(0.0),
+                                                                 V(-0.4),
+                                                                 V(-0.4),
+                                                                 V(-0.4),
+                                                                 V(-2.0),
+                                                             }},
+                                                             {{
+                                                                 V(560.0),
+                                                                 V(0.0),
+                                                                 V(-0.3),
+                                                                 V(-0.3),
+                                                                 V(-0.3),
+                                                                 V(-0.3),
+                                                             }},
+                                                             {{
+                                                                 V(512.0),
+                                                                 V(-2.0),
+                                                                 V(-1.0),
+                                                                 V(0.0),
+                                                                 V(-1.0),
+                                                                 V(-2.0),
+                                                             }}}},
+                                                           6));
+  }
+
+  // Identity
+  static constexpr QuantEncodingInternal IDENTITY() {
+    return QuantEncodingInternal::Identity({{{{
+                                                 V(280.0),
+                                                 V(3160.0),
+                                                 V(3160.0),
+                                             }},
+                                             {{
+                                                 V(60.0),
+                                                 V(864.0),
+                                                 V(864.0),
+                                             }},
+                                             {{
+                                                 V(18.0),
+                                                 V(200.0),
+                                                 V(200.0),
+                                             }}}});
+  }
+
+  // DCT2
+  static constexpr QuantEncodingInternal DCT2X2() {
+    return QuantEncodingInternal::DCT2({{{{
+                                             V(3840.0),
+                                             V(2560.0),
+                                             V(1280.0),
+                                             V(640.0),
+                                             V(480.0),
+                                             V(300.0),
+                                         }},
+                                         {{
+                                             V(960.0),
+                                             V(640.0),
+                                             V(320.0),
+                                             V(180.0),
+                                             V(140.0),
+                                             V(120.0),
+                                         }},
+                                         {{
+                                             V(640.0),
+                                             V(320.0),
+                                             V(128.0),
+                                             V(64.0),
+                                             V(32.0),
+                                             V(16.0),
+                                         }}}});
+  }
+
+  // DCT4 (quant_kind 3)
+  static constexpr QuantEncodingInternal DCT4X4() {
+    return QuantEncodingInternal::DCT4(DctQuantWeightParams({{{{
+                                                                  V(2200.0),
+                                                                  V(0.0),
+                                                                  V(0.0),
+                                                                  V(0.0),
+                                                              }},
+                                                              {{
+                                                                  V(392.0),
+                                                                  V(0.0),
+                                                                  V(0.0),
+                                                                  V(0.0),
+                                                              }},
+                                                              {{
+                                                                  V(112.0),
+                                                                  V(-0.25),
+                                                                  V(-0.25),
+                                                                  V(-0.5),
+                                                              }}}},
+                                                            4),
+                                       /* kMul */
+                                       {{{{
+                                             V(1.0),
+                                             V(1.0),
+                                         }},
+                                         {{
+                                             V(1.0),
+                                             V(1.0),
+                                         }},
+                                         {{
+                                             V(1.0),
+                                             V(1.0),
+                                         }}}});
+  }
+
+  // DCT16
+  static constexpr QuantEncodingInternal DCT16X16() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{{
+                                   V(8996.8725711814115328),
+                                   V(-1.3000777393353804),
+                                   V(-0.49424529824571225),
+                                   V(-0.439093774457103443),
+                                   V(-0.6350101832695744),
+                                   V(-0.90177264050827612),
+                                   V(-1.6162099239887414),
+                               }},
+                               {{
+                                   V(3191.48366296844234752),
+                                   V(-0.67424582104194355),
+                                   V(-0.80745813428471001),
+                                   V(-0.44925837484843441),
+                                   V(-0.35865440981033403),
+                                   V(-0.31322389111877305),
+                                   V(-0.37615025315725483),
+                               }},
+                               {{
+                                   V(1157.50408145487200256),
+                                   V(-2.0531423165804414),
+                                   V(-1.4),
+                                   V(-0.50687130033378396),
+                                   V(-0.42708730624733904),
+                                   V(-1.4856834539296244),
+                                   V(-4.9209142884401604),
+                               }}}},
+                             7));
+  }
+
+  // DCT32
+  static constexpr QuantEncodingInternal DCT32X32() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{{
+                                   V(15718.40830982518931456),
+                                   V(-1.025),
+                                   V(-0.98),
+                                   V(-0.9012),
+                                   V(-0.4),
+                                   V(-0.48819395464),
+                                   V(-0.421064),
+                                   V(-0.27),
+                               }},
+                               {{
+                                   V(7305.7636810695983104),
+                                   V(-0.8041958212306401),
+                                   V(-0.7633036457487539),
+                                   V(-0.55660379990111464),
+                                   V(-0.49785304658857626),
+                                   V(-0.43699592683512467),
+                                   V(-0.40180866526242109),
+                                   V(-0.27321683125358037),
+                               }},
+                               {{
+                                   V(3803.53173721215041536),
+                                   V(-3.060733579805728),
+                                   V(-2.0413270132490346),
+                                   V(-2.0235650159727417),
+                                   V(-0.5495389509954993),
+                                   V(-0.4),
+                                   V(-0.4),
+                                   V(-0.3),
+                               }}}},
+                             8));
+  }
+
+  // DCT16X8
+  static constexpr QuantEncodingInternal DCT8X16() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{{
+                                   V(7240.7734393502),
+                                   V(-0.7),
+                                   V(-0.7),
+                                   V(-0.2),
+                                   V(-0.2),
+                                   V(-0.2),
+                                   V(-0.5),
+                               }},
+                               {{
+                                   V(1448.15468787004),
+                                   V(-0.5),
+                                   V(-0.5),
+                                   V(-0.5),
+                                   V(-0.2),
+                                   V(-0.2),
+                                   V(-0.2),
+                               }},
+                               {{
+                                   V(506.854140754517),
+                                   V(-1.4),
+                                   V(-0.2),
+                                   V(-0.5),
+                                   V(-0.5),
+                                   V(-1.5),
+                                   V(-3.6),
+                               }}}},
+                             7));
+  }
+
+  // DCT32X8
+  static constexpr QuantEncodingInternal DCT8X32() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{{
+                                   V(16283.2494710648897),
+                                   V(-1.7812845336559429),
+                                   V(-1.6309059012653515),
+                                   V(-1.0382179034313539),
+                                   V(-0.85),
+                                   V(-0.7),
+                                   V(-0.9),
+                                   V(-1.2360638576849587),
+                               }},
+                               {{
+                                   V(5089.15750884921511936),
+                                   V(-0.320049391452786891),
+                                   V(-0.35362849922161446),
+                                   V(-0.30340000000000003),
+                                   V(-0.61),
+                                   V(-0.5),
+                                   V(-0.5),
+                                   V(-0.6),
+                               }},
+                               {{
+                                   V(3397.77603275308720128),
+                                   V(-0.321327362693153371),
+                                   V(-0.34507619223117997),
+                                   V(-0.70340000000000003),
+                                   V(-0.9),
+                                   V(-1.0),
+                                   V(-1.0),
+                                   V(-1.1754605576265209),
+                               }}}},
+                             8));
+  }
+
+  // DCT16X32 library entry (QuantTable's comments pair it with DCT32X16).
+  static constexpr QuantEncodingInternal DCT16X32() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{{
+                                   V(13844.97076442300573),
+                                   V(-0.97113799999999995),
+                                   V(-0.658),
+                                   V(-0.42026),
+                                   V(-0.22712),
+                                   V(-0.2206),
+                                   V(-0.226),
+                                   V(-0.6),
+                               }},
+                               {{
+                                   V(4798.964084220744293),
+                                   V(-0.61125308982767057),
+                                   V(-0.83770786552491361),
+                                   V(-0.79014862079498627),
+                                   V(-0.2692727459704829),
+                                   V(-0.38272769465388551),
+                                   V(-0.22924222653091453),
+                                   V(-0.20719098826199578),
+                               }},
+                               {{
+                                   V(1807.236946760964614),
+                                   V(-1.2),
+                                   V(-1.2),
+                                   V(-0.7),
+                                   V(-0.7),
+                                   V(-0.7),
+                                   V(-0.4),
+                                   V(-0.5),
+                               }}}},
+                             8));  // num_distance_bands: 8 values per row
+  }
+
+  // DCT4X8 and 8x4 library entry: 4 distance bands per row, kMuls all 1.0.
+  static constexpr QuantEncodingInternal DCT4X8() {
+    return QuantEncodingInternal::DCT4X8(
+        DctQuantWeightParams({{
+                                 {{
+                                     V(2198.050556016380522),
+                                     V(-0.96269623020744692),
+                                     V(-0.76194253026666783),
+                                     V(-0.6551140670773547),
+                                 }},
+                                 {{
+                                     V(764.3655248643528689),
+                                     V(-0.92630200888366945),
+                                     V(-0.9675229603596517),
+                                     V(-0.27845290869168118),
+                                 }},
+                                 {{
+                                     V(527.107573587542228),
+                                     V(-1.4594385811273854),
+                                     V(-1.450082094097871593),
+                                     V(-1.5843722511996204),
+                                 }},
+                             }},
+                             4),  // num_distance_bands: 4 values per row
+        /* kMuls */
+        {{
+            V(1.0),
+            V(1.0),
+            V(1.0),
+        }});
+  }
+  // AFV library entry: DCT4X8/DCT4X4 band params plus 9 AFV weights per row.
+  static QuantEncodingInternal AFV0() {
+    return QuantEncodingInternal::AFV(DCT4X8().dct_params, DCT4X4().dct_params,
+                                      {{{{
+                                            // 4x4/4x8 DC tendency.
+                                            V(3072.0),
+                                            V(3072.0),
+                                            // AFV corner.
+                                            V(256.0),
+                                            V(256.0),
+                                            V(256.0),
+                                            // AFV high freqs.
+                                            V(414.0),
+                                            V(0.0),
+                                            V(0.0),
+                                            V(0.0),
+                                        }},
+                                        {{
+                                            // 4x4/4x8 DC tendency.
+                                            V(1024.0),
+                                            V(1024.0),
+                                            // AFV corner.
+                                            V(50),
+                                            V(50),
+                                            V(50),
+                                            // AFV high freqs.
+                                            V(58.0),
+                                            V(0.0),
+                                            V(0.0),
+                                            V(0.0),
+                                        }},
+                                        {{
+                                            // 4x4/4x8 DC tendency.
+                                            V(384.0),
+                                            V(384.0),
+                                            // AFV corner.
+                                            V(12.0),
+                                            V(12.0),
+                                            V(12.0),
+                                            // AFV high freqs.
+                                            V(22.0),
+                                            V(-0.25),
+                                            V(-0.25),
+                                            V(-0.25),
+                                        }}}});
+  }
+
+  // DCT64X64 library entry; the first band of each row carries a 0.9 factor.
+  static QuantEncodingInternal DCT64X64() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{{
+                                   V(0.9 * 26629.073922049845),
+                                   V(-1.025),
+                                   V(-0.78),
+                                   V(-0.65012),
+                                   V(-0.19041574084286472),
+                                   V(-0.20819395464),
+                                   V(-0.421064),
+                                   V(-0.32733845535848671),
+                               }},
+                               {{
+                                   V(0.9 * 9311.3238710010046),
+                                   V(-0.3041958212306401),
+                                   V(-0.3633036457487539),
+                                   V(-0.35660379990111464),
+                                   V(-0.3443074455424403),
+                                   V(-0.33699592683512467),
+                                   V(-0.30180866526242109),
+                                   V(-0.27321683125358037),
+                               }},
+                               {{
+                                   V(0.9 * 4992.2486445538634),
+                                   V(-1.2),
+                                   V(-1.2),
+                                   V(-0.8),
+                                   V(-0.7),
+                                   V(-0.7),
+                                   V(-0.4),
+                                   V(-0.5),
+                               }}}},
+                             8));  // num_distance_bands: 8 values per row
+  }
+
+  // DCT32X64 library entry (QuantTable's comments pair it with DCT64X32).
+  static QuantEncodingInternal DCT32X64() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{{
+                                   V(0.65 * 23629.073922049845),
+                                   V(-1.025),
+                                   V(-0.78),
+                                   V(-0.65012),
+                                   V(-0.19041574084286472),
+                                   V(-0.20819395464),
+                                   V(-0.421064),
+                                   V(-0.32733845535848671),
+                               }},
+                               {{
+                                   V(0.65 * 8611.3238710010046),
+                                   V(-0.3041958212306401),
+                                   V(-0.3633036457487539),
+                                   V(-0.35660379990111464),
+                                   V(-0.3443074455424403),
+                                   V(-0.33699592683512467),
+                                   V(-0.30180866526242109),
+                                   V(-0.27321683125358037),
+                               }},
+                               {{
+                                   V(0.65 * 4492.2486445538634),
+                                   V(-1.2),
+                                   V(-1.2),
+                                   V(-0.8),
+                                   V(-0.7),
+                                   V(-0.7),
+                                   V(-0.4),
+                                   V(-0.5),
+                               }}}},
+                             8));  // num_distance_bands: 8 values per row
+  }
+  // DCT128X128 library entry: DCT64X64 rows with a 1.8 factor instead of 0.9.
+  static QuantEncodingInternal DCT128X128() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{{
+                                   V(1.8 * 26629.073922049845),
+                                   V(-1.025),
+                                   V(-0.78),
+                                   V(-0.65012),
+                                   V(-0.19041574084286472),
+                                   V(-0.20819395464),
+                                   V(-0.421064),
+                                   V(-0.32733845535848671),
+                               }},
+                               {{
+                                   V(1.8 * 9311.3238710010046),
+                                   V(-0.3041958212306401),
+                                   V(-0.3633036457487539),
+                                   V(-0.35660379990111464),
+                                   V(-0.3443074455424403),
+                                   V(-0.33699592683512467),
+                                   V(-0.30180866526242109),
+                                   V(-0.27321683125358037),
+                               }},
+                               {{
+                                   V(1.8 * 4992.2486445538634),
+                                   V(-1.2),
+                                   V(-1.2),
+                                   V(-0.8),
+                                   V(-0.7),
+                                   V(-0.7),
+                                   V(-0.4),
+                                   V(-0.5),
+                               }}}},
+                             8));  // num_distance_bands: 8 values per row
+  }
+
+  // DCT64X128 (and DCT128X64): DCT32X64 rows with a 1.3 factor, not 0.65.
+  static QuantEncodingInternal DCT64X128() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{{
+                                   V(1.3 * 23629.073922049845),
+                                   V(-1.025),
+                                   V(-0.78),
+                                   V(-0.65012),
+                                   V(-0.19041574084286472),
+                                   V(-0.20819395464),
+                                   V(-0.421064),
+                                   V(-0.32733845535848671),
+                               }},
+                               {{
+                                   V(1.3 * 8611.3238710010046),
+                                   V(-0.3041958212306401),
+                                   V(-0.3633036457487539),
+                                   V(-0.35660379990111464),
+                                   V(-0.3443074455424403),
+                                   V(-0.33699592683512467),
+                                   V(-0.30180866526242109),
+                                   V(-0.27321683125358037),
+                               }},
+                               {{
+                                   V(1.3 * 4492.2486445538634),
+                                   V(-1.2),
+                                   V(-1.2),
+                                   V(-0.8),
+                                   V(-0.7),
+                                   V(-0.7),
+                                   V(-0.4),
+                                   V(-0.5),
+                               }}}},
+                             8));  // num_distance_bands: 8 values per row
+  }
+  // DCT256X256 library entry: DCT64X64 rows with a 3.6 factor instead of 0.9.
+  static QuantEncodingInternal DCT256X256() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{{
+                                   V(3.6 * 26629.073922049845),
+                                   V(-1.025),
+                                   V(-0.78),
+                                   V(-0.65012),
+                                   V(-0.19041574084286472),
+                                   V(-0.20819395464),
+                                   V(-0.421064),
+                                   V(-0.32733845535848671),
+                               }},
+                               {{
+                                   V(3.6 * 9311.3238710010046),
+                                   V(-0.3041958212306401),
+                                   V(-0.3633036457487539),
+                                   V(-0.35660379990111464),
+                                   V(-0.3443074455424403),
+                                   V(-0.33699592683512467),
+                                   V(-0.30180866526242109),
+                                   V(-0.27321683125358037),
+                               }},
+                               {{
+                                   V(3.6 * 4992.2486445538634),
+                                   V(-1.2),
+                                   V(-1.2),
+                                   V(-0.8),
+                                   V(-0.7),
+                                   V(-0.7),
+                                   V(-0.4),
+                                   V(-0.5),
+                               }}}},
+                             8));  // num_distance_bands: 8 values per row
+  }
+
+  // DCT128X256 (and DCT256X128): DCT32X64 rows with a 2.6 factor, not 0.65.
+  static QuantEncodingInternal DCT128X256() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{{
+                                   V(2.6 * 23629.073922049845),
+                                   V(-1.025),
+                                   V(-0.78),
+                                   V(-0.65012),
+                                   V(-0.19041574084286472),
+                                   V(-0.20819395464),
+                                   V(-0.421064),
+                                   V(-0.32733845535848671),
+                               }},
+                               {{
+                                   V(2.6 * 8611.3238710010046),
+                                   V(-0.3041958212306401),
+                                   V(-0.3633036457487539),
+                                   V(-0.35660379990111464),
+                                   V(-0.3443074455424403),
+                                   V(-0.33699592683512467),
+                                   V(-0.30180866526242109),
+                                   V(-0.27321683125358037),
+                               }},
+                               {{
+                                   V(2.6 * 4492.2486445538634),
+                                   V(-1.2),
+                                   V(-1.2),
+                                   V(-0.8),
+                                   V(-0.7),
+                                   V(-0.7),
+                                   V(-0.4),
+                                   V(-0.5),
+                               }}}},
+                             8));  // num_distance_bands: 8 values per row
+  }
+};
+}  // namespace
+
+DequantMatrices::DequantLibraryInternal DequantMatrices::LibraryInit() {
+  static_assert(kNum == 17,
+                "Update this function when adding new quantization kinds.");
+  static_assert(kNumPredefinedTables == 1,
+                "Update this function when adding new quantization matrices to "
+                "the library.");
+
+  // The returned array is indexed by QuantTable value; keep both in sync.
+  static_assert(0 == DCT, "Update the DequantLibrary array below.");
+  static_assert(1 == IDENTITY, "Update the DequantLibrary array below.");
+  static_assert(2 == DCT2X2, "Update the DequantLibrary array below.");
+  static_assert(3 == DCT4X4, "Update the DequantLibrary array below.");
+  static_assert(4 == DCT16X16, "Update the DequantLibrary array below.");
+  static_assert(5 == DCT32X32, "Update the DequantLibrary array below.");
+  static_assert(6 == DCT8X16, "Update the DequantLibrary array below.");
+  static_assert(7 == DCT8X32, "Update the DequantLibrary array below.");
+  static_assert(8 == DCT16X32, "Update the DequantLibrary array below.");
+  static_assert(9 == DCT4X8, "Update the DequantLibrary array below.");
+  static_assert(10 == AFV0, "Update the DequantLibrary array below.");
+  static_assert(11 == DCT64X64, "Update the DequantLibrary array below.");
+  static_assert(12 == DCT32X64, "Update the DequantLibrary array below.");
+  static_assert(13 == DCT128X128, "Update the DequantLibrary array below.");
+  static_assert(14 == DCT64X128, "Update the DequantLibrary array below.");
+  static_assert(15 == DCT256X256, "Update the DequantLibrary array below.");
+  static_assert(16 == DCT128X256, "Update the DequantLibrary array below.");
+  return DequantMatrices::DequantLibraryInternal{{
+      DequantMatricesLibraryDef::DCT(),
+      DequantMatricesLibraryDef::IDENTITY(),
+      DequantMatricesLibraryDef::DCT2X2(),
+      DequantMatricesLibraryDef::DCT4X4(),
+      DequantMatricesLibraryDef::DCT16X16(),
+      DequantMatricesLibraryDef::DCT32X32(),
+      DequantMatricesLibraryDef::DCT8X16(),
+      DequantMatricesLibraryDef::DCT8X32(),
+      DequantMatricesLibraryDef::DCT16X32(),
+      DequantMatricesLibraryDef::DCT4X8(),
+      DequantMatricesLibraryDef::AFV0(),
+      DequantMatricesLibraryDef::DCT64X64(),
+      DequantMatricesLibraryDef::DCT32X64(),
+      // 128+ transforms: same bands as 64x*, but a larger first-band scale.
+      DequantMatricesLibraryDef::DCT128X128(),
+      DequantMatricesLibraryDef::DCT64X128(),
+      DequantMatricesLibraryDef::DCT256X256(),
+      DequantMatricesLibraryDef::DCT128X256(),
+  }};
+}
+
+const QuantEncoding* DequantMatrices::Library() {
+  static const DequantMatrices::DequantLibraryInternal kDequantLibrary =
+      DequantMatrices::LibraryInit();
+  // Downcast the result to a const QuantEncoding* from QuantEncodingInternal*
+  // since the subclass (QuantEncoding) doesn't add any new members and users
+  // will need to upcast to QuantEncodingInternal to access the members of that
+  // class. This keeps the library entries as plain QuantEncodingInternal values
+  // (held here in a function-local static) while QuantEncoding::RAW() instances
+  // can still use std::vector in C++11.
+  return reinterpret_cast<const QuantEncoding*>(kDequantLibrary.data());
+}
+
+DequantMatrices::DequantMatrices() {
+  encodings_.resize(size_t(QuantTable::kNum), QuantEncoding::Library(0));
+  // Start of every (table, channel) block when all tables are laid end to end.
+  size_t plane_start[kNum * 3];
+  size_t next = 0;
+  for (size_t t = 0; t < size_t(QuantTable::kNum); t++) {
+    const size_t plane_size = required_size_[t] * kDCTBlockSize;
+    for (size_t c = 0; c < 3; c++, next += plane_size) {
+      plane_start[3 * t + c] = next;
+    }
+  }
+  for (size_t s = 0; s < AcStrategy::kNumValidStrategies; s++) {
+    for (size_t c = 0; c < 3; c++) {
+      table_offsets_[s * 3 + c] = plane_start[kQuantTable[s] * 3 + c];
+    }
+  }
+}
+
+Status DequantMatrices::EnsureComputed(uint32_t acs_mask) {
+  const QuantEncoding* library = Library();
+
+  // A single allocation backs both table_ and inv_table_ (second half).
+  if (!table_storage_) {
+    table_storage_ = hwy::AllocateAligned<float>(2 * kTotalTableSize);
+    table_ = table_storage_.get();
+    inv_table_ = table_storage_.get() + kTotalTableSize;
+  }
+
+  // plane_start[3 * t + c] is where channel c of table t begins; the extra
+  // last slot holds the total so that the end of the last table is readable.
+  size_t plane_start[kNum * 3 + 1];
+  size_t next = 0;
+  for (size_t t = 0; t < kNum; t++) {
+    const size_t plane_size = required_size_[t] * kDCTBlockSize;
+    for (size_t c = 0; c < 3; c++, next += plane_size) {
+      plane_start[3 * t + c] = next;
+    }
+  }
+  plane_start[kNum * 3] = next;
+  JXL_ASSERT(next == kTotalTableSize);
+
+  // Translate both strategy masks into masks over QuantTable kinds.
+  uint32_t wanted_kinds = 0;
+  uint32_t done_kinds = 0;
+  for (size_t s = 0; s < AcStrategy::kNumValidStrategies; s++) {
+    const uint32_t strategy_bit = 1u << s;
+    const uint32_t kind_bit = 1u << kQuantTable[s];
+    if (acs_mask & strategy_bit) wanted_kinds |= kind_bit;
+    if (computed_mask_ & strategy_bit) done_kinds |= kind_bit;
+  }
+  for (size_t table = 0; table < kNum; table++) {
+    const uint32_t kind_bit = 1u << table;
+    // Only build kinds that were requested and are not already computed.
+    if ((done_kinds & kind_bit) || !(wanted_kinds & kind_bit)) continue;
+    // The cursor is passed by pointer and must end at the next table's start.
+    size_t cursor = plane_start[table * 3];
+    if (encodings_[table].mode == QuantEncoding::kQuantModeLibrary) {
+      JXL_CHECK(HWY_DYNAMIC_DISPATCH(ComputeQuantTable)(
+          library[table], table_storage_.get(),
+          table_storage_.get() + kTotalTableSize, table, QuantTable(table),
+          &cursor));
+    } else {
+      JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(ComputeQuantTable)(
+          encodings_[table], table_storage_.get(),
+          table_storage_.get() + kTotalTableSize, table, QuantTable(table),
+          &cursor));
+    }
+    JXL_ASSERT(cursor == plane_start[table * 3 + 3]);
+  }
+  computed_mask_ |= acs_mask;
+  return true;
+}
+
+}  // namespace jxl
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl/quant_weights.h b/third-party/libjxl/libjxl/lib/jxl/quant_weights.h
new file mode 100644
index 0000000000..d76fc1d1e6
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/quant_weights.h
@@ -0,0 +1,448 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_QUANT_WEIGHTS_H_
+#define LIB_JXL_QUANT_WEIGHTS_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#include <array>
+#include <hwy/aligned_allocator.h>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/cache_aligned.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+template <typename T, size_t N>
+constexpr T ArraySum(T (&a)[N], size_t i = N - 1) {  // sum of a[0] .. a[i]
+  static_assert(N > 0, "Trying to compute the sum of an empty array");
+  return i != 0 ? a[i] + ArraySum(a, i - 1) : a[0];
+}
+
+static constexpr size_t kMaxQuantTableSize = AcStrategy::kMaxCoeffArea;
+static constexpr size_t kNumPredefinedTables = 1;  // valid `predefined` ids: 0
+static constexpr size_t kCeilLog2NumPredefinedTables = 0;  // ceil(log2(1))
+static constexpr size_t kLog2NumQuantModes = 3;  // 1 << 3 == count of Mode values
+
+struct DctQuantWeightParams {
+  static constexpr size_t kLog2MaxDistanceBands = 4;
+  static constexpr size_t kMaxDistanceBands = 1 + (1 << kLog2MaxDistanceBands);
+  typedef std::array<std::array<float, kMaxDistanceBands>, 3>
+      DistanceBandsArray;
+  size_t num_distance_bands = 0;  // number of bands filled in per row
+  DistanceBandsArray distance_bands = {};
+
+  constexpr DctQuantWeightParams() : num_distance_bands(0) {}
+  constexpr DctQuantWeightParams(const DistanceBandsArray& dist_bands,
+                                 size_t num_dist_bands)
+      : num_distance_bands(num_dist_bands), distance_bands(dist_bands) {}
+
+  // Copies 3 rows of num_dist_bands floats; wider rows would overrun the array.
+  template <size_t num_dist_bands>
+  explicit DctQuantWeightParams(const float dist_bands[3][num_dist_bands]) {
+    static_assert(num_dist_bands <= kMaxDistanceBands, "Too many bands");
+    num_distance_bands = num_dist_bands;
+    for (size_t c = 0; c < 3; c++) {
+      memcpy(distance_bands[c].data(), dist_bands[c],
+             sizeof(float) * num_dist_bands);
+    }
+  }
+};
+
+// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
+struct QuantEncodingInternal {
+  enum Mode {
+    kQuantModeLibrary,
+    kQuantModeID,
+    kQuantModeDCT2,
+    kQuantModeDCT4,
+    kQuantModeDCT4X8,
+    kQuantModeAFV,
+    kQuantModeDCT,
+    kQuantModeRAW,
+  };
+
+  template <Mode mode>
+  struct Tag {};  // selects the constructor overload for a given Mode
+
+  typedef std::array<std::array<float, 3>, 3> IdWeights;
+  typedef std::array<std::array<float, 6>, 3> DCT2Weights;
+  typedef std::array<std::array<float, 2>, 3> DCT4Multipliers;
+  typedef std::array<std::array<float, 9>, 3> AFVWeights;
+  typedef std::array<float, 3> DCT4x8Multipliers;
+
+  static constexpr QuantEncodingInternal Library(uint8_t predefined) {
+    return ((predefined < kNumPredefinedTables) ||
+            JXL_ABORT("Assert predefined < kNumPredefinedTables")),
+           QuantEncodingInternal(Tag<kQuantModeLibrary>(), predefined);
+  }
+  constexpr QuantEncodingInternal(Tag<kQuantModeLibrary> /* tag */,
+                                  uint8_t predefined)
+      : mode(kQuantModeLibrary), predefined(predefined) {}
+
+  // Identity
+  // xybweights is an array of {xweights, yweights, bweights}.
+  static constexpr QuantEncodingInternal Identity(const IdWeights& xybweights) {
+    return QuantEncodingInternal(Tag<kQuantModeID>(), xybweights);
+  }
+  constexpr QuantEncodingInternal(Tag<kQuantModeID> /* tag */,
+                                  const IdWeights& xybweights)
+      : mode(kQuantModeID), idweights(xybweights) {}
+
+  // DCT2
+  static constexpr QuantEncodingInternal DCT2(const DCT2Weights& xybweights) {
+    return QuantEncodingInternal(Tag<kQuantModeDCT2>(), xybweights);
+  }
+  constexpr QuantEncodingInternal(Tag<kQuantModeDCT2> /* tag */,
+                                  const DCT2Weights& xybweights)
+      : mode(kQuantModeDCT2), dct2weights(xybweights) {}
+
+  // DCT4
+  static constexpr QuantEncodingInternal DCT4(
+      const DctQuantWeightParams& params, const DCT4Multipliers& xybmul) {
+    return QuantEncodingInternal(Tag<kQuantModeDCT4>(), params, xybmul);
+  }
+  constexpr QuantEncodingInternal(Tag<kQuantModeDCT4> /* tag */,
+                                  const DctQuantWeightParams& params,
+                                  const DCT4Multipliers& xybmul)
+      : mode(kQuantModeDCT4), dct_params(params), dct4multipliers(xybmul) {}
+
+  // DCT4x8
+  static constexpr QuantEncodingInternal DCT4X8(
+      const DctQuantWeightParams& params, const DCT4x8Multipliers& xybmul) {
+    return QuantEncodingInternal(Tag<kQuantModeDCT4X8>(), params, xybmul);
+  }
+  constexpr QuantEncodingInternal(Tag<kQuantModeDCT4X8> /* tag */,
+                                  const DctQuantWeightParams& params,
+                                  const DCT4x8Multipliers& xybmul)
+      : mode(kQuantModeDCT4X8), dct_params(params), dct4x8multipliers(xybmul) {}
+
+  // DCT
+  static constexpr QuantEncodingInternal DCT(
+      const DctQuantWeightParams& params) {
+    return QuantEncodingInternal(Tag<kQuantModeDCT>(), params);
+  }
+  constexpr QuantEncodingInternal(Tag<kQuantModeDCT> /* tag */,
+                                  const DctQuantWeightParams& params)
+      : mode(kQuantModeDCT), dct_params(params) {}
+
+  // AFV
+  static constexpr QuantEncodingInternal AFV(
+      const DctQuantWeightParams& params4x8,
+      const DctQuantWeightParams& params4x4, const AFVWeights& weights) {
+    return QuantEncodingInternal(Tag<kQuantModeAFV>(), params4x8, params4x4,
+                                 weights);
+  }
+  constexpr QuantEncodingInternal(Tag<kQuantModeAFV> /* tag */,
+                                  const DctQuantWeightParams& params4x8,
+                                  const DctQuantWeightParams& params4x4,
+                                  const AFVWeights& weights)
+      : mode(kQuantModeAFV),
+        dct_params(params4x8),
+        afv_weights(weights),
+        dct_params_afv_4x4(params4x4) {}
+
+  // This constructor is not constexpr so it can't be used in any of the
+  // constexpr cases above.
+  explicit QuantEncodingInternal(Mode mode) : mode(mode) {}
+
+  Mode mode;  // tells which of the members below are meaningful
+
+  // Weights for DCT4+ tables.
+  DctQuantWeightParams dct_params;
+
+  union {
+    // Weights for identity.
+    IdWeights idweights;
+
+    // Weights for DCT2.
+    DCT2Weights dct2weights;
+
+    // Extra multipliers for coefficients 01/10 and 11 for DCT4 and AFV.
+    DCT4Multipliers dct4multipliers;
+
+    // Weights for AFV. {0, 1} are used directly for coefficients (0, 1) and (1,
+    // 0);  {2, 3, 4} are used directly corner DC, (1,0) - (0,1) and (0, 1) +
+    // (1, 0) - (0, 0) inside the AFV block. Values from 5 to 8 are interpolated
+    // as in GetQuantWeights for DC and are used for other coefficients.
+    AFVWeights afv_weights = {};
+
+    // Extra multipliers for coefficients 01 or 10 for DCT4X8 and DCT8X4.
+    DCT4x8Multipliers dct4x8multipliers;
+
+    // Only used in kQuantModeRAW mode.
+    struct {
+      // explicit quantization table (like in JPEG)
+      std::vector<int>* qtable = nullptr;
+      float qtable_den = 1.f / (8 * 255);
+    } qraw;
+  };
+
+  // Weights for 4x4 sub-block in AFV.
+  DctQuantWeightParams dct_params_afv_4x4;
+
+  union {
+    // Which predefined table to use. Only used if mode is kQuantModeLibrary.
+    uint8_t predefined = 0;
+
+    // Which other quant table to copy; must copy from a table that comes before
+    // the current one. NOTE(review): Mode has no kQuantModeCopy; likely unused.
+    uint8_t source;
+  };
+};
+
+// QuantEncodingInternal plus ownership of the heap-allocated RAW qtable:
+// copies duplicate the table, moves steal it, and the destructor frees it.
+class QuantEncoding final : public QuantEncodingInternal {
+ public:
+  QuantEncoding(const QuantEncoding& other)
+      : QuantEncodingInternal(
+            static_cast<const QuantEncodingInternal&>(other)) {
+    if (mode == kQuantModeRAW && qraw.qtable) {
+      // Need to make a copy of the passed *qtable.
+      qraw.qtable = new std::vector<int>(*other.qraw.qtable);
+    }
+  }
+  QuantEncoding(QuantEncoding&& other) noexcept
+      : QuantEncodingInternal(
+            static_cast<const QuantEncodingInternal&>(other)) {
+    // Steal the qtable from the other object if any.
+    if (mode == kQuantModeRAW) {
+      other.qraw.qtable = nullptr;
+    }
+  }
+  QuantEncoding& operator=(const QuantEncoding& other) {
+    // Copy the source table first: safe for self-assignment and if new throws.
+    std::vector<int>* copy = nullptr;
+    if (other.mode == kQuantModeRAW && other.qraw.qtable) {
+      copy = new std::vector<int>(*other.qraw.qtable);
+    }
+    if (mode == kQuantModeRAW && qraw.qtable) delete qraw.qtable;
+    *static_cast<QuantEncodingInternal*>(this) =
+        QuantEncodingInternal(static_cast<const QuantEncodingInternal&>(other));
+    if (copy) qraw.qtable = copy;
+    return *this;
+  }
+
+  ~QuantEncoding() {
+    if (mode == kQuantModeRAW) delete qraw.qtable;  // deleting null is a no-op
+  }
+
+  // Wrappers of the QuantEncodingInternal:: static functions that return a
+  // QuantEncoding instead. This is using the explicit and private cast from
+  // QuantEncodingInternal to QuantEncoding, which would be inlined anyway.
+  // In general, you should use these wrappers. The only reason to directly
+  // create a QuantEncodingInternal instance is if you need a constexpr version
+  // of this class. Note that RAW() is not supported in that case since it uses
+  // a std::vector.
+  static QuantEncoding Library(uint8_t predefined_arg) {
+    return QuantEncoding(QuantEncodingInternal::Library(predefined_arg));
+  }
+  static QuantEncoding Identity(const IdWeights& xybweights) {
+    return QuantEncoding(QuantEncodingInternal::Identity(xybweights));
+  }
+  static QuantEncoding DCT2(const DCT2Weights& xybweights) {
+    return QuantEncoding(QuantEncodingInternal::DCT2(xybweights));
+  }
+  static QuantEncoding DCT4(const DctQuantWeightParams& params,
+                            const DCT4Multipliers& xybmul) {
+    return QuantEncoding(QuantEncodingInternal::DCT4(params, xybmul));
+  }
+  static QuantEncoding DCT4X8(const DctQuantWeightParams& params,
+                              const DCT4x8Multipliers& xybmul) {
+    return QuantEncoding(QuantEncodingInternal::DCT4X8(params, xybmul));
+  }
+  static QuantEncoding DCT(const DctQuantWeightParams& params) {
+    return QuantEncoding(QuantEncodingInternal::DCT(params));
+  }
+  static QuantEncoding AFV(const DctQuantWeightParams& params4x8,
+                           const DctQuantWeightParams& params4x4,
+                           const AFVWeights& weights) {
+    return QuantEncoding(
+        QuantEncodingInternal::AFV(params4x8, params4x4, weights));
+  }
+
+  // RAW, note that this one is not a constexpr one.
+  static QuantEncoding RAW(const std::vector<int>& qtable, int shift = 0) {
+    QuantEncoding encoding(kQuantModeRAW);
+    encoding.qraw.qtable = new std::vector<int>();
+    *encoding.qraw.qtable = qtable;
+    encoding.qraw.qtable_den = (1 << shift) * (1.f / (8 * 255));
+    return encoding;
+  }
+
+ private:
+  explicit QuantEncoding(const QuantEncodingInternal& other)
+      : QuantEncodingInternal(other) {}
+
+  explicit QuantEncoding(QuantEncodingInternal::Mode mode_arg)
+      : QuantEncodingInternal(mode_arg) {}
+};
+
+// A constexpr QuantEncodingInternal instance is often downcast to the
+// QuantEncoding subclass even if the instance wasn't an instance of the
+// subclass. This is safe because the user will upcast to QuantEncodingInternal
+// to access any of its members.
+static_assert(sizeof(QuantEncoding) == sizeof(QuantEncodingInternal),
+              "Don't add any members to QuantEncoding");
+
+// Let's try to keep these 2**N for possible future simplicity.
+const float kInvDCQuant[3] = {
+    4096.0f,
+    512.0f,
+    256.0f,
+};
+
+const float kDCQuant[3] = {
+    1.0f / kInvDCQuant[0],
+    1.0f / kInvDCQuant[1],
+    1.0f / kInvDCQuant[2],
+};
+
+class ModularFrameEncoder;
+class ModularFrameDecoder;
+
+class DequantMatrices {
+ public:
+  enum QuantTable : size_t {
+    DCT = 0,
+    IDENTITY,
+    DCT2X2,
+    DCT4X4,
+    DCT16X16,
+    DCT32X32,
+    // DCT16X8
+    DCT8X16,
+    // DCT32X8
+    DCT8X32,
+    // DCT32X16
+    DCT16X32,
+    DCT4X8,
+    // DCT8X4
+    AFV0,
+    // AFV1
+    // AFV2
+    // AFV3
+    DCT64X64,
+    // DCT64X32,
+    DCT32X64,
+    DCT128X128,
+    // DCT128X64,
+    DCT64X128,
+    DCT256X256,
+    // DCT256X128,
+    DCT128X256,
+    kNum
+  };
+
+  static constexpr QuantTable kQuantTable[] = {
+      QuantTable::DCT,        QuantTable::IDENTITY,   QuantTable::DCT2X2,
+      QuantTable::DCT4X4,     QuantTable::DCT16X16,   QuantTable::DCT32X32,
+      QuantTable::DCT8X16,    QuantTable::DCT8X16,    QuantTable::DCT8X32,
+      QuantTable::DCT8X32,    QuantTable::DCT16X32,   QuantTable::DCT16X32,
+      QuantTable::DCT4X8,     QuantTable::DCT4X8,     QuantTable::AFV0,
+      QuantTable::AFV0,       QuantTable::AFV0,       QuantTable::AFV0,
+      QuantTable::DCT64X64,   QuantTable::DCT32X64,   QuantTable::DCT32X64,
+      QuantTable::DCT128X128, QuantTable::DCT64X128,  QuantTable::DCT64X128,
+      QuantTable::DCT256X256, QuantTable::DCT128X256, QuantTable::DCT128X256,
+  };
+  static_assert(AcStrategy::kNumValidStrategies ==
+                    sizeof(kQuantTable) / sizeof *kQuantTable,
+                "Update this array when adding or removing AC strategies.");
+
+  DequantMatrices();
+
+  static const QuantEncoding* Library();
+
+  typedef std::array<QuantEncodingInternal, kNumPredefinedTables * kNum>
+      DequantLibraryInternal;
+  // Returns the kNumPredefinedTables * kNum library entries by value as a
+  // std::array. Use Library() to obtain a pointer to the copy in the
+  // .cc file.
+  static DequantLibraryInternal LibraryInit();
+
+  // Returns aligned memory; the table for |quant_kind| must be computed.
+  JXL_INLINE const float* Matrix(size_t quant_kind, size_t c) const {
+    JXL_DASSERT(quant_kind < AcStrategy::kNumValidStrategies);
+    JXL_DASSERT((1 << quant_kind) & computed_mask_);
+    return &table_[table_offsets_[quant_kind * 3 + c]];
+  }
+
+  JXL_INLINE const float* InvMatrix(size_t quant_kind, size_t c) const {
+    JXL_DASSERT(quant_kind < AcStrategy::kNumValidStrategies);
+    JXL_DASSERT((1 << quant_kind) & computed_mask_);
+    return &inv_table_[table_offsets_[quant_kind * 3 + c]];
+  }
+
+  // DC quants are used in modular mode for XYB multipliers.
+  JXL_INLINE float DCQuant(size_t c) const { return dc_quant_[c]; }
+  JXL_INLINE const float* DCQuants() const { return dc_quant_; }
+
+  JXL_INLINE float InvDCQuant(size_t c) const { return inv_dc_quant_[c]; }
+
+  // For encoder. Replaces the encodings and clears the computed-tables mask.
+  void SetEncodings(const std::vector<QuantEncoding>& encodings) {
+    encodings_ = encodings;
+    computed_mask_ = 0;
+  }
+
+  // For encoder. |dc| holds inverse DC quants: DCQuant(c) becomes 1 / dc[c].
+  void SetDCQuant(const float dc[3]) {
+    for (size_t c = 0; c < 3; c++) {
+      dc_quant_[c] = 1.0f / dc[c];
+      inv_dc_quant_[c] = dc[c];
+    }
+  }
+
+  Status Decode(BitReader* br,
+                ModularFrameDecoder* modular_frame_decoder = nullptr);
+  Status DecodeDC(BitReader* br);
+
+  const std::vector<QuantEncoding>& encodings() const { return encodings_; }
+
+  static constexpr size_t required_size_x[] = {1, 1, 1, 1, 2,  4, 1,  1, 2,
+                                               1, 1, 8, 4, 16, 8, 32, 16};
+  static_assert(kNum == sizeof(required_size_x) / sizeof(*required_size_x),
+                "Update this array when adding or removing quant tables.");
+
+  static constexpr size_t required_size_y[] = {1, 1, 1, 1, 2,  4,  2,  4, 4,
+                                               1, 1, 8, 8, 16, 16, 32, 32};
+  static_assert(kNum == sizeof(required_size_y) / sizeof(*required_size_y),
+                "Update this array when adding or removing quant tables.");
+
+  Status EnsureComputed(uint32_t acs_mask);
+
+ private:
+  static constexpr size_t required_size_[] = {
+      1, 1, 1, 1, 4, 16, 2, 4, 8, 1, 1, 64, 32, 256, 128, 1024, 512};
+  static_assert(kNum == sizeof(required_size_) / sizeof(*required_size_),
+                "Update this array when adding or removing quant tables.");
+  static constexpr size_t kTotalTableSize =
+      ArraySum(required_size_) * kDCTBlockSize * 3;
+
+  uint32_t computed_mask_ = 0;
+  // kTotalTableSize entries followed by kTotalTableSize for inv_table
+  hwy::AlignedFreeUniquePtr<float[]> table_storage_;
+  const float* table_;
+  const float* inv_table_;
+  float dc_quant_[3] = {kDCQuant[0], kDCQuant[1], kDCQuant[2]};
+  float inv_dc_quant_[3] = {kInvDCQuant[0], kInvDCQuant[1], kInvDCQuant[2]};
+  size_t table_offsets_[AcStrategy::kNumValidStrategies * 3];
+  std::vector<QuantEncoding> encodings_;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_QUANT_WEIGHTS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/quant_weights_test.cc b/third-party/libjxl/libjxl/lib/jxl/quant_weights_test.cc
new file mode 100644
index 0000000000..f0497948a7
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/quant_weights_test.cc
@@ -0,0 +1,240 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+#include "lib/jxl/quant_weights.h"
+
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <hwy/base.h>  // HWY_ALIGN_MAX
+#include <hwy/tests/test_util-inl.h>
+#include <numeric>
+
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/dct_for_test.h"
+#include "lib/jxl/dec_transforms_testonly.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/enc_transforms.h"
+
+namespace jxl {
+namespace {
+
+template <typename T>
+void CheckSimilar(T a, T b) {
+  EXPECT_EQ(a, b);
+}
+// minimum exponent = -15.
+template <>
+void CheckSimilar(float a, float b) {
+  float m = std::max(std::abs(a), std::abs(b));
+  // 10 bits of precision are used in the format, so the absolute error may be
+  // at most 2^-10 times the larger of the two magnitudes.
+  EXPECT_LE(std::abs(a - b), m / 1024.0f) << "a: " << a << " b: " << b;
+}
+
+TEST(QuantWeightsTest, DC) {
+  DequantMatrices mat;
+  float dc_quant[3] = {1e+5, 1e+3, 1e+1};
+  DequantMatricesSetCustomDC(&mat, dc_quant);
+  for (size_t c = 0; c < 3; c++) {
+    CheckSimilar(mat.InvDCQuant(c), dc_quant[c]);
+  }
+}
+
+void RoundtripMatrices(const std::vector<QuantEncoding>& encodings) {
+  ASSERT_TRUE(encodings.size() == DequantMatrices::kNum);  // one per table
+  DequantMatrices mat;
+  CodecMetadata metadata;
+  FrameHeader frame_header(&metadata);
+  ModularFrameEncoder encoder(frame_header, CompressParams{});
+  DequantMatricesSetCustom(&mat, encodings, &encoder);
+  const std::vector<QuantEncoding>& encodings_dec = mat.encodings();
+  for (size_t i = 0; i < encodings.size(); i++) {
+    const QuantEncoding& e = encodings[i];
+    const QuantEncoding& d = encodings_dec[i];
+    // Check that the scalar fields round-tripped exactly.
+    EXPECT_EQ(e.mode, d.mode);
+    EXPECT_EQ(e.predefined, d.predefined);
+    EXPECT_EQ(e.source, d.source);
+
+    EXPECT_EQ(static_cast<uint64_t>(e.dct_params.num_distance_bands),
+              static_cast<uint64_t>(d.dct_params.num_distance_bands));
+    for (size_t c = 0; c < 3; c++) {
+      for (size_t j = 0; j < DctQuantWeightParams::kMaxDistanceBands; j++) {
+        CheckSimilar(e.dct_params.distance_bands[c][j],
+                     d.dct_params.distance_bands[c][j]);
+      }
+    }
+
+    if (e.mode == QuantEncoding::kQuantModeRAW) {
+      EXPECT_FALSE(!e.qraw.qtable);
+      EXPECT_FALSE(!d.qraw.qtable);
+      EXPECT_EQ(e.qraw.qtable->size(), d.qraw.qtable->size());
+      for (size_t j = 0; j < e.qraw.qtable->size(); j++) {
+        EXPECT_EQ((*e.qraw.qtable)[j], (*d.qraw.qtable)[j]);
+      }
+      EXPECT_NEAR(e.qraw.qtable_den, d.qraw.qtable_den, 1e-7f);
+    } else {
+      // Modes other than kQuantModeRAW use one of the other fields compared
+      // here, which all happen to be arrays of floats.
+      for (size_t c = 0; c < 3; c++) {
+        for (size_t j = 0; j < 3; j++) {
+          CheckSimilar(e.idweights[c][j], d.idweights[c][j]);
+        }
+        for (size_t j = 0; j < 6; j++) {
+          CheckSimilar(e.dct2weights[c][j], d.dct2weights[c][j]);
+        }
+        for (size_t j = 0; j < 2; j++) {
+          CheckSimilar(e.dct4multipliers[c][j], d.dct4multipliers[c][j]);
+        }
+        CheckSimilar(e.dct4x8multipliers[c], d.dct4x8multipliers[c]);
+        for (size_t j = 0; j < 9; j++) {
+          CheckSimilar(e.afv_weights[c][j], d.afv_weights[c][j]);
+        }
+        for (size_t j = 0; j < DctQuantWeightParams::kMaxDistanceBands; j++) {
+          CheckSimilar(e.dct_params_afv_4x4.distance_bands[c][j],
+                       d.dct_params_afv_4x4.distance_bands[c][j]);
+        }
+      }
+    }
+  }
+}
+
+TEST(QuantWeightsTest, AllDefault) {
+  std::vector<QuantEncoding> encodings(DequantMatrices::kNum,
+                                       QuantEncoding::Library(0));
+  RoundtripMatrices(encodings);
+}
+
+void TestSingleQuantMatrix(DequantMatrices::QuantTable kind) {
+  std::vector<QuantEncoding> encodings(DequantMatrices::kNum,
+                                       QuantEncoding::Library(0));
+  encodings[kind] = DequantMatrices::Library()[kind];
+  RoundtripMatrices(encodings);
+}
+
+// Ensure we can reasonably represent default quant tables.
+TEST(QuantWeightsTest, DCT) { TestSingleQuantMatrix(DequantMatrices::DCT); }
+TEST(QuantWeightsTest, IDENTITY) {
+  TestSingleQuantMatrix(DequantMatrices::IDENTITY);
+}
+TEST(QuantWeightsTest, DCT2X2) {
+  TestSingleQuantMatrix(DequantMatrices::DCT2X2);
+}
+TEST(QuantWeightsTest, DCT4X4) {
+  TestSingleQuantMatrix(DequantMatrices::DCT4X4);
+}
+TEST(QuantWeightsTest, DCT16X16) {
+  TestSingleQuantMatrix(DequantMatrices::DCT16X16);
+}
+TEST(QuantWeightsTest, DCT32X32) {
+  TestSingleQuantMatrix(DequantMatrices::DCT32X32);
+}
+TEST(QuantWeightsTest, DCT8X16) {
+  TestSingleQuantMatrix(DequantMatrices::DCT8X16);
+}
+TEST(QuantWeightsTest, DCT8X32) {
+  TestSingleQuantMatrix(DequantMatrices::DCT8X32);
+}
+TEST(QuantWeightsTest, DCT16X32) {
+  TestSingleQuantMatrix(DequantMatrices::DCT16X32);
+}
+TEST(QuantWeightsTest, DCT4X8) {
+  TestSingleQuantMatrix(DequantMatrices::DCT4X8);
+}
+TEST(QuantWeightsTest, AFV0) { TestSingleQuantMatrix(DequantMatrices::AFV0); }
+TEST(QuantWeightsTest, RAW) {
+  std::vector<QuantEncoding> encodings(DequantMatrices::kNum,
+                                       QuantEncoding::Library(0));
+  std::vector<int> matrix(3 * 32 * 32);
+  Rng rng(0);
+  for (size_t i = 0; i < matrix.size(); i++) matrix[i] = rng.UniformI(1, 256);
+  encodings[DequantMatrices::kQuantTable[AcStrategy::DCT32X32]] =
+      QuantEncoding::RAW(matrix, 2);
+  RoundtripMatrices(encodings);
+}
+
+class QuantWeightsTargetTest : public hwy::TestWithParamTarget {};
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(QuantWeightsTargetTest);
+
+TEST_P(QuantWeightsTargetTest, DCTUniform) {
+  constexpr float kUniformQuant = 4;
+  float weights[3][2] = {{1.0f / kUniformQuant, 0},
+                         {1.0f / kUniformQuant, 0},
+                         {1.0f / kUniformQuant, 0}};
+  DctQuantWeightParams dct_params(weights);
+  std::vector<QuantEncoding> encodings(DequantMatrices::kNum,
+                                       QuantEncoding::DCT(dct_params));
+  DequantMatrices dequant_matrices;
+  CodecMetadata metadata;
+  FrameHeader frame_header(&metadata);
+  ModularFrameEncoder encoder(frame_header, CompressParams{});
+  DequantMatricesSetCustom(&dequant_matrices, encodings, &encoder);
+  JXL_CHECK(dequant_matrices.EnsureComputed(~0u));
+
+  const float dc_quant[3] = {1.0f / kUniformQuant, 1.0f / kUniformQuant,
+                             1.0f / kUniformQuant};
+  DequantMatricesSetCustomDC(&dequant_matrices, dc_quant);
+
+  HWY_ALIGN_MAX float scratch_space[16 * 16 * 2];
+
+  // DCT8
+  {
+    HWY_ALIGN_MAX float pixels[64];
+    std::iota(std::begin(pixels), std::end(pixels), 0);
+    HWY_ALIGN_MAX float coeffs[64];
+    const AcStrategy::Type dct = AcStrategy::DCT;
+    TransformFromPixels(dct, pixels, 8, coeffs, scratch_space);
+    HWY_ALIGN_MAX double slow_coeffs[64];
+    for (size_t i = 0; i < 64; i++) slow_coeffs[i] = pixels[i];
+    DCTSlow<8>(slow_coeffs);
+    // Round both coefficient sets to multiples of their quantization step.
+    for (size_t i = 0; i < 64; i++) {
+      // DCTSlow doesn't multiply/divide by 1/N, so we do it manually.
+      slow_coeffs[i] = roundf(slow_coeffs[i] / kUniformQuant) * kUniformQuant;
+      coeffs[i] = roundf(coeffs[i] / dequant_matrices.Matrix(dct, 0)[i]) *
+                  dequant_matrices.Matrix(dct, 0)[i];
+    }
+    IDCTSlow<8>(slow_coeffs);
+    TransformToPixels(dct, coeffs, pixels, 8, scratch_space);
+    for (size_t i = 0; i < 64; i++) {
+      EXPECT_NEAR(pixels[i], slow_coeffs[i], 1e-4);
+    }
+  }
+
+  // DCT16
+  {
+    HWY_ALIGN_MAX float pixels[64 * 4];
+    std::iota(std::begin(pixels), std::end(pixels), 0);
+    HWY_ALIGN_MAX float coeffs[64 * 4];
+    const AcStrategy::Type dct = AcStrategy::DCT16X16;
+    TransformFromPixels(dct, pixels, 16, coeffs, scratch_space);
+    HWY_ALIGN_MAX double slow_coeffs[64 * 4];
+    for (size_t i = 0; i < 64 * 4; i++) slow_coeffs[i] = pixels[i];
+    DCTSlow<16>(slow_coeffs);
+
+    for (size_t i = 0; i < 64 * 4; i++) {
+      slow_coeffs[i] = roundf(slow_coeffs[i] / kUniformQuant) * kUniformQuant;
+      coeffs[i] = roundf(coeffs[i] / dequant_matrices.Matrix(dct, 0)[i]) *
+                  dequant_matrices.Matrix(dct, 0)[i];
+    }
+
+    IDCTSlow<16>(slow_coeffs);
+    TransformToPixels(dct, coeffs, pixels, 16, scratch_space);
+    for (size_t i = 0; i < 64 * 4; i++) {
+      EXPECT_NEAR(pixels[i], slow_coeffs[i], 1e-4);
+    }
+  }
+
+  // Check that all matrices have the same DC quantization, i.e. that they all
+  // have the same scaling.
+  for (size_t i = 0; i < AcStrategy::kNumValidStrategies; i++) {
+    EXPECT_NEAR(dequant_matrices.Matrix(i, 0)[0], kUniformQuant, 1e-6);
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/quantizer-inl.h b/third-party/libjxl/libjxl/lib/jxl/quantizer-inl.h
new file mode 100644
index 0000000000..64d273c552
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/quantizer-inl.h
@@ -0,0 +1,74 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JXL_QUANTIZER_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_QUANTIZER_INL_H_
+#undef LIB_JXL_QUANTIZER_INL_H_
+#else
+#define LIB_JXL_QUANTIZER_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::And;
+using hwy::HWY_NAMESPACE::AndNot;
+using hwy::HWY_NAMESPACE::ApproximateReciprocal;
+using hwy::HWY_NAMESPACE::Gt;
+using hwy::HWY_NAMESPACE::IfThenElse;
+using hwy::HWY_NAMESPACE::IfThenElseZero;
+using hwy::HWY_NAMESPACE::Lt;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::Vec;
+using hwy::HWY_NAMESPACE::Xor;
+
+template <class DI>
+HWY_INLINE HWY_MAYBE_UNUSED Vec<Rebind<float, DI>> AdjustQuantBias(
+    DI di, const size_t c, const Vec<DI> quant_i,
+    const float* HWY_RESTRICT biases) {
+  const Rebind<float, DI> df;
+
+  const auto quant = ConvertTo(df, quant_i);
+
+  // Compare |quant|, keep sign bit for negating result.
+  const auto kSign = BitCast(df, Set(di, INT32_MIN));
+  const auto sign = And(quant, kSign);  // TODO(janwas): = abs ^ orig
+  const auto abs_quant = AndNot(kSign, quant);
+
+  // If |quant| is 1, biases[c] provides a different bias for each channel.
+  // We're implementing the following:
+  // if (quant == 0) return 0;
+  // if (quant == 1) return biases[c];
+  // if (quant == -1) return -biases[c];
+  // return quant - biases[3] / quant;
+
+  // Integer comparison is not helpful because Clang incurs bypass penalties
+  // from unnecessarily mixing integer and float.
+  const auto is_01 = Lt(abs_quant, Set(df, 1.125f));
+  const auto not_0 = Gt(abs_quant, Zero(df));
+
+  // Bitwise logic is faster than quant * biases[c].
+  const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign));
+
+  // About 2E-5 worse than ReciprocalNR or division.
+  const auto bias =
+      NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant);
+  // Zero and +-1 lanes take one_bias, so 1/0 never reaches the result.
+  return IfThenElse(is_01, one_bias, bias);
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_QUANTIZER_INL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/quantizer.cc b/third-party/libjxl/libjxl/lib/jxl/quantizer.cc
new file mode 100644
index 0000000000..153cf19b21
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/quantizer.cc
@@ -0,0 +1,156 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/quantizer.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include <algorithm>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/quant_weights.h"
+
+namespace jxl {
+
+static const int32_t kDefaultQuant = 64;
+
+constexpr int32_t Quantizer::kQuantMax;
+
+Quantizer::Quantizer(const DequantMatrices* dequant)
+    : Quantizer(dequant, kDefaultQuant, kGlobalScaleDenom / kDefaultQuant) {}
+
+Quantizer::Quantizer(const DequantMatrices* dequant, int quant_dc,
+                     int global_scale)
+    : global_scale_(global_scale), quant_dc_(quant_dc), dequant_(dequant) {
+  JXL_ASSERT(dequant_ != nullptr);
+  RecomputeFromGlobalScale();  // fills the scale- and DC-derived members
+  inv_quant_dc_ = inv_global_scale_ / quant_dc_;
+  // Every channel starts from the default zero-bias.
+  memcpy(zero_bias_, kZeroBiasDefault, sizeof(kZeroBiasDefault));
+}
+
+void Quantizer::ComputeGlobalScaleAndQuant(float quant_dc, float quant_median,
+                                           float quant_median_absd) {
+  // Target value for the median value in the quant field.
+  const float kQuantFieldTarget = 5;
+  // We reduce the median of the quant field by the median absolute deviation:
+  // higher resolution on highly varying quant fields.
+  float scale = kGlobalScaleDenom * (quant_median - quant_median_absd) /
+                kQuantFieldTarget;
+  // Ensure that new_global_scale is positive and no more than 1<<15.
+  if (scale < 1) scale = 1;
+  if (scale > (1 << 15)) scale = 1 << 15;
+  int new_global_scale = static_cast<int>(scale);
+  // Ensure that quant_dc_ will always be at least
+  // 0.625 * kGlobalScaleDenom/kGlobalScaleNumerator = 10.
+  const int scaled_quant_dc =
+      static_cast<int>(quant_dc * kGlobalScaleNumerator * 1.6);
+  if (new_global_scale > scaled_quant_dc) {
+    new_global_scale = scaled_quant_dc;
+    if (new_global_scale <= 0) new_global_scale = 1;
+  }
+  global_scale_ = new_global_scale;
+  // Code below uses inv_global_scale_.
+  RecomputeFromGlobalScale();
+  // Express quant_dc in units of the new scale, capped at 1 << 16.
+  float fval = quant_dc * inv_global_scale_ + 0.5f;
+  fval = std::min<float>(1 << 16, fval);
+  const int new_quant_dc = static_cast<int>(fval);
+  quant_dc_ = new_quant_dc;
+
+  // quant_dc_ was updated, recompute values.
+  RecomputeFromGlobalScale();
+}
+
+void Quantizer::SetQuantFieldRect(const ImageF& qf, const Rect& rect,
+                                  ImageI* JXL_RESTRICT raw_quant_field) const {
+  // Writes the clamped integer form of every |qf| sample inside |rect|.
+  for (size_t row = 0; row < rect.ysize(); ++row) {
+    const float* JXL_RESTRICT src = rect.ConstRow(qf, row);
+    int32_t* JXL_RESTRICT dst = rect.Row(raw_quant_field, row);
+    for (size_t col = 0; col < rect.xsize(); ++col) {
+      dst[col] = ClampVal(src[col] * inv_global_scale_ + 0.5f);
+    }
+  }
+}
+
+void Quantizer::SetQuantField(const float quant_dc, const ImageF& qf,
+                              ImageI* JXL_RESTRICT raw_quant_field) {
+  // Gather every sample of |qf| so that medians can be taken over them.
+  const size_t xsize = qf.xsize();
+  std::vector<float> samples;
+  samples.reserve(xsize * qf.ysize());
+  for (size_t y = 0; y < qf.ysize(); ++y) {
+    const float* row = qf.Row(y);
+    samples.insert(samples.end(), row, row + xsize);
+  }
+  const size_t mid = samples.size() / 2;
+  std::nth_element(samples.begin(), samples.begin() + mid, samples.end());
+  const float quant_median = samples[mid];
+  // Median of the absolute distances of all samples from quant_median.
+  std::vector<float> devs;
+  devs.reserve(samples.size());
+  for (const float sample : samples) {
+    devs.push_back(fabsf(sample - quant_median));
+  }
+  std::nth_element(devs.begin(), devs.begin() + mid, devs.end());
+  const float quant_median_absd = devs[mid];
+  ComputeGlobalScaleAndQuant(quant_dc, quant_median, quant_median_absd);
+  if (raw_quant_field == nullptr) return;
+  JXL_CHECK(SameSize(*raw_quant_field, qf));
+  SetQuantFieldRect(qf, Rect(qf), raw_quant_field);
+}
+
+void Quantizer::SetQuant(float quant_dc, float quant_ac,
+                         ImageI* JXL_RESTRICT raw_quant_field) {
+  ComputeGlobalScaleAndQuant(quant_dc, quant_ac, 0);  // uniform field, absd 0
+  if (raw_quant_field == nullptr) return;  // optional, as in SetQuantField()
+  FillImage(ClampVal(quant_ac * inv_global_scale_ + 0.5f), raw_quant_field);
+}
+
+Status QuantizerParams::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+      BitsOffset(11, 1), BitsOffset(11, 2049), BitsOffset(12, 4097),
+      BitsOffset(16, 8193), 1, &global_scale));
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(16), BitsOffset(5, 1),
+                                         BitsOffset(8, 1), BitsOffset(16, 1), 1,
+                                         &quant_dc));
+  return true;
+}
+
+QuantizerParams Quantizer::GetParams() const {
+  QuantizerParams params;
+  params.global_scale = global_scale_;
+  params.quant_dc = quant_dc_;
+  return params;
+}
+
+Status Quantizer::Decode(BitReader* reader) {
+  QuantizerParams params;  // read-side counterpart of GetParams()
+  JXL_RETURN_IF_ERROR(Bundle::Read(reader, &params));
+  global_scale_ = static_cast<int>(params.global_scale);
+  quant_dc_ = static_cast<int>(params.quant_dc);
+  RecomputeFromGlobalScale();  // refresh members derived from the two above
+  return true;
+}
+
+void Quantizer::DumpQuantizationMap(const ImageI& raw_quant_field) const {
+  // Header first, then one text line per row of the raw quant field.
+  printf("Global scale: %d (%.7f)\nDC quant: %d\n", global_scale_,
+         global_scale_ * 1.0 / kGlobalScaleDenom, quant_dc_);
+  printf("AC quantization Map:\n");
+  for (size_t y = 0; y < raw_quant_field.ysize(); ++y) {
+    const int32_t* row = raw_quant_field.Row(y);
+    for (size_t x = 0; x < raw_quant_field.xsize(); ++x) printf(" %3d", row[x]);
+    printf("\n");
+  }
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/quantizer.h b/third-party/libjxl/libjxl/lib/jxl/quantizer.h
new file mode 100644
index 0000000000..2829575729
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/quantizer.h
@@ -0,0 +1,181 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_QUANTIZER_H_
+#define LIB_JXL_QUANTIZER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/quant_weights.h"
+
+// Quantizes DC and AC coefficients, with separate quantization tables according
+// to the quant_kind (which is currently computed from the AC strategy and the
+// block index inside that strategy).
+
+namespace jxl {
+
+static constexpr int kGlobalScaleDenom = 1 << 16;
+static constexpr int kGlobalScaleNumerator = 4096;
+
+// zero-biases for quantizing channels X, Y, B
+static constexpr float kZeroBiasDefault[3] = {0.5f, 0.5f, 0.5f};
+
+// Returns adjusted version of a quantized integer, such that its value is
+// closer to the expected value of the original.
+// The residuals of AC coefficients that we quantize are not uniformly
+// distributed. Numerical experiments show that they have a distribution with
+// the "shape" of 1/(1+x^2) [up to some coefficients]. This means that the
+// expected value of a coefficient that gets quantized to x will not be x
+// itself, but (at least with reasonable approximation):
+// - 0 if x is 0
+// - x * biases[c] if x is 1 or -1
+// - x - biases[3]/x otherwise
+// This follows from computing the distribution of the quantization bias, which
+// can be approximated fairly well by <constant>/x when |x| is at least two.
+static constexpr float kBiasNumerator = 0.145f;
+
+static constexpr float kDefaultQuantBias[4] = {
+    1.0f - 0.05465007330715401f,
+    1.0f - 0.07005449891748593f,
+    1.0f - 0.049935103337343655f,
+    0.145f,
+};
+
+struct QuantizerParams;
+
+class Quantizer {
+ public:
+  explicit Quantizer(const DequantMatrices* dequant);
+  Quantizer(const DequantMatrices* dequant, int quant_dc, int global_scale);
+
+  static constexpr int32_t kQuantMax = 256;
+  // Clamps |val| to [1, kQuantMax], then truncates it to an integer.
+  static JXL_INLINE int32_t ClampVal(float val) {
+    return static_cast<int32_t>(
+        std::max(1.0f, std::min<float>(val, kQuantMax)));
+  }
+  // Scales global_scale_ by |scale| (rounded); returns the factor applied.
+  float ScaleGlobalScale(const float scale) {
+    int new_global_scale = static_cast<int>(global_scale_ * scale + 0.5f);
+    float scale_out = new_global_scale * 1.0f / global_scale_;
+    global_scale_ = new_global_scale;
+    RecomputeFromGlobalScale();
+    return scale_out;
+  }
+
+  // Recomputes derived fields after global_scale_ or quant_dc_ has changed.
+  void RecomputeFromGlobalScale() {
+    global_scale_float_ = global_scale_ * (1.0 / kGlobalScaleDenom);
+    inv_global_scale_ = 1.0 * kGlobalScaleDenom / global_scale_;
+    inv_quant_dc_ = inv_global_scale_ / quant_dc_;
+    for (size_t c = 0; c < 3; c++) {
+      mul_dc_[c] = GetDcStep(c);
+      inv_mul_dc_[c] = GetInvDcStep(c);
+    }
+  }
+
+  // Returns scaling factor such that Scale() * (RawDC() or RawQuantField())
+  // pixels yields the same float values returned by GetQuantField.
+  JXL_INLINE float Scale() const { return global_scale_float_; }
+
+  // Reciprocal of Scale().
+  JXL_INLINE float InvGlobalScale() const { return inv_global_scale_; }
+
+  void SetQuantFieldRect(const ImageF& qf, const Rect& rect,
+                         ImageI* JXL_RESTRICT raw_quant_field) const;
+
+  void SetQuantField(float quant_dc, const ImageF& qf,
+                     ImageI* JXL_RESTRICT raw_quant_field);
+
+  void SetQuant(float quant_dc, float quant_ac,
+                ImageI* JXL_RESTRICT raw_quant_field);
+
+  // Returns the DC quantization base value, which is currently global (not
+  // adaptive). The actual scale factor used to dequantize pixels in channel c
+  // is: inv_quant_dc() * dequant_->DCQuant(c).
+  float inv_quant_dc() const { return inv_quant_dc_; }
+
+  // Dequantize by multiplying with this times dequant_matrix.
+  float inv_quant_ac(int32_t quant) const { return inv_global_scale_ / quant; }
+
+  QuantizerParams GetParams() const;
+
+  Status Decode(BitReader* reader);
+
+  void DumpQuantizationMap(const ImageI& raw_quant_field) const;
+
+  JXL_INLINE const float* DequantMatrix(size_t quant_kind, size_t c) const {
+    return dequant_->Matrix(quant_kind, c);
+  }
+
+  JXL_INLINE const float* InvDequantMatrix(size_t quant_kind, size_t c) const {
+    return dequant_->InvMatrix(quant_kind, c);
+  }
+
+  // Calculates DC quantization step.
+  JXL_INLINE float GetDcStep(size_t c) const {
+    return inv_quant_dc_ * dequant_->DCQuant(c);
+  }
+  JXL_INLINE float GetInvDcStep(size_t c) const {
+    return dequant_->InvDCQuant(c) * (global_scale_float_ * quant_dc_);
+  }
+
+  JXL_INLINE const float* MulDC() const { return mul_dc_; }
+  JXL_INLINE const float* InvMulDC() const { return inv_mul_dc_; }
+  // Resets all four entries of both DC multiplier arrays to 1.
+  JXL_INLINE void ClearDCMul() {
+    std::fill(mul_dc_, mul_dc_ + 4, 1.f);
+    std::fill(inv_mul_dc_, inv_mul_dc_ + 4, 1.f);
+  }
+
+  void ComputeGlobalScaleAndQuant(float quant_dc, float quant_median,
+                                  float quant_median_absd);
+
+ private:
+  float mul_dc_[4];      // [3] is written only by ClearDCMul()
+  float inv_mul_dc_[4];  // [3] is written only by ClearDCMul()
+
+  // These are serialized:
+  int global_scale_;
+  int quant_dc_;
+
+  // These are derived from global_scale_ (inv_quant_dc_ also from quant_dc_):
+  float inv_global_scale_;
+  float global_scale_float_;  // reciprocal of inv_global_scale_
+  float inv_quant_dc_;
+
+  float zero_bias_[3];
+  const DequantMatrices* dequant_;
+};
+
+struct QuantizerParams : public Fields {
+  QuantizerParams() { Bundle::Init(this); }
+  JXL_FIELDS_NAME(QuantizerParams)
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+  // Serialized quantizer state; see Quantizer::GetParams() and Decode().
+  uint32_t global_scale;  // mirrors Quantizer::global_scale_
+  uint32_t quant_dc;      // mirrors Quantizer::quant_dc_
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_QUANTIZER_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/quantizer_test.cc b/third-party/libjxl/libjxl/lib/jxl/quantizer_test.cc
new file mode 100644
index 0000000000..f9cf2c838e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/quantizer_test.cc
@@ -0,0 +1,81 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/quantizer.h"
+
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+void TestEquivalence(int qxsize, int qysize, const Quantizer& quantizer1,
+                     const Quantizer& quantizer2) {
+  ASSERT_NEAR(quantizer1.inv_quant_dc(), quantizer2.inv_quant_dc(), 1e-7);
+}
+
+TEST(QuantizerTest, QuantizerParams) {
+  for (uint32_t i = 1; i < 10000; ++i) {
+    QuantizerParams p;  // quant_dc keeps whatever Bundle::Init assigned
+    p.global_scale = i;
+    size_t extension_bits = 0, total_bits = 0;
+    EXPECT_TRUE(Bundle::CanEncode(p, &extension_bits, &total_bits));
+    EXPECT_EQ(0u, extension_bits);
+    EXPECT_GE(total_bits, 4u);
+  }
+}
+
+TEST(QuantizerTest, BitStreamRoundtripSameQuant) {
+  const int qxsize = 8;
+  const int qysize = 8;
+  DequantMatrices dequant;
+  Quantizer quantizer1(&dequant);
+  ImageI raw_quant_field(qxsize, qysize);
+  quantizer1.SetQuant(0.17f, 0.17f, &raw_quant_field);
+  BitWriter writer;
+  QuantizerParams params = quantizer1.GetParams();
+  EXPECT_TRUE(WriteQuantizerParams(params, &writer, 0, nullptr));
+  writer.ZeroPadToByte();
+  const size_t bits_written = writer.BitsWritten();
+  Quantizer quantizer2(&dequant);  // default state, overwritten by Decode()
+  BitReader reader(writer.GetSpan());
+  EXPECT_TRUE(quantizer2.Decode(&reader));
+  EXPECT_TRUE(reader.JumpToByteBoundary());
+  EXPECT_EQ(reader.TotalBitsConsumed(), bits_written);
+  EXPECT_TRUE(reader.Close());
+  TestEquivalence(qxsize, qysize, quantizer1, quantizer2);
+}
+
+TEST(QuantizerTest, BitStreamRoundtripRandomQuant) {
+  const int qxsize = 8;
+  const int qysize = 8;
+  DequantMatrices dequant;
+  Quantizer quantizer1(&dequant);
+  ImageI raw_quant_field(qxsize, qysize);
+  quantizer1.SetQuant(0.17f, 0.17f, &raw_quant_field);  // superseded below
+  float quant_dc = 0.17f;
+  ImageF qf(qxsize, qysize);
+  RandomFillImage(&qf, 0.0f, 1.0f);
+  quantizer1.SetQuantField(quant_dc, qf, &raw_quant_field);
+  BitWriter writer;
+  QuantizerParams params = quantizer1.GetParams();
+  EXPECT_TRUE(WriteQuantizerParams(params, &writer, 0, nullptr));
+  writer.ZeroPadToByte();
+  const size_t bits_written = writer.BitsWritten();
+  Quantizer quantizer2(&dequant);  // default state, overwritten by Decode()
+  BitReader reader(writer.GetSpan());
+  EXPECT_TRUE(quantizer2.Decode(&reader));
+  EXPECT_TRUE(reader.JumpToByteBoundary());
+  EXPECT_EQ(reader.TotalBitsConsumed(), bits_written);
+  EXPECT_TRUE(reader.Close());
+  TestEquivalence(qxsize, qysize, quantizer1, quantizer2);
+}
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/rational_polynomial-inl.h b/third-party/libjxl/libjxl/lib/jxl/rational_polynomial-inl.h
new file mode 100644
index 0000000000..176e24092c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/rational_polynomial-inl.h
@@ -0,0 +1,98 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Fast SIMD evaluation of rational polynomials for approximating functions.
+
+#if defined(LIB_JXL_RATIONAL_POLYNOMIAL_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_RATIONAL_POLYNOMIAL_INL_H_
+#undef LIB_JXL_RATIONAL_POLYNOMIAL_INL_H_
+#else
+#define LIB_JXL_RATIONAL_POLYNOMIAL_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Div;
+using hwy::HWY_NAMESPACE::MulAdd;
+
+// Primary template (any T without a specialization): actual division, n / d.
+template <typename T, class V>
+struct FastDivision {
+  HWY_INLINE V operator()(const V n, const V d) const { return n / d; }
+};
+// Partial specialization for float vectors; currently also divides via Div().
+template <class V>
+struct FastDivision<float, V> {
+  // ApproximateReciprocal(x) refined by one Newton-Raphson step: 2r - x*r*r.
+  static HWY_INLINE V ReciprocalNR(const V x) {
+    const auto rcp = ApproximateReciprocal(x);
+    const auto sum = Add(rcp, rcp);  // 2 * rcp
+    const auto x_rcp = Mul(x, rcp);  // x * rcp
+    return NegMulAdd(x_rcp, rcp, sum);  // sum - x_rcp * rcp (Highway NegMulAdd)
+  }
+
+  V operator()(const V n, const V d) const {
+#if 1  // Faster on SKX
+    return Div(n, d);
+#else  // disabled alternative: multiply by the refined reciprocal
+    return n * ReciprocalNR(d);
+#endif
+  }
+};
+
+// Approximates smooth functions via rational polynomials, i.e. p(x) / q(x).
+// Both are evaluated via Horner's scheme (faster than Clenshaw recurrence for
+// Chebyshev polynomials). Coefficients are stored lowest degree first, each
+// replicated 4x so LoadDup128 works for any lane count; max. degree is 7.
+template <size_t NP, size_t NQ, class D, class V, typename T>
+HWY_INLINE HWY_MAYBE_UNUSED V EvalRationalPolynomial(const D d, const V x,
+                                                     const T (&p)[NP],
+                                                     const T (&q)[NQ]) {
+  constexpr size_t kDegP = NP / 4 - 1;  // degree = (array length / 4) - 1
+  constexpr size_t kDegQ = NQ / 4 - 1;
+  auto yp = LoadDup128(d, &p[kDegP * 4]);  // start from the highest coefficient
+  auto yq = LoadDup128(d, &q[kDegQ * 4]);
+  // We use pointer arithmetic to refer to &p[(kDegP - n) * 4] to avoid a
+  // compiler warning that the index is out of bounds since we are already
+  // checking that it is not out of bounds with (kDegP >= n) and the access
+  // will be optimized away. Similarly with q and kDegQ.
+  HWY_FENCE;
+  if (kDegP >= 1) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 1) * 4)));
+  if (kDegQ >= 1) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 1) * 4)));
+  HWY_FENCE;
+  if (kDegP >= 2) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 2) * 4)));
+  if (kDegQ >= 2) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 2) * 4)));
+  HWY_FENCE;
+  if (kDegP >= 3) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 3) * 4)));
+  if (kDegQ >= 3) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 3) * 4)));
+  HWY_FENCE;
+  if (kDegP >= 4) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 4) * 4)));
+  if (kDegQ >= 4) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 4) * 4)));
+  HWY_FENCE;
+  if (kDegP >= 5) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 5) * 4)));
+  if (kDegQ >= 5) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 5) * 4)));
+  HWY_FENCE;
+  if (kDegP >= 6) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 6) * 4)));
+  if (kDegQ >= 6) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 6) * 4)));
+  HWY_FENCE;
+  if (kDegP >= 7) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 7) * 4)));
+  if (kDegQ >= 7) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 7) * 4)));
+
+  return FastDivision<T, V>()(yp, yq);
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+#endif  // LIB_JXL_RATIONAL_POLYNOMIAL_INL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/rational_polynomial_test.cc b/third-party/libjxl/libjxl/lib/jxl/rational_polynomial_test.cc
new file mode 100644
index 0000000000..13fc044a55
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/rational_polynomial_test.cc
@@ -0,0 +1,238 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include <cmath>
+#include <string>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/rational_polynomial_test.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+#include <hwy/tests/test_util-inl.h>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/rational_polynomial-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+using T = float;  // required by EvalLog2
+using D = HWY_FULL(T);
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::GetLane;
+using hwy::HWY_NAMESPACE::ShiftLeft;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Sub;
+
+// Generic evaluator: broadcasts x, evaluates p(x) / q(x) and returns one lane.
+struct EvalPoly {
+  template <size_t NP, size_t NQ>
+  T operator()(T x, const T (&p)[NP], const T (&q)[NQ]) const {
+    const D df;
+    // Every lane holds the same x, so reading a single lane is sufficient.
+    const auto ratio = EvalRationalPolynomial(df, Set(df, x), p, q);
+    return GetLane(ratio);
+  }
+};
+
+// Range reduction for log2: exponent plus rational polynomial of mantissa - 1.
+struct EvalLog2 {
+  template <size_t NP, size_t NQ>
+  T operator()(T x, const T (&p)[NP], const T (&q)[NQ]) const {
+    const HWY_FULL(T) d;
+    auto vx = Set(d, x);
+
+    const HWY_FULL(int32_t) di;
+    const auto x_bits = BitCast(di, vx);
+    // Cannot handle negative numbers / NaN; debug-checks the sign bit is clear.
+    JXL_DASSERT(AllTrue(di, Eq(Abs(x_bits), x_bits)));
+
+    // Range reduction to [-1/3, 1/3] - 3 integer, 2 float ops
+    const auto exp_bits = Sub(x_bits, Set(di, 0x3f2aaaab));  // = 2/3
+    // Shifted exponent = log2; also used to clear mantissa.
+    const auto exp_shifted = ShiftRight<23>(exp_bits);
+    const auto mantissa = BitCast(d, Sub(x_bits, ShiftLeft<23>(exp_shifted)));
+    const auto exp_val = ConvertTo(d, exp_shifted);  // exponent as a float
+    vx = Sub(mantissa, Set(d, 1.0f));  // polynomial argument: mantissa - 1
+
+    const auto approx = Add(EvalRationalPolynomial(d, vx, p, q), exp_val);
+    return GetLane(approx);
+  }
+};
+
+// Functions to approximate:
+
+T LinearToSrgb8Direct(T val) {
+  if (val < 0.0) return 0.0;       // clamp below
+  if (val >= 255.0) return 255.0;  // clamp above
+  return val <= 10.0 / 12.92 ? val * 12.92  // linear segment, else power law
+       : 255.0 * (std::pow(val / 255.0, 1.0 / 2.4) * 1.055 - 0.055);
+}
+
+T SimpleGamma(T v) {
+  // Two-knee linear compression of v followed by a scaled, offset power law.
+  static const T kGamma = 0.387494322593;
+  static const T kKnee1 = 43.01745241042018, kSlope1 = 0.0383723643799;
+  static const T kKnee2 = 94.68634353321337, kSlope2 = 0.22885405968;
+  static const T kOffset = 0.156775786057, kScale = 8.898059160493739;
+  // First knee: pull down the part of v that exceeds kKnee1.
+  const T over1 = v - kKnee1;
+  if (over1 >= 0) {
+    v -= over1 * kSlope1;
+  }
+  // Second knee: evaluated on the already compressed v.
+  const T over2 = v - kKnee2;
+  if (over2 >= 0) {
+    v -= over2 * kSlope2;
+  }
+  // Unqualified pow is kept on purpose so overload resolution is unchanged.
+  return kScale * (kOffset + pow(v, kGamma));
+}
+
+// Verifies precomputed coefficients p, q (no CaratheodoryFejer fit is run
+// here): samples [x0, x1] and returns the biggest error |eval - func|.
+template <size_t NP, size_t NQ, class Eval>
+T RunApproximation(T x0, T x1, const T (&p)[NP], const T (&q)[NQ],
+                   const Eval& eval, T func_to_approx(T)) {
+  float maxerr = 0;
+  T lastPrint = 0;
+  // NOLINTNEXTLINE(clang-analyzer-security.FloatLoopCounter)
+  for (T x = x0; x <= x1; x += (x1 - x0) / 10000.0) {
+    const T f = func_to_approx(x);  // reference value
+    const T g = eval(x, p, q);      // approximation under test
+    maxerr = std::max(fabsf(g - f), maxerr);
+    if (x == x0 || x - lastPrint > (x1 - x0) / 20.0) {  // log ~20 samples
+      printf("x: %11.6f, f: %11.6f, g: %11.6f, e: %11.6f\n", x, f, g,
+             fabs(g - f));
+      lastPrint = x;
+    }
+  }
+  return maxerr;
+}
+
+void TestSimpleGamma() {  // rational fit of SimpleGamma must stay within 0.05
+  const T p[4 * (6 + 1)] = {  // numerator: degree 6, each coefficient 4x
+      HWY_REP4(-5.0646949363741811E-05), HWY_REP4(6.7369380528439771E-05),
+      HWY_REP4(8.9376652530412794E-05),  HWY_REP4(2.1153513301520462E-06),
+      HWY_REP4(-6.9130322970386449E-08), HWY_REP4(3.9424752749293728E-10),
+      HWY_REP4(1.2360288207619576E-13)};
+
+  const T q[4 * (6 + 1)] = {  // denominator: degree 6
+      HWY_REP4(-6.6389733798591366E-06), HWY_REP4(1.3299859726565908E-05),
+      HWY_REP4(3.8538748358398873E-06),  HWY_REP4(-2.8707687262928236E-08),
+      HWY_REP4(-6.6897385800005434E-10), HWY_REP4(6.1428748869186003E-12),
+      HWY_REP4(-2.5475738169252870E-15)};
+
+  const T err = RunApproximation(0.77, 274.579999999999984, p, q, EvalPoly(),
+                                 SimpleGamma);
+  EXPECT_LT(err, 0.05);
+}
+
+void TestLinearToSrgb8Direct() {  // fit on [0.77, 255] must stay within 0.05
+  const T p[4 * (5 + 1)] = {  // numerator: degree 5, each coefficient 4x
+      HWY_REP4(-9.5357499040105154E-05), HWY_REP4(4.6761186249798248E-04),
+      HWY_REP4(2.5708174333943594E-04),  HWY_REP4(1.5250087770436082E-05),
+      HWY_REP4(1.1946768008931187E-07),  HWY_REP4(5.9916446295972850E-11)};
+
+  const T q[4 * (4 + 1)] = {  // denominator: degree 4
+      HWY_REP4(1.8932479758079768E-05), HWY_REP4(2.7312342474687321E-05),
+      HWY_REP4(4.3901204783327006E-06), HWY_REP4(1.0417787306920273E-07),
+      HWY_REP4(3.0084206762140419E-10)};
+
+  const T err =
+      RunApproximation(0.77, 255, p, q, EvalPoly(), LinearToSrgb8Direct);
+  EXPECT_LT(err, 0.05);
+}
+
+void TestExp() {  // exp(x) on [-1, 1]; numerator and denominator of degree 2
+  const T p[4 * (2 + 1)] = {HWY_REP4(9.6266879665530902E-01),
+                            HWY_REP4(4.8961265681586763E-01),
+                            HWY_REP4(8.2619259189548433E-02)};
+  const T q[4 * (2 + 1)] = {HWY_REP4(9.6259895571622622E-01),
+                            HWY_REP4(-4.7272457588933831E-01),
+                            HWY_REP4(7.4802088567547664E-02)};
+  const T err =
+      RunApproximation(-1, 1, p, q, EvalPoly(), [](T x) { return T(exp(x)); });
+  EXPECT_LT(err, 1E-4);
+}
+
+void TestNegExp() {  // exp(-x) on [0, 10]
+  // 4,3 is the min required for monotonicity; max error in 0,10: 751 ppm
+  // no benefit for k>50.
+  const T p[4 * (4 + 1)] = {  // numerator: degree 4, each coefficient 4x
+      HWY_REP4(5.9580258551150123E-02), HWY_REP4(-2.5073728806886408E-02),
+      HWY_REP4(4.1561830213689248E-03), HWY_REP4(-3.1815408488900372E-04),
+      HWY_REP4(9.3866690094906802E-06)};
+  const T q[4 * (3 + 1)] = {  // denominator: degree 3
+      HWY_REP4(5.9579108238812878E-02), HWY_REP4(3.4542074345478582E-02),
+      HWY_REP4(8.7263562483501714E-03), HWY_REP4(1.4095109143061216E-03)};
+
+  const T err =
+      RunApproximation(0, 10, p, q, EvalPoly(), [](T x) { return T(exp(-x)); });
+  EXPECT_LT(err, sizeof(T) == 8 ? 2E-5 : 3E-5);  // looser bound when T is float
+}
+
+void TestSin() {  // sin(x) on [0, Pi<T>(1) * 2]
+  const T p[4 * (6 + 1)] = {  // numerator: degree 6, each coefficient 4x
+      HWY_REP4(1.5518122109203780E-05),  HWY_REP4(2.3388958643675966E+00),
+      HWY_REP4(-8.6705520940849157E-01), HWY_REP4(-1.9702294764873535E-01),
+      HWY_REP4(1.2193404314472320E-01),  HWY_REP4(-1.7373966109788839E-02),
+      HWY_REP4(7.8829435883034796E-04)};
+  const T q[4 * (5 + 1)] = {  // denominator: degree 5
+      HWY_REP4(2.3394371422557279E+00), HWY_REP4(-8.7028221081288615E-01),
+      HWY_REP4(2.0052872219658430E-01), HWY_REP4(-3.2460335995264836E-02),
+      HWY_REP4(3.1546157932479282E-03), HWY_REP4(-1.6692542019380155E-04)};
+
+  const T err = RunApproximation(0, Pi<T>(1) * 2, p, q, EvalPoly(),
+                                 [](T x) { return T(sin(x)); });
+  EXPECT_LT(err, sizeof(T) == 8 ? 5E-4 : 7E-4);  // looser bound when T is float
+}
+
+void TestLog() {  // log2 on [1E-6, 1000] via EvalLog2's range reduction
+  HWY_ALIGN const T p[4 * (2 + 1)] = {HWY_REP4(-1.8503833400518310E-06),
+                                      HWY_REP4(1.4287160470083755E+00),
+                                      HWY_REP4(7.4245873327820566E-01)};
+  HWY_ALIGN const T q[4 * (2 + 1)] = {HWY_REP4(9.9032814277590719E-01),
+                                      HWY_REP4(1.0096718572241148E+00),
+                                      HWY_REP4(1.7409343003366853E-01)};
+  const T err = RunApproximation(1E-6, 1000, p, q, EvalLog2(), std::log2);
+  printf("%E\n", err);  // NOTE(review): error is only printed, never asserted
+}
+
+HWY_NOINLINE void TestRationalPolynomial() {  // runs every check in sequence
+  TestSimpleGamma();
+  TestLinearToSrgb8Direct();
+  TestExp();
+  TestNegExp();
+  TestSin();
+  TestLog();  // NOTE(review): this aggregate is not exported in this file
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+class RationalPolynomialTest : public hwy::TestWithParamTarget {};  // empty
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(RationalPolynomialTest);
+
+HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestSimpleGamma);
+HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestLinearToSrgb8Direct);
+HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestExp);
+HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestNegExp);
+HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestSin);
+HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestLog);  // prints, no EXPECT
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/low_memory_render_pipeline.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/low_memory_render_pipeline.cc
new file mode 100644
index 0000000000..7116326e4a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/low_memory_render_pipeline.cc
@@ -0,0 +1,865 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/low_memory_render_pipeline.h"
+
+#include <algorithm>
+#include <queue>
+#include <tuple>
+
+#include "lib/jxl/base/arch_macros.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+std::pair<size_t, size_t>
+LowMemoryRenderPipeline::ColorDimensionsToChannelDimensions(
+    std::pair<size_t, size_t> in, size_t c, size_t stage) const {
+  // Converts a size given in color-channel units into units of channel c at
+  // `stage`: scale up by base_color_shift_, then shift down by the channel's
+  // own shift, rounding up.
+  const std::pair<size_t, size_t> shift = channel_shifts_[stage][c];
+  const size_t x = (in.first << base_color_shift_) + (1 << shift.first) - 1;
+  const size_t y = (in.second << base_color_shift_) + (1 << shift.second) - 1;
+  return {x >> shift.first, y >> shift.second};
+}
+
+std::pair<size_t, size_t> LowMemoryRenderPipeline::BorderToStore(
+    size_t c) const {
+  // group_border_ at channel c's stage-0 resolution plus stage-0 padding.
+  const auto& pad = padding_[0][c];
+  const auto base = ColorDimensionsToChannelDimensions(group_border_, c, 0);
+  return std::make_pair(base.first + pad.first, base.second + pad.second);
+}
+
+void LowMemoryRenderPipeline::SaveBorders(size_t group_id, size_t c,
+                                          const ImageF& in) {  // edge strips
+  size_t gy = group_id / frame_dimensions_.xsize_groups;  // group row
+  size_t gx = group_id % frame_dimensions_.xsize_groups;  // group column
+  size_t hshift = channel_shifts_[0][c].first;
+  size_t vshift = channel_shifts_[0][c].second;
+  size_t x0 = gx * GroupInputXSize(c);  // [x0, x1) x [y0, y1): group area,
+  size_t x1 = std::min((gx + 1) * GroupInputXSize(c),  // clipped to the frame
+                       DivCeil(frame_dimensions_.xsize_upsampled, 1 << hshift));
+  size_t y0 = gy * GroupInputYSize(c);
+  size_t y1 = std::min((gy + 1) * GroupInputYSize(c),
+                       DivCeil(frame_dimensions_.ysize_upsampled, 1 << vshift));
+
+  auto borders = BorderToStore(c);
+  size_t borderx_write = borders.first;
+  size_t bordery_write = borders.second;
+
+  if (gy > 0) {  // top rows of this group -> horizontal slot 2 * gy - 1
+    Rect from(group_data_x_border_, group_data_y_border_, x1 - x0,
+              bordery_write);
+    Rect to(x0, (gy * 2 - 1) * bordery_write, x1 - x0, bordery_write);
+    CopyImageTo(from, in, to, &borders_horizontal_[c]);
+  }
+  if (gy + 1 < frame_dimensions_.ysize_groups) {  // bottom rows -> slot 2 * gy
+    Rect from(group_data_x_border_,
+              group_data_y_border_ + y1 - y0 - bordery_write, x1 - x0,
+              bordery_write);
+    Rect to(x0, (gy * 2) * bordery_write, x1 - x0, bordery_write);
+    CopyImageTo(from, in, to, &borders_horizontal_[c]);
+  }
+  if (gx > 0) {  // left columns of this group -> vertical slot 2 * gx - 1
+    Rect from(group_data_x_border_, group_data_y_border_, borderx_write,
+              y1 - y0);
+    Rect to((gx * 2 - 1) * borderx_write, y0, borderx_write, y1 - y0);
+    CopyImageTo(from, in, to, &borders_vertical_[c]);
+  }
+  if (gx + 1 < frame_dimensions_.xsize_groups) {  // right columns -> slot 2 * gx
+    Rect from(group_data_x_border_ + x1 - x0 - borderx_write,
+              group_data_y_border_, borderx_write, y1 - y0);
+    Rect to((gx * 2) * borderx_write, y0, borderx_write, y1 - y0);
+    CopyImageTo(from, in, to, &borders_vertical_[c]);
+  }
+}
+
+void LowMemoryRenderPipeline::LoadBorders(size_t group_id, size_t c,
+                                          const Rect& r, ImageF* out) {
+  size_t gy = group_id / frame_dimensions_.xsize_groups;  // group row
+  size_t gx = group_id % frame_dimensions_.xsize_groups;  // group column
+  size_t hshift = channel_shifts_[0][c].first;
+  size_t vshift = channel_shifts_[0][c].second;
+  // Coordinates of the group in the image.
+  size_t x0 = gx * GroupInputXSize(c);
+  size_t x1 = std::min((gx + 1) * GroupInputXSize(c),
+                       DivCeil(frame_dimensions_.xsize_upsampled, 1 << hshift));
+  size_t y0 = gy * GroupInputYSize(c);
+  size_t y1 = std::min((gy + 1) * GroupInputYSize(c),
+                       DivCeil(frame_dimensions_.ysize_upsampled, 1 << vshift));
+
+  size_t paddingx = padding_[0][c].first;
+  size_t paddingy = padding_[0][c].second;
+
+  auto borders = BorderToStore(c);
+  size_t borderx_write = borders.first;
+  size_t bordery_write = borders.second;
+
+  // Limits of the area to copy from, in image coordinates.
+  JXL_DASSERT(r.x0() == 0 || (r.x0() << base_color_shift_) >= paddingx);
+  size_t x0src = DivCeil(r.x0() << base_color_shift_, 1 << hshift);
+  if (x0src != 0) {  // never extend past the left image edge
+    x0src -= paddingx;
+  }
+  // r may be such that r.x1 (namely x0() + xsize()) is within paddingx of the
+  // right side of the image, so we use min() here.
+  size_t x1src =
+      DivCeil((r.x0() + r.xsize()) << base_color_shift_, 1 << hshift);
+  x1src = std::min(x1src + paddingx,
+                   DivCeil(frame_dimensions_.xsize_upsampled, 1 << hshift));
+
+  // Similar computation for y.
+  JXL_DASSERT(r.y0() == 0 || (r.y0() << base_color_shift_) >= paddingy);
+  size_t y0src = DivCeil(r.y0() << base_color_shift_, 1 << vshift);
+  if (y0src != 0) {
+    y0src -= paddingy;
+  }
+  size_t y1src =
+      DivCeil((r.y0() + r.ysize()) << base_color_shift_, 1 << vshift);
+  y1src = std::min(y1src + paddingy,
+                   DivCeil(frame_dimensions_.ysize_upsampled, 1 << vshift));
+
+  // Copy other groups' borders from the border storage.
+  if (y0src < y0) {  // rows above: slot 2 * gy - 2, bottom strip of row gy - 1
+    JXL_DASSERT(gy > 0);
+    CopyImageTo(
+        Rect(x0src, (gy * 2 - 2) * bordery_write, x1src - x0src, bordery_write),
+        borders_horizontal_[c],
+        Rect(group_data_x_border_ + x0src - x0,
+             group_data_y_border_ - bordery_write, x1src - x0src,
+             bordery_write),
+        out);
+  }
+  if (y1src > y1) {  // rows below: slot 2 * gy + 1, top strip of row gy + 1
+    // When copying the bottom border we must not be on the bottom groups.
+    JXL_DASSERT(gy + 1 < frame_dimensions_.ysize_groups);
+    CopyImageTo(
+        Rect(x0src, (gy * 2 + 1) * bordery_write, x1src - x0src, bordery_write),
+        borders_horizontal_[c],
+        Rect(group_data_x_border_ + x0src - x0, group_data_y_border_ + y1 - y0,
+             x1src - x0src, bordery_write),
+        out);
+  }
+  if (x0src < x0) {  // columns to the left: slot 2 * gx - 2
+    JXL_DASSERT(gx > 0);
+    CopyImageTo(
+        Rect((gx * 2 - 2) * borderx_write, y0src, borderx_write, y1src - y0src),
+        borders_vertical_[c],
+        Rect(group_data_x_border_ - borderx_write,
+             group_data_y_border_ + y0src - y0, borderx_write, y1src - y0src),
+        out);
+  }
+  if (x1src > x1) {  // columns to the right: slot 2 * gx + 1
+    // When copying the right border we must not be on the rightmost groups.
+    JXL_DASSERT(gx + 1 < frame_dimensions_.xsize_groups);
+    CopyImageTo(
+        Rect((gx * 2 + 1) * borderx_write, y0src, borderx_write, y1src - y0src),
+        borders_vertical_[c],
+        Rect(group_data_x_border_ + x1 - x0, group_data_y_border_ + y0src - y0,
+             borderx_write, y1src - y0src),
+        out);
+  }
+}
+
+size_t LowMemoryRenderPipeline::GroupInputXSize(size_t c) const {
+  const size_t hshift = channel_shifts_[0][c].first;  // stage-0 horizontal shift
+  return (frame_dimensions_.group_dim << base_color_shift_) >> hshift;
+}
+
+size_t LowMemoryRenderPipeline::GroupInputYSize(size_t c) const {
+  const size_t vshift = channel_shifts_[0][c].second;  // stage-0 vertical shift
+  return (frame_dimensions_.group_dim << base_color_shift_) >> vshift;
+}
+
+void LowMemoryRenderPipeline::EnsureBordersStorage() {
+  // Sizes the per-channel border images: two strips per inner group boundary.
+  const auto& shifts = channel_shifts_[0];
+  const size_t num_channels = shifts.size();
+  if (borders_horizontal_.size() < num_channels) {
+    borders_horizontal_.resize(num_channels);
+    borders_vertical_.resize(num_channels);
+  }
+  for (size_t c = 0; c < num_channels; c++) {
+    JXL_DASSERT(frame_dimensions_.xsize_groups > 0);
+    JXL_DASSERT(frame_dimensions_.ysize_groups > 0);
+    const size_t num_xborders = (frame_dimensions_.xsize_groups - 1) * 2;
+    const size_t num_yborders = (frame_dimensions_.ysize_groups - 1) * 2;
+    const auto border = BorderToStore(c);
+    const size_t xsize_c =
+        DivCeil(frame_dimensions_.xsize_upsampled_padded, 1 << shifts[c].first);
+    const size_t ysize_c = DivCeil(frame_dimensions_.ysize_upsampled_padded,
+                                   1 << shifts[c].second);
+    const Rect horizontal(0, 0, xsize_c, border.second * num_yborders);
+    if (!SameSize(horizontal, borders_horizontal_[c])) {
+      borders_horizontal_[c] = ImageF(horizontal.xsize(), horizontal.ysize());
+    }
+    const Rect vertical(0, 0, border.first * num_xborders, ysize_c);
+    if (!SameSize(vertical, borders_vertical_[c])) {
+      borders_vertical_[c] = ImageF(vertical.xsize(), vertical.ysize());
+    }
+  }
+}
+
+void LowMemoryRenderPipeline::Init() {
+  group_border_ = {0, 0};
+  base_color_shift_ = CeilLog2Nonzero(frame_dimensions_.xsize_upsampled_padded /
+                                      frame_dimensions_.xsize_padded);
+
+  const auto& shifts = channel_shifts_[0];
+
+  // Ensure that each channel has enough border pixels.
+  for (size_t c = 0; c < shifts.size(); c++) {
+    group_border_.first =
+        std::max(group_border_.first,
+                 DivCeil(padding_[0][c].first << channel_shifts_[0][c].first,
+                         1 << base_color_shift_));
+    group_border_.second =
+        std::max(group_border_.second,
+                 DivCeil(padding_[0][c].second << channel_shifts_[0][c].second,
+                         1 << base_color_shift_));
+  }
+
+  // Ensure that all channels have an integer number of border pixels in the
+  // input.
+  for (size_t c = 0; c < shifts.size(); c++) {
+    if (channel_shifts_[0][c].first >= base_color_shift_) {
+      group_border_.first =
+          RoundUpTo(group_border_.first,
+                    1 << (channel_shifts_[0][c].first - base_color_shift_));
+    }
+    if (channel_shifts_[0][c].second >= base_color_shift_) {
+      group_border_.second =
+          RoundUpTo(group_border_.second,
+                    1 << (channel_shifts_[0][c].second - base_color_shift_));
+    }
+  }
+  // Ensure that the X border on color channels is a multiple of kBlockDim or
+  // the vector size (required for EPF stages). Vectors on ARM NEON are never
+  // wider than 4 floats, so rounding to multiples of 4 is enough.
+#if JXL_ARCH_ARM
+  constexpr size_t kGroupXAlign = 4;
+#else
+  constexpr size_t kGroupXAlign = 16;
+#endif
+  group_border_.first = RoundUpTo(group_border_.first, kGroupXAlign);
+  // Allocate borders in group images that are just enough for storing the
+  // borders to be copied in, plus any rounding to ensure alignment.
+  std::pair<size_t, size_t> max_border = {0, 0};
+  for (size_t c = 0; c < shifts.size(); c++) {
+    max_border.first = std::max(BorderToStore(c).first, max_border.first);
+    max_border.second = std::max(BorderToStore(c).second, max_border.second);
+  }
+  group_data_x_border_ = RoundUpTo(max_border.first, kGroupXAlign);
+  group_data_y_border_ = max_border.second;
+
+  EnsureBordersStorage();
+  group_border_assigner_.Init(frame_dimensions_);
+
+  for (first_trailing_stage_ = stages_.size(); first_trailing_stage_ > 0;
+       first_trailing_stage_--) {  // stops just past the last kInOut stage
+    bool has_inout_c = false;
+    for (size_t c = 0; c < shifts.size(); c++) {
+      if (stages_[first_trailing_stage_ - 1]->GetChannelMode(c) ==
+          RenderPipelineChannelMode::kInOut) {
+        has_inout_c = true;
+      }
+    }
+    if (has_inout_c) {
+      break;
+    }
+  }
+
+  first_image_dim_stage_ = stages_.size();  // default: no stage switches
+  for (size_t i = 0; i < stages_.size(); i++) {
+    std::vector<std::pair<size_t, size_t>> input_sizes(shifts.size());
+    for (size_t c = 0; c < shifts.size(); c++) {
+      input_sizes[c] =
+          std::make_pair(DivCeil(frame_dimensions_.xsize_upsampled,
+                                 1 << channel_shifts_[i][c].first),
+                         DivCeil(frame_dimensions_.ysize_upsampled,
+                                 1 << channel_shifts_[i][c].second));
+    }
+    stages_[i]->SetInputSizes(input_sizes);
+    if (stages_[i]->SwitchToImageDimensions()) {
+      // We don't allow kInOut after switching to image dimensions.
+      JXL_ASSERT(i >= first_trailing_stage_);
+      first_image_dim_stage_ = i + 1;
+      stages_[i]->GetImageDimensions(&full_image_xsize_, &full_image_ysize_,
+                                     &frame_origin_);
+      break;
+    }
+  }
+  for (size_t i = first_image_dim_stage_; i < stages_.size(); i++) {
+    if (stages_[i]->SwitchToImageDimensions()) {
+      JXL_UNREACHABLE("Cannot switch to image dimensions multiple times");
+    }
+    std::vector<std::pair<size_t, size_t>> input_sizes(shifts.size());
+    for (size_t c = 0; c < shifts.size(); c++) {
+      input_sizes[c] = {full_image_xsize_, full_image_ysize_};
+    }
+    stages_[i]->SetInputSizes(input_sizes);
+  }
+
+  anyc_.resize(stages_.size());
+  for (size_t i = 0; i < stages_.size(); i++) {
+    for (size_t c = 0; c < shifts.size(); c++) {
+      if (stages_[i]->GetChannelMode(c) !=
+          RenderPipelineChannelMode::kIgnored) {
+        anyc_[i] = c;  // ends up as the last channel stage i does not ignore
+      }
+    }
+  }
+
+  stage_input_for_channel_ = std::vector<std::vector<int32_t>>(
+      stages_.size(), std::vector<int32_t>(shifts.size()));
+  for (size_t c = 0; c < shifts.size(); c++) {
+    int input = -1;  // -1: no earlier kInOut stage for this channel
+    for (size_t i = 0; i < stages_.size(); i++) {
+      stage_input_for_channel_[i][c] = input;
+      if (stages_[i]->GetChannelMode(c) == RenderPipelineChannelMode::kInOut) {
+        input = i;
+      }
+    }
+  }
+
+  image_rect_.resize(stages_.size());
+  for (size_t i = 0; i < stages_.size(); i++) {
+    size_t x1 = DivCeil(frame_dimensions_.xsize_upsampled,
+                        1 << channel_shifts_[i][anyc_[i]].first);
+    size_t y1 = DivCeil(frame_dimensions_.ysize_upsampled,
+                        1 << channel_shifts_[i][anyc_[i]].second);
+    image_rect_[i] = Rect(0, 0, x1, y1);
+  }
+
+  virtual_ypadding_for_output_.resize(stages_.size());
+  xpadding_for_output_.resize(stages_.size());
+  for (size_t c = 0; c < shifts.size(); c++) {
+    int ypad = 0;
+    int xpad = 0;
+    for (size_t i = stages_.size(); i-- > 0;) {  // last stage to first
+      if (stages_[i]->GetChannelMode(c) !=
+          RenderPipelineChannelMode::kIgnored) {
+        virtual_ypadding_for_output_[i] =
+            std::max(ypad, virtual_ypadding_for_output_[i]);
+        xpadding_for_output_[i] = std::max(xpad, xpadding_for_output_[i]);
+      }
+      if (stages_[i]->GetChannelMode(c) == RenderPipelineChannelMode::kInOut) {
+        ypad = (DivCeil(ypad, 1 << channel_shifts_[i][c].second) +
+                stages_[i]->settings_.border_y)
+               << channel_shifts_[i][c].second;
+        xpad = DivCeil(xpad, 1 << stages_[i]->settings_.shift_x) +
+               stages_[i]->settings_.border_x;
+      }
+    }
+  }
+}
+
+void LowMemoryRenderPipeline::PrepareForThreadsInternal(size_t num,
+                                                        bool use_group_ids) {
+  const auto& shifts = channel_shifts_[0];
+
+  use_group_ids_ = use_group_ids;
+  size_t num_buffers = use_group_ids_ ? frame_dimensions_.num_groups : num;
+  for (size_t t = group_data_.size(); t < num_buffers; t++) {  // only grows
+    group_data_.emplace_back();
+    group_data_[t].resize(shifts.size());
+    for (size_t c = 0; c < shifts.size(); c++) {
+      group_data_[t][c] = ImageF(GroupInputXSize(c) + group_data_x_border_ * 2,
+                                 GroupInputYSize(c) + group_data_y_border_ * 2);
+    }
+  }
+  // TODO(veluca): avoid reallocating buffers if not needed.
+  stage_data_.resize(num);
+  size_t upsampling = 1u << base_color_shift_;
+  size_t group_dim = frame_dimensions_.group_dim * upsampling;
+  size_t padding =
+      2 * group_data_x_border_ * upsampling +  // maximum size of a rect
+      2 * kRenderPipelineXOffset;              // extra padding for processing
+  size_t stage_buffer_xsize = group_dim + padding;
+  for (size_t t = 0; t < num; t++) {
+    stage_data_[t].resize(shifts.size());
+    for (size_t c = 0; c < shifts.size(); c++) {
+      stage_data_[t][c].resize(stages_.size());
+      size_t next_y_border = 0;
+      for (size_t i = stages_.size(); i-- > 0;) {  // last stage to first
+        if (stages_[i]->GetChannelMode(c) ==
+            RenderPipelineChannelMode::kInOut) {
+          size_t stage_buffer_ysize =
+              2 * next_y_border + (1 << stages_[i]->settings_.shift_y);
+          stage_buffer_ysize = 1 << CeilLog2Nonzero(stage_buffer_ysize);  // pow2
+          next_y_border = stages_[i]->settings_.border_y;
+          stage_data_[t][c][i] = ImageF(stage_buffer_xsize, stage_buffer_ysize);
+        }
+      }
+    }
+  }
+  if (first_image_dim_stage_ != stages_.size()) {  // some stage switched dims
+    RectT<ssize_t> image_rect(0, 0, frame_dimensions_.xsize_upsampled,
+                              frame_dimensions_.ysize_upsampled);
+    RectT<ssize_t> full_image_rect(0, 0, full_image_xsize_, full_image_ysize_);
+    image_rect = image_rect.Translate(frame_origin_.x0, frame_origin_.y0);
+    image_rect = image_rect.Intersection(full_image_rect);
+    if (image_rect.xsize() == 0 || image_rect.ysize() == 0) {
+      image_rect = RectT<ssize_t>(0, 0, 0, 0);
+    }
+    size_t left_padding = image_rect.x0();
+    size_t middle_padding = group_dim;
+    size_t right_padding = full_image_xsize_ - image_rect.x1();
+    size_t out_of_frame_xsize =
+        padding +
+        std::max(left_padding, std::max(middle_padding, right_padding));
+    out_of_frame_data_.resize(num);
+    for (size_t t = 0; t < num; t++) {
+      out_of_frame_data_[t] = ImageF(out_of_frame_xsize, shifts.size());  // row/c
+    }
+  }
+}
+
+std::vector<std::pair<ImageF*, Rect>> LowMemoryRenderPipeline::PrepareBuffers(
+    size_t group_id, size_t thread_id) {  // one (image, rect) pair per channel
+  std::vector<std::pair<ImageF*, Rect>> ret(channel_shifts_[0].size());
+  const size_t gx = group_id % frame_dimensions_.xsize_groups;  // group column
+  const size_t gy = group_id / frame_dimensions_.xsize_groups;  // group row
+  for (size_t c = 0; c < channel_shifts_[0].size(); c++) {
+    // The buffer is selected per group or per thread, see use_group_ids_.
+    ret[c].first = &group_data_[use_group_ids_ ? group_id : thread_id][c];
+    // NOTE(review): last two Rect arguments look like end limits - confirm.
+    ret[c].second = Rect(group_data_x_border_, group_data_y_border_,
+                         GroupInputXSize(c), GroupInputYSize(c),
+                         DivCeil(frame_dimensions_.xsize_upsampled,
+                                 1 << channel_shifts_[0][c].first) -
+                             gx * GroupInputXSize(c) + group_data_x_border_,
+                         DivCeil(frame_dimensions_.ysize_upsampled,
+                                 1 << channel_shifts_[0][c].second) -
+                             gy * GroupInputYSize(c) + group_data_y_border_);
+  }
+  return ret;
+}
+
+namespace {
+
+JXL_INLINE int GetMirroredY(int y, ssize_t group_y0, ssize_t image_ysize) {
+  // Maps a group-relative row y that lies outside the image to a mirrored row.
+  const bool past_bottom = y + group_y0 >= image_ysize;
+  // Group starting at row 0: rows may leave on either side, use Mirror().
+  if (group_y0 == 0 && (y < 0 || past_bottom)) {
+    return Mirror(y, image_ysize);
+  }
+  // Otherwise rows past the bottom are reflected once; others are kept as is.
+  return past_bottom ? 2 * image_ysize - (y + group_y0) - 1 - group_y0 : y;
+}
+
+JXL_INLINE void ApplyXMirroring(float* row, ssize_t borderx, ssize_t group_x0,
+                                ssize_t group_xsize, ssize_t image_xsize) {
+  if (image_xsize <= borderx) {  // image not wider than the border: Mirror()
+    if (group_x0 == 0) {  // fill borderx samples left of the row start
+      for (ssize_t ix = 0; ix < borderx; ix++) {
+        row[kRenderPipelineXOffset - ix - 1] =
+            row[kRenderPipelineXOffset + Mirror(-ix - 1, image_xsize)];
+      }
+    }
+    if (group_xsize + borderx + group_x0 >= image_xsize) {  // right edge reached
+      for (ssize_t ix = 0; ix < borderx; ix++) {
+        row[kRenderPipelineXOffset + image_xsize + ix - group_x0] =
+            row[kRenderPipelineXOffset + Mirror(image_xsize + ix, image_xsize) -
+                group_x0];
+      }
+    }
+  } else {
+    // Here we know that the one mirroring step is sufficient.
+    if (group_x0 == 0) {  // reflect about the left image edge
+      for (ssize_t ix = 0; ix < borderx; ix++) {
+        row[kRenderPipelineXOffset - ix - 1] = row[kRenderPipelineXOffset + ix];
+      }
+    }
+    if (group_xsize + borderx + group_x0 >= image_xsize) {  // reflect at right
+      for (ssize_t ix = 0; ix < borderx; ix++) {
+        row[kRenderPipelineXOffset + image_xsize - group_x0 + ix] =
+            row[kRenderPipelineXOffset + image_xsize - group_x0 - ix - 1];
+      }
+    }
+  }
+}
+
+// Information about where the *output* of each stage is stored.
+class Rows {
+ public:
+  Rows(const std::vector<std::unique_ptr<RenderPipelineStage>>& stages,
+       const Rect data_max_color_channel_rect, int group_data_x_border,
+       int group_data_y_border,
+       const std::vector<std::pair<size_t, size_t>>& group_data_shift,
+       size_t base_color_shift, std::vector<std::vector<ImageF>>& thread_data,
+       std::vector<ImageF>& input_data) {
+    size_t num_stages = stages.size();
+    size_t num_channels = input_data.size();
+
+    JXL_ASSERT(thread_data.size() == num_channels);  // indexed [channel][stage]
+    JXL_ASSERT(group_data_shift.size() == num_channels);
+
+#if JXL_ENABLE_ASSERT
+    for (const auto& td : thread_data) {
+      JXL_ASSERT(td.size() == num_stages);
+    }
+#endif
+
+    rows_.resize(num_stages + 1, std::vector<RowInfo>(num_channels));  // slot 0 is the input
+
+    for (size_t i = 0; i < num_stages; i++) {
+      for (size_t c = 0; c < input_data.size(); c++) {
+        if (stages[i]->GetChannelMode(c) == RenderPipelineChannelMode::kInOut) {  // other modes leave the slot unset
+          rows_[i + 1][c].ymod_minus_1 = thread_data[c][i].ysize() - 1;  // row mask, see RowInfo
+          rows_[i + 1][c].base_ptr = thread_data[c][i].Row(0);
+          rows_[i + 1][c].stride = thread_data[c][i].PixelsPerRow();
+        }
+      }
+    }
+
+    for (size_t c = 0; c < input_data.size(); c++) {
+      auto channel_group_data_rect =  // color-channel rect mapped to channel c's shift
+          data_max_color_channel_rect.As<ssize_t>()
+              .Translate(-group_data_x_border, -group_data_y_border)
+              .ShiftLeft(base_color_shift)
+              .CeilShiftRight(group_data_shift[c])
+              .Translate(group_data_x_border - ssize_t(kRenderPipelineXOffset),
+                         group_data_y_border);
+      rows_[0][c].base_ptr = channel_group_data_rect.Row(&input_data[c], 0);
+      rows_[0][c].stride = input_data[c].PixelsPerRow();
+      rows_[0][c].ymod_minus_1 = -1;  // y & -1 == y: input rows are not wrapped
+    }
+  }
+
+  // Stage -1 refers to the input data; all other values must be nonnegative and
+  // refer to the data for the output of that stage.
+  JXL_INLINE float* GetBuffer(int stage, int y, size_t c) const {
+    JXL_DASSERT(stage >= -1);
+    const RowInfo& info = rows_[stage + 1][c];  // +1 because slot 0 holds the input
+    return info.base_ptr + ssize_t(info.stride) * (y & info.ymod_minus_1);
+  }
+
+ private:
+  struct RowInfo {
+    // Pointer to beginning of the first row.
+    float* base_ptr;
+    // Modulo value for the y axis minus 1 (ymod is guaranteed to be a power of
+    // 2, which allows efficient mod computation by masking).
+    int ymod_minus_1;
+    // Number of floats per row.
+    size_t stride;
+  };
+  std::vector<std::vector<RowInfo>> rows_;  // indexed [stage + 1][channel]
+};
+
+}  // namespace
+
+void LowMemoryRenderPipeline::RenderRect(size_t thread_id,
+                                         std::vector<ImageF>& input_data,
+                                         Rect data_max_color_channel_rect,
+                                         Rect image_max_color_channel_rect) {
+  // For each stage, the rect corresponding to the image area currently being
+  // processed, in the coordinates of that stage (i.e. with the scaling factor
+  // that that stage has).
+  std::vector<Rect> group_rect;
+  group_rect.resize(stages_.size());
+  Rect image_area_rect =
+      image_max_color_channel_rect.ShiftLeft(base_color_shift_)
+          .Crop(frame_dimensions_.xsize_upsampled,
+                frame_dimensions_.ysize_upsampled);
+  for (size_t i = 0; i < stages_.size(); i++) {
+    group_rect[i] =
+        image_area_rect.CeilShiftRight(channel_shifts_[i][anyc_[i]]);
+  }
+
+  ssize_t frame_x0 =
+      first_image_dim_stage_ == stages_.size() ? 0 : frame_origin_.x0;
+  ssize_t frame_y0 =
+      first_image_dim_stage_ == stages_.size() ? 0 : frame_origin_.y0;
+  size_t full_image_xsize = first_image_dim_stage_ == stages_.size()
+                                ? frame_dimensions_.xsize_upsampled
+                                : full_image_xsize_;
+  size_t full_image_ysize = first_image_dim_stage_ == stages_.size()
+                                ? frame_dimensions_.ysize_upsampled
+                                : full_image_ysize_;
+
+  // Compute actual x-axis bounds for the current image area in the context of
+  // the full image this frame is part of. As the left boundary may be negative,
+  // we also create the x_pixels_skip value, defined as follows:
+  // - both x_pixels_skip and full_image_x0 are >= 0, and at least one is 0;
+  // - full_image_x0 - x_pixels_skip is the position of the current frame area
+  //   in the full image.
+  ssize_t full_image_x0 = frame_x0 + image_area_rect.x0();
+  ssize_t x_pixels_skip = 0;
+  if (full_image_x0 < 0) {
+    x_pixels_skip = -full_image_x0;
+    full_image_x0 = 0;
+  }
+  ssize_t full_image_x1 = frame_x0 + image_area_rect.x1();
+  full_image_x1 = std::min<ssize_t>(full_image_x1, full_image_xsize);
+
+  // If the current image area is entirely outside of the visible image, there
+  // is no point in proceeding. Note: this uses the assumption that if there is
+  // a stage with observable effects (i.e. a kInput stage), it only appears
+  // after the stage that switches to image dimensions.
+  if (full_image_x1 <= full_image_x0) return;
+
+  // Data structures to hold information about input/output rows and their
+  // buffers.
+  Rows rows(stages_, data_max_color_channel_rect, group_data_x_border_,
+            group_data_y_border_, channel_shifts_[0], base_color_shift_,
+            stage_data_[thread_id], input_data);
+
+  std::vector<RenderPipelineStage::RowInfo> input_rows(first_trailing_stage_ +
+                                                       1);
+  for (size_t i = 0; i < first_trailing_stage_; i++) {
+    input_rows[i].resize(input_data.size());
+  }
+  input_rows[first_trailing_stage_].resize(input_data.size(),
+                                           std::vector<float*>(1));
+
+  // Maximum possible shift is 3, i.e. at most 1 << 3 = 8 output rows per row.
+  RenderPipelineStage::RowInfo output_rows(input_data.size(),
+                                           std::vector<float*>(8));
+
+  // Fills in input_rows and output_rows for a given y value (relative to the
+  // start of the group, measured in actual pixels at the appropriate vertical
+  // scaling factor) and a given stage, applying mirroring if necessary. This
+  // function is somewhat inefficient for trailing kInOut or kInput stages,
+  // where just filling the input row once ought to be sufficient.
+  auto prepare_io_rows = [&](int y, size_t i) {
+    ssize_t bordery = stages_[i]->settings_.border_y;
+    size_t shifty = stages_[i]->settings_.shift_y;
+    auto make_row = [&](size_t c, ssize_t iy) {
+      size_t mirrored_y = GetMirroredY(y + iy - bordery, group_rect[i].y0(),  // NOTE(review): int result stored as size_t, then passed back as int
+                                       image_rect_[i].ysize());
+      input_rows[i][c][iy] =
+          rows.GetBuffer(stage_input_for_channel_[i][c], mirrored_y, c);
+      ApplyXMirroring(input_rows[i][c][iy], stages_[i]->settings_.border_x,
+                      group_rect[i].x0(), group_rect[i].xsize(),
+                      image_rect_[i].xsize());
+    };
+    for (size_t c = 0; c < input_data.size(); c++) {
+      RenderPipelineChannelMode mode = stages_[i]->GetChannelMode(c);
+      if (mode == RenderPipelineChannelMode::kIgnored) {
+        continue;
+      }
+      // If we already have rows from a previous iteration, we can just shift
+      // the rows by 1 and insert the new one.
+      if (input_rows[i][c].size() == 2 * size_t(bordery) + 1) {
+        for (ssize_t iy = 0; iy < 2 * bordery; iy++) {
+          input_rows[i][c][iy] = input_rows[i][c][iy + 1];
+        }
+        make_row(c, bordery * 2);
+      } else {
+        input_rows[i][c].resize(2 * bordery + 1);
+        for (ssize_t iy = 0; iy < 2 * bordery + 1; iy++) {
+          make_row(c, iy);
+        }
+      }
+
+      // If necessary, get the output buffers.
+      if (mode == RenderPipelineChannelMode::kInOut) {
+        for (size_t iy = 0; iy < (1u << shifty); iy++) {
+          output_rows[c][iy] = rows.GetBuffer(i, y * (1 << shifty) + iy, c);
+        }
+      }
+    }
+  };
+
+  // We pretend that every stage has a vertical shift of 0, i.e. it is as tall
+  // as the final image.
+  // We call each such row a "virtual" row, because it may or may not correspond
+  // to an actual row of the current processing stage; actual processing happens
+  // when vy % (1<<vshift) == 0.
+
+  int num_extra_rows = *std::max_element(virtual_ypadding_for_output_.begin(),
+                                         virtual_ypadding_for_output_.end());
+
+  for (int vy = -num_extra_rows;
+       vy < int(image_area_rect.ysize()) + num_extra_rows; vy++) {
+    for (size_t i = 0; i < first_trailing_stage_; i++) {
+      int stage_vy = vy - num_extra_rows + virtual_ypadding_for_output_[i];
+
+      if (stage_vy % (1 << channel_shifts_[i][anyc_[i]].second) != 0) {
+        continue;
+      }
+
+      if (stage_vy < -virtual_ypadding_for_output_[i]) {
+        continue;
+      }
+
+      int y = stage_vy >> channel_shifts_[i][anyc_[i]].second;
+
+      ssize_t image_y = ssize_t(group_rect[i].y0()) + y;
+      // Do not produce rows in out-of-bounds areas.
+      if (image_y < 0 || image_y >= ssize_t(image_rect_[i].ysize())) {
+        continue;
+      }
+
+      // Get the input/output rows and potentially apply mirroring to the input.
+      prepare_io_rows(y, i);
+
+      // Produce output rows.
+      stages_[i]->ProcessRow(input_rows[i], output_rows,
+                             xpadding_for_output_[i], group_rect[i].xsize(),
+                             group_rect[i].x0(), image_y, thread_id);
+    }
+
+    // Process trailing stages, i.e. the final set of non-kInOut stages; they
+    // all have the same input buffer and no need to use any mirroring.
+
+    int y = vy - num_extra_rows;
+
+    for (size_t c = 0; c < input_data.size(); c++) {
+      // Skip pixels that are not part of the actual final image area.
+      input_rows[first_trailing_stage_][c][0] =
+          rows.GetBuffer(stage_input_for_channel_[first_trailing_stage_][c], y,
+                         c) +
+          x_pixels_skip;
+    }
+
+    // Check that we are not outside of the bounds for the current rendering
+    // rect. Not doing so might result in overwriting some rows that have been
+    // written (or will be written) by other threads.
+    if (y < 0 || y >= ssize_t(image_area_rect.ysize())) {
+      continue;
+    }
+
+    // Avoid running pipeline stages on pixels that are outside the full image
+    // area. As trailing stages have no borders, this is a free optimization
+    // (and may be necessary for correctness, as some stages assume coordinates
+    // are within bounds).
+    ssize_t full_image_y = frame_y0 + image_area_rect.y0() + y;
+    if (full_image_y < 0 || full_image_y >= ssize_t(full_image_ysize)) {
+      continue;
+    }
+
+    for (size_t i = first_trailing_stage_; i < stages_.size(); i++) {
+      // Before the first_image_dim_stage_, coordinates are relative to the
+      // current frame.
+      size_t x0 =
+          i < first_image_dim_stage_ ? full_image_x0 - frame_x0 : full_image_x0;
+      size_t y =  // NOTE: shadows the outer `int y` (group-relative row)
+          i < first_image_dim_stage_ ? full_image_y - frame_y0 : full_image_y;
+      stages_[i]->ProcessRow(input_rows[first_trailing_stage_], output_rows,
+                             /*xextra=*/0, full_image_x1 - full_image_x0, x0, y,
+                             thread_id);
+    }
+  }
+}
+
+void LowMemoryRenderPipeline::RenderPadding(size_t thread_id, Rect rect) {
+  if (rect.xsize() == 0) return;  // zero-width rect: nothing to render
+  size_t numc = channel_shifts_[0].size();  // number of channels
+  RenderPipelineStage::RowInfo input_rows(numc, std::vector<float*>(1));
+  RenderPipelineStage::RowInfo output_rows;  // left empty; passed as-is below
+
+  for (size_t c = 0; c < numc; c++) {
+    input_rows[c][0] = out_of_frame_data_[thread_id].Row(c);  // row c holds channel c
+  }
+
+  for (size_t y = 0; y < rect.ysize(); y++) {
+    stages_[first_image_dim_stage_ - 1]->ProcessPaddingRow(  // stage just before first_image_dim_stage_
+        input_rows, rect.xsize(), rect.x0(), rect.y0() + y);
+    for (size_t i = first_image_dim_stage_; i < stages_.size(); i++) {  // then run the remaining stages on that row
+      stages_[i]->ProcessRow(input_rows, output_rows,
+                             /*xextra=*/0, rect.xsize(), rect.x0(),
+                             rect.y0() + y, thread_id);
+    }
+  }
+}
+
+void LowMemoryRenderPipeline::ProcessBuffers(size_t group_id,
+                                             size_t thread_id) {
+  std::vector<ImageF>& input_data =
+      group_data_[use_group_ids_ ? group_id : thread_id];
+
+  // Copy the group borders to the border storage.
+  for (size_t c = 0; c < input_data.size(); c++) {
+    SaveBorders(group_id, c, input_data[c]);
+  }
+
+  size_t gy = group_id / frame_dimensions_.xsize_groups;  // group row
+  size_t gx = group_id % frame_dimensions_.xsize_groups;  // group column
+
+  if (first_image_dim_stage_ != stages_.size()) {
+    size_t group_dim = frame_dimensions_.group_dim << base_color_shift_;
+    RectT<ssize_t> group_rect(gx * group_dim, gy * group_dim, group_dim,
+                              group_dim);
+    RectT<ssize_t> image_rect(0, 0, frame_dimensions_.xsize_upsampled,
+                              frame_dimensions_.ysize_upsampled);
+    RectT<ssize_t> full_image_rect(0, 0, full_image_xsize_, full_image_ysize_);
+    group_rect = group_rect.Translate(frame_origin_.x0, frame_origin_.y0);
+    image_rect = image_rect.Translate(frame_origin_.x0, frame_origin_.y0);
+    image_rect = image_rect.Intersection(full_image_rect);  // frame area inside the full image
+    group_rect = group_rect.Intersection(image_rect);  // this group's part of that area
+    size_t x0 = group_rect.x0();
+    size_t y0 = group_rect.y0();
+    size_t x1 = group_rect.x1();
+    size_t y1 = group_rect.y1();
+    JXL_DEBUG_V(6,
+                "Rendering padding for full image rect %s "
+                "outside group rect %s",
+                Description(full_image_rect).c_str(),
+                Description(group_rect).c_str());
+
+    if (group_id == 0 && (image_rect.xsize() == 0 || image_rect.ysize() == 0)) {
+      // If this frame does not intersect with the full image, we have to
+      // initialize the whole image area with RenderPadding.
+      RenderPadding(thread_id,
+                    Rect(0, 0, full_image_xsize_, full_image_ysize_));
+    }
+
+    // Render padding for groups that intersect with the full image. The case
+    // where no groups intersect was handled above.
+    if (group_rect.xsize() > 0 && group_rect.ysize() > 0) {
+      if (gx == 0 && gy == 0) {  // top-left corner
+        RenderPadding(thread_id, Rect(0, 0, x0, y0));
+      }
+      if (gy == 0) {  // strip above the group
+        RenderPadding(thread_id, Rect(x0, 0, x1 - x0, y0));
+      }
+      if (gx == 0) {  // strip to the left of the group
+        RenderPadding(thread_id, Rect(0, y0, x0, y1 - y0));
+      }
+      if (gx == 0 && gy + 1 == frame_dimensions_.ysize_groups) {  // bottom-left corner
+        RenderPadding(thread_id, Rect(0, y1, x0, full_image_ysize_ - y1));
+      }
+      if (gy + 1 == frame_dimensions_.ysize_groups) {  // strip below the group
+        RenderPadding(thread_id, Rect(x0, y1, x1 - x0, full_image_ysize_ - y1));
+      }
+      if (gy == 0 && gx + 1 == frame_dimensions_.xsize_groups) {  // top-right corner
+        RenderPadding(thread_id, Rect(x1, 0, full_image_xsize_ - x1, y0));
+      }
+      if (gx + 1 == frame_dimensions_.xsize_groups) {  // strip to the right of the group
+        RenderPadding(thread_id, Rect(x1, y0, full_image_xsize_ - x1, y1 - y0));
+      }
+      if (gy + 1 == frame_dimensions_.ysize_groups &&  // bottom-right corner
+          gx + 1 == frame_dimensions_.xsize_groups) {
+        RenderPadding(thread_id, Rect(x1, y1, full_image_xsize_ - x1,
+                                      full_image_ysize_ - y1));
+      }
+    }
+  }
+
+  Rect ready_rects[GroupBorderAssigner::kMaxToFinalize];
+  size_t num_ready_rects = 0;
+  group_border_assigner_.GroupDone(group_id, group_border_.first,  // fills ready_rects / num_ready_rects
+                                   group_border_.second, ready_rects,
+                                   &num_ready_rects);
+  for (size_t i = 0; i < num_ready_rects; i++) {
+    const Rect& image_max_color_channel_rect = ready_rects[i];
+    for (size_t c = 0; c < input_data.size(); c++) {
+      LoadBorders(group_id, c, image_max_color_channel_rect, &input_data[c]);
+    }
+    Rect data_max_color_channel_rect(  // same rect, relative to this group's buffer (incl. border)
+        group_data_x_border_ + image_max_color_channel_rect.x0() -
+            gx * frame_dimensions_.group_dim,
+        group_data_y_border_ + image_max_color_channel_rect.y0() -
+            gy * frame_dimensions_.group_dim,
+        image_max_color_channel_rect.xsize(),
+        image_max_color_channel_rect.ysize());
+    RenderRect(thread_id, input_data, data_max_color_channel_rect,
+               image_max_color_channel_rect);
+  }
+}
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/low_memory_render_pipeline.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/low_memory_render_pipeline.h
new file mode 100644
index 0000000000..b386f7c078
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/low_memory_render_pipeline.h
@@ -0,0 +1,111 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_LOW_MEMORY_RENDER_PIPELINE_H_
+#define LIB_JXL_RENDER_PIPELINE_LOW_MEMORY_RENDER_PIPELINE_H_
+
+#include <stdint.h>
+
+#include "lib/jxl/dec_group_border.h"
+#include "lib/jxl/render_pipeline/render_pipeline.h"
+
+namespace jxl {
+
+// A multithreaded, low-memory rendering pipeline that only allocates a minimal
+// amount of buffers.
+class LowMemoryRenderPipeline final : public RenderPipeline {
+ private:
+  std::vector<std::pair<ImageF*, Rect>> PrepareBuffers(
+      size_t group_id, size_t thread_id) override;
+
+  void PrepareForThreadsInternal(size_t num, bool use_group_ids) override;
+
+  void ProcessBuffers(size_t group_id, size_t thread_id) override;
+
+  void ClearDone(size_t i) override { group_border_assigner_.ClearDone(i); }  // forwards to the border assigner
+
+  void Init() override;
+
+  void EnsureBordersStorage();
+  size_t GroupInputXSize(size_t c) const;
+  size_t GroupInputYSize(size_t c) const;
+  void RenderRect(size_t thread_id, std::vector<ImageF>& input_data,
+                  Rect data_max_color_channel_rect,
+                  Rect image_max_color_channel_rect);
+  void RenderPadding(size_t thread_id, Rect rect);
+
+  void SaveBorders(size_t group_id, size_t c, const ImageF& in);
+  void LoadBorders(size_t group_id, size_t c, const Rect& r, ImageF* out);
+
+  std::pair<size_t, size_t> ColorDimensionsToChannelDimensions(
+      std::pair<size_t, size_t> in, size_t c, size_t stage) const;
+
+  std::pair<size_t, size_t> BorderToStore(size_t c) const;
+
+  bool use_group_ids_;  // true: group_data_ is indexed by group id, else by thread id
+
+  // Storage for borders between groups. Borders of adjacent groups are stacked
+  // together, e.g. bottom border of current group is followed by top border
+  // of next group.
+  std::vector<ImageF> borders_horizontal_;
+  std::vector<ImageF> borders_vertical_;
+
+  // Manages the status of borders.
+  GroupBorderAssigner group_border_assigner_;
+
+  // Size (in color-channel-pixels) of the border around each group that might
+  // be assigned to that group.
+  std::pair<size_t, size_t> group_border_;  // passed as (first, second) to GroupDone()
+  // base_color_shift_ defines the size of groups in terms of final image
+  // pixels.
+  size_t base_color_shift_;
+
+  // Buffer for decoded pixel data for a group, indexed by [thread][channel] or
+  // [group][channel] depending on `use_group_ids_`.
+  std::vector<std::vector<ImageF>> group_data_;
+
+  // Borders for storing group data.
+  size_t group_data_x_border_;
+  size_t group_data_y_border_;
+
+  // Buffers for intermediate rows for the various stages, indexed by
+  // [thread][channel][stage].
+  std::vector<std::vector<std::vector<ImageF>>> stage_data_;
+
+  // Buffers for out-of-frame data, indexed by [thread]; every row is a
+  // different channel.
+  std::vector<ImageF> out_of_frame_data_;
+
+  // For each stage, a non-kIgnored channel.
+  std::vector<int32_t> anyc_;
+
+  // Size of the image at each stage.
+  std::vector<Rect> image_rect_;
+
+  // For each stage, for each channel, keep track of the kInOut stage that
+  // produced the input to that stage (which corresponds to the buffer index
+  // containing the data). -1 if data comes from the original input.
+  std::vector<std::vector<int32_t>> stage_input_for_channel_;
+
+  // Number of (virtual) extra rows that must be processed at each stage
+  // to produce sufficient output for future stages.
+  std::vector<int> virtual_ypadding_for_output_;
+
+  // Same thing for columns, except these are real columns and not virtual ones.
+  std::vector<int> xpadding_for_output_;
+
+  // First stage that doesn't have any kInOut channel.
+  size_t first_trailing_stage_;
+
+  // Origin and size of the frame after switching to image dimensions.
+  FrameOrigin frame_origin_;
+  size_t full_image_xsize_;
+  size_t full_image_ysize_;
+  size_t first_image_dim_stage_;  // compared against stages_.size(), which appears to mean "no such stage"
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_RENDER_PIPELINE_LOW_MEMORY_RENDER_PIPELINE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline.cc
new file mode 100644
index 0000000000..68b6ef613f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline.cc
@@ -0,0 +1,132 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/render_pipeline.h"
+
+#include <algorithm>
+
+#include "lib/jxl/render_pipeline/low_memory_render_pipeline.h"
+#include "lib/jxl/render_pipeline/simple_render_pipeline.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+
+void RenderPipeline::Builder::AddStage(
+    std::unique_ptr<RenderPipelineStage> stage) {
+  stages_.push_back(std::move(stage));  // takes ownership; stages are kept in call order
+}
+
+std::unique_ptr<RenderPipeline> RenderPipeline::Builder::Finalize(
+    FrameDimensions frame_dimensions) && {
+#if JXL_ENABLE_ASSERT
+  // Check that the last stage is not a kInOut stage for any channel, and that
+  // there is at least one stage.
+  JXL_ASSERT(!stages_.empty());
+  for (size_t c = 0; c < num_c_; c++) {
+    JXL_ASSERT(stages_.back()->GetChannelMode(c) !=
+               RenderPipelineChannelMode::kInOut);
+  }
+#endif
+
+  std::unique_ptr<RenderPipeline> res;
+  if (use_simple_implementation_) {
+    res = jxl::make_unique<SimpleRenderPipeline>();
+  } else {
+    res = jxl::make_unique<LowMemoryRenderPipeline>();
+  }
+
+  res->padding_.resize(stages_.size());
+  for (size_t i = stages_.size(); i-- > 0;) {  // walk the stages from last to first
+    const auto& stage = stages_[i];
+    res->padding_[i].resize(num_c_);
+    if (i + 1 == stages_.size()) {  // last stage: padding stays at the resized default
+      continue;
+    }
+    for (size_t c = 0; c < num_c_; c++) {
+      if (stage->GetChannelMode(c) == RenderPipelineChannelMode::kInOut) {
+        res->padding_[i][c].first = DivCeil(res->padding_[i + 1][c].first,  // later padding scaled down by the shift, plus own border
+                                            1 << stage->settings_.shift_x) +
+                                    stage->settings_.border_x;
+        res->padding_[i][c].second = DivCeil(res->padding_[i + 1][c].second,
+                                             1 << stage->settings_.shift_y) +
+                                     stage->settings_.border_y;
+      } else {
+        res->padding_[i][c] = res->padding_[i + 1][c];  // non-kInOut: inherit from the next stage
+      }
+    }
+  }
+
+  res->frame_dimensions_ = frame_dimensions;
+  res->group_completed_passes_.resize(frame_dimensions.num_groups);
+  res->channel_shifts_.resize(stages_.size());
+  res->channel_shifts_[0].resize(num_c_);
+  for (size_t i = 1; i < stages_.size(); i++) {  // input shift = sum of kInOut shifts of stages 0..n-2
+    auto& stage = stages_[i - 1];
+    for (size_t c = 0; c < num_c_; c++) {
+      if (stage->GetChannelMode(c) == RenderPipelineChannelMode::kInOut) {
+        res->channel_shifts_[0][c].first += stage->settings_.shift_x;
+        res->channel_shifts_[0][c].second += stage->settings_.shift_y;
+      }
+    }
+  }
+  for (size_t i = 1; i < stages_.size(); i++) {  // shift at the input of stage i, derived from stage i - 1
+    auto& stage = stages_[i - 1];
+    res->channel_shifts_[i].resize(num_c_);
+    for (size_t c = 0; c < num_c_; c++) {
+      if (stage->GetChannelMode(c) == RenderPipelineChannelMode::kInOut) {
+        res->channel_shifts_[i][c].first =
+            res->channel_shifts_[i - 1][c].first - stage->settings_.shift_x;
+        res->channel_shifts_[i][c].second =
+            res->channel_shifts_[i - 1][c].second - stage->settings_.shift_y;
+      } else {
+        res->channel_shifts_[i][c].first = res->channel_shifts_[i - 1][c].first;
+        res->channel_shifts_[i][c].second =
+            res->channel_shifts_[i - 1][c].second;
+      }
+    }
+  }
+  res->stages_ = std::move(stages_);  // the builder's stage list is moved from here on
+  res->Init();
+  return res;
+}
+
+RenderPipelineInput RenderPipeline::GetInputBuffers(size_t group_id,
+                                                    size_t thread_id) {
+  RenderPipelineInput ret;
+  JXL_DASSERT(group_id < group_completed_passes_.size());  // one entry per group
+  ret.group_id_ = group_id;
+  ret.thread_id_ = thread_id;
+  ret.pipeline_ = this;  // lets RenderPipelineInput::Done() call back into this pipeline
+  ret.buffers_ = PrepareBuffers(group_id, thread_id);  // implementation-specific storage
+  return ret;
+}
+
+void RenderPipeline::InputReady(
+    size_t group_id, size_t thread_id,
+    const std::vector<std::pair<ImageF*, Rect>>& buffers) {
+  JXL_DASSERT(group_id < group_completed_passes_.size());
+  group_completed_passes_[group_id]++;  // uint8_t counter, one per group
+  for (size_t i = 0; i < buffers.size(); ++i) {
+    (void)i;  // presumably avoids an unused-variable warning if the check below expands to nothing
+    JXL_CHECK_PLANE_INITIALIZED(*buffers[i].first, buffers[i].second, i);
+  }
+
+  ProcessBuffers(group_id, thread_id);  // implementation-specific rendering
+}
+
+Status RenderPipeline::PrepareForThreads(size_t num, bool use_group_ids) {
+  for (const auto& stage : stages_) {
+    JXL_RETURN_IF_ERROR(stage->PrepareForThreads(num));  // first failing stage aborts the call
+  }
+  PrepareForThreadsInternal(num, use_group_ids);  // implementation-specific storage setup
+  return true;
+}
+
+void RenderPipelineInput::Done() {
+  JXL_ASSERT(pipeline_);  // null on a default-constructed or moved-from input
+  pipeline_->InputReady(group_id_, thread_id_, buffers_);
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline.h
new file mode 100644
index 0000000000..bf3ad4975e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline.h
@@ -0,0 +1,139 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_RENDER_PIPELINE_H_
+#define LIB_JXL_RENDER_PIPELINE_RENDER_PIPELINE_H_
+
+#include <stdint.h>
+
+#include "lib/jxl/image.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Interface to provide input to the rendering pipeline. All the data in the
+// provided ImageF's Rects must have been initialized by the time Done() is
+// called, which hands the buffers back to the pipeline.
+class RenderPipelineInput {
+ public:
+  RenderPipelineInput(const RenderPipelineInput&) = delete;
+  RenderPipelineInput(RenderPipelineInput&& other) noexcept {
+    *this = std::move(other);  // delegates to the move assignment below
+  }
+  RenderPipelineInput& operator=(RenderPipelineInput&& other) noexcept {
+    pipeline_ = other.pipeline_;
+    group_id_ = other.group_id_;
+    thread_id_ = other.thread_id_;
+    buffers_ = std::move(other.buffers_);
+    other.pipeline_ = nullptr;  // marks `other` as moved-from; Done() asserts on it
+    return *this;
+  }
+
+  RenderPipelineInput() = default;
+  void Done();
+
+  const std::pair<ImageF*, Rect>& GetBuffer(size_t c) const {  // c is the channel index
+    JXL_ASSERT(c < buffers_.size());
+    return buffers_[c];
+  }
+
+ private:
+  RenderPipeline* pipeline_ = nullptr;
+  size_t group_id_;  // no default initializer; set by RenderPipeline::GetInputBuffers()
+  size_t thread_id_;  // no default initializer; set by RenderPipeline::GetInputBuffers()
+  std::vector<std::pair<ImageF*, Rect>> buffers_;
+  friend class RenderPipeline;
+};
+
+class RenderPipeline {
+ public:
+  class Builder {
+   public:
+    explicit Builder(size_t num_c) : num_c_(num_c) { JXL_ASSERT(num_c > 0); }
+
+    // Adds a stage to the pipeline. Must be called at least once; the last
+    // added stage cannot have kInOut channels.
+    void AddStage(std::unique_ptr<RenderPipelineStage> stage);
+
+    // Enables using the simple (i.e. non-memory-efficient) implementation of
+    // the pipeline.
+    void UseSimpleImplementation() { use_simple_implementation_ = true; }
+
+    // Finalizes setup of the pipeline. Shifts for all channels should be 0 at
+    // this point.
+    std::unique_ptr<RenderPipeline> Finalize(
+        FrameDimensions frame_dimensions) &&;
+
+   private:
+    std::vector<std::unique_ptr<RenderPipelineStage>> stages_;
+    size_t num_c_;  // number of channels
+    bool use_simple_implementation_ = false;
+  };
+
+  friend class Builder;
+
+  virtual ~RenderPipeline() = default;
+
+  Status IsInitialized() const {  // fails on the first stage that is not initialized
+    for (const auto& stage : stages_) {
+      JXL_RETURN_IF_ERROR(stage->IsInitialized());
+    }
+    return true;
+  }
+
+  // Allocates storage to run with `num` threads. If `use_group_ids` is true,
+  // storage is allocated for each group, not each thread. The behaviour is
+  // undefined if calling this function multiple times with a different value
+  // for `use_group_ids`.
+  Status PrepareForThreads(size_t num, bool use_group_ids);
+
+  // Retrieves a buffer where input data should be stored by the caller. When
+  // input has been provided for all buffers, the pipeline will complete its
+  // processing. This method may be called multiple times concurrently from
+  // different threads, provided that a different `thread_id` is given.
+  RenderPipelineInput GetInputBuffers(size_t group_id, size_t thread_id);
+
+  size_t PassesWithAllInput() const {  // NOTE(review): dereferences end() if there are no groups
+    return *std::min_element(group_completed_passes_.begin(),
+                             group_completed_passes_.end());
+  }
+
+  virtual void ClearDone(size_t i) {}  // no-op unless overridden
+
+ protected:
+  std::vector<std::unique_ptr<RenderPipelineStage>> stages_;
+  // Shifts for every channel at the input of each stage.
+  std::vector<std::vector<std::pair<size_t, size_t>>> channel_shifts_;
+
+  // Amount of (cumulative) padding required by each stage and channel, in
+  // either direction.
+  std::vector<std::vector<std::pair<size_t, size_t>>> padding_;
+
+  FrameDimensions frame_dimensions_;
+
+  std::vector<uint8_t> group_completed_passes_;  // per group; incremented by InputReady()
+
+  friend class RenderPipelineInput;
+
+ private:
+  void InputReady(size_t group_id, size_t thread_id,
+                  const std::vector<std::pair<ImageF*, Rect>>& buffers);
+
+  virtual std::vector<std::pair<ImageF*, Rect>> PrepareBuffers(
+      size_t group_id, size_t thread_id) = 0;
+
+  virtual void ProcessBuffers(size_t group_id, size_t thread_id) = 0;
+
+  // Note that this method may be called multiple times with different (or
+  // equal) `num`.
+  virtual void PrepareForThreadsInternal(size_t num, bool use_group_ids) = 0;
+
+  // Called once frame dimensions and stages are known.
+  virtual void Init() {}
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_RENDER_PIPELINE_RENDER_PIPELINE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline_stage.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline_stage.h
new file mode 100644
index 0000000000..d1a0074161
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline_stage.h
@@ -0,0 +1,171 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_RENDER_PIPELINE_STAGE_H_
+#define LIB_JXL_RENDER_PIPELINE_RENDER_PIPELINE_STAGE_H_
+
+#include <stdint.h>
+
+#include "lib/jxl/base/arch_macros.h"
+#include "lib/jxl/frame_header.h"
+
+namespace jxl {
+
+// The first pixel in the input to RenderPipelineStage will be located at
+// this position. Pixels before this position may be accessed as padding.
+// This should be at least the RoundUpTo(maximum padding / 2, maximum vector
+// size) times 2: this is realized when using Gaborish + EPF + upsampling +
+// chroma subsampling.
+#if JXL_ARCH_ARM
+constexpr size_t kRenderPipelineXOffset = 16;
+#else
+constexpr size_t kRenderPipelineXOffset = 32;
+#endif
+
+enum class RenderPipelineChannelMode {
+  // This channel is not modified by this stage.
+  kIgnored = 0,
+  // This channel is modified in-place.
+  kInPlace = 1,
+  // This channel is modified and written to a new buffer.
+  kInOut = 2,
+  // This channel is only read. These are the only stages that are assumed to
+  // have observable effects, i.e. calls to ProcessRow for other stages may be
+  // omitted if it can be shown they can't affect any kInput stage ProcessRow
+  // call that happens inside image boundaries.
+  kInput = 3,
+};
+
+class RenderPipeline;
+
+class RenderPipelineStage {
+ protected:
+  using Row = float*;
+  using ChannelRows = std::vector<Row>;
+
+ public:
+  using RowInfo = std::vector<ChannelRows>;
+  struct Settings {
+    // Amount of padding required in the various directions by all channels
+    // that have kInOut mode.
+    size_t border_x = 0;
+    size_t border_y = 0;
+
+    // Log2 of the number of columns/rows of output that this stage will produce
+    // for every input row for kInOut channels.
+    size_t shift_x = 0;
+    size_t shift_y = 0;
+
+    static Settings ShiftX(size_t shift, size_t border) {
+      Settings settings;
+      settings.border_x = border;
+      settings.shift_x = shift;
+      return settings;
+    }
+
+    static Settings ShiftY(size_t shift, size_t border) {
+      Settings settings;
+      settings.border_y = border;
+      settings.shift_y = shift;
+      return settings;
+    }
+
+    static Settings Symmetric(size_t shift, size_t border) {
+      Settings settings;
+      settings.border_x = settings.border_y = border;
+      settings.shift_x = settings.shift_y = shift;
+      return settings;
+    }
+
+    static Settings SymmetricBorderOnly(size_t border) {
+      return Symmetric(0, border);
+    }
+  };
+
+  virtual ~RenderPipelineStage() = default;
+
+  // Processes one row of input, producing the appropriate number of rows of
+  // output. Input/output rows can be obtained by calls to
+  // `GetInputRow`/`GetOutputRow`. `xsize+2*xextra` represents the total number
+  // of pixels to be processed in the input row, where the first pixel is at
+  // position `kRenderPipelineXOffset-xextra`. All pixels in the
+  // `[kRenderPipelineXOffset-xextra-border_x,
+  // kRenderPipelineXOffset+xsize+xextra+border_x)` range are initialized and
+  // accessible. `xpos` and `ypos` represent the position of the first
+  // (non-extra, i.e. in position kRenderPipelineXOffset) pixel in the center
+  // row of the input in the full image. `xpos` is a multiple of
+  // `GroupBorderAssigner::kPaddingXRound`. If `settings_.temp_buffer_size` is
+  // nonzero, `temp` will point to an HWY-aligned buffer of at least that number
+  // of floats; concurrent calls will have different buffers.
+  virtual void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                          size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                          size_t thread_id) const = 0;
+
+  // How each channel will be processed. Channels are numbered starting from
+  // color channels (always 3) and followed by all other channels.
+  virtual RenderPipelineChannelMode GetChannelMode(size_t c) const = 0;
+
+ protected:
+  explicit RenderPipelineStage(Settings settings) : settings_(settings) {}
+
+  virtual Status IsInitialized() const { return true; }
+
+  // Informs the stage about the total size of each channel. Few stages will
+  // actually need to use this information.
+  virtual void SetInputSizes(
+      const std::vector<std::pair<size_t, size_t>>& input_sizes) {}
+
+  virtual Status PrepareForThreads(size_t num_threads) { return true; }
+
+  // Returns a pointer to the input row of channel `c` at row offset `offset`.
+  // `offset` must be in [-settings_.border_y, settings_.border_y]. `c` must be
+  // such that `GetChannelMode(c) != kIgnored`. The returned pointer points to
+  // the offset-ed row (i.e. kRenderPipelineXOffset has been applied).
+  float* GetInputRow(const RowInfo& input_rows, size_t c, int offset) const {
+    JXL_DASSERT(GetChannelMode(c) != RenderPipelineChannelMode::kIgnored);
+    JXL_DASSERT(-offset <= static_cast<int>(settings_.border_y));
+    JXL_DASSERT(offset <= static_cast<int>(settings_.border_y));
+    return input_rows[c][settings_.border_y + offset] + kRenderPipelineXOffset;
+  }
+  // Similar to `GetInputRow`, but can only be used if `GetChannelMode(c) ==
+  // kInOut`. Offset must be less than `1<<settings_.shift_y`. The returned
+  // pointer points to the offset-ed row (i.e. kRenderPipelineXOffset has been
+  // applied).
+  float* GetOutputRow(const RowInfo& output_rows, size_t c,
+                      size_t offset) const {
+    JXL_DASSERT(GetChannelMode(c) == RenderPipelineChannelMode::kInOut);
+    JXL_DASSERT(offset < (1ul << settings_.shift_y));
+    return output_rows[c][offset] + kRenderPipelineXOffset;
+  }
+
+  // Indicates whether, from this stage on, the pipeline will operate on an
+  // image- rather than frame-sized buffer. Only one stage in the pipeline
+  // should return true, and it should implement ProcessPaddingRow below too.
+  // It is assumed that, if there is a SwitchToImageDimensions() == true stage,
+  // all kInput stages appear after it.
+  virtual bool SwitchToImageDimensions() const { return false; }
+
+  // If SwitchToImageDimensions returns true, then this should set xsize and
+  // ysize to the image size, and frame_origin to the location of the frame
+  // within the image. Otherwise, this is not called at all.
+  virtual void GetImageDimensions(size_t* xsize, size_t* ysize,
+                                  FrameOrigin* frame_origin) const {}
+
+  // Produces the appropriate output data outside of the frame dimensions. xpos
+  // and ypos are now relative to the full image.
+  virtual void ProcessPaddingRow(const RowInfo& output_rows, size_t xsize,
+                                 size_t xpos, size_t ypos) const {}
+
+  virtual const char* GetName() const = 0;
+
+  Settings settings_;
+  friend class RenderPipeline;
+  friend class SimpleRenderPipeline;
+  friend class LowMemoryRenderPipeline;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_RENDER_PIPELINE_RENDER_PIPELINE_STAGE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline_test.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline_test.cc
new file mode 100644
index 0000000000..80954ab4aa
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline_test.cc
@@ -0,0 +1,562 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/render_pipeline.h"
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/dec_frame.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/fake_parallel_runner_testonly.h"
+#include "lib/jxl/icc_codec.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/jpeg/enc_jpeg_data.h"
+#include "lib/jxl/render_pipeline/test_render_pipeline_stages.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+Status DecodeFile(const Span<const uint8_t> file, bool use_slow_pipeline,
+                  CodecInOut* io, ThreadPool* pool) {
+  Status ret = true;
+  {
+    BitReader reader(file);
+    BitReaderScopedCloser reader_closer(&reader, &ret);
+    JXL_RETURN_IF_ERROR(reader.ReadFixedBits<16>() == 0x0AFF);
+    JXL_RETURN_IF_ERROR(ReadSizeHeader(&reader, &io->metadata.size));
+    JXL_RETURN_IF_ERROR(ReadImageMetadata(&reader, &io->metadata.m));
+    io->metadata.transform_data.nonserialized_xyb_encoded =
+        io->metadata.m.xyb_encoded;
+    JXL_RETURN_IF_ERROR(Bundle::Read(&reader, &io->metadata.transform_data));
+    if (io->metadata.m.color_encoding.WantICC()) {
+      PaddedBytes icc;
+      JXL_RETURN_IF_ERROR(ReadICC(&reader, &icc));
+      JXL_RETURN_IF_ERROR(
+          io->metadata.m.color_encoding.SetICC(std::move(icc), &GetJxlCms()));
+    }
+    PassesDecoderState dec_state;
+    JXL_RETURN_IF_ERROR(
+        dec_state.output_encoding_info.SetFromMetadata(io->metadata));
+    JXL_RETURN_IF_ERROR(reader.JumpToByteBoundary());
+    io->frames.clear();
+    do {
+      io->frames.emplace_back(&io->metadata.m);
+      // Skip frames that are not displayed.
+      do {
+        size_t frame_start = reader.TotalBitsConsumed() / kBitsPerByte;
+        size_t size_left = file.size() - frame_start;
+        JXL_RETURN_IF_ERROR(
+            DecodeFrame(&dec_state, pool, file.data() + frame_start, size_left,
+                        &io->frames.back(), io->metadata, use_slow_pipeline));
+        reader.SkipBits(io->frames.back().decoded_bytes() * kBitsPerByte);
+      } while (dec_state.shared->frame_header.frame_type !=
+                   FrameType::kRegularFrame &&
+               dec_state.shared->frame_header.frame_type !=
+                   FrameType::kSkipProgressive);
+    } while (!dec_state.shared->frame_header.is_last);
+
+    if (io->frames.empty()) return JXL_FAILURE("Not enough data.");
+
+    if (reader.TotalBitsConsumed() != file.size() * kBitsPerByte) {
+      return JXL_FAILURE("Reader position not at EOF.");
+    }
+    if (!reader.AllReadsWithinBounds()) {
+      return JXL_FAILURE("Reader out of bounds read.");
+    }
+    io->CheckMetadata();
+    // reader is closed here.
+  }
+  return ret;
+}
+
+TEST(RenderPipelineTest, Build) {
+  RenderPipeline::Builder builder(/*num_c=*/1);
+  builder.AddStage(jxl::make_unique<UpsampleXSlowStage>());
+  builder.AddStage(jxl::make_unique<UpsampleYSlowStage>());
+  builder.AddStage(jxl::make_unique<Check0FinalStage>());
+  builder.UseSimpleImplementation();
+  FrameDimensions frame_dimensions;
+  frame_dimensions.Set(/*xsize=*/1024, /*ysize=*/1024, /*group_size_shift=*/0,
+                       /*max_hshift=*/0, /*max_vshift=*/0,
+                       /*modular_mode=*/false, /*upsampling=*/1);
+  std::move(builder).Finalize(frame_dimensions);
+}
+
+TEST(RenderPipelineTest, CallAllGroups) {
+  RenderPipeline::Builder builder(/*num_c=*/1);
+  builder.AddStage(jxl::make_unique<UpsampleXSlowStage>());
+  builder.AddStage(jxl::make_unique<UpsampleYSlowStage>());
+  builder.AddStage(jxl::make_unique<Check0FinalStage>());
+  builder.UseSimpleImplementation();
+  FrameDimensions frame_dimensions;
+  frame_dimensions.Set(/*xsize=*/1024, /*ysize=*/1024, /*group_size_shift=*/0,
+                       /*max_hshift=*/0, /*max_vshift=*/0,
+                       /*modular_mode=*/false, /*upsampling=*/1);
+  auto pipeline = std::move(builder).Finalize(frame_dimensions);
+  ASSERT_TRUE(pipeline->PrepareForThreads(1, /*use_group_ids=*/false));
+
+  for (size_t i = 0; i < frame_dimensions.num_groups; i++) {
+    auto input_buffers = pipeline->GetInputBuffers(i, 0);
+    FillPlane(0.0f, input_buffers.GetBuffer(0).first,
+              input_buffers.GetBuffer(0).second);
+    input_buffers.Done();
+  }
+
+  EXPECT_EQ(pipeline->PassesWithAllInput(), 1);
+}
+
+TEST(RenderPipelineTest, BuildFast) {
+  RenderPipeline::Builder builder(/*num_c=*/1);
+  builder.AddStage(jxl::make_unique<UpsampleXSlowStage>());
+  builder.AddStage(jxl::make_unique<UpsampleYSlowStage>());
+  builder.AddStage(jxl::make_unique<Check0FinalStage>());
+  FrameDimensions frame_dimensions;
+  frame_dimensions.Set(/*xsize=*/1024, /*ysize=*/1024, /*group_size_shift=*/0,
+                       /*max_hshift=*/0, /*max_vshift=*/0,
+                       /*modular_mode=*/false, /*upsampling=*/1);
+  std::move(builder).Finalize(frame_dimensions);
+}
+
+TEST(RenderPipelineTest, CallAllGroupsFast) {
+  RenderPipeline::Builder builder(/*num_c=*/1);
+  builder.AddStage(jxl::make_unique<UpsampleXSlowStage>());
+  builder.AddStage(jxl::make_unique<UpsampleYSlowStage>());
+  builder.AddStage(jxl::make_unique<Check0FinalStage>());
+  builder.UseSimpleImplementation();  // NOTE(review): unlike BuildFast; verify.
+  FrameDimensions frame_dimensions;
+  frame_dimensions.Set(/*xsize=*/1024, /*ysize=*/1024, /*group_size_shift=*/0,
+                       /*max_hshift=*/0, /*max_vshift=*/0,
+                       /*modular_mode=*/false, /*upsampling=*/1);
+  auto pipeline = std::move(builder).Finalize(frame_dimensions);
+  ASSERT_TRUE(pipeline->PrepareForThreads(1, /*use_group_ids=*/false));
+
+  for (size_t i = 0; i < frame_dimensions.num_groups; i++) {
+    auto input_buffers = pipeline->GetInputBuffers(i, 0);
+    FillPlane(0.0f, input_buffers.GetBuffer(0).first,
+              input_buffers.GetBuffer(0).second);
+    input_buffers.Done();
+  }
+
+  EXPECT_EQ(pipeline->PassesWithAllInput(), 1);
+}
+
+struct RenderPipelineTestInputSettings {
+  // Input image.
+  std::string input_path;
+  size_t xsize, ysize;
+  bool jpeg_transcode = false;
+  // Encoding settings.
+  CompressParams cparams;
+  // Short name for the encoder settings.
+  std::string cparams_descr;
+
+  bool add_spot_color = false;
+
+  Splines splines;
+};
+
+class RenderPipelineTestParam
+    : public ::testing::TestWithParam<RenderPipelineTestInputSettings> {};
+
+TEST_P(RenderPipelineTestParam, PipelineTest) {
+  RenderPipelineTestInputSettings config = GetParam();
+
+  // Use a parallel runner that randomly shuffles tasks to detect possible
+  // border handling bugs.
+  FakeParallelRunner fake_pool(/*order_seed=*/123, /*num_threads=*/8);
+  ThreadPool pool(&JxlFakeParallelRunner, &fake_pool);
+  const PaddedBytes orig = jxl::test::ReadTestData(config.input_path);
+
+  CodecInOut io;
+  if (config.jpeg_transcode) {
+    ASSERT_TRUE(jpeg::DecodeImageJPG(Span<const uint8_t>(orig), &io));
+  } else {
+    ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+  }
+  io.ShrinkTo(config.xsize, config.ysize);
+
+  if (config.add_spot_color) {
+    jxl::ImageF spot(config.xsize, config.ysize);
+    jxl::ZeroFillImage(&spot);
+
+    for (size_t y = 0; y < config.ysize; y++) {
+      float* JXL_RESTRICT row = spot.Row(y);
+      for (size_t x = 0; x < config.xsize; x++) {
+        row[x] = ((x ^ y) & 255) * (1.f / 255.f);
+      }
+    }
+    ExtraChannelInfo info;
+    info.bit_depth.bits_per_sample = 8;
+    info.dim_shift = 0;
+    info.type = jxl::ExtraChannel::kSpotColor;
+    info.spot_color[0] = 0.5f;
+    info.spot_color[1] = 0.2f;
+    info.spot_color[2] = 1.f;
+    info.spot_color[3] = 0.5f;
+
+    io.metadata.m.extra_channel_info.push_back(info);
+    std::vector<jxl::ImageF> ec;
+    ec.push_back(std::move(spot));
+    io.frames[0].SetExtraChannels(std::move(ec));
+  }
+
+  PaddedBytes compressed;
+
+  PassesEncoderState enc_state;
+  enc_state.shared.image_features.splines = config.splines;
+  ASSERT_TRUE(EncodeFile(config.cparams, &io, &enc_state, &compressed,
+                         GetJxlCms(), /*aux_out=*/nullptr, &pool));
+
+  CodecInOut io_default;
+  ASSERT_TRUE(DecodeFile(Span<const uint8_t>(compressed),
+                         /*use_slow_pipeline=*/false, &io_default, &pool));
+  CodecInOut io_slow_pipeline;
+  ASSERT_TRUE(DecodeFile(Span<const uint8_t>(compressed),
+                         /*use_slow_pipeline=*/true, &io_slow_pipeline, &pool));
+
+  ASSERT_EQ(io_default.frames.size(), io_slow_pipeline.frames.size());
+  for (size_t i = 0; i < io_default.frames.size(); i++) {
+#if JXL_HIGH_PRECISION
+    constexpr float kMaxError = 1e-5;
+#else
+    constexpr float kMaxError = 5e-4;
+#endif
+    Image3F def = std::move(*io_default.frames[i].color());
+    Image3F pip = std::move(*io_slow_pipeline.frames[i].color());
+    JXL_ASSERT_OK(VerifyRelativeError(pip, def, kMaxError, kMaxError, _));
+    for (size_t ec = 0; ec < io_default.frames[i].extra_channels().size();
+         ec++) {
+      JXL_ASSERT_OK(VerifyRelativeError(
+          io_slow_pipeline.frames[i].extra_channels()[ec],
+          io_default.frames[i].extra_channels()[ec], kMaxError, kMaxError, _));
+    }
+  }
+}
+
+Splines CreateTestSplines() {
+  const ColorCorrelationMap cmap;
+  std::vector<Spline::Point> control_points{{9, 54},  {118, 159}, {97, 3},
+                                            {10, 40}, {150, 25},  {120, 300}};
+  const Spline spline{
+      control_points,
+      /*color_dct=*/
+      {{0.03125f, 0.00625f, 0.003125f}, {1.f, 0.321875f}, {1.f, 0.24375f}},
+      /*sigma_dct=*/{0.3125f, 0.f, 0.f, 0.0625f}};
+  std::vector<Spline> spline_data = {spline};
+  std::vector<QuantizedSpline> quantized_splines;
+  std::vector<Spline::Point> starting_points;
+  for (const Spline& spline : spline_data) {
+    quantized_splines.emplace_back(spline, /*quantization_adjustment=*/0,
+                                   cmap.YtoXRatio(0), cmap.YtoBRatio(0));
+    starting_points.push_back(spline.control_points.front());
+  }
+  return Splines(/*quantization_adjustment=*/0, std::move(quantized_splines),
+                 std::move(starting_points));
+}
+
+std::vector<RenderPipelineTestInputSettings> GeneratePipelineTests() {
+  std::vector<RenderPipelineTestInputSettings> all_tests;
+
+  std::pair<size_t, size_t> sizes[] = {
+      {3, 8}, {128, 128}, {256, 256}, {258, 258}, {533, 401}, {777, 777},
+  };
+
+  for (auto size : sizes) {
+    RenderPipelineTestInputSettings settings;
+    settings.input_path = "jxl/flower/flower.png";
+    settings.xsize = size.first;
+    settings.ysize = size.second;
+
+    // Base settings.
+    settings.cparams.butteraugli_distance = 1.0;
+    settings.cparams.patches = Override::kOff;
+    settings.cparams.dots = Override::kOff;
+    settings.cparams.gaborish = Override::kOff;
+    settings.cparams.epf = 0;
+    settings.cparams.color_transform = ColorTransform::kXYB;
+
+    {
+      auto s = settings;
+      s.cparams_descr = "NoGabNoEpfNoPatches";
+      all_tests.push_back(s);
+    }
+
+    {
+      auto s = settings;
+      s.cparams.color_transform = ColorTransform::kNone;
+      s.cparams_descr = "NoGabNoEpfNoPatchesNoXYB";
+      all_tests.push_back(s);
+    }
+
+    {
+      auto s = settings;
+      s.cparams.gaborish = Override::kOn;
+      s.cparams_descr = "GabNoEpfNoPatches";
+      all_tests.push_back(s);
+    }
+
+    {
+      auto s = settings;
+      s.cparams.epf = 1;
+      s.cparams_descr = "NoGabEpf1NoPatches";
+      all_tests.push_back(s);
+    }
+
+    {
+      auto s = settings;
+      s.cparams.epf = 2;
+      s.cparams_descr = "NoGabEpf2NoPatches";
+      all_tests.push_back(s);
+    }
+
+    {
+      auto s = settings;
+      s.cparams.epf = 3;
+      s.cparams_descr = "NoGabEpf3NoPatches";
+      all_tests.push_back(s);
+    }
+
+    {
+      auto s = settings;
+      s.cparams.gaborish = Override::kOn;
+      s.cparams.epf = 3;
+      s.cparams_descr = "GabEpf3NoPatches";
+      all_tests.push_back(s);
+    }
+
+    {
+      auto s = settings;
+      s.cparams_descr = "Splines";
+      s.splines = CreateTestSplines();
+      all_tests.push_back(s);
+    }
+
+    for (size_t ups : {2, 4, 8}) {
+      {
+        auto s = settings;
+        s.cparams.resampling = ups;
+        s.cparams_descr = "Ups" + std::to_string(ups);
+        all_tests.push_back(s);
+      }
+      {
+        auto s = settings;
+        s.cparams.resampling = ups;
+        s.cparams.epf = 1;
+        s.cparams_descr = "Ups" + std::to_string(ups) + "EPF1";
+        all_tests.push_back(s);
+      }
+      {
+        auto s = settings;
+        s.cparams.resampling = ups;
+        s.cparams.gaborish = Override::kOn;
+        s.cparams.epf = 1;
+        s.cparams_descr = "Ups" + std::to_string(ups) + "GabEPF1";
+        all_tests.push_back(s);
+      }
+    }
+
+    {
+      auto s = settings;
+      s.cparams_descr = "Noise";
+      s.cparams.photon_noise_iso = 3200;
+      all_tests.push_back(s);
+    }
+
+    {
+      auto s = settings;
+      s.cparams_descr = "NoiseUps";
+      s.cparams.photon_noise_iso = 3200;
+      s.cparams.resampling = 2;
+      all_tests.push_back(s);
+    }
+
+    {
+      auto s = settings;
+      s.cparams_descr = "ModularLossless";
+      s.cparams.modular_mode = true;
+      s.cparams.butteraugli_distance = 0;
+      all_tests.push_back(s);
+    }
+
+    {
+      auto s = settings;
+      s.cparams_descr = "ProgressiveDC";
+      s.cparams.progressive_dc = 1;
+      all_tests.push_back(s);
+    }
+
+    {
+      auto s = settings;
+      s.cparams_descr = "ModularLossy";
+      s.cparams.modular_mode = true;
+      s.cparams.butteraugli_distance = 1.f;
+      all_tests.push_back(s);
+    }
+
+    {
+      auto s = settings;
+      s.input_path = "jxl/flower/flower_alpha.png";
+      s.cparams_descr = "AlphaVarDCT";
+      all_tests.push_back(s);
+    }
+
+    {
+      auto s = settings;
+      s.input_path = "jxl/flower/flower_alpha.png";
+      s.cparams_descr = "AlphaVarDCTUpsamplingEPF";
+      s.cparams.epf = 1;
+      s.cparams.ec_resampling = 2;
+      all_tests.push_back(s);
+    }
+
+    {
+      auto s = settings;
+      s.cparams.modular_mode = true;
+      s.cparams.butteraugli_distance = 0;
+      s.input_path = "jxl/flower/flower_alpha.png";
+      s.cparams_descr = "AlphaLossless";
+      all_tests.push_back(s);
+    }
+
+    {
+      auto s = settings;
+      s.input_path = "jxl/flower/flower_alpha.png";
+      s.cparams_descr = "AlphaDownsample";
+      s.cparams.ec_resampling = 2;
+      all_tests.push_back(s);
+    }
+
+    {
+      auto s = settings;
+      s.cparams_descr = "SpotColor";
+      s.add_spot_color = true;
+      all_tests.push_back(s);
+    }
+  }
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+  for (const char* input : {"jxl/flower/flower.png.im_q85_444.jpg",
+                            "jxl/flower/flower.png.im_q85_420.jpg",
+                            "jxl/flower/flower.png.im_q85_422.jpg",
+                            "jxl/flower/flower.png.im_q85_440.jpg"}) {
+    RenderPipelineTestInputSettings settings;
+    settings.input_path = input;
+    settings.jpeg_transcode = true;
+    settings.xsize = 2268;
+    settings.ysize = 1512;
+    settings.cparams_descr = "Default";
+    all_tests.push_back(settings);
+  }
+
+#endif
+
+  {
+    RenderPipelineTestInputSettings settings;
+    settings.input_path = "jxl/grayscale_patches.png";
+    settings.xsize = 1011;
+    settings.ysize = 277;
+    settings.cparams_descr = "Patches";
+    all_tests.push_back(settings);
+  }
+
+  {
+    RenderPipelineTestInputSettings settings;
+    settings.input_path = "jxl/grayscale_patches.png";
+    settings.xsize = 1011;
+    settings.ysize = 277;
+    settings.cparams.photon_noise_iso = 1000;
+    settings.cparams_descr = "PatchesAndNoise";
+    all_tests.push_back(settings);
+  }
+
+  {
+    RenderPipelineTestInputSettings settings;
+    settings.input_path = "jxl/grayscale_patches.png";
+    settings.xsize = 1011;
+    settings.ysize = 277;
+    settings.cparams.resampling = 2;
+    settings.cparams_descr = "PatchesAndUps2";
+    all_tests.push_back(settings);
+  }
+
+  return all_tests;
+}
+
+// Streams a test-name-friendly id for `c`: the basename of its input path
+// with non-alphanumeric characters replaced by '_', an optional "JPEG_"
+// marker, the dimensions and the encoder settings description.
+std::ostream& operator<<(std::ostream& os,
+                         const RenderPipelineTestInputSettings& c) {
+  // Keep only what follows the last '/', or the whole path if there is none.
+  const size_t pos = c.input_path.find_last_of('/');
+  std::string filename =
+      pos == std::string::npos ? c.input_path : c.input_path.substr(pos + 1);
+  // Take `unsigned char`: isalnum() is undefined for negative char values.
+  auto not_alnum = [](unsigned char ch) { return !isalnum(ch); };
+  std::replace_if(filename.begin(), filename.end(), not_alnum, '_');
+  os << filename << "_" << (c.jpeg_transcode ? "JPEG_" : "") << c.xsize << "x"
+     << c.ysize << "_" << c.cparams_descr;
+  return os;
+}
+
+std::string PipelineTestDescription(
+    const testing::TestParamInfo<RenderPipelineTestParam::ParamType>& info) {
+  std::stringstream name;
+  name << info.param;
+  return name.str();
+}
+
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(RenderPipelineTest, RenderPipelineTestParam,
+                                   testing::ValuesIn(GeneratePipelineTests()),
+                                   PipelineTestDescription);
+
+TEST(RenderPipelineDecodingTest, Animation) {
+  FakeParallelRunner fake_pool(/*order_seed=*/123, /*num_threads=*/8);
+  ThreadPool pool(&JxlFakeParallelRunner, &fake_pool);
+
+  PaddedBytes compressed =
+      jxl::test::ReadTestData("jxl/blending/cropped_traffic_light.jxl");
+
+  CodecInOut io_default;
+  ASSERT_TRUE(DecodeFile(Span<const uint8_t>(compressed),
+                         /*use_slow_pipeline=*/false, &io_default, &pool));
+  CodecInOut io_slow_pipeline;
+  ASSERT_TRUE(DecodeFile(Span<const uint8_t>(compressed),
+                         /*use_slow_pipeline=*/true, &io_slow_pipeline, &pool));
+
+  ASSERT_EQ(io_default.frames.size(), io_slow_pipeline.frames.size());
+  for (size_t i = 0; i < io_default.frames.size(); i++) {
+    // Both decodes must agree on every frame, up to this tolerance.
+#if JXL_HIGH_PRECISION
+    constexpr float kMaxError = 1e-5;
+#else
+    constexpr float kMaxError = 1e-4;
+#endif
+    Image3F fast_pipeline = std::move(*io_default.frames[i].color());
+    Image3F slow_pipeline = std::move(*io_slow_pipeline.frames[i].color());
+    JXL_ASSERT_OK(VerifyRelativeError(slow_pipeline, fast_pipeline, kMaxError,
+                                      kMaxError, _));
+    for (size_t ec = 0; ec < io_default.frames[i].extra_channels().size();
+         ec++) {
+      JXL_ASSERT_OK(VerifyRelativeError(
+          io_slow_pipeline.frames[i].extra_channels()[ec],
+          io_default.frames[i].extra_channels()[ec], kMaxError, kMaxError, _));
+    }
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/simple_render_pipeline.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/simple_render_pipeline.cc
new file mode 100644
index 0000000000..4495288860
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/simple_render_pipeline.cc
@@ -0,0 +1,266 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/simple_render_pipeline.h"
+
+#include <hwy/base.h>
+
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+
+void SimpleRenderPipeline::PrepareForThreadsInternal(size_t num,
+                                                     bool use_group_ids) {  // neither argument is used
+  if (!channel_data_.empty()) {  // buffers were already allocated
+    return;
+  }
+  auto ch_size = [](size_t frame_size, size_t shift) {  // downsampled size + padding on both sides
+    return DivCeil(frame_size, 1 << shift) + kRenderPipelineXOffset * 2;
+  };
+  for (size_t c = 0; c < channel_shifts_[0].size(); c++) {  // one full-frame buffer per channel
+    channel_data_.push_back(ImageF(
+        ch_size(frame_dimensions_.xsize_upsampled, channel_shifts_[0][c].first),
+        ch_size(frame_dimensions_.ysize_upsampled,
+                channel_shifts_[0][c].second)));
+    msan::PoisonImage(channel_data_.back());  // presumably marks the new buffer as uninitialized for MSan
+  }
+}
+
+Rect SimpleRenderPipeline::MakeChannelRect(size_t group_id, size_t channel) {
+  size_t base_color_shift =  // ceil(log2) of the upsampled / non-upsampled padded width ratio
+      CeilLog2Nonzero(frame_dimensions_.xsize_upsampled_padded /
+                      frame_dimensions_.xsize_padded);
+
+  const size_t gx = group_id % frame_dimensions_.xsize_groups;  // group column
+  const size_t gy = group_id / frame_dimensions_.xsize_groups;  // group row
+  size_t xgroupdim = (frame_dimensions_.group_dim << base_color_shift) >>  // group width at this channel's shift
+                     channel_shifts_[0][channel].first;
+  size_t ygroupdim = (frame_dimensions_.group_dim << base_color_shift) >>  // group height at this channel's shift
+                     channel_shifts_[0][channel].second;
+  return Rect(  // origin and size of the group, offset by the buffer padding
+      kRenderPipelineXOffset + gx * xgroupdim,
+      kRenderPipelineXOffset + gy * ygroupdim, xgroupdim, ygroupdim,
+      kRenderPipelineXOffset + DivCeil(frame_dimensions_.xsize_upsampled,  // NOTE(review): last two args look like x/y end limits - confirm against Rect
+                                       1 << channel_shifts_[0][channel].first),
+      kRenderPipelineXOffset +
+          DivCeil(frame_dimensions_.ysize_upsampled,
+                  1 << channel_shifts_[0][channel].second));
+}
+
+std::vector<std::pair<ImageF*, Rect>> SimpleRenderPipeline::PrepareBuffers(
+    size_t group_id, size_t thread_id) {  // thread_id is not used here
+  std::vector<std::pair<ImageF*, Rect>> ret;
+  for (size_t c = 0; c < channel_data_.size(); c++) {  // one (buffer, rect) entry per channel
+    ret.emplace_back(&channel_data_[c], MakeChannelRect(group_id, c));  // full-frame buffer + this group's rect
+  }
+  return ret;
+}
+
+// Checks that this group's input region is initialized; then, if
+// PassesWithAllInput() exceeds the number of passes already processed, runs
+// every stage in order over the full-frame channel buffers.
+void SimpleRenderPipeline::ProcessBuffers(size_t group_id, size_t thread_id) {
+  for (size_t c = 0; c < channel_data_.size(); c++) {
+    Rect r = MakeChannelRect(group_id, c);
+    (void)r;
+    JXL_CHECK_PLANE_INITIALIZED(channel_data_[c], r, c);
+  }
+
+  if (PassesWithAllInput() <= processed_passes_) return;
+  processed_passes_++;
+
+  for (size_t stage_id = 0; stage_id < stages_.size(); stage_id++) {
+    const auto& stage = stages_[stage_id];
+    // Prepare buffers for kInOut channels.
+    std::vector<ImageF> new_channels(channel_data_.size());
+    std::vector<ImageF*> output_channels(channel_data_.size());
+
+    std::vector<std::pair<size_t, size_t>> input_sizes(channel_data_.size());
+    for (size_t c = 0; c < channel_data_.size(); c++) {
+      input_sizes[c] =
+          std::make_pair(channel_data_[c].xsize() - kRenderPipelineXOffset * 2,
+                         channel_data_[c].ysize() - kRenderPipelineXOffset * 2);
+    }
+
+    for (size_t c = 0; c < channel_data_.size(); c++) {
+      if (stage->GetChannelMode(c) != RenderPipelineChannelMode::kInOut) {
+        continue;
+      }
+      // Ensure that the newly allocated channels are large enough to avoid
+      // problems with padding.
+      new_channels[c] =
+          ImageF(frame_dimensions_.xsize_upsampled_padded +
+                     kRenderPipelineXOffset * 2 + hwy::kMaxVectorSize * 8,
+                 frame_dimensions_.ysize_upsampled_padded +
+                     kRenderPipelineXOffset * 2);
+      new_channels[c].ShrinkTo(
+          (input_sizes[c].first << stage->settings_.shift_x) +
+              kRenderPipelineXOffset * 2,
+          (input_sizes[c].second << stage->settings_.shift_y) +
+              kRenderPipelineXOffset * 2);
+      output_channels[c] = &new_channels[c];
+    }
+
+    auto get_row = [&](size_t c, int64_t y) {
+      return channel_data_[c].Row(kRenderPipelineXOffset + y) +
+             kRenderPipelineXOffset;
+    };
+
+    // Add mirrored pixels to all kInOut channels.
+    for (size_t c = 0; c < channel_data_.size(); c++) {
+      if (stage->GetChannelMode(c) != RenderPipelineChannelMode::kInOut) {
+        continue;
+      }
+      // Horizontal mirroring.
+      for (size_t y = 0; y < input_sizes[c].second; y++) {
+        float* row = get_row(c, y);
+        for (size_t ix = 0; ix < stage->settings_.border_x; ix++) {
+          *(row - ix - 1) = row[Mirror(-ssize_t(ix) - 1, input_sizes[c].first)];
+        }
+        for (size_t ix = 0; ix < stage->settings_.border_x; ix++) {
+          *(row + ix + input_sizes[c].first) =
+              row[Mirror(ix + input_sizes[c].first, input_sizes[c].first)];
+        }
+      }
+      // Vertical mirroring.
+      for (int y = 0; y < static_cast<int>(stage->settings_.border_y); y++) {
+        memcpy(get_row(c, -y - 1) - stage->settings_.border_x,
+               get_row(c, Mirror(-ssize_t(y) - 1, input_sizes[c].second)) -
+                   stage->settings_.border_x,
+               sizeof(float) *
+                   (input_sizes[c].first + 2 * stage->settings_.border_x));
+      }
+      for (int y = 0; y < static_cast<int>(stage->settings_.border_y); y++) {
+        memcpy(
+            get_row(c, input_sizes[c].second + y) - stage->settings_.border_x,
+            get_row(c,
+                    Mirror(input_sizes[c].second + y, input_sizes[c].second)) -
+                stage->settings_.border_x,
+            sizeof(float) *
+                (input_sizes[c].first + 2 * stage->settings_.border_x));
+      }
+    }
+
+    size_t ysize = 0;
+    size_t xsize = 0;
+    for (size_t c = 0; c < channel_data_.size(); c++) {
+      if (stage->GetChannelMode(c) == RenderPipelineChannelMode::kIgnored) {
+        continue;
+      }
+      ysize = std::max(input_sizes[c].second, ysize);
+      xsize = std::max(input_sizes[c].first, xsize);
+    }
+
+    JXL_ASSERT(ysize != 0);
+    JXL_ASSERT(xsize != 0);
+
+    RenderPipelineStage::RowInfo input_rows(channel_data_.size());
+    RenderPipelineStage::RowInfo output_rows(channel_data_.size());
+
+    // Run the pipeline.
+    {
+      stage->SetInputSizes(input_sizes);
+      int border_y = stage->settings_.border_y;
+      for (size_t y = 0; y < ysize; y++) {
+        // Prepare input rows.
+        for (size_t c = 0; c < channel_data_.size(); c++) {
+          if (stage->GetChannelMode(c) == RenderPipelineChannelMode::kIgnored) {
+            continue;
+          }
+          input_rows[c].resize(2 * border_y + 1);
+          for (int iy = -border_y; iy <= border_y; iy++) {
+            input_rows[c][iy + border_y] =
+                channel_data_[c].Row(y + kRenderPipelineXOffset + iy);
+          }
+        }
+        // Prepare output rows.
+        for (size_t c = 0; c < channel_data_.size(); c++) {
+          if (!output_channels[c]) continue;
+          output_rows[c].resize(1 << stage->settings_.shift_y);
+          for (size_t iy = 0; iy < output_rows[c].size(); iy++) {
+            output_rows[c][iy] = output_channels[c]->Row(
+                (y << stage->settings_.shift_y) + iy + kRenderPipelineXOffset);
+          }
+        }
+        stage->ProcessRow(input_rows, output_rows, /*xextra=*/0, xsize,
+                          /*xpos=*/0, y, thread_id);
+      }
+    }
+
+    // Move new channels to current channels.
+    for (size_t c = 0; c < channel_data_.size(); c++) {
+      if (stage->GetChannelMode(c) != RenderPipelineChannelMode::kInOut) {
+        continue;
+      }
+      channel_data_[c] = std::move(new_channels[c]);
+    }
+    for (size_t c = 0; c < channel_data_.size(); c++) {
+      size_t next_stage = std::min(stage_id + 1, channel_shifts_.size() - 1);
+      size_t xsize = DivCeil(frame_dimensions_.xsize_upsampled,
+                             1 << channel_shifts_[next_stage][c].first);
+      size_t ysize = DivCeil(frame_dimensions_.ysize_upsampled,
+                             1 << channel_shifts_[next_stage][c].second);
+      channel_data_[c].ShrinkTo(xsize + 2 * kRenderPipelineXOffset,
+                                ysize + 2 * kRenderPipelineXOffset);
+      JXL_CHECK_PLANE_INITIALIZED(
+          channel_data_[c],
+          Rect(kRenderPipelineXOffset, kRenderPipelineXOffset, xsize, ysize),
+          c);
+    }
+
+    if (stage->SwitchToImageDimensions()) {
+      size_t image_xsize, image_ysize;
+      FrameOrigin frame_origin;
+      stage->GetImageDimensions(&image_xsize, &image_ysize, &frame_origin);
+      frame_dimensions_.Set(image_xsize, image_ysize, 0, 0, 0, false, 1);
+      std::vector<ImageF> old_channels = std::move(channel_data_);
+      channel_data_.clear();
+      channel_data_.reserve(old_channels.size());
+      for (size_t c = 0; c < old_channels.size(); c++) {
+        channel_data_.emplace_back(2 * kRenderPipelineXOffset + image_xsize,
+                                   2 * kRenderPipelineXOffset + image_ysize);
+      }
+      for (size_t y = 0; y < image_ysize; ++y) {
+        for (size_t c = 0; c < channel_data_.size(); c++) {
+          output_rows[c].resize(1);
+          output_rows[c][0] = channel_data_[c].Row(kRenderPipelineXOffset + y);
+        }
+        // TODO(sboukortt): consider doing this only on the parts of the
+        // background that won't be occluded.
+        stage->ProcessPaddingRow(output_rows, image_xsize, 0, y);
+      }
+      ssize_t x0 = frame_origin.x0;
+      ssize_t y0 = frame_origin.y0;
+      size_t x0_fg = 0;
+      size_t y0_fg = 0;
+      // Clip to the image; (x0_fg, y0_fg) is the frame's first visible pixel.
+      if (x0 < 0) {
+        xsize += x0;
+        x0_fg -= x0;
+        x0 = 0;
+      }
+      if (x0 + xsize > image_xsize) xsize = image_xsize - x0;
+      if (y0 < 0) {
+        ysize += y0;
+        y0_fg -= y0;
+        y0 = 0;
+      }
+      if (y0 + ysize > image_ysize) ysize = image_ysize - y0;
+      const Rect rect_fg_relative_to_image =
+          Rect(x0, y0, xsize, ysize)
+              .Translate(kRenderPipelineXOffset, kRenderPipelineXOffset);
+      const Rect rect_fg =
+          Rect(x0_fg, y0_fg, xsize, ysize)
+              .Translate(kRenderPipelineXOffset, kRenderPipelineXOffset);
+      for (size_t c = 0; c < channel_data_.size(); c++) {
+        CopyImageTo(rect_fg, old_channels[c], rect_fg_relative_to_image,
+                    &channel_data_[c]);
+      }
+    }
+  }
+}
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/simple_render_pipeline.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/simple_render_pipeline.h
new file mode 100644
index 0000000000..10f4505912
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/simple_render_pipeline.h
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_SIMPLE_RENDER_PIPELINE_H_
+#define LIB_JXL_RENDER_PIPELINE_SIMPLE_RENDER_PIPELINE_H_
+
+#include <stdint.h>
+
+#include "lib/jxl/render_pipeline/render_pipeline.h"
+
+namespace jxl {
+
+// A RenderPipeline that is "obviously correct"; it may use potentially large
+// amounts of memory and be slow. It is intended to be used mostly for testing
+// purposes.
+class SimpleRenderPipeline : public RenderPipeline {
+  std::vector<std::pair<ImageF*, Rect>> PrepareBuffers(
+      size_t group_id, size_t thread_id) override;  // one (full-frame buffer, group rect) pair per channel
+  // Runs all stages over the full-frame buffers once enough passes have input.
+  void ProcessBuffers(size_t group_id, size_t thread_id) override;
+  // Allocates channel_data_ on the first call; later calls return immediately.
+  void PrepareForThreadsInternal(size_t num, bool use_group_ids) override;
+
+  // Full frame buffers. Both X and Y dimensions are padded by
+  // kRenderPipelineXOffset.
+  std::vector<ImageF> channel_data_;
+  size_t processed_passes_ = 0;  // incremented each time ProcessBuffers runs the stages
+
+ private:
+  Rect MakeChannelRect(size_t group_id, size_t channel);  // rect of one group inside a channel's padded buffer
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_RENDER_PIPELINE_SIMPLE_RENDER_PIPELINE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_blending.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_blending.cc
new file mode 100644
index 0000000000..a66a60daec
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_blending.cc
@@ -0,0 +1,247 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_blending.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_blending.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/blending.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+class BlendingStage : public RenderPipelineStage {  // blends the current frame over a reference frame
+ public:
+  explicit BlendingStage(const PassesDecoderState* dec_state,
+                         const ColorEncoding& frame_color_encoding)  // frame_color_encoding is not used
+      : RenderPipelineStage(RenderPipelineStage::Settings()),
+        state_(*dec_state->shared) {
+    image_xsize_ = state_.frame_header.nonserialized_metadata->xsize();
+    image_ysize_ = state_.frame_header.nonserialized_metadata->ysize();
+    extra_channel_info_ =
+        &state_.frame_header.nonserialized_metadata->m.extra_channel_info;
+    info_ = state_.frame_header.blending_info;
+    const std::vector<BlendingInfo>& ec_info =
+        state_.frame_header.extra_channel_blending_info;
+    const ImageBundle& bg = state_.reference_frames[info_.source].frame;
+    bg_ = &bg;
+    if (bg.xsize() == 0 || bg.ysize() == 0) {  // empty colour background: blend against zeros
+      zeroes_.resize(image_xsize_, 0.f);
+    } else if (state_.reference_frames[info_.source].ib_is_in_xyb) {
+      initialized_ = JXL_FAILURE(
+          "Trying to blend XYB reference frame %i and non-XYB frame",
+          info_.source);
+      return;
+    } else if (std::any_of(ec_info.begin(), ec_info.end(),  // some extra channel has an empty background
+                           [this](const BlendingInfo& info) {
+                             const ImageBundle& bg =
+                                 state_.reference_frames[info.source].frame;
+                             return bg.xsize() == 0 || bg.ysize() == 0;
+                           })) {
+      zeroes_.resize(image_xsize_, 0.f);
+    }
+    // A non-empty background must cover the whole image and start at (0, 0).
+    auto verify_bg_size = [&](const ImageBundle& bg) -> Status {
+      if (bg.xsize() != 0 && bg.ysize() != 0 &&
+          (bg.xsize() < image_xsize_ || bg.ysize() < image_ysize_ ||
+           bg.origin.x0 != 0 || bg.origin.y0 != 0)) {
+        return JXL_FAILURE("Trying to use a %" PRIuS "x%" PRIuS
+                           " crop as a background",
+                           bg.xsize(), bg.ysize());
+      }
+      return true;
+    };
+    // Check the colour background, then the background of each extra channel.
+    Status ok = verify_bg_size(bg);
+    for (const auto& info : ec_info) {
+      const ImageBundle& bg = state_.reference_frames[info.source].frame;
+      if (!!ok) ok = verify_bg_size(bg);  // keep the first failure
+    }
+    if (!ok) {
+      initialized_ = ok;
+      return;
+    }
+
+    if (state_.metadata->m.xyb_encoded) {
+      if (!dec_state->output_encoding_info.color_encoding_is_original) {
+        initialized_ = JXL_FAILURE("Blending in unsupported color space");
+        return;
+      }
+    }
+    // Translate the frame header's blend modes into PatchBlending entries.
+    blending_info_.resize(ec_info.size() + 1);
+    auto make_blending = [&](const BlendingInfo& info, PatchBlending* pb) {
+      pb->alpha_channel = info.alpha_channel;
+      pb->clamp = info.clamp;
+      switch (info.mode) {
+        case BlendMode::kReplace: {
+          pb->mode = PatchBlendMode::kReplace;
+          break;
+        }
+        case BlendMode::kAdd: {
+          pb->mode = PatchBlendMode::kAdd;
+          break;
+        }
+        case BlendMode::kMul: {
+          pb->mode = PatchBlendMode::kMul;
+          break;
+        }
+        case BlendMode::kBlend: {
+          pb->mode = PatchBlendMode::kBlendAbove;
+          break;
+        }
+        case BlendMode::kAlphaWeightedAdd: {
+          pb->mode = PatchBlendMode::kAlphaWeightedAddAbove;
+          break;
+        }
+        default: {
+          JXL_UNREACHABLE(
+              "Invalid blend mode");  // should have failed to decode
+        }
+      }
+    };
+    make_blending(info_, &blending_info_[0]);  // entry 0: colour channels
+    for (size_t i = 0; i < ec_info.size(); i++) {
+      make_blending(ec_info[i], &blending_info_[1 + i]);  // entry 1 + i: extra channel i
+    }
+  }
+
+  Status IsInitialized() const override { return initialized_; }
+  // Blends one frame row with the matching background row; result goes to the input rows.
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    JXL_ASSERT(initialized_);
+    const FrameOrigin& frame_origin = state_.frame_header.frame_origin;
+    ssize_t bg_xpos = frame_origin.x0 + static_cast<ssize_t>(xpos);  // row position in image coordinates
+    ssize_t bg_ypos = frame_origin.y0 + static_cast<ssize_t>(ypos);
+    int offset = 0;  // number of leading frame pixels skipped because they are left of the image
+    if (bg_xpos + static_cast<ssize_t>(xsize) <= 0 ||
+        frame_origin.x0 >= static_cast<ssize_t>(image_xsize_) || bg_ypos < 0 ||
+        bg_ypos >= static_cast<ssize_t>(image_ysize_)) {
+      return;  // nothing of this row is blended
+    }
+    if (bg_xpos < 0) {  // clip the part left of the image
+      offset -= bg_xpos;
+      xsize += bg_xpos;
+      bg_xpos = 0;
+    }
+    if (bg_xpos + xsize > image_xsize_) {  // clip the part right of the image
+      xsize =
+          std::max<ssize_t>(0, static_cast<ssize_t>(image_xsize_) - bg_xpos);
+    }
+    std::vector<const float*> bg_row_ptrs_(input_rows.size());
+    std::vector<float*> fg_row_ptrs_(input_rows.size());
+    size_t num_c = std::min(input_rows.size(), extra_channel_info_->size() + 3);
+    for (size_t c = 0; c < num_c; ++c) {
+      fg_row_ptrs_[c] = GetInputRow(input_rows, c, 0) + offset;
+      if (c < 3) {  // colour channels use the main background frame
+        bg_row_ptrs_[c] = bg_->xsize() != 0 && bg_->ysize() != 0
+                              ? bg_->color().ConstPlaneRow(c, bg_ypos) + bg_xpos
+                              : zeroes_.data();
+      } else {  // extra channel c - 3 uses its own reference frame
+        const ImageBundle& ec_bg =
+            state_
+                .reference_frames[state_.frame_header
+                                      .extra_channel_blending_info[c - 3]
+                                      .source]
+                .frame;
+        bg_row_ptrs_[c] =
+            ec_bg.xsize() != 0 && ec_bg.ysize() != 0
+                ? ec_bg.extra_channels()[c - 3].ConstRow(bg_ypos) + bg_xpos
+                : zeroes_.data();
+      }
+    }
+    PerformBlending(bg_row_ptrs_.data(), fg_row_ptrs_.data(),
+                    fg_row_ptrs_.data(), 0, xsize, blending_info_[0],  // fg rows are passed twice; presumably input and output
+                    blending_info_.data() + 1, *extra_channel_info_);
+  }
+  // Every channel is reported as kInPlace.
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    return RenderPipelineChannelMode::kInPlace;
+  }
+  // Reports that buffers should switch to the image dimensions returned below.
+  bool SwitchToImageDimensions() const override { return true; }
+
+  void GetImageDimensions(size_t* xsize, size_t* ysize,
+                          FrameOrigin* frame_origin) const override {
+    *xsize = image_xsize_;
+    *ysize = image_ysize_;
+    *frame_origin = state_.frame_header.frame_origin;
+  }
+  // Fills one output row with the background, or with zeros if it is empty.
+  void ProcessPaddingRow(const RowInfo& output_rows, size_t xsize, size_t xpos,
+                         size_t ypos) const override {
+    if (bg_->xsize() == 0 || bg_->ysize() == 0) {
+      for (size_t c = 0; c < 3; ++c) {
+        memset(GetInputRow(output_rows, c, 0), 0, xsize * sizeof(float));
+      }
+    } else {
+      for (size_t c = 0; c < 3; ++c) {
+        memcpy(GetInputRow(output_rows, c, 0),
+               bg_->color().ConstPlaneRow(c, ypos) + xpos,
+               xsize * sizeof(float));
+      }
+    }
+    for (size_t ec = 0; ec < extra_channel_info_->size(); ++ec) {  // same for every extra channel
+      const ImageBundle& ec_bg =
+          state_
+              .reference_frames
+                  [state_.frame_header.extra_channel_blending_info[ec].source]
+              .frame;
+      if (ec_bg.xsize() == 0 || ec_bg.ysize() == 0) {
+        memset(GetInputRow(output_rows, 3 + ec, 0), 0, xsize * sizeof(float));
+      } else {
+        memcpy(GetInputRow(output_rows, 3 + ec, 0),
+               ec_bg.extra_channels()[ec].ConstRow(ypos) + xpos,
+               xsize * sizeof(float));
+      }
+    }
+  }
+
+  const char* GetName() const override { return "Blending"; }
+
+ private:
+  const PassesSharedState& state_;
+  BlendingInfo info_;  // blending info of the colour channels
+  const ImageBundle* bg_;  // reference frame used as colour background
+  Status initialized_ = true;  // set to a failure by the constructor on invalid input
+  size_t image_xsize_;
+  size_t image_ysize_;
+  std::vector<PatchBlending> blending_info_;  // [0] colour, [1 + i] extra channel i
+  const std::vector<ExtraChannelInfo>* extra_channel_info_;
+  std::vector<float> zeroes_;  // image_xsize_ zeros, used as the row of an empty background
+};
+
+std::unique_ptr<RenderPipelineStage> GetBlendingStage(  // per-target factory for BlendingStage
+    const PassesDecoderState* dec_state,
+    const ColorEncoding& frame_color_encoding) {
+  return jxl::make_unique<BlendingStage>(dec_state, frame_color_encoding);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetBlendingStage);
+
+std::unique_ptr<RenderPipelineStage> GetBlendingStage(  // public entry point
+    const PassesDecoderState* dec_state,
+    const ColorEncoding& frame_color_encoding) {
+  return HWY_DYNAMIC_DISPATCH(GetBlendingStage)(dec_state,  // forwards to the implementation chosen by Highway
+                                                frame_color_encoding);
+}
+
+}  // namespace jxl
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_blending.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_blending.h
new file mode 100644
index 0000000000..c8db7490cd
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_blending.h
@@ -0,0 +1,24 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_BLENDING_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_BLENDING_H_
+
+#include <utility>
+
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+#include "lib/jxl/splines.h"
+
+namespace jxl {
+
+// Applies blending if applicable.
+std::unique_ptr<RenderPipelineStage> GetBlendingStage(
+    const PassesDecoderState* dec_state,
+    const ColorEncoding& frame_color_encoding);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_RENDER_PIPELINE_STAGE_BLENDING_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_chroma_upsampling.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_chroma_upsampling.cc
new file mode 100644
index 0000000000..936fbd3a44
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_chroma_upsampling.cc
@@ -0,0 +1,127 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_chroma_upsampling.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_chroma_upsampling.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/simd_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+
+class HorizontalChromaUpsamplingStage : public RenderPipelineStage {  // 2x horizontal upsampling of one channel
+ public:
+  explicit HorizontalChromaUpsamplingStage(size_t channel)
+      : RenderPipelineStage(RenderPipelineStage::Settings::ShiftX(
+            /*shift=*/1, /*border=*/1)),
+        c_(channel) {}
+  // Per input x: left = 0.75*in[x] + 0.25*in[x-1], right = 0.75*in[x] + 0.25*in[x+1].
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    HWY_FULL(float) df;
+    xextra = RoundUpTo(xextra, Lanes(df));  // process whole vectors only
+    auto threefour = Set(df, 0.75f);
+    auto onefour = Set(df, 0.25f);
+    const float* row_in = GetInputRow(input_rows, c_, 0);
+    float* row_out = GetOutputRow(output_rows, c_, 0);
+    for (ssize_t x = -xextra; x < static_cast<ssize_t>(xsize + xextra);
+         x += Lanes(df)) {
+      auto current = Mul(LoadU(df, row_in + x), threefour);
+      auto prev = LoadU(df, row_in + x - 1);  // reads one sample left of x
+      auto next = LoadU(df, row_in + x + 1);  // reads one sample right of x
+      auto left = MulAdd(onefour, prev, current);
+      auto right = MulAdd(onefour, next, current);
+      StoreInterleaved(df, left, right, row_out + x * 2);  // presumably writes left/right alternately from 2x on
+    }
+  }
+  // Only channel c_ is processed (kInOut); all others are ignored.
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    return c == c_ ? RenderPipelineChannelMode::kInOut
+                   : RenderPipelineChannelMode::kIgnored;
+  }
+
+  const char* GetName() const override { return "HChromaUps"; }
+
+ private:
+  size_t c_;  // index of the channel being upsampled
+};
+
+class VerticalChromaUpsamplingStage : public RenderPipelineStage {  // 2x vertical upsampling of one channel
+ public:
+  explicit VerticalChromaUpsamplingStage(size_t channel)
+      : RenderPipelineStage(RenderPipelineStage::Settings::ShiftY(
+            /*shift=*/1, /*border=*/1)),
+        c_(channel) {}
+  // Each input row yields two output rows: 0.75*mid + 0.25*top and 0.75*mid + 0.25*bot.
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    HWY_FULL(float) df;
+    xextra = RoundUpTo(xextra, Lanes(df));  // process whole vectors only
+    auto threefour = Set(df, 0.75f);
+    auto onefour = Set(df, 0.25f);
+    const float* row_top = GetInputRow(input_rows, c_, -1);
+    const float* row_mid = GetInputRow(input_rows, c_, 0);
+    const float* row_bot = GetInputRow(input_rows, c_, 1);
+    float* row_out0 = GetOutputRow(output_rows, c_, 0);
+    float* row_out1 = GetOutputRow(output_rows, c_, 1);
+    for (ssize_t x = -xextra; x < static_cast<ssize_t>(xsize + xextra);
+         x += Lanes(df)) {
+      auto it = LoadU(df, row_top + x);
+      auto im = LoadU(df, row_mid + x);
+      auto ib = LoadU(df, row_bot + x);
+      auto im_scaled = Mul(im, threefour);
+      Store(MulAdd(it, onefour, im_scaled), df, row_out0 + x);  // NOTE(review): Store, not StoreU - presumably needs aligned output rows; confirm
+      Store(MulAdd(ib, onefour, im_scaled), df, row_out1 + x);
+    }
+  }
+  // Only channel c_ is processed (kInOut); all others are ignored.
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    return c == c_ ? RenderPipelineChannelMode::kInOut
+                   : RenderPipelineChannelMode::kIgnored;
+  }
+
+  const char* GetName() const override { return "VChromaUps"; }
+
+ private:
+  size_t c_;  // index of the channel being upsampled
+};
+
+std::unique_ptr<RenderPipelineStage> GetChromaUpsamplingStage(size_t channel,
+                                                              bool horizontal) {  // per-target factory
+  if (horizontal) {  // pick the stage for the requested direction
+    return jxl::make_unique<HorizontalChromaUpsamplingStage>(channel);
+  } else {
+    return jxl::make_unique<VerticalChromaUpsamplingStage>(channel);
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetChromaUpsamplingStage);
+
+std::unique_ptr<RenderPipelineStage> GetChromaUpsamplingStage(size_t channel,
+                                                              bool horizontal) {  // public entry point
+  return HWY_DYNAMIC_DISPATCH(GetChromaUpsamplingStage)(channel, horizontal);  // forwards to the implementation chosen by Highway
+}
+
+}  // namespace jxl
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_chroma_upsampling.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_chroma_upsampling.h
new file mode 100644
index 0000000000..b8bfc15f5f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_chroma_upsampling.h
@@ -0,0 +1,27 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_CHROMA_UPSAMPLING_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_CHROMA_UPSAMPLING_H_
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Applies simple upsampling, either horizontal or vertical, to the given
+// channel.
+std::unique_ptr<RenderPipelineStage> GetChromaUpsamplingStage(size_t channel,
+                                                              bool horizontal);
+}  // namespace jxl
+
+#endif  // LIB_JXL_RENDER_PIPELINE_STAGE_CHROMA_UPSAMPLING_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_epf.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_epf.cc
new file mode 100644
index 0000000000..a75652db0e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_epf.cc
@@ -0,0 +1,524 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_epf.h"
+
+#include "lib/jxl/epf.h"
+#include "lib/jxl/sanitizers.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_epf.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+// TODO(veluca): In principle, vectors could be not capped, if we want to deal
+// with having two different sigma values in a single vector.
+using DF = HWY_CAPPED(float, 8);
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::AbsDiff;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Div;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Vec;
+using hwy::HWY_NAMESPACE::VFromD;
+using hwy::HWY_NAMESPACE::ZeroIfNegative;
+
+JXL_INLINE Vec<DF> Weight(Vec<DF> sad, Vec<DF> inv_sigma, Vec<DF> thres) {
+  // sad * inv_sigma + 1, zeroed where negative. `thres` is accepted, not read.
+  return ZeroIfNegative(MulAdd(sad, inv_sigma, Set(DF(), 1.0f)));
+}
+
+// 5x5 plus-shaped kernel with 5 SADs per pixel (3x3 plus-shaped). So this makes
+// this filter a 7x7 filter.
+class EPF0Stage : public RenderPipelineStage {
+ public:
+  EPF0Stage(const LoopFilter& lf, const ImageF& sigma)
+      : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric(
+            /*shift=*/0, /*border=*/3)),
+        lf_(lf),
+        sigma_(&sigma) {}  // lf is copied; sigma is referenced by pointer.
+  // Accumulates the pixel at (row, x) into X/Y/B/w, weighted by its SAD.
+  template <bool aligned>
+  JXL_INLINE void AddPixel(int row, float* JXL_RESTRICT rows[3][7], ssize_t x,
+                           Vec<DF> sad, Vec<DF> inv_sigma,
+                           Vec<DF>* JXL_RESTRICT X, Vec<DF>* JXL_RESTRICT Y,
+                           Vec<DF>* JXL_RESTRICT B,
+                           Vec<DF>* JXL_RESTRICT w) const {
+    auto cx = aligned ? Load(DF(), rows[0][3 + row] + x)
+                      : LoadU(DF(), rows[0][3 + row] + x);
+    auto cy = aligned ? Load(DF(), rows[1][3 + row] + x)
+                      : LoadU(DF(), rows[1][3 + row] + x);
+    auto cb = aligned ? Load(DF(), rows[2][3 + row] + x)
+                      : LoadU(DF(), rows[2][3 + row] + x);
+    // The third argument is forwarded to Weight(), which does not read it.
+    auto weight = Weight(sad, inv_sigma, Set(DF(), lf_.epf_pass1_zeroflush));
+    *w = Add(*w, weight);
+    *X = MulAdd(weight, cx, *X);
+    *Y = MulAdd(weight, cy, *Y);
+    *B = MulAdd(weight, cb, *B);
+  }
+  // Filters one row of channels 0-2 using 12 SAD-weighted neighbours.
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    DF df;
+
+    using V = decltype(Zero(df));
+    V t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, tA, tB;
+    V* sads[12] = {&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7, &t8, &t9, &tA, &tB};
+    // Whole vectors only: round xextra up so x advances by Lanes(df).
+    xextra = RoundUpTo(xextra, Lanes(df));
+    const float* JXL_RESTRICT row_sigma =
+        sigma_->Row(ypos / kBlockDim + kSigmaPadding);
+
+    float sm = lf_.epf_pass0_sigma_scale * 1.65;
+    float bsm = sm * lf_.epf_border_sad_mul;
+    // Per-column SAD multipliers: first and last column of a block use bsm.
+    HWY_ALIGN float sad_mul_center[kBlockDim] = {bsm, sm, sm, sm,
+                                                 sm,  sm, sm, bsm};
+    HWY_ALIGN float sad_mul_border[kBlockDim] = {bsm, bsm, bsm, bsm,
+                                                 bsm, bsm, bsm, bsm};
+    float* JXL_RESTRICT rows[3][7];
+    for (size_t c = 0; c < 3; c++) {
+      for (int i = 0; i < 7; i++) {
+        rows[c][i] = GetInputRow(input_rows, c, i - 3);
+      }
+    }
+    // Rows at ypos % kBlockDim == 0 or kBlockDim - 1 use bsm in every column.
+    const float* sad_mul =
+        (ypos % kBlockDim == 0 || ypos % kBlockDim == kBlockDim - 1)
+            ? sad_mul_border
+            : sad_mul_center;
+
+    for (ssize_t x = -xextra; x < static_cast<ssize_t>(xsize + xextra);
+         x += Lanes(df)) {
+      size_t bx = (x + xpos + kSigmaPadding * kBlockDim) / kBlockDim;
+      size_t ix = (x + xpos) % kBlockDim;
+      // row_sigma[bx] below kMinSigma: copy the input pixels through unfiltered.
+      if (row_sigma[bx] < kMinSigma) {
+        for (size_t c = 0; c < 3; c++) {
+          auto px = Load(df, rows[c][3 + 0] + x);
+          StoreU(px, df, GetOutputRow(output_rows, c, 0) + x);
+        }
+        continue;
+      }
+
+      const auto sm = Load(df, sad_mul + ix);
+      const auto inv_sigma = Mul(Set(df, row_sigma[bx]), sm);
+
+      for (size_t i = 0; i < 12; i++) *sads[i] = Zero(df);
+      constexpr std::array<int, 2> sads_off[12] = {
+          {{-2, 0}}, {{-1, -1}}, {{-1, 0}}, {{-1, 1}}, {{0, -2}}, {{0, -1}},
+          {{0, 1}},  {{0, 2}},   {{1, -1}}, {{1, 0}},  {{1, 1}},  {{2, 0}},
+      };
+
+      // compute sads
+      // TODO(veluca): consider unrolling and optimizing this.
+      for (size_t c = 0; c < 3; c++) {
+        auto scale = Set(df, lf_.epf_channel_scale[c]);
+        for (size_t i = 0; i < 12; i++) {
+          auto sad = Zero(df);
+          constexpr std::array<int, 2> plus_off[] = {
+              {{0, 0}}, {{-1, 0}}, {{0, -1}}, {{1, 0}}, {{0, 1}}};
+          for (size_t j = 0; j < 5; j++) {
+            const auto r11 =
+                LoadU(df, rows[c][3 + plus_off[j][0]] + x + plus_off[j][1]);
+            const auto c11 =
+                LoadU(df, rows[c][3 + sads_off[i][0] + plus_off[j][0]] + x +
+                              sads_off[i][1] + plus_off[j][1]);
+            sad = Add(sad, AbsDiff(r11, c11));
+          }
+          *sads[i] = MulAdd(sad, scale, *sads[i]);
+        }
+      }
+      const auto x_cc = Load(df, rows[0][3 + 0] + x);
+      const auto y_cc = Load(df, rows[1][3 + 0] + x);
+      const auto b_cc = Load(df, rows[2][3 + 0] + x);
+      // The centre pixel seeds the weighted sums with weight 1.
+      auto w = Set(df, 1);
+      auto X = x_cc;
+      auto Y = y_cc;
+      auto B = b_cc;
+
+      for (size_t i = 0; i < 12; i++) {
+        AddPixel</*aligned=*/false>(/*row=*/sads_off[i][0], rows,
+                                    x + sads_off[i][1], *sads[i], inv_sigma, &X,
+                                    &Y, &B, &w);
+      }
+#if JXL_HIGH_PRECISION
+      auto inv_w = Div(Set(df, 1.0f), w);
+#else
+      auto inv_w = ApproximateReciprocal(w);
+#endif
+      StoreU(Mul(X, inv_w), df, GetOutputRow(output_rows, 0, 0) + x);
+      StoreU(Mul(Y, inv_w), df, GetOutputRow(output_rows, 1, 0) + x);
+      StoreU(Mul(B, inv_w), df, GetOutputRow(output_rows, 2, 0) + x);
+    }
+  }
+  // Channels 0-2 are read and written (kInOut); the rest are ignored.
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    return c < 3 ? RenderPipelineChannelMode::kInOut
+                 : RenderPipelineChannelMode::kIgnored;
+  }
+
+  const char* GetName() const override { return "EPF0"; }
+
+ private:
+  LoopFilter lf_;
+  const ImageF* sigma_;
+};
+
+// 3x3 plus-shaped kernel with 5 SADs per pixel (also 3x3 plus-shaped). So this
+// makes this filter a 5x5 filter.
+class EPF1Stage : public RenderPipelineStage {
+ public:
+  EPF1Stage(const LoopFilter& lf, const ImageF& sigma)
+      : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric(
+            /*shift=*/0, /*border=*/2)),
+        lf_(lf),
+        sigma_(&sigma) {}  // lf is copied; sigma is referenced by pointer.
+  // Accumulates the pixel at (row, x) into X/Y/B/w, weighted by its SAD.
+  template <bool aligned>
+  JXL_INLINE void AddPixel(int row, float* JXL_RESTRICT rows[3][5], ssize_t x,
+                           Vec<DF> sad, Vec<DF> inv_sigma,
+                           Vec<DF>* JXL_RESTRICT X, Vec<DF>* JXL_RESTRICT Y,
+                           Vec<DF>* JXL_RESTRICT B,
+                           Vec<DF>* JXL_RESTRICT w) const {
+    auto cx = aligned ? Load(DF(), rows[0][2 + row] + x)
+                      : LoadU(DF(), rows[0][2 + row] + x);
+    auto cy = aligned ? Load(DF(), rows[1][2 + row] + x)
+                      : LoadU(DF(), rows[1][2 + row] + x);
+    auto cb = aligned ? Load(DF(), rows[2][2 + row] + x)
+                      : LoadU(DF(), rows[2][2 + row] + x);
+    // The third argument is forwarded to Weight(), which does not read it.
+    auto weight = Weight(sad, inv_sigma, Set(DF(), lf_.epf_pass1_zeroflush));
+    *w = Add(*w, weight);
+    *X = MulAdd(weight, cx, *X);
+    *Y = MulAdd(weight, cy, *Y);
+    *B = MulAdd(weight, cb, *B);
+  }
+  // Filters one row of channels 0-2 using 4 SAD-weighted neighbours.
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    DF df;
+    xextra = RoundUpTo(xextra, Lanes(df));
+    const float* JXL_RESTRICT row_sigma =
+        sigma_->Row(ypos / kBlockDim + kSigmaPadding);
+
+    float sm = 1.65f;
+    float bsm = sm * lf_.epf_border_sad_mul;
+    // Per-column SAD multipliers: first and last column of a block use bsm.
+    HWY_ALIGN float sad_mul_center[kBlockDim] = {bsm, sm, sm, sm,
+                                                 sm,  sm, sm, bsm};
+    HWY_ALIGN float sad_mul_border[kBlockDim] = {bsm, bsm, bsm, bsm,
+                                                 bsm, bsm, bsm, bsm};
+
+    float* JXL_RESTRICT rows[3][5];
+    for (size_t c = 0; c < 3; c++) {
+      for (int i = 0; i < 5; i++) {
+        rows[c][i] = GetInputRow(input_rows, c, i - 2);
+      }
+    }
+    // Rows at ypos % kBlockDim == 0 or kBlockDim - 1 use bsm in every column.
+    const float* sad_mul =
+        (ypos % kBlockDim == 0 || ypos % kBlockDim == kBlockDim - 1)
+            ? sad_mul_border
+            : sad_mul_center;
+
+    for (ssize_t x = -xextra; x < static_cast<ssize_t>(xsize + xextra);
+         x += Lanes(df)) {
+      size_t bx = (x + xpos + kSigmaPadding * kBlockDim) / kBlockDim;
+      size_t ix = (x + xpos) % kBlockDim;
+      // row_sigma[bx] below kMinSigma: copy the input pixels through unfiltered.
+      if (row_sigma[bx] < kMinSigma) {
+        for (size_t c = 0; c < 3; c++) {
+          auto px = Load(df, rows[c][2 + 0] + x);
+          Store(px, df, GetOutputRow(output_rows, c, 0) + x);
+        }
+        continue;
+      }
+
+      const auto sm = Load(df, sad_mul + ix);
+      const auto inv_sigma = Mul(Set(df, row_sigma[bx]), sm);
+      auto sad0 = Zero(df);
+      auto sad1 = Zero(df);
+      auto sad2 = Zero(df);
+      auto sad3 = Zero(df);
+
+      // compute sads
+      for (size_t c = 0; c < 3; c++) {
+        // center px = 22, px above = 21
+        auto t = Undefined(df);
+
+        const auto p20 = Load(df, rows[c][2 + -2] + x);
+        const auto p21 = Load(df, rows[c][2 + -1] + x);
+        auto sad0c = AbsDiff(p20, p21);  // SAD 2, 1
+
+        const auto p11 = LoadU(df, rows[c][2 + -1] + x - 1);
+        auto sad1c = AbsDiff(p11, p21);  // SAD 1, 2
+
+        const auto p31 = LoadU(df, rows[c][2 + -1] + x + 1);
+        auto sad2c = AbsDiff(p31, p21);  // SAD 3, 2
+
+        const auto p02 = LoadU(df, rows[c][2 + 0] + x - 2);
+        const auto p12 = LoadU(df, rows[c][2 + 0] + x - 1);
+        sad1c = Add(sad1c, AbsDiff(p02, p12));  // SAD 1, 2
+        sad0c = Add(sad0c, AbsDiff(p11, p12));  // SAD 2, 1
+
+        const auto p22 = LoadU(df, rows[c][2 + 0] + x);
+        t = AbsDiff(p12, p22);
+        sad1c = Add(sad1c, t);  // SAD 1, 2
+        sad2c = Add(sad2c, t);  // SAD 3, 2
+        t = AbsDiff(p22, p21);
+        auto sad3c = t;  // SAD 2, 3
+        sad0c = Add(sad0c, t);  // SAD 2, 1
+
+        const auto p32 = LoadU(df, rows[c][2 + 0] + x + 1);
+        sad0c = Add(sad0c, AbsDiff(p31, p32));  // SAD 2, 1
+        t = AbsDiff(p22, p32);
+        sad1c = Add(sad1c, t);  // SAD 1, 2
+        sad2c = Add(sad2c, t);  // SAD 3, 2
+
+        const auto p42 = LoadU(df, rows[c][2 + 0] + x + 2);
+        sad2c = Add(sad2c, AbsDiff(p42, p32));  // SAD 3, 2
+
+        const auto p13 = LoadU(df, rows[c][2 + 1] + x - 1);
+        sad3c = Add(sad3c, AbsDiff(p13, p12));  // SAD 2, 3
+
+        const auto p23 = Load(df, rows[c][2 + 1] + x);
+        t = AbsDiff(p22, p23);
+        sad0c = Add(sad0c, t);                  // SAD 2, 1
+        sad3c = Add(sad3c, t);                  // SAD 2, 3
+        sad1c = Add(sad1c, AbsDiff(p13, p23));  // SAD 1, 2
+
+        const auto p33 = LoadU(df, rows[c][2 + 1] + x + 1);
+        sad2c = Add(sad2c, AbsDiff(p33, p23));  // SAD 3, 2
+        sad3c = Add(sad3c, AbsDiff(p33, p32));  // SAD 2, 3
+
+        const auto p24 = Load(df, rows[c][2 + 2] + x);
+        sad3c = Add(sad3c, AbsDiff(p24, p23));  // SAD 2, 3
+
+        auto scale = Set(df, lf_.epf_channel_scale[c]);
+        sad0 = MulAdd(sad0c, scale, sad0);
+        sad1 = MulAdd(sad1c, scale, sad1);
+        sad2 = MulAdd(sad2c, scale, sad2);
+        sad3 = MulAdd(sad3c, scale, sad3);
+      }
+      const auto x_cc = Load(df, rows[0][2 + 0] + x);
+      const auto y_cc = Load(df, rows[1][2 + 0] + x);
+      const auto b_cc = Load(df, rows[2][2 + 0] + x);
+      // The centre pixel seeds the weighted sums with weight 1.
+      auto w = Set(df, 1);
+      auto X = x_cc;
+      auto Y = y_cc;
+      auto B = b_cc;
+
+      // Top row
+      AddPixel</*aligned=*/true>(/*row=*/-1, rows, x, sad0, inv_sigma, &X, &Y,
+                                 &B, &w);
+      // Center
+      AddPixel</*aligned=*/false>(/*row=*/0, rows, x - 1, sad1, inv_sigma, &X,
+                                  &Y, &B, &w);
+      AddPixel</*aligned=*/false>(/*row=*/0, rows, x + 1, sad2, inv_sigma, &X,
+                                  &Y, &B, &w);
+      // Bottom
+      AddPixel</*aligned=*/true>(/*row=*/1, rows, x, sad3, inv_sigma, &X, &Y,
+                                 &B, &w);
+#if JXL_HIGH_PRECISION
+      auto inv_w = Div(Set(df, 1.0f), w);
+#else
+      auto inv_w = ApproximateReciprocal(w);
+#endif
+      Store(Mul(X, inv_w), df, GetOutputRow(output_rows, 0, 0) + x);
+      Store(Mul(Y, inv_w), df, GetOutputRow(output_rows, 1, 0) + x);
+      Store(Mul(B, inv_w), df, GetOutputRow(output_rows, 2, 0) + x);
+    }
+  }
+  // Channels 0-2 are read and written (kInOut); the rest are ignored.
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    return c < 3 ? RenderPipelineChannelMode::kInOut
+                 : RenderPipelineChannelMode::kIgnored;
+  }
+
+  const char* GetName() const override { return "EPF1"; }
+
+ private:
+  LoopFilter lf_;
+  const ImageF* sigma_;
+};
+
+// 3x3 plus-shaped kernel with 1 SAD per pixel. So this makes this filter a 3x3
+// filter.
+class EPF2Stage : public RenderPipelineStage {
+ public:
+  EPF2Stage(const LoopFilter& lf, const ImageF& sigma)
+      : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric(
+            /*shift=*/0, /*border=*/1)),
+        lf_(lf),
+        sigma_(&sigma) {}  // lf is copied; sigma is referenced by pointer.
+  // Accumulates the pixel at (row, x), weighted by its distance to rx/ry/rb.
+  template <bool aligned>
+  JXL_INLINE void AddPixel(int row, float* JXL_RESTRICT rows[3][3], ssize_t x,
+                           Vec<DF> rx, Vec<DF> ry, Vec<DF> rb,
+                           Vec<DF> inv_sigma, Vec<DF>* JXL_RESTRICT X,
+                           Vec<DF>* JXL_RESTRICT Y, Vec<DF>* JXL_RESTRICT B,
+                           Vec<DF>* JXL_RESTRICT w) const {
+    auto cx = aligned ? Load(DF(), rows[0][1 + row] + x)
+                      : LoadU(DF(), rows[0][1 + row] + x);
+    auto cy = aligned ? Load(DF(), rows[1][1 + row] + x)
+                      : LoadU(DF(), rows[1][1 + row] + x);
+    auto cb = aligned ? Load(DF(), rows[2][1 + row] + x)
+                      : LoadU(DF(), rows[2][1 + row] + x);
+
+    auto sad = Mul(AbsDiff(cx, rx), Set(DF(), lf_.epf_channel_scale[0]));
+    sad = MulAdd(AbsDiff(cy, ry), Set(DF(), lf_.epf_channel_scale[1]), sad);
+    sad = MulAdd(AbsDiff(cb, rb), Set(DF(), lf_.epf_channel_scale[2]), sad);
+    // The third argument is forwarded to Weight(), which does not read it.
+    auto weight = Weight(sad, inv_sigma, Set(DF(), lf_.epf_pass2_zeroflush));
+
+    *w = Add(*w, weight);
+    *X = MulAdd(weight, cx, *X);
+    *Y = MulAdd(weight, cy, *Y);
+    *B = MulAdd(weight, cb, *B);
+  }
+  // Filters one row of channels 0-2 using 4 directly compared neighbours.
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    DF df;
+    xextra = RoundUpTo(xextra, Lanes(df));
+    const float* JXL_RESTRICT row_sigma =
+        sigma_->Row(ypos / kBlockDim + kSigmaPadding);
+
+    float sm = lf_.epf_pass2_sigma_scale * 1.65;
+    float bsm = sm * lf_.epf_border_sad_mul;
+    // Per-column SAD multipliers: first and last column of a block use bsm.
+    HWY_ALIGN float sad_mul_center[kBlockDim] = {bsm, sm, sm, sm,
+                                                 sm,  sm, sm, bsm};
+    HWY_ALIGN float sad_mul_border[kBlockDim] = {bsm, bsm, bsm, bsm,
+                                                 bsm, bsm, bsm, bsm};
+
+    float* JXL_RESTRICT rows[3][3];
+    for (size_t c = 0; c < 3; c++) {
+      for (int i = 0; i < 3; i++) {
+        rows[c][i] = GetInputRow(input_rows, c, i - 1);
+      }
+    }
+    // Rows at ypos % kBlockDim == 0 or kBlockDim - 1 use bsm in every column.
+    const float* sad_mul =
+        (ypos % kBlockDim == 0 || ypos % kBlockDim == kBlockDim - 1)
+            ? sad_mul_border
+            : sad_mul_center;
+
+    for (ssize_t x = -xextra; x < static_cast<ssize_t>(xsize + xextra);
+         x += Lanes(df)) {
+      size_t bx = (x + xpos + kSigmaPadding * kBlockDim) / kBlockDim;
+      size_t ix = (x + xpos) % kBlockDim;
+      // row_sigma[bx] below kMinSigma: copy the input pixels through unfiltered.
+      if (row_sigma[bx] < kMinSigma) {
+        for (size_t c = 0; c < 3; c++) {
+          auto px = Load(df, rows[c][1 + 0] + x);
+          Store(px, df, GetOutputRow(output_rows, c, 0) + x);
+        }
+        continue;
+      }
+
+      const auto sm = Load(df, sad_mul + ix);
+      const auto inv_sigma = Mul(Set(df, row_sigma[bx]), sm);
+
+      const auto x_cc = Load(df, rows[0][1 + 0] + x);
+      const auto y_cc = Load(df, rows[1][1 + 0] + x);
+      const auto b_cc = Load(df, rows[2][1 + 0] + x);
+      // The centre pixel seeds the weighted sums with weight 1.
+      auto w = Set(df, 1);
+      auto X = x_cc;
+      auto Y = y_cc;
+      auto B = b_cc;
+
+      // Top row
+      AddPixel</*aligned=*/true>(/*row=*/-1, rows, x, x_cc, y_cc, b_cc,
+                                 inv_sigma, &X, &Y, &B, &w);
+      // Center
+      AddPixel</*aligned=*/false>(/*row=*/0, rows, x - 1, x_cc, y_cc, b_cc,
+                                  inv_sigma, &X, &Y, &B, &w);
+      AddPixel</*aligned=*/false>(/*row=*/0, rows, x + 1, x_cc, y_cc, b_cc,
+                                  inv_sigma, &X, &Y, &B, &w);
+      // Bottom
+      AddPixel</*aligned=*/true>(/*row=*/1, rows, x, x_cc, y_cc, b_cc,
+                                 inv_sigma, &X, &Y, &B, &w);
+#if JXL_HIGH_PRECISION
+      auto inv_w = Div(Set(df, 1.0f), w);
+#else
+      auto inv_w = ApproximateReciprocal(w);
+#endif
+      Store(Mul(X, inv_w), df, GetOutputRow(output_rows, 0, 0) + x);
+      Store(Mul(Y, inv_w), df, GetOutputRow(output_rows, 1, 0) + x);
+      Store(Mul(B, inv_w), df, GetOutputRow(output_rows, 2, 0) + x);
+    }
+  }
+  // Channels 0-2 are read and written (kInOut); the rest are ignored.
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    return c < 3 ? RenderPipelineChannelMode::kInOut
+                 : RenderPipelineChannelMode::kIgnored;
+  }
+
+  const char* GetName() const override { return "EPF2"; }
+
+ private:
+  LoopFilter lf_;
+  const ImageF* sigma_;
+};
+
+std::unique_ptr<RenderPipelineStage> GetEPFStage0(const LoopFilter& lf,
+                                                  const ImageF& sigma) {
+  return jxl::make_unique<EPF0Stage>(lf, sigma);  // border 3, 12 neighbours
+}
+
+std::unique_ptr<RenderPipelineStage> GetEPFStage1(const LoopFilter& lf,
+                                                  const ImageF& sigma) {
+  return jxl::make_unique<EPF1Stage>(lf, sigma);  // border 2, 4 neighbours
+}
+
+std::unique_ptr<RenderPipelineStage> GetEPFStage2(const LoopFilter& lf,
+                                                  const ImageF& sigma) {
+  return jxl::make_unique<EPF2Stage>(lf, sigma);  // border 1, 4 neighbours
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetEPFStage0);
+HWY_EXPORT(GetEPFStage1);
+HWY_EXPORT(GetEPFStage2);
+
+std::unique_ptr<RenderPipelineStage> GetEPFStage(const LoopFilter& lf,
+                                                 const ImageF& sigma,
+                                                 size_t epf_stage) {
+  // Maps epf_stage (0, 1 or 2) to the matching dynamically dispatched factory.
+  JXL_ASSERT(lf.epf_iters != 0);
+  if (epf_stage == 0) {
+    return HWY_DYNAMIC_DISPATCH(GetEPFStage0)(lf, sigma);
+  } else if (epf_stage == 1) {
+    return HWY_DYNAMIC_DISPATCH(GetEPFStage1)(lf, sigma);
+  } else if (epf_stage == 2) {
+    return HWY_DYNAMIC_DISPATCH(GetEPFStage2)(lf, sigma);
+  } else {
+    JXL_UNREACHABLE("Invalid EPF stage");
+  }
+}
+
+}  // namespace jxl
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_epf.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_epf.h
new file mode 100644
index 0000000000..c9d0d0c785
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_epf.h
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_EPF_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_EPF_H_
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/image.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Applies the `epf_stage`-th EPF step with the given settings and `sigma`.
+// `sigma` will be accessed with an offset of (kSigmaPadding, kSigmaPadding),
+// and should have (kSigmaBorder, kSigmaBorder) mirrored sigma values available
+// around the main image. See also filters.(h|cc)
+std::unique_ptr<RenderPipelineStage> GetEPFStage(const LoopFilter& lf,
+                                                 const ImageF& sigma,
+                                                 size_t epf_stage);
+}  // namespace jxl
+
+#endif  // LIB_JXL_RENDER_PIPELINE_STAGE_EPF_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_from_linear.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_from_linear.cc
new file mode 100644
index 0000000000..bc8f1ad8db
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_from_linear.cc
@@ -0,0 +1,190 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_from_linear.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_from_linear.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/dec_tone_mapping-inl.h"
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::IfThenZeroElse;
+
+template <typename Op>
+struct PerChannelOp {  // Lifts a single-value Op to an (r, g, b) triple.
+  explicit PerChannelOp(Op op) : op(op) {}
+  template <typename D, typename T>
+  void Transform(D d, T* r, T* g, T* b) const {
+    *r = op.Transform(d, *r);
+    *g = op.Transform(d, *g);
+    *b = op.Transform(d, *b);
+  }
+
+  Op op;
+};
+template <typename Op>
+PerChannelOp<Op> MakePerChannelOp(Op&& op) {  // Deduces Op from the argument.
+  return PerChannelOp<Op>(std::forward<Op>(op));
+}
+
+struct OpLinear {  // Identity: returns the linear value unchanged.
+  template <typename D, typename T>
+  T Transform(D d, const T& linear) const {
+    return linear;
+  }
+};
+
+struct OpRgb {  // sRGB; JXL_HIGH_PRECISION selects the non-fast path.
+  template <typename D, typename T>
+  T Transform(D d, const T& linear) const {
+#if JXL_HIGH_PRECISION
+    return TF_SRGB().EncodedFromDisplay(d, linear);
+#else
+    return FastLinearToSRGB(d, linear);
+#endif
+  }
+};
+
+struct OpPq {  // Encodes each value with TF_PQ().
+  template <typename D, typename T>
+  T Transform(D d, const T& linear) const {
+    return TF_PQ().EncodedFromDisplay(d, linear);
+  }
+};
+
+struct OpHlg {  // Applies hlg_ootf_ to r/g/b, then encodes with TF_HLG().
+  explicit OpHlg(const float luminances[3], const float intensity_target)
+      : hlg_ootf_(HlgOOTF::ToSceneLight(/*display_luminance=*/intensity_target,
+                                        luminances)) {}
+
+  template <typename D, typename T>
+  void Transform(D d, T* r, T* g, T* b) const {
+    hlg_ootf_.Apply(r, g, b);
+    *r = TF_HLG().EncodedFromDisplay(d, *r);
+    *g = TF_HLG().EncodedFromDisplay(d, *g);
+    *b = TF_HLG().EncodedFromDisplay(d, *b);
+  }
+  HlgOOTF hlg_ootf_;
+};
+
+struct Op709 {  // Encodes each value with TF_709().
+  template <typename D, typename T>
+  T Transform(D d, const T& linear) const {
+    return TF_709().EncodedFromDisplay(d, linear);
+  }
+};
+
+struct OpGamma {  // FastPowf(linear, inverse_gamma); inputs <= 1e-5 give 0.
+  const float inverse_gamma;
+  template <typename D, typename T>
+  T Transform(D d, const T& linear) const {
+    return IfThenZeroElse(Le(linear, Set(d, 1e-5f)),
+                          FastPowf(d, linear, Set(d, inverse_gamma)));
+  }
+};
+
+template <typename Op>
+class FromLinearStage : public RenderPipelineStage {
+ public:
+  explicit FromLinearStage(Op op)
+      : RenderPipelineStage(RenderPipelineStage::Settings()),
+        op_(std::move(op)) {}
+  // Applies op_ in place to channels 0-2 of one row, one vector at a time.
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    const HWY_FULL(float) d;
+    const size_t xsize_v = RoundUpTo(xsize, Lanes(d));
+    float* JXL_RESTRICT row0 = GetInputRow(input_rows, 0, 0);
+    float* JXL_RESTRICT row1 = GetInputRow(input_rows, 1, 0);
+    float* JXL_RESTRICT row2 = GetInputRow(input_rows, 2, 0);
+    // All calculations are lane-wise, still some might require
+    // value-dependent behaviour (e.g. NearestInt). Temporary unpoison last
+    // vector tail.
+    msan::UnpoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize));
+    msan::UnpoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize));
+    msan::UnpoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize));
+    for (ssize_t x = -xextra; x < (ssize_t)(xsize + xextra); x += Lanes(d)) {
+      auto r = LoadU(d, row0 + x);
+      auto g = LoadU(d, row1 + x);
+      auto b = LoadU(d, row2 + x);
+      op_.Transform(d, &r, &g, &b);
+      StoreU(r, d, row0 + x);
+      StoreU(g, d, row1 + x);
+      StoreU(b, d, row2 + x);
+    }
+    msan::PoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize));
+    msan::PoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize));
+    msan::PoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize));
+  }
+  // Channels 0-2 are modified in place (kInPlace); the rest are ignored.
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    return c < 3 ? RenderPipelineChannelMode::kInPlace
+                 : RenderPipelineChannelMode::kIgnored;
+  }
+
+  const char* GetName() const override { return "FromLinear"; }
+
+ private:
+  Op op_;
+};
+
+template <typename Op>  // Wraps op in a heap-allocated FromLinearStage<Op>.
+std::unique_ptr<FromLinearStage<Op>> MakeFromLinearStage(Op&& op) {
+  return jxl::make_unique<FromLinearStage<Op>>(std::forward<Op>(op));
+}
+
+std::unique_ptr<RenderPipelineStage> GetFromLinearStage(
+    const OutputEncodingInfo& output_encoding_info) {
+  const auto& tf = output_encoding_info.color_encoding.tf;
+  if (tf.IsLinear()) {
+    return MakeFromLinearStage(MakePerChannelOp(OpLinear()));
+  } else if (tf.IsSRGB()) {
+    return MakeFromLinearStage(MakePerChannelOp(OpRgb()));
+  } else if (tf.IsPQ()) {
+    return MakeFromLinearStage(MakePerChannelOp(OpPq()));
+  } else if (tf.IsHLG()) {
+    return MakeFromLinearStage(
+        OpHlg(output_encoding_info.luminances,
+              output_encoding_info.desired_intensity_target));
+  } else if (tf.Is709()) {
+    return MakeFromLinearStage(MakePerChannelOp(Op709()));
+  } else if (tf.IsGamma() || tf.IsDCI()) {
+    return MakeFromLinearStage(
+        MakePerChannelOp(OpGamma{output_encoding_info.inverse_gamma}));
+  } else {
+    // No known transfer function matched: this is a programming error.
+    JXL_UNREACHABLE("Invalid target encoding");
+  }
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetFromLinearStage);
+// Entry point declared in stage_from_linear.h; forwards its argument as-is.
+std::unique_ptr<RenderPipelineStage> GetFromLinearStage(
+    const OutputEncodingInfo& output_encoding_info) {
+  return HWY_DYNAMIC_DISPATCH(GetFromLinearStage)(output_encoding_info);
+}
+
+}  // namespace jxl
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_from_linear.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_from_linear.h
new file mode 100644
index 0000000000..548ab50b8c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_from_linear.h
@@ -0,0 +1,20 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_FROM_LINEAR_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_FROM_LINEAR_H_
+
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Converts the color channels from linear to the specified output encoding.
+std::unique_ptr<RenderPipelineStage> GetFromLinearStage(
+    const OutputEncodingInfo& output_encoding_info);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_RENDER_PIPELINE_STAGE_FROM_LINEAR_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_gaborish.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_gaborish.cc
new file mode 100644
index 0000000000..0917db3f9a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_gaborish.cc
@@ -0,0 +1,120 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_gaborish.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_gaborish.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+
+class GaborishStage : public RenderPipelineStage {
+ public:
+  explicit GaborishStage(const LoopFilter& lf)
+      : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric(
+            /*shift=*/0, /*border=*/1)) {
+    weights_[0] = 1;
+    weights_[1] = lf.gab_x_weight1;
+    weights_[2] = lf.gab_x_weight2;
+    weights_[3] = 1;
+    weights_[4] = lf.gab_y_weight1;
+    weights_[5] = lf.gab_y_weight2;
+    weights_[6] = 1;
+    weights_[7] = lf.gab_b_weight1;
+    weights_[8] = lf.gab_b_weight2;
+    // Normalize each channel so that w0 + 4 * (w1 + w2) == 1.
+    for (size_t c = 0; c < 3; c++) {
+      const float div =
+          weights_[3 * c] + 4 * (weights_[3 * c + 1] + weights_[3 * c + 2]);
+      const float mul = 1.0f / div;
+      weights_[3 * c] *= mul;
+      weights_[3 * c + 1] *= mul;
+      weights_[3 * c + 2] *= mul;
+    }
+  }
+  // 3x3 convolution per channel: w0 * centre + w1 * edges + w2 * corners.
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    const HWY_FULL(float) d;
+    for (size_t c = 0; c < 3; c++) {
+      float* JXL_RESTRICT row_t = GetInputRow(input_rows, c, -1);
+      float* JXL_RESTRICT row_m = GetInputRow(input_rows, c, 0);
+      float* JXL_RESTRICT row_b = GetInputRow(input_rows, c, 1);
+      float* JXL_RESTRICT row_out = GetOutputRow(output_rows, c, 0);
+      const auto w0 = Set(d, weights_[3 * c + 0]);
+      const auto w1 = Set(d, weights_[3 * c + 1]);
+      const auto w2 = Set(d, weights_[3 * c + 2]);
+// Group data need only be aligned to a block; for >=512 bit vectors, this may
+// result in unaligned loads.
+#if HWY_CAP_GE512
+#define LoadMaybeU LoadU
+#else
+#define LoadMaybeU Load
+#endif
+      // Since GetInputRow(input_rows, c, {-1, 0, 1}) is aligned, rounding
+      // xextra up to Lanes(d) doesn't access anything problematic.
+      for (ssize_t x = -RoundUpTo(xextra, Lanes(d));
+           x < (ssize_t)(xsize + xextra); x += Lanes(d)) {
+        const auto t = LoadMaybeU(d, row_t + x);
+        const auto tl = LoadU(d, row_t + x - 1);
+        const auto tr = LoadU(d, row_t + x + 1);
+        const auto m = LoadMaybeU(d, row_m + x);
+        const auto l = LoadU(d, row_m + x - 1);
+        const auto r = LoadU(d, row_m + x + 1);
+        const auto b = LoadMaybeU(d, row_b + x);
+        const auto bl = LoadU(d, row_b + x - 1);
+        const auto br = LoadU(d, row_b + x + 1);
+        const auto sum0 = m;
+        const auto sum1 = Add(Add(l, r), Add(t, b));
+        const auto sum2 = Add(Add(tl, tr), Add(bl, br));
+        auto pixels = MulAdd(sum2, w2, MulAdd(sum1, w1, Mul(sum0, w0)));
+        Store(pixels, d, row_out + x);
+      }
+    }
+  }
+#undef LoadMaybeU
+  // Channels 0-2 are read and written (kInOut); the rest are ignored.
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    return c < 3 ? RenderPipelineChannelMode::kInOut
+                 : RenderPipelineChannelMode::kIgnored;
+  }
+
+  const char* GetName() const override { return "Gab"; }
+
+ private:
+  float weights_[9];
+};
+
+std::unique_ptr<RenderPipelineStage> GetGaborishStage(const LoopFilter& lf) {
+  return jxl::make_unique<GaborishStage>(lf);  // copies its weights out of lf
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetGaborishStage);
+// Declared in stage_gaborish.h; asserts lf.gab == 1, then dispatches.
+std::unique_ptr<RenderPipelineStage> GetGaborishStage(const LoopFilter& lf) {
+  JXL_ASSERT(lf.gab == 1);
+  return HWY_DYNAMIC_DISPATCH(GetGaborishStage)(lf);
+}
+
+}  // namespace jxl
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_gaborish.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_gaborish.h
new file mode 100644
index 0000000000..761800f668
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_gaborish.h
@@ -0,0 +1,25 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_GABORISH_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_GABORISH_H_
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Applies decoder-side Gaborish with the given settings. `lf.gab` must be 1.
+std::unique_ptr<RenderPipelineStage> GetGaborishStage(const LoopFilter& lf);
+}  // namespace jxl
+
+#endif  // LIB_JXL_RENDER_PIPELINE_STAGE_GABORISH_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_noise.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_noise.cc
new file mode 100644
index 0000000000..62abd8fb0f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_noise.cc
@@ -0,0 +1,306 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_noise.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_noise.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::Vec;
+using hwy::HWY_NAMESPACE::ZeroIfNegative;
+
+using D = HWY_CAPPED(float, kBlockDim);
+using DI = hwy::HWY_NAMESPACE::Rebind<int32_t, D>;
+using DI8 = hwy::HWY_NAMESPACE::Repartition<uint8_t, D>;
+
+// Clamps each lane of `x` to [0, max_value]; the tag `d` is not used.
+template <class D, class V>
+static HWY_INLINE V Clamp0ToMax(D d, const V x, const V max_value) {
+  // Cap from above first, then flush negative lanes to zero.
+  return ZeroIfNegative(Min(x, max_value));
+}
+
+// Returns eval(x) clamped to [0, 1]; x is in [0+delta, 1+delta], delta ~= 0.06
+template <class StrengthEval>
+typename StrengthEval::V NoiseStrength(const StrengthEval& eval,
+                                       const typename StrengthEval::V x) {
+  return Clamp0ToMax(D(), eval(x), Set(D(), 1.0f));
+}
+
+// Linearly interpolates noise_params.lut at a given x. TODO(veluca): SIMD-fy.
+class StrengthEvalLut {
+ public:
+  using V = Vec<D>;
+
+  explicit StrengthEvalLut(const NoiseParams& noise_params)
+#if HWY_TARGET == HWY_SCALAR
+      : noise_params_(noise_params)
+#endif
+  {
+#if HWY_TARGET != HWY_SCALAR
+    uint32_t lut[8];  // first 32 bytes of noise_params.lut, as 8 words
+    memcpy(lut, noise_params.lut, sizeof(lut));
+    for (size_t i = 0; i < 8; i++) {
+      low16_lut[2 * i] = (lut[i] >> 0) & 0xFF;        // byte 0 of word i
+      low16_lut[2 * i + 1] = (lut[i] >> 8) & 0xFF;    // byte 1 of word i
+      high16_lut[2 * i] = (lut[i] >> 16) & 0xFF;      // byte 2 of word i
+      high16_lut[2 * i + 1] = (lut[i] >> 24) & 0xFF;  // byte 3 of word i
+    }
+#endif
+  }
+
+  V operator()(const V vx) const {
+    constexpr size_t kScale = NoiseParams::kNumNoisePoints - 2;
+    auto scaled_vx = Max(Zero(D()), Mul(vx, Set(D(), kScale)));  // never < 0
+    auto floor_x = Floor(scaled_vx);
+    auto frac_x = Sub(scaled_vx, floor_x);
+    floor_x = IfThenElse(Ge(scaled_vx, Set(D(), kScale + 1)), Set(D(), kScale),
+                         floor_x);  // at or past kScale + 1: use index kScale
+    frac_x =
+        IfThenElse(Ge(scaled_vx, Set(D(), kScale + 1)), Set(D(), 1), frac_x);
+    auto floor_x_int = ConvertTo(DI(), floor_x);
+#if HWY_TARGET == HWY_SCALAR
+    auto low = Set(D(), noise_params_.lut[floor_x_int.raw]);
+    auto hi = Set(D(), noise_params_.lut[floor_x_int.raw + 1]);
+#else
+    // Set each lane's bytes to {0, 0, 2x+1, 2x}.
+    auto floorx_indices_low =
+        Add(Mul(floor_x_int, Set(DI(), 0x0202)), Set(DI(), 0x0100));
+    // Set each lane's bytes to {2x+1, 2x, 0, 0}.
+    auto floorx_indices_hi =
+        Add(Mul(floor_x_int, Set(DI(), 0x02020000)), Set(DI(), 0x01000000));
+    // load LUT
+    auto low16 = BitCast(DI(), LoadDup128(DI8(), low16_lut));
+    auto lowm = Set(DI(), 0xFFFF);  // keeps the low 16 bits of each lane
+    auto hi16 = BitCast(DI(), LoadDup128(DI8(), high16_lut));
+    auto him = Set(DI(), 0xFFFF0000);  // keeps the high 16 bits of each lane
+    // low = noise_params.lut[floor_x]
+    auto low =
+        BitCast(D(), Or(And(TableLookupBytes(low16, floorx_indices_low), lowm),
+                        And(TableLookupBytes(hi16, floorx_indices_hi), him)));
+    // hi = noise_params.lut[floor_x+1]
+    floorx_indices_low = Add(floorx_indices_low, Set(DI(), 0x0202));
+    floorx_indices_hi = Add(floorx_indices_hi, Set(DI(), 0x02020000));
+    auto hi =
+        BitCast(D(), Or(And(TableLookupBytes(low16, floorx_indices_low), lowm),
+                        And(TableLookupBytes(hi16, floorx_indices_hi), him)));
+#endif
+    return MulAdd(Sub(hi, low), frac_x, low);  // low + (hi - low) * frac_x
+  }
+
+ private:
+#if HWY_TARGET != HWY_SCALAR
+  // noise_params.lut transformed into two 16-bit lookup tables.
+  HWY_ALIGN uint8_t high16_lut[16];
+  HWY_ALIGN uint8_t low16_lut[16];
+#else
+  const NoiseParams& noise_params_;  // scalar build indexes the LUT directly
+#endif
+};
+
+template <class D>
+void AddNoiseToRGB(const D d, const Vec<D> rnd_noise_r,
+                   const Vec<D> rnd_noise_g, const Vec<D> rnd_noise_cor,
+                   const Vec<D> noise_strength_g, const Vec<D> noise_strength_r,
+                   float ytox, float ytob, float* JXL_RESTRICT out_x,
+                   float* JXL_RESTRICT out_y, float* JXL_RESTRICT out_b) {
+  // Blend weights: 127/128 for the shared noise, 1/128 for the channel's own.
+  const auto shared_weight = Set(d, 0.9921875f);
+  const auto own_weight = Set(d, 0.0078125f);
+  // The shared contribution is identical for red and green.
+  const auto shared_part = Mul(shared_weight, rnd_noise_cor);
+
+  const auto noise_r =
+      Mul(noise_strength_r, MulAdd(own_weight, rnd_noise_r, shared_part));
+  const auto noise_g =
+      Mul(noise_strength_g, MulAdd(own_weight, rnd_noise_g, shared_part));
+  const auto noise_sum = Add(noise_r, noise_g);
+  const auto noise_diff = Sub(noise_r, noise_g);
+
+  // x += ytox * sum + diff;  y += sum;  b += ytob * sum.
+  const auto new_x =
+      Add(MulAdd(Set(d, ytox), noise_sum, noise_diff), LoadU(d, out_x));
+  const auto new_y = Add(LoadU(d, out_y), noise_sum);
+  const auto new_b = MulAdd(Set(d, ytob), noise_sum, LoadU(d, out_b));
+
+  StoreU(new_x, d, out_x);
+  StoreU(new_y, d, out_y);
+  StoreU(new_b, d, out_b);
+}
+
+class AddNoiseStage : public RenderPipelineStage {
+ public:
+  AddNoiseStage(const NoiseParams& noise_params,
+                const ColorCorrelationMap& cmap, size_t first_c)
+      : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric(
+            /*shift=*/0, /*border=*/0)),
+        noise_params_(noise_params),  // kept by reference, not copied
+        cmap_(cmap),                  // kept by reference, not copied
+        first_c_(first_c) {}
+
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    if (!noise_params_.HasAny()) return;  // nothing to add: rows stay as-is
+    const StrengthEvalLut noise_model(noise_params_);
+    D d;
+    const auto half = Set(d, 0.5f);
+
+    // With the prior subtract-random Laplacian approximation, rnd_* ranges were
+    // about [-1.5, 1.6]; Laplacian3 about doubles this to [-3.6, 3.6], so the
+    // normalizer is half of what it was before (0.5).
+    const auto norm_const = Set(d, 0.22f);
+
+    float ytox = cmap_.YtoXRatio(0);
+    float ytob = cmap_.YtoBRatio(0);
+
+    const size_t xsize_v = RoundUpTo(xsize, Lanes(d));  // whole vectors only
+
+    float* JXL_RESTRICT row_x = GetInputRow(input_rows, 0, 0);
+    float* JXL_RESTRICT row_y = GetInputRow(input_rows, 1, 0);
+    float* JXL_RESTRICT row_b = GetInputRow(input_rows, 2, 0);
+    const float* JXL_RESTRICT row_rnd_r =
+        GetInputRow(input_rows, first_c_ + 0, 0);
+    const float* JXL_RESTRICT row_rnd_g =
+        GetInputRow(input_rows, first_c_ + 1, 0);
+    const float* JXL_RESTRICT row_rnd_c =
+        GetInputRow(input_rows, first_c_ + 2, 0);
+    // Needed by the calls to Floor() in StrengthEvalLut. Only arithmetic and
+    // shuffles are otherwise done on the data, so this is safe.
+    msan::UnpoisonMemory(row_x + xsize, (xsize_v - xsize) * sizeof(float));
+    msan::UnpoisonMemory(row_y + xsize, (xsize_v - xsize) * sizeof(float));
+    for (size_t x = 0; x < xsize_v; x += Lanes(d)) {  // covers [xsize, xsize_v)
+      const auto vx = LoadU(d, row_x + x);
+      const auto vy = LoadU(d, row_y + x);
+      const auto in_g = Sub(vy, vx);  // y - x
+      const auto in_r = Add(vy, vx);  // y + x
+      const auto noise_strength_g = NoiseStrength(noise_model, Mul(in_g, half));
+      const auto noise_strength_r = NoiseStrength(noise_model, Mul(in_r, half));
+      const auto addit_rnd_noise_red = Mul(LoadU(d, row_rnd_r + x), norm_const);
+      const auto addit_rnd_noise_green =
+          Mul(LoadU(d, row_rnd_g + x), norm_const);
+      const auto addit_rnd_noise_correlated =
+          Mul(LoadU(d, row_rnd_c + x), norm_const);
+      AddNoiseToRGB(D(), addit_rnd_noise_red, addit_rnd_noise_green,
+                    addit_rnd_noise_correlated, noise_strength_g,
+                    noise_strength_r, ytox, ytob, row_x + x, row_y + x,
+                    row_b + x);  // updates the three color rows in place
+    }
+    msan::PoisonMemory(row_x + xsize, (xsize_v - xsize) * sizeof(float));
+    msan::PoisonMemory(row_y + xsize, (xsize_v - xsize) * sizeof(float));
+    msan::PoisonMemory(row_b + xsize, (xsize_v - xsize) * sizeof(float));
+  }
+
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    return c >= first_c_ ? RenderPipelineChannelMode::kInput
+           : c < 3       ? RenderPipelineChannelMode::kInPlace
+                         : RenderPipelineChannelMode::kIgnored;
+  }
+
+  const char* GetName() const override { return "AddNoise"; }
+
+ private:
+  const NoiseParams& noise_params_;
+  const ColorCorrelationMap& cmap_;
+  size_t first_c_;  // first of the three noise channels read by ProcessRow
+};
+
+std::unique_ptr<RenderPipelineStage> GetAddNoiseStage(
+    const NoiseParams& noise_params, const ColorCorrelationMap& cmap,
+    size_t noise_c_start) {
+  return jxl::make_unique<AddNoiseStage>(noise_params, cmap, noise_c_start);  // HWY_NAMESPACE-local factory
+}
+
+class ConvolveNoiseStage : public RenderPipelineStage {
+ public:
+  explicit ConvolveNoiseStage(size_t first_c)
+      : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric(
+            /*shift=*/0, /*border=*/2)),
+        first_c_(first_c) {}
+
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    const HWY_FULL(float) d;
+    for (size_t c = first_c_; c < first_c_ + 3; c++) {  // three noise channels
+      float* JXL_RESTRICT rows[5];
+      for (size_t i = 0; i < 5; i++) {
+        rows[i] = GetInputRow(input_rows, c, i - 2);  // row offsets -2..+2
+      }
+      float* JXL_RESTRICT row_out = GetOutputRow(output_rows, c, 0);
+      for (ssize_t x = -RoundUpTo(xextra, Lanes(d));
+           x < (ssize_t)(xsize + xextra); x += Lanes(d)) {
+        const auto p00 = LoadU(d, rows[2] + x);  // window centre
+        auto others = Zero(d);  // running sum of the 24 non-centre samples
+        // TODO(eustas): sum loaded values to reduce the calculation chain
+        for (ssize_t i = -2; i <= 2; i++) {
+          others = Add(others, LoadU(d, rows[0] + x + i));
+          others = Add(others, LoadU(d, rows[1] + x + i));
+          others = Add(others, LoadU(d, rows[3] + x + i));
+          others = Add(others, LoadU(d, rows[4] + x + i));
+        }
+        others = Add(others, LoadU(d, rows[2] + x - 2));
+        others = Add(others, LoadU(d, rows[2] + x - 1));
+        others = Add(others, LoadU(d, rows[2] + x + 1));
+        others = Add(others, LoadU(d, rows[2] + x + 2));
+        // 4 * (1 - box kernel)
+        auto pixels = MulAdd(others, Set(d, 0.16), Mul(p00, Set(d, -3.84)));  // 0.16 * others - 3.84 * p00
+        StoreU(pixels, d, row_out + x);
+      }
+    }
+  }
+
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    return c >= first_c_ ? RenderPipelineChannelMode::kInOut
+                         : RenderPipelineChannelMode::kIgnored;
+  }
+
+  const char* GetName() const override { return "ConvNoise"; }
+
+ private:
+  size_t first_c_;  // first of the three channels filtered by ProcessRow
+};
+
+std::unique_ptr<RenderPipelineStage> GetConvolveNoiseStage(
+    size_t noise_c_start) {
+  return jxl::make_unique<ConvolveNoiseStage>(noise_c_start);  // HWY_NAMESPACE-local factory
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetAddNoiseStage);
+HWY_EXPORT(GetConvolveNoiseStage);
+
+std::unique_ptr<RenderPipelineStage> GetAddNoiseStage(
+    const NoiseParams& noise_params, const ColorCorrelationMap& cmap,
+    size_t noise_c_start) {
+  return HWY_DYNAMIC_DISPATCH(GetAddNoiseStage)(noise_params, cmap,
+                                                noise_c_start);  // call the exported impl
+}
+
+std::unique_ptr<RenderPipelineStage> GetConvolveNoiseStage(
+    size_t noise_c_start) {
+  return HWY_DYNAMIC_DISPATCH(GetConvolveNoiseStage)(noise_c_start);  // call the exported impl
+}
+
+}  // namespace jxl
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_noise.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_noise.h
new file mode 100644
index 0000000000..bd7797f991
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_noise.h
@@ -0,0 +1,32 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_NOISE_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_NOISE_H_
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/dec_noise.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Adds noise to color channels.
+std::unique_ptr<RenderPipelineStage> GetAddNoiseStage(
+    const NoiseParams& noise_params, const ColorCorrelationMap& cmap,
+    size_t noise_c_start);
+
+// Applies a 5x5 subtract-box-filter convolution to the noise input channels.
+std::unique_ptr<RenderPipelineStage> GetConvolveNoiseStage(
+    size_t noise_c_start);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_RENDER_PIPELINE_STAGE_NOISE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_patches.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_patches.cc
new file mode 100644
index 0000000000..c5a75b09f7
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_patches.cc
@@ -0,0 +1,47 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_patches.h"
+
+namespace jxl {
+namespace {
+class PatchDictionaryStage : public RenderPipelineStage {
+ public:
+  PatchDictionaryStage(const PatchDictionary* patches, size_t num_channels)
+      : RenderPipelineStage(RenderPipelineStage::Settings()),
+        patches_(*patches),  // dereferenced here and then kept by reference
+        num_channels_(num_channels) {}
+
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    JXL_ASSERT(xpos == 0 || xpos >= xextra);
+    const size_t left = (xpos == 0) ? 0 : xpos - xextra;  // first column passed
+    std::vector<float*> rows(num_channels_);
+    for (size_t c = 0; c != rows.size(); ++c) {
+      rows[c] = GetInputRow(input_rows, c, 0) + left - xpos;
+    }
+    patches_.AddOneRow(rows.data(), ypos, left, xsize + xextra + xpos - left);
+  }
+
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    if (c >= num_channels_) return RenderPipelineChannelMode::kIgnored;
+    return RenderPipelineChannelMode::kInPlace;
+  }
+
+  const char* GetName() const override { return "Patches"; }
+
+ private:
+  const PatchDictionary& patches_;
+  const size_t num_channels_;
+};
+}  // namespace
+
+std::unique_ptr<RenderPipelineStage> GetPatchesStage(
+    const PatchDictionary* patches, size_t num_channels) {
+  return jxl::make_unique<PatchDictionaryStage>(patches, num_channels);  // the stage dereferences `patches`
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_patches.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_patches.h
new file mode 100644
index 0000000000..b35abdc2eb
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_patches.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_PATCHES_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_PATCHES_H_
+
+#include <utility>
+
+#include "lib/jxl/patch_dictionary_internal.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Draws patches if applicable.
+std::unique_ptr<RenderPipelineStage> GetPatchesStage(
+    const PatchDictionary* patches, size_t num_channels);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_RENDER_PIPELINE_STAGE_PATCHES_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_splines.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_splines.cc
new file mode 100644
index 0000000000..4a0529ce2c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_splines.cc
@@ -0,0 +1,62 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_splines.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_splines.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+class SplineStage : public RenderPipelineStage {
+ public:
+  explicit SplineStage(const Splines* splines)
+      : RenderPipelineStage(RenderPipelineStage::Settings()),
+        splines_(*splines) {}  // dereferenced here and then kept by reference
+
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    const Rect row_rect(xpos, ypos, xsize, 1);  // xsize wide, one row high
+    splines_.AddToRow(GetInputRow(input_rows, 0, 0),
+                      GetInputRow(input_rows, 1, 0),
+                      GetInputRow(input_rows, 2, 0), row_rect);
+  }
+
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    if (c >= 3) return RenderPipelineChannelMode::kIgnored;
+    return RenderPipelineChannelMode::kInPlace;  // channels 0..2
+  }
+
+  const char* GetName() const override { return "Splines"; }
+
+ private:
+  const Splines& splines_;
+};
+
+std::unique_ptr<RenderPipelineStage> GetSplineStage(const Splines* splines) {
+  return jxl::make_unique<SplineStage>(splines);  // HWY_NAMESPACE-local factory
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetSplineStage);
+
+std::unique_ptr<RenderPipelineStage> GetSplineStage(const Splines* splines) {
+  return HWY_DYNAMIC_DISPATCH(GetSplineStage)(splines);  // call the exported impl
+}
+
+}  // namespace jxl
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_splines.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_splines.h
new file mode 100644
index 0000000000..363af393ec
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_splines.h
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_SPLINES_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_SPLINES_H_
+
+#include <utility>
+
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+#include "lib/jxl/splines.h"
+
+namespace jxl {
+
+// Draws splines if applicable.
+std::unique_ptr<RenderPipelineStage> GetSplineStage(const Splines* splines);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_RENDER_PIPELINE_STAGE_SPLINES_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_spot.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_spot.cc
new file mode 100644
index 0000000000..a43cb4e1ab
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_spot.cc
@@ -0,0 +1,51 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_spot.h"
+
+namespace jxl {
+class SpotColorStage : public RenderPipelineStage {
+ public:
+  explicit SpotColorStage(size_t spot_c, const float* spot_color)
+      : RenderPipelineStage(RenderPipelineStage::Settings()),
+        spot_c_(spot_c),
+        spot_color_(spot_color) {
+    JXL_ASSERT(spot_c_ >= 3);  // the spot channel must not be a color channel
+  }
+
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    // TODO(veluca): add SIMD.
+    const float opacity = spot_color_[3];
+    const float* JXL_RESTRICT spot_row = GetInputRow(input_rows, spot_c_, 0);
+    for (size_t c = 0; c < 3; c++) {
+      float* JXL_RESTRICT color_row = GetInputRow(input_rows, c, 0);
+      for (ssize_t x = -xextra; x < ssize_t(xsize + xextra); x++) {
+        const float mix = opacity * spot_row[x];
+        color_row[x] = mix * spot_color_[c] + (1.0f - mix) * color_row[x];
+      }
+    }
+  }
+
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    if (c < 3) return RenderPipelineChannelMode::kInPlace;  // blended colors
+    if (c == spot_c_) return RenderPipelineChannelMode::kInput;  // read-only
+    return RenderPipelineChannelMode::kIgnored;
+  }
+
+  const char* GetName() const override { return "Spot"; }
+
+ private:
+  size_t spot_c_;
+  const float* spot_color_;
+};
+
+std::unique_ptr<RenderPipelineStage> GetSpotColorStage(
+    size_t spot_c, const float* spot_color) {
+  return jxl::make_unique<SpotColorStage>(spot_c, spot_color);  // the stage reads spot_color[0..3]
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_spot.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_spot.h
new file mode 100644
index 0000000000..3e79c75823
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_spot.h
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_SPOT_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_SPOT_H_
+
+#include <utility>
+
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Render the spot color channels.
+std::unique_ptr<RenderPipelineStage> GetSpotColorStage(size_t spot_c,
+                                                       const float* spot_color);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_RENDER_PIPELINE_STAGE_SPOT_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_to_linear.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_to_linear.cc
new file mode 100644
index 0000000000..5a543d2b44
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_to_linear.cc
@@ -0,0 +1,200 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_to_linear.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_to_linear.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/dec_tone_mapping-inl.h"
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::IfThenZeroElse;
+
+template <typename Op>
+struct PerChannelOp {
+  explicit PerChannelOp(Op op) : op(op) {}
+  template <typename D, typename T>
+  void Transform(D d, T* r, T* g, T* b) const {
+    for (T* channel : {r, g, b}) {  // same single-value op on every channel
+      *channel = op.Transform(d, *channel);
+    }
+  }
+
+  Op op;
+};
+template <typename Op>
+PerChannelOp<Op> MakePerChannelOp(Op&& op) {  // NOTE(review): Op deduces to T& for lvalues; calls in this file pass temporaries
+  return PerChannelOp<Op>(std::forward<Op>(op));
+}
+
+struct OpLinear {
+  template <typename D, typename T>
+  T Transform(D d, const T& encoded) const {
+    return encoded;  // identity: the value is returned unchanged
+  }
+};
+
+struct OpRgb {
+  template <typename D, typename T>
+  T Transform(D d, const T& encoded) const {
+    return TF_SRGB().DisplayFromEncoded(encoded);  // called without the tag `d`
+  }
+};
+
+struct OpPq {
+  template <typename D, typename T>
+  T Transform(D d, const T& encoded) const {
+    return TF_PQ().DisplayFromEncoded(d, encoded);  // vector path, tag passed
+  }
+};
+
+struct OpHlg {
+  explicit OpHlg(const float luminances[3], const float intensity_target)
+      : hlg_ootf_(HlgOOTF::FromSceneLight(
+            /*display_luminance=*/intensity_target, luminances)) {}
+
+  template <typename D, typename T>
+  void Transform(D d, T* r, T* g, T* b) const {
+    for (T* val : {r, g, b}) {
+      HWY_ALIGN float vals[MaxLanes(d)];  // scratch copy of one vector
+      Store(*val, d, vals);
+      for (size_t i = 0; i < Lanes(d); ++i) {
+        vals[i] = TF_HLG().DisplayFromEncoded(vals[i]);  // scalar, lane by lane
+      }
+      *val = Load(d, vals);
+    }
+    hlg_ootf_.Apply(r, g, b);  // applied to all three channels together
+  }
+  HlgOOTF hlg_ootf_;
+};
+
+struct Op709 {
+  template <typename D, typename T>
+  T Transform(D d, const T& encoded) const {
+    return TF_709().DisplayFromEncoded(d, encoded);  // vector path, tag passed
+  }
+};
+
+struct OpGamma {
+  const float gamma;  // exponent handed to FastPowf
+  template <typename D, typename T>
+  T Transform(D d, const T& encoded) const {
+    return IfThenZeroElse(Le(encoded, Set(d, 1e-5f)),  // inputs <= 1e-5 give 0
+                          FastPowf(d, encoded, Set(d, gamma)));
+  }
+};
+
+struct OpInvalid {
+  template <typename D, typename T>
+  void Transform(D d, T* r, T* g, T* b) const {}  // empty body: values untouched
+};
+
+template <typename Op>
+class ToLinearStage : public RenderPipelineStage {
+ public:
+  explicit ToLinearStage(Op op)
+      : RenderPipelineStage(RenderPipelineStage::Settings()),
+        op_(std::move(op)) {}
+
+  explicit ToLinearStage()  // default-constructs op_ and marks the stage invalid
+      : RenderPipelineStage(RenderPipelineStage::Settings()), valid_(false) {}
+
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    const HWY_FULL(float) d;
+    const size_t xsize_v = RoundUpTo(xsize, Lanes(d));
+    float* JXL_RESTRICT row0 = GetInputRow(input_rows, 0, 0);
+    float* JXL_RESTRICT row1 = GetInputRow(input_rows, 1, 0);
+    float* JXL_RESTRICT row2 = GetInputRow(input_rows, 2, 0);
+    // All calculations are lane-wise, but some may need value-dependent
+    // behaviour (e.g. NearestInt), so temporarily unpoison the last vector's
+    // tail [xsize, xsize_v); it is re-poisoned after the loop.
+    msan::UnpoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize));
+    msan::UnpoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize));
+    msan::UnpoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize));
+    for (ssize_t x = -xextra; x < (ssize_t)(xsize + xextra); x += Lanes(d)) {
+      auto r = LoadU(d, row0 + x);
+      auto g = LoadU(d, row1 + x);
+      auto b = LoadU(d, row2 + x);
+      op_.Transform(d, &r, &g, &b);  // converts the three vectors in place
+      StoreU(r, d, row0 + x);
+      StoreU(g, d, row1 + x);
+      StoreU(b, d, row2 + x);
+    }
+    msan::PoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize));
+    msan::PoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize));
+    msan::PoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize));
+  }
+
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    return c < 3 ? RenderPipelineChannelMode::kInPlace
+                 : RenderPipelineChannelMode::kIgnored;
+  }
+
+  const char* GetName() const override { return "ToLinear"; }
+
+ private:
+  Status IsInitialized() const override { return valid_; }
+
+  Op op_;
+  bool valid_ = true;  // false only when built through the default constructor
+};
+
+template <typename Op>
+std::unique_ptr<ToLinearStage<Op>> MakeToLinearStage(Op&& op) {  // NOTE(review): Op deduces to T& for lvalues; calls in this file pass temporaries
+  return jxl::make_unique<ToLinearStage<Op>>(std::forward<Op>(op));
+}
+
+std::unique_ptr<RenderPipelineStage> GetToLinearStage(
+    const OutputEncodingInfo& output_encoding_info) {
+  // Choose the op from the transfer function of the output color encoding.
+  const auto& info = output_encoding_info;
+  const auto& tf = info.color_encoding.tf;
+  if (tf.IsLinear()) return MakeToLinearStage(MakePerChannelOp(OpLinear()));
+  if (tf.IsSRGB()) return MakeToLinearStage(MakePerChannelOp(OpRgb()));
+  if (tf.IsPQ()) return MakeToLinearStage(MakePerChannelOp(OpPq()));
+  if (tf.IsHLG()) {
+    return MakeToLinearStage(
+        OpHlg(info.luminances, info.orig_intensity_target));
+  }
+  if (tf.Is709()) return MakeToLinearStage(MakePerChannelOp(Op709()));
+  if (tf.IsGamma() || tf.IsDCI()) {
+    // Gamma and DCI share one power-law op with exponent 1 / inverse_gamma.
+    const float exponent = 1.f / info.inverse_gamma;
+    return MakeToLinearStage(MakePerChannelOp(OpGamma{exponent}));
+  }
+  // Any other transfer function yields a stage flagged as uninitialized.
+  return jxl::make_unique<ToLinearStage<OpInvalid>>();
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetToLinearStage);
+
+std::unique_ptr<RenderPipelineStage> GetToLinearStage(
+    const OutputEncodingInfo& output_encoding_info) {
+  return HWY_DYNAMIC_DISPATCH(GetToLinearStage)(output_encoding_info);  // call the exported impl
+}
+
+}  // namespace jxl
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_to_linear.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_to_linear.h
new file mode 100644
index 0000000000..ccee7b09f0
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_to_linear.h
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_TO_LINEAR_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_TO_LINEAR_H_
+
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Converts the color channels from `output_encoding_info.color_encoding` to
+// linear.
+std::unique_ptr<RenderPipelineStage> GetToLinearStage(
+    const OutputEncodingInfo& output_encoding_info);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_RENDER_PIPELINE_STAGE_TO_LINEAR_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_tone_mapping.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_tone_mapping.cc
new file mode 100644
index 0000000000..a3b4a3e7cf
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_tone_mapping.cc
@@ -0,0 +1,149 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_tone_mapping.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_tone_mapping.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/dec_tone_mapping-inl.h"
+#include "lib/jxl/dec_xyb-inl.h"
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+class ToneMappingStage : public RenderPipelineStage {
+ public:
+  // Picks the operator implied by `output_encoding_info`: the Rec. 2408 tone
+  // mapper, the HLG OOTF, or neither (both pointers then stay null).
+  explicit ToneMappingStage(OutputEncodingInfo output_encoding_info)
+      : RenderPipelineStage(RenderPipelineStage::Settings()),
+        output_encoding_info_(std::move(output_encoding_info)) {
+    const auto orig_nits = output_encoding_info_.orig_intensity_target;
+    const auto desired_nits = output_encoding_info_.desired_intensity_target;
+    if (desired_nits == orig_nits) return;  // No tone mapping requested.
+
+    auto& source_tf = output_encoding_info_.orig_color_encoding.tf;
+    auto& output_tf = output_encoding_info_.color_encoding.tf;
+    if (source_tf.IsPQ() && desired_nits < orig_nits) {
+      // PQ source with a lower target: map [0, orig] nits to [0, desired].
+      tone_mapper_ = jxl::make_unique<ToneMapper>(
+          /*source_range=*/std::pair<float, float>(0, orig_nits),
+          /*target_range=*/std::pair<float, float>(0, desired_nits),
+          output_encoding_info_.luminances);
+    } else if (source_tf.IsHLG() && !output_tf.IsHLG()) {
+      // HLG source that is not output as HLG: use the OOTF instead.
+      hlg_ootf_ = jxl::make_unique<HlgOOTF>(
+          /*source_luminance=*/orig_nits, /*target_luminance=*/desired_nits,
+          output_encoding_info_.luminances);
+    }
+
+    // With PQ output, 1.0 means 10000 nits rather than the intensity target,
+    // so the samples are rescaled around the operator.
+    if (output_tf.IsPQ() && IsNeeded()) {
+      to_intensity_target_ = 10000.f / orig_nits;
+      from_desired_intensity_target_ = desired_nits / 10000.f;
+    }
+  }
+
+  // True when the constructor instantiated one of the two operators.
+  bool IsNeeded() const { return tone_mapper_ || hlg_ootf_; }
+
+  // Tone maps channels 0..2 of the current row in place, one vector at a time.
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    if (!IsNeeded()) return;
+
+    const HWY_FULL(float) d;
+    const size_t tail = RoundUpTo(xsize, Lanes(d)) - xsize;
+    float* JXL_RESTRICT row_r = GetInputRow(input_rows, 0, 0);
+    float* JXL_RESTRICT row_g = GetInputRow(input_rows, 1, 0);
+    float* JXL_RESTRICT row_b = GetInputRow(input_rows, 2, 0);
+    // The math is lane-wise, but parts of it may depend on sample values
+    // (e.g. NearestInt), so the tail of the last vector is unpoisoned for
+    // the duration of the loop and poisoned again afterwards.
+    msan::UnpoisonMemory(row_r + xsize, sizeof(float) * tail);
+    msan::UnpoisonMemory(row_g + xsize, sizeof(float) * tail);
+    msan::UnpoisonMemory(row_b + xsize, sizeof(float) * tail);
+    // Broadcast the scale factors once; they are constant for the whole row.
+    const auto scale_in = Set(d, to_intensity_target_);
+    const auto scale_out = Set(d, from_desired_intensity_target_);
+    const ssize_t x_end = (ssize_t)(xsize + xextra);
+    for (ssize_t x = -xextra; x < x_end; x += Lanes(d)) {
+      auto r = Mul(LoadU(d, row_r + x), scale_in);
+      auto g = Mul(LoadU(d, row_g + x), scale_in);
+      auto b = Mul(LoadU(d, row_b + x), scale_in);
+      // The tone mapper path always gamut maps; the OOTF path only on request.
+      bool needs_gamut_mapping = true;
+      if (tone_mapper_) {
+        tone_mapper_->ToneMap(&r, &g, &b);
+      } else {
+        JXL_ASSERT(hlg_ootf_);
+        hlg_ootf_->Apply(&r, &g, &b);
+        needs_gamut_mapping = hlg_ootf_->WarrantsGamutMapping();
+      }
+      if (needs_gamut_mapping) {
+        GamutMap(&r, &g, &b, output_encoding_info_.luminances);
+      }
+      StoreU(Mul(r, scale_out), d, row_r + x);
+      StoreU(Mul(g, scale_out), d, row_g + x);
+      StoreU(Mul(b, scale_out), d, row_b + x);
+    }
+    msan::PoisonMemory(row_r + xsize, sizeof(float) * tail);
+    msan::PoisonMemory(row_g + xsize, sizeof(float) * tail);
+    msan::PoisonMemory(row_b + xsize, sizeof(float) * tail);
+  }
+
+  // Only the three color channels are touched, and they are edited in place.
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    if (c >= 3) return RenderPipelineChannelMode::kIgnored;
+    return RenderPipelineChannelMode::kInPlace;
+  }
+
+  // Human-readable name of this stage.
+  const char* GetName() const override { return "ToneMapping"; }
+
+ private:
+  using ToneMapper = Rec2408ToneMapper<HWY_FULL(float)>;
+  OutputEncodingInfo output_encoding_info_;
+  std::unique_ptr<ToneMapper> tone_mapper_;
+  std::unique_ptr<HlgOOTF> hlg_ootf_;
+  // Scale factors applied before/after the operator. They differ from 1 only
+  // when the output is PQ, where 1.0 stands for 10000 nits instead of
+  // orig_intensity_target / desired_intensity_target.
+  float to_intensity_target_ = 1.f;
+  float from_desired_intensity_target_ = 1.f;
+};
+
+std::unique_ptr<RenderPipelineStage> GetToneMappingStage(
+    const OutputEncodingInfo& output_encoding_info) {
+  auto candidate = jxl::make_unique<ToneMappingStage>(output_encoding_info);
+  if (candidate->IsNeeded()) return candidate;
+  return nullptr;  // This configuration requires no tone mapping.
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetToneMappingStage);
+
+std::unique_ptr<RenderPipelineStage> GetToneMappingStage(
+    const OutputEncodingInfo& output_encoding_info) {  // Target-independent entry point.
+  return HWY_DYNAMIC_DISPATCH(GetToneMappingStage)(output_encoding_info);  // Forwards to the per-target version registered with HWY_EXPORT.
+}
+
+}  // namespace jxl
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_tone_mapping.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_tone_mapping.h
new file mode 100644
index 0000000000..99824f8511
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_tone_mapping.h
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_TONE_MAPPING_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_TONE_MAPPING_H_
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Tone maps the image if appropriate. It must be in linear space and
+// `output_encoding_info.luminances` must contain the luminance for the
+// primaries of that space. It must also be encoded such that (1, 1, 1)
+// represents `output_encoding_info.orig_intensity_target` nits, unless
+// `output_encoding_info.color_encoding.tf.IsPQ()`, in which case (1, 1, 1) must
+// represent 10000 nits. This corresponds to what XYBStage outputs. After this
+// stage, (1, 1, 1) will represent
+// `output_encoding_info.desired_intensity_target` nits, except in the PQ
+// special case in which it remains 10000.
+//
+// If no tone mapping is necessary, this will return nullptr.
+std::unique_ptr<RenderPipelineStage> GetToneMappingStage(
+    const OutputEncodingInfo& output_encoding_info);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_RENDER_PIPELINE_STAGE_TONE_MAPPING_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_upsampling.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_upsampling.cc
new file mode 100644
index 0000000000..bb8d9b2081
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_upsampling.cc
@@ -0,0 +1,186 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_upsampling.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_upsampling.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/simd_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Clamp;
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::Min;
+using hwy::HWY_NAMESPACE::MulAdd;
+
+class UpsamplingStage : public RenderPipelineStage {
+ public:
+  // Builds the per-phase 5x5 kernels for channel `c` from the weight table
+  // selected by `shift` (1 -> 2x table, 2 -> 4x table, otherwise 8x table).
+  explicit UpsamplingStage(const CustomTransformData& ups_factors, size_t c,
+                           size_t shift)
+      : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric(
+            /*shift=*/shift, /*border=*/2)),
+        c_(c) {
+    const float* weights = ups_factors.upsampling8_weights;
+    if (shift == 1) {
+      weights = ups_factors.upsampling2_weights;
+    } else if (shift == 2) {
+      weights = ups_factors.upsampling4_weights;
+    }
+    // `weights` is indexed as the upper triangle of a symmetric
+    // (5 * N) x (5 * N) matrix; unpack it into kernel_[py][px][ky][kx].
+    const size_t N = 1 << (shift - 1);
+    const size_t side = 5 * N;
+    for (size_t col = 0; col < side; col++) {
+      for (size_t row = 0; row < side; row++) {
+        const size_t y = std::min(col, row);
+        const size_t x = std::max(col, row);
+        const size_t row_start = side * y - y * (y - 1) / 2;
+        kernel_[row / 5][col / 5][row % 5][col % 5] =
+            weights[row_start + x - y];
+      }
+    }
+  }
+
+  // Upsamples one input row of channel c_ into N output rows (N = 1 << shift).
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    const HWY_FULL(float) df;
+    const size_t N = 1 << settings_.shift_x;
+    const size_t tail = RoundUpTo(xsize, Lanes(df)) - xsize;
+    // The vector loop loads whole vectors, so it can read past column
+    // xsize + 2 of the five input rows; unpoison that region for MSan.
+    for (ssize_t iy = -2; iy <= 2; iy++) {
+      float* in_row = GetInputRow(input_rows, c_, iy);
+      msan::UnpoisonMemory(in_row + xsize + 2, sizeof(float) * tail);
+    }
+    JXL_ASSERT(xextra == 0);
+    const ssize_t x0 = 0;
+    const ssize_t x1 = xsize;
+    switch (N) {
+      case 2:
+        ProcessRowImpl<2>(input_rows, output_rows, x0, x1);
+        break;
+      case 4:
+        ProcessRowImpl<4>(input_rows, output_rows, x0, x1);
+        break;
+      case 8:
+        ProcessRowImpl<8>(input_rows, output_rows, x0, x1);
+        break;
+    }
+    // Poison the padding that lies past the end of each output row.
+    for (size_t oy = 0; oy < N; oy++) {
+      float* dst_row = GetOutputRow(output_rows, c_, oy);
+      msan::PoisonMemory(dst_row + xsize * N, sizeof(float) * tail * N);
+    }
+  }
+
+  // Only channel c_ is consumed and produced; all others are ignored.
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    if (c != c_) return RenderPipelineChannelMode::kIgnored;
+    return RenderPipelineChannelMode::kInOut;
+  }
+
+  const char* GetName() const override { return "Upsample"; }
+
+ private:
+  // Weight of input tap (ix, iy), each in [-2, 2], for output sub-pixel
+  // (x, y). Phases in the upper half of each period are obtained by
+  // mirroring both the phase index and the tap index.
+  template <size_t N>
+  JXL_INLINE float Kernel(size_t x, size_t y, ssize_t ix, ssize_t iy) const {
+    if (N != 2 && N != 4 && N != 8) {
+      JXL_UNREACHABLE("Invalid upsample");
+    }
+    constexpr size_t kHalf = N / 2;
+    const bool mirror_x = x % N >= kHalf;
+    const bool mirror_y = y % N >= kHalf;
+    const size_t phase_x = mirror_x ? kHalf - 1 - x % kHalf : x % kHalf;
+    const size_t phase_y = mirror_y ? kHalf - 1 - y % kHalf : y % kHalf;
+    const ssize_t tap_x = mirror_x ? 2 - ix : ix + 2;
+    const ssize_t tap_y = mirror_y ? 2 - iy : iy + 2;
+    return kernel_[phase_y][phase_x][tap_y][tap_x];
+  }
+
+  // Computes all N output rows for input columns [x0, x1); every vector of
+  // input columns yields N interleaved vectors per output row.
+  template <ssize_t N>
+  void ProcessRowImpl(const RowInfo& input_rows, const RowInfo& output_rows,
+                      ssize_t x0, ssize_t x1) const {
+    const HWY_FULL(float) df;
+    using V = hwy::HWY_NAMESPACE::Vec<HWY_FULL(float)>;
+    // One accumulator per horizontal phase; only the first N are used.
+    V ups0, ups1, ups2, ups3, ups4, ups5, ups6, ups7;
+    V* const ups[8] = {&ups0, &ups1, &ups2, &ups3, &ups4, &ups5, &ups6, &ups7};
+    for (size_t oy = 0; oy < N; oy++) {
+      float* dst_row = GetOutputRow(output_rows, c_, oy);
+      for (ssize_t x = x0; x < x1; x += Lanes(df)) {
+        for (size_t ox = 0; ox < N; ox++) {
+          auto sum = Zero(df);
+          auto lo = LoadU(df, GetInputRow(input_rows, c_, 0) + x);
+          auto hi = lo;
+          for (ssize_t iy = -2; iy <= 2; iy++) {
+            const float* in_row = GetInputRow(input_rows, c_, iy) + x;
+            for (ssize_t ix = -2; ix <= 2; ix++) {
+              const auto v = LoadU(df, in_row + ix);
+              sum = MulAdd(Set(df, Kernel<N>(ox, oy, ix, iy)), v, sum);
+              lo = Min(v, lo);
+              hi = Max(v, hi);
+            }
+          }
+          // Clamp to the range of the 5x5 neighborhood to avoid overshooting.
+          *ups[ox] = Clamp(sum, lo, hi);
+        }
+        float* dst = dst_row + x * N;
+        if (N == 2) {
+          StoreInterleaved(df, ups0, ups1, dst);
+        } else if (N == 4) {
+          StoreInterleaved(df, ups0, ups1, ups2, ups3, dst);
+        } else if (N == 8) {
+          StoreInterleaved(df, ups0, ups1, ups2, ups3, ups4, ups5, ups6, ups7,
+                           dst);
+        }
+      }
+    }
+  }
+
+  size_t c_;
+  float kernel_[4][4][5][5];
+};
+
+std::unique_ptr<RenderPipelineStage> GetUpsamplingStage(
+    const CustomTransformData& ups_factors, size_t c, size_t shift) {  // Per-target factory (inside HWY_NAMESPACE).
+  return jxl::make_unique<UpsamplingStage>(ups_factors, c, shift);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetUpsamplingStage);
+
+std::unique_ptr<RenderPipelineStage> GetUpsamplingStage(
+    const CustomTransformData& ups_factors, size_t c, size_t shift) {  // Target-independent entry point.
+  JXL_ASSERT(shift != 0);  // A shift of 0 would mean no upsampling at all.
+  JXL_ASSERT(shift <= 3);  // Only shifts 1..3 are accepted here.
+  return HWY_DYNAMIC_DISPATCH(GetUpsamplingStage)(ups_factors, c, shift);  // Forwards to the per-target version registered with HWY_EXPORT.
+}
+
+}  // namespace jxl
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_upsampling.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_upsampling.h
new file mode 100644
index 0000000000..7d5defd23c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_upsampling.h
@@ -0,0 +1,26 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_UPSAMPLING_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_UPSAMPLING_H_
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/image_metadata.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Upsamples the given channel by the given factor.
+std::unique_ptr<RenderPipelineStage> GetUpsamplingStage(
+    const CustomTransformData& ups_factors, size_t c, size_t shift);
+}  // namespace jxl
+
+#endif  // LIB_JXL_RENDER_PIPELINE_STAGE_UPSAMPLING_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_write.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_write.cc
new file mode 100644
index 0000000000..902fc33b7e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_write.cc
@@ -0,0 +1,601 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_write.h"
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/sanitizers.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_write.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Clamp;
+using hwy::HWY_NAMESPACE::Div;
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::NearestInt;
+using hwy::HWY_NAMESPACE::Or;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::ShiftLeftSame;
+using hwy::HWY_NAMESPACE::ShiftRightSame;
+
+class WriteToOutputStage : public RenderPipelineStage {
+ public:
+  WriteToOutputStage(const ImageOutput& main_output, size_t width,
+                     size_t height, bool has_alpha, bool unpremul_alpha,
+                     size_t alpha_c, Orientation undo_orientation,
+                     const std::vector<ImageOutput>& extra_output)
+      : RenderPipelineStage(RenderPipelineStage::Settings()),
+        width_(width),
+        height_(height),
+        main_(main_output),
+        num_color_(main_.num_channels_ < 3 ? 1 : 3),
+        want_alpha_(main_.num_channels_ == 2 || main_.num_channels_ == 4),
+        has_alpha_(has_alpha),
+        unpremul_alpha_(unpremul_alpha),
+        alpha_c_(alpha_c),
+        flip_x_(ShouldFlipX(undo_orientation)),
+        flip_y_(ShouldFlipY(undo_orientation)),
+        transpose_(ShouldTranspose(undo_orientation)),
+        opaque_alpha_(kMaxPixelsPerCall, 1.0f) {
+    // Extra output `ec` reads pipeline channel 3 + ec; keep only those that
+    // actually have a callback or a buffer attached.
+    for (size_t ec = 0; ec < extra_output.size(); ++ec) {
+      if (!extra_output[ec].callback.IsPresent() && !extra_output[ec].buffer) continue;
+      extra_channels_.emplace_back(extra_output[ec]);
+      extra_channels_.back().channel_index_ = 3 + ec;
+    }
+  }
+
+  WriteToOutputStage(const WriteToOutputStage&) = delete;
+  WriteToOutputStage& operator=(const WriteToOutputStage&) = delete;
+  WriteToOutputStage(WriteToOutputStage&&) = delete;
+  WriteToOutputStage& operator=(WriteToOutputStage&&) = delete;
+
+  ~WriteToOutputStage() override {
+    // Release the callback state that Output::PrepareForThreads() obtained.
+    const auto release = [](Output& out) {
+      if (out.run_opaque_) out.pixel_callback_.destroy(out.run_opaque_);
+    };
+    release(main_);
+    for (auto& extra : extra_channels_) {
+      release(extra);
+    }
+  }
+
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    JXL_DASSERT(xextra == 0);
+    JXL_DASSERT(main_.run_opaque_ || main_.buffer_);
+    // Positions outside the width_ x height_ image produce no output.
+    if (ypos >= height_ || xpos >= width_) return;
+    const size_t out_y = flip_y_ ? height_ - 1u - ypos : ypos;
+    const size_t limit = std::min(xsize, width_ - xpos);
+    // Emit the row in chunks of at most kMaxPixelsPerCall pixels.
+    for (size_t x0 = 0; x0 < limit; x0 += kMaxPixelsPerCall) {
+      const size_t xstart = xpos + x0;
+      const size_t len = std::min<size_t>(kMaxPixelsPerCall, limit - x0);
+
+      // Color planes come first, followed by alpha in slot num_color_.
+      const float* line_buffers[4];
+      for (size_t c = 0; c < num_color_; c++) {
+        line_buffers[c] = GetInputRow(input_rows, c, 0) + x0;
+      }
+      // Without an alpha channel, opaque_alpha_ supplies constant 1.0f.
+      line_buffers[num_color_] =
+          has_alpha_ ? GetInputRow(input_rows, alpha_c_, 0) + x0
+                     : opaque_alpha_.data();
+
+      if (has_alpha_ && want_alpha_ && unpremul_alpha_) {
+        UnpremulAlpha(thread_id, len, line_buffers);
+      }
+      OutputBuffers(main_, thread_id, out_y, xstart, len, line_buffers);
+      // Each extra channel is written on its own, through slot 0.
+      for (const auto& extra : extra_channels_) {
+        line_buffers[0] = GetInputRow(input_rows, extra.channel_index_, 0) + x0;
+        OutputBuffers(extra, thread_id, out_y, xstart, len, line_buffers);
+      }
+    }
+  }
+
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    // The stage reads the color channels, the alpha channel when the image
+    // has one, and every extra channel that has an output attached.
+    bool is_input = c < num_color_ || (has_alpha_ && c == alpha_c_);
+    for (const auto& extra : extra_channels_) {
+      if (c == extra.channel_index_) is_input = true;
+    }
+    // Every other channel is ignored by this stage.
+    return is_input ? RenderPipelineChannelMode::kInput
+                    : RenderPipelineChannelMode::kIgnored;
+  }
+
+  const char* GetName() const override { return "WritePixelCB"; }
+
+ private:
+  struct Output {  // One output image: delivered via callback or written to a buffer.
+    Output(const ImageOutput& image_out)
+        : pixel_callback_(image_out.callback),
+          buffer_(image_out.buffer),
+          buffer_size_(image_out.buffer_size),
+          stride_(image_out.stride),
+          num_channels_(image_out.format.num_channels),
+          swap_endianness_(SwapEndianness(image_out.format.endianness)),
+          data_type_(image_out.format.data_type),
+          bits_per_sample_(image_out.bits_per_sample) {}
+
+    Status PrepareForThreads(size_t num_threads) {  // Fails if neither output mode is usable.
+      if (pixel_callback_.IsPresent()) {
+        run_opaque_ =
+            pixel_callback_.Init(num_threads, /*num_pixels=*/kMaxPixelsPerCall);
+        JXL_RETURN_IF_ERROR(run_opaque_ != nullptr);
+      } else {
+        JXL_RETURN_IF_ERROR(buffer_ != nullptr);  // Buffer mode needs a buffer.
+      }
+      return true;
+    }
+
+    PixelCallback pixel_callback_;
+    void* run_opaque_ = nullptr;  // Set from pixel_callback_.Init(); released in the stage destructor.
+    void* buffer_ = nullptr;  // Destination used when no callback is present.
+    size_t buffer_size_;  // Checked against byte offsets into buffer_.
+    size_t stride_;  // Byte distance between consecutive output rows.
+    size_t num_channels_;  // Interleaved channels per output pixel.
+    bool swap_endianness_;  // Byte-swap 16/32-bit samples before writing.
+    JxlDataType data_type_;
+    size_t bits_per_sample_;  // Sets the integer scale (2^bits - 1) in StoreUnsignedRow().
+    size_t channel_index_;  // used for extra_channels
+  };
+
+  Status PrepareForThreads(size_t num_threads) override {
+    JXL_RETURN_IF_ERROR(main_.PrepareForThreads(num_threads));
+    for (auto& extra : extra_channels_) {
+      JXL_RETURN_IF_ERROR(extra.PrepareForThreads(num_threads));
+    }
+    const size_t channels = main_.num_channels_;
+    temp_out_.resize(num_threads);  // One interleaved scratch row per thread.
+    for (CacheAlignedUniquePtr& scratch : temp_out_) {
+      scratch = AllocateArray(sizeof(float) * kMaxPixelsPerCall * channels);
+    }
+    if (flip_x_ || (has_alpha_ && want_alpha_ && unpremul_alpha_)) {
+      temp_in_.resize(num_threads * channels);  // Planar, per thread/channel.
+      for (CacheAlignedUniquePtr& plane : temp_in_) {
+        plane = AllocateArray(sizeof(float) * kMaxPixelsPerCall);
+      }
+    }
+    return true;
+  }
+  static bool ShouldFlipX(Orientation undo_orientation) {  // True for orientations whose undo reverses pixel order within a row.
+    return (undo_orientation == Orientation::kFlipHorizontal ||
+            undo_orientation == Orientation::kRotate180 ||
+            undo_orientation == Orientation::kRotate270 ||
+            undo_orientation == Orientation::kAntiTranspose);
+  }
+  static bool ShouldFlipY(Orientation undo_orientation) {  // True for orientations whose undo reverses the row order.
+    return (undo_orientation == Orientation::kFlipVertical ||
+            undo_orientation == Orientation::kRotate180 ||
+            undo_orientation == Orientation::kRotate90 ||
+            undo_orientation == Orientation::kAntiTranspose);
+  }
+  static bool ShouldTranspose(Orientation undo_orientation) {  // True for orientations whose undo swaps the x and y axes.
+    return (undo_orientation == Orientation::kTranspose ||
+            undo_orientation == Orientation::kRotate90 ||
+            undo_orientation == Orientation::kRotate270 ||
+            undo_orientation == Orientation::kAntiTranspose);
+  }
+
+  void UnpremulAlpha(size_t thread_id, size_t len,
+                     const float** line_buffers) const {
+    // Work on this thread's copies so the source rows are not modified, and
+    // make `line_buffers` point at those copies.
+    float* planes[4];
+    const size_t first_slot = thread_id * main_.num_channels_;
+    for (size_t c = 0; c < main_.num_channels_; ++c) {
+      planes[c] = reinterpret_cast<float*>(temp_in_[first_slot + c].get());
+      memcpy(planes[c], line_buffers[c], sizeof(float) * len);
+      line_buffers[c] = planes[c];
+    }
+    // color *= 1 / max(alpha, kSmallAlpha); alpha sits in slot num_color_.
+    const HWY_FULL(float) d;
+    const auto one = Set(d, 1.0f);
+    const auto min_alpha = Set(d, kSmallAlpha);
+    const float* alpha_plane = planes[num_color_];
+    for (size_t ix = 0; ix < len; ix += Lanes(d)) {
+      const auto inv = Div(one, Max(min_alpha, LoadU(d, alpha_plane + ix)));
+      for (size_t c = 0; c < num_color_; ++c) {
+        StoreU(Mul(LoadU(d, planes[c] + ix), inv), d, planes[c] + ix);
+      }
+    }
+  }
+
+  void OutputBuffers(const Output& out, size_t thread_id, size_t ypos,
+                     size_t xstart, size_t len, const float* input[4]) const {
+    if (flip_x_) {
+      FlipX(out, thread_id, len, &xstart, input);
+    }
+    // Samples are interleaved into this thread's scratch row in the requested
+    // type, byte-swapped if requested, then passed on to WriteToOutput().
+    const size_t num_samples = len * out.num_channels_;
+    const JxlDataType type = out.data_type_;
+    if (type == JXL_TYPE_UINT8) {
+      uint8_t* JXL_RESTRICT temp =
+          reinterpret_cast<uint8_t*>(temp_out_[thread_id].get());
+      StoreUnsignedRow(out, input, len, temp);
+      WriteToOutput(out, thread_id, ypos, xstart, len, temp);
+    } else if (type == JXL_TYPE_UINT16 || type == JXL_TYPE_FLOAT16) {
+      uint16_t* JXL_RESTRICT temp =
+          reinterpret_cast<uint16_t*>(temp_out_[thread_id].get());
+      if (type == JXL_TYPE_UINT16) {
+        StoreUnsignedRow(out, input, len, temp);
+      } else {
+        StoreFloat16Row(out, input, len, temp);
+      }
+      if (out.swap_endianness_) {
+        const HWY_FULL(uint16_t) du;
+        for (size_t j = 0; j < num_samples; j += Lanes(du)) {
+          const auto v = LoadU(du, temp + j);
+          StoreU(Or(ShiftRightSame(v, 8), ShiftLeftSame(v, 8)), du, temp + j);
+        }
+      }
+      WriteToOutput(out, thread_id, ypos, xstart, len, temp);
+    } else if (type == JXL_TYPE_FLOAT) {
+      float* JXL_RESTRICT temp =
+          reinterpret_cast<float*>(temp_out_[thread_id].get());
+      StoreFloatRow(out, input, len, temp);
+      if (out.swap_endianness_) {
+        for (size_t j = 0; j < num_samples; ++j) {
+          temp[j] = BSwapFloat(temp[j]);
+        }
+      }
+      WriteToOutput(out, thread_id, ypos, xstart, len, temp);
+    }
+  }
+
+  void FlipX(const Output& out, size_t thread_id, size_t len, size_t* xstart,
+             const float** line_buffers) const {
+    // Mirrors one chunk horizontally: every plane is reversed in this
+    // thread's scratch copy, `line_buffers` is redirected to the copies and
+    // `*xstart` is moved to the mirrored column.
+    float* planes[4];
+    const size_t first_slot = thread_id * main_.num_channels_;
+    for (size_t c = 0; c < out.num_channels_; ++c) {
+      planes[c] = reinterpret_cast<float*>(temp_in_[first_slot + c].get());
+      // After UnpremulAlpha() the data may already live in the scratch copy.
+      if (planes[c] == line_buffers[c]) continue;
+      memcpy(planes[c], line_buffers[c], sizeof(float) * len);
+    }
+
+    for (size_t c = 0; c < out.num_channels_; ++c) {
+      std::reverse(planes[c], planes[c] + len);
+      line_buffers[c] = planes[c];
+    }
+
+    // A chunk starting at column xstart now starts at width_ - xstart - len.
+    *xstart = width_ - *xstart - len;
+  }
+
+  template <typename T>
+  void StoreUnsignedRow(const Output& out, const float* input[4], size_t len,  // Planar float -> interleaved unsigned integers of type T.
+                        T* output) const {
+    const HWY_FULL(float) d;
+    auto zero = Zero(d);
+    auto one = Set(d, 1.0f);
+    auto mul = Set(d, (1u << (out.bits_per_sample_)) - 1);  // Largest value representable in bits_per_sample_ bits.
+    const Rebind<T, decltype(d)> du;
+    const size_t padding = RoundUpTo(len, Lanes(d)) - len;  // Samples between len and the end of the last full vector.
+    for (size_t c = 0; c < out.num_channels_; ++c) {
+      msan::UnpoisonMemory(input[c] + len, sizeof(input[c][0]) * padding);  // Whole vectors are loaded, so the padding is read too.
+    }
+    if (out.num_channels_ == 1) {  // Each branch: clamp, scale by `mul`, round to nearest, narrow to T, store interleaved.
+      for (size_t i = 0; i < len; i += Lanes(d)) {
+        auto v0 = Mul(Clamp(zero, LoadU(d, &input[0][i]), one), mul);
+        StoreU(DemoteTo(du, NearestInt(v0)), du, &output[i]);
+      }
+    } else if (out.num_channels_ == 2) {
+      for (size_t i = 0; i < len; i += Lanes(d)) {
+        auto v0 = Mul(Clamp(zero, LoadU(d, &input[0][i]), one), mul);
+        auto v1 = Mul(Clamp(zero, LoadU(d, &input[1][i]), one), mul);
+        StoreInterleaved2(DemoteTo(du, NearestInt(v0)),
+                          DemoteTo(du, NearestInt(v1)), du, &output[2 * i]);
+      }
+    } else if (out.num_channels_ == 3) {
+      for (size_t i = 0; i < len; i += Lanes(d)) {
+        auto v0 = Mul(Clamp(zero, LoadU(d, &input[0][i]), one), mul);
+        auto v1 = Mul(Clamp(zero, LoadU(d, &input[1][i]), one), mul);
+        auto v2 = Mul(Clamp(zero, LoadU(d, &input[2][i]), one), mul);
+        StoreInterleaved3(DemoteTo(du, NearestInt(v0)),
+                          DemoteTo(du, NearestInt(v1)),
+                          DemoteTo(du, NearestInt(v2)), du, &output[3 * i]);
+      }
+    } else if (out.num_channels_ == 4) {
+      for (size_t i = 0; i < len; i += Lanes(d)) {
+        auto v0 = Mul(Clamp(zero, LoadU(d, &input[0][i]), one), mul);
+        auto v1 = Mul(Clamp(zero, LoadU(d, &input[1][i]), one), mul);
+        auto v2 = Mul(Clamp(zero, LoadU(d, &input[2][i]), one), mul);
+        auto v3 = Mul(Clamp(zero, LoadU(d, &input[3][i]), one), mul);
+        StoreInterleaved4(DemoteTo(du, NearestInt(v0)),
+                          DemoteTo(du, NearestInt(v1)),
+                          DemoteTo(du, NearestInt(v2)),
+                          DemoteTo(du, NearestInt(v3)), du, &output[4 * i]);
+      }
+    }
+    msan::PoisonMemory(output + out.num_channels_ * len,  // Output past `len` pixels came from padding; mark it invalid.
+                       sizeof(output[0]) * out.num_channels_ * padding);
+  }
+
+  void StoreFloat16Row(const Output& out, const float* input[4], size_t len,  // Planar float -> interleaved float16 bit patterns.
+                       uint16_t* output) const {
+    const HWY_FULL(float) d;
+    const Rebind<uint16_t, decltype(d)> du;
+    const Rebind<hwy::float16_t, decltype(d)> df16;
+    const size_t padding = RoundUpTo(len, Lanes(d)) - len;  // Samples between len and the end of the last full vector.
+    for (size_t c = 0; c < out.num_channels_; ++c) {
+      msan::UnpoisonMemory(input[c] + len, sizeof(input[c][0]) * padding);  // Whole vectors are loaded, so the padding is read too.
+    }
+    if (out.num_channels_ == 1) {  // Each branch: narrow to float16, reinterpret as uint16_t, store interleaved.
+      for (size_t i = 0; i < len; i += Lanes(d)) {
+        auto v0 = LoadU(d, &input[0][i]);
+        StoreU(BitCast(du, DemoteTo(df16, v0)), du, &output[i]);
+      }
+    } else if (out.num_channels_ == 2) {
+      for (size_t i = 0; i < len; i += Lanes(d)) {
+        auto v0 = LoadU(d, &input[0][i]);
+        auto v1 = LoadU(d, &input[1][i]);
+        StoreInterleaved2(BitCast(du, DemoteTo(df16, v0)),
+                          BitCast(du, DemoteTo(df16, v1)), du, &output[2 * i]);
+      }
+    } else if (out.num_channels_ == 3) {
+      for (size_t i = 0; i < len; i += Lanes(d)) {
+        auto v0 = LoadU(d, &input[0][i]);
+        auto v1 = LoadU(d, &input[1][i]);
+        auto v2 = LoadU(d, &input[2][i]);
+        StoreInterleaved3(BitCast(du, DemoteTo(df16, v0)),
+                          BitCast(du, DemoteTo(df16, v1)),
+                          BitCast(du, DemoteTo(df16, v2)), du, &output[3 * i]);
+      }
+    } else if (out.num_channels_ == 4) {
+      for (size_t i = 0; i < len; i += Lanes(d)) {
+        auto v0 = LoadU(d, &input[0][i]);
+        auto v1 = LoadU(d, &input[1][i]);
+        auto v2 = LoadU(d, &input[2][i]);
+        auto v3 = LoadU(d, &input[3][i]);
+        StoreInterleaved4(BitCast(du, DemoteTo(df16, v0)),
+                          BitCast(du, DemoteTo(df16, v1)),
+                          BitCast(du, DemoteTo(df16, v2)),
+                          BitCast(du, DemoteTo(df16, v3)), du, &output[4 * i]);
+      }
+    }
+    msan::PoisonMemory(output + out.num_channels_ * len,  // Output past `len` pixels came from padding; mark it invalid.
+                       sizeof(output[0]) * out.num_channels_ * padding);
+  }
+
+  void StoreFloatRow(const Output& out, const float* input[4], size_t len,
+                     float* output) const {
+    const HWY_FULL(float) d;
+    if (out.num_channels_ == 1) {  // A single plane needs no interleaving.
+      memcpy(output, input[0], len * sizeof(output[0]));
+    } else if (out.num_channels_ == 2) {
+      for (size_t i = 0; i < len; i += Lanes(d)) {
+        StoreInterleaved2(LoadU(d, input[0] + i), LoadU(d, input[1] + i), d,
+                          output + 2 * i);
+      }
+    } else if (out.num_channels_ == 3) {
+      for (size_t i = 0; i < len; i += Lanes(d)) {
+        StoreInterleaved3(LoadU(d, input[0] + i), LoadU(d, input[1] + i),
+                          LoadU(d, input[2] + i), d, output + 3 * i);
+      }
+    } else {  // Every remaining channel count is stored as four channels.
+      for (size_t i = 0; i < len; i += Lanes(d)) {
+        StoreInterleaved4(LoadU(d, input[0] + i), LoadU(d, input[1] + i),
+                          LoadU(d, input[2] + i), LoadU(d, input[3] + i), d,
+                          output + 4 * i);
+      }
+    }
+  }
+
+  template <typename T>  // T is the sample type of the interleaved `output` row.
+  void WriteToOutput(const Output& out, size_t thread_id, size_t ypos,
+                     size_t xstart, size_t len, T* output) const {  // Hands len pixels to the callback or the buffer of `out`.
+    if (transpose_) {  // x and y are swapped at the destination, so pixels are delivered one by one.
+      // TODO(szabadka) Buffer 8x8 chunks and transpose with SIMD.
+      if (out.run_opaque_) {  // Callback output.
+        for (size_t i = 0, j = 0; i < len; ++i, j += out.num_channels_) {
+          out.pixel_callback_.run(out.run_opaque_, thread_id, ypos, xstart + i,
+                                  1, output + j);  // One pixel per call, with the coordinate arguments swapped relative to the non-transposed call.
+        }
+      } else {  // Buffer output.
+        const size_t pixel_stride = out.num_channels_ * sizeof(T);  // Bytes per pixel.
+        const size_t offset = xstart * out.stride_ + ypos * pixel_stride;  // Buffer row xstart, pixel column ypos.
+        for (size_t i = 0, j = 0; i < len; ++i, j += out.num_channels_) {
+          const size_t ix = offset + i * out.stride_;  // Each source pixel lands one buffer row further on.
+          JXL_DASSERT(ix + pixel_stride <= out.buffer_size_);
+          memcpy(reinterpret_cast<uint8_t*>(out.buffer_) + ix, output + j,
+                 pixel_stride);
+        }
+      }
+    } else {  // No transpose: the whole run is contiguous at the destination.
+      if (out.run_opaque_) {
+        out.pixel_callback_.run(out.run_opaque_, thread_id, xstart, ypos, len,
+                                output);  // Single call covering all len pixels.
+      } else {
+        const size_t pixel_stride = out.num_channels_ * sizeof(T);
+        const size_t offset = ypos * out.stride_ + xstart * pixel_stride;
+        JXL_DASSERT(offset + len * pixel_stride <= out.buffer_size_);  // The copy must end inside the buffer.
+        memcpy(reinterpret_cast<uint8_t*>(out.buffer_) + offset, output,
+               len * pixel_stride);
+      }
+    }
+  }
+
+  static constexpr size_t kMaxPixelsPerCall = 1024;
+  size_t width_;
+  size_t height_;
+  Output main_;  // color + alpha
+  size_t num_color_;
+  bool want_alpha_;
+  bool has_alpha_;
+  bool unpremul_alpha_;
+  size_t alpha_c_;
+  bool flip_x_;
+  bool flip_y_;
+  bool transpose_;
+  std::vector<Output> extra_channels_;
+  std::vector<float> opaque_alpha_;
+  std::vector<CacheAlignedUniquePtr> temp_in_;
+  std::vector<CacheAlignedUniquePtr> temp_out_;
+};
+
+constexpr size_t WriteToOutputStage::kMaxPixelsPerCall;  // Out-of-class definition of the static constexpr member declared in the class.
+
+std::unique_ptr<RenderPipelineStage> GetWriteToOutputStage(  // Per-target factory, defined inside HWY_NAMESPACE.
+    const ImageOutput& main_output, size_t width, size_t height, bool has_alpha,
+    bool unpremul_alpha, size_t alpha_c, Orientation undo_orientation,
+    std::vector<ImageOutput>& extra_output) {
+  return jxl::make_unique<WriteToOutputStage>(  // All arguments are forwarded unchanged to the constructor.
+      main_output, width, height, has_alpha, unpremul_alpha, alpha_c,
+      undo_orientation, extra_output);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace jxl {
+
+HWY_EXPORT(GetWriteToOutputStage);
+
+namespace {
+class WriteToImageBundleStage : public RenderPipelineStage {
+ public:
+  explicit WriteToImageBundleStage(ImageBundle* image_bundle,
+                                   ColorEncoding color_encoding)
+      : RenderPipelineStage(RenderPipelineStage::Settings()),
+        image_bundle_(image_bundle),
+        color_encoding_(std::move(color_encoding)) {}
+  // (Re)allocates the color image and one extra channel per input beyond 3.
+  void SetInputSizes(
+      const std::vector<std::pair<size_t, size_t>>& input_sizes) override {
+#if JXL_ENABLE_ASSERT
+    JXL_ASSERT(input_sizes.size() >= 3);
+    for (size_t c = 1; c < input_sizes.size(); c++) {
+      JXL_ASSERT(input_sizes[c].first == input_sizes[0].first);
+      JXL_ASSERT(input_sizes[c].second == input_sizes[0].second);
+    }
+#endif
+    // TODO(eustas): what should we do in the case of "want only ECs"?
+    const std::pair<size_t, size_t>& sz = input_sizes[0];
+    image_bundle_->SetFromImage(Image3F(sz.first, sz.second), color_encoding_);
+    // TODO(veluca): consider not reallocating ECs if not needed.
+    auto& ecs = image_bundle_->extra_channels();
+    ecs.clear();
+    for (size_t c = 3; c < input_sizes.size(); c++) {
+      ecs.emplace_back(input_sizes[c].first, input_sizes[c].second);
+    }
+  }
+  // Copies the current row of every channel into the bundle at (xpos, ypos).
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    // Each copy includes `xextra` border samples on both sides of the row.
+    const size_t num_bytes = sizeof(float) * (xsize + 2 * xextra);
+    for (size_t c = 0; c < 3; c++) {
+      auto* dst = image_bundle_->color()->PlaneRow(c, ypos) + xpos - xextra;
+      memcpy(dst, GetInputRow(input_rows, c, 0) - xextra, num_bytes);
+    }
+    for (size_t ec = 0; ec < image_bundle_->extra_channels().size(); ec++) {
+      JXL_ASSERT(image_bundle_->extra_channels()[ec].xsize() >=
+                 xpos + xsize + xextra);
+      auto* dst = image_bundle_->extra_channels()[ec].Row(ypos) + xpos - xextra;
+      memcpy(dst, GetInputRow(input_rows, 3 + ec, 0) - xextra, num_bytes);
+    }
+  }
+
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    return RenderPipelineChannelMode::kInput;
+  }
+
+  const char* GetName() const override { return "WriteIB"; }
+
+ private:
+  ImageBundle* image_bundle_;
+  ColorEncoding color_encoding_;
+};
+
+class WriteToImage3FStage : public RenderPipelineStage {
+ public:
+  explicit WriteToImage3FStage(Image3F* image)
+      : RenderPipelineStage(RenderPipelineStage::Settings()), image_(image) {}
+  // Replaces *image_ with a fresh image sized like the first input channel.
+  void SetInputSizes(
+      const std::vector<std::pair<size_t, size_t>>& input_sizes) override {
+#if JXL_ENABLE_ASSERT
+    JXL_ASSERT(input_sizes.size() >= 3);
+    for (size_t c = 1; c < 3; ++c) {
+      JXL_ASSERT(input_sizes[c].first == input_sizes[0].first);
+      JXL_ASSERT(input_sizes[c].second == input_sizes[0].second);
+    }
+#endif
+    *image_ = Image3F(input_sizes[0].first, input_sizes[0].second);
+  }
+  // Copies the current row of the three color channels into *image_.
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    const size_t num_bytes = sizeof(float) * (xsize + 2 * xextra);
+    for (size_t c = 0; c < 3; c++) {
+      auto* dst = image_->PlaneRow(c, ypos) + xpos - xextra;
+      memcpy(dst, GetInputRow(input_rows, c, 0) - xextra, num_bytes);
+    }
+  }
+  // Only the first three channels are consumed; any others are ignored.
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    if (c >= 3) return RenderPipelineChannelMode::kIgnored;
+    return RenderPipelineChannelMode::kInput;
+  }
+
+  const char* GetName() const override { return "WriteI3F"; }
+
+ private:
+  Image3F* image_;
+};
+
+}  // namespace
+
+std::unique_ptr<RenderPipelineStage> GetWriteToImageBundleStage(  // Factory for WriteToImageBundleStage.
+    ImageBundle* image_bundle, ColorEncoding color_encoding) {  // The stage stores the raw image_bundle pointer and takes color_encoding by move.
+  return jxl::make_unique<WriteToImageBundleStage>(image_bundle,
+                                                   std::move(color_encoding));
+}
+
+std::unique_ptr<RenderPipelineStage> GetWriteToImage3FStage(Image3F* image) {  // Factory for WriteToImage3FStage; the stage stores the raw pointer.
+  return jxl::make_unique<WriteToImage3FStage>(image);
+}
+
+std::unique_ptr<RenderPipelineStage> GetWriteToOutputStage(  // Public entry point in namespace jxl.
+    const ImageOutput& main_output, size_t width, size_t height, bool has_alpha,
+    bool unpremul_alpha, size_t alpha_c, Orientation undo_orientation,
+    std::vector<ImageOutput>& extra_output) {
+  return HWY_DYNAMIC_DISPATCH(GetWriteToOutputStage)(  // Forwards to the variant picked by HWY_DYNAMIC_DISPATCH.
+      main_output, width, height, has_alpha, unpremul_alpha, alpha_c,
+      undo_orientation, extra_output);
+}
+
+}  // namespace jxl
+
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_write.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_write.h
new file mode 100644
index 0000000000..c5f844ebe8
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_write.h
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_WRITE_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_WRITE_H_
+
+#include <functional>
+
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+// Gets a stage to write color and extra channels to an ImageBundle.
+std::unique_ptr<RenderPipelineStage> GetWriteToImageBundleStage(
+    ImageBundle* image_bundle, ColorEncoding color_encoding);
+
+// Gets a stage to write the first three (color) channels to an Image3F.
+std::unique_ptr<RenderPipelineStage> GetWriteToImage3FStage(Image3F* image);
+
+// Gets a stage to write to a pixel callback or image buffer.
+std::unique_ptr<RenderPipelineStage> GetWriteToOutputStage(
+    const ImageOutput& main_output, size_t width, size_t height, bool has_alpha,
+    bool unpremul_alpha, size_t alpha_c, Orientation undo_orientation,
+    std::vector<ImageOutput>& extra_output);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_RENDER_PIPELINE_STAGE_WRITE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_xyb.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_xyb.cc
new file mode 100644
index 0000000000..093f3d1b4d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_xyb.cc
@@ -0,0 +1,176 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_xyb.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_xyb.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/dec_xyb-inl.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/sanitizers.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+class XYBStage : public RenderPipelineStage {  // Converts the three XYB color channels in place.
+ public:
+  explicit XYBStage(const OutputEncodingInfo& output_encoding_info)
+      : RenderPipelineStage(RenderPipelineStage::Settings()),
+        opsin_params_(output_encoding_info.opsin_params),
+        output_is_xyb_(output_encoding_info.color_encoding.GetColorSpace() ==
+                       ColorSpace::kXYB) {}
+  // Rewrites channels 0..2 of the current row in place; requires xextra == 0.
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    const HWY_FULL(float) d;
+    JXL_ASSERT(xextra == 0);
+    const size_t xsize_v = RoundUpTo(xsize, Lanes(d));  // presumably xsize rounded up to a whole number of vectors - confirm RoundUpTo.
+    float* JXL_RESTRICT row0 = GetInputRow(input_rows, 0, 0);
+    float* JXL_RESTRICT row1 = GetInputRow(input_rows, 1, 0);
+    float* JXL_RESTRICT row2 = GetInputRow(input_rows, 2, 0);
+    // All calculations are lane-wise, but some might still require
+    // value-dependent behaviour (e.g. NearestInt). Temporarily unpoison the
+    // tail of the last vector.
+    msan::UnpoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize));
+    msan::UnpoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize));
+    msan::UnpoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize));
+    // TODO(eustas): when using frame origin, addresses might be unaligned;
+    //               making them aligned will avoid performance penalty.
+    if (output_is_xyb_) {  // Output stays XYB: apply the kScaledXYB offset and scale per channel.
+      const auto scale_x = Set(d, kScaledXYBScale[0]);
+      const auto scale_y = Set(d, kScaledXYBScale[1]);
+      const auto scale_bmy = Set(d, kScaledXYBScale[2]);
+      const auto offset_x = Set(d, kScaledXYBOffset[0]);
+      const auto offset_y = Set(d, kScaledXYBOffset[1]);
+      const auto offset_bmy = Set(d, kScaledXYBOffset[2]);
+      for (ssize_t x = -xextra; x < (ssize_t)(xsize + xextra); x += Lanes(d)) {  // xextra is 0 here, so x covers [0, xsize) in vector steps.
+        const auto in_x = LoadU(d, row0 + x);
+        const auto in_y = LoadU(d, row1 + x);
+        const auto in_b = LoadU(d, row2 + x);
+        auto out_x = Mul(Add(in_x, offset_x), scale_x);
+        auto out_y = Mul(Add(in_y, offset_y), scale_y);
+        auto out_b = Mul(Add(Sub(in_b, in_y), offset_bmy), scale_bmy);  // Third channel is stored as B minus Y before offset/scale.
+        StoreU(out_x, d, row0 + x);
+        StoreU(out_y, d, row1 + x);
+        StoreU(out_b, d, row2 + x);
+      }
+    } else {  // Otherwise convert to RGB with XybToRgb and opsin_params_.
+      for (ssize_t x = -xextra; x < (ssize_t)(xsize + xextra); x += Lanes(d)) {
+        const auto in_opsin_x = LoadU(d, row0 + x);
+        const auto in_opsin_y = LoadU(d, row1 + x);
+        const auto in_opsin_b = LoadU(d, row2 + x);
+        auto r = Undefined(d);
+        auto g = Undefined(d);
+        auto b = Undefined(d);
+        XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b, opsin_params_, &r, &g,
+                 &b);
+        StoreU(r, d, row0 + x);
+        StoreU(g, d, row1 + x);
+        StoreU(b, d, row2 + x);
+      }
+    }
+    msan::PoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize));  // Re-poison the tails unpoisoned before the loops.
+    msan::PoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize));
+    msan::PoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize));
+  }
+  // Channels 0..2 are modified in place; all other channels are ignored.
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    return c < 3 ? RenderPipelineChannelMode::kInPlace
+                 : RenderPipelineChannelMode::kIgnored;
+  }
+
+  const char* GetName() const override { return "XYB"; }
+
+ private:
+  const OpsinParams opsin_params_;  // Copied from output_encoding_info in the constructor.
+  const bool output_is_xyb_;  // True when the output color space is ColorSpace::kXYB.
+};
+
+std::unique_ptr<RenderPipelineStage> GetXYBStage(  // Per-target factory, defined inside HWY_NAMESPACE.
+    const OutputEncodingInfo& output_encoding_info) {
+  return jxl::make_unique<XYBStage>(output_encoding_info);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetXYBStage);
+
+std::unique_ptr<RenderPipelineStage> GetXYBStage(  // Public entry point in namespace jxl.
+    const OutputEncodingInfo& output_encoding_info) {
+  return HWY_DYNAMIC_DISPATCH(GetXYBStage)(output_encoding_info);  // Forwards to the variant picked by HWY_DYNAMIC_DISPATCH.
+}
+
+#if !JXL_HIGH_PRECISION
+namespace {
+class FastXYBStage : public RenderPipelineStage {
+ public:
+  FastXYBStage(uint8_t* rgb, size_t stride, size_t width, size_t height,
+               bool rgba, bool has_alpha, size_t alpha_c)
+      : RenderPipelineStage(RenderPipelineStage::Settings()),
+        rgb_(rgb),
+        stride_(stride),
+        width_(width),
+        height_(height),
+        rgba_(rgba),
+        has_alpha_(has_alpha),
+        alpha_c_(alpha_c) {}
+  // Converts one XYB(A) row with FastXYBTosRGB8 and writes it into rgb_.
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    // Skip anything starting outside the image; keeps width_ - xpos from wrapping.
+    if (ypos >= height_ || xpos >= width_) return;
+    JXL_ASSERT(xextra == 0);
+    const float* xyba[4] = {
+        GetInputRow(input_rows, 0, 0), GetInputRow(input_rows, 1, 0),
+        GetInputRow(input_rows, 2, 0),
+        has_alpha_ ? GetInputRow(input_rows, alpha_c_, 0) : nullptr};
+    uint8_t* out_buf = rgb_ + stride_ * ypos + (rgba_ ? 4 : 3) * xpos;  // 4 or 3 bytes per output pixel.
+    FastXYBTosRGB8(xyba, out_buf, rgba_,
+                   xsize + xpos <= width_ ? xsize : width_ - xpos);  // Pixel count clamped to the right image edge.
+  }
+
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    return c < 3 || (has_alpha_ && c == alpha_c_)
+               ? RenderPipelineChannelMode::kInput
+               : RenderPipelineChannelMode::kIgnored;
+  }
+
+  const char* GetName() const override { return "FastXYB"; }
+
+ private:
+  uint8_t* rgb_;      // Destination buffer supplied by the caller.
+  size_t stride_;     // Bytes between consecutive output rows.
+  size_t width_;      // Columns at or beyond this are not written.
+  size_t height_;     // Rows at or beyond this are not written.
+  bool rgba_;         // Selects 4 (true) or 3 (false) bytes per output pixel.
+  bool has_alpha_;    // Whether an alpha input row is passed to the converter.
+  size_t alpha_c_;    // Index of the alpha channel among the inputs.
+};
+
+}  // namespace
+
+std::unique_ptr<RenderPipelineStage> GetFastXYBTosRGB8Stage(  // Factory for FastXYBStage; compiled only when JXL_HIGH_PRECISION is 0.
+    uint8_t* rgb, size_t stride, size_t width, size_t height, bool rgba,
+    bool has_alpha, size_t alpha_c) {
+  JXL_ASSERT(HasFastXYBTosRGB8());  // Callers must only request this stage when HasFastXYBTosRGB8() holds.
+  return make_unique<FastXYBStage>(rgb, stride, width, height, rgba, has_alpha,
+                                   alpha_c);
+}
+#endif
+
+}  // namespace jxl
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_xyb.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_xyb.h
new file mode 100644
index 0000000000..7b06345c36
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_xyb.h
@@ -0,0 +1,26 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_XYB_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_XYB_H_
+#include <stdint.h>
+
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Converts the color channels from XYB to linear with appropriate primaries.
+std::unique_ptr<RenderPipelineStage> GetXYBStage(
+    const OutputEncodingInfo& output_encoding_info);
+// NOTE(review): the definition in stage_xyb.cc is guarded by `#if !JXL_HIGH_PRECISION`.
+// Gets a stage to convert with fixed point arithmetic from XYB to sRGB8 and
+// write to a uint8 buffer (3 or 4 bytes per pixel, depending on `rgba`).
+std::unique_ptr<RenderPipelineStage> GetFastXYBTosRGB8Stage(
+    uint8_t* rgb, size_t stride, size_t width, size_t height, bool rgba,
+    bool has_alpha, size_t alpha_c);
+}  // namespace jxl
+
+#endif  // LIB_JXL_RENDER_PIPELINE_STAGE_XYB_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_ycbcr.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_ycbcr.cc
new file mode 100644
index 0000000000..30ad327221
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_ycbcr.cc
@@ -0,0 +1,83 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_ycbcr.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_ycbcr.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::MulAdd;
+
+class kYCbCrStage : public RenderPipelineStage {  // In-place YCbCr -> RGB conversion of channels 0..2.
+ public:
+  kYCbCrStage() : RenderPipelineStage(RenderPipelineStage::Settings()) {}
+  // Converts one row: channel 0 is read as Cb, channel 1 as Y, channel 2 as Cr.
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    const HWY_FULL(float) df;
+
+    // Full-range BT.601 as defined by JFIF Clause 7:
+    // https://www.itu.int/rec/T-REC-T.871-201105-I/en
+    const auto c128 = Set(df, 128.0f / 255);  // Added to Y before the chroma terms.
+    const auto crcr = Set(df, 1.402f);  // Cr weight for R.
+    const auto cgcb = Set(df, -0.114f * 1.772f / 0.587f);  // Cb weight for G.
+    const auto cgcr = Set(df, -0.299f * 1.402f / 0.587f);  // Cr weight for G.
+    const auto cbcb = Set(df, 1.772f);  // Cb weight for B.
+
+    float* JXL_RESTRICT row0 = GetInputRow(input_rows, 0, 0);
+    float* JXL_RESTRICT row1 = GetInputRow(input_rows, 1, 0);
+    float* JXL_RESTRICT row2 = GetInputRow(input_rows, 2, 0);
+    // TODO(eustas): when using frame origin, addresses might be unaligned;
+    //               making them aligned will avoid performance penalty.
+    for (size_t x = 0; x < xsize; x += Lanes(df)) {  // Whole vectors: the last step may process up to Lanes(df) - 1 samples past xsize.
+      const auto y_vec = Add(LoadU(df, row1 + x), c128);
+      const auto cb_vec = LoadU(df, row0 + x);
+      const auto cr_vec = LoadU(df, row2 + x);
+      const auto r_vec = MulAdd(crcr, cr_vec, y_vec);
+      const auto g_vec = MulAdd(cgcr, cr_vec, MulAdd(cgcb, cb_vec, y_vec));
+      const auto b_vec = MulAdd(cbcb, cb_vec, y_vec);
+      StoreU(r_vec, df, row0 + x);  // R replaces Cb in channel 0.
+      StoreU(g_vec, df, row1 + x);  // G replaces Y in channel 1.
+      StoreU(b_vec, df, row2 + x);  // B replaces Cr in channel 2.
+    }
+  }
+  // Channels 0..2 are modified in place; all other channels are ignored.
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    return c < 3 ? RenderPipelineChannelMode::kInPlace
+                 : RenderPipelineChannelMode::kIgnored;
+  }
+
+  const char* GetName() const override { return "YCbCr"; }
+};
+
+std::unique_ptr<RenderPipelineStage> GetYCbCrStage() {  // Per-target factory, defined inside HWY_NAMESPACE.
+  return jxl::make_unique<kYCbCrStage>();
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetYCbCrStage);
+
+std::unique_ptr<RenderPipelineStage> GetYCbCrStage() {  // Public entry point in namespace jxl.
+  return HWY_DYNAMIC_DISPATCH(GetYCbCrStage)();  // Forwards to the variant picked by HWY_DYNAMIC_DISPATCH.
+}
+
+}  // namespace jxl
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_ycbcr.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_ycbcr.h
new file mode 100644
index 0000000000..9320c9723f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_ycbcr.h
@@ -0,0 +1,25 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_YCBCR_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_YCBCR_H_
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+// Input channel order is (Cb, Y, Cr); the stage overwrites it with (R, G, B).
+// Gets a stage that converts the color channels from YCbCr to RGB in place.
+std::unique_ptr<RenderPipelineStage> GetYCbCrStage();
+}  // namespace jxl
+
+#endif  // LIB_JXL_RENDER_PIPELINE_STAGE_YCBCR_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/test_render_pipeline_stages.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/test_render_pipeline_stages.h
new file mode 100644
index 0000000000..789a52f8b2
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/test_render_pipeline_stages.h
@@ -0,0 +1,101 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+class UpsampleXSlowStage : public RenderPipelineStage {  // Test helper: scalar horizontal 2x upsampling.
+ public:
+  UpsampleXSlowStage()
+      : RenderPipelineStage(RenderPipelineStage::Settings::ShiftX(1, 1)) {}  // presumably shift 1 with a 1-sample border - confirm ShiftX's parameters.
+  // Writes two output samples per input sample, for every channel.
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    for (size_t c = 0; c < input_rows.size(); c++) {
+      const float* row = GetInputRow(input_rows, c, 0);
+      float* row_out = GetOutputRow(output_rows, c, 0);
+      for (int64_t x = -xextra; x < (int64_t)(xsize + xextra); x++) {  // Signed index so the xextra border left of 0 is covered.
+        float xp = *(row + x - 1);  // Left neighbour.
+        float xc = *(row + x);      // Current sample.
+        float xn = *(row + x + 1);  // Right neighbour.
+        float xout0 = xp * 0.25f + xc * 0.75f;
+        float xout1 = xc * 0.75f + xn * 0.25f;
+        *(row_out + 2 * x + 0) = xout0;  // Output positions 2x and 2x + 1.
+        *(row_out + 2 * x + 1) = xout1;
+      }
+    }
+  }
+
+  const char* GetName() const override { return "TEST::UpsampleXSlowStage"; }
+
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    return RenderPipelineChannelMode::kInOut;
+  }
+};
+
+class UpsampleYSlowStage : public RenderPipelineStage {  // Test helper: scalar vertical 2x upsampling.
+ public:
+  UpsampleYSlowStage()
+      : RenderPipelineStage(RenderPipelineStage::Settings::ShiftY(1, 1)) {}  // presumably shift 1 with a 1-row border - confirm ShiftY's parameters.
+  // Produces two output rows from the previous, current and next input rows.
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    for (size_t c = 0; c < input_rows.size(); c++) {
+      const float* rowp = GetInputRow(input_rows, c, -1);  // Row above.
+      const float* rowc = GetInputRow(input_rows, c, 0);   // Current row.
+      const float* rown = GetInputRow(input_rows, c, 1);   // Row below.
+      float* row_out0 = GetOutputRow(output_rows, c, 0);
+      float* row_out1 = GetOutputRow(output_rows, c, 1);
+      for (int64_t x = -xextra; x < (int64_t)(xsize + xextra); x++) {  // Signed index so the xextra border left of 0 is covered.
+        float xp = *(rowp + x);
+        float xc = *(rowc + x);
+        float xn = *(rown + x);
+        float yout0 = xp * 0.25f + xc * 0.75f;
+        float yout1 = xc * 0.75f + xn * 0.25f;
+        *(row_out0 + x) = yout0;  // First output row blends with the row above.
+        *(row_out1 + x) = yout1;  // Second output row blends with the row below.
+      }
+    }
+  }
+
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    return RenderPipelineChannelMode::kInOut;
+  }
+
+  const char* GetName() const override { return "TEST::UpsampleYSlowStage"; }
+};
+
+class Check0FinalStage : public RenderPipelineStage {  // Test helper: final stage that checks all input samples are ~0.
+ public:
+  Check0FinalStage() : RenderPipelineStage(RenderPipelineStage::Settings()) {}
+  // Checks the samples in [0, xsize) of every channel; xextra is not used.
+  void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+                  size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+                  size_t thread_id) const final {
+    for (size_t c = 0; c < input_rows.size(); c++) {
+      for (size_t x = 0; x < xsize; x++) {
+        JXL_CHECK(fabsf(GetInputRow(input_rows, c, 0)[x]) < 1e-8);  // Magnitude must be below 1e-8.
+      }
+    }
+  }
+
+  RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+    return RenderPipelineChannelMode::kInput;
+  }
+  const char* GetName() const override { return "TEST::Check0FinalStage"; }
+};
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/roundtrip_test.cc b/third-party/libjxl/libjxl/lib/jxl/roundtrip_test.cc
new file mode 100644
index 0000000000..7640ca7c5f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/roundtrip_test.cc
@@ -0,0 +1,833 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/codestream_header.h>
+#include <jxl/decode.h>
+#include <jxl/decode_cxx.h>
+#include <jxl/encode.h>
+#include <jxl/encode_cxx.h>
+#include <jxl/types.h>
+
+#include <cmath>  // std::abs
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <vector>
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_comparator.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/encode_internal.h"
+#include "lib/jxl/icc_codec.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace {
+
+// Converts a raw test pixel buffer of xsize x ysize pixels to a CodecInOut.
+// icc_profile can be empty to automatically deduce the color encoding from the
+// pixel format, or filled in to force this ICC profile.
+jxl::CodecInOut ConvertTestImage(const std::vector<uint8_t>& buf,
+                                 const size_t xsize, const size_t ysize,
+                                 const JxlPixelFormat& pixel_format,
+                                 const jxl::PaddedBytes& icc_profile) {
+  jxl::CodecInOut io;
+  io.SetSize(xsize, ysize);
+
+  bool is_gray = pixel_format.num_channels < 3;  // 1 or 2 channels: gray (+ alpha).
+  bool has_alpha =
+      pixel_format.num_channels == 2 || pixel_format.num_channels == 4;
+
+  io.metadata.m.color_encoding.SetColorSpace(is_gray ? jxl::ColorSpace::kGray
+                                                     : jxl::ColorSpace::kRGB);
+  if (has_alpha) {
+    // Note: alpha > 16 not yet supported by the C++ codec
+    switch (pixel_format.data_type) {
+      case JXL_TYPE_UINT8:
+        io.metadata.m.SetAlphaBits(8);
+        break;
+      case JXL_TYPE_UINT16:
+      case JXL_TYPE_FLOAT:
+      case JXL_TYPE_FLOAT16:
+        io.metadata.m.SetAlphaBits(16);  // Float inputs also get 16 alpha bits.
+        break;
+      default:
+        ADD_FAILURE() << "Roundtrip tests for data type "
+                      << pixel_format.data_type << " not yet implemented.";
+    }
+  }
+  size_t bitdepth = 0;  // Stays 0 if the data type is not handled below (a test failure is recorded).
+  switch (pixel_format.data_type) {
+    case JXL_TYPE_FLOAT:
+      bitdepth = 32;
+      io.metadata.m.SetFloat32Samples();
+      break;
+    case JXL_TYPE_FLOAT16:
+      bitdepth = 16;
+      io.metadata.m.SetFloat16Samples();
+      break;
+    case JXL_TYPE_UINT8:
+      bitdepth = 8;
+      io.metadata.m.SetUintSamples(8);
+      break;
+    case JXL_TYPE_UINT16:
+      bitdepth = 16;
+      io.metadata.m.SetUintSamples(16);
+      break;
+    default:
+      ADD_FAILURE() << "Roundtrip tests for data type "
+                    << pixel_format.data_type << " not yet implemented.";
+  }
+  jxl::ColorEncoding color_encoding;
+  if (!icc_profile.empty()) {  // A supplied ICC profile takes precedence.
+    jxl::PaddedBytes icc_profile_copy(icc_profile);  // SetICC takes the bytes by move, so work on a copy.
+    EXPECT_TRUE(
+        color_encoding.SetICC(std::move(icc_profile_copy), &jxl::GetJxlCms()));
+  } else if (pixel_format.data_type == JXL_TYPE_FLOAT) {  // 32-bit float input is treated as linear sRGB.
+    color_encoding = jxl::ColorEncoding::LinearSRGB(is_gray);
+  } else {  // Every other data type is treated as (non-linear) sRGB.
+    color_encoding = jxl::ColorEncoding::SRGB(is_gray);
+  }
+  EXPECT_TRUE(
+      ConvertFromExternal(jxl::Span<const uint8_t>(buf.data(), buf.size()),
+                          xsize, ysize, color_encoding,
+                          /*bits_per_sample=*/bitdepth, pixel_format,
+                          /*pool=*/nullptr, &io.Main()));
+  return io;
+}
+
+template <typename T>
+T ConvertTestPixel(float val);
+// float samples are stored unchanged.
+template <>
+float ConvertTestPixel<float>(const float val) {
+  return val;
+}
+// 16-bit samples: scale by UINT16_MAX, then truncate toward zero.
+template <>
+uint16_t ConvertTestPixel<uint16_t>(const float val) {
+  return static_cast<uint16_t>(val * UINT16_MAX);
+}
+// 8-bit samples: scale by UINT8_MAX, then truncate toward zero.
+template <>
+uint8_t ConvertTestPixel<uint8_t>(const float val) {
+  return static_cast<uint8_t>(val * UINT8_MAX);
+}
+
+// Returns a gradient test image as the raw bytes of interleaved samples of type T.
+template <typename T>
+std::vector<uint8_t> GetTestImage(const size_t xsize, const size_t ysize,
+                                  const JxlPixelFormat& pixel_format) {
+  std::vector<T> pixels(xsize * ysize * pixel_format.num_channels);
+  for (size_t y = 0; y < ysize; y++) {
+    for (size_t x = 0; x < xsize; x++) {
+      for (size_t chan = 0; chan < pixel_format.num_channels; chan++) {
+        float val;  // Assigned by every case below, since chan % 4 is always 0..3.
+        switch (chan % 4) {
+          case 0:  // Vertical gradient.
+            val = static_cast<float>(y) / static_cast<float>(ysize);
+            break;
+          case 1:  // Horizontal gradient.
+            val = static_cast<float>(x) / static_cast<float>(xsize);
+            break;
+          case 2:  // Diagonal gradient.
+            val = static_cast<float>(x + y) / static_cast<float>(xsize + ysize);
+            break;
+          case 3:  // Product of both coordinates.
+            val = static_cast<float>(x * y) / static_cast<float>(xsize * ysize);
+            break;
+        }
+        pixels[(y * xsize + x) * pixel_format.num_channels + chan] =
+            ConvertTestPixel<T>(val);  // Row-major pixels, channels interleaved.
+      }
+    }
+  }
+  std::vector<uint8_t> bytes(pixels.size() * sizeof(T));
+  memcpy(bytes.data(), pixels.data(), sizeof(T) * pixels.size());  // Byte-for-byte copy of the T samples.
+  return bytes;
+}
+
+void EncodeWithEncoder(JxlEncoder* enc, std::vector<uint8_t>* compressed) {
+  // Drains the encoder into *compressed, doubling the buffer on demand.
+  compressed->resize(64);
+  uint8_t* next_out = compressed->data();
+  size_t avail_out = compressed->size();
+  JxlEncoderStatus process_result;
+  for (;;) {
+    process_result = JxlEncoderProcessOutput(enc, &next_out, &avail_out);
+    if (process_result != JXL_ENC_NEED_MORE_OUTPUT) break;
+    const size_t written = next_out - compressed->data();
+    compressed->resize(compressed->size() * 2);
+    next_out = compressed->data() + written;
+    avail_out = compressed->size() - written;
+  }
+  compressed->resize(next_out - compressed->data());
+  EXPECT_EQ(JXL_ENC_SUCCESS, process_result);
+}
+
+// Encodes a generated test image (plus one buffer that is reused for every
+// entry of extra_channels), decodes it again and checks the result: exact
+// pixels if lossless and not already_downsampled, Butteraugli <= 2.0
+// otherwise. TODO(firsching): change this to be a parameterized test, like in
+// decode_test.cc
+template <typename T>
+void VerifyRoundtripCompression(
+    const size_t xsize, const size_t ysize,
+    const JxlPixelFormat& input_pixel_format,
+    const JxlPixelFormat& output_pixel_format, const bool lossless,
+    const bool use_container, const uint32_t resampling = 1,
+    const bool already_downsampled = false,
+    const std::vector<std::pair<JxlExtraChannelType, std::string>>&
+        extra_channels = {}) {
+  size_t orig_xsize = xsize;
+  size_t orig_ysize = ysize;
+  if (already_downsampled) {
+    orig_xsize = jxl::DivCeil(xsize, resampling);
+    orig_ysize = jxl::DivCeil(ysize, resampling);
+  }
+
+  JxlPixelFormat extra_channel_pixel_format = input_pixel_format;
+  extra_channel_pixel_format.num_channels = 1;
+  const std::vector<uint8_t> extra_channel_bytes =
+      GetTestImage<T>(xsize, ysize, extra_channel_pixel_format);
+  const std::vector<uint8_t> original_bytes =
+      GetTestImage<T>(orig_xsize, orig_ysize, input_pixel_format);
+  jxl::CodecInOut original_io = ConvertTestImage(
+      original_bytes, orig_xsize, orig_ysize, input_pixel_format, {});
+
+  JxlEncoder* enc = JxlEncoderCreate(nullptr);
+  EXPECT_NE(nullptr, enc);
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc, 10));
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderUseContainer(enc, use_container));
+  JxlBasicInfo basic_info;
+  jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &input_pixel_format);
+  basic_info.xsize = xsize;
+  basic_info.ysize = ysize;
+  basic_info.uses_original_profile = lossless;
+  uint32_t num_channels = input_pixel_format.num_channels;
+  size_t has_interleaved_alpha = num_channels == 2 || num_channels == 4;
+  JxlPixelFormat output_pixel_format_with_extra_channel_alpha =
+      output_pixel_format;
+
+  // If alpha is only provided as an extra channel (not interleaved), then:
+  // 1. modify the original_io to have the correct alpha channel
+  // 2. change output_pixel_format_with_extra_channel_alpha to have alpha too
+  bool alpha_in_extra_channels_vector = false;
+  for (const auto& extra_channel : extra_channels) {
+    if (extra_channel.first == JXL_CHANNEL_ALPHA) {
+      alpha_in_extra_channels_vector = true;
+    }
+  }
+  if (alpha_in_extra_channels_vector && !has_interleaved_alpha) {
+    jxl::ImageF alpha_channel(xsize, ysize);
+    EXPECT_TRUE(jxl::ConvertFromExternal(
+        jxl::Span<const uint8_t>(extra_channel_bytes.data(),
+                                 extra_channel_bytes.size()),
+        xsize, ysize, basic_info.bits_per_sample, extra_channel_pixel_format, 0,
+        /*pool=*/nullptr, &alpha_channel));
+
+    original_io.metadata.m.SetAlphaBits(basic_info.bits_per_sample);
+    original_io.Main().SetAlpha(std::move(alpha_channel));
+    output_pixel_format_with_extra_channel_alpha.num_channels++;
+  }
+  // Those are the num_extra_channels including a potential alpha channel.
+  basic_info.num_extra_channels = extra_channels.size() + has_interleaved_alpha;
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc, &basic_info));
+  EXPECT_EQ(enc->metadata.m.num_extra_channels,
+            extra_channels.size() + has_interleaved_alpha);
+  JxlColorEncoding color_encoding;
+  if (input_pixel_format.data_type == JXL_TYPE_FLOAT) {
+    JxlColorEncodingSetToLinearSRGB(
+        &color_encoding,
+        /*is_gray=*/input_pixel_format.num_channels < 3);
+  } else {
+    JxlColorEncodingSetToSRGB(&color_encoding,
+                              /*is_gray=*/input_pixel_format.num_channels < 3);
+  }
+
+  std::vector<JxlExtraChannelInfo> channel_infos;
+  for (const auto& extra_channel : extra_channels) {
+    auto channel_type = extra_channel.first;
+    JxlExtraChannelInfo channel_info;
+    JxlEncoderInitExtraChannelInfo(channel_type, &channel_info);
+    channel_info.bits_per_sample = (lossless ? basic_info.bits_per_sample : 8);
+    channel_info.exponent_bits_per_sample =
+        (lossless ? basic_info.exponent_bits_per_sample : 0);
+    channel_infos.push_back(channel_info);
+  }
+  for (size_t index = 0; index < channel_infos.size(); index++) {
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderSetExtraChannelInfo(enc, index + has_interleaved_alpha,
+                                            &channel_infos[index]));
+    std::string name = extra_channels[index].second;
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderSetExtraChannelName(enc, index + has_interleaved_alpha,
+                                            name.c_str(), name.length()));
+  }
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetColorEncoding(enc, &color_encoding));
+  JxlEncoderFrameSettings* frame_settings =
+      JxlEncoderFrameSettingsCreate(enc, nullptr);
+  JxlEncoderSetFrameLossless(frame_settings, lossless);
+  if (resampling > 1) {
+    EXPECT_EQ(
+        JXL_ENC_SUCCESS,
+        JxlEncoderFrameSettingsSetOption(
+            frame_settings, JXL_ENC_FRAME_SETTING_RESAMPLING, resampling));
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderFrameSettingsSetOption(
+                  frame_settings, JXL_ENC_FRAME_SETTING_ALREADY_DOWNSAMPLED,
+                  already_downsampled));
+  }
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderAddImageFrame(frame_settings, &input_pixel_format,
+                                    (void*)original_bytes.data(),
+                                    original_bytes.size()));
+  ASSERT_FALSE(frame_settings->enc->input_queue.empty());  // guards back()
+  EXPECT_EQ(frame_settings->enc->input_queue.back()
+                .frame->frame.extra_channels()
+                .size(),
+            has_interleaved_alpha + extra_channels.size());
+  for (size_t index = 0; index < channel_infos.size(); index++) {
+    EXPECT_EQ(JXL_ENC_SUCCESS,
+              JxlEncoderSetExtraChannelBuffer(
+                  frame_settings, &extra_channel_pixel_format,
+                  (void*)extra_channel_bytes.data(), extra_channel_bytes.size(),
+                  index + has_interleaved_alpha));
+  }
+  JxlEncoderCloseInput(enc);
+  EXPECT_EQ(frame_settings->enc->input_queue.back()
+                .frame->frame.extra_channels()
+                .size(),
+            has_interleaved_alpha + extra_channels.size());
+  std::vector<uint8_t> compressed;
+  EncodeWithEncoder(enc, &compressed);
+  JxlEncoderDestroy(enc);
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+  EXPECT_NE(nullptr, dec);
+
+  const uint8_t* next_in = compressed.data();
+  size_t avail_in = compressed.size();
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO |
+                                               JXL_DEC_COLOR_ENCODING |
+                                               JXL_DEC_FULL_IMAGE));
+
+  JxlDecoderSetInput(dec, next_in, avail_in);
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  size_t buffer_size;
+  EXPECT_EQ(
+      JXL_DEC_SUCCESS,
+      JxlDecoderImageOutBufferSize(
+          dec, &output_pixel_format_with_extra_channel_alpha, &buffer_size));
+  // Same format object in and out, and no alpha appended: sizes must agree.
+  if (&input_pixel_format == &output_pixel_format && !already_downsampled &&
+      !(alpha_in_extra_channels_vector && !has_interleaved_alpha)) {
+    EXPECT_EQ(buffer_size, original_bytes.size());
+  }
+
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+  EXPECT_EQ(xsize, info.xsize);
+  EXPECT_EQ(ysize, info.ysize);
+  EXPECT_EQ(extra_channels.size() + has_interleaved_alpha,
+            info.num_extra_channels);
+
+  EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+
+  size_t icc_profile_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_DATA,
+                                        &icc_profile_size));
+  jxl::PaddedBytes icc_profile(icc_profile_size);
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsICCProfile(
+                                 dec, JXL_COLOR_PROFILE_TARGET_DATA,
+                                 icc_profile.data(), icc_profile.size()));
+
+  std::vector<uint8_t> decoded_bytes(buffer_size);
+
+  EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetImageOutBuffer(
+                dec, &output_pixel_format_with_extra_channel_alpha,
+                decoded_bytes.data(), decoded_bytes.size()));
+  std::vector<std::vector<uint8_t>> extra_channel_decoded_bytes(
+      info.num_extra_channels - has_interleaved_alpha);
+
+  for (size_t index = has_interleaved_alpha; index < info.num_extra_channels;
+       index++) {
+    JxlExtraChannelInfo channel_info;
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderGetExtraChannelInfo(dec, index, &channel_info));
+    EXPECT_EQ(channel_info.type,
+              extra_channels[index - has_interleaved_alpha].first);
+    std::string input_name =
+        extra_channels[index - has_interleaved_alpha].second;
+    const size_t name_length = channel_info.name_length;
+    EXPECT_EQ(input_name.size(), name_length);
+    std::vector<char> output_name(name_length + 1);
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderGetExtraChannelName(dec, index, output_name.data(),
+                                            output_name.size()));
+    EXPECT_EQ(0,
+              memcmp(input_name.data(), output_name.data(), input_name.size()));
+    size_t extra_buffer_size;
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderExtraChannelBufferSize(dec, &output_pixel_format,
+                                               &extra_buffer_size, index));
+    std::vector<uint8_t> extra_decoded_bytes(extra_buffer_size);
+    extra_channel_decoded_bytes[index - has_interleaved_alpha] =
+        std::move(extra_decoded_bytes);
+    EXPECT_EQ(
+        JXL_DEC_SUCCESS,
+        JxlDecoderSetExtraChannelBuffer(
+            dec, &output_pixel_format,
+            extra_channel_decoded_bytes[index - has_interleaved_alpha].data(),
+            extra_channel_decoded_bytes[index - has_interleaved_alpha].size(),
+            index));
+  }
+  EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+  // Check if there are no further errors after getting the full image, e.g.
+  // check that the final codestream box is actually marked as last.
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+  JxlDecoderDestroy(dec);
+
+  jxl::CodecInOut decoded_io = ConvertTestImage(
+      decoded_bytes, xsize, ysize, output_pixel_format_with_extra_channel_alpha,
+      icc_profile);
+
+  if (already_downsampled) {
+    jxl::Image3F* color = decoded_io.Main().color();
+    jxl::DownsampleImage(color, resampling);
+    if (decoded_io.Main().HasAlpha()) {
+      jxl::ImageF* alpha = decoded_io.Main().alpha();
+      jxl::DownsampleImage(alpha, resampling);
+    }
+    decoded_io.SetSize(color->xsize(), color->ysize());
+  }
+
+  if (lossless && !already_downsampled) {
+    JXL_EXPECT_OK(jxl::SamePixels(*original_io.Main().color(),
+                                  *decoded_io.Main().color(), _));
+  } else {
+    jxl::ButteraugliParams ba;
+    float butteraugli_score = ButteraugliDistance(
+        original_io.frames, decoded_io.frames, ba, jxl::GetJxlCms(),
+        /*distmap=*/nullptr, nullptr);
+    EXPECT_LE(butteraugli_score, 2.0f);
+  }
+  JxlPixelFormat extra_channel_output_pixel_format = output_pixel_format;
+  extra_channel_output_pixel_format.num_channels = 1;
+  for (auto& extra_channel : extra_channel_decoded_bytes) {
+    EXPECT_EQ(extra_channel.size(), extra_channel_bytes.size());
+    if (lossless) {
+      EXPECT_EQ(jxl::test::ComparePixels(extra_channel.data(),
+                                         extra_channel_bytes.data(), xsize,
+                                         ysize, extra_channel_pixel_format,
+                                         extra_channel_output_pixel_format),
+                0u);
+      EXPECT_EQ(extra_channel, extra_channel_bytes);
+    }
+  }
+}
+
+}  // namespace
+
+TEST(RoundtripTest, FloatFrameRoundtripTest) {
+  using ChannelList = std::vector<std::pair<JxlExtraChannelType, std::string>>;
+  const std::vector<ChannelList> extra_channels_cases = {
+      {},
+      {{JXL_CHANNEL_ALPHA, "my extra alpha channel"}},
+      {{JXL_CHANNEL_CFA, "my cfa channel"}},
+      {{JXL_CHANNEL_DEPTH, "depth"},
+       {JXL_CHANNEL_SELECTION_MASK, "mask"},
+       {JXL_CHANNEL_BLACK, "black"},
+       {JXL_CHANNEL_CFA, "my cfa channel"},
+       {JXL_CHANNEL_OPTIONAL, "optional channel"}},
+      {{JXL_CHANNEL_DEPTH, "very deep"}}};
+  for (const bool use_container : {false, true}) {
+    for (const bool lossless : {false, true}) {
+      for (uint32_t num_channels = 1; num_channels <= 4; ++num_channels) {
+        // Counted as alpha when the channel count is even (2 or 4).
+        const uint32_t has_alpha = (num_channels % 2 == 0) ? 1u : 0u;
+        for (const auto& extra_channels : extra_channels_cases) {
+          const size_t num_extra = has_alpha + extra_channels.size();
+          // No support (yet) for lossless extra float channels: skip those.
+          if (lossless && num_extra != 0) continue;
+          const JxlPixelFormat pixel_format = {num_channels, JXL_TYPE_FLOAT,
+                                               JXL_NATIVE_ENDIAN, 0};
+          VerifyRoundtripCompression<float>(63, 129, pixel_format, pixel_format,
+                                            lossless, use_container, 1, false,
+                                            extra_channels);
+        }
+      }
+    }
+  }
+}
+
+TEST(RoundtripTest, Uint16FrameRoundtripTest) {
+  using ChannelList = std::vector<std::pair<JxlExtraChannelType, std::string>>;
+  const std::vector<ChannelList> extra_channels_cases = {
+      {},  // no extra channels
+      {{JXL_CHANNEL_ALPHA, "my extra alpha channel"}},
+      {{JXL_CHANNEL_CFA, "my cfa channel"}},
+      {{JXL_CHANNEL_CFA, "my cfa channel"}, {JXL_CHANNEL_BLACK, "k_channel"}},
+      {{JXL_CHANNEL_DEPTH, "very deep"}}};
+  for (const bool use_container : {false, true}) {
+    for (const bool lossless : {false, true}) {
+      for (uint32_t num_channels = 1; num_channels <= 4; ++num_channels) {
+        const JxlPixelFormat pixel_format = {num_channels, JXL_TYPE_UINT16,
+                                             JXL_NATIVE_ENDIAN, 0};
+        for (const auto& extra_channels : extra_channels_cases) {
+          VerifyRoundtripCompression<uint16_t>(
+              63, 129, pixel_format, pixel_format, lossless, use_container, 1,
+              false, extra_channels);
+        }
+      }
+    }
+  }
+}
+
+TEST(RoundtripTest, Uint8FrameRoundtripTest) {
+  using ChannelList = std::vector<std::pair<JxlExtraChannelType, std::string>>;
+  const std::vector<ChannelList> extra_channels_cases = {
+      {},  // no extra channels
+      {{JXL_CHANNEL_THERMAL, "temperature"}},
+      {{JXL_CHANNEL_ALPHA, "my extra alpha channel"}},
+      {{JXL_CHANNEL_CFA, "my cfa channel"}},
+      {{JXL_CHANNEL_CFA, "my cfa channel"}, {JXL_CHANNEL_BLACK, "k_channel"}},
+      {{JXL_CHANNEL_DEPTH, "very deep"}}};
+  for (const bool use_container : {false, true}) {
+    for (const bool lossless : {false, true}) {
+      for (uint32_t num_channels = 1; num_channels <= 4; ++num_channels) {
+        const JxlPixelFormat pixel_format = {num_channels, JXL_TYPE_UINT8,
+                                             JXL_NATIVE_ENDIAN, 0};
+        for (const auto& extra_channels : extra_channels_cases) {
+          VerifyRoundtripCompression<uint8_t>(
+              63, 129, pixel_format, pixel_format, lossless, use_container, 1,
+              false, extra_channels);
+        }
+      }
+    }
+  }
+}
+
+TEST(RoundtripTest, TestNonlinearSrgbAsXybEncoded) {
+  for (int use_container = 0; use_container < 2; use_container++) {
+    for (uint32_t num_channels = 1; num_channels < 5; num_channels++) {
+      JxlPixelFormat pixel_format_in =  // 8-bit integer input
+          JxlPixelFormat{num_channels, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0};
+      JxlPixelFormat pixel_format_out =  // decoded as float
+          JxlPixelFormat{num_channels, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0};
+      VerifyRoundtripCompression<uint8_t>(
+          63, 129, pixel_format_in, pixel_format_out, /*lossless=*/false,
+          (bool)use_container, /*resampling=*/1, /*already_downsampled=*/false);
+    }
+  }
+}
+
+TEST(RoundtripTest, Resampling) {
+  const JxlPixelFormat rgb8 = {3, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0};
+  // Lossy roundtrip with the resampling frame setting at 2 and input that is
+  // not marked as already downsampled.
+  VerifyRoundtripCompression<uint8_t>(
+      63, 129, rgb8, rgb8, /*lossless=*/false, /*use_container=*/false,
+      /*resampling=*/2, /*already_downsampled=*/false);
+
+  // Lossless roundtrip with input that is marked as already downsampled.
+  // TODO(lode): also make this work for odd sizes. This requires a fix in
+  // enc_frame.cc to not set custom_size_or_origin to true due to even/odd
+  // mismatch.
+  VerifyRoundtripCompression<uint8_t>(
+      64, 128, rgb8, rgb8, /*lossless=*/true, /*use_container=*/false,
+      /*resampling=*/2, /*already_downsampled=*/true);
+}
+
+TEST(RoundtripTest, ExtraBoxesTest) {  // decode with a "crud" box appended
+  JxlPixelFormat pixel_format =
+      JxlPixelFormat{4, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0};
+  const size_t xsize = 61;
+  const size_t ysize = 71;
+
+  const std::vector<uint8_t> original_bytes =
+      GetTestImage<float>(xsize, ysize, pixel_format);
+  jxl::CodecInOut original_io =
+      ConvertTestImage(original_bytes, xsize, ysize, pixel_format, {});
+
+  JxlEncoder* enc = JxlEncoderCreate(nullptr);
+  EXPECT_NE(nullptr, enc);
+
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderUseContainer(enc, true));
+  JxlBasicInfo basic_info;
+  jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+  basic_info.xsize = xsize;
+  basic_info.ysize = ysize;
+  basic_info.uses_original_profile = false;
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc, 10));
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc, &basic_info));
+  JxlColorEncoding color_encoding;
+  if (pixel_format.data_type == JXL_TYPE_FLOAT) {  // always true for this test
+    JxlColorEncodingSetToLinearSRGB(&color_encoding,
+                                    /*is_gray=*/pixel_format.num_channels < 3);
+  } else {
+    JxlColorEncodingSetToSRGB(&color_encoding,
+                              /*is_gray=*/pixel_format.num_channels < 3);
+  }
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetColorEncoding(enc, &color_encoding));
+  JxlEncoderFrameSettings* frame_settings =
+      JxlEncoderFrameSettingsCreate(enc, nullptr);
+  JxlEncoderSetFrameLossless(frame_settings, false);  // lossy encode
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+                                    (void*)original_bytes.data(),
+                                    original_bytes.size()));
+  JxlEncoderCloseInput(enc);
+
+  std::vector<uint8_t> compressed;
+  EncodeWithEncoder(enc, &compressed);
+  JxlEncoderDestroy(enc);
+
+  std::vector<uint8_t> extra_data(1023);  // zero-filled payload for the box
+  jxl::AppendBoxHeader(jxl::MakeBoxType("crud"), extra_data.size(), false,
+                       &compressed);  // header goes after the encoded output
+  compressed.insert(compressed.end(), extra_data.begin(), extra_data.end());
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+  EXPECT_NE(nullptr, dec);
+
+  const uint8_t* next_in = compressed.data();
+  size_t avail_in = compressed.size();
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO |
+                                               JXL_DEC_COLOR_ENCODING |
+                                               JXL_DEC_FULL_IMAGE));
+
+  JxlDecoderSetInput(dec, next_in, avail_in);
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  size_t buffer_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(dec, &pixel_format, &buffer_size));
+  EXPECT_EQ(buffer_size, original_bytes.size());
+
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+  EXPECT_EQ(xsize, info.xsize);
+  EXPECT_EQ(ysize, info.ysize);
+
+  EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+
+  size_t icc_profile_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_DATA,
+                                        &icc_profile_size));
+  jxl::PaddedBytes icc_profile(icc_profile_size);
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsICCProfile(
+                                 dec, JXL_COLOR_PROFILE_TARGET_DATA,
+                                 icc_profile.data(), icc_profile.size()));
+
+  std::vector<uint8_t> decoded_bytes(buffer_size);
+
+  EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(dec, &pixel_format,
+                                                         decoded_bytes.data(),
+                                                         decoded_bytes.size()));
+
+  EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+
+  JxlDecoderDestroy(dec);
+
+  jxl::CodecInOut decoded_io =
+      ConvertTestImage(decoded_bytes, xsize, ysize, pixel_format, icc_profile);
+
+  jxl::ButteraugliParams ba;
+  float butteraugli_score = ButteraugliDistance(
+      original_io.frames, decoded_io.frames, ba, jxl::GetJxlCms(),
+      /*distmap=*/nullptr, nullptr);
+  EXPECT_LE(butteraugli_score, 2.0f);  // lossy: only a distance bound is checked
+}
+
+static const unsigned char kEncodedTestProfile[] = {  // input to ReadICC()
+    0x1f, 0x8b, 0x1,  0x13, 0x10, 0x0,  0x0,  0x0,  0x20, 0x4c, 0xcc, 0x3,
+    0xe7, 0xa0, 0xa5, 0xa2, 0x90, 0xa4, 0x27, 0xe8, 0x79, 0x1d, 0xe3, 0x26,
+    0x57, 0x54, 0xef, 0x0,  0xe8, 0x97, 0x2,  0xce, 0xa1, 0xd7, 0x85, 0x16,
+    0xb4, 0x29, 0x94, 0x58, 0xf2, 0x56, 0xc0, 0x76, 0xea, 0x23, 0xec, 0x7c,
+    0x73, 0x51, 0x41, 0x40, 0x23, 0x21, 0x95, 0x4,  0x75, 0x12, 0xc9, 0xcc,
+    0x16, 0xbd, 0xb6, 0x99, 0xad, 0xf8, 0x75, 0x35, 0xb6, 0x42, 0xae, 0xae,
+    0xae, 0x86, 0x56, 0xf8, 0xcc, 0x16, 0x30, 0xb3, 0x45, 0xad, 0xd,  0x40,
+    0xd6, 0xd1, 0xd6, 0x99, 0x40, 0xbe, 0xe2, 0xdc, 0x31, 0x7,  0xa6, 0xb9,
+    0x27, 0x92, 0x38, 0x0,  0x3,  0x5e, 0x2c, 0xbe, 0xe6, 0xfb, 0x19, 0xbf,
+    0xf3, 0x6d, 0xbc, 0x4d, 0x64, 0xe5, 0xba, 0x76, 0xde, 0x31, 0x65, 0x66,
+    0x14, 0xa6, 0x3a, 0xc5, 0x8f, 0xb1, 0xb4, 0xba, 0x1f, 0xb1, 0xb8, 0xd4,
+    0x75, 0xba, 0x18, 0x86, 0x95, 0x3c, 0x26, 0xf6, 0x25, 0x62, 0x53, 0xfd,
+    0x9c, 0x94, 0x76, 0xf6, 0x95, 0x2c, 0xb1, 0xfd, 0xdc, 0xc0, 0xe4, 0x3f,
+    0xb3, 0xff, 0x67, 0xde, 0xd5, 0x94, 0xcc, 0xb0, 0x83, 0x2f, 0x28, 0x93,
+    0x92, 0x3,  0xa1, 0x41, 0x64, 0x60, 0x62, 0x70, 0x80, 0x87, 0xaf, 0xe7,
+    0x60, 0x4a, 0x20, 0x23, 0xb3, 0x11, 0x7,  0x38, 0x38, 0xd4, 0xa,  0x66,
+    0xb5, 0x93, 0x41, 0x90, 0x19, 0x17, 0x18, 0x60, 0xa5, 0xb,  0x7a, 0x24,
+    0xaa, 0x20, 0x81, 0xac, 0xa9, 0xa1, 0x70, 0xa6, 0x12, 0x8a, 0x4a, 0xa3,
+    0xa0, 0xf9, 0x9a, 0x97, 0xe7, 0xa8, 0xac, 0x8,  0xa8, 0xc4, 0x2a, 0x86,
+    0xa7, 0x69, 0x1e, 0x67, 0xe6, 0xbe, 0xa4, 0xd3, 0xff, 0x91, 0x61, 0xf6,
+    0x8a, 0xe6, 0xb5, 0xb3, 0x61, 0x9f, 0x19, 0x17, 0x98, 0x27, 0x6b, 0xe9,
+    0x8,  0x98, 0xe1, 0x21, 0x4a, 0x9,  0xb5, 0xd7, 0xca, 0xfa, 0x94, 0xd0,
+    0x69, 0x1a, 0xeb, 0x52, 0x1,  0x4e, 0xf5, 0xf6, 0xdf, 0x7f, 0xe7, 0x29,
+    0x70, 0xee, 0x4,  0xda, 0x2f, 0xa4, 0xff, 0xfe, 0xbb, 0x6f, 0xa8, 0xff,
+    0xfe, 0xdb, 0xaf, 0x8,  0xf6, 0x72, 0xa1, 0x40, 0x5d, 0xf0, 0x2d, 0x8,
+    0x82, 0x5b, 0x87, 0xbd, 0x10, 0x8,  0xe9, 0x7,  0xee, 0x4b, 0x80, 0xda,
+    0x4a, 0x4,  0xc5, 0x5e, 0xa0, 0xb7, 0x1e, 0x60, 0xb0, 0x59, 0x76, 0x60,
+    0xb,  0x2e, 0x19, 0x8a, 0x2e, 0x1c, 0xe6, 0x6,  0x20, 0xb8, 0x64, 0x18,
+    0x2a, 0xcf, 0x51, 0x94, 0xd4, 0xee, 0xc3, 0xfe, 0x39, 0x74, 0xd4, 0x2b,
+    0x48, 0xc9, 0x83, 0x4c, 0x9b, 0xd0, 0x4c, 0x35, 0x10, 0xe3, 0x9,  0xf7,
+    0x72, 0xf0, 0x7a, 0xe,  0xbf, 0x7d, 0x36, 0x2e, 0x19, 0x7e, 0x3f, 0xc,
+    0xf7, 0x93, 0xe7, 0xf4, 0x1d, 0x32, 0xc6, 0xb0, 0x89, 0xad, 0xe0, 0x28,
+    0xc1, 0xa7, 0x59, 0xe3, 0x0,
+};
+
+TEST(RoundtripTest, TestICCProfile) {
+  // JxlEncoderSetICCProfile parses the ICC profile, so a valid profile is
+  // needed. The profile should be passed correctly through the roundtrip.
+  jxl::BitReader reader(jxl::Span<const uint8_t>(kEncodedTestProfile,
+                                                 sizeof(kEncodedTestProfile)));
+  jxl::PaddedBytes icc;  // receives the profile read from kEncodedTestProfile
+  ASSERT_TRUE(ReadICC(&reader, &icc));
+  ASSERT_TRUE(reader.Close());
+
+  JxlPixelFormat format =
+      JxlPixelFormat{3, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0};
+
+  size_t xsize = 25;
+  size_t ysize = 37;
+  const std::vector<uint8_t> original_bytes =
+      GetTestImage<uint8_t>(xsize, ysize, format);
+
+  JxlEncoder* enc = JxlEncoderCreate(nullptr);
+  EXPECT_NE(nullptr, enc);
+
+  JxlBasicInfo basic_info;
+  jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &format);
+  basic_info.xsize = xsize;
+  basic_info.ysize = ysize;
+  basic_info.uses_original_profile = JXL_TRUE;
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc, &basic_info));
+
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderSetICCProfile(enc, icc.data(), icc.size()));
+  JxlEncoderFrameSettings* frame_settings =
+      JxlEncoderFrameSettingsCreate(enc, nullptr);
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderAddImageFrame(frame_settings, &format,
+                                    (void*)original_bytes.data(),
+                                    original_bytes.size()));
+  JxlEncoderCloseInput(enc);
+
+  std::vector<uint8_t> compressed;
+  EncodeWithEncoder(enc, &compressed);
+  JxlEncoderDestroy(enc);
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+  EXPECT_NE(nullptr, dec);
+
+  const uint8_t* next_in = compressed.data();
+  size_t avail_in = compressed.size();
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO |
+                                               JXL_DEC_COLOR_ENCODING |
+                                               JXL_DEC_FULL_IMAGE));
+
+  JxlDecoderSetInput(dec, next_in, avail_in);
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  size_t buffer_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+  EXPECT_EQ(buffer_size, original_bytes.size());
+
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+  EXPECT_EQ(xsize, info.xsize);
+  EXPECT_EQ(ysize, info.ysize);
+
+  EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+
+  size_t dec_icc_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+                                        &dec_icc_size));
+  EXPECT_EQ(icc.size(), dec_icc_size);
+  jxl::PaddedBytes dec_icc(dec_icc_size);
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsICCProfile(
+                                 dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+                                 dec_icc.data(), dec_icc.size()));
+
+  std::vector<uint8_t> decoded_bytes(buffer_size);
+
+  EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetImageOutBuffer(dec, &format, decoded_bytes.data(),
+                                        decoded_bytes.size()));
+
+  EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+
+  EXPECT_EQ(icc, dec_icc);  // the decoded profile must equal the one passed in
+
+  JxlDecoderDestroy(dec);
+}
+
+TEST(RoundtripTest, JXL_TRANSCODE_JPEG_TEST(TestJPEGReconstruction)) {
+  TEST_LIBJPEG_SUPPORT();
+  // Load the reference JPEG and require that it can be read as an image.
+  const std::string test_file = "jxl/flower/flower.png.im_q85_420.jpg";
+  const jxl::PaddedBytes jpeg = jxl::test::ReadTestData(test_file);
+  jxl::CodecInOut jpeg_io;
+  ASSERT_TRUE(
+      SetFromBytes(jxl::Span<const uint8_t>(jpeg), &jpeg_io, /*pool=*/nullptr));
+
+  // Add the JPEG as a frame, with container and JPEG metadata storage on.
+  JxlEncoderPtr encoder = JxlEncoderMake(nullptr);
+  JxlEncoderFrameSettings* settings =
+      JxlEncoderFrameSettingsCreate(encoder.get(), NULL);
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderUseContainer(encoder.get(), JXL_TRUE));
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderStoreJPEGMetadata(encoder.get(), JXL_TRUE));
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderAddJPEGFrame(settings, jpeg.data(), jpeg.size()));
+  JxlEncoderCloseInput(encoder.get());
+  std::vector<uint8_t> compressed;
+  EncodeWithEncoder(encoder.get(), &compressed);
+
+  JxlDecoderPtr decoder = JxlDecoderMake(nullptr);
+  const int events = JXL_DEC_JPEG_RECONSTRUCTION | JXL_DEC_FULL_IMAGE;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(decoder.get(), events));
+  JxlDecoderSetInput(decoder.get(), compressed.data(), compressed.size());
+  EXPECT_EQ(JXL_DEC_JPEG_RECONSTRUCTION, JxlDecoderProcessInput(decoder.get()));
+  std::vector<uint8_t> jpeg_out(128);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetJPEGBuffer(decoder.get(), jpeg_out.data(),
+                                    jpeg_out.size()));
+  // Keep doubling the output buffer while the decoder asks for more room;
+  // `filled` is the buffer size minus what the released buffer reports back.
+  size_t filled;
+  JxlDecoderStatus status;
+  do {
+    filled = jpeg_out.size() - JxlDecoderReleaseJPEGBuffer(decoder.get());
+    jpeg_out.resize(jpeg_out.size() * 2);
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSetJPEGBuffer(decoder.get(), jpeg_out.data() + filled,
+                                      jpeg_out.size() - filled));
+    status = JxlDecoderProcessInput(decoder.get());
+  } while (status == JXL_DEC_JPEG_NEED_MORE_OUTPUT);
+  ASSERT_EQ(JXL_DEC_FULL_IMAGE, status);
+  filled = jpeg_out.size() - JxlDecoderReleaseJPEGBuffer(decoder.get());
+  ASSERT_EQ(filled, jpeg.size());
+  EXPECT_EQ(0, memcmp(jpeg_out.data(), jpeg.data(), filled));
+}
diff --git a/third-party/libjxl/libjxl/lib/jxl/sanitizers.h b/third-party/libjxl/libjxl/lib/jxl/sanitizers.h
new file mode 100644
index 0000000000..ce0bd8dc63
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/sanitizers.h
@@ -0,0 +1,242 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_SANITIZERS_H_
+#define LIB_JXL_SANITIZERS_H_
+
+#include <inttypes.h>
+#include <stddef.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/sanitizer_definitions.h"
+#include "lib/jxl/image.h"
+
+#if JXL_MEMORY_SANITIZER
+#include <stdio.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+#include "sanitizer/msan_interface.h"
+#endif
+
+namespace jxl {
+namespace msan {
+
+#if JXL_MEMORY_SANITIZER
+
+// Chosen so that kSanitizerSentinel is four copies of kSanitizerSentinelByte.
+constexpr uint8_t kSanitizerSentinelByte = 0x48;
+constexpr float kSanitizerSentinel = 205089.125f;
+
+static JXL_INLINE JXL_MAYBE_UNUSED void PoisonMemory(const volatile void* m,
+                                                     size_t size) {
+  __msan_poison(m, size);  // Marks `size` bytes at `m` as uninitialized.
+}
+
+static JXL_INLINE JXL_MAYBE_UNUSED void UnpoisonMemory(const volatile void* m,
+                                                       size_t size) {
+  __msan_unpoison(m, size);  // Marks `size` bytes at `m` as initialized.
+}
+
+static JXL_INLINE JXL_MAYBE_UNUSED void UnpoisonCStr(const char* c) {
+  do {
+    UnpoisonMemory(c, 1);  // Unpoison each byte before it is read below.
+  } while (*c++);  // Stops once the terminating NUL has been unpoisoned.
+}
+
+static JXL_INLINE JXL_MAYBE_UNUSED void MemoryIsInitialized(
+    const volatile void* m, size_t size) {
+  __msan_check_mem_is_initialized(m, size);  // Reports any poisoned byte.
+}
+
+// Poisons the whole pixel buffer of a plane, row padding included.
+static JXL_INLINE JXL_MAYBE_UNUSED void PoisonImage(const PlaneBase& im) {
+  PoisonMemory(im.bytes(), im.bytes_per_row() * im.ysize());  // All rows.
+}
+
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED void PoisonImage(const Image3<T>& im) {
+  for (size_t c = 0; c < 3; c++) {
+    PoisonImage(im.Plane(c));  // Poison each of the three planes in turn.
+  }
+}
+
+// Prints to stderr the uninitialized (poisoned) pixel ranges of a plane.
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED void PrintImageUninitialized(
+    const Plane<T>& im) {
+  fprintf(stderr,
+          "Uninitialized regions for image of size %" PRIu64 "x%" PRIu64 ":\n",
+          static_cast<uint64_t>(im.xsize()), static_cast<uint64_t>(im.ysize()));
+
+  // A segment of uninitialized pixels in a row, in the format [first, second).
+  typedef std::pair<size_t, size_t> PixelSegment;
+
+  // Helper class to merge and print a list of rows of PixelSegment that may be
+  // the same over big ranges of rows. This compacts the output to ranges of
+  // rows like "[y0, y1): [x0, x1) [x2, x3)".
+  class RowsMerger {
+   public:
+    // Adds a row to the list. If it equals the previous row it is merged into
+    // the current range of rows [y0, y1); otherwise the current range (if any)
+    // is printed and, unless the new row is empty, a new range is started at
+    // this row.
+    void AddRow(size_t y, std::vector<PixelSegment>&& new_row) {
+      if (start_y_ != -1 && new_row != segments_) {
+        PrintRow(y);
+      }
+      if (new_row.empty()) {
+        // Skip ranges with no uninitialized pixels.
+        start_y_ = -1;
+        segments_.clear();
+        return;
+      }
+      if (start_y_ == -1) {
+        start_y_ = y;
+        segments_ = std::move(new_row);
+      }
+    }
+
+    // Prints the pending range of rows [start_y_, end_y), if any, and resets it.
+    void PrintRow(size_t end_y) {
+      if (start_y_ == -1) return;
+      if (segments_.empty()) {
+        start_y_ = -1;
+        return;
+      }
+      if (end_y - start_y_ > 1) {
+        fprintf(stderr, " y=[%" PRId64 ", %" PRIu64 "):",
+                static_cast<int64_t>(start_y_), static_cast<uint64_t>(end_y));
+      } else {
+        fprintf(stderr, " y=[%" PRId64 "]:", static_cast<int64_t>(start_y_));
+      }
+      for (const auto& seg : segments_) {
+        if (seg.first + 1 == seg.second) {
+          fprintf(stderr, " [%" PRId64 "]", static_cast<int64_t>(seg.first));
+        } else {
+          fprintf(stderr, " [%" PRId64 ", %" PRIu64 ")",
+                  static_cast<int64_t>(seg.first),
+                  static_cast<uint64_t>(seg.second));
+        }
+      }
+      fprintf(stderr, "\n");
+      start_y_ = -1;
+    }
+
+   private:
+    std::vector<PixelSegment> segments_;
+    // First row of the pending range of rows whose uninitialized segments are
+    // |segments_|, or -1 when no range is pending.
+    ssize_t start_y_ = -1;
+  } rows_merger;
+
+  class SegmentsMerger {  // Merges consecutive x into [first, second) runs.
+   public:
+    void AddValue(size_t x) {
+      if (row.empty() || row.back().second != x) {
+        row.emplace_back(x, x + 1);
+      } else {
+        row.back().second = x + 1;
+      }
+    }
+
+    std::vector<PixelSegment> row;
+  };
+
+  for (size_t y = 0; y < im.ysize(); y++) {
+    auto* row = im.Row(y);
+    SegmentsMerger seg_merger;
+    size_t x = 0;
+    while (x < im.xsize()) {
+      intptr_t ret =
+          __msan_test_shadow(row + x, (im.xsize() - x) * sizeof(row[0]));
+      if (ret < 0) break;  // Nothing poisoned in the rest of this row.
+      size_t next_x = x + ret / sizeof(row[0]);  // Pixel holding that byte.
+      seg_merger.AddValue(next_x);
+      x = next_x + 1;  // Resume the scan right after the reported pixel.
+    }
+    rows_merger.AddRow(y, std::move(seg_merger.row));
+  }
+  rows_merger.PrintRow(im.ysize());  // Flush the last pending range, if any.
+}
+
+// Checks that every pixel inside rect `r` of plane `im` is initialized (not
+// poisoned); `c` and `message` only label the diagnostics printed on failure.
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED void CheckImageInitialized(
+    const Plane<T>& im, const Rect& r, size_t c, const char* message) {
+  JXL_ASSERT(r.x0() <= im.xsize());
+  JXL_ASSERT(r.x0() + r.xsize() <= im.xsize());
+  JXL_ASSERT(r.y0() <= im.ysize());
+  JXL_ASSERT(r.y0() + r.ysize() <= im.ysize());
+  for (size_t y = r.y0(); y < r.y0() + r.ysize(); y++) {
+    const auto* row = im.Row(y);
+    intptr_t ret = __msan_test_shadow(row + r.x0(), sizeof(*row) * r.xsize());
+    if (ret != -1) {  // Some byte of this row segment is poisoned.
+      JXL_DEBUG(
+          1,
+          "Checking an image of %" PRIu64 " x %" PRIu64 ", rect x0=%" PRIu64
+          ", y0=%" PRIu64
+          ", "
+          "xsize=%" PRIu64 ", ysize=%" PRIu64,
+          static_cast<uint64_t>(im.xsize()), static_cast<uint64_t>(im.ysize()),
+          static_cast<uint64_t>(r.x0()), static_cast<uint64_t>(r.y0()),
+          static_cast<uint64_t>(r.xsize()), static_cast<uint64_t>(r.ysize()));
+      size_t x = ret / sizeof(*row);  // Offending pixel, relative to r.x0().
+      JXL_DEBUG(1,
+                "CheckImageInitialized failed at x=%" PRIu64 ", y=%" PRIu64
+                ", c=%" PRIu64 ": %s",
+                static_cast<uint64_t>(r.x0() + x), static_cast<uint64_t>(y),
+                static_cast<uint64_t>(c), message ? message : "");
+      PrintImageUninitialized(im);  // Dump all poisoned ranges of the plane.
+    }
+    // This will report an error if memory is not initialized.
+    __msan_check_mem_is_initialized(row + r.x0(), sizeof(*row) * r.xsize());
+  }
+}
+
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED void CheckImageInitialized(
+    const Image3<T>& im, const Rect& r, const char* message) {
+  const std::string prefix(message);  // Shared part of every plane's message.
+  for (size_t plane = 0; plane < 3; ++plane) {
+    const std::string text = prefix + " c=" + std::to_string(plane);
+    CheckImageInitialized(im.Plane(plane), r, plane, text.c_str());
+  }
+}
+
+#define JXL_CHECK_IMAGE_INITIALIZED(im, r) \
+  ::jxl::msan::CheckImageInitialized(im, r, "im=" #im ", r=" #r);
+
+#define JXL_CHECK_PLANE_INITIALIZED(im, r, c) \
+  ::jxl::msan::CheckImageInitialized(im, r, c, "im=" #im ", r=" #r ", c=" #c);
+
+#else  // JXL_MEMORY_SANITIZER
+
+// Non-MSan build: the helpers below are empty no-op stubs, so their pointer
+// parameters drop the volatile qualifier, which the empty bodies do not need.
+
+static JXL_INLINE JXL_MAYBE_UNUSED void PoisonMemory(const void*, size_t) {}
+static JXL_INLINE JXL_MAYBE_UNUSED void UnpoisonMemory(const void*, size_t) {}
+static JXL_INLINE JXL_MAYBE_UNUSED void UnpoisonCStr(const char*) {}
+static JXL_INLINE JXL_MAYBE_UNUSED void MemoryIsInitialized(const void*,
+                                                            size_t) {}
+
+static JXL_INLINE JXL_MAYBE_UNUSED void PoisonImage(const PlaneBase&) {}
+template <typename T>  // No-op twin of the MSan-build Image3<T> overload.
+static JXL_INLINE JXL_MAYBE_UNUSED void PoisonImage(const Image3<T>&) {}
+
+#define JXL_CHECK_IMAGE_INITIALIZED(im, r)
+#define JXL_CHECK_PLANE_INITIALIZED(im, r, c)
+
+#endif
+
+}  // namespace msan
+}  // namespace jxl
+
+#endif  // LIB_JXL_SANITIZERS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/simd_util-inl.h b/third-party/libjxl/libjxl/lib/jxl/simd_util-inl.h
new file mode 100644
index 0000000000..77b207ffe8
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/simd_util-inl.h
@@ -0,0 +1,349 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Misc utilities for SIMD operations
+
+#if defined(LIB_JXL_SIMD_UTIL_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_SIMD_UTIL_INL_H_
+#undef LIB_JXL_SIMD_UTIL_INL_H_
+#else
+#define LIB_JXL_SIMD_UTIL_INL_H_
+#endif
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+#if HWY_CAP_GE512
+using hwy::HWY_NAMESPACE::Half;
+using hwy::HWY_NAMESPACE::Vec;
+template <size_t i, class DF, class V>
+HWY_INLINE Vec<Half<Half<DF>>> Quarter(const DF df, V v) {
+  const Half<DF> dh;        // Tag for half of a DF vector.
+  const Half<Half<DF>> dq;  // Tag for a quarter of a DF vector.
+  auto half = (i < 2) ? LowerHalf(dh, v) : UpperHalf(dh, v);
+  return (i % 2 == 0) ? LowerHalf(dq, half) : UpperHalf(dq, half);
+}
+
+template <class DF, class V>
+HWY_INLINE Vec<DF> Concat4(const DF df, V v0, V v1, V v2, V v3) {
+  const Half<DF> dh;  // Tag for one half of the result vector.
+  return Combine(df, Combine(dh, v3, v2), Combine(dh, v1, v0));
+}
+
+#endif
+
+// Interleaves two vectors: writes v0[0], v1[0], v0[1], v1[1], ... to mem
+// (2 * Lanes(df) values of a 4-byte type T). Mem must be aligned.
+template <class DF, class V, typename T>
+void StoreInterleaved(const DF df, V v0, V v1, T* mem) {
+  static_assert(sizeof(T) == 4, "only use StoreInterleaved for 4-byte types");
+#if HWY_TARGET == HWY_SCALAR
+  Store(v0, df, mem);
+  Store(v1, df, mem + 1);
+#elif !HWY_CAP_GE256
+  Store(InterleaveLower(df, v0, v1), df, mem);
+  Store(InterleaveUpper(df, v0, v1), df, mem + Lanes(df));
+#else
+  if (!HWY_CAP_GE512 || Lanes(df) == 8) {
+    auto t0 = InterleaveLower(df, v0, v1);
+    auto t1 = InterleaveUpper(df, v0, v1);
+    Store(ConcatLowerLower(df, t1, t0), df, mem);
+    Store(ConcatUpperUpper(df, t1, t0), df, mem + Lanes(df));
+  } else {
+#if HWY_CAP_GE512
+    auto t0 = InterleaveLower(df, v0, v1);
+    auto t1 = InterleaveUpper(df, v0, v1);
+    Store(Concat4(df, Quarter<0>(df, t0), Quarter<0>(df, t1),
+                  Quarter<1>(df, t0), Quarter<1>(df, t1)),
+          df, mem);
+    Store(Concat4(df, Quarter<2>(df, t0), Quarter<2>(df, t1),
+                  Quarter<3>(df, t0), Quarter<3>(df, t1)),
+          df, mem + Lanes(df));
+#endif
+  }
+#endif
+}
+
+// Interleaves four vectors: writes v0[0], v1[0], v2[0], v3[0], v0[1], ... to
+// mem (4 * Lanes(df) values of a 4-byte type T). Mem must be aligned.
+template <class DF, class V, typename T>
+void StoreInterleaved(const DF df, V v0, V v1, V v2, V v3, T* mem) {
+  static_assert(sizeof(T) == 4, "only use StoreInterleaved for 4-byte types");
+#if HWY_TARGET == HWY_SCALAR
+  Store(v0, df, mem);
+  Store(v1, df, mem + 1);
+  Store(v2, df, mem + 2);
+  Store(v3, df, mem + 3);
+#elif !HWY_CAP_GE256
+  auto t0 = InterleaveLower(df, v0, v2);
+  auto t1 = InterleaveLower(df, v1, v3);
+  auto t2 = InterleaveUpper(df, v0, v2);
+  auto t3 = InterleaveUpper(df, v1, v3);
+  Store(InterleaveLower(df, t0, t1), df, mem);
+  Store(InterleaveUpper(df, t0, t1), df, mem + Lanes(df));
+  Store(InterleaveLower(df, t2, t3), df, mem + 2 * Lanes(df));
+  Store(InterleaveUpper(df, t2, t3), df, mem + 3 * Lanes(df));
+#elif !HWY_CAP_GE512
+  auto t0 = InterleaveLower(df, v0, v2);
+  auto t1 = InterleaveLower(df, v1, v3);
+  auto t2 = InterleaveUpper(df, v0, v2);
+  auto t3 = InterleaveUpper(df, v1, v3);
+
+  auto m0 = InterleaveLower(df, t0, t1);
+  auto m1 = InterleaveUpper(df, t0, t1);
+  auto m2 = InterleaveLower(df, t2, t3);
+  auto m3 = InterleaveUpper(df, t2, t3);
+
+  Store(ConcatLowerLower(df, m1, m0), df, mem);
+  Store(ConcatLowerLower(df, m3, m2), df, mem + Lanes(df));
+  Store(ConcatUpperUpper(df, m1, m0), df, mem + 2 * Lanes(df));
+  Store(ConcatUpperUpper(df, m3, m2), df, mem + 3 * Lanes(df));
+#else
+  auto t0 = InterleaveLower(df, v0, v2);
+  auto t1 = InterleaveLower(df, v1, v3);
+  auto t2 = InterleaveUpper(df, v0, v2);
+  auto t3 = InterleaveUpper(df, v1, v3);
+
+  auto m0 = InterleaveLower(df, t0, t1);
+  auto m1 = InterleaveUpper(df, t0, t1);
+  auto m2 = InterleaveLower(df, t2, t3);
+  auto m3 = InterleaveUpper(df, t2, t3);
+
+  Store(Concat4(df, Quarter<0>(df, m0), Quarter<0>(df, m1), Quarter<0>(df, m2),
+                Quarter<0>(df, m3)),
+        df, mem);
+  Store(Concat4(df, Quarter<1>(df, m0), Quarter<1>(df, m1), Quarter<1>(df, m2),
+                Quarter<1>(df, m3)),
+        df, mem + Lanes(df));
+  Store(Concat4(df, Quarter<2>(df, m0), Quarter<2>(df, m1), Quarter<2>(df, m2),
+                Quarter<2>(df, m3)),
+        df, mem + 2 * Lanes(df));
+  Store(Concat4(df, Quarter<3>(df, m0), Quarter<3>(df, m1), Quarter<3>(df, m2),
+                Quarter<3>(df, m3)),
+        df, mem + 3 * Lanes(df));
+#endif
+}
+
+// Interleaves eight vectors: writes v0[0], v1[0], ..., v7[0], v0[1], ... to
+// mem (8 * Lanes(df) floats in total). Mem must be aligned.
+template <class DF, class V>
+void StoreInterleaved(const DF df, V v0, V v1, V v2, V v3, V v4, V v5, V v6,
+                      V v7, float* mem) {
+#if HWY_TARGET == HWY_SCALAR
+  Store(v0, df, mem);
+  Store(v1, df, mem + 1);
+  Store(v2, df, mem + 2);
+  Store(v3, df, mem + 3);
+  Store(v4, df, mem + 4);
+  Store(v5, df, mem + 5);
+  Store(v6, df, mem + 6);
+  Store(v7, df, mem + 7);
+#elif !HWY_CAP_GE256
+  auto t0 = InterleaveLower(df, v0, v4);
+  auto t1 = InterleaveLower(df, v1, v5);
+  auto t2 = InterleaveLower(df, v2, v6);
+  auto t3 = InterleaveLower(df, v3, v7);
+  auto t4 = InterleaveUpper(df, v0, v4);
+  auto t5 = InterleaveUpper(df, v1, v5);
+  auto t6 = InterleaveUpper(df, v2, v6);
+  auto t7 = InterleaveUpper(df, v3, v7);
+
+  auto w0 = InterleaveLower(df, t0, t2);
+  auto w1 = InterleaveLower(df, t1, t3);
+  auto w2 = InterleaveUpper(df, t0, t2);
+  auto w3 = InterleaveUpper(df, t1, t3);
+  auto w4 = InterleaveLower(df, t4, t6);
+  auto w5 = InterleaveLower(df, t5, t7);
+  auto w6 = InterleaveUpper(df, t4, t6);
+  auto w7 = InterleaveUpper(df, t5, t7);
+
+  Store(InterleaveLower(df, w0, w1), df, mem);
+  Store(InterleaveUpper(df, w0, w1), df, mem + Lanes(df));
+  Store(InterleaveLower(df, w2, w3), df, mem + 2 * Lanes(df));
+  Store(InterleaveUpper(df, w2, w3), df, mem + 3 * Lanes(df));
+  Store(InterleaveLower(df, w4, w5), df, mem + 4 * Lanes(df));
+  Store(InterleaveUpper(df, w4, w5), df, mem + 5 * Lanes(df));
+  Store(InterleaveLower(df, w6, w7), df, mem + 6 * Lanes(df));
+  Store(InterleaveUpper(df, w6, w7), df, mem + 7 * Lanes(df));
+#elif !HWY_CAP_GE512
+  auto t0 = InterleaveLower(df, v0, v4);
+  auto t1 = InterleaveLower(df, v1, v5);
+  auto t2 = InterleaveLower(df, v2, v6);
+  auto t3 = InterleaveLower(df, v3, v7);
+  auto t4 = InterleaveUpper(df, v0, v4);
+  auto t5 = InterleaveUpper(df, v1, v5);
+  auto t6 = InterleaveUpper(df, v2, v6);
+  auto t7 = InterleaveUpper(df, v3, v7);
+
+  auto w0 = InterleaveLower(df, t0, t2);
+  auto w1 = InterleaveLower(df, t1, t3);
+  auto w2 = InterleaveUpper(df, t0, t2);
+  auto w3 = InterleaveUpper(df, t1, t3);
+  auto w4 = InterleaveLower(df, t4, t6);
+  auto w5 = InterleaveLower(df, t5, t7);
+  auto w6 = InterleaveUpper(df, t4, t6);
+  auto w7 = InterleaveUpper(df, t5, t7);
+
+  auto m0 = InterleaveLower(df, w0, w1);
+  auto m1 = InterleaveUpper(df, w0, w1);
+  auto m2 = InterleaveLower(df, w2, w3);
+  auto m3 = InterleaveUpper(df, w2, w3);
+  auto m4 = InterleaveLower(df, w4, w5);
+  auto m5 = InterleaveUpper(df, w4, w5);
+  auto m6 = InterleaveLower(df, w6, w7);
+  auto m7 = InterleaveUpper(df, w6, w7);
+
+  Store(ConcatLowerLower(df, m1, m0), df, mem);
+  Store(ConcatLowerLower(df, m3, m2), df, mem + Lanes(df));
+  Store(ConcatLowerLower(df, m5, m4), df, mem + 2 * Lanes(df));
+  Store(ConcatLowerLower(df, m7, m6), df, mem + 3 * Lanes(df));
+  Store(ConcatUpperUpper(df, m1, m0), df, mem + 4 * Lanes(df));
+  Store(ConcatUpperUpper(df, m3, m2), df, mem + 5 * Lanes(df));
+  Store(ConcatUpperUpper(df, m5, m4), df, mem + 6 * Lanes(df));
+  Store(ConcatUpperUpper(df, m7, m6), df, mem + 7 * Lanes(df));
+#else
+  auto t0 = InterleaveLower(df, v0, v4);
+  auto t1 = InterleaveLower(df, v1, v5);
+  auto t2 = InterleaveLower(df, v2, v6);
+  auto t3 = InterleaveLower(df, v3, v7);
+  auto t4 = InterleaveUpper(df, v0, v4);
+  auto t5 = InterleaveUpper(df, v1, v5);
+  auto t6 = InterleaveUpper(df, v2, v6);
+  auto t7 = InterleaveUpper(df, v3, v7);
+
+  auto w0 = InterleaveLower(df, t0, t2);
+  auto w1 = InterleaveLower(df, t1, t3);
+  auto w2 = InterleaveUpper(df, t0, t2);
+  auto w3 = InterleaveUpper(df, t1, t3);
+  auto w4 = InterleaveLower(df, t4, t6);
+  auto w5 = InterleaveLower(df, t5, t7);
+  auto w6 = InterleaveUpper(df, t4, t6);
+  auto w7 = InterleaveUpper(df, t5, t7);
+
+  auto m0 = InterleaveLower(df, w0, w1);
+  auto m1 = InterleaveUpper(df, w0, w1);
+  auto m2 = InterleaveLower(df, w2, w3);
+  auto m3 = InterleaveUpper(df, w2, w3);
+  auto m4 = InterleaveLower(df, w4, w5);
+  auto m5 = InterleaveUpper(df, w4, w5);
+  auto m6 = InterleaveLower(df, w6, w7);
+  auto m7 = InterleaveUpper(df, w6, w7);
+
+  Store(Concat4(df, Quarter<0>(df, m0), Quarter<0>(df, m1), Quarter<0>(df, m2),
+                Quarter<0>(df, m3)),
+        df, mem);
+  Store(Concat4(df, Quarter<0>(df, m4), Quarter<0>(df, m5), Quarter<0>(df, m6),
+                Quarter<0>(df, m7)),
+        df, mem + Lanes(df));
+  Store(Concat4(df, Quarter<1>(df, m0), Quarter<1>(df, m1), Quarter<1>(df, m2),
+                Quarter<1>(df, m3)),
+        df, mem + 2 * Lanes(df));
+  Store(Concat4(df, Quarter<1>(df, m4), Quarter<1>(df, m5), Quarter<1>(df, m6),
+                Quarter<1>(df, m7)),
+        df, mem + 3 * Lanes(df));
+  Store(Concat4(df, Quarter<2>(df, m0), Quarter<2>(df, m1), Quarter<2>(df, m2),
+                Quarter<2>(df, m3)),
+        df, mem + 4 * Lanes(df));
+  Store(Concat4(df, Quarter<2>(df, m4), Quarter<2>(df, m5), Quarter<2>(df, m6),
+                Quarter<2>(df, m7)),
+        df, mem + 5 * Lanes(df));
+  Store(Concat4(df, Quarter<3>(df, m0), Quarter<3>(df, m1), Quarter<3>(df, m2),
+                Quarter<3>(df, m3)),
+        df, mem + 6 * Lanes(df));
+  Store(Concat4(df, Quarter<3>(df, m4), Quarter<3>(df, m5), Quarter<3>(df, m6),
+                Quarter<3>(df, m7)),
+        df, mem + 7 * Lanes(df));
+#endif
+}
+
+#if HWY_CAP_GE256
+JXL_INLINE void Transpose8x8Block(const int32_t* JXL_RESTRICT from,
+                                  int32_t* JXL_RESTRICT to, size_t fromstride) {
+  const HWY_CAPPED(int32_t, 8) d;  // At most 8 int32 lanes per vector.
+  auto i0 = Load(d, from);  // Input row k starts at from + k * fromstride.
+  auto i1 = Load(d, from + 1 * fromstride);
+  auto i2 = Load(d, from + 2 * fromstride);
+  auto i3 = Load(d, from + 3 * fromstride);
+  auto i4 = Load(d, from + 4 * fromstride);
+  auto i5 = Load(d, from + 5 * fromstride);
+  auto i6 = Load(d, from + 6 * fromstride);
+  auto i7 = Load(d, from + 7 * fromstride);
+
+  const auto q0 = InterleaveLower(d, i0, i2);  // Pair up rows two apart.
+  const auto q1 = InterleaveLower(d, i1, i3);
+  const auto q2 = InterleaveUpper(d, i0, i2);
+  const auto q3 = InterleaveUpper(d, i1, i3);
+  const auto q4 = InterleaveLower(d, i4, i6);
+  const auto q5 = InterleaveLower(d, i5, i7);
+  const auto q6 = InterleaveUpper(d, i4, i6);
+  const auto q7 = InterleaveUpper(d, i5, i7);
+
+  const auto r0 = InterleaveLower(d, q0, q1);  // Second interleave stage.
+  const auto r1 = InterleaveUpper(d, q0, q1);
+  const auto r2 = InterleaveLower(d, q2, q3);
+  const auto r3 = InterleaveUpper(d, q2, q3);
+  const auto r4 = InterleaveLower(d, q4, q5);
+  const auto r5 = InterleaveUpper(d, q4, q5);
+  const auto r6 = InterleaveLower(d, q6, q7);
+  const auto r7 = InterleaveUpper(d, q6, q7);
+
+  i0 = ConcatLowerLower(d, r4, r0);  // Reuse i0..i7 for the output rows.
+  i1 = ConcatLowerLower(d, r5, r1);
+  i2 = ConcatLowerLower(d, r6, r2);
+  i3 = ConcatLowerLower(d, r7, r3);
+  i4 = ConcatUpperUpper(d, r4, r0);
+  i5 = ConcatUpperUpper(d, r5, r1);
+  i6 = ConcatUpperUpper(d, r6, r2);
+  i7 = ConcatUpperUpper(d, r7, r3);
+
+  Store(i0, d, to);  // Output rows are written 8 values apart.
+  Store(i1, d, to + 1 * 8);
+  Store(i2, d, to + 2 * 8);
+  Store(i3, d, to + 3 * 8);
+  Store(i4, d, to + 4 * 8);
+  Store(i5, d, to + 5 * 8);
+  Store(i6, d, to + 6 * 8);
+  Store(i7, d, to + 7 * 8);
+}
+#elif HWY_TARGET != HWY_SCALAR
+JXL_INLINE void Transpose8x8Block(const int32_t* JXL_RESTRICT from,
+                                  int32_t* JXL_RESTRICT to, size_t fromstride) {
+  const HWY_CAPPED(int32_t, 4) d;  // At most 4 int32 lanes per vector.
+  for (size_t n = 0; n < 8; n += 4) {  // First input row of the 4x4 tile.
+    for (size_t m = 0; m < 8; m += 4) {  // First input column of the tile.
+      auto p0 = Load(d, from + n * fromstride + m);
+      auto p1 = Load(d, from + (n + 1) * fromstride + m);
+      auto p2 = Load(d, from + (n + 2) * fromstride + m);
+      auto p3 = Load(d, from + (n + 3) * fromstride + m);
+      const auto q0 = InterleaveLower(d, p0, p2);
+      const auto q1 = InterleaveLower(d, p1, p3);
+      const auto q2 = InterleaveUpper(d, p0, p2);
+      const auto q3 = InterleaveUpper(d, p1, p3);
+
+      const auto r0 = InterleaveLower(d, q0, q1);
+      const auto r1 = InterleaveUpper(d, q0, q1);
+      const auto r2 = InterleaveLower(d, q2, q3);
+      const auto r3 = InterleaveUpper(d, q2, q3);
+      Store(r0, d, to + m * 8 + n);  // Tile read at (n, m) is written at (m, n).
+      Store(r1, d, to + (1 + m) * 8 + n);
+      Store(r2, d, to + (2 + m) * 8 + n);
+      Store(r3, d, to + (3 + m) * 8 + n);
+    }
+  }
+}
+
+#endif
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_SIMD_UTIL_INL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/simd_util_test.cc b/third-party/libjxl/libjxl/lib/jxl/simd_util_test.cc
new file mode 100644
index 0000000000..b81f5d1279
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/simd_util_test.cc
@@ -0,0 +1,84 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/simd_util_test.cc"
+#include <hwy/foreach_target.h>
+
+#include "lib/jxl/simd_util-inl.h"
+
+// Test utils
+#include <hwy/highway.h>
+#include <hwy/tests/test_util-inl.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+HWY_NOINLINE void TestInterleave2() {
+  HWY_FULL(float) d;
+  const auto v0 = Iota(d, 0 * 128.0);
+  const auto v1 = Iota(d, 1 * 128.0);
+  HWY_ALIGN float out[MaxLanes(d) * 2];
+  StoreInterleaved(d, v0, v1, out);  // Lane i of vector j goes to 2 * i + j.
+  for (size_t k = 0; k < 2 * Lanes(d); ++k) {
+    const size_t i = k / 2;  // Lane index.
+    const size_t j = k % 2;  // Source vector index.
+    EXPECT_EQ(out[k], j * 128 + i) << "i: " << i << " j: " << j;
+  }
+}
+HWY_NOINLINE void TestInterleave4() {
+  HWY_FULL(float) d;
+  const auto v0 = Iota(d, 0 * 128.0);
+  const auto v1 = Iota(d, 1 * 128.0);
+  const auto v2 = Iota(d, 2 * 128.0);
+  const auto v3 = Iota(d, 3 * 128.0);
+  HWY_ALIGN float out[MaxLanes(d) * 4];
+  StoreInterleaved(d, v0, v1, v2, v3, out);  // Lane i of vj -> 4 * i + j.
+  for (size_t k = 0; k < 4 * Lanes(d); ++k) {
+    const size_t i = k / 4;  // Lane index.
+    const size_t j = k % 4;  // Source vector index.
+    EXPECT_EQ(out[k], j * 128 + i) << "i: " << i << " j: " << j;
+  }
+}
+HWY_NOINLINE void TestInterleave8() {
+  HWY_FULL(float) d;
+  const auto v0 = Iota(d, 0 * 128.0);
+  const auto v1 = Iota(d, 1 * 128.0);
+  const auto v2 = Iota(d, 2 * 128.0);
+  const auto v3 = Iota(d, 3 * 128.0);
+  const auto v4 = Iota(d, 4 * 128.0);
+  const auto v5 = Iota(d, 5 * 128.0);
+  const auto v6 = Iota(d, 6 * 128.0);
+  const auto v7 = Iota(d, 7 * 128.0);
+  HWY_ALIGN float out[MaxLanes(d) * 8];
+  StoreInterleaved(d, v0, v1, v2, v3, v4, v5, v6, v7, out);
+  for (size_t k = 0; k < 8 * Lanes(d); ++k) {
+    const size_t i = k / 8;  // Lane index.
+    const size_t j = k % 8;  // Source vector index.
+    EXPECT_EQ(out[k], j * 128 + i) << "i: " << i << " j: " << j;
+  }
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+class SimdUtilTargetTest : public hwy::TestWithParamTarget {};
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(SimdUtilTargetTest);
+
+HWY_EXPORT_AND_TEST_P(SimdUtilTargetTest, TestInterleave2);
+HWY_EXPORT_AND_TEST_P(SimdUtilTargetTest, TestInterleave4);
+HWY_EXPORT_AND_TEST_P(SimdUtilTargetTest, TestInterleave8);
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/speed_tier_test.cc b/third-party/libjxl/libjxl/lib/jxl/speed_tier_test.cc
new file mode 100644
index 0000000000..61d580d2cb
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/speed_tier_test.cc
@@ -0,0 +1,109 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <string>
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+struct SpeedTierTestParams {  // One parameter set for SpeedTierTest.
+  explicit SpeedTierTestParams(const SpeedTier speed_tier,
+                               const bool shrink8 = false)
+      : speed_tier(speed_tier), shrink8(shrink8) {}
+  SpeedTier speed_tier;  // Speed tier copied into CompressParams by the test.
+  bool shrink8;  // If true, the test shrinks the image to 1/8 per dimension.
+};
+
+std::ostream& operator<<(std::ostream& os, SpeedTierTestParams params) {
+  const std::ios_base::fmtflags saved = os.flags();
+  os.setf(std::ios_base::boolalpha);  // Print shrink8 as true/false.
+  os << "SpeedTierTestParams{" << static_cast<size_t>(params.speed_tier)
+     << ", /*shrink8=*/" << params.shrink8 << "}";
+  os.flags(saved);  // Leave the caller's stream formatting untouched.
+  return os;
+}
+
+class SpeedTierTest : public testing::TestWithParam<SpeedTierTestParams> {};
+
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(
+    SpeedTierTestInstantiation, SpeedTierTest,
+    testing::Values(SpeedTierTestParams{SpeedTier::kCheetah,
+                                        /*shrink8=*/true},
+                    SpeedTierTestParams{SpeedTier::kCheetah,
+                                        /*shrink8=*/false},
+                    SpeedTierTestParams{SpeedTier::kThunder,
+                                        /*shrink8=*/true},
+                    SpeedTierTestParams{SpeedTier::kThunder,
+                                        /*shrink8=*/false},
+                    SpeedTierTestParams{SpeedTier::kLightning,
+                                        /*shrink8=*/true},
+                    SpeedTierTestParams{SpeedTier::kLightning,
+                                        /*shrink8=*/false},
+                    SpeedTierTestParams{SpeedTier::kFalcon,
+                                        /*shrink8=*/true},
+                    SpeedTierTestParams{SpeedTier::kFalcon,
+                                        /*shrink8=*/false},
+                    SpeedTierTestParams{SpeedTier::kHare,
+                                        /*shrink8=*/true},
+                    SpeedTierTestParams{SpeedTier::kHare,
+                                        /*shrink8=*/false},
+                    SpeedTierTestParams{SpeedTier::kWombat,
+                                        /*shrink8=*/true},
+                    SpeedTierTestParams{SpeedTier::kWombat,
+                                        /*shrink8=*/false},
+                    SpeedTierTestParams{SpeedTier::kSquirrel,
+                                        /*shrink8=*/true},
+                    SpeedTierTestParams{SpeedTier::kSquirrel,
+                                        /*shrink8=*/false},
+                    SpeedTierTestParams{SpeedTier::kKitten,
+                                        /*shrink8=*/true},
+                    SpeedTierTestParams{SpeedTier::kKitten,
+                                        /*shrink8=*/false},
+                    // Tortoise is only run on the downscaled (shrink8) image.
+                    SpeedTierTestParams{SpeedTier::kTortoise,
+                                        /*shrink8=*/true}));
+
+TEST_P(SpeedTierTest, Roundtrip) {
+  const PaddedBytes orig = jxl::test::ReadTestData(
+      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut io;
+  test::ThreadPoolForTests pool(8);
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+
+  const SpeedTierTestParams& params = GetParam();
+
+  if (params.shrink8) {
+    io.ShrinkTo(io.xsize() / 8, io.ysize() / 8);  // 1/8 of each dimension.
+  }
+
+  CompressParams cparams;
+  cparams.speed_tier = params.speed_tier;
+  cparams.SetCms(GetJxlCms());
+
+  CodecInOut io2;  // Receives the result of the roundtrip.
+  JXL_EXPECT_OK(test::Roundtrip(&io, cparams, {}, &io2, _));
+
+  // Can be 2.2 in non-hare mode.
+  EXPECT_LE(ButteraugliDistance(io.frames, io2.frames, ButteraugliParams(),
+                                GetJxlCms(),
+                                /*distmap=*/nullptr, /*pool=*/nullptr),
+            2.8);  // Largest accepted distance between input and roundtrip.
+}
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/splines.cc b/third-party/libjxl/libjxl/lib/jxl/splines.cc
new file mode 100644
index 0000000000..15fd6ce5b5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/splines.cc
@@ -0,0 +1,711 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/splines.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_scales.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/splines.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/fast_math-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::MulSub;
+using hwy::HWY_NAMESPACE::Sqrt;
+using hwy::HWY_NAMESPACE::Sub;
+
+// Given a set of DCT coefficients, this returns the result of performing cosine
+// interpolation on the original samples.
+float ContinuousIDCT(const float dct[32], const float t) {
+  // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of
+  // sqrt(32). This is such that an input vector {x, 0, ..., 0} produces a
+  // constant result of x. dct[0] was scaled in Dequantize() to allow uniform
+  // treatment of all the coefficients.
+  constexpr float kMultipliers[32] = {  // kMultipliers[k] = k * kPi / 32
+      kPi / 32 * 0,  kPi / 32 * 1,  kPi / 32 * 2,  kPi / 32 * 3,  kPi / 32 * 4,
+      kPi / 32 * 5,  kPi / 32 * 6,  kPi / 32 * 7,  kPi / 32 * 8,  kPi / 32 * 9,
+      kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14,
+      kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19,
+      kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24,
+      kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29,
+      kPi / 32 * 30, kPi / 32 * 31,
+  };
+  HWY_CAPPED(float, 32) df;
+  auto result = Zero(df);
+  const auto tandhalf = Set(df, t + 0.5f);  // t + 1/2 in every lane
+  for (int i = 0; i < 32; i += Lanes(df)) {
+    auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf);
+    auto cos = FastCosf(df, cos_arg);
+    auto local_res = Mul(LoadU(df, dct + i), cos);
+    result = MulAdd(Set(df, kSqrt2), local_res, result);
+  }
+  return GetLane(SumOfLanes(df, result));  // sum of the per-lane partials
+}
+
+template <typename DF>
+void DrawSegment(DF df, const SplineSegment& segment, const bool add,
+                 const size_t y, const size_t x, float* JXL_RESTRICT rows[3]) {
+  Rebind<int32_t, DF> di;  // integer lanes, used to build x, x + 1, ...
+  const auto inv_sigma = Set(df, segment.inv_sigma);
+  const auto half = Set(df, 0.5f);
+  const auto one_over_2s2 = Set(df, 0.353553391f);  // 1 / (2 * sqrt(2))
+  const auto sigma_over_4_times_intensity =
+      Set(df, segment.sigma_over_4_times_intensity);
+  const auto dx = Sub(ConvertTo(df, Iota(di, x)), Set(df, segment.center_x));
+  const auto dy = Set(df, y - segment.center_y);
+  const auto sqd = MulAdd(dx, dx, Mul(dy, dy));  // dx^2 + dy^2
+  const auto distance = Sqrt(sqd);
+  const auto one_dimensional_factor =
+      Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)),
+          FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma)));
+  auto local_intensity =
+      Mul(sigma_over_4_times_intensity,
+          Mul(one_dimensional_factor, one_dimensional_factor));
+  for (size_t c = 0; c < 3; ++c) {
+    const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]);
+    const auto in = LoadU(df, rows[c] + x);  // current pixel values
+    StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x);
+  }
+}
+
+void DrawSegment(const SplineSegment& segment, const bool add, const size_t y,
+                 const ssize_t x0, ssize_t x1, float* JXL_RESTRICT rows[3]) {
+  ssize_t x =
+      std::max<ssize_t>(x0, segment.center_x - segment.maximum_distance + 0.5f);
+  // x1 is one-past-the-end; [x, x1) is clamped to the segment's reach.
+  x1 =
+      std::min<ssize_t>(x1, segment.center_x + segment.maximum_distance + 1.5f);
+  HWY_FULL(float) df;
+  for (; x + static_cast<ssize_t>(Lanes(df)) <= x1; x += Lanes(df)) {
+    DrawSegment(df, segment, add, y, x, rows);
+  }
+  for (; x < x1; ++x) {  // remaining pixels, one lane at a time
+    DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, rows);
+  }
+}
+
+void ComputeSegments(const Spline::Point& center, const float intensity,
+                     const float color[3], const float sigma,
+                     std::vector<SplineSegment>& segments,
+                     std::vector<std::pair<size_t, size_t>>& segments_by_y) {
+  // Sanity check sigma, inverse sigma and intensity; bad values add nothing.
+  if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) &&
+        std::isfinite(intensity))) {
+    return;
+  }
+#if JXL_HIGH_PRECISION
+  constexpr float kDistanceExp = 5;
+#else
+  // About 30% faster.
+  constexpr float kDistanceExp = 3;
+#endif
+  // We cap from below colors to at least 0.01.
+  float max_color = 0.01f;
+  for (size_t c = 0; c < 3; c++) {
+    max_color = std::max(max_color, std::abs(color[c] * intensity));
+  }
+  // Distance beyond which max_color * exp(-d^2 / (2 * sigma^2)) drops below
+  // 10^-kDistanceExp (max_color already includes the intensity factor).
+  const float maximum_distance =
+      std::sqrt(-2 * sigma * sigma *
+                (std::log(0.1) * kDistanceExp - std::log(max_color)));
+  SplineSegment segment;
+  segment.center_y = center.y;
+  segment.center_x = center.x;
+  memcpy(segment.color, color, sizeof(segment.color));
+  segment.inv_sigma = 1.0f / sigma;
+  segment.sigma_over_4_times_intensity = .25f * sigma * intensity;
+  segment.maximum_distance = maximum_distance;
+  ssize_t y0 = center.y - maximum_distance + .5f;
+  ssize_t y1 = center.y + maximum_distance + 1.5f;  // one-past-the-end
+  for (ssize_t y = std::max<ssize_t>(y0, 0); y < y1; y++) {
+    segments_by_y.emplace_back(y, segments.size());  // (row, segment index)
+  }
+  segments.push_back(segment);
+}
+
+void DrawSegments(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y,
+                  float* JXL_RESTRICT row_b, const Rect& image_rect,
+                  const bool add, const SplineSegment* segments,
+                  const size_t* segment_indices,
+                  const size_t* segment_y_start) {
+  JXL_ASSERT(image_rect.ysize() == 1);  // exactly one image row per call
+  float* JXL_RESTRICT rows[3] = {row_x - image_rect.x0(),
+                                 row_y - image_rect.x0(),
+                                 row_b - image_rect.x0()};  // index by image x
+  size_t y = image_rect.y0();
+  for (size_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) {
+    DrawSegment(segments[segment_indices[i]], add, y, image_rect.x0(),
+                image_rect.x0() + image_rect.xsize(), rows);
+  }
+}
+
+void SegmentsFromPoints(
+    const Spline& spline,
+    const std::vector<std::pair<Spline::Point, float>>& points_to_draw,
+    const float arc_length, std::vector<SplineSegment>& segments,
+    std::vector<std::pair<size_t, size_t>>& segments_by_y) {
+  // Evaluates the spline's color and sigma DCTs at every point to draw and
+  // emits the corresponding segments.
+  const float inv_arc_length = 1.0f / arc_length;
+  int index = 0;
+  for (const auto& entry : points_to_draw) {
+    // Progress along the arc (capped at 1), scaled to the DCT domain [0, 31].
+    const float progress =
+        std::min(1.f, (index * kDesiredRenderingDistance) * inv_arc_length);
+    const float t = (32 - 1) * progress;
+    ++index;
+    const float color[3] = {ContinuousIDCT(spline.color_dct[0], t),
+                            ContinuousIDCT(spline.color_dct[1], t),
+                            ContinuousIDCT(spline.color_dct[2], t)};
+    const float sigma = ContinuousIDCT(spline.sigma_dct, t);
+    ComputeSegments(entry.first, entry.second, color, sigma, segments,
+                    segments_by_y);
+  }
+}
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(SegmentsFromPoints);
+HWY_EXPORT(DrawSegments);
+
+namespace {
+
+// Rejects coordinates outside (-2^23, 2^23); NaN is rejected as well. This is
+// not in the spec, but it is a reasonable limit to avoid overflows.
+template <typename T>
+Status ValidateSplinePointPos(const T& x, const T& y) {
+  constexpr T kSplinePosLimit = 1u << 23;
+  // Negated in-range check: NaN compares false and is therefore rejected.
+  const bool x_ok = (x > -kSplinePosLimit) && (x < kSplinePosLimit);
+  const bool y_ok = (y > -kSplinePosLimit) && (y < kSplinePosLimit);
+  if (!(x_ok && y_ok)) return JXL_FAILURE("Spline coordinates out of bounds");
+  return true;
+}
+
+// Maximum number of spline control points per frame is
+//   std::min(kMaxNumControlPoints, xsize * ysize / 2)
+constexpr size_t kMaxNumControlPoints = 1u << 20u;
+constexpr size_t kMaxNumControlPointsPerPixelRatio = 2;
+
+float AdjustedQuant(const int32_t adjustment) {  // 1 + a/8, or 1 / (1 - a/8)
+  if (adjustment < 0) return 1.f / (1.f - .125f * adjustment);
+  return 1.f + .125f * adjustment;
+}
+
+float InvAdjustedQuant(const int32_t adjustment) {  // 1 / AdjustedQuant(a)
+  if (adjustment < 0) return 1.f - .125f * adjustment;
+  return 1.f / (1.f + .125f * adjustment);
+}
+
+// X, Y, B, sigma.
+static constexpr float kChannelWeight[] = {0.0042f, 0.075f, 0.07f, .3333f};
+
+Status DecodeAllStartingPoints(std::vector<Spline::Point>* const points,
+                               BitReader* const br, ANSSymbolReader* reader,
+                               const std::vector<uint8_t>& context_map,
+                               const size_t num_splines) {
+  points->clear();
+  points->reserve(num_splines);
+  int64_t last_x = 0;
+  int64_t last_y = 0;
+  for (size_t i = 0; i < num_splines; i++) {
+    int64_t x =
+        reader->ReadHybridUint(kStartingPositionContext, br, context_map);
+    int64_t y =
+        reader->ReadHybridUint(kStartingPositionContext, br, context_map);
+    if (i != 0) {  // all but the first point are deltas from the previous one
+      x = UnpackSigned(x) + last_x;
+      y = UnpackSigned(y) + last_y;
+    }
+    JXL_RETURN_IF_ERROR(ValidateSplinePointPos(x, y));
+    points->emplace_back(static_cast<float>(x), static_cast<float>(y));
+    last_x = x;
+    last_y = y;
+  }
+  return true;
+}
+
+struct Vector {  // 2D displacement between two spline points
+  float x, y;
+  Vector operator-() const { return {-x, -y}; }
+  Vector operator+(const Vector& other) const {
+    return {x + other.x, y + other.y};
+  }
+  float SquaredNorm() const { return x * x + y * y; }
+};
+Vector operator*(const float k, const Vector& vec) {
+  return {k * vec.x, k * vec.y};
+}
+// Point/Vector arithmetic: point + vector -> point, point - point -> vector.
+Spline::Point operator+(const Spline::Point& p, const Vector& vec) {
+  return {p.x + vec.x, p.y + vec.y};
+}
+Vector operator-(const Spline::Point& a, const Spline::Point& b) {
+  return {a.x - b.x, a.y - b.y};
+}
+
+// TODO(eustas): avoid making a copy of "points".
+void DrawCentripetalCatmullRomSpline(std::vector<Spline::Point> points,
+                                     std::vector<Spline::Point>& result) {
+  if (points.empty()) return;
+  if (points.size() == 1) {
+    result.push_back(points[0]);
+    return;
+  }
+  // Number of points to compute between each control point.
+  static constexpr int kNumPoints = 16;
+  result.reserve((points.size() - 1) * kNumPoints + 1);
+  points.insert(points.begin(), points[0] + (points[0] - points[1]));
+  points.push_back(points[points.size() - 1] +
+                   (points[points.size() - 1] - points[points.size() - 2]));
+  // Both ends were extended by reflection, so points has at least 4 elements.
+  for (size_t start = 0; start < points.size() - 3; ++start) {
+    // 4 of them are used, and we draw from p[1] to p[2].
+    const Spline::Point* const p = &points[start];
+    result.push_back(p[1]);
+    float d[3];
+    float t[4];
+    t[0] = 0;  // t[] are knot positions, spaced by sqrt(chord length)
+    for (int k = 0; k < 3; ++k) {
+      // TODO(eustas): for each segment delta is calculated 3 times...
+      // TODO(eustas): restrict d[k] with reasonable limit and spec it.
+      d[k] = std::sqrt(hypotf(p[k + 1].x - p[k].x, p[k + 1].y - p[k].y));
+      t[k + 1] = t[k] + d[k];
+    }
+    for (int i = 1; i < kNumPoints; ++i) {
+      const float tt = d[0] + (static_cast<float>(i) / kNumPoints) * d[1];
+      Spline::Point a[3];
+      for (int k = 0; k < 3; ++k) {
+        // TODO(eustas): reciprocal multiplication would be faster.
+        a[k] = p[k] + ((tt - t[k]) / d[k]) * (p[k + 1] - p[k]);
+      }
+      Spline::Point b[2];
+      for (int k = 0; k < 2; ++k) {
+        b[k] = a[k] + ((tt - t[k]) / (d[k] + d[k + 1])) * (a[k + 1] - a[k]);
+      }
+      result.push_back(b[0] + ((tt - t[1]) / d[1]) * (b[1] - b[0]));
+    }
+  }
+  result.push_back(points[points.size() - 2]);  // the original last point
+}
+
+// Move along the line segments defined by `points`, `kDesiredRenderingDistance`
+// pixels at a time, and call `functor` with each point and the actual distance
+// to the previous point (which will always be kDesiredRenderingDistance except
+// possibly for the very last point).
+// TODO(eustas): this method always adds the last point, but never the first
+//               (unless those are one); I believe both ends matter.
+template <typename Points, typename Functor>
+void ForEachEquallySpacedPoint(const Points& points, const Functor& functor) {
+  JXL_ASSERT(!points.empty());
+  Spline::Point current = points.front();
+  // NOTE(review): the first point is emitted here; the TODO above looks stale.
+  functor(current, kDesiredRenderingDistance);
+  auto next = points.begin();
+  while (next != points.end()) {
+    const Spline::Point* previous = &current;
+    float arclength_from_previous = 0.f;
+    for (;;) {
+      if (next == points.end()) {
+        functor(*previous, arclength_from_previous);  // final (short) step
+        return;
+      }
+      const float arclength_to_next =
+          std::sqrt((*next - *previous).SquaredNorm());
+      if (arclength_from_previous + arclength_to_next >=
+          kDesiredRenderingDistance) {
+        current =
+            *previous + ((kDesiredRenderingDistance - arclength_from_previous) /
+                         arclength_to_next) *
+                            (*next - *previous);
+        functor(current, kDesiredRenderingDistance);
+        break;  // continue walking from the point just emitted
+      }
+      arclength_from_previous += arclength_to_next;
+      previous = &*next;
+      ++next;
+    }
+  }
+}
+
+}  // namespace
+
+QuantizedSpline::QuantizedSpline(const Spline& original,
+                                 const int32_t quantization_adjustment,
+                                 const float y_to_x, const float y_to_b) {
+  JXL_ASSERT(!original.control_points.empty());
+  // Store double-delta encoded integer control points (start point excluded).
+  control_points_.reserve(original.control_points.size() - 1);
+  const Spline::Point& first = original.control_points.front();
+  int last_x = static_cast<int>(std::roundf(first.x));
+  int last_y = static_cast<int>(std::roundf(first.y));
+  int last_dx = 0, last_dy = 0;
+  for (size_t i = 1; i < original.control_points.size(); ++i) {
+    const Spline::Point& pt = original.control_points[i];
+    const int x = static_cast<int>(std::roundf(pt.x));
+    const int y = static_cast<int>(std::roundf(pt.y));
+    const int dx = x - last_x;
+    const int dy = y - last_y;
+    control_points_.emplace_back(dx - last_dx, dy - last_dy);
+    last_dx = dx;
+    last_dy = dy;
+    last_x = x;
+    last_y = y;
+  }
+
+  const auto to_int = [](float v) -> int {
+    // Maximal int representable with float.
+    constexpr float kMax = std::numeric_limits<int>::max() - 127;
+    constexpr float kMin = -kMax;
+    return static_cast<int>(std::roundf(Clamp1(v, kMin, kMax)));
+  };
+  const auto quant = AdjustedQuant(quantization_adjustment);
+  const auto inv_quant = InvAdjustedQuant(quantization_adjustment);
+  // Y (c == 1) goes first: X and B are coded relative to the *quantized* Y.
+  for (int c : {1, 0, 2}) {
+    const float factor = (c == 0) ? y_to_x : (c == 1) ? 0 : y_to_b;
+    for (int i = 0; i < 32; ++i) {
+      const float dct_factor = (i == 0) ? kSqrt2 : 1.0f;
+      const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f;
+      const auto restored_y =
+          color_dct_[1][i] * inv_dct_factor * kChannelWeight[1] * inv_quant;
+      const auto decorrelated = original.color_dct[c][i] - factor * restored_y;
+      color_dct_[c][i] =
+          to_int(decorrelated * dct_factor * quant / kChannelWeight[c]);
+    }
+  }
+  for (int i = 0; i < 32; ++i) {
+    const float dct_factor = (i == 0) ? kSqrt2 : 1.0f;
+    sigma_dct_[i] =
+        to_int(original.sigma_dct[i] * dct_factor * quant / kChannelWeight[3]);
+  }
+}
+
+// Reconstructs `result` from the quantized data, starting at `starting_point`.
+// Fails on out-of-bounds coordinates or when an area estimate is too large.
+Status QuantizedSpline::Dequantize(const Spline::Point& starting_point,
+                                   const int32_t quantization_adjustment,
+                                   const float y_to_x, const float y_to_b,
+                                   const uint64_t image_size,
+                                   uint64_t* total_estimated_area_reached,
+                                   Spline& result) const {
+  constexpr uint64_t kOne = static_cast<uint64_t>(1);
+  const uint64_t area_limit =
+      std::min(1024 * image_size + (kOne << 32), kOne << 42);
+
+  result.control_points.clear();
+  result.control_points.reserve(control_points_.size() + 1);
+  float px = std::roundf(starting_point.x);
+  float py = std::roundf(starting_point.y);
+  JXL_RETURN_IF_ERROR(ValidateSplinePointPos(px, py));
+  int current_x = static_cast<int>(px);
+  int current_y = static_cast<int>(py);
+  result.control_points.push_back(Spline::Point{static_cast<float>(current_x),
+                                                static_cast<float>(current_y)});
+  int current_delta_x = 0, current_delta_y = 0;
+  uint64_t manhattan_distance = 0;
+  for (const auto& point : control_points_) {
+    current_delta_x += point.first;
+    current_delta_y += point.second;
+    // Widen before adding: the sum of two int magnitudes can exceed INT_MAX.
+    manhattan_distance += static_cast<uint64_t>(std::abs(current_delta_x)) +
+                          static_cast<uint64_t>(std::abs(current_delta_y));
+    if (manhattan_distance > area_limit) {
+      return JXL_FAILURE("Too large manhattan_distance reached: %" PRIu64,
+                         manhattan_distance);
+    }
+    JXL_RETURN_IF_ERROR(
+        ValidateSplinePointPos(current_delta_x, current_delta_y));
+    current_x += current_delta_x;
+    current_y += current_delta_y;
+    JXL_RETURN_IF_ERROR(ValidateSplinePointPos(current_x, current_y));
+    result.control_points.push_back(Spline::Point{
+        static_cast<float>(current_x), static_cast<float>(current_y)});
+  }
+
+  const auto inv_quant = InvAdjustedQuant(quantization_adjustment);
+  for (int c = 0; c < 3; ++c) {
+    for (int i = 0; i < 32; ++i) {
+      const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f;
+      result.color_dct[c][i] =
+          color_dct_[c][i] * inv_dct_factor * kChannelWeight[c] * inv_quant;
+    }
+  }
+  for (int i = 0; i < 32; ++i) {
+    result.color_dct[0][i] += y_to_x * result.color_dct[1][i];
+    result.color_dct[2][i] += y_to_b * result.color_dct[1][i];
+  }
+  uint64_t width_estimate = 0;
+  uint64_t color[3] = {};
+  for (int c = 0; c < 3; ++c) {
+    for (int i = 0; i < 32; ++i) {
+      color[c] += static_cast<uint64_t>(
+          std::ceil(inv_quant * std::abs(color_dct_[c][i])));
+    }
+  }
+  color[0] += static_cast<uint64_t>(std::ceil(std::abs(y_to_x))) * color[1];
+  color[2] += static_cast<uint64_t>(std::ceil(std::abs(y_to_b))) * color[1];
+  // This is not taking kChannelWeight into account, but up to constant factors
+  // it gives an indication of the influence of the color values on the area
+  // that will need to be rendered.
+  const uint64_t max_color = std::max({color[1], color[0], color[2]});
+  uint64_t logcolor =
+      std::max(kOne, static_cast<uint64_t>(CeilLog2Nonzero(kOne + max_color)));
+  const float weight_limit =
+      std::ceil(std::sqrt((static_cast<float>(area_limit) / logcolor) /
+                          std::max<uint64_t>(1, manhattan_distance)));
+  for (int i = 0; i < 32; ++i) {
+    const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f;
+    result.sigma_dct[i] =
+        sigma_dct_[i] * inv_dct_factor * kChannelWeight[3] * inv_quant;
+    // If we include the factor kChannelWeight[3]=.3333f here, we get a
+    // realistic area estimate. We leave it out to simplify the calculations,
+    // and understand that this way we underestimate the area by a factor of
+    // 1/(0.3333*0.3333). This is taken into account in the limits below.
+    float weight_f = std::ceil(inv_quant * std::abs(sigma_dct_[i]));
+    uint64_t weight =
+        static_cast<uint64_t>(std::min(weight_limit, std::max(1.0f, weight_f)));
+    width_estimate += weight * weight * logcolor;
+  }
+  *total_estimated_area_reached += (width_estimate * manhattan_distance);
+  if (*total_estimated_area_reached > area_limit) {
+    return JXL_FAILURE("Too large total_estimated_area reached: %" PRIu64,
+                       *total_estimated_area_reached);
+  }
+  return true;
+}
+
+// Reads this spline's control-point delta-deltas and its color/sigma DCTs.
+Status QuantizedSpline::Decode(const std::vector<uint8_t>& context_map,
+                               ANSSymbolReader* const decoder,
+                               BitReader* const br,
+                               const size_t max_control_points,
+                               size_t* total_num_control_points) {
+  const size_t num_control_points =
+      decoder->ReadHybridUint(kNumControlPointsContext, br, context_map);
+  *total_num_control_points += num_control_points;
+  if (*total_num_control_points > max_control_points) {
+    return JXL_FAILURE("Too many control points: %" PRIuS,
+                       *total_num_control_points);
+  }
+  control_points_.resize(num_control_points);
+  // Maximal image dimension.
+  constexpr int64_t kDeltaLimit = 1u << 30;
+  for (std::pair<int64_t, int64_t>& control_point : control_points_) {
+    control_point.first = UnpackSigned(
+        decoder->ReadHybridUint(kControlPointsContext, br, context_map));
+    control_point.second = UnpackSigned(
+        decoder->ReadHybridUint(kControlPointsContext, br, context_map));
+    // Check delta-deltas are not outrageous; it is not in spec, but there is
+    // no reason to allow larger values.
+    if ((control_point.first >= kDeltaLimit) ||
+        (control_point.first <= -kDeltaLimit) ||
+        (control_point.second >= kDeltaLimit) ||
+        (control_point.second <= -kDeltaLimit)) {
+      return JXL_FAILURE("Spline delta-delta is out of bounds");
+    }
+  }
+  const auto decode_dct = [decoder, br, &context_map](int dct[32]) -> Status {
+    constexpr int kWeirdNumber = std::numeric_limits<int>::min();  // rejected
+    for (int i = 0; i < 32; ++i) {
+      dct[i] =
+          UnpackSigned(decoder->ReadHybridUint(kDCTContext, br, context_map));
+      if (dct[i] == kWeirdNumber) {
+        return JXL_FAILURE("The weird number in spline DCT");
+      }
+    }
+    return true;
+  };
+  for (int c = 0; c < 3; ++c) {
+    JXL_RETURN_IF_ERROR(decode_dct(color_dct_[c]));
+  }
+  JXL_RETURN_IF_ERROR(decode_dct(sigma_dct_));
+  return true;
+}
+
+void Splines::Clear() {  // Drops all splines and the cached draw data.
+  segment_y_start_.clear();
+  segment_indices_.clear();
+  segments_.clear();
+  starting_points_.clear();
+  splines_.clear();
+  quantization_adjustment_ = 0;
+}
+
+// Decodes starting points, quantization adjustment and all quantized splines.
+Status Splines::Decode(jxl::BitReader* br, const size_t num_pixels) {
+  std::vector<uint8_t> context_map;
+  ANSCode code;
+  JXL_RETURN_IF_ERROR(
+      DecodeHistograms(br, kNumSplineContexts, &code, &context_map));
+  ANSSymbolReader decoder(&code, br);
+  const size_t num_splines =
+      1 + decoder.ReadHybridUint(kNumSplinesContext, br, context_map);
+  size_t max_control_points = std::min(
+      kMaxNumControlPoints, num_pixels / kMaxNumControlPointsPerPixelRatio);
+  if (num_splines > max_control_points) {
+    return JXL_FAILURE("Too many splines: %" PRIuS, num_splines);
+  }
+  JXL_RETURN_IF_ERROR(DecodeAllStartingPoints(&starting_points_, br, &decoder,
+                                              context_map, num_splines));
+
+  quantization_adjustment_ = UnpackSigned(
+      decoder.ReadHybridUint(kQuantizationAdjustmentContext, br, context_map));
+
+  splines_.clear();
+  splines_.reserve(num_splines);
+  // The running total starts at num_splines: one starting point per spline.
+  size_t num_control_points = num_splines;
+  for (size_t i = 0; i < num_splines; ++i) {
+    QuantizedSpline spline;
+    JXL_RETURN_IF_ERROR(spline.Decode(context_map, &decoder, br,
+                                      max_control_points, &num_control_points));
+    splines_.push_back(std::move(spline));
+  }
+  JXL_RETURN_IF_ERROR(decoder.CheckANSFinalState());
+
+  if (!HasAny()) {
+    return JXL_FAILURE("Decoded splines but got none");
+  }
+  return true;
+}
+
+void Splines::AddTo(Image3F* const opsin, const Rect& opsin_rect,
+                    const Rect& image_rect) const {
+  Apply</*add=*/true>(opsin, opsin_rect, image_rect);  // splines are added
+}
+void Splines::AddToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y,
+                       float* JXL_RESTRICT row_b, const Rect& image_row) const {
+  ApplyToRow</*add=*/true>(row_x, row_y, row_b, image_row);  // one row only
+}
+
+void Splines::SubtractFrom(Image3F* const opsin) const {
+  Apply</*add=*/false>(opsin, Rect(*opsin), Rect(*opsin));  // whole image
+}
+
+// Dequantizes all splines and rebuilds the per-row draw cache (`segments_`,
+// `segment_indices_`, `segment_y_start_`) for an image of the given size.
+Status Splines::InitializeDrawCache(const size_t image_xsize,
+                                    const size_t image_ysize,
+                                    const ColorCorrelationMap& cmap) {
+  // TODO(veluca): avoid storing segments that are entirely outside image
+  // boundaries.
+  segments_.clear();
+  segment_indices_.clear();
+  segment_y_start_.clear();
+  std::vector<std::pair<size_t, size_t>> segments_by_y;
+  std::vector<Spline::Point> intermediate_points;
+  // Widen before multiplying so the area cannot wrap when size_t is 32-bit.
+  const uint64_t image_area = static_cast<uint64_t>(image_xsize) * image_ysize;
+  uint64_t total_estimated_area_reached = 0;
+  std::vector<Spline> splines;
+  splines.reserve(splines_.size());
+  for (size_t i = 0; i < splines_.size(); ++i) {
+    Spline spline;
+    JXL_RETURN_IF_ERROR(splines_[i].Dequantize(
+        starting_points_[i], quantization_adjustment_, cmap.YtoXRatio(0),
+        cmap.YtoBRatio(0), image_area, &total_estimated_area_reached, spline));
+    if (std::adjacent_find(spline.control_points.begin(),
+                           spline.control_points.end()) !=
+        spline.control_points.end()) {
+      // Otherwise division by zero might occur. Once control points coincide,
+      // the direction of curve is undefined...
+      return JXL_FAILURE(
+          "identical successive control points in spline %" PRIuS, i);
+    }
+    splines.push_back(std::move(spline));
+  }
+  // TODO(firsching) Change this into a JXL_FAILURE for level 5 codestreams.
+  if (total_estimated_area_reached >
+      std::min(8 * image_area + (uint64_t(1) << 25), uint64_t(1) << 30)) {
+    JXL_WARNING(
+        "Large total_estimated_area_reached, expect slower decoding: %" PRIu64,
+        total_estimated_area_reached);
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    return JXL_FAILURE("Total spline area is too large");
+#endif
+  }
+
+  for (Spline& spline : splines) {
+    std::vector<std::pair<Spline::Point, float>> points_to_draw;
+    auto add_point = [&](const Spline::Point& point, const float multiplier) {
+      points_to_draw.emplace_back(point, multiplier);
+    };
+    intermediate_points.clear();
+    DrawCentripetalCatmullRomSpline(spline.control_points, intermediate_points);
+    ForEachEquallySpacedPoint(intermediate_points, add_point);
+    const float arc_length =
+        (points_to_draw.size() - 2) * kDesiredRenderingDistance +
+        points_to_draw.back().second;
+    if (arc_length <= 0.f) continue;  // This spline wouldn't have any effect.
+    HWY_DYNAMIC_DISPATCH(SegmentsFromPoints)
+    (spline, points_to_draw, arc_length, segments_, segments_by_y);
+  }
+
+  // TODO(eustas): consider linear sorting here.
+  std::sort(segments_by_y.begin(), segments_by_y.end());
+  segment_indices_.resize(segments_by_y.size());
+  segment_y_start_.resize(image_ysize + 1);
+  // Count the entries of each row; rows outside the image are not counted.
+  for (size_t i = 0; i < segments_by_y.size(); i++) {
+    segment_indices_[i] = segments_by_y[i].second;
+    size_t y = segments_by_y[i].first;
+    if (y < image_ysize) ++segment_y_start_[y + 1];
+  }
+  // Prefix sums: [segment_y_start_[y], segment_y_start_[y + 1]) indexes row y.
+  for (size_t y = 0; y < image_ysize; y++) {
+    segment_y_start_[y + 1] += segment_y_start_[y];
+  }
+  return true;
+}
+
+template <bool add>
+void Splines::ApplyToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y,
+                         float* JXL_RESTRICT row_b,
+                         const Rect& image_row) const {
+  if (segments_.empty()) return;  // nothing cached, nothing to draw
+  JXL_ASSERT(image_row.ysize() == 1);
+  for (size_t line = 0; line < image_row.ysize(); ++line) {
+    HWY_DYNAMIC_DISPATCH(DrawSegments)
+    (row_x, row_y, row_b, image_row.Line(line), add, segments_.data(),
+     segment_indices_.data(), segment_y_start_.data());
+  }
+}
+
+template <bool add>
+void Splines::Apply(Image3F* const opsin, const Rect& opsin_rect,
+                    const Rect& image_rect) const {
+  if (segments_.empty()) return;
+  const size_t x0 = opsin_rect.x0();  // same column offset for every row
+  for (size_t row = 0; row < image_rect.ysize(); ++row) {
+    const size_t y = opsin_rect.Line(row).y0();
+    ApplyToRow<add>(opsin->PlaneRow(0, y) + x0, opsin->PlaneRow(1, y) + x0,
+                    opsin->PlaneRow(2, y) + x0, image_rect.Line(row));
+  }
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/splines.h b/third-party/libjxl/libjxl/lib/jxl/splines.h
new file mode 100644
index 0000000000..c8dad3417c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/splines.h
@@ -0,0 +1,148 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_SPLINES_H_
+#define LIB_JXL_SPLINES_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+static constexpr float kDesiredRenderingDistance = 1.f;
+
+enum SplineEntropyContexts : size_t {  // context ids for spline decoding
+  kQuantizationAdjustmentContext = 0,
+  kStartingPositionContext,
+  kNumSplinesContext,
+  kNumControlPointsContext,
+  kControlPointsContext,
+  kDCTContext,
+  kNumSplineContexts  // number of contexts above; keep last
+};
+
+struct Spline {
+  struct Point {
+    Point() : x(0.0f), y(0.0f) {}
+    Point(float x, float y) : x(x), y(y) {}
+    float x, y;
+    bool operator==(const Point& other) const {  // equal within 1e-3
+      return std::fabs(x - other.x) < 1e-3f && std::fabs(y - other.y) < 1e-3f;
+    }
+  };
+  std::vector<Point> control_points;
+  // X, Y, B; 32 DCT coefficients per channel.
+  float color_dct[3][32];
+  // Splines are drawn by normalized Gaussian splatting. This controls the
+  // Gaussian's parameter along the spline.
+  float sigma_dct[32];
+};
+
+class QuantizedSplineEncoder;
+
+class QuantizedSpline {
+ public:
+  QuantizedSpline() = default;
+  explicit QuantizedSpline(const Spline& original,
+                           int32_t quantization_adjustment, float y_to_x,
+                           float y_to_b);
+  // Reconstructs the spline into `result`; returns an error on invalid data.
+  Status Dequantize(const Spline::Point& starting_point,
+                    int32_t quantization_adjustment, float y_to_x, float y_to_b,
+                    uint64_t image_size, uint64_t* total_estimated_area_reached,
+                    Spline& result) const;
+  // Reads the quantized control points and DCT coefficients via `decoder`.
+  Status Decode(const std::vector<uint8_t>& context_map,
+                ANSSymbolReader* decoder, BitReader* br,
+                size_t max_control_points, size_t* total_num_control_points);
+
+ private:
+  friend class QuantizedSplineEncoder;
+
+  std::vector<std::pair<int64_t, int64_t>>
+      control_points_;  // Double delta-encoded.
+  int color_dct_[3][32] = {};
+  int sigma_dct_[32] = {};
+};
+
+// A single "drawable unit" of a spline, i.e. a line of the region in which we
+// render each Gaussian. The structure doesn't actually depend on the exact
+// row, which allows reuse for different y values (which are tracked
+// separately).
+struct SplineSegment {
+  float center_x, center_y;
+  float maximum_distance;  // pixels farther from the center are not drawn
+  float inv_sigma;         // 1 / sigma
+  float sigma_over_4_times_intensity;  // 0.25 * sigma * intensity
+  float color[3];
+};
+
+class Splines {
+ public:
+  Splines() = default;
+  explicit Splines(const int32_t quantization_adjustment,
+                   std::vector<QuantizedSpline> splines,
+                   std::vector<Spline::Point> starting_points)
+      : quantization_adjustment_(quantization_adjustment),
+        splines_(std::move(splines)),
+        starting_points_(std::move(starting_points)) {}
+
+  bool HasAny() const { return !splines_.empty(); }
+
+  void Clear();
+  // Reads the splines from `br`; returns an error on invalid data.
+  Status Decode(BitReader* br, size_t num_pixels);
+  // Draw the segments cached by InitializeDrawCache(); no-op if it is empty.
+  void AddTo(Image3F* opsin, const Rect& opsin_rect,
+             const Rect& image_rect) const;
+  void AddToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y,
+                float* JXL_RESTRICT row_b, const Rect& image_row) const;
+  void SubtractFrom(Image3F* opsin) const;
+
+  const std::vector<QuantizedSpline>& QuantizedSplines() const {
+    return splines_;
+  }
+  const std::vector<Spline::Point>& StartingPoints() const {
+    return starting_points_;
+  }
+
+  int32_t GetQuantizationAdjustment() const { return quantization_adjustment_; }
+  // Rebuilds the segment cache used by the drawing methods above.
+  Status InitializeDrawCache(size_t image_xsize, size_t image_ysize,
+                             const ColorCorrelationMap& cmap);
+
+ private:
+  template <bool>
+  void ApplyToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y,
+                  float* JXL_RESTRICT row_b, const Rect& image_row) const;
+  template <bool>
+  void Apply(Image3F* opsin, const Rect& opsin_rect,
+             const Rect& image_rect) const;
+
+  // If positive, quantization weights are multiplied by 1 + this/8, which
+  // increases precision. If negative, they are divided by 1 - this/8. If 0,
+  // they are unchanged.
+  int32_t quantization_adjustment_ = 0;
+  std::vector<QuantizedSpline> splines_;
+  std::vector<Spline::Point> starting_points_;
+  std::vector<SplineSegment> segments_;
+  std::vector<size_t> segment_indices_;
+  std::vector<size_t> segment_y_start_;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_SPLINES_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/splines_gbench.cc b/third-party/libjxl/libjxl/lib/jxl/splines_gbench.cc
new file mode 100644
index 0000000000..78ff6d41c0
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/splines_gbench.cc
@@ -0,0 +1,52 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "benchmark/benchmark.h"
+#include "lib/jxl/splines.h"
+
+namespace jxl {
+namespace {
+
+constexpr int kQuantizationAdjustment = 0;
+const ColorCorrelationMap* const cmap = new ColorCorrelationMap;
+const float kYToX = cmap->YtoXRatio(0);
+const float kYToB = cmap->YtoBRatio(0);
+
+void BM_Splines(benchmark::State& state) {
+  const size_t n = state.range();
+
+  std::vector<Spline> spline_data = {
+      {/*control_points=*/{
+           {9, 54}, {118, 159}, {97, 3}, {10, 40}, {150, 25}, {120, 300}},
+       /*color_dct=*/
+       {{0.03125f, 0.00625f, 0.003125f}, {1.f, 0.321875f}, {1.f, 0.24375f}},
+       /*sigma_dct=*/{0.3125f, 0.f, 0.f, 0.0625f}}};
+  std::vector<QuantizedSpline> quantized_splines;
+  std::vector<Spline::Point> starting_points;
+  for (const Spline& spline : spline_data) {
+    quantized_splines.emplace_back(spline, kQuantizationAdjustment, kYToX,
+                                   kYToB);
+    starting_points.push_back(spline.control_points.front());
+  }
+  Splines splines(kQuantizationAdjustment, std::move(quantized_splines),
+                  std::move(starting_points));
+
+  Image3F drawing_area(320, 320);
+  ZeroFillImage(&drawing_area);
+  for (auto _ : state) {
+    for (size_t i = 0; i < n; ++i) {
+      JXL_CHECK(splines.InitializeDrawCache(drawing_area.xsize(),
+                                            drawing_area.ysize(), *cmap));
+      splines.AddTo(&drawing_area, Rect(drawing_area), Rect(drawing_area));
+    }
+  }
+
+  state.SetItemsProcessed(n * state.iterations());
+}
+
+BENCHMARK(BM_Splines)->Range(1, 1 << 10);
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/splines_test.cc b/third-party/libjxl/libjxl/lib/jxl/splines_test.cc
new file mode 100644
index 0000000000..358ba3f632
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/splines_test.cc
@@ -0,0 +1,350 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/splines.h"
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_splines.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+
+std::ostream& operator<<(std::ostream& os, const Spline::Point& p) {
+  return os << "(" << p.x << ", " << p.y << ")";
+}
+
+std::ostream& operator<<(std::ostream& os, const Spline& spline) {
+  return os << "(spline with " << spline.control_points.size()
+            << " control points)";
+}
+
+namespace {
+
+using ::testing::AllOf;
+using ::testing::Field;
+using ::testing::FloatNear;
+using ::testing::Pointwise;
+
+constexpr int kQuantizationAdjustment = 0;
+const ColorCorrelationMap* const cmap = new ColorCorrelationMap;
+const float kYToX = cmap->YtoXRatio(0);
+const float kYToB = cmap->YtoBRatio(0);
+
+constexpr float kTolerance = 0.003125;
+
+std::vector<Spline> DequantizeSplines(const Splines& splines) {
+  const auto& quantized_splines = splines.QuantizedSplines();
+  const auto& starting_points = splines.StartingPoints();
+  JXL_CHECK(quantized_splines.size() == starting_points.size());
+
+  std::vector<Spline> dequantized;
+  uint64_t total = 0;
+  for (size_t i = 0; i < quantized_splines.size(); ++i) {
+    dequantized.emplace_back();
+    JXL_CHECK(quantized_splines[i].Dequantize(
+        starting_points[i], kQuantizationAdjustment, kYToX, kYToB, 2u << 30u,
+        &total, dequantized.back()));
+  }
+  return dequantized;
+}
+
+MATCHER(ControlPointIs, "") {
+  const Spline::Point& actual = std::get<0>(arg);
+  const Spline::Point& expected = std::get<1>(arg);
+  return testing::ExplainMatchResult(
+      AllOf(Field(&Spline::Point::x, FloatNear(expected.x, kTolerance)),
+            Field(&Spline::Point::y, FloatNear(expected.y, kTolerance))),
+      actual, result_listener);
+}
+
+MATCHER(ControlPointsMatch, "") {
+  const Spline& actual = std::get<0>(arg);
+  const Spline& expected = std::get<1>(arg);
+  return testing::ExplainMatchResult(
+      Field(&Spline::control_points,
+            Pointwise(ControlPointIs(), expected.control_points)),
+      actual, result_listener);
+}
+
+MATCHER(SplinesMatch, "") {
+  const Spline& actual = std::get<0>(arg);
+  const Spline& expected = std::get<1>(arg);
+  if (!testing::ExplainMatchResult(ControlPointsMatch(), arg,
+                                   result_listener)) {
+    return false;
+  }
+  for (int i = 0; i < 3; ++i) {
+    size_t color_dct_size =
+        sizeof(expected.color_dct[i]) / sizeof(expected.color_dct[i][0]);
+    for (size_t j = 0; j < color_dct_size; j++) {
+      testing::StringMatchResultListener color_dct_listener;
+      if (!testing::ExplainMatchResult(
+              FloatNear(expected.color_dct[i][j], kTolerance),
+              actual.color_dct[i][j], &color_dct_listener)) {
+        *result_listener << ", where color_dct[" << i << "][" << j
+                         << "] don't match, " << color_dct_listener.str();
+        return false;
+      }
+    }
+  }
+  size_t sigma_dct_size =
+      sizeof(expected.sigma_dct) / sizeof(expected.sigma_dct[0]);
+  for (size_t i = 0; i < sigma_dct_size; i++) {
+    testing::StringMatchResultListener sigma_listener;
+    if (!testing::ExplainMatchResult(
+            FloatNear(expected.sigma_dct[i], kTolerance), actual.sigma_dct[i],
+            &sigma_listener)) {
+      *result_listener << ", where sigma_dct[" << i << "] don't match, "
+                       << sigma_listener.str();
+      return false;
+    }
+  }
+  return true;
+}
+
+}  // namespace
+
+TEST(SplinesTest, Serialization) {
+  std::vector<Spline> spline_data = {
+      {/*control_points=*/{
+           {109, 54}, {218, 159}, {80, 3}, {110, 274}, {94, 185}, {17, 277}},
+       /*color_dct=*/
+       {{36.3, 39.7, 23.2, 67.5, 4.4,  71.5, 62.3, 32.3, 92.2, 10.1, 10.8,
+         9.2,  6.1,  10.5, 79.1, 7,    24.6, 90.8, 5.5,  84,   43.8, 49,
+         33.5, 78.9, 54.5, 77.9, 62.1, 51.4, 36.4, 14.3, 83.7, 35.4},
+        {9.4,  53.4, 9.5,  74.9, 72.7, 26.7, 7.9,  0.9, 84.9, 23.2, 26.5,
+         31.1, 91,   11.7, 74.1, 39.3, 23.7, 82.5, 4.8, 2.7,  61.2, 96.4,
+         13.7, 66.7, 62.9, 82.4, 5.9,  98.7, 21.5, 7.9, 51.7, 63.1},
+        {48,   39.3, 6.9,  26.3, 33.3, 6.2,  1.7,  98.9, 59.9, 59.6, 95,
+         61.3, 82.7, 53,   6.1,  30.4, 34.7, 96.9, 93.4, 17,   38.8, 80.8,
+         63,   18.6, 43.6, 32.3, 61,   20.2, 24.3, 28.3, 69.1, 62.4}},
+       /*sigma_dct=*/{32.7, 21.5, 44.4, 1.8,  45.8, 90.6, 29.3, 59.2,
+                      23.7, 85.2, 84.8, 27.2, 42.1, 84.1, 50.6, 17.6,
+                      93.7, 4.9,  2.6,  69.8, 94.9, 52,   24.3, 18.8,
+                      12.1, 95.7, 28.5, 81.4, 89.9, 31.4, 74.8, 52}},
+      {/*control_points=*/{{172, 309},
+                           {196, 277},
+                           {42, 238},
+                           {114, 350},
+                           {307, 290},
+                           {316, 269},
+                           {124, 66},
+                           {233, 267}},
+       /*color_dct=*/
+       {{15,   28.9, 22, 6.6,  41.8, 83,   8.6,  56.8, 68.9, 9.7,  5.4,
+         19.8, 70.8, 90, 52.5, 65.2, 7.8,  23.5, 26.4, 72.2, 64.7, 87.1,
+         1.3,  67.5, 46, 68.4, 65.4, 35.5, 29.1, 13,   41.6, 23.9},
+        {47.7, 79.4, 62.7, 29.1, 96.8, 18.5, 17.6, 15.2, 80.5, 56,  96.2,
+         59.9, 26.7, 96.1, 92.3, 42.1, 35.8, 54,   23.2, 55,   76,  35.8,
+         58.4, 88.7, 2.4,  78.1, 95.6, 27.5, 6.6,  78.5, 24.1, 69.8},
+        {43.8, 96.5, 0.9,  95.1, 49.1, 71.2, 25.1, 33.6, 75.2, 95,  82.1,
+         19.7, 10.5, 44.9, 50,   93.3, 83.5, 99.5, 64.6, 54,   3.5, 99.7,
+         45.3, 82.1, 22.4, 37.9, 60,   32.2, 12.6, 4.6,  65.5, 96.4}},
+       /*sigma_dct=*/{72.5, 2.6,  41.7, 2.2,  39.7, 79.1, 69.6, 19.9,
+                      92.3, 71.5, 41.9, 62.1, 30,   49.4, 70.3, 45.3,
+                      62.5, 47.2, 46.7, 41.2, 90.8, 46.8, 91.2, 55,
+                      8.1,  69.6, 25.4, 84.7, 61.7, 27.6, 3.7,  46.9}},
+      {/*control_points=*/{{100, 186},
+                           {257, 97},
+                           {170, 49},
+                           {25, 169},
+                           {309, 104},
+                           {232, 237},
+                           {385, 101},
+                           {122, 168},
+                           {26, 300},
+                           {390, 88}},
+       /*color_dct=*/
+       {{16.9, 64.8, 4.2,  10.6, 23.5, 17,   79.3, 5.7,  60.4, 16.6, 94.9,
+         63.7, 87.6, 10.5, 3.8,  61.1, 22.9, 81.9, 80.4, 40.5, 45.9, 25.4,
+         39.8, 30,   50.2, 90.4, 27.9, 93.7, 65.1, 48.2, 22.3, 43.9},
+        {24.9, 66,   3.5,  90.2, 97.1, 15.8, 35.6, 0.6,  68,   39.6, 24.4,
+         85.9, 57.7, 77.6, 47.5, 67.9, 4.3,  5.4,  91.2, 58.5, 0.1,  52.2,
+         3.5,  47.8, 63.2, 43.5, 85.8, 35.8, 50.2, 35.9, 19.2, 48.2},
+        {82.8, 44.9, 76.4, 39.5, 94.1, 14.3, 89.8, 10,   10.5, 74.5, 56.3,
+         65.8, 7.8,  23.3, 52.8, 99.3, 56.8, 46,   76.7, 13.5, 67,   22.4,
+         29.9, 43.3, 70.3, 26,   74.3, 53.9, 62,   19.1, 49.3, 46.7}},
+       /*sigma_dct=*/{83.5, 1.7,  25.1, 18.7, 46.5, 75.3, 28,   62.3,
+                      50.3, 23.3, 85.6, 96,   45.8, 33.1, 33.4, 52.9,
+                      26.3, 58.5, 19.6, 70,   92.6, 22.5, 57,   21.6,
+                      76.8, 87.5, 22.9, 66.3, 35.7, 35.6, 56.8, 67.2}},
+  };
+
+  std::vector<QuantizedSpline> quantized_splines;
+  std::vector<Spline::Point> starting_points;
+  for (const Spline& spline : spline_data) {
+    quantized_splines.emplace_back(spline, kQuantizationAdjustment, kYToX,
+                                   kYToB);
+    starting_points.push_back(spline.control_points.front());
+  }
+
+  Splines splines(kQuantizationAdjustment, std::move(quantized_splines),
+                  std::move(starting_points));
+  const std::vector<Spline> quantized_spline_data = DequantizeSplines(splines);
+  EXPECT_THAT(quantized_spline_data,
+              Pointwise(ControlPointsMatch(), spline_data));
+
+  BitWriter writer;
+  EncodeSplines(splines, &writer, kLayerSplines, HistogramParams(), nullptr);
+  writer.ZeroPadToByte();
+  const size_t bits_written = writer.BitsWritten();
+
+  printf("Wrote %" PRIuS " bits of splines.\n", bits_written);
+
+  BitReader reader(writer.GetSpan());
+  Splines decoded_splines;
+  ASSERT_TRUE(decoded_splines.Decode(&reader, /*num_pixels=*/1000));
+  ASSERT_TRUE(reader.JumpToByteBoundary());
+  EXPECT_EQ(reader.TotalBitsConsumed(), bits_written);
+  ASSERT_TRUE(reader.Close());
+
+  const std::vector<Spline> decoded_spline_data =
+      DequantizeSplines(decoded_splines);
+  EXPECT_THAT(decoded_spline_data,
+              Pointwise(SplinesMatch(), quantized_spline_data));
+}
+
+#ifdef JXL_CRASH_ON_ERROR
+TEST(SplinesTest, DISABLED_TooManySplinesTest) {
+#else
+TEST(SplinesTest, TooManySplinesTest) {
+#endif
+  // This is more than the limit for 1000 pixels.
+  const size_t kNumSplines = 300;
+
+  std::vector<QuantizedSpline> quantized_splines;
+  std::vector<Spline::Point> starting_points;
+  for (size_t i = 0; i < kNumSplines; i++) {
+    Spline spline = {
+        /*control_points=*/{{1.f + i, 2}, {10.f + i, 25}, {30.f + i, 300}},
+        /*color_dct=*/
+        {{1.f, 0.2f, 0.1f}, {35.7f, 10.3f}, {35.7f, 7.8f}},
+        /*sigma_dct=*/{10.f, 0.f, 0.f, 2.f}};
+    quantized_splines.emplace_back(spline, kQuantizationAdjustment, kYToX,
+                                   kYToB);
+    starting_points.push_back(spline.control_points.front());
+  }
+
+  Splines splines(kQuantizationAdjustment, std::move(quantized_splines),
+                  std::move(starting_points));
+  BitWriter writer;
+  EncodeSplines(splines, &writer, kLayerSplines,
+                HistogramParams(SpeedTier::kFalcon, 1), nullptr);
+  writer.ZeroPadToByte();
+  // Re-read splines.
+  BitReader reader(writer.GetSpan());
+  Splines decoded_splines;
+  EXPECT_FALSE(decoded_splines.Decode(&reader, /*num_pixels=*/1000));
+  EXPECT_TRUE(reader.Close());
+}
+
+#ifdef JXL_CRASH_ON_ERROR
+TEST(SplinesTest, DISABLED_DuplicatePoints) {
+#else
+TEST(SplinesTest, DuplicatePoints) {
+#endif
+  std::vector<Spline::Point> control_points{
+      {9, 54}, {118, 159}, {97, 3},  // Repeated.
+      {97, 3}, {10, 40},   {150, 25}, {120, 300}};
+  Spline spline{control_points,
+                /*color_dct=*/
+                {{1.f, 0.2f, 0.1f}, {35.7f, 10.3f}, {35.7f, 7.8f}},
+                /*sigma_dct=*/{10.f, 0.f, 0.f, 2.f}};
+  std::vector<Spline> spline_data{spline};
+  std::vector<QuantizedSpline> quantized_splines;
+  std::vector<Spline::Point> starting_points;
+  for (const Spline& spline : spline_data) {
+    quantized_splines.emplace_back(spline, kQuantizationAdjustment, kYToX,
+                                   kYToB);
+    starting_points.push_back(spline.control_points.front());
+  }
+  Splines splines(kQuantizationAdjustment, std::move(quantized_splines),
+                  std::move(starting_points));
+
+  Image3F image(320, 320);
+  ZeroFillImage(&image);
+  EXPECT_FALSE(
+      splines.InitializeDrawCache(image.xsize(), image.ysize(), *cmap));
+}
+
+TEST(SplinesTest, Drawing) {
+  CodecInOut io_expected;
+  const PaddedBytes orig = jxl::test::ReadTestData("jxl/splines.pfm");
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io_expected,
+                           /*pool=*/nullptr));
+
+  std::vector<Spline::Point> control_points{{9, 54},  {118, 159}, {97, 3},
+                                            {10, 40}, {150, 25},  {120, 300}};
+  // Use values that survive the quantization/decorrelation roundtrip.
+  const Spline spline{
+      control_points,
+      /*color_dct=*/
+      {{0.4989345073699951171875000f, 0.4997999966144561767578125f},
+       {0.4772970676422119140625000f, 0.f, 0.5250000357627868652343750f},
+       {-0.0176776945590972900390625f, 0.4900000095367431640625000f,
+        0.5250000357627868652343750f}},
+      /*sigma_dct=*/
+      {0.9427147507667541503906250f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+       0.6665999889373779296875000f}};
+  std::vector<Spline> spline_data = {spline};
+  std::vector<QuantizedSpline> quantized_splines;
+  std::vector<Spline::Point> starting_points;
+  for (const Spline& spline : spline_data) {
+    quantized_splines.emplace_back(spline, kQuantizationAdjustment, kYToX,
+                                   kYToB);
+    starting_points.push_back(spline.control_points.front());
+  }
+  Splines splines(kQuantizationAdjustment, std::move(quantized_splines),
+                  std::move(starting_points));
+
+  Image3F image(320, 320);
+  ZeroFillImage(&image);
+  ASSERT_TRUE(splines.InitializeDrawCache(image.xsize(), image.ysize(), *cmap));
+  splines.AddTo(&image, Rect(image), Rect(image));
+
+  CodecInOut io_actual;
+  Image3F image2(320, 320);
+  CopyImageTo(image, &image2);
+  io_actual.SetFromImage(std::move(image2), ColorEncoding::SRGB());
+  ASSERT_TRUE(io_actual.frames[0].TransformTo(io_expected.Main().c_current(),
+                                              GetJxlCms()));
+
+  JXL_ASSERT_OK(VerifyRelativeError(
+      *io_expected.Main().color(), *io_actual.Main().color(), 1e-2f, 1e-1f, _));
+}
+
+TEST(SplinesTest, ClearedEveryFrame) {
+  CodecInOut io_expected;
+  const PaddedBytes bytes_expected =
+      jxl::test::ReadTestData("jxl/spline_on_first_frame.png");
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(bytes_expected), &io_expected,
+                           /*pool=*/nullptr));
+  CodecInOut io_actual;
+  const PaddedBytes bytes_actual =
+      jxl::test::ReadTestData("jxl/spline_on_first_frame.jxl");
+  ASSERT_TRUE(
+      test::DecodeFile({}, Span<const uint8_t>(bytes_actual), &io_actual));
+
+  ASSERT_TRUE(
+      io_actual.frames[0].TransformTo(ColorEncoding::SRGB(), GetJxlCms()));
+  for (size_t c = 0; c < 3; ++c) {
+    for (size_t y = 0; y < io_actual.ysize(); ++y) {
+      float* const JXL_RESTRICT row = io_actual.Main().color()->PlaneRow(c, y);
+      for (size_t x = 0; x < io_actual.xsize(); ++x) {
+        row[x] = Clamp1(row[x], 0.f, 1.f);
+      }
+    }
+  }
+  JXL_ASSERT_OK(VerifyRelativeError(
+      *io_expected.Main().color(), *io_actual.Main().color(), 1e-2f, 1e-1f, _));
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/test_image.cc b/third-party/libjxl/libjxl/lib/jxl/test_image.cc
new file mode 100644
index 0000000000..af1d1293ef
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/test_image.cc
@@ -0,0 +1,453 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/test_image.h"
+
+#include <jxl/encode.h>
+
+#include <algorithm>
+#include <cstring>
+#include <utility>
+
+#include "lib/extras/dec/color_description.h"
+#include "lib/extras/dec/color_hints.h"
+#include "lib/extras/dec/decode.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+
+namespace jxl {
+namespace test {
+
+namespace {
+
+void StoreValue(float val, size_t bits_per_sample, JxlPixelFormat format,
+                uint8_t** out) {
+  const float mul = (1u << bits_per_sample) - 1;
+  if (format.data_type == JXL_TYPE_UINT8) {
+    **out = val * mul;
+  } else if (format.data_type == JXL_TYPE_UINT16) {
+    uint16_t uval = val * mul;
+    if (SwapEndianness(format.endianness)) {
+      uval = JXL_BSWAP16(uval);
+    }
+    memcpy(*out, &uval, 2);
+  } else if (format.data_type == JXL_TYPE_FLOAT) {
+    // TODO(szabadka) Add support for custom bits / exponent bits floats.
+    if (SwapEndianness(format.endianness)) {
+      val = BSwapFloat(val);
+    }
+    memcpy(*out, &val, 4);
+  } else {
+    // TODO(szabadka) Add support for FLOAT16.
+  }
+  *out += extras::PackedImage::BitsPerChannel(format.data_type) / 8;
+}
+
+// Fills `image` with deterministic pseudo-random content derived from `seed`:
+// a per-row color gradient, a patterned disc and a rectangle of random noise.
+void FillPackedImage(size_t bits_per_sample, uint16_t seed,
+                     extras::PackedImage* image) {
+  const size_t xsize = image->xsize;
+  const size_t ysize = image->ysize;
+  const JxlPixelFormat format = image->format;
+
+  // Cause more significant image difference for successive seeds.
+  Rng generator(seed);
+  // Returns random integer in interval [0, max_value)
+  auto rngu = [&generator](size_t max_value) -> size_t {
+    return generator.UniformU(0, max_value);
+  };
+  // Returns random float in interval [0.0, max_value)
+  auto rngf = [&generator](float max_value) {
+    return generator.UniformF(0.0f, max_value);
+  };
+
+  // Dark background gradient color
+  float r0 = rngf(0.5f);
+  float g0 = rngf(0.5f);
+  float b0 = rngf(0.5f);
+  float a0 = rngf(0.5f);
+  float r1 = rngf(0.5f);
+  float g1 = rngf(0.5f);
+  float b1 = rngf(0.5f);
+  float a1 = rngf(0.5f);
+
+  // Circle with different color
+  size_t circle_x = rngu(xsize);
+  size_t circle_y = rngu(ysize);
+  size_t circle_r = rngu(std::min(xsize, ysize));
+
+  // Rectangle with random noise; order each axis so that x0 <= x1, y0 <= y1.
+  size_t rect_x0 = rngu(xsize);
+  size_t rect_y0 = rngu(ysize);
+  size_t rect_x1 = rngu(xsize);
+  size_t rect_y1 = rngu(ysize);
+  if (rect_x1 < rect_x0) std::swap(rect_x0, rect_x1);
+  if (rect_y1 < rect_y0) std::swap(rect_y0, rect_y1);
+
+  // Create pixel content to test, actual content does not matter as long as it
+  // can be compared after roundtrip.
+  uint8_t* out = reinterpret_cast<uint8_t*>(image->pixels());
+  const float imul16 = 1.0f / 65536.0f;
+  for (size_t y = 0; y < ysize; y++) {
+    for (size_t x = 0; x < xsize; x++) {
+      float r = r0 * (ysize - y - 1) / ysize + r1 * y / ysize;
+      float g = g0 * (ysize - y - 1) / ysize + g1 * y / ysize;
+      float b = b0 * (ysize - y - 1) / ysize + b1 * y / ysize;
+      float a = a0 * (ysize - y - 1) / ysize + a1 * y / ysize;
+      // put some shape in there for visual debugging
+      if ((x - circle_x) * (x - circle_x) + (y - circle_y) * (y - circle_y) <
+          circle_r * circle_r) {
+        r = std::min(1.0f, ((65535 - x * y) ^ seed) * imul16);
+        g = std::min(1.0f, ((x << 8) + y + seed) * imul16);
+        b = std::min(1.0f, ((y << 8) + x * seed) * imul16);
+        a = std::min(1.0f, (32768 + x * 256 - y) * imul16);
+      } else if (x > rect_x0 && x < rect_x1 && y > rect_y0 && y < rect_y1) {
+        r = rngf(1.0f);
+        g = rngf(1.0f);
+        b = rngf(1.0f);
+        a = rngf(1.0f);
+      }
+      if (format.num_channels == 1) {
+        StoreValue(g, bits_per_sample, format, &out);
+      } else if (format.num_channels == 2) {
+        StoreValue(g, bits_per_sample, format, &out);
+        StoreValue(a, bits_per_sample, format, &out);
+      } else if (format.num_channels == 3) {
+        StoreValue(r, bits_per_sample, format, &out);
+        StoreValue(g, bits_per_sample, format, &out);
+        StoreValue(b, bits_per_sample, format, &out);
+      } else if (format.num_channels == 4) {
+        StoreValue(r, bits_per_sample, format, &out);
+        StoreValue(g, bits_per_sample, format, &out);
+        StoreValue(b, bits_per_sample, format, &out);
+        StoreValue(a, bits_per_sample, format, &out);
+      }
+    }
+  }
+}
+
+}  // namespace
+
+// Returns 16-bit big-endian interleaved samples (`num_channels` per pixel)
+// holding a per-row gradient, a patterned disc and a rectangle of noise.
+std::vector<uint8_t> GetSomeTestImage(size_t xsize, size_t ysize,
+                                      size_t num_channels, uint16_t seed) {
+  // Cause more significant image difference for successive seeds.
+  Rng generator(seed);
+  // Returns random integer in interval [0, max_value)
+  auto rng = [&generator](size_t max_value) -> size_t {
+    return generator.UniformU(0, max_value);
+  };
+  // Dark background gradient color
+  uint16_t r0 = rng(32768);
+  uint16_t g0 = rng(32768);
+  uint16_t b0 = rng(32768);
+  uint16_t a0 = rng(32768);
+  uint16_t r1 = rng(32768);
+  uint16_t g1 = rng(32768);
+  uint16_t b1 = rng(32768);
+  uint16_t a1 = rng(32768);
+
+  // Circle with different color
+  size_t circle_x = rng(xsize);
+  size_t circle_y = rng(ysize);
+  size_t circle_r = rng(std::min(xsize, ysize));
+
+  // Rectangle with random noise; order each axis so that x0 <= x1, y0 <= y1.
+  size_t rect_x0 = rng(xsize);
+  size_t rect_y0 = rng(ysize);
+  size_t rect_x1 = rng(xsize);
+  size_t rect_y1 = rng(ysize);
+  if (rect_x1 < rect_x0) std::swap(rect_x0, rect_x1);
+  if (rect_y1 < rect_y0) std::swap(rect_y0, rect_y1);
+
+  size_t num_pixels = xsize * ysize;
+  // 16 bits per channel, big endian, `num_channels` channels
+  std::vector<uint8_t> pixels(num_pixels * num_channels * 2);
+  // Create pixel content to test, actual content does not matter as long as it
+  // can be compared after roundtrip.
+  for (size_t y = 0; y < ysize; y++) {
+    for (size_t x = 0; x < xsize; x++) {
+      uint16_t r = r0 * (ysize - y - 1) / ysize + r1 * y / ysize;
+      uint16_t g = g0 * (ysize - y - 1) / ysize + g1 * y / ysize;
+      uint16_t b = b0 * (ysize - y - 1) / ysize + b1 * y / ysize;
+      uint16_t a = a0 * (ysize - y - 1) / ysize + a1 * y / ysize;
+      // put some shape in there for visual debugging
+      if ((x - circle_x) * (x - circle_x) + (y - circle_y) * (y - circle_y) <
+          circle_r * circle_r) {
+        r = (65535 - x * y) ^ seed;
+        g = (x << 8) + y + seed;
+        b = (y << 8) + x * seed;
+        a = 32768 + x * 256 - y;
+      } else if (x > rect_x0 && x < rect_x1 && y > rect_y0 && y < rect_y1) {
+        r = rng(65536);
+        g = rng(65536);
+        b = rng(65536);
+        a = rng(65536);
+      }
+      size_t i = (y * xsize + x) * 2 * num_channels;
+      pixels[i + 0] = (r >> 8);
+      pixels[i + 1] = (r & 255);
+      if (num_channels >= 2) {
+        // This may store what is called 'g' in the alpha channel of a 2-channel
+        // image, but that's ok since the content is arbitrary
+        pixels[i + 2] = (g >> 8);
+        pixels[i + 3] = (g & 255);
+      }
+      if (num_channels >= 3) {
+        pixels[i + 4] = (b >> 8);
+        pixels[i + 5] = (b & 255);
+      }
+      if (num_channels >= 4) {
+        pixels[i + 6] = (a >> 8);
+        pixels[i + 7] = (a & 255);
+      }
+    }
+  }
+  return pixels;
+}
+
+TestImage::TestImage() {
+  SetChannels(3);
+  SetAllBitDepths(8);
+  SetColorEncoding("RGB_D65_SRG_Rel_SRG");
+}
+
+TestImage& TestImage::DecodeFromBytes(const PaddedBytes& bytes) {
+  ColorEncoding c_enc;
+  JXL_CHECK(
+      ConvertExternalToInternalColorEncoding(ppf_.color_encoding, &c_enc));
+  extras::ColorHints color_hints;
+  color_hints.Add("color_space", Description(c_enc));
+  JXL_CHECK(
+      extras::DecodeBytes(Span<const uint8_t>(bytes), color_hints, &ppf_));
+  return *this;
+}
+
+TestImage& TestImage::ClearMetadata() {
+  ppf_.metadata = extras::PackedMetadata();
+  return *this;
+}
+
+TestImage& TestImage::SetDimensions(size_t xsize, size_t ysize) {
+  if (xsize <= ppf_.info.xsize && ysize <= ppf_.info.ysize) {
+    for (auto& frame : ppf_.frames) {
+      CropLayerInfo(xsize, ysize, &frame.frame_info.layer_info);
+      CropImage(xsize, ysize, &frame.color);
+      for (auto& ec : frame.extra_channels) {
+        CropImage(xsize, ysize, &ec);
+      }
+    }
+  } else {
+    JXL_CHECK(ppf_.info.xsize == 0 && ppf_.info.ysize == 0);
+  }
+  ppf_.info.xsize = xsize;
+  ppf_.info.ysize = ysize;
+  return *this;
+}
+
+TestImage& TestImage::SetChannels(size_t num_channels) {
+  JXL_CHECK(ppf_.frames.empty());
+  JXL_CHECK(!ppf_.preview_frame);
+  ppf_.info.num_color_channels = num_channels < 3 ? 1 : 3;
+  ppf_.info.num_extra_channels = num_channels - ppf_.info.num_color_channels;
+  if (ppf_.info.num_extra_channels > 0 && ppf_.info.alpha_bits == 0) {
+    ppf_.info.alpha_bits = ppf_.info.bits_per_sample;
+    ppf_.info.alpha_exponent_bits = ppf_.info.exponent_bits_per_sample;
+  }
+  ppf_.extra_channels_info.clear();
+  for (size_t i = 1; i < ppf_.info.num_extra_channels; ++i) {
+    extras::PackedExtraChannel ec;
+    ec.index = i;
+    JxlEncoderInitExtraChannelInfo(JXL_CHANNEL_ALPHA, &ec.ec_info);
+    if (ec.ec_info.bits_per_sample == 0) {
+      ec.ec_info.bits_per_sample = ppf_.info.bits_per_sample;
+      ec.ec_info.exponent_bits_per_sample = ppf_.info.exponent_bits_per_sample;
+    }
+    ppf_.extra_channels_info.emplace_back(std::move(ec));
+  }
+  format_.num_channels = std::min(static_cast<size_t>(4), num_channels);
+  if (ppf_.info.num_color_channels == 1 &&
+      ppf_.color_encoding.color_space != JXL_COLOR_SPACE_GRAY) {
+    SetColorEncoding("Gra_D65_Rel_SRG");
+  }
+  return *this;
+}
+
+// Applies one bit depth to color, alpha and every extra channel description.
+TestImage& TestImage::SetAllBitDepths(uint32_t bits_per_sample,
+                                      uint32_t exponent_bits_per_sample) {
+  auto& info = ppf_.info;
+  info.bits_per_sample = bits_per_sample;
+  info.exponent_bits_per_sample = exponent_bits_per_sample;
+  if (info.num_extra_channels > 0) {
+    info.alpha_bits = bits_per_sample;
+    info.alpha_exponent_bits = exponent_bits_per_sample;
+  }
+  for (auto& channel : ppf_.extra_channels_info) {
+    channel.ec_info.bits_per_sample = bits_per_sample;
+    channel.ec_info.exponent_bits_per_sample = exponent_bits_per_sample;
+  }
+  format_.data_type = DefaultDataType(info);  // keep buffer type in sync
+  return *this;
+}
+
+TestImage& TestImage::SetDataType(JxlDataType data_type) {
+  format_.data_type = data_type;
+  return *this;
+}
+
+TestImage& TestImage::SetEndianness(JxlEndianness endianness) {
+  format_.endianness = endianness;
+  return *this;
+}
+
+TestImage& TestImage::SetColorEncoding(const std::string& description) {
+  JXL_CHECK(ParseDescription(description, &ppf_.color_encoding));
+  ColorEncoding c_enc;
+  JXL_CHECK(
+      ConvertExternalToInternalColorEncoding(ppf_.color_encoding, &c_enc));
+  JXL_CHECK(c_enc.CreateICC());
+  PaddedBytes icc = c_enc.ICC();
+  ppf_.icc.assign(icc.begin(), icc.end());
+  return *this;
+}
+
+TestImage& TestImage::CoalesceGIFAnimationWithAlpha() {
+  extras::PackedFrame canvas = ppf_.frames[0].Copy();
+  JXL_CHECK(canvas.color.format.num_channels == 3);
+  JXL_CHECK(canvas.color.format.data_type == JXL_TYPE_UINT8);
+  JXL_CHECK(canvas.extra_channels.size() == 1);
+  for (size_t i = 1; i < ppf_.frames.size(); i++) {
+    const extras::PackedFrame& frame = ppf_.frames[i];
+    JXL_CHECK(frame.extra_channels.size() == 1);
+    const JxlLayerInfo& layer_info = frame.frame_info.layer_info;
+    extras::PackedFrame rendered = canvas.Copy();
+    uint8_t* pixels_rendered =
+        reinterpret_cast<uint8_t*>(rendered.color.pixels());
+    const uint8_t* pixels_frame =
+        reinterpret_cast<const uint8_t*>(frame.color.pixels());
+    uint8_t* alpha_rendered =
+        reinterpret_cast<uint8_t*>(rendered.extra_channels[0].pixels());
+    const uint8_t* alpha_frame =
+        reinterpret_cast<const uint8_t*>(frame.extra_channels[0].pixels());
+    for (size_t y = 0; y < frame.color.ysize; y++) {
+      for (size_t x = 0; x < frame.color.xsize; x++) {
+        size_t idx_frame = y * frame.color.xsize + x;
+        size_t idx_rendered = ((layer_info.crop_y0 + y) * rendered.color.xsize +
+                               (layer_info.crop_x0 + x));
+        if (alpha_frame[idx_frame] != 0) {
+          memcpy(&pixels_rendered[idx_rendered * 3],
+                 &pixels_frame[idx_frame * 3], 3);
+          alpha_rendered[idx_rendered] = alpha_frame[idx_frame];
+        }
+      }
+    }
+    if (layer_info.save_as_reference != 0) {
+      canvas = rendered.Copy();
+    }
+    ppf_.frames[i] = std::move(rendered);
+  }
+  return *this;
+}
+
+TestImage::Frame::Frame(TestImage* parent, bool is_preview, size_t index)
+    : parent_(parent), is_preview_(is_preview), index_(index) {}
+
+void TestImage::Frame::ZeroFill() {
+  memset(frame().color.pixels(), 0, frame().color.pixels_size);
+  for (auto& ec : frame().extra_channels) {
+    memset(ec.pixels(), 0, ec.pixels_size);
+  }
+}
+
+void TestImage::Frame::RandomFill(uint16_t seed) {
+  FillPackedImage(ppf().info.bits_per_sample, seed, &frame().color);
+  for (size_t i = 0; i < ppf().extra_channels_info.size(); ++i) {
+    FillPackedImage(ppf().extra_channels_info[i].ec_info.bits_per_sample,
+                    seed + 1 + i, &frame().extra_channels[i]);
+  }
+}
+
+void TestImage::Frame::SetValue(size_t y, size_t x, size_t c, float val) {
+  const extras::PackedImage& color = frame().color;
+  JxlPixelFormat format = color.format;
+  JXL_CHECK(y < ppf().info.ysize);
+  JXL_CHECK(x < ppf().info.xsize);
+  JXL_CHECK(c < format.num_channels);
+  size_t pwidth = extras::PackedImage::BitsPerChannel(format.data_type) / 8;
+  size_t idx = ((y * color.xsize + x) * format.num_channels + c) * pwidth;
+  uint8_t* pixels = reinterpret_cast<uint8_t*>(frame().color.pixels());
+  uint8_t* p = pixels + idx;
+  StoreValue(val, ppf().info.bits_per_sample, frame().color.format, &p);
+}
+
+TestImage::Frame TestImage::AddFrame() {
+  size_t index = ppf_.frames.size();
+  extras::PackedFrame frame(ppf_.info.xsize, ppf_.info.ysize, format_);
+  for (size_t i = 0; i < ppf_.extra_channels_info.size(); ++i) {
+    JxlPixelFormat ec_format = {1, format_.data_type, format_.endianness, 0};
+    extras::PackedImage image(ppf_.info.xsize, ppf_.info.ysize, ec_format);
+    frame.extra_channels.emplace_back(std::move(image));
+  }
+  ppf_.frames.emplace_back(std::move(frame));
+  return Frame(this, false, index);
+}
+
+TestImage::Frame TestImage::AddPreview(size_t xsize, size_t ysize) {
+  extras::PackedFrame frame(xsize, ysize, format_);
+  for (size_t i = 0; i < ppf_.extra_channels_info.size(); ++i) {
+    JxlPixelFormat ec_format = {1, format_.data_type, format_.endianness, 0};
+    extras::PackedImage image(xsize, ysize, ec_format);
+    frame.extra_channels.emplace_back(std::move(image));
+  }
+  ppf_.preview_frame = make_unique<extras::PackedFrame>(std::move(frame));
+  return Frame(this, true, 0);
+}
+
+// Shrinks the layer extent in `info` so it ends within an xsize x ysize canvas;
+// an axis whose crop origin is at or past the new size collapses to 0.
+void TestImage::CropLayerInfo(size_t xsize, size_t ysize, JxlLayerInfo* info) {
+  const bool x_inside = info->crop_x0 < static_cast<ssize_t>(xsize);
+  const bool y_inside = info->crop_y0 < static_cast<ssize_t>(ysize);
+
+  // The remaining room on each axis is measured from the crop origin.
+  info->xsize =
+      x_inside ? std::min<size_t>(info->xsize, xsize - info->crop_x0) : 0;
+  info->ysize =
+      y_inside ? std::min<size_t>(info->ysize, ysize - info->crop_y0) : 0;
+}
+
+void TestImage::CropImage(size_t xsize, size_t ysize,
+                          extras::PackedImage* image) {
+  size_t new_stride = (image->stride / image->xsize) * xsize;
+  uint8_t* buf = reinterpret_cast<uint8_t*>(image->pixels());
+  for (size_t y = 0; y < ysize; ++y) {
+    memmove(&buf[y * new_stride], &buf[y * image->stride], new_stride);
+  }
+  image->xsize = xsize;
+  image->ysize = ysize;
+  image->stride = new_stride;
+  image->pixels_size = ysize * new_stride;
+}
+
+// Maps the bit depths declared in `info` to a JxlDataType.
+JxlDataType TestImage::DefaultDataType(const JxlBasicInfo& info) {
+  const auto bits = info.bits_per_sample;
+  const auto exponent_bits = info.exponent_bits_per_sample;
+  // 16 bits with 5 exponent bits selects the FLOAT16 type.
+  if (bits == 16 && exponent_bits == 5) return JXL_TYPE_FLOAT16;
+  // Any other float, or more than 16 integer bits, is stored as float.
+  if (exponent_bits > 0 || bits > 16) return JXL_TYPE_FLOAT;
+  if (bits > 8) return JXL_TYPE_UINT16;
+  return JXL_TYPE_UINT8;
+}
+
+}  // namespace test
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/test_image.h b/third-party/libjxl/libjxl/lib/jxl/test_image.h
new file mode 100644
index 0000000000..0106a4b341
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/test_image.h
@@ -0,0 +1,94 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_TEST_IMAGE_H_
+#define LIB_JXL_TEST_IMAGE_H_
+
+#include <jxl/codestream_header.h>
+#include <jxl/types.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/padded_bytes.h"
+
+namespace jxl {
+namespace test {
+
+// Returns a test image with some autogenerated pixel content, using 16 bits per
+// channel, big endian order and 1 to 4 channels.
+// The seed parameter makes it possible to create images with different pixel
+// content.
+std::vector<uint8_t> GetSomeTestImage(size_t xsize, size_t ysize,
+                                      size_t num_channels, uint16_t seed);
+
+// Test helper that owns an extras::PackedPixelFile (see ppf()) together with
+// the pixel format used for the frames it creates. The setters return
+// TestImage&, presumably *this so that calls can be chained -- confirm against
+// the definitions in test_image.cc.
+class TestImage {
+ public:
+  TestImage();
+
+  // Direct, mutable access to the pixel file being assembled.
+  extras::PackedPixelFile& ppf() { return ppf_; }
+
+  // NOTE(review): the member functions from here to
+  // CoalesceGIFAnimationWithAlpha are only declared in this header; going by
+  // their names they load or configure the contents of ppf() -- check the
+  // definitions before relying on details.
+  TestImage& DecodeFromBytes(const PaddedBytes& bytes);
+
+  TestImage& ClearMetadata();
+
+  TestImage& SetDimensions(size_t xsize, size_t ysize);
+
+  TestImage& SetChannels(size_t num_channels);
+
+  // Sets the same bit depth on color, alpha and all extra channels.
+  TestImage& SetAllBitDepths(uint32_t bits_per_sample,
+                             uint32_t exponent_bits_per_sample = 0);
+
+  TestImage& SetDataType(JxlDataType data_type);
+
+  TestImage& SetEndianness(JxlEndianness endianness);
+
+  TestImage& SetColorEncoding(const std::string& description);
+
+  TestImage& CoalesceGIFAnimationWithAlpha();
+
+  // Handle to one frame stored in the parent TestImage: either its preview
+  // frame or the regular frame at a given index. It holds a raw pointer to
+  // the parent and does not own any pixels itself.
+  class Frame {
+   public:
+    Frame(TestImage* parent, bool is_preview, size_t index);
+
+    // Presumably fill the frame's pixels with zeros / seeded pseudo-random
+    // values -- definitions not visible here.
+    void ZeroFill();
+    void RandomFill(uint16_t seed = 177);
+
+    // Presumably writes `val` to channel `c` of the pixel at row `y`,
+    // column `x` -- definition not visible here.
+    void SetValue(size_t y, size_t x, size_t c, float val);
+
+   private:
+    extras::PackedPixelFile& ppf() const { return parent_->ppf(); }
+
+    // Resolves the handle: the preview frame when is_preview_ is set,
+    // otherwise frames[index_] of the parent's pixel file.
+    extras::PackedFrame& frame() {
+      return is_preview_ ? *ppf().preview_frame : ppf().frames[index_];
+    }
+
+    TestImage* parent_;
+    bool is_preview_;
+    size_t index_;
+  };
+
+  // Presumably appends a new frame to ppf() and returns a handle to it --
+  // definition not visible here.
+  Frame AddFrame();
+
+  // Installs a preview frame of the given size as ppf().preview_frame and
+  // returns a handle to it.
+  Frame AddPreview(size_t xsize, size_t ysize);
+
+ private:
+  extras::PackedPixelFile ppf_;
+  // Default pixel format: 3 channels, uint8 samples, little endian, align 0.
+  JxlPixelFormat format_ = {3, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+
+  // Clamps the layer rectangle in `info` to an xsize x ysize canvas.
+  static void CropLayerInfo(size_t xsize, size_t ysize, JxlLayerInfo* info);
+
+  // Crops `image` in place to its top-left xsize x ysize region.
+  static void CropImage(size_t xsize, size_t ysize, extras::PackedImage* image);
+
+  // Chooses uint8, uint16, float16 or float from the bit depth in `info`.
+  static JxlDataType DefaultDataType(const JxlBasicInfo& info);
+};
+
+}  // namespace test
+}  // namespace jxl
+
+#endif  // LIB_JXL_TEST_IMAGE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/test_utils.cc b/third-party/libjxl/libjxl/lib/jxl/test_utils.cc
new file mode 100644
index 0000000000..eb2e3c4ce0
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/test_utils.cc
@@ -0,0 +1,672 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/test_utils.h"
+
+#include <fstream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "lib/extras/metrics.h"
+#include "lib/extras/packed_image_convert.h"
+#include "lib/jxl/base/float.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/enc_file.h"
+
+#if !defined(TEST_DATA_PATH)
+#include "tools/cpp/runfiles/runfiles.h"
+#endif
+
+namespace jxl {
+namespace test {
+
+#if defined(TEST_DATA_PATH)
+// The build supplies the test data directory: return TEST_DATA_PATH, a slash
+// and `filename`.
+std::string GetTestDataPath(const std::string& filename) {
+  return std::string(TEST_DATA_PATH "/") + filename;
+}
+#else
+// No TEST_DATA_PATH: look the file up through the Bazel runfiles library
+// under JPEGXL_ROOT_PACKAGE "/testdata/".
+using bazel::tools::cpp::runfiles::Runfiles;
+// Runfiles handle shared by all lookups, created during static
+// initialization.
+// NOTE(review): nothing here checks the result of Runfiles::Create(); if it
+// can return null, GetTestDataPath would dereference a null pointer --
+// confirm against the runfiles documentation.
+const std::unique_ptr<Runfiles> kRunfiles(Runfiles::Create(""));
+std::string GetTestDataPath(const std::string& filename) {
+  std::string root(JPEGXL_ROOT_PACKAGE "/testdata/");
+  return kRunfiles->Rlocation(root + filename);
+}
+#endif
+
+// Loads the test data file `filename` (located via GetTestDataPath) fully into
+// memory and returns its bytes. The resolved path is logged to stderr and the
+// byte count to stdout. Aborts through JXL_CHECK when the stream is not in a
+// good state after reading, e.g. because the file could not be opened.
+PaddedBytes ReadTestData(const std::string& filename) {
+  const std::string path = GetTestDataPath(filename);
+  fprintf(stderr, "ReadTestData %s\n", path.c_str());
+  std::ifstream input(path, std::ios::binary);
+  // Read everything the stream buffer has to offer.
+  std::istreambuf_iterator<char> first(input);
+  std::istreambuf_iterator<char> last;
+  std::vector<char> chars(first, last);
+  JXL_CHECK(input.good());
+  // Same bytes, reinterpreted as unsigned for PaddedBytes::append.
+  std::vector<uint8_t> bytes(chars.begin(), chars.end());
+  printf("Test data %s is %d bytes long.\n", filename.c_str(),
+         static_cast<int>(bytes.size()));
+  PaddedBytes padded;
+  padded.append(bytes);
+  return padded;
+}
+
+// Gives `dparams` a default list of accepted output formats -- little-endian
+// float with 1, 2, 3 and 4 channels -- unless the caller already listed some.
+void DefaultAcceptedFormats(extras::JXLDecompressParams& dparams) {
+  if (!dparams.accepted_formats.empty()) return;
+  for (uint32_t num_channels = 1; num_channels <= 4; ++num_channels) {
+    dparams.accepted_formats.push_back(
+        {num_channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, /*align=*/0});
+  }
+}
+
+// Decodes the JPEG XL bytes in `file` into `io`.
+// `dparams` is taken by value, so the default accepted formats added here do
+// not leak back into the caller's parameters. Returns an error status when
+// decoding, or converting the decoded pixel file to a CodecInOut, fails.
+Status DecodeFile(extras::JXLDecompressParams dparams,
+                  const Span<const uint8_t> file, CodecInOut* JXL_RESTRICT io,
+                  ThreadPool* pool) {
+  DefaultAcceptedFormats(dparams);
+  SetThreadParallelRunner(dparams, pool);
+  extras::PackedPixelFile ppf;
+  // The number of consumed bytes is not needed, hence the nullptr.
+  JXL_RETURN_IF_ERROR(DecodeImageJXL(file.data(), file.size(), dparams,
+                                     /*decoded_bytes=*/nullptr, &ppf));
+  JXL_RETURN_IF_ERROR(ConvertPackedPixelFileToCodecInOut(ppf, pool, io));
+  return true;
+}
+
+// Resets `basic_info` with JxlEncoderInitBasicInfo and then derives the sample
+// bit depth, the number of color channels and the alpha settings from
+// `pixel_format`. Aborts on a data type other than uint8, uint16, float16 or
+// float.
+void JxlBasicInfoSetFromPixelFormat(JxlBasicInfo* basic_info,
+                                    const JxlPixelFormat* pixel_format) {
+  JxlEncoderInitBasicInfo(basic_info);
+
+  // Total bits and exponent bits per sample for the requested data type.
+  uint32_t sample_bits = 0;
+  uint32_t exponent_bits = 0;
+  switch (pixel_format->data_type) {
+    case JXL_TYPE_UINT8:
+      sample_bits = 8;
+      break;
+    case JXL_TYPE_UINT16:
+      sample_bits = 16;
+      break;
+    case JXL_TYPE_FLOAT16:
+      sample_bits = 16;
+      exponent_bits = 5;
+      break;
+    case JXL_TYPE_FLOAT:
+      sample_bits = 32;
+      exponent_bits = 8;
+      break;
+    default:
+      JXL_ABORT("Unhandled JxlDataType");
+  }
+  basic_info->bits_per_sample = sample_bits;
+  basic_info->exponent_bits_per_sample = exponent_bits;
+
+  // Fewer than three channels means grayscale, otherwise three color channels.
+  const uint32_t channels = pixel_format->num_channels;
+  basic_info->num_color_channels = (channels < 3) ? 1 : 3;
+
+  // An even channel count (2 or 4) carries alpha with the color bit depth.
+  const bool has_alpha = (channels == 2 || channels == 4);
+  if (has_alpha) {
+    basic_info->alpha_bits = sample_bits;
+    basic_info->alpha_exponent_bits = exponent_bits;
+    basic_info->num_extra_channels = 1;
+  } else {
+    basic_info->alpha_bits = 0;
+    basic_info->alpha_exponent_bits = 0;
+  }
+}
+
+// Builds a ColorEncoding from the plain descriptor `desc` and creates its ICC
+// profile, aborting via JXL_CHECK when that fails. For the XYB color space
+// the white point, primaries and transfer function of `desc` are not applied.
+ColorEncoding ColorEncodingFromDescriptor(const ColorEncodingDescriptor& desc) {
+  ColorEncoding encoding;
+  encoding.SetColorSpace(desc.color_space);
+  const bool is_xyb = (desc.color_space == ColorSpace::kXYB);
+  if (!is_xyb) {
+    encoding.white_point = desc.white_point;
+    encoding.primaries = desc.primaries;
+    encoding.tf.SetTransferFunction(desc.tf);
+  }
+  encoding.rendering_intent = desc.rendering_intent;
+  JXL_CHECK(encoding.CreateICC());
+  return encoding;
+}
+
+namespace {
+// Compares two equally long lists of color encodings element by element.
+// Two encodings count as the same when their ICC profiles are equal, or when
+// both primaries and transfer function agree. Every mismatch appends one line
+// to `failures`, tagged with `check_name`. Aborts if the list sizes differ.
+void CheckSameEncodings(const std::vector<ColorEncoding>& a,
+                        const std::vector<ColorEncoding>& b,
+                        const std::string& check_name,
+                        std::stringstream& failures) {
+  JXL_CHECK(a.size() == b.size());
+  const size_t count = a.size();
+  for (size_t index = 0; index < count; ++index) {
+    const ColorEncoding& lhs = a[index];
+    const ColorEncoding& rhs = b[index];
+    const bool equivalent =
+        (lhs.ICC() == rhs.ICC()) ||
+        ((lhs.primaries == rhs.primaries) && lhs.tf.IsSame(rhs.tf));
+    if (!equivalent) {
+      failures << "CheckSameEncodings " << check_name << ": " << index
+               << "-th encoding mismatch\n";
+    }
+  }
+}
+}  // namespace
+
+// Encodes `io` with `cparams`, decodes the result into `io2` and verifies that
+// the color encodings of all frames survive the trip.
+//
+// Encoding mismatches are appended to `failures`; the return value is true
+// when `failures` is empty afterwards (so text that was already in the stream
+// also yields false). Encode/decode errors and a differing frame count abort
+// via JXL_CHECK. If `compressed_size` is non-null it is set to size_t(-1) up
+// front and to the encoded size in bytes at the end.
+bool Roundtrip(const CodecInOut* io, const CompressParams& cparams,
+               extras::JXLDecompressParams dparams,
+               CodecInOut* JXL_RESTRICT io2, std::stringstream& failures,
+               size_t* compressed_size, ThreadPool* pool, AuxOut* aux_out) {
+  DefaultAcceptedFormats(dparams);
+  if (compressed_size) {
+    *compressed_size = static_cast<size_t>(-1);
+  }
+  PaddedBytes compressed;
+
+  // Snapshots of the per-frame encodings: "original" before encoding, "_1"
+  // after encoding (still from `io`), "_2" after decoding (from `io2`).
+  std::vector<ColorEncoding> original_metadata_encodings;
+  std::vector<ColorEncoding> original_current_encodings;
+  std::vector<ColorEncoding> metadata_encodings_1;
+  std::vector<ColorEncoding> metadata_encodings_2;
+  std::vector<ColorEncoding> current_encodings_2;
+  original_metadata_encodings.reserve(io->frames.size());
+  original_current_encodings.reserve(io->frames.size());
+  metadata_encodings_1.reserve(io->frames.size());
+  metadata_encodings_2.reserve(io->frames.size());
+  current_encodings_2.reserve(io->frames.size());
+
+  for (const ImageBundle& ib : io->frames) {
+    // Remember original encoding, will be returned by decoder.
+    original_metadata_encodings.push_back(ib.metadata()->color_encoding);
+    // c_current should not change during encoding.
+    original_current_encodings.push_back(ib.c_current());
+  }
+
+  std::unique_ptr<PassesEncoderState> enc_state =
+      jxl::make_unique<PassesEncoderState>();
+  JXL_CHECK(EncodeFile(cparams, io, enc_state.get(), &compressed, GetJxlCms(),
+                       aux_out, pool));
+
+  for (const ImageBundle& ib1 : io->frames) {
+    metadata_encodings_1.push_back(ib1.metadata()->color_encoding);
+  }
+
+  // Should still be in the same color space after encoding.
+  CheckSameEncodings(metadata_encodings_1, original_metadata_encodings,
+                     "original vs after encoding", failures);
+
+  JXL_CHECK(DecodeFile(dparams, Span<const uint8_t>(compressed), io2, pool));
+  JXL_CHECK(io2->frames.size() == io->frames.size());
+
+  for (const ImageBundle& ib2 : io2->frames) {
+    metadata_encodings_2.push_back(ib2.metadata()->color_encoding);
+    current_encodings_2.push_back(ib2.c_current());
+  }
+
+  // We always produce the original color encoding if a color transform hook is
+  // set.
+  CheckSameEncodings(current_encodings_2, original_current_encodings,
+                     "current: original vs decoded", failures);
+
+  // Decoder returns the originals passed to the encoder.
+  CheckSameEncodings(metadata_encodings_2, original_metadata_encodings,
+                     "metadata: original vs decoded", failures);
+
+  if (compressed_size) {
+    *compressed_size = compressed.size();
+  }
+
+  return failures.str().empty();
+}
+
+// Encodes `ppf_in` to JPEG XL and decodes the result into `ppf_out`, aborting
+// via JXL_CHECK when either step fails or when the decoder does not report
+// having consumed every encoded byte. Returns the encoded size in bytes.
+size_t Roundtrip(const extras::PackedPixelFile& ppf_in,
+                 extras::JXLCompressParams cparams,
+                 extras::JXLDecompressParams dparams, ThreadPool* pool,
+                 extras::PackedPixelFile* ppf_out) {
+  DefaultAcceptedFormats(dparams);
+  SetThreadParallelRunner(cparams, pool);
+  SetThreadParallelRunner(dparams, pool);
+
+  std::vector<uint8_t> encoded;
+  JXL_CHECK(extras::EncodeImageJXL(cparams, ppf_in, /*jpeg_bytes=*/nullptr,
+                                   &encoded));
+  const size_t encoded_size = encoded.size();
+
+  size_t consumed = 0;
+  JXL_CHECK(extras::DecodeImageJXL(encoded.data(), encoded_size, dparams,
+                                   &consumed, ppf_out));
+  JXL_CHECK(consumed == encoded_size);
+  return encoded_size;
+}
+
+// Enumerates descriptors for combinations of color space, white point,
+// primaries, transfer function and rendering intent. Unknown, custom and XYB
+// values are skipped, as are combinations the scratch ColorEncoding `c`
+// rejects (see the `continue` conditions below).
+// NOTE(review): `c` is shared by all loop levels and mutated as the loops
+// descend, so the filters depend on state set by the enclosing iterations;
+// do not reorder the statements without checking the ColorEncoding helpers.
+std::vector<ColorEncodingDescriptor> AllEncodings() {
+  std::vector<ColorEncodingDescriptor> all_encodings;
+  all_encodings.reserve(300);
+  ColorEncoding c;
+
+  for (ColorSpace cs : Values<ColorSpace>()) {
+    if (cs == ColorSpace::kUnknown || cs == ColorSpace::kXYB) continue;
+    c.SetColorSpace(cs);
+
+    for (WhitePoint wp : Values<WhitePoint>()) {
+      if (wp == WhitePoint::kCustom) continue;
+      // With an implicit white point only the value `c` already holds is kept.
+      if (c.ImplicitWhitePoint() && c.white_point != wp) continue;
+      c.white_point = wp;
+
+      for (Primaries primaries : Values<Primaries>()) {
+        if (primaries == Primaries::kCustom) continue;
+        // Nothing is emitted at all while `c` reports having no primaries.
+        if (!c.HasPrimaries()) continue;
+        c.primaries = primaries;
+
+        for (TransferFunction tf : Values<TransferFunction>()) {
+          if (tf == TransferFunction::kUnknown) continue;
+          // With an implicit transfer function only the one `c` already has
+          // is kept (and never a gamma one).
+          if (c.tf.SetImplicit() &&
+              (c.tf.IsGamma() || c.tf.GetTransferFunction() != tf)) {
+            continue;
+          }
+          c.tf.SetTransferFunction(tf);
+
+          // Every rendering intent is emitted for an accepted combination.
+          for (RenderingIntent ri : Values<RenderingIntent>()) {
+            ColorEncodingDescriptor cdesc;
+            cdesc.color_space = cs;
+            cdesc.white_point = wp;
+            cdesc.primaries = primaries;
+            cdesc.tf = tf;
+            cdesc.rendering_intent = ri;
+            all_encodings.push_back(cdesc);
+          }
+        }
+      }
+    }
+  }
+
+  return all_encodings;
+}
+
+// Wraps a 16-bit big-endian interleaved pixel buffer `buf` of size
+// xsize x ysize with `num_channels` channels into a CodecInOut whose metadata
+// uses sRGB (gray for 1 or 2 channels) and 16 alpha bits. Aborts via JXL_CHECK
+// when the conversion fails.
+jxl::CodecInOut SomeTestImageToCodecInOut(const std::vector<uint8_t>& buf,
+                                          size_t num_channels, size_t xsize,
+                                          size_t ysize) {
+  // The two tests below are deliberately kept as in the metadata and pixel
+  // paths; they only differ for a channel count of zero.
+  const bool metadata_is_gray = (num_channels == 1 || num_channels == 2);
+  const bool pixels_are_gray = (num_channels < 3);
+
+  jxl::CodecInOut io;
+  io.SetSize(xsize, ysize);
+  io.metadata.m.SetAlphaBits(16);
+  io.metadata.m.color_encoding =
+      jxl::ColorEncoding::SRGB(/*is_gray=*/metadata_is_gray);
+
+  JxlPixelFormat format = {static_cast<uint32_t>(num_channels), JXL_TYPE_UINT16,
+                           JXL_BIG_ENDIAN, 0};
+  const jxl::Span<const uint8_t> bytes(buf.data(), buf.size());
+  JXL_CHECK(ConvertFromExternal(
+      bytes, xsize, ysize,
+      jxl::ColorEncoding::SRGB(/*is_gray=*/pixels_are_gray),
+      /*bits_per_sample=*/16, format,
+      /*pool=*/nullptr,
+      /*ib=*/&io.Main()));
+  return io;
+}
+
+// Returns true when `value` lies within `max_dist` of `expected` (inclusive).
+bool Near(double expected, double value, double max_dist) {
+  double dist;
+  if (expected > value) {
+    dist = expected - value;
+  } else {
+    dist = value - expected;
+  }
+  return dist <= max_dist;
+}
+
+// Reads a little-endian 16-bit value at `p` and converts it with LoadFloat16.
+float LoadLEFloat16(const uint8_t* p) {
+  return LoadFloat16(static_cast<uint16_t>(LoadLE16(p)));
+}
+
+// Reads a big-endian 16-bit value at `p` and converts it with LoadFloat16.
+float LoadBEFloat16(const uint8_t* p) {
+  return LoadFloat16(static_cast<uint16_t>(LoadBE16(p)));
+}
+
+// Number of significant bits a sample of `data_type` can carry: the full width
+// for the integer types, the mantissa precision for the float types. Aborts
+// on any other data type.
+size_t GetPrecision(JxlDataType data_type) {
+  if (data_type == JXL_TYPE_UINT8) return 8;
+  if (data_type == JXL_TYPE_UINT16) return 16;
+  // Floating point mantissa precision
+  if (data_type == JXL_TYPE_FLOAT) return 24;
+  if (data_type == JXL_TYPE_FLOAT16) return 11;
+  JXL_ABORT("Unhandled JxlDataType");
+}
+
+// Storage size in bits of one sample of `data_type`. Aborts on a data type
+// other than uint8, uint16, float or float16.
+size_t GetDataBits(JxlDataType data_type) {
+  if (data_type == JXL_TYPE_UINT8) return 8;
+  if (data_type == JXL_TYPE_UINT16) return 16;
+  if (data_type == JXL_TYPE_FLOAT) return 32;
+  if (data_type == JXL_TYPE_FLOAT16) return 16;
+  JXL_ABORT("Unhandled JxlDataType");
+}
+
+// Expands an interleaved pixel buffer in `format` into xsize * ysize * 4
+// doubles in R, G, B, A order with tightly packed rows.
+//
+// - Grayscale input (1 or 2 channels) copies the gray value into R, G and B.
+// - Without an alpha channel, A is the maximum integer value times the
+//   multiplier for integer types and 1.0 for float types.
+// - Integer samples are scaled by `factor` when it is positive, otherwise by
+//   1/255 (uint8) or 1/65535 (uint16). Float samples are copied unscaled and
+//   `factor` is ignored.
+// - Input rows are `stride` bytes apart, where stride honours format.align.
+std::vector<double> ConvertToRGBA32(const uint8_t* pixels, size_t xsize,
+                                    size_t ysize, const JxlPixelFormat& format,
+                                    double factor) {
+  std::vector<double> result(xsize * ysize * 4);
+  size_t num_channels = format.num_channels;
+  bool gray = num_channels == 1 || num_channels == 2;
+  bool alpha = num_channels == 2 || num_channels == 4;
+  JxlEndianness endianness = format.endianness;
+  // Compute actual type:
+  if (endianness == JXL_NATIVE_ENDIAN) {
+    endianness = IsLittleEndian() ? JXL_LITTLE_ENDIAN : JXL_BIG_ENDIAN;
+  }
+
+  // Bytes per input row, rounded up to the requested alignment if any.
+  size_t stride =
+      xsize * jxl::DivCeil(GetDataBits(format.data_type) * num_channels,
+                           jxl::kBitsPerByte);
+  if (format.align > 1) stride = jxl::RoundUpTo(stride, format.align);
+
+  // In every branch below, `i` is the byte offset of the pixel in `pixels`
+  // and `j` the index of its R entry in `result`. The alpha sample, when
+  // present, is the last channel of the pixel.
+  if (format.data_type == JXL_TYPE_UINT8) {
+    // Multiplier to bring to 0-1.0 range
+    double mul = factor > 0.0 ? factor : 1.0 / 255.0;
+    for (size_t y = 0; y < ysize; ++y) {
+      for (size_t x = 0; x < xsize; ++x) {
+        size_t j = (y * xsize + x) * 4;
+        size_t i = y * stride + x * num_channels;
+        double r = pixels[i];
+        double g = gray ? r : pixels[i + 1];
+        double b = gray ? r : pixels[i + 2];
+        double a = alpha ? pixels[i + num_channels - 1] : 255;
+        result[j + 0] = r * mul;
+        result[j + 1] = g * mul;
+        result[j + 2] = b * mul;
+        result[j + 3] = a * mul;
+      }
+    }
+  } else if (format.data_type == JXL_TYPE_UINT16) {
+    JXL_ASSERT(endianness != JXL_NATIVE_ENDIAN);
+    // Multiplier to bring to 0-1.0 range
+    double mul = factor > 0.0 ? factor : 1.0 / 65535.0;
+    for (size_t y = 0; y < ysize; ++y) {
+      for (size_t x = 0; x < xsize; ++x) {
+        size_t j = (y * xsize + x) * 4;
+        size_t i = y * stride + x * num_channels * 2;
+        double r, g, b, a;
+        // Assemble each 16-bit sample from its two bytes by hand.
+        if (endianness == JXL_BIG_ENDIAN) {
+          r = (pixels[i + 0] << 8) + pixels[i + 1];
+          g = gray ? r : (pixels[i + 2] << 8) + pixels[i + 3];
+          b = gray ? r : (pixels[i + 4] << 8) + pixels[i + 5];
+          a = alpha ? (pixels[i + num_channels * 2 - 2] << 8) +
+                          pixels[i + num_channels * 2 - 1]
+                    : 65535;
+        } else {
+          r = (pixels[i + 1] << 8) + pixels[i + 0];
+          g = gray ? r : (pixels[i + 3] << 8) + pixels[i + 2];
+          b = gray ? r : (pixels[i + 5] << 8) + pixels[i + 4];
+          a = alpha ? (pixels[i + num_channels * 2 - 1] << 8) +
+                          pixels[i + num_channels * 2 - 2]
+                    : 65535;
+        }
+        result[j + 0] = r * mul;
+        result[j + 1] = g * mul;
+        result[j + 2] = b * mul;
+        result[j + 3] = a * mul;
+      }
+    }
+  } else if (format.data_type == JXL_TYPE_FLOAT) {
+    JXL_ASSERT(endianness != JXL_NATIVE_ENDIAN);
+    for (size_t y = 0; y < ysize; ++y) {
+      for (size_t x = 0; x < xsize; ++x) {
+        size_t j = (y * xsize + x) * 4;
+        size_t i = y * stride + x * num_channels * 4;
+        double r, g, b, a;
+        if (endianness == JXL_BIG_ENDIAN) {
+          r = LoadBEFloat(pixels + i);
+          g = gray ? r : LoadBEFloat(pixels + i + 4);
+          b = gray ? r : LoadBEFloat(pixels + i + 8);
+          a = alpha ? LoadBEFloat(pixels + i + num_channels * 4 - 4) : 1.0;
+        } else {
+          r = LoadLEFloat(pixels + i);
+          g = gray ? r : LoadLEFloat(pixels + i + 4);
+          b = gray ? r : LoadLEFloat(pixels + i + 8);
+          a = alpha ? LoadLEFloat(pixels + i + num_channels * 4 - 4) : 1.0;
+        }
+        result[j + 0] = r;
+        result[j + 1] = g;
+        result[j + 2] = b;
+        result[j + 3] = a;
+      }
+    }
+  } else if (format.data_type == JXL_TYPE_FLOAT16) {
+    JXL_ASSERT(endianness != JXL_NATIVE_ENDIAN);
+    for (size_t y = 0; y < ysize; ++y) {
+      for (size_t x = 0; x < xsize; ++x) {
+        size_t j = (y * xsize + x) * 4;
+        size_t i = y * stride + x * num_channels * 2;
+        double r, g, b, a;
+        if (endianness == JXL_BIG_ENDIAN) {
+          r = LoadBEFloat16(pixels + i);
+          g = gray ? r : LoadBEFloat16(pixels + i + 2);
+          b = gray ? r : LoadBEFloat16(pixels + i + 4);
+          a = alpha ? LoadBEFloat16(pixels + i + num_channels * 2 - 2) : 1.0;
+        } else {
+          r = LoadLEFloat16(pixels + i);
+          g = gray ? r : LoadLEFloat16(pixels + i + 2);
+          b = gray ? r : LoadLEFloat16(pixels + i + 4);
+          a = alpha ? LoadLEFloat16(pixels + i + num_channels * 2 - 2) : 1.0;
+        }
+        result[j + 0] = r;
+        result[j + 1] = g;
+        result[j + 2] = b;
+        result[j + 3] = a;
+      }
+    }
+  } else {
+    JXL_ASSERT(false);  // Unsupported type
+  }
+  return result;
+}
+
+// Counts the pixels that differ between image `a` (before roundtrip) and
+// image `b` (after roundtrip), both xsize x ysize, given in `format_a` and
+// `format_b` respectively.
+//
+// Both buffers are expanded with ConvertToRGBA32 and compared with a
+// tolerance of half a step of the less precise of the two data types, scaled
+// by `threshold_multiplier`. A pixel counts as different when
+// - a compared color channel is outside the tolerance (only the first channel
+//   is compared when either image is grayscale),
+// - `a` is grayscale but the R, G and B values of `b` are not all equal,
+// - both have alpha and the alpha values are outside the tolerance, or
+// - `b` has alpha, `a` does not, and the alpha of `b` is not opaque.
+size_t ComparePixels(const uint8_t* a, const uint8_t* b, size_t xsize,
+                     size_t ysize, const JxlPixelFormat& format_a,
+                     const JxlPixelFormat& format_b,
+                     double threshold_multiplier) {
+  // Convert both images to equal full precision for comparison.
+  std::vector<double> a_full = ConvertToRGBA32(a, xsize, ysize, format_a);
+  std::vector<double> b_full = ConvertToRGBA32(b, xsize, ysize, format_b);
+  bool gray_a = format_a.num_channels < 3;
+  bool gray_b = format_b.num_channels < 3;
+  // An even channel count (2 or 4) means the last channel is alpha.
+  bool alpha_a = !(format_a.num_channels & 1);
+  bool alpha_b = !(format_b.num_channels & 1);
+  size_t bits_a = GetPrecision(format_a.data_type);
+  size_t bits_b = GetPrecision(format_b.data_type);
+  size_t bits = std::min(bits_a, bits_b);
+  // How much distance is allowed in case of pixels with lower bit depths, given
+  // that the double precision float images use range 0-1.0.
+  // E.g. in case of 1-bit this is 0.5 since 0.499 must map to 0 and 0.501 must
+  // map to 1.
+  double precision = 0.5 * threshold_multiplier / ((1ull << bits) - 1ull);
+  if (format_a.data_type == JXL_TYPE_FLOAT16 ||
+      format_b.data_type == JXL_TYPE_FLOAT16) {
+    // Lower the precision for float16, because it currently looks like the
+    // scalar and wasm implementations of hwy have 1 less bit of precision
+    // than the x86 implementations.
+    // TODO(lode): Set the required precision back to 11 bits when possible.
+    precision = 0.5 * threshold_multiplier / ((1ull << (bits - 1)) - 1ull);
+  }
+  size_t numdiff = 0;
+  for (size_t y = 0; y < ysize; y++) {
+    for (size_t x = 0; x < xsize; x++) {
+      size_t i = (y * xsize + x) * 4;
+      bool ok = true;
+      if (gray_a || gray_b) {
+        if (!Near(a_full[i + 0], b_full[i + 0], precision)) ok = false;
+        // If the input was grayscale and the output not, then the output must
+        // have all channels equal: flag the pixel as soon as G or B deviates
+        // from R.
+        if (gray_a && (b_full[i + 0] != b_full[i + 1] ||
+                       b_full[i + 0] != b_full[i + 2])) {
+          ok = false;
+        }
+      } else {
+        if (!Near(a_full[i + 0], b_full[i + 0], precision) ||
+            !Near(a_full[i + 1], b_full[i + 1], precision) ||
+            !Near(a_full[i + 2], b_full[i + 2], precision)) {
+          ok = false;
+        }
+      }
+      if (alpha_a && alpha_b) {
+        if (!Near(a_full[i + 3], b_full[i + 3], precision)) ok = false;
+      } else {
+        // If the input had no alpha channel, the output should be opaque
+        // after roundtrip.
+        if (alpha_b && !Near(1.0, b_full[i + 3], precision)) ok = false;
+      }
+      if (!ok) numdiff++;
+    }
+  }
+  return numdiff;
+}
+
+// Root-mean-square style distance between two xsize x ysize images that share
+// `format`: the squared differences of the first format.num_channels entries
+// of every RGBA-expanded pixel are summed, divided by the pixel count and
+// square-rooted.
+// NOTE(review): for a gray+alpha format this looks at R and G of the expanded
+// pixel (both hold the gray value) rather than at alpha, and an empty image
+// divides by zero -- confirm that callers are fine with both.
+double DistanceRMS(const uint8_t* a, const uint8_t* b, size_t xsize,
+                   size_t ysize, const JxlPixelFormat& format) {
+  // Bring both images to the same double precision RGBA layout.
+  const std::vector<double> lhs = ConvertToRGBA32(a, xsize, ysize, format);
+  const std::vector<double> rhs = ConvertToRGBA32(b, xsize, ysize, format);
+  double total = 0.0;
+  for (size_t y = 0; y < ysize; ++y) {
+    // Accumulate per row first, then add the row to the grand total.
+    double row_total = 0.0;
+    for (size_t x = 0; x < xsize; ++x) {
+      const size_t base = (y * xsize + x) * 4;
+      for (size_t c = 0; c < format.num_channels; ++c) {
+        const double delta = lhs[base + c] - rhs[base + c];
+        row_total += delta * delta;
+      }
+    }
+    total += row_total;
+  }
+  return sqrt(total / (xsize * ysize));
+}
+
+// Converts both pixel files to CodecInOut (aborting via JXL_CHECK on failure)
+// and returns the butteraugli distance between their frames, computed with
+// default ButteraugliParams and without a distance map.
+float ButteraugliDistance(const extras::PackedPixelFile& a,
+                          const extras::PackedPixelFile& b, ThreadPool* pool) {
+  CodecInOut io_a;
+  JXL_CHECK(ConvertPackedPixelFileToCodecInOut(a, pool, &io_a));
+  CodecInOut io_b;
+  JXL_CHECK(ConvertPackedPixelFileToCodecInOut(b, pool, &io_b));
+  // TODO(eustas): simplify?
+  const float distance =
+      ButteraugliDistance(io_a.frames, io_b.frames, ButteraugliParams(),
+                          GetJxlCms(),
+                          /*distmap=*/nullptr, pool);
+  return distance;
+}
+
+// Converts both pixel files to CodecInOut (aborting via JXL_CHECK on failure),
+// computes the butteraugli distance map between their frames and returns
+// ComputeDistanceP of that map with p = 3.
+float Butteraugli3Norm(const extras::PackedPixelFile& a,
+                       const extras::PackedPixelFile& b, ThreadPool* pool) {
+  CodecInOut io_a;
+  JXL_CHECK(ConvertPackedPixelFileToCodecInOut(a, pool, &io_a));
+  CodecInOut io_b;
+  JXL_CHECK(ConvertPackedPixelFileToCodecInOut(b, pool, &io_b));
+  ButteraugliParams params;
+  ImageF distance_map;
+  // Only the map is of interest here; the scalar result is dropped.
+  ButteraugliDistance(io_a.frames, io_b.frames, params, GetJxlCms(),
+                      &distance_map, pool);
+  return ComputeDistanceP(distance_map, params, 3);
+}
+
+// Converts both pixel files to CodecInOut without a thread pool (aborting via
+// JXL_CHECK on failure) and returns ComputeDistance2 of their main frames.
+float ComputeDistance2(const extras::PackedPixelFile& a,
+                       const extras::PackedPixelFile& b) {
+  CodecInOut io_a;
+  JXL_CHECK(ConvertPackedPixelFileToCodecInOut(a, nullptr, &io_a));
+  CodecInOut io_b;
+  JXL_CHECK(ConvertPackedPixelFileToCodecInOut(b, nullptr, &io_b));
+  return ComputeDistance2(io_a.Main(), io_b.Main(), GetJxlCms());
+}
+
+// Returns true when the alpha samples stored in the color image of every
+// frame are byte-identical in `a` and `b`. Aborts via JXL_CHECK when the two
+// files differ in size, alpha bit depth, frame count or color image format,
+// or when `a` has no alpha bits.
+// NOTE(review): the sample offset assumes tightly packed rows (it does not
+// use the image stride) and that the color image really has a channel after
+// the color channels -- confirm for formats with 1 or 3 channels.
+bool SameAlpha(const extras::PackedPixelFile& a,
+               const extras::PackedPixelFile& b) {
+  JXL_CHECK(a.info.xsize == b.info.xsize);
+  JXL_CHECK(a.info.ysize == b.info.ysize);
+  JXL_CHECK(a.info.alpha_bits == b.info.alpha_bits);
+  JXL_CHECK(a.info.alpha_exponent_bits == b.info.alpha_exponent_bits);
+  JXL_CHECK(a.info.alpha_bits > 0);
+  JXL_CHECK(a.frames.size() == b.frames.size());
+  const size_t width = a.info.xsize;
+  const size_t height = a.info.ysize;
+  for (size_t f = 0; f < a.frames.size(); ++f) {
+    const extras::PackedImage& img_a = a.frames[f].color;
+    const extras::PackedImage& img_b = b.frames[f].color;
+    JXL_CHECK(img_a.format.num_channels == img_b.format.num_channels);
+    JXL_CHECK(img_a.format.data_type == img_b.format.data_type);
+    JXL_CHECK(img_a.format.endianness == img_b.format.endianness);
+    JXL_CHECK(img_a.pixels_size == img_b.pixels_size);
+    // Bytes per sample, and the index of the channel right after the colors.
+    const size_t sample_bytes =
+        extras::PackedImage::BitsPerChannel(img_a.format.data_type) / 8;
+    const size_t channels = img_a.format.num_channels;
+    const size_t alpha_channel = channels < 3 ? 1 : 3;
+    const uint8_t* bytes_a = reinterpret_cast<const uint8_t*>(img_a.pixels());
+    const uint8_t* bytes_b = reinterpret_cast<const uint8_t*>(img_b.pixels());
+    for (size_t y = 0; y < height; ++y) {
+      for (size_t x = 0; x < width; ++x) {
+        const size_t pixel = y * width + x;
+        const size_t offset = (pixel * channels + alpha_channel) * sample_bytes;
+        if (memcmp(bytes_a + offset, bytes_b + offset, sample_bytes) != 0) {
+          return false;
+        }
+      }
+    }
+  }
+  return true;
+}
+
+// Returns true when images `a` and `b` hold byte-identical pixels. The first
+// differing pixel is reported on stdout (position plus the bytes of both
+// pixels) and ends the comparison. Aborts via JXL_CHECK when the images
+// differ in size or format.
+bool SamePixels(const extras::PackedImage& a, const extras::PackedImage& b) {
+  JXL_CHECK(a.xsize == b.xsize);
+  JXL_CHECK(a.ysize == b.ysize);
+  JXL_CHECK(a.format.num_channels == b.format.num_channels);
+  JXL_CHECK(a.format.data_type == b.format.data_type);
+  JXL_CHECK(a.format.endianness == b.format.endianness);
+  JXL_CHECK(a.pixels_size == b.pixels_size);
+  const uint8_t* bytes_a = reinterpret_cast<const uint8_t*>(a.pixels());
+  const uint8_t* bytes_b = reinterpret_cast<const uint8_t*>(b.pixels());
+  const size_t pixel_bytes = a.pixel_stride();
+  for (size_t row = 0; row < a.ysize; ++row) {
+    for (size_t col = 0; col < a.xsize; ++col) {
+      const size_t offset = (row * a.xsize + col) * pixel_bytes;
+      if (memcmp(bytes_a + offset, bytes_b + offset, pixel_bytes) == 0) {
+        continue;
+      }
+      // Dump the offending pixel of both images byte by byte.
+      printf("Mismatch at row %" PRIuS " col %" PRIuS "\n", row, col);
+      printf("  a: ");
+      for (size_t k = 0; k < pixel_bytes; ++k) {
+        printf(" %3u", bytes_a[offset + k]);
+      }
+      printf("\n  b: ");
+      for (size_t k = 0; k < pixel_bytes; ++k) {
+        printf(" %3u", bytes_b[offset + k]);
+      }
+      printf("\n");
+      return false;
+    }
+  }
+  return true;
+}
+
+// Returns true when every frame of `a` and `b` holds byte-identical pixels,
+// both in the color image and in each extra channel. Aborts via JXL_CHECK
+// when the files differ in size, bit depth, frame count or number of extra
+// channels.
+bool SamePixels(const extras::PackedPixelFile& a,
+                const extras::PackedPixelFile& b) {
+  JXL_CHECK(a.info.xsize == b.info.xsize);
+  JXL_CHECK(a.info.ysize == b.info.ysize);
+  JXL_CHECK(a.info.bits_per_sample == b.info.bits_per_sample);
+  JXL_CHECK(a.info.exponent_bits_per_sample == b.info.exponent_bits_per_sample);
+  JXL_CHECK(a.frames.size() == b.frames.size());
+  for (size_t i = 0; i < a.frames.size(); ++i) {
+    const auto& frame_a = a.frames[i];
+    const auto& frame_b = b.frames[i];
+    if (!SamePixels(frame_a.color, frame_b.color)) {
+      return false;
+    }
+    JXL_CHECK(frame_a.extra_channels.size() == frame_b.extra_channels.size());
+    for (size_t j = 0; j < frame_a.extra_channels.size(); ++j) {
+      // Index with the extra channel counter j, not the frame counter i, so
+      // that every extra channel is compared with its own counterpart.
+      if (!SamePixels(frame_a.extra_channels[j], frame_b.extra_channels[j])) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+}  // namespace test
+
+// Byte-wise equality: same size and identical contents.
+bool operator==(const jxl::PaddedBytes& a, const jxl::PaddedBytes& b) {
+  const size_t length = a.size();
+  return length == b.size() && memcmp(a.data(), b.data(), length) == 0;
+}
+
+// Allow using EXPECT_EQ on jxl::PaddedBytes
+// Defined as the negation of operator== above.
+bool operator!=(const jxl::PaddedBytes& a, const jxl::PaddedBytes& b) {
+  const bool equal = (a == b);
+  return !equal;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/test_utils.h b/third-party/libjxl/libjxl/lib/jxl/test_utils.h
new file mode 100644
index 0000000000..c6fab66ddc
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/test_utils.h
@@ -0,0 +1,183 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_TEST_UTILS_H_
+#define LIB_JXL_TEST_UTILS_H_
+
+// TODO(eustas): reduce includes (move to .cc)
+
+// Macros and functions useful for tests.
+
+#include <jxl/codestream_header.h>
+#include <jxl/thread_parallel_runner_cxx.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <ostream>
+#include <vector>
+
+#include "lib/extras/dec/decode.h"
+#include "lib/extras/dec/jxl.h"
+#include "lib/extras/enc/jxl.h"
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/enc_params.h"
+
+// Put at the top of a test body that needs JPEG decoding: when the extras
+// library reports that it cannot decode Codec::kJPG, a note is printed to
+// stderr and the enclosing function returns early. The bare `return` means it
+// can only be used in functions returning void.
+#define TEST_LIBJPEG_SUPPORT()                                              \
+  do {                                                                      \
+    if (!jxl::extras::CanDecode(jxl::extras::Codec::kJPG)) {                \
+      fprintf(stderr, "Skipping test because of missing libjpeg codec.\n"); \
+      return;                                                               \
+    }                                                                       \
+  } while (0)
+
+namespace jxl {
+
+struct AuxOut;
+
+namespace test {
+
+// Returns the location of the test data file `filename`: below TEST_DATA_PATH
+// when that macro is defined, otherwise resolved through Bazel runfiles.
+std::string GetTestDataPath(const std::string& filename);
+// Reads the whole test data file `filename` (located via GetTestDataPath) and
+// returns its bytes; aborts when the file cannot be read.
+PaddedBytes ReadTestData(const std::string& filename);
+
+// Resets `basic_info` with JxlEncoderInitBasicInfo and then fills in the bit
+// depths, the number of color channels and the alpha settings implied by
+// `pixel_format`.
+void JxlBasicInfoSetFromPixelFormat(JxlBasicInfo* basic_info,
+                                    const JxlPixelFormat* pixel_format);
+
+// If `dparams.accepted_formats` is empty, fills it with little-endian float
+// formats for 1, 2, 3 and 4 channels; otherwise leaves it untouched.
+void DefaultAcceptedFormats(extras::JXLDecompressParams& dparams);
+
+// Points `params` at the parallel runner of `pool`, unless `pool` is null or
+// `params` already has a runner_opaque set.
+//
+// `params` must be a mutable lvalue: it is taken by reference so that the
+// runner assigned here is visible to the caller. Taking it by value would
+// only update a temporary copy and leave the caller's parameters unchanged.
+template <typename Params>
+void SetThreadParallelRunner(Params& params, ThreadPool* pool) {
+  if (pool && !params.runner_opaque) {
+    params.runner = pool->runner();
+    params.runner_opaque = pool->runner_opaque();
+  }
+}
+
+// Decodes the JPEG XL bytes in `file` into `io`. Default accepted formats are
+// added to the local copy of `dparams` when it lists none. Returns an error
+// status when decoding or the conversion to CodecInOut fails.
+Status DecodeFile(extras::JXLDecompressParams dparams,
+                  const Span<const uint8_t> file, CodecInOut* JXL_RESTRICT io,
+                  ThreadPool* pool = nullptr);
+
+// Encodes `io` with `cparams`, decodes the result into `io2` and checks that
+// the color encodings of all frames are preserved. Mismatches are appended to
+// `failures`; returns true when `failures` is empty afterwards. If
+// `compressed_size` is non-null it receives the encoded size in bytes.
+// Encode/decode errors abort the test.
+bool Roundtrip(const CodecInOut* io, const CompressParams& cparams,
+               extras::JXLDecompressParams dparams,
+               CodecInOut* JXL_RESTRICT io2, std::stringstream& failures,
+               size_t* compressed_size = nullptr, ThreadPool* pool = nullptr,
+               AuxOut* aux_out = nullptr);
+
+// Encodes `ppf_in` and decodes the result into `ppf_out`, aborting on any
+// failure.
+// Returns compressed size [bytes].
+size_t Roundtrip(const extras::PackedPixelFile& ppf_in,
+                 extras::JXLCompressParams cparams,
+                 extras::JXLDecompressParams dparams, ThreadPool* pool,
+                 extras::PackedPixelFile* ppf_out);
+
+// A POD descriptor of a ColorEncoding. Only used in tests as the return value
+// of AllEncodings().
+// ColorEncodingFromDescriptor turns it into a real ColorEncoding; for the XYB
+// color space it ignores white_point, primaries and tf.
+struct ColorEncodingDescriptor {
+  ColorSpace color_space;
+  WhitePoint white_point;
+  Primaries primaries;
+  TransferFunction tf;
+  RenderingIntent rendering_intent;
+};
+
+// Builds the ColorEncoding described by `desc`, including its ICC profile;
+// aborts when the profile cannot be created.
+ColorEncoding ColorEncodingFromDescriptor(const ColorEncodingDescriptor& desc);
+
+// Define the operator<< for tests.
+// Prints "ColorEncoding/" followed by the Description() of the encoding built
+// from `c`. Because this goes through ColorEncodingFromDescriptor, printing a
+// descriptor also creates an ICC profile and can abort for the same reason.
+static inline ::std::ostream& operator<<(::std::ostream& os,
+                                         const ColorEncodingDescriptor& c) {
+  return os << "ColorEncoding/" << Description(ColorEncodingFromDescriptor(c));
+}
+
+// Returns ColorEncodingDescriptors, which are only used in tests. To obtain a
+// ColorEncoding object call ColorEncodingFromDescriptor, which also creates
+// the ICC profile for that object (via ColorEncoding::CreateICC()).
+std::vector<ColorEncodingDescriptor> AllEncodings();
+
+// Returns a CodecInOut based on the buf, xsize, ysize, and the assumption
+// that the buffer was created using `GetSomeTestImage`.
+jxl::CodecInOut SomeTestImageToCodecInOut(const std::vector<uint8_t>& buf,
+                                          size_t num_channels, size_t xsize,
+                                          size_t ysize);
+
+// Returns true when `value` is within `max_dist` of `expected` (inclusive).
+bool Near(double expected, double value, double max_dist);
+
+// Reads a little-endian 16-bit value at `p` and converts it with LoadFloat16.
+float LoadLEFloat16(const uint8_t* p);
+
+// Reads a big-endian 16-bit value at `p` and converts it with LoadFloat16.
+float LoadBEFloat16(const uint8_t* p);
+
+// Significant bits of a sample: 8 / 16 for uint8 / uint16, 24 for float and
+// 11 for float16 (mantissa precision). Aborts on other data types.
+size_t GetPrecision(JxlDataType data_type);
+
+// Storage bits of a sample: 8, 16, 32 and 16 for uint8, uint16, float and
+// float16. Aborts on other data types.
+size_t GetDataBits(JxlDataType data_type);
+
+// Procedure to convert pixels to double precision, not efficient, but
+// well-controlled for testing. It uses double to be able to represent all
+// precisions needed for the largest data types the API supports: uint32_t
+// integers and single precision float. The values are in range 0-1 for SDR.
+// The result holds four doubles (R, G, B, A) per pixel. For integer samples a
+// positive `factor` replaces the default multiplier of 1/255 or 1/65535;
+// float samples are copied unscaled.
+std::vector<double> ConvertToRGBA32(const uint8_t* pixels, size_t xsize,
+                                    size_t ysize, const JxlPixelFormat& format,
+                                    double factor = 0.0);
+
+// Returns the number of pixels which differ between the two pictures. Image b
+// is the image after roundtrip, image a the one before roundtrip. There are
+// stricter requirements for the alpha channel and grayscale values of the
+// output image. `threshold_multiplier` scales the allowed per-channel
+// difference.
+size_t ComparePixels(const uint8_t* a, const uint8_t* b, size_t xsize,
+                     size_t ysize, const JxlPixelFormat& format_a,
+                     const JxlPixelFormat& format_b,
+                     double threshold_multiplier = 1.0);
+
+// Square root of the summed squared differences between the two images
+// (both in `format`), divided by the number of pixels. Per pixel, the first
+// format.num_channels entries of the RGBA expansion are compared.
+double DistanceRMS(const uint8_t* a, const uint8_t* b, size_t xsize,
+                   size_t ysize, const JxlPixelFormat& format);
+
+// Butteraugli distance between the frames of the two pixel files. Aborts when
+// a file cannot be converted to CodecInOut.
+float ButteraugliDistance(const extras::PackedPixelFile& a,
+                          const extras::PackedPixelFile& b,
+                          ThreadPool* pool = nullptr);
+
+// ComputeDistanceP with p = 3 over the butteraugli distance map between the
+// frames of the two pixel files. Aborts when a file cannot be converted.
+float Butteraugli3Norm(const extras::PackedPixelFile& a,
+                       const extras::PackedPixelFile& b,
+                       ThreadPool* pool = nullptr);
+
+// ComputeDistance2 between the main frames of the two pixel files. Aborts
+// when a file cannot be converted.
+float ComputeDistance2(const extras::PackedPixelFile& a,
+                       const extras::PackedPixelFile& b);
+
+// Returns true when the alpha samples in the color images of all frames are
+// byte-identical. Aborts when the files differ in size, alpha bit depth,
+// frame count or color format, or when `a` has no alpha bits.
+bool SameAlpha(const extras::PackedPixelFile& a,
+               const extras::PackedPixelFile& b);
+
+// Returns true when both images hold byte-identical pixels; the first
+// mismatch is printed to stdout. Aborts when size or format differ.
+bool SamePixels(const extras::PackedImage& a, const extras::PackedImage& b);
+
+// Returns true when the color image and the extra channels of every frame
+// are byte-identical. Aborts when size, bit depth or frame count differ.
+bool SamePixels(const extras::PackedPixelFile& a,
+                const extras::PackedPixelFile& b);
+
+// Owns a JxlThreadParallelRunner created for `num_threads` and a ThreadPool
+// that dispatches to it. The unary operator& yields the ThreadPool*, so a
+// test can declare `ThreadPoolForTests pool(n);` and pass `&pool` wherever a
+// ThreadPool* is expected.
+class ThreadPoolForTests {
+ public:
+  explicit ThreadPoolForTests(int num_threads) {
+    runner_ =
+        JxlThreadParallelRunnerMake(/* memory_manager */ nullptr, num_threads);
+    pool_ =
+        jxl::make_unique<ThreadPool>(JxlThreadParallelRunner, runner_.get());
+  }
+  // Not copyable: the pool refers to the runner owned by this object.
+  ThreadPoolForTests(const ThreadPoolForTests&) = delete;
+  ThreadPoolForTests& operator=(const ThreadPoolForTests&) = delete;
+  // Address-of is overloaded on purpose; see the class comment.
+  ThreadPool* operator&() { return pool_.get(); }
+
+ private:
+  // Declared before pool_ so that the runner outlives the pool that was
+  // constructed with a pointer to it (members are destroyed in reverse order).
+  JxlThreadParallelRunnerPtr runner_;
+  std::unique_ptr<ThreadPool> pool_;
+};
+
+}  // namespace test
+
+// Byte-wise comparison: true when both buffers have the same size and the
+// same contents.
+bool operator==(const jxl::PaddedBytes& a, const jxl::PaddedBytes& b);
+
+// Allow using EXPECT_EQ on jxl::PaddedBytes
+bool operator!=(const jxl::PaddedBytes& a, const jxl::PaddedBytes& b);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_TEST_UTILS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/testing.h b/third-party/libjxl/libjxl/lib/jxl/testing.h
new file mode 100644
index 0000000000..d10b0c3c54
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/testing.h
@@ -0,0 +1,73 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_TESTING_H_
+#define LIB_JXL_TESTING_H_
+
+// GTest/GMock specific macros / wrappers.
+
+// gmock unconditionally redefines those macros (to wrong values).
+// Let's include it only here and mitigate the problem.
+#pragma push_macro("PRIdS")
+#pragma push_macro("PRIuS")
+#include "gmock/gmock.h"
+#pragma pop_macro("PRIuS")
+#pragma pop_macro("PRIdS")
+
+#include <sstream>
+
+#include "gtest/gtest.h"
+
+#ifdef JXL_DISABLE_SLOW_TESTS
+#define JXL_SLOW_TEST(X) DISABLED_##X
+#else
+#define JXL_SLOW_TEST(X) X
+#endif  // JXL_DISABLE_SLOW_TESTS
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+#define JXL_TRANSCODE_JPEG_TEST(X) X
+#else
+#define JXL_TRANSCODE_JPEG_TEST(X) DISABLED_##X
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+#if JPEGXL_ENABLE_BOXES
+#define JXL_BOXES_TEST(X) X
+#else
+#define JXL_BOXES_TEST(X) DISABLED_##X
+#endif  // JPEGXL_ENABLE_BOXES
+
+#ifdef THREAD_SANITIZER
+#define JXL_TSAN_SLOW_TEST(X) DISABLED_##X
+#else
+#define JXL_TSAN_SLOW_TEST(X) X
+#endif  // THREAD_SANITIZER
+
+// googletest before 1.10 didn't define INSTANTIATE_TEST_SUITE_P() but instead
+// used INSTANTIATE_TEST_CASE_P which is now deprecated.
+#ifdef INSTANTIATE_TEST_SUITE_P
+#define JXL_GTEST_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_SUITE_P
+#else
+#define JXL_GTEST_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_CASE_P
+#endif
+
+// Ensures that we don't make our test bounds too lax, effectively disabling the
+// tests.
+MATCHER_P(IsSlightlyBelow, max, "") {
+  return max * 0.75 <= arg && arg <= max * 1.0;
+}
+
+#define JXL_EXPECT_OK(F)       \
+  {                            \
+    std::stringstream _;       \
+    EXPECT_TRUE(F) << _.str(); \
+  }
+
+#define JXL_ASSERT_OK(F)       \
+  {                            \
+    std::stringstream _;       \
+    ASSERT_TRUE(F) << _.str(); \
+  }
+
+#endif  // LIB_JXL_TESTING_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/tf_gbench.cc b/third-party/libjxl/libjxl/lib/jxl/tf_gbench.cc
new file mode 100644
index 0000000000..9c010d460a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/tf_gbench.cc
@@ -0,0 +1,143 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "benchmark/benchmark.h"
+#include "lib/jxl/image_ops.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/tf_gbench.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+#define RUN_BENCHMARK(F)                                            \
+  constexpr size_t kNum = 1 << 12;                                  \
+  HWY_FULL(float) d;                                                \
+  /* Three parallel runs, as this will run on R, G and B. */        \
+  auto sum1 = Zero(d);                                              \
+  auto sum2 = Zero(d);                                              \
+  auto sum3 = Zero(d);                                              \
+  for (auto _ : state) {                                            \
+    auto x = Set(d, 1e-5);                                          \
+    auto v1 = Set(d, 1e-5);                                         \
+    auto v2 = Set(d, 1.1e-5);                                       \
+    auto v3 = Set(d, 1.2e-5);                                       \
+    for (size_t i = 0; i < kNum; i++) {                             \
+      sum1 += F(d, v1);                                             \
+      sum2 += F(d, v2);                                             \
+      sum3 += F(d, v3);                                             \
+      v1 += x;                                                      \
+      v2 += x;                                                      \
+      v3 += x;                                                      \
+    }                                                               \
+  }                                                                 \
+  /* floats per second */                                           \
+  state.SetItemsProcessed(kNum* state.iterations() * Lanes(d) * 3); \
+  benchmark::DoNotOptimize(sum1 + sum2 + sum3);
+
+#define RUN_BENCHMARK_SCALAR(F)                              \
+  constexpr size_t kNum = 1 << 12;                           \
+  /* Three parallel runs, as this will run on R, G and B. */ \
+  float sum1 = 0, sum2 = 0, sum3 = 0;                        \
+  for (auto _ : state) {                                     \
+    float x = 1e-5;                                          \
+    float v1 = 1e-5;                                         \
+    float v2 = 1.1e-5;                                       \
+    float v3 = 1.2e-5;                                       \
+    for (size_t i = 0; i < kNum; i++) {                      \
+      sum1 += F(v1);                                         \
+      sum2 += F(v2);                                         \
+      sum3 += F(v3);                                         \
+      v1 += x;                                               \
+      v2 += x;                                               \
+      v3 += x;                                               \
+    }                                                        \
+  }                                                          \
+  /* floats per second */                                    \
+  state.SetItemsProcessed(kNum* state.iterations() * 3);     \
+  benchmark::DoNotOptimize(sum1 + sum2 + sum3);
+
+HWY_NOINLINE void BM_FastSRGB(benchmark::State& state) {
+  RUN_BENCHMARK(FastLinearToSRGB);
+}
+
+HWY_NOINLINE void BM_TFSRGB(benchmark::State& state) {
+  RUN_BENCHMARK(TF_SRGB().EncodedFromDisplay);
+}
+
+HWY_NOINLINE void BM_PQDFE(benchmark::State& state) {
+  RUN_BENCHMARK(TF_PQ().DisplayFromEncoded);
+}
+
+HWY_NOINLINE void BM_PQEFD(benchmark::State& state) {
+  RUN_BENCHMARK(TF_PQ().EncodedFromDisplay);
+}
+
+HWY_NOINLINE void BM_PQSlowDFE(benchmark::State& state) {
+  RUN_BENCHMARK_SCALAR(TF_PQ().DisplayFromEncoded);
+}
+
+HWY_NOINLINE void BM_PQSlowEFD(benchmark::State& state) {
+  RUN_BENCHMARK_SCALAR(TF_PQ().EncodedFromDisplay);
+}
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+namespace {
+
+HWY_EXPORT(BM_FastSRGB);
+HWY_EXPORT(BM_TFSRGB);
+HWY_EXPORT(BM_PQDFE);
+HWY_EXPORT(BM_PQEFD);
+HWY_EXPORT(BM_PQSlowDFE);
+HWY_EXPORT(BM_PQSlowEFD);
+
+float SRGB_pow(float x) {
+  return x < 0.0031308f ? 12.92f * x : 1.055f * powf(x, 1.0f / 2.4f) - 0.055f;
+}
+
+void BM_FastSRGB(benchmark::State& state) {
+  HWY_DYNAMIC_DISPATCH(BM_FastSRGB)(state);
+}
+void BM_TFSRGB(benchmark::State& state) {
+  HWY_DYNAMIC_DISPATCH(BM_TFSRGB)(state);
+}
+void BM_PQDFE(benchmark::State& state) {
+  HWY_DYNAMIC_DISPATCH(BM_PQDFE)(state);
+}
+void BM_PQEFD(benchmark::State& state) {
+  HWY_DYNAMIC_DISPATCH(BM_PQEFD)(state);
+}
+void BM_PQSlowDFE(benchmark::State& state) {
+  HWY_DYNAMIC_DISPATCH(BM_PQSlowDFE)(state);
+}
+void BM_PQSlowEFD(benchmark::State& state) {
+  HWY_DYNAMIC_DISPATCH(BM_PQSlowEFD)(state);
+}
+
+void BM_SRGB_pow(benchmark::State& state) { RUN_BENCHMARK_SCALAR(SRGB_pow); }
+
+BENCHMARK(BM_FastSRGB);
+BENCHMARK(BM_TFSRGB);
+BENCHMARK(BM_SRGB_pow);
+BENCHMARK(BM_PQDFE);
+BENCHMARK(BM_PQEFD);
+BENCHMARK(BM_PQSlowDFE);
+BENCHMARK(BM_PQSlowEFD);
+
+}  // namespace
+}  // namespace jxl
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl/toc.cc b/third-party/libjxl/libjxl/lib/jxl/toc.cc
new file mode 100644
index 0000000000..fd7740c144
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/toc.cc
@@ -0,0 +1,105 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/toc.h"
+
+#include <stdint.h>
+
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+size_t MaxBits(const size_t num_sizes) {
+  const size_t entry_bits = U32Coder::MaxEncodedBits(kTocDist) * num_sizes;
+  // permutation bit (not its tokens!), padding, entries, padding.
+  return 1 + kBitsPerByte + entry_bits + kBitsPerByte;
+}
+
+Status ReadToc(size_t toc_entries, BitReader* JXL_RESTRICT reader,
+               std::vector<uint32_t>* JXL_RESTRICT sizes,
+               std::vector<coeff_order_t>* JXL_RESTRICT permutation) {
+  if (toc_entries > 65536) {
+    // Prevent out of memory if invalid JXL codestream causes a bogus amount
+    // of toc_entries such as 2720436919446 to be computed.
+    // TODO(lode): verify whether 65536 is a reasonable upper bound
+    return JXL_FAILURE("too many toc entries");
+  }
+
+  sizes->clear();
+  sizes->resize(toc_entries);
+  if (reader->TotalBitsConsumed() >= reader->TotalBytes() * kBitsPerByte) {
+    return JXL_STATUS(StatusCode::kNotEnoughBytes, "Not enough bytes for TOC");
+  }
+  const auto check_bit_budget = [&](size_t num_entries) -> Status {
+    // U32Coder reads 2 bits to recognize variant and kTocDist cheapest variant
+    // is Bits(10), this way at least 12 bits are required per toc-entry.
+    size_t minimal_bit_cost = num_entries * (2 + 10);
+    size_t bit_budget = reader->TotalBytes() * 8;
+    size_t expenses = reader->TotalBitsConsumed();
+    if ((expenses <= bit_budget) &&
+        (minimal_bit_cost <= bit_budget - expenses)) {
+      return true;
+    }
+    return JXL_STATUS(StatusCode::kNotEnoughBytes, "Not enough bytes for TOC");
+  };
+
+  JXL_DASSERT(toc_entries > 0);
+  if (reader->ReadFixedBits<1>() == 1) {
+    JXL_RETURN_IF_ERROR(check_bit_budget(toc_entries));
+    permutation->resize(toc_entries);
+    JXL_RETURN_IF_ERROR(DecodePermutation(/*skip=*/0, toc_entries,
+                                          permutation->data(), reader));
+  }
+  JXL_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+  JXL_RETURN_IF_ERROR(check_bit_budget(toc_entries));
+  for (size_t i = 0; i < toc_entries; ++i) {
+    (*sizes)[i] = U32Coder::Read(kTocDist, reader);
+  }
+  JXL_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+  JXL_RETURN_IF_ERROR(check_bit_budget(0));
+  return true;
+}
+
+Status ReadGroupOffsets(size_t toc_entries, BitReader* JXL_RESTRICT reader,
+                        std::vector<uint64_t>* JXL_RESTRICT offsets,
+                        std::vector<uint32_t>* JXL_RESTRICT sizes,
+                        uint64_t* total_size) {
+  std::vector<coeff_order_t> permutation;
+  JXL_RETURN_IF_ERROR(ReadToc(toc_entries, reader, sizes, &permutation));
+
+  offsets->clear();
+  offsets->resize(toc_entries);
+
+  // Prefix sum starting with 0 and ending with the offset of the last group
+  uint64_t offset = 0;
+  for (size_t i = 0; i < toc_entries; ++i) {
+    if (offset + (*sizes)[i] < offset) {
+      return JXL_FAILURE("group offset overflow");
+    }
+    (*offsets)[i] = offset;
+    offset += (*sizes)[i];
+  }
+  if (total_size) {
+    *total_size = offset;
+  }
+
+  if (!permutation.empty()) {
+    std::vector<uint64_t> permuted_offsets;
+    std::vector<uint32_t> permuted_sizes;
+    permuted_offsets.reserve(toc_entries);
+    permuted_sizes.reserve(toc_entries);
+    for (coeff_order_t index : permutation) {
+      permuted_offsets.push_back((*offsets)[index]);
+      permuted_sizes.push_back((*sizes)[index]);
+    }
+    std::swap(*offsets, permuted_offsets);
+    std::swap(*sizes, permuted_sizes);
+  }
+
+  return true;
+}
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/toc.h b/third-party/libjxl/libjxl/lib/jxl/toc.h
new file mode 100644
index 0000000000..a97197ad45
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/toc.h
@@ -0,0 +1,55 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_TOC_H_
+#define LIB_JXL_TOC_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/field_encodings.h"
+
+namespace jxl {
+
+// (2+bits) = 2,3,4 bytes so encoders can patch TOC after encoding.
+// 30 is sufficient for 4K channels of uncompressed 16-bit samples.
+constexpr U32Enc kTocDist(Bits(10), BitsOffset(14, 1024), BitsOffset(22, 17408),
+                          BitsOffset(30, 4211712));
+
+size_t MaxBits(const size_t num_sizes);
+
+// TODO(veluca): move these to FrameDimensions.
+static JXL_INLINE size_t AcGroupIndex(size_t pass, size_t group,
+                                      size_t num_groups, size_t num_dc_groups,
+                                      bool has_ac_global) {
+  return 1 + num_dc_groups + static_cast<size_t>(has_ac_global) +
+         pass * num_groups + group;
+}
+
+static JXL_INLINE size_t NumTocEntries(size_t num_groups, size_t num_dc_groups,
+                                       size_t num_passes, bool has_ac_global) {
+  if (num_groups == 1 && num_passes == 1) return 1;
+  return AcGroupIndex(0, 0, num_groups, num_dc_groups, has_ac_global) +
+         num_groups * num_passes;
+}
+
+Status ReadToc(size_t toc_entries, BitReader* JXL_RESTRICT reader,
+               std::vector<uint32_t>* JXL_RESTRICT sizes,
+               std::vector<coeff_order_t>* JXL_RESTRICT permutation);
+
+Status ReadGroupOffsets(size_t toc_entries, BitReader* JXL_RESTRICT reader,
+                        std::vector<uint64_t>* JXL_RESTRICT offsets,
+                        std::vector<uint32_t>* JXL_RESTRICT sizes,
+                        uint64_t* total_size);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_TOC_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/toc_test.cc b/third-party/libjxl/libjxl/lib/jxl/toc_test.cc
new file mode 100644
index 0000000000..a7f0f2c27b
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/toc_test.cc
@@ -0,0 +1,92 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/toc.h"
+
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_toc.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+void Roundtrip(size_t num_entries, bool permute, Rng* rng) {
+  // Generate a random permutation.
+  std::vector<coeff_order_t> permutation(num_entries);
+  std::vector<coeff_order_t> inv_permutation(num_entries);
+  for (size_t i = 0; i < num_entries; i++) {
+    permutation[i] = i;
+    inv_permutation[i] = i;
+  }
+  if (permute) {
+    rng->Shuffle(permutation.data(), permutation.size());
+    for (size_t i = 0; i < num_entries; i++) {
+      inv_permutation[permutation[i]] = i;
+    }
+  }
+
+  // Generate num_entries groups of random (byte-aligned) length
+  std::vector<BitWriter> group_codes(num_entries);
+  for (BitWriter& writer : group_codes) {
+    const size_t max_bits = (*rng)() & 0xFFF;
+    BitWriter::Allotment allotment(&writer, max_bits + kBitsPerByte);
+    size_t i = 0;
+    for (; i + BitWriter::kMaxBitsPerCall < max_bits;
+         i += BitWriter::kMaxBitsPerCall) {
+      writer.Write(BitWriter::kMaxBitsPerCall, 0);
+    }
+    for (; i < max_bits; i += 1) {
+      writer.Write(/*n_bits=*/1, 0);
+    }
+    writer.ZeroPadToByte();
+    AuxOut aux_out;
+    allotment.ReclaimAndCharge(&writer, 0, &aux_out);
+  }
+
+  BitWriter writer;
+  AuxOut aux_out;
+  ASSERT_TRUE(WriteGroupOffsets(group_codes, permute ? &permutation : nullptr,
+                                &writer, &aux_out));
+
+  BitReader reader(writer.GetSpan());
+  std::vector<uint64_t> group_offsets;
+  std::vector<uint32_t> group_sizes;
+  uint64_t total_size;
+  ASSERT_TRUE(ReadGroupOffsets(num_entries, &reader, &group_offsets,
+                               &group_sizes, &total_size));
+  ASSERT_EQ(num_entries, group_offsets.size());
+  ASSERT_EQ(num_entries, group_sizes.size());
+  EXPECT_TRUE(reader.Close());
+
+  uint64_t prefix_sum = 0;
+  for (size_t i = 0; i < num_entries; ++i) {
+    EXPECT_EQ(prefix_sum, group_offsets[inv_permutation[i]]);
+
+    EXPECT_EQ(0u, group_codes[i].BitsWritten() % kBitsPerByte);
+    prefix_sum += group_codes[i].BitsWritten() / kBitsPerByte;
+
+    if (i + 1 < num_entries) {
+      EXPECT_EQ(
+          group_offsets[inv_permutation[i]] + group_sizes[inv_permutation[i]],
+          group_offsets[inv_permutation[i + 1]]);
+    }
+  }
+  EXPECT_EQ(prefix_sum, total_size);
+}
+
+TEST(TocTest, Test) {
+  Rng rng(0);
+  for (size_t num_entries = 1; num_entries < 10; ++num_entries) {
+    for (bool permute : std::vector<bool>{false, true}) {
+      Roundtrip(num_entries, permute, &rng);
+    }
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/transfer_functions-inl.h b/third-party/libjxl/libjxl/lib/jxl/transfer_functions-inl.h
new file mode 100644
index 0000000000..9f4c10c76d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/transfer_functions-inl.h
@@ -0,0 +1,413 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Transfer functions for color encodings.
+
+#if defined(LIB_JXL_TRANSFER_FUNCTIONS_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_TRANSFER_FUNCTIONS_INL_H_
+#undef LIB_JXL_TRANSFER_FUNCTIONS_INL_H_
+#else
+#define LIB_JXL_TRANSFER_FUNCTIONS_INL_H_
+#endif
+
+#include <algorithm>
+#include <cmath>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/rational_polynomial-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::And;
+using hwy::HWY_NAMESPACE::AndNot;
+using hwy::HWY_NAMESPACE::Gt;
+using hwy::HWY_NAMESPACE::IfThenElse;
+using hwy::HWY_NAMESPACE::Lt;
+using hwy::HWY_NAMESPACE::Or;
+using hwy::HWY_NAMESPACE::Sqrt;
+using hwy::HWY_NAMESPACE::TableLookupBytes;
+
+// Definitions for BT.2100-2 transfer functions (used inside/outside SIMD):
+// "display" is linear light (nits) normalized to [0, 1].
+// "encoded" is a nonlinear encoding (e.g. PQ) in [0, 1].
+// "scene" is a linear function of photon counts, normalized to [0, 1].
+
+// Despite the stated ranges, we need unbounded transfer functions: see
+// http://www.littlecms.com/CIC18_UnboundedCMM.pdf. Inputs can be negative or
+// above 1 due to chromatic adaptation. To avoid severe round-trip errors caused
+// by clamping, we mirror negative inputs via copysign (f(-x) = -f(x), see
+// https://developer.apple.com/documentation/coregraphics/cgcolorspace/1644735-extendedsrgb)
+// and extend the function domains above 1.
+
+// Hybrid Log-Gamma.
+class TF_HLG {
+ public:
+  // EOTF. e = encoded.
+  JXL_INLINE double DisplayFromEncoded(const double e) const {
+    return OOTF(InvOETF(e));
+  }
+
+  // Inverse EOTF. d = display.
+  JXL_INLINE double EncodedFromDisplay(const double d) const {
+    return OETF(InvOOTF(d));
+  }
+
+  // Maximum error 5e-7.
+  template <class D, class V>
+  JXL_INLINE V EncodedFromDisplay(D d, V x) const {
+    const hwy::HWY_NAMESPACE::Rebind<uint32_t, D> du;
+    const V kSign = BitCast(d, Set(du, 0x80000000u));
+    const V original_sign = And(x, kSign);
+    x = AndNot(kSign, x);  // abs
+    const V below_div12 = Sqrt(Mul(Set(d, 3.0f), x));
+    const V e =
+        MulAdd(Set(d, kA * 0.693147181f),
+               FastLog2f(d, MulAdd(Set(d, 12), x, Set(d, -kB))), Set(d, kC));
+    const V magnitude = IfThenElse(Le(x, Set(d, kDiv12)), below_div12, e);
+    return Or(AndNot(kSign, magnitude), original_sign);
+  }
+
+ private:
+  // OETF (defines the HLG approach). s = scene, returns encoded.
+  JXL_INLINE double OETF(double s) const {
+    if (s == 0.0) return 0.0;
+    const double original_sign = s;
+    s = std::abs(s);
+
+    if (s <= kDiv12) return std::copysign(std::sqrt(3.0 * s), original_sign);
+
+    const double e = kA * std::log(12 * s - kB) + kC;
+    JXL_ASSERT(e > 0.0);
+    return std::copysign(e, original_sign);  // double overload: no float cast
+  }
+
+  // e = encoded, returns scene.
+  JXL_INLINE double InvOETF(double e) const {
+    if (e == 0.0) return 0.0;
+    const double original_sign = e;
+    e = std::abs(e);
+
+    if (e <= 0.5) return std::copysign(e * e * (1.0 / 3), original_sign);
+
+    const double s = (std::exp((e - kC) * kRA) + kB) * kDiv12;
+    JXL_ASSERT(s >= 0);
+    return std::copysign(s, original_sign);  // double overload: no float cast
+  }
+
+  // s = scene, returns display.
+  JXL_INLINE double OOTF(const double s) const {
+    // The actual (red channel) OOTF is RD = alpha * YS^(gamma-1) * RS, where
+    // YS = 0.2627 * RS + 0.6780 * GS + 0.0593 * BS. Let alpha = 1 so we return
+    // "display" (normalized [0, 1]) instead of nits. Our transfer function
+    // interface does not allow a dependency on YS. Fortunately, the system
+    // gamma at 334 nits is 1.0, so this reduces to RD = RS.
+    return s;
+  }
+
+  // d = display, returns scene.
+  JXL_INLINE double InvOOTF(const double d) const {
+    return d;  // see OOTF().
+  }
+
+  static constexpr double kA = 0.17883277;
+  static constexpr double kRA = 1.0 / kA;
+  static constexpr double kB = 1 - 4 * kA;
+  static constexpr double kC = 0.5599107295;
+  static constexpr double kDiv12 = 1.0 / 12;
+};
+
+class TF_709 {
+ public:
+  JXL_INLINE double EncodedFromDisplay(const double d) const {
+    if (d < kThresh) return kMulLow * d;
+    return kMulHi * std::pow(d, kPowHi) + kSub;
+  }
+
+  // Maximum error 1e-6.
+  template <class D, class V>
+  JXL_INLINE V EncodedFromDisplay(D d, V x) const {
+    auto low = Mul(Set(d, kMulLow), x);
+    auto hi =
+        MulAdd(Set(d, kMulHi), FastPowf(d, x, Set(d, kPowHi)), Set(d, kSub));
+    return IfThenElse(Le(x, Set(d, kThresh)), low, hi);
+  }
+
+  template <class D, class V>
+  JXL_INLINE V DisplayFromEncoded(D d, V x) const {
+    auto low = Mul(Set(d, kInvMulLow), x);
+    auto hi = FastPowf(d, MulAdd(x, Set(d, kInvMulHi), Set(d, kInvAdd)),
+                       Set(d, kInvPowHi));
+    return IfThenElse(Lt(x, Set(d, kInvThresh)), low, hi);
+  }
+
+ private:
+  static constexpr double kThresh = 0.018;
+  static constexpr double kMulLow = 4.5;
+  static constexpr double kMulHi = 1.099;
+  static constexpr double kPowHi = 0.45;
+  static constexpr double kSub = -0.099;
+
+  static constexpr double kInvThresh = 0.081;
+  static constexpr double kInvMulLow = 1 / 4.5;
+  static constexpr double kInvMulHi = 1 / 1.099;
+  static constexpr double kInvPowHi = 1 / 0.45;
+  static constexpr double kInvAdd = 0.099 * kInvMulHi;
+};
+
+// Perceptual Quantization
+class TF_PQ {
+ public:
+  // EOTF (defines the PQ approach). e = encoded.
+  JXL_INLINE double DisplayFromEncoded(double e) const {
+    if (e == 0.0) return 0.0;
+    const double original_sign = e;
+    e = std::abs(e);
+
+    const double xp = std::pow(e, 1.0 / kM2);
+    const double num = std::max(xp - kC1, 0.0);
+    const double den = kC2 - kC3 * xp;
+    JXL_DASSERT(den != 0.0);
+    const double d = std::pow(num / den, 1.0 / kM1);
+    JXL_DASSERT(d >= 0.0);  // Equal for e ~= 1E-9
+    return std::copysign(d, original_sign);  // double overload: no float cast
+  }
+
+  // Maximum error 3e-6
+  template <class D, class V>
+  JXL_INLINE V DisplayFromEncoded(D d, V x) const {
+    const hwy::HWY_NAMESPACE::Rebind<uint32_t, D> du;
+    const V kSign = BitCast(d, Set(du, 0x80000000u));
+    const V original_sign = And(x, kSign);
+    x = AndNot(kSign, x);  // abs
+    // 4-over-4-degree rational polynomial approximation on x+x*x. This improves
+    // the maximum error by about 5x over a rational polynomial for x.
+    auto xpxx = MulAdd(x, x, x);
+    HWY_ALIGN constexpr float p[(4 + 1) * 4] = {
+        HWY_REP4(2.62975656e-04f), HWY_REP4(-6.23553089e-03f),
+        HWY_REP4(7.38602301e-01f), HWY_REP4(2.64553172e+00f),
+        HWY_REP4(5.50034862e-01f),
+    };
+    HWY_ALIGN constexpr float q[(4 + 1) * 4] = {
+        HWY_REP4(4.21350107e+02f), HWY_REP4(-4.28736818e+02f),
+        HWY_REP4(1.74364667e+02f), HWY_REP4(-3.39078883e+01f),
+        HWY_REP4(2.67718770e+00f),
+    };
+    auto magnitude = EvalRationalPolynomial(d, xpxx, p, q);
+    return Or(AndNot(kSign, magnitude), original_sign);
+  }
+
+  // Inverse EOTF. d = display.
+  JXL_INLINE double EncodedFromDisplay(double d) const {
+    if (d == 0.0) return 0.0;
+    const double original_sign = d;
+    d = std::abs(d);
+
+    const double xp = std::pow(d, kM1);
+    const double num = kC1 + xp * kC2;
+    const double den = 1.0 + xp * kC3;
+    const double e = std::pow(num / den, kM2);
+    JXL_DASSERT(e > 0.0);
+    return std::copysign(e, original_sign);  // double overload: no float cast
+  }
+
+  // Maximum error 7e-7.
+  template <class D, class V>
+  JXL_INLINE V EncodedFromDisplay(D d, V x) const {
+    const hwy::HWY_NAMESPACE::Rebind<uint32_t, D> du;
+    const V kSign = BitCast(d, Set(du, 0x80000000u));
+    const V original_sign = And(x, kSign);
+    x = AndNot(kSign, x);  // abs
+    // 4-over-4-degree rational polynomial approximation on x**0.25, with two
+    // different polynomials above and below 1e-4.
+    auto xto025 = Sqrt(Sqrt(x));
+    HWY_ALIGN constexpr float p[(4 + 1) * 4] = {
+        HWY_REP4(1.351392e-02f), HWY_REP4(-1.095778e+00f),
+        HWY_REP4(5.522776e+01f), HWY_REP4(1.492516e+02f),
+        HWY_REP4(4.838434e+01f),
+    };
+    HWY_ALIGN constexpr float q[(4 + 1) * 4] = {
+        HWY_REP4(1.012416e+00f), HWY_REP4(2.016708e+01f),
+        HWY_REP4(9.263710e+01f), HWY_REP4(1.120607e+02f),
+        HWY_REP4(2.590418e+01f),
+    };
+
+    HWY_ALIGN constexpr float plo[(4 + 1) * 4] = {
+        HWY_REP4(9.863406e-06f),  HWY_REP4(3.881234e-01f),
+        HWY_REP4(1.352821e+02f),  HWY_REP4(6.889862e+04f),
+        HWY_REP4(-2.864824e+05f),
+    };
+    HWY_ALIGN constexpr float qlo[(4 + 1) * 4] = {
+        HWY_REP4(3.371868e+01f),  HWY_REP4(1.477719e+03f),
+        HWY_REP4(1.608477e+04f),  HWY_REP4(-4.389884e+04f),
+        HWY_REP4(-2.072546e+05f),
+    };
+
+    auto magnitude = IfThenElse(Lt(x, Set(d, 1e-4f)),
+                                EvalRationalPolynomial(d, xto025, plo, qlo),
+                                EvalRationalPolynomial(d, xto025, p, q));
+    return Or(AndNot(kSign, magnitude), original_sign);
+  }
+
+ private:
+  static constexpr double kM1 = 2610.0 / 16384;
+  static constexpr double kM2 = (2523.0 / 4096) * 128;
+  static constexpr double kC1 = 3424.0 / 4096;
+  static constexpr double kC2 = (2413.0 / 4096) * 32;
+  static constexpr double kC3 = (2392.0 / 4096) * 32;
+};
+
+// sRGB
+class TF_SRGB {
+ public:
+  template <typename V>
+  JXL_INLINE V DisplayFromEncoded(V x) const {
+    const HWY_FULL(float) d;
+    const HWY_FULL(uint32_t) du;
+    const V kSign = BitCast(d, Set(du, 0x80000000u));
+    const V original_sign = And(x, kSign);
+    x = AndNot(kSign, x);  // abs
+
+    // TODO(janwas): range reduction
+    // Computed via af_cheb_rational (k=100); replicated 4x.
+    HWY_ALIGN constexpr float p[(4 + 1) * 4] = {
+        2.200248328e-04f, 2.200248328e-04f, 2.200248328e-04f, 2.200248328e-04f,
+        1.043637593e-02f, 1.043637593e-02f, 1.043637593e-02f, 1.043637593e-02f,
+        1.624820318e-01f, 1.624820318e-01f, 1.624820318e-01f, 1.624820318e-01f,
+        7.961564959e-01f, 7.961564959e-01f, 7.961564959e-01f, 7.961564959e-01f,
+        8.210152774e-01f, 8.210152774e-01f, 8.210152774e-01f, 8.210152774e-01f,
+    };
+    HWY_ALIGN constexpr float q[(4 + 1) * 4] = {
+        2.631846970e-01f,  2.631846970e-01f,  2.631846970e-01f,
+        2.631846970e-01f,  1.076976492e+00f,  1.076976492e+00f,
+        1.076976492e+00f,  1.076976492e+00f,  4.987528350e-01f,
+        4.987528350e-01f,  4.987528350e-01f,  4.987528350e-01f,
+        -5.512498495e-02f, -5.512498495e-02f, -5.512498495e-02f,
+        -5.512498495e-02f, 6.521209011e-03f,  6.521209011e-03f,
+        6.521209011e-03f,  6.521209011e-03f,
+    };
+    const V linear = Mul(x, Set(d, kLowDivInv));
+    const V poly = EvalRationalPolynomial(d, x, p, q);
+    const V magnitude =
+        IfThenElse(Gt(x, Set(d, kThreshSRGBToLinear)), poly, linear);
+    return Or(AndNot(kSign, magnitude), original_sign);
+  }
+
+  // Error ~5e-07
+  template <class D, class V>
+  JXL_INLINE V EncodedFromDisplay(D d, V x) const {
+    const hwy::HWY_NAMESPACE::Rebind<uint32_t, D> du;
+    const V kSign = BitCast(d, Set(du, 0x80000000u));
+    const V original_sign = And(x, kSign);
+    x = AndNot(kSign, x);  // abs
+
+    // Computed via af_cheb_rational (k=100); replicated 4x.
+    HWY_ALIGN constexpr float p[(4 + 1) * 4] = {
+        -5.135152395e-04f, -5.135152395e-04f, -5.135152395e-04f,
+        -5.135152395e-04f, 5.287254571e-03f,  5.287254571e-03f,
+        5.287254571e-03f,  5.287254571e-03f,  3.903842876e-01f,
+        3.903842876e-01f,  3.903842876e-01f,  3.903842876e-01f,
+        1.474205315e+00f,  1.474205315e+00f,  1.474205315e+00f,
+        1.474205315e+00f,  7.352629620e-01f,  7.352629620e-01f,
+        7.352629620e-01f,  7.352629620e-01f,
+    };
+    HWY_ALIGN constexpr float q[(4 + 1) * 4] = {
+        1.004519624e-02f, 1.004519624e-02f, 1.004519624e-02f, 1.004519624e-02f,
+        3.036675394e-01f, 3.036675394e-01f, 3.036675394e-01f, 3.036675394e-01f,
+        1.340816930e+00f, 1.340816930e+00f, 1.340816930e+00f, 1.340816930e+00f,
+        9.258482155e-01f, 9.258482155e-01f, 9.258482155e-01f, 9.258482155e-01f,
+        2.424867759e-02f, 2.424867759e-02f, 2.424867759e-02f, 2.424867759e-02f,
+    };
+    const V linear = Mul(x, Set(d, kLowDiv));
+    const V poly = EvalRationalPolynomial(d, Sqrt(x), p, q);
+    const V magnitude =
+        IfThenElse(Gt(x, Set(d, kThreshLinearToSRGB)), poly, linear);
+    return Or(AndNot(kSign, magnitude), original_sign);
+  }
+
+ private:
+  static constexpr float kThreshSRGBToLinear = 0.04045f;
+  static constexpr float kThreshLinearToSRGB = 0.0031308f;
+  static constexpr float kLowDiv = 12.92f;
+  static constexpr float kLowDivInv = 1.0f / kLowDiv;
+};
+
+// Linear to sRGB conversion with error of at most 1.2e-4.
+template <typename D, typename V>
+V FastLinearToSRGB(D d, V v) {
+  const hwy::HWY_NAMESPACE::Rebind<uint32_t, D> du;
+  const hwy::HWY_NAMESPACE::Rebind<int32_t, D> di;
+  // Convert to 0.25 - 0.5 range.
+  auto v025_05 = BitCast(
+      d, And(Or(BitCast(du, v), Set(du, 0x3e800000)), Set(du, 0x3effffff)));
+  // third degree polynomial approximation between 0.25 and 0.5
+  // of 1.055/2^(7/2.4) * x^(1/2.4) * 0.5. A degree 4 polynomial only improves
+  // accuracy by about 3x.
+  auto d1 = MulAdd(v025_05, Set(d, 0.059914046f), Set(d, -0.108894556f));
+  auto d2 = MulAdd(d1, v025_05, Set(d, 0.107963754f));
+  auto pow = MulAdd(d2, v025_05, Set(d, 0.018092343f));
+  // Compute extra multiplier depending on exponent. Valid exponent range for
+  // [0.0031308f, 1.0) is 0...8 after subtracting 118.
+  // The next three constants contain a representation of the powers of
+  // 2**(1/2.4) = 2**(5/12) times two; in particular, bits from 26 to 31 are
+  // always the same and in k2to512powers_basebits, and the two arrays contain
+  // the next groups of 8 bits. This ends up being a 22-bit representation (with
+  // a mantissa of 13 bits). The choice of polynomial to approximate is such
+  // that the multiplication factor has the highest 5 bits constant, and that
+  // the factor for the lowest possible exponent is a power of two (thus making
+  // the additional bits 0, which is used to correctly merge back together the
+  // floats).
+  constexpr uint32_t k2to512powers_basebits = 0x40000000;
+  HWY_ALIGN constexpr uint8_t k2to512powers_25to18bits[16] = {
+      0x0,  0xa,  0x19, 0x26, 0x32, 0x41, 0x4d, 0x5c,
+      0x68, 0x75, 0x83, 0x8f, 0xa0, 0xaa, 0xb9, 0xc6,
+  };
+  HWY_ALIGN constexpr uint8_t k2to512powers_17to10bits[16] = {
+      0x0,  0xb7, 0x4,  0xd,  0xcb, 0xe7, 0x41, 0x68,
+      0x51, 0xd1, 0xeb, 0xf2, 0x0,  0xb7, 0x4,  0xd,
+  };
+  // Note that vld1q_s8_x2 on ARM seems to actually be slower.
+#if HWY_TARGET != HWY_SCALAR
+  using hwy::HWY_NAMESPACE::ShiftLeft;
+  using hwy::HWY_NAMESPACE::ShiftRight;
+  // Every lane of exp is now (if cast to byte) {0, 0, 0, <index for lookup>}.
+  auto exp = Sub(ShiftRight<23>(BitCast(di, v)), Set(di, 118));
+  auto pow25to18bits = TableLookupBytes(
+      LoadDup128(di,
+                 reinterpret_cast<const int32_t*>(k2to512powers_25to18bits)),
+      exp);
+  auto pow17to10bits = TableLookupBytes(
+      LoadDup128(di,
+                 reinterpret_cast<const int32_t*>(k2to512powers_17to10bits)),
+      exp);
+  // Now, pow* contain {0, 0, 0, <part of float repr of multiplier>}. Here
+  // we take advantage of the fact that each table has its position 0 equal to
+  // 0.
+  // We can now just reassemble the float.
+  auto mul = BitCast(
+      d, Or(Or(ShiftLeft<18>(pow25to18bits), ShiftLeft<10>(pow17to10bits)),
+            Set(di, k2to512powers_basebits)));
+#else
+  // Fallback for scalar.
+  uint32_t exp = ((BitCast(di, v).raw >> 23) - 118) & 0xf;
+  auto mul = BitCast(d, Set(di, (k2to512powers_25to18bits[exp] << 18) |
+                                    (k2to512powers_17to10bits[exp] << 10) |
+                                    k2to512powers_basebits));
+#endif
+  return IfThenElse(Lt(v, Set(d, 0.0031308f)), Mul(v, Set(d, 12.92f)),
+                    MulAdd(pow, mul, Set(d, -0.055)));
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_TRANSFER_FUNCTIONS_INL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/transpose-inl.h b/third-party/libjxl/libjxl/lib/jxl/transpose-inl.h
new file mode 100644
index 0000000000..4674420737
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/transpose-inl.h
@@ -0,0 +1,203 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Block transpose for DCT/IDCT
+
+#if defined(LIB_JXL_TRANSPOSE_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_TRANSPOSE_INL_H_
+#undef LIB_JXL_TRANSPOSE_INL_H_
+#else
+#define LIB_JXL_TRANSPOSE_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+#include <type_traits>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dct_block-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+#ifndef JXL_INLINE_TRANSPOSE
+// Workaround for issue #42 - (excessive?) inlining causes invalid codegen.
+#if defined(__arm__)
+#define JXL_INLINE_TRANSPOSE HWY_NOINLINE
+#else
+#define JXL_INLINE_TRANSPOSE HWY_INLINE
+#endif
+#endif  // JXL_INLINE_TRANSPOSE
+
+// Invokes f(args...) from a JXL_NOINLINE function to keep it out of callers.
+template <typename T, typename... Args>
+JXL_NOINLINE void NoInlineWrapper(const T& f, const Args&... args) {
+  return f(args...);
+}
+
+template <bool enabled>
+struct TransposeSimdTag {};  // Picks a GenericTransposeBlock overload.
+
+// TODO(veluca): it's not super useful to have this in the SIMD namespace.
+template <size_t ROWS_or_0, size_t COLS_or_0, class From, class To>
+JXL_INLINE_TRANSPOSE void GenericTransposeBlock(TransposeSimdTag<false>,
+                                                const From& from, const To& to,
+                                                size_t ROWSp, size_t COLSp) {
+  const size_t num_rows = ROWS_or_0 != 0 ? ROWS_or_0 : ROWSp;  // 0 = dynamic
+  const size_t num_cols = COLS_or_0 != 0 ? COLS_or_0 : COLSp;  // 0 = dynamic
+  for (size_t row = 0; row < num_rows; ++row) {
+    for (size_t col = 0; col < num_cols; ++col) {
+      to.Write(from.Read(row, col), col, row);  // (row, col) -> (col, row)
+    }
+  }
+}
+
+// TODO(veluca): AVX3?
+#if HWY_CAP_GE256
+constexpr bool TransposeUseSimd(size_t ROWS, size_t COLS) {
+  return ROWS % 8 == 0 && COLS % 8 == 0;  // SIMD path works on 8x8 tiles.
+}
+
+template <size_t ROWS_or_0, size_t COLS_or_0, class From, class To>
+JXL_INLINE_TRANSPOSE void GenericTransposeBlock(TransposeSimdTag<true>,
+                                                const From& from, const To& to,
+                                                size_t ROWSp, size_t COLSp) {
+  size_t ROWS = ROWS_or_0 == 0 ? ROWSp : ROWS_or_0;  // 0 = use ROWSp.
+  size_t COLS = COLS_or_0 == 0 ? COLSp : COLS_or_0;  // 0 = use COLSp.
+  static_assert(MaxLanes(BlockDesc<8>()) == 8, "Invalid descriptor size");
+  static_assert(ROWS_or_0 % 8 == 0, "Invalid number of rows");
+  static_assert(COLS_or_0 % 8 == 0, "Invalid number of columns");
+  for (size_t n = 0; n < ROWS; n += 8) {
+    for (size_t m = 0; m < COLS; m += 8) {  // One 8x8 tile per iteration.
+      const BlockDesc<8> d;
+      auto i0 = from.LoadPart(d, n + 0, m + 0);  // Eight loads: n..n+7 at m.
+      auto i1 = from.LoadPart(d, n + 1, m + 0);
+      auto i2 = from.LoadPart(d, n + 2, m + 0);
+      auto i3 = from.LoadPart(d, n + 3, m + 0);
+      auto i4 = from.LoadPart(d, n + 4, m + 0);
+      auto i5 = from.LoadPart(d, n + 5, m + 0);
+      auto i6 = from.LoadPart(d, n + 6, m + 0);
+      auto i7 = from.LoadPart(d, n + 7, m + 0);
+      // Surprisingly, this straightforward implementation (24 cycles on port5)
+      // is faster than load128+insert and LoadDup128+ConcatUpperLower+blend.
+      const auto q0 = InterleaveLower(d, i0, i2);  // Stage 1: i(k), i(k+2).
+      const auto q1 = InterleaveLower(d, i1, i3);
+      const auto q2 = InterleaveUpper(d, i0, i2);
+      const auto q3 = InterleaveUpper(d, i1, i3);
+      const auto q4 = InterleaveLower(d, i4, i6);
+      const auto q5 = InterleaveLower(d, i5, i7);
+      const auto q6 = InterleaveUpper(d, i4, i6);
+      const auto q7 = InterleaveUpper(d, i5, i7);
+
+      const auto r0 = InterleaveLower(d, q0, q1);  // Stage 2: adjacent q pairs.
+      const auto r1 = InterleaveUpper(d, q0, q1);
+      const auto r2 = InterleaveLower(d, q2, q3);
+      const auto r3 = InterleaveUpper(d, q2, q3);
+      const auto r4 = InterleaveLower(d, q4, q5);
+      const auto r5 = InterleaveUpper(d, q4, q5);
+      const auto r6 = InterleaveLower(d, q6, q7);
+      const auto r7 = InterleaveUpper(d, q6, q7);
+
+      i0 = ConcatLowerLower(d, r4, r0);  // Stage 3: combine r4..r7 with r0..r3.
+      i1 = ConcatLowerLower(d, r5, r1);
+      i2 = ConcatLowerLower(d, r6, r2);
+      i3 = ConcatLowerLower(d, r7, r3);
+      i4 = ConcatUpperUpper(d, r4, r0);
+      i5 = ConcatUpperUpper(d, r5, r1);
+      i6 = ConcatUpperUpper(d, r6, r2);
+      i7 = ConcatUpperUpper(d, r7, r3);
+      to.StorePart(d, i0, m + 0, n + 0);  // Indices swapped vs. the loads.
+      to.StorePart(d, i1, m + 1, n + 0);
+      to.StorePart(d, i2, m + 2, n + 0);
+      to.StorePart(d, i3, m + 3, n + 0);
+      to.StorePart(d, i4, m + 4, n + 0);
+      to.StorePart(d, i5, m + 5, n + 0);
+      to.StorePart(d, i6, m + 6, n + 0);
+      to.StorePart(d, i7, m + 7, n + 0);
+    }
+  }
+}
+#elif HWY_TARGET != HWY_SCALAR
+constexpr bool TransposeUseSimd(size_t ROWS, size_t COLS) {
+  return ROWS % 4 == 0 && COLS % 4 == 0;  // SIMD path works on 4x4 tiles.
+}
+
+template <size_t ROWS_or_0, size_t COLS_or_0, class From, class To>
+JXL_INLINE_TRANSPOSE void GenericTransposeBlock(TransposeSimdTag<true>,
+                                                const From& from, const To& to,
+                                                size_t ROWSp, size_t COLSp) {
+  size_t ROWS = ROWS_or_0 == 0 ? ROWSp : ROWS_or_0;  // 0 = use ROWSp.
+  size_t COLS = COLS_or_0 == 0 ? COLSp : COLS_or_0;  // 0 = use COLSp.
+  static_assert(MaxLanes(BlockDesc<4>()) == 4, "Invalid descriptor size");
+  static_assert(ROWS_or_0 % 4 == 0, "Invalid number of rows");
+  static_assert(COLS_or_0 % 4 == 0, "Invalid number of columns");
+  for (size_t n = 0; n < ROWS; n += 4) {
+    for (size_t m = 0; m < COLS; m += 4) {  // One 4x4 tile per iteration.
+      const BlockDesc<4> d;
+      const auto p0 = from.LoadPart(d, n + 0, m + 0);  // Four loads: n..n+3.
+      const auto p1 = from.LoadPart(d, n + 1, m + 0);
+      const auto p2 = from.LoadPart(d, n + 2, m + 0);
+      const auto p3 = from.LoadPart(d, n + 3, m + 0);
+
+      const auto q0 = InterleaveLower(d, p0, p2);  // Stage 1: p(k), p(k+2).
+      const auto q1 = InterleaveLower(d, p1, p3);
+      const auto q2 = InterleaveUpper(d, p0, p2);
+      const auto q3 = InterleaveUpper(d, p1, p3);
+
+      const auto r0 = InterleaveLower(d, q0, q1);  // Stage 2: adjacent q pairs.
+      const auto r1 = InterleaveUpper(d, q0, q1);
+      const auto r2 = InterleaveLower(d, q2, q3);
+      const auto r3 = InterleaveUpper(d, q2, q3);
+
+      to.StorePart(d, r0, m + 0, n + 0);  // Indices swapped vs. the loads.
+      to.StorePart(d, r1, m + 1, n + 0);
+      to.StorePart(d, r2, m + 2, n + 0);
+      to.StorePart(d, r3, m + 3, n + 0);
+    }
+  }
+}
+#else
+constexpr bool TransposeUseSimd(size_t, size_t) { return false; }  // Scalar.
+#endif
+
+template <size_t N, size_t M, typename = void>
+struct Transpose {
+  template <typename From, typename To>
+  static void Run(const From& from, const To& to) {
+    // Cheap sanity check only: it compares just the (0, 0) addresses, so it
+    // catches the most obvious aliasing of source and destination.
+    JXL_DASSERT(from.Address(0, 0) != to.Address(0, 0));
+    using Tag = TransposeSimdTag<TransposeUseSimd(N, M)>;
+    GenericTransposeBlock<N, M>(Tag(), from, to, N, M);
+  }
+};
+
+// For large blocks, avoid inlining and unrolling the transpose.
+template <size_t N, size_t M>
+struct Transpose<
+    N, M, typename std::enable_if<(N >= 8 && M >= 8 && N * M >= 512)>::type> {
+  template <typename From, typename To>
+  static void Run(const From& from, const To& to) {
+    // Cheap sanity check only: it compares just the (0, 0) addresses.
+    JXL_DASSERT(from.Address(0, 0) != to.Address(0, 0));
+    using Tag = TransposeSimdTag<TransposeUseSimd(N, M)>;
+    using Fn = void (*)(Tag, const From&, const To&, size_t, size_t);
+    // <0, 0> makes the loop bounds the run-time arguments N and M; the call
+    // goes through NoInlineWrapper instead of being made directly.
+    constexpr Fn transpose = GenericTransposeBlock<0, 0, From, To>;
+    NoInlineWrapper(transpose, Tag(), from, to, N, M);
+  }
+};
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_TRANSPOSE_INL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/version.h.in b/third-party/libjxl/libjxl/lib/jxl/version.h.in
new file mode 100644
index 0000000000..d077abec79
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/version.h.in
@@ -0,0 +1,39 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_common
+ * @{
+ * @file version.h
+ * @brief libjxl version information
+ */
+
+#ifndef JXL_VERSION_H_
+#define JXL_VERSION_H_
+
+#define JPEGXL_MAJOR_VERSION @JPEGXL_MAJOR_VERSION@ ///< JPEG XL Major version
+#define JPEGXL_MINOR_VERSION @JPEGXL_MINOR_VERSION@ ///< JPEG XL Minor version
+#define JPEGXL_PATCH_VERSION @JPEGXL_PATCH_VERSION@ ///< JPEG XL Patch version
+
+/** Can be used to conditionally compile code for a specific JXL version
+ * @code
+ * #if JPEGXL_NUMERIC_VERSION < JPEGXL_COMPUTE_NUMERIC_VERSION(0,8,0)
+ * // use old/deprecated api
+ * #else
+ * // use current api
+ * #endif
+ * @endcode
+ * @param major major version
+ * @param minor minor version
+ * @param patch patch version
+ */
+#define JPEGXL_COMPUTE_NUMERIC_VERSION(major,minor,patch) (((major)<<24) | ((minor)<<16) | ((patch)<<8) | 0)
+
+/* Numeric representation of the version */
+#define JPEGXL_NUMERIC_VERSION JPEGXL_COMPUTE_NUMERIC_VERSION(JPEGXL_MAJOR_VERSION,JPEGXL_MINOR_VERSION,JPEGXL_PATCH_VERSION)
+
+#endif /* JXL_VERSION_H_ */
+
+/** @}*/
diff --git a/third-party/libjxl/libjxl/lib/jxl/xorshift128plus-inl.h b/third-party/libjxl/libjxl/lib/jxl/xorshift128plus-inl.h
new file mode 100644
index 0000000000..a473d591f2
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/xorshift128plus-inl.h
@@ -0,0 +1,103 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Fast but weak random generator.
+
+#if defined(LIB_JXL_XORSHIFT128PLUS_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_XORSHIFT128PLUS_INL_H_
+#undef LIB_JXL_XORSHIFT128PLUS_INL_H_
+#else
+#define LIB_JXL_XORSHIFT128PLUS_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::ShiftLeft;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Xor;
+
+// Adapted from https://github.com/vpxyz/xorshift/blob/master/xorshift128plus/
+// (MIT-license)
+class Xorshift128Plus {
+ public:
+  // 8 independent generators (= single iteration for AVX-512)
+  enum { N = 8 };
+
+  explicit HWY_MAYBE_UNUSED Xorshift128Plus(const uint64_t seed) {
+    // Init state with one SplitMix64 chain: s0_[0], s1_[0], s0_[1], ...
+    s0_[0] = SplitMix64(seed + 0x9E3779B97F4A7C15ull);
+    s1_[0] = SplitMix64(s0_[0]);
+    for (size_t i = 1; i < N; ++i) {
+      s0_[i] = SplitMix64(s1_[i - 1]);
+      s1_[i] = SplitMix64(s0_[i]);
+    }
+  }
+
+  HWY_MAYBE_UNUSED Xorshift128Plus(const uint32_t seed1, const uint32_t seed2,
+                                   const uint32_t seed3, const uint32_t seed4) {
+    // Init state with two SplitMix64 chains, one for s0_ and one for s1_.
+    s0_[0] = SplitMix64(((static_cast<uint64_t>(seed1) << 32) + seed2) +
+                        0x9E3779B97F4A7C15ull);
+    s1_[0] = SplitMix64(((static_cast<uint64_t>(seed3) << 32) + seed4) +
+                        0x9E3779B97F4A7C15ull);
+    for (size_t i = 1; i < N; ++i) {
+      s0_[i] = SplitMix64(s0_[i - 1]);
+      s1_[i] = SplitMix64(s1_[i - 1]);
+    }
+  }
+  // Writes N random 64-bit values to random_bits and advances all states.
+  HWY_INLINE HWY_MAYBE_UNUSED void Fill(uint64_t* HWY_RESTRICT random_bits) {
+#if HWY_CAP_INTEGER64  // Vector path: Lanes(d) generators per iteration.
+    const HWY_FULL(uint64_t) d;
+    for (size_t i = 0; i < N; i += Lanes(d)) {
+      auto s1 = Load(d, s0_ + i);
+      const auto s0 = Load(d, s1_ + i);
+      const auto bits = Add(s1, s0);  // Output = sum of both state words.
+      Store(s0, d, s0_ + i);  // New s0_ is the old s1_.
+      s1 = Xor(s1, ShiftLeft<23>(s1));
+      Store(bits, d, random_bits + i);
+      s1 = Xor(s1, Xor(s0, Xor(ShiftRight<18>(s1), ShiftRight<5>(s0))));
+      Store(s1, d, s1_ + i);
+    }
+#else  // Scalar path: the same update, one generator per iteration.
+    for (size_t i = 0; i < N; ++i) {
+      auto s1 = s0_[i];
+      const auto s0 = s1_[i];
+      const auto bits = s1 + s0;  // Output = sum of both state words.
+      s0_[i] = s0;  // New s0_ is the old s1_.
+      s1 ^= s1 << 23;
+      random_bits[i] = bits;
+      s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
+      s1_[i] = s1;
+    }
+#endif
+  }
+
+ private:
+  static uint64_t SplitMix64(uint64_t z) {  // Bit mixer used for seeding.
+    z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ull;
+    z = (z ^ (z >> 27)) * 0x94D049BB133111EBull;
+    return z ^ (z >> 31);
+  }
+
+  HWY_ALIGN uint64_t s0_[N];  // First state word of each generator.
+  HWY_ALIGN uint64_t s1_[N];  // Second state word of each generator.
+};
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_XORSHIFT128PLUS_INL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/xorshift128plus_test.cc b/third-party/libjxl/libjxl/lib/jxl/xorshift128plus_test.cc
new file mode 100644
index 0000000000..2b0c78b1d1
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/xorshift128plus_test.cc
@@ -0,0 +1,378 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/xorshift128plus_test.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+#include <hwy/tests/test_util-inl.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/xorshift128plus-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Or;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Sub;
+
+// Define to nonzero in order to print the (new) golden outputs.
+#define PRINT_RESULTS 0
+
+const size_t kVectors = 64;
+
+#if PRINT_RESULTS
+
+template <int kNumLanes>
+void Print(const uint64_t (&result)[kNumLanes]) {  // One golden-table row.
+  printf("{ ");
+  for (int i = 0; i < kNumLanes; ++i) {
+    if (i != 0) {
+      printf(", ");
+    }
+    printf("0x%016llXull", static_cast<unsigned long long>(result[i]));
+  }
+  printf("},\n");
+}
+
+#else  // PRINT_RESULTS
+
+const uint64_t kExpected[kVectors][Xorshift128Plus::N] = {
+    {0x6E901576D477CBB1ull, 0xE9E53789195DA2A2ull, 0xB681F6DDA5E0AE99ull,
+     0x8EFD18CE21FD6896ull, 0xA898A80DF75CF532ull, 0x50CEB2C9E2DE7E32ull,
+     0x3CA7C2FEB25C0DD0ull, 0xA4D0866B80B4D836ull},
+    {0x8CD6A1E6233D3A26ull, 0x3D4603ADE98B112Dull, 0xDC427AF674019E36ull,
+     0xE28B4D230705AC53ull, 0x7297E9BBA88783DDull, 0x34D3D23CFCD9B41Aull,
+     0x5A223615ADBE96B8ull, 0xE5EB529027CFBD01ull},
+    {0xC1894CF00DFAC6A2ull, 0x18EDF8AE9085E404ull, 0x8E936625296B4CCDull,
+     0x31971EF3A14A899Bull, 0xBE87535FCE0BF26Aull, 0x576F7A752BC6649Full,
+     0xA44CBADCE0C6B937ull, 0x3DBA819BB17A353Aull},
+    {0x27CE38DFCC1C5EB6ull, 0x920BEB5606340256ull, 0x3986CBC40C9AFC2Cull,
+     0xE22BCB3EEB1E191Eull, 0x6E1FCDD3602A8FBAull, 0x052CB044E5415A29ull,
+     0x46266646EFB9ECD7ull, 0x8F44914618D29335ull},
+    {0xDD30AEDF72A362C5ull, 0xBC1D824E16BB98F4ull, 0x9EA6009C2AA3D2F1ull,
+     0xF65C0FBBE17AF081ull, 0x22424D06A8738991ull, 0x8A62763F2B7611D2ull,
+     0x2F3E89F722637939ull, 0x84D338BEF50AFD50ull},
+    {0x00F46494898E2B0Bull, 0x81239DC4FB8E8003ull, 0x414AD93EC5773FE7ull,
+     0x791473C450E4110Full, 0x87F127BF68C959ACull, 0x6429282D695EF67Bull,
+     0x661082E11546CBA8ull, 0x5815D53FA5436BFDull},
+    {0xB3DEADAB9BE6E0F9ull, 0xAA1B7B8F7CED0202ull, 0x4C5ED437699D279Eull,
+     0xA4471727F1CB39D3ull, 0xE439DA193F802F70ull, 0xF89401BB04FA6493ull,
+     0x3B08045A4FE898BAull, 0x32137BFE98227950ull},
+    {0xFBAE4A092897FEF3ull, 0x0639F6CE56E71C8Eull, 0xF0AD6465C07F0C1Eull,
+     0xFF8E28563361DCE5ull, 0xC2013DB7F86BC6B9ull, 0x8EFCC0503330102Full,
+     0x3F6B767EA5C4DA40ull, 0xB9864B950B2232E1ull},
+    {0x76EB58DE8E5EC22Aull, 0x9BBBF49A18B32F4Full, 0xC8405F02B2B2FAB9ull,
+     0xC3E122A5F146BC34ull, 0xC90BB046660F5765ull, 0xB933981310DBECCFull,
+     0x5A2A7BFC9126FD1Cull, 0x8BB388C94DF87901ull},
+    {0x753EB89AD63EF3C3ull, 0xF24AAF40C89D65ADull, 0x23F68931C1A6AA6Dull,
+     0xF47E79BF702C6DD0ull, 0xA3AD113244EE7EAEull, 0xD42CBEA28F793DC3ull,
+     0xD896FCF1820F497Cull, 0x042B86D2818948C1ull},
+    {0x8F2A4FC5A4265763ull, 0xEC499E6F95EAA10Cull, 0xE3786D4ECCD0DEB5ull,
+     0xC725C53D3AC4CC43ull, 0x065A4ACBBF83610Eull, 0x35C61C9FEF167129ull,
+     0x7B720AEAA7D70048ull, 0x14206B841377D039ull},
+    {0xAD27D78BF96055F6ull, 0x5F43B20FF47ADCD4ull, 0xE184C2401E2BF71Eull,
+     0x30B263D78990045Dull, 0xC22F00EBFF9BA201ull, 0xAE7F86522B53A562ull,
+     0x2853312BC039F0A4ull, 0x868D619E6549C3C8ull},
+    {0xFD5493D8AE9A8371ull, 0x773D5E224DF61B3Bull, 0x5377C54FBB1A8280ull,
+     0xCAD4DE3B8265CAFAull, 0xCDF3F19C91EBD5F6ull, 0xC8EA0F182D73BD78ull,
+     0x220502D593433FF1ull, 0xB81205E612DC31B1ull},
+    {0x8F32A39EAEDA4C70ull, 0x1D4B0914AA4DAC7Full, 0x56EF1570F3A8B405ull,
+     0x29812CB17404A592ull, 0x97A2AAF69CAE90F2ull, 0x12BF5E02778BBFE5ull,
+     0x9D4B55AD42A05FD2ull, 0x06C2BAB5E6086620ull},
+    {0x8DB4B9648302B253ull, 0xD756AD9E3AEA12C7ull, 0x68709B7F11D4B188ull,
+     0x7CC299DDCD707A4Bull, 0x97B860C370A7661Dull, 0xCECD314FC20E64F5ull,
+     0x55F412CDFB4C7EC3ull, 0x55EE97591193B525ull},
+    {0xCF70F3ACA96E6254ull, 0x022FEDECA2E09F46ull, 0x686823DB60AE1ECFull,
+     0xFD36190D3739830Eull, 0x74E1C09027F68120ull, 0xB5883A835C093842ull,
+     0x93E1EFB927E9E4E3ull, 0xB2721E249D7E5EBEull},
+    {0x69B6E21C44188CB8ull, 0x5D6CFB853655A7AAull, 0x3E001A0B425A66DCull,
+     0x8C57451103A5138Full, 0x7BF8B4BE18EAB402ull, 0x494102EB8761A365ull,
+     0xB33796A9F6A81F0Eull, 0x10005AB3BCCFD960ull},
+    {0xB2CF25740AE965DCull, 0x6F7C1DF7EF53D670ull, 0x648DD6087AC2251Eull,
+     0x040955D9851D487Dull, 0xBD550FC7E21A7F66ull, 0x57408F484DEB3AB5ull,
+     0x481E24C150B506C1ull, 0x72C0C3EAF91A40D6ull},
+    {0x1997A481858A5D39ull, 0x539718F4BEF50DC1ull, 0x2EC4DC4787E7E368ull,
+     0xFF1CE78879419845ull, 0xE219A93DD6F6DD30ull, 0x85328618D02FEC1Aull,
+     0xC86E02D969181B20ull, 0xEBEC8CD8BBA34E6Eull},
+    {0x28B55088A16CE947ull, 0xDD25AC11E6350195ull, 0xBD1F176694257B1Cull,
+     0x09459CCF9FCC9402ull, 0xF8047341E386C4E4ull, 0x7E8E9A9AD984C6C0ull,
+     0xA4661E95062AA092ull, 0x70A9947005ED1152ull},
+    {0x4C01CF75DBE98CCDull, 0x0BA076CDFC7373B9ull, 0x6C5E7A004B57FB59ull,
+     0x336B82297FD3BC56ull, 0x7990C0BE74E8D60Full, 0xF0275CC00EC5C8C8ull,
+     0x6CF29E682DFAD2E9ull, 0xFA4361524BD95D72ull},
+    {0x631D2A19FF62F018ull, 0x41C43863B985B3FAull, 0xE052B2267038EFD9ull,
+     0xE2A535FAC575F430ull, 0xE004EEA90B1FF5B8ull, 0x42DFE2CA692A1F26ull,
+     0x90FB0BFC9A189ECCull, 0x4484102BD3536BD0ull},
+    {0xD027134E9ACCA5A5ull, 0xBBAB4F966D476A9Bull, 0x713794A96E03D693ull,
+     0x9F6335E6B94CD44Aull, 0xC5090C80E7471617ull, 0x6D9C1B0C87B58E33ull,
+     0x1969CE82E31185A5ull, 0x2099B97E87754EBEull},
+    {0x60EBAF4ED934350Full, 0xC26FBF0BA5E6ECFFull, 0x9E54150F0312EC57ull,
+     0x0973B48364ED0041ull, 0x800A523241426CFCull, 0x03AB5EC055F75989ull,
+     0x8CF315935DEEB40Aull, 0x83D3FC0190BD1409ull},
+    {0x26D35394CF720A51ull, 0xCE9EAA15243CBAFEull, 0xE2B45FBAF21B29E0ull,
+     0xDB92E98EDE73F9E0ull, 0x79B16F5101C26387ull, 0x1AC15959DE88C86Full,
+     0x387633AEC6D6A580ull, 0xA6FC05807BFC5EB8ull},
+    {0x2D26C8E47C6BADA9ull, 0x820E6EC832D52D73ull, 0xB8432C3E0ED0EE5Bull,
+     0x0F84B3C4063AAA87ull, 0xF393E4366854F651ull, 0x749E1B4D2366A567ull,
+     0x805EACA43480D004ull, 0x244EBF3AA54400A5ull},
+    {0xBFDC3763AA79F75Aull, 0x9E3A74CC751F41DBull, 0xF401302A149DBC55ull,
+     0x6B25F7973D7BF7BCull, 0x13371D34FDBC3DAEull, 0xC5E1998C8F484DCDull,
+     0x7031B8AE5C364464ull, 0x3847F0C4F3DA2C25ull},
+    {0x24C6387D2C0F1225ull, 0x77CCE960255C67A4ull, 0x21A0947E497B10EBull,
+     0xBB5DB73A825A9D7Eull, 0x26294A41999E553Dull, 0x3953E0089F87D925ull,
+     0x3DAE6E5D4E5EAAFEull, 0x74B545460341A7AAull},
+    {0x710E5EB08A7DB820ull, 0x7E43C4E77CAEA025ull, 0xD4C91529C8B060C1ull,
+     0x09AE26D8A7B0CA29ull, 0xAB9F356BB360A772ull, 0xB68834A25F19F6E9ull,
+     0x79B8D9894C5734E2ull, 0xC6847E7C8FFD265Full},
+    {0x10C4BCB06A5111E6ull, 0x57CB50955B6A2516ull, 0xEF53C87798B6995Full,
+     0xAB38E15BBD8D0197ull, 0xA51C6106EFF73C93ull, 0x83D7F0E2270A7134ull,
+     0x0923FD330397FCE5ull, 0xF9DE54EDFE58FB45ull},
+    {0x07D44833ACCD1A94ull, 0xAAD3C9E945E2F9F3ull, 0xABF4C879B876AA37ull,
+     0xF29C69A21B301619ull, 0x2DDCE959111C788Bull, 0x7CEDB48F8AC1729Bull,
+     0x93F3BA9A02B659BEull, 0xF20A87FF17933CBEull},
+    {0x8E96EBE93180CFE6ull, 0x94CAA12873937079ull, 0x05F613D9380D4189ull,
+     0xBCAB40C1DC79F38Aull, 0x0AD8907B7C61D19Eull, 0x88534E189D103910ull,
+     0x2DB2FAABA160AB8Full, 0xA070E7506B06F15Cull},
+    {0x6FB1FCDAFFEF87A9ull, 0xE735CF25337A090Dull, 0x172C6EDCEFEF1825ull,
+     0x76957EA49EF0542Dull, 0x819BF4CD250F7C49ull, 0xD6FF23E4AD00C4D4ull,
+     0xE79673C1EC358FF0ull, 0xAC9C048144337938ull},
+    {0x4C5387FF258B3AF4ull, 0xEDB68FAEC2CB1AA3ull, 0x02A624E67B4E1DA4ull,
+     0x5C44797A38E08AF2ull, 0x36546A70E9411B4Bull, 0x47C17B24D2FD9675ull,
+     0x101957AAA020CA26ull, 0x47A1619D4779F122ull},
+    {0xF84B8BCDC92D9A3Cull, 0x951D7D2C74B3066Bull, 0x7AC287C06EDDD9B2ull,
+     0x4C38FC476608D38Full, 0x224D793B19CB4BCDull, 0x835A255899BF1A41ull,
+     0x4AD250E9F62DB4ABull, 0xD9B44F4B58781096ull},
+    {0xABBAF99A8EB5C6B8ull, 0xFB568E900D3A9F56ull, 0x11EDF63D23C5DF11ull,
+     0xA9C3011D3FA7C5A8ull, 0xAEDD3CF11AFFF725ull, 0xABCA472B5F1EDD6Bull,
+     0x0600B6BB5D879804ull, 0xDB4DE007F22191A0ull},
+    {0xD76CC9EFF0CE9392ull, 0xF5E0A772B59BA49Aull, 0x7D1AE1ED0C1261B5ull,
+     0x79224A33B5EA4F4Aull, 0x6DD825D80C40EA60ull, 0x47FC8E747E51C953ull,
+     0x695C05F72888BF98ull, 0x1A012428440B9015ull},
+    {0xD754DD61F9B772BFull, 0xC4A2FCF4C0F9D4EBull, 0x461167CDF67A24A2ull,
+     0x434748490EBCB9D4ull, 0x274DD9CDCA5781DEull, 0x36BAC63BA9A85209ull,
+     0x30324DAFDA36B70Full, 0x337570DB4FE6DAB3ull},
+    {0xF46CBDD57C551546ull, 0x8E02507E676DA3E3ull, 0xD826245A8C15406Dull,
+     0xDFB38A5B71113B72ull, 0x5EA38454C95B16B5ull, 0x28C054FB87ABF3E1ull,
+     0xAA2724C0BA1A8096ull, 0xECA83EC980304F2Full},
+    {0x6AA76EC294EB3303ull, 0x42D4CDB2A8032E3Bull, 0x7999EDF75DCD8735ull,
+     0xB422BFFE696CCDCCull, 0x8F721461FD7CCDFEull, 0x148E1A5814FDE253ull,
+     0x4DC941F4375EF8FFull, 0x27B2A9E0EB5B49CFull},
+    {0xCEA592EF9343EBE1ull, 0xF7D38B5FA7698903ull, 0x6CCBF352203FEAB6ull,
+     0x830F3095FCCDA9C5ull, 0xDBEEF4B81B81C8F4ull, 0x6D7EB9BCEECA5CF9ull,
+     0xC58ABB0FBE436C69ull, 0xE4B97E6DB2041A4Bull},
+    {0x7E40FC772978AF14ull, 0xCDDA4BBAE28354A1ull, 0xE4F993B832C32613ull,
+     0xD3608093C68A4B35ull, 0x9A3B60E01BEE3699ull, 0x03BEF248F3288713ull,
+     0x70B9294318F3E9B4ull, 0x8D2ABB913B8610DEull},
+    {0x37F209128E7D8B2Cull, 0x81D2AB375BD874BCull, 0xA716A1B7373F7408ull,
+     0x0CEE97BEC4706540ull, 0xA40C5FD9CDBC1512ull, 0x73CAF6C8918409E7ull,
+     0x45E11BCEDF0BBAA1ull, 0x612C612BFF6E6605ull},
+    {0xF8ECB14A12D0F649ull, 0xDA683CD7C01BA1ACull, 0xA2203F7510E124C1ull,
+     0x7F83E52E162F3C78ull, 0x77D2BB73456ACADBull, 0x37FC34FC840BBA6Full,
+     0x3076BC7D4C6EBC1Full, 0x4F514123632B5FA9ull},
+    {0x44D789DED935E884ull, 0xF8291591E09FEC9Full, 0xD9CED2CF32A2E4B7ull,
+     0x95F70E1EB604904Aull, 0xDE438FE43C14F6ABull, 0x4C8D23E4FAFCF8D8ull,
+     0xC716910A3067EB86ull, 0x3D6B7915315095D3ull},
+    {0x3170FDBADAB92095ull, 0x8F1963933FC5650Bull, 0x72F94F00ABECFEABull,
+     0x6E3AE826C6AAB4CEull, 0xA677A2BF31068258ull, 0x9660CDC4F363AF10ull,
+     0xD81A15A152379EF1ull, 0x5D7D285E1080A3F9ull},
+    {0xDAD5DDFF9A2249B3ull, 0x6F9721D926103FAEull, 0x1418CBB83FFA349Aull,
+     0xE71A30AD48C012B2ull, 0xBE76376C63751132ull, 0x3496467ACA713AE6ull,
+     0x8D7EC01369F991A3ull, 0xD8C73A88B96B154Eull},
+    {0x8B5D9C74AEB4833Aull, 0xF914FB3F867B912Full, 0xB894EA034936B1DCull,
+     0x8A16D21BE51C4F5Bull, 0x31FF048ED582D98Eull, 0xB95AB2F4DC65B820ull,
+     0x04082B9170561AF7ull, 0xA215610A5DC836FAull},
+    {0xB2ADE592C092FAACull, 0x7A1E683BCBF13294ull, 0xC7A4DBF86858C096ull,
+     0x3A49940F97BFF316ull, 0xCAE5C06B82C46703ull, 0xC7F413A0F951E2BDull,
+     0x6665E7BB10EB5916ull, 0x86F84A5A94EDE319ull},
+    {0x4EA199D8FAA79CA3ull, 0xDFA26E5BF1981704ull, 0x0F5E081D37FA4E01ull,
+     0x9CB632F89CD675CDull, 0x4A09DB89D48C0304ull, 0x88142742EA3C7672ull,
+     0xAC4F149E6D2E9BDBull, 0x6D9E1C23F8B1C6C6ull},
+    {0xD58BE47B92DEC0E9ull, 0x8E57573645E34328ull, 0x4CC094CCB5FB5126ull,
+     0x5F1D66AF6FB40E3Cull, 0x2BA15509132D3B00ull, 0x0D6545646120E567ull,
+     0x3CF680C45C223666ull, 0x96B28E32930179DAull},
+    {0x5900C45853AC7990ull, 0x61881E3E8B7FF169ull, 0x4DE5F835DF2230FFull,
+     0x4427A9E7932F73FFull, 0x9B641BAD379A8C8Dull, 0xDF271E5BF98F4E5Cull,
+     0xDFDA16DB830FF5EEull, 0x371C7E7CFB89C0E9ull},
+    {0x4410A8576247A250ull, 0x6AD2DA12B45AC0D9ull, 0x18DFC72AAC85EECCull,
+     0x06FC8BB2A0EF25C8ull, 0xEB287619C85E6118ull, 0x19553ECA67F25A2Cull,
+     0x3B9557F1DCEC5BAAull, 0x7BAD9E8B710D1079ull},
+    {0x34F365D66BD22B28ull, 0xE6E124B9F10F835Dull, 0x0573C38ABF2B24DCull,
+     0xD32E6AF10A0125AEull, 0x383590ACEA979519ull, 0x8376ED7A39E28205ull,
+     0xF0B7F184DCBDA435ull, 0x062A203390E31794ull},
+    {0xA2AFFD7E41918760ull, 0x7F90FC1BD0819C86ull, 0x5033C08E5A969533ull,
+     0x2707AF5C6D039590ull, 0x57BBD5980F17DF9Cull, 0xD3FE6E61D763268Aull,
+     0x9E0A0AE40F335A3Bull, 0x43CF4EB0A99613C5ull},
+    {0xD4D2A397CE1A7C2Eull, 0x3DF7CE7CC3212DADull, 0x0880F0D5D356C75Aull,
+     0xA8AFC44DD03B1346ull, 0x79263B46C13A29E0ull, 0x11071B3C0ED58E7Aull,
+     0xED46DC9F538406BFull, 0x2C94974F2B94843Dull},
+    {0xE246E13C39AB5D5Eull, 0xAC1018489D955B20ull, 0x8601B558771852B8ull,
+     0x110BD4C06DB40173ull, 0x738FC8A18CCA0EBBull, 0x6673E09BE0EA76E5ull,
+     0x024BC7A0C7527877ull, 0x45E6B4652E2EC34Eull},
+    {0xD1ED26A1A375CDC8ull, 0xAABC4E896A617CB8ull, 0x0A9C9E8E57D753C6ull,
+     0xA3774A75FEB4C30Eull, 0x30B816C01C93E49Eull, 0xF405BABC06D2408Cull,
+     0xCC0CE6B4CE788ABCull, 0x75E7922D0447956Cull},
+    {0xD07C1676A698BC95ull, 0x5F9AEA4840E2D860ull, 0xD5FC10D58BDF6F02ull,
+     0xF190A2AD4BC2EEA7ull, 0x0C24D11F51726931ull, 0xDB646899A16B6512ull,
+     0x7BC10670047B1DD8ull, 0x2413A5ABCD45F092ull},
+    {0x4E66892190CFD923ull, 0xF10162440365EC8Eull, 0x158ACA5A6A2280AEull,
+     0x0D60ED11C0224166ull, 0x7CD2E9A71B9D7488ull, 0x450D7289706AB2A3ull,
+     0x88FAE34EC9A0D7DCull, 0x96FF9103575A97DAull},
+    {0x77990FAC6046C446ull, 0xB174B5FB30C76676ull, 0xE352CE3EB56CF82Aull,
+     0xC6039B6873A9A082ull, 0xE3F80F3AE333148Aull, 0xB853BA24BA3539B9ull,
+     0xE8863E52ECCB0C74ull, 0x309B4CC1092CC245ull},
+    {0xBC2B70BEE8388D9Full, 0xE48D92AE22216DCEull, 0xF15F3BF3E2C15D8Full,
+     0x1DD964D4812D8B24ull, 0xD56AF02FB4665E4Cull, 0x98002200595BD9A3ull,
+     0x049246D50BB8FA12ull, 0x1B542DF485B579B9ull},
+    {0x2347409ADFA8E497ull, 0x36015C2211D62498ull, 0xE9F141F32EB82690ull,
+     0x1F839912D0449FB9ull, 0x4E4DCFFF2D02D97Cull, 0xF8A03AB4C0F625C9ull,
+     0x0605F575795DAC5Cull, 0x4746C9BEA0DDA6B1ull},
+    {0xCA5BB519ECE7481Bull, 0xFD496155E55CA945ull, 0xF753B9DBB1515F81ull,
+     0x50549E8BAC0F70E7ull, 0x8614FB0271E21C60ull, 0x60C72947EB0F0070ull,
+     0xA6511C10AEE742B6ull, 0x48FB48F2CACCB43Eull}};
+
+#endif  // PRINT_RESULTS
+
+// Pins the generator's output for seed 12345 against the golden table.
+void TestGolden() {
+  HWY_ALIGN Xorshift128Plus generator(12345);
+  for (uint64_t vector = 0; vector < kVectors; ++vector) {
+    HWY_ALIGN uint64_t output[Xorshift128Plus::N];
+    generator.Fill(output);
+#if PRINT_RESULTS
+    Print(output);
+#else
+    for (size_t lane = 0; lane < Xorshift128Plus::N; ++lane) {
+      ASSERT_EQ(kExpected[vector][lane], output[lane])
+          << "Where vector=" << vector << " i=" << lane;
+    }
+#endif
+  }
+}
+
+// Distinct seeds must yield distinct first outputs.
+void TestSeedChanges() {
+  constexpr size_t kNumSeeds = 16384;  // All 14-bit seeds.
+  HWY_ALIGN uint64_t output[Xorshift128Plus::N];
+
+  std::vector<uint64_t> firsts;
+  firsts.reserve(kNumSeeds);
+
+  // Record lane 0 of the first batch produced for every seed.
+  for (size_t seed = 0; seed < kNumSeeds; ++seed) {
+    HWY_ALIGN Xorshift128Plus generator(seed);
+
+    generator.Fill(output);
+    firsts.push_back(output[0]);
+  }
+
+  // Sorting and dropping duplicates must not shrink the collection.
+  ASSERT_EQ(kNumSeeds, firsts.size());
+  std::sort(firsts.begin(), firsts.end());
+  firsts.erase(std::unique(firsts.begin(), firsts.end()), firsts.end());
+  EXPECT_EQ(kNumSeeds, firsts.size());
+}
+
+void TestFloat() {  // Floats derived from the random bits must lie in [0, 1].
+  test::ThreadPoolForTests pool(8);
+
+#ifdef JXL_DISABLE_SLOW_TESTS
+  const uint32_t kMaxSeed = 256;
+#else   // JXL_DISABLE_SLOW_TESTS
+  const uint32_t kMaxSeed = 4096;
+#endif  // JXL_DISABLE_SLOW_TESTS
+  EXPECT_TRUE(RunOnPool(
+      &pool, 0, kMaxSeed, ThreadPool::NoInit,
+      [](const uint32_t seed, size_t /*thread*/) {
+        HWY_ALIGN Xorshift128Plus rng(seed);
+
+        const HWY_FULL(uint32_t) du;
+        const HWY_FULL(float) df;
+        HWY_ALIGN uint64_t batch[Xorshift128Plus::N];
+        HWY_ALIGN float lanes[MaxLanes(df)];
+        double sum = 0.0;
+        size_t count = 0;
+        const size_t kReps = 2000;
+        for (size_t reps = 0; reps < kReps; ++reps) {
+          rng.Fill(batch);
+          for (size_t i = 0; i < Xorshift128Plus::N * 2; i += Lanes(df)) {
+            const auto bits =
+                Load(du, reinterpret_cast<const uint32_t*>(batch) + i);
+            // 1.0 + 23 random mantissa bits = [1, 2)
+            const auto rand12 =
+                BitCast(df, Or(ShiftRight<9>(bits), Set(du, 0x3F800000)));
+            const auto rand01 = Sub(rand12, Set(df, 1.0f));
+            Store(rand01, df, lanes);
+            for (size_t l = 0; l < Lanes(df); ++l) {  // Only stored lanes.
+              sum += lanes[l];
+              count += 1;
+              EXPECT_LE(lanes[l], 1.0f);
+              EXPECT_GE(lanes[l], 0.0f);
+            }
+          }
+        }
+
+        // Verify average (uniform distribution)
+        EXPECT_NEAR(0.5, sum / count, 0.00702);
+      },
+      "TestXorShift"));
+}
+
+// Each seed may yield at most one zero 64-bit output across 10000 batches.
+void TestNotZero() {
+  test::ThreadPoolForTests pool(8);
+
+#ifdef JXL_DISABLE_SLOW_TESTS
+  const uint32_t kMaxSeed = 500;
+#else   // JXL_DISABLE_SLOW_TESTS
+  const uint32_t kMaxSeed = 2000;
+#endif  // JXL_DISABLE_SLOW_TESTS
+  EXPECT_TRUE(RunOnPool(
+      &pool, 0, kMaxSeed, ThreadPool::NoInit,
+      [](const uint32_t task, size_t /*thread*/) {
+        // The task index doubles as the seed.
+        HWY_ALIGN Xorshift128Plus generator(task);
+        HWY_ALIGN uint64_t output[Xorshift128Plus::N];
+        size_t zeros_seen = 0;
+        for (size_t batch = 0; batch < 10000; ++batch) {
+          generator.Fill(output);
+          for (size_t i = 0; i < Xorshift128Plus::N; ++i) {
+            if (output[i] == 0) ++zeros_seen;
+          }
+        }
+        EXPECT_LE(zeros_seen, 1u);
+      },
+      "TestNotZero"));
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+class Xorshift128Test : public hwy::TestWithParamTarget {};
+
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(Xorshift128Test);
+
+HWY_EXPORT_AND_TEST_P(Xorshift128Test, TestNotZero);
+HWY_EXPORT_AND_TEST_P(Xorshift128Test, TestGolden);
+HWY_EXPORT_AND_TEST_P(Xorshift128Test, TestSeedChanges);
+HWY_EXPORT_AND_TEST_P(Xorshift128Test, TestFloat);
+
+}  // namespace jxl
+#endif
diff --git a/third-party/libjxl/libjxl/lib/jxl_benchmark.cmake b/third-party/libjxl/libjxl/lib/jxl_benchmark.cmake
new file mode 100644
index 0000000000..10871e3073
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl_benchmark.cmake
@@ -0,0 +1,36 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include(jxl_lists.cmake)
+
+# benchmark.h doesn't work in our MINGW setup since it ends up including the
+# wrong stdlib header. We don't run gbench on MINGW targets anyway.
+if(NOT MINGW)
+
+# This is the Google benchmark project (https://github.com/google/benchmark).
+find_package(benchmark QUIET)
+
+if(benchmark_FOUND)
+  if(JPEGXL_STATIC)  # MINGW is already excluded by the enclosing if().
+    # benchmark::benchmark hardcodes librt.so, which cannot be linked in
+    # static mode, so its link interface is overridden with a plain -lrt.
+    set_target_properties(benchmark::benchmark PROPERTIES
+      INTERFACE_LINK_LIBRARIES "Threads::Threads;-lrt")
+  endif()
+
+  # Compiles all the benchmark files into a single binary. Individual benchmarks
+  # can be run with --benchmark_filter.
+  add_executable(jxl_gbench "${JPEGXL_INTERNAL_GBENCH_SOURCES}" gbench_main.cc)
+
+  target_compile_definitions(jxl_gbench PRIVATE
+    -DTEST_DATA_PATH="${JPEGXL_TEST_DATA_PATH}")
+  target_link_libraries(jxl_gbench
+    jxl_extras-static
+    jxl-static
+    benchmark::benchmark
+  )
+endif() # benchmark_FOUND
+
+endif() # NOT MINGW
diff --git a/third-party/libjxl/libjxl/lib/jxl_extras.cmake b/third-party/libjxl/libjxl/lib/jxl_extras.cmake
new file mode 100644
index 0000000000..c38d4f4d2e
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl_extras.cmake
@@ -0,0 +1,187 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include(compatibility.cmake)
+include(jxl_lists.cmake)
+
+# Object library for those parts of extras that do not depend on jxl internals
+# or jpegli. We will create two versions of these object files, one with and one
+# without external codec support compiled in.
+list(APPEND JPEGXL_EXTRAS_CORE_SOURCES
+  "${JPEGXL_INTERNAL_EXTRAS_SOURCES}"
+  "${JPEGXL_INTERNAL_CODEC_APNG_SOURCES}"
+  "${JPEGXL_INTERNAL_CODEC_EXR_SOURCES}"
+  "${JPEGXL_INTERNAL_CODEC_JPG_SOURCES}"
+  "${JPEGXL_INTERNAL_CODEC_JXL_SOURCES}"
+  "${JPEGXL_INTERNAL_CODEC_PGX_SOURCES}"
+  "${JPEGXL_INTERNAL_CODEC_PNM_SOURCES}"
+  "${JPEGXL_INTERNAL_CODEC_NPY_SOURCES}"
+  # GIF sources come from the generated list (jxl_lists.cmake) like the rest.
+  "${JPEGXL_INTERNAL_CODEC_GIF_SOURCES}"
+)
+foreach(LIB jxl_extras_core-obj jxl_extras_core_nocodec-obj)
+  add_library("${LIB}" OBJECT "${JPEGXL_EXTRAS_CORE_SOURCES}")
+  list(APPEND JXL_EXTRAS_OBJECT_LIBRARIES "${LIB}")
+endforeach()
+list(APPEND JXL_EXTRAS_OBJECTS $<TARGET_OBJECTS:jxl_extras_core-obj>)
+
+# Object library for those parts of extras that depend on jxl internals.
+add_library(jxl_extras_internal-obj OBJECT
+  "${JPEGXL_INTERNAL_EXTRAS_FOR_TOOLS_SOURCES}"
+)
+list(APPEND JXL_EXTRAS_OBJECT_LIBRARIES jxl_extras_internal-obj)
+list(APPEND JXL_EXTRAS_OBJECTS $<TARGET_OBJECTS:jxl_extras_internal-obj>)
+
+set(JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES)
+
+find_package(GIF 5.1)
+if(GIF_FOUND)
+  target_include_directories(jxl_extras_core-obj PRIVATE "${GIF_INCLUDE_DIRS}")
+  target_compile_definitions(jxl_extras_core-obj PRIVATE -DJPEGXL_ENABLE_GIF=1)
+  list(APPEND JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES ${GIF_LIBRARIES})
+  if(JPEGXL_DEP_LICENSE_DIR)
+    configure_file("${JPEGXL_DEP_LICENSE_DIR}/libgif-dev/copyright"
+                   ${PROJECT_BINARY_DIR}/LICENSE.libgif COPYONLY)
+  endif()  # JPEGXL_DEP_LICENSE_DIR
+endif()
+
+find_package(JPEG)
+if(JPEG_FOUND)
+  target_include_directories(jxl_extras_core-obj PRIVATE "${JPEG_INCLUDE_DIRS}")
+  target_compile_definitions(jxl_extras_core-obj PRIVATE -DJPEGXL_ENABLE_JPEG=1)
+  list(APPEND JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES ${JPEG_LIBRARIES})
+  if(JPEGXL_DEP_LICENSE_DIR)
+    configure_file("${JPEGXL_DEP_LICENSE_DIR}/libjpeg-dev/copyright"
+                   ${PROJECT_BINARY_DIR}/LICENSE.libjpeg COPYONLY)
+  endif()  # JPEGXL_DEP_LICENSE_DIR
+endif()
+
+if (JPEGXL_ENABLE_SJPEG)
+  target_compile_definitions(jxl_extras_core-obj PRIVATE
+    -DJPEGXL_ENABLE_SJPEG=1)
+  target_include_directories(jxl_extras_core-obj PRIVATE
+    ../third_party/sjpeg/src)
+  list(APPEND JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES sjpeg)
+endif()
+
+if(JPEGXL_ENABLE_JPEGLI)
+  add_library(jxl_extras_jpegli-obj OBJECT
+    "${JPEGXL_INTERNAL_CODEC_JPEGLI_SOURCES}"
+  )
+  target_include_directories(jxl_extras_jpegli-obj PRIVATE
+    "${CMAKE_CURRENT_BINARY_DIR}/include/jpegli"
+  )
+  list(APPEND JXL_EXTRAS_OBJECT_LIBRARIES jxl_extras_jpegli-obj)
+  list(APPEND JXL_EXTRAS_OBJECTS $<TARGET_OBJECTS:jxl_extras_jpegli-obj>)
+endif()
+
+if(NOT JPEGXL_BUNDLE_LIBPNG)
+  find_package(PNG)
+endif()
+if(PNG_FOUND)
+  target_include_directories(jxl_extras_core-obj PRIVATE "${PNG_INCLUDE_DIRS}")
+  target_compile_definitions(jxl_extras_core-obj PRIVATE -DJPEGXL_ENABLE_APNG=1)
+  list(APPEND JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES ${PNG_LIBRARIES})
+  configure_file(extras/LICENSE.apngdis
+                 ${PROJECT_BINARY_DIR}/LICENSE.apngdis COPYONLY)
+endif()
+
+if (JPEGXL_ENABLE_OPENEXR)
+pkg_check_modules(OpenEXR IMPORTED_TARGET OpenEXR)
+if (OpenEXR_FOUND)
+  target_include_directories(jxl_extras_core-obj PRIVATE
+    "${OpenEXR_INCLUDE_DIRS}"
+  )
+  target_compile_definitions(jxl_extras_core-obj PRIVATE -DJPEGXL_ENABLE_EXR=1)
+  list(APPEND JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES PkgConfig::OpenEXR)
+  if(JPEGXL_DEP_LICENSE_DIR)
+    configure_file("${JPEGXL_DEP_LICENSE_DIR}/libopenexr-dev/copyright"
+                   ${PROJECT_BINARY_DIR}/LICENSE.libopenexr COPYONLY)
+  endif()  # JPEGXL_DEP_LICENSE_DIR
+  # OpenEXR throws exceptions, so the exr sources need exception support to
+  # catch them. These flags counteract the ones in JPEGXL_INTERNAL_FLAGS.
+  if (NOT WIN32)
+    set(JXL_EXR_EXCEPTION_FLAGS "-fexceptions")
+    if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+      # Setting COMPILE_FLAGS a second time would replace -fexceptions; append.
+      set(JXL_EXR_EXCEPTION_FLAGS "${JXL_EXR_EXCEPTION_FLAGS} -fcxx-exceptions")
+    endif()
+    set_source_files_properties(extras/dec/exr.cc extras/enc/exr.cc
+      PROPERTIES COMPILE_FLAGS "${JXL_EXR_EXCEPTION_FLAGS}")
+  endif()
+endif() # OpenEXR_FOUND
+endif() # JPEGXL_ENABLE_OPENEXR
+
+# Common settings for the object libraries.
+foreach(LIB ${JXL_EXTRAS_OBJECT_LIBRARIES})
+  target_compile_options("${LIB}" PRIVATE "${JPEGXL_INTERNAL_FLAGS}")
+  target_compile_definitions("${LIB}" PRIVATE -DJXL_EXPORT=)
+  set_property(TARGET "${LIB}" PROPERTY POSITION_INDEPENDENT_CODE ON)
+  target_include_directories("${LIB}" PRIVATE
+    ${PROJECT_SOURCE_DIR}
+    ${CMAKE_CURRENT_SOURCE_DIR}/include
+    ${CMAKE_CURRENT_BINARY_DIR}/include
+    ${JXL_HWY_INCLUDE_DIRS}
+  )
+endforeach()
+
+# Define an extras library that does not have the image codecs, only the core
+# extras code. This is needed for some of the fuzzers.
+add_library(jxl_extras_nocodec-static STATIC EXCLUDE_FROM_ALL
+  $<TARGET_OBJECTS:jxl_extras_core_nocodec-obj>
+  $<TARGET_OBJECTS:jxl_extras_internal-obj>
+)
+target_link_libraries(jxl_extras_nocodec-static PUBLIC
+  jxl-static
+  jxl_threads-static
+)
+
+# We only define a static library jxl_extras since it uses internal parts of
+# jxl library which are not accessible from outside the library in the
+# shared library case.
+add_library(jxl_extras-static STATIC EXCLUDE_FROM_ALL ${JXL_EXTRAS_OBJECTS})
+target_link_libraries(jxl_extras-static PUBLIC
+  ${JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES}
+  jxl-static
+  jxl_threads-static
+)
+if(JPEGXL_ENABLE_JPEGLI)
+  target_compile_definitions(jxl_extras-static PUBLIC -DJPEGXL_ENABLE_JPEGLI=1)
+  target_link_libraries(jxl_extras-static PRIVATE jpegli-static)
+endif()
+
+### Static library that does not depend on internal parts of jxl library.
+add_library(jxl_extras_codec-static STATIC
+  $<TARGET_OBJECTS:jxl_extras_core-obj>
+)
+target_link_libraries(jxl_extras_codec-static PRIVATE
+  ${JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES}
+  jxl
+)
+
+### Shared library that does not depend on internal parts of jxl library.
+### Used by cjxl and djxl binaries.
+if (BUILD_SHARED_LIBS)
+add_library(jxl_extras_codec SHARED
+  $<TARGET_OBJECTS:jxl_extras_core-obj>
+)
+target_link_libraries(jxl_extras_codec PRIVATE
+  ${JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES}
+  jxl
+)
+set_target_properties(jxl_extras_codec PROPERTIES
+  VERSION ${JPEGXL_LIBRARY_VERSION}
+  SOVERSION ${JPEGXL_LIBRARY_SOVERSION}
+  LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}"
+  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}"
+)
+install(TARGETS jxl_extras_codec
+  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+)
+else()
+add_library(jxl_extras_codec ALIAS jxl_extras_codec-static)
+endif()  # BUILD_SHARED_LIBS
diff --git a/third-party/libjxl/libjxl/lib/jxl_lists.bzl b/third-party/libjxl/libjxl/lib/jxl_lists.bzl
new file mode 100644
index 0000000000..e3418cd9a5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl_lists.bzl
@@ -0,0 +1,644 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# This file is generated; do not modify it manually.
+# Run `tools/scripts/build_cleaner.py --update` to regenerate it.
+
+libjxl_base_sources = [
+    "jxl/base/arch_macros.h",
+    "jxl/base/bits.h",
+    "jxl/base/byte_order.h",
+    "jxl/base/cache_aligned.cc",
+    "jxl/base/cache_aligned.h",
+    "jxl/base/compiler_specific.h",
+    "jxl/base/data_parallel.cc",
+    "jxl/base/data_parallel.h",
+    "jxl/base/float.h",
+    "jxl/base/iaca.h",
+    "jxl/base/os_macros.h",
+    "jxl/base/override.h",
+    "jxl/base/padded_bytes.cc",
+    "jxl/base/padded_bytes.h",
+    "jxl/base/printf_macros.h",
+    "jxl/base/random.cc",
+    "jxl/base/random.h",
+    "jxl/base/sanitizer_definitions.h",
+    "jxl/base/scope_guard.h",
+    "jxl/base/span.h",
+    "jxl/base/status.h",
+]
+
+libjxl_codec_apng_sources = [
+    "extras/dec/apng.cc",
+    "extras/dec/apng.h",
+    "extras/enc/apng.cc",
+    "extras/enc/apng.h",
+]
+
+libjxl_codec_exr_sources = [
+    "extras/dec/exr.cc",
+    "extras/dec/exr.h",
+    "extras/enc/exr.cc",
+    "extras/enc/exr.h",
+]
+
+libjxl_codec_gif_sources = [
+    "extras/dec/gif.cc",
+    "extras/dec/gif.h",
+]
+
+libjxl_codec_jpegli_sources = [
+    "extras/dec/jpegli.cc",
+    "extras/dec/jpegli.h",
+    "extras/enc/jpegli.cc",
+    "extras/enc/jpegli.h",
+]
+
+libjxl_codec_jpg_sources = [
+    "extras/dec/jpg.cc",
+    "extras/dec/jpg.h",
+    "extras/enc/jpg.cc",
+    "extras/enc/jpg.h",
+]
+
+libjxl_codec_jxl_sources = [
+    "extras/dec/jxl.cc",
+    "extras/dec/jxl.h",
+    "extras/enc/jxl.cc",
+    "extras/enc/jxl.h",
+]
+
+libjxl_codec_npy_sources = [
+    "extras/enc/npy.cc",
+    "extras/enc/npy.h",
+]
+
+libjxl_codec_pgx_sources = [
+    "extras/dec/pgx.cc",
+    "extras/dec/pgx.h",
+    "extras/enc/pgx.cc",
+    "extras/enc/pgx.h",
+]
+
+libjxl_codec_pnm_sources = [
+    "extras/dec/pnm.cc",
+    "extras/dec/pnm.h",
+    "extras/enc/pnm.cc",
+    "extras/enc/pnm.h",
+]
+
+libjxl_dec_box_sources = [
+    "jxl/box_content_decoder.cc",
+    "jxl/box_content_decoder.h",
+]
+
+libjxl_dec_jpeg_sources = [
+    "jxl/decode_to_jpeg.cc",
+    "jxl/decode_to_jpeg.h",
+    "jxl/jpeg/dec_jpeg_data.cc",
+    "jxl/jpeg/dec_jpeg_data.h",
+    "jxl/jpeg/dec_jpeg_data_writer.cc",
+    "jxl/jpeg/dec_jpeg_data_writer.h",
+    "jxl/jpeg/dec_jpeg_output_chunk.h",
+    "jxl/jpeg/dec_jpeg_serialization_state.h",
+    "jxl/jpeg/jpeg_data.cc",
+    "jxl/jpeg/jpeg_data.h",
+]
+
+libjxl_dec_sources = [
+    "jxl/ac_context.h",
+    "jxl/ac_strategy.cc",
+    "jxl/ac_strategy.h",
+    "jxl/alpha.cc",
+    "jxl/alpha.h",
+    "jxl/ans_common.cc",
+    "jxl/ans_common.h",
+    "jxl/ans_params.h",
+    "jxl/blending.cc",
+    "jxl/blending.h",
+    "jxl/chroma_from_luma.cc",
+    "jxl/chroma_from_luma.h",
+    "jxl/codec_in_out.h",
+    "jxl/coeff_order.cc",
+    "jxl/coeff_order.h",
+    "jxl/coeff_order_fwd.h",
+    "jxl/color_encoding_internal.cc",
+    "jxl/color_encoding_internal.h",
+    "jxl/color_management.cc",
+    "jxl/color_management.h",
+    "jxl/common.h",
+    "jxl/compressed_dc.cc",
+    "jxl/compressed_dc.h",
+    "jxl/convolve-inl.h",
+    "jxl/convolve.h",
+    "jxl/convolve_separable5.cc",
+    "jxl/convolve_separable7.cc",
+    "jxl/convolve_slow.cc",
+    "jxl/convolve_symmetric3.cc",
+    "jxl/convolve_symmetric5.cc",
+    "jxl/dct-inl.h",
+    "jxl/dct_block-inl.h",
+    "jxl/dct_scales.cc",
+    "jxl/dct_scales.h",
+    "jxl/dct_util.h",
+    "jxl/dec_ans.cc",
+    "jxl/dec_ans.h",
+    "jxl/dec_bit_reader.h",
+    "jxl/dec_cache.cc",
+    "jxl/dec_cache.h",
+    "jxl/dec_context_map.cc",
+    "jxl/dec_context_map.h",
+    "jxl/dec_external_image.cc",
+    "jxl/dec_external_image.h",
+    "jxl/dec_frame.cc",
+    "jxl/dec_frame.h",
+    "jxl/dec_group.cc",
+    "jxl/dec_group.h",
+    "jxl/dec_group_border.cc",
+    "jxl/dec_group_border.h",
+    "jxl/dec_huffman.cc",
+    "jxl/dec_huffman.h",
+    "jxl/dec_modular.cc",
+    "jxl/dec_modular.h",
+    "jxl/dec_noise.cc",
+    "jxl/dec_noise.h",
+    "jxl/dec_patch_dictionary.cc",
+    "jxl/dec_patch_dictionary.h",
+    "jxl/dec_tone_mapping-inl.h",
+    "jxl/dec_transforms-inl.h",
+    "jxl/dec_xyb-inl.h",
+    "jxl/dec_xyb.cc",
+    "jxl/dec_xyb.h",
+    "jxl/decode.cc",
+    "jxl/entropy_coder.cc",
+    "jxl/entropy_coder.h",
+    "jxl/epf.cc",
+    "jxl/epf.h",
+    "jxl/exif.h",
+    "jxl/fast_dct-inl.h",
+    "jxl/fast_dct.cc",
+    "jxl/fast_dct.h",
+    "jxl/fast_dct128-inl.h",
+    "jxl/fast_dct16-inl.h",
+    "jxl/fast_dct256-inl.h",
+    "jxl/fast_dct32-inl.h",
+    "jxl/fast_dct64-inl.h",
+    "jxl/fast_dct8-inl.h",
+    "jxl/fast_math-inl.h",
+    "jxl/field_encodings.h",
+    "jxl/fields.cc",
+    "jxl/fields.h",
+    "jxl/frame_header.cc",
+    "jxl/frame_header.h",
+    "jxl/gauss_blur.cc",
+    "jxl/gauss_blur.h",
+    "jxl/headers.cc",
+    "jxl/headers.h",
+    "jxl/huffman_table.cc",
+    "jxl/huffman_table.h",
+    "jxl/icc_codec.cc",
+    "jxl/icc_codec.h",
+    "jxl/icc_codec_common.cc",
+    "jxl/icc_codec_common.h",
+    "jxl/image.cc",
+    "jxl/image.h",
+    "jxl/image_bundle.cc",
+    "jxl/image_bundle.h",
+    "jxl/image_metadata.cc",
+    "jxl/image_metadata.h",
+    "jxl/image_ops.h",
+    "jxl/inverse_mtf-inl.h",
+    "jxl/lehmer_code.h",
+    "jxl/loop_filter.cc",
+    "jxl/loop_filter.h",
+    "jxl/luminance.cc",
+    "jxl/luminance.h",
+    "jxl/matrix_ops.h",
+    "jxl/memory_manager_internal.cc",
+    "jxl/memory_manager_internal.h",
+    "jxl/modular/encoding/context_predict.h",
+    "jxl/modular/encoding/dec_ma.cc",
+    "jxl/modular/encoding/dec_ma.h",
+    "jxl/modular/encoding/encoding.cc",
+    "jxl/modular/encoding/encoding.h",
+    "jxl/modular/encoding/ma_common.h",
+    "jxl/modular/modular_image.cc",
+    "jxl/modular/modular_image.h",
+    "jxl/modular/options.h",
+    "jxl/modular/transform/palette.cc",
+    "jxl/modular/transform/palette.h",
+    "jxl/modular/transform/rct.cc",
+    "jxl/modular/transform/rct.h",
+    "jxl/modular/transform/squeeze.cc",
+    "jxl/modular/transform/squeeze.h",
+    "jxl/modular/transform/transform.cc",
+    "jxl/modular/transform/transform.h",
+    "jxl/noise.h",
+    "jxl/opsin_params.cc",
+    "jxl/opsin_params.h",
+    "jxl/passes_state.cc",
+    "jxl/passes_state.h",
+    "jxl/patch_dictionary_internal.h",
+    "jxl/quant_weights.cc",
+    "jxl/quant_weights.h",
+    "jxl/quantizer-inl.h",
+    "jxl/quantizer.cc",
+    "jxl/quantizer.h",
+    "jxl/rational_polynomial-inl.h",
+    "jxl/render_pipeline/low_memory_render_pipeline.cc",
+    "jxl/render_pipeline/low_memory_render_pipeline.h",
+    "jxl/render_pipeline/render_pipeline.cc",
+    "jxl/render_pipeline/render_pipeline.h",
+    "jxl/render_pipeline/render_pipeline_stage.h",
+    "jxl/render_pipeline/simple_render_pipeline.cc",
+    "jxl/render_pipeline/simple_render_pipeline.h",
+    "jxl/render_pipeline/stage_blending.cc",
+    "jxl/render_pipeline/stage_blending.h",
+    "jxl/render_pipeline/stage_chroma_upsampling.cc",
+    "jxl/render_pipeline/stage_chroma_upsampling.h",
+    "jxl/render_pipeline/stage_epf.cc",
+    "jxl/render_pipeline/stage_epf.h",
+    "jxl/render_pipeline/stage_from_linear.cc",
+    "jxl/render_pipeline/stage_from_linear.h",
+    "jxl/render_pipeline/stage_gaborish.cc",
+    "jxl/render_pipeline/stage_gaborish.h",
+    "jxl/render_pipeline/stage_noise.cc",
+    "jxl/render_pipeline/stage_noise.h",
+    "jxl/render_pipeline/stage_patches.cc",
+    "jxl/render_pipeline/stage_patches.h",
+    "jxl/render_pipeline/stage_splines.cc",
+    "jxl/render_pipeline/stage_splines.h",
+    "jxl/render_pipeline/stage_spot.cc",
+    "jxl/render_pipeline/stage_spot.h",
+    "jxl/render_pipeline/stage_to_linear.cc",
+    "jxl/render_pipeline/stage_to_linear.h",
+    "jxl/render_pipeline/stage_tone_mapping.cc",
+    "jxl/render_pipeline/stage_tone_mapping.h",
+    "jxl/render_pipeline/stage_upsampling.cc",
+    "jxl/render_pipeline/stage_upsampling.h",
+    "jxl/render_pipeline/stage_write.cc",
+    "jxl/render_pipeline/stage_write.h",
+    "jxl/render_pipeline/stage_xyb.cc",
+    "jxl/render_pipeline/stage_xyb.h",
+    "jxl/render_pipeline/stage_ycbcr.cc",
+    "jxl/render_pipeline/stage_ycbcr.h",
+    "jxl/sanitizers.h",
+    "jxl/simd_util-inl.h",
+    "jxl/splines.cc",
+    "jxl/splines.h",
+    "jxl/toc.cc",
+    "jxl/toc.h",
+    "jxl/transfer_functions-inl.h",
+    "jxl/transpose-inl.h",
+    "jxl/xorshift128plus-inl.h",
+]
+
+libjxl_enc_sources = [
+    "jxl/butteraugli/butteraugli.cc",
+    "jxl/butteraugli/butteraugli.h",
+    "jxl/enc_ac_strategy.cc",
+    "jxl/enc_ac_strategy.h",
+    "jxl/enc_adaptive_quantization.cc",
+    "jxl/enc_adaptive_quantization.h",
+    "jxl/enc_ans.cc",
+    "jxl/enc_ans.h",
+    "jxl/enc_ans_params.h",
+    "jxl/enc_ar_control_field.cc",
+    "jxl/enc_ar_control_field.h",
+    "jxl/enc_aux_out.cc",
+    "jxl/enc_aux_out.h",
+    "jxl/enc_bit_writer.cc",
+    "jxl/enc_bit_writer.h",
+    "jxl/enc_butteraugli_comparator.cc",
+    "jxl/enc_butteraugli_comparator.h",
+    "jxl/enc_cache.cc",
+    "jxl/enc_cache.h",
+    "jxl/enc_chroma_from_luma.cc",
+    "jxl/enc_chroma_from_luma.h",
+    "jxl/enc_cluster.cc",
+    "jxl/enc_cluster.h",
+    "jxl/enc_coeff_order.cc",
+    "jxl/enc_coeff_order.h",
+    "jxl/enc_color_management.cc",
+    "jxl/enc_color_management.h",
+    "jxl/enc_comparator.cc",
+    "jxl/enc_comparator.h",
+    "jxl/enc_context_map.cc",
+    "jxl/enc_context_map.h",
+    "jxl/enc_debug_image.cc",
+    "jxl/enc_debug_image.h",
+    "jxl/enc_detect_dots.cc",
+    "jxl/enc_detect_dots.h",
+    "jxl/enc_dot_dictionary.cc",
+    "jxl/enc_dot_dictionary.h",
+    "jxl/enc_entropy_coder.cc",
+    "jxl/enc_entropy_coder.h",
+    "jxl/enc_external_image.cc",
+    "jxl/enc_external_image.h",
+    "jxl/enc_fast_lossless.cc",
+    "jxl/enc_fast_lossless.h",
+    "jxl/enc_fields.cc",
+    "jxl/enc_fields.h",
+    "jxl/enc_file.cc",
+    "jxl/enc_file.h",
+    "jxl/enc_frame.cc",
+    "jxl/enc_frame.h",
+    "jxl/enc_gaborish.cc",
+    "jxl/enc_gaborish.h",
+    "jxl/enc_gamma_correct.h",
+    "jxl/enc_group.cc",
+    "jxl/enc_group.h",
+    "jxl/enc_heuristics.cc",
+    "jxl/enc_heuristics.h",
+    "jxl/enc_huffman.cc",
+    "jxl/enc_huffman.h",
+    "jxl/enc_huffman_tree.cc",
+    "jxl/enc_huffman_tree.h",
+    "jxl/enc_icc_codec.cc",
+    "jxl/enc_icc_codec.h",
+    "jxl/enc_image_bundle.cc",
+    "jxl/enc_image_bundle.h",
+    "jxl/enc_jxl_skcms.h",
+    "jxl/enc_linalg.cc",
+    "jxl/enc_linalg.h",
+    "jxl/enc_modular.cc",
+    "jxl/enc_modular.h",
+    "jxl/enc_noise.cc",
+    "jxl/enc_noise.h",
+    "jxl/enc_optimize.cc",
+    "jxl/enc_optimize.h",
+    "jxl/enc_params.h",
+    "jxl/enc_patch_dictionary.cc",
+    "jxl/enc_patch_dictionary.h",
+    "jxl/enc_photon_noise.cc",
+    "jxl/enc_photon_noise.h",
+    "jxl/enc_progressive_split.cc",
+    "jxl/enc_progressive_split.h",
+    "jxl/enc_quant_weights.cc",
+    "jxl/enc_quant_weights.h",
+    "jxl/enc_splines.cc",
+    "jxl/enc_splines.h",
+    "jxl/enc_toc.cc",
+    "jxl/enc_toc.h",
+    "jxl/enc_transforms-inl.h",
+    "jxl/enc_transforms.cc",
+    "jxl/enc_transforms.h",
+    "jxl/enc_xyb.cc",
+    "jxl/enc_xyb.h",
+    "jxl/encode.cc",
+    "jxl/encode_internal.h",
+    "jxl/jpeg/enc_jpeg_data.cc",
+    "jxl/jpeg/enc_jpeg_data.h",
+    "jxl/jpeg/enc_jpeg_data_reader.cc",
+    "jxl/jpeg/enc_jpeg_data_reader.h",
+    "jxl/jpeg/enc_jpeg_huffman_decode.cc",
+    "jxl/jpeg/enc_jpeg_huffman_decode.h",
+    "jxl/modular/encoding/enc_debug_tree.cc",
+    "jxl/modular/encoding/enc_debug_tree.h",
+    "jxl/modular/encoding/enc_encoding.cc",
+    "jxl/modular/encoding/enc_encoding.h",
+    "jxl/modular/encoding/enc_ma.cc",
+    "jxl/modular/encoding/enc_ma.h",
+    "jxl/modular/transform/enc_palette.cc",
+    "jxl/modular/transform/enc_palette.h",
+    "jxl/modular/transform/enc_rct.cc",
+    "jxl/modular/transform/enc_rct.h",
+    "jxl/modular/transform/enc_squeeze.cc",
+    "jxl/modular/transform/enc_squeeze.h",
+    "jxl/modular/transform/enc_transform.cc",
+    "jxl/modular/transform/enc_transform.h",
+]
+
+libjxl_extras_for_tools_sources = [
+    "extras/codec.cc",
+    "extras/codec.h",
+    "extras/hlg.cc",
+    "extras/hlg.h",
+    "extras/metrics.cc",
+    "extras/metrics.h",
+    "extras/packed_image_convert.cc",
+    "extras/packed_image_convert.h",
+    "extras/tone_mapping.cc",
+    "extras/tone_mapping.h",
+]
+
+libjxl_extras_sources = [
+    "extras/dec/color_description.cc",
+    "extras/dec/color_description.h",
+    "extras/dec/color_hints.cc",
+    "extras/dec/color_hints.h",
+    "extras/dec/decode.cc",
+    "extras/dec/decode.h",
+    "extras/enc/encode.cc",
+    "extras/enc/encode.h",
+    "extras/exif.cc",
+    "extras/exif.h",
+    "extras/packed_image.h",
+    "extras/size_constraints.h",
+    "extras/time.cc",
+    "extras/time.h",
+]
+
+libjxl_gbench_sources = [
+    "extras/tone_mapping_gbench.cc",
+    "jxl/dec_external_image_gbench.cc",
+    "jxl/enc_external_image_gbench.cc",
+    "jxl/gauss_blur_gbench.cc",
+    "jxl/splines_gbench.cc",
+    "jxl/tf_gbench.cc",
+]
+
+libjxl_jpegli_lib_version = 62
+
+libjxl_jpegli_libjpeg_helper_files = [
+    "jpegli/libjpeg_test_util.cc",
+    "jpegli/libjpeg_test_util.h",
+]
+
+libjxl_jpegli_sources = [
+    "jpegli/adaptive_quantization.cc",
+    "jpegli/adaptive_quantization.h",
+    "jpegli/bit_writer.cc",
+    "jpegli/bit_writer.h",
+    "jpegli/bitstream.cc",
+    "jpegli/bitstream.h",
+    "jpegli/color_quantize.cc",
+    "jpegli/color_quantize.h",
+    "jpegli/color_transform.cc",
+    "jpegli/color_transform.h",
+    "jpegli/common.cc",
+    "jpegli/common.h",
+    "jpegli/common_internal.h",
+    "jpegli/dct-inl.h",
+    "jpegli/decode.cc",
+    "jpegli/decode.h",
+    "jpegli/decode_internal.h",
+    "jpegli/decode_marker.cc",
+    "jpegli/decode_marker.h",
+    "jpegli/decode_scan.cc",
+    "jpegli/decode_scan.h",
+    "jpegli/destination_manager.cc",
+    "jpegli/downsample.cc",
+    "jpegli/downsample.h",
+    "jpegli/encode.cc",
+    "jpegli/encode.h",
+    "jpegli/encode_finish.cc",
+    "jpegli/encode_finish.h",
+    "jpegli/encode_internal.h",
+    "jpegli/encode_streaming.cc",
+    "jpegli/encode_streaming.h",
+    "jpegli/entropy_coding-inl.h",
+    "jpegli/entropy_coding.cc",
+    "jpegli/entropy_coding.h",
+    "jpegli/error.cc",
+    "jpegli/error.h",
+    "jpegli/huffman.cc",
+    "jpegli/huffman.h",
+    "jpegli/idct.cc",
+    "jpegli/idct.h",
+    "jpegli/input.cc",
+    "jpegli/input.h",
+    "jpegli/memory_manager.cc",
+    "jpegli/memory_manager.h",
+    "jpegli/quant.cc",
+    "jpegli/quant.h",
+    "jpegli/render.cc",
+    "jpegli/render.h",
+    "jpegli/simd.cc",
+    "jpegli/simd.h",
+    "jpegli/source_manager.cc",
+    "jpegli/transpose-inl.h",
+    "jpegli/types.h",
+    "jpegli/upsample.cc",
+    "jpegli/upsample.h",
+]
+
+libjxl_jpegli_testlib_files = [
+    "jpegli/test_params.h",
+    "jpegli/test_utils-inl.h",
+    "jpegli/test_utils.cc",
+    "jpegli/test_utils.h",
+]
+
+libjxl_jpegli_tests = [
+    "jpegli/decode_api_test.cc",
+    "jpegli/encode_api_test.cc",
+    "jpegli/error_handling_test.cc",
+    "jpegli/input_suspension_test.cc",
+    "jpegli/output_suspension_test.cc",
+    "jpegli/source_manager_test.cc",
+    "jpegli/streaming_test.cc",
+    "jpegli/transcode_api_test.cc",
+]
+
+libjxl_jpegli_wrapper_sources = [
+    "jpegli/libjpeg_wrapper.cc",
+]
+
+libjxl_major_version = 0
+
+libjxl_minor_version = 9
+
+libjxl_patch_version = 0
+
+libjxl_public_headers = [
+    "include/jxl/cms_interface.h",
+    "include/jxl/codestream_header.h",
+    "include/jxl/color_encoding.h",
+    "include/jxl/decode.h",
+    "include/jxl/decode_cxx.h",
+    "include/jxl/encode.h",
+    "include/jxl/encode_cxx.h",
+    "include/jxl/memory_manager.h",
+    "include/jxl/parallel_runner.h",
+    "include/jxl/stats.h",
+    "include/jxl/types.h",
+]
+
+libjxl_testlib_files = [
+    "jxl/dct_for_test.h",
+    "jxl/dec_transforms_testonly.cc",
+    "jxl/dec_transforms_testonly.h",
+    "jxl/fake_parallel_runner_testonly.h",
+    "jxl/image_test_utils.h",
+    "jxl/render_pipeline/test_render_pipeline_stages.h",
+    "jxl/test_image.cc",
+    "jxl/test_image.h",
+    "jxl/test_utils.cc",
+    "jxl/test_utils.h",
+]
+
+libjxl_tests = [
+    "extras/codec_test.cc",
+    "extras/dec/color_description_test.cc",
+    "extras/dec/pgx_test.cc",
+    "extras/jpegli_test.cc",
+    "jxl/ac_strategy_test.cc",
+    "jxl/alpha_test.cc",
+    "jxl/ans_common_test.cc",
+    "jxl/ans_test.cc",
+    "jxl/bit_reader_test.cc",
+    "jxl/bits_test.cc",
+    "jxl/blending_test.cc",
+    "jxl/byte_order_test.cc",
+    "jxl/coeff_order_test.cc",
+    "jxl/color_encoding_internal_test.cc",
+    "jxl/color_management_test.cc",
+    "jxl/convolve_test.cc",
+    "jxl/data_parallel_test.cc",
+    "jxl/dct_test.cc",
+    "jxl/decode_test.cc",
+    "jxl/enc_external_image_test.cc",
+    "jxl/enc_gaborish_test.cc",
+    "jxl/enc_linalg_test.cc",
+    "jxl/enc_optimize_test.cc",
+    "jxl/enc_photon_noise_test.cc",
+    "jxl/encode_test.cc",
+    "jxl/entropy_coder_test.cc",
+    "jxl/fast_dct_test.cc",
+    "jxl/fast_math_test.cc",
+    "jxl/fields_test.cc",
+    "jxl/gamma_correct_test.cc",
+    "jxl/gauss_blur_test.cc",
+    "jxl/gradient_test.cc",
+    "jxl/iaca_test.cc",
+    "jxl/icc_codec_test.cc",
+    "jxl/image_bundle_test.cc",
+    "jxl/image_ops_test.cc",
+    "jxl/jxl_test.cc",
+    "jxl/lehmer_code_test.cc",
+    "jxl/modular_test.cc",
+    "jxl/opsin_image_test.cc",
+    "jxl/opsin_inverse_test.cc",
+    "jxl/padded_bytes_test.cc",
+    "jxl/passes_test.cc",
+    "jxl/patch_dictionary_test.cc",
+    "jxl/preview_test.cc",
+    "jxl/quant_weights_test.cc",
+    "jxl/quantizer_test.cc",
+    "jxl/rational_polynomial_test.cc",
+    "jxl/render_pipeline/render_pipeline_test.cc",
+    "jxl/roundtrip_test.cc",
+    "jxl/simd_util_test.cc",
+    "jxl/speed_tier_test.cc",
+    "jxl/splines_test.cc",
+    "jxl/toc_test.cc",
+    "jxl/xorshift128plus_test.cc",
+    "threads/thread_parallel_runner_test.cc",
+]
+
+libjxl_threads_public_headers = [
+    "include/jxl/resizable_parallel_runner.h",
+    "include/jxl/resizable_parallel_runner_cxx.h",
+    "include/jxl/thread_parallel_runner.h",
+    "include/jxl/thread_parallel_runner_cxx.h",
+]
+
+libjxl_threads_sources = [
+    "threads/resizable_parallel_runner.cc",
+    "threads/thread_parallel_runner.cc",
+    "threads/thread_parallel_runner_internal.cc",
+    "threads/thread_parallel_runner_internal.h",
+]
diff --git a/third-party/libjxl/libjxl/lib/jxl_lists.cmake b/third-party/libjxl/libjxl/lib/jxl_lists.cmake
new file mode 100644
index 0000000000..7813c46618
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl_lists.cmake
@@ -0,0 +1,636 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# This file is generated; do not modify it manually.
+# Run `tools/scripts/build_cleaner.py --update` to regenerate it.
+
+set(JPEGXL_INTERNAL_BASE_SOURCES
+  jxl/base/arch_macros.h
+  jxl/base/bits.h
+  jxl/base/byte_order.h
+  jxl/base/cache_aligned.cc
+  jxl/base/cache_aligned.h
+  jxl/base/compiler_specific.h
+  jxl/base/data_parallel.cc
+  jxl/base/data_parallel.h
+  jxl/base/float.h
+  jxl/base/iaca.h
+  jxl/base/os_macros.h
+  jxl/base/override.h
+  jxl/base/padded_bytes.cc
+  jxl/base/padded_bytes.h
+  jxl/base/printf_macros.h
+  jxl/base/random.cc
+  jxl/base/random.h
+  jxl/base/sanitizer_definitions.h
+  jxl/base/scope_guard.h
+  jxl/base/span.h
+  jxl/base/status.h
+)
+
+set(JPEGXL_INTERNAL_CODEC_APNG_SOURCES
+  extras/dec/apng.cc
+  extras/dec/apng.h
+  extras/enc/apng.cc
+  extras/enc/apng.h
+)
+
+set(JPEGXL_INTERNAL_CODEC_EXR_SOURCES
+  extras/dec/exr.cc
+  extras/dec/exr.h
+  extras/enc/exr.cc
+  extras/enc/exr.h
+)
+
+set(JPEGXL_INTERNAL_CODEC_GIF_SOURCES
+  extras/dec/gif.cc
+  extras/dec/gif.h
+)
+
+set(JPEGXL_INTERNAL_CODEC_JPEGLI_SOURCES
+  extras/dec/jpegli.cc
+  extras/dec/jpegli.h
+  extras/enc/jpegli.cc
+  extras/enc/jpegli.h
+)
+
+set(JPEGXL_INTERNAL_CODEC_JPG_SOURCES
+  extras/dec/jpg.cc
+  extras/dec/jpg.h
+  extras/enc/jpg.cc
+  extras/enc/jpg.h
+)
+
+set(JPEGXL_INTERNAL_CODEC_JXL_SOURCES
+  extras/dec/jxl.cc
+  extras/dec/jxl.h
+  extras/enc/jxl.cc
+  extras/enc/jxl.h
+)
+
+set(JPEGXL_INTERNAL_CODEC_NPY_SOURCES
+  extras/enc/npy.cc
+  extras/enc/npy.h
+)
+
+set(JPEGXL_INTERNAL_CODEC_PGX_SOURCES
+  extras/dec/pgx.cc
+  extras/dec/pgx.h
+  extras/enc/pgx.cc
+  extras/enc/pgx.h
+)
+
+set(JPEGXL_INTERNAL_CODEC_PNM_SOURCES
+  extras/dec/pnm.cc
+  extras/dec/pnm.h
+  extras/enc/pnm.cc
+  extras/enc/pnm.h
+)
+
+set(JPEGXL_INTERNAL_DEC_BOX_SOURCES
+  jxl/box_content_decoder.cc
+  jxl/box_content_decoder.h
+)
+
+set(JPEGXL_INTERNAL_DEC_JPEG_SOURCES
+  jxl/decode_to_jpeg.cc
+  jxl/decode_to_jpeg.h
+  jxl/jpeg/dec_jpeg_data.cc
+  jxl/jpeg/dec_jpeg_data.h
+  jxl/jpeg/dec_jpeg_data_writer.cc
+  jxl/jpeg/dec_jpeg_data_writer.h
+  jxl/jpeg/dec_jpeg_output_chunk.h
+  jxl/jpeg/dec_jpeg_serialization_state.h
+  jxl/jpeg/jpeg_data.cc
+  jxl/jpeg/jpeg_data.h
+)
+
+set(JPEGXL_INTERNAL_DEC_SOURCES
+  jxl/ac_context.h
+  jxl/ac_strategy.cc
+  jxl/ac_strategy.h
+  jxl/alpha.cc
+  jxl/alpha.h
+  jxl/ans_common.cc
+  jxl/ans_common.h
+  jxl/ans_params.h
+  jxl/blending.cc
+  jxl/blending.h
+  jxl/chroma_from_luma.cc
+  jxl/chroma_from_luma.h
+  jxl/codec_in_out.h
+  jxl/coeff_order.cc
+  jxl/coeff_order.h
+  jxl/coeff_order_fwd.h
+  jxl/color_encoding_internal.cc
+  jxl/color_encoding_internal.h
+  jxl/color_management.cc
+  jxl/color_management.h
+  jxl/common.h
+  jxl/compressed_dc.cc
+  jxl/compressed_dc.h
+  jxl/convolve-inl.h
+  jxl/convolve.h
+  jxl/convolve_separable5.cc
+  jxl/convolve_separable7.cc
+  jxl/convolve_slow.cc
+  jxl/convolve_symmetric3.cc
+  jxl/convolve_symmetric5.cc
+  jxl/dct-inl.h
+  jxl/dct_block-inl.h
+  jxl/dct_scales.cc
+  jxl/dct_scales.h
+  jxl/dct_util.h
+  jxl/dec_ans.cc
+  jxl/dec_ans.h
+  jxl/dec_bit_reader.h
+  jxl/dec_cache.cc
+  jxl/dec_cache.h
+  jxl/dec_context_map.cc
+  jxl/dec_context_map.h
+  jxl/dec_external_image.cc
+  jxl/dec_external_image.h
+  jxl/dec_frame.cc
+  jxl/dec_frame.h
+  jxl/dec_group.cc
+  jxl/dec_group.h
+  jxl/dec_group_border.cc
+  jxl/dec_group_border.h
+  jxl/dec_huffman.cc
+  jxl/dec_huffman.h
+  jxl/dec_modular.cc
+  jxl/dec_modular.h
+  jxl/dec_noise.cc
+  jxl/dec_noise.h
+  jxl/dec_patch_dictionary.cc
+  jxl/dec_patch_dictionary.h
+  jxl/dec_tone_mapping-inl.h
+  jxl/dec_transforms-inl.h
+  jxl/dec_xyb-inl.h
+  jxl/dec_xyb.cc
+  jxl/dec_xyb.h
+  jxl/decode.cc
+  jxl/entropy_coder.cc
+  jxl/entropy_coder.h
+  jxl/epf.cc
+  jxl/epf.h
+  jxl/exif.h
+  jxl/fast_dct-inl.h
+  jxl/fast_dct.cc
+  jxl/fast_dct.h
+  jxl/fast_dct128-inl.h
+  jxl/fast_dct16-inl.h
+  jxl/fast_dct256-inl.h
+  jxl/fast_dct32-inl.h
+  jxl/fast_dct64-inl.h
+  jxl/fast_dct8-inl.h
+  jxl/fast_math-inl.h
+  jxl/field_encodings.h
+  jxl/fields.cc
+  jxl/fields.h
+  jxl/frame_header.cc
+  jxl/frame_header.h
+  jxl/gauss_blur.cc
+  jxl/gauss_blur.h
+  jxl/headers.cc
+  jxl/headers.h
+  jxl/huffman_table.cc
+  jxl/huffman_table.h
+  jxl/icc_codec.cc
+  jxl/icc_codec.h
+  jxl/icc_codec_common.cc
+  jxl/icc_codec_common.h
+  jxl/image.cc
+  jxl/image.h
+  jxl/image_bundle.cc
+  jxl/image_bundle.h
+  jxl/image_metadata.cc
+  jxl/image_metadata.h
+  jxl/image_ops.h
+  jxl/inverse_mtf-inl.h
+  jxl/lehmer_code.h
+  jxl/loop_filter.cc
+  jxl/loop_filter.h
+  jxl/luminance.cc
+  jxl/luminance.h
+  jxl/matrix_ops.h
+  jxl/memory_manager_internal.cc
+  jxl/memory_manager_internal.h
+  jxl/modular/encoding/context_predict.h
+  jxl/modular/encoding/dec_ma.cc
+  jxl/modular/encoding/dec_ma.h
+  jxl/modular/encoding/encoding.cc
+  jxl/modular/encoding/encoding.h
+  jxl/modular/encoding/ma_common.h
+  jxl/modular/modular_image.cc
+  jxl/modular/modular_image.h
+  jxl/modular/options.h
+  jxl/modular/transform/palette.cc
+  jxl/modular/transform/palette.h
+  jxl/modular/transform/rct.cc
+  jxl/modular/transform/rct.h
+  jxl/modular/transform/squeeze.cc
+  jxl/modular/transform/squeeze.h
+  jxl/modular/transform/transform.cc
+  jxl/modular/transform/transform.h
+  jxl/noise.h
+  jxl/opsin_params.cc
+  jxl/opsin_params.h
+  jxl/passes_state.cc
+  jxl/passes_state.h
+  jxl/patch_dictionary_internal.h
+  jxl/quant_weights.cc
+  jxl/quant_weights.h
+  jxl/quantizer-inl.h
+  jxl/quantizer.cc
+  jxl/quantizer.h
+  jxl/rational_polynomial-inl.h
+  jxl/render_pipeline/low_memory_render_pipeline.cc
+  jxl/render_pipeline/low_memory_render_pipeline.h
+  jxl/render_pipeline/render_pipeline.cc
+  jxl/render_pipeline/render_pipeline.h
+  jxl/render_pipeline/render_pipeline_stage.h
+  jxl/render_pipeline/simple_render_pipeline.cc
+  jxl/render_pipeline/simple_render_pipeline.h
+  jxl/render_pipeline/stage_blending.cc
+  jxl/render_pipeline/stage_blending.h
+  jxl/render_pipeline/stage_chroma_upsampling.cc
+  jxl/render_pipeline/stage_chroma_upsampling.h
+  jxl/render_pipeline/stage_epf.cc
+  jxl/render_pipeline/stage_epf.h
+  jxl/render_pipeline/stage_from_linear.cc
+  jxl/render_pipeline/stage_from_linear.h
+  jxl/render_pipeline/stage_gaborish.cc
+  jxl/render_pipeline/stage_gaborish.h
+  jxl/render_pipeline/stage_noise.cc
+  jxl/render_pipeline/stage_noise.h
+  jxl/render_pipeline/stage_patches.cc
+  jxl/render_pipeline/stage_patches.h
+  jxl/render_pipeline/stage_splines.cc
+  jxl/render_pipeline/stage_splines.h
+  jxl/render_pipeline/stage_spot.cc
+  jxl/render_pipeline/stage_spot.h
+  jxl/render_pipeline/stage_to_linear.cc
+  jxl/render_pipeline/stage_to_linear.h
+  jxl/render_pipeline/stage_tone_mapping.cc
+  jxl/render_pipeline/stage_tone_mapping.h
+  jxl/render_pipeline/stage_upsampling.cc
+  jxl/render_pipeline/stage_upsampling.h
+  jxl/render_pipeline/stage_write.cc
+  jxl/render_pipeline/stage_write.h
+  jxl/render_pipeline/stage_xyb.cc
+  jxl/render_pipeline/stage_xyb.h
+  jxl/render_pipeline/stage_ycbcr.cc
+  jxl/render_pipeline/stage_ycbcr.h
+  jxl/sanitizers.h
+  jxl/simd_util-inl.h
+  jxl/splines.cc
+  jxl/splines.h
+  jxl/toc.cc
+  jxl/toc.h
+  jxl/transfer_functions-inl.h
+  jxl/transpose-inl.h
+  jxl/xorshift128plus-inl.h
+)
+
+set(JPEGXL_INTERNAL_ENC_SOURCES
+  jxl/butteraugli/butteraugli.cc
+  jxl/butteraugli/butteraugli.h
+  jxl/enc_ac_strategy.cc
+  jxl/enc_ac_strategy.h
+  jxl/enc_adaptive_quantization.cc
+  jxl/enc_adaptive_quantization.h
+  jxl/enc_ans.cc
+  jxl/enc_ans.h
+  jxl/enc_ans_params.h
+  jxl/enc_ar_control_field.cc
+  jxl/enc_ar_control_field.h
+  jxl/enc_aux_out.cc
+  jxl/enc_aux_out.h
+  jxl/enc_bit_writer.cc
+  jxl/enc_bit_writer.h
+  jxl/enc_butteraugli_comparator.cc
+  jxl/enc_butteraugli_comparator.h
+  jxl/enc_cache.cc
+  jxl/enc_cache.h
+  jxl/enc_chroma_from_luma.cc
+  jxl/enc_chroma_from_luma.h
+  jxl/enc_cluster.cc
+  jxl/enc_cluster.h
+  jxl/enc_coeff_order.cc
+  jxl/enc_coeff_order.h
+  jxl/enc_color_management.cc
+  jxl/enc_color_management.h
+  jxl/enc_comparator.cc
+  jxl/enc_comparator.h
+  jxl/enc_context_map.cc
+  jxl/enc_context_map.h
+  jxl/enc_debug_image.cc
+  jxl/enc_debug_image.h
+  jxl/enc_detect_dots.cc
+  jxl/enc_detect_dots.h
+  jxl/enc_dot_dictionary.cc
+  jxl/enc_dot_dictionary.h
+  jxl/enc_entropy_coder.cc
+  jxl/enc_entropy_coder.h
+  jxl/enc_external_image.cc
+  jxl/enc_external_image.h
+  jxl/enc_fast_lossless.cc
+  jxl/enc_fast_lossless.h
+  jxl/enc_fields.cc
+  jxl/enc_fields.h
+  jxl/enc_file.cc
+  jxl/enc_file.h
+  jxl/enc_frame.cc
+  jxl/enc_frame.h
+  jxl/enc_gaborish.cc
+  jxl/enc_gaborish.h
+  jxl/enc_gamma_correct.h
+  jxl/enc_group.cc
+  jxl/enc_group.h
+  jxl/enc_heuristics.cc
+  jxl/enc_heuristics.h
+  jxl/enc_huffman.cc
+  jxl/enc_huffman.h
+  jxl/enc_huffman_tree.cc
+  jxl/enc_huffman_tree.h
+  jxl/enc_icc_codec.cc
+  jxl/enc_icc_codec.h
+  jxl/enc_image_bundle.cc
+  jxl/enc_image_bundle.h
+  jxl/enc_jxl_skcms.h
+  jxl/enc_linalg.cc
+  jxl/enc_linalg.h
+  jxl/enc_modular.cc
+  jxl/enc_modular.h
+  jxl/enc_noise.cc
+  jxl/enc_noise.h
+  jxl/enc_optimize.cc
+  jxl/enc_optimize.h
+  jxl/enc_params.h
+  jxl/enc_patch_dictionary.cc
+  jxl/enc_patch_dictionary.h
+  jxl/enc_photon_noise.cc
+  jxl/enc_photon_noise.h
+  jxl/enc_progressive_split.cc
+  jxl/enc_progressive_split.h
+  jxl/enc_quant_weights.cc
+  jxl/enc_quant_weights.h
+  jxl/enc_splines.cc
+  jxl/enc_splines.h
+  jxl/enc_toc.cc
+  jxl/enc_toc.h
+  jxl/enc_transforms-inl.h
+  jxl/enc_transforms.cc
+  jxl/enc_transforms.h
+  jxl/enc_xyb.cc
+  jxl/enc_xyb.h
+  jxl/encode.cc
+  jxl/encode_internal.h
+  jxl/jpeg/enc_jpeg_data.cc
+  jxl/jpeg/enc_jpeg_data.h
+  jxl/jpeg/enc_jpeg_data_reader.cc
+  jxl/jpeg/enc_jpeg_data_reader.h
+  jxl/jpeg/enc_jpeg_huffman_decode.cc
+  jxl/jpeg/enc_jpeg_huffman_decode.h
+  jxl/modular/encoding/enc_debug_tree.cc
+  jxl/modular/encoding/enc_debug_tree.h
+  jxl/modular/encoding/enc_encoding.cc
+  jxl/modular/encoding/enc_encoding.h
+  jxl/modular/encoding/enc_ma.cc
+  jxl/modular/encoding/enc_ma.h
+  jxl/modular/transform/enc_palette.cc
+  jxl/modular/transform/enc_palette.h
+  jxl/modular/transform/enc_rct.cc
+  jxl/modular/transform/enc_rct.h
+  jxl/modular/transform/enc_squeeze.cc
+  jxl/modular/transform/enc_squeeze.h
+  jxl/modular/transform/enc_transform.cc
+  jxl/modular/transform/enc_transform.h
+)
+
+set(JPEGXL_INTERNAL_EXTRAS_FOR_TOOLS_SOURCES
+  extras/codec.cc
+  extras/codec.h
+  extras/hlg.cc
+  extras/hlg.h
+  extras/metrics.cc
+  extras/metrics.h
+  extras/packed_image_convert.cc
+  extras/packed_image_convert.h
+  extras/tone_mapping.cc
+  extras/tone_mapping.h
+)
+
+set(JPEGXL_INTERNAL_EXTRAS_SOURCES
+  extras/dec/color_description.cc
+  extras/dec/color_description.h
+  extras/dec/color_hints.cc
+  extras/dec/color_hints.h
+  extras/dec/decode.cc
+  extras/dec/decode.h
+  extras/enc/encode.cc
+  extras/enc/encode.h
+  extras/exif.cc
+  extras/exif.h
+  extras/packed_image.h
+  extras/size_constraints.h
+  extras/time.cc
+  extras/time.h
+)
+
+set(JPEGXL_INTERNAL_GBENCH_SOURCES
+  extras/tone_mapping_gbench.cc
+  jxl/dec_external_image_gbench.cc
+  jxl/enc_external_image_gbench.cc
+  jxl/gauss_blur_gbench.cc
+  jxl/splines_gbench.cc
+  jxl/tf_gbench.cc
+)
+
+set(JPEGXL_INTERNAL_JPEGLI_LIBJPEG_HELPER_FILES
+  jpegli/libjpeg_test_util.cc
+  jpegli/libjpeg_test_util.h
+)
+
+set(JPEGXL_INTERNAL_JPEGLI_SOURCES
+  jpegli/adaptive_quantization.cc
+  jpegli/adaptive_quantization.h
+  jpegli/bit_writer.cc
+  jpegli/bit_writer.h
+  jpegli/bitstream.cc
+  jpegli/bitstream.h
+  jpegli/color_quantize.cc
+  jpegli/color_quantize.h
+  jpegli/color_transform.cc
+  jpegli/color_transform.h
+  jpegli/common.cc
+  jpegli/common.h
+  jpegli/common_internal.h
+  jpegli/dct-inl.h
+  jpegli/decode.cc
+  jpegli/decode.h
+  jpegli/decode_internal.h
+  jpegli/decode_marker.cc
+  jpegli/decode_marker.h
+  jpegli/decode_scan.cc
+  jpegli/decode_scan.h
+  jpegli/destination_manager.cc
+  jpegli/downsample.cc
+  jpegli/downsample.h
+  jpegli/encode.cc
+  jpegli/encode.h
+  jpegli/encode_finish.cc
+  jpegli/encode_finish.h
+  jpegli/encode_internal.h
+  jpegli/encode_streaming.cc
+  jpegli/encode_streaming.h
+  jpegli/entropy_coding-inl.h
+  jpegli/entropy_coding.cc
+  jpegli/entropy_coding.h
+  jpegli/error.cc
+  jpegli/error.h
+  jpegli/huffman.cc
+  jpegli/huffman.h
+  jpegli/idct.cc
+  jpegli/idct.h
+  jpegli/input.cc
+  jpegli/input.h
+  jpegli/memory_manager.cc
+  jpegli/memory_manager.h
+  jpegli/quant.cc
+  jpegli/quant.h
+  jpegli/render.cc
+  jpegli/render.h
+  jpegli/simd.cc
+  jpegli/simd.h
+  jpegli/source_manager.cc
+  jpegli/transpose-inl.h
+  jpegli/types.h
+  jpegli/upsample.cc
+  jpegli/upsample.h
+)
+
+set(JPEGXL_INTERNAL_JPEGLI_TESTLIB_FILES
+  jpegli/test_params.h
+  jpegli/test_utils-inl.h
+  jpegli/test_utils.cc
+  jpegli/test_utils.h
+)
+
+set(JPEGXL_INTERNAL_JPEGLI_TESTS
+  jpegli/decode_api_test.cc
+  jpegli/encode_api_test.cc
+  jpegli/error_handling_test.cc
+  jpegli/input_suspension_test.cc
+  jpegli/output_suspension_test.cc
+  jpegli/source_manager_test.cc
+  jpegli/streaming_test.cc
+  jpegli/transcode_api_test.cc
+)
+
+set(JPEGXL_INTERNAL_JPEGLI_WRAPPER_SOURCES
+  jpegli/libjpeg_wrapper.cc
+)
+
+set(JPEGXL_INTERNAL_PUBLIC_HEADERS
+  include/jxl/cms_interface.h
+  include/jxl/codestream_header.h
+  include/jxl/color_encoding.h
+  include/jxl/decode.h
+  include/jxl/decode_cxx.h
+  include/jxl/encode.h
+  include/jxl/encode_cxx.h
+  include/jxl/memory_manager.h
+  include/jxl/parallel_runner.h
+  include/jxl/stats.h
+  include/jxl/types.h
+)
+
+set(JPEGXL_INTERNAL_TESTLIB_FILES
+  jxl/dct_for_test.h
+  jxl/dec_transforms_testonly.cc
+  jxl/dec_transforms_testonly.h
+  jxl/fake_parallel_runner_testonly.h
+  jxl/image_test_utils.h
+  jxl/render_pipeline/test_render_pipeline_stages.h
+  jxl/test_image.cc
+  jxl/test_image.h
+  jxl/test_utils.cc
+  jxl/test_utils.h
+)
+
+set(JPEGXL_INTERNAL_TESTS
+  extras/codec_test.cc
+  extras/dec/color_description_test.cc
+  extras/dec/pgx_test.cc
+  extras/jpegli_test.cc
+  jxl/ac_strategy_test.cc
+  jxl/alpha_test.cc
+  jxl/ans_common_test.cc
+  jxl/ans_test.cc
+  jxl/bit_reader_test.cc
+  jxl/bits_test.cc
+  jxl/blending_test.cc
+  jxl/byte_order_test.cc
+  jxl/coeff_order_test.cc
+  jxl/color_encoding_internal_test.cc
+  jxl/color_management_test.cc
+  jxl/convolve_test.cc
+  jxl/data_parallel_test.cc
+  jxl/dct_test.cc
+  jxl/decode_test.cc
+  jxl/enc_external_image_test.cc
+  jxl/enc_gaborish_test.cc
+  jxl/enc_linalg_test.cc
+  jxl/enc_optimize_test.cc
+  jxl/enc_photon_noise_test.cc
+  jxl/encode_test.cc
+  jxl/entropy_coder_test.cc
+  jxl/fast_dct_test.cc
+  jxl/fast_math_test.cc
+  jxl/fields_test.cc
+  jxl/gamma_correct_test.cc
+  jxl/gauss_blur_test.cc
+  jxl/gradient_test.cc
+  jxl/iaca_test.cc
+  jxl/icc_codec_test.cc
+  jxl/image_bundle_test.cc
+  jxl/image_ops_test.cc
+  jxl/jxl_test.cc
+  jxl/lehmer_code_test.cc
+  jxl/modular_test.cc
+  jxl/opsin_image_test.cc
+  jxl/opsin_inverse_test.cc
+  jxl/padded_bytes_test.cc
+  jxl/passes_test.cc
+  jxl/patch_dictionary_test.cc
+  jxl/preview_test.cc
+  jxl/quant_weights_test.cc
+  jxl/quantizer_test.cc
+  jxl/rational_polynomial_test.cc
+  jxl/render_pipeline/render_pipeline_test.cc
+  jxl/roundtrip_test.cc
+  jxl/simd_util_test.cc
+  jxl/speed_tier_test.cc
+  jxl/splines_test.cc
+  jxl/toc_test.cc
+  jxl/xorshift128plus_test.cc
+  threads/thread_parallel_runner_test.cc
+)
+
+set(JPEGXL_INTERNAL_THREADS_PUBLIC_HEADERS
+  include/jxl/resizable_parallel_runner.h
+  include/jxl/resizable_parallel_runner_cxx.h
+  include/jxl/thread_parallel_runner.h
+  include/jxl/thread_parallel_runner_cxx.h
+)
+
+set(JPEGXL_INTERNAL_THREADS_SOURCES
+  threads/resizable_parallel_runner.cc
+  threads/thread_parallel_runner.cc
+  threads/thread_parallel_runner_internal.cc
+  threads/thread_parallel_runner_internal.h
+)
diff --git a/third-party/libjxl/libjxl/lib/jxl_tests.cmake b/third-party/libjxl/libjxl/lib/jxl_tests.cmake
new file mode 100644
index 0000000000..88c5a89f5c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl_tests.cmake
@@ -0,0 +1,84 @@
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include(compatibility.cmake)
+include(jxl_lists.cmake)
+
+if(BUILD_TESTING OR JPEGXL_ENABLE_TOOLS)
+# Library with test-only code shared between all tests / fuzzers.
+add_library(jxl_testlib-static STATIC ${JPEGXL_INTERNAL_TESTLIB_FILES})
+target_compile_options(jxl_testlib-static PRIVATE
+  ${JPEGXL_INTERNAL_FLAGS}
+  ${JPEGXL_COVERAGE_FLAGS}
+)
+target_compile_definitions(jxl_testlib-static PUBLIC
+  -DTEST_DATA_PATH="${JPEGXL_TEST_DATA_PATH}")
+target_include_directories(jxl_testlib-static PUBLIC
+  "${PROJECT_SOURCE_DIR}"
+)
+target_link_libraries(jxl_testlib-static
+  hwy
+  jxl_extras_nocodec-static
+  jxl-static
+)
+endif()
+
+if(NOT BUILD_TESTING)
+  return()
+endif()
+
+list(APPEND JPEGXL_INTERNAL_TESTS
+  # TODO(deymo): Move this to tools/
+  ../tools/box/box_test.cc
+  ../tools/djxl_fuzzer_test.cc
+)
+
+find_package(GTest)
+
+# Individual test binaries:
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tests)
+foreach (TESTFILE IN LISTS JPEGXL_INTERNAL_TESTS)
+  # The TESTNAME is the name without the extension or directory.
+  get_filename_component(TESTNAME ${TESTFILE} NAME_WE)
+  if(TESTFILE STREQUAL ../tools/djxl_fuzzer_test.cc)
+    add_executable(${TESTNAME} ${TESTFILE} ../tools/djxl_fuzzer.cc)
+  else()
+    add_executable(${TESTNAME} ${TESTFILE})
+  endif()
+  if(JPEGXL_EMSCRIPTEN)
+    # The emscripten linking step takes too much memory and crashes during the
+    # wasm-opt step when using -O2 optimization level
+    set_target_properties(${TESTNAME} PROPERTIES LINK_FLAGS "\
+      -O1 \
+      -s USE_LIBPNG=1 \
+      -s ALLOW_MEMORY_GROWTH=1 \
+      -s SINGLE_FILE=1 \
+      -s PROXY_TO_PTHREAD \
+      -s EXIT_RUNTIME=1 \
+      -s USE_PTHREADS=1 \
+      -s NODERAWFS=1 \
+    ")
+  else()
+    set_target_properties(${TESTNAME} PROPERTIES LINK_FLAGS "${JPEGXL_COVERAGE_LINK_FLAGS}")
+  endif()
+  target_compile_options(${TESTNAME} PRIVATE
+    ${JPEGXL_INTERNAL_FLAGS}
+    # Add coverage flags to the test binary so code in the private headers of
+    # the library is also instrumented when running tests that execute it.
+    ${JPEGXL_COVERAGE_FLAGS}
+  )
+  target_link_libraries(${TESTNAME}
+    box
+    gmock
+    GTest::GTest
+    GTest::Main
+    jxl_extras-static
+    jxl_testlib-static
+  )
+  # Output test targets in the test directory.
+  set_target_properties(${TESTNAME} PROPERTIES PREFIX "tests/")
+  if (WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+    set_target_properties(${TESTNAME} PROPERTIES COMPILE_FLAGS "-Wno-error")
+  endif ()
+  jxl_discover_tests(${TESTNAME})
+endforeach ()
diff --git a/third-party/libjxl/libjxl/lib/jxl_threads.cmake b/third-party/libjxl/libjxl/lib/jxl_threads.cmake
new file mode 100644
index 0000000000..2f5ac17c83
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl_threads.cmake
@@ -0,0 +1,120 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+find_package(Threads REQUIRED)
+
+include(jxl_lists.cmake)
+
+### Define the jxl_threads shared or static target library. The ${_target}
+# parameter should already be created with add_library(), but this function
+# sets all the remaining common properties.
+function(_set_jxl_threads _target)
+  target_compile_options(${_target} PRIVATE ${JPEGXL_INTERNAL_FLAGS})
+  target_compile_options(${_target} PUBLIC ${JPEGXL_COVERAGE_FLAGS})
+  set_property(TARGET ${_target} PROPERTY POSITION_INDEPENDENT_CODE ON)
+
+  target_include_directories(${_target}
+    PRIVATE
+      "${PROJECT_SOURCE_DIR}"
+    PUBLIC
+      "${CMAKE_CURRENT_SOURCE_DIR}/include"
+      "${CMAKE_CURRENT_BINARY_DIR}/include")
+
+  target_link_libraries(${_target}
+    PUBLIC ${JPEGXL_COVERAGE_FLAGS} Threads::Threads
+  )
+
+  set_target_properties(${_target} PROPERTIES
+    CXX_VISIBILITY_PRESET hidden
+    VISIBILITY_INLINES_HIDDEN 1
+    DEFINE_SYMBOL JXL_THREADS_INTERNAL_LIBRARY_BUILD
+  )
+
+  # Always install the library as jxl_threads.{a,so} file without the "-static"
+  # suffix, except in Windows.
+  if (NOT WIN32 OR MINGW)
+    set_target_properties(${_target} PROPERTIES OUTPUT_NAME "jxl_threads")
+  endif()
+  install(TARGETS ${_target}
+    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
+endfunction()
+
+### Static library.
+add_library(jxl_threads-static STATIC ${JPEGXL_INTERNAL_THREADS_SOURCES})
+_set_jxl_threads(jxl_threads-static)
+
+# Make jxl_threads symbols neither imported nor exported when using the static
+# library. These will have hidden visibility anyway in the static library case
+# in unix.
+target_compile_definitions(jxl_threads-static
+  PUBLIC -DJXL_THREADS_STATIC_DEFINE)
+
+
+### Public shared library.
+if (BUILD_SHARED_LIBS)
+add_library(jxl_threads SHARED ${JPEGXL_INTERNAL_THREADS_SOURCES})
+_set_jxl_threads(jxl_threads)
+
+set_target_properties(jxl_threads PROPERTIES
+  VERSION ${JPEGXL_LIBRARY_VERSION}
+  SOVERSION ${JPEGXL_LIBRARY_SOVERSION}
+  LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}"
+  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
+
+  set_target_properties(jxl_threads PROPERTIES
+      LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/jxl/jxl.version)
+  if(APPLE)  # macOS linker: restrict exports to the jxl_osx.syms symbol list.
+  set_property(TARGET jxl_threads APPEND_STRING PROPERTY
+      LINK_FLAGS " -Wl,-exported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/jxl/jxl_osx.syms")
+  elseif(WIN32)
+    # Nothing needed here, we use __declspec(dllexport) (jxl_threads_export.h)
+  else()  # Other linkers: restrict exports with the jxl.version script.
+  set_property(TARGET jxl_threads APPEND_STRING PROPERTY
+      LINK_FLAGS " -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/jxl/jxl.version")
+  endif()  # APPLE
+
+# Compile the shared library such that the JXL_THREADS_EXPORT symbols are
+# exported. Users of the library will not set this flag and therefore import
+# those symbols.
+target_compile_definitions(jxl_threads
+  PRIVATE -DJXL_THREADS_INTERNAL_LIBRARY_BUILD)
+
+# Generate the jxl/jxl_threads_export.h header, we only need to generate it once
+# but we can use it from both libraries.
+generate_export_header(jxl_threads
+  BASE_NAME JXL_THREADS
+  EXPORT_FILE_NAME include/jxl/jxl_threads_export.h)
+else()
+add_library(jxl_threads ALIAS jxl_threads-static)
+# When not building the shared library generate the jxl_threads_export.h header
+# only based on the static target.
+generate_export_header(jxl_threads-static
+  BASE_NAME JXL_THREADS
+  EXPORT_FILE_NAME include/jxl/jxl_threads_export.h)
+endif()  # BUILD_SHARED_LIBS
+
+
+### Add a pkg-config file for libjxl_threads.
+
+# Allow adding prefix if CMAKE_INSTALL_INCLUDEDIR not absolute.
+if(IS_ABSOLUTE "${CMAKE_INSTALL_INCLUDEDIR}")
+    set(PKGCONFIG_TARGET_INCLUDES "${CMAKE_INSTALL_INCLUDEDIR}")
+else()
+    set(PKGCONFIG_TARGET_INCLUDES "\${prefix}/${CMAKE_INSTALL_INCLUDEDIR}")
+endif()
+# Allow adding prefix if CMAKE_INSTALL_LIBDIR not absolute.
+if(IS_ABSOLUTE "${CMAKE_INSTALL_LIBDIR}")
+    set(PKGCONFIG_TARGET_LIBS "${CMAKE_INSTALL_LIBDIR}")
+else()
+    set(PKGCONFIG_TARGET_LIBS "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}")
+endif()
+
+set(JPEGXL_THREADS_LIBRARY_REQUIRES "")
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/threads/libjxl_threads.pc.in"
+               "libjxl_threads.pc" @ONLY)
+install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libjxl_threads.pc"
+  DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
diff --git a/third-party/libjxl/libjxl/lib/jxl_vars.bzl b/third-party/libjxl/libjxl/lib/jxl_vars.bzl
new file mode 100644
index 0000000000..7efa84cc44
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl_vars.bzl
@@ -0,0 +1,46 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Extra build variables.
+
+libjxl_root_package = "__main__"
+
+libjxl_deps_brotli = ["@brotli//:brotlidec", "@brotli//:brotlienc"]
+libjxl_deps_gif = ["@gif//:gif"]
+libjxl_deps_gtest = ["@googletest//:gtest_main"]
+libjxl_deps_hwy = ["@highway//:hwy"]
+libjxl_deps_hwy_nanobenchmark = ["@highway//:nanobenchmark"]
+libjxl_deps_hwy_test_util = ["@highway//:hwy_test_util"]
+libjxl_deps_jpeg = ["@libjpeg_turbo//:jpeg"]
+libjxl_deps_jxl_box = ["//tools:box"]
+libjxl_deps_exr = ["@openexr//:OpenEXR"]
+libjxl_deps_png = ["@png//:png"]
+libjxl_deps_runfiles = ["@bazel_tools//tools/cpp/runfiles"]
+libjxl_deps_skcms = ["@skcms//:skcms"]
+libjxl_deps_testdata = ["//:testdata"]
+
+libjxl_test_shards = {
+    "jpegli/decode_api_test": 10,
+    "jpegli/encode_api_test": 4,
+    "jpegli/input_suspension_test": 6,
+    "jpegli/output_suspension_test": 2,
+    "jxl/ans_test": 2,
+    "jxl/linalg_test": 2,
+    "jxl/modular_test": 4,
+    "jxl/roundtrip_test": 4,
+    "jxl/xorshift128plus_test": 2,
+    "jxl/ac_strategy_test": 10,  # TODO(eustas): separate heavy shard
+    "jxl/dct_test": 32,
+    "jxl/decode_test": 10,  # TODO(eustas): separate heavy shard
+    "jxl/fast_dct_test": 8,  # TODO(eustas): separate ultra-heavy shard
+    "jxl/fast_math_test": 10,  # TODO(eustas): separate heavy shard
+    "jxl/jxl_test": 10,  # TODO(eustas): separate heavy shard
+    "jxl/render_pipeline/render_pipeline_test": 10,
+}
+
+libjxl_test_timeouts = {
+    "jxl/fast_dct_test": "long",
+    "jxl/dct_test": "long",
+}
diff --git a/third-party/libjxl/libjxl/lib/lib.gni b/third-party/libjxl/libjxl/lib/lib.gni
new file mode 120000
index 0000000000..416aa0c9e4
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/lib.gni
@@ -0,0 +1 @@
+jxl_lists.bzl
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/lib/threads/libjxl_threads.pc.in b/third-party/libjxl/libjxl/lib/threads/libjxl_threads.pc.in
new file mode 100644
index 0000000000..50b937a840
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/threads/libjxl_threads.pc.in
@@ -0,0 +1,13 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix}
+libdir=@PKGCONFIG_TARGET_LIBS@
+includedir=@PKGCONFIG_TARGET_INCLUDES@
+
+Name: libjxl_threads
+Description: JPEG XL multi-thread runner using std::threads.
+Version: @JPEGXL_LIBRARY_VERSION@
+Requires.private: @JPEGXL_THREADS_LIBRARY_REQUIRES@
+Libs: -L${libdir} -ljxl_threads
+Libs.private: -lm
+Cflags: -I${includedir}
+Cflags.private: -DJXL_THREADS_STATIC_DEFINE
diff --git a/third-party/libjxl/libjxl/lib/threads/resizable_parallel_runner.cc b/third-party/libjxl/libjxl/lib/threads/resizable_parallel_runner.cc
new file mode 100644
index 0000000000..db27286dea
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/threads/resizable_parallel_runner.cc
@@ -0,0 +1,195 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/resizable_parallel_runner.h>
+
+#include <algorithm>
+#include <atomic>
+#include <condition_variable>
+#include <mutex>
+#include <thread>
+#include <vector>
+
+namespace jpegxl {
+namespace {
+
+// A thread pool that allows changing the number of threads it runs. It also
+// runs tasks on the calling thread, which can work better on schedulers for
+// heterogeneous architectures.
+struct ResizeableParallelRunner {
+  // Sets the total thread count; the caller is one, so `num - 1` workers run.
+  void SetNumThreads(size_t num) {
+    if (num > 0) {
+      num -= 1;
+    }
+    {
+      std::unique_lock<std::mutex> l(state_mutex_);
+      num_desired_workers_ = num;
+      workers_can_proceed_.notify_all();
+    }
+    // Grow the pool; this loop is a no-op when enough workers already exist.
+    for (size_t i = workers_.size(); i < num; i++) {
+      workers_.emplace_back([this, i]() { WorkerBody(i); });
+    }
+    if (workers_.size() > num) {
+      for (size_t i = num; i < workers_.size(); i++) {
+        workers_[i].join();
+      }
+      workers_.resize(num);
+    }
+  }
+
+  ~ResizeableParallelRunner() { SetNumThreads(0); }
+
+  JxlParallelRetCode Run(void* jxl_opaque, JxlParallelRunInit init,
+                         JxlParallelRunFunction func, uint32_t start,
+                         uint32_t end) {
+    // Reject inverted ranges; skip empty ones (`end - start - 1` would wrap).
+    if (start > end) return -1;
+    if (start == end) return 0;
+    if (start + 1 == end) {
+      JxlParallelRetCode ret = init(jxl_opaque, 1);
+      if (ret != 0) return ret;
+      func(jxl_opaque, start, 0);
+      return ret;
+    }
+
+    size_t num_workers = std::min<size_t>(workers_.size() + 1, end - start);
+    JxlParallelRetCode ret = init(jxl_opaque, num_workers);
+    if (ret != 0) return ret;
+
+    {
+      std::unique_lock<std::mutex> l(state_mutex_);
+      // Avoid waking up more workers than needed.
+      max_running_workers_ = end - start - 1;
+      next_task_ = start;
+      end_task_ = end;
+      func_ = func;
+      jxl_opaque_ = jxl_opaque;
+      work_available_ = true;
+      num_running_workers_++;
+      workers_can_proceed_.notify_all();
+    }
+
+    DequeueTasks(0);
+
+    while (true) {
+      std::unique_lock<std::mutex> l(state_mutex_);
+      if (num_running_workers_ == 0) break;
+      work_done_.wait(l);
+    }
+
+    return ret;
+  }
+
+ private:
+  void WorkerBody(size_t worker_id) {
+    while (true) {
+      {
+        std::unique_lock<std::mutex> l(state_mutex_);
+        // Worker pool was reduced, resize down.
+        if (worker_id >= num_desired_workers_) {
+          return;
+        }
+        // Nothing to do this time.
+        if (!work_available_ || worker_id >= max_running_workers_) {
+          workers_can_proceed_.wait(l);
+          continue;
+        }
+        num_running_workers_++;
+      }
+      DequeueTasks(worker_id + 1);
+    }
+  }
+
+  void DequeueTasks(size_t thread_id) {
+    while (true) {
+      uint32_t task = next_task_++;
+      if (task >= end_task_) {
+        std::unique_lock<std::mutex> l(state_mutex_);
+        num_running_workers_--;
+        work_available_ = false;
+        if (num_running_workers_ == 0) {
+          work_done_.notify_all();
+        }
+        break;
+      }
+      func_(jxl_opaque_, task, thread_id);
+    }
+  }
+
+  // Checks when the worker has something to do, which can be one of:
+  // - quitting (when worker_id >= num_desired_workers_)
+  // - having work available for them (work_available_ is true and worker_id <
+  // max_running_workers_)
+  std::condition_variable workers_can_proceed_;
+
+  // Workers are done, and the main thread can proceed (num_running_workers_ ==
+  // 0)
+  std::condition_variable work_done_;
+
+  std::vector<std::thread> workers_;
+
+  // Protects all the remaining variables, except for func_, jxl_opaque_ and
+  // end_task_ (for which only the write by the main thread is protected, and
+  // subsequent uses by workers happen-after it) and next_task_ (which is
+  // atomic).
+  std::mutex state_mutex_;
+
+  // Range of tasks still need to be done.
+  std::atomic<uint32_t> next_task_;
+  uint32_t end_task_;
+
+  // Function to run and its argument.
+  JxlParallelRunFunction func_;
+  void* jxl_opaque_;  // not owned
+
+  // Variables that control the workers:
+  // - work_available_ is set to true after a call to Run() and to false at the
+  // end of it.
+  // - num_desired_workers_ represents the number of workers that should be
+  // present.
+  // - max_running_workers_ represents the number of workers that should be
+  // executing tasks.
+  // - num_running_workers_ represents the number of workers that are executing
+  // tasks.
+  size_t num_desired_workers_ = 0;
+  size_t max_running_workers_ = 0;
+  size_t num_running_workers_ = 0;
+  bool work_available_ = false;
+};
+}  // namespace
+}  // namespace jpegxl
+
+extern "C" {
+JXL_THREADS_EXPORT JxlParallelRetCode JxlResizableParallelRunner(
+    void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+    JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range) {
+  return static_cast<jpegxl::ResizeableParallelRunner*>(runner_opaque)
+      ->Run(jpegxl_opaque, init, func, start_range, end_range);
+}
+
+JXL_THREADS_EXPORT void* JxlResizableParallelRunnerCreate(
+    const JxlMemoryManager* memory_manager) {
+  return new jpegxl::ResizeableParallelRunner();
+}
+
+JXL_THREADS_EXPORT void JxlResizableParallelRunnerSetThreads(
+    void* runner_opaque, size_t num_threads) {
+  static_cast<jpegxl::ResizeableParallelRunner*>(runner_opaque)
+      ->SetNumThreads(num_threads);
+}
+
+JXL_THREADS_EXPORT void JxlResizableParallelRunnerDestroy(void* runner_opaque) {
+  delete static_cast<jpegxl::ResizeableParallelRunner*>(runner_opaque);
+}
+
+JXL_THREADS_EXPORT uint32_t
+JxlResizableParallelRunnerSuggestThreads(uint64_t xsize, uint64_t ysize) {
+  // ~one thread per group.
+  return std::min<uint64_t>(std::thread::hardware_concurrency(),
+                            xsize * ysize / (256 * 256));
+}
+}
diff --git a/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner.cc b/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner.cc
new file mode 100644
index 0000000000..47b81bdb16
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner.cc
@@ -0,0 +1,101 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/thread_parallel_runner.h>
+#include <string.h>
+
+#include "lib/threads/thread_parallel_runner_internal.h"
+
+namespace {
+
+// Default JxlMemoryManager using malloc and free for the jpegxl_threads
+// library. Same as the default JxlMemoryManager for the jpegxl library
+// itself.
+
+// Default alloc and free functions.
+void* ThreadMemoryManagerDefaultAlloc(void* opaque, size_t size) {
+  return malloc(size);
+}
+
+void ThreadMemoryManagerDefaultFree(void* opaque, void* address) {
+  free(address);
+}
+
+// Initializes the memory manager instance with the passed one. The
+// MemoryManager passed in |memory_manager| may be NULL or contain NULL
+// functions which will be initialized with the default ones. If either alloc
+// or free are NULL, then both must be NULL, otherwise this function returns an
+// error.
+bool ThreadMemoryManagerInit(JxlMemoryManager* self,
+                             const JxlMemoryManager* memory_manager) {
+  if (memory_manager) {
+    *self = *memory_manager;
+  } else {
+    memset(self, 0, sizeof(*self));
+  }
+  if (!self->alloc != !self->free) {
+    return false;
+  }
+  if (!self->alloc) self->alloc = ThreadMemoryManagerDefaultAlloc;
+  if (!self->free) self->free = ThreadMemoryManagerDefaultFree;
+
+  return true;
+}
+
+void* ThreadMemoryManagerAlloc(const JxlMemoryManager* memory_manager,
+                               size_t size) {
+  return memory_manager->alloc(memory_manager->opaque, size);
+}
+
+void ThreadMemoryManagerFree(const JxlMemoryManager* memory_manager,
+                             void* address) {
+  return memory_manager->free(memory_manager->opaque, address);
+}
+
+}  // namespace
+
+JxlParallelRetCode JxlThreadParallelRunner(
+    void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+    JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range) {
+  return jpegxl::ThreadParallelRunner::Runner(
+      runner_opaque, jpegxl_opaque, init, func, start_range, end_range);
+}
+
+/// Starts the given number of worker threads and blocks until they are ready.
+/// "num_worker_threads" defaults to one per hyperthread. If zero, all tasks
+/// run on the main thread.
+void* JxlThreadParallelRunnerCreate(const JxlMemoryManager* memory_manager,
+                                    size_t num_worker_threads) {
+  JxlMemoryManager local_memory_manager;
+  if (!ThreadMemoryManagerInit(&local_memory_manager, memory_manager))
+    return nullptr;
+
+  void* alloc = ThreadMemoryManagerAlloc(&local_memory_manager,
+                                         sizeof(jpegxl::ThreadParallelRunner));
+  if (!alloc) return nullptr;
+  // Placement new constructor on allocated memory
+  jpegxl::ThreadParallelRunner* runner =
+      new (alloc) jpegxl::ThreadParallelRunner(num_worker_threads);
+  runner->memory_manager = local_memory_manager;
+
+  return runner;
+}
+
+void JxlThreadParallelRunnerDestroy(void* runner_opaque) {
+  jpegxl::ThreadParallelRunner* runner =
+      reinterpret_cast<jpegxl::ThreadParallelRunner*>(runner_opaque);
+  if (runner) {
+    JxlMemoryManager local_memory_manager = runner->memory_manager;
+    // Call destructor directly since custom free function is used.
+    runner->~ThreadParallelRunner();
+    ThreadMemoryManagerFree(&local_memory_manager, runner);
+  }
+}
+
+// Returns a suggested value for the num_worker_threads argument of
+// JxlThreadParallelRunnerCreate: std::thread::hardware_concurrency().
+size_t JxlThreadParallelRunnerDefaultNumWorkerThreads() {
+  return std::thread::hardware_concurrency();
+}
diff --git a/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner_internal.cc b/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner_internal.cc
new file mode 100644
index 0000000000..cfc7e22f9f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner_internal.cc
@@ -0,0 +1,206 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/threads/thread_parallel_runner_internal.h"
+
+#include <algorithm>
+
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+    defined(THREAD_SANITIZER)
+#include "sanitizer/common_interface_defs.h"  // __sanitizer_print_stack_trace
+#endif                                        // defined(*_SANITIZER)
+
+#include <jxl/thread_parallel_runner.h>
+
+namespace {
+
+// Important: JXL_ASSERT does not guarantee running the `condition` code,
+// use only for debug mode checks.
+
+#if JXL_ENABLE_ASSERT
+// Exits the program after printing a stack trace when possible.
+bool Abort() {
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+    defined(THREAD_SANITIZER)
+  // If compiled with any sanitizer print a stack trace. This call doesn't crash
+  // the program, instead the trap below will crash it also allowing gdb to
+  // break there.
+  __sanitizer_print_stack_trace();
+#endif  // defined(*_SANITIZER)
+
+#ifdef _MSC_VER
+  __debugbreak();
+  abort();
+#else
+  __builtin_trap();
+#endif
+}
+#define JXL_ASSERT(condition) \
+  do {                        \
+    if (!(condition)) {       \
+      Abort();                \
+    }                         \
+  } while (0)
+#else
+#define JXL_ASSERT(condition) \
+  do {                        \
+  } while (0)
+#endif
+}  // namespace
+
+namespace jpegxl {
+
+// Static JxlParallelRunner entry point: runs func(jpegxl_opaque, task, thread)
+// for each task in [start_range, end_range). Returns 0 on success, the result
+// of `init` if that is non-zero, or -1 for an inverted range or re-entry.
+JxlParallelRetCode ThreadParallelRunner::Runner(
+    void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+    JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range) {
+  ThreadParallelRunner* self =
+      static_cast<ThreadParallelRunner*>(runner_opaque);
+  if (start_range > end_range) return -1;
+  if (start_range == end_range) return 0;
+
+  int ret = init(jpegxl_opaque, std::max<size_t>(self->num_worker_threads_, 1));
+  if (ret != 0) return ret;
+
+  // Without worker threads, run every task sequentially on the caller.
+  if (self->num_worker_threads_ == 0) {
+    const size_t thread = 0;
+    for (uint32_t task = start_range; task < end_range; ++task) {
+      func(jpegxl_opaque, task, thread);
+    }
+    return 0;
+  }
+
+  if (self->depth_.fetch_add(1, std::memory_order_acq_rel) != 0) {
+    // Must not re-enter; undo the increment so later calls are not rejected.
+    self->depth_.fetch_sub(1, std::memory_order_acq_rel);
+    return -1;
+  }
+
+  const WorkerCommand worker_command =
+      (static_cast<WorkerCommand>(start_range) << 32) + end_range;
+  // Ensure the inputs do not result in a reserved command.
+  JXL_ASSERT(worker_command != kWorkerWait);
+  JXL_ASSERT(worker_command != kWorkerOnce);
+  JXL_ASSERT(worker_command != kWorkerExit);
+
+  self->data_func_ = func;
+  self->jpegxl_opaque_ = jpegxl_opaque;
+  self->num_reserved_.store(0, std::memory_order_relaxed);
+
+  self->StartWorkers(worker_command);
+  self->WorkersReadyBarrier();
+
+  return self->depth_.fetch_sub(1, std::memory_order_acq_rel) == 1 ? 0 : -1;
+}
+
+// static
+void ThreadParallelRunner::RunRange(ThreadParallelRunner* self,
+                                    const WorkerCommand command,
+                                    const int thread) {
+  const uint32_t begin = command >> 32;
+  const uint32_t end = command & 0xFFFFFFFF;
+  const uint32_t num_tasks = end - begin;
+  const uint32_t num_worker_threads = self->num_worker_threads_;
+
+  // OpenMP introduced several "schedule" strategies:
+  // "single" (static assignment of exactly one chunk per thread): slower.
+  // "dynamic" (allocates k tasks at a time): competitive for well-chosen k.
+  // "guided" (allocates k tasks, decreases k): computing k = remaining/n
+  //   is faster than halving k each iteration. We prefer this strategy
+  //   because it avoids user-specified parameters.
+
+  for (;;) {
+#if 0
+      // dynamic
+      const uint32_t my_size = std::max(num_tasks / (num_worker_threads * 4), 1);
+#else
+    // guided
+    const uint32_t num_reserved =
+        self->num_reserved_.load(std::memory_order_relaxed);
+    // It is possible that more tasks are reserved than ready to run.
+    const uint32_t num_remaining =
+        num_tasks - std::min(num_reserved, num_tasks);
+    const uint32_t my_size =
+        std::max(num_remaining / (num_worker_threads * 4), 1u);
+#endif
+    const uint32_t my_begin = begin + self->num_reserved_.fetch_add(
+                                          my_size, std::memory_order_relaxed);
+    const uint32_t my_end = std::min(my_begin + my_size, begin + num_tasks);
+    // Another thread already reserved the last task.
+    if (my_begin >= my_end) {
+      break;
+    }
+    for (uint32_t task = my_begin; task < my_end; ++task) {
+      self->data_func_(self->jpegxl_opaque_, task, thread);
+    }
+  }
+}
+
+// static
+void ThreadParallelRunner::ThreadFunc(ThreadParallelRunner* self,
+                                      const int thread) {
+  // Until kWorkerExit command received:
+  for (;;) {
+    std::unique_lock<std::mutex> lock(self->mutex_);
+    // Notify main thread that this thread is ready.
+    if (++self->workers_ready_ == self->num_threads_) {
+      self->workers_ready_cv_.notify_one();
+    }
+  RESUME_WAIT:
+    // Wait for a command.
+    self->worker_start_cv_.wait(lock);
+    const WorkerCommand command = self->worker_start_command_;
+    switch (command) {
+      case kWorkerWait:    // spurious wakeup:
+        goto RESUME_WAIT;  // lock still held, avoid incrementing ready.
+      case kWorkerOnce:
+        lock.unlock();
+        self->data_func_(self->jpegxl_opaque_, thread, thread);
+        break;
+      case kWorkerExit:
+        return;  // exits thread
+      default:
+        lock.unlock();
+        RunRange(self, command, thread);
+        break;
+    }
+  }
+}
+
+ThreadParallelRunner::ThreadParallelRunner(const int num_worker_threads)
+    : num_worker_threads_(num_worker_threads),
+      num_threads_(std::max(num_worker_threads, 1)) {
+  threads_.reserve(num_worker_threads_);
+
+  // Suppress "unused-private-field" warning.
+  (void)padding1;
+  (void)padding2;
+
+  // Safely handle spurious worker wakeups.
+  worker_start_command_ = kWorkerWait;
+
+  for (uint32_t i = 0; i < num_worker_threads_; ++i) {
+    threads_.emplace_back(ThreadFunc, this, i);
+  }
+
+  if (num_worker_threads_ != 0) {
+    WorkersReadyBarrier();
+  }
+}
+
+ThreadParallelRunner::~ThreadParallelRunner() {
+  if (num_worker_threads_ != 0) {
+    StartWorkers(kWorkerExit);
+  }
+
+  for (std::thread& thread : threads_) {
+    JXL_ASSERT(thread.joinable());
+    thread.join();
+  }
+}
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner_internal.h b/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner_internal.h
new file mode 100644
index 0000000000..199a5f2a8b
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner_internal.h
@@ -0,0 +1,166 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+
+// C++ implementation using std::thread of a ::JxlParallelRunner.
+
+// The main class in this module, ThreadParallelRunner, implements a static
+// method ThreadParallelRunner::Runner that can be passed as a
+// JxlParallelRunner when using the JPEG XL library. This uses std::thread
+// internally and related synchronization functions. The number of threads
+// created is fixed at construction time and the threads are re-used for every
+// ThreadParallelRunner::Runner call. Only one concurrent Runner() call per
+// instance is allowed at a time.
+//
+// This is a scalable, lower-overhead thread pool runner, especially suitable
+// for data-parallel computations in the fork-join model, where clients need to
+// know when all tasks have completed.
+//
+// This thread pool can efficiently load-balance millions of tasks using an
+// atomic counter, thus avoiding per-task virtual or system calls. With 48
+// hyperthreads and 1M tasks that add to an atomic counter, overall runtime is
+// 10-20x higher when using std::async, and ~200x for a queue-based thread
+// pool.
+//
+// Usage:
+//   ThreadParallelRunner runner;
+//   JxlDecode(
+//       ... , &ThreadParallelRunner::Runner, static_cast<void*>(&runner));
+
+#ifndef LIB_THREADS_THREAD_PARALLEL_RUNNER_INTERNAL_H_
+#define LIB_THREADS_THREAD_PARALLEL_RUNNER_INTERNAL_H_
+
+#include <jxl/memory_manager.h>
+#include <jxl/parallel_runner.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <atomic>
+#include <condition_variable>  //NOLINT
+#include <mutex>               //NOLINT
+#include <thread>              //NOLINT
+#include <vector>
+
+namespace jpegxl {
+
+// Main helper class implementing the ::JxlParallelRunner interface.
+class ThreadParallelRunner {
+ public:
+  // ::JxlParallelRunner interface.
+  static JxlParallelRetCode Runner(void* runner_opaque, void* jpegxl_opaque,
+                                   JxlParallelRunInit init,
+                                   JxlParallelRunFunction func,
+                                   uint32_t start_range, uint32_t end_range);
+
+  // Starts the given number of worker threads and blocks until they are ready.
+  // "num_worker_threads" defaults to one per hyperthread. If zero, all tasks
+  // run on the main thread.
+  explicit ThreadParallelRunner(
+      int num_worker_threads = std::thread::hardware_concurrency());
+
+  // Waits for all threads to exit.
+  ~ThreadParallelRunner();
+
+  // Returns maximum number of main/worker threads that may call Func. Useful
+  // for allocating per-thread storage.
+  size_t NumThreads() const { return num_threads_; }
+
+  // Runs func(thread, thread) on all thread(s) that may participate in Run.
+  // With zero worker threads, runs on the main thread with thread == 0,
+  // otherwise concurrently called by each worker thread in [0, NumThreads()).
+  template <class Func>
+  void RunOnEachThread(const Func& func) {
+    if (num_worker_threads_ == 0) {
+      const int thread = 0;
+      func(thread, thread);
+      return;
+    }
+
+    data_func_ = reinterpret_cast<JxlParallelRunFunction>(&CallClosure<Func>);
+    jpegxl_opaque_ = const_cast<void*>(static_cast<const void*>(&func));
+    StartWorkers(kWorkerOnce);
+    WorkersReadyBarrier();
+  }
+
+  JxlMemoryManager memory_manager;
+
+ private:
+  // After construction and between calls to Run, workers are "ready", i.e.
+  // waiting on worker_start_cv_. They are "started" by sending a "command"
+  // and notifying all worker_start_cv_ waiters. (That is why all workers
+  // must be ready/waiting - otherwise, the notification will not reach all of
+  // them and the main thread waits in vain for them to report readiness.)
+  using WorkerCommand = uint64_t;
+
+  // Special values; all others encode the begin/end parameters. Note that all
+  // these are no-op ranges (begin >= end) and therefore never used to encode
+  // ranges.
+  static constexpr WorkerCommand kWorkerWait = ~1ULL;
+  static constexpr WorkerCommand kWorkerOnce = ~2ULL;
+  static constexpr WorkerCommand kWorkerExit = ~3ULL;
+
+  // Calls f(task, thread). Used for type erasure of Func arguments. The
+  // signature must match JxlParallelRunFunction, hence a void* argument.
+  template <class Closure>
+  static void CallClosure(void* f, const uint32_t task, const size_t thread) {
+    (*reinterpret_cast<const Closure*>(f))(task, thread);
+  }
+
+  void WorkersReadyBarrier() {
+    std::unique_lock<std::mutex> lock(mutex_);
+    // Typically only a single iteration.
+    while (workers_ready_ != threads_.size()) {
+      workers_ready_cv_.wait(lock);
+    }
+    workers_ready_ = 0;
+
+    // Safely handle spurious worker wakeups.
+    worker_start_command_ = kWorkerWait;
+  }
+
+  // Precondition: all workers are ready.
+  void StartWorkers(const WorkerCommand worker_command) {
+    mutex_.lock();
+    worker_start_command_ = worker_command;
+    // Workers will need this lock, so release it before they wake up.
+    mutex_.unlock();
+    worker_start_cv_.notify_all();
+  }
+
+  // Attempts to reserve and perform some work from the global range of tasks,
+  // which is encoded within "command". Returns after all tasks are reserved.
+  static void RunRange(ThreadParallelRunner* self, const WorkerCommand command,
+                       const int thread);
+
+  static void ThreadFunc(ThreadParallelRunner* self, int thread);
+
+  // Unmodified after ctor, but cannot be const because we call thread::join().
+  std::vector<std::thread> threads_;
+
+  const uint32_t num_worker_threads_;  // == threads_.size()
+  const uint32_t num_threads_;
+
+  std::atomic<int> depth_{0};  // detects if Run is re-entered (not supported).
+
+  std::mutex mutex_;  // guards both cv and their variables.
+  std::condition_variable workers_ready_cv_;
+  uint32_t workers_ready_ = 0;
+  std::condition_variable worker_start_cv_;
+  WorkerCommand worker_start_command_;
+
+  // Written by main thread, read by workers (after mutex lock/unlock).
+  JxlParallelRunFunction data_func_;
+  void* jpegxl_opaque_;
+
+  // Updated by workers; padding avoids false sharing.
+  uint8_t padding1[64];
+  std::atomic<uint32_t> num_reserved_{0};
+  uint8_t padding2[64];
+};
+
+}  // namespace jpegxl
+
+#endif  // LIB_THREADS_THREAD_PARALLEL_RUNNER_INTERNAL_H_
diff --git a/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner_test.cc b/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner_test.cc
new file mode 100644
index 0000000000..7c8e602764
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner_test.cc
@@ -0,0 +1,122 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <atomic>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+using jxl::test::ThreadPoolForTests;
+
+namespace jpegxl {
+namespace {
+
+int PopulationCount(uint64_t bits) {
+  int num_set = 0;
+  while (bits != 0) {
+    num_set += bits & 1;
+    bits >>= 1;
+  }
+  return num_set;
+}
+
+// Ensures task parameter is in bounds, every parameter is reached,
+// pool can be reused (multiple consecutive Run calls), pool can be destroyed
+// (joining with its threads), num_threads=0 works (runs on current thread).
+TEST(ThreadParallelRunnerTest, TestPool) {
+  for (int num_threads = 0; num_threads <= 18; ++num_threads) {
+    ThreadPoolForTests pool(num_threads);
+    for (int num_tasks = 0; num_tasks < 32; ++num_tasks) {
+      std::vector<int> mementos(num_tasks);
+      for (int begin = 0; begin < 32; ++begin) {
+        std::fill(mementos.begin(), mementos.end(), 0);
+        EXPECT_TRUE(RunOnPool(
+            &pool, begin, begin + num_tasks, jxl::ThreadPool::NoInit,
+            [begin, num_tasks, &mementos](const int task, const int thread) {
+              // Parameter is in the given range
+              EXPECT_GE(task, begin);
+              EXPECT_LT(task, begin + num_tasks);
+
+              // Store mementos to be sure we visited each task.
+              mementos.at(task - begin) = 1000 + task;
+            },
+            "TestPool"));
+        for (int task = begin; task < begin + num_tasks; ++task) {
+          EXPECT_EQ(1000 + task, mementos.at(task - begin));
+        }
+      }
+    }
+  }
+}
+
+// Verify "thread" parameter when processing few tasks.
+TEST(ThreadParallelRunnerTest, TestSmallAssignments) {
+  const int kMaxThreads = 8;
+  for (int num_threads = 1; num_threads <= kMaxThreads; ++num_threads) {
+    ThreadPoolForTests pool(num_threads);
+
+    // (Avoid mutex because it may perturb the worker thread scheduling)
+    std::atomic<uint64_t> id_bits{0};
+    std::atomic<int> num_calls{0};
+
+    EXPECT_TRUE(RunOnPool(
+        &pool, 0, num_threads, jxl::ThreadPool::NoInit,
+        [&num_calls, num_threads, &id_bits](const int task, const int thread) {
+          num_calls.fetch_add(1, std::memory_order_relaxed);
+
+          EXPECT_LT(thread, num_threads);
+          uint64_t bits = id_bits.load(std::memory_order_relaxed);
+          while (
+              !id_bits.compare_exchange_weak(bits, bits | (1ULL << thread))) {
+          }
+        },
+        "TestSmallAssignments"));
+
+    // Correct number of tasks.
+    EXPECT_EQ(num_threads, num_calls.load());
+
+    const int num_participants = PopulationCount(id_bits.load());
+    // Can't expect equality because other workers may have woken up too late.
+    EXPECT_LE(num_participants, num_threads);
+  }
+}
+
+struct Counter {
+  Counter() {
+    // Suppress "unused-field" warning.
+    (void)padding;
+  }
+  void Assimilate(const Counter& victim) { counter += victim.counter; }
+  int counter = 0;
+  int padding[31];
+};
+
+TEST(ThreadParallelRunnerTest, TestCounter) {
+  const int kNumThreads = 12;
+  ThreadPoolForTests pool(kNumThreads);
+  alignas(128) Counter counters[kNumThreads];
+
+  const int kNumTasks = kNumThreads * 19;
+  EXPECT_TRUE(RunOnPool(
+      &pool, 0, kNumTasks, jxl::ThreadPool::NoInit,
+      [&counters](const int task, const int thread) {
+        counters[thread].counter += task;
+      },
+      "TestCounter"));
+
+  int expected = 0;
+  for (int i = 0; i < kNumTasks; ++i) {
+    expected += i;
+  }
+
+  for (int i = 1; i < kNumThreads; ++i) {
+    counters[0].Assimilate(counters[i]);
+  }
+  EXPECT_EQ(expected, counters[0].counter);
+}
+
+}  // namespace
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/plugins/CMakeLists.txt b/third-party/libjxl/libjxl/plugins/CMakeLists.txt
new file mode 100644
index 0000000000..bff1bff29d
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/CMakeLists.txt
@@ -0,0 +1,21 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+if(NOT MSVC)
+  option(JPEGXL_ENABLE_PLUGIN_GDKPIXBUF "Enable plugin for GdkPixbuf image loading library" ON)
+  if(JPEGXL_ENABLE_PLUGIN_GDKPIXBUF)
+    add_subdirectory(gdk-pixbuf)
+  endif()
+endif()
+
+option(JPEGXL_ENABLE_PLUGIN_GIMP210 "Enable plugin for GIMP 2.10.x series" ON)
+if(JPEGXL_ENABLE_PLUGIN_GIMP210)
+  add_subdirectory(gimp)
+endif()
+
+option(JPEGXL_ENABLE_PLUGIN_MIME "Enable image/jxl declaration for shared-mime-info" ON)
+if(JPEGXL_ENABLE_PLUGIN_MIME)
+  add_subdirectory(mime)
+endif()
diff --git a/third-party/libjxl/libjxl/plugins/gdk-pixbuf/CMakeLists.txt b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/CMakeLists.txt
new file mode 100644
index 0000000000..7b53b98c66
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/CMakeLists.txt
@@ -0,0 +1,83 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+find_package(PkgConfig)
+pkg_check_modules(Gdk-Pixbuf IMPORTED_TARGET gdk-pixbuf-2.0>=2.36)
+
+include(GNUInstallDirs)
+
+if (NOT Gdk-Pixbuf_FOUND)
+  message(WARNING "GDK Pixbuf development libraries not found, \
+                   the Gdk-Pixbuf plugin will not be built")
+  return ()
+endif ()
+
+add_library(pixbufloader-jxl MODULE pixbufloader-jxl.c)
+
+# Mark all symbols as hidden by default. The PkgConfig::Gdk-Pixbuf dependency
+# will cause fill_info and fill_vtable entry points to be made public.
+set_target_properties(pixbufloader-jxl PROPERTIES
+  CXX_VISIBILITY_PRESET hidden
+  VISIBILITY_INLINES_HIDDEN 1
+)
+
+# Note: This only needs the decoder library, but we don't install the decoder
+# shared library.
+target_link_libraries(pixbufloader-jxl jxl jxl_threads lcms2 PkgConfig::Gdk-Pixbuf)
+
+execute_process(COMMAND ${PKG_CONFIG_EXECUTABLE} gdk-pixbuf-2.0 --variable gdk_pixbuf_moduledir --define-variable=prefix=${CMAKE_INSTALL_PREFIX} OUTPUT_VARIABLE GDK_PIXBUF_MODULEDIR OUTPUT_STRIP_TRAILING_WHITESPACE)
+install(TARGETS pixbufloader-jxl DESTINATION "${GDK_PIXBUF_MODULEDIR}")
+
+# Instead of the following, we might instead add the
+# mime type image/jxl to
+# /usr/share/thumbnailers/gdk-pixbuf-thumbnailer.thumbnailer
+install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/jxl.thumbnailer DESTINATION "${CMAKE_INSTALL_DATADIR}/thumbnailers/")
+
+if(BUILD_TESTING AND NOT CMAKE_CROSSCOMPILING)
+  pkg_check_modules(Gdk IMPORTED_TARGET gdk-2.0)
+  if (Gdk_FOUND)
+    # Test for loading a .jxl file using the pixbufloader library via GDK. This
+    # requires the image/jxl mime type and loader library to be configured,
+    # which we do in a fake environment in the CMAKE_CURRENT_BINARY_DIR.
+    add_executable(pixbufloader_test pixbufloader_test.cc)
+    target_link_libraries(pixbufloader_test PkgConfig::Gdk)
+
+    # Create a mime cache for test.
+    add_custom_command(
+      OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/mime/mime.cache"
+      COMMAND env XDG_DATA_HOME=${CMAKE_CURRENT_BINARY_DIR}
+        xdg-mime install --novendor
+        "${CMAKE_SOURCE_DIR}/plugins/mime/image-jxl.xml"
+      DEPENDS "${CMAKE_SOURCE_DIR}/plugins/mime/image-jxl.xml"
+    )
+    add_custom_target(pixbufloader_test_mime
+      DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/mime/mime.cache"
+    )
+    add_dependencies(pixbufloader_test pixbufloader_test_mime)
+
+    # Use a fake X server to run the test if xvfb is installed.
+    find_program (XVFB_PROGRAM xvfb-run)
+    if(XVFB_PROGRAM)
+      set(XVFB_PROGRAM_PREFIX "${XVFB_PROGRAM};-a")
+    else()
+      set(XVFB_PROGRAM_PREFIX "")
+    endif()
+
+    # libX11.so and libgdk-x11-2.0.so are not compiled with MSAN, so MSAN reports
+    # use-of-uninitialized-value for some internal string value.
+    # TODO(eustas): investigate direct memory leak (32 bytes).
+    if (NOT (SANITIZER STREQUAL "msan") AND NOT (SANITIZER STREQUAL "asan"))
+      add_test(
+        NAME pixbufloader_test_jxl
+        COMMAND
+          ${XVFB_PROGRAM_PREFIX} $<TARGET_FILE:pixbufloader_test>
+          "${CMAKE_CURRENT_SOURCE_DIR}/loaders_test.cache"
+          "${CMAKE_SOURCE_DIR}/testdata/jxl/blending/cropped_traffic_light.jxl"
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+      )
+      set_tests_properties(pixbufloader_test_jxl PROPERTIES SKIP_RETURN_CODE 254)
+    endif()
+  endif()  # Gdk_FOUND
+endif()  # BUILD_TESTING
diff --git a/third-party/libjxl/libjxl/plugins/gdk-pixbuf/README.md b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/README.md
new file mode 100644
index 0000000000..185919436f
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/README.md
@@ -0,0 +1,50 @@
+## JPEG XL GDK Pixbuf
+
+
+The plugin may already have been installed when following the instructions from the
+[Installing section of BUILDING.md](../../BUILDING.md#installing), in which case it should
+already be in the correct place, e.g.
+
+```/usr/lib/x86_64-linux-gnu/gdk-pixbuf-2.0/2.10.0/loaders/libpixbufloader-jxl.so```
+
+Otherwise we can copy it manually:
+
+```bash
+sudo cp $your_build_directory/plugins/gdk-pixbuf/libpixbufloader-jxl.so /usr/lib/x86_64-linux-gnu/gdk-pixbuf-2.0/2.10.0/loaders/libpixbufloader-jxl.so
+```
+
+
+Then we need to update the cache, for example with:
+
+```bash
+sudo /usr/lib/x86_64-linux-gnu/gdk-pixbuf-2.0/gdk-pixbuf-query-loaders --update-cache
+```
+
+In order to get thumbnails with this, first one has to add the jxl MIME type, see
+[../mime/README.md](../mime/README.md).
+
+Ensure that the thumbnailer file is installed in the correct place,
+`/usr/share/thumbnailers/jxl.thumbnailer` or `/usr/local/share/thumbnailers/jxl.thumbnailer`.
+
+The file should have been copied automatically when following the instructions
+in the [Installing section of README.md](../../README.md#installing), but
+otherwise it can be copied manually:
+
+```bash
+sudo cp plugins/gdk-pixbuf/jxl.thumbnailer /usr/local/share/thumbnailers/jxl.thumbnailer
+```
+
+Update the MIME database with
+```bash
+update-mime --local
+```
+or
+```bash
+sudo update-desktop-database
+```
+
+Then possibly delete the thumbnail cache with
+```bash
+rm -r ~/.cache/thumbnails
+```
+and restart the application that displays the thumbnails (e.g. quit Nautilus with `nautilus -q`).
diff --git a/third-party/libjxl/libjxl/plugins/gdk-pixbuf/jxl.thumbnailer b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/jxl.thumbnailer
new file mode 100644
index 0000000000..1bcaab61fc
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/jxl.thumbnailer
@@ -0,0 +1,4 @@
+[Thumbnailer Entry]
+TryExec=/usr/bin/gdk-pixbuf-thumbnailer
+Exec=/usr/bin/gdk-pixbuf-thumbnailer -s %s %u %o
+MimeType=image/jxl;
diff --git a/third-party/libjxl/libjxl/plugins/gdk-pixbuf/loaders_test.cache b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/loaders_test.cache
new file mode 100644
index 0000000000..95c62c8fc3
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/loaders_test.cache
@@ -0,0 +1,16 @@
+# GdkPixbuf Image Loader Modules file for testing
+# Automatically generated file, do not edit
+# Created by gdk-pixbuf-query-loaders from gdk-pixbuf-2.42.2
+#
+# Generated with:
+#  GDK_PIXBUF_MODULEDIR=`pwd`/build/plugins/gdk-pixbuf/ gdk-pixbuf-query-loaders
+#
+# Modified to use the library from the current working directory at runtime.
+"./libpixbufloader-jxl.so"
+"jxl" 4 "gdk-pixbuf" "JPEG XL image" "BSD-3"
+"image/jxl" ""
+"jxl" ""
+"\377\n" "  " 100
+"...\fJXL \r\n\207\n" "zzz         " 100
+
+
diff --git a/third-party/libjxl/libjxl/plugins/gdk-pixbuf/pixbufloader-jxl.c b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/pixbufloader-jxl.c
new file mode 100644
index 0000000000..28eb140da7
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/pixbufloader-jxl.c
@@ -0,0 +1,814 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/codestream_header.h>
+#include <jxl/decode.h>
+#include <jxl/encode.h>
+#include <jxl/resizable_parallel_runner.h>
+#include <jxl/types.h>
+
+#include "lcms2.h"
+
+#define GDK_PIXBUF_ENABLE_BACKEND
+#include <gdk-pixbuf/gdk-pixbuf.h>
+#undef GDK_PIXBUF_ENABLE_BACKEND
+
+G_BEGIN_DECLS
+
+// Information about a single frame.
+typedef struct {
+  uint64_t duration_ms;
+  GdkPixbuf *data;
+  gboolean decoded;
+} GdkPixbufJxlAnimationFrame;
+
+// Represent a whole JPEG XL animation; all its fields are owned; as a GObject,
+// the Animation struct itself is reference counted (as are the GdkPixbufs for
+// individual frames).
+struct _GdkPixbufJxlAnimation {
+  GdkPixbufAnimation parent_instance;
+
+  // GDK interface implementation callbacks.
+  GdkPixbufModuleSizeFunc image_size_callback;
+  GdkPixbufModulePreparedFunc pixbuf_prepared_callback;
+  GdkPixbufModuleUpdatedFunc area_updated_callback;
+  gpointer user_data;
+
+  // All frames known so far; a frame is added when the JXL_DEC_FRAME event is
+  // received from the decoder; initially frame.decoded is FALSE, until
+  // the JXL_DEC_IMAGE event is received.
+  GArray *frames;
+
+  // JPEG XL decoder and related structures.
+  JxlParallelRunner *parallel_runner;
+  JxlDecoder *decoder;
+  JxlPixelFormat pixel_format;
+
+  // Decoding is `done` when JXL_DEC_SUCCESS is received; calling
+  // load_increment afterwards gives an error.
+  gboolean done;
+
+  // Image information.
+  size_t xsize;
+  size_t ysize;
+  gboolean alpha_premultiplied;
+  gboolean has_animation;
+  gboolean has_alpha;
+  uint64_t total_duration_ms;
+  uint64_t tick_duration_us;
+  uint64_t repetition_count;  // 0 = loop forever
+
+  gpointer icc_buff;
+  cmsContext context;
+  cmsHPROFILE profile, srgb;
+  cmsHTRANSFORM transform;
+};
+
+#define GDK_TYPE_PIXBUF_JXL_ANIMATION (gdk_pixbuf_jxl_animation_get_type())
+G_DECLARE_FINAL_TYPE(GdkPixbufJxlAnimation, gdk_pixbuf_jxl_animation, GDK,
+                     JXL_ANIMATION, GdkPixbufAnimation);
+
+G_DEFINE_TYPE(GdkPixbufJxlAnimation, gdk_pixbuf_jxl_animation,
+              GDK_TYPE_PIXBUF_ANIMATION);
+
+// Iterator to a given point in time in the animation; contains a pointer to the
+// full animation.
+struct _GdkPixbufJxlAnimationIter {
+  GdkPixbufAnimationIter parent_instance;
+  GdkPixbufJxlAnimation *animation;
+  size_t current_frame;
+  uint64_t time_offset;
+};
+
+#define GDK_TYPE_PIXBUF_JXL_ANIMATION_ITER \
+  (gdk_pixbuf_jxl_animation_iter_get_type())
+G_DECLARE_FINAL_TYPE(GdkPixbufJxlAnimationIter, gdk_pixbuf_jxl_animation_iter,
+                     GDK, JXL_ANIMATION_ITER, GdkPixbufAnimationIter);
+G_DEFINE_TYPE(GdkPixbufJxlAnimationIter, gdk_pixbuf_jxl_animation_iter,
+              GDK_TYPE_PIXBUF_ANIMATION_ITER);
+
+static void gdk_pixbuf_jxl_animation_init(GdkPixbufJxlAnimation *obj) {
+  // Suppress "unused function" warnings.
+  (void)glib_autoptr_cleanup_GdkPixbufJxlAnimation;
+  (void)GDK_JXL_ANIMATION;
+  (void)GDK_IS_JXL_ANIMATION;
+}
+
+static gboolean gdk_pixbuf_jxl_animation_is_static_image(
+    GdkPixbufAnimation *anim) {
+  GdkPixbufJxlAnimation *jxl_anim = (GdkPixbufJxlAnimation *)anim;
+  return !jxl_anim->has_animation;
+}
+
+static GdkPixbuf *gdk_pixbuf_jxl_animation_get_static_image(
+    GdkPixbufAnimation *anim) {
+  GdkPixbufJxlAnimation *jxl_anim = (GdkPixbufJxlAnimation *)anim;
+  if (jxl_anim->frames == NULL || jxl_anim->frames->len == 0) return NULL;
+  GdkPixbufJxlAnimationFrame *frame =
+      &g_array_index(jxl_anim->frames, GdkPixbufJxlAnimationFrame, 0);
+  return frame->decoded ? frame->data : NULL;
+}
+
+static void gdk_pixbuf_jxl_animation_get_size(GdkPixbufAnimation *anim,
+                                              int *width, int *height) {
+  GdkPixbufJxlAnimation *jxl_anim = (GdkPixbufJxlAnimation *)anim;
+  if (width) *width = jxl_anim->xsize;
+  if (height) *height = jxl_anim->ysize;
+}
+
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
+static gboolean gdk_pixbuf_jxl_animation_iter_advance(
+    GdkPixbufAnimationIter *iter, const GTimeVal *current_time);
+
+static GdkPixbufAnimationIter *gdk_pixbuf_jxl_animation_get_iter(
+    GdkPixbufAnimation *anim, const GTimeVal *start_time) {
+  GdkPixbufJxlAnimationIter *iter =
+      g_object_new(GDK_TYPE_PIXBUF_JXL_ANIMATION_ITER, NULL);
+  iter->animation = (GdkPixbufJxlAnimation *)anim;
+  iter->time_offset = start_time->tv_sec * 1000ULL + start_time->tv_usec / 1000;
+  g_object_ref(iter->animation);
+  gdk_pixbuf_jxl_animation_iter_advance((GdkPixbufAnimationIter *)iter,
+                                        start_time);
+  return (GdkPixbufAnimationIter *)iter;
+}
+G_GNUC_END_IGNORE_DEPRECATIONS
+
+// GObject finalizer: frees the frames, the JPEG XL decoder and lcms2 state.
+static void gdk_pixbuf_jxl_animation_finalize(GObject *obj) {
+  GdkPixbufJxlAnimation *anim = (GdkPixbufJxlAnimation *)obj;
+  for (size_t i = 0; anim->frames != NULL && i < anim->frames->len; i++) {
+    g_object_unref(
+        g_array_index(anim->frames, GdkPixbufJxlAnimationFrame, i).data);
+  }
+  if (anim->frames != NULL) g_array_free(anim->frames, /*free_segment=*/TRUE);
+  JxlResizableParallelRunnerDestroy(anim->parallel_runner);
+  JxlDecoderDestroy(anim->decoder);
+  cmsDeleteTransform(anim->transform);
+  cmsCloseProfile(anim->srgb);
+  cmsCloseProfile(anim->profile);
+  cmsDeleteContext(anim->context);
+  g_free(anim->icc_buff);
+  // GObject requires finalize() overrides to chain up to the parent class.
+  G_OBJECT_CLASS(gdk_pixbuf_jxl_animation_parent_class)->finalize(obj);
+}
+
+// Class initializer: wires the parent-class virtual methods implemented in
+// this file and installs the finalizer.
+static void gdk_pixbuf_jxl_animation_class_init(
+    GdkPixbufJxlAnimationClass *klass) {
+  // Animation virtual methods.
+  klass->parent_class.get_iter = gdk_pixbuf_jxl_animation_get_iter;
+  klass->parent_class.get_size = gdk_pixbuf_jxl_animation_get_size;
+  klass->parent_class.get_static_image =
+      gdk_pixbuf_jxl_animation_get_static_image;
+  klass->parent_class.is_static_image =
+      gdk_pixbuf_jxl_animation_is_static_image;
+
+  // Object teardown.
+  G_OBJECT_CLASS(klass)->finalize = gdk_pixbuf_jxl_animation_finalize;
+}
+
+// Instance initializer for the iterator. It performs no initialization; the
+// statements below only reference otherwise unused generated helpers
+// (presumably to keep the compiler from warning about them).
+static void gdk_pixbuf_jxl_animation_iter_init(GdkPixbufJxlAnimationIter *obj) {
+  (void)GDK_IS_JXL_ANIMATION_ITER;
+  (void)GDK_JXL_ANIMATION_ITER;
+  (void)glib_autoptr_cleanup_GdkPixbufJxlAnimationIter;
+}
+
+// Returns the duration in milliseconds of the iterator's current frame, or 0
+// when the current frame index is not (yet) present in the frame array.
+static int gdk_pixbuf_jxl_animation_iter_get_delay_time(
+    GdkPixbufAnimationIter *iter) {
+  GdkPixbufJxlAnimationIter *self = (GdkPixbufJxlAnimationIter *)iter;
+  GArray *frames = self->animation->frames;
+
+  if (self->current_frame >= frames->len) {
+    return 0;
+  }
+  return g_array_index(frames, GdkPixbufJxlAnimationFrame,
+                       self->current_frame)
+      .duration_ms;
+}
+
+// Returns the pixbuf of the iterator's current frame (no reference is added),
+// or NULL when the current frame index is not present in the frame array.
+static GdkPixbuf *gdk_pixbuf_jxl_animation_iter_get_pixbuf(
+    GdkPixbufAnimationIter *iter) {
+  GdkPixbufJxlAnimationIter *self = (GdkPixbufJxlAnimationIter *)iter;
+  GArray *frames = self->animation->frames;
+
+  if (self->current_frame >= frames->len) {
+    return NULL;
+  }
+  return g_array_index(frames, GdkPixbufJxlAnimationFrame,
+                       self->current_frame)
+      .data;
+}
+
+// Reports whether the iterator's current frame is still being loaded: TRUE
+// when the frame is not in the array yet or has not been marked as decoded.
+static gboolean gdk_pixbuf_jxl_animation_iter_on_currently_loading_frame(
+    GdkPixbufAnimationIter *iter) {
+  GdkPixbufJxlAnimationIter *self = (GdkPixbufJxlAnimationIter *)iter;
+  GArray *frames = self->animation->frames;
+
+  if (self->current_frame >= frames->len) {
+    return TRUE;
+  }
+  return !g_array_index(frames, GdkPixbufJxlAnimationFrame,
+                        self->current_frame)
+              .decoded;
+}
+
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
+// Moves the iterator to the frame that should be shown at `current_time` and
+// returns whether the current frame index changed.
+//
+// Frame selection:
+//  - no frames yet: frame 0;
+//  - still loading and past everything decoded so far: last frame;
+//  - finite repetition count exhausted: last frame;
+//  - otherwise: the frame containing (elapsed time modulo total duration).
+static gboolean gdk_pixbuf_jxl_animation_iter_advance(
+    GdkPixbufAnimationIter *iter, const GTimeVal *current_time) {
+  GdkPixbufJxlAnimationIter *jxl_iter = (GdkPixbufJxlAnimationIter *)iter;
+  GdkPixbufJxlAnimation *anim = jxl_iter->animation;
+  const size_t previous_frame = jxl_iter->current_frame;
+
+  // Milliseconds elapsed since the iterator's start time.
+  uint64_t elapsed_ms = current_time->tv_sec * 1000ULL +
+                        current_time->tv_usec / 1000 -
+                        jxl_iter->time_offset;
+
+  if (anim->frames->len == 0) {
+    jxl_iter->current_frame = 0;
+    return previous_frame != jxl_iter->current_frame;
+  }
+
+  if ((!anim->done && elapsed_ms >= anim->total_duration_ms) ||
+      (anim->repetition_count != 0 &&
+       elapsed_ms > anim->repetition_count * anim->total_duration_ms)) {
+    // Hold on the last available frame.
+    jxl_iter->current_frame = anim->frames->len - 1;
+    return previous_frame != jxl_iter->current_frame;
+  }
+
+  uint64_t cycle_ms = anim->total_duration_ms;
+  // Guard against divide-by-0 in malicious files.
+  if (cycle_ms == 0) cycle_ms = 1;
+
+  // Walk the frames, consuming each frame's duration from the position inside
+  // the current loop until the remaining time fits in a frame.
+  uint64_t remaining_ms = elapsed_ms % cycle_ms;
+  for (jxl_iter->current_frame = 0;; jxl_iter->current_frame++) {
+    uint64_t frame_ms =
+        g_array_index(anim->frames, GdkPixbufJxlAnimationFrame,
+                      jxl_iter->current_frame)
+            .duration_ms;
+    if (frame_ms >= remaining_ms) {
+      break;
+    }
+    remaining_ms -= frame_ms;
+  }
+
+  return previous_frame != jxl_iter->current_frame;
+}
+G_GNUC_END_IGNORE_DEPRECATIONS
+
+// Finalizer for the iterator: releases the reference it holds on the
+// animation it iterates over.
+static void gdk_pixbuf_jxl_animation_iter_finalize(GObject *obj) {
+  GdkPixbufJxlAnimation *owned_animation =
+      ((GdkPixbufJxlAnimationIter *)obj)->animation;
+  g_object_unref(owned_animation);
+}
+
+// Class initializer for the iterator: wires the parent-class virtual methods
+// implemented in this file and installs the finalizer.
+static void gdk_pixbuf_jxl_animation_iter_class_init(
+    GdkPixbufJxlAnimationIterClass *klass) {
+  // Iterator virtual methods.
+  klass->parent_class.advance = gdk_pixbuf_jxl_animation_iter_advance;
+  klass->parent_class.get_pixbuf = gdk_pixbuf_jxl_animation_iter_get_pixbuf;
+  klass->parent_class.get_delay_time =
+      gdk_pixbuf_jxl_animation_iter_get_delay_time;
+  klass->parent_class.on_currently_loading_frame =
+      gdk_pixbuf_jxl_animation_iter_on_currently_loading_frame;
+
+  // Object teardown.
+  G_OBJECT_CLASS(klass)->finalize = gdk_pixbuf_jxl_animation_iter_finalize;
+}
+
+G_END_DECLS
+
+// Starts an incremental load: creates the animation/decoder state object,
+// stores the loader callbacks, and sets up the libjxl decoder with a
+// resizable parallel runner and the events this loader handles.
+//
+// Returns the decoder state (to be passed to load_increment / stop_load), or
+// NULL with `error` set when any step fails.
+static gpointer begin_load(GdkPixbufModuleSizeFunc size_func,
+                           GdkPixbufModulePreparedFunc prepare_func,
+                           GdkPixbufModuleUpdatedFunc update_func,
+                           gpointer user_data, GError **error) {
+  JxlDecoderStatus status;
+  GdkPixbufJxlAnimation *decoder_state =
+      g_object_new(GDK_TYPE_PIXBUF_JXL_ANIMATION, NULL);
+  if (decoder_state == NULL) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "Creation of the animation state failed");
+    return NULL;
+  }
+  decoder_state->image_size_callback = size_func;
+  decoder_state->pixbuf_prepared_callback = prepare_func;
+  decoder_state->area_updated_callback = update_func;
+  decoder_state->user_data = user_data;
+  decoder_state->frames =
+      g_array_new(/*zero_terminated=*/FALSE, /*clear_=*/TRUE,
+                  sizeof(GdkPixbufJxlAnimationFrame));
+
+  if (decoder_state->frames == NULL) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "Creation of the frame array failed");
+    goto cleanup;
+  }
+
+  if (!(decoder_state->parallel_runner =
+            JxlResizableParallelRunnerCreate(NULL))) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "Creation of the JXL parallel runner failed");
+    goto cleanup;
+  }
+
+  if (!(decoder_state->decoder = JxlDecoderCreate(NULL))) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "Creation of the JXL decoder failed");
+    goto cleanup;
+  }
+
+  if ((status = JxlDecoderSetParallelRunner(
+           decoder_state->decoder, JxlResizableParallelRunner,
+           decoder_state->parallel_runner)) != JXL_DEC_SUCCESS) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "JxlDecoderSetParallelRunner failed: %x", status);
+    goto cleanup;
+  }
+  if ((status = JxlDecoderSubscribeEvents(
+           decoder_state->decoder, JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING |
+                                       JXL_DEC_FULL_IMAGE | JXL_DEC_FRAME)) !=
+      JXL_DEC_SUCCESS) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "JxlDecoderSubscribeEvents failed: %x", status);
+    goto cleanup;
+  }
+
+  // Decoded pixels are requested as native-endian floats; num_channels and
+  // align are filled in later by load_increment.
+  decoder_state->pixel_format.data_type = JXL_TYPE_FLOAT;
+  decoder_state->pixel_format.endianness = JXL_NATIVE_ENDIAN;
+
+  return decoder_state;
+cleanup:
+  // Dropping the last reference runs gdk_pixbuf_jxl_animation_finalize, which
+  // already destroys the parallel runner and the decoder. Destroying them
+  // here as well would free them twice.
+  g_object_unref(decoder_state);
+  return NULL;
+}
+
+// Ends an incremental load by releasing the reference on the decoder state
+// that begin_load returned. Always reports success.
+static gboolean stop_load(gpointer context, GError **error) {
+  (void)error;  // Never set: nothing here can fail.
+  g_object_unref(context);
+  return TRUE;
+}
+
+// Image-out callback handed to the decoder: colour-converts a run of
+// `num_pixels` decoded pixels starting at column `x` of row `y` and writes the
+// result into the pixbuf of the most recently appended frame.
+static void draw_pixels(void *context, size_t x, size_t y, size_t num_pixels,
+                        const void *pixels) {
+  GdkPixbufJxlAnimation *state = context;
+  GArray *frames = state->frames;
+
+  GdkPixbuf *target =
+      g_array_index(frames, GdkPixbufJxlAnimationFrame, frames->len - 1).data;
+
+  // Destination = start of row y, then x pixels of num_channels bytes each.
+  guchar *row_start =
+      gdk_pixbuf_get_pixels(target) + gdk_pixbuf_get_rowstride(target) * y;
+  guchar *dst = row_start + state->pixel_format.num_channels * x;
+
+  cmsDoTransform(state->transform, pixels, dst, num_pixels);
+}
+
+// Feeds one chunk (`buf`, `size` bytes) of the input file to the decoder and
+// handles every event the decoder reports until it needs more input, reports
+// the end of the image, or fails.
+//
+// `context` is the decoder state returned by begin_load. Returns TRUE when
+// the chunk was consumed (or decoding finished), FALSE with `error` set on
+// failure or when called again after decoding has finished.
+static gboolean load_increment(gpointer context, const guchar *buf, guint size,
+                               GError **error) {
+  GdkPixbufJxlAnimation *decoder_state = context;
+  if (decoder_state->done == TRUE) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "JXL decoder load_increment called after end of file");
+    return FALSE;
+  }
+
+  JxlDecoderStatus status;
+
+  if ((status = JxlDecoderSetInput(decoder_state->decoder, buf, size)) !=
+      JXL_DEC_SUCCESS) {
+    // Should never happen if things are done properly.
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "JXL decoder logic error: %x", status);
+    return FALSE;
+  }
+
+  // Event loop: each iteration handles one status returned by the decoder.
+  for (;;) {
+    status = JxlDecoderProcessInput(decoder_state->decoder);
+    switch (status) {
+      // Chunk exhausted: hand the input buffer back and wait for the next
+      // load_increment call.
+      case JXL_DEC_NEED_MORE_INPUT: {
+        JxlDecoderReleaseInput(decoder_state->decoder);
+        return TRUE;
+      }
+
+      // Image header available: record geometry, alpha and animation
+      // parameters, then let the caller veto/adjust the size.
+      case JXL_DEC_BASIC_INFO: {
+        JxlBasicInfo info;
+        if (JxlDecoderGetBasicInfo(decoder_state->decoder, &info) !=
+            JXL_DEC_SUCCESS) {
+          g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                      "JXLDecoderGetBasicInfo failed");
+          return FALSE;
+        }
+        decoder_state->pixel_format.num_channels = info.alpha_bits > 0 ? 4 : 3;
+        decoder_state->alpha_premultiplied = info.alpha_premultiplied;
+        decoder_state->xsize = info.xsize;
+        decoder_state->ysize = info.ysize;
+        decoder_state->has_animation = info.have_animation;
+        decoder_state->has_alpha = info.alpha_bits > 0;
+        if (info.have_animation) {
+          decoder_state->repetition_count = info.animation.num_loops;
+          // Microseconds per animation tick, from the ticks-per-second
+          // fraction tps_numerator / tps_denominator.
+          decoder_state->tick_duration_us = 1000000ULL *
+                                            info.animation.tps_denominator /
+                                            info.animation.tps_numerator;
+        }
+        gint width = info.xsize;
+        gint height = info.ysize;
+        if (decoder_state->image_size_callback) {
+          decoder_state->image_size_callback(&width, &height,
+                                             decoder_state->user_data);
+        }
+
+        // GDK convention for signaling being interested only in the basic info.
+        if (width == 0 || height == 0) {
+          decoder_state->done = TRUE;
+          return TRUE;
+        }
+
+        // Set an appropriate number of threads for the image size.
+        JxlResizableParallelRunnerSetThreads(
+            decoder_state->parallel_runner,
+            JxlResizableParallelRunnerSuggestThreads(info.xsize, info.ysize));
+        break;
+      }
+
+      // Colour information available: fetch the ICC profile of the decoded
+      // data and build an LCMS2 transform from it to 8-bit sRGB. The objects
+      // created here are stored in decoder_state and released by its
+      // finalizer.
+      case JXL_DEC_COLOR_ENCODING: {
+        // Get the ICC color profile of the pixel data
+        size_t icc_size;
+        if (JXL_DEC_SUCCESS != JxlDecoderGetICCProfileSize(
+                                   decoder_state->decoder,
+                                   JXL_COLOR_PROFILE_TARGET_DATA, &icc_size)) {
+          g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                      "JxlDecoderGetICCProfileSize failed");
+          return FALSE;
+        }
+        if (!(decoder_state->icc_buff = g_malloc(icc_size))) {
+          g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                      "Allocating ICC profile failed");
+          return FALSE;
+        }
+        if (JXL_DEC_SUCCESS !=
+            JxlDecoderGetColorAsICCProfile(decoder_state->decoder,
+                                           JXL_COLOR_PROFILE_TARGET_DATA,
+                                           decoder_state->icc_buff, icc_size)) {
+          g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                      "JxlDecoderGetColorAsICCProfile failed");
+          return FALSE;
+        }
+        decoder_state->context = cmsCreateContext(NULL, NULL);
+        if (!decoder_state->context) {
+          g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                      "Failed to create LCMS2 context");
+          return FALSE;
+        }
+        decoder_state->profile = cmsOpenProfileFromMemTHR(
+            decoder_state->context, decoder_state->icc_buff, icc_size);
+        if (!decoder_state->profile) {
+          g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                      "Invalid ICC profile from JXL image decoder");
+          return FALSE;
+        }
+        decoder_state->srgb = cmsCreate_sRGBProfileTHR(decoder_state->context);
+        if (!decoder_state->srgb) {
+          g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                      "Failed to create sRGB profile");
+          return FALSE;
+        }
+        // Input: float RGB(A) in the image's profile; output: 8-bit RGB(A) in
+        // sRGB, with the alpha channel copied through.
+        decoder_state->transform = cmsCreateTransformTHR(
+            decoder_state->context, decoder_state->profile,
+            decoder_state->has_alpha ? TYPE_RGBA_FLT : TYPE_RGB_FLT,
+            decoder_state->srgb,
+            decoder_state->has_alpha ? TYPE_RGBA_8 : TYPE_RGB_8,
+            INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_COPY_ALPHA);
+        if (!decoder_state->transform) {
+          g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                      "Failed to create LCMS2 color transform");
+          return FALSE;
+        }
+
+        break;
+      }
+
+      // A new frame starts: append a not-yet-decoded frame with its own
+      // full-size pixbuf and accumulate its duration.
+      case JXL_DEC_FRAME: {
+        // TODO(veluca): support rescaling.
+        JxlFrameHeader frame_header;
+        if (JxlDecoderGetFrameHeader(decoder_state->decoder, &frame_header) !=
+            JXL_DEC_SUCCESS) {
+          g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                      "Failed to retrieve frame info");
+          return FALSE;
+        }
+
+        {
+          GdkPixbufJxlAnimationFrame frame;
+          frame.decoded = FALSE;
+          // Frame duration is given in ticks; convert to milliseconds.
+          frame.duration_ms =
+              frame_header.duration * decoder_state->tick_duration_us / 1000;
+          decoder_state->total_duration_ms += frame.duration_ms;
+          frame.data =
+              gdk_pixbuf_new(GDK_COLORSPACE_RGB, decoder_state->has_alpha,
+                             /*bits_per_sample=*/8, decoder_state->xsize,
+                             decoder_state->ysize);
+          if (frame.data == NULL) {
+            g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                        "Failed to allocate output pixel buffer");
+            return FALSE;
+          }
+          decoder_state->pixel_format.align =
+              gdk_pixbuf_get_rowstride(frame.data);
+          g_array_append_val(decoder_state->frames, frame);
+        }
+        // Only the first frame is announced through the prepared callback;
+        // the animation object is passed along only for animated images.
+        if (decoder_state->pixbuf_prepared_callback &&
+            decoder_state->frames->len == 1) {
+          decoder_state->pixbuf_prepared_callback(
+              g_array_index(decoder_state->frames, GdkPixbufJxlAnimationFrame,
+                            0)
+                  .data,
+              decoder_state->has_animation ? (GdkPixbufAnimation *)decoder_state
+                                           : NULL,
+              decoder_state->user_data);
+        }
+        break;
+      }
+
+      // The decoder wants an output target: register draw_pixels, which
+      // writes into the pixbuf of the most recently appended frame.
+      case JXL_DEC_NEED_IMAGE_OUT_BUFFER: {
+        if (JXL_DEC_SUCCESS !=
+            JxlDecoderSetImageOutCallback(decoder_state->decoder,
+                                          &decoder_state->pixel_format,
+                                          draw_pixels, decoder_state)) {
+          g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                      "JxlDecoderSetImageOutCallback failed");
+          return FALSE;
+        }
+        break;
+      }
+
+      // A frame has been fully decoded. Note that the update callback is
+      // always given the pixbuf of frame 0, while the decoded flag is set on
+      // the most recently appended frame.
+      case JXL_DEC_FULL_IMAGE: {
+        // TODO(veluca): consider doing partial updates.
+        if (decoder_state->area_updated_callback) {
+          GdkPixbuf *output = g_array_index(decoder_state->frames,
+                                            GdkPixbufJxlAnimationFrame, 0)
+                                  .data;
+          decoder_state->area_updated_callback(
+              output, 0, 0, gdk_pixbuf_get_width(output),
+              gdk_pixbuf_get_height(output), decoder_state->user_data);
+        }
+        g_array_index(decoder_state->frames, GdkPixbufJxlAnimationFrame,
+                      decoder_state->frames->len - 1)
+            .decoded = TRUE;
+        break;
+      }
+
+      // Whole file decoded; further load_increment calls are rejected.
+      case JXL_DEC_SUCCESS: {
+        decoder_state->done = TRUE;
+        return TRUE;
+      }
+
+      // Any other status (including decoder errors) aborts the load.
+      default: {
+        g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                    "Unexpected JxlDecoderProcessInput return code: %x",
+                    status);
+        return FALSE;
+      }
+    }
+  }
+  return TRUE;
+}
+
+// Tells gdk-pixbuf which save options this module understands: only
+// "quality". A NULL key is handled by g_strcmp0 and reported as unsupported.
+static gboolean jxl_is_save_option_supported(const gchar *option_key) {
+  return g_strcmp0(option_key, "quality") == 0 ? TRUE : FALSE;
+}
+
+// Encodes `pixbuf` as a JPEG XL codestream (no container) and writes it to
+// `f`.
+//
+// `keys` / `values` are parallel NULL-terminated option arrays. The only
+// recognised key is "quality" (integer 0..100, default 90): values above 99
+// select lossless encoding, lower values are mapped to an encoder distance.
+// Unrecognised keys only produce a warning.
+//
+// Only 8-bit RGB (3 channels) and RGBA (4 channels) pixbufs are supported.
+//
+// Returns TRUE when the compressed data was written completely. Returns FALSE
+// otherwise; `error` is set for every failure except a NULL `f` or `pixbuf`.
+static gboolean jxl_image_saver(FILE *f, GdkPixbuf *pixbuf, gchar **keys,
+                                gchar **values, GError **error) {
+  long quality = 90; /* default; must be between 0 and 100 */
+  double distance;
+  gboolean save_alpha;
+  gboolean success = FALSE;
+  JxlEncoder *encoder = NULL;
+  void *parallel_runner = NULL;
+  JxlEncoderFrameSettings *frame_settings;
+  JxlBasicInfo output_info;
+  JxlPixelFormat pixel_format;
+  JxlColorEncoding color_profile;
+  JxlEncoderStatus status;
+
+  GByteArray *compressed = NULL;
+  size_t offset = 0;
+  uint8_t *next_out;
+  size_t avail_out;
+
+  if (f == NULL || pixbuf == NULL) {
+    return FALSE;
+  }
+
+  if (keys && *keys) {
+    gchar **kiter = keys;
+    gchar **viter = values;
+
+    while (*kiter) {
+      if (strcmp(*kiter, "quality") == 0) {
+        char *endptr = NULL;
+        quality = strtol(*viter, &endptr, 10);
+
+        if (endptr == *viter) {
+          g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_BAD_OPTION,
+                      "JXL quality must be a value between 0 and 100; value "
+                      "\"%s\" could not be parsed.",
+                      *viter);
+
+          return FALSE;
+        }
+
+        if (quality < 0 || quality > 100) {
+          g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_BAD_OPTION,
+                      "JXL quality must be a value between 0 and 100; value "
+                      "\"%ld\" is not allowed.",
+                      quality);
+
+          return FALSE;
+        }
+      } else {
+        g_warning("Unrecognized parameter (%s) passed to JXL saver.", *kiter);
+      }
+
+      ++kiter;
+      ++viter;
+    }
+  }
+
+  if (gdk_pixbuf_get_bits_per_sample(pixbuf) != 8) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_UNKNOWN_TYPE,
+                "Sorry, only 8bit images are supported by this JXL saver");
+    return FALSE;
+  }
+
+  JxlEncoderInitBasicInfo(&output_info);
+  output_info.have_container = JXL_FALSE;
+  output_info.xsize = gdk_pixbuf_get_width(pixbuf);
+  output_info.ysize = gdk_pixbuf_get_height(pixbuf);
+  output_info.bits_per_sample = 8;
+  output_info.orientation = JXL_ORIENT_IDENTITY;
+  output_info.num_color_channels = 3;
+
+  if (output_info.xsize == 0 || output_info.ysize == 0) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_CORRUPT_IMAGE,
+                "Empty image, nothing to save");
+    return FALSE;
+  }
+
+  save_alpha = gdk_pixbuf_get_has_alpha(pixbuf);
+
+  // Rows are read with the pixbuf's own row stride as alignment.
+  pixel_format.data_type = JXL_TYPE_UINT8;
+  pixel_format.endianness = JXL_NATIVE_ENDIAN;
+  pixel_format.align = gdk_pixbuf_get_rowstride(pixbuf);
+
+  if (save_alpha) {
+    if (gdk_pixbuf_get_n_channels(pixbuf) != 4) {
+      g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_UNKNOWN_TYPE,
+                  "Unsupported number of channels");
+      return FALSE;
+    }
+
+    output_info.num_extra_channels = 1;
+    output_info.alpha_bits = 8;
+    pixel_format.num_channels = 4;
+  } else {
+    if (gdk_pixbuf_get_n_channels(pixbuf) != 3) {
+      g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_UNKNOWN_TYPE,
+                  "Unsupported number of channels");
+      return FALSE;
+    }
+
+    output_info.num_extra_channels = 0;
+    output_info.alpha_bits = 0;
+    pixel_format.num_channels = 3;
+  }
+
+  // From here on every exit goes through `cleanup`, which releases whatever
+  // has been created so far.
+  encoder = JxlEncoderCreate(NULL);
+  if (!encoder) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "Creation of the JXL encoder failed");
+    goto cleanup;
+  }
+
+  parallel_runner = JxlResizableParallelRunnerCreate(NULL);
+  if (!parallel_runner) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "Creation of the JXL parallel runner failed");
+    goto cleanup;
+  }
+
+  JxlResizableParallelRunnerSetThreads(
+      parallel_runner, JxlResizableParallelRunnerSuggestThreads(
+                           output_info.xsize, output_info.ysize));
+
+  status = JxlEncoderSetParallelRunner(encoder, JxlResizableParallelRunner,
+                                       parallel_runner);
+  if (status != JXL_ENC_SUCCESS) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "JxlEncoderSetParallelRunner failed: %x", status);
+    goto cleanup;
+  }
+
+  // Map quality to an encoder distance: 100 is lossless (distance 0),
+  // 30..99 is linear, below 30 follows a quadratic that reaches 25 at 0.
+  if (quality > 99) {
+    output_info.uses_original_profile = JXL_TRUE;
+    distance = 0;
+  } else {
+    output_info.uses_original_profile = JXL_FALSE;
+    if (quality >= 30) {
+      distance = 0.1 + (100 - quality) * 0.09;
+    } else {
+      distance =
+          53.0 / 3000.0 * quality * quality - 23.0 / 20.0 * quality + 25.0;
+    }
+  }
+
+  status = JxlEncoderSetBasicInfo(encoder, &output_info);
+  if (status != JXL_ENC_SUCCESS) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "JxlEncoderSetBasicInfo failed: %x", status);
+    goto cleanup;
+  }
+
+  JxlColorEncodingSetToSRGB(&color_profile, JXL_FALSE);
+  status = JxlEncoderSetColorEncoding(encoder, &color_profile);
+  if (status != JXL_ENC_SUCCESS) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "JxlEncoderSetColorEncoding failed: %x", status);
+    goto cleanup;
+  }
+
+  frame_settings = JxlEncoderFrameSettingsCreate(encoder, NULL);
+  if (!frame_settings) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "JxlEncoderFrameSettingsCreate failed");
+    goto cleanup;
+  }
+  JxlEncoderSetFrameDistance(frame_settings, distance);
+  JxlEncoderSetFrameLossless(frame_settings, output_info.uses_original_profile);
+
+  status = JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+                                   gdk_pixbuf_read_pixels(pixbuf),
+                                   gdk_pixbuf_get_byte_length(pixbuf));
+  if (status != JXL_ENC_SUCCESS) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "JxlEncoderAddImageFrame failed: %x", status);
+    goto cleanup;
+  }
+
+  JxlEncoderCloseInput(encoder);
+
+  // Drain the encoder into a growable buffer, doubling it whenever the
+  // encoder runs out of space. `offset` is kept instead of a pointer because
+  // resizing may move the buffer.
+  compressed = g_byte_array_sized_new(4096);
+  g_byte_array_set_size(compressed, 4096);
+  do {
+    next_out = compressed->data + offset;
+    avail_out = compressed->len - offset;
+    status = JxlEncoderProcessOutput(encoder, &next_out, &avail_out);
+
+    if (status == JXL_ENC_NEED_MORE_OUTPUT) {
+      offset = next_out - compressed->data;
+      g_byte_array_set_size(compressed, compressed->len * 2);
+    } else if (status != JXL_ENC_SUCCESS) {
+      g_set_error(error, G_FILE_ERROR, 0, "JxlEncoderProcessOutput failed: %x",
+                  status);
+      goto cleanup;
+    }
+  } while (status != JXL_ENC_SUCCESS);
+
+  // Trim the buffer to the bytes actually produced.
+  g_byte_array_set_size(compressed, next_out - compressed->data);
+  if (compressed->len == 0) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "JXL encoder produced no output");
+    goto cleanup;
+  }
+
+  if (fwrite(compressed->data, 1, compressed->len, f) != compressed->len) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "Failed to write JXL data to the output file");
+    goto cleanup;
+  }
+
+  success = TRUE;
+
+cleanup:
+  if (compressed != NULL) {
+    g_byte_array_free(compressed, TRUE);
+  }
+  if (parallel_runner != NULL) {
+    JxlResizableParallelRunnerDestroy(parallel_runner);
+  }
+  if (encoder != NULL) {
+    JxlEncoderDestroy(encoder);
+  }
+  return success;
+}
+
+// Module entry point: registers this loader's load and save callbacks in the
+// gdk-pixbuf module table.
+void fill_vtable(GdkPixbufModule *module) {
+  // Incremental loading.
+  module->begin_load = begin_load;
+  module->load_increment = load_increment;
+  module->stop_load = stop_load;
+
+  // Saving.
+  module->save = jxl_image_saver;
+  module->is_save_option_supported = jxl_is_save_option_supported;
+}
+
+// Module entry point: describes the format handled by this loader (name,
+// file signatures, MIME type, extension, capabilities and license).
+void fill_info(GdkPixbufFormat *info) {
+  static gchar *extensions[] = {"jxl", NULL};
+
+  static gchar *mime_types[] = {"image/jxl", NULL};
+
+  // Two signatures are registered: the 2-byte pattern FF 0A, and a 12-byte
+  // pattern whose first three bytes are masked with 'z' followed by
+  // 0C "JXL " 0D 0A 87 0A.
+  static GdkPixbufModulePattern signature[] = {
+      {"\xFF\x0A", "  ", 100},
+      {"...\x0CJXL \x0D\x0A\x87\x0A", "zzz         ", 100},
+      {NULL, NULL, 0},
+  };
+
+  info->name = "jxl";
+  info->description = "JPEG XL image";
+  info->license = "BSD-3";
+  info->signature = signature;
+  info->mime_types = mime_types;
+  info->extensions = extensions;
+  info->flags = GDK_PIXBUF_FORMAT_WRITABLE | GDK_PIXBUF_FORMAT_THREADSAFE;
+}
diff --git a/third-party/libjxl/libjxl/plugins/gdk-pixbuf/pixbufloader_test.cc b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/pixbufloader_test.cc
new file mode 100644
index 0000000000..5e5642d491
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/pixbufloader_test.cc
@@ -0,0 +1,41 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <gdk-pixbuf/gdk-pixbuf.h>
+#include <gdk/gdk.h>
+#include <glib.h>
+#include <stdlib.h>
+
+// Loads <image.jxl> through gdk-pixbuf using the loader cache passed on the
+// command line. Exit codes: 0 on success, 1 on bad usage or load failure,
+// 254 when GDK cannot be initialised (reported as needing a DISPLAY).
+int main(int argc, char* argv[]) {
+  if (argc != 3) {
+    fprintf(stderr, "Usage: %s <loaders.cache> <image.jxl>\n", argv[0]);
+    return 1;
+  }
+
+  // Point gdk-pixbuf at the loader cache under test.
+  setenv("GDK_PIXBUF_MODULE_FILE", argv[1], true);
+
+  // XDG_DATA_HOME is the path where we look for the mime cache.
+  // XDG_DATA_DIRS directories are used in addition to XDG_DATA_HOME.
+  setenv("XDG_DATA_HOME", ".", true);
+  setenv("XDG_DATA_DIRS", "", true);
+
+  if (!gdk_init_check(nullptr, nullptr)) {
+    fprintf(stderr, "This test requires a DISPLAY\n");
+    // Signals ctest that we should mark this test as skipped.
+    return 254;
+  }
+
+  const char* filename = argv[2];
+  GError* error = nullptr;
+  GdkPixbuf* pb = gdk_pixbuf_new_from_file(filename, &error);
+  if (pb == nullptr) {
+    fprintf(stderr, "Error loading file: %s\n", filename);
+    g_assert_no_error(error);
+    return 1;
+  }
+
+  g_object_unref(pb);
+  return 0;
+}
diff --git a/third-party/libjxl/libjxl/plugins/gimp/CMakeLists.txt b/third-party/libjxl/libjxl/plugins/gimp/CMakeLists.txt
new file mode 100644
index 0000000000..f0a49005ed
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gimp/CMakeLists.txt
@@ -0,0 +1,28 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Locate the GIMP 2.10+ development libraries through pkg-config.
+find_package(PkgConfig)
+pkg_check_modules(Gimp IMPORTED_TARGET gimp-2.0>=2.10 gimpui-2.0>=2.10)
+
+# The plugin is optional: without GIMP, warn and skip the rest of this file.
+if (NOT Gimp_FOUND)
+  message(WARNING "Gimp development libraries not found, the Gimp plugin will not be built")
+  return ()
+endif ()
+
+# The plugin is a standalone executable named file-jxl.
+add_executable(file-jxl WIN32
+  common.h
+  common.cc
+  file-jxl-load.cc
+  file-jxl-load.h
+  file-jxl-save.cc
+  file-jxl-save.h
+  file-jxl.cc)
+target_link_libraries(file-jxl jxl jxl_threads PkgConfig::Gimp)
+
+target_include_directories(file-jxl PUBLIC
+    ${PROJECT_SOURCE_DIR})  # for plugins/gimp absolute paths.
+
+# Install into the plug-ins/file-jxl/ directory under GIMP's gimplibdir, as
+# reported by pkg-config.
+pkg_get_variable(GIMP_LIB_DIR gimp-2.0 gimplibdir)
+install(TARGETS file-jxl RUNTIME DESTINATION "${GIMP_LIB_DIR}/plug-ins/file-jxl/")
diff --git a/third-party/libjxl/libjxl/plugins/gimp/common.cc b/third-party/libjxl/libjxl/plugins/gimp/common.cc
new file mode 100644
index 0000000000..1a884570cb
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gimp/common.cc
@@ -0,0 +1,27 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "plugins/gimp/common.h"
+
+namespace jxl {
+
+// Starts a GIMP progress display labelled with `message`. Progress is
+// tracked as a step counter out of 100.
+JpegXlGimpProgress::JpegXlGimpProgress(const char *message)
+    : cur_progress(0), max_progress(100) {
+  gimp_progress_init_printf("%s\n", message);
+}
+
+// Advances the step counter by one and reports the resulting fraction.
+void JpegXlGimpProgress::update() {
+  ++cur_progress;
+  const float fraction =
+      static_cast<float>(cur_progress) / static_cast<float>(max_progress);
+  gimp_progress_update(fraction);
+}
+
+// Reports the progress as complete.
+void JpegXlGimpProgress::finished() { gimp_progress_update(1.0); }
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/plugins/gimp/common.h b/third-party/libjxl/libjxl/plugins/gimp/common.h
new file mode 100644
index 0000000000..3fe63c1a47
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gimp/common.h
@@ -0,0 +1,45 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef PLUGINS_GIMP_COMMON_H_
+#define PLUGINS_GIMP_COMMON_H_
+
+#include <libgimp/gimp.h>
+#include <libgimp/gimpui.h>
+#include <math.h>
+
+#include <fstream>
+#include <iterator>
+#include <string>
+#include <vector>
+
+#define PLUG_IN_BINARY "file-jxl"
+#define SAVE_PROC "file-jxl-save"
+
+// Defined by both FUIF and glib.
+#undef MAX
+#undef MIN
+#undef CLAMP
+
+#include <jxl/resizable_parallel_runner.h>
+#include <jxl/resizable_parallel_runner_cxx.h>
+
+namespace jxl {
+
+// Small helper around the GIMP progress display used by the JPEG XL plugin.
+class JpegXlGimpProgress {
+ public:
+  // Begins a progress display showing `message`.
+  explicit JpegXlGimpProgress(const char *message);
+  // Advances the progress by one step.
+  void update();
+  // Marks the progress as complete.
+  void finished();
+
+ private:
+  // Number of steps reported so far.
+  int cur_progress;
+  // Number of steps that corresponds to 100%.
+  int max_progress;
+
+};  // class JpegXlGimpProgress
+
+}  // namespace jxl
+
+#endif  // PLUGINS_GIMP_COMMON_H_
diff --git a/third-party/libjxl/libjxl/plugins/gimp/file-jxl-load.cc b/third-party/libjxl/libjxl/plugins/gimp/file-jxl-load.cc
new file mode 100644
index 0000000000..ec2ac19e69
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gimp/file-jxl-load.cc
@@ -0,0 +1,486 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "plugins/gimp/file-jxl-load.h"
+
+#include <jxl/decode.h>
+#include <jxl/decode_cxx.h>
+
+#define _PROFILE_ORIGIN_ JXL_COLOR_PROFILE_TARGET_ORIGINAL
+#define _PROFILE_TARGET_ JXL_COLOR_PROFILE_TARGET_DATA
+#define LOAD_PROC "file-jxl-load"
+
+namespace jxl {
+
+bool SetJpegXlOutBuffer(  // sizes, g_malloc()s and registers the output buffer
+    std::unique_ptr<JxlDecoderStruct, JxlDecoderDestroyStruct> *dec,
+    JxlPixelFormat *format, size_t *buffer_size, gpointer *pixels_buffer_1) {
+  if (JXL_DEC_SUCCESS !=
+      JxlDecoderImageOutBufferSize(dec->get(), format, buffer_size)) {
+    g_printerr(LOAD_PROC " Error: JxlDecoderImageOutBufferSize failed\n");
+    return false;
+  }
+  *pixels_buffer_1 = g_malloc(*buffer_size);
+  if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutBuffer(
+          dec->get(), format, *pixels_buffer_1, *buffer_size)) {
+    g_printerr(LOAD_PROC " Error: JxlDecoderSetImageOutBuffer failed\n");
+    g_clear_pointer(pixels_buffer_1, g_free);  // caller bails out: don't leak
+    return false;
+  }
+  return true;
+}
+
+// Decodes the JPEG XL file `filename` into a new GIMP image, one layer per
+// decoded frame, and stores the new image's id in *image_id.  Returns true on
+// success; every failure prints a message with g_printerr and returns false.
+bool LoadJpegXlImage(const gchar *const filename, gint32 *const image_id) {
+  bool stop_processing = false;
+  JxlDecoderStatus status = JXL_DEC_NEED_MORE_INPUT;
+  std::vector<uint8_t> icc_profile;
+  GimpColorProfile *profile_icc = nullptr;
+  GimpColorProfile *profile_int = nullptr;
+  bool is_linear = false;
+  unsigned long xsize = 0, ysize = 0;
+  long crop_x0 = 0, crop_y0 = 0;
+  size_t layer_idx = 0;
+  uint32_t frame_duration = 0;
+  double tps_denom = 1.f, tps_numer = 1.f;
+
+  gint32 layer;
+
+  gpointer pixels_buffer_1 = nullptr;
+  gpointer pixels_buffer_2 = nullptr;
+  size_t buffer_size = 0;
+
+  GimpImageBaseType image_type = GIMP_RGB;
+  GimpImageType layer_type = GIMP_RGB_IMAGE;
+  GimpPrecision precision = GIMP_PRECISION_U16_GAMMA;
+  JxlBasicInfo info = {};
+  JxlPixelFormat format = {};
+  JxlAnimationHeader animation = {};
+  JxlBlendMode blend_mode = JXL_BLEND_BLEND;
+  std::vector<char> frame_name;  // NUL-terminated name of the current frame
+  size_t frame_name_len = 0;
+
+  format.num_channels = 4;
+  format.data_type = JXL_TYPE_FLOAT;
+  format.endianness = JXL_NATIVE_ENDIAN;
+  format.align = 0;
+
+  bool is_gray = false;
+
+  JpegXlGimpProgress gimp_load_progress(
+      ("Opening JPEG XL file:" + std::string(filename)).c_str());
+  gimp_load_progress.update();
+
+  // read file
+  std::ifstream instream(filename, std::ios::in | std::ios::binary);
+  std::vector<uint8_t> compressed((std::istreambuf_iterator<char>(instream)),
+                                  std::istreambuf_iterator<char>());
+  instream.close();
+
+  gimp_load_progress.update();
+
+  // multi-threaded parallel runner.
+  auto runner = JxlResizableParallelRunnerMake(nullptr);
+
+  auto dec = JxlDecoderMake(nullptr);
+  if (JXL_DEC_SUCCESS !=
+      JxlDecoderSubscribeEvents(
+          dec.get(), JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING |
+                         JXL_DEC_FULL_IMAGE | JXL_DEC_FRAME_PROGRESSION |
+                         JXL_DEC_FRAME)) {
+    g_printerr(LOAD_PROC " Error: JxlDecoderSubscribeEvents failed\n");
+    return false;
+  }
+
+  if (JXL_DEC_SUCCESS != JxlDecoderSetParallelRunner(dec.get(),
+                                                     JxlResizableParallelRunner,
+                                                     runner.get())) {
+    g_printerr(LOAD_PROC " Error: JxlDecoderSetParallelRunner failed\n");
+    return false;
+  }
+  // TODO: make this work with coalescing set to false, while handling frames
+  // with duration 0 and references to earlier frames correctly.
+  if (JXL_DEC_SUCCESS != JxlDecoderSetCoalescing(dec.get(), JXL_TRUE)) {
+    g_printerr(LOAD_PROC " Error: JxlDecoderSetCoalescing failed\n");
+    return false;
+  }
+
+  // grand decode loop...
+  JxlDecoderSetInput(dec.get(), compressed.data(), compressed.size());
+
+  if (JXL_DEC_SUCCESS != JxlDecoderSetProgressiveDetail(
+                             dec.get(), JxlProgressiveDetail::kPasses)) {
+    g_printerr(LOAD_PROC " Error: JxlDecoderSetProgressiveDetail failed\n");
+    return false;
+  }
+
+  while (true) {
+    gimp_load_progress.update();
+
+    if (!stop_processing) status = JxlDecoderProcessInput(dec.get());
+
+    if (status == JXL_DEC_BASIC_INFO) {
+      if (JXL_DEC_SUCCESS != JxlDecoderGetBasicInfo(dec.get(), &info)) {
+        g_printerr(LOAD_PROC " Error: JxlDecoderGetBasicInfo failed\n");
+        return false;
+      }
+
+      xsize = info.xsize;
+      ysize = info.ysize;
+      if (info.have_animation) {
+        animation = info.animation;
+        tps_denom = animation.tps_denominator;
+        tps_numer = animation.tps_numerator;
+      }
+
+      JxlResizableParallelRunnerSetThreads(
+          runner.get(), JxlResizableParallelRunnerSuggestThreads(xsize, ysize));
+    } else if (status == JXL_DEC_COLOR_ENCODING) {
+      // check for ICC profile
+      size_t icc_size = 0;
+      JxlColorEncoding color_encoding;
+      if (JXL_DEC_SUCCESS !=
+          JxlDecoderGetColorAsEncodedProfile(dec.get(), _PROFILE_ORIGIN_,
+                                             &color_encoding)) {
+        // Attempt to load ICC profile when no internal color encoding
+        if (JXL_DEC_SUCCESS != JxlDecoderGetICCProfileSize(
+                                   dec.get(), _PROFILE_ORIGIN_, &icc_size)) {
+          g_printerr(LOAD_PROC
+                     " Warning: JxlDecoderGetICCProfileSize failed\n");
+        }
+
+        if (icc_size > 0) {
+          icc_profile.resize(icc_size);
+          if (JXL_DEC_SUCCESS != JxlDecoderGetColorAsICCProfile(
+                                     dec.get(), _PROFILE_ORIGIN_,
+                                     icc_profile.data(), icc_profile.size())) {
+            g_printerr(LOAD_PROC
+                       " Warning: JxlDecoderGetColorAsICCProfile failed\n");
+          }
+
+          profile_icc = gimp_color_profile_new_from_icc_profile(
+              icc_profile.data(), icc_profile.size(), nullptr);
+
+          if (profile_icc) {
+            is_linear = gimp_color_profile_is_linear(profile_icc);
+            g_printerr(LOAD_PROC " Info: Color profile is_linear = %d\n",
+                       is_linear);
+          } else {
+            g_printerr(LOAD_PROC " Warning: Failed to read ICC profile.\n");
+          }
+        } else {
+          g_printerr(LOAD_PROC " Warning: Empty ICC data.\n");
+        }
+      }
+
+      // Internal color profile detection...
+      if (JXL_DEC_SUCCESS ==
+          JxlDecoderGetColorAsEncodedProfile(dec.get(), _PROFILE_TARGET_,
+                                             &color_encoding)) {
+        g_printerr(LOAD_PROC " Info: Internal color encoding detected.\n");
+
+        // figure out linearity of internal profile
+        switch (color_encoding.transfer_function) {
+          case JXL_TRANSFER_FUNCTION_LINEAR:
+            is_linear = true;
+            break;
+
+          case JXL_TRANSFER_FUNCTION_709:
+          case JXL_TRANSFER_FUNCTION_PQ:
+          case JXL_TRANSFER_FUNCTION_HLG:
+          case JXL_TRANSFER_FUNCTION_GAMMA:
+          case JXL_TRANSFER_FUNCTION_DCI:
+          case JXL_TRANSFER_FUNCTION_SRGB:
+            is_linear = false;
+            break;
+
+          case JXL_TRANSFER_FUNCTION_UNKNOWN:
+          default:
+            if (profile_icc) {
+              g_printerr(LOAD_PROC
+                         " Info: Unknown transfer function.  "
+                         "ICC profile is present.");
+            } else {
+              g_printerr(LOAD_PROC
+                         " Info: Unknown transfer function.  "
+                         "No ICC profile present.");
+            }
+            break;
+        }
+
+        switch (color_encoding.color_space) {
+          case JXL_COLOR_SPACE_RGB:
+            if (color_encoding.white_point == JXL_WHITE_POINT_D65 &&
+                color_encoding.primaries == JXL_PRIMARIES_SRGB) {
+              if (is_linear) {
+                profile_int = gimp_color_profile_new_rgb_srgb_linear();
+              } else {
+                profile_int = gimp_color_profile_new_rgb_srgb();
+              }
+            } else if (!is_linear &&
+                       color_encoding.white_point == JXL_WHITE_POINT_D65 &&
+                       (color_encoding.primaries_green_xy[0] == 0.2100 ||
+                        color_encoding.primaries_green_xy[1] == 0.7100)) {
+              // Probably Adobe RGB.  NOTE(review): '||' + exact == looks loose.
+              profile_int = gimp_color_profile_new_rgb_adobe();
+            } else if (profile_icc) {
+              g_printerr(LOAD_PROC
+                         " Info: Unknown RGB colorspace.  "
+                         "Using ICC profile.\n");
+            } else {
+              g_printerr(LOAD_PROC
+                         " Info: Unknown RGB colorspace.  "
+                         "Treating as sRGB.\n");
+              if (is_linear) {
+                profile_int = gimp_color_profile_new_rgb_srgb_linear();
+              } else {
+                profile_int = gimp_color_profile_new_rgb_srgb();
+              }
+            }
+            break;
+
+          case JXL_COLOR_SPACE_GRAY:
+            is_gray = true;
+            if (!profile_icc ||
+                color_encoding.white_point == JXL_WHITE_POINT_D65) {
+              if (is_linear) {
+                profile_int = gimp_color_profile_new_d65_gray_linear();
+              } else {
+                profile_int = gimp_color_profile_new_d65_gray_srgb_trc();
+              }
+            }
+            break;
+          case JXL_COLOR_SPACE_XYB:
+          case JXL_COLOR_SPACE_UNKNOWN:
+          default:
+            if (profile_icc) {
+              g_printerr(LOAD_PROC
+                         " Info: Unknown colorspace.  Using ICC profile.\n");
+            } else {
+              // g_printerr, not g_error: g_error() aborts the process, which
+              // made the sRGB fallback below unreachable.
+              g_printerr(
+                  LOAD_PROC
+                  " Warning: Unknown colorspace. Treating as sRGB profile.\n");
+              if (is_linear) {
+                profile_int = gimp_color_profile_new_rgb_srgb_linear();
+              } else {
+                profile_int = gimp_color_profile_new_rgb_srgb();
+              }
+            }
+            break;
+        }
+      }
+
+      // set pixel format
+      if (info.num_color_channels > 1) {
+        if (info.alpha_bits == 0) {
+          image_type = GIMP_RGB;
+          layer_type = GIMP_RGB_IMAGE;
+          format.num_channels = info.num_color_channels;
+        } else {
+          image_type = GIMP_RGB;
+          layer_type = GIMP_RGBA_IMAGE;
+          format.num_channels = info.num_color_channels + 1;
+        }
+      } else if (info.num_color_channels == 1) {
+        if (info.alpha_bits == 0) {
+          image_type = GIMP_GRAY;
+          layer_type = GIMP_GRAY_IMAGE;
+          format.num_channels = info.num_color_channels;
+        } else {
+          image_type = GIMP_GRAY;
+          layer_type = GIMP_GRAYA_IMAGE;
+          format.num_channels = info.num_color_channels + 1;
+        }
+      }
+
+      // Set image bit depth and linearity
+      if (info.bits_per_sample <= 8) {
+        if (is_linear) {
+          precision = GIMP_PRECISION_U8_LINEAR;
+        } else {
+          precision = GIMP_PRECISION_U8_GAMMA;
+        }
+      } else if (info.bits_per_sample <= 16) {
+        if (info.exponent_bits_per_sample > 0) {
+          if (is_linear) {
+            precision = GIMP_PRECISION_HALF_LINEAR;
+          } else {
+            precision = GIMP_PRECISION_HALF_GAMMA;
+          }
+        } else if (is_linear) {
+          precision = GIMP_PRECISION_U16_LINEAR;
+        } else {
+          precision = GIMP_PRECISION_U16_GAMMA;
+        }
+      } else {
+        if (info.exponent_bits_per_sample > 0) {
+          if (is_linear) {
+            precision = GIMP_PRECISION_FLOAT_LINEAR;
+          } else {
+            precision = GIMP_PRECISION_FLOAT_GAMMA;
+          }
+        } else if (is_linear) {
+          precision = GIMP_PRECISION_U32_LINEAR;
+        } else {
+          precision = GIMP_PRECISION_U32_GAMMA;
+        }
+      }
+
+      // create new image
+      if (is_linear) {
+        *image_id = gimp_image_new_with_precision(xsize, ysize, image_type,
+                                                  GIMP_PRECISION_FLOAT_LINEAR);
+      } else {
+        *image_id = gimp_image_new_with_precision(xsize, ysize, image_type,
+                                                  GIMP_PRECISION_FLOAT_GAMMA);
+      }
+
+      if (profile_int) {
+        gimp_image_set_color_profile(*image_id, profile_int);
+      } else if (!profile_icc) {
+        g_printerr(LOAD_PROC " Warning: No color profile.\n");
+      }
+    } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+      // get image from decoder in FLOAT
+      format.data_type = JXL_TYPE_FLOAT;
+      if (!SetJpegXlOutBuffer(&dec, &format, &buffer_size, &pixels_buffer_1))
+        return false;
+    } else if (status == JXL_DEC_FULL_IMAGE) {
+      // create and insert layer
+      gchar *layer_name;
+      if (layer_idx == 0 && !info.have_animation) {
+        layer_name = g_strdup_printf("Background");
+      } else {
+        // Blend-mode suffix for the layer name.  String literals replace the
+        // former g_string_new() temporaries, which were never freed.
+        const char *blend;
+        if (blend_mode == JXL_BLEND_REPLACE) {
+          blend = " (replace)";
+        } else if (blend_mode == JXL_BLEND_BLEND) {
+          blend = " (combine)";
+        } else {
+          blend = "";
+        }
+        // Unnamed frames are labelled with their 1-based index.
+        const std::string shown_name =
+            frame_name_len == 0 ? "Frame " + std::to_string(layer_idx + 1)
+                                : std::string(frame_name.data());
+        double fduration = frame_duration * 1000.f * tps_denom / tps_numer;
+        layer_name = g_strdup_printf("%s (%.15gms)%s", shown_name.c_str(),
+                                     fduration, blend);
+      }
+      layer = gimp_layer_new(*image_id, layer_name, xsize, ysize, layer_type,
+                             /*opacity=*/100,
+                             gimp_image_get_default_new_layer_mode(*image_id));
+
+      gimp_image_insert_layer(*image_id, layer, /*parent_id=*/-1,
+                              /*position=*/0);
+
+      pixels_buffer_2 = g_malloc(buffer_size);
+      GeglBuffer *buffer = gimp_drawable_get_buffer(layer);
+      const Babl *destination_format = gegl_buffer_set_format(buffer, nullptr);
+
+      std::string babl_format_str = "";
+      if (is_gray) {
+        babl_format_str += "Y'";
+      } else {
+        babl_format_str += "R'G'B'";
+      }
+      if (info.alpha_bits > 0) {
+        babl_format_str += "A";
+      }
+      babl_format_str += " float";
+
+      const Babl *source_format = babl_format(babl_format_str.c_str());
+
+      babl_process(babl_fish(source_format, destination_format),
+                   pixels_buffer_1, pixels_buffer_2, xsize * ysize);
+
+      gegl_buffer_set(buffer, GEGL_RECTANGLE(0, 0, xsize, ysize), 0, nullptr,
+                      pixels_buffer_2, GEGL_AUTO_ROWSTRIDE);
+      gimp_item_transform_translate(layer, crop_x0, crop_y0);
+
+      g_clear_object(&buffer);
+      g_free(pixels_buffer_1);
+      g_free(pixels_buffer_2);
+      if (stop_processing) status = JXL_DEC_SUCCESS;
+      g_free(layer_name);
+      layer_idx++;
+    } else if (status == JXL_DEC_FRAME) {
+      JxlFrameHeader frame_header;
+      if (JxlDecoderGetFrameHeader(dec.get(), &frame_header) !=
+          JXL_DEC_SUCCESS) {
+        g_printerr(LOAD_PROC " Error: JxlDecoderGetFrameHeader failed\n");
+        return false;
+      }
+      xsize = frame_header.layer_info.xsize;
+      ysize = frame_header.layer_info.ysize;
+      crop_x0 = frame_header.layer_info.crop_x0;
+      crop_y0 = frame_header.layer_info.crop_y0;
+      frame_duration = frame_header.duration;
+      blend_mode = frame_header.layer_info.blend_info.blendmode;
+      if (blend_mode != JXL_BLEND_BLEND && blend_mode != JXL_BLEND_REPLACE) {
+        g_printerr(
+            LOAD_PROC
+            " Warning: JxlDecoderGetFrameHeader: Unhandled blend mode: %d\n",
+            blend_mode);
+      }
+      if ((frame_name_len = frame_header.name_length) > 0) {
+        // +1: JxlDecoderGetFrameName() also writes the terminating NUL.
+        frame_name.resize(frame_name_len + 1);
+        if (JXL_DEC_SUCCESS !=
+            JxlDecoderGetFrameName(dec.get(), frame_name.data(),
+                                   frame_name.size())) {
+          g_printerr(LOAD_PROC "Error: JxlDecoderGetFrameName failed");
+          return false;
+        };
+      }
+    } else if (status == JXL_DEC_SUCCESS) {
+      // All decoding successfully finished.
+      // It's not required to call JxlDecoderReleaseInput(dec.get())
+      // since the decoder will be destroyed.
+      break;
+    } else if (status == JXL_DEC_NEED_MORE_INPUT ||
+               status == JXL_DEC_FRAME_PROGRESSION) {
+      stop_processing = status != JXL_DEC_FRAME_PROGRESSION;
+      if (JxlDecoderFlushImage(dec.get()) == JXL_DEC_SUCCESS) {
+        status = JXL_DEC_FULL_IMAGE;
+        continue;
+      }
+      g_printerr(LOAD_PROC " Error: Already provided all input\n");
+      return false;
+    } else if (status == JXL_DEC_ERROR) {
+      g_printerr(LOAD_PROC " Error: Decoder error\n");
+      return false;
+    } else {
+      g_printerr(LOAD_PROC " Error: Unknown decoder status\n");
+      return false;
+    }
+  }  // end grand decode loop
+
+  gimp_load_progress.update();
+
+  if (profile_icc) {
+    gimp_image_set_color_profile(*image_id, profile_icc);
+  }
+
+  gimp_load_progress.update();
+
+  // TODO(xiota): Add option to keep image as float
+  if (info.bits_per_sample < 32) {
+    gimp_image_convert_precision(*image_id, precision);
+  }
+
+  gimp_image_set_filename(*image_id, filename);
+
+  gimp_load_progress.finished();
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/plugins/gimp/file-jxl-load.h b/third-party/libjxl/libjxl/plugins/gimp/file-jxl-load.h
new file mode 100644
index 0000000000..ef5b92fef6
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gimp/file-jxl-load.h
@@ -0,0 +1,17 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef PLUGINS_GIMP_FILE_JXL_LOAD_H_
+#define PLUGINS_GIMP_FILE_JXL_LOAD_H_
+
+#include "plugins/gimp/common.h"
+
+namespace jxl {
+
+bool LoadJpegXlImage(const gchar* filename, gint32* image_id);
+
+}  // namespace jxl
+
+#endif  // PLUGINS_GIMP_FILE_JXL_LOAD_H_
diff --git a/third-party/libjxl/libjxl/plugins/gimp/file-jxl-save.cc b/third-party/libjxl/libjxl/plugins/gimp/file-jxl-save.cc
new file mode 100644
index 0000000000..f6702283e9
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gimp/file-jxl-save.cc
@@ -0,0 +1,893 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "plugins/gimp/file-jxl-save.h"
+
+#include <jxl/encode.h>
+#include <jxl/encode_cxx.h>
+
+#include <cmath>
+#include <utility>
+
+#include "gobject/gsignal.h"
+
+#define PLUG_IN_BINARY "file-jxl"
+#define SAVE_PROC "file-jxl-save"
+
+#define SCALE_WIDTH 200
+
+namespace jxl {
+
+namespace {
+
+#ifndef g_clear_signal_handler
+// g_clear_signal_handler was added in glib 2.62
+void g_clear_signal_handler(gulong* handler, gpointer instance) {
+  // Nothing to disconnect for a null slot or a stored id of 0.
+  if (handler == nullptr || *handler == 0) return;
+  g_signal_handler_disconnect(instance, *handler);
+  *handler = 0;
+}
+#endif  // g_clear_signal_handler
+
+class JpegXlSaveOpts {  // export settings read and written by the save dialog
+ public:
+  // NOTE(review): no in-class default for these two; presumably set by the
+  // constructor, which is not visible here.  Confirm.
+  float distance;  // value mirrored into the "Distance" slider
+  float quality;   // value mirrored into the "Quality" slider
+
+  bool lossless = false;   // state of the "Lossless Mode" checkbox
+  bool is_linear = false;
+  bool has_alpha = false;
+  bool is_gray = false;
+  bool icc_attached = false;
+
+  bool advanced_mode = false;  // state of the advanced-mode toggle
+  bool use_container = true;   // false while "Save Raw Codestream" is ticked
+  bool save_exif = false;
+  int encoding_effort = 7;  // the dialog stores 10 - "Speed" slider value
+  int faster_decoding = 0;  // reset to 0 when advanced mode is switched off
+
+  std::string babl_format_str = "RGB u16";
+  std::string babl_type_str = "u16";
+  std::string babl_model_str = "RGB";
+
+  JxlPixelFormat pixel_format;
+  JxlBasicInfo basic_info;  // uses_original_profile follows the no-XYB checkbox
+
+  // Member functions (defined outside the class body).
+  JpegXlSaveOpts();
+
+  bool SetDistance(float dist);
+  bool SetQuality(float qual);
+  bool SetDimensions(int x, int y);
+  bool SetNumChannels(int channels);
+
+  bool UpdateDistance();
+  bool UpdateQuality();
+
+  bool SetModel(bool is_linear_);
+
+  bool UpdateBablFormat();
+  bool SetBablModel(std::string model);
+  bool SetBablType(std::string type);
+
+  bool SetPrecision(int gimp_precision);
+
+ private:
+};  // class JpegXlSaveOpts
+
+JpegXlSaveOpts jxl_save_opts;
+
+class JpegXlSaveGui {  // GTK export dialog plus its signal callbacks
+ public:
+  bool SaveDialog();  // builds the export dialog and wires the callbacks below
+
+ private:
+  GtkWidget* toggle_lossless = nullptr;      // "Lossless Mode" checkbox
+  GtkAdjustment* entry_distance = nullptr;   // "Distance" slider
+  GtkAdjustment* entry_quality = nullptr;    // "Quality" slider
+  GtkAdjustment* entry_effort = nullptr;     // "Speed" slider
+  GtkAdjustment* entry_faster = nullptr;  // presumably faster-decoding; confirm
+  GtkWidget* frame_advanced = nullptr;       // "Advanced Settings" frame
+  GtkWidget* toggle_no_xyb = nullptr;  // "Do not use XYB colorspace" checkbox
+  GtkWidget* toggle_raw = nullptr;     // "Save Raw Codestream" checkbox
+  gulong handle_toggle_lossless = 0;  // g_signal_connect id; 0 = disconnected
+  gulong handle_entry_quality = 0;    // g_signal_connect id; 0 = disconnected
+  gulong handle_entry_distance = 0;   // g_signal_connect id; 0 = disconnected
+
+  static bool GuiOnChangeQuality(GtkAdjustment* adj_qual, void* this_pointer);
+
+  static bool GuiOnChangeDistance(GtkAdjustment* adj_dist, void* this_pointer);
+
+  static bool GuiOnChangeEffort(GtkAdjustment* adj_effort);
+  static bool GuiOnChangeLossless(GtkWidget* toggle, void* this_pointer);
+  static bool GuiOnChangeCodestream(GtkWidget* toggle);
+  static bool GuiOnChangeNoXYB(GtkWidget* toggle);
+  // `this_pointer` in these callbacks is cast back to JpegXlSaveGui*.
+  static bool GuiOnChangeAdvancedMode(GtkWidget* toggle, void* this_pointer);
+};  // class JpegXlSaveGui
+
+JpegXlSaveGui jxl_save_gui;
+
+// "value-changed" handler of the quality slider (`this_pointer`: the dialog).
+bool JpegXlSaveGui::GuiOnChangeQuality(GtkAdjustment* adj_qual,
+                                       void* this_pointer) {
+  auto* gui = static_cast<JpegXlSaveGui*>(this_pointer);
+  // Detach the three linked handlers while the widgets are updated from code.
+  g_clear_signal_handler(&gui->handle_entry_distance, gui->entry_distance);
+  g_clear_signal_handler(&gui->handle_entry_quality, gui->entry_quality);
+  g_clear_signal_handler(&gui->handle_toggle_lossless, gui->toggle_lossless);
+  // Hand the new quality to jxl_save_opts and mirror its distance value.
+  jxl_save_opts.SetQuality(gtk_adjustment_get_value(adj_qual));
+  gtk_adjustment_set_value(gui->entry_distance, jxl_save_opts.distance);
+  // Re-attach the handlers.
+  gui->handle_toggle_lossless = g_signal_connect(
+      gui->toggle_lossless, "toggled", G_CALLBACK(GuiOnChangeLossless), gui);
+  gui->handle_entry_distance =
+      g_signal_connect(gui->entry_distance, "value-changed",
+                       G_CALLBACK(GuiOnChangeDistance), gui);
+  gui->handle_entry_quality =
+      g_signal_connect(gui->entry_quality, "value-changed",
+                       G_CALLBACK(GuiOnChangeQuality), gui);
+  return true;
+}
+
+// "value-changed" handler of the distance slider (`this_pointer`: the dialog).
+bool JpegXlSaveGui::GuiOnChangeDistance(GtkAdjustment* adj_dist,
+                                        void* this_pointer) {
+  auto* gui = static_cast<JpegXlSaveGui*>(this_pointer);
+  // Detach the three linked handlers while the widgets are updated from code.
+  g_clear_signal_handler(&gui->handle_entry_distance, gui->entry_distance);
+  g_clear_signal_handler(&gui->handle_entry_quality, gui->entry_quality);
+  g_clear_signal_handler(&gui->handle_toggle_lossless, gui->toggle_lossless);
+  // Hand the new distance to jxl_save_opts and mirror its quality value.
+  jxl_save_opts.SetDistance(gtk_adjustment_get_value(adj_dist));
+  gtk_adjustment_set_value(gui->entry_quality, jxl_save_opts.quality);
+  // Unless the distance is below 0.001, untick the lossless and no-XYB boxes.
+  if (!(jxl_save_opts.distance < 0.001)) {
+    gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(gui->toggle_lossless),
+                                 false);
+    gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(gui->toggle_no_xyb), false);
+  }
+  // Re-attach the handlers.
+  gui->handle_toggle_lossless = g_signal_connect(
+      gui->toggle_lossless, "toggled", G_CALLBACK(GuiOnChangeLossless), gui);
+  gui->handle_entry_distance =
+      g_signal_connect(gui->entry_distance, "value-changed",
+                       G_CALLBACK(GuiOnChangeDistance), gui);
+  gui->handle_entry_quality =
+      g_signal_connect(gui->entry_quality, "value-changed",
+                       G_CALLBACK(GuiOnChangeQuality), gui);
+  return true;
+}
+
+bool JpegXlSaveGui::GuiOnChangeEffort(GtkAdjustment* adj_effort) {
+  const float effort = 10 - gtk_adjustment_get_value(adj_effort);
+  jxl_save_opts.encoding_effort = effort;  // stored value is 10 - slider value
+  return true;
+}
+
+// "toggled" handler of the lossless checkbox (`this_pointer`: the dialog).
+// Records the new state and applies a preset to the sliders and no-XYB box.
+bool JpegXlSaveGui::GuiOnChangeLossless(GtkWidget* toggle, void* this_pointer) {
+  auto* gui = static_cast<JpegXlSaveGui*>(this_pointer);
+  jxl_save_opts.lossless =
+      gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(toggle));
+  // Detach the three linked handlers while the widgets are updated from code.
+  g_clear_signal_handler(&gui->handle_entry_distance, gui->entry_distance);
+  g_clear_signal_handler(&gui->handle_entry_quality, gui->entry_quality);
+  g_clear_signal_handler(&gui->handle_toggle_lossless, gui->toggle_lossless);
+  if (jxl_save_opts.lossless) {
+    // Ticked: quality 100, distance 0, effort slider 7, no-XYB box ticked.
+    gtk_adjustment_set_value(gui->entry_quality, 100.0);
+    gtk_adjustment_set_value(gui->entry_distance, 0.0);
+    jxl_save_opts.distance = 0;
+    jxl_save_opts.UpdateQuality();
+    gtk_adjustment_set_value(gui->entry_effort, 7);
+    gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(gui->toggle_no_xyb), true);
+  } else {
+    // Unticked: quality 90, distance 1, effort slider 3, no-XYB box unticked.
+    gtk_adjustment_set_value(gui->entry_quality, 90.0);
+    gtk_adjustment_set_value(gui->entry_distance, 1.0);
+    jxl_save_opts.distance = 1.0;
+    jxl_save_opts.UpdateQuality();
+    gtk_adjustment_set_value(gui->entry_effort, 3);
+    gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(gui->toggle_no_xyb), false);
+  }
+  // Re-attach the handlers.
+  gui->handle_toggle_lossless = g_signal_connect(
+      gui->toggle_lossless, "toggled", G_CALLBACK(GuiOnChangeLossless), gui);
+  gui->handle_entry_distance =
+      g_signal_connect(gui->entry_distance, "value-changed",
+                       G_CALLBACK(GuiOnChangeDistance), gui);
+  gui->handle_entry_quality =
+      g_signal_connect(gui->entry_quality, "value-changed",
+                       G_CALLBACK(GuiOnChangeQuality), gui);
+  return true;
+}
+
+bool JpegXlSaveGui::GuiOnChangeCodestream(GtkWidget* toggle) {
+  const bool raw = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(toggle));
+  jxl_save_opts.use_container = !raw;  // ticked box turns the container off
+  return true;
+}
+
+bool JpegXlSaveGui::GuiOnChangeNoXYB(GtkWidget* toggle) {
+  const gboolean keep = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(toggle));
+  jxl_save_opts.basic_info.uses_original_profile = keep;  // follows the box
+  return true;
+}
+
+// "toggled" handler of the advanced-mode checkbox (`this_pointer`: the
+// dialog).  Enables or greys out the advanced frame to match the checkbox.
+bool JpegXlSaveGui::GuiOnChangeAdvancedMode(GtkWidget* toggle,
+                                            void* this_pointer) {
+  auto* gui = static_cast<JpegXlSaveGui*>(this_pointer);
+  const bool advanced = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(toggle));
+  jxl_save_opts.advanced_mode = advanced;
+  gtk_widget_set_sensitive(gui->frame_advanced, advanced);
+  if (advanced) return true;
+
+  // Advanced mode was switched off: put the three advanced options back to
+  // XYB allowed, container on, faster_decoding 0, in options and widgets.
+  jxl_save_opts.basic_info.uses_original_profile = false;
+  gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(gui->toggle_no_xyb), false);
+  jxl_save_opts.use_container = true;
+  gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(gui->toggle_raw), false);
+  jxl_save_opts.faster_decoding = 0;
+  gtk_adjustment_set_value(GTK_ADJUSTMENT(gui->entry_faster), 0);
+  return true;
+}
+
+bool JpegXlSaveGui::SaveDialog() {
+  gboolean run;
+  GtkWidget* dialog;
+  GtkWidget* content_area;
+  GtkWidget* main_vbox;
+  GtkWidget* frame;
+  GtkWidget* toggle;
+  GtkWidget* table;
+  GtkWidget* vbox;
+  GtkWidget* separator;
+
+  // initialize export dialog
+  gimp_ui_init(PLUG_IN_BINARY, true);
+  dialog = gimp_export_dialog_new("JPEG XL", PLUG_IN_BINARY, SAVE_PROC);
+
+  gtk_window_set_resizable(GTK_WINDOW(dialog), false);
+  content_area = gimp_export_dialog_get_content_area(dialog);
+
+  main_vbox = gtk_vbox_new(false, 6);
+  gtk_container_set_border_width(GTK_CONTAINER(main_vbox), 6);
+  gtk_box_pack_start(GTK_BOX(content_area), main_vbox, true, true, 0);
+  gtk_widget_show(main_vbox);
+
+  // Standard Settings Frame
+  frame = gtk_frame_new(nullptr);
+  gtk_frame_set_shadow_type(GTK_FRAME(frame), GTK_SHADOW_ETCHED_IN);
+  gtk_box_pack_start(GTK_BOX(main_vbox), frame, false, false, 0);
+  gtk_widget_show(frame);
+
+  vbox = gtk_vbox_new(false, 6);
+  gtk_container_set_border_width(GTK_CONTAINER(vbox), 6);
+  gtk_container_add(GTK_CONTAINER(frame), vbox);
+  gtk_widget_show(vbox);
+
+  // Layout Table
+  table = gtk_table_new(20, 3, false);
+  gtk_table_set_col_spacings(GTK_TABLE(table), 6);
+  gtk_box_pack_start(GTK_BOX(vbox), table, false, false, 0);
+  gtk_widget_show(table);
+
+  // Distance Slider
+  static gchar distance_help[] =
+      "Butteraugli distance target.  Suggested values:"
+      "\n\td\u00A0=\u00A00.3\tExcellent"
+      "\n\td\u00A0=\u00A01\tVery Good"
+      "\n\td\u00A0=\u00A02\tGood"
+      "\n\td\u00A0=\u00A03\tFair"
+      "\n\td\u00A0=\u00A06\tPoor";
+
+  entry_distance = (GtkAdjustment*)gimp_scale_entry_new(
+      GTK_TABLE(table), 0, 0, "Distance", SCALE_WIDTH, 0,
+      jxl_save_opts.distance, 0.0, 15.0, 0.001, 1.0, 3, true, 0.0, 0.0,
+      distance_help, SAVE_PROC);
+  gimp_scale_entry_set_logarithmic((GtkObject*)entry_distance, true);
+
+  // Quality Slider
+  static gchar quality_help[] =
+      "JPEG-style Quality is remapped to distance.  "
+      "Values roughly match libjpeg quality settings.";
+  entry_quality = (GtkAdjustment*)gimp_scale_entry_new(
+      GTK_TABLE(table), 0, 1, "Quality", SCALE_WIDTH, 0, jxl_save_opts.quality,
+      8.26, 100.0, 1.0, 10.0, 2, true, 0.0, 0.0, quality_help, SAVE_PROC);
+
+  // Distance and Quality Signals
+  handle_entry_distance = g_signal_connect(
+      entry_distance, "value-changed", G_CALLBACK(GuiOnChangeDistance), this);
+  handle_entry_quality = g_signal_connect(entry_quality, "value-changed",
+                                          G_CALLBACK(GuiOnChangeQuality), this);
+
+  // ----------
+  separator = gtk_vseparator_new();
+  gtk_table_attach(GTK_TABLE(table), separator, 0, 2, 2, 3, GTK_EXPAND,
+                   GTK_EXPAND, 9, 9);
+  gtk_widget_show(separator);
+
+  // Encoding Effort / Speed
+  static gchar effort_help[] =
+      "Adjust encoding speed.  Higher values are faster because "
+      "the encoder uses less effort to hit distance targets.  "
+      "As\u00A0a\u00A0result, image quality may be decreased.  "
+      "Default\u00A0=\u00A03.";
+  entry_effort = (GtkAdjustment*)gimp_scale_entry_new(
+      GTK_TABLE(table), 0, 3, "Speed", SCALE_WIDTH, 0,
+      10 - jxl_save_opts.encoding_effort, 1, 9, 1, 2, 0, true, 0.0, 0.0,
+      effort_help, SAVE_PROC);
+
+  // effort signal
+  g_signal_connect(entry_effort, "value-changed", G_CALLBACK(GuiOnChangeEffort),
+                   nullptr);
+
+  // ----------
+  separator = gtk_vseparator_new();
+  gtk_table_attach(GTK_TABLE(table), separator, 0, 2, 4, 5, GTK_EXPAND,
+                   GTK_EXPAND, 9, 9);
+  gtk_widget_show(separator);
+
+  // Lossless Mode Convenience Checkbox
+  static gchar lossless_help[] =
+      "Compress using modular lossless mode.  "
+      "Speed\u00A0is adjusted to improve performance.";
+  toggle_lossless = gtk_check_button_new_with_label("Lossless Mode");
+  gimp_help_set_help_data(toggle_lossless, lossless_help, nullptr);
+  gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(toggle_lossless),
+                               jxl_save_opts.lossless);
+  gtk_table_attach_defaults(GTK_TABLE(table), toggle_lossless, 0, 2, 5, 6);
+  gtk_widget_show(toggle_lossless);
+
+  // lossless signal
+  handle_toggle_lossless = g_signal_connect(
+      toggle_lossless, "toggled", G_CALLBACK(GuiOnChangeLossless), this);
+
+  // ----------
+  separator = gtk_vseparator_new();
+  gtk_box_pack_start(GTK_BOX(main_vbox), separator, false, false, 1);
+  gtk_widget_show(separator);
+
+  // Advanced Settings Frame
+  frame_advanced = gtk_frame_new("Advanced Settings");
+  gimp_help_set_help_data(frame_advanced,
+                          "Some advanced settings may produce malformed files.",
+                          nullptr);
+  gtk_frame_set_shadow_type(GTK_FRAME(frame_advanced), GTK_SHADOW_ETCHED_IN);
+  gtk_box_pack_start(GTK_BOX(main_vbox), frame_advanced, true, true, 0);
+  gtk_widget_show(frame_advanced);
+
+  gtk_widget_set_sensitive(frame_advanced, false);
+
+  vbox = gtk_vbox_new(false, 6);
+  gtk_container_set_border_width(GTK_CONTAINER(vbox), 6);
+  gtk_container_add(GTK_CONTAINER(frame_advanced), vbox);
+  gtk_widget_show(vbox);
+
+  // uses_original_profile
+  static gchar uses_original_profile_help[] =
+      "Prevents conversion to the XYB colorspace.  "
+      "File sizes are approximately doubled.";
+  toggle_no_xyb = gtk_check_button_new_with_label("Do not use XYB colorspace");
+  gimp_help_set_help_data(toggle_no_xyb, uses_original_profile_help, nullptr);
+  gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(toggle_no_xyb),
+                               jxl_save_opts.basic_info.uses_original_profile);
+  gtk_box_pack_start(GTK_BOX(vbox), toggle_no_xyb, false, false, 0);
+  gtk_widget_show(toggle_no_xyb);
+
+  g_signal_connect(toggle_no_xyb, "toggled", G_CALLBACK(GuiOnChangeNoXYB),
+                   nullptr);
+
+  // save raw codestream
+  static gchar codestream_help[] =
+      "Save the raw codestream, without a container.  "
+      "The container is required for metadata and some other features.";
+  toggle_raw = gtk_check_button_new_with_label("Save Raw Codestream");
+  gimp_help_set_help_data(toggle_raw, codestream_help, nullptr);
+  gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(toggle_raw),
+                               !jxl_save_opts.use_container);
+  gtk_box_pack_start(GTK_BOX(vbox), toggle_raw, false, false, 0);
+  gtk_widget_show(toggle_raw);
+
+  g_signal_connect(toggle_raw, "toggled", G_CALLBACK(GuiOnChangeCodestream),
+                   nullptr);
+
+  // ----------
+  separator = gtk_vseparator_new();
+  gtk_box_pack_start(GTK_BOX(vbox), separator, false, false, 1);
+  gtk_widget_show(separator);
+
+  // Faster Decoding / Decoding Speed
+  static gchar faster_help[] =
+      "Improve decoding speed at the expense of quality.  "
+      "Default\u00A0=\u00A00.";
+  table = gtk_table_new(1, 3, false);
+  gtk_table_set_col_spacings(GTK_TABLE(table), 6);
+  gtk_container_add(GTK_CONTAINER(vbox), table);
+  gtk_widget_show(table);
+
+  entry_faster = (GtkAdjustment*)gimp_scale_entry_new(
+      GTK_TABLE(table), 0, 0, "Faster Decoding", SCALE_WIDTH, 0,
+      jxl_save_opts.faster_decoding, 0, 4, 1, 1, 0, true, 0.0, 0.0, faster_help,
+      SAVE_PROC);
+
+  // Faster Decoding Signals
+  g_signal_connect(entry_faster, "value-changed",
+                   G_CALLBACK(gimp_int_adjustment_update),
+                   &jxl_save_opts.faster_decoding);
+
+  // Enable Advanced Settings
+  frame = gtk_frame_new(nullptr);
+  gtk_frame_set_shadow_type(GTK_FRAME(frame), GTK_SHADOW_NONE);
+  gtk_box_pack_start(GTK_BOX(main_vbox), frame, true, true, 0);
+  gtk_widget_show(frame);
+
+  vbox = gtk_vbox_new(false, 6);
+  gtk_container_set_border_width(GTK_CONTAINER(vbox), 6);
+  gtk_container_add(GTK_CONTAINER(frame), vbox);
+  gtk_widget_show(vbox);
+
+  static gchar advanced_help[] =
+      "Some advanced settings may produce malformed files.";
+  toggle = gtk_check_button_new_with_label("Enable Advanced Settings");
+  gimp_help_set_help_data(toggle, advanced_help, nullptr);
+  gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(toggle),
+                               jxl_save_opts.advanced_mode);
+  gtk_box_pack_start(GTK_BOX(vbox), toggle, false, false, 0);
+  gtk_widget_show(toggle);
+
+  g_signal_connect(toggle, "toggled", G_CALLBACK(GuiOnChangeAdvancedMode),
+                   this);
+
+  // show dialog
+  gtk_widget_show(dialog);
+
+  GtkAllocation allocation;
+  gtk_widget_get_allocation(dialog, &allocation);
+
+  int height = allocation.height;
+  gtk_widget_set_size_request(dialog, height * 1.5, height);
+
+  run = (gimp_dialog_run(GIMP_DIALOG(dialog)) == GTK_RESPONSE_OK);
+  gtk_widget_destroy(dialog);
+
+  return run;
+}  // JpegXlSaveGui::SaveDialog
+
+// Sets up the default save options: a distance of 1.0 (the matching quality
+// is derived from it), a four-channel float pixel format in native byte order
+// with no row alignment, and a basic_info filled in by
+// JxlEncoderInitBasicInfo.
+JpegXlSaveOpts::JpegXlSaveOpts() {
+  SetDistance(1.0);
+
+  // Field order: num_channels, data_type, endianness, align.
+  pixel_format = JxlPixelFormat{4, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0};
+
+  JxlEncoderInitBasicInfo(&basic_info);
+}  // JpegXlSaveOpts constructor
+
+// Chooses the babl color model and the channel count from the is_gray and
+// has_alpha members plus the requested linearity.
+//
+// is_linear_  true selects the linear model name ("Y" / "RGB"), false the
+//             primed one ("Y'" / "R'G'B'").  An "A" is appended when the
+//             image has alpha.
+//
+// Always returns true.
+bool JpegXlSaveOpts::SetModel(bool is_linear_) {
+  const int color_channels = is_gray ? 1 : 3;
+
+  std::string model_name;
+  if (is_gray) {
+    model_name = is_linear_ ? "Y" : "Y'";
+  } else {
+    model_name = is_linear_ ? "RGB" : "R'G'B'";
+  }
+
+  if (has_alpha) {
+    model_name += "A";
+  }
+
+  SetBablModel(model_name);
+  SetNumChannels(has_alpha ? color_channels + 1 : color_channels);
+  return true;
+}  // JpegXlSaveOpts::SetModel
+
+// Stores the requested distance and recomputes the quality value that
+// corresponds to it.  Returns the result of UpdateQuality().
+bool JpegXlSaveOpts::SetDistance(float dist) {
+  distance = dist;
+  const bool quality_updated = UpdateQuality();
+  return quality_updated;
+}
+
+// Stores the requested quality and recomputes the distance value that
+// corresponds to it.  Returns the result of UpdateDistance().
+bool JpegXlSaveOpts::SetQuality(float qual) {
+  quality = qual;
+  const bool distance_updated = UpdateDistance();
+  return distance_updated;
+}
+
+// Derives `quality` (clamped to [0, 100]) from the current `distance`.
+//
+// Distances below 0.1 map to quality 100 and leave `lossless` untouched.
+// Any other distance clears `lossless`; up to 6.4 the mapping is linear,
+// above 6.4 it follows a square-root curve.  Both pieces give quality 30 at
+// distance 6.4.
+//
+// Always returns true.
+bool JpegXlSaveOpts::UpdateQuality() {
+  float mapped;
+
+  if (distance < 0.1) {
+    mapped = 100;
+  } else {
+    lossless = false;
+    if (distance > 6.4) {
+      mapped =
+          -5.0 / 53.0 * sqrt(6360.0 * distance - 39975.0) + 1725.0 / 53.0;
+    } else {
+      mapped = 100 - (distance - 0.1) / 0.09;
+    }
+  }
+
+  // Clamp into the range accepted by the quality slider.
+  if (mapped < 0) {
+    mapped = 0.0;
+  } else if (mapped >= 100) {
+    mapped = 100.0;
+  }
+  quality = mapped;
+
+  return true;
+}
+
+// Derives `distance` (capped at 25) from the current `quality`.
+//
+// Quality 30 and above uses a linear mapping; lower qualities use a
+// quadratic curve.  This is the inverse of the mapping in UpdateQuality().
+//
+// Always returns true.
+bool JpegXlSaveOpts::UpdateDistance() {
+  float mapped;
+  if (quality < 30) {
+    mapped = 53.0 / 3000.0 * quality * quality - 23.0 / 20.0 * quality + 25.0;
+  } else {
+    mapped = 0.1 + (100 - quality) * 0.09;
+  }
+
+  distance = (mapped > 25) ? 25 : mapped;
+  return true;
+}
+
+// Records the image width (x) and height (y) in basic_info.
+// Always returns true.
+bool JpegXlSaveOpts::SetDimensions(int x, int y) {
+  const int width = x;
+  const int height = y;
+  basic_info.xsize = width;
+  basic_info.ysize = height;
+  return true;
+}
+
+// Configures pixel_format and basic_info for the given interleaved channel
+// count.
+//
+// channels  1 = gray, 2 = gray + alpha, 3 = color, 4 = color + alpha.
+//           Any other value is handled as 3.
+//
+// When alpha is present, one extra channel is declared and alpha_bits is set
+// to bits_per_sample capped at 16; otherwise both are zero.
+//
+// Always returns true.
+bool JpegXlSaveOpts::SetNumChannels(int channels) {
+  if (channels < 1 || channels > 4) {
+    channels = 3;
+  }
+
+  const bool with_alpha = (channels == 2 || channels == 4);
+
+  pixel_format.num_channels = channels;
+  basic_info.num_color_channels = (channels <= 2) ? 1 : 3;
+  basic_info.num_extra_channels = with_alpha ? 1 : 0;
+  if (with_alpha) {
+    basic_info.alpha_bits = int(std::fmin(16, basic_info.bits_per_sample));
+  } else {
+    basic_info.alpha_bits = 0;
+  }
+  basic_info.alpha_exponent_bits = 0;
+
+  return true;
+}  // JpegXlSaveOpts::SetNumChannels
+
+// Rebuilds babl_format_str as "<model> <type>" from the stored model and
+// type strings.  Always returns true.
+bool JpegXlSaveOpts::UpdateBablFormat() {
+  std::string format_name = babl_model_str;
+  format_name += " ";
+  format_name += babl_type_str;
+  babl_format_str = format_name;
+  return true;
+}
+
+// Stores the babl model name and refreshes the combined format string.
+// Returns the result of UpdateBablFormat().
+bool JpegXlSaveOpts::SetBablModel(std::string model) {
+  babl_model_str = std::move(model);
+  const bool refreshed = UpdateBablFormat();
+  return refreshed;
+}
+
+// Stores the babl component type name and refreshes the combined format
+// string.  Returns the result of UpdateBablFormat().
+bool JpegXlSaveOpts::SetBablType(std::string type) {
+  babl_type_str = std::move(type);
+  const bool refreshed = UpdateBablFormat();
+  return refreshed;
+}
+
+// Maps a GIMP precision value onto basic_info's sample depth.
+//
+//   half float          -> 16 bits, 5 exponent bits
+//   u32 and 32-bit float -> 32 bits, 8 exponent bits (UINT32 is not
+//                           supported by the encoder, so FLOAT is used)
+//   u16                 -> 16 bits, integer
+//   u8 / anything else  ->  8 bits, integer
+//
+// Always returns true.
+bool JpegXlSaveOpts::SetPrecision(int gimp_precision) {
+  // Start from the 8-bit integer fallback and override where needed.
+  int sample_bits = 8;
+  int exponent_bits = 0;
+
+  switch (gimp_precision) {
+    case GIMP_PRECISION_HALF_GAMMA:
+    case GIMP_PRECISION_HALF_LINEAR:
+      sample_bits = 16;
+      exponent_bits = 5;
+      break;
+
+    case GIMP_PRECISION_U32_GAMMA:
+    case GIMP_PRECISION_U32_LINEAR:
+    case GIMP_PRECISION_FLOAT_GAMMA:
+    case GIMP_PRECISION_FLOAT_LINEAR:
+      sample_bits = 32;
+      exponent_bits = 8;
+      break;
+
+    case GIMP_PRECISION_U16_GAMMA:
+    case GIMP_PRECISION_U16_LINEAR:
+      sample_bits = 16;
+      break;
+
+    default:
+      break;
+  }
+
+  basic_info.bits_per_sample = sample_bits;
+  basic_info.exponent_bits_per_sample = exponent_bits;
+  return true;
+}  // JpegXlSaveOpts::SetPrecision
+
+}  // namespace
+
+// Exports a GIMP image as a JPEG XL file.
+//
+// Shows the save dialog, duplicates the image, converts the copy to 32-bit
+// float, hands every layer to the encoder as one frame (several layers are
+// treated as animation frames) and writes the encoded bytes to `filename`.
+//
+// image_id       image to export.
+// drawable_id    not used.
+// orig_image_id  not used.
+// filename       path of the output file.
+//
+// Returns false if an encoder call fails or the output file cannot be
+// written.  Returns true otherwise, including when the user cancels the
+// dialog.
+bool SaveJpegXlImage(const gint32 image_id, const gint32 drawable_id,
+                     const gint32 orig_image_id, const gchar* const filename) {
+  if (!jxl_save_gui.SaveDialog()) {
+    return true;
+  }
+
+  // Owns the duplicated image and the layer id array so that both are
+  // released on every exit path, including the early error returns below.
+  struct WorkingCopy {
+    gint32 image = -1;
+    gint32* layers = nullptr;
+    ~WorkingCopy() {
+      g_free(layers);
+      if (image != -1) {
+        gimp_image_delete(image);
+      }
+    }
+  } work;
+
+  gint32 nlayers = 0;
+  const gint32 duplicate = gimp_image_duplicate(image_id);
+  work.image = duplicate;
+
+  JpegXlGimpProgress gimp_save_progress(
+      ("Saving JPEG XL file:" + std::string(filename)).c_str());
+  gimp_save_progress.update();
+
+  // try to get ICC color profile...
+  std::vector<uint8_t> icc;
+
+  GimpColorProfile* profile = gimp_image_get_effective_color_profile(image_id);
+  jxl_save_opts.is_gray = gimp_color_profile_is_gray(profile);
+  jxl_save_opts.is_linear = gimp_color_profile_is_linear(profile);
+
+  profile = gimp_image_get_color_profile(image_id);
+  if (profile) {
+    g_printerr(SAVE_PROC " Info: Extracting ICC Profile...\n");
+    gsize icc_size;
+    const guint8* const icc_bytes =
+        gimp_color_profile_get_icc_profile(profile, &icc_size);
+
+    icc.assign(icc_bytes, icc_bytes + icc_size);
+  } else {
+    g_printerr(SAVE_PROC " Info: No ICC profile.  Exporting image anyway.\n");
+  }
+
+  gimp_save_progress.update();
+
+  jxl_save_opts.SetDimensions(gimp_image_width(image_id),
+                              gimp_image_height(image_id));
+
+  jxl_save_opts.SetPrecision(gimp_image_get_precision(image_id));
+  gint32* const layers = gimp_image_get_layers(duplicate, &nlayers);
+  work.layers = layers;
+
+  for (int i = 0; i < nlayers; i++) {
+    if (gimp_drawable_has_alpha(layers[i])) {
+      jxl_save_opts.has_alpha = true;
+      break;
+    }
+  }
+
+  gimp_save_progress.update();
+
+  // layers need to match image size, for now
+  for (int i = 0; i < nlayers; i++) {
+    gimp_layer_resize_to_image_size(layers[i]);
+  }
+
+  // treat layers as animation frames, for now
+  if (nlayers > 1) {
+    jxl_save_opts.basic_info.have_animation = true;
+    jxl_save_opts.basic_info.animation.tps_numerator = 100;
+  }
+
+  gimp_save_progress.update();
+
+  // multi-threaded parallel runner.
+  auto runner = JxlResizableParallelRunnerMake(nullptr);
+
+  JxlResizableParallelRunnerSetThreads(
+      runner.get(),
+      JxlResizableParallelRunnerSuggestThreads(jxl_save_opts.basic_info.xsize,
+                                               jxl_save_opts.basic_info.ysize));
+
+  auto enc = JxlEncoderMake(/*memory_manager=*/nullptr);
+  JxlEncoderUseContainer(enc.get(), jxl_save_opts.use_container);
+
+  if (JXL_ENC_SUCCESS != JxlEncoderSetParallelRunner(enc.get(),
+                                                     JxlResizableParallelRunner,
+                                                     runner.get())) {
+    g_printerr(SAVE_PROC " Error: JxlEncoderSetParallelRunner failed\n");
+    return false;
+  }
+
+  // this sets some basic_info properties
+  jxl_save_opts.SetModel(jxl_save_opts.is_linear);
+
+  if (JXL_ENC_SUCCESS !=
+      JxlEncoderSetBasicInfo(enc.get(), &jxl_save_opts.basic_info)) {
+    g_printerr(SAVE_PROC " Error: JxlEncoderSetBasicInfo failed\n");
+    return false;
+  }
+
+  // try to use ICC profile
+  if (!icc.empty() && !jxl_save_opts.is_gray) {
+    if (JXL_ENC_SUCCESS ==
+        JxlEncoderSetICCProfile(enc.get(), icc.data(), icc.size())) {
+      jxl_save_opts.icc_attached = true;
+    } else {
+      g_printerr(SAVE_PROC " Warning: JxlEncoderSetICCProfile failed.\n");
+      jxl_save_opts.basic_info.uses_original_profile = false;
+      jxl_save_opts.lossless = false;
+    }
+  } else {
+    g_printerr(SAVE_PROC " Warning: Using internal profile.\n");
+    jxl_save_opts.basic_info.uses_original_profile = false;
+    jxl_save_opts.lossless = false;
+  }
+
+  // set up internal color profile
+  JxlColorEncoding color_encoding = {};
+
+  if (jxl_save_opts.is_linear) {
+    JxlColorEncodingSetToLinearSRGB(&color_encoding, jxl_save_opts.is_gray);
+  } else {
+    JxlColorEncodingSetToSRGB(&color_encoding, jxl_save_opts.is_gray);
+  }
+
+  if (JXL_ENC_SUCCESS !=
+      JxlEncoderSetColorEncoding(enc.get(), &color_encoding)) {
+    g_printerr(SAVE_PROC " Warning: JxlEncoderSetColorEncoding failed\n");
+  }
+
+  // set encoder options
+  JxlEncoderFrameSettings* frame_settings;
+  frame_settings = JxlEncoderFrameSettingsCreate(enc.get(), nullptr);
+
+  JxlEncoderFrameSettingsSetOption(frame_settings, JXL_ENC_FRAME_SETTING_EFFORT,
+                                   jxl_save_opts.encoding_effort);
+  JxlEncoderFrameSettingsSetOption(frame_settings,
+                                   JXL_ENC_FRAME_SETTING_DECODING_SPEED,
+                                   jxl_save_opts.faster_decoding);
+
+  // lossless mode
+  if (jxl_save_opts.lossless || jxl_save_opts.distance < 0.01) {
+    if (jxl_save_opts.basic_info.exponent_bits_per_sample > 0) {
+      // lossless mode doesn't work well with floating point
+      jxl_save_opts.distance = 0.01;
+      jxl_save_opts.lossless = false;
+      JxlEncoderSetFrameLossless(frame_settings, false);
+      JxlEncoderSetFrameDistance(frame_settings, 0.01);
+    } else {
+      JxlEncoderSetFrameDistance(frame_settings, 0);
+      JxlEncoderSetFrameLossless(frame_settings, true);
+    }
+  } else {
+    jxl_save_opts.lossless = false;
+    JxlEncoderSetFrameLossless(frame_settings, false);
+    JxlEncoderSetFrameDistance(frame_settings, jxl_save_opts.distance);
+  }
+
+  // convert precision and colorspace
+  if (jxl_save_opts.is_linear &&
+      jxl_save_opts.basic_info.bits_per_sample < 32) {
+    gimp_image_convert_precision(duplicate, GIMP_PRECISION_FLOAT_LINEAR);
+  } else {
+    gimp_image_convert_precision(duplicate, GIMP_PRECISION_FLOAT_GAMMA);
+  }
+
+  // process layers and compress into JXL
+  // Widen before multiplying so large images cannot overflow 32 bits.
+  const size_t num_pixels =
+      static_cast<size_t>(jxl_save_opts.basic_info.xsize) *
+      jxl_save_opts.basic_info.ysize;
+  const size_t buffer_size =
+      num_pixels * jxl_save_opts.pixel_format.num_channels *
+      4;  // bytes per sample
+
+  // Scratch buffers shared by all layers.  They are released automatically,
+  // which is safe because the encoder copies each frame it is given.
+  std::vector<uint8_t> native_pixels(buffer_size);
+  std::vector<uint8_t> float_pixels(buffer_size);
+
+  for (int i = nlayers - 1; i >= 0; i--) {
+    gimp_save_progress.update();
+
+    // copy image into buffer...
+    gimp_layer_resize_to_image_size(layers[i]);
+
+    GeglBuffer* buffer = gimp_drawable_get_buffer(layers[i]);
+
+    // using gegl_buffer_set_format to get the format because
+    // gegl_buffer_get_format doesn't always get the original format
+    const Babl* native_format = gegl_buffer_set_format(buffer, nullptr);
+
+    gegl_buffer_get(buffer,
+                    GEGL_RECTANGLE(0, 0, jxl_save_opts.basic_info.xsize,
+                                   jxl_save_opts.basic_info.ysize),
+                    1.0, native_format, native_pixels.data(),
+                    GEGL_AUTO_ROWSTRIDE, GEGL_ABYSS_NONE);
+    g_clear_object(&buffer);
+
+    // use babl to fix gamma mismatch issues
+    jxl_save_opts.SetModel(jxl_save_opts.is_linear);
+    jxl_save_opts.pixel_format.data_type = JXL_TYPE_FLOAT;
+    jxl_save_opts.SetBablType("float");
+    const Babl* destination_format =
+        babl_format(jxl_save_opts.babl_format_str.c_str());
+
+    babl_process(babl_fish(native_format, destination_format),
+                 native_pixels.data(), float_pixels.data(),
+                 static_cast<long>(num_pixels));
+
+    gimp_save_progress.update();
+
+    // send layer to encoder
+    if (JXL_ENC_SUCCESS !=
+        JxlEncoderAddImageFrame(frame_settings, &jxl_save_opts.pixel_format,
+                                float_pixels.data(), buffer_size)) {
+      g_printerr(SAVE_PROC " Error: JxlEncoderAddImageFrame failed\n");
+      return false;
+    }
+  }
+
+  JxlEncoderCloseInput(enc.get());
+
+  // get data from encoder
+  std::vector<uint8_t> compressed;
+  compressed.resize(262144);
+  uint8_t* next_out = compressed.data();
+  size_t avail_out = compressed.size();
+
+  JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+  while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+    gimp_save_progress.update();
+
+    process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out);
+    if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+      // Growing the vector may move it, so re-derive the write position.
+      size_t offset = next_out - compressed.data();
+      compressed.resize(compressed.size() + 262144);
+      next_out = compressed.data() + offset;
+      avail_out = compressed.size() - offset;
+    }
+  }
+  compressed.resize(next_out - compressed.data());
+
+  if (JXL_ENC_SUCCESS != process_result) {
+    g_printerr(SAVE_PROC " Error: JxlEncoderProcessOutput failed\n");
+    return false;
+  }
+
+  // write file
+  std::ofstream outstream(filename, std::ios::out | std::ios::binary);
+  outstream.write(reinterpret_cast<const char*>(compressed.data()),
+                  static_cast<std::streamsize>(compressed.size()));
+  outstream.close();
+  if (!outstream) {
+    // Do not report success for a file that was not (fully) written.
+    g_printerr(SAVE_PROC " Error: writing output file failed\n");
+    return false;
+  }
+
+  gimp_save_progress.finished();
+  return true;
+}  // SaveJpegXlImage()
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/plugins/gimp/file-jxl-save.h b/third-party/libjxl/libjxl/plugins/gimp/file-jxl-save.h
new file mode 100644
index 0000000000..c9d0e8091f
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gimp/file-jxl-save.h
@@ -0,0 +1,18 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef PLUGINS_GIMP_FILE_JXL_SAVE_H_
+#define PLUGINS_GIMP_FILE_JXL_SAVE_H_
+
+#include "plugins/gimp/common.h"
+
+namespace jxl {
+
+// Shows the JPEG XL save dialog and, if confirmed, encodes the layers of
+// `image_id` and writes the result to `filename`.  `drawable_id` and
+// `orig_image_id` are accepted but not used by the current implementation.
+// Returns false if an encoder step fails.  Note that it returns true when the
+// user cancels the dialog.
+bool SaveJpegXlImage(gint32 image_id, gint32 drawable_id, gint32 orig_image_id,
+                     const gchar* filename);
+
+}  // namespace jxl
+
+#endif  // PLUGINS_GIMP_FILE_JXL_SAVE_H_
diff --git a/third-party/libjxl/libjxl/plugins/gimp/file-jxl.cc b/third-party/libjxl/libjxl/plugins/gimp/file-jxl.cc
new file mode 100644
index 0000000000..743495a2e0
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gimp/file-jxl.cc
@@ -0,0 +1,157 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <string.h>
+
+#include <string>
+
+#include "plugins/gimp/common.h"
+#include "plugins/gimp/file-jxl-load.h"
+#include "plugins/gimp/file-jxl-save.h"
+
+namespace jxl {
+namespace {
+
+constexpr char kLoadProc[] = "file-jxl-load";
+constexpr char kSaveProc[] = "file-jxl-save";
+
+// Registers the plug-in's two procedures with GIMP: kLoadProc for opening
+// JPEG XL files and kSaveProc for exporting them.  Each procedure is also
+// associated with the "image/jxl" MIME type and the "jxl" extension.
+void Query() {
+  // --- load procedure ---
+  {
+    static char run_mode_name[] = "run-mode";
+    static char run_mode_description[] = "Run mode";
+    static char filename_name[] = "filename";
+    static char filename_description[] = "The name of the file to load";
+    static char raw_filename_name[] = "raw-filename";
+    static char raw_filename_description[] =
+        "The name of the file, as entered by the user";
+    // Run() expects exactly these three parameters for kLoadProc.
+    static const GimpParamDef load_args[] = {
+        {GIMP_PDB_INT32, run_mode_name, run_mode_description},
+        {GIMP_PDB_STRING, filename_name, filename_description},
+        {GIMP_PDB_STRING, raw_filename_name, raw_filename_description},
+    };
+    static char image_name[] = "image";
+    static char image_description[] = "Loaded image";
+    static const GimpParamDef load_return_vals[] = {
+        {GIMP_PDB_IMAGE, image_name, image_description},
+    };
+
+    gimp_install_procedure(
+        /*name=*/kLoadProc, /*blurb=*/"Loads JPEG XL image files",
+        /*help=*/"Loads JPEG XL image files", /*author=*/"JPEG XL Project",
+        /*copyright=*/"JPEG XL Project", /*date=*/"2019",
+        /*menu_label=*/"JPEG XL image", /*image_types=*/nullptr,
+        /*type=*/GIMP_PLUGIN, /*n_params=*/G_N_ELEMENTS(load_args),
+        /*n_return_vals=*/G_N_ELEMENTS(load_return_vals), /*params=*/load_args,
+        /*return_vals=*/load_return_vals);
+    gimp_register_file_handler_mime(kLoadProc, "image/jxl");
+    // Two magic patterns, both at offset 0: the two bytes FF 0A, and a
+    // 12-byte sequence containing the ASCII text "JXL ".
+    gimp_register_magic_load_handler(
+        kLoadProc, "jxl", "",
+        "0,string,\xFF\x0A,"
+        "0,string,\\000\\000\\000\x0CJXL\\040\\015\\012\x87\\012");
+  }
+
+  // --- save procedure ---
+  {
+    static char run_mode_name[] = "run-mode";
+    static char run_mode_description[] = "Run mode";
+    static char image_name[] = "image";
+    static char image_description[] = "Input image";
+    static char drawable_name[] = "drawable";
+    static char drawable_description[] = "Drawable to save";
+    static char filename_name[] = "filename";
+    static char filename_description[] = "The name of the file to save";
+    static char raw_filename_name[] = "raw-filename";
+    static char raw_filename_description[] = "The name of the file to save";
+    // Run() expects exactly these five parameters for kSaveProc.
+    static const GimpParamDef save_args[] = {
+        {GIMP_PDB_INT32, run_mode_name, run_mode_description},
+        {GIMP_PDB_IMAGE, image_name, image_description},
+        {GIMP_PDB_DRAWABLE, drawable_name, drawable_description},
+        {GIMP_PDB_STRING, filename_name, filename_description},
+        {GIMP_PDB_STRING, raw_filename_name, raw_filename_description},
+    };
+
+    // The save procedure declares no return values of its own.
+    gimp_install_procedure(
+        /*name=*/kSaveProc, /*blurb=*/"Saves JPEG XL image files",
+        /*help=*/"Saves JPEG XL image files", /*author=*/"JPEG XL Project",
+        /*copyright=*/"JPEG XL Project", /*date=*/"2019",
+        /*menu_label=*/"JPEG XL image", /*image_types=*/"RGB*, GRAY*",
+        /*type=*/GIMP_PLUGIN, /*n_params=*/G_N_ELEMENTS(save_args),
+        /*n_return_vals=*/0, /*params=*/save_args,
+        /*return_vals=*/nullptr);
+    gimp_register_file_handler_mime(kSaveProc, "image/jxl");
+    gimp_register_save_handler(kSaveProc, "jxl", "");
+  }
+}
+
+// Entry point GIMP calls to execute one of the procedures installed by
+// Query().
+//
+// name          procedure being invoked (kLoadProc or kSaveProc).
+// nparams       number of entries in `params`; must be 3 for load and 5 for
+//               save, otherwise GIMP_PDB_CALLING_ERROR is reported.
+// params        arguments, in the order registered by Query().
+// nreturn_vals  receives the number of valid entries in `*return_vals`.
+// return_vals   receives a pointer to static storage: the status first and,
+//               for a successful load, the new image id second.
+//
+// The status stays GIMP_PDB_EXECUTION_ERROR unless a branch below changes it,
+// which also covers an unrecognized procedure name.
+void Run(const gchar* const name, const gint nparams,
+         const GimpParam* const params, gint* const nreturn_vals,
+         GimpParam** const return_vals) {
+  gegl_init(nullptr, nullptr);
+
+  static GimpParam values[2];
+
+  *nreturn_vals = 1;
+  *return_vals = values;
+
+  values[0].type = GIMP_PDB_STATUS;
+  values[0].data.d_status = GIMP_PDB_EXECUTION_ERROR;
+
+  if (strcmp(name, kLoadProc) == 0) {
+    if (nparams != 3) {
+      values[0].data.d_status = GIMP_PDB_CALLING_ERROR;
+      return;
+    }
+
+    const gchar* const filename = params[1].data.d_string;
+    gint32 image_id;
+    if (!LoadJpegXlImage(filename, &image_id)) {
+      values[0].data.d_status = GIMP_PDB_EXECUTION_ERROR;
+      return;
+    }
+
+    *nreturn_vals = 2;
+    values[0].data.d_status = GIMP_PDB_SUCCESS;
+    values[1].type = GIMP_PDB_IMAGE;
+    values[1].data.d_image = image_id;
+  } else if (strcmp(name, kSaveProc) == 0) {
+    if (nparams != 5) {
+      values[0].data.d_status = GIMP_PDB_CALLING_ERROR;
+      return;
+    }
+
+    gint32 image_id = params[1].data.d_image;
+    gint32 drawable_id = params[2].data.d_drawable;
+    const gchar* const filename = params[3].data.d_string;
+    const gint32 orig_image_id = image_id;
+    // gimp_export_image may replace image_id/drawable_id with a temporary
+    // copy prepared for export.
+    const GimpExportReturn export_result = gimp_export_image(
+        &image_id, &drawable_id, "JPEG XL",
+        static_cast<GimpExportCapabilities>(GIMP_EXPORT_CAN_HANDLE_RGB |
+                                            GIMP_EXPORT_CAN_HANDLE_GRAY |
+                                            GIMP_EXPORT_CAN_HANDLE_ALPHA));
+    switch (export_result) {
+      case GIMP_EXPORT_CANCEL:
+        values[0].data.d_status = GIMP_PDB_CANCEL;
+        return;
+      case GIMP_EXPORT_IGNORE:
+        break;
+      case GIMP_EXPORT_EXPORT:
+        break;
+    }
+
+    const bool saved =
+        SaveJpegXlImage(image_id, drawable_id, orig_image_id, filename);
+
+    // Release the temporary export copy whether or not saving succeeded, so
+    // a failed save does not leak it.
+    if (image_id != orig_image_id) {
+      gimp_image_delete(image_id);
+    }
+    if (!saved) {
+      return;
+    }
+    values[0].data.d_status = GIMP_PDB_SUCCESS;
+  }
+}
+
+}  // namespace
+}  // namespace jxl
+
+// Callback table for the plug-in: the first two slots are left null, and
+// jxl::Query / jxl::Run are supplied as the remaining two callbacks.
+static const GimpPlugInInfo PLUG_IN_INFO = {nullptr, nullptr, &jxl::Query,
+                                            &jxl::Run};
+
+// NOTE(review): MAIN() is presumably the libgimp macro that expands to the
+// plug-in's main() and picks up PLUG_IN_INFO by name; confirm against libgimp.
+MAIN()
diff --git a/third-party/libjxl/libjxl/plugins/mime/CMakeLists.txt b/third-party/libjxl/libjxl/plugins/mime/CMakeLists.txt
new file mode 100644
index 0000000000..6f2a0f919c
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/mime/CMakeLists.txt
@@ -0,0 +1,6 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Install the JPEG XL MIME type definition into share/mime/packages/.
+install(FILES image-jxl.xml DESTINATION share/mime/packages/)
diff --git a/third-party/libjxl/libjxl/plugins/mime/README.md b/third-party/libjxl/libjxl/plugins/mime/README.md
new file mode 100644
index 0000000000..4d398c7b90
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/mime/README.md
@@ -0,0 +1,37 @@
+## :warning: Not needed anymore
+
+As `image/jxl` is now supported by [shared-mime-info 2.2](https://gitlab.freedesktop.org/xdg/shared-mime-info/-/releases/2.2), it should no longer be necessary to install this plugin.
+
+You can test whether your system correctly understands the MIME type of JPEG XL images by obtaining a JPEG XL image, e.g. with
+```bash
+wget https://raw.githubusercontent.com/libjxl/conformance/master/testcases/bicycles/input.jxl
+```
+and with that sample JPEG XL file `input.jxl` (or any other valid JPEG XL file), run any of the following commands:
+```bash
+xdg-mime query filetype input.jxl
+file --mime-type input.jxl
+mimetype input.jxl
+```
+If the output contains `image/jxl` you are all set!
+
+
+## JPEG XL MIME type
+
+If not already installed by the [Installing section of BUILDING.md](../../BUILDING.md#installing), then it can be done manually:
+
+### Install
+```bash
+sudo xdg-mime install --novendor image-jxl.xml
+```
+
+Then run:
+```
+update-mime --local
+```
+
+
+### Uninstall
+```bash
+sudo xdg-mime uninstall image-jxl.xml
+```
+
diff --git a/third-party/libjxl/libjxl/plugins/mime/image-jxl.xml b/third-party/libjxl/libjxl/plugins/mime/image-jxl.xml
new file mode 100644
index 0000000000..cab9018c7d
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/mime/image-jxl.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<mime-info xmlns="http://www.freedesktop.org/standards/shared-mime-info">
+  <mime-type type="image/jxl">
+    <comment>JPEG XL image</comment>
+    <comment xml:lang="fr">image JPEG XL</comment>
+    <comment xml:lang="nl">JPEG XL afbeelding</comment>
+    <magic priority="50">
+      <match type="string" offset="0" value="\xFF\x0A"/>
+      <match type="string" offset="0" value="\0\0\0\x0CJXL \x0D\x0A\x87\x0A"/>
+    </magic>
+    <glob pattern="*.jxl"/>
+  </mime-type>
+</mime-info>
diff --git a/third-party/libjxl/libjxl/third_party/CMakeLists.txt b/third-party/libjxl/libjxl/third_party/CMakeLists.txt
new file mode 100644
index 0000000000..d22441f668
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/CMakeLists.txt
@@ -0,0 +1,175 @@
+# Copyright (c) the JPEG XL Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Turn BUILD_TESTING off for asan/msan builds.
+if((SANITIZER STREQUAL "asan") OR (SANITIZER STREQUAL "msan"))
+  set(BUILD_TESTING OFF)
+endif()
+
+# Highway
+# Build it as a static library without its contrib code, examples or tests.
+set(HWY_SYSTEM_GTEST ON CACHE INTERNAL "")
+set(HWY_FORCE_STATIC_LIBS ON CACHE INTERNAL "")
+set(HWY_ENABLE_CONTRIB OFF CACHE INTERNAL "")
+set(HWY_ENABLE_EXAMPLES OFF CACHE INTERNAL "")
+set(HWY_ENABLE_TESTS OFF CACHE INTERNAL "")
+# Highway's install rules are switched off for asan/msan builds.
+if((SANITIZER STREQUAL "asan") OR (SANITIZER STREQUAL "msan"))
+  set(HWY_ENABLE_INSTALL OFF CACHE INTERNAL "")
+endif()
+# Use the bundled copy when it is present and the system one is not forced;
+# otherwise look for an installed Highway (version 1.0.6 requested) and stop
+# with an error if none is found.
+if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/highway/CMakeLists.txt" AND
+    NOT JPEGXL_FORCE_SYSTEM_HWY)
+  add_subdirectory(highway)
+  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/highway/LICENSE"
+                 ${PROJECT_BINARY_DIR}/LICENSE.highway COPYONLY)
+else()
+  find_package(HWY 1.0.6)
+  if (NOT HWY_FOUND)
+    message(FATAL_ERROR
+        "Highway library (hwy) not found. Install libhwy-dev or download it "
+        "to third_party/highway from https://github.com/google/highway . "
+        "Highway is required to build JPEG XL. You can run "
+        "${PROJECT_SOURCE_DIR}/deps.sh to download this dependency.")
+  endif()
+  # Copy the system package's copyright file next to the build outputs.
+  if(JPEGXL_DEP_LICENSE_DIR)
+    configure_file("${JPEGXL_DEP_LICENSE_DIR}/libhwy-dev/copyright"
+                   ${PROJECT_BINARY_DIR}/LICENSE.highway COPYONLY)
+  endif()  # JPEGXL_DEP_LICENSE_DIR
+endif()
+
+# brotli
+# Use the system Brotli when the bundled sources are missing or when
+# JPEGXL_FORCE_SYSTEM_BROTLI is set; otherwise build the bundled copy.
+if (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/brotli/c/include/brotli/decode.h" OR
+    JPEGXL_FORCE_SYSTEM_BROTLI)
+  find_package(Brotli)
+  if (NOT Brotli_FOUND)
+    message(FATAL_ERROR
+        "Brotli not found, install brotli-dev or download brotli source code to"
+        " third_party/brotli from https://github.com/google/brotli. You can use"
+        " ${PROJECT_SOURCE_DIR}/deps.sh to download this dependency.")
+  endif ()
+  if(JPEGXL_DEP_LICENSE_DIR)
+    configure_file("${JPEGXL_DEP_LICENSE_DIR}/libbrotli-dev/copyright"
+                   ${PROJECT_BINARY_DIR}/LICENSE.brotli COPYONLY)
+  endif()  # JPEGXL_DEP_LICENSE_DIR
+else()
+  # Compile brotli from sources.
+  set(BROTLI_DISABLE_TESTS ON CACHE STRING "Disable Brotli tests")
+  # Override default "no-install" policy.
+  # (Not done for asan/msan builds.)
+  if((NOT SANITIZER STREQUAL "asan") AND (NOT SANITIZER STREQUAL "msan"))
+    set(BROTLI_BUNDLED_MODE OFF CACHE INTERNAL "")
+  endif()
+  add_subdirectory(brotli)
+  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/brotli/LICENSE"
+                 ${PROJECT_BINARY_DIR}/LICENSE.brotli COPYONLY)
+  # On Apple platforms, set the install name / rpath properties of the three
+  # brotli libraries unless the user already configured them globally.
+  if(APPLE)
+    if(NOT DEFINED CMAKE_MACOSX_RPATH)
+      # Use @rpath in install_name when CMAKE_MACOSX_RPATH is not set.
+      set_property(TARGET brotlienc PROPERTY MACOSX_RPATH TRUE)
+      set_property(TARGET brotlidec PROPERTY MACOSX_RPATH TRUE)
+      set_property(TARGET brotlicommon PROPERTY MACOSX_RPATH TRUE)
+    endif()
+    if((NOT DEFINED CMAKE_MACOSX_RPATH) OR CMAKE_MACOSX_RPATH)
+      # Set library search path when @rpath is used.
+      if(NOT DEFINED CMAKE_INSTALL_RPATH)
+        set_property(TARGET brotlienc PROPERTY INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
+        set_property(TARGET brotlidec PROPERTY INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
+        set_property(TARGET brotlicommon PROPERTY INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
+      endif()
+    else()
+      # Set conventional install_name when @rpath is not used.
+      if(NOT DEFINED CMAKE_INSTALL_NAME_DIR)
+        set_property(TARGET brotlienc PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib")
+        set_property(TARGET brotlidec PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib")
+        set_property(TARGET brotlicommon PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib")
+      endif()
+    endif()
+  endif()  # APPLE
+endif()
+
+# *cms
+# skcms is pulled in when JPEGXL_ENABLE_SKCMS is set; its sources must already
+# be present under skcms/.
+if (JPEGXL_ENABLE_SKCMS)
+  if( NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/skcms/skcms.h" )
+    message(FATAL_ERROR "Please run ${PROJECT_SOURCE_DIR}/deps.sh to fetch the "
+            "build dependencies.")
+  endif()
+  include(skcms.cmake)
+  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/skcms/LICENSE"
+                 ${PROJECT_BINARY_DIR}/LICENSE.skcms COPYONLY)
+endif ()
+# lcms2 is set up when skcms is disabled, and also whenever the viewers or
+# the plugins are enabled.  The system package (version 2.13 requested) is
+# used if the lcms submodule is not checked out or JPEGXL_FORCE_SYSTEM_LCMS2
+# is set.
+if (JPEGXL_ENABLE_VIEWERS OR NOT JPEGXL_ENABLE_SKCMS OR JPEGXL_ENABLE_PLUGINS)
+  if( NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/lcms/.git" OR JPEGXL_FORCE_SYSTEM_LCMS2 )
+    find_package(LCMS2 2.13)
+    if ( NOT LCMS2_FOUND )
+      message(FATAL_ERROR "Please install lcms2 or run git submodule update --init")
+    endif ()
+  else()
+    include(lcms2.cmake)
+    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lcms/COPYING"
+                   ${PROJECT_BINARY_DIR}/LICENSE.lcms COPYONLY)
+  endif()
+endif()
+
+# libpng
+# Three cases: bundled libpng for Emscripten builds (headers only, no library
+# to link), bundled libpng built statically together with zlib, or the
+# system libpng.
+if (JPEGXL_BUNDLE_LIBPNG AND JPEGXL_EMSCRIPTEN)
+  if (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/libpng/CMakeLists.txt")
+  message(FATAL_ERROR "Please run ${PROJECT_SOURCE_DIR}/deps.sh to fetch the "
+          "build dependencies.")
+  endif()
+  # Put the prebuilt pnglibconf.h in place inside the libpng source directory.
+  file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/libpng/scripts/pnglibconf.h.prebuilt" DESTINATION "${CMAKE_CURRENT_SOURCE_DIR}/libpng")
+  file(RENAME "${CMAKE_CURRENT_SOURCE_DIR}/libpng/pnglibconf.h.prebuilt" "${CMAKE_CURRENT_SOURCE_DIR}/libpng/pnglibconf.h")
+  set(ZLIB_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/zlib/")
+  set(ZLIB_LIBRARY "")
+  # Publish the PNG_* results to the parent directory scope.
+  set(PNG_FOUND YES PARENT_SCOPE)
+  set(PNG_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/libpng/" PARENT_SCOPE)
+  set(PNG_LIBRARIES "" PARENT_SCOPE)
+elseif (JPEGXL_BUNDLE_LIBPNG)
+  if (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/libpng/CMakeLists.txt")
+    message(FATAL_ERROR "Please run ${PROJECT_SOURCE_DIR}/deps.sh to fetch the "
+            "build dependencies.")
+  endif()
+  add_subdirectory(zlib)
+  # Static libpng only: no executables, no tests, no install rules.
+  set(PNG_STATIC ON CACHE BOOL "")
+  set(PNG_EXECUTABLES OFF CACHE BOOL "")
+  set(PNG_BUILD_ZLIB ON CACHE BOOL "")
+  set(PNG_TESTS OFF CACHE BOOL "")
+  set(SKIP_INSTALL_ALL ON CACHE BOOL "")
+  set(ZLIB_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/zlib/")
+  set(ZLIB_LIBRARY zlibstatic)
+  add_subdirectory(libpng EXCLUDE_FROM_ALL)
+  set(PNG_FOUND YES PARENT_SCOPE)
+  set(PNG_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/libpng/" PARENT_SCOPE)
+  set(PNG_LIBRARIES png_static PARENT_SCOPE)
+  # Build both static libraries as position-independent code.
+  set_property(TARGET png_static PROPERTY POSITION_INDEPENDENT_CODE ON)
+  set_property(TARGET zlibstatic PROPERTY POSITION_INDEPENDENT_CODE ON)
+  if(JPEGXL_DEP_LICENSE_DIR)
+    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/libpng/LICENSE"
+                   ${PROJECT_BINARY_DIR}/LICENSE.libpng COPYONLY)
+  endif()
+else()
+  # System libpng; not finding it is not treated as an error here.
+  find_package(PNG)
+  if(PNG_FOUND AND JPEGXL_DEP_LICENSE_DIR)
+    configure_file("${JPEGXL_DEP_LICENSE_DIR}/zlib1g-dev/copyright"
+                   ${PROJECT_BINARY_DIR}/LICENSE.zlib COPYONLY)
+    configure_file("${JPEGXL_DEP_LICENSE_DIR}/libpng-dev/copyright"
+                   ${PROJECT_BINARY_DIR}/LICENSE.libpng COPYONLY)
+  endif()  # JPEGXL_DEP_LICENSE_DIR
+endif()
+
+# sjpeg
+# Only built when JPEGXL_ENABLE_SJPEG is set; the sources must already be
+# present under sjpeg/.
+if (JPEGXL_ENABLE_SJPEG)
+  if (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/sjpeg/CMakeLists.txt")
+    message(FATAL_ERROR "Please run ${PROJECT_SOURCE_DIR}/deps.sh to fetch the "
+            "build dependencies.")
+  endif()
+  include(sjpeg.cmake)
+  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/sjpeg/COPYING"
+                 ${PROJECT_BINARY_DIR}/LICENSE.sjpeg COPYONLY)
+endif ()
diff --git a/third-party/libjxl/libjxl/third_party/brotli/BUILD b/third-party/libjxl/libjxl/third_party/brotli/BUILD
new file mode 100644
index 0000000000..07a6793054
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/BUILD
@@ -0,0 +1,147 @@
+# Description:
+#   Brotli is a generic-purpose lossless compression algorithm.
+
+load(":compiler_config_setting.bzl", "create_msvc_config")
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # MIT
+
+exports_files(["LICENSE"])
+
+# Platform selectors. Each setting matches one value of the `cpu` build
+# configuration and is publicly visible.
+config_setting(
+    name = "darwin",
+    values = {"cpu": "darwin"},
+    visibility = ["//visibility:public"],
+)
+
+config_setting(
+    name = "darwin_x86_64",
+    values = {"cpu": "darwin_x86_64"},
+    visibility = ["//visibility:public"],
+)
+
+config_setting(
+    name = "windows",
+    values = {"cpu": "x64_windows"},
+    visibility = ["//visibility:public"],
+)
+
+config_setting(
+    name = "windows_msvc",
+    values = {"cpu": "x64_windows_msvc"},
+    visibility = ["//visibility:public"],
+)
+
+config_setting(
+    name = "windows_msys",
+    values = {"cpu": "x64_windows_msys"},
+    visibility = ["//visibility:public"],
+)
+
+# Macro loaded from compiler_config_setting.bzl above.
+# NOTE(review): presumably this defines the ":msvc" setting that the select()
+# expressions below key on - confirm in compiler_config_setting.bzl.
+create_msvc_config()
+
+# Compiler flags passed as `copts` to every cc_* target in this file.
+# Under the ":msvc" setting no extra flags are added; everywhere else a strict
+# warning set is enabled and -Werror makes those warnings fail the build.
+STRICT_C_OPTIONS = select({
+    ":msvc": [],
+    "//conditions:default": [
+        "--pedantic-errors",
+        "-Wall",
+        "-Wconversion",
+        "-Werror",
+        "-Wextra",
+        "-Wlong-long",
+        "-Wmissing-declarations",
+        "-Wmissing-prototypes",
+        "-Wno-strict-aliasing",
+        "-Wshadow",
+        "-Wsign-compare",
+    ],
+})
+
+# File groups consumed by the cc_library targets below. Each group globs one
+# directory of the C implementation: public API headers, then the headers and
+# sources of the common, decoder and encoder parts.
+filegroup(
+    name = "public_headers",
+    srcs = glob(["c/include/brotli/*.h"]),
+)
+
+filegroup(
+    name = "common_headers",
+    srcs = glob(["c/common/*.h"]),
+)
+
+filegroup(
+    name = "common_sources",
+    srcs = glob(["c/common/*.c"]),
+)
+
+filegroup(
+    name = "dec_headers",
+    srcs = glob(["c/dec/*.h"]),
+)
+
+filegroup(
+    name = "dec_sources",
+    srcs = glob(["c/dec/*.c"]),
+)
+
+filegroup(
+    name = "enc_headers",
+    srcs = glob(["c/enc/*.h"]),
+)
+
+filegroup(
+    name = "enc_sources",
+    srcs = glob(["c/enc/*.c"]),
+)
+
+# Header-only target exposing the public API headers; "c/include" is stripped
+# so that dependents include them as <brotli/...>.
+cc_library(
+    name = "brotli_inc",
+    hdrs = [":public_headers"],
+    copts = STRICT_C_OPTIONS,
+    strip_include_prefix = "c/include",
+)
+
+# Code shared by the decoder and the encoder.
+cc_library(
+    name = "brotlicommon",
+    srcs = [":common_sources"],
+    hdrs = [":common_headers"],
+    copts = STRICT_C_OPTIONS,
+    deps = [":brotli_inc"],
+)
+
+# Decoder library.
+cc_library(
+    name = "brotlidec",
+    srcs = [":dec_sources"],
+    hdrs = [":dec_headers"],
+    copts = STRICT_C_OPTIONS,
+    deps = [":brotlicommon"],
+)
+
+# Encoder library. Links against libm (-lm) except under the ":msvc" setting.
+cc_library(
+    name = "brotlienc",
+    srcs = [":enc_sources"],
+    hdrs = [":enc_headers"],
+    copts = STRICT_C_OPTIONS,
+    linkopts = select({
+        ":msvc": [],
+        "//conditions:default": ["-lm"],
+    }),
+    deps = [":brotlicommon"],
+)
+
+# Command-line tool, statically linked against the decoder and encoder.
+cc_binary(
+    name = "brotli",
+    srcs = ["c/tools/brotli.c"],
+    copts = STRICT_C_OPTIONS,
+    linkstatic = 1,
+    deps = [
+        ":brotlidec",
+        ":brotlienc",
+    ],
+)
+
+filegroup(
+    name = "dictionary",
+    srcs = ["c/common/dictionary.bin"],
+)
diff --git a/third-party/libjxl/libjxl/third_party/brotli/CMakeLists.txt b/third-party/libjxl/libjxl/third_party/brotli/CMakeLists.txt
new file mode 100644
index 0000000000..9e4cf40830
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/CMakeLists.txt
@@ -0,0 +1,401 @@
+# Available CMake versions:
+#  - Ubuntu 18.04 LTS (deprecated on GitHub Actions) : 3.10.4
+#  - Solaris 11.4 SRU 15                             : 3.15 
+cmake_minimum_required(VERSION 3.10.4)
+
+# Since this project's version is loaded from other files, this policy
+# will help suppress the warning generated by cmake.
+# This policy is set because we can't provide "VERSION" in "project" command.
+# Use `cmake --help-policy CMP0048` for more information.
+cmake_policy(SET CMP0048 NEW)
+project(brotli C)
+
+option(BUILD_SHARED_LIBS "Build shared libraries" ON)
+
+if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
+  message(STATUS "Setting build type to Release as none was specified.")
+  set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build." FORCE)
+else()
+  message(STATUS "Build type is '${CMAKE_BUILD_TYPE}'")
+endif()
+
+include(CheckCSourceCompiles)
+# Toolchain probe: the test program only defines main() when __EMSCRIPTEN__
+# is defined, so the check is expected to succeed only when building with
+# Emscripten. The result is stored in BROTLI_EMSCRIPTEN.
+check_c_source_compiles(
+  "#if defined(__EMSCRIPTEN__)
+   int main() {return 0;}
+   #endif"
+  BROTLI_EMSCRIPTEN
+)
+if (BROTLI_EMSCRIPTEN)
+  message("-- Compiler is EMSCRIPTEN")
+else()
+  message("-- Compiler is not EMSCRIPTEN")
+endif()
+
+# If Brotli is being bundled in another project, we don't want to
+# install anything.  However, we want to let people override this, so
+# we'll use the BROTLI_BUNDLED_MODE variable to let them do that; just
+# set it to OFF in your project before you add_subdirectory(brotli).
+get_directory_property(BROTLI_PARENT_DIRECTORY PARENT_DIRECTORY)
+if(NOT DEFINED BROTLI_BUNDLED_MODE)
+  # Bundled mode hasn't been set one way or the other, set the default
+  # depending on whether or not we are the top-level project.
+  if(BROTLI_PARENT_DIRECTORY)
+    set(BROTLI_BUNDLED_MODE ON)
+  else()
+    set(BROTLI_BUNDLED_MODE OFF)
+  endif()
+endif()
+mark_as_advanced(BROTLI_BUNDLED_MODE)
+
+include(GNUInstallDirs)
+
+# Parse version information from common/version.h. Normally we would
+# define these values here and write them out to configuration file(s)
+# (i.e., config.h), but in this case we parse them from
+# common/version.h to be less intrusive.
+# Convert HEXADECIMAL (hex digits only, no "0x" prefix, either letter case)
+# to its decimal value and store it in the caller's variable named by DECIMAL.
+# An empty input yields 0.
+function(hex_to_dec HEXADECIMAL DECIMAL)
+  string(TOUPPER "${HEXADECIMAL}" _hex)
+  string(LENGTH "${_hex}" _hex_length)
+  set(_result 0)
+  set(_pos 0)
+  # Walk the digits left to right: result = result * 16 + digit.
+  while (_pos LESS _hex_length)
+    string(SUBSTRING "${_hex}" ${_pos} 1 _char)
+    if (_char STREQUAL "A")
+      set(_value 10)
+    elseif (_char STREQUAL "B")
+      set(_value 11)
+    elseif (_char STREQUAL "C")
+      set(_value 12)
+    elseif (_char STREQUAL "D")
+      set(_value 13)
+    elseif (_char STREQUAL "E")
+      set(_value 14)
+    elseif (_char STREQUAL "F")
+      set(_value 15)
+    else()
+      # Anything else is taken to be a decimal digit and used as-is.
+      set(_value "${_char}")
+    endif()
+    math(EXPR _result "${_result} * 16 + ${_value}")
+    math(EXPR _pos "${_pos} + 1")
+  endwhile()
+  set(${DECIMAL} ${_result} PARENT_SCOPE)
+endfunction(hex_to_dec)
+
+# Version information
+# BROTLI_VERSION in version.h is one packed hex number. It is unpacked below
+# as: bits 24 and up -> major, bits 12..23 -> minor, bits 0..11 -> patch.
+# The regex only matches when the #define line ends right after the hex value.
+file(STRINGS "c/common/version.h" _brotli_version_line REGEX "^#define BROTLI_VERSION (0x[0-9a-fA-F]+)$")
+string(REGEX REPLACE "^#define BROTLI_VERSION 0x([0-9a-fA-F]+)$" "\\1" _brotli_version_hex "${_brotli_version_line}")
+hex_to_dec("${_brotli_version_hex}" _brotli_version)
+math(EXPR BROTLI_VERSION_MAJOR "${_brotli_version} >> 24")
+math(EXPR BROTLI_VERSION_MINOR "(${_brotli_version} >> 12) & 4095")
+math(EXPR BROTLI_VERSION_PATCH "${_brotli_version} & 4095")
+set(BROTLI_VERSION "${BROTLI_VERSION_MAJOR}.${BROTLI_VERSION_MINOR}.${BROTLI_VERSION_PATCH}")
+mark_as_advanced(BROTLI_VERSION BROTLI_VERSION_MAJOR BROTLI_VERSION_MINOR BROTLI_VERSION_PATCH)
+
+# ABI Version information
+# BROTLI_ABI_VERSION uses the same packing: current / revision / age.
+# BROTLI_ABI_COMPATIBILITY is derived as current - age and is used further
+# down for the library VERSION and SOVERSION properties.
+file(STRINGS "c/common/version.h" _brotli_abi_info_line REGEX "^#define BROTLI_ABI_VERSION (0x[0-9a-fA-F]+)$")
+string(REGEX REPLACE "^#define BROTLI_ABI_VERSION 0x([0-9a-fA-F]+)$" "\\1" _brotli_abi_info_hex "${_brotli_abi_info_line}")
+hex_to_dec("${_brotli_abi_info_hex}" _brotli_abi_info)
+math(EXPR BROTLI_ABI_CURRENT "${_brotli_abi_info} >> 24")
+math(EXPR BROTLI_ABI_REVISION "(${_brotli_abi_info} >> 12) & 4095")
+math(EXPR BROTLI_ABI_AGE "${_brotli_abi_info} & 4095")
+math(EXPR BROTLI_ABI_COMPATIBILITY "${BROTLI_ABI_CURRENT} - ${BROTLI_ABI_AGE}")
+mark_as_advanced(BROTLI_ABI_CURRENT BROTLI_ABI_REVISION BROTLI_ABI_AGE BROTLI_ABI_COMPATIBILITY)
+
+if (ENABLE_SANITIZER)
+  set(CMAKE_C_FLAGS " ${CMAKE_C_FLAGS} -fsanitize=${ENABLE_SANITIZER}")
+  set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fsanitize=${ENABLE_SANITIZER}")
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=${ENABLE_SANITIZER}")
+endif ()
+
+include(CheckFunctionExists)
+# Detect log2(): first without any extra library, then with libm ("m").
+# LIBM_LIBRARY ends up as "m" only when log2 is found with libm but not
+# without it; BROTLI_HAVE_LOG2 is defined to 1 or 0 for the compiler to match.
+set(LIBM_LIBRARY)
+CHECK_FUNCTION_EXISTS(log2 LOG2_RES)
+if(NOT LOG2_RES)
+  # Retry with libm added, remembering the previous value so it can be restored.
+  set(orig_req_libs "${CMAKE_REQUIRED_LIBRARIES}")
+  set(CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES};m")
+  CHECK_FUNCTION_EXISTS(log2 LOG2_LIBM_RES)
+  if(LOG2_LIBM_RES)
+    set(LIBM_LIBRARY "m")
+    add_definitions(-DBROTLI_HAVE_LOG2=1)
+  else()
+    add_definitions(-DBROTLI_HAVE_LOG2=0)
+  endif()
+
+  set(CMAKE_REQUIRED_LIBRARIES "${orig_req_libs}")
+  unset(LOG2_LIBM_RES)
+  unset(orig_req_libs)
+else()
+  add_definitions(-DBROTLI_HAVE_LOG2=1)
+endif()
+unset(LOG2_RES)
+
+set(BROTLI_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/c/include")
+mark_as_advanced(BROTLI_INCLUDE_DIRS)
+
+set(BROTLI_LIBRARIES_CORE brotlienc brotlidec brotlicommon)
+set(BROTLI_LIBRARIES ${BROTLI_LIBRARIES_CORE} ${LIBM_LIBRARY})
+mark_as_advanced(BROTLI_LIBRARIES)
+
+if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
+  add_definitions(-DOS_LINUX)
+elseif(${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD")
+  add_definitions(-DOS_FREEBSD)
+elseif(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+  add_definitions(-DOS_MACOSX)
+  set(CMAKE_MACOS_RPATH TRUE)
+  set(CMAKE_INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib")
+endif()
+
+# Convert the Makefile-style variable list in INPUT_FILE into a CMake script
+# written to OUTPUT_FILE: every `NAME = value ...` assignment, including
+# values continued over several lines with a trailing backslash, becomes
+# `SET(NAME value ...)`.
+#
+# TEXT is always passed quoted so the file contents travel as one argument;
+# unquoted, CMake would list-expand it, dropping any semicolons and failing
+# with too few arguments on an empty file.
+function(transform_sources_list INPUT_FILE OUTPUT_FILE)
+  file(READ "${INPUT_FILE}" TEXT)
+  # Hide backslash-newline continuations so the assignment regex below, which
+  # stops at the first newline, captures the whole multi-line value.
+  string(REGEX REPLACE "\\\\\n" "~continuation~" TEXT "${TEXT}")
+  string(REGEX REPLACE "([a-zA-Z_][a-zA-Z0-9_]*)[\t ]*=[\t ]*([^\n]*)" "SET(\\1 \\2)" TEXT "${TEXT}")
+  # Restore the continuations as plain newlines inside the SET(...) calls.
+  string(REPLACE "~continuation~" "\n" TEXT "${TEXT}")
+  file(WRITE "${OUTPUT_FILE}" "${TEXT}")
+endfunction()
+
+transform_sources_list("scripts/sources.lst" "${CMAKE_CURRENT_BINARY_DIR}/sources.lst.cmake")
+include("${CMAKE_CURRENT_BINARY_DIR}/sources.lst.cmake")
+
+if(BROTLI_EMSCRIPTEN)
+  set(BUILD_SHARED_LIBS OFF)
+endif()
+
+add_library(brotlicommon ${BROTLI_COMMON_C})
+add_library(brotlidec ${BROTLI_DEC_C})
+add_library(brotlienc ${BROTLI_ENC_C})
+
+# Older CMake versions do not understand the INCLUDE_DIRECTORIES property.
+include_directories(${BROTLI_INCLUDE_DIRS})
+
+# Shared builds: every core library publishes BROTLI_SHARED_COMPILATION to its
+# users (PUBLIC) and sets DEFINE_SYMBOL to <UPPERCASE_NAME>_SHARED_COMPILATION,
+# the macro CMake defines while compiling that library's own sources.
+if(BUILD_SHARED_LIBS)
+  foreach(lib ${BROTLI_LIBRARIES_CORE})
+    target_compile_definitions(${lib} PUBLIC "BROTLI_SHARED_COMPILATION" )
+    string(TOUPPER "${lib}" LIB)
+    set_target_properties (${lib} PROPERTIES DEFINE_SYMBOL "${LIB}_SHARED_COMPILATION")
+  endforeach()
+endif()
+
+# Properties common to all core libraries: libm linkage (if needed), include
+# paths for the build itself and for dependents, and ABI-derived versioning.
+foreach(lib ${BROTLI_LIBRARIES_CORE})
+  target_link_libraries(${lib} ${LIBM_LIBRARY})
+  set_property(TARGET ${lib} APPEND PROPERTY INCLUDE_DIRECTORIES ${BROTLI_INCLUDE_DIRS})
+  set_target_properties(${lib} PROPERTIES
+    VERSION "${BROTLI_ABI_COMPATIBILITY}.${BROTLI_ABI_AGE}.${BROTLI_ABI_REVISION}"
+    SOVERSION "${BROTLI_ABI_COMPATIBILITY}")
+  # Position-independent code is requested everywhere except under Emscripten.
+  if(NOT BROTLI_EMSCRIPTEN)
+    set_target_properties(${lib} PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
+  endif()
+  set_property(TARGET ${lib} APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES "$<BUILD_INTERFACE:${BROTLI_INCLUDE_DIRS}>")
+endforeach()
+
+if(NOT BROTLI_EMSCRIPTEN)
+target_link_libraries(brotlidec brotlicommon)
+target_link_libraries(brotlienc brotlicommon)
+endif()
+
+# For projects stuck on older versions of CMake, this will set the
+# BROTLI_INCLUDE_DIRS and BROTLI_LIBRARIES variables so they still
+# have a relatively easy way to use Brotli:
+#
+#   include_directories(${BROTLI_INCLUDE_DIRS})
+#   target_link_libraries(foo ${BROTLI_LIBRARIES})
+if(BROTLI_PARENT_DIRECTORY)
+  set(BROTLI_INCLUDE_DIRS "${BROTLI_INCLUDE_DIRS}" PARENT_SCOPE)
+  set(BROTLI_LIBRARIES "${BROTLI_LIBRARIES}" PARENT_SCOPE)
+endif()
+
+# Build the brotli executable
+add_executable(brotli ${BROTLI_CLI_C})
+target_link_libraries(brotli ${BROTLI_LIBRARIES})
+
+# Installation
+if(NOT BROTLI_BUNDLED_MODE)
+  install(
+    TARGETS brotli
+    RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
+  )
+
+  install(
+    TARGETS ${BROTLI_LIBRARIES_CORE}
+    ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+    LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+    RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
+  )
+
+  install(
+    DIRECTORY ${BROTLI_INCLUDE_DIRS}/brotli
+    DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
+  )
+endif()  # BROTLI_BUNDLED_MODE
+
+# Tests
+
+# If we're targeting Windows but not running on Windows, we need Wine
+# to run the tests...
+if(NOT BROTLI_DISABLE_TESTS)
+  if(WIN32 AND NOT CMAKE_HOST_WIN32)
+    find_program(BROTLI_WRAPPER NAMES wine)
+
+    if(NOT BROTLI_WRAPPER)
+      message(STATUS "wine not found, disabling tests")
+      set(BROTLI_DISABLE_TESTS TRUE)
+    endif()
+  endif()
+endif()
+
+# If our compiler is a cross-compiler that we know about (arm/aarch64),
+# then we need to use qemu to execute the tests.
+if(NOT BROTLI_DISABLE_TESTS)
+  if ("${CMAKE_C_COMPILER}" MATCHES "^.*/arm-linux-gnueabihf-.*$")
+    message(STATUS "Detected arm-linux-gnueabihf cross-compilation")
+    set(BROTLI_WRAPPER "qemu-arm")
+    set(BROTLI_WRAPPER_LD_PREFIX "/usr/arm-linux-gnueabihf")
+  endif()
+
+  if ("${CMAKE_C_COMPILER}" MATCHES "^.*/arm-linux-gnueabi-.*$")
+    message(STATUS "Detected arm-linux-gnueabi cross-compilation")
+    set(BROTLI_WRAPPER "qemu-arm")
+    set(BROTLI_WRAPPER_LD_PREFIX "/usr/arm-linux-gnueabi")
+  endif()
+
+  if ("${CMAKE_C_COMPILER}" MATCHES "^.*/aarch64-linux-gnu-.*$")
+    message(STATUS "Detected aarch64-linux-gnu cross-compilation")
+    set(BROTLI_WRAPPER "qemu-aarch64")
+    set(BROTLI_WRAPPER_LD_PREFIX "/usr/aarch64-linux-gnu")
+  endif()
+endif()
+
+if(NOT BROTLI_DISABLE_TESTS)
+  include(CTest)
+  enable_testing()
+
+  set(ROUNDTRIP_INPUTS
+    tests/testdata/alice29.txt
+    tests/testdata/asyoulik.txt
+    tests/testdata/lcet10.txt
+    tests/testdata/plrabn12.txt
+    c/enc/encode.c
+    c/common/dictionary.h
+    c/dec/decode.c)
+
+  foreach(INPUT ${ROUNDTRIP_INPUTS})
+    get_filename_component(OUTPUT_NAME "${INPUT}" NAME)
+
+    set(OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_NAME}")
+    set(INPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/${INPUT}")
+
+    if (EXISTS "${INPUT_FILE}")
+      foreach(quality 1 6 9 11)
+        add_test(NAME "${BROTLI_TEST_PREFIX}roundtrip/${INPUT}/${quality}"
+          COMMAND "${CMAKE_COMMAND}"
+            -DBROTLI_WRAPPER=${BROTLI_WRAPPER}
+            -DBROTLI_WRAPPER_LD_PREFIX=${BROTLI_WRAPPER_LD_PREFIX}
+            -DBROTLI_CLI=$<TARGET_FILE:brotli>
+            -DQUALITY=${quality}
+            -DINPUT=${INPUT_FILE}
+            -DOUTPUT=${OUTPUT_FILE}.${quality}
+            -P ${CMAKE_CURRENT_SOURCE_DIR}/tests/run-roundtrip-test.cmake)
+      endforeach()
+    else()
+      message(WARNING "Test file ${INPUT} does not exist.")
+    endif()
+  endforeach()
+
+  file(GLOB_RECURSE
+    COMPATIBILITY_INPUTS
+    RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
+    tests/testdata/*.compressed*)
+
+  foreach(INPUT ${COMPATIBILITY_INPUTS})
+    add_test(NAME "${BROTLI_TEST_PREFIX}compatibility/${INPUT}"
+      COMMAND "${CMAKE_COMMAND}"
+        -DBROTLI_WRAPPER=${BROTLI_WRAPPER}
+        -DBROTLI_WRAPPER_LD_PREFIX=${BROTLI_WRAPPER_LD_PREFIX}
+        -DBROTLI_CLI=$<TARGET_FILE:brotli>
+        -DINPUT=${CMAKE_CURRENT_SOURCE_DIR}/${INPUT}
+        -P ${CMAKE_CURRENT_SOURCE_DIR}/tests/run-compatibility-test.cmake)
+  endforeach()
+endif()
+
+# Generate pkg-config files
+
+# Express `path` in terms of a pkg-config variable when possible.
+#
+# Arguments after `path` are (name, value) pairs, tried in order. The first
+# pair whose value equals `path`, or is a directory prefix of it, wins: the
+# result is the literal text "${name}" followed by the remainder of `path`.
+# If no pair matches, `path` is returned unchanged. The result is stored in
+# the caller's variable named by `outvar`.
+function(generate_pkg_config_path outvar path)
+  string(LENGTH "${path}" path_length)
+
+  # Drop `outvar` and `path`; what remains is the list of name/value pairs.
+  set(path_args ${ARGV})
+  list(REMOVE_AT path_args 0 1)
+  list(LENGTH path_args path_args_remaining)
+
+  # Default: no pair matches, so the path is emitted verbatim.
+  set("${outvar}" "${path}")
+
+  while(path_args_remaining GREATER 1)
+    list(GET path_args 0 name)
+    list(GET path_args 1 value)
+
+    string(LENGTH "${value}" value_length)
+
+    if(path_length EQUAL value_length AND path STREQUAL value)
+      set("${outvar}" "\${${name}}")
+      break()
+    elseif(path_length GREATER value_length)
+      # We might be in a subdirectory of the value, but we have to be
+      # careful about a prefix matching but not being a subdirectory
+      # (for example, /usr/lib64 is not a subdirectory of /usr/lib).
+      # We'll do this by making sure the next character is a directory
+      # separator.
+      string(SUBSTRING "${path}" ${value_length} 1 sep)
+      if(sep STREQUAL "/")
+        string(SUBSTRING "${path}" 0 ${value_length} s)
+        if(s STREQUAL value)
+          string(SUBSTRING "${path}" "${value_length}" -1 suffix)
+          set("${outvar}" "\${${name}}${suffix}")
+          break()
+        endif()
+      endif()
+    endif()
+
+    # Advance to the next name/value pair.
+    list(REMOVE_AT path_args 0 1)
+    list(LENGTH path_args path_args_remaining)
+  endwhile()
+
+  set("${outvar}" "${${outvar}}" PARENT_SCOPE)
+endfunction(generate_pkg_config_path)
+
+# Instantiate a pkg-config template: read INPUT_FILE, substitute the
+# @prefix@, @exec_prefix@, @libdir@, @includedir@ and @PACKAGE_VERSION@
+# placeholders, and write the result to OUTPUT_FILE.
+#
+# The placeholders are fixed strings, so plain string(REPLACE) is used: the
+# substituted install paths are inserted literally, with no regex or
+# backslash-escape interpretation. TEXT is passed quoted so the file contents
+# are handled as a single argument instead of being list-expanded.
+function(transform_pc_file INPUT_FILE OUTPUT_FILE VERSION)
+  file(READ "${INPUT_FILE}" TEXT)
+
+  set(PREFIX "${CMAKE_INSTALL_PREFIX}")
+  string(REPLACE "@prefix@" "${PREFIX}" TEXT "${TEXT}")
+  string(REPLACE "@exec_prefix@" "${PREFIX}" TEXT "${TEXT}")
+
+  # Where possible, libdir/includedir are written relative to ${prefix}.
+  generate_pkg_config_path(LIBDIR "${CMAKE_INSTALL_FULL_LIBDIR}" prefix "${PREFIX}")
+  string(REPLACE "@libdir@" "${LIBDIR}" TEXT "${TEXT}")
+
+  generate_pkg_config_path(INCLUDEDIR "${CMAKE_INSTALL_FULL_INCLUDEDIR}" prefix "${PREFIX}")
+  string(REPLACE "@includedir@" "${INCLUDEDIR}" TEXT "${TEXT}")
+
+  string(REPLACE "@PACKAGE_VERSION@" "${VERSION}" TEXT "${TEXT}")
+
+  file(WRITE "${OUTPUT_FILE}" "${TEXT}")
+endfunction()
+
+transform_pc_file("scripts/libbrotlicommon.pc.in" "${CMAKE_CURRENT_BINARY_DIR}/libbrotlicommon.pc" "${BROTLI_VERSION}")
+
+transform_pc_file("scripts/libbrotlidec.pc.in" "${CMAKE_CURRENT_BINARY_DIR}/libbrotlidec.pc" "${BROTLI_VERSION}")
+
+transform_pc_file("scripts/libbrotlienc.pc.in" "${CMAKE_CURRENT_BINARY_DIR}/libbrotlienc.pc" "${BROTLI_VERSION}")
+
+if(NOT BROTLI_BUNDLED_MODE)
+  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libbrotlicommon.pc"
+    DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
+  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libbrotlidec.pc"
+    DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
+  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libbrotlienc.pc"
+    DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
+endif()  # BROTLI_BUNDLED_MODE
+
+if (ENABLE_COVERAGE STREQUAL "yes")
+  SETUP_TARGET_FOR_COVERAGE(coverage test coverage)
+endif ()
diff --git a/third-party/libjxl/libjxl/third_party/brotli/CONTRIBUTING.md b/third-party/libjxl/libjxl/third_party/brotli/CONTRIBUTING.md
new file mode 100644
index 0000000000..a00e37d17f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/CONTRIBUTING.md
@@ -0,0 +1,27 @@
+Want to contribute? Great! First, read this page (including the small print at
+the end).
+
+### Before you contribute
+Before we can use your code, you must sign the
+[Google Individual Contributor License Agreement](https://cla.developers.google.com/about/google-individual)
+(CLA), which you can do online. The CLA is necessary mainly because you own the
+copyright to your changes, even after your contribution becomes part of our
+codebase, so we need your permission to use and distribute your code. We also
+need to be sure of various other things—for instance that you'll tell us if you
+know that your code infringes on other people's patents. You don't have to sign
+the CLA until after you've submitted your code for review and a member has
+approved it, but you must do it before we can put your code into our codebase.
+Before you start working on a larger contribution, you should get in touch with
+us first through the issue tracker with your idea so that we can help out and
+possibly guide you. Coordinating up front makes it much easier to avoid
+frustration later on.
+
+### Code reviews
+All submissions, including submissions by project members, require review. We
+use GitHub pull requests for this purpose.
+
+### The small print
+Contributions made by corporations are covered by a different agreement than
+the one above, the
+[Software Grant and Corporate Contributor License Agreement](https://cla.developers.google.com/about/google-corporate).
diff --git a/third-party/libjxl/libjxl/third_party/brotli/LICENSE b/third-party/libjxl/libjxl/third_party/brotli/LICENSE
new file mode 100644
index 0000000000..33b7cdd2db
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/third-party/libjxl/libjxl/third_party/brotli/MANIFEST.in b/third-party/libjxl/libjxl/third_party/brotli/MANIFEST.in
new file mode 100644
index 0000000000..ff8d600656
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/MANIFEST.in
@@ -0,0 +1,19 @@
+include CONTRIBUTING.md
+include c/common/*.c
+include c/common/*.h
+include c/dec/*.c
+include c/dec/*.h
+include c/enc/*.c
+include c/enc/*.h
+include c/include/brotli/*.h
+include LICENSE
+include MANIFEST.in
+include python/_brotli.cc
+include python/bro.py
+include python/brotli.py
+include python/README.md
+include python/tests/*
+include README.md
+include setup.py
+include tests/testdata/*
+include c/tools/brotli.c
diff --git a/third-party/libjxl/libjxl/third_party/brotli/Makefile b/third-party/libjxl/libjxl/third_party/brotli/Makefile
new file mode 100644
index 0000000000..4890940907
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/Makefile
@@ -0,0 +1,55 @@
+OS := $(shell uname)
+LIBSOURCES = $(wildcard c/common/*.c) $(wildcard c/dec/*.c) \
+             $(wildcard c/enc/*.c)
+SOURCES = $(LIBSOURCES) c/tools/brotli.c
+BINDIR = bin
+OBJDIR = $(BINDIR)/obj
+LIBOBJECTS = $(addprefix $(OBJDIR)/, $(LIBSOURCES:.c=.o))
+OBJECTS = $(addprefix $(OBJDIR)/, $(SOURCES:.c=.o))
+LIB_A = libbrotli.a
+EXECUTABLE = brotli
+DIRS = $(OBJDIR)/c/common $(OBJDIR)/c/dec $(OBJDIR)/c/enc \
+       $(OBJDIR)/c/tools $(BINDIR)/tmp
+CFLAGS += -O2
+# Make variable names are case-sensitive: the `uname` result is stored in OS
+# at the top of this file, so the check must read $(OS). With $(os), which is
+# never set, the Darwin branch could not be taken.
+ifeq ($(OS), Darwin)
+  CPPFLAGS += -DOS_MACOSX
+endif
+
+ifneq ($(strip $(CROSS_COMPILE)), )
+	CC=$(CROSS_COMPILE)-gcc
+	ARCH=$(firstword $(subst -, ,$(CROSS_COMPILE)))
+	BROTLI_WRAPPER="qemu-$(ARCH) -L /usr/$(CROSS_COMPILE)"
+endif
+
+# The arm-linux-gnueabi compiler defaults to Armv5. Since we only support Armv7
+# and beyond, we need to select Armv7 explicitly with march.
+ifeq ($(ARCH), arm)
+	CFLAGS += -march=armv7-a -mfloat-abi=hard -mfpu=neon
+endif
+
+all: test
+	@:
+
+.PHONY: all clean test
+
+$(DIRS):
+	mkdir -p $@
+
+$(EXECUTABLE): $(OBJECTS)
+	$(CC) $(LDFLAGS) $(OBJECTS) -lm -o $(BINDIR)/$(EXECUTABLE)
+
+lib: $(LIBOBJECTS)
+	rm -f $(LIB_A)
+	ar -crs $(LIB_A) $(LIBOBJECTS)
+
+test: $(EXECUTABLE)
+	tests/compatibility_test.sh $(BROTLI_WRAPPER)
+	tests/roundtrip_test.sh $(BROTLI_WRAPPER)
+
+clean:
+	rm -rf $(BINDIR) $(LIB_A)
+
+# Object rule. Each object under $(OBJDIR) depends on the source file found by
+# stripping the "$(OBJDIR)/" prefix from the target and replacing .o with .c.
+# Second expansion is enabled so that $$@ in the prerequisite list refers to
+# the individual target. $(DIRS) is an order-only prerequisite: the output
+# directories must exist, but their timestamps do not trigger rebuilds.
+.SECONDEXPANSION:
+$(OBJECTS): $$(patsubst %.o,%.c,$$(patsubst $$(OBJDIR)/%,%,$$@)) | $(DIRS)
+	$(CC) $(CFLAGS) $(CPPFLAGS) -Ic/include \
+        -c $(patsubst %.o,%.c,$(patsubst $(OBJDIR)/%,%,$@)) -o $@
diff --git a/third-party/libjxl/libjxl/third_party/brotli/Makefile.am b/third-party/libjxl/libjxl/third_party/brotli/Makefile.am
new file mode 100644
index 0000000000..ace7a8506e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/Makefile.am
@@ -0,0 +1,38 @@
+AUTOMAKE_OPTIONS = foreign nostdinc subdir-objects
+
+ACLOCAL_AMFLAGS = -I m4
+
+# Actual ABI version is substituted by bootstrap
+LIBBROTLI_VERSION_INFO = -version-info 0:0:0
+
+bin_PROGRAMS = brotli
+lib_LTLIBRARIES = libbrotlicommon.la libbrotlidec.la libbrotlienc.la
+
+include scripts/sources.lst
+
+brotliincludedir = $(includedir)/brotli
+brotliinclude_HEADERS = $(BROTLI_INCLUDE)
+
+AM_CFLAGS = -I$(top_srcdir)/c/include
+
+brotli_SOURCES = $(BROTLI_CLI_C)
+brotli_LDADD = libbrotlidec.la libbrotlienc.la libbrotlicommon.la -lm
+#brotli_LDFLAGS = -static
+
+libbrotlicommon_la_SOURCES = $(BROTLI_COMMON_C) $(BROTLI_COMMON_H)
+libbrotlicommon_la_LDFLAGS = $(AM_LDFLAGS) $(LIBBROTLI_VERSION_INFO) $(LDFLAGS)
+libbrotlidec_la_SOURCES = $(BROTLI_DEC_C) $(BROTLI_DEC_H)
+libbrotlidec_la_LDFLAGS = $(AM_LDFLAGS) $(LIBBROTLI_VERSION_INFO) $(LDFLAGS)
+libbrotlidec_la_LIBADD = libbrotlicommon.la -lm
+libbrotlienc_la_SOURCES = $(BROTLI_ENC_C) $(BROTLI_ENC_H)
+libbrotlienc_la_LDFLAGS = $(AM_LDFLAGS) $(LIBBROTLI_VERSION_INFO) $(LDFLAGS)
+libbrotlienc_la_LIBADD = libbrotlicommon.la -lm
+
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = \
+  scripts/libbrotlicommon.pc \
+  scripts/libbrotlidec.pc \
+  scripts/libbrotlienc.pc
+pkgincludedir= $(brotliincludedir)
+
+dist_doc_DATA = README
diff --git a/third-party/libjxl/libjxl/third_party/brotli/README b/third-party/libjxl/libjxl/third_party/brotli/README
new file mode 100644
index 0000000000..dea7291306
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/README
@@ -0,0 +1,15 @@
+BROTLI DATA COMPRESSION LIBRARY
+
+Brotli is a generic-purpose lossless compression algorithm that compresses data
+using a combination of a modern variant of the LZ77 algorithm, Huffman coding
+and 2nd order context modeling, with a compression ratio comparable to the best
+currently available general-purpose compression methods. It is similar in speed
+with deflate but offers more dense compression.
+
+The specification of the Brotli Compressed Data Format is defined in RFC 7932
+https://tools.ietf.org/html/rfc7932
+
+Brotli is open-sourced under the MIT License, see the LICENSE file.
+
+Brotli mailing list:
+https://groups.google.com/forum/#!forum/brotli
diff --git a/third-party/libjxl/libjxl/third_party/brotli/README.md b/third-party/libjxl/libjxl/third_party/brotli/README.md
new file mode 100644
index 0000000000..0f905e3f2c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/README.md
@@ -0,0 +1,110 @@
+<p align="center"><img src="https://brotli.org/brotli.svg" alt="Brotli" width="64"></p>
+
+# SECURITY NOTE
+
+Please consider updating brotli to version 1.0.9 (latest).
+
+Version 1.0.9 contains a fix to "integer overflow" problem. This happens when "one-shot" decoding API is used (or input chunk for streaming API is not limited), input size (chunk size) is larger than 2GiB, and input contains uncompressed blocks. After the overflow happens, `memcpy` is invoked with a gigantic `num` value, that will likely cause the crash.
+
+### Introduction
+
+Brotli is a generic-purpose lossless compression algorithm that compresses data
+using a combination of a modern variant of the LZ77 algorithm, Huffman coding
+and 2nd order context modeling, with a compression ratio comparable to the best
+currently available general-purpose compression methods. It is similar in speed
+with deflate but offers more dense compression.
+
+The specification of the Brotli Compressed Data Format is defined in [RFC 7932](https://tools.ietf.org/html/rfc7932).
+
+Brotli is open-sourced under the MIT License, see the LICENSE file.
+
+> **Please note:** brotli is a "stream" format; it does not contain
+> meta-information, like checksums or uncompressed data length. It is possible
+> to modify "raw" ranges of the compressed stream and the decoder will not
+> notice that.
+
+Brotli mailing list:
+https://groups.google.com/forum/#!forum/brotli
+
+![GitHub Actions Build Status](https://github.com/google/brotli/actions/workflows/build_test.yml/badge.svg)
+[![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/brotli.svg)](https://oss-fuzz-build-logs.storage.googleapis.com/index.html#brotli)
+
+### Build instructions
+
+#### Vcpkg
+
+You can download and install brotli using the [vcpkg](https://github.com/Microsoft/vcpkg/) dependency manager:
+
+    git clone https://github.com/Microsoft/vcpkg.git
+    cd vcpkg
+    ./bootstrap-vcpkg.sh
+    ./vcpkg integrate install
+    ./vcpkg install brotli
+
+The brotli port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
+
+#### Autotools-style CMake
+
+[configure-cmake](https://github.com/nemequ/configure-cmake) is an
+autotools-style configure script for CMake-based projects (not supported on Windows).
+
+The basic commands to build, test and install brotli are:
+
+    $ mkdir out && cd out
+    $ ../configure-cmake
+    $ make
+    $ make test
+    $ make install
+
+By default, debug binaries are built. To generate "release" `Makefile` specify `--disable-debug` option to `configure-cmake`.
+
+#### Bazel
+
+See [Bazel](http://www.bazel.build/)
+
+#### CMake
+
+The basic commands to build and install brotli are:
+
+    $ mkdir out && cd out
+    $ cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./installed ..
+    $ cmake --build . --config Release --target install
+
+You can use other [CMake](https://cmake.org/) configuration.
+
+#### Premake5
+
+See [Premake5](https://premake.github.io/)
+
+#### Python
+
+To install the latest release of the Python module, run the following:
+
+    $ pip install brotli
+
+To install the tip-of-the-tree version, run:
+
+    $ pip install --upgrade git+https://github.com/google/brotli
+
+See the [Python readme](python/README.md) for more details on installing
+from source, development, and testing.
+
+### Benchmarks
+* [Squash Compression Benchmark](https://quixdb.github.io/squash-benchmark/) / [Unstable Squash Compression Benchmark](https://quixdb.github.io/squash-benchmark/unstable/)
+* [Large Text Compression Benchmark](http://mattmahoney.net/dc/text.html)
+* [Lzturbo Benchmark](https://sites.google.com/site/powturbo/home/benchmark)
+
+### Related projects
+> **Disclaimer:** Brotli authors take no responsibility for the third party projects mentioned in this section.
+
+Independent [decoder](https://github.com/madler/brotli) implementation by Mark Adler, based entirely on format specification.
+
+JavaScript port of brotli [decoder](https://github.com/devongovett/brotli.js). Could be used directly via `npm install brotli`
+
+Hand ported [decoder / encoder](https://github.com/dominikhlbg/BrotliHaxe) in haxe by Dominik Homberger. Output source code: JavaScript, PHP, Python, Java and C#
+
+7Zip [plugin](https://github.com/mcmilk/7-Zip-Zstd)
+
+Dart [native bindings](https://github.com/thosakwe/brotli)
+
+Dart compression framework with [fast FFI-based Brotli implementation](https://pub.dev/documentation/es_compression/latest/brotli/brotli-library.html) with ready-to-use prebuilt binaries for Win/Linux/Mac
diff --git a/third-party/libjxl/libjxl/third_party/brotli/SECURITY.md b/third-party/libjxl/libjxl/third_party/brotli/SECURITY.md
new file mode 100644
index 0000000000..c2a44c6666
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/SECURITY.md
@@ -0,0 +1,6 @@
+### Reporting
+
+To report a security issue, please use [https://g.co/vulnz](https://g.co/vulnz).
+We use g.co/vulnz for our intake, and do coordination and disclosure here on
+GitHub (including using GitHub Security Advisory). The Google Security Team will
+respond within 5 working days of your report on g.co/vulnz.
diff --git a/third-party/libjxl/libjxl/third_party/brotli/WORKSPACE b/third-party/libjxl/libjxl/third_party/brotli/WORKSPACE
new file mode 100644
index 0000000000..75f376828f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/WORKSPACE
@@ -0,0 +1,21 @@
+workspace(name = "org_brotli")
+
+local_repository(
+    name = "ignore_org_brotli_go",
+    path = "go",
+)
+
+local_repository(
+    name = "ignore_org_brotli_java",
+    path = "java",
+)
+
+local_repository(
+    name = "ignore_org_brotli_js",
+    path = "js",
+)
+
+local_repository(
+    name = "ignore_org_brotli_research",
+    path = "research",
+)
diff --git a/third-party/libjxl/libjxl/third_party/brotli/bootstrap b/third-party/libjxl/libjxl/third_party/brotli/bootstrap
new file mode 100644
index 0000000000..1da6d60f78
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/bootstrap
@@ -0,0 +1,35 @@
+#!/bin/sh -e
+
+REQUIRED='is required, but not installed.'
+bc -v >/dev/null 2>&1 || { echo >&2 "'bc' $REQUIRED"; exit 1; }
+[ "x`echo hello | sed s/hello/world/ 2>/dev/null`" = "xworld" ] || { echo >&2 "'sed' $REQUIRED"; exit 1; }
+autoreconf --version >/dev/null 2>&1 || { echo >&2 "'autoconf' $REQUIRED"; exit 1; }
+
+# Determine which flag sed uses for extended regular expressions.
+# -E is POSIX. -r is for GNU sed older than 4.2.
+echo hello | sed -E s/hello/world/ >/dev/null 2>&1 && SED_ERE=-E || SED_ERE=-r
+
+# If libtool is not installed -> "error: Libtool library used but 'LIBTOOL' is undefined"
+
+if [ ! -e "./m4" ]; then
+mkdir m4 2>/dev/null
+fi
+
+BROTLI_ABI_HEX=`sed -n 's/#define BROTLI_ABI_VERSION 0x//p' c/common/version.h`
+BROTLI_ABI_INT=`echo "ibase=16;$BROTLI_ABI_HEX" | bc`
+BROTLI_ABI_CURRENT=`echo "scale=0;$BROTLI_ABI_INT / 16777216" | bc`
+BROTLI_ABI_REVISION=`echo "scale=0;$BROTLI_ABI_INT / 4096 % 4096" | bc`
+BROTLI_ABI_AGE=`echo "scale=0;$BROTLI_ABI_INT % 4096" | bc`
+BROTLI_ABI_INFO="$BROTLI_ABI_CURRENT:$BROTLI_ABI_REVISION:$BROTLI_ABI_AGE"
+
+BROTLI_VERSION_HEX=`sed -n 's/#define BROTLI_VERSION 0x//p' c/common/version.h`
+BROTLI_VERSION_INT=`echo "ibase=16;$BROTLI_VERSION_HEX" | bc`
+BROTLI_VERSION_MAJOR=`echo "scale=0;$BROTLI_VERSION_INT / 16777216" | bc`
+BROTLI_VERSION_MINOR=`echo "scale=0;$BROTLI_VERSION_INT / 4096 % 4096" | bc`
+BROTLI_VERSION_PATCH=`echo "scale=0;$BROTLI_VERSION_INT % 4096" | bc`
+BROTLI_VERSION="$BROTLI_VERSION_MAJOR.$BROTLI_VERSION_MINOR.$BROTLI_VERSION_PATCH"
+
+sed -i.bak "$SED_ERE" "s/[0-9]+:[0-9]+:[0-9]+/$BROTLI_ABI_INFO/" Makefile.am
+sed -i.bak "$SED_ERE" "s/\[[0-9]+\.[0-9]+\.[0-9]+\]/[$BROTLI_VERSION]/" configure.ac
+
+autoreconf --install --force --symlink || exit $?
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/common/constants.c b/third-party/libjxl/libjxl/third_party/brotli/c/common/constants.c
new file mode 100644
index 0000000000..89866b1505
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/common/constants.c
@@ -0,0 +1,15 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "constants.h"
+
+const BrotliPrefixCodeRange
+    _kBrotliPrefixCodeRanges[BROTLI_NUM_BLOCK_LEN_SYMBOLS] = {
+        {1, 2},     {5, 2},     {9, 2},   {13, 2},    {17, 3},    {25, 3},
+        {33, 3},    {41, 3},    {49, 4},  {65, 4},    {81, 4},    {97, 4},
+        {113, 5},   {145, 5},   {177, 5}, {209, 5},   {241, 6},   {305, 6},
+        {369, 7},   {497, 8},   {753, 9}, {1265, 10}, {2289, 11}, {4337, 12},
+        {8433, 13}, {16625, 24}};
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/common/constants.h b/third-party/libjxl/libjxl/third_party/brotli/c/common/constants.h
new file mode 100644
index 0000000000..31e5bd376e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/common/constants.h
@@ -0,0 +1,201 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/**
+ * @file
+ * Common constants used in decoder and encoder API.
+ */
+
+#ifndef BROTLI_COMMON_CONSTANTS_H_
+#define BROTLI_COMMON_CONSTANTS_H_
+
+#include <brotli/port.h>
+#include <brotli/types.h>
+
+#include "platform.h"
+
+/* Specification: 7.3. Encoding of the context map */
+#define BROTLI_CONTEXT_MAP_MAX_RLE 16
+
+/* Specification: 2. Compressed representation overview */
+#define BROTLI_MAX_NUMBER_OF_BLOCK_TYPES 256
+
+/* Specification: 3.3. Alphabet sizes: insert-and-copy length */
+#define BROTLI_NUM_LITERAL_SYMBOLS 256
+#define BROTLI_NUM_COMMAND_SYMBOLS 704
+#define BROTLI_NUM_BLOCK_LEN_SYMBOLS 26
+#define BROTLI_MAX_CONTEXT_MAP_SYMBOLS (BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + \
+                                        BROTLI_CONTEXT_MAP_MAX_RLE)
+#define BROTLI_MAX_BLOCK_TYPE_SYMBOLS (BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 2)
+
+/* Specification: 3.5. Complex prefix codes */
+#define BROTLI_REPEAT_PREVIOUS_CODE_LENGTH 16
+#define BROTLI_REPEAT_ZERO_CODE_LENGTH 17
+#define BROTLI_CODE_LENGTH_CODES (BROTLI_REPEAT_ZERO_CODE_LENGTH + 1)
+/* "code length of 8 is repeated" */
+#define BROTLI_INITIAL_REPEATED_CODE_LENGTH 8
+
+/* "Large Window Brotli" */
+
+/**
+ * The theoretical maximum number of distance bits specified for large window
+ * brotli, for 64-bit encoders and decoders. Even when in practice 32-bit
+ * encoders and decoders only support up to 30 max distance bits, the value is
+ * set to 62 because it affects the large window brotli file format.
+ * Specifically, it affects the encoding of simple huffman tree for distances,
+ * see Specification RFC 7932 chapter 3.4.
+ */
+#define BROTLI_LARGE_MAX_DISTANCE_BITS 62U
+#define BROTLI_LARGE_MIN_WBITS 10
+/**
+ * The maximum supported large brotli window bits by the encoder and decoder.
+ * Large window brotli allows up to 62 bits, however the current encoder and
+ * decoder, designed for 32-bit integers, only support up to 30 bits maximum.
+ */
+#define BROTLI_LARGE_MAX_WBITS 30
+
+/* Specification: 4. Encoding of distances */
+#define BROTLI_NUM_DISTANCE_SHORT_CODES 16
+/**
+ * Maximal number of "postfix" bits.
+ *
+ * Number of "postfix" bits is stored as 2 bits in meta-block header.
+ */
+#define BROTLI_MAX_NPOSTFIX 3
+#define BROTLI_MAX_NDIRECT 120
+#define BROTLI_MAX_DISTANCE_BITS 24U
+#define BROTLI_DISTANCE_ALPHABET_SIZE(NPOSTFIX, NDIRECT, MAXNBITS) ( \
+    BROTLI_NUM_DISTANCE_SHORT_CODES + (NDIRECT) +                    \
+    ((MAXNBITS) << ((NPOSTFIX) + 1)))
+/* BROTLI_NUM_DISTANCE_SYMBOLS == 16 + 120 + (62 << 4) == 1128 */
+#define BROTLI_NUM_DISTANCE_SYMBOLS \
+    BROTLI_DISTANCE_ALPHABET_SIZE(  \
+        BROTLI_MAX_NPOSTFIX, BROTLI_MAX_NDIRECT, BROTLI_LARGE_MAX_DISTANCE_BITS)
+
+/* ((1 << 26) - 4) is the maximal distance that can be expressed in RFC 7932
+   brotli stream using NPOSTFIX = 0 and NDIRECT = 0. With other NPOSTFIX and
+   NDIRECT values distances up to ((1 << 29) + 88) could be expressed. */
+#define BROTLI_MAX_DISTANCE 0x3FFFFFC
+
+/* ((1 << 31) - 4) is the safe distance limit. Using this number as a limit
+   allows safe distance calculation without overflows, given the distance
+   alphabet size is limited to corresponding size
+   (see kLargeWindowDistanceCodeLimits). */
+#define BROTLI_MAX_ALLOWED_DISTANCE 0x7FFFFFFC
+
+
+/* Specification: 4. Encoding of Literal Insertion Lengths and Copy Lengths */
+#define BROTLI_NUM_INS_COPY_CODES 24
+
+/* 7.1. Context modes and context ID lookup for literals */
+/* "context IDs for literals are in the range of 0..63" */
+#define BROTLI_LITERAL_CONTEXT_BITS 6
+
+/* 7.2. Context ID for distances */
+#define BROTLI_DISTANCE_CONTEXT_BITS 2
+
+/* 9.1. Format of the Stream Header */
+/* Number of slack bytes for window size. Don't confuse
+   with BROTLI_NUM_DISTANCE_SHORT_CODES. */
+#define BROTLI_WINDOW_GAP 16
+#define BROTLI_MAX_BACKWARD_LIMIT(W) (((size_t)1 << (W)) - BROTLI_WINDOW_GAP)
+
+typedef struct BrotliDistanceCodeLimit {
+  uint32_t max_alphabet_size;
+  uint32_t max_distance;
+} BrotliDistanceCodeLimit;
+
+/* Calculates the maximal distance alphabet size, and the largest distance it
+   encodes, such that distances above |max_distance| can not be represented.
+
+   These limits are designed to support fast and safe 32-bit decoders.
+   "32-bit" means that signed integer values up to ((1 << 31) - 1) can be
+   safely expressed.
+
+   Brotli distance alphabet symbols do not represent consecutive distance
+   ranges. Each distance alphabet symbol (excluding direct distances and short
+   codes) represents an interleaved (for NPOSTFIX > 0) range of distances.
+   A "group" of (1 << NPOSTFIX) consecutive symbols covers a non-interleaved
+   range. Two consecutive groups require the same amount of "extra bits".
+
+   It is important that the distance alphabet represents complete "groups".
+   To avoid complex logic on the encoder side about interleaved ranges
+   it was decided to restrict both sides to complete distance code "groups".
+ */
+BROTLI_UNUSED_FUNCTION BrotliDistanceCodeLimit BrotliCalculateDistanceCodeLimit(
+    uint32_t max_distance, uint32_t npostfix, uint32_t ndirect) {
+  BrotliDistanceCodeLimit result;
+  /* Marking this function as unused, because not all files
+     including "constants.h" use it -> compiler warns about that. */
+  BROTLI_UNUSED(&BrotliCalculateDistanceCodeLimit);
+  if (max_distance <= ndirect) {
+    /* This case never happens / exists only for the sake of completeness. */
+    result.max_alphabet_size = max_distance + BROTLI_NUM_DISTANCE_SHORT_CODES;
+    result.max_distance = max_distance;
+    return result;
+  } else {
+    /* The first prohibited value. */
+    uint32_t forbidden_distance = max_distance + 1;
+    /* Subtract "directly" encoded region. */
+    uint32_t offset = forbidden_distance - ndirect - 1;
+    uint32_t ndistbits = 0;
+    uint32_t tmp;
+    uint32_t half;
+    uint32_t group;
+    /* Postfix for the last dcode in the group. */
+    uint32_t postfix = (1u << npostfix) - 1;
+    uint32_t extra;
+    uint32_t start;
+    /* Remove postfix and "head-start". */
+    offset = (offset >> npostfix) + 4;
+    /* Calculate the number of distance bits. */
+    tmp = offset / 2;
+    /* Poor-man's log2floor, to avoid extra dependencies. */
+    while (tmp != 0) {ndistbits++; tmp = tmp >> 1;}
+    /* One bit is covered with subrange addressing ("half"). */
+    ndistbits--;
+    /* Find subrange. */
+    half = (offset >> ndistbits) & 1;
+    /* Calculate the "group" part of dcode. */
+    group = ((ndistbits - 1) << 1) | half;
+    /* Calculated "group" covers the prohibited distance value. */
+    if (group == 0) {
+      /* This case is added for correctness; does not occur for limit > 128. */
+      result.max_alphabet_size = ndirect + BROTLI_NUM_DISTANCE_SHORT_CODES;
+      result.max_distance = ndirect;
+      return result;
+    }
+    /* Decrement "group", so it is the last permitted "group". */
+    group--;
+    /* "group" changed: recalculate ndistbits; new "half" is (group & 1). */
+    ndistbits = (group >> 1) + 1;
+    /* The last available distance in the subrange has all extra bits set. */
+    extra = (1u << ndistbits) - 1;
+    /* Calculate region start. NB: ndistbits >= 1. */
+    start = (1u << (ndistbits + 1)) - 4;
+    /* Move to subregion. */
+    start += (group & 1) << ndistbits;
+    /* Calculate the alphabet size. */
+    result.max_alphabet_size = ((group << npostfix) | postfix) + ndirect +
+        BROTLI_NUM_DISTANCE_SHORT_CODES + 1;
+    /* Calculate the maximal distance representable by the alphabet. */
+    result.max_distance = ((start + extra) << npostfix) + postfix + ndirect + 1;
+    return result;
+  }
+}
+
+/* Represents the range of values belonging to a prefix code:
+   [offset, offset + 2^nbits) */
+typedef struct {
+  uint16_t offset;
+  uint8_t nbits;
+} BrotliPrefixCodeRange;
+
+/* "Soft-private", it is exported, but not "advertised" as API. */
+BROTLI_COMMON_API extern const BrotliPrefixCodeRange
+    _kBrotliPrefixCodeRanges[BROTLI_NUM_BLOCK_LEN_SYMBOLS];
+
+#endif  /* BROTLI_COMMON_CONSTANTS_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/common/context.c b/third-party/libjxl/libjxl/third_party/brotli/c/common/context.c
new file mode 100644
index 0000000000..7f9c958699
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/common/context.c
@@ -0,0 +1,156 @@
+#include "context.h"
+
+#include <brotli/types.h>
+
+/* Common context lookup table for all context modes. */
+const uint8_t _kBrotliContextLookupTable[2048] = {
+  /* CONTEXT_LSB6, last byte. */
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+
+  /* CONTEXT_LSB6, second last byte, */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+  /* CONTEXT_MSB6, last byte. */
+   0,  0,  0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,
+   4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,
+   8,  8,  8,  8,  9,  9,  9,  9, 10, 10, 10, 10, 11, 11, 11, 11,
+  12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
+  16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
+  20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
+  24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
+  28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
+  32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
+  36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
+  40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
+  44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
+  48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51,
+  52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55,
+  56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59,
+  60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63,
+
+  /* CONTEXT_MSB6, second last byte, */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+  /* CONTEXT_UTF8, last byte. */
+  /* ASCII range. */
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  4,  4,  0,  0,  4,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
+  44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
+  12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
+  52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
+  12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
+  60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12,  0,
+  /* UTF8 continuation byte range. */
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  /* UTF8 lead byte range. */
+  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+
+  /* CONTEXT_UTF8 second last byte. */
+  /* ASCII range. */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
+  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
+  1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
+  /* UTF8 continuation byte range. */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  /* UTF8 lead byte range. */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+
+  /* CONTEXT_SIGNED, last byte, same as the above values shifted by 3 bits. */
+   0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+  48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56,
+
+  /* CONTEXT_SIGNED, second last byte. */
+  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+};
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/common/context.h b/third-party/libjxl/libjxl/third_party/brotli/c/common/context.h
new file mode 100644
index 0000000000..685a279dc0
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/common/context.h
@@ -0,0 +1,113 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Lookup table to map the previous two bytes to a context id.
+
+  There are four different context modeling modes defined here:
+    CONTEXT_LSB6: context id is the least significant 6 bits of the last byte,
+    CONTEXT_MSB6: context id is the most significant 6 bits of the last byte,
+    CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text,
+    CONTEXT_SIGNED: second-order context model tuned for signed integers.
+
+  If |p1| and |p2| are the previous two bytes, and |mode| is current context
+  mode, we calculate the context as:
+
+    context = ContextLut(mode)[p1] | ContextLut(mode)[p2 + 256].
+
+  For CONTEXT_UTF8 mode, if the previous two bytes are ASCII characters
+  (i.e. < 128), this will be equivalent to
+
+    context = 4 * context1(p1) + context2(p2),
+
+  where context1 is based on the previous byte in the following way:
+
+    0  : non-ASCII control
+    1  : \t, \n, \r
+    2  : space
+    3  : other punctuation
+    4  : " '
+    5  : %
+    6  : ( < [ {
+    7  : ) > ] }
+    8  : , ; :
+    9  : .
+    10 : =
+    11 : number
+    12 : upper-case vowel
+    13 : upper-case consonant
+    14 : lower-case vowel
+    15 : lower-case consonant
+
+  and context2 is based on the second last byte:
+
+    0 : control, space
+    1 : punctuation
+    2 : upper-case letter, number
+    3 : lower-case letter
+
+  If the last byte is ASCII, and the second last byte is not (in a valid UTF8
+  stream it will be a continuation byte, value between 128 and 191), the
+  context is the same as if the second last byte was an ASCII control or space.
+
+  If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
+  be a continuation byte and the context id is 2 or 3 depending on the LSB of
+  the last byte and to a lesser extent on the second last byte if it is ASCII.
+
+  If the last byte is a UTF8 continuation byte, the second last byte can be:
+    - continuation byte: the next byte is probably ASCII or lead byte (assuming
+      4-byte UTF8 characters are rare) and the context id is 0 or 1.
+    - lead byte (192 - 223): next byte is ASCII or lead byte, context is 0 or 1
+    - lead byte (224 - 255): next byte is continuation byte, context is 2 or 3
+
+  The possible value combinations of the previous two bytes, the range of
+  context ids and the type of the next byte is summarized in the table below:
+
+  |--------\-----------------------------------------------------------------|
+  |         \                         Last byte                              |
+  | Second   \---------------------------------------------------------------|
+  | last byte \    ASCII            |   cont. byte        |   lead byte      |
+  |            \   (0-127)          |   (128-191)         |   (192-)         |
+  |=============|===================|=====================|==================|
+  |  ASCII      | next: ASCII/lead  |  not valid          |  next: cont.     |
+  |  (0-127)    | context: 4 - 63   |                     |  context: 2 - 3  |
+  |-------------|-------------------|---------------------|------------------|
+  |  cont. byte | next: ASCII/lead  |  next: ASCII/lead   |  next: cont.     |
+  |  (128-191)  | context: 4 - 63   |  context: 0 - 1     |  context: 2 - 3  |
+  |-------------|-------------------|---------------------|------------------|
+  |  lead byte  | not valid         |  next: ASCII/lead   |  not valid       |
+  |  (192-223)  |                   |  context: 0 - 1     |                  |
+  |-------------|-------------------|---------------------|------------------|
+  |  lead byte  | not valid         |  next: cont.        |  not valid       |
+  |  (224-)     |                   |  context: 2 - 3     |                  |
+  |-------------|-------------------|---------------------|------------------|
+*/
+
+#ifndef BROTLI_COMMON_CONTEXT_H_
+#define BROTLI_COMMON_CONTEXT_H_
+
+#include <brotli/port.h>
+#include <brotli/types.h>
+
+typedef enum ContextType {
+  CONTEXT_LSB6 = 0,
+  CONTEXT_MSB6 = 1,
+  CONTEXT_UTF8 = 2,
+  CONTEXT_SIGNED = 3
+} ContextType;
+
+/* "Soft-private", it is exported, but not "advertised" as API. */
+/* Common context lookup table for all context modes. */
+BROTLI_COMMON_API extern const uint8_t _kBrotliContextLookupTable[2048];
+
+typedef const uint8_t* ContextLut;
+
+/* typeof(MODE) == ContextType; returns ContextLut */
+#define BROTLI_CONTEXT_LUT(MODE) (&_kBrotliContextLookupTable[(MODE) << 9])
+
+/* typeof(LUT) == ContextLut */
+#define BROTLI_CONTEXT(P1, P2, LUT) ((LUT)[P1] | ((LUT) + 256)[P2])
+
+#endif  /* BROTLI_COMMON_CONTEXT_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/common/dictionary.bin b/third-party/libjxl/libjxl/third_party/brotli/c/common/dictionary.bin
new file mode 100644
index 0000000000..a585c0e292
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/common/dictionary.bin
@@ -0,0 +1,432 @@
+timedownlifeleftbackcodedatashowonlysitecityopenjustlikefreeworktextyearoverbodyloveformbookplaylivelinehelphomesidemorewordlongthemviewfindpagedaysfullheadtermeachareafromtruemarkableuponhighdatelandnewsevennextcasebothpostusedmadehandherewhatnameLinkblogsizebaseheldmakemainuser') +holdendswithNewsreadweresigntakehavegameseencallpathwellplusmenufilmpartjointhislistgoodneedwayswestjobsmindalsologorichuseslastteamarmyfoodkingwilleastwardbestfirePageknowaway.pngmovethanloadgiveselfnotemuchfeedmanyrockicononcelookhidediedHomerulehostajaxinfoclublawslesshalfsomesuchzone100%onescareTimeracebluefourweekfacehopegavehardlostwhenparkkeptpassshiproomHTMLplanTypedonesavekeepflaglinksoldfivetookratetownjumpthusdarkcardfilefearstaykillthatfallautoever.comtalkshopvotedeepmoderestturnbornbandfellroseurl(skinrolecomeactsagesmeetgold.jpgitemvaryfeltthensenddropViewcopy1.0"</a>stopelseliestourpack.gifpastcss?graymean&gt;rideshotlatesaidroadvar feeljohnrickportfast'UA-dead</b>poorbilltypeU.S.woodmust2px;Inforankwidewantwalllead[0];paulwavesure$('#waitmassarmsgoesgainlangpaid!-- lockunitrootwalkfirmwifexml"songtest20pxkindrowstoolfontmailsafestarmapscorerainflowbabyspansays4px;6px;artsfootrealwikiheatsteptriporg/lakeweaktoldFormcastfansbankveryrunsjulytask1px;goalgrewslowedgeid="sets5px;.js?40pxif (soonseatnonetubezerosentreedfactintogiftharm18pxcamehillboldzoomvoideasyringfillpeakinitcost3px;jacktagsbitsrolleditknewnear<!--growJSONdutyNamesaleyou lotspainjazzcoldeyesfishwww.risktabsprev10pxrise25pxBlueding300,ballfordearnwildbox.fairlackverspairjunetechif(!pickevil$("#warmlorddoespull,000ideadrawhugespotfundburnhrefcellkeystickhourlossfuel12pxsuitdealRSS"agedgreyGET"easeaimsgirlaids8px;navygridtips#999warsladycars); }php?helltallwhomzh:�*/
+ 100hall.
+
+A7px;pushchat0px;crew*/</hash75pxflatrare && tellcampontolaidmissskiptentfinemalegetsplot400,
+
+coolfeet.php<br>ericmostguidbelldeschairmathatom/img&#82luckcent000;tinygonehtmlselldrugFREEnodenick?id=losenullvastwindRSS wearrelybeensamedukenasacapewishgulfT23:hitsslotgatekickblurthey15px''););">msiewinsbirdsortbetaseekT18:ordstreemall60pxfarm’sboys[0].');"POSTbearkids);}}marytend(UK)quadzh:�-siz----prop');liftT19:viceandydebt>RSSpoolneckblowT16:doorevalT17:letsfailoralpollnovacolsgene —softrometillross<h3>pourfadepink<tr>mini)|!(minezh:�barshear00);milk -->ironfreddiskwentsoilputs/js/holyT22:ISBNT20:adamsees<h2>json', 'contT21: RSSloopasiamoon</p>soulLINEfortcartT14:<h1>80px!--<9px;T04:mike:46ZniceinchYorkricezh:�'));puremageparatonebond:37Z_of_']);000,zh:�tankyardbowlbush:56ZJava30px
+|}
+%C3%:34ZjeffEXPIcashvisagolfsnowzh:�quer.csssickmeatmin.binddellhirepicsrent:36ZHTTP-201fotowolfEND xbox:54ZBODYdick;
+}
+exit:35Zvarsbeat'});diet999;anne}}</[i].Langkm²wiretoysaddssealalex;
+	}echonine.org005)tonyjewssandlegsroof000) 200winegeardogsbootgarycutstyletemption.xmlcockgang$('.50pxPh.Dmiscalanloandeskmileryanunixdisc);}
+dustclip).
+
+70px-200DVDs7]><tapedemoi++)wageeurophiloptsholeFAQsasin-26TlabspetsURL bulkcook;}
+HEAD[0])abbrjuan(198leshtwin</i>sonyguysfuckpipe|-
+!002)ndow[1];[];
+Log salt
+		bangtrimbath){
+00px
+});ko:�feesad>s:// [];tollplug(){
+{
+ .js'200pdualboat.JPG);
+}quot);
+
+');
+
+}201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037201320122011201020092008200720062005200420032002200120001999199819971996199519941993199219911990198919881987198619851984198319821981198019791978197719761975197419731972197119701969196819671966196519641963196219611960195919581957195619551954195319521951195010001024139400009999comomásesteestaperotodohacecadaañobiendíaasívidacasootroforosolootracualdijosidograntipotemadebealgoquéestonadatrespococasabajotodasinoaguapuesunosantediceluisellamayozonaamorpisoobraclicellodioshoracasiзанаомрарутанепоотизнодотожеонихНаеебымыВысовывоНообПолиниРФНеМытыОнимдаЗаДаНуОбтеИзейнуммТыужفيأنمامعكلأورديافىهولملكاولهبسالإنهيأيقدهلثمبهلوليبلايبكشيامأمنتبيلنحبهممشوشfirstvideolightworldmediawhitecloseblackrightsmallbooksplacemusicfieldorderpointvalueleveltableboardhousegroupworksyearsstatetodaywaterstartstyledeathpowerphonenighterrorinputabouttermstitletoolseventlocaltimeslargewordsgamesshortspacefocusclearmodelblockguideradiosharewomenagainmoneyimagenamesyounglineslatercolorgreenfront&amp;watchforcepricerulesbeginaftervisitissueareasbelowindextotalhourslabelprintpressbuiltlinksspeedstudytradefoundsenseundershownformsrangeaddedstillmovedtakenaboveflashfixedoftenotherviewschecklegalriveritemsquickshapehumanexistgoingmoviethirdbasicpeacestagewidthloginideaswrotepagesusersdrivestorebreaksouthvoicesitesmonthwherebuildwhichearthforumthreesportpartyClicklowerlivesclasslayerentrystoryusagesoundcourtyour birthpopuptypesapplyImagebeinguppernoteseveryshowsmeansextramatchtrackknownearlybegansuperpapernorthlearngivennamedendedTermspartsGroupbrandusingwomanfalsereadyaudiotakeswhile.com/livedcasesdailychildgreatjudgethoseunitsneverbroadcoastcoverapplefilescyclesceneplansclickwritequeenpieceemailframeolderphotolimitcachecivilscaleenterthemetheretouchboundroyalaskedwholesincestock 
namefaithheartemptyofferscopeownedmightalbumthinkbloodarraymajortrustcanonunioncountvalidstoneStyleLoginhappyoccurleft:freshquitefilmsgradeneedsurbanfightbasishoverauto;route.htmlmixedfinalYour slidetopicbrownalonedrawnsplitreachRightdatesmarchquotegoodsLinksdoubtasyncthumballowchiefyouthnovel10px;serveuntilhandsCheckSpacequeryjamesequaltwice0,000Startpanelsongsroundeightshiftworthpostsleadsweeksavoidthesemilesplanesmartalphaplantmarksratesplaysclaimsalestextsstarswrong</h3>thing.org/multiheardPowerstandtokensolid(thisbringshipsstafftriedcallsfullyfactsagentThis //-->adminegyptEvent15px;Emailtrue"crossspentblogsbox">notedleavechinasizesguest</h4>robotheavytrue,sevengrandcrimesignsawaredancephase><!--en_US&#39;200px_namelatinenjoyajax.ationsmithU.S. holdspeterindianav">chainscorecomesdoingpriorShare1990sromanlistsjapanfallstrialowneragree</h2>abusealertopera"-//WcardshillsteamsPhototruthclean.php?saintmetallouismeantproofbriefrow">genretrucklooksValueFrame.net/-->
+<try {
+var makescostsplainadultquesttrainlaborhelpscausemagicmotortheir250pxleaststepsCountcouldglasssidesfundshotelawardmouthmovesparisgivesdutchtexasfruitnull,||[];top">
+<!--POST"ocean<br/>floorspeakdepth sizebankscatchchart20px;aligndealswould50px;url="parksmouseMost ...</amongbrainbody none;basedcarrydraftreferpage_home.meterdelaydreamprovejoint</tr>drugs<!-- aprilidealallenexactforthcodeslogicView seemsblankports (200saved_linkgoalsgrantgreekhomesringsrated30px;whoseparse();" Blocklinuxjonespixel');">);if(-leftdavidhorseFocusraiseboxesTrackement</em>bar">.src=toweralt="cablehenry24px;setupitalysharpminortastewantsthis.resetwheelgirls/css/100%;clubsstuffbiblevotes 1000korea});
+bandsqueue= {};80px;cking{
+		aheadclockirishlike ratiostatsForm"yahoo)[0];Aboutfinds</h1>debugtasksURL =cells})();12px;primetellsturns0x600.jpg"spainbeachtaxesmicroangel--></giftssteve-linkbody.});
+	mount (199FAQ</rogerfrankClass28px;feeds<h1><scotttests22px;drink) || lewisshall#039; for lovedwaste00px;ja:�simon<fontreplymeetsuntercheaptightBrand) != dressclipsroomsonkeymobilmain.Name platefunnytreescom/"1.jpgwmodeparamSTARTleft idden, 201);
+}
+form.viruschairtransworstPagesitionpatch<!--
+o-cacfirmstours,000 asiani++){adobe')[0]id=10both;menu .2.mi.png"kevincoachChildbruce2.jpgURL)+.jpg|suitesliceharry120" sweettr>
+name=diegopage swiss-->
+
+#fff;">Log.com"treatsheet) && 14px;sleepntentfiledja:�id="cName"worseshots-box-delta
+&lt;bears:48Z<data-rural</a> spendbakershops= "";php">ction13px;brianhellosize=o=%2F joinmaybe<img img">, fjsimg" ")[0]MTopBType"newlyDanskczechtrailknows</h5>faq">zh-cn10);
+-1");type=bluestrulydavis.js';>
+<!steel you h2>
+form jesus100% menu.
+	
+walesrisksumentddingb-likteachgif" vegasdanskeestishqipsuomisobredesdeentretodospuedeañosestátienehastaotrospartedondenuevohacerformamismomejormundoaquídíassóloayudafechatodastantomenosdatosotrassitiomuchoahoralugarmayorestoshorastenerantesfotosestaspaísnuevasaludforosmedioquienmesespoderchileserávecesdecirjoséestarventagrupohechoellostengoamigocosasnivelgentemismaairesjuliotemashaciafavorjuniolibrepuntobuenoautorabrilbuenatextomarzosaberlistaluegocómoenerojuegoperúhaberestoynuncamujervalorfueralibrogustaigualvotoscasosguíapuedosomosavisousteddebennochebuscafaltaeurosseriedichocursoclavecasasleónplazolargoobrasvistaapoyojuntotratavistocrearcampohemoscincocargopisosordenhacenáreadiscopedrocercapuedapapelmenorútilclarojorgecalleponertardenadiemarcasigueellassiglocochemotosmadreclaserestoniñoquedapasarbancohijosviajepabloéstevienereinodejarfondocanalnorteletracausatomarmanoslunesautosvillavendopesartipostengamarcollevapadreunidovamoszonasambosbandamariaabusomuchasubirriojavivirgradochicaallíjovendichaestantalessalirsuelopesosfinesllamabuscoéstalleganegroplazahumorpagarjuntadobleislasbolsabañohablaluchaÁreadicenjugarnotasvalleallácargadolorabajoestégustomentemariofirmacostofichaplatahogarartesleyesaquelmuseobasespocosmitadcielochicomiedoganarsantoetapadebesplayaredessietecortecoreadudasdeseoviejodeseaaguas&quot;domaincommonstatuseventsmastersystemactionbannerremovescrollupdateglobalmediumfilternumberchangeresultpublicscreenchoosenormaltravelissuessourcetargetspringmodulemobileswitchphotosborderregionitselfsocialactivecolumnrecordfollowtitle>eitherlengthfamilyfriendlayoutauthorcreatereviewsummerserverplayedplayerexpandpolicyformatdoublepointsseriespersonlivingdesignmonthsforcesuniqueweightpeopleenergynaturesearchfigurehavingcustomoffsetletterwindowsubmitrendergroupsuploadhealthmethodvideosschoolfutureshadowdebatevaluesObjectothersrightsleaguechromesimplenoticesharedendingseasonreportonlinesquarebuttonimagesenablemovinglatestwinterFranceperiodstrongrepeatLondondetailform
eddemandsecurepassedtoggleplacesdevicestaticcitiesstreamyellowattackstreetflighthiddeninfo">openedusefulvalleycausesleadersecretseconddamagesportsexceptratingsignedthingseffectfieldsstatesofficevisualeditorvolumeReportmuseummoviesparentaccessmostlymother" id="marketgroundchancesurveybeforesymbolmomentspeechmotioninsidematterCenterobjectexistsmiddleEuropegrowthlegacymannerenoughcareeransweroriginportalclientselectrandomclosedtopicscomingfatheroptionsimplyraisedescapechosenchurchdefinereasoncorneroutputmemoryiframepolicemodelsNumberduringoffersstyleskilledlistedcalledsilvermargindeletebetterbrowselimitsGlobalsinglewidgetcenterbudgetnowrapcreditclaimsenginesafetychoicespirit-stylespreadmakingneededrussiapleaseextentScriptbrokenallowschargedividefactormember-basedtheoryconfigaroundworkedhelpedChurchimpactshouldalwayslogo" bottomlist">){var prefixorangeHeader.push(couplegardenbridgelaunchReviewtakingvisionlittledatingButtonbeautythemesforgotSearchanchoralmostloadedChangereturnstringreloadMobileincomesupplySourceordersviewed&nbsp;courseAbout island<html cookiename="amazonmodernadvicein</a>: The dialoghousesBEGIN MexicostartscentreheightaddingIslandassetsEmpireSchooleffortdirectnearlymanualSelect.
+
+Onejoinedmenu">PhilipawardshandleimportOfficeregardskillsnationSportsdegreeweekly (e.g.behinddoctorloggedunited</b></beginsplantsassistartistissued300px|canadaagencyschemeremainBrazilsamplelogo">beyond-scaleacceptservedmarineFootercamera</h1>
+_form"leavesstress" />
+.gif" onloadloaderOxfordsistersurvivlistenfemaleDesignsize="appealtext">levelsthankshigherforcedanimalanyoneAfricaagreedrecentPeople<br />wonderpricesturned|| {};main">inlinesundaywrap">failedcensusminutebeaconquotes150px|estateremoteemail"linkedright;signalformal1.htmlsignupprincefloat:.png" forum.AccesspaperssoundsextendHeightsliderUTF-8"&amp; Before. WithstudioownersmanageprofitjQueryannualparamsboughtfamousgooglelongeri++) {israelsayingdecidehome">headerensurebranchpiecesblock;statedtop"><racingresize--&gt;pacitysexualbureau.jpg" 10,000obtaintitlesamount, Inc.comedymenu" lyricstoday.indeedcounty_logo.FamilylookedMarketlse ifPlayerturkey);var forestgivingerrorsDomain}else{insertBlog</footerlogin.fasteragents<body 10px 0pragmafridayjuniordollarplacedcoversplugin5,000 page">boston.test(avatartested_countforumsschemaindex,filledsharesreaderalert(appearSubmitline">body">
+* TheThoughseeingjerseyNews</verifyexpertinjurywidth=CookieSTART across_imagethreadnativepocketbox">
+System DavidcancertablesprovedApril reallydriveritem">more">boardscolorscampusfirst || [];media.guitarfinishwidth:showedOther .php" assumelayerswilsonstoresreliefswedenCustomeasily your String
+
+Whiltaylorclear:resortfrenchthough") + "<body>buyingbrandsMembername">oppingsector5px;">vspacepostermajor coffeemartinmaturehappen</nav>kansaslink">Images=falsewhile hspace0&amp; 
+
+In  powerPolski-colorjordanBottomStart -count2.htmlnews">01.jpgOnline-rightmillerseniorISBN 00,000 guidesvalue)ectionrepair.xml"  rights.html-blockregExp:hoverwithinvirginphones</tr>using 
+	var >');
+	</td>
+</tr>
+bahasabrasilgalegomagyarpolskisrpskiردو中文简体繁體信息中国我们一个公司管理论坛可以服务时间个人产品自己企业查看工作联系没有网站所有评论中心文章用户首页作者技术问题相关下载搜索使用软件在线主题资料视频回复注册网络收藏内容推荐市场消息空间发布什么好友生活图片发展如果手机新闻最新方式北京提供关于更多这个系统知道游戏广告其他发表安全第一会员进行点击版权电子世界设计免费教育加入活动他们商品博客现在上海如何已经留言详细社区登录本站需要价格支持国际链接国家建设朋友阅读法律位置经济选择这样当前分类排行因为交易最后音乐不能通过行业科技可能设备合作大家社会研究专业全部项目这里还是开始情况电脑文件品牌帮助文化资源大学学习地址浏览投资工程要求怎么时候功能主要目前资讯城市方法电影招聘声明任何健康数据美国汽车介绍但是交流生产所以电话显示一些单位人员分析地图旅游工具学生系列网友帖子密码频道控制地区基本全国网上重要第二喜欢进入友情这些考试发现培训以上政府成为环境香港同时娱乐发送一定开发作品标准欢迎解决地方一下以及责任或者客户代表积分女人数码销售出现离线应用列表不同编辑统计查询不要有关机构很多播放组织政策直接能力来源時間看到热门关键专区非常英语百度希望美女比较知识规定建议部门意见精彩日本提高发言方面基金处理权限影片银行还有分享物品经营添加专家这种话题起来业务公告记录简介质量男人影响引用报告部分快速咨询时尚注意申请学校应该历史只是返回购买名称为了成功说明供应孩子专题程序一般會員只有其它保护而且今天窗口动态状态特别认为必须更新小说我們作为媒体包括那么一样国内是否根据电视学院具有过程由于人才出来不过正在明星故事关系标题商务输入一直基础教学了解建筑结果全球通知计划对于艺术相册发生真的建立等级类型经验实现制作来自标签以下原创无法其中個人一切指南关闭集团第三关注因此照片深圳商业广州日期高级最近综合表示专辑行为交通评价觉得精华家庭完成感觉安装得到邮件制度食品虽然转载报价记者方案行政人民用品东西提出酒店然后付款热点以前完全发帖设置领导工业医院看看经典原因平台各种增加材料新增之后职业效果今年论文我国告诉版主修改参与打印快乐机械观点存在精神获得利用继续你们这么模式语言能够雅虎操作风格一起科学体育短信条件治疗运动产业会议导航先生联盟可是問題结构作用调查資料自动负责农业访问实施接受讨论那个反馈加强女性范围服務休闲今日客服觀看参加的话一点保证图书有效测试移动才能决定股票不断需求不得办法之间采用营销投诉目标爱情摄影有些複製文学机会数字装修购物农村全面精品其实事情水平提示上市谢谢普通教师上传类别歌曲拥有创新配件只要时代資訊达到人生订阅老师展示心理贴子網站主題自然级别简单改革那些来说打开代码删除证券节目重点次數多少规划资金找到以后大全主页最佳回答天下保障现代检查投票小时沒有正常甚至代理目录公开复制金融幸福版本形成准备行情回到思想怎样协议认证最好产生按照服装广东动漫采购新手组图面板参考政治容易天地努力人们升级速度人物调整流行造成文字韩国贸易开展相關表现影视如此美容大小报道条款心情许多法规家居书店连接立即举报技巧奥运登入以来理论事件自由中华办公妈妈真正不错全文合同价值别人监督具体世纪团队创业承担增长有人保持商家维修台湾左右股份答案实际电信经理生命宣传任务正式特色下来协会只能当然重新內容指导运行日志賣家超过土地浙江支付推出站长杭州执行制造之一推广现场描述变化传统歌手保险课程医疗经过过去之前收入年度杂志美丽最高登陆未来加工免责教程版块身体重庆出售成本形式土豆出價东方邮箱南京求职取得职位相信页面分钟网页确定图例网址积极错误目的宝贝机关风险授权病毒宠物除了評論疾病及时求购站点儿童每天中央认识每个天津字体台灣维护本页个性官方常见相机战略应当律师方便校园股市房屋栏目员工导致突然道具本网结合档案劳动另外美元引起改变第四会计說明隐私宝宝规范消费共同忘记体系带来名字發表开放加盟受到二手大量成人数量共享区域女孩原则所在结束通信超级配置当时优秀性感房产遊戲出口提交就业保健程度参数事业整个山东情感特殊分類搜尋属于门户财务声音及其财经坚持干部成立利益考虑成都包装用戶比赛文明招商完整真是眼睛伙伴威望领域卫生优惠論壇公共良好充分符合附件特点不可英文资产根本明显密碼公众民族更加享受同学启动适合原来问答本文美食绿色稳定终于生物供求搜狐力量严重永远写真有限竞争对象费用不好绝对十分促进点评影音优势不少欣赏并且有点方向全新信用设施形象资格突破随着重大于是毕业智能化工完美商城统一出版打造產品概况用于保留因素中國存储贴图最愛长期口价理财基地安排武汉里面创建天空首先完善驱动下面不再诚信意义阳光英国漂亮军事玩家群众农民即可名稱家具动画想到注明小学性能考研硬件观看清楚搞笑首頁黄金适用江苏真实主管阶段註冊
翻译权利做好似乎通讯施工狀態也许环保培养概念大型机票理解匿名cuandoenviarmadridbuscariniciotiempoporquecuentaestadopuedenjuegoscontraestánnombretienenperfilmaneraamigosciudadcentroaunquepuedesdentroprimerpreciosegúnbuenosvolverpuntossemanahabíaagostonuevosunidoscarlosequiponiñosmuchosalgunacorreoimagenpartirarribamaríahombreempleoverdadcambiomuchasfueronpasadolíneaparecenuevascursosestabaquierolibroscuantoaccesomiguelvarioscuatrotienesgruposseráneuropamediosfrenteacercademásofertacochesmodeloitalialetrasalgúncompracualesexistecuerposiendoprensallegarviajesdineromurciapodrápuestodiariopuebloquieremanuelpropiocrisisciertoseguromuertefuentecerrargrandeefectopartesmedidapropiaofrecetierrae-mailvariasformasfuturoobjetoseguirriesgonormasmismosúnicocaminositiosrazóndebidopruebatoledoteníajesúsesperococinaorigentiendacientocádizhablarseríalatinafuerzaestiloguerraentraréxitolópezagendavídeoevitarpaginametrosjavierpadresfácilcabezaáreassalidaenvíojapónabusosbienestextosllevarpuedanfuertecomúnclaseshumanotenidobilbaounidadestáseditarcreadoдлячтокакилиэтовсеегопритакещеужеКакбезбылониВсеподЭтотомчемнетлетразонагдемнеДляПринаснихтемктогодвоттамСШАмаяЧтовасвамемуТакдванамэтиэтуВамтехпротутнаддняВоттринейВаснимсамтотрубОнимирнееОООлицэтаОнанемдоммойдвеоносудकेहैकीसेकाकोऔरपरनेएककिभीइसकरतोहोआपहीयहयातकथाjagranआजजोअबदोगईजागएहमइनवहयेथेथीघरजबदीकईजीवेनईनएहरउसमेकमवोलेसबमईदेओरआमबसभरबनचलमनआगसीलीعلىإلىهذاآخرعددالىهذهصورغيركانولابينعرضذلكهنايومقالعليانالكنحتىقبلوحةاخرفقطعبدركنإذاكمااحدإلافيهبعضكيفبحثومنوهوأناجدالهاسلمعندليسعبرصلىمنذبهاأنهمثلكنتالاحيثمصرشرححولوفياذالكلمرةانتالفأبوخاصأنتانهاليعضووقدابنخيربنتلكمشاءوهيابوقصصومارقمأحدنحنعدمرأياحةكتبدونيجبمنهتحتجهةسنةيتمكرةغزةنفسبيتللهلناتلكقلبلماعنهأولشيءنورأمافيكبكلذاترتببأنهمسانكبيعفقدحسنلهمشعرأهلشهرقطرطلبprofileservicedefaulthimselfdetailscontentsupportstartedmessagesuccessfashion<title>countryaccountcreatedstoriesresultsrunningprocesswritingobjectsvisiblewelcomearticleunknownnetworkcompanydynamicbrowserprivacyproblemServicerespectdisplayrequestreservewebsitehistoryfriendsop
tionsworkingversionmillionchannelwindow.addressvisitedweathercorrectproductedirectforwardyou canremovedsubjectcontrolarchivecurrentreadinglibrarylimitedmanagerfurthersummarymachineminutesprivatecontextprogramsocietynumberswrittenenabledtriggersourcesloadingelementpartnerfinallyperfectmeaningsystemskeepingculture&quot;,journalprojectsurfaces&quot;expiresreviewsbalanceEnglishContentthroughPlease opinioncontactaverageprimaryvillageSpanishgallerydeclinemeetingmissionpopularqualitymeasuregeneralspeciessessionsectionwriterscounterinitialreportsfiguresmembersholdingdisputeearlierexpressdigitalpictureAnothermarriedtrafficleadingchangedcentralvictoryimages/reasonsstudiesfeaturelistingmust beschoolsVersionusuallyepisodeplayinggrowingobviousoverlaypresentactions</ul>
+wrapperalreadycertainrealitystorageanotherdesktopofferedpatternunusualDigitalcapitalWebsitefailureconnectreducedAndroiddecadesregular &amp; animalsreleaseAutomatgettingmethodsnothingPopularcaptionletterscapturesciencelicensechangesEngland=1&amp;History = new CentralupdatedSpecialNetworkrequirecommentwarningCollegetoolbarremainsbecauseelectedDeutschfinanceworkersquicklybetweenexactlysettingdiseaseSocietyweaponsexhibit&lt;!--Controlclassescoveredoutlineattacksdevices(windowpurposetitle="Mobile killingshowingItaliandroppedheavilyeffects-1']);
+confirmCurrentadvancesharingopeningdrawingbillionorderedGermanyrelated</form>includewhetherdefinedSciencecatalogArticlebuttonslargestuniformjourneysidebarChicagoholidayGeneralpassage,&quot;animatefeelingarrivedpassingnaturalroughly.
+
+The but notdensityBritainChineselack oftributeIreland" data-factorsreceivethat isLibraryhusbandin factaffairsCharlesradicalbroughtfindinglanding:lang="return leadersplannedpremiumpackageAmericaEdition]&quot;Messageneed tovalue="complexlookingstationbelievesmaller-mobilerecordswant tokind ofFirefoxyou aresimilarstudiedmaximumheadingrapidlyclimatekingdomemergedamountsfoundedpioneerformuladynastyhow to SupportrevenueeconomyResultsbrothersoldierlargelycalling.&quot;AccountEdward segmentRobert effortsPacificlearnedup withheight:we haveAngelesnations_searchappliedacquiremassivegranted: falsetreatedbiggestbenefitdrivingStudiesminimumperhapsmorningsellingis usedreversevariant role="missingachievepromotestudentsomeoneextremerestorebottom:evolvedall thesitemapenglishway to  AugustsymbolsCompanymattersmusicalagainstserving})();
+paymenttroubleconceptcompareparentsplayersregionsmonitor ''The winningexploreadaptedGalleryproduceabilityenhancecareers). The collectSearch ancientexistedfooter handlerprintedconsoleEasternexportswindowsChannelillegalneutralsuggest_headersigning.html">settledwesterncausing-webkitclaimedJusticechaptervictimsThomas mozillapromisepartieseditionoutside:false,hundredOlympic_buttonauthorsreachedchronicdemandssecondsprotectadoptedprepareneithergreatlygreateroverallimprovecommandspecialsearch.worshipfundingthoughthighestinsteadutilityquarterCulturetestingclearlyexposedBrowserliberal} catchProjectexamplehide();FloridaanswersallowedEmperordefenseseriousfreedomSeveral-buttonFurtherout of != nulltrainedDenmarkvoid(0)/all.jspreventRequestStephen
+
+When observe</h2>
+Modern provide" alt="borders.
+
+For 
+
+Many artistspoweredperformfictiontype ofmedicalticketsopposedCouncilwitnessjusticeGeorge Belgium...</a>twitternotablywaitingwarfare Other rankingphrasesmentionsurvivescholar</p>
+ Countryignoredloss ofjust asGeorgiastrange<head><stopped1']);
+islandsnotableborder:list ofcarried100,000</h3>
+ severalbecomesselect wedding00.htmlmonarchoff theteacherhighly biologylife ofor evenrise of&raquo;plusonehunting(thoughDouglasjoiningcirclesFor theAncientVietnamvehiclesuch ascrystalvalue =Windowsenjoyeda smallassumed<a id="foreign All rihow theDisplayretiredhoweverhidden;battlesseekingcabinetwas notlook atconductget theJanuaryhappensturninga:hoverOnline French lackingtypicalextractenemieseven ifgeneratdecidedare not/searchbeliefs-image:locatedstatic.login">convertviolententeredfirst">circuitFinlandchemistshe was10px;">as suchdivided</span>will beline ofa greatmystery/index.fallingdue to railwaycollegemonsterdescentit withnuclearJewish protestBritishflowerspredictreformsbutton who waslectureinstantsuicidegenericperiodsmarketsSocial fishingcombinegraphicwinners<br /><by the NaturalPrivacycookiesoutcomeresolveSwedishbrieflyPersianso muchCenturydepictscolumnshousingscriptsnext tobearingmappingrevisedjQuery(-width:title">tooltipSectiondesignsTurkishyounger.match(})();
+
+burningoperatedegreessource=Richardcloselyplasticentries</tr>
+color:#ul id="possessrollingphysicsfailingexecutecontestlink toDefault<br />
+: true,chartertourismclassicproceedexplain</h1>
+online.?xml vehelpingdiamonduse theairlineend -->).attr(readershosting#ffffffrealizeVincentsignals src="/ProductdespitediversetellingPublic held inJoseph theatreaffects<style>a largedoesn'tlater, ElementfaviconcreatorHungaryAirportsee theso thatMichaelSystemsPrograms, and  width=e&quot;tradingleft">
+personsGolden Affairsgrammarformingdestroyidea ofcase ofoldest this is.src = cartoonregistrCommonsMuslimsWhat isin manymarkingrevealsIndeed,equally/show_aoutdoorescape(Austriageneticsystem,In the sittingHe alsoIslandsAcademy
+		<!--Daniel bindingblock">imposedutilizeAbraham(except{width:putting).html(|| [];
+DATA[ *kitchenmountedactual dialectmainly _blank'installexpertsif(typeIt also&copy; ">Termsborn inOptionseasterntalkingconcerngained ongoingjustifycriticsfactoryits ownassaultinvitedlastinghis ownhref="/" rel="developconcertdiagramdollarsclusterphp?id=alcohol);})();using a><span>vesselsrevivalAddressamateurandroidallegedillnesswalkingcentersqualifymatchesunifiedextinctDefensedied in
+	<!-- customslinkingLittle Book ofeveningmin.js?are thekontakttoday's.html" target=wearingAll Rig;
+})();raising Also, crucialabout">declare-->
+<scfirefoxas muchappliesindex, s, but type = 
+
+<!--towardsRecordsPrivateForeignPremierchoicesVirtualreturnsCommentPoweredinline;povertychamberLiving volumesAnthonylogin" RelatedEconomyreachescuttinggravitylife inChapter-shadowNotable</td>
+ returnstadiumwidgetsvaryingtravelsheld bywho arework infacultyangularwho hadairporttown of
+
+Some 'click'chargeskeywordit willcity of(this);Andrew unique checkedor more300px; return;rsion="pluginswithin herselfStationFederalventurepublishsent totensionactresscome tofingersDuke ofpeople,exploitwhat isharmonya major":"httpin his menu">
+monthlyofficercouncilgainingeven inSummarydate ofloyaltyfitnessand wasemperorsupremeSecond hearingRussianlongestAlbertalateralset of small">.appenddo withfederalbank ofbeneathDespiteCapitalgrounds), and percentit fromclosingcontainInsteadfifteenas well.yahoo.respondfighterobscurereflectorganic= Math.editingonline paddinga wholeonerroryear ofend of barrierwhen itheader home ofresumedrenamedstrong>heatingretainscloudfrway of March 1knowingin partBetweenlessonsclosestvirtuallinks">crossedEND -->famous awardedLicenseHealth fairly wealthyminimalAfricancompetelabel">singingfarmersBrasil)discussreplaceGregoryfont copursuedappearsmake uproundedboth ofblockedsaw theofficescoloursif(docuwhen heenforcepush(fuAugust UTF-8">Fantasyin mostinjuredUsuallyfarmingclosureobject defenceuse of Medical<body>
+evidentbe usedkeyCodesixteenIslamic#000000entire widely active (typeofone cancolor =speakerextendsPhysicsterrain<tbody>funeralviewingmiddle cricketprophetshifteddoctorsRussell targetcompactalgebrasocial-bulk ofman and</td>
+ he left).val()false);logicalbankinghome tonaming Arizonacredits);
+});
+founderin turnCollinsbefore But thechargedTitle">CaptainspelledgoddessTag -->Adding:but wasRecent patientback in=false&Lincolnwe knowCounterJudaismscript altered']);
+  has theunclearEvent',both innot all
+
+<!-- placinghard to centersort ofclientsstreetsBernardassertstend tofantasydown inharbourFreedomjewelry/about..searchlegendsis mademodern only ononly toimage" linear painterand notrarely acronymdelivershorter00&amp;as manywidth="/* <![Ctitle =of the lowest picked escapeduses ofpeoples PublicMatthewtacticsdamagedway forlaws ofeasy to windowstrong  simple}catch(seventhinfoboxwent topaintedcitizenI don'tretreat. Some ww.");
+bombingmailto:made in. Many carries||{};wiwork ofsynonymdefeatsfavoredopticalpageTraunless sendingleft"><comScorAll thejQuery.touristClassicfalse" Wilhelmsuburbsgenuinebishops.split(global followsbody ofnominalContactsecularleft tochiefly-hidden-banner</li>
+
+. When in bothdismissExplorealways via thespañolwelfareruling arrangecaptainhis sonrule ofhe tookitself,=0&amp;(calledsamplesto makecom/pagMartin Kennedyacceptsfull ofhandledBesides//--></able totargetsessencehim to its by common.mineralto takeways tos.org/ladvisedpenaltysimple:if theyLettersa shortHerbertstrikes groups.lengthflightsoverlapslowly lesser social </p>
+		it intoranked rate oful>
+  attemptpair ofmake itKontaktAntoniohaving ratings activestreamstrapped").css(hostilelead tolittle groups,Picture-->
+
+ rows=" objectinverse<footerCustomV><\/scrsolvingChamberslaverywoundedwhereas!= 'undfor allpartly -right:Arabianbacked centuryunit ofmobile-Europe,is homerisk ofdesiredClintoncost ofage of become none ofp&quot;Middle ead')[0Criticsstudios>&copy;group">assemblmaking pressedwidget.ps:" ? rebuiltby someFormer editorsdelayedCanonichad thepushingclass="but arepartialBabylonbottom carrierCommandits useAs withcoursesa thirddenotesalso inHouston20px;">accuseddouble goal ofFamous ).bind(priests Onlinein Julyst + "gconsultdecimalhelpfulrevivedis veryr'+'iptlosing femalesis alsostringsdays ofarrivalfuture <objectforcingString(" />
+		here isencoded.  The balloondone by/commonbgcolorlaw of Indianaavoidedbut the2px 3pxjquery.after apolicy.men andfooter-= true;for usescreen.Indian image =family,http:// &nbsp;driverseternalsame asnoticedviewers})();
+ is moreseasonsformer the newis justconsent Searchwas thewhy theshippedbr><br>width: height=made ofcuisineis thata very Admiral fixed;normal MissionPress, ontariocharsettry to invaded="true"spacingis mosta more totallyfall of});
+  immensetime inset outsatisfyto finddown tolot of Playersin Junequantumnot thetime todistantFinnishsrc = (single help ofGerman law andlabeledforestscookingspace">header-well asStanleybridges/globalCroatia About [0];
+  it, andgroupedbeing a){throwhe madelighterethicalFFFFFF"bottom"like a employslive inas seenprintermost ofub-linkrejectsand useimage">succeedfeedingNuclearinformato helpWomen'sNeitherMexicanprotein<table by manyhealthylawsuitdevised.push({sellerssimply Through.cookie Image(older">us.js"> Since universlarger open to!-- endlies in']);
+  marketwho is ("DOMComanagedone fortypeof Kingdomprofitsproposeto showcenter;made itdressedwere inmixtureprecisearisingsrc = 'make a securedBaptistvoting 
+		var March 2grew upClimate.removeskilledway the</head>face ofacting right">to workreduceshas haderectedshow();action=book ofan area== "htt<header
+<html>conformfacing cookie.rely onhosted .customhe wentbut forspread Family a meansout theforums.footage">MobilClements" id="as highintense--><!--female is seenimpliedset thea stateand hisfastestbesidesbutton_bounded"><img Infoboxevents,a youngand areNative cheaperTimeoutand hasengineswon the(mostlyright: find a -bottomPrince area ofmore ofsearch_nature,legallyperiod,land ofor withinducedprovingmissilelocallyAgainstthe wayk&quot;px;">
+pushed abandonnumeralCertainIn thismore inor somename isand, incrownedISBN 0-createsOctobermay notcenter late inDefenceenactedwish tobroadlycoolingonload=it. TherecoverMembersheight assumes<html>
+people.in one =windowfooter_a good reklamaothers,to this_cookiepanel">London,definescrushedbaptismcoastalstatus title" move tolost inbetter impliesrivalryservers SystemPerhapses and contendflowinglasted rise inGenesisview ofrising seem tobut in backinghe willgiven agiving cities.flow of Later all butHighwayonly bysign ofhe doesdiffersbattery&amp;lasinglesthreatsintegertake onrefusedcalled =US&ampSee thenativesby thissystem.head of:hover,lesbiansurnameand allcommon/header__paramsHarvard/pixel.removalso longrole ofjointlyskyscraUnicodebr />
+AtlantanucleusCounty,purely count">easily build aonclicka givenpointerh&quot;events else {
+ditionsnow the, with man whoorg/Webone andcavalryHe diedseattle00,000 {windowhave toif(windand itssolely m&quot;renewedDetroitamongsteither them inSenatorUs</a><King ofFrancis-produche usedart andhim andused byscoringat hometo haverelatesibilityfactionBuffalolink"><what hefree toCity ofcome insectorscountedone daynervoussquare };if(goin whatimg" alis onlysearch/tuesdaylooselySolomonsexual - <a hrmedium"DO NOT France,with a war andsecond take a >
+
+
+market.highwaydone inctivity"last">obligedrise to"undefimade to Early praisedin its for hisathleteJupiterYahoo! termed so manyreally s. The a woman?value=direct right" bicycleacing="day andstatingRather,higher Office are nowtimes, when a pay foron this-link">;borderaround annual the Newput the.com" takin toa brief(in thegroups.; widthenzymessimple in late{returntherapya pointbanninginks">
+();" rea place\u003Caabout atr>
+		ccount gives a<SCRIPTRailwaythemes/toolboxById("xhumans,watchesin some if (wicoming formats Under but hashanded made bythan infear ofdenoted/iframeleft involtagein eacha&quot;base ofIn manyundergoregimesaction </p>
+<ustomVa;&gt;</importsor thatmostly &amp;re size="</a></ha classpassiveHost = WhetherfertileVarious=[];(fucameras/></td>acts asIn some>
+
+<!organis <br />Beijingcatalàdeutscheuropeueuskaragaeilgesvenskaespañamensajeusuariotrabajoméxicopáginasiempresistemaoctubreduranteañadirempresamomentonuestroprimeratravésgraciasnuestraprocesoestadoscalidadpersonanúmeroacuerdomúsicamiembroofertasalgunospaísesejemploderechoademásprivadoagregarenlacesposiblehotelessevillaprimeroúltimoeventosarchivoculturamujeresentradaanuncioembargomercadograndesestudiomejoresfebrerodiseñoturismocódigoportadaespaciofamiliaantoniopermiteguardaralgunaspreciosalguiensentidovisitastítuloconocersegundoconsejofranciaminutossegundatenemosefectosmálagasesiónrevistagranadacompraringresogarcíaacciónecuadorquienesinclusodeberámateriahombresmuestrapodríamañanaúltimaestamosoficialtambienningúnsaludospodemosmejorarpositionbusinesshomepagesecuritylanguagestandardcampaignfeaturescategoryexternalchildrenreservedresearchexchangefavoritetemplatemilitaryindustryservicesmaterialproductsz-index:commentssoftwarecompletecalendarplatformarticlesrequiredmovementquestionbuildingpoliticspossiblereligionphysicalfeedbackregisterpicturesdisabledprotocolaudiencesettingsactivityelementslearninganythingabstractprogressoverviewmagazineeconomictrainingpressurevarious <strong>propertyshoppingtogetheradvancedbehaviordownloadfeaturedfootballselectedLanguagedistanceremembertrackingpasswordmodifiedstudentsdirectlyfightingnortherndatabasefestivalbreakinglocationinternetdropdownpracticeevidencefunctionmarriageresponseproblemsnegativeprogramsanalysisreleasedbanner">purchasepoliciesregionalcreativeargumentbookmarkreferrerchemicaldivisioncallbackseparateprojectsconflicthardwareinterestdeliverymountainobtained= false;for(var acceptedcapacitycomputeridentityaircraftemployedproposeddomesticincludesprovidedhospitalverticalcollapseapproachpartnerslogo"><adaughterauthor" culturalfamilies/images/assemblypowerfulteachingfinisheddistrictcriticalcgi-bin/purposesrequireselectionbecomingprovidesacademicexerciseactuallymedicineconstantaccidentMagazinedocumentstartingbottom">observed: 
&quot;extendedpreviousSoftwarecustomerdecisionstrengthdetailedslightlyplanningtextareacurrencyeveryonestraighttransferpositiveproducedheritageshippingabsolutereceivedrelevantbutton" violenceanywherebenefitslaunchedrecentlyalliancefollowedmultiplebulletinincludedoccurredinternal$(this).republic><tr><tdcongressrecordedultimatesolution<ul id="discoverHome</a>websitesnetworksalthoughentirelymemorialmessagescontinueactive">somewhatvictoriaWestern  title="LocationcontractvisitorsDownloadwithout right">
+measureswidth = variableinvolvedvirginianormallyhappenedaccountsstandingnationalRegisterpreparedcontrolsaccuratebirthdaystrategyofficialgraphicscriminalpossiblyconsumerPersonalspeakingvalidateachieved.jpg" />machines</h2>
+  keywordsfriendlybrotherscombinedoriginalcomposedexpectedadequatepakistanfollow" valuable</label>relativebringingincreasegovernorplugins/List of Header">" name=" (&quot;graduate</head>
+commercemalaysiadirectormaintain;height:schedulechangingback to catholicpatternscolor: #greatestsuppliesreliable</ul>
+		<select citizensclothingwatching<li id="specificcarryingsentence<center>contrastthinkingcatch(e)southernMichael merchantcarouselpadding:interior.split("lizationOctober ){returnimproved--&gt;
+
+coveragechairman.png" />subjectsRichard whateverprobablyrecoverybaseballjudgmentconnect..css" /> websitereporteddefault"/></a>
+electricscotlandcreationquantity. ISBN 0did not instance-search-" lang="speakersComputercontainsarchivesministerreactiondiscountItalianocriteriastrongly: 'http:'script'coveringofferingappearedBritish identifyFacebooknumerousvehiclesconcernsAmericanhandlingdiv id="William provider_contentaccuracysection andersonflexibleCategorylawrence<script>layout="approved maximumheader"></table>Serviceshamiltoncurrent canadianchannels/themes//articleoptionalportugalvalue=""intervalwirelessentitledagenciesSearch" measuredthousandspending&hellip;new Date" size="pageNamemiddle" " /></a>hidden">sequencepersonaloverflowopinionsillinoislinks">
+	<title>versionssaturdayterminalitempropengineersectionsdesignerproposal="false"Españolreleasessubmit" er&quot;additionsymptomsorientedresourceright"><pleasurestationshistory.leaving  border=contentscenter">.
+
+Some directedsuitablebulgaria.show();designedGeneral conceptsExampleswilliamsOriginal"><span>search">operatorrequestsa &quot;allowingDocumentrevision. 
+
+The yourselfContact michiganEnglish columbiapriorityprintingdrinkingfacilityreturnedContent officersRussian generate-8859-1"indicatefamiliar qualitymargin:0 contentviewportcontacts-title">portable.length eligibleinvolvesatlanticonload="default.suppliedpaymentsglossary
+
+After guidance</td><tdencodingmiddle">came to displaysscottishjonathanmajoritywidgets.clinicalthailandteachers<head>
+	affectedsupportspointer;toString</small>oklahomawill be investor0" alt="holidaysResourcelicensed (which . After considervisitingexplorerprimary search" android"quickly meetingsestimate;return ;color:# height=approval, &quot; checked.min.js"magnetic></a></hforecast. While thursdaydvertise&eacute;hasClassevaluateorderingexistingpatients Online coloradoOptions"campbell<!-- end</span><<br />
+_popups|sciences,&quot; quality Windows assignedheight: <b classle&quot; value=" Companyexamples<iframe believespresentsmarshallpart of properly).
+
+The taxonomymuch of </span>
+" data-srtuguêsscrollTo project<head>
+attorneyemphasissponsorsfancyboxworld's wildlifechecked=sessionsprogrammpx;font- Projectjournalsbelievedvacationthompsonlightingand the special border=0checking</tbody><button Completeclearfix
+<head>
+article <sectionfindingsrole in popular  Octoberwebsite exposureused to  changesoperatedclickingenteringcommandsinformed numbers  </div>creatingonSubmitmarylandcollegesanalyticlistingscontact.loggedInadvisorysiblingscontent"s&quot;)s. This packagescheckboxsuggestspregnanttomorrowspacing=icon.pngjapanesecodebasebutton">gamblingsuch as , while </span> missourisportingtop:1px .</span>tensionswidth="2lazyloadnovemberused in height="cript">
+&nbsp;</<tr><td height:2/productcountry include footer" &lt;!-- title"></jquery.</form>
+(简体)(繁體)hrvatskiitalianoromânătürkçeاردوtambiénnoticiasmensajespersonasderechosnacionalserviciocontactousuariosprogramagobiernoempresasanunciosvalenciacolombiadespuésdeportesproyectoproductopúbliconosotroshistoriapresentemillonesmediantepreguntaanteriorrecursosproblemasantiagonuestrosopiniónimprimirmientrasaméricavendedorsociedadrespectorealizarregistropalabrasinterésentoncesespecialmiembrosrealidadcórdobazaragozapáginassocialesbloqueargestiónalquilersistemascienciascompletoversióncompletaestudiospúblicaobjetivoalicantebuscadorcantidadentradasaccionesarchivossuperiormayoríaalemaniafunciónúltimoshaciendoaquellosediciónfernandoambientefacebooknuestrasclientesprocesosbastantepresentareportarcongresopublicarcomerciocontratojóvenesdistritotécnicaconjuntoenergíatrabajarasturiasrecienteutilizarboletínsalvadorcorrectatrabajosprimerosnegocioslibertaddetallespantallapróximoalmeríaanimalesquiénescorazónsecciónbuscandoopcionesexteriorconceptotodavíagaleríaescribirmedicinalicenciaconsultaaspectoscríticadólaresjusticiadeberánperíodonecesitamantenerpequeñorecibidatribunaltenerifecancióncanariasdescargadiversosmallorcarequieretécnicodeberíaviviendafinanzasadelantefuncionaconsejosdifícilciudadesantiguasavanzadatérminounidadessánchezcampañasoftonicrevistascontienesectoresmomentosfacultadcréditodiversassupuestofactoressegundospequeñaгодаеслиестьбылобытьэтомЕслитогоменявсехэтойдажебылигодуденьэтотбыласебяодинсебенадосайтфотонегосвоисвойигрытожевсемсвоюлишьэтихпокаднейдомамиралиботемухотядвухсетилюдиделомиретебясвоевидечегоэтимсчеттемыценысталведьтемеводытебевышенамитипатомуправлицаоднагодызнаюмогудругвсейидеткиноодноделаделесрокиюнявесьЕстьразанашиاللهالتيجميعخاصةالذيعليهجديدالآنالردتحكمصفحةكانتاللييكونشبكةفيهابناتحواءأكثرخلالالحبدليلدروساضغطتكونهناكساحةناديالطبعليكشكرايمكنمنهاشركةرئيسنشيطماذاالفنشبابتعبررحمةكافةيقولمركزكلمةأحمدقلبييعنيصورةطريقشاركجوالأخرىمعناابحثعروضبشكلمسجلبنانخالدكتابكليةبدونأيضايوجدفريقكتبتأفضلمطبخاكثرباركافضلاحلىنفسهأيامردودأنهاديناالانمعرضتعلمداخلممكن           
           	
+
+	����        ����                  ��      ��                resourcescountriesquestionsequipmentcommunityavailablehighlightDTD/xhtmlmarketingknowledgesomethingcontainerdirectionsubscribeadvertisecharacter" value="</select>Australia" class="situationauthorityfollowingprimarilyoperationchallengedevelopedanonymousfunction functionscompaniesstructureagreement" title="potentialeducationargumentssecondarycopyrightlanguagesexclusivecondition</form>
+statementattentionBiography} else {
+solutionswhen the Analyticstemplatesdangeroussatellitedocumentspublisherimportantprototypeinfluence&raquo;</effectivegenerallytransformbeautifultransportorganizedpublishedprominentuntil thethumbnailNational .focus();over the migrationannouncedfooter">
+exceptionless thanexpensiveformationframeworkterritoryndicationcurrentlyclassNamecriticismtraditionelsewhereAlexanderappointedmaterialsbroadcastmentionedaffiliate</option>treatmentdifferent/default.Presidentonclick="biographyotherwisepermanentFrançaisHollywoodexpansionstandards</style>
+reductionDecember preferredCambridgeopponentsBusiness confusion>
+<title>presentedexplaineddoes not worldwideinterfacepositionsnewspaper</table>
+mountainslike the essentialfinancialselectionaction="/abandonedEducationparseInt(stabilityunable to</title>
+relationsNote thatefficientperformedtwo yearsSince thethereforewrapper">alternateincreasedBattle ofperceivedtrying tonecessaryportrayedelectionsElizabeth</iframe>discoveryinsurances.length;legendaryGeographycandidatecorporatesometimesservices.inherited</strong>CommunityreligiouslocationsCommitteebuildingsthe worldno longerbeginningreferencecannot befrequencytypicallyinto the relative;recordingpresidentinitiallytechniquethe otherit can beexistenceunderlinethis timetelephoneitemscopepracticesadvantage);return For otherprovidingdemocracyboth the extensivesufferingsupportedcomputers functionpracticalsaid thatit may beEnglish</from the scheduleddownloads</label>
+suspectedmargin: 0spiritual</head>
+
+microsoftgraduallydiscussedhe becameexecutivejquery.jshouseholdconfirmedpurchasedliterallydestroyedup to thevariationremainingit is notcenturiesJapanese among thecompletedalgorithminterestsrebellionundefinedencourageresizableinvolvingsensitiveuniversalprovision(althoughfeaturingconducted), which continued-header">February numerous overflow:componentfragmentsexcellentcolspan="technicalnear the Advanced source ofexpressedHong Kong Facebookmultiple mechanismelevationoffensive</form>
+	sponsoreddocument.or &quot;there arethose whomovementsprocessesdifficultsubmittedrecommendconvincedpromoting" width=".replace(classicalcoalitionhis firstdecisionsassistantindicatedevolution-wrapper"enough toalong thedelivered-->
+<!--American protectedNovember </style><furnitureInternet  onblur="suspendedrecipientbased on Moreover,abolishedcollectedwere madeemotionalemergencynarrativeadvocatespx;bordercommitteddir="ltr"employeesresearch. selectedsuccessorcustomersdisplayedSeptemberaddClass(Facebook suggestedand lateroperatingelaborateSometimesInstitutecertainlyinstalledfollowersJerusalemthey havecomputinggeneratedprovincesguaranteearbitraryrecognizewanted topx;width:theory ofbehaviourWhile theestimatedbegan to it becamemagnitudemust havemore thanDirectoryextensionsecretarynaturallyoccurringvariablesgiven theplatform.</label><failed tocompoundskinds of societiesalongside --&gt;
+
+southwestthe rightradiationmay have unescape(spoken in" href="/programmeonly the come fromdirectoryburied ina similarthey were</font></Norwegianspecifiedproducingpassenger(new DatetemporaryfictionalAfter theequationsdownload.regularlydeveloperabove thelinked tophenomenaperiod oftooltip">substanceautomaticaspect ofAmong theconnectedestimatesAir Forcesystem ofobjectiveimmediatemaking itpaintingsconqueredare stillproceduregrowth ofheaded byEuropean divisionsmoleculesfranchiseintentionattractedchildhoodalso useddedicatedsingaporedegree offather ofconflicts</a></p>
+came fromwere usednote thatreceivingExecutiveeven moreaccess tocommanderPoliticalmusiciansdeliciousprisonersadvent ofUTF-8" /><![CDATA[">ContactSouthern bgcolor="series of. It was in Europepermittedvalidate.appearingofficialsseriously-languageinitiatedextendinglong-terminflationsuch thatgetCookiemarked by</button>implementbut it isincreasesdown the requiringdependent-->
+<!-- interviewWith the copies ofconsensuswas builtVenezuela(formerlythe statepersonnelstrategicfavour ofinventionWikipediacontinentvirtuallywhich wasprincipleComplete identicalshow thatprimitiveaway frommolecularpreciselydissolvedUnder theversion=">&nbsp;</It is the This is will haveorganismssome timeFriedrichwas firstthe only fact thatform id="precedingTechnicalphysicistoccurs innavigatorsection">span id="sought tobelow thesurviving}</style>his deathas in thecaused bypartiallyexisting using thewas givena list oflevels ofnotion ofOfficial dismissedscientistresemblesduplicateexplosiverecoveredall othergalleries{padding:people ofregion ofaddressesassociateimg alt="in modernshould bemethod ofreportingtimestampneeded tothe Greatregardingseemed toviewed asimpact onidea thatthe Worldheight ofexpandingThese arecurrent">carefullymaintainscharge ofClassicaladdressedpredictedownership<div id="right">
+residenceleave thecontent">are often  })();
+probably Professor-button" respondedsays thathad to beplaced inHungarianstatus ofserves asUniversalexecutionaggregatefor whichinfectionagreed tohowever, popular">placed onconstructelectoralsymbol ofincludingreturn toarchitectChristianprevious living ineasier toprofessor
+&lt;!-- effect ofanalyticswas takenwhere thetook overbelief inAfrikaansas far aspreventedwork witha special<fieldsetChristmasRetrieved
+
+In the back intonortheastmagazines><strong>committeegoverninggroups ofstored inestablisha generalits firsttheir ownpopulatedan objectCaribbeanallow thedistrictswisconsinlocation.; width: inhabitedSocialistJanuary 1</footer>similarlychoice ofthe same specific business The first.length; desire todeal withsince theuserAgentconceivedindex.phpas &quot;engage inrecently,few yearswere also
+<head>
+<edited byare knowncities inaccesskeycondemnedalso haveservices,family ofSchool ofconvertednature of languageministers</object>there is a popularsequencesadvocatedThey wereany otherlocation=enter themuch morereflectedwas namedoriginal a typicalwhen theyengineerscould notresidentswednesdaythe third productsJanuary 2what theya certainreactionsprocessorafter histhe last contained"></div>
+</a></td>depend onsearch">
+pieces ofcompetingReferencetennesseewhich has version=</span> <</header>gives thehistorianvalue="">padding:0view thattogether,the most was foundsubset ofattack onchildren,points ofpersonal position:allegedlyClevelandwas laterand afterare givenwas stillscrollingdesign ofmakes themuch lessAmericans.
+
+After , but theMuseum oflouisiana(from theminnesotaparticlesa processDominicanvolume ofreturningdefensive00px|righmade frommouseover" style="states of(which iscontinuesFranciscobuilding without awith somewho woulda form ofa part ofbefore itknown as  Serviceslocation and oftenmeasuringand it ispaperbackvalues of
+<title>= window.determineer&quot; played byand early</center>from thisthe threepower andof &quot;innerHTML<a href="y:inline;Church ofthe eventvery highofficial -height: content="/cgi-bin/to createafrikaansesperantofrançaislatviešulietuviųČeštinačeštinaไทย日本語简体字繁體字한국어为什么计算机笔记本討論區服务器互联网房地产俱乐部出版社排行榜部落格进一步支付宝验证码委员会数据库消费者办公室讨论区深圳市播放器北京市大学生越来越管理员信息网serviciosartículoargentinabarcelonacualquierpublicadoproductospolíticarespuestawikipediasiguientebúsquedacomunidadseguridadprincipalpreguntascontenidorespondervenezuelaproblemasdiciembrerelaciónnoviembresimilaresproyectosprogramasinstitutoactividadencuentraeconomíaimágenescontactardescargarnecesarioatenciónteléfonocomisióncancionescapacidadencontraranálisisfavoritostérminosprovinciaetiquetaselementosfuncionesresultadocarácterpropiedadprincipionecesidadmunicipalcreacióndescargaspresenciacomercialopinionesejercicioeditorialsalamancagonzálezdocumentopelícularecientesgeneralestarragonaprácticanovedadespropuestapacientestécnicasobjetivoscontactosमेंलिएहैंगयासाथएवंरहेकोईकुछरहाबादकहासभीहुएरहीमैंदिनबातdiplodocsसमयरूपनामपताफिरऔसततरहलोगहुआबारदेशहुईखेलयदिकामवेबतीनबीचमौतसाललेखजॉबमददतथानहीशहरअलगकभीनगरपासरातकिएउसेगयीहूँआगेटीमखोजकारअभीगयेतुमवोटदेंअगरऐसेमेललगाहालऊपरचारऐसादेरजिसदिलबंदबनाहूंलाखजीतबटनमिलइसेआनेनयाकुललॉगभागरेलजगहरामलगेपेजहाथइसीसहीकलाठीकहाँदूरतहतसातयादआयापाककौनशामदेखयहीरायखुदलगीcategoriesexperience</title>
+Copyright javascriptconditionseverything<p class="technologybackground<a class="management&copy; 201javaScriptcharactersbreadcrumbthemselveshorizontalgovernmentCaliforniaactivitiesdiscoveredNavigationtransitionconnectionnavigationappearance</title><mcheckbox" techniquesprotectionapparentlyas well asunt', 'UA-resolutionoperationstelevisiontranslatedWashingtonnavigator. = window.impression&lt;br&gt;literaturepopulationbgcolor="#especially content="productionnewsletterpropertiesdefinitionleadershipTechnologyParliamentcomparisonul class=".indexOf("conclusiondiscussioncomponentsbiologicalRevolution_containerunderstoodnoscript><permissioneach otheratmosphere onfocus="<form id="processingthis.valuegenerationConferencesubsequentwell-knownvariationsreputationphenomenondisciplinelogo.png" (document,boundariesexpressionsettlementBackgroundout of theenterprise("https:" unescape("password" democratic<a href="/wrapper">
+membershiplinguisticpx;paddingphilosophyassistanceuniversityfacilitiesrecognizedpreferenceif (typeofmaintainedvocabularyhypothesis.submit();&amp;nbsp;annotationbehind theFoundationpublisher"assumptionintroducedcorruptionscientistsexplicitlyinstead ofdimensions onClick="considereddepartmentoccupationsoon afterinvestmentpronouncedidentifiedexperimentManagementgeographic" height="link rel=".replace(/depressionconferencepunishmenteliminatedresistanceadaptationoppositionwell knownsupplementdeterminedh1 class="0px;marginmechanicalstatisticscelebratedGovernment
+
+During tdevelopersartificialequivalentoriginatedCommissionattachment<span id="there wereNederlandsbeyond theregisteredjournalistfrequentlyall of thelang="en" </style>
+absolute; supportingextremely mainstream</strong> popularityemployment</table>
+ colspan="</form>
+  conversionabout the </p></div>integrated" lang="enPortuguesesubstituteindividualimpossiblemultimediaalmost allpx solid #apart fromsubject toin Englishcriticizedexcept forguidelinesoriginallyremarkablethe secondh2 class="<a title="(includingparametersprohibited= "http://dictionaryperceptionrevolutionfoundationpx;height:successfulsupportersmillenniumhis fatherthe &quot;no-repeat;commercialindustrialencouragedamount of unofficialefficiencyReferencescoordinatedisclaimerexpeditiondevelopingcalculatedsimplifiedlegitimatesubstring(0" class="completelyillustratefive yearsinstrumentPublishing1" class="psychologyconfidencenumber of absence offocused onjoined thestructurespreviously></iframe>once againbut ratherimmigrantsof course,a group ofLiteratureUnlike the</a>&nbsp;
+function it was theConventionautomobileProtestantaggressiveafter the Similarly," /></div>collection
+functionvisibilitythe use ofvolunteersattractionunder the threatened*<![CDATA[importancein generalthe latter</form>
+</.indexOf('i = 0; i <differencedevoted totraditionssearch forultimatelytournamentattributesso-called }
+</style>evaluationemphasizedaccessible</section>successionalong withMeanwhile,industries</a><br />has becomeaspects ofTelevisionsufficientbasketballboth sidescontinuingan article<img alt="adventureshis mothermanchesterprinciplesparticularcommentaryeffects ofdecided to"><strong>publishersJournal ofdifficultyfacilitateacceptablestyle.css"	function innovation>Copyrightsituationswould havebusinessesDictionarystatementsoften usedpersistentin Januarycomprising</title>
+	diplomaticcontainingperformingextensionsmay not beconcept of onclick="It is alsofinancial making theLuxembourgadditionalare calledengaged in"script");but it waselectroniconsubmit="
+<!-- End electricalofficiallysuggestiontop of theunlike theAustralianOriginallyreferences
+</head>
+recognisedinitializelimited toAlexandriaretirementAdventuresfour years
+
+&lt;!-- increasingdecorationh3 class="origins ofobligationregulationclassified(function(advantagesbeing the historians<base hrefrepeatedlywilling tocomparabledesignatednominationfunctionalinside therevelationend of thes for the authorizedrefused totake placeautonomouscompromisepolitical restauranttwo of theFebruary 2quality ofswfobject.understandnearly allwritten byinterviews" width="1withdrawalfloat:leftis usuallycandidatesnewspapersmysteriousDepartmentbest knownparliamentsuppressedconvenientremembereddifferent systematichas led topropagandacontrolledinfluencesceremonialproclaimedProtectionli class="Scientificclass="no-trademarksmore than widespreadLiberationtook placeday of theas long asimprisonedAdditional
+<head>
+<mLaboratoryNovember 2exceptionsIndustrialvariety offloat: lefDuring theassessmenthave been deals withStatisticsoccurrence/ul></div>clearfix">the publicmany yearswhich wereover time,synonymouscontent">
+presumablyhis familyuserAgent.unexpectedincluding challengeda minorityundefined"belongs totaken fromin Octoberposition: said to bereligious Federation rowspan="only a fewmeant thatled to the-->
+<div <fieldset>Archbishop class="nobeing usedapproachesprivilegesnoscript>
+results inmay be theEaster eggmechanismsreasonablePopulationCollectionselected">noscript>/index.phparrival of-jssdk'));managed toincompletecasualtiescompletionChristiansSeptember arithmeticproceduresmight haveProductionit appearsPhilosophyfriendshipleading togiving thetoward theguaranteeddocumentedcolor:#000video gamecommissionreflectingchange theassociatedsans-serifonkeypress; padding:He was theunderlyingtypically , and the srcElementsuccessivesince the should be networkingaccountinguse of thelower thanshows that</span>
+		complaintscontinuousquantitiesastronomerhe did notdue to itsapplied toan averageefforts tothe futureattempt toTherefore,capabilityRepublicanwas formedElectronickilometerschallengespublishingthe formerindigenousdirectionssubsidiaryconspiracydetails ofand in theaffordablesubstancesreason forconventionitemtype="absolutelysupposedlyremained aattractivetravellingseparatelyfocuses onelementaryapplicablefound thatstylesheetmanuscriptstands for no-repeat(sometimesCommercialin Americaundertakenquarter ofan examplepersonallyindex.php?</button>
+percentagebest-knowncreating a" dir="ltrLieutenant
+<div id="they wouldability ofmade up ofnoted thatclear thatargue thatto anotherchildren'spurpose offormulatedbased uponthe regionsubject ofpassengerspossession.
+
+In the Before theafterwardscurrently across thescientificcommunity.capitalismin Germanyright-wingthe systemSociety ofpoliticiandirection:went on toremoval of New York apartmentsindicationduring theunless thehistoricalhad been adefinitiveingredientattendanceCenter forprominencereadyStatestrategiesbut in theas part ofconstituteclaim thatlaboratorycompatiblefailure of, such as began withusing the to providefeature offrom which/" class="geologicalseveral ofdeliberateimportant holds thating&quot; valign=topthe Germanoutside ofnegotiatedhis careerseparationid="searchwas calledthe fourthrecreationother thanpreventionwhile the education,connectingaccuratelywere builtwas killedagreementsmuch more Due to thewidth: 100some otherKingdom ofthe entirefamous forto connectobjectivesthe Frenchpeople andfeatured">is said tostructuralreferendummost oftena separate->
+<div id Official worldwide.aria-labelthe planetand it wasd" value="looking atbeneficialare in themonitoringreportedlythe modernworking onallowed towhere the innovative</a></div>soundtracksearchFormtend to beinput id="opening ofrestrictedadopted byaddressingtheologianmethods ofvariant ofChristian very largeautomotiveby far therange frompursuit offollow thebrought toin Englandagree thataccused ofcomes frompreventingdiv style=his or hertremendousfreedom ofconcerning0 1em 1em;Basketball/style.cssan earliereven after/" title=".com/indextaking thepittsburghcontent"><script>(fturned outhaving the</span>
+ occasionalbecause itstarted tophysically></div>
+  created byCurrently, bgcolor="tabindex="disastrousAnalytics also has a><div id="</style>
+<called forsinger and.src = "//violationsthis pointconstantlyis locatedrecordingsd from thenederlandsportuguêsעבריתفارسیdesarrollocomentarioeducaciónseptiembreregistradodirecciónubicaciónpublicidadrespuestasresultadosimportantereservadosartículosdiferentessiguientesrepúblicasituaciónministerioprivacidaddirectorioformaciónpoblaciónpresidentecontenidosaccesoriostechnoratipersonalescategoríaespecialesdisponibleactualidadreferenciavalladolidbibliotecarelacionescalendariopolíticasanterioresdocumentosnaturalezamaterialesdiferenciaeconómicatransporterodríguezparticiparencuentrandiscusiónestructurafundaciónfrecuentespermanentetotalmenteможнобудетможетвремятакжечтобыболееоченьэтогокогдапослевсегосайтечерезмогутсайтажизнимеждубудутПоискздесьвидеосвязинужносвоейлюдейпорномногодетейсвоихправатакойместоимеетжизньоднойлучшепередчастичастьработновыхправособойпотомменеечисленовыеуслугоколоназадтакоетогдапочтиПослетакиеновыйстоиттакихсразуСанктфорумКогдакнигислованашейнайтисвоимсвязьлюбойчастосредиКромеФорумрынкесталипоисктысячмесяццентртрудасамыхрынкаНовыйчасовместафильммартастранместетекстнашихминутимениимеютномергородсамомэтомуконцесвоемкакойАрхивمنتدىإرسالرسالةالعامكتبهابرامجاليومالصورجديدةالعضوإضافةالقسمالعابتحميلملفاتملتقىتعديلالشعرأخبارتطويرعليكمإرفاقطلباتاللغةترتيبالناسالشيخمنتديالعربالقصصافلامعليهاتحديثاللهمالعملمكتبةيمكنكالطفلفيديوإدارةتاريخالصحةتسجيلالوقتعندمامدينةتصميمأرشيفالذينعربيةبوابةألعابالسفرمشاكلتعالىالأولالسنةجامعةالصحفالدينكلماتالخاصالملفأعضاءكتابةالخيررسائلالقلبالأدبمقاطعمراسلمنطقةالكتبالرجلاشتركالقدميعطيكsByTagName(.jpg" alt="1px solid #.gif" alt="transparentinformationapplication" onclick="establishedadvertising.png" alt="environmentperformanceappropriate&amp;mdash;immediately</strong></rather thantemperaturedevelopmentcompetitionplaceholdervisibility:copyright">0" height="even thoughreplacementdestinationCorporation<ul class="AssociationindividualsperspectivesetTimeout(url(http://mathematicsmargin-top:eventually description) 
no-repeatcollections.JPG|thumb|participate/head><bodyfloat:left;<li class="hundreds of
+
+However, compositionclear:both;cooperationwithin the label for="border-top:New Zealandrecommendedphotographyinteresting&lt;sup&gt;controversyNetherlandsalternativemaxlength="switzerlandDevelopmentessentially
+
+Although </textarea>thunderbirdrepresented&amp;ndash;speculationcommunitieslegislationelectronics
+	<div id="illustratedengineeringterritoriesauthoritiesdistributed6" height="sans-serif;capable of disappearedinteractivelooking forit would beAfghanistanwas createdMath.floor(surroundingcan also beobservationmaintenanceencountered<h2 class="more recentit has beeninvasion of).getTime()fundamentalDespite the"><div id="inspirationexaminationpreparationexplanation<input id="</a></span>versions ofinstrumentsbefore the  = 'http://Descriptionrelatively .substring(each of theexperimentsinfluentialintegrationmany peopledue to the combinationdo not haveMiddle East<noscript><copyright" perhaps theinstitutionin Decemberarrangementmost famouspersonalitycreation oflimitationsexclusivelysovereignty-content">
+<td class="undergroundparallel todoctrine ofoccupied byterminologyRenaissancea number ofsupport forexplorationrecognitionpredecessor<img src="/<h1 class="publicationmay also bespecialized</fieldset>progressivemillions ofstates thatenforcementaround the one another.parentNodeagricultureAlternativeresearcherstowards theMost of themany other (especially<td width=";width:100%independent<h3 class=" onchange=").addClass(interactionOne of the daughter ofaccessoriesbranches of
+<div id="the largestdeclarationregulationsInformationtranslationdocumentaryin order to">
+<head>
+<" height="1across the orientation);</script>implementedcan be seenthere was ademonstratecontainer">connectionsthe Britishwas written!important;px; margin-followed byability to complicatedduring the immigrationalso called<h4 class="distinctionreplaced bygovernmentslocation ofin Novemberwhether the</p>
+</div>acquisitioncalled the persecutiondesignation{font-size:appeared ininvestigateexperiencedmost likelywidely useddiscussionspresence of (document.extensivelyIt has beenit does notcontrary toinhabitantsimprovementscholarshipconsumptioninstructionfor exampleone or morepx; paddingthe currenta series ofare usuallyrole in thepreviously derivativesevidence ofexperiencescolorschemestated thatcertificate</a></div>
+ selected="high schoolresponse tocomfortableadoption ofthree yearsthe countryin Februaryso that thepeople who provided by<param nameaffected byin terms ofappointmentISO-8859-1"was born inhistorical regarded asmeasurementis based on and other : function(significantcelebrationtransmitted/js/jquery.is known astheoretical tabindex="it could be<noscript>
+having been
+<head>
+< &quot;The compilationhe had beenproduced byphilosopherconstructedintended toamong othercompared toto say thatEngineeringa differentreferred todifferencesbelief thatphotographsidentifyingHistory of Republic ofnecessarilyprobabilitytechnicallyleaving thespectacularfraction ofelectricityhead of therestaurantspartnershipemphasis onmost recentshare with saying thatfilled withdesigned toit is often"></iframe>as follows:merged withthrough thecommercial pointed outopportunityview of therequirementdivision ofprogramminghe receivedsetInterval"></span></in New Yorkadditional compression
+
+<div id="incorporate;</script><attachEventbecame the " target="_carried outSome of thescience andthe time ofContainer">maintainingChristopherMuch of thewritings of" height="2size of theversion of mixture of between theExamples ofeducationalcompetitive onsubmit="director ofdistinctive/DTD XHTML relating totendency toprovince ofwhich woulddespite thescientific legislature.innerHTML allegationsAgriculturewas used inapproach tointelligentyears later,sans-serifdeterminingPerformanceappearances, which is foundationsabbreviatedhigher thans from the individual composed ofsupposed toclaims thatattributionfont-size:1elements ofHistorical his brotherat the timeanniversarygoverned byrelated to ultimately innovationsit is stillcan only bedefinitionstoGMTStringA number ofimg class="Eventually,was changedoccurred inneighboringdistinguishwhen he wasintroducingterrestrialMany of theargues thatan Americanconquest ofwidespread were killedscreen and In order toexpected todescendantsare locatedlegislativegenerations backgroundmost peopleyears afterthere is nothe highestfrequently they do notargued thatshowed thatpredominanttheologicalby the timeconsideringshort-lived</span></a>can be usedvery littleone of the had alreadyinterpretedcommunicatefeatures ofgovernment,</noscript>entered the" height="3Independentpopulationslarge-scale. Although used in thedestructionpossibilitystarting intwo or moreexpressionssubordinatelarger thanhistory and</option>
+Continentaleliminatingwill not bepractice ofin front ofsite of theensure thatto create amississippipotentiallyoutstandingbetter thanwhat is nowsituated inmeta name="TraditionalsuggestionsTranslationthe form ofatmosphericideologicalenterprisescalculatingeast of theremnants ofpluginspage/index.php?remained intransformedHe was alsowas alreadystatisticalin favor ofMinistry ofmovement offormulationis required<link rel="This is the <a href="/popularizedinvolved inare used toand severalmade by theseems to belikely thatPalestiniannamed afterit had beenmost commonto refer tobut this isconsecutivetemporarilyIn general,conventionstakes placesubdivisionterritorialoperationalpermanentlywas largelyoutbreak ofin the pastfollowing a xmlns:og="><a class="class="textConversion may be usedmanufactureafter beingclearfix">
+question ofwas electedto become abecause of some peopleinspired bysuccessful a time whenmore commonamongst thean officialwidth:100%;technology,was adoptedto keep thesettlementslive birthsindex.html"Connecticutassigned to&amp;times;account foralign=rightthe companyalways beenreturned toinvolvementBecause thethis period" name="q" confined toa result ofvalue="" />is actuallyEnvironment
+</head>
+Conversely,>
+<div id="0" width="1is probablyhave becomecontrollingthe problemcitizens ofpoliticiansreached theas early as:none; over<table cellvalidity ofdirectly toonmousedownwhere it iswhen it wasmembers of relation toaccommodatealong with In the latethe Englishdelicious">this is notthe presentif they areand finallya matter of
+	</div>
+
+</script>faster thanmajority ofafter whichcomparativeto maintainimprove theawarded theer" class="frameborderrestorationin the sameanalysis oftheir firstDuring the continentalsequence offunction(){font-size: work on the</script>
+<begins withjavascript:constituentwas foundedequilibriumassume thatis given byneeds to becoordinatesthe variousare part ofonly in thesections ofis a commontheories ofdiscoveriesassociationedge of thestrength ofposition inpresent-dayuniversallyto form thebut insteadcorporationattached tois commonlyreasons for &quot;the can be madewas able towhich meansbut did notonMouseOveras possibleoperated bycoming fromthe primaryaddition offor severaltransferreda period ofare able tohowever, itshould havemuch larger
+	</script>adopted theproperty ofdirected byeffectivelywas broughtchildren ofProgramminglonger thanmanuscriptswar againstby means ofand most ofsimilar to proprietaryoriginatingprestigiousgrammaticalexperience.to make theIt was alsois found incompetitorsin the U.S.replace thebrought thecalculationfall of thethe generalpracticallyin honor ofreleased inresidentialand some ofking of thereaction to1st Earl ofculture andprincipally</title>
+  they can beback to thesome of hisexposure toare similarform of theaddFavoritecitizenshippart in thepeople within practiceto continue&amp;minus;approved by the first allowed theand for thefunctioningplaying thesolution toheight="0" in his bookmore than afollows thecreated thepresence in&nbsp;</td>nationalistthe idea ofa characterwere forced class="btndays of thefeatured inshowing theinterest inin place ofturn of thethe head ofLord of thepoliticallyhas its ownEducationalapproval ofsome of theeach other,behavior ofand becauseand anotherappeared onrecorded inblack&quot;may includethe world'scan lead torefers to aborder="0" government winning theresulted in while the Washington,the subjectcity in the></div>
+		reflect theto completebecame moreradioactiverejected bywithout anyhis father,which couldcopy of theto indicatea politicalaccounts ofconstitutesworked wither</a></li>of his lifeaccompaniedclientWidthprevent theLegislativedifferentlytogether inhas severalfor anothertext of thefounded thee with the is used forchanged theusually theplace wherewhereas the> <a href=""><a href="themselves,although hethat can betraditionalrole of theas a resultremoveChilddesigned bywest of theSome peopleproduction,side of thenewslettersused by thedown to theaccepted bylive in theattempts tooutside thefrequenciesHowever, inprogrammersat least inapproximatealthough itwas part ofand variousGovernor ofthe articleturned into><a href="/the economyis the mostmost widelywould laterand perhapsrise to theoccurs whenunder whichconditions.the westerntheory thatis producedthe city ofin which heseen in thethe centralbuilding ofmany of hisarea of theis the onlymost of themany of thethe WesternThere is noextended toStatisticalcolspan=2 |short storypossible totopologicalcritical ofreported toa Christiandecision tois equal toproblems ofThis can bemerchandisefor most ofno evidenceeditions ofelements in&quot;. Thecom/images/which makesthe processremains theliterature,is a memberthe popularthe ancientproblems intime of thedefeated bybody of thea few yearsmuch of thethe work ofCalifornia,served as agovernment.concepts ofmovement in		<div id="it" value="language ofas they areproduced inis that theexplain thediv></div>
+However thelead to the	<a href="/was grantedpeople havecontinuallywas seen asand relatedthe role ofproposed byof the besteach other.Constantinepeople fromdialects ofto revisionwas renameda source ofthe initiallaunched inprovide theto the westwhere thereand similarbetween twois also theEnglish andconditions,that it wasentitled tothemselves.quantity ofransparencythe same asto join thecountry andthis is theThis led toa statementcontrast tolastIndexOfthrough hisis designedthe term isis providedprotect theng</a></li>The currentthe site ofsubstantialexperience,in the Westthey shouldslovenčinacomentariosuniversidadcondicionesactividadesexperienciatecnologíaproducciónpuntuaciónaplicacióncontraseñacategoríasregistrarseprofesionaltratamientoregístratesecretaríaprincipalesprotecciónimportantesimportanciaposibilidadinteresantecrecimientonecesidadessuscribirseasociacióndisponiblesevaluaciónestudiantesresponsableresoluciónguadalajararegistradosoportunidadcomercialesfotografíaautoridadesingenieríatelevisióncompetenciaoperacionesestablecidosimplementeactualmentenavegaciónconformidadline-height:font-family:" : "http://applicationslink" href="specifically//<![CDATA[
+Organizationdistribution0px; height:relationshipdevice-width<div class="<label for="registration</noscript>
+/index.html"window.open( !important;application/independence//www.googleorganizationautocompleterequirementsconservative<form name="intellectualmargin-left:18th centuryan importantinstitutionsabbreviation<img class="organisationcivilization19th centuryarchitectureincorporated20th century-container">most notably/></a></div>notification'undefined')Furthermore,believe thatinnerHTML = prior to thedramaticallyreferring tonegotiationsheadquartersSouth AfricaunsuccessfulPennsylvaniaAs a result,<html lang="&lt;/sup&gt;dealing withphiladelphiahistorically);</script>
+padding-top:experimentalgetAttributeinstructionstechnologiespart of the =function(){subscriptionl.dtd">
+<htgeographicalConstitution', function(supported byagriculturalconstructionpublicationsfont-size: 1a variety of<div style="Encyclopediaiframe src="demonstratedaccomplisheduniversitiesDemographics);</script><dedicated toknowledge ofsatisfactionparticularly</div></div>English (US)appendChild(transmissions. However, intelligence" tabindex="float:right;Commonwealthranging fromin which theat least onereproductionencyclopedia;font-size:1jurisdictionat that time"><a class="In addition,description+conversationcontact withis generallyr" content="representing&lt;math&gt;presentationoccasionally<img width="navigation">compensationchampionshipmedia="all" violation ofreference toreturn true;Strict//EN" transactionsinterventionverificationInformation difficultiesChampionshipcapabilities<![endif]-->}
+</script>
+Christianityfor example,Professionalrestrictionssuggest thatwas released(such as theremoveClass(unemploymentthe Americanstructure of/index.html published inspan class=""><a href="/introductionbelonging toclaimed thatconsequences<meta name="Guide to theoverwhelmingagainst the concentrated,
+.nontouch observations</a>
+</div>
+f (document.border: 1px {font-size:1treatment of0" height="1modificationIndependencedivided intogreater thanachievementsestablishingJavaScript" neverthelesssignificanceBroadcasting>&nbsp;</td>container">
+such as the influence ofa particularsrc='http://navigation" half of the substantial &nbsp;</div>advantage ofdiscovery offundamental metropolitanthe opposite" xml:lang="deliberatelyalign=centerevolution ofpreservationimprovementsbeginning inJesus ChristPublicationsdisagreementtext-align:r, function()similaritiesbody></html>is currentlyalphabeticalis sometimestype="image/many of the flow:hidden;available indescribe theexistence ofall over thethe Internet	<ul class="installationneighborhoodarmed forcesreducing thecontinues toNonetheless,temperatures
+		<a href="close to theexamples of is about the(see below)." id="searchprofessionalis availablethe official		</script>
+
+		<div id="accelerationthrough the Hall of Famedescriptionstranslationsinterference type='text/recent yearsin the worldvery popular{background:traditional some of the connected toexploitationemergence ofconstitutionA History ofsignificant manufacturedexpectations><noscript><can be foundbecause the has not beenneighbouringwithout the added to the	<li class="instrumentalSoviet Unionacknowledgedwhich can bename for theattention toattempts to developmentsIn fact, the<li class="aimplicationssuitable formuch of the colonizationpresidentialcancelBubble Informationmost of the is describedrest of the more or lessin SeptemberIntelligencesrc="http://px; height: available tomanufacturerhuman rightslink href="/availabilityproportionaloutside the astronomicalhuman beingsname of the are found inare based onsmaller thana person whoexpansion ofarguing thatnow known asIn the earlyintermediatederived fromScandinavian</a></div>
+consider thean estimatedthe National<div id="pagresulting incommissionedanalogous toare required/ul>
+</div>
+was based onand became a&nbsp;&nbsp;t" value="" was capturedno more thanrespectivelycontinue to >
+<head>
+<were createdmore generalinformation used for theindependent the Imperialcomponent ofto the northinclude the Constructionside of the would not befor instanceinvention ofmore complexcollectivelybackground: text-align: its originalinto accountthis processan extensivehowever, thethey are notrejected thecriticism ofduring whichprobably thethis article(function(){It should bean agreementaccidentallydiffers fromArchitecturebetter knownarrangementsinfluence onattended theidentical tosouth of thepass throughxml" title="weight:bold;creating thedisplay:nonereplaced the<img src="/ihttps://www.World War IItestimonialsfound in therequired to and that thebetween the was designedconsists of considerablypublished bythe languageConservationconsisted ofrefer to theback to the css" media="People from available onproved to besuggestions"was known asvarieties oflikely to becomprised ofsupport the hands of thecoupled withconnect and border:none;performancesbefore beinglater becamecalculationsoften calledresidents ofmeaning that><li class="evidence forexplanationsenvironments"></a></div>which allowsIntroductiondeveloped bya wide rangeon behalf ofvalign="top"principle ofat the time,</noscript>said to havein the firstwhile othershypotheticalphilosopherspower of thecontained inperformed byinability towere writtenspan style="input name="the questionintended forrejection ofimplies thatinvented thethe standardwas probablylink betweenprofessor ofinteractionschanging theIndian Ocean class="lastworking with'http://www.years beforeThis was therecreationalentering themeasurementsan extremelyvalue of thestart of the
+</script>
+
+an effort toincrease theto the southspacing="0">sufficientlythe Europeanconverted toclearTimeoutdid not haveconsequentlyfor the nextextension ofeconomic andalthough theare producedand with theinsufficientgiven by thestating thatexpenditures</span></a>
+thought thaton the basiscellpadding=image of thereturning toinformation,separated byassassinateds" content="authority ofnorthwestern</div>
+<div "></div>
+  consultationcommunity ofthe nationalit should beparticipants align="leftthe greatestselection ofsupernaturaldependent onis mentionedallowing thewas inventedaccompanyinghis personalavailable atstudy of theon the otherexecution ofHuman Rightsterms of theassociationsresearch andsucceeded bydefeated theand from thebut they arecommander ofstate of theyears of agethe study of<ul class="splace in thewhere he was<li class="fthere are nowhich becamehe publishedexpressed into which thecommissionerfont-weight:territory ofextensions">Roman Empireequal to theIn contrast,however, andis typicallyand his wife(also called><ul class="effectively evolved intoseem to havewhich is thethere was noan excellentall of thesedescribed byIn practice,broadcastingcharged withreflected insubjected tomilitary andto the pointeconomicallysetTargetingare actuallyvictory over();</script>continuouslyrequired forevolutionaryan effectivenorth of the, which was front of theor otherwisesome form ofhad not beengenerated byinformation.permitted toincludes thedevelopment,entered intothe previousconsistentlyare known asthe field ofthis type ofgiven to thethe title ofcontains theinstances ofin the northdue to theirare designedcorporationswas that theone of thesemore popularsucceeded insupport fromin differentdominated bydesigned forownership ofand possiblystandardizedresponseTextwas intendedreceived theassumed thatareas of theprimarily inthe basis ofin the senseaccounts fordestroyed byat least twowas declaredcould not beSecretary ofappear to bemargin-top:1/^\s+|\s+$/ge){throw e};the start oftwo separatelanguage andwho had beenoperation ofdeath of thereal numbers	<link rel="provided thethe story ofcompetitionsenglish (UK)english 
(US)МонголСрпскисрпскисрпскоلعربية正體中文简体中文繁体中文有限公司人民政府阿里巴巴社会主义操作系统政策法规informaciónherramientaselectrónicodescripciónclasificadosconocimientopublicaciónrelacionadasinformáticarelacionadosdepartamentotrabajadoresdirectamenteayuntamientomercadoLibrecontáctenoshabitacionescumplimientorestaurantesdisposiciónconsecuenciaelectrónicaaplicacionesdesconectadoinstalaciónrealizaciónutilizaciónenciclopediaenfermedadesinstrumentosexperienciasinstituciónparticularessubcategoriaтолькоРоссииработыбольшепростоможетедругихслучаесейчасвсегдаРоссияМоскведругиегородавопросданныхдолжныименноМосквырублейМосквастраныничегоработедолженуслугитеперьОднакопотомуработуапрелявообщеодногосвоегостатьидругойфорумехорошопротивссылкакаждыйвластигруппывместеработасказалпервыйделатьденьгипериодбизнесосновемоменткупитьдолжнарамкахначалоРаботаТолькосовсемвторойначаласписокслужбысистемпечатиновогопомощисайтовпочемупомощьдолжноссылкибыстроданныемногиепроектСейчасмоделитакогоонлайнгородеверсиястранефильмыуровняразныхискатьнеделюянваряменьшемногихданнойзначитнельзяфорумаТеперьмесяцазащитыЛучшиеनहींकरनेअपनेकियाकरेंअन्यक्यागाइडबारेकिसीदियापहलेसिंहभारतअपनीवालेसेवाकरतेमेरेहोनेसकतेबहुतसाइटहोगाजानेमिनटकरताकरनाउनकेयहाँसबसेभाषाआपकेलियेशुरूइसकेघंटेमेरीसकतामेरालेकरअधिकअपनासमाजमुझेकारणहोताकड़ीयहांहोटलशब्दलियाजीवनजाताकैसेआपकावालीदेनेपूरीपानीउसकेहोगीबैठकआपकीवर्षगांवआपकोजिलाजानासहमतहमेंउनकीयाहूदर्जसूचीपसंदसवालहोनाहोतीजैसेवापसजनतानेताजारीघायलजिलेनीचेजांचपत्रगूगलजातेबाहरआपनेवाहनइसकासुबहरहनेइससेसहितबड़ेघटनातलाशपांचश्रीबड़ीहोतेसाईटशायदसकतीजातीवालाहजारपटनारखनेसड़कमिलाउसकीकेवललगताखानाअर्थजहांदेखापहलीनियमबिनाबैंककहींकहनादेताहमलेकाफीजबकितुरतमांगवहींरोज़मिलीआरोपसेनायादवलेनेखाताकरीबउनकाजवाबपूराबड़ासौदाशेयरकियेकहांअकसरबनाएवहांस्थलमिलेलेखकविषयक्रंसमूहथानाتستطيعمشاركةبواسطةالصفحةمواضيعالخاصةالمزيدالعامةالكاتبالردودبرنامجالدولةالعالمالموقعالعربيالسريعالجوالالذهابالحياةالحقوقالكريمالعراقمحفوظةالثانيمشاهدةالمرأةالقرآنالشبابالحوارالجديدالأسرةالعلوممجموعةالرحمنالنقاطفلسطينالكويتالدنيابركاتهالرياضتحياتيبتوقيتالأولىالبريدالكلامالرابطالشخصيسيار
اتالثالثالصلاةالحديثالزوارالخليجالجميعالعامهالجمالالساعةمشاهدهالرئيسالدخولالفنيةالكتابالدوريالدروساستغرقتصاميمالبناتالعظيمentertainmentunderstanding = function().jpg" width="configuration.png" width="<body class="Math.random()contemporary United Statescircumstances.appendChild(organizations<span class=""><img src="/distinguishedthousands of communicationclear"></div>investigationfavicon.ico" margin-right:based on the Massachusettstable border=internationalalso known aspronunciationbackground:#fpadding-left:For example, miscellaneous&lt;/math&gt;psychologicalin particularearch" type="form method="as opposed toSupreme Courtoccasionally Additionally,North Americapx;backgroundopportunitiesEntertainment.toLowerCase(manufacturingprofessional combined withFor instance,consisting of" maxlength="return false;consciousnessMediterraneanextraordinaryassassinationsubsequently button type="the number ofthe original comprehensiverefers to the</ul>
+</div>
+philosophicallocation.hrefwas publishedSan Francisco(function(){
+<div id="mainsophisticatedmathematical /head>
+<bodysuggests thatdocumentationconcentrationrelationshipsmay have been(for example,This article in some casesparts of the definition ofGreat Britain cellpadding=equivalent toplaceholder="; font-size: justificationbelieved thatsuffered fromattempted to leader of thecript" src="/(function() {are available
+	<link rel=" src='http://interested inconventional " alt="" /></are generallyhas also beenmost popular correspondingcredited withtyle="border:</a></span></.gif" width="<iframe src="table class="inline-block;according to together withapproximatelyparliamentarymore and moredisplay:none;traditionallypredominantly&nbsp;|&nbsp;&nbsp;</span> cellspacing=<input name="or" content="controversialproperty="og:/x-shockwave-demonstrationsurrounded byNevertheless,was the firstconsiderable Although the collaborationshould not beproportion of<span style="known as the shortly afterfor instance,described as /head>
+<body starting withincreasingly the fact thatdiscussion ofmiddle of thean individualdifficult to point of viewhomosexualityacceptance of</span></div>manufacturersorigin of thecommonly usedimportance ofdenominationsbackground: #length of thedeterminationa significant" border="0">revolutionaryprinciples ofis consideredwas developedIndo-Europeanvulnerable toproponents ofare sometimescloser to theNew York City name="searchattributed tocourse of themathematicianby the end ofat the end of" border="0" technological.removeClass(branch of theevidence that![endif]-->
+Institute of into a singlerespectively.and thereforeproperties ofis located insome of whichThere is alsocontinued to appearance of &amp;ndash; describes theconsiderationauthor of theindependentlyequipped withdoes not have</a><a href="confused with<link href="/at the age ofappear in theThese includeregardless ofcould be used style=&quot;several timesrepresent thebody>
+</html>thought to bepopulation ofpossibilitiespercentage ofaccess to thean attempt toproduction ofjquery/jquerytwo differentbelong to theestablishmentreplacing thedescription" determine theavailable forAccording to wide range of	<div class="more commonlyorganisationsfunctionalitywas completed &amp;mdash; participationthe characteran additionalappears to befact that thean example ofsignificantlyonmouseover="because they async = true;problems withseems to havethe result of src="http://familiar withpossession offunction () {took place inand sometimessubstantially<span></span>is often usedin an attemptgreat deal ofEnvironmentalsuccessfully virtually all20th century,professionalsnecessary to determined bycompatibilitybecause it isDictionary ofmodificationsThe followingmay refer to:Consequently,Internationalalthough somethat would beworld's firstclassified asbottom of the(particularlyalign="left" most commonlybasis for thefoundation ofcontributionspopularity ofcenter of theto reduce thejurisdictionsapproximation onmouseout="New Testamentcollection of</span></a></in the Unitedfilm director-strict.dtd">has been usedreturn to thealthough thischange in theseveral otherbut there areunprecedentedis similar toespecially inweight: bold;is called thecomputationalindicate thatrestricted to	<meta name="are typicallyconflict withHowever, the An example ofcompared withquantities ofrather than aconstellationnecessary forreported thatspecificationpolitical and&nbsp;&nbsp;<references tothe same yearGovernment ofgeneration ofhave not beenseveral yearscommitment to		<ul class="visualization19th century,practitionersthat he wouldand continuedoccupation ofis defined ascentre of thethe amount of><div style="equivalent ofdifferentiatebrought aboutmargin-left: automaticallythought of asSome of these
+<div class="input class="replaced withis one of theeducation andinfluenced byreputation as
+<meta name="accommodation</div>
+</div>large part ofInstitute forthe so-called against the In this case,was appointedclaimed to beHowever, thisDepartment ofthe remainingeffect on theparticularly deal with the
+<div style="almost alwaysare currentlyexpression ofphilosophy offor more thancivilizationson the islandselectedIndexcan result in" value="" />the structure /></a></div>Many of thesecaused by theof the Unitedspan class="mcan be tracedis related tobecame one ofis frequentlyliving in thetheoreticallyFollowing theRevolutionarygovernment inis determinedthe politicalintroduced insufficient todescription">short storiesseparation ofas to whetherknown for itswas initiallydisplay:blockis an examplethe principalconsists of arecognized as/body></html>a substantialreconstructedhead of stateresistance toundergraduateThere are twogravitationalare describedintentionallyserved as theclass="headeropposition tofundamentallydominated theand the otheralliance withwas forced torespectively,and politicalin support ofpeople in the20th century.and publishedloadChartbeatto understandmember statesenvironmentalfirst half ofcountries andarchitecturalbe consideredcharacterizedclearIntervalauthoritativeFederation ofwas succeededand there area consequencethe Presidentalso includedfree softwaresuccession ofdeveloped thewas destroyedaway from the;
+</script>
+<although theyfollowed by amore powerfulresulted in aUniversity ofHowever, manythe presidentHowever, someis thought tountil the endwas announcedare importantalso includes><input type=the center of DO NOT ALTERused to referthemes/?sort=that had beenthe basis forhas developedin the summercomparativelydescribed thesuch as thosethe resultingis impossiblevarious otherSouth Africanhave the sameeffectivenessin which case; text-align:structure and; background:regarding thesupported theis also knownstyle="marginincluding thebahasa Melayunorsk bokmålnorsk nynorskslovenščinainternacionalcalificacióncomunicaciónconstrucción"><div class="disambiguationDomainName', 'administrationsimultaneouslytransportationInternational margin-bottom:responsibility<![endif]-->
+</><meta name="implementationinfrastructurerepresentationborder-bottom:</head>
+<body>=http%3A%2F%2F<form method="method="post" /favicon.ico" });
+</script>
+.setAttribute(Administration= new Array();<![endif]-->
+display:block;Unfortunately,">&nbsp;</div>/favicon.ico">='stylesheet' identification, for example,<li><a href="/an alternativeas a result ofpt"></script>
+type="submit" 
+(function() {recommendationform action="/transformationreconstruction.style.display According to hidden" name="along with thedocument.body.approximately Communicationspost" action="meaning &quot;--<![endif]-->Prime Ministercharacteristic</a> <a class=the history of onmouseover="the governmenthref="https://was originallywas introducedclassificationrepresentativeare considered<![endif]-->
+
+depends on theUniversity of in contrast to placeholder="in the case ofinternational constitutionalstyle="border-: function() {Because of the-strict.dtd">
+<table class="accompanied byaccount of the<script src="/nature of the the people in in addition tos); js.id = id" width="100%"regarding the Roman Catholican independentfollowing the .gif" width="1the following discriminationarchaeologicalprime minister.js"></script>combination of marginwidth="createElement(w.attachEvent(</a></td></tr>src="https://aIn particular, align="left" Czech RepublicUnited Kingdomcorrespondenceconcluded that.html" title="(function () {comes from theapplication of<span class="sbelieved to beement('script'</a>
+</li>
+<livery different><span class="option value="(also known as	<li><a href="><input name="separated fromreferred to as valign="top">founder of theattempting to carbon dioxide
+
+<div class="class="search-/body>
+</html>opportunity tocommunications</head>
+<body style="width:Tiếng Việtchanges in theborder-color:#0" border="0" </span></div><was discovered" type="text" );
+</script>
+
+Department of ecclesiasticalthere has beenresulting from</body></html>has never beenthe first timein response toautomatically </div>
+
+<div iwas consideredpercent of the" /></a></div>collection of descended fromsection of theaccept-charsetto be confusedmember of the padding-right:translation ofinterpretation href='http://whether or notThere are alsothere are manya small numberother parts ofimpossible to  class="buttonlocated in the. However, theand eventuallyAt the end of because of itsrepresents the<form action=" method="post"it is possiblemore likely toan increase inhave also beencorresponds toannounced thatalign="right">many countriesfor many yearsearliest knownbecause it waspt"></script> valign="top" inhabitants offollowing year
+<div class="million peoplecontroversial concerning theargue that thegovernment anda reference totransferred todescribing the style="color:although therebest known forsubmit" name="multiplicationmore than one recognition ofCouncil of theedition of the  <meta name="Entertainment away from the ;margin-right:at the time ofinvestigationsconnected withand many otheralthough it isbeginning with <span class="descendants of<span class="i align="right"</head>
+<body aspects of thehas since beenEuropean Unionreminiscent ofmore difficultVice Presidentcomposition ofpassed throughmore importantfont-size:11pxexplanation ofthe concept ofwritten in the	<span class="is one of the resemblance toon the groundswhich containsincluding the defined by thepublication ofmeans that theoutside of thesupport of the<input class="<span class="t(Math.random()most prominentdescription ofConstantinoplewere published<div class="seappears in the1" height="1" most importantwhich includeswhich had beendestruction ofthe population
+	<div class="possibility ofsometimes usedappear to havesuccess of theintended to bepresent in thestyle="clear:b
+</script>
+<was founded ininterview with_id" content="capital of the
+<link rel="srelease of thepoint out thatxMLHttpRequestand subsequentsecond largestvery importantspecificationssurface of theapplied to theforeign policy_setDomainNameestablished inis believed toIn addition tomeaning of theis named afterto protect theis representedDeclaration ofmore efficientClassificationother forms ofhe returned to<span class="cperformance of(function() {if and only ifregions of theleading to therelations withUnited Nationsstyle="height:other than theype" content="Association of
+</head>
+<bodylocated on theis referred to(including theconcentrationsthe individualamong the mostthan any other/>
+<link rel=" return false;the purpose ofthe ability to;color:#fff}
+.
+<span class="the subject ofdefinitions of>
+<link rel="claim that thehave developed<table width="celebration ofFollowing the to distinguish<span class="btakes place inunder the namenoted that the><![endif]-->
+style="margin-instead of theintroduced thethe process ofincreasing thedifferences inestimated thatespecially the/div><div id="was eventuallythroughout histhe differencesomething thatspan></span></significantly ></script>
+
+environmental to prevent thehave been usedespecially forunderstand theis essentiallywere the firstis the largesthave been made" src="http://interpreted assecond half ofcrolling="no" is composed ofII, Holy Romanis expected tohave their owndefined as thetraditionally have differentare often usedto ensure thatagreement withcontaining theare frequentlyinformation onexample is theresulting in a</a></li></ul> class="footerand especiallytype="button" </span></span>which included>
+<meta name="considered thecarried out byHowever, it isbecame part ofin relation topopular in thethe capital ofwas officiallywhich has beenthe History ofalternative todifferent fromto support thesuggested thatin the process  <div class="the foundationbecause of hisconcerned withthe universityopposed to thethe context of<span class="ptext" name="q"		<div class="the scientificrepresented bymathematicianselected by thethat have been><div class="cdiv id="headerin particular,converted into);
+</script>
+<philosophical srpskohrvatskitiếng ViệtРусскийрусскийinvestigaciónparticipaciónкоторыеобластикоторыйчеловексистемыНовостикоторыхобластьвременикотораясегодняскачатьновостиУкраинывопросыкоторойсделатьпомощьюсредствобразомстороныучастиетечениеГлавнаяисториисистемарешенияСкачатьпоэтомуследуетсказатьтоваровконечнорешениекотороеоргановкоторомРекламаالمنتدىمنتدياتالموضوعالبرامجالمواقعالرسائلمشاركاتالأعضاءالرياضةالتصميمالاعضاءالنتائجالألعابالتسجيلالأقسامالضغطاتالفيديوالترحيبالجديدةالتعليمالأخبارالافلامالأفلامالتاريخالتقنيةالالعابالخواطرالمجتمعالديكورالسياحةعبداللهالتربيةالروابطالأدبيةالاخبارالمتحدةالاغانيcursor:pointer;</title>
+<meta " href="http://"><span class="members of the window.locationvertical-align:/a> | <a href="<!doctype html>media="screen" <option value="favicon.ico" />
+		<div class="characteristics" method="get" /body>
+</html>
+shortcut icon" document.write(padding-bottom:representativessubmit" value="align="center" throughout the science fiction
+  <div class="submit" class="one of the most valign="top"><was established);
+</script>
+return false;">).style.displaybecause of the document.cookie<form action="/}body{margin:0;Encyclopedia ofversion of the .createElement(name" content="</div>
+</div>
+
+administrative </body>
+</html>history of the "><input type="portion of the as part of the &nbsp;<a href="other countries">
+<div class="</span></span><In other words,display: block;control of the introduction of/>
+<meta name="as well as the in recent years
+	<div class="</div>
+	</div>
+inspired by thethe end of the compatible withbecame known as style="margin:.js"></script>< International there have beenGerman language style="color:#Communist Partyconsistent withborder="0" cell marginheight="the majority of" align="centerrelated to the many different Orthodox Churchsimilar to the />
+<link rel="swas one of the until his death})();
+</script>other languagescompared to theportions of thethe Netherlandsthe most commonbackground:url(argued that thescrolling="no" included in theNorth American the name of theinterpretationsthe traditionaldevelopment of frequently useda collection ofvery similar tosurrounding theexample of thisalign="center">would have beenimage_caption =attached to thesuggesting thatin the form of involved in theis derived fromnamed after theIntroduction torestrictions on style="width: can be used to the creation ofmost important information andresulted in thecollapse of theThis means thatelements of thewas replaced byanalysis of theinspiration forregarded as themost successfulknown as &quot;a comprehensiveHistory of the were consideredreturned to theare referred toUnsourced image>
+	<div class="consists of thestopPropagationinterest in theavailability ofappears to haveelectromagneticenableServices(function of theIt is important</script></div>function(){var relative to theas a result of the position ofFor example, in method="post" was followed by&amp;mdash; thethe applicationjs"></script>
+ul></div></div>after the deathwith respect tostyle="padding:is particularlydisplay:inline; type="submit" is divided into中文 (简体)responsabilidadadministracióninternacionalescorrespondienteउपयोगपूर्वहमारेलोगोंचुनावलेकिनसरकारपुलिसखोजेंचाहिएभेजेंशामिलहमारीजागरणबनानेकुमारब्लॉगमालिकमहिलापृष्ठबढ़तेभाजपाक्लिकट्रेनखिलाफदौरानमामलेमतदानबाजारविकासक्योंचाहतेपहुँचबतायासंवाददेखनेपिछलेविशेषराज्यउत्तरमुंबईदोनोंउपकरणपढ़ेंस्थितफिल्ममुख्यअच्छाछूटतीसंगीतजाएगाविभागघण्टेदूसरेदिनोंहत्यासेक्सगांधीविश्वरातेंदैट्सनक्शासामनेअदालतबिजलीपुरूषहिंदीमित्रकवितारुपयेस्थानकरोड़मुक्तयोजनाकृपयापोस्टघरेलूकार्यविचारसूचनामूल्यदेखेंहमेशास्कूलमैंनेतैयारजिसकेrss+xml" title="-type" content="title" content="at the same time.js"></script>
+<" method="post" </span></a></li>vertical-align:t/jquery.min.js">.click(function( style="padding-})();
+</script>
+</span><a href="<a href="http://); return false;text-decoration: scrolling="no" border-collapse:associated with Bahasa IndonesiaEnglish language<text xml:space=.gif" border="0"</body>
+</html>
+overflow:hidden;img src="http://addEventListenerresponsible for s.js"></script>
+/favicon.ico" />operating system" style="width:1target="_blank">State Universitytext-align:left;
+document.write(, including the around the world);
+</script>
+<" style="height:;overflow:hiddenmore informationan internationala member of the one of the firstcan be found in </div>
+		</div>
+display: none;">" />
+<link rel="
+  (function() {the 15th century.preventDefault(large number of Byzantine Empire.jpg|thumb|left|vast majority ofmajority of the  align="center">University Pressdominated by theSecond World Wardistribution of style="position:the rest of the characterized by rel="nofollow">derives from therather than the a combination ofstyle="width:100English-speakingcomputer scienceborder="0" alt="the existence ofDemocratic Party" style="margin-For this reason,.js"></script>
+	sByTagName(s)[0]js"></script>
+<.js"></script>
+link rel="icon" ' alt='' class='formation of theversions of the </a></div></div>/page>
+  <page>
+<div class="contbecame the firstbahasa Indonesiaenglish (simple)ΕλληνικάхрватскикомпанииявляетсяДобавитьчеловекаразвитияИнтернетОтветитьнапримеринтернеткоторогостраницыкачествеусловияхпроблемыполучитьявляютсянаиболеекомпаниявниманиесредстваالمواضيعالرئيسيةالانتقالمشاركاتكالسياراتالمكتوبةالسعوديةاحصائياتالعالميةالصوتياتالانترنتالتصاميمالإسلاميالمشاركةالمرئياتrobots" content="<div id="footer">the United States<img src="http://.jpg|right|thumb|.js"></script>
+<location.protocolframeborder="0" s" />
+<meta name="</a></div></div><font-weight:bold;&quot; and &quot;depending on the margin:0;padding:" rel="nofollow" President of the twentieth centuryevision>
+  </pageInternet Explorera.async = true;
+information about<div id="header">" action="http://<a href="https://<div id="content"</div>
+</div>
+<derived from the <img src='http://according to the 
+</body>
+</html>
+style="font-size:script language="Arial, Helvetica,</a><span class="</script><script political partiestd></tr></table><href="http://www.interpretation ofrel="stylesheet" document.write('<charset="utf-8">
+beginning of the revealed that thetelevision series" rel="nofollow"> target="_blank">claiming that thehttp%3A%2F%2Fwww.manifestations ofPrime Minister ofinfluenced by theclass="clearfix">/div>
+</div>
+
+three-dimensionalChurch of Englandof North Carolinasquare kilometres.addEventListenerdistinct from thecommonly known asPhonetic Alphabetdeclared that thecontrolled by theBenjamin Franklinrole-playing gamethe University ofin Western Europepersonal computerProject Gutenbergregardless of thehas been proposedtogether with the></li><li class="in some countriesmin.js"></script>of the populationofficial language<img src="images/identified by thenatural resourcesclassification ofcan be consideredquantum mechanicsNevertheless, themillion years ago</body>
+</html>Ελληνικά
+take advantage ofand, according toattributed to theMicrosoft Windowsthe first centuryunder the controldiv class="headershortly after thenotable exceptiontens of thousandsseveral differentaround the world.reaching militaryisolated from theopposition to thethe Old TestamentAfrican Americansinserted into theseparate from themetropolitan areamakes it possibleacknowledged thatarguably the mosttype="text/css">
+the InternationalAccording to the pe="text/css" />
+coincide with thetwo-thirds of theDuring this time,during the periodannounced that hethe internationaland more recentlybelieved that theconsciousness andformerly known assurrounded by thefirst appeared inoccasionally usedposition:absolute;" target="_blank" position:relative;text-align:center;jax/libs/jquery/1.background-color:#type="application/anguage" content="<meta http-equiv="Privacy Policy</a>e("%3Cscript src='" target="_blank">On the other hand,.jpg|thumb|right|2</div><div class="<div style="float:nineteenth century</body>
+</html>
+<img src="http://s;text-align:centerfont-weight: bold; According to the difference between" frameborder="0" " style="position:link href="http://html4/loose.dtd">
+during this period</td></tr></table>closely related tofor the first time;font-weight:bold;input type="text" <span style="font-onreadystatechange	<div class="cleardocument.location. For example, the a wide variety of <!DOCTYPE html>
+<&nbsp;&nbsp;&nbsp;"><a href="http://style="float:left;concerned with the=http%3A%2F%2Fwww.in popular culturetype="text/css" />it is possible to Harvard Universitytylesheet" href="/the main characterOxford University  name="keywords" cstyle="text-align:the United Kingdomfederal government<div style="margin depending on the description of the<div class="header.min.js"></script>destruction of theslightly differentin accordance withtelecommunicationsindicates that theshortly thereafterespecially in the European countriesHowever, there aresrc="http://staticsuggested that the" src="http://www.a large number of Telecommunications" rel="nofollow" tHoly Roman Emperoralmost exclusively" border="0" alt="Secretary of Stateculminating in theCIA World Factbookthe most importantanniversary of thestyle="background-<li><em><a href="/the Atlantic Oceanstrictly speaking,shortly before thedifferent types ofthe Ottoman Empire><img src="http://An Introduction toconsequence of thedeparture from theConfederate Statesindigenous peoplesProceedings of theinformation on thetheories have beeninvolvement in thedivided into threeadjacent countriesis responsible fordissolution of thecollaboration withwidely regarded ashis contemporariesfounding member ofDominican Republicgenerally acceptedthe possibility ofare also availableunder constructionrestoration of thethe general publicis almost entirelypasses through thehas been suggestedcomputer and videoGermanic languages according to the different from theshortly afterwardshref="https://www.recent developmentBoard of Directors<div class="search| <a href="http://In particular, theMultiple footnotesor other substancethousands of yearstranslation of the</div>
+</div>
+
+<a href="index.phpwas established inmin.js"></script>
+participate in thea strong influencestyle="margin-top:represented by thegraduated from theTraditionally, theElement("script");However, since the/div>
+</div>
+<div left; margin-left:protection against0; vertical-align:Unfortunately, thetype="image/x-icon/div>
+<div class=" class="clearfix"><div class="footer		</div>
+		</div>
+the motion pictureБългарскибългарскиФедерациинесколькосообщениесообщенияпрограммыОтправитьбесплатноматериалыпозволяетпоследниеразличныхпродукциипрограммаполностьюнаходитсяизбранноенаселенияизменениякатегорииАлександрद्वारामैनुअलप्रदानभारतीयअनुदेशहिन्दीइंडियादिल्लीअधिकारवीडियोचिट्ठेसमाचारजंक्शनदुनियाप्रयोगअनुसारऑनलाइनपार्टीशर्तोंलोकसभाफ़्लैशशर्तेंप्रदेशप्लेयरकेंद्रस्थितिउत्पादउन्हेंचिट्ठायात्राज्यादापुरानेजोड़ेंअनुवादश्रेणीशिक्षासरकारीसंग्रहपरिणामब्रांडबच्चोंउपलब्धमंत्रीसंपर्कउम्मीदमाध्यमसहायताशब्दोंमीडियाआईपीएलमोबाइलसंख्याआपरेशनअनुबंधबाज़ारनवीनतमप्रमुखप्रश्नपरिवारनुकसानसमर्थनआयोजितसोमवारالمشاركاتالمنتدياتالكمبيوترالمشاهداتعددالزوارعددالردودالإسلاميةالفوتوشوبالمسابقاتالمعلوماتالمسلسلاتالجرافيكسالاسلاميةالاتصالاتkeywords" content="w3.org/1999/xhtml"><a target="_blank" text/html; charset=" target="_blank"><table cellpadding="autocomplete="off" text-align: center;to last version by background-color: #" href="http://www./div></div><div id=<a href="#" class=""><img src="http://cript" src="http://
+<script language="//EN" "http://www.wencodeURIComponent(" href="javascript:<div class="contentdocument.write('<scposition: absolute;script src="http:// style="margin-top:.min.js"></script>
+</div>
+<div class="w3.org/1999/xhtml" 
+
+</body>
+</html>distinction between/" target="_blank"><link href="http://encoding="utf-8"?>
+w.addEventListener?action="http://www.icon" href="http:// style="background:type="text/css" />
+meta property="og:t<input type="text"  style="text-align:the development of tylesheet" type="tehtml; charset=utf-8is considered to betable width="100%" In addition to the contributed to the differences betweendevelopment of the It is important to </script>
+
+<script  style="font-size:1></span><span id=gbLibrary of Congress<img src="http://imEnglish translationAcademy of Sciencesdiv style="display:construction of the.getElementById(id)in conjunction withElement('script'); <meta property="og:Български
+ type="text" name=">Privacy Policy</a>administered by theenableSingleRequeststyle=&quot;margin:</div></div></div><><img src="http://i style=&quot;float:referred to as the total population ofin Washington, D.C. style="background-among other things,organization of theparticipated in thethe introduction ofidentified with thefictional character Oxford University misunderstanding ofThere are, however,stylesheet" href="/Columbia Universityexpanded to includeusually referred toindicating that thehave suggested thataffiliated with thecorrelation betweennumber of different></td></tr></table>Republic of Ireland
+</script>
+<script under the influencecontribution to theOfficial website ofheadquarters of thecentered around theimplications of thehave been developedFederal Republic ofbecame increasinglycontinuation of theNote, however, thatsimilar to that of capabilities of theaccordance with theparticipants in thefurther developmentunder the directionis often consideredhis younger brother</td></tr></table><a http-equiv="X-UA-physical propertiesof British Columbiahas been criticized(with the exceptionquestions about thepassing through the0" cellpadding="0" thousands of peopleredirects here. Forhave children under%3E%3C/script%3E"));<a href="http://www.<li><a href="http://site_name" content="text-decoration:nonestyle="display: none<meta http-equiv="X-new Date().getTime() type="image/x-icon"</span><span class="language="javascriptwindow.location.href<a href="javascript:-->
+<script type="t<a href='http://www.hortcut icon" href="</div>
+<div class="<script src="http://" rel="stylesheet" t</div>
+<script type=/a> <a href="http:// allowTransparency="X-UA-Compatible" conrelationship between
+</script>
+<script </a></li></ul></div>associated with the programming language</a><a href="http://</a></li><li class="form action="http://<div style="display:type="text" name="q"<table width="100%" background-position:" border="0" width="rel="shortcut icon" h6><ul><li><a href="  <meta http-equiv="css" media="screen" responsible for the " type="application/" style="background-html; charset=utf-8" allowtransparency="stylesheet" type="te
+<meta http-equiv="></span><span class="0" cellspacing="0">;
+</script>
+<script sometimes called thedoes not necessarilyFor more informationat the beginning of <!DOCTYPE html><htmlparticularly in the type="hidden" name="javascript:void(0);"effectiveness of the autocomplete="off" generally considered><input type="text" "></script>
+<scriptthroughout the worldcommon misconceptionassociation with the</div>
+</div>
+<div cduring his lifetime,corresponding to thetype="image/x-icon" an increasing numberdiplomatic relationsare often consideredmeta charset="utf-8" <input type="text" examples include the"><img src="http://iparticipation in thethe establishment of
+</div>
+<div class="&amp;nbsp;&amp;nbsp;to determine whetherquite different frommarked the beginningdistance between thecontributions to theconflict between thewidely considered towas one of the firstwith varying degreeshave speculated that(document.getElementparticipating in theoriginally developedeta charset="utf-8"> type="text/css" />
+interchangeably withmore closely relatedsocial and politicalthat would otherwiseperpendicular to thestyle type="text/csstype="submit" name="families residing indeveloping countriescomputer programmingeconomic developmentdetermination of thefor more informationon several occasionsportuguês (Europeu)УкраїнськаукраїнськаРоссийскойматериаловинформацииуправлениянеобходимоинформацияИнформацияРеспубликиколичествоинформациютерриториидостаточноالمتواجدونالاشتراكاتالاقتراحاتhtml; charset=UTF-8" setTimeout(function()display:inline-block;<input type="submit" type = 'text/javascri<img src="http://www." "http://www.w3.org/shortcut icon" href="" autocomplete="off" </a></div><div class=</a></li>
+<li class="css" type="text/css" <form action="http://xt/css" href="http://link rel="alternate" 
+<script type="text/ onclick="javascript:(new Date).getTime()}height="1" width="1" People's Republic of  <a href="http://www.text-decoration:underthe beginning of the </div>
+</div>
+</div>
+establishment of the </div></div></div></d#viewport{min-height:
+<script src="http://option><option value=often referred to as /option>
+<option valu<!DOCTYPE html>
+<!--[International Airport>
+<a href="http://www</a><a href="http://wภาษาไทยქართული正體中文 (繁體)निर्देशडाउनलोडक्षेत्रजानकारीसंबंधितस्थापनास्वीकारसंस्करणसामग्रीचिट्ठोंविज्ञानअमेरिकाविभिन्नगाडियाँक्योंकिसुरक्षापहुँचतीप्रबंधनटिप्पणीक्रिकेटप्रारंभप्राप्तमालिकोंरफ़्तारनिर्माणलिमिटेडdescription" content="document.location.prot.getElementsByTagName(<!DOCTYPE html>
+<html <meta charset="utf-8">:url" content="http://.css" rel="stylesheet"style type="text/css">type="text/css" href="w3.org/1999/xhtml" xmltype="text/javascript" method="get" action="link rel="stylesheet"  = document.getElementtype="image/x-icon" />cellpadding="0" cellsp.css" type="text/css" </a></li><li><a href="" width="1" height="1""><a href="http://www.style="display:none;">alternate" type="appli-//W3C//DTD XHTML 1.0 ellspacing="0" cellpad type="hidden" value="/a>&nbsp;<span role="s
+<input type="hidden" language="JavaScript"  document.getElementsBg="0" cellspacing="0" ype="text/css" media="type='text/javascript'with the exception of ype="text/css" rel="st height="1" width="1" ='+encodeURIComponent(<link rel="alternate" 
+body, tr, input, textmeta name="robots" conmethod="post" action=">
+<a href="http://www.css" rel="stylesheet" </div></div><div classlanguage="javascript">aria-hidden="true">·<ript" type="text/javasl=0;})();
+(function(){background-image: url(/a></li><li><a href="h		<li><a href="http://ator" aria-hidden="tru> <a href="http://www.language="javascript" /option>
+<option value/div></div><div class=rator" aria-hidden="tre=(new Date).getTime()português (do Brasil)организациивозможностьобразованиярегистрациивозможностиобязательна<!DOCTYPE html PUBLIC "nt-Type" content="text/<meta http-equiv="Conteransitional//EN" "http:<html xmlns="http://www-//W3C//DTD XHTML 1.0 TDTD/xhtml1-transitional//www.w3.org/TR/xhtml1/pe = 'text/javascript';<meta name="descriptionparentNode.insertBefore<input type="hidden" najs" type="text/javascri(document).ready(functiscript type="text/javasimage" content="http://UA-Compatible" content=tml; charset=utf-8" />
+link rel="shortcut icon<link rel="stylesheet" </script>
+<script type== document.createElemen<a target="_blank" href= document.getElementsBinput type="text" name=a.type = 'text/javascrinput type="hidden" namehtml; charset=utf-8" />dtd">
+<html xmlns="http-//W3C//DTD HTML 4.01 TentsByTagName('script')input type="hidden" nam<script type="text/javas" style="display:none;">document.getElementById(=document.createElement(' type='text/javascript'input type="text" name="d.getElementsByTagName(snical" href="http://www.C//DTD HTML 4.01 Transit<style type="text/css">
+
+<style type="text/css">ional.dtd">
+<html xmlns=http-equiv="Content-Typeding="0" cellspacing="0"html; charset=utf-8" />
+ style="display:none;"><<li><a href="http://www. type='text/javascript'>деятельностисоответствиипроизводствабезопасностиपुस्तिकाकांग्रेसउन्होंनेविधानसभाफिक्सिंगसुरक्षितकॉपीराइटविज्ञापनकार्रवाईसक्रियता
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/common/dictionary.bin.br b/third-party/libjxl/libjxl/third_party/brotli/c/common/dictionary.bin.br
new file mode 100644
index 0000000000..6a55d420a8
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/brotli/c/common/dictionary.bin.br differ
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/common/dictionary.c b/third-party/libjxl/libjxl/third_party/brotli/c/common/dictionary.c
new file mode 100644
index 0000000000..7c015ab0ba
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/common/dictionary.c
@@ -0,0 +1,5916 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "dictionary.h"
+#include "platform.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#if !defined(BROTLI_EXTERNAL_DICTIONARY_DATA)
+static const uint8_t kBrotliDictionaryData[] =
+/* GENERATED CODE START */
+{
+116,105,109,101,100,111,119,110,108,105,102,101,108,101,102,116,98,97,99,107,99,
+111,100,101,100,97,116,97,115,104,111,119,111,110,108,121,115,105,116,101,99,105
+,116,121,111,112,101,110,106,117,115,116,108,105,107,101,102,114,101,101,119,111
+,114,107,116,101,120,116,121,101,97,114,111,118,101,114,98,111,100,121,108,111,
+118,101,102,111,114,109,98,111,111,107,112,108,97,121,108,105,118,101,108,105,
+110,101,104,101,108,112,104,111,109,101,115,105,100,101,109,111,114,101,119,111,
+114,100,108,111,110,103,116,104,101,109,118,105,101,119,102,105,110,100,112,97,
+103,101,100,97,121,115,102,117,108,108,104,101,97,100,116,101,114,109,101,97,99,
+104,97,114,101,97,102,114,111,109,116,114,117,101,109,97,114,107,97,98,108,101,
+117,112,111,110,104,105,103,104,100,97,116,101,108,97,110,100,110,101,119,115,
+101,118,101,110,110,101,120,116,99,97,115,101,98,111,116,104,112,111,115,116,117
+,115,101,100,109,97,100,101,104,97,110,100,104,101,114,101,119,104,97,116,110,97
+,109,101,76,105,110,107,98,108,111,103,115,105,122,101,98,97,115,101,104,101,108
+,100,109,97,107,101,109,97,105,110,117,115,101,114,39,41,32,43,104,111,108,100,
+101,110,100,115,119,105,116,104,78,101,119,115,114,101,97,100,119,101,114,101,
+115,105,103,110,116,97,107,101,104,97,118,101,103,97,109,101,115,101,101,110,99,
+97,108,108,112,97,116,104,119,101,108,108,112,108,117,115,109,101,110,117,102,
+105,108,109,112,97,114,116,106,111,105,110,116,104,105,115,108,105,115,116,103,
+111,111,100,110,101,101,100,119,97,121,115,119,101,115,116,106,111,98,115,109,
+105,110,100,97,108,115,111,108,111,103,111,114,105,99,104,117,115,101,115,108,97
+,115,116,116,101,97,109,97,114,109,121,102,111,111,100,107,105,110,103,119,105,
+108,108,101,97,115,116,119,97,114,100,98,101,115,116,102,105,114,101,80,97,103,
+101,107,110,111,119,97,119,97,121,46,112,110,103,109,111,118,101,116,104,97,110,
+108,111,97,100,103,105,118,101,115,101,108,102,110,111,116,101,109,117,99,104,
+102,101,101,100,109,97,110,121,114,111,99,107,105,99,111,110,111,110,99,101,108,
+111,111,107,104,105,100,101,100,105,101,100,72,111,109,101,114,117,108,101,104,
+111,115,116,97,106,97,120,105,110,102,111,99,108,117,98,108,97,119,115,108,101,
+115,115,104,97,108,102,115,111,109,101,115,117,99,104,122,111,110,101,49,48,48,
+37,111,110,101,115,99,97,114,101,84,105,109,101,114,97,99,101,98,108,117,101,102
+,111,117,114,119,101,101,107,102,97,99,101,104,111,112,101,103,97,118,101,104,97
+,114,100,108,111,115,116,119,104,101,110,112,97,114,107,107,101,112,116,112,97,
+115,115,115,104,105,112,114,111,111,109,72,84,77,76,112,108,97,110,84,121,112,
+101,100,111,110,101,115,97,118,101,107,101,101,112,102,108,97,103,108,105,110,
+107,115,111,108,100,102,105,118,101,116,111,111,107,114,97,116,101,116,111,119,
+110,106,117,109,112,116,104,117,115,100,97,114,107,99,97,114,100,102,105,108,101
+,102,101,97,114,115,116,97,121,107,105,108,108,116,104,97,116,102,97,108,108,97,
+117,116,111,101,118,101,114,46,99,111,109,116,97,108,107,115,104,111,112,118,111
+,116,101,100,101,101,112,109,111,100,101,114,101,115,116,116,117,114,110,98,111,
+114,110,98,97,110,100,102,101,108,108,114,111,115,101,117,114,108,40,115,107,105
+,110,114,111,108,101,99,111,109,101,97,99,116,115,97,103,101,115,109,101,101,116
+,103,111,108,100,46,106,112,103,105,116,101,109,118,97,114,121,102,101,108,116,
+116,104,101,110,115,101,110,100,100,114,111,112,86,105,101,119,99,111,112,121,49
+,46,48,34,60,47,97,62,115,116,111,112,101,108,115,101,108,105,101,115,116,111,
+117,114,112,97,99,107,46,103,105,102,112,97,115,116,99,115,115,63,103,114,97,121
+,109,101,97,110,38,103,116,59,114,105,100,101,115,104,111,116,108,97,116,101,115
+,97,105,100,114,111,97,100,118,97,114,32,102,101,101,108,106,111,104,110,114,105
+,99,107,112,111,114,116,102,97,115,116,39,85,65,45,100,101,97,100,60,47,98,62,
+112,111,111,114,98,105,108,108,116,121,112,101,85,46,83,46,119,111,111,100,109,
+117,115,116,50,112,120,59,73,110,102,111,114,97,110,107,119,105,100,101,119,97,
+110,116,119,97,108,108,108,101,97,100,91,48,93,59,112,97,117,108,119,97,118,101,
+115,117,114,101,36,40,39,35,119,97,105,116,109,97,115,115,97,114,109,115,103,111
+,101,115,103,97,105,110,108,97,110,103,112,97,105,100,33,45,45,32,108,111,99,107
+,117,110,105,116,114,111,111,116,119,97,108,107,102,105,114,109,119,105,102,101,
+120,109,108,34,115,111,110,103,116,101,115,116,50,48,112,120,107,105,110,100,114
+,111,119,115,116,111,111,108,102,111,110,116,109,97,105,108,115,97,102,101,115,
+116,97,114,109,97,112,115,99,111,114,101,114,97,105,110,102,108,111,119,98,97,98
+,121,115,112,97,110,115,97,121,115,52,112,120,59,54,112,120,59,97,114,116,115,
+102,111,111,116,114,101,97,108,119,105,107,105,104,101,97,116,115,116,101,112,
+116,114,105,112,111,114,103,47,108,97,107,101,119,101,97,107,116,111,108,100,70,
+111,114,109,99,97,115,116,102,97,110,115,98,97,110,107,118,101,114,121,114,117,
+110,115,106,117,108,121,116,97,115,107,49,112,120,59,103,111,97,108,103,114,101,
+119,115,108,111,119,101,100,103,101,105,100,61,34,115,101,116,115,53,112,120,59,
+46,106,115,63,52,48,112,120,105,102,32,40,115,111,111,110,115,101,97,116,110,111
+,110,101,116,117,98,101,122,101,114,111,115,101,110,116,114,101,101,100,102,97,
+99,116,105,110,116,111,103,105,102,116,104,97,114,109,49,56,112,120,99,97,109,
+101,104,105,108,108,98,111,108,100,122,111,111,109,118,111,105,100,101,97,115,
+121,114,105,110,103,102,105,108,108,112,101,97,107,105,110,105,116,99,111,115,
+116,51,112,120,59,106,97,99,107,116,97,103,115,98,105,116,115,114,111,108,108,
+101,100,105,116,107,110,101,119,110,101,97,114,60,33,45,45,103,114,111,119,74,83
+,79,78,100,117,116,121,78,97,109,101,115,97,108,101,121,111,117,32,108,111,116,
+115,112,97,105,110,106,97,122,122,99,111,108,100,101,121,101,115,102,105,115,104
+,119,119,119,46,114,105,115,107,116,97,98,115,112,114,101,118,49,48,112,120,114,
+105,115,101,50,53,112,120,66,108,117,101,100,105,110,103,51,48,48,44,98,97,108,
+108,102,111,114,100,101,97,114,110,119,105,108,100,98,111,120,46,102,97,105,114,
+108,97,99,107,118,101,114,115,112,97,105,114,106,117,110,101,116,101,99,104,105,
+102,40,33,112,105,99,107,101,118,105,108,36,40,34,35,119,97,114,109,108,111,114,
+100,100,111,101,115,112,117,108,108,44,48,48,48,105,100,101,97,100,114,97,119,
+104,117,103,101,115,112,111,116,102,117,110,100,98,117,114,110,104,114,101,102,
+99,101,108,108,107,101,121,115,116,105,99,107,104,111,117,114,108,111,115,115,
+102,117,101,108,49,50,112,120,115,117,105,116,100,101,97,108,82,83,83,34,97,103,
+101,100,103,114,101,121,71,69,84,34,101,97,115,101,97,105,109,115,103,105,114,
+108,97,105,100,115,56,112,120,59,110,97,118,121,103,114,105,100,116,105,112,115,
+35,57,57,57,119,97,114,115,108,97,100,121,99,97,114,115,41,59,32,125,112,104,112
+,63,104,101,108,108,116,97,108,108,119,104,111,109,122,104,58,229,42,47,13,10,32
+,49,48,48,104,97,108,108,46,10,10,65,55,112,120,59,112,117,115,104,99,104,97,116
+,48,112,120,59,99,114,101,119,42,47,60,47,104,97,115,104,55,53,112,120,102,108,
+97,116,114,97,114,101,32,38,38,32,116,101,108,108,99,97,109,112,111,110,116,111,
+108,97,105,100,109,105,115,115,115,107,105,112,116,101,110,116,102,105,110,101,
+109,97,108,101,103,101,116,115,112,108,111,116,52,48,48,44,13,10,13,10,99,111,
+111,108,102,101,101,116,46,112,104,112,60,98,114,62,101,114,105,99,109,111,115,
+116,103,117,105,100,98,101,108,108,100,101,115,99,104,97,105,114,109,97,116,104,
+97,116,111,109,47,105,109,103,38,35,56,50,108,117,99,107,99,101,110,116,48,48,48
+,59,116,105,110,121,103,111,110,101,104,116,109,108,115,101,108,108,100,114,117,
+103,70,82,69,69,110,111,100,101,110,105,99,107,63,105,100,61,108,111,115,101,110
+,117,108,108,118,97,115,116,119,105,110,100,82,83,83,32,119,101,97,114,114,101,
+108,121,98,101,101,110,115,97,109,101,100,117,107,101,110,97,115,97,99,97,112,
+101,119,105,115,104,103,117,108,102,84,50,51,58,104,105,116,115,115,108,111,116,
+103,97,116,101,107,105,99,107,98,108,117,114,116,104,101,121,49,53,112,120,39,39
+,41,59,41,59,34,62,109,115,105,101,119,105,110,115,98,105,114,100,115,111,114,
+116,98,101,116,97,115,101,101,107,84,49,56,58,111,114,100,115,116,114,101,101,
+109,97,108,108,54,48,112,120,102,97,114,109,226,128,153,115,98,111,121,115,91,48
+,93,46,39,41,59,34,80,79,83,84,98,101,97,114,107,105,100,115,41,59,125,125,109,
+97,114,121,116,101,110,100,40,85,75,41,113,117,97,100,122,104,58,230,45,115,105,
+122,45,45,45,45,112,114,111,112,39,41,59,13,108,105,102,116,84,49,57,58,118,105,
+99,101,97,110,100,121,100,101,98,116,62,82,83,83,112,111,111,108,110,101,99,107,
+98,108,111,119,84,49,54,58,100,111,111,114,101,118,97,108,84,49,55,58,108,101,
+116,115,102,97,105,108,111,114,97,108,112,111,108,108,110,111,118,97,99,111,108,
+115,103,101,110,101,32,226,128,148,115,111,102,116,114,111,109,101,116,105,108,
+108,114,111,115,115,60,104,51,62,112,111,117,114,102,97,100,101,112,105,110,107,
+60,116,114,62,109,105,110,105,41,124,33,40,109,105,110,101,122,104,58,232,98,97,
+114,115,104,101,97,114,48,48,41,59,109,105,108,107,32,45,45,62,105,114,111,110,
+102,114,101,100,100,105,115,107,119,101,110,116,115,111,105,108,112,117,116,115,
+47,106,115,47,104,111,108,121,84,50,50,58,73,83,66,78,84,50,48,58,97,100,97,109,
+115,101,101,115,60,104,50,62,106,115,111,110,39,44,32,39,99,111,110,116,84,50,49
+,58,32,82,83,83,108,111,111,112,97,115,105,97,109,111,111,110,60,47,112,62,115,
+111,117,108,76,73,78,69,102,111,114,116,99,97,114,116,84,49,52,58,60,104,49,62,
+56,48,112,120,33,45,45,60,57,112,120,59,84,48,52,58,109,105,107,101,58,52,54,90,
+110,105,99,101,105,110,99,104,89,111,114,107,114,105,99,101,122,104,58,228,39,41
+,41,59,112,117,114,101,109,97,103,101,112,97,114,97,116,111,110,101,98,111,110,
+100,58,51,55,90,95,111,102,95,39,93,41,59,48,48,48,44,122,104,58,231,116,97,110,
+107,121,97,114,100,98,111,119,108,98,117,115,104,58,53,54,90,74,97,118,97,51,48,
+112,120,10,124,125,10,37,67,51,37,58,51,52,90,106,101,102,102,69,88,80,73,99,97,
+115,104,118,105,115,97,103,111,108,102,115,110,111,119,122,104,58,233,113,117,
+101,114,46,99,115,115,115,105,99,107,109,101,97,116,109,105,110,46,98,105,110,
+100,100,101,108,108,104,105,114,101,112,105,99,115,114,101,110,116,58,51,54,90,
+72,84,84,80,45,50,48,49,102,111,116,111,119,111,108,102,69,78,68,32,120,98,111,
+120,58,53,52,90,66,79,68,89,100,105,99,107,59,10,125,10,101,120,105,116,58,51,53
+,90,118,97,114,115,98,101,97,116,39,125,41,59,100,105,101,116,57,57,57,59,97,110
+,110,101,125,125,60,47,91,105,93,46,76,97,110,103,107,109,194,178,119,105,114,
+101,116,111,121,115,97,100,100,115,115,101,97,108,97,108,101,120,59,10,9,125,101
+,99,104,111,110,105,110,101,46,111,114,103,48,48,53,41,116,111,110,121,106,101,
+119,115,115,97,110,100,108,101,103,115,114,111,111,102,48,48,48,41,32,50,48,48,
+119,105,110,101,103,101,97,114,100,111,103,115,98,111,111,116,103,97,114,121,99,
+117,116,115,116,121,108,101,116,101,109,112,116,105,111,110,46,120,109,108,99,
+111,99,107,103,97,110,103,36,40,39,46,53,48,112,120,80,104,46,68,109,105,115,99,
+97,108,97,110,108,111,97,110,100,101,115,107,109,105,108,101,114,121,97,110,117,
+110,105,120,100,105,115,99,41,59,125,10,100,117,115,116,99,108,105,112,41,46,10,
+10,55,48,112,120,45,50,48,48,68,86,68,115,55,93,62,60,116,97,112,101,100,101,109
+,111,105,43,43,41,119,97,103,101,101,117,114,111,112,104,105,108,111,112,116,115
+,104,111,108,101,70,65,81,115,97,115,105,110,45,50,54,84,108,97,98,115,112,101,
+116,115,85,82,76,32,98,117,108,107,99,111,111,107,59,125,13,10,72,69,65,68,91,48
+,93,41,97,98,98,114,106,117,97,110,40,49,57,56,108,101,115,104,116,119,105,110,
+60,47,105,62,115,111,110,121,103,117,121,115,102,117,99,107,112,105,112,101,124,
+45,10,33,48,48,50,41,110,100,111,119,91,49,93,59,91,93,59,10,76,111,103,32,115,
+97,108,116,13,10,9,9,98,97,110,103,116,114,105,109,98,97,116,104,41,123,13,10,48
+,48,112,120,10,125,41,59,107,111,58,236,102,101,101,115,97,100,62,13,115,58,47,
+47,32,91,93,59,116,111,108,108,112,108,117,103,40,41,123,10,123,13,10,32,46,106,
+115,39,50,48,48,112,100,117,97,108,98,111,97,116,46,74,80,71,41,59,10,125,113,
+117,111,116,41,59,10,10,39,41,59,10,13,10,125,13,50,48,49,52,50,48,49,53,50,48,
+49,54,50,48,49,55,50,48,49,56,50,48,49,57,50,48,50,48,50,48,50,49,50,48,50,50,50
+,48,50,51,50,48,50,52,50,48,50,53,50,48,50,54,50,48,50,55,50,48,50,56,50,48,50,
+57,50,48,51,48,50,48,51,49,50,48,51,50,50,48,51,51,50,48,51,52,50,48,51,53,50,48
+,51,54,50,48,51,55,50,48,49,51,50,48,49,50,50,48,49,49,50,48,49,48,50,48,48,57,
+50,48,48,56,50,48,48,55,50,48,48,54,50,48,48,53,50,48,48,52,50,48,48,51,50,48,48
+,50,50,48,48,49,50,48,48,48,49,57,57,57,49,57,57,56,49,57,57,55,49,57,57,54,49,
+57,57,53,49,57,57,52,49,57,57,51,49,57,57,50,49,57,57,49,49,57,57,48,49,57,56,57
+,49,57,56,56,49,57,56,55,49,57,56,54,49,57,56,53,49,57,56,52,49,57,56,51,49,57,
+56,50,49,57,56,49,49,57,56,48,49,57,55,57,49,57,55,56,49,57,55,55,49,57,55,54,49
+,57,55,53,49,57,55,52,49,57,55,51,49,57,55,50,49,57,55,49,49,57,55,48,49,57,54,
+57,49,57,54,56,49,57,54,55,49,57,54,54,49,57,54,53,49,57,54,52,49,57,54,51,49,57
+,54,50,49,57,54,49,49,57,54,48,49,57,53,57,49,57,53,56,49,57,53,55,49,57,53,54,
+49,57,53,53,49,57,53,52,49,57,53,51,49,57,53,50,49,57,53,49,49,57,53,48,49,48,48
+,48,49,48,50,52,49,51,57,52,48,48,48,48,57,57,57,57,99,111,109,111,109,195,161,
+115,101,115,116,101,101,115,116,97,112,101,114,111,116,111,100,111,104,97,99,101
+,99,97,100,97,97,195,177,111,98,105,101,110,100,195,173,97,97,115,195,173,118,
+105,100,97,99,97,115,111,111,116,114,111,102,111,114,111,115,111,108,111,111,116
+,114,97,99,117,97,108,100,105,106,111,115,105,100,111,103,114,97,110,116,105,112
+,111,116,101,109,97,100,101,98,101,97,108,103,111,113,117,195,169,101,115,116,
+111,110,97,100,97,116,114,101,115,112,111,99,111,99,97,115,97,98,97,106,111,116,
+111,100,97,115,105,110,111,97,103,117,97,112,117,101,115,117,110,111,115,97,110,
+116,101,100,105,99,101,108,117,105,115,101,108,108,97,109,97,121,111,122,111,110
+,97,97,109,111,114,112,105,115,111,111,98,114,97,99,108,105,99,101,108,108,111,
+100,105,111,115,104,111,114,97,99,97,115,105,208,183,208,176,208,189,208,176,208
+,190,208,188,209,128,208,176,209,128,209,131,209,130,208,176,208,189,208,181,208
+,191,208,190,208,190,209,130,208,184,208,183,208,189,208,190,208,180,208,190,209
+,130,208,190,208,182,208,181,208,190,208,189,208,184,209,133,208,157,208,176,208
+,181,208,181,208,177,209,139,208,188,209,139,208,146,209,139,209,129,208,190,208
+,178,209,139,208,178,208,190,208,157,208,190,208,190,208,177,208,159,208,190,208
+,187,208,184,208,189,208,184,208,160,208,164,208,157,208,181,208,156,209,139,209
+,130,209,139,208,158,208,189,208,184,208,188,208,180,208,176,208,151,208,176,208
+,148,208,176,208,157,209,131,208,158,208,177,209,130,208,181,208,152,208,183,208
+,181,208,185,208,189,209,131,208,188,208,188,208,162,209,139,209,131,208,182,217
+,129,217,138,216,163,217,134,217,133,216,167,217,133,216,185,217,131,217,132,216
+,163,217,136,216,177,216,175,217,138,216,167,217,129,217,137,217,135,217,136,217
+,132,217,133,217,132,217,131,216,167,217,136,217,132,217,135,216,168,216,179,216
+,167,217,132,216,165,217,134,217,135,217,138,216,163,217,138,217,130,216,175,217
+,135,217,132,216,171,217,133,216,168,217,135,217,132,217,136,217,132,217,138,216
+,168,217,132,216,167,217,138,216,168,217,131,216,180,217,138,216,167,217,133,216
+,163,217,133,217,134,216,170,216,168,217,138,217,132,217,134,216,173,216,168,217
+,135,217,133,217,133,216,180,217,136,216,180,102,105,114,115,116,118,105,100,101
+,111,108,105,103,104,116,119,111,114,108,100,109,101,100,105,97,119,104,105,116,
+101,99,108,111,115,101,98,108,97,99,107,114,105,103,104,116,115,109,97,108,108,
+98,111,111,107,115,112,108,97,99,101,109,117,115,105,99,102,105,101,108,100,111,
+114,100,101,114,112,111,105,110,116,118,97,108,117,101,108,101,118,101,108,116,
+97,98,108,101,98,111,97,114,100,104,111,117,115,101,103,114,111,117,112,119,111,
+114,107,115,121,101,97,114,115,115,116,97,116,101,116,111,100,97,121,119,97,116,
+101,114,115,116,97,114,116,115,116,121,108,101,100,101,97,116,104,112,111,119,
+101,114,112,104,111,110,101,110,105,103,104,116,101,114,114,111,114,105,110,112,
+117,116,97,98,111,117,116,116,101,114,109,115,116,105,116,108,101,116,111,111,
+108,115,101,118,101,110,116,108,111,99,97,108,116,105,109,101,115,108,97,114,103
+,101,119,111,114,100,115,103,97,109,101,115,115,104,111,114,116,115,112,97,99,
+101,102,111,99,117,115,99,108,101,97,114,109,111,100,101,108,98,108,111,99,107,
+103,117,105,100,101,114,97,100,105,111,115,104,97,114,101,119,111,109,101,110,97
+,103,97,105,110,109,111,110,101,121,105,109,97,103,101,110,97,109,101,115,121,
+111,117,110,103,108,105,110,101,115,108,97,116,101,114,99,111,108,111,114,103,
+114,101,101,110,102,114,111,110,116,38,97,109,112,59,119,97,116,99,104,102,111,
+114,99,101,112,114,105,99,101,114,117,108,101,115,98,101,103,105,110,97,102,116,
+101,114,118,105,115,105,116,105,115,115,117,101,97,114,101,97,115,98,101,108,111
+,119,105,110,100,101,120,116,111,116,97,108,104,111,117,114,115,108,97,98,101,
+108,112,114,105,110,116,112,114,101,115,115,98,117,105,108,116,108,105,110,107,
+115,115,112,101,101,100,115,116,117,100,121,116,114,97,100,101,102,111,117,110,
+100,115,101,110,115,101,117,110,100,101,114,115,104,111,119,110,102,111,114,109,
+115,114,97,110,103,101,97,100,100,101,100,115,116,105,108,108,109,111,118,101,
+100,116,97,107,101,110,97,98,111,118,101,102,108,97,115,104,102,105,120,101,100,
+111,102,116,101,110,111,116,104,101,114,118,105,101,119,115,99,104,101,99,107,
+108,101,103,97,108,114,105,118,101,114,105,116,101,109,115,113,117,105,99,107,
+115,104,97,112,101,104,117,109,97,110,101,120,105,115,116,103,111,105,110,103,
+109,111,118,105,101,116,104,105,114,100,98,97,115,105,99,112,101,97,99,101,115,
+116,97,103,101,119,105,100,116,104,108,111,103,105,110,105,100,101,97,115,119,
+114,111,116,101,112,97,103,101,115,117,115,101,114,115,100,114,105,118,101,115,
+116,111,114,101,98,114,101,97,107,115,111,117,116,104,118,111,105,99,101,115,105
+,116,101,115,109,111,110,116,104,119,104,101,114,101,98,117,105,108,100,119,104,
+105,99,104,101,97,114,116,104,102,111,114,117,109,116,104,114,101,101,115,112,
+111,114,116,112,97,114,116,121,67,108,105,99,107,108,111,119,101,114,108,105,118
+,101,115,99,108,97,115,115,108,97,121,101,114,101,110,116,114,121,115,116,111,
+114,121,117,115,97,103,101,115,111,117,110,100,99,111,117,114,116,121,111,117,
+114,32,98,105,114,116,104,112,111,112,117,112,116,121,112,101,115,97,112,112,108
+,121,73,109,97,103,101,98,101,105,110,103,117,112,112,101,114,110,111,116,101,
+115,101,118,101,114,121,115,104,111,119,115,109,101,97,110,115,101,120,116,114,
+97,109,97,116,99,104,116,114,97,99,107,107,110,111,119,110,101,97,114,108,121,98
+,101,103,97,110,115,117,112,101,114,112,97,112,101,114,110,111,114,116,104,108,
+101,97,114,110,103,105,118,101,110,110,97,109,101,100,101,110,100,101,100,84,101
+,114,109,115,112,97,114,116,115,71,114,111,117,112,98,114,97,110,100,117,115,105
+,110,103,119,111,109,97,110,102,97,108,115,101,114,101,97,100,121,97,117,100,105
+,111,116,97,107,101,115,119,104,105,108,101,46,99,111,109,47,108,105,118,101,100
+,99,97,115,101,115,100,97,105,108,121,99,104,105,108,100,103,114,101,97,116,106,
+117,100,103,101,116,104,111,115,101,117,110,105,116,115,110,101,118,101,114,98,
+114,111,97,100,99,111,97,115,116,99,111,118,101,114,97,112,112,108,101,102,105,
+108,101,115,99,121,99,108,101,115,99,101,110,101,112,108,97,110,115,99,108,105,
+99,107,119,114,105,116,101,113,117,101,101,110,112,105,101,99,101,101,109,97,105
+,108,102,114,97,109,101,111,108,100,101,114,112,104,111,116,111,108,105,109,105,
+116,99,97,99,104,101,99,105,118,105,108,115,99,97,108,101,101,110,116,101,114,
+116,104,101,109,101,116,104,101,114,101,116,111,117,99,104,98,111,117,110,100,
+114,111,121,97,108,97,115,107,101,100,119,104,111,108,101,115,105,110,99,101,115
+,116,111,99,107,32,110,97,109,101,102,97,105,116,104,104,101,97,114,116,101,109,
+112,116,121,111,102,102,101,114,115,99,111,112,101,111,119,110,101,100,109,105,
+103,104,116,97,108,98,117,109,116,104,105,110,107,98,108,111,111,100,97,114,114,
+97,121,109,97,106,111,114,116,114,117,115,116,99,97,110,111,110,117,110,105,111,
+110,99,111,117,110,116,118,97,108,105,100,115,116,111,110,101,83,116,121,108,101
+,76,111,103,105,110,104,97,112,112,121,111,99,99,117,114,108,101,102,116,58,102,
+114,101,115,104,113,117,105,116,101,102,105,108,109,115,103,114,97,100,101,110,
+101,101,100,115,117,114,98,97,110,102,105,103,104,116,98,97,115,105,115,104,111,
+118,101,114,97,117,116,111,59,114,111,117,116,101,46,104,116,109,108,109,105,120
+,101,100,102,105,110,97,108,89,111,117,114,32,115,108,105,100,101,116,111,112,
+105,99,98,114,111,119,110,97,108,111,110,101,100,114,97,119,110,115,112,108,105,
+116,114,101,97,99,104,82,105,103,104,116,100,97,116,101,115,109,97,114,99,104,
+113,117,111,116,101,103,111,111,100,115,76,105,110,107,115,100,111,117,98,116,97
+,115,121,110,99,116,104,117,109,98,97,108,108,111,119,99,104,105,101,102,121,111
+,117,116,104,110,111,118,101,108,49,48,112,120,59,115,101,114,118,101,117,110,
+116,105,108,104,97,110,100,115,67,104,101,99,107,83,112,97,99,101,113,117,101,
+114,121,106,97,109,101,115,101,113,117,97,108,116,119,105,99,101,48,44,48,48,48,
+83,116,97,114,116,112,97,110,101,108,115,111,110,103,115,114,111,117,110,100,101
+,105,103,104,116,115,104,105,102,116,119,111,114,116,104,112,111,115,116,115,108
+,101,97,100,115,119,101,101,107,115,97,118,111,105,100,116,104,101,115,101,109,
+105,108,101,115,112,108,97,110,101,115,109,97,114,116,97,108,112,104,97,112,108,
+97,110,116,109,97,114,107,115,114,97,116,101,115,112,108,97,121,115,99,108,97,
+105,109,115,97,108,101,115,116,101,120,116,115,115,116,97,114,115,119,114,111,
+110,103,60,47,104,51,62,116,104,105,110,103,46,111,114,103,47,109,117,108,116,
+105,104,101,97,114,100,80,111,119,101,114,115,116,97,110,100,116,111,107,101,110
+,115,111,108,105,100,40,116,104,105,115,98,114,105,110,103,115,104,105,112,115,
+115,116,97,102,102,116,114,105,101,100,99,97,108,108,115,102,117,108,108,121,102
+,97,99,116,115,97,103,101,110,116,84,104,105,115,32,47,47,45,45,62,97,100,109,
+105,110,101,103,121,112,116,69,118,101,110,116,49,53,112,120,59,69,109,97,105,
+108,116,114,117,101,34,99,114,111,115,115,115,112,101,110,116,98,108,111,103,115
+,98,111,120,34,62,110,111,116,101,100,108,101,97,118,101,99,104,105,110,97,115,
+105,122,101,115,103,117,101,115,116,60,47,104,52,62,114,111,98,111,116,104,101,
+97,118,121,116,114,117,101,44,115,101,118,101,110,103,114,97,110,100,99,114,105,
+109,101,115,105,103,110,115,97,119,97,114,101,100,97,110,99,101,112,104,97,115,
+101,62,60,33,45,45,101,110,95,85,83,38,35,51,57,59,50,48,48,112,120,95,110,97,
+109,101,108,97,116,105,110,101,110,106,111,121,97,106,97,120,46,97,116,105,111,
+110,115,109,105,116,104,85,46,83,46,32,104,111,108,100,115,112,101,116,101,114,
+105,110,100,105,97,110,97,118,34,62,99,104,97,105,110,115,99,111,114,101,99,111,
+109,101,115,100,111,105,110,103,112,114,105,111,114,83,104,97,114,101,49,57,57,
+48,115,114,111,109,97,110,108,105,115,116,115,106,97,112,97,110,102,97,108,108,
+115,116,114,105,97,108,111,119,110,101,114,97,103,114,101,101,60,47,104,50,62,97
+,98,117,115,101,97,108,101,114,116,111,112,101,114,97,34,45,47,47,87,99,97,114,
+100,115,104,105,108,108,115,116,101,97,109,115,80,104,111,116,111,116,114,117,
+116,104,99,108,101,97,110,46,112,104,112,63,115,97,105,110,116,109,101,116,97,
+108,108,111,117,105,115,109,101,97,110,116,112,114,111,111,102,98,114,105,101,
+102,114,111,119,34,62,103,101,110,114,101,116,114,117,99,107,108,111,111,107,115
+,86,97,108,117,101,70,114,97,109,101,46,110,101,116,47,45,45,62,10,60,116,114,
+121,32,123,10,118,97,114,32,109,97,107,101,115,99,111,115,116,115,112,108,97,105
+,110,97,100,117,108,116,113,117,101,115,116,116,114,97,105,110,108,97,98,111,114
+,104,101,108,112,115,99,97,117,115,101,109,97,103,105,99,109,111,116,111,114,116
+,104,101,105,114,50,53,48,112,120,108,101,97,115,116,115,116,101,112,115,67,111,
+117,110,116,99,111,117,108,100,103,108,97,115,115,115,105,100,101,115,102,117,
+110,100,115,104,111,116,101,108,97,119,97,114,100,109,111,117,116,104,109,111,
+118,101,115,112,97,114,105,115,103,105,118,101,115,100,117,116,99,104,116,101,
+120,97,115,102,114,117,105,116,110,117,108,108,44,124,124,91,93,59,116,111,112,
+34,62,10,60,33,45,45,80,79,83,84,34,111,99,101,97,110,60,98,114,47,62,102,108,
+111,111,114,115,112,101,97,107,100,101,112,116,104,32,115,105,122,101,98,97,110,
+107,115,99,97,116,99,104,99,104,97,114,116,50,48,112,120,59,97,108,105,103,110,
+100,101,97,108,115,119,111,117,108,100,53,48,112,120,59,117,114,108,61,34,112,97
+,114,107,115,109,111,117,115,101,77,111,115,116,32,46,46,46,60,47,97,109,111,110
+,103,98,114,97,105,110,98,111,100,121,32,110,111,110,101,59,98,97,115,101,100,99
+,97,114,114,121,100,114,97,102,116,114,101,102,101,114,112,97,103,101,95,104,111
+,109,101,46,109,101,116,101,114,100,101,108,97,121,100,114,101,97,109,112,114,
+111,118,101,106,111,105,110,116,60,47,116,114,62,100,114,117,103,115,60,33,45,45
+,32,97,112,114,105,108,105,100,101,97,108,97,108,108,101,110,101,120,97,99,116,
+102,111,114,116,104,99,111,100,101,115,108,111,103,105,99,86,105,101,119,32,115,
+101,101,109,115,98,108,97,110,107,112,111,114,116,115,32,40,50,48,48,115,97,118,
+101,100,95,108,105,110,107,103,111,97,108,115,103,114,97,110,116,103,114,101,101
+,107,104,111,109,101,115,114,105,110,103,115,114,97,116,101,100,51,48,112,120,59
+,119,104,111,115,101,112,97,114,115,101,40,41,59,34,32,66,108,111,99,107,108,105
+,110,117,120,106,111,110,101,115,112,105,120,101,108,39,41,59,34,62,41,59,105,
+102,40,45,108,101,102,116,100,97,118,105,100,104,111,114,115,101,70,111,99,117,
+115,114,97,105,115,101,98,111,120,101,115,84,114,97,99,107,101,109,101,110,116,
+60,47,101,109,62,98,97,114,34,62,46,115,114,99,61,116,111,119,101,114,97,108,116
+,61,34,99,97,98,108,101,104,101,110,114,121,50,52,112,120,59,115,101,116,117,112
+,105,116,97,108,121,115,104,97,114,112,109,105,110,111,114,116,97,115,116,101,
+119,97,110,116,115,116,104,105,115,46,114,101,115,101,116,119,104,101,101,108,
+103,105,114,108,115,47,99,115,115,47,49,48,48,37,59,99,108,117,98,115,115,116,
+117,102,102,98,105,98,108,101,118,111,116,101,115,32,49,48,48,48,107,111,114,101
+,97,125,41,59,13,10,98,97,110,100,115,113,117,101,117,101,61,32,123,125,59,56,48
+,112,120,59,99,107,105,110,103,123,13,10,9,9,97,104,101,97,100,99,108,111,99,107
+,105,114,105,115,104,108,105,107,101,32,114,97,116,105,111,115,116,97,116,115,70
+,111,114,109,34,121,97,104,111,111,41,91,48,93,59,65,98,111,117,116,102,105,110,
+100,115,60,47,104,49,62,100,101,98,117,103,116,97,115,107,115,85,82,76,32,61,99,
+101,108,108,115,125,41,40,41,59,49,50,112,120,59,112,114,105,109,101,116,101,108
+,108,115,116,117,114,110,115,48,120,54,48,48,46,106,112,103,34,115,112,97,105,
+110,98,101,97,99,104,116,97,120,101,115,109,105,99,114,111,97,110,103,101,108,45
+,45,62,60,47,103,105,102,116,115,115,116,101,118,101,45,108,105,110,107,98,111,
+100,121,46,125,41,59,10,9,109,111,117,110,116,32,40,49,57,57,70,65,81,60,47,114,
+111,103,101,114,102,114,97,110,107,67,108,97,115,115,50,56,112,120,59,102,101,
+101,100,115,60,104,49,62,60,115,99,111,116,116,116,101,115,116,115,50,50,112,120
+,59,100,114,105,110,107,41,32,124,124,32,108,101,119,105,115,115,104,97,108,108,
+35,48,51,57,59,32,102,111,114,32,108,111,118,101,100,119,97,115,116,101,48,48,
+112,120,59,106,97,58,227,130,115,105,109,111,110,60,102,111,110,116,114,101,112,
+108,121,109,101,101,116,115,117,110,116,101,114,99,104,101,97,112,116,105,103,
+104,116,66,114,97,110,100,41,32,33,61,32,100,114,101,115,115,99,108,105,112,115,
+114,111,111,109,115,111,110,107,101,121,109,111,98,105,108,109,97,105,110,46,78,
+97,109,101,32,112,108,97,116,101,102,117,110,110,121,116,114,101,101,115,99,111,
+109,47,34,49,46,106,112,103,119,109,111,100,101,112,97,114,97,109,83,84,65,82,84
+,108,101,102,116,32,105,100,100,101,110,44,32,50,48,49,41,59,10,125,10,102,111,
+114,109,46,118,105,114,117,115,99,104,97,105,114,116,114,97,110,115,119,111,114,
+115,116,80,97,103,101,115,105,116,105,111,110,112,97,116,99,104,60,33,45,45,10,
+111,45,99,97,99,102,105,114,109,115,116,111,117,114,115,44,48,48,48,32,97,115,
+105,97,110,105,43,43,41,123,97,100,111,98,101,39,41,91,48,93,105,100,61,49,48,98
+,111,116,104,59,109,101,110,117,32,46,50,46,109,105,46,112,110,103,34,107,101,
+118,105,110,99,111,97,99,104,67,104,105,108,100,98,114,117,99,101,50,46,106,112,
+103,85,82,76,41,43,46,106,112,103,124,115,117,105,116,101,115,108,105,99,101,104
+,97,114,114,121,49,50,48,34,32,115,119,101,101,116,116,114,62,13,10,110,97,109,
+101,61,100,105,101,103,111,112,97,103,101,32,115,119,105,115,115,45,45,62,10,10,
+35,102,102,102,59,34,62,76,111,103,46,99,111,109,34,116,114,101,97,116,115,104,
+101,101,116,41,32,38,38,32,49,52,112,120,59,115,108,101,101,112,110,116,101,110,
+116,102,105,108,101,100,106,97,58,227,131,105,100,61,34,99,78,97,109,101,34,119,
+111,114,115,101,115,104,111,116,115,45,98,111,120,45,100,101,108,116,97,10,38,
+108,116,59,98,101,97,114,115,58,52,56,90,60,100,97,116,97,45,114,117,114,97,108,
+60,47,97,62,32,115,112,101,110,100,98,97,107,101,114,115,104,111,112,115,61,32,
+34,34,59,112,104,112,34,62,99,116,105,111,110,49,51,112,120,59,98,114,105,97,110
+,104,101,108,108,111,115,105,122,101,61,111,61,37,50,70,32,106,111,105,110,109,
+97,121,98,101,60,105,109,103,32,105,109,103,34,62,44,32,102,106,115,105,109,103,
+34,32,34,41,91,48,93,77,84,111,112,66,84,121,112,101,34,110,101,119,108,121,68,
+97,110,115,107,99,122,101,99,104,116,114,97,105,108,107,110,111,119,115,60,47,
+104,53,62,102,97,113,34,62,122,104,45,99,110,49,48,41,59,10,45,49,34,41,59,116,
+121,112,101,61,98,108,117,101,115,116,114,117,108,121,100,97,118,105,115,46,106,
+115,39,59,62,13,10,60,33,115,116,101,101,108,32,121,111,117,32,104,50,62,13,10,
+102,111,114,109,32,106,101,115,117,115,49,48,48,37,32,109,101,110,117,46,13,10,9
+,13,10,119,97,108,101,115,114,105,115,107,115,117,109,101,110,116,100,100,105,
+110,103,98,45,108,105,107,116,101,97,99,104,103,105,102,34,32,118,101,103,97,115
+,100,97,110,115,107,101,101,115,116,105,115,104,113,105,112,115,117,111,109,105,
+115,111,98,114,101,100,101,115,100,101,101,110,116,114,101,116,111,100,111,115,
+112,117,101,100,101,97,195,177,111,115,101,115,116,195,161,116,105,101,110,101,
+104,97,115,116,97,111,116,114,111,115,112,97,114,116,101,100,111,110,100,101,110
+,117,101,118,111,104,97,99,101,114,102,111,114,109,97,109,105,115,109,111,109,
+101,106,111,114,109,117,110,100,111,97,113,117,195,173,100,195,173,97,115,115,
+195,179,108,111,97,121,117,100,97,102,101,99,104,97,116,111,100,97,115,116,97,
+110,116,111,109,101,110,111,115,100,97,116,111,115,111,116,114,97,115,115,105,
+116,105,111,109,117,99,104,111,97,104,111,114,97,108,117,103,97,114,109,97,121,
+111,114,101,115,116,111,115,104,111,114,97,115,116,101,110,101,114,97,110,116,
+101,115,102,111,116,111,115,101,115,116,97,115,112,97,195,173,115,110,117,101,
+118,97,115,97,108,117,100,102,111,114,111,115,109,101,100,105,111,113,117,105,
+101,110,109,101,115,101,115,112,111,100,101,114,99,104,105,108,101,115,101,114,
+195,161,118,101,99,101,115,100,101,99,105,114,106,111,115,195,169,101,115,116,97
+,114,118,101,110,116,97,103,114,117,112,111,104,101,99,104,111,101,108,108,111,
+115,116,101,110,103,111,97,109,105,103,111,99,111,115,97,115,110,105,118,101,108
+,103,101,110,116,101,109,105,115,109,97,97,105,114,101,115,106,117,108,105,111,
+116,101,109,97,115,104,97,99,105,97,102,97,118,111,114,106,117,110,105,111,108,
+105,98,114,101,112,117,110,116,111,98,117,101,110,111,97,117,116,111,114,97,98,
+114,105,108,98,117,101,110,97,116,101,120,116,111,109,97,114,122,111,115,97,98,
+101,114,108,105,115,116,97,108,117,101,103,111,99,195,179,109,111,101,110,101,
+114,111,106,117,101,103,111,112,101,114,195,186,104,97,98,101,114,101,115,116,
+111,121,110,117,110,99,97,109,117,106,101,114,118,97,108,111,114,102,117,101,114
+,97,108,105,98,114,111,103,117,115,116,97,105,103,117,97,108,118,111,116,111,115
+,99,97,115,111,115,103,117,195,173,97,112,117,101,100,111,115,111,109,111,115,97
+,118,105,115,111,117,115,116,101,100,100,101,98,101,110,110,111,99,104,101,98,
+117,115,99,97,102,97,108,116,97,101,117,114,111,115,115,101,114,105,101,100,105,
+99,104,111,99,117,114,115,111,99,108,97,118,101,99,97,115,97,115,108,101,195,179
+,110,112,108,97,122,111,108,97,114,103,111,111,98,114,97,115,118,105,115,116,97,
+97,112,111,121,111,106,117,110,116,111,116,114,97,116,97,118,105,115,116,111,99,
+114,101,97,114,99,97,109,112,111,104,101,109,111,115,99,105,110,99,111,99,97,114
+,103,111,112,105,115,111,115,111,114,100,101,110,104,97,99,101,110,195,161,114,
+101,97,100,105,115,99,111,112,101,100,114,111,99,101,114,99,97,112,117,101,100,
+97,112,97,112,101,108,109,101,110,111,114,195,186,116,105,108,99,108,97,114,111,
+106,111,114,103,101,99,97,108,108,101,112,111,110,101,114,116,97,114,100,101,110
+,97,100,105,101,109,97,114,99,97,115,105,103,117,101,101,108,108,97,115,115,105,
+103,108,111,99,111,99,104,101,109,111,116,111,115,109,97,100,114,101,99,108,97,
+115,101,114,101,115,116,111,110,105,195,177,111,113,117,101,100,97,112,97,115,97
+,114,98,97,110,99,111,104,105,106,111,115,118,105,97,106,101,112,97,98,108,111,
+195,169,115,116,101,118,105,101,110,101,114,101,105,110,111,100,101,106,97,114,
+102,111,110,100,111,99,97,110,97,108,110,111,114,116,101,108,101,116,114,97,99,
+97,117,115,97,116,111,109,97,114,109,97,110,111,115,108,117,110,101,115,97,117,
+116,111,115,118,105,108,108,97,118,101,110,100,111,112,101,115,97,114,116,105,
+112,111,115,116,101,110,103,97,109,97,114,99,111,108,108,101,118,97,112,97,100,
+114,101,117,110,105,100,111,118,97,109,111,115,122,111,110,97,115,97,109,98,111,
+115,98,97,110,100,97,109,97,114,105,97,97,98,117,115,111,109,117,99,104,97,115,
+117,98,105,114,114,105,111,106,97,118,105,118,105,114,103,114,97,100,111,99,104,
+105,99,97,97,108,108,195,173,106,111,118,101,110,100,105,99,104,97,101,115,116,
+97,110,116,97,108,101,115,115,97,108,105,114,115,117,101,108,111,112,101,115,111
+,115,102,105,110,101,115,108,108,97,109,97,98,117,115,99,111,195,169,115,116,97,
+108,108,101,103,97,110,101,103,114,111,112,108,97,122,97,104,117,109,111,114,112
+,97,103,97,114,106,117,110,116,97,100,111,98,108,101,105,115,108,97,115,98,111,
+108,115,97,98,97,195,177,111,104,97,98,108,97,108,117,99,104,97,195,129,114,101,
+97,100,105,99,101,110,106,117,103,97,114,110,111,116,97,115,118,97,108,108,101,
+97,108,108,195,161,99,97,114,103,97,100,111,108,111,114,97,98,97,106,111,101,115
+,116,195,169,103,117,115,116,111,109,101,110,116,101,109,97,114,105,111,102,105,
+114,109,97,99,111,115,116,111,102,105,99,104,97,112,108,97,116,97,104,111,103,97
+,114,97,114,116,101,115,108,101,121,101,115,97,113,117,101,108,109,117,115,101,
+111,98,97,115,101,115,112,111,99,111,115,109,105,116,97,100,99,105,101,108,111,
+99,104,105,99,111,109,105,101,100,111,103,97,110,97,114,115,97,110,116,111,101,
+116,97,112,97,100,101,98,101,115,112,108,97,121,97,114,101,100,101,115,115,105,
+101,116,101,99,111,114,116,101,99,111,114,101,97,100,117,100,97,115,100,101,115,
+101,111,118,105,101,106,111,100,101,115,101,97,97,103,117,97,115,38,113,117,111,
+116,59,100,111,109,97,105,110,99,111,109,109,111,110,115,116,97,116,117,115,101,
+118,101,110,116,115,109,97,115,116,101,114,115,121,115,116,101,109,97,99,116,105
+,111,110,98,97,110,110,101,114,114,101,109,111,118,101,115,99,114,111,108,108,
+117,112,100,97,116,101,103,108,111,98,97,108,109,101,100,105,117,109,102,105,108
+,116,101,114,110,117,109,98,101,114,99,104,97,110,103,101,114,101,115,117,108,
+116,112,117,98,108,105,99,115,99,114,101,101,110,99,104,111,111,115,101,110,111,
+114,109,97,108,116,114,97,118,101,108,105,115,115,117,101,115,115,111,117,114,99
+,101,116,97,114,103,101,116,115,112,114,105,110,103,109,111,100,117,108,101,109,
+111,98,105,108,101,115,119,105,116,99,104,112,104,111,116,111,115,98,111,114,100
+,101,114,114,101,103,105,111,110,105,116,115,101,108,102,115,111,99,105,97,108,
+97,99,116,105,118,101,99,111,108,117,109,110,114,101,99,111,114,100,102,111,108,
+108,111,119,116,105,116,108,101,62,101,105,116,104,101,114,108,101,110,103,116,
+104,102,97,109,105,108,121,102,114,105,101,110,100,108,97,121,111,117,116,97,117
+,116,104,111,114,99,114,101,97,116,101,114,101,118,105,101,119,115,117,109,109,
+101,114,115,101,114,118,101,114,112,108,97,121,101,100,112,108,97,121,101,114,
+101,120,112,97,110,100,112,111,108,105,99,121,102,111,114,109,97,116,100,111,117
+,98,108,101,112,111,105,110,116,115,115,101,114,105,101,115,112,101,114,115,111,
+110,108,105,118,105,110,103,100,101,115,105,103,110,109,111,110,116,104,115,102,
+111,114,99,101,115,117,110,105,113,117,101,119,101,105,103,104,116,112,101,111,
+112,108,101,101,110,101,114,103,121,110,97,116,117,114,101,115,101,97,114,99,104
+,102,105,103,117,114,101,104,97,118,105,110,103,99,117,115,116,111,109,111,102,
+102,115,101,116,108,101,116,116,101,114,119,105,110,100,111,119,115,117,98,109,
+105,116,114,101,110,100,101,114,103,114,111,117,112,115,117,112,108,111,97,100,
+104,101,97,108,116,104,109,101,116,104,111,100,118,105,100,101,111,115,115,99,
+104,111,111,108,102,117,116,117,114,101,115,104,97,100,111,119,100,101,98,97,116
+,101,118,97,108,117,101,115,79,98,106,101,99,116,111,116,104,101,114,115,114,105
+,103,104,116,115,108,101,97,103,117,101,99,104,114,111,109,101,115,105,109,112,
+108,101,110,111,116,105,99,101,115,104,97,114,101,100,101,110,100,105,110,103,
+115,101,97,115,111,110,114,101,112,111,114,116,111,110,108,105,110,101,115,113,
+117,97,114,101,98,117,116,116,111,110,105,109,97,103,101,115,101,110,97,98,108,
+101,109,111,118,105,110,103,108,97,116,101,115,116,119,105,110,116,101,114,70,
+114,97,110,99,101,112,101,114,105,111,100,115,116,114,111,110,103,114,101,112,
+101,97,116,76,111,110,100,111,110,100,101,116,97,105,108,102,111,114,109,101,100
+,100,101,109,97,110,100,115,101,99,117,114,101,112,97,115,115,101,100,116,111,
+103,103,108,101,112,108,97,99,101,115,100,101,118,105,99,101,115,116,97,116,105,
+99,99,105,116,105,101,115,115,116,114,101,97,109,121,101,108,108,111,119,97,116,
+116,97,99,107,115,116,114,101,101,116,102,108,105,103,104,116,104,105,100,100,
+101,110,105,110,102,111,34,62,111,112,101,110,101,100,117,115,101,102,117,108,
+118,97,108,108,101,121,99,97,117,115,101,115,108,101,97,100,101,114,115,101,99,
+114,101,116,115,101,99,111,110,100,100,97,109,97,103,101,115,112,111,114,116,115
+,101,120,99,101,112,116,114,97,116,105,110,103,115,105,103,110,101,100,116,104,
+105,110,103,115,101,102,102,101,99,116,102,105,101,108,100,115,115,116,97,116,
+101,115,111,102,102,105,99,101,118,105,115,117,97,108,101,100,105,116,111,114,
+118,111,108,117,109,101,82,101,112,111,114,116,109,117,115,101,117,109,109,111,
+118,105,101,115,112,97,114,101,110,116,97,99,99,101,115,115,109,111,115,116,108,
+121,109,111,116,104,101,114,34,32,105,100,61,34,109,97,114,107,101,116,103,114,
+111,117,110,100,99,104,97,110,99,101,115,117,114,118,101,121,98,101,102,111,114,
+101,115,121,109,98,111,108,109,111,109,101,110,116,115,112,101,101,99,104,109,
+111,116,105,111,110,105,110,115,105,100,101,109,97,116,116,101,114,67,101,110,
+116,101,114,111,98,106,101,99,116,101,120,105,115,116,115,109,105,100,100,108,
+101,69,117,114,111,112,101,103,114,111,119,116,104,108,101,103,97,99,121,109,97,
+110,110,101,114,101,110,111,117,103,104,99,97,114,101,101,114,97,110,115,119,101
+,114,111,114,105,103,105,110,112,111,114,116,97,108,99,108,105,101,110,116,115,
+101,108,101,99,116,114,97,110,100,111,109,99,108,111,115,101,100,116,111,112,105
+,99,115,99,111,109,105,110,103,102,97,116,104,101,114,111,112,116,105,111,110,
+115,105,109,112,108,121,114,97,105,115,101,100,101,115,99,97,112,101,99,104,111,
+115,101,110,99,104,117,114,99,104,100,101,102,105,110,101,114,101,97,115,111,110
+,99,111,114,110,101,114,111,117,116,112,117,116,109,101,109,111,114,121,105,102,
+114,97,109,101,112,111,108,105,99,101,109,111,100,101,108,115,78,117,109,98,101,
+114,100,117,114,105,110,103,111,102,102,101,114,115,115,116,121,108,101,115,107,
+105,108,108,101,100,108,105,115,116,101,100,99,97,108,108,101,100,115,105,108,
+118,101,114,109,97,114,103,105,110,100,101,108,101,116,101,98,101,116,116,101,
+114,98,114,111,119,115,101,108,105,109,105,116,115,71,108,111,98,97,108,115,105,
+110,103,108,101,119,105,100,103,101,116,99,101,110,116,101,114,98,117,100,103,
+101,116,110,111,119,114,97,112,99,114,101,100,105,116,99,108,97,105,109,115,101,
+110,103,105,110,101,115,97,102,101,116,121,99,104,111,105,99,101,115,112,105,114
+,105,116,45,115,116,121,108,101,115,112,114,101,97,100,109,97,107,105,110,103,
+110,101,101,100,101,100,114,117,115,115,105,97,112,108,101,97,115,101,101,120,
+116,101,110,116,83,99,114,105,112,116,98,114,111,107,101,110,97,108,108,111,119,
+115,99,104,97,114,103,101,100,105,118,105,100,101,102,97,99,116,111,114,109,101,
+109,98,101,114,45,98,97,115,101,100,116,104,101,111,114,121,99,111,110,102,105,
+103,97,114,111,117,110,100,119,111,114,107,101,100,104,101,108,112,101,100,67,
+104,117,114,99,104,105,109,112,97,99,116,115,104,111,117,108,100,97,108,119,97,
+121,115,108,111,103,111,34,32,98,111,116,116,111,109,108,105,115,116,34,62,41,
+123,118,97,114,32,112,114,101,102,105,120,111,114,97,110,103,101,72,101,97,100,
+101,114,46,112,117,115,104,40,99,111,117,112,108,101,103,97,114,100,101,110,98,
+114,105,100,103,101,108,97,117,110,99,104,82,101,118,105,101,119,116,97,107,105,
+110,103,118,105,115,105,111,110,108,105,116,116,108,101,100,97,116,105,110,103,
+66,117,116,116,111,110,98,101,97,117,116,121,116,104,101,109,101,115,102,111,114
+,103,111,116,83,101,97,114,99,104,97,110,99,104,111,114,97,108,109,111,115,116,
+108,111,97,100,101,100,67,104,97,110,103,101,114,101,116,117,114,110,115,116,114
+,105,110,103,114,101,108,111,97,100,77,111,98,105,108,101,105,110,99,111,109,101
+,115,117,112,112,108,121,83,111,117,114,99,101,111,114,100,101,114,115,118,105,
+101,119,101,100,38,110,98,115,112,59,99,111,117,114,115,101,65,98,111,117,116,32
+,105,115,108,97,110,100,60,104,116,109,108,32,99,111,111,107,105,101,110,97,109,
+101,61,34,97,109,97,122,111,110,109,111,100,101,114,110,97,100,118,105,99,101,
+105,110,60,47,97,62,58,32,84,104,101,32,100,105,97,108,111,103,104,111,117,115,
+101,115,66,69,71,73,78,32,77,101,120,105,99,111,115,116,97,114,116,115,99,101,
+110,116,114,101,104,101,105,103,104,116,97,100,100,105,110,103,73,115,108,97,110
+,100,97,115,115,101,116,115,69,109,112,105,114,101,83,99,104,111,111,108,101,102
+,102,111,114,116,100,105,114,101,99,116,110,101,97,114,108,121,109,97,110,117,97
+,108,83,101,108,101,99,116,46,10,10,79,110,101,106,111,105,110,101,100,109,101,
+110,117,34,62,80,104,105,108,105,112,97,119,97,114,100,115,104,97,110,100,108,
+101,105,109,112,111,114,116,79,102,102,105,99,101,114,101,103,97,114,100,115,107
+,105,108,108,115,110,97,116,105,111,110,83,112,111,114,116,115,100,101,103,114,
+101,101,119,101,101,107,108,121,32,40,101,46,103,46,98,101,104,105,110,100,100,
+111,99,116,111,114,108,111,103,103,101,100,117,110,105,116,101,100,60,47,98,62,
+60,47,98,101,103,105,110,115,112,108,97,110,116,115,97,115,115,105,115,116,97,
+114,116,105,115,116,105,115,115,117,101,100,51,48,48,112,120,124,99,97,110,97,
+100,97,97,103,101,110,99,121,115,99,104,101,109,101,114,101,109,97,105,110,66,
+114,97,122,105,108,115,97,109,112,108,101,108,111,103,111,34,62,98,101,121,111,
+110,100,45,115,99,97,108,101,97,99,99,101,112,116,115,101,114,118,101,100,109,97
+,114,105,110,101,70,111,111,116,101,114,99,97,109,101,114,97,60,47,104,49,62,10,
+95,102,111,114,109,34,108,101,97,118,101,115,115,116,114,101,115,115,34,32,47,62
+,13,10,46,103,105,102,34,32,111,110,108,111,97,100,108,111,97,100,101,114,79,120
+,102,111,114,100,115,105,115,116,101,114,115,117,114,118,105,118,108,105,115,116
+,101,110,102,101,109,97,108,101,68,101,115,105,103,110,115,105,122,101,61,34,97,
+112,112,101,97,108,116,101,120,116,34,62,108,101,118,101,108,115,116,104,97,110,
+107,115,104,105,103,104,101,114,102,111,114,99,101,100,97,110,105,109,97,108,97,
+110,121,111,110,101,65,102,114,105,99,97,97,103,114,101,101,100,114,101,99,101,
+110,116,80,101,111,112,108,101,60,98,114,32,47,62,119,111,110,100,101,114,112,
+114,105,99,101,115,116,117,114,110,101,100,124,124,32,123,125,59,109,97,105,110,
+34,62,105,110,108,105,110,101,115,117,110,100,97,121,119,114,97,112,34,62,102,97
+,105,108,101,100,99,101,110,115,117,115,109,105,110,117,116,101,98,101,97,99,111
+,110,113,117,111,116,101,115,49,53,48,112,120,124,101,115,116,97,116,101,114,101
+,109,111,116,101,101,109,97,105,108,34,108,105,110,107,101,100,114,105,103,104,
+116,59,115,105,103,110,97,108,102,111,114,109,97,108,49,46,104,116,109,108,115,
+105,103,110,117,112,112,114,105,110,99,101,102,108,111,97,116,58,46,112,110,103,
+34,32,102,111,114,117,109,46,65,99,99,101,115,115,112,97,112,101,114,115,115,111
+,117,110,100,115,101,120,116,101,110,100,72,101,105,103,104,116,115,108,105,100,
+101,114,85,84,70,45,56,34,38,97,109,112,59,32,66,101,102,111,114,101,46,32,87,
+105,116,104,115,116,117,100,105,111,111,119,110,101,114,115,109,97,110,97,103,
+101,112,114,111,102,105,116,106,81,117,101,114,121,97,110,110,117,97,108,112,97,
+114,97,109,115,98,111,117,103,104,116,102,97,109,111,117,115,103,111,111,103,108
+,101,108,111,110,103,101,114,105,43,43,41,32,123,105,115,114,97,101,108,115,97,
+121,105,110,103,100,101,99,105,100,101,104,111,109,101,34,62,104,101,97,100,101,
+114,101,110,115,117,114,101,98,114,97,110,99,104,112,105,101,99,101,115,98,108,
+111,99,107,59,115,116,97,116,101,100,116,111,112,34,62,60,114,97,99,105,110,103,
+114,101,115,105,122,101,45,45,38,103,116,59,112,97,99,105,116,121,115,101,120,
+117,97,108,98,117,114,101,97,117,46,106,112,103,34,32,49,48,44,48,48,48,111,98,
+116,97,105,110,116,105,116,108,101,115,97,109,111,117,110,116,44,32,73,110,99,46
+,99,111,109,101,100,121,109,101,110,117,34,32,108,121,114,105,99,115,116,111,100
+,97,121,46,105,110,100,101,101,100,99,111,117,110,116,121,95,108,111,103,111,46,
+70,97,109,105,108,121,108,111,111,107,101,100,77,97,114,107,101,116,108,115,101,
+32,105,102,80,108,97,121,101,114,116,117,114,107,101,121,41,59,118,97,114,32,102
+,111,114,101,115,116,103,105,118,105,110,103,101,114,114,111,114,115,68,111,109,
+97,105,110,125,101,108,115,101,123,105,110,115,101,114,116,66,108,111,103,60,47,
+102,111,111,116,101,114,108,111,103,105,110,46,102,97,115,116,101,114,97,103,101
+,110,116,115,60,98,111,100,121,32,49,48,112,120,32,48,112,114,97,103,109,97,102,
+114,105,100,97,121,106,117,110,105,111,114,100,111,108,108,97,114,112,108,97,99,
+101,100,99,111,118,101,114,115,112,108,117,103,105,110,53,44,48,48,48,32,112,97,
+103,101,34,62,98,111,115,116,111,110,46,116,101,115,116,40,97,118,97,116,97,114,
+116,101,115,116,101,100,95,99,111,117,110,116,102,111,114,117,109,115,115,99,104
+,101,109,97,105,110,100,101,120,44,102,105,108,108,101,100,115,104,97,114,101,
+115,114,101,97,100,101,114,97,108,101,114,116,40,97,112,112,101,97,114,83,117,98
+,109,105,116,108,105,110,101,34,62,98,111,100,121,34,62,10,42,32,84,104,101,84,
+104,111,117,103,104,115,101,101,105,110,103,106,101,114,115,101,121,78,101,119,
+115,60,47,118,101,114,105,102,121,101,120,112,101,114,116,105,110,106,117,114,
+121,119,105,100,116,104,61,67,111,111,107,105,101,83,84,65,82,84,32,97,99,114,
+111,115,115,95,105,109,97,103,101,116,104,114,101,97,100,110,97,116,105,118,101,
+112,111,99,107,101,116,98,111,120,34,62,10,83,121,115,116,101,109,32,68,97,118,
+105,100,99,97,110,99,101,114,116,97,98,108,101,115,112,114,111,118,101,100,65,
+112,114,105,108,32,114,101,97,108,108,121,100,114,105,118,101,114,105,116,101,
+109,34,62,109,111,114,101,34,62,98,111,97,114,100,115,99,111,108,111,114,115,99,
+97,109,112,117,115,102,105,114,115,116,32,124,124,32,91,93,59,109,101,100,105,97
+,46,103,117,105,116,97,114,102,105,110,105,115,104,119,105,100,116,104,58,115,
+104,111,119,101,100,79,116,104,101,114,32,46,112,104,112,34,32,97,115,115,117,
+109,101,108,97,121,101,114,115,119,105,108,115,111,110,115,116,111,114,101,115,
+114,101,108,105,101,102,115,119,101,100,101,110,67,117,115,116,111,109,101,97,
+115,105,108,121,32,121,111,117,114,32,83,116,114,105,110,103,10,10,87,104,105,
+108,116,97,121,108,111,114,99,108,101,97,114,58,114,101,115,111,114,116,102,114,
+101,110,99,104,116,104,111,117,103,104,34,41,32,43,32,34,60,98,111,100,121,62,98
+,117,121,105,110,103,98,114,97,110,100,115,77,101,109,98,101,114,110,97,109,101,
+34,62,111,112,112,105,110,103,115,101,99,116,111,114,53,112,120,59,34,62,118,115
+,112,97,99,101,112,111,115,116,101,114,109,97,106,111,114,32,99,111,102,102,101,
+101,109,97,114,116,105,110,109,97,116,117,114,101,104,97,112,112,101,110,60,47,
+110,97,118,62,107,97,110,115,97,115,108,105,110,107,34,62,73,109,97,103,101,115,
+61,102,97,108,115,101,119,104,105,108,101,32,104,115,112,97,99,101,48,38,97,109,
+112,59,32,10,10,73,110,32,32,112,111,119,101,114,80,111,108,115,107,105,45,99,
+111,108,111,114,106,111,114,100,97,110,66,111,116,116,111,109,83,116,97,114,116,
+32,45,99,111,117,110,116,50,46,104,116,109,108,110,101,119,115,34,62,48,49,46,
+106,112,103,79,110,108,105,110,101,45,114,105,103,104,116,109,105,108,108,101,
+114,115,101,110,105,111,114,73,83,66,78,32,48,48,44,48,48,48,32,103,117,105,100,
+101,115,118,97,108,117,101,41,101,99,116,105,111,110,114,101,112,97,105,114,46,
+120,109,108,34,32,32,114,105,103,104,116,115,46,104,116,109,108,45,98,108,111,99
+,107,114,101,103,69,120,112,58,104,111,118,101,114,119,105,116,104,105,110,118,
+105,114,103,105,110,112,104,111,110,101,115,60,47,116,114,62,13,117,115,105,110,
+103,32,10,9,118,97,114,32,62,39,41,59,10,9,60,47,116,100,62,10,60,47,116,114,62,
+10,98,97,104,97,115,97,98,114,97,115,105,108,103,97,108,101,103,111,109,97,103,
+121,97,114,112,111,108,115,107,105,115,114,112,115,107,105,216,177,216,175,217,
+136,228,184,173,230,150,135,231,174,128,228,189,147,231,185,129,233,171,148,228,
+191,161,230,129,175,228,184,173,229,155,189,230,136,145,228,187,172,228,184,128,
+228,184,170,229,133,172,229,143,184,231,174,161,231,144,134,232,174,186,229,157,
+155,229,143,175,228,187,165,230,156,141,229,138,161,230,151,182,233,151,180,228,
+184,170,228,186,186,228,186,167,229,147,129,232,135,170,229,183,177,228,188,129,
+228,184,154,230,159,165,231,156,139,229,183,165,228,189,156,232,129,148,231,179,
+187,230,178,161,230,156,137,231,189,145,231,171,153,230,137,128,230,156,137,232,
+175,132,232,174,186,228,184,173,229,191,131,230,150,135,231,171,160,231,148,168,
+230,136,183,233,166,150,233,161,181,228,189,156,232,128,133,230,138,128,230,156,
+175,233,151,174,233,162,152,231,155,184,229,133,179,228,184,139,232,189,189,230,
+144,156,231,180,162,228,189,191,231,148,168,232,189,175,228,187,182,229,156,168,
+231,186,191,228,184,187,233,162,152,232,181,132,230,150,153,232,167,134,233,162,
+145,229,155,158,229,164,141,230,179,168,229,134,140,231,189,145,231,187,156,230,
+148,182,232,151,143,229,134,133,229,174,185,230,142,168,232,141,144,229,184,130,
+229,156,186,230,182,136,230,129,175,231,169,186,233,151,180,229,143,145,229,184,
+131,228,187,128,228,185,136,229,165,189,229,143,139,231,148,159,230,180,187,229,
+155,190,231,137,135,229,143,145,229,177,149,229,166,130,230,158,156,230,137,139,
+230,156,186,230,150,176,233,151,187,230,156,128,230,150,176,230,150,185,229,188,
+143,229,140,151,228,186,172,230,143,144,228,190,155,229,133,179,228,186,142,230,
+155,180,229,164,154,232,191,153,228,184,170,231,179,187,231,187,159,231,159,165,
+233,129,147,230,184,184,230,136,143,229,185,191,229,145,138,229,133,182,228,187,
+150,229,143,145,232,161,168,229,174,137,229,133,168,231,172,172,228,184,128,228,
+188,154,229,145,152,232,191,155,232,161,140,231,130,185,229,135,187,231,137,136,
+230,157,131,231,148,181,229,173,144,228,184,150,231,149,140,232,174,190,232,174,
+161,229,133,141,232,180,185,230,149,153,232,130,178,229,138,160,229,133,165,230,
+180,187,229,138,168,228,187,150,228,187,172,229,149,134,229,147,129,229,141,154,
+229,174,162,231,142,176,229,156,168,228,184,138,230,181,183,229,166,130,228,189,
+149,229,183,178,231,187,143,231,149,153,232,168,128,232,175,166,231,187,134,231,
+164,190,229,140,186,231,153,187,229,189,149,230,156,172,231,171,153,233,156,128,
+232,166,129,228,187,183,230,160,188,230,148,175,230,140,129,229,155,189,233,153,
+133,233,147,190,230,142,165,229,155,189,229,174,182,229,187,186,232,174,190,230,
+156,139,229,143,139,233,152,133,232,175,187,230,179,149,229,190,139,228,189,141,
+231,189,174,231,187,143,230,181,142,233,128,137,230,139,169,232,191,153,230,160,
+183,229,189,147,229,137,141,229,136,134,231,177,187,230,142,146,232,161,140,229,
+155,160,228,184,186,228,186,164,230,152,147,230,156,128,229,144,142,233,159,179,
+228,185,144,228,184,141,232,131,189,233,128,154,232,191,135,232,161,140,228,184,
+154,231,167,145,230,138,128,229,143,175,232,131,189,232,174,190,229,164,135,229,
+144,136,228,189,156,229,164,167,229,174,182,231,164,190,228,188,154,231,160,148,
+231,169,182,228,184,147,228,184,154,229,133,168,233,131,168,233,161,185,231,155,
+174,232,191,153,233,135,140,232,191,152,230,152,175,229,188,128,229,167,139,230,
+131,133,229,134,181,231,148,181,232,132,145,230,150,135,228,187,182,229,147,129,
+231,137,140,229,184,174,229,138,169,230,150,135,229,140,150,232,181,132,230,186,
+144,229,164,167,229,173,166,229,173,166,228,185,160,229,156,176,229,157,128,230,
+181,143,232,167,136,230,138,149,232,181,132,229,183,165,231,168,139,232,166,129,
+230,177,130,230,128,142,228,185,136,230,151,182,229,128,153,229,138,159,232,131,
+189,228,184,187,232,166,129,231,155,174,229,137,141,232,181,132,232,174,175,229,
+159,142,229,184,130,230,150,185,230,179,149,231,148,181,229,189,177,230,139,155,
+232,129,152,229,163,176,230,152,142,228,187,187,228,189,149,229,129,165,229,186,
+183,230,149,176,230,141,174,231,190,142,229,155,189,230,177,189,232,189,166,228,
+187,139,231,187,141,228,189,134,230,152,175,228,186,164,230,181,129,231,148,159,
+228,186,167,230,137,128,228,187,165,231,148,181,232,175,157,230,152,190,231,164,
+186,228,184,128,228,186,155,229,141,149,228,189,141,228,186,186,229,145,152,229,
+136,134,230,158,144,229,156,176,229,155,190,230,151,133,230,184,184,229,183,165,
+229,133,183,229,173,166,231,148,159,231,179,187,229,136,151,231,189,145,229,143,
+139,229,184,150,229,173,144,229,175,134,231,160,129,233,162,145,233,129,147,230,
+142,167,229,136,182,229,156,176,229,140,186,229,159,186,230,156,172,229,133,168,
+229,155,189,231,189,145,228,184,138,233,135,141,232,166,129,231,172,172,228,186,
+140,229,150,156,230,172,162,232,191,155,229,133,165,229,143,139,230,131,133,232,
+191,153,228,186,155,232,128,131,232,175,149,229,143,145,231,142,176,229,159,185,
+232,174,173,228,187,165,228,184,138,230,148,191,229,186,156,230,136,144,228,184,
+186,231,142,175,229,162,131,233,166,153,230,184,175,229,144,140,230,151,182,229,
+168,177,228,185,144,229,143,145,233,128,129,228,184,128,229,174,154,229,188,128,
+229,143,145,228,189,156,229,147,129,230,160,135,229,135,134,230,172,162,232,191,
+142,232,167,163,229,134,179,229,156,176,230,150,185,228,184,128,228,184,139,228,
+187,165,229,143,138,232,180,163,228,187,187,230,136,150,232,128,133,229,174,162,
+230,136,183,228,187,163,232,161,168,231,167,175,229,136,134,229,165,179,228,186,
+186,230,149,176,231,160,129,233,148,128,229,148,174,229,135,186,231,142,176,231,
+166,187,231,186,191,229,186,148,231,148,168,229,136,151,232,161,168,228,184,141,
+229,144,140,231,188,150,232,190,145,231,187,159,232,174,161,230,159,165,232,175,
+162,228,184,141,232,166,129,230,156,137,229,133,179,230,156,186,230,158,132,229,
+190,136,229,164,154,230,146,173,230,148,190,231,187,132,231,187,135,230,148,191,
+231,173,150,231,155,180,230,142,165,232,131,189,229,138,155,230,157,165,230,186,
+144,230,153,130,233,150,147,231,156,139,229,136,176,231,131,173,233,151,168,229,
+133,179,233,148,174,228,184,147,229,140,186,233,157,158,229,184,184,232,139,177,
+232,175,173,231,153,190,229,186,166,229,184,140,230,156,155,231,190,142,229,165,
+179,230,175,148,232,190,131,231,159,165,232,175,134,232,167,132,229,174,154,229,
+187,186,232,174,174,233,131,168,233,151,168,230,132,143,232,167,129,231,178,190,
+229,189,169,230,151,165,230,156,172,230,143,144,233,171,152,229,143,145,232,168,
+128,230,150,185,233,157,162,229,159,186,233,135,145,229,164,132,231,144,134,230,
+157,131,233,153,144,229,189,177,231,137,135,233,147,182,232,161,140,232,191,152,
+230,156,137,229,136,134,228,186,171,231,137,169,229,147,129,231,187,143,232,144,
+165,230,183,187,229,138,160,228,184,147,229,174,182,232,191,153,231,167,141,232,
+175,157,233,162,152,232,181,183,230,157,165,228,184,154,229,138,161,229,133,172,
+229,145,138,232,174,176,229,189,149,231,174,128,228,187,139,232,180,168,233,135,
+143,231,148,183,228,186,186,229,189,177,229,147,141,229,188,149,231,148,168,230,
+138,165,229,145,138,233,131,168,229,136,134,229,191,171,233,128,159,229,146,168,
+232,175,162,230,151,182,229,176,154,230,179,168,230,132,143,231,148,179,232,175,
+183,229,173,166,230,160,161,229,186,148,232,175,165,229,142,134,229,143,178,229,
+143,170,230,152,175,232,191,148,229,155,158,232,180,173,228,185,176,229,144,141,
+231,167,176,228,184,186,228,186,134,230,136,144,229,138,159,232,175,180,230,152,
+142,228,190,155,229,186,148,229,173,169,229,173,144,228,184,147,233,162,152,231,
+168,139,229,186,143,228,184,128,232,136,172,230,156,131,229,147,161,229,143,170,
+230,156,137,229,133,182,229,174,131,228,191,157,230,138,164,232,128,140,228,184,
+148,228,187,138,229,164,169,231,170,151,229,143,163,229,138,168,230,128,129,231,
+138,182,230,128,129,231,137,185,229,136,171,232,174,164,228,184,186,229,191,133,
+233,161,187,230,155,180,230,150,176,229,176,143,232,175,180,230,136,145,229,128,
+145,228,189,156,228,184,186,229,170,146,228,189,147,229,140,133,230,139,172,233,
+130,163,228,185,136,228,184,128,230,160,183,229,155,189,229,134,133,230,152,175,
+229,144,166,230,160,185,230,141,174,231,148,181,232,167,134,229,173,166,233,153,
+162,229,133,183,230,156,137,232,191,135,231,168,139,231,148,177,228,186,142,228,
+186,186,230,137,141,229,135,186,230,157,165,228,184,141,232,191,135,230,173,163,
+229,156,168,230,152,142,230,152,159,230,149,133,228,186,139,229,133,179,231,179,
+187,230,160,135,233,162,152,229,149,134,229,138,161,232,190,147,229,133,165,228,
+184,128,231,155,180,229,159,186,231,161,128,230,149,153,229,173,166,228,186,134,
+232,167,163,229,187,186,231,173,145,231,187,147,230,158,156,229,133,168,231,144,
+131,233,128,154,231,159,165,232,174,161,229,136,146,229,175,185,228,186,142,232,
+137,186,230,156,175,231,155,184,229,134,140,229,143,145,231,148,159,231,156,159,
+231,154,132,229,187,186,231,171,139,231,173,137,231,186,167,231,177,187,229,158,
+139,231,187,143,233,170,140,229,174,158,231,142,176,229,136,182,228,189,156,230,
+157,165,232,135,170,230,160,135,231,173,190,228,187,165,228,184,139,229,142,159,
+229,136,155,230,151,160,230,179,149,229,133,182,228,184,173,229,128,139,228,186,
+186,228,184,128,229,136,135,230,140,135,229,141,151,229,133,179,233,151,173,233,
+155,134,229,155,162,231,172,172,228,184,137,229,133,179,230,179,168,229,155,160,
+230,173,164,231,133,167,231,137,135,230,183,177,229,156,179,229,149,134,228,184,
+154,229,185,191,229,183,158,230,151,165,230,156,159,233,171,152,231,186,167,230,
+156,128,232,191,145,231,187,188,229,144,136,232,161,168,231,164,186,228,184,147,
+232,190,145,232,161,140,228,184,186,228,186,164,233,128,154,232,175,132,228,187,
+183,232,167,137,229,190,151,231,178,190,229,141,142,229,174,182,229,186,173,229,
+174,140,230,136,144,230,132,159,232,167,137,229,174,137,232,163,133,229,190,151,
+229,136,176,233,130,174,228,187,182,229,136,182,229,186,166,233,163,159,229,147,
+129,232,153,189,231,132,182,232,189,172,232,189,189,230,138,165,228,187,183,232,
+174,176,232,128,133,230,150,185,230,161,136,232,161,140,230,148,191,228,186,186,
+230,176,145,231,148,168,229,147,129,228,184,156,232,165,191,230,143,144,229,135,
+186,233,133,146,229,186,151,231,132,182,229,144,142,228,187,152,230,172,190,231,
+131,173,231,130,185,228,187,165,229,137,141,229,174,140,229,133,168,229,143,145,
+229,184,150,232,174,190,231,189,174,233,162,134,229,175,188,229,183,165,228,184,
+154,229,140,187,233,153,162,231,156,139,231,156,139,231,187,143,229,133,184,229,
+142,159,229,155,160,229,185,179,229,143,176,229,144,132,231,167,141,229,162,158,
+229,138,160,230,157,144,230,150,153,230,150,176,229,162,158,228,185,139,229,144,
+142,232,129,140,228,184,154,230,149,136,230,158,156,228,187,138,229,185,180,232,
+174,186,230,150,135,230,136,145,229,155,189,229,145,138,232,175,137,231,137,136,
+228,184,187,228,191,174,230,148,185,229,143,130,228,184,142,230,137,147,229,141,
+176,229,191,171,228,185,144,230,156,186,230,162,176,232,167,130,231,130,185,229,
+173,152,229,156,168,231,178,190,231,165,158,232,142,183,229,190,151,229,136,169,
+231,148,168,231,187,167,231,187,173,228,189,160,228,187,172,232,191,153,228,185,
+136,230,168,161,229,188,143,232,175,173,232,168,128,232,131,189,229,164,159,233,
+155,133,232,153,142,230,147,141,228,189,156,233,163,142,230,160,188,228,184,128,
+232,181,183,231,167,145,229,173,166,228,189,147,232,130,178,231,159,173,228,191,
+161,230,157,161,228,187,182,230,178,187,231,150,151,232,191,144,229,138,168,228,
+186,167,228,184,154,228,188,154,232,174,174,229,175,188,232,136,170,229,133,136,
+231,148,159,232,129,148,231,155,159,229,143,175,230,152,175,229,149,143,233,161,
+140,231,187,147,230,158,132,228,189,156,231,148,168,232,176,131,230,159,165,232,
+179,135,230,150,153,232,135,170,229,138,168,232,180,159,232,180,163,229,134,156,
+228,184,154,232,174,191,233,151,174,229,174,158,230,150,189,230,142,165,229,143,
+151,232,174,168,232,174,186,233,130,163,228,184,170,229,143,141,233,166,136,229,
+138,160,229,188,186,229,165,179,230,128,167,232,140,131,229,155,180,230,156,141,
+229,139,153,228,188,145,233,151,178,228,187,138,230,151,165,229,174,162,230,156,
+141,232,167,128,231,156,139,229,143,130,229,138,160,231,154,132,232,175,157,228,
+184,128,231,130,185,228,191,157,232,175,129,229,155,190,228,185,166,230,156,137,
+230,149,136,230,181,139,232,175,149,231,167,187,229,138,168,230,137,141,232,131,
+189,229,134,179,229,174,154,232,130,161,231,165,168,228,184,141,230,150,173,233,
+156,128,230,177,130,228,184,141,229,190,151,229,138,158,230,179,149,228,185,139,
+233,151,180,233,135,135,231,148,168,232,144,165,233,148,128,230,138,149,232,175,
+137,231,155,174,230,160,135,231,136,177,230,131,133,230,145,132,229,189,177,230,
+156,137,228,186,155,232,164,135,232,163,189,230,150,135,229,173,166,230,156,186,
+228,188,154,230,149,176,229,173,151,232,163,133,228,191,174,232,180,173,231,137,
+169,229,134,156,230,157,145,229,133,168,233,157,162,231,178,190,229,147,129,229,
+133,182,229,174,158,228,186,139,230,131,133,230,176,180,229,185,179,230,143,144,
+231,164,186,228,184,138,229,184,130,232,176,162,232,176,162,230,153,174,233,128,
+154,230,149,153,229,184,136,228,184,138,228,188,160,231,177,187,229,136,171,230,
+173,140,230,155,178,230,139,165,230,156,137,229,136,155,230,150,176,233,133,141,
+228,187,182,229,143,170,232,166,129,230,151,182,228,187,163,232,179,135,232,168,
+138,232,190,190,229,136,176,228,186,186,231,148,159,232,174,162,233,152,133,232,
+128,129,229,184,136,229,177,149,231,164,186,229,191,131,231,144,134,232,180,180,
+229,173,144,231,182,178,231,171,153,228,184,187,233,161,140,232,135,170,231,132,
+182,231,186,167,229,136,171,231,174,128,229,141,149,230,148,185,233,157,169,233,
+130,163,228,186,155,230,157,165,232,175,180,230,137,147,229,188,128,228,187,163,
+231,160,129,229,136,160,233,153,164,232,175,129,229,136,184,232,138,130,231,155,
+174,233,135,141,231,130,185,230,172,161,230,149,184,229,164,154,229,176,145,232,
+167,132,229,136,146,232,181,132,233,135,145,230,137,190,229,136,176,228,187,165,
+229,144,142,229,164,167,229,133,168,228,184,187,233,161,181,230,156,128,228,189,
+179,229,155,158,231,173,148,229,164,169,228,184,139,228,191,157,233,154,156,231,
+142,176,228,187,163,230,163,128,230,159,165,230,138,149,231,165,168,229,176,143,
+230,151,182,230,178,146,230,156,137,230,173,163,229,184,184,231,148,154,232,135,
+179,228,187,163,231,144,134,231,155,174,229,189,149,229,133,172,229,188,128,229,
+164,141,229,136,182,233,135,145,232,158,141,229,185,184,231,166,143,231,137,136,
+230,156,172,229,189,162,230,136,144,229,135,134,229,164,135,232,161,140,230,131,
+133,229,155,158,229,136,176,230,128,157,230,131,179,230,128,142,230,160,183,229,
+141,143,232,174,174,232,174,164,232,175,129,230,156,128,229,165,189,228,186,167,
+231,148,159,230,140,137,231,133,167,230,156,141,232,163,133,229,185,191,228,184,
+156,229,138,168,230,188,171,233,135,135,232,180,173,230,150,176,230,137,139,231,
+187,132,229,155,190,233,157,162,230,157,191,229,143,130,232,128,131,230,148,191,
+230,178,187,229,174,185,230,152,147,229,164,169,229,156,176,229,138,170,229,138,
+155,228,186,186,228,187,172,229,141,135,231,186,167,233,128,159,229,186,166,228,
+186,186,231,137,169,232,176,131,230,149,180,230,181,129,232,161,140,233,128,160,
+230,136,144,230,150,135,229,173,151,233,159,169,229,155,189,232,180,184,230,152,
+147,229,188,128,229,177,149,231,155,184,233,151,156,232,161,168,231,142,176,229,
+189,177,232,167,134,229,166,130,230,173,164,231,190,142,229,174,185,229,164,167,
+229,176,143,230,138,165,233,129,147,230,157,161,230,172,190,229,191,131,230,131,
+133,232,174,184,229,164,154,230,179,149,232,167,132,229,174,182,229,177,133,228,
+185,166,229,186,151,232,191,158,230,142,165,231,171,139,229,141,179,228,184,190,
+230,138,165,230,138,128,229,183,167,229,165,165,232,191,144,231,153,187,229,133,
+165,228,187,165,230,157,165,231,144,134,232,174,186,228,186,139,228,187,182,232,
+135,170,231,148,177,228,184,173,229,141,142,229,138,158,229,133,172,229,166,136,
+229,166,136,231,156,159,230,173,163,228,184,141,233,148,153,229,133,168,230,150,
+135,229,144,136,229,144,140,228,187,183,229,128,188,229,136,171,228,186,186,231,
+155,145,231,157,163,229,133,183,228,189,147,228,184,150,231,186,170,229,155,162,
+233,152,159,229,136,155,228,184,154,230,137,191,230,139,133,229,162,158,233,149,
+191,230,156,137,228,186,186,228,191,157,230,140,129,229,149,134,229,174,182,231,
+187,180,228,191,174,229,143,176,230,185,190,229,183,166,229,143,179,232,130,161,
+228,187,189,231,173,148,230,161,136,229,174,158,233,153,133,231,148,181,228,191,
+161,231,187,143,231,144,134,231,148,159,229,145,189,229,174,163,228,188,160,228,
+187,187,229,138,161,230,173,163,229,188,143,231,137,185,232,137,178,228,184,139,
+230,157,165,229,141,143,228,188,154,229,143,170,232,131,189,229,189,147,231,132,
+182,233,135,141,230,150,176,229,133,167,229,174,185,230,140,135,229,175,188,232,
+191,144,232,161,140,230,151,165,229,191,151,232,179,163,229,174,182,232,182,133,
+232,191,135,229,156,159,229,156,176,230,181,153,230,177,159,230,148,175,228,187,
+152,230,142,168,229,135,186,231,171,153,233,149,191,230,157,173,229,183,158,230,
+137,167,232,161,140,229,136,182,233,128,160,228,185,139,228,184,128,230,142,168,
+229,185,191,231,142,176,229,156,186,230,143,143,232,191,176,229,143,152,229,140,
+150,228,188,160,231,187,159,230,173,140,230,137,139,228,191,157,233,153,169,232,
+175,190,231,168,139,229,140,187,231,150,151,231,187,143,232,191,135,232,191,135,
+229,142,187,228,185,139,229,137,141,230,148,182,229,133,165,229,185,180,229,186,
+166,230,157,130,229,191,151,231,190,142,228,184,189,230,156,128,233,171,152,231,
+153,187,233,153,134,230,156,170,230,157,165,229,138,160,229,183,165,229,133,141,
+232,180,163,230,149,153,231,168,139,231,137,136,229,157,151,232,186,171,228,189,
+147,233,135,141,229,186,134,229,135,186,229,148,174,230,136,144,230,156,172,229,
+189,162,229,188,143,229,156,159,232,177,134,229,135,186,229,131,185,228,184,156,
+230,150,185,233,130,174,231,174,177,229,141,151,228,186,172,230,177,130,232,129,
+140,229,143,150,229,190,151,232,129,140,228,189,141,231,155,184,228,191,161,233,
+161,181,233,157,162,229,136,134,233,146,159,231,189,145,233,161,181,231,161,174,
+229,174,154,229,155,190,228,190,139,231,189,145,229,157,128,231,167,175,230,158,
+129,233,148,153,232,175,175,231,155,174,231,154,132,229,174,157,232,180,157,230,
+156,186,229,133,179,233,163,142,233,153,169,230,142,136,230,157,131,231,151,133,
+230,175,146,229,174,160,231,137,169,233,153,164,228,186,134,232,169,149,232,171,
+150,231,150,190,231,151,133,229,143,138,230,151,182,230,177,130,232,180,173,231,
+171,153,231,130,185,229,132,191,231,171,165,230,175,143,229,164,169,228,184,173,
+229,164,174,232,174,164,232,175,134,230,175,143,228,184,170,229,164,169,230,180,
+165,229,173,151,228,189,147,229,143,176,231,129,163,231,187,180,230,138,164,230,
+156,172,233,161,181,228,184,170,230,128,167,229,174,152,230,150,185,229,184,184,
+232,167,129,231,155,184,230,156,186,230,136,152,231,149,165,229,186,148,229,189,
+147,229,190,139,229,184,136,230,150,185,228,190,191,230,160,161,229,155,173,232,
+130,161,229,184,130,230,136,191,229,177,139,230,160,143,231,155,174,229,145,152,
+229,183,165,229,175,188,232,135,180,231,170,129,231,132,182,233,129,147,229,133,
+183,230,156,172,231,189,145,231,187,147,229,144,136,230,161,163,230,161,136,229,
+138,179,229,138,168,229,143,166,229,164,150,231,190,142,229,133,131,229,188,149,
+232,181,183,230,148,185,229,143,152,231,172,172,229,155,155,228,188,154,232,174,
+161,232,170,170,230,152,142,233,154,144,231,167,129,229,174,157,229,174,157,232,
+167,132,232,140,131,230,182,136,232,180,185,229,133,177,229,144,140,229,191,152,
+232,174,176,228,189,147,231,179,187,229,184,166,230,157,165,229,144,141,229,173,
+151,231,153,188,232,161,168,229,188,128,230,148,190,229,138,160,231,155,159,229,
+143,151,229,136,176,228,186,140,230,137,139,229,164,167,233,135,143,230,136,144,
+228,186,186,230,149,176,233,135,143,229,133,177,228,186,171,229,140,186,229,159,
+159,229,165,179,229,173,169,229,142,159,229,136,153,230,137,128,229,156,168,231,
+187,147,230,157,159,233,128,154,228,191,161,232,182,133,231,186,167,233,133,141,
+231,189,174,229,189,147,230,151,182,228,188,152,231,167,128,230,128,167,230,132,
+159,230,136,191,228,186,167,233,129,138,230,136,178,229,135,186,229,143,163,230,
+143,144,228,186,164,229,176,177,228,184,154,228,191,157,229,129,165,231,168,139,
+229,186,166,229,143,130,230,149,176,228,186,139,228,184,154,230,149,180,228,184,
+170,229,177,177,228,184,156,230,131,133,230,132,159,231,137,185,230,174,138,229,
+136,134,233,161,158,230,144,156,229,176,139,229,177,158,228,186,142,233,151,168,
+230,136,183,232,180,162,229,138,161,229,163,176,233,159,179,229,143,138,229,133,
+182,232,180,162,231,187,143,229,157,154,230,140,129,229,185,178,233,131,168,230,
+136,144,231,171,139,229,136,169,231,155,138,232,128,131,232,153,145,230,136,144,
+233,131,189,229,140,133,232,163,133,231,148,168,230,136,182,230,175,148,232,181,
+155,230,150,135,230,152,142,230,139,155,229,149,134,229,174,140,230,149,180,231,
+156,159,230,152,175,231,156,188,231,157,155,228,188,153,228,188,180,229,168,129,
+230,156,155,233,162,134,229,159,159,229,141,171,231,148,159,228,188,152,230,131,
+160,232,171,150,229,163,135,229,133,172,229,133,177,232,137,175,229,165,189,229,
+133,133,229,136,134,231,172,166,229,144,136,233,153,132,228,187,182,231,137,185,
+231,130,185,228,184,141,229,143,175,232,139,177,230,150,135,232,181,132,228,186,
+167,230,160,185,230,156,172,230,152,142,230,152,190,229,175,134,231,162,188,229,
+133,172,228,188,151,230,176,145,230,151,143,230,155,180,229,138,160,228,186,171,
+229,143,151,229,144,140,229,173,166,229,144,175,229,138,168,233,128,130,229,144,
+136,229,142,159,230,157,165,233,151,174,231,173,148,230,156,172,230,150,135,231,
+190,142,233,163,159,231,187,191,232,137,178,231,168,179,229,174,154,231,187,136,
+228,186,142,231,148,159,231,137,169,228,190,155,230,177,130,230,144,156,231,139,
+144,229,138,155,233,135,143,228,184,165,233,135,141,230,176,184,232,191,156,229,
+134,153,231,156,159,230,156,137,233,153,144,231,171,158,228,186,137,229,175,185,
+232,177,161,232,180,185,231,148,168,228,184,141,229,165,189,231,187,157,229,175,
+185,229,141,129,229,136,134,228,191,131,232,191,155,231,130,185,232,175,132,229,
+189,177,233,159,179,228,188,152,229,138,191,228,184,141,229,176,145,230,172,163,
+232,181,143,229,185,182,228,184,148,230,156,137,231,130,185,230,150,185,229,144,
+145,229,133,168,230,150,176,228,191,161,231,148,168,232,174,190,230,150,189,229,
+189,162,232,177,161,232,181,132,230,160,188,231,170,129,231,160,180,233,154,143,
+231,157,128,233,135,141,229,164,167,228,186,142,230,152,175,230,175,149,228,184,
+154,230,153,186,232,131,189,229,140,150,229,183,165,229,174,140,231,190,142,229,
+149,134,229,159,142,231,187,159,228,184,128,229,135,186,231,137,136,230,137,147,
+233,128,160,231,148,162,229,147,129,230,166,130,229,134,181,231,148,168,228,186,
+142,228,191,157,231,149,153,229,155,160,231,180,160,228,184,173,229,156,139,229,
+173,152,229,130,168,232,180,180,229,155,190,230,156,128,230,132,155,233,149,191,
+230,156,159,229,143,163,228,187,183,231,144,134,232,180,162,229,159,186,229,156,
+176,229,174,137,230,142,146,230,173,166,230,177,137,233,135,140,233,157,162,229,
+136,155,229,187,186,229,164,169,231,169,186,233,166,150,229,133,136,229,174,140,
+229,150,132,233,169,177,229,138,168,228,184,139,233,157,162,228,184,141,229,134,
+141,232,175,154,228,191,161,230,132,143,228,185,137,233,152,179,229,133,137,232,
+139,177,229,155,189,230,188,130,228,186,174,229,134,155,228,186,139,231,142,169,
+229,174,182,231,190,164,228,188,151,229,134,156,230,176,145,229,141,179,229,143,
+175,229,144,141,231,168,177,229,174,182,229,133,183,229,138,168,231,148,187,230,
+131,179,229,136,176,230,179,168,230,152,142,229,176,143,229,173,166,230,128,167,
+232,131,189,232,128,131,231,160,148,231,161,172,228,187,182,232,167,130,231,156,
+139,230,184,133,230,165,154,230,144,158,231,172,145,233,166,150,233,160,129,233,
+187,132,233,135,145,233,128,130,231,148,168,230,177,159,232,139,143,231,156,159,
+229,174,158,228,184,187,231,174,161,233,152,182,230,174,181,232,168,187,229,134,
+138,231,191,187,232,175,145,230,157,131,229,136,169,229,129,154,229,165,189,228,
+188,188,228,185,142,233,128,154,232,174,175,230,150,189,229,183,165,231,139,128,
+230,133,139,228,185,159,232,174,184,231,142,175,228,191,157,229,159,185,229,133,
+187,230,166,130,229,191,181,229,164,167,229,158,139,230,156,186,231,165,168,231,
+144,134,232,167,163,229,140,191,229,144,141,99,117,97,110,100,111,101,110,118,
+105,97,114,109,97,100,114,105,100,98,117,115,99,97,114,105,110,105,99,105,111,
+116,105,101,109,112,111,112,111,114,113,117,101,99,117,101,110,116,97,101,115,
+116,97,100,111,112,117,101,100,101,110,106,117,101,103,111,115,99,111,110,116,
+114,97,101,115,116,195,161,110,110,111,109,98,114,101,116,105,101,110,101,110,
+112,101,114,102,105,108,109,97,110,101,114,97,97,109,105,103,111,115,99,105,117,
+100,97,100,99,101,110,116,114,111,97,117,110,113,117,101,112,117,101,100,101,115
+,100,101,110,116,114,111,112,114,105,109,101,114,112,114,101,99,105,111,115,101,
+103,195,186,110,98,117,101,110,111,115,118,111,108,118,101,114,112,117,110,116,
+111,115,115,101,109,97,110,97,104,97,98,195,173,97,97,103,111,115,116,111,110,
+117,101,118,111,115,117,110,105,100,111,115,99,97,114,108,111,115,101,113,117,
+105,112,111,110,105,195,177,111,115,109,117,99,104,111,115,97,108,103,117,110,97
+,99,111,114,114,101,111,105,109,97,103,101,110,112,97,114,116,105,114,97,114,114
+,105,98,97,109,97,114,195,173,97,104,111,109,98,114,101,101,109,112,108,101,111,
+118,101,114,100,97,100,99,97,109,98,105,111,109,117,99,104,97,115,102,117,101,
+114,111,110,112,97,115,97,100,111,108,195,173,110,101,97,112,97,114,101,99,101,
+110,117,101,118,97,115,99,117,114,115,111,115,101,115,116,97,98,97,113,117,105,
+101,114,111,108,105,98,114,111,115,99,117,97,110,116,111,97,99,99,101,115,111,
+109,105,103,117,101,108,118,97,114,105,111,115,99,117,97,116,114,111,116,105,101
+,110,101,115,103,114,117,112,111,115,115,101,114,195,161,110,101,117,114,111,112
+,97,109,101,100,105,111,115,102,114,101,110,116,101,97,99,101,114,99,97,100,101,
+109,195,161,115,111,102,101,114,116,97,99,111,99,104,101,115,109,111,100,101,108
+,111,105,116,97,108,105,97,108,101,116,114,97,115,97,108,103,195,186,110,99,111,
+109,112,114,97,99,117,97,108,101,115,101,120,105,115,116,101,99,117,101,114,112,
+111,115,105,101,110,100,111,112,114,101,110,115,97,108,108,101,103,97,114,118,
+105,97,106,101,115,100,105,110,101,114,111,109,117,114,99,105,97,112,111,100,114
+,195,161,112,117,101,115,116,111,100,105,97,114,105,111,112,117,101,98,108,111,
+113,117,105,101,114,101,109,97,110,117,101,108,112,114,111,112,105,111,99,114,
+105,115,105,115,99,105,101,114,116,111,115,101,103,117,114,111,109,117,101,114,
+116,101,102,117,101,110,116,101,99,101,114,114,97,114,103,114,97,110,100,101,101
+,102,101,99,116,111,112,97,114,116,101,115,109,101,100,105,100,97,112,114,111,
+112,105,97,111,102,114,101,99,101,116,105,101,114,114,97,101,45,109,97,105,108,
+118,97,114,105,97,115,102,111,114,109,97,115,102,117,116,117,114,111,111,98,106,
+101,116,111,115,101,103,117,105,114,114,105,101,115,103,111,110,111,114,109,97,
+115,109,105,115,109,111,115,195,186,110,105,99,111,99,97,109,105,110,111,115,105
+,116,105,111,115,114,97,122,195,179,110,100,101,98,105,100,111,112,114,117,101,
+98,97,116,111,108,101,100,111,116,101,110,195,173,97,106,101,115,195,186,115,101
+,115,112,101,114,111,99,111,99,105,110,97,111,114,105,103,101,110,116,105,101,
+110,100,97,99,105,101,110,116,111,99,195,161,100,105,122,104,97,98,108,97,114,
+115,101,114,195,173,97,108,97,116,105,110,97,102,117,101,114,122,97,101,115,116,
+105,108,111,103,117,101,114,114,97,101,110,116,114,97,114,195,169,120,105,116,
+111,108,195,179,112,101,122,97,103,101,110,100,97,118,195,173,100,101,111,101,
+118,105,116,97,114,112,97,103,105,110,97,109,101,116,114,111,115,106,97,118,105,
+101,114,112,97,100,114,101,115,102,195,161,99,105,108,99,97,98,101,122,97,195,
+161,114,101,97,115,115,97,108,105,100,97,101,110,118,195,173,111,106,97,112,195,
+179,110,97,98,117,115,111,115,98,105,101,110,101,115,116,101,120,116,111,115,108
+,108,101,118,97,114,112,117,101,100,97,110,102,117,101,114,116,101,99,111,109,
+195,186,110,99,108,97,115,101,115,104,117,109,97,110,111,116,101,110,105,100,111
+,98,105,108,98,97,111,117,110,105,100,97,100,101,115,116,195,161,115,101,100,105
+,116,97,114,99,114,101,97,100,111,208,180,208,187,209,143,209,135,209,130,208,
+190,208,186,208,176,208,186,208,184,208,187,208,184,209,141,209,130,208,190,208,
+178,209,129,208,181,208,181,208,179,208,190,208,191,209,128,208,184,209,130,208,
+176,208,186,208,181,209,137,208,181,209,131,208,182,208,181,208,154,208,176,208,
+186,208,177,208,181,208,183,208,177,209,139,208,187,208,190,208,189,208,184,208,
+146,209,129,208,181,208,191,208,190,208,180,208,173,209,130,208,190,209,130,208,
+190,208,188,209,135,208,181,208,188,208,189,208,181,209,130,208,187,208,181,209,
+130,209,128,208,176,208,183,208,190,208,189,208,176,208,179,208,180,208,181,208,
+188,208,189,208,181,208,148,208,187,209,143,208,159,209,128,208,184,208,189,208,
+176,209,129,208,189,208,184,209,133,209,130,208,181,208,188,208,186,209,130,208,
+190,208,179,208,190,208,180,208,178,208,190,209,130,209,130,208,176,208,188,208,
+161,208,168,208,144,208,188,208,176,209,143,208,167,209,130,208,190,208,178,208,
+176,209,129,208,178,208,176,208,188,208,181,208,188,209,131,208,162,208,176,208,
+186,208,180,208,178,208,176,208,189,208,176,208,188,209,141,209,130,208,184,209,
+141,209,130,209,131,208,146,208,176,208,188,209,130,208,181,209,133,208,191,209,
+128,208,190,209,130,209,131,209,130,208,189,208,176,208,180,208,180,208,189,209,
+143,208,146,208,190,209,130,209,130,209,128,208,184,208,189,208,181,208,185,208,
+146,208,176,209,129,208,189,208,184,208,188,209,129,208,176,208,188,209,130,208,
+190,209,130,209,128,209,131,208,177,208,158,208,189,208,184,208,188,208,184,209,
+128,208,189,208,181,208,181,208,158,208,158,208,158,208,187,208,184,209,134,209,
+141,209,130,208,176,208,158,208,189,208,176,208,189,208,181,208,188,208,180,208,
+190,208,188,208,188,208,190,208,185,208,180,208,178,208,181,208,190,208,189,208,
+190,209,129,209,131,208,180,224,164,149,224,165,135,224,164,185,224,165,136,224,
+164,149,224,165,128,224,164,184,224,165,135,224,164,149,224,164,190,224,164,149,
+224,165,139,224,164,148,224,164,176,224,164,170,224,164,176,224,164,168,224,165,
+135,224,164,143,224,164,149,224,164,149,224,164,191,224,164,173,224,165,128,224,
+164,135,224,164,184,224,164,149,224,164,176,224,164,164,224,165,139,224,164,185,
+224,165,139,224,164,134,224,164,170,224,164,185,224,165,128,224,164,175,224,164,
+185,224,164,175,224,164,190,224,164,164,224,164,149,224,164,165,224,164,190,106,
+97,103,114,97,110,224,164,134,224,164,156,224,164,156,224,165,139,224,164,133,
+224,164,172,224,164,166,224,165,139,224,164,151,224,164,136,224,164,156,224,164,
+190,224,164,151,224,164,143,224,164,185,224,164,174,224,164,135,224,164,168,224,
+164,181,224,164,185,224,164,175,224,165,135,224,164,165,224,165,135,224,164,165,
+224,165,128,224,164,152,224,164,176,224,164,156,224,164,172,224,164,166,224,165,
+128,224,164,149,224,164,136,224,164,156,224,165,128,224,164,181,224,165,135,224,
+164,168,224,164,136,224,164,168,224,164,143,224,164,185,224,164,176,224,164,137,
+224,164,184,224,164,174,224,165,135,224,164,149,224,164,174,224,164,181,224,165,
+139,224,164,178,224,165,135,224,164,184,224,164,172,224,164,174,224,164,136,224,
+164,166,224,165,135,224,164,147,224,164,176,224,164,134,224,164,174,224,164,172,
+224,164,184,224,164,173,224,164,176,224,164,172,224,164,168,224,164,154,224,164,
+178,224,164,174,224,164,168,224,164,134,224,164,151,224,164,184,224,165,128,224,
+164,178,224,165,128,216,185,217,132,217,137,216,165,217,132,217,137,217,135,216,
+176,216,167,216,162,216,174,216,177,216,185,216,175,216,175,216,167,217,132,217,
+137,217,135,216,176,217,135,216,181,217,136,216,177,216,186,217,138,216,177,217,
+131,216,167,217,134,217,136,217,132,216,167,216,168,217,138,217,134,216,185,216,
+177,216,182,216,176,217,132,217,131,217,135,217,134,216,167,217,138,217,136,217,
+133,217,130,216,167,217,132,216,185,217,132,217,138,216,167,217,134,216,167,217,
+132,217,131,217,134,216,173,216,170,217,137,217,130,216,168,217,132,217,136,216,
+173,216,169,216,167,216,174,216,177,217,129,217,130,216,183,216,185,216,168,216,
+175,216,177,217,131,217,134,216,165,216,176,216,167,217,131,217,133,216,167,216,
+167,216,173,216,175,216,165,217,132,216,167,217,129,217,138,217,135,216,168,216,
+185,216,182,217,131,217,138,217,129,216,168,216,173,216,171,217,136,217,133,217,
+134,217,136,217,135,217,136,216,163,217,134,216,167,216,172,216,175,216,167,217,
+132,217,135,216,167,216,179,217,132,217,133,216,185,217,134,216,175,217,132,217,
+138,216,179,216,185,216,168,216,177,216,181,217,132,217,137,217,133,217,134,216,
+176,216,168,217,135,216,167,216,163,217,134,217,135,217,133,216,171,217,132,217,
+131,217,134,216,170,216,167,217,132,216,167,216,173,217,138,216,171,217,133,216,
+181,216,177,216,180,216,177,216,173,216,173,217,136,217,132,217,136,217,129,217,
+138,216,167,216,176,216,167,217,132,217,131,217,132,217,133,216,177,216,169,216,
+167,217,134,216,170,216,167,217,132,217,129,216,163,216,168,217,136,216,174,216,
+167,216,181,216,163,217,134,216,170,216,167,217,134,217,135,216,167,217,132,217,
+138,216,185,216,182,217,136,217,136,217,130,216,175,216,167,216,168,217,134,216,
+174,217,138,216,177,216,168,217,134,216,170,217,132,217,131,217,133,216,180,216,
+167,216,161,217,136,217,135,217,138,216,167,216,168,217,136,217,130,216,181,216,
+181,217,136,217,133,216,167,216,177,217,130,217,133,216,163,216,173,216,175,217,
+134,216,173,217,134,216,185,216,175,217,133,216,177,216,163,217,138,216,167,216,
+173,216,169,217,131,216,170,216,168,216,175,217,136,217,134,217,138,216,172,216,
+168,217,133,217,134,217,135,216,170,216,173,216,170,216,172,217,135,216,169,216,
+179,217,134,216,169,217,138,216,170,217,133,217,131,216,177,216,169,216,186,216,
+178,216,169,217,134,217,129,216,179,216,168,217,138,216,170,217,132,217,132,217,
+135,217,132,217,134,216,167,216,170,217,132,217,131,217,130,217,132,216,168,217,
+132,217,133,216,167,216,185,217,134,217,135,216,163,217,136,217,132,216,180,217,
+138,216,161,217,134,217,136,216,177,216,163,217,133,216,167,217,129,217,138,217,
+131,216,168,217,131,217,132,216,176,216,167,216,170,216,177,216,170,216,168,216,
+168,216,163,217,134,217,135,217,133,216,179,216,167,217,134,217,131,216,168,217,
+138,216,185,217,129,217,130,216,175,216,173,216,179,217,134,217,132,217,135,217,
+133,216,180,216,185,216,177,216,163,217,135,217,132,216,180,217,135,216,177,217,
+130,216,183,216,177,216,183,217,132,216,168,112,114,111,102,105,108,101,115,101,
+114,118,105,99,101,100,101,102,97,117,108,116,104,105,109,115,101,108,102,100,
+101,116,97,105,108,115,99,111,110,116,101,110,116,115,117,112,112,111,114,116,
+115,116,97,114,116,101,100,109,101,115,115,97,103,101,115,117,99,99,101,115,115,
+102,97,115,104,105,111,110,60,116,105,116,108,101,62,99,111,117,110,116,114,121,
+97,99,99,111,117,110,116,99,114,101,97,116,101,100,115,116,111,114,105,101,115,
+114,101,115,117,108,116,115,114,117,110,110,105,110,103,112,114,111,99,101,115,
+115,119,114,105,116,105,110,103,111,98,106,101,99,116,115,118,105,115,105,98,108
+,101,119,101,108,99,111,109,101,97,114,116,105,99,108,101,117,110,107,110,111,
+119,110,110,101,116,119,111,114,107,99,111,109,112,97,110,121,100,121,110,97,109
+,105,99,98,114,111,119,115,101,114,112,114,105,118,97,99,121,112,114,111,98,108,
+101,109,83,101,114,118,105,99,101,114,101,115,112,101,99,116,100,105,115,112,108
+,97,121,114,101,113,117,101,115,116,114,101,115,101,114,118,101,119,101,98,115,
+105,116,101,104,105,115,116,111,114,121,102,114,105,101,110,100,115,111,112,116,
+105,111,110,115,119,111,114,107,105,110,103,118,101,114,115,105,111,110,109,105,
+108,108,105,111,110,99,104,97,110,110,101,108,119,105,110,100,111,119,46,97,100,
+100,114,101,115,115,118,105,115,105,116,101,100,119,101,97,116,104,101,114,99,
+111,114,114,101,99,116,112,114,111,100,117,99,116,101,100,105,114,101,99,116,102
+,111,114,119,97,114,100,121,111,117,32,99,97,110,114,101,109,111,118,101,100,115
+,117,98,106,101,99,116,99,111,110,116,114,111,108,97,114,99,104,105,118,101,99,
+117,114,114,101,110,116,114,101,97,100,105,110,103,108,105,98,114,97,114,121,108
+,105,109,105,116,101,100,109,97,110,97,103,101,114,102,117,114,116,104,101,114,
+115,117,109,109,97,114,121,109,97,99,104,105,110,101,109,105,110,117,116,101,115
+,112,114,105,118,97,116,101,99,111,110,116,101,120,116,112,114,111,103,114,97,
+109,115,111,99,105,101,116,121,110,117,109,98,101,114,115,119,114,105,116,116,
+101,110,101,110,97,98,108,101,100,116,114,105,103,103,101,114,115,111,117,114,99
+,101,115,108,111,97,100,105,110,103,101,108,101,109,101,110,116,112,97,114,116,
+110,101,114,102,105,110,97,108,108,121,112,101,114,102,101,99,116,109,101,97,110
+,105,110,103,115,121,115,116,101,109,115,107,101,101,112,105,110,103,99,117,108,
+116,117,114,101,38,113,117,111,116,59,44,106,111,117,114,110,97,108,112,114,111,
+106,101,99,116,115,117,114,102,97,99,101,115,38,113,117,111,116,59,101,120,112,
+105,114,101,115,114,101,118,105,101,119,115,98,97,108,97,110,99,101,69,110,103,
+108,105,115,104,67,111,110,116,101,110,116,116,104,114,111,117,103,104,80,108,
+101,97,115,101,32,111,112,105,110,105,111,110,99,111,110,116,97,99,116,97,118,
+101,114,97,103,101,112,114,105,109,97,114,121,118,105,108,108,97,103,101,83,112,
+97,110,105,115,104,103,97,108,108,101,114,121,100,101,99,108,105,110,101,109,101
+,101,116,105,110,103,109,105,115,115,105,111,110,112,111,112,117,108,97,114,113,
+117,97,108,105,116,121,109,101,97,115,117,114,101,103,101,110,101,114,97,108,115
+,112,101,99,105,101,115,115,101,115,115,105,111,110,115,101,99,116,105,111,110,
+119,114,105,116,101,114,115,99,111,117,110,116,101,114,105,110,105,116,105,97,
+108,114,101,112,111,114,116,115,102,105,103,117,114,101,115,109,101,109,98,101,
+114,115,104,111,108,100,105,110,103,100,105,115,112,117,116,101,101,97,114,108,
+105,101,114,101,120,112,114,101,115,115,100,105,103,105,116,97,108,112,105,99,
+116,117,114,101,65,110,111,116,104,101,114,109,97,114,114,105,101,100,116,114,97
+,102,102,105,99,108,101,97,100,105,110,103,99,104,97,110,103,101,100,99,101,110,
+116,114,97,108,118,105,99,116,111,114,121,105,109,97,103,101,115,47,114,101,97,
+115,111,110,115,115,116,117,100,105,101,115,102,101,97,116,117,114,101,108,105,
+115,116,105,110,103,109,117,115,116,32,98,101,115,99,104,111,111,108,115,86,101,
+114,115,105,111,110,117,115,117,97,108,108,121,101,112,105,115,111,100,101,112,
+108,97,121,105,110,103,103,114,111,119,105,110,103,111,98,118,105,111,117,115,
+111,118,101,114,108,97,121,112,114,101,115,101,110,116,97,99,116,105,111,110,115
+,60,47,117,108,62,13,10,119,114,97,112,112,101,114,97,108,114,101,97,100,121,99,
+101,114,116,97,105,110,114,101,97,108,105,116,121,115,116,111,114,97,103,101,97,
+110,111,116,104,101,114,100,101,115,107,116,111,112,111,102,102,101,114,101,100,
+112,97,116,116,101,114,110,117,110,117,115,117,97,108,68,105,103,105,116,97,108,
+99,97,112,105,116,97,108,87,101,98,115,105,116,101,102,97,105,108,117,114,101,99
+,111,110,110,101,99,116,114,101,100,117,99,101,100,65,110,100,114,111,105,100,
+100,101,99,97,100,101,115,114,101,103,117,108,97,114,32,38,97,109,112,59,32,97,
+110,105,109,97,108,115,114,101,108,101,97,115,101,65,117,116,111,109,97,116,103,
+101,116,116,105,110,103,109,101,116,104,111,100,115,110,111,116,104,105,110,103,
+80,111,112,117,108,97,114,99,97,112,116,105,111,110,108,101,116,116,101,114,115,
+99,97,112,116,117,114,101,115,99,105,101,110,99,101,108,105,99,101,110,115,101,
+99,104,97,110,103,101,115,69,110,103,108,97,110,100,61,49,38,97,109,112,59,72,
+105,115,116,111,114,121,32,61,32,110,101,119,32,67,101,110,116,114,97,108,117,
+112,100,97,116,101,100,83,112,101,99,105,97,108,78,101,116,119,111,114,107,114,
+101,113,117,105,114,101,99,111,109,109,101,110,116,119,97,114,110,105,110,103,67
+,111,108,108,101,103,101,116,111,111,108,98,97,114,114,101,109,97,105,110,115,98
+,101,99,97,117,115,101,101,108,101,99,116,101,100,68,101,117,116,115,99,104,102,
+105,110,97,110,99,101,119,111,114,107,101,114,115,113,117,105,99,107,108,121,98,
+101,116,119,101,101,110,101,120,97,99,116,108,121,115,101,116,116,105,110,103,
+100,105,115,101,97,115,101,83,111,99,105,101,116,121,119,101,97,112,111,110,115,
+101,120,104,105,98,105,116,38,108,116,59,33,45,45,67,111,110,116,114,111,108,99,
+108,97,115,115,101,115,99,111,118,101,114,101,100,111,117,116,108,105,110,101,97
+,116,116,97,99,107,115,100,101,118,105,99,101,115,40,119,105,110,100,111,119,112
+,117,114,112,111,115,101,116,105,116,108,101,61,34,77,111,98,105,108,101,32,107,
+105,108,108,105,110,103,115,104,111,119,105,110,103,73,116,97,108,105,97,110,100
+,114,111,112,112,101,100,104,101,97,118,105,108,121,101,102,102,101,99,116,115,
+45,49,39,93,41,59,10,99,111,110,102,105,114,109,67,117,114,114,101,110,116,97,
+100,118,97,110,99,101,115,104,97,114,105,110,103,111,112,101,110,105,110,103,100
+,114,97,119,105,110,103,98,105,108,108,105,111,110,111,114,100,101,114,101,100,
+71,101,114,109,97,110,121,114,101,108,97,116,101,100,60,47,102,111,114,109,62,
+105,110,99,108,117,100,101,119,104,101,116,104,101,114,100,101,102,105,110,101,
+100,83,99,105,101,110,99,101,99,97,116,97,108,111,103,65,114,116,105,99,108,101,
+98,117,116,116,111,110,115,108,97,114,103,101,115,116,117,110,105,102,111,114,
+109,106,111,117,114,110,101,121,115,105,100,101,98,97,114,67,104,105,99,97,103,
+111,104,111,108,105,100,97,121,71,101,110,101,114,97,108,112,97,115,115,97,103,
+101,44,38,113,117,111,116,59,97,110,105,109,97,116,101,102,101,101,108,105,110,
+103,97,114,114,105,118,101,100,112,97,115,115,105,110,103,110,97,116,117,114,97,
+108,114,111,117,103,104,108,121,46,10,10,84,104,101,32,98,117,116,32,110,111,116
+,100,101,110,115,105,116,121,66,114,105,116,97,105,110,67,104,105,110,101,115,
+101,108,97,99,107,32,111,102,116,114,105,98,117,116,101,73,114,101,108,97,110,
+100,34,32,100,97,116,97,45,102,97,99,116,111,114,115,114,101,99,101,105,118,101,
+116,104,97,116,32,105,115,76,105,98,114,97,114,121,104,117,115,98,97,110,100,105
+,110,32,102,97,99,116,97,102,102,97,105,114,115,67,104,97,114,108,101,115,114,97
+,100,105,99,97,108,98,114,111,117,103,104,116,102,105,110,100,105,110,103,108,97
+,110,100,105,110,103,58,108,97,110,103,61,34,114,101,116,117,114,110,32,108,101,
+97,100,101,114,115,112,108,97,110,110,101,100,112,114,101,109,105,117,109,112,97
+,99,107,97,103,101,65,109,101,114,105,99,97,69,100,105,116,105,111,110,93,38,113
+,117,111,116,59,77,101,115,115,97,103,101,110,101,101,100,32,116,111,118,97,108,
+117,101,61,34,99,111,109,112,108,101,120,108,111,111,107,105,110,103,115,116,97,
+116,105,111,110,98,101,108,105,101,118,101,115,109,97,108,108,101,114,45,109,111
+,98,105,108,101,114,101,99,111,114,100,115,119,97,110,116,32,116,111,107,105,110
+,100,32,111,102,70,105,114,101,102,111,120,121,111,117,32,97,114,101,115,105,109
+,105,108,97,114,115,116,117,100,105,101,100,109,97,120,105,109,117,109,104,101,
+97,100,105,110,103,114,97,112,105,100,108,121,99,108,105,109,97,116,101,107,105,
+110,103,100,111,109,101,109,101,114,103,101,100,97,109,111,117,110,116,115,102,
+111,117,110,100,101,100,112,105,111,110,101,101,114,102,111,114,109,117,108,97,
+100,121,110,97,115,116,121,104,111,119,32,116,111,32,83,117,112,112,111,114,116,
+114,101,118,101,110,117,101,101,99,111,110,111,109,121,82,101,115,117,108,116,
+115,98,114,111,116,104,101,114,115,111,108,100,105,101,114,108,97,114,103,101,
+108,121,99,97,108,108,105,110,103,46,38,113,117,111,116,59,65,99,99,111,117,110,
+116,69,100,119,97,114,100,32,115,101,103,109,101,110,116,82,111,98,101,114,116,
+32,101,102,102,111,114,116,115,80,97,99,105,102,105,99,108,101,97,114,110,101,
+100,117,112,32,119,105,116,104,104,101,105,103,104,116,58,119,101,32,104,97,118,
+101,65,110,103,101,108,101,115,110,97,116,105,111,110,115,95,115,101,97,114,99,
+104,97,112,112,108,105,101,100,97,99,113,117,105,114,101,109,97,115,115,105,118,
+101,103,114,97,110,116,101,100,58,32,102,97,108,115,101,116,114,101,97,116,101,
+100,98,105,103,103,101,115,116,98,101,110,101,102,105,116,100,114,105,118,105,
+110,103,83,116,117,100,105,101,115,109,105,110,105,109,117,109,112,101,114,104,
+97,112,115,109,111,114,110,105,110,103,115,101,108,108,105,110,103,105,115,32,
+117,115,101,100,114,101,118,101,114,115,101,118,97,114,105,97,110,116,32,114,111
+,108,101,61,34,109,105,115,115,105,110,103,97,99,104,105,101,118,101,112,114,111
+,109,111,116,101,115,116,117,100,101,110,116,115,111,109,101,111,110,101,101,120
+,116,114,101,109,101,114,101,115,116,111,114,101,98,111,116,116,111,109,58,101,
+118,111,108,118,101,100,97,108,108,32,116,104,101,115,105,116,101,109,97,112,101
+,110,103,108,105,115,104,119,97,121,32,116,111,32,32,65,117,103,117,115,116,115,
+121,109,98,111,108,115,67,111,109,112,97,110,121,109,97,116,116,101,114,115,109,
+117,115,105,99,97,108,97,103,97,105,110,115,116,115,101,114,118,105,110,103,125,
+41,40,41,59,13,10,112,97,121,109,101,110,116,116,114,111,117,98,108,101,99,111,
+110,99,101,112,116,99,111,109,112,97,114,101,112,97,114,101,110,116,115,112,108,
+97,121,101,114,115,114,101,103,105,111,110,115,109,111,110,105,116,111,114,32,39
+,39,84,104,101,32,119,105,110,110,105,110,103,101,120,112,108,111,114,101,97,100
+,97,112,116,101,100,71,97,108,108,101,114,121,112,114,111,100,117,99,101,97,98,
+105,108,105,116,121,101,110,104,97,110,99,101,99,97,114,101,101,114,115,41,46,32
+,84,104,101,32,99,111,108,108,101,99,116,83,101,97,114,99,104,32,97,110,99,105,
+101,110,116,101,120,105,115,116,101,100,102,111,111,116,101,114,32,104,97,110,
+100,108,101,114,112,114,105,110,116,101,100,99,111,110,115,111,108,101,69,97,115
+,116,101,114,110,101,120,112,111,114,116,115,119,105,110,100,111,119,115,67,104,
+97,110,110,101,108,105,108,108,101,103,97,108,110,101,117,116,114,97,108,115,117
+,103,103,101,115,116,95,104,101,97,100,101,114,115,105,103,110,105,110,103,46,
+104,116,109,108,34,62,115,101,116,116,108,101,100,119,101,115,116,101,114,110,99
+,97,117,115,105,110,103,45,119,101,98,107,105,116,99,108,97,105,109,101,100,74,
+117,115,116,105,99,101,99,104,97,112,116,101,114,118,105,99,116,105,109,115,84,
+104,111,109,97,115,32,109,111,122,105,108,108,97,112,114,111,109,105,115,101,112
+,97,114,116,105,101,115,101,100,105,116,105,111,110,111,117,116,115,105,100,101,
+58,102,97,108,115,101,44,104,117,110,100,114,101,100,79,108,121,109,112,105,99,
+95,98,117,116,116,111,110,97,117,116,104,111,114,115,114,101,97,99,104,101,100,
+99,104,114,111,110,105,99,100,101,109,97,110,100,115,115,101,99,111,110,100,115,
+112,114,111,116,101,99,116,97,100,111,112,116,101,100,112,114,101,112,97,114,101
+,110,101,105,116,104,101,114,103,114,101,97,116,108,121,103,114,101,97,116,101,
+114,111,118,101,114,97,108,108,105,109,112,114,111,118,101,99,111,109,109,97,110
+,100,115,112,101,99,105,97,108,115,101,97,114,99,104,46,119,111,114,115,104,105,
+112,102,117,110,100,105,110,103,116,104,111,117,103,104,116,104,105,103,104,101,
+115,116,105,110,115,116,101,97,100,117,116,105,108,105,116,121,113,117,97,114,
+116,101,114,67,117,108,116,117,114,101,116,101,115,116,105,110,103,99,108,101,97
+,114,108,121,101,120,112,111,115,101,100,66,114,111,119,115,101,114,108,105,98,
+101,114,97,108,125,32,99,97,116,99,104,80,114,111,106,101,99,116,101,120,97,109,
+112,108,101,104,105,100,101,40,41,59,70,108,111,114,105,100,97,97,110,115,119,
+101,114,115,97,108,108,111,119,101,100,69,109,112,101,114,111,114,100,101,102,
+101,110,115,101,115,101,114,105,111,117,115,102,114,101,101,100,111,109,83,101,
+118,101,114,97,108,45,98,117,116,116,111,110,70,117,114,116,104,101,114,111,117,
+116,32,111,102,32,33,61,32,110,117,108,108,116,114,97,105,110,101,100,68,101,110
+,109,97,114,107,118,111,105,100,40,48,41,47,97,108,108,46,106,115,112,114,101,
+118,101,110,116,82,101,113,117,101,115,116,83,116,101,112,104,101,110,10,10,87,
+104,101,110,32,111,98,115,101,114,118,101,60,47,104,50,62,13,10,77,111,100,101,
+114,110,32,112,114,111,118,105,100,101,34,32,97,108,116,61,34,98,111,114,100,101
+,114,115,46,10,10,70,111,114,32,10,10,77,97,110,121,32,97,114,116,105,115,116,
+115,112,111,119,101,114,101,100,112,101,114,102,111,114,109,102,105,99,116,105,
+111,110,116,121,112,101,32,111,102,109,101,100,105,99,97,108,116,105,99,107,101,
+116,115,111,112,112,111,115,101,100,67,111,117,110,99,105,108,119,105,116,110,
+101,115,115,106,117,115,116,105,99,101,71,101,111,114,103,101,32,66,101,108,103,
+105,117,109,46,46,46,60,47,97,62,116,119,105,116,116,101,114,110,111,116,97,98,
+108,121,119,97,105,116,105,110,103,119,97,114,102,97,114,101,32,79,116,104,101,
+114,32,114,97,110,107,105,110,103,112,104,114,97,115,101,115,109,101,110,116,105
+,111,110,115,117,114,118,105,118,101,115,99,104,111,108,97,114,60,47,112,62,13,
+10,32,67,111,117,110,116,114,121,105,103,110,111,114,101,100,108,111,115,115,32,
+111,102,106,117,115,116,32,97,115,71,101,111,114,103,105,97,115,116,114,97,110,
+103,101,60,104,101,97,100,62,60,115,116,111,112,112,101,100,49,39,93,41,59,13,10
+,105,115,108,97,110,100,115,110,111,116,97,98,108,101,98,111,114,100,101,114,58,
+108,105,115,116,32,111,102,99,97,114,114,105,101,100,49,48,48,44,48,48,48,60,47,
+104,51,62,10,32,115,101,118,101,114,97,108,98,101,99,111,109,101,115,115,101,108
+,101,99,116,32,119,101,100,100,105,110,103,48,48,46,104,116,109,108,109,111,110,
+97,114,99,104,111,102,102,32,116,104,101,116,101,97,99,104,101,114,104,105,103,
+104,108,121,32,98,105,111,108,111,103,121,108,105,102,101,32,111,102,111,114,32,
+101,118,101,110,114,105,115,101,32,111,102,38,114,97,113,117,111,59,112,108,117,
+115,111,110,101,104,117,110,116,105,110,103,40,116,104,111,117,103,104,68,111,
+117,103,108,97,115,106,111,105,110,105,110,103,99,105,114,99,108,101,115,70,111,
+114,32,116,104,101,65,110,99,105,101,110,116,86,105,101,116,110,97,109,118,101,
+104,105,99,108,101,115,117,99,104,32,97,115,99,114,121,115,116,97,108,118,97,108
+,117,101,32,61,87,105,110,100,111,119,115,101,110,106,111,121,101,100,97,32,115,
+109,97,108,108,97,115,115,117,109,101,100,60,97,32,105,100,61,34,102,111,114,101
+,105,103,110,32,65,108,108,32,114,105,104,111,119,32,116,104,101,68,105,115,112,
+108,97,121,114,101,116,105,114,101,100,104,111,119,101,118,101,114,104,105,100,
+100,101,110,59,98,97,116,116,108,101,115,115,101,101,107,105,110,103,99,97,98,
+105,110,101,116,119,97,115,32,110,111,116,108,111,111,107,32,97,116,99,111,110,
+100,117,99,116,103,101,116,32,116,104,101,74,97,110,117,97,114,121,104,97,112,
+112,101,110,115,116,117,114,110,105,110,103,97,58,104,111,118,101,114,79,110,108
+,105,110,101,32,70,114,101,110,99,104,32,108,97,99,107,105,110,103,116,121,112,
+105,99,97,108,101,120,116,114,97,99,116,101,110,101,109,105,101,115,101,118,101,
+110,32,105,102,103,101,110,101,114,97,116,100,101,99,105,100,101,100,97,114,101,
+32,110,111,116,47,115,101,97,114,99,104,98,101,108,105,101,102,115,45,105,109,97
+,103,101,58,108,111,99,97,116,101,100,115,116,97,116,105,99,46,108,111,103,105,
+110,34,62,99,111,110,118,101,114,116,118,105,111,108,101,110,116,101,110,116,101
+,114,101,100,102,105,114,115,116,34,62,99,105,114,99,117,105,116,70,105,110,108,
+97,110,100,99,104,101,109,105,115,116,115,104,101,32,119,97,115,49,48,112,120,59
+,34,62,97,115,32,115,117,99,104,100,105,118,105,100,101,100,60,47,115,112,97,110
+,62,119,105,108,108,32,98,101,108,105,110,101,32,111,102,97,32,103,114,101,97,
+116,109,121,115,116,101,114,121,47,105,110,100,101,120,46,102,97,108,108,105,110
+,103,100,117,101,32,116,111,32,114,97,105,108,119,97,121,99,111,108,108,101,103,
+101,109,111,110,115,116,101,114,100,101,115,99,101,110,116,105,116,32,119,105,
+116,104,110,117,99,108,101,97,114,74,101,119,105,115,104,32,112,114,111,116,101,
+115,116,66,114,105,116,105,115,104,102,108,111,119,101,114,115,112,114,101,100,
+105,99,116,114,101,102,111,114,109,115,98,117,116,116,111,110,32,119,104,111,32,
+119,97,115,108,101,99,116,117,114,101,105,110,115,116,97,110,116,115,117,105,99,
+105,100,101,103,101,110,101,114,105,99,112,101,114,105,111,100,115,109,97,114,
+107,101,116,115,83,111,99,105,97,108,32,102,105,115,104,105,110,103,99,111,109,
+98,105,110,101,103,114,97,112,104,105,99,119,105,110,110,101,114,115,60,98,114,
+32,47,62,60,98,121,32,116,104,101,32,78,97,116,117,114,97,108,80,114,105,118,97,
+99,121,99,111,111,107,105,101,115,111,117,116,99,111,109,101,114,101,115,111,108
+,118,101,83,119,101,100,105,115,104,98,114,105,101,102,108,121,80,101,114,115,
+105,97,110,115,111,32,109,117,99,104,67,101,110,116,117,114,121,100,101,112,105,
+99,116,115,99,111,108,117,109,110,115,104,111,117,115,105,110,103,115,99,114,105
+,112,116,115,110,101,120,116,32,116,111,98,101,97,114,105,110,103,109,97,112,112
+,105,110,103,114,101,118,105,115,101,100,106,81,117,101,114,121,40,45,119,105,
+100,116,104,58,116,105,116,108,101,34,62,116,111,111,108,116,105,112,83,101,99,
+116,105,111,110,100,101,115,105,103,110,115,84,117,114,107,105,115,104,121,111,
+117,110,103,101,114,46,109,97,116,99,104,40,125,41,40,41,59,10,10,98,117,114,110
+,105,110,103,111,112,101,114,97,116,101,100,101,103,114,101,101,115,115,111,117,
+114,99,101,61,82,105,99,104,97,114,100,99,108,111,115,101,108,121,112,108,97,115
+,116,105,99,101,110,116,114,105,101,115,60,47,116,114,62,13,10,99,111,108,111,
+114,58,35,117,108,32,105,100,61,34,112,111,115,115,101,115,115,114,111,108,108,
+105,110,103,112,104,121,115,105,99,115,102,97,105,108,105,110,103,101,120,101,99
+,117,116,101,99,111,110,116,101,115,116,108,105,110,107,32,116,111,68,101,102,97
+,117,108,116,60,98,114,32,47,62,10,58,32,116,114,117,101,44,99,104,97,114,116,
+101,114,116,111,117,114,105,115,109,99,108,97,115,115,105,99,112,114,111,99,101,
+101,100,101,120,112,108,97,105,110,60,47,104,49,62,13,10,111,110,108,105,110,101
+,46,63,120,109,108,32,118,101,104,101,108,112,105,110,103,100,105,97,109,111,110
+,100,117,115,101,32,116,104,101,97,105,114,108,105,110,101,101,110,100,32,45,45,
+62,41,46,97,116,116,114,40,114,101,97,100,101,114,115,104,111,115,116,105,110,
+103,35,102,102,102,102,102,102,114,101,97,108,105,122,101,86,105,110,99,101,110,
+116,115,105,103,110,97,108,115,32,115,114,99,61,34,47,80,114,111,100,117,99,116,
+100,101,115,112,105,116,101,100,105,118,101,114,115,101,116,101,108,108,105,110,
+103,80,117,98,108,105,99,32,104,101,108,100,32,105,110,74,111,115,101,112,104,32
+,116,104,101,97,116,114,101,97,102,102,101,99,116,115,60,115,116,121,108,101,62,
+97,32,108,97,114,103,101,100,111,101,115,110,39,116,108,97,116,101,114,44,32,69,
+108,101,109,101,110,116,102,97,118,105,99,111,110,99,114,101,97,116,111,114,72,
+117,110,103,97,114,121,65,105,114,112,111,114,116,115,101,101,32,116,104,101,115
+,111,32,116,104,97,116,77,105,99,104,97,101,108,83,121,115,116,101,109,115,80,
+114,111,103,114,97,109,115,44,32,97,110,100,32,32,119,105,100,116,104,61,101,38,
+113,117,111,116,59,116,114,97,100,105,110,103,108,101,102,116,34,62,10,112,101,
+114,115,111,110,115,71,111,108,100,101,110,32,65,102,102,97,105,114,115,103,114,
+97,109,109,97,114,102,111,114,109,105,110,103,100,101,115,116,114,111,121,105,
+100,101,97,32,111,102,99,97,115,101,32,111,102,111,108,100,101,115,116,32,116,
+104,105,115,32,105,115,46,115,114,99,32,61,32,99,97,114,116,111,111,110,114,101,
+103,105,115,116,114,67,111,109,109,111,110,115,77,117,115,108,105,109,115,87,104
+,97,116,32,105,115,105,110,32,109,97,110,121,109,97,114,107,105,110,103,114,101,
+118,101,97,108,115,73,110,100,101,101,100,44,101,113,117,97,108,108,121,47,115,
+104,111,119,95,97,111,117,116,100,111,111,114,101,115,99,97,112,101,40,65,117,
+115,116,114,105,97,103,101,110,101,116,105,99,115,121,115,116,101,109,44,73,110,
+32,116,104,101,32,115,105,116,116,105,110,103,72,101,32,97,108,115,111,73,115,
+108,97,110,100,115,65,99,97,100,101,109,121,10,9,9,60,33,45,45,68,97,110,105,101
+,108,32,98,105,110,100,105,110,103,98,108,111,99,107,34,62,105,109,112,111,115,
+101,100,117,116,105,108,105,122,101,65,98,114,97,104,97,109,40,101,120,99,101,
+112,116,123,119,105,100,116,104,58,112,117,116,116,105,110,103,41,46,104,116,109
+,108,40,124,124,32,91,93,59,10,68,65,84,65,91,32,42,107,105,116,99,104,101,110,
+109,111,117,110,116,101,100,97,99,116,117,97,108,32,100,105,97,108,101,99,116,
+109,97,105,110,108,121,32,95,98,108,97,110,107,39,105,110,115,116,97,108,108,101
+,120,112,101,114,116,115,105,102,40,116,121,112,101,73,116,32,97,108,115,111,38,
+99,111,112,121,59,32,34,62,84,101,114,109,115,98,111,114,110,32,105,110,79,112,
+116,105,111,110,115,101,97,115,116,101,114,110,116,97,108,107,105,110,103,99,111
+,110,99,101,114,110,103,97,105,110,101,100,32,111,110,103,111,105,110,103,106,
+117,115,116,105,102,121,99,114,105,116,105,99,115,102,97,99,116,111,114,121,105,
+116,115,32,111,119,110,97,115,115,97,117,108,116,105,110,118,105,116,101,100,108
+,97,115,116,105,110,103,104,105,115,32,111,119,110,104,114,101,102,61,34,47,34,
+32,114,101,108,61,34,100,101,118,101,108,111,112,99,111,110,99,101,114,116,100,
+105,97,103,114,97,109,100,111,108,108,97,114,115,99,108,117,115,116,101,114,112,
+104,112,63,105,100,61,97,108,99,111,104,111,108,41,59,125,41,40,41,59,117,115,
+105,110,103,32,97,62,60,115,112,97,110,62,118,101,115,115,101,108,115,114,101,
+118,105,118,97,108,65,100,100,114,101,115,115,97,109,97,116,101,117,114,97,110,
+100,114,111,105,100,97,108,108,101,103,101,100,105,108,108,110,101,115,115,119,
+97,108,107,105,110,103,99,101,110,116,101,114,115,113,117,97,108,105,102,121,109
+,97,116,99,104,101,115,117,110,105,102,105,101,100,101,120,116,105,110,99,116,68
+,101,102,101,110,115,101,100,105,101,100,32,105,110,10,9,60,33,45,45,32,99,117,
+115,116,111,109,115,108,105,110,107,105,110,103,76,105,116,116,108,101,32,66,111
+,111,107,32,111,102,101,118,101,110,105,110,103,109,105,110,46,106,115,63,97,114
+,101,32,116,104,101,107,111,110,116,97,107,116,116,111,100,97,121,39,115,46,104,
+116,109,108,34,32,116,97,114,103,101,116,61,119,101,97,114,105,110,103,65,108,
+108,32,82,105,103,59,10,125,41,40,41,59,114,97,105,115,105,110,103,32,65,108,115
+,111,44,32,99,114,117,99,105,97,108,97,98,111,117,116,34,62,100,101,99,108,97,
+114,101,45,45,62,10,60,115,99,102,105,114,101,102,111,120,97,115,32,109,117,99,
+104,97,112,112,108,105,101,115,105,110,100,101,120,44,32,115,44,32,98,117,116,32
+,116,121,112,101,32,61,32,10,13,10,60,33,45,45,116,111,119,97,114,100,115,82,101
+,99,111,114,100,115,80,114,105,118,97,116,101,70,111,114,101,105,103,110,80,114,
+101,109,105,101,114,99,104,111,105,99,101,115,86,105,114,116,117,97,108,114,101,
+116,117,114,110,115,67,111,109,109,101,110,116,80,111,119,101,114,101,100,105,
+110,108,105,110,101,59,112,111,118,101,114,116,121,99,104,97,109,98,101,114,76,
+105,118,105,110,103,32,118,111,108,117,109,101,115,65,110,116,104,111,110,121,
+108,111,103,105,110,34,32,82,101,108,97,116,101,100,69,99,111,110,111,109,121,
+114,101,97,99,104,101,115,99,117,116,116,105,110,103,103,114,97,118,105,116,121,
+108,105,102,101,32,105,110,67,104,97,112,116,101,114,45,115,104,97,100,111,119,
+78,111,116,97,98,108,101,60,47,116,100,62,13,10,32,114,101,116,117,114,110,115,
+116,97,100,105,117,109,119,105,100,103,101,116,115,118,97,114,121,105,110,103,
+116,114,97,118,101,108,115,104,101,108,100,32,98,121,119,104,111,32,97,114,101,
+119,111,114,107,32,105,110,102,97,99,117,108,116,121,97,110,103,117,108,97,114,
+119,104,111,32,104,97,100,97,105,114,112,111,114,116,116,111,119,110,32,111,102,
+10,10,83,111,109,101,32,39,99,108,105,99,107,39,99,104,97,114,103,101,115,107,
+101,121,119,111,114,100,105,116,32,119,105,108,108,99,105,116,121,32,111,102,40,
+116,104,105,115,41,59,65,110,100,114,101,119,32,117,110,105,113,117,101,32,99,
+104,101,99,107,101,100,111,114,32,109,111,114,101,51,48,48,112,120,59,32,114,101
+,116,117,114,110,59,114,115,105,111,110,61,34,112,108,117,103,105,110,115,119,
+105,116,104,105,110,32,104,101,114,115,101,108,102,83,116,97,116,105,111,110,70,
+101,100,101,114,97,108,118,101,110,116,117,114,101,112,117,98,108,105,115,104,
+115,101,110,116,32,116,111,116,101,110,115,105,111,110,97,99,116,114,101,115,115
+,99,111,109,101,32,116,111,102,105,110,103,101,114,115,68,117,107,101,32,111,102
+,112,101,111,112,108,101,44,101,120,112,108,111,105,116,119,104,97,116,32,105,
+115,104,97,114,109,111,110,121,97,32,109,97,106,111,114,34,58,34,104,116,116,112
+,105,110,32,104,105,115,32,109,101,110,117,34,62,10,109,111,110,116,104,108,121,
+111,102,102,105,99,101,114,99,111,117,110,99,105,108,103,97,105,110,105,110,103,
+101,118,101,110,32,105,110,83,117,109,109,97,114,121,100,97,116,101,32,111,102,
+108,111,121,97,108,116,121,102,105,116,110,101,115,115,97,110,100,32,119,97,115,
+101,109,112,101,114,111,114,115,117,112,114,101,109,101,83,101,99,111,110,100,32
+,104,101,97,114,105,110,103,82,117,115,115,105,97,110,108,111,110,103,101,115,
+116,65,108,98,101,114,116,97,108,97,116,101,114,97,108,115,101,116,32,111,102,32
+,115,109,97,108,108,34,62,46,97,112,112,101,110,100,100,111,32,119,105,116,104,
+102,101,100,101,114,97,108,98,97,110,107,32,111,102,98,101,110,101,97,116,104,68
+,101,115,112,105,116,101,67,97,112,105,116,97,108,103,114,111,117,110,100,115,41
+,44,32,97,110,100,32,112,101,114,99,101,110,116,105,116,32,102,114,111,109,99,
+108,111,115,105,110,103,99,111,110,116,97,105,110,73,110,115,116,101,97,100,102,
+105,102,116,101,101,110,97,115,32,119,101,108,108,46,121,97,104,111,111,46,114,
+101,115,112,111,110,100,102,105,103,104,116,101,114,111,98,115,99,117,114,101,
+114,101,102,108,101,99,116,111,114,103,97,110,105,99,61,32,77,97,116,104,46,101,
+100,105,116,105,110,103,111,110,108,105,110,101,32,112,97,100,100,105,110,103,97
+,32,119,104,111,108,101,111,110,101,114,114,111,114,121,101,97,114,32,111,102,
+101,110,100,32,111,102,32,98,97,114,114,105,101,114,119,104,101,110,32,105,116,
+104,101,97,100,101,114,32,104,111,109,101,32,111,102,114,101,115,117,109,101,100
+,114,101,110,97,109,101,100,115,116,114,111,110,103,62,104,101,97,116,105,110,
+103,114,101,116,97,105,110,115,99,108,111,117,100,102,114,119,97,121,32,111,102,
+32,77,97,114,99,104,32,49,107,110,111,119,105,110,103,105,110,32,112,97,114,116,
+66,101,116,119,101,101,110,108,101,115,115,111,110,115,99,108,111,115,101,115,
+116,118,105,114,116,117,97,108,108,105,110,107,115,34,62,99,114,111,115,115,101,
+100,69,78,68,32,45,45,62,102,97,109,111,117,115,32,97,119,97,114,100,101,100,76,
+105,99,101,110,115,101,72,101,97,108,116,104,32,102,97,105,114,108,121,32,119,
+101,97,108,116,104,121,109,105,110,105,109,97,108,65,102,114,105,99,97,110,99,
+111,109,112,101,116,101,108,97,98,101,108,34,62,115,105,110,103,105,110,103,102,
+97,114,109,101,114,115,66,114,97,115,105,108,41,100,105,115,99,117,115,115,114,
+101,112,108,97,99,101,71,114,101,103,111,114,121,102,111,110,116,32,99,111,112,
+117,114,115,117,101,100,97,112,112,101,97,114,115,109,97,107,101,32,117,112,114,
+111,117,110,100,101,100,98,111,116,104,32,111,102,98,108,111,99,107,101,100,115,
+97,119,32,116,104,101,111,102,102,105,99,101,115,99,111,108,111,117,114,115,105,
+102,40,100,111,99,117,119,104,101,110,32,104,101,101,110,102,111,114,99,101,112,
+117,115,104,40,102,117,65,117,103,117,115,116,32,85,84,70,45,56,34,62,70,97,110,
+116,97,115,121,105,110,32,109,111,115,116,105,110,106,117,114,101,100,85,115,117
+,97,108,108,121,102,97,114,109,105,110,103,99,108,111,115,117,114,101,111,98,106
+,101,99,116,32,100,101,102,101,110,99,101,117,115,101,32,111,102,32,77,101,100,
+105,99,97,108,60,98,111,100,121,62,10,101,118,105,100,101,110,116,98,101,32,117,
+115,101,100,107,101,121,67,111,100,101,115,105,120,116,101,101,110,73,115,108,97
+,109,105,99,35,48,48,48,48,48,48,101,110,116,105,114,101,32,119,105,100,101,108,
+121,32,97,99,116,105,118,101,32,40,116,121,112,101,111,102,111,110,101,32,99,97,
+110,99,111,108,111,114,32,61,115,112,101,97,107,101,114,101,120,116,101,110,100,
+115,80,104,121,115,105,99,115,116,101,114,114,97,105,110,60,116,98,111,100,121,
+62,102,117,110,101,114,97,108,118,105,101,119,105,110,103,109,105,100,100,108,
+101,32,99,114,105,99,107,101,116,112,114,111,112,104,101,116,115,104,105,102,116
+,101,100,100,111,99,116,111,114,115,82,117,115,115,101,108,108,32,116,97,114,103
+,101,116,99,111,109,112,97,99,116,97,108,103,101,98,114,97,115,111,99,105,97,108
+,45,98,117,108,107,32,111,102,109,97,110,32,97,110,100,60,47,116,100,62,10,32,
+104,101,32,108,101,102,116,41,46,118,97,108,40,41,102,97,108,115,101,41,59,108,
+111,103,105,99,97,108,98,97,110,107,105,110,103,104,111,109,101,32,116,111,110,
+97,109,105,110,103,32,65,114,105,122,111,110,97,99,114,101,100,105,116,115,41,59
+,10,125,41,59,10,102,111,117,110,100,101,114,105,110,32,116,117,114,110,67,111,
+108,108,105,110,115,98,101,102,111,114,101,32,66,117,116,32,116,104,101,99,104,
+97,114,103,101,100,84,105,116,108,101,34,62,67,97,112,116,97,105,110,115,112,101
+,108,108,101,100,103,111,100,100,101,115,115,84,97,103,32,45,45,62,65,100,100,
+105,110,103,58,98,117,116,32,119,97,115,82,101,99,101,110,116,32,112,97,116,105,
+101,110,116,98,97,99,107,32,105,110,61,102,97,108,115,101,38,76,105,110,99,111,
+108,110,119,101,32,107,110,111,119,67,111,117,110,116,101,114,74,117,100,97,105,
+115,109,115,99,114,105,112,116,32,97,108,116,101,114,101,100,39,93,41,59,10,32,
+32,104,97,115,32,116,104,101,117,110,99,108,101,97,114,69,118,101,110,116,39,44,
+98,111,116,104,32,105,110,110,111,116,32,97,108,108,10,10,60,33,45,45,32,112,108
+,97,99,105,110,103,104,97,114,100,32,116,111,32,99,101,110,116,101,114,115,111,
+114,116,32,111,102,99,108,105,101,110,116,115,115,116,114,101,101,116,115,66,101
+,114,110,97,114,100,97,115,115,101,114,116,115,116,101,110,100,32,116,111,102,97
+,110,116,97,115,121,100,111,119,110,32,105,110,104,97,114,98,111,117,114,70,114,
+101,101,100,111,109,106,101,119,101,108,114,121,47,97,98,111,117,116,46,46,115,
+101,97,114,99,104,108,101,103,101,110,100,115,105,115,32,109,97,100,101,109,111,
+100,101,114,110,32,111,110,108,121,32,111,110,111,110,108,121,32,116,111,105,109
+,97,103,101,34,32,108,105,110,101,97,114,32,112,97,105,110,116,101,114,97,110,
+100,32,110,111,116,114,97,114,101,108,121,32,97,99,114,111,110,121,109,100,101,
+108,105,118,101,114,115,104,111,114,116,101,114,48,48,38,97,109,112,59,97,115,32
+,109,97,110,121,119,105,100,116,104,61,34,47,42,32,60,33,91,67,116,105,116,108,
+101,32,61,111,102,32,116,104,101,32,108,111,119,101,115,116,32,112,105,99,107,
+101,100,32,101,115,99,97,112,101,100,117,115,101,115,32,111,102,112,101,111,112,
+108,101,115,32,80,117,98,108,105,99,77,97,116,116,104,101,119,116,97,99,116,105,
+99,115,100,97,109,97,103,101,100,119,97,121,32,102,111,114,108,97,119,115,32,111
+,102,101,97,115,121,32,116,111,32,119,105,110,100,111,119,115,116,114,111,110,
+103,32,32,115,105,109,112,108,101,125,99,97,116,99,104,40,115,101,118,101,110,
+116,104,105,110,102,111,98,111,120,119,101,110,116,32,116,111,112,97,105,110,116
+,101,100,99,105,116,105,122,101,110,73,32,100,111,110,39,116,114,101,116,114,101
+,97,116,46,32,83,111,109,101,32,119,119,46,34,41,59,10,98,111,109,98,105,110,103
+,109,97,105,108,116,111,58,109,97,100,101,32,105,110,46,32,77,97,110,121,32,99,
+97,114,114,105,101,115,124,124,123,125,59,119,105,119,111,114,107,32,111,102,115
+,121,110,111,110,121,109,100,101,102,101,97,116,115,102,97,118,111,114,101,100,
+111,112,116,105,99,97,108,112,97,103,101,84,114,97,117,110,108,101,115,115,32,
+115,101,110,100,105,110,103,108,101,102,116,34,62,60,99,111,109,83,99,111,114,65
+,108,108,32,116,104,101,106,81,117,101,114,121,46,116,111,117,114,105,115,116,67
+,108,97,115,115,105,99,102,97,108,115,101,34,32,87,105,108,104,101,108,109,115,
+117,98,117,114,98,115,103,101,110,117,105,110,101,98,105,115,104,111,112,115,46,
+115,112,108,105,116,40,103,108,111,98,97,108,32,102,111,108,108,111,119,115,98,
+111,100,121,32,111,102,110,111,109,105,110,97,108,67,111,110,116,97,99,116,115,
+101,99,117,108,97,114,108,101,102,116,32,116,111,99,104,105,101,102,108,121,45,
+104,105,100,100,101,110,45,98,97,110,110,101,114,60,47,108,105,62,10,10,46,32,87
+,104,101,110,32,105,110,32,98,111,116,104,100,105,115,109,105,115,115,69,120,112
+,108,111,114,101,97,108,119,97,121,115,32,118,105,97,32,116,104,101,115,112,97,
+195,177,111,108,119,101,108,102,97,114,101,114,117,108,105,110,103,32,97,114,114
+,97,110,103,101,99,97,112,116,97,105,110,104,105,115,32,115,111,110,114,117,108,
+101,32,111,102,104,101,32,116,111,111,107,105,116,115,101,108,102,44,61,48,38,97
+,109,112,59,40,99,97,108,108,101,100,115,97,109,112,108,101,115,116,111,32,109,
+97,107,101,99,111,109,47,112,97,103,77,97,114,116,105,110,32,75,101,110,110,101,
+100,121,97,99,99,101,112,116,115,102,117,108,108,32,111,102,104,97,110,100,108,
+101,100,66,101,115,105,100,101,115,47,47,45,45,62,60,47,97,98,108,101,32,116,111
+,116,97,114,103,101,116,115,101,115,115,101,110,99,101,104,105,109,32,116,111,32
+,105,116,115,32,98,121,32,99,111,109,109,111,110,46,109,105,110,101,114,97,108,
+116,111,32,116,97,107,101,119,97,121,115,32,116,111,115,46,111,114,103,47,108,97
+,100,118,105,115,101,100,112,101,110,97,108,116,121,115,105,109,112,108,101,58,
+105,102,32,116,104,101,121,76,101,116,116,101,114,115,97,32,115,104,111,114,116,
+72,101,114,98,101,114,116,115,116,114,105,107,101,115,32,103,114,111,117,112,115
+,46,108,101,110,103,116,104,102,108,105,103,104,116,115,111,118,101,114,108,97,
+112,115,108,111,119,108,121,32,108,101,115,115,101,114,32,115,111,99,105,97,108,
+32,60,47,112,62,10,9,9,105,116,32,105,110,116,111,114,97,110,107,101,100,32,114,
+97,116,101,32,111,102,117,108,62,13,10,32,32,97,116,116,101,109,112,116,112,97,
+105,114,32,111,102,109,97,107,101,32,105,116,75,111,110,116,97,107,116,65,110,
+116,111,110,105,111,104,97,118,105,110,103,32,114,97,116,105,110,103,115,32,97,
+99,116,105,118,101,115,116,114,101,97,109,115,116,114,97,112,112,101,100,34,41,
+46,99,115,115,40,104,111,115,116,105,108,101,108,101,97,100,32,116,111,108,105,
+116,116,108,101,32,103,114,111,117,112,115,44,80,105,99,116,117,114,101,45,45,62
+,13,10,13,10,32,114,111,119,115,61,34,32,111,98,106,101,99,116,105,110,118,101,
+114,115,101,60,102,111,111,116,101,114,67,117,115,116,111,109,86,62,60,92,47,115
+,99,114,115,111,108,118,105,110,103,67,104,97,109,98,101,114,115,108,97,118,101,
+114,121,119,111,117,110,100,101,100,119,104,101,114,101,97,115,33,61,32,39,117,
+110,100,102,111,114,32,97,108,108,112,97,114,116,108,121,32,45,114,105,103,104,
+116,58,65,114,97,98,105,97,110,98,97,99,107,101,100,32,99,101,110,116,117,114,
+121,117,110,105,116,32,111,102,109,111,98,105,108,101,45,69,117,114,111,112,101,
+44,105,115,32,104,111,109,101,114,105,115,107,32,111,102,100,101,115,105,114,101
+,100,67,108,105,110,116,111,110,99,111,115,116,32,111,102,97,103,101,32,111,102,
+32,98,101,99,111,109,101,32,110,111,110,101,32,111,102,112,38,113,117,111,116,59
+,77,105,100,100,108,101,32,101,97,100,39,41,91,48,67,114,105,116,105,99,115,115,
+116,117,100,105,111,115,62,38,99,111,112,121,59,103,114,111,117,112,34,62,97,115
+,115,101,109,98,108,109,97,107,105,110,103,32,112,114,101,115,115,101,100,119,
+105,100,103,101,116,46,112,115,58,34,32,63,32,114,101,98,117,105,108,116,98,121,
+32,115,111,109,101,70,111,114,109,101,114,32,101,100,105,116,111,114,115,100,101
+,108,97,121,101,100,67,97,110,111,110,105,99,104,97,100,32,116,104,101,112,117,
+115,104,105,110,103,99,108,97,115,115,61,34,98,117,116,32,97,114,101,112,97,114,
+116,105,97,108,66,97,98,121,108,111,110,98,111,116,116,111,109,32,99,97,114,114,
+105,101,114,67,111,109,109,97,110,100,105,116,115,32,117,115,101,65,115,32,119,
+105,116,104,99,111,117,114,115,101,115,97,32,116,104,105,114,100,100,101,110,111
+,116,101,115,97,108,115,111,32,105,110,72,111,117,115,116,111,110,50,48,112,120,
+59,34,62,97,99,99,117,115,101,100,100,111,117,98,108,101,32,103,111,97,108,32,
+111,102,70,97,109,111,117,115,32,41,46,98,105,110,100,40,112,114,105,101,115,116
+,115,32,79,110,108,105,110,101,105,110,32,74,117,108,121,115,116,32,43,32,34,103
+,99,111,110,115,117,108,116,100,101,99,105,109,97,108,104,101,108,112,102,117,
+108,114,101,118,105,118,101,100,105,115,32,118,101,114,121,114,39,43,39,105,112,
+116,108,111,115,105,110,103,32,102,101,109,97,108,101,115,105,115,32,97,108,115,
+111,115,116,114,105,110,103,115,100,97,121,115,32,111,102,97,114,114,105,118,97,
+108,102,117,116,117,114,101,32,60,111,98,106,101,99,116,102,111,114,99,105,110,
+103,83,116,114,105,110,103,40,34,32,47,62,10,9,9,104,101,114,101,32,105,115,101,
+110,99,111,100,101,100,46,32,32,84,104,101,32,98,97,108,108,111,111,110,100,111,
+110,101,32,98,121,47,99,111,109,109,111,110,98,103,99,111,108,111,114,108,97,119
+,32,111,102,32,73,110,100,105,97,110,97,97,118,111,105,100,101,100,98,117,116,32
+,116,104,101,50,112,120,32,51,112,120,106,113,117,101,114,121,46,97,102,116,101,
+114,32,97,112,111,108,105,99,121,46,109,101,110,32,97,110,100,102,111,111,116,
+101,114,45,61,32,116,114,117,101,59,102,111,114,32,117,115,101,115,99,114,101,
+101,110,46,73,110,100,105,97,110,32,105,109,97,103,101,32,61,102,97,109,105,108,
+121,44,104,116,116,112,58,47,47,32,38,110,98,115,112,59,100,114,105,118,101,114,
+115,101,116,101,114,110,97,108,115,97,109,101,32,97,115,110,111,116,105,99,101,
+100,118,105,101,119,101,114,115,125,41,40,41,59,10,32,105,115,32,109,111,114,101
+,115,101,97,115,111,110,115,102,111,114,109,101,114,32,116,104,101,32,110,101,
+119,105,115,32,106,117,115,116,99,111,110,115,101,110,116,32,83,101,97,114,99,
+104,119,97,115,32,116,104,101,119,104,121,32,116,104,101,115,104,105,112,112,101
+,100,98,114,62,60,98,114,62,119,105,100,116,104,58,32,104,101,105,103,104,116,61
+,109,97,100,101,32,111,102,99,117,105,115,105,110,101,105,115,32,116,104,97,116,
+97,32,118,101,114,121,32,65,100,109,105,114,97,108,32,102,105,120,101,100,59,110
+,111,114,109,97,108,32,77,105,115,115,105,111,110,80,114,101,115,115,44,32,111,
+110,116,97,114,105,111,99,104,97,114,115,101,116,116,114,121,32,116,111,32,105,
+110,118,97,100,101,100,61,34,116,114,117,101,34,115,112,97,99,105,110,103,105,
+115,32,109,111,115,116,97,32,109,111,114,101,32,116,111,116,97,108,108,121,102,
+97,108,108,32,111,102,125,41,59,13,10,32,32,105,109,109,101,110,115,101,116,105,
+109,101,32,105,110,115,101,116,32,111,117,116,115,97,116,105,115,102,121,116,111
+,32,102,105,110,100,100,111,119,110,32,116,111,108,111,116,32,111,102,32,80,108,
+97,121,101,114,115,105,110,32,74,117,110,101,113,117,97,110,116,117,109,110,111,
+116,32,116,104,101,116,105,109,101,32,116,111,100,105,115,116,97,110,116,70,105,
+110,110,105,115,104,115,114,99,32,61,32,40,115,105,110,103,108,101,32,104,101,
+108,112,32,111,102,71,101,114,109,97,110,32,108,97,119,32,97,110,100,108,97,98,
+101,108,101,100,102,111,114,101,115,116,115,99,111,111,107,105,110,103,115,112,
+97,99,101,34,62,104,101,97,100,101,114,45,119,101,108,108,32,97,115,83,116,97,
+110,108,101,121,98,114,105,100,103,101,115,47,103,108,111,98,97,108,67,114,111,
+97,116,105,97,32,65,98,111,117,116,32,91,48,93,59,10,32,32,105,116,44,32,97,110,
+100,103,114,111,117,112,101,100,98,101,105,110,103,32,97,41,123,116,104,114,111,
+119,104,101,32,109,97,100,101,108,105,103,104,116,101,114,101,116,104,105,99,97,
+108,70,70,70,70,70,70,34,98,111,116,116,111,109,34,108,105,107,101,32,97,32,101,
+109,112,108,111,121,115,108,105,118,101,32,105,110,97,115,32,115,101,101,110,112
+,114,105,110,116,101,114,109,111,115,116,32,111,102,117,98,45,108,105,110,107,
+114,101,106,101,99,116,115,97,110,100,32,117,115,101,105,109,97,103,101,34,62,
+115,117,99,99,101,101,100,102,101,101,100,105,110,103,78,117,99,108,101,97,114,
+105,110,102,111,114,109,97,116,111,32,104,101,108,112,87,111,109,101,110,39,115,
+78,101,105,116,104,101,114,77,101,120,105,99,97,110,112,114,111,116,101,105,110,
+60,116,97,98,108,101,32,98,121,32,109,97,110,121,104,101,97,108,116,104,121,108,
+97,119,115,117,105,116,100,101,118,105,115,101,100,46,112,117,115,104,40,123,115
+,101,108,108,101,114,115,115,105,109,112,108,121,32,84,104,114,111,117,103,104,
+46,99,111,111,107,105,101,32,73,109,97,103,101,40,111,108,100,101,114,34,62,117,
+115,46,106,115,34,62,32,83,105,110,99,101,32,117,110,105,118,101,114,115,108,97,
+114,103,101,114,32,111,112,101,110,32,116,111,33,45,45,32,101,110,100,108,105,
+101,115,32,105,110,39,93,41,59,13,10,32,32,109,97,114,107,101,116,119,104,111,32
+,105,115,32,40,34,68,79,77,67,111,109,97,110,97,103,101,100,111,110,101,32,102,
+111,114,116,121,112,101,111,102,32,75,105,110,103,100,111,109,112,114,111,102,
+105,116,115,112,114,111,112,111,115,101,116,111,32,115,104,111,119,99,101,110,
+116,101,114,59,109,97,100,101,32,105,116,100,114,101,115,115,101,100,119,101,114
+,101,32,105,110,109,105,120,116,117,114,101,112,114,101,99,105,115,101,97,114,
+105,115,105,110,103,115,114,99,32,61,32,39,109,97,107,101,32,97,32,115,101,99,
+117,114,101,100,66,97,112,116,105,115,116,118,111,116,105,110,103,32,10,9,9,118,
+97,114,32,77,97,114,99,104,32,50,103,114,101,119,32,117,112,67,108,105,109,97,
+116,101,46,114,101,109,111,118,101,115,107,105,108,108,101,100,119,97,121,32,116
+,104,101,60,47,104,101,97,100,62,102,97,99,101,32,111,102,97,99,116,105,110,103,
+32,114,105,103,104,116,34,62,116,111,32,119,111,114,107,114,101,100,117,99,101,
+115,104,97,115,32,104,97,100,101,114,101,99,116,101,100,115,104,111,119,40,41,59
+,97,99,116,105,111,110,61,98,111,111,107,32,111,102,97,110,32,97,114,101,97,61,
+61,32,34,104,116,116,60,104,101,97,100,101,114,10,60,104,116,109,108,62,99,111,
+110,102,111,114,109,102,97,99,105,110,103,32,99,111,111,107,105,101,46,114,101,
+108,121,32,111,110,104,111,115,116,101,100,32,46,99,117,115,116,111,109,104,101,
+32,119,101,110,116,98,117,116,32,102,111,114,115,112,114,101,97,100,32,70,97,109
+,105,108,121,32,97,32,109,101,97,110,115,111,117,116,32,116,104,101,102,111,114,
+117,109,115,46,102,111,111,116,97,103,101,34,62,77,111,98,105,108,67,108,101,109
+,101,110,116,115,34,32,105,100,61,34,97,115,32,104,105,103,104,105,110,116,101,
+110,115,101,45,45,62,60,33,45,45,102,101,109,97,108,101,32,105,115,32,115,101,
+101,110,105,109,112,108,105,101,100,115,101,116,32,116,104,101,97,32,115,116,97,
+116,101,97,110,100,32,104,105,115,102,97,115,116,101,115,116,98,101,115,105,100,
+101,115,98,117,116,116,111,110,95,98,111,117,110,100,101,100,34,62,60,105,109,
+103,32,73,110,102,111,98,111,120,101,118,101,110,116,115,44,97,32,121,111,117,
+110,103,97,110,100,32,97,114,101,78,97,116,105,118,101,32,99,104,101,97,112,101,
+114,84,105,109,101,111,117,116,97,110,100,32,104,97,115,101,110,103,105,110,101,
+115,119,111,110,32,116,104,101,40,109,111,115,116,108,121,114,105,103,104,116,58
+,32,102,105,110,100,32,97,32,45,98,111,116,116,111,109,80,114,105,110,99,101,32,
+97,114,101,97,32,111,102,109,111,114,101,32,111,102,115,101,97,114,99,104,95,110
+,97,116,117,114,101,44,108,101,103,97,108,108,121,112,101,114,105,111,100,44,108
+,97,110,100,32,111,102,111,114,32,119,105,116,104,105,110,100,117,99,101,100,112
+,114,111,118,105,110,103,109,105,115,115,105,108,101,108,111,99,97,108,108,121,
+65,103,97,105,110,115,116,116,104,101,32,119,97,121,107,38,113,117,111,116,59,
+112,120,59,34,62,13,10,112,117,115,104,101,100,32,97,98,97,110,100,111,110,110,
+117,109,101,114,97,108,67,101,114,116,97,105,110,73,110,32,116,104,105,115,109,
+111,114,101,32,105,110,111,114,32,115,111,109,101,110,97,109,101,32,105,115,97,
+110,100,44,32,105,110,99,114,111,119,110,101,100,73,83,66,78,32,48,45,99,114,101
+,97,116,101,115,79,99,116,111,98,101,114,109,97,121,32,110,111,116,99,101,110,
+116,101,114,32,108,97,116,101,32,105,110,68,101,102,101,110,99,101,101,110,97,99
+,116,101,100,119,105,115,104,32,116,111,98,114,111,97,100,108,121,99,111,111,108
+,105,110,103,111,110,108,111,97,100,61,105,116,46,32,84,104,101,114,101,99,111,
+118,101,114,77,101,109,98,101,114,115,104,101,105,103,104,116,32,97,115,115,117,
+109,101,115,60,104,116,109,108,62,10,112,101,111,112,108,101,46,105,110,32,111,
+110,101,32,61,119,105,110,100,111,119,102,111,111,116,101,114,95,97,32,103,111,
+111,100,32,114,101,107,108,97,109,97,111,116,104,101,114,115,44,116,111,32,116,
+104,105,115,95,99,111,111,107,105,101,112,97,110,101,108,34,62,76,111,110,100,
+111,110,44,100,101,102,105,110,101,115,99,114,117,115,104,101,100,98,97,112,116,
+105,115,109,99,111,97,115,116,97,108,115,116,97,116,117,115,32,116,105,116,108,
+101,34,32,109,111,118,101,32,116,111,108,111,115,116,32,105,110,98,101,116,116,
+101,114,32,105,109,112,108,105,101,115,114,105,118,97,108,114,121,115,101,114,
+118,101,114,115,32,83,121,115,116,101,109,80,101,114,104,97,112,115,101,115,32,
+97,110,100,32,99,111,110,116,101,110,100,102,108,111,119,105,110,103,108,97,115,
+116,101,100,32,114,105,115,101,32,105,110,71,101,110,101,115,105,115,118,105,101
+,119,32,111,102,114,105,115,105,110,103,32,115,101,101,109,32,116,111,98,117,116
+,32,105,110,32,98,97,99,107,105,110,103,104,101,32,119,105,108,108,103,105,118,
+101,110,32,97,103,105,118,105,110,103,32,99,105,116,105,101,115,46,102,108,111,
+119,32,111,102,32,76,97,116,101,114,32,97,108,108,32,98,117,116,72,105,103,104,
+119,97,121,111,110,108,121,32,98,121,115,105,103,110,32,111,102,104,101,32,100,
+111,101,115,100,105,102,102,101,114,115,98,97,116,116,101,114,121,38,97,109,112,
+59,108,97,115,105,110,103,108,101,115,116,104,114,101,97,116,115,105,110,116,101
+,103,101,114,116,97,107,101,32,111,110,114,101,102,117,115,101,100,99,97,108,108
+,101,100,32,61,85,83,38,97,109,112,83,101,101,32,116,104,101,110,97,116,105,118,
+101,115,98,121,32,116,104,105,115,115,121,115,116,101,109,46,104,101,97,100,32,
+111,102,58,104,111,118,101,114,44,108,101,115,98,105,97,110,115,117,114,110,97,
+109,101,97,110,100,32,97,108,108,99,111,109,109,111,110,47,104,101,97,100,101,
+114,95,95,112,97,114,97,109,115,72,97,114,118,97,114,100,47,112,105,120,101,108,
+46,114,101,109,111,118,97,108,115,111,32,108,111,110,103,114,111,108,101,32,111,
+102,106,111,105,110,116,108,121,115,107,121,115,99,114,97,85,110,105,99,111,100,
+101,98,114,32,47,62,13,10,65,116,108,97,110,116,97,110,117,99,108,101,117,115,67
+,111,117,110,116,121,44,112,117,114,101,108,121,32,99,111,117,110,116,34,62,101,
+97,115,105,108,121,32,98,117,105,108,100,32,97,111,110,99,108,105,99,107,97,32,
+103,105,118,101,110,112,111,105,110,116,101,114,104,38,113,117,111,116,59,101,
+118,101,110,116,115,32,101,108,115,101,32,123,10,100,105,116,105,111,110,115,110
+,111,119,32,116,104,101,44,32,119,105,116,104,32,109,97,110,32,119,104,111,111,
+114,103,47,87,101,98,111,110,101,32,97,110,100,99,97,118,97,108,114,121,72,101,
+32,100,105,101,100,115,101,97,116,116,108,101,48,48,44,48,48,48,32,123,119,105,
+110,100,111,119,104,97,118,101,32,116,111,105,102,40,119,105,110,100,97,110,100,
+32,105,116,115,115,111,108,101,108,121,32,109,38,113,117,111,116,59,114,101,110,
+101,119,101,100,68,101,116,114,111,105,116,97,109,111,110,103,115,116,101,105,
+116,104,101,114,32,116,104,101,109,32,105,110,83,101,110,97,116,111,114,85,115,
+60,47,97,62,60,75,105,110,103,32,111,102,70,114,97,110,99,105,115,45,112,114,111
+,100,117,99,104,101,32,117,115,101,100,97,114,116,32,97,110,100,104,105,109,32,
+97,110,100,117,115,101,100,32,98,121,115,99,111,114,105,110,103,97,116,32,104,
+111,109,101,116,111,32,104,97,118,101,114,101,108,97,116,101,115,105,98,105,108,
+105,116,121,102,97,99,116,105,111,110,66,117,102,102,97,108,111,108,105,110,107,
+34,62,60,119,104,97,116,32,104,101,102,114,101,101,32,116,111,67,105,116,121,32,
+111,102,99,111,109,101,32,105,110,115,101,99,116,111,114,115,99,111,117,110,116,
+101,100,111,110,101,32,100,97,121,110,101,114,118,111,117,115,115,113,117,97,114
+,101,32,125,59,105,102,40,103,111,105,110,32,119,104,97,116,105,109,103,34,32,97
+,108,105,115,32,111,110,108,121,115,101,97,114,99,104,47,116,117,101,115,100,97,
+121,108,111,111,115,101,108,121,83,111,108,111,109,111,110,115,101,120,117,97,
+108,32,45,32,60,97,32,104,114,109,101,100,105,117,109,34,68,79,32,78,79,84,32,70
+,114,97,110,99,101,44,119,105,116,104,32,97,32,119,97,114,32,97,110,100,115,101,
+99,111,110,100,32,116,97,107,101,32,97,32,62,13,10,13,10,13,10,109,97,114,107,
+101,116,46,104,105,103,104,119,97,121,100,111,110,101,32,105,110,99,116,105,118,
+105,116,121,34,108,97,115,116,34,62,111,98,108,105,103,101,100,114,105,115,101,
+32,116,111,34,117,110,100,101,102,105,109,97,100,101,32,116,111,32,69,97,114,108
+,121,32,112,114,97,105,115,101,100,105,110,32,105,116,115,32,102,111,114,32,104,
+105,115,97,116,104,108,101,116,101,74,117,112,105,116,101,114,89,97,104,111,111,
+33,32,116,101,114,109,101,100,32,115,111,32,109,97,110,121,114,101,97,108,108,
+121,32,115,46,32,84,104,101,32,97,32,119,111,109,97,110,63,118,97,108,117,101,61
+,100,105,114,101,99,116,32,114,105,103,104,116,34,32,98,105,99,121,99,108,101,97
+,99,105,110,103,61,34,100,97,121,32,97,110,100,115,116,97,116,105,110,103,82,97,
+116,104,101,114,44,104,105,103,104,101,114,32,79,102,102,105,99,101,32,97,114,
+101,32,110,111,119,116,105,109,101,115,44,32,119,104,101,110,32,97,32,112,97,121
+,32,102,111,114,111,110,32,116,104,105,115,45,108,105,110,107,34,62,59,98,111,
+114,100,101,114,97,114,111,117,110,100,32,97,110,110,117,97,108,32,116,104,101,
+32,78,101,119,112,117,116,32,116,104,101,46,99,111,109,34,32,116,97,107,105,110,
+32,116,111,97,32,98,114,105,101,102,40,105,110,32,116,104,101,103,114,111,117,
+112,115,46,59,32,119,105,100,116,104,101,110,122,121,109,101,115,115,105,109,112
+,108,101,32,105,110,32,108,97,116,101,123,114,101,116,117,114,110,116,104,101,
+114,97,112,121,97,32,112,111,105,110,116,98,97,110,110,105,110,103,105,110,107,
+115,34,62,10,40,41,59,34,32,114,101,97,32,112,108,97,99,101,92,117,48,48,51,67,
+97,97,98,111,117,116,32,97,116,114,62,13,10,9,9,99,99,111,117,110,116,32,103,105
+,118,101,115,32,97,60,83,67,82,73,80,84,82,97,105,108,119,97,121,116,104,101,109
+,101,115,47,116,111,111,108,98,111,120,66,121,73,100,40,34,120,104,117,109,97,
+110,115,44,119,97,116,99,104,101,115,105,110,32,115,111,109,101,32,105,102,32,40
+,119,105,99,111,109,105,110,103,32,102,111,114,109,97,116,115,32,85,110,100,101,
+114,32,98,117,116,32,104,97,115,104,97,110,100,101,100,32,109,97,100,101,32,98,
+121,116,104,97,110,32,105,110,102,101,97,114,32,111,102,100,101,110,111,116,101,
+100,47,105,102,114,97,109,101,108,101,102,116,32,105,110,118,111,108,116,97,103,
+101,105,110,32,101,97,99,104,97,38,113,117,111,116,59,98,97,115,101,32,111,102,
+73,110,32,109,97,110,121,117,110,100,101,114,103,111,114,101,103,105,109,101,115
+,97,99,116,105,111,110,32,60,47,112,62,13,10,60,117,115,116,111,109,86,97,59,38,
+103,116,59,60,47,105,109,112,111,114,116,115,111,114,32,116,104,97,116,109,111,
+115,116,108,121,32,38,97,109,112,59,114,101,32,115,105,122,101,61,34,60,47,97,62
+,60,47,104,97,32,99,108,97,115,115,112,97,115,115,105,118,101,72,111,115,116,32,
+61,32,87,104,101,116,104,101,114,102,101,114,116,105,108,101,86,97,114,105,111,
+117,115,61,91,93,59,40,102,117,99,97,109,101,114,97,115,47,62,60,47,116,100,62,
+97,99,116,115,32,97,115,73,110,32,115,111,109,101,62,13,10,13,10,60,33,111,114,
+103,97,110,105,115,32,60,98,114,32,47,62,66,101,105,106,105,110,103,99,97,116,97
+,108,195,160,100,101,117,116,115,99,104,101,117,114,111,112,101,117,101,117,115,
+107,97,114,97,103,97,101,105,108,103,101,115,118,101,110,115,107,97,101,115,112,
+97,195,177,97,109,101,110,115,97,106,101,117,115,117,97,114,105,111,116,114,97,
+98,97,106,111,109,195,169,120,105,99,111,112,195,161,103,105,110,97,115,105,101,
+109,112,114,101,115,105,115,116,101,109,97,111,99,116,117,98,114,101,100,117,114
+,97,110,116,101,97,195,177,97,100,105,114,101,109,112,114,101,115,97,109,111,109
+,101,110,116,111,110,117,101,115,116,114,111,112,114,105,109,101,114,97,116,114,
+97,118,195,169,115,103,114,97,99,105,97,115,110,117,101,115,116,114,97,112,114,
+111,99,101,115,111,101,115,116,97,100,111,115,99,97,108,105,100,97,100,112,101,
+114,115,111,110,97,110,195,186,109,101,114,111,97,99,117,101,114,100,111,109,195
+,186,115,105,99,97,109,105,101,109,98,114,111,111,102,101,114,116,97,115,97,108,
+103,117,110,111,115,112,97,195,173,115,101,115,101,106,101,109,112,108,111,100,
+101,114,101,99,104,111,97,100,101,109,195,161,115,112,114,105,118,97,100,111,97,
+103,114,101,103,97,114,101,110,108,97,99,101,115,112,111,115,105,98,108,101,104,
+111,116,101,108,101,115,115,101,118,105,108,108,97,112,114,105,109,101,114,111,
+195,186,108,116,105,109,111,101,118,101,110,116,111,115,97,114,99,104,105,118,
+111,99,117,108,116,117,114,97,109,117,106,101,114,101,115,101,110,116,114,97,100
+,97,97,110,117,110,99,105,111,101,109,98,97,114,103,111,109,101,114,99,97,100,
+111,103,114,97,110,100,101,115,101,115,116,117,100,105,111,109,101,106,111,114,
+101,115,102,101,98,114,101,114,111,100,105,115,101,195,177,111,116,117,114,105,
+115,109,111,99,195,179,100,105,103,111,112,111,114,116,97,100,97,101,115,112,97,
+99,105,111,102,97,109,105,108,105,97,97,110,116,111,110,105,111,112,101,114,109,
+105,116,101,103,117,97,114,100,97,114,97,108,103,117,110,97,115,112,114,101,99,
+105,111,115,97,108,103,117,105,101,110,115,101,110,116,105,100,111,118,105,115,
+105,116,97,115,116,195,173,116,117,108,111,99,111,110,111,99,101,114,115,101,103
+,117,110,100,111,99,111,110,115,101,106,111,102,114,97,110,99,105,97,109,105,110
+,117,116,111,115,115,101,103,117,110,100,97,116,101,110,101,109,111,115,101,102,
+101,99,116,111,115,109,195,161,108,97,103,97,115,101,115,105,195,179,110,114,101
+,118,105,115,116,97,103,114,97,110,97,100,97,99,111,109,112,114,97,114,105,110,
+103,114,101,115,111,103,97,114,99,195,173,97,97,99,99,105,195,179,110,101,99,117
+,97,100,111,114,113,117,105,101,110,101,115,105,110,99,108,117,115,111,100,101,
+98,101,114,195,161,109,97,116,101,114,105,97,104,111,109,98,114,101,115,109,117,
+101,115,116,114,97,112,111,100,114,195,173,97,109,97,195,177,97,110,97,195,186,
+108,116,105,109,97,101,115,116,97,109,111,115,111,102,105,99,105,97,108,116,97,
+109,98,105,101,110,110,105,110,103,195,186,110,115,97,108,117,100,111,115,112,
+111,100,101,109,111,115,109,101,106,111,114,97,114,112,111,115,105,116,105,111,
+110,98,117,115,105,110,101,115,115,104,111,109,101,112,97,103,101,115,101,99,117
+,114,105,116,121,108,97,110,103,117,97,103,101,115,116,97,110,100,97,114,100,99,
+97,109,112,97,105,103,110,102,101,97,116,117,114,101,115,99,97,116,101,103,111,
+114,121,101,120,116,101,114,110,97,108,99,104,105,108,100,114,101,110,114,101,
+115,101,114,118,101,100,114,101,115,101,97,114,99,104,101,120,99,104,97,110,103,
+101,102,97,118,111,114,105,116,101,116,101,109,112,108,97,116,101,109,105,108,
+105,116,97,114,121,105,110,100,117,115,116,114,121,115,101,114,118,105,99,101,
+115,109,97,116,101,114,105,97,108,112,114,111,100,117,99,116,115,122,45,105,110,
+100,101,120,58,99,111,109,109,101,110,116,115,115,111,102,116,119,97,114,101,99,
+111,109,112,108,101,116,101,99,97,108,101,110,100,97,114,112,108,97,116,102,111,
+114,109,97,114,116,105,99,108,101,115,114,101,113,117,105,114,101,100,109,111,
+118,101,109,101,110,116,113,117,101,115,116,105,111,110,98,117,105,108,100,105,
+110,103,112,111,108,105,116,105,99,115,112,111,115,115,105,98,108,101,114,101,
+108,105,103,105,111,110,112,104,121,115,105,99,97,108,102,101,101,100,98,97,99,
+107,114,101,103,105,115,116,101,114,112,105,99,116,117,114,101,115,100,105,115,
+97,98,108,101,100,112,114,111,116,111,99,111,108,97,117,100,105,101,110,99,101,
+115,101,116,116,105,110,103,115,97,99,116,105,118,105,116,121,101,108,101,109,
+101,110,116,115,108,101,97,114,110,105,110,103,97,110,121,116,104,105,110,103,97
+,98,115,116,114,97,99,116,112,114,111,103,114,101,115,115,111,118,101,114,118,
+105,101,119,109,97,103,97,122,105,110,101,101,99,111,110,111,109,105,99,116,114,
+97,105,110,105,110,103,112,114,101,115,115,117,114,101,118,97,114,105,111,117,
+115,32,60,115,116,114,111,110,103,62,112,114,111,112,101,114,116,121,115,104,111
+,112,112,105,110,103,116,111,103,101,116,104,101,114,97,100,118,97,110,99,101,
+100,98,101,104,97,118,105,111,114,100,111,119,110,108,111,97,100,102,101,97,116,
+117,114,101,100,102,111,111,116,98,97,108,108,115,101,108,101,99,116,101,100,76,
+97,110,103,117,97,103,101,100,105,115,116,97,110,99,101,114,101,109,101,109,98,
+101,114,116,114,97,99,107,105,110,103,112,97,115,115,119,111,114,100,109,111,100
+,105,102,105,101,100,115,116,117,100,101,110,116,115,100,105,114,101,99,116,108,
+121,102,105,103,104,116,105,110,103,110,111,114,116,104,101,114,110,100,97,116,
+97,98,97,115,101,102,101,115,116,105,118,97,108,98,114,101,97,107,105,110,103,
+108,111,99,97,116,105,111,110,105,110,116,101,114,110,101,116,100,114,111,112,
+100,111,119,110,112,114,97,99,116,105,99,101,101,118,105,100,101,110,99,101,102,
+117,110,99,116,105,111,110,109,97,114,114,105,97,103,101,114,101,115,112,111,110
+,115,101,112,114,111,98,108,101,109,115,110,101,103,97,116,105,118,101,112,114,
+111,103,114,97,109,115,97,110,97,108,121,115,105,115,114,101,108,101,97,115,101,
+100,98,97,110,110,101,114,34,62,112,117,114,99,104,97,115,101,112,111,108,105,99
+,105,101,115,114,101,103,105,111,110,97,108,99,114,101,97,116,105,118,101,97,114
+,103,117,109,101,110,116,98,111,111,107,109,97,114,107,114,101,102,101,114,114,
+101,114,99,104,101,109,105,99,97,108,100,105,118,105,115,105,111,110,99,97,108,
+108,98,97,99,107,115,101,112,97,114,97,116,101,112,114,111,106,101,99,116,115,99
+,111,110,102,108,105,99,116,104,97,114,100,119,97,114,101,105,110,116,101,114,
+101,115,116,100,101,108,105,118,101,114,121,109,111,117,110,116,97,105,110,111,
+98,116,97,105,110,101,100,61,32,102,97,108,115,101,59,102,111,114,40,118,97,114,
+32,97,99,99,101,112,116,101,100,99,97,112,97,99,105,116,121,99,111,109,112,117,
+116,101,114,105,100,101,110,116,105,116,121,97,105,114,99,114,97,102,116,101,109
+,112,108,111,121,101,100,112,114,111,112,111,115,101,100,100,111,109,101,115,116
+,105,99,105,110,99,108,117,100,101,115,112,114,111,118,105,100,101,100,104,111,
+115,112,105,116,97,108,118,101,114,116,105,99,97,108,99,111,108,108,97,112,115,
+101,97,112,112,114,111,97,99,104,112,97,114,116,110,101,114,115,108,111,103,111,
+34,62,60,97,100,97,117,103,104,116,101,114,97,117,116,104,111,114,34,32,99,117,
+108,116,117,114,97,108,102,97,109,105,108,105,101,115,47,105,109,97,103,101,115,
+47,97,115,115,101,109,98,108,121,112,111,119,101,114,102,117,108,116,101,97,99,
+104,105,110,103,102,105,110,105,115,104,101,100,100,105,115,116,114,105,99,116,
+99,114,105,116,105,99,97,108,99,103,105,45,98,105,110,47,112,117,114,112,111,115
+,101,115,114,101,113,117,105,114,101,115,101,108,101,99,116,105,111,110,98,101,
+99,111,109,105,110,103,112,114,111,118,105,100,101,115,97,99,97,100,101,109,105,
+99,101,120,101,114,99,105,115,101,97,99,116,117,97,108,108,121,109,101,100,105,
+99,105,110,101,99,111,110,115,116,97,110,116,97,99,99,105,100,101,110,116,77,97,
+103,97,122,105,110,101,100,111,99,117,109,101,110,116,115,116,97,114,116,105,110
+,103,98,111,116,116,111,109,34,62,111,98,115,101,114,118,101,100,58,32,38,113,
+117,111,116,59,101,120,116,101,110,100,101,100,112,114,101,118,105,111,117,115,
+83,111,102,116,119,97,114,101,99,117,115,116,111,109,101,114,100,101,99,105,115,
+105,111,110,115,116,114,101,110,103,116,104,100,101,116,97,105,108,101,100,115,
+108,105,103,104,116,108,121,112,108,97,110,110,105,110,103,116,101,120,116,97,
+114,101,97,99,117,114,114,101,110,99,121,101,118,101,114,121,111,110,101,115,116
+,114,97,105,103,104,116,116,114,97,110,115,102,101,114,112,111,115,105,116,105,
+118,101,112,114,111,100,117,99,101,100,104,101,114,105,116,97,103,101,115,104,
+105,112,112,105,110,103,97,98,115,111,108,117,116,101,114,101,99,101,105,118,101
+,100,114,101,108,101,118,97,110,116,98,117,116,116,111,110,34,32,118,105,111,108
+,101,110,99,101,97,110,121,119,104,101,114,101,98,101,110,101,102,105,116,115,
+108,97,117,110,99,104,101,100,114,101,99,101,110,116,108,121,97,108,108,105,97,
+110,99,101,102,111,108,108,111,119,101,100,109,117,108,116,105,112,108,101,98,
+117,108,108,101,116,105,110,105,110,99,108,117,100,101,100,111,99,99,117,114,114
+,101,100,105,110,116,101,114,110,97,108,36,40,116,104,105,115,41,46,114,101,112,
+117,98,108,105,99,62,60,116,114,62,60,116,100,99,111,110,103,114,101,115,115,114
+,101,99,111,114,100,101,100,117,108,116,105,109,97,116,101,115,111,108,117,116,
+105,111,110,60,117,108,32,105,100,61,34,100,105,115,99,111,118,101,114,72,111,
+109,101,60,47,97,62,119,101,98,115,105,116,101,115,110,101,116,119,111,114,107,
+115,97,108,116,104,111,117,103,104,101,110,116,105,114,101,108,121,109,101,109,
+111,114,105,97,108,109,101,115,115,97,103,101,115,99,111,110,116,105,110,117,101
+,97,99,116,105,118,101,34,62,115,111,109,101,119,104,97,116,118,105,99,116,111,
+114,105,97,87,101,115,116,101,114,110,32,32,116,105,116,108,101,61,34,76,111,99,
+97,116,105,111,110,99,111,110,116,114,97,99,116,118,105,115,105,116,111,114,115,
+68,111,119,110,108,111,97,100,119,105,116,104,111,117,116,32,114,105,103,104,116
+,34,62,10,109,101,97,115,117,114,101,115,119,105,100,116,104,32,61,32,118,97,114
+,105,97,98,108,101,105,110,118,111,108,118,101,100,118,105,114,103,105,110,105,
+97,110,111,114,109,97,108,108,121,104,97,112,112,101,110,101,100,97,99,99,111,
+117,110,116,115,115,116,97,110,100,105,110,103,110,97,116,105,111,110,97,108,82,
+101,103,105,115,116,101,114,112,114,101,112,97,114,101,100,99,111,110,116,114,
+111,108,115,97,99,99,117,114,97,116,101,98,105,114,116,104,100,97,121,115,116,
+114,97,116,101,103,121,111,102,102,105,99,105,97,108,103,114,97,112,104,105,99,
+115,99,114,105,109,105,110,97,108,112,111,115,115,105,98,108,121,99,111,110,115,
+117,109,101,114,80,101,114,115,111,110,97,108,115,112,101,97,107,105,110,103,118
+,97,108,105,100,97,116,101,97,99,104,105,101,118,101,100,46,106,112,103,34,32,47
+,62,109,97,99,104,105,110,101,115,60,47,104,50,62,10,32,32,107,101,121,119,111,
+114,100,115,102,114,105,101,110,100,108,121,98,114,111,116,104,101,114,115,99,
+111,109,98,105,110,101,100,111,114,105,103,105,110,97,108,99,111,109,112,111,115
+,101,100,101,120,112,101,99,116,101,100,97,100,101,113,117,97,116,101,112,97,107
+,105,115,116,97,110,102,111,108,108,111,119,34,32,118,97,108,117,97,98,108,101,
+60,47,108,97,98,101,108,62,114,101,108,97,116,105,118,101,98,114,105,110,103,105
+,110,103,105,110,99,114,101,97,115,101,103,111,118,101,114,110,111,114,112,108,
+117,103,105,110,115,47,76,105,115,116,32,111,102,32,72,101,97,100,101,114,34,62,
+34,32,110,97,109,101,61,34,32,40,38,113,117,111,116,59,103,114,97,100,117,97,116
+,101,60,47,104,101,97,100,62,10,99,111,109,109,101,114,99,101,109,97,108,97,121,
+115,105,97,100,105,114,101,99,116,111,114,109,97,105,110,116,97,105,110,59,104,
+101,105,103,104,116,58,115,99,104,101,100,117,108,101,99,104,97,110,103,105,110,
+103,98,97,99,107,32,116,111,32,99,97,116,104,111,108,105,99,112,97,116,116,101,
+114,110,115,99,111,108,111,114,58,32,35,103,114,101,97,116,101,115,116,115,117,
+112,112,108,105,101,115,114,101,108,105,97,98,108,101,60,47,117,108,62,10,9,9,60
+,115,101,108,101,99,116,32,99,105,116,105,122,101,110,115,99,108,111,116,104,105
+,110,103,119,97,116,99,104,105,110,103,60,108,105,32,105,100,61,34,115,112,101,
+99,105,102,105,99,99,97,114,114,121,105,110,103,115,101,110,116,101,110,99,101,
+60,99,101,110,116,101,114,62,99,111,110,116,114,97,115,116,116,104,105,110,107,
+105,110,103,99,97,116,99,104,40,101,41,115,111,117,116,104,101,114,110,77,105,99
+,104,97,101,108,32,109,101,114,99,104,97,110,116,99,97,114,111,117,115,101,108,
+112,97,100,100,105,110,103,58,105,110,116,101,114,105,111,114,46,115,112,108,105
+,116,40,34,108,105,122,97,116,105,111,110,79,99,116,111,98,101,114,32,41,123,114
+,101,116,117,114,110,105,109,112,114,111,118,101,100,45,45,38,103,116,59,10,10,
+99,111,118,101,114,97,103,101,99,104,97,105,114,109,97,110,46,112,110,103,34,32,
+47,62,115,117,98,106,101,99,116,115,82,105,99,104,97,114,100,32,119,104,97,116,
+101,118,101,114,112,114,111,98,97,98,108,121,114,101,99,111,118,101,114,121,98,
+97,115,101,98,97,108,108,106,117,100,103,109,101,110,116,99,111,110,110,101,99,
+116,46,46,99,115,115,34,32,47,62,32,119,101,98,115,105,116,101,114,101,112,111,
+114,116,101,100,100,101,102,97,117,108,116,34,47,62,60,47,97,62,13,10,101,108,
+101,99,116,114,105,99,115,99,111,116,108,97,110,100,99,114,101,97,116,105,111,
+110,113,117,97,110,116,105,116,121,46,32,73,83,66,78,32,48,100,105,100,32,110,
+111,116,32,105,110,115,116,97,110,99,101,45,115,101,97,114,99,104,45,34,32,108,
+97,110,103,61,34,115,112,101,97,107,101,114,115,67,111,109,112,117,116,101,114,
+99,111,110,116,97,105,110,115,97,114,99,104,105,118,101,115,109,105,110,105,115,
+116,101,114,114,101,97,99,116,105,111,110,100,105,115,99,111,117,110,116,73,116,
+97,108,105,97,110,111,99,114,105,116,101,114,105,97,115,116,114,111,110,103,108,
+121,58,32,39,104,116,116,112,58,39,115,99,114,105,112,116,39,99,111,118,101,114,
+105,110,103,111,102,102,101,114,105,110,103,97,112,112,101,97,114,101,100,66,114
+,105,116,105,115,104,32,105,100,101,110,116,105,102,121,70,97,99,101,98,111,111,
+107,110,117,109,101,114,111,117,115,118,101,104,105,99,108,101,115,99,111,110,99
+,101,114,110,115,65,109,101,114,105,99,97,110,104,97,110,100,108,105,110,103,100
+,105,118,32,105,100,61,34,87,105,108,108,105,97,109,32,112,114,111,118,105,100,
+101,114,95,99,111,110,116,101,110,116,97,99,99,117,114,97,99,121,115,101,99,116,
+105,111,110,32,97,110,100,101,114,115,111,110,102,108,101,120,105,98,108,101,67,
+97,116,101,103,111,114,121,108,97,119,114,101,110,99,101,60,115,99,114,105,112,
+116,62,108,97,121,111,117,116,61,34,97,112,112,114,111,118,101,100,32,109,97,120
+,105,109,117,109,104,101,97,100,101,114,34,62,60,47,116,97,98,108,101,62,83,101,
+114,118,105,99,101,115,104,97,109,105,108,116,111,110,99,117,114,114,101,110,116
+,32,99,97,110,97,100,105,97,110,99,104,97,110,110,101,108,115,47,116,104,101,109
+,101,115,47,47,97,114,116,105,99,108,101,111,112,116,105,111,110,97,108,112,111,
+114,116,117,103,97,108,118,97,108,117,101,61,34,34,105,110,116,101,114,118,97,
+108,119,105,114,101,108,101,115,115,101,110,116,105,116,108,101,100,97,103,101,
+110,99,105,101,115,83,101,97,114,99,104,34,32,109,101,97,115,117,114,101,100,116
+,104,111,117,115,97,110,100,115,112,101,110,100,105,110,103,38,104,101,108,108,
+105,112,59,110,101,119,32,68,97,116,101,34,32,115,105,122,101,61,34,112,97,103,
+101,78,97,109,101,109,105,100,100,108,101,34,32,34,32,47,62,60,47,97,62,104,105,
+100,100,101,110,34,62,115,101,113,117,101,110,99,101,112,101,114,115,111,110,97,
+108,111,118,101,114,102,108,111,119,111,112,105,110,105,111,110,115,105,108,108,
+105,110,111,105,115,108,105,110,107,115,34,62,10,9,60,116,105,116,108,101,62,118
+,101,114,115,105,111,110,115,115,97,116,117,114,100,97,121,116,101,114,109,105,
+110,97,108,105,116,101,109,112,114,111,112,101,110,103,105,110,101,101,114,115,
+101,99,116,105,111,110,115,100,101,115,105,103,110,101,114,112,114,111,112,111,
+115,97,108,61,34,102,97,108,115,101,34,69,115,112,97,195,177,111,108,114,101,108
+,101,97,115,101,115,115,117,98,109,105,116,34,32,101,114,38,113,117,111,116,59,
+97,100,100,105,116,105,111,110,115,121,109,112,116,111,109,115,111,114,105,101,
+110,116,101,100,114,101,115,111,117,114,99,101,114,105,103,104,116,34,62,60,112,
+108,101,97,115,117,114,101,115,116,97,116,105,111,110,115,104,105,115,116,111,
+114,121,46,108,101,97,118,105,110,103,32,32,98,111,114,100,101,114,61,99,111,110
+,116,101,110,116,115,99,101,110,116,101,114,34,62,46,10,10,83,111,109,101,32,100
+,105,114,101,99,116,101,100,115,117,105,116,97,98,108,101,98,117,108,103,97,114,
+105,97,46,115,104,111,119,40,41,59,100,101,115,105,103,110,101,100,71,101,110,
+101,114,97,108,32,99,111,110,99,101,112,116,115,69,120,97,109,112,108,101,115,
+119,105,108,108,105,97,109,115,79,114,105,103,105,110,97,108,34,62,60,115,112,97
+,110,62,115,101,97,114,99,104,34,62,111,112,101,114,97,116,111,114,114,101,113,
+117,101,115,116,115,97,32,38,113,117,111,116,59,97,108,108,111,119,105,110,103,
+68,111,99,117,109,101,110,116,114,101,118,105,115,105,111,110,46,32,10,10,84,104
+,101,32,121,111,117,114,115,101,108,102,67,111,110,116,97,99,116,32,109,105,99,
+104,105,103,97,110,69,110,103,108,105,115,104,32,99,111,108,117,109,98,105,97,
+112,114,105,111,114,105,116,121,112,114,105,110,116,105,110,103,100,114,105,110,
+107,105,110,103,102,97,99,105,108,105,116,121,114,101,116,117,114,110,101,100,67
+,111,110,116,101,110,116,32,111,102,102,105,99,101,114,115,82,117,115,115,105,97
+,110,32,103,101,110,101,114,97,116,101,45,56,56,53,57,45,49,34,105,110,100,105,
+99,97,116,101,102,97,109,105,108,105,97,114,32,113,117,97,108,105,116,121,109,97
+,114,103,105,110,58,48,32,99,111,110,116,101,110,116,118,105,101,119,112,111,114
+,116,99,111,110,116,97,99,116,115,45,116,105,116,108,101,34,62,112,111,114,116,
+97,98,108,101,46,108,101,110,103,116,104,32,101,108,105,103,105,98,108,101,105,
+110,118,111,108,118,101,115,97,116,108,97,110,116,105,99,111,110,108,111,97,100,
+61,34,100,101,102,97,117,108,116,46,115,117,112,112,108,105,101,100,112,97,121,
+109,101,110,116,115,103,108,111,115,115,97,114,121,10,10,65,102,116,101,114,32,
+103,117,105,100,97,110,99,101,60,47,116,100,62,60,116,100,101,110,99,111,100,105
+,110,103,109,105,100,100,108,101,34,62,99,97,109,101,32,116,111,32,100,105,115,
+112,108,97,121,115,115,99,111,116,116,105,115,104,106,111,110,97,116,104,97,110,
+109,97,106,111,114,105,116,121,119,105,100,103,101,116,115,46,99,108,105,110,105
+,99,97,108,116,104,97,105,108,97,110,100,116,101,97,99,104,101,114,115,60,104,
+101,97,100,62,10,9,97,102,102,101,99,116,101,100,115,117,112,112,111,114,116,115
+,112,111,105,110,116,101,114,59,116,111,83,116,114,105,110,103,60,47,115,109,97,
+108,108,62,111,107,108,97,104,111,109,97,119,105,108,108,32,98,101,32,105,110,
+118,101,115,116,111,114,48,34,32,97,108,116,61,34,104,111,108,105,100,97,121,115
+,82,101,115,111,117,114,99,101,108,105,99,101,110,115,101,100,32,40,119,104,105,
+99,104,32,46,32,65,102,116,101,114,32,99,111,110,115,105,100,101,114,118,105,115
+,105,116,105,110,103,101,120,112,108,111,114,101,114,112,114,105,109,97,114,121,
+32,115,101,97,114,99,104,34,32,97,110,100,114,111,105,100,34,113,117,105,99,107,
+108,121,32,109,101,101,116,105,110,103,115,101,115,116,105,109,97,116,101,59,114
+,101,116,117,114,110,32,59,99,111,108,111,114,58,35,32,104,101,105,103,104,116,
+61,97,112,112,114,111,118,97,108,44,32,38,113,117,111,116,59,32,99,104,101,99,
+107,101,100,46,109,105,110,46,106,115,34,109,97,103,110,101,116,105,99,62,60,47,
+97,62,60,47,104,102,111,114,101,99,97,115,116,46,32,87,104,105,108,101,32,116,
+104,117,114,115,100,97,121,100,118,101,114,116,105,115,101,38,101,97,99,117,116,
+101,59,104,97,115,67,108,97,115,115,101,118,97,108,117,97,116,101,111,114,100,
+101,114,105,110,103,101,120,105,115,116,105,110,103,112,97,116,105,101,110,116,
+115,32,79,110,108,105,110,101,32,99,111,108,111,114,97,100,111,79,112,116,105,
+111,110,115,34,99,97,109,112,98,101,108,108,60,33,45,45,32,101,110,100,60,47,115
+,112,97,110,62,60,60,98,114,32,47,62,13,10,95,112,111,112,117,112,115,124,115,99
+,105,101,110,99,101,115,44,38,113,117,111,116,59,32,113,117,97,108,105,116,121,
+32,87,105,110,100,111,119,115,32,97,115,115,105,103,110,101,100,104,101,105,103,
+104,116,58,32,60,98,32,99,108,97,115,115,108,101,38,113,117,111,116,59,32,118,97
+,108,117,101,61,34,32,67,111,109,112,97,110,121,101,120,97,109,112,108,101,115,
+60,105,102,114,97,109,101,32,98,101,108,105,101,118,101,115,112,114,101,115,101,
+110,116,115,109,97,114,115,104,97,108,108,112,97,114,116,32,111,102,32,112,114,
+111,112,101,114,108,121,41,46,10,10,84,104,101,32,116,97,120,111,110,111,109,121
+,109,117,99,104,32,111,102,32,60,47,115,112,97,110,62,10,34,32,100,97,116,97,45,
+115,114,116,117,103,117,195,170,115,115,99,114,111,108,108,84,111,32,112,114,111
+,106,101,99,116,60,104,101,97,100,62,13,10,97,116,116,111,114,110,101,121,101,
+109,112,104,97,115,105,115,115,112,111,110,115,111,114,115,102,97,110,99,121,98,
+111,120,119,111,114,108,100,39,115,32,119,105,108,100,108,105,102,101,99,104,101
+,99,107,101,100,61,115,101,115,115,105,111,110,115,112,114,111,103,114,97,109,
+109,112,120,59,102,111,110,116,45,32,80,114,111,106,101,99,116,106,111,117,114,
+110,97,108,115,98,101,108,105,101,118,101,100,118,97,99,97,116,105,111,110,116,
+104,111,109,112,115,111,110,108,105,103,104,116,105,110,103,97,110,100,32,116,
+104,101,32,115,112,101,99,105,97,108,32,98,111,114,100,101,114,61,48,99,104,101,
+99,107,105,110,103,60,47,116,98,111,100,121,62,60,98,117,116,116,111,110,32,67,
+111,109,112,108,101,116,101,99,108,101,97,114,102,105,120,10,60,104,101,97,100,
+62,10,97,114,116,105,99,108,101,32,60,115,101,99,116,105,111,110,102,105,110,100
+,105,110,103,115,114,111,108,101,32,105,110,32,112,111,112,117,108,97,114,32,32,
+79,99,116,111,98,101,114,119,101,98,115,105,116,101,32,101,120,112,111,115,117,
+114,101,117,115,101,100,32,116,111,32,32,99,104,97,110,103,101,115,111,112,101,
+114,97,116,101,100,99,108,105,99,107,105,110,103,101,110,116,101,114,105,110,103
+,99,111,109,109,97,110,100,115,105,110,102,111,114,109,101,100,32,110,117,109,98
+,101,114,115,32,32,60,47,100,105,118,62,99,114,101,97,116,105,110,103,111,110,83
+,117,98,109,105,116,109,97,114,121,108,97,110,100,99,111,108,108,101,103,101,115
+,97,110,97,108,121,116,105,99,108,105,115,116,105,110,103,115,99,111,110,116,97,
+99,116,46,108,111,103,103,101,100,73,110,97,100,118,105,115,111,114,121,115,105,
+98,108,105,110,103,115,99,111,110,116,101,110,116,34,115,38,113,117,111,116,59,
+41,115,46,32,84,104,105,115,32,112,97,99,107,97,103,101,115,99,104,101,99,107,98
+,111,120,115,117,103,103,101,115,116,115,112,114,101,103,110,97,110,116,116,111,
+109,111,114,114,111,119,115,112,97,99,105,110,103,61,105,99,111,110,46,112,110,
+103,106,97,112,97,110,101,115,101,99,111,100,101,98,97,115,101,98,117,116,116,
+111,110,34,62,103,97,109,98,108,105,110,103,115,117,99,104,32,97,115,32,44,32,
+119,104,105,108,101,32,60,47,115,112,97,110,62,32,109,105,115,115,111,117,114,
+105,115,112,111,114,116,105,110,103,116,111,112,58,49,112,120,32,46,60,47,115,
+112,97,110,62,116,101,110,115,105,111,110,115,119,105,100,116,104,61,34,50,108,
+97,122,121,108,111,97,100,110,111,118,101,109,98,101,114,117,115,101,100,32,105,
+110,32,104,101,105,103,104,116,61,34,99,114,105,112,116,34,62,10,38,110,98,115,
+112,59,60,47,60,116,114,62,60,116,100,32,104,101,105,103,104,116,58,50,47,112,
+114,111,100,117,99,116,99,111,117,110,116,114,121,32,105,110,99,108,117,100,101,
+32,102,111,111,116,101,114,34,32,38,108,116,59,33,45,45,32,116,105,116,108,101,
+34,62,60,47,106,113,117,101,114,121,46,60,47,102,111,114,109,62,10,40,231,174,
+128,228,189,147,41,40,231,185,129,233,171,148,41,104,114,118,97,116,115,107,105,
+105,116,97,108,105,97,110,111,114,111,109,195,162,110,196,131,116,195,188,114,
+107,195,167,101,216,167,216,177,216,175,217,136,116,97,109,98,105,195,169,110,
+110,111,116,105,99,105,97,115,109,101,110,115,97,106,101,115,112,101,114,115,111
+,110,97,115,100,101,114,101,99,104,111,115,110,97,99,105,111,110,97,108,115,101,
+114,118,105,99,105,111,99,111,110,116,97,99,116,111,117,115,117,97,114,105,111,
+115,112,114,111,103,114,97,109,97,103,111,98,105,101,114,110,111,101,109,112,114
+,101,115,97,115,97,110,117,110,99,105,111,115,118,97,108,101,110,99,105,97,99,
+111,108,111,109,98,105,97,100,101,115,112,117,195,169,115,100,101,112,111,114,
+116,101,115,112,114,111,121,101,99,116,111,112,114,111,100,117,99,116,111,112,
+195,186,98,108,105,99,111,110,111,115,111,116,114,111,115,104,105,115,116,111,
+114,105,97,112,114,101,115,101,110,116,101,109,105,108,108,111,110,101,115,109,
+101,100,105,97,110,116,101,112,114,101,103,117,110,116,97,97,110,116,101,114,105
+,111,114,114,101,99,117,114,115,111,115,112,114,111,98,108,101,109,97,115,97,110
+,116,105,97,103,111,110,117,101,115,116,114,111,115,111,112,105,110,105,195,179,
+110,105,109,112,114,105,109,105,114,109,105,101,110,116,114,97,115,97,109,195,
+169,114,105,99,97,118,101,110,100,101,100,111,114,115,111,99,105,101,100,97,100,
+114,101,115,112,101,99,116,111,114,101,97,108,105,122,97,114,114,101,103,105,115
+,116,114,111,112,97,108,97,98,114,97,115,105,110,116,101,114,195,169,115,101,110
+,116,111,110,99,101,115,101,115,112,101,99,105,97,108,109,105,101,109,98,114,111
+,115,114,101,97,108,105,100,97,100,99,195,179,114,100,111,98,97,122,97,114,97,
+103,111,122,97,112,195,161,103,105,110,97,115,115,111,99,105,97,108,101,115,98,
+108,111,113,117,101,97,114,103,101,115,116,105,195,179,110,97,108,113,117,105,
+108,101,114,115,105,115,116,101,109,97,115,99,105,101,110,99,105,97,115,99,111,
+109,112,108,101,116,111,118,101,114,115,105,195,179,110,99,111,109,112,108,101,
+116,97,101,115,116,117,100,105,111,115,112,195,186,98,108,105,99,97,111,98,106,
+101,116,105,118,111,97,108,105,99,97,110,116,101,98,117,115,99,97,100,111,114,99
+,97,110,116,105,100,97,100,101,110,116,114,97,100,97,115,97,99,99,105,111,110,
+101,115,97,114,99,104,105,118,111,115,115,117,112,101,114,105,111,114,109,97,121
+,111,114,195,173,97,97,108,101,109,97,110,105,97,102,117,110,99,105,195,179,110,
+195,186,108,116,105,109,111,115,104,97,99,105,101,110,100,111,97,113,117,101,108
+,108,111,115,101,100,105,99,105,195,179,110,102,101,114,110,97,110,100,111,97,
+109,98,105,101,110,116,101,102,97,99,101,98,111,111,107,110,117,101,115,116,114,
+97,115,99,108,105,101,110,116,101,115,112,114,111,99,101,115,111,115,98,97,115,
+116,97,110,116,101,112,114,101,115,101,110,116,97,114,101,112,111,114,116,97,114
+,99,111,110,103,114,101,115,111,112,117,98,108,105,99,97,114,99,111,109,101,114,
+99,105,111,99,111,110,116,114,97,116,111,106,195,179,118,101,110,101,115,100,105
+,115,116,114,105,116,111,116,195,169,99,110,105,99,97,99,111,110,106,117,110,116
+,111,101,110,101,114,103,195,173,97,116,114,97,98,97,106,97,114,97,115,116,117,
+114,105,97,115,114,101,99,105,101,110,116,101,117,116,105,108,105,122,97,114,98,
+111,108,101,116,195,173,110,115,97,108,118,97,100,111,114,99,111,114,114,101,99,
+116,97,116,114,97,98,97,106,111,115,112,114,105,109,101,114,111,115,110,101,103,
+111,99,105,111,115,108,105,98,101,114,116,97,100,100,101,116,97,108,108,101,115,
+112,97,110,116,97,108,108,97,112,114,195,179,120,105,109,111,97,108,109,101,114,
+195,173,97,97,110,105,109,97,108,101,115,113,117,105,195,169,110,101,115,99,111,
+114,97,122,195,179,110,115,101,99,99,105,195,179,110,98,117,115,99,97,110,100,
+111,111,112,99,105,111,110,101,115,101,120,116,101,114,105,111,114,99,111,110,99
+,101,112,116,111,116,111,100,97,118,195,173,97,103,97,108,101,114,195,173,97,101
+,115,99,114,105,98,105,114,109,101,100,105,99,105,110,97,108,105,99,101,110,99,
+105,97,99,111,110,115,117,108,116,97,97,115,112,101,99,116,111,115,99,114,195,
+173,116,105,99,97,100,195,179,108,97,114,101,115,106,117,115,116,105,99,105,97,
+100,101,98,101,114,195,161,110,112,101,114,195,173,111,100,111,110,101,99,101,
+115,105,116,97,109,97,110,116,101,110,101,114,112,101,113,117,101,195,177,111,
+114,101,99,105,98,105,100,97,116,114,105,98,117,110,97,108,116,101,110,101,114,
+105,102,101,99,97,110,99,105,195,179,110,99,97,110,97,114,105,97,115,100,101,115
+,99,97,114,103,97,100,105,118,101,114,115,111,115,109,97,108,108,111,114,99,97,
+114,101,113,117,105,101,114,101,116,195,169,99,110,105,99,111,100,101,98,101,114
+,195,173,97,118,105,118,105,101,110,100,97,102,105,110,97,110,122,97,115,97,100,
+101,108,97,110,116,101,102,117,110,99,105,111,110,97,99,111,110,115,101,106,111,
+115,100,105,102,195,173,99,105,108,99,105,117,100,97,100,101,115,97,110,116,105,
+103,117,97,115,97,118,97,110,122,97,100,97,116,195,169,114,109,105,110,111,117,
+110,105,100,97,100,101,115,115,195,161,110,99,104,101,122,99,97,109,112,97,195,
+177,97,115,111,102,116,111,110,105,99,114,101,118,105,115,116,97,115,99,111,110,
+116,105,101,110,101,115,101,99,116,111,114,101,115,109,111,109,101,110,116,111,
+115,102,97,99,117,108,116,97,100,99,114,195,169,100,105,116,111,100,105,118,101,
+114,115,97,115,115,117,112,117,101,115,116,111,102,97,99,116,111,114,101,115,115
+,101,103,117,110,100,111,115,112,101,113,117,101,195,177,97,208,179,208,190,208,
+180,208,176,208,181,209,129,208,187,208,184,208,181,209,129,209,130,209,140,208,
+177,209,139,208,187,208,190,208,177,209,139,209,130,209,140,209,141,209,130,208,
+190,208,188,208,149,209,129,208,187,208,184,209,130,208,190,208,179,208,190,208,
+188,208,181,208,189,209,143,208,178,209,129,208,181,209,133,209,141,209,130,208,
+190,208,185,208,180,208,176,208,182,208,181,208,177,209,139,208,187,208,184,208,
+179,208,190,208,180,209,131,208,180,208,181,208,189,209,140,209,141,209,130,208,
+190,209,130,208,177,209,139,208,187,208,176,209,129,208,181,208,177,209,143,208,
+190,208,180,208,184,208,189,209,129,208,181,208,177,208,181,208,189,208,176,208,
+180,208,190,209,129,208,176,208,185,209,130,209,132,208,190,209,130,208,190,208,
+189,208,181,208,179,208,190,209,129,208,178,208,190,208,184,209,129,208,178,208,
+190,208,185,208,184,208,179,209,128,209,139,209,130,208,190,208,182,208,181,208,
+178,209,129,208,181,208,188,209,129,208,178,208,190,209,142,208,187,208,184,209,
+136,209,140,209,141,209,130,208,184,209,133,208,191,208,190,208,186,208,176,208,
+180,208,189,208,181,208,185,208,180,208,190,208,188,208,176,208,188,208,184,209,
+128,208,176,208,187,208,184,208,177,208,190,209,130,208,181,208,188,209,131,209,
+133,208,190,209,130,209,143,208,180,208,178,209,131,209,133,209,129,208,181,209,
+130,208,184,208,187,209,142,208,180,208,184,208,180,208,181,208,187,208,190,208,
+188,208,184,209,128,208,181,209,130,208,181,208,177,209,143,209,129,208,178,208,
+190,208,181,208,178,208,184,208,180,208,181,209,135,208,181,208,179,208,190,209,
+141,209,130,208,184,208,188,209,129,209,135,208,181,209,130,209,130,208,181,208,
+188,209,139,209,134,208,181,208,189,209,139,209,129,209,130,208,176,208,187,208,
+178,208,181,208,180,209,140,209,130,208,181,208,188,208,181,208,178,208,190,208,
+180,209,139,209,130,208,181,208,177,208,181,208,178,209,139,209,136,208,181,208,
+189,208,176,208,188,208,184,209,130,208,184,208,191,208,176,209,130,208,190,208,
+188,209,131,208,191,209,128,208,176,208,178,208,187,208,184,209,134,208,176,208,
+190,208,180,208,189,208,176,208,179,208,190,208,180,209,139,208,183,208,189,208,
+176,209,142,208,188,208,190,208,179,209,131,208,180,209,128,209,131,208,179,208,
+178,209,129,208,181,208,185,208,184,208,180,208,181,209,130,208,186,208,184,208,
+189,208,190,208,190,208,180,208,189,208,190,208,180,208,181,208,187,208,176,208,
+180,208,181,208,187,208,181,209,129,209,128,208,190,208,186,208,184,209,142,208,
+189,209,143,208,178,208,181,209,129,209,140,208,149,209,129,209,130,209,140,209,
+128,208,176,208,183,208,176,208,189,208,176,209,136,208,184,216,167,217,132,217,
+132,217,135,216,167,217,132,216,170,217,138,216,172,217,133,217,138,216,185,216,
+174,216,167,216,181,216,169,216,167,217,132,216,176,217,138,216,185,217,132,217,
+138,217,135,216,172,216,175,217,138,216,175,216,167,217,132,216,162,217,134,216,
+167,217,132,216,177,216,175,216,170,216,173,217,131,217,133,216,181,217,129,216,
+173,216,169,217,131,216,167,217,134,216,170,216,167,217,132,217,132,217,138,217,
+138,217,131,217,136,217,134,216,180,216,168,217,131,216,169,217,129,217,138,217,
+135,216,167,216,168,217,134,216,167,216,170,216,173,217,136,216,167,216,161,216,
+163,217,131,216,171,216,177,216,174,217,132,216,167,217,132,216,167,217,132,216,
+173,216,168,216,175,217,132,217,138,217,132,216,175,216,177,217,136,216,179,216,
+167,216,182,216,186,216,183,216,170,217,131,217,136,217,134,217,135,217,134,216,
+167,217,131,216,179,216,167,216,173,216,169,217,134,216,167,216,175,217,138,216,
+167,217,132,216,183,216,168,216,185,217,132,217,138,217,131,216,180,217,131,216,
+177,216,167,217,138,217,133,217,131,217,134,217,133,217,134,217,135,216,167,216,
+180,216,177,217,131,216,169,216,177,216,166,217,138,216,179,217,134,216,180,217,
+138,216,183,217,133,216,167,216,176,216,167,216,167,217,132,217,129,217,134,216,
+180,216,168,216,167,216,168,216,170,216,185,216,168,216,177,216,177,216,173,217,
+133,216,169,217,131,216,167,217,129,216,169,217,138,217,130,217,136,217,132,217,
+133,216,177,217,131,216,178,217,131,217,132,217,133,216,169,216,163,216,173,217,
+133,216,175,217,130,217,132,216,168,217,138,217,138,216,185,217,134,217,138,216,
+181,217,136,216,177,216,169,216,183,216,177,217,138,217,130,216,180,216,167,216,
+177,217,131,216,172,217,136,216,167,217,132,216,163,216,174,216,177,217,137,217,
+133,216,185,217,134,216,167,216,167,216,168,216,173,216,171,216,185,216,177,217,
+136,216,182,216,168,216,180,217,131,217,132,217,133,216,179,216,172,217,132,216,
+168,217,134,216,167,217,134,216,174,216,167,217,132,216,175,217,131,216,170,216,
+167,216,168,217,131,217,132,217,138,216,169,216,168,216,175,217,136,217,134,216,
+163,217,138,216,182,216,167,217,138,217,136,216,172,216,175,217,129,216,177,217,
+138,217,130,217,131,216,170,216,168,216,170,216,163,217,129,216,182,217,132,217,
+133,216,183,216,168,216,174,216,167,217,131,216,171,216,177,216,168,216,167,216,
+177,217,131,216,167,217,129,216,182,217,132,216,167,216,173,217,132,217,137,217,
+134,217,129,216,179,217,135,216,163,217,138,216,167,217,133,216,177,216,175,217,
+136,216,175,216,163,217,134,217,135,216,167,216,175,217,138,217,134,216,167,216,
+167,217,132,216,167,217,134,217,133,216,185,216,177,216,182,216,170,216,185,217,
+132,217,133,216,175,216,167,216,174,217,132,217,133,217,133,217,131,217,134,0,0,
+0,0,0,0,0,0,1,0,1,0,1,0,1,0,2,0,2,0,2,0,2,0,4,0,4,0,4,0,4,0,0,1,2,3,4,5,6,7,7,6,
+5,4,3,2,1,0,8,9,10,11,12,13,14,15,15,14,13,12,11,10,9,8,16,17,18,19,20,21,22,23,
+23,22,21,20,19,18,17,16,24,25,26,27,28,29,30,31,31,30,29,28,27,26,25,24,255,255,
+255,255,0,0,0,0,0,0,0,0,255,255,255,255,1,0,0,0,2,0,0,0,2,0,0,0,1,0,0,0,1,0,0,0,
+3,0,0,0,255,255,0,1,0,0,0,1,0,0,255,255,0,1,0,0,0,8,0,8,0,8,0,8,0,0,0,1,0,2,0,3,
+0,4,0,5,0,6,0,7,114,101,115,111,117,114,99,101,115,99,111,117,110,116,114,105,
+101,115,113,117,101,115,116,105,111,110,115,101,113,117,105,112,109,101,110,116,
+99,111,109,109,117,110,105,116,121,97,118,97,105,108,97,98,108,101,104,105,103,
+104,108,105,103,104,116,68,84,68,47,120,104,116,109,108,109,97,114,107,101,116,
+105,110,103,107,110,111,119,108,101,100,103,101,115,111,109,101,116,104,105,110,
+103,99,111,110,116,97,105,110,101,114,100,105,114,101,99,116,105,111,110,115,117
+,98,115,99,114,105,98,101,97,100,118,101,114,116,105,115,101,99,104,97,114,97,99
+,116,101,114,34,32,118,97,108,117,101,61,34,60,47,115,101,108,101,99,116,62,65,
+117,115,116,114,97,108,105,97,34,32,99,108,97,115,115,61,34,115,105,116,117,97,
+116,105,111,110,97,117,116,104,111,114,105,116,121,102,111,108,108,111,119,105,
+110,103,112,114,105,109,97,114,105,108,121,111,112,101,114,97,116,105,111,110,99
+,104,97,108,108,101,110,103,101,100,101,118,101,108,111,112,101,100,97,110,111,
+110,121,109,111,117,115,102,117,110,99,116,105,111,110,32,102,117,110,99,116,105
+,111,110,115,99,111,109,112,97,110,105,101,115,115,116,114,117,99,116,117,114,
+101,97,103,114,101,101,109,101,110,116,34,32,116,105,116,108,101,61,34,112,111,
+116,101,110,116,105,97,108,101,100,117,99,97,116,105,111,110,97,114,103,117,109,
+101,110,116,115,115,101,99,111,110,100,97,114,121,99,111,112,121,114,105,103,104
+,116,108,97,110,103,117,97,103,101,115,101,120,99,108,117,115,105,118,101,99,111
+,110,100,105,116,105,111,110,60,47,102,111,114,109,62,13,10,115,116,97,116,101,
+109,101,110,116,97,116,116,101,110,116,105,111,110,66,105,111,103,114,97,112,104
+,121,125,32,101,108,115,101,32,123,10,115,111,108,117,116,105,111,110,115,119,
+104,101,110,32,116,104,101,32,65,110,97,108,121,116,105,99,115,116,101,109,112,
+108,97,116,101,115,100,97,110,103,101,114,111,117,115,115,97,116,101,108,108,105
+,116,101,100,111,99,117,109,101,110,116,115,112,117,98,108,105,115,104,101,114,
+105,109,112,111,114,116,97,110,116,112,114,111,116,111,116,121,112,101,105,110,
+102,108,117,101,110,99,101,38,114,97,113,117,111,59,60,47,101,102,102,101,99,116
+,105,118,101,103,101,110,101,114,97,108,108,121,116,114,97,110,115,102,111,114,
+109,98,101,97,117,116,105,102,117,108,116,114,97,110,115,112,111,114,116,111,114
+,103,97,110,105,122,101,100,112,117,98,108,105,115,104,101,100,112,114,111,109,
+105,110,101,110,116,117,110,116,105,108,32,116,104,101,116,104,117,109,98,110,97
+,105,108,78,97,116,105,111,110,97,108,32,46,102,111,99,117,115,40,41,59,111,118,
+101,114,32,116,104,101,32,109,105,103,114,97,116,105,111,110,97,110,110,111,117,
+110,99,101,100,102,111,111,116,101,114,34,62,10,101,120,99,101,112,116,105,111,
+110,108,101,115,115,32,116,104,97,110,101,120,112,101,110,115,105,118,101,102,
+111,114,109,97,116,105,111,110,102,114,97,109,101,119,111,114,107,116,101,114,
+114,105,116,111,114,121,110,100,105,99,97,116,105,111,110,99,117,114,114,101,110
+,116,108,121,99,108,97,115,115,78,97,109,101,99,114,105,116,105,99,105,115,109,
+116,114,97,100,105,116,105,111,110,101,108,115,101,119,104,101,114,101,65,108,
+101,120,97,110,100,101,114,97,112,112,111,105,110,116,101,100,109,97,116,101,114
+,105,97,108,115,98,114,111,97,100,99,97,115,116,109,101,110,116,105,111,110,101,
+100,97,102,102,105,108,105,97,116,101,60,47,111,112,116,105,111,110,62,116,114,
+101,97,116,109,101,110,116,100,105,102,102,101,114,101,110,116,47,100,101,102,97
+,117,108,116,46,80,114,101,115,105,100,101,110,116,111,110,99,108,105,99,107,61,
+34,98,105,111,103,114,97,112,104,121,111,116,104,101,114,119,105,115,101,112,101
+,114,109,97,110,101,110,116,70,114,97,110,195,167,97,105,115,72,111,108,108,121,
+119,111,111,100,101,120,112,97,110,115,105,111,110,115,116,97,110,100,97,114,100
+,115,60,47,115,116,121,108,101,62,10,114,101,100,117,99,116,105,111,110,68,101,
+99,101,109,98,101,114,32,112,114,101,102,101,114,114,101,100,67,97,109,98,114,
+105,100,103,101,111,112,112,111,110,101,110,116,115,66,117,115,105,110,101,115,
+115,32,99,111,110,102,117,115,105,111,110,62,10,60,116,105,116,108,101,62,112,
+114,101,115,101,110,116,101,100,101,120,112,108,97,105,110,101,100,100,111,101,
+115,32,110,111,116,32,119,111,114,108,100,119,105,100,101,105,110,116,101,114,
+102,97,99,101,112,111,115,105,116,105,111,110,115,110,101,119,115,112,97,112,101
+,114,60,47,116,97,98,108,101,62,10,109,111,117,110,116,97,105,110,115,108,105,
+107,101,32,116,104,101,32,101,115,115,101,110,116,105,97,108,102,105,110,97,110,
+99,105,97,108,115,101,108,101,99,116,105,111,110,97,99,116,105,111,110,61,34,47,
+97,98,97,110,100,111,110,101,100,69,100,117,99,97,116,105,111,110,112,97,114,115
+,101,73,110,116,40,115,116,97,98,105,108,105,116,121,117,110,97,98,108,101,32,
+116,111,60,47,116,105,116,108,101,62,10,114,101,108,97,116,105,111,110,115,78,
+111,116,101,32,116,104,97,116,101,102,102,105,99,105,101,110,116,112,101,114,102
+,111,114,109,101,100,116,119,111,32,121,101,97,114,115,83,105,110,99,101,32,116,
+104,101,116,104,101,114,101,102,111,114,101,119,114,97,112,112,101,114,34,62,97,
+108,116,101,114,110,97,116,101,105,110,99,114,101,97,115,101,100,66,97,116,116,
+108,101,32,111,102,112,101,114,99,101,105,118,101,100,116,114,121,105,110,103,32
+,116,111,110,101,99,101,115,115,97,114,121,112,111,114,116,114,97,121,101,100,
+101,108,101,99,116,105,111,110,115,69,108,105,122,97,98,101,116,104,60,47,105,
+102,114,97,109,101,62,100,105,115,99,111,118,101,114,121,105,110,115,117,114,97,
+110,99,101,115,46,108,101,110,103,116,104,59,108,101,103,101,110,100,97,114,121,
+71,101,111,103,114,97,112,104,121,99,97,110,100,105,100,97,116,101,99,111,114,
+112,111,114,97,116,101,115,111,109,101,116,105,109,101,115,115,101,114,118,105,
+99,101,115,46,105,110,104,101,114,105,116,101,100,60,47,115,116,114,111,110,103,
+62,67,111,109,109,117,110,105,116,121,114,101,108,105,103,105,111,117,115,108,
+111,99,97,116,105,111,110,115,67,111,109,109,105,116,116,101,101,98,117,105,108,
+100,105,110,103,115,116,104,101,32,119,111,114,108,100,110,111,32,108,111,110,
+103,101,114,98,101,103,105,110,110,105,110,103,114,101,102,101,114,101,110,99,
+101,99,97,110,110,111,116,32,98,101,102,114,101,113,117,101,110,99,121,116,121,
+112,105,99,97,108,108,121,105,110,116,111,32,116,104,101,32,114,101,108,97,116,
+105,118,101,59,114,101,99,111,114,100,105,110,103,112,114,101,115,105,100,101,
+110,116,105,110,105,116,105,97,108,108,121,116,101,99,104,110,105,113,117,101,
+116,104,101,32,111,116,104,101,114,105,116,32,99,97,110,32,98,101,101,120,105,
+115,116,101,110,99,101,117,110,100,101,114,108,105,110,101,116,104,105,115,32,
+116,105,109,101,116,101,108,101,112,104,111,110,101,105,116,101,109,115,99,111,
+112,101,112,114,97,99,116,105,99,101,115,97,100,118,97,110,116,97,103,101,41,59,
+114,101,116,117,114,110,32,70,111,114,32,111,116,104,101,114,112,114,111,118,105
+,100,105,110,103,100,101,109,111,99,114,97,99,121,98,111,116,104,32,116,104,101,
+32,101,120,116,101,110,115,105,118,101,115,117,102,102,101,114,105,110,103,115,
+117,112,112,111,114,116,101,100,99,111,109,112,117,116,101,114,115,32,102,117,
+110,99,116,105,111,110,112,114,97,99,116,105,99,97,108,115,97,105,100,32,116,104
+,97,116,105,116,32,109,97,121,32,98,101,69,110,103,108,105,115,104,60,47,102,114
+,111,109,32,116,104,101,32,115,99,104,101,100,117,108,101,100,100,111,119,110,
+108,111,97,100,115,60,47,108,97,98,101,108,62,10,115,117,115,112,101,99,116,101,
+100,109,97,114,103,105,110,58,32,48,115,112,105,114,105,116,117,97,108,60,47,104
+,101,97,100,62,10,10,109,105,99,114,111,115,111,102,116,103,114,97,100,117,97,
+108,108,121,100,105,115,99,117,115,115,101,100,104,101,32,98,101,99,97,109,101,
+101,120,101,99,117,116,105,118,101,106,113,117,101,114,121,46,106,115,104,111,
+117,115,101,104,111,108,100,99,111,110,102,105,114,109,101,100,112,117,114,99,
+104,97,115,101,100,108,105,116,101,114,97,108,108,121,100,101,115,116,114,111,
+121,101,100,117,112,32,116,111,32,116,104,101,118,97,114,105,97,116,105,111,110,
+114,101,109,97,105,110,105,110,103,105,116,32,105,115,32,110,111,116,99,101,110,
+116,117,114,105,101,115,74,97,112,97,110,101,115,101,32,97,109,111,110,103,32,
+116,104,101,99,111,109,112,108,101,116,101,100,97,108,103,111,114,105,116,104,
+109,105,110,116,101,114,101,115,116,115,114,101,98,101,108,108,105,111,110,117,
+110,100,101,102,105,110,101,100,101,110,99,111,117,114,97,103,101,114,101,115,
+105,122,97,98,108,101,105,110,118,111,108,118,105,110,103,115,101,110,115,105,
+116,105,118,101,117,110,105,118,101,114,115,97,108,112,114,111,118,105,115,105,
+111,110,40,97,108,116,104,111,117,103,104,102,101,97,116,117,114,105,110,103,99,
+111,110,100,117,99,116,101,100,41,44,32,119,104,105,99,104,32,99,111,110,116,105
+,110,117,101,100,45,104,101,97,100,101,114,34,62,70,101,98,114,117,97,114,121,32
+,110,117,109,101,114,111,117,115,32,111,118,101,114,102,108,111,119,58,99,111,
+109,112,111,110,101,110,116,102,114,97,103,109,101,110,116,115,101,120,99,101,
+108,108,101,110,116,99,111,108,115,112,97,110,61,34,116,101,99,104,110,105,99,97
+,108,110,101,97,114,32,116,104,101,32,65,100,118,97,110,99,101,100,32,115,111,
+117,114,99,101,32,111,102,101,120,112,114,101,115,115,101,100,72,111,110,103,32,
+75,111,110,103,32,70,97,99,101,98,111,111,107,109,117,108,116,105,112,108,101,32
+,109,101,99,104,97,110,105,115,109,101,108,101,118,97,116,105,111,110,111,102,
+102,101,110,115,105,118,101,60,47,102,111,114,109,62,10,9,115,112,111,110,115,
+111,114,101,100,100,111,99,117,109,101,110,116,46,111,114,32,38,113,117,111,116,
+59,116,104,101,114,101,32,97,114,101,116,104,111,115,101,32,119,104,111,109,111,
+118,101,109,101,110,116,115,112,114,111,99,101,115,115,101,115,100,105,102,102,
+105,99,117,108,116,115,117,98,109,105,116,116,101,100,114,101,99,111,109,109,101
+,110,100,99,111,110,118,105,110,99,101,100,112,114,111,109,111,116,105,110,103,
+34,32,119,105,100,116,104,61,34,46,114,101,112,108,97,99,101,40,99,108,97,115,
+115,105,99,97,108,99,111,97,108,105,116,105,111,110,104,105,115,32,102,105,114,
+115,116,100,101,99,105,115,105,111,110,115,97,115,115,105,115,116,97,110,116,105
+,110,100,105,99,97,116,101,100,101,118,111,108,117,116,105,111,110,45,119,114,97
+,112,112,101,114,34,101,110,111,117,103,104,32,116,111,97,108,111,110,103,32,116
+,104,101,100,101,108,105,118,101,114,101,100,45,45,62,13,10,60,33,45,45,65,109,
+101,114,105,99,97,110,32,112,114,111,116,101,99,116,101,100,78,111,118,101,109,
+98,101,114,32,60,47,115,116,121,108,101,62,60,102,117,114,110,105,116,117,114,
+101,73,110,116,101,114,110,101,116,32,32,111,110,98,108,117,114,61,34,115,117,
+115,112,101,110,100,101,100,114,101,99,105,112,105,101,110,116,98,97,115,101,100
+,32,111,110,32,77,111,114,101,111,118,101,114,44,97,98,111,108,105,115,104,101,
+100,99,111,108,108,101,99,116,101,100,119,101,114,101,32,109,97,100,101,101,109,
+111,116,105,111,110,97,108,101,109,101,114,103,101,110,99,121,110,97,114,114,97,
+116,105,118,101,97,100,118,111,99,97,116,101,115,112,120,59,98,111,114,100,101,
+114,99,111,109,109,105,116,116,101,100,100,105,114,61,34,108,116,114,34,101,109,
+112,108,111,121,101,101,115,114,101,115,101,97,114,99,104,46,32,115,101,108,101,
+99,116,101,100,115,117,99,99,101,115,115,111,114,99,117,115,116,111,109,101,114,
+115,100,105,115,112,108,97,121,101,100,83,101,112,116,101,109,98,101,114,97,100,
+100,67,108,97,115,115,40,70,97,99,101,98,111,111,107,32,115,117,103,103,101,115,
+116,101,100,97,110,100,32,108,97,116,101,114,111,112,101,114,97,116,105,110,103,
+101,108,97,98,111,114,97,116,101,83,111,109,101,116,105,109,101,115,73,110,115,
+116,105,116,117,116,101,99,101,114,116,97,105,110,108,121,105,110,115,116,97,108
+,108,101,100,102,111,108,108,111,119,101,114,115,74,101,114,117,115,97,108,101,
+109,116,104,101,121,32,104,97,118,101,99,111,109,112,117,116,105,110,103,103,101
+,110,101,114,97,116,101,100,112,114,111,118,105,110,99,101,115,103,117,97,114,97
+,110,116,101,101,97,114,98,105,116,114,97,114,121,114,101,99,111,103,110,105,122
+,101,119,97,110,116,101,100,32,116,111,112,120,59,119,105,100,116,104,58,116,104
+,101,111,114,121,32,111,102,98,101,104,97,118,105,111,117,114,87,104,105,108,101
+,32,116,104,101,101,115,116,105,109,97,116,101,100,98,101,103,97,110,32,116,111,
+32,105,116,32,98,101,99,97,109,101,109,97,103,110,105,116,117,100,101,109,117,
+115,116,32,104,97,118,101,109,111,114,101,32,116,104,97,110,68,105,114,101,99,
+116,111,114,121,101,120,116,101,110,115,105,111,110,115,101,99,114,101,116,97,
+114,121,110,97,116,117,114,97,108,108,121,111,99,99,117,114,114,105,110,103,118,
+97,114,105,97,98,108,101,115,103,105,118,101,110,32,116,104,101,112,108,97,116,
+102,111,114,109,46,60,47,108,97,98,101,108,62,60,102,97,105,108,101,100,32,116,
+111,99,111,109,112,111,117,110,100,115,107,105,110,100,115,32,111,102,32,115,111
+,99,105,101,116,105,101,115,97,108,111,110,103,115,105,100,101,32,45,45,38,103,
+116,59,10,10,115,111,117,116,104,119,101,115,116,116,104,101,32,114,105,103,104,
+116,114,97,100,105,97,116,105,111,110,109,97,121,32,104,97,118,101,32,117,110,
+101,115,99,97,112,101,40,115,112,111,107,101,110,32,105,110,34,32,104,114,101,
+102,61,34,47,112,114,111,103,114,97,109,109,101,111,110,108,121,32,116,104,101,
+32,99,111,109,101,32,102,114,111,109,100,105,114,101,99,116,111,114,121,98,117,
+114,105,101,100,32,105,110,97,32,115,105,109,105,108,97,114,116,104,101,121,32,
+119,101,114,101,60,47,102,111,110,116,62,60,47,78,111,114,119,101,103,105,97,110
+,115,112,101,99,105,102,105,101,100,112,114,111,100,117,99,105,110,103,112,97,
+115,115,101,110,103,101,114,40,110,101,119,32,68,97,116,101,116,101,109,112,111,
+114,97,114,121,102,105,99,116,105,111,110,97,108,65,102,116,101,114,32,116,104,
+101,101,113,117,97,116,105,111,110,115,100,111,119,110,108,111,97,100,46,114,101
+,103,117,108,97,114,108,121,100,101,118,101,108,111,112,101,114,97,98,111,118,
+101,32,116,104,101,108,105,110,107,101,100,32,116,111,112,104,101,110,111,109,
+101,110,97,112,101,114,105,111,100,32,111,102,116,111,111,108,116,105,112,34,62,
+115,117,98,115,116,97,110,99,101,97,117,116,111,109,97,116,105,99,97,115,112,101
+,99,116,32,111,102,65,109,111,110,103,32,116,104,101,99,111,110,110,101,99,116,
+101,100,101,115,116,105,109,97,116,101,115,65,105,114,32,70,111,114,99,101,115,
+121,115,116,101,109,32,111,102,111,98,106,101,99,116,105,118,101,105,109,109,101
+,100,105,97,116,101,109,97,107,105,110,103,32,105,116,112,97,105,110,116,105,110
+,103,115,99,111,110,113,117,101,114,101,100,97,114,101,32,115,116,105,108,108,
+112,114,111,99,101,100,117,114,101,103,114,111,119,116,104,32,111,102,104,101,97
+,100,101,100,32,98,121,69,117,114,111,112,101,97,110,32,100,105,118,105,115,105,
+111,110,115,109,111,108,101,99,117,108,101,115,102,114,97,110,99,104,105,115,101
+,105,110,116,101,110,116,105,111,110,97,116,116,114,97,99,116,101,100,99,104,105
+,108,100,104,111,111,100,97,108,115,111,32,117,115,101,100,100,101,100,105,99,97
+,116,101,100,115,105,110,103,97,112,111,114,101,100,101,103,114,101,101,32,111,
+102,102,97,116,104,101,114,32,111,102,99,111,110,102,108,105,99,116,115,60,47,97
+,62,60,47,112,62,10,99,97,109,101,32,102,114,111,109,119,101,114,101,32,117,115,
+101,100,110,111,116,101,32,116,104,97,116,114,101,99,101,105,118,105,110,103,69,
+120,101,99,117,116,105,118,101,101,118,101,110,32,109,111,114,101,97,99,99,101,
+115,115,32,116,111,99,111,109,109,97,110,100,101,114,80,111,108,105,116,105,99,
+97,108,109,117,115,105,99,105,97,110,115,100,101,108,105,99,105,111,117,115,112,
+114,105,115,111,110,101,114,115,97,100,118,101,110,116,32,111,102,85,84,70,45,56
+,34,32,47,62,60,33,91,67,68,65,84,65,91,34,62,67,111,110,116,97,99,116,83,111,
+117,116,104,101,114,110,32,98,103,99,111,108,111,114,61,34,115,101,114,105,101,
+115,32,111,102,46,32,73,116,32,119,97,115,32,105,110,32,69,117,114,111,112,101,
+112,101,114,109,105,116,116,101,100,118,97,108,105,100,97,116,101,46,97,112,112,
+101,97,114,105,110,103,111,102,102,105,99,105,97,108,115,115,101,114,105,111,117
+,115,108,121,45,108,97,110,103,117,97,103,101,105,110,105,116,105,97,116,101,100
+,101,120,116,101,110,100,105,110,103,108,111,110,103,45,116,101,114,109,105,110,
+102,108,97,116,105,111,110,115,117,99,104,32,116,104,97,116,103,101,116,67,111,
+111,107,105,101,109,97,114,107,101,100,32,98,121,60,47,98,117,116,116,111,110,62
+,105,109,112,108,101,109,101,110,116,98,117,116,32,105,116,32,105,115,105,110,99
+,114,101,97,115,101,115,100,111,119,110,32,116,104,101,32,114,101,113,117,105,
+114,105,110,103,100,101,112,101,110,100,101,110,116,45,45,62,10,60,33,45,45,32,
+105,110,116,101,114,118,105,101,119,87,105,116,104,32,116,104,101,32,99,111,112,
+105,101,115,32,111,102,99,111,110,115,101,110,115,117,115,119,97,115,32,98,117,
+105,108,116,86,101,110,101,122,117,101,108,97,40,102,111,114,109,101,114,108,121
+,116,104,101,32,115,116,97,116,101,112,101,114,115,111,110,110,101,108,115,116,
+114,97,116,101,103,105,99,102,97,118,111,117,114,32,111,102,105,110,118,101,110,
+116,105,111,110,87,105,107,105,112,101,100,105,97,99,111,110,116,105,110,101,110
+,116,118,105,114,116,117,97,108,108,121,119,104,105,99,104,32,119,97,115,112,114
+,105,110,99,105,112,108,101,67,111,109,112,108,101,116,101,32,105,100,101,110,
+116,105,99,97,108,115,104,111,119,32,116,104,97,116,112,114,105,109,105,116,105,
+118,101,97,119,97,121,32,102,114,111,109,109,111,108,101,99,117,108,97,114,112,
+114,101,99,105,115,101,108,121,100,105,115,115,111,108,118,101,100,85,110,100,
+101,114,32,116,104,101,118,101,114,115,105,111,110,61,34,62,38,110,98,115,112,59
+,60,47,73,116,32,105,115,32,116,104,101,32,84,104,105,115,32,105,115,32,119,105,
+108,108,32,104,97,118,101,111,114,103,97,110,105,115,109,115,115,111,109,101,32,
+116,105,109,101,70,114,105,101,100,114,105,99,104,119,97,115,32,102,105,114,115,
+116,116,104,101,32,111,110,108,121,32,102,97,99,116,32,116,104,97,116,102,111,
+114,109,32,105,100,61,34,112,114,101,99,101,100,105,110,103,84,101,99,104,110,
+105,99,97,108,112,104,121,115,105,99,105,115,116,111,99,99,117,114,115,32,105,
+110,110,97,118,105,103,97,116,111,114,115,101,99,116,105,111,110,34,62,115,112,
+97,110,32,105,100,61,34,115,111,117,103,104,116,32,116,111,98,101,108,111,119,32
+,116,104,101,115,117,114,118,105,118,105,110,103,125,60,47,115,116,121,108,101,
+62,104,105,115,32,100,101,97,116,104,97,115,32,105,110,32,116,104,101,99,97,117,
+115,101,100,32,98,121,112,97,114,116,105,97,108,108,121,101,120,105,115,116,105,
+110,103,32,117,115,105,110,103,32,116,104,101,119,97,115,32,103,105,118,101,110,
+97,32,108,105,115,116,32,111,102,108,101,118,101,108,115,32,111,102,110,111,116,
+105,111,110,32,111,102,79,102,102,105,99,105,97,108,32,100,105,115,109,105,115,
+115,101,100,115,99,105,101,110,116,105,115,116,114,101,115,101,109,98,108,101,
+115,100,117,112,108,105,99,97,116,101,101,120,112,108,111,115,105,118,101,114,
+101,99,111,118,101,114,101,100,97,108,108,32,111,116,104,101,114,103,97,108,108,
+101,114,105,101,115,123,112,97,100,100,105,110,103,58,112,101,111,112,108,101,32
+,111,102,114,101,103,105,111,110,32,111,102,97,100,100,114,101,115,115,101,115,
+97,115,115,111,99,105,97,116,101,105,109,103,32,97,108,116,61,34,105,110,32,109,
+111,100,101,114,110,115,104,111,117,108,100,32,98,101,109,101,116,104,111,100,32
+,111,102,114,101,112,111,114,116,105,110,103,116,105,109,101,115,116,97,109,112,
+110,101,101,100,101,100,32,116,111,116,104,101,32,71,114,101,97,116,114,101,103,
+97,114,100,105,110,103,115,101,101,109,101,100,32,116,111,118,105,101,119,101,
+100,32,97,115,105,109,112,97,99,116,32,111,110,105,100,101,97,32,116,104,97,116,
+116,104,101,32,87,111,114,108,100,104,101,105,103,104,116,32,111,102,101,120,112
+,97,110,100,105,110,103,84,104,101,115,101,32,97,114,101,99,117,114,114,101,110,
+116,34,62,99,97,114,101,102,117,108,108,121,109,97,105,110,116,97,105,110,115,99
+,104,97,114,103,101,32,111,102,67,108,97,115,115,105,99,97,108,97,100,100,114,
+101,115,115,101,100,112,114,101,100,105,99,116,101,100,111,119,110,101,114,115,
+104,105,112,60,100,105,118,32,105,100,61,34,114,105,103,104,116,34,62,13,10,114,
+101,115,105,100,101,110,99,101,108,101,97,118,101,32,116,104,101,99,111,110,116,
+101,110,116,34,62,97,114,101,32,111,102,116,101,110,32,32,125,41,40,41,59,13,10,
+112,114,111,98,97,98,108,121,32,80,114,111,102,101,115,115,111,114,45,98,117,116
+,116,111,110,34,32,114,101,115,112,111,110,100,101,100,115,97,121,115,32,116,104
+,97,116,104,97,100,32,116,111,32,98,101,112,108,97,99,101,100,32,105,110,72,117,
+110,103,97,114,105,97,110,115,116,97,116,117,115,32,111,102,115,101,114,118,101,
+115,32,97,115,85,110,105,118,101,114,115,97,108,101,120,101,99,117,116,105,111,
+110,97,103,103,114,101,103,97,116,101,102,111,114,32,119,104,105,99,104,105,110,
+102,101,99,116,105,111,110,97,103,114,101,101,100,32,116,111,104,111,119,101,118
+,101,114,44,32,112,111,112,117,108,97,114,34,62,112,108,97,99,101,100,32,111,110
+,99,111,110,115,116,114,117,99,116,101,108,101,99,116,111,114,97,108,115,121,109
+,98,111,108,32,111,102,105,110,99,108,117,100,105,110,103,114,101,116,117,114,
+110,32,116,111,97,114,99,104,105,116,101,99,116,67,104,114,105,115,116,105,97,
+110,112,114,101,118,105,111,117,115,32,108,105,118,105,110,103,32,105,110,101,97
+,115,105,101,114,32,116,111,112,114,111,102,101,115,115,111,114,10,38,108,116,59
+,33,45,45,32,101,102,102,101,99,116,32,111,102,97,110,97,108,121,116,105,99,115,
+119,97,115,32,116,97,107,101,110,119,104,101,114,101,32,116,104,101,116,111,111,
+107,32,111,118,101,114,98,101,108,105,101,102,32,105,110,65,102,114,105,107,97,
+97,110,115,97,115,32,102,97,114,32,97,115,112,114,101,118,101,110,116,101,100,
+119,111,114,107,32,119,105,116,104,97,32,115,112,101,99,105,97,108,60,102,105,
+101,108,100,115,101,116,67,104,114,105,115,116,109,97,115,82,101,116,114,105,101
+,118,101,100,10,10,73,110,32,116,104,101,32,98,97,99,107,32,105,110,116,111,110,
+111,114,116,104,101,97,115,116,109,97,103,97,122,105,110,101,115,62,60,115,116,
+114,111,110,103,62,99,111,109,109,105,116,116,101,101,103,111,118,101,114,110,
+105,110,103,103,114,111,117,112,115,32,111,102,115,116,111,114,101,100,32,105,
+110,101,115,116,97,98,108,105,115,104,97,32,103,101,110,101,114,97,108,105,116,
+115,32,102,105,114,115,116,116,104,101,105,114,32,111,119,110,112,111,112,117,
+108,97,116,101,100,97,110,32,111,98,106,101,99,116,67,97,114,105,98,98,101,97,
+110,97,108,108,111,119,32,116,104,101,100,105,115,116,114,105,99,116,115,119,105
+,115,99,111,110,115,105,110,108,111,99,97,116,105,111,110,46,59,32,119,105,100,
+116,104,58,32,105,110,104,97,98,105,116,101,100,83,111,99,105,97,108,105,115,116
+,74,97,110,117,97,114,121,32,49,60,47,102,111,111,116,101,114,62,115,105,109,105
+,108,97,114,108,121,99,104,111,105,99,101,32,111,102,116,104,101,32,115,97,109,
+101,32,115,112,101,99,105,102,105,99,32,98,117,115,105,110,101,115,115,32,84,104
+,101,32,102,105,114,115,116,46,108,101,110,103,116,104,59,32,100,101,115,105,114
+,101,32,116,111,100,101,97,108,32,119,105,116,104,115,105,110,99,101,32,116,104,
+101,117,115,101,114,65,103,101,110,116,99,111,110,99,101,105,118,101,100,105,110
+,100,101,120,46,112,104,112,97,115,32,38,113,117,111,116,59,101,110,103,97,103,
+101,32,105,110,114,101,99,101,110,116,108,121,44,102,101,119,32,121,101,97,114,
+115,119,101,114,101,32,97,108,115,111,10,60,104,101,97,100,62,10,60,101,100,105,
+116,101,100,32,98,121,97,114,101,32,107,110,111,119,110,99,105,116,105,101,115,
+32,105,110,97,99,99,101,115,115,107,101,121,99,111,110,100,101,109,110,101,100,
+97,108,115,111,32,104,97,118,101,115,101,114,118,105,99,101,115,44,102,97,109,
+105,108,121,32,111,102,83,99,104,111,111,108,32,111,102,99,111,110,118,101,114,
+116,101,100,110,97,116,117,114,101,32,111,102,32,108,97,110,103,117,97,103,101,
+109,105,110,105,115,116,101,114,115,60,47,111,98,106,101,99,116,62,116,104,101,
+114,101,32,105,115,32,97,32,112,111,112,117,108,97,114,115,101,113,117,101,110,
+99,101,115,97,100,118,111,99,97,116,101,100,84,104,101,121,32,119,101,114,101,97
+,110,121,32,111,116,104,101,114,108,111,99,97,116,105,111,110,61,101,110,116,101
+,114,32,116,104,101,109,117,99,104,32,109,111,114,101,114,101,102,108,101,99,116
+,101,100,119,97,115,32,110,97,109,101,100,111,114,105,103,105,110,97,108,32,97,
+32,116,121,112,105,99,97,108,119,104,101,110,32,116,104,101,121,101,110,103,105,
+110,101,101,114,115,99,111,117,108,100,32,110,111,116,114,101,115,105,100,101,
+110,116,115,119,101,100,110,101,115,100,97,121,116,104,101,32,116,104,105,114,
+100,32,112,114,111,100,117,99,116,115,74,97,110,117,97,114,121,32,50,119,104,97,
+116,32,116,104,101,121,97,32,99,101,114,116,97,105,110,114,101,97,99,116,105,111
+,110,115,112,114,111,99,101,115,115,111,114,97,102,116,101,114,32,104,105,115,
+116,104,101,32,108,97,115,116,32,99,111,110,116,97,105,110,101,100,34,62,60,47,
+100,105,118,62,10,60,47,97,62,60,47,116,100,62,100,101,112,101,110,100,32,111,
+110,115,101,97,114,99,104,34,62,10,112,105,101,99,101,115,32,111,102,99,111,109,
+112,101,116,105,110,103,82,101,102,101,114,101,110,99,101,116,101,110,110,101,
+115,115,101,101,119,104,105,99,104,32,104,97,115,32,118,101,114,115,105,111,110,
+61,60,47,115,112,97,110,62,32,60,60,47,104,101,97,100,101,114,62,103,105,118,101
+,115,32,116,104,101,104,105,115,116,111,114,105,97,110,118,97,108,117,101,61,34,
+34,62,112,97,100,100,105,110,103,58,48,118,105,101,119,32,116,104,97,116,116,111
+,103,101,116,104,101,114,44,116,104,101,32,109,111,115,116,32,119,97,115,32,102,
+111,117,110,100,115,117,98,115,101,116,32,111,102,97,116,116,97,99,107,32,111,
+110,99,104,105,108,100,114,101,110,44,112,111,105,110,116,115,32,111,102,112,101
+,114,115,111,110,97,108,32,112,111,115,105,116,105,111,110,58,97,108,108,101,103
+,101,100,108,121,67,108,101,118,101,108,97,110,100,119,97,115,32,108,97,116,101,
+114,97,110,100,32,97,102,116,101,114,97,114,101,32,103,105,118,101,110,119,97,
+115,32,115,116,105,108,108,115,99,114,111,108,108,105,110,103,100,101,115,105,
+103,110,32,111,102,109,97,107,101,115,32,116,104,101,109,117,99,104,32,108,101,
+115,115,65,109,101,114,105,99,97,110,115,46,10,10,65,102,116,101,114,32,44,32,98
+,117,116,32,116,104,101,77,117,115,101,117,109,32,111,102,108,111,117,105,115,
+105,97,110,97,40,102,114,111,109,32,116,104,101,109,105,110,110,101,115,111,116,
+97,112,97,114,116,105,99,108,101,115,97,32,112,114,111,99,101,115,115,68,111,109
+,105,110,105,99,97,110,118,111,108,117,109,101,32,111,102,114,101,116,117,114,
+110,105,110,103,100,101,102,101,110,115,105,118,101,48,48,112,120,124,114,105,
+103,104,109,97,100,101,32,102,114,111,109,109,111,117,115,101,111,118,101,114,34
+,32,115,116,121,108,101,61,34,115,116,97,116,101,115,32,111,102,40,119,104,105,
+99,104,32,105,115,99,111,110,116,105,110,117,101,115,70,114,97,110,99,105,115,99
+,111,98,117,105,108,100,105,110,103,32,119,105,116,104,111,117,116,32,97,119,105
+,116,104,32,115,111,109,101,119,104,111,32,119,111,117,108,100,97,32,102,111,114
+,109,32,111,102,97,32,112,97,114,116,32,111,102,98,101,102,111,114,101,32,105,
+116,107,110,111,119,110,32,97,115,32,32,83,101,114,118,105,99,101,115,108,111,99
+,97,116,105,111,110,32,97,110,100,32,111,102,116,101,110,109,101,97,115,117,114,
+105,110,103,97,110,100,32,105,116,32,105,115,112,97,112,101,114,98,97,99,107,118
+,97,108,117,101,115,32,111,102,13,10,60,116,105,116,108,101,62,61,32,119,105,110
+,100,111,119,46,100,101,116,101,114,109,105,110,101,101,114,38,113,117,111,116,
+59,32,112,108,97,121,101,100,32,98,121,97,110,100,32,101,97,114,108,121,60,47,99
+,101,110,116,101,114,62,102,114,111,109,32,116,104,105,115,116,104,101,32,116,
+104,114,101,101,112,111,119,101,114,32,97,110,100,111,102,32,38,113,117,111,116,
+59,105,110,110,101,114,72,84,77,76,60,97,32,104,114,101,102,61,34,121,58,105,110
+,108,105,110,101,59,67,104,117,114,99,104,32,111,102,116,104,101,32,101,118,101,
+110,116,118,101,114,121,32,104,105,103,104,111,102,102,105,99,105,97,108,32,45,
+104,101,105,103,104,116,58,32,99,111,110,116,101,110,116,61,34,47,99,103,105,45,
+98,105,110,47,116,111,32,99,114,101,97,116,101,97,102,114,105,107,97,97,110,115,
+101,115,112,101,114,97,110,116,111,102,114,97,110,195,167,97,105,115,108,97,116,
+118,105,101,197,161,117,108,105,101,116,117,118,105,197,179,196,140,101,197,161,
+116,105,110,97,196,141,101,197,161,116,105,110,97,224,185,132,224,184,151,224,
+184,162,230,151,165,230,156,172,232,170,158,231,174,128,228,189,147,229,173,151,
+231,185,129,233,171,148,229,173,151,237,149,156,234,181,173,236,150,180,228,184,
+186,228,187,128,228,185,136,232,174,161,231,174,151,230,156,186,231,172,148,232,
+174,176,230,156,172,232,168,142,232,171,150,229,141,128,230,156,141,229,138,161,
+229,153,168,228,186,146,232,129,148,231,189,145,230,136,191,229,156,176,228,186,
+167,228,191,177,228,185,144,233,131,168,229,135,186,231,137,136,231,164,190,230,
+142,146,232,161,140,230,166,156,233,131,168,232,144,189,230,160,188,232,191,155,
+228,184,128,230,173,165,230,148,175,228,187,152,229,174,157,233,170,140,232,175,
+129,231,160,129,229,167,148,229,145,152,228,188,154,230,149,176,230,141,174,229,
+186,147,230,182,136,232,180,185,232,128,133,229,138,158,229,133,172,229,174,164,
+232,174,168,232,174,186,229,140,186,230,183,177,229,156,179,229,184,130,230,146,
+173,230,148,190,229,153,168,229,140,151,228,186,172,229,184,130,229,164,167,229,
+173,166,231,148,159,232,182,138,230,157,165,232,182,138,231,174,161,231,144,134,
+229,145,152,228,191,161,230,129,175,231,189,145,115,101,114,118,105,99,105,111,
+115,97,114,116,195,173,99,117,108,111,97,114,103,101,110,116,105,110,97,98,97,
+114,99,101,108,111,110,97,99,117,97,108,113,117,105,101,114,112,117,98,108,105,
+99,97,100,111,112,114,111,100,117,99,116,111,115,112,111,108,195,173,116,105,99,
+97,114,101,115,112,117,101,115,116,97,119,105,107,105,112,101,100,105,97,115,105
+,103,117,105,101,110,116,101,98,195,186,115,113,117,101,100,97,99,111,109,117,
+110,105,100,97,100,115,101,103,117,114,105,100,97,100,112,114,105,110,99,105,112
+,97,108,112,114,101,103,117,110,116,97,115,99,111,110,116,101,110,105,100,111,
+114,101,115,112,111,110,100,101,114,118,101,110,101,122,117,101,108,97,112,114,
+111,98,108,101,109,97,115,100,105,99,105,101,109,98,114,101,114,101,108,97,99,
+105,195,179,110,110,111,118,105,101,109,98,114,101,115,105,109,105,108,97,114,
+101,115,112,114,111,121,101,99,116,111,115,112,114,111,103,114,97,109,97,115,105
+,110,115,116,105,116,117,116,111,97,99,116,105,118,105,100,97,100,101,110,99,117
+,101,110,116,114,97,101,99,111,110,111,109,195,173,97,105,109,195,161,103,101,
+110,101,115,99,111,110,116,97,99,116,97,114,100,101,115,99,97,114,103,97,114,110
+,101,99,101,115,97,114,105,111,97,116,101,110,99,105,195,179,110,116,101,108,195
+,169,102,111,110,111,99,111,109,105,115,105,195,179,110,99,97,110,99,105,111,110
+,101,115,99,97,112,97,99,105,100,97,100,101,110,99,111,110,116,114,97,114,97,110
+,195,161,108,105,115,105,115,102,97,118,111,114,105,116,111,115,116,195,169,114,
+109,105,110,111,115,112,114,111,118,105,110,99,105,97,101,116,105,113,117,101,
+116,97,115,101,108,101,109,101,110,116,111,115,102,117,110,99,105,111,110,101,
+115,114,101,115,117,108,116,97,100,111,99,97,114,195,161,99,116,101,114,112,114,
+111,112,105,101,100,97,100,112,114,105,110,99,105,112,105,111,110,101,99,101,115
+,105,100,97,100,109,117,110,105,99,105,112,97,108,99,114,101,97,99,105,195,179,
+110,100,101,115,99,97,114,103,97,115,112,114,101,115,101,110,99,105,97,99,111,
+109,101,114,99,105,97,108,111,112,105,110,105,111,110,101,115,101,106,101,114,99
+,105,99,105,111,101,100,105,116,111,114,105,97,108,115,97,108,97,109,97,110,99,
+97,103,111,110,122,195,161,108,101,122,100,111,99,117,109,101,110,116,111,112,
+101,108,195,173,99,117,108,97,114,101,99,105,101,110,116,101,115,103,101,110,101
+,114,97,108,101,115,116,97,114,114,97,103,111,110,97,112,114,195,161,99,116,105,
+99,97,110,111,118,101,100,97,100,101,115,112,114,111,112,117,101,115,116,97,112,
+97,99,105,101,110,116,101,115,116,195,169,99,110,105,99,97,115,111,98,106,101,
+116,105,118,111,115,99,111,110,116,97,99,116,111,115,224,164,174,224,165,135,224
+,164,130,224,164,178,224,164,191,224,164,143,224,164,185,224,165,136,224,164,130
+,224,164,151,224,164,175,224,164,190,224,164,184,224,164,190,224,164,165,224,164
+,143,224,164,181,224,164,130,224,164,176,224,164,185,224,165,135,224,164,149,224
+,165,139,224,164,136,224,164,149,224,165,129,224,164,155,224,164,176,224,164,185
+,224,164,190,224,164,172,224,164,190,224,164,166,224,164,149,224,164,185,224,164
+,190,224,164,184,224,164,173,224,165,128,224,164,185,224,165,129,224,164,143,224
+,164,176,224,164,185,224,165,128,224,164,174,224,165,136,224,164,130,224,164,166
+,224,164,191,224,164,168,224,164,172,224,164,190,224,164,164,100,105,112,108,111
+,100,111,99,115,224,164,184,224,164,174,224,164,175,224,164,176,224,165,130,224,
+164,170,224,164,168,224,164,190,224,164,174,224,164,170,224,164,164,224,164,190,
+224,164,171,224,164,191,224,164,176,224,164,148,224,164,184,224,164,164,224,164,
+164,224,164,176,224,164,185,224,164,178,224,165,139,224,164,151,224,164,185,224,
+165,129,224,164,134,224,164,172,224,164,190,224,164,176,224,164,166,224,165,135,
+224,164,182,224,164,185,224,165,129,224,164,136,224,164,150,224,165,135,224,164,
+178,224,164,175,224,164,166,224,164,191,224,164,149,224,164,190,224,164,174,224,
+164,181,224,165,135,224,164,172,224,164,164,224,165,128,224,164,168,224,164,172,
+224,165,128,224,164,154,224,164,174,224,165,140,224,164,164,224,164,184,224,164,
+190,224,164,178,224,164,178,224,165,135,224,164,150,224,164,156,224,165,137,224,
+164,172,224,164,174,224,164,166,224,164,166,224,164,164,224,164,165,224,164,190,
+224,164,168,224,164,185,224,165,128,224,164,182,224,164,185,224,164,176,224,164,
+133,224,164,178,224,164,151,224,164,149,224,164,173,224,165,128,224,164,168,224,
+164,151,224,164,176,224,164,170,224,164,190,224,164,184,224,164,176,224,164,190,
+224,164,164,224,164,149,224,164,191,224,164,143,224,164,137,224,164,184,224,165,
+135,224,164,151,224,164,175,224,165,128,224,164,185,224,165,130,224,164,129,224,
+164,134,224,164,151,224,165,135,224,164,159,224,165,128,224,164,174,224,164,150,
+224,165,139,224,164,156,224,164,149,224,164,190,224,164,176,224,164,133,224,164,
+173,224,165,128,224,164,151,224,164,175,224,165,135,224,164,164,224,165,129,224,
+164,174,224,164,181,224,165,139,224,164,159,224,164,166,224,165,135,224,164,130,
+224,164,133,224,164,151,224,164,176,224,164,144,224,164,184,224,165,135,224,164,
+174,224,165,135,224,164,178,224,164,178,224,164,151,224,164,190,224,164,185,224,
+164,190,224,164,178,224,164,138,224,164,170,224,164,176,224,164,154,224,164,190,
+224,164,176,224,164,144,224,164,184,224,164,190,224,164,166,224,165,135,224,164,
+176,224,164,156,224,164,191,224,164,184,224,164,166,224,164,191,224,164,178,224,
+164,172,224,164,130,224,164,166,224,164,172,224,164,168,224,164,190,224,164,185,
+224,165,130,224,164,130,224,164,178,224,164,190,224,164,150,224,164,156,224,165,
+128,224,164,164,224,164,172,224,164,159,224,164,168,224,164,174,224,164,191,224,
+164,178,224,164,135,224,164,184,224,165,135,224,164,134,224,164,168,224,165,135,
+224,164,168,224,164,175,224,164,190,224,164,149,224,165,129,224,164,178,224,164,
+178,224,165,137,224,164,151,224,164,173,224,164,190,224,164,151,224,164,176,224,
+165,135,224,164,178,224,164,156,224,164,151,224,164,185,224,164,176,224,164,190,
+224,164,174,224,164,178,224,164,151,224,165,135,224,164,170,224,165,135,224,164,
+156,224,164,185,224,164,190,224,164,165,224,164,135,224,164,184,224,165,128,224,
+164,184,224,164,185,224,165,128,224,164,149,224,164,178,224,164,190,224,164,160,
+224,165,128,224,164,149,224,164,185,224,164,190,224,164,129,224,164,166,224,165,
+130,224,164,176,224,164,164,224,164,185,224,164,164,224,164,184,224,164,190,224,
+164,164,224,164,175,224,164,190,224,164,166,224,164,134,224,164,175,224,164,190,
+224,164,170,224,164,190,224,164,149,224,164,149,224,165,140,224,164,168,224,164,
+182,224,164,190,224,164,174,224,164,166,224,165,135,224,164,150,224,164,175,224,
+164,185,224,165,128,224,164,176,224,164,190,224,164,175,224,164,150,224,165,129,
+224,164,166,224,164,178,224,164,151,224,165,128,99,97,116,101,103,111,114,105,
+101,115,101,120,112,101,114,105,101,110,99,101,60,47,116,105,116,108,101,62,13,
+10,67,111,112,121,114,105,103,104,116,32,106,97,118,97,115,99,114,105,112,116,99
+,111,110,100,105,116,105,111,110,115,101,118,101,114,121,116,104,105,110,103,60,
+112,32,99,108,97,115,115,61,34,116,101,99,104,110,111,108,111,103,121,98,97,99,
+107,103,114,111,117,110,100,60,97,32,99,108,97,115,115,61,34,109,97,110,97,103,
+101,109,101,110,116,38,99,111,112,121,59,32,50,48,49,106,97,118,97,83,99,114,105
+,112,116,99,104,97,114,97,99,116,101,114,115,98,114,101,97,100,99,114,117,109,98
+,116,104,101,109,115,101,108,118,101,115,104,111,114,105,122,111,110,116,97,108,
+103,111,118,101,114,110,109,101,110,116,67,97,108,105,102,111,114,110,105,97,97,
+99,116,105,118,105,116,105,101,115,100,105,115,99,111,118,101,114,101,100,78,97,
+118,105,103,97,116,105,111,110,116,114,97,110,115,105,116,105,111,110,99,111,110
+,110,101,99,116,105,111,110,110,97,118,105,103,97,116,105,111,110,97,112,112,101
+,97,114,97,110,99,101,60,47,116,105,116,108,101,62,60,109,99,104,101,99,107,98,
+111,120,34,32,116,101,99,104,110,105,113,117,101,115,112,114,111,116,101,99,116,
+105,111,110,97,112,112,97,114,101,110,116,108,121,97,115,32,119,101,108,108,32,
+97,115,117,110,116,39,44,32,39,85,65,45,114,101,115,111,108,117,116,105,111,110,
+111,112,101,114,97,116,105,111,110,115,116,101,108,101,118,105,115,105,111,110,
+116,114,97,110,115,108,97,116,101,100,87,97,115,104,105,110,103,116,111,110,110,
+97,118,105,103,97,116,111,114,46,32,61,32,119,105,110,100,111,119,46,105,109,112
+,114,101,115,115,105,111,110,38,108,116,59,98,114,38,103,116,59,108,105,116,101,
+114,97,116,117,114,101,112,111,112,117,108,97,116,105,111,110,98,103,99,111,108,
+111,114,61,34,35,101,115,112,101,99,105,97,108,108,121,32,99,111,110,116,101,110
+,116,61,34,112,114,111,100,117,99,116,105,111,110,110,101,119,115,108,101,116,
+116,101,114,112,114,111,112,101,114,116,105,101,115,100,101,102,105,110,105,116,
+105,111,110,108,101,97,100,101,114,115,104,105,112,84,101,99,104,110,111,108,111
+,103,121,80,97,114,108,105,97,109,101,110,116,99,111,109,112,97,114,105,115,111,
+110,117,108,32,99,108,97,115,115,61,34,46,105,110,100,101,120,79,102,40,34,99,
+111,110,99,108,117,115,105,111,110,100,105,115,99,117,115,115,105,111,110,99,111
+,109,112,111,110,101,110,116,115,98,105,111,108,111,103,105,99,97,108,82,101,118
+,111,108,117,116,105,111,110,95,99,111,110,116,97,105,110,101,114,117,110,100,
+101,114,115,116,111,111,100,110,111,115,99,114,105,112,116,62,60,112,101,114,109
+,105,115,115,105,111,110,101,97,99,104,32,111,116,104,101,114,97,116,109,111,115
+,112,104,101,114,101,32,111,110,102,111,99,117,115,61,34,60,102,111,114,109,32,
+105,100,61,34,112,114,111,99,101,115,115,105,110,103,116,104,105,115,46,118,97,
+108,117,101,103,101,110,101,114,97,116,105,111,110,67,111,110,102,101,114,101,
+110,99,101,115,117,98,115,101,113,117,101,110,116,119,101,108,108,45,107,110,111
+,119,110,118,97,114,105,97,116,105,111,110,115,114,101,112,117,116,97,116,105,
+111,110,112,104,101,110,111,109,101,110,111,110,100,105,115,99,105,112,108,105,
+110,101,108,111,103,111,46,112,110,103,34,32,40,100,111,99,117,109,101,110,116,
+44,98,111,117,110,100,97,114,105,101,115,101,120,112,114,101,115,115,105,111,110
+,115,101,116,116,108,101,109,101,110,116,66,97,99,107,103,114,111,117,110,100,
+111,117,116,32,111,102,32,116,104,101,101,110,116,101,114,112,114,105,115,101,40
+,34,104,116,116,112,115,58,34,32,117,110,101,115,99,97,112,101,40,34,112,97,115,
+115,119,111,114,100,34,32,100,101,109,111,99,114,97,116,105,99,60,97,32,104,114,
+101,102,61,34,47,119,114,97,112,112,101,114,34,62,10,109,101,109,98,101,114,115,
+104,105,112,108,105,110,103,117,105,115,116,105,99,112,120,59,112,97,100,100,105
+,110,103,112,104,105,108,111,115,111,112,104,121,97,115,115,105,115,116,97,110,
+99,101,117,110,105,118,101,114,115,105,116,121,102,97,99,105,108,105,116,105,101
+,115,114,101,99,111,103,110,105,122,101,100,112,114,101,102,101,114,101,110,99,
+101,105,102,32,40,116,121,112,101,111,102,109,97,105,110,116,97,105,110,101,100,
+118,111,99,97,98,117,108,97,114,121,104,121,112,111,116,104,101,115,105,115,46,
+115,117,98,109,105,116,40,41,59,38,97,109,112,59,110,98,115,112,59,97,110,110,
+111,116,97,116,105,111,110,98,101,104,105,110,100,32,116,104,101,70,111,117,110,
+100,97,116,105,111,110,112,117,98,108,105,115,104,101,114,34,97,115,115,117,109,
+112,116,105,111,110,105,110,116,114,111,100,117,99,101,100,99,111,114,114,117,
+112,116,105,111,110,115,99,105,101,110,116,105,115,116,115,101,120,112,108,105,
+99,105,116,108,121,105,110,115,116,101,97,100,32,111,102,100,105,109,101,110,115
+,105,111,110,115,32,111,110,67,108,105,99,107,61,34,99,111,110,115,105,100,101,
+114,101,100,100,101,112,97,114,116,109,101,110,116,111,99,99,117,112,97,116,105,
+111,110,115,111,111,110,32,97,102,116,101,114,105,110,118,101,115,116,109,101,
+110,116,112,114,111,110,111,117,110,99,101,100,105,100,101,110,116,105,102,105,
+101,100,101,120,112,101,114,105,109,101,110,116,77,97,110,97,103,101,109,101,110
+,116,103,101,111,103,114,97,112,104,105,99,34,32,104,101,105,103,104,116,61,34,
+108,105,110,107,32,114,101,108,61,34,46,114,101,112,108,97,99,101,40,47,100,101,
+112,114,101,115,115,105,111,110,99,111,110,102,101,114,101,110,99,101,112,117,
+110,105,115,104,109,101,110,116,101,108,105,109,105,110,97,116,101,100,114,101,
+115,105,115,116,97,110,99,101,97,100,97,112,116,97,116,105,111,110,111,112,112,
+111,115,105,116,105,111,110,119,101,108,108,32,107,110,111,119,110,115,117,112,
+112,108,101,109,101,110,116,100,101,116,101,114,109,105,110,101,100,104,49,32,99
+,108,97,115,115,61,34,48,112,120,59,109,97,114,103,105,110,109,101,99,104,97,110
+,105,99,97,108,115,116,97,116,105,115,116,105,99,115,99,101,108,101,98,114,97,
+116,101,100,71,111,118,101,114,110,109,101,110,116,10,10,68,117,114,105,110,103,
+32,116,100,101,118,101,108,111,112,101,114,115,97,114,116,105,102,105,99,105,97,
+108,101,113,117,105,118,97,108,101,110,116,111,114,105,103,105,110,97,116,101,
+100,67,111,109,109,105,115,115,105,111,110,97,116,116,97,99,104,109,101,110,116,
+60,115,112,97,110,32,105,100,61,34,116,104,101,114,101,32,119,101,114,101,78,101
+,100,101,114,108,97,110,100,115,98,101,121,111,110,100,32,116,104,101,114,101,
+103,105,115,116,101,114,101,100,106,111,117,114,110,97,108,105,115,116,102,114,
+101,113,117,101,110,116,108,121,97,108,108,32,111,102,32,116,104,101,108,97,110,
+103,61,34,101,110,34,32,60,47,115,116,121,108,101,62,13,10,97,98,115,111,108,117
+,116,101,59,32,115,117,112,112,111,114,116,105,110,103,101,120,116,114,101,109,
+101,108,121,32,109,97,105,110,115,116,114,101,97,109,60,47,115,116,114,111,110,
+103,62,32,112,111,112,117,108,97,114,105,116,121,101,109,112,108,111,121,109,101
+,110,116,60,47,116,97,98,108,101,62,13,10,32,99,111,108,115,112,97,110,61,34,60,
+47,102,111,114,109,62,10,32,32,99,111,110,118,101,114,115,105,111,110,97,98,111,
+117,116,32,116,104,101,32,60,47,112,62,60,47,100,105,118,62,105,110,116,101,103,
+114,97,116,101,100,34,32,108,97,110,103,61,34,101,110,80,111,114,116,117,103,117
+,101,115,101,115,117,98,115,116,105,116,117,116,101,105,110,100,105,118,105,100,
+117,97,108,105,109,112,111,115,115,105,98,108,101,109,117,108,116,105,109,101,
+100,105,97,97,108,109,111,115,116,32,97,108,108,112,120,32,115,111,108,105,100,
+32,35,97,112,97,114,116,32,102,114,111,109,115,117,98,106,101,99,116,32,116,111,
+105,110,32,69,110,103,108,105,115,104,99,114,105,116,105,99,105,122,101,100,101,
+120,99,101,112,116,32,102,111,114,103,117,105,100,101,108,105,110,101,115,111,
+114,105,103,105,110,97,108,108,121,114,101,109,97,114,107,97,98,108,101,116,104,
+101,32,115,101,99,111,110,100,104,50,32,99,108,97,115,115,61,34,60,97,32,116,105
+,116,108,101,61,34,40,105,110,99,108,117,100,105,110,103,112,97,114,97,109,101,
+116,101,114,115,112,114,111,104,105,98,105,116,101,100,61,32,34,104,116,116,112,
+58,47,47,100,105,99,116,105,111,110,97,114,121,112,101,114,99,101,112,116,105,
+111,110,114,101,118,111,108,117,116,105,111,110,102,111,117,110,100,97,116,105,
+111,110,112,120,59,104,101,105,103,104,116,58,115,117,99,99,101,115,115,102,117,
+108,115,117,112,112,111,114,116,101,114,115,109,105,108,108,101,110,110,105,117,
+109,104,105,115,32,102,97,116,104,101,114,116,104,101,32,38,113,117,111,116,59,
+110,111,45,114,101,112,101,97,116,59,99,111,109,109,101,114,99,105,97,108,105,
+110,100,117,115,116,114,105,97,108,101,110,99,111,117,114,97,103,101,100,97,109,
+111,117,110,116,32,111,102,32,117,110,111,102,102,105,99,105,97,108,101,102,102,
+105,99,105,101,110,99,121,82,101,102,101,114,101,110,99,101,115,99,111,111,114,
+100,105,110,97,116,101,100,105,115,99,108,97,105,109,101,114,101,120,112,101,100
+,105,116,105,111,110,100,101,118,101,108,111,112,105,110,103,99,97,108,99,117,
+108,97,116,101,100,115,105,109,112,108,105,102,105,101,100,108,101,103,105,116,
+105,109,97,116,101,115,117,98,115,116,114,105,110,103,40,48,34,32,99,108,97,115,
+115,61,34,99,111,109,112,108,101,116,101,108,121,105,108,108,117,115,116,114,97,
+116,101,102,105,118,101,32,121,101,97,114,115,105,110,115,116,114,117,109,101,
+110,116,80,117,98,108,105,115,104,105,110,103,49,34,32,99,108,97,115,115,61,34,
+112,115,121,99,104,111,108,111,103,121,99,111,110,102,105,100,101,110,99,101,110
+,117,109,98,101,114,32,111,102,32,97,98,115,101,110,99,101,32,111,102,102,111,99
+,117,115,101,100,32,111,110,106,111,105,110,101,100,32,116,104,101,115,116,114,
+117,99,116,117,114,101,115,112,114,101,118,105,111,117,115,108,121,62,60,47,105,
+102,114,97,109,101,62,111,110,99,101,32,97,103,97,105,110,98,117,116,32,114,97,
+116,104,101,114,105,109,109,105,103,114,97,110,116,115,111,102,32,99,111,117,114
+,115,101,44,97,32,103,114,111,117,112,32,111,102,76,105,116,101,114,97,116,117,
+114,101,85,110,108,105,107,101,32,116,104,101,60,47,97,62,38,110,98,115,112,59,
+10,102,117,110,99,116,105,111,110,32,105,116,32,119,97,115,32,116,104,101,67,111
+,110,118,101,110,116,105,111,110,97,117,116,111,109,111,98,105,108,101,80,114,
+111,116,101,115,116,97,110,116,97,103,103,114,101,115,115,105,118,101,97,102,116
+,101,114,32,116,104,101,32,83,105,109,105,108,97,114,108,121,44,34,32,47,62,60,
+47,100,105,118,62,99,111,108,108,101,99,116,105,111,110,13,10,102,117,110,99,116
+,105,111,110,118,105,115,105,98,105,108,105,116,121,116,104,101,32,117,115,101,
+32,111,102,118,111,108,117,110,116,101,101,114,115,97,116,116,114,97,99,116,105,
+111,110,117,110,100,101,114,32,116,104,101,32,116,104,114,101,97,116,101,110,101
+,100,42,60,33,91,67,68,65,84,65,91,105,109,112,111,114,116,97,110,99,101,105,110
+,32,103,101,110,101,114,97,108,116,104,101,32,108,97,116,116,101,114,60,47,102,
+111,114,109,62,10,60,47,46,105,110,100,101,120,79,102,40,39,105,32,61,32,48,59,
+32,105,32,60,100,105,102,102,101,114,101,110,99,101,100,101,118,111,116,101,100,
+32,116,111,116,114,97,100,105,116,105,111,110,115,115,101,97,114,99,104,32,102,
+111,114,117,108,116,105,109,97,116,101,108,121,116,111,117,114,110,97,109,101,
+110,116,97,116,116,114,105,98,117,116,101,115,115,111,45,99,97,108,108,101,100,
+32,125,10,60,47,115,116,121,108,101,62,101,118,97,108,117,97,116,105,111,110,101
+,109,112,104,97,115,105,122,101,100,97,99,99,101,115,115,105,98,108,101,60,47,
+115,101,99,116,105,111,110,62,115,117,99,99,101,115,115,105,111,110,97,108,111,
+110,103,32,119,105,116,104,77,101,97,110,119,104,105,108,101,44,105,110,100,117,
+115,116,114,105,101,115,60,47,97,62,60,98,114,32,47,62,104,97,115,32,98,101,99,
+111,109,101,97,115,112,101,99,116,115,32,111,102,84,101,108,101,118,105,115,105,
+111,110,115,117,102,102,105,99,105,101,110,116,98,97,115,107,101,116,98,97,108,
+108,98,111,116,104,32,115,105,100,101,115,99,111,110,116,105,110,117,105,110,103
+,97,110,32,97,114,116,105,99,108,101,60,105,109,103,32,97,108,116,61,34,97,100,
+118,101,110,116,117,114,101,115,104,105,115,32,109,111,116,104,101,114,109,97,
+110,99,104,101,115,116,101,114,112,114,105,110,99,105,112,108,101,115,112,97,114
+,116,105,99,117,108,97,114,99,111,109,109,101,110,116,97,114,121,101,102,102,101
+,99,116,115,32,111,102,100,101,99,105,100,101,100,32,116,111,34,62,60,115,116,
+114,111,110,103,62,112,117,98,108,105,115,104,101,114,115,74,111,117,114,110,97,
+108,32,111,102,100,105,102,102,105,99,117,108,116,121,102,97,99,105,108,105,116,
+97,116,101,97,99,99,101,112,116,97,98,108,101,115,116,121,108,101,46,99,115,115,
+34,9,102,117,110,99,116,105,111,110,32,105,110,110,111,118,97,116,105,111,110,62
+,67,111,112,121,114,105,103,104,116,115,105,116,117,97,116,105,111,110,115,119,
+111,117,108,100,32,104,97,118,101,98,117,115,105,110,101,115,115,101,115,68,105,
+99,116,105,111,110,97,114,121,115,116,97,116,101,109,101,110,116,115,111,102,116
+,101,110,32,117,115,101,100,112,101,114,115,105,115,116,101,110,116,105,110,32,
+74,97,110,117,97,114,121,99,111,109,112,114,105,115,105,110,103,60,47,116,105,
+116,108,101,62,10,9,100,105,112,108,111,109,97,116,105,99,99,111,110,116,97,105,
+110,105,110,103,112,101,114,102,111,114,109,105,110,103,101,120,116,101,110,115,
+105,111,110,115,109,97,121,32,110,111,116,32,98,101,99,111,110,99,101,112,116,32
+,111,102,32,111,110,99,108,105,99,107,61,34,73,116,32,105,115,32,97,108,115,111,
+102,105,110,97,110,99,105,97,108,32,109,97,107,105,110,103,32,116,104,101,76,117
+,120,101,109,98,111,117,114,103,97,100,100,105,116,105,111,110,97,108,97,114,101
+,32,99,97,108,108,101,100,101,110,103,97,103,101,100,32,105,110,34,115,99,114,
+105,112,116,34,41,59,98,117,116,32,105,116,32,119,97,115,101,108,101,99,116,114,
+111,110,105,99,111,110,115,117,98,109,105,116,61,34,10,60,33,45,45,32,69,110,100
+,32,101,108,101,99,116,114,105,99,97,108,111,102,102,105,99,105,97,108,108,121,
+115,117,103,103,101,115,116,105,111,110,116,111,112,32,111,102,32,116,104,101,
+117,110,108,105,107,101,32,116,104,101,65,117,115,116,114,97,108,105,97,110,79,
+114,105,103,105,110,97,108,108,121,114,101,102,101,114,101,110,99,101,115,10,60,
+47,104,101,97,100,62,13,10,114,101,99,111,103,110,105,115,101,100,105,110,105,
+116,105,97,108,105,122,101,108,105,109,105,116,101,100,32,116,111,65,108,101,120
+,97,110,100,114,105,97,114,101,116,105,114,101,109,101,110,116,65,100,118,101,
+110,116,117,114,101,115,102,111,117,114,32,121,101,97,114,115,10,10,38,108,116,
+59,33,45,45,32,105,110,99,114,101,97,115,105,110,103,100,101,99,111,114,97,116,
+105,111,110,104,51,32,99,108,97,115,115,61,34,111,114,105,103,105,110,115,32,111
+,102,111,98,108,105,103,97,116,105,111,110,114,101,103,117,108,97,116,105,111,
+110,99,108,97,115,115,105,102,105,101,100,40,102,117,110,99,116,105,111,110,40,
+97,100,118,97,110,116,97,103,101,115,98,101,105,110,103,32,116,104,101,32,104,
+105,115,116,111,114,105,97,110,115,60,98,97,115,101,32,104,114,101,102,114,101,
+112,101,97,116,101,100,108,121,119,105,108,108,105,110,103,32,116,111,99,111,109
+,112,97,114,97,98,108,101,100,101,115,105,103,110,97,116,101,100,110,111,109,105
+,110,97,116,105,111,110,102,117,110,99,116,105,111,110,97,108,105,110,115,105,
+100,101,32,116,104,101,114,101,118,101,108,97,116,105,111,110,101,110,100,32,111
+,102,32,116,104,101,115,32,102,111,114,32,116,104,101,32,97,117,116,104,111,114,
+105,122,101,100,114,101,102,117,115,101,100,32,116,111,116,97,107,101,32,112,108
+,97,99,101,97,117,116,111,110,111,109,111,117,115,99,111,109,112,114,111,109,105
+,115,101,112,111,108,105,116,105,99,97,108,32,114,101,115,116,97,117,114,97,110,
+116,116,119,111,32,111,102,32,116,104,101,70,101,98,114,117,97,114,121,32,50,113
+,117,97,108,105,116,121,32,111,102,115,119,102,111,98,106,101,99,116,46,117,110,
+100,101,114,115,116,97,110,100,110,101,97,114,108,121,32,97,108,108,119,114,105,
+116,116,101,110,32,98,121,105,110,116,101,114,118,105,101,119,115,34,32,119,105,
+100,116,104,61,34,49,119,105,116,104,100,114,97,119,97,108,102,108,111,97,116,58
+,108,101,102,116,105,115,32,117,115,117,97,108,108,121,99,97,110,100,105,100,97,
+116,101,115,110,101,119,115,112,97,112,101,114,115,109,121,115,116,101,114,105,
+111,117,115,68,101,112,97,114,116,109,101,110,116,98,101,115,116,32,107,110,111,
+119,110,112,97,114,108,105,97,109,101,110,116,115,117,112,112,114,101,115,115,
+101,100,99,111,110,118,101,110,105,101,110,116,114,101,109,101,109,98,101,114,
+101,100,100,105,102,102,101,114,101,110,116,32,115,121,115,116,101,109,97,116,
+105,99,104,97,115,32,108,101,100,32,116,111,112,114,111,112,97,103,97,110,100,97
+,99,111,110,116,114,111,108,108,101,100,105,110,102,108,117,101,110,99,101,115,
+99,101,114,101,109,111,110,105,97,108,112,114,111,99,108,97,105,109,101,100,80,
+114,111,116,101,99,116,105,111,110,108,105,32,99,108,97,115,115,61,34,83,99,105,
+101,110,116,105,102,105,99,99,108,97,115,115,61,34,110,111,45,116,114,97,100,101
+,109,97,114,107,115,109,111,114,101,32,116,104,97,110,32,119,105,100,101,115,112
+,114,101,97,100,76,105,98,101,114,97,116,105,111,110,116,111,111,107,32,112,108,
+97,99,101,100,97,121,32,111,102,32,116,104,101,97,115,32,108,111,110,103,32,97,
+115,105,109,112,114,105,115,111,110,101,100,65,100,100,105,116,105,111,110,97,
+108,10,60,104,101,97,100,62,10,60,109,76,97,98,111,114,97,116,111,114,121,78,111
+,118,101,109,98,101,114,32,50,101,120,99,101,112,116,105,111,110,115,73,110,100,
+117,115,116,114,105,97,108,118,97,114,105,101,116,121,32,111,102,102,108,111,97,
+116,58,32,108,101,102,68,117,114,105,110,103,32,116,104,101,97,115,115,101,115,
+115,109,101,110,116,104,97,118,101,32,98,101,101,110,32,100,101,97,108,115,32,
+119,105,116,104,83,116,97,116,105,115,116,105,99,115,111,99,99,117,114,114,101,
+110,99,101,47,117,108,62,60,47,100,105,118,62,99,108,101,97,114,102,105,120,34,
+62,116,104,101,32,112,117,98,108,105,99,109,97,110,121,32,121,101,97,114,115,119
+,104,105,99,104,32,119,101,114,101,111,118,101,114,32,116,105,109,101,44,115,121
+,110,111,110,121,109,111,117,115,99,111,110,116,101,110,116,34,62,10,112,114,101
+,115,117,109,97,98,108,121,104,105,115,32,102,97,109,105,108,121,117,115,101,114
+,65,103,101,110,116,46,117,110,101,120,112,101,99,116,101,100,105,110,99,108,117
+,100,105,110,103,32,99,104,97,108,108,101,110,103,101,100,97,32,109,105,110,111,
+114,105,116,121,117,110,100,101,102,105,110,101,100,34,98,101,108,111,110,103,
+115,32,116,111,116,97,107,101,110,32,102,114,111,109,105,110,32,79,99,116,111,98
+,101,114,112,111,115,105,116,105,111,110,58,32,115,97,105,100,32,116,111,32,98,
+101,114,101,108,105,103,105,111,117,115,32,70,101,100,101,114,97,116,105,111,110
+,32,114,111,119,115,112,97,110,61,34,111,110,108,121,32,97,32,102,101,119,109,
+101,97,110,116,32,116,104,97,116,108,101,100,32,116,111,32,116,104,101,45,45,62,
+13,10,60,100,105,118,32,60,102,105,101,108,100,115,101,116,62,65,114,99,104,98,
+105,115,104,111,112,32,99,108,97,115,115,61,34,110,111,98,101,105,110,103,32,117
+,115,101,100,97,112,112,114,111,97,99,104,101,115,112,114,105,118,105,108,101,
+103,101,115,110,111,115,99,114,105,112,116,62,10,114,101,115,117,108,116,115,32,
+105,110,109,97,121,32,98,101,32,116,104,101,69,97,115,116,101,114,32,101,103,103
+,109,101,99,104,97,110,105,115,109,115,114,101,97,115,111,110,97,98,108,101,80,
+111,112,117,108,97,116,105,111,110,67,111,108,108,101,99,116,105,111,110,115,101
+,108,101,99,116,101,100,34,62,110,111,115,99,114,105,112,116,62,13,47,105,110,
+100,101,120,46,112,104,112,97,114,114,105,118,97,108,32,111,102,45,106,115,115,
+100,107,39,41,41,59,109,97,110,97,103,101,100,32,116,111,105,110,99,111,109,112,
+108,101,116,101,99,97,115,117,97,108,116,105,101,115,99,111,109,112,108,101,116,
+105,111,110,67,104,114,105,115,116,105,97,110,115,83,101,112,116,101,109,98,101,
+114,32,97,114,105,116,104,109,101,116,105,99,112,114,111,99,101,100,117,114,101,
+115,109,105,103,104,116,32,104,97,118,101,80,114,111,100,117,99,116,105,111,110,
+105,116,32,97,112,112,101,97,114,115,80,104,105,108,111,115,111,112,104,121,102,
+114,105,101,110,100,115,104,105,112,108,101,97,100,105,110,103,32,116,111,103,
+105,118,105,110,103,32,116,104,101,116,111,119,97,114,100,32,116,104,101,103,117
+,97,114,97,110,116,101,101,100,100,111,99,117,109,101,110,116,101,100,99,111,108
+,111,114,58,35,48,48,48,118,105,100,101,111,32,103,97,109,101,99,111,109,109,105
+,115,115,105,111,110,114,101,102,108,101,99,116,105,110,103,99,104,97,110,103,
+101,32,116,104,101,97,115,115,111,99,105,97,116,101,100,115,97,110,115,45,115,
+101,114,105,102,111,110,107,101,121,112,114,101,115,115,59,32,112,97,100,100,105
+,110,103,58,72,101,32,119,97,115,32,116,104,101,117,110,100,101,114,108,121,105,
+110,103,116,121,112,105,99,97,108,108,121,32,44,32,97,110,100,32,116,104,101,32,
+115,114,99,69,108,101,109,101,110,116,115,117,99,99,101,115,115,105,118,101,115,
+105,110,99,101,32,116,104,101,32,115,104,111,117,108,100,32,98,101,32,110,101,
+116,119,111,114,107,105,110,103,97,99,99,111,117,110,116,105,110,103,117,115,101
+,32,111,102,32,116,104,101,108,111,119,101,114,32,116,104,97,110,115,104,111,119
+,115,32,116,104,97,116,60,47,115,112,97,110,62,10,9,9,99,111,109,112,108,97,105,
+110,116,115,99,111,110,116,105,110,117,111,117,115,113,117,97,110,116,105,116,
+105,101,115,97,115,116,114,111,110,111,109,101,114,104,101,32,100,105,100,32,110
+,111,116,100,117,101,32,116,111,32,105,116,115,97,112,112,108,105,101,100,32,116
+,111,97,110,32,97,118,101,114,97,103,101,101,102,102,111,114,116,115,32,116,111,
+116,104,101,32,102,117,116,117,114,101,97,116,116,101,109,112,116,32,116,111,84,
+104,101,114,101,102,111,114,101,44,99,97,112,97,98,105,108,105,116,121,82,101,
+112,117,98,108,105,99,97,110,119,97,115,32,102,111,114,109,101,100,69,108,101,99
+,116,114,111,110,105,99,107,105,108,111,109,101,116,101,114,115,99,104,97,108,
+108,101,110,103,101,115,112,117,98,108,105,115,104,105,110,103,116,104,101,32,
+102,111,114,109,101,114,105,110,100,105,103,101,110,111,117,115,100,105,114,101,
+99,116,105,111,110,115,115,117,98,115,105,100,105,97,114,121,99,111,110,115,112,
+105,114,97,99,121,100,101,116,97,105,108,115,32,111,102,97,110,100,32,105,110,32
+,116,104,101,97,102,102,111,114,100,97,98,108,101,115,117,98,115,116,97,110,99,
+101,115,114,101,97,115,111,110,32,102,111,114,99,111,110,118,101,110,116,105,111
+,110,105,116,101,109,116,121,112,101,61,34,97,98,115,111,108,117,116,101,108,121
+,115,117,112,112,111,115,101,100,108,121,114,101,109,97,105,110,101,100,32,97,97
+,116,116,114,97,99,116,105,118,101,116,114,97,118,101,108,108,105,110,103,115,
+101,112,97,114,97,116,101,108,121,102,111,99,117,115,101,115,32,111,110,101,108,
+101,109,101,110,116,97,114,121,97,112,112,108,105,99,97,98,108,101,102,111,117,
+110,100,32,116,104,97,116,115,116,121,108,101,115,104,101,101,116,109,97,110,117
+,115,99,114,105,112,116,115,116,97,110,100,115,32,102,111,114,32,110,111,45,114,
+101,112,101,97,116,40,115,111,109,101,116,105,109,101,115,67,111,109,109,101,114
+,99,105,97,108,105,110,32,65,109,101,114,105,99,97,117,110,100,101,114,116,97,
+107,101,110,113,117,97,114,116,101,114,32,111,102,97,110,32,101,120,97,109,112,
+108,101,112,101,114,115,111,110,97,108,108,121,105,110,100,101,120,46,112,104,
+112,63,60,47,98,117,116,116,111,110,62,10,112,101,114,99,101,110,116,97,103,101,
+98,101,115,116,45,107,110,111,119,110,99,114,101,97,116,105,110,103,32,97,34,32,
+100,105,114,61,34,108,116,114,76,105,101,117,116,101,110,97,110,116,10,60,100,
+105,118,32,105,100,61,34,116,104,101,121,32,119,111,117,108,100,97,98,105,108,
+105,116,121,32,111,102,109,97,100,101,32,117,112,32,111,102,110,111,116,101,100,
+32,116,104,97,116,99,108,101,97,114,32,116,104,97,116,97,114,103,117,101,32,116,
+104,97,116,116,111,32,97,110,111,116,104,101,114,99,104,105,108,100,114,101,110,
+39,115,112,117,114,112,111,115,101,32,111,102,102,111,114,109,117,108,97,116,101
+,100,98,97,115,101,100,32,117,112,111,110,116,104,101,32,114,101,103,105,111,110
+,115,117,98,106,101,99,116,32,111,102,112,97,115,115,101,110,103,101,114,115,112
+,111,115,115,101,115,115,105,111,110,46,10,10,73,110,32,116,104,101,32,66,101,
+102,111,114,101,32,116,104,101,97,102,116,101,114,119,97,114,100,115,99,117,114,
+114,101,110,116,108,121,32,97,99,114,111,115,115,32,116,104,101,115,99,105,101,
+110,116,105,102,105,99,99,111,109,109,117,110,105,116,121,46,99,97,112,105,116,
+97,108,105,115,109,105,110,32,71,101,114,109,97,110,121,114,105,103,104,116,45,
+119,105,110,103,116,104,101,32,115,121,115,116,101,109,83,111,99,105,101,116,121
+,32,111,102,112,111,108,105,116,105,99,105,97,110,100,105,114,101,99,116,105,111
+,110,58,119,101,110,116,32,111,110,32,116,111,114,101,109,111,118,97,108,32,111,
+102,32,78,101,119,32,89,111,114,107,32,97,112,97,114,116,109,101,110,116,115,105
+,110,100,105,99,97,116,105,111,110,100,117,114,105,110,103,32,116,104,101,117,
+110,108,101,115,115,32,116,104,101,104,105,115,116,111,114,105,99,97,108,104,97,
+100,32,98,101,101,110,32,97,100,101,102,105,110,105,116,105,118,101,105,110,103,
+114,101,100,105,101,110,116,97,116,116,101,110,100,97,110,99,101,67,101,110,116,
+101,114,32,102,111,114,112,114,111,109,105,110,101,110,99,101,114,101,97,100,121
+,83,116,97,116,101,115,116,114,97,116,101,103,105,101,115,98,117,116,32,105,110,
+32,116,104,101,97,115,32,112,97,114,116,32,111,102,99,111,110,115,116,105,116,
+117,116,101,99,108,97,105,109,32,116,104,97,116,108,97,98,111,114,97,116,111,114
+,121,99,111,109,112,97,116,105,98,108,101,102,97,105,108,117,114,101,32,111,102,
+44,32,115,117,99,104,32,97,115,32,98,101,103,97,110,32,119,105,116,104,117,115,
+105,110,103,32,116,104,101,32,116,111,32,112,114,111,118,105,100,101,102,101,97,
+116,117,114,101,32,111,102,102,114,111,109,32,119,104,105,99,104,47,34,32,99,108
+,97,115,115,61,34,103,101,111,108,111,103,105,99,97,108,115,101,118,101,114,97,
+108,32,111,102,100,101,108,105,98,101,114,97,116,101,105,109,112,111,114,116,97,
+110,116,32,104,111,108,100,115,32,116,104,97,116,105,110,103,38,113,117,111,116,
+59,32,118,97,108,105,103,110,61,116,111,112,116,104,101,32,71,101,114,109,97,110
+,111,117,116,115,105,100,101,32,111,102,110,101,103,111,116,105,97,116,101,100,
+104,105,115,32,99,97,114,101,101,114,115,101,112,97,114,97,116,105,111,110,105,
+100,61,34,115,101,97,114,99,104,119,97,115,32,99,97,108,108,101,100,116,104,101,
+32,102,111,117,114,116,104,114,101,99,114,101,97,116,105,111,110,111,116,104,101
+,114,32,116,104,97,110,112,114,101,118,101,110,116,105,111,110,119,104,105,108,
+101,32,116,104,101,32,101,100,117,99,97,116,105,111,110,44,99,111,110,110,101,99
+,116,105,110,103,97,99,99,117,114,97,116,101,108,121,119,101,114,101,32,98,117,
+105,108,116,119,97,115,32,107,105,108,108,101,100,97,103,114,101,101,109,101,110
+,116,115,109,117,99,104,32,109,111,114,101,32,68,117,101,32,116,111,32,116,104,
+101,119,105,100,116,104,58,32,49,48,48,115,111,109,101,32,111,116,104,101,114,75
+,105,110,103,100,111,109,32,111,102,116,104,101,32,101,110,116,105,114,101,102,
+97,109,111,117,115,32,102,111,114,116,111,32,99,111,110,110,101,99,116,111,98,
+106,101,99,116,105,118,101,115,116,104,101,32,70,114,101,110,99,104,112,101,111,
+112,108,101,32,97,110,100,102,101,97,116,117,114,101,100,34,62,105,115,32,115,97
+,105,100,32,116,111,115,116,114,117,99,116,117,114,97,108,114,101,102,101,114,
+101,110,100,117,109,109,111,115,116,32,111,102,116,101,110,97,32,115,101,112,97,
+114,97,116,101,45,62,10,60,100,105,118,32,105,100,32,79,102,102,105,99,105,97,
+108,32,119,111,114,108,100,119,105,100,101,46,97,114,105,97,45,108,97,98,101,108
+,116,104,101,32,112,108,97,110,101,116,97,110,100,32,105,116,32,119,97,115,100,
+34,32,118,97,108,117,101,61,34,108,111,111,107,105,110,103,32,97,116,98,101,110,
+101,102,105,99,105,97,108,97,114,101,32,105,110,32,116,104,101,109,111,110,105,
+116,111,114,105,110,103,114,101,112,111,114,116,101,100,108,121,116,104,101,32,
+109,111,100,101,114,110,119,111,114,107,105,110,103,32,111,110,97,108,108,111,
+119,101,100,32,116,111,119,104,101,114,101,32,116,104,101,32,105,110,110,111,118
+,97,116,105,118,101,60,47,97,62,60,47,100,105,118,62,115,111,117,110,100,116,114
+,97,99,107,115,101,97,114,99,104,70,111,114,109,116,101,110,100,32,116,111,32,98
+,101,105,110,112,117,116,32,105,100,61,34,111,112,101,110,105,110,103,32,111,102
+,114,101,115,116,114,105,99,116,101,100,97,100,111,112,116,101,100,32,98,121,97,
+100,100,114,101,115,115,105,110,103,116,104,101,111,108,111,103,105,97,110,109,
+101,116,104,111,100,115,32,111,102,118,97,114,105,97,110,116,32,111,102,67,104,
+114,105,115,116,105,97,110,32,118,101,114,121,32,108,97,114,103,101,97,117,116,
+111,109,111,116,105,118,101,98,121,32,102,97,114,32,116,104,101,114,97,110,103,
+101,32,102,114,111,109,112,117,114,115,117,105,116,32,111,102,102,111,108,108,
+111,119,32,116,104,101,98,114,111,117,103,104,116,32,116,111,105,110,32,69,110,
+103,108,97,110,100,97,103,114,101,101,32,116,104,97,116,97,99,99,117,115,101,100
+,32,111,102,99,111,109,101,115,32,102,114,111,109,112,114,101,118,101,110,116,
+105,110,103,100,105,118,32,115,116,121,108,101,61,104,105,115,32,111,114,32,104,
+101,114,116,114,101,109,101,110,100,111,117,115,102,114,101,101,100,111,109,32,
+111,102,99,111,110,99,101,114,110,105,110,103,48,32,49,101,109,32,49,101,109,59,
+66,97,115,107,101,116,98,97,108,108,47,115,116,121,108,101,46,99,115,115,97,110,
+32,101,97,114,108,105,101,114,101,118,101,110,32,97,102,116,101,114,47,34,32,116
+,105,116,108,101,61,34,46,99,111,109,47,105,110,100,101,120,116,97,107,105,110,
+103,32,116,104,101,112,105,116,116,115,98,117,114,103,104,99,111,110,116,101,110
+,116,34,62,13,60,115,99,114,105,112,116,62,40,102,116,117,114,110,101,100,32,111
+,117,116,104,97,118,105,110,103,32,116,104,101,60,47,115,112,97,110,62,13,10,32,
+111,99,99,97,115,105,111,110,97,108,98,101,99,97,117,115,101,32,105,116,115,116,
+97,114,116,101,100,32,116,111,112,104,121,115,105,99,97,108,108,121,62,60,47,100
+,105,118,62,10,32,32,99,114,101,97,116,101,100,32,98,121,67,117,114,114,101,110,
+116,108,121,44,32,98,103,99,111,108,111,114,61,34,116,97,98,105,110,100,101,120,
+61,34,100,105,115,97,115,116,114,111,117,115,65,110,97,108,121,116,105,99,115,32
+,97,108,115,111,32,104,97,115,32,97,62,60,100,105,118,32,105,100,61,34,60,47,115
+,116,121,108,101,62,10,60,99,97,108,108,101,100,32,102,111,114,115,105,110,103,
+101,114,32,97,110,100,46,115,114,99,32,61,32,34,47,47,118,105,111,108,97,116,105
+,111,110,115,116,104,105,115,32,112,111,105,110,116,99,111,110,115,116,97,110,
+116,108,121,105,115,32,108,111,99,97,116,101,100,114,101,99,111,114,100,105,110,
+103,115,100,32,102,114,111,109,32,116,104,101,110,101,100,101,114,108,97,110,100
+,115,112,111,114,116,117,103,117,195,170,115,215,162,215,145,215,168,215,153,215
+,170,217,129,216,167,216,177,216,179,219,140,100,101,115,97,114,114,111,108,108,
+111,99,111,109,101,110,116,97,114,105,111,101,100,117,99,97,99,105,195,179,110,
+115,101,112,116,105,101,109,98,114,101,114,101,103,105,115,116,114,97,100,111,
+100,105,114,101,99,99,105,195,179,110,117,98,105,99,97,99,105,195,179,110,112,
+117,98,108,105,99,105,100,97,100,114,101,115,112,117,101,115,116,97,115,114,101,
+115,117,108,116,97,100,111,115,105,109,112,111,114,116,97,110,116,101,114,101,
+115,101,114,118,97,100,111,115,97,114,116,195,173,99,117,108,111,115,100,105,102
+,101,114,101,110,116,101,115,115,105,103,117,105,101,110,116,101,115,114,101,112
+,195,186,98,108,105,99,97,115,105,116,117,97,99,105,195,179,110,109,105,110,105,
+115,116,101,114,105,111,112,114,105,118,97,99,105,100,97,100,100,105,114,101,99,
+116,111,114,105,111,102,111,114,109,97,99,105,195,179,110,112,111,98,108,97,99,
+105,195,179,110,112,114,101,115,105,100,101,110,116,101,99,111,110,116,101,110,
+105,100,111,115,97,99,99,101,115,111,114,105,111,115,116,101,99,104,110,111,114,
+97,116,105,112,101,114,115,111,110,97,108,101,115,99,97,116,101,103,111,114,195,
+173,97,101,115,112,101,99,105,97,108,101,115,100,105,115,112,111,110,105,98,108,
+101,97,99,116,117,97,108,105,100,97,100,114,101,102,101,114,101,110,99,105,97,
+118,97,108,108,97,100,111,108,105,100,98,105,98,108,105,111,116,101,99,97,114,
+101,108,97,99,105,111,110,101,115,99,97,108,101,110,100,97,114,105,111,112,111,
+108,195,173,116,105,99,97,115,97,110,116,101,114,105,111,114,101,115,100,111,99,
+117,109,101,110,116,111,115,110,97,116,117,114,97,108,101,122,97,109,97,116,101,
+114,105,97,108,101,115,100,105,102,101,114,101,110,99,105,97,101,99,111,110,195,
+179,109,105,99,97,116,114,97,110,115,112,111,114,116,101,114,111,100,114,195,173
+,103,117,101,122,112,97,114,116,105,99,105,112,97,114,101,110,99,117,101,110,116
+,114,97,110,100,105,115,99,117,115,105,195,179,110,101,115,116,114,117,99,116,
+117,114,97,102,117,110,100,97,99,105,195,179,110,102,114,101,99,117,101,110,116,
+101,115,112,101,114,109,97,110,101,110,116,101,116,111,116,97,108,109,101,110,
+116,101,208,188,208,190,208,182,208,189,208,190,208,177,209,131,208,180,208,181,
+209,130,208,188,208,190,208,182,208,181,209,130,208,178,209,128,208,181,208,188,
+209,143,209,130,208,176,208,186,208,182,208,181,209,135,209,130,208,190,208,177,
+209,139,208,177,208,190,208,187,208,181,208,181,208,190,209,135,208,181,208,189,
+209,140,209,141,209,130,208,190,208,179,208,190,208,186,208,190,208,179,208,180,
+208,176,208,191,208,190,209,129,208,187,208,181,208,178,209,129,208,181,208,179,
+208,190,209,129,208,176,208,185,209,130,208,181,209,135,208,181,209,128,208,181,
+208,183,208,188,208,190,208,179,209,131,209,130,209,129,208,176,208,185,209,130,
+208,176,208,182,208,184,208,183,208,189,208,184,208,188,208,181,208,182,208,180,
+209,131,208,177,209,131,208,180,209,131,209,130,208,159,208,190,208,184,209,129,
+208,186,208,183,208,180,208,181,209,129,209,140,208,178,208,184,208,180,208,181,
+208,190,209,129,208,178,209,143,208,183,208,184,208,189,209,131,208,182,208,189,
+208,190,209,129,208,178,208,190,208,181,208,185,208,187,209,142,208,180,208,181,
+208,185,208,191,208,190,209,128,208,189,208,190,208,188,208,189,208,190,208,179,
+208,190,208,180,208,181,209,130,208,181,208,185,209,129,208,178,208,190,208,184,
+209,133,208,191,209,128,208,176,208,178,208,176,209,130,208,176,208,186,208,190,
+208,185,208,188,208,181,209,129,209,130,208,190,208,184,208,188,208,181,208,181,
+209,130,208,182,208,184,208,183,208,189,209,140,208,190,208,180,208,189,208,190,
+208,185,208,187,209,131,209,135,209,136,208,181,208,191,208,181,209,128,208,181,
+208,180,209,135,208,176,209,129,209,130,208,184,209,135,208,176,209,129,209,130,
+209,140,209,128,208,176,208,177,208,190,209,130,208,189,208,190,208,178,209,139,
+209,133,208,191,209,128,208,176,208,178,208,190,209,129,208,190,208,177,208,190,
+208,185,208,191,208,190,209,130,208,190,208,188,208,188,208,181,208,189,208,181,
+208,181,209,135,208,184,209,129,208,187,208,181,208,189,208,190,208,178,209,139,
+208,181,209,131,209,129,208,187,209,131,208,179,208,190,208,186,208,190,208,187,
+208,190,208,189,208,176,208,183,208,176,208,180,209,130,208,176,208,186,208,190,
+208,181,209,130,208,190,208,179,208,180,208,176,208,191,208,190,209,135,209,130,
+208,184,208,159,208,190,209,129,208,187,208,181,209,130,208,176,208,186,208,184,
+208,181,208,189,208,190,208,178,209,139,208,185,209,129,209,130,208,190,208,184,
+209,130,209,130,208,176,208,186,208,184,209,133,209,129,209,128,208,176,208,183,
+209,131,208,161,208,176,208,189,208,186,209,130,209,132,208,190,209,128,209,131,
+208,188,208,154,208,190,208,179,208,180,208,176,208,186,208,189,208,184,208,179,
+208,184,209,129,208,187,208,190,208,178,208,176,208,189,208,176,209,136,208,181,
+208,185,208,189,208,176,208,185,209,130,208,184,209,129,208,178,208,190,208,184,
+208,188,209,129,208,178,209,143,208,183,209,140,208,187,209,142,208,177,208,190,
+208,185,209,135,208,176,209,129,209,130,208,190,209,129,209,128,208,181,208,180,
+208,184,208,154,209,128,208,190,208,188,208,181,208,164,208,190,209,128,209,131,
+208,188,209,128,209,139,208,189,208,186,208,181,209,129,209,130,208,176,208,187,
+208,184,208,191,208,190,208,184,209,129,208,186,209,130,209,139,209,129,209,143,
+209,135,208,188,208,181,209,129,209,143,209,134,209,134,208,181,208,189,209,130,
+209,128,209,130,209,128,209,131,208,180,208,176,209,129,208,176,208,188,209,139,
+209,133,209,128,209,139,208,189,208,186,208,176,208,157,208,190,208,178,209,139,
+208,185,209,135,208,176,209,129,208,190,208,178,208,188,208,181,209,129,209,130,
+208,176,209,132,208,184,208,187,209,140,208,188,208,188,208,176,209,128,209,130,
+208,176,209,129,209,130,209,128,208,176,208,189,208,188,208,181,209,129,209,130,
+208,181,209,130,208,181,208,186,209,129,209,130,208,189,208,176,209,136,208,184,
+209,133,208,188,208,184,208,189,209,131,209,130,208,184,208,188,208,181,208,189,
+208,184,208,184,208,188,208,181,209,142,209,130,208,189,208,190,208,188,208,181,
+209,128,208,179,208,190,209,128,208,190,208,180,209,129,208,176,208,188,208,190,
+208,188,209,141,209,130,208,190,208,188,209,131,208,186,208,190,208,189,209,134,
+208,181,209,129,208,178,208,190,208,181,208,188,208,186,208,176,208,186,208,190,
+208,185,208,144,209,128,209,133,208,184,208,178,217,133,217,134,216,170,216,175,
+217,137,216,165,216,177,216,179,216,167,217,132,216,177,216,179,216,167,217,132,
+216,169,216,167,217,132,216,185,216,167,217,133,217,131,216,170,216,168,217,135,
+216,167,216,168,216,177,216,167,217,133,216,172,216,167,217,132,217,138,217,136,
+217,133,216,167,217,132,216,181,217,136,216,177,216,172,216,175,217,138,216,175,
+216,169,216,167,217,132,216,185,216,182,217,136,216,165,216,182,216,167,217,129,
+216,169,216,167,217,132,217,130,216,179,217,133,216,167,217,132,216,185,216,167,
+216,168,216,170,216,173,217,133,217,138,217,132,217,133,217,132,217,129,216,167,
+216,170,217,133,217,132,216,170,217,130,217,137,216,170,216,185,216,175,217,138,
+217,132,216,167,217,132,216,180,216,185,216,177,216,163,216,174,216,168,216,167,
+216,177,216,170,216,183,217,136,217,138,216,177,216,185,217,132,217,138,217,131,
+217,133,216,165,216,177,217,129,216,167,217,130,216,183,217,132,216,168,216,167,
+216,170,216,167,217,132,217,132,216,186,216,169,216,170,216,177,216,170,217,138,
+216,168,216,167,217,132,217,134,216,167,216,179,216,167,217,132,216,180,217,138,
+216,174,217,133,217,134,216,170,216,175,217,138,216,167,217,132,216,185,216,177,
+216,168,216,167,217,132,217,130,216,181,216,181,216,167,217,129,217,132,216,167,
+217,133,216,185,217,132,217,138,217,135,216,167,216,170,216,173,216,175,217,138,
+216,171,216,167,217,132,217,132,217,135,217,133,216,167,217,132,216,185,217,133,
+217,132,217,133,217,131,216,170,216,168,216,169,217,138,217,133,217,131,217,134,
+217,131,216,167,217,132,216,183,217,129,217,132,217,129,217,138,216,175,217,138,
+217,136,216,165,216,175,216,167,216,177,216,169,216,170,216,167,216,177,217,138,
+216,174,216,167,217,132,216,181,216,173,216,169,216,170,216,179,216,172,217,138,
+217,132,216,167,217,132,217,136,217,130,216,170,216,185,217,134,216,175,217,133,
+216,167,217,133,216,175,217,138,217,134,216,169,216,170,216,181,217,133,217,138,
+217,133,216,163,216,177,216,180,217,138,217,129,216,167,217,132,216,176,217,138,
+217,134,216,185,216,177,216,168,217,138,216,169,216,168,217,136,216,167,216,168,
+216,169,216,163,217,132,216,185,216,167,216,168,216,167,217,132,216,179,217,129,
+216,177,217,133,216,180,216,167,217,131,217,132,216,170,216,185,216,167,217,132,
+217,137,216,167,217,132,216,163,217,136,217,132,216,167,217,132,216,179,217,134,
+216,169,216,172,216,167,217,133,216,185,216,169,216,167,217,132,216,181,216,173,
+217,129,216,167,217,132,216,175,217,138,217,134,217,131,217,132,217,133,216,167,
+216,170,216,167,217,132,216,174,216,167,216,181,216,167,217,132,217,133,217,132,
+217,129,216,163,216,185,216,182,216,167,216,161,217,131,216,170,216,167,216,168,
+216,169,216,167,217,132,216,174,217,138,216,177,216,177,216,179,216,167,216,166,
+217,132,216,167,217,132,217,130,217,132,216,168,216,167,217,132,216,163,216,175,
+216,168,217,133,217,130,216,167,216,183,216,185,217,133,216,177,216,167,216,179,
+217,132,217,133,217,134,216,183,217,130,216,169,216,167,217,132,217,131,216,170,
+216,168,216,167,217,132,216,177,216,172,217,132,216,167,216,180,216,170,216,177,
+217,131,216,167,217,132,217,130,216,175,217,133,217,138,216,185,216,183,217,138,
+217,131,115,66,121,84,97,103,78,97,109,101,40,46,106,112,103,34,32,97,108,116,61
+,34,49,112,120,32,115,111,108,105,100,32,35,46,103,105,102,34,32,97,108,116,61,
+34,116,114,97,110,115,112,97,114,101,110,116,105,110,102,111,114,109,97,116,105,
+111,110,97,112,112,108,105,99,97,116,105,111,110,34,32,111,110,99,108,105,99,107
+,61,34,101,115,116,97,98,108,105,115,104,101,100,97,100,118,101,114,116,105,115,
+105,110,103,46,112,110,103,34,32,97,108,116,61,34,101,110,118,105,114,111,110,
+109,101,110,116,112,101,114,102,111,114,109,97,110,99,101,97,112,112,114,111,112
+,114,105,97,116,101,38,97,109,112,59,109,100,97,115,104,59,105,109,109,101,100,
+105,97,116,101,108,121,60,47,115,116,114,111,110,103,62,60,47,114,97,116,104,101
+,114,32,116,104,97,110,116,101,109,112,101,114,97,116,117,114,101,100,101,118,
+101,108,111,112,109,101,110,116,99,111,109,112,101,116,105,116,105,111,110,112,
+108,97,99,101,104,111,108,100,101,114,118,105,115,105,98,105,108,105,116,121,58,
+99,111,112,121,114,105,103,104,116,34,62,48,34,32,104,101,105,103,104,116,61,34,
+101,118,101,110,32,116,104,111,117,103,104,114,101,112,108,97,99,101,109,101,110
+,116,100,101,115,116,105,110,97,116,105,111,110,67,111,114,112,111,114,97,116,
+105,111,110,60,117,108,32,99,108,97,115,115,61,34,65,115,115,111,99,105,97,116,
+105,111,110,105,110,100,105,118,105,100,117,97,108,115,112,101,114,115,112,101,
+99,116,105,118,101,115,101,116,84,105,109,101,111,117,116,40,117,114,108,40,104,
+116,116,112,58,47,47,109,97,116,104,101,109,97,116,105,99,115,109,97,114,103,105
+,110,45,116,111,112,58,101,118,101,110,116,117,97,108,108,121,32,100,101,115,99,
+114,105,112,116,105,111,110,41,32,110,111,45,114,101,112,101,97,116,99,111,108,
+108,101,99,116,105,111,110,115,46,74,80,71,124,116,104,117,109,98,124,112,97,114
+,116,105,99,105,112,97,116,101,47,104,101,97,100,62,60,98,111,100,121,102,108,
+111,97,116,58,108,101,102,116,59,60,108,105,32,99,108,97,115,115,61,34,104,117,
+110,100,114,101,100,115,32,111,102,10,10,72,111,119,101,118,101,114,44,32,99,111
+,109,112,111,115,105,116,105,111,110,99,108,101,97,114,58,98,111,116,104,59,99,
+111,111,112,101,114,97,116,105,111,110,119,105,116,104,105,110,32,116,104,101,32
+,108,97,98,101,108,32,102,111,114,61,34,98,111,114,100,101,114,45,116,111,112,58
+,78,101,119,32,90,101,97,108,97,110,100,114,101,99,111,109,109,101,110,100,101,
+100,112,104,111,116,111,103,114,97,112,104,121,105,110,116,101,114,101,115,116,
+105,110,103,38,108,116,59,115,117,112,38,103,116,59,99,111,110,116,114,111,118,
+101,114,115,121,78,101,116,104,101,114,108,97,110,100,115,97,108,116,101,114,110
+,97,116,105,118,101,109,97,120,108,101,110,103,116,104,61,34,115,119,105,116,122
+,101,114,108,97,110,100,68,101,118,101,108,111,112,109,101,110,116,101,115,115,
+101,110,116,105,97,108,108,121,10,10,65,108,116,104,111,117,103,104,32,60,47,116
+,101,120,116,97,114,101,97,62,116,104,117,110,100,101,114,98,105,114,100,114,101
+,112,114,101,115,101,110,116,101,100,38,97,109,112,59,110,100,97,115,104,59,115,
+112,101,99,117,108,97,116,105,111,110,99,111,109,109,117,110,105,116,105,101,115
+,108,101,103,105,115,108,97,116,105,111,110,101,108,101,99,116,114,111,110,105,
+99,115,10,9,60,100,105,118,32,105,100,61,34,105,108,108,117,115,116,114,97,116,
+101,100,101,110,103,105,110,101,101,114,105,110,103,116,101,114,114,105,116,111,
+114,105,101,115,97,117,116,104,111,114,105,116,105,101,115,100,105,115,116,114,
+105,98,117,116,101,100,54,34,32,104,101,105,103,104,116,61,34,115,97,110,115,45,
+115,101,114,105,102,59,99,97,112,97,98,108,101,32,111,102,32,100,105,115,97,112,
+112,101,97,114,101,100,105,110,116,101,114,97,99,116,105,118,101,108,111,111,107
+,105,110,103,32,102,111,114,105,116,32,119,111,117,108,100,32,98,101,65,102,103,
+104,97,110,105,115,116,97,110,119,97,115,32,99,114,101,97,116,101,100,77,97,116,
+104,46,102,108,111,111,114,40,115,117,114,114,111,117,110,100,105,110,103,99,97,
+110,32,97,108,115,111,32,98,101,111,98,115,101,114,118,97,116,105,111,110,109,97
+,105,110,116,101,110,97,110,99,101,101,110,99,111,117,110,116,101,114,101,100,60
+,104,50,32,99,108,97,115,115,61,34,109,111,114,101,32,114,101,99,101,110,116,105
+,116,32,104,97,115,32,98,101,101,110,105,110,118,97,115,105,111,110,32,111,102,
+41,46,103,101,116,84,105,109,101,40,41,102,117,110,100,97,109,101,110,116,97,108
+,68,101,115,112,105,116,101,32,116,104,101,34,62,60,100,105,118,32,105,100,61,34
+,105,110,115,112,105,114,97,116,105,111,110,101,120,97,109,105,110,97,116,105,
+111,110,112,114,101,112,97,114,97,116,105,111,110,101,120,112,108,97,110,97,116,
+105,111,110,60,105,110,112,117,116,32,105,100,61,34,60,47,97,62,60,47,115,112,97
+,110,62,118,101,114,115,105,111,110,115,32,111,102,105,110,115,116,114,117,109,
+101,110,116,115,98,101,102,111,114,101,32,116,104,101,32,32,61,32,39,104,116,116
+,112,58,47,47,68,101,115,99,114,105,112,116,105,111,110,114,101,108,97,116,105,
+118,101,108,121,32,46,115,117,98,115,116,114,105,110,103,40,101,97,99,104,32,111
+,102,32,116,104,101,101,120,112,101,114,105,109,101,110,116,115,105,110,102,108,
+117,101,110,116,105,97,108,105,110,116,101,103,114,97,116,105,111,110,109,97,110
+,121,32,112,101,111,112,108,101,100,117,101,32,116,111,32,116,104,101,32,99,111,
+109,98,105,110,97,116,105,111,110,100,111,32,110,111,116,32,104,97,118,101,77,
+105,100,100,108,101,32,69,97,115,116,60,110,111,115,99,114,105,112,116,62,60,99,
+111,112,121,114,105,103,104,116,34,32,112,101,114,104,97,112,115,32,116,104,101,
+105,110,115,116,105,116,117,116,105,111,110,105,110,32,68,101,99,101,109,98,101,
+114,97,114,114,97,110,103,101,109,101,110,116,109,111,115,116,32,102,97,109,111,
+117,115,112,101,114,115,111,110,97,108,105,116,121,99,114,101,97,116,105,111,110
+,32,111,102,108,105,109,105,116,97,116,105,111,110,115,101,120,99,108,117,115,
+105,118,101,108,121,115,111,118,101,114,101,105,103,110,116,121,45,99,111,110,
+116,101,110,116,34,62,10,60,116,100,32,99,108,97,115,115,61,34,117,110,100,101,
+114,103,114,111,117,110,100,112,97,114,97,108,108,101,108,32,116,111,100,111,99,
+116,114,105,110,101,32,111,102,111,99,99,117,112,105,101,100,32,98,121,116,101,
+114,109,105,110,111,108,111,103,121,82,101,110,97,105,115,115,97,110,99,101,97,
+32,110,117,109,98,101,114,32,111,102,115,117,112,112,111,114,116,32,102,111,114,
+101,120,112,108,111,114,97,116,105,111,110,114,101,99,111,103,110,105,116,105,
+111,110,112,114,101,100,101,99,101,115,115,111,114,60,105,109,103,32,115,114,99,
+61,34,47,60,104,49,32,99,108,97,115,115,61,34,112,117,98,108,105,99,97,116,105,
+111,110,109,97,121,32,97,108,115,111,32,98,101,115,112,101,99,105,97,108,105,122
+,101,100,60,47,102,105,101,108,100,115,101,116,62,112,114,111,103,114,101,115,
+115,105,118,101,109,105,108,108,105,111,110,115,32,111,102,115,116,97,116,101,
+115,32,116,104,97,116,101,110,102,111,114,99,101,109,101,110,116,97,114,111,117,
+110,100,32,116,104,101,32,111,110,101,32,97,110,111,116,104,101,114,46,112,97,
+114,101,110,116,78,111,100,101,97,103,114,105,99,117,108,116,117,114,101,65,108,
+116,101,114,110,97,116,105,118,101,114,101,115,101,97,114,99,104,101,114,115,116
+,111,119,97,114,100,115,32,116,104,101,77,111,115,116,32,111,102,32,116,104,101,
+109,97,110,121,32,111,116,104,101,114,32,40,101,115,112,101,99,105,97,108,108,
+121,60,116,100,32,119,105,100,116,104,61,34,59,119,105,100,116,104,58,49,48,48,
+37,105,110,100,101,112,101,110,100,101,110,116,60,104,51,32,99,108,97,115,115,61
+,34,32,111,110,99,104,97,110,103,101,61,34,41,46,97,100,100,67,108,97,115,115,40
+,105,110,116,101,114,97,99,116,105,111,110,79,110,101,32,111,102,32,116,104,101,
+32,100,97,117,103,104,116,101,114,32,111,102,97,99,99,101,115,115,111,114,105,
+101,115,98,114,97,110,99,104,101,115,32,111,102,13,10,60,100,105,118,32,105,100,
+61,34,116,104,101,32,108,97,114,103,101,115,116,100,101,99,108,97,114,97,116,105
+,111,110,114,101,103,117,108,97,116,105,111,110,115,73,110,102,111,114,109,97,
+116,105,111,110,116,114,97,110,115,108,97,116,105,111,110,100,111,99,117,109,101
+,110,116,97,114,121,105,110,32,111,114,100,101,114,32,116,111,34,62,10,60,104,
+101,97,100,62,10,60,34,32,104,101,105,103,104,116,61,34,49,97,99,114,111,115,115
+,32,116,104,101,32,111,114,105,101,110,116,97,116,105,111,110,41,59,60,47,115,99
+,114,105,112,116,62,105,109,112,108,101,109,101,110,116,101,100,99,97,110,32,98,
+101,32,115,101,101,110,116,104,101,114,101,32,119,97,115,32,97,100,101,109,111,
+110,115,116,114,97,116,101,99,111,110,116,97,105,110,101,114,34,62,99,111,110,
+110,101,99,116,105,111,110,115,116,104,101,32,66,114,105,116,105,115,104,119,97,
+115,32,119,114,105,116,116,101,110,33,105,109,112,111,114,116,97,110,116,59,112,
+120,59,32,109,97,114,103,105,110,45,102,111,108,108,111,119,101,100,32,98,121,97
+,98,105,108,105,116,121,32,116,111,32,99,111,109,112,108,105,99,97,116,101,100,
+100,117,114,105,110,103,32,116,104,101,32,105,109,109,105,103,114,97,116,105,111
+,110,97,108,115,111,32,99,97,108,108,101,100,60,104,52,32,99,108,97,115,115,61,
+34,100,105,115,116,105,110,99,116,105,111,110,114,101,112,108,97,99,101,100,32,
+98,121,103,111,118,101,114,110,109,101,110,116,115,108,111,99,97,116,105,111,110
+,32,111,102,105,110,32,78,111,118,101,109,98,101,114,119,104,101,116,104,101,114
+,32,116,104,101,60,47,112,62,10,60,47,100,105,118,62,97,99,113,117,105,115,105,
+116,105,111,110,99,97,108,108,101,100,32,116,104,101,32,112,101,114,115,101,99,
+117,116,105,111,110,100,101,115,105,103,110,97,116,105,111,110,123,102,111,110,
+116,45,115,105,122,101,58,97,112,112,101,97,114,101,100,32,105,110,105,110,118,
+101,115,116,105,103,97,116,101,101,120,112,101,114,105,101,110,99,101,100,109,
+111,115,116,32,108,105,107,101,108,121,119,105,100,101,108,121,32,117,115,101,
+100,100,105,115,99,117,115,115,105,111,110,115,112,114,101,115,101,110,99,101,32
+,111,102,32,40,100,111,99,117,109,101,110,116,46,101,120,116,101,110,115,105,118
+,101,108,121,73,116,32,104,97,115,32,98,101,101,110,105,116,32,100,111,101,115,
+32,110,111,116,99,111,110,116,114,97,114,121,32,116,111,105,110,104,97,98,105,
+116,97,110,116,115,105,109,112,114,111,118,101,109,101,110,116,115,99,104,111,
+108,97,114,115,104,105,112,99,111,110,115,117,109,112,116,105,111,110,105,110,
+115,116,114,117,99,116,105,111,110,102,111,114,32,101,120,97,109,112,108,101,111
+,110,101,32,111,114,32,109,111,114,101,112,120,59,32,112,97,100,100,105,110,103,
+116,104,101,32,99,117,114,114,101,110,116,97,32,115,101,114,105,101,115,32,111,
+102,97,114,101,32,117,115,117,97,108,108,121,114,111,108,101,32,105,110,32,116,
+104,101,112,114,101,118,105,111,117,115,108,121,32,100,101,114,105,118,97,116,
+105,118,101,115,101,118,105,100,101,110,99,101,32,111,102,101,120,112,101,114,
+105,101,110,99,101,115,99,111,108,111,114,115,99,104,101,109,101,115,116,97,116,
+101,100,32,116,104,97,116,99,101,114,116,105,102,105,99,97,116,101,60,47,97,62,
+60,47,100,105,118,62,10,32,115,101,108,101,99,116,101,100,61,34,104,105,103,104,
+32,115,99,104,111,111,108,114,101,115,112,111,110,115,101,32,116,111,99,111,109,
+102,111,114,116,97,98,108,101,97,100,111,112,116,105,111,110,32,111,102,116,104,
+114,101,101,32,121,101,97,114,115,116,104,101,32,99,111,117,110,116,114,121,105,
+110,32,70,101,98,114,117,97,114,121,115,111,32,116,104,97,116,32,116,104,101,112
+,101,111,112,108,101,32,119,104,111,32,112,114,111,118,105,100,101,100,32,98,121
+,60,112,97,114,97,109,32,110,97,109,101,97,102,102,101,99,116,101,100,32,98,121,
+105,110,32,116,101,114,109,115,32,111,102,97,112,112,111,105,110,116,109,101,110
+,116,73,83,79,45,56,56,53,57,45,49,34,119,97,115,32,98,111,114,110,32,105,110,
+104,105,115,116,111,114,105,99,97,108,32,114,101,103,97,114,100,101,100,32,97,
+115,109,101,97,115,117,114,101,109,101,110,116,105,115,32,98,97,115,101,100,32,
+111,110,32,97,110,100,32,111,116,104,101,114,32,58,32,102,117,110,99,116,105,111
+,110,40,115,105,103,110,105,102,105,99,97,110,116,99,101,108,101,98,114,97,116,
+105,111,110,116,114,97,110,115,109,105,116,116,101,100,47,106,115,47,106,113,117
+,101,114,121,46,105,115,32,107,110,111,119,110,32,97,115,116,104,101,111,114,101
+,116,105,99,97,108,32,116,97,98,105,110,100,101,120,61,34,105,116,32,99,111,117,
+108,100,32,98,101,60,110,111,115,99,114,105,112,116,62,10,104,97,118,105,110,103
+,32,98,101,101,110,13,10,60,104,101,97,100,62,13,10,60,32,38,113,117,111,116,59,
+84,104,101,32,99,111,109,112,105,108,97,116,105,111,110,104,101,32,104,97,100,32
+,98,101,101,110,112,114,111,100,117,99,101,100,32,98,121,112,104,105,108,111,115
+,111,112,104,101,114,99,111,110,115,116,114,117,99,116,101,100,105,110,116,101,
+110,100,101,100,32,116,111,97,109,111,110,103,32,111,116,104,101,114,99,111,109,
+112,97,114,101,100,32,116,111,116,111,32,115,97,121,32,116,104,97,116,69,110,103
+,105,110,101,101,114,105,110,103,97,32,100,105,102,102,101,114,101,110,116,114,
+101,102,101,114,114,101,100,32,116,111,100,105,102,102,101,114,101,110,99,101,
+115,98,101,108,105,101,102,32,116,104,97,116,112,104,111,116,111,103,114,97,112,
+104,115,105,100,101,110,116,105,102,121,105,110,103,72,105,115,116,111,114,121,
+32,111,102,32,82,101,112,117,98,108,105,99,32,111,102,110,101,99,101,115,115,97,
+114,105,108,121,112,114,111,98,97,98,105,108,105,116,121,116,101,99,104,110,105,
+99,97,108,108,121,108,101,97,118,105,110,103,32,116,104,101,115,112,101,99,116,
+97,99,117,108,97,114,102,114,97,99,116,105,111,110,32,111,102,101,108,101,99,116
+,114,105,99,105,116,121,104,101,97,100,32,111,102,32,116,104,101,114,101,115,116
+,97,117,114,97,110,116,115,112,97,114,116,110,101,114,115,104,105,112,101,109,
+112,104,97,115,105,115,32,111,110,109,111,115,116,32,114,101,99,101,110,116,115,
+104,97,114,101,32,119,105,116,104,32,115,97,121,105,110,103,32,116,104,97,116,
+102,105,108,108,101,100,32,119,105,116,104,100,101,115,105,103,110,101,100,32,
+116,111,105,116,32,105,115,32,111,102,116,101,110,34,62,60,47,105,102,114,97,109
+,101,62,97,115,32,102,111,108,108,111,119,115,58,109,101,114,103,101,100,32,119,
+105,116,104,116,104,114,111,117,103,104,32,116,104,101,99,111,109,109,101,114,99
+,105,97,108,32,112,111,105,110,116,101,100,32,111,117,116,111,112,112,111,114,
+116,117,110,105,116,121,118,105,101,119,32,111,102,32,116,104,101,114,101,113,
+117,105,114,101,109,101,110,116,100,105,118,105,115,105,111,110,32,111,102,112,
+114,111,103,114,97,109,109,105,110,103,104,101,32,114,101,99,101,105,118,101,100
+,115,101,116,73,110,116,101,114,118,97,108,34,62,60,47,115,112,97,110,62,60,47,
+105,110,32,78,101,119,32,89,111,114,107,97,100,100,105,116,105,111,110,97,108,32
+,99,111,109,112,114,101,115,115,105,111,110,10,10,60,100,105,118,32,105,100,61,
+34,105,110,99,111,114,112,111,114,97,116,101,59,60,47,115,99,114,105,112,116,62,
+60,97,116,116,97,99,104,69,118,101,110,116,98,101,99,97,109,101,32,116,104,101,
+32,34,32,116,97,114,103,101,116,61,34,95,99,97,114,114,105,101,100,32,111,117,
+116,83,111,109,101,32,111,102,32,116,104,101,115,99,105,101,110,99,101,32,97,110
+,100,116,104,101,32,116,105,109,101,32,111,102,67,111,110,116,97,105,110,101,114
+,34,62,109,97,105,110,116,97,105,110,105,110,103,67,104,114,105,115,116,111,112,
+104,101,114,77,117,99,104,32,111,102,32,116,104,101,119,114,105,116,105,110,103,
+115,32,111,102,34,32,104,101,105,103,104,116,61,34,50,115,105,122,101,32,111,102
+,32,116,104,101,118,101,114,115,105,111,110,32,111,102,32,109,105,120,116,117,
+114,101,32,111,102,32,98,101,116,119,101,101,110,32,116,104,101,69,120,97,109,
+112,108,101,115,32,111,102,101,100,117,99,97,116,105,111,110,97,108,99,111,109,
+112,101,116,105,116,105,118,101,32,111,110,115,117,98,109,105,116,61,34,100,105,
+114,101,99,116,111,114,32,111,102,100,105,115,116,105,110,99,116,105,118,101,47,
+68,84,68,32,88,72,84,77,76,32,114,101,108,97,116,105,110,103,32,116,111,116,101,
+110,100,101,110,99,121,32,116,111,112,114,111,118,105,110,99,101,32,111,102,119,
+104,105,99,104,32,119,111,117,108,100,100,101,115,112,105,116,101,32,116,104,101
+,115,99,105,101,110,116,105,102,105,99,32,108,101,103,105,115,108,97,116,117,114
+,101,46,105,110,110,101,114,72,84,77,76,32,97,108,108,101,103,97,116,105,111,110
+,115,65,103,114,105,99,117,108,116,117,114,101,119,97,115,32,117,115,101,100,32,
+105,110,97,112,112,114,111,97,99,104,32,116,111,105,110,116,101,108,108,105,103,
+101,110,116,121,101,97,114,115,32,108,97,116,101,114,44,115,97,110,115,45,115,
+101,114,105,102,100,101,116,101,114,109,105,110,105,110,103,80,101,114,102,111,
+114,109,97,110,99,101,97,112,112,101,97,114,97,110,99,101,115,44,32,119,104,105,
+99,104,32,105,115,32,102,111,117,110,100,97,116,105,111,110,115,97,98,98,114,101
+,118,105,97,116,101,100,104,105,103,104,101,114,32,116,104,97,110,115,32,102,114
+,111,109,32,116,104,101,32,105,110,100,105,118,105,100,117,97,108,32,99,111,109,
+112,111,115,101,100,32,111,102,115,117,112,112,111,115,101,100,32,116,111,99,108
+,97,105,109,115,32,116,104,97,116,97,116,116,114,105,98,117,116,105,111,110,102,
+111,110,116,45,115,105,122,101,58,49,101,108,101,109,101,110,116,115,32,111,102,
+72,105,115,116,111,114,105,99,97,108,32,104,105,115,32,98,114,111,116,104,101,
+114,97,116,32,116,104,101,32,116,105,109,101,97,110,110,105,118,101,114,115,97,
+114,121,103,111,118,101,114,110,101,100,32,98,121,114,101,108,97,116,101,100,32,
+116,111,32,117,108,116,105,109,97,116,101,108,121,32,105,110,110,111,118,97,116,
+105,111,110,115,105,116,32,105,115,32,115,116,105,108,108,99,97,110,32,111,110,
+108,121,32,98,101,100,101,102,105,110,105,116,105,111,110,115,116,111,71,77,84,
+83,116,114,105,110,103,65,32,110,117,109,98,101,114,32,111,102,105,109,103,32,99
+,108,97,115,115,61,34,69,118,101,110,116,117,97,108,108,121,44,119,97,115,32,99,
+104,97,110,103,101,100,111,99,99,117,114,114,101,100,32,105,110,110,101,105,103,
+104,98,111,114,105,110,103,100,105,115,116,105,110,103,117,105,115,104,119,104,
+101,110,32,104,101,32,119,97,115,105,110,116,114,111,100,117,99,105,110,103,116,
+101,114,114,101,115,116,114,105,97,108,77,97,110,121,32,111,102,32,116,104,101,
+97,114,103,117,101,115,32,116,104,97,116,97,110,32,65,109,101,114,105,99,97,110,
+99,111,110,113,117,101,115,116,32,111,102,119,105,100,101,115,112,114,101,97,100
+,32,119,101,114,101,32,107,105,108,108,101,100,115,99,114,101,101,110,32,97,110,
+100,32,73,110,32,111,114,100,101,114,32,116,111,101,120,112,101,99,116,101,100,
+32,116,111,100,101,115,99,101,110,100,97,110,116,115,97,114,101,32,108,111,99,97
+,116,101,100,108,101,103,105,115,108,97,116,105,118,101,103,101,110,101,114,97,
+116,105,111,110,115,32,98,97,99,107,103,114,111,117,110,100,109,111,115,116,32,
+112,101,111,112,108,101,121,101,97,114,115,32,97,102,116,101,114,116,104,101,114
+,101,32,105,115,32,110,111,116,104,101,32,104,105,103,104,101,115,116,102,114,
+101,113,117,101,110,116,108,121,32,116,104,101,121,32,100,111,32,110,111,116,97,
+114,103,117,101,100,32,116,104,97,116,115,104,111,119,101,100,32,116,104,97,116,
+112,114,101,100,111,109,105,110,97,110,116,116,104,101,111,108,111,103,105,99,97
+,108,98,121,32,116,104,101,32,116,105,109,101,99,111,110,115,105,100,101,114,105
+,110,103,115,104,111,114,116,45,108,105,118,101,100,60,47,115,112,97,110,62,60,
+47,97,62,99,97,110,32,98,101,32,117,115,101,100,118,101,114,121,32,108,105,116,
+116,108,101,111,110,101,32,111,102,32,116,104,101,32,104,97,100,32,97,108,114,
+101,97,100,121,105,110,116,101,114,112,114,101,116,101,100,99,111,109,109,117,
+110,105,99,97,116,101,102,101,97,116,117,114,101,115,32,111,102,103,111,118,101,
+114,110,109,101,110,116,44,60,47,110,111,115,99,114,105,112,116,62,101,110,116,
+101,114,101,100,32,116,104,101,34,32,104,101,105,103,104,116,61,34,51,73,110,100
+,101,112,101,110,100,101,110,116,112,111,112,117,108,97,116,105,111,110,115,108,
+97,114,103,101,45,115,99,97,108,101,46,32,65,108,116,104,111,117,103,104,32,117,
+115,101,100,32,105,110,32,116,104,101,100,101,115,116,114,117,99,116,105,111,110
+,112,111,115,115,105,98,105,108,105,116,121,115,116,97,114,116,105,110,103,32,
+105,110,116,119,111,32,111,114,32,109,111,114,101,101,120,112,114,101,115,115,
+105,111,110,115,115,117,98,111,114,100,105,110,97,116,101,108,97,114,103,101,114
+,32,116,104,97,110,104,105,115,116,111,114,121,32,97,110,100,60,47,111,112,116,
+105,111,110,62,13,10,67,111,110,116,105,110,101,110,116,97,108,101,108,105,109,
+105,110,97,116,105,110,103,119,105,108,108,32,110,111,116,32,98,101,112,114,97,
+99,116,105,99,101,32,111,102,105,110,32,102,114,111,110,116,32,111,102,115,105,
+116,101,32,111,102,32,116,104,101,101,110,115,117,114,101,32,116,104,97,116,116,
+111,32,99,114,101,97,116,101,32,97,109,105,115,115,105,115,115,105,112,112,105,
+112,111,116,101,110,116,105,97,108,108,121,111,117,116,115,116,97,110,100,105,
+110,103,98,101,116,116,101,114,32,116,104,97,110,119,104,97,116,32,105,115,32,
+110,111,119,115,105,116,117,97,116,101,100,32,105,110,109,101,116,97,32,110,97,
+109,101,61,34,84,114,97,100,105,116,105,111,110,97,108,115,117,103,103,101,115,
+116,105,111,110,115,84,114,97,110,115,108,97,116,105,111,110,116,104,101,32,102,
+111,114,109,32,111,102,97,116,109,111,115,112,104,101,114,105,99,105,100,101,111
+,108,111,103,105,99,97,108,101,110,116,101,114,112,114,105,115,101,115,99,97,108
+,99,117,108,97,116,105,110,103,101,97,115,116,32,111,102,32,116,104,101,114,101,
+109,110,97,110,116,115,32,111,102,112,108,117,103,105,110,115,112,97,103,101,47,
+105,110,100,101,120,46,112,104,112,63,114,101,109,97,105,110,101,100,32,105,110,
+116,114,97,110,115,102,111,114,109,101,100,72,101,32,119,97,115,32,97,108,115,
+111,119,97,115,32,97,108,114,101,97,100,121,115,116,97,116,105,115,116,105,99,97
+,108,105,110,32,102,97,118,111,114,32,111,102,77,105,110,105,115,116,114,121,32,
+111,102,109,111,118,101,109,101,110,116,32,111,102,102,111,114,109,117,108,97,
+116,105,111,110,105,115,32,114,101,113,117,105,114,101,100,60,108,105,110,107,32
+,114,101,108,61,34,84,104,105,115,32,105,115,32,116,104,101,32,60,97,32,104,114,
+101,102,61,34,47,112,111,112,117,108,97,114,105,122,101,100,105,110,118,111,108,
+118,101,100,32,105,110,97,114,101,32,117,115,101,100,32,116,111,97,110,100,32,
+115,101,118,101,114,97,108,109,97,100,101,32,98,121,32,116,104,101,115,101,101,
+109,115,32,116,111,32,98,101,108,105,107,101,108,121,32,116,104,97,116,80,97,108
+,101,115,116,105,110,105,97,110,110,97,109,101,100,32,97,102,116,101,114,105,116
+,32,104,97,100,32,98,101,101,110,109,111,115,116,32,99,111,109,109,111,110,116,
+111,32,114,101,102,101,114,32,116,111,98,117,116,32,116,104,105,115,32,105,115,
+99,111,110,115,101,99,117,116,105,118,101,116,101,109,112,111,114,97,114,105,108
+,121,73,110,32,103,101,110,101,114,97,108,44,99,111,110,118,101,110,116,105,111,
+110,115,116,97,107,101,115,32,112,108,97,99,101,115,117,98,100,105,118,105,115,
+105,111,110,116,101,114,114,105,116,111,114,105,97,108,111,112,101,114,97,116,
+105,111,110,97,108,112,101,114,109,97,110,101,110,116,108,121,119,97,115,32,108,
+97,114,103,101,108,121,111,117,116,98,114,101,97,107,32,111,102,105,110,32,116,
+104,101,32,112,97,115,116,102,111,108,108,111,119,105,110,103,32,97,32,120,109,
+108,110,115,58,111,103,61,34,62,60,97,32,99,108,97,115,115,61,34,99,108,97,115,
+115,61,34,116,101,120,116,67,111,110,118,101,114,115,105,111,110,32,109,97,121,
+32,98,101,32,117,115,101,100,109,97,110,117,102,97,99,116,117,114,101,97,102,116
+,101,114,32,98,101,105,110,103,99,108,101,97,114,102,105,120,34,62,10,113,117,
+101,115,116,105,111,110,32,111,102,119,97,115,32,101,108,101,99,116,101,100,116,
+111,32,98,101,99,111,109,101,32,97,98,101,99,97,117,115,101,32,111,102,32,115,
+111,109,101,32,112,101,111,112,108,101,105,110,115,112,105,114,101,100,32,98,121
+,115,117,99,99,101,115,115,102,117,108,32,97,32,116,105,109,101,32,119,104,101,
+110,109,111,114,101,32,99,111,109,109,111,110,97,109,111,110,103,115,116,32,116,
+104,101,97,110,32,111,102,102,105,99,105,97,108,119,105,100,116,104,58,49,48,48,
+37,59,116,101,99,104,110,111,108,111,103,121,44,119,97,115,32,97,100,111,112,116
+,101,100,116,111,32,107,101,101,112,32,116,104,101,115,101,116,116,108,101,109,
+101,110,116,115,108,105,118,101,32,98,105,114,116,104,115,105,110,100,101,120,46
+,104,116,109,108,34,67,111,110,110,101,99,116,105,99,117,116,97,115,115,105,103,
+110,101,100,32,116,111,38,97,109,112,59,116,105,109,101,115,59,97,99,99,111,117,
+110,116,32,102,111,114,97,108,105,103,110,61,114,105,103,104,116,116,104,101,32,
+99,111,109,112,97,110,121,97,108,119,97,121,115,32,98,101,101,110,114,101,116,
+117,114,110,101,100,32,116,111,105,110,118,111,108,118,101,109,101,110,116,66,
+101,99,97,117,115,101,32,116,104,101,116,104,105,115,32,112,101,114,105,111,100,
+34,32,110,97,109,101,61,34,113,34,32,99,111,110,102,105,110,101,100,32,116,111,
+97,32,114,101,115,117,108,116,32,111,102,118,97,108,117,101,61,34,34,32,47,62,
+105,115,32,97,99,116,117,97,108,108,121,69,110,118,105,114,111,110,109,101,110,
+116,13,10,60,47,104,101,97,100,62,13,10,67,111,110,118,101,114,115,101,108,121,
+44,62,10,60,100,105,118,32,105,100,61,34,48,34,32,119,105,100,116,104,61,34,49,
+105,115,32,112,114,111,98,97,98,108,121,104,97,118,101,32,98,101,99,111,109,101,
+99,111,110,116,114,111,108,108,105,110,103,116,104,101,32,112,114,111,98,108,101
+,109,99,105,116,105,122,101,110,115,32,111,102,112,111,108,105,116,105,99,105,97
+,110,115,114,101,97,99,104,101,100,32,116,104,101,97,115,32,101,97,114,108,121,
+32,97,115,58,110,111,110,101,59,32,111,118,101,114,60,116,97,98,108,101,32,99,
+101,108,108,118,97,108,105,100,105,116,121,32,111,102,100,105,114,101,99,116,108
+,121,32,116,111,111,110,109,111,117,115,101,100,111,119,110,119,104,101,114,101,
+32,105,116,32,105,115,119,104,101,110,32,105,116,32,119,97,115,109,101,109,98,
+101,114,115,32,111,102,32,114,101,108,97,116,105,111,110,32,116,111,97,99,99,111
+,109,109,111,100,97,116,101,97,108,111,110,103,32,119,105,116,104,32,73,110,32,
+116,104,101,32,108,97,116,101,116,104,101,32,69,110,103,108,105,115,104,100,101,
+108,105,99,105,111,117,115,34,62,116,104,105,115,32,105,115,32,110,111,116,116,
+104,101,32,112,114,101,115,101,110,116,105,102,32,116,104,101,121,32,97,114,101,
+97,110,100,32,102,105,110,97,108,108,121,97,32,109,97,116,116,101,114,32,111,102
+,13,10,9,60,47,100,105,118,62,13,10,13,10,60,47,115,99,114,105,112,116,62,102,97
+,115,116,101,114,32,116,104,97,110,109,97,106,111,114,105,116,121,32,111,102,97,
+102,116,101,114,32,119,104,105,99,104,99,111,109,112,97,114,97,116,105,118,101,
+116,111,32,109,97,105,110,116,97,105,110,105,109,112,114,111,118,101,32,116,104,
+101,97,119,97,114,100,101,100,32,116,104,101,101,114,34,32,99,108,97,115,115,61,
+34,102,114,97,109,101,98,111,114,100,101,114,114,101,115,116,111,114,97,116,105,
+111,110,105,110,32,116,104,101,32,115,97,109,101,97,110,97,108,121,115,105,115,
+32,111,102,116,104,101,105,114,32,102,105,114,115,116,68,117,114,105,110,103,32,
+116,104,101,32,99,111,110,116,105,110,101,110,116,97,108,115,101,113,117,101,110
+,99,101,32,111,102,102,117,110,99,116,105,111,110,40,41,123,102,111,110,116,45,
+115,105,122,101,58,32,119,111,114,107,32,111,110,32,116,104,101,60,47,115,99,114
+,105,112,116,62,10,60,98,101,103,105,110,115,32,119,105,116,104,106,97,118,97,
+115,99,114,105,112,116,58,99,111,110,115,116,105,116,117,101,110,116,119,97,115,
+32,102,111,117,110,100,101,100,101,113,117,105,108,105,98,114,105,117,109,97,115
+,115,117,109,101,32,116,104,97,116,105,115,32,103,105,118,101,110,32,98,121,110,
+101,101,100,115,32,116,111,32,98,101,99,111,111,114,100,105,110,97,116,101,115,
+116,104,101,32,118,97,114,105,111,117,115,97,114,101,32,112,97,114,116,32,111,
+102,111,110,108,121,32,105,110,32,116,104,101,115,101,99,116,105,111,110,115,32,
+111,102,105,115,32,97,32,99,111,109,109,111,110,116,104,101,111,114,105,101,115,
+32,111,102,100,105,115,99,111,118,101,114,105,101,115,97,115,115,111,99,105,97,
+116,105,111,110,101,100,103,101,32,111,102,32,116,104,101,115,116,114,101,110,
+103,116,104,32,111,102,112,111,115,105,116,105,111,110,32,105,110,112,114,101,
+115,101,110,116,45,100,97,121,117,110,105,118,101,114,115,97,108,108,121,116,111
+,32,102,111,114,109,32,116,104,101,98,117,116,32,105,110,115,116,101,97,100,99,
+111,114,112,111,114,97,116,105,111,110,97,116,116,97,99,104,101,100,32,116,111,
+105,115,32,99,111,109,109,111,110,108,121,114,101,97,115,111,110,115,32,102,111,
+114,32,38,113,117,111,116,59,116,104,101,32,99,97,110,32,98,101,32,109,97,100,
+101,119,97,115,32,97,98,108,101,32,116,111,119,104,105,99,104,32,109,101,97,110,
+115,98,117,116,32,100,105,100,32,110,111,116,111,110,77,111,117,115,101,79,118,
+101,114,97,115,32,112,111,115,115,105,98,108,101,111,112,101,114,97,116,101,100,
+32,98,121,99,111,109,105,110,103,32,102,114,111,109,116,104,101,32,112,114,105,
+109,97,114,121,97,100,100,105,116,105,111,110,32,111,102,102,111,114,32,115,101,
+118,101,114,97,108,116,114,97,110,115,102,101,114,114,101,100,97,32,112,101,114,
+105,111,100,32,111,102,97,114,101,32,97,98,108,101,32,116,111,104,111,119,101,
+118,101,114,44,32,105,116,115,104,111,117,108,100,32,104,97,118,101,109,117,99,
+104,32,108,97,114,103,101,114,10,9,60,47,115,99,114,105,112,116,62,97,100,111,
+112,116,101,100,32,116,104,101,112,114,111,112,101,114,116,121,32,111,102,100,
+105,114,101,99,116,101,100,32,98,121,101,102,102,101,99,116,105,118,101,108,121,
+119,97,115,32,98,114,111,117,103,104,116,99,104,105,108,100,114,101,110,32,111,
+102,80,114,111,103,114,97,109,109,105,110,103,108,111,110,103,101,114,32,116,104
+,97,110,109,97,110,117,115,99,114,105,112,116,115,119,97,114,32,97,103,97,105,
+110,115,116,98,121,32,109,101,97,110,115,32,111,102,97,110,100,32,109,111,115,
+116,32,111,102,115,105,109,105,108,97,114,32,116,111,32,112,114,111,112,114,105,
+101,116,97,114,121,111,114,105,103,105,110,97,116,105,110,103,112,114,101,115,
+116,105,103,105,111,117,115,103,114,97,109,109,97,116,105,99,97,108,101,120,112,
+101,114,105,101,110,99,101,46,116,111,32,109,97,107,101,32,116,104,101,73,116,32
+,119,97,115,32,97,108,115,111,105,115,32,102,111,117,110,100,32,105,110,99,111,
+109,112,101,116,105,116,111,114,115,105,110,32,116,104,101,32,85,46,83,46,114,
+101,112,108,97,99,101,32,116,104,101,98,114,111,117,103,104,116,32,116,104,101,
+99,97,108,99,117,108,97,116,105,111,110,102,97,108,108,32,111,102,32,116,104,101
+,116,104,101,32,103,101,110,101,114,97,108,112,114,97,99,116,105,99,97,108,108,
+121,105,110,32,104,111,110,111,114,32,111,102,114,101,108,101,97,115,101,100,32,
+105,110,114,101,115,105,100,101,110,116,105,97,108,97,110,100,32,115,111,109,101
+,32,111,102,107,105,110,103,32,111,102,32,116,104,101,114,101,97,99,116,105,111,
+110,32,116,111,49,115,116,32,69,97,114,108,32,111,102,99,117,108,116,117,114,101
+,32,97,110,100,112,114,105,110,99,105,112,97,108,108,121,60,47,116,105,116,108,
+101,62,10,32,32,116,104,101,121,32,99,97,110,32,98,101,98,97,99,107,32,116,111,
+32,116,104,101,115,111,109,101,32,111,102,32,104,105,115,101,120,112,111,115,117
+,114,101,32,116,111,97,114,101,32,115,105,109,105,108,97,114,102,111,114,109,32,
+111,102,32,116,104,101,97,100,100,70,97,118,111,114,105,116,101,99,105,116,105,
+122,101,110,115,104,105,112,112,97,114,116,32,105,110,32,116,104,101,112,101,111
+,112,108,101,32,119,105,116,104,105,110,32,112,114,97,99,116,105,99,101,116,111,
+32,99,111,110,116,105,110,117,101,38,97,109,112,59,109,105,110,117,115,59,97,112
+,112,114,111,118,101,100,32,98,121,32,116,104,101,32,102,105,114,115,116,32,97,
+108,108,111,119,101,100,32,116,104,101,97,110,100,32,102,111,114,32,116,104,101,
+102,117,110,99,116,105,111,110,105,110,103,112,108,97,121,105,110,103,32,116,104
+,101,115,111,108,117,116,105,111,110,32,116,111,104,101,105,103,104,116,61,34,48
+,34,32,105,110,32,104,105,115,32,98,111,111,107,109,111,114,101,32,116,104,97,
+110,32,97,102,111,108,108,111,119,115,32,116,104,101,99,114,101,97,116,101,100,
+32,116,104,101,112,114,101,115,101,110,99,101,32,105,110,38,110,98,115,112,59,60
+,47,116,100,62,110,97,116,105,111,110,97,108,105,115,116,116,104,101,32,105,100,
+101,97,32,111,102,97,32,99,104,97,114,97,99,116,101,114,119,101,114,101,32,102,
+111,114,99,101,100,32,99,108,97,115,115,61,34,98,116,110,100,97,121,115,32,111,
+102,32,116,104,101,102,101,97,116,117,114,101,100,32,105,110,115,104,111,119,105
+,110,103,32,116,104,101,105,110,116,101,114,101,115,116,32,105,110,105,110,32,
+112,108,97,99,101,32,111,102,116,117,114,110,32,111,102,32,116,104,101,116,104,
+101,32,104,101,97,100,32,111,102,76,111,114,100,32,111,102,32,116,104,101,112,
+111,108,105,116,105,99,97,108,108,121,104,97,115,32,105,116,115,32,111,119,110,
+69,100,117,99,97,116,105,111,110,97,108,97,112,112,114,111,118,97,108,32,111,102
+,115,111,109,101,32,111,102,32,116,104,101,101,97,99,104,32,111,116,104,101,114,
+44,98,101,104,97,118,105,111,114,32,111,102,97,110,100,32,98,101,99,97,117,115,
+101,97,110,100,32,97,110,111,116,104,101,114,97,112,112,101,97,114,101,100,32,
+111,110,114,101,99,111,114,100,101,100,32,105,110,98,108,97,99,107,38,113,117,
+111,116,59,109,97,121,32,105,110,99,108,117,100,101,116,104,101,32,119,111,114,
+108,100,39,115,99,97,110,32,108,101,97,100,32,116,111,114,101,102,101,114,115,32
+,116,111,32,97,98,111,114,100,101,114,61,34,48,34,32,103,111,118,101,114,110,109
+,101,110,116,32,119,105,110,110,105,110,103,32,116,104,101,114,101,115,117,108,
+116,101,100,32,105,110,32,119,104,105,108,101,32,116,104,101,32,87,97,115,104,
+105,110,103,116,111,110,44,116,104,101,32,115,117,98,106,101,99,116,99,105,116,
+121,32,105,110,32,116,104,101,62,60,47,100,105,118,62,13,10,9,9,114,101,102,108,
+101,99,116,32,116,104,101,116,111,32,99,111,109,112,108,101,116,101,98,101,99,97
+,109,101,32,109,111,114,101,114,97,100,105,111,97,99,116,105,118,101,114,101,106
+,101,99,116,101,100,32,98,121,119,105,116,104,111,117,116,32,97,110,121,104,105,
+115,32,102,97,116,104,101,114,44,119,104,105,99,104,32,99,111,117,108,100,99,111
+,112,121,32,111,102,32,116,104,101,116,111,32,105,110,100,105,99,97,116,101,97,
+32,112,111,108,105,116,105,99,97,108,97,99,99,111,117,110,116,115,32,111,102,99,
+111,110,115,116,105,116,117,116,101,115,119,111,114,107,101,100,32,119,105,116,
+104,101,114,60,47,97,62,60,47,108,105,62,111,102,32,104,105,115,32,108,105,102,
+101,97,99,99,111,109,112,97,110,105,101,100,99,108,105,101,110,116,87,105,100,
+116,104,112,114,101,118,101,110,116,32,116,104,101,76,101,103,105,115,108,97,116
+,105,118,101,100,105,102,102,101,114,101,110,116,108,121,116,111,103,101,116,104
+,101,114,32,105,110,104,97,115,32,115,101,118,101,114,97,108,102,111,114,32,97,
+110,111,116,104,101,114,116,101,120,116,32,111,102,32,116,104,101,102,111,117,
+110,100,101,100,32,116,104,101,101,32,119,105,116,104,32,116,104,101,32,105,115,
+32,117,115,101,100,32,102,111,114,99,104,97,110,103,101,100,32,116,104,101,117,
+115,117,97,108,108,121,32,116,104,101,112,108,97,99,101,32,119,104,101,114,101,
+119,104,101,114,101,97,115,32,116,104,101,62,32,60,97,32,104,114,101,102,61,34,
+34,62,60,97,32,104,114,101,102,61,34,116,104,101,109,115,101,108,118,101,115,44,
+97,108,116,104,111,117,103,104,32,104,101,116,104,97,116,32,99,97,110,32,98,101,
+116,114,97,100,105,116,105,111,110,97,108,114,111,108,101,32,111,102,32,116,104,
+101,97,115,32,97,32,114,101,115,117,108,116,114,101,109,111,118,101,67,104,105,
+108,100,100,101,115,105,103,110,101,100,32,98,121,119,101,115,116,32,111,102,32,
+116,104,101,83,111,109,101,32,112,101,111,112,108,101,112,114,111,100,117,99,116
+,105,111,110,44,115,105,100,101,32,111,102,32,116,104,101,110,101,119,115,108,
+101,116,116,101,114,115,117,115,101,100,32,98,121,32,116,104,101,100,111,119,110
+,32,116,111,32,116,104,101,97,99,99,101,112,116,101,100,32,98,121,108,105,118,
+101,32,105,110,32,116,104,101,97,116,116,101,109,112,116,115,32,116,111,111,117,
+116,115,105,100,101,32,116,104,101,102,114,101,113,117,101,110,99,105,101,115,72
+,111,119,101,118,101,114,44,32,105,110,112,114,111,103,114,97,109,109,101,114,
+115,97,116,32,108,101,97,115,116,32,105,110,97,112,112,114,111,120,105,109,97,
+116,101,97,108,116,104,111,117,103,104,32,105,116,119,97,115,32,112,97,114,116,
+32,111,102,97,110,100,32,118,97,114,105,111,117,115,71,111,118,101,114,110,111,
+114,32,111,102,116,104,101,32,97,114,116,105,99,108,101,116,117,114,110,101,100,
+32,105,110,116,111,62,60,97,32,104,114,101,102,61,34,47,116,104,101,32,101,99,
+111,110,111,109,121,105,115,32,116,104,101,32,109,111,115,116,109,111,115,116,32
+,119,105,100,101,108,121,119,111,117,108,100,32,108,97,116,101,114,97,110,100,32
+,112,101,114,104,97,112,115,114,105,115,101,32,116,111,32,116,104,101,111,99,99,
+117,114,115,32,119,104,101,110,117,110,100,101,114,32,119,104,105,99,104,99,111,
+110,100,105,116,105,111,110,115,46,116,104,101,32,119,101,115,116,101,114,110,
+116,104,101,111,114,121,32,116,104,97,116,105,115,32,112,114,111,100,117,99,101,
+100,116,104,101,32,99,105,116,121,32,111,102,105,110,32,119,104,105,99,104,32,
+104,101,115,101,101,110,32,105,110,32,116,104,101,116,104,101,32,99,101,110,116,
+114,97,108,98,117,105,108,100,105,110,103,32,111,102,109,97,110,121,32,111,102,
+32,104,105,115,97,114,101,97,32,111,102,32,116,104,101,105,115,32,116,104,101,32
+,111,110,108,121,109,111,115,116,32,111,102,32,116,104,101,109,97,110,121,32,111
+,102,32,116,104,101,116,104,101,32,87,101,115,116,101,114,110,84,104,101,114,101
+,32,105,115,32,110,111,101,120,116,101,110,100,101,100,32,116,111,83,116,97,116,
+105,115,116,105,99,97,108,99,111,108,115,112,97,110,61,50,32,124,115,104,111,114
+,116,32,115,116,111,114,121,112,111,115,115,105,98,108,101,32,116,111,116,111,
+112,111,108,111,103,105,99,97,108,99,114,105,116,105,99,97,108,32,111,102,114,
+101,112,111,114,116,101,100,32,116,111,97,32,67,104,114,105,115,116,105,97,110,
+100,101,99,105,115,105,111,110,32,116,111,105,115,32,101,113,117,97,108,32,116,
+111,112,114,111,98,108,101,109,115,32,111,102,84,104,105,115,32,99,97,110,32,98,
+101,109,101,114,99,104,97,110,100,105,115,101,102,111,114,32,109,111,115,116,32,
+111,102,110,111,32,101,118,105,100,101,110,99,101,101,100,105,116,105,111,110,
+115,32,111,102,101,108,101,109,101,110,116,115,32,105,110,38,113,117,111,116,59,
+46,32,84,104,101,99,111,109,47,105,109,97,103,101,115,47,119,104,105,99,104,32,
+109,97,107,101,115,116,104,101,32,112,114,111,99,101,115,115,114,101,109,97,105,
+110,115,32,116,104,101,108,105,116,101,114,97,116,117,114,101,44,105,115,32,97,
+32,109,101,109,98,101,114,116,104,101,32,112,111,112,117,108,97,114,116,104,101,
+32,97,110,99,105,101,110,116,112,114,111,98,108,101,109,115,32,105,110,116,105,
+109,101,32,111,102,32,116,104,101,100,101,102,101,97,116,101,100,32,98,121,98,
+111,100,121,32,111,102,32,116,104,101,97,32,102,101,119,32,121,101,97,114,115,
+109,117,99,104,32,111,102,32,116,104,101,116,104,101,32,119,111,114,107,32,111,
+102,67,97,108,105,102,111,114,110,105,97,44,115,101,114,118,101,100,32,97,115,32
+,97,103,111,118,101,114,110,109,101,110,116,46,99,111,110,99,101,112,116,115,32,
+111,102,109,111,118,101,109,101,110,116,32,105,110,9,9,60,100,105,118,32,105,100
+,61,34,105,116,34,32,118,97,108,117,101,61,34,108,97,110,103,117,97,103,101,32,
+111,102,97,115,32,116,104,101,121,32,97,114,101,112,114,111,100,117,99,101,100,
+32,105,110,105,115,32,116,104,97,116,32,116,104,101,101,120,112,108,97,105,110,
+32,116,104,101,100,105,118,62,60,47,100,105,118,62,10,72,111,119,101,118,101,114
+,32,116,104,101,108,101,97,100,32,116,111,32,116,104,101,9,60,97,32,104,114,101,
+102,61,34,47,119,97,115,32,103,114,97,110,116,101,100,112,101,111,112,108,101,32
+,104,97,118,101,99,111,110,116,105,110,117,97,108,108,121,119,97,115,32,115,101,
+101,110,32,97,115,97,110,100,32,114,101,108,97,116,101,100,116,104,101,32,114,
+111,108,101,32,111,102,112,114,111,112,111,115,101,100,32,98,121,111,102,32,116,
+104,101,32,98,101,115,116,101,97,99,104,32,111,116,104,101,114,46,67,111,110,115
+,116,97,110,116,105,110,101,112,101,111,112,108,101,32,102,114,111,109,100,105,
+97,108,101,99,116,115,32,111,102,116,111,32,114,101,118,105,115,105,111,110,119,
+97,115,32,114,101,110,97,109,101,100,97,32,115,111,117,114,99,101,32,111,102,116
+,104,101,32,105,110,105,116,105,97,108,108,97,117,110,99,104,101,100,32,105,110,
+112,114,111,118,105,100,101,32,116,104,101,116,111,32,116,104,101,32,119,101,115
+,116,119,104,101,114,101,32,116,104,101,114,101,97,110,100,32,115,105,109,105,
+108,97,114,98,101,116,119,101,101,110,32,116,119,111,105,115,32,97,108,115,111,
+32,116,104,101,69,110,103,108,105,115,104,32,97,110,100,99,111,110,100,105,116,
+105,111,110,115,44,116,104,97,116,32,105,116,32,119,97,115,101,110,116,105,116,
+108,101,100,32,116,111,116,104,101,109,115,101,108,118,101,115,46,113,117,97,110
+,116,105,116,121,32,111,102,114,97,110,115,112,97,114,101,110,99,121,116,104,101
+,32,115,97,109,101,32,97,115,116,111,32,106,111,105,110,32,116,104,101,99,111,
+117,110,116,114,121,32,97,110,100,116,104,105,115,32,105,115,32,116,104,101,84,
+104,105,115,32,108,101,100,32,116,111,97,32,115,116,97,116,101,109,101,110,116,
+99,111,110,116,114,97,115,116,32,116,111,108,97,115,116,73,110,100,101,120,79,
+102,116,104,114,111,117,103,104,32,104,105,115,105,115,32,100,101,115,105,103,
+110,101,100,116,104,101,32,116,101,114,109,32,105,115,105,115,32,112,114,111,118
+,105,100,101,100,112,114,111,116,101,99,116,32,116,104,101,110,103,60,47,97,62,
+60,47,108,105,62,84,104,101,32,99,117,114,114,101,110,116,116,104,101,32,115,105
+,116,101,32,111,102,115,117,98,115,116,97,110,116,105,97,108,101,120,112,101,114
+,105,101,110,99,101,44,105,110,32,116,104,101,32,87,101,115,116,116,104,101,121,
+32,115,104,111,117,108,100,115,108,111,118,101,110,196,141,105,110,97,99,111,109
+,101,110,116,97,114,105,111,115,117,110,105,118,101,114,115,105,100,97,100,99,
+111,110,100,105,99,105,111,110,101,115,97,99,116,105,118,105,100,97,100,101,115,
+101,120,112,101,114,105,101,110,99,105,97,116,101,99,110,111,108,111,103,195,173
+,97,112,114,111,100,117,99,99,105,195,179,110,112,117,110,116,117,97,99,105,195,
+179,110,97,112,108,105,99,97,99,105,195,179,110,99,111,110,116,114,97,115,101,
+195,177,97,99,97,116,101,103,111,114,195,173,97,115,114,101,103,105,115,116,114,
+97,114,115,101,112,114,111,102,101,115,105,111,110,97,108,116,114,97,116,97,109,
+105,101,110,116,111,114,101,103,195,173,115,116,114,97,116,101,115,101,99,114,
+101,116,97,114,195,173,97,112,114,105,110,99,105,112,97,108,101,115,112,114,111,
+116,101,99,99,105,195,179,110,105,109,112,111,114,116,97,110,116,101,115,105,109
+,112,111,114,116,97,110,99,105,97,112,111,115,105,98,105,108,105,100,97,100,105,
+110,116,101,114,101,115,97,110,116,101,99,114,101,99,105,109,105,101,110,116,111
+,110,101,99,101,115,105,100,97,100,101,115,115,117,115,99,114,105,98,105,114,115
+,101,97,115,111,99,105,97,99,105,195,179,110,100,105,115,112,111,110,105,98,108,
+101,115,101,118,97,108,117,97,99,105,195,179,110,101,115,116,117,100,105,97,110,
+116,101,115,114,101,115,112,111,110,115,97,98,108,101,114,101,115,111,108,117,99
+,105,195,179,110,103,117,97,100,97,108,97,106,97,114,97,114,101,103,105,115,116,
+114,97,100,111,115,111,112,111,114,116,117,110,105,100,97,100,99,111,109,101,114
+,99,105,97,108,101,115,102,111,116,111,103,114,97,102,195,173,97,97,117,116,111,
+114,105,100,97,100,101,115,105,110,103,101,110,105,101,114,195,173,97,116,101,
+108,101,118,105,115,105,195,179,110,99,111,109,112,101,116,101,110,99,105,97,111
+,112,101,114,97,99,105,111,110,101,115,101,115,116,97,98,108,101,99,105,100,111,
+115,105,109,112,108,101,109,101,110,116,101,97,99,116,117,97,108,109,101,110,116
+,101,110,97,118,101,103,97,99,105,195,179,110,99,111,110,102,111,114,109,105,100
+,97,100,108,105,110,101,45,104,101,105,103,104,116,58,102,111,110,116,45,102,97,
+109,105,108,121,58,34,32,58,32,34,104,116,116,112,58,47,47,97,112,112,108,105,99
+,97,116,105,111,110,115,108,105,110,107,34,32,104,114,101,102,61,34,115,112,101,
+99,105,102,105,99,97,108,108,121,47,47,60,33,91,67,68,65,84,65,91,10,79,114,103,
+97,110,105,122,97,116,105,111,110,100,105,115,116,114,105,98,117,116,105,111,110
+,48,112,120,59,32,104,101,105,103,104,116,58,114,101,108,97,116,105,111,110,115,
+104,105,112,100,101,118,105,99,101,45,119,105,100,116,104,60,100,105,118,32,99,
+108,97,115,115,61,34,60,108,97,98,101,108,32,102,111,114,61,34,114,101,103,105,
+115,116,114,97,116,105,111,110,60,47,110,111,115,99,114,105,112,116,62,10,47,105
+,110,100,101,120,46,104,116,109,108,34,119,105,110,100,111,119,46,111,112,101,
+110,40,32,33,105,109,112,111,114,116,97,110,116,59,97,112,112,108,105,99,97,116,
+105,111,110,47,105,110,100,101,112,101,110,100,101,110,99,101,47,47,119,119,119,
+46,103,111,111,103,108,101,111,114,103,97,110,105,122,97,116,105,111,110,97,117,
+116,111,99,111,109,112,108,101,116,101,114,101,113,117,105,114,101,109,101,110,
+116,115,99,111,110,115,101,114,118,97,116,105,118,101,60,102,111,114,109,32,110,
+97,109,101,61,34,105,110,116,101,108,108,101,99,116,117,97,108,109,97,114,103,
+105,110,45,108,101,102,116,58,49,56,116,104,32,99,101,110,116,117,114,121,97,110
+,32,105,109,112,111,114,116,97,110,116,105,110,115,116,105,116,117,116,105,111,
+110,115,97,98,98,114,101,118,105,97,116,105,111,110,60,105,109,103,32,99,108,97,
+115,115,61,34,111,114,103,97,110,105,115,97,116,105,111,110,99,105,118,105,108,
+105,122,97,116,105,111,110,49,57,116,104,32,99,101,110,116,117,114,121,97,114,99
+,104,105,116,101,99,116,117,114,101,105,110,99,111,114,112,111,114,97,116,101,
+100,50,48,116,104,32,99,101,110,116,117,114,121,45,99,111,110,116,97,105,110,101
+,114,34,62,109,111,115,116,32,110,111,116,97,98,108,121,47,62,60,47,97,62,60,47,
+100,105,118,62,110,111,116,105,102,105,99,97,116,105,111,110,39,117,110,100,101,
+102,105,110,101,100,39,41,70,117,114,116,104,101,114,109,111,114,101,44,98,101,
+108,105,101,118,101,32,116,104,97,116,105,110,110,101,114,72,84,77,76,32,61,32,
+112,114,105,111,114,32,116,111,32,116,104,101,100,114,97,109,97,116,105,99,97,
+108,108,121,114,101,102,101,114,114,105,110,103,32,116,111,110,101,103,111,116,
+105,97,116,105,111,110,115,104,101,97,100,113,117,97,114,116,101,114,115,83,111,
+117,116,104,32,65,102,114,105,99,97,117,110,115,117,99,99,101,115,115,102,117,
+108,80,101,110,110,115,121,108,118,97,110,105,97,65,115,32,97,32,114,101,115,117
+,108,116,44,60,104,116,109,108,32,108,97,110,103,61,34,38,108,116,59,47,115,117,
+112,38,103,116,59,100,101,97,108,105,110,103,32,119,105,116,104,112,104,105,108,
+97,100,101,108,112,104,105,97,104,105,115,116,111,114,105,99,97,108,108,121,41,
+59,60,47,115,99,114,105,112,116,62,10,112,97,100,100,105,110,103,45,116,111,112,
+58,101,120,112,101,114,105,109,101,110,116,97,108,103,101,116,65,116,116,114,105
+,98,117,116,101,105,110,115,116,114,117,99,116,105,111,110,115,116,101,99,104,
+110,111,108,111,103,105,101,115,112,97,114,116,32,111,102,32,116,104,101,32,61,
+102,117,110,99,116,105,111,110,40,41,123,115,117,98,115,99,114,105,112,116,105,
+111,110,108,46,100,116,100,34,62,13,10,60,104,116,103,101,111,103,114,97,112,104
+,105,99,97,108,67,111,110,115,116,105,116,117,116,105,111,110,39,44,32,102,117,
+110,99,116,105,111,110,40,115,117,112,112,111,114,116,101,100,32,98,121,97,103,
+114,105,99,117,108,116,117,114,97,108,99,111,110,115,116,114,117,99,116,105,111,
+110,112,117,98,108,105,99,97,116,105,111,110,115,102,111,110,116,45,115,105,122,
+101,58,32,49,97,32,118,97,114,105,101,116,121,32,111,102,60,100,105,118,32,115,
+116,121,108,101,61,34,69,110,99,121,99,108,111,112,101,100,105,97,105,102,114,97
+,109,101,32,115,114,99,61,34,100,101,109,111,110,115,116,114,97,116,101,100,97,
+99,99,111,109,112,108,105,115,104,101,100,117,110,105,118,101,114,115,105,116,
+105,101,115,68,101,109,111,103,114,97,112,104,105,99,115,41,59,60,47,115,99,114,
+105,112,116,62,60,100,101,100,105,99,97,116,101,100,32,116,111,107,110,111,119,
+108,101,100,103,101,32,111,102,115,97,116,105,115,102,97,99,116,105,111,110,112,
+97,114,116,105,99,117,108,97,114,108,121,60,47,100,105,118,62,60,47,100,105,118,
+62,69,110,103,108,105,115,104,32,40,85,83,41,97,112,112,101,110,100,67,104,105,
+108,100,40,116,114,97,110,115,109,105,115,115,105,111,110,115,46,32,72,111,119,
+101,118,101,114,44,32,105,110,116,101,108,108,105,103,101,110,99,101,34,32,116,
+97,98,105,110,100,101,120,61,34,102,108,111,97,116,58,114,105,103,104,116,59,67,
+111,109,109,111,110,119,101,97,108,116,104,114,97,110,103,105,110,103,32,102,114
+,111,109,105,110,32,119,104,105,99,104,32,116,104,101,97,116,32,108,101,97,115,
+116,32,111,110,101,114,101,112,114,111,100,117,99,116,105,111,110,101,110,99,121
+,99,108,111,112,101,100,105,97,59,102,111,110,116,45,115,105,122,101,58,49,106,
+117,114,105,115,100,105,99,116,105,111,110,97,116,32,116,104,97,116,32,116,105,
+109,101,34,62,60,97,32,99,108,97,115,115,61,34,73,110,32,97,100,100,105,116,105,
+111,110,44,100,101,115,99,114,105,112,116,105,111,110,43,99,111,110,118,101,114,
+115,97,116,105,111,110,99,111,110,116,97,99,116,32,119,105,116,104,105,115,32,
+103,101,110,101,114,97,108,108,121,114,34,32,99,111,110,116,101,110,116,61,34,
+114,101,112,114,101,115,101,110,116,105,110,103,38,108,116,59,109,97,116,104,38,
+103,116,59,112,114,101,115,101,110,116,97,116,105,111,110,111,99,99,97,115,105,
+111,110,97,108,108,121,60,105,109,103,32,119,105,100,116,104,61,34,110,97,118,
+105,103,97,116,105,111,110,34,62,99,111,109,112,101,110,115,97,116,105,111,110,
+99,104,97,109,112,105,111,110,115,104,105,112,109,101,100,105,97,61,34,97,108,
+108,34,32,118,105,111,108,97,116,105,111,110,32,111,102,114,101,102,101,114,101,
+110,99,101,32,116,111,114,101,116,117,114,110,32,116,114,117,101,59,83,116,114,
+105,99,116,47,47,69,78,34,32,116,114,97,110,115,97,99,116,105,111,110,115,105,
+110,116,101,114,118,101,110,116,105,111,110,118,101,114,105,102,105,99,97,116,
+105,111,110,73,110,102,111,114,109,97,116,105,111,110,32,100,105,102,102,105,99,
+117,108,116,105,101,115,67,104,97,109,112,105,111,110,115,104,105,112,99,97,112,
+97,98,105,108,105,116,105,101,115,60,33,91,101,110,100,105,102,93,45,45,62,125,
+10,60,47,115,99,114,105,112,116,62,10,67,104,114,105,115,116,105,97,110,105,116,
+121,102,111,114,32,101,120,97,109,112,108,101,44,80,114,111,102,101,115,115,105,
+111,110,97,108,114,101,115,116,114,105,99,116,105,111,110,115,115,117,103,103,
+101,115,116,32,116,104,97,116,119,97,115,32,114,101,108,101,97,115,101,100,40,
+115,117,99,104,32,97,115,32,116,104,101,114,101,109,111,118,101,67,108,97,115,
+115,40,117,110,101,109,112,108,111,121,109,101,110,116,116,104,101,32,65,109,101
+,114,105,99,97,110,115,116,114,117,99,116,117,114,101,32,111,102,47,105,110,100,
+101,120,46,104,116,109,108,32,112,117,98,108,105,115,104,101,100,32,105,110,115,
+112,97,110,32,99,108,97,115,115,61,34,34,62,60,97,32,104,114,101,102,61,34,47,
+105,110,116,114,111,100,117,99,116,105,111,110,98,101,108,111,110,103,105,110,
+103,32,116,111,99,108,97,105,109,101,100,32,116,104,97,116,99,111,110,115,101,
+113,117,101,110,99,101,115,60,109,101,116,97,32,110,97,109,101,61,34,71,117,105,
+100,101,32,116,111,32,116,104,101,111,118,101,114,119,104,101,108,109,105,110,
+103,97,103,97,105,110,115,116,32,116,104,101,32,99,111,110,99,101,110,116,114,97
+,116,101,100,44,10,46,110,111,110,116,111,117,99,104,32,111,98,115,101,114,118,
+97,116,105,111,110,115,60,47,97,62,10,60,47,100,105,118,62,10,102,32,40,100,111,
+99,117,109,101,110,116,46,98,111,114,100,101,114,58,32,49,112,120,32,123,102,111
+,110,116,45,115,105,122,101,58,49,116,114,101,97,116,109,101,110,116,32,111,102,
+48,34,32,104,101,105,103,104,116,61,34,49,109,111,100,105,102,105,99,97,116,105,
+111,110,73,110,100,101,112,101,110,100,101,110,99,101,100,105,118,105,100,101,
+100,32,105,110,116,111,103,114,101,97,116,101,114,32,116,104,97,110,97,99,104,
+105,101,118,101,109,101,110,116,115,101,115,116,97,98,108,105,115,104,105,110,
+103,74,97,118,97,83,99,114,105,112,116,34,32,110,101,118,101,114,116,104,101,108
+,101,115,115,115,105,103,110,105,102,105,99,97,110,99,101,66,114,111,97,100,99,
+97,115,116,105,110,103,62,38,110,98,115,112,59,60,47,116,100,62,99,111,110,116,
+97,105,110,101,114,34,62,10,115,117,99,104,32,97,115,32,116,104,101,32,105,110,
+102,108,117,101,110,99,101,32,111,102,97,32,112,97,114,116,105,99,117,108,97,114
+,115,114,99,61,39,104,116,116,112,58,47,47,110,97,118,105,103,97,116,105,111,110
+,34,32,104,97,108,102,32,111,102,32,116,104,101,32,115,117,98,115,116,97,110,116
+,105,97,108,32,38,110,98,115,112,59,60,47,100,105,118,62,97,100,118,97,110,116,
+97,103,101,32,111,102,100,105,115,99,111,118,101,114,121,32,111,102,102,117,110,
+100,97,109,101,110,116,97,108,32,109,101,116,114,111,112,111,108,105,116,97,110,
+116,104,101,32,111,112,112,111,115,105,116,101,34,32,120,109,108,58,108,97,110,
+103,61,34,100,101,108,105,98,101,114,97,116,101,108,121,97,108,105,103,110,61,99
+,101,110,116,101,114,101,118,111,108,117,116,105,111,110,32,111,102,112,114,101,
+115,101,114,118,97,116,105,111,110,105,109,112,114,111,118,101,109,101,110,116,
+115,98,101,103,105,110,110,105,110,103,32,105,110,74,101,115,117,115,32,67,104,
+114,105,115,116,80,117,98,108,105,99,97,116,105,111,110,115,100,105,115,97,103,
+114,101,101,109,101,110,116,116,101,120,116,45,97,108,105,103,110,58,114,44,32,
+102,117,110,99,116,105,111,110,40,41,115,105,109,105,108,97,114,105,116,105,101,
+115,98,111,100,121,62,60,47,104,116,109,108,62,105,115,32,99,117,114,114,101,110
+,116,108,121,97,108,112,104,97,98,101,116,105,99,97,108,105,115,32,115,111,109,
+101,116,105,109,101,115,116,121,112,101,61,34,105,109,97,103,101,47,109,97,110,
+121,32,111,102,32,116,104,101,32,102,108,111,119,58,104,105,100,100,101,110,59,
+97,118,97,105,108,97,98,108,101,32,105,110,100,101,115,99,114,105,98,101,32,116,
+104,101,101,120,105,115,116,101,110,99,101,32,111,102,97,108,108,32,111,118,101,
+114,32,116,104,101,116,104,101,32,73,110,116,101,114,110,101,116,9,60,117,108,32
+,99,108,97,115,115,61,34,105,110,115,116,97,108,108,97,116,105,111,110,110,101,
+105,103,104,98,111,114,104,111,111,100,97,114,109,101,100,32,102,111,114,99,101,
+115,114,101,100,117,99,105,110,103,32,116,104,101,99,111,110,116,105,110,117,101
+,115,32,116,111,78,111,110,101,116,104,101,108,101,115,115,44,116,101,109,112,
+101,114,97,116,117,114,101,115,10,9,9,60,97,32,104,114,101,102,61,34,99,108,111,
+115,101,32,116,111,32,116,104,101,101,120,97,109,112,108,101,115,32,111,102,32,
+105,115,32,97,98,111,117,116,32,116,104,101,40,115,101,101,32,98,101,108,111,119
+,41,46,34,32,105,100,61,34,115,101,97,114,99,104,112,114,111,102,101,115,115,105
+,111,110,97,108,105,115,32,97,118,97,105,108,97,98,108,101,116,104,101,32,111,
+102,102,105,99,105,97,108,9,9,60,47,115,99,114,105,112,116,62,10,10,9,9,60,100,
+105,118,32,105,100,61,34,97,99,99,101,108,101,114,97,116,105,111,110,116,104,114
+,111,117,103,104,32,116,104,101,32,72,97,108,108,32,111,102,32,70,97,109,101,100
+,101,115,99,114,105,112,116,105,111,110,115,116,114,97,110,115,108,97,116,105,
+111,110,115,105,110,116,101,114,102,101,114,101,110,99,101,32,116,121,112,101,61
+,39,116,101,120,116,47,114,101,99,101,110,116,32,121,101,97,114,115,105,110,32,
+116,104,101,32,119,111,114,108,100,118,101,114,121,32,112,111,112,117,108,97,114
+,123,98,97,99,107,103,114,111,117,110,100,58,116,114,97,100,105,116,105,111,110,
+97,108,32,115,111,109,101,32,111,102,32,116,104,101,32,99,111,110,110,101,99,116
+,101,100,32,116,111,101,120,112,108,111,105,116,97,116,105,111,110,101,109,101,
+114,103,101,110,99,101,32,111,102,99,111,110,115,116,105,116,117,116,105,111,110
+,65,32,72,105,115,116,111,114,121,32,111,102,115,105,103,110,105,102,105,99,97,
+110,116,32,109,97,110,117,102,97,99,116,117,114,101,100,101,120,112,101,99,116,
+97,116,105,111,110,115,62,60,110,111,115,99,114,105,112,116,62,60,99,97,110,32,
+98,101,32,102,111,117,110,100,98,101,99,97,117,115,101,32,116,104,101,32,104,97,
+115,32,110,111,116,32,98,101,101,110,110,101,105,103,104,98,111,117,114,105,110,
+103,119,105,116,104,111,117,116,32,116,104,101,32,97,100,100,101,100,32,116,111,
+32,116,104,101,9,60,108,105,32,99,108,97,115,115,61,34,105,110,115,116,114,117,
+109,101,110,116,97,108,83,111,118,105,101,116,32,85,110,105,111,110,97,99,107,
+110,111,119,108,101,100,103,101,100,119,104,105,99,104,32,99,97,110,32,98,101,
+110,97,109,101,32,102,111,114,32,116,104,101,97,116,116,101,110,116,105,111,110,
+32,116,111,97,116,116,101,109,112,116,115,32,116,111,32,100,101,118,101,108,111,
+112,109,101,110,116,115,73,110,32,102,97,99,116,44,32,116,104,101,60,108,105,32,
+99,108,97,115,115,61,34,97,105,109,112,108,105,99,97,116,105,111,110,115,115,117
+,105,116,97,98,108,101,32,102,111,114,109,117,99,104,32,111,102,32,116,104,101,
+32,99,111,108,111,110,105,122,97,116,105,111,110,112,114,101,115,105,100,101,110
+,116,105,97,108,99,97,110,99,101,108,66,117,98,98,108,101,32,73,110,102,111,114,
+109,97,116,105,111,110,109,111,115,116,32,111,102,32,116,104,101,32,105,115,32,
+100,101,115,99,114,105,98,101,100,114,101,115,116,32,111,102,32,116,104,101,32,
+109,111,114,101,32,111,114,32,108,101,115,115,105,110,32,83,101,112,116,101,109,
+98,101,114,73,110,116,101,108,108,105,103,101,110,99,101,115,114,99,61,34,104,
+116,116,112,58,47,47,112,120,59,32,104,101,105,103,104,116,58,32,97,118,97,105,
+108,97,98,108,101,32,116,111,109,97,110,117,102,97,99,116,117,114,101,114,104,
+117,109,97,110,32,114,105,103,104,116,115,108,105,110,107,32,104,114,101,102,61,
+34,47,97,118,97,105,108,97,98,105,108,105,116,121,112,114,111,112,111,114,116,
+105,111,110,97,108,111,117,116,115,105,100,101,32,116,104,101,32,97,115,116,114,
+111,110,111,109,105,99,97,108,104,117,109,97,110,32,98,101,105,110,103,115,110,
+97,109,101,32,111,102,32,116,104,101,32,97,114,101,32,102,111,117,110,100,32,105
+,110,97,114,101,32,98,97,115,101,100,32,111,110,115,109,97,108,108,101,114,32,
+116,104,97,110,97,32,112,101,114,115,111,110,32,119,104,111,101,120,112,97,110,
+115,105,111,110,32,111,102,97,114,103,117,105,110,103,32,116,104,97,116,110,111,
+119,32,107,110,111,119,110,32,97,115,73,110,32,116,104,101,32,101,97,114,108,121
+,105,110,116,101,114,109,101,100,105,97,116,101,100,101,114,105,118,101,100,32,
+102,114,111,109,83,99,97,110,100,105,110,97,118,105,97,110,60,47,97,62,60,47,100
+,105,118,62,13,10,99,111,110,115,105,100,101,114,32,116,104,101,97,110,32,101,
+115,116,105,109,97,116,101,100,116,104,101,32,78,97,116,105,111,110,97,108,60,
+100,105,118,32,105,100,61,34,112,97,103,114,101,115,117,108,116,105,110,103,32,
+105,110,99,111,109,109,105,115,115,105,111,110,101,100,97,110,97,108,111,103,111
+,117,115,32,116,111,97,114,101,32,114,101,113,117,105,114,101,100,47,117,108,62,
+10,60,47,100,105,118,62,10,119,97,115,32,98,97,115,101,100,32,111,110,97,110,100
+,32,98,101,99,97,109,101,32,97,38,110,98,115,112,59,38,110,98,115,112,59,116,34,
+32,118,97,108,117,101,61,34,34,32,119,97,115,32,99,97,112,116,117,114,101,100,
+110,111,32,109,111,114,101,32,116,104,97,110,114,101,115,112,101,99,116,105,118,
+101,108,121,99,111,110,116,105,110,117,101,32,116,111,32,62,13,10,60,104,101,97,
+100,62,13,10,60,119,101,114,101,32,99,114,101,97,116,101,100,109,111,114,101,32,
+103,101,110,101,114,97,108,105,110,102,111,114,109,97,116,105,111,110,32,117,115
+,101,100,32,102,111,114,32,116,104,101,105,110,100,101,112,101,110,100,101,110,
+116,32,116,104,101,32,73,109,112,101,114,105,97,108,99,111,109,112,111,110,101,
+110,116,32,111,102,116,111,32,116,104,101,32,110,111,114,116,104,105,110,99,108,
+117,100,101,32,116,104,101,32,67,111,110,115,116,114,117,99,116,105,111,110,115,
+105,100,101,32,111,102,32,116,104,101,32,119,111,117,108,100,32,110,111,116,32,
+98,101,102,111,114,32,105,110,115,116,97,110,99,101,105,110,118,101,110,116,105,
+111,110,32,111,102,109,111,114,101,32,99,111,109,112,108,101,120,99,111,108,108,
+101,99,116,105,118,101,108,121,98,97,99,107,103,114,111,117,110,100,58,32,116,
+101,120,116,45,97,108,105,103,110,58,32,105,116,115,32,111,114,105,103,105,110,
+97,108,105,110,116,111,32,97,99,99,111,117,110,116,116,104,105,115,32,112,114,
+111,99,101,115,115,97,110,32,101,120,116,101,110,115,105,118,101,104,111,119,101
+,118,101,114,44,32,116,104,101,116,104,101,121,32,97,114,101,32,110,111,116,114,
+101,106,101,99,116,101,100,32,116,104,101,99,114,105,116,105,99,105,115,109,32,
+111,102,100,117,114,105,110,103,32,119,104,105,99,104,112,114,111,98,97,98,108,
+121,32,116,104,101,116,104,105,115,32,97,114,116,105,99,108,101,40,102,117,110,
+99,116,105,111,110,40,41,123,73,116,32,115,104,111,117,108,100,32,98,101,97,110,
+32,97,103,114,101,101,109,101,110,116,97,99,99,105,100,101,110,116,97,108,108,
+121,100,105,102,102,101,114,115,32,102,114,111,109,65,114,99,104,105,116,101,99,
+116,117,114,101,98,101,116,116,101,114,32,107,110,111,119,110,97,114,114,97,110,
+103,101,109,101,110,116,115,105,110,102,108,117,101,110,99,101,32,111,110,97,116
+,116,101,110,100,101,100,32,116,104,101,105,100,101,110,116,105,99,97,108,32,116
+,111,115,111,117,116,104,32,111,102,32,116,104,101,112,97,115,115,32,116,104,114
+,111,117,103,104,120,109,108,34,32,116,105,116,108,101,61,34,119,101,105,103,104
+,116,58,98,111,108,100,59,99,114,101,97,116,105,110,103,32,116,104,101,100,105,
+115,112,108,97,121,58,110,111,110,101,114,101,112,108,97,99,101,100,32,116,104,
+101,60,105,109,103,32,115,114,99,61,34,47,105,104,116,116,112,115,58,47,47,119,
+119,119,46,87,111,114,108,100,32,87,97,114,32,73,73,116,101,115,116,105,109,111,
+110,105,97,108,115,102,111,117,110,100,32,105,110,32,116,104,101,114,101,113,117
+,105,114,101,100,32,116,111,32,97,110,100,32,116,104,97,116,32,116,104,101,98,
+101,116,119,101,101,110,32,116,104,101,32,119,97,115,32,100,101,115,105,103,110,
+101,100,99,111,110,115,105,115,116,115,32,111,102,32,99,111,110,115,105,100,101,
+114,97,98,108,121,112,117,98,108,105,115,104,101,100,32,98,121,116,104,101,32,
+108,97,110,103,117,97,103,101,67,111,110,115,101,114,118,97,116,105,111,110,99,
+111,110,115,105,115,116,101,100,32,111,102,114,101,102,101,114,32,116,111,32,116
+,104,101,98,97,99,107,32,116,111,32,116,104,101,32,99,115,115,34,32,109,101,100,
+105,97,61,34,80,101,111,112,108,101,32,102,114,111,109,32,97,118,97,105,108,97,
+98,108,101,32,111,110,112,114,111,118,101,100,32,116,111,32,98,101,115,117,103,
+103,101,115,116,105,111,110,115,34,119,97,115,32,107,110,111,119,110,32,97,115,
+118,97,114,105,101,116,105,101,115,32,111,102,108,105,107,101,108,121,32,116,111
+,32,98,101,99,111,109,112,114,105,115,101,100,32,111,102,115,117,112,112,111,114
+,116,32,116,104,101,32,104,97,110,100,115,32,111,102,32,116,104,101,99,111,117,
+112,108,101,100,32,119,105,116,104,99,111,110,110,101,99,116,32,97,110,100,32,98
+,111,114,100,101,114,58,110,111,110,101,59,112,101,114,102,111,114,109,97,110,99
+,101,115,98,101,102,111,114,101,32,98,101,105,110,103,108,97,116,101,114,32,98,
+101,99,97,109,101,99,97,108,99,117,108,97,116,105,111,110,115,111,102,116,101,
+110,32,99,97,108,108,101,100,114,101,115,105,100,101,110,116,115,32,111,102,109,
+101,97,110,105,110,103,32,116,104,97,116,62,60,108,105,32,99,108,97,115,115,61,
+34,101,118,105,100,101,110,99,101,32,102,111,114,101,120,112,108,97,110,97,116,
+105,111,110,115,101,110,118,105,114,111,110,109,101,110,116,115,34,62,60,47,97,
+62,60,47,100,105,118,62,119,104,105,99,104,32,97,108,108,111,119,115,73,110,116,
+114,111,100,117,99,116,105,111,110,100,101,118,101,108,111,112,101,100,32,98,121
+,97,32,119,105,100,101,32,114,97,110,103,101,111,110,32,98,101,104,97,108,102,32
+,111,102,118,97,108,105,103,110,61,34,116,111,112,34,112,114,105,110,99,105,112,
+108,101,32,111,102,97,116,32,116,104,101,32,116,105,109,101,44,60,47,110,111,115
+,99,114,105,112,116,62,13,115,97,105,100,32,116,111,32,104,97,118,101,105,110,32
+,116,104,101,32,102,105,114,115,116,119,104,105,108,101,32,111,116,104,101,114,
+115,104,121,112,111,116,104,101,116,105,99,97,108,112,104,105,108,111,115,111,
+112,104,101,114,115,112,111,119,101,114,32,111,102,32,116,104,101,99,111,110,116
+,97,105,110,101,100,32,105,110,112,101,114,102,111,114,109,101,100,32,98,121,105
+,110,97,98,105,108,105,116,121,32,116,111,119,101,114,101,32,119,114,105,116,116
+,101,110,115,112,97,110,32,115,116,121,108,101,61,34,105,110,112,117,116,32,110,
+97,109,101,61,34,116,104,101,32,113,117,101,115,116,105,111,110,105,110,116,101,
+110,100,101,100,32,102,111,114,114,101,106,101,99,116,105,111,110,32,111,102,105
+,109,112,108,105,101,115,32,116,104,97,116,105,110,118,101,110,116,101,100,32,
+116,104,101,116,104,101,32,115,116,97,110,100,97,114,100,119,97,115,32,112,114,
+111,98,97,98,108,121,108,105,110,107,32,98,101,116,119,101,101,110,112,114,111,
+102,101,115,115,111,114,32,111,102,105,110,116,101,114,97,99,116,105,111,110,115
+,99,104,97,110,103,105,110,103,32,116,104,101,73,110,100,105,97,110,32,79,99,101
+,97,110,32,99,108,97,115,115,61,34,108,97,115,116,119,111,114,107,105,110,103,32
+,119,105,116,104,39,104,116,116,112,58,47,47,119,119,119,46,121,101,97,114,115,
+32,98,101,102,111,114,101,84,104,105,115,32,119,97,115,32,116,104,101,114,101,99
+,114,101,97,116,105,111,110,97,108,101,110,116,101,114,105,110,103,32,116,104,
+101,109,101,97,115,117,114,101,109,101,110,116,115,97,110,32,101,120,116,114,101
+,109,101,108,121,118,97,108,117,101,32,111,102,32,116,104,101,115,116,97,114,116
+,32,111,102,32,116,104,101,10,60,47,115,99,114,105,112,116,62,10,10,97,110,32,
+101,102,102,111,114,116,32,116,111,105,110,99,114,101,97,115,101,32,116,104,101,
+116,111,32,116,104,101,32,115,111,117,116,104,115,112,97,99,105,110,103,61,34,48
+,34,62,115,117,102,102,105,99,105,101,110,116,108,121,116,104,101,32,69,117,114,
+111,112,101,97,110,99,111,110,118,101,114,116,101,100,32,116,111,99,108,101,97,
+114,84,105,109,101,111,117,116,100,105,100,32,110,111,116,32,104,97,118,101,99,
+111,110,115,101,113,117,101,110,116,108,121,102,111,114,32,116,104,101,32,110,
+101,120,116,101,120,116,101,110,115,105,111,110,32,111,102,101,99,111,110,111,
+109,105,99,32,97,110,100,97,108,116,104,111,117,103,104,32,116,104,101,97,114,
+101,32,112,114,111,100,117,99,101,100,97,110,100,32,119,105,116,104,32,116,104,
+101,105,110,115,117,102,102,105,99,105,101,110,116,103,105,118,101,110,32,98,121
+,32,116,104,101,115,116,97,116,105,110,103,32,116,104,97,116,101,120,112,101,110
+,100,105,116,117,114,101,115,60,47,115,112,97,110,62,60,47,97,62,10,116,104,111,
+117,103,104,116,32,116,104,97,116,111,110,32,116,104,101,32,98,97,115,105,115,99
+,101,108,108,112,97,100,100,105,110,103,61,105,109,97,103,101,32,111,102,32,116,
+104,101,114,101,116,117,114,110,105,110,103,32,116,111,105,110,102,111,114,109,
+97,116,105,111,110,44,115,101,112,97,114,97,116,101,100,32,98,121,97,115,115,97,
+115,115,105,110,97,116,101,100,115,34,32,99,111,110,116,101,110,116,61,34,97,117
+,116,104,111,114,105,116,121,32,111,102,110,111,114,116,104,119,101,115,116,101,
+114,110,60,47,100,105,118,62,10,60,100,105,118,32,34,62,60,47,100,105,118,62,13,
+10,32,32,99,111,110,115,117,108,116,97,116,105,111,110,99,111,109,109,117,110,
+105,116,121,32,111,102,116,104,101,32,110,97,116,105,111,110,97,108,105,116,32,
+115,104,111,117,108,100,32,98,101,112,97,114,116,105,99,105,112,97,110,116,115,
+32,97,108,105,103,110,61,34,108,101,102,116,116,104,101,32,103,114,101,97,116,
+101,115,116,115,101,108,101,99,116,105,111,110,32,111,102,115,117,112,101,114,
+110,97,116,117,114,97,108,100,101,112,101,110,100,101,110,116,32,111,110,105,115
+,32,109,101,110,116,105,111,110,101,100,97,108,108,111,119,105,110,103,32,116,
+104,101,119,97,115,32,105,110,118,101,110,116,101,100,97,99,99,111,109,112,97,
+110,121,105,110,103,104,105,115,32,112,101,114,115,111,110,97,108,97,118,97,105,
+108,97,98,108,101,32,97,116,115,116,117,100,121,32,111,102,32,116,104,101,111,
+110,32,116,104,101,32,111,116,104,101,114,101,120,101,99,117,116,105,111,110,32,
+111,102,72,117,109,97,110,32,82,105,103,104,116,115,116,101,114,109,115,32,111,
+102,32,116,104,101,97,115,115,111,99,105,97,116,105,111,110,115,114,101,115,101,
+97,114,99,104,32,97,110,100,115,117,99,99,101,101,100,101,100,32,98,121,100,101,
+102,101,97,116,101,100,32,116,104,101,97,110,100,32,102,114,111,109,32,116,104,
+101,98,117,116,32,116,104,101,121,32,97,114,101,99,111,109,109,97,110,100,101,
+114,32,111,102,115,116,97,116,101,32,111,102,32,116,104,101,121,101,97,114,115,
+32,111,102,32,97,103,101,116,104,101,32,115,116,117,100,121,32,111,102,60,117,
+108,32,99,108,97,115,115,61,34,115,112,108,97,99,101,32,105,110,32,116,104,101,
+119,104,101,114,101,32,104,101,32,119,97,115,60,108,105,32,99,108,97,115,115,61,
+34,102,116,104,101,114,101,32,97,114,101,32,110,111,119,104,105,99,104,32,98,101
+,99,97,109,101,104,101,32,112,117,98,108,105,115,104,101,100,101,120,112,114,101
+,115,115,101,100,32,105,110,116,111,32,119,104,105,99,104,32,116,104,101,99,111,
+109,109,105,115,115,105,111,110,101,114,102,111,110,116,45,119,101,105,103,104,
+116,58,116,101,114,114,105,116,111,114,121,32,111,102,101,120,116,101,110,115,
+105,111,110,115,34,62,82,111,109,97,110,32,69,109,112,105,114,101,101,113,117,97
+,108,32,116,111,32,116,104,101,73,110,32,99,111,110,116,114,97,115,116,44,104,
+111,119,101,118,101,114,44,32,97,110,100,105,115,32,116,121,112,105,99,97,108,
+108,121,97,110,100,32,104,105,115,32,119,105,102,101,40,97,108,115,111,32,99,97,
+108,108,101,100,62,60,117,108,32,99,108,97,115,115,61,34,101,102,102,101,99,116,
+105,118,101,108,121,32,101,118,111,108,118,101,100,32,105,110,116,111,115,101,
+101,109,32,116,111,32,104,97,118,101,119,104,105,99,104,32,105,115,32,116,104,
+101,116,104,101,114,101,32,119,97,115,32,110,111,97,110,32,101,120,99,101,108,
+108,101,110,116,97,108,108,32,111,102,32,116,104,101,115,101,100,101,115,99,114,
+105,98,101,100,32,98,121,73,110,32,112,114,97,99,116,105,99,101,44,98,114,111,97
+,100,99,97,115,116,105,110,103,99,104,97,114,103,101,100,32,119,105,116,104,114,
+101,102,108,101,99,116,101,100,32,105,110,115,117,98,106,101,99,116,101,100,32,
+116,111,109,105,108,105,116,97,114,121,32,97,110,100,116,111,32,116,104,101,32,
+112,111,105,110,116,101,99,111,110,111,109,105,99,97,108,108,121,115,101,116,84,
+97,114,103,101,116,105,110,103,97,114,101,32,97,99,116,117,97,108,108,121,118,
+105,99,116,111,114,121,32,111,118,101,114,40,41,59,60,47,115,99,114,105,112,116,
+62,99,111,110,116,105,110,117,111,117,115,108,121,114,101,113,117,105,114,101,
+100,32,102,111,114,101,118,111,108,117,116,105,111,110,97,114,121,97,110,32,101,
+102,102,101,99,116,105,118,101,110,111,114,116,104,32,111,102,32,116,104,101,44,
+32,119,104,105,99,104,32,119,97,115,32,102,114,111,110,116,32,111,102,32,116,104
+,101,111,114,32,111,116,104,101,114,119,105,115,101,115,111,109,101,32,102,111,
+114,109,32,111,102,104,97,100,32,110,111,116,32,98,101,101,110,103,101,110,101,
+114,97,116,101,100,32,98,121,105,110,102,111,114,109,97,116,105,111,110,46,112,
+101,114,109,105,116,116,101,100,32,116,111,105,110,99,108,117,100,101,115,32,116
+,104,101,100,101,118,101,108,111,112,109,101,110,116,44,101,110,116,101,114,101,
+100,32,105,110,116,111,116,104,101,32,112,114,101,118,105,111,117,115,99,111,110
+,115,105,115,116,101,110,116,108,121,97,114,101,32,107,110,111,119,110,32,97,115
+,116,104,101,32,102,105,101,108,100,32,111,102,116,104,105,115,32,116,121,112,
+101,32,111,102,103,105,118,101,110,32,116,111,32,116,104,101,116,104,101,32,116,
+105,116,108,101,32,111,102,99,111,110,116,97,105,110,115,32,116,104,101,105,110,
+115,116,97,110,99,101,115,32,111,102,105,110,32,116,104,101,32,110,111,114,116,
+104,100,117,101,32,116,111,32,116,104,101,105,114,97,114,101,32,100,101,115,105,
+103,110,101,100,99,111,114,112,111,114,97,116,105,111,110,115,119,97,115,32,116,
+104,97,116,32,116,104,101,111,110,101,32,111,102,32,116,104,101,115,101,109,111,
+114,101,32,112,111,112,117,108,97,114,115,117,99,99,101,101,100,101,100,32,105,
+110,115,117,112,112,111,114,116,32,102,114,111,109,105,110,32,100,105,102,102,
+101,114,101,110,116,100,111,109,105,110,97,116,101,100,32,98,121,100,101,115,105
+,103,110,101,100,32,102,111,114,111,119,110,101,114,115,104,105,112,32,111,102,
+97,110,100,32,112,111,115,115,105,98,108,121,115,116,97,110,100,97,114,100,105,
+122,101,100,114,101,115,112,111,110,115,101,84,101,120,116,119,97,115,32,105,110
+,116,101,110,100,101,100,114,101,99,101,105,118,101,100,32,116,104,101,97,115,
+115,117,109,101,100,32,116,104,97,116,97,114,101,97,115,32,111,102,32,116,104,
+101,112,114,105,109,97,114,105,108,121,32,105,110,116,104,101,32,98,97,115,105,
+115,32,111,102,105,110,32,116,104,101,32,115,101,110,115,101,97,99,99,111,117,
+110,116,115,32,102,111,114,100,101,115,116,114,111,121,101,100,32,98,121,97,116,
+32,108,101,97,115,116,32,116,119,111,119,97,115,32,100,101,99,108,97,114,101,100
+,99,111,117,108,100,32,110,111,116,32,98,101,83,101,99,114,101,116,97,114,121,32
+,111,102,97,112,112,101,97,114,32,116,111,32,98,101,109,97,114,103,105,110,45,
+116,111,112,58,49,47,94,92,115,43,124,92,115,43,36,47,103,101,41,123,116,104,114
+,111,119,32,101,125,59,116,104,101,32,115,116,97,114,116,32,111,102,116,119,111,
+32,115,101,112,97,114,97,116,101,108,97,110,103,117,97,103,101,32,97,110,100,119
+,104,111,32,104,97,100,32,98,101,101,110,111,112,101,114,97,116,105,111,110,32,
+111,102,100,101,97,116,104,32,111,102,32,116,104,101,114,101,97,108,32,110,117,
+109,98,101,114,115,9,60,108,105,110,107,32,114,101,108,61,34,112,114,111,118,105
+,100,101,100,32,116,104,101,116,104,101,32,115,116,111,114,121,32,111,102,99,111
+,109,112,101,116,105,116,105,111,110,115,101,110,103,108,105,115,104,32,40,85,75
+,41,101,110,103,108,105,115,104,32,40,85,83,41,208,156,208,190,208,189,208,179,
+208,190,208,187,208,161,209,128,208,191,209,129,208,186,208,184,209,129,209,128,
+208,191,209,129,208,186,208,184,209,129,209,128,208,191,209,129,208,186,208,190,
+217,132,216,185,216,177,216,168,217,138,216,169,230,173,163,233,171,148,228,184,
+173,230,150,135,231,174,128,228,189,147,228,184,173,230,150,135,231,185,129,228,
+189,147,228,184,173,230,150,135,230,156,137,233,153,144,229,133,172,229,143,184,
+228,186,186,230,176,145,230,148,191,229,186,156,233,152,191,233,135,140,229,183,
+180,229,183,180,231,164,190,228,188,154,228,184,187,228,185,137,230,147,141,228,
+189,156,231,179,187,231,187,159,230,148,191,231,173,150,230,179,149,232,167,132,
+105,110,102,111,114,109,97,99,105,195,179,110,104,101,114,114,97,109,105,101,110
+,116,97,115,101,108,101,99,116,114,195,179,110,105,99,111,100,101,115,99,114,105
+,112,99,105,195,179,110,99,108,97,115,105,102,105,99,97,100,111,115,99,111,110,
+111,99,105,109,105,101,110,116,111,112,117,98,108,105,99,97,99,105,195,179,110,
+114,101,108,97,99,105,111,110,97,100,97,115,105,110,102,111,114,109,195,161,116,
+105,99,97,114,101,108,97,99,105,111,110,97,100,111,115,100,101,112,97,114,116,97
+,109,101,110,116,111,116,114,97,98,97,106,97,100,111,114,101,115,100,105,114,101
+,99,116,97,109,101,110,116,101,97,121,117,110,116,97,109,105,101,110,116,111,109
+,101,114,99,97,100,111,76,105,98,114,101,99,111,110,116,195,161,99,116,101,110,
+111,115,104,97,98,105,116,97,99,105,111,110,101,115,99,117,109,112,108,105,109,
+105,101,110,116,111,114,101,115,116,97,117,114,97,110,116,101,115,100,105,115,
+112,111,115,105,99,105,195,179,110,99,111,110,115,101,99,117,101,110,99,105,97,
+101,108,101,99,116,114,195,179,110,105,99,97,97,112,108,105,99,97,99,105,111,110
+,101,115,100,101,115,99,111,110,101,99,116,97,100,111,105,110,115,116,97,108,97,
+99,105,195,179,110,114,101,97,108,105,122,97,99,105,195,179,110,117,116,105,108,
+105,122,97,99,105,195,179,110,101,110,99,105,99,108,111,112,101,100,105,97,101,
+110,102,101,114,109,101,100,97,100,101,115,105,110,115,116,114,117,109,101,110,
+116,111,115,101,120,112,101,114,105,101,110,99,105,97,115,105,110,115,116,105,
+116,117,99,105,195,179,110,112,97,114,116,105,99,117,108,97,114,101,115,115,117,
+98,99,97,116,101,103,111,114,105,97,209,130,208,190,208,187,209,140,208,186,208,
+190,208,160,208,190,209,129,209,129,208,184,208,184,209,128,208,176,208,177,208,
+190,209,130,209,139,208,177,208,190,208,187,209,140,209,136,208,181,208,191,209,
+128,208,190,209,129,209,130,208,190,208,188,208,190,208,182,208,181,209,130,208,
+181,208,180,209,128,209,131,208,179,208,184,209,133,209,129,208,187,209,131,209,
+135,208,176,208,181,209,129,208,181,208,185,209,135,208,176,209,129,208,178,209,
+129,208,181,208,179,208,180,208,176,208,160,208,190,209,129,209,129,208,184,209,
+143,208,156,208,190,209,129,208,186,208,178,208,181,208,180,209,128,209,131,208,
+179,208,184,208,181,208,179,208,190,209,128,208,190,208,180,208,176,208,178,208,
+190,208,191,209,128,208,190,209,129,208,180,208,176,208,189,208,189,209,139,209,
+133,208,180,208,190,208,187,208,182,208,189,209,139,208,184,208,188,208,181,208,
+189,208,189,208,190,208,156,208,190,209,129,208,186,208,178,209,139,209,128,209,
+131,208,177,208,187,208,181,208,185,208,156,208,190,209,129,208,186,208,178,208,
+176,209,129,209,130,209,128,208,176,208,189,209,139,208,189,208,184,209,135,208,
+181,208,179,208,190,209,128,208,176,208,177,208,190,209,130,208,181,208,180,208,
+190,208,187,208,182,208,181,208,189,209,131,209,129,208,187,209,131,208,179,208,
+184,209,130,208,181,208,191,208,181,209,128,209,140,208,158,208,180,208,189,208,
+176,208,186,208,190,208,191,208,190,209,130,208,190,208,188,209,131,209,128,208,
+176,208,177,208,190,209,130,209,131,208,176,208,191,209,128,208,181,208,187,209,
+143,208,178,208,190,208,190,208,177,209,137,208,181,208,190,208,180,208,189,208,
+190,208,179,208,190,209,129,208,178,208,190,208,181,208,179,208,190,209,129,209,
+130,208,176,209,130,209,140,208,184,208,180,209,128,209,131,208,179,208,190,208,
+185,209,132,208,190,209,128,209,131,208,188,208,181,209,133,208,190,209,128,208,
+190,209,136,208,190,208,191,209,128,208,190,209,130,208,184,208,178,209,129,209,
+129,209,139,208,187,208,186,208,176,208,186,208,176,208,182,208,180,209,139,208,
+185,208,178,208,187,208,176,209,129,209,130,208,184,208,179,209,128,209,131,208,
+191,208,191,209,139,208,178,208,188,208,181,209,129,209,130,208,181,209,128,208,
+176,208,177,208,190,209,130,208,176,209,129,208,186,208,176,208,183,208,176,208,
+187,208,191,208,181,209,128,208,178,209,139,208,185,208,180,208,181,208,187,208,
+176,209,130,209,140,208,180,208,181,208,189,209,140,208,179,208,184,208,191,208,
+181,209,128,208,184,208,190,208,180,208,177,208,184,208,183,208,189,208,181,209,
+129,208,190,209,129,208,189,208,190,208,178,208,181,208,188,208,190,208,188,208,
+181,208,189,209,130,208,186,209,131,208,191,208,184,209,130,209,140,208,180,208,
+190,208,187,208,182,208,189,208,176,209,128,208,176,208,188,208,186,208,176,209,
+133,208,189,208,176,209,135,208,176,208,187,208,190,208,160,208,176,208,177,208,
+190,209,130,208,176,208,162,208,190,208,187,209,140,208,186,208,190,209,129,208,
+190,208,178,209,129,208,181,208,188,208,178,209,130,208,190,209,128,208,190,208,
+185,208,189,208,176,209,135,208,176,208,187,208,176,209,129,208,191,208,184,209,
+129,208,190,208,186,209,129,208,187,209,131,208,182,208,177,209,139,209,129,208,
+184,209,129,209,130,208,181,208,188,208,191,208,181,209,135,208,176,209,130,208,
+184,208,189,208,190,208,178,208,190,208,179,208,190,208,191,208,190,208,188,208,
+190,209,137,208,184,209,129,208,176,208,185,209,130,208,190,208,178,208,191,208,
+190,209,135,208,181,208,188,209,131,208,191,208,190,208,188,208,190,209,137,209,
+140,208,180,208,190,208,187,208,182,208,189,208,190,209,129,209,129,209,139,208,
+187,208,186,208,184,208,177,209,139,209,129,209,130,209,128,208,190,208,180,208,
+176,208,189,208,189,209,139,208,181,208,188,208,189,208,190,208,179,208,184,208,
+181,208,191,209,128,208,190,208,181,208,186,209,130,208,161,208,181,208,185,209,
+135,208,176,209,129,208,188,208,190,208,180,208,181,208,187,208,184,209,130,208,
+176,208,186,208,190,208,179,208,190,208,190,208,189,208,187,208,176,208,185,208,
+189,208,179,208,190,209,128,208,190,208,180,208,181,208,178,208,181,209,128,209,
+129,208,184,209,143,209,129,209,130,209,128,208,176,208,189,208,181,209,132,208,
+184,208,187,209,140,208,188,209,139,209,131,209,128,208,190,208,178,208,189,209,
+143,209,128,208,176,208,183,208,189,209,139,209,133,208,184,209,129,208,186,208,
+176,209,130,209,140,208,189,208,181,208,180,208,181,208,187,209,142,209,143,208,
+189,208,178,208,176,209,128,209,143,208,188,208,181,208,189,209,140,209,136,208,
+181,208,188,208,189,208,190,208,179,208,184,209,133,208,180,208,176,208,189,208,
+189,208,190,208,185,208,183,208,189,208,176,209,135,208,184,209,130,208,189,208,
+181,208,187,209,140,208,183,209,143,209,132,208,190,209,128,209,131,208,188,208,
+176,208,162,208,181,208,191,208,181,209,128,209,140,208,188,208,181,209,129,209,
+143,209,134,208,176,208,183,208,176,209,137,208,184,209,130,209,139,208,155,209,
+131,209,135,209,136,208,184,208,181,224,164,168,224,164,185,224,165,128,224,164,
+130,224,164,149,224,164,176,224,164,168,224,165,135,224,164,133,224,164,170,224,
+164,168,224,165,135,224,164,149,224,164,191,224,164,175,224,164,190,224,164,149,
+224,164,176,224,165,135,224,164,130,224,164,133,224,164,168,224,165,141,224,164,
+175,224,164,149,224,165,141,224,164,175,224,164,190,224,164,151,224,164,190,224,
+164,135,224,164,161,224,164,172,224,164,190,224,164,176,224,165,135,224,164,149,
+224,164,191,224,164,184,224,165,128,224,164,166,224,164,191,224,164,175,224,164,
+190,224,164,170,224,164,185,224,164,178,224,165,135,224,164,184,224,164,191,224,
+164,130,224,164,185,224,164,173,224,164,190,224,164,176,224,164,164,224,164,133,
+224,164,170,224,164,168,224,165,128,224,164,181,224,164,190,224,164,178,224,165,
+135,224,164,184,224,165,135,224,164,181,224,164,190,224,164,149,224,164,176,224,
+164,164,224,165,135,224,164,174,224,165,135,224,164,176,224,165,135,224,164,185,
+224,165,139,224,164,168,224,165,135,224,164,184,224,164,149,224,164,164,224,165,
+135,224,164,172,224,164,185,224,165,129,224,164,164,224,164,184,224,164,190,224,
+164,135,224,164,159,224,164,185,224,165,139,224,164,151,224,164,190,224,164,156,
+224,164,190,224,164,168,224,165,135,224,164,174,224,164,191,224,164,168,224,164,
+159,224,164,149,224,164,176,224,164,164,224,164,190,224,164,149,224,164,176,224,
+164,168,224,164,190,224,164,137,224,164,168,224,164,149,224,165,135,224,164,175,
+224,164,185,224,164,190,224,164,129,224,164,184,224,164,172,224,164,184,224,165,
+135,224,164,173,224,164,190,224,164,183,224,164,190,224,164,134,224,164,170,224,
+164,149,224,165,135,224,164,178,224,164,191,224,164,175,224,165,135,224,164,182,
+224,165,129,224,164,176,224,165,130,224,164,135,224,164,184,224,164,149,224,165,
+135,224,164,152,224,164,130,224,164,159,224,165,135,224,164,174,224,165,135,224,
+164,176,224,165,128,224,164,184,224,164,149,224,164,164,224,164,190,224,164,174,
+224,165,135,224,164,176,224,164,190,224,164,178,224,165,135,224,164,149,224,164,
+176,224,164,133,224,164,167,224,164,191,224,164,149,224,164,133,224,164,170,224,
+164,168,224,164,190,224,164,184,224,164,174,224,164,190,224,164,156,224,164,174,
+224,165,129,224,164,157,224,165,135,224,164,149,224,164,190,224,164,176,224,164,
+163,224,164,185,224,165,139,224,164,164,224,164,190,224,164,149,224,164,161,224,
+164,188,224,165,128,224,164,175,224,164,185,224,164,190,224,164,130,224,164,185,
+224,165,139,224,164,159,224,164,178,224,164,182,224,164,172,224,165,141,224,164,
+166,224,164,178,224,164,191,224,164,175,224,164,190,224,164,156,224,165,128,224,
+164,181,224,164,168,224,164,156,224,164,190,224,164,164,224,164,190,224,164,149,
+224,165,136,224,164,184,224,165,135,224,164,134,224,164,170,224,164,149,224,164,
+190,224,164,181,224,164,190,224,164,178,224,165,128,224,164,166,224,165,135,224,
+164,168,224,165,135,224,164,170,224,165,130,224,164,176,224,165,128,224,164,170,
+224,164,190,224,164,168,224,165,128,224,164,137,224,164,184,224,164,149,224,165,
+135,224,164,185,224,165,139,224,164,151,224,165,128,224,164,172,224,165,136,224,
+164,160,224,164,149,224,164,134,224,164,170,224,164,149,224,165,128,224,164,181,
+224,164,176,224,165,141,224,164,183,224,164,151,224,164,190,224,164,130,224,164,
+181,224,164,134,224,164,170,224,164,149,224,165,139,224,164,156,224,164,191,224,
+164,178,224,164,190,224,164,156,224,164,190,224,164,168,224,164,190,224,164,184,
+224,164,185,224,164,174,224,164,164,224,164,185,224,164,174,224,165,135,224,164,
+130,224,164,137,224,164,168,224,164,149,224,165,128,224,164,175,224,164,190,224,
+164,185,224,165,130,224,164,166,224,164,176,224,165,141,224,164,156,224,164,184,
+224,165,130,224,164,154,224,165,128,224,164,170,224,164,184,224,164,130,224,164,
+166,224,164,184,224,164,181,224,164,190,224,164,178,224,164,185,224,165,139,224,
+164,168,224,164,190,224,164,185,224,165,139,224,164,164,224,165,128,224,164,156,
+224,165,136,224,164,184,224,165,135,224,164,181,224,164,190,224,164,170,224,164,
+184,224,164,156,224,164,168,224,164,164,224,164,190,224,164,168,224,165,135,224,
+164,164,224,164,190,224,164,156,224,164,190,224,164,176,224,165,128,224,164,152,
+224,164,190,224,164,175,224,164,178,224,164,156,224,164,191,224,164,178,224,165,
+135,224,164,168,224,165,128,224,164,154,224,165,135,224,164,156,224,164,190,224,
+164,130,224,164,154,224,164,170,224,164,164,224,165,141,224,164,176,224,164,151,
+224,165,130,224,164,151,224,164,178,224,164,156,224,164,190,224,164,164,224,165,
+135,224,164,172,224,164,190,224,164,185,224,164,176,224,164,134,224,164,170,224,
+164,168,224,165,135,224,164,181,224,164,190,224,164,185,224,164,168,224,164,135,
+224,164,184,224,164,149,224,164,190,224,164,184,224,165,129,224,164,172,224,164,
+185,224,164,176,224,164,185,224,164,168,224,165,135,224,164,135,224,164,184,224,
+164,184,224,165,135,224,164,184,224,164,185,224,164,191,224,164,164,224,164,172,
+224,164,161,224,164,188,224,165,135,224,164,152,224,164,159,224,164,168,224,164,
+190,224,164,164,224,164,178,224,164,190,224,164,182,224,164,170,224,164,190,224,
+164,130,224,164,154,224,164,182,224,165,141,224,164,176,224,165,128,224,164,172,
+224,164,161,224,164,188,224,165,128,224,164,185,224,165,139,224,164,164,224,165,
+135,224,164,184,224,164,190,224,164,136,224,164,159,224,164,182,224,164,190,224,
+164,175,224,164,166,224,164,184,224,164,149,224,164,164,224,165,128,224,164,156,
+224,164,190,224,164,164,224,165,128,224,164,181,224,164,190,224,164,178,224,164,
+190,224,164,185,224,164,156,224,164,190,224,164,176,224,164,170,224,164,159,224,
+164,168,224,164,190,224,164,176,224,164,150,224,164,168,224,165,135,224,164,184,
+224,164,161,224,164,188,224,164,149,224,164,174,224,164,191,224,164,178,224,164,
+190,224,164,137,224,164,184,224,164,149,224,165,128,224,164,149,224,165,135,224,
+164,181,224,164,178,224,164,178,224,164,151,224,164,164,224,164,190,224,164,150,
+224,164,190,224,164,168,224,164,190,224,164,133,224,164,176,224,165,141,224,164,
+165,224,164,156,224,164,185,224,164,190,224,164,130,224,164,166,224,165,135,224,
+164,150,224,164,190,224,164,170,224,164,185,224,164,178,224,165,128,224,164,168,
+224,164,191,224,164,175,224,164,174,224,164,172,224,164,191,224,164,168,224,164,
+190,224,164,172,224,165,136,224,164,130,224,164,149,224,164,149,224,164,185,224,
+165,128,224,164,130,224,164,149,224,164,185,224,164,168,224,164,190,224,164,166,
+224,165,135,224,164,164,224,164,190,224,164,185,224,164,174,224,164,178,224,165,
+135,224,164,149,224,164,190,224,164,171,224,165,128,224,164,156,224,164,172,224,
+164,149,224,164,191,224,164,164,224,165,129,224,164,176,224,164,164,224,164,174,
+224,164,190,224,164,130,224,164,151,224,164,181,224,164,185,224,165,128,224,164,
+130,224,164,176,224,165,139,224,164,156,224,164,188,224,164,174,224,164,191,224,
+164,178,224,165,128,224,164,134,224,164,176,224,165,139,224,164,170,224,164,184,
+224,165,135,224,164,168,224,164,190,224,164,175,224,164,190,224,164,166,224,164,
+181,224,164,178,224,165,135,224,164,168,224,165,135,224,164,150,224,164,190,224,
+164,164,224,164,190,224,164,149,224,164,176,224,165,128,224,164,172,224,164,137,
+224,164,168,224,164,149,224,164,190,224,164,156,224,164,181,224,164,190,224,164,
+172,224,164,170,224,165,130,224,164,176,224,164,190,224,164,172,224,164,161,224,
+164,188,224,164,190,224,164,184,224,165,140,224,164,166,224,164,190,224,164,182,
+224,165,135,224,164,175,224,164,176,224,164,149,224,164,191,224,164,175,224,165,
+135,224,164,149,224,164,185,224,164,190,224,164,130,224,164,133,224,164,149,224,
+164,184,224,164,176,224,164,172,224,164,168,224,164,190,224,164,143,224,164,181,
+224,164,185,224,164,190,224,164,130,224,164,184,224,165,141,224,164,165,224,164,
+178,224,164,174,224,164,191,224,164,178,224,165,135,224,164,178,224,165,135,224,
+164,150,224,164,149,224,164,181,224,164,191,224,164,183,224,164,175,224,164,149,
+224,165,141,224,164,176,224,164,130,224,164,184,224,164,174,224,165,130,224,164,
+185,224,164,165,224,164,190,224,164,168,224,164,190,216,170,216,179,216,170,216,
+183,217,138,216,185,217,133,216,180,216,167,216,177,217,131,216,169,216,168,217,
+136,216,167,216,179,216,183,216,169,216,167,217,132,216,181,217,129,216,173,216,
+169,217,133,217,136,216,167,216,182,217,138,216,185,216,167,217,132,216,174,216,
+167,216,181,216,169,216,167,217,132,217,133,216,178,217,138,216,175,216,167,217,
+132,216,185,216,167,217,133,216,169,216,167,217,132,217,131,216,167,216,170,216,
+168,216,167,217,132,216,177,216,175,217,136,216,175,216,168,216,177,217,134,216,
+167,217,133,216,172,216,167,217,132,216,175,217,136,217,132,216,169,216,167,217,
+132,216,185,216,167,217,132,217,133,216,167,217,132,217,133,217,136,217,130,216,
+185,216,167,217,132,216,185,216,177,216,168,217,138,216,167,217,132,216,179,216,
+177,217,138,216,185,216,167,217,132,216,172,217,136,216,167,217,132,216,167,217,
+132,216,176,217,135,216,167,216,168,216,167,217,132,216,173,217,138,216,167,216,
+169,216,167,217,132,216,173,217,130,217,136,217,130,216,167,217,132,217,131,216,
+177,217,138,217,133,216,167,217,132,216,185,216,177,216,167,217,130,217,133,216,
+173,217,129,217,136,216,184,216,169,216,167,217,132,216,171,216,167,217,134,217,
+138,217,133,216,180,216,167,217,135,216,175,216,169,216,167,217,132,217,133,216,
+177,216,163,216,169,216,167,217,132,217,130,216,177,216,162,217,134,216,167,217,
+132,216,180,216,168,216,167,216,168,216,167,217,132,216,173,217,136,216,167,216,
+177,216,167,217,132,216,172,216,175,217,138,216,175,216,167,217,132,216,163,216,
+179,216,177,216,169,216,167,217,132,216,185,217,132,217,136,217,133,217,133,216,
+172,217,133,217,136,216,185,216,169,216,167,217,132,216,177,216,173,217,133,217,
+134,216,167,217,132,217,134,217,130,216,167,216,183,217,129,217,132,216,179,216,
+183,217,138,217,134,216,167,217,132,217,131,217,136,217,138,216,170,216,167,217,
+132,216,175,217,134,217,138,216,167,216,168,216,177,217,131,216,167,216,170,217,
+135,216,167,217,132,216,177,217,138,216,167,216,182,216,170,216,173,217,138,216,
+167,216,170,217,138,216,168,216,170,217,136,217,130,217,138,216,170,216,167,217,
+132,216,163,217,136,217,132,217,137,216,167,217,132,216,168,216,177,217,138,216,
+175,216,167,217,132,217,131,217,132,216,167,217,133,216,167,217,132,216,177,216,
+167,216,168,216,183,216,167,217,132,216,180,216,174,216,181,217,138,216,179,217,
+138,216,167,216,177,216,167,216,170,216,167,217,132,216,171,216,167,217,132,216,
+171,216,167,217,132,216,181,217,132,216,167,216,169,216,167,217,132,216,173,216,
+175,217,138,216,171,216,167,217,132,216,178,217,136,216,167,216,177,216,167,217,
+132,216,174,217,132,217,138,216,172,216,167,217,132,216,172,217,133,217,138,216,
+185,216,167,217,132,216,185,216,167,217,133,217,135,216,167,217,132,216,172,217,
+133,216,167,217,132,216,167,217,132,216,179,216,167,216,185,216,169,217,133,216,
+180,216,167,217,135,216,175,217,135,216,167,217,132,216,177,216,166,217,138,216,
+179,216,167,217,132,216,175,216,174,217,136,217,132,216,167,217,132,217,129,217,
+134,217,138,216,169,216,167,217,132,217,131,216,170,216,167,216,168,216,167,217,
+132,216,175,217,136,216,177,217,138,216,167,217,132,216,175,216,177,217,136,216,
+179,216,167,216,179,216,170,216,186,216,177,217,130,216,170,216,181,216,167,217,
+133,217,138,217,133,216,167,217,132,216,168,217,134,216,167,216,170,216,167,217,
+132,216,185,216,184,217,138,217,133,101,110,116,101,114,116,97,105,110,109,101,
+110,116,117,110,100,101,114,115,116,97,110,100,105,110,103,32,61,32,102,117,110,
+99,116,105,111,110,40,41,46,106,112,103,34,32,119,105,100,116,104,61,34,99,111,
+110,102,105,103,117,114,97,116,105,111,110,46,112,110,103,34,32,119,105,100,116,
+104,61,34,60,98,111,100,121,32,99,108,97,115,115,61,34,77,97,116,104,46,114,97,
+110,100,111,109,40,41,99,111,110,116,101,109,112,111,114,97,114,121,32,85,110,
+105,116,101,100,32,83,116,97,116,101,115,99,105,114,99,117,109,115,116,97,110,99
+,101,115,46,97,112,112,101,110,100,67,104,105,108,100,40,111,114,103,97,110,105,
+122,97,116,105,111,110,115,60,115,112,97,110,32,99,108,97,115,115,61,34,34,62,60
+,105,109,103,32,115,114,99,61,34,47,100,105,115,116,105,110,103,117,105,115,104,
+101,100,116,104,111,117,115,97,110,100,115,32,111,102,32,99,111,109,109,117,110,
+105,99,97,116,105,111,110,99,108,101,97,114,34,62,60,47,100,105,118,62,105,110,
+118,101,115,116,105,103,97,116,105,111,110,102,97,118,105,99,111,110,46,105,99,
+111,34,32,109,97,114,103,105,110,45,114,105,103,104,116,58,98,97,115,101,100,32,
+111,110,32,116,104,101,32,77,97,115,115,97,99,104,117,115,101,116,116,115,116,97
+,98,108,101,32,98,111,114,100,101,114,61,105,110,116,101,114,110,97,116,105,111,
+110,97,108,97,108,115,111,32,107,110,111,119,110,32,97,115,112,114,111,110,117,
+110,99,105,97,116,105,111,110,98,97,99,107,103,114,111,117,110,100,58,35,102,112
+,97,100,100,105,110,103,45,108,101,102,116,58,70,111,114,32,101,120,97,109,112,
+108,101,44,32,109,105,115,99,101,108,108,97,110,101,111,117,115,38,108,116,59,47
+,109,97,116,104,38,103,116,59,112,115,121,99,104,111,108,111,103,105,99,97,108,
+105,110,32,112,97,114,116,105,99,117,108,97,114,101,97,114,99,104,34,32,116,121,
+112,101,61,34,102,111,114,109,32,109,101,116,104,111,100,61,34,97,115,32,111,112
+,112,111,115,101,100,32,116,111,83,117,112,114,101,109,101,32,67,111,117,114,116
+,111,99,99,97,115,105,111,110,97,108,108,121,32,65,100,100,105,116,105,111,110,
+97,108,108,121,44,78,111,114,116,104,32,65,109,101,114,105,99,97,112,120,59,98,
+97,99,107,103,114,111,117,110,100,111,112,112,111,114,116,117,110,105,116,105,
+101,115,69,110,116,101,114,116,97,105,110,109,101,110,116,46,116,111,76,111,119,
+101,114,67,97,115,101,40,109,97,110,117,102,97,99,116,117,114,105,110,103,112,
+114,111,102,101,115,115,105,111,110,97,108,32,99,111,109,98,105,110,101,100,32,
+119,105,116,104,70,111,114,32,105,110,115,116,97,110,99,101,44,99,111,110,115,
+105,115,116,105,110,103,32,111,102,34,32,109,97,120,108,101,110,103,116,104,61,
+34,114,101,116,117,114,110,32,102,97,108,115,101,59,99,111,110,115,99,105,111,
+117,115,110,101,115,115,77,101,100,105,116,101,114,114,97,110,101,97,110,101,120
+,116,114,97,111,114,100,105,110,97,114,121,97,115,115,97,115,115,105,110,97,116,
+105,111,110,115,117,98,115,101,113,117,101,110,116,108,121,32,98,117,116,116,111
+,110,32,116,121,112,101,61,34,116,104,101,32,110,117,109,98,101,114,32,111,102,
+116,104,101,32,111,114,105,103,105,110,97,108,32,99,111,109,112,114,101,104,101,
+110,115,105,118,101,114,101,102,101,114,115,32,116,111,32,116,104,101,60,47,117,
+108,62,10,60,47,100,105,118,62,10,112,104,105,108,111,115,111,112,104,105,99,97,
+108,108,111,99,97,116,105,111,110,46,104,114,101,102,119,97,115,32,112,117,98,
+108,105,115,104,101,100,83,97,110,32,70,114,97,110,99,105,115,99,111,40,102,117,
+110,99,116,105,111,110,40,41,123,10,60,100,105,118,32,105,100,61,34,109,97,105,
+110,115,111,112,104,105,115,116,105,99,97,116,101,100,109,97,116,104,101,109,97,
+116,105,99,97,108,32,47,104,101,97,100,62,13,10,60,98,111,100,121,115,117,103,
+103,101,115,116,115,32,116,104,97,116,100,111,99,117,109,101,110,116,97,116,105,
+111,110,99,111,110,99,101,110,116,114,97,116,105,111,110,114,101,108,97,116,105,
+111,110,115,104,105,112,115,109,97,121,32,104,97,118,101,32,98,101,101,110,40,
+102,111,114,32,101,120,97,109,112,108,101,44,84,104,105,115,32,97,114,116,105,99
+,108,101,32,105,110,32,115,111,109,101,32,99,97,115,101,115,112,97,114,116,115,
+32,111,102,32,116,104,101,32,100,101,102,105,110,105,116,105,111,110,32,111,102,
+71,114,101,97,116,32,66,114,105,116,97,105,110,32,99,101,108,108,112,97,100,100,
+105,110,103,61,101,113,117,105,118,97,108,101,110,116,32,116,111,112,108,97,99,
+101,104,111,108,100,101,114,61,34,59,32,102,111,110,116,45,115,105,122,101,58,32
+,106,117,115,116,105,102,105,99,97,116,105,111,110,98,101,108,105,101,118,101,
+100,32,116,104,97,116,115,117,102,102,101,114,101,100,32,102,114,111,109,97,116,
+116,101,109,112,116,101,100,32,116,111,32,108,101,97,100,101,114,32,111,102,32,
+116,104,101,99,114,105,112,116,34,32,115,114,99,61,34,47,40,102,117,110,99,116,
+105,111,110,40,41,32,123,97,114,101,32,97,118,97,105,108,97,98,108,101,10,9,60,
+108,105,110,107,32,114,101,108,61,34,32,115,114,99,61,39,104,116,116,112,58,47,
+47,105,110,116,101,114,101,115,116,101,100,32,105,110,99,111,110,118,101,110,116
+,105,111,110,97,108,32,34,32,97,108,116,61,34,34,32,47,62,60,47,97,114,101,32,
+103,101,110,101,114,97,108,108,121,104,97,115,32,97,108,115,111,32,98,101,101,
+110,109,111,115,116,32,112,111,112,117,108,97,114,32,99,111,114,114,101,115,112,
+111,110,100,105,110,103,99,114,101,100,105,116,101,100,32,119,105,116,104,116,
+121,108,101,61,34,98,111,114,100,101,114,58,60,47,97,62,60,47,115,112,97,110,62,
+60,47,46,103,105,102,34,32,119,105,100,116,104,61,34,60,105,102,114,97,109,101,
+32,115,114,99,61,34,116,97,98,108,101,32,99,108,97,115,115,61,34,105,110,108,105
+,110,101,45,98,108,111,99,107,59,97,99,99,111,114,100,105,110,103,32,116,111,32,
+116,111,103,101,116,104,101,114,32,119,105,116,104,97,112,112,114,111,120,105,
+109,97,116,101,108,121,112,97,114,108,105,97,109,101,110,116,97,114,121,109,111,
+114,101,32,97,110,100,32,109,111,114,101,100,105,115,112,108,97,121,58,110,111,
+110,101,59,116,114,97,100,105,116,105,111,110,97,108,108,121,112,114,101,100,111
+,109,105,110,97,110,116,108,121,38,110,98,115,112,59,124,38,110,98,115,112,59,38
+,110,98,115,112,59,60,47,115,112,97,110,62,32,99,101,108,108,115,112,97,99,105,
+110,103,61,60,105,110,112,117,116,32,110,97,109,101,61,34,111,114,34,32,99,111,
+110,116,101,110,116,61,34,99,111,110,116,114,111,118,101,114,115,105,97,108,112,
+114,111,112,101,114,116,121,61,34,111,103,58,47,120,45,115,104,111,99,107,119,97
+,118,101,45,100,101,109,111,110,115,116,114,97,116,105,111,110,115,117,114,114,
+111,117,110,100,101,100,32,98,121,78,101,118,101,114,116,104,101,108,101,115,115
+,44,119,97,115,32,116,104,101,32,102,105,114,115,116,99,111,110,115,105,100,101,
+114,97,98,108,101,32,65,108,116,104,111,117,103,104,32,116,104,101,32,99,111,108
+,108,97,98,111,114,97,116,105,111,110,115,104,111,117,108,100,32,110,111,116,32,
+98,101,112,114,111,112,111,114,116,105,111,110,32,111,102,60,115,112,97,110,32,
+115,116,121,108,101,61,34,107,110,111,119,110,32,97,115,32,116,104,101,32,115,
+104,111,114,116,108,121,32,97,102,116,101,114,102,111,114,32,105,110,115,116,97,
+110,99,101,44,100,101,115,99,114,105,98,101,100,32,97,115,32,47,104,101,97,100,
+62,10,60,98,111,100,121,32,115,116,97,114,116,105,110,103,32,119,105,116,104,105
+,110,99,114,101,97,115,105,110,103,108,121,32,116,104,101,32,102,97,99,116,32,
+116,104,97,116,100,105,115,99,117,115,115,105,111,110,32,111,102,109,105,100,100
+,108,101,32,111,102,32,116,104,101,97,110,32,105,110,100,105,118,105,100,117,97,
+108,100,105,102,102,105,99,117,108,116,32,116,111,32,112,111,105,110,116,32,111,
+102,32,118,105,101,119,104,111,109,111,115,101,120,117,97,108,105,116,121,97,99,
+99,101,112,116,97,110,99,101,32,111,102,60,47,115,112,97,110,62,60,47,100,105,
+118,62,109,97,110,117,102,97,99,116,117,114,101,114,115,111,114,105,103,105,110,
+32,111,102,32,116,104,101,99,111,109,109,111,110,108,121,32,117,115,101,100,105,
+109,112,111,114,116,97,110,99,101,32,111,102,100,101,110,111,109,105,110,97,116,
+105,111,110,115,98,97,99,107,103,114,111,117,110,100,58,32,35,108,101,110,103,
+116,104,32,111,102,32,116,104,101,100,101,116,101,114,109,105,110,97,116,105,111
+,110,97,32,115,105,103,110,105,102,105,99,97,110,116,34,32,98,111,114,100,101,
+114,61,34,48,34,62,114,101,118,111,108,117,116,105,111,110,97,114,121,112,114,
+105,110,99,105,112,108,101,115,32,111,102,105,115,32,99,111,110,115,105,100,101,
+114,101,100,119,97,115,32,100,101,118,101,108,111,112,101,100,73,110,100,111,45,
+69,117,114,111,112,101,97,110,118,117,108,110,101,114,97,98,108,101,32,116,111,
+112,114,111,112,111,110,101,110,116,115,32,111,102,97,114,101,32,115,111,109,101
+,116,105,109,101,115,99,108,111,115,101,114,32,116,111,32,116,104,101,78,101,119
+,32,89,111,114,107,32,67,105,116,121,32,110,97,109,101,61,34,115,101,97,114,99,
+104,97,116,116,114,105,98,117,116,101,100,32,116,111,99,111,117,114,115,101,32,
+111,102,32,116,104,101,109,97,116,104,101,109,97,116,105,99,105,97,110,98,121,32
+,116,104,101,32,101,110,100,32,111,102,97,116,32,116,104,101,32,101,110,100,32,
+111,102,34,32,98,111,114,100,101,114,61,34,48,34,32,116,101,99,104,110,111,108,
+111,103,105,99,97,108,46,114,101,109,111,118,101,67,108,97,115,115,40,98,114,97,
+110,99,104,32,111,102,32,116,104,101,101,118,105,100,101,110,99,101,32,116,104,
+97,116,33,91,101,110,100,105,102,93,45,45,62,13,10,73,110,115,116,105,116,117,
+116,101,32,111,102,32,105,110,116,111,32,97,32,115,105,110,103,108,101,114,101,
+115,112,101,99,116,105,118,101,108,121,46,97,110,100,32,116,104,101,114,101,102,
+111,114,101,112,114,111,112,101,114,116,105,101,115,32,111,102,105,115,32,108,
+111,99,97,116,101,100,32,105,110,115,111,109,101,32,111,102,32,119,104,105,99,
+104,84,104,101,114,101,32,105,115,32,97,108,115,111,99,111,110,116,105,110,117,
+101,100,32,116,111,32,97,112,112,101,97,114,97,110,99,101,32,111,102,32,38,97,
+109,112,59,110,100,97,115,104,59,32,100,101,115,99,114,105,98,101,115,32,116,104
+,101,99,111,110,115,105,100,101,114,97,116,105,111,110,97,117,116,104,111,114,32
+,111,102,32,116,104,101,105,110,100,101,112,101,110,100,101,110,116,108,121,101,
+113,117,105,112,112,101,100,32,119,105,116,104,100,111,101,115,32,110,111,116,32
+,104,97,118,101,60,47,97,62,60,97,32,104,114,101,102,61,34,99,111,110,102,117,
+115,101,100,32,119,105,116,104,60,108,105,110,107,32,104,114,101,102,61,34,47,97
+,116,32,116,104,101,32,97,103,101,32,111,102,97,112,112,101,97,114,32,105,110,32
+,116,104,101,84,104,101,115,101,32,105,110,99,108,117,100,101,114,101,103,97,114
+,100,108,101,115,115,32,111,102,99,111,117,108,100,32,98,101,32,117,115,101,100,
+32,115,116,121,108,101,61,38,113,117,111,116,59,115,101,118,101,114,97,108,32,
+116,105,109,101,115,114,101,112,114,101,115,101,110,116,32,116,104,101,98,111,
+100,121,62,10,60,47,104,116,109,108,62,116,104,111,117,103,104,116,32,116,111,32
+,98,101,112,111,112,117,108,97,116,105,111,110,32,111,102,112,111,115,115,105,98
+,105,108,105,116,105,101,115,112,101,114,99,101,110,116,97,103,101,32,111,102,97
+,99,99,101,115,115,32,116,111,32,116,104,101,97,110,32,97,116,116,101,109,112,
+116,32,116,111,112,114,111,100,117,99,116,105,111,110,32,111,102,106,113,117,101
+,114,121,47,106,113,117,101,114,121,116,119,111,32,100,105,102,102,101,114,101,
+110,116,98,101,108,111,110,103,32,116,111,32,116,104,101,101,115,116,97,98,108,
+105,115,104,109,101,110,116,114,101,112,108,97,99,105,110,103,32,116,104,101,100
+,101,115,99,114,105,112,116,105,111,110,34,32,100,101,116,101,114,109,105,110,
+101,32,116,104,101,97,118,97,105,108,97,98,108,101,32,102,111,114,65,99,99,111,
+114,100,105,110,103,32,116,111,32,119,105,100,101,32,114,97,110,103,101,32,111,
+102,9,60,100,105,118,32,99,108,97,115,115,61,34,109,111,114,101,32,99,111,109,
+109,111,110,108,121,111,114,103,97,110,105,115,97,116,105,111,110,115,102,117,
+110,99,116,105,111,110,97,108,105,116,121,119,97,115,32,99,111,109,112,108,101,
+116,101,100,32,38,97,109,112,59,109,100,97,115,104,59,32,112,97,114,116,105,99,
+105,112,97,116,105,111,110,116,104,101,32,99,104,97,114,97,99,116,101,114,97,110
+,32,97,100,100,105,116,105,111,110,97,108,97,112,112,101,97,114,115,32,116,111,
+32,98,101,102,97,99,116,32,116,104,97,116,32,116,104,101,97,110,32,101,120,97,
+109,112,108,101,32,111,102,115,105,103,110,105,102,105,99,97,110,116,108,121,111
+,110,109,111,117,115,101,111,118,101,114,61,34,98,101,99,97,117,115,101,32,116,
+104,101,121,32,97,115,121,110,99,32,61,32,116,114,117,101,59,112,114,111,98,108,
+101,109,115,32,119,105,116,104,115,101,101,109,115,32,116,111,32,104,97,118,101,
+116,104,101,32,114,101,115,117,108,116,32,111,102,32,115,114,99,61,34,104,116,
+116,112,58,47,47,102,97,109,105,108,105,97,114,32,119,105,116,104,112,111,115,
+115,101,115,115,105,111,110,32,111,102,102,117,110,99,116,105,111,110,32,40,41,
+32,123,116,111,111,107,32,112,108,97,99,101,32,105,110,97,110,100,32,115,111,109
+,101,116,105,109,101,115,115,117,98,115,116,97,110,116,105,97,108,108,121,60,115
+,112,97,110,62,60,47,115,112,97,110,62,105,115,32,111,102,116,101,110,32,117,115
+,101,100,105,110,32,97,110,32,97,116,116,101,109,112,116,103,114,101,97,116,32,
+100,101,97,108,32,111,102,69,110,118,105,114,111,110,109,101,110,116,97,108,115,
+117,99,99,101,115,115,102,117,108,108,121,32,118,105,114,116,117,97,108,108,121,
+32,97,108,108,50,48,116,104,32,99,101,110,116,117,114,121,44,112,114,111,102,101
+,115,115,105,111,110,97,108,115,110,101,99,101,115,115,97,114,121,32,116,111,32,
+100,101,116,101,114,109,105,110,101,100,32,98,121,99,111,109,112,97,116,105,98,
+105,108,105,116,121,98,101,99,97,117,115,101,32,105,116,32,105,115,68,105,99,116
+,105,111,110,97,114,121,32,111,102,109,111,100,105,102,105,99,97,116,105,111,110
+,115,84,104,101,32,102,111,108,108,111,119,105,110,103,109,97,121,32,114,101,102
+,101,114,32,116,111,58,67,111,110,115,101,113,117,101,110,116,108,121,44,73,110,
+116,101,114,110,97,116,105,111,110,97,108,97,108,116,104,111,117,103,104,32,115,
+111,109,101,116,104,97,116,32,119,111,117,108,100,32,98,101,119,111,114,108,100,
+39,115,32,102,105,114,115,116,99,108,97,115,115,105,102,105,101,100,32,97,115,98
+,111,116,116,111,109,32,111,102,32,116,104,101,40,112,97,114,116,105,99,117,108,
+97,114,108,121,97,108,105,103,110,61,34,108,101,102,116,34,32,109,111,115,116,32
+,99,111,109,109,111,110,108,121,98,97,115,105,115,32,102,111,114,32,116,104,101,
+102,111,117,110,100,97,116,105,111,110,32,111,102,99,111,110,116,114,105,98,117,
+116,105,111,110,115,112,111,112,117,108,97,114,105,116,121,32,111,102,99,101,110
+,116,101,114,32,111,102,32,116,104,101,116,111,32,114,101,100,117,99,101,32,116,
+104,101,106,117,114,105,115,100,105,99,116,105,111,110,115,97,112,112,114,111,
+120,105,109,97,116,105,111,110,32,111,110,109,111,117,115,101,111,117,116,61,34,
+78,101,119,32,84,101,115,116,97,109,101,110,116,99,111,108,108,101,99,116,105,
+111,110,32,111,102,60,47,115,112,97,110,62,60,47,97,62,60,47,105,110,32,116,104,
+101,32,85,110,105,116,101,100,102,105,108,109,32,100,105,114,101,99,116,111,114,
+45,115,116,114,105,99,116,46,100,116,100,34,62,104,97,115,32,98,101,101,110,32,
+117,115,101,100,114,101,116,117,114,110,32,116,111,32,116,104,101,97,108,116,104
+,111,117,103,104,32,116,104,105,115,99,104,97,110,103,101,32,105,110,32,116,104,
+101,115,101,118,101,114,97,108,32,111,116,104,101,114,98,117,116,32,116,104,101,
+114,101,32,97,114,101,117,110,112,114,101,99,101,100,101,110,116,101,100,105,115
+,32,115,105,109,105,108,97,114,32,116,111,101,115,112,101,99,105,97,108,108,121,
+32,105,110,119,101,105,103,104,116,58,32,98,111,108,100,59,105,115,32,99,97,108,
+108,101,100,32,116,104,101,99,111,109,112,117,116,97,116,105,111,110,97,108,105,
+110,100,105,99,97,116,101,32,116,104,97,116,114,101,115,116,114,105,99,116,101,
+100,32,116,111,9,60,109,101,116,97,32,110,97,109,101,61,34,97,114,101,32,116,121
+,112,105,99,97,108,108,121,99,111,110,102,108,105,99,116,32,119,105,116,104,72,
+111,119,101,118,101,114,44,32,116,104,101,32,65,110,32,101,120,97,109,112,108,
+101,32,111,102,99,111,109,112,97,114,101,100,32,119,105,116,104,113,117,97,110,
+116,105,116,105,101,115,32,111,102,114,97,116,104,101,114,32,116,104,97,110,32,
+97,99,111,110,115,116,101,108,108,97,116,105,111,110,110,101,99,101,115,115,97,
+114,121,32,102,111,114,114,101,112,111,114,116,101,100,32,116,104,97,116,115,112
+,101,99,105,102,105,99,97,116,105,111,110,112,111,108,105,116,105,99,97,108,32,
+97,110,100,38,110,98,115,112,59,38,110,98,115,112,59,60,114,101,102,101,114,101,
+110,99,101,115,32,116,111,116,104,101,32,115,97,109,101,32,121,101,97,114,71,111
+,118,101,114,110,109,101,110,116,32,111,102,103,101,110,101,114,97,116,105,111,
+110,32,111,102,104,97,118,101,32,110,111,116,32,98,101,101,110,115,101,118,101,
+114,97,108,32,121,101,97,114,115,99,111,109,109,105,116,109,101,110,116,32,116,
+111,9,9,60,117,108,32,99,108,97,115,115,61,34,118,105,115,117,97,108,105,122,97,
+116,105,111,110,49,57,116,104,32,99,101,110,116,117,114,121,44,112,114,97,99,116
+,105,116,105,111,110,101,114,115,116,104,97,116,32,104,101,32,119,111,117,108,
+100,97,110,100,32,99,111,110,116,105,110,117,101,100,111,99,99,117,112,97,116,
+105,111,110,32,111,102,105,115,32,100,101,102,105,110,101,100,32,97,115,99,101,
+110,116,114,101,32,111,102,32,116,104,101,116,104,101,32,97,109,111,117,110,116,
+32,111,102,62,60,100,105,118,32,115,116,121,108,101,61,34,101,113,117,105,118,97
+,108,101,110,116,32,111,102,100,105,102,102,101,114,101,110,116,105,97,116,101,
+98,114,111,117,103,104,116,32,97,98,111,117,116,109,97,114,103,105,110,45,108,
+101,102,116,58,32,97,117,116,111,109,97,116,105,99,97,108,108,121,116,104,111,
+117,103,104,116,32,111,102,32,97,115,83,111,109,101,32,111,102,32,116,104,101,
+115,101,10,60,100,105,118,32,99,108,97,115,115,61,34,105,110,112,117,116,32,99,
+108,97,115,115,61,34,114,101,112,108,97,99,101,100,32,119,105,116,104,105,115,32
+,111,110,101,32,111,102,32,116,104,101,101,100,117,99,97,116,105,111,110,32,97,
+110,100,105,110,102,108,117,101,110,99,101,100,32,98,121,114,101,112,117,116,97,
+116,105,111,110,32,97,115,10,60,109,101,116,97,32,110,97,109,101,61,34,97,99,99,
+111,109,109,111,100,97,116,105,111,110,60,47,100,105,118,62,10,60,47,100,105,118
+,62,108,97,114,103,101,32,112,97,114,116,32,111,102,73,110,115,116,105,116,117,
+116,101,32,102,111,114,116,104,101,32,115,111,45,99,97,108,108,101,100,32,97,103
+,97,105,110,115,116,32,116,104,101,32,73,110,32,116,104,105,115,32,99,97,115,101
+,44,119,97,115,32,97,112,112,111,105,110,116,101,100,99,108,97,105,109,101,100,
+32,116,111,32,98,101,72,111,119,101,118,101,114,44,32,116,104,105,115,68,101,112
+,97,114,116,109,101,110,116,32,111,102,116,104,101,32,114,101,109,97,105,110,105
+,110,103,101,102,102,101,99,116,32,111,110,32,116,104,101,112,97,114,116,105,99,
+117,108,97,114,108,121,32,100,101,97,108,32,119,105,116,104,32,116,104,101,10,60
+,100,105,118,32,115,116,121,108,101,61,34,97,108,109,111,115,116,32,97,108,119,
+97,121,115,97,114,101,32,99,117,114,114,101,110,116,108,121,101,120,112,114,101,
+115,115,105,111,110,32,111,102,112,104,105,108,111,115,111,112,104,121,32,111,
+102,102,111,114,32,109,111,114,101,32,116,104,97,110,99,105,118,105,108,105,122,
+97,116,105,111,110,115,111,110,32,116,104,101,32,105,115,108,97,110,100,115,101,
+108,101,99,116,101,100,73,110,100,101,120,99,97,110,32,114,101,115,117,108,116,
+32,105,110,34,32,118,97,108,117,101,61,34,34,32,47,62,116,104,101,32,115,116,114
+,117,99,116,117,114,101,32,47,62,60,47,97,62,60,47,100,105,118,62,77,97,110,121,
+32,111,102,32,116,104,101,115,101,99,97,117,115,101,100,32,98,121,32,116,104,101
+,111,102,32,116,104,101,32,85,110,105,116,101,100,115,112,97,110,32,99,108,97,
+115,115,61,34,109,99,97,110,32,98,101,32,116,114,97,99,101,100,105,115,32,114,
+101,108,97,116,101,100,32,116,111,98,101,99,97,109,101,32,111,110,101,32,111,102
+,105,115,32,102,114,101,113,117,101,110,116,108,121,108,105,118,105,110,103,32,
+105,110,32,116,104,101,116,104,101,111,114,101,116,105,99,97,108,108,121,70,111,
+108,108,111,119,105,110,103,32,116,104,101,82,101,118,111,108,117,116,105,111,
+110,97,114,121,103,111,118,101,114,110,109,101,110,116,32,105,110,105,115,32,100
+,101,116,101,114,109,105,110,101,100,116,104,101,32,112,111,108,105,116,105,99,
+97,108,105,110,116,114,111,100,117,99,101,100,32,105,110,115,117,102,102,105,99,
+105,101,110,116,32,116,111,100,101,115,99,114,105,112,116,105,111,110,34,62,115,
+104,111,114,116,32,115,116,111,114,105,101,115,115,101,112,97,114,97,116,105,111
+,110,32,111,102,97,115,32,116,111,32,119,104,101,116,104,101,114,107,110,111,119
+,110,32,102,111,114,32,105,116,115,119,97,115,32,105,110,105,116,105,97,108,108,
+121,100,105,115,112,108,97,121,58,98,108,111,99,107,105,115,32,97,110,32,101,120
+,97,109,112,108,101,116,104,101,32,112,114,105,110,99,105,112,97,108,99,111,110,
+115,105,115,116,115,32,111,102,32,97,114,101,99,111,103,110,105,122,101,100,32,
+97,115,47,98,111,100,121,62,60,47,104,116,109,108,62,97,32,115,117,98,115,116,97
+,110,116,105,97,108,114,101,99,111,110,115,116,114,117,99,116,101,100,104,101,97
+,100,32,111,102,32,115,116,97,116,101,114,101,115,105,115,116,97,110,99,101,32,
+116,111,117,110,100,101,114,103,114,97,100,117,97,116,101,84,104,101,114,101,32,
+97,114,101,32,116,119,111,103,114,97,118,105,116,97,116,105,111,110,97,108,97,
+114,101,32,100,101,115,99,114,105,98,101,100,105,110,116,101,110,116,105,111,110
+,97,108,108,121,115,101,114,118,101,100,32,97,115,32,116,104,101,99,108,97,115,
+115,61,34,104,101,97,100,101,114,111,112,112,111,115,105,116,105,111,110,32,116,
+111,102,117,110,100,97,109,101,110,116,97,108,108,121,100,111,109,105,110,97,116
+,101,100,32,116,104,101,97,110,100,32,116,104,101,32,111,116,104,101,114,97,108,
+108,105,97,110,99,101,32,119,105,116,104,119,97,115,32,102,111,114,99,101,100,32
+,116,111,114,101,115,112,101,99,116,105,118,101,108,121,44,97,110,100,32,112,111
+,108,105,116,105,99,97,108,105,110,32,115,117,112,112,111,114,116,32,111,102,112
+,101,111,112,108,101,32,105,110,32,116,104,101,50,48,116,104,32,99,101,110,116,
+117,114,121,46,97,110,100,32,112,117,98,108,105,115,104,101,100,108,111,97,100,
+67,104,97,114,116,98,101,97,116,116,111,32,117,110,100,101,114,115,116,97,110,
+100,109,101,109,98,101,114,32,115,116,97,116,101,115,101,110,118,105,114,111,110
+,109,101,110,116,97,108,102,105,114,115,116,32,104,97,108,102,32,111,102,99,111,
+117,110,116,114,105,101,115,32,97,110,100,97,114,99,104,105,116,101,99,116,117,
+114,97,108,98,101,32,99,111,110,115,105,100,101,114,101,100,99,104,97,114,97,99,
+116,101,114,105,122,101,100,99,108,101,97,114,73,110,116,101,114,118,97,108,97,
+117,116,104,111,114,105,116,97,116,105,118,101,70,101,100,101,114,97,116,105,111
+,110,32,111,102,119,97,115,32,115,117,99,99,101,101,100,101,100,97,110,100,32,
+116,104,101,114,101,32,97,114,101,97,32,99,111,110,115,101,113,117,101,110,99,
+101,116,104,101,32,80,114,101,115,105,100,101,110,116,97,108,115,111,32,105,110,
+99,108,117,100,101,100,102,114,101,101,32,115,111,102,116,119,97,114,101,115,117
+,99,99,101,115,115,105,111,110,32,111,102,100,101,118,101,108,111,112,101,100,32
+,116,104,101,119,97,115,32,100,101,115,116,114,111,121,101,100,97,119,97,121,32,
+102,114,111,109,32,116,104,101,59,10,60,47,115,99,114,105,112,116,62,10,60,97,
+108,116,104,111,117,103,104,32,116,104,101,121,102,111,108,108,111,119,101,100,
+32,98,121,32,97,109,111,114,101,32,112,111,119,101,114,102,117,108,114,101,115,
+117,108,116,101,100,32,105,110,32,97,85,110,105,118,101,114,115,105,116,121,32,
+111,102,72,111,119,101,118,101,114,44,32,109,97,110,121,116,104,101,32,112,114,
+101,115,105,100,101,110,116,72,111,119,101,118,101,114,44,32,115,111,109,101,105
+,115,32,116,104,111,117,103,104,116,32,116,111,117,110,116,105,108,32,116,104,
+101,32,101,110,100,119,97,115,32,97,110,110,111,117,110,99,101,100,97,114,101,32
+,105,109,112,111,114,116,97,110,116,97,108,115,111,32,105,110,99,108,117,100,101
+,115,62,60,105,110,112,117,116,32,116,121,112,101,61,116,104,101,32,99,101,110,
+116,101,114,32,111,102,32,68,79,32,78,79,84,32,65,76,84,69,82,117,115,101,100,32
+,116,111,32,114,101,102,101,114,116,104,101,109,101,115,47,63,115,111,114,116,61
+,116,104,97,116,32,104,97,100,32,98,101,101,110,116,104,101,32,98,97,115,105,115
+,32,102,111,114,104,97,115,32,100,101,118,101,108,111,112,101,100,105,110,32,116
+,104,101,32,115,117,109,109,101,114,99,111,109,112,97,114,97,116,105,118,101,108
+,121,100,101,115,99,114,105,98,101,100,32,116,104,101,115,117,99,104,32,97,115,
+32,116,104,111,115,101,116,104,101,32,114,101,115,117,108,116,105,110,103,105,
+115,32,105,109,112,111,115,115,105,98,108,101,118,97,114,105,111,117,115,32,111,
+116,104,101,114,83,111,117,116,104,32,65,102,114,105,99,97,110,104,97,118,101,32
+,116,104,101,32,115,97,109,101,101,102,102,101,99,116,105,118,101,110,101,115,
+115,105,110,32,119,104,105,99,104,32,99,97,115,101,59,32,116,101,120,116,45,97,
+108,105,103,110,58,115,116,114,117,99,116,117,114,101,32,97,110,100,59,32,98,97,
+99,107,103,114,111,117,110,100,58,114,101,103,97,114,100,105,110,103,32,116,104,
+101,115,117,112,112,111,114,116,101,100,32,116,104,101,105,115,32,97,108,115,111
+,32,107,110,111,119,110,115,116,121,108,101,61,34,109,97,114,103,105,110,105,110
+,99,108,117,100,105,110,103,32,116,104,101,98,97,104,97,115,97,32,77,101,108,97,
+121,117,110,111,114,115,107,32,98,111,107,109,195,165,108,110,111,114,115,107,32
+,110,121,110,111,114,115,107,115,108,111,118,101,110,197,161,196,141,105,110,97,
+105,110,116,101,114,110,97,99,105,111,110,97,108,99,97,108,105,102,105,99,97,99,
+105,195,179,110,99,111,109,117,110,105,99,97,99,105,195,179,110,99,111,110,115,
+116,114,117,99,99,105,195,179,110,34,62,60,100,105,118,32,99,108,97,115,115,61,
+34,100,105,115,97,109,98,105,103,117,97,116,105,111,110,68,111,109,97,105,110,78
+,97,109,101,39,44,32,39,97,100,109,105,110,105,115,116,114,97,116,105,111,110,
+115,105,109,117,108,116,97,110,101,111,117,115,108,121,116,114,97,110,115,112,
+111,114,116,97,116,105,111,110,73,110,116,101,114,110,97,116,105,111,110,97,108,
+32,109,97,114,103,105,110,45,98,111,116,116,111,109,58,114,101,115,112,111,110,
+115,105,98,105,108,105,116,121,60,33,91,101,110,100,105,102,93,45,45,62,10,60,47
+,62,60,109,101,116,97,32,110,97,109,101,61,34,105,109,112,108,101,109,101,110,
+116,97,116,105,111,110,105,110,102,114,97,115,116,114,117,99,116,117,114,101,114
+,101,112,114,101,115,101,110,116,97,116,105,111,110,98,111,114,100,101,114,45,98
+,111,116,116,111,109,58,60,47,104,101,97,100,62,10,60,98,111,100,121,62,61,104,
+116,116,112,37,51,65,37,50,70,37,50,70,60,102,111,114,109,32,109,101,116,104,111
+,100,61,34,109,101,116,104,111,100,61,34,112,111,115,116,34,32,47,102,97,118,105
+,99,111,110,46,105,99,111,34,32,125,41,59,10,60,47,115,99,114,105,112,116,62,10,
+46,115,101,116,65,116,116,114,105,98,117,116,101,40,65,100,109,105,110,105,115,
+116,114,97,116,105,111,110,61,32,110,101,119,32,65,114,114,97,121,40,41,59,60,33
+,91,101,110,100,105,102,93,45,45,62,13,10,100,105,115,112,108,97,121,58,98,108,
+111,99,107,59,85,110,102,111,114,116,117,110,97,116,101,108,121,44,34,62,38,110,
+98,115,112,59,60,47,100,105,118,62,47,102,97,118,105,99,111,110,46,105,99,111,34
+,62,61,39,115,116,121,108,101,115,104,101,101,116,39,32,105,100,101,110,116,105,
+102,105,99,97,116,105,111,110,44,32,102,111,114,32,101,120,97,109,112,108,101,44
+,60,108,105,62,60,97,32,104,114,101,102,61,34,47,97,110,32,97,108,116,101,114,
+110,97,116,105,118,101,97,115,32,97,32,114,101,115,117,108,116,32,111,102,112,
+116,34,62,60,47,115,99,114,105,112,116,62,10,116,121,112,101,61,34,115,117,98,
+109,105,116,34,32,10,40,102,117,110,99,116,105,111,110,40,41,32,123,114,101,99,
+111,109,109,101,110,100,97,116,105,111,110,102,111,114,109,32,97,99,116,105,111,
+110,61,34,47,116,114,97,110,115,102,111,114,109,97,116,105,111,110,114,101,99,
+111,110,115,116,114,117,99,116,105,111,110,46,115,116,121,108,101,46,100,105,115
+,112,108,97,121,32,65,99,99,111,114,100,105,110,103,32,116,111,32,104,105,100,
+100,101,110,34,32,110,97,109,101,61,34,97,108,111,110,103,32,119,105,116,104,32,
+116,104,101,100,111,99,117,109,101,110,116,46,98,111,100,121,46,97,112,112,114,
+111,120,105,109,97,116,101,108,121,32,67,111,109,109,117,110,105,99,97,116,105,
+111,110,115,112,111,115,116,34,32,97,99,116,105,111,110,61,34,109,101,97,110,105
+,110,103,32,38,113,117,111,116,59,45,45,60,33,91,101,110,100,105,102,93,45,45,62
+,80,114,105,109,101,32,77,105,110,105,115,116,101,114,99,104,97,114,97,99,116,
+101,114,105,115,116,105,99,60,47,97,62,32,60,97,32,99,108,97,115,115,61,116,104,
+101,32,104,105,115,116,111,114,121,32,111,102,32,111,110,109,111,117,115,101,111
+,118,101,114,61,34,116,104,101,32,103,111,118,101,114,110,109,101,110,116,104,
+114,101,102,61,34,104,116,116,112,115,58,47,47,119,97,115,32,111,114,105,103,105
+,110,97,108,108,121,119,97,115,32,105,110,116,114,111,100,117,99,101,100,99,108,
+97,115,115,105,102,105,99,97,116,105,111,110,114,101,112,114,101,115,101,110,116
+,97,116,105,118,101,97,114,101,32,99,111,110,115,105,100,101,114,101,100,60,33,
+91,101,110,100,105,102,93,45,45,62,10,10,100,101,112,101,110,100,115,32,111,110,
+32,116,104,101,85,110,105,118,101,114,115,105,116,121,32,111,102,32,105,110,32,
+99,111,110,116,114,97,115,116,32,116,111,32,112,108,97,99,101,104,111,108,100,
+101,114,61,34,105,110,32,116,104,101,32,99,97,115,101,32,111,102,105,110,116,101
+,114,110,97,116,105,111,110,97,108,32,99,111,110,115,116,105,116,117,116,105,111
+,110,97,108,115,116,121,108,101,61,34,98,111,114,100,101,114,45,58,32,102,117,
+110,99,116,105,111,110,40,41,32,123,66,101,99,97,117,115,101,32,111,102,32,116,
+104,101,45,115,116,114,105,99,116,46,100,116,100,34,62,10,60,116,97,98,108,101,
+32,99,108,97,115,115,61,34,97,99,99,111,109,112,97,110,105,101,100,32,98,121,97,
+99,99,111,117,110,116,32,111,102,32,116,104,101,60,115,99,114,105,112,116,32,115
+,114,99,61,34,47,110,97,116,117,114,101,32,111,102,32,116,104,101,32,116,104,101
+,32,112,101,111,112,108,101,32,105,110,32,105,110,32,97,100,100,105,116,105,111,
+110,32,116,111,115,41,59,32,106,115,46,105,100,32,61,32,105,100,34,32,119,105,
+100,116,104,61,34,49,48,48,37,34,114,101,103,97,114,100,105,110,103,32,116,104,
+101,32,82,111,109,97,110,32,67,97,116,104,111,108,105,99,97,110,32,105,110,100,
+101,112,101,110,100,101,110,116,102,111,108,108,111,119,105,110,103,32,116,104,
+101,32,46,103,105,102,34,32,119,105,100,116,104,61,34,49,116,104,101,32,102,111,
+108,108,111,119,105,110,103,32,100,105,115,99,114,105,109,105,110,97,116,105,111
+,110,97,114,99,104,97,101,111,108,111,103,105,99,97,108,112,114,105,109,101,32,
+109,105,110,105,115,116,101,114,46,106,115,34,62,60,47,115,99,114,105,112,116,62
+,99,111,109,98,105,110,97,116,105,111,110,32,111,102,32,109,97,114,103,105,110,
+119,105,100,116,104,61,34,99,114,101,97,116,101,69,108,101,109,101,110,116,40,
+119,46,97,116,116,97,99,104,69,118,101,110,116,40,60,47,97,62,60,47,116,100,62,
+60,47,116,114,62,115,114,99,61,34,104,116,116,112,115,58,47,47,97,73,110,32,112,
+97,114,116,105,99,117,108,97,114,44,32,97,108,105,103,110,61,34,108,101,102,116,
+34,32,67,122,101,99,104,32,82,101,112,117,98,108,105,99,85,110,105,116,101,100,
+32,75,105,110,103,100,111,109,99,111,114,114,101,115,112,111,110,100,101,110,99,
+101,99,111,110,99,108,117,100,101,100,32,116,104,97,116,46,104,116,109,108,34,32
+,116,105,116,108,101,61,34,40,102,117,110,99,116,105,111,110,32,40,41,32,123,99,
+111,109,101,115,32,102,114,111,109,32,116,104,101,97,112,112,108,105,99,97,116,
+105,111,110,32,111,102,60,115,112,97,110,32,99,108,97,115,115,61,34,115,98,101,
+108,105,101,118,101,100,32,116,111,32,98,101,101,109,101,110,116,40,39,115,99,
+114,105,112,116,39,60,47,97,62,10,60,47,108,105,62,10,60,108,105,118,101,114,121
+,32,100,105,102,102,101,114,101,110,116,62,60,115,112,97,110,32,99,108,97,115,
+115,61,34,111,112,116,105,111,110,32,118,97,108,117,101,61,34,40,97,108,115,111,
+32,107,110,111,119,110,32,97,115,9,60,108,105,62,60,97,32,104,114,101,102,61,34,
+62,60,105,110,112,117,116,32,110,97,109,101,61,34,115,101,112,97,114,97,116,101,
+100,32,102,114,111,109,114,101,102,101,114,114,101,100,32,116,111,32,97,115,32,
+118,97,108,105,103,110,61,34,116,111,112,34,62,102,111,117,110,100,101,114,32,
+111,102,32,116,104,101,97,116,116,101,109,112,116,105,110,103,32,116,111,32,99,
+97,114,98,111,110,32,100,105,111,120,105,100,101,10,10,60,100,105,118,32,99,108,
+97,115,115,61,34,99,108,97,115,115,61,34,115,101,97,114,99,104,45,47,98,111,100,
+121,62,10,60,47,104,116,109,108,62,111,112,112,111,114,116,117,110,105,116,121,
+32,116,111,99,111,109,109,117,110,105,99,97,116,105,111,110,115,60,47,104,101,97
+,100,62,13,10,60,98,111,100,121,32,115,116,121,108,101,61,34,119,105,100,116,104
+,58,84,105,225,186,191,110,103,32,86,105,225,187,135,116,99,104,97,110,103,101,
+115,32,105,110,32,116,104,101,98,111,114,100,101,114,45,99,111,108,111,114,58,35
+,48,34,32,98,111,114,100,101,114,61,34,48,34,32,60,47,115,112,97,110,62,60,47,
+100,105,118,62,60,119,97,115,32,100,105,115,99,111,118,101,114,101,100,34,32,116
+,121,112,101,61,34,116,101,120,116,34,32,41,59,10,60,47,115,99,114,105,112,116,
+62,10,10,68,101,112,97,114,116,109,101,110,116,32,111,102,32,101,99,99,108,101,
+115,105,97,115,116,105,99,97,108,116,104,101,114,101,32,104,97,115,32,98,101,101
+,110,114,101,115,117,108,116,105,110,103,32,102,114,111,109,60,47,98,111,100,121
+,62,60,47,104,116,109,108,62,104,97,115,32,110,101,118,101,114,32,98,101,101,110
+,116,104,101,32,102,105,114,115,116,32,116,105,109,101,105,110,32,114,101,115,
+112,111,110,115,101,32,116,111,97,117,116,111,109,97,116,105,99,97,108,108,121,
+32,60,47,100,105,118,62,10,10,60,100,105,118,32,105,119,97,115,32,99,111,110,115
+,105,100,101,114,101,100,112,101,114,99,101,110,116,32,111,102,32,116,104,101,34
+,32,47,62,60,47,97,62,60,47,100,105,118,62,99,111,108,108,101,99,116,105,111,110
+,32,111,102,32,100,101,115,99,101,110,100,101,100,32,102,114,111,109,115,101,99,
+116,105,111,110,32,111,102,32,116,104,101,97,99,99,101,112,116,45,99,104,97,114,
+115,101,116,116,111,32,98,101,32,99,111,110,102,117,115,101,100,109,101,109,98,
+101,114,32,111,102,32,116,104,101,32,112,97,100,100,105,110,103,45,114,105,103,
+104,116,58,116,114,97,110,115,108,97,116,105,111,110,32,111,102,105,110,116,101,
+114,112,114,101,116,97,116,105,111,110,32,104,114,101,102,61,39,104,116,116,112,
+58,47,47,119,104,101,116,104,101,114,32,111,114,32,110,111,116,84,104,101,114,
+101,32,97,114,101,32,97,108,115,111,116,104,101,114,101,32,97,114,101,32,109,97,
+110,121,97,32,115,109,97,108,108,32,110,117,109,98,101,114,111,116,104,101,114,
+32,112,97,114,116,115,32,111,102,105,109,112,111,115,115,105,98,108,101,32,116,
+111,32,32,99,108,97,115,115,61,34,98,117,116,116,111,110,108,111,99,97,116,101,
+100,32,105,110,32,116,104,101,46,32,72,111,119,101,118,101,114,44,32,116,104,101
+,97,110,100,32,101,118,101,110,116,117,97,108,108,121,65,116,32,116,104,101,32,
+101,110,100,32,111,102,32,98,101,99,97,117,115,101,32,111,102,32,105,116,115,114
+,101,112,114,101,115,101,110,116,115,32,116,104,101,60,102,111,114,109,32,97,99,
+116,105,111,110,61,34,32,109,101,116,104,111,100,61,34,112,111,115,116,34,105,
+116,32,105,115,32,112,111,115,115,105,98,108,101,109,111,114,101,32,108,105,107,
+101,108,121,32,116,111,97,110,32,105,110,99,114,101,97,115,101,32,105,110,104,97
+,118,101,32,97,108,115,111,32,98,101,101,110,99,111,114,114,101,115,112,111,110,
+100,115,32,116,111,97,110,110,111,117,110,99,101,100,32,116,104,97,116,97,108,
+105,103,110,61,34,114,105,103,104,116,34,62,109,97,110,121,32,99,111,117,110,116
+,114,105,101,115,102,111,114,32,109,97,110,121,32,121,101,97,114,115,101,97,114,
+108,105,101,115,116,32,107,110,111,119,110,98,101,99,97,117,115,101,32,105,116,
+32,119,97,115,112,116,34,62,60,47,115,99,114,105,112,116,62,13,32,118,97,108,105
+,103,110,61,34,116,111,112,34,32,105,110,104,97,98,105,116,97,110,116,115,32,111
+,102,102,111,108,108,111,119,105,110,103,32,121,101,97,114,13,10,60,100,105,118,
+32,99,108,97,115,115,61,34,109,105,108,108,105,111,110,32,112,101,111,112,108,
+101,99,111,110,116,114,111,118,101,114,115,105,97,108,32,99,111,110,99,101,114,
+110,105,110,103,32,116,104,101,97,114,103,117,101,32,116,104,97,116,32,116,104,
+101,103,111,118,101,114,110,109,101,110,116,32,97,110,100,97,32,114,101,102,101,
+114,101,110,99,101,32,116,111,116,114,97,110,115,102,101,114,114,101,100,32,116,
+111,100,101,115,99,114,105,98,105,110,103,32,116,104,101,32,115,116,121,108,101,
+61,34,99,111,108,111,114,58,97,108,116,104,111,117,103,104,32,116,104,101,114,
+101,98,101,115,116,32,107,110,111,119,110,32,102,111,114,115,117,98,109,105,116,
+34,32,110,97,109,101,61,34,109,117,108,116,105,112,108,105,99,97,116,105,111,110
+,109,111,114,101,32,116,104,97,110,32,111,110,101,32,114,101,99,111,103,110,105,
+116,105,111,110,32,111,102,67,111,117,110,99,105,108,32,111,102,32,116,104,101,
+101,100,105,116,105,111,110,32,111,102,32,116,104,101,32,32,60,109,101,116,97,32
+,110,97,109,101,61,34,69,110,116,101,114,116,97,105,110,109,101,110,116,32,97,
+119,97,121,32,102,114,111,109,32,116,104,101,32,59,109,97,114,103,105,110,45,114
+,105,103,104,116,58,97,116,32,116,104,101,32,116,105,109,101,32,111,102,105,110,
+118,101,115,116,105,103,97,116,105,111,110,115,99,111,110,110,101,99,116,101,100
+,32,119,105,116,104,97,110,100,32,109,97,110,121,32,111,116,104,101,114,97,108,
+116,104,111,117,103,104,32,105,116,32,105,115,98,101,103,105,110,110,105,110,103
+,32,119,105,116,104,32,60,115,112,97,110,32,99,108,97,115,115,61,34,100,101,115,
+99,101,110,100,97,110,116,115,32,111,102,60,115,112,97,110,32,99,108,97,115,115,
+61,34,105,32,97,108,105,103,110,61,34,114,105,103,104,116,34,60,47,104,101,97,
+100,62,10,60,98,111,100,121,32,97,115,112,101,99,116,115,32,111,102,32,116,104,
+101,104,97,115,32,115,105,110,99,101,32,98,101,101,110,69,117,114,111,112,101,97
+,110,32,85,110,105,111,110,114,101,109,105,110,105,115,99,101,110,116,32,111,102
+,109,111,114,101,32,100,105,102,102,105,99,117,108,116,86,105,99,101,32,80,114,
+101,115,105,100,101,110,116,99,111,109,112,111,115,105,116,105,111,110,32,111,
+102,112,97,115,115,101,100,32,116,104,114,111,117,103,104,109,111,114,101,32,105
+,109,112,111,114,116,97,110,116,102,111,110,116,45,115,105,122,101,58,49,49,112,
+120,101,120,112,108,97,110,97,116,105,111,110,32,111,102,116,104,101,32,99,111,
+110,99,101,112,116,32,111,102,119,114,105,116,116,101,110,32,105,110,32,116,104,
+101,9,60,115,112,97,110,32,99,108,97,115,115,61,34,105,115,32,111,110,101,32,111
+,102,32,116,104,101,32,114,101,115,101,109,98,108,97,110,99,101,32,116,111,111,
+110,32,116,104,101,32,103,114,111,117,110,100,115,119,104,105,99,104,32,99,111,
+110,116,97,105,110,115,105,110,99,108,117,100,105,110,103,32,116,104,101,32,100,
+101,102,105,110,101,100,32,98,121,32,116,104,101,112,117,98,108,105,99,97,116,
+105,111,110,32,111,102,109,101,97,110,115,32,116,104,97,116,32,116,104,101,111,
+117,116,115,105,100,101,32,111,102,32,116,104,101,115,117,112,112,111,114,116,32
+,111,102,32,116,104,101,60,105,110,112,117,116,32,99,108,97,115,115,61,34,60,115
+,112,97,110,32,99,108,97,115,115,61,34,116,40,77,97,116,104,46,114,97,110,100,
+111,109,40,41,109,111,115,116,32,112,114,111,109,105,110,101,110,116,100,101,115
+,99,114,105,112,116,105,111,110,32,111,102,67,111,110,115,116,97,110,116,105,110
+,111,112,108,101,119,101,114,101,32,112,117,98,108,105,115,104,101,100,60,100,
+105,118,32,99,108,97,115,115,61,34,115,101,97,112,112,101,97,114,115,32,105,110,
+32,116,104,101,49,34,32,104,101,105,103,104,116,61,34,49,34,32,109,111,115,116,
+32,105,109,112,111,114,116,97,110,116,119,104,105,99,104,32,105,110,99,108,117,
+100,101,115,119,104,105,99,104,32,104,97,100,32,98,101,101,110,100,101,115,116,
+114,117,99,116,105,111,110,32,111,102,116,104,101,32,112,111,112,117,108,97,116,
+105,111,110,10,9,60,100,105,118,32,99,108,97,115,115,61,34,112,111,115,115,105,
+98,105,108,105,116,121,32,111,102,115,111,109,101,116,105,109,101,115,32,117,115
+,101,100,97,112,112,101,97,114,32,116,111,32,104,97,118,101,115,117,99,99,101,
+115,115,32,111,102,32,116,104,101,105,110,116,101,110,100,101,100,32,116,111,32,
+98,101,112,114,101,115,101,110,116,32,105,110,32,116,104,101,115,116,121,108,101
+,61,34,99,108,101,97,114,58,98,13,10,60,47,115,99,114,105,112,116,62,13,10,60,
+119,97,115,32,102,111,117,110,100,101,100,32,105,110,105,110,116,101,114,118,105
+,101,119,32,119,105,116,104,95,105,100,34,32,99,111,110,116,101,110,116,61,34,99
+,97,112,105,116,97,108,32,111,102,32,116,104,101,13,10,60,108,105,110,107,32,114
+,101,108,61,34,115,114,101,108,101,97,115,101,32,111,102,32,116,104,101,112,111,
+105,110,116,32,111,117,116,32,116,104,97,116,120,77,76,72,116,116,112,82,101,113
+,117,101,115,116,97,110,100,32,115,117,98,115,101,113,117,101,110,116,115,101,99
+,111,110,100,32,108,97,114,103,101,115,116,118,101,114,121,32,105,109,112,111,
+114,116,97,110,116,115,112,101,99,105,102,105,99,97,116,105,111,110,115,115,117,
+114,102,97,99,101,32,111,102,32,116,104,101,97,112,112,108,105,101,100,32,116,
+111,32,116,104,101,102,111,114,101,105,103,110,32,112,111,108,105,99,121,95,115,
+101,116,68,111,109,97,105,110,78,97,109,101,101,115,116,97,98,108,105,115,104,
+101,100,32,105,110,105,115,32,98,101,108,105,101,118,101,100,32,116,111,73,110,
+32,97,100,100,105,116,105,111,110,32,116,111,109,101,97,110,105,110,103,32,111,
+102,32,116,104,101,105,115,32,110,97,109,101,100,32,97,102,116,101,114,116,111,
+32,112,114,111,116,101,99,116,32,116,104,101,105,115,32,114,101,112,114,101,115,
+101,110,116,101,100,68,101,99,108,97,114,97,116,105,111,110,32,111,102,109,111,
+114,101,32,101,102,102,105,99,105,101,110,116,67,108,97,115,115,105,102,105,99,
+97,116,105,111,110,111,116,104,101,114,32,102,111,114,109,115,32,111,102,104,101
+,32,114,101,116,117,114,110,101,100,32,116,111,60,115,112,97,110,32,99,108,97,
+115,115,61,34,99,112,101,114,102,111,114,109,97,110,99,101,32,111,102,40,102,117
+,110,99,116,105,111,110,40,41,32,123,13,105,102,32,97,110,100,32,111,110,108,121
+,32,105,102,114,101,103,105,111,110,115,32,111,102,32,116,104,101,108,101,97,100
+,105,110,103,32,116,111,32,116,104,101,114,101,108,97,116,105,111,110,115,32,119
+,105,116,104,85,110,105,116,101,100,32,78,97,116,105,111,110,115,115,116,121,108
+,101,61,34,104,101,105,103,104,116,58,111,116,104,101,114,32,116,104,97,110,32,
+116,104,101,121,112,101,34,32,99,111,110,116,101,110,116,61,34,65,115,115,111,99
+,105,97,116,105,111,110,32,111,102,10,60,47,104,101,97,100,62,10,60,98,111,100,
+121,108,111,99,97,116,101,100,32,111,110,32,116,104,101,105,115,32,114,101,102,
+101,114,114,101,100,32,116,111,40,105,110,99,108,117,100,105,110,103,32,116,104,
+101,99,111,110,99,101,110,116,114,97,116,105,111,110,115,116,104,101,32,105,110,
+100,105,118,105,100,117,97,108,97,109,111,110,103,32,116,104,101,32,109,111,115,
+116,116,104,97,110,32,97,110,121,32,111,116,104,101,114,47,62,10,60,108,105,110,
+107,32,114,101,108,61,34,32,114,101,116,117,114,110,32,102,97,108,115,101,59,116
+,104,101,32,112,117,114,112,111,115,101,32,111,102,116,104,101,32,97,98,105,108,
+105,116,121,32,116,111,59,99,111,108,111,114,58,35,102,102,102,125,10,46,10,60,
+115,112,97,110,32,99,108,97,115,115,61,34,116,104,101,32,115,117,98,106,101,99,
+116,32,111,102,100,101,102,105,110,105,116,105,111,110,115,32,111,102,62,13,10,
+60,108,105,110,107,32,114,101,108,61,34,99,108,97,105,109,32,116,104,97,116,32,
+116,104,101,104,97,118,101,32,100,101,118,101,108,111,112,101,100,60,116,97,98,
+108,101,32,119,105,100,116,104,61,34,99,101,108,101,98,114,97,116,105,111,110,32
+,111,102,70,111,108,108,111,119,105,110,103,32,116,104,101,32,116,111,32,100,105
+,115,116,105,110,103,117,105,115,104,60,115,112,97,110,32,99,108,97,115,115,61,
+34,98,116,97,107,101,115,32,112,108,97,99,101,32,105,110,117,110,100,101,114,32,
+116,104,101,32,110,97,109,101,110,111,116,101,100,32,116,104,97,116,32,116,104,
+101,62,60,33,91,101,110,100,105,102,93,45,45,62,10,115,116,121,108,101,61,34,109
+,97,114,103,105,110,45,105,110,115,116,101,97,100,32,111,102,32,116,104,101,105,
+110,116,114,111,100,117,99,101,100,32,116,104,101,116,104,101,32,112,114,111,99,
+101,115,115,32,111,102,105,110,99,114,101,97,115,105,110,103,32,116,104,101,100,
+105,102,102,101,114,101,110,99,101,115,32,105,110,101,115,116,105,109,97,116,101
+,100,32,116,104,97,116,101,115,112,101,99,105,97,108,108,121,32,116,104,101,47,
+100,105,118,62,60,100,105,118,32,105,100,61,34,119,97,115,32,101,118,101,110,116
+,117,97,108,108,121,116,104,114,111,117,103,104,111,117,116,32,104,105,115,116,
+104,101,32,100,105,102,102,101,114,101,110,99,101,115,111,109,101,116,104,105,
+110,103,32,116,104,97,116,115,112,97,110,62,60,47,115,112,97,110,62,60,47,115,
+105,103,110,105,102,105,99,97,110,116,108,121,32,62,60,47,115,99,114,105,112,116
+,62,13,10,13,10,101,110,118,105,114,111,110,109,101,110,116,97,108,32,116,111,32
+,112,114,101,118,101,110,116,32,116,104,101,104,97,118,101,32,98,101,101,110,32,
+117,115,101,100,101,115,112,101,99,105,97,108,108,121,32,102,111,114,117,110,100
+,101,114,115,116,97,110,100,32,116,104,101,105,115,32,101,115,115,101,110,116,
+105,97,108,108,121,119,101,114,101,32,116,104,101,32,102,105,114,115,116,105,115
+,32,116,104,101,32,108,97,114,103,101,115,116,104,97,118,101,32,98,101,101,110,
+32,109,97,100,101,34,32,115,114,99,61,34,104,116,116,112,58,47,47,105,110,116,
+101,114,112,114,101,116,101,100,32,97,115,115,101,99,111,110,100,32,104,97,108,
+102,32,111,102,99,114,111,108,108,105,110,103,61,34,110,111,34,32,105,115,32,99,
+111,109,112,111,115,101,100,32,111,102,73,73,44,32,72,111,108,121,32,82,111,109,
+97,110,105,115,32,101,120,112,101,99,116,101,100,32,116,111,104,97,118,101,32,
+116,104,101,105,114,32,111,119,110,100,101,102,105,110,101,100,32,97,115,32,116,
+104,101,116,114,97,100,105,116,105,111,110,97,108,108,121,32,104,97,118,101,32,
+100,105,102,102,101,114,101,110,116,97,114,101,32,111,102,116,101,110,32,117,115
+,101,100,116,111,32,101,110,115,117,114,101,32,116,104,97,116,97,103,114,101,101
+,109,101,110,116,32,119,105,116,104,99,111,110,116,97,105,110,105,110,103,32,116
+,104,101,97,114,101,32,102,114,101,113,117,101,110,116,108,121,105,110,102,111,
+114,109,97,116,105,111,110,32,111,110,101,120,97,109,112,108,101,32,105,115,32,
+116,104,101,114,101,115,117,108,116,105,110,103,32,105,110,32,97,60,47,97,62,60,
+47,108,105,62,60,47,117,108,62,32,99,108,97,115,115,61,34,102,111,111,116,101,
+114,97,110,100,32,101,115,112,101,99,105,97,108,108,121,116,121,112,101,61,34,98
+,117,116,116,111,110,34,32,60,47,115,112,97,110,62,60,47,115,112,97,110,62,119,
+104,105,99,104,32,105,110,99,108,117,100,101,100,62,10,60,109,101,116,97,32,110,
+97,109,101,61,34,99,111,110,115,105,100,101,114,101,100,32,116,104,101,99,97,114
+,114,105,101,100,32,111,117,116,32,98,121,72,111,119,101,118,101,114,44,32,105,
+116,32,105,115,98,101,99,97,109,101,32,112,97,114,116,32,111,102,105,110,32,114,
+101,108,97,116,105,111,110,32,116,111,112,111,112,117,108,97,114,32,105,110,32,
+116,104,101,116,104,101,32,99,97,112,105,116,97,108,32,111,102,119,97,115,32,111
+,102,102,105,99,105,97,108,108,121,119,104,105,99,104,32,104,97,115,32,98,101,
+101,110,116,104,101,32,72,105,115,116,111,114,121,32,111,102,97,108,116,101,114,
+110,97,116,105,118,101,32,116,111,100,105,102,102,101,114,101,110,116,32,102,114
+,111,109,116,111,32,115,117,112,112,111,114,116,32,116,104,101,115,117,103,103,
+101,115,116,101,100,32,116,104,97,116,105,110,32,116,104,101,32,112,114,111,99,
+101,115,115,32,32,60,100,105,118,32,99,108,97,115,115,61,34,116,104,101,32,102,
+111,117,110,100,97,116,105,111,110,98,101,99,97,117,115,101,32,111,102,32,104,
+105,115,99,111,110,99,101,114,110,101,100,32,119,105,116,104,116,104,101,32,117,
+110,105,118,101,114,115,105,116,121,111,112,112,111,115,101,100,32,116,111,32,
+116,104,101,116,104,101,32,99,111,110,116,101,120,116,32,111,102,60,115,112,97,
+110,32,99,108,97,115,115,61,34,112,116,101,120,116,34,32,110,97,109,101,61,34,
+113,34,9,9,60,100,105,118,32,99,108,97,115,115,61,34,116,104,101,32,115,99,105,
+101,110,116,105,102,105,99,114,101,112,114,101,115,101,110,116,101,100,32,98,121
+,109,97,116,104,101,109,97,116,105,99,105,97,110,115,101,108,101,99,116,101,100,
+32,98,121,32,116,104,101,116,104,97,116,32,104,97,118,101,32,98,101,101,110,62,
+60,100,105,118,32,99,108,97,115,115,61,34,99,100,105,118,32,105,100,61,34,104,
+101,97,100,101,114,105,110,32,112,97,114,116,105,99,117,108,97,114,44,99,111,110
+,118,101,114,116,101,100,32,105,110,116,111,41,59,10,60,47,115,99,114,105,112,
+116,62,10,60,112,104,105,108,111,115,111,112,104,105,99,97,108,32,115,114,112,
+115,107,111,104,114,118,97,116,115,107,105,116,105,225,186,191,110,103,32,86,105
+,225,187,135,116,208,160,209,131,209,129,209,129,208,186,208,184,208,185,209,128
+,209,131,209,129,209,129,208,186,208,184,208,185,105,110,118,101,115,116,105,103
+,97,99,105,195,179,110,112,97,114,116,105,99,105,112,97,99,105,195,179,110,208,
+186,208,190,209,130,208,190,209,128,209,139,208,181,208,190,208,177,208,187,208,
+176,209,129,209,130,208,184,208,186,208,190,209,130,208,190,209,128,209,139,208,
+185,209,135,208,181,208,187,208,190,208,178,208,181,208,186,209,129,208,184,209,
+129,209,130,208,181,208,188,209,139,208,157,208,190,208,178,208,190,209,129,209,
+130,208,184,208,186,208,190,209,130,208,190,209,128,209,139,209,133,208,190,208,
+177,208,187,208,176,209,129,209,130,209,140,208,178,209,128,208,181,208,188,208,
+181,208,189,208,184,208,186,208,190,209,130,208,190,209,128,208,176,209,143,209,
+129,208,181,208,179,208,190,208,180,208,189,209,143,209,129,208,186,208,176,209,
+135,208,176,209,130,209,140,208,189,208,190,208,178,208,190,209,129,209,130,208,
+184,208,163,208,186,209,128,208,176,208,184,208,189,209,139,208,178,208,190,208,
+191,209,128,208,190,209,129,209,139,208,186,208,190,209,130,208,190,209,128,208,
+190,208,185,209,129,208,180,208,181,208,187,208,176,209,130,209,140,208,191,208,
+190,208,188,208,190,209,137,209,140,209,142,209,129,209,128,208,181,208,180,209,
+129,209,130,208,178,208,190,208,177,209,128,208,176,208,183,208,190,208,188,209,
+129,209,130,208,190,209,128,208,190,208,189,209,139,209,131,209,135,208,176,209,
+129,209,130,208,184,208,181,209,130,208,181,209,135,208,181,208,189,208,184,208,
+181,208,147,208,187,208,176,208,178,208,189,208,176,209,143,208,184,209,129,209,
+130,208,190,209,128,208,184,208,184,209,129,208,184,209,129,209,130,208,181,208,
+188,208,176,209,128,208,181,209,136,208,181,208,189,208,184,209,143,208,161,208,
+186,208,176,209,135,208,176,209,130,209,140,208,191,208,190,209,141,209,130,208,
+190,208,188,209,131,209,129,208,187,208,181,208,180,209,131,208,181,209,130,209,
+129,208,186,208,176,208,183,208,176,209,130,209,140,209,130,208,190,208,178,208,
+176,209,128,208,190,208,178,208,186,208,190,208,189,208,181,209,135,208,189,208,
+190,209,128,208,181,209,136,208,181,208,189,208,184,208,181,208,186,208,190,209,
+130,208,190,209,128,208,190,208,181,208,190,209,128,208,179,208,176,208,189,208,
+190,208,178,208,186,208,190,209,130,208,190,209,128,208,190,208,188,208,160,208,
+181,208,186,208,187,208,176,208,188,208,176,216,167,217,132,217,133,217,134,216,
+170,216,175,217,137,217,133,217,134,216,170,216,175,217,138,216,167,216,170,216,
+167,217,132,217,133,217,136,216,182,217,136,216,185,216,167,217,132,216,168,216,
+177,216,167,217,133,216,172,216,167,217,132,217,133,217,136,216,167,217,130,216,
+185,216,167,217,132,216,177,216,179,216,167,216,166,217,132,217,133,216,180,216,
+167,216,177,217,131,216,167,216,170,216,167,217,132,216,163,216,185,216,182,216,
+167,216,161,216,167,217,132,216,177,217,138,216,167,216,182,216,169,216,167,217,
+132,216,170,216,181,217,133,217,138,217,133,216,167,217,132,216,167,216,185,216,
+182,216,167,216,161,216,167,217,132,217,134,216,170,216,167,216,166,216,172,216,
+167,217,132,216,163,217,132,216,185,216,167,216,168,216,167,217,132,216,170,216,
+179,216,172,217,138,217,132,216,167,217,132,216,163,217,130,216,179,216,167,217,
+133,216,167,217,132,216,182,216,186,216,183,216,167,216,170,216,167,217,132,217,
+129,217,138,216,175,217,138,217,136,216,167,217,132,216,170,216,177,216,173,217,
+138,216,168,216,167,217,132,216,172,216,175,217,138,216,175,216,169,216,167,217,
+132,216,170,216,185,217,132,217,138,217,133,216,167,217,132,216,163,216,174,216,
+168,216,167,216,177,216,167,217,132,216,167,217,129,217,132,216,167,217,133,216,
+167,217,132,216,163,217,129,217,132,216,167,217,133,216,167,217,132,216,170,216,
+167,216,177,217,138,216,174,216,167,217,132,216,170,217,130,217,134,217,138,216,
+169,216,167,217,132,216,167,217,132,216,185,216,167,216,168,216,167,217,132,216,
+174,217,136,216,167,216,183,216,177,216,167,217,132,217,133,216,172,216,170,217,
+133,216,185,216,167,217,132,216,175,217,138,217,131,217,136,216,177,216,167,217,
+132,216,179,217,138,216,167,216,173,216,169,216,185,216,168,216,175,216,167,217,
+132,217,132,217,135,216,167,217,132,216,170,216,177,216,168,217,138,216,169,216,
+167,217,132,216,177,217,136,216,167,216,168,216,183,216,167,217,132,216,163,216,
+175,216,168,217,138,216,169,216,167,217,132,216,167,216,174,216,168,216,167,216,
+177,216,167,217,132,217,133,216,170,216,173,216,175,216,169,216,167,217,132,216,
+167,216,186,216,167,217,134,217,138,99,117,114,115,111,114,58,112,111,105,110,
+116,101,114,59,60,47,116,105,116,108,101,62,10,60,109,101,116,97,32,34,32,104,
+114,101,102,61,34,104,116,116,112,58,47,47,34,62,60,115,112,97,110,32,99,108,97,
+115,115,61,34,109,101,109,98,101,114,115,32,111,102,32,116,104,101,32,119,105,
+110,100,111,119,46,108,111,99,97,116,105,111,110,118,101,114,116,105,99,97,108,
+45,97,108,105,103,110,58,47,97,62,32,124,32,60,97,32,104,114,101,102,61,34,60,33
+,100,111,99,116,121,112,101,32,104,116,109,108,62,109,101,100,105,97,61,34,115,
+99,114,101,101,110,34,32,60,111,112,116,105,111,110,32,118,97,108,117,101,61,34,
+102,97,118,105,99,111,110,46,105,99,111,34,32,47,62,10,9,9,60,100,105,118,32,99,
+108,97,115,115,61,34,99,104,97,114,97,99,116,101,114,105,115,116,105,99,115,34,
+32,109,101,116,104,111,100,61,34,103,101,116,34,32,47,98,111,100,121,62,10,60,47
+,104,116,109,108,62,10,115,104,111,114,116,99,117,116,32,105,99,111,110,34,32,
+100,111,99,117,109,101,110,116,46,119,114,105,116,101,40,112,97,100,100,105,110,
+103,45,98,111,116,116,111,109,58,114,101,112,114,101,115,101,110,116,97,116,105,
+118,101,115,115,117,98,109,105,116,34,32,118,97,108,117,101,61,34,97,108,105,103
+,110,61,34,99,101,110,116,101,114,34,32,116,104,114,111,117,103,104,111,117,116,
+32,116,104,101,32,115,99,105,101,110,99,101,32,102,105,99,116,105,111,110,10,32,
+32,60,100,105,118,32,99,108,97,115,115,61,34,115,117,98,109,105,116,34,32,99,108
+,97,115,115,61,34,111,110,101,32,111,102,32,116,104,101,32,109,111,115,116,32,
+118,97,108,105,103,110,61,34,116,111,112,34,62,60,119,97,115,32,101,115,116,97,
+98,108,105,115,104,101,100,41,59,13,10,60,47,115,99,114,105,112,116,62,13,10,114
+,101,116,117,114,110,32,102,97,108,115,101,59,34,62,41,46,115,116,121,108,101,46
+,100,105,115,112,108,97,121,98,101,99,97,117,115,101,32,111,102,32,116,104,101,
+32,100,111,99,117,109,101,110,116,46,99,111,111,107,105,101,60,102,111,114,109,
+32,97,99,116,105,111,110,61,34,47,125,98,111,100,121,123,109,97,114,103,105,110,
+58,48,59,69,110,99,121,99,108,111,112,101,100,105,97,32,111,102,118,101,114,115,
+105,111,110,32,111,102,32,116,104,101,32,46,99,114,101,97,116,101,69,108,101,109
+,101,110,116,40,110,97,109,101,34,32,99,111,110,116,101,110,116,61,34,60,47,100,
+105,118,62,10,60,47,100,105,118,62,10,10,97,100,109,105,110,105,115,116,114,97,
+116,105,118,101,32,60,47,98,111,100,121,62,10,60,47,104,116,109,108,62,104,105,
+115,116,111,114,121,32,111,102,32,116,104,101,32,34,62,60,105,110,112,117,116,32
+,116,121,112,101,61,34,112,111,114,116,105,111,110,32,111,102,32,116,104,101,32,
+97,115,32,112,97,114,116,32,111,102,32,116,104,101,32,38,110,98,115,112,59,60,97
+,32,104,114,101,102,61,34,111,116,104,101,114,32,99,111,117,110,116,114,105,101,
+115,34,62,10,60,100,105,118,32,99,108,97,115,115,61,34,60,47,115,112,97,110,62,
+60,47,115,112,97,110,62,60,73,110,32,111,116,104,101,114,32,119,111,114,100,115,
+44,100,105,115,112,108,97,121,58,32,98,108,111,99,107,59,99,111,110,116,114,111,
+108,32,111,102,32,116,104,101,32,105,110,116,114,111,100,117,99,116,105,111,110,
+32,111,102,47,62,10,60,109,101,116,97,32,110,97,109,101,61,34,97,115,32,119,101,
+108,108,32,97,115,32,116,104,101,32,105,110,32,114,101,99,101,110,116,32,121,101
+,97,114,115,13,10,9,60,100,105,118,32,99,108,97,115,115,61,34,60,47,100,105,118,
+62,10,9,60,47,100,105,118,62,10,105,110,115,112,105,114,101,100,32,98,121,32,116
+,104,101,116,104,101,32,101,110,100,32,111,102,32,116,104,101,32,99,111,109,112,
+97,116,105,98,108,101,32,119,105,116,104,98,101,99,97,109,101,32,107,110,111,119
+,110,32,97,115,32,115,116,121,108,101,61,34,109,97,114,103,105,110,58,46,106,115
+,34,62,60,47,115,99,114,105,112,116,62,60,32,73,110,116,101,114,110,97,116,105,
+111,110,97,108,32,116,104,101,114,101,32,104,97,118,101,32,98,101,101,110,71,101
+,114,109,97,110,32,108,97,110,103,117,97,103,101,32,115,116,121,108,101,61,34,99
+,111,108,111,114,58,35,67,111,109,109,117,110,105,115,116,32,80,97,114,116,121,
+99,111,110,115,105,115,116,101,110,116,32,119,105,116,104,98,111,114,100,101,114
+,61,34,48,34,32,99,101,108,108,32,109,97,114,103,105,110,104,101,105,103,104,116
+,61,34,116,104,101,32,109,97,106,111,114,105,116,121,32,111,102,34,32,97,108,105
+,103,110,61,34,99,101,110,116,101,114,114,101,108,97,116,101,100,32,116,111,32,
+116,104,101,32,109,97,110,121,32,100,105,102,102,101,114,101,110,116,32,79,114,
+116,104,111,100,111,120,32,67,104,117,114,99,104,115,105,109,105,108,97,114,32,
+116,111,32,116,104,101,32,47,62,10,60,108,105,110,107,32,114,101,108,61,34,115,
+119,97,115,32,111,110,101,32,111,102,32,116,104,101,32,117,110,116,105,108,32,
+104,105,115,32,100,101,97,116,104,125,41,40,41,59,10,60,47,115,99,114,105,112,
+116,62,111,116,104,101,114,32,108,97,110,103,117,97,103,101,115,99,111,109,112,
+97,114,101,100,32,116,111,32,116,104,101,112,111,114,116,105,111,110,115,32,111,
+102,32,116,104,101,116,104,101,32,78,101,116,104,101,114,108,97,110,100,115,116,
+104,101,32,109,111,115,116,32,99,111,109,109,111,110,98,97,99,107,103,114,111,
+117,110,100,58,117,114,108,40,97,114,103,117,101,100,32,116,104,97,116,32,116,
+104,101,115,99,114,111,108,108,105,110,103,61,34,110,111,34,32,105,110,99,108,
+117,100,101,100,32,105,110,32,116,104,101,78,111,114,116,104,32,65,109,101,114,
+105,99,97,110,32,116,104,101,32,110,97,109,101,32,111,102,32,116,104,101,105,110
+,116,101,114,112,114,101,116,97,116,105,111,110,115,116,104,101,32,116,114,97,
+100,105,116,105,111,110,97,108,100,101,118,101,108,111,112,109,101,110,116,32,
+111,102,32,102,114,101,113,117,101,110,116,108,121,32,117,115,101,100,97,32,99,
+111,108,108,101,99,116,105,111,110,32,111,102,118,101,114,121,32,115,105,109,105
+,108,97,114,32,116,111,115,117,114,114,111,117,110,100,105,110,103,32,116,104,
+101,101,120,97,109,112,108,101,32,111,102,32,116,104,105,115,97,108,105,103,110,
+61,34,99,101,110,116,101,114,34,62,119,111,117,108,100,32,104,97,118,101,32,98,
+101,101,110,105,109,97,103,101,95,99,97,112,116,105,111,110,32,61,97,116,116,97,
+99,104,101,100,32,116,111,32,116,104,101,115,117,103,103,101,115,116,105,110,103
+,32,116,104,97,116,105,110,32,116,104,101,32,102,111,114,109,32,111,102,32,105,
+110,118,111,108,118,101,100,32,105,110,32,116,104,101,105,115,32,100,101,114,105
+,118,101,100,32,102,114,111,109,110,97,109,101,100,32,97,102,116,101,114,32,116,
+104,101,73,110,116,114,111,100,117,99,116,105,111,110,32,116,111,114,101,115,116
+,114,105,99,116,105,111,110,115,32,111,110,32,115,116,121,108,101,61,34,119,105,
+100,116,104,58,32,99,97,110,32,98,101,32,117,115,101,100,32,116,111,32,116,104,
+101,32,99,114,101,97,116,105,111,110,32,111,102,109,111,115,116,32,105,109,112,
+111,114,116,97,110,116,32,105,110,102,111,114,109,97,116,105,111,110,32,97,110,
+100,114,101,115,117,108,116,101,100,32,105,110,32,116,104,101,99,111,108,108,97,
+112,115,101,32,111,102,32,116,104,101,84,104,105,115,32,109,101,97,110,115,32,
+116,104,97,116,101,108,101,109,101,110,116,115,32,111,102,32,116,104,101,119,97,
+115,32,114,101,112,108,97,99,101,100,32,98,121,97,110,97,108,121,115,105,115,32,
+111,102,32,116,104,101,105,110,115,112,105,114,97,116,105,111,110,32,102,111,114
+,114,101,103,97,114,100,101,100,32,97,115,32,116,104,101,109,111,115,116,32,115,
+117,99,99,101,115,115,102,117,108,107,110,111,119,110,32,97,115,32,38,113,117,
+111,116,59,97,32,99,111,109,112,114,101,104,101,110,115,105,118,101,72,105,115,
+116,111,114,121,32,111,102,32,116,104,101,32,119,101,114,101,32,99,111,110,115,
+105,100,101,114,101,100,114,101,116,117,114,110,101,100,32,116,111,32,116,104,
+101,97,114,101,32,114,101,102,101,114,114,101,100,32,116,111,85,110,115,111,117,
+114,99,101,100,32,105,109,97,103,101,62,10,9,60,100,105,118,32,99,108,97,115,115
+,61,34,99,111,110,115,105,115,116,115,32,111,102,32,116,104,101,115,116,111,112,
+80,114,111,112,97,103,97,116,105,111,110,105,110,116,101,114,101,115,116,32,105,
+110,32,116,104,101,97,118,97,105,108,97,98,105,108,105,116,121,32,111,102,97,112
+,112,101,97,114,115,32,116,111,32,104,97,118,101,101,108,101,99,116,114,111,109,
+97,103,110,101,116,105,99,101,110,97,98,108,101,83,101,114,118,105,99,101,115,40
+,102,117,110,99,116,105,111,110,32,111,102,32,116,104,101,73,116,32,105,115,32,
+105,109,112,111,114,116,97,110,116,60,47,115,99,114,105,112,116,62,60,47,100,105
+,118,62,102,117,110,99,116,105,111,110,40,41,123,118,97,114,32,114,101,108,97,
+116,105,118,101,32,116,111,32,116,104,101,97,115,32,97,32,114,101,115,117,108,
+116,32,111,102,32,116,104,101,32,112,111,115,105,116,105,111,110,32,111,102,70,
+111,114,32,101,120,97,109,112,108,101,44,32,105,110,32,109,101,116,104,111,100,
+61,34,112,111,115,116,34,32,119,97,115,32,102,111,108,108,111,119,101,100,32,98,
+121,38,97,109,112,59,109,100,97,115,104,59,32,116,104,101,116,104,101,32,97,112,
+112,108,105,99,97,116,105,111,110,106,115,34,62,60,47,115,99,114,105,112,116,62,
+13,10,117,108,62,60,47,100,105,118,62,60,47,100,105,118,62,97,102,116,101,114,32
+,116,104,101,32,100,101,97,116,104,119,105,116,104,32,114,101,115,112,101,99,116
+,32,116,111,115,116,121,108,101,61,34,112,97,100,100,105,110,103,58,105,115,32,
+112,97,114,116,105,99,117,108,97,114,108,121,100,105,115,112,108,97,121,58,105,
+110,108,105,110,101,59,32,116,121,112,101,61,34,115,117,98,109,105,116,34,32,105
+,115,32,100,105,118,105,100,101,100,32,105,110,116,111,228,184,173,230,150,135,
+32,40,231,174,128,228,189,147,41,114,101,115,112,111,110,115,97,98,105,108,105,
+100,97,100,97,100,109,105,110,105,115,116,114,97,99,105,195,179,110,105,110,116,
+101,114,110,97,99,105,111,110,97,108,101,115,99,111,114,114,101,115,112,111,110,
+100,105,101,110,116,101,224,164,137,224,164,170,224,164,175,224,165,139,224,164,
+151,224,164,170,224,165,130,224,164,176,224,165,141,224,164,181,224,164,185,224,
+164,174,224,164,190,224,164,176,224,165,135,224,164,178,224,165,139,224,164,151,
+224,165,139,224,164,130,224,164,154,224,165,129,224,164,168,224,164,190,224,164,
+181,224,164,178,224,165,135,224,164,149,224,164,191,224,164,168,224,164,184,224,
+164,176,224,164,149,224,164,190,224,164,176,224,164,170,224,165,129,224,164,178,
+224,164,191,224,164,184,224,164,150,224,165,139,224,164,156,224,165,135,224,164,
+130,224,164,154,224,164,190,224,164,185,224,164,191,224,164,143,224,164,173,224,
+165,135,224,164,156,224,165,135,224,164,130,224,164,182,224,164,190,224,164,174,
+224,164,191,224,164,178,224,164,185,224,164,174,224,164,190,224,164,176,224,165,
+128,224,164,156,224,164,190,224,164,151,224,164,176,224,164,163,224,164,172,224,
+164,168,224,164,190,224,164,168,224,165,135,224,164,149,224,165,129,224,164,174,
+224,164,190,224,164,176,224,164,172,224,165,141,224,164,178,224,165,137,224,164,
+151,224,164,174,224,164,190,224,164,178,224,164,191,224,164,149,224,164,174,224,
+164,185,224,164,191,224,164,178,224,164,190,224,164,170,224,165,131,224,164,183,
+224,165,141,224,164,160,224,164,172,224,164,162,224,164,188,224,164,164,224,165,
+135,224,164,173,224,164,190,224,164,156,224,164,170,224,164,190,224,164,149,224,
+165,141,224,164,178,224,164,191,224,164,149,224,164,159,224,165,141,224,164,176,
+224,165,135,224,164,168,224,164,150,224,164,191,224,164,178,224,164,190,224,164,
+171,224,164,166,224,165,140,224,164,176,224,164,190,224,164,168,224,164,174,224,
+164,190,224,164,174,224,164,178,224,165,135,224,164,174,224,164,164,224,164,166,
+224,164,190,224,164,168,224,164,172,224,164,190,224,164,156,224,164,190,224,164,
+176,224,164,181,224,164,191,224,164,149,224,164,190,224,164,184,224,164,149,224,
+165,141,224,164,175,224,165,139,224,164,130,224,164,154,224,164,190,224,164,185,
+224,164,164,224,165,135,224,164,170,224,164,185,224,165,129,224,164,129,224,164,
+154,224,164,172,224,164,164,224,164,190,224,164,175,224,164,190,224,164,184,224,
+164,130,224,164,181,224,164,190,224,164,166,224,164,166,224,165,135,224,164,150,
+224,164,168,224,165,135,224,164,170,224,164,191,224,164,155,224,164,178,224,165,
+135,224,164,181,224,164,191,224,164,182,224,165,135,224,164,183,224,164,176,224,
+164,190,224,164,156,224,165,141,224,164,175,224,164,137,224,164,164,224,165,141,
+224,164,164,224,164,176,224,164,174,224,165,129,224,164,130,224,164,172,224,164,
+136,224,164,166,224,165,139,224,164,168,224,165,139,224,164,130,224,164,137,224,
+164,170,224,164,149,224,164,176,224,164,163,224,164,170,224,164,162,224,164,188,
+224,165,135,224,164,130,224,164,184,224,165,141,224,164,165,224,164,191,224,164,
+164,224,164,171,224,164,191,224,164,178,224,165,141,224,164,174,224,164,174,224,
+165,129,224,164,150,224,165,141,224,164,175,224,164,133,224,164,154,224,165,141,
+224,164,155,224,164,190,224,164,155,224,165,130,224,164,159,224,164,164,224,165,
+128,224,164,184,224,164,130,224,164,151,224,165,128,224,164,164,224,164,156,224,
+164,190,224,164,143,224,164,151,224,164,190,224,164,181,224,164,191,224,164,173,
+224,164,190,224,164,151,224,164,152,224,164,163,224,165,141,224,164,159,224,165,
+135,224,164,166,224,165,130,224,164,184,224,164,176,224,165,135,224,164,166,224,
+164,191,224,164,168,224,165,139,224,164,130,224,164,185,224,164,164,224,165,141,
+224,164,175,224,164,190,224,164,184,224,165,135,224,164,149,224,165,141,224,164,
+184,224,164,151,224,164,190,224,164,130,224,164,167,224,165,128,224,164,181,224,
+164,191,224,164,182,224,165,141,224,164,181,224,164,176,224,164,190,224,164,164,
+224,165,135,224,164,130,224,164,166,224,165,136,224,164,159,224,165,141,224,164,
+184,224,164,168,224,164,149,224,165,141,224,164,182,224,164,190,224,164,184,224,
+164,190,224,164,174,224,164,168,224,165,135,224,164,133,224,164,166,224,164,190,
+224,164,178,224,164,164,224,164,172,224,164,191,224,164,156,224,164,178,224,165,
+128,224,164,170,224,165,129,224,164,176,224,165,130,224,164,183,224,164,185,224,
+164,191,224,164,130,224,164,166,224,165,128,224,164,174,224,164,191,224,164,164,
+224,165,141,224,164,176,224,164,149,224,164,181,224,164,191,224,164,164,224,164,
+190,224,164,176,224,165,129,224,164,170,224,164,175,224,165,135,224,164,184,224,
+165,141,224,164,165,224,164,190,224,164,168,224,164,149,224,164,176,224,165,139,
+224,164,161,224,164,188,224,164,174,224,165,129,224,164,149,224,165,141,224,164,
+164,224,164,175,224,165,139,224,164,156,224,164,168,224,164,190,224,164,149,224,
+165,131,224,164,170,224,164,175,224,164,190,224,164,170,224,165,139,224,164,184,
+224,165,141,224,164,159,224,164,152,224,164,176,224,165,135,224,164,178,224,165,
+130,224,164,149,224,164,190,224,164,176,224,165,141,224,164,175,224,164,181,224,
+164,191,224,164,154,224,164,190,224,164,176,224,164,184,224,165,130,224,164,154,
+224,164,168,224,164,190,224,164,174,224,165,130,224,164,178,224,165,141,224,164,
+175,224,164,166,224,165,135,224,164,150,224,165,135,224,164,130,224,164,185,224,
+164,174,224,165,135,224,164,182,224,164,190,224,164,184,224,165,141,224,164,149,
+224,165,130,224,164,178,224,164,174,224,165,136,224,164,130,224,164,168,224,165,
+135,224,164,164,224,165,136,224,164,175,224,164,190,224,164,176,224,164,156,224,
+164,191,224,164,184,224,164,149,224,165,135,114,115,115,43,120,109,108,34,32,116
+,105,116,108,101,61,34,45,116,121,112,101,34,32,99,111,110,116,101,110,116,61,34
+,116,105,116,108,101,34,32,99,111,110,116,101,110,116,61,34,97,116,32,116,104,
+101,32,115,97,109,101,32,116,105,109,101,46,106,115,34,62,60,47,115,99,114,105,
+112,116,62,10,60,34,32,109,101,116,104,111,100,61,34,112,111,115,116,34,32,60,47
+,115,112,97,110,62,60,47,97,62,60,47,108,105,62,118,101,114,116,105,99,97,108,45
+,97,108,105,103,110,58,116,47,106,113,117,101,114,121,46,109,105,110,46,106,115,
+34,62,46,99,108,105,99,107,40,102,117,110,99,116,105,111,110,40,32,115,116,121,
+108,101,61,34,112,97,100,100,105,110,103,45,125,41,40,41,59,10,60,47,115,99,114,
+105,112,116,62,10,60,47,115,112,97,110,62,60,97,32,104,114,101,102,61,34,60,97,
+32,104,114,101,102,61,34,104,116,116,112,58,47,47,41,59,32,114,101,116,117,114,
+110,32,102,97,108,115,101,59,116,101,120,116,45,100,101,99,111,114,97,116,105,
+111,110,58,32,115,99,114,111,108,108,105,110,103,61,34,110,111,34,32,98,111,114,
+100,101,114,45,99,111,108,108,97,112,115,101,58,97,115,115,111,99,105,97,116,101
+,100,32,119,105,116,104,32,66,97,104,97,115,97,32,73,110,100,111,110,101,115,105
+,97,69,110,103,108,105,115,104,32,108,97,110,103,117,97,103,101,60,116,101,120,
+116,32,120,109,108,58,115,112,97,99,101,61,46,103,105,102,34,32,98,111,114,100,
+101,114,61,34,48,34,60,47,98,111,100,121,62,10,60,47,104,116,109,108,62,10,111,
+118,101,114,102,108,111,119,58,104,105,100,100,101,110,59,105,109,103,32,115,114
+,99,61,34,104,116,116,112,58,47,47,97,100,100,69,118,101,110,116,76,105,115,116,
+101,110,101,114,114,101,115,112,111,110,115,105,98,108,101,32,102,111,114,32,115
+,46,106,115,34,62,60,47,115,99,114,105,112,116,62,10,47,102,97,118,105,99,111,
+110,46,105,99,111,34,32,47,62,111,112,101,114,97,116,105,110,103,32,115,121,115,
+116,101,109,34,32,115,116,121,108,101,61,34,119,105,100,116,104,58,49,116,97,114
+,103,101,116,61,34,95,98,108,97,110,107,34,62,83,116,97,116,101,32,85,110,105,
+118,101,114,115,105,116,121,116,101,120,116,45,97,108,105,103,110,58,108,101,102
+,116,59,10,100,111,99,117,109,101,110,116,46,119,114,105,116,101,40,44,32,105,
+110,99,108,117,100,105,110,103,32,116,104,101,32,97,114,111,117,110,100,32,116,
+104,101,32,119,111,114,108,100,41,59,13,10,60,47,115,99,114,105,112,116,62,13,10
+,60,34,32,115,116,121,108,101,61,34,104,101,105,103,104,116,58,59,111,118,101,
+114,102,108,111,119,58,104,105,100,100,101,110,109,111,114,101,32,105,110,102,
+111,114,109,97,116,105,111,110,97,110,32,105,110,116,101,114,110,97,116,105,111,
+110,97,108,97,32,109,101,109,98,101,114,32,111,102,32,116,104,101,32,111,110,101
+,32,111,102,32,116,104,101,32,102,105,114,115,116,99,97,110,32,98,101,32,102,111
+,117,110,100,32,105,110,32,60,47,100,105,118,62,10,9,9,60,47,100,105,118,62,10,
+100,105,115,112,108,97,121,58,32,110,111,110,101,59,34,62,34,32,47,62,10,60,108,
+105,110,107,32,114,101,108,61,34,10,32,32,40,102,117,110,99,116,105,111,110,40,
+41,32,123,116,104,101,32,49,53,116,104,32,99,101,110,116,117,114,121,46,112,114,
+101,118,101,110,116,68,101,102,97,117,108,116,40,108,97,114,103,101,32,110,117,
+109,98,101,114,32,111,102,32,66,121,122,97,110,116,105,110,101,32,69,109,112,105
+,114,101,46,106,112,103,124,116,104,117,109,98,124,108,101,102,116,124,118,97,
+115,116,32,109,97,106,111,114,105,116,121,32,111,102,109,97,106,111,114,105,116,
+121,32,111,102,32,116,104,101,32,32,97,108,105,103,110,61,34,99,101,110,116,101,
+114,34,62,85,110,105,118,101,114,115,105,116,121,32,80,114,101,115,115,100,111,
+109,105,110,97,116,101,100,32,98,121,32,116,104,101,83,101,99,111,110,100,32,87,
+111,114,108,100,32,87,97,114,100,105,115,116,114,105,98,117,116,105,111,110,32,
+111,102,32,115,116,121,108,101,61,34,112,111,115,105,116,105,111,110,58,116,104,
+101,32,114,101,115,116,32,111,102,32,116,104,101,32,99,104,97,114,97,99,116,101,
+114,105,122,101,100,32,98,121,32,114,101,108,61,34,110,111,102,111,108,108,111,
+119,34,62,100,101,114,105,118,101,115,32,102,114,111,109,32,116,104,101,114,97,
+116,104,101,114,32,116,104,97,110,32,116,104,101,32,97,32,99,111,109,98,105,110,
+97,116,105,111,110,32,111,102,115,116,121,108,101,61,34,119,105,100,116,104,58,
+49,48,48,69,110,103,108,105,115,104,45,115,112,101,97,107,105,110,103,99,111,109
+,112,117,116,101,114,32,115,99,105,101,110,99,101,98,111,114,100,101,114,61,34,
+48,34,32,97,108,116,61,34,116,104,101,32,101,120,105,115,116,101,110,99,101,32,
+111,102,68,101,109,111,99,114,97,116,105,99,32,80,97,114,116,121,34,32,115,116,
+121,108,101,61,34,109,97,114,103,105,110,45,70,111,114,32,116,104,105,115,32,114
+,101,97,115,111,110,44,46,106,115,34,62,60,47,115,99,114,105,112,116,62,10,9,115
+,66,121,84,97,103,78,97,109,101,40,115,41,91,48,93,106,115,34,62,60,47,115,99,
+114,105,112,116,62,13,10,60,46,106,115,34,62,60,47,115,99,114,105,112,116,62,13,
+10,108,105,110,107,32,114,101,108,61,34,105,99,111,110,34,32,39,32,97,108,116,61
+,39,39,32,99,108,97,115,115,61,39,102,111,114,109,97,116,105,111,110,32,111,102,
+32,116,104,101,118,101,114,115,105,111,110,115,32,111,102,32,116,104,101,32,60,
+47,97,62,60,47,100,105,118,62,60,47,100,105,118,62,47,112,97,103,101,62,10,32,32
+,60,112,97,103,101,62,10,60,100,105,118,32,99,108,97,115,115,61,34,99,111,110,
+116,98,101,99,97,109,101,32,116,104,101,32,102,105,114,115,116,98,97,104,97,115,
+97,32,73,110,100,111,110,101,115,105,97,101,110,103,108,105,115,104,32,40,115,
+105,109,112,108,101,41,206,149,206,187,206,187,206,183,206,189,206,185,206,186,
+206,172,209,133,209,128,208,178,208,176,209,130,209,129,208,186,208,184,208,186,
+208,190,208,188,208,191,208,176,208,189,208,184,208,184,209,143,208,178,208,187,
+209,143,208,181,209,130,209,129,209,143,208,148,208,190,208,177,208,176,208,178,
+208,184,209,130,209,140,209,135,208,181,208,187,208,190,208,178,208,181,208,186,
+208,176,209,128,208,176,208,183,208,178,208,184,209,130,208,184,209,143,208,152,
+208,189,209,130,208,181,209,128,208,189,208,181,209,130,208,158,209,130,208,178,
+208,181,209,130,208,184,209,130,209,140,208,189,208,176,208,191,209,128,208,184,
+208,188,208,181,209,128,208,184,208,189,209,130,208,181,209,128,208,189,208,181,
+209,130,208,186,208,190,209,130,208,190,209,128,208,190,208,179,208,190,209,129,
+209,130,209,128,208,176,208,189,208,184,209,134,209,139,208,186,208,176,209,135,
+208,181,209,129,209,130,208,178,208,181,209,131,209,129,208,187,208,190,208,178,
+208,184,209,143,209,133,208,191,209,128,208,190,208,177,208,187,208,181,208,188,
+209,139,208,191,208,190,208,187,209,131,209,135,208,184,209,130,209,140,209,143,
+208,178,208,187,209,143,209,142,209,130,209,129,209,143,208,189,208,176,208,184,
+208,177,208,190,208,187,208,181,208,181,208,186,208,190,208,188,208,191,208,176,
+208,189,208,184,209,143,208,178,208,189,208,184,208,188,208,176,208,189,208,184,
+208,181,209,129,209,128,208,181,208,180,209,129,209,130,208,178,208,176,216,167,
+217,132,217,133,217,136,216,167,216,182,217,138,216,185,216,167,217,132,216,177,
+216,166,217,138,216,179,217,138,216,169,216,167,217,132,216,167,217,134,216,170,
+217,130,216,167,217,132,217,133,216,180,216,167,216,177,217,131,216,167,216,170,
+217,131,216,167,217,132,216,179,217,138,216,167,216,177,216,167,216,170,216,167,
+217,132,217,133,217,131,216,170,217,136,216,168,216,169,216,167,217,132,216,179,
+216,185,217,136,216,175,217,138,216,169,216,167,216,173,216,181,216,167,216,166,
+217,138,216,167,216,170,216,167,217,132,216,185,216,167,217,132,217,133,217,138,
+216,169,216,167,217,132,216,181,217,136,216,170,217,138,216,167,216,170,216,167,
+217,132,216,167,217,134,216,170,216,177,217,134,216,170,216,167,217,132,216,170,
+216,181,216,167,217,133,217,138,217,133,216,167,217,132,216,165,216,179,217,132,
+216,167,217,133,217,138,216,167,217,132,217,133,216,180,216,167,216,177,217,131,
+216,169,216,167,217,132,217,133,216,177,216,166,217,138,216,167,216,170,114,111,
+98,111,116,115,34,32,99,111,110,116,101,110,116,61,34,60,100,105,118,32,105,100,
+61,34,102,111,111,116,101,114,34,62,116,104,101,32,85,110,105,116,101,100,32,83,
+116,97,116,101,115,60,105,109,103,32,115,114,99,61,34,104,116,116,112,58,47,47,
+46,106,112,103,124,114,105,103,104,116,124,116,104,117,109,98,124,46,106,115,34,
+62,60,47,115,99,114,105,112,116,62,13,10,60,108,111,99,97,116,105,111,110,46,112
+,114,111,116,111,99,111,108,102,114,97,109,101,98,111,114,100,101,114,61,34,48,
+34,32,115,34,32,47,62,10,60,109,101,116,97,32,110,97,109,101,61,34,60,47,97,62,
+60,47,100,105,118,62,60,47,100,105,118,62,60,102,111,110,116,45,119,101,105,103,
+104,116,58,98,111,108,100,59,38,113,117,111,116,59,32,97,110,100,32,38,113,117,
+111,116,59,100,101,112,101,110,100,105,110,103,32,111,110,32,116,104,101,32,109,
+97,114,103,105,110,58,48,59,112,97,100,100,105,110,103,58,34,32,114,101,108,61,
+34,110,111,102,111,108,108,111,119,34,32,80,114,101,115,105,100,101,110,116,32,
+111,102,32,116,104,101,32,116,119,101,110,116,105,101,116,104,32,99,101,110,116,
+117,114,121,101,118,105,115,105,111,110,62,10,32,32,60,47,112,97,103,101,73,110,
+116,101,114,110,101,116,32,69,120,112,108,111,114,101,114,97,46,97,115,121,110,
+99,32,61,32,116,114,117,101,59,13,10,105,110,102,111,114,109,97,116,105,111,110,
+32,97,98,111,117,116,60,100,105,118,32,105,100,61,34,104,101,97,100,101,114,34,
+62,34,32,97,99,116,105,111,110,61,34,104,116,116,112,58,47,47,60,97,32,104,114,
+101,102,61,34,104,116,116,112,115,58,47,47,60,100,105,118,32,105,100,61,34,99,
+111,110,116,101,110,116,34,60,47,100,105,118,62,13,10,60,47,100,105,118,62,13,10
+,60,100,101,114,105,118,101,100,32,102,114,111,109,32,116,104,101,32,60,105,109,
+103,32,115,114,99,61,39,104,116,116,112,58,47,47,97,99,99,111,114,100,105,110,
+103,32,116,111,32,116,104,101,32,10,60,47,98,111,100,121,62,10,60,47,104,116,109
+,108,62,10,115,116,121,108,101,61,34,102,111,110,116,45,115,105,122,101,58,115,
+99,114,105,112,116,32,108,97,110,103,117,97,103,101,61,34,65,114,105,97,108,44,
+32,72,101,108,118,101,116,105,99,97,44,60,47,97,62,60,115,112,97,110,32,99,108,
+97,115,115,61,34,60,47,115,99,114,105,112,116,62,60,115,99,114,105,112,116,32,
+112,111,108,105,116,105,99,97,108,32,112,97,114,116,105,101,115,116,100,62,60,47
+,116,114,62,60,47,116,97,98,108,101,62,60,104,114,101,102,61,34,104,116,116,112,
+58,47,47,119,119,119,46,105,110,116,101,114,112,114,101,116,97,116,105,111,110,
+32,111,102,114,101,108,61,34,115,116,121,108,101,115,104,101,101,116,34,32,100,
+111,99,117,109,101,110,116,46,119,114,105,116,101,40,39,60,99,104,97,114,115,101
+,116,61,34,117,116,102,45,56,34,62,10,98,101,103,105,110,110,105,110,103,32,111,
+102,32,116,104,101,32,114,101,118,101,97,108,101,100,32,116,104,97,116,32,116,
+104,101,116,101,108,101,118,105,115,105,111,110,32,115,101,114,105,101,115,34,32
+,114,101,108,61,34,110,111,102,111,108,108,111,119,34,62,32,116,97,114,103,101,
+116,61,34,95,98,108,97,110,107,34,62,99,108,97,105,109,105,110,103,32,116,104,97
+,116,32,116,104,101,104,116,116,112,37,51,65,37,50,70,37,50,70,119,119,119,46,
+109,97,110,105,102,101,115,116,97,116,105,111,110,115,32,111,102,80,114,105,109,
+101,32,77,105,110,105,115,116,101,114,32,111,102,105,110,102,108,117,101,110,99,
+101,100,32,98,121,32,116,104,101,99,108,97,115,115,61,34,99,108,101,97,114,102,
+105,120,34,62,47,100,105,118,62,13,10,60,47,100,105,118,62,13,10,13,10,116,104,
+114,101,101,45,100,105,109,101,110,115,105,111,110,97,108,67,104,117,114,99,104,
+32,111,102,32,69,110,103,108,97,110,100,111,102,32,78,111,114,116,104,32,67,97,
+114,111,108,105,110,97,115,113,117,97,114,101,32,107,105,108,111,109,101,116,114
+,101,115,46,97,100,100,69,118,101,110,116,76,105,115,116,101,110,101,114,100,105
+,115,116,105,110,99,116,32,102,114,111,109,32,116,104,101,99,111,109,109,111,110
+,108,121,32,107,110,111,119,110,32,97,115,80,104,111,110,101,116,105,99,32,65,
+108,112,104,97,98,101,116,100,101,99,108,97,114,101,100,32,116,104,97,116,32,116
+,104,101,99,111,110,116,114,111,108,108,101,100,32,98,121,32,116,104,101,66,101,
+110,106,97,109,105,110,32,70,114,97,110,107,108,105,110,114,111,108,101,45,112,
+108,97,121,105,110,103,32,103,97,109,101,116,104,101,32,85,110,105,118,101,114,
+115,105,116,121,32,111,102,105,110,32,87,101,115,116,101,114,110,32,69,117,114,
+111,112,101,112,101,114,115,111,110,97,108,32,99,111,109,112,117,116,101,114,80,
+114,111,106,101,99,116,32,71,117,116,101,110,98,101,114,103,114,101,103,97,114,
+100,108,101,115,115,32,111,102,32,116,104,101,104,97,115,32,98,101,101,110,32,
+112,114,111,112,111,115,101,100,116,111,103,101,116,104,101,114,32,119,105,116,
+104,32,116,104,101,62,60,47,108,105,62,60,108,105,32,99,108,97,115,115,61,34,105
+,110,32,115,111,109,101,32,99,111,117,110,116,114,105,101,115,109,105,110,46,106
+,115,34,62,60,47,115,99,114,105,112,116,62,111,102,32,116,104,101,32,112,111,112
+,117,108,97,116,105,111,110,111,102,102,105,99,105,97,108,32,108,97,110,103,117,
+97,103,101,60,105,109,103,32,115,114,99,61,34,105,109,97,103,101,115,47,105,100,
+101,110,116,105,102,105,101,100,32,98,121,32,116,104,101,110,97,116,117,114,97,
+108,32,114,101,115,111,117,114,99,101,115,99,108,97,115,115,105,102,105,99,97,
+116,105,111,110,32,111,102,99,97,110,32,98,101,32,99,111,110,115,105,100,101,114
+,101,100,113,117,97,110,116,117,109,32,109,101,99,104,97,110,105,99,115,78,101,
+118,101,114,116,104,101,108,101,115,115,44,32,116,104,101,109,105,108,108,105,
+111,110,32,121,101,97,114,115,32,97,103,111,60,47,98,111,100,121,62,13,10,60,47,
+104,116,109,108,62,13,206,149,206,187,206,187,206,183,206,189,206,185,206,186,
+206,172,10,116,97,107,101,32,97,100,118,97,110,116,97,103,101,32,111,102,97,110,
+100,44,32,97,99,99,111,114,100,105,110,103,32,116,111,97,116,116,114,105,98,117,
+116,101,100,32,116,111,32,116,104,101,77,105,99,114,111,115,111,102,116,32,87,
+105,110,100,111,119,115,116,104,101,32,102,105,114,115,116,32,99,101,110,116,117
+,114,121,117,110,100,101,114,32,116,104,101,32,99,111,110,116,114,111,108,100,
+105,118,32,99,108,97,115,115,61,34,104,101,97,100,101,114,115,104,111,114,116,
+108,121,32,97,102,116,101,114,32,116,104,101,110,111,116,97,98,108,101,32,101,
+120,99,101,112,116,105,111,110,116,101,110,115,32,111,102,32,116,104,111,117,115
+,97,110,100,115,115,101,118,101,114,97,108,32,100,105,102,102,101,114,101,110,
+116,97,114,111,117,110,100,32,116,104,101,32,119,111,114,108,100,46,114,101,97,
+99,104,105,110,103,32,109,105,108,105,116,97,114,121,105,115,111,108,97,116,101,
+100,32,102,114,111,109,32,116,104,101,111,112,112,111,115,105,116,105,111,110,32
+,116,111,32,116,104,101,116,104,101,32,79,108,100,32,84,101,115,116,97,109,101,
+110,116,65,102,114,105,99,97,110,32,65,109,101,114,105,99,97,110,115,105,110,115
+,101,114,116,101,100,32,105,110,116,111,32,116,104,101,115,101,112,97,114,97,116
+,101,32,102,114,111,109,32,116,104,101,109,101,116,114,111,112,111,108,105,116,
+97,110,32,97,114,101,97,109,97,107,101,115,32,105,116,32,112,111,115,115,105,98,
+108,101,97,99,107,110,111,119,108,101,100,103,101,100,32,116,104,97,116,97,114,
+103,117,97,98,108,121,32,116,104,101,32,109,111,115,116,116,121,112,101,61,34,
+116,101,120,116,47,99,115,115,34,62,10,116,104,101,32,73,110,116,101,114,110,97,
+116,105,111,110,97,108,65,99,99,111,114,100,105,110,103,32,116,111,32,116,104,
+101,32,112,101,61,34,116,101,120,116,47,99,115,115,34,32,47,62,10,99,111,105,110
+,99,105,100,101,32,119,105,116,104,32,116,104,101,116,119,111,45,116,104,105,114
+,100,115,32,111,102,32,116,104,101,68,117,114,105,110,103,32,116,104,105,115,32,
+116,105,109,101,44,100,117,114,105,110,103,32,116,104,101,32,112,101,114,105,111
+,100,97,110,110,111,117,110,99,101,100,32,116,104,97,116,32,104,101,116,104,101,
+32,105,110,116,101,114,110,97,116,105,111,110,97,108,97,110,100,32,109,111,114,
+101,32,114,101,99,101,110,116,108,121,98,101,108,105,101,118,101,100,32,116,104,
+97,116,32,116,104,101,99,111,110,115,99,105,111,117,115,110,101,115,115,32,97,
+110,100,102,111,114,109,101,114,108,121,32,107,110,111,119,110,32,97,115,115,117
+,114,114,111,117,110,100,101,100,32,98,121,32,116,104,101,102,105,114,115,116,32
+,97,112,112,101,97,114,101,100,32,105,110,111,99,99,97,115,105,111,110,97,108,
+108,121,32,117,115,101,100,112,111,115,105,116,105,111,110,58,97,98,115,111,108,
+117,116,101,59,34,32,116,97,114,103,101,116,61,34,95,98,108,97,110,107,34,32,112
+,111,115,105,116,105,111,110,58,114,101,108,97,116,105,118,101,59,116,101,120,
+116,45,97,108,105,103,110,58,99,101,110,116,101,114,59,106,97,120,47,108,105,98,
+115,47,106,113,117,101,114,121,47,49,46,98,97,99,107,103,114,111,117,110,100,45,
+99,111,108,111,114,58,35,116,121,112,101,61,34,97,112,112,108,105,99,97,116,105,
+111,110,47,97,110,103,117,97,103,101,34,32,99,111,110,116,101,110,116,61,34,60,
+109,101,116,97,32,104,116,116,112,45,101,113,117,105,118,61,34,80,114,105,118,97
+,99,121,32,80,111,108,105,99,121,60,47,97,62,101,40,34,37,51,67,115,99,114,105,
+112,116,32,115,114,99,61,39,34,32,116,97,114,103,101,116,61,34,95,98,108,97,110,
+107,34,62,79,110,32,116,104,101,32,111,116,104,101,114,32,104,97,110,100,44,46,
+106,112,103,124,116,104,117,109,98,124,114,105,103,104,116,124,50,60,47,100,105,
+118,62,60,100,105,118,32,99,108,97,115,115,61,34,60,100,105,118,32,115,116,121,
+108,101,61,34,102,108,111,97,116,58,110,105,110,101,116,101,101,110,116,104,32,
+99,101,110,116,117,114,121,60,47,98,111,100,121,62,13,10,60,47,104,116,109,108,
+62,13,10,60,105,109,103,32,115,114,99,61,34,104,116,116,112,58,47,47,115,59,116,
+101,120,116,45,97,108,105,103,110,58,99,101,110,116,101,114,102,111,110,116,45,
+119,101,105,103,104,116,58,32,98,111,108,100,59,32,65,99,99,111,114,100,105,110,
+103,32,116,111,32,116,104,101,32,100,105,102,102,101,114,101,110,99,101,32,98,
+101,116,119,101,101,110,34,32,102,114,97,109,101,98,111,114,100,101,114,61,34,48
+,34,32,34,32,115,116,121,108,101,61,34,112,111,115,105,116,105,111,110,58,108,
+105,110,107,32,104,114,101,102,61,34,104,116,116,112,58,47,47,104,116,109,108,52
+,47,108,111,111,115,101,46,100,116,100,34,62,10,100,117,114,105,110,103,32,116,
+104,105,115,32,112,101,114,105,111,100,60,47,116,100,62,60,47,116,114,62,60,47,
+116,97,98,108,101,62,99,108,111,115,101,108,121,32,114,101,108,97,116,101,100,32
+,116,111,102,111,114,32,116,104,101,32,102,105,114,115,116,32,116,105,109,101,59
+,102,111,110,116,45,119,101,105,103,104,116,58,98,111,108,100,59,105,110,112,117
+,116,32,116,121,112,101,61,34,116,101,120,116,34,32,60,115,112,97,110,32,115,116
+,121,108,101,61,34,102,111,110,116,45,111,110,114,101,97,100,121,115,116,97,116,
+101,99,104,97,110,103,101,9,60,100,105,118,32,99,108,97,115,115,61,34,99,108,101
+,97,114,100,111,99,117,109,101,110,116,46,108,111,99,97,116,105,111,110,46,32,70
+,111,114,32,101,120,97,109,112,108,101,44,32,116,104,101,32,97,32,119,105,100,
+101,32,118,97,114,105,101,116,121,32,111,102,32,60,33,68,79,67,84,89,80,69,32,
+104,116,109,108,62,13,10,60,38,110,98,115,112,59,38,110,98,115,112,59,38,110,98,
+115,112,59,34,62,60,97,32,104,114,101,102,61,34,104,116,116,112,58,47,47,115,116
+,121,108,101,61,34,102,108,111,97,116,58,108,101,102,116,59,99,111,110,99,101,
+114,110,101,100,32,119,105,116,104,32,116,104,101,61,104,116,116,112,37,51,65,37
+,50,70,37,50,70,119,119,119,46,105,110,32,112,111,112,117,108,97,114,32,99,117,
+108,116,117,114,101,116,121,112,101,61,34,116,101,120,116,47,99,115,115,34,32,47
+,62,105,116,32,105,115,32,112,111,115,115,105,98,108,101,32,116,111,32,72,97,114
+,118,97,114,100,32,85,110,105,118,101,114,115,105,116,121,116,121,108,101,115,
+104,101,101,116,34,32,104,114,101,102,61,34,47,116,104,101,32,109,97,105,110,32,
+99,104,97,114,97,99,116,101,114,79,120,102,111,114,100,32,85,110,105,118,101,114
+,115,105,116,121,32,32,110,97,109,101,61,34,107,101,121,119,111,114,100,115,34,
+32,99,115,116,121,108,101,61,34,116,101,120,116,45,97,108,105,103,110,58,116,104
+,101,32,85,110,105,116,101,100,32,75,105,110,103,100,111,109,102,101,100,101,114
+,97,108,32,103,111,118,101,114,110,109,101,110,116,60,100,105,118,32,115,116,121
+,108,101,61,34,109,97,114,103,105,110,32,100,101,112,101,110,100,105,110,103,32,
+111,110,32,116,104,101,32,100,101,115,99,114,105,112,116,105,111,110,32,111,102,
+32,116,104,101,60,100,105,118,32,99,108,97,115,115,61,34,104,101,97,100,101,114,
+46,109,105,110,46,106,115,34,62,60,47,115,99,114,105,112,116,62,100,101,115,116,
+114,117,99,116,105,111,110,32,111,102,32,116,104,101,115,108,105,103,104,116,108
+,121,32,100,105,102,102,101,114,101,110,116,105,110,32,97,99,99,111,114,100,97,
+110,99,101,32,119,105,116,104,116,101,108,101,99,111,109,109,117,110,105,99,97,
+116,105,111,110,115,105,110,100,105,99,97,116,101,115,32,116,104,97,116,32,116,
+104,101,115,104,111,114,116,108,121,32,116,104,101,114,101,97,102,116,101,114,
+101,115,112,101,99,105,97,108,108,121,32,105,110,32,116,104,101,32,69,117,114,
+111,112,101,97,110,32,99,111,117,110,116,114,105,101,115,72,111,119,101,118,101,
+114,44,32,116,104,101,114,101,32,97,114,101,115,114,99,61,34,104,116,116,112,58,
+47,47,115,116,97,116,105,99,115,117,103,103,101,115,116,101,100,32,116,104,97,
+116,32,116,104,101,34,32,115,114,99,61,34,104,116,116,112,58,47,47,119,119,119,
+46,97,32,108,97,114,103,101,32,110,117,109,98,101,114,32,111,102,32,84,101,108,
+101,99,111,109,109,117,110,105,99,97,116,105,111,110,115,34,32,114,101,108,61,34
+,110,111,102,111,108,108,111,119,34,32,116,72,111,108,121,32,82,111,109,97,110,
+32,69,109,112,101,114,111,114,97,108,109,111,115,116,32,101,120,99,108,117,115,
+105,118,101,108,121,34,32,98,111,114,100,101,114,61,34,48,34,32,97,108,116,61,34
+,83,101,99,114,101,116,97,114,121,32,111,102,32,83,116,97,116,101,99,117,108,109
+,105,110,97,116,105,110,103,32,105,110,32,116,104,101,67,73,65,32,87,111,114,108
+,100,32,70,97,99,116,98,111,111,107,116,104,101,32,109,111,115,116,32,105,109,
+112,111,114,116,97,110,116,97,110,110,105,118,101,114,115,97,114,121,32,111,102,
+32,116,104,101,115,116,121,108,101,61,34,98,97,99,107,103,114,111,117,110,100,45
+,60,108,105,62,60,101,109,62,60,97,32,104,114,101,102,61,34,47,116,104,101,32,65
+,116,108,97,110,116,105,99,32,79,99,101,97,110,115,116,114,105,99,116,108,121,32
+,115,112,101,97,107,105,110,103,44,115,104,111,114,116,108,121,32,98,101,102,111
+,114,101,32,116,104,101,100,105,102,102,101,114,101,110,116,32,116,121,112,101,
+115,32,111,102,116,104,101,32,79,116,116,111,109,97,110,32,69,109,112,105,114,
+101,62,60,105,109,103,32,115,114,99,61,34,104,116,116,112,58,47,47,65,110,32,73,
+110,116,114,111,100,117,99,116,105,111,110,32,116,111,99,111,110,115,101,113,117
+,101,110,99,101,32,111,102,32,116,104,101,100,101,112,97,114,116,117,114,101,32,
+102,114,111,109,32,116,104,101,67,111,110,102,101,100,101,114,97,116,101,32,83,
+116,97,116,101,115,105,110,100,105,103,101,110,111,117,115,32,112,101,111,112,
+108,101,115,80,114,111,99,101,101,100,105,110,103,115,32,111,102,32,116,104,101,
+105,110,102,111,114,109,97,116,105,111,110,32,111,110,32,116,104,101,116,104,101
+,111,114,105,101,115,32,104,97,118,101,32,98,101,101,110,105,110,118,111,108,118
+,101,109,101,110,116,32,105,110,32,116,104,101,100,105,118,105,100,101,100,32,
+105,110,116,111,32,116,104,114,101,101,97,100,106,97,99,101,110,116,32,99,111,
+117,110,116,114,105,101,115,105,115,32,114,101,115,112,111,110,115,105,98,108,
+101,32,102,111,114,100,105,115,115,111,108,117,116,105,111,110,32,111,102,32,116
+,104,101,99,111,108,108,97,98,111,114,97,116,105,111,110,32,119,105,116,104,119,
+105,100,101,108,121,32,114,101,103,97,114,100,101,100,32,97,115,104,105,115,32,
+99,111,110,116,101,109,112,111,114,97,114,105,101,115,102,111,117,110,100,105,
+110,103,32,109,101,109,98,101,114,32,111,102,68,111,109,105,110,105,99,97,110,32
+,82,101,112,117,98,108,105,99,103,101,110,101,114,97,108,108,121,32,97,99,99,101
+,112,116,101,100,116,104,101,32,112,111,115,115,105,98,105,108,105,116,121,32,
+111,102,97,114,101,32,97,108,115,111,32,97,118,97,105,108,97,98,108,101,117,110,
+100,101,114,32,99,111,110,115,116,114,117,99,116,105,111,110,114,101,115,116,111
+,114,97,116,105,111,110,32,111,102,32,116,104,101,116,104,101,32,103,101,110,101
+,114,97,108,32,112,117,98,108,105,99,105,115,32,97,108,109,111,115,116,32,101,
+110,116,105,114,101,108,121,112,97,115,115,101,115,32,116,104,114,111,117,103,
+104,32,116,104,101,104,97,115,32,98,101,101,110,32,115,117,103,103,101,115,116,
+101,100,99,111,109,112,117,116,101,114,32,97,110,100,32,118,105,100,101,111,71,
+101,114,109,97,110,105,99,32,108,97,110,103,117,97,103,101,115,32,97,99,99,111,
+114,100,105,110,103,32,116,111,32,116,104,101,32,100,105,102,102,101,114,101,110
+,116,32,102,114,111,109,32,116,104,101,115,104,111,114,116,108,121,32,97,102,116
+,101,114,119,97,114,100,115,104,114,101,102,61,34,104,116,116,112,115,58,47,47,
+119,119,119,46,114,101,99,101,110,116,32,100,101,118,101,108,111,112,109,101,110
+,116,66,111,97,114,100,32,111,102,32,68,105,114,101,99,116,111,114,115,60,100,
+105,118,32,99,108,97,115,115,61,34,115,101,97,114,99,104,124,32,60,97,32,104,114
+,101,102,61,34,104,116,116,112,58,47,47,73,110,32,112,97,114,116,105,99,117,108,
+97,114,44,32,116,104,101,77,117,108,116,105,112,108,101,32,102,111,111,116,110,
+111,116,101,115,111,114,32,111,116,104,101,114,32,115,117,98,115,116,97,110,99,
+101,116,104,111,117,115,97,110,100,115,32,111,102,32,121,101,97,114,115,116,114,
+97,110,115,108,97,116,105,111,110,32,111,102,32,116,104,101,60,47,100,105,118,62
+,13,10,60,47,100,105,118,62,13,10,13,10,60,97,32,104,114,101,102,61,34,105,110,
+100,101,120,46,112,104,112,119,97,115,32,101,115,116,97,98,108,105,115,104,101,
+100,32,105,110,109,105,110,46,106,115,34,62,60,47,115,99,114,105,112,116,62,10,
+112,97,114,116,105,99,105,112,97,116,101,32,105,110,32,116,104,101,97,32,115,116
+,114,111,110,103,32,105,110,102,108,117,101,110,99,101,115,116,121,108,101,61,34
+,109,97,114,103,105,110,45,116,111,112,58,114,101,112,114,101,115,101,110,116,
+101,100,32,98,121,32,116,104,101,103,114,97,100,117,97,116,101,100,32,102,114,
+111,109,32,116,104,101,84,114,97,100,105,116,105,111,110,97,108,108,121,44,32,
+116,104,101,69,108,101,109,101,110,116,40,34,115,99,114,105,112,116,34,41,59,72,
+111,119,101,118,101,114,44,32,115,105,110,99,101,32,116,104,101,47,100,105,118,
+62,10,60,47,100,105,118,62,10,60,100,105,118,32,108,101,102,116,59,32,109,97,114
+,103,105,110,45,108,101,102,116,58,112,114,111,116,101,99,116,105,111,110,32,97,
+103,97,105,110,115,116,48,59,32,118,101,114,116,105,99,97,108,45,97,108,105,103,
+110,58,85,110,102,111,114,116,117,110,97,116,101,108,121,44,32,116,104,101,116,
+121,112,101,61,34,105,109,97,103,101,47,120,45,105,99,111,110,47,100,105,118,62,
+10,60,100,105,118,32,99,108,97,115,115,61,34,32,99,108,97,115,115,61,34,99,108,
+101,97,114,102,105,120,34,62,60,100,105,118,32,99,108,97,115,115,61,34,102,111,
+111,116,101,114,9,9,60,47,100,105,118,62,10,9,9,60,47,100,105,118,62,10,116,104,
+101,32,109,111,116,105,111,110,32,112,105,99,116,117,114,101,208,145,209,138,208
+,187,208,179,208,176,209,128,209,129,208,186,208,184,208,177,209,138,208,187,208
+,179,208,176,209,128,209,129,208,186,208,184,208,164,208,181,208,180,208,181,209
+,128,208,176,209,134,208,184,208,184,208,189,208,181,209,129,208,186,208,190,208
+,187,209,140,208,186,208,190,209,129,208,190,208,190,208,177,209,137,208,181,208
+,189,208,184,208,181,209,129,208,190,208,190,208,177,209,137,208,181,208,189,208
+,184,209,143,208,191,209,128,208,190,208,179,209,128,208,176,208,188,208,188,209
+,139,208,158,209,130,208,191,209,128,208,176,208,178,208,184,209,130,209,140,208
+,177,208,181,209,129,208,191,208,187,208,176,209,130,208,189,208,190,208,188,208
+,176,209,130,208,181,209,128,208,184,208,176,208,187,209,139,208,191,208,190,208
+,183,208,178,208,190,208,187,209,143,208,181,209,130,208,191,208,190,209,129,208
+,187,208,181,208,180,208,189,208,184,208,181,209,128,208,176,208,183,208,187,208
+,184,209,135,208,189,209,139,209,133,208,191,209,128,208,190,208,180,209,131,208
+,186,209,134,208,184,208,184,208,191,209,128,208,190,208,179,209,128,208,176,208
+,188,208,188,208,176,208,191,208,190,208,187,208,189,208,190,209,129,209,130,209
+,140,209,142,208,189,208,176,209,133,208,190,208,180,208,184,209,130,209,129,209
+,143,208,184,208,183,208,177,209,128,208,176,208,189,208,189,208,190,208,181,208
+,189,208,176,209,129,208,181,208,187,208,181,208,189,208,184,209,143,208,184,208
+,183,208,188,208,181,208,189,208,181,208,189,208,184,209,143,208,186,208,176,209
+,130,208,181,208,179,208,190,209,128,208,184,208,184,208,144,208,187,208,181,208
+,186,209,129,208,176,208,189,208,180,209,128,224,164,166,224,165,141,224,164,181
+,224,164,190,224,164,176,224,164,190,224,164,174,224,165,136,224,164,168,224,165
+,129,224,164,133,224,164,178,224,164,170,224,165,141,224,164,176,224,164,166,224
+,164,190,224,164,168,224,164,173,224,164,190,224,164,176,224,164,164,224,165,128
+,224,164,175,224,164,133,224,164,168,224,165,129,224,164,166,224,165,135,224,164
+,182,224,164,185,224,164,191,224,164,168,224,165,141,224,164,166,224,165,128,224
+,164,135,224,164,130,224,164,161,224,164,191,224,164,175,224,164,190,224,164,166
+,224,164,191,224,164,178,224,165,141,224,164,178,224,165,128,224,164,133,224,164
+,167,224,164,191,224,164,149,224,164,190,224,164,176,224,164,181,224,165,128,224
+,164,161,224,164,191,224,164,175,224,165,139,224,164,154,224,164,191,224,164,159
+,224,165,141,224,164,160,224,165,135,224,164,184,224,164,174,224,164,190,224,164
+,154,224,164,190,224,164,176,224,164,156,224,164,130,224,164,149,224,165,141,224
+,164,182,224,164,168,224,164,166,224,165,129,224,164,168,224,164,191,224,164,175
+,224,164,190,224,164,170,224,165,141,224,164,176,224,164,175,224,165,139,224,164
+,151,224,164,133,224,164,168,224,165,129,224,164,184,224,164,190,224,164,176,224
+,164,145,224,164,168,224,164,178,224,164,190,224,164,135,224,164,168,224,164,170
+,224,164,190,224,164,176,224,165,141,224,164,159,224,165,128,224,164,182,224,164
+,176,224,165,141,224,164,164,224,165,139,224,164,130,224,164,178,224,165,139,224
+,164,149,224,164,184,224,164,173,224,164,190,224,164,171,224,164,188,224,165,141
+,224,164,178,224,165,136,224,164,182,224,164,182,224,164,176,224,165,141,224,164
+,164,224,165,135,224,164,130,224,164,170,224,165,141,224,164,176,224,164,166,224
+,165,135,224,164,182,224,164,170,224,165,141,224,164,178,224,165,135,224,164,175
+,224,164,176,224,164,149,224,165,135,224,164,130,224,164,166,224,165,141,224,164
+,176,224,164,184,224,165,141,224,164,165,224,164,191,224,164,164,224,164,191,224
+,164,137,224,164,164,224,165,141,224,164,170,224,164,190,224,164,166,224,164,137
+,224,164,168,224,165,141,224,164,185,224,165,135,224,164,130,224,164,154,224,164
+,191,224,164,159,224,165,141,224,164,160,224,164,190,224,164,175,224,164,190,224
+,164,164,224,165,141,224,164,176,224,164,190,224,164,156,224,165,141,224,164,175
+,224,164,190,224,164,166,224,164,190,224,164,170,224,165,129,224,164,176,224,164
+,190,224,164,168,224,165,135,224,164,156,224,165,139,224,164,161,224,164,188,224
+,165,135,224,164,130,224,164,133,224,164,168,224,165,129,224,164,181,224,164,190
+,224,164,166,224,164,182,224,165,141,224,164,176,224,165,135,224,164,163,224,165
+,128,224,164,182,224,164,191,224,164,149,224,165,141,224,164,183,224,164,190,224
+,164,184,224,164,176,224,164,149,224,164,190,224,164,176,224,165,128,224,164,184
+,224,164,130,224,164,151,224,165,141,224,164,176,224,164,185,224,164,170,224,164
+,176,224,164,191,224,164,163,224,164,190,224,164,174,224,164,172,224,165,141,224
+,164,176,224,164,190,224,164,130,224,164,161,224,164,172,224,164,154,224,165,141
+,224,164,154,224,165,139,224,164,130,224,164,137,224,164,170,224,164,178,224,164
+,172,224,165,141,224,164,167,224,164,174,224,164,130,224,164,164,224,165,141,224
+,164,176,224,165,128,224,164,184,224,164,130,224,164,170,224,164,176,224,165,141
+,224,164,149,224,164,137,224,164,174,224,165,141,224,164,174,224,165,128,224,164
+,166,224,164,174,224,164,190,224,164,167,224,165,141,224,164,175,224,164,174,224
+,164,184,224,164,185,224,164,190,224,164,175,224,164,164,224,164,190,224,164,182
+,224,164,172,224,165,141,224,164,166,224,165,139,224,164,130,224,164,174,224,165
+,128,224,164,161,224,164,191,224,164,175,224,164,190,224,164,134,224,164,136,224
+,164,170,224,165,128,224,164,143,224,164,178,224,164,174,224,165,139,224,164,172
+,224,164,190,224,164,135,224,164,178,224,164,184,224,164,130,224,164,150,224,165
+,141,224,164,175,224,164,190,224,164,134,224,164,170,224,164,176,224,165,135,224
+,164,182,224,164,168,224,164,133,224,164,168,224,165,129,224,164,172,224,164,130
+,224,164,167,224,164,172,224,164,190,224,164,156,224,164,188,224,164,190,224,164
+,176,224,164,168,224,164,181,224,165,128,224,164,168,224,164,164,224,164,174,224
+,164,170,224,165,141,224,164,176,224,164,174,224,165,129,224,164,150,224,164,170
+,224,165,141,224,164,176,224,164,182,224,165,141,224,164,168,224,164,170,224,164
+,176,224,164,191,224,164,181,224,164,190,224,164,176,224,164,168,224,165,129,224
+,164,149,224,164,184,224,164,190,224,164,168,224,164,184,224,164,174,224,164,176
+,224,165,141,224,164,165,224,164,168,224,164,134,224,164,175,224,165,139,224,164
+,156,224,164,191,224,164,164,224,164,184,224,165,139,224,164,174,224,164,181,224
+,164,190,224,164,176,216,167,217,132,217,133,216,180,216,167,216,177,217,131,216
+,167,216,170,216,167,217,132,217,133,217,134,216,170,216,175,217,138,216,167,216
+,170,216,167,217,132,217,131,217,133,216,168,217,138,217,136,216,170,216,177,216
+,167,217,132,217,133,216,180,216,167,217,135,216,175,216,167,216,170,216,185,216
+,175,216,175,216,167,217,132,216,178,217,136,216,167,216,177,216,185,216,175,216
+,175,216,167,217,132,216,177,216,175,217,136,216,175,216,167,217,132,216,165,216
+,179,217,132,216,167,217,133,217,138,216,169,216,167,217,132,217,129,217,136,216
+,170,217,136,216,180,217,136,216,168,216,167,217,132,217,133,216,179,216,167,216
+,168,217,130,216,167,216,170,216,167,217,132,217,133,216,185,217,132,217,136,217
+,133,216,167,216,170,216,167,217,132,217,133,216,179,217,132,216,179,217,132,216
+,167,216,170,216,167,217,132,216,172,216,177,216,167,217,129,217,138,217,131,216
+,179,216,167,217,132,216,167,216,179,217,132,216,167,217,133,217,138,216,169,216
+,167,217,132,216,167,216,170,216,181,216,167,217,132,216,167,216,170,107,101,121
+,119,111,114,100,115,34,32,99,111,110,116,101,110,116,61,34,119,51,46,111,114,
+103,47,49,57,57,57,47,120,104,116,109,108,34,62,60,97,32,116,97,114,103,101,116,
+61,34,95,98,108,97,110,107,34,32,116,101,120,116,47,104,116,109,108,59,32,99,104
+,97,114,115,101,116,61,34,32,116,97,114,103,101,116,61,34,95,98,108,97,110,107,
+34,62,60,116,97,98,108,101,32,99,101,108,108,112,97,100,100,105,110,103,61,34,97
+,117,116,111,99,111,109,112,108,101,116,101,61,34,111,102,102,34,32,116,101,120,
+116,45,97,108,105,103,110,58,32,99,101,110,116,101,114,59,116,111,32,108,97,115,
+116,32,118,101,114,115,105,111,110,32,98,121,32,98,97,99,107,103,114,111,117,110
+,100,45,99,111,108,111,114,58,32,35,34,32,104,114,101,102,61,34,104,116,116,112,
+58,47,47,119,119,119,46,47,100,105,118,62,60,47,100,105,118,62,60,100,105,118,32
+,105,100,61,60,97,32,104,114,101,102,61,34,35,34,32,99,108,97,115,115,61,34,34,
+62,60,105,109,103,32,115,114,99,61,34,104,116,116,112,58,47,47,99,114,105,112,
+116,34,32,115,114,99,61,34,104,116,116,112,58,47,47,10,60,115,99,114,105,112,116
+,32,108,97,110,103,117,97,103,101,61,34,47,47,69,78,34,32,34,104,116,116,112,58,
+47,47,119,119,119,46,119,101,110,99,111,100,101,85,82,73,67,111,109,112,111,110,
+101,110,116,40,34,32,104,114,101,102,61,34,106,97,118,97,115,99,114,105,112,116,
+58,60,100,105,118,32,99,108,97,115,115,61,34,99,111,110,116,101,110,116,100,111,
+99,117,109,101,110,116,46,119,114,105,116,101,40,39,60,115,99,112,111,115,105,
+116,105,111,110,58,32,97,98,115,111,108,117,116,101,59,115,99,114,105,112,116,32
+,115,114,99,61,34,104,116,116,112,58,47,47,32,115,116,121,108,101,61,34,109,97,
+114,103,105,110,45,116,111,112,58,46,109,105,110,46,106,115,34,62,60,47,115,99,
+114,105,112,116,62,10,60,47,100,105,118,62,10,60,100,105,118,32,99,108,97,115,
+115,61,34,119,51,46,111,114,103,47,49,57,57,57,47,120,104,116,109,108,34,32,10,
+13,10,60,47,98,111,100,121,62,13,10,60,47,104,116,109,108,62,100,105,115,116,105
+,110,99,116,105,111,110,32,98,101,116,119,101,101,110,47,34,32,116,97,114,103,
+101,116,61,34,95,98,108,97,110,107,34,62,60,108,105,110,107,32,104,114,101,102,
+61,34,104,116,116,112,58,47,47,101,110,99,111,100,105,110,103,61,34,117,116,102,
+45,56,34,63,62,10,119,46,97,100,100,69,118,101,110,116,76,105,115,116,101,110,
+101,114,63,97,99,116,105,111,110,61,34,104,116,116,112,58,47,47,119,119,119,46,
+105,99,111,110,34,32,104,114,101,102,61,34,104,116,116,112,58,47,47,32,115,116,
+121,108,101,61,34,98,97,99,107,103,114,111,117,110,100,58,116,121,112,101,61,34,
+116,101,120,116,47,99,115,115,34,32,47,62,10,109,101,116,97,32,112,114,111,112,
+101,114,116,121,61,34,111,103,58,116,60,105,110,112,117,116,32,116,121,112,101,
+61,34,116,101,120,116,34,32,32,115,116,121,108,101,61,34,116,101,120,116,45,97,
+108,105,103,110,58,116,104,101,32,100,101,118,101,108,111,112,109,101,110,116,32
+,111,102,32,116,121,108,101,115,104,101,101,116,34,32,116,121,112,101,61,34,116,
+101,104,116,109,108,59,32,99,104,97,114,115,101,116,61,117,116,102,45,56,105,115
+,32,99,111,110,115,105,100,101,114,101,100,32,116,111,32,98,101,116,97,98,108,
+101,32,119,105,100,116,104,61,34,49,48,48,37,34,32,73,110,32,97,100,100,105,116,
+105,111,110,32,116,111,32,116,104,101,32,99,111,110,116,114,105,98,117,116,101,
+100,32,116,111,32,116,104,101,32,100,105,102,102,101,114,101,110,99,101,115,32,
+98,101,116,119,101,101,110,100,101,118,101,108,111,112,109,101,110,116,32,111,
+102,32,116,104,101,32,73,116,32,105,115,32,105,109,112,111,114,116,97,110,116,32
+,116,111,32,60,47,115,99,114,105,112,116,62,10,10,60,115,99,114,105,112,116,32,
+32,115,116,121,108,101,61,34,102,111,110,116,45,115,105,122,101,58,49,62,60,47,
+115,112,97,110,62,60,115,112,97,110,32,105,100,61,103,98,76,105,98,114,97,114,
+121,32,111,102,32,67,111,110,103,114,101,115,115,60,105,109,103,32,115,114,99,61
+,34,104,116,116,112,58,47,47,105,109,69,110,103,108,105,115,104,32,116,114,97,
+110,115,108,97,116,105,111,110,65,99,97,100,101,109,121,32,111,102,32,83,99,105,
+101,110,99,101,115,100,105,118,32,115,116,121,108,101,61,34,100,105,115,112,108,
+97,121,58,99,111,110,115,116,114,117,99,116,105,111,110,32,111,102,32,116,104,
+101,46,103,101,116,69,108,101,109,101,110,116,66,121,73,100,40,105,100,41,105,
+110,32,99,111,110,106,117,110,99,116,105,111,110,32,119,105,116,104,69,108,101,
+109,101,110,116,40,39,115,99,114,105,112,116,39,41,59,32,60,109,101,116,97,32,
+112,114,111,112,101,114,116,121,61,34,111,103,58,208,145,209,138,208,187,208,179
+,208,176,209,128,209,129,208,186,208,184,10,32,116,121,112,101,61,34,116,101,120
+,116,34,32,110,97,109,101,61,34,62,80,114,105,118,97,99,121,32,80,111,108,105,99
+,121,60,47,97,62,97,100,109,105,110,105,115,116,101,114,101,100,32,98,121,32,116
+,104,101,101,110,97,98,108,101,83,105,110,103,108,101,82,101,113,117,101,115,116
+,115,116,121,108,101,61,38,113,117,111,116,59,109,97,114,103,105,110,58,60,47,
+100,105,118,62,60,47,100,105,118,62,60,47,100,105,118,62,60,62,60,105,109,103,32
+,115,114,99,61,34,104,116,116,112,58,47,47,105,32,115,116,121,108,101,61,38,113,
+117,111,116,59,102,108,111,97,116,58,114,101,102,101,114,114,101,100,32,116,111,
+32,97,115,32,116,104,101,32,116,111,116,97,108,32,112,111,112,117,108,97,116,105
+,111,110,32,111,102,105,110,32,87,97,115,104,105,110,103,116,111,110,44,32,68,46
+,67,46,32,115,116,121,108,101,61,34,98,97,99,107,103,114,111,117,110,100,45,97,
+109,111,110,103,32,111,116,104,101,114,32,116,104,105,110,103,115,44,111,114,103
+,97,110,105,122,97,116,105,111,110,32,111,102,32,116,104,101,112,97,114,116,105,
+99,105,112,97,116,101,100,32,105,110,32,116,104,101,116,104,101,32,105,110,116,
+114,111,100,117,99,116,105,111,110,32,111,102,105,100,101,110,116,105,102,105,
+101,100,32,119,105,116,104,32,116,104,101,102,105,99,116,105,111,110,97,108,32,
+99,104,97,114,97,99,116,101,114,32,79,120,102,111,114,100,32,85,110,105,118,101,
+114,115,105,116,121,32,109,105,115,117,110,100,101,114,115,116,97,110,100,105,
+110,103,32,111,102,84,104,101,114,101,32,97,114,101,44,32,104,111,119,101,118,
+101,114,44,115,116,121,108,101,115,104,101,101,116,34,32,104,114,101,102,61,34,
+47,67,111,108,117,109,98,105,97,32,85,110,105,118,101,114,115,105,116,121,101,
+120,112,97,110,100,101,100,32,116,111,32,105,110,99,108,117,100,101,117,115,117,
+97,108,108,121,32,114,101,102,101,114,114,101,100,32,116,111,105,110,100,105,99,
+97,116,105,110,103,32,116,104,97,116,32,116,104,101,104,97,118,101,32,115,117,
+103,103,101,115,116,101,100,32,116,104,97,116,97,102,102,105,108,105,97,116,101,
+100,32,119,105,116,104,32,116,104,101,99,111,114,114,101,108,97,116,105,111,110,
+32,98,101,116,119,101,101,110,110,117,109,98,101,114,32,111,102,32,100,105,102,
+102,101,114,101,110,116,62,60,47,116,100,62,60,47,116,114,62,60,47,116,97,98,108
+,101,62,82,101,112,117,98,108,105,99,32,111,102,32,73,114,101,108,97,110,100,10,
+60,47,115,99,114,105,112,116,62,10,60,115,99,114,105,112,116,32,117,110,100,101,
+114,32,116,104,101,32,105,110,102,108,117,101,110,99,101,99,111,110,116,114,105,
+98,117,116,105,111,110,32,116,111,32,116,104,101,79,102,102,105,99,105,97,108,32
+,119,101,98,115,105,116,101,32,111,102,104,101,97,100,113,117,97,114,116,101,114
+,115,32,111,102,32,116,104,101,99,101,110,116,101,114,101,100,32,97,114,111,117,
+110,100,32,116,104,101,105,109,112,108,105,99,97,116,105,111,110,115,32,111,102,
+32,116,104,101,104,97,118,101,32,98,101,101,110,32,100,101,118,101,108,111,112,
+101,100,70,101,100,101,114,97,108,32,82,101,112,117,98,108,105,99,32,111,102,98,
+101,99,97,109,101,32,105,110,99,114,101,97,115,105,110,103,108,121,99,111,110,
+116,105,110,117,97,116,105,111,110,32,111,102,32,116,104,101,78,111,116,101,44,
+32,104,111,119,101,118,101,114,44,32,116,104,97,116,115,105,109,105,108,97,114,
+32,116,111,32,116,104,97,116,32,111,102,32,99,97,112,97,98,105,108,105,116,105,
+101,115,32,111,102,32,116,104,101,97,99,99,111,114,100,97,110,99,101,32,119,105,
+116,104,32,116,104,101,112,97,114,116,105,99,105,112,97,110,116,115,32,105,110,
+32,116,104,101,102,117,114,116,104,101,114,32,100,101,118,101,108,111,112,109,
+101,110,116,117,110,100,101,114,32,116,104,101,32,100,105,114,101,99,116,105,111
+,110,105,115,32,111,102,116,101,110,32,99,111,110,115,105,100,101,114,101,100,
+104,105,115,32,121,111,117,110,103,101,114,32,98,114,111,116,104,101,114,60,47,
+116,100,62,60,47,116,114,62,60,47,116,97,98,108,101,62,60,97,32,104,116,116,112,
+45,101,113,117,105,118,61,34,88,45,85,65,45,112,104,121,115,105,99,97,108,32,112
+,114,111,112,101,114,116,105,101,115,111,102,32,66,114,105,116,105,115,104,32,67
+,111,108,117,109,98,105,97,104,97,115,32,98,101,101,110,32,99,114,105,116,105,99
+,105,122,101,100,40,119,105,116,104,32,116,104,101,32,101,120,99,101,112,116,105
+,111,110,113,117,101,115,116,105,111,110,115,32,97,98,111,117,116,32,116,104,101
+,112,97,115,115,105,110,103,32,116,104,114,111,117,103,104,32,116,104,101,48,34,
+32,99,101,108,108,112,97,100,100,105,110,103,61,34,48,34,32,116,104,111,117,115,
+97,110,100,115,32,111,102,32,112,101,111,112,108,101,114,101,100,105,114,101,99,
+116,115,32,104,101,114,101,46,32,70,111,114,104,97,118,101,32,99,104,105,108,100
+,114,101,110,32,117,110,100,101,114,37,51,69,37,51,67,47,115,99,114,105,112,116,
+37,51,69,34,41,41,59,60,97,32,104,114,101,102,61,34,104,116,116,112,58,47,47,119
+,119,119,46,60,108,105,62,60,97,32,104,114,101,102,61,34,104,116,116,112,58,47,
+47,115,105,116,101,95,110,97,109,101,34,32,99,111,110,116,101,110,116,61,34,116,
+101,120,116,45,100,101,99,111,114,97,116,105,111,110,58,110,111,110,101,115,116,
+121,108,101,61,34,100,105,115,112,108,97,121,58,32,110,111,110,101,60,109,101,
+116,97,32,104,116,116,112,45,101,113,117,105,118,61,34,88,45,110,101,119,32,68,
+97,116,101,40,41,46,103,101,116,84,105,109,101,40,41,32,116,121,112,101,61,34,
+105,109,97,103,101,47,120,45,105,99,111,110,34,60,47,115,112,97,110,62,60,115,
+112,97,110,32,99,108,97,115,115,61,34,108,97,110,103,117,97,103,101,61,34,106,97
+,118,97,115,99,114,105,112,116,119,105,110,100,111,119,46,108,111,99,97,116,105,
+111,110,46,104,114,101,102,60,97,32,104,114,101,102,61,34,106,97,118,97,115,99,
+114,105,112,116,58,45,45,62,13,10,60,115,99,114,105,112,116,32,116,121,112,101,
+61,34,116,60,97,32,104,114,101,102,61,39,104,116,116,112,58,47,47,119,119,119,46
+,104,111,114,116,99,117,116,32,105,99,111,110,34,32,104,114,101,102,61,34,60,47,
+100,105,118,62,13,10,60,100,105,118,32,99,108,97,115,115,61,34,60,115,99,114,105
+,112,116,32,115,114,99,61,34,104,116,116,112,58,47,47,34,32,114,101,108,61,34,
+115,116,121,108,101,115,104,101,101,116,34,32,116,60,47,100,105,118,62,10,60,115
+,99,114,105,112,116,32,116,121,112,101,61,47,97,62,32,60,97,32,104,114,101,102,
+61,34,104,116,116,112,58,47,47,32,97,108,108,111,119,84,114,97,110,115,112,97,
+114,101,110,99,121,61,34,88,45,85,65,45,67,111,109,112,97,116,105,98,108,101,34,
+32,99,111,110,114,101,108,97,116,105,111,110,115,104,105,112,32,98,101,116,119,
+101,101,110,10,60,47,115,99,114,105,112,116,62,13,10,60,115,99,114,105,112,116,
+32,60,47,97,62,60,47,108,105,62,60,47,117,108,62,60,47,100,105,118,62,97,115,115
+,111,99,105,97,116,101,100,32,119,105,116,104,32,116,104,101,32,112,114,111,103,
+114,97,109,109,105,110,103,32,108,97,110,103,117,97,103,101,60,47,97,62,60,97,32
+,104,114,101,102,61,34,104,116,116,112,58,47,47,60,47,97,62,60,47,108,105,62,60,
+108,105,32,99,108,97,115,115,61,34,102,111,114,109,32,97,99,116,105,111,110,61,
+34,104,116,116,112,58,47,47,60,100,105,118,32,115,116,121,108,101,61,34,100,105,
+115,112,108,97,121,58,116,121,112,101,61,34,116,101,120,116,34,32,110,97,109,101
+,61,34,113,34,60,116,97,98,108,101,32,119,105,100,116,104,61,34,49,48,48,37,34,
+32,98,97,99,107,103,114,111,117,110,100,45,112,111,115,105,116,105,111,110,58,34
+,32,98,111,114,100,101,114,61,34,48,34,32,119,105,100,116,104,61,34,114,101,108,
+61,34,115,104,111,114,116,99,117,116,32,105,99,111,110,34,32,104,54,62,60,117,
+108,62,60,108,105,62,60,97,32,104,114,101,102,61,34,32,32,60,109,101,116,97,32,
+104,116,116,112,45,101,113,117,105,118,61,34,99,115,115,34,32,109,101,100,105,97
+,61,34,115,99,114,101,101,110,34,32,114,101,115,112,111,110,115,105,98,108,101,
+32,102,111,114,32,116,104,101,32,34,32,116,121,112,101,61,34,97,112,112,108,105,
+99,97,116,105,111,110,47,34,32,115,116,121,108,101,61,34,98,97,99,107,103,114,
+111,117,110,100,45,104,116,109,108,59,32,99,104,97,114,115,101,116,61,117,116,
+102,45,56,34,32,97,108,108,111,119,116,114,97,110,115,112,97,114,101,110,99,121,
+61,34,115,116,121,108,101,115,104,101,101,116,34,32,116,121,112,101,61,34,116,
+101,13,10,60,109,101,116,97,32,104,116,116,112,45,101,113,117,105,118,61,34,62,
+60,47,115,112,97,110,62,60,115,112,97,110,32,99,108,97,115,115,61,34,48,34,32,99
+,101,108,108,115,112,97,99,105,110,103,61,34,48,34,62,59,10,60,47,115,99,114,105
+,112,116,62,10,60,115,99,114,105,112,116,32,115,111,109,101,116,105,109,101,115,
+32,99,97,108,108,101,100,32,116,104,101,100,111,101,115,32,110,111,116,32,110,
+101,99,101,115,115,97,114,105,108,121,70,111,114,32,109,111,114,101,32,105,110,
+102,111,114,109,97,116,105,111,110,97,116,32,116,104,101,32,98,101,103,105,110,
+110,105,110,103,32,111,102,32,60,33,68,79,67,84,89,80,69,32,104,116,109,108,62,
+60,104,116,109,108,112,97,114,116,105,99,117,108,97,114,108,121,32,105,110,32,
+116,104,101,32,116,121,112,101,61,34,104,105,100,100,101,110,34,32,110,97,109,
+101,61,34,106,97,118,97,115,99,114,105,112,116,58,118,111,105,100,40,48,41,59,34
+,101,102,102,101,99,116,105,118,101,110,101,115,115,32,111,102,32,116,104,101,32
+,97,117,116,111,99,111,109,112,108,101,116,101,61,34,111,102,102,34,32,103,101,
+110,101,114,97,108,108,121,32,99,111,110,115,105,100,101,114,101,100,62,60,105,
+110,112,117,116,32,116,121,112,101,61,34,116,101,120,116,34,32,34,62,60,47,115,
+99,114,105,112,116,62,13,10,60,115,99,114,105,112,116,116,104,114,111,117,103,
+104,111,117,116,32,116,104,101,32,119,111,114,108,100,99,111,109,109,111,110,32,
+109,105,115,99,111,110,99,101,112,116,105,111,110,97,115,115,111,99,105,97,116,
+105,111,110,32,119,105,116,104,32,116,104,101,60,47,100,105,118,62,10,60,47,100,
+105,118,62,10,60,100,105,118,32,99,100,117,114,105,110,103,32,104,105,115,32,108
+,105,102,101,116,105,109,101,44,99,111,114,114,101,115,112,111,110,100,105,110,
+103,32,116,111,32,116,104,101,116,121,112,101,61,34,105,109,97,103,101,47,120,45
+,105,99,111,110,34,32,97,110,32,105,110,99,114,101,97,115,105,110,103,32,110,117
+,109,98,101,114,100,105,112,108,111,109,97,116,105,99,32,114,101,108,97,116,105,
+111,110,115,97,114,101,32,111,102,116,101,110,32,99,111,110,115,105,100,101,114,
+101,100,109,101,116,97,32,99,104,97,114,115,101,116,61,34,117,116,102,45,56,34,
+32,60,105,110,112,117,116,32,116,121,112,101,61,34,116,101,120,116,34,32,101,120
+,97,109,112,108,101,115,32,105,110,99,108,117,100,101,32,116,104,101,34,62,60,
+105,109,103,32,115,114,99,61,34,104,116,116,112,58,47,47,105,112,97,114,116,105,
+99,105,112,97,116,105,111,110,32,105,110,32,116,104,101,116,104,101,32,101,115,
+116,97,98,108,105,115,104,109,101,110,116,32,111,102,10,60,47,100,105,118,62,10,
+60,100,105,118,32,99,108,97,115,115,61,34,38,97,109,112,59,110,98,115,112,59,38,
+97,109,112,59,110,98,115,112,59,116,111,32,100,101,116,101,114,109,105,110,101,
+32,119,104,101,116,104,101,114,113,117,105,116,101,32,100,105,102,102,101,114,
+101,110,116,32,102,114,111,109,109,97,114,107,101,100,32,116,104,101,32,98,101,
+103,105,110,110,105,110,103,100,105,115,116,97,110,99,101,32,98,101,116,119,101,
+101,110,32,116,104,101,99,111,110,116,114,105,98,117,116,105,111,110,115,32,116,
+111,32,116,104,101,99,111,110,102,108,105,99,116,32,98,101,116,119,101,101,110,
+32,116,104,101,119,105,100,101,108,121,32,99,111,110,115,105,100,101,114,101,100
+,32,116,111,119,97,115,32,111,110,101,32,111,102,32,116,104,101,32,102,105,114,
+115,116,119,105,116,104,32,118,97,114,121,105,110,103,32,100,101,103,114,101,101
+,115,104,97,118,101,32,115,112,101,99,117,108,97,116,101,100,32,116,104,97,116,
+40,100,111,99,117,109,101,110,116,46,103,101,116,69,108,101,109,101,110,116,112,
+97,114,116,105,99,105,112,97,116,105,110,103,32,105,110,32,116,104,101,111,114,
+105,103,105,110,97,108,108,121,32,100,101,118,101,108,111,112,101,100,101,116,97
+,32,99,104,97,114,115,101,116,61,34,117,116,102,45,56,34,62,32,116,121,112,101,
+61,34,116,101,120,116,47,99,115,115,34,32,47,62,10,105,110,116,101,114,99,104,97
+,110,103,101,97,98,108,121,32,119,105,116,104,109,111,114,101,32,99,108,111,115,
+101,108,121,32,114,101,108,97,116,101,100,115,111,99,105,97,108,32,97,110,100,32
+,112,111,108,105,116,105,99,97,108,116,104,97,116,32,119,111,117,108,100,32,111,
+116,104,101,114,119,105,115,101,112,101,114,112,101,110,100,105,99,117,108,97,
+114,32,116,111,32,116,104,101,115,116,121,108,101,32,116,121,112,101,61,34,116,
+101,120,116,47,99,115,115,116,121,112,101,61,34,115,117,98,109,105,116,34,32,110
+,97,109,101,61,34,102,97,109,105,108,105,101,115,32,114,101,115,105,100,105,110,
+103,32,105,110,100,101,118,101,108,111,112,105,110,103,32,99,111,117,110,116,114
+,105,101,115,99,111,109,112,117,116,101,114,32,112,114,111,103,114,97,109,109,
+105,110,103,101,99,111,110,111,109,105,99,32,100,101,118,101,108,111,112,109,101
+,110,116,100,101,116,101,114,109,105,110,97,116,105,111,110,32,111,102,32,116,
+104,101,102,111,114,32,109,111,114,101,32,105,110,102,111,114,109,97,116,105,111
+,110,111,110,32,115,101,118,101,114,97,108,32,111,99,99,97,115,105,111,110,115,
+112,111,114,116,117,103,117,195,170,115,32,40,69,117,114,111,112,101,117,41,208,
+163,208,186,209,128,208,176,209,151,208,189,209,129,209,140,208,186,208,176,209,
+131,208,186,209,128,208,176,209,151,208,189,209,129,209,140,208,186,208,176,208,
+160,208,190,209,129,209,129,208,184,208,185,209,129,208,186,208,190,208,185,208,
+188,208,176,209,130,208,181,209,128,208,184,208,176,208,187,208,190,208,178,208,
+184,208,189,209,132,208,190,209,128,208,188,208,176,209,134,208,184,208,184,209,
+131,208,191,209,128,208,176,208,178,208,187,208,181,208,189,208,184,209,143,208,
+189,208,181,208,190,208,177,209,133,208,190,208,180,208,184,208,188,208,190,208,
+184,208,189,209,132,208,190,209,128,208,188,208,176,209,134,208,184,209,143,208,
+152,208,189,209,132,208,190,209,128,208,188,208,176,209,134,208,184,209,143,208,
+160,208,181,209,129,208,191,209,131,208,177,208,187,208,184,208,186,208,184,208,
+186,208,190,208,187,208,184,209,135,208,181,209,129,209,130,208,178,208,190,208,
+184,208,189,209,132,208,190,209,128,208,188,208,176,209,134,208,184,209,142,209,
+130,208,181,209,128,209,128,208,184,209,130,208,190,209,128,208,184,208,184,208,
+180,208,190,209,129,209,130,208,176,209,130,208,190,209,135,208,189,208,190,216,
+167,217,132,217,133,216,170,217,136,216,167,216,172,216,175,217,136,217,134,216,
+167,217,132,216,167,216,180,216,170,216,177,216,167,217,131,216,167,216,170,216,
+167,217,132,216,167,217,130,216,170,216,177,216,167,216,173,216,167,216,170,104,
+116,109,108,59,32,99,104,97,114,115,101,116,61,85,84,70,45,56,34,32,115,101,116,
+84,105,109,101,111,117,116,40,102,117,110,99,116,105,111,110,40,41,100,105,115,
+112,108,97,121,58,105,110,108,105,110,101,45,98,108,111,99,107,59,60,105,110,112
+,117,116,32,116,121,112,101,61,34,115,117,98,109,105,116,34,32,116,121,112,101,
+32,61,32,39,116,101,120,116,47,106,97,118,97,115,99,114,105,60,105,109,103,32,
+115,114,99,61,34,104,116,116,112,58,47,47,119,119,119,46,34,32,34,104,116,116,
+112,58,47,47,119,119,119,46,119,51,46,111,114,103,47,115,104,111,114,116,99,117,
+116,32,105,99,111,110,34,32,104,114,101,102,61,34,34,32,97,117,116,111,99,111,
+109,112,108,101,116,101,61,34,111,102,102,34,32,60,47,97,62,60,47,100,105,118,62
+,60,100,105,118,32,99,108,97,115,115,61,60,47,97,62,60,47,108,105,62,10,60,108,
+105,32,99,108,97,115,115,61,34,99,115,115,34,32,116,121,112,101,61,34,116,101,
+120,116,47,99,115,115,34,32,60,102,111,114,109,32,97,99,116,105,111,110,61,34,
+104,116,116,112,58,47,47,120,116,47,99,115,115,34,32,104,114,101,102,61,34,104,
+116,116,112,58,47,47,108,105,110,107,32,114,101,108,61,34,97,108,116,101,114,110
+,97,116,101,34,32,13,10,60,115,99,114,105,112,116,32,116,121,112,101,61,34,116,
+101,120,116,47,32,111,110,99,108,105,99,107,61,34,106,97,118,97,115,99,114,105,
+112,116,58,40,110,101,119,32,68,97,116,101,41,46,103,101,116,84,105,109,101,40,
+41,125,104,101,105,103,104,116,61,34,49,34,32,119,105,100,116,104,61,34,49,34,32
+,80,101,111,112,108,101,39,115,32,82,101,112,117,98,108,105,99,32,111,102,32,32,
+60,97,32,104,114,101,102,61,34,104,116,116,112,58,47,47,119,119,119,46,116,101,
+120,116,45,100,101,99,111,114,97,116,105,111,110,58,117,110,100,101,114,116,104,
+101,32,98,101,103,105,110,110,105,110,103,32,111,102,32,116,104,101,32,60,47,100
+,105,118,62,10,60,47,100,105,118,62,10,60,47,100,105,118,62,10,101,115,116,97,98
+,108,105,115,104,109,101,110,116,32,111,102,32,116,104,101,32,60,47,100,105,118,
+62,60,47,100,105,118,62,60,47,100,105,118,62,60,47,100,35,118,105,101,119,112,
+111,114,116,123,109,105,110,45,104,101,105,103,104,116,58,10,60,115,99,114,105,
+112,116,32,115,114,99,61,34,104,116,116,112,58,47,47,111,112,116,105,111,110,62,
+60,111,112,116,105,111,110,32,118,97,108,117,101,61,111,102,116,101,110,32,114,
+101,102,101,114,114,101,100,32,116,111,32,97,115,32,47,111,112,116,105,111,110,
+62,10,60,111,112,116,105,111,110,32,118,97,108,117,60,33,68,79,67,84,89,80,69,32
+,104,116,109,108,62,10,60,33,45,45,91,73,110,116,101,114,110,97,116,105,111,110,
+97,108,32,65,105,114,112,111,114,116,62,10,60,97,32,104,114,101,102,61,34,104,
+116,116,112,58,47,47,119,119,119,60,47,97,62,60,97,32,104,114,101,102,61,34,104,
+116,116,112,58,47,47,119,224,184,160,224,184,178,224,184,169,224,184,178,224,185
+,132,224,184,151,224,184,162,225,131,165,225,131,144,225,131,160,225,131,151,225
+,131,163,225,131,154,225,131,152,230,173,163,233,171,148,228,184,173,230,150,135
+,32,40,231,185,129,233,171,148,41,224,164,168,224,164,191,224,164,176,224,165,
+141,224,164,166,224,165,135,224,164,182,224,164,161,224,164,190,224,164,137,224,
+164,168,224,164,178,224,165,139,224,164,161,224,164,149,224,165,141,224,164,183,
+224,165,135,224,164,164,224,165,141,224,164,176,224,164,156,224,164,190,224,164,
+168,224,164,149,224,164,190,224,164,176,224,165,128,224,164,184,224,164,130,224,
+164,172,224,164,130,224,164,167,224,164,191,224,164,164,224,164,184,224,165,141,
+224,164,165,224,164,190,224,164,170,224,164,168,224,164,190,224,164,184,224,165,
+141,224,164,181,224,165,128,224,164,149,224,164,190,224,164,176,224,164,184,224,
+164,130,224,164,184,224,165,141,224,164,149,224,164,176,224,164,163,224,164,184,
+224,164,190,224,164,174,224,164,151,224,165,141,224,164,176,224,165,128,224,164,
+154,224,164,191,224,164,159,224,165,141,224,164,160,224,165,139,224,164,130,224,
+164,181,224,164,191,224,164,156,224,165,141,224,164,158,224,164,190,224,164,168,
+224,164,133,224,164,174,224,165,135,224,164,176,224,164,191,224,164,149,224,164,
+190,224,164,181,224,164,191,224,164,173,224,164,191,224,164,168,224,165,141,224,
+164,168,224,164,151,224,164,190,224,164,161,224,164,191,224,164,175,224,164,190,
+224,164,129,224,164,149,224,165,141,224,164,175,224,165,139,224,164,130,224,164,
+149,224,164,191,224,164,184,224,165,129,224,164,176,224,164,149,224,165,141,224,
+164,183,224,164,190,224,164,170,224,164,185,224,165,129,224,164,129,224,164,154,
+224,164,164,224,165,128,224,164,170,224,165,141,224,164,176,224,164,172,224,164,
+130,224,164,167,224,164,168,224,164,159,224,164,191,224,164,170,224,165,141,224,
+164,170,224,164,163,224,165,128,224,164,149,224,165,141,224,164,176,224,164,191,
+224,164,149,224,165,135,224,164,159,224,164,170,224,165,141,224,164,176,224,164,
+190,224,164,176,224,164,130,224,164,173,224,164,170,224,165,141,224,164,176,224,
+164,190,224,164,170,224,165,141,224,164,164,224,164,174,224,164,190,224,164,178,
+224,164,191,224,164,149,224,165,139,224,164,130,224,164,176,224,164,171,224,164,
+188,224,165,141,224,164,164,224,164,190,224,164,176,224,164,168,224,164,191,224,
+164,176,224,165,141,224,164,174,224,164,190,224,164,163,224,164,178,224,164,191,
+224,164,174,224,164,191,224,164,159,224,165,135,224,164,161,100,101,115,99,114,
+105,112,116,105,111,110,34,32,99,111,110,116,101,110,116,61,34,100,111,99,117,
+109,101,110,116,46,108,111,99,97,116,105,111,110,46,112,114,111,116,46,103,101,
+116,69,108,101,109,101,110,116,115,66,121,84,97,103,78,97,109,101,40,60,33,68,79
+,67,84,89,80,69,32,104,116,109,108,62,10,60,104,116,109,108,32,60,109,101,116,97
+,32,99,104,97,114,115,101,116,61,34,117,116,102,45,56,34,62,58,117,114,108,34,32
+,99,111,110,116,101,110,116,61,34,104,116,116,112,58,47,47,46,99,115,115,34,32,
+114,101,108,61,34,115,116,121,108,101,115,104,101,101,116,34,115,116,121,108,101
+,32,116,121,112,101,61,34,116,101,120,116,47,99,115,115,34,62,116,121,112,101,61
+,34,116,101,120,116,47,99,115,115,34,32,104,114,101,102,61,34,119,51,46,111,114,
+103,47,49,57,57,57,47,120,104,116,109,108,34,32,120,109,108,116,121,112,101,61,
+34,116,101,120,116,47,106,97,118,97,115,99,114,105,112,116,34,32,109,101,116,104
+,111,100,61,34,103,101,116,34,32,97,99,116,105,111,110,61,34,108,105,110,107,32,
+114,101,108,61,34,115,116,121,108,101,115,104,101,101,116,34,32,32,61,32,100,111
+,99,117,109,101,110,116,46,103,101,116,69,108,101,109,101,110,116,116,121,112,
+101,61,34,105,109,97,103,101,47,120,45,105,99,111,110,34,32,47,62,99,101,108,108
+,112,97,100,100,105,110,103,61,34,48,34,32,99,101,108,108,115,112,46,99,115,115,
+34,32,116,121,112,101,61,34,116,101,120,116,47,99,115,115,34,32,60,47,97,62,60,
+47,108,105,62,60,108,105,62,60,97,32,104,114,101,102,61,34,34,32,119,105,100,116
+,104,61,34,49,34,32,104,101,105,103,104,116,61,34,49,34,34,62,60,97,32,104,114,
+101,102,61,34,104,116,116,112,58,47,47,119,119,119,46,115,116,121,108,101,61,34,
+100,105,115,112,108,97,121,58,110,111,110,101,59,34,62,97,108,116,101,114,110,97
+,116,101,34,32,116,121,112,101,61,34,97,112,112,108,105,45,47,47,87,51,67,47,47,
+68,84,68,32,88,72,84,77,76,32,49,46,48,32,101,108,108,115,112,97,99,105,110,103,
+61,34,48,34,32,99,101,108,108,112,97,100,32,116,121,112,101,61,34,104,105,100,
+100,101,110,34,32,118,97,108,117,101,61,34,47,97,62,38,110,98,115,112,59,60,115,
+112,97,110,32,114,111,108,101,61,34,115,10,60,105,110,112,117,116,32,116,121,112
+,101,61,34,104,105,100,100,101,110,34,32,108,97,110,103,117,97,103,101,61,34,74,
+97,118,97,83,99,114,105,112,116,34,32,32,100,111,99,117,109,101,110,116,46,103,
+101,116,69,108,101,109,101,110,116,115,66,103,61,34,48,34,32,99,101,108,108,115,
+112,97,99,105,110,103,61,34,48,34,32,121,112,101,61,34,116,101,120,116,47,99,115
+,115,34,32,109,101,100,105,97,61,34,116,121,112,101,61,39,116,101,120,116,47,106
+,97,118,97,115,99,114,105,112,116,39,119,105,116,104,32,116,104,101,32,101,120,
+99,101,112,116,105,111,110,32,111,102,32,121,112,101,61,34,116,101,120,116,47,99
+,115,115,34,32,114,101,108,61,34,115,116,32,104,101,105,103,104,116,61,34,49,34,
+32,119,105,100,116,104,61,34,49,34,32,61,39,43,101,110,99,111,100,101,85,82,73,
+67,111,109,112,111,110,101,110,116,40,60,108,105,110,107,32,114,101,108,61,34,97
+,108,116,101,114,110,97,116,101,34,32,10,98,111,100,121,44,32,116,114,44,32,105,
+110,112,117,116,44,32,116,101,120,116,109,101,116,97,32,110,97,109,101,61,34,114
+,111,98,111,116,115,34,32,99,111,110,109,101,116,104,111,100,61,34,112,111,115,
+116,34,32,97,99,116,105,111,110,61,34,62,10,60,97,32,104,114,101,102,61,34,104,
+116,116,112,58,47,47,119,119,119,46,99,115,115,34,32,114,101,108,61,34,115,116,
+121,108,101,115,104,101,101,116,34,32,60,47,100,105,118,62,60,47,100,105,118,62,
+60,100,105,118,32,99,108,97,115,115,108,97,110,103,117,97,103,101,61,34,106,97,
+118,97,115,99,114,105,112,116,34,62,97,114,105,97,45,104,105,100,100,101,110,61,
+34,116,114,117,101,34,62,194,183,60,114,105,112,116,34,32,116,121,112,101,61,34,
+116,101,120,116,47,106,97,118,97,115,108,61,48,59,125,41,40,41,59,10,40,102,117,
+110,99,116,105,111,110,40,41,123,98,97,99,107,103,114,111,117,110,100,45,105,109
+,97,103,101,58,32,117,114,108,40,47,97,62,60,47,108,105,62,60,108,105,62,60,97,
+32,104,114,101,102,61,34,104,9,9,60,108,105,62,60,97,32,104,114,101,102,61,34,
+104,116,116,112,58,47,47,97,116,111,114,34,32,97,114,105,97,45,104,105,100,100,
+101,110,61,34,116,114,117,62,32,60,97,32,104,114,101,102,61,34,104,116,116,112,
+58,47,47,119,119,119,46,108,97,110,103,117,97,103,101,61,34,106,97,118,97,115,99
+,114,105,112,116,34,32,47,111,112,116,105,111,110,62,10,60,111,112,116,105,111,
+110,32,118,97,108,117,101,47,100,105,118,62,60,47,100,105,118,62,60,100,105,118,
+32,99,108,97,115,115,61,114,97,116,111,114,34,32,97,114,105,97,45,104,105,100,
+100,101,110,61,34,116,114,101,61,40,110,101,119,32,68,97,116,101,41,46,103,101,
+116,84,105,109,101,40,41,112,111,114,116,117,103,117,195,170,115,32,40,100,111,
+32,66,114,97,115,105,108,41,208,190,209,128,208,179,208,176,208,189,208,184,208,
+183,208,176,209,134,208,184,208,184,208,178,208,190,208,183,208,188,208,190,208,
+182,208,189,208,190,209,129,209,130,209,140,208,190,208,177,209,128,208,176,208,
+183,208,190,208,178,208,176,208,189,208,184,209,143,209,128,208,181,208,179,208,
+184,209,129,209,130,209,128,208,176,209,134,208,184,208,184,208,178,208,190,208,
+183,208,188,208,190,208,182,208,189,208,190,209,129,209,130,208,184,208,190,208,
+177,209,143,208,183,208,176,209,130,208,181,208,187,209,140,208,189,208,176,60,
+33,68,79,67,84,89,80,69,32,104,116,109,108,32,80,85,66,76,73,67,32,34,110,116,45
+,84,121,112,101,34,32,99,111,110,116,101,110,116,61,34,116,101,120,116,47,60,109
+,101,116,97,32,104,116,116,112,45,101,113,117,105,118,61,34,67,111,110,116,101,
+114,97,110,115,105,116,105,111,110,97,108,47,47,69,78,34,32,34,104,116,116,112,
+58,60,104,116,109,108,32,120,109,108,110,115,61,34,104,116,116,112,58,47,47,119,
+119,119,45,47,47,87,51,67,47,47,68,84,68,32,88,72,84,77,76,32,49,46,48,32,84,68,
+84,68,47,120,104,116,109,108,49,45,116,114,97,110,115,105,116,105,111,110,97,108
+,47,47,119,119,119,46,119,51,46,111,114,103,47,84,82,47,120,104,116,109,108,49,
+47,112,101,32,61,32,39,116,101,120,116,47,106,97,118,97,115,99,114,105,112,116,
+39,59,60,109,101,116,97,32,110,97,109,101,61,34,100,101,115,99,114,105,112,116,
+105,111,110,112,97,114,101,110,116,78,111,100,101,46,105,110,115,101,114,116,66,
+101,102,111,114,101,60,105,110,112,117,116,32,116,121,112,101,61,34,104,105,100,
+100,101,110,34,32,110,97,106,115,34,32,116,121,112,101,61,34,116,101,120,116,47,
+106,97,118,97,115,99,114,105,40,100,111,99,117,109,101,110,116,41,46,114,101,97,
+100,121,40,102,117,110,99,116,105,115,99,114,105,112,116,32,116,121,112,101,61,
+34,116,101,120,116,47,106,97,118,97,115,105,109,97,103,101,34,32,99,111,110,116,
+101,110,116,61,34,104,116,116,112,58,47,47,85,65,45,67,111,109,112,97,116,105,98
+,108,101,34,32,99,111,110,116,101,110,116,61,116,109,108,59,32,99,104,97,114,115
+,101,116,61,117,116,102,45,56,34,32,47,62,10,108,105,110,107,32,114,101,108,61,
+34,115,104,111,114,116,99,117,116,32,105,99,111,110,60,108,105,110,107,32,114,
+101,108,61,34,115,116,121,108,101,115,104,101,101,116,34,32,60,47,115,99,114,105
+,112,116,62,10,60,115,99,114,105,112,116,32,116,121,112,101,61,61,32,100,111,99,
+117,109,101,110,116,46,99,114,101,97,116,101,69,108,101,109,101,110,60,97,32,116
+,97,114,103,101,116,61,34,95,98,108,97,110,107,34,32,104,114,101,102,61,32,100,
+111,99,117,109,101,110,116,46,103,101,116,69,108,101,109,101,110,116,115,66,105,
+110,112,117,116,32,116,121,112,101,61,34,116,101,120,116,34,32,110,97,109,101,61
+,97,46,116,121,112,101,32,61,32,39,116,101,120,116,47,106,97,118,97,115,99,114,
+105,110,112,117,116,32,116,121,112,101,61,34,104,105,100,100,101,110,34,32,110,
+97,109,101,104,116,109,108,59,32,99,104,97,114,115,101,116,61,117,116,102,45,56,
+34,32,47,62,100,116,100,34,62,10,60,104,116,109,108,32,120,109,108,110,115,61,34
+,104,116,116,112,45,47,47,87,51,67,47,47,68,84,68,32,72,84,77,76,32,52,46,48,49,
+32,84,101,110,116,115,66,121,84,97,103,78,97,109,101,40,39,115,99,114,105,112,
+116,39,41,105,110,112,117,116,32,116,121,112,101,61,34,104,105,100,100,101,110,
+34,32,110,97,109,60,115,99,114,105,112,116,32,116,121,112,101,61,34,116,101,120,
+116,47,106,97,118,97,115,34,32,115,116,121,108,101,61,34,100,105,115,112,108,97,
+121,58,110,111,110,101,59,34,62,100,111,99,117,109,101,110,116,46,103,101,116,69
+,108,101,109,101,110,116,66,121,73,100,40,61,100,111,99,117,109,101,110,116,46,
+99,114,101,97,116,101,69,108,101,109,101,110,116,40,39,32,116,121,112,101,61,39,
+116,101,120,116,47,106,97,118,97,115,99,114,105,112,116,39,105,110,112,117,116,
+32,116,121,112,101,61,34,116,101,120,116,34,32,110,97,109,101,61,34,100,46,103,
+101,116,69,108,101,109,101,110,116,115,66,121,84,97,103,78,97,109,101,40,115,110
+,105,99,97,108,34,32,104,114,101,102,61,34,104,116,116,112,58,47,47,119,119,119,
+46,67,47,47,68,84,68,32,72,84,77,76,32,52,46,48,49,32,84,114,97,110,115,105,116,
+60,115,116,121,108,101,32,116,121,112,101,61,34,116,101,120,116,47,99,115,115,34
+,62,10,10,60,115,116,121,108,101,32,116,121,112,101,61,34,116,101,120,116,47,99,
+115,115,34,62,105,111,110,97,108,46,100,116,100,34,62,10,60,104,116,109,108,32,
+120,109,108,110,115,61,104,116,116,112,45,101,113,117,105,118,61,34,67,111,110,
+116,101,110,116,45,84,121,112,101,100,105,110,103,61,34,48,34,32,99,101,108,108,
+115,112,97,99,105,110,103,61,34,48,34,104,116,109,108,59,32,99,104,97,114,115,
+101,116,61,117,116,102,45,56,34,32,47,62,10,32,115,116,121,108,101,61,34,100,105
+,115,112,108,97,121,58,110,111,110,101,59,34,62,60,60,108,105,62,60,97,32,104,
+114,101,102,61,34,104,116,116,112,58,47,47,119,119,119,46,32,116,121,112,101,61,
+39,116,101,120,116,47,106,97,118,97,115,99,114,105,112,116,39,62,208,180,208,181
+,209,143,209,130,208,181,208,187,209,140,208,189,208,190,209,129,209,130,208,184
+,209,129,208,190,208,190,209,130,208,178,208,181,209,130,209,129,209,130,208,178
+,208,184,208,184,208,191,209,128,208,190,208,184,208,183,208,178,208,190,208,180
+,209,129,209,130,208,178,208,176,208,177,208,181,208,183,208,190,208,191,208,176
+,209,129,208,189,208,190,209,129,209,130,208,184,224,164,170,224,165,129,224,164
+,184,224,165,141,224,164,164,224,164,191,224,164,149,224,164,190,224,164,149,224
+,164,190,224,164,130,224,164,151,224,165,141,224,164,176,224,165,135,224,164,184
+,224,164,137,224,164,168,224,165,141,224,164,185,224,165,139,224,164,130,224,164
+,168,224,165,135,224,164,181,224,164,191,224,164,167,224,164,190,224,164,168,224
+,164,184,224,164,173,224,164,190,224,164,171,224,164,191,224,164,149,224,165,141
+,224,164,184,224,164,191,224,164,130,224,164,151,224,164,184,224,165,129,224,164
+,176,224,164,149,224,165,141,224,164,183,224,164,191,224,164,164,224,164,149,224
+,165,137,224,164,170,224,165,128,224,164,176,224,164,190,224,164,135,224,164,159
+,224,164,181,224,164,191,224,164,156,224,165,141,224,164,158,224,164,190,224,164
+,170,224,164,168,224,164,149,224,164,190,224,164,176,224,165,141,224,164,176,224
+,164,181,224,164,190,224,164,136,224,164,184,224,164,149,224,165,141,224,164,176
+,224,164,191,224,164,175,224,164,164,224,164,190
+}
+/* GENERATED CODE END */
+;
+#endif  /* !BROTLI_EXTERNAL_DICTIONARY_DATA */
+
+#if !defined(BROTLI_EXTERNAL_DICTIONARY_DATA)
+static const BrotliDictionary kBrotliDictionary = {
+#else  /* mutable: BrotliSetDictionaryData() assigns .data at run time */
+static BrotliDictionary kBrotliDictionary = {
+#endif
+  /* size_bits_by_length: index bits per word length; 0 = no such words */
+  {
+    0, 0, 0, 0, 10, 10, 11, 11,
+    10, 10, 10, 10, 10, 9, 9, 8,
+    7, 7, 8, 7, 7, 6, 6, 5,
+    5, 0, 0, 0, 0, 0, 0, 0
+  },
+
+  /* offsets_by_length: cumulative byte offset of each word-length bucket */
+  {
+    0, 0, 0, 0, 0, 4096, 9216, 21504,
+    35840, 44032, 53248, 63488, 74752, 87040, 93696, 100864,
+    104704, 106752, 108928, 113536, 115968, 118528, 119872, 121280,
+    122016, 122784, 122784, 122784, 122784, 122784, 122784, 122784
+  },
+
+  /* data_size == sizeof(kBrotliDictionaryData) == offsets_by_length[31] */
+  122784,
+
+  /* data: compiled-in table, or NULL until supplied in external-data builds */
+#if defined(BROTLI_EXTERNAL_DICTIONARY_DATA)
+  NULL
+#else
+  kBrotliDictionaryData
+#endif
+};
+
+const BrotliDictionary* BrotliGetDictionary(void) {
+  return &kBrotliDictionary;  /* the single file-scope static instance */
+}
+
+void BrotliSetDictionaryData(const uint8_t* data) {
+#if defined(BROTLI_EXTERNAL_DICTIONARY_DATA)
+  /* Only the first non-NULL pointer is recorded; later calls are no-ops. */
+  if (data == NULL || kBrotliDictionary.data != NULL) return;
+  kBrotliDictionary.data = data;
+#else
+  BROTLI_UNUSED(data);  /* data is compiled in; silence unused-parameter. */
+#endif
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/common/dictionary.h b/third-party/libjxl/libjxl/third_party/brotli/c/common/dictionary.h
new file mode 100644
index 0000000000..b1c6f7f580
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/common/dictionary.h
@@ -0,0 +1,64 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Collection of static dictionary words. */
+
+#ifndef BROTLI_COMMON_DICTIONARY_H_
+#define BROTLI_COMMON_DICTIONARY_H_
+
+#include <brotli/port.h>
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct BrotliDictionary {
+  /**
+   * Number of bits used to encode a word's index within its length bucket.
+   *
+   * Specification: Appendix A. Static Dictionary Data
+   *
+   * Words in a dictionary are bucketed by length.
+   * @c 0 means that there are no words of a given length.
+   * The dictionary consists of words with lengths of [4..24] bytes.
+   * Entries at indices [0..3] and [25..31] should not be accessed.
+   */
+  uint8_t size_bits_by_length[32];
+
+  /* assert(offset[i + 1] == offset[i] + (bits[i] ? (i << bits[i]) : 0)) */
+  uint32_t offsets_by_length[32];
+
+  /* assert(data_size == offsets_by_length[31]) */
+  size_t data_size;
+
+  /* The data array has no declared bound; its layout must agree with the
+     size_bits_by_length values. The default (RFC 7932) dictionary is
+     data_size bytes long. */
+  const uint8_t* data;
+} BrotliDictionary;
+
+BROTLI_COMMON_API const BrotliDictionary* BrotliGetDictionary(void);
+
+/**
+ * Sets dictionary data.
+ *
+ * A no-op when data is NULL or when dictionary data is already set / present.
+ *
+ * Dictionary data MUST be provided before BrotliGetDictionary is invoked.
+ * This method is used ONLY in multi-client environments (e.g. C + Java),
+ * to reduce storage by sharing a single dictionary between implementations.
+ */
+BROTLI_COMMON_API void BrotliSetDictionaryData(const uint8_t* data);
+
+#define BROTLI_MIN_DICTIONARY_WORD_LENGTH 4
+#define BROTLI_MAX_DICTIONARY_WORD_LENGTH 24
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_COMMON_DICTIONARY_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/common/platform.c b/third-party/libjxl/libjxl/third_party/brotli/c/common/platform.c
new file mode 100644
index 0000000000..25d84a9467
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/common/platform.c
@@ -0,0 +1,23 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include <stdlib.h>
+
+#include <brotli/types.h>
+
+#include "platform.h"
+
+/* Default brotli_alloc_func: ignores opaque and returns malloc(size). */
+void* BrotliDefaultAllocFunc(void* opaque, size_t size) {
+  BROTLI_UNUSED(opaque);
+  return malloc(size);
+}
+
+/* Default brotli_free_func: ignores opaque and passes address to free(). */
+void BrotliDefaultFreeFunc(void* opaque, void* address) {
+  BROTLI_UNUSED(opaque);
+  free(address);
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/common/platform.h b/third-party/libjxl/libjxl/third_party/brotli/c/common/platform.h
new file mode 100644
index 0000000000..4186a8e96d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/common/platform.h
@@ -0,0 +1,525 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Macros for compiler / platform specific features and build options.
+
+   Build options are:
+    * BROTLI_BUILD_32_BIT disables 64-bit optimizations
+    * BROTLI_BUILD_64_BIT forces the use of 64-bit optimizations
+    * BROTLI_BUILD_BIG_ENDIAN forces the use of big-endian optimizations
+    * BROTLI_BUILD_ENDIAN_NEUTRAL disables endian-aware optimizations
+    * BROTLI_BUILD_LITTLE_ENDIAN forces the use of little-endian optimizations
+    * BROTLI_BUILD_NO_RBIT disables "rbit" optimization for ARM CPUs
+    * BROTLI_BUILD_NO_UNALIGNED_READ_FAST forces off the fast-unaligned-read
+      optimizations (mainly for testing purposes).
+    * BROTLI_DEBUG dumps file name and line number when decoder detects stream
+      or memory error
+    * BROTLI_ENABLE_LOG enables asserts and dumps various state information
+*/
+
+#ifndef BROTLI_COMMON_PLATFORM_H_
+#define BROTLI_COMMON_PLATFORM_H_
+
+#include <string.h>  /* memcpy */
+
+#include <brotli/port.h>
+#include <brotli/types.h>
+
+#if defined(OS_LINUX) || defined(OS_CYGWIN) || defined(__EMSCRIPTEN__)
+#include <endian.h>
+#elif defined(OS_FREEBSD)
+#include <machine/endian.h>
+#elif defined(OS_MACOSX)
+#include <machine/endian.h>
+/* Mirror the Linux convention through temporary BROTLI_X_* aliases. */
+#define BROTLI_X_BYTE_ORDER BYTE_ORDER
+#define BROTLI_X_LITTLE_ENDIAN LITTLE_ENDIAN
+#define BROTLI_X_BIG_ENDIAN BIG_ENDIAN
+#endif
+
+#if BROTLI_MSVC_VERSION_CHECK(18, 0, 0)
+#include <intrin.h>
+#endif
+
+#if defined(BROTLI_ENABLE_LOG) || defined(BROTLI_DEBUG)
+#include <assert.h>
+#include <stdio.h>
+#endif
+
+/* The following macros were borrowed from https://github.com/nemequ/hedley
+ * with permission of original author - Evan Nemerson <evan@nemerson.com> */
+
+/* >>> >>> >>> hedley macros */
+
+/* Define "BROTLI_PREDICT_TRUE" and "BROTLI_PREDICT_FALSE" macros for capable
+   compilers; on other compilers both expand to the bare condition.
+
+To apply a compiler hint, wrap the branching condition in a macro, like this:
+
+  if (BROTLI_PREDICT_TRUE(zero == 0)) {
+    // main execution path
+  } else {
+    // compiler should place this code outside of main execution path
+  }
+
+OR:
+
+  if (BROTLI_PREDICT_FALSE(something_rare_or_unexpected_happens)) {
+    // compiler should place this code outside of main execution path
+  }
+
+Both hints apply "!!" first, so the builtin only ever sees 0 or 1. */
+#if BROTLI_GNUC_HAS_BUILTIN(__builtin_expect, 3, 0, 0) || \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0) ||               \
+    BROTLI_SUNPRO_VERSION_CHECK(5, 15, 0) ||              \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) ||                  \
+    BROTLI_IBM_VERSION_CHECK(10, 1, 0) ||                 \
+    BROTLI_TI_VERSION_CHECK(7, 3, 0) ||                   \
+    BROTLI_TINYC_VERSION_CHECK(0, 9, 27)
+#define BROTLI_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
+#define BROTLI_PREDICT_FALSE(x) (__builtin_expect(!!(x), 0))
+#else
+#define BROTLI_PREDICT_FALSE(x) (x)
+#define BROTLI_PREDICT_TRUE(x) (x)
+#endif
+
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
+    !defined(__cplusplus)
+#define BROTLI_RESTRICT restrict  /* C99 or later, and not C++ */
+#elif BROTLI_GNUC_VERSION_CHECK(3, 1, 0) ||                         \
+    BROTLI_MSVC_VERSION_CHECK(14, 0, 0) ||                          \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0) ||                         \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) ||                            \
+    BROTLI_IBM_VERSION_CHECK(10, 1, 0) ||                           \
+    BROTLI_PGI_VERSION_CHECK(17, 10, 0) ||                          \
+    BROTLI_TI_VERSION_CHECK(8, 0, 0) ||                             \
+    BROTLI_IAR_VERSION_CHECK(8, 0, 0) ||                            \
+    (BROTLI_SUNPRO_VERSION_CHECK(5, 14, 0) && defined(__cplusplus))
+#define BROTLI_RESTRICT __restrict
+#elif BROTLI_SUNPRO_VERSION_CHECK(5, 3, 0) && !defined(__cplusplus)
+#define BROTLI_RESTRICT _Restrict
+#else  /* no known spelling: the qualifier is dropped */
+#define BROTLI_RESTRICT
+#endif  /* BROTLI_RESTRICT */
+
+#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \
+    (defined(__cplusplus) && (__cplusplus >= 199711L))
+#define BROTLI_MAYBE_INLINE inline  /* C99 or C++98 and later */
+#elif defined(__GNUC_STDC_INLINE__) || defined(__GNUC_GNU_INLINE__) || \
+    BROTLI_ARM_VERSION_CHECK(6, 2, 0)
+#define BROTLI_MAYBE_INLINE __inline__
+#elif BROTLI_MSVC_VERSION_CHECK(12, 0, 0) || \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) || BROTLI_TI_VERSION_CHECK(8, 0, 0)
+#define BROTLI_MAYBE_INLINE __inline
+#else  /* no inline keyword known for this compiler */
+#define BROTLI_MAYBE_INLINE
+#endif  /* BROTLI_MAYBE_INLINE */
+
+#if BROTLI_GNUC_HAS_ATTRIBUTE(always_inline, 4, 0, 0) ||                       \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0) ||                                    \
+    BROTLI_SUNPRO_VERSION_CHECK(5, 11, 0) ||                                   \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) ||                                       \
+    BROTLI_IBM_VERSION_CHECK(10, 1, 0) ||                                      \
+    BROTLI_TI_VERSION_CHECK(8, 0, 0) ||                                        \
+    (BROTLI_TI_VERSION_CHECK(7, 3, 0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__))
+#define BROTLI_INLINE BROTLI_MAYBE_INLINE __attribute__((__always_inline__))
+#elif BROTLI_MSVC_VERSION_CHECK(12, 0, 0)
+#define BROTLI_INLINE BROTLI_MAYBE_INLINE __forceinline
+#elif BROTLI_TI_VERSION_CHECK(7, 0, 0) && defined(__cplusplus)
+#define BROTLI_INLINE BROTLI_MAYBE_INLINE _Pragma("FUNC_ALWAYS_INLINE;")
+#elif BROTLI_IAR_VERSION_CHECK(8, 0, 0)
+#define BROTLI_INLINE BROTLI_MAYBE_INLINE _Pragma("inline=forced")
+#else  /* no forced-inline spelling: fall back to a plain inline request */
+#define BROTLI_INLINE BROTLI_MAYBE_INLINE
+#endif  /* BROTLI_INLINE */
+
+#if BROTLI_GNUC_HAS_ATTRIBUTE(noinline, 4, 0, 0) ||                            \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0) ||                                    \
+    BROTLI_SUNPRO_VERSION_CHECK(5, 11, 0) ||                                   \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) ||                                       \
+    BROTLI_IBM_VERSION_CHECK(10, 1, 0) ||                                      \
+    BROTLI_TI_VERSION_CHECK(8, 0, 0) ||                                        \
+    (BROTLI_TI_VERSION_CHECK(7, 3, 0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__))
+#define BROTLI_NOINLINE __attribute__((__noinline__))
+#elif BROTLI_MSVC_VERSION_CHECK(13, 10, 0)
+#define BROTLI_NOINLINE __declspec(noinline)
+#elif BROTLI_PGI_VERSION_CHECK(10, 2, 0)
+#define BROTLI_NOINLINE _Pragma("noinline")
+#elif BROTLI_TI_VERSION_CHECK(6, 0, 0) && defined(__cplusplus)
+#define BROTLI_NOINLINE _Pragma("FUNC_CANNOT_INLINE;")
+#elif BROTLI_IAR_VERSION_CHECK(8, 0, 0)
+#define BROTLI_NOINLINE _Pragma("inline=never")
+#else  /* no known spelling: the request is dropped */
+#define BROTLI_NOINLINE
+#endif  /* BROTLI_NOINLINE */
+
+/* <<< <<< <<< end of hedley macros. */
+
+#if BROTLI_GNUC_HAS_ATTRIBUTE(unused, 2, 7, 0) || \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0)
+#define BROTLI_UNUSED_FUNCTION static BROTLI_INLINE __attribute__ ((unused))
+#else  /* no "unused" attribute available */
+#define BROTLI_UNUSED_FUNCTION static BROTLI_INLINE
+#endif  /* BROTLI_UNUSED_FUNCTION */
+
+#if BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0)
+#define BROTLI_ALIGNED(N) __attribute__((aligned(N)))
+#else  /* attribute unavailable: the alignment request expands to nothing */
+#define BROTLI_ALIGNED(N)
+#endif  /* BROTLI_ALIGNED */
+
+#if (defined(__ARM_ARCH) && (__ARM_ARCH == 7)) || \
+    (defined(M_ARM) && (M_ARM == 7))
+#define BROTLI_TARGET_ARMV7
+#endif  /* ARMv7; NOTE(review): MSVC's macro may be _M_ARM, confirm M_ARM */
+
+#if (defined(__ARM_ARCH) && (__ARM_ARCH == 8)) || \
+    defined(__aarch64__) || defined(__ARM64_ARCH_8__)
+#define BROTLI_TARGET_ARMV8_ANY
+
+#if defined(__ARM_32BIT_STATE)
+#define BROTLI_TARGET_ARMV8_32
+#elif defined(__ARM_64BIT_STATE)
+#define BROTLI_TARGET_ARMV8_64
+#endif  /* if neither state macro exists, only ARMV8_ANY is defined */
+
+#endif  /* ARMv8 */
+
+#if defined(__ARM_NEON__) || defined(__ARM_NEON)
+#define BROTLI_TARGET_NEON
+#endif  /* NEON */
+
+#if defined(__i386) || defined(_M_IX86)
+#define BROTLI_TARGET_X86
+#endif  /* 32-bit x86 */
+
+#if defined(__x86_64__) || defined(_M_X64)
+#define BROTLI_TARGET_X64
+#endif  /* 64-bit x86 */
+
+#if defined(__PPC64__)
+#define BROTLI_TARGET_POWERPC64
+#endif  /* PowerPC64 */
+
+#if defined(__riscv) && defined(__riscv_xlen) && __riscv_xlen == 64
+#define BROTLI_TARGET_RISCV64
+#endif  /* RISC-V 64 */
+
+#if defined(BROTLI_TARGET_X64) || defined(BROTLI_TARGET_ARMV8_64) || \
+    defined(BROTLI_TARGET_POWERPC64) || defined(BROTLI_TARGET_RISCV64)
+#define BROTLI_TARGET_64_BITS 1
+#else
+#define BROTLI_TARGET_64_BITS 0
+#endif  /* BROTLI_TARGET_64_BITS: what the detected target is */
+
+#if defined(BROTLI_BUILD_64_BIT)
+#define BROTLI_64_BITS 1
+#elif defined(BROTLI_BUILD_32_BIT)
+#define BROTLI_64_BITS 0
+#else  /* no build override: follow the detected target */
+#define BROTLI_64_BITS BROTLI_TARGET_64_BITS
+#endif  /* BROTLI_64_BITS: what this build actually uses */
+
+#if (BROTLI_64_BITS)
+#define brotli_reg_t uint64_t
+#else
+#define brotli_reg_t uint32_t
+#endif  /* brotli_reg_t */
+
+#if defined(BROTLI_BUILD_BIG_ENDIAN)
+#define BROTLI_BIG_ENDIAN 1
+#elif defined(BROTLI_BUILD_LITTLE_ENDIAN)
+#define BROTLI_LITTLE_ENDIAN 1
+#elif defined(BROTLI_BUILD_ENDIAN_NEUTRAL)
+/* Deliberately empty: stop the chain so neither endianness is selected. */
+#elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#define BROTLI_LITTLE_ENDIAN 1
+#elif defined(_WIN32) || defined(BROTLI_TARGET_X64)
+/* Win32 & x64 can currently always be assumed to be little endian */
+#define BROTLI_LITTLE_ENDIAN 1
+#elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#define BROTLI_BIG_ENDIAN 1
+#elif defined(BROTLI_X_BYTE_ORDER)
+#if BROTLI_X_BYTE_ORDER == BROTLI_X_LITTLE_ENDIAN
+#define BROTLI_LITTLE_ENDIAN 1
+#elif BROTLI_X_BYTE_ORDER == BROTLI_X_BIG_ENDIAN
+#define BROTLI_BIG_ENDIAN 1
+#endif
+#endif  /* end of the endianness selection chain */
+
+#if !defined(BROTLI_LITTLE_ENDIAN)
+#define BROTLI_LITTLE_ENDIAN 0
+#endif  /* default to 0 so the flag is always usable in #if */
+
+#if !defined(BROTLI_BIG_ENDIAN)
+#define BROTLI_BIG_ENDIAN 0
+#endif  /* default to 0 so the flag is always usable in #if */
+
+#if defined(BROTLI_X_BYTE_ORDER)
+#undef BROTLI_X_BYTE_ORDER
+#undef BROTLI_X_LITTLE_ENDIAN
+#undef BROTLI_X_BIG_ENDIAN
+#endif  /* temporary BROTLI_X_* aliases removed */
+
+#if defined(BROTLI_BUILD_NO_UNALIGNED_READ_FAST)
+#define BROTLI_UNALIGNED_READ_FAST (!!0)  /* explicitly disabled by the build */
+#elif defined(BROTLI_TARGET_X86) || defined(BROTLI_TARGET_X64) ||       \
+    defined(BROTLI_TARGET_ARMV7) || defined(BROTLI_TARGET_ARMV8_ANY) || \
+    defined(BROTLI_TARGET_RISCV64)
+/* These targets are known to generate efficient code for unaligned reads
+ * (e.g. a single instruction, not multiple 1-byte loads, shifted and or'd
+ * together). */
+#define BROTLI_UNALIGNED_READ_FAST (!!1)
+#else  /* any other target: treated as not fast */
+#define BROTLI_UNALIGNED_READ_FAST (!!0)
+#endif  /* BROTLI_UNALIGNED_READ_FAST */
+
+/* Unaligned access done portably: every value is moved with memcpy. */
+static BROTLI_INLINE uint16_t BrotliUnalignedRead16(const void* p) {
+  uint16_t value;
+  memcpy(&value, p, sizeof(value));
+  return value;
+}
+static BROTLI_INLINE uint32_t BrotliUnalignedRead32(const void* p) {
+  uint32_t value;  /* filled byte-for-byte, so p needs no alignment */
+  memcpy(&value, p, sizeof(value));
+  return value;
+}
+static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) {
+  uint64_t value;  /* filled byte-for-byte, so p needs no alignment */
+  memcpy(&value, p, sizeof(value));
+  return value;
+}
+static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) {
+  memcpy(p, &v, sizeof(v));  /* the eight bytes of v are copied to p as-is */
+}
+
+#if BROTLI_LITTLE_ENDIAN
+/* Little-endian host: the LE accessors are simply the memcpy helpers. */
+#define BROTLI_UNALIGNED_LOAD16LE BrotliUnalignedRead16
+#define BROTLI_UNALIGNED_LOAD32LE BrotliUnalignedRead32
+#define BROTLI_UNALIGNED_LOAD64LE BrotliUnalignedRead64
+#define BROTLI_UNALIGNED_STORE64LE BrotliUnalignedWrite64
+#elif BROTLI_BIG_ENDIAN  /* BROTLI_LITTLE_ENDIAN */
+/* Big-endian host: read in native order, then byte-swap to the LE value. */
+#define BROTLI_BSWAP16_(V) ((uint16_t)( \
+  (((V) & 0xFFU) << 8) | \
+  (((V) >> 8) & 0xFFU)))
+static BROTLI_INLINE uint16_t BROTLI_UNALIGNED_LOAD16LE(const void* p) {
+  uint16_t value = BrotliUnalignedRead16(p);
+  return BROTLI_BSWAP16_(value);
+}
+#define BROTLI_BSWAP32_(V) ( \
+  (((V) & 0xFFU) << 24) | (((V) & 0xFF00U) << 8) | \
+  (((V) >> 8) & 0xFF00U) | (((V) >> 24) & 0xFFU))
+static BROTLI_INLINE uint32_t BROTLI_UNALIGNED_LOAD32LE(const void* p) {
+  uint32_t value = BrotliUnalignedRead32(p);  /* native byte order */
+  return BROTLI_BSWAP32_(value);  /* macro reads V repeatedly: pass a local */
+}
+#define BROTLI_BSWAP64_(V) ( \
+  (((V) & 0xFFU) << 56) | (((V) & 0xFF00U) << 40) | \
+  (((V) & 0xFF0000U) << 24) | (((V) & 0xFF000000U) << 8) | \
+  (((V) >> 8) & 0xFF000000U) | (((V) >> 24) & 0xFF0000U) | \
+  (((V) >> 40) & 0xFF00U) | (((V) >> 56) & 0xFFU))
+static BROTLI_INLINE uint64_t BROTLI_UNALIGNED_LOAD64LE(const void* p) {
+  uint64_t value = BrotliUnalignedRead64(p);  /* native byte order */
+  return BROTLI_BSWAP64_(value);  /* macro reads V repeatedly: pass a local */
+}
+static BROTLI_INLINE void BROTLI_UNALIGNED_STORE64LE(void* p, uint64_t v) {
+  uint64_t value = BROTLI_BSWAP64_(v);  /* swap first ... */
+  BrotliUnalignedWrite64(p, value);  /* ... then store in native order */
+}
+#else  /* BROTLI_LITTLE_ENDIAN */
+/* Endian-neutral path: values are assembled or split one byte at a time. */
+static BROTLI_INLINE uint16_t BROTLI_UNALIGNED_LOAD16LE(const void* p) {
+  const uint8_t* bytes = (const uint8_t*)p;
+  return (uint16_t)((bytes[1] << 8) | bytes[0]);
+}
+static BROTLI_INLINE uint32_t BROTLI_UNALIGNED_LOAD32LE(const void* p) {
+  /* Combine the four bytes, least significant first. */
+  const uint8_t* bytes = (const uint8_t*)p;
+  const uint32_t b0 = bytes[0];
+  const uint32_t b1 = bytes[1];
+  const uint32_t b2 = bytes[2];
+  return b0 | (b1 << 8) | (b2 << 16) | ((uint32_t)bytes[3] << 24);
+}
+static BROTLI_INLINE uint64_t BROTLI_UNALIGNED_LOAD64LE(const void* p) {
+  /* Walk from the most significant byte (index 7) down to index 0,
+     shifting the accumulator left by one byte before each byte is
+     merged in, so bytes[0] ends up in the low 8 bits. */
+  const uint8_t* bytes = (const uint8_t*)p;
+  uint64_t value = 0;
+  int i;
+  for (i = 7; i >= 0; --i) {
+    value = (value << 8) | (uint64_t)bytes[i];
+  }
+  return value;
+}
+static BROTLI_INLINE void BROTLI_UNALIGNED_STORE64LE(void* p, uint64_t v) {
+  /* Emit the eight bytes of v in little-endian order: the low byte goes
+     to offset 0 and each following offset takes the next 8 bits. */
+  uint8_t* bytes = (uint8_t*)p;
+  uint64_t rest = v;
+  int i;
+  for (i = 0; i < 8; ++i) {
+    bytes[i] = (uint8_t)rest;
+    rest >>= 8;
+  }
+}
+#endif  /* BROTLI_LITTLE_ENDIAN */
+
+static BROTLI_INLINE void* BROTLI_UNALIGNED_LOAD_PTR(const void* p) {
+  void* loaded;  /* receives the pointer value stored at p */
+  memcpy(&loaded, p, sizeof(loaded));
+  return loaded;
+}
+
+static BROTLI_INLINE void BROTLI_UNALIGNED_STORE_PTR(void* p, const void* v) {
+  memcpy(p, &v, sizeof(v));  /* copies the pointer value itself into p */
+}
+
+/* BROTLI_IS_CONSTANT(x) wraps __builtin_constant_p; false when unavailable. */
+#if BROTLI_GNUC_HAS_BUILTIN(__builtin_constant_p, 3, 0, 1) || \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0)
+#define BROTLI_IS_CONSTANT(x) (!!__builtin_constant_p(x))
+#else
+#define BROTLI_IS_CONSTANT(x) (!!0)
+#endif
+
+#if defined(BROTLI_TARGET_ARMV7) || defined(BROTLI_TARGET_ARMV8_ANY)
+#define BROTLI_HAS_UBFX (!!1)  /* ARMv7 / ARMv8 targets only */
+#else
+#define BROTLI_HAS_UBFX (!!0)
+#endif  /* BROTLI_HAS_UBFX */
+
+#if defined(BROTLI_ENABLE_LOG)
+#define BROTLI_LOG(x) printf x  /* x must be a parenthesized argument list */
+#else  /* logging compiled out */
+#define BROTLI_LOG(x)
+#endif  /* BROTLI_ENABLE_LOG */
+
+#if defined(BROTLI_DEBUG) || defined(BROTLI_ENABLE_LOG)
+#define BROTLI_DCHECK(x) assert(x)
+static BROTLI_INLINE void BrotliDump(const char* f, int l, const char* fn) {
+  fprintf(stderr, "%s:%d (%s)\n", f, l, fn);  /* file:line (function) */
+  fflush(stderr);
+}
+#define BROTLI_DUMP() BrotliDump(__FILE__, __LINE__, __FUNCTION__)
+#else  /* neither flag set: both macros expand to no-ops */
+#define BROTLI_DCHECK(x)
+#define BROTLI_DUMP() (void)(0)
+#endif  /* BROTLI_DEBUG || BROTLI_ENABLE_LOG */
+
+/* BrotliRBit assumes brotli_reg_t matches the native CPU register width. */
+#if (BROTLI_64_BITS == BROTLI_TARGET_64_BITS)
+/* TODO(eustas): add appropriate icc/sunpro/arm/ibm/ti checks. */
+#if (BROTLI_GNUC_VERSION_CHECK(3, 0, 0) || defined(__llvm__)) && \
+    !defined(BROTLI_BUILD_NO_RBIT)
+#if defined(BROTLI_TARGET_ARMV7) || defined(BROTLI_TARGET_ARMV8_ANY)
+/* TODO(eustas): detect ARMv6T2 and enable this code for it. */
+static BROTLI_INLINE brotli_reg_t BrotliRBit(brotli_reg_t input) {
+  brotli_reg_t output;
+  __asm__("rbit %0, %1\n" : "=r"(output) : "r"(input));
+  return output;
+}
+#define BROTLI_RBIT(x) BrotliRBit(x)
+#endif  /* armv7 / armv8 */
+#endif  /* gcc || clang */
+#endif  /* brotli_reg_t is native */
+#if !defined(BROTLI_RBIT)
+static BROTLI_INLINE void BrotliRBit(void) { /* Stub: calling it with an argument breaks the build. */ }
+#endif  /* BROTLI_RBIT */
+
+#define BROTLI_REPEAT_4(X) {X; X; X; X;}  /* braced block running X 4 times */
+#define BROTLI_REPEAT_5(X) {X; X; X; X; X;}  /* ... 5 times */
+#define BROTLI_REPEAT_6(X) {X; X; X; X; X; X;}  /* ... 6 times */
+
+#define BROTLI_UNUSED(X) (void)(X)  /* evaluates X and discards the result */
+
+#define BROTLI_MIN_MAX(T)                                                      \
+  static BROTLI_INLINE T brotli_min_ ## T (T a, T b) { return a < b ? a : b; } \
+  static BROTLI_INLINE T brotli_max_ ## T (T a, T b) { return a > b ? a : b; }
+BROTLI_MIN_MAX(double) BROTLI_MIN_MAX(float) BROTLI_MIN_MAX(int)
+BROTLI_MIN_MAX(size_t) BROTLI_MIN_MAX(uint32_t) BROTLI_MIN_MAX(uint8_t)
+#undef BROTLI_MIN_MAX  /* generator macro is only needed for the lines above */
+#define BROTLI_MIN(T, A, B) (brotli_min_ ## T((A), (B)))  /* T = type suffix */
+#define BROTLI_MAX(T, A, B) (brotli_max_ ## T((A), (B)))  /* e.g. size_t */
+
+#define BROTLI_SWAP(T, A, I, J) { \
+  T __brotli_swap_tmp = (A)[(I)]; \
+  (A)[(I)] = (A)[(J)];            \
+  (A)[(J)] = __brotli_swap_tmp;   \
+}  /* exchanges A[I] and A[J] through a temporary of type T */
+
+#if BROTLI_64_BITS
+#if BROTLI_GNUC_HAS_BUILTIN(__builtin_ctzll, 3, 4, 0) || \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0)
+#define BROTLI_TZCNT64 __builtin_ctzll
+#elif BROTLI_MSVC_VERSION_CHECK(18, 0, 0)
+#if defined(BROTLI_TARGET_X64)
+#define BROTLI_TZCNT64 _tzcnt_u64
+#else /* BROTLI_TARGET_X64 */
+static BROTLI_INLINE uint32_t BrotliBsf64Msvc(uint64_t x) {
+  unsigned long lsb;  /* _BitScanForward64 writes through unsigned long* */
+  _BitScanForward64(&lsb, x);
+  return (uint32_t)lsb;
+}
+#define BROTLI_TZCNT64 BrotliBsf64Msvc
+#endif /* BROTLI_TARGET_X64 */
+#endif /* __builtin_ctzll */
+#endif /* BROTLI_64_BITS */
+
+#if BROTLI_GNUC_HAS_BUILTIN(__builtin_clz, 3, 4, 0) || \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0)
+#define BROTLI_BSR32(x) (31u ^ (uint32_t)__builtin_clz(x))  /* 31 - clz(x) */
+#elif BROTLI_MSVC_VERSION_CHECK(18, 0, 0)
+static BROTLI_INLINE uint32_t BrotliBsr32Msvc(uint32_t x) {
+  unsigned long msb;  /* the intrinsic reports the bit index through this */
+  _BitScanReverse(&msb, x);
+  return (uint32_t)msb;
+}
+#define BROTLI_BSR32 BrotliBsr32Msvc
+#endif /* __builtin_clz; BROTLI_BSR32 stays undefined on other compilers */
+
+/* Default brotli_alloc_func */
+BROTLI_COMMON_API void* BrotliDefaultAllocFunc(void* opaque, size_t size);
+
+/* Default brotli_free_func */
+BROTLI_COMMON_API void BrotliDefaultFreeFunc(void* opaque, void* address);
+
+BROTLI_UNUSED_FUNCTION void BrotliSuppressUnusedFunctions(void) {  /* references each static helper so none is reported as unused */
+  BROTLI_UNUSED(&BrotliSuppressUnusedFunctions);  /* includes itself */
+  BROTLI_UNUSED(&BrotliUnalignedRead16);
+  BROTLI_UNUSED(&BrotliUnalignedRead32);
+  BROTLI_UNUSED(&BrotliUnalignedRead64);
+  BROTLI_UNUSED(&BrotliUnalignedWrite64);
+  BROTLI_UNUSED(&BROTLI_UNALIGNED_LOAD16LE);
+  BROTLI_UNUSED(&BROTLI_UNALIGNED_LOAD32LE);
+  BROTLI_UNUSED(&BROTLI_UNALIGNED_LOAD64LE);
+  BROTLI_UNUSED(&BROTLI_UNALIGNED_STORE64LE);
+  BROTLI_UNUSED(&BROTLI_UNALIGNED_LOAD_PTR);
+  BROTLI_UNUSED(&BROTLI_UNALIGNED_STORE_PTR);
+  BROTLI_UNUSED(&BrotliRBit);
+  BROTLI_UNUSED(&brotli_min_double);
+  BROTLI_UNUSED(&brotli_max_double);
+  BROTLI_UNUSED(&brotli_min_float);
+  BROTLI_UNUSED(&brotli_max_float);
+  BROTLI_UNUSED(&brotli_min_int);
+  BROTLI_UNUSED(&brotli_max_int);
+  BROTLI_UNUSED(&brotli_min_size_t);
+  BROTLI_UNUSED(&brotli_max_size_t);
+  BROTLI_UNUSED(&brotli_min_uint32_t);
+  BROTLI_UNUSED(&brotli_max_uint32_t);
+  BROTLI_UNUSED(&brotli_min_uint8_t);
+  BROTLI_UNUSED(&brotli_max_uint8_t);
+  BROTLI_UNUSED(&BrotliDefaultAllocFunc);
+  BROTLI_UNUSED(&BrotliDefaultFreeFunc);
+#if defined(BROTLI_DEBUG) || defined(BROTLI_ENABLE_LOG)
+  BROTLI_UNUSED(&BrotliDump);  /* only defined in debug / log builds */
+#endif
+}
+
+#endif  /* BROTLI_COMMON_PLATFORM_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/common/shared_dictionary.c b/third-party/libjxl/libjxl/third_party/brotli/c/common/shared_dictionary.c
new file mode 100644
index 0000000000..3ca40c0698
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/common/shared_dictionary.c
@@ -0,0 +1,515 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Shared Dictionary definition and utilities. */
+
+#include <brotli/shared_dictionary.h>
+
+#include <memory.h>
+#include <stdlib.h>  /* malloc, free */
+#include <stdio.h>
+
+#include "dictionary.h"
+#include "platform.h"
+#include "shared_dictionary_internal.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_NUM_ENCODED_LENGTHS (SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH \
+    - SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH + 1)  /* inclusive range */
+
+/* Max size bits per word length allowed by spec; checked in ParseWordList. */
+#define BROTLI_MAX_SIZE_BITS 15u
+
+/* Reads one byte that must be 0 or 1 into *result; advances *pos on success. */
+static BROTLI_BOOL ReadBool(const uint8_t* encoded, size_t size, size_t* pos,
+    BROTLI_BOOL* result) {
+  const size_t cursor = *pos;
+  uint8_t raw;
+  if (cursor >= size) return BROTLI_FALSE;  /* nothing left to read */
+  raw = encoded[cursor];
+  if (raw > 1) return BROTLI_FALSE;  /* only 0 and 1 encode a bool */
+  *result = TO_BROTLI_BOOL(raw);
+  *pos = cursor + 1;
+  return BROTLI_TRUE;
+}
+
+/* Reads a single byte into *result; BROTLI_FALSE if no byte is available. */
+static BROTLI_BOOL ReadUint8(const uint8_t* encoded, size_t size, size_t* pos,
+    uint8_t* result) {
+  const size_t next = *pos + sizeof(uint8_t);
+  if (next > size) return BROTLI_FALSE;  /* *pos is left untouched */
+  *result = encoded[next - sizeof(uint8_t)];
+  *pos = next;
+  return BROTLI_TRUE;
+}
+
+/* Loads two bytes via BROTLI_UNALIGNED_LOAD16LE; BROTLI_FALSE if truncated. */
+static BROTLI_BOOL ReadUint16(const uint8_t* encoded, size_t size, size_t* pos,
+    uint16_t* result) {
+  const size_t start = *pos;
+  const size_t needed_end = start + sizeof(uint16_t);
+  if (needed_end > size) return BROTLI_FALSE;  /* input is truncated */
+  *result = BROTLI_UNALIGNED_LOAD16LE(encoded + start);
+  *pos = start + 2;
+  return BROTLI_TRUE;
+}
+
+/* Decodes a base-128 varint, least significant group first, into *result.
+   Returns BROTLI_FALSE on truncated input or a value wider than 32 bits. */
+static BROTLI_BOOL ReadVarint32(const uint8_t* encoded, size_t size,
+    size_t* pos, uint32_t* result) {
+  int group;  /* index of the 7-bit group being read */
+  *result = 0;
+  for (group = 0; ; group++) {
+    uint8_t cur;
+    if (*pos >= size) return BROTLI_FALSE;
+    cur = encoded[(*pos)++];
+    /* The fifth group may only contribute the top 4 bits of the value. */
+    if (group == 4 && cur > 15) return BROTLI_FALSE;
+    *result |= (uint32_t)(cur & 127) << (group * 7);
+    if (cur < 128) return BROTLI_TRUE;
+  }
+}
+
+/* Fills offsets_by_length with running offsets; returns the total length. */
+static size_t BrotliSizeBitsToOffsets(const uint8_t* size_bits_by_length,
+    uint32_t* offsets_by_length) {
+  uint32_t total = 0;
+  uint32_t len;
+  for (len = 0; len <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; len++) {
+    uint8_t bits;
+    offsets_by_length[len] = total;
+    bits = size_bits_by_length[len];
+    if (bits != 0) total += len << bits;  /* 0 bits contributes nothing */
+  }
+  return total;
+}
+
+/* Parses one custom word list: size bits per word length, then the words. */
+static BROTLI_BOOL ParseWordList(size_t size, const uint8_t* encoded,
+    size_t* pos, BrotliDictionary* out) {
+  size_t len;
+  size_t words_size;
+  size_t cursor = *pos;
+  if (cursor + BROTLI_NUM_ENCODED_LENGTHS > size) return BROTLI_FALSE;
+
+  /* Lengths below the minimum are not encoded; their size bits stay zero. */
+  memset(out->size_bits_by_length, 0, SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH);
+  memcpy(&out->size_bits_by_length[SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH],
+      encoded + cursor, BROTLI_NUM_ENCODED_LENGTHS);
+  cursor += BROTLI_NUM_ENCODED_LENGTHS;
+  len = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
+  while (len <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH) {
+    if (out->size_bits_by_length[len] > BROTLI_MAX_SIZE_BITS) {
+      return BROTLI_FALSE;
+    }
+    len++;
+  }
+  words_size = BrotliSizeBitsToOffsets(
+      out->size_bits_by_length, out->offsets_by_length);
+  out->data = encoded + cursor;  /* points into encoded, no copy */
+  out->data_size = words_size;
+  cursor += words_size;
+  if (cursor > size) return BROTLI_FALSE;
+  *pos = cursor;
+  return BROTLI_TRUE;
+}
+
+/* Rebuilds transforms->cutOffTransforms: for each OMIT_LAST_n type (including
+   identity) records the first transform with empty prefix and suffix, or -1. */
+static void ComputeCutoffTransforms(BrotliTransforms* transforms) {
+  uint32_t idx;
+  for (idx = 0; idx <= BROTLI_TRANSFORMS_MAX_CUT_OFF; idx++) {
+    transforms->cutOffTransforms[idx] = -1;
+  }
+  for (idx = 0; idx < transforms->num_transforms; idx++) {
+    const uint8_t* head = BROTLI_TRANSFORM_PREFIX(transforms, idx);
+    const uint8_t kind = BROTLI_TRANSFORM_TYPE(transforms, idx);
+    const uint8_t* tail = BROTLI_TRANSFORM_SUFFIX(transforms, idx);
+    if (kind > BROTLI_TRANSFORM_OMIT_LAST_9) continue;
+    if (*head != 0 || *tail != 0) continue;  /* length byte 0: empty string */
+    if (transforms->cutOffTransforms[kind] != -1) continue;  /* keep first */
+    transforms->cutOffTransforms[kind] = (int16_t)idx;
+  }
+}
+
+static BROTLI_BOOL ParsePrefixSuffixTable(size_t size, const uint8_t* encoded,
+    size_t* pos, BrotliTransforms* out, uint16_t* out_table,
+    size_t* out_table_size) {  /* fills out_table with stringlet offsets */
+  size_t position = *pos;
+  size_t offset = 0;  /* byte offset inside the prefix/suffix data */
+  size_t stringlet_count = 0;  /* NUM_PREFIX_SUFFIX */
+  size_t data_length = 0;
+
+  /* PREFIX_SUFFIX_LENGTH: 16-bit byte count of the stringlet data. */
+  if (!ReadUint16(encoded, size, &position, &out->prefix_suffix_size)) {
+    return BROTLI_FALSE;
+  }
+  data_length = out->prefix_suffix_size;
+
+  /* Must at least have space for null terminator. */
+  if (data_length < 1) return BROTLI_FALSE;
+  out->prefix_suffix = &encoded[position];  /* points into encoded, no copy */
+  if (position + data_length >= size) return BROTLI_FALSE;  /* >= keeps 1 byte */
+  while (BROTLI_TRUE) {
+    /* STRING_LENGTH: one length byte precedes each stringlet. */
+    size_t stringlet_len = encoded[position + offset];
+    out_table[stringlet_count] = (uint16_t)offset;
+    stringlet_count++;
+    offset++;
+    if (stringlet_len == 0) {  /* an empty stringlet must be the last byte */
+      if (offset == data_length) {
+        break;
+      } else {
+        return BROTLI_FALSE;  /* empty stringlet before the end of data */
+      }
+    }
+    if (stringlet_count > 255) return BROTLI_FALSE;
+    offset += stringlet_len;
+    if (offset >= data_length) return BROTLI_FALSE;  /* overruns the data */
+  }
+
+  position += data_length;
+  *pos = position;
+  *out_table_size = (uint16_t)stringlet_count;
+  return BROTLI_TRUE;
+}
+
+static BROTLI_BOOL ParseTransformsList(size_t size, const uint8_t* encoded,
+    size_t* pos, BrotliTransforms* out, uint16_t* prefix_suffix_table,
+    size_t* prefix_suffix_count) {
+  uint32_t i;
+  BROTLI_BOOL has_params = BROTLI_FALSE;  /* any SHIFT_* transform present */
+  BROTLI_BOOL prefix_suffix_ok = BROTLI_FALSE;
+  size_t position = *pos;
+  size_t stringlet_cnt = 0;
+  if (position >= size) return BROTLI_FALSE;
+
+  prefix_suffix_ok = ParsePrefixSuffixTable(
+      size, encoded, &position, out, prefix_suffix_table, &stringlet_cnt);
+  if (!prefix_suffix_ok) return BROTLI_FALSE;
+  out->prefix_suffix_map = prefix_suffix_table;
+  *prefix_suffix_count = stringlet_cnt;  /* written even if parsing fails later */
+
+  out->num_transforms = encoded[position++];  /* one-byte transform count */
+  out->transforms = &encoded[position];  /* points into encoded, no copy */
+  position += (size_t)out->num_transforms * 3;  /* 3 bytes per transform */
+  if (position > size) return BROTLI_FALSE;
+  /* Check for errors and read extra parameters. */
+  for (i = 0; i < out->num_transforms; i++) {
+    uint8_t prefix_id = BROTLI_TRANSFORM_PREFIX_ID(out, i);
+    uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);
+    uint8_t suffix_id = BROTLI_TRANSFORM_SUFFIX_ID(out, i);
+    if (prefix_id >= stringlet_cnt) return BROTLI_FALSE;
+    if (type >= BROTLI_NUM_TRANSFORM_TYPES) return BROTLI_FALSE;
+    if (suffix_id >= stringlet_cnt) return BROTLI_FALSE;
+    if (type == BROTLI_TRANSFORM_SHIFT_FIRST ||
+        type == BROTLI_TRANSFORM_SHIFT_ALL) {
+      has_params = BROTLI_TRUE;
+    }
+  }
+  if (has_params) {
+    out->params = &encoded[position];
+    position += (size_t)out->num_transforms * 2;  /* 2 bytes per transform */
+    if (position > size) return BROTLI_FALSE;
+    for (i = 0; i < out->num_transforms; i++) {
+      uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);
+      if (type != BROTLI_TRANSFORM_SHIFT_FIRST &&
+          type != BROTLI_TRANSFORM_SHIFT_ALL) {
+        if (out->params[i * 2] != 0 || out->params[i * 2 + 1] != 0) {
+          return BROTLI_FALSE;  /* non-shift transforms need zero params */
+        }
+      }
+    }
+  } else {
+    out->params = NULL;
+  }
+  ComputeCutoffTransforms(out);
+  *pos = position;
+  return BROTLI_TRUE;
+}
+
+/* Pre-pass over a serialized dictionary: reports whether it carries an LZ77
+   prefix chunk and whether it defines custom word or transform lists. */
+static BROTLI_BOOL DryParseDictionary(const uint8_t* encoded,
+    size_t size, uint32_t* num_prefix, BROTLI_BOOL* is_custom_static_dict) {
+  size_t cursor = 2;  /* the two magic header bytes are skipped */
+  uint32_t prefix_len = 0;  /* LZ77_DICTIONARY_LENGTH */
+  uint8_t word_lists;
+  uint8_t transform_lists;
+  *is_custom_static_dict = BROTLI_FALSE;
+  *num_prefix = 0;
+
+  if (!ReadVarint32(encoded, size, &cursor, &prefix_len)) {
+    return BROTLI_FALSE;
+  }
+  if (prefix_len != 0) {
+    /* Not mandated by the format: prefixes of 2^30 bytes or more are
+       refused, a limit that comes from the 32-bit Brotli decoder. */
+    if (prefix_len > 1073741823) return BROTLI_FALSE;
+    *num_prefix = 1;
+    if (cursor + prefix_len > size) return BROTLI_FALSE;
+    cursor += prefix_len;
+  }
+
+  /* When the first count is 0 the next byte is the transform-list count;
+     otherwise the result is already decided by the first count alone. */
+  if (!ReadUint8(encoded, size, &cursor, &word_lists)) return BROTLI_FALSE;
+  if (!ReadUint8(encoded, size, &cursor, &transform_lists)) {
+    return BROTLI_FALSE;
+  }
+  if (word_lists != 0 || transform_lists != 0) {
+    *is_custom_static_dict = BROTLI_TRUE;
+  }
+
+  return BROTLI_TRUE;
+}
+
+static BROTLI_BOOL ParseDictionary(const uint8_t* encoded, size_t size,
+    BrotliSharedDictionary* dict) {
+  uint32_t i;
+  size_t pos = 0;
+  uint32_t chunk_size = 0;
+  size_t total_prefix_suffix_count = 0;
+  size_t trasform_list_start[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
+  uint16_t temporary_prefix_suffix_table[256];  /* scratch for the first pass */
+
+  /* Skip magic header bytes. */
+  pos += 2;
+
+  /* LZ77_DICTIONARY_LENGTH */
+  if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;
+  if (chunk_size != 0) {
+    if (pos + chunk_size > size) return BROTLI_FALSE;
+    dict->prefix_size[dict->num_prefix] = chunk_size;
+    dict->prefix[dict->num_prefix] = &encoded[pos];  /* points into encoded */
+    dict->num_prefix++;
+    /* LZ77_DICTIONARY_LENGTH bytes. */
+    pos += chunk_size;
+  }
+
+  /* NUM_WORD_LISTS */
+  if (!ReadUint8(encoded, size, &pos, &dict->num_word_lists)) {
+    return BROTLI_FALSE;
+  }
+  if (dict->num_word_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
+    return BROTLI_FALSE;
+  }
+
+  if (dict->num_word_lists != 0) {
+    dict->words_instances = (BrotliDictionary*)dict->alloc_func(
+        dict->memory_manager_opaque,
+        dict->num_word_lists * sizeof(*dict->words_instances));
+    if (!dict->words_instances) return BROTLI_FALSE;  /* OOM */
+  }
+  for (i = 0; i < dict->num_word_lists; i++) {
+    if (!ParseWordList(size, encoded, &pos, &dict->words_instances[i])) {
+      return BROTLI_FALSE;
+    }
+  }
+
+  /* NUM_TRANSFORM_LISTS */
+  if (!ReadUint8(encoded, size, &pos, &dict->num_transform_lists)) {
+    return BROTLI_FALSE;
+  }
+  if (dict->num_transform_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
+    return BROTLI_FALSE;
+  }
+
+  if (dict->num_transform_lists != 0) {
+    dict->transforms_instances = (BrotliTransforms*)dict->alloc_func(
+        dict->memory_manager_opaque,
+        dict->num_transform_lists * sizeof(*dict->transforms_instances));
+    if (!dict->transforms_instances) return BROTLI_FALSE;  /* OOM */
+  }
+  for (i = 0; i < dict->num_transform_lists; i++) {  /* pass 1: check, count */
+    BROTLI_BOOL ok = BROTLI_FALSE;
+    size_t prefix_suffix_count = 0;
+    trasform_list_start[i] = pos;  /* remembered for the second pass */
+    dict->transforms_instances[i].prefix_suffix_map =
+        temporary_prefix_suffix_table;
+    ok = ParseTransformsList(
+        size, encoded, &pos, &dict->transforms_instances[i],
+        temporary_prefix_suffix_table, &prefix_suffix_count);
+    if (!ok) return BROTLI_FALSE;
+    total_prefix_suffix_count += prefix_suffix_count;
+  }
+  if (total_prefix_suffix_count != 0) {
+    dict->prefix_suffix_maps = (uint16_t*)dict->alloc_func(
+        dict->memory_manager_opaque,
+        total_prefix_suffix_count * sizeof(*dict->prefix_suffix_maps));
+    if (!dict->prefix_suffix_maps) return BROTLI_FALSE;  /* OOM */
+  }
+  total_prefix_suffix_count = 0;  /* reused as the running map offset */
+  for (i = 0; i < dict->num_transform_lists; i++) {  /* pass 2: fill the maps */
+    size_t prefix_suffix_count = 0;
+    size_t position = trasform_list_start[i];
+    uint16_t* prefix_suffix_map =
+      &dict->prefix_suffix_maps[total_prefix_suffix_count];
+    BROTLI_BOOL ok = ParsePrefixSuffixTable(
+        size, encoded, &position, &dict->transforms_instances[i],
+        prefix_suffix_map, &prefix_suffix_count);
+    if (!ok) return BROTLI_FALSE;
+    dict->transforms_instances[i].prefix_suffix_map = prefix_suffix_map;
+    total_prefix_suffix_count += prefix_suffix_count;
+  }
+
+  if (dict->num_word_lists != 0 || dict->num_transform_lists != 0) {
+    if (!ReadUint8(encoded, size, &pos, &dict->num_dictionaries)) {
+      return BROTLI_FALSE;
+    }
+    if (dict->num_dictionaries == 0 ||
+        dict->num_dictionaries > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
+      return BROTLI_FALSE;
+    }
+    for (i = 0; i < dict->num_dictionaries; i++) {
+      uint8_t words_index;
+      uint8_t transforms_index;
+      if (!ReadUint8(encoded, size, &pos, &words_index)) {
+        return BROTLI_FALSE;
+      }
+      if (words_index > dict->num_word_lists) return BROTLI_FALSE;
+      if (!ReadUint8(encoded, size, &pos, &transforms_index)) {
+        return BROTLI_FALSE;
+      }
+      if (transforms_index > dict->num_transform_lists) return BROTLI_FALSE;
+      /* An index equal to the list count selects the built-in table. */
+      dict->words[i] = words_index == dict->num_word_lists ?
+          BrotliGetDictionary() : &dict->words_instances[words_index];
+      dict->transforms[i] = transforms_index == dict->num_transform_lists ?
+          BrotliGetTransforms(): &dict->transforms_instances[transforms_index];
+    }
+    /* CONTEXT_ENABLED */
+    if (!ReadBool(encoded, size, &pos, &dict->context_based)) {
+      return BROTLI_FALSE;
+    }
+
+    /* CONTEXT_MAP */
+    if (dict->context_based) {
+      for (i = 0; i < SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS; i++) {
+        if (!ReadUint8(encoded, size, &pos, &dict->context_map[i])) {
+          return BROTLI_FALSE;
+        }
+        if (dict->context_map[i] >= dict->num_dictionaries) {
+          return BROTLI_FALSE;
+        }
+      }
+    }
+  } else {
+    dict->context_based = BROTLI_FALSE;
+    dict->num_dictionaries = 1;
+    dict->words[0] = BrotliGetDictionary();
+    dict->transforms[0] = BrotliGetTransforms();
+  }
+
+  return BROTLI_TRUE;
+}
+
+/* Validates a serialized shared dictionary and loads it into dict.
+   Returns BROTLI_TRUE if the dictionary is valid, BROTLI_FALSE otherwise.
+   dict must already have been initialized. If dict already contains data,
+   a compound (LZ77 prefix) dictionary is appended to it, but the call fails
+   when both dict and the new data define custom words or transforms.
+   TODO(lode): link to RFC for shared brotli once published. */
+static BROTLI_BOOL DecodeSharedDictionary(
+    const uint8_t* encoded, size_t size, BrotliSharedDictionary* dict) {
+  uint32_t added_prefixes = 0;
+  BROTLI_BOOL adds_custom = BROTLI_FALSE;
+  const BROTLI_BOOL already_custom =
+      dict->num_word_lists > 0 || dict->num_transform_lists > 0;
+
+  /* The data must open with the two magic bytes 0x91 0x00. */
+  if (size < 2) return BROTLI_FALSE;
+  if (encoded[0] != 0x91 || encoded[1] != 0) return BROTLI_FALSE;
+
+  if (!DryParseDictionary(encoded, size, &added_prefixes, &adds_custom)) {
+    return BROTLI_FALSE;
+  }
+
+  /* Stay within the cap on compound (prefix) dictionaries. */
+  if (added_prefixes + dict->num_prefix > SHARED_BROTLI_MAX_COMPOUND_DICTS) {
+    return BROTLI_FALSE;
+  }
+
+  /* Cannot combine different static dictionaries, only prefix dictionaries */
+  if (already_custom && adds_custom) return BROTLI_FALSE;
+
+  return ParseDictionary(encoded, size, dict);
+}
+
+/* Frees dict and the arrays it owns through dict's own free callback. */
+void BrotliSharedDictionaryDestroyInstance(
+    BrotliSharedDictionary* dict) {
+  brotli_free_func release;
+  void* cookie;
+  if (dict == NULL) return;  /* destroying NULL is a no-op */
+  /* Fetch the callback and its opaque argument before dict is released. */
+  release = dict->free_func;
+  cookie = dict->memory_manager_opaque;
+  /* Owned arrays first (possibly NULL), the instance itself last. */
+  release(cookie, dict->words_instances);
+  release(cookie, dict->transforms_instances);
+  release(cookie, dict->prefix_suffix_maps);
+  release(cookie, dict);
+}
+
+/* Attaches a RAW prefix (kept by pointer) or parses a SERIALIZED dictionary. */
+BROTLI_BOOL BrotliSharedDictionaryAttach(
+    BrotliSharedDictionary* dict, BrotliSharedDictionaryType type,
+    size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)]) {
+  uint32_t slot;
+  if (dict == NULL) return BROTLI_FALSE;
+  switch (type) {
+    case BROTLI_SHARED_DICTIONARY_SERIALIZED:
+      return DecodeSharedDictionary(data, data_size, dict);
+    case BROTLI_SHARED_DICTIONARY_RAW:
+      slot = dict->num_prefix;
+      if (slot >= SHARED_BROTLI_MAX_COMPOUND_DICTS) return BROTLI_FALSE;
+      dict->prefix_size[slot] = data_size;
+      dict->prefix[slot] = data;
+      dict->num_prefix = slot + 1;
+      return BROTLI_TRUE;
+    default:
+      return BROTLI_FALSE;  /* unrecognized dictionary type */
+  }
+}
+
+/* Allocates and initializes an empty shared dictionary that refers to the
+   built-in word list and transforms. alloc_func and free_func must be both
+   NULL (malloc is used) or both non-NULL; a mismatched pair yields NULL. */
+BrotliSharedDictionary* BrotliSharedDictionaryCreateInstance(
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
+  const size_t bytes = sizeof(BrotliSharedDictionary);
+  BrotliSharedDictionary* instance = NULL;
+  if (alloc_func && free_func) {
+    instance = (BrotliSharedDictionary*)alloc_func(opaque, bytes);
+  } else if (!alloc_func && !free_func) {
+    instance = (BrotliSharedDictionary*)malloc(bytes);
+  }
+  if (!instance) return NULL;  /* mismatched callbacks or out of memory */
+
+  /* TODO(eustas): explicitly initialize all the fields? */
+  memset(instance, 0, bytes);
+
+  /* Defaults: a single, non-context-based built-in dictionary. */
+  instance->context_based = BROTLI_FALSE;
+  instance->num_dictionaries = 1;
+  instance->num_word_lists = 0;
+  instance->num_transform_lists = 0;
+  instance->words[0] = BrotliGetDictionary();
+  instance->transforms[0] = BrotliGetTransforms();
+
+  instance->alloc_func = alloc_func ? alloc_func : BrotliDefaultAllocFunc;
+  instance->free_func = free_func ? free_func : BrotliDefaultFreeFunc;
+  instance->memory_manager_opaque = opaque;
+  return instance;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/common/shared_dictionary_internal.h b/third-party/libjxl/libjxl/third_party/brotli/c/common/shared_dictionary_internal.h
new file mode 100644
index 0000000000..963762e432
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/common/shared_dictionary_internal.h
@@ -0,0 +1,75 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* (Transparent) Shared Dictionary definition. */
+
+#ifndef BROTLI_COMMON_SHARED_DICTIONARY_INTERNAL_H_
+#define BROTLI_COMMON_SHARED_DICTIONARY_INTERNAL_H_
+
+#include <brotli/shared_dictionary.h>
+#include <brotli/types.h>
+
+#include "dictionary.h"
+#include "transform.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+struct BrotliSharedDictionaryStruct {
+  /* LZ77 prefixes (compound dictionary). */
+  uint32_t num_prefix;  /* max SHARED_BROTLI_MAX_COMPOUND_DICTS */
+  size_t prefix_size[SHARED_BROTLI_MAX_COMPOUND_DICTS];  /* bytes in prefix[i] */
+  const uint8_t* prefix[SHARED_BROTLI_MAX_COMPOUND_DICTS];  /* not owned */
+
+  /* If set, the context map is used to select word and transform list from 64
+     contexts, if not set, the context map is not used and only words[0] and
+     transforms[0] are to be used. */
+  BROTLI_BOOL context_based;
+
+  uint8_t context_map[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
+
+  /* Number of word_list+transform_list combinations. */
+  uint8_t num_dictionaries;
+
+  /* Must use num_dictionaries values. */
+  const BrotliDictionary* words[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
+
+  /* Must use num_dictionaries values. */
+  const BrotliTransforms* transforms[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
+
+  /* Number of custom word lists. May be 0 if only Brotli's built-in is used */
+  uint8_t num_word_lists;
+
+  /* Contents of the custom words lists. Must be NULL if num_word_lists is 0. */
+  BrotliDictionary* words_instances;
+
+  /* Number of custom transform lists. May be 0 if only Brotli's built-in is
+     used */
+  uint8_t num_transform_lists;
+
+  /* Contents of the custom transform lists. Must be NULL if num_transform_lists
+     is 0. */
+  BrotliTransforms* transforms_instances;
+
+  /* Concatenated prefix_suffix_maps of the custom transform lists. Must be NULL
+     if num_transform_lists is 0. */
+  uint16_t* prefix_suffix_maps;
+
+  /* Memory management: callbacks plus the opaque pointer handed to them. */
+  brotli_alloc_func alloc_func;
+  brotli_free_func free_func;
+  void* memory_manager_opaque;
+};
+
+typedef struct BrotliSharedDictionaryStruct BrotliSharedDictionaryInternal;
+#define BrotliSharedDictionary BrotliSharedDictionaryInternal
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_COMMON_SHARED_DICTIONARY_INTERNAL_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/common/transform.c b/third-party/libjxl/libjxl/third_party/brotli/c/common/transform.c
new file mode 100644
index 0000000000..49455fc496
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/common/transform.c
@@ -0,0 +1,291 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "transform.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* RFC 7932 transform strings: each stringlet is a length byte plus bytes. */
+static const char kPrefixSuffix[217] =
+      "\1 \2, \10 of the \4 of \2s \1.\5 and \4 "
+/* 0x  _0 _2  __5        _E    _3  _6 _8     _E */
+      "in \1\"\4 to \2\">\1\n\2. \1]\5 for \3 a \6 "
+/* 2x     _3_ _5    _A_  _D_ _F  _2 _4     _A   _E */
+      "that \1\'\6 with \6 from \4 by \1(\6. T"
+/* 4x       _5_ _7      _E      _5    _A _C */
+      "he \4 on \4 as \4 is \4ing \2\n\t\1:\3ed "
+/* 6x     _3    _8    _D    _2    _7_ _ _A _C */
+      "\2=\"\4 at \3ly \1,\2=\'\5.com/\7. This \5"
+/* 8x  _0 _ _3    _8   _C _E _ _1     _7       _F */
+      " not \3er \3al \4ful \4ive \5less \4es"
+/* Ax       _5   _9   _D    _2    _7     _D */
+      "t \4ize \2\xc2\xa0\4ous \5 the \2e "; /* \0 - implicit trailing zero. */
+/* Cx    _2    _7___ ___ _A    _F     _5        _8 */
+
+static const uint16_t kPrefixSuffixMap[50] = {  /* offsets into kPrefixSuffix */
+  0x00, 0x02, 0x05, 0x0E, 0x13, 0x16, 0x18, 0x1E, 0x23, 0x25,
+  0x2A, 0x2D, 0x2F, 0x32, 0x34, 0x3A, 0x3E, 0x45, 0x47, 0x4E,
+  0x55, 0x5A, 0x5C, 0x63, 0x68, 0x6D, 0x72, 0x77, 0x7A, 0x7C,
+  0x80, 0x83, 0x88, 0x8C, 0x8E, 0x91, 0x97, 0x9F, 0xA5, 0xA9,
+  0xAD, 0xB2, 0xB7, 0xBD, 0xC2, 0xC7, 0xCA, 0xCF, 0xD5, 0xD8
+};  /* the last entry, 0xD8, is the trailing zero byte: the empty stringlet */
+
+/* RFC 7932 transforms: [prefix_id, transform type, suffix_id] triplets. */
+static const uint8_t kTransformsData[] = {
+  49, BROTLI_TRANSFORM_IDENTITY, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 0,
+   0, BROTLI_TRANSFORM_IDENTITY, 0,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_1, 49,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
+  49, BROTLI_TRANSFORM_IDENTITY, 47,
+   0, BROTLI_TRANSFORM_IDENTITY, 49,
+   4, BROTLI_TRANSFORM_IDENTITY, 0,
+  49, BROTLI_TRANSFORM_IDENTITY, 3,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 6,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_2, 49,
+  49, BROTLI_TRANSFORM_OMIT_LAST_1, 49,
+   1, BROTLI_TRANSFORM_IDENTITY, 0,
+  49, BROTLI_TRANSFORM_IDENTITY, 1,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
+  49, BROTLI_TRANSFORM_IDENTITY, 7,
+  49, BROTLI_TRANSFORM_IDENTITY, 9,
+  48, BROTLI_TRANSFORM_IDENTITY, 0,
+  49, BROTLI_TRANSFORM_IDENTITY, 8,
+  49, BROTLI_TRANSFORM_IDENTITY, 5,
+  49, BROTLI_TRANSFORM_IDENTITY, 10,
+  49, BROTLI_TRANSFORM_IDENTITY, 11,
+  49, BROTLI_TRANSFORM_OMIT_LAST_3, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 13,
+  49, BROTLI_TRANSFORM_IDENTITY, 14,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_3, 49,
+  49, BROTLI_TRANSFORM_OMIT_LAST_2, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 15,
+  49, BROTLI_TRANSFORM_IDENTITY, 16,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 12,
+   5, BROTLI_TRANSFORM_IDENTITY, 49,
+   0, BROTLI_TRANSFORM_IDENTITY, 1,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_4, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 18,
+  49, BROTLI_TRANSFORM_IDENTITY, 17,
+  49, BROTLI_TRANSFORM_IDENTITY, 19,
+  49, BROTLI_TRANSFORM_IDENTITY, 20,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_5, 49,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_6, 49,
+  47, BROTLI_TRANSFORM_IDENTITY, 49,
+  49, BROTLI_TRANSFORM_OMIT_LAST_4, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 22,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 23,
+  49, BROTLI_TRANSFORM_IDENTITY, 24,
+  49, BROTLI_TRANSFORM_IDENTITY, 25,
+  49, BROTLI_TRANSFORM_OMIT_LAST_7, 49,
+  49, BROTLI_TRANSFORM_OMIT_LAST_1, 26,
+  49, BROTLI_TRANSFORM_IDENTITY, 27,
+  49, BROTLI_TRANSFORM_IDENTITY, 28,
+   0, BROTLI_TRANSFORM_IDENTITY, 12,
+  49, BROTLI_TRANSFORM_IDENTITY, 29,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_9, 49,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_7, 49,
+  49, BROTLI_TRANSFORM_OMIT_LAST_6, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 21,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
+  49, BROTLI_TRANSFORM_OMIT_LAST_8, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 31,
+  49, BROTLI_TRANSFORM_IDENTITY, 32,
+  47, BROTLI_TRANSFORM_IDENTITY, 3,
+  49, BROTLI_TRANSFORM_OMIT_LAST_5, 49,
+  49, BROTLI_TRANSFORM_OMIT_LAST_9, 49,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 8,
+   5, BROTLI_TRANSFORM_IDENTITY, 21,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 10,
+  49, BROTLI_TRANSFORM_IDENTITY, 30,
+   0, BROTLI_TRANSFORM_IDENTITY, 5,
+  35, BROTLI_TRANSFORM_IDENTITY, 49,
+  47, BROTLI_TRANSFORM_IDENTITY, 2,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 17,
+  49, BROTLI_TRANSFORM_IDENTITY, 36,
+  49, BROTLI_TRANSFORM_IDENTITY, 33,
+   5, BROTLI_TRANSFORM_IDENTITY, 0,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 21,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
+  49, BROTLI_TRANSFORM_IDENTITY, 37,
+   0, BROTLI_TRANSFORM_IDENTITY, 30,
+  49, BROTLI_TRANSFORM_IDENTITY, 38,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
+  49, BROTLI_TRANSFORM_IDENTITY, 39,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 34,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 8,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
+   0, BROTLI_TRANSFORM_IDENTITY, 21,
+  49, BROTLI_TRANSFORM_IDENTITY, 40,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
+  49, BROTLI_TRANSFORM_IDENTITY, 41,
+  49, BROTLI_TRANSFORM_IDENTITY, 42,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 17,
+  49, BROTLI_TRANSFORM_IDENTITY, 43,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 10,
+   0, BROTLI_TRANSFORM_IDENTITY, 34,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
+  49, BROTLI_TRANSFORM_IDENTITY, 44,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
+  45, BROTLI_TRANSFORM_IDENTITY, 49,
+   0, BROTLI_TRANSFORM_IDENTITY, 33,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
+  49, BROTLI_TRANSFORM_IDENTITY, 46,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 33,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 21,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
+};
+
+static const BrotliTransforms kBrotliTransforms = {
+  sizeof(kPrefixSuffix),  /* prefix_suffix_size, counts the trailing zero */
+  (const uint8_t*)kPrefixSuffix,  /* prefix_suffix */
+  kPrefixSuffixMap,  /* prefix_suffix_map */
+  sizeof(kTransformsData) / (3 * sizeof(kTransformsData[0])),  /* 3 per entry */
+  kTransformsData,  /* transforms */
+  NULL,  /* no extra parameters */
+  {0, 12, 27, 23, 42, 63, 56, 48, 59, 64}  /* bare OMIT_LAST_n entry indices */
+};
+
+const BrotliTransforms* BrotliGetTransforms(void) {
+  return &kBrotliTransforms;  /* the static built-in table defined above */
+}
+
+/* Uppercases one character at p in place; returns its byte length (1-3). */
+static int ToUpperCase(uint8_t* p) {
+  const uint8_t lead = p[0];
+  if (lead >= 0xE0) {
+    /* An arbitrary transform for three byte characters. */
+    p[2] ^= 5;
+    return 3;
+  }
+  if (lead >= 0xC0) {
+    /* An overly simplified uppercasing model for UTF-8. */
+    p[1] ^= 32;
+    return 2;
+  }
+  if (lead >= 'a' && lead <= 'z') p[0] = (uint8_t)(lead ^ 32);  /* ASCII */
+  return 1;
+}
+
+static int Shift(uint8_t* word, int word_len, uint16_t parameter) {
+  /* Limited sign extension: scalar < (1 << 24). */
+  uint32_t scalar =
+      (parameter & 0x7FFFu) + (0x1000000u - (parameter & 0x8000u));
+  if (word[0] < 0x80) {
+    /* 1-byte rune / 0sssssss / 7 bit scalar (ASCII). */
+    scalar += (uint32_t)word[0];
+    word[0] = (uint8_t)(scalar & 0x7Fu);
+    return 1;
+  } else if (word[0] < 0xC0) {
+    /* Continuation / 10AAAAAA. */
+    return 1;  /* byte is left unchanged */
+  } else if (word[0] < 0xE0) {
+    /* 2-byte rune / 110sssss AAssssss / 11 bit scalar. */
+    if (word_len < 2) return 1;  /* truncated rune: left unchanged */
+    scalar += (uint32_t)((word[1] & 0x3Fu) | ((word[0] & 0x1Fu) << 6u));
+    word[0] = (uint8_t)(0xC0 | ((scalar >> 6u) & 0x1F));
+    word[1] = (uint8_t)((word[1] & 0xC0) | (scalar & 0x3F));
+    return 2;
+  } else if (word[0] < 0xF0) {
+    /* 3-byte rune / 1110ssss AAssssss BBssssss / 16 bit scalar. */
+    if (word_len < 3) return word_len;  /* truncated rune: left unchanged */
+    scalar += (uint32_t)((word[2] & 0x3Fu) | ((word[1] & 0x3Fu) << 6u) |
+        ((word[0] & 0x0Fu) << 12u));
+    word[0] = (uint8_t)(0xE0 | ((scalar >> 12u) & 0x0F));
+    word[1] = (uint8_t)((word[1] & 0xC0) | ((scalar >> 6u) & 0x3F));
+    word[2] = (uint8_t)((word[2] & 0xC0) | (scalar & 0x3F));
+    return 3;
+  } else if (word[0] < 0xF8) {
+    /* 4-byte rune / 11110sss AAssssss BBssssss CCssssss / 21 bit scalar. */
+    if (word_len < 4) return word_len;  /* truncated rune: left unchanged */
+    scalar += (uint32_t)((word[3] & 0x3Fu) | ((word[2] & 0x3Fu) << 6u) |
+        ((word[1] & 0x3Fu) << 12u) | ((word[0] & 0x07u) << 18u));
+    word[0] = (uint8_t)(0xF0 | ((scalar >> 18u) & 0x07));
+    word[1] = (uint8_t)((word[1] & 0xC0) | ((scalar >> 12u) & 0x3F));
+    word[2] = (uint8_t)((word[2] & 0xC0) | ((scalar >> 6u) & 0x3F));
+    word[3] = (uint8_t)((word[3] & 0xC0) | (scalar & 0x3F));
+    return 4;
+  }
+  return 1;  /* lead byte 0xF8..0xFF: skipped unchanged */
+}
+
+int BrotliTransformDictionaryWord(uint8_t* dst, const uint8_t* word, int len,
+    const BrotliTransforms* transforms, int transform_idx) {
+  int idx = 0;  /* number of bytes written to dst so far */
+  const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, transform_idx);
+  uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, transform_idx);
+  const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, transform_idx);
+  {
+    int prefix_len = *prefix++;  /* first byte is the stringlet length */
+    while (prefix_len--) { dst[idx++] = *prefix++; }
+  }
+  {
+    const int t = type;
+    int i = 0;
+    if (t <= BROTLI_TRANSFORM_OMIT_LAST_9) {
+      len -= t;  /* enum value equals the number of trailing bytes dropped */
+    } else if (t >= BROTLI_TRANSFORM_OMIT_FIRST_1
+        && t <= BROTLI_TRANSFORM_OMIT_FIRST_9) {
+      int skip = t - (BROTLI_TRANSFORM_OMIT_FIRST_1 - 1);  /* 1..9 bytes */
+      word += skip;
+      len -= skip;
+    }
+    while (i < len) { dst[idx++] = word[i++]; }  /* copies nothing if len <= 0 */
+    if (t == BROTLI_TRANSFORM_UPPERCASE_FIRST) {
+      ToUpperCase(&dst[idx - len]);  /* dst[idx - len] is the copied word */
+    } else if (t == BROTLI_TRANSFORM_UPPERCASE_ALL) {
+      uint8_t* uppercase = &dst[idx - len];
+      while (len > 0) {
+        int step = ToUpperCase(uppercase);
+        uppercase += step;
+        len -= step;
+      }
+    } else if (t == BROTLI_TRANSFORM_SHIFT_FIRST) {
+      uint16_t param = (uint16_t)(transforms->params[transform_idx * 2]
+          + (transforms->params[transform_idx * 2 + 1] << 8u));  /* 16-bit LE */
+      Shift(&dst[idx - len], len, param);
+    } else if (t == BROTLI_TRANSFORM_SHIFT_ALL) {
+      uint16_t param = (uint16_t)(transforms->params[transform_idx * 2]
+          + (transforms->params[transform_idx * 2 + 1] << 8u));  /* 16-bit LE */
+      uint8_t* shift = &dst[idx - len];
+      while (len > 0) {
+        int step = Shift(shift, len, param);
+        shift += step;
+        len -= step;
+      }
+    }
+  }
+  {
+    int suffix_len = *suffix++;  /* first byte is the stringlet length */
+    while (suffix_len--) { dst[idx++] = *suffix++; }
+    return idx;  /* total number of bytes written to dst */
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/common/transform.h b/third-party/libjxl/libjxl/third_party/brotli/c/common/transform.h
new file mode 100644
index 0000000000..b6f86cc7d5
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/common/transform.h
@@ -0,0 +1,85 @@
+/* Transforms are a part of the ABI, but not of the API.
+
+   This means that some functions are supposed to be in the "common"
+   library, but the header itself is not placed into include/brotli. This way,
+   the aforementioned functions are available only to brotli internals.
+ */
+
+#ifndef BROTLI_COMMON_TRANSFORM_H_
+#define BROTLI_COMMON_TRANSFORM_H_
+
+#include <brotli/port.h>
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+enum BrotliWordTransformType {
+  BROTLI_TRANSFORM_IDENTITY = 0,
+  BROTLI_TRANSFORM_OMIT_LAST_1 = 1,  /* OMIT_LAST_n: value == n. */
+  BROTLI_TRANSFORM_OMIT_LAST_2 = 2,
+  BROTLI_TRANSFORM_OMIT_LAST_3 = 3,
+  BROTLI_TRANSFORM_OMIT_LAST_4 = 4,
+  BROTLI_TRANSFORM_OMIT_LAST_5 = 5,
+  BROTLI_TRANSFORM_OMIT_LAST_6 = 6,
+  BROTLI_TRANSFORM_OMIT_LAST_7 = 7,
+  BROTLI_TRANSFORM_OMIT_LAST_8 = 8,
+  BROTLI_TRANSFORM_OMIT_LAST_9 = 9,
+  BROTLI_TRANSFORM_UPPERCASE_FIRST = 10,
+  BROTLI_TRANSFORM_UPPERCASE_ALL = 11,
+  BROTLI_TRANSFORM_OMIT_FIRST_1 = 12,  /* OMIT_FIRST_n: value == n + 11. */
+  BROTLI_TRANSFORM_OMIT_FIRST_2 = 13,
+  BROTLI_TRANSFORM_OMIT_FIRST_3 = 14,
+  BROTLI_TRANSFORM_OMIT_FIRST_4 = 15,
+  BROTLI_TRANSFORM_OMIT_FIRST_5 = 16,
+  BROTLI_TRANSFORM_OMIT_FIRST_6 = 17,
+  BROTLI_TRANSFORM_OMIT_FIRST_7 = 18,
+  BROTLI_TRANSFORM_OMIT_FIRST_8 = 19,
+  BROTLI_TRANSFORM_OMIT_FIRST_9 = 20,
+  BROTLI_TRANSFORM_SHIFT_FIRST = 21,  /* SHIFT_*: parameterized via params. */
+  BROTLI_TRANSFORM_SHIFT_ALL = 22,
+  BROTLI_NUM_TRANSFORM_TYPES  /* Counts transforms, not a transform itself. */
+};
+
+#define BROTLI_TRANSFORMS_MAX_CUT_OFF BROTLI_TRANSFORM_OMIT_LAST_9
+
+typedef struct BrotliTransforms {
+  uint16_t prefix_suffix_size;
+  /* Last character must be null, so prefix_suffix_size must be at least 1. */
+  const uint8_t* prefix_suffix;
+  const uint16_t* prefix_suffix_map;  /* Offsets into prefix_suffix, by id. */
+  uint32_t num_transforms;
+  /* Each entry is a [prefix_id, transform, suffix_id] triplet. */
+  const uint8_t* transforms;
+  /* Shift for BROTLI_TRANSFORM_SHIFT_FIRST and BROTLI_TRANSFORM_SHIFT_ALL,
+     must be NULL if and only if no such transforms are present. */
+  const uint8_t* params;  /* Two bytes per transform, low byte first. */
+  /* Indices of transforms like ["", BROTLI_TRANSFORM_OMIT_LAST_#, ""].
+     0-th element corresponds to ["", BROTLI_TRANSFORM_IDENTITY, ""].
+     -1, if cut-off transform does not exist. */
+  int16_t cutOffTransforms[BROTLI_TRANSFORMS_MAX_CUT_OFF + 1];
+} BrotliTransforms;
+
+/* T is BrotliTransforms*; result is uint8_t. */
+#define BROTLI_TRANSFORM_PREFIX_ID(T, I) ((T)->transforms[((I) * 3) + 0])
+#define BROTLI_TRANSFORM_TYPE(T, I)      ((T)->transforms[((I) * 3) + 1])
+#define BROTLI_TRANSFORM_SUFFIX_ID(T, I) ((T)->transforms[((I) * 3) + 2])
+
+/* T is BrotliTransforms*; result is const uint8_t*. */
+#define BROTLI_TRANSFORM_PREFIX(T, I) (&(T)->prefix_suffix[ \
+    (T)->prefix_suffix_map[BROTLI_TRANSFORM_PREFIX_ID(T, I)]])
+#define BROTLI_TRANSFORM_SUFFIX(T, I) (&(T)->prefix_suffix[ \
+    (T)->prefix_suffix_map[BROTLI_TRANSFORM_SUFFIX_ID(T, I)]])
+
+BROTLI_COMMON_API const BrotliTransforms* BrotliGetTransforms(void);
+
+BROTLI_COMMON_API int BrotliTransformDictionaryWord(
+    uint8_t* dst, const uint8_t* word, int len,
+    const BrotliTransforms* transforms, int transform_idx);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_COMMON_TRANSFORM_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/common/version.h b/third-party/libjxl/libjxl/third_party/brotli/c/common/version.h
new file mode 100644
index 0000000000..01b2998e25
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/common/version.h
@@ -0,0 +1,26 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Version definition. */
+
+#ifndef BROTLI_COMMON_VERSION_H_
+#define BROTLI_COMMON_VERSION_H_
+
+/* This macro should only be used when library is compiled together with client.
+   If library is dynamically linked, use BrotliDecoderVersion and
+   BrotliEncoderVersion methods. */
+
+/* Semantic version, calculated as (MAJOR << 24) | (MINOR << 12) | PATCH */
+#define BROTLI_VERSION 0x1000009  /* 1.0.9 */
+
+/* This macro is used by build system to produce Libtool-friendly soname. See
+   https://www.gnu.org/software/libtool/manual/html_node/Libtool-versioning.html
+ */
+
+/* ABI version, calculated as (CURRENT << 24) | (REVISION << 12) | AGE */
+#define BROTLI_ABI_VERSION 0x1009000  /* current 1, revision 9, age 0 */
+
+#endif  /* BROTLI_COMMON_VERSION_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/dec/bit_reader.c b/third-party/libjxl/libjxl/third_party/brotli/c/dec/bit_reader.c
new file mode 100644
index 0000000000..97e21f56f6
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/dec/bit_reader.c
@@ -0,0 +1,77 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Bit reading helpers */
+
+#include "bit_reader.h"
+
+#include <brotli/types.h>
+
+#include "../common/platform.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+const uint32_t kBrotliBitMask[33] = {   0x00000000,  /* [n]: n low bits set */
+    0x00000001, 0x00000003, 0x00000007, 0x0000000F,
+    0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,
+    0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF,
+    0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF,
+    0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF,
+    0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF,
+    0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF,
+    0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF
+};
+
+void BrotliInitBitReader(BrotliBitReader* const br) {
+  br->bit_pos_ = sizeof(br->val_) * 8;  /* Accumulator width, in bits. */
+  br->val_ = 0;
+}
+
+BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br) {
+  /* Fixing alignment after an unaligned BrotliFillWindow would result in
+     accumulator overflow. If the unalignment is caused by BrotliSafeReadBits,
+     then there is enough space in the accumulator to fix the alignment.
+     No alignment is enforced when BROTLI_UNALIGNED_READ_FAST is set. */
+  const size_t align_mask =
+      BROTLI_UNALIGNED_READ_FAST ? 0 : ((sizeof(br->val_) >> 1) - 1);
+
+  /* Make sure the accumulator is not empty. */
+  if (BrotliGetAvailableBits(br) == 0 && !BrotliPullByte(br)) {
+    return BROTLI_FALSE;
+  }
+
+  /* Pull single bytes until the input pointer satisfies the mask. */
+  while ((((size_t)br->next_in) & align_mask) != 0) {
+    if (!BrotliPullByte(br)) {
+      /* If we consumed all the input, we don't care about the alignment. */
+      break;
+    }
+  }
+  return BROTLI_TRUE;
+}
+
+BROTLI_BOOL BrotliSafeReadBits32Slow(BrotliBitReader* const br,
+    uint32_t n_bits, uint32_t* val) {
+  BrotliBitReaderState saved;
+  uint32_t lo;
+  uint32_t hi;
+  BROTLI_DCHECK(n_bits <= 32);
+  BROTLI_DCHECK(n_bits > 24);
+  BrotliBitReaderSaveState(br, &saved);
+  if (BrotliSafeReadBits(br, 16, &lo) &&
+      BrotliSafeReadBits(br, n_bits - 16, &hi)) {
+    *val = (hi << 16) | lo;
+    return BROTLI_TRUE;
+  }
+  BrotliBitReaderRestoreState(br, &saved);  /* Undo any partial read. */
+  return BROTLI_FALSE;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/dec/bit_reader.h b/third-party/libjxl/libjxl/third_party/brotli/c/dec/bit_reader.h
new file mode 100644
index 0000000000..c737bda584
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/dec/bit_reader.h
@@ -0,0 +1,364 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Bit reading helpers */
+
+#ifndef BROTLI_DEC_BIT_READER_H_
+#define BROTLI_DEC_BIT_READER_H_
+
+#include <string.h>  /* memcpy */
+
+#include <brotli/types.h>
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_SHORT_FILL_BIT_WINDOW_READ (sizeof(brotli_reg_t) >> 1)
+
+BROTLI_INTERNAL extern const uint32_t kBrotliBitMask[33];
+
+static BROTLI_INLINE uint32_t BitMask(uint32_t n) {
+  if (BROTLI_IS_CONSTANT(n) || BROTLI_HAS_UBFX) {
+    /* Masking with this expression turns to a single
+       "Unsigned Bit Field Extract" UBFX instruction on ARM. */
+    return ~((0xFFFFFFFFu) << n);  /* NOTE(review): n == 32 would be UB. */
+  } else {
+    return kBrotliBitMask[n];  /* Table has 33 entries, so n <= 32 is valid. */
+  }
+}
+
+typedef struct {
+  brotli_reg_t val_;       /* pre-fetched bits */
+  uint32_t bit_pos_;       /* current bit-reading position in val_ */
+  const uint8_t* next_in;  /* the byte we're reading from */
+  size_t avail_in;         /* bytes still available at next_in */
+} BrotliBitReader;
+
+typedef struct {  /* Snapshot of the BrotliBitReader fields. */
+  brotli_reg_t val_;
+  uint32_t bit_pos_;
+  const uint8_t* next_in;
+  size_t avail_in;
+} BrotliBitReaderState;
+
+/* Initializes the BrotliBitReader fields. */
+BROTLI_INTERNAL void BrotliInitBitReader(BrotliBitReader* const br);
+
+/* Ensures that accumulator is not empty.
+   May consume up to sizeof(brotli_reg_t) - 1 bytes of input.
+   Returns BROTLI_FALSE if data is required but there is no input available.
+   For !BROTLI_UNALIGNED_READ_FAST this function also prepares bit reader for
+   aligned reading. */
+BROTLI_INTERNAL BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br);
+
+/* Fallback for BrotliSafeReadBits32. Extracted as noninlined method to unburden
+   the main code-path. Never called for RFC brotli streams, required only for
+   "large-window" mode and other extensions. */
+BROTLI_INTERNAL BROTLI_NOINLINE BROTLI_BOOL BrotliSafeReadBits32Slow(
+    BrotliBitReader* const br, uint32_t n_bits, uint32_t* val);
+
+static BROTLI_INLINE void BrotliBitReaderSaveState(
+    BrotliBitReader* const from, BrotliBitReaderState* to) {
+  to->next_in = from->next_in;  /* input cursor */
+  to->avail_in = from->avail_in;
+  to->bit_pos_ = from->bit_pos_;  /* accumulator */
+  to->val_ = from->val_;
+}
+
+static BROTLI_INLINE void BrotliBitReaderRestoreState(
+    BrotliBitReader* const to, BrotliBitReaderState* from) {
+  to->next_in = from->next_in;  /* input cursor */
+  to->avail_in = from->avail_in;
+  to->bit_pos_ = from->bit_pos_;  /* accumulator */
+  to->val_ = from->val_;
+}
+
+static BROTLI_INLINE uint32_t BrotliGetAvailableBits(
+    const BrotliBitReader* br) {
+  return (BROTLI_64_BITS ? 64 : 32) - br->bit_pos_;  /* width - consumed */
+}
+
+/* Returns amount of unread bytes the bit reader still has buffered from the
+   BrotliInput, including whole bytes in br->val_. Result is capped with
+   maximal ring-buffer size (larger number won't be utilized anyway). */
+static BROTLI_INLINE size_t BrotliGetRemainingBytes(BrotliBitReader* br) {
+  static const size_t kCap = (size_t)1 << BROTLI_LARGE_MAX_WBITS;
+  const size_t buffered = BrotliGetAvailableBits(br) >> 3;  /* whole bytes */
+  return (br->avail_in > kCap) ? kCap : (br->avail_in + buffered);
+}
+
+/* Checks if there is at least |num| bytes left in the input ring-buffer
+   (excluding the bits remaining in br->val_). */
+static BROTLI_INLINE BROTLI_BOOL BrotliCheckInputAmount(
+    BrotliBitReader* const br, size_t num) {
+  return TO_BROTLI_BOOL(num <= br->avail_in);
+}
+
+/* Guarantees that there are at least |n_bits| + 1 bits in accumulator.
+   Precondition: accumulator contains at least 1 bit.
+   |n_bits| should be in the range [1..24] for regular build. For portable
+   non-64-bit little-endian build only 16 bits are safe to request. */
+static BROTLI_INLINE void BrotliFillBitWindow(
+    BrotliBitReader* const br, uint32_t n_bits) {  /* avail_in is not checked */
+#if (BROTLI_64_BITS)
+  if (BROTLI_UNALIGNED_READ_FAST && BROTLI_IS_CONSTANT(n_bits) &&
+      (n_bits <= 8)) {
+    uint32_t bit_pos = br->bit_pos_;
+    if (bit_pos >= 56) {  /* At most 8 unread bits remain: refill. */
+      br->val_ =
+          (br->val_ >> 56) | (BROTLI_UNALIGNED_LOAD64LE(br->next_in) << 8);
+      br->bit_pos_ =
+          bit_pos ^ 56; /* here same as -= 56 because of the if condition */
+      br->avail_in -= 7;  /* The top byte of the load was shifted out. */
+      br->next_in += 7;
+    }
+  } else if (BROTLI_UNALIGNED_READ_FAST && BROTLI_IS_CONSTANT(n_bits) &&
+             (n_bits <= 16)) {
+    uint32_t bit_pos = br->bit_pos_;
+    if (bit_pos >= 48) {  /* At most 16 unread bits remain: refill. */
+      br->val_ =
+          (br->val_ >> 48) | (BROTLI_UNALIGNED_LOAD64LE(br->next_in) << 16);
+      br->bit_pos_ =
+          bit_pos ^ 48; /* here same as -= 48 because of the if condition */
+      br->avail_in -= 6;  /* The top two bytes of the load were shifted out. */
+      br->next_in += 6;
+    }
+  } else {
+    uint32_t bit_pos = br->bit_pos_;
+    if (bit_pos >= 32) {  /* At most 32 unread bits remain: refill. */
+      br->val_ = (br->val_ >> 32) |
+                 (((uint64_t)BROTLI_UNALIGNED_LOAD32LE(br->next_in)) << 32);
+      br->bit_pos_ =
+          bit_pos ^ 32; /* here same as -= 32 because of the if condition */
+      br->avail_in -= BROTLI_SHORT_FILL_BIT_WINDOW_READ;
+      br->next_in += BROTLI_SHORT_FILL_BIT_WINDOW_READ;
+    }
+  }
+#else
+  if (BROTLI_UNALIGNED_READ_FAST && BROTLI_IS_CONSTANT(n_bits) &&
+      (n_bits <= 8)) {
+    uint32_t bit_pos = br->bit_pos_;
+    if (bit_pos >= 24) {  /* At most 8 unread bits remain: refill. */
+      br->val_ =
+          (br->val_ >> 24) | (BROTLI_UNALIGNED_LOAD32LE(br->next_in) << 8);
+      br->bit_pos_ =
+          bit_pos ^ 24; /* here same as -= 24 because of the if condition */
+      br->avail_in -= 3;  /* The top byte of the load was shifted out. */
+      br->next_in += 3;
+    }
+  } else {
+    uint32_t bit_pos = br->bit_pos_;
+    if (bit_pos >= 16) {  /* At most 16 unread bits remain: refill. */
+      br->val_ = (br->val_ >> 16) |
+                 (((uint32_t)BROTLI_UNALIGNED_LOAD16LE(br->next_in)) << 16);
+      br->bit_pos_ =
+          bit_pos ^ 16; /* here same as -= 16 because of the if condition */
+      br->avail_in -= BROTLI_SHORT_FILL_BIT_WINDOW_READ;
+      br->next_in += BROTLI_SHORT_FILL_BIT_WINDOW_READ;
+    }
+  }
+#endif
+}
+
+/* Mostly like BrotliFillBitWindow, but guarantees only 16 bits and reads no
+   more than BROTLI_SHORT_FILL_BIT_WINDOW_READ bytes of input. */
+static BROTLI_INLINE void BrotliFillBitWindow16(BrotliBitReader* const br) {
+  BrotliFillBitWindow(br, 17);  /* 17 skips the n_bits <= 8 / <= 16 branches. */
+}
+
+/* Tries to pull one byte of input to accumulator.
+   Returns BROTLI_FALSE if there is no input available. */
+static BROTLI_INLINE BROTLI_BOOL BrotliPullByte(BrotliBitReader* const br) {
+  if (br->avail_in == 0) {
+    return BROTLI_FALSE;
+  }
+  /* Drop the lowest byte and insert the new one at bit 56 (or 24). */
+#if (BROTLI_64_BITS)
+  br->val_ = (br->val_ >> 8) | (((uint64_t)*br->next_in) << 56);
+#else
+  br->val_ = (br->val_ >> 8) | (((uint32_t)*br->next_in) << 24);
+#endif
+  ++br->next_in;
+  --br->avail_in;
+  br->bit_pos_ -= 8;
+  return BROTLI_TRUE;
+}
+
+/* Returns currently available bits.
+   The number of valid bits could be calculated by BrotliGetAvailableBits. */
+static BROTLI_INLINE brotli_reg_t BrotliGetBitsUnmasked(
+    BrotliBitReader* const br) {
+  return br->val_ >> br->bit_pos_;  /* Consumed low bits are shifted out. */
+}
+
+/* Like BrotliGetBits, but does not mask the result.
+   The result contains at least 16 valid bits. */
+static BROTLI_INLINE uint32_t BrotliGet16BitsUnmasked(
+    BrotliBitReader* const br) {
+  BrotliFillBitWindow(br, 16);  /* Refill does not check avail_in. */
+  return (uint32_t)BrotliGetBitsUnmasked(br);  /* Bit position is unchanged. */
+}
+
+/* Returns the specified number of bits from |br| without advancing bit
+   position. */
+static BROTLI_INLINE uint32_t BrotliGetBits(
+    BrotliBitReader* const br, uint32_t n_bits) {
+  BrotliFillBitWindow(br, n_bits);  /* Refill does not check avail_in. */
+  return (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits);
+}
+
+/* Tries to peek the specified amount of bits. Returns BROTLI_FALSE, if there
+   is not enough input. */
+static BROTLI_INLINE BROTLI_BOOL BrotliSafeGetBits(
+    BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
+  for (;;) {
+    if (BrotliGetAvailableBits(br) >= n_bits) break;
+    if (!BrotliPullByte(br)) return BROTLI_FALSE;  /* Out of input. */
+  }
+  /* Peek only: the bit position is left untouched. */
+  *val = BitMask(n_bits) & (uint32_t)BrotliGetBitsUnmasked(br);
+  return BROTLI_TRUE;
+}
+
+/* Advances the bit pos by |n_bits|. */
+static BROTLI_INLINE void BrotliDropBits(
+    BrotliBitReader* const br, uint32_t n_bits) {
+  br->bit_pos_ += n_bits;  /* No range check is performed here. */
+}
+
+static BROTLI_INLINE void BrotliBitReaderUnload(BrotliBitReader* br) {
+  /* Hand whole unread bytes back to the input; a sub-byte remainder of bits
+     stays in the accumulator. */
+  const uint32_t whole_bytes = BrotliGetAvailableBits(br) >> 3;
+  const uint32_t whole_bits = whole_bytes << 3;
+  br->next_in -= whole_bytes;
+  br->avail_in += whole_bytes;
+  /* A shift by the full accumulator width is undefined, hence the branch. */
+  br->val_ = (whole_bits == (sizeof(br->val_) << 3)) ? 0
+                                                     : br->val_ << whole_bits;
+  br->bit_pos_ += whole_bits;
+}
+
+/* Reads the specified number of bits from |br| and advances the bit pos.
+   Precondition: accumulator MUST contain at least |n_bits|. */
+static BROTLI_INLINE void BrotliTakeBits(
+  BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
+  *val = (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits);
+  BROTLI_LOG(("[BrotliTakeBits]  %d %d %d val: %6x\n",
+      (int)br->avail_in, (int)br->bit_pos_, (int)n_bits, (int)*val));
+  BrotliDropBits(br, n_bits);  /* Never refills the accumulator. */
+}
+
+/* Reads the specified number of bits from |br| and advances the bit pos.
+   Assumes that there is enough input to perform BrotliFillBitWindow.
+   Up to 24 bits are allowed to be requested from this method. */
+static BROTLI_INLINE uint32_t BrotliReadBits24(
+    BrotliBitReader* const br, uint32_t n_bits) {
+  BROTLI_DCHECK(n_bits <= 24);
+  if (!(BROTLI_64_BITS) && (n_bits > 16)) {  /* non-64-bit build: two steps */
+    uint32_t lo;
+    uint32_t hi;
+    BrotliFillBitWindow(br, 16);
+    BrotliTakeBits(br, 16, &lo);
+    BrotliFillBitWindow(br, 8);
+    BrotliTakeBits(br, n_bits - 16, &hi);
+    return (hi << 16) | lo;
+  } else {
+    uint32_t bits;
+    BrotliFillBitWindow(br, n_bits);
+    BrotliTakeBits(br, n_bits, &bits);
+    return bits;
+  }
+}
+
+/* Same as BrotliReadBits24, but allows reading up to 32 bits. */
+static BROTLI_INLINE uint32_t BrotliReadBits32(
+    BrotliBitReader* const br, uint32_t n_bits) {
+  BROTLI_DCHECK(n_bits <= 32);
+  if (!(BROTLI_64_BITS) && (n_bits > 16)) {  /* non-64-bit build: two steps */
+    uint32_t lo;
+    uint32_t hi;
+    BrotliFillBitWindow(br, 16);
+    BrotliTakeBits(br, 16, &lo);
+    BrotliFillBitWindow(br, 16);
+    BrotliTakeBits(br, n_bits - 16, &hi);
+    return (hi << 16) | lo;
+  } else {
+    uint32_t bits;
+    BrotliFillBitWindow(br, n_bits);
+    BrotliTakeBits(br, n_bits, &bits);
+    return bits;
+  }
+}
+
+/* Tries to read the specified amount of bits. Returns BROTLI_FALSE, if there
+   is not enough input. |n_bits| MUST be positive.
+   Up to 24 bits are allowed to be requested from this method. */
+static BROTLI_INLINE BROTLI_BOOL BrotliSafeReadBits(
+    BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
+  BROTLI_DCHECK(n_bits <= 24);
+  /* Top up the accumulator one byte at a time; fail once input runs out. */
+  for (;;) {
+    if (BrotliGetAvailableBits(br) >= n_bits) break;
+    if (!BrotliPullByte(br)) return BROTLI_FALSE;
+  }
+  BrotliTakeBits(br, n_bits, val);
+  return BROTLI_TRUE;
+}
+
+/* Same as BrotliSafeReadBits, but allows reading up to 32 bits. */
+static BROTLI_INLINE BROTLI_BOOL BrotliSafeReadBits32(
+    BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
+  BROTLI_DCHECK(n_bits <= 32);
+  if (!(BROTLI_64_BITS) && (n_bits > 24)) {
+    /* Non-64-bit build: wide reads go through the out-of-line helper. */
+    return BrotliSafeReadBits32Slow(br, n_bits, val);
+  }
+  while (BrotliGetAvailableBits(br) < n_bits) {
+    if (!BrotliPullByte(br)) {
+      return BROTLI_FALSE;
+    }
+  }
+  BrotliTakeBits(br, n_bits, val);
+  return BROTLI_TRUE;
+}
+
+/* Advances the bit reader position to the next byte boundary and verifies
+   that any skipped bits are set to zero. */
+static BROTLI_INLINE BROTLI_BOOL BrotliJumpToByteBoundary(BrotliBitReader* br) {
+  const uint32_t partial = BrotliGetAvailableBits(br) & 0x7;
+  uint32_t padding = 0;
+  /* With nothing to skip, padding keeps its initial 0 and the final check
+     passes. */
+  if (partial != 0) BrotliTakeBits(br, partial, &padding);
+  return TO_BROTLI_BOOL(padding == 0);
+}
+
+/* Copies remaining input bytes stored in the bit reader to the output. Value
+   |num| may not be larger than BrotliGetRemainingBytes. The bit reader must be
+   warmed up again after this. */
+static BROTLI_INLINE void BrotliCopyBytes(uint8_t* dest,
+                                          BrotliBitReader* br, size_t num) {
+  while (BrotliGetAvailableBits(br) >= 8 && num > 0) {
+    *dest++ = (uint8_t)BrotliGetBitsUnmasked(br);  /* Drain buffered bytes. */
+    BrotliDropBits(br, 8);
+    --num;
+  }
+  if (num == 0) return;  /* Avoid memcpy/pointer math on a NULL next_in. */
+  memcpy(dest, br->next_in, num);
+  br->avail_in -= num;
+  br->next_in += num;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_DEC_BIT_READER_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/dec/decode.c b/third-party/libjxl/libjxl/third_party/brotli/c/dec/decode.c
new file mode 100644
index 0000000000..845f556c04
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/dec/decode.c
@@ -0,0 +1,2807 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include <brotli/decode.h>
+
+#include <stdlib.h>  /* free, malloc */
+#include <string.h>  /* memcpy, memset */
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include "../common/shared_dictionary_internal.h"
+#include "../common/transform.h"
+#include "../common/version.h"
+#include "bit_reader.h"
+#include "huffman.h"
+#include "prefix.h"
+#include "state.h"
+
+#if defined(BROTLI_TARGET_NEON)
+#include <arm_neon.h>
+#endif
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_FAILURE(CODE) (BROTLI_DUMP(), CODE)
+
+#define BROTLI_LOG_UINT(name)                                       \
+  BROTLI_LOG(("[%s] %s = %lu\n", __func__, #name, (unsigned long)(name)))
+#define BROTLI_LOG_ARRAY_INDEX(array_name, idx)                     \
+  BROTLI_LOG(("[%s] %s[%lu] = %lu\n", __func__, #array_name,        \
+         (unsigned long)(idx), (unsigned long)array_name[idx]))
+
+#define HUFFMAN_TABLE_BITS 8U
+#define HUFFMAN_TABLE_MASK 0xFF
+
+/* We need the slack region for the following reasons:
+    - doing up to two 16-byte copies for fast backward copying
+    - inserting transformed dictionary word:
+        255 prefix + 32 base + 255 suffix */
+static const uint32_t kRingBufferWriteAheadSlack = 542;
+
+static const uint8_t kCodeLengthCodeOrder[BROTLI_CODE_LENGTH_CODES] = {
+  1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+};
+
+/* Static prefix code for the complex code length code lengths. */
+static const uint8_t kCodeLengthPrefixLength[16] = {
+  2, 2, 2, 3, 2, 2, 2, 4, 2, 2, 2, 3, 2, 2, 2, 4,
+};
+
+static const uint8_t kCodeLengthPrefixValue[16] = {
+  0, 4, 3, 2, 0, 4, 3, 1, 0, 4, 3, 2, 0, 4, 3, 5,
+};
+
+BROTLI_BOOL BrotliDecoderSetParameter(
+    BrotliDecoderState* state, BrotliDecoderParameter p, uint32_t value) {
+  /* Parameters are accepted only while the decoder is still UNINITED. */
+  if (state->state != BROTLI_STATE_UNINITED) return BROTLI_FALSE;
+  if (p == BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION) {
+    /* A non-zero value turns canny ring-buffer allocation off. */
+    state->canny_ringbuffer_allocation = (value != 0) ? 0 : 1;
+    return BROTLI_TRUE;
+  }
+  if (p == BROTLI_DECODER_PARAM_LARGE_WINDOW) {
+    state->large_window = TO_BROTLI_BOOL(value != 0);
+    return BROTLI_TRUE;
+  }
+  return BROTLI_FALSE;  /* Unrecognized parameter. */
+}
+
+BrotliDecoderState* BrotliDecoderCreateInstance(
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
+  BrotliDecoderState* state = 0;
+  if (!alloc_func && !free_func) {  /* No custom allocator: use malloc. */
+    state = (BrotliDecoderState*)malloc(sizeof(BrotliDecoderState));
+  } else if (alloc_func && free_func) {  /* Custom pair supplied. */
+    state = (BrotliDecoderState*)alloc_func(opaque, sizeof(BrotliDecoderState));
+  }  /* Only one of the two supplied: state stays 0 and creation fails. */
+  if (state == 0) {
+    BROTLI_DUMP();
+    return 0;
+  }
+  if (!BrotliDecoderStateInit(state, alloc_func, free_func, opaque)) {
+    BROTLI_DUMP();
+    if (!alloc_func && !free_func) {  /* Release with the matching free. */
+      free(state);
+    } else if (alloc_func && free_func) {
+      free_func(opaque, state);
+    }
+    return 0;
+  }
+  return state;
+}
+
+/* Deinitializes and frees BrotliDecoderState instance. */
+void BrotliDecoderDestroyInstance(BrotliDecoderState* state) {
+  brotli_free_func free_func;
+  void* opaque;
+  if (!state) return;  /* Destroying a null instance is a no-op. */
+  /* Fetch the deallocator before the state is cleaned up. */
+  free_func = state->free_func;
+  opaque = state->memory_manager_opaque;
+  BrotliDecoderStateCleanup(state);
+  free_func(opaque, state);
+}
+
+/* Saves error code and converts it to BrotliDecoderResult. */
+static BROTLI_NOINLINE BrotliDecoderResult SaveErrorCode(
+    BrotliDecoderState* s, BrotliDecoderErrorCode e, size_t consumed_input) {
+  /* Record the raw code and the input consumed by this call on the state. */
+  s->used_input += consumed_input;
+  s->error_code = (int)e;
+  /* Three codes map to dedicated results; all others mean an error. */
+  if (e == BROTLI_DECODER_SUCCESS) {
+    return BROTLI_DECODER_RESULT_SUCCESS;
+  }
+  if (e == BROTLI_DECODER_NEEDS_MORE_INPUT) {
+    return BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
+  }
+  if (e == BROTLI_DECODER_NEEDS_MORE_OUTPUT) {
+    return BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT;
+  }
+  return BROTLI_DECODER_RESULT_ERROR;
+}
+
+
+/* Decodes WBITS by reading 1 - 7 bits, or 0x11 for "Large Window Brotli".
+   Precondition: bit-reader accumulator has at least 8 bits. */
+static BrotliDecoderErrorCode DecodeWindowBits(BrotliDecoderState* s,
+                                               BrotliBitReader* br) {
+  uint32_t n;
+  BROTLI_BOOL large_window = s->large_window;  /* Caller's opt-in. */
+  s->large_window = BROTLI_FALSE;  /* Set again only if the stream uses it. */
+  BrotliTakeBits(br, 1, &n);
+  if (n == 0) {
+    s->window_bits = 16;
+    return BROTLI_DECODER_SUCCESS;
+  }
+  BrotliTakeBits(br, 3, &n);
+  if (n != 0) {
+    s->window_bits = 17 + n;  /* n is 1..7, giving 18..24. */
+    return BROTLI_DECODER_SUCCESS;
+  }
+  BrotliTakeBits(br, 3, &n);
+  if (n == 1) {  /* Large-window marker; accepted only with the opt-in. */
+    if (large_window) {
+      BrotliTakeBits(br, 1, &n);
+      if (n == 1) {
+        return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS);
+      }
+      s->large_window = BROTLI_TRUE;
+      return BROTLI_DECODER_SUCCESS;  /* window_bits is not set on this path. */
+    } else {
+      return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS);
+    }
+  }
+  if (n != 0) {
+    s->window_bits = 8 + n;  /* n is 2..7 here, giving 10..15. */
+    return BROTLI_DECODER_SUCCESS;
+  }
+  s->window_bits = 17;
+  return BROTLI_DECODER_SUCCESS;
+}
+
+static BROTLI_INLINE void memmove16(uint8_t* dst, uint8_t* src) {
+#if defined(BROTLI_TARGET_NEON)
+  vst1q_u8(dst, vld1q_u8(src));  /* Load all 16 bytes, then store them. */
+#else
+  uint32_t buffer[4];  /* 16-byte staging area. */
+  memcpy(buffer, src, 16);  /* Copy out first so overlapping dst is safe. */
+  memcpy(dst, buffer, 16);
+#endif
+}
+
+/* Decodes a number in the range [0..255], by reading 1 - 11 bits. */
+static BROTLI_NOINLINE BrotliDecoderErrorCode DecodeVarLenUint8(
+    BrotliDecoderState* s, BrotliBitReader* br, uint32_t* value) {
+  uint32_t bits;
+  switch (s->substate_decode_uint8) {  /* Resume where the last call stopped. */
+    case BROTLI_STATE_DECODE_UINT8_NONE:
+      if (BROTLI_PREDICT_FALSE(!BrotliSafeReadBits(br, 1, &bits))) {
+        return BROTLI_DECODER_NEEDS_MORE_INPUT;
+      }
+      if (bits == 0) {
+        *value = 0;
+        return BROTLI_DECODER_SUCCESS;
+      }
+    /* Fall through. */
+
+    case BROTLI_STATE_DECODE_UINT8_SHORT:
+      if (BROTLI_PREDICT_FALSE(!BrotliSafeReadBits(br, 3, &bits))) {
+        s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_SHORT;
+        return BROTLI_DECODER_NEEDS_MORE_INPUT;
+      }
+      if (bits == 0) {
+        *value = 1;
+        s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
+        return BROTLI_DECODER_SUCCESS;
+      }
+      /* Use output value as a temporary storage. It MUST be persisted. */
+      *value = bits;  /* 1..7: number of extra bits to read next. */
+    /* Fall through. */
+
+    case BROTLI_STATE_DECODE_UINT8_LONG:
+      if (BROTLI_PREDICT_FALSE(!BrotliSafeReadBits(br, *value, &bits))) {
+        s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_LONG;
+        return BROTLI_DECODER_NEEDS_MORE_INPUT;
+      }
+      *value = (1U << *value) + bits;  /* Result is in 2..255. */
+      s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
+      return BROTLI_DECODER_SUCCESS;
+
+    default:
+      return
+          BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);  /* COV_NF_LINE */
+  }
+}
+
+/* Decodes a metablock length and flags by reading 2 - 31 bits. */
+static BrotliDecoderErrorCode BROTLI_NOINLINE DecodeMetaBlockLength(
+    BrotliDecoderState* s, BrotliBitReader* br) {
+  uint32_t bits;
+  int i;
+  for (;;) {  /* Re-enters the switch after each `break` below. */
+    switch (s->substate_metablock_header) {
+      case BROTLI_STATE_METABLOCK_HEADER_NONE:
+        if (!BrotliSafeReadBits(br, 1, &bits)) {
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        s->is_last_metablock = bits ? 1 : 0;
+        s->meta_block_remaining_len = 0;
+        s->is_uncompressed = 0;
+        s->is_metadata = 0;
+        if (!s->is_last_metablock) {
+          s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NIBBLES;
+          break;
+        }
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_EMPTY;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER_EMPTY:
+        if (!BrotliSafeReadBits(br, 1, &bits)) {
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        if (bits) {  /* Last metablock is empty: length stays 0. */
+          s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
+          return BROTLI_DECODER_SUCCESS;
+        }
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NIBBLES;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER_NIBBLES:
+        if (!BrotliSafeReadBits(br, 2, &bits)) {
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        s->size_nibbles = (uint8_t)(bits + 4);  /* bits 0..2: 4..6 nibbles. */
+        s->loop_counter = 0;
+        if (bits == 3) {  /* 3 marks a metadata block instead. */
+          s->is_metadata = 1;
+          s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_RESERVED;
+          break;
+        }
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_SIZE;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER_SIZE:
+        i = s->loop_counter;  /* Resume at the nibble where input ran out. */
+        for (; i < (int)s->size_nibbles; ++i) {
+          if (!BrotliSafeReadBits(br, 4, &bits)) {
+            s->loop_counter = i;
+            return BROTLI_DECODER_NEEDS_MORE_INPUT;
+          }
+          if (i + 1 == (int)s->size_nibbles && s->size_nibbles > 4 &&
+              bits == 0) {
+            return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_EXUBERANT_NIBBLE);
+          }
+          s->meta_block_remaining_len |= (int)(bits << (i * 4));
+        }
+        s->substate_metablock_header =
+            BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED:
+        if (!s->is_last_metablock) {
+          if (!BrotliSafeReadBits(br, 1, &bits)) {
+            return BROTLI_DECODER_NEEDS_MORE_INPUT;
+          }
+          s->is_uncompressed = bits ? 1 : 0;
+        }
+        ++s->meta_block_remaining_len;  /* The stream stores length - 1. */
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
+        return BROTLI_DECODER_SUCCESS;
+
+      case BROTLI_STATE_METABLOCK_HEADER_RESERVED:
+        if (!BrotliSafeReadBits(br, 1, &bits)) {
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        if (bits != 0) {
+          return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_RESERVED);
+        }
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_BYTES;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER_BYTES:
+        if (!BrotliSafeReadBits(br, 2, &bits)) {
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        if (bits == 0) {  /* No size bytes: metadata length stays 0. */
+          s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
+          return BROTLI_DECODER_SUCCESS;
+        }
+        s->size_nibbles = (uint8_t)bits;  /* Reused as a byte count (1..3). */
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_METADATA;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER_METADATA:
+        i = s->loop_counter;  /* Resume at the byte where input ran out. */
+        for (; i < (int)s->size_nibbles; ++i) {
+          if (!BrotliSafeReadBits(br, 8, &bits)) {
+            s->loop_counter = i;
+            return BROTLI_DECODER_NEEDS_MORE_INPUT;
+          }
+          if (i + 1 == (int)s->size_nibbles && s->size_nibbles > 1 &&
+              bits == 0) {
+            return BROTLI_FAILURE(
+                BROTLI_DECODER_ERROR_FORMAT_EXUBERANT_META_NIBBLE);
+          }
+          s->meta_block_remaining_len |= (int)(bits << (i * 8));
+        }
+        ++s->meta_block_remaining_len;  /* The stream stores length - 1. */
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
+        return BROTLI_DECODER_SUCCESS;
+
+      default:
+        return
+            BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);  /* COV_NF_LINE */
+    }
+  }
+}
+
+/* Decodes the Huffman code from bits the caller has already peeked.
+   This method doesn't read data from the bit reader, BUT drops the amount of
+   bits that correspond to the decoded symbol. Returns the symbol value.
+   bits MUST contain at least 15 (BROTLI_HUFFMAN_MAX_CODE_LENGTH) valid bits. */
+static BROTLI_INLINE uint32_t DecodeSymbol(uint32_t bits,
+                                           const HuffmanCode* table,
+                                           BrotliBitReader* br) {
+  BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table);
+  BROTLI_HC_ADJUST_TABLE_INDEX(table, bits & HUFFMAN_TABLE_MASK);  /* Level 1. */
+  if (BROTLI_HC_FAST_LOAD_BITS(table) > HUFFMAN_TABLE_BITS) {  /* Long code. */
+    uint32_t nbits = BROTLI_HC_FAST_LOAD_BITS(table) - HUFFMAN_TABLE_BITS;
+    BrotliDropBits(br, HUFFMAN_TABLE_BITS);
+    BROTLI_HC_ADJUST_TABLE_INDEX(table,
+        BROTLI_HC_FAST_LOAD_VALUE(table) +
+        ((bits >> HUFFMAN_TABLE_BITS) & BitMask(nbits)));
+  }
+  BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(table));  /* Rest of the code. */
+  return BROTLI_HC_FAST_LOAD_VALUE(table);
+}
+
+/* Decodes the next symbol: peeks 16 bits of input, drops the 0 - 15 used. */
+static BROTLI_INLINE uint32_t ReadSymbol(const HuffmanCode* table,
+                                         BrotliBitReader* br) {
+  const uint32_t peeked = BrotliGet16BitsUnmasked(br);
+  return DecodeSymbol(peeked, table, br);
+}
+
+/* Same as DecodeSymbol, but for the case when fewer than 15 bits of input may
+   be available. Returns BROTLI_FALSE, dropping nothing, if bits run short. */
+static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol(
+    const HuffmanCode* table, BrotliBitReader* br, uint32_t* result) {
+  uint32_t val;
+  uint32_t available_bits = BrotliGetAvailableBits(br);
+  BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table);
+  if (available_bits == 0) {
+    if (BROTLI_HC_FAST_LOAD_BITS(table) == 0) {  /* Zero-length code. */
+      *result = BROTLI_HC_FAST_LOAD_VALUE(table);
+      return BROTLI_TRUE;
+    }
+    return BROTLI_FALSE;  /* No valid bits at all. */
+  }
+  val = (uint32_t)BrotliGetBitsUnmasked(br);
+  BROTLI_HC_ADJUST_TABLE_INDEX(table, val & HUFFMAN_TABLE_MASK);
+  if (BROTLI_HC_FAST_LOAD_BITS(table) <= HUFFMAN_TABLE_BITS) {
+    if (BROTLI_HC_FAST_LOAD_BITS(table) <= available_bits) {
+      BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(table));
+      *result = BROTLI_HC_FAST_LOAD_VALUE(table);
+      return BROTLI_TRUE;
+    } else {
+      return BROTLI_FALSE;  /* Not enough bits for the first level. */
+    }
+  }
+  if (available_bits <= HUFFMAN_TABLE_BITS) {
+    return BROTLI_FALSE;  /* Not enough bits to move to the second level. */
+  }
+
+  /* Account for HUFFMAN_TABLE_BITS locally; the reader is untouched so far. */
+  val = (val & BitMask(BROTLI_HC_FAST_LOAD_BITS(table))) >> HUFFMAN_TABLE_BITS;
+  available_bits -= HUFFMAN_TABLE_BITS;
+  BROTLI_HC_ADJUST_TABLE_INDEX(table, BROTLI_HC_FAST_LOAD_VALUE(table) + val);
+  if (available_bits < BROTLI_HC_FAST_LOAD_BITS(table)) {
+    return BROTLI_FALSE;  /* Not enough bits for the second level. */
+  }
+
+  BrotliDropBits(br, HUFFMAN_TABLE_BITS + BROTLI_HC_FAST_LOAD_BITS(table));
+  *result = BROTLI_HC_FAST_LOAD_VALUE(table);
+  return BROTLI_TRUE;
+}
+
+static BROTLI_INLINE BROTLI_BOOL SafeReadSymbol(
+    const HuffmanCode* table, BrotliBitReader* br, uint32_t* result) {
+  uint32_t peeked;  /* 15 peekable bits allow the plain DecodeSymbol path. */
+  if (BROTLI_PREDICT_FALSE(!BrotliSafeGetBits(br, 15, &peeked))) {
+    return SafeDecodeSymbol(table, br, result);  /* Decode with what is left. */
+  }
+  *result = DecodeSymbol(peeked, table, br);
+  return BROTLI_TRUE;
+}
+
+/* Makes a look-up in the first-level Huffman table. Peeks 8 bits. */
+static BROTLI_INLINE void PreloadSymbol(int safe,
+                                        const HuffmanCode* table,
+                                        BrotliBitReader* br,
+                                        uint32_t* bits,
+                                        uint32_t* value) {
+  /* Safe mode does no preloading: *bits and *value are left untouched. */
+  if (!safe) {
+    BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table);
+    BROTLI_HC_ADJUST_TABLE_INDEX(table, BrotliGetBits(br, HUFFMAN_TABLE_BITS));
+    *bits = BROTLI_HC_FAST_LOAD_BITS(table);
+    *value = BROTLI_HC_FAST_LOAD_VALUE(table);
+  }
+}
+
+/* Decodes the next Huffman code using data prepared by PreloadSymbol, then
+   preloads the following one. Reads 0 - 15 bits; peeks 8 following bits. */
+static BROTLI_INLINE uint32_t ReadPreloadedSymbol(const HuffmanCode* table,
+                                                  BrotliBitReader* br,
+                                                  uint32_t* bits,
+                                                  uint32_t* value) {
+  uint32_t result = *value;  /* Final answer unless the code is a long one. */
+  if (BROTLI_PREDICT_FALSE(*bits > HUFFMAN_TABLE_BITS)) {
+    uint32_t val = BrotliGet16BitsUnmasked(br);
+    const HuffmanCode* ext = table + (val & HUFFMAN_TABLE_MASK) + *value;
+    uint32_t mask = BitMask((*bits - HUFFMAN_TABLE_BITS));
+    BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(ext);
+    BrotliDropBits(br, HUFFMAN_TABLE_BITS);
+    BROTLI_HC_ADJUST_TABLE_INDEX(ext, (val >> HUFFMAN_TABLE_BITS) & mask);
+    BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(ext));
+    result = BROTLI_HC_FAST_LOAD_VALUE(ext);
+  } else {
+    BrotliDropBits(br, *bits);  /* Short code: the preloaded entry suffices. */
+  }
+  PreloadSymbol(0, table, br, bits, value);  /* Refresh *bits and *value. */
+  return result;
+}
+
+static BROTLI_INLINE uint32_t Log2Floor(uint32_t x) {
+  /* Counts the shifts needed to clear x, i.e. its bit width (0 for x == 0). */
+  uint32_t width;
+  for (width = 0; x != 0; x >>= 1) {
+    ++width;
+  }
+  return width;
+}
+
+/* Reads the (h->symbol + 1) explicitly listed symbols of a simple code.
+   That is 1..4 symbols of 1..11 bits each; progress is kept in the header
+   arena so a short read can be resumed. Duplicate symbols are rejected. */
+static BrotliDecoderErrorCode ReadSimpleHuffmanSymbols(
+    uint32_t alphabet_size_max, uint32_t alphabet_size_limit,
+    BrotliDecoderState* s) {
+  /* symbol_bits == 1..11; last == 0..3; 1..44 bits will be read. */
+  BrotliMetablockHeaderArena* h = &s->arena.header;
+  BrotliBitReader* reader = &s->br;
+  const uint32_t symbol_bits = Log2Floor(alphabet_size_max - 1);
+  const uint32_t last = h->symbol;
+  uint32_t i;
+  for (i = h->sub_loop_counter; i <= last; ++i) {
+    uint32_t raw;
+    if (BROTLI_PREDICT_FALSE(!BrotliSafeReadBits(reader, symbol_bits, &raw))) {
+      h->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_READ;
+      h->sub_loop_counter = i;
+      return BROTLI_DECODER_NEEDS_MORE_INPUT;
+    }
+    if (raw >= alphabet_size_limit) {
+      return
+          BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_ALPHABET);
+    }
+    h->symbols_lists_array[i] = (uint16_t)raw;
+    BROTLI_LOG_UINT(h->symbols_lists_array[i]);
+  }
+
+  /* Compare every pair once; there are at most four entries. */
+  for (i = 1; i <= last; ++i) {
+    uint32_t earlier;
+    for (earlier = 0; earlier < i; ++earlier) {
+      if (h->symbols_lists_array[earlier] == h->symbols_lists_array[i]) {
+        return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_SAME);
+      }
+    }
+  }
+
+  return BROTLI_DECODER_SUCCESS;
+}
+
+/* Processes a single decoded symbol code length: resets the repeat counter
+   and advances to the next symbol. For a non-zero length it also remembers
+   the length, extends that length's index-chain, reduces the remaining
+   Huffman space and updates the histogram. */
+static BROTLI_INLINE void ProcessSingleCodeLength(uint32_t code_len,
+    uint32_t* symbol, uint32_t* repeat, uint32_t* space,
+    uint32_t* prev_code_len, uint16_t* symbol_lists,
+    uint16_t* code_length_histo, int* next_symbol) {
+  const uint32_t current = *symbol;
+  *repeat = 0;
+  if (code_len != 0) {  /* code_len == 1..15 */
+    int* chain_tail = &next_symbol[code_len];
+    symbol_lists[*chain_tail] = (uint16_t)current;
+    *chain_tail = (int)current;
+    *prev_code_len = code_len;
+    *space = *space - (32768U >> code_len);
+    ++code_length_histo[code_len];
+    BROTLI_LOG(("[ReadHuffmanCode] code_length[%d] = %d\n",
+        (int)current, (int)code_len));
+  }
+  *symbol = current + 1;
+}
+
+/* Process repeated symbol code length.
+    A) Check if it is the extension of previous repeat sequence; if the decoded
+       value is not BROTLI_REPEAT_PREVIOUS_CODE_LENGTH, then it is a new
+       symbol-skip
+    B) Update repeat variable
+    C) Check if operation is feasible (fits alphabet)
+    D) For each symbol do the same operations as in ProcessSingleCodeLength
+
+   PRECONDITION: code_len == BROTLI_REPEAT_PREVIOUS_CODE_LENGTH or
+                 code_len == BROTLI_REPEAT_ZERO_CODE_LENGTH */
+static BROTLI_INLINE void ProcessRepeatedCodeLength(uint32_t code_len,
+    uint32_t repeat_delta, uint32_t alphabet_size, uint32_t* symbol,
+    uint32_t* repeat, uint32_t* space, uint32_t* prev_code_len,
+    uint32_t* repeat_code_len, uint16_t* symbol_lists,
+    uint16_t* code_length_histo, int* next_symbol) {
+  uint32_t old_repeat;
+  uint32_t extra_bits = 3;  /* for BROTLI_REPEAT_ZERO_CODE_LENGTH */
+  uint32_t new_len = 0;  /* for BROTLI_REPEAT_ZERO_CODE_LENGTH */
+  if (code_len == BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) {
+    new_len = *prev_code_len;
+    extra_bits = 2;
+  }
+  if (*repeat_code_len != new_len) {  /* A different length starts a new run. */
+    *repeat = 0;
+    *repeat_code_len = new_len;
+  }
+  old_repeat = *repeat;  /* Run length accumulated before this code. */
+  if (*repeat > 0) {  /* Continuation: the old count moves to the high bits. */
+    *repeat -= 2;
+    *repeat <<= extra_bits;
+  }
+  *repeat += repeat_delta + 3U;
+  repeat_delta = *repeat - old_repeat;  /* Symbols to add by this call. */
+  if (*symbol + repeat_delta > alphabet_size) {
+    BROTLI_DUMP();
+    *symbol = alphabet_size;
+    *space = 0xFFFFF;  /* Non-zero leftover; ReadHuffmanCode rejects it. */
+    return;
+  }
+  BROTLI_LOG(("[ReadHuffmanCode] code_length[%d..%d] = %d\n",
+      (int)*symbol, (int)(*symbol + repeat_delta - 1), (int)*repeat_code_len));
+  if (*repeat_code_len != 0) {
+    unsigned last = *symbol + repeat_delta;
+    int next = next_symbol[*repeat_code_len];
+    do {  /* Append each repeated symbol to the chain of this code length. */
+      symbol_lists[next] = (uint16_t)*symbol;
+      next = (int)*symbol;
+    } while (++(*symbol) != last);
+    next_symbol[*repeat_code_len] = next;
+    *space -= repeat_delta << (15 - *repeat_code_len);
+    code_length_histo[*repeat_code_len] =
+        (uint16_t)(code_length_histo[*repeat_code_len] + repeat_delta);
+  } else {
+    *symbol += repeat_delta;  /* Zero lengths only advance the position. */
+  }
+}
+
+/* Reads and decodes symbol code lengths; this variant needs buffered input. */
+static BrotliDecoderErrorCode ReadSymbolCodeLengths(
+    uint32_t alphabet_size, BrotliDecoderState* s) {
+  BrotliBitReader* br = &s->br;
+  BrotliMetablockHeaderArena* h = &s->arena.header;
+  uint32_t symbol = h->symbol;  /* Progress is worked on in local copies. */
+  uint32_t repeat = h->repeat;
+  uint32_t space = h->space;
+  uint32_t prev_code_len = h->prev_code_len;
+  uint32_t repeat_code_len = h->repeat_code_len;
+  uint16_t* symbol_lists = h->symbol_lists;
+  uint16_t* code_length_histo = h->code_length_histo;
+  int* next_symbol = h->next_symbol;
+  if (!BrotliWarmupBitReader(br)) {
+    return BROTLI_DECODER_NEEDS_MORE_INPUT;
+  }
+  while (symbol < alphabet_size && space > 0) {
+    const HuffmanCode* p = h->table;
+    uint32_t code_len;
+    BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(p);
+    if (!BrotliCheckInputAmount(br, BROTLI_SHORT_FILL_BIT_WINDOW_READ)) {
+      h->symbol = symbol;  /* Write the local copies back before bailing out. */
+      h->repeat = repeat;
+      h->prev_code_len = prev_code_len;
+      h->repeat_code_len = repeat_code_len;
+      h->space = space;
+      return BROTLI_DECODER_NEEDS_MORE_INPUT;
+    }
+    BrotliFillBitWindow16(br);
+    BROTLI_HC_ADJUST_TABLE_INDEX(p, BrotliGetBitsUnmasked(br) &
+        BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH));
+    BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(p));  /* Use 1..5 bits. */
+    code_len = BROTLI_HC_FAST_LOAD_VALUE(p);  /* code_len == 0..17 */
+    if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) {
+      ProcessSingleCodeLength(code_len, &symbol, &repeat, &space,
+          &prev_code_len, symbol_lists, code_length_histo, next_symbol);
+    } else {  /* code_len == 16..17, extra_bits == 2..3 */
+      uint32_t extra_bits =
+          (code_len == BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) ? 2 : 3;
+      uint32_t repeat_delta =
+          (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(extra_bits);
+      BrotliDropBits(br, extra_bits);
+      ProcessRepeatedCodeLength(code_len, repeat_delta, alphabet_size,
+          &symbol, &repeat, &space, &prev_code_len, &repeat_code_len,
+          symbol_lists, code_length_histo, next_symbol);
+    }
+  }
+  h->space = space;  /* On success only the remaining space is stored back. */
+  return BROTLI_DECODER_SUCCESS;
+}
+
+static BrotliDecoderErrorCode SafeReadSymbolCodeLengths(
+    uint32_t alphabet_size, BrotliDecoderState* s) {
+  BrotliBitReader* br = &s->br;
+  BrotliMetablockHeaderArena* h = &s->arena.header;  /* Updated in place. */
+  BROTLI_BOOL get_byte = BROTLI_FALSE;  /* Set when one more byte is needed. */
+  while (h->symbol < alphabet_size && h->space > 0) {
+    const HuffmanCode* p = h->table;
+    uint32_t code_len;
+    uint32_t available_bits;
+    uint32_t bits = 0;
+    BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(p);
+    if (get_byte && !BrotliPullByte(br)) return BROTLI_DECODER_NEEDS_MORE_INPUT;
+    get_byte = BROTLI_FALSE;
+    available_bits = BrotliGetAvailableBits(br);
+    if (available_bits != 0) {
+      bits = (uint32_t)BrotliGetBitsUnmasked(br);
+    }
+    BROTLI_HC_ADJUST_TABLE_INDEX(p,
+        bits & BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH));
+    if (BROTLI_HC_FAST_LOAD_BITS(p) > available_bits) {
+      get_byte = BROTLI_TRUE;  /* Code is incomplete; retry with more bits. */
+      continue;
+    }
+    code_len = BROTLI_HC_FAST_LOAD_VALUE(p);  /* code_len == 0..17 */
+    if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) {
+      BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(p));
+      ProcessSingleCodeLength(code_len, &h->symbol, &h->repeat, &h->space,
+          &h->prev_code_len, h->symbol_lists, h->code_length_histo,
+          h->next_symbol);
+    } else {  /* code_len == 16..17, extra_bits == 2..3 */
+      uint32_t extra_bits = code_len - 14U;
+      uint32_t repeat_delta = (bits >> BROTLI_HC_FAST_LOAD_BITS(p)) &
+          BitMask(extra_bits);
+      if (available_bits < BROTLI_HC_FAST_LOAD_BITS(p) + extra_bits) {
+        get_byte = BROTLI_TRUE;  /* Nothing dropped yet; the code is re-read. */
+        continue;
+      }
+      BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(p) + extra_bits);
+      ProcessRepeatedCodeLength(code_len, repeat_delta, alphabet_size,
+          &h->symbol, &h->repeat, &h->space, &h->prev_code_len,
+          &h->repeat_code_len, h->symbol_lists, h->code_length_histo,
+          h->next_symbol);
+    }
+  }
+  return BROTLI_DECODER_SUCCESS;
+}
+
+/* Reads and decodes 15..18 codes using static prefix code.
+   Each code is 2..4 bits long. In total 30..72 bits are used. */
+static BrotliDecoderErrorCode ReadCodeLengthCodeLengths(BrotliDecoderState* s) {
+  BrotliBitReader* br = &s->br;
+  BrotliMetablockHeaderArena* h = &s->arena.header;
+  uint32_t num_codes = h->repeat;  /* h->repeat doubles as the code counter. */
+  unsigned space = h->space;
+  uint32_t i = h->sub_loop_counter;  /* Resume (or skip) position. */
+  for (; i < BROTLI_CODE_LENGTH_CODES; ++i) {
+    const uint8_t code_len_idx = kCodeLengthCodeOrder[i];
+    uint32_t ix;
+    uint32_t v;
+    if (BROTLI_PREDICT_FALSE(!BrotliSafeGetBits(br, 4, &ix))) {
+      uint32_t available_bits = BrotliGetAvailableBits(br);
+      if (available_bits != 0) {
+        ix = BrotliGetBitsUnmasked(br) & 0xF;
+      } else {
+        ix = 0;
+      }
+      if (kCodeLengthPrefixLength[ix] > available_bits) {
+        h->sub_loop_counter = i;  /* Save progress so the call can resume. */
+        h->repeat = num_codes;
+        h->space = space;
+        h->substate_huffman = BROTLI_STATE_HUFFMAN_COMPLEX;
+        return BROTLI_DECODER_NEEDS_MORE_INPUT;
+      }
+    }
+    v = kCodeLengthPrefixValue[ix];
+    BrotliDropBits(br, kCodeLengthPrefixLength[ix]);
+    h->code_length_code_lengths[code_len_idx] = (uint8_t)v;
+    BROTLI_LOG_ARRAY_INDEX(h->code_length_code_lengths, code_len_idx);
+    if (v != 0) {
+      space = space - (32U >> v);
+      ++num_codes;
+      ++h->code_length_histo[v];
+      if (space - 1U >= 32U) {
+        /* space is 0 or wrapped around. */
+        break;
+      }
+    }
+  }
+  if (!(num_codes == 1 || space == 0)) {  /* One code, or space fully used. */
+    return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_CL_SPACE);
+  }
+  return BROTLI_DECODER_SUCCESS;
+}
+
+/* Decodes one Huffman table into `table`; resumes via h->substate_huffman.
+   There are 2 scenarios:
+    A) Huffman code contains only few symbols (1..4). Those symbols are read
+       directly; their code lengths are defined by the number of symbols.
+       For this scenario 4 - 49 bits will be read.
+
+    B) 2-phase decoding:
+    B.1) Small Huffman table is decoded; it is specified with code lengths
+         encoded with predefined entropy code. 32 - 74 bits are used.
+    B.2) Decoded table is used to decode code lengths of symbols in resulting
+         Huffman table. In worst case 3520 bits are read. */
+static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size_max,
+                                              uint32_t alphabet_size_limit,
+                                              HuffmanCode* table,
+                                              uint32_t* opt_table_size,
+                                              BrotliDecoderState* s) {
+  BrotliBitReader* br = &s->br;
+  BrotliMetablockHeaderArena* h = &s->arena.header;
+  /* State machine. */
+  for (;;) {
+    switch (h->substate_huffman) {
+      case BROTLI_STATE_HUFFMAN_NONE:
+        if (!BrotliSafeReadBits(br, 2, &h->sub_loop_counter)) {
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        BROTLI_LOG_UINT(h->sub_loop_counter);
+        /* The value is used as follows:
+           1 for simple code;
+           0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths */
+        if (h->sub_loop_counter != 1) {
+          h->space = 32;
+          h->repeat = 0;  /* num_codes */
+          memset(&h->code_length_histo[0], 0, sizeof(h->code_length_histo[0]) *
+              (BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH + 1));
+          memset(&h->code_length_code_lengths[0], 0,
+              sizeof(h->code_length_code_lengths));
+          h->substate_huffman = BROTLI_STATE_HUFFMAN_COMPLEX;
+          continue;  /* Re-enter the switch at the COMPLEX state. */
+        }
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_SIMPLE_SIZE:
+        /* Read symbols, codes & code lengths directly. */
+        if (!BrotliSafeReadBits(br, 2, &h->symbol)) {  /* num_symbols */
+          h->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_SIZE;
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        h->sub_loop_counter = 0;
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_SIMPLE_READ: {
+        BrotliDecoderErrorCode result =
+            ReadSimpleHuffmanSymbols(alphabet_size_max, alphabet_size_limit, s);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          return result;
+        }
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_SIMPLE_BUILD: {
+        uint32_t table_size;
+        if (h->symbol == 3) {
+          uint32_t bits;
+          if (!BrotliSafeReadBits(br, 1, &bits)) {
+            h->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_BUILD;
+            return BROTLI_DECODER_NEEDS_MORE_INPUT;
+          }
+          h->symbol += bits;  /* One extra bit turns 3 into 3 or 4. */
+        }
+        BROTLI_LOG_UINT(h->symbol);
+        table_size = BrotliBuildSimpleHuffmanTable(
+            table, HUFFMAN_TABLE_BITS, h->symbols_lists_array, h->symbol);
+        if (opt_table_size) {
+          *opt_table_size = table_size;
+        }
+        h->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
+        return BROTLI_DECODER_SUCCESS;
+      }
+
+      /* Decode Huffman-coded code lengths. */
+      case BROTLI_STATE_HUFFMAN_COMPLEX: {
+        uint32_t i;
+        BrotliDecoderErrorCode result = ReadCodeLengthCodeLengths(s);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          return result;
+        }
+        BrotliBuildCodeLengthsHuffmanTable(h->table,
+                                           h->code_length_code_lengths,
+                                           h->code_length_histo);
+        memset(&h->code_length_histo[0], 0, sizeof(h->code_length_histo));
+        for (i = 0; i <= BROTLI_HUFFMAN_MAX_CODE_LENGTH; ++i) {
+          h->next_symbol[i] = (int)i - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);
+          h->symbol_lists[h->next_symbol[i]] = 0xFFFF;  /* Negative index. */
+        }
+
+        h->symbol = 0;
+        h->prev_code_len = BROTLI_INITIAL_REPEATED_CODE_LENGTH;
+        h->repeat = 0;
+        h->repeat_code_len = 0;
+        h->space = 32768;
+        h->substate_huffman = BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS: {
+        uint32_t table_size;
+        BrotliDecoderErrorCode result = ReadSymbolCodeLengths(
+            alphabet_size_limit, s);
+        if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) {
+          result = SafeReadSymbolCodeLengths(alphabet_size_limit, s);
+        }
+        if (result != BROTLI_DECODER_SUCCESS) {
+          return result;
+        }
+
+        if (h->space != 0) {  /* Code lengths must use the space exactly. */
+          BROTLI_LOG(("[ReadHuffmanCode] space = %d\n", (int)h->space));
+          return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_HUFFMAN_SPACE);
+        }
+        table_size = BrotliBuildHuffmanTable(
+            table, HUFFMAN_TABLE_BITS, h->symbol_lists, h->code_length_histo);
+        if (opt_table_size) {
+          *opt_table_size = table_size;
+        }
+        h->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
+        return BROTLI_DECODER_SUCCESS;
+      }
+
+      default:
+        return
+            BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);  /* COV_NF_LINE */
+    }
+  }
+}
+
+/* Reads a block length: a prefix symbol plus extra bits (3..39 bits total). */
+static BROTLI_INLINE uint32_t ReadBlockLength(const HuffmanCode* table,
+                                              BrotliBitReader* br) {
+  const uint32_t code = ReadSymbol(table, br);
+  /* The range entry supplies the base length and the extra-bit count. */
+  const uint32_t extra_bits = _kBrotliPrefixCodeRanges[code].nbits;  /* 2..24 */
+  const uint32_t base = _kBrotliPrefixCodeRanges[code].offset;
+  return base + BrotliReadBits24(br, extra_bits);
+}
+
+/* Resumable block length read: a prefix symbol followed by extra bits.
+   Returns BROTLI_FALSE when input runs out; if only the extra bits were
+   missing, the decoded symbol is parked in the state for the next call.
+   WARNING: if state is not BROTLI_STATE_READ_BLOCK_LENGTH_NONE, then
+   reading can't be continued with ReadBlockLength. */
+static BROTLI_INLINE BROTLI_BOOL SafeReadBlockLength(
+    BrotliDecoderState* s, uint32_t* result, const HuffmanCode* table,
+    BrotliBitReader* br) {
+  uint32_t code;
+  uint32_t extra;
+  if (s->substate_read_block_length != BROTLI_STATE_READ_BLOCK_LENGTH_NONE) {
+    /* Resuming: the prefix symbol was decoded by an earlier call. */
+    code = s->block_length_index;
+  } else if (!SafeReadSymbol(table, br, &code)) {
+    return BROTLI_FALSE;
+  }
+
+  if (!BrotliSafeReadBits(br, _kBrotliPrefixCodeRanges[code].nbits, &extra)) {
+    /* Park the symbol so that only the suffix bits are retried. */
+    s->block_length_index = code;
+    s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX;
+    return BROTLI_FALSE;
+  }
+  *result = _kBrotliPrefixCodeRanges[code].offset + extra;
+  s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_NONE;
+  return BROTLI_TRUE;
+}
+
+/* Transform:
+    1) initialize list L with values 0, 1,... 255
+    2) For each input element X:
+    2.1) let Y = L[X]
+    2.2) remove X-th element from L
+    2.3) prepend Y to L
+    2.4) append Y to output
+
+   In most cases max(Y) <= 7, so most of L remains intact.
+   To reduce the cost of initialization, we reuse L, remember the upper bound
+   of Y values, and reinitialize only first elements in L.
+
+   Most of input values are 0 and 1. To reduce number of branches, we replace
+   inner for loop with do-while. */
+static BROTLI_NOINLINE void InverseMoveToFrontTransform(
+    uint8_t* v, uint32_t v_len, BrotliDecoderState* state) {
+  /* Reinitialize elements that could have been changed. */
+  uint32_t i = 1;
+  uint32_t upper_bound = state->mtf_upper_bound;
+  uint32_t* mtf = &state->mtf[1];  /* Make mtf[-1] addressable. */
+  uint8_t* mtf_u8 = (uint8_t*)mtf;  /* Byte view of the same list. */
+  /* Load endian-aware constant. */
+  const uint8_t b0123[4] = {0, 1, 2, 3};
+  uint32_t pattern;
+  memcpy(&pattern, &b0123, 4);
+
+  /* Initialize list using a pattern of 4 consecutive values. */
+  mtf[0] = pattern;
+  do {
+    pattern += 0x04040404;  /* Advance all 4 values by 4. */
+    mtf[i] = pattern;
+    i++;
+  } while (i <= upper_bound);
+
+  /* Transform the input. */
+  upper_bound = 0;
+  for (i = 0; i < v_len; ++i) {
+    int index = v[i];
+    uint8_t value = mtf_u8[index];
+    upper_bound |= v[i];  /* OR of all inputs bounds the touched indices. */
+    v[i] = value;
+    mtf_u8[-1] = value;  /* Staged in front; the shift below moves it to [0]. */
+    do {
+      index--;
+      mtf_u8[index + 1] = mtf_u8[index];
+    } while (index >= 0);
+  }
+  /* Remember amount of elements to be reinitialized. */
+  state->mtf_upper_bound = upper_bound >> 2;  /* In 4-byte words. */
+}
+
+/* Decodes a series of Huffman tables, one ReadHuffmanCode call per table. */
+static BrotliDecoderErrorCode HuffmanTreeGroupDecode(
+    HuffmanTreeGroup* group, BrotliDecoderState* s) {
+  BrotliMetablockHeaderArena* h = &s->arena.header;
+  if (h->substate_tree_group != BROTLI_STATE_TREE_GROUP_LOOP) {
+    h->substate_tree_group = BROTLI_STATE_TREE_GROUP_LOOP;
+    h->htree_index = 0;
+    h->next = group->codes;
+  }
+  /* Progress lives in the arena, so a later call continues where we stop. */
+  for (; h->htree_index < group->num_htrees; ++h->htree_index) {
+    uint32_t len;
+    const BrotliDecoderErrorCode status = ReadHuffmanCode(
+        group->alphabet_size_max, group->alphabet_size_limit, h->next, &len, s);
+    if (status != BROTLI_DECODER_SUCCESS) return status;
+    group->htrees[h->htree_index] = h->next;
+    h->next += len;
+  }
+  h->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE;
+  return BROTLI_DECODER_SUCCESS;
+}
+
+/* Decodes a context map.
+   Decoding is done in 4 phases:
+    1) Read auxiliary information (6..16 bits) and allocate memory.
+       In case of trivial context map, decoding is finished at this phase.
+    2) Decode Huffman table using ReadHuffmanCode function.
+       This table will be used for reading context map items.
+    3) Read context map items; "0" values could be run-length encoded.
+    4) Optionally, apply InverseMoveToFront transform to the resulting map. */
+static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
+                                               uint32_t* num_htrees,
+                                               uint8_t** context_map_arg,
+                                               BrotliDecoderState* s) {
+  BrotliBitReader* br = &s->br;
+  BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS;
+  BrotliMetablockHeaderArena* h = &s->arena.header;
+
+  switch ((int)h->substate_context_map) {
+    case BROTLI_STATE_CONTEXT_MAP_NONE:
+      result = DecodeVarLenUint8(s, br, num_htrees);
+      if (result != BROTLI_DECODER_SUCCESS) {
+        return result;
+      }
+      (*num_htrees)++;
+      h->context_index = 0;
+      BROTLI_LOG_UINT(context_map_size);
+      BROTLI_LOG_UINT(*num_htrees);
+      *context_map_arg =
+          (uint8_t*)BROTLI_DECODER_ALLOC(s, (size_t)context_map_size);
+      if (*context_map_arg == 0) {
+        return BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_CONTEXT_MAP);
+      }
+      if (*num_htrees <= 1) {  /* Trivial map: every context uses tree 0. */
+        memset(*context_map_arg, 0, (size_t)context_map_size);
+        return BROTLI_DECODER_SUCCESS;
+      }
+      h->substate_context_map = BROTLI_STATE_CONTEXT_MAP_READ_PREFIX;
+    /* Fall through. */
+
+    case BROTLI_STATE_CONTEXT_MAP_READ_PREFIX: {
+      uint32_t bits;
+      /* In next stage ReadHuffmanCode uses at least 4 bits, so it is safe
+         to peek 4 bits beyond the 1-bit RLE flag (5 bits in total). */
+      if (!BrotliSafeGetBits(br, 5, &bits)) {
+        return BROTLI_DECODER_NEEDS_MORE_INPUT;
+      }
+      if ((bits & 1) != 0) { /* Use RLE for zeros. */
+        h->max_run_length_prefix = (bits >> 1) + 1;
+        BrotliDropBits(br, 5);
+      } else {
+        h->max_run_length_prefix = 0;
+        BrotliDropBits(br, 1);
+      }
+      BROTLI_LOG_UINT(h->max_run_length_prefix);
+      h->substate_context_map = BROTLI_STATE_CONTEXT_MAP_HUFFMAN;
+    }
+    /* Fall through. */
+
+    case BROTLI_STATE_CONTEXT_MAP_HUFFMAN: {
+      uint32_t alphabet_size = *num_htrees + h->max_run_length_prefix;
+      result = ReadHuffmanCode(alphabet_size, alphabet_size,
+                               h->context_map_table, NULL, s);
+      if (result != BROTLI_DECODER_SUCCESS) return result;
+      h->code = 0xFFFF;  /* Marker: no RLE code is pending. */
+      h->substate_context_map = BROTLI_STATE_CONTEXT_MAP_DECODE;
+    }
+    /* Fall through. */
+
+    case BROTLI_STATE_CONTEXT_MAP_DECODE: {
+      uint32_t context_index = h->context_index;
+      uint32_t max_run_length_prefix = h->max_run_length_prefix;
+      uint8_t* context_map = *context_map_arg;
+      uint32_t code = h->code;
+      BROTLI_BOOL skip_preamble = (code != 0xFFFF);  /* Resume inside RLE. */
+      while (context_index < context_map_size || skip_preamble) {
+        if (!skip_preamble) {
+          if (!SafeReadSymbol(h->context_map_table, br, &code)) {
+            h->code = 0xFFFF;
+            h->context_index = context_index;
+            return BROTLI_DECODER_NEEDS_MORE_INPUT;
+          }
+          BROTLI_LOG_UINT(code);
+
+          if (code == 0) {
+            context_map[context_index++] = 0;
+            continue;
+          }
+          if (code > max_run_length_prefix) {  /* Plain (non-RLE) entry. */
+            context_map[context_index++] =
+                (uint8_t)(code - max_run_length_prefix);
+            continue;
+          }
+        } else {
+          skip_preamble = BROTLI_FALSE;
+        }
+        /* RLE sub-stage. */
+        {
+          uint32_t reps;
+          if (!BrotliSafeReadBits(br, code, &reps)) {
+            h->code = code;  /* Keep the RLE code for the resumed call. */
+            h->context_index = context_index;
+            return BROTLI_DECODER_NEEDS_MORE_INPUT;
+          }
+          reps += 1U << code;
+          BROTLI_LOG_UINT(reps);
+          if (context_index + reps > context_map_size) {
+            return
+                BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_CONTEXT_MAP_REPEAT);
+          }
+          do {
+            context_map[context_index++] = 0;
+          } while (--reps);
+        }
+      }
+    }
+    /* Fall through. */
+
+    case BROTLI_STATE_CONTEXT_MAP_TRANSFORM: {
+      uint32_t bits;
+      if (!BrotliSafeReadBits(br, 1, &bits)) {
+        h->substate_context_map = BROTLI_STATE_CONTEXT_MAP_TRANSFORM;
+        return BROTLI_DECODER_NEEDS_MORE_INPUT;
+      }
+      if (bits != 0) {
+        InverseMoveToFrontTransform(*context_map_arg, context_map_size, s);
+      }
+      h->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE;
+      return BROTLI_DECODER_SUCCESS;
+    }
+
+    default:
+      return
+          BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);  /* COV_NF_LINE */
+  }
+}
+
+/* Decodes the next block type and block length of one tree category and
+   records the type in that category's ring-buffer. Reads 3..54 bits. */
+static BROTLI_INLINE BROTLI_BOOL DecodeBlockTypeAndLength(
+    int safe, BrotliDecoderState* s, int tree_type) {
+  const uint32_t type_count = s->num_block_types[tree_type];
+  const HuffmanCode* type_tree = &s->block_type_trees[
+      tree_type * BROTLI_HUFFMAN_MAX_SIZE_258];
+  const HuffmanCode* len_tree = &s->block_len_trees[
+      tree_type * BROTLI_HUFFMAN_MAX_SIZE_26];
+  BrotliBitReader* br = &s->br;
+  uint32_t* recent = &s->block_type_rb[tree_type * 2];
+  uint32_t block_type;
+  if (type_count <= 1) {
+    return BROTLI_FALSE;
+  }
+
+  /* Read 0..15 + 3..39 bits. */
+  if (safe) {
+    BrotliBitReaderState memento;
+    BrotliBitReaderSaveState(br, &memento);
+    if (!SafeReadSymbol(type_tree, br, &block_type)) return BROTLI_FALSE;
+    if (!SafeReadBlockLength(s, &s->block_length[tree_type], len_tree, br)) {
+      /* All or nothing: forget the partial length and rewind the reader. */
+      s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_NONE;
+      BrotliBitReaderRestoreState(br, &memento);
+      return BROTLI_FALSE;
+    }
+  } else {
+    block_type = ReadSymbol(type_tree, br);
+    s->block_length[tree_type] = ReadBlockLength(len_tree, br);
+  }
+
+  /* Codes 0 and 1 refer to recent types; the rest are explicit (code - 2). */
+  switch (block_type) {
+    case 0: block_type = recent[0]; break;
+    case 1: block_type = recent[1] + 1; break;
+    default: block_type -= 2; break;
+  }
+  if (block_type >= type_count) {
+    block_type -= type_count;
+  }
+  recent[0] = recent[1];
+  recent[1] = block_type;
+  return BROTLI_TRUE;
+}
+
+static BROTLI_INLINE void DetectTrivialLiteralBlockTypes(
+    BrotliDecoderState* s) {
+  size_t block;  /* Literal block type under inspection. */
+  for (block = 0; block < 8; ++block) s->trivial_literal_contexts[block] = 0;
+  for (block = 0; block < s->num_block_types[0]; block++) {
+    const size_t base = block << BROTLI_LITERAL_CONTEXT_BITS;
+    const size_t first = s->context_map[base];
+    size_t mismatch = 0;  /* Stays 0 only if every entry equals `first`. */
+    size_t k = 0;
+    while (k < (1u << BROTLI_LITERAL_CONTEXT_BITS)) {
+      BROTLI_REPEAT_4({ mismatch |= s->context_map[base + k++] ^ first; })
+    }
+    if (mismatch == 0) {  /* Set bit (block & 31) of word (block >> 5). */
+      s->trivial_literal_contexts[block >> 5] |= 1u << (block & 31);
+    }
+  }
+}
+
+/* Refreshes literal-decoding state from the current literal block type. */
+static BROTLI_INLINE void PrepareLiteralDecoding(BrotliDecoderState* s) {
+  uint32_t block_type = s->block_type_rb[1];
+  uint32_t context_offset = block_type << BROTLI_LITERAL_CONTEXT_BITS;
+  uint8_t context_mode = s->context_modes[block_type] & 3;
+  s->context_map_slice = s->context_map + context_offset;
+  s->literal_htree = s->literal_hgroup.htrees[s->context_map_slice[0]];
+  s->context_lookup = BROTLI_CONTEXT_LUT(context_mode);
+  /* One flag bit per block type, packed 32 to a word. */
+  s->trivial_literal_context =
+      (s->trivial_literal_contexts[block_type >> 5] >> (block_type & 31)) & 1;
+}
+
+/* Switches to the next literal block: decodes its type and length and, on
+   success, refreshes the literal context state. Reads 3..54 bits. */
+static BROTLI_INLINE BROTLI_BOOL DecodeLiteralBlockSwitchInternal(
+    int safe, BrotliDecoderState* s) {
+  if (DecodeBlockTypeAndLength(safe, s, 0)) {
+    PrepareLiteralDecoding(s);
+    return BROTLI_TRUE;
+  }
+  return BROTLI_FALSE;
+}
+
+static void BROTLI_NOINLINE DecodeLiteralBlockSwitch(BrotliDecoderState* s) {
+  DecodeLiteralBlockSwitchInternal(0, s);  /* safe = 0; result is unused. */
+}
+
+static BROTLI_BOOL BROTLI_NOINLINE SafeDecodeLiteralBlockSwitch(
+    BrotliDecoderState* s) {
+  return DecodeLiteralBlockSwitchInternal(1, s);  /* safe = 1 */
+}
+
+/* Block switch for insert/copy length: on success also selects the command
+   tree of the new block type. Reads 3..54 bits. */
+static BROTLI_INLINE BROTLI_BOOL DecodeCommandBlockSwitchInternal(
+    int safe, BrotliDecoderState* s) {
+  if (DecodeBlockTypeAndLength(safe, s, 1)) {
+    s->htree_command = s->insert_copy_hgroup.htrees[s->block_type_rb[3]];
+    return BROTLI_TRUE;
+  }
+  return BROTLI_FALSE;
+}
+
+static void BROTLI_NOINLINE DecodeCommandBlockSwitch(BrotliDecoderState* s) {
+  DecodeCommandBlockSwitchInternal(0, s);  /* safe = 0; result is unused. */
+}
+
+static BROTLI_BOOL BROTLI_NOINLINE SafeDecodeCommandBlockSwitch(
+    BrotliDecoderState* s) {
+  return DecodeCommandBlockSwitchInternal(1, s);  /* safe = 1 */
+}
+
+/* Block switch for distance codes: on success also re-derives the distance
+   context map slice and the distance tree index. Reads 3..54 bits. */
+static BROTLI_INLINE BROTLI_BOOL DecodeDistanceBlockSwitchInternal(
+    int safe, BrotliDecoderState* s) {
+  if (DecodeBlockTypeAndLength(safe, s, 2)) {
+    s->dist_context_map_slice = s->dist_context_map +
+        (s->block_type_rb[5] << BROTLI_DISTANCE_CONTEXT_BITS);
+    s->dist_htree_index = s->dist_context_map_slice[s->distance_context];
+    return BROTLI_TRUE;
+  }
+  return BROTLI_FALSE;
+}
+
+static void BROTLI_NOINLINE DecodeDistanceBlockSwitch(BrotliDecoderState* s) {
+  DecodeDistanceBlockSwitchInternal(0, s);  /* safe = 0; result is unused. */
+}
+
+static BROTLI_BOOL BROTLI_NOINLINE SafeDecodeDistanceBlockSwitch(
+    BrotliDecoderState* s) {
+  return DecodeDistanceBlockSwitchInternal(1, s);  /* safe = 1 */
+}
+
+/* Decoded bytes not yet emitted; |wrap| caps the position at the buffer end. */
+static size_t UnwrittenBytes(const BrotliDecoderState* s, BROTLI_BOOL wrap) {
+  size_t rb_size = (size_t)s->ringbuffer_size;
+  size_t end = (wrap && s->pos > s->ringbuffer_size) ? rb_size : (size_t)s->pos;
+  return (s->rb_roundtrips * rb_size) + end - s->partial_pos_out;
+}
+
+/* Dumps output.
+   Returns BROTLI_DECODER_NEEDS_MORE_OUTPUT only if there is more output to push
+   and either ring-buffer is as big as window size, or |force| is true. */
+static BrotliDecoderErrorCode BROTLI_NOINLINE WriteRingBuffer(
+    BrotliDecoderState* s, size_t* available_out, uint8_t** next_out,
+    size_t* total_out, BROTLI_BOOL force) {
+  uint8_t* start =
+      s->ringbuffer + (s->partial_pos_out & (size_t)s->ringbuffer_mask);
+  size_t to_write = UnwrittenBytes(s, BROTLI_TRUE);
+  size_t num_written = *available_out;  /* Clamped to to_write below. */
+  if (num_written > to_write) {
+    num_written = to_write;
+  }
+  if (s->meta_block_remaining_len < 0) {
+    return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_1);
+  }
+  if (next_out && !*next_out) {
+    *next_out = start;  /* Null buffer: point the caller at our data. */
+  } else {
+    if (next_out) {
+      memcpy(*next_out, start, num_written);
+      *next_out += num_written;
+    }
+  }
+  *available_out -= num_written;
+  BROTLI_LOG_UINT(to_write);
+  BROTLI_LOG_UINT(num_written);
+  s->partial_pos_out += num_written;
+  if (total_out) {  /* Optional out-parameter. */
+    *total_out = s->partial_pos_out;
+  }
+  if (num_written < to_write) {  /* Some bytes are still pending. */
+    if (s->ringbuffer_size == (1 << s->window_bits) || force) {
+      return BROTLI_DECODER_NEEDS_MORE_OUTPUT;
+    } else {
+      return BROTLI_DECODER_SUCCESS;
+    }
+  }
+  /* Wrap ring buffer only if it has reached its maximal size. */
+  if (s->ringbuffer_size == (1 << s->window_bits) &&
+      s->pos >= s->ringbuffer_size) {
+    s->pos -= s->ringbuffer_size;
+    s->rb_roundtrips++;
+    s->should_wrap_ringbuffer = (size_t)s->pos != 0 ? 1 : 0;  /* Tail left? */
+  }
+  return BROTLI_DECODER_SUCCESS;
+}
+
+/* If flagged, copies s->pos bytes from ringbuffer_end to the buffer start. */
+static void BROTLI_NOINLINE WrapRingBuffer(BrotliDecoderState* s) {
+  if (!s->should_wrap_ringbuffer) return;
+  memcpy(s->ringbuffer, s->ringbuffer_end, (size_t)s->pos);
+  s->should_wrap_ringbuffer = 0;
+}
+
+/* Allocates ring-buffer, or reallocates it when the requested size changed.
+   Returns BROTLI_FALSE (keeping the old buffer) if allocation fails.
+   s->new_ringbuffer_size MUST be updated by BrotliCalculateRingBufferSize
+   before this function is called.
+
+   Last two bytes of ring-buffer are initialized to 0, so context calculation
+   could be done uniformly for the first two and all other positions. */
+static BROTLI_BOOL BROTLI_NOINLINE BrotliEnsureRingBuffer(
+    BrotliDecoderState* s) {
+  uint8_t* old_ringbuffer = s->ringbuffer;
+  if (s->ringbuffer_size == s->new_ringbuffer_size) {  /* Already sized. */
+    return BROTLI_TRUE;
+  }
+
+  s->ringbuffer = (uint8_t*)BROTLI_DECODER_ALLOC(s,
+      (size_t)(s->new_ringbuffer_size) + kRingBufferWriteAheadSlack);
+  if (s->ringbuffer == 0) {
+    /* Restore previous value. */
+    s->ringbuffer = old_ringbuffer;
+    return BROTLI_FALSE;
+  }
+  s->ringbuffer[s->new_ringbuffer_size - 2] = 0;
+  s->ringbuffer[s->new_ringbuffer_size - 1] = 0;
+
+  if (!!old_ringbuffer) {
+    memcpy(s->ringbuffer, old_ringbuffer, (size_t)s->pos);  /* Keep data. */
+    BROTLI_DECODER_FREE(s, old_ringbuffer);
+  }
+
+  s->ringbuffer_size = s->new_ringbuffer_size;
+  s->ringbuffer_mask = s->new_ringbuffer_size - 1;
+  s->ringbuffer_end = s->ringbuffer + s->ringbuffer_size;
+
+  return BROTLI_TRUE;
+}
+
+static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput(
+    size_t* available_out, uint8_t** next_out, size_t* total_out,
+    BrotliDecoderState* s) {
+  /* TODO(eustas): avoid allocation for single uncompressed block. */
+  if (!BrotliEnsureRingBuffer(s)) {
+    return BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_1);
+  }
+
+  /* State machine: NONE copies input to ring-buffer, WRITE flushes it. */
+  for (;;) {
+    switch (s->substate_uncompressed) {
+      case BROTLI_STATE_UNCOMPRESSED_NONE: {
+        int nbytes = (int)BrotliGetRemainingBytes(&s->br);
+        if (nbytes > s->meta_block_remaining_len) {  /* Cap at block end. */
+          nbytes = s->meta_block_remaining_len;
+        }
+        if (s->pos + nbytes > s->ringbuffer_size) {  /* Cap at buffer end. */
+          nbytes = s->ringbuffer_size - s->pos;
+        }
+        /* Copy remaining bytes from s->br.buf_ to ring-buffer. */
+        BrotliCopyBytes(&s->ringbuffer[s->pos], &s->br, (size_t)nbytes);
+        s->pos += nbytes;
+        s->meta_block_remaining_len -= nbytes;
+        if (s->pos < 1 << s->window_bits) {  /* Below the window size. */
+          if (s->meta_block_remaining_len == 0) {
+            return BROTLI_DECODER_SUCCESS;
+          }
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_WRITE;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_UNCOMPRESSED_WRITE: {
+        BrotliDecoderErrorCode result;
+        result = WriteRingBuffer(
+            s, available_out, next_out, total_out, BROTLI_FALSE);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          return result;  /* State stays WRITE, so the flush is retried. */
+        }
+        if (s->ringbuffer_size == 1 << s->window_bits) {
+          s->max_distance = s->max_backward_distance;
+        }
+        s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_NONE;
+        break;  /* Leaves the switch only; the for (;;) loop continues. */
+      }
+    }
+  }
+  BROTLI_DCHECK(0);  /* Unreachable */
+}
+
+/* Appends one chunk to the compound dictionary, allocating it on first use.
+   Fails after decoding started, on OOM, or when 15 chunks are attached. */
+static BROTLI_BOOL AttachCompoundDictionary(
+    BrotliDecoderState* state, const uint8_t* data, size_t size) {
+  BrotliDecoderCompoundDictionary* dict = state->compound_dictionary;
+  if (state->state != BROTLI_STATE_UNINITED) return BROTLI_FALSE;
+  if (!dict) {
+    dict = (BrotliDecoderCompoundDictionary*)BROTLI_DECODER_ALLOC(
+        state, sizeof(BrotliDecoderCompoundDictionary));
+    if (!dict) return BROTLI_FALSE;
+    dict->num_chunks = dict->total_size = 0;
+    dict->br_length = dict->br_copied = 0;
+    dict->chunk_offsets[0] = 0;
+    dict->block_bits = -1;  /* -1 marks the block map as not built yet. */
+    state->compound_dictionary = dict;
+  }
+  if (dict->num_chunks == 15) return BROTLI_FALSE;
+  dict->chunks[dict->num_chunks++] = data;
+  dict->total_size += (int)size;
+  /* chunk_offsets[k] and chunk_offsets[k + 1] delimit chunk k. */
+  dict->chunk_offsets[dict->num_chunks] = dict->total_size;
+  return BROTLI_TRUE;
+}
+
+/* Lazily sizes the block map and fills it with a starting chunk index per
+   address block. Does nothing once block_bits has been computed. */
+static void EnsureCoumpoundDictionaryInitialized(BrotliDecoderState* state) {
+  BrotliDecoderCompoundDictionary* dict = state->compound_dictionary;
+  int shift = 8;  /* 256 = (1 << 8) slots in block map. */
+  int chunk = 0;
+  int at;
+  if (dict->block_bits != -1) return;
+  while (((dict->total_size - 1) >> shift) != 0) shift++;
+  shift -= 8;
+  dict->block_bits = shift;
+  for (at = 0; at < dict->total_size; at += 1 << shift) {
+    while (dict->chunk_offsets[chunk + 1] < at) chunk++;
+    dict->block_map[at >> shift] = (uint8_t)chunk;
+  }
+}
+
+/* Prepares a copy of |length| dictionary bytes starting at |address|. */
+static BROTLI_BOOL InitializeCompoundDictionaryCopy(BrotliDecoderState* s,
+    int address, int length) {
+  BrotliDecoderCompoundDictionary* dict = s->compound_dictionary;
+  int chunk;
+  EnsureCoumpoundDictionaryInitialized(s);
+  chunk = dict->block_map[address >> dict->block_bits];
+  while (address >= dict->chunk_offsets[chunk + 1]) chunk++;
+  if (dict->total_size < address + length) return BROTLI_FALSE;
+  /* Record the distance in the recent-distances ring. */
+  s->dist_rb[s->dist_rb_idx++ & 3] = s->distance_code;
+  s->meta_block_remaining_len -= length;
+  dict->br_index = chunk;
+  dict->br_offset = address - dict->chunk_offsets[chunk];
+  dict->br_length = length;
+  dict->br_copied = 0;
+  return BROTLI_TRUE;
+}
+
+static int GetCompoundDictionarySize(BrotliDecoderState* s) {  /* 0 if none. */
+  return s->compound_dictionary ? s->compound_dictionary->total_size : 0;
+}
+
+static int CopyFromCompoundDictionary(BrotliDecoderState* s, int pos) {
+  BrotliDecoderCompoundDictionary* addon = s->compound_dictionary;
+  int orig_pos = pos;
+  while (addon->br_length != addon->br_copied) {  /* Copy still pending. */
+    uint8_t* copy_dst = &s->ringbuffer[pos];
+    const uint8_t* copy_src =
+        addon->chunks[addon->br_index] + addon->br_offset;
+    int space = s->ringbuffer_size - pos;  /* Room left in ring-buffer. */
+    int rem_chunk_length = (addon->chunk_offsets[addon->br_index + 1] -
+        addon->chunk_offsets[addon->br_index]) - addon->br_offset;
+    int length = addon->br_length - addon->br_copied;
+    if (length > rem_chunk_length) length = rem_chunk_length;
+    if (length > space) length = space;
+    memcpy(copy_dst, copy_src, (size_t)length);
+    pos += length;
+    addon->br_offset += length;
+    addon->br_copied += length;
+    if (length == rem_chunk_length) {  /* Chunk consumed: move to the next. */
+      addon->br_index++;
+      addon->br_offset = 0;
+    }
+    if (pos == s->ringbuffer_size) break;  /* Ring-buffer is full. */
+  }
+  return pos - orig_pos;  /* Number of bytes copied by this call. */
+}
+
+/* Attaches a shared dictionary before decoding starts and registers every
+   prefix chunk added by this call with the compound dictionary. */
+BROTLI_BOOL BrotliDecoderAttachDictionary(
+    BrotliDecoderState* state, BrotliSharedDictionaryType type,
+    size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)]) {
+  uint32_t i = state->dictionary->num_prefix;  /* Prefix count before attach. */
+  if (state->state != BROTLI_STATE_UNINITED) return BROTLI_FALSE;
+  if (!BrotliSharedDictionaryAttach(state->dictionary, type, data_size, data)) {
+    return BROTLI_FALSE;
+  }
+  for (; i < state->dictionary->num_prefix; i++) {
+    if (!AttachCompoundDictionary(state, state->dictionary->prefix[i],
+                                  state->dictionary->prefix_size[i])) {
+      return BROTLI_FALSE;
+    }
+  }
+  return BROTLI_TRUE;
+}
+
+/* Calculates the smallest feasible ring buffer size and stores it in
+   s->new_ringbuffer_size.
+
+   If we know the data size is small, do not allocate more ring buffer
+   size than needed to reduce memory usage.
+   When this method is called, metablock size and flags MUST be decoded. */
+static void BROTLI_NOINLINE BrotliCalculateRingBufferSize(
+    BrotliDecoderState* s) {
+  const int window_size = 1 << s->window_bits;
+  int candidate = window_size;
+  /* At least 2 bytes are needed to take the last two bytes for context from;
+     start from the current size, or 1024 when the current size is 0. */
+  int needed = s->ringbuffer_size ? s->ringbuffer_size : 1024;
+  int expected_output;
+
+  /* If maximum is already reached, no further extension is required. */
+  if (s->ringbuffer_size == window_size) {
+    return;
+  }
+
+  /* Metadata blocks do not touch ring buffer. */
+  if (s->is_metadata) {
+    return;
+  }
+
+  /* Output already in the ring buffer (when one exists) plus the rest of
+     the current meta-block. */
+  expected_output = s->ringbuffer ? s->pos : 0;
+  expected_output += s->meta_block_remaining_len;
+  if (needed < expected_output) {
+    needed = expected_output;
+  }
+
+  if (s->canny_ringbuffer_allocation) {
+    /* Reduce ring buffer size to save memory when server is unscrupulous.
+       In worst case memory usage might be 1.5x bigger for a short period of
+       ring buffer reallocation. */
+    while ((candidate >> 1) >= needed) {
+      candidate >>= 1;
+    }
+  }
+
+  s->new_ringbuffer_size = candidate;
+}
+
+/* Reads 1..256 2-bit context modes, one per literal block type, resuming
+   from s->loop_counter after a BROTLI_DECODER_NEEDS_MORE_INPUT return. */
+static BrotliDecoderErrorCode ReadContextModes(BrotliDecoderState* s) {
+  BrotliBitReader* br = &s->br;
+  int i;
+
+  for (i = s->loop_counter; i < (int)s->num_block_types[0]; i++) {
+    uint32_t mode;
+    if (!BrotliSafeReadBits(br, 2, &mode)) {
+      s->loop_counter = i;  /* Remember where to resume. */
+      return BROTLI_DECODER_NEEDS_MORE_INPUT;
+    }
+    s->context_modes[i] = (uint8_t)mode;
+    BROTLI_LOG_ARRAY_INDEX(s->context_modes, i);
+  }
+  return BROTLI_DECODER_SUCCESS;
+}
+
+static BROTLI_INLINE void TakeDistanceFromRingBuffer(BrotliDecoderState* s) {
+  int offset = s->distance_code - 3;  /* -3..0 for codes 0..3. */
+  if (s->distance_code <= 3) {
+    /* Compensate double distance-ring-buffer roll for dictionary items. */
+    s->distance_context = 1 >> s->distance_code;  /* 1 only for code 0. */
+    s->distance_code = s->dist_rb[(s->dist_rb_idx - offset) & 3];
+    s->dist_rb_idx -= s->distance_context;
+  } else {
+    int index_delta = 3;  /* Ring-buffer slot used by codes 4..9. */
+    int delta;
+    int base = s->distance_code - 10;
+    if (s->distance_code < 10) {
+      base = s->distance_code - 4;
+    } else {
+      index_delta = 2;  /* Ring-buffer slot used by codes 10 and above. */
+    }
+    /* Unpack one of six 4-bit values. */
+    delta = ((0x605142 >> (4 * base)) & 0xF) - 3;  /* -1,+1,-2,+2,-3,+3 */
+    s->distance_code = s->dist_rb[(s->dist_rb_idx + index_delta) & 0x3] + delta;
+    if (s->distance_code <= 0) {
+      /* A huge distance will cause a BROTLI_FAILURE() soon.
+         This is a little faster than failing here. */
+      s->distance_code = 0x7FFFFFFF;
+    }
+  }
+}
+
+/* Like BrotliSafeReadBits, but a zero-width read yields 0 and succeeds. */
+static BROTLI_INLINE BROTLI_BOOL SafeReadBits(
+    BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
+  if (n_bits == 0) {
+    *val = 0;
+    return BROTLI_TRUE;
+  }
+  return BrotliSafeReadBits(br, n_bits, val);
+}
+
+/* Like BrotliSafeReadBits32, but a zero-width read yields 0 and succeeds. */
+static BROTLI_INLINE BROTLI_BOOL SafeReadBits32(
+    BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
+  if (n_bits == 0) {
+    *val = 0;
+    return BROTLI_TRUE;
+  }
+  return BrotliSafeReadBits32(br, n_bits, val);
+}
+
+/*
+   RFC 7932 Section 4 with "..." shortenings and "[]" emendations.
+
+   Each distance ... is represented with a pair <distance code, extra bits>...
+   The distance code is encoded using a prefix code... The number of extra bits
+   can be 0..24... Two additional parameters: NPOSTFIX (0..3), and ...
+   NDIRECT (0..120) ... are encoded in the meta-block header...
+
+   The first 16 distance symbols ... reference past distances... ring buffer ...
+   Next NDIRECT distance symbols ... represent distances from 1 to NDIRECT...
+   [For] distance symbols 16 + NDIRECT and greater ... the number of extra bits
+   ... is given by the following formula:
+
+   [ xcode = dcode - NDIRECT - 16 ]
+   ndistbits = 1 + [ xcode ] >> (NPOSTFIX + 1)
+
+   ...
+*/
+
+/*
+   RFC 7932 Section 9.2 with "..." shortenings and "[]" emendations.
+
+   ... to get the actual value of the parameter NDIRECT, left-shift this
+   four-bit number by NPOSTFIX bits ...
+*/
+
+/* Remaining formulas from RFC 7932 Section 4 could be rewritten as follows:
+
+     alphabet_size = 16 + NDIRECT + (max_distbits << (NPOSTFIX + 1))
+
+     half = ((xcode >> NPOSTFIX) & 1) << ndistbits
+     postfix = xcode & ((1 << NPOSTFIX) - 1)
+     range_start = 2 * (1 << ndistbits - 1 - 1)
+
+     distance = (range_start + half + extra) << NPOSTFIX + postfix + NDIRECT + 1
+
+   NB: ndistbits >= 1 -> range_start >= 0
+   NB: range_start has factor 2, as the range is covered by 2 "halves"
+   NB: extra -1 offset in range_start formula covers the absence of
+       ndistbits = 0 case
+   NB: when NPOSTFIX = 0, NDIRECT is not greater than 15
+
+   In other words, xcode has the following binary structure - XXXHPPP:
+    - XXX represent the number of extra distance bits
+    - H selects upper / lower range of distances
+    - PPP represent "postfix"
+
+  "Regular" distance encoding has NPOSTFIX = 0; omitting the postfix part
+  simplifies distance calculation.
+
+  Using NPOSTFIX > 0 allows cheaper encoding of regular structures, e.g. where
+  most of distances have the same remainder of division by 2/4/8. For example,
+  consider a table of int32_t values that come from different sources; if it is
+  likely that the 3 highest bytes of values from the same source are the same,
+  then the copy distance often looks like 4x + y.
+
+  Distance calculation could be rewritten to:
+
+    ndistbits = NDISTBITS(NDIRECT, NPOSTFIX)[dcode]
+    distance = OFFSET(NDIRECT, NPOSTFIX)[dcode] + extra << NPOSTFIX
+
+  NDISTBITS and OFFSET could be pre-calculated, as NDIRECT and NPOSTFIX could
+  change only once per meta-block.
+*/
+
+/* Calculates distance lookup table: for every distance symbol, the number
+   of extra bits and the base offset.
+   NB: it is possible to have all 64 tables precalculated. */
+static void CalculateDistanceLut(BrotliDecoderState* s) {
+  BrotliMetablockBodyArena* arena = &s->arena.body;
+  const uint32_t npostfix = s->distance_postfix_bits;
+  const uint32_t ndirect = s->num_direct_distance_codes;
+  const uint32_t limit = s->distance_hgroup.alphabet_size_limit;
+  const uint32_t group = 1u << npostfix;
+  uint32_t nbits = 1;
+  uint32_t upper = 0;
+  uint32_t k;
+
+  /* Short codes are skipped: they are not tabulated here. */
+  uint32_t sym = BROTLI_NUM_DISTANCE_SHORT_CODES;
+
+  /* Direct codes: no extra bits, offsets 1..ndirect. */
+  for (k = 1; k <= ndirect; ++k, ++sym) {
+    arena->dist_extra_bits[sym] = 0;
+    arena->dist_offset[sym] = k;
+  }
+
+  /* Regular codes, one group of (1 << npostfix) symbols per iteration. */
+  while (sym < limit) {
+    uint32_t start = ndirect + ((((2 + upper) << nbits) - 4) << npostfix) + 1;
+    /* Always fill the complete group. */
+    for (k = 0; k < group; ++k, ++sym) {
+      arena->dist_extra_bits[sym] = (uint8_t)nbits;
+      arena->dist_offset[sym] = start + k;
+    }
+    /* The extra-bit count grows by one after every second group. */
+    nbits += upper;
+    upper ^= 1;
+  }
+}
+
+/* Reads a distance symbol into s->distance_code. Precondition: it is < 0. */
+static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal(
+    int safe, BrotliDecoderState* s, BrotliBitReader* br) {
+  BrotliMetablockBodyArena* b = &s->arena.body;
+  uint32_t code;
+  uint32_t bits;
+  BrotliBitReaderState memento;
+  HuffmanCode* distance_tree = s->distance_hgroup.htrees[s->dist_htree_index];
+  if (!safe) {
+    code = ReadSymbol(distance_tree, br);
+  } else {
+    BrotliBitReaderSaveState(br, &memento);  /* For the rollback below. */
+    if (!SafeReadSymbol(distance_tree, br, &code)) {
+      return BROTLI_FALSE;
+    }
+  }
+  --s->block_length[2];  /* One distance symbol consumed. */
+  /* Convert the distance code to the actual distance by possibly
+     looking up past distances from the s->dist_rb. */
+  s->distance_context = 0;
+  if ((code & ~0xFu) == 0) {  /* code < 16 */
+    s->distance_code = (int)code;
+    TakeDistanceFromRingBuffer(s);
+    return BROTLI_TRUE;
+  }
+  if (!safe) {
+    bits = BrotliReadBits32(br, b->dist_extra_bits[code]);
+  } else {
+    if (!SafeReadBits32(br, b->dist_extra_bits[code], &bits)) {
+      ++s->block_length[2];  /* Undo the decrement above. */
+      BrotliBitReaderRestoreState(br, &memento);  /* Un-read the symbol. */
+      return BROTLI_FALSE;
+    }
+  }
+  s->distance_code =
+      (int)(b->dist_offset[code] + (bits << s->distance_postfix_bits));
+  return BROTLI_TRUE;
+}
+
+static BROTLI_INLINE void ReadDistance(
+    BrotliDecoderState* s, BrotliBitReader* br) {
+  ReadDistanceInternal(0, s, br);  /* safe = 0; result is unused. */
+}
+
+static BROTLI_INLINE BROTLI_BOOL SafeReadDistance(
+    BrotliDecoderState* s, BrotliBitReader* br) {
+  return ReadDistanceInternal(1, s, br);  /* safe = 1 */
+}
+
+static BROTLI_INLINE BROTLI_BOOL ReadCommandInternal(
+    int safe, BrotliDecoderState* s, BrotliBitReader* br, int* insert_length) {
+  uint32_t cmd_code;
+  uint32_t insert_len_extra = 0;
+  uint32_t copy_length;
+  CmdLutElement v;
+  BrotliBitReaderState memento;
+  if (!safe) {
+    cmd_code = ReadSymbol(s->htree_command, br);
+  } else {
+    BrotliBitReaderSaveState(br, &memento);  /* For the rollback below. */
+    if (!SafeReadSymbol(s->htree_command, br, &cmd_code)) {
+      return BROTLI_FALSE;
+    }
+  }
+  v = kCmdLut[cmd_code];  /* Table entry for this command symbol. */
+  s->distance_code = v.distance_code;
+  s->distance_context = v.context;
+  s->dist_htree_index = s->dist_context_map_slice[s->distance_context];
+  *insert_length = v.insert_len_offset;  /* Extra bits are added below. */
+  if (!safe) {
+    if (BROTLI_PREDICT_FALSE(v.insert_len_extra_bits != 0)) {
+      insert_len_extra = BrotliReadBits24(br, v.insert_len_extra_bits);
+    }
+    copy_length = BrotliReadBits24(br, v.copy_len_extra_bits);
+  } else {
+    if (!SafeReadBits(br, v.insert_len_extra_bits, &insert_len_extra) ||
+        !SafeReadBits(br, v.copy_len_extra_bits, &copy_length)) {
+      BrotliBitReaderRestoreState(br, &memento);  /* Un-read the symbol. */
+      return BROTLI_FALSE;
+    }
+  }
+  s->copy_length = (int)copy_length + v.copy_len_offset;
+  --s->block_length[1];  /* One command symbol consumed. */
+  *insert_length += (int)insert_len_extra;
+  return BROTLI_TRUE;
+}
+
+static BROTLI_INLINE void ReadCommand(
+    BrotliDecoderState* s, BrotliBitReader* br, int* insert_length) {
+  ReadCommandInternal(0, s, br, insert_length);  /* safe = 0; result unused. */
+}
+
+static BROTLI_INLINE BROTLI_BOOL SafeReadCommand(
+    BrotliDecoderState* s, BrotliBitReader* br, int* insert_length) {
+  return ReadCommandInternal(1, s, br, insert_length);  /* safe = 1 */
+}
+
+/* Input-amount check for the non-safe path: always succeeds when |safe| is
+   set, otherwise defers to BrotliCheckInputAmount. */
+static BROTLI_INLINE BROTLI_BOOL CheckInputAmount(
+    int safe, BrotliBitReader* const br, size_t num) {
+  if (!safe) return BrotliCheckInputAmount(br, num);
+  return BROTLI_TRUE;
+}
+
+#define BROTLI_SAFE(METHOD) /* uses `safe`, `result`, saveStateAndReturn */ \
+  {                                               \
+    if (safe) {                                   \
+      if (!Safe##METHOD) { /* Safe variant failed */ \
+        result = BROTLI_DECODER_NEEDS_MORE_INPUT; \
+        goto saveStateAndReturn;                  \
+      }                                           \
+    } else {                                      \
+      METHOD; /* return value, if any, ignored */ \
+    }                                             \
+  }
+
+static BROTLI_INLINE BrotliDecoderErrorCode ProcessCommandsInternal(
+    int safe, BrotliDecoderState* s) {
+  int pos = s->pos;
+  int i = s->loop_counter;
+  BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS;
+  BrotliBitReader* br = &s->br;
+  int compound_dictionary_size = GetCompoundDictionarySize(s);
+
+  if (!CheckInputAmount(safe, br, 28)) {
+    result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+    goto saveStateAndReturn;
+  }
+  if (!safe) {
+    BROTLI_UNUSED(BrotliWarmupBitReader(br));
+  }
+
+  /* Jump into state machine. */
+  if (s->state == BROTLI_STATE_COMMAND_BEGIN) {
+    goto CommandBegin;
+  } else if (s->state == BROTLI_STATE_COMMAND_INNER) {
+    goto CommandInner;
+  } else if (s->state == BROTLI_STATE_COMMAND_POST_DECODE_LITERALS) {
+    goto CommandPostDecodeLiterals;
+  } else if (s->state == BROTLI_STATE_COMMAND_POST_WRAP_COPY) {
+    goto CommandPostWrapCopy;
+  } else {
+    return BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);  /* COV_NF_LINE */
+  }
+
+CommandBegin:
+  if (safe) {
+    s->state = BROTLI_STATE_COMMAND_BEGIN;
+  }
+  if (!CheckInputAmount(safe, br, 28)) {  /* 156 bits + 7 bytes */
+    s->state = BROTLI_STATE_COMMAND_BEGIN;
+    result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+    goto saveStateAndReturn;
+  }
+  if (BROTLI_PREDICT_FALSE(s->block_length[1] == 0)) {
+    BROTLI_SAFE(DecodeCommandBlockSwitch(s));
+    goto CommandBegin;
+  }
+  /* Read the insert/copy length in the command. */
+  BROTLI_SAFE(ReadCommand(s, br, &i));
+  BROTLI_LOG(("[ProcessCommandsInternal] pos = %d insert = %d copy = %d\n",
+              pos, i, s->copy_length));
+  if (i == 0) {
+    goto CommandPostDecodeLiterals;
+  }
+  s->meta_block_remaining_len -= i;
+
+CommandInner:
+  if (safe) {
+    s->state = BROTLI_STATE_COMMAND_INNER;
+  }
+  /* Read the literals in the command. */
+  if (s->trivial_literal_context) {
+    uint32_t bits;
+    uint32_t value;
+    PreloadSymbol(safe, s->literal_htree, br, &bits, &value);
+    do {
+      if (!CheckInputAmount(safe, br, 28)) {  /* 162 bits + 7 bytes */
+        s->state = BROTLI_STATE_COMMAND_INNER;
+        result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+        goto saveStateAndReturn;
+      }
+      if (BROTLI_PREDICT_FALSE(s->block_length[0] == 0)) {
+        BROTLI_SAFE(DecodeLiteralBlockSwitch(s));
+        PreloadSymbol(safe, s->literal_htree, br, &bits, &value);
+        if (!s->trivial_literal_context) goto CommandInner;
+      }
+      if (!safe) {
+        s->ringbuffer[pos] =
+            (uint8_t)ReadPreloadedSymbol(s->literal_htree, br, &bits, &value);
+      } else {
+        uint32_t literal;
+        if (!SafeReadSymbol(s->literal_htree, br, &literal)) {
+          result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+          goto saveStateAndReturn;
+        }
+        s->ringbuffer[pos] = (uint8_t)literal;
+      }
+      --s->block_length[0];
+      BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos);
+      ++pos;
+      if (BROTLI_PREDICT_FALSE(pos == s->ringbuffer_size)) {
+        s->state = BROTLI_STATE_COMMAND_INNER_WRITE;
+        --i;
+        goto saveStateAndReturn;
+      }
+    } while (--i != 0);
+  } else {
+    uint8_t p1 = s->ringbuffer[(pos - 1) & s->ringbuffer_mask];
+    uint8_t p2 = s->ringbuffer[(pos - 2) & s->ringbuffer_mask];
+    do {
+      const HuffmanCode* hc;
+      uint8_t context;
+      if (!CheckInputAmount(safe, br, 28)) {  /* 162 bits + 7 bytes */
+        s->state = BROTLI_STATE_COMMAND_INNER;
+        result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+        goto saveStateAndReturn;
+      }
+      if (BROTLI_PREDICT_FALSE(s->block_length[0] == 0)) {
+        BROTLI_SAFE(DecodeLiteralBlockSwitch(s));
+        if (s->trivial_literal_context) goto CommandInner;
+      }
+      context = BROTLI_CONTEXT(p1, p2, s->context_lookup);
+      BROTLI_LOG_UINT(context);
+      hc = s->literal_hgroup.htrees[s->context_map_slice[context]];
+      p2 = p1;
+      if (!safe) {
+        p1 = (uint8_t)ReadSymbol(hc, br);
+      } else {
+        uint32_t literal;
+        if (!SafeReadSymbol(hc, br, &literal)) {
+          result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+          goto saveStateAndReturn;
+        }
+        p1 = (uint8_t)literal;
+      }
+      s->ringbuffer[pos] = p1;
+      --s->block_length[0];
+      BROTLI_LOG_UINT(s->context_map_slice[context]);
+      BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos & s->ringbuffer_mask);
+      ++pos;
+      if (BROTLI_PREDICT_FALSE(pos == s->ringbuffer_size)) {
+        s->state = BROTLI_STATE_COMMAND_INNER_WRITE;
+        --i;
+        goto saveStateAndReturn;
+      }
+    } while (--i != 0);
+  }
+  BROTLI_LOG_UINT(s->meta_block_remaining_len);
+  if (BROTLI_PREDICT_FALSE(s->meta_block_remaining_len <= 0)) {
+    s->state = BROTLI_STATE_METABLOCK_DONE;
+    goto saveStateAndReturn;
+  }
+
+CommandPostDecodeLiterals:
+  if (safe) {
+    s->state = BROTLI_STATE_COMMAND_POST_DECODE_LITERALS;
+  }
+  if (s->distance_code >= 0) {
+    /* Implicit distance case. */
+    s->distance_context = s->distance_code ? 0 : 1;
+    --s->dist_rb_idx;
+    s->distance_code = s->dist_rb[s->dist_rb_idx & 3];
+  } else {
+    /* Read distance code in the command, unless it was implicitly zero. */
+    if (BROTLI_PREDICT_FALSE(s->block_length[2] == 0)) {
+      BROTLI_SAFE(DecodeDistanceBlockSwitch(s));
+    }
+    BROTLI_SAFE(ReadDistance(s, br));
+  }
+  BROTLI_LOG(("[ProcessCommandsInternal] pos = %d distance = %d\n",
+              pos, s->distance_code));
+  if (s->max_distance != s->max_backward_distance) {
+    s->max_distance =
+        (pos < s->max_backward_distance) ? pos : s->max_backward_distance;
+  }
+  i = s->copy_length;
+  /* Apply copy of LZ77 back-reference, or static dictionary reference if
+     the distance is larger than the max LZ77 distance */
+  if (s->distance_code > s->max_distance) {
+    /* The maximum allowed distance is BROTLI_MAX_ALLOWED_DISTANCE = 0x7FFFFFFC.
+       With this choice, no signed overflow can occur after decoding
+       a special distance code (e.g., after adding 3 to the last distance). */
+    if (s->distance_code > BROTLI_MAX_ALLOWED_DISTANCE) {
+      BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d "
+          "len: %d bytes left: %d\n",
+          pos, s->distance_code, i, s->meta_block_remaining_len));
+      return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_DISTANCE);
+    }
+    if (s->distance_code - s->max_distance - 1 < compound_dictionary_size) {
+      int address = compound_dictionary_size -
+          (s->distance_code - s->max_distance);
+      if (!InitializeCompoundDictionaryCopy(s, address, i)) {
+        return BROTLI_FAILURE(BROTLI_DECODER_ERROR_COMPOUND_DICTIONARY);
+      }
+      pos += CopyFromCompoundDictionary(s, pos);
+      if (pos >= s->ringbuffer_size) {
+        s->state = BROTLI_STATE_COMMAND_POST_WRITE_1;
+        goto saveStateAndReturn;
+      }
+    } else if (i >= SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH &&
+               i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH) {
+      uint8_t p1 = s->ringbuffer[(pos - 1) & s->ringbuffer_mask];
+      uint8_t p2 = s->ringbuffer[(pos - 2) & s->ringbuffer_mask];
+      uint8_t dict_id = s->dictionary->context_based ?
+          s->dictionary->context_map[BROTLI_CONTEXT(p1, p2, s->context_lookup)]
+          : 0;
+      const BrotliDictionary* words = s->dictionary->words[dict_id];
+      const BrotliTransforms* transforms = s->dictionary->transforms[dict_id];
+      int offset = (int)words->offsets_by_length[i];
+      uint32_t shift = words->size_bits_by_length[i];
+      int address =
+          s->distance_code - s->max_distance - 1 - compound_dictionary_size;
+      int mask = (int)BitMask(shift);
+      int word_idx = address & mask;
+      int transform_idx = address >> shift;
+      /* Compensate double distance-ring-buffer roll. */
+      s->dist_rb_idx += s->distance_context;
+      offset += word_idx * i;
+      /* If the distance is out of bound, select a next static dictionary if
+         there exist multiple. */
+      if ((transform_idx >= (int)transforms->num_transforms ||
+          words->size_bits_by_length[i] == 0) &&
+          s->dictionary->num_dictionaries > 1) {
+        uint8_t dict_id2;
+        int dist_remaining = address -
+            (int)(((1u << shift) & ~1u)) * (int)transforms->num_transforms;
+        for (dict_id2 = 0; dict_id2 < s->dictionary->num_dictionaries;
+            dict_id2++) {
+          const BrotliDictionary* words2 = s->dictionary->words[dict_id2];
+          if (dict_id2 != dict_id && words2->size_bits_by_length[i] != 0) {
+            const BrotliTransforms* transforms2 =
+                s->dictionary->transforms[dict_id2];
+            uint32_t shift2 = words2->size_bits_by_length[i];
+            int num = (int)((1u << shift2) & ~1u) *
+                (int)transforms2->num_transforms;
+            if (dist_remaining < num) {
+              dict_id = dict_id2;
+              words = words2;
+              transforms = transforms2;
+              address = dist_remaining;
+              shift = shift2;
+              mask = (int)BitMask(shift);
+              word_idx = address & mask;
+              transform_idx = address >> shift;
+              offset = (int)words->offsets_by_length[i] + word_idx * i;
+              break;
+            }
+            dist_remaining -= num;
+          }
+        }
+      }
+      if (BROTLI_PREDICT_FALSE(words->size_bits_by_length[i] == 0)) {
+        BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d "
+            "len: %d bytes left: %d\n",
+            pos, s->distance_code, i, s->meta_block_remaining_len));
+        return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_DICTIONARY);
+      }
+      if (BROTLI_PREDICT_FALSE(!words->data)) {
+        return BROTLI_FAILURE(BROTLI_DECODER_ERROR_DICTIONARY_NOT_SET);
+      }
+      if (transform_idx < (int)transforms->num_transforms) {
+        const uint8_t* word = &words->data[offset];
+        int len = i;
+        if (transform_idx == transforms->cutOffTransforms[0]) {
+          memcpy(&s->ringbuffer[pos], word, (size_t)len);
+          BROTLI_LOG(("[ProcessCommandsInternal] dictionary word: [%.*s]\n",
+                      len, word));
+        } else {
+          len = BrotliTransformDictionaryWord(&s->ringbuffer[pos], word, len,
+              transforms, transform_idx);
+          BROTLI_LOG(("[ProcessCommandsInternal] dictionary word: [%.*s],"
+                      " transform_idx = %d, transformed: [%.*s]\n",
+                      i, word, transform_idx, len, &s->ringbuffer[pos]));
+          if (len == 0 && s->distance_code <= 120) {
+            BROTLI_LOG(("Invalid length-0 dictionary word after transform\n"));
+            return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_TRANSFORM);
+          }
+        }
+        pos += len;
+        s->meta_block_remaining_len -= len;
+        if (pos >= s->ringbuffer_size) {
+          s->state = BROTLI_STATE_COMMAND_POST_WRITE_1;
+          goto saveStateAndReturn;
+        }
+      } else {
+        BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d "
+            "len: %d bytes left: %d\n",
+            pos, s->distance_code, i, s->meta_block_remaining_len));
+        return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_TRANSFORM);
+      }
+    } else {
+      BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d "
+          "len: %d bytes left: %d\n",
+          pos, s->distance_code, i, s->meta_block_remaining_len));
+      return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_DICTIONARY);
+    }
+  } else {
+    int src_start = (pos - s->distance_code) & s->ringbuffer_mask;
+    uint8_t* copy_dst = &s->ringbuffer[pos];
+    uint8_t* copy_src = &s->ringbuffer[src_start];
+    int dst_end = pos + i;
+    int src_end = src_start + i;
+    /* Update the recent distances cache. */
+    s->dist_rb[s->dist_rb_idx & 3] = s->distance_code;
+    ++s->dist_rb_idx;
+    s->meta_block_remaining_len -= i;
+    /* There are 32+ bytes of slack in the ring-buffer allocation.
+       Also, we have 16 short codes, that make these 16 bytes irrelevant
+       in the ring-buffer. Let's copy over them as a first guess. */
+    memmove16(copy_dst, copy_src);
+    if (src_end > pos && dst_end > src_start) {
+      /* Regions intersect. */
+      goto CommandPostWrapCopy;
+    }
+    if (dst_end >= s->ringbuffer_size || src_end >= s->ringbuffer_size) {
+      /* At least one region wraps. */
+      goto CommandPostWrapCopy;
+    }
+    pos += i;
+    if (i > 16) {
+      if (i > 32) {
+        memcpy(copy_dst + 16, copy_src + 16, (size_t)(i - 16));
+      } else {
+        /* This branch covers about 45% cases.
+           Fixed size short copy allows more compiler optimizations. */
+        memmove16(copy_dst + 16, copy_src + 16);
+      }
+    }
+  }
+  BROTLI_LOG_UINT(s->meta_block_remaining_len);
+  if (s->meta_block_remaining_len <= 0) {
+    /* Next metablock, if any. */
+    s->state = BROTLI_STATE_METABLOCK_DONE;
+    goto saveStateAndReturn;
+  } else {
+    goto CommandBegin;
+  }
+CommandPostWrapCopy:
+  {
+    int wrap_guard = s->ringbuffer_size - pos;
+    while (--i >= 0) {
+      s->ringbuffer[pos] =
+          s->ringbuffer[(pos - s->distance_code) & s->ringbuffer_mask];
+      ++pos;
+      if (BROTLI_PREDICT_FALSE(--wrap_guard == 0)) {
+        s->state = BROTLI_STATE_COMMAND_POST_WRITE_2;
+        goto saveStateAndReturn;
+      }
+    }
+  }
+  if (s->meta_block_remaining_len <= 0) {
+    /* Next metablock, if any. */
+    s->state = BROTLI_STATE_METABLOCK_DONE;
+    goto saveStateAndReturn;
+  } else {
+    goto CommandBegin;
+  }
+
+saveStateAndReturn:
+  s->pos = pos;
+  s->loop_counter = i;
+  return result;
+}
+
+#undef BROTLI_SAFE
+
+/* Non-inlined entry point that runs ProcessCommandsInternal on |s| with its
+   first argument fixed to 0 (SafeProcessCommands is the counterpart that
+   passes 1). Returns whatever code the internal routine reports. */
+static BROTLI_NOINLINE BrotliDecoderErrorCode ProcessCommands(
+    BrotliDecoderState* s) {
+  const BrotliDecoderErrorCode status = ProcessCommandsInternal(0, s);
+  return status;
+}
+
+/* Non-inlined entry point that runs ProcessCommandsInternal on |s| with its
+   first argument fixed to 1. The stream decoder falls back to this variant
+   when ProcessCommands reports BROTLI_DECODER_NEEDS_MORE_INPUT. */
+static BROTLI_NOINLINE BrotliDecoderErrorCode SafeProcessCommands(
+    BrotliDecoderState* s) {
+  const BrotliDecoderErrorCode status = ProcessCommandsInternal(1, s);
+  return status;
+}
+
+/* One-shot decompression of |encoded_buffer| (|encoded_size| bytes) into
+   |decoded_buffer|. On entry |*decoded_size| is the capacity of the output
+   buffer; after the stream call it is overwritten with the number of bytes
+   reported through |total_out|. If the decoder state cannot be initialized,
+   BROTLI_DECODER_RESULT_ERROR is returned and |*decoded_size| is left as is.
+   Any stream result other than BROTLI_DECODER_RESULT_SUCCESS is collapsed
+   into BROTLI_DECODER_RESULT_ERROR. */
+BrotliDecoderResult BrotliDecoderDecompress(
+    size_t encoded_size,
+    const uint8_t encoded_buffer[BROTLI_ARRAY_PARAM(encoded_size)],
+    size_t* decoded_size,
+    uint8_t decoded_buffer[BROTLI_ARRAY_PARAM(*decoded_size)]) {
+  BrotliDecoderState state;
+  BrotliDecoderResult status;
+  const uint8_t* in_cursor = encoded_buffer;
+  uint8_t* out_cursor = decoded_buffer;
+  size_t in_left = encoded_size;
+  size_t out_left = *decoded_size;
+  size_t produced = 0;
+  if (!BrotliDecoderStateInit(&state, 0, 0, 0)) {
+    return BROTLI_DECODER_RESULT_ERROR;
+  }
+  /* A single pass over the whole input; the state lives on the stack and is
+     cleaned up right after. */
+  status = BrotliDecoderDecompressStream(
+      &state, &in_left, &in_cursor, &out_left, &out_cursor, &produced);
+  *decoded_size = produced;
+  BrotliDecoderStateCleanup(&state);
+  /* "Needs more input" / "needs more output" count as failure here. */
+  return (status == BROTLI_DECODER_RESULT_SUCCESS)
+      ? status
+      : BROTLI_DECODER_RESULT_ERROR;
+}
+
+/* Streaming decoder entry point: runs the decoder state machine stored in |s|
+   until it finishes, fails, or cannot proceed without more input or more
+   output space.
+
+   |available_in| / |next_in| describe the caller's input and are updated as
+   input is consumed. |available_out| / |next_out| describe the caller's
+   output space and are forwarded to the output-writing helpers; a non-zero
+   |*available_out| combined with a null |next_out| or |*next_out| is rejected
+   with BROTLI_DECODER_ERROR_INVALID_ARGUMENTS. |total_out| may be null; when
+   it is not, it is set to |s->partial_pos_out| on entry and is also forwarded
+   to the output-writing helpers.
+
+   If |s| already holds a negative error code, BROTLI_DECODER_RESULT_ERROR is
+   returned immediately. Every other exit goes through SaveErrorCode
+   (presumably it records the code in |s| and maps it to a BrotliDecoderResult
+   -- confirm against its definition). */
+/* Invariant: input stream is never overconsumed:
+    - invalid input implies that the whole stream is invalid -> any amount of
+      input could be read and discarded
+    - when result is "needs more input", then at least one more byte is REQUIRED
+      to complete decoding; all input data MUST be consumed by decoder, so
+      client could swap the input buffer
+    - when result is "needs more output" decoder MUST ensure that it doesn't
+      hold more than 7 bits in bit reader; this saves client from swapping input
+      buffer ahead of time
+    - when result is "success" decoder MUST return all unused data back to input
+      buffer; this is possible because the invariant is held on enter */
+BrotliDecoderResult BrotliDecoderDecompressStream(
+    BrotliDecoderState* s, size_t* available_in, const uint8_t** next_in,
+    size_t* available_out, uint8_t** next_out, size_t* total_out) {
+  BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS;
+  BrotliBitReader* br = &s->br;
+  size_t input_size = *available_in;
+/* Second argument of SaveErrorCode is the number of input bytes consumed by
+   this call (initial |*available_in| minus the current one). */
+#define BROTLI_SAVE_ERROR_CODE(code) \
+    SaveErrorCode(s, (code), input_size - *available_in)
+  /* Ensure that |total_out| is set, even if no data will ever be pushed out. */
+  if (total_out) {
+    *total_out = s->partial_pos_out;
+  }
+  /* Do not try to process further in a case of unrecoverable error. */
+  if ((int)s->error_code < 0) {
+    return BROTLI_DECODER_RESULT_ERROR;
+  }
+  if (*available_out && (!next_out || !*next_out)) {
+    return BROTLI_SAVE_ERROR_CODE(
+        BROTLI_FAILURE(BROTLI_DECODER_ERROR_INVALID_ARGUMENTS));
+  }
+  /* No room for output: hand a null |next_out| to the output helpers. */
+  if (!*available_out) next_out = 0;
+  if (s->buffer_length == 0) {  /* Just connect bit reader to input stream. */
+    br->avail_in = *available_in;
+    br->next_in = *next_in;
+  } else {
+    /* At least one byte of input is required. More than one byte of input may
+       be required to complete the transaction -> reading more data must be
+       done in a loop -> do it in a main loop. */
+    result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+    br->next_in = &s->buffer.u8[0];
+  }
+  /* State machine */
+  for (;;) {
+    if (result != BROTLI_DECODER_SUCCESS) {
+      /* Error, needs more input/output. */
+      if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) {
+        if (s->ringbuffer != 0) {  /* Pro-actively push output. */
+          BrotliDecoderErrorCode intermediate_result = WriteRingBuffer(s,
+              available_out, next_out, total_out, BROTLI_TRUE);
+          /* WriteRingBuffer checks s->meta_block_remaining_len validity. */
+          if ((int)intermediate_result < 0) {
+            result = intermediate_result;
+            break;
+          }
+        }
+        if (s->buffer_length != 0) {  /* Used with internal buffer. */
+          if (br->avail_in == 0) {
+            /* Successfully finished read transaction.
+               Accumulator contains less than 8 bits, because internal buffer
+               is expanded byte-by-byte until it is enough to complete read. */
+            s->buffer_length = 0;
+            /* Switch to input stream and restart. */
+            result = BROTLI_DECODER_SUCCESS;
+            br->avail_in = *available_in;
+            br->next_in = *next_in;
+            continue;
+          } else if (*available_in != 0) {
+            /* Not enough data in buffer, but can take one more byte from
+               input stream. */
+            result = BROTLI_DECODER_SUCCESS;
+            s->buffer.u8[s->buffer_length] = **next_in;
+            s->buffer_length++;
+            br->avail_in = s->buffer_length;
+            (*next_in)++;
+            (*available_in)--;
+            /* Retry with more data in buffer. */
+            continue;
+          }
+          /* Can't finish reading and no more input. */
+          break;
+        } else {  /* Input stream doesn't contain enough input. */
+          /* Copy tail to internal buffer and return. */
+          *next_in = br->next_in;
+          *available_in = br->avail_in;
+          while (*available_in) {
+            s->buffer.u8[s->buffer_length] = **next_in;
+            s->buffer_length++;
+            (*next_in)++;
+            (*available_in)--;
+          }
+          break;
+        }
+        /* Unreachable. */
+      }
+
+      /* Fail or needs more output. */
+
+      if (s->buffer_length != 0) {
+        /* Just consumed the buffered input and produced some output. Otherwise
+           it would result in "needs more input". Reset internal buffer. */
+        s->buffer_length = 0;
+      } else {
+        /* Using input stream in last iteration. When decoder switches to input
+           stream it has less than 8 bits in accumulator, so it is safe to
+           return unused accumulator bits there. */
+        BrotliBitReaderUnload(br);
+        *available_in = br->avail_in;
+        *next_in = br->next_in;
+      }
+      break;
+    }
+    /* Note: a `break` directly inside this switch only leaves the switch; the
+       enclosing loop then re-examines |result| at the top. */
+    switch (s->state) {
+      case BROTLI_STATE_UNINITED:
+        /* Prepare to the first read. */
+        if (!BrotliWarmupBitReader(br)) {
+          result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+          break;
+        }
+        /* Decode window size. */
+        result = DecodeWindowBits(s, br);  /* Reads 1..8 bits. */
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        if (s->large_window) {
+          s->state = BROTLI_STATE_LARGE_WINDOW_BITS;
+          break;
+        }
+        s->state = BROTLI_STATE_INITIALIZE;
+        break;
+
+      case BROTLI_STATE_LARGE_WINDOW_BITS:
+        if (!BrotliSafeReadBits(br, 6, &s->window_bits)) {
+          result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+          break;
+        }
+        if (s->window_bits < BROTLI_LARGE_MIN_WBITS ||
+            s->window_bits > BROTLI_LARGE_MAX_WBITS) {
+          result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS);
+          break;
+        }
+        s->state = BROTLI_STATE_INITIALIZE;
+      /* Fall through. */
+
+      case BROTLI_STATE_INITIALIZE:
+        BROTLI_LOG_UINT(s->window_bits);
+        /* Maximum distance, see section 9.1. of the spec. */
+        s->max_backward_distance = (1 << s->window_bits) - BROTLI_WINDOW_GAP;
+
+        /* Allocate memory for both block_type_trees and block_len_trees. */
+        s->block_type_trees = (HuffmanCode*)BROTLI_DECODER_ALLOC(s,
+            sizeof(HuffmanCode) * 3 *
+                (BROTLI_HUFFMAN_MAX_SIZE_258 + BROTLI_HUFFMAN_MAX_SIZE_26));
+        if (s->block_type_trees == 0) {
+          result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_BLOCK_TYPE_TREES);
+          break;
+        }
+        /* The block-length trees live in the same allocation, right after the
+           three block-type trees. */
+        s->block_len_trees =
+            s->block_type_trees + 3 * BROTLI_HUFFMAN_MAX_SIZE_258;
+
+        s->state = BROTLI_STATE_METABLOCK_BEGIN;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_BEGIN:
+        BrotliDecoderStateMetablockBegin(s);
+        BROTLI_LOG_UINT(s->pos);
+        s->state = BROTLI_STATE_METABLOCK_HEADER;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER:
+        result = DecodeMetaBlockLength(s, br);  /* Reads 2 - 31 bits. */
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        BROTLI_LOG_UINT(s->is_last_metablock);
+        BROTLI_LOG_UINT(s->meta_block_remaining_len);
+        BROTLI_LOG_UINT(s->is_metadata);
+        BROTLI_LOG_UINT(s->is_uncompressed);
+        if (s->is_metadata || s->is_uncompressed) {
+          if (!BrotliJumpToByteBoundary(br)) {
+            result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_PADDING_1);
+            break;
+          }
+        }
+        if (s->is_metadata) {
+          s->state = BROTLI_STATE_METADATA;
+          break;
+        }
+        if (s->meta_block_remaining_len == 0) {
+          s->state = BROTLI_STATE_METABLOCK_DONE;
+          break;
+        }
+        BrotliCalculateRingBufferSize(s);
+        if (s->is_uncompressed) {
+          s->state = BROTLI_STATE_UNCOMPRESSED;
+          break;
+        }
+        s->state = BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_HEADER;
+      /* Fall through. */
+
+      case BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_HEADER: {
+        BrotliMetablockHeaderArena* h = &s->arena.header;
+        s->loop_counter = 0;
+        /* Initialize compressed metablock header arena. */
+        h->sub_loop_counter = 0;
+        /* Make small negative indexes addressable. */
+        h->symbol_lists =
+            &h->symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1];
+        h->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
+        h->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE;
+        h->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE;
+        s->state = BROTLI_STATE_HUFFMAN_CODE_0;
+      }
+      /* Fall through. */
+
+      /* HUFFMAN_CODE_0..3 form a loop over |s->loop_counter| = 0, 1, 2. */
+      case BROTLI_STATE_HUFFMAN_CODE_0:
+        if (s->loop_counter >= 3) {
+          s->state = BROTLI_STATE_METABLOCK_HEADER_2;
+          break;
+        }
+        /* Reads 1..11 bits. */
+        result = DecodeVarLenUint8(s, br, &s->num_block_types[s->loop_counter]);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        s->num_block_types[s->loop_counter]++;
+        BROTLI_LOG_UINT(s->num_block_types[s->loop_counter]);
+        /* Fewer than two block types: skip the tree / block-length reads for
+           this counter value and move on to the next one. */
+        if (s->num_block_types[s->loop_counter] < 2) {
+          s->loop_counter++;
+          break;
+        }
+        s->state = BROTLI_STATE_HUFFMAN_CODE_1;
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_CODE_1: {
+        uint32_t alphabet_size = s->num_block_types[s->loop_counter] + 2;
+        int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_258;
+        result = ReadHuffmanCode(alphabet_size, alphabet_size,
+            &s->block_type_trees[tree_offset], NULL, s);
+        if (result != BROTLI_DECODER_SUCCESS) break;
+        s->state = BROTLI_STATE_HUFFMAN_CODE_2;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_CODE_2: {
+        uint32_t alphabet_size = BROTLI_NUM_BLOCK_LEN_SYMBOLS;
+        int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_26;
+        result = ReadHuffmanCode(alphabet_size, alphabet_size,
+            &s->block_len_trees[tree_offset], NULL, s);
+        if (result != BROTLI_DECODER_SUCCESS) break;
+        s->state = BROTLI_STATE_HUFFMAN_CODE_3;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_CODE_3: {
+        int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_26;
+        if (!SafeReadBlockLength(s, &s->block_length[s->loop_counter],
+            &s->block_len_trees[tree_offset], br)) {
+          result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+          break;
+        }
+        BROTLI_LOG_UINT(s->block_length[s->loop_counter]);
+        s->loop_counter++;
+        s->state = BROTLI_STATE_HUFFMAN_CODE_0;
+        break;
+      }
+
+      case BROTLI_STATE_UNCOMPRESSED: {
+        result = CopyUncompressedBlockToOutput(
+            available_out, next_out, total_out, s);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        s->state = BROTLI_STATE_METABLOCK_DONE;
+        break;
+      }
+
+      case BROTLI_STATE_METADATA:
+        for (; s->meta_block_remaining_len > 0; --s->meta_block_remaining_len) {
+          uint32_t bits;
+          /* Read one byte and ignore it. */
+          if (!BrotliSafeReadBits(br, 8, &bits)) {
+            result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+            break;
+          }
+        }
+        if (result == BROTLI_DECODER_SUCCESS) {
+          s->state = BROTLI_STATE_METABLOCK_DONE;
+        }
+        break;
+
+      case BROTLI_STATE_METABLOCK_HEADER_2: {
+        uint32_t bits;
+        if (!BrotliSafeReadBits(br, 6, &bits)) {
+          result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+          break;
+        }
+        /* Low 2 bits: postfix bit count; remaining 4 bits, shifted left by
+           that count: number of direct distance codes. */
+        s->distance_postfix_bits = bits & BitMask(2);
+        bits >>= 2;
+        s->num_direct_distance_codes = bits << s->distance_postfix_bits;
+        BROTLI_LOG_UINT(s->num_direct_distance_codes);
+        BROTLI_LOG_UINT(s->distance_postfix_bits);
+        s->context_modes =
+            (uint8_t*)BROTLI_DECODER_ALLOC(s, (size_t)s->num_block_types[0]);
+        if (s->context_modes == 0) {
+          result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_CONTEXT_MODES);
+          break;
+        }
+        s->loop_counter = 0;
+        s->state = BROTLI_STATE_CONTEXT_MODES;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_CONTEXT_MODES:
+        result = ReadContextModes(s);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        s->state = BROTLI_STATE_CONTEXT_MAP_1;
+      /* Fall through. */
+
+      case BROTLI_STATE_CONTEXT_MAP_1:
+        result = DecodeContextMap(
+            s->num_block_types[0] << BROTLI_LITERAL_CONTEXT_BITS,
+            &s->num_literal_htrees, &s->context_map, s);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        DetectTrivialLiteralBlockTypes(s);
+        s->state = BROTLI_STATE_CONTEXT_MAP_2;
+      /* Fall through. */
+
+      case BROTLI_STATE_CONTEXT_MAP_2: {
+        uint32_t npostfix = s->distance_postfix_bits;
+        uint32_t ndirect = s->num_direct_distance_codes;
+        uint32_t distance_alphabet_size_max = BROTLI_DISTANCE_ALPHABET_SIZE(
+            npostfix, ndirect, BROTLI_MAX_DISTANCE_BITS);
+        uint32_t distance_alphabet_size_limit = distance_alphabet_size_max;
+        BROTLI_BOOL allocation_success = BROTLI_TRUE;
+        if (s->large_window) {
+          BrotliDistanceCodeLimit limit = BrotliCalculateDistanceCodeLimit(
+              BROTLI_MAX_ALLOWED_DISTANCE, npostfix, ndirect);
+          distance_alphabet_size_max = BROTLI_DISTANCE_ALPHABET_SIZE(
+              npostfix, ndirect, BROTLI_LARGE_MAX_DISTANCE_BITS);
+          distance_alphabet_size_limit = limit.max_alphabet_size;
+        }
+        result = DecodeContextMap(
+            s->num_block_types[2] << BROTLI_DISTANCE_CONTEXT_BITS,
+            &s->num_dist_htrees, &s->dist_context_map, s);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        /* All three group inits are attempted; failure of any one of them is
+           reported once below. */
+        allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
+            s, &s->literal_hgroup, BROTLI_NUM_LITERAL_SYMBOLS,
+            BROTLI_NUM_LITERAL_SYMBOLS, s->num_literal_htrees);
+        allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
+            s, &s->insert_copy_hgroup, BROTLI_NUM_COMMAND_SYMBOLS,
+            BROTLI_NUM_COMMAND_SYMBOLS, s->num_block_types[1]);
+        allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
+            s, &s->distance_hgroup, distance_alphabet_size_max,
+            distance_alphabet_size_limit, s->num_dist_htrees);
+        if (!allocation_success) {
+          return BROTLI_SAVE_ERROR_CODE(
+              BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS));
+        }
+        s->loop_counter = 0;
+        s->state = BROTLI_STATE_TREE_GROUP;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_TREE_GROUP: {
+        HuffmanTreeGroup* hgroup = NULL;
+        /* |s->loop_counter| selects which of the three groups to decode. */
+        switch (s->loop_counter) {
+          case 0: hgroup = &s->literal_hgroup; break;
+          case 1: hgroup = &s->insert_copy_hgroup; break;
+          case 2: hgroup = &s->distance_hgroup; break;
+          default: return BROTLI_SAVE_ERROR_CODE(BROTLI_FAILURE(
+              BROTLI_DECODER_ERROR_UNREACHABLE));  /* COV_NF_LINE */
+        }
+        result = HuffmanTreeGroupDecode(hgroup, s);
+        if (result != BROTLI_DECODER_SUCCESS) break;
+        s->loop_counter++;
+        if (s->loop_counter < 3) {
+          break;
+        }
+        s->state = BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_BODY;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_BODY:
+        PrepareLiteralDecoding(s);
+        s->dist_context_map_slice = s->dist_context_map;
+        s->htree_command = s->insert_copy_hgroup.htrees[0];
+        if (!BrotliEnsureRingBuffer(s)) {
+          result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_2);
+          break;
+        }
+        CalculateDistanceLut(s);
+        s->state = BROTLI_STATE_COMMAND_BEGIN;
+      /* Fall through. */
+
+      case BROTLI_STATE_COMMAND_BEGIN:
+      /* Fall through. */
+      case BROTLI_STATE_COMMAND_INNER:
+      /* Fall through. */
+      case BROTLI_STATE_COMMAND_POST_DECODE_LITERALS:
+      /* Fall through. */
+      case BROTLI_STATE_COMMAND_POST_WRAP_COPY:
+        /* Try ProcessCommands first; if it reports that input ran short,
+           retry from the saved state with SafeProcessCommands. */
+        result = ProcessCommands(s);
+        if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) {
+          result = SafeProcessCommands(s);
+        }
+        break;
+
+      case BROTLI_STATE_COMMAND_INNER_WRITE:
+      /* Fall through. */
+      case BROTLI_STATE_COMMAND_POST_WRITE_1:
+      /* Fall through. */
+      case BROTLI_STATE_COMMAND_POST_WRITE_2:
+        result = WriteRingBuffer(
+            s, available_out, next_out, total_out, BROTLI_FALSE);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        WrapRingBuffer(s);
+        /* Once the ring buffer spans the whole window, the maximum distance
+           is the window's backward-distance limit. */
+        if (s->ringbuffer_size == 1 << s->window_bits) {
+          s->max_distance = s->max_backward_distance;
+        }
+        if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_1) {
+          BrotliDecoderCompoundDictionary* addon = s->compound_dictionary;
+          if (addon && (addon->br_length != addon->br_copied)) {
+            s->pos += CopyFromCompoundDictionary(s, s->pos);
+            /* Ring buffer filled up again: loop in the same state so it is
+               written out before the copy resumes. */
+            if (s->pos >= s->ringbuffer_size) continue;
+          }
+          if (s->meta_block_remaining_len == 0) {
+            /* Next metablock, if any. */
+            s->state = BROTLI_STATE_METABLOCK_DONE;
+          } else {
+            s->state = BROTLI_STATE_COMMAND_BEGIN;
+          }
+          break;
+        } else if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_2) {
+          s->state = BROTLI_STATE_COMMAND_POST_WRAP_COPY;
+        } else {  /* BROTLI_STATE_COMMAND_INNER_WRITE */
+          if (s->loop_counter == 0) {
+            if (s->meta_block_remaining_len == 0) {
+              s->state = BROTLI_STATE_METABLOCK_DONE;
+            } else {
+              s->state = BROTLI_STATE_COMMAND_POST_DECODE_LITERALS;
+            }
+            break;
+          }
+          s->state = BROTLI_STATE_COMMAND_INNER;
+        }
+        break;
+
+      case BROTLI_STATE_METABLOCK_DONE:
+        /* A negative remaining length means the metablock produced more data
+           than its header declared. */
+        if (s->meta_block_remaining_len < 0) {
+          result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_2);
+          break;
+        }
+        BrotliDecoderStateCleanupAfterMetablock(s);
+        if (!s->is_last_metablock) {
+          s->state = BROTLI_STATE_METABLOCK_BEGIN;
+          break;
+        }
+        if (!BrotliJumpToByteBoundary(br)) {
+          result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_PADDING_2);
+          break;
+        }
+        /* Reading straight from the caller's input: give unused bytes back. */
+        if (s->buffer_length == 0) {
+          BrotliBitReaderUnload(br);
+          *available_in = br->avail_in;
+          *next_in = br->next_in;
+        }
+        s->state = BROTLI_STATE_DONE;
+      /* Fall through. */
+
+      case BROTLI_STATE_DONE:
+        if (s->ringbuffer != 0) {
+          result = WriteRingBuffer(
+              s, available_out, next_out, total_out, BROTLI_TRUE);
+          if (result != BROTLI_DECODER_SUCCESS) {
+            break;
+          }
+        }
+        return BROTLI_SAVE_ERROR_CODE(result);
+    }
+  }
+  return BROTLI_SAVE_ERROR_CODE(result);
+#undef BROTLI_SAVE_ERROR_CODE
+}
+
+/* Returns BROTLI_TRUE when |s| has a ring buffer and UnwrittenBytes reports a
+   non-zero amount for it; BROTLI_FALSE otherwise, including whenever |s|
+   holds a negative error code. */
+BROTLI_BOOL BrotliDecoderHasMoreOutput(const BrotliDecoderState* s) {
+  /* Output remaining after an unrecoverable error is not worth reporting. */
+  if ((int)s->error_code < 0) return BROTLI_FALSE;
+  /* No ring buffer allocated -> nothing could have been produced. */
+  if (s->ringbuffer == 0) return BROTLI_FALSE;
+  return TO_BROTLI_BOOL(UnwrittenBytes(s, BROTLI_FALSE) != 0);
+}
+
+/* Hands out a pointer to pending decoded output without copying it into a
+   caller buffer.
+
+   On entry |*size| is the maximum number of bytes wanted; 0 is treated as a
+   request for up to 1 << 24 bytes. On return |*size| is the number of bytes
+   made available at the returned pointer. When there is no ring buffer, when
+   |s| already holds a negative error code, or when WriteRingBuffer reports
+   anything other than success / "needs more output", |*size| is set to 0 and
+   a null pointer is returned. */
+const uint8_t* BrotliDecoderTakeOutput(BrotliDecoderState* s, size_t* size) {
+  uint8_t* chunk = 0;
+  size_t budget = *size ? *size : 1u << 24;
+  size_t requested = budget;
+  BrotliDecoderErrorCode status;
+  if ((s->ringbuffer == 0) || ((int)s->error_code < 0)) {
+    *size = 0;
+    return 0;
+  }
+  WrapRingBuffer(s);
+  status = WriteRingBuffer(s, &budget, &chunk, 0, BROTLI_TRUE);
+  if (status != BROTLI_DECODER_SUCCESS &&
+      status != BROTLI_DECODER_NEEDS_MORE_OUTPUT) {
+    /* The stream is broken. BrotliDecoderDecompressStream normally catches
+       this first; persisting the error here is only a safeguard. */
+    if ((int)status < 0) SaveErrorCode(s, status, 0);
+    *size = 0;
+    return 0;
+  }
+  /* WriteRingBuffer decremented |budget| by the amount it handed out. */
+  *size = requested - budget;
+  return chunk;
+}
+
+/* Returns BROTLI_TRUE once |s| has left the BROTLI_STATE_UNINITED state or
+   its bit reader reports any available bits; BROTLI_FALSE otherwise. */
+BROTLI_BOOL BrotliDecoderIsUsed(const BrotliDecoderState* s) {
+  if (s->state != BROTLI_STATE_UNINITED) return BROTLI_TRUE;
+  return TO_BROTLI_BOOL(BrotliGetAvailableBits(&s->br) != 0);
+}
+
+/* Returns a true value only when |s| is in BROTLI_STATE_DONE and
+   BrotliDecoderHasMoreOutput reports that nothing is left to hand out. */
+BROTLI_BOOL BrotliDecoderIsFinished(const BrotliDecoderState* s) {
+  if (s->state != BROTLI_STATE_DONE) return BROTLI_FALSE;
+  return !BrotliDecoderHasMoreOutput(s);
+}
+
+BrotliDecoderErrorCode BrotliDecoderGetErrorCode(const BrotliDecoderState* s) {
+  return (BrotliDecoderErrorCode)s->error_code;
+}
+
+/* Helper macros for BrotliDecoderErrorString: every entry of
+   BROTLI_DECODER_ERROR_CODES_LIST becomes a `case` label that returns the
+   entry's NAME as a string literal. Both macros are removed again right
+   after the function. */
+#define BROTLI_ERROR_CODE_CASE_(PREFIX, NAME, CODE) \
+    case BROTLI_DECODER ## PREFIX ## NAME: return #NAME;
+#define BROTLI_NOTHING_
+/* Maps error code |c| to its symbolic name; codes that are not part of
+   BROTLI_DECODER_ERROR_CODES_LIST yield "INVALID". */
+const char* BrotliDecoderErrorString(BrotliDecoderErrorCode c) {
+  switch (c) {
+    BROTLI_DECODER_ERROR_CODES_LIST(BROTLI_ERROR_CODE_CASE_, BROTLI_NOTHING_)
+    default: return "INVALID";
+  }
+}
+#undef BROTLI_ERROR_CODE_CASE_
+#undef BROTLI_NOTHING_
+
+/* Returns the value of the BROTLI_VERSION macro this decoder was built
+   with. */
+uint32_t BrotliDecoderVersion(void) {
+  const uint32_t version = BROTLI_VERSION;
+  return version;
+}
+
+/* Test-only exports, compiled only when BROTLI_TEST is defined. Each wrapper
+   forwards its arguments unchanged to the internal routine of the same name
+   (minus the "ForTest" suffix). Every definition is preceded by its own
+   prototype. */
+#if defined(BROTLI_TEST)
+BROTLI_BOOL SafeReadSymbolForTest(
+    const HuffmanCode* table, BrotliBitReader* br, uint32_t* result);
+BROTLI_BOOL SafeReadSymbolForTest(
+    const HuffmanCode* table, BrotliBitReader* br, uint32_t* result) {
+  const BROTLI_BOOL ok = SafeReadSymbol(table, br, result);
+  return ok;
+}
+
+void InverseMoveToFrontTransformForTest(
+    uint8_t* v, uint32_t l, BrotliDecoderState* s);
+void InverseMoveToFrontTransformForTest(
+    uint8_t* v, uint32_t l, BrotliDecoderState* s) {
+  InverseMoveToFrontTransform(v, l, s);
+}
+#endif  /* BROTLI_TEST */
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/dec/huffman.c b/third-party/libjxl/libjxl/third_party/brotli/c/dec/huffman.c
new file mode 100644
index 0000000000..3806454864
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/dec/huffman.c
@@ -0,0 +1,342 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Utilities for building Huffman decoding tables. */
+
+#include "huffman.h"
+
+#include <string.h>  /* memcpy, memset */
+
+#include <brotli/types.h>
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_REVERSE_BITS_MAX 8
+
+#if defined(BROTLI_RBIT)
+#define BROTLI_REVERSE_BITS_BASE \
+  ((sizeof(brotli_reg_t) << 3) - BROTLI_REVERSE_BITS_MAX)
+#else
+#define BROTLI_REVERSE_BITS_BASE 0
+static uint8_t kReverseBits[1 << BROTLI_REVERSE_BITS_MAX] = {
+  0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0,
+  0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
+  0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
+  0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
+  0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4,
+  0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
+  0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC,
+  0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
+  0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
+  0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
+  0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA,
+  0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
+  0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6,
+  0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
+  0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE,
+  0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
+  0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1,
+  0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
+  0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9,
+  0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
+  0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
+  0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
+  0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED,
+  0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
+  0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3,
+  0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
+  0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
+  0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
+  0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7,
+  0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
+  0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF,
+  0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
+};  /* kReverseBits[i] is i with its 8 bits in reverse order */
+#endif  /* BROTLI_RBIT */
+
+#define BROTLI_REVERSE_BITS_LOWEST \
+  ((brotli_reg_t)1 << (BROTLI_REVERSE_BITS_MAX - 1 + BROTLI_REVERSE_BITS_BASE))
+
+/* Returns reverse(num >> BROTLI_REVERSE_BITS_BASE, BROTLI_REVERSE_BITS_MAX),
+   where reverse(value, len) is the bit-wise reversal of the len least
+   significant bits of value. */
+static BROTLI_INLINE brotli_reg_t BrotliReverseBits(brotli_reg_t num) {
+#if defined(BROTLI_RBIT)
+  return BROTLI_RBIT(num);
+#else
+  return kReverseBits[num];  /* table has 1 << 8 entries: num must be < 256 */
+#endif
+}
+
+/* Writes |code| to table[end - step], table[end - 2 * step], ..., table[0]. */
+/* Assumes end is a positive multiple of step; table[end] is never written. */
+static BROTLI_INLINE void ReplicateValue(HuffmanCode* table,
+                                         int step, int end,
+                                         HuffmanCode code) {
+  int pos = end;
+  do {
+    table[pos -= step] = code;
+  } while (pos > 0);
+}
+
+/* Computes the bit width of the next 2nd level table. |count| is the
+   histogram of code lengths of the symbols not yet placed, |len| is the code
+   length of the symbol about to be placed. */
+static BROTLI_INLINE int NextTableBitSize(const uint16_t* const count,
+                                          int len, int root_bits) {
+  /* Slots still open at the current length; doubles with each extra bit. */
+  int room = 1 << (len - root_bits);
+  for (; len < BROTLI_HUFFMAN_MAX_CODE_LENGTH; ++len, room <<= 1) {
+    room -= count[len];
+    /* Every slot is taken by codes of this length: no need to go wider. */
+    if (room <= 0) break;
+  }
+  return len - root_bits;
+}
+
+void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table,
+                                        const uint8_t* const code_lengths,
+                                        uint16_t* count) {
+  HuffmanCode code;       /* current table entry */
+  int symbol;             /* symbol index in original or sorted table */
+  brotli_reg_t key;       /* prefix code */
+  brotli_reg_t key_step;  /* prefix code addend */
+  int step;               /* step size to replicate values in current table */
+  int table_size;         /* size of current table */
+  int sorted[BROTLI_CODE_LENGTH_CODES];  /* symbols sorted by code length */
+  /* offsets in sorted table for each length */
+  int offset[BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH + 1];
+  int bits;               /* code length currently being processed */
+  int bits_count;         /* symbols left to place at the current length */
+  BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH <=
+                BROTLI_REVERSE_BITS_MAX);
+  BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH == 5);
+
+  /* Generate offsets into sorted symbol table by code length. */
+  symbol = -1;
+  bits = 1;
+  /* BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH == 5 */
+  BROTLI_REPEAT_5({
+    symbol += count[bits];
+    offset[bits] = symbol;  /* index of the last slot for this length */
+    bits++;
+  });
+  /* Symbols with code length 0 are placed after all other symbols. */
+  offset[0] = BROTLI_CODE_LENGTH_CODES - 1;
+
+  /* Sort symbols by length, by symbol order within each length. */
+  symbol = BROTLI_CODE_LENGTH_CODES;
+  do {
+    BROTLI_REPEAT_6({
+      symbol--;
+      sorted[offset[code_lengths[symbol]]--] = symbol;  /* back to front */
+    });
+  } while (symbol != 0);  /* NOTE(review): needs symbol count % 6 == 0? */
+
+  table_size = 1 << BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH;
+
+  /* Special case: all symbols but one have 0 code length. */
+  if (offset[0] == 0) {  /* all but one symbol consumed a length-0 slot */
+    code = ConstructHuffmanCode(0, (uint16_t)sorted[0]);
+    for (key = 0; key < (brotli_reg_t)table_size; ++key) {
+      table[key] = code;
+    }
+    return;
+  }
+
+  /* Fill in table. */
+  key = 0;
+  key_step = BROTLI_REVERSE_BITS_LOWEST;
+  symbol = 0;  /* walks sorted[] from its first entry */
+  bits = 1;
+  step = 2;
+  do {
+    for (bits_count = count[bits]; bits_count != 0; --bits_count) {
+      code = ConstructHuffmanCode((uint8_t)bits, (uint16_t)sorted[symbol++]);
+      ReplicateValue(&table[BrotliReverseBits(key)], step, table_size, code);
+      key += key_step;
+    }
+    step <<= 1;      /* codes one bit longer repeat at twice the stride */
+    key_step >>= 1;
+  } while (++bits <= BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH);
+}
+
+uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table,
+                                 int root_bits,
+                                 const uint16_t* const symbol_lists,
+                                 uint16_t* count) {
+  HuffmanCode code;       /* current table entry */
+  HuffmanCode* table;     /* next available space in table */
+  int len;                /* current code length */
+  int symbol;             /* symbol index in original or sorted table */
+  brotli_reg_t key;       /* prefix code */
+  brotli_reg_t key_step;  /* prefix code addend */
+  brotli_reg_t sub_key;   /* 2nd level table prefix code */
+  brotli_reg_t sub_key_step;  /* 2nd level table prefix code addend */
+  int step;               /* step size to replicate values in current table */
+  int table_bits;         /* key length of current table */
+  int table_size;         /* size of current table */
+  int total_size;         /* sum of root table size and 2nd level table sizes */
+  int max_length = -1;    /* starts as index -1 into symbol_lists */
+  int bits;               /* code length handled in the root table pass */
+  int bits_count;         /* symbols left to place at the current length */
+
+  BROTLI_DCHECK(root_bits <= BROTLI_REVERSE_BITS_MAX);
+  BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH - root_bits <=
+                BROTLI_REVERSE_BITS_MAX);
+
+  while (symbol_lists[max_length] == 0xFFFF) max_length--;  /* index < 0 */
+  max_length += BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1;  /* index -> code length */
+
+  table = root_table;
+  table_bits = root_bits;
+  table_size = 1 << table_bits;
+  total_size = table_size;
+
+  /* Fill in the root table. Reduce the table size if possible,
+     and create the repetitions by memcpy. */
+  if (table_bits > max_length) {
+    table_bits = max_length;
+    table_size = 1 << table_bits;
+  }
+  key = 0;
+  key_step = BROTLI_REVERSE_BITS_LOWEST;
+  bits = 1;
+  step = 2;
+  do {
+    symbol = bits - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);  /* list head */
+    for (bits_count = count[bits]; bits_count != 0; --bits_count) {
+      symbol = symbol_lists[symbol];  /* follow the chain to the next symbol */
+      code = ConstructHuffmanCode((uint8_t)bits, (uint16_t)symbol);
+      ReplicateValue(&table[BrotliReverseBits(key)], step, table_size, code);
+      key += key_step;
+    }
+    step <<= 1;
+    key_step >>= 1;
+  } while (++bits <= table_bits);
+
+  /* If root_bits != table_bits then replicate to fill the remaining slots. */
+  while (total_size != table_size) {
+    memcpy(&table[table_size], &table[0],
+           (size_t)table_size * sizeof(table[0]));
+    table_size <<= 1;
+  }
+
+  /* Fill in 2nd level tables and add pointers to root table. */
+  key_step = BROTLI_REVERSE_BITS_LOWEST >> (root_bits - 1);
+  sub_key = (BROTLI_REVERSE_BITS_LOWEST << 1);  /* forces a first sub-table */
+  sub_key_step = BROTLI_REVERSE_BITS_LOWEST;
+  for (len = root_bits + 1, step = 2; len <= max_length; ++len) {
+    symbol = len - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);  /* list head */
+    for (; count[len] != 0; --count[len]) {  /* drains count[len] to 0 */
+      if (sub_key == (BROTLI_REVERSE_BITS_LOWEST << 1U)) {
+        table += table_size;  /* skip past the table just filled */
+        table_bits = NextTableBitSize(count, len, root_bits);
+        table_size = 1 << table_bits;
+        total_size += table_size;
+        sub_key = BrotliReverseBits(key);
+        key += key_step;
+        root_table[sub_key] = ConstructHuffmanCode(  /* link to sub-table */
+            (uint8_t)(table_bits + root_bits),
+            (uint16_t)(((size_t)(table - root_table)) - sub_key));
+        sub_key = 0;  /* restart keys inside the new sub-table */
+      }
+      symbol = symbol_lists[symbol];
+      code = ConstructHuffmanCode((uint8_t)(len - root_bits), (uint16_t)symbol);
+      ReplicateValue(
+          &table[BrotliReverseBits(sub_key)], step, table_size, code);
+      sub_key += sub_key_step;
+    }
+    step <<= 1;
+    sub_key_step >>= 1;
+  }
+  return (uint32_t)total_size;
+}
+
+/* Fills |table| per |num_symbols|, replicated to 1 << root_bits entries. */
+uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
+                                       int root_bits,
+                                       uint16_t* val,
+                                       uint32_t num_symbols) {
+  const uint32_t goal_size = 1U << root_bits;
+  uint32_t filled = 1;  /* leading entries of |table| written so far */
+  switch (num_symbols) {
+    case 0:
+      /* One symbol: a zero-length code. */
+      table[0] = ConstructHuffmanCode(0, val[0]);
+      break;
+    case 1: {
+      /* Two 1-bit codes; the smaller of val[0], val[1] takes slot 0. */
+      const int swapped = !(val[1] > val[0]);
+      table[0] = ConstructHuffmanCode(1, val[swapped ? 1 : 0]);
+      table[1] = ConstructHuffmanCode(1, val[swapped ? 0 : 1]);
+      filled = 2;
+      break;
+    }
+    case 2: {
+      /* val[0] gets 1 bit; val[1], val[2] get 2 bits, smaller one in slot 1. */
+      const int swapped = !(val[2] > val[1]);
+      const HuffmanCode short_code = ConstructHuffmanCode(1, val[0]);
+      table[0] = short_code;
+      table[2] = short_code;
+      table[1] = ConstructHuffmanCode(2, val[swapped ? 2 : 1]);
+      table[3] = ConstructHuffmanCode(2, val[swapped ? 1 : 2]);
+      filled = 4;
+      break;
+    }
+    case 3: {
+      /* Four 2-bit codes: sort val[0..3] ascending in place, then lay out. */
+      int lo, hi;
+      for (lo = 0; lo < 3; ++lo) {
+        for (hi = lo + 1; hi < 4; ++hi) {
+          if (val[hi] < val[lo]) {
+            const uint16_t tmp = val[hi];
+            val[hi] = val[lo];
+            val[lo] = tmp;
+          }
+        }
+      }
+      table[0] = ConstructHuffmanCode(2, val[0]);
+      table[1] = ConstructHuffmanCode(2, val[2]);
+      table[2] = ConstructHuffmanCode(2, val[1]);
+      table[3] = ConstructHuffmanCode(2, val[3]);
+      filled = 4;
+      break;
+    }
+    case 4: {
+      /* Lengths [1, 2, 3, 3]: order val[2] <= val[3] in place, fill 8 slots. */
+      int slot;
+      if (val[3] < val[2]) {
+        const uint16_t tmp = val[3];
+        val[3] = val[2];
+        val[2] = tmp;
+      }
+      for (slot = 0; slot < 8; slot += 2) {
+        table[slot] = ConstructHuffmanCode(1, val[0]);
+      }
+      table[1] = ConstructHuffmanCode(2, val[1]);
+      table[5] = ConstructHuffmanCode(2, val[1]);
+      table[3] = ConstructHuffmanCode(3, val[2]);
+      table[7] = ConstructHuffmanCode(3, val[3]);
+      filled = 8;
+      break;
+    }
+  }
+  /* Double the written prefix until it reaches goal_size entries. */
+  for (; filled != goal_size; filled <<= 1) {
+    memcpy(&table[filled], &table[0], (size_t)filled * sizeof(table[0]));
+  }
+  return goal_size;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/dec/huffman.h b/third-party/libjxl/libjxl/third_party/brotli/c/dec/huffman.h
new file mode 100644
index 0000000000..50360962c7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/dec/huffman.h
@@ -0,0 +1,122 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Utilities for building Huffman decoding tables. */
+
+#ifndef BROTLI_DEC_HUFFMAN_H_
+#define BROTLI_DEC_HUFFMAN_H_
+
+#include <brotli/types.h>
+
+#include "../common/platform.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_HUFFMAN_MAX_CODE_LENGTH 15
+
+/* BROTLI_NUM_BLOCK_LEN_SYMBOLS == 26 */
+#define BROTLI_HUFFMAN_MAX_SIZE_26 396
+/* BROTLI_MAX_BLOCK_TYPE_SYMBOLS == 258 */
+#define BROTLI_HUFFMAN_MAX_SIZE_258 632
+/* BROTLI_MAX_CONTEXT_MAP_SYMBOLS == 272 */
+#define BROTLI_HUFFMAN_MAX_SIZE_272 646
+
+#define BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH 5
+
+#if ((defined(BROTLI_TARGET_ARMV7) || defined(BROTLI_TARGET_ARMV8_32)) && \
+  BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0))
+#define BROTLI_HUFFMAN_CODE_FAST_LOAD
+#endif
+
+#if !defined(BROTLI_HUFFMAN_CODE_FAST_LOAD)
+/* One decoding table entry. Do not create this struct directly - use the
+ * ConstructHuffmanCode constructor below! */
+typedef struct {
+  uint8_t bits;    /* number of bits used for this symbol */
+  uint16_t value;  /* symbol value or table offset */
+} HuffmanCode;
+
+static BROTLI_INLINE HuffmanCode ConstructHuffmanCode(const uint8_t bits,
+    const uint16_t value) {
+  HuffmanCode entry;  /* filled field by field, then returned by value */
+  entry.value = value;
+  entry.bits = bits;
+  return entry;
+}
+
+/* Please use the following macros to optimize HuffmanCode accesses in hot
+ * paths.
+ *
+ * For example, assuming |table| contains a HuffmanCode pointer:
+ *
+ *   BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table);
+ *   BROTLI_HC_ADJUST_TABLE_INDEX(table, index_into_table);
+ *   *bits = BROTLI_HC_FAST_LOAD_BITS(table);
+ *   *value = BROTLI_HC_FAST_LOAD_VALUE(table);
+ *   BROTLI_HC_ADJUST_TABLE_INDEX(table, offset);
+ *   *bits2 = BROTLI_HC_FAST_LOAD_BITS(table);
+ *   *value2 = BROTLI_HC_FAST_LOAD_VALUE(table);
+ *
+ */
+
+#define BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(H)
+#define BROTLI_HC_ADJUST_TABLE_INDEX(H, V) H += (V)
+
+/* These must be given a HuffmanCode pointer! */
+#define BROTLI_HC_FAST_LOAD_BITS(H) (H->bits)
+#define BROTLI_HC_FAST_LOAD_VALUE(H) (H->value)
+
+#else /* BROTLI_HUFFMAN_CODE_FAST_LOAD */
+
+typedef BROTLI_ALIGNED(4) uint32_t HuffmanCode;
+
+static BROTLI_INLINE HuffmanCode ConstructHuffmanCode(const uint8_t bits,
+    const uint16_t value) {  /* packs value into bits 16..31, |bits| into 0..7 */
+  return ((HuffmanCode)value << 16) | (HuffmanCode)bits;  /* unsigned shift */
+}
+
+#define BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(H) uint32_t __fastload_##H = (*H)
+#define BROTLI_HC_ADJUST_TABLE_INDEX(H, V) H += (V); __fastload_##H = (*H)
+
+/* These must be given a HuffmanCode pointer! */
+#define BROTLI_HC_FAST_LOAD_BITS(H) ((__fastload_##H) & 0xFF)
+#define BROTLI_HC_FAST_LOAD_VALUE(H) ((__fastload_##H) >> 16)
+#endif /* BROTLI_HUFFMAN_CODE_FAST_LOAD */
+
+/* Builds Huffman lookup table assuming code lengths are in symbol order. */
+BROTLI_INTERNAL void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* root_table,
+    const uint8_t* const code_lengths, uint16_t* count);
+
+/* Builds Huffman lookup table from the per-length symbol lists and the length
+   histogram |count|, which is modified. Returns size of resulting table. */
+BROTLI_INTERNAL uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table,
+    int root_bits, const uint16_t* const symbol_lists, uint16_t* count);
+
+/* Builds a simple Huffman table. The |num_symbols| parameter is to be
+   interpreted as follows: 0 means 1 symbol, 1 means 2 symbols,
+   2 means 3 symbols, 3 means 4 symbols with lengths [2, 2, 2, 2],
+   4 means 4 symbols with lengths [1, 2, 3, 3]. */
+BROTLI_INTERNAL uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
+    int root_bits, uint16_t* symbols, uint32_t num_symbols);
+
+/* Contains a collection of Huffman trees with the same alphabet size. */
+/* alphabet_size_limit is needed due to simple codes, since
+   log2(alphabet_size_max) could be greater than log2(alphabet_size_limit). */
+typedef struct {
+  HuffmanCode** htrees;  /* NOTE(review): presumably num_htrees pointers */
+  HuffmanCode* codes;    /* NOTE(review): presumably storage htrees point into */
+  uint16_t alphabet_size_max;
+  uint16_t alphabet_size_limit;
+  uint16_t num_htrees;   /* number of trees in the group */
+} HuffmanTreeGroup;
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_DEC_HUFFMAN_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/dec/prefix.h b/third-party/libjxl/libjxl/third_party/brotli/c/dec/prefix.h
new file mode 100644
index 0000000000..e8acf07740
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/dec/prefix.h
@@ -0,0 +1,733 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Lookup tables to map prefix codes to value ranges. This is used during
+   decoding of the block lengths, literal insertion lengths and copy lengths. */
+
+#ifndef BROTLI_DEC_PREFIX_H_
+#define BROTLI_DEC_PREFIX_H_
+
+#include <brotli/types.h>
+
+#include "../common/constants.h"
+
+typedef struct CmdLutElement {
+  uint8_t insert_len_extra_bits;
+  uint8_t copy_len_extra_bits;
+  int8_t distance_code;
+  uint8_t context;
+  uint16_t insert_len_offset;
+  uint16_t copy_len_offset;
+} CmdLutElement;  /* element type of kCmdLut, one entry per command symbol */
+
+static const CmdLutElement kCmdLut[BROTLI_NUM_COMMAND_SYMBOLS] = {
+  { 0x00, 0x00, 0, 0x00, 0x0000, 0x0002 },
+  { 0x00, 0x00, 0, 0x01, 0x0000, 0x0003 },
+  { 0x00, 0x00, 0, 0x02, 0x0000, 0x0004 },
+  { 0x00, 0x00, 0, 0x03, 0x0000, 0x0005 },
+  { 0x00, 0x00, 0, 0x03, 0x0000, 0x0006 },
+  { 0x00, 0x00, 0, 0x03, 0x0000, 0x0007 },
+  { 0x00, 0x00, 0, 0x03, 0x0000, 0x0008 },
+  { 0x00, 0x00, 0, 0x03, 0x0000, 0x0009 },
+  { 0x00, 0x00, 0, 0x00, 0x0001, 0x0002 },
+  { 0x00, 0x00, 0, 0x01, 0x0001, 0x0003 },
+  { 0x00, 0x00, 0, 0x02, 0x0001, 0x0004 },
+  { 0x00, 0x00, 0, 0x03, 0x0001, 0x0005 },
+  { 0x00, 0x00, 0, 0x03, 0x0001, 0x0006 },
+  { 0x00, 0x00, 0, 0x03, 0x0001, 0x0007 },
+  { 0x00, 0x00, 0, 0x03, 0x0001, 0x0008 },
+  { 0x00, 0x00, 0, 0x03, 0x0001, 0x0009 },
+  { 0x00, 0x00, 0, 0x00, 0x0002, 0x0002 },
+  { 0x00, 0x00, 0, 0x01, 0x0002, 0x0003 },
+  { 0x00, 0x00, 0, 0x02, 0x0002, 0x0004 },
+  { 0x00, 0x00, 0, 0x03, 0x0002, 0x0005 },
+  { 0x00, 0x00, 0, 0x03, 0x0002, 0x0006 },
+  { 0x00, 0x00, 0, 0x03, 0x0002, 0x0007 },
+  { 0x00, 0x00, 0, 0x03, 0x0002, 0x0008 },
+  { 0x00, 0x00, 0, 0x03, 0x0002, 0x0009 },
+  { 0x00, 0x00, 0, 0x00, 0x0003, 0x0002 },
+  { 0x00, 0x00, 0, 0x01, 0x0003, 0x0003 },
+  { 0x00, 0x00, 0, 0x02, 0x0003, 0x0004 },
+  { 0x00, 0x00, 0, 0x03, 0x0003, 0x0005 },
+  { 0x00, 0x00, 0, 0x03, 0x0003, 0x0006 },
+  { 0x00, 0x00, 0, 0x03, 0x0003, 0x0007 },
+  { 0x00, 0x00, 0, 0x03, 0x0003, 0x0008 },
+  { 0x00, 0x00, 0, 0x03, 0x0003, 0x0009 },
+  { 0x00, 0x00, 0, 0x00, 0x0004, 0x0002 },
+  { 0x00, 0x00, 0, 0x01, 0x0004, 0x0003 },
+  { 0x00, 0x00, 0, 0x02, 0x0004, 0x0004 },
+  { 0x00, 0x00, 0, 0x03, 0x0004, 0x0005 },
+  { 0x00, 0x00, 0, 0x03, 0x0004, 0x0006 },
+  { 0x00, 0x00, 0, 0x03, 0x0004, 0x0007 },
+  { 0x00, 0x00, 0, 0x03, 0x0004, 0x0008 },
+  { 0x00, 0x00, 0, 0x03, 0x0004, 0x0009 },
+  { 0x00, 0x00, 0, 0x00, 0x0005, 0x0002 },
+  { 0x00, 0x00, 0, 0x01, 0x0005, 0x0003 },
+  { 0x00, 0x00, 0, 0x02, 0x0005, 0x0004 },
+  { 0x00, 0x00, 0, 0x03, 0x0005, 0x0005 },
+  { 0x00, 0x00, 0, 0x03, 0x0005, 0x0006 },
+  { 0x00, 0x00, 0, 0x03, 0x0005, 0x0007 },
+  { 0x00, 0x00, 0, 0x03, 0x0005, 0x0008 },
+  { 0x00, 0x00, 0, 0x03, 0x0005, 0x0009 },
+  { 0x01, 0x00, 0, 0x00, 0x0006, 0x0002 },
+  { 0x01, 0x00, 0, 0x01, 0x0006, 0x0003 },
+  { 0x01, 0x00, 0, 0x02, 0x0006, 0x0004 },
+  { 0x01, 0x00, 0, 0x03, 0x0006, 0x0005 },
+  { 0x01, 0x00, 0, 0x03, 0x0006, 0x0006 },
+  { 0x01, 0x00, 0, 0x03, 0x0006, 0x0007 },
+  { 0x01, 0x00, 0, 0x03, 0x0006, 0x0008 },
+  { 0x01, 0x00, 0, 0x03, 0x0006, 0x0009 },
+  { 0x01, 0x00, 0, 0x00, 0x0008, 0x0002 },
+  { 0x01, 0x00, 0, 0x01, 0x0008, 0x0003 },
+  { 0x01, 0x00, 0, 0x02, 0x0008, 0x0004 },
+  { 0x01, 0x00, 0, 0x03, 0x0008, 0x0005 },
+  { 0x01, 0x00, 0, 0x03, 0x0008, 0x0006 },
+  { 0x01, 0x00, 0, 0x03, 0x0008, 0x0007 },
+  { 0x01, 0x00, 0, 0x03, 0x0008, 0x0008 },
+  { 0x01, 0x00, 0, 0x03, 0x0008, 0x0009 },
+  { 0x00, 0x01, 0, 0x03, 0x0000, 0x000a },
+  { 0x00, 0x01, 0, 0x03, 0x0000, 0x000c },
+  { 0x00, 0x02, 0, 0x03, 0x0000, 0x000e },
+  { 0x00, 0x02, 0, 0x03, 0x0000, 0x0012 },
+  { 0x00, 0x03, 0, 0x03, 0x0000, 0x0016 },
+  { 0x00, 0x03, 0, 0x03, 0x0000, 0x001e },
+  { 0x00, 0x04, 0, 0x03, 0x0000, 0x0026 },
+  { 0x00, 0x04, 0, 0x03, 0x0000, 0x0036 },
+  { 0x00, 0x01, 0, 0x03, 0x0001, 0x000a },
+  { 0x00, 0x01, 0, 0x03, 0x0001, 0x000c },
+  { 0x00, 0x02, 0, 0x03, 0x0001, 0x000e },
+  { 0x00, 0x02, 0, 0x03, 0x0001, 0x0012 },
+  { 0x00, 0x03, 0, 0x03, 0x0001, 0x0016 },
+  { 0x00, 0x03, 0, 0x03, 0x0001, 0x001e },
+  { 0x00, 0x04, 0, 0x03, 0x0001, 0x0026 },
+  { 0x00, 0x04, 0, 0x03, 0x0001, 0x0036 },
+  { 0x00, 0x01, 0, 0x03, 0x0002, 0x000a },
+  { 0x00, 0x01, 0, 0x03, 0x0002, 0x000c },
+  { 0x00, 0x02, 0, 0x03, 0x0002, 0x000e },
+  { 0x00, 0x02, 0, 0x03, 0x0002, 0x0012 },
+  { 0x00, 0x03, 0, 0x03, 0x0002, 0x0016 },
+  { 0x00, 0x03, 0, 0x03, 0x0002, 0x001e },
+  { 0x00, 0x04, 0, 0x03, 0x0002, 0x0026 },
+  { 0x00, 0x04, 0, 0x03, 0x0002, 0x0036 },
+  { 0x00, 0x01, 0, 0x03, 0x0003, 0x000a },
+  { 0x00, 0x01, 0, 0x03, 0x0003, 0x000c },
+  { 0x00, 0x02, 0, 0x03, 0x0003, 0x000e },
+  { 0x00, 0x02, 0, 0x03, 0x0003, 0x0012 },
+  { 0x00, 0x03, 0, 0x03, 0x0003, 0x0016 },
+  { 0x00, 0x03, 0, 0x03, 0x0003, 0x001e },
+  { 0x00, 0x04, 0, 0x03, 0x0003, 0x0026 },
+  { 0x00, 0x04, 0, 0x03, 0x0003, 0x0036 },
+  { 0x00, 0x01, 0, 0x03, 0x0004, 0x000a },
+  { 0x00, 0x01, 0, 0x03, 0x0004, 0x000c },
+  { 0x00, 0x02, 0, 0x03, 0x0004, 0x000e },
+  { 0x00, 0x02, 0, 0x03, 0x0004, 0x0012 },
+  { 0x00, 0x03, 0, 0x03, 0x0004, 0x0016 },
+  { 0x00, 0x03, 0, 0x03, 0x0004, 0x001e },
+  { 0x00, 0x04, 0, 0x03, 0x0004, 0x0026 },
+  { 0x00, 0x04, 0, 0x03, 0x0004, 0x0036 },
+  { 0x00, 0x01, 0, 0x03, 0x0005, 0x000a },
+  { 0x00, 0x01, 0, 0x03, 0x0005, 0x000c },
+  { 0x00, 0x02, 0, 0x03, 0x0005, 0x000e },
+  { 0x00, 0x02, 0, 0x03, 0x0005, 0x0012 },
+  { 0x00, 0x03, 0, 0x03, 0x0005, 0x0016 },
+  { 0x00, 0x03, 0, 0x03, 0x0005, 0x001e },
+  { 0x00, 0x04, 0, 0x03, 0x0005, 0x0026 },
+  { 0x00, 0x04, 0, 0x03, 0x0005, 0x0036 },
+  { 0x01, 0x01, 0, 0x03, 0x0006, 0x000a },
+  { 0x01, 0x01, 0, 0x03, 0x0006, 0x000c },
+  { 0x01, 0x02, 0, 0x03, 0x0006, 0x000e },
+  { 0x01, 0x02, 0, 0x03, 0x0006, 0x0012 },
+  { 0x01, 0x03, 0, 0x03, 0x0006, 0x0016 },
+  { 0x01, 0x03, 0, 0x03, 0x0006, 0x001e },
+  { 0x01, 0x04, 0, 0x03, 0x0006, 0x0026 },
+  { 0x01, 0x04, 0, 0x03, 0x0006, 0x0036 },
+  { 0x01, 0x01, 0, 0x03, 0x0008, 0x000a },
+  { 0x01, 0x01, 0, 0x03, 0x0008, 0x000c },
+  { 0x01, 0x02, 0, 0x03, 0x0008, 0x000e },
+  { 0x01, 0x02, 0, 0x03, 0x0008, 0x0012 },
+  { 0x01, 0x03, 0, 0x03, 0x0008, 0x0016 },
+  { 0x01, 0x03, 0, 0x03, 0x0008, 0x001e },
+  { 0x01, 0x04, 0, 0x03, 0x0008, 0x0026 },
+  { 0x01, 0x04, 0, 0x03, 0x0008, 0x0036 },
+  { 0x00, 0x00, -1, 0x00, 0x0000, 0x0002 },
+  { 0x00, 0x00, -1, 0x01, 0x0000, 0x0003 },
+  { 0x00, 0x00, -1, 0x02, 0x0000, 0x0004 },
+  { 0x00, 0x00, -1, 0x03, 0x0000, 0x0005 },
+  { 0x00, 0x00, -1, 0x03, 0x0000, 0x0006 },
+  { 0x00, 0x00, -1, 0x03, 0x0000, 0x0007 },
+  { 0x00, 0x00, -1, 0x03, 0x0000, 0x0008 },
+  { 0x00, 0x00, -1, 0x03, 0x0000, 0x0009 },
+  { 0x00, 0x00, -1, 0x00, 0x0001, 0x0002 },
+  { 0x00, 0x00, -1, 0x01, 0x0001, 0x0003 },
+  { 0x00, 0x00, -1, 0x02, 0x0001, 0x0004 },
+  { 0x00, 0x00, -1, 0x03, 0x0001, 0x0005 },
+  { 0x00, 0x00, -1, 0x03, 0x0001, 0x0006 },
+  { 0x00, 0x00, -1, 0x03, 0x0001, 0x0007 },
+  { 0x00, 0x00, -1, 0x03, 0x0001, 0x0008 },
+  { 0x00, 0x00, -1, 0x03, 0x0001, 0x0009 },
+  { 0x00, 0x00, -1, 0x00, 0x0002, 0x0002 },
+  { 0x00, 0x00, -1, 0x01, 0x0002, 0x0003 },
+  { 0x00, 0x00, -1, 0x02, 0x0002, 0x0004 },
+  { 0x00, 0x00, -1, 0x03, 0x0002, 0x0005 },
+  { 0x00, 0x00, -1, 0x03, 0x0002, 0x0006 },
+  { 0x00, 0x00, -1, 0x03, 0x0002, 0x0007 },
+  { 0x00, 0x00, -1, 0x03, 0x0002, 0x0008 },
+  { 0x00, 0x00, -1, 0x03, 0x0002, 0x0009 },
+  { 0x00, 0x00, -1, 0x00, 0x0003, 0x0002 },
+  { 0x00, 0x00, -1, 0x01, 0x0003, 0x0003 },
+  { 0x00, 0x00, -1, 0x02, 0x0003, 0x0004 },
+  { 0x00, 0x00, -1, 0x03, 0x0003, 0x0005 },
+  { 0x00, 0x00, -1, 0x03, 0x0003, 0x0006 },
+  { 0x00, 0x00, -1, 0x03, 0x0003, 0x0007 },
+  { 0x00, 0x00, -1, 0x03, 0x0003, 0x0008 },
+  { 0x00, 0x00, -1, 0x03, 0x0003, 0x0009 },
+  { 0x00, 0x00, -1, 0x00, 0x0004, 0x0002 },
+  { 0x00, 0x00, -1, 0x01, 0x0004, 0x0003 },
+  { 0x00, 0x00, -1, 0x02, 0x0004, 0x0004 },
+  { 0x00, 0x00, -1, 0x03, 0x0004, 0x0005 },
+  { 0x00, 0x00, -1, 0x03, 0x0004, 0x0006 },
+  { 0x00, 0x00, -1, 0x03, 0x0004, 0x0007 },
+  { 0x00, 0x00, -1, 0x03, 0x0004, 0x0008 },
+  { 0x00, 0x00, -1, 0x03, 0x0004, 0x0009 },
+  { 0x00, 0x00, -1, 0x00, 0x0005, 0x0002 },
+  { 0x00, 0x00, -1, 0x01, 0x0005, 0x0003 },
+  { 0x00, 0x00, -1, 0x02, 0x0005, 0x0004 },
+  { 0x00, 0x00, -1, 0x03, 0x0005, 0x0005 },
+  { 0x00, 0x00, -1, 0x03, 0x0005, 0x0006 },
+  { 0x00, 0x00, -1, 0x03, 0x0005, 0x0007 },
+  { 0x00, 0x00, -1, 0x03, 0x0005, 0x0008 },
+  { 0x00, 0x00, -1, 0x03, 0x0005, 0x0009 },
+  { 0x01, 0x00, -1, 0x00, 0x0006, 0x0002 },
+  { 0x01, 0x00, -1, 0x01, 0x0006, 0x0003 },
+  { 0x01, 0x00, -1, 0x02, 0x0006, 0x0004 },
+  { 0x01, 0x00, -1, 0x03, 0x0006, 0x0005 },
+  { 0x01, 0x00, -1, 0x03, 0x0006, 0x0006 },
+  { 0x01, 0x00, -1, 0x03, 0x0006, 0x0007 },
+  { 0x01, 0x00, -1, 0x03, 0x0006, 0x0008 },
+  { 0x01, 0x00, -1, 0x03, 0x0006, 0x0009 },
+  { 0x01, 0x00, -1, 0x00, 0x0008, 0x0002 },
+  { 0x01, 0x00, -1, 0x01, 0x0008, 0x0003 },
+  { 0x01, 0x00, -1, 0x02, 0x0008, 0x0004 },
+  { 0x01, 0x00, -1, 0x03, 0x0008, 0x0005 },
+  { 0x01, 0x00, -1, 0x03, 0x0008, 0x0006 },
+  { 0x01, 0x00, -1, 0x03, 0x0008, 0x0007 },
+  { 0x01, 0x00, -1, 0x03, 0x0008, 0x0008 },
+  { 0x01, 0x00, -1, 0x03, 0x0008, 0x0009 },
+  { 0x00, 0x01, -1, 0x03, 0x0000, 0x000a },
+  { 0x00, 0x01, -1, 0x03, 0x0000, 0x000c },
+  { 0x00, 0x02, -1, 0x03, 0x0000, 0x000e },
+  { 0x00, 0x02, -1, 0x03, 0x0000, 0x0012 },
+  { 0x00, 0x03, -1, 0x03, 0x0000, 0x0016 },
+  { 0x00, 0x03, -1, 0x03, 0x0000, 0x001e },
+  { 0x00, 0x04, -1, 0x03, 0x0000, 0x0026 },
+  { 0x00, 0x04, -1, 0x03, 0x0000, 0x0036 },
+  { 0x00, 0x01, -1, 0x03, 0x0001, 0x000a },
+  { 0x00, 0x01, -1, 0x03, 0x0001, 0x000c },
+  { 0x00, 0x02, -1, 0x03, 0x0001, 0x000e },
+  { 0x00, 0x02, -1, 0x03, 0x0001, 0x0012 },
+  { 0x00, 0x03, -1, 0x03, 0x0001, 0x0016 },
+  { 0x00, 0x03, -1, 0x03, 0x0001, 0x001e },
+  { 0x00, 0x04, -1, 0x03, 0x0001, 0x0026 },
+  { 0x00, 0x04, -1, 0x03, 0x0001, 0x0036 },
+  { 0x00, 0x01, -1, 0x03, 0x0002, 0x000a },
+  { 0x00, 0x01, -1, 0x03, 0x0002, 0x000c },
+  { 0x00, 0x02, -1, 0x03, 0x0002, 0x000e },
+  { 0x00, 0x02, -1, 0x03, 0x0002, 0x0012 },
+  { 0x00, 0x03, -1, 0x03, 0x0002, 0x0016 },
+  { 0x00, 0x03, -1, 0x03, 0x0002, 0x001e },
+  { 0x00, 0x04, -1, 0x03, 0x0002, 0x0026 },
+  { 0x00, 0x04, -1, 0x03, 0x0002, 0x0036 },
+  { 0x00, 0x01, -1, 0x03, 0x0003, 0x000a },
+  { 0x00, 0x01, -1, 0x03, 0x0003, 0x000c },
+  { 0x00, 0x02, -1, 0x03, 0x0003, 0x000e },
+  { 0x00, 0x02, -1, 0x03, 0x0003, 0x0012 },
+  { 0x00, 0x03, -1, 0x03, 0x0003, 0x0016 },
+  { 0x00, 0x03, -1, 0x03, 0x0003, 0x001e },
+  { 0x00, 0x04, -1, 0x03, 0x0003, 0x0026 },
+  { 0x00, 0x04, -1, 0x03, 0x0003, 0x0036 },
+  { 0x00, 0x01, -1, 0x03, 0x0004, 0x000a },
+  { 0x00, 0x01, -1, 0x03, 0x0004, 0x000c },
+  { 0x00, 0x02, -1, 0x03, 0x0004, 0x000e },
+  { 0x00, 0x02, -1, 0x03, 0x0004, 0x0012 },
+  { 0x00, 0x03, -1, 0x03, 0x0004, 0x0016 },
+  { 0x00, 0x03, -1, 0x03, 0x0004, 0x001e },
+  { 0x00, 0x04, -1, 0x03, 0x0004, 0x0026 },
+  { 0x00, 0x04, -1, 0x03, 0x0004, 0x0036 },
+  { 0x00, 0x01, -1, 0x03, 0x0005, 0x000a },
+  { 0x00, 0x01, -1, 0x03, 0x0005, 0x000c },
+  { 0x00, 0x02, -1, 0x03, 0x0005, 0x000e },
+  { 0x00, 0x02, -1, 0x03, 0x0005, 0x0012 },
+  { 0x00, 0x03, -1, 0x03, 0x0005, 0x0016 },
+  { 0x00, 0x03, -1, 0x03, 0x0005, 0x001e },
+  { 0x00, 0x04, -1, 0x03, 0x0005, 0x0026 },
+  { 0x00, 0x04, -1, 0x03, 0x0005, 0x0036 },
+  { 0x01, 0x01, -1, 0x03, 0x0006, 0x000a },
+  { 0x01, 0x01, -1, 0x03, 0x0006, 0x000c },
+  { 0x01, 0x02, -1, 0x03, 0x0006, 0x000e },
+  { 0x01, 0x02, -1, 0x03, 0x0006, 0x0012 },
+  { 0x01, 0x03, -1, 0x03, 0x0006, 0x0016 },
+  { 0x01, 0x03, -1, 0x03, 0x0006, 0x001e },
+  { 0x01, 0x04, -1, 0x03, 0x0006, 0x0026 },
+  { 0x01, 0x04, -1, 0x03, 0x0006, 0x0036 },
+  { 0x01, 0x01, -1, 0x03, 0x0008, 0x000a },
+  { 0x01, 0x01, -1, 0x03, 0x0008, 0x000c },
+  { 0x01, 0x02, -1, 0x03, 0x0008, 0x000e },
+  { 0x01, 0x02, -1, 0x03, 0x0008, 0x0012 },
+  { 0x01, 0x03, -1, 0x03, 0x0008, 0x0016 },
+  { 0x01, 0x03, -1, 0x03, 0x0008, 0x001e },
+  { 0x01, 0x04, -1, 0x03, 0x0008, 0x0026 },
+  { 0x01, 0x04, -1, 0x03, 0x0008, 0x0036 },
+  { 0x02, 0x00, -1, 0x00, 0x000a, 0x0002 },
+  { 0x02, 0x00, -1, 0x01, 0x000a, 0x0003 },
+  { 0x02, 0x00, -1, 0x02, 0x000a, 0x0004 },
+  { 0x02, 0x00, -1, 0x03, 0x000a, 0x0005 },
+  { 0x02, 0x00, -1, 0x03, 0x000a, 0x0006 },
+  { 0x02, 0x00, -1, 0x03, 0x000a, 0x0007 },
+  { 0x02, 0x00, -1, 0x03, 0x000a, 0x0008 },
+  { 0x02, 0x00, -1, 0x03, 0x000a, 0x0009 },
+  { 0x02, 0x00, -1, 0x00, 0x000e, 0x0002 },
+  { 0x02, 0x00, -1, 0x01, 0x000e, 0x0003 },
+  { 0x02, 0x00, -1, 0x02, 0x000e, 0x0004 },
+  { 0x02, 0x00, -1, 0x03, 0x000e, 0x0005 },
+  { 0x02, 0x00, -1, 0x03, 0x000e, 0x0006 },
+  { 0x02, 0x00, -1, 0x03, 0x000e, 0x0007 },
+  { 0x02, 0x00, -1, 0x03, 0x000e, 0x0008 },
+  { 0x02, 0x00, -1, 0x03, 0x000e, 0x0009 },
+  { 0x03, 0x00, -1, 0x00, 0x0012, 0x0002 },
+  { 0x03, 0x00, -1, 0x01, 0x0012, 0x0003 },
+  { 0x03, 0x00, -1, 0x02, 0x0012, 0x0004 },
+  { 0x03, 0x00, -1, 0x03, 0x0012, 0x0005 },
+  { 0x03, 0x00, -1, 0x03, 0x0012, 0x0006 },
+  { 0x03, 0x00, -1, 0x03, 0x0012, 0x0007 },
+  { 0x03, 0x00, -1, 0x03, 0x0012, 0x0008 },
+  { 0x03, 0x00, -1, 0x03, 0x0012, 0x0009 },
+  { 0x03, 0x00, -1, 0x00, 0x001a, 0x0002 },
+  { 0x03, 0x00, -1, 0x01, 0x001a, 0x0003 },
+  { 0x03, 0x00, -1, 0x02, 0x001a, 0x0004 },
+  { 0x03, 0x00, -1, 0x03, 0x001a, 0x0005 },
+  { 0x03, 0x00, -1, 0x03, 0x001a, 0x0006 },
+  { 0x03, 0x00, -1, 0x03, 0x001a, 0x0007 },
+  { 0x03, 0x00, -1, 0x03, 0x001a, 0x0008 },
+  { 0x03, 0x00, -1, 0x03, 0x001a, 0x0009 },
+  { 0x04, 0x00, -1, 0x00, 0x0022, 0x0002 },
+  { 0x04, 0x00, -1, 0x01, 0x0022, 0x0003 },
+  { 0x04, 0x00, -1, 0x02, 0x0022, 0x0004 },
+  { 0x04, 0x00, -1, 0x03, 0x0022, 0x0005 },
+  { 0x04, 0x00, -1, 0x03, 0x0022, 0x0006 },
+  { 0x04, 0x00, -1, 0x03, 0x0022, 0x0007 },
+  { 0x04, 0x00, -1, 0x03, 0x0022, 0x0008 },
+  { 0x04, 0x00, -1, 0x03, 0x0022, 0x0009 },
+  { 0x04, 0x00, -1, 0x00, 0x0032, 0x0002 },
+  { 0x04, 0x00, -1, 0x01, 0x0032, 0x0003 },
+  { 0x04, 0x00, -1, 0x02, 0x0032, 0x0004 },
+  { 0x04, 0x00, -1, 0x03, 0x0032, 0x0005 },
+  { 0x04, 0x00, -1, 0x03, 0x0032, 0x0006 },
+  { 0x04, 0x00, -1, 0x03, 0x0032, 0x0007 },
+  { 0x04, 0x00, -1, 0x03, 0x0032, 0x0008 },
+  { 0x04, 0x00, -1, 0x03, 0x0032, 0x0009 },
+  { 0x05, 0x00, -1, 0x00, 0x0042, 0x0002 },
+  { 0x05, 0x00, -1, 0x01, 0x0042, 0x0003 },
+  { 0x05, 0x00, -1, 0x02, 0x0042, 0x0004 },
+  { 0x05, 0x00, -1, 0x03, 0x0042, 0x0005 },
+  { 0x05, 0x00, -1, 0x03, 0x0042, 0x0006 },
+  { 0x05, 0x00, -1, 0x03, 0x0042, 0x0007 },
+  { 0x05, 0x00, -1, 0x03, 0x0042, 0x0008 },
+  { 0x05, 0x00, -1, 0x03, 0x0042, 0x0009 },
+  { 0x05, 0x00, -1, 0x00, 0x0062, 0x0002 },
+  { 0x05, 0x00, -1, 0x01, 0x0062, 0x0003 },
+  { 0x05, 0x00, -1, 0x02, 0x0062, 0x0004 },
+  { 0x05, 0x00, -1, 0x03, 0x0062, 0x0005 },
+  { 0x05, 0x00, -1, 0x03, 0x0062, 0x0006 },
+  { 0x05, 0x00, -1, 0x03, 0x0062, 0x0007 },
+  { 0x05, 0x00, -1, 0x03, 0x0062, 0x0008 },
+  { 0x05, 0x00, -1, 0x03, 0x0062, 0x0009 },
+  { 0x02, 0x01, -1, 0x03, 0x000a, 0x000a },
+  { 0x02, 0x01, -1, 0x03, 0x000a, 0x000c },
+  { 0x02, 0x02, -1, 0x03, 0x000a, 0x000e },
+  { 0x02, 0x02, -1, 0x03, 0x000a, 0x0012 },
+  { 0x02, 0x03, -1, 0x03, 0x000a, 0x0016 },
+  { 0x02, 0x03, -1, 0x03, 0x000a, 0x001e },
+  { 0x02, 0x04, -1, 0x03, 0x000a, 0x0026 },
+  { 0x02, 0x04, -1, 0x03, 0x000a, 0x0036 },
+  { 0x02, 0x01, -1, 0x03, 0x000e, 0x000a },
+  { 0x02, 0x01, -1, 0x03, 0x000e, 0x000c },
+  { 0x02, 0x02, -1, 0x03, 0x000e, 0x000e },
+  { 0x02, 0x02, -1, 0x03, 0x000e, 0x0012 },
+  { 0x02, 0x03, -1, 0x03, 0x000e, 0x0016 },
+  { 0x02, 0x03, -1, 0x03, 0x000e, 0x001e },
+  { 0x02, 0x04, -1, 0x03, 0x000e, 0x0026 },
+  { 0x02, 0x04, -1, 0x03, 0x000e, 0x0036 },
+  { 0x03, 0x01, -1, 0x03, 0x0012, 0x000a },
+  { 0x03, 0x01, -1, 0x03, 0x0012, 0x000c },
+  { 0x03, 0x02, -1, 0x03, 0x0012, 0x000e },
+  { 0x03, 0x02, -1, 0x03, 0x0012, 0x0012 },
+  { 0x03, 0x03, -1, 0x03, 0x0012, 0x0016 },
+  { 0x03, 0x03, -1, 0x03, 0x0012, 0x001e },
+  { 0x03, 0x04, -1, 0x03, 0x0012, 0x0026 },
+  { 0x03, 0x04, -1, 0x03, 0x0012, 0x0036 },
+  { 0x03, 0x01, -1, 0x03, 0x001a, 0x000a },
+  { 0x03, 0x01, -1, 0x03, 0x001a, 0x000c },
+  { 0x03, 0x02, -1, 0x03, 0x001a, 0x000e },
+  { 0x03, 0x02, -1, 0x03, 0x001a, 0x0012 },
+  { 0x03, 0x03, -1, 0x03, 0x001a, 0x0016 },
+  { 0x03, 0x03, -1, 0x03, 0x001a, 0x001e },
+  { 0x03, 0x04, -1, 0x03, 0x001a, 0x0026 },
+  { 0x03, 0x04, -1, 0x03, 0x001a, 0x0036 },
+  { 0x04, 0x01, -1, 0x03, 0x0022, 0x000a },
+  { 0x04, 0x01, -1, 0x03, 0x0022, 0x000c },
+  { 0x04, 0x02, -1, 0x03, 0x0022, 0x000e },
+  { 0x04, 0x02, -1, 0x03, 0x0022, 0x0012 },
+  { 0x04, 0x03, -1, 0x03, 0x0022, 0x0016 },
+  { 0x04, 0x03, -1, 0x03, 0x0022, 0x001e },
+  { 0x04, 0x04, -1, 0x03, 0x0022, 0x0026 },
+  { 0x04, 0x04, -1, 0x03, 0x0022, 0x0036 },
+  { 0x04, 0x01, -1, 0x03, 0x0032, 0x000a },
+  { 0x04, 0x01, -1, 0x03, 0x0032, 0x000c },
+  { 0x04, 0x02, -1, 0x03, 0x0032, 0x000e },
+  { 0x04, 0x02, -1, 0x03, 0x0032, 0x0012 },
+  { 0x04, 0x03, -1, 0x03, 0x0032, 0x0016 },
+  { 0x04, 0x03, -1, 0x03, 0x0032, 0x001e },
+  { 0x04, 0x04, -1, 0x03, 0x0032, 0x0026 },
+  { 0x04, 0x04, -1, 0x03, 0x0032, 0x0036 },
+  { 0x05, 0x01, -1, 0x03, 0x0042, 0x000a },
+  { 0x05, 0x01, -1, 0x03, 0x0042, 0x000c },
+  { 0x05, 0x02, -1, 0x03, 0x0042, 0x000e },
+  { 0x05, 0x02, -1, 0x03, 0x0042, 0x0012 },
+  { 0x05, 0x03, -1, 0x03, 0x0042, 0x0016 },
+  { 0x05, 0x03, -1, 0x03, 0x0042, 0x001e },
+  { 0x05, 0x04, -1, 0x03, 0x0042, 0x0026 },
+  { 0x05, 0x04, -1, 0x03, 0x0042, 0x0036 },
+  { 0x05, 0x01, -1, 0x03, 0x0062, 0x000a },
+  { 0x05, 0x01, -1, 0x03, 0x0062, 0x000c },
+  { 0x05, 0x02, -1, 0x03, 0x0062, 0x000e },
+  { 0x05, 0x02, -1, 0x03, 0x0062, 0x0012 },
+  { 0x05, 0x03, -1, 0x03, 0x0062, 0x0016 },
+  { 0x05, 0x03, -1, 0x03, 0x0062, 0x001e },
+  { 0x05, 0x04, -1, 0x03, 0x0062, 0x0026 },
+  { 0x05, 0x04, -1, 0x03, 0x0062, 0x0036 },
+  { 0x00, 0x05, -1, 0x03, 0x0000, 0x0046 },
+  { 0x00, 0x05, -1, 0x03, 0x0000, 0x0066 },
+  { 0x00, 0x06, -1, 0x03, 0x0000, 0x0086 },
+  { 0x00, 0x07, -1, 0x03, 0x0000, 0x00c6 },
+  { 0x00, 0x08, -1, 0x03, 0x0000, 0x0146 },
+  { 0x00, 0x09, -1, 0x03, 0x0000, 0x0246 },
+  { 0x00, 0x0a, -1, 0x03, 0x0000, 0x0446 },
+  { 0x00, 0x18, -1, 0x03, 0x0000, 0x0846 },
+  { 0x00, 0x05, -1, 0x03, 0x0001, 0x0046 },
+  { 0x00, 0x05, -1, 0x03, 0x0001, 0x0066 },
+  { 0x00, 0x06, -1, 0x03, 0x0001, 0x0086 },
+  { 0x00, 0x07, -1, 0x03, 0x0001, 0x00c6 },
+  { 0x00, 0x08, -1, 0x03, 0x0001, 0x0146 },
+  { 0x00, 0x09, -1, 0x03, 0x0001, 0x0246 },
+  { 0x00, 0x0a, -1, 0x03, 0x0001, 0x0446 },
+  { 0x00, 0x18, -1, 0x03, 0x0001, 0x0846 },
+  { 0x00, 0x05, -1, 0x03, 0x0002, 0x0046 },
+  { 0x00, 0x05, -1, 0x03, 0x0002, 0x0066 },
+  { 0x00, 0x06, -1, 0x03, 0x0002, 0x0086 },
+  { 0x00, 0x07, -1, 0x03, 0x0002, 0x00c6 },
+  { 0x00, 0x08, -1, 0x03, 0x0002, 0x0146 },
+  { 0x00, 0x09, -1, 0x03, 0x0002, 0x0246 },
+  { 0x00, 0x0a, -1, 0x03, 0x0002, 0x0446 },
+  { 0x00, 0x18, -1, 0x03, 0x0002, 0x0846 },
+  { 0x00, 0x05, -1, 0x03, 0x0003, 0x0046 },
+  { 0x00, 0x05, -1, 0x03, 0x0003, 0x0066 },
+  { 0x00, 0x06, -1, 0x03, 0x0003, 0x0086 },
+  { 0x00, 0x07, -1, 0x03, 0x0003, 0x00c6 },
+  { 0x00, 0x08, -1, 0x03, 0x0003, 0x0146 },
+  { 0x00, 0x09, -1, 0x03, 0x0003, 0x0246 },
+  { 0x00, 0x0a, -1, 0x03, 0x0003, 0x0446 },
+  { 0x00, 0x18, -1, 0x03, 0x0003, 0x0846 },
+  { 0x00, 0x05, -1, 0x03, 0x0004, 0x0046 },
+  { 0x00, 0x05, -1, 0x03, 0x0004, 0x0066 },
+  { 0x00, 0x06, -1, 0x03, 0x0004, 0x0086 },
+  { 0x00, 0x07, -1, 0x03, 0x0004, 0x00c6 },
+  { 0x00, 0x08, -1, 0x03, 0x0004, 0x0146 },
+  { 0x00, 0x09, -1, 0x03, 0x0004, 0x0246 },
+  { 0x00, 0x0a, -1, 0x03, 0x0004, 0x0446 },
+  { 0x00, 0x18, -1, 0x03, 0x0004, 0x0846 },
+  { 0x00, 0x05, -1, 0x03, 0x0005, 0x0046 },
+  { 0x00, 0x05, -1, 0x03, 0x0005, 0x0066 },
+  { 0x00, 0x06, -1, 0x03, 0x0005, 0x0086 },
+  { 0x00, 0x07, -1, 0x03, 0x0005, 0x00c6 },
+  { 0x00, 0x08, -1, 0x03, 0x0005, 0x0146 },
+  { 0x00, 0x09, -1, 0x03, 0x0005, 0x0246 },
+  { 0x00, 0x0a, -1, 0x03, 0x0005, 0x0446 },
+  { 0x00, 0x18, -1, 0x03, 0x0005, 0x0846 },
+  { 0x01, 0x05, -1, 0x03, 0x0006, 0x0046 },
+  { 0x01, 0x05, -1, 0x03, 0x0006, 0x0066 },
+  { 0x01, 0x06, -1, 0x03, 0x0006, 0x0086 },
+  { 0x01, 0x07, -1, 0x03, 0x0006, 0x00c6 },
+  { 0x01, 0x08, -1, 0x03, 0x0006, 0x0146 },
+  { 0x01, 0x09, -1, 0x03, 0x0006, 0x0246 },
+  { 0x01, 0x0a, -1, 0x03, 0x0006, 0x0446 },
+  { 0x01, 0x18, -1, 0x03, 0x0006, 0x0846 },
+  { 0x01, 0x05, -1, 0x03, 0x0008, 0x0046 },
+  { 0x01, 0x05, -1, 0x03, 0x0008, 0x0066 },
+  { 0x01, 0x06, -1, 0x03, 0x0008, 0x0086 },
+  { 0x01, 0x07, -1, 0x03, 0x0008, 0x00c6 },
+  { 0x01, 0x08, -1, 0x03, 0x0008, 0x0146 },
+  { 0x01, 0x09, -1, 0x03, 0x0008, 0x0246 },
+  { 0x01, 0x0a, -1, 0x03, 0x0008, 0x0446 },
+  { 0x01, 0x18, -1, 0x03, 0x0008, 0x0846 },
+  { 0x06, 0x00, -1, 0x00, 0x0082, 0x0002 },
+  { 0x06, 0x00, -1, 0x01, 0x0082, 0x0003 },
+  { 0x06, 0x00, -1, 0x02, 0x0082, 0x0004 },
+  { 0x06, 0x00, -1, 0x03, 0x0082, 0x0005 },
+  { 0x06, 0x00, -1, 0x03, 0x0082, 0x0006 },
+  { 0x06, 0x00, -1, 0x03, 0x0082, 0x0007 },
+  { 0x06, 0x00, -1, 0x03, 0x0082, 0x0008 },
+  { 0x06, 0x00, -1, 0x03, 0x0082, 0x0009 },
+  { 0x07, 0x00, -1, 0x00, 0x00c2, 0x0002 },
+  { 0x07, 0x00, -1, 0x01, 0x00c2, 0x0003 },
+  { 0x07, 0x00, -1, 0x02, 0x00c2, 0x0004 },
+  { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0005 },
+  { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0006 },
+  { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0007 },
+  { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0008 },
+  { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0009 },
+  { 0x08, 0x00, -1, 0x00, 0x0142, 0x0002 },
+  { 0x08, 0x00, -1, 0x01, 0x0142, 0x0003 },
+  { 0x08, 0x00, -1, 0x02, 0x0142, 0x0004 },
+  { 0x08, 0x00, -1, 0x03, 0x0142, 0x0005 },
+  { 0x08, 0x00, -1, 0x03, 0x0142, 0x0006 },
+  { 0x08, 0x00, -1, 0x03, 0x0142, 0x0007 },
+  { 0x08, 0x00, -1, 0x03, 0x0142, 0x0008 },
+  { 0x08, 0x00, -1, 0x03, 0x0142, 0x0009 },
+  { 0x09, 0x00, -1, 0x00, 0x0242, 0x0002 },
+  { 0x09, 0x00, -1, 0x01, 0x0242, 0x0003 },
+  { 0x09, 0x00, -1, 0x02, 0x0242, 0x0004 },
+  { 0x09, 0x00, -1, 0x03, 0x0242, 0x0005 },
+  { 0x09, 0x00, -1, 0x03, 0x0242, 0x0006 },
+  { 0x09, 0x00, -1, 0x03, 0x0242, 0x0007 },
+  { 0x09, 0x00, -1, 0x03, 0x0242, 0x0008 },
+  { 0x09, 0x00, -1, 0x03, 0x0242, 0x0009 },
+  { 0x0a, 0x00, -1, 0x00, 0x0442, 0x0002 },
+  { 0x0a, 0x00, -1, 0x01, 0x0442, 0x0003 },
+  { 0x0a, 0x00, -1, 0x02, 0x0442, 0x0004 },
+  { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0005 },
+  { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0006 },
+  { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0007 },
+  { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0008 },
+  { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0009 },
+  { 0x0c, 0x00, -1, 0x00, 0x0842, 0x0002 },
+  { 0x0c, 0x00, -1, 0x01, 0x0842, 0x0003 },
+  { 0x0c, 0x00, -1, 0x02, 0x0842, 0x0004 },
+  { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0005 },
+  { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0006 },
+  { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0007 },
+  { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0008 },
+  { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0009 },
+  { 0x0e, 0x00, -1, 0x00, 0x1842, 0x0002 },
+  { 0x0e, 0x00, -1, 0x01, 0x1842, 0x0003 },
+  { 0x0e, 0x00, -1, 0x02, 0x1842, 0x0004 },
+  { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0005 },
+  { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0006 },
+  { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0007 },
+  { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0008 },
+  { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0009 },
+  { 0x18, 0x00, -1, 0x00, 0x5842, 0x0002 },
+  { 0x18, 0x00, -1, 0x01, 0x5842, 0x0003 },
+  { 0x18, 0x00, -1, 0x02, 0x5842, 0x0004 },
+  { 0x18, 0x00, -1, 0x03, 0x5842, 0x0005 },
+  { 0x18, 0x00, -1, 0x03, 0x5842, 0x0006 },
+  { 0x18, 0x00, -1, 0x03, 0x5842, 0x0007 },
+  { 0x18, 0x00, -1, 0x03, 0x5842, 0x0008 },
+  { 0x18, 0x00, -1, 0x03, 0x5842, 0x0009 },
+  { 0x02, 0x05, -1, 0x03, 0x000a, 0x0046 },
+  { 0x02, 0x05, -1, 0x03, 0x000a, 0x0066 },
+  { 0x02, 0x06, -1, 0x03, 0x000a, 0x0086 },
+  { 0x02, 0x07, -1, 0x03, 0x000a, 0x00c6 },
+  { 0x02, 0x08, -1, 0x03, 0x000a, 0x0146 },
+  { 0x02, 0x09, -1, 0x03, 0x000a, 0x0246 },
+  { 0x02, 0x0a, -1, 0x03, 0x000a, 0x0446 },
+  { 0x02, 0x18, -1, 0x03, 0x000a, 0x0846 },
+  { 0x02, 0x05, -1, 0x03, 0x000e, 0x0046 },
+  { 0x02, 0x05, -1, 0x03, 0x000e, 0x0066 },
+  { 0x02, 0x06, -1, 0x03, 0x000e, 0x0086 },
+  { 0x02, 0x07, -1, 0x03, 0x000e, 0x00c6 },
+  { 0x02, 0x08, -1, 0x03, 0x000e, 0x0146 },
+  { 0x02, 0x09, -1, 0x03, 0x000e, 0x0246 },
+  { 0x02, 0x0a, -1, 0x03, 0x000e, 0x0446 },
+  { 0x02, 0x18, -1, 0x03, 0x000e, 0x0846 },
+  { 0x03, 0x05, -1, 0x03, 0x0012, 0x0046 },
+  { 0x03, 0x05, -1, 0x03, 0x0012, 0x0066 },
+  { 0x03, 0x06, -1, 0x03, 0x0012, 0x0086 },
+  { 0x03, 0x07, -1, 0x03, 0x0012, 0x00c6 },
+  { 0x03, 0x08, -1, 0x03, 0x0012, 0x0146 },
+  { 0x03, 0x09, -1, 0x03, 0x0012, 0x0246 },
+  { 0x03, 0x0a, -1, 0x03, 0x0012, 0x0446 },
+  { 0x03, 0x18, -1, 0x03, 0x0012, 0x0846 },
+  { 0x03, 0x05, -1, 0x03, 0x001a, 0x0046 },
+  { 0x03, 0x05, -1, 0x03, 0x001a, 0x0066 },
+  { 0x03, 0x06, -1, 0x03, 0x001a, 0x0086 },
+  { 0x03, 0x07, -1, 0x03, 0x001a, 0x00c6 },
+  { 0x03, 0x08, -1, 0x03, 0x001a, 0x0146 },
+  { 0x03, 0x09, -1, 0x03, 0x001a, 0x0246 },
+  { 0x03, 0x0a, -1, 0x03, 0x001a, 0x0446 },
+  { 0x03, 0x18, -1, 0x03, 0x001a, 0x0846 },
+  { 0x04, 0x05, -1, 0x03, 0x0022, 0x0046 },
+  { 0x04, 0x05, -1, 0x03, 0x0022, 0x0066 },
+  { 0x04, 0x06, -1, 0x03, 0x0022, 0x0086 },
+  { 0x04, 0x07, -1, 0x03, 0x0022, 0x00c6 },
+  { 0x04, 0x08, -1, 0x03, 0x0022, 0x0146 },
+  { 0x04, 0x09, -1, 0x03, 0x0022, 0x0246 },
+  { 0x04, 0x0a, -1, 0x03, 0x0022, 0x0446 },
+  { 0x04, 0x18, -1, 0x03, 0x0022, 0x0846 },
+  { 0x04, 0x05, -1, 0x03, 0x0032, 0x0046 },
+  { 0x04, 0x05, -1, 0x03, 0x0032, 0x0066 },
+  { 0x04, 0x06, -1, 0x03, 0x0032, 0x0086 },
+  { 0x04, 0x07, -1, 0x03, 0x0032, 0x00c6 },
+  { 0x04, 0x08, -1, 0x03, 0x0032, 0x0146 },
+  { 0x04, 0x09, -1, 0x03, 0x0032, 0x0246 },
+  { 0x04, 0x0a, -1, 0x03, 0x0032, 0x0446 },
+  { 0x04, 0x18, -1, 0x03, 0x0032, 0x0846 },
+  { 0x05, 0x05, -1, 0x03, 0x0042, 0x0046 },
+  { 0x05, 0x05, -1, 0x03, 0x0042, 0x0066 },
+  { 0x05, 0x06, -1, 0x03, 0x0042, 0x0086 },
+  { 0x05, 0x07, -1, 0x03, 0x0042, 0x00c6 },
+  { 0x05, 0x08, -1, 0x03, 0x0042, 0x0146 },
+  { 0x05, 0x09, -1, 0x03, 0x0042, 0x0246 },
+  { 0x05, 0x0a, -1, 0x03, 0x0042, 0x0446 },
+  { 0x05, 0x18, -1, 0x03, 0x0042, 0x0846 },
+  { 0x05, 0x05, -1, 0x03, 0x0062, 0x0046 },
+  { 0x05, 0x05, -1, 0x03, 0x0062, 0x0066 },
+  { 0x05, 0x06, -1, 0x03, 0x0062, 0x0086 },
+  { 0x05, 0x07, -1, 0x03, 0x0062, 0x00c6 },
+  { 0x05, 0x08, -1, 0x03, 0x0062, 0x0146 },
+  { 0x05, 0x09, -1, 0x03, 0x0062, 0x0246 },
+  { 0x05, 0x0a, -1, 0x03, 0x0062, 0x0446 },
+  { 0x05, 0x18, -1, 0x03, 0x0062, 0x0846 },
+  { 0x06, 0x01, -1, 0x03, 0x0082, 0x000a },
+  { 0x06, 0x01, -1, 0x03, 0x0082, 0x000c },
+  { 0x06, 0x02, -1, 0x03, 0x0082, 0x000e },
+  { 0x06, 0x02, -1, 0x03, 0x0082, 0x0012 },
+  { 0x06, 0x03, -1, 0x03, 0x0082, 0x0016 },
+  { 0x06, 0x03, -1, 0x03, 0x0082, 0x001e },
+  { 0x06, 0x04, -1, 0x03, 0x0082, 0x0026 },
+  { 0x06, 0x04, -1, 0x03, 0x0082, 0x0036 },
+  { 0x07, 0x01, -1, 0x03, 0x00c2, 0x000a },
+  { 0x07, 0x01, -1, 0x03, 0x00c2, 0x000c },
+  { 0x07, 0x02, -1, 0x03, 0x00c2, 0x000e },
+  { 0x07, 0x02, -1, 0x03, 0x00c2, 0x0012 },
+  { 0x07, 0x03, -1, 0x03, 0x00c2, 0x0016 },
+  { 0x07, 0x03, -1, 0x03, 0x00c2, 0x001e },
+  { 0x07, 0x04, -1, 0x03, 0x00c2, 0x0026 },
+  { 0x07, 0x04, -1, 0x03, 0x00c2, 0x0036 },
+  { 0x08, 0x01, -1, 0x03, 0x0142, 0x000a },
+  { 0x08, 0x01, -1, 0x03, 0x0142, 0x000c },
+  { 0x08, 0x02, -1, 0x03, 0x0142, 0x000e },
+  { 0x08, 0x02, -1, 0x03, 0x0142, 0x0012 },
+  { 0x08, 0x03, -1, 0x03, 0x0142, 0x0016 },
+  { 0x08, 0x03, -1, 0x03, 0x0142, 0x001e },
+  { 0x08, 0x04, -1, 0x03, 0x0142, 0x0026 },
+  { 0x08, 0x04, -1, 0x03, 0x0142, 0x0036 },
+  { 0x09, 0x01, -1, 0x03, 0x0242, 0x000a },
+  { 0x09, 0x01, -1, 0x03, 0x0242, 0x000c },
+  { 0x09, 0x02, -1, 0x03, 0x0242, 0x000e },
+  { 0x09, 0x02, -1, 0x03, 0x0242, 0x0012 },
+  { 0x09, 0x03, -1, 0x03, 0x0242, 0x0016 },
+  { 0x09, 0x03, -1, 0x03, 0x0242, 0x001e },
+  { 0x09, 0x04, -1, 0x03, 0x0242, 0x0026 },
+  { 0x09, 0x04, -1, 0x03, 0x0242, 0x0036 },
+  { 0x0a, 0x01, -1, 0x03, 0x0442, 0x000a },
+  { 0x0a, 0x01, -1, 0x03, 0x0442, 0x000c },
+  { 0x0a, 0x02, -1, 0x03, 0x0442, 0x000e },
+  { 0x0a, 0x02, -1, 0x03, 0x0442, 0x0012 },
+  { 0x0a, 0x03, -1, 0x03, 0x0442, 0x0016 },
+  { 0x0a, 0x03, -1, 0x03, 0x0442, 0x001e },
+  { 0x0a, 0x04, -1, 0x03, 0x0442, 0x0026 },
+  { 0x0a, 0x04, -1, 0x03, 0x0442, 0x0036 },
+  { 0x0c, 0x01, -1, 0x03, 0x0842, 0x000a },
+  { 0x0c, 0x01, -1, 0x03, 0x0842, 0x000c },
+  { 0x0c, 0x02, -1, 0x03, 0x0842, 0x000e },
+  { 0x0c, 0x02, -1, 0x03, 0x0842, 0x0012 },
+  { 0x0c, 0x03, -1, 0x03, 0x0842, 0x0016 },
+  { 0x0c, 0x03, -1, 0x03, 0x0842, 0x001e },
+  { 0x0c, 0x04, -1, 0x03, 0x0842, 0x0026 },
+  { 0x0c, 0x04, -1, 0x03, 0x0842, 0x0036 },
+  { 0x0e, 0x01, -1, 0x03, 0x1842, 0x000a },
+  { 0x0e, 0x01, -1, 0x03, 0x1842, 0x000c },
+  { 0x0e, 0x02, -1, 0x03, 0x1842, 0x000e },
+  { 0x0e, 0x02, -1, 0x03, 0x1842, 0x0012 },
+  { 0x0e, 0x03, -1, 0x03, 0x1842, 0x0016 },
+  { 0x0e, 0x03, -1, 0x03, 0x1842, 0x001e },
+  { 0x0e, 0x04, -1, 0x03, 0x1842, 0x0026 },
+  { 0x0e, 0x04, -1, 0x03, 0x1842, 0x0036 },
+  { 0x18, 0x01, -1, 0x03, 0x5842, 0x000a },
+  { 0x18, 0x01, -1, 0x03, 0x5842, 0x000c },
+  { 0x18, 0x02, -1, 0x03, 0x5842, 0x000e },
+  { 0x18, 0x02, -1, 0x03, 0x5842, 0x0012 },
+  { 0x18, 0x03, -1, 0x03, 0x5842, 0x0016 },
+  { 0x18, 0x03, -1, 0x03, 0x5842, 0x001e },
+  { 0x18, 0x04, -1, 0x03, 0x5842, 0x0026 },
+  { 0x18, 0x04, -1, 0x03, 0x5842, 0x0036 },
+  { 0x06, 0x05, -1, 0x03, 0x0082, 0x0046 },
+  { 0x06, 0x05, -1, 0x03, 0x0082, 0x0066 },
+  { 0x06, 0x06, -1, 0x03, 0x0082, 0x0086 },
+  { 0x06, 0x07, -1, 0x03, 0x0082, 0x00c6 },
+  { 0x06, 0x08, -1, 0x03, 0x0082, 0x0146 },
+  { 0x06, 0x09, -1, 0x03, 0x0082, 0x0246 },
+  { 0x06, 0x0a, -1, 0x03, 0x0082, 0x0446 },
+  { 0x06, 0x18, -1, 0x03, 0x0082, 0x0846 },
+  { 0x07, 0x05, -1, 0x03, 0x00c2, 0x0046 },
+  { 0x07, 0x05, -1, 0x03, 0x00c2, 0x0066 },
+  { 0x07, 0x06, -1, 0x03, 0x00c2, 0x0086 },
+  { 0x07, 0x07, -1, 0x03, 0x00c2, 0x00c6 },
+  { 0x07, 0x08, -1, 0x03, 0x00c2, 0x0146 },
+  { 0x07, 0x09, -1, 0x03, 0x00c2, 0x0246 },
+  { 0x07, 0x0a, -1, 0x03, 0x00c2, 0x0446 },
+  { 0x07, 0x18, -1, 0x03, 0x00c2, 0x0846 },
+  { 0x08, 0x05, -1, 0x03, 0x0142, 0x0046 },
+  { 0x08, 0x05, -1, 0x03, 0x0142, 0x0066 },
+  { 0x08, 0x06, -1, 0x03, 0x0142, 0x0086 },
+  { 0x08, 0x07, -1, 0x03, 0x0142, 0x00c6 },
+  { 0x08, 0x08, -1, 0x03, 0x0142, 0x0146 },
+  { 0x08, 0x09, -1, 0x03, 0x0142, 0x0246 },
+  { 0x08, 0x0a, -1, 0x03, 0x0142, 0x0446 },
+  { 0x08, 0x18, -1, 0x03, 0x0142, 0x0846 },
+  { 0x09, 0x05, -1, 0x03, 0x0242, 0x0046 },
+  { 0x09, 0x05, -1, 0x03, 0x0242, 0x0066 },
+  { 0x09, 0x06, -1, 0x03, 0x0242, 0x0086 },
+  { 0x09, 0x07, -1, 0x03, 0x0242, 0x00c6 },
+  { 0x09, 0x08, -1, 0x03, 0x0242, 0x0146 },
+  { 0x09, 0x09, -1, 0x03, 0x0242, 0x0246 },
+  { 0x09, 0x0a, -1, 0x03, 0x0242, 0x0446 },
+  { 0x09, 0x18, -1, 0x03, 0x0242, 0x0846 },
+  { 0x0a, 0x05, -1, 0x03, 0x0442, 0x0046 },
+  { 0x0a, 0x05, -1, 0x03, 0x0442, 0x0066 },
+  { 0x0a, 0x06, -1, 0x03, 0x0442, 0x0086 },
+  { 0x0a, 0x07, -1, 0x03, 0x0442, 0x00c6 },
+  { 0x0a, 0x08, -1, 0x03, 0x0442, 0x0146 },
+  { 0x0a, 0x09, -1, 0x03, 0x0442, 0x0246 },
+  { 0x0a, 0x0a, -1, 0x03, 0x0442, 0x0446 },
+  { 0x0a, 0x18, -1, 0x03, 0x0442, 0x0846 },
+  { 0x0c, 0x05, -1, 0x03, 0x0842, 0x0046 },
+  { 0x0c, 0x05, -1, 0x03, 0x0842, 0x0066 },
+  { 0x0c, 0x06, -1, 0x03, 0x0842, 0x0086 },
+  { 0x0c, 0x07, -1, 0x03, 0x0842, 0x00c6 },
+  { 0x0c, 0x08, -1, 0x03, 0x0842, 0x0146 },
+  { 0x0c, 0x09, -1, 0x03, 0x0842, 0x0246 },
+  { 0x0c, 0x0a, -1, 0x03, 0x0842, 0x0446 },
+  { 0x0c, 0x18, -1, 0x03, 0x0842, 0x0846 },
+  { 0x0e, 0x05, -1, 0x03, 0x1842, 0x0046 },
+  { 0x0e, 0x05, -1, 0x03, 0x1842, 0x0066 },
+  { 0x0e, 0x06, -1, 0x03, 0x1842, 0x0086 },
+  { 0x0e, 0x07, -1, 0x03, 0x1842, 0x00c6 },
+  { 0x0e, 0x08, -1, 0x03, 0x1842, 0x0146 },
+  { 0x0e, 0x09, -1, 0x03, 0x1842, 0x0246 },
+  { 0x0e, 0x0a, -1, 0x03, 0x1842, 0x0446 },
+  { 0x0e, 0x18, -1, 0x03, 0x1842, 0x0846 },
+  { 0x18, 0x05, -1, 0x03, 0x5842, 0x0046 },
+  { 0x18, 0x05, -1, 0x03, 0x5842, 0x0066 },
+  { 0x18, 0x06, -1, 0x03, 0x5842, 0x0086 },
+  { 0x18, 0x07, -1, 0x03, 0x5842, 0x00c6 },
+  { 0x18, 0x08, -1, 0x03, 0x5842, 0x0146 },
+  { 0x18, 0x09, -1, 0x03, 0x5842, 0x0246 },
+  { 0x18, 0x0a, -1, 0x03, 0x5842, 0x0446 },
+  { 0x18, 0x18, -1, 0x03, 0x5842, 0x0846 },
+};
+
+#endif  /* BROTLI_DEC_PREFIX_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/dec/state.c b/third-party/libjxl/libjxl/third_party/brotli/c/dec/state.c
new file mode 100644
index 0000000000..08d4c8bf68
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/dec/state.c
@@ -0,0 +1,179 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "state.h"
+
+#include <stdlib.h>  /* free, malloc */
+
+#include <brotli/types.h>
+
+#include "../common/dictionary.h"
+#include "huffman.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
+  if (!alloc_func) {  /* no allocator given: use the default pair */
+    s->alloc_func = BrotliDefaultAllocFunc;
+    s->free_func = BrotliDefaultFreeFunc;
+    s->memory_manager_opaque = 0;
+  } else {
+    s->alloc_func = alloc_func;
+    s->free_func = free_func;
+    s->memory_manager_opaque = opaque;
+  }
+  /* The rest resets decoder fields and creates the shared dictionary. */
+  s->error_code = 0; /* BROTLI_DECODER_NO_ERROR */
+  /* Bit reader, top-level state and sub-states. */
+  BrotliInitBitReader(&s->br);
+  s->state = BROTLI_STATE_UNINITED;
+  s->large_window = 0;
+  s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
+  s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_NONE;
+  s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
+  s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_NONE;
+  /* Counters and positions start at zero. */
+  s->buffer_length = 0;
+  s->loop_counter = 0;
+  s->pos = 0;
+  s->rb_roundtrips = 0;
+  s->partial_pos_out = 0;
+  s->used_input = 0;
+  /* No block trees or ring buffer exist yet. */
+  s->block_type_trees = NULL;
+  s->block_len_trees = NULL;
+  s->ringbuffer = NULL;
+  s->ringbuffer_size = 0;
+  s->new_ringbuffer_size = 0;
+  s->ringbuffer_mask = 0;
+
+  s->context_map = NULL;
+  s->context_modes = NULL;
+  s->dist_context_map = NULL;
+  s->context_map_slice = NULL;
+  s->dist_context_map_slice = NULL;
+
+  s->literal_hgroup.codes = NULL;
+  s->literal_hgroup.htrees = NULL;
+  s->insert_copy_hgroup.codes = NULL;
+  s->insert_copy_hgroup.htrees = NULL;
+  s->distance_hgroup.codes = NULL;
+  s->distance_hgroup.htrees = NULL;
+
+  s->is_last_metablock = 0;
+  s->is_uncompressed = 0;
+  s->is_metadata = 0;
+  s->should_wrap_ringbuffer = 0;
+  s->canny_ringbuffer_allocation = 1;
+
+  s->window_bits = 0;
+  s->max_distance = 0;
+  s->dist_rb[0] = 16;  /* dist_rb seeds; cf. RFC 7932, section 4 */
+  s->dist_rb[1] = 15;
+  s->dist_rb[2] = 11;
+  s->dist_rb[3] = 4;
+  s->dist_rb_idx = 0;
+  s->block_type_trees = NULL;  /* redundant: already cleared above */
+  s->block_len_trees = NULL;  /* redundant: already cleared above */
+
+  s->mtf_upper_bound = 63;
+
+  s->compound_dictionary = NULL;
+  s->dictionary =  /* gets the raw arguments, not the s->* copies */
+      BrotliSharedDictionaryCreateInstance(alloc_func, free_func, opaque);
+  if (!s->dictionary) return BROTLI_FALSE;  /* the only failure path */
+
+  return BROTLI_TRUE;
+}
+
+void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s) {
+  /* Puts the per-meta-block fields of |s| back to their starting values.
+     Pointers are simply overwritten with NULL; nothing is released here. */
+  int i;
+
+  for (i = 0; i < 3; ++i) {
+    s->block_length[i] = 1U << 24;
+    s->num_block_types[i] = 1;
+    s->block_type_rb[2 * i] = 1;
+    s->block_type_rb[2 * i + 1] = 0;
+  }
+  s->meta_block_remaining_len = 0;
+  s->dist_htree_index = 0;
+
+  /* Context data and Huffman groups start out unset. */
+  s->context_modes = NULL;
+  s->context_lookup = NULL;
+  s->context_map = NULL;
+  s->context_map_slice = NULL;
+  s->dist_context_map = NULL;
+  s->dist_context_map_slice = NULL;
+  s->literal_htree = NULL;
+  s->literal_hgroup.htrees = NULL;
+  s->literal_hgroup.codes = NULL;
+  s->insert_copy_hgroup.htrees = NULL;
+  s->insert_copy_hgroup.codes = NULL;
+  s->distance_hgroup.htrees = NULL;
+  s->distance_hgroup.codes = NULL;
+}
+
+void BrotliDecoderStateCleanupAfterMetablock(BrotliDecoderState* s) {
+  BROTLI_DECODER_FREE(s, s->context_modes);  /* meta-block scoped buffers */
+  BROTLI_DECODER_FREE(s, s->context_map);
+  BROTLI_DECODER_FREE(s, s->dist_context_map);
+  BROTLI_DECODER_FREE(s, s->literal_hgroup.htrees);  /* holds .codes too */
+  BROTLI_DECODER_FREE(s, s->insert_copy_hgroup.htrees);  /* holds .codes too */
+  BROTLI_DECODER_FREE(s, s->distance_hgroup.htrees);  /* holds .codes too */
+}
+
+#ifdef BROTLI_REPORTING
+/* If BROTLI_REPORTING is defined, an extra reporting module must be linked. */
+void BrotliDecoderOnFinish(const BrotliDecoderState* s);
+#define BROTLI_DECODER_ON_FINISH(s) BrotliDecoderOnFinish(s);
+#else  /* !BROTLI_REPORTING */
+#if !defined(BROTLI_DECODER_ON_FINISH)
+#define BROTLI_DECODER_ON_FINISH(s) (void)(s);
+#endif  /* !defined(BROTLI_DECODER_ON_FINISH) */
+#endif  /* BROTLI_REPORTING */
+
+void BrotliDecoderStateCleanup(BrotliDecoderState* s) {
+  BrotliDecoderStateCleanupAfterMetablock(s);  /* per-meta-block part */
+  /* Hook for BROTLI_REPORTING builds; otherwise expands to (void)(s). */
+  BROTLI_DECODER_ON_FINISH(s);
+  /* NOTE(review): block_len_trees is not freed below; shared allocation? */
+  BROTLI_DECODER_FREE(s, s->compound_dictionary);
+  BrotliSharedDictionaryDestroyInstance(s->dictionary);
+  s->dictionary = NULL;
+  BROTLI_DECODER_FREE(s, s->ringbuffer);
+  BROTLI_DECODER_FREE(s, s->block_type_trees);
+}
+
+BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(BrotliDecoderState* s,
+    HuffmanTreeGroup* group, uint32_t alphabet_size_max,
+    uint32_t alphabet_size_limit, uint32_t ntrees) {
+  /* 376 = 256 (1-st level table) + 4 + 7 + 15 + 31 + 63 (2-nd level
+     mix-tables). This number was found with the "unlimited" "enough"
+     calculator; it is actually a wee bigger than required in several cases
+     (especially for alphabets with fewer than 16 symbols). */
+  const size_t max_table_size = alphabet_size_limit + 376;
+  const size_t code_size = sizeof(HuffmanCode) * ntrees * max_table_size;
+  const size_t htree_size = sizeof(HuffmanCode*) * ntrees;
+  /* Pointer alignment is, hopefully, wider than sizeof(HuffmanCode). */
+  HuffmanCode** p = (HuffmanCode**)BROTLI_DECODER_ALLOC(s,
+      code_size + htree_size);  /* one block: |ntrees| pointers, then codes */
+  group->alphabet_size_max = (uint16_t)alphabet_size_max;
+  group->alphabet_size_limit = (uint16_t)alphabet_size_limit;
+  group->num_htrees = (uint16_t)ntrees;
+  group->htrees = p;
+  group->codes = p ? (HuffmanCode*)(&p[ntrees]) : NULL;  /* NULL + n is UB */
+  return !!p;  /* false when the allocation returned NULL */
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/dec/state.h b/third-party/libjxl/libjxl/third_party/brotli/c/dec/state.h
new file mode 100644
index 0000000000..6ec5c8fc48
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/dec/state.h
@@ -0,0 +1,384 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Brotli state for partial streaming decoding. */
+
+#ifndef BROTLI_DEC_STATE_H_
+#define BROTLI_DEC_STATE_H_
+
+#include <brotli/shared_dictionary.h>
+#include <brotli/types.h>
+
+#include "../common/constants.h"
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include "../common/transform.h"
+#include "bit_reader.h"
+#include "huffman.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Graphviz diagram that describes state transitions:
+
+digraph States {
+  graph [compound=true]
+  concentrate=true
+  node [shape="box"]
+
+  UNINITED -> {LARGE_WINDOW_BITS -> INITIALIZE}
+  subgraph cluster_metablock_workflow {
+    style="rounded"
+    label=< <B>METABLOCK CYCLE</B> >
+    METABLOCK_BEGIN -> METABLOCK_HEADER
+    METABLOCK_HEADER:sw -> METADATA
+    METABLOCK_HEADER:s -> UNCOMPRESSED
+    METABLOCK_HEADER:se -> METABLOCK_DONE:ne
+    METADATA:s -> METABLOCK_DONE:w
+    UNCOMPRESSED:s -> METABLOCK_DONE:n
+    METABLOCK_DONE:e -> METABLOCK_BEGIN:e [constraint="false"]
+  }
+  INITIALIZE -> METABLOCK_BEGIN
+  METABLOCK_DONE -> DONE
+
+  subgraph cluster_compressed_metablock {
+    style="rounded"
+    label=< <B>COMPRESSED METABLOCK</B> >
+
+    subgraph cluster_command {
+      style="rounded"
+      label=< <B>HOT LOOP</B> >
+
+      _METABLOCK_DONE_PORT_ [shape=point style=invis]
+
+      {
+        // Set different shape for nodes returning from "compressed metablock".
+        node [shape=invhouse]; CMD_INNER CMD_POST_DECODE_LITERALS;
+        CMD_POST_WRAP_COPY; CMD_INNER_WRITE; CMD_POST_WRITE_1;
+      }
+
+      CMD_BEGIN -> CMD_INNER -> CMD_POST_DECODE_LITERALS -> CMD_POST_WRAP_COPY
+
+      // IO ("write") nodes are not in the hot loop!
+      CMD_INNER_WRITE [style=dashed]
+      CMD_INNER -> CMD_INNER_WRITE
+      CMD_POST_WRITE_1 [style=dashed]
+      CMD_POST_DECODE_LITERALS -> CMD_POST_WRITE_1
+      CMD_POST_WRITE_2 [style=dashed]
+      CMD_POST_WRAP_COPY -> CMD_POST_WRITE_2
+
+      CMD_POST_WRITE_1 -> CMD_BEGIN:s [constraint="false"]
+      CMD_INNER_WRITE -> {CMD_INNER CMD_POST_DECODE_LITERALS}
+          [constraint="false"]
+      CMD_BEGIN:ne -> CMD_POST_DECODE_LITERALS [constraint="false"]
+      CMD_POST_WRAP_COPY -> CMD_BEGIN [constraint="false"]
+      CMD_POST_DECODE_LITERALS -> CMD_BEGIN:ne [constraint="false"]
+      CMD_POST_WRITE_2 -> CMD_POST_WRAP_COPY [constraint="false"]
+      {rank=same; CMD_BEGIN; CMD_INNER; CMD_POST_DECODE_LITERALS;
+          CMD_POST_WRAP_COPY}
+      {rank=same; CMD_INNER_WRITE; CMD_POST_WRITE_1; CMD_POST_WRITE_2}
+
+      {CMD_INNER CMD_POST_DECODE_LITERALS CMD_POST_WRAP_COPY} ->
+          _METABLOCK_DONE_PORT_ [style=invis]
+      {CMD_INNER_WRITE CMD_POST_WRITE_1} -> _METABLOCK_DONE_PORT_
+          [constraint="false" style=invis]
+    }
+
+    BEFORE_COMPRESSED_METABLOCK_HEADER:s -> HUFFMAN_CODE_0:n
+    HUFFMAN_CODE_0 -> HUFFMAN_CODE_1 -> HUFFMAN_CODE_2 -> HUFFMAN_CODE_3
+    HUFFMAN_CODE_0 -> METABLOCK_HEADER_2 -> CONTEXT_MODES -> CONTEXT_MAP_1
+    CONTEXT_MAP_1 -> CONTEXT_MAP_2 -> TREE_GROUP
+    TREE_GROUP -> BEFORE_COMPRESSED_METABLOCK_BODY:e
+    BEFORE_COMPRESSED_METABLOCK_BODY:s -> CMD_BEGIN:n
+
+    HUFFMAN_CODE_3:e -> HUFFMAN_CODE_0:ne [constraint="false"]
+    {rank=same; HUFFMAN_CODE_0; HUFFMAN_CODE_1; HUFFMAN_CODE_2; HUFFMAN_CODE_3}
+    {rank=same; METABLOCK_HEADER_2; CONTEXT_MODES; CONTEXT_MAP_1; CONTEXT_MAP_2;
+        TREE_GROUP}
+  }
+  METABLOCK_HEADER:e -> BEFORE_COMPRESSED_METABLOCK_HEADER:n
+
+  _METABLOCK_DONE_PORT_ -> METABLOCK_DONE:se
+      [constraint="false" ltail=cluster_command]
+
+  UNINITED [shape=Mdiamond];
+  DONE [shape=Msquare];
+}
+
+
+ */
+
+typedef enum {
+  BROTLI_STATE_UNINITED,  /* value stored by BrotliDecoderStateInit */
+  BROTLI_STATE_LARGE_WINDOW_BITS,
+  BROTLI_STATE_INITIALIZE,
+  BROTLI_STATE_METABLOCK_BEGIN,
+  BROTLI_STATE_METABLOCK_HEADER,
+  BROTLI_STATE_METABLOCK_HEADER_2,
+  BROTLI_STATE_CONTEXT_MODES,
+  BROTLI_STATE_COMMAND_BEGIN,  /* COMMAND_* are the diagram's CMD_* nodes */
+  BROTLI_STATE_COMMAND_INNER,
+  BROTLI_STATE_COMMAND_POST_DECODE_LITERALS,
+  BROTLI_STATE_COMMAND_POST_WRAP_COPY,
+  BROTLI_STATE_UNCOMPRESSED,
+  BROTLI_STATE_METADATA,
+  BROTLI_STATE_COMMAND_INNER_WRITE,
+  BROTLI_STATE_METABLOCK_DONE,
+  BROTLI_STATE_COMMAND_POST_WRITE_1,
+  BROTLI_STATE_COMMAND_POST_WRITE_2,
+  BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_HEADER,
+  BROTLI_STATE_HUFFMAN_CODE_0,
+  BROTLI_STATE_HUFFMAN_CODE_1,
+  BROTLI_STATE_HUFFMAN_CODE_2,
+  BROTLI_STATE_HUFFMAN_CODE_3,
+  BROTLI_STATE_CONTEXT_MAP_1,
+  BROTLI_STATE_CONTEXT_MAP_2,
+  BROTLI_STATE_TREE_GROUP,
+  BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_BODY,
+  BROTLI_STATE_DONE
+} BrotliRunningState;  /* declaration order is not transition order */
+
+typedef enum {
+  BROTLI_STATE_METABLOCK_HEADER_NONE,  /* set by BrotliDecoderStateInit */
+  BROTLI_STATE_METABLOCK_HEADER_EMPTY,
+  BROTLI_STATE_METABLOCK_HEADER_NIBBLES,
+  BROTLI_STATE_METABLOCK_HEADER_SIZE,
+  BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED,
+  BROTLI_STATE_METABLOCK_HEADER_RESERVED,
+  BROTLI_STATE_METABLOCK_HEADER_BYTES,
+  BROTLI_STATE_METABLOCK_HEADER_METADATA
+} BrotliRunningMetablockHeaderState;  /* for s->substate_metablock_header */
+
+typedef enum {
+  BROTLI_STATE_UNCOMPRESSED_NONE,  /* set by BrotliDecoderStateInit */
+  BROTLI_STATE_UNCOMPRESSED_WRITE
+} BrotliRunningUncompressedState;  /* for s->substate_uncompressed */
+
+typedef enum {
+  BROTLI_STATE_TREE_GROUP_NONE,  /* first enumerator, so value 0 */
+  BROTLI_STATE_TREE_GROUP_LOOP
+} BrotliRunningTreeGroupState;  /* BrotliMetablockHeaderArena sub-state */
+
+typedef enum {
+  BROTLI_STATE_CONTEXT_MAP_NONE,  /* first enumerator, so value 0 */
+  BROTLI_STATE_CONTEXT_MAP_READ_PREFIX,
+  BROTLI_STATE_CONTEXT_MAP_HUFFMAN,
+  BROTLI_STATE_CONTEXT_MAP_DECODE,
+  BROTLI_STATE_CONTEXT_MAP_TRANSFORM
+} BrotliRunningContextMapState;  /* BrotliMetablockHeaderArena sub-state */
+
+typedef enum {
+  BROTLI_STATE_HUFFMAN_NONE,  /* first enumerator, so value 0 */
+  BROTLI_STATE_HUFFMAN_SIMPLE_SIZE,
+  BROTLI_STATE_HUFFMAN_SIMPLE_READ,
+  BROTLI_STATE_HUFFMAN_SIMPLE_BUILD,
+  BROTLI_STATE_HUFFMAN_COMPLEX,
+  BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS
+} BrotliRunningHuffmanState;  /* BrotliMetablockHeaderArena sub-state */
+
+typedef enum {
+  BROTLI_STATE_DECODE_UINT8_NONE,  /* set by BrotliDecoderStateInit */
+  BROTLI_STATE_DECODE_UINT8_SHORT,
+  BROTLI_STATE_DECODE_UINT8_LONG
+} BrotliRunningDecodeUint8State;  /* for s->substate_decode_uint8 */
+
+typedef enum {
+  BROTLI_STATE_READ_BLOCK_LENGTH_NONE,  /* set by BrotliDecoderStateInit */
+  BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX
+} BrotliRunningReadBlockLengthState;  /* for s->substate_read_block_length */
+
+/* BrotliDecoderState addon, used for Compound Dictionary functionality. */
+typedef struct BrotliDecoderCompoundDictionary {  /* s->compound_dictionary */
+  int num_chunks;  /* NOTE(review): presumably entries used in chunks[] */
+  int total_size;
+  int br_index;  /* NOTE(review): br_* look like copy-cursor state; confirm */
+  int br_offset;
+  int br_length;
+  int br_copied;
+  const uint8_t* chunks[16];  /* read-only views; room for 16 pointers */
+  int chunk_offsets[16];
+  int block_bits;
+  uint8_t block_map[256];
+} BrotliDecoderCompoundDictionary;
+
+typedef struct BrotliMetablockHeaderArena {
+  BrotliRunningTreeGroupState substate_tree_group;
+  BrotliRunningContextMapState substate_context_map;
+  BrotliRunningHuffmanState substate_huffman;
+
+  uint32_t sub_loop_counter;
+
+  uint32_t repeat_code_len;
+  uint32_t prev_code_len;
+
+  /* For ReadHuffmanCode. */
+  uint32_t symbol;
+  uint32_t repeat;
+  uint32_t space;
+
+  /* Huffman table for "histograms". */
+  HuffmanCode table[32];
+  /* List of heads of symbol chains. */
+  uint16_t* symbol_lists;  /* NOTE(review): presumably points into the array */
+  /* Storage for symbol_lists. */
+  uint16_t symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1 +
+                               BROTLI_NUM_COMMAND_SYMBOLS];
+  /* Tails of symbol chains. */
+  int next_symbol[32];
+  uint8_t code_length_code_lengths[BROTLI_CODE_LENGTH_CODES];
+  /* Population counts for the code lengths. */
+  uint16_t code_length_histo[16];
+
+  /* For HuffmanTreeGroupDecode. */
+  int htree_index;
+  HuffmanCode* next;
+
+  /* For DecodeContextMap. */
+  uint32_t context_index;
+  uint32_t max_run_length_prefix;
+  uint32_t code;
+  HuffmanCode context_map_table[BROTLI_HUFFMAN_MAX_SIZE_272];
+} BrotliMetablockHeaderArena;
+
+typedef struct BrotliMetablockBodyArena {
+  uint8_t dist_extra_bits[544];  /* presumably per distance symbol; confirm */
+  uint32_t dist_offset[544];  /* same length as dist_extra_bits */
+} BrotliMetablockBodyArena;
+
+struct BrotliDecoderStateStruct {
+  BrotliRunningState state;
+
+  /* This counter is reused for several disjoint loops. */
+  int loop_counter;
+
+  BrotliBitReader br;
+
+  brotli_alloc_func alloc_func;  /* invoked by BROTLI_DECODER_ALLOC */
+  brotli_free_func free_func;  /* invoked by BROTLI_DECODER_FREE */
+  void* memory_manager_opaque;  /* passed as first argument to alloc_func / free_func */
+
+  /* Temporary storage for remaining input. Brotli stream format is designed in
+     a way, that 64 bits are enough to make progress in decoding. */
+  union {
+    uint64_t u64;
+    uint8_t u8[8];
+  } buffer;
+  uint32_t buffer_length;
+
+  int pos;
+  int max_backward_distance;
+  int max_distance;
+  int ringbuffer_size;
+  int ringbuffer_mask;
+  int dist_rb_idx;
+  int dist_rb[4];  /* NOTE(review): likely the "ring buffer of past copy distances" described further down */
+  int error_code;
+  uint8_t* ringbuffer;
+  uint8_t* ringbuffer_end;
+  HuffmanCode* htree_command;
+  const uint8_t* context_lookup;
+  uint8_t* context_map_slice;
+  uint8_t* dist_context_map_slice;
+
+  /* This ring buffer holds a few past copy distances that will be used by
+     some special distance codes.
+     NOTE(review): this sentence appears to describe dist_rb / dist_rb_idx
+     declared above, not the Huffman tree groups that follow -- confirm and
+     move it if so. */
+  HuffmanTreeGroup literal_hgroup;
+  HuffmanTreeGroup insert_copy_hgroup;
+  HuffmanTreeGroup distance_hgroup;
+  HuffmanCode* block_type_trees;
+  HuffmanCode* block_len_trees;
+  /* This is true if the literal context map histogram type always matches the
+     block type. It is then not needed to keep the context (faster decoding). */
+  int trivial_literal_context;
+  /* Distance context is actual after command is decoded and before distance is
+     computed. After distance computation it is used as a temporary variable. */
+  int distance_context;
+  int meta_block_remaining_len;
+  uint32_t block_length_index;
+  uint32_t block_length[3];
+  uint32_t num_block_types[3];
+  uint32_t block_type_rb[6];
+  uint32_t distance_postfix_bits;
+  uint32_t num_direct_distance_codes;
+  uint32_t num_dist_htrees;
+  uint8_t* dist_context_map;
+  HuffmanCode* literal_htree;
+  uint8_t dist_htree_index;
+
+  int copy_length;
+  int distance_code;
+
+  /* For partial write operations. */
+  size_t rb_roundtrips;  /* how many times we went around the ring-buffer */
+  size_t partial_pos_out;  /* how much output to the user in total */
+
+  /* For InverseMoveToFrontTransform. */
+  uint32_t mtf_upper_bound;
+  uint32_t mtf[64 + 1];
+
+  /* Less used attributes are at the end of this struct. */
+
+  /* For reporting. */
+  uint64_t used_input;  /* how many bytes of input are consumed */
+
+  /* States inside function calls. */
+  BrotliRunningMetablockHeaderState substate_metablock_header;
+  BrotliRunningUncompressedState substate_uncompressed;
+  BrotliRunningDecodeUint8State substate_decode_uint8;
+  BrotliRunningReadBlockLengthState substate_read_block_length;
+
+  unsigned int is_last_metablock : 1;  /* this and the next five members are 1-bit flags */
+  unsigned int is_uncompressed : 1;
+  unsigned int is_metadata : 1;
+  unsigned int should_wrap_ringbuffer : 1;
+  unsigned int canny_ringbuffer_allocation : 1;
+  unsigned int large_window : 1;
+  unsigned int size_nibbles : 8;  /* 8-bit bit-field, not a flag */
+  uint32_t window_bits;
+
+  int new_ringbuffer_size;
+
+  uint32_t num_literal_htrees;
+  uint8_t* context_map;
+  uint8_t* context_modes;
+
+  BrotliSharedDictionary* dictionary;
+  BrotliDecoderCompoundDictionary* compound_dictionary;
+
+  uint32_t trivial_literal_contexts[8];  /* 256 bits */
+
+  union {  /* header and body arenas share the same storage */
+    BrotliMetablockHeaderArena header;
+    BrotliMetablockBodyArena body;
+  } arena;
+};
+
+typedef struct BrotliDecoderStateStruct BrotliDecoderStateInternal;
+#define BrotliDecoderState BrotliDecoderStateInternal  /* from here on BrotliDecoderState expands to the internal typedef */
+
+BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
+BROTLI_INTERNAL void BrotliDecoderStateCleanup(BrotliDecoderState* s);
+BROTLI_INTERNAL void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s);
+BROTLI_INTERNAL void BrotliDecoderStateCleanupAfterMetablock(
+    BrotliDecoderState* s);
+BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(
+    BrotliDecoderState* s, HuffmanTreeGroup* group, uint32_t alphabet_size_max,
+    uint32_t alphabet_size_limit, uint32_t ntrees);
+
+#define BROTLI_DECODER_ALLOC(S, L) S->alloc_func(S->memory_manager_opaque, L)  /* S is expanded twice and is not parenthesized */
+
+#define BROTLI_DECODER_FREE(S, X) {          \
+  S->free_func(S->memory_manager_opaque, X); \
+  X = NULL;                                  \
+}  /* frees X via free_func, then sets X to NULL; X is expanded twice, so it must be a side-effect-free lvalue. NOTE(review): plain { } block rather than do { } while (0), so a following ';' is a separate empty statement. */
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_DEC_STATE_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/backward_references.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/backward_references.c
new file mode 100644
index 0000000000..ff5b7becfc
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/backward_references.c
@@ -0,0 +1,205 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function to find backward reference copies. */
+
+#include "backward_references.h"
+
+#include <brotli/types.h>
+
+#include "../common/constants.h"
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include "command.h"
+#include "compound_dictionary.h"
+#include "dictionary_hash.h"
+#include "encoder_dict.h"
+#include "memory.h"
+#include "quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Maps a backward distance to its distance code.
+   Distances above |max_distance| always take the long form
+   distance + BROTLI_NUM_DISTANCE_SHORT_CODES - 1. Otherwise a short code is
+   returned when the distance equals one of the four |dist_cache| entries or
+   lies within 3 of either of the first two; the checks run in the order
+   written below, so earlier matches win. */
+static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance,
+                                                size_t max_distance,
+                                                const int* dist_cache) {
+  const size_t long_code = distance + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
+  size_t last;
+  size_t second_last;
+  size_t near_last;
+  size_t near_second_last;
+
+  if (distance > max_distance) return long_code;
+
+  last = (size_t)dist_cache[0];
+  second_last = (size_t)dist_cache[1];
+  if (distance == last) return 0;
+  if (distance == second_last) return 1;
+
+  /* distance + 3 - cached value is below 7 when the distance is within 3 of
+     the cached value; the matching hex digit of the constant is then the
+     short code for that delta. */
+  near_last = distance + 3 - last;
+  if (near_last < 7) return (0x9750468 >> (4 * near_last)) & 0xF;
+  near_second_last = distance + 3 - second_last;
+  if (near_second_last < 7) {
+    return (0xFDB1ACE >> (4 * near_second_last)) & 0xF;
+  }
+
+  if (distance == (size_t)dist_cache[2]) return 2;
+  if (distance == (size_t)dist_cache[3]) return 3;
+  return long_code;
+}
+
+#define EXPAND_CAT(a, b) CAT(a, b)  /* extra level so a and b are macro-expanded before pasting */
+#define CAT(a, b) a ## b
+#define FN(X) EXPAND_CAT(X, HASHER())  /* X followed by the current hasher id */
+#define EXPORT_FN(X) EXPAND_CAT(X, EXPAND_CAT(PREFIX(), HASHER()))  /* X followed by prefix and hasher id */
+
+#define PREFIX() N  /* NOTE(review): presumably yields the CreateBackwardReferencesNH<id> names dispatched below -- confirm in backward_references_inc.h */
+#define ENABLE_COMPOUND_DICTIONARY 0
+
+#define HASHER() H2
+/* NOLINTNEXTLINE(build/include) */
+#include "backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H3
+/* NOLINTNEXTLINE(build/include) */
+#include "backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H4
+/* NOLINTNEXTLINE(build/include) */
+#include "backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H5
+/* NOLINTNEXTLINE(build/include) */
+#include "backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H6
+/* NOLINTNEXTLINE(build/include) */
+#include "backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H40
+/* NOLINTNEXTLINE(build/include) */
+#include "backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H41
+/* NOLINTNEXTLINE(build/include) */
+#include "backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H42
+/* NOLINTNEXTLINE(build/include) */
+#include "backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H54
+/* NOLINTNEXTLINE(build/include) */
+#include "backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H35
+/* NOLINTNEXTLINE(build/include) */
+#include "backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H55
+/* NOLINTNEXTLINE(build/include) */
+#include "backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H65
+/* NOLINTNEXTLINE(build/include) */
+#include "backward_references_inc.h"
+#undef HASHER
+
+#undef ENABLE_COMPOUND_DICTIONARY
+#undef PREFIX
+#define PREFIX() D
+#define ENABLE_COMPOUND_DICTIONARY 1
+
+#define HASHER() H5
+/* NOLINTNEXTLINE(build/include) */
+#include "backward_references_inc.h"
+#undef HASHER
+#define HASHER() H6
+/* NOLINTNEXTLINE(build/include) */
+#include "backward_references_inc.h"
+#undef HASHER
+#define HASHER() H40
+/* NOLINTNEXTLINE(build/include) */
+#include "backward_references_inc.h"
+#undef HASHER
+#define HASHER() H41
+/* NOLINTNEXTLINE(build/include) */
+#include "backward_references_inc.h"
+#undef HASHER
+#define HASHER() H42
+/* NOLINTNEXTLINE(build/include) */
+#include "backward_references_inc.h"
+#undef HASHER
+#define HASHER() H55
+/* NOLINTNEXTLINE(build/include) */
+#include "backward_references_inc.h"
+#undef HASHER
+#define HASHER() H65
+/* NOLINTNEXTLINE(build/include) */
+#include "backward_references_inc.h"
+#undef HASHER
+
+#undef ENABLE_COMPOUND_DICTIONARY
+#undef PREFIX
+
+#undef EXPORT_FN
+#undef FN
+#undef CAT
+#undef EXPAND_CAT
+
+/* Dispatches to the backward-reference implementation generated for
+   params->hasher.type.
+   When the compound dictionary has at least one chunk, the "DH" variants are
+   tried first (hasher types 5, 6, 40, 41, 42, 55, 65); any other type falls
+   through to the "NH" variants listed by FOR_GENERIC_HASHERS. If no case
+   matches there either, the function returns without doing anything. */
+void BrotliCreateBackwardReferences(size_t num_bytes,
+    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    ContextLut literal_context_lut, const BrotliEncoderParams* params,
+    Hasher* hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals) {
+/* Argument list forwarded unchanged to every specialised implementation. */
+#define FORWARDED_ARGS_                                   \
+    num_bytes, position, ringbuffer, ringbuffer_mask,     \
+    literal_context_lut, params, hasher, dist_cache,      \
+    last_insert_len, commands, num_commands, num_literals
+#define COMPOUND_CASE_(N)                                 \
+    case N:                                               \
+      CreateBackwardReferencesDH ## N(FORWARDED_ARGS_);   \
+      return;
+#define GENERIC_CASE_(N)                                  \
+    case N:                                               \
+      CreateBackwardReferencesNH ## N(FORWARDED_ARGS_);   \
+      return;
+
+  if (params->dictionary.compound.num_chunks != 0) {
+    switch (params->hasher.type) {
+      COMPOUND_CASE_(5)
+      COMPOUND_CASE_(6)
+      COMPOUND_CASE_(40)
+      COMPOUND_CASE_(41)
+      COMPOUND_CASE_(42)
+      COMPOUND_CASE_(55)
+      COMPOUND_CASE_(65)
+      default:
+        /* No compound-dictionary variant: use the generic one below. */
+        break;
+    }
+  }
+
+  switch (params->hasher.type) {
+    FOR_GENERIC_HASHERS(GENERIC_CASE_)
+    default:
+      break;
+  }
+
+#undef GENERIC_CASE_
+#undef COMPOUND_CASE_
+#undef FORWARDED_ARGS_
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/backward_references.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/backward_references.h
new file mode 100644
index 0000000000..20fb98a4d8
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/backward_references.h
@@ -0,0 +1,40 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function to find backward reference copies. */
+
+#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
+#define BROTLI_ENC_BACKWARD_REFERENCES_H_
+
+#include <brotli/types.h>
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include "command.h"
+#include "hash.h"
+#include "quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* "commands" points to the next output command to write to, "*num_commands" is
+   initially the total amount of commands output by previous
+   CreateBackwardReferences calls, and must be incremented by the amount written
+   by this call. */
+BROTLI_INTERNAL void BrotliCreateBackwardReferences(size_t num_bytes,
+    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    ContextLut literal_context_lut, const BrotliEncoderParams* params,
+    Hasher* hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_BACKWARD_REFERENCES_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/backward_references_hq.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/backward_references_hq.c
new file mode 100644
index 0000000000..6325032e1c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/backward_references_hq.c
@@ -0,0 +1,939 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function to find backward reference copies. */
+
+#include "backward_references_hq.h"
+
+#include <string.h>  /* memcpy, memset */
+
+#include <brotli/types.h>
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include "command.h"
+#include "compound_dictionary.h"
+#include "encoder_dict.h"
+#include "fast_log.h"
+#include "find_match_length.h"
+#include "literal_cost.h"
+#include "memory.h"
+#include "params.h"
+#include "prefix.h"
+#include "quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* BrotliCalculateDistanceCodeLimit(BROTLI_MAX_ALLOWED_DISTANCE, 3, 120). */
+#define BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE 544
+
+static const float kInfinity = 1.7e38f;  /* ~= 2 ^ 127 */
+
+static const uint32_t kDistanceCacheIndex[] = {  /* per short code j: which distance_cache slot to read */
+  0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
+};
+static const int kDistanceCacheOffset[] = {  /* per short code j: signed delta added to that cached distance */
+  0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
+};
+
+/* Fills array[0..length) with copies of one template node: length field 1,
+   distance 0, dcode_insert_length 0 and cost kInfinity. */
+void BrotliInitZopfliNodes(ZopfliNode* array, size_t length) {
+  ZopfliNode blank;
+  ZopfliNode* cursor = array;
+  size_t remaining = length;
+  blank.u.cost = kInfinity;
+  blank.dcode_insert_length = 0;
+  blank.distance = 0;
+  blank.length = 1;
+  while (remaining-- != 0) *cursor++ = blank;
+}
+
+/* Extracts the copy length kept in the low 25 bits of |length|. */
+static BROTLI_INLINE uint32_t ZopfliNodeCopyLength(const ZopfliNode* self) {
+  const uint32_t copy_length_mask = 0x1FFFFFF;
+  return self->length & copy_length_mask;
+}
+
+/* Rebuilds the length code: copy length + 9 minus the modifier stored in the
+   bits of |length| above the 25-bit copy length. */
+static BROTLI_INLINE uint32_t ZopfliNodeLengthCode(const ZopfliNode* self) {
+  const uint32_t copy_length = ZopfliNodeCopyLength(self);
+  const uint32_t stored_modifier = self->length >> 25;
+  return copy_length + 9u - stored_modifier;
+}
+
+static BROTLI_INLINE uint32_t ZopfliNodeCopyDistance(const ZopfliNode* self) {
+  return self->distance;  /* returned as stored; no masking or shifting on this field */
+}
+
+/* Returns the distance code recorded for this node. The bits of
+   |dcode_insert_length| from bit 27 upward hold (short code + 1); a zero
+   there means the code is derived from the copy distance instead. */
+static BROTLI_INLINE uint32_t ZopfliNodeDistanceCode(const ZopfliNode* self) {
+  const uint32_t stored_short_code = self->dcode_insert_length >> 27;
+  if (stored_short_code != 0) {
+    return stored_short_code - 1;
+  }
+  return ZopfliNodeCopyDistance(self) + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
+}
+
+/* Length of the whole command ending at this node: the copy length plus the
+   insert length kept in the low 27 bits of |dcode_insert_length|. */
+static BROTLI_INLINE uint32_t ZopfliNodeCommandLength(const ZopfliNode* self) {
+  const uint32_t insert_length = self->dcode_insert_length & 0x7FFFFFF;
+  return ZopfliNodeCopyLength(self) + insert_length;
+}
+
+/* Temporary data for ZopfliCostModelSetFromCommands. */
+typedef struct ZopfliCostModelArena {
+  uint32_t histogram_literal[BROTLI_NUM_LITERAL_SYMBOLS];  /* count per inserted literal byte value */
+  uint32_t histogram_cmd[BROTLI_NUM_COMMAND_SYMBOLS];  /* count per cmd_prefix_ value */
+  uint32_t histogram_dist[BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE];  /* count per (dist_prefix_ & 0x3FF) */
+  float cost_literal[BROTLI_NUM_LITERAL_SYMBOLS];  /* SetCost output for histogram_literal */
+} ZopfliCostModelArena;
+
+/* Histogram based cost model for zopflification. */
+typedef struct ZopfliCostModel {
+  /* The insert and copy length symbols. */
+  float cost_cmd_[BROTLI_NUM_COMMAND_SYMBOLS];
+  float* cost_dist_;  /* allocated in InitZopfliCostModel with dist->alphabet_size_limit entries */
+  uint32_t distance_histogram_size;  /* set to the same alphabet_size_limit */
+  /* Cumulative costs of literals per position in the stream. */
+  float* literal_costs_;  /* allocated with num_bytes_ + 2 entries; entry 0 is set to 0 */
+  float min_cost_cmd_;  /* smallest cost_cmd_ entry, as computed by the two SetFrom* functions */
+  size_t num_bytes_;
+
+  /* Temporary data. */
+  union {
+    size_t literal_histograms[3 * 256];  /* scratch passed to BrotliEstimateBitCostsForLiterals */
+    ZopfliCostModelArena arena;  /* scratch for ZopfliCostModelSetFromCommands */
+  };
+} ZopfliCostModel;
+
+/* Prepares |self| for a block of |num_bytes| bytes: records the sizes and
+   allocates the two cost tables through |m|. The buffers are released by
+   CleanupZopfliCostModel. */
+static void InitZopfliCostModel(
+    MemoryManager* m, ZopfliCostModel* self, const BrotliDistanceParams* dist,
+    size_t num_bytes) {
+  /* Plain bookkeeping first; it does not depend on the allocations. */
+  self->num_bytes_ = num_bytes;
+  self->distance_histogram_size = dist->alphabet_size_limit;
+
+  /* num_bytes + 2 literal-cost slots, one distance-cost slot per symbol. */
+  self->literal_costs_ = BROTLI_ALLOC(m, float, num_bytes + 2);
+  self->cost_dist_ = BROTLI_ALLOC(m, float, dist->alphabet_size_limit);
+  if (BROTLI_IS_OOM(m)) return;
+}
+
+static void CleanupZopfliCostModel(MemoryManager* m, ZopfliCostModel* self) {
+  BROTLI_FREE(m, self->literal_costs_);  /* both buffers were allocated in InitZopfliCostModel */
+  BROTLI_FREE(m, self->cost_dist_);
+}
+
+/* Converts |histogram| (|histogram_size| counts) into per-symbol bit costs
+   written to |cost|.
+   - A symbol with count 0 costs FastLog2(missing_symbol_sum) + 2, where
+     missing_symbol_sum is the total count, plus the number of zero-count
+     symbols unless |literal_histogram| is set.
+   - Any other symbol costs FastLog2(total) - FastLog2(count), raised to at
+     least 1. */
+static void SetCost(const uint32_t* histogram, size_t histogram_size,
+                    BROTLI_BOOL literal_histogram, float* cost) {
+  size_t total = 0;
+  size_t zero_count = 0;
+  size_t missing_symbol_sum;
+  float log2_total;
+  float missing_symbol_cost;
+  size_t s;
+
+  /* One pass gathers both the total and the number of unused symbols. */
+  for (s = 0; s < histogram_size; ++s) {
+    total += histogram[s];
+    if (histogram[s] == 0) ++zero_count;
+  }
+  log2_total = (float)FastLog2(total);
+
+  missing_symbol_sum = total;
+  if (!literal_histogram) missing_symbol_sum += zero_count;
+  missing_symbol_cost = (float)FastLog2(missing_symbol_sum) + 2;
+
+  for (s = 0; s < histogram_size; ++s) {
+    if (histogram[s] != 0) {
+      /* Shannon bits for this symbol; cannot be coded with less than 1 bit. */
+      cost[s] = log2_total - (float)FastLog2(histogram[s]);
+      if (cost[s] < 1) cost[s] = 1;
+    } else {
+      cost[s] = missing_symbol_cost;
+    }
+  }
+}
+
+static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
+                                           size_t position,
+                                           const uint8_t* ringbuffer,
+                                           size_t ringbuffer_mask,
+                                           const Command* commands,
+                                           size_t num_commands,
+                                           size_t last_insert_len) {
+  ZopfliCostModelArena* arena = &self->arena;
+  size_t pos = position - last_insert_len;  /* histogram walk starts last_insert_len bytes before |position| */
+  float min_cost_cmd = kInfinity;
+  size_t i;
+  float* cost_cmd = self->cost_cmd_;
+
+  memset(arena->histogram_literal, 0, sizeof(arena->histogram_literal));
+  memset(arena->histogram_cmd, 0, sizeof(arena->histogram_cmd));
+  memset(arena->histogram_dist, 0, sizeof(arena->histogram_dist));
+
+  for (i = 0; i < num_commands; i++) {  /* build the three histograms from |commands| */
+    size_t inslength = commands[i].insert_len_;
+    size_t copylength = CommandCopyLen(&commands[i]);
+    size_t distcode = commands[i].dist_prefix_ & 0x3FF;  /* low 10 bits only */
+    size_t cmdcode = commands[i].cmd_prefix_;
+    size_t j;
+
+    arena->histogram_cmd[cmdcode]++;
+    if (cmdcode >= 128) arena->histogram_dist[distcode]++;  /* distance is counted only for cmdcode >= 128 */
+
+    for (j = 0; j < inslength; j++) {  /* count each inserted literal byte */
+      arena->histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
+    }
+
+    pos += inslength + copylength;  /* advance past the bytes this command covers */
+  }
+
+  SetCost(arena->histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, BROTLI_TRUE,
+          arena->cost_literal);
+  SetCost(arena->histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, BROTLI_FALSE,
+          cost_cmd);
+  SetCost(arena->histogram_dist, self->distance_histogram_size, BROTLI_FALSE,
+          self->cost_dist_);
+
+  for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {  /* cheapest command symbol */
+    min_cost_cmd = BROTLI_MIN(float, min_cost_cmd, cost_cmd[i]);
+  }
+  self->min_cost_cmd_ = min_cost_cmd;
+
+  {  /* prefix sums of literal costs over the num_bytes_ bytes from |position| */
+    float* literal_costs = self->literal_costs_;
+    float literal_carry = 0.0;
+    size_t num_bytes = self->num_bytes_;
+    literal_costs[0] = 0.0;
+    for (i = 0; i < num_bytes; ++i) {
+      literal_carry +=
+          arena->cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
+      literal_costs[i + 1] = literal_costs[i] + literal_carry;
+      literal_carry -= literal_costs[i + 1] - literal_costs[i];  /* keep only what rounding dropped from the stored sum */
+    }
+  }
+}
+
+/* Sets up the model without command statistics:
+   - literal_costs_[1..num_bytes_] receive the per-byte estimates from
+     BrotliEstimateBitCostsForLiterals and are then turned in place into
+     running totals (literal_costs_[0] = 0), with a carry term that feeds the
+     rounding remainder of each addition into the next one;
+   - command symbol i costs FastLog2(11 + i), distance symbol i costs
+     FastLog2(20 + i), and the minimum command cost is FastLog2(11). */
+static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self,
+                                               size_t position,
+                                               const uint8_t* ringbuffer,
+                                               size_t ringbuffer_mask) {
+  const size_t total_bytes = self->num_bytes_;
+  float* const running = self->literal_costs_;
+  float* const dist_costs = self->cost_dist_;
+  float* const cmd_costs = self->cost_cmd_;
+  float carry = 0.0;
+  size_t k;
+
+  BrotliEstimateBitCostsForLiterals(position, total_bytes, ringbuffer_mask,
+                                    ringbuffer, self->literal_histograms,
+                                    &running[1]);
+  running[0] = 0.0;
+  for (k = 0; k < total_bytes; ++k) {
+    carry += running[k + 1];
+    running[k + 1] = running[k] + carry;
+    carry -= running[k + 1] - running[k];
+  }
+
+  for (k = 0; k < BROTLI_NUM_COMMAND_SYMBOLS; ++k) {
+    cmd_costs[k] = (float)FastLog2(11 + (uint32_t)k);
+  }
+  for (k = 0; k < self->distance_histogram_size; ++k) {
+    dist_costs[k] = (float)FastLog2(20 + (uint32_t)k);
+  }
+  self->min_cost_cmd_ = (float)FastLog2(11);
+}
+
+static BROTLI_INLINE float ZopfliCostModelGetCommandCost(
+    const ZopfliCostModel* self, uint16_t cmdcode) {
+  return self->cost_cmd_[cmdcode];  /* plain table lookup; |cmdcode| is not range-checked here */
+}
+
+static BROTLI_INLINE float ZopfliCostModelGetDistanceCost(
+    const ZopfliCostModel* self, size_t distcode) {
+  return self->cost_dist_[distcode];  /* plain table lookup; |distcode| is not range-checked here */
+}
+
+static BROTLI_INLINE float ZopfliCostModelGetLiteralCosts(
+    const ZopfliCostModel* self, size_t from, size_t to) {
+  return self->literal_costs_[to] - self->literal_costs_[from];  /* difference of two cumulative entries */
+}
+
+static BROTLI_INLINE float ZopfliCostModelGetMinCostCmd(
+    const ZopfliCostModel* self) {
+  return self->min_cost_cmd_;  /* value stored by the ZopfliCostModelSetFrom* functions */
+}
+
+/* Records at nodes[pos + len] a command with insert length
+   (pos - start_pos), copy length |len| and distance |dist|, together with
+   its |short_code|, its length-code modifier and its |cost|.
+   REQUIRES: len >= 2, start_pos <= pos
+   REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity
+   Maintains the "ZopfliNode array invariant". */
+static BROTLI_INLINE void UpdateZopfliNode(ZopfliNode* nodes, size_t pos,
+    size_t start_pos, size_t len, size_t len_code, size_t dist,
+    size_t short_code, float cost) {
+  const size_t length_modifier = len + 9u - len_code;
+  const size_t insert_length = pos - start_pos;
+  ZopfliNode* const target = nodes + (pos + len);
+  target->u.cost = cost;
+  target->distance = (uint32_t)dist;
+  /* Copy length in the low 25 bits, modifier above it. */
+  target->length = (uint32_t)(len | (length_modifier << 25));
+  /* Insert length in the low bits, short code from bit 27 upward. */
+  target->dcode_insert_length =
+      (uint32_t)((short_code << 27) | insert_length);
+}
+
+typedef struct PosData {
+  size_t pos;  /* node position this entry was created for */
+  int distance_cache[4];  /* filled by ComputeDistanceCache for that position */
+  float costdiff;  /* cost minus the literal cost of bytes [0, pos); the queue's sort key */
+  float cost;  /* node cost at |pos| */
+} PosData;
+
+/* Maintains the 8 smallest cost differences together with their positions. */
+typedef struct StartPosQueue {
+  PosData q_[8];  /* circular storage; indices are always taken & 7 */
+  size_t idx_;  /* number of pushes so far; not capped at 8 */
+} StartPosQueue;
+
+static BROTLI_INLINE void InitStartPosQueue(StartPosQueue* self) {
+  self->idx_ = 0;  /* empties the queue; q_ itself is left untouched */
+}
+
+static size_t StartPosQueueSize(const StartPosQueue* self) {
+  return BROTLI_MIN(size_t, self->idx_, 8);  /* push count, capped at the 8 slots of q_ */
+}
+
+static void StartPosQueuePush(StartPosQueue* self, const PosData* posdata) {
+  size_t offset = ~(self->idx_++) & 7;  /* slots are claimed in descending order: 7, 6, ..., 0, 7, ... */
+  size_t len = StartPosQueueSize(self);  /* occupancy including this push (idx_ is already incremented) */
+  size_t i;
+  PosData* q = self->q_;
+  q[offset] = *posdata;
+  /* Restore the sorted order. In the list of |len| items at most |len - 1|
+     adjacent element comparisons / swaps are required.
+     The new entry starts at |offset| and is moved toward higher (wrapping)
+     indices for as long as its costdiff is larger than its neighbour's. */
+  for (i = 1; i < len; ++i) {
+    if (q[offset & 7].costdiff > q[(offset + 1) & 7].costdiff) {
+      BROTLI_SWAP(PosData, q, offset & 7, (offset + 1) & 7);
+    }
+    ++offset;
+  }
+}
+
+static const PosData* StartPosQueueAt(const StartPosQueue* self, size_t k) {
+  return &self->q_[(k - self->idx_) & 7];  /* k == 0 maps to the slot the latest push started from */
+}
+
+/* Returns the minimum possible copy length that can improve the cost of any
+   future position.
+   Starting from length 2, a length is skipped while nodes[pos + len] is
+   already reached at no more than the current lower bound; the bound grows
+   by one bit each time |len| enters the next copy length code bucket. */
+static size_t ComputeMinimumCopyLength(const float start_cost,
+                                       const ZopfliNode* nodes,
+                                       const size_t num_bytes,
+                                       const size_t pos) {
+  /* Lower bound on the cost of reaching any future position from here. */
+  float cost_floor = start_cost;
+  size_t bucket_span = 4;
+  size_t bucket_start = 10;
+  size_t len;
+  for (len = 2; pos + len <= num_bytes; ) {
+    /* Stop at the first position not already reached within the bound. */
+    if (!(nodes[pos + len].u.cost <= cost_floor)) break;
+    ++len;
+    if (len != bucket_start) continue;
+    /* Next copy length code bucket: one more extra bit in the bound. */
+    cost_floor += 1.0f;
+    bucket_start += bucket_span;
+    bucket_span *= 2;
+  }
+  return len;
+}
+
+/* Returns the shortcut value for nodes[pos]: |pos| itself when the command
+   ending there changes the last distances, otherwise the shortcut stored at
+   the node where that command started; 0 for pos == 0.
+   REQUIRES: nodes[pos].cost < kInfinity
+   REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". */
+static uint32_t ComputeDistanceShortcut(const size_t block_start,
+                                        const size_t pos,
+                                        const size_t max_backward_limit,
+                                        const size_t gap,
+                                        const ZopfliNode* nodes) {
+  const ZopfliNode* const node = &nodes[pos];
+  size_t copy_len;
+  size_t insert_len;
+  size_t distance;
+
+  if (pos == 0) return 0;
+
+  copy_len = ZopfliNodeCopyLength(node);
+  distance = ZopfliNodeCopyDistance(node);
+  /* Since |block_start + pos| is the end position of the command, the copy
+     part starts from |block_start + pos - copy_len|. Distances that are
+     greater than this or greater than |max_backward_limit| + |gap| are static
+     dictionary references, and do not update the last distances.
+     Also distance code 0 (last distance) does not update the last
+     distances. */
+  if (distance + copy_len <= block_start + pos + gap &&
+      distance <= max_backward_limit + gap &&
+      ZopfliNodeDistanceCode(node) > 0) {
+    return (uint32_t)pos;
+  }
+
+  insert_len = node->dcode_insert_length & 0x7FFFFFF;
+  return nodes[pos - copy_len - insert_len].u.shortcut;
+}
+
+/* Fills in dist_cache[0..3] with the last four distances (as defined by
+   Section 4. of the Spec) that would be used at (block_start + pos) if we
+   used the shortest path of commands from block_start, computed from
+   nodes[0..pos]. Entries not supplied by the shortcut chain are taken, in
+   order, from starting_dist_cache[0..3], the last four distances at
+   block_start.
+   REQUIRES: nodes[pos].cost < kInfinity
+   REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". */
+static void ComputeDistanceCache(const size_t pos,
+                                 const int* starting_dist_cache,
+                                 const ZopfliNode* nodes,
+                                 int* dist_cache) {
+  size_t cursor = nodes[pos].u.shortcut;
+  int filled;
+  int carried;
+  /* Walk the shortcut chain backwards, newest distance first. */
+  for (filled = 0; filled < 4 && cursor > 0; ++filled) {
+    const ZopfliNode* const node = &nodes[cursor];
+    const size_t insert_len = node->dcode_insert_length & 0x7FFFFFF;
+    const size_t copy_len = ZopfliNodeCopyLength(node);
+    const size_t distance = ZopfliNodeCopyDistance(node);
+    dist_cache[filled] = (int)distance;
+    /* Because of prerequisite, cursor >= copy_len + insert_len >= 2. */
+    cursor = nodes[cursor - copy_len - insert_len].u.shortcut;
+  }
+  /* Top up the remaining slots from the cache valid at block start. */
+  for (carried = 0; filled < 4; ++filled, ++carried) {
+    dist_cache[filled] = starting_dist_cache[carried];
+  }
+}
+
+/* Maintains "ZopfliNode array invariant" and pushes node to the queue, if it
+   is eligible, i.e. if its cost does not exceed the cost of coding bytes
+   [0, pos) as literals. */
+static void EvaluateNode(
+    const size_t block_start, const size_t pos, const size_t max_backward_limit,
+    const size_t gap, const int* starting_dist_cache,
+    const ZopfliCostModel* model, StartPosQueue* queue, ZopfliNode* nodes) {
+  ZopfliNode* const node = &nodes[pos];
+  /* Keep the cost in a local: per the original note it is invalidated once
+     the shortcut is stored into the same |u| member below. */
+  const float node_cost = node->u.cost;
+  PosData candidate;
+
+  node->u.shortcut = ComputeDistanceShortcut(
+      block_start, pos, max_backward_limit, gap, nodes);
+
+  if (!(node_cost <= ZopfliCostModelGetLiteralCosts(model, 0, pos))) return;
+
+  candidate.pos = pos;
+  candidate.cost = node_cost;
+  candidate.costdiff = node_cost -
+      ZopfliCostModelGetLiteralCosts(model, 0, pos);
+  ComputeDistanceCache(
+      pos, starting_dist_cache, nodes, candidate.distance_cache);
+  StartPosQueuePush(queue, &candidate);
+}
+
+/* Returns longest copy length. */
+static size_t UpdateNodes(
+    const size_t num_bytes, const size_t block_start, const size_t pos,
+    const uint8_t* ringbuffer, const size_t ringbuffer_mask,
+    const BrotliEncoderParams* params, const size_t max_backward_limit,
+    const int* starting_dist_cache, const size_t num_matches,
+    const BackwardMatch* matches, const ZopfliCostModel* model,
+    StartPosQueue* queue, ZopfliNode* nodes) {
+  const size_t stream_offset = params->stream_offset;
+  const size_t cur_ix = block_start + pos;
+  const size_t cur_ix_masked = cur_ix & ringbuffer_mask;
+  const size_t max_distance = BROTLI_MIN(size_t, cur_ix, max_backward_limit);
+  const size_t dictionary_start = BROTLI_MIN(size_t,
+      cur_ix + stream_offset, max_backward_limit);
+  const size_t max_len = num_bytes - pos;
+  const size_t max_zopfli_len = MaxZopfliLen(params);
+  const size_t max_iters = MaxZopfliCandidates(params);
+  size_t min_len;
+  size_t result = 0;
+  size_t k;
+  const CompoundDictionary* addon = &params->dictionary.compound;
+  size_t gap = addon->total_size;
+
+  EvaluateNode(block_start + stream_offset, pos, max_backward_limit, gap,
+      starting_dist_cache, model, queue, nodes);
+
+  {
+    const PosData* posdata = StartPosQueueAt(queue, 0);
+    float min_cost = (posdata->cost + ZopfliCostModelGetMinCostCmd(model) +
+        ZopfliCostModelGetLiteralCosts(model, posdata->pos, pos));
+    min_len = ComputeMinimumCopyLength(min_cost, nodes, num_bytes, pos);
+  }
+
+  /* Go over the command starting positions in order of increasing cost
+     difference. */
+  for (k = 0; k < max_iters && k < StartPosQueueSize(queue); ++k) {
+    const PosData* posdata = StartPosQueueAt(queue, k);
+    const size_t start = posdata->pos;
+    const uint16_t inscode = GetInsertLengthCode(pos - start);
+    const float start_costdiff = posdata->costdiff;
+    const float base_cost = start_costdiff + (float)GetInsertExtra(inscode) +
+        ZopfliCostModelGetLiteralCosts(model, 0, pos);
+
+    /* Look for last distance matches using the distance cache from this
+       starting position. */
+    size_t best_len = min_len - 1;
+    size_t j = 0;
+    for (; j < BROTLI_NUM_DISTANCE_SHORT_CODES && best_len < max_len; ++j) {
+      const size_t idx = kDistanceCacheIndex[j];
+      const size_t backward =
+          (size_t)(posdata->distance_cache[idx] + kDistanceCacheOffset[j]);
+      size_t prev_ix = cur_ix - backward;
+      size_t len = 0;
+      uint8_t continuation = ringbuffer[cur_ix_masked + best_len];
+      if (cur_ix_masked + best_len > ringbuffer_mask) {
+        break;
+      }
+      if (BROTLI_PREDICT_FALSE(backward > dictionary_start + gap)) {
+        /* Word dictionary -> ignore. */
+        continue;
+      }
+      if (backward <= max_distance) {
+        /* Regular backward reference. */
+        if (prev_ix >= cur_ix) {
+          continue;
+        }
+
+        prev_ix &= ringbuffer_mask;
+        if (prev_ix + best_len > ringbuffer_mask ||
+            continuation != ringbuffer[prev_ix + best_len]) {
+          continue;
+        }
+        len = FindMatchLengthWithLimit(&ringbuffer[prev_ix],
+                                       &ringbuffer[cur_ix_masked],
+                                       max_len);
+      } else if (backward > dictionary_start) {
+        size_t d = 0;
+        size_t offset;
+        size_t limit;
+        const uint8_t* source;
+        offset = dictionary_start + 1 + addon->total_size - 1;
+        while (offset >= backward + addon->chunk_offsets[d + 1]) d++;
+        source = addon->chunk_source[d];
+        offset = offset - addon->chunk_offsets[d] - backward;
+        limit = addon->chunk_offsets[d + 1] - addon->chunk_offsets[d] - offset;
+        limit = limit > max_len ? max_len : limit;
+        if (best_len >= limit ||
+            continuation != source[offset + best_len]) {
+          continue;
+        }
+        len = FindMatchLengthWithLimit(&source[offset],
+                                       &ringbuffer[cur_ix_masked],
+                                       limit);
+      } else {
+        /* "Gray" area. It is addressable by decoder, but this encoder
+           instance does not have that data -> should not touch it. */
+        continue;
+      }
+      {
+        const float dist_cost = base_cost +
+            ZopfliCostModelGetDistanceCost(model, j);
+        size_t l;
+        for (l = best_len + 1; l <= len; ++l) {
+          const uint16_t copycode = GetCopyLengthCode(l);
+          const uint16_t cmdcode =
+              CombineLengthCodes(inscode, copycode, j == 0);
+          const float cost = (cmdcode < 128 ? base_cost : dist_cost) +
+              (float)GetCopyExtra(copycode) +
+              ZopfliCostModelGetCommandCost(model, cmdcode);
+          if (cost < nodes[pos + l].u.cost) {
+            UpdateZopfliNode(nodes, pos, start, l, l, backward, j + 1, cost);
+            result = BROTLI_MAX(size_t, result, l);
+          }
+          best_len = l;
+        }
+      }
+    }
+
+    /* At higher iterations look only for new last distance matches, since
+       looking only for new command start positions with the same distances
+       does not help much. */
+    if (k >= 2) continue;
+
+    {
+      /* Loop through all possible copy lengths at this position. */
+      size_t len = min_len;
+      for (j = 0; j < num_matches; ++j) {
+        BackwardMatch match = matches[j];
+        size_t dist = match.distance;
+        BROTLI_BOOL is_dictionary_match =
+            TO_BROTLI_BOOL(dist > dictionary_start + gap);
+        /* We already tried all possible last distance matches, so we can use
+           normal distance code here. */
+        size_t dist_code = dist + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
+        uint16_t dist_symbol;
+        uint32_t distextra;
+        uint32_t distnumextra;
+        float dist_cost;
+        size_t max_match_len;
+        PrefixEncodeCopyDistance(
+            dist_code, params->dist.num_direct_distance_codes,
+            params->dist.distance_postfix_bits, &dist_symbol, &distextra);
+        distnumextra = dist_symbol >> 10;
+        dist_cost = base_cost + (float)distnumextra +
+            ZopfliCostModelGetDistanceCost(model, dist_symbol & 0x3FF);
+
+        /* Try all copy lengths up until the maximum copy length corresponding
+           to this distance. If the distance refers to the static dictionary, or
+           the maximum length is long enough, try only one maximum length. */
+        max_match_len = BackwardMatchLength(&match);
+        if (len < max_match_len &&
+            (is_dictionary_match || max_match_len > max_zopfli_len)) {
+          len = max_match_len;
+        }
+        for (; len <= max_match_len; ++len) {
+          const size_t len_code =
+              is_dictionary_match ? BackwardMatchLengthCode(&match) : len;
+          const uint16_t copycode = GetCopyLengthCode(len_code);
+          const uint16_t cmdcode = CombineLengthCodes(inscode, copycode, 0);
+          const float cost = dist_cost + (float)GetCopyExtra(copycode) +
+              ZopfliCostModelGetCommandCost(model, cmdcode);
+          if (cost < nodes[pos + len].u.cost) {
+            UpdateZopfliNode(nodes, pos, start, len, len_code, dist, 0, cost);
+            result = BROTLI_MAX(size_t, result, len);
+          }
+        }
+      }
+    }
+  }
+  return result;
+}
+
+/* Back-traces the path from the block end to nodes[0], recording each step in
+   the u.next field of the node it starts from; returns the command count. */
+static size_t ComputeShortestPathFromNodes(size_t num_bytes,
+    ZopfliNode* nodes) {
+  size_t cursor = num_bytes, command_count = 0;
+  for (; (nodes[cursor].dcode_insert_length & 0x7FFFFFF) == 0 &&
+      nodes[cursor].length == 1; --cursor) {}  /* Skip such trailing nodes. */
+  nodes[cursor].u.next = BROTLI_UINT32_MAX;  /* End-of-path marker. */
+  for (; cursor != 0; ++command_count) {
+    const size_t step = ZopfliNodeCommandLength(&nodes[cursor]);
+    cursor -= step;
+    nodes[cursor].u.next = (uint32_t)step;
+  }
+  return command_count;
+}
+
+/* Turns the path stored in |nodes| into |commands|. REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
+void BrotliZopfliCreateCommands(const size_t num_bytes,
+    const size_t block_start, const ZopfliNode* nodes, int* dist_cache,
+    size_t* last_insert_len, const BrotliEncoderParams* params,
+    Command* commands, size_t* num_literals) {
+  const size_t stream_offset = params->stream_offset;
+  const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
+  size_t pos = 0;  /* Offset inside the block of the command being emitted. */
+  uint32_t offset = nodes[0].u.next;  /* Step from nodes[0] to the first path node. */
+  size_t i;
+  size_t gap = params->dictionary.compound.total_size;
+  for (i = 0; offset != BROTLI_UINT32_MAX; i++) {  /* One command per path node. */
+    const ZopfliNode* next = &nodes[pos + offset];
+    size_t copy_length = ZopfliNodeCopyLength(next);
+    size_t insert_length = next->dcode_insert_length & 0x7FFFFFF;  /* Low 27 bits. */
+    pos += insert_length;
+    offset = next->u.next;
+    if (i == 0) {
+      insert_length += *last_insert_len;  /* Literals pending from the caller join the first command. */
+      *last_insert_len = 0;
+    }
+    {
+      size_t distance = ZopfliNodeCopyDistance(next);
+      size_t len_code = ZopfliNodeLengthCode(next);
+      size_t dictionary_start = BROTLI_MIN(size_t,
+          block_start + pos + stream_offset, max_backward_limit);
+      BROTLI_BOOL is_dictionary =
+          TO_BROTLI_BOOL(distance > dictionary_start + gap);
+      size_t dist_code = ZopfliNodeDistanceCode(next);
+      InitCommand(&commands[i], &params->dist, insert_length,
+          copy_length, (int)len_code - (int)copy_length, dist_code);
+
+      if (!is_dictionary && dist_code > 0) {  /* Push |distance| to the front of the distance cache. */
+        dist_cache[3] = dist_cache[2];
+        dist_cache[2] = dist_cache[1];
+        dist_cache[1] = dist_cache[0];
+        dist_cache[0] = (int)distance;
+      }
+    }
+
+    *num_literals += insert_length;
+    pos += copy_length;
+  }
+  *last_insert_len += num_bytes - pos;  /* Bytes past the last copy stay pending for the caller. */
+}
+
+static size_t ZopfliIterate(size_t num_bytes, size_t position,
+    const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    const BrotliEncoderParams* params, const size_t gap, const int* dist_cache,
+    const ZopfliCostModel* model, const uint32_t* num_matches,
+    const BackwardMatch* matches, ZopfliNode* nodes) {  /* One node-update pass over precollected matches; returns the command count. */
+  const size_t stream_offset = params->stream_offset;
+  const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
+  const size_t max_zopfli_len = MaxZopfliLen(params);
+  StartPosQueue queue;
+  size_t cur_match_pos = 0;  /* Index in |matches| of the first match for position i. */
+  size_t i;
+  nodes[0].length = 0;
+  nodes[0].u.cost = 0;  /* The block start costs nothing to reach. */
+  InitStartPosQueue(&queue);
+  for (i = 0; i + 3 < num_bytes; i++) {
+    size_t skip = UpdateNodes(num_bytes, position, i, ringbuffer,
+        ringbuffer_mask, params, max_backward_limit, dist_cache,
+        num_matches[i], &matches[cur_match_pos], model, &queue, nodes);
+    if (skip < BROTLI_LONG_COPY_QUICK_STEP) skip = 0;  /* Skips below the quick-step threshold are dropped. */
+    cur_match_pos += num_matches[i];
+    if (num_matches[i] == 1 &&  /* A lone match longer than max_zopfli_len is skipped over. */
+        BackwardMatchLength(&matches[cur_match_pos - 1]) > max_zopfli_len) {
+      skip = BROTLI_MAX(size_t,
+          BackwardMatchLength(&matches[cur_match_pos - 1]), skip);
+    }
+    if (skip > 1) {  /* Skipped positions get EvaluateNode only, not UpdateNodes. */
+      skip--;
+      while (skip) {
+        i++;
+        if (i + 3 >= num_bytes) break;
+        EvaluateNode(position + stream_offset, i, max_backward_limit, gap,
+            dist_cache, model, &queue, nodes);
+        cur_match_pos += num_matches[i];  /* Keep the match cursor in step with i. */
+        skip--;
+      }
+    }
+  }
+  return ComputeShortestPathFromNodes(num_bytes, nodes);
+}
+
+/* Merges two match runs into |dst|. Each step emits the entry with the smaller
+   (length, distance) pair; when both pairs are equal, |src2| goes first. */
+static void MergeMatches(BackwardMatch* dst,
+    BackwardMatch* src1, size_t len1, BackwardMatch* src2, size_t len2) {
+  while (len1 > 0 && len2 > 0) {
+    const size_t a_len = BackwardMatchLength(src1);
+    const size_t b_len = BackwardMatchLength(src2);
+    const int first_wins = (a_len < b_len) ||
+        (a_len == b_len && src1->distance < src2->distance);
+    if (first_wins) { *dst++ = *src1++; len1--; }
+    else { *dst++ = *src2++; len2--; }
+  }
+  /* At most one input still holds entries; append them unchanged. */
+  for (; len1 > 0; --len1) *dst++ = *src1++;
+  for (; len2 > 0; --len2) *dst++ = *src2++;
+}
+
+/* Collects matches per position, updates |nodes| and returns the path's command count (0 on allocation failure). REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
+size_t BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
+    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    ContextLut literal_context_lut, const BrotliEncoderParams* params,
+    const int* dist_cache, Hasher* hasher, ZopfliNode* nodes) {
+  const size_t stream_offset = params->stream_offset;
+  const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
+  const size_t max_zopfli_len = MaxZopfliLen(params);
+  StartPosQueue queue;
+  BackwardMatch* BROTLI_RESTRICT matches =
+      BROTLI_ALLOC(m, BackwardMatch, 2 * (MAX_NUM_MATCHES_H10 + 64));
+  const size_t store_end = num_bytes >= StoreLookaheadH10() ?
+      position + num_bytes - StoreLookaheadH10() + 1 : position;  /* Upper bound for StoreRangeH10 below. */
+  size_t i;
+  const CompoundDictionary* addon = &params->dictionary.compound;
+  size_t gap = addon->total_size;
+  size_t lz_matches_offset =
+      (addon->num_chunks != 0) ? (MAX_NUM_MATCHES_H10 + 128) : 0;  /* Hasher matches go here so merged output can start at index 0. */
+  ZopfliCostModel* model = BROTLI_ALLOC(m, ZopfliCostModel, 1);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(model) || BROTLI_IS_NULL(matches)) {
+    return 0;
+  }
+  nodes[0].length = 0;
+  nodes[0].u.cost = 0;  /* The block start costs nothing to reach. */
+  InitZopfliCostModel(m, model, &params->dist, num_bytes);
+  if (BROTLI_IS_OOM(m)) return 0;
+  ZopfliCostModelSetFromLiteralCosts(
+      model, position, ringbuffer, ringbuffer_mask);
+  InitStartPosQueue(&queue);
+  for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; i++) {
+    const size_t pos = position + i;
+    const size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
+    const size_t dictionary_start = BROTLI_MIN(size_t,
+        pos + stream_offset, max_backward_limit);
+    size_t skip;
+    size_t num_matches;
+    int dict_id = 0;
+    if (params->dictionary.contextual.context_based) {  /* Pick the dictionary from the two preceding bytes. */
+      uint8_t p1 = pos >= 1 ?
+          ringbuffer[(size_t)(pos - 1) & ringbuffer_mask] : 0;
+      uint8_t p2 = pos >= 2 ?
+          ringbuffer[(size_t)(pos - 2) & ringbuffer_mask] : 0;
+      dict_id = params->dictionary.contextual.context_map[
+          BROTLI_CONTEXT(p1, p2, literal_context_lut)];
+    }
+    num_matches = FindAllMatchesH10(&hasher->privat._H10,
+        params->dictionary.contextual.dict[dict_id],
+        ringbuffer, ringbuffer_mask, pos, num_bytes - i, max_distance,
+        dictionary_start + gap, params, &matches[lz_matches_offset]);
+    if (addon->num_chunks != 0) {  /* Up to 64 compound-dictionary matches sit just below lz_matches_offset, then both runs are merged. */
+      size_t cd_matches = LookupAllCompoundDictionaryMatches(addon,
+          ringbuffer, ringbuffer_mask, pos, 3, num_bytes - i,
+          dictionary_start, params->dist.max_distance,
+          &matches[lz_matches_offset - 64], 64);
+      MergeMatches(matches, &matches[lz_matches_offset - 64], cd_matches,
+          &matches[lz_matches_offset], num_matches);
+      num_matches += cd_matches;
+    }
+    if (num_matches > 0 &&  /* Last match exceeds max_zopfli_len: keep only that one. */
+        BackwardMatchLength(&matches[num_matches - 1]) > max_zopfli_len) {
+      matches[0] = matches[num_matches - 1];
+      num_matches = 1;
+    }
+    skip = UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
+        params, max_backward_limit, dist_cache, num_matches, matches, model,
+        &queue, nodes);
+    if (skip < BROTLI_LONG_COPY_QUICK_STEP) skip = 0;  /* Skips below the quick-step threshold are dropped. */
+    if (num_matches == 1 && BackwardMatchLength(&matches[0]) > max_zopfli_len) {
+      skip = BROTLI_MAX(size_t, BackwardMatchLength(&matches[0]), skip);
+    }
+    if (skip > 1) {
+      /* Add the tail of the copy to the hasher. */
+      StoreRangeH10(&hasher->privat._H10,
+          ringbuffer, ringbuffer_mask, pos + 1, BROTLI_MIN(
+          size_t, pos + skip, store_end));
+      skip--;
+      while (skip) {  /* Skipped positions get EvaluateNode only; no match search is done for them. */
+        i++;
+        if (i + HashTypeLengthH10() - 1 >= num_bytes) break;
+        EvaluateNode(position + stream_offset, i, max_backward_limit, gap,
+            dist_cache, model, &queue, nodes);
+        skip--;
+      }
+    }
+  }
+  CleanupZopfliCostModel(m, model);
+  BROTLI_FREE(m, model);
+  BROTLI_FREE(m, matches);
+  return ComputeShortestPathFromNodes(num_bytes, nodes);
+}
+
+void BrotliCreateZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
+    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    ContextLut literal_context_lut, const BrotliEncoderParams* params,
+    Hasher* hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals) {  /* Single-pass driver: compute the path, then emit its commands. */
+  ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);  /* One node per byte plus the end node. */
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) return;
+  BrotliInitZopfliNodes(nodes, num_bytes + 1);
+  *num_commands += BrotliZopfliComputeShortestPath(m, num_bytes,
+      position, ringbuffer, ringbuffer_mask, literal_context_lut, params,
+      dist_cache, hasher, nodes);  /* Adds the number of commands on the computed path. */
+  if (BROTLI_IS_OOM(m)) return;
+  BrotliZopfliCreateCommands(num_bytes, position, nodes, dist_cache,
+      last_insert_len, params, commands, num_literals);
+  BROTLI_FREE(m, nodes);
+}
+
+void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
+    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    ContextLut literal_context_lut, const BrotliEncoderParams* params,
+    Hasher* hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals) {  /* Collects all matches once, then runs ZopfliIterate twice with different cost models. */
+  const size_t stream_offset = params->stream_offset;
+  const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
+  uint32_t* num_matches = BROTLI_ALLOC(m, uint32_t, num_bytes);  /* Match count stored per position. */
+  size_t matches_size = 4 * num_bytes;  /* Initial capacity of |matches|; see BROTLI_ENSURE_CAPACITY below. */
+  const size_t store_end = num_bytes >= StoreLookaheadH10() ?
+      position + num_bytes - StoreLookaheadH10() + 1 : position;
+  size_t cur_match_pos = 0;  /* Next free slot in |matches|. */
+  size_t i;
+  size_t orig_num_literals;
+  size_t orig_last_insert_len;
+  int orig_dist_cache[4];
+  size_t orig_num_commands;
+  ZopfliCostModel* model = BROTLI_ALLOC(m, ZopfliCostModel, 1);
+  ZopfliNode* nodes;
+  BackwardMatch* matches = BROTLI_ALLOC(m, BackwardMatch, matches_size);
+  const CompoundDictionary* addon = &params->dictionary.compound;
+  size_t gap = addon->total_size;
+  size_t shadow_matches =
+      (addon->num_chunks != 0) ? (MAX_NUM_MATCHES_H10 + 128) : 0;  /* Extra room used as scratch space before merging. */
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(model) ||
+      BROTLI_IS_NULL(num_matches) || BROTLI_IS_NULL(matches)) {
+    return;
+  }
+  for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; ++i) {
+    const size_t pos = position + i;
+    size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
+    size_t dictionary_start = BROTLI_MIN(size_t,
+        pos + stream_offset, max_backward_limit);
+    size_t max_length = num_bytes - i;
+    size_t num_found_matches;
+    size_t cur_match_end;
+    size_t j;
+    int dict_id = 0;
+    if (params->dictionary.contextual.context_based) {  /* Pick the dictionary from the two preceding bytes. */
+      uint8_t p1 = pos >= 1 ?
+          ringbuffer[(size_t)(pos - 1) & ringbuffer_mask] : 0;
+      uint8_t p2 = pos >= 2 ?
+          ringbuffer[(size_t)(pos - 2) & ringbuffer_mask] : 0;
+      dict_id = params->dictionary.contextual.context_map[
+          BROTLI_CONTEXT(p1, p2, literal_context_lut)];
+    }
+    /* Ensure that we have enough free slots. */
+    BROTLI_ENSURE_CAPACITY(m, BackwardMatch, matches, matches_size,
+        cur_match_pos + MAX_NUM_MATCHES_H10 + shadow_matches);
+    if (BROTLI_IS_OOM(m)) return;
+    num_found_matches = FindAllMatchesH10(&hasher->privat._H10,
+        params->dictionary.contextual.dict[dict_id],
+        ringbuffer, ringbuffer_mask, pos, max_length,
+        max_distance, dictionary_start + gap, params,
+        &matches[cur_match_pos + shadow_matches]);
+    if (addon->num_chunks != 0) {  /* Up to 64 compound-dictionary matches are merged with the hasher matches at cur_match_pos. */
+      size_t cd_matches = LookupAllCompoundDictionaryMatches(addon,
+          ringbuffer, ringbuffer_mask, pos, 3, max_length,
+          dictionary_start, params->dist.max_distance,
+          &matches[cur_match_pos + shadow_matches - 64], 64);
+      MergeMatches(&matches[cur_match_pos],
+          &matches[cur_match_pos + shadow_matches - 64], cd_matches,
+          &matches[cur_match_pos + shadow_matches], num_found_matches);
+      num_found_matches += cd_matches;
+    }
+    cur_match_end = cur_match_pos + num_found_matches;
+    for (j = cur_match_pos; j + 1 < cur_match_end; ++j) {  /* Debug check: matches are ordered by non-decreasing length. */
+      BROTLI_DCHECK(BackwardMatchLength(&matches[j]) <=
+          BackwardMatchLength(&matches[j + 1]));
+    }
+    num_matches[i] = (uint32_t)num_found_matches;
+    if (num_found_matches > 0) {
+      const size_t match_len = BackwardMatchLength(&matches[cur_match_end - 1]);
+      if (match_len > MAX_ZOPFLI_LEN_QUALITY_11) {  /* Very long match: keep only it and jump past the bytes it covers. */
+        const size_t skip = match_len - 1;
+        matches[cur_match_pos++] = matches[cur_match_end - 1];
+        num_matches[i] = 1;
+        /* Add the tail of the copy to the hasher. */
+        StoreRangeH10(&hasher->privat._H10,
+                      ringbuffer, ringbuffer_mask, pos + 1,
+                      BROTLI_MIN(size_t, pos + match_len, store_end));
+        memset(&num_matches[i + 1], 0, skip * sizeof(num_matches[0]));  /* Skipped positions record zero matches. */
+        i += skip;
+      } else {
+        cur_match_pos = cur_match_end;
+      }
+    }
+  }
+  orig_num_literals = *num_literals;  /* Snapshot of the output state, restored before each pass below. */
+  orig_last_insert_len = *last_insert_len;
+  memcpy(orig_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
+  orig_num_commands = *num_commands;
+  nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) return;
+  InitZopfliCostModel(m, model, &params->dist, num_bytes);
+  if (BROTLI_IS_OOM(m)) return;
+  for (i = 0; i < 2; i++) {  /* Pass 0 costs from literal costs; pass 1 costs from the commands pass 0 produced. */
+    BrotliInitZopfliNodes(nodes, num_bytes + 1);
+    if (i == 0) {
+      ZopfliCostModelSetFromLiteralCosts(
+          model, position, ringbuffer, ringbuffer_mask);
+    } else {
+      ZopfliCostModelSetFromCommands(model, position, ringbuffer,
+          ringbuffer_mask, commands, *num_commands - orig_num_commands,
+          orig_last_insert_len);
+    }
+    *num_commands = orig_num_commands;
+    *num_literals = orig_num_literals;
+    *last_insert_len = orig_last_insert_len;
+    memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
+    *num_commands += ZopfliIterate(num_bytes, position, ringbuffer,
+        ringbuffer_mask, params, gap, dist_cache, model, num_matches, matches,
+        nodes);
+    BrotliZopfliCreateCommands(num_bytes, position, nodes, dist_cache,
+        last_insert_len, params, commands, num_literals);
+  }
+  CleanupZopfliCostModel(m, model);
+  BROTLI_FREE(m, model);
+  BROTLI_FREE(m, nodes);
+  BROTLI_FREE(m, matches);
+  BROTLI_FREE(m, num_matches);
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/backward_references_hq.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/backward_references_hq.h
new file mode 100644
index 0000000000..8acf975ab9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/backward_references_hq.h
@@ -0,0 +1,96 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function to find backward reference copies. */
+
+#ifndef BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
+#define BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
+
+#include <brotli/types.h>
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include "command.h"
+#include "hash.h"
+#include "memory.h"
+#include "quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+BROTLI_INTERNAL void BrotliCreateZopfliBackwardReferences(MemoryManager* m,
+    size_t num_bytes,
+    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    ContextLut literal_context_lut, const BrotliEncoderParams* params,
+    Hasher* hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals);
+
+BROTLI_INTERNAL void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m,
+    size_t num_bytes,
+    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    ContextLut literal_context_lut, const BrotliEncoderParams* params,
+    Hasher* hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals);
+
+typedef struct ZopfliNode {
+  /* Best length to get up to this byte (not including this byte itself);
+     the highest 7 bits are used to reconstruct the length code. */
+  uint32_t length;
+  /* Distance associated with the length. */
+  uint32_t distance;
+  /* Number of literal inserts before this copy; the highest 5 bits contain
+     distance short code + 1 (or zero if no short code). */
+  uint32_t dcode_insert_length;
+
+  /* This union holds information used by dynamic programming. During the
+     forward pass |cost| is used to store the goal function. When a node is
+     processed its |cost| is invalidated in favor of |shortcut|. On the path
+     back-tracing pass |next| is assigned the offset to the next path node. */
+  union {
+    /* Smallest cost to get to this byte from the beginning, as found so far. */
+    float cost;
+    /* Offset to the next node on the path. Equal to command_length() of the
+       next node on the path. For the last node it is BROTLI_UINT32_MAX. */
+    uint32_t next;
+    /* Node position that provides next distance for distance cache. */
+    uint32_t shortcut;
+  } u;
+} ZopfliNode;
+
+BROTLI_INTERNAL void BrotliInitZopfliNodes(ZopfliNode* array, size_t length);
+
+/* Computes the shortest path of commands from position to at most
+   position + num_bytes.
+
+   Returns the number of commands found; the path is stored in |nodes|: from
+   nodes[0], u.next is the offset to the next node (BROTLI_UINT32_MAX ends it).
+   Note that the sum of the lengths of all commands can be less than num_bytes.
+
+   On return, the nodes[0..num_bytes] array will have the following
+   "ZopfliNode array invariant":
+   For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
+     (1) nodes[i].copy_length() >= 2
+     (2) nodes[i].command_length() <= i and
+     (3) nodes[i - nodes[i].command_length()].cost < kInfinity */
+BROTLI_INTERNAL size_t BrotliZopfliComputeShortestPath(
+    MemoryManager* m, size_t num_bytes,
+    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    ContextLut literal_context_lut, const BrotliEncoderParams* params,
+    const int* dist_cache, Hasher* hasher, ZopfliNode* nodes);
+
+BROTLI_INTERNAL void BrotliZopfliCreateCommands(
+    const size_t num_bytes, const size_t block_start, const ZopfliNode* nodes,
+    int* dist_cache, size_t* last_insert_len, const BrotliEncoderParams* params,
+    Command* commands, size_t* num_literals);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/backward_references_inc.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/backward_references_inc.h
new file mode 100644
index 0000000000..752c12e9fd
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/backward_references_inc.h
@@ -0,0 +1,189 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: EXPORT_FN, FN; HASHER() and ENABLE_COMPOUND_DICTIONARY must also be defined before inclusion */
+
+static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
+    size_t num_bytes, size_t position,
+    const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    ContextLut literal_context_lut, const BrotliEncoderParams* params,
+    Hasher* hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals) {  /* Scans [position, position + num_bytes): one Command per accepted match, other bytes become inserts. */
+  HASHER()* privat = &hasher->privat.FN(_);
+  /* Set maximum distance, see section 9.1. of the spec. */
+  const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
+  const size_t position_offset = params->stream_offset;
+
+  const Command* const orig_commands = commands;
+  size_t insert_length = *last_insert_len;  /* Literals pending from the previous call. */
+  const size_t pos_end = position + num_bytes;
+  const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
+      position + num_bytes - FN(StoreLookahead)() + 1 : position;
+
+  /* For speed up heuristics for random data. */
+  const size_t random_heuristics_window_size =
+      LiteralSpreeLengthForSparseSearch(params);
+  size_t apply_random_heuristics = position + random_heuristics_window_size;  /* Past this position, lookups start being skipped. */
+  const size_t gap = params->dictionary.compound.total_size;
+
+  /* Minimum score to accept a backward reference. */
+  const score_t kMinScore = BROTLI_SCORE_BASE + 100;
+
+  FN(PrepareDistanceCache)(privat, dist_cache);
+
+  while (position + FN(HashTypeLength)() < pos_end) {
+    size_t max_length = pos_end - position;
+    size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
+    size_t dictionary_start = BROTLI_MIN(size_t,
+        position + position_offset, max_backward_limit);
+    HasherSearchResult sr;
+    int dict_id = 0;
+    uint8_t p1 = 0;
+    uint8_t p2 = 0;
+    if (params->dictionary.contextual.context_based) {  /* Pick the dictionary from the two preceding bytes. */
+      p1 = position >= 1 ?
+          ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
+      p2 = position >= 2 ?
+          ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
+      dict_id = params->dictionary.contextual.context_map[
+          BROTLI_CONTEXT(p1, p2, literal_context_lut)];
+    }
+    sr.len = 0;
+    sr.len_code_delta = 0;
+    sr.distance = 0;
+    sr.score = kMinScore;  /* A match counts only if its score ends up above kMinScore. */
+    FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
+        ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
+        max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
+    if (ENABLE_COMPOUND_DICTIONARY) {
+      LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
+          ringbuffer_mask, dist_cache, position, max_length,
+          dictionary_start, params->dist.max_distance, &sr);
+    }
+    if (sr.score > kMinScore) {
+      /* Found a match. Let's look for something even better ahead. */
+      int delayed_backward_references_in_row = 0;
+      --max_length;
+      for (;; --max_length) {
+        const score_t cost_diff_lazy = 175;  /* Score margin the next position's match must gain to win. */
+        HasherSearchResult sr2;
+        sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
+            BROTLI_MIN(size_t, sr.len - 1, max_length) : 0;
+        sr2.len_code_delta = 0;
+        sr2.distance = 0;
+        sr2.score = kMinScore;
+        max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
+        dictionary_start = BROTLI_MIN(size_t,
+            position + 1 + position_offset, max_backward_limit);
+        if (params->dictionary.contextual.context_based) {
+          p2 = p1;
+          p1 = ringbuffer[position & ringbuffer_mask];
+          dict_id = params->dictionary.contextual.context_map[
+              BROTLI_CONTEXT(p1, p2, literal_context_lut)];
+        }
+        FN(FindLongestMatch)(privat,
+            params->dictionary.contextual.dict[dict_id],
+            ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
+            max_distance, dictionary_start + gap, params->dist.max_distance,
+            &sr2);
+        if (ENABLE_COMPOUND_DICTIONARY) {
+          LookupCompoundDictionaryMatch(
+              &params->dictionary.compound, ringbuffer,
+              ringbuffer_mask, dist_cache, position + 1, max_length,
+              dictionary_start, params->dist.max_distance, &sr2);
+        }
+        if (sr2.score >= sr.score + cost_diff_lazy) {
+          /* Ok, let's just write one byte for now and start a match from the
+             next byte. */
+          ++position;
+          ++insert_length;
+          sr = sr2;
+          if (++delayed_backward_references_in_row < 4 &&  /* Stop looking ahead after 4 delays in a row. */
+              position + FN(HashTypeLength)() < pos_end) {
+            continue;
+          }
+        }
+        break;
+      }
+      apply_random_heuristics =
+          position + 2 * sr.len + random_heuristics_window_size;
+      dictionary_start = BROTLI_MIN(size_t,
+          position + position_offset, max_backward_limit);
+      {
+        /* The first 16 codes are special short-codes,
+           and the minimum offset is 1. */
+        size_t distance_code = ComputeDistanceCode(
+            sr.distance, dictionary_start + gap, dist_cache);
+        if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) {  /* Push sr.distance to the front of the distance cache. */
+          dist_cache[3] = dist_cache[2];
+          dist_cache[2] = dist_cache[1];
+          dist_cache[1] = dist_cache[0];
+          dist_cache[0] = (int)sr.distance;
+          FN(PrepareDistanceCache)(privat, dist_cache);
+        }
+        InitCommand(commands++, &params->dist, insert_length,
+            sr.len, sr.len_code_delta, distance_code);
+      }
+      *num_literals += insert_length;
+      insert_length = 0;
+      /* Put the hash keys into the table, if there are enough bytes left.
+         Depending on the hasher implementation, it can push all positions
+         in the given range or only a subset of them.
+         Avoid hash poisoning with RLE data. */
+      {
+        size_t range_start = position + 2;
+        size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
+        if (sr.distance < (sr.len >> 2)) {  /* Distance is small relative to length: store only the tail of the range. */
+          range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
+              range_start, position + sr.len - (sr.distance << 2)));
+        }
+        FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
+                       range_end);
+      }
+      position += sr.len;
+    } else {
+      ++insert_length;
+      ++position;
+      /* If we have not seen matches for a long time, we can skip some
+         match lookups. Unsuccessful match lookups are very very expensive
+         and this kind of a heuristic speeds up compression quite
+         a lot. */
+      if (position > apply_random_heuristics) {
+        /* Going through uncompressible data, jump. */
+        if (position >
+            apply_random_heuristics + 4 * random_heuristics_window_size) {
+          /* It is quite a long time since we saw a copy, so we assume
+             that this data is not compressible, and store hashes less
+             often. Hashes of non compressible data are less likely to
+             turn out to be useful in the future, too, so we store less of
+             them to not to flood out the hash table of good compressible
+             data. */
+          const size_t kMargin =
+              BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
+          size_t pos_jump =
+              BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
+          for (; position < pos_jump; position += 4) {  /* Store every 4th position; the bytes in between become literals. */
+            FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
+            insert_length += 4;
+          }
+        } else {
+          const size_t kMargin =
+              BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
+          size_t pos_jump =
+              BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
+          for (; position < pos_jump; position += 2) {  /* Store every 2nd position; the bytes in between become literals. */
+            FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
+            insert_length += 2;
+          }
+        }
+      }
+    }
+  }
+  insert_length += pos_end - position;  /* Unmatched tail bytes are handed back through *last_insert_len. */
+  *last_insert_len = insert_length;
+  *num_commands += (size_t)(commands - orig_commands);
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/bit_cost.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/bit_cost.c
new file mode 100644
index 0000000000..6b7c904ced
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/bit_cost.c
@@ -0,0 +1,36 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions to estimate the bit cost of Huffman trees. */
+
+#include "bit_cost.h"
+
+#include <brotli/types.h>
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include "fast_log.h"
+#include "histogram.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define FN(X) X ## Literal
+#include "bit_cost_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Command
+#include "bit_cost_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Distance
+#include "bit_cost_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/bit_cost.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/bit_cost.h
new file mode 100644
index 0000000000..f6f2773994
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/bit_cost.h
@@ -0,0 +1,64 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions to estimate the bit cost of Huffman trees. */
+
+#ifndef BROTLI_ENC_BIT_COST_H_
+#define BROTLI_ENC_BIT_COST_H_
+
+#include <brotli/types.h>
+
+#include "../common/platform.h"
+#include "fast_log.h"
+#include "histogram.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static BROTLI_INLINE double ShannonEntropy(
+    const uint32_t* population, size_t size, size_t* total) {
+  size_t sum = 0;  /* Total of all counts; reported through |*total|. */
+  double retval = 0;  /* Ends as sum*FastLog2(sum) - SUM(p*FastLog2(p)). */
+  const uint32_t* population_end = population + size;
+  size_t p;
+  if (size & 1) {  /* Odd size: enter the unrolled loop at its second half. */
+    goto odd_number_of_elements_left;
+  }
+  while (population < population_end) {  /* Consumes two counts per pass. */
+    p = *population++;
+    sum += p;
+    retval -= (double)p * FastLog2(p);
+ odd_number_of_elements_left:
+    p = *population++;
+    sum += p;
+    retval -= (double)p * FastLog2(p);
+  }
+  if (sum) retval += (double)sum * FastLog2(sum);  /* Skipped when sum is 0. */
+  *total = sum;
+  return retval;
+}
+
+static BROTLI_INLINE double BitsEntropy(
+    const uint32_t* population, size_t size) {
+  /* ShannonEntropy() of |population|, floored at the sum of its counts. */
+  size_t total;
+  const double entropy = ShannonEntropy(population, size, &total);
+  const double floor_bits = (double)total;
+
+  /* At least one bit per literal is needed. */
+  return (entropy < floor_bits) ? floor_bits : entropy;
+}
+
+BROTLI_INTERNAL double BrotliPopulationCostLiteral(const HistogramLiteral*);
+BROTLI_INTERNAL double BrotliPopulationCostCommand(const HistogramCommand*);
+BROTLI_INTERNAL double BrotliPopulationCostDistance(const HistogramDistance*);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_BIT_COST_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/bit_cost_inc.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/bit_cost_inc.h
new file mode 100644
index 0000000000..453c226042
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/bit_cost_inc.h
@@ -0,0 +1,127 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN */
+
+#define HistogramType FN(Histogram)
+
+double FN(BrotliPopulationCost)(const HistogramType* histogram) {
+  static const double kOneSymbolHistogramCost = 12;  /* Flat costs, in bits. */
+  static const double kTwoSymbolHistogramCost = 20;
+  static const double kThreeSymbolHistogramCost = 28;
+  static const double kFourSymbolHistogramCost = 37;
+  const size_t data_size = FN(HistogramDataSize)();
+  int count = 0;  /* Nonzero symbols seen so far; the scan stops at 5. */
+  size_t s[5];  /* Indices of the first nonzero symbols. */
+  double bits = 0.0;
+  size_t i;
+  if (histogram->total_count_ == 0) {  /* Empty: priced like one symbol. */
+    return kOneSymbolHistogramCost;
+  }
+  for (i = 0; i < data_size; ++i) {
+    if (histogram->data_[i] > 0) {
+      s[count] = i;
+      ++count;
+      if (count > 4) break;  /* 5+ symbols: use the general estimate below. */
+    }
+  }
+  if (count == 1) {
+    return kOneSymbolHistogramCost;
+  }
+  if (count == 2) {  /* Flat cost plus one bit per counted occurrence. */
+    return (kTwoSymbolHistogramCost + (double)histogram->total_count_);
+  }
+  if (count == 3) {  /* Largest count weighs 1 bit, the other two 2 bits. */
+    const uint32_t histo0 = histogram->data_[s[0]];
+    const uint32_t histo1 = histogram->data_[s[1]];
+    const uint32_t histo2 = histogram->data_[s[2]];
+    const uint32_t histomax =
+        BROTLI_MAX(uint32_t, histo0, BROTLI_MAX(uint32_t, histo1, histo2));
+    return (kThreeSymbolHistogramCost +
+            2 * (histo0 + histo1 + histo2) - histomax);
+  }
+  if (count == 4) {
+    uint32_t histo[4];
+    uint32_t h23;
+    uint32_t histomax;
+    for (i = 0; i < 4; ++i) {
+      histo[i] = histogram->data_[s[i]];
+    }
+    /* Sort in descending order. */
+    for (i = 0; i < 4; ++i) {
+      size_t j;
+      for (j = i + 1; j < 4; ++j) {
+        if (histo[j] > histo[i]) {
+          BROTLI_SWAP(uint32_t, histo, j, i);
+        }
+      }
+    }
+    h23 = histo[2] + histo[3];  /* Sum of the two smallest counts. */
+    histomax = BROTLI_MAX(uint32_t, h23, histo[0]);
+    return (kFourSymbolHistogramCost +
+            3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
+  }
+
+  {
+    /* In this loop we compute the entropy of the histogram and simultaneously
+       build a simplified histogram of the code length codes where we use the
+       zero repeat code 17, but we don't use the non-zero repeat code 16. */
+    size_t max_depth = 1;
+    uint32_t depth_histo[BROTLI_CODE_LENGTH_CODES] = { 0 };
+    const double log2total = FastLog2(histogram->total_count_);
+    for (i = 0; i < data_size;) {
+      if (histogram->data_[i] > 0) {
+        /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
+                                    = log2(total_count) - log2(count(symbol)) */
+        double log2p = log2total - FastLog2(histogram->data_[i]);
+        /* Approximate the bit depth by round(-log2(P(symbol))) */
+        size_t depth = (size_t)(log2p + 0.5);
+        bits += histogram->data_[i] * log2p;
+        if (depth > 15) {  /* Depths are clamped to 15. */
+          depth = 15;
+        }
+        if (depth > max_depth) {
+          max_depth = depth;
+        }
+        ++depth_histo[depth];
+        ++i;
+      } else {
+        /* Compute the run length of zeros and add the appropriate number of 0
+           and 17 code length codes to the code length code histogram. */
+        uint32_t reps = 1;
+        size_t k;
+        for (k = i + 1; k < data_size && histogram->data_[k] == 0; ++k) {
+          ++reps;
+        }
+        i += reps;  /* Skip the whole zero run. */
+        if (i == data_size) {
+          /* Don't add any cost for the last zero run, since these are encoded
+             only implicitly. */
+          break;
+        }
+        if (reps < 3) {  /* Short runs are counted as plain zero lengths. */
+          depth_histo[0] += reps;
+        } else {
+          reps -= 2;
+          while (reps > 0) {  /* One code 17 per pass; reps shrinks by 8x. */
+            ++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH];
+            /* Add the 3 extra bits for the 17 code length code. */
+            bits += 3;
+            reps >>= 3;
+          }
+        }
+      }
+    }
+    /* Add the estimated encoding cost of the code length code histogram. */
+    bits += (double)(18 + 2 * max_depth);
+    /* Add the entropy of the code length code histogram. */
+    bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
+  }
+  return bits;
+}
+
+#undef HistogramType
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/block_encoder_inc.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/block_encoder_inc.h
new file mode 100644
index 0000000000..8cbd5eac67
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/block_encoder_inc.h
@@ -0,0 +1,34 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2014 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN */
+
+#define HistogramType FN(Histogram)
+
+/* Allocates the depth and bit tables on |self|, then builds a Huffman code
+   for every histogram and stores it to the bit stream. */
+static void FN(BuildAndStoreEntropyCodes)(MemoryManager* m, BlockEncoder* self,
+    const HistogramType* histograms, const size_t histograms_size,
+    const size_t alphabet_size, HuffmanTree* tree,
+    size_t* storage_ix, uint8_t* storage) {
+  const size_t code_length = self->histogram_length_;
+  const size_t total_entries = histograms_size * code_length;
+  size_t index;
+  size_t offset = 0;
+  self->depths_ = BROTLI_ALLOC(m, uint8_t, total_entries);
+  self->bits_ = BROTLI_ALLOC(m, uint16_t, total_entries);
+  if (BROTLI_IS_OOM(m)) return;
+
+  /* Histogram |index| owns table entries [offset, offset + code_length). */
+  for (index = 0; index < histograms_size; ++index, offset += code_length) {
+    BuildAndStoreHuffmanTree(histograms[index].data_, code_length,
+        alphabet_size, tree, self->depths_ + offset, self->bits_ + offset,
+        storage_ix, storage);
+  }
+}
+
+#undef HistogramType
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/block_splitter.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/block_splitter.c
new file mode 100644
index 0000000000..eba1b691e5
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/block_splitter.c
@@ -0,0 +1,217 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Block split point selection utilities. */
+
+#include "block_splitter.h"
+
+#include <string.h>  /* memcpy, memset */
+
+#include "../common/platform.h"
+#include "bit_cost.h"
+#include "cluster.h"
+#include "command.h"
+#include "fast_log.h"
+#include "histogram.h"
+#include "memory.h"
+#include "quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static const size_t kMaxLiteralHistograms = 100;
+static const size_t kMaxCommandHistograms = 50;  /* Also used for distances. */
+static const double kLiteralBlockSwitchCost = 28.1;
+static const double kCommandBlockSwitchCost = 13.5;
+static const double kDistanceBlockSwitchCost = 14.6;
+static const size_t kLiteralStrideLength = 70;
+static const size_t kCommandStrideLength = 40;
+static const size_t kDistanceStrideLength = 40;
+static const size_t kSymbolsPerLiteralHistogram = 544;
+static const size_t kSymbolsPerCommandHistogram = 530;
+static const size_t kSymbolsPerDistanceHistogram = 544;
+static const size_t kMinLengthForBlockSplitting = 128;
+static const size_t kIterMulForRefining = 2;  /* iters = mul*length/stride */
+static const size_t kMinItersForRefining = 100;  /* ... plus this constant. */
+
+static size_t CountLiterals(const Command* cmds, const size_t num_commands) {
+  /* The literal count is the sum of the insert lengths of all commands. */
+  size_t left = num_commands;
+  size_t literal_total = 0;
+  for (; left != 0; --left, ++cmds) {
+    literal_total += cmds->insert_len_;
+  }
+  return literal_total;
+}
+
+static void CopyLiteralsToByteArray(const Command* cmds,
+                                    const size_t num_commands,
+                                    const uint8_t* data,
+                                    const size_t offset,
+                                    const size_t mask,
+                                    uint8_t* literals) {
+  size_t pos = 0;  /* Write position in |literals|. */
+  size_t from_pos = offset & mask;  /* Read position in |data|. */
+  size_t i;
+  for (i = 0; i < num_commands; ++i) {
+    size_t insert_len = cmds[i].insert_len_;
+    if (from_pos + insert_len > mask) {  /* Run reaches index |mask|. */
+      size_t head_size = mask + 1 - from_pos;  /* Bytes up to index |mask|. */
+      memcpy(literals + pos, data + from_pos, head_size);
+      from_pos = 0;  /* The rest is read from the start of |data|. */
+      pos += head_size;
+      insert_len -= head_size;
+    }
+    if (insert_len > 0) {
+      memcpy(literals + pos, data + from_pos, insert_len);
+      pos += insert_len;
+    }
+    from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask;
+  }
+}
+
+static BROTLI_INLINE uint32_t MyRand(uint32_t* seed) {
+  /* Initial seed should be 7. In this case, loop length is (1 << 29). */
+  const uint32_t next = *seed * 16807U;
+  return *seed = next;
+}
+
+static BROTLI_INLINE double BitCost(size_t count) {
+  return (count != 0) ? FastLog2(count) : -2.0;  /* Zero count maps to -2. */
+}
+
+#define HISTOGRAMS_PER_BATCH 64  /* Blocks pre-clustered per batch. */
+#define CLUSTERS_PER_BATCH 16  /* Sizes the initial cluster capacity. */
+
+#define FN(X) X ## Literal
+#define DataType uint8_t
+/* NOLINTNEXTLINE(build/include) */
+#include "block_splitter_inc.h"
+#undef DataType
+#undef FN
+
+#define FN(X) X ## Command
+#define DataType uint16_t
+/* NOLINTNEXTLINE(build/include) */
+#include "block_splitter_inc.h"
+#undef FN  /* DataType stays uint16_t for the Distance instantiation. */
+
+#define FN(X) X ## Distance
+/* NOLINTNEXTLINE(build/include) */
+#include "block_splitter_inc.h"
+#undef DataType
+#undef FN
+
+void BrotliInitBlockSplit(BlockSplit* self) {
+  /* Empty split: no blocks, no types and no backing arrays. */
+  self->types = 0;
+  self->types_alloc_size = 0;
+  self->lengths = 0;
+  self->lengths_alloc_size = 0;
+  self->num_types = self->num_blocks = 0;
+}
+
+void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
+  BROTLI_FREE(m, self->types);  /* Releases the |types| array. */
+  BROTLI_FREE(m, self->lengths);  /* Releases the |lengths| array. */
+}
+
+/* Extracts literals, command prefix codes and distance prefix codes, then
+ * applies SplitByteVector to each array to fill the three block splits. */
+void BrotliSplitBlock(MemoryManager* m,
+                      const Command* cmds,
+                      const size_t num_commands,
+                      const uint8_t* data,
+                      const size_t pos,
+                      const size_t mask,
+                      const BrotliEncoderParams* params,
+                      BlockSplit* literal_split,
+                      BlockSplit* insert_and_copy_split,
+                      BlockSplit* dist_split) {
+  {
+    size_t literals_count = CountLiterals(cmds, num_commands);
+    uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count);
+    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literals)) return;
+    /* Create a continuous array of literals. */
+    CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
+    /* Create the block split on the array of literals.
+     * Literal histograms can have alphabet size up to 256.
+     * Though, to accommodate context modeling, less than half of maximum size
+     * is allowed. */
+    SplitByteVectorLiteral(
+        m, literals, literals_count,
+        kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
+        kLiteralStrideLength, kLiteralBlockSwitchCost, params,
+        literal_split);
+    if (BROTLI_IS_OOM(m)) return;  /* Returns before the free below. */
+    BROTLI_FREE(m, literals);
+    /* NB: this might be a good place for injecting extra splitting without
+     *     increasing encoder complexity; however, output partition would be
+     *     less optimal than one produced with forced splitting inside
+     *     SplitByteVector (FindBlocks / ClusterBlocks). */
+  }
+
+  {
+    /* Compute prefix codes for commands. */
+    uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
+    size_t i;
+    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(insert_and_copy_codes)) return;
+    for (i = 0; i < num_commands; ++i) {
+      insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
+    }
+    /* Create the block split on the array of command prefixes. */
+    SplitByteVectorCommand(
+        m, insert_and_copy_codes, num_commands,
+        kSymbolsPerCommandHistogram, kMaxCommandHistograms,
+        kCommandStrideLength, kCommandBlockSwitchCost, params,
+        insert_and_copy_split);
+    if (BROTLI_IS_OOM(m)) return;  /* Returns before the free below. */
+    /* TODO(eustas): reuse for distances? */
+    BROTLI_FREE(m, insert_and_copy_codes);
+  }
+
+  {
+    /* Create a continuous array of distance prefixes. */
+    uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands);
+    size_t j = 0;  /* Number of distance prefixes collected. */
+    size_t i;
+    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(distance_prefixes)) return;
+    for (i = 0; i < num_commands; ++i) {
+      const Command* cmd = &cmds[i];
+      if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
+        distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF;  /* Low 10 bits. */
+      }
+    }
+    /* Split the distance prefixes; the cap is kMaxCommandHistograms. */
+    SplitByteVectorDistance(
+        m, distance_prefixes, j,
+        kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
+        kDistanceStrideLength, kDistanceBlockSwitchCost, params,
+        dist_split);
+    if (BROTLI_IS_OOM(m)) return;  /* Returns before the free below. */
+    BROTLI_FREE(m, distance_prefixes);
+  }
+}
+
+#if defined(BROTLI_TEST)  /* Non-static wrappers around the static helpers. */
+size_t CountLiteralsForTest(const Command*, const size_t);
+size_t CountLiteralsForTest(const Command* cmds, const size_t num_commands) {
+  return CountLiterals(cmds, num_commands);
+}
+
+void CopyLiteralsToByteArrayForTest(const Command*,
+    const size_t, const uint8_t*, const size_t, const size_t, uint8_t*);
+void CopyLiteralsToByteArrayForTest(const Command* cmds,
+    const size_t num_commands, const uint8_t* data, const size_t offset,
+    const size_t mask, uint8_t* literals) {
+  CopyLiteralsToByteArray(cmds, num_commands, data, offset, mask, literals);
+}
+#endif  /* BROTLI_TEST */
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/block_splitter.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/block_splitter.h
new file mode 100644
index 0000000000..6046b90a5d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/block_splitter.h
@@ -0,0 +1,52 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Block split point selection utilities. */
+
+#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
+#define BROTLI_ENC_BLOCK_SPLITTER_H_
+
+#include <brotli/types.h>
+
+#include "../common/platform.h"
+#include "command.h"
+#include "memory.h"
+#include "quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct BlockSplit {
+  size_t num_types;  /* Amount of distinct types */
+  size_t num_blocks;  /* Amount of values in types and lengths */
+  uint8_t* types;  /* Type id of each block */
+  uint32_t* lengths;  /* Length of each block */
+
+  size_t types_alloc_size;  /* Allocated capacity of |types| */
+  size_t lengths_alloc_size;  /* Allocated capacity of |lengths| */
+} BlockSplit;
+
+BROTLI_INTERNAL void BrotliInitBlockSplit(BlockSplit* self);
+BROTLI_INTERNAL void BrotliDestroyBlockSplit(MemoryManager* m,
+                                             BlockSplit* self);
+
+BROTLI_INTERNAL void BrotliSplitBlock(MemoryManager* m,
+                                      const Command* cmds,
+                                      const size_t num_commands,
+                                      const uint8_t* data,
+                                      const size_t offset,
+                                      const size_t mask,
+                                      const BrotliEncoderParams* params,
+                                      BlockSplit* literal_split,
+                                      BlockSplit* insert_and_copy_split,
+                                      BlockSplit* dist_split);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_BLOCK_SPLITTER_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/block_splitter_inc.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/block_splitter_inc.h
new file mode 100644
index 0000000000..aa40bfd329
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/block_splitter_inc.h
@@ -0,0 +1,481 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, DataType */
+
+#define HistogramType FN(Histogram)
+
+static void FN(InitialEntropyCodes)(const DataType* data, size_t length,
+                                    size_t stride,
+                                    size_t num_histograms,
+                                    HistogramType* histograms) {
+  uint32_t seed = 7;  /* Fixed seed: the sample positions are repeatable. */
+  size_t block_length = length / num_histograms;
+  size_t i;
+  FN(ClearHistograms)(histograms, num_histograms);
+  for (i = 0; i < num_histograms; ++i) {
+    size_t pos = length * i / num_histograms;  /* Start of the i-th slice. */
+    if (i != 0) {  /* Every sample but the first gets a random offset. */
+      pos += MyRand(&seed) % block_length;  /* Assumes block_length != 0. */
+    }
+    if (pos + stride >= length) {  /* Keep the sample inside |data|. */
+      pos = length - stride - 1;  /* NOTE(review): assumes length > stride. */
+    }
+    FN(HistogramAddVector)(&histograms[i], data + pos, stride);
+  }
+}
+
+static void FN(RandomSample)(uint32_t* seed,
+                             const DataType* data,
+                             size_t length,
+                             size_t stride,
+                             HistogramType* sample) {
+  /* Adds one window of |data| to |sample|; its start is picked via |seed|. */
+  if (stride < length) {
+    data += MyRand(seed) % (length - stride + 1);
+  } else {
+    stride = length;  /* The window is all of |data|; |seed| is not advanced. */
+  }
+  FN(HistogramAddVector)(sample, data, stride);
+}
+
+static void FN(RefineEntropyCodes)(const DataType* data, size_t length,
+                                   size_t stride,
+                                   size_t num_histograms,
+                                   HistogramType* histograms,
+                                   HistogramType* tmp) {
+  size_t total = kIterMulForRefining * length / stride + kMinItersForRefining;
+  size_t done = 0;  /* Samples folded in so far. */
+  uint32_t seed = 7;
+  total += num_histograms - 1;
+  total -= total % num_histograms;  /* Rounded up to a multiple. */
+  for (; done < total; ++done) {  /* Histograms are fed in round-robin order. */
+    FN(HistogramClear)(tmp);
+    FN(RandomSample)(&seed, data, length, stride, tmp);
+    FN(HistogramAddHistogram)(histograms + done % num_histograms, tmp);
+  }
+}
+
+/* Assigns a block id from the range [0, num_histograms) to each data element
+   in data[0..length) and fills in block_id[0..length) with the assigned values.
+   Returns the number of blocks, i.e. one plus the number of block switches. */
+static size_t FN(FindBlocks)(const DataType* data, const size_t length,
+                             const double block_switch_bitcost,
+                             const size_t num_histograms,
+                             const HistogramType* histograms,
+                             double* insert_cost,  /* scratch, cleared below */
+                             double* cost,  /* scratch, cleared below */
+                             uint8_t* switch_signal,  /* scratch, cleared */
+                             uint8_t* block_id) {
+  const size_t alphabet_size = FN(HistogramDataSize)();
+  const size_t bitmap_len = (num_histograms + 7) >> 3;  /* 1 bit/histogram */
+  size_t num_blocks = 1;
+  size_t byte_ix;
+  size_t i;
+  size_t j;
+  BROTLI_DCHECK(num_histograms <= 256);
+
+  /* Trivial case: single histogram -> single block type. */
+  if (num_histograms <= 1) {
+    for (i = 0; i < length; ++i) {
+      block_id[i] = 0;
+    }
+    return 1;
+  }
+
+  /* Fill bitcost for each symbol of all histograms.
+   * Non-existing symbol cost: 2 + log2(total_count).
+   * Regular symbol cost: -log2(symbol_count / total_count). */
+  memset(insert_cost, 0,
+         sizeof(insert_cost[0]) * alphabet_size * num_histograms);
+  for (i = 0; i < num_histograms; ++i) {
+    insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_);
+  }
+  for (i = alphabet_size; i != 0;) {
+    /* Reverse order to use the 0-th row as a temporary storage. */
+    --i;
+    for (j = 0; j < num_histograms; ++j) {
+      insert_cost[i * num_histograms + j] =
+          insert_cost[j] - BitCost(histograms[j].data_[i]);
+    }
+  }
+
+  /* After each iteration of this loop, cost[k] will contain the difference
+     between the minimum cost of arriving at the current byte position using
+     entropy code k, and the minimum cost of arriving at the current byte
+     position. This difference is capped at the block switch cost, and if it
+     reaches block switch cost, it means that when we trace back from the last
+     position, we need to switch here. */
+  memset(cost, 0, sizeof(cost[0]) * num_histograms);
+  memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmap_len);
+  for (byte_ix = 0; byte_ix < length; ++byte_ix) {
+    size_t ix = byte_ix * bitmap_len;  /* Bitmap row of this position. */
+    size_t symbol = data[byte_ix];
+    size_t insert_cost_ix = symbol * num_histograms;
+    double min_cost = 1e99;
+    double block_switch_cost = block_switch_bitcost;
+    size_t k;
+    for (k = 0; k < num_histograms; ++k) {
+      /* We are coding the symbol with entropy code k. */
+      cost[k] += insert_cost[insert_cost_ix + k];
+      if (cost[k] < min_cost) {
+        min_cost = cost[k];
+        block_id[byte_ix] = (uint8_t)k;  /* Cheapest code so far. */
+      }
+    }
+    /* More blocks for the beginning: the switch cost is scaled by a factor
+       that grows from 0.77 towards 0.84 over the first 2000 positions. */
+    if (byte_ix < 2000) {
+      block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000;
+    }
+    for (k = 0; k < num_histograms; ++k) {
+      cost[k] -= min_cost;
+      if (cost[k] >= block_switch_cost) {
+        const uint8_t mask = (uint8_t)(1u << (k & 7));
+        cost[k] = block_switch_cost;
+        BROTLI_DCHECK((k >> 3) < bitmap_len);
+        switch_signal[ix + (k >> 3)] |= mask;  /* Mark code k at this position. */
+      }
+    }
+  }
+
+  byte_ix = length - 1;  /* NOTE(review): relies on length > 0 here. */
+  {  /* Trace back from the last position and switch at the marked places. */
+    size_t ix = byte_ix * bitmap_len;
+    uint8_t cur_id = block_id[byte_ix];
+    while (byte_ix > 0) {
+      const uint8_t mask = (uint8_t)(1u << (cur_id & 7));
+      BROTLI_DCHECK(((size_t)cur_id >> 3) < bitmap_len);
+      --byte_ix;
+      ix -= bitmap_len;
+      if (switch_signal[ix + (cur_id >> 3)] & mask) {
+        if (cur_id != block_id[byte_ix]) {
+          cur_id = block_id[byte_ix];
+          ++num_blocks;
+        }
+      }
+      block_id[byte_ix] = cur_id;  /* Overwrite with the traced-back id. */
+    }
+  }
+  return num_blocks;
+}
+
+static size_t FN(RemapBlockIds)(uint8_t* block_ids, const size_t length,
+                                uint16_t* new_id, const size_t num_histograms) {
+  /* Renumbers ids in order of first appearance; returns how many are used. */
+  static const uint16_t kInvalidId = 256;
+  uint16_t num_used = 0;
+  size_t pos;
+  for (pos = 0; pos < num_histograms; ++pos) {
+    new_id[pos] = kInvalidId;
+  }
+  for (pos = 0; pos < length; ++pos) {
+    const uint8_t old_id = block_ids[pos];
+    BROTLI_DCHECK(old_id < num_histograms);
+    if (new_id[old_id] == kInvalidId) {
+      new_id[old_id] = num_used++;
+    }
+    block_ids[pos] = (uint8_t)new_id[old_id];
+    BROTLI_DCHECK(block_ids[pos] < num_histograms);
+  }
+  BROTLI_DCHECK(num_used <= num_histograms);
+  return num_used;
+}
+
+static void FN(BuildBlockHistograms)(const DataType* data, const size_t length,
+                                     const uint8_t* block_ids,
+                                     const size_t num_histograms,
+                                     HistogramType* histograms) {
+  size_t pos = 0;
+  FN(ClearHistograms)(histograms, num_histograms);  /* Start from empty. */
+  for (; pos < length; ++pos) {  /* Element goes to its block id's histogram. */
+    FN(HistogramAdd)(histograms + block_ids[pos], data[pos]);
+  }
+}
+
+/* Given the initial partitioning build partitioning with limited number
+ * of histograms (and block types). The result is written to |split|. */
+static void FN(ClusterBlocks)(MemoryManager* m,
+                              const DataType* data, const size_t length,
+                              const size_t num_blocks,
+                              uint8_t* block_ids,
+                              BlockSplit* split) {
+  uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks);
+  uint32_t* u32 =  /* Scratch: 4 per-batch arrays, then the block lengths. */
+      BROTLI_ALLOC(m, uint32_t, num_blocks + 4 * HISTOGRAMS_PER_BATCH);
+  const size_t expected_num_clusters = CLUSTERS_PER_BATCH *
+      (num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH;
+  size_t all_histograms_size = 0;
+  size_t all_histograms_capacity = expected_num_clusters;
+  HistogramType* all_histograms =
+      BROTLI_ALLOC(m, HistogramType, all_histograms_capacity);
+  size_t cluster_size_size = 0;
+  size_t cluster_size_capacity = expected_num_clusters;
+  uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, cluster_size_capacity);
+  size_t num_clusters = 0;
+  HistogramType* histograms = BROTLI_ALLOC(m, HistogramType,
+      BROTLI_MIN(size_t, num_blocks, HISTOGRAMS_PER_BATCH));
+  size_t max_num_pairs =
+      HISTOGRAMS_PER_BATCH * HISTOGRAMS_PER_BATCH / 2;
+  size_t pairs_capacity = max_num_pairs + 1;
+  HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity);
+  size_t pos = 0;  /* Read cursor into |data|. */
+  uint32_t* clusters;
+  size_t num_final_clusters;
+  static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
+  uint32_t* new_index;
+  size_t i;
+  uint32_t* BROTLI_RESTRICT const sizes = u32 + 0 * HISTOGRAMS_PER_BATCH;
+  uint32_t* BROTLI_RESTRICT const new_clusters = u32 + 1 * HISTOGRAMS_PER_BATCH;
+  uint32_t* BROTLI_RESTRICT const symbols = u32 + 2 * HISTOGRAMS_PER_BATCH;
+  uint32_t* BROTLI_RESTRICT const remap = u32 + 3 * HISTOGRAMS_PER_BATCH;
+  uint32_t* BROTLI_RESTRICT const block_lengths =
+      u32 + 4 * HISTOGRAMS_PER_BATCH;
+  /* TODO(eustas): move to arena? */
+  HistogramType* tmp = BROTLI_ALLOC(m, HistogramType, 2);
+
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histogram_symbols) ||
+      BROTLI_IS_NULL(u32) || BROTLI_IS_NULL(all_histograms) ||
+      BROTLI_IS_NULL(cluster_size) || BROTLI_IS_NULL(histograms) ||
+      BROTLI_IS_NULL(pairs) || BROTLI_IS_NULL(tmp)) {
+    return;
+  }
+
+  memset(u32, 0, (num_blocks + 4 * HISTOGRAMS_PER_BATCH) * sizeof(uint32_t));
+
+  /* Calculate block lengths (convert repeating values -> series length). */
+  {
+    size_t block_idx = 0;
+    for (i = 0; i < length; ++i) {
+      BROTLI_DCHECK(block_idx < num_blocks);
+      ++block_lengths[block_idx];
+      if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
+        ++block_idx;
+      }
+    }
+    BROTLI_DCHECK(block_idx == num_blocks);
+  }
+
+  /* Pre-cluster blocks in batches of up to HISTOGRAMS_PER_BATCH blocks. */
+  for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) {
+    const size_t num_to_combine =
+        BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH);
+    size_t num_new_clusters;
+    size_t j;
+    for (j = 0; j < num_to_combine; ++j) {  /* One histogram per block. */
+      size_t k;
+      size_t block_length = block_lengths[i + j];
+      FN(HistogramClear)(&histograms[j]);
+      for (k = 0; k < block_length; ++k) {
+        FN(HistogramAdd)(&histograms[j], data[pos++]);
+      }
+      histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]);
+      new_clusters[j] = (uint32_t)j;
+      symbols[j] = (uint32_t)j;
+      sizes[j] = 1;
+    }
+    num_new_clusters = FN(BrotliHistogramCombine)(
+        histograms, tmp, sizes, symbols, new_clusters, pairs, num_to_combine,
+        num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs);
+    BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms,
+        all_histograms_capacity, all_histograms_size + num_new_clusters);
+    BROTLI_ENSURE_CAPACITY(m, uint32_t, cluster_size,
+        cluster_size_capacity, cluster_size_size + num_new_clusters);
+    if (BROTLI_IS_OOM(m)) return;
+    for (j = 0; j < num_new_clusters; ++j) {  /* Append the batch's clusters. */
+      all_histograms[all_histograms_size++] = histograms[new_clusters[j]];
+      cluster_size[cluster_size_size++] = sizes[new_clusters[j]];
+      remap[new_clusters[j]] = (uint32_t)j;
+    }
+    for (j = 0; j < num_to_combine; ++j) {  /* Batch-local -> global index. */
+      histogram_symbols[i + j] = (uint32_t)num_clusters + remap[symbols[j]];
+    }
+    num_clusters += num_new_clusters;
+    BROTLI_DCHECK(num_clusters == cluster_size_size);
+    BROTLI_DCHECK(num_clusters == all_histograms_size);
+  }
+  BROTLI_FREE(m, histograms);  /* The per-batch scratch is no longer used. */
+
+  /* Final clustering over the clusters collected from all batches. */
+  max_num_pairs =
+      BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters);
+  if (pairs_capacity < max_num_pairs + 1) {  /* Need a larger pair buffer. */
+    BROTLI_FREE(m, pairs);
+    pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1);
+    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(pairs)) return;
+  }
+  clusters = BROTLI_ALLOC(m, uint32_t, num_clusters);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(clusters)) return;
+  for (i = 0; i < num_clusters; ++i) {
+    clusters[i] = (uint32_t)i;
+  }
+  num_final_clusters = FN(BrotliHistogramCombine)(
+      all_histograms, tmp, cluster_size, histogram_symbols, clusters, pairs,
+      num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES,
+      max_num_pairs);
+  BROTLI_FREE(m, pairs);
+  BROTLI_FREE(m, cluster_size);
+
+  /* Assign blocks to final histograms. */
+  new_index = BROTLI_ALLOC(m, uint32_t, num_clusters);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return;
+  for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex;
+  pos = 0;  /* Rewind: |data| is read block by block a second time. */
+  {
+    uint32_t next_index = 0;
+    for (i = 0; i < num_blocks; ++i) {
+      size_t j;
+      uint32_t best_out;
+      double best_bits;
+      FN(HistogramClear)(tmp);
+      for (j = 0; j < block_lengths[i]; ++j) {
+        FN(HistogramAdd)(tmp, data[pos++]);
+      }
+      /* Among equally good histograms prefer last used. */
+      /* TODO(eustas): should we give a block-switch discount here? */
+      best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1];
+      best_bits = FN(BrotliHistogramBitCostDistance)(
+          tmp, &all_histograms[best_out], tmp + 1);
+      for (j = 0; j < num_final_clusters; ++j) {
+        const double cur_bits = FN(BrotliHistogramBitCostDistance)(
+            tmp, &all_histograms[clusters[j]], tmp + 1);
+        if (cur_bits < best_bits) {
+          best_bits = cur_bits;
+          best_out = clusters[j];
+        }
+      }
+      histogram_symbols[i] = best_out;
+      if (new_index[best_out] == kInvalidIndex) {  /* First use: next id. */
+        new_index[best_out] = next_index++;
+      }
+    }
+  }
+  BROTLI_FREE(m, tmp);
+  BROTLI_FREE(m, clusters);
+  BROTLI_FREE(m, all_histograms);
+  BROTLI_ENSURE_CAPACITY(
+      m, uint8_t, split->types, split->types_alloc_size, num_blocks);
+  BROTLI_ENSURE_CAPACITY(
+      m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks);
+  if (BROTLI_IS_OOM(m)) return;
+
+  /* Rewrite final assignment to block-split. There might be fewer blocks
+   * than |num_blocks| due to clustering. */
+  {
+    uint32_t cur_length = 0;
+    size_t block_idx = 0;
+    uint8_t max_type = 0;
+    for (i = 0; i < num_blocks; ++i) {
+      cur_length += block_lengths[i];  /* Merge runs with the same symbol. */
+      if (i + 1 == num_blocks ||
+          histogram_symbols[i] != histogram_symbols[i + 1]) {
+        const uint8_t id = (uint8_t)new_index[histogram_symbols[i]];
+        split->types[block_idx] = id;
+        split->lengths[block_idx] = cur_length;
+        max_type = BROTLI_MAX(uint8_t, max_type, id);
+        cur_length = 0;
+        ++block_idx;
+      }
+    }
+    split->num_blocks = block_idx;
+    split->num_types = (size_t)max_type + 1;
+  }
+  BROTLI_FREE(m, new_index);
+  BROTLI_FREE(m, u32);
+  BROTLI_FREE(m, histogram_symbols);
+}
+
+/* Create BlockSplit (partitioning) given the limits, estimates and "effort"
+ * parameters; the result is written to |split|.
+ * On allocation failure (reported through |m|) the function returns early.
+ * NB: max_histograms is often less than number of histograms allowed by format;
+ *     this is done intentionally, to save some "space" for context-aware
+ *     clustering (here entropy is estimated for context-free symbols). */
+static void FN(SplitByteVector)(MemoryManager* m,
+                                const DataType* data, const size_t length,
+                                const size_t symbols_per_histogram,
+                                const size_t max_histograms,
+                                const size_t sampling_stride_length,
+                                const double block_switch_cost,
+                                const BrotliEncoderParams* params,
+                                BlockSplit* split) {
+  const size_t data_size = FN(HistogramDataSize)();
+  HistogramType* histograms;
+  HistogramType* tmp;  /* scratch histogram, set up after allocation below */
+  /* Calculate number of histograms; initial estimate is one histogram per
+   * specified amount of symbols; however, this value is capped. */
+  size_t num_histograms = length / symbols_per_histogram + 1;
+  if (num_histograms > max_histograms) {
+    num_histograms = max_histograms;
+  }
+
+  /* Corner case: no input; no block is appended to |split|. */
+  if (length == 0) {
+    split->num_types = 1;
+    return;
+  }
+
+  if (length < kMinLengthForBlockSplitting) {  /* single block of type 0 */
+    BROTLI_ENSURE_CAPACITY(m, uint8_t,
+        split->types, split->types_alloc_size, split->num_blocks + 1);
+    BROTLI_ENSURE_CAPACITY(m, uint32_t,
+        split->lengths, split->lengths_alloc_size, split->num_blocks + 1);
+    if (BROTLI_IS_OOM(m)) return;
+    split->num_types = 1;
+    split->types[split->num_blocks] = 0;
+    split->lengths[split->num_blocks] = (uint32_t)length;
+    split->num_blocks++;
+    return;
+  }
+  histograms = BROTLI_ALLOC(m, HistogramType, num_histograms + 1);
+  tmp = histograms + num_histograms;  /* the extra (+ 1) slot is the scratch */
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histograms)) return;
+  /* Find good entropy codes. */
+  FN(InitialEntropyCodes)(data, length,
+                          sampling_stride_length,
+                          num_histograms, histograms);
+  FN(RefineEntropyCodes)(data, length,
+                         sampling_stride_length,
+                         num_histograms, histograms, tmp);
+  {
+    /* Find a good path through literals with the good entropy codes. */
+    uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length);
+    size_t num_blocks = 0;
+    const size_t bitmaplen = (num_histograms + 7) >> 3;
+    double* insert_cost = BROTLI_ALLOC(m, double, data_size * num_histograms);
+    double* cost = BROTLI_ALLOC(m, double, num_histograms);
+    uint8_t* switch_signal = BROTLI_ALLOC(m, uint8_t, length * bitmaplen);
+    uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms);
+    const size_t iters = params->quality < HQ_ZOPFLIFICATION_QUALITY ? 3 : 10;
+    size_t i;
+    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(block_ids) ||
+        BROTLI_IS_NULL(insert_cost) || BROTLI_IS_NULL(cost) ||
+        BROTLI_IS_NULL(switch_signal) || BROTLI_IS_NULL(new_id)) {
+      return;
+    }
+    for (i = 0; i < iters; ++i) {  /* search blocks, rebuild histograms */
+      num_blocks = FN(FindBlocks)(data, length,
+                                  block_switch_cost,
+                                  num_histograms, histograms,
+                                  insert_cost, cost, switch_signal,
+                                  block_ids);
+      num_histograms = FN(RemapBlockIds)(block_ids, length,
+                                         new_id, num_histograms);
+      FN(BuildBlockHistograms)(data, length, block_ids,
+                               num_histograms, histograms);
+    }
+    BROTLI_FREE(m, insert_cost);
+    BROTLI_FREE(m, cost);
+    BROTLI_FREE(m, switch_signal);
+    BROTLI_FREE(m, new_id);
+    BROTLI_FREE(m, histograms);  /* also releases |tmp| */
+    FN(ClusterBlocks)(m, data, length, num_blocks, block_ids, split);
+    if (BROTLI_IS_OOM(m)) return;
+    BROTLI_FREE(m, block_ids);
+  }
+}
+
+#undef HistogramType
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/brotli_bit_stream.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/brotli_bit_stream.c
new file mode 100644
index 0000000000..5fa0c69aa8
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/brotli_bit_stream.c
@@ -0,0 +1,1336 @@
+/* Copyright 2014 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Brotli bit stream functions to support the low level format. There are no
+   compression algorithms here, just the right ordering of bits to match the
+   specs. */
+
+#include "brotli_bit_stream.h"
+
+#include <string.h>  /* memcpy, memset */
+
+#include <brotli/types.h>
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/platform.h"
+#include "entropy_encode.h"
+#include "entropy_encode_static.h"
+#include "fast_log.h"
+#include "histogram.h"
+#include "memory.h"
+#include "write_bits.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define MAX_HUFFMAN_TREE_SIZE (2 * BROTLI_NUM_COMMAND_SYMBOLS + 1)
+/* The maximum size of Huffman dictionary for distances assuming that
+   NPOSTFIX = 0 and NDIRECT = 0. */
+#define MAX_SIMPLE_DISTANCE_ALPHABET_SIZE \
+  BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_LARGE_MAX_DISTANCE_BITS)
+/* MAX_SIMPLE_DISTANCE_ALPHABET_SIZE == 140 */
+
+static BROTLI_INLINE uint32_t BlockLengthPrefixCode(uint32_t len) {
+  uint32_t code = (len >= 177) ? (len >= 753 ? 20 : 14) : (len >= 41 ? 7 : 0);
+  while (code < (BROTLI_NUM_BLOCK_LEN_SYMBOLS - 1) &&
+      len >= _kBrotliPrefixCodeRanges[code + 1].offset) ++code;
+  return code;  /* index into _kBrotliPrefixCodeRanges */
+}
+
+static BROTLI_INLINE void GetBlockLengthPrefixCode(uint32_t len, size_t* code,
+    uint32_t* n_extra, uint32_t* extra) {
+  *code = BlockLengthPrefixCode(len);
+  *n_extra = _kBrotliPrefixCodeRanges[*code].nbits;  /* extra-bit count */
+  *extra = len - _kBrotliPrefixCodeRanges[*code].offset;  /* extra-bit value */
+}
+
+typedef struct BlockTypeCodeCalculator {
+  size_t last_type;  /* most recently coded block type */
+  size_t second_last_type;  /* block type coded before |last_type| */
+} BlockTypeCodeCalculator;
+
+static void InitBlockTypeCodeCalculator(BlockTypeCodeCalculator* self) {
+  self->last_type = 1;  /* initial history, before any block is coded */
+  self->second_last_type = 0;
+}
+
+static BROTLI_INLINE size_t NextBlockTypeCode(
+    BlockTypeCodeCalculator* calculator, uint8_t type) {
+  size_t type_code = (type == calculator->last_type + 1) ? 1u :
+      (type == calculator->second_last_type) ? 0u : type + 2u;
+  calculator->second_last_type = calculator->last_type;  /* shift history */
+  calculator->last_type = type;
+  return type_code;  /* 1: last + 1, 0: second-last, otherwise type + 2 */
+}
+
+/* |nibblesbits| represents the 2 bits to encode MNIBBLES (0-3)
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
+static void BrotliEncodeMlen(size_t length, uint64_t* bits,
+                             size_t* numbits, uint64_t* nibblesbits) {
+  size_t lg = (length == 1) ? 1 : Log2FloorNonZero((uint32_t)(length - 1)) + 1;
+  size_t mnibbles = (lg < 16 ? 16 : (lg + 3)) / 4;
+  BROTLI_DCHECK(length > 0);
+  BROTLI_DCHECK(length <= (1 << 24));
+  BROTLI_DCHECK(lg <= 24);
+  *nibblesbits = mnibbles - 4;  /* mnibbles is 4, 5 or 6 when lg <= 24 */
+  *numbits = mnibbles * 4;  /* bit width of the length field */
+  *bits = length - 1;  /* the length is stored minus one */
+}
+
+static BROTLI_INLINE void StoreCommandExtra(
+    const Command* cmd, size_t* storage_ix, uint8_t* storage) {
+  uint32_t copylen_code = CommandCopyLenCode(cmd);
+  uint16_t inscode = GetInsertLengthCode(cmd->insert_len_);
+  uint16_t copycode = GetCopyLengthCode(copylen_code);
+  uint32_t insnumextra = GetInsertExtra(inscode);
+  uint64_t insextraval = cmd->insert_len_ - GetInsertBase(inscode);
+  uint64_t copyextraval = copylen_code - GetCopyBase(copycode);
+  uint64_t bits = (copyextraval << insnumextra) | insextraval;
+  BrotliWriteBits(  /* insert extra occupies the low bits, copy extra above */
+      insnumextra + GetCopyExtra(copycode), bits, storage_ix, storage);
+}
+
+/* Data structure that stores almost everything that is needed to encode each
+   block switch command. */
+typedef struct BlockSplitCode {
+  BlockTypeCodeCalculator type_code_calculator;
+  uint8_t type_depths[BROTLI_MAX_BLOCK_TYPE_SYMBOLS];  /* bit counts */
+  uint16_t type_bits[BROTLI_MAX_BLOCK_TYPE_SYMBOLS];  /* code words */
+  uint8_t length_depths[BROTLI_NUM_BLOCK_LEN_SYMBOLS];  /* bit counts */
+  uint16_t length_bits[BROTLI_NUM_BLOCK_LEN_SYMBOLS];  /* code words */
+} BlockSplitCode;
+
+/* Stores a number between 0 and 255. */
+static void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage) {
+  if (n == 0) {
+    BrotliWriteBits(1, 0, storage_ix, storage);  /* flag: value is zero */
+  } else {
+    size_t nbits = Log2FloorNonZero(n);
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* flag: value is non-zero */
+    BrotliWriteBits(3, nbits, storage_ix, storage);  /* width of the tail */
+    BrotliWriteBits(nbits, n - ((size_t)1 << nbits), storage_ix, storage);
+  }
+}
+
+/* Stores the compressed meta-block header.
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
+static void StoreCompressedMetaBlockHeader(BROTLI_BOOL is_final_block,
+                                           size_t length,
+                                           size_t* storage_ix,
+                                           uint8_t* storage) {
+  uint64_t lenbits;
+  size_t nlenbits;
+  uint64_t nibblesbits;
+
+  /* Write ISLAST bit. */
+  BrotliWriteBits(1, (uint64_t)is_final_block, storage_ix, storage);
+  /* Write ISEMPTY bit (always 0 here; only written for the final block). */
+  if (is_final_block) {
+    BrotliWriteBits(1, 0, storage_ix, storage);
+  }
+
+  BrotliEncodeMlen(length, &lenbits, &nlenbits, &nibblesbits);
+  BrotliWriteBits(2, nibblesbits, storage_ix, storage);  /* MNIBBLES */
+  BrotliWriteBits(nlenbits, lenbits, storage_ix, storage);  /* length - 1 */
+
+  if (!is_final_block) {
+    /* Write ISUNCOMPRESSED bit (0; only written for non-final blocks). */
+    BrotliWriteBits(1, 0, storage_ix, storage);
+  }
+}
+
+/* Stores the uncompressed meta-block header.
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
+static void BrotliStoreUncompressedMetaBlockHeader(size_t length,
+                                                   size_t* storage_ix,
+                                                   uint8_t* storage) {
+  uint64_t lenbits;
+  size_t nlenbits;
+  uint64_t nibblesbits;
+
+  /* Write ISLAST bit.
+     Uncompressed block cannot be the last one, so set to 0. */
+  BrotliWriteBits(1, 0, storage_ix, storage);
+  BrotliEncodeMlen(length, &lenbits, &nlenbits, &nibblesbits);
+  BrotliWriteBits(2, nibblesbits, storage_ix, storage);  /* MNIBBLES */
+  BrotliWriteBits(nlenbits, lenbits, storage_ix, storage);  /* length - 1 */
+  /* Write ISUNCOMPRESSED bit. */
+  BrotliWriteBits(1, 1, storage_ix, storage);
+}
+
+static void BrotliStoreHuffmanTreeOfHuffmanTreeToBitMask(
+    const int num_codes, const uint8_t* code_length_bitdepth,
+    size_t* storage_ix, uint8_t* storage) {
+  static const uint8_t kStorageOrder[BROTLI_CODE_LENGTH_CODES] = {
+    1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15
+  };
+  /* The bit lengths of the Huffman code over the code length alphabet
+     are compressed with the following static Huffman code:
+       Symbol   Code
+       ------   ----
+       0          00
+       1        1110
+       2         110
+       3          01
+       4          10
+       5        1111 */
+  static const uint8_t kHuffmanBitLengthHuffmanCodeSymbols[6] = {
+     0, 7, 3, 2, 1, 15
+  };
+  static const uint8_t kHuffmanBitLengthHuffmanCodeBitLengths[6] = {
+    2, 4, 3, 2, 2, 4
+  };
+
+  size_t skip_some = 0;  /* skips none. */
+
+  /* Throw away trailing zeros (trailing in kStorageOrder order): */
+  size_t codes_to_store = BROTLI_CODE_LENGTH_CODES;
+  if (num_codes > 1) {  /* no trimming when at most one code is used */
+    for (; codes_to_store > 0; --codes_to_store) {
+      if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
+        break;
+      }
+    }
+  }
+  if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
+      code_length_bitdepth[kStorageOrder[1]] == 0) {
+    skip_some = 2;  /* skips two. */
+    if (code_length_bitdepth[kStorageOrder[2]] == 0) {
+      skip_some = 3;  /* skips three. */
+    }
+  }
+  BrotliWriteBits(2, skip_some, storage_ix, storage);  /* 0, 2 or 3 */
+  {
+    size_t i;
+    for (i = skip_some; i < codes_to_store; ++i) {
+      size_t l = code_length_bitdepth[kStorageOrder[i]];
+      BrotliWriteBits(kHuffmanBitLengthHuffmanCodeBitLengths[l],
+          kHuffmanBitLengthHuffmanCodeSymbols[l], storage_ix, storage);
+    }
+  }
+}
+
+static void BrotliStoreHuffmanTreeToBitMask(
+    const size_t huffman_tree_size, const uint8_t* huffman_tree,
+    const uint8_t* huffman_tree_extra_bits, const uint8_t* code_length_bitdepth,
+    const uint16_t* code_length_bitdepth_symbols,
+    size_t* BROTLI_RESTRICT storage_ix, uint8_t* BROTLI_RESTRICT storage) {
+  size_t i;
+  for (i = 0; i < huffman_tree_size; ++i) {
+    size_t ix = huffman_tree[i];  /* code-length symbol at position i */
+    BrotliWriteBits(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix],
+                    storage_ix, storage);
+    /* Extra bits: only the two repeat codes carry them (2 or 3 bits). */
+    switch (ix) {
+      case BROTLI_REPEAT_PREVIOUS_CODE_LENGTH:
+        BrotliWriteBits(2, huffman_tree_extra_bits[i], storage_ix, storage);
+        break;
+      case BROTLI_REPEAT_ZERO_CODE_LENGTH:
+        BrotliWriteBits(3, huffman_tree_extra_bits[i], storage_ix, storage);
+        break;
+    }
+  }
+}
+
+static void StoreSimpleHuffmanTree(const uint8_t* depths,
+                                   size_t symbols[4],
+                                   size_t num_symbols,
+                                   size_t max_bits,
+                                   size_t* storage_ix, uint8_t* storage) {
+  /* value of 1 indicates a simple Huffman code */
+  BrotliWriteBits(2, 1, storage_ix, storage);
+  BrotliWriteBits(2, num_symbols - 1, storage_ix, storage);  /* NSYM - 1 */
+
+  {
+    /* Sort |symbols| in place by increasing depth. */
+    size_t i;
+    for (i = 0; i < num_symbols; i++) {
+      size_t j;
+      for (j = i + 1; j < num_symbols; j++) {
+        if (depths[symbols[j]] < depths[symbols[i]]) {
+          BROTLI_SWAP(size_t, symbols, j, i);
+        }
+      }
+    }
+  }
+
+  if (num_symbols == 2) {  /* each symbol is written with max_bits bits */
+    BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+    BrotliWriteBits(max_bits, symbols[1], storage_ix, storage);
+  } else if (num_symbols == 3) {
+    BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+    BrotliWriteBits(max_bits, symbols[1], storage_ix, storage);
+    BrotliWriteBits(max_bits, symbols[2], storage_ix, storage);
+  } else {
+    BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+    BrotliWriteBits(max_bits, symbols[1], storage_ix, storage);
+    BrotliWriteBits(max_bits, symbols[2], storage_ix, storage);
+    BrotliWriteBits(max_bits, symbols[3], storage_ix, storage);
+    /* tree-select: 1 when the shallowest symbol has depth 1, else 0 */
+    BrotliWriteBits(1, depths[symbols[0]] == 1 ? 1 : 0, storage_ix, storage);
+  }
+}
+
+/* num = alphabet size
+   depths = symbol depths */
+void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
+                            HuffmanTree* tree,
+                            size_t* storage_ix, uint8_t* storage) {
+  /* Write the Huffman tree into the brotli-representation.
+     The command alphabet is the largest, so this allocation will fit all
+     alphabets. */
+  /* TODO(eustas): fix me */
+  uint8_t huffman_tree[BROTLI_NUM_COMMAND_SYMBOLS];
+  uint8_t huffman_tree_extra_bits[BROTLI_NUM_COMMAND_SYMBOLS];
+  size_t huffman_tree_size = 0;
+  uint8_t code_length_bitdepth[BROTLI_CODE_LENGTH_CODES] = { 0 };
+  uint16_t code_length_bitdepth_symbols[BROTLI_CODE_LENGTH_CODES];
+  uint32_t huffman_tree_histogram[BROTLI_CODE_LENGTH_CODES] = { 0 };
+  size_t i;
+  int num_codes = 0;  /* used code-length symbols, counted up to 2 */
+  size_t code = 0;  /* first used code-length symbol */
+
+  BROTLI_DCHECK(num <= BROTLI_NUM_COMMAND_SYMBOLS);
+
+  BrotliWriteHuffmanTree(depths, num, &huffman_tree_size, huffman_tree,
+                         huffman_tree_extra_bits);
+
+  /* Calculate the statistics of the Huffman tree in brotli-representation. */
+  for (i = 0; i < huffman_tree_size; ++i) {
+    ++huffman_tree_histogram[huffman_tree[i]];
+  }
+
+  for (i = 0; i < BROTLI_CODE_LENGTH_CODES; ++i) {
+    if (huffman_tree_histogram[i]) {
+      if (num_codes == 0) {
+        code = i;
+        num_codes = 1;
+      } else if (num_codes == 1) {
+        num_codes = 2;
+        break;
+      }
+    }
+  }
+
+  /* Calculate another Huffman tree, used for compressing the
+     earlier Huffman tree (the code-length sequence built above). */
+  BrotliCreateHuffmanTree(huffman_tree_histogram, BROTLI_CODE_LENGTH_CODES,
+                          5, tree, code_length_bitdepth);
+  BrotliConvertBitDepthsToSymbols(code_length_bitdepth,
+                                  BROTLI_CODE_LENGTH_CODES,
+                                  code_length_bitdepth_symbols);
+
+  /* Now, we have all the data, let's start storing it */
+  BrotliStoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth,
+                                               storage_ix, storage);
+
+  if (num_codes == 1) {  /* a lone symbol is then written with zero bits */
+    code_length_bitdepth[code] = 0;
+  }
+
+  /* Store the real Huffman tree now. */
+  BrotliStoreHuffmanTreeToBitMask(huffman_tree_size,
+                                  huffman_tree,
+                                  huffman_tree_extra_bits,
+                                  code_length_bitdepth,
+                                  code_length_bitdepth_symbols,
+                                  storage_ix, storage);
+}
+
+/* Builds a Huffman tree from histogram[0:length] into depth[0:length] and
+   bits[0:length] and stores the encoded tree to the bit stream. */
+static void BuildAndStoreHuffmanTree(const uint32_t* histogram,
+                                     const size_t histogram_length,
+                                     const size_t alphabet_size,
+                                     HuffmanTree* tree,
+                                     uint8_t* depth,
+                                     uint16_t* bits,
+                                     size_t* storage_ix,
+                                     uint8_t* storage) {
+  size_t count = 0;  /* used symbols, counted up to 5 */
+  size_t s4[4] = { 0 };  /* the first four used symbols */
+  size_t i;
+  size_t max_bits = 0;
+  for (i = 0; i < histogram_length; i++) {
+    if (histogram[i]) {
+      if (count < 4) {
+        s4[count] = i;
+      } else if (count > 4) {
+        break;
+      }
+      count++;
+    }
+  }
+
+  {
+    size_t max_bits_counter = alphabet_size - 1;  /* max_bits = its bit width */
+    while (max_bits_counter) {
+      max_bits_counter >>= 1;
+      ++max_bits;
+    }
+  }
+
+  if (count <= 1) {  /* at most one used symbol: its code has zero bits */
+    BrotliWriteBits(4, 1, storage_ix, storage);
+    BrotliWriteBits(max_bits, s4[0], storage_ix, storage);
+    depth[s4[0]] = 0;
+    bits[s4[0]] = 0;
+    return;
+  }
+
+  memset(depth, 0, histogram_length * sizeof(depth[0]));
+  BrotliCreateHuffmanTree(histogram, histogram_length, 15, tree, depth);
+  BrotliConvertBitDepthsToSymbols(depth, histogram_length, bits);
+
+  if (count <= 4) {  /* 2..4 used symbols: simple code, else the full tree */
+    StoreSimpleHuffmanTree(depth, s4, count, max_bits, storage_ix, storage);
+  } else {
+    BrotliStoreHuffmanTree(depth, histogram_length, tree, storage_ix, storage);
+  }
+}
+
+static BROTLI_INLINE BROTLI_BOOL SortHuffmanTree(
+    const HuffmanTree* v0, const HuffmanTree* v1) {
+  return TO_BROTLI_BOOL(v0->total_count_ < v1->total_count_);  /* less-than */
+}
+
+void BrotliBuildAndStoreHuffmanTreeFast(HuffmanTree* tree,
+                                        const uint32_t* histogram,
+                                        const size_t histogram_total,
+                                        const size_t max_bits,
+                                        uint8_t* depth, uint16_t* bits,
+                                        size_t* storage_ix,
+                                        uint8_t* storage) {
+  size_t count = 0;  /* number of used symbols */
+  size_t symbols[4] = { 0 };  /* the first four used symbols */
+  size_t length = 0;  /* histogram prefix that holds all of histogram_total */
+  size_t total = histogram_total;
+  while (total != 0) {
+    if (histogram[length]) {
+      if (count < 4) {
+        symbols[count] = length;
+      }
+      ++count;
+      total -= histogram[length];
+    }
+    ++length;
+  }
+
+  if (count <= 1) {  /* at most one used symbol: its code has zero bits */
+    BrotliWriteBits(4, 1, storage_ix, storage);
+    BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+    depth[symbols[0]] = 0;
+    bits[symbols[0]] = 0;
+    return;
+  }
+
+  memset(depth, 0, length * sizeof(depth[0]));
+  {
+    uint32_t count_limit;
+    for (count_limit = 1; ; count_limit *= 2) {  /* retry until depths fit */
+      HuffmanTree* node = tree;
+      size_t l;
+      for (l = length; l != 0;) {
+        --l;
+        if (histogram[l]) {
+          if (BROTLI_PREDICT_TRUE(histogram[l] >= count_limit)) {
+            InitHuffmanTree(node, histogram[l], -1, (int16_t)l);
+          } else {
+            InitHuffmanTree(node, count_limit, -1, (int16_t)l);
+          }
+          ++node;
+        }
+      }
+      {
+        const int n = (int)(node - tree);
+        HuffmanTree sentinel;
+        int i = 0;      /* Points to the next leaf node. */
+        int j = n + 1;  /* Points to the next non-leaf node. */
+        int k;
+
+        SortHuffmanTreeItems(tree, (size_t)n, SortHuffmanTree);
+        /* The nodes are:
+           [0, n): the sorted leaf nodes that we start with.
+           [n]: we add a sentinel here.
+           [n + 1, 2n): new parent nodes are added here, starting from
+                        (n+1). These are naturally in ascending order.
+           [2n]: we add a sentinel at the end as well.
+           There will be (2n+1) elements at the end. */
+        InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1);
+        *node++ = sentinel;
+        *node++ = sentinel;
+
+        for (k = n - 1; k > 0; --k) {
+          int left, right;
+          if (tree[i].total_count_ <= tree[j].total_count_) {
+            left = i;
+            ++i;
+          } else {
+            left = j;
+            ++j;
+          }
+          if (tree[i].total_count_ <= tree[j].total_count_) {
+            right = i;
+            ++i;
+          } else {
+            right = j;
+            ++j;
+          }
+          /* The sentinel node becomes the parent node. */
+          node[-1].total_count_ =
+              tree[left].total_count_ + tree[right].total_count_;
+          node[-1].index_left_ = (int16_t)left;
+          node[-1].index_right_or_value_ = (int16_t)right;
+          /* Add back the last sentinel node. */
+          *node++ = sentinel;
+        }
+        if (BrotliSetDepth(2 * n - 1, tree, depth, 14)) {
+          /* The tree fits in 14 bits, so we are done. Otherwise count_limit
+             doubles, lifting the smallest counts, and the build is retried. */
+          break;
+        }
+      }
+    }
+  }
+  BrotliConvertBitDepthsToSymbols(depth, length, bits);
+  if (count <= 4) {
+    size_t i;
+    /* value of 1 indicates a simple Huffman code */
+    BrotliWriteBits(2, 1, storage_ix, storage);
+    BrotliWriteBits(2, count - 1, storage_ix, storage);  /* NSYM - 1 */
+
+    /* Sort |symbols| in place by increasing depth. */
+    for (i = 0; i < count; i++) {
+      size_t j;
+      for (j = i + 1; j < count; j++) {
+        if (depth[symbols[j]] < depth[symbols[i]]) {
+          BROTLI_SWAP(size_t, symbols, j, i);
+        }
+      }
+    }
+
+    if (count == 2) {
+      BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+      BrotliWriteBits(max_bits, symbols[1], storage_ix, storage);
+    } else if (count == 3) {
+      BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+      BrotliWriteBits(max_bits, symbols[1], storage_ix, storage);
+      BrotliWriteBits(max_bits, symbols[2], storage_ix, storage);
+    } else {
+      BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+      BrotliWriteBits(max_bits, symbols[1], storage_ix, storage);
+      BrotliWriteBits(max_bits, symbols[2], storage_ix, storage);
+      BrotliWriteBits(max_bits, symbols[3], storage_ix, storage);
+      /* tree-select: 1 when the shallowest symbol has depth 1, else 0 */
+      BrotliWriteBits(1, depth[symbols[0]] == 1 ? 1 : 0, storage_ix, storage);
+    }
+  } else {
+    uint8_t previous_value = 8;  /* initial "previous" code length */
+    size_t i;
+    /* Complex Huffman Tree */
+    StoreStaticCodeLengthCode(storage_ix, storage);
+
+    /* Actual RLE coding: one run of equal depths per iteration. */
+    for (i = 0; i < length;) {
+      const uint8_t value = depth[i];
+      size_t reps = 1;
+      size_t k;
+      for (k = i + 1; k < length && depth[k] == value; ++k) {
+        ++reps;
+      }
+      i += reps;
+      if (value == 0) {
+        BrotliWriteBits(kZeroRepsDepth[reps], kZeroRepsBits[reps],
+                        storage_ix, storage);
+      } else {
+        if (previous_value != value) {  /* emit the new length once first */
+          BrotliWriteBits(kCodeLengthDepth[value], kCodeLengthBits[value],
+                          storage_ix, storage);
+          --reps;
+        }
+        if (reps < 3) {  /* short remainder: emit each length literally */
+          while (reps != 0) {
+            reps--;
+            BrotliWriteBits(kCodeLengthDepth[value], kCodeLengthBits[value],
+                            storage_ix, storage);
+          }
+        } else {
+          reps -= 3;  /* the repeat tables are indexed by (reps - 3) */
+          BrotliWriteBits(kNonZeroRepsDepth[reps], kNonZeroRepsBits[reps],
+                          storage_ix, storage);
+        }
+        previous_value = value;
+      }
+    }
+  }
+}
+
+static size_t IndexOf(const uint8_t* v, size_t v_size, uint8_t value) {
+  size_t i = 0;
+  for (; i < v_size; ++i) {
+    if (v[i] == value) return i;  /* first match */
+  }
+  return i;  /* not found: i == v_size */
+}
+
+static void MoveToFront(uint8_t* v, size_t index) {
+  uint8_t value = v[index];
+  size_t i;
+  for (i = index; i != 0; --i) {  /* shift v[0..index) one slot to the right */
+    v[i] = v[i - 1];
+  }
+  v[0] = value;  /* the element formerly at |index| is now first */
+}
+
+static void MoveToFrontTransform(const uint32_t* BROTLI_RESTRICT v_in,
+                                 const size_t v_size,
+                                 uint32_t* v_out) {
+  size_t i;
+  uint8_t mtf[256];  /* current move-to-front ordering of the values */
+  uint32_t max_value;
+  if (v_size == 0) {
+    return;  /* nothing to transform; v_out is left untouched */
+  }
+  max_value = v_in[0];
+  for (i = 1; i < v_size; ++i) {
+    if (v_in[i] > max_value) max_value = v_in[i];
+  }
+  BROTLI_DCHECK(max_value < 256u);
+  for (i = 0; i <= max_value; ++i) {
+    mtf[i] = (uint8_t)i;  /* start from the identity ordering */
+  }
+  {
+    size_t mtf_size = max_value + 1;
+    for (i = 0; i < v_size; ++i) {
+      size_t index = IndexOf(mtf, mtf_size, (uint8_t)v_in[i]);
+      BROTLI_DCHECK(index < mtf_size);
+      v_out[i] = (uint32_t)index;  /* output the value's current position */
+      MoveToFront(mtf, index);
+    }
+  }
+}
+
+/* Finds runs of zeros in v[0..in_size) and replaces them with a prefix code of
+   the run length plus extra bits (lower 9 bits is the prefix code and the rest
+   are the extra bits). Non-zero values in v[] are shifted by
+   *max_run_length_prefix. Will not create prefix codes bigger than the initial
+   value of *max_run_length_prefix. The prefix code of run length L is simply
+   Log2Floor(L) and the number of extra bits is the same as the prefix code. */
+static void RunLengthCodeZeros(const size_t in_size,
+    uint32_t* BROTLI_RESTRICT v, size_t* BROTLI_RESTRICT out_size,
+    uint32_t* BROTLI_RESTRICT max_run_length_prefix) {
+  uint32_t max_reps = 0;  /* length of the longest run of zeros */
+  size_t i;
+  uint32_t max_prefix;
+  for (i = 0; i < in_size;) {
+    uint32_t reps = 0;
+    for (; i < in_size && v[i] != 0; ++i) ;
+    for (; i < in_size && v[i] == 0; ++i) {
+      ++reps;
+    }
+    max_reps = BROTLI_MAX(uint32_t, reps, max_reps);
+  }
+  max_prefix = max_reps > 0 ? Log2FloorNonZero(max_reps) : 0;
+  max_prefix = BROTLI_MIN(uint32_t, max_prefix, *max_run_length_prefix);
+  *max_run_length_prefix = max_prefix;  /* report the prefix actually used */
+  *out_size = 0;
+  for (i = 0; i < in_size;) {  /* rewrite v[] in place, never past index i */
+    BROTLI_DCHECK(*out_size <= i);
+    if (v[i] != 0) {
+      v[*out_size] = v[i] + *max_run_length_prefix;
+      ++i;
+      ++(*out_size);
+    } else {
+      uint32_t reps = 1;
+      size_t k;
+      for (k = i + 1; k < in_size && v[k] == 0; ++k) {
+        ++reps;
+      }
+      i += reps;
+      while (reps != 0) {
+        if (reps < (2u << max_prefix)) {  /* the rest fits in one code */
+          uint32_t run_length_prefix = Log2FloorNonZero(reps);
+          const uint32_t extra_bits = reps - (1u << run_length_prefix);
+          v[*out_size] = run_length_prefix + (extra_bits << 9);
+          ++(*out_size);
+          break;
+        } else {  /* emit the longest codable run and continue */
+          const uint32_t extra_bits = (1u << max_prefix) - 1u;
+          v[*out_size] = max_prefix + (extra_bits << 9);
+          reps -= (2u << max_prefix) - 1u;
+          ++(*out_size);
+        }
+      }
+    }
+  }
+}
+
+#define SYMBOL_BITS 9  /* low bits of an RLE symbol that hold the code */
+
+typedef struct EncodeContextMapArena {
+  uint32_t histogram[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];  /* symbol counts */
+  uint8_t depths[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];  /* code bit counts */
+  uint16_t bits[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];  /* code words */
+} EncodeContextMapArena;
+
+static void EncodeContextMap(MemoryManager* m,
+                             EncodeContextMapArena* arena,
+                             const uint32_t* context_map,
+                             size_t context_map_size,
+                             size_t num_clusters,
+                             HuffmanTree* tree,
+                             size_t* storage_ix, uint8_t* storage) {
+  size_t i;
+  uint32_t* rle_symbols;
+  uint32_t max_run_length_prefix = 6;  /* cap; RunLengthCodeZeros may lower it */
+  size_t num_rle_symbols = 0;
+  uint32_t* BROTLI_RESTRICT const histogram = arena->histogram;
+  static const uint32_t kSymbolMask = (1u << SYMBOL_BITS) - 1u;
+  uint8_t* BROTLI_RESTRICT const depths = arena->depths;
+  uint16_t* BROTLI_RESTRICT const bits = arena->bits;
+
+  StoreVarLenUint8(num_clusters - 1, storage_ix, storage);
+
+  if (num_clusters == 1) {  /* nothing else is stored for a single cluster */
+    return;
+  }
+
+  rle_symbols = BROTLI_ALLOC(m, uint32_t, context_map_size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(rle_symbols)) return;
+  MoveToFrontTransform(context_map, context_map_size, rle_symbols);
+  RunLengthCodeZeros(context_map_size, rle_symbols,
+                     &num_rle_symbols, &max_run_length_prefix);
+  memset(histogram, 0, sizeof(arena->histogram));
+  for (i = 0; i < num_rle_symbols; ++i) {
+    ++histogram[rle_symbols[i] & kSymbolMask];
+  }
+  {
+    BROTLI_BOOL use_rle = TO_BROTLI_BOOL(max_run_length_prefix > 0);
+    BrotliWriteBits(1, (uint64_t)use_rle, storage_ix, storage);
+    if (use_rle) {  /* the prefix is stored minus one, in 4 bits */
+      BrotliWriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
+    }
+  }
+  BuildAndStoreHuffmanTree(histogram, num_clusters + max_run_length_prefix,
+                           num_clusters + max_run_length_prefix,
+                           tree, depths, bits, storage_ix, storage);
+  for (i = 0; i < num_rle_symbols; ++i) {
+    const uint32_t rle_symbol = rle_symbols[i] & kSymbolMask;
+    const uint32_t extra_bits_val = rle_symbols[i] >> SYMBOL_BITS;
+    BrotliWriteBits(depths[rle_symbol], bits[rle_symbol], storage_ix, storage);
+    if (rle_symbol > 0 && rle_symbol <= max_run_length_prefix) {
+      BrotliWriteBits(rle_symbol, extra_bits_val, storage_ix, storage);
+    }
+  }
+  BrotliWriteBits(1, 1, storage_ix, storage);  /* use move-to-front */
+  BROTLI_FREE(m, rle_symbols);
+}
+
+/* Stores one block switch command (type, then length) to the bit stream. */
+static BROTLI_INLINE void StoreBlockSwitch(BlockSplitCode* code,
+                                           const uint32_t block_len,
+                                           const uint8_t block_type,
+                                           BROTLI_BOOL is_first_block,
+                                           size_t* storage_ix,
+                                           uint8_t* storage) {
+  size_t typecode = NextBlockTypeCode(&code->type_code_calculator, block_type);
+  size_t lencode;
+  uint32_t len_nextra;
+  uint32_t len_extra;
+  if (!is_first_block) {  /* the first block's type code is not written */
+    BrotliWriteBits(code->type_depths[typecode], code->type_bits[typecode],
+                    storage_ix, storage);
+  }
+  GetBlockLengthPrefixCode(block_len, &lencode, &len_nextra, &len_extra);
+
+  BrotliWriteBits(code->length_depths[lencode], code->length_bits[lencode],
+                  storage_ix, storage);
+  BrotliWriteBits(len_nextra, len_extra, storage_ix, storage);
+}
+
+/* Collects histograms of block type codes and block length prefix codes for
+   the block split described by |types| / |lengths|, then writes the number of
+   block types. When there is more than one type it also writes both prefix
+   codes (filling the depth/bit tables of |code|) and the block switch command
+   of the first block. */
+static void BuildAndStoreBlockSplitCode(const uint8_t* types,
+                                        const uint32_t* lengths,
+                                        const size_t num_blocks,
+                                        const size_t num_types,
+                                        HuffmanTree* tree,
+                                        BlockSplitCode* code,
+                                        size_t* storage_ix,
+                                        uint8_t* storage) {
+  const size_t num_type_symbols = num_types + 2;
+  uint32_t type_histo[BROTLI_MAX_BLOCK_TYPE_SYMBOLS];
+  uint32_t length_histo[BROTLI_NUM_BLOCK_LEN_SYMBOLS];
+  BlockTypeCodeCalculator calculator;
+  size_t block;
+
+  memset(type_histo, 0, num_type_symbols * sizeof(type_histo[0]));
+  memset(length_histo, 0, sizeof(length_histo));
+  InitBlockTypeCodeCalculator(&calculator);
+  for (block = 0; block < num_blocks; ++block) {
+    const size_t type_code = NextBlockTypeCode(&calculator, types[block]);
+    /* The first block's type code is never written (see the is_first_block
+       argument of StoreBlockSwitch), so it is not counted. */
+    if (block > 0) ++type_histo[type_code];
+    ++length_histo[BlockLengthPrefixCode(lengths[block])];
+  }
+
+  StoreVarLenUint8(num_types - 1, storage_ix, storage);
+  /* TODO(eustas): else? could StoreBlockSwitch occur? */
+  if (num_types <= 1) return;
+
+  BuildAndStoreHuffmanTree(type_histo, num_type_symbols, num_type_symbols,
+                           tree, code->type_depths, code->type_bits,
+                           storage_ix, storage);
+  BuildAndStoreHuffmanTree(length_histo, BROTLI_NUM_BLOCK_LEN_SYMBOLS,
+                           BROTLI_NUM_BLOCK_LEN_SYMBOLS,
+                           tree, code->length_depths, code->length_bits,
+                           storage_ix, storage);
+  StoreBlockSwitch(code, lengths[0], types[0], 1, storage_ix, storage);
+}
+
+/* Stores a context map in which the histogram type always equals the block
+   type. Only the number of types is written when |num_types| is 1; otherwise
+   a run-length coded map is written that, for every type, emits one type
+   symbol followed by the run symbol (|context_bits| - 1) with all of its extra
+   bits set. */
+static void StoreTrivialContextMap(EncodeContextMapArena* arena,
+                                   size_t num_types,
+                                   size_t context_bits,
+                                   HuffmanTree* tree,
+                                   size_t* storage_ix,
+                                   uint8_t* storage) {
+  StoreVarLenUint8(num_types - 1, storage_ix, storage);
+  if (num_types > 1) {
+    const size_t run_symbol = context_bits - 1u;
+    const size_t run_extra_bits = (1u << run_symbol) - 1u;
+    const size_t num_symbols = num_types + run_symbol;
+    uint32_t* BROTLI_RESTRICT const histogram = arena->histogram;
+    uint8_t* BROTLI_RESTRICT const depths = arena->depths;
+    uint16_t* BROTLI_RESTRICT const bits = arena->bits;
+    size_t k;
+
+    /* Write RLEMAX: a flag bit, then 4 bits holding (run symbol - 1). */
+    memset(histogram, 0, num_symbols * sizeof(histogram[0]));
+    BrotliWriteBits(1, 1, storage_ix, storage);
+    BrotliWriteBits(4, run_symbol - 1, storage_ix, storage);
+
+    /* Symbol counts matching the emission loop below: the run symbol once
+       per type, symbol 0 once, every other type symbol once. */
+    histogram[run_symbol] = (uint32_t)num_types;
+    histogram[0] = 1;
+    for (k = context_bits; k < num_symbols; ++k) {
+      histogram[k] = 1;
+    }
+    BuildAndStoreHuffmanTree(histogram, num_symbols, num_symbols,
+                             tree, depths, bits, storage_ix, storage);
+
+    for (k = 0; k < num_types; ++k) {
+      size_t type_symbol = 0;
+      if (k != 0) {
+        type_symbol = k + context_bits - 1;
+      }
+      BrotliWriteBits(depths[type_symbol], bits[type_symbol],
+                      storage_ix, storage);
+      BrotliWriteBits(depths[run_symbol], bits[run_symbol],
+                      storage_ix, storage);
+      BrotliWriteBits(run_symbol, run_extra_bits, storage_ix, storage);
+    }
+
+    /* Write IMTF (inverse-move-to-front) bit. */
+    BrotliWriteBits(1, 1, storage_ix, storage);
+  }
+}
+
+/* Manages the encoding of one block category (literal, command or distance).
+   Holds the block split being encoded, the block switch prefix codes and the
+   running position inside the split. */
+typedef struct BlockEncoder {
+  /* Number of symbols per histogram; used as the stride into depths_/bits_. */
+  size_t histogram_length_;
+  /* Number of distinct block types in the split. */
+  size_t num_block_types_;
+  const uint8_t* block_types_;  /* Not owned. */
+  const uint32_t* block_lengths_;  /* Not owned. */
+  /* Number of entries in block_types_ / block_lengths_. */
+  size_t num_blocks_;
+  /* Prefix codes and type code calculator used for block switch commands. */
+  BlockSplitCode block_split_code_;
+  /* Index of the block currently being encoded. */
+  size_t block_ix_;
+  /* Number of symbols still to be stored in the current block. */
+  size_t block_len_;
+  /* Offset derived from the current block type: block_type *
+     histogram_length_ in StoreSymbol, block_type << context_bits in
+     StoreSymbolWithContext. */
+  size_t entropy_ix_;
+  /* Code length / code bit tables, indexed by histogram offset + symbol.
+     Set to 0 by InitBlockEncoder and released by CleanupBlockEncoder;
+     presumably allocated by the BuildAndStoreEntropyCodes* functions -
+     TODO confirm in block_encoder_inc.h. */
+  uint8_t* depths_;
+  uint16_t* bits_;
+} BlockEncoder;
+
+/* Prepares |self| for encoding a block category whose histograms have
+   |histogram_length| symbols, using the given block split. The type/length
+   arrays are stored by pointer, not copied. The code tables start out null. */
+static void InitBlockEncoder(BlockEncoder* self, size_t histogram_length,
+    size_t num_block_types, const uint8_t* block_types,
+    const uint32_t* block_lengths, const size_t num_blocks) {
+  /* Description of the block split. */
+  self->block_types_ = block_types;
+  self->block_lengths_ = block_lengths;
+  self->num_blocks_ = num_blocks;
+  self->num_block_types_ = num_block_types;
+  self->histogram_length_ = histogram_length;
+
+  /* Encoding state: positioned on the first block, if there is one. */
+  self->block_ix_ = 0;
+  self->entropy_ix_ = 0;
+  if (num_blocks != 0) {
+    self->block_len_ = block_lengths[0];
+  } else {
+    self->block_len_ = 0;
+  }
+  InitBlockTypeCodeCalculator(&self->block_split_code_.type_code_calculator);
+
+  /* No entropy code tables yet. */
+  self->depths_ = NULL;
+  self->bits_ = NULL;
+}
+
+/* Releases the depth and bit tables of |self| through BROTLI_FREE. The
+   BlockEncoder object itself and the (not owned) block type / block length
+   arrays are left alone. */
+static void CleanupBlockEncoder(MemoryManager* m, BlockEncoder* self) {
+  BROTLI_FREE(m, self->depths_);
+  BROTLI_FREE(m, self->bits_);
+}
+
+/* Creates entropy codes of block lengths and block types and stores them
+   to the bit stream.
+   Forwards the block split held by |self| to BuildAndStoreBlockSplitCode,
+   which fills self->block_split_code_; |tree| is scratch space for building
+   the prefix codes. */
+static void BuildAndStoreBlockSwitchEntropyCodes(BlockEncoder* self,
+    HuffmanTree* tree, size_t* storage_ix, uint8_t* storage) {
+  BuildAndStoreBlockSplitCode(self->block_types_, self->block_lengths_,
+      self->num_blocks_, self->num_block_types_, tree, &self->block_split_code_,
+      storage_ix, storage);
+}
+
+/* Writes |symbol| using the entropy code that belongs to the current block
+   type. When the current block is exhausted, first advances to the next block
+   and emits the corresponding block switch command. */
+static void StoreSymbol(BlockEncoder* self, size_t symbol, size_t* storage_ix,
+    uint8_t* storage) {
+  size_t code_ix;
+  if (self->block_len_ == 0) {
+    /* Block boundary: move on to the next block of the split. */
+    const size_t next_ix = self->block_ix_ + 1;
+    const uint32_t next_len = self->block_lengths_[next_ix];
+    const uint8_t next_type = self->block_types_[next_ix];
+    self->block_ix_ = next_ix;
+    self->block_len_ = next_len;
+    self->entropy_ix_ = next_type * self->histogram_length_;
+    StoreBlockSwitch(&self->block_split_code_, next_len, next_type, 0,
+        storage_ix, storage);
+  }
+  --self->block_len_;
+  code_ix = self->entropy_ix_ + symbol;
+  BrotliWriteBits(self->depths_[code_ix], self->bits_[code_ix],
+      storage_ix, storage);
+}
+
+/* Writes |symbol| using the entropy code selected through |context_map|,
+   which is indexed by (current block type << |context_bits|) + |context|.
+   When the current block is exhausted, first advances to the next block and
+   emits the corresponding block switch command. */
+static void StoreSymbolWithContext(BlockEncoder* self, size_t symbol,
+    size_t context, const uint32_t* context_map, size_t* storage_ix,
+    uint8_t* storage, const size_t context_bits) {
+  size_t histogram_ix;
+  size_t code_ix;
+  if (self->block_len_ == 0) {
+    /* Block boundary: move on to the next block of the split. */
+    const size_t next_ix = self->block_ix_ + 1;
+    const uint32_t next_len = self->block_lengths_[next_ix];
+    const uint8_t next_type = self->block_types_[next_ix];
+    self->block_ix_ = next_ix;
+    self->block_len_ = next_len;
+    self->entropy_ix_ = (size_t)next_type << context_bits;
+    StoreBlockSwitch(&self->block_split_code_, next_len, next_type, 0,
+        storage_ix, storage);
+  }
+  --self->block_len_;
+  histogram_ix = context_map[self->entropy_ix_ + context];
+  code_ix = histogram_ix * self->histogram_length_ + symbol;
+  BrotliWriteBits(self->depths_[code_ix], self->bits_[code_ix],
+      storage_ix, storage);
+}
+
+/* Includes the block_encoder_inc.h template three times, once per block
+   category. FN(X) appends the category suffix to X; presumably this is what
+   provides the BuildAndStoreEntropyCodesLiteral / Command / Distance
+   functions called from BrotliStoreMetaBlock - confirm in the included
+   header. */
+#define FN(X) X ## Literal
+/* NOLINTNEXTLINE(build/include) */
+#include "block_encoder_inc.h"
+#undef FN
+
+#define FN(X) X ## Command
+/* NOLINTNEXTLINE(build/include) */
+#include "block_encoder_inc.h"
+#undef FN
+
+#define FN(X) X ## Distance
+/* NOLINTNEXTLINE(build/include) */
+#include "block_encoder_inc.h"
+#undef FN
+
+/* Rounds the bit position |*storage_ix| up to the next multiple of 8 (it is
+   left unchanged when already byte-aligned) and zeroes the byte of |storage|
+   at the resulting position. */
+static void JumpToByteBoundary(size_t* storage_ix, uint8_t* storage) {
+  /* The mask has to be as wide as size_t: ~7u is an unsigned int constant, so
+     on targets with a 64-bit size_t it would be zero-extended and wipe the
+     upper 32 bits of the bit position. */
+  *storage_ix = (*storage_ix + 7u) & ~(size_t)7;
+  storage[*storage_ix >> 3] = 0;
+}
+
+/* Scratch state that BrotliStoreMetaBlock allocates in one piece: one block
+   encoder per block category plus the arena handed to the context map
+   encoding functions. */
+typedef struct StoreMetablockArena {
+  BlockEncoder literal_enc;
+  BlockEncoder command_enc;
+  BlockEncoder distance_enc;
+  EncodeContextMapArena context_map_arena;
+} StoreMetablockArena;
+
+/* Stores one compressed meta-block using the block splits, context maps and
+   histograms in |mb|.
+
+   In order, it writes: the compressed meta-block header, the block switch
+   codes of the three block categories, the distance parameters, one literal
+   context mode per literal block type, the literal and distance context maps,
+   the entropy codes of the three categories, and finally every command with
+   its inserted literals and (where applicable) its distance.
+
+   |input| is addressed as input[pos & mask] starting at |start_pos|.
+   |prev_byte| / |prev_byte2| seed the literal context. When |is_last| is set
+   the output is padded to a byte boundary.
+
+   If an allocation fails (BROTLI_IS_OOM) the function returns early.
+   NOTE(review): the early-return paths do not free |tree| / |arena| here;
+   presumably the MemoryManager reclaims them - confirm. */
+void BrotliStoreMetaBlock(MemoryManager* m,
+    const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+    uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last,
+    const BrotliEncoderParams* params, ContextType literal_context_mode,
+    const Command* commands, size_t n_commands, const MetaBlockSplit* mb,
+    size_t* storage_ix, uint8_t* storage) {
+
+  size_t pos = start_pos;
+  size_t i;
+  uint32_t num_distance_symbols = params->dist.alphabet_size_max;
+  uint32_t num_effective_distance_symbols = params->dist.alphabet_size_limit;
+  HuffmanTree* tree;
+  ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
+  StoreMetablockArena* arena = NULL;
+  BlockEncoder* literal_enc = NULL;
+  BlockEncoder* command_enc = NULL;
+  BlockEncoder* distance_enc = NULL;
+  const BrotliDistanceParams* dist = &params->dist;
+  BROTLI_DCHECK(
+      num_effective_distance_symbols <= BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS);
+
+  StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
+
+  /* Scratch memory: |tree| for building prefix codes, |arena| for the three
+     block encoders and the context map encoder. */
+  tree = BROTLI_ALLOC(m, HuffmanTree, MAX_HUFFMAN_TREE_SIZE);
+  arena = BROTLI_ALLOC(m, StoreMetablockArena, 1);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tree) || BROTLI_IS_NULL(arena)) return;
+  literal_enc = &arena->literal_enc;
+  command_enc = &arena->command_enc;
+  distance_enc = &arena->distance_enc;
+  InitBlockEncoder(literal_enc, BROTLI_NUM_LITERAL_SYMBOLS,
+      mb->literal_split.num_types, mb->literal_split.types,
+      mb->literal_split.lengths, mb->literal_split.num_blocks);
+  InitBlockEncoder(command_enc, BROTLI_NUM_COMMAND_SYMBOLS,
+      mb->command_split.num_types, mb->command_split.types,
+      mb->command_split.lengths, mb->command_split.num_blocks);
+  InitBlockEncoder(distance_enc, num_effective_distance_symbols,
+      mb->distance_split.num_types, mb->distance_split.types,
+      mb->distance_split.lengths, mb->distance_split.num_blocks);
+
+  /* Block type / block length codes, in the order literal, command,
+     distance. */
+  BuildAndStoreBlockSwitchEntropyCodes(literal_enc, tree, storage_ix, storage);
+  BuildAndStoreBlockSwitchEntropyCodes(command_enc, tree, storage_ix, storage);
+  BuildAndStoreBlockSwitchEntropyCodes(distance_enc, tree, storage_ix, storage);
+
+  /* Distance parameters: 2 bits of postfix bits, then 4 bits holding the
+     number of direct distance codes shifted right by the postfix bits. */
+  BrotliWriteBits(2, dist->distance_postfix_bits, storage_ix, storage);
+  BrotliWriteBits(
+      4, dist->num_direct_distance_codes >> dist->distance_postfix_bits,
+      storage_ix, storage);
+  /* The same 2-bit literal context mode is written for every literal block
+     type. */
+  for (i = 0; i < mb->literal_split.num_types; ++i) {
+    BrotliWriteBits(2, literal_context_mode, storage_ix, storage);
+  }
+
+  /* Literal context map; an empty map selects the trivial encoding. */
+  if (mb->literal_context_map_size == 0) {
+    StoreTrivialContextMap(
+        &arena->context_map_arena, mb->literal_histograms_size,
+        BROTLI_LITERAL_CONTEXT_BITS, tree, storage_ix, storage);
+  } else {
+    EncodeContextMap(m, &arena->context_map_arena,
+        mb->literal_context_map, mb->literal_context_map_size,
+        mb->literal_histograms_size, tree, storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+  }
+
+  /* Distance context map, handled the same way. */
+  if (mb->distance_context_map_size == 0) {
+    StoreTrivialContextMap(
+        &arena->context_map_arena, mb->distance_histograms_size,
+        BROTLI_DISTANCE_CONTEXT_BITS, tree, storage_ix, storage);
+  } else {
+    EncodeContextMap(m, &arena->context_map_arena,
+        mb->distance_context_map, mb->distance_context_map_size,
+        mb->distance_histograms_size, tree, storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+  }
+
+  /* Entropy codes for every histogram of each category. Note that the
+     distance call passes num_distance_symbols (alphabet_size_max), whereas
+     the distance encoder was initialized with alphabet_size_limit. */
+  BuildAndStoreEntropyCodesLiteral(m, literal_enc, mb->literal_histograms,
+      mb->literal_histograms_size, BROTLI_NUM_LITERAL_SYMBOLS, tree,
+      storage_ix, storage);
+  if (BROTLI_IS_OOM(m)) return;
+  BuildAndStoreEntropyCodesCommand(m, command_enc, mb->command_histograms,
+      mb->command_histograms_size, BROTLI_NUM_COMMAND_SYMBOLS, tree,
+      storage_ix, storage);
+  if (BROTLI_IS_OOM(m)) return;
+  BuildAndStoreEntropyCodesDistance(m, distance_enc, mb->distance_histograms,
+      mb->distance_histograms_size, num_distance_symbols, tree,
+      storage_ix, storage);
+  if (BROTLI_IS_OOM(m)) return;
+  /* |tree| is not used past this point. */
+  BROTLI_FREE(m, tree);
+
+  /* Emit the commands: command symbol and extra bits, the inserted literals,
+     then the distance symbol and extra bits where one is stored. */
+  for (i = 0; i < n_commands; ++i) {
+    const Command cmd = commands[i];
+    size_t cmd_code = cmd.cmd_prefix_;
+    StoreSymbol(command_enc, cmd_code, storage_ix, storage);
+    StoreCommandExtra(&cmd, storage_ix, storage);
+    if (mb->literal_context_map_size == 0) {
+      /* No literal context map: the code depends on the block type only. */
+      size_t j;
+      for (j = cmd.insert_len_; j != 0; --j) {
+        StoreSymbol(literal_enc, input[pos & mask], storage_ix, storage);
+        ++pos;
+      }
+    } else {
+      /* The code is selected from the two preceding bytes via the context
+         lookup table and the literal context map. */
+      size_t j;
+      for (j = cmd.insert_len_; j != 0; --j) {
+        size_t context =
+            BROTLI_CONTEXT(prev_byte, prev_byte2, literal_context_lut);
+        uint8_t literal = input[pos & mask];
+        StoreSymbolWithContext(literal_enc, literal, context,
+            mb->literal_context_map, storage_ix, storage,
+            BROTLI_LITERAL_CONTEXT_BITS);
+        prev_byte2 = prev_byte;
+        prev_byte = literal;
+        ++pos;
+      }
+    }
+    pos += CommandCopyLen(&cmd);
+    if (CommandCopyLen(&cmd)) {
+      /* After a copy, the context bytes are the last two copied bytes. */
+      prev_byte2 = input[(pos - 2) & mask];
+      prev_byte = input[(pos - 1) & mask];
+      /* Only command codes >= 128 get a distance symbol written here. */
+      if (cmd.cmd_prefix_ >= 128) {
+        /* dist_prefix_ packs the distance code in its low 10 bits and the
+           number of extra bits above them. */
+        size_t dist_code = cmd.dist_prefix_ & 0x3FF;
+        uint32_t distnumextra = cmd.dist_prefix_ >> 10;
+        uint64_t distextra = cmd.dist_extra_;
+        if (mb->distance_context_map_size == 0) {
+          StoreSymbol(distance_enc, dist_code, storage_ix, storage);
+        } else {
+          size_t context = CommandDistanceContext(&cmd);
+          StoreSymbolWithContext(distance_enc, dist_code, context,
+              mb->distance_context_map, storage_ix, storage,
+              BROTLI_DISTANCE_CONTEXT_BITS);
+        }
+        BrotliWriteBits(distnumextra, distextra, storage_ix, storage);
+      }
+    }
+  }
+  CleanupBlockEncoder(m, distance_enc);
+  CleanupBlockEncoder(m, command_enc);
+  CleanupBlockEncoder(m, literal_enc);
+  BROTLI_FREE(m, arena);
+  if (is_last) {
+    JumpToByteBoundary(storage_ix, storage);
+  }
+}
+
+/* Walks |commands| and accumulates one histogram per category: every command
+   prefix into |cmd_histo|, every inserted literal (read from
+   input[pos & mask], starting at |start_pos|) into |lit_histo|, and the
+   distance code of every command that has a non-zero copy length and a
+   command prefix >= 128 into |dist_histo|. The histograms are added to, not
+   cleared. */
+static void BuildHistograms(const uint8_t* input,
+                            size_t start_pos,
+                            size_t mask,
+                            const Command* commands,
+                            size_t n_commands,
+                            HistogramLiteral* lit_histo,
+                            HistogramCommand* cmd_histo,
+                            HistogramDistance* dist_histo) {
+  size_t pos = start_pos;
+  size_t cmd_ix;
+  for (cmd_ix = 0; cmd_ix < n_commands; ++cmd_ix) {
+    const Command cmd = commands[cmd_ix];
+    size_t literals_left = cmd.insert_len_;
+    HistogramAddCommand(cmd_histo, cmd.cmd_prefix_);
+    while (literals_left != 0) {
+      HistogramAddLiteral(lit_histo, input[pos & mask]);
+      ++pos;
+      --literals_left;
+    }
+    pos += CommandCopyLen(&cmd);
+    if (!CommandCopyLen(&cmd) || cmd.cmd_prefix_ < 128) continue;
+    /* The low 10 bits of dist_prefix_ hold the distance code. */
+    HistogramAddDistance(dist_histo, cmd.dist_prefix_ & 0x3FF);
+  }
+}
+
+/* Writes all |commands| with one fixed prefix code per category. For each
+   command: the command symbol and its extra bits, then the inserted literals
+   (read from input[pos & mask], starting at |start_pos|), then - for commands
+   with a non-zero copy length and a command prefix >= 128 - the distance
+   symbol and its extra bits. */
+static void StoreDataWithHuffmanCodes(const uint8_t* input,
+                                      size_t start_pos,
+                                      size_t mask,
+                                      const Command* commands,
+                                      size_t n_commands,
+                                      const uint8_t* lit_depth,
+                                      const uint16_t* lit_bits,
+                                      const uint8_t* cmd_depth,
+                                      const uint16_t* cmd_bits,
+                                      const uint8_t* dist_depth,
+                                      const uint16_t* dist_bits,
+                                      size_t* storage_ix,
+                                      uint8_t* storage) {
+  size_t pos = start_pos;
+  size_t cmd_ix;
+  for (cmd_ix = 0; cmd_ix < n_commands; ++cmd_ix) {
+    const Command cmd = commands[cmd_ix];
+    const size_t cmd_symbol = cmd.cmd_prefix_;
+    size_t literals_left = cmd.insert_len_;
+
+    BrotliWriteBits(
+        cmd_depth[cmd_symbol], cmd_bits[cmd_symbol], storage_ix, storage);
+    StoreCommandExtra(&cmd, storage_ix, storage);
+
+    while (literals_left != 0) {
+      const uint8_t literal = input[pos & mask];
+      BrotliWriteBits(
+          lit_depth[literal], lit_bits[literal], storage_ix, storage);
+      ++pos;
+      --literals_left;
+    }
+
+    pos += CommandCopyLen(&cmd);
+    if (!CommandCopyLen(&cmd) || cmd.cmd_prefix_ < 128) continue;
+    {
+      /* dist_prefix_ packs the distance code in its low 10 bits and the
+         number of extra bits above them. */
+      const size_t dist_symbol = cmd.dist_prefix_ & 0x3FF;
+      const uint32_t extra_bit_count = cmd.dist_prefix_ >> 10;
+      const uint32_t extra_bit_value = cmd.dist_extra_;
+      BrotliWriteBits(dist_depth[dist_symbol], dist_bits[dist_symbol],
+                      storage_ix, storage);
+      BrotliWriteBits(extra_bit_count, extra_bit_value, storage_ix, storage);
+    }
+  }
+}
+
+/* Scratch state shared by BrotliStoreMetaBlockTrivial and
+   BrotliStoreMetaBlockFast: one histogram per block category, the code
+   length / code bit tables built from them, and the Huffman tree work area.
+   Each of those functions allocates one instance on entry and frees it before
+   returning. */
+/* TODO(eustas): pull alloc/dealloc to caller? */
+typedef struct MetablockArena {
+  HistogramLiteral lit_histo;
+  HistogramCommand cmd_histo;
+  HistogramDistance dist_histo;
+  /* TODO(eustas): merge bits and depth? */
+  uint8_t lit_depth[BROTLI_NUM_LITERAL_SYMBOLS];
+  uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS];
+  uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS];
+  uint16_t cmd_bits[BROTLI_NUM_COMMAND_SYMBOLS];
+  uint8_t dist_depth[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
+  uint16_t dist_bits[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
+  /* Work area passed to the Huffman tree builders. */
+  HuffmanTree tree[MAX_HUFFMAN_TREE_SIZE];
+} MetablockArena;
+
+/* Stores a compressed meta-block without block splitting: one histogram per
+   category is collected over all |commands|, a prefix code is built and
+   stored for each histogram, and the commands are then written with those
+   codes. Returns without writing anything if the arena allocation fails.
+   When |is_last| is set the output is padded to a byte boundary. */
+void BrotliStoreMetaBlockTrivial(MemoryManager* m,
+    const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+    BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+    const Command* commands, size_t n_commands,
+    size_t* storage_ix, uint8_t* storage) {
+  MetablockArena* arena = BROTLI_ALLOC(m, MetablockArena, 1);
+  uint32_t num_distance_symbols = params->dist.alphabet_size_max;
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(arena)) return;
+
+  StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
+
+  HistogramClearLiteral(&arena->lit_histo);
+  HistogramClearCommand(&arena->cmd_histo);
+  HistogramClearDistance(&arena->dist_histo);
+
+  BuildHistograms(input, start_pos, mask, commands, n_commands,
+                  &arena->lit_histo, &arena->cmd_histo, &arena->dist_histo);
+
+  /* 13 zero bits. NOTE(review): presumably these are the block-split,
+     distance-parameter, context-mode and context-map fields that
+     BrotliStoreMetaBlock writes individually, all zero when every category
+     has a single block type - confirm against the format. */
+  BrotliWriteBits(13, 0, storage_ix, storage);
+
+  /* |arena->tree| is reused as scratch space by all three builds. */
+  BuildAndStoreHuffmanTree(arena->lit_histo.data_, BROTLI_NUM_LITERAL_SYMBOLS,
+                           BROTLI_NUM_LITERAL_SYMBOLS, arena->tree,
+                           arena->lit_depth, arena->lit_bits,
+                           storage_ix, storage);
+  BuildAndStoreHuffmanTree(arena->cmd_histo.data_, BROTLI_NUM_COMMAND_SYMBOLS,
+                           BROTLI_NUM_COMMAND_SYMBOLS, arena->tree,
+                           arena->cmd_depth, arena->cmd_bits,
+                           storage_ix, storage);
+  /* Unlike the two calls above, the distance build passes two different
+     sizes: MAX_SIMPLE_DISTANCE_ALPHABET_SIZE and num_distance_symbols. */
+  BuildAndStoreHuffmanTree(arena->dist_histo.data_,
+                           MAX_SIMPLE_DISTANCE_ALPHABET_SIZE,
+                           num_distance_symbols, arena->tree,
+                           arena->dist_depth, arena->dist_bits,
+                           storage_ix, storage);
+  StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
+                            n_commands, arena->lit_depth, arena->lit_bits,
+                            arena->cmd_depth, arena->cmd_bits,
+                            arena->dist_depth, arena->dist_bits,
+                            storage_ix, storage);
+  BROTLI_FREE(m, arena);
+  if (is_last) {
+    JumpToByteBoundary(storage_ix, storage);
+  }
+}
+
+/* Stores a compressed meta-block without block splitting, using the fast
+   prefix code builder. With at most 128 commands only a literal code is
+   built and the static command / distance codes are used; otherwise codes
+   for all three categories are built from histograms of |commands|.
+   Returns without writing anything if the arena allocation fails. When
+   |is_last| is set the output is padded to a byte boundary. */
+void BrotliStoreMetaBlockFast(MemoryManager* m,
+    const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+    BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+    const Command* commands, size_t n_commands,
+    size_t* storage_ix, uint8_t* storage) {
+  MetablockArena* arena = BROTLI_ALLOC(m, MetablockArena, 1);
+  uint32_t num_distance_symbols = params->dist.alphabet_size_max;
+  /* Passed as max_bits when building the distance code below. */
+  uint32_t distance_alphabet_bits =
+      Log2FloorNonZero(num_distance_symbols - 1) + 1;
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(arena)) return;
+
+  StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
+
+  /* 13 zero bits. NOTE(review): presumably the block-split,
+     distance-parameter, context-mode and context-map fields for a single
+     block type per category - confirm against the format. */
+  BrotliWriteBits(13, 0, storage_ix, storage);
+
+  if (n_commands <= 128) {
+    /* Few commands: count literals only and rely on the static command and
+       distance codes. */
+    uint32_t histogram[BROTLI_NUM_LITERAL_SYMBOLS] = { 0 };
+    size_t pos = start_pos;
+    size_t num_literals = 0;
+    size_t i;
+    for (i = 0; i < n_commands; ++i) {
+      const Command cmd = commands[i];
+      size_t j;
+      for (j = cmd.insert_len_; j != 0; --j) {
+        ++histogram[input[pos & mask]];
+        ++pos;
+      }
+      num_literals += cmd.insert_len_;
+      /* Skip over the copied bytes; they are not literals. */
+      pos += CommandCopyLen(&cmd);
+    }
+    BrotliBuildAndStoreHuffmanTreeFast(arena->tree, histogram, num_literals,
+                                       /* max_bits = */ 8,
+                                       arena->lit_depth, arena->lit_bits,
+                                       storage_ix, storage);
+    StoreStaticCommandHuffmanTree(storage_ix, storage);
+    StoreStaticDistanceHuffmanTree(storage_ix, storage);
+    StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
+                              n_commands, arena->lit_depth, arena->lit_bits,
+                              kStaticCommandCodeDepth,
+                              kStaticCommandCodeBits,
+                              kStaticDistanceCodeDepth,
+                              kStaticDistanceCodeBits,
+                              storage_ix, storage);
+  } else {
+    /* Many commands: build a code for each category from its histogram. */
+    HistogramClearLiteral(&arena->lit_histo);
+    HistogramClearCommand(&arena->cmd_histo);
+    HistogramClearDistance(&arena->dist_histo);
+    BuildHistograms(input, start_pos, mask, commands, n_commands,
+                    &arena->lit_histo, &arena->cmd_histo, &arena->dist_histo);
+    BrotliBuildAndStoreHuffmanTreeFast(arena->tree, arena->lit_histo.data_,
+                                       arena->lit_histo.total_count_,
+                                       /* max_bits = */ 8,
+                                       arena->lit_depth, arena->lit_bits,
+                                       storage_ix, storage);
+    BrotliBuildAndStoreHuffmanTreeFast(arena->tree, arena->cmd_histo.data_,
+                                       arena->cmd_histo.total_count_,
+                                       /* max_bits = */ 10,
+                                       arena->cmd_depth, arena->cmd_bits,
+                                       storage_ix, storage);
+    BrotliBuildAndStoreHuffmanTreeFast(arena->tree, arena->dist_histo.data_,
+                                       arena->dist_histo.total_count_,
+                                       /* max_bits = */
+                                       distance_alphabet_bits,
+                                       arena->dist_depth, arena->dist_bits,
+                                       storage_ix, storage);
+    StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
+                              n_commands, arena->lit_depth, arena->lit_bits,
+                              arena->cmd_depth, arena->cmd_bits,
+                              arena->dist_depth, arena->dist_bits,
+                              storage_ix, storage);
+  }
+
+  BROTLI_FREE(m, arena);
+
+  if (is_last) {
+    JumpToByteBoundary(storage_ix, storage);
+  }
+}
+
+/* Stores |len| bytes of |input| as an uncompressed meta-block: header,
+   padding to a byte boundary, then the raw bytes. The source is read from a
+   ring buffer of mask + 1 bytes starting at position & mask, so the copy is
+   done in two pieces when it wraps past the end of the buffer. When
+   |is_final_block| is set, an empty last meta-block is appended. */
+void BrotliStoreUncompressedMetaBlock(BROTLI_BOOL is_final_block,
+                                      const uint8_t* BROTLI_RESTRICT input,
+                                      size_t position, size_t mask,
+                                      size_t len,
+                                      size_t* BROTLI_RESTRICT storage_ix,
+                                      uint8_t* BROTLI_RESTRICT storage) {
+  const size_t ring_size = mask + 1;
+  size_t src = position & mask;
+  size_t remaining = len;
+
+  BrotliStoreUncompressedMetaBlockHeader(len, storage_ix, storage);
+  JumpToByteBoundary(storage_ix, storage);
+
+  if (src + remaining > ring_size) {
+    /* Wrap-around: first copy the part up to the end of the ring buffer. */
+    const size_t head = ring_size - src;
+    memcpy(&storage[*storage_ix >> 3], &input[src], head);
+    *storage_ix += head << 3;
+    remaining -= head;
+    src = 0;
+  }
+  memcpy(&storage[*storage_ix >> 3], &input[src], remaining);
+  *storage_ix += remaining << 3;
+
+  /* BrotliWriteBits expects the bytes following the write position to be
+     cleared, so prepare them before any further bit writes. */
+  BrotliWriteBitsPrepareStorage(*storage_ix, storage);
+
+  /* An uncompressed meta-block is not itself marked as last here, so the
+     stream is terminated with an empty last meta-block. */
+  if (is_final_block) {
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
+    JumpToByteBoundary(storage_ix, storage);
+  }
+}
+
+#if defined(BROTLI_TEST)
+/* Test-only wrapper, compiled when BROTLI_TEST is defined, that forwards its
+   arguments unchanged to GetBlockLengthPrefixCode. */
+void GetBlockLengthPrefixCodeForTest(uint32_t len, size_t* code,
+                                     uint32_t* n_extra, uint32_t* extra) {
+  GetBlockLengthPrefixCode(len, code, n_extra, extra);
+}
+#endif
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/brotli_bit_stream.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/brotli_bit_stream.h
new file mode 100644
index 0000000000..a289509af3
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/brotli_bit_stream.h
@@ -0,0 +1,89 @@
+/* Copyright 2014 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions to convert brotli-related data structures into the
+   brotli bit stream. The functions here operate under
+   assumption that there is enough space in the storage, i.e., there are
+   no out-of-range checks anywhere.
+
+   These functions do bit addressing into a byte array. The byte array
+   is called "storage" and the index to the bit is called storage_ix
+   in function arguments. */
+
+#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
+#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
+
+#include <brotli/types.h>
+
+#include "../common/context.h"
+#include "../common/platform.h"
+#include "command.h"
+#include "entropy_encode.h"
+#include "memory.h"
+#include "metablock.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* All Store functions here will use a storage_ix, which is always the bit
+   position for the current storage. */
+
+BROTLI_INTERNAL void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
+    HuffmanTree* tree, size_t* storage_ix, uint8_t* storage);
+
+BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast(
+    HuffmanTree* tree, const uint32_t* histogram, const size_t histogram_total,
+    const size_t max_bits, uint8_t* depth, uint16_t* bits, size_t* storage_ix,
+    uint8_t* storage);
+
+/* REQUIRES: length > 0 */
+/* REQUIRES: length <= (1 << 24) */
+BROTLI_INTERNAL void BrotliStoreMetaBlock(MemoryManager* m,
+    const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+    uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last,
+    const BrotliEncoderParams* params, ContextType literal_context_mode,
+    const Command* commands, size_t n_commands, const MetaBlockSplit* mb,
+    size_t* storage_ix, uint8_t* storage);
+
+/* Stores the meta-block without doing any block splitting, just collects
+   one histogram per block category and uses that for entropy coding.
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
+BROTLI_INTERNAL void BrotliStoreMetaBlockTrivial(MemoryManager* m,
+    const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+    BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+    const Command* commands, size_t n_commands,
+    size_t* storage_ix, uint8_t* storage);
+
+/* Same as above, but uses static prefix codes for histograms with only a few
+   symbols, and uses static code length prefix codes for all other histograms.
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
+BROTLI_INTERNAL void BrotliStoreMetaBlockFast(MemoryManager* m,
+    const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+    BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+    const Command* commands, size_t n_commands,
+    size_t* storage_ix, uint8_t* storage);
+
+/* This is for storing uncompressed blocks (simple raw storage of
+   bytes-as-bytes).
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
+BROTLI_INTERNAL void BrotliStoreUncompressedMetaBlock(
+    BROTLI_BOOL is_final_block, const uint8_t* BROTLI_RESTRICT input,
+    size_t position, size_t mask, size_t len,
+    size_t* BROTLI_RESTRICT storage_ix, uint8_t* BROTLI_RESTRICT storage);
+
+#if defined(BROTLI_TEST)
+void GetBlockLengthPrefixCodeForTest(uint32_t, size_t*, uint32_t*, uint32_t*);
+#endif
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_BROTLI_BIT_STREAM_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/cluster.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/cluster.c
new file mode 100644
index 0000000000..b0faf8114c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/cluster.c
@@ -0,0 +1,57 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions for clustering similar histograms together. */
+
+#include "cluster.h"
+
+#include <brotli/types.h>
+
+#include "../common/platform.h"
+#include "bit_cost.h"  /* BrotliPopulationCost */
+#include "fast_log.h"
+#include "histogram.h"
+#include "memory.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Ordering predicate for histogram pairs: |p1| ranks below |p2| when its
+   cost_diff is larger; when the cost_diff values compare equal, when its
+   index distance (idx2 - idx1) is larger. */
+static BROTLI_INLINE BROTLI_BOOL HistogramPairIsLess(
+    const HistogramPair* p1, const HistogramPair* p2) {
+  if (p1->cost_diff == p2->cost_diff) {
+    /* Tie on cost: decide by the distance between the two indices. */
+    return TO_BROTLI_BOOL((p1->idx2 - p1->idx1) > (p2->idx2 - p2->idx1));
+  }
+  return TO_BROTLI_BOOL(p1->cost_diff > p2->cost_diff);
+}
+
+/* Returns entropy reduction of the context map when we combine two clusters.
+   Computed as a * log2(a) + b * log2(b) - c * log2(c), where a and b are the
+   two cluster sizes and c = a + b is the size of the merged cluster; the
+   logarithms come from FastLog2. */
+static BROTLI_INLINE double ClusterCostDiff(size_t size_a, size_t size_b) {
+  size_t size_c = size_a + size_b;
+  return (double)size_a * FastLog2(size_a) +
+    (double)size_b * FastLog2(size_b) -
+    (double)size_c * FastLog2(size_c);
+}
+
+/* Includes the cluster_inc.h template once per histogram type. FN(X) appends
+   the type suffix to X. Here CODE(X) expands to X itself, so whatever the
+   template wraps in CODE() (presumably the function bodies - confirm in
+   cluster_inc.h) is kept as written. */
+#define CODE(X) X
+
+#define FN(X) X ## Literal
+#include "cluster_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Command
+#include "cluster_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Distance
+#include "cluster_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#undef CODE
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/cluster.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/cluster.h
new file mode 100644
index 0000000000..013629c6d9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/cluster.h
@@ -0,0 +1,49 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions for clustering similar histograms together. */
+
+#ifndef BROTLI_ENC_CLUSTER_H_
+#define BROTLI_ENC_CLUSTER_H_
+
+#include <brotli/types.h>
+
+#include "../common/platform.h"
+#include "histogram.h"
+#include "memory.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct HistogramPair {
+  uint32_t idx1;  /* Lower index; the queue code keeps idx1 < idx2. */
+  uint32_t idx2;  /* Higher index of the candidate pair. */
+  double cost_combo;  /* Bit cost of the two histograms merged. */
+  double cost_diff;  /* Cost change of the merge; smaller is better. */
+} HistogramPair;
+
+#define CODE(X) /* Body dropped: each inclusion yields prototypes only. */;
+
+#define FN(X) X ## Literal  /* Prototypes for the Literal variants. */
+#include "cluster_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Command  /* Prototypes for the Command variants. */
+#include "cluster_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Distance  /* Prototypes for the Distance variants. */
+#include "cluster_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#undef CODE
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_CLUSTER_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/cluster_inc.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/cluster_inc.h
new file mode 100644
index 0000000000..d6215ef06e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/cluster_inc.h
@@ -0,0 +1,325 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, CODE */
+
+#define HistogramType FN(Histogram)
+
+/* Evaluates merging out[idx1] with out[idx2], using *tmp as scratch, and
+   queues the pair in |pairs| when it qualifies; pairs[0] stays the best one. */
+BROTLI_INTERNAL void FN(BrotliCompareAndPushToQueue)(
+    const HistogramType* out, HistogramType* tmp, const uint32_t* cluster_size,
+    uint32_t idx1, uint32_t idx2, size_t max_num_pairs, HistogramPair* pairs,
+    size_t* num_pairs) CODE({
+  BROTLI_BOOL is_good_pair = BROTLI_FALSE;
+  HistogramPair p;
+  p.idx1 = p.idx2 = 0;
+  p.cost_diff = p.cost_combo = 0;
+  if (idx1 == idx2) {  /* A histogram is never paired with itself. */
+    return;
+  }
+  if (idx2 < idx1) {  /* Store the pair with idx1 < idx2. */
+    uint32_t t = idx2;
+    idx2 = idx1;
+    idx1 = t;
+  }
+  p.idx1 = idx1;
+  p.idx2 = idx2;
+  p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
+  p.cost_diff -= out[idx1].bit_cost_;
+  p.cost_diff -= out[idx2].bit_cost_;
+  /* cost_combo is added to cost_diff further down once the pair is accepted. */
+  if (out[idx1].total_count_ == 0) {
+    p.cost_combo = out[idx2].bit_cost_;  /* Empty side: cost of the other. */
+    is_good_pair = BROTLI_TRUE;
+  } else if (out[idx2].total_count_ == 0) {
+    p.cost_combo = out[idx1].bit_cost_;
+    is_good_pair = BROTLI_TRUE;
+  } else {  /* Accept only if the diff beats max(0, best queued diff). */
+    double threshold = *num_pairs == 0 ? 1e99 :
+        BROTLI_MAX(double, 0.0, pairs[0].cost_diff);
+    double cost_combo;
+    *tmp = out[idx1];
+    FN(HistogramAddHistogram)(tmp, &out[idx2]);
+    cost_combo = FN(BrotliPopulationCost)(tmp);
+    if (cost_combo < threshold - p.cost_diff) {
+      p.cost_combo = cost_combo;
+      is_good_pair = BROTLI_TRUE;
+    }
+  }
+  if (is_good_pair) {
+    p.cost_diff += p.cost_combo;
+    if (*num_pairs > 0 && HistogramPairIsLess(&pairs[0], &p)) {
+      /* p beats the front: the old front moves to the back if there is room. */
+      if (*num_pairs < max_num_pairs) {
+        pairs[*num_pairs] = pairs[0];
+        ++(*num_pairs);
+      }
+      pairs[0] = p;  /* New best pair. */
+    } else if (*num_pairs < max_num_pairs) {  /* Else append, or drop if full. */
+      pairs[*num_pairs] = p;
+      ++(*num_pairs);
+    }
+  }
+})
+
+BROTLI_INTERNAL size_t FN(BrotliHistogramCombine)(HistogramType* out,
+                                                  HistogramType* tmp,
+                                                  uint32_t* cluster_size,
+                                                  uint32_t* symbols,
+                                                  uint32_t* clusters,
+                                                  HistogramPair* pairs,
+                                                  size_t num_clusters,
+                                                  size_t symbols_size,
+                                                  size_t max_clusters,
+                                                  size_t max_num_pairs) CODE({
+  double cost_diff_threshold = 0.0;  /* Phase 1: only merges that save bits. */
+  size_t min_cluster_size = 1;
+  size_t num_pairs = 0;
+  /* Greedy pairwise merge of the histograms listed in |clusters|. */
+  {
+    /* We maintain a vector of histogram pairs, with the property that the pair
+       with the maximum bit cost reduction is the first. */
+    size_t idx1;
+    for (idx1 = 0; idx1 < num_clusters; ++idx1) {
+      size_t idx2;
+      for (idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {
+        FN(BrotliCompareAndPushToQueue)(out, tmp, cluster_size, clusters[idx1],
+            clusters[idx2], max_num_pairs, &pairs[0], &num_pairs);
+      }
+    }
+  }
+
+  while (num_clusters > min_cluster_size) {
+    uint32_t best_idx1;
+    uint32_t best_idx2;
+    size_t i;
+    if (pairs[0].cost_diff >= cost_diff_threshold) {
+      cost_diff_threshold = 1e99;  /* Phase 2: merge regardless of cost... */
+      min_cluster_size = max_clusters;  /* ...until max_clusters remain. */
+      continue;
+    }
+    /* Take the best pair from the front of the queue. */
+    best_idx1 = pairs[0].idx1;
+    best_idx2 = pairs[0].idx2;
+    FN(HistogramAddHistogram)(&out[best_idx1], &out[best_idx2]);
+    out[best_idx1].bit_cost_ = pairs[0].cost_combo;
+    cluster_size[best_idx1] += cluster_size[best_idx2];
+    for (i = 0; i < symbols_size; ++i) {  /* Relabel the absorbed cluster. */
+      if (symbols[i] == best_idx2) {
+        symbols[i] = best_idx1;
+      }
+    }
+    for (i = 0; i < num_clusters; ++i) {  /* Drop best_idx2 from the list. */
+      if (clusters[i] == best_idx2) {
+        memmove(&clusters[i], &clusters[i + 1],
+                (num_clusters - i - 1) * sizeof(clusters[0]));
+        break;
+      }
+    }
+    --num_clusters;
+    {
+      /* Remove pairs intersecting the just combined best pair. */
+      size_t copy_to_idx = 0;  /* Write cursor for the in-place compaction. */
+      for (i = 0; i < num_pairs; ++i) {
+        HistogramPair* p = &pairs[i];
+        if (p->idx1 == best_idx1 || p->idx2 == best_idx1 ||
+            p->idx1 == best_idx2 || p->idx2 == best_idx2) {
+          /* Remove invalid pair from the queue. */
+          continue;
+        }
+        if (HistogramPairIsLess(&pairs[0], p)) {
+          /* Replace the top of the queue if needed. */
+          HistogramPair front = pairs[0];
+          pairs[0] = *p;
+          pairs[copy_to_idx] = front;
+        } else {
+          pairs[copy_to_idx] = *p;
+        }
+        ++copy_to_idx;
+      }
+      num_pairs = copy_to_idx;
+    }
+
+    /* Push new pairs formed with the combined histogram to the queue. */
+    for (i = 0; i < num_clusters; ++i) {
+      FN(BrotliCompareAndPushToQueue)(out, tmp, cluster_size, best_idx1,
+          clusters[i], max_num_pairs, &pairs[0], &num_pairs);
+    }
+  }
+  return num_clusters;  /* Number of clusters that remain. */
+})
+
+/* Bit cost of moving |histogram| into |candidate|: the population cost of the
+   two merged (built in *tmp) minus candidate->bit_cost_. An empty |histogram|
+   costs 0. */
+BROTLI_INTERNAL double FN(BrotliHistogramBitCostDistance)(
+    const HistogramType* histogram, const HistogramType* candidate,
+    HistogramType* tmp) CODE({
+  if (histogram->total_count_ == 0) return 0.0;
+  /* Merge into the scratch copy so that neither input is modified. */
+  *tmp = *histogram;
+  FN(HistogramAddHistogram)(tmp, candidate);
+  return FN(BrotliPopulationCost)(tmp) - candidate->bit_cost_;
+})
+
+/* Reassigns each 'in' histogram to its cheapest 'out' cluster, then rebuilds
+   the listed 'out' histograms. On entry clusters[0..num_clusters) holds the
+   unique values of symbols[0..in_size); this need not hold on return.
+   Note: out[]->bit_cost_ is assumed to be up-to-date on entry. */
+BROTLI_INTERNAL void FN(BrotliHistogramRemap)(const HistogramType* in,
+    size_t in_size, const uint32_t* clusters, size_t num_clusters,
+    HistogramType* out, HistogramType* tmp, uint32_t* symbols) CODE({
+  size_t pos;
+  size_t k;
+  for (pos = 0; pos < in_size; ++pos) {
+    const HistogramType* cur = &in[pos];
+    /* Start from the previous assignment; only a strictly cheaper one wins. */
+    uint32_t winner = symbols[pos == 0 ? 0 : pos - 1];
+    double winner_bits =
+        FN(BrotliHistogramBitCostDistance)(cur, &out[winner], tmp);
+    for (k = 0; k < num_clusters; ++k) {
+      const uint32_t cand = clusters[k];
+      const double cand_bits =
+          FN(BrotliHistogramBitCostDistance)(cur, &out[cand], tmp);
+      if (cand_bits < winner_bits) {
+        winner_bits = cand_bits;
+        winner = cand;
+      }
+    }
+    symbols[pos] = winner;
+  }
+  /* Rebuild each listed out histogram from the inputs now mapped to it. */
+  for (k = 0; k < num_clusters; ++k) FN(HistogramClear)(&out[clusters[k]]);
+  for (pos = 0; pos < in_size; ++pos) {
+    FN(HistogramAddHistogram)(&out[symbols[pos]], &in[pos]);
+  }
+})
+
+/* Compacts out[] and renumbers symbols[0..length) so that cluster ids become
+   dense and ordered by first appearance:
+     * on entry symbols[] holds indexes into out[] with N distinct values
+       (possibly N < length)
+     * on return symbols'[i] = f(symbols[i]) and
+                 out'[symbols'[i]] = out[symbols[i]] for each 0 <= i < length,
+       where f is a bijection from the distinct values of symbols[] to [0..N)
+       and first occurrences in symbols'[] appear in increasing order.
+   Returns N, the number of distinct values in symbols[], or 0 when the
+   scratch allocations fail. */
+BROTLI_INTERNAL size_t FN(BrotliHistogramReindex)(MemoryManager* m,
+    HistogramType* out, uint32_t* symbols, size_t length) CODE({
+  static const uint32_t kUnassigned = BROTLI_UINT32_MAX;
+  uint32_t* remap = BROTLI_ALLOC(m, uint32_t, length);
+  HistogramType* packed;
+  uint32_t num_unique = 0;
+  uint32_t num_copied = 0;
+  size_t i;
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(remap)) return 0;
+  for (i = 0; i < length; ++i) remap[i] = kUnassigned;
+  /* Pass 1: number the distinct symbols in order of first appearance. */
+  for (i = 0; i < length; ++i) {
+    const uint32_t old_symbol = symbols[i];
+    if (remap[old_symbol] != kUnassigned) continue;
+    remap[old_symbol] = num_unique++;
+  }
+  /* TODO(eustas): by using idea of "cycle-sort" we can avoid allocation of
+     the temporary copy and reduce the number of copying by the factor of 2. */
+  packed = BROTLI_ALLOC(m, HistogramType, num_unique);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(packed)) return 0;
+  /* Pass 2: gather histograms in their new order and rewrite the symbols.
+     A new index equal to num_copied marks the first use of that symbol. */
+  for (i = 0; i < length; ++i) {
+    const uint32_t new_symbol = remap[symbols[i]];
+    if (new_symbol == num_copied) {
+      packed[num_copied] = out[symbols[i]];
+      ++num_copied;
+    }
+    symbols[i] = new_symbol;
+  }
+  BROTLI_FREE(m, remap);
+  for (i = 0; i < num_copied; ++i) {
+    out[i] = packed[i];
+  }
+  BROTLI_FREE(m, packed);
+  return num_copied;
+})
+
+BROTLI_INTERNAL void FN(BrotliClusterHistograms)(
+    MemoryManager* m, const HistogramType* in, const size_t in_size,
+    size_t max_histograms, HistogramType* out, size_t* out_size,
+    uint32_t* histogram_symbols) CODE({
+  uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, in_size);
+  uint32_t* clusters = BROTLI_ALLOC(m, uint32_t, in_size);
+  size_t num_clusters = 0;
+  const size_t max_input_histograms = 64;  /* Batch size of the first pass. */
+  size_t pairs_capacity = max_input_histograms * max_input_histograms / 2;
+  /* For the first pass of clustering, we allow all pairs. */
+  HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity + 1);
+  /* TODO(eustas): move to "persistent" arena? */
+  HistogramType* tmp = BROTLI_ALLOC(m, HistogramType, 1);
+  size_t i;
+  /* Clusters |in| into |out|; histogram_symbols[i] is the cluster of in[i]. */
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(cluster_size) ||
+      BROTLI_IS_NULL(clusters) || BROTLI_IS_NULL(pairs)|| BROTLI_IS_NULL(tmp)) {
+    return;
+  }
+  /* Every input starts out as its own cluster of size 1. */
+  for (i = 0; i < in_size; ++i) {
+    cluster_size[i] = 1;
+  }
+
+  for (i = 0; i < in_size; ++i) {
+    out[i] = in[i];
+    out[i].bit_cost_ = FN(BrotliPopulationCost)(&in[i]);
+    histogram_symbols[i] = (uint32_t)i;
+  }
+  /* First pass: combine within batches of max_input_histograms inputs. */
+  for (i = 0; i < in_size; i += max_input_histograms) {
+    size_t num_to_combine =
+        BROTLI_MIN(size_t, in_size - i, max_input_histograms);
+    size_t num_new_clusters;
+    size_t j;
+    for (j = 0; j < num_to_combine; ++j) {
+      clusters[num_clusters + j] = (uint32_t)(i + j);
+    }
+    num_new_clusters =
+        FN(BrotliHistogramCombine)(out, tmp, cluster_size,
+                                   &histogram_symbols[i],
+                                   &clusters[num_clusters], pairs,
+                                   num_to_combine, num_to_combine,
+                                   max_histograms, pairs_capacity);
+    num_clusters += num_new_clusters;  /* Survivors are packed in clusters[]. */
+  }
+
+  {
+    /* For the second pass, we limit the total number of histogram pairs.
+       After this limit is reached, we only keep searching for the best pair. */
+    size_t max_num_pairs = BROTLI_MIN(size_t,
+        64 * num_clusters, (num_clusters / 2) * num_clusters);
+    BROTLI_ENSURE_CAPACITY(
+        m, HistogramPair, pairs, pairs_capacity, max_num_pairs + 1);
+    if (BROTLI_IS_OOM(m)) return;
+
+    /* Collapse similar histograms across all batches. */
+    num_clusters = FN(BrotliHistogramCombine)(out, tmp, cluster_size,
+                                              histogram_symbols, clusters,
+                                              pairs, num_clusters, in_size,
+                                              max_histograms, max_num_pairs);
+  }
+  BROTLI_FREE(m, pairs);
+  BROTLI_FREE(m, cluster_size);
+  /* Find the optimal map from original histograms to the final ones. */
+  FN(BrotliHistogramRemap)(in, in_size, clusters, num_clusters,
+                           out, tmp, histogram_symbols);
+  BROTLI_FREE(m, tmp);
+  BROTLI_FREE(m, clusters);
+  /* Convert the context map to a canonical form; *out_size gets the count. */
+  *out_size = FN(BrotliHistogramReindex)(m, out, histogram_symbols, in_size);
+  if (BROTLI_IS_OOM(m)) return;
+})
+
+#undef HistogramType
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/command.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/command.c
new file mode 100644
index 0000000000..bf80561bca
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/command.c
@@ -0,0 +1,28 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "command.h"
+
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+const uint32_t kBrotliInsBase[BROTLI_NUM_INS_COPY_CODES] = {  /* by inscode */
+    0,  1,  2,  3,  4,   5,   6,   8,   10,   14,   18,   26,
+    34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594};
+const uint32_t kBrotliInsExtra[BROTLI_NUM_INS_COPY_CODES] = {  /* by inscode */
+    0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24};
+const uint32_t kBrotliCopyBase[BROTLI_NUM_INS_COPY_CODES] = {  /* by copycode */
+    2,  3,  4,  5,  6,  7,   8,   9,   10,  12,  14,   18,
+    22, 30, 38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118};
+const uint32_t kBrotliCopyExtra[BROTLI_NUM_INS_COPY_CODES] = {  /* by copycode */
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24};
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/command.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/command.h
new file mode 100644
index 0000000000..ba4de7eab3
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/command.h
@@ -0,0 +1,191 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* This class models a sequence of literals and a backward reference copy. */
+
+#ifndef BROTLI_ENC_COMMAND_H_
+#define BROTLI_ENC_COMMAND_H_
+
+#include <brotli/types.h>
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include "fast_log.h"
+#include "params.h"
+#include "prefix.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+BROTLI_INTERNAL extern const uint32_t
+    kBrotliInsBase[BROTLI_NUM_INS_COPY_CODES];
+BROTLI_INTERNAL extern const uint32_t
+    kBrotliInsExtra[BROTLI_NUM_INS_COPY_CODES];
+BROTLI_INTERNAL extern const uint32_t
+    kBrotliCopyBase[BROTLI_NUM_INS_COPY_CODES];
+BROTLI_INTERNAL extern const uint32_t
+    kBrotliCopyExtra[BROTLI_NUM_INS_COPY_CODES];
+
+static BROTLI_INLINE uint16_t GetInsertLengthCode(size_t insertlen) {
+  /* Maps an insert length to its insert length code. The range boundaries
+     below (6, 130, 2114, 6210, 22594) are entries of kBrotliInsBase. */
+  if (insertlen < 6) return (uint16_t)insertlen;
+  if (insertlen < 130) {
+    const size_t tail = insertlen - 2;
+    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
+    return (uint16_t)((nbits << 1) + (tail >> nbits) + 2);
+  }
+  if (insertlen < 2114) {
+    return (uint16_t)(Log2FloorNonZero(insertlen - 66) + 10);
+  }
+  if (insertlen < 6210) return 21u;
+  if (insertlen < 22594) return 22u;
+  return 23u;
+}
+
+static BROTLI_INLINE uint16_t GetCopyLengthCode(size_t copylen) {
+  /* Maps a copy length to its copy length code. NOTE(review): copylen < 2
+     would wrap in the first subtraction; presumably callers never pass it. */
+  if (copylen < 10) return (uint16_t)(copylen - 2);
+  if (copylen < 134) {
+    const size_t tail = copylen - 6;
+    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
+    return (uint16_t)((nbits << 1) + (tail >> nbits) + 4);
+  }
+  if (copylen < 2118) return (uint16_t)(Log2FloorNonZero(copylen - 70) + 12);
+  return 23u;
+}
+
+static BROTLI_INLINE uint16_t CombineLengthCodes(
+    uint16_t inscode, uint16_t copycode, BROTLI_BOOL use_last_distance) {
+  const uint16_t low_bits =
+      (uint16_t)((copycode & 0x7u) | ((inscode & 0x7u) << 3u));
+  uint32_t offset;  /* First 2 * index, then the base of the code range. */
+  if (use_last_distance && inscode < 8u && copycode < 16u) {
+    return (copycode < 8u) ? low_bits : (low_bits | 64u);
+  }
+  /* Specification: 5 Encoding of ... (last table) */
+  /* offset = 2 * index, where index is in range [0..8] */
+  offset = 2u * ((copycode >> 3u) + 3u * (inscode >> 3u));
+  /* All values in specification are K * 64,
+     where   K = [2, 3, 6, 4, 5, 8, 7, 9, 10],
+         i + 1 = [1, 2, 3, 4, 5, 6, 7, 8,  9],
+     K - i - 1 = [1, 1, 3, 0, 0, 2, 0, 1,  2] = D.
+     All values in D require only 2 bits to encode.
+     Magic constant is shifted 6 bits left, to avoid final multiplication. */
+  offset = (offset << 5u) + 0x40u + ((0x520D40u >> offset) & 0xC0u);
+  return (uint16_t)(offset | low_bits);
+}
+
+static BROTLI_INLINE void GetLengthCode(size_t insertlen, size_t copylen,
+                                        BROTLI_BOOL use_last_distance,
+                                        uint16_t* code) {
+  /* Writes the combined insert-and-copy length code into *code. */
+  *code = CombineLengthCodes(GetInsertLengthCode(insertlen),
+                             GetCopyLengthCode(copylen), use_last_distance);
+}
+
+static BROTLI_INLINE uint32_t GetInsertBase(uint16_t inscode) {
+  return kBrotliInsBase[inscode];  /* No range check on inscode. */
+}
+
+static BROTLI_INLINE uint32_t GetInsertExtra(uint16_t inscode) {
+  return kBrotliInsExtra[inscode];  /* No range check on inscode. */
+}
+
+static BROTLI_INLINE uint32_t GetCopyBase(uint16_t copycode) {
+  return kBrotliCopyBase[copycode];  /* No range check on copycode. */
+}
+
+static BROTLI_INLINE uint32_t GetCopyExtra(uint16_t copycode) {
+  return kBrotliCopyExtra[copycode];  /* No range check on copycode. */
+}
+
+typedef struct Command {
+  uint32_t insert_len_;  /* Insert length, as passed to InitCommand. */
+  /* Stores copy_len in low 25 bits and copy_code - copy_len in high 7 bits. */
+  uint32_t copy_len_;
+  /* Stores distance extra bits. */
+  uint32_t dist_extra_;
+  uint16_t cmd_prefix_;  /* Combined length code written by GetLengthCode. */
+  /* Stores distance code in low 10 bits
+     and number of extra bits in high 6 bits. */
+  uint16_t dist_prefix_;
+} Command;
+
+/* distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1. */
+static BROTLI_INLINE void InitCommand(Command* self,
+    const BrotliDistanceParams* dist, size_t insertlen,
+    size_t copylen, int copylen_code_delta, size_t distance_code) {
+  /* Reinterpret the signed delta through explicit 8-bit casts so the packing
+     does not depend on how the platform represents negative ints. */
+  const uint32_t delta_bits = (uint8_t)((int8_t)copylen_code_delta);
+  const size_t copylen_code = (size_t)((int)copylen + copylen_code_delta);
+  BROTLI_BOOL same_as_last;
+  self->insert_len_ = (uint32_t)insertlen;
+  self->copy_len_ = (uint32_t)(copylen | (delta_bits << 25));
+  /* Distance prefix and extra bits use ndirect and npostfix from |dist|. */
+  PrefixEncodeCopyDistance(
+      distance_code, dist->num_direct_distance_codes,
+      dist->distance_postfix_bits, &self->dist_prefix_, &self->dist_extra_);
+  same_as_last = TO_BROTLI_BOOL((self->dist_prefix_ & 0x3FF) == 0);
+  GetLengthCode(insertlen, copylen_code, same_as_last, &self->cmd_prefix_);
+}
+
+static BROTLI_INLINE void InitInsertCommand(Command* self, size_t insertlen) {
+  GetLengthCode(insertlen, 4, BROTLI_FALSE, &self->cmd_prefix_);
+  self->dist_prefix_ = BROTLI_NUM_DISTANCE_SHORT_CODES;
+  self->dist_extra_ = 0;
+  self->copy_len_ = 4 << 25;  /* Copy length 0 with a code delta of 4. */
+  self->insert_len_ = (uint32_t)insertlen;
+}
+
+static BROTLI_INLINE uint32_t CommandRestoreDistanceCode(
+    const Command* self, const BrotliDistanceParams* dist) {
+  /* dist_prefix_: code in the low 10 bits, extra-bit count in the high 6. */
+  const uint32_t dcode = self->dist_prefix_ & 0x3FFu;
+  const uint32_t first_long_code =
+      BROTLI_NUM_DISTANCE_SHORT_CODES + dist->num_direct_distance_codes;
+  uint32_t nbits;
+  uint32_t rel;
+  uint32_t hcode;
+  uint32_t lcode;
+  uint32_t offset;
+  if (dcode < first_long_code) return dcode;  /* Stored verbatim. */
+  nbits = self->dist_prefix_ >> 10;
+  rel = dcode - first_long_code;
+  hcode = rel >> dist->distance_postfix_bits;
+  lcode = rel & ((1U << dist->distance_postfix_bits) - 1U);
+  offset = ((2U + (hcode & 1U)) << nbits) - 4U;
+  return ((offset + self->dist_extra_) << dist->distance_postfix_bits) +
+      lcode + first_long_code;
+}
+
+static BROTLI_INLINE uint32_t CommandDistanceContext(const Command* self) {
+  const uint32_t copy_low = self->cmd_prefix_ & 7;
+  /* Prefix ranges 0, 2, 4 and 7 keep low copy codes 0..2; the rest give 3. */
+  switch (self->cmd_prefix_ >> 6) {
+    case 0: case 2: case 4: case 7: return (copy_low <= 2) ? copy_low : 3;
+    default: return 3;
+  }
+}
+
+static BROTLI_INLINE uint32_t CommandCopyLen(const Command* self) {
+  return self->copy_len_ & 0x1FFFFFF;  /* Low 25 bits hold the copy length. */
+}
+
+static BROTLI_INLINE uint32_t CommandCopyLenCode(const Command* self) {
+  const uint32_t top7 = self->copy_len_ >> 25;  /* 7-bit signed code delta. */
+  const int32_t delta = (int8_t)((uint8_t)(top7 | ((top7 & 0x40) << 1)));
+  return (uint32_t)((int32_t)CommandCopyLen(self) + delta);
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_COMMAND_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/compound_dictionary.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/compound_dictionary.c
new file mode 100644
index 0000000000..a3b5e6933d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/compound_dictionary.c
@@ -0,0 +1,207 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "compound_dictionary.h"
+
+#include <brotli/types.h>
+
+#include "../common/platform.h"
+#include "memory.h"
+#include "quality.h"
+
+static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m,
+    const uint8_t* source, size_t source_size, uint32_t bucket_bits,
+    uint32_t slot_bits, uint32_t hash_bits, uint16_t bucket_limit) {
+  /* Builds a "lean" dictionary for |source|. Step 1: "bloated" hasher. */
+  uint32_t num_slots = 1u << slot_bits;
+  uint32_t num_buckets = 1u << bucket_bits;
+  uint32_t hash_shift = 64u - bucket_bits;  /* Key = top bucket_bits bits. */
+  uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
+  uint32_t slot_mask = num_slots - 1;
+  size_t alloc_size = (sizeof(uint32_t) << slot_bits) +  /* slot_size */
+      (sizeof(uint32_t) << slot_bits) +  /* slot_limit */
+      (sizeof(uint16_t) << bucket_bits) +  /* num */
+      (sizeof(uint32_t) << bucket_bits) +  /* bucket_heads */
+      (sizeof(uint32_t) * source_size);  /* next_bucket */
+  uint8_t* flat = NULL;  /* Scratch buffer holding the five arrays above. */
+  PreparedDictionary* result = NULL;
+  uint16_t* num = NULL;
+  uint32_t* bucket_heads = NULL;
+  uint32_t* next_bucket = NULL;
+  uint32_t* slot_offsets = NULL;
+  uint16_t* heads = NULL;
+  uint32_t* items = NULL;
+  uint8_t** source_ref = NULL;
+  uint32_t i;
+  uint32_t* slot_size = NULL;
+  uint32_t* slot_limit = NULL;
+  uint32_t total_items = 0;
+  if (slot_bits > 16) return NULL;
+  if (slot_bits > bucket_bits) return NULL;
+  if (bucket_bits - slot_bits >= 16) return NULL;
+
+  flat = BROTLI_ALLOC(m, uint8_t, alloc_size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(flat)) return NULL;
+
+  slot_size = (uint32_t*)flat;
+  slot_limit = (uint32_t*)(&slot_size[num_slots]);
+  num = (uint16_t*)(&slot_limit[num_slots]);
+  bucket_heads = (uint32_t*)(&num[num_buckets]);
+  next_bucket = (uint32_t*)(&bucket_heads[num_buckets]);
+  memset(num, 0, num_buckets * sizeof(num[0]));
+  /* Chain every 8-byte window into its bucket, newest position first. */
+  /* TODO(eustas): apply custom "store" order. */
+  for (i = 0; i + 7 < source_size; ++i) {
+    const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(&source[i]) & hash_mask) *
+        kPreparedDictionaryHashMul64Long;
+    const uint32_t key = (uint32_t)(h >> hash_shift);
+    uint16_t count = num[key];
+    next_bucket[i] = (count == 0) ? ((uint32_t)(-1)) : bucket_heads[key];
+    bucket_heads[key] = i;
+    count++;
+    if (count > bucket_limit) count = bucket_limit;  /* Saturate the count. */
+    num[key] = count;
+  }
+
+  /* Step 2: per slot, shrink the bucket limit until offsets fit 16 bits. */
+  for (i = 0; i < num_slots; ++i) {
+    BROTLI_BOOL overflow = BROTLI_FALSE;
+    slot_limit[i] = bucket_limit;
+    while (BROTLI_TRUE) {
+      uint32_t limit = slot_limit[i];
+      size_t j;
+      uint32_t count = 0;
+      overflow = BROTLI_FALSE;
+      for (j = i; j < num_buckets; j += num_slots) {
+        uint32_t size = num[j];
+        /* Last chain may span behind 64K limit; overflow happens only if
+           we are about to use 0xFFFF+ as item offset. */
+        if (count >= 0xFFFF) {
+          overflow = BROTLI_TRUE;
+          break;
+        }
+        if (size > limit) size = limit;
+        count += size;
+      }
+      if (!overflow) {
+        slot_size[i] = count;
+        total_items += count;
+        break;
+      }
+      slot_limit[i]--;  /* Retry this slot with a tighter limit. */
+    }
+  }
+
+  /* Step 3: transfer data to "slim" hasher. */
+  alloc_size = sizeof(PreparedDictionary) + (sizeof(uint32_t) << slot_bits) +
+      (sizeof(uint16_t) << bucket_bits) + (sizeof(uint32_t) * total_items) +
+      sizeof(uint8_t*);
+
+  result = (PreparedDictionary*)BROTLI_ALLOC(m, uint8_t, alloc_size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(result)) {
+    BROTLI_FREE(m, flat);
+    return NULL;
+  }
+  slot_offsets = (uint32_t*)(&result[1]);  /* Arrays follow the header. */
+  heads = (uint16_t*)(&slot_offsets[num_slots]);
+  items = (uint32_t*)(&heads[num_buckets]);
+  source_ref = (uint8_t**)(&items[total_items]);
+
+  result->magic = kLeanPreparedDictionaryMagic;
+  result->num_items = total_items;
+  result->source_size = (uint32_t)source_size;
+  result->hash_bits = hash_bits;
+  result->bucket_bits = bucket_bits;
+  result->slot_bits = slot_bits;
+  BROTLI_UNALIGNED_STORE_PTR(source_ref, source);  /* Referenced, not copied. */
+
+  total_items = 0;  /* Reused as the running offset of each slot. */
+  for (i = 0; i < num_slots; ++i) {
+    slot_offsets[i] = total_items;
+    total_items += slot_size[i];
+    slot_size[i] = 0;  /* Reused as the fill cursor of the slot. */
+  }
+  for (i = 0; i < num_buckets; ++i) {
+    uint32_t slot = i & slot_mask;
+    uint32_t count = num[i];
+    uint32_t pos;
+    size_t j;
+    size_t cursor = slot_size[slot];
+    if (count > slot_limit[slot]) count = slot_limit[slot];
+    if (count == 0) {
+      heads[i] = 0xFFFF;  /* Marker for an empty bucket. */
+      continue;
+    }
+    heads[i] = (uint16_t)cursor;  /* Offset of the bucket within its slot. */
+    cursor += slot_offsets[slot];
+    slot_size[slot] += count;
+    pos = bucket_heads[i];
+    for (j = 0; j < count; j++) {
+      items[cursor++] = pos;
+      pos = next_bucket[pos];
+    }
+    items[cursor - 1] |= 0x80000000;  /* Flag the last item of the bucket. */
+  }
+
+  BROTLI_FREE(m, flat);
+  return result;
+}
+
+PreparedDictionary* CreatePreparedDictionary(MemoryManager* m,
+    const uint8_t* source, size_t source_size) {
+  const uint32_t hash_bits = 40;
+  const uint16_t bucket_limit = 32;
+  /* Double the table (up to 2^22 buckets) until 16 * buckets >= source_size. */
+  uint32_t bucket_bits = 17;
+  uint32_t slot_bits = 7;
+  size_t volume;
+  for (volume = 16u << bucket_bits; volume < source_size; volume <<= 1) {
+    if (bucket_bits >= 22) break;
+    bucket_bits++;
+    slot_bits++;
+  }
+  return CreatePreparedDictionaryWithParams(m,
+      source, source_size, bucket_bits, slot_bits, hash_bits, bucket_limit);
+}
+
+void DestroyPreparedDictionary(MemoryManager* m,
+    PreparedDictionary* dictionary) {
+  /* Destroying a NULL dictionary is a no-op. */
+  if (dictionary) BROTLI_FREE(m, dictionary);
+}
+
+BROTLI_BOOL AttachPreparedDictionary(
+    CompoundDictionary* compound, const PreparedDictionary* dictionary) {
+  const size_t chunk_idx = compound->num_chunks;
+  uint32_t* slot_offsets;
+  uint16_t* heads;
+  uint32_t* items;
+  const void* tail;
+
+  /* Refuse when every chunk slot is taken or there is nothing to attach. */
+  if (chunk_idx == SHARED_BROTLI_MAX_COMPOUND_DICTS || !dictionary) {
+    return BROTLI_FALSE;
+  }
+  /* Register the chunk and record where the next one would start. */
+  compound->total_size += dictionary->source_size;
+  compound->chunks[chunk_idx] = dictionary;
+  compound->chunk_offsets[chunk_idx + 1] = compound->total_size;
+  /* Walk the variable-size members that follow the header to find the tail. */
+  slot_offsets = (uint32_t*)(&dictionary[1]);
+  heads = (uint16_t*)(&slot_offsets[1u << dictionary->slot_bits]);
+  items = (uint32_t*)(&heads[1u << dictionary->bucket_bits]);
+  tail = (void*)&items[dictionary->num_items];
+  if (dictionary->magic == kPreparedDictionaryMagic) {
+    /* The source bytes are stored right after the items. */
+    compound->chunk_source[chunk_idx] = (const uint8_t*)tail;
+  } else {
+    /* dictionary->magic == kLeanPreparedDictionaryMagic: stored pointer. */
+    compound->chunk_source[chunk_idx] =
+        (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
+  }
+  compound->num_chunks++;
+  return BROTLI_TRUE;
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/compound_dictionary.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/compound_dictionary.h
new file mode 100644
index 0000000000..9c531d5b19
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/compound_dictionary.h
@@ -0,0 +1,74 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#ifndef BROTLI_ENC_PREPARED_DICTIONARY_H_
+#define BROTLI_ENC_PREPARED_DICTIONARY_H_
+
+#include <brotli/shared_dictionary.h>
+#include <brotli/types.h>
+
+#include "../common/platform.h"
+#include "../common/constants.h"
+#include "memory.h"
+
+/* "Fat" prepared dictionary, could be cooked outside of C implementation,
+ * e.g. on Java side. LZ77 data is copied inside PreparedDictionary struct. */
+static const uint32_t kPreparedDictionaryMagic = 0xDEBCEDE0;
+
+static const uint32_t kSharedDictionaryMagic = 0xDEBCEDE1;
+
+static const uint32_t kManagedDictionaryMagic = 0xDEBCEDE2;
+
+/* "Lean" prepared dictionary. LZ77 data is referenced. It is the responsibility
+ * of caller of "prepare dictionary" to keep the LZ77 data while prepared
+ * dictionary is in use. */
+static const uint32_t kLeanPreparedDictionaryMagic = 0xDEBCEDE3;
+
+static const uint64_t kPreparedDictionaryHashMul64Long =
+    BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);  /* Hash multiplier. */
+
+typedef struct PreparedDictionary {
+  uint32_t magic;  /* Layout tag; one of the k*DictionaryMagic constants. */
+  uint32_t num_items;  /* Number of entries in items[]. */
+  uint32_t source_size;  /* Size of the source data in bytes. */
+  uint32_t hash_bits;  /* Low bits of each 8-byte load that get hashed. */
+  uint32_t bucket_bits;  /* log2 of the number of buckets (heads[]). */
+  uint32_t slot_bits;  /* log2 of the number of slots (slot_offsets[]). */
+
+  /* --- Dynamic size members --- */
+
+  /* uint32_t slot_offsets[1 << slot_bits]; */
+  /* uint16_t heads[1 << bucket_bits]; */
+  /* uint32_t items[num_items]; */
+
+  /* [maybe] uint8_t* source_ref, depending on magic. */
+  /* [maybe] uint8_t source[source_size], depending on magic. */
+} PreparedDictionary;
+
+BROTLI_INTERNAL PreparedDictionary* CreatePreparedDictionary(MemoryManager* m,
+    const uint8_t* source, size_t source_size);
+
+BROTLI_INTERNAL void DestroyPreparedDictionary(MemoryManager* m,
+    PreparedDictionary* dictionary);
+
+typedef struct CompoundDictionary {
+  /* LZ77 prefix, compound dictionary */
+  size_t num_chunks;  /* Number of attached chunks. */
+  size_t total_size;  /* Sum of source_size over the attached chunks. */
+  /* Client instances; chunk_offsets[i + 1] = total size through chunk i. */
+  const PreparedDictionary* chunks[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
+  const uint8_t* chunk_source[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
+  size_t chunk_offsets[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
+
+  size_t num_prepared_instances_;
+  /* Owned instances. */
+  PreparedDictionary* prepared_instances_[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
+} CompoundDictionary;
+
+BROTLI_INTERNAL BROTLI_BOOL AttachPreparedDictionary(
+    CompoundDictionary* compound, const PreparedDictionary* dictionary);
+
+#endif  /* BROTLI_ENC_PREPARED_DICTIONARY_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/compress_fragment.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/compress_fragment.c
new file mode 100644
index 0000000000..13890eabf6
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/compress_fragment.c
@@ -0,0 +1,800 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function for fast encoding of an input fragment, independently from the input
+   history. This function uses one-pass processing: when we find a backward
+   match, we immediately emit the corresponding command and literal codes to
+   the bit stream.
+
+   Adapted from the CompressFragment() function in
+   https://github.com/google/snappy/blob/master/snappy.cc */
+
+#include "compress_fragment.h"
+
+#include <string.h>  /* memcmp, memcpy, memset */
+
+#include <brotli/types.h>
+
+#include "../common/platform.h"
+#include "brotli_bit_stream.h"
+#include "entropy_encode.h"
+#include "fast_log.h"
+#include "find_match_length.h"
+#include "write_bits.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)
+
+/* kHashMul32 multiplier has these properties:
+   * The multiplier must be odd. Otherwise we may lose the highest bit.
+   * No long streaks of ones or zeros.
+   * No effort is made to ensure that it is prime; being odd is enough
+     for this use.
+   * The number has been tuned heuristically against compression benchmarks. */
+static const uint32_t kHashMul32 = 0x1E35A7BD;
+
+static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
+  const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 24) * kHashMul32;  /* "<< 24" drops the top 3 of the 8 loaded bytes */
+  return (uint32_t)(h >> shift);  /* keep the (64 - shift) highest bits of the product */
+}
+
+static BROTLI_INLINE uint32_t HashBytesAtOffset(
+    uint64_t v, int offset, size_t shift) {  /* same formula as Hash(), applied "offset" bytes into v */
+  BROTLI_DCHECK(offset >= 0);
+  BROTLI_DCHECK(offset <= 3);  /* 5 hashed bytes + offset must fit in the 8 bytes of v */
+  {
+    const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
+    return (uint32_t)(h >> shift);
+  }
+}
+
+static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
+  return TO_BROTLI_BOOL(  /* compares 5 bytes: one 32-bit read plus the byte at index 4 */
+      BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2) &&
+      p1[4] == p2[4]);
+}
+
+/* Builds a literal prefix code into "depths" and "bits" based on the statistics
+   of the "input" string and stores it into the bit stream.
+   Note that the prefix code here is built from the pre-LZ77 input, therefore
+   we can only approximate the statistics of the actual literal stream.
+   Moreover, for long inputs we build a histogram from a sample of the input
+   and thus have to assign a non-zero depth for each literal.
+   Returns the estimated cost, in millibytes per character, of encoding the
+   given input with the generated code. */
+static size_t BuildAndStoreLiteralPrefixCode(BrotliOnePassArena* s,
+                                             const uint8_t* input,
+                                             const size_t input_size,
+                                             uint8_t depths[256],
+                                             uint16_t bits[256],
+                                             size_t* storage_ix,
+                                             uint8_t* storage) {
+  uint32_t* BROTLI_RESTRICT const histogram = s->histogram;
+  size_t histogram_total;
+  size_t i;
+  memset(histogram, 0, sizeof(s->histogram));
+
+  if (input_size < (1 << 15)) {  /* short input: count every byte */
+    for (i = 0; i < input_size; ++i) {
+      ++histogram[input[i]];
+    }
+    histogram_total = input_size;
+    for (i = 0; i < 256; ++i) {
+      /* We weigh the first 11 samples with weight 3 to account for the
+         balancing effect of the LZ77 phase on the histogram. */
+      const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
+      histogram[i] += adjust;
+      histogram_total += adjust;
+    }
+  } else {  /* long input: count only every kSampleRate-th byte */
+    static const size_t kSampleRate = 29;
+    for (i = 0; i < input_size; i += kSampleRate) {
+      ++histogram[input[i]];
+    }
+    histogram_total = (input_size + kSampleRate - 1) / kSampleRate;  /* number of samples taken */
+    for (i = 0; i < 256; ++i) {
+      /* We add 1 to each population count to avoid 0 bit depths (since this is
+         only a sample and we don't know if the symbol appears or not), and we
+         weigh the first 11 samples with weight 3 to account for the balancing
+         effect of the LZ77 phase on the histogram (more frequent symbols are
+         more likely to be in backward references instead as literals). */
+      const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
+      histogram[i] += adjust;
+      histogram_total += adjust;
+    }
+  }
+  BrotliBuildAndStoreHuffmanTreeFast(s->tree, histogram, histogram_total,
+                                     /* max_bits = */ 8,
+                                     depths, bits, storage_ix, storage);
+  {
+    size_t literal_ratio = 0;  /* accumulates count * code length, i.e. bits */
+    for (i = 0; i < 256; ++i) {
+      if (histogram[i]) literal_ratio += histogram[i] * depths[i];
+    }
+    /* Estimated encoding ratio, millibytes per symbol (bits * 1000 / 8). */
+    return (literal_ratio * 125) / histogram_total;
+  }
+}
+
+/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
+   "bits" based on "histogram" and stores it into the bit stream. */
+static void BuildAndStoreCommandPrefixCode(BrotliOnePassArena* s,
+    size_t* storage_ix, uint8_t* storage) {
+  const uint32_t* const histogram = s->cmd_histo;
+  uint8_t* const depth = s->cmd_depth;
+  uint16_t* const bits = s->cmd_bits;
+  uint8_t* BROTLI_RESTRICT const tmp_depth = s->tmp_depth;
+  uint16_t* BROTLI_RESTRICT const tmp_bits = s->tmp_bits;
+  /* TODO(eustas): do only once on initialization. */
+  memset(tmp_depth, 0, BROTLI_NUM_COMMAND_SYMBOLS);
+
+  BrotliCreateHuffmanTree(histogram, 64, 15, s->tree, depth);  /* symbols 0..63: commands */
+  BrotliCreateHuffmanTree(&histogram[64], 64, 14, s->tree, &depth[64]);  /* symbols 64..127: distances */
+  /* We have to jump through a few hoops here in order to compute
+     the command bits because the symbols are in a different order than in
+     the full alphabet. This looks complicated, but having the symbols
+     in this order in the command bits saves a few branches in the Emit*
+     functions. */
+  memcpy(tmp_depth, depth, 24);
+  memcpy(tmp_depth + 24, depth + 40, 8);
+  memcpy(tmp_depth + 32, depth + 24, 8);
+  memcpy(tmp_depth + 40, depth + 48, 8);
+  memcpy(tmp_depth + 48, depth + 32, 8);
+  memcpy(tmp_depth + 56, depth + 56, 8);
+  BrotliConvertBitDepthsToSymbols(tmp_depth, 64, tmp_bits);
+  memcpy(bits, tmp_bits, 48);  /* sizes are in bytes: 48 bytes = 24 uint16_t entries */
+  memcpy(bits + 24, tmp_bits + 32, 16);  /* 16 bytes = 8 uint16_t entries, inverse of the depth shuffle above */
+  memcpy(bits + 32, tmp_bits + 48, 16);
+  memcpy(bits + 40, tmp_bits + 24, 16);
+  memcpy(bits + 48, tmp_bits + 40, 16);
+  memcpy(bits + 56, tmp_bits + 56, 16);
+  BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
+  {
+    /* Create the bit length array for the full command alphabet. */
+    size_t i;
+    memset(tmp_depth, 0, 64);  /* only 64 first values were used */
+    memcpy(tmp_depth, depth, 8);
+    memcpy(tmp_depth + 64, depth + 8, 8);
+    memcpy(tmp_depth + 128, depth + 16, 8);
+    memcpy(tmp_depth + 192, depth + 24, 8);
+    memcpy(tmp_depth + 384, depth + 32, 8);
+    for (i = 0; i < 8; ++i) {  /* these groups are spread out with a stride of 8 */
+      tmp_depth[128 + 8 * i] = depth[40 + i];
+      tmp_depth[256 + 8 * i] = depth[48 + i];
+      tmp_depth[448 + 8 * i] = depth[56 + i];
+    }
+    /* TODO(eustas): could/should full-length machinery be avoided? */
+    BrotliStoreHuffmanTree(
+        tmp_depth, BROTLI_NUM_COMMAND_SYMBOLS, s->tree, storage_ix, storage);
+  }
+  BrotliStoreHuffmanTree(&depth[64], 64, s->tree, storage_ix, storage);  /* distance code */
+}
+
+/* Writes the insert length code and its extra bits. REQUIRES: insertlen < 6210 */
+static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
+                                        const uint8_t depth[128],
+                                        const uint16_t bits[128],
+                                        uint32_t histo[128],
+                                        size_t* storage_ix,
+                                        uint8_t* storage) {
+  if (insertlen < 6) {  /* one symbol per length, no extra bits */
+    const size_t code = insertlen + 40;
+    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+    ++histo[code];
+  } else if (insertlen < 130) {
+    const size_t tail = insertlen - 2;
+    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
+    const size_t prefix = tail >> nbits;
+    const size_t inscode = (nbits << 1) + prefix + 42;
+    BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
+    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);  /* extra bits */
+    ++histo[inscode];
+  } else if (insertlen < 2114) {
+    const size_t tail = insertlen - 66;
+    const uint32_t nbits = Log2FloorNonZero(tail);
+    const size_t code = nbits + 50;
+    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);  /* extra bits */
+    ++histo[code];
+  } else {
+    BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
+    BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);  /* 12 bits cover 2114..6209 */
+    ++histo[61];
+  }
+}
+
+static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,  /* callers pass insertlen >= 6210 */
+                                            const uint8_t depth[128],
+                                            const uint16_t bits[128],
+                                            uint32_t histo[128],
+                                            size_t* storage_ix,
+                                            uint8_t* storage) {
+  if (insertlen < 22594) {  /* 22594 = 6210 + (1 << 14) */
+    BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
+    BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);  /* 14 extra bits */
+    ++histo[62];
+  } else {
+    BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
+    BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);  /* 24 extra bits */
+    ++histo[63];
+  }
+}
+
+static BROTLI_INLINE void EmitCopyLen(size_t copylen,  /* writes the copy length only, not the distance */
+                                      const uint8_t depth[128],
+                                      const uint16_t bits[128],
+                                      uint32_t histo[128],
+                                      size_t* storage_ix,
+                                      uint8_t* storage) {
+  if (copylen < 10) {  /* one symbol per length, no extra bits */
+    BrotliWriteBits(
+        depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
+    ++histo[copylen + 14];
+  } else if (copylen < 134) {
+    const size_t tail = copylen - 6;
+    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
+    const size_t prefix = tail >> nbits;
+    const size_t code = (nbits << 1) + prefix + 20;
+    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);  /* extra bits */
+    ++histo[code];
+  } else if (copylen < 2118) {
+    const size_t tail = copylen - 70;
+    const uint32_t nbits = Log2FloorNonZero(tail);
+    const size_t code = nbits + 28;
+    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);  /* extra bits */
+    ++histo[code];
+  } else {
+    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
+    BrotliWriteBits(24, copylen - 2118, storage_ix, storage);  /* 24 extra bits */
+    ++histo[39];
+  }
+}
+
+static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
+                                                  const uint8_t depth[128],
+                                                  const uint16_t bits[128],
+                                                  uint32_t histo[128],
+                                                  size_t* storage_ix,
+                                                  uint8_t* storage) {
+  if (copylen < 12) {  /* "copylen - 4" is the table index, so copylen must be >= 4 */
+    BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
+    ++histo[copylen - 4];
+  } else if (copylen < 72) {
+    const size_t tail = copylen - 8;
+    const uint32_t nbits = Log2FloorNonZero(tail) - 1;
+    const size_t prefix = tail >> nbits;
+    const size_t code = (nbits << 1) + prefix + 4;
+    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
+    ++histo[code];
+  } else if (copylen < 136) {  /* from here on every branch also writes symbol 64 */
+    const size_t tail = copylen - 8;
+    const size_t code = (tail >> 5) + 30;
+    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+    BrotliWriteBits(5, tail & 31, storage_ix, storage);
+    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);  /* symbol 64: also what the caller writes when distance == last_distance */
+    ++histo[code];
+    ++histo[64];
+  } else if (copylen < 2120) {
+    const size_t tail = copylen - 72;
+    const uint32_t nbits = Log2FloorNonZero(tail);
+    const size_t code = nbits + 28;
+    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
+    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
+    ++histo[code];
+    ++histo[64];
+  } else {
+    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
+    BrotliWriteBits(24, copylen - 2120, storage_ix, storage);  /* 24 extra bits */
+    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
+    ++histo[39];
+    ++histo[64];
+  }
+}
+
+static BROTLI_INLINE void EmitDistance(size_t distance,
+                                       const uint8_t depth[128],
+                                       const uint16_t bits[128],
+                                       uint32_t histo[128],
+                                       size_t* storage_ix, uint8_t* storage) {
+  const size_t d = distance + 3;
+  const uint32_t nbits = Log2FloorNonZero(d) - 1u;  /* number of extra bits */
+  const size_t prefix = (d >> nbits) & 1;  /* the bit just below the leading one of d */
+  const size_t offset = (2 + prefix) << nbits;  /* smallest d that maps to this code */
+  const size_t distcode = 2 * (nbits - 1) + prefix + 80;
+  BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
+  BrotliWriteBits(nbits, d - offset, storage_ix, storage);  /* extra bits */
+  ++histo[distcode];  /* count the symbol actually used */
+}
+
+static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
+                                       const uint8_t depth[256],
+                                       const uint16_t bits[256],
+                                       size_t* storage_ix, uint8_t* storage) {
+  size_t j;
+  for (j = 0; j < len; j++) {  /* one prefix code per input byte */
+    const uint8_t lit = input[j];
+    BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);  /* depth[lit] bits taken from bits[lit] */
+  }
+}
+
+/* Writes the header of a non-last meta-block. REQUIRES: len <= 1 << 24. */
+static void BrotliStoreMetaBlockHeader(
+    size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
+    uint8_t* storage) {
+  size_t nibbles = 6;  /* 6 nibbles = 24 bits, the largest size written here */
+  /* ISLAST */
+  BrotliWriteBits(1, 0, storage_ix, storage);
+  if (len <= (1U << 16)) {
+    nibbles = 4;
+  } else if (len <= (1U << 20)) {
+    nibbles = 5;
+  }
+  BrotliWriteBits(2, nibbles - 4, storage_ix, storage);  /* nibble count, stored as 0..2 */
+  BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);  /* length field, stored as len - 1 */
+  /* ISUNCOMPRESSED */
+  BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
+}
+
+static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
+    uint8_t* array) {  /* overwrites n_bits bits of array, starting at bit offset pos, with the low bits of "bits" */
+  while (n_bits > 0) {  /* one destination byte per iteration */
+    size_t byte_pos = pos >> 3;
+    size_t n_unchanged_bits = pos & 7;  /* low bits of this byte that must be kept */
+    size_t n_changed_bits = BROTLI_MIN(size_t, n_bits, 8 - n_unchanged_bits);
+    size_t total_bits = n_unchanged_bits + n_changed_bits;
+    uint32_t mask =
+        (~((1u << total_bits) - 1u)) | ((1u << n_unchanged_bits) - 1u);  /* ones over the bits to keep */
+    uint32_t unchanged_bits = array[byte_pos] & mask;
+    uint32_t changed_bits = bits & ((1u << n_changed_bits) - 1u);
+    array[byte_pos] =
+        (uint8_t)((changed_bits << n_unchanged_bits) | unchanged_bits);
+    n_bits -= n_changed_bits;
+    bits >>= n_changed_bits;  /* consumed the low n_changed_bits bits */
+    pos += n_changed_bits;
+  }
+}
+
+static void RewindBitPosition(const size_t new_storage_ix,
+                              size_t* storage_ix, uint8_t* storage) {
+  const size_t bitpos = new_storage_ix & 7;
+  const size_t mask = (1u << bitpos) - 1;  /* ones over the bits below the new position */
+  storage[new_storage_ix >> 3] &= (uint8_t)mask;  /* clear the rest of the byte holding the new position */
+  *storage_ix = new_storage_ix;
+}
+
+static BROTLI_BOOL ShouldMergeBlock(BrotliOnePassArena* s,
+    const uint8_t* data, size_t len, const uint8_t* depths) {
+  uint32_t* BROTLI_RESTRICT const histo = s->histogram;  /* reuses (and overwrites) the arena histogram */
+  static const size_t kSampleRate = 43;
+  size_t i;
+  memset(histo, 0, sizeof(s->histogram));
+  for (i = 0; i < len; i += kSampleRate) {  /* count only every kSampleRate-th byte */
+    ++histo[data[i]];
+  }
+  {
+    const size_t total = (len + kSampleRate - 1) / kSampleRate;  /* number of samples taken */
+    double r = (FastLog2(total) + 0.5) * (double)total + 200;
+    for (i = 0; i < 256; ++i) {
+      r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
+    }
+    return TO_BROTLI_BOOL(r >= 0.0);  /* true when the score did not drop below zero */
+  }
+}
+
+/* Acceptable loss for uncompressible speedup is 2% (compared in millibytes) */
+#define MIN_RATIO 980
+
+static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
+    const uint8_t* metablock_start, const uint8_t* next_emit,
+    const size_t insertlen, const size_t literal_ratio) {
+  const size_t compressed = (size_t)(next_emit - metablock_start);  /* input bytes already encoded in this meta-block */
+  if (compressed * 50 > insertlen) {  /* i.e. compressed is more than 2% of insertlen */
+    return BROTLI_FALSE;
+  } else {
+    return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
+  }
+}
+
+static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
+                                      const size_t storage_ix_start,
+                                      size_t* storage_ix, uint8_t* storage) {
+  const size_t len = (size_t)(end - begin);  /* bytes in [begin, end) are stored verbatim */
+  RewindBitPosition(storage_ix_start, storage_ix, storage);  /* discard everything written since storage_ix_start */
+  BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);  /* 1 = is_uncompressed */
+  *storage_ix = (*storage_ix + 7u) & ~(size_t)7;  /* round up to a byte boundary; size_t mask, as "~7u" would also clear bits 32 and up */
+  memcpy(&storage[*storage_ix >> 3], begin, len);
+  *storage_ix += len << 3;
+  storage[*storage_ix >> 3] = 0;  /* zero the next byte so later bit writes start clean */
+}
+
+static const uint32_t kCmdHistoSeed[128] = {  /* read-only seed, copied into cmd_histo before each block */
+  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 0, 0, 0, 0,
+};
+
+static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
+    BrotliOnePassArena* s, const uint8_t* input, size_t input_size,
+    BROTLI_BOOL is_last, int* table, size_t table_bits,
+    size_t* storage_ix, uint8_t* storage) {
+  uint8_t* BROTLI_RESTRICT const cmd_depth = s->cmd_depth;
+  uint16_t* BROTLI_RESTRICT const cmd_bits = s->cmd_bits;
+  uint32_t* BROTLI_RESTRICT const cmd_histo = s->cmd_histo;
+  uint8_t* BROTLI_RESTRICT const lit_depth = s->lit_depth;
+  uint16_t* BROTLI_RESTRICT const lit_bits = s->lit_bits;
+  const uint8_t* ip_end;
+
+  /* "next_emit" is a pointer to the first byte that is not covered by a
+     previous copy. Bytes between "next_emit" and the start of the next copy or
+     the end of the input will be emitted as literal bytes. */
+  const uint8_t* next_emit = input;
+  /* Save the start of the first block for position and distance computations.
+  */
+  const uint8_t* base_ip = input;
+
+  static const size_t kFirstBlockSize = 3 << 15;
+  static const size_t kMergeBlockSize = 1 << 16;
+
+  const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
+  const size_t kMinMatchLen = 5;
+
+  const uint8_t* metablock_start = input;
+  size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
+  size_t total_block_size = block_size;
+  /* Save the bit position of the MLEN field of the meta-block header, so that
+     we can update it later if we decide to extend this meta-block. */
+  size_t mlen_storage_ix = *storage_ix + 3;  /* skips the 1-bit ISLAST and the 2-bit nibble count */
+
+  size_t literal_ratio;
+
+  const uint8_t* ip;
+  int last_distance;
+
+  const size_t shift = 64u - table_bits;  /* Hash() then yields a table_bits-bit value */
+
+  BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
+  /* No block splits, no contexts. */
+  BrotliWriteBits(13, 0, storage_ix, storage);
+
+  literal_ratio = BuildAndStoreLiteralPrefixCode(
+      s, input, block_size, s->lit_depth, s->lit_bits, storage_ix, storage);
+
+  {
+    /* Store the pre-compressed command and distance prefix codes. */
+    size_t i;
+    for (i = 0; i + 7 < s->cmd_code_numbits; i += 8) {
+      BrotliWriteBits(8, s->cmd_code[i >> 3], storage_ix, storage);
+    }
+  }
+  BrotliWriteBits(s->cmd_code_numbits & 7,
+                  s->cmd_code[s->cmd_code_numbits >> 3], storage_ix, storage);  /* leftover bits of the last, partial byte */
+
+ emit_commands:
+  /* Initialize the command and distance histograms. We will gather
+     statistics of command and distance codes during the processing
+     of this block and use it to update the command and distance
+     prefix codes for the next block. */
+  memcpy(s->cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));
+
+  /* "ip" is the input pointer. */
+  ip = input;
+  last_distance = -1;  /* "ip - last_distance" is then ip + 1, which fails the "candidate < ip" test below */
+  ip_end = input + block_size;
+
+  if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
+    /* For the last block, we need to keep a 16 bytes margin so that we can be
+       sure that all distances are at most window size - 16.
+       For all other blocks, we only need to keep a margin of 5 bytes so that
+       we don't go over the block size with a copy. */
+    const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
+                                        input_size - kInputMarginBytes);
+    const uint8_t* ip_limit = input + len_limit;
+
+    uint32_t next_hash;
+    for (next_hash = Hash(++ip, shift); ; ) {
+      /* Step 1: Scan forward in the input looking for a 5-byte-long match.
+         If we get close to exhausting the input then goto emit_remainder.
+
+         Heuristic match skipping: If 32 bytes are scanned with no matches
+         found, start looking only at every other byte. If 32 more bytes are
+         scanned, look at every third byte, etc. When a match is found,
+         immediately go back to looking at every byte. This is a small loss
+         (~5% performance, ~0.1% density) for compressible data due to more
+         bookkeeping, but for non-compressible data (such as JPEG) it's a huge
+         win since the compressor quickly "realizes" the data is incompressible
+         and doesn't bother looking for matches everywhere.
+
+         The "skip" variable keeps track of how many bytes there are since the
+         last match; dividing it by 32 (i.e. right-shifting by five) gives the
+         number of bytes to move ahead for each iteration. */
+      uint32_t skip = 32;
+
+      const uint8_t* next_ip = ip;
+      const uint8_t* candidate;
+      BROTLI_DCHECK(next_emit < ip);
+trawl:
+      do {
+        uint32_t hash = next_hash;
+        uint32_t bytes_between_hash_lookups = skip++ >> 5;
+        BROTLI_DCHECK(hash == Hash(next_ip, shift));
+        ip = next_ip;
+        next_ip = ip + bytes_between_hash_lookups;
+        if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
+          goto emit_remainder;
+        }
+        next_hash = Hash(next_ip, shift);
+        candidate = ip - last_distance;  /* first try the previously used distance */
+        if (IsMatch(ip, candidate)) {
+          if (BROTLI_PREDICT_TRUE(candidate < ip)) {
+            table[hash] = (int)(ip - base_ip);
+            break;
+          }
+        }
+        candidate = base_ip + table[hash];  /* otherwise use the position stored for this hash */
+        BROTLI_DCHECK(candidate >= base_ip);
+        BROTLI_DCHECK(candidate < ip);
+
+        table[hash] = (int)(ip - base_ip);
+      } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));
+
+      /* Check copy distance. If candidate is not feasible, continue search.
+         Checking is done outside of hot loop to reduce overhead. */
+      if (ip - candidate > MAX_DISTANCE) goto trawl;
+
+      /* Step 2: Emit the found match together with the literal bytes from
+         "next_emit" to the bit stream, and then see if we can find a next match
+         immediately afterwards. Repeat until we find no match for the input
+         without emitting some literal bytes. */
+
+      {
+        /* We have a 5-byte match at ip, and we need to emit bytes in
+           [next_emit, ip). */
+        const uint8_t* base = ip;
+        size_t matched = 5 + FindMatchLengthWithLimit(
+            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
+        int distance = (int)(base - candidate);  /* > 0 */
+        size_t insert = (size_t)(base - next_emit);
+        ip += matched;
+        BROTLI_LOG(("[CompressFragment] pos = %d insert = %lu copy = %d\n",
+                    (int)(next_emit - base_ip), (unsigned long)insert, 2));
+        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
+        if (BROTLI_PREDICT_TRUE(insert < 6210)) {
+          EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
+                        storage_ix, storage);
+        } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
+                                             literal_ratio)) {
+          EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
+                                    storage_ix, storage);
+          input_size -= (size_t)(base - input);  /* everything before base is now stored uncompressed */
+          input = base;
+          next_emit = input;
+          goto next_block;
+        } else {
+          EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
+                            storage_ix, storage);
+        }
+        EmitLiterals(next_emit, insert, lit_depth, lit_bits,
+                     storage_ix, storage);
+        if (distance == last_distance) {
+          BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
+          ++cmd_histo[64];
+        } else {
+          EmitDistance((size_t)distance, cmd_depth, cmd_bits,
+                       cmd_histo, storage_ix, storage);
+          last_distance = distance;
+        }
+        EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
+                                storage_ix, storage);
+        BROTLI_LOG(("[CompressFragment] pos = %d distance = %d\n"
+                    "[CompressFragment] pos = %d insert = %d copy = %d\n"
+                    "[CompressFragment] pos = %d distance = %d\n",
+                    (int)(base - base_ip), (int)distance,
+                    (int)(base - base_ip) + 2, 0, (int)matched - 2,
+                    (int)(base - base_ip) + 2, (int)distance));
+
+        next_emit = ip;
+        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
+          goto emit_remainder;
+        }
+        /* We could immediately start working at ip now, but to improve
+           compression we first update "table" with the hashes of some positions
+           within the last copy. */
+        {
+          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
+          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
+          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
+          table[prev_hash] = (int)(ip - base_ip - 3);
+          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
+          table[prev_hash] = (int)(ip - base_ip - 2);
+          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
+          table[prev_hash] = (int)(ip - base_ip - 1);
+
+          candidate = base_ip + table[cur_hash];
+          table[cur_hash] = (int)(ip - base_ip);
+        }
+      }
+
+      while (IsMatch(ip, candidate)) {
+        /* We have a 5-byte match at ip, and no need to emit any literal bytes
+           prior to ip. */
+        const uint8_t* base = ip;
+        size_t matched = 5 + FindMatchLengthWithLimit(
+            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
+        if (ip - candidate > MAX_DISTANCE) break;
+        ip += matched;
+        last_distance = (int)(base - candidate);  /* > 0 */
+        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
+        EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
+                    storage_ix, storage);
+        EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
+                     cmd_histo, storage_ix, storage);
+        BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n"
+                    "[CompressFragment] pos = %d distance = %d\n",
+                    (int)(base - base_ip), 0, (int)matched,
+                    (int)(base - base_ip), (int)last_distance));
+
+        next_emit = ip;
+        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
+          goto emit_remainder;
+        }
+        /* We could immediately start working at ip now, but to improve
+           compression we first update "table" with the hashes of some positions
+           within the last copy. */
+        {
+          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
+          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
+          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
+          table[prev_hash] = (int)(ip - base_ip - 3);
+          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
+          table[prev_hash] = (int)(ip - base_ip - 2);
+          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
+          table[prev_hash] = (int)(ip - base_ip - 1);
+
+          candidate = base_ip + table[cur_hash];
+          table[cur_hash] = (int)(ip - base_ip);
+        }
+      }
+
+      next_hash = Hash(++ip, shift);
+    }
+  }
+
+ emit_remainder:
+  BROTLI_DCHECK(next_emit <= ip_end);
+  input += block_size;
+  input_size -= block_size;
+  block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);
+
+  /* Decide if we want to continue this meta-block instead of emitting the
+     last insert-only command. */
+  if (input_size > 0 &&
+      total_block_size + block_size <= (1 << 20) &&
+      ShouldMergeBlock(s, input, block_size, lit_depth)) {
+    BROTLI_DCHECK(total_block_size > (1 << 16));
+    /* Update the size of the current meta-block and continue emitting commands.
+       We can do this because the current size and the new size both have 5
+       nibbles. */
+    total_block_size += block_size;
+    UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
+    goto emit_commands;
+  }
+
+  /* Emit the remaining bytes as literals. */
+  if (next_emit < ip_end) {
+    const size_t insert = (size_t)(ip_end - next_emit);
+    BROTLI_LOG(("[CompressFragment] pos = %d insert = %lu copy = %d\n",
+                (int)(next_emit - base_ip), (unsigned long)insert, 2));
+    if (BROTLI_PREDICT_TRUE(insert < 6210)) {
+      EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
+                    storage_ix, storage);
+      EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
+    } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
+                                         literal_ratio)) {
+      EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
+                                storage_ix, storage);
+    } else {
+      EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
+                        storage_ix, storage);
+      EmitLiterals(next_emit, insert, lit_depth, lit_bits,
+                   storage_ix, storage);
+    }
+  }
+  next_emit = ip_end;  /* the whole block has been written out at this point */
+
+next_block:
+  /* If we have more data, write a new meta-block header and prefix codes and
+     then continue emitting commands. */
+  if (input_size > 0) {
+    metablock_start = input;
+    block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
+    total_block_size = block_size;
+    /* Save the bit position of the MLEN field of the meta-block header, so that
+       we can update it later if we decide to extend this meta-block. */
+    mlen_storage_ix = *storage_ix + 3;
+    BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
+    /* No block splits, no contexts. */
+    BrotliWriteBits(13, 0, storage_ix, storage);
+    literal_ratio = BuildAndStoreLiteralPrefixCode(
+        s, input, block_size, lit_depth, lit_bits, storage_ix, storage);
+    BuildAndStoreCommandPrefixCode(s, storage_ix, storage);
+    goto emit_commands;
+  }
+
+  if (!is_last) {
+    /* If this is not the last block, update the command and distance prefix
+       codes for the next block and store the compressed forms. */
+    s->cmd_code[0] = 0;
+    s->cmd_code_numbits = 0;
+    BuildAndStoreCommandPrefixCode(s, &s->cmd_code_numbits, s->cmd_code);  /* written into the arena, not into "storage" */
+  }
+}
+
+#define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15)  /* applies X to each table_bits value that gets its own variant */
+
+#define BAKE_METHOD_PARAM_(B) \
+static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B(             \
+    BrotliOnePassArena* s, const uint8_t* input, size_t input_size,          \
+    BROTLI_BOOL is_last, int* table, size_t* storage_ix, uint8_t* storage) { \
+  BrotliCompressFragmentFastImpl(s, input, input_size, is_last, table, B,    \
+      storage_ix, storage);                                                  \
+}
+FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)  /* defines ...Impl9, ...Impl11, ...Impl13 and ...Impl15 with table_bits fixed */
+#undef BAKE_METHOD_PARAM_
+
+void BrotliCompressFragmentFast(
+    BrotliOnePassArena* s, const uint8_t* input, size_t input_size,
+    BROTLI_BOOL is_last, int* table, size_t table_size,
+    size_t* storage_ix, uint8_t* storage) {
+  const size_t initial_storage_ix = *storage_ix;  /* kept so the output can be rewritten below */
+  const size_t table_bits = Log2FloorNonZero(table_size);
+
+  if (input_size == 0) {  /* nothing to compress: only the empty last meta-block is written */
+    BROTLI_DCHECK(is_last);
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
+    *storage_ix = (*storage_ix + 7u) & ~(size_t)7;  /* byte-align; size_t mask, as "~7u" would also clear bits 32 and up */
+    return;
+  }
+
+  switch (table_bits) {  /* dispatch to the variant baked for this table size */
+#define CASE_(B)                                                     \
+    case B:                                                          \
+      BrotliCompressFragmentFastImpl ## B(                           \
+          s, input, input_size, is_last, table, storage_ix, storage);\
+      break;
+    FOR_TABLE_BITS_(CASE_)
+#undef CASE_
+    default: BROTLI_DCHECK(0); break;  /* any other table_bits has no variant; nothing is written */
+  }
+
+  /* If output is larger than single uncompressed block, rewrite it. */
+  if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
+    EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
+                              storage_ix, storage);
+  }
+
+  if (is_last) {
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
+    *storage_ix = (*storage_ix + 7u) & ~(size_t)7;  /* byte-align with a size_t-wide mask */
+  }
+}
+
+#undef FOR_TABLE_BITS_
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/compress_fragment.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/compress_fragment.h
new file mode 100644
index 0000000000..9c0780f8c9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/compress_fragment.h
@@ -0,0 +1,86 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function for fast encoding of an input fragment, independently from the input
+   history. This function uses one-pass processing: when we find a backward
+   match, we immediately emit the corresponding command and literal codes to
+   the bit stream. */
+
+#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_H_
+#define BROTLI_ENC_COMPRESS_FRAGMENT_H_
+
+#include <brotli/types.h>
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include "entropy_encode.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct BrotliOnePassArena {  /* state for BrotliCompressFragmentFast */
+  uint8_t lit_depth[256];  /* NOTE(review): presumably literal code lengths */
+  uint16_t lit_bits[256];  /* NOTE(review): presumably literal code bits */
+
+  /* Command and distance prefix codes (each 64 symbols, stored back-to-back)
+     used for the next block. The command prefix code is over a smaller alphabet
+     with the following 64 symbols:
+        0 - 15: insert length code 0, copy length code 0 - 15, same distance
+       16 - 39: insert length code 0, copy length code 0 - 23
+       40 - 63: insert length code 0 - 23, copy length code 0
+     Note that symbols 16 and 40 represent the same code in the full alphabet,
+     but we do not use either of them. */
+  uint8_t cmd_depth[128];
+  uint16_t cmd_bits[128];
+  uint32_t cmd_histo[128];
+
+  /* The compressed form of the command and distance prefix codes for the next
+     block. */
+  uint8_t cmd_code[512];
+  size_t cmd_code_numbits;  /* number of bits used in cmd_code */
+  /* NOTE(review): the members below look like scratch space; confirm in .c */
+  HuffmanTree tree[2 * BROTLI_NUM_LITERAL_SYMBOLS + 1];
+  uint32_t histogram[256];
+  uint8_t tmp_depth[BROTLI_NUM_COMMAND_SYMBOLS];
+  uint16_t tmp_bits[64];
+} BrotliOnePassArena;
+
+/* Compresses "input" string to the "*storage" buffer as one or more complete
+   meta-blocks, and updates the "*storage_ix" bit position.
+
+   If "is_last" is 1, emits an additional empty last meta-block.
+
+   "s->cmd_depth" and "s->cmd_bits" contain the command and distance prefix
+   codes (see comment in encode.h) used for the encoding of this input fragment.
+   If "is_last" is 0, they are updated to reflect the statistics
+   of this input fragment, to be used for the encoding of the next fragment.
+
+   "s->cmd_code_numbits" is the number of bits of the compressed representation
+   of the command and distance prefix codes, and "s->cmd_code" is an array of
+   at least "(s->cmd_code_numbits + 7) >> 3" size that contains the compressed
+   command and distance prefix codes. If "is_last" is 0, these are also
+   updated to represent the updated "s->cmd_depth" and "s->cmd_bits".
+
+   REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
+   REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
+   REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
+   REQUIRES: "table_size" is 1 << 9, 1 << 11, 1 << 13 or 1 << 15
+   OUTPUT: maximal copy distance <= |input_size|
+   OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */
+BROTLI_INTERNAL void BrotliCompressFragmentFast(BrotliOnePassArena* s,
+                                                const uint8_t* input,
+                                                size_t input_size,
+                                                BROTLI_BOOL is_last,
+                                                int* table, size_t table_size,
+                                                size_t* storage_ix,
+                                                uint8_t* storage);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_COMPRESS_FRAGMENT_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/compress_fragment_two_pass.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/compress_fragment_two_pass.c
new file mode 100644
index 0000000000..a762679c1f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/compress_fragment_two_pass.c
@@ -0,0 +1,657 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function for fast encoding of an input fragment, independently from the input
+   history. This function uses two-pass processing: in the first pass we save
+   the found backward matches and literal bytes into a buffer, and in the
+   second pass we emit them into the bit stream using prefix codes built based
+   on the actual command and literal byte histograms. */
+
+#include "compress_fragment_two_pass.h"
+
+#include <string.h>  /* memcmp, memcpy, memset */
+
+#include <brotli/types.h>
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include "bit_cost.h"
+#include "brotli_bit_stream.h"
+#include "entropy_encode.h"
+#include "fast_log.h"
+#include "find_match_length.h"
+#include "write_bits.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)  /* copy distance cap */
+
+/* kHashMul32 multiplier has these properties:
+   * The multiplier must be odd. Otherwise we may lose the highest bit.
+   * No long streaks of ones or zeros.
+   * There is no effort to ensure that it is a prime; being odd is enough
+     for this use.
+   * The number has been tuned heuristically against compression benchmarks. */
+static const uint32_t kHashMul32 = 0x1E35A7BD;
+
+/* Hashes "length" bytes at "p"; returns the product bits above "shift". */
+static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift,
+                                   size_t length) {
+  const uint64_t window = BROTLI_UNALIGNED_LOAD64LE(p) << ((8 - length) * 8);
+  return (uint32_t)((window * kHashMul32) >> shift);
+}
+
+/* Like Hash(), for "length" bytes starting "offset" bytes into the word "v". */
+static BROTLI_INLINE uint32_t HashBytesAtOffset(uint64_t v, size_t offset,
+                                                size_t shift, size_t length) {
+  BROTLI_DCHECK(offset <= 8 - length);
+  v >>= 8 * offset;        /* drop the bytes in front of "offset" */
+  v <<= (8 - length) * 8;  /* keep only "length" bytes */
+  return (uint32_t)((v * kHashMul32) >> shift);
+}
+
+/* Compares 4 bytes, plus bytes 4 and 5 when "length" is not 4. */
+static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2,
+                                         size_t length) {
+  const BROTLI_BOOL head_equal =
+      TO_BROTLI_BOOL(BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2));
+  if (!head_equal || length == 4) return head_equal;
+  return TO_BROTLI_BOOL(p1[4] == p2[4] && p1[5] == p2[5]);
+}
+
+/* Builds a command and a distance prefix code (64 symbols each) from cmd_histo
+   into cmd_depth and cmd_bits of "s", and stores them into the bit stream. */
+static void BuildAndStoreCommandPrefixCode(BrotliTwoPassArena* s,
+                                           size_t* storage_ix,
+                                           uint8_t* storage) {
+  /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
+  /* TODO(eustas): initialize once. */
+  memset(s->tmp_depth, 0, sizeof(s->tmp_depth));
+  BrotliCreateHuffmanTree(s->cmd_histo, 64, 15, s->tmp_tree, s->cmd_depth);
+  BrotliCreateHuffmanTree(&s->cmd_histo[64], 64, 14, s->tmp_tree,
+                          &s->cmd_depth[64]);
+  /* We have to jump through a few hoops here in order to compute
+     the command bits because the symbols are in a different order than in
+     the full alphabet. This looks complicated, but having the symbols
+     in this order in the command bits saves a few branches in the Emit*
+     functions. */
+  memcpy(s->tmp_depth, s->cmd_depth + 24, 24);
+  memcpy(s->tmp_depth + 24, s->cmd_depth, 8);
+  memcpy(s->tmp_depth + 32, s->cmd_depth + 48, 8);
+  memcpy(s->tmp_depth + 40, s->cmd_depth + 8, 8);
+  memcpy(s->tmp_depth + 48, s->cmd_depth + 56, 8);
+  memcpy(s->tmp_depth + 56, s->cmd_depth + 16, 8);
+  BrotliConvertBitDepthsToSymbols(s->tmp_depth, 64, s->tmp_bits);
+  memcpy(s->cmd_bits, s->tmp_bits + 24, 16);  /* sizes are bytes: 8 codes */
+  memcpy(s->cmd_bits + 8, s->tmp_bits + 40, 16);
+  memcpy(s->cmd_bits + 16, s->tmp_bits + 56, 16);
+  memcpy(s->cmd_bits + 24, s->tmp_bits, 48);  /* 48 bytes: 24 codes */
+  memcpy(s->cmd_bits + 48, s->tmp_bits + 32, 16);
+  memcpy(s->cmd_bits + 56, s->tmp_bits + 48, 16);
+  BrotliConvertBitDepthsToSymbols(&s->cmd_depth[64], 64, &s->cmd_bits[64]);
+  {
+    /* Create the bit length array for the full command alphabet. */
+    size_t i;
+    memset(s->tmp_depth, 0, 64); /* only 64 first values were used */
+    memcpy(s->tmp_depth, s->cmd_depth + 24, 8);
+    memcpy(s->tmp_depth + 64, s->cmd_depth + 32, 8);
+    memcpy(s->tmp_depth + 128, s->cmd_depth + 40, 8);
+    memcpy(s->tmp_depth + 192, s->cmd_depth + 48, 8);
+    memcpy(s->tmp_depth + 384, s->cmd_depth + 56, 8);
+    for (i = 0; i < 8; ++i) {
+      s->tmp_depth[128 + 8 * i] = s->cmd_depth[i];
+      s->tmp_depth[256 + 8 * i] = s->cmd_depth[8 + i];
+      s->tmp_depth[448 + 8 * i] = s->cmd_depth[16 + i];
+    }
+    BrotliStoreHuffmanTree(s->tmp_depth, BROTLI_NUM_COMMAND_SYMBOLS,
+                           s->tmp_tree, storage_ix, storage);
+  }
+  BrotliStoreHuffmanTree(&s->cmd_depth[64], 64, s->tmp_tree, storage_ix,
+                         storage);  /* second half: the distance code */
+}
+
+/* Appends the command word for an insert of "insertlen" literals: the insert
+   code sits in the low 8 bits and its extra-bits value above them. */
+static BROTLI_INLINE void EmitInsertLen(
+    uint32_t insertlen, uint32_t** commands) {
+  uint32_t code = insertlen;  /* lengths below 6 are their own code */
+  uint32_t extra = 0;
+  if (insertlen >= 22594) {
+    code = 23;
+    extra = insertlen - 22594;
+  } else if (insertlen >= 6210) {
+    code = 22;
+    extra = insertlen - 6210;
+  } else if (insertlen >= 2114) {
+    code = 21;
+    extra = insertlen - 2114;
+  } else if (insertlen >= 130) {
+    const uint32_t tail = insertlen - 66;
+    const uint32_t nbits = Log2FloorNonZero(tail);
+    code = nbits + 10;
+    extra = tail - (1u << nbits);
+  } else if (insertlen >= 6) {
+    const uint32_t tail = insertlen - 2;
+    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
+    const uint32_t prefix = tail >> nbits;
+    code = (nbits << 1) + prefix + 2;
+    extra = tail - (prefix << nbits);
+  }
+  *(*commands)++ = code | (extra << 8);
+}
+
+/* Appends the command word for a copy of "copylen" bytes: the copy code sits
+   in the low 8 bits and its extra-bits value above them. */
+static BROTLI_INLINE void EmitCopyLen(size_t copylen, uint32_t** commands) {
+  size_t code = copylen + 38;  /* used as is for lengths below 10 */
+  size_t extra = 0;
+  if (copylen >= 2118) {
+    code = 63;
+    extra = copylen - 2118;
+  } else if (copylen >= 134) {
+    const size_t tail = copylen - 70;
+    const size_t nbits = Log2FloorNonZero(tail);
+    code = nbits + 52;
+    extra = tail - ((size_t)1 << nbits);
+  } else if (copylen >= 10) {
+    const size_t tail = copylen - 6;
+    const size_t nbits = Log2FloorNonZero(tail) - 1;
+    const size_t prefix = tail >> nbits;
+    code = (nbits << 1) + prefix + 44;
+    extra = tail - (prefix << nbits);
+  }
+  *(*commands)++ = (uint32_t)(code | (extra << 8));
+}
+
+/* Appends the command word for a copy of "copylen" bytes; CreateCommands uses
+   this variant for the first match after an insert. For copylen >= 72 the
+   length word is followed by a second word, 64 (the same word CreateCommands
+   emits when a match reuses the previous distance). */
+static BROTLI_INLINE void EmitCopyLenLastDistance(
+    size_t copylen, uint32_t** commands) {
+  size_t code;
+  size_t extra = 0;
+  BROTLI_BOOL append_64 = BROTLI_FALSE;
+  if (copylen < 12) {
+    code = copylen + 20;
+  } else if (copylen < 72) {
+    const size_t tail = copylen - 8;
+    const size_t nbits = Log2FloorNonZero(tail) - 1;
+    const size_t prefix = tail >> nbits;
+    code = (nbits << 1) + prefix + 28;
+    extra = tail - (prefix << nbits);
+  } else {
+    append_64 = BROTLI_TRUE;
+    if (copylen < 136) {
+      const size_t tail = copylen - 8;
+      code = (tail >> 5) + 54;
+      extra = tail & 31;
+    } else if (copylen < 2120) {
+      const size_t tail = copylen - 72;
+      const size_t nbits = Log2FloorNonZero(tail);
+      code = nbits + 52;
+      extra = tail - ((size_t)1 << nbits);
+    } else {
+      code = 63;
+      extra = copylen - 2120;
+    }
+  }
+  *(*commands)++ = (uint32_t)(code | (extra << 8));
+  if (append_64) {
+    *(*commands)++ = 64;
+  }
+}
+
+/* Appends the distance command word: code (offset by 80) | extra << 8. */
+static BROTLI_INLINE void EmitDistance(uint32_t distance, uint32_t** commands) {
+  const uint32_t biased = distance + 3;
+  const uint32_t nbits = Log2FloorNonZero(biased) - 1;
+  const uint32_t half = (biased >> nbits) & 1;  /* bit just below the top bit */
+  const uint32_t bucket_start = (2 + half) << nbits;
+  const uint32_t code = 2 * (nbits - 1) + half + 80;
+  const uint32_t word = code | ((biased - bucket_start) << 8);
+  *(*commands)++ = word;
+}
+
+/* Writes the header of a non-last meta-block holding "len" bytes.
+   REQUIRES: len <= 1 << 24. */
+static void BrotliStoreMetaBlockHeader(
+    size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
+    uint8_t* storage) {
+  /* Use the fewest nibbles (4, 5 or 6) whose bits can hold "len - 1". */
+  const size_t nibbles = (len <= (1U << 16)) ? 4 : (len <= (1U << 20)) ? 5 : 6;
+
+  /* ISLAST */
+  BrotliWriteBits(1, 0, storage_ix, storage);
+  /* MNIBBLES, stored as nibbles - 4 */
+  BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
+  /* MLEN - 1 */
+  BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
+  /* ISUNCOMPRESSED */
+  BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
+}
+
+/* First pass: finds backward matches in the block at "input" and appends the
+   literal bytes to "*literals" and the command words to "*commands". */
+static BROTLI_INLINE void CreateCommands(const uint8_t* input,
+    size_t block_size, size_t input_size, const uint8_t* base_ip, int* table,
+    size_t table_bits, size_t min_match,
+    uint8_t** literals, uint32_t** commands) {
+  const uint8_t* ip = input;  /* current scan position */
+  const size_t shift = 64u - table_bits;
+  const uint8_t* ip_end = input + block_size;
+  /* "next_emit" is the first byte not covered by a previous copy; bytes from it
+     up to the next copy (or the end of the block) are emitted as literals. */
+  const uint8_t* next_emit = input;
+
+  int last_distance = -1;
+  const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
+
+  if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
+    /* For the last block, we need to keep a 16 bytes margin so that we can be
+       sure that all distances are at most window size - 16.
+       For all other blocks, we only need to keep a margin of 5 bytes so that
+       we don't go over the block size with a copy. */
+    const size_t len_limit = BROTLI_MIN(size_t, block_size - min_match,
+                                        input_size - kInputMarginBytes);
+    const uint8_t* ip_limit = input + len_limit;
+
+    uint32_t next_hash;
+    for (next_hash = Hash(++ip, shift, min_match); ; ) {
+      /* Step 1: Scan forward in the input looking for a min_match-long match.
+         If we get close to exhausting the input then goto emit_remainder.
+
+         Heuristic match skipping: If 32 bytes are scanned with no matches
+         found, start looking only at every other byte. If 32 more bytes are
+         scanned, look at every third byte, etc.. When a match is found,
+         immediately go back to looking at every byte. This is a small loss
+         (~5% performance, ~0.1% density) for compressible data due to more
+         bookkeeping, but for non-compressible data (such as JPEG) it's a huge
+         win since the compressor quickly "realizes" the data is incompressible
+         and doesn't bother looking for matches everywhere.
+
+         The "skip" variable keeps track of how many bytes there are since the
+         last match; dividing it by 32 (ie. right-shifting by five) gives the
+         number of bytes to move ahead for each iteration. */
+      uint32_t skip = 32;
+
+      const uint8_t* next_ip = ip;
+      const uint8_t* candidate;
+
+      BROTLI_DCHECK(next_emit < ip);
+trawl:
+      do {
+        uint32_t hash = next_hash;
+        uint32_t bytes_between_hash_lookups = skip++ >> 5;
+        ip = next_ip;
+        BROTLI_DCHECK(hash == Hash(ip, shift, min_match));
+        next_ip = ip + bytes_between_hash_lookups;
+        if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
+          goto emit_remainder;
+        }
+        next_hash = Hash(next_ip, shift, min_match);
+        candidate = ip - last_distance;
+        if (IsMatch(ip, candidate, min_match)) {
+          if (BROTLI_PREDICT_TRUE(candidate < ip)) {
+            table[hash] = (int)(ip - base_ip);
+            break;
+          }
+        }
+        candidate = base_ip + table[hash];
+        BROTLI_DCHECK(candidate >= base_ip);
+        BROTLI_DCHECK(candidate < ip);
+
+        table[hash] = (int)(ip - base_ip);
+      } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate, min_match)));
+
+      /* Check copy distance. If candidate is not feasible, continue search.
+         Checking is done outside of hot loop to reduce overhead. */
+      if (ip - candidate > MAX_DISTANCE) goto trawl;
+
+      /* Step 2: Emit the found match together with the literal bytes from
+         "next_emit", and then see if we can find a next match immediately
+         afterwards. Repeat until we find no match for the input
+         without emitting some literal bytes. */
+
+      {
+        /* We have a min_match-byte match at ip, and we need to emit bytes in
+           [next_emit, ip). */
+        const uint8_t* base = ip;
+        size_t matched = min_match + FindMatchLengthWithLimit(
+            candidate + min_match, ip + min_match,
+            (size_t)(ip_end - ip) - min_match);
+        int distance = (int)(base - candidate);  /* > 0 */
+        int insert = (int)(base - next_emit);
+        ip += matched;
+        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
+        EmitInsertLen((uint32_t)insert, commands);
+        BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n",
+                    (int)(next_emit - base_ip), insert, 2));
+        memcpy(*literals, next_emit, (size_t)insert);
+        *literals += insert;
+        if (distance == last_distance) {
+          **commands = 64;
+          ++(*commands);
+        } else {
+          EmitDistance((uint32_t)distance, commands);
+          last_distance = distance;
+        }
+        EmitCopyLenLastDistance(matched, commands);
+        BROTLI_LOG(("[CompressFragment] pos = %d distance = %d\n"
+                    "[CompressFragment] pos = %d insert = %d copy = %d\n"
+                    "[CompressFragment] pos = %d distance = %d\n",
+                    (int)(base - base_ip), (int)distance,
+                    (int)(base - base_ip) + 2, 0, (int)matched - 2,
+                    (int)(base - base_ip) + 2, (int)distance));
+
+        next_emit = ip;
+        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
+          goto emit_remainder;
+        }
+        {
+          /* We could immediately start working at ip now, but to improve
+             compression we first update "table" with the hashes of some
+             positions within the last copy. */
+          uint64_t input_bytes;
+          uint32_t cur_hash;
+          uint32_t prev_hash;
+          if (min_match == 4) {
+            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
+            cur_hash = HashBytesAtOffset(input_bytes, 3, shift, min_match);
+            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 3);
+            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 2);
+            prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 1);
+          } else {
+            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 5);
+            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 5);
+            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 4);
+            prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 3);
+            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 2);
+            cur_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
+            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 2);
+            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 1);
+          }
+
+          candidate = base_ip + table[cur_hash];
+          table[cur_hash] = (int)(ip - base_ip);
+        }
+      }
+
+      while (ip - candidate <= MAX_DISTANCE &&
+          IsMatch(ip, candidate, min_match)) {
+        /* We have a min_match-byte match at ip, and no need to emit any
+           literal bytes prior to ip. */
+        const uint8_t* base = ip;
+        size_t matched = min_match + FindMatchLengthWithLimit(
+            candidate + min_match, ip + min_match,
+            (size_t)(ip_end - ip) - min_match);
+        ip += matched;
+        last_distance = (int)(base - candidate);  /* > 0 */
+        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
+        EmitCopyLen(matched, commands);
+        EmitDistance((uint32_t)last_distance, commands);
+        BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n"
+                    "[CompressFragment] pos = %d distance = %d\n",
+                    (int)(base - base_ip), 0, (int)matched,
+                    (int)(base - base_ip), (int)last_distance));
+
+        next_emit = ip;
+        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
+          goto emit_remainder;
+        }
+        {
+          /* We could immediately start working at ip now, but to improve
+             compression we first update "table" with the hashes of some
+             positions within the last copy. */
+          uint64_t input_bytes;
+          uint32_t cur_hash;
+          uint32_t prev_hash;
+          if (min_match == 4) {
+            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
+            cur_hash = HashBytesAtOffset(input_bytes, 3, shift, min_match);
+            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 3);
+            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 2);
+            prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 1);
+          } else {
+            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 5);
+            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 5);
+            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 4);
+            prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 3);
+            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 2);
+            cur_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
+            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 2);
+            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 1);
+          }
+
+          candidate = base_ip + table[cur_hash];
+          table[cur_hash] = (int)(ip - base_ip);
+        }
+      }
+
+      next_hash = Hash(++ip, shift, min_match);
+    }
+  }
+
+emit_remainder:
+  BROTLI_DCHECK(next_emit <= ip_end);
+  /* Emit the remaining bytes as literals. */
+  if (next_emit < ip_end) {
+    const uint32_t insert = (uint32_t)(ip_end - next_emit);
+    EmitInsertLen(insert, commands);
+    BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n",
+                (int)(next_emit - base_ip), insert, 2));
+    memcpy(*literals, next_emit, insert);
+    *literals += insert;
+  }
+}
+
+/* Second pass: builds and stores the literal and command prefix codes, then
+   writes every command word, each insert followed by its literal bytes. */
+static void StoreCommands(BrotliTwoPassArena* s,
+                          const uint8_t* literals, const size_t num_literals,
+                          const uint32_t* commands, const size_t num_commands,
+                          size_t* storage_ix, uint8_t* storage) {
+  static const uint32_t kNumExtraBits[128] = {
+      0,  0,  0,  0,  0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,
+      6,  7,  8,  9,  10, 12, 14, 24, 0,  0,  0,  0,  0,  0,  0,  0,
+      1,  1,  2,  2,  3,  3,  4,  4,  0,  0,  0,  0,  0,  0,  0,  0,
+      1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  7,  8,  9,  10, 24,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  7,  8,  8,
+      9,  9,  10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
+      17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24,
+  };
+  static const uint32_t kInsertOffset[24] = {
+      0,  1,  2,  3,  4,   5,   6,   8,   10,   14,   18,   26,
+      34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594,
+  };
+  size_t i;
+
+  memset(s->lit_histo, 0, sizeof(s->lit_histo));
+  memset(s->cmd_histo, 0, sizeof(s->cmd_histo));
+  /* TODO(eustas): is clearing cmd_depth and cmd_bits necessary? */
+  memset(s->cmd_depth, 0, sizeof(s->cmd_depth));
+  memset(s->cmd_bits, 0, sizeof(s->cmd_bits));
+  for (i = 0; i < num_literals; ++i) {
+    s->lit_histo[literals[i]] += 1;
+  }
+  BrotliBuildAndStoreHuffmanTreeFast(s->tmp_tree, s->lit_histo, num_literals,
+                                     /* max_bits = */ 8, s->lit_depth,
+                                     s->lit_bits, storage_ix, storage);
+
+  for (i = 0; i < num_commands; ++i) {
+    const uint32_t code = commands[i] & 0xFF;
+    BROTLI_DCHECK(code < 128);
+    s->cmd_histo[code] += 1;
+  }
+  /* These four codes are always counted once more, used or not. */
+  s->cmd_histo[1] += 1;
+  s->cmd_histo[2] += 1;
+  s->cmd_histo[64] += 1;
+  s->cmd_histo[84] += 1;
+  BuildAndStoreCommandPrefixCode(s, storage_ix, storage);
+
+  for (i = 0; i < num_commands; ++i) {
+    const uint32_t cmd = commands[i];
+    const uint32_t code = cmd & 0xFF;
+    const uint32_t extra = cmd >> 8;
+    BROTLI_DCHECK(code < 128);
+    BrotliWriteBits(s->cmd_depth[code], s->cmd_bits[code], storage_ix, storage);
+    BrotliWriteBits(kNumExtraBits[code], extra, storage_ix, storage);
+    if (code < 24) {
+      const uint8_t* const run_end = literals + (kInsertOffset[code] + extra);
+      while (literals != run_end) {
+        const uint8_t lit = *literals++;
+        BrotliWriteBits(s->lit_depth[lit], s->lit_bits[lit], storage_ix,
+                        storage);
+      }
+    }
+  }
+}
+
+/* Acceptable loss for uncompressible speedup is 2% */
+#define MIN_RATIO 0.98
+/* Only every SAMPLE_RATE-th input byte feeds the entropy estimate. */
+#define SAMPLE_RATE 43
+
+/* Decides between a compressed and an uncompressed meta-block for "input". */
+static BROTLI_BOOL ShouldCompress(BrotliTwoPassArena* s,
+    const uint8_t* input, size_t input_size, size_t num_literals) {
+  const double corpus_size = (double)input_size;
+  const double max_total_bit_cost = corpus_size * 8 * MIN_RATIO / SAMPLE_RATE;
+  size_t pos;
+  if ((double)num_literals < MIN_RATIO * corpus_size) return BROTLI_TRUE;
+  /* Otherwise estimate the byte entropy from a sparse sample of the input. */
+  memset(s->lit_histo, 0, sizeof(s->lit_histo));
+  for (pos = 0; pos < input_size; pos += SAMPLE_RATE) {
+    s->lit_histo[input[pos]] += 1;
+  }
+  return TO_BROTLI_BOOL(BitsEntropy(s->lit_histo, 256) < max_total_bit_cost);
+}
+
+/* Moves the write position back to "new_storage_ix" and clears the bits at and
+   above that position within its byte. */
+static void RewindBitPosition(const size_t new_storage_ix,
+                              size_t* storage_ix, uint8_t* storage) {
+  storage[new_storage_ix >> 3] &= (uint8_t)((1u << (new_storage_ix & 7)) - 1);
+  *storage_ix = new_storage_ix;
+}
+/* Stores "input" verbatim as one uncompressed meta-block. */
+static void EmitUncompressedMetaBlock(const uint8_t* input, size_t input_size,
+                                      size_t* storage_ix, uint8_t* storage) {
+  BrotliStoreMetaBlockHeader(input_size, 1, storage_ix, storage);
+  *storage_ix = (*storage_ix + 7u) & ~7u;  /* round up to a byte boundary */
+  memcpy(&storage[*storage_ix >> 3], input, input_size);
+  *storage_ix += input_size << 3;
+  storage[*storage_ix >> 3] = 0;  /* zero the byte after the copied data */
+}
+
+/* Cuts "input" into blocks of at most kCompressFragmentTwoPassBlockSize bytes
+   and emits one meta-block per block, either compressed or uncompressed. */
+static BROTLI_INLINE void BrotliCompressFragmentTwoPassImpl(
+    BrotliTwoPassArena* s, const uint8_t* input, size_t input_size,
+    BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
+    int* table, size_t table_bits, size_t min_match,
+    size_t* storage_ix, uint8_t* storage) {
+  /* Start of the first block; positions and distances are relative to it. */
+  const uint8_t* const base_ip = input;
+  BROTLI_UNUSED(is_last);
+
+  while (input_size > 0) {
+    const size_t block_size =
+        BROTLI_MIN(size_t, input_size, kCompressFragmentTwoPassBlockSize);
+    uint32_t* commands = command_buf;
+    uint8_t* literals = literal_buf;
+    size_t num_literals;
+    CreateCommands(input, block_size, input_size, base_ip, table,
+                   table_bits, min_match, &literals, &commands);
+    num_literals = (size_t)(literals - literal_buf);
+    if (!ShouldCompress(s, input, block_size, num_literals)) {
+      /* Since we did not find many backward references and the entropy of
+         the data is close to 8 bits, we can simply emit an uncompressed block.
+         This makes compression speed of uncompressible data about 3x faster. */
+      EmitUncompressedMetaBlock(input, block_size, storage_ix, storage);
+    } else {
+      BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
+      /* No block splits, no contexts. */
+      BrotliWriteBits(13, 0, storage_ix, storage);
+      StoreCommands(s, literal_buf, num_literals, command_buf,
+                    (size_t)(commands - command_buf), storage_ix, storage);
+    }
+    input += block_size;
+    input_size -= block_size;
+  }
+}
+/* Table sizes (log2) for which a specialized function is generated. */
+#define FOR_TABLE_BITS_(X) \
+  X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17)
+/* Bakes table_bits = B into a wrapper; min_match is 4 for B <= 15, else 6. */
+#define BAKE_METHOD_PARAM_(B)                                                  \
+static BROTLI_NOINLINE void BrotliCompressFragmentTwoPassImpl ## B(            \
+    BrotliTwoPassArena* s, const uint8_t* input, size_t input_size,            \
+    BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,          \
+    int* table, size_t* storage_ix, uint8_t* storage) {                        \
+  size_t min_match = (B <= 15) ? 4 : 6;                                        \
+  BrotliCompressFragmentTwoPassImpl(s, input, input_size, is_last, command_buf,\
+      literal_buf, table, B, min_match, storage_ix, storage);                  \
+}
+FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)  /* defines ...Impl8 through ...Impl17 */
+#undef BAKE_METHOD_PARAM_
+
+/* Two-pass compression of "input" into meta-blocks at "*storage_ix"; when
+   "is_last" is set, an empty last meta-block is appended. */
+void BrotliCompressFragmentTwoPass(
+    BrotliTwoPassArena* s, const uint8_t* input, size_t input_size,
+    BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
+    int* table, size_t table_size, size_t* storage_ix, uint8_t* storage) {
+  const size_t start_ix = *storage_ix;
+  const size_t table_bits = Log2FloorNonZero(table_size);
+  switch (table_bits) {
+#define CASE_(B)                                      \
+    case B:                                           \
+      BrotliCompressFragmentTwoPassImpl ## B(         \
+          s, input, input_size, is_last, command_buf, \
+          literal_buf, table, storage_ix, storage);   \
+      break;
+    FOR_TABLE_BITS_(CASE_)
+#undef CASE_
+    default: BROTLI_DCHECK(0); break;
+  }
+
+  /* If output is larger than single uncompressed block, rewrite it. */
+  if (*storage_ix - start_ix > 31 + (input_size << 3)) {
+    RewindBitPosition(start_ix, storage_ix, storage);
+    EmitUncompressedMetaBlock(input, input_size, storage_ix, storage);
+  }
+  if (!is_last) return;
+  BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
+  BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
+  *storage_ix = (*storage_ix + 7u) & ~7u;
+}
+
+#undef FOR_TABLE_BITS_
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/compress_fragment_two_pass.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/compress_fragment_two_pass.h
new file mode 100644
index 0000000000..6d28d9bb78
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/compress_fragment_two_pass.h
@@ -0,0 +1,72 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function for fast encoding of an input fragment, independently from the input
+   history. This function uses two-pass processing: in the first pass we save
+   the found backward matches and literal bytes into a buffer, and in the
+   second pass we emit them into the bit stream using prefix codes built based
+   on the actual command and literal byte histograms. */
+
+#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
+#define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
+
+#include <brotli/types.h>
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include "entropy_encode.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Minimum command_buf / literal_buf length. TODO(eustas): turn to macro. */
+static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;
+
+typedef struct BrotliTwoPassArena {  /* Type of the "s" argument below. */
+  uint32_t lit_histo[256];  /* lit_*: presumably per literal byte (confirm) */
+  uint8_t lit_depth[256];
+  uint16_t lit_bits[256];
+
+  uint32_t cmd_histo[128];  /* cmd_*: presumably command codes (confirm) */
+  uint8_t cmd_depth[128];
+  uint16_t cmd_bits[128];
+
+  /* tmp_*: used by BuildAndStoreCommandPrefixCode (not shown here; confirm). */
+  HuffmanTree tmp_tree[2 * BROTLI_NUM_LITERAL_SYMBOLS + 1];
+  uint8_t tmp_depth[BROTLI_NUM_COMMAND_SYMBOLS];
+  uint16_t tmp_bits[64];
+} BrotliTwoPassArena;
+
+/* Compresses "input" string to the "*storage" buffer as one or more complete
+   meta-blocks, and updates the "*storage_ix" bit position. If "is_last" is 1,
+   also emits an additional empty last meta-block and rounds "*storage_ix" up
+   to a multiple of 8 bits.
+
+   REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
+   REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
+   REQUIRES: "command_buf" and "literal_buf" point to at least
+              kCompressFragmentTwoPassBlockSize long arrays.
+   REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
+   REQUIRES: "table_size" is a power of two, log2 in FOR_TABLE_BITS_ (.c file).
+   OUTPUT: maximal copy distance <= |input_size|
+   OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */
+BROTLI_INTERNAL void BrotliCompressFragmentTwoPass(BrotliTwoPassArena* s,
+                                                   const uint8_t* input,
+                                                   size_t input_size,
+                                                   BROTLI_BOOL is_last,
+                                                   uint32_t* command_buf,
+                                                   uint8_t* literal_buf,
+                                                   int* table,
+                                                   size_t table_size,
+                                                   size_t* storage_ix,
+                                                   uint8_t* storage);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/dictionary_hash.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/dictionary_hash.c
new file mode 100644
index 0000000000..1a60eb3cda
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/dictionary_hash.c
@@ -0,0 +1,1848 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Hash table on the 4-byte prefixes of static dictionary words. */
+
+#include "../common/platform.h"
+#include "dictionary_hash.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* GENERATED CODE START */
+BROTLI_INTERNAL const uint16_t kStaticDictionaryHashWords[32768] = {
+1002,0,0,0,0,0,0,0,0,683,0,0,0,0,0,0,0,1265,0,0,0,0,0,1431,0,0,0,0,0,0,40,0,0,0,
+0,155,8,741,0,624,0,0,0,0,0,0,0,0,0,0,0,0,66,503,0,0,0,451,0,0,0,0,0,0,0,835,70,
+0,0,539,0,0,0,0,0,0,0,0,0,113,0,0,0,0,718,0,0,0,0,0,0,520,0,1070,0,0,0,0,0,1515,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,78,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,610,0,0,750,0,0,0,307,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,964,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,999,0,0,0,0,0,0,0,0,
+645,75,0,649,52,282,0,200,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1621,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,211,225,0,0,687,718,0,0,110,0,58,0,0,0,0,0,0,345,0,0,301,0,0,
+0,203,0,0,1154,674,1949,0,0,0,0,0,0,0,0,0,259,0,0,0,0,0,0,0,1275,0,0,0,1231,254,
+0,0,0,0,0,0,0,277,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,248,0,0,800,0,0,0,29,
+116,100,490,0,0,0,0,0,1641,0,543,0,0,0,0,41,181,0,657,0,0,202,25,0,0,0,0,0,0,0,
+0,0,0,423,0,0,0,113,0,0,0,927,963,0,976,0,206,0,0,0,0,0,0,0,0,0,2002,0,0,0,0,0,
+0,0,0,0,0,0,696,0,1170,0,0,0,0,226,13,0,769,678,551,0,0,0,0,0,0,57,0,0,0,10,188,
+0,0,0,624,0,0,0,0,0,0,0,0,0,1941,130,0,0,0,0,378,269,0,0,528,0,1146,0,0,0,1105,
+0,1616,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,656,0,1940,0,0,0,0,0,173,0,0,0,0,0,0,0,0,0,
+0,0,457,342,810,0,0,0,0,620,0,0,0,0,0,0,0,967,95,447,406,0,0,0,477,0,1268,944,
+1941,0,0,0,629,0,0,0,0,0,375,0,0,0,1636,0,0,0,0,774,0,1,1034,0,0,0,0,0,824,0,0,
+0,0,0,118,0,0,560,296,0,0,0,0,0,0,0,0,1009,894,0,0,0,0,0,0,0,0,0,0,0,0,0,1474,
+366,0,0,0,0,0,0,0,0,0,79,1723,0,0,200,0,0,0,0,0,0,0,0,1759,372,0,16,0,943,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,258,0,0,900,1839,707,30,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,2004,0,0,10,115,0,50,0,0,0,0,0,0,0,0,0,0,520,1,0,738,98,482,0,0,0,0,
+0,0,0,0,0,0,701,2,0,0,0,0,0,0,0,0,557,0,0,0,0,0,0,0,0,0,347,0,0,0,0,572,0,0,0,0,
+0,0,0,0,0,832,0,0,797,809,0,0,0,0,0,0,0,0,0,0,0,528,0,0,0,861,0,0,294,0,0,0,109,
+0,0,0,0,0,0,0,0,1187,290,266,0,0,0,0,49,50,748,0,0,466,399,0,0,0,0,0,0,0,378,0,
+519,0,0,0,0,0,0,0,0,0,0,0,0,667,351,902,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,180,
+0,0,869,0,0,0,0,0,0,0,260,0,0,0,0,0,0,0,0,0,0,523,36,0,0,587,510,809,29,260,0,0,
+0,0,0,0,0,0,570,0,565,0,1464,0,0,0,0,0,0,10,0,0,787,399,380,200,0,0,0,0,516,0,
+844,887,0,0,0,0,0,0,0,44,0,0,0,305,1655,0,0,0,0,0,0,0,0,0,0,0,0,0,0,786,10,0,0,
+0,0,0,0,0,0,0,2031,0,0,0,0,0,684,0,0,0,0,0,1480,0,0,0,27,0,0,0,395,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,813,511,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,56,0,0,0,206,
+496,0,0,0,0,0,909,0,891,0,0,0,0,0,0,0,0,0,687,0,0,0,1342,0,0,0,0,0,0,0,0,0,0,
+160,41,0,0,0,0,0,0,0,0,0,0,0,1718,778,0,0,0,0,0,0,0,0,0,0,1610,0,0,0,0,0,115,0,
+0,0,0,314,294,0,0,0,983,178,193,0,0,0,0,0,0,0,0,0,174,0,0,0,0,0,0,0,0,0,0,848,
+1796,0,0,0,0,0,0,221,0,687,1660,0,0,0,0,262,0,0,179,0,0,0,0,0,66,0,773,0,352,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,35,0,152,0,0,1197,0,0,0,0,0,0,0,0,0,0,0,0,560,0,0,
+564,0,0,0,797,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,556,0,819,0,0,0,0,0,0,0,0,719,544,
+637,5,0,0,0,0,0,0,0,0,0,0,0,101,0,1441,0,0,0,893,0,0,0,0,0,0,0,0,0,238,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,1296,0,0,969,1729,314,60,0,0,0,0,0,1144,0,1147,0,0,0,0,0,
+0,0,0,0,0,437,1853,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,828,0,176,0,0,0,0,0,0,434,39,0,
+0,0,0,0,159,0,0,0,902,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,270,0,0,0,0,801,556,0,0,
+0,0,0,0,0,416,19,197,369,0,0,0,0,0,0,0,0,0,28,34,0,757,0,0,898,1553,0,721,0,0,0,
+0,1012,0,0,0,0,1102,0,898,183,0,0,0,0,0,0,0,136,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,247,277,0,0,0,435,0,0,0,0,0,1311,0,0,0,0,
+0,0,211,437,0,0,0,28,0,0,750,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2012,0,702,
+0,808,0,0,0,0,739,166,0,0,0,0,0,0,719,170,500,0,0,0,0,0,0,0,0,1500,327,0,0,450,
+0,0,0,1318,0,0,0,1602,0,0,331,754,0,0,0,0,0,1368,0,0,557,0,0,0,799,850,0,0,0,0,
+0,0,0,0,908,0,0,0,0,0,19,62,459,0,0,0,0,0,0,0,0,0,0,0,0,1802,0,0,0,0,0,0,0,0,0,
+1397,0,0,0,0,120,238,0,0,0,0,0,0,0,0,0,0,0,1324,0,0,0,0,0,0,0,0,602,201,0,0,164,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,615,0,0,0,0,0,0,0,0,0,0,0,0,0,1243,0,0,0,0,968,0,0,
+0,0,0,0,882,0,0,0,907,329,100,0,0,0,0,0,0,0,0,0,0,0,176,26,9,0,0,265,256,0,0,0,
+0,0,0,0,0,0,643,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,127,610,0,0,0,0,973,2001,0,
+0,0,0,0,0,522,0,0,0,0,0,0,0,0,0,0,0,553,0,0,0,0,0,0,1582,0,1578,0,0,0,0,0,0,0,0,
+0,0,0,795,0,0,0,432,0,0,0,0,0,0,84,126,0,0,0,0,790,0,377,64,0,1529,0,0,0,0,530,
+1857,539,1104,0,0,0,0,0,0,0,0,0,0,0,0,977,0,0,0,34,0,0,0,0,0,0,0,0,0,0,0,24,26,
+0,0,918,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,183,379,0,0,0,0,0,0,0,792,
+0,0,0,0,0,0,0,0,0,1920,0,0,0,0,0,0,0,0,0,771,0,0,0,1979,0,901,254,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,140,0,0,0,0,0,440,37,0,
+508,0,0,0,513,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,533,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,752,920,0,1048,0,153,0,
+0,391,0,0,1952,0,0,0,0,0,0,0,0,0,0,126,0,0,0,0,640,0,483,69,1616,0,0,0,0,0,734,
+0,0,0,0,0,0,480,0,495,0,472,0,0,0,0,0,0,0,0,874,229,0,0,0,0,948,0,0,0,0,0,0,0,0,
+1009,748,0,555,0,0,0,0,0,0,193,0,653,0,0,0,0,0,0,0,0,0,0,984,0,0,0,172,0,0,0,0,
+0,0,0,0,83,1568,0,0,384,0,0,0,0,0,0,0,164,880,0,0,0,0,0,0,0,0,0,0,0,367,121,0,0,
+828,0,0,0,0,0,0,0,1541,0,0,0,0,0,0,0,343,0,0,0,0,0,0,0,0,561,57,0,0,0,0,0,0,0,
+926,0,0,0,0,827,0,194,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,0,0,0,
+0,0,0,896,1249,0,0,0,0,0,1614,0,0,0,860,0,0,0,0,0,0,0,0,964,102,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,899,0,569,0,0,0,0,795,2045,0,0,0,
+0,0,0,104,52,0,0,0,0,0,604,0,0,0,0,779,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,0,0,0,0,
+494,0,677,0,0,0,0,0,0,0,508,0,0,0,0,0,0,0,0,0,1014,0,957,0,0,630,310,0,0,0,570,
+0,0,449,0,64,537,0,0,0,0,0,0,0,244,0,0,0,0,0,0,0,0,0,0,0,0,0,0,702,1650,49,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,338,0,0,0,0,1279,0,0,0,0,0,0,0,896,0,0,
+178,0,0,0,0,0,0,0,0,0,0,0,0,0,808,695,0,0,0,0,539,1117,0,0,0,0,0,0,0,0,257,0,
+1003,0,0,0,1,448,0,516,0,0,960,0,125,4,0,1268,30,748,0,0,852,0,0,0,6,0,0,848,
+236,1385,862,1811,0,0,0,0,698,803,0,0,0,0,0,0,0,610,992,0,0,878,0,1847,0,0,0,0,
+0,0,0,383,0,1404,0,0,0,0,986,0,347,0,0,0,0,0,0,0,0,0,0,0,592,572,0,1411,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,606,0,0,0,0,0,0,
+0,0,0,0,0,0,0,1829,0,0,0,0,0,0,0,0,0,0,0,0,700,748,0,0,0,0,0,0,365,0,0,127,0,0,
+83,198,0,0,0,0,0,0,864,55,0,0,0,0,726,1752,0,0,0,0,0,0,0,0,0,0,0,0,0,1066,0,764,
+0,0,0,0,683,0,550,309,0,0,874,1212,0,0,0,1364,0,986,381,723,0,0,0,1573,0,0,0,0,
+0,1025,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1559,0,0,0,0,493,133,0,0,0,0,148,
+119,0,0,0,0,0,0,537,14,541,0,635,126,0,0,0,495,0,0,0,0,861,998,1009,0,0,0,0,0,0,
+0,359,368,0,0,0,0,304,1577,0,0,0,0,0,1107,0,0,0,0,0,929,0,0,0,1142,0,0,0,0,289,
+175,0,432,0,219,0,0,0,0,0,785,0,0,595,0,0,0,0,0,0,0,0,0,0,0,0,0,80,0,0,0,0,0,0,
+931,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1323,0,0,0,0,290,0,559,1751,127,0,0,0,
+934,1167,0,963,0,260,0,0,0,573,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+580,1689,0,0,0,0,0,0,0,0,0,1164,0,0,982,1922,0,63,0,0,0,0,0,793,0,0,0,0,0,0,0,0,
+0,0,0,0,0,67,790,0,0,0,0,0,0,0,0,0,0,391,443,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,271,0,0,0,0,0,0,0,0,0,0,0,1140,0,0,0,0,340,300,0,897,0,0,0,0,0,0,
+0,0,0,0,890,0,0,0,0,818,321,53,0,0,0,0,0,0,0,0,0,468,0,243,0,870,0,0,0,1765,121,
+0,0,0,180,518,0,822,419,634,0,0,0,0,0,0,0,0,0,898,0,0,0,0,454,36,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,806,0,0,0,0,0,0,0,0,0,0,0,0,1326,0,104,0,0,0,0,0,0,0,
+0,0,260,0,0,0,0,0,0,0,0,0,0,0,0,542,45,0,0,263,1516,42,0,0,0,0,0,468,0,1005,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,288,87,0,0,0,0,0,0,0,0,502,988,133,0,0,0,0,0,0,
+141,0,0,872,1842,0,0,0,0,0,0,0,0,261,619,0,0,0,0,189,246,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,678,0,0,0,0,0,0,0,0,0,0,0,0,285,35,0,517,0,0,0,0,0,0,0,0,0,0,
+540,214,667,0,74,0,0,125,0,0,0,0,0,761,131,0,0,0,0,0,0,0,0,0,0,0,0,0,333,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1338,94,0,0,0,0,0,0,0,0,0,0,0,0,449,0,646,103,
+86,641,2028,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,869,87,277,117,39,0,0,0,0,0,0,0,0,938,
+297,0,0,0,0,558,464,0,0,0,0,0,0,0,0,0,0,731,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1608,0,
+0,0,0,0,0,0,1429,0,0,733,1010,0,0,338,1656,0,0,0,1038,979,2010,0,0,0,0,0,0,0,
+1005,0,0,121,0,0,0,219,20,0,0,0,0,0,0,872,1440,0,0,0,683,0,1070,0,0,522,0,0,0,0,
+439,669,0,0,0,0,0,0,0,0,1245,0,0,0,0,0,1218,0,0,547,233,0,0,0,0,0,0,0,0,0,482,0,
+0,0,0,0,0,0,886,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,795,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,371,0,0,0,0,0,0,0,0,0,0,0,0,0,622,0,625,0,0,0,339,29,0,0,338,0,0,0,
+0,130,0,0,0,0,0,0,0,0,0,307,0,0,0,0,0,0,0,0,0,0,2044,0,0,0,0,0,0,0,0,308,770,0,
+0,0,0,0,1266,0,0,0,0,0,0,0,0,0,400,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,690,739,0,0,
+0,0,0,0,0,990,0,0,0,1831,0,0,0,0,0,0,0,0,0,0,0,0,0,613,0,0,0,0,0,0,0,0,0,0,0,0,
+0,763,0,878,0,0,0,977,0,100,0,0,0,0,0,0,0,0,0,463,0,0,0,0,623,318,0,0,296,463,
+137,0,0,454,0,0,0,1527,58,0,0,0,0,0,0,0,18,48,0,0,0,0,0,729,0,0,0,442,0,0,0,0,
+40,449,0,853,0,0,0,0,0,0,227,0,0,0,0,0,0,1491,0,0,0,0,0,0,0,0,0,0,161,55,0,450,
+0,1174,62,0,207,0,0,0,0,0,0,0,0,869,0,0,0,0,80,213,0,0,0,0,0,0,0,0,0,0,354,820,
+0,0,747,0,0,0,954,0,0,1073,0,556,0,0,0,692,0,191,0,804,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,831,162,0,0,35,0,0,0,0,0,0,0,0,1235,0,0,0,0,0,1234,0,0,
+0,0,0,0,0,0,0,0,96,0,0,0,0,0,0,0,149,0,0,0,902,204,0,0,833,0,287,366,0,0,0,0,0,
+0,992,2020,0,0,0,0,0,0,0,0,0,0,0,356,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,784,0,0,567,
+630,0,0,0,539,0,0,27,0,0,0,0,0,0,0,0,0,0,755,0,0,0,0,0,0,0,0,0,0,0,0,814,0,0,0,
+0,0,0,0,0,0,0,0,0,0,987,0,0,255,761,194,0,1086,0,0,0,0,0,0,1016,0,0,1396,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,562,271,913,0,0,0,0,0,0,0,0,320,153,45,475,0,0,
+0,0,0,0,0,713,0,327,0,0,0,0,0,0,604,552,3,359,0,0,0,0,853,80,0,0,0,0,0,0,0,2016,
+6,887,0,0,0,0,975,0,961,0,0,0,0,0,916,1891,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,100,101,390,708,0,0,0,587,983,512,0,0,0,0,0,0,0,0,0,0,0,645,0,0,0,851,0,0,0,
+0,0,498,140,217,0,0,0,1448,0,0,0,0,0,0,0,0,0,905,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+643,105,0,792,0,0,0,0,0,0,0,0,0,0,0,0,56,0,0,0,0,0,0,0,0,0,0,535,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1748,0,0,0,0,0,754,0,0,0,0,0,0,0,0,0,0,0,0,91,0,0,1565,0,91,792,
+939,3,370,0,0,0,0,95,0,0,0,0,551,7,619,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1150,0,
+0,0,0,0,0,0,0,0,0,0,0,0,671,0,0,0,0,0,888,368,149,0,0,105,1134,0,983,0,0,458,31,
+0,643,0,0,0,312,0,740,0,0,0,1642,0,0,0,0,0,0,0,236,0,0,0,0,0,0,0,59,68,0,0,0,0,
+0,867,795,0,0,0,0,970,1977,0,0,0,0,0,0,0,1148,0,775,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,970,0,0,0,0,0,0,0,0,0,665,71,0,0,0,0,827,0,0,0,0,0,0,0,0,0,
+0,479,0,0,0,0,0,0,0,0,99,607,0,0,0,0,0,0,0,1960,0,0,0,793,0,0,871,41,0,0,241,94,
+0,0,0,0,209,0,0,1497,0,0,0,0,0,0,0,0,0,98,0,0,0,463,0,0,0,0,291,0,0,0,0,0,0,0,0,
+0,0,984,0,0,0,0,0,205,0,0,0,0,0,0,205,42,0,801,0,0,0,0,0,635,0,0,533,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,371,0,1282,0,0,0,825,0,0,0,0,0,0,0,0,0,357,879,467,0,317,0,0,
+0,0,0,0,0,924,0,0,0,0,849,1795,0,0,0,0,895,1799,43,0,0,0,0,0,0,0,0,0,0,1820,0,0,
+0,0,0,0,0,525,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,110,0,493,0,174,417,0,0,
+0,0,0,583,733,0,0,0,0,0,0,481,215,0,0,0,0,477,0,0,0,0,0,0,0,0,308,0,0,0,0,0,0,0,
+0,297,126,0,0,361,1551,0,0,0,0,0,0,871,1807,0,0,0,0,0,1307,0,685,0,0,0,0,0,0,0,
+797,0,858,0,565,0,0,0,0,0,0,0,0,0,0,0,0,434,252,826,0,0,0,0,0,0,791,0,0,0,0,509,
+231,178,601,0,0,0,0,0,0,0,0,43,1591,0,0,0,0,0,1683,0,0,0,0,45,0,0,0,0,0,0,0,0,0,
+0,1120,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,556,494,0,398,0,0,0,1030,0,0,0,0,0,0,
+168,0,0,0,0,0,0,0,0,0,0,973,0,642,0,0,0,0,0,0,0,0,0,1615,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,378,594,0,1093,0,679,112,0,0,0,0,1492,540,1374,714,
+1486,0,0,0,0,825,1511,0,0,0,0,0,0,0,0,0,0,0,0,0,952,0,0,736,143,0,700,0,1540,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1557,0,0,0,860,990,0,0,0,807,0,0,0,0,0,131,
+515,0,646,0,0,0,0,117,728,508,121,0,0,0,0,0,0,357,0,0,0,0,0,0,237,0,0,0,0,0,0,0,
+0,0,1784,0,0,0,0,0,0,0,0,0,0,0,713,348,1536,0,738,0,0,0,0,0,0,0,434,0,0,0,0,0,0,
+366,1877,39,0,0,0,0,0,0,580,0,0,0,0,0,0,0,0,0,0,0,0,0,0,873,0,0,0,0,171,0,625,
+550,107,343,943,0,0,0,0,0,0,0,768,0,0,0,0,0,0,0,799,0,0,0,894,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1673,0,0,0,0,0,0,0,0,0,0,0,1052,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+272,0,441,0,0,3,9,0,0,0,1182,0,1346,0,0,0,0,0,0,0,0,682,0,0,1004,24,0,0,968,0,0,
+0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,185,0,0,0,578,
+474,0,0,0,0,0,0,0,0,0,0,0,0,0,0,113,530,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,556,0,0,0,0,0,0,16,1317,0,0,97,0,0,0,703,0,0,0,0,0,0,0,0,892,0,0,0,1571,0,0,
+426,186,0,1101,0,0,0,0,0,0,0,0,937,585,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,644,291,
+0,0,0,0,749,0,162,0,0,381,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,762,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,628,21,0,0,0,0,0,0,0,0,919,0,0,0,0,0,0,0,0,0,
+633,0,0,0,0,332,0,0,0,0,0,0,0,0,0,1489,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,832,398,0,645,0,0,0,13,0,0,0,0,0,0,0,0,0,0,20,0,800,0,0,0,0,0,0,0,0,0,
+0,0,0,0,1993,0,0,0,0,769,0,0,0,665,0,0,0,0,0,0,0,0,0,0,1426,0,0,0,0,60,0,0,0,
+641,1874,0,644,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1757,0,0,0,0,0,937,0,1652,0,654,0,
+0,0,0,0,0,0,527,0,0,0,0,0,0,0,0,0,0,0,0,0,226,0,0,0,0,0,1486,0,0,0,0,0,0,0,0,0,
+0,0,325,0,0,0,0,0,0,0,1345,0,0,91,0,404,0,0,0,0,0,0,0,0,0,0,0,0,973,0,0,0,0,0,0,
+0,1176,0,549,0,0,0,0,0,0,0,0,0,0,976,0,0,0,0,0,21,0,0,0,0,0,51,0,0,0,0,314,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,198,6,0,1093,0,0,0,0,0,0,0,0,0,
+0,0,0,0,1776,0,0,0,0,0,1528,0,419,0,0,0,0,0,0,0,0,76,138,0,0,0,0,638,29,0,0,0,0,
+0,0,0,1418,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1710,0,0,0,0,0,
+0,0,0,0,0,0,0,532,23,0,0,0,0,0,0,0,862,0,0,946,592,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,70,0,0,0,0,0,0,0,0,0,812,0,0,0,76,0,0,988,0,442,0,0,0,896,0,0,0,0,0,0,
+483,0,0,0,0,1709,0,0,0,0,0,0,119,0,0,0,117,0,309,0,0,0,0,0,596,976,0,0,0,0,0,0,
+0,0,0,0,0,768,0,0,0,0,0,0,0,0,0,518,0,0,0,0,0,0,0,0,0,0,0,0,0,0,863,0,0,0,24,
+145,1020,0,0,1984,0,0,0,0,0,0,0,658,0,0,0,0,0,0,0,0,0,0,106,1827,0,1010,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,582,87,0,0,0,0,0,0,0,267,0,0,0,703,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,496,0,0,0,0,1121,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,249,561,0,0,0,0,0,
+0,0,760,0,0,154,0,0,0,255,0,419,323,0,0,0,0,0,368,0,0,0,0,0,0,0,0,0,0,522,0,0,0,
+0,0,0,0,551,562,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,92,0,0,0,0,
+0,0,0,284,525,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,958,0,0,594,0,0,0,0,0,0,6,479,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,61,0,0,0,0,0,0,0,820,1641,0,1556,0,0,0,0,0,0,0,302,0,0,
+0,0,0,148,0,0,676,0,0,0,0,0,0,1674,0,0,0,0,0,0,178,0,0,0,0,0,0,0,94,389,0,0,0,0,
+91,8,0,0,0,0,0,0,0,0,0,0,112,0,0,0,0,0,0,0,0,0,0,747,0,0,0,0,0,0,0,1746,0,0,0,0,
+0,24,0,1352,158,1530,0,0,718,130,280,1401,0,0,0,0,0,1946,8,0,0,0,0,1607,0,0,0,0,
+0,0,882,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,417,0,0,0,1597,633,433,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,234,0,0,0,0,0,0,0,0,680,1950,0,0,0,0,249,5,0,0,0,
+0,0,0,0,0,0,1216,0,1773,0,0,0,0,0,0,0,0,0,0,0,0,0,0,509,180,0,0,0,0,0,0,0,1002,
+0,0,0,0,0,0,0,0,0,0,0,0,0,931,0,0,0,0,0,0,0,0,747,943,0,1837,0,0,0,0,0,0,0,641,
+0,0,0,0,280,0,0,0,5,0,0,0,0,0,72,545,0,0,0,0,0,0,0,0,0,742,0,0,254,151,872,0,0,
+0,0,0,0,0,0,0,0,0,0,921,0,0,517,833,0,1680,0,0,436,251,584,0,0,0,0,0,0,0,0,0,0,
+0,24,500,0,0,0,0,0,0,0,0,195,1775,514,389,0,0,0,0,0,0,0,743,0,0,0,0,0,0,292,0,0,
+0,227,1283,774,1805,0,0,0,0,0,0,0,0,0,0,119,81,0,0,0,0,0,0,0,0,0,0,0,0,0,0,913,
+1910,0,0,0,1826,490,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1162,700,30,
+0,0,0,721,839,0,0,0,617,0,0,0,0,0,0,0,0,0,169,428,0,0,0,0,0,1648,637,1205,0,0,0,
+1596,0,0,4,266,0,0,0,0,0,0,0,0,0,0,0,862,0,0,0,0,0,0,0,0,0,0,16,0,0,0,0,0,0,0,0,
+0,279,157,391,604,0,0,713,945,877,973,0,0,0,0,0,0,0,0,0,0,0,0,0,0,859,567,628,
+1846,0,0,0,0,0,0,0,0,0,762,0,0,191,0,0,0,0,298,0,0,767,909,0,0,0,0,0,0,0,795,0,
+0,301,0,0,1970,0,0,0,0,0,0,0,0,0,1236,0,0,0,0,0,0,644,369,15,0,160,71,0,0,0,0,0,
+1447,0,0,0,0,0,0,0,0,735,1255,76,0,0,0,0,0,0,0,0,0,0,474,0,0,0,0,0,0,0,0,0,0,
+841,0,0,0,0,0,0,0,0,0,0,836,0,0,0,0,0,1622,0,0,735,0,0,0,0,1601,804,1390,394,0,
+0,0,0,0,0,96,0,289,0,0,35,688,0,0,0,667,0,513,0,0,0,0,0,0,0,2034,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,704,0,1524,0,1078,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,306,
+0,0,0,0,0,0,0,431,0,1196,0,0,54,0,15,1448,0,1418,0,0,0,0,0,0,0,0,0,907,0,0,0,0,
+0,0,194,1767,0,0,0,0,0,840,0,900,0,0,0,0,0,0,0,0,0,0,0,1436,0,0,0,0,642,1560,0,
+0,0,0,0,0,94,386,0,0,0,0,0,0,0,0,0,0,830,416,0,0,20,731,0,0,0,0,0,0,0,0,697,0,0,
+662,0,0,0,0,0,0,0,0,0,861,0,0,0,0,0,0,0,871,671,864,0,928,7,0,332,0,0,0,0,1055,
+0,0,0,0,0,0,986,0,0,0,0,0,44,76,0,0,0,0,0,0,0,0,0,0,300,0,0,0,0,0,0,0,175,518,
+831,1108,0,0,0,836,0,1852,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,843,1804,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,246,0,0,0,610,202,0,0,36,0,0,0,240,654,13,0,0,0,0,0,0,0,
+0,391,0,403,0,0,0,0,0,0,0,0,0,0,75,0,366,815,0,0,631,0,0,0,0,0,0,0,0,345,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,952,0,0,0,0,0,0,0,0,0,0,0,673,35,662,0,287,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,5,34,0,0,0,0,0,0,0,0,151,0,427,0,0,382,0,0,0,329,0,0,279,0,0,0,
+0,0,0,0,0,0,0,906,0,0,366,843,0,1443,0,1372,992,0,36,123,0,649,0,0,0,0,0,767,0,
+1018,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,995,0,0,0,0,0,0,0,72,368,0,0,1345,0,0,0,
+589,0,0,0,0,0,0,0,0,0,1988,0,0,220,541,0,0,0,686,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,32,196,0,0,0,0,0,0,0,0,0,0,0,0,0,381,0,0,0,0,0,0,0,0,0,1452,0,
+0,0,616,0,0,0,0,0,0,0,0,0,1229,0,0,0,0,0,0,0,0,0,0,667,120,0,0,0,0,0,0,0,1146,0,
+0,0,0,0,0,0,0,0,0,0,352,0,0,0,0,0,293,0,0,0,0,0,0,0,0,0,0,0,0,0,935,0,1050,0,
+147,88,0,0,923,0,0,0,0,0,934,0,0,0,0,0,0,0,0,114,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,341,222,0,0,0,0,0,0,0,0,0,0,293,0,0,0,0,0,0,0,0,0,0,0,0,
+637,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1159,0,0,0,847,0,0,0,0,0,0,683,0,867,944,0,0,
+0,0,0,1809,0,0,0,0,0,0,0,0,0,0,395,170,0,0,0,0,0,0,0,0,0,0,618,535,0,1625,0,0,0,
+0,0,0,0,0,23,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,778,0,0,0,0,0,46,0,2032,0,0,37,
+1458,0,938,363,34,0,0,0,0,0,0,0,0,0,0,0,0,0,0,314,0,0,0,0,0,0,889,0,0,0,0,0,0,0,
+0,0,0,0,462,0,0,0,0,525,0,0,23,0,0,0,0,0,0,0,0,0,0,0,676,0,0,0,0,0,0,0,0,0,0,0,
+0,498,725,0,0,0,0,7,0,0,0,0,773,0,0,0,164,0,0,0,0,0,0,0,0,936,583,659,1462,0,
+220,0,0,0,0,803,0,0,544,119,0,0,0,0,0,0,0,0,0,0,0,181,176,0,1192,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,1878,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26,0,0,0,0,0,0,
+944,0,0,0,0,0,0,0,273,0,0,0,0,0,855,0,0,0,0,5,127,0,0,0,0,0,0,0,0,752,230,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,162,0,654,48,156,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,240,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,197,
+0,0,0,0,0,0,0,963,0,0,0,0,0,0,0,0,0,0,858,0,0,0,0,0,0,0,0,0,0,676,1978,0,0,102,
+972,0,0,0,0,0,0,0,361,0,461,0,0,0,472,0,0,0,0,0,0,0,0,0,0,0,0,0,0,747,905,0,0,0,
+155,0,0,0,0,0,0,0,0,0,0,319,163,0,0,0,0,0,0,0,0,0,848,0,0,36,631,0,0,0,0,0,1769,
+0,0,0,0,0,144,0,0,0,0,0,0,0,0,0,0,369,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,555,247,0,0,
+996,0,0,189,0,0,0,0,0,0,0,0,0,0,280,0,0,0,0,0,0,0,0,0,0,0,526,746,0,0,345,0,0,0,
+1017,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,651,428,0,0,0,1162,230,327,546,792,0,0,0,
+1203,0,0,0,0,0,0,0,0,0,672,189,0,0,0,0,0,0,99,0,0,0,298,0,0,0,0,0,0,555,397,0,0,
+0,0,0,1157,0,0,0,0,0,0,0,0,0,0,398,1523,0,366,0,0,787,0,0,0,282,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,157,0,941,0,0,0,0,0,1336,0,0,116,0,0,0,0,0,0,787,0,0,0,0,0,0,0,0,0,
+0,170,160,0,1815,0,0,0,0,0,866,0,0,0,0,0,0,0,0,0,689,0,0,0,0,820,0,498,108,0,0,
+0,1119,0,0,0,244,609,1005,0,581,0,0,0,0,0,895,0,0,0,1898,0,0,0,0,0,926,0,0,0,0,
+0,0,0,0,0,0,0,0,0,538,496,294,301,0,0,0,18,0,0,757,0,0,0,0,0,1263,0,820,0,722,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2028,0,0,0,0,124,1875,0,0,0,881,0,0,0,1348,
+0,0,0,0,0,0,0,911,0,954,0,0,0,0,414,0,0,0,0,517,0,0,0,0,0,816,0,0,0,0,0,0,0,0,
+713,0,0,0,0,0,0,0,33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,593,150,0,0,0,0,
+0,553,0,0,0,0,0,0,0,0,0,0,108,0,0,0,0,420,0,0,0,0,0,0,0,0,0,0,0,1777,0,0,55,493,
+0,0,81,0,321,980,0,0,0,0,0,0,0,0,0,0,0,0,0,0,362,112,0,74,0,0,0,0,0,0,0,625,0,0,
+0,0,0,0,377,16,0,0,61,281,0,0,0,0,0,0,0,0,0,0,0,0,0,0,224,1031,0,0,0,0,0,0,51,0,
+0,0,0,0,0,0,211,309,15,125,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,789,173,0,439,9,648,
+0,0,294,0,0,0,0,0,0,0,374,8,0,1099,0,0,0,0,0,0,0,575,0,0,0,518,0,0,0,702,0,0,0,
+0,0,0,87,0,0,0,438,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,464,122,0,0,0,1802,0,0,0,0,
+0,0,499,0,0,0,87,476,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,840,283,0,0,0,0,1620,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,609,1160,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,600,
+323,372,0,0,0,0,471,722,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,
+477,1304,0,1774,0,0,88,0,438,12,0,0,0,0,0,0,0,0,671,997,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,639,22,0,0,782,681,0,0,0,0,0,0,0,0,0,0,1013,664,0,942,0,1349,0,0,0,0,0,0,0,
+0,0,0,0,0,356,0,0,32,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,215,289,0,1975,
+109,450,0,0,0,0,0,0,0,0,0,0,705,0,0,664,0,0,0,0,0,0,0,1238,0,0,318,0,0,0,0,0,0,
+0,0,0,0,0,0,0,960,1872,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,0,0,0,0,0,0,0,0,0,239,
+777,0,26,0,0,0,0,0,0,0,0,0,0,0,0,375,414,0,17,0,0,0,1350,0,955,0,0,0,0,0,0,0,0,
+887,960,0,0,0,0,0,0,0,0,0,0,708,710,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,919,0,0,0,
+0,502,280,7,45,0,0,0,0,777,0,0,0,0,410,0,1110,0,0,0,0,0,0,414,341,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,787,0,0,0,436,0,0,0,0,0,0,0,1707,613,377,96,0,0,0,0,451,
+0,0,0,0,0,0,0,0,0,0,0,0,0,680,0,483,916,0,0,0,0,0,0,937,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,739,0,0,0,0,0,0,0,0,82,0,0,663,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,128,0,0,0,0,0,0,0,0,1087,0,0,0,0,0,0,0,503,0,0,0,0,0,0,9,113,104,324,0,460,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,935,702,434,485,1014,949,423,0,900,
+0,0,0,0,0,0,0,2018,574,0,0,0,0,0,0,0,0,0,0,0,0,1206,0,0,0,0,0,0,0,0,38,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1022,0,0,0,0,143,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,2029,0,0,0,0,0,0,0,0,0,0,0,0,523,0,0,0,0,0,0,625,0,0,425,37,0,0,0,1943,0,0,0,
+0,0,765,0,0,0,0,0,0,0,0,0,0,551,0,0,0,0,0,0,0,0,0,0,0,0,168,0,0,1010,0,0,1994,0,
+0,0,91,0,0,0,0,532,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1884,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,240,15,0,0,0,1227,0,1534,0,0,0,0,0,0,0,0,0,0,0,0,0,0,392,0,
+0,0,0,0,0,0,0,0,0,0,0,655,562,395,0,0,0,501,1019,0,0,0,0,509,267,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1099,0,0,0,0,0,0,948,0,0,0,0,0,0,0,
+462,114,0,0,258,404,0,1717,0,0,0,0,82,1061,0,724,0,0,0,0,0,1133,0,0,0,0,0,0,
+1021,841,0,1021,0,0,0,0,0,0,0,0,0,0,488,373,37,0,0,0,0,564,0,0,0,0,0,513,0,0,0,
+825,0,0,899,0,0,778,0,0,12,1417,0,1116,0,0,0,0,0,0,0,0,0,0,0,0,0,0,114,545,0,5,
+0,0,0,0,0,0,0,192,0,0,763,0,0,0,0,0,0,0,755,759,0,0,0,0,0,0,0,0,0,370,0,1237,0,
+0,0,0,0,0,298,87,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,0,0,
+0,0,0,0,814,991,0,757,57,0,0,0,0,0,0,0,0,0,540,0,0,0,0,608,0,0,0,0,0,0,0,0,1014,
+0,0,0,902,0,0,0,0,553,1668,0,0,0,0,0,0,0,0,0,559,60,0,0,0,0,0,511,0,0,675,0,0,
+156,0,0,0,0,0,0,709,0,698,0,0,0,1745,0,0,0,0,0,0,0,0,0,714,0,0,0,0,0,0,0,0,206,
+8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,776,0,0,0,0,0,0,0,0,0,1272,0,0,
+0,0,0,1059,0,0,0,0,0,0,406,0,0,0,0,0,0,0,0,0,0,947,0,0,0,0,0,0,168,0,0,0,0,0,0,
+870,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,554,0,0,0,0,784,908,0,0,0,0,0,0,
+0,396,358,0,0,0,0,0,0,0,0,2,228,0,0,0,0,0,0,0,0,0,0,0,845,14,0,716,1820,594,0,
+81,1428,0,161,0,782,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,64,0,0,0,0,0,998,0,
+0,0,0,0,0,0,0,0,0,0,0,1043,0,1496,0,0,0,0,0,0,0,0,781,0,0,0,0,0,0,0,817,1114,0,
+1814,958,0,0,0,0,812,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,139,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,236,643,0,0,0,0,0,0,0,0,0,1172,0,0,0,0,0,0,0,0,0,1338,0,0,0,
+0,0,0,0,0,0,0,0,54,0,0,0,256,0,0,351,0,955,1885,0,469,0,0,0,1270,0,744,0,313,0,
+0,0,0,0,0,0,0,402,969,0,0,0,0,0,0,50,0,0,0,0,572,0,0,0,0,847,0,0,0,0,0,0,0,248,
+43,0,369,0,0,0,0,0,0,0,0,0,0,0,0,0,766,0,363,0,0,0,0,0,0,0,0,0,0,0,678,0,0,409,
+258,82,249,0,0,0,0,0,0,0,0,0,0,0,0,32,393,0,788,0,0,0,1281,509,1968,0,0,0,0,39,
+291,0,0,0,589,0,0,54,1059,0,0,0,0,0,0,824,0,0,0,0,0,0,0,0,0,0,1005,0,1598,0,0,0,
+0,0,919,0,0,0,0,0,0,0,0,52,132,0,0,0,0,0,328,0,0,0,0,173,0,0,0,0,0,65,1411,0,0,
+0,0,0,0,0,0,0,0,442,0,842,0,0,0,0,0,0,0,0,0,534,0,0,0,0,0,0,0,0,0,0,0,0,0,845,
+210,0,0,0,0,0,0,0,0,892,0,0,223,0,0,0,0,529,0,0,0,807,0,137,218,0,1444,0,0,0,0,
+0,332,661,0,0,0,0,0,0,0,76,1517,0,0,0,0,0,0,0,0,0,0,0,418,0,0,0,0,0,0,0,0,481,
+379,0,0,0,0,0,149,18,0,0,0,0,0,0,0,0,742,304,142,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,799,925,195,51,0,0,0,0,688,0,0,0,0,697,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1169,751,0,0,0,452,929,0,221,0,1437,0,0,0,0,955,1251,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,192,0,132,0,0,0,0,0,865,0,0,0,0,0,0,0,767,
+672,42,0,0,0,1050,0,0,0,0,0,0,0,0,368,44,0,0,0,0,0,0,0,570,29,0,0,0,0,0,0,227,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,522,0,0,0,0,0,0,0,1529,0,0,0,0,0,0,739,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1667,0,0,0,0,0,0,132,511,0,138,208,1020,0,0,23,565,0,344,0,0,0,
+0,0,922,0,0,0,0,0,0,0,240,0,0,415,171,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,402,0,0,754,31,716,0,982,731,0,0,0,0,0,0,0,888,0,0,0,803,847,0,0,823,
+0,0,0,0,0,0,785,0,0,2,0,0,0,0,0,0,0,532,0,0,681,0,0,314,0,384,684,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,649,447,0,1818,1007,0,321,0,66,360,0,0,0,385,0,0,0,0,0,0,
+0,900,73,254,0,0,0,0,683,1959,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,86,0,0,725,0,0,0,0,0,196,0,0,0,0,0,831,0,0,0,0,723,0,0,0,0,0,994,627,0,0,
+0,0,0,0,0,0,0,0,764,66,0,0,0,0,205,36,0,0,0,0,0,0,0,950,0,0,0,887,111,0,0,831,
+388,165,0,0,0,0,0,155,0,0,0,0,0,0,0,0,0,0,0,0,0,0,780,755,0,0,0,0,898,146,0,0,0,
+0,0,0,0,45,7,0,0,0,0,0,0,0,0,607,0,0,0,0,0,0,65,0,0,0,0,0,0,0,0,0,88,0,0,0,0,0,
+621,600,0,367,0,0,0,0,0,0,0,561,0,559,0,585,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+287,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,672,157,0,0,0,0,714,0,0,0,
+0,0,456,0,925,0,0,0,0,0,0,0,0,19,0,0,0,0,1473,0,0,0,0,0,0,0,0,0,0,113,0,0,0,0,0,
+0,0,0,0,0,0,0,0,69,463,0,0,82,193,2,471,0,0,0,0,633,0,0,0,0,0,0,1148,129,1392,
+542,803,0,0,0,0,0,0,0,0,0,0,0,0,438,0,0,0,0,0,0,875,0,0,0,0,0,237,0,0,0,0,0,0,0,
+65,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,563,0,0,0,9,444,0,0,43,1260,0,0,0,0,0,0,
+971,0,0,699,0,0,0,0,0,1116,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,829,242,0,
+0,593,0,0,0,0,0,0,0,0,201,36,224,0,0,0,0,0,0,1430,0,1806,0,523,0,0,212,1889,0,0,
+0,827,0,0,0,0,0,2043,136,242,0,0,0,0,0,0,284,148,10,0,0,0,0,0,0,1249,0,0,0,807,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,94,0,0,0,494,0,0,0,0,0,0,0,0,1510,0,0,0,0,0,
+0,0,0,0,0,505,1306,0,0,764,268,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,384,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1703,0,0,0,0,159,964,583,0,0,0,
+0,0,0,515,0,0,854,0,0,0,0,0,0,0,0,0,0,0,0,1123,0,0,0,0,0,0,0,136,0,0,0,0,0,1782,
+0,0,44,1287,0,0,0,0,0,732,0,0,0,0,313,679,0,0,316,0,0,0,0,595,0,0,0,0,0,0,753,
+147,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,137,0,0,0,0,414,0,1762,0,0,0,0,0,0,0,0,
+0,0,0,599,0,0,0,0,0,0,0,0,0,1749,0,0,0,1627,0,488,0,0,0,0,0,83,0,0,0,0,676,0,0,
+1639,0,0,0,0,0,0,0,0,0,278,0,0,0,0,0,0,97,0,14,1085,0,0,0,0,0,0,781,388,0,849,
+59,229,0,0,0,0,0,1115,0,0,0,0,108,0,0,0,0,700,0,0,0,0,0,0,0,0,0,1414,0,0,0,0,0,
+0,0,0,0,0,0,0,0,660,737,1035,0,0,0,0,0,0,521,690,0,0,0,0,0,0,0,0,0,0,0,0,272,0,
+0,0,0,0,0,0,0,0,0,1744,0,0,0,0,0,0,128,733,0,0,277,0,0,0,0,0,0,0,0,0,4,0,0,0,0,
+0,0,0,0,0,0,0,0,0,936,1981,40,0,0,0,0,0,0,0,0,775,0,0,0,0,0,0,0,0,0,306,0,0,0,0,
+0,0,0,979,0,0,0,0,0,611,0,0,0,0,0,178,0,0,0,1969,0,0,0,0,0,0,0,664,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,390,0,0,0,1510,0,0,0,0,0,0,0,0,0,0,0,493,0,0,37,0,0,0,0,724,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,96,1537,0,0,168,473,0,0,0,105,0,0,0,0,
+627,438,0,0,0,0,0,0,0,0,0,0,11,1256,0,0,0,1626,0,779,0,0,0,0,25,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,308,0,0,0,0,0,741,0,671,0,0,0,0,649,150,0,0,99,521,0,0,3,339,0,0,0,
+543,0,0,0,0,0,0,0,0,0,1358,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,234,155,
+0,0,0,0,0,0,0,1628,0,766,0,0,0,0,0,0,0,0,0,0,0,0,0,829,0,0,0,1445,0,0,0,486,0,0,
+0,0,2,1635,0,0,0,0,558,0,0,0,0,0,0,0,0,0,0,1461,0,0,0,0,0,599,0,0,0,0,0,0,0,0,0,
+1376,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,93,0,0,0,0,0,0,447,0,0,66,1432,0,0,0,0,
+0,0,307,0,413,609,0,0,0,930,0,0,0,0,21,939,0,0,0,0,0,962,4,651,0,0,0,0,15,579,0,
+0,0,0,0,597,0,0,0,0,0,981,0,0,0,545,0,0,0,0,0,0,0,1558,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,800,17,0,0,17,0,907,0,0,0,110,0,0,0,53,458,0,1983,0,0,0,0,0,0,0,0,0,0,443,0,
+0,0,0,0,0,0,0,0,0,0,924,1844,0,1232,0,0,0,0,70,519,0,993,0,0,0,0,0,0,14,530,0,
+907,0,0,0,0,0,733,0,0,0,0,0,0,0,0,55,0,188,531,56,0,0,1693,0,0,0,0,0,0,0,0,441,
+0,192,928,0,0,0,0,0,241,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1525,0,259,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,512,185,0,464,1603,0,0,0,0,0,0,0,0,0,0,0,1113,
+284,720,0,0,722,0,0,0,0,0,13,0,0,0,0,0,0,0,4,289,43,0,0,0,0,0,0,1694,0,0,0,0,
+193,0,0,0,0,409,0,0,0,0,0,0,0,0,0,0,0,0,308,0,0,1863,0,0,0,0,0,0,0,0,0,790,0,0,
+745,1002,0,0,0,0,0,0,0,0,0,289,68,477,13,0,0,0,0,0,0,0,0,0,0,609,0,0,0,0,0,0,0,
+0,0,0,0,367,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,528,0,0,0,0,0,0,0,0,0,694,58,
+548,0,0,0,0,0,0,687,0,0,0,0,1749,0,0,0,0,0,0,0,0,1004,661,0,0,0,0,0,0,445,0,0,0,
+74,0,0,0,0,213,0,0,0,0,0,0,0,0,0,0,0,0,0,834,0,0,189,1672,0,0,0,0,0,0,0,1548,
+192,0,0,0,0,0,0,0,0,0,0,0,0,0,32,751,0,78,0,0,0,0,0,0,544,1602,105,473,0,0,0,0,
+0,0,156,1949,0,1779,0,0,0,0,0,0,0,0,0,0,0,763,0,0,0,0,0,0,0,0,29,0,0,0,0,0,0,0,
+0,0,0,883,0,0,0,0,0,0,0,488,0,617,0,0,50,0,694,1518,785,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,546,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,1016,0,0,0,577,0,0,0,0,0,0,
+184,935,114,720,0,0,100,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,95,14,0,969,0,0,0,0,0,0,0,
+727,0,1021,0,0,0,0,0,1190,0,0,0,0,0,0,0,0,0,0,0,0,0,153,0,0,0,0,0,0,0,0,0,798,0,
+587,0,0,695,42,0,1929,141,957,0,465,7,908,0,0,450,148,0,0,0,1166,0,0,0,0,0,0,0,
+0,0,0,0,0,253,0,1003,0,0,0,0,0,0,0,0,0,0,0,46,0,0,879,0,806,0,1868,0,0,0,0,0,
+1846,0,0,0,730,0,0,0,0,0,0,0,965,0,0,0,0,506,0,0,0,10,0,0,0,22,0,0,0,0,0,0,0,0,
+0,0,0,0,0,960,296,0,0,0,0,0,0,0,0,0,0,0,587,0,0,0,0,20,0,0,0,32,982,0,0,0,0,0,0,
+0,0,0,0,941,0,0,0,0,435,0,0,0,0,0,0,71,419,0,0,0,0,0,0,688,740,94,345,0,0,679,
+582,0,0,0,0,0,0,0,945,0,0,0,0,0,0,0,0,0,0,0,0,539,0,684,1993,0,0,0,659,0,583,0,
+803,0,704,0,0,0,0,0,198,181,347,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,481,405,203,0,0,99,826,0,0,0,0,0,0,0,492,0,408,0,0,0,0,0,0,0,0,0,0,4,0,0,
+0,0,665,349,137,0,0,0,0,612,1270,0,0,0,0,0,371,0,0,0,826,0,0,0,0,21,1535,858,
+374,0,0,0,0,0,0,311,0,0,0,991,1968,0,0,0,0,494,1647,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,769,0,0,0,0,0,642,0,0,157,123,0,0,0,1435,0,0,0,0,0,0,0,0,0,0,79,0,0,0,
+0,0,0,1425,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,106,393,486,1690,0,0,0,0,
+0,0,0,0,0,0,0,0,756,184,0,0,0,1382,0,0,0,175,0,1493,0,1007,0,0,0,0,0,0,0,0,0,0,
+0,219,0,0,0,0,515,99,0,851,0,0,0,0,0,1278,0,0,0,0,0,0,0,1000,982,0,762,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,910,1819,0,0,0,0,0,0,906,0,0,0,0,0,0,0,0,0,0,1730,0,0,
+0,0,0,0,0,0,0,0,0,1185,0,0,0,0,0,0,0,0,40,0,0,0,147,0,0,0,0,0,0,0,0,0,0,0,0,0,
+650,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,56,30,0,553,0,0,20,597,0,1614,0,0,0,0,0,327,
+49,0,0,0,0,0,0,0,78,0,0,786,134,0,0,0,12,496,0,0,0,0,0,0,0,0,0,0,42,204,0,614,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,147,247,0,0,0,0,942,0,0,2023,0,0,0,0,
+0,0,67,285,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1309,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,41,532,0,0,0,0,0,0,0,
+1692,0,0,0,0,55,1704,0,0,0,0,988,0,0,0,223,0,0,0,0,0,0,0,57,1123,0,0,0,0,0,1764,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2015,0,0,0,1599,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,129,0,0,0,0,0,0,0,0,0,0,0,534,0,0,0,0,0,0,0,0,0,0,0,
+0,0,504,621,1248,321,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1397,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,441,75,0,0,0,0,0,0,0,0,0,0,841,0,0,0,0,0,693,0,650,314,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,913,0,0,0,0,0,0,0,0,0,0,0,0,0,0,880,0,475,0,
+0,1016,179,602,111,329,0,0,0,1864,0,0,0,0,846,1888,0,0,780,0,0,0,82,0,0,0,0,821,
+0,0,0,0,0,0,0,0,0,0,0,956,112,0,0,0,261,455,0,0,0,0,0,0,337,385,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,184,1865,0,0,721,16,0,486,0,0,0,265,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,621,0,0,0,0,0,0,0,0,234,0,0,815,0,0,743,
+1987,205,197,0,0,0,0,0,0,0,0,0,314,0,0,0,0,0,0,0,0,0,0,0,0,0,0,219,452,589,0,
+176,333,0,0,0,0,0,0,0,1110,47,0,0,0,0,0,0,0,0,0,0,0,864,0,0,300,0,1237,0,0,0,0,
+0,0,0,0,0,0,0,1685,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,135,395,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,631,0,0,0,0,0,0,835,0,0,0,606,459,0,979,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,612,0,0,0,0,0,0,0,0,158,372,0,854,0,0,0,0,0,
+0,0,1492,0,0,0,833,0,0,0,0,0,0,0,1739,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+195,0,0,0,0,0,0,0,0,730,1997,0,0,0,0,0,0,0,0,61,0,0,0,0,0,0,0,266,751,0,0,0,0,0,
+0,0,821,0,0,0,715,0,0,0,868,0,959,0,0,0,0,0,0,0,0,0,0,0,1053,0,0,0,950,0,1081,0,
+1595,0,0,0,0,59,0,0,0,0,0,0,0,0,0,0,47,684,0,0,0,0,0,0,1606,0,777,0,1020,0,0,0,
+1094,0,0,0,0,0,0,0,350,0,0,0,0,0,0,242,1812,0,0,0,967,0,0,0,473,286,0,0,0,0,0,0,
+798,629,222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,513,337,306,0,0,0,0,0,0,0,0,0,
+146,0,0,1646,0,0,0,0,0,465,0,0,0,525,0,0,0,0,0,0,299,165,0,0,0,0,0,0,0,1064,0,0,
+0,0,0,596,0,0,0,0,0,0,0,0,0,0,0,0,0,0,238,1741,0,1233,451,1824,0,0,0,0,733,495,
+0,0,0,0,0,1204,0,0,0,559,341,0,224,21,0,0,0,0,0,0,0,0,97,1446,0,0,0,0,0,0,0,729,
+0,0,565,727,0,1948,0,0,0,519,0,0,0,0,0,0,0,0,0,1193,0,0,0,0,0,0,790,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,323,2,201,0,0,59,0,0,34,0,896,961,0,1285,0,0,46,0,479,0,0,
+0,0,549,0,663,0,0,0,0,0,783,65,682,0,0,0,0,0,11,0,0,0,0,0,522,0,0,0,52,0,0,0,0,
+0,383,0,0,0,0,0,0,0,0,127,0,0,0,0,0,397,194,0,0,635,0,0,0,0,0,0,0,0,0,0,975,0,0,
+0,0,0,0,0,0,0,0,116,0,51,0,0,858,0,1075,535,448,0,0,0,0,0,610,0,0,0,0,0,0,0,0,0,
+0,191,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,267,673,319,94,92,0,551,0,0,218,
+1406,69,256,0,0,952,1980,0,833,0,0,0,0,0,0,0,0,0,0,0,0,39,0,0,0,0,0,0,0,81,0,0,
+0,352,634,0,0,0,0,0,618,0,0,0,0,0,0,73,339,0,0,0,0,0,0,0,0,0,0,0,0,0,0,169,759,
+0,0,0,0,0,0,0,0,0,0,0,0,0,1075,0,0,0,0,0,0,482,649,0,0,0,0,0,0,0,0,386,336,0,0,
+0,1035,0,0,0,0,0,0,0,0,0,0,0,924,0,73,0,0,0,0,0,1971,0,0,0,0,0,0,0,0,0,1344,0,
+501,0,0,0,0,0,0,0,0,46,799,0,0,0,0,0,0,0,276,0,0,0,0,0,0,0,770,0,0,0,0,0,0,0,0,
+0,0,0,0,0,158,0,0,0,0,0,1432,0,0,0,0,0,0,0,0,0,0,25,0,0,2001,0,0,0,0,0,0,0,0,0,
+0,0,0,0,478,0,0,0,0,0,0,91,1461,211,602,0,0,0,0,0,0,0,0,0,1068,0,0,124,567,0,0,
+0,1006,0,0,0,0,0,0,0,0,0,735,812,0,0,323,0,0,0,304,0,0,0,0,0,0,0,0,0,148,0,0,0,
+0,0,0,0,0,0,523,0,0,144,730,0,0,981,0,0,111,0,0,132,0,0,0,0,0,0,890,0,0,0,0,0,
+444,0,1787,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,2041,932,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,937,0,995,0,0,255,0,0,138,863,965,0,0,631,0,0,0,0,1394,16,652,0,0,0,0,0,0,
+0,0,0,0,0,0,0,897,0,321,0,0,0,0,0,922,0,619,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,844,0,0,0,0,0,0,1659,0,1100,0,0,0,1173,0,1930,268,251,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,390,711,0,0,0,0,0,0,0,0,0,0,0,0,0,744,0,0,0,0,0,0,0,0,0,624,0,0,0,
+1998,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1125,0,0,0,594,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,268,0,0,0,0,0,0,0,563,0,0,0,0,0,0,0,0,2,39,0,0,0,1332,0,0,0,0,0,
+0,0,508,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,66,796,0,0,0,0,527,0,0,0,0,98,0,0,576,0,
+0,0,0,0,122,0,276,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,645,0,0,0,0,
+0,0,0,0,0,0,0,290,0,0,762,1292,0,0,0,1315,0,1955,0,0,0,0,0,0,0,0,0,0,210,131,0,
+0,0,0,797,0,38,0,11,488,0,936,0,441,0,0,0,0,0,595,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+991,0,0,0,0,0,0,0,0,0,0,0,653,0,523,0,0,0,903,0,0,0,0,0,0,0,0,0,0,0,0,80,0,0,0,
+0,0,0,0,0,0,432,0,0,314,0,0,0,0,232,1368,534,0,0,0,0,0,27,0,0,0,12,0,0,0,0,0,0,
+0,0,0,264,736,0,1657,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1117,0,127,0,0,0,1208,0,1294,
+0,0,0,0,364,0,0,0,0,0,125,1334,0,0,0,0,0,0,0,0,0,0,0,0,0,0,792,0,0,0,0,0,0,0,
+849,699,0,0,0,0,0,968,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1446,
+124,397,0,0,0,0,0,0,0,0,0,0,0,641,0,0,0,0,0,0,0,0,0,0,0,0,127,346,0,0,517,75,0,
+0,0,0,0,0,0,0,83,0,0,0,0,0,0,1031,0,0,0,0,0,0,0,1470,0,954,0,0,345,304,410,0,0,
+0,0,734,0,0,0,0,0,1822,0,0,0,1798,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,161,
+1865,69,0,0,0,0,0,0,922,0,0,0,0,0,0,0,0,0,0,0,541,0,627,0,0,0,0,0,0,0,0,0,166,0,
+0,0,0,0,0,0,0,0,849,0,0,0,0,0,0,0,717,0,0,0,0,0,0,0,0,0,0,0,0,0,0,600,0,0,0,0,0,
+0,654,0,0,188,273,0,0,0,543,0,410,87,0,0,941,0,0,186,250,0,1785,0,0,0,0,0,1339,
+462,961,0,780,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,529,0,0,0,0,0,0,474,1276,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,24,948,0,0,0,0,657,753,0,0,0,0,941,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,706,985,837,0,1861,0,0,0,0,0,0,0,0,0,0,0,0,0,0,292,933,0,0,0,0,0,
+0,0,0,0,767,0,0,0,0,0,0,0,641,0,0,0,1233,114,0,883,0,274,2008,0,1794,285,0,0,
+571,0,0,0,0,0,0,0,0,0,0,823,960,16,617,0,431,0,0,0,0,0,0,0,0,0,0,567,0,401,0,2,
+781,424,33,0,2006,0,0,274,0,0,1882,0,794,0,0,0,1848,0,0,0,0,0,0,448,47,0,0,0,
+1199,0,0,0,0,0,0,0,0,417,0,0,0,0,0,0,0,0,0,0,295,0,0,0,0,0,0,0,1019,0,0,0,0,0,0,
+0,0,0,0,0,0,0,620,0,0,0,0,464,0,0,0,0,208,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,442,0,930,0,0,0,0,0,516,68,0,0,0,0,0,1128,104,0,0,0,0,0,0,0,0,787,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,491,0,0,0,0,0,0,711,0,0,9,0,101,441,0,0,0,0,0,0,0,0,
+0,0,160,396,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,679,326,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,1128,0,0,0,0,0,737,0,1796,0,0,0,0,0,0,0,0,0,0,0,0,338,574,0,0,
+0,0,0,1096,491,405,0,0,0,0,0,1081,0,0,0,0,0,0,0,0,0,0,0,0,0,1676,0,1207,0,0,0,0,
+0,0,969,354,0,0,0,0,598,0,297,0,0,0,0,0,0,0,0,1772,751,0,37,0,0,1828,0,0,0,0,0,
+0,0,0,0,257,191,582,0,0,0,0,0,0,790,0,0,0,0,0,47,0,0,0,0,0,0,0,449,306,1011,0,0,
+0,0,0,299,0,0,0,0,0,0,837,0,0,0,0,0,0,10,329,0,0,0,0,0,1320,0,0,0,0,0,0,158,657,
+0,1191,0,0,0,0,0,0,7,0,974,1939,0,1665,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,288,
+66,0,0,0,0,494,175,0,1643,0,0,0,0,0,0,0,0,570,750,719,0,0,0,0,0,0,0,0,0,0,0,0,0,
+13,0,0,1247,0,0,221,356,0,0,0,0,0,0,0,0,0,0,694,1809,0,0,0,0,0,0,0,411,0,44,31,
+0,0,0,0,669,0,673,0,0,0,0,0,0,0,0,0,1303,704,299,0,0,0,275,0,0,216,1761,0,0,0,0,
+0,0,0,0,0,0,0,1319,0,0,428,0,0,0,0,0,0,0,0,0,0,514,0,0,0,0,0,0,49,55,102,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,364,0,0,0,0,379,0,921,971,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1258,0,0,0,1058,0,0,0,0,0,656,0,0,0,0,0,144,0,0,0,0,0,0,0,0,0,0,
+0,1373,10,605,0,0,0,0,0,0,0,838,0,1012,0,0,0,0,0,0,0,0,0,0,0,0,0,0,154,365,0,0,
+0,0,0,0,0,0,0,340,0,0,0,0,0,810,0,0,0,0,0,0,495,0,0,0,0,0,0,0,0,0,261,0,535,248,
+0,358,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,567,445,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,697,0,0,0,1336,0,0,0,0,0,0,0,0,917,174,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,972,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,351,0,0,0,0,0,0,0,0,0,0,
+0,0,0,286,0,0,56,438,0,0,0,0,0,1950,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,738,0,0,0,0,0,
+0,0,0,0,0,969,2047,0,0,0,0,0,0,0,818,0,0,0,0,0,0,0,866,0,0,0,0,0,0,0,1467,0,0,0,
+0,0,0,0,0,0,0,0,0,0,972,0,355,0,0,0,116,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,267,189,104,0,0,0,0,1613,0,0,0,0,0,0,0,116,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,886,0,86,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,45,0,0,863,0,0,0,0,0,
+0,0,1953,450,1773,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,381,0,0,0,0,0,0,0,
+0,0,0,0,0,1142,0,1189,0,0,0,663,0,0,0,0,0,0,0,846,0,0,528,0,393,378,0,0,0,0,0,0,
+325,899,680,1880,0,1770,0,0,0,0,0,648,0,0,0,0,0,0,185,167,0,2046,0,0,0,0,0,0,
+249,1645,0,152,0,0,0,1733,0,0,0,0,0,1006,0,0,0,0,0,420,0,0,0,832,0,0,0,0,0,351,
+0,0,0,0,6,40,0,0,60,0,0,0,0,1354,745,724,0,0,0,0,0,0,0,0,772,1951,275,108,639,0,
+0,0,0,0,0,0,0,0,500,1758,0,0,0,0,0,0,0,0,0,0,0,1886,711,205,0,0,965,865,0,0,0,
+534,0,0,0,0,691,0,0,0,237,443,0,878,0,0,0,0,0,1410,0,0,0,0,0,0,0,0,0,0,0,0,0,
+995,0,0,0,0,0,0,0,0,0,0,0,0,0,578,0,0,0,0,881,0,0,0,0,0,0,0,0,822,0,923,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,924,0,0,0,665,0,0,0,0,0,1901,0,0,0,0,0,950,498,93,
+0,0,0,1451,0,0,0,0,0,747,828,788,400,184,0,198,0,0,0,0,0,0,0,0,0,0,0,994,0,0,0,
+0,0,0,0,0,615,320,0,0,0,978,843,905,0,0,0,0,0,0,0,0,850,974,0,0,0,0,6,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,509,0,0,0,0,0,273,0,0,0,0,0,0,0,0,0,0,0,0,0,
+201,0,0,0,1041,0,0,0,1040,0,0,0,0,0,0,0,0,0,693,234,774,0,336,0,1399,22,0,805,
+802,777,167,789,0,0,1705,0,0,0,0,0,0,0,0,0,0,0,10,13,11,0,0,204,264,0,0,56,0,0,
+1917,0,470,0,0,0,0,0,0,0,0,0,0,0,1198,0,0,0,0,0,0,0,0,0,0,1015,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,715,0,0,1002,0,0,0,298,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,867,0,0,724,0,0,0,0,0,0,0,0,0,0,0,0,768,0,0,0,0,0,1066,0,0,0,0,67,0,174,948,
+0,0,0,0,0,0,0,0,0,0,0,0,0,764,0,0,0,0,75,137,0,756,0,0,0,0,0,0,1008,842,643,0,0,
+0,67,0,0,0,0,0,0,0,0,0,0,0,135,821,0,0,0,0,0,0,0,0,736,0,389,355,0,0,786,0,0,0,
+0,0,0,2044,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1030,0,0,0,1083,0,0,0,0,0,
+1226,0,0,0,0,356,319,8,389,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,474,0,0,0,427,
+0,413,0,730,0,0,0,0,0,373,0,0,0,0,0,0,0,0,0,799,0,0,0,1793,0,0,0,322,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,89,290,2,0,0,0,0,0,0,0,0,0,0,672,
+699,1860,0,0,0,737,0,0,0,1612,0,0,0,0,0,0,0,0,0,0,0,145,124,884,0,0,0,0,0,387,0,
+0,0,0,0,0,0,0,0,0,0,679,0,550,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1305,0,0,0,0,0,0,0,
+576,0,0,0,0,0,0,0,686,0,607,0,0,37,0,0,0,0,0,0,0,0,0,101,1726,0,0,0,0,0,958,0,0,
+0,903,0,0,0,0,147,0,0,0,0,0,0,0,0,0,0,0,367,0,0,0,0,690,0,705,273,0,0,887,0,0,0,
+0,0,0,0,0,0,0,0,90,0,0,0,0,0,0,0,908,0,0,0,0,0,0,0,1261,0,0,497,1235,0,429,0,0,
+0,0,904,0,12,125,0,0,0,841,0,0,0,0,0,860,946,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,768,0,770,160,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,271,0,0,0,0,0,0,0,719,0,699,581,0,0,0,0,0,0,0,0,0,0,862,304,0,631,0,0,0,0,880,
+1513,0,0,0,0,0,981,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,434,0,0,0,0,0,550,0,0,476,930,
+824,553,0,0,452,0,151,0,0,0,0,0,0,772,0,292,135,0,0,0,0,0,0,0,504,0,0,1089,0,0,
+0,0,0,0,0,0,0,0,0,783,0,0,0,0,0,0,206,393,0,0,0,0,0,0,0,0,232,912,0,0,0,0,0,977,
+0,0,716,98,0,0,0,0,0,733,0,0,0,0,0,0,0,0,19,0,0,0,0,668,0,360,0,0,0,0,0,0,656,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,726,0,0,0,0,0,0,0,0,0,0,0,0,72,0,0,1269,0,0,463,0,
+0,0,0,0,0,1454,0,1287,245,0,989,0,0,0,0,0,0,0,0,0,107,164,0,0,0,0,0,0,0,1061,0,
+0,0,0,2,484,0,0,0,0,0,0,0,1127,0,0,0,0,0,0,0,460,0,0,0,0,0,932,0,0,0,0,0,0,0,
+588,625,0,0,0,0,76,92,0,0,0,0,0,0,0,0,0,0,0,0,0,104,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+763,0,622,0,0,0,253,0,546,0,0,110,0,256,916,0,0,35,212,0,0,746,0,0,0,150,0,0,
+1466,0,0,0,1299,0,0,0,0,0,0,0,0,0,1518,0,0,0,0,0,0,0,0,0,0,0,0,0,1229,0,0,0,816,
+0,0,0,0,0,0,159,0,0,0,0,0,734,869,126,1716,0,0,0,0,0,0,202,232,0,0,0,0,212,0,0,
+0,0,0,111,1003,0,0,0,0,0,0,0,0,0,0,0,1712,0,0,216,0,0,0,0,516,0,0,0,0,0,650,0,0,
+0,0,57,99,0,0,0,0,300,574,0,0,0,0,1023,0,0,302,0,1871,0,728,252,0,0,461,0,0,0,
+323,0,0,0,0,0,0,775,461,0,0,0,0,0,0,172,0,0,464,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,73,727,0,1023,0,0,0,0,0,0,0,0,0,0,577,0,0,0,0,0,0,0,0,1037,0,0,0,0,0,0,
+0,0,280,677,0,0,0,0,0,0,0,0,0,0,0,799,0,0,0,0,159,0,446,1730,0,0,0,0,0,0,0,0,0,
+395,0,0,0,0,145,0,0,0,0,0,0,0,20,0,0,426,608,0,0,0,0,0,977,0,250,0,0,0,0,0,100,
+0,0,0,0,1982,0,0,0,0,0,476,0,0,0,0,0,0,594,76,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,447,0,0,0,0,526,0,0,14,1124,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,188,0,0,0,0,0,0,0,0,362,301,0,0,0,1743,0,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,872,0,831,0,0,208,202,0,0,0,0,0,0,0,1954,0,
+0,0,0,516,872,0,0,313,224,0,0,24,0,11,546,0,0,0,1937,242,241,46,0,0,0,830,1273,
+0,0,0,0,0,0,0,825,327,1006,0,0,0,0,0,1580,516,366,0,0,0,0,0,1736,0,0,0,0,0,0,0,
+0,0,0,0,1935,0,826,0,0,0,0,139,331,0,0,0,0,0,0,0,0,0,0,0,288,0,916,0,0,0,0,0,
+1888,0,0,0,0,0,0,0,1471,0,1570,0,394,0,0,0,0,0,0,0,1931,0,1719,0,658,228,0,0,0,
+0,0,374,0,0,0,0,735,0,0,0,0,0,0,323,498,0,1063,0,0,0,0,155,0,0,0,0,0,0,0,0,906,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1139,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,108,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,616,
+902,0,0,0,0,0,692,0,0,0,0,0,0,823,0,0,0,305,0,0,0,0,0,0,0,681,0,0,0,0,0,214,
+1004,0,0,0,0,0,0,0,23,0,0,1703,0,0,0,0,0,0,0,0,0,1443,0,0,19,714,0,0,0,0,64,737,
+0,0,345,1758,0,0,579,47,0,0,539,139,0,0,0,0,388,0,0,0,0,253,0,0,0,0,0,0,252,0,
+745,0,0,0,0,0,0,0,0,0,0,0,504,107,0,871,0,0,0,229,0,0,0,0,0,903,0,0,71,0,0,549,
+6,47,0,0,0,0,0,0,0,0,0,980,865,705,0,0,0,161,0,0,0,0,143,1331,0,0,0,1388,33,724,
+0,0,0,19,0,0,0,395,0,0,0,0,0,846,210,0,0,0,122,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,695,937,497,0,0,0,0,0,718,0,0,0,0,0,0,0,1581,0,
+0,0,0,0,0,161,49,0,0,0,0,0,0,0,0,0,597,0,0,0,1094,0,0,0,811,908,0,0,0,0,0,0,0,0,
+0,0,1471,0,0,0,0,0,0,0,0,0,0,42,1935,0,0,0,2014,66,2007,0,0,586,0,0,0,0,0,0,0,0,
+0,28,1077,0,0,0,1221,0,0,62,0,0,0,0,0,0,0,0,0,0,1766,0,0,0,0,0,0,0,0,0,0,0,0,25,
+0,499,1388,0,0,97,10,0,0,0,0,0,481,0,0,0,0,0,0,0,0,0,0,37,134,155,486,0,1442,0,
+0,0,0,0,591,0,0,0,0,0,0,310,1173,0,0,0,0,409,1156,0,0,0,482,0,0,263,926,0,0,0,0,
+0,0,0,0,0,0,0,0,0,804,0,0,0,0,0,0,0,0,0,0,0,0,0,1265,0,415,0,348,0,0,0,1012,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,165,1803,0,0,0,0,0,0,0,408,
+0,0,0,0,0,0,257,1321,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1138,0,0,0,249,0,
+0,0,576,0,0,0,0,231,0,0,0,288,0,0,0,0,0,0,0,0,0,433,1487,569,1678,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,87,0,0,0,0,0,779,538,0,0,0,413,0,0,0,
+0,0,0,0,0,0,0,495,0,0,0,0,0,191,54,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,530,567,
+0,0,0,0,0,1484,0,0,0,0,0,0,815,609,0,0,0,0,0,484,0,0,0,0,0,0,0,0,0,0,900,0,0,0,
+0,1335,0,1724,0,0,0,0,0,0,0,0,0,0,0,640,0,0,0,0,0,0,0,0,0,0,0,1831,0,0,0,0,0,0,
+0,0,0,0,0,0,0,474,0,0,0,0,0,0,0,0,0,1103,0,1504,655,1034,0,0,0,0,0,305,0,0,0,0,
+0,0,0,0,0,1236,0,0,429,217,0,0,0,0,739,278,0,0,0,0,0,0,0,708,0,0,0,0,0,1840,233,
+0,0,0,0,0,0,0,0,2017,0,0,0,0,0,1488,0,0,0,1590,0,0,0,0,0,1800,28,0,0,0,0,0,0,0,
+0,0,45,0,36,0,22,1442,378,0,0,0,0,0,0,1507,0,0,0,0,0,0,0,0,0,0,39,0,0,1054,725,
+1955,0,2036,0,0,0,0,0,0,0,0,0,0,896,1871,0,0,0,0,0,0,0,0,0,0,805,0,0,0,0,2046,0,
+0,0,0,17,712,0,617,55,320,271,0,0,0,0,0,0,0,0,0,445,0,184,103,0,0,0,0,0,0,0,0,
+659,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,676,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+337,0,0,0,506,0,0,0,0,0,843,77,0,458,0,0,0,0,0,1420,382,109,142,330,0,0,0,0,0,0,
+0,0,0,0,0,0,87,0,0,0,492,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1239,0,0,0,0,0,0,
+211,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1049,0,321,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,1985,0,0,122,0,0,234,0,0,0,1098,0,0,0,0,0,0,549,253,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,522,131,0,0,149,0,0,0,0,0,0,0,0,0,0,0,0,0,0,507,0,0,0,0,811,630,0,0,0,343,
+0,0,0,0,0,448,591,455,0,1381,0,0,0,0,0,0,0,575,0,0,0,0,0,1175,0,0,0,0,0,0,0,0,0,
+653,0,0,0,1761,0,1198,0,0,0,0,297,1127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,678,0,0,
+164,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,35,0,45,0,0,0,0,0,121,0,0,0,0,0,0,
+0,0,125,0,0,0,1622,0,0,0,0,0,721,145,0,0,0,970,792,0,0,0,715,0,0,0,0,0,1999,0,0,
+74,531,0,0,65,0,0,0,105,220,0,0,0,0,0,0,0,960,0,0,0,0,0,0,428,19,0,0,401,96,0,0,
+0,0,0,1595,116,0,1021,0,0,0,0,0,750,1961,0,0,148,0,0,0,0,0,0,0,0,0,0,0,0,0,75,0,
+0,1383,0,0,0,0,0,0,0,0,0,0,0,0,0,0,779,0,0,0,0,0,0,0,0,598,0,424,0,0,0,0,0,0,0,
+1222,0,0,0,876,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,133,0,0,0,0,187,0,8,0,0,0,0,0,
+0,0,429,0,685,0,0,0,0,0,0,0,0,0,0,0,132,472,0,0,0,0,0,0,0,0,0,938,0,0,874,0,0,0,
+0,0,774,0,0,0,0,0,92,0,0,0,0,0,0,830,701,0,0,0,0,0,426,350,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,603,59,0,0,0,0,0,0,0,0,0,0,293,0,0,0,0,0,0,0,0,0,0,0,0,0,0,441,163,4,0,
+0,0,0,0,0,0,0,0,806,0,0,0,0,0,0,233,0,0,0,0,1994,0,1739,0,0,393,0,47,1038,0,0,0,
+309,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,363,0,0,0,175,0,0,0,0,0,0,0,666,
+0,0,1675,0,1600,0,0,0,808,0,0,0,0,0,0,0,0,0,0,0,280,54,0,0,0,0,0,0,0,0,421,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,249,0,0,103,254,0,262,1,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,805,0,0,0,0,0,0,0,0,0,1630,0,0,0,0,0,0,0,0,0,0,0,0,0,671,972,989,0,0,
+0,0,0,0,0,889,0,0,0,1382,0,0,0,0,0,0,0,775,0,0,0,0,0,0,0,0,0,0,388,202,0,0,0,0,
+16,560,0,0,0,841,0,0,566,0,0,0,938,0,0,0,0,0,0,0,0,0,0,912,0,0,0,1361,0,0,0,0,0,
+0,618,236,0,1854,0,0,318,190,0,1376,0,0,0,0,0,0,0,349,0,0,0,0,951,1972,0,0,0,0,
+0,0,344,0,0,0,0,0,0,0,0,850,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,910,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0,163,85,0,487,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,145,0,83,0,0,1013,0,0,0,1922,0,0,169,557,66,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,1193,82,0,352,454,57,0,0,1333,396,107,0,370,0,0,0,0,0,0,0,0,0,204,0,0,0,
+0,0,1706,0,0,0,0,0,0,0,0,0,0,0,0,394,1204,0,0,0,0,0,1007,0,0,0,1696,0,1519,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,981,0,0,0,0,1072,0,0,0,712,0,1629,0,0,0,0,0,0,0,728,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1271,0,0,0,1608,16,0,0,0,0,485,0,0,0,0,0,0,
+153,27,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1991,0,0,0,0,0,0,0,0,52,0,21,0,
+0,0,0,0,0,0,0,0,819,0,0,0,0,0,917,0,0,0,0,784,0,0,0,0,135,0,0,0,0,0,454,0,0,0,0,
+0,0,0,0,0,852,1719,0,0,0,0,0,852,0,0,0,0,0,952,0,0,0,0,568,0,0,0,0,0,448,0,0,0,
+67,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1826,657,0,729,666,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+669,0,0,0,0,0,0,0,402,0,0,152,0,0,0,0,912,0,0,0,0,0,0,51,320,0,445,0,0,0,0,308,
+0,0,0,0,0,386,0,0,239,0,0,130,83,0,143,0,348,0,0,0,0,0,0,0,958,0,0,0,0,0,210,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,430,0,0,0,0,0,0,0,0,0,0,0,0,7,213,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,801,0,0,0,0,0,0,0,0,0,936,0,108,0,0,
+0,0,0,0,0,0,0,885,587,219,398,364,0,1165,0,0,342,241,303,0,0,0,0,0,0,0,0,0,0,
+1454,0,0,0,0,0,0,0,0,0,0,254,562,0,786,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1294,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,493,216,0,0,0,0,219,341,0,0,0,0,0,
+0,0,0,0,0,130,1734,154,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,701,604,0,0,879,0,195,
+666,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1669,0,0,0,1791,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1228,0,0,0,0,0,623,0,0,0,0,0,0,0,798,0,0,0,0,0,0,0,0,0,0,0,0,84,
+122,0,0,0,837,0,0,0,0,0,0,1013,0,0,577,0,0,0,460,932,0,0,0,0,0,0,0,0,0,0,0,31,
+131,0,0,0,605,0,0,0,1246,0,0,0,0,68,278,165,307,781,0,0,0,0,0,0,33,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,1113,0,0,720,1953,203,0,0,0,0,0,0,0,425,326,0,0,0,0,0,
+0,0,0,0,0,241,1316,0,0,0,0,0,416,0,0,0,1300,0,847,0,0,662,358,0,0,0,0,839,1823,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,654,1522,0,0,0,0,0,0,163,0,0,0,0,0,314,978,0,0,0,
+601,0,0,0,0,0,946,434,0,0,0,402,411,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,1467,
+410,0,0,0,0,0,0,0,0,0,0,0,0,0,0,483,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,677,0,0,0,0,0,0,0,0,0,0,0,0,70,0,0,0,0,1405,0,0,0,0,0,0,108,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,777,0,0,0,0,0,747,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,505,0,326,0,0,164,628,654,0,0,0,
+37,32,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,668,152,0,0,0,0,0,0,0,0,0,0,0,581,
+0,0,0,0,44,126,89,0,0,0,0,0,0,0,0,1531,0,0,0,0,0,0,0,0,203,1167,0,0,0,0,0,0,0,0,
+531,1232,0,0,0,0,0,943,0,670,231,880,0,1617,0,0,0,1957,0,0,0,0,0,0,0,975,0,0,0,
+0,0,0,0,0,0,0,0,242,0,0,0,0,0,0,0,0,0,421,0,0,14,834,0,0,0,0,0,0,0,0,0,0,0,0,
+465,0,0,0,0,0,834,688,413,855,0,0,0,590,0,0,0,0,0,0,0,0,114,0,0,0,0,0,0,0,0,0,0,
+0,45,169,0,0,0,0,0,0,0,0,0,0,0,198,0,0,565,585,0,0,0,0,0,0,0,0,0,0,0,0,0,691,0,
+0,0,593,0,0,0,0,0,0,0,0,0,913,116,0,0,0,0,1360,0,0,0,802,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,673,308,0,709,1006,1895,0,228,0,0,0,1840,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,608,0,0,0,0,0,0,0,0,0,1573,0,2039,136,540,0,0,0,0,0,0,0,
+897,0,0,938,1878,0,0,0,0,0,0,0,0,0,1469,0,999,0,299,0,0,0,0,0,0,0,578,0,0,0,0,0,
+456,0,0,0,1679,163,693,0,0,0,0,0,0,48,755,0,0,0,0,0,0,0,0,0,0,0,0,338,0,0,0,0,
+1091,0,0,0,0,695,0,0,1464,0,0,0,0,0,975,0,0,335,0,0,1979,0,0,0,0,269,1566,630,
+396,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1815,634,0,0,0,966,0,0,0,0,0,0,0,9,
+412,0,958,0,0,579,382,0,212,0,0,0,0,965,681,0,0,16,0,0,0,0,0,0,0,0,0,0,0,0,655,
+0,0,0,0,67,0,0,0,0,0,0,751,0,0,0,0,423,231,0,0,1016,300,0,0,0,0,100,237,0,0,0,
+1370,0,0,0,1208,0,0,0,0,0,1219,129,0,0,0,0,0,0,0,0,0,0,0,0,0,0,199,0,0,427,0,0,
+0,0,949,665,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,712,0,0,0,0,0,1186,0,0,0,0,0,0,0,0,0,0,295,312,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+151,0,0,0,0,588,4,0,0,0,0,0,414,104,0,0,757,263,0,561,0,0,0,320,0,0,0,0,0,0,0,0,
+0,0,0,225,0,0,0,0,37,817,0,974,0,0,0,0,0,0,0,0,0,0,0,0,0,2026,131,235,16,0,590,
+1157,0,0,0,0,0,0,0,0,221,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,140,390,0,0,0,0,
+0,0,0,1144,0,0,0,464,0,0,0,0,0,0,0,0,0,0,0,0,204,407,303,1218,0,0,0,0,5,325,0,0,
+0,0,12,800,0,1783,0,0,0,0,0,0,0,0,0,0,504,621,0,0,0,0,0,0,0,0,0,920,0,376,0,0,0,
+0,0,218,580,0,768,454,0,0,0,0,0,0,0,0,0,0,0,0,676,0,0,0,0,0,0,164,0,0,0,0,0,0,0,
+0,50,0,0,0,0,0,0,0,0,0,0,0,0,0,120,285,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,226,343,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,29,0,0,1812,0,0,8,0,0,0,21,1125,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,1327,0,0,0,0,575,1598,0,0,0,0,0,0,0,0,0,895,0,0,0,959,0,0,
+0,0,0,1759,173,0,0,0,0,266,261,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1427,0,0,300,1033,0,0,0,0,0,0,0,0,0,0,0,584,0,0,0,0,52,734,
+0,0,217,239,0,1129,0,0,0,0,0,0,0,0,732,20,0,0,0,0,0,0,0,0,0,0,0,418,0,0,0,613,0,
+0,0,0,0,0,0,0,0,632,0,0,85,984,0,0,0,0,909,694,7,1109,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,167,0,0,0,0,280,62,0,0,33,0,0,359,186,980,0,0,0,0,0,0,0,0,0,0,0,585,0,0,0,
+211,0,0,336,145,0,1130,0,873,0,0,840,263,0,0,0,0,0,0,0,0,0,916,0,0,0,0,0,0,0,0,
+0,0,155,0,0,0,461,97,0,0,0,0,0,1356,0,0,0,0,0,0,0,593,0,0,0,0,0,1392,0,0,0,0,
+126,0,0,0,0,1179,0,0,0,0,0,162,0,0,0,0,0,765,0,187,0,1286,0,0,0,0,0,0,0,0,0,635,
+0,0,23,215,0,0,0,1306,0,0,97,716,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,657,0,
+0,0,0,0,0,0,0,299,0,0,0,0,0,0,134,0,0,0,0,0,0,0,0,0,0,0,658,1082,0,0,0,0,0,2002,
+0,0,0,0,0,0,833,248,0,0,0,0,0,1654,0,0,531,0,0,0,0,0,0,634,0,0,0,0,0,0,0,0,0,
+853,573,249,0,0,0,0,0,0,0,0,527,0,0,0,0,1419,0,0,0,0,0,0,20,49,0,0,0,992,0,0,0,
+728,0,0,0,0,0,0,0,0,0,0,0,0,497,1579,0,0,0,0,62,268,0,0,0,0,0,0,0,1201,0,0,0,0,
+0,0,0,0,0,0,0,0,495,193,0,0,0,0,106,0,0,859,0,0,23,0,0,0,0,0,0,0,813,925,0,0,
+223,613,953,0,0,0,0,0,0,0,0,666,0,0,0,0,0,0,0,0,0,670,0,0,40,216,0,0,0,0,0,0,
+259,0,0,0,440,1114,0,0,0,0,0,0,0,0,74,475,0,0,188,139,0,797,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,1572,0,0,0,0,39,0,0,0,0,0,0,0,0,0,0,0,0,1594,0,0,0,0,0,0,0,290,0,232,
+0,0,887,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,521,14,0,0,0,0,0,741,0,0,0,992,0,
+0,0,0,0,0,0,0,111,0,0,425,0,0,0,0,0,789,0,0,0,1593,0,1768,0,0,233,0,0,0,0,943,0,
+0,0,0,0,0,0,955,225,245,0,0,0,0,0,0,241,0,0,0,0,1943,0,0,0,1284,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,709,0,0,0,0,0,0,554,0,0,0,0,0,0,0,0,1564,0,0,0,
+443,0,0,0,0,0,0,280,0,0,0,0,0,0,0,0,729,0,0,0,348,0,0,0,0,0,0,0,758,848,298,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,829,1422,189,121,0,0,632,812,0,0,556,0,0,0,0,0,436,172,
+530,844,232,984,0,0,0,0,0,0,0,0,0,0,147,0,0,0,0,0,0,0,0,537,0,0,0,0,0,859,0,0,
+842,0,0,0,0,0,0,0,0,0,0,1291,0,0,0,0,0,0,0,0,0,0,0,1482,612,392,0,0,0,262,31,0,
+0,0,0,0,0,0,0,0,0,753,549,0,0,0,0,0,0,696,0,0,0,0,0,0,0,834,0,0,0,0,0,771,0,0,0,
+0,0,0,0,0,0,0,0,0,0,921,0,0,0,674,0,0,0,0,0,0,0,0,0,0,308,444,0,0,0,0,0,0,805,
+180,0,0,278,271,0,0,214,505,0,1215,0,0,0,0,0,0,387,271,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,1645,42,92,0,459,0,0,330,1557,0,0,0,0,0,0,0,0,113,18,0,0,0,
+1742,0,0,0,965,0,0,0,0,0,0,0,0,0,0,0,0,0,182,0,0,65,0,0,0,0,0,0,0,0,0,0,0,0,973,
+0,0,0,0,0,328,0,0,588,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1786,
+0,0,962,1985,0,0,0,308,508,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,588,0,0,0,0,0,0,614,793,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,290,0,0,0,0,0,0,0,0,0,0,1136,0,0,0,0,0,0,0,0,0,0,796,719,0,0,
+326,210,0,0,0,701,758,472,0,0,0,1947,278,1079,0,0,0,0,0,0,497,41,0,0,634,46,961,
+0,810,524,0,0,33,0,0,0,0,0,0,0,0,0,0,0,0,532,0,997,0,0,0,0,0,0,0,0,0,0,0,1301,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1298,0,671,0,0,0,306,0,0,0,0,0,0,0,0,0,0,
+693,1823,0,0,0,759,0,0,0,0,0,1932,0,0,0,0,0,0,0,0,0,0,0,0,0,0,88,182,0,0,0,1964,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,521,0,0,0,0,0,0,424,857,0,0,0,0,671,328,0,
+529,0,0,0,0,0,716,0,1509,80,67,0,0,0,0,59,141,0,0,0,0,0,0,783,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1498,0,0,0,0,343,430,803,1183,677,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1357,53,0,0,0,0,590,0,0,0,0,0,0,0,0,0,0,
+0,0,0,329,0,0,0,0,0,0,0,469,0,0,0,0,0,0,0,0,0,0,460,0,0,1743,0,0,963,340,0,0,0,
+0,0,1603,0,0,250,0,0,0,0,0,646,218,0,1794,0,0,0,571,0,455,0,0,0,1012,0,0,0,0,0,
+0,0,0,0,0,0,0,597,161,0,349,0,524,0,0,0,0,0,0,0,0,0,0,0,0,322,432,0,0,0,0,0,0,
+325,223,0,0,0,0,0,566,0,0,0,1394,481,436,0,48,457,610,756,618,0,0,0,755,0,1217,
+0,0,0,0,0,197,0,0,0,0,0,0,0,0,0,0,0,0,0,0,544,492,107,414,0,0,0,0,0,0,0,0,0,0,0,
+1007,0,0,0,0,5,0,0,1580,0,0,0,0,0,0,0,0,0,0,0,0,0,673,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,1843,0,0,0,0,0,0,0,0,0,165,0,0,0,0,0,0,809,885,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,498,0,0,0,306,9,0,0,0,0,0,0,0,437,721,146,0,0,0,0,0,0,0,0,0,0,0,177,0,0,0,0,
+0,0,0,1377,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,200,0,959,0,0,0,1928,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1435,0,481,0,0,0,0,0,0,142,84,0,0,0,0,0,
+1015,0,0,0,315,0,0,0,0,0,0,759,0,0,0,0,0,0,0,0,712,0,0,0,1722,0,0,0,0,0,0,0,0,0,
+0,0,0,222,0,985,1414,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1273,
+538,706,0,0,0,0,0,0,0,0,115,0,0,0,0,0,0,0,0,0,0,1781,0,0,0,0,0,431,97,665,42,
+237,0,0,0,264,0,0,213,0,0,0,0,0,0,0,455,0,0,0,906,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+624,0,574,0,0,0,0,0,0,0,0,0,0,0,0,354,0,0,0,1558,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,
+235,723,1813,0,0,0,957,0,830,0,0,0,0,0,0,0,0,0,0,0,0,23,0,0,496,0,0,0,0,0,0,0,
+547,239,88,0,0,0,0,0,0,0,0,0,1310,0,0,0,0,0,0,0,0,80,1076,0,0,118,0,0,0,479,274,
+0,0,0,0,0,0,0,0,0,0,0,497,0,0,669,261,0,0,0,0,13,0,0,0,0,0,0,791,250,642,0,0,0,
+1429,939,949,0,0,0,0,0,0,0,0,0,0,0,0,0,818,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,982,330,0,0,0,0,545,0,0,0,0,0,0,947,0,1188,0,0,0,0,0,904,0,0,0,0,0,1372,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,693,377,0,0,0,0,0,0,0,0,0,0,0,0,0,0,695,0,0,
+713,386,0,0,0,0,128,1575,0,0,0,0,0,0,424,893,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,904,0,0,0,0,0,552,322,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28,1808,49,0,0,0,0,
+1832,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,421,0,0,442,415,0,0,289,
+0,0,0,0,0,206,110,0,0,0,0,0,205,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+19,1539,0,0,0,0,0,1340,0,1194,0,0,0,0,0,0,0,0,549,0,0,0,0,0,0,0,0,1720,0,0,0,0,
+0,0,0,0,0,319,0,0,0,0,112,1180,0,0,0,0,0,0,0,0,0,0,0,967,0,0,0,0,0,0,0,0,0,1940,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,735,0,0,0,0,0,0,0,0,0,897,132,0,0,0,0,0,0,0,
+0,0,0,38,838,0,0,0,379,218,8,660,1017,0,0,0,0,0,0,111,387,647,877,0,0,53,790,0,
+0,0,0,0,0,0,0,458,0,0,0,0,0,0,954,0,0,0,394,0,1367,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,882,0,0,0,0,0,0,0,1409,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,38,124,342,199,0,0,0,0,
+0,0,0,0,0,0,724,628,0,0,0,0,804,266,0,0,0,0,0,208,0,79,0,0,0,0,0,0,0,0,741,0,0,
+0,0,0,0,0,0,0,0,606,0,1494,821,1553,0,0,135,405,0,0,178,100,0,0,0,0,0,0,0,0,0,0,
+0,0,0,481,0,0,0,1378,0,0,0,0,0,0,0,0,0,0,0,0,0,791,33,1227,857,0,467,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,447,0,0,0,0,0,0,86,128,0,0,0,0,0,0,587,0,0,0,692,1018,0,
+195,0,0,0,0,0,0,0,1546,0,0,0,0,0,0,0,0,0,0,0,684,0,0,345,0,0,0,0,0,0,365,0,1683,
+0,0,472,0,433,0,0,0,0,0,0,0,28,0,0,0,997,0,705,3,0,0,0,0,0,0,0,0,0,229,0,0,0,0,
+102,0,0,0,0,866,1022,0,0,0,0,0,0,0,0,0,55,0,115,0,0,0,0,933,0,0,0,0,0,0,0,702,0,
+0,0,0,0,0,0,1728,26,484,0,0,0,185,618,417,0,803,0,0,0,0,0,0,0,0,0,0,0,1262,0,0,
+0,0,0,0,0,0,0,0,0,0,0,633,0,0,0,0,0,0,0,0,0,0,0,0,0,479,262,0,0,0,0,0,0,830,0,0,
+0,0,26,70,0,0,0,0,0,0,0,0,217,0,640,51,0,0,360,1586,0,0,0,0,0,652,0,0,0,0,0,766,
+0,0,0,0,298,737,0,0,0,0,0,0,0,0,0,0,655,222,906,0,0,1013,991,2009,0,0,0,0,503,0,
+0,0,216,154,0,0,0,716,0,844,0,0,0,0,621,252,0,0,0,0,748,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,103,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,576,0,0,0,648,0,0,0,331,0,0,0,
+0,0,0,0,0,0,0,0,0,632,0,0,0,518,107,0,0,0,0,0,0,0,0,851,0,0,0,0,504,0,0,0,0,0,0,
+0,0,0,0,0,0,7,883,0,0,0,0,0,0,0,922,0,0,0,0,0,0,0,0,91,993,0,0,0,0,0,0,200,131,
+10,0,0,0,0,0,0,0,0,0,0,0,0,0,365,1433,0,0,0,0,28,103,0,0,798,1013,0,0,0,0,0,0,0,
+0,39,1925,0,853,0,0,271,519,0,0,0,0,338,0,0,300,470,419,0,0,0,0,0,0,836,0,0,0,0,
+0,0,1937,0,0,0,0,0,393,0,0,357,0,0,0,0,0,703,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,387,0,0,0,0,0,0,75,708,453,1351,0,303,0,0,772,0,0,0,0,0,0,0,0,749,0,0,
+0,0,0,0,0,0,0,0,0,0,0,1065,0,0,717,226,0,0,0,0,0,890,431,626,0,0,0,0,706,0,0,0,
+51,698,0,0,0,0,0,0,0,0,0,0,0,828,0,0,17,0,0,0,0,1929,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,84,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27,871,498,0,101,1793,0,0,0,0,0,0,435,0,
+0,0,0,0,966,0,129,1644,0,0,0,0,0,0,0,0,0,0,0,0,0,997,502,0,0,0,0,0,0,0,0,0,0,0,
+0,823,0,1927,0,0,0,0,98,1756,0,0,0,0,0,0,0,0,0,0,0,0,8,0,160,1046,0,492,0,0,0,0,
+0,0,129,45,0,0,0,0,0,0,353,558,0,0,0,0,0,785,0,0,0,1145,189,0,0,0,26,353,0,0,0,
+0,0,2024,0,0,0,606,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,855,0,0,0,0,0,0,0,0,0,0,0,
+0,0,2011,0,0,5,4,0,0,461,764,0,0,0,1449,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1445,0,0,
+0,1168,0,0,0,233,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,0,0,216,0,0,0,286,0,0,0,
+3,0,0,0,723,536,0,0,0,0,0,285,0,0,0,560,0,0,0,0,0,690,0,0,0,0,0,1246,0,0,63,0,
+33,0,0,0,0,0,520,1862,0,0,0,0,0,0,0,0,0,0,0,0,630,0,0,0,0,554,0,0,0,0,0,1001,0,
+0,0,0,0,446,0,0,0,0,0,0,0,1313,0,0,837,636,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,278,
+0,0,0,0,0,0,0,0,868,0,0,0,0,1010,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1231,0,304,0,506,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26,0,93,1408,794,
+843,704,0,285,114,485,898,145,0,19,2035,0,0,0,1933,0,0,0,0,0,0,0,1728,0,0,0,0,0,
+0,0,0,746,0,0,0,0,0,0,0,995,1964,0,0,0,0,0,0,0,0,0,0,0,1550,0,874,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,1018,0,0,0,814,126,0,0,1264,0,0,814,955,0,0,0,0,0,0,
+0,981,0,0,0,0,0,0,0,0,915,56,0,0,100,0,0,0,0,0,0,0,0,0,638,0,0,0,0,738,0,0,0,0,
+0,0,0,0,0,758,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1112,0,0,214,0,0,0,133,0,196,
+168,0,0,0,0,0,1152,0,1245,0,0,538,169,871,1816,0,0,413,133,0,0,0,978,0,0,43,93,
+371,0,0,0,0,0,0,526,25,0,754,335,0,0,0,0,182,0,0,0,0,0,0,0,0,0,0,0,39,601,0,0,0,
+0,0,0,0,181,370,0,0,1652,358,0,0,0,0,0,0,0,0,0,176,286,0,788,0,0,0,0,0,1223,780,
+254,1003,896,0,0,0,1447,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,744,0,0,0,0,0,126,0,
+41,788,0,0,0,629,0,0,0,0,0,0,0,0,0,0,0,293,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,420,37,1900,0,0,0,0,542,1570,957,0,0,0,0,0,0,
+0,373,31,0,0,0,0,125,325,0,0,0,0,0,0,323,0,0,1547,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1216,0,0,0,0,0,0,198,1905,629,15,0,0,0,0,0,0,20,75,543,1353,0,0,0,533,0,0,6,0,0,
+0,0,0,0,538,0,0,0,0,0,0,0,0,0,0,0,338,0,0,0,0,11,0,0,0,284,659,0,989,0,0,0,0,0,
+0,0,0,0,848,0,0,507,0,0,0,0,0,0,0,0,188,991,884,0,0,0,0,60,959,0,0,0,0,0,1653,0,
+0,922,337,0,638,0,0,500,0,0,0,0,0,0,0,0,0,0,0,166,0,0,0,0,0,0,0,0,0,0,0,0,418,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,760,0,0,0,0,0,0,1277,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,770,0,0,0,0,0,0,0,243,89,0,0,0,0,0,0,0,0,0,1396,0,
+560,0,0,3,1658,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,586,0,0,1271,0,0,0,505,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,637,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1947,
+41,445,0,0,0,0,0,0,0,0,57,189,0,0,371,0,0,0,0,552,0,883,0,923,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,875,0,0,0,1788,49,0,0,0,0,0,
+0,0,0,0,0,0,661,0,0,1945,0,0,0,0,0,794,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,1135,0,0,0,745,0,0,0,0,0,0,0,84,0,0,0,0,0,0,0,410,0,976,0,0,0,0,0,703,0,0,
+0,0,0,0,187,322,0,0,0,227,0,0,0,0,560,0,31,1395,0,0,0,0,0,466,0,0,0,0,643,167,0,
+0,0,1428,0,412,0,0,0,0,0,0,0,0,0,1118,562,0,0,0,0,0,256,0,0,0,0,0,0,1771,0,0,0,
+0,0,1190,132,0,66,0,0,0,0,0,0,0,0,0,0,317,0,0,0,63,0,0,0,0,0,0,0,1475,0,0,0,0,0,
+0,0,288,0,0,0,0,608,0,0,0,0,0,0,0,0,1225,0,1189,0,0,0,0,0,0,0,1468,0,0,0,0,0,
+689,120,0,0,0,0,0,0,0,1,0,329,0,0,0,0,226,0,0,0,0,0,1855,0,0,461,0,0,0,0,1346,0,
+0,0,0,0,85,0,0,299,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1171,0,0,
+0,980,0,0,0,0,0,0,0,0,637,279,0,0,0,0,0,293,0,0,0,0,528,17,0,0,0,0,5,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,48,0,0,0,0,0,0,0,601,0,0,0,0,0,0,779,0,
+196,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1322,737,752,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,412,192,80,0,0,8,1470,0,0,0,0,0,0,0,0,0,873,0,0,0,0,0,835,0,0,0,0,256,
+38,986,0,0,0,0,0,0,0,0,0,91,257,278,911,0,0,0,0,0,0,0,0,749,151,0,0,0,0,0,0,0,0,
+0,0,0,0,989,0,0,990,0,0,90,194,0,0,0,0,0,425,0,0,0,0,0,774,0,0,0,0,0,0,0,0,0,0,
+646,827,752,0,0,0,662,0,22,21,0,0,0,0,0,0,95,239,0,0,0,431,0,0,0,0,0,874,0,0,
+265,65,0,0,0,1350,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1887,0,0,0,0,0,0,0,809,
+0,696,0,1074,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,630,0,0,802,0,0,0,56,776,0,
+970,0,0,797,0,0,0,0,0,400,0,0,1951,0,0,41,0,11,118,0,0,0,0,0,0,0,0,251,615,0,0,
+0,1044,0,0,0,0,0,0,0,0,0,0,0,225,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,370,0,0,0,0,
+104,48,209,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,930,0,0,0,0,
+0,0,0,0,0,0,0,1286,0,759,0,120,385,0,0,0,429,0,0,0,0,0,0,0,0,820,0,0,0,0,0,0,
+199,0,10,151,0,0,0,761,365,0,0,0,0,0,0,0,0,0,46,1086,0,0,0,0,11,1624,58,344,0,0,
+1008,1868,0,0,0,888,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,711,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,440,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,914,1913,0,958,0,885,0,0,0,0,0,0,0,0,0,0,0,
+0,0,847,276,0,302,65,0,0,0,510,0,1514,0,0,0,0,0,0,152,291,0,0,0,0,0,0,0,0,0,0,0,
+0,282,589,0,0,0,0,0,0,0,0,0,0,0,0,0,130,0,0,463,42,0,0,0,0,0,372,0,0,0,0,0,0,0,
+0,0,680,0,0,0,0,0,0,0,0,977,1997,0,0,0,810,0,0,0,0,0,0,0,0,0,1390,0,0,0,644,0,0,
+867,982,0,0,0,0,0,0,0,540,0,123,0,0,0,1978,0,0,0,0,789,623,0,1723,0,1220,0,0,0,
+0,0,0,0,480,0,0,0,0,0,0,0,0,0,0,0,888,0,0,0,0,0,0,0,0,0,0,0,0,299,1995,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,788,179,0,0,0,0,0,0,431,156,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1373,39,80,196,0,0,507,0,0,0,646,0,0,0,0,
+0,1214,0,0,0,0,926,0,0,0,1,114,0,0,0,0,0,446,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,490,0,0,0,491,0,1584,0,0,507,250,0,0,0,158,
+10,362,1,0,0,0,0,0,0,0,0,0,408,228,860,480,0,779,0,0,0,557,0,0,142,197,0,0,0,0,
+0,0,0,0,0,0,0,1490,11,378,316,1057,0,0,18,579,299,1546,0,177,0,0,0,0,0,0,0,0,0,
+411,0,0,0,0,727,439,0,0,0,0,0,1528,0,0,0,0,0,0,58,0,482,0,0,0,505,1952,0,0,0,0,
+0,0,0,0,0,0,0,242,0,0,0,0,0,0,0,953,0,0,0,0,802,0,0,0,0,0,0,0,0,0,0,290,0,0,791,
+52,0,0,0,0,0,0,0,0,0,0,0,112,0,0,0,0,0,1028,0,0,138,0,0,0,0,1811,0,0,0,0,0,0,
+934,1821,0,0,0,0,371,38,0,0,0,1296,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,723,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1330,0,0,0,0,0,0,0,1255,296,109,0,0,0,0,0,660,0,0,0,0,270,591,0,
+0,0,0,0,0,0,1090,81,0,0,0,0,391,0,0,0,0,249,322,0,0,0,0,0,0,0,1412,0,0,0,0,0,0,
+0,0,0,0,526,632,0,0,0,0,0,0,235,144,0,0,0,0,0,940,0,0,0,52,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,309,196,0,0,0,0,0,1912,0,1290,0,686,0,0,625,0,0,0,0,0,0,0,0,0,0,0,412,0,
+0,0,0,43,0,0,0,0,11,967,758,0,0,0,0,0,0,0,0,0,0,0,0,0,0,220,0,0,0,0,0,0,0,0,0,0,
+873,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,890,0,0,2,0,0,0,0,0,0,0,0,1774,
+393,263,0,0,0,0,0,0,818,456,0,0,251,178,393,97,0,0,0,0,0,674,168,0,0,0,0,0,0,0,
+159,1639,0,0,0,0,0,0,0,0,59,934,0,191,0,0,0,0,346,165,0,877,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,128,0,0,0,0,0,0,1297,0,0,0,0,0,0,164,0,0,0,15,132,241,1073,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,228,324,53,0,0,910,0,0,0,0,0,0,0,0,734,705,
+217,73,0,0,0,0,0,0,0,0,636,389,0,1409,0,0,0,0,0,893,0,0,0,0,21,0,0,0,0,0,0,0,0,
+0,0,0,0,0,721,0,0,0,959,0,0,0,0,1433,0,0,0,0,0,0,0,0,0,0,0,0,174,189,0,0,0,0,0,
+0,0,0,0,0,22,2,0,0,815,354,0,0,0,0,425,0,411,60,13,1611,0,0,0,0,0,0,0,0,0,0,0,0,
+0,1478,596,0,0,398,0,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,192,1159,0,0,0,0,0,
+592,223,0,0,0,0,0,0,0,245,64,0,0,0,0,278,0,604,0,0,1502,265,0,0,0,0,0,0,0,310,
+1763,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,129,0,0,0,0,0,0,0,0,0,1356,0,0,0,0,0,0,0,
+0,505,0,0,0,0,0,0,0,1000,0,0,966,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,839,0,0,0,0,0,0,
+0,0,0,0,0,0,0,637,0,0,0,0,0,0,0,0,0,0,0,0,0,0,590,0,0,0,0,280,0,0,0,1386,0,0,0,
+281,0,1064,0,0,0,0,0,917,0,0,15,555,0,0,1014,1883,0,0,0,965,0,0,117,33,0,0,0,
+801,0,0,0,0,0,877,0,824,0,0,0,0,0,0,0,0,0,0,0,365,0,0,0,0,0,0,774,7,0,430,0,0,
+231,360,0,0,0,0,0,0,0,0,822,740,0,0,929,1485,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,852,0,0,0,0,17,0,0,0,0,0,0,1001,0,0,0,0,35,831,0,0,384,457,0,0,0,1351,0,27,
+0,0,984,0,264,552,0,401,0,0,0,710,0,1211,0,0,11,205,0,0,0,0,0,0,0,0,0,0,0,0,5,
+579,0,717,0,0,1011,0,0,0,0,0,0,0,0,0,0,0,0,0,0,805,0,0,0,0,0,0,0,0,0,0,0,489,0,
+0,0,1024,0,0,0,0,0,0,0,0,0,892,0,0,0,0,0,0,0,0,0,0,0,0,473,0,0,0,659,864,0,0,0,
+0,0,0,152,819,0,51,0,0,0,0,0,0,0,0,0,0,130,0,0,0,0,0,229,0,0,0,0,674,0,0,0,0,0,
+0,0,0,0,770,52,79,0,0,0,1666,0,409,0,0,0,0,0,0,0,195,0,688,0,0,0,0,0,0,0,0,0,0,
+0,889,174,160,0,0,0,0,0,0,0,0,0,0,0,0,0,872,0,918,569,268,0,0,0,1224,0,1361,0,0,
+0,0,0,0,0,0,0,374,0,0,0,0,0,731,0,0,0,0,190,0,0,0,0,0,0,0,202,506,444,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,835,0,17,1526,0,0,0,0,0,477,0,0,
+994,1374,76,0,0,0,0,0,0,0,355,287,0,1389,0,0,0,0,0,0,455,384,0,0,0,264,0,0,0,0,
+0,0,0,0,0,0,0,0,1001,0,0,0,0,0,0,0,0,0,0,0,0,28,0,0,0,851,175,359,0,0,0,0,0,0,0,
+0,287,740,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,857,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+819,1402,0,0,0,0,0,0,174,224,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1649,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,655,573,0,0,0,0,0,0,0,0,128,351,0,0,0,0,0,0,
+0,0,0,0,0,918,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,687,0,0,0,0,0,0,0,0,0,1525,
+0,0,0,1009,0,0,0,0,0,0,0,340,0,0,0,0,0,0,0,0,0,0,861,0,176,0,0,0,0,0,0,0,0,0,96,
+985,0,615,0,0,0,0,0,0,0,1919,0,0,0,0,0,1131,0,0,0,0,0,0,0,247,0,0,0,0,27,23,0,0,
+0,0,0,0,0,0,38,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1015,0,0,0,0,0,1088,0,0,
+0,0,0,1585,0,0,0,0,227,0,0,0,478,360,0,0,0,95,0,0,0,0,0,0,699,0,0,0,26,0,0,0,0,
+1119,0,0,0,739,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,741,67,0,0,0,0,0,0,464,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,42,0,96,0,0,0,26,342,0,0,0,0,0,0,203,0,0,449,0,
+0,0,0,0,0,0,0,0,0,256,311,0,0,0,0,0,0,758,0,0,0,0,0,0,0,0,827,0,0,0,0,581,64,0,
+1047,0,0,0,0,0,288,0,0,0,0,0,1375,0,0,0,0,0,0,0,0,0,0,0,1309,0,0,0,0,0,0,0,0,
+376,12,0,0,0,0,0,154,0,1520,0,1753,95,502,0,0,0,0,0,0,0,269,291,1197,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,1341,0,1017,0,0,0,0,0,0,0,
+0,857,1810,533,0,0,1453,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,836,211,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,19,0,156,0,0,0,0,1009,0,0,0,0,0,0,0,0,0,0,0,0,0,820,0,0,
+0,0,0,0,0,0,0,228,0,0,0,1131,0,1276,0,0,0,0,0,0,0,0,0,0,0,0,849,1792,0,0,389,
+291,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,525,0,0,
+0,453,0,0,0,0,666,0,0,0,422,0,355,0,0,0,0,165,0,260,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,865,0,0,0,0,0,0,0,1625,0,0,0,234,0,1383,0,0,0,0,0,0,0,0,306,0,0,0,802,1921,
+0,0,0,0,0,0,180,0,0,0,0,1312,814,0,0,0,0,0,0,0,0,0,0,707,0,0,0,1493,11,61,733,0,
+0,0,341,0,0,0,98,0,0,0,0,0,0,0,0,0,0,0,1014,0,0,0,0,0,0,0,142,102,0,0,30,0,0,
+823,0,1045,0,0,0,1930,0,1512,0,0,0,0,0,0,0,87,0,1243,245,0,0,0,0,0,0,0,48,68,0,
+0,0,0,0,0,0,0,126,77,625,938,0,0,351,0,0,0,174,1668,0,707,0,0,0,0,0,0,0,0,0,0,0,
+403,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,282,0,0,0,0,0,0,8,44,0,0,363,115,0,0,0,0,0,0,
+0,0,0,0,0,0,545,761,0,0,835,1254,0,0,0,0,930,1936,0,0,0,0,0,0,0,0,653,0,0,0,0,0,
+344,0,0,1483,673,185,0,0,460,93,753,478,0,0,0,0,0,1020,0,0,0,0,0,0,0,103,0,0,0,
+499,0,0,0,0,0,0,207,0,0,0,0,0,0,0,0,0,0,0,0,0,0,96,0,968,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,3,0,0,0,0,399,0,0,0,0,224,563,0,0,0,0,0,704,0,0,0,0,0,0,0,0,0,0,0,
+1559,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,861,0,0,0,0,946,333,746,0,0,0,0,0,
+0,0,910,0,0,0,0,0,0,0,0,0,0,0,0,0,652,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1393,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1514,0,0,0,0,201,0,510,717,0,0,528,0,0,0,0,
+20,0,0,0,1251,0,0,0,1163,0,0,0,307,0,0,0,0,0,1091,0,0,0,0,0,0,0,0,0,0,0,429,0,0,
+0,881,0,0,0,0,0,621,0,0,0,0,0,0,0,736,0,348,0,868,0,0,0,0,433,0,0,0,771,1495,0,
+0,0,0,215,0,0,0,0,0,124,0,0,0,0,0,0,0,0,0,0,0,55,0,0,0,0,0,0,0,112,62,0,856,270,
+0,572,0,0,0,0,939,0,0,0,0,0,0,0,352,0,0,0,0,0,0,0,0,0,647,0,0,0,0,10,0,0,0,0,0,
+0,0,220,0,0,0,0,0,0,0,0,0,0,0,0,0,464,0,0,109,0,0,0,1746,0,0,0,515,0,0,0,566,0,
+0,0,0,0,0,67,40,0,0,722,992,0,0,923,0,0,0,0,0,0,1145,0,0,0,0,0,0,0,0,0,0,0,568,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,247,0,0,0,0,645,0,0,328,0,0,0,0,0,0,0,0,0,0,0,0,
+1363,0,0,0,0,0,1280,0,0,0,0,0,0,0,0,0,0,7,28,360,162,0,0,0,0,0,0,0,0,0,0,0,764,
+0,0,833,862,0,856,0,0,0,0,0,0,736,92,0,0,948,1944,0,1479,63,590,0,0,0,1521,0,0,
+0,709,0,0,61,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,483,0,0,0,0,1213,
+0,0,0,0,29,1022,0,1712,0,466,0,0,0,0,0,0,0,0,0,0,0,0,0,731,0,0,0,0,0,0,171,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,241,0,0,0,0,0,0,0,0,0,0,0,964,2005,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1100,0,0,0,954,0,0,0,0,0,0,0,0,0,1958,0,0,34,549,994,0,0,449,
+137,850,0,0,670,146,0,0,0,0,518,159,0,0,0,0,0,0,0,0,151,0,0,1027,0,0,0,0,0,0,0,
+0,0,0,983,0,0,0,0,993,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,141,501,0,0,0,
+0,0,0,0,0,0,452,0,0,0,0,0,0,0,0,0,0,233,149,0,0,0,0,0,0,0,0,582,0,0,0,801,0,0,0,
+0,0,0,70,0,0,369,0,36,0,0,0,0,0,0,0,204,721,430,241,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1817,16,1078,1021,0,0,
+406,0,0,0,0,0,69,0,0,0,0,0,1830,0,0,0,824,0,0,0,0,0,0,0,0,0,826,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,816,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1000,717,1845,0,423,0,0,
+0,0,0,0,0,0,510,0,0,1048,0,0,0,618,0,0,0,520,0,0,0,0,990,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,321,0,0,0,0,0,0,0,1135,0,0,921,0,0,0,24,397,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,856,0,0,0,139,282,981,0,288,0,0,0,1890,651,56,0,0,0,0,0,0,0,
+0,261,0,0,0,0,0,0,0,0,0,0,0,617,1403,0,1205,0,0,563,0,0,0,0,0,0,0,0,333,0,0,0,0,
+0,369,0,0,0,0,0,0,0,0,0,622,0,0,0,1407,0,0,0,0,0,0,0,0,0,0,0,0,624,160,0,363,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,619,0,174,292,0,0,656,616,0,0,0,685,0,0,0,0,0,0,0,0,0,0,0,0,0,647,0,0,0,631,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1267,0,0,0,1797,0,0,0,1684,0,0,469,0,531,
+1230,73,0,0,0,0,0,0,0,0,0,268,0,0,0,0,0,102,558,109,65,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,595,0,0,0,0,0,374,1832,0,0,0,0,0,0,16,0,405,6,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,881,0,1495,0,0,0,0,0,0,0,0,0,142,0,0,0,0,0,0,0,0,0,0,21,466,23,
+257,0,0,0,0,0,0,77,404,0,0,0,0,0,0,712,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,860,
+1848,0,0,652,629,0,0,0,0,13,377,0,1842,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1501,0,
+0,0,1906,0,0,0,0,0,0,0,0,0,0,0,0,0,491,234,171,0,0,0,0,631,1186,0,0,0,0,0,0,0,0,
+0,0,0,0,931,0,170,0,0,0,0,0,0,0,0,0,0,1587,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+765,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,424,0,0,714,0,0,0,0,685,0,0,0,0,0,
+0,285,0,0,0,0,0,0,429,0,0,0,0,0,0,0,0,0,0,71,18,0,0,0,0,0,0,0,0,0,0,116,828,0,0,
+0,0,0,0,289,0,0,0,0,0,0,0,0,675,0,0,0,1424,0,0,0,0,0,647,0,0,0,1334,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,36,209,0,0,0,0,0,0,0,342,0,0,0,928,0,0,0,0,0,1838,118,856,654,
+318,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,915,895,454,0,0,513,1425,0,0,
+0,0,0,0,791,0,153,0,0,0,0,0,0,796,909,445,345,0,0,0,0,0,0,0,0,578,0,0,0,1387,0,
+0,0,555,0,0,0,0,0,0,766,0,0,0,0,0,0,0,0,0,0,541,0,0,0,0,0,0,0,0,0,0,0,0,0,880,0,
+0,0,0,0,1506,0,0,983,0,768,0,0,0,0,584,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,737,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,226,30,426,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+117,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,462,0,0,0,385,0,398,0,0,0,0,0,0,
+0,0,0,347,0,0,0,0,125,1259,644,136,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,469,0,0,0,0,0,
+1367,0,0,0,0,0,0,0,0,0,0,0,719,0,0,0,0,0,0,0,0,0,0,0,0,0,1423,0,0,0,0,0,0,0,0,0,
+749,0,0,0,0,546,645,0,0,0,0,0,0,277,0,0,1275,0,0,0,0,0,0,0,453,536,555,0,0,987,
+1107,0,0,90,0,0,0,0,0,0,0,0,860,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+257,0,1768,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1071,0,0,0,0,0,0,0,0,0,0,0,0,0,83,
+0,835,0,0,0,0,0,0,0,2006,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,696,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,95,1718,0,0,0,0,0,0,0,26,0,550,0,0,0,0,0,901,0,0,0,0,0,
+0,822,0,0,122,0,0,0,807,0,0,0,0,0,262,0,620,601,34,0,0,170,0,0,0,0,537,0,0,0,0,
+0,0,0,0,0,332,0,0,208,1909,182,261,0,0,0,1721,0,0,0,0,0,933,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,1609,0,895,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,812,0,0,942,1916,0,0,0,0,
+0,0,0,778,0,0,0,137,0,1314,0,0,0,0,0,0,0,1661,0,0,0,0,0,0,0,1591,0,0,0,0,0,0,
+820,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,185,89,0,1160,230,6,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,63,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1740,0,0,177,
+170,0,1961,0,0,0,0,0,0,0,0,0,0,0,0,91,0,17,44,0,0,0,0,0,0,0,0,0,270,0,296,0,0,0,
+0,0,0,0,1523,0,0,0,0,0,0,0,0,0,0,757,7,0,0,0,0,0,0,0,0,0,0,530,588,0,0,0,0,0,0,
+0,0,0,786,0,0,0,0,0,580,627,88,447,57,0,0,0,0,0,0,0,0,845,735,0,0,0,0,0,31,15,0,
+460,521,12,424,0,0,0,1302,0,0,0,0,0,0,0,595,0,0,0,13,548,97,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,1472,452,1767,0,0,0,0,0,0,0,0,0,0,115,0,0,0,0,0,0,1543,0,1111,0,0,0,0,
+1,0,359,488,0,267,0,0,0,1983,0,0,0,0,0,0,0,1155,0,1575,0,1438,31,0,0,377,101,0,
+0,0,0,0,0,0,0,0,0,0,0,0,476,0,0,0,0,0,0,0,0,2023,0,0,0,0,0,1836,0,0,0,0,35,843,
+0,0,0,0,0,0,0,554,0,0,0,536,625,207,0,1371,0,0,0,424,785,336,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,896,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27,750,0,0,0,0,238,0,0,
+0,0,0,383,0,0,0,0,0,0,0,0,603,725,11,0,0,0,0,0,0,0,0,0,476,0,0,0,0,0,1552,0,0,0,
+0,0,0,0,680,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,435,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1431,0,0,13,112,0,0,356,0,0,0,0,0,0,0,0,0,0,1963,0,0,0,1244,18,0,0,0,0,0,0,867,
+0,0,0,0,0,0,50,708,73,592,0,502,0,0,0,0,0,0,161,347,0,0,0,0,470,33,0,246,571,10,
+0,465,614,0,237,0,0,0,0,0,24,18,0,506,0,0,0,0,0,0,33,309,0,0,0,0,0,0,0,0,0,0,
+140,0,0,0,0,1056,0,0,0,1704,0,0,0,0,0,0,0,1036,0,0,0,0,0,0,0,0,0,1315,432,86,
+264,524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,107,0,0,0,0,0,123,927,0,0,957,1149,0,0,
+0,0,0,778,0,502,196,0,0,0,0,1312,0,0,0,0,0,0,0,855,0,0,0,0,0,0,0,0,0,0,45,1400,
+0,0,0,1003,0,0,0,0,0,1097,0,0,0,0,0,0,0,0,545,612,0,0,0,0,0,0,0,0,0,0,0,0,54,0,
+0,0,0,172,0,0,0,1029,0,0,0,0,0,0,0,0,0,568,0,0,0,732,617,0,0,974,94,989,733,0,0,
+0,0,0,0,1789,0,0,665,2015,0,0,0,0,0,0,806,287,0,0,0,0,0,1539,0,0,0,0,0,0,0,0,0,
+0,182,1563,0,0,0,0,0,0,0,0,0,484,0,0,0,0,0,1623,0,0,0,0,0,0,0,0,878,1833,0,1569,
+0,0,0,0,0,0,0,0,93,0,715,994,0,0,0,0,0,63,0,591,0,0,0,0,0,0,0,749,0,0,0,0,547,
+366,0,0,0,1747,0,0,0,0,0,0,0,89,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1463,0,772,
+893,0,0,0,48,0,0,941,0,0,690,1785,106,440,0,0,0,0,0,0,0,0,0,0,32,0,332,216,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,852,0,
+0,416,564,0,918,0,1764,0,0,3,0,0,274,0,0,0,0,501,0,0,0,0,0,0,0,851,743,0,49,0,
+879,0,0,47,0,0,0,0,0,0,865,0,1202,0,0,0,0,0,0,47,272,0,0,0,0,0,0,0,0,0,0,0,1455,
+0,0,0,0,891,1911,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,761,0,0,0,0,0,0,0,0,0,407,0,
+183,0,0,490,0,0,0,0,0,0,0,35,731,0,0,0,0,0,0,0,819,0,0,0,0,0,0,0,0,0,0,0,0,0,
+575,0,0,0,0,45,818,0,0,77,222,0,0,0,0,849,1880,0,0,0,633,0,1308,0,0,0,0,0,0,0,0,
+0,0,86,0,0,0,0,0,0,0,0,0,0,0,0,0,0,817,0,0,0,0,0,0,0,0,0,882,0,0,0,914,0,0,0,0,
+0,0,0,0,0,0,865,0,0,426,399,58,0,0,0,0,0,0,538,102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,876,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,139,566,0,63,12,0,0,0,
+0,0,0,0,0,0,0,0,0,0,3,114,0,0,0,0,0,0,0,0,576,0,0,0,0,0,0,0,0,933,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,692,0,0,0,0,0,0,0,0,0,0,0,0,752,0,0,0,0,
+0,0,0,0,375,0,1011,0,0,96,0,0,0,0,0,0,0,0,0,148,0,0,0,0,0,0,0,0,0,0,0,337,56,
+666,0,246,394,0,0,0,0,0,0,0,0,437,0,0,0,506,0,0,0,0,1003,0,1163,0,328,0,0,0,0,0,
+0,0,0,1000,0,0,0,0,0,744,101,0,0,0,0,0,726,0,0,176,0,146,9,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,839,0,0,0,0,0,0,223,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,246,1931,29,0,0,1771,0,0,0,0,0,846,6,157,0,0,0,0,0,0,0,0,0,875,0,0,477,
+773,177,639,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1747,0,0,0,0,158,873,0,659,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,391,0,0,0,0,0,0,0,0,0,0,0,0,668,883,0,78,628,0,0,0,
+0,0,0,0,0,0,0,0,0,1460,0,962,0,0,0,0,0,460,0,0,0,0,0,0,0,0,0,0,0,0,0,0,34,199,0,
+0,0,388,474,0,271,0,333,608,0,0,0,0,0,0,49,0,988,0,707,617,0,0,0,0,0,0,0,756,0,
+0,0,0,0,1583,0,0,0,0,0,0,0,0,0,0,285,0,0,0,0,0,0,0,0,0,0,0,0,0,0,344,0,0,0,0,0,
+0,0,0,515,1709,0,0,0,0,0,0,0,0,404,0,0,0,0,500,0,0,0,0,0,0,0,0,0,68,216,0,0,0,0,
+0,0,0,488,353,0,0,177,236,0,0,458,490,0,0,0,0,0,0,756,1504,0,757,0,1735,0,0,108,
+598,0,0,0,0};
+BROTLI_INTERNAL const uint8_t kStaticDictionaryHashLengths[32768] = {
+8,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,6,0,0,0,0,0,6,0,0,0,0,0,0,12,0,0,0,0,4,22,5,0,
+4,0,0,0,0,0,0,0,0,0,0,0,0,14,6,0,0,0,5,0,0,0,0,0,0,0,7,13,0,0,4,0,0,0,0,0,0,0,0,
+0,6,0,0,0,0,8,0,0,0,0,0,0,7,0,7,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,4,0,0,0,4,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,10,4,0,5,13,7,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,0,0,8,7,0,0,9,0,8,0,0,0,0,0,0,6,0,
+0,9,0,0,0,11,0,0,6,8,7,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,7,0,0,0,6,8,0,0,0,0,0,
+0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,9,0,0,0,8,4,13,7,0,0,0,0,0,
+7,0,5,0,0,0,0,8,5,0,5,0,0,8,7,0,0,0,0,0,0,0,0,0,0,9,0,0,0,8,0,0,0,10,4,0,5,0,4,
+0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,6,0,6,0,0,0,0,8,7,0,4,9,4,0,0,0,0,0,0,
+9,0,0,0,8,5,0,0,0,6,0,0,0,0,0,0,0,0,0,7,18,0,0,0,0,4,9,0,0,4,0,6,0,0,0,6,0,6,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,6,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,5,8,7,0,0,0,
+0,9,0,0,0,0,0,0,0,8,6,10,6,0,0,0,4,0,6,8,6,0,0,0,4,0,0,0,0,0,5,0,0,0,6,0,0,0,0,
+10,0,12,7,0,0,0,0,0,4,0,0,0,0,0,5,0,0,8,7,0,0,0,0,0,0,0,0,9,5,0,0,0,0,0,0,0,0,0,
+0,0,0,0,6,11,0,0,0,0,0,0,0,0,0,8,7,0,0,10,0,0,0,0,0,0,0,0,6,10,0,17,0,8,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,8,6,9,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+7,0,0,11,4,0,5,0,0,0,0,0,0,0,0,0,0,10,5,0,6,8,5,0,0,0,0,0,0,0,0,0,0,11,5,0,0,0,
+0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,9,0,0,0,0,5,0,0,0,0,0,0,0,0,0,6,0,0,8,7,0,0,0,0,0,
+0,0,0,0,0,0,5,0,0,0,6,0,0,10,0,0,0,20,0,0,0,0,0,0,0,0,6,9,5,0,0,0,0,10,4,8,0,0,
+4,13,0,0,0,0,0,0,0,9,0,9,0,0,0,0,0,0,0,0,0,0,0,0,4,8,6,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,12,0,0,4,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,12,5,0,0,10,4,10,7,13,
+0,0,0,0,0,0,0,0,6,0,6,0,6,0,0,0,0,0,0,19,0,0,4,12,6,9,0,0,0,0,4,0,4,11,0,0,0,0,
+0,0,0,12,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,4,0,0,0,0,0,0,0,0,0,6,0,0,0,0,
+0,5,0,0,0,0,0,6,0,0,0,6,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,8,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,17,0,0,0,9,6,0,0,0,0,0,4,0,4,0,0,0,0,0,0,0,0,0,4,0,0,0,
+6,0,0,0,0,0,0,0,0,0,0,13,6,0,0,0,0,0,0,0,0,0,0,0,6,8,0,0,0,0,0,0,0,0,0,0,6,0,0,
+0,0,0,5,0,0,0,0,14,4,0,0,0,4,12,5,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,8,6,0,
+0,0,0,0,0,12,0,9,6,0,0,0,0,13,0,0,5,0,0,0,0,0,4,0,6,0,7,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,13,0,9,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,5,0,0,0,6,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,6,0,6,0,0,0,0,0,0,0,0,8,7,8,4,0,0,0,0,0,0,0,0,0,0,0,7,0,7,0,0,0,4,0,
+0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,8,6,8,4,0,0,0,0,0,6,0,7,0,
+0,0,0,0,0,0,0,0,0,10,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,7,0,0,0,0,0,0,9,5,0,0,
+0,0,0,7,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,9,4,0,0,0,0,0,0,0,4,
+12,5,11,0,0,0,0,0,0,0,0,0,8,7,0,5,0,0,8,7,0,5,0,0,0,0,8,0,0,0,0,7,0,4,10,0,0,0,
+0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+13,5,0,0,0,4,0,0,0,0,0,6,0,0,0,0,0,0,14,5,0,0,0,7,0,0,10,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,6,0,4,0,5,0,0,0,0,8,5,0,0,0,0,0,0,9,5,9,0,0,0,0,0,0,0,0,6,9,0,
+0,4,0,0,0,7,0,0,0,6,0,0,10,4,0,0,0,0,0,6,0,0,10,0,0,0,8,5,0,0,0,0,0,0,0,0,10,0,
+0,0,0,0,18,4,12,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,6,0,0,0,0,8,7,0,0,0,
+0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,8,4,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,
+0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,8,0,0,0,0,0,0,6,0,0,0,4,10,5,0,0,0,0,0,0,0,0,0,0,
+0,4,8,7,0,0,8,6,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,6,0,
+0,0,0,8,6,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,6,0,7,0,0,0,0,0,0,
+0,0,0,0,0,6,0,0,0,7,0,0,0,0,0,0,8,7,0,0,0,0,8,0,12,6,0,6,0,0,0,0,9,7,11,7,0,0,0,
+0,0,0,0,0,0,0,0,0,11,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,8,7,0,0,10,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,6,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,
+0,0,0,6,0,0,0,7,0,4,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,14,0,0,0,0,0,8,4,0,4,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,20,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,12,5,0,7,0,5,0,0,10,0,0,7,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,6,0,4,9,7,0,0,0,
+0,0,7,0,0,0,0,0,0,10,0,9,0,9,0,0,0,0,0,0,0,0,4,9,0,0,0,0,6,0,0,0,0,0,0,0,0,11,4,
+0,6,0,0,0,0,0,0,8,0,8,0,0,0,0,0,0,0,0,0,0,4,0,0,0,5,0,0,0,0,0,0,0,0,13,6,0,0,11,
+0,0,0,0,0,0,0,9,7,0,0,0,0,0,0,0,0,0,0,0,6,18,0,0,4,0,0,0,0,0,0,0,6,0,0,0,0,0,0,
+0,5,0,0,0,0,0,0,0,0,9,7,0,0,0,0,0,0,0,6,0,0,0,0,9,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,11,7,0,0,0,0,0,6,0,0,0,7,0,0,0,0,0,0,0,0,11,
+4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,4,0,0,0,0,8,
+6,0,0,0,0,0,0,9,6,0,0,0,0,0,4,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0,0,0,
+0,6,0,6,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,6,0,6,0,0,10,6,0,0,0,7,0,0,8,0,8,7,0,
+0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,6,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,9,0,0,0,0,6,0,0,0,0,0,0,0,5,0,0,18,0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,0,
+0,0,0,8,7,0,0,0,0,0,0,0,0,12,0,12,0,0,0,11,6,0,5,0,0,12,0,12,5,0,7,11,6,0,0,11,
+0,0,0,12,0,0,4,12,7,8,6,0,0,0,0,8,5,0,0,0,0,0,0,0,4,11,0,0,6,0,7,0,0,0,0,0,0,0,
+5,0,6,0,0,0,0,8,0,10,0,0,0,0,0,0,0,0,0,0,0,9,7,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,
+0,0,0,0,0,0,0,0,0,11,7,0,0,0,0,0,0,10,0,0,5,0,0,12,6,0,0,0,0,0,0,10,6,0,0,0,0,8,
+6,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,5,0,0,0,0,11,0,10,6,0,0,8,6,0,0,0,6,0,7,10,6,0,
+0,0,7,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,10,7,0,0,0,0,
+10,6,0,0,0,0,0,0,8,5,11,0,8,4,0,0,0,4,0,0,0,0,9,4,8,0,0,0,0,0,0,0,11,6,0,0,0,0,
+10,7,0,0,0,0,0,6,0,0,0,0,0,6,0,0,0,7,0,0,0,0,9,6,0,5,0,7,0,0,0,0,0,7,0,0,11,0,0,
+0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,
+0,0,0,0,13,0,8,6,13,0,0,0,11,7,0,7,0,6,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,9,6,0,0,0,0,0,0,0,0,0,6,0,0,9,6,0,6,0,0,0,0,0,5,0,0,0,0,0,0,0,0,
+0,0,0,0,0,5,9,0,0,0,0,0,0,0,0,0,0,4,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,9,7,0,7,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,
+5,11,5,0,0,0,0,0,0,0,0,0,4,0,7,0,6,0,0,0,6,20,0,0,0,10,7,0,5,14,4,0,0,0,0,0,0,0,
+0,0,6,0,0,0,0,8,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,
+0,0,6,0,4,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,9,7,0,0,11,6,15,0,0,0,0,0,
+10,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,7,0,0,0,0,0,0,0,0,9,7,13,0,0,0,0,0,
+0,7,0,0,8,6,0,0,0,0,0,0,0,0,9,4,0,0,0,0,8,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,5,0,0,0,0,0,0,0,0,0,0,0,0,8,5,0,4,0,0,0,0,0,0,0,0,0,0,12,6,8,0,12,0,0,7,0,0,0,
+0,0,5,10,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,
+14,0,0,0,0,0,0,0,0,0,0,0,0,5,0,5,8,7,10,7,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,6,18,6,
+14,7,0,0,0,0,0,0,0,0,11,6,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,6,0,0,0,0,0,0,0,6,0,0,11,7,0,0,10,7,0,0,0,6,8,6,0,0,0,0,0,0,0,6,0,0,
+19,0,0,0,9,5,0,0,0,0,0,0,11,7,0,0,0,7,0,6,0,0,11,0,0,0,0,4,8,0,0,0,0,0,0,0,0,6,
+0,0,0,0,0,6,0,0,8,4,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,7,
+0,7,0,0,0,7,15,0,0,5,0,0,0,0,10,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,7,0,0,
+0,0,0,0,0,0,9,6,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+11,7,0,0,0,0,0,0,0,6,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
+0,0,5,0,4,0,0,0,4,0,4,0,0,0,0,0,0,0,0,0,6,0,0,0,0,11,6,0,0,8,5,14,0,0,4,0,0,0,7,
+17,0,0,0,0,0,0,0,13,5,0,0,0,0,0,5,0,0,0,5,0,0,0,0,16,6,0,4,0,0,0,0,0,0,12,0,0,0,
+0,0,0,6,0,0,0,0,0,0,0,0,0,0,12,5,0,5,0,6,10,0,12,0,0,0,0,0,0,0,0,7,0,0,0,0,8,4,
+0,0,0,0,0,0,0,0,0,0,8,7,0,0,8,0,0,0,8,0,0,6,0,7,0,0,0,5,0,6,0,4,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,6,0,0,22,0,0,0,0,0,0,0,0,7,0,0,0,0,0,6,0,0,0,
+0,0,0,0,0,0,0,13,0,0,0,0,0,0,0,18,0,0,0,9,4,0,0,8,0,9,7,0,0,0,0,0,0,8,6,0,0,0,0,
+0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,9,7,0,0,0,6,0,0,14,0,0,0,0,
+0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,7,10,4,
+0,6,0,0,0,0,0,0,8,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,9,6,0,0,0,0,0,0,
+0,0,11,6,12,7,0,0,0,0,0,0,0,6,0,5,0,0,0,0,0,0,9,6,11,6,0,0,0,0,9,5,0,0,0,0,0,0,
+0,6,8,5,0,0,0,0,8,0,10,0,0,0,0,0,9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,
+5,10,7,0,0,0,5,8,7,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,6,0,0,0,0,0,4,8,7,0,0,0,6,0,0,
+0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,7,0,6,0,0,0,0,0,0,0,0,0,0,0,0,22,
+0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,5,0,0,0,0,0,0,0,
+0,0,0,0,0,17,0,0,6,0,6,12,4,19,6,0,0,0,0,16,0,0,0,0,7,15,7,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,4,10,4,0,0,8,7,0,7,0,0,9,
+4,0,6,0,0,0,4,0,5,0,0,0,7,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,7,10,0,0,0,0,0,11,7,0,0,
+0,0,12,6,0,0,0,0,0,0,0,6,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,
+0,0,0,0,0,0,0,0,0,10,4,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,8,7,0,0,
+0,0,0,0,0,6,0,0,0,4,0,0,11,4,0,0,12,7,0,0,0,0,9,0,0,6,0,0,0,0,0,0,0,0,0,5,0,0,0,
+4,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,6,0,0,0,0,0,0,9,4,0,6,0,0,0,0,0,4,
+0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,6,0,0,0,5,0,0,0,0,0,0,0,0,0,7,9,6,0,7,0,
+0,0,0,0,0,0,6,0,0,0,0,8,6,0,0,0,0,10,6,11,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,5,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,5,0,4,8,0,0,0,0,0,9,7,0,0,0,0,0,0,
+13,5,0,0,0,0,8,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,8,5,0,0,11,7,0,0,0,0,0,0,8,6,0,
+0,0,0,0,7,0,4,0,0,0,0,0,0,0,5,0,6,0,5,0,0,0,0,0,0,0,0,0,0,0,0,10,4,9,0,0,0,0,0,
+0,4,0,0,0,0,10,5,10,7,0,0,0,0,0,0,0,0,16,7,0,0,0,0,0,7,0,0,0,0,11,0,0,0,0,0,0,0,
+0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,5,0,4,0,0,0,7,0,0,0,0,0,0,13,0,0,
+0,0,0,0,0,0,0,0,7,0,4,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,13,7,0,7,0,4,16,0,0,0,0,6,8,7,9,7,0,0,0,0,8,6,0,0,0,0,0,0,0,0,0,0,0,0,
+0,6,0,0,8,5,0,4,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,5,11,7,0,0,11,
+0,0,0,0,0,9,5,0,4,0,0,0,0,9,7,8,6,0,0,0,0,0,0,10,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,
+0,7,0,0,0,0,0,0,0,0,0,0,0,4,10,6,0,7,0,0,0,0,0,0,0,5,0,0,0,0,0,0,10,7,10,0,0,0,
+0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,7,0,6,8,7,12,4,0,0,0,0,0,0,0,5,14,
+0,0,0,0,0,0,4,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,
+6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,4,0,0,20,4,0,0,0,7,0,6,0,0,0,0,0,0,0,0,8,0,
+0,6,15,0,0,6,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,12,0,0,0,9,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,5,0,0,0,0,0,0,8,6,0,0,18,0,0,0,10,0,0,0,0,0,0,0,0,6,0,0,0,6,0,0,9,6,0,
+6,0,0,0,0,0,0,0,0,9,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,7,0,0,0,0,9,0,9,0,0,4,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,9,5,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,10,0,0,0,0,7,0,0,0,0,0,0,0,0,0,7,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,8,0,8,0,0,0,16,0,0,0,0,0,0,0,
+0,0,0,6,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,8,0,0,0,11,0,0,0,0,0,0,0,0,0,0,
+6,0,0,0,0,11,0,0,0,9,7,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,7,0,7,0,6,
+0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,6,
+0,0,0,0,0,0,0,6,0,0,18,0,8,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,7,0,4,0,0,0,
+0,0,0,0,0,0,0,8,0,0,0,0,0,16,0,0,0,0,0,16,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,18,0,0,0,0,0,0,0,0,0,9,7,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,6,0,4,0,
+0,0,0,0,0,0,0,9,4,0,0,0,0,12,5,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,12,5,0,0,0,0,0,0,0,5,0,0,10,6,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,9,0,0,0,11,0,0,6,0,6,0,0,
+0,7,0,0,0,0,0,0,8,0,0,0,0,6,0,0,0,0,0,0,19,0,0,0,12,0,9,0,0,0,0,0,10,7,0,0,0,0,
+0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,16,7,12,
+0,0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,12,6,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,10,5,0,0,0,0,0,0,0,4,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,7,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,4,0,0,0,0,0,0,0,4,0,0,9,0,0,0,8,0,12,4,0,0,0,0,
+0,4,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,12,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,5,0,
+0,0,0,0,0,13,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17,0,0,0,0,0,0,0,8,6,0,6,0,0,0,0,0,0,
+0,4,0,0,0,0,0,6,0,0,9,0,0,0,0,0,0,6,0,0,0,0,0,0,11,0,0,0,0,0,0,0,10,6,0,0,0,0,8,
+6,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,7,0,0,0,0,0,7,0,6,
+10,7,0,0,10,5,11,6,0,0,0,0,0,7,16,0,0,0,0,6,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,5,0,0,0,7,9,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,0,0,0,
+0,0,0,0,0,8,7,0,0,0,0,11,6,0,0,0,0,0,0,0,0,0,6,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+8,7,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,12,7,0,7,0,0,0,
+0,0,0,0,6,0,0,0,0,9,0,0,0,23,0,0,0,0,0,10,5,0,0,0,0,0,0,0,0,0,4,0,0,11,7,10,0,0,
+0,0,0,0,0,0,0,0,0,0,6,0,0,8,7,0,7,0,0,8,7,8,0,0,0,0,0,0,0,0,0,0,0,14,5,0,0,0,0,
+0,0,0,0,18,6,8,7,0,0,0,0,0,0,0,4,0,0,0,0,0,0,11,0,0,0,9,7,12,6,0,0,0,0,0,0,0,0,
+0,0,12,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,0,0,0,7,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,6,8,7,0,0,0,6,10,0,0,0,9,0,0,0,0,0,0,0,0,0,8,6,0,0,0,0,0,6,
+10,7,0,0,0,7,0,0,8,4,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,19,0,0,0,0,0,0,
+0,0,0,8,7,8,6,0,0,11,7,10,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,4,8,7,0,0,0,0,0,0,0,0,
+0,5,0,0,13,0,0,0,0,5,0,0,9,7,0,0,0,0,0,0,0,4,0,0,11,0,0,7,0,0,0,0,0,0,0,0,0,6,0,
+0,0,0,0,0,12,7,19,0,8,6,0,0,0,0,0,6,0,0,0,0,0,0,0,0,10,6,8,0,0,0,0,0,0,0,0,0,0,
+6,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,7,0,0,12,0,0,0,0,6,9,6,
+14,0,0,0,0,0,0,6,0,5,0,0,8,7,0,0,0,6,0,4,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,4,0,6,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,5,0,
+7,0,0,10,0,9,7,0,6,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,12,6,0,0,0,0,0,5,0,6,0,0,0,0,
+0,0,0,0,0,0,0,6,0,0,0,0,9,7,0,0,0,0,0,0,11,6,0,0,0,0,0,0,0,0,0,0,11,7,0,0,13,7,
+0,0,0,0,0,0,0,0,12,0,0,4,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,6,11,5,0,5,13,0,8,0,
+0,0,0,6,0,0,0,0,0,0,11,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,11,5,
+9,6,0,0,0,4,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,6,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,10,0,0,0,8,5,0,0,9,0,0,0,8,7,9,0,0,0,0,0,0,0,0,7,0,6,0,0,0,0,0,0,0,0,0,
+0,11,0,13,6,0,0,9,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,
+0,0,0,0,0,5,21,6,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24,5,0,0,0,0,0,0,0,0,10,0,8,0,
+0,6,0,0,0,4,0,0,9,0,0,0,0,0,0,0,0,0,0,4,0,0,8,6,0,6,0,7,10,0,8,4,0,4,0,0,0,0,0,
+5,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,6,12,0,0,7,0,0,0,5,0,0,
+0,0,0,0,0,0,0,6,0,0,8,6,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+15,7,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,6,0,0,24,7,0,0,0,0,0,0,0,0,0,
+7,0,0,0,0,0,0,0,0,0,0,9,6,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,6,0,
+0,0,0,0,0,0,0,0,0,0,0,0,6,0,6,0,4,12,0,0,7,0,0,0,0,0,5,0,0,0,0,0,0,0,0,15,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,8,0,0,0,
+0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,6,0,0,0,0,0,0,9,0,9,6,
+0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,8,6,0,0,0,0,0,0,0,0,0,0,8,4,0,7,0,0,0,0,0,0,0,0,
+22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,4,0,7,0,0,21,7,0,7,9,6,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,8,0,0,6,
+0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,8,7,0,0,0,0,23,0,0,0,0,7,0,0,0,
+4,0,0,0,0,0,0,0,0,9,4,11,7,0,5,0,0,0,0,11,0,0,4,20,0,0,0,0,0,0,0,0,0,0,0,11,5,0,
+7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+21,0,0,0,0,0,0,7,0,0,0,0,0,0,0,5,0,0,0,0,0,6,0,0,0,0,11,6,0,0,0,0,0,0,0,0,9,6,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,5,0,4,9,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,
+0,0,0,10,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,8,7,0,0,11,7,0,0,0,0,0,0,0,4,
+0,4,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,6,0,0,0,5,0,0,0,0,0,0,0,0,0,0,8,7,0,
+0,0,0,0,0,0,0,0,6,0,0,21,6,0,0,0,0,0,6,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,14,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,9,6,0,0,8,0,0,7,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,
+0,0,0,8,7,0,0,11,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,5,0,0,0,7,13,7,10,4,0,
+0,0,6,0,0,0,0,0,0,0,0,0,5,10,0,0,0,0,0,0,5,0,0,0,7,0,0,0,0,0,0,8,4,0,0,0,0,0,6,
+0,0,0,0,0,0,0,0,0,0,12,7,0,6,0,0,10,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,6,0,
+0,0,0,0,7,0,0,8,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,10,5,0,6,0,0,0,0,0,4,0,0,0,0,
+0,0,0,0,0,4,0,0,0,0,9,0,11,4,0,0,0,6,0,0,0,5,12,7,0,5,0,0,0,0,0,4,0,0,0,7,0,0,0,
+0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,4,13,6,10,0,0,0,17,0,0,4,0,0,0,0,0,6,0,4,0,5,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,11,7,0,0,0,7,0,0,0,6,0,0,0,0,0,0,
+0,6,0,4,0,0,0,0,8,0,0,0,0,5,0,0,0,0,0,4,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,9,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,5,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,12,0,0,
+0,0,7,0,0,0,0,0,0,0,0,0,0,0,7,0,0,16,4,0,0,11,0,8,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+8,7,0,4,0,0,0,0,0,0,0,4,0,0,0,0,0,0,8,6,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,
+7,0,0,0,0,0,0,9,0,0,0,0,0,0,0,12,5,10,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,5,0,
+5,18,7,0,0,14,0,0,0,0,0,0,0,9,4,0,7,0,0,0,0,0,0,0,5,0,0,0,6,0,0,0,6,0,0,0,0,0,0,
+8,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,5,0,0,0,7,0,0,0,0,0,0,11,0,0,0,
+10,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,14,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+11,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,14,6,0,0,0,0,11,4,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,10,7,0,6,0,0,9,0,9,5,0,0,0,0,0,
+0,0,0,10,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,6,0,0,8,5,0,0,0,0,0,0,0,0,0,0,11,4,0,6,
+0,6,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,12,4,0,6,8,6,0,0,0,0,0,0,0,0,0,0,8,0,0,5,0,0,0,0,0,0,0,7,0,0,13,0,0,0,0,0,0,0,
+0,0,0,0,0,0,9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,12,7,0,6,0,0,0,
+0,0,0,0,0,0,0,0,0,13,4,0,7,0,0,0,7,0,7,0,0,0,0,0,0,0,0,10,4,0,0,0,0,0,0,0,0,0,0,
+9,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,10,6,21,5,0,0,0,0,8,0,0,0,0,4,0,
+7,0,0,0,0,0,0,11,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,4,0,0,0,0,0,0,
+0,7,9,6,11,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,7,10,0,0,0,0,0,0,6,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,19,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,18,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,7,0,0,0,0,0,0,9,4,10,4,0,7,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,9,7,9,7,10,4,0,7,0,0,0,0,0,0,0,6,12,0,
+0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,
+0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,8,0,
+0,0,0,0,0,5,0,0,8,7,0,0,0,7,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,
+0,0,0,0,4,0,0,8,0,0,6,0,0,0,7,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,6,0,0,0,6,0,6,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,7,9,7,0,0,0,4,8,0,0,0,0,6,11,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,10,0,0,0,0,0,0,0,13,4,0,0,
+12,6,0,6,0,0,0,0,8,7,0,7,0,0,0,0,0,6,0,0,0,0,0,0,12,6,0,4,0,0,0,0,0,0,0,0,0,0,9,
+7,22,0,0,0,0,4,0,0,0,0,0,6,0,0,0,4,0,0,9,0,0,6,0,0,24,7,0,7,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,10,6,0,5,0,0,0,0,0,0,0,7,0,0,8,0,0,0,0,0,0,0,10,5,0,0,0,0,0,0,0,0,0,7,0,
+7,0,0,0,0,0,0,13,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,0,
+0,0,0,0,0,7,12,0,9,4,0,0,0,0,0,0,0,0,0,4,0,0,0,0,8,0,0,0,0,0,0,0,0,4,0,0,0,7,0,
+0,0,0,8,7,0,0,0,0,0,0,0,0,0,4,18,0,0,0,0,0,10,0,0,5,0,0,11,0,0,0,0,0,0,5,0,6,0,
+0,0,6,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,6,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,
+4,0,0,0,0,0,0,10,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,
+0,0,0,5,8,0,0,0,0,0,0,0,8,6,0,0,0,0,0,0,0,0,20,7,0,0,0,0,0,0,0,0,0,0,0,4,9,0,12,
+6,8,0,14,7,0,5,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17,0,0,0,0,0,10,0,0,
+0,0,0,0,0,0,0,0,0,0,6,0,6,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,9,6,0,7,12,0,0,0,0,4,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,7,
+0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,4,0,0,9,0,
+12,6,0,5,0,0,0,6,0,4,0,6,0,0,0,0,0,0,0,0,10,7,0,0,0,0,0,0,8,0,0,0,0,4,0,0,0,0,
+10,0,0,0,0,0,0,0,8,6,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,5,0,0,0,0,0,0,0,0,0,0,0,
+6,0,0,12,6,20,5,0,0,0,0,0,0,0,0,0,0,0,0,9,5,0,5,0,0,0,6,13,7,0,0,0,0,15,6,0,0,0,
+6,0,0,13,7,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,5,0,7,0,0,0,0,0,4,0,0,0,0,0,0,0,0,
+10,6,0,0,0,0,0,6,0,0,0,0,9,0,0,0,0,0,19,6,0,0,0,0,0,0,0,0,0,0,13,0,11,0,0,0,0,0,
+0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,10,6,0,0,0,0,0,0,0,0,10,0,0,6,0,0,0,0,8,0,0,
+0,9,0,15,4,0,6,0,0,0,0,0,6,12,0,0,0,0,0,0,0,14,7,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,
+0,0,0,0,0,8,7,0,0,0,0,0,6,10,0,0,0,0,0,0,0,0,7,8,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,7,10,5,0,0,0,0,8,0,0,0,0,4,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,6,12,0,0,0,10,7,0,5,0,6,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,6,0,4,0,0,0,0,0,7,0,0,0,0,0,0,0,4,9,6,0,0,0,7,0,0,0,0,0,0,0,0,8,6,0,0,
+0,0,0,0,0,4,12,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,7,0,
+0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,12,6,0,6,9,4,0,0,8,4,0,6,
+0,0,0,0,0,4,0,0,0,0,0,0,0,6,0,0,9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,8,0,0,6,13,4,0,5,8,0,0,0,0,0,0,0,8,0,0,0,10,5,0,0,9,0,0,0,0,0,0,6,0,0,
+24,0,0,0,0,0,0,0,8,0,0,7,0,0,12,0,8,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,7,0,
+6,8,0,10,0,9,7,0,0,0,5,0,0,0,0,0,0,0,4,8,5,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0,0,4,0,0,0,0,0,6,0,0,0,0,0,5,0,0,0,0,8,0,0,
+0,0,0,8,6,0,0,0,0,0,0,0,0,0,0,8,6,0,0,0,0,10,4,0,0,0,0,0,0,0,6,0,0,0,4,20,0,0,7,
+10,6,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,7,0,0,0,0,9,6,0,0,0,0,0,0,0,4,
+12,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,9,4,0,5,0,0,
+0,0,0,0,0,6,0,6,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,6,9,0,0,0,0,7,0,0,0,0,0,6,0,5,0,0,0,0,0,0,0,0,9,0,0,0,
+0,6,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,17,7,0,0,13,6,14,6,0,0,0,0,
+8,0,0,0,0,0,0,7,12,7,8,7,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,4,0,0,0,0,0,4,0,
+0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,12,4,0,0,10,7,0,0,0,
+0,0,0,10,0,0,6,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,12,0,0,6,
+0,0,0,0,0,0,0,0,8,7,12,0,0,0,0,0,0,6,0,6,0,4,0,0,18,6,0,0,0,6,0,0,0,0,0,6,10,6,
+0,0,0,0,0,0,8,7,14,0,0,0,0,0,0,6,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,
+0,0,0,8,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,8,7,0,0,10,5,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,
+0,0,9,4,8,0,0,0,0,0,0,4,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,4,0,0,0,0,
+0,6,0,0,9,7,0,0,0,0,0,5,0,0,0,0,8,7,0,0,14,0,0,0,0,6,0,0,0,0,0,0,9,6,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,5,0,7,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,
+0,0,0,6,0,0,0,6,0,4,0,0,0,0,0,4,0,0,0,0,12,0,0,7,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,
+0,12,0,16,6,0,0,0,0,0,0,11,7,0,4,8,7,0,0,0,0,0,6,0,0,0,0,16,0,0,0,0,6,0,0,0,0,0,
+0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,4,10,7,0,0,0,0,0,0,12,7,0,0,0,0,0,0,0,0,0,0,
+0,0,10,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,13,4,0,0,10,0,0,0,0,0,0,0,0,0,19,0,0,0,
+0,0,0,0,0,0,0,0,0,0,8,6,22,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,
+5,0,0,0,0,0,5,0,0,0,0,0,5,0,0,0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+4,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,7,0,0,18,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,14,7,0,0,11,5,0,0,0,5,0,0,0,0,12,5,0,0,0,0,0,0,0,0,0,0,24,6,0,0,
+0,7,0,4,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,7,0,4,0,0,0,0,8,7,0,0,
+9,6,0,0,14,5,0,0,0,6,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,12,6,0,0,0,0,0,0,0,6,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,7,0,0,0,5,0,0,
+0,0,12,7,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,6,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,6,0,0,13,7,0,0,0,0,0,0,14,0,11,4,0,
+0,0,4,0,0,0,0,14,5,0,0,0,0,0,5,11,5,0,0,0,0,22,5,0,0,0,0,0,7,0,0,0,0,0,4,0,0,0,
+4,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,6,0,0,17,0,10,0,0,0,8,0,0,0,19,
+5,18,7,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,10,6,0,6,0,0,0,0,10,4,0,4,0,
+0,0,0,0,0,14,7,0,5,0,0,0,0,0,6,0,0,0,0,0,0,0,0,8,0,9,6,12,0,0,6,0,0,0,0,0,0,0,0,
+12,0,10,6,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,4,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,5,13,0,9,7,0,0,0,0,0,0,0,0,0,0,0,7,9,7,0,0,8,0,0,0,0,0,
+22,0,0,0,0,0,0,0,23,6,14,0,0,0,0,0,0,7,0,0,0,0,11,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,
+0,0,10,0,0,6,0,0,0,0,0,0,0,0,0,6,0,0,8,5,0,0,0,0,0,0,0,0,0,7,11,6,21,0,0,0,0,0,
+0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,
+0,0,0,0,0,0,0,4,9,7,0,0,0,0,0,0,12,0,0,0,0,7,0,0,0,0,0,0,0,0,10,4,0,0,0,0,0,0,9,
+0,0,0,20,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,11,7,0,0,0,0,0,0,0,6,15,0,0,
+0,0,0,0,0,0,0,0,0,0,0,12,4,0,5,0,0,0,0,0,0,11,7,17,6,0,0,0,0,0,0,15,6,0,7,0,0,0,
+0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,6,0,5,
+0,0,11,0,11,7,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,
+17,0,0,0,0,6,0,0,0,5,0,0,0,0,0,0,8,7,9,6,0,0,14,0,0,0,0,0,0,0,0,0,16,0,0,0,0,0,
+8,7,0,4,0,0,0,0,0,0,0,6,0,5,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,
+0,0,0,5,0,4,0,0,8,7,0,6,12,5,0,7,18,7,0,0,8,5,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,
+10,0,11,0,0,0,0,0,0,0,0,0,0,0,9,0,0,4,0,6,0,7,0,0,0,0,0,6,0,0,0,6,0,0,0,0,0,0,0,
+7,0,0,0,0,8,0,0,0,15,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,10,6,0,0,0,0,0,0,0,0,0,
+0,0,6,0,0,0,0,23,0,0,0,10,7,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,5,0,0,0,0,0,0,8,6,0,0,
+0,0,0,0,12,7,9,7,0,0,10,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,9,0,8,7,0,0,0,
+6,0,6,0,4,0,5,0,0,0,0,0,5,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,7,10,5,0,0,11,6,0,0,0,0,0,0,0,6,0,6,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,4,9,7,0,
+0,0,0,11,7,0,0,0,0,0,5,0,0,0,7,0,0,0,0,23,6,11,4,0,0,0,0,0,0,9,0,0,0,10,6,0,0,0,
+0,9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,6,0,0,10,6,0,0,0,7,0,0,
+0,0,0,0,0,0,0,0,20,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,
+6,11,7,0,0,0,0,0,0,0,0,0,0,0,0,10,5,0,0,0,6,0,0,0,5,0,6,0,6,0,0,0,0,0,0,0,0,0,0,
+0,6,0,0,0,0,8,7,0,5,0,0,0,0,0,6,0,0,0,0,0,0,0,4,10,0,8,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,10,6,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,6,0,0,
+0,0,0,0,0,0,10,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,11,6,0,4,0,0,14,5,0,7,0,0,0,0,0,6,16,0,0,0,0,0,0,0,10,0,0,7,15,0,0,0,11,7,0,0,
+0,0,0,0,0,0,0,0,8,7,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,5,0,0,0,
+0,8,0,0,6,0,0,0,0,0,0,9,5,0,0,23,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,6,0,
+0,0,0,0,0,0,7,0,0,0,0,15,7,0,0,0,0,8,0,0,0,14,0,0,0,0,0,0,0,16,7,0,0,0,0,0,7,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,12,6,11,7,
+9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,
+7,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,12,0,10,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,8,0,0,5,8,7,10,6,0,0,0,7,0,0,0,0,12,6,
+0,0,9,0,0,0,12,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,4,10,0,0,0,10,5,0,0,0,0,0,0,9,6,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,6,0,0,9,5,0,4,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,9,0,0,5,0,0,8,7,8,
+6,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,6,10,0,9,4,0,0,0,0,0,0,0,6,
+11,0,0,0,0,0,0,0,0,0,0,0,8,0,0,6,0,6,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,8,7,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,
+0,0,0,10,0,0,0,8,7,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,
+0,0,8,4,0,5,0,0,0,0,0,0,0,7,0,0,0,6,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,22,0,0,0,0,0,0,0,8,5,0,0,0,
+0,0,0,0,7,0,0,0,6,0,0,0,6,0,6,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,5,0,6,0,7,0,0,0,0,
+20,0,0,0,0,0,0,0,0,0,0,7,9,0,0,0,0,0,0,6,0,6,0,7,0,0,0,7,0,0,0,0,0,0,0,4,0,0,0,
+0,0,0,14,7,0,0,0,5,0,0,22,4,10,0,0,0,0,0,0,4,8,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,11,5,13,0,0,0,0,0,0,0,0,0,8,0,0,7,0,0,0,0,0,4,0,0,0,4,0,0,0,0,0,0,10,7,0,
+0,0,0,0,0,0,6,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,7,0,7,14,6,0,0,0,0,9,5,
+0,0,0,0,0,6,0,0,0,5,10,0,8,6,0,0,0,0,0,0,0,0,9,7,0,0,0,0,0,0,0,6,0,0,8,4,0,6,0,
+0,0,5,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,7,
+14,0,0,5,0,0,18,0,8,4,0,6,0,0,20,0,13,0,0,0,0,7,0,4,0,0,0,0,0,4,8,4,0,0,0,0,0,6,
+0,0,0,0,0,4,0,0,0,4,0,0,0,0,0,4,0,0,0,0,0,0,0,0,14,0,0,0,0,0,9,7,0,0,9,0,0,0,0,
+0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,20,0,14,0,0,4,0,6,8,5,0,0,0,0,0,7,0,0,0,0,0,0,
+0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,10,4,12,7,0,6,0,0,9,7,10,5,
+0,0,8,6,0,4,0,0,0,0,0,0,0,0,0,0,0,0,17,0,0,0,0,0,0,0,18,0,0,0,14,7,0,0,0,0,0,4,
+0,0,0,0,0,0,17,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,4,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,
+0,0,0,0,0,8,6,0,0,0,0,0,0,0,0,8,5,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,5,0,7,0,0,0,0,0,
+7,0,0,0,0,0,0,0,0,0,7,0,6,0,0,0,0,0,0,0,0,8,5,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,5,0,
+0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,23,0,0,7,0,0,0,0,0,0,
+0,0,0,0,0,0,0,4,0,0,0,0,0,0,12,7,8,4,0,0,0,0,0,0,0,0,0,6,0,0,9,5,0,0,0,7,0,0,0,
+0,0,0,0,0,0,4,10,0,0,7,0,0,0,5,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,5,0,0,18,7,
+0,0,8,0,0,5,0,0,10,0,0,0,0,0,0,6,0,0,0,0,0,5,0,7,0,0,0,0,0,0,0,0,0,0,16,0,0,0,0,
+6,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,6,0,0,10,0,0,5,10,4,0,0,12,0,0,0,0,
+6,22,4,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,5,0,0,0,0,0,7,0,5,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,6,0,7,0,0,0,6,0,6,8,5,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,8,5,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,5,0,0,0,7,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,
+0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,16,6,0,0,0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,12,7,0,0,0,0,9,0,0,0,0,6,0,0,11,0,0,0,0,0,13,0,9,6,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,4,0,0,10,7,0,0,0,7,0,6,0,
+0,0,0,0,0,0,0,0,0,8,7,0,0,0,0,11,0,15,0,22,7,0,4,0,6,0,0,0,0,0,7,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,4,0,7,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,
+18,0,0,0,0,0,0,0,0,0,14,0,0,4,0,0,0,0,8,7,9,0,0,0,0,0,9,0,0,0,14,0,0,0,0,0,0,0,
+0,0,11,7,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,7,0,0,0,6,0,6,0,0,0,0,8,0,0,0,0,
+0,11,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,9,4,0,0,0,0,0,4,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,8,7,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,
+0,0,0,0,0,0,8,6,0,0,9,5,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,6,0,0,0,0,0,0,0,6,0,5,0,
+0,10,6,9,0,0,0,0,6,0,0,0,0,0,6,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,
+11,7,12,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,4,0,5,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,
+0,0,0,0,6,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,5,0,0,10,6,
+0,0,0,4,0,7,13,0,0,4,0,0,11,4,0,6,0,0,0,0,0,6,8,7,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,5,0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20,5,0,0,0,0,12,6,0,0,0,0,
+11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,11,5,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,
+7,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,4,0,0,0,6,17,0,9,0,10,6,0,6,12,0,0,4,0,0,0,
+0,0,0,0,0,0,0,8,5,12,7,0,4,0,0,0,0,0,0,0,0,0,0,11,0,9,0,10,6,11,5,0,7,0,0,8,0,0,
+7,0,4,0,0,0,7,0,0,0,0,0,0,8,6,0,0,0,6,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,6,0,
+0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,11,0,0,0,0,6,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,10,0,0,0,0,0,8,6,0,0,0,0,0,6,12,0,0,0,0,0,
+0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,6,0,0,16,0,11,5,0,0,0,0,0,
+0,0,0,0,0,10,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,9,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,6,0,7,0,0,0,0,0,0,0,0,0,0,0,0,8,4,0,0,0,0,0,6,10,
+7,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,6,0,0,0,0,0,0,9,5,0,0,0,0,8,0,9,0,0,
+0,0,0,0,0,0,7,10,0,13,0,0,6,0,0,0,0,0,0,0,0,0,6,9,4,0,0,0,0,0,0,10,0,0,0,0,0,10,
+0,0,0,0,0,0,0,10,6,11,0,0,0,0,0,9,0,0,0,0,0,0,4,0,0,0,0,0,0,10,5,0,0,0,0,0,6,0,
+0,0,0,0,0,18,4,0,7,0,0,0,0,0,0,24,0,8,6,0,7,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,
+0,8,5,0,0,0,0,10,7,0,6,0,0,0,0,0,0,0,0,8,5,10,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,
+6,0,0,8,7,0,0,0,0,0,0,0,0,0,0,12,6,0,0,0,0,0,0,0,4,0,5,15,0,0,0,0,7,0,7,0,0,0,0,
+0,0,0,0,0,6,10,5,0,0,0,6,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,7,0,0,12,0,0,0,0,0,0,0,0,
+0,0,5,0,0,0,0,0,0,14,4,18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,11,0,10,4,9,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,6,0,0,0,0,0,4,0,0,0,0,0,7,0,0,0,
+0,0,0,0,0,0,0,0,7,13,7,0,0,0,0,0,0,0,5,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,7,0,0,
+0,0,0,0,0,0,0,5,0,0,0,0,0,6,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,8,0,10,6,0,4,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,6,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,6,0,
+0,0,0,0,0,0,0,10,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,9,7,0,0,0,0,0,6,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,10,6,0,0,0,0,0,0,0,6,0,0,0,
+0,0,0,0,5,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,6,0,0,0,5,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,6,11,0,0,0,0,6,0,0,0,0,0,0,0,6,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,
+6,0,0,0,0,0,0,0,6,10,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,
+0,0,0,0,0,0,0,6,0,6,0,0,0,5,0,0,0,0,0,0,0,5,0,0,10,0,11,5,0,0,0,0,0,0,14,7,9,7,
+0,6,0,0,0,0,0,4,0,0,0,0,0,0,11,7,0,6,0,0,0,0,0,0,9,7,0,4,0,0,0,7,0,0,0,0,0,5,0,
+0,0,0,0,5,0,0,0,7,0,0,0,0,0,5,0,0,0,0,17,5,0,0,8,0,0,0,0,6,9,4,0,0,0,0,0,0,0,0,
+8,7,11,7,9,0,0,0,0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,6,9,5,0,0,8,6,0,0,0,5,0,
+0,0,0,9,0,0,0,9,6,0,7,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,
+0,0,0,0,4,0,0,0,0,10,0,0,0,0,0,0,0,0,4,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,4,0,0,0,5,0,0,0,0,0,7,0,0,0,0,0,7,13,5,0,0,0,7,0,0,0,0,0,7,9,6,11,7,0,7,0,0,0,
+0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,8,5,0,0,0,5,9,4,0,0,0,0,0,0,0,0,8,4,0,0,0,0,
+24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,
+0,0,0,0,6,0,0,0,7,0,0,0,6,0,0,0,0,0,0,0,0,0,5,11,6,0,4,0,7,20,0,8,5,9,5,9,0,0,6,
+0,0,0,0,0,0,0,0,0,0,0,7,23,5,0,0,8,4,0,0,10,0,0,6,0,5,0,0,0,0,0,0,0,0,0,0,0,7,0,
+0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,9,0,0,0,
+10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,
+6,0,0,0,0,14,0,18,4,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,9,6,0,4,0,0,0,0,0,0,8,4,
+11,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,8,4,0,0,0,0,0,0,0,0,12,0,10,7,0,0,10,0,0,0,0,
+0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,6,0,0,0,0,0,6,0,0,0,0,8,
+6,10,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,4,0,6,0,4,0,0,0,0,0,5,0,0,
+0,0,0,0,0,0,0,7,0,0,0,7,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,17,7,11,0,0,0,0,0,0,0,0,0,0,4,12,6,0,0,0,5,0,0,0,6,0,0,0,0,0,0,0,0,0,0,
+0,5,12,7,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,6,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+7,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,6,0,6,0,0,20,0,0,0,0,0,0,0,0,0,8,7,0,0,0,0,0,4,
+0,0,0,5,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,6,0,4,13,0,0,7,0,0,0,0,0,0,
+0,0,0,0,0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,6,0,0,12,6,0,7,0,0,0,0,10,0,23,6,0,0,
+0,4,0,0,0,0,0,6,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+10,0,9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,11,0,9,7,0,0,
+0,0,0,0,0,0,0,0,9,7,0,4,0,0,0,0,8,7,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
+0,0,0,0,0,6,0,0,10,7,10,5,0,0,8,0,8,0,0,0,0,0,0,4,0,5,10,0,0,0,0,0,0,0,9,0,0,6,
+0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,11,7,0,0,0,0,0,0,0,0,9,4,0,0,0,0,0,6,0,0,8,
+7,0,0,0,0,0,5,0,0,0,0,0,0,0,0,10,0,0,0,0,5,0,4,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,24,7,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,6,0,0,9,0,0,0,0,0,0,7,0,6,13,0,8,
+0,0,0,0,0,0,0,0,0,9,7,0,0,0,0,0,0,0,6,0,0,0,0,8,5,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,
+4,0,0,0,0,0,4,0,0,0,0,0,0,0,6,8,0,0,0,0,6,8,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,4,0,4,0,0,0,5,0,7,0,0,10,0,10,7,0,0,12,5,0,0,9,0,0,0,10,0,
+0,6,0,0,0,6,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,5,0,0,0,0,0,0,
+12,0,0,0,0,0,8,5,13,6,0,0,0,0,0,0,9,4,0,0,0,0,8,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,
+0,0,6,0,0,14,0,0,0,0,6,0,0,0,0,0,6,0,0,0,0,17,6,0,0,0,0,12,6,0,0,0,0,8,0,0,7,0,
+7,0,4,9,0,0,6,0,0,0,6,0,0,0,0,0,0,8,7,0,0,0,0,0,0,11,0,0,4,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,18,7,0,4,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,6,0,0,0,0,0,
+0,0,0,12,5,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,8,0,11,7,0,0,0,0,0,0,0,0,0,4,0,0,0,0,
+11,0,0,0,0,0,0,0,21,0,0,6,10,0,0,0,0,0,9,0,10,0,0,0,0,0,11,0,0,0,0,6,0,0,0,0,0,
+5,0,0,0,0,0,0,10,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,4,0,0,23,7,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,9,7,0,0,0,7,
+0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,6,0,0,
+11,6,0,0,0,0,0,0,0,6,0,0,0,0,10,7,0,0,9,4,0,0,11,0,8,5,0,0,0,7,8,5,22,0,0,0,9,6,
+0,0,0,0,0,0,0,6,10,4,0,0,0,0,0,7,9,4,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,6,0,4,0,
+0,0,0,11,6,0,0,0,0,0,0,0,0,0,0,0,7,0,6,0,0,0,0,0,7,0,0,0,0,0,0,0,6,0,6,0,4,0,0,
+0,0,0,0,0,7,0,7,0,4,13,0,0,0,0,0,8,0,0,0,0,7,0,0,0,0,0,0,11,6,0,7,0,0,0,0,9,0,0,
+0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,4,8,0,0,0,0,0,8,0,0,0,0,0,0,6,0,0,0,4,0,0,0,0,0,0,0,6,0,0,0,0,13,5,8,0,0,
+0,0,0,0,0,14,0,0,6,0,0,0,0,0,0,0,0,0,7,0,0,17,6,0,0,0,0,13,4,0,0,9,6,0,0,10,5,0,
+0,10,5,0,0,0,0,13,0,0,0,0,6,0,0,0,0,0,0,10,0,12,0,0,0,0,0,0,0,0,0,0,0,8,4,0,4,0,
+0,0,4,0,0,0,0,0,4,0,0,12,0,0,5,9,4,0,0,0,0,0,0,0,0,0,5,8,5,0,0,0,7,0,0,0,0,8,7,
+0,0,0,6,12,5,0,0,0,5,0,0,0,5,0,0,0,0,0,4,12,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,8,7,0,0,0,0,0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,
+0,9,6,0,0,0,0,0,0,0,0,0,4,0,0,0,6,0,0,0,4,11,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,
+0,0,0,0,12,7,0,0,0,7,10,7,0,0,11,0,0,0,0,0,0,0,0,0,11,7,0,0,0,6,0,0,11,0,0,0,0,
+0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,22,0,10,7,0,0,8,5,0,0,0,0,0,5,0,0,0,0,0,0,
+0,0,0,0,9,6,8,7,0,6,0,0,0,0,0,5,0,0,0,0,0,0,8,7,0,0,0,0,9,7,0,0,0,6,0,0,8,7,0,0,
+0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,4,0,5,0,0,0,4,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,6,0,0,0,0,0,0,0,4,0,0,0,0,0,0,9,
+6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,4,0,0,0,5,0,0,0,0,14,0,0,0,
+9,0,0,0,0,0,0,0,0,0,9,7,12,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,12,0,0,0,0,0,12,7,0,0,0,5,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,10,7,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,8,7,0,0,0,0,0,6,0,0,0,0,0,0,9,6,0,0,0,0,0,6,0,0,0,0,0,
+0,0,0,0,0,9,0,0,0,0,7,0,6,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,
+0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,6,0,7,12,6,0,0,0,0,0,5,0,0,0,0,0,0,0,0,
+0,7,0,0,8,6,0,0,0,0,10,7,0,0,0,0,0,0,0,6,0,0,0,0,0,6,12,0,0,0,0,0,0,0,0,6,0,0,0,
+0,0,6,0,0,0,6,0,0,0,0,0,6,16,0,0,0,0,0,0,0,0,0,9,0,17,0,14,7,8,0,0,0,0,0,0,6,0,
+0,0,0,0,0,0,0,0,0,11,0,0,6,8,7,0,6,0,0,0,0,0,0,0,0,0,0,12,6,0,0,0,0,0,0,0,0,0,0,
+9,0,0,0,0,7,0,0,0,0,11,5,0,4,9,6,8,0,0,0,0,0,0,0,0,0,10,0,11,7,0,0,0,0,0,0,0,0,
+9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,
+0,0,0,12,0,0,0,0,0,10,5,0,4,0,0,0,0,0,7,10,6,11,6,0,0,0,0,0,0,0,0,0,0,0,0,17,0,
+0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,6,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,8,0,0,4,0,0,0,6,0,0,0,
+0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,4,0,0,0,0,9,6,0,0,0,4,0,0,0,0,0,4,10,7,0,7,0,0,0,0,0,0,0,6,0,0,0,0,0,6,0,0,0,
+0,0,0,0,0,0,6,0,0,0,6,0,6,0,0,0,0,10,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,18,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,13,0,0,0,0,0,10,0,0,0,0,0,0,0,0,4,
+0,0,0,6,0,0,0,0,0,4,8,0,0,0,11,7,0,0,0,4,0,0,0,0,0,7,0,0,8,5,0,0,16,0,0,0,13,6,
+0,0,0,0,0,0,0,6,0,0,0,0,20,0,11,6,0,0,8,7,0,0,0,0,0,6,17,0,8,0,0,0,0,0,8,7,0,0,
+9,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,
+0,0,4,0,7,0,0,0,0,0,0,0,6,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,8,
+0,8,0,0,0,0,0,0,0,11,0,8,0,0,0,0,0,0,0,0,0,0,0,8,6,0,0,0,0,0,0,0,0,0,6,0,0,9,0,
+0,0,0,0,8,0,0,0,0,0,18,0,0,0,0,0,0,4,9,0,0,0,0,0,8,5,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,9,6,0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,8,7,0,0,0,0,0,0,0,0,
+0,4,0,0,0,0,0,0,14,0,0,0,0,7,0,6,0,0,8,0,20,7,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,8,0,0,0,14,0,0,0,0,0,0,0,8,0,0,7,0,6,0,0,0,7,0,0,0,0,0,0,0,0,
+0,0,0,4,12,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,10,6,0,
+5,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,
+0,0,0,5,8,4,0,0,0,0,0,0,0,4,0,0,0,7,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,12,7,0,
+0,0,0,13,6,0,0,0,7,0,0,8,0,0,0,8,0,0,0,0,0,0,0,0,0,0,5,0,0,0,7,0,0,0,0,0,0,11,5,
+0,6,0,0,8,5,0,7,0,0,0,0,0,0,0,7,0,0,0,0,8,6,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,4,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+14,0,10,7,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,19,0,0,4,0,0,0,7,
+0,0,11,5,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,16,0,10,5,18,0,0,7,9,6,0,5,0,0,0,0,0,
+0,0,0,0,5,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,8,7,0,0,0,0,0,5,0,0,0,7,0,6,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,6,0,0,0,4,0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,6,0,0,0,7,23,0,0,0,0,5,0,0,0,0,0,0,8,5,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,14,0,20,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,
+11,0,0,0,0,7,0,0,0,0,15,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,9,6,0,0,0,0,0,7,0,0,0,0,
+0,4,0,0,0,0,10,0,0,0,0,0,9,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,10,0,11,6,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,5,0,0,11,0,0,0,0,7,0,0,0,0,0,0,8,7,0,
+4,0,0,0,0,11,0,0,0,0,0,11,0,0,5,0,0,8,7,0,4,0,7,0,0,0,0,0,0,0,6,0,0,0,0,0,4,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,10,5,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,6,0,5,0,0,0,0,0,0,0,
+0,0,4,11,5,10,7,0,7,0,0,9,6,9,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,9,4,0,4,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,8,6,0,0,0,0,11,7,0,0,0,0,0,0,0,0,0,0,11,7,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,8,5,0,0,8,0,9,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,6,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,4,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,
+10,7,0,0,0,6,0,0,0,0,0,0,8,0,0,6,0,0,0,6,10,0,0,0,0,0,0,0,0,0,0,0,8,5,0,0,0,6,0,
+0,0,6,0,0,0,0,9,5,8,5,8,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,
+0,8,7,10,0,0,0,0,0,0,0,9,6,0,0,0,0,0,0,0,0,0,0,11,7,0,0,0,0,0,5,0,0,0,6,0,7,0,0,
+10,5,0,0,0,0,8,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,6,0,0,0,0,0,0,11,0,0,0,0,0,13,4,
+0,0,0,4,0,0,0,0,0,5,8,0,0,0,12,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,7,14,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,7,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,4,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,5,0,0,15,6,10,0,0,0,8,6,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,9,6,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,14,6,10,0,0,0,0,0,0,0,0,6,0,
+0,0,0,0,0,0,0,12,6,0,0,0,0,0,0,0,0,9,7,0,0,0,0,0,6,0,5,11,4,0,6,0,0,0,7,0,0,0,0,
+0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,5,0,0,8,5,0,0,0,0,0,0,0,0,0,0,
+0,0,10,0,0,0,0,0,9,6,9,4,0,0,0,4,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,8,5,0,
+0,0,0,0,0,0,0,0,0,0,4,0,0,11,5,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,5,0,0,0,0,0,0,
+0,0,0,7,12,0,0,0,0,6,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,0,
+4,9,6,0,4,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,6,0,
+7,8,6,0,0,0,0,0,0,0,4,0,0,9,6,0,0,0,0,0,0,0,0,0,6,0,5,0,4,0,0,0,0,0,0,0,5,0,0,0,
+0,0,5,0,0,0,7,12,7,0,0,0,0,0,0,18,4,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,6,0,0,0,
+0,12,0,0,7,0,0,0,0,0,7,0,0,13,0,0,6,0,0,0,0,8,7,9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,7,10,5,0,0,8,0,0,0,0,0,0,0,8,6,0,7,0,0,8,4,0,4,0,0,0,0,10,4,0,0,14,0,
+0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,17,0,0,0,0,0,0,6,0,0,0,0,8,6,0,0,10,5,0,0,0,0,8,
+6,0,0,0,6,0,0,0,7,0,0,0,0,0,6,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,12,0,0,0,0,6,
+8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,
+0,0,0,6,0,0,0,0,0,0,0,0,0,0,12,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,4,24,0,0,
+0,0,0,12,6,0,0,10,6,0,5,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,17,7,0,5,0,0,0,
+0,0,0,0,0,0,0,0,0,0,6,11,5,9,0,8,7,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,10,7,0,0,0,0,0,0,0,7,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,11,5,8,7,0,0,0,
+0,8,5,0,0,0,0,10,7,0,7,0,0,0,0,0,0,0,0,0,0,13,6,0,0,0,0,0,0,0,0,0,6,0,4,0,0,0,0,
+0,6,12,0,8,7,0,0,0,0,0,0,0,0,0,0,16,0,10,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,22,0,0,0,
+0,0,0,0,0,0,0,0,0,0,13,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,22,0,0,6,0,0,21,0,0,0,22,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+6,0,0,0,0,9,6,0,0,0,0,0,0,0,0,0,6,0,0,0,5,0,0,0,0,0,7,8,0,0,0,0,6,14,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,8,6,0,0,0,0,0,0,
+0,0,0,0,0,6,0,0,0,0,8,5,0,0,11,7,0,6,0,0,0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,
+6,0,0,0,5,0,0,0,0,0,0,0,0,0,4,0,0,8,7,0,0,0,0,8,5,11,7,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,4,0,0,0,0,8,5,0,0,10,0,0,4,13,7,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,5,0,0,13,6,
+0,6,0,7,0,0,8,4,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,15,0,0,0,10,7,0,0,0,0,0,
+7,0,0,0,0,0,0,0,4,0,0,0,0,0,6,0,0,0,0,19,0,0,0,0,6,0,0,0,0,0,4,0,0,0,0,0,6,0,5,
+0,7,0,0,0,0,0,0,0,0,0,6,0,0,11,4,0,0,0,6,0,0,13,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,8,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,12,6,0,0,0,0,
+0,7,0,0,0,0,0,0,11,7,0,0,0,0,0,6,0,0,10,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,5,11,6,
+0,0,0,0,0,0,0,0,10,0,0,0,0,6,0,0,0,0,0,0,8,7,0,0,0,5,0,0,0,5,0,0,0,0,0,0,0,0,0,
+0,0,0,8,7,0,0,0,0,9,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,10,7,0,0,0,0,10,0,
+0,6,0,0,13,0,0,0,0,0,0,0,9,6,0,0,8,6,8,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,4,0,
+0,9,7,0,0,0,0,0,0,11,0,0,0,10,7,0,0,0,0,0,0,0,0,9,6,0,0,12,4,0,4,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,6,0,0,0,0,21,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,6,0,5,0,0,
+9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,4,0,0,0,0,0,6,0,0,0,6,0,0,0,0,0,0,0,0,
+16,0,0,4,0,0,0,0,0,7,0,0,0,6,0,6,0,0,11,0,0,0,0,5,0,0,0,0,0,0,0,4,8,5,0,0,0,0,0,
+0,14,0,0,0,0,6,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,
+0,0,8,0,0,0,0,0,0,0,0,6,0,0,0,4,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,4,0,0,0,4,0,0,0,
+0,0,0,0,6,9,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,14,7,0,0,9,7,0,0,11,0,0,0,0,0,10,
+4,11,5,13,6,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,5,0,0,0,0,0,4,0,0,9,0,0,0,0,
+0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,6,12,5,0,0,0,6,14,0,0,0,0,0,0,0,0,0,0,4,9,4,
+0,0,0,0,0,5,0,0,0,0,0,0,0,4,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,5,0,0,
+0,0,0,0,0,0,0,0,8,6,0,0,0,0,0,0,11,6,0,0,13,7,0,0,13,6,0,7,0,0,0,0,0,0,8,6,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,10,6,0,4,0,0,12,6,0,0,0,0,0,0,0,0,10,6,
+0,0,0,6,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,6,0,
+0,0,0,0,7,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,8,6,0,
+0,0,7,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,
+0,0,0,5,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,
+0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,8,7,0,0,8,5,0,0,0,4,9,5,0,0,0,7,10,6,0,0,
+0,0,0,0,9,7,0,0,8,5,8,0,8,4,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,5,0,5,0,0,0,0,0,0,0,
+0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,6,0,0,0,4,0,0,0,0,0,0,0,0,0,
+0,11,7,0,0,0,7,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17,5,0,0,0,7,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,9,7,0,0,0,0,8,5,0,4,0,0,0,0,0,6,0,6,14,
+6,0,0,0,0,9,6,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,6,0,0,0,0,14,7,9,7,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,16,
+0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,14,0,
+0,6,0,0,8,6,0,0,0,0,0,6,0,0,12,0,0,0,0,0,8,5,0,7,11,0,0,5,0,4,0,0,0,6,0,0,0,0,0,
+0,0,0,0,0,0,0,9,6,0,4,0,6,0,0,0,0,0,0,0,0,0,0,0,0,11,6,0,0,0,0,0,0,10,5,0,0,0,0,
+0,4,0,0,0,7,11,6,0,4,8,5,9,5,0,0,0,5,0,7,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,8,5,14,7,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,9,6,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,9,0,0,0,12,5,0,0,0,0,0,0,0,4,10,5,0,0,0,0,0,0,0,0,0,0,0,6,0,
+0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,4,0,0,0,6,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,6,0,0,0,0,0,0,10,4,0,0,0,0,0,5,0,0,0,4,
+0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,6,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,8,0,10,7,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,10,7,0,0,0,0,0,0,0,0,15,0,0,0,
+0,0,0,0,0,0,0,7,0,0,0,0,0,7,10,7,9,7,0,0,0,7,0,0,8,0,0,0,0,0,0,0,9,0,0,0,8,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,8,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,7,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,15,7,12,6,0,0,0,7,0,5,0,0,0,0,0,0,0,0,0,0,0,0,18,0,0,5,0,0,0,0,
+0,0,0,6,9,5,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,9,7,0,0,14,0,0,0,11,7,0,0,0,0,0,
+0,0,0,0,0,0,4,0,0,11,7,0,0,0,0,8,0,0,0,0,0,0,6,8,7,0,0,0,7,10,4,0,0,0,0,0,0,0,0,
+0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,7,0,0,0,0,10,0,0,0,0,0,0,
+6,0,6,0,0,0,0,0,4,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,11,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,7,0,0,10,7,0,0,0,0,9,7,0,0,0,0,0,0,13,7,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,12,0,
+0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,9,6,0,0,11,0,0,
+0,0,0,14,4,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,23,7,0,0,
+0,0,0,6,0,7,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,5,0,0,0,0,20,
+7,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,8,0,0,0,0,0,0,0,0,0,11,5,0,0,0,0,0,0,0,0,0,0,10,4,0,0,0,5,8,5,10,4,0,0,0,0,0,
+0,13,6,9,7,0,0,10,7,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,6,0,0,0,7,0,6,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,7,10,7,0,0,
+0,0,0,0,0,0,0,0,12,4,0,0,0,0,8,7,0,0,0,0,0,7,0,6,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,
+0,0,0,0,6,0,6,9,6,0,0,12,5,0,0,8,6,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,6,0,0,0,0,
+0,0,0,0,0,0,0,0,0,5,8,7,9,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,11,
+4,0,0,0,0,0,0,8,0,0,0,10,7,0,4,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,6,0,0,8,0,
+0,0,0,0,0,5,0,6,0,0,10,0,14,0,0,0,0,0,0,0,23,0,0,0,12,0,10,5,0,0,0,0,0,0,0,0,0,
+5,0,0,0,0,8,0,0,0,0,6,8,0,0,0,0,0,0,0,0,0,22,0,8,0,0,0,0,6,0,0,0,0,0,0,0,5,0,0,
+0,0,0,0,0,6,18,4,0,0,0,7,10,6,0,6,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,
+0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,7,10,0,0,0,0,0,0,6,0,0,0,0,11,5,0,0,0,0,0,0,0,0,
+15,0,8,6,0,0,13,7,0,0,0,0,0,7,0,0,0,0,0,7,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,9,5,9,
+0,0,6,8,6,0,0,0,0,10,0,0,0,18,5,0,0,0,5,0,7,0,0,0,0,8,6,0,0,0,0,9,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,14,0,23,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,6,0,0,0,5,0,
+0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,8,5,0,0,0,0,0,0,0,0,9,0,0,0,0,4,0,0,0,0,0,0,0,0,
+0,0,0,0,20,5,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,9,5,0,0,0,0,0,0,8,4,24,0,0,0,0,0,0,
+0,0,0,0,0,0,0,9,7,0,0,0,0,10,5,0,0,8,5,0,0,0,0,0,0,0,0,12,7,0,6,0,0,10,6,0,0,0,
+0,14,0,0,4,9,5,0,0,0,0,0,0,9,0,0,0,0,0,0,6,0,0,0,0,0,4,0,0,8,0,0,0,0,0,11,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,8,5,11,7,0,4,0,0,10,0,0,0,0,
+0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,11,6,0,0,0,0,0,5,14,6,0,0,0,0,10,0,0,
+0,13,4,0,0,0,0,0,0,0,0,0,0,0,6,0,0,10,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,7,12,0,10,6,0,0,0,0,0,0,10,0,0,0,0,0,10,0,9,
+7,0,0,0,0,0,0,0,0,0,0,0,0,0,7,8,0,0,0,0,0,0,0,0,0,0,0,0,4,0,7,0,0,0,0,9,7,0,0,0,
+0,0,0,0,0,0,0,0,0,24,0,11,7,0,7,0,0,0,0,0,0,8,6,0,0,0,0,0,0,8,7,0,0,0,0,0,5,0,0,
+0,6,9,0,0,0,23,5,0,0,0,0,0,6,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,7,0,0,0,
+0,0,0,0,0,0,0,0,0,0,6,0,0,18,4,0,0,11,7,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,
+0,0,0,6,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,9,0,0,0,11,0,0,0,23,0,0,
+0,10,4,0,0,0,0,0,7,0,0,0,7,0,0,0,0,0,4,0,0,0,0,0,7,0,0,19,0,11,0,0,0,0,0,12,7,0,
+0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,5,0,0,0,0,0,5,0,0,0,0,0,5,0,0,0,0,0,0,0,6,0,0,
+9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,4,0,0,0,0,10,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,4,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,22,0,8,7,10,4,11,0,13,5,8,7,9,0,8,7,0,0,0,7,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,
+0,8,0,0,0,0,0,0,0,8,6,0,0,0,0,0,0,0,0,0,0,0,6,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,4,0,0,0,4,11,0,0,6,0,0,8,5,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,8,5,0,0,
+20,0,0,0,0,0,0,0,0,0,11,0,0,0,0,5,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,7,0,0,14,0,0,0,9,0,13,7,0,0,0,0,0,6,0,7,0,0,8,6,10,6,0,0,8,6,0,0,0,6,0,
+0,12,6,9,0,0,0,0,0,0,5,9,0,12,4,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,8,5,0,0,0,0,0,
+0,0,4,8,0,0,6,8,0,0,0,0,0,0,0,0,0,13,6,0,7,0,0,0,0,0,6,8,7,8,6,0,0,0,7,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,18,0,11,4,0,0,0,5,0,0,0,0,0,0,0,0,0,0,
+0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,14,
+6,0,0,0,0,12,7,8,0,0,0,0,0,0,0,8,7,0,0,0,0,10,4,0,0,0,0,0,0,10,0,0,6,0,0,0,0,0,
+0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,15,6,9,7,0,0,0,0,0,0,15,6,11,7,0,0,0,7,0,0,21,0,0,
+0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,17,6,0,0,10,5,0,5,0,0,0,0,0,0,0,0,0,7,
+0,0,10,0,0,0,0,0,0,0,0,4,11,5,0,0,0,0,16,7,0,0,0,0,0,6,0,0,8,7,0,4,0,0,10,0,0,0,
+0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,8,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,
+0,0,0,10,4,0,0,0,0,0,0,0,0,0,6,0,5,0,0,9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,
+0,7,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,6,10,7,0,0,0,0,0,0,0,0,8,4,0,0,10,0,0,0,0,4,0,6,0,6,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,7,17,0,0,0,0,0,
+0,0,0,0,0,0,10,0,0,7,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+6,0,0,0,5,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,5,0,4,0,0,0,0,0,6,0,0,0,0,0,0,10,5,0,0,
+0,5,0,0,0,0,9,0,19,7,0,0,0,0,0,7,0,0,0,0,10,6,0,0,0,6,0,5,0,0,0,0,0,0,0,0,0,6,8,
+0,0,0,0,0,11,0,0,0,0,0,0,6,0,0,0,0,0,7,9,0,15,0,0,0,0,0,0,0,0,0,0,4,0,0,0,5,0,0,
+0,0,0,0,0,6,0,0,0,0,0,0,0,4,0,0,0,0,9,0,0,0,0,0,0,0,0,6,0,7,0,0,0,0,0,0,0,6,0,0,
+0,0,0,6,10,0,0,0,0,0,0,0,23,0,14,0,0,0,0,7,0,0,0,0,0,7,0,0,9,0,0,0,0,7,0,0,0,0,
+0,6,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,4,0,0,0,
+0,0,0,0,0,9,5,0,0,0,0,0,4,0,0,0,0,9,5,0,0,0,0,22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,0,0,0,10,0,0,0,0,0,0,5,0,4,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,6,11,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,14,7,0,0,12,7,0,0,0,
+0,0,0,0,0,0,4,0,0,0,0,0,6,0,0,0,0,8,6,10,0,0,0,0,0,0,0,0,0,10,7,8,5,0,0,0,0,0,0,
+0,0,8,4,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,5,0,0,9,5,0,0,0,0,0,5,0,0,0,0,0,4,0,0,0,
+0,0,0,0,0,0,0,12,4,11,0,0,0,9,0,11,7,0,0,0,0,0,0,10,6,0,0,0,6,0,0,0,0,15,5,0,0,
+11,6,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,4,0,4,0,6,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,8,0,0,0,19,7,0,4,0,0,9,0,0,0,0,0,10,0,
+0,6,0,0,13,0,12,6,0,0,0,0,0,0,0,0,10,7,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,13,7,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,6,0,6,0,4,9,0,0,0,10,0,0,0,0,0,0,0,
+0,5,0,0,0,0,0,0,10,0,23,6,0,0,0,6,8,0,0,0,0,0,0,0,0,0,17,7,0,0,0,0,11,6,22,5,0,
+0,9,6,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,0,5,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,4,11,0,9,4,0,0,
+0,7,0,7,0,0,0,0,0,0,12,4,0,0,0,0,0,0,0,0,0,0,0,0,11,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
+4,0,0,11,5,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,8,6,0,0,0,4,0,0,0,0,
+0,0,0,0,0,7,0,0,0,4,0,0,10,4,0,0,0,0,0,0,0,7,0,7,0,0,0,6,0,0,0,0,8,6,0,6,0,6,0,
+0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,6,0,0,0,0,0,0,9,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,6,22,6,12,0,0,6,0,0,0,6,0,0,0,0,0,7,0,0,0,0,11,0,0,0,
+9,7,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,6,0,0,0,6,0,6,0,0,8,7,0,0,0,4,9,7,19,0,0,0,0,0,0,0,0,0,9,6,10,6,0,6,0,0,0,
+4,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,6,16,7,10,6,0,0,23,6,11,7,0,4,0,0,0,0,0,0,0,0,0,
+5,0,0,0,0,10,7,0,0,0,0,0,7,0,0,0,0,0,0,15,0,10,0,0,0,14,6,0,0,0,0,0,0,0,0,0,0,0,
+5,0,0,0,0,0,0,0,5,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,5,0,0,11,5,0,0,0,0,0,0,0,0,0,0,
+0,4,0,0,0,0,0,6,0,0,10,0,0,0,0,7,0,0,0,0,0,0,10,6,0,0,0,0,8,4,0,0,0,7,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,7,12,5,0,0,0,0,
+0,6,0,0,0,0,9,6,0,0,0,0,0,0,0,6,9,0,0,0,0,6,0,0,0,0,8,7,0,0,0,0,0,0,0,6,0,0,0,0,
+0,0,0,0,0,0,10,5,0,0,0,0,0,0,8,6,0,0,0,0,0,6,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,8,5,0,0,0,0,0,7,0,7,0,4,0,0,10,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,5,0,0,0,0,13,
+7,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,7,0,0,13,0,0,0,0,0,0,0,0,7,10,5,0,0,0,0,0,0,9,7,0,0,8,6,9,
+5,0,0,0,0,0,6,12,0,0,0,0,0,0,0,18,6,0,0,0,0,0,0,0,0,19,7,0,4,0,0,0,0,9,5,0,5,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,7,0,0,0,0,0,0,14,0,0,0,23,7,8,7,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,6,22,0,0,7,0,0,0,0,0,0,0,0,9,7,8,4,0,
+0,0,0,0,0,0,0,8,5,0,6,0,0,0,0,0,6,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,
+8,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,12,5,0,0,0,0,0,0,0,0,0,0,8,6,0,0,11,7,0,0,0,
+0,12,0,8,6,19,7,0,0,0,0,0,0,0,0,0,0,0,0,0,6,11,0,0,6,0,7,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,11,7,0,0,0,0,0,4,10,0,0,0,0,0,0,0,8,7,0,0,0,0,14,0,8,0,0,6,10,0,0,
+0,0,0,0,0,12,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,6,0,0,0,0,
+0,0,0,0,13,0,0,0,0,0,0,0,11,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,
+0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,5,0,0,0,6,0,0,0,5,0,7,0,0,0,
+0,0,6,0,0,21,7,0,0,9,6,0,0,0,6,0,0,13,7,0,0,0,5,0,0,0,0,0,4,0,6,0,0,0,0,0,0,0,0,
+0,0,0,4,0,0,0,0,0,0,11,5,0,6,0,0,10,5,0,0,0,0,0,0,0,0,9,6,0,0,8,7,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,9,0,0,0,0,0,0,6,0,0,0,0,15,4,0,0,12,7,0,0,0,6,
+0,7,0,0,8,0,9,5,0,4,0,0,0,6,0,6,0,0,23,4,0,0,0,0,0,0,0,0,0,0,0,0,10,7,0,4,0,0,8,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,6,0,0,0,0,0,0,0,0,0,
+7,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,12,6,0,0,0,0,0,0,10,7,0,7,0,0,0,0,0,0,0,0,0,0,
+9,0,0,0,0,0,8,0,0,0,0,4,0,0,0,0,0,0,0,0,0,4,11,5,0,0,0,6,0,6,0,0,0,0,0,0,0,6,0,
+4,0,0,0,0,0,0,0,0,0,0,0,5,8,4,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,6,8,7,0,0,0,6,0,6,0,
+0,0,0,0,0,0,0,0,5,0,0,0,0,0,5,0,0,0,0,11,0,0,0,0,0,0,0,10,5,9,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,23,7,0,0,0,0,0,7,0,0,10,6,18,0,0,0,
+0,0,0,0,8,7,0,6,0,0,0,0,0,0,8,5,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,
+0,0,0,0,0,6,0,0,0,4,12,7,0,0,0,0,0,0,0,0,10,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,0,0,0,0,0,0,13,5,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,4,0,0,0,0,0,0,0,0,
+11,7,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,
+0,0,0,0,6,0,0,0,4,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,10,0,11,0,0,0,0,0,0,0,0,0,
+17,5,0,4,0,0,0,0,0,0,0,7,0,0,0,0,0,6,0,0,0,0,0,0,0,4,0,0,0,0,8,7,0,0,0,0,0,0,0,
+0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,6,0,0,0,0,0,6,0,0,0,0,
+10,0,0,0,8,6,0,0,0,7,0,0,0,0,0,0,8,0,0,0,14,0,0,0,0,7,0,0,0,4,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,9,4,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,
+10,0,0,0,16,5,0,0,0,0,0,0,8,0,0,4,0,0,0,0,0,0,0,0,0,0,9,6,0,0,0,0,0,0,10,0,0,0,
+0,0,0,0,0,5,0,0,0,0,12,5,0,7,0,0,0,0,0,6,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,7,0,
+0,0,0,0,0,0,0,12,6,0,0,0,0,0,7,0,6,0,6,12,6,0,0,0,0,0,0,0,4,8,7,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20,0,0,0,0,6,0,6,0,0,0,0,0,0,0,0,10,6,8,0,0,
+6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+16,0,8,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,23,5,0,0,0,7,0,6,0,
+0,0,0,0,0,0,0,0,0,0,0,10,6,0,0,9,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,14,0,0,0,0,7,0,0,0,4,17,5,0,0,0,0,11,0,9,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,6,0,0,0,5,0,7,0,0,0,0,0,0,0,0,8,0,0,0,
+12,6,0,0,0,0,0,0,13,0,0,0,0,7,9,0,0,0,0,0,0,0,0,0,0,5,0,0,0,7,10,7,12,0,0,0,9,0,
+0,0,14,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,15,6,0,0,23,0,0,7,0,6,0,0,0,7,0,6,
+0,0,0,0,0,0,0,6,0,6,9,0,0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,8,7,9,4,0,0,10,0,0,0,10,
+6,0,7,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,23,0,0,6,0,0,0,0,0,0,9,4,
+0,0,10,7,0,0,0,0,0,0,0,0,0,0,0,0,9,7,0,0,9,6,0,0,0,0,8,6,0,0,0,0,0,0,0,0,12,0,0,
+0,0,0,8,0,0,6,11,6,0,0,8,7,8,5,0,0,0,0,0,5,0,0,0,0,0,0,0,4,0,0,0,4,0,0,0,0,0,0,
+10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,
+7,0,0,0,0,9,6,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,8,0,0,0,0,6,12,5,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,10,0,10,
+7,0,0,8,0,0,0,0,4,0,0,0,6,0,0,0,6,0,0,0,6,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,5,0,
+0,0,4,0,0,0,0,0,4,0,0,0,0,0,0,0,6,0,6,0,5,0,0,0,0,8,0,0,0,10,7,0,0,0,0,10,0,0,0,
+0,0,13,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,19,7,0,4,12,0,8,0,0,0,0,6,0,0,0,0,
+0,0,0,6,0,0,0,0,0,0,0,0,0,4,0,0,0,0,18,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,
+0,14,0,0,4,0,0,0,6,0,0,0,6,0,0,0,7,0,0,0,0,0,0,10,4,0,0,9,7,0,0,11,0,0,0,0,0,0,
+7,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,4,0,0,12,0,0,0,
+0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,22,5,9,7,0,0,0,0,0,0,0,0,0,
+0,0,6,0,0,9,6,0,5,0,0,0,0,0,0,10,5,0,0,8,6,0,6,10,5,0,0,0,6,0,0,0,6,0,0,20,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,6,0,0,0,0,17,4,0,7,0,6,
+0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,
+0,0,0,0,0,0,0,0,0,0,0,8,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,7,0,0,0,0,0,0,0,
+0,0,7,0,0,8,6,12,0,0,7,18,7,0,0,8,4,0,0,0,0,9,6,0,0,0,0,0,0,0,0,13,0,0,6,0,0,0,
+0,0,0,0,0,0,0,10,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,7,0,0,
+0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,8,5,0,0,0,0,0,0,0,0,12,0,0,0,8,0,0,0,0,0,0,
+4,0,0,10,0,16,0,0,0,0,0,0,0,12,7,10,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,16,6,10,0,0,5,0,0,0,0,0,6,0,0,0,0,
+0,7,0,0,0,7,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,5,8,7,0,7,0,0,0,0,0,0,0,0,8,0,0,6,0,0,0,6,0,0,0,4,0,0,0,0,
+8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,7,0,0,8,0,0,0,
+9,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,7,13,5,0,5,0,0,0,7,8,4,0,0,0,0,0,0,0,
+0,12,0,0,0,0,0,0,0,0,0,0,0,8,6,0,6,0,0,11,0,0,0,0,0,0,0,0,6,0,0,0,0,0,4,0,0,0,0,
+0,0,0,0,0,6,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,10,7,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,11,6,0,0,10,6,0,0,
+0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,
+0,0,0,6,0,0,0,7,0,0,9,0,8,7,11,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,9,6,10,5,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,10,7,0,0,0,0,0,0,11,0,9,6,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,5,0,6,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,15,5,12,5,
+0,0,0,0,0,0,12,7,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,6,0,0,12,6,0,
+0,0,0,24,4,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,7,0,0,0,0,0,0,0,0,0,0,
+0,0,0,4,10,4,0,0,0,0,10,7,0,0,0,0,0,0,0,0,0,0,0,0,9,0,11,0,0,0,0,0,0,0,0,0,0,6,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,
+0,0,8,0,0,0,0,7,0,0,0,0,0,0,10,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,10,7,0,0,0,0,0,
+0,0,0,0,0,14,7,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,4,0,0,0,6,0,0,0,0,0,6,0,0,0,6,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,11,6,0,0,0,0,0,0,0,4,0,0,0,4,0,0,0,0,0,7,20,7,11,4,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,7,9,6,0,0,12,7,0,0,0,0,0,0,10,0,12,0,
+0,0,0,0,0,4,9,6,13,0,0,0,0,0,0,0,0,6,0,0,0,6,0,0,0,5,0,0,0,0,0,0,8,0,0,0,0,0,0,
+0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,6,0,0,11,0,9,0,0,0,0,4,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,8,5,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,19,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,
+0,4,0,5,0,0,0,0,0,0,0,0,0,4,0,0,0,0,9,7,8,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,
+0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,6,
+0,0,0,0,8,7,0,0,0,0,0,0,12,0,0,6,0,0,0,0,0,0,0,6,8,4,0,0,10,7,0,0,10,0,0,0,0,0,
+0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,7,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,4,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,7,0,0,0,0,0,0,0,5,
+0,4,0,0,0,0,0,6,0,0,0,0,0,0,8,0,0,6,0,0,0,6,0,0,0,0,0,7,0,5,8,4,0,0,9,0,0,0,0,4,
+0,0,0,0,0,0,0,0,0,5,0,0,15,6,8,6,0,0,0,6,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,6,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,9,6,0,0,0,0,0,0,0,7,0,0,0,4,0,
+6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,9,5,0,6,12,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,6,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,8,7,0,6,0,0,0,0,0,0,0,0,0,0,0,0,11,0,12,7,0,0,0,0,
+0,0,0,0,0,5,0,5,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,11,4,0,0,0,0,0,0,0,0,0,0,10,
+7,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,7,8,7,9,6,0,0,0,0,0,0,0,0,8,6,0,0,0,0,0,5,12,0,
+10,5,12,6,0,0,0,7,0,0,0,0,0,0,0,5,0,0,0,5,9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,
+11,7,0,0,0,0,0,0,0,0,0,0,17,0,0,0,0,0,0,6,0,7,0,0,0,0,8,0,8,5,0,6,0,0,0,6,0,0,0,
+0,0,0,0,6,0,6,0,6,9,0,0,5,17,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,7,0,0,
+0,0,0,7,0,0,0,0,16,5,0,0,0,0,0,0,0,4,0,0,0,5,11,5,0,7,0,0,0,4,8,7,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,7,0,0,0,0,12,0,0,0,
+0,0,12,0,0,0,0,0,0,0,0,4,10,4,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,6,0,0,0,0,0,0,0,4,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,20,5,0,0,
+10,0,0,0,0,0,0,0,0,0,0,6,0,0,0,6,12,0,0,0,0,0,0,6,0,0,0,0,0,0,9,4,10,7,0,4,0,0,
+0,0,0,0,10,6,0,0,0,0,8,4,0,7,8,6,0,6,8,0,10,0,0,0,0,0,13,5,0,6,0,0,0,0,0,0,22,4,
+0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,6,0,0,0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,6,10,
+5,8,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,10,4,0,0,10,7,0,0,0,0,0,5,0,
+5,8,0,0,0,0,6,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,10,7,0,0,0,4,0,0,0,0,0,6,0,0,
+0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,7,0,0,0,6,0,0,0,0,0,0,0,0,0,
+4,0,0,0,4,10,0,0,6,13,7,8,0,0,0,0,0,0,7,0,0,12,7,0,0,0,0,0,0,10,5,0,0,0,0,0,6,0,
+0,0,0,0,0,0,0,0,0,13,7,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,6,0,0,0,0,0,0,0,0,8,6,0,6,
+0,0,0,0,0,0,0,0,12,0,8,4,0,0,0,0,0,4,0,4,0,0,0,0,0,0,0,5,0,0,0,0,12,5,0,0,0,7,0,
+0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,6,10,0,0,0,20,0,0,5,0,0,10,
+7,11,7,0,0,0,0,0,0,0,0,0,0,17,0,9,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,10,7,0,4,0,6,0,0,24,0,0,5,0,0,0,0,8,0,0,
+0,0,0,0,0,10,5,0,4,0,6,0,0,8,0,0,0,0,0,0,4,0,6,0,0,0,0,0,0,9,5,0,0,0,0,0,0,0,0,
+0,0,0,6,0,0,0,0,9,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,4,0,7,
+0,0,13,0,0,0,0,0,0,0,11,6,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,
+17,7,0,0,11,6,0,0,0,0,12,6,0,0,0,6,0,6,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,5,0,0,0,6,0,0,0,0,0,0,0,0,0,0,10,0,0,4,8,6,0,0,0,
+0,0,0,9,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,9,5,0,7,18,0,0,0,0,0,0,0,0,0,0,0,0,0,8,6,0,0,0,0,0,0,0,0,8,0,0,0,
+0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,
+0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,4,0,6,0,0,9,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,0,0,0,
+0,0,0,8,7,10,0,8,5,0,0,0,0,0,0,0,0,9,0,0,0,10,0,0,0,0,6,0,7,0,4,0,0,0,0,0,0,0,0,
+8,0,0,0,0,0,8,4,0,0,0,0,0,5,0,0,10,0,12,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+4,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,6,11,0,0,
+7,0,0,0,0,0,6,10,5,0,0,0,0,0,0,0,0,0,5,0,0,9,5,12,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,6,0,0,0,0,13,6,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,
+0,0,0,8,4,0,6,12,0,0,0,0,0,0,0,0,0,0,0,0,6,0,6,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,11,4,0,0,0,6,14,0,11,0,9,6,0,0,0,0,0,0,22,0,12,0,8,6,0,0,0,0,0,0,0,6,0,
+0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,
+10,7,0,0,0,0,0,0,0,0,9,0,0,0,0,4,0,0,0,0,0,0,0,0,0,5,11,0,0,0,0,0,0,0,8,6,0,0,9,
+7,0,0,12,4,0,0,0,0,0,0,12,6,0,6,0,7,0,0,8,5,0,0,0,0};
+/* GENERATED CODE END */
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/dictionary_hash.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/dictionary_hash.h
new file mode 100644
index 0000000000..e553ea5d4e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/dictionary_hash.h
@@ -0,0 +1,25 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Hash table on the 4-byte prefixes of static dictionary words. */
+
+#ifndef BROTLI_ENC_DICTIONARY_HASH_H_
+#define BROTLI_ENC_DICTIONARY_HASH_H_
+
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+extern const uint16_t kStaticDictionaryHashWords[32768];
+extern const uint8_t kStaticDictionaryHashLengths[32768];
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_DICTIONARY_HASH_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/encode.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/encode.c
new file mode 100644
index 0000000000..1d225253a9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/encode.c
@@ -0,0 +1,1983 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Implementation of Brotli compressor. */
+
+#include <brotli/encode.h>
+
+#include <stdlib.h>  /* free, malloc */
+#include <string.h>  /* memcpy, memset */
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/platform.h"
+#include "../common/version.h"
+#include "backward_references.h"
+#include "backward_references_hq.h"
+#include "bit_cost.h"
+#include "brotli_bit_stream.h"
+#include "compress_fragment.h"
+#include "compress_fragment_two_pass.h"
+#include "dictionary_hash.h"
+#include "encoder_dict.h"
+#include "entropy_encode.h"
+#include "fast_log.h"
+#include "hash.h"
+#include "histogram.h"
+#include "memory.h"
+#include "metablock.h"
+#include "prefix.h"
+#include "state.h"
+#include "quality.h"
+#include "ringbuffer.h"
+#include "utf8_util.h"
+#include "write_bits.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+/* memcpy of sizeof(src) bytes: src must be an array; expansion ends in ';'. */
+#define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));
+/* Returns the input block size, i.e. 1 << params.lgblock. */
+static size_t InputBlockSize(BrotliEncoderState* s) {
+  return (size_t)1 << s->params.lgblock;
+}
+/* Returns input_pos_ - last_processed_pos_: input still to be processed. */
+static uint64_t UnprocessedInputSize(BrotliEncoderState* s) {
+  return s->input_pos_ - s->last_processed_pos_;
+}
+/* Returns how far the unprocessed input falls short of one input block, or 0
+   once it fills (or exceeds) a whole block. */
+static size_t RemainingInputBlockSize(BrotliEncoderState* s) {
+  const size_t capacity = InputBlockSize(s);
+  const uint64_t pending = UnprocessedInputSize(s);
+  return (pending < capacity) ? capacity - (size_t)pending : 0;
+}
+/* Sets encoder parameter `p` of `state` to `value`.
+   Returns BROTLI_TRUE if the value was stored. Returns BROTLI_FALSE, leaving
+   the parameters untouched, when the encoder is already initialized, `p` is
+   not handled here, or `value` fails the check done for that parameter. */
+BROTLI_BOOL BrotliEncoderSetParameter(
+    BrotliEncoderState* state, BrotliEncoderParameter p, uint32_t value) {
+  /* Changing parameters on the fly is not implemented yet. */
+  if (state->is_initialized_) return BROTLI_FALSE;
+  /* TODO(eustas): Validate/clamp parameters here. */
+  switch (p) {
+    /* Unless a case says otherwise, the value is stored without checks. */
+    case BROTLI_PARAM_MODE:
+      state->params.mode = (BrotliEncoderMode)value;
+      break;
+    case BROTLI_PARAM_QUALITY:
+      state->params.quality = (int)value;
+      break;
+    case BROTLI_PARAM_LGWIN:
+      state->params.lgwin = (int)value;
+      break;
+    case BROTLI_PARAM_LGBLOCK:
+      state->params.lgblock = (int)value;
+      break;
+    case BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING:
+      /* Strictly boolean: anything other than 0 or 1 is rejected. */
+      if (value > 1) return BROTLI_FALSE;
+      state->params.disable_literal_context_modeling = TO_BROTLI_BOOL(!!value);
+      break;
+    case BROTLI_PARAM_SIZE_HINT:
+      state->params.size_hint = value;
+      break;
+    case BROTLI_PARAM_LARGE_WINDOW:
+      /* !!value folds every non-zero value to 1 before conversion. */
+      state->params.large_window = TO_BROTLI_BOOL(!!value);
+      break;
+    case BROTLI_PARAM_NPOSTFIX:
+      state->params.dist.distance_postfix_bits = value;
+      break;
+    case BROTLI_PARAM_NDIRECT:
+      state->params.dist.num_direct_distance_codes = value;
+      break;
+    case BROTLI_PARAM_STREAM_OFFSET:
+      /* Offsets above 1 << 30 are rejected. */
+      if (value > (1u << 30)) return BROTLI_FALSE;
+      state->params.stream_offset = value;
+      break;
+    default:
+      return BROTLI_FALSE;
+  }
+  /* Every case that leaves the switch through `break` stored its value. */
+  return BROTLI_TRUE;
+}
+/* Maps a 64-bit stream position onto a 32-bit ring-buffer position. Positions
+   below 3GiB are returned unchanged; beyond that the value cycles through
+   [1GiB, 3GiB), so a wrapped position never looks like the first lap. */
+static uint32_t WrapPosition(uint64_t position) {
+  const uint32_t kLow30Mask = (1u << 30) - 1;
+  const uint64_t gib_index = position >> 30;
+  uint32_t lap_bits;
+  if (gib_index <= 2) return (uint32_t)position;
+  /* Alternate the top two bits between 01 and 10, i.e. wrap every 2GiB. */
+  lap_bits = (uint32_t)((gib_index - 1) & 1) + 1;
+  return ((uint32_t)position & kLow30Mask) | (lap_bits << 30);
+}
+/* Returns s->storage_, reallocating it if smaller than `size`; NULL on OOM. */
+static uint8_t* GetBrotliStorage(BrotliEncoderState* s, size_t size) {
+  MemoryManager* m = &s->memory_manager_;
+  if (s->storage_size_ >= size) return s->storage_;
+  s->storage_size_ = 0;  /* No usable capacity until the allocation succeeds. */
+  BROTLI_FREE(m, s->storage_);
+  s->storage_ = BROTLI_ALLOC(m, uint8_t, size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->storage_)) return NULL;
+  s->storage_size_ = size;
+  return s->storage_;
+}
+/* Returns the smallest 256 * 2^k that is >= min(max_table_size, input_size). */
+static size_t HashTableSize(size_t max_table_size, size_t input_size) {
+  const size_t limit =
+      (input_size < max_table_size) ? input_size : max_table_size;
+  size_t entries = 256;
+  while (entries < limit) entries <<= 1;
+  return entries;
+}
+/* Returns a zero-filled hash table for an input of size `input_size` at
+   `quality`, writing its entry count to `*table_size`. The table is owned by
+   `s` (small_table_ or large_table_); returns NULL if allocation fails. */
+static int* GetHashTable(BrotliEncoderState* s, int quality,
+                         size_t input_size, size_t* table_size) {
+  /* Use smaller hash table when input.size() is smaller, since we
+     fill the table, incurring O(hash table size) overhead for
+     compression, and if the input is short, we won't need that
+     many hash table entries anyway. */
+  MemoryManager* m = &s->memory_manager_;
+  const size_t max_table_size = MaxHashTableSize(quality);
+  size_t htsize = HashTableSize(max_table_size, input_size);
+  int* table;
+  BROTLI_DCHECK(max_table_size >= 256);
+  /* Only odd shifts are supported by fast-one-pass: 0xAAAAA selects the odd
+     bit positions, so double htsize when its single set bit is an even one. */
+  if (quality == FAST_ONE_PASS_COMPRESSION_QUALITY && (htsize & 0xAAAAA) == 0) {
+    htsize <<= 1;
+  }
+  if (htsize <= sizeof(s->small_table_) / sizeof(s->small_table_[0])) {
+    table = s->small_table_;
+  } else {
+    if (htsize > s->large_table_size_) {
+      s->large_table_size_ = 0;  /* Stays 0 if the allocation below fails. */
+      BROTLI_FREE(m, s->large_table_);
+      s->large_table_ = BROTLI_ALLOC(m, int, htsize);
+      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->large_table_)) return NULL;
+      s->large_table_size_ = htsize;
+    }
+    table = s->large_table_;
+  }
+  *table_size = htsize;
+  memset(table, 0, htsize * sizeof(*table));
+  return table;
+}
+/* Encodes window size `lgwin`: the bit pattern is stored in `*last_bytes` and
+   its length in bits in `*last_bytes_bits` (14 bits when `large_window` is
+   set, otherwise 1, 4 or 7). NOTE(review): presumably RFC 7932 WBITS. */
+static void EncodeWindowBits(int lgwin, BROTLI_BOOL large_window,
+    uint16_t* last_bytes, uint8_t* last_bytes_bits) {
+  int code = 1;  /* Defaults are the lgwin == 17 encoding. */
+  uint8_t code_bits = 7;
+  if (large_window) {
+    code = ((lgwin & 0x3F) << 8) | 0x11;
+    code_bits = 14;
+  } else if (lgwin == 16) {
+    code = 0;
+    code_bits = 1;
+  } else if (lgwin > 17) {
+    code = ((lgwin - 17) << 1) | 0x01;
+    code_bits = 4;
+  } else if (lgwin < 16) {
+    code = ((lgwin - 8) << 4) | 0x01;
+  }
+  *last_bytes = (uint16_t)code;
+  *last_bytes_bits = code_bits;
+}
+/* TODO(eustas): move to compress_fragment.c? */
+/* Initializes the command and distance prefix codes for the first block by
+   copying the default tables below into s->cmd_depth/cmd_bits/cmd_code. */
+static void InitCommandPrefixCodes(BrotliOnePassArena* s) {
+  static const uint8_t kDefaultCommandDepths[128] = {
+    0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
+    0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7,
+    7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6,
+    7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4,
+    4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  };  /* 124 of 128 entries listed; the last 4 are zero-initialized. */
+  static const uint16_t kDefaultCommandBits[128] = {
+    0,   0,   8,   9,   3,  35,   7,   71,
+    39, 103,  23,  47, 175, 111, 239,   31,
+    0,   0,   0,   4,  12,   2,  10,    6,
+    13,  29,  11,  43,  27,  59,  87,   55,
+    15,  79, 319, 831, 191, 703, 447,  959,
+    0,  14,   1,  25,   5,  21,  19,   51,
+    119, 159,  95, 223, 479, 991,  63,  575,
+    127, 639, 383, 895, 255, 767, 511, 1023,
+    14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12,
+    2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255,
+    767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095,
+  };  /* 124 of 128 entries listed; the last 4 are zero-initialized. */
+  static const uint8_t kDefaultCommandCode[] = {
+    0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6,
+    0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c,
+    0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa,
+    0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94,
+    0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00,
+  };  /* 57 bytes; the bit count below is 448 = 56 * 8. */
+  static const size_t kDefaultCommandCodeNumBits = 448;
+  COPY_ARRAY(s->cmd_depth, kDefaultCommandDepths);
+  COPY_ARRAY(s->cmd_bits, kDefaultCommandBits);
+
+  /* Initialize the pre-compressed form of the command and distance prefix
+     codes. */
+  COPY_ARRAY(s->cmd_code, kDefaultCommandCode);
+  s->cmd_code_numbits = kDefaultCommandCodeNumBits;
+}
+
+/* Picks the number of literal contexts (1, 2 or 3) from how well the UTF8
+   prefix of the previous byte predicts the next byte. |bigram_histo| must
+   hold 9 counters, read here as three rows of three. The predictive power
+   is measured as Shannon entropy rather than 'BitsEntropy': the prefix is
+   encoded together with the remaining 6 bits of the following byte, whereas
+   BitsEntropy would assume the symbol is stored alone using Huffman coding.
+   |*num_literal_contexts| is always written; |*literal_context_map| is only
+   written when 2 or 3 contexts are selected. */
+static void ChooseContextMap(int quality,
+                             uint32_t* bigram_histo,
+                             size_t* num_literal_contexts,
+                             const uint32_t** literal_context_map) {
+  static const uint32_t kStaticContextMapContinuation[64] = {
+    1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  };
+  static const uint32_t kStaticContextMapSimpleUTF8[64] = {
+    0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  };
+
+  uint32_t monogram_histo[3] = { 0 };
+  uint32_t two_prefix_histo[6] = { 0 };
+  double per_symbol;      /* 1 / number of sampled symbols */
+  double bits_one_ctx;    /* cost with a single context */
+  double bits_two_ctx;    /* cost with two contexts */
+  double bits_three_ctx;  /* cost with three contexts */
+  size_t total;
+  size_t unused_total;
+  size_t k;
+
+  /* Fold the 3x3 bigram counts into the coarser histograms. */
+  for (k = 0; k < 9; ++k) {
+    const uint32_t count = bigram_histo[k];
+    monogram_histo[k % 3] += count;
+    two_prefix_histo[k % 6] += count;
+  }
+
+  bits_one_ctx = ShannonEntropy(monogram_histo, 3, &unused_total);
+  bits_two_ctx = (ShannonEntropy(two_prefix_histo, 3, &unused_total) +
+                  ShannonEntropy(two_prefix_histo + 3, 3, &unused_total));
+  bits_three_ctx = 0;
+  for (k = 0; k < 3; ++k) {
+    bits_three_ctx += ShannonEntropy(bigram_histo + 3 * k, 3, &unused_total);
+  }
+
+  /* Normalize the totals to bits per symbol. */
+  total = monogram_histo[0] + monogram_histo[1] + monogram_histo[2];
+  BROTLI_DCHECK(total != 0);
+  per_symbol = 1.0 / (double)total;
+  bits_one_ctx *= per_symbol;
+  bits_two_ctx *= per_symbol;
+  bits_three_ctx *= per_symbol;
+
+  if (quality < MIN_QUALITY_FOR_HQ_CONTEXT_MODELING) {
+    /* 3 context models is a bit slower, don't use it at lower qualities. */
+    bits_three_ctx = bits_one_ctx * 10;
+  }
+
+  /* If expected savings by symbol are less than 0.2 bits, skip the
+     context modeling -- in exchange for faster decoding speed. */
+  if (bits_one_ctx - bits_two_ctx < 0.2 &&
+      bits_one_ctx - bits_three_ctx < 0.2) {
+    *num_literal_contexts = 1;
+    return;
+  }
+
+  if (bits_two_ctx - bits_three_ctx < 0.02) {
+    *num_literal_contexts = 2;
+    *literal_context_map = kStaticContextMapSimpleUTF8;
+  } else {
+    *num_literal_contexts = 3;
+    *literal_context_map = kStaticContextMapContinuation;
+  }
+}
+
+/* Decide if we want to use a more complex static context map containing 13
+   context values, based on the entropy reduction of histograms over the
+   first 5 bits of literals.
+
+   |input| is read through |mask| starting at |start_pos| for |length| bytes.
+   |arena| is scratch space and must have room for at least 32 * 14 uint32_t
+   values; it is cleared here. |quality| is unused.
+
+   Returns BROTLI_TRUE and fills |*num_literal_contexts| and
+   |*literal_context_map| when the complex map should be used. Returns
+   BROTLI_FALSE, leaving both outputs untouched, when |size_hint| is below
+   1 MiB, when no stride could be sampled, or when the heuristics reject
+   the map. */
+static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input,
+    size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
+    size_t* num_literal_contexts, const uint32_t** literal_context_map,
+    uint32_t* arena) {
+  static const uint32_t kStaticContextMapComplexUTF8[64] = {
+    11, 11, 12, 12, /* 0 special */
+    0, 0, 0, 0, /* 4 lf */
+    1, 1, 9, 9, /* 8 space */
+    2, 2, 2, 2, /* !, first after space/lf and after something else. */
+    1, 1, 1, 1, /* " */
+    8, 3, 3, 3, /* % */
+    1, 1, 1, 1, /* ({[ */
+    2, 2, 2, 2, /* }]) */
+    8, 4, 4, 4, /* :; */
+    8, 7, 4, 4, /* . */
+    8, 0, 0, 0, /* > */
+    3, 3, 3, 3, /* [0..9] */
+    5, 5, 10, 5, /* [A-Z] */
+    5, 5, 10, 5,
+    6, 6, 6, 6, /* [a-z] */
+    6, 6, 6, 6,
+  };
+  BROTLI_UNUSED(quality);
+  /* Try the more complex static context map only for long data. */
+  if (size_hint < (1 << 20)) {
+    return BROTLI_FALSE;
+  } else {
+    const size_t end_pos = start_pos + length;
+    /* To make entropy calculations faster, we collect histograms
+       over the 5 most significant bits of literals. One histogram
+       without context and 13 additional histograms for each context value. */
+    uint32_t* BROTLI_RESTRICT const combined_histo = arena;
+    uint32_t* BROTLI_RESTRICT const context_histo = arena + 32;
+    uint32_t total = 0;
+    double entropy[3];
+    size_t dummy;
+    size_t i;
+    ContextLut utf8_lut = BROTLI_CONTEXT_LUT(CONTEXT_UTF8);
+    memset(arena, 0, sizeof(arena[0]) * 32 * 14);
+    for (; start_pos + 64 <= end_pos; start_pos += 4096) {
+      const size_t stride_end_pos = start_pos + 64;
+      uint8_t prev2 = input[start_pos & mask];
+      uint8_t prev1 = input[(start_pos + 1) & mask];
+      size_t pos;
+      /* To make the analysis of the data faster we only examine 64 byte long
+         strides at every 4kB intervals. */
+      for (pos = start_pos + 2; pos < stride_end_pos; ++pos) {
+        const uint8_t literal = input[pos & mask];
+        const uint8_t context = (uint8_t)kStaticContextMapComplexUTF8[
+            BROTLI_CONTEXT(prev1, prev2, utf8_lut)];
+        ++total;
+        ++combined_histo[literal >> 3];
+        ++context_histo[(context << 5) + (literal >> 3)];
+        prev2 = prev1;
+        prev1 = literal;
+      }
+    }
+    /* Fewer than 64 bytes were available, so nothing was sampled. Without
+       this guard the normalization below would divide by zero and the
+       resulting NaN comparisons would wrongly select the complex map. */
+    if (total == 0) {
+      return BROTLI_FALSE;
+    }
+    entropy[1] = ShannonEntropy(combined_histo, 32, &dummy);
+    entropy[2] = 0;
+    for (i = 0; i < 13; ++i) {
+      entropy[2] += ShannonEntropy(context_histo + (i << 5), 32, &dummy);
+    }
+    /* Normalize both totals to bits per sampled literal. */
+    entropy[0] = 1.0 / (double)total;
+    entropy[1] *= entropy[0];
+    entropy[2] *= entropy[0];
+    /* The triggering heuristics below were tuned by compressing the individual
+       files of the silesia corpus. If we skip this kind of context modeling
+       for not very well compressible input (i.e. entropy using context modeling
+       is 60% of maximal entropy) or if expected savings by symbol are less
+       than 0.2 bits, then in every case when it triggers, the final compression
+       ratio is improved. Note however that this heuristics might be too strict
+       for some cases and could be tuned further. */
+    if (entropy[2] > 3.0 || entropy[1] - entropy[2] < 0.2) {
+      return BROTLI_FALSE;
+    } else {
+      *num_literal_contexts = 13;
+      *literal_context_map = kStaticContextMapComplexUTF8;
+      return BROTLI_TRUE;
+    }
+  }
+}
+
+/* Selects the literal context model for a block of |length| bytes read from
+   |input| (through |mask|) starting at |start_pos|. Nothing is written when
+   |quality| is below MIN_QUALITY_FOR_CONTEXT_MODELING or the block is
+   shorter than 64 bytes. Otherwise the complex static map is tried first
+   and, if it is rejected, the choice is delegated to ChooseContextMap()
+   using bigram statistics of UTF8 byte prefixes. |arena| is scratch space
+   shared by both steps. */
+static void DecideOverLiteralContextModeling(const uint8_t* input,
+    size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
+    size_t* num_literal_contexts, const uint32_t** literal_context_map,
+    uint32_t* arena) {
+  /* Maps the top two bits of a byte to one of three prefix classes. */
+  static const int kPrefixClass[4] = { 0, 0, 1, 2 };
+  size_t end_pos;
+  size_t stride;
+
+  if (quality < MIN_QUALITY_FOR_CONTEXT_MODELING || length < 64) {
+    return;
+  }
+  if (ShouldUseComplexStaticContextMap(
+      input, start_pos, length, mask, quality, size_hint,
+      num_literal_contexts, literal_context_map, arena)) {
+    /* Context map was already set, nothing else to do. */
+    return;
+  }
+
+  /* Gather bi-gram data of the UTF8 byte prefixes. To keep the analysis
+     cheap only a 64 byte long stride is examined every 4kB. The first nine
+     arena slots serve as the 3x3 histogram. */
+  end_pos = start_pos + length;
+  memset(arena, 0, sizeof(arena[0]) * 9);
+  for (stride = start_pos; stride + 64 <= end_pos; stride += 4096) {
+    const size_t stride_end = stride + 64;
+    int row = kPrefixClass[input[stride & mask] >> 6] * 3;
+    size_t pos;
+    for (pos = stride + 1; pos < stride_end; ++pos) {
+      const int column = kPrefixClass[input[pos & mask] >> 6];
+      ++arena[row + column];
+      row = column * 3;
+    }
+  }
+  ChooseContextMap(quality, arena, num_literal_contexts, literal_context_map);
+}
+
+/* Tells whether a block of |bytes| bytes is worth compressing. Blocks of at
+   most 2 bytes never are. A block that produced few commands and consists
+   almost entirely of literals is sampled (every 13th byte, starting at
+   |last_flush_pos|, read through |mask|); if the sampled literals look close
+   to incompressible the answer is BROTLI_FALSE. In every other case the
+   answer is BROTLI_TRUE. */
+static BROTLI_BOOL ShouldCompress(
+    const uint8_t* data, const size_t mask, const uint64_t last_flush_pos,
+    const size_t bytes, const size_t num_literals, const size_t num_commands) {
+  /* TODO(eustas): find more precise minimal block overhead. */
+  if (bytes <= 2) return BROTLI_FALSE;
+
+  if (num_commands < (bytes >> 8) + 2 &&
+      (double)num_literals > 0.99 * (double)bytes) {
+    static const uint32_t kSampleRate = 13;
+    static const double kMinEntropy = 7.92;
+    uint32_t literal_histo[256] = { 0 };
+    const double bit_cost_threshold =
+        (double)bytes * kMinEntropy / kSampleRate;
+    const size_t sample_count = (bytes + kSampleRate - 1) / kSampleRate;
+    uint32_t cursor = (uint32_t)last_flush_pos;
+    size_t remaining;
+    for (remaining = sample_count; remaining != 0; --remaining) {
+      ++literal_histo[data[cursor & mask]];
+      cursor += kSampleRate;
+    }
+    if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
+      return BROTLI_FALSE;
+    }
+  }
+
+  return BROTLI_TRUE;
+}
+
+/* Chooses the literal context mode for a metablock. CONTEXT_UTF8 is the
+   answer unless the quality is at least MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING
+   and BrotliIsMostlyUTF8() rejects the data, in which case CONTEXT_SIGNED
+   is returned. The UTF8 scan is only performed at those high qualities. */
+static ContextType ChooseContextMode(const BrotliEncoderParams* params,
+    const uint8_t* data, const size_t pos, const size_t mask,
+    const size_t length) {
+  if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
+    return CONTEXT_UTF8;
+  }
+  return BrotliIsMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio)
+      ? CONTEXT_UTF8
+      : CONTEXT_SIGNED;
+}
+
+/* Writes one meta-block covering |bytes| input bytes that start at
+   |last_flush_pos| in |data| (addressed through |mask|) into |storage| at
+   bit position |*storage_ix|.
+
+   The encoding strategy depends on params->quality. The block is stored
+   uncompressed instead when ShouldCompress() rejects it, or when the
+   compressed form turns out more than 4 bytes larger than the input; in
+   both cases |dist_cache| is reset from |saved_dist_cache|.
+
+   On out-of-memory (as reported through |m|) the function returns early. */
+static void WriteMetaBlockInternal(MemoryManager* m,
+                                   const uint8_t* data,
+                                   const size_t mask,
+                                   const uint64_t last_flush_pos,
+                                   const size_t bytes,
+                                   const BROTLI_BOOL is_last,
+                                   ContextType literal_context_mode,
+                                   const BrotliEncoderParams* params,
+                                   const uint8_t prev_byte,
+                                   const uint8_t prev_byte2,
+                                   const size_t num_literals,
+                                   const size_t num_commands,
+                                   Command* commands,
+                                   const int* saved_dist_cache,
+                                   int* dist_cache,
+                                   size_t* storage_ix,
+                                   uint8_t* storage) {
+  const uint32_t flush_pos = WrapPosition(last_flush_pos);
+  ContextLut context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
+  BrotliEncoderParams block_params = *params;
+  const int quality = params->quality;
+  uint16_t saved_header;     /* first two storage bytes before encoding */
+  uint8_t saved_header_bits; /* bit position before encoding */
+
+  if (bytes == 0) {
+    /* Write the ISLAST and ISEMPTY bits. */
+    BrotliWriteBits(2, 3, storage_ix, storage);
+    *storage_ix = (*storage_ix + 7u) & ~7u;
+    return;
+  }
+
+  if (!ShouldCompress(data, mask, last_flush_pos, bytes,
+                      num_literals, num_commands)) {
+    /* Restore the distance cache, as its last update by
+       CreateBackwardReferences is now unused. */
+    memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
+    BrotliStoreUncompressedMetaBlock(is_last, data, flush_pos, mask, bytes,
+                                     storage_ix, storage);
+    return;
+  }
+
+  /* Remember the pending header bits so the attempt can be rolled back. */
+  BROTLI_DCHECK(*storage_ix <= 14);
+  saved_header = (uint16_t)((storage[1] << 8) | storage[0]);
+  saved_header_bits = (uint8_t)(*storage_ix);
+
+  if (quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
+    BrotliStoreMetaBlockFast(m, data, flush_pos, bytes, mask, is_last, params,
+                             commands, num_commands, storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+  } else if (quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
+    BrotliStoreMetaBlockTrivial(m, data, flush_pos, bytes, mask, is_last,
+                                params, commands, num_commands,
+                                storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+  } else {
+    MetaBlockSplit split;
+    InitMetaBlockSplit(&split);
+    if (quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
+      size_t num_literal_contexts = 1;
+      const uint32_t* literal_context_map = NULL;
+      if (!params->disable_literal_context_modeling) {
+        /* TODO(eustas): pull to higher level and reuse. */
+        uint32_t* scratch = BROTLI_ALLOC(m, uint32_t, 14 * 32);
+        if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(scratch)) return;
+        DecideOverLiteralContextModeling(
+            data, flush_pos, bytes, mask, quality, params->size_hint,
+            &num_literal_contexts, &literal_context_map, scratch);
+        BROTLI_FREE(m, scratch);
+      }
+      BrotliBuildMetaBlockGreedy(m, data, flush_pos, mask,
+          prev_byte, prev_byte2, context_lut, num_literal_contexts,
+          literal_context_map, commands, num_commands, &split);
+    } else {
+      BrotliBuildMetaBlock(m, data, flush_pos, mask, &block_params,
+                           prev_byte, prev_byte2,
+                           commands, num_commands,
+                           literal_context_mode,
+                           &split);
+    }
+    /* Either builder may have run out of memory. */
+    if (BROTLI_IS_OOM(m)) return;
+    if (quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) {
+      /* The number of distance symbols effectively used for distance
+         histograms. It might be less than distance alphabet size
+         for "Large Window Brotli" (32-bit). */
+      BrotliOptimizeHistograms(block_params.dist.alphabet_size_limit, &split);
+    }
+    BrotliStoreMetaBlock(m, data, flush_pos, bytes, mask,
+                         prev_byte, prev_byte2,
+                         is_last,
+                         &block_params,
+                         literal_context_mode,
+                         commands, num_commands,
+                         &split,
+                         storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+    DestroyMetaBlockSplit(m, &split);
+  }
+
+  if (bytes + 4 < (*storage_ix >> 3)) {
+    /* Compression expanded the data: restore the distance cache and the
+       saved header bits, then emit the block uncompressed. */
+    memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
+    storage[0] = (uint8_t)saved_header;
+    storage[1] = (uint8_t)(saved_header >> 8);
+    *storage_ix = saved_header_bits;
+    BrotliStoreUncompressedMetaBlock(is_last, data, flush_pos, mask, bytes,
+                                     storage_ix, storage);
+  }
+}
+
+/* Settles the distance coding parameters of |params| and applies them with
+   BrotliInitDistanceParams().
+
+   Below MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS both NPOSTFIX and NDIRECT
+   are zero. Otherwise font mode uses NPOSTFIX = 1 / NDIRECT = 12, and any
+   other mode takes the values already stored in params->dist. A pair that
+   fails validation is replaced by zeros. */
+static void ChooseDistanceParams(BrotliEncoderParams* params) {
+  uint32_t distance_postfix_bits = 0;
+  uint32_t num_direct_distance_codes = 0;
+
+  if (params->quality >= MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS) {
+    BROTLI_BOOL is_valid = BROTLI_FALSE;
+    if (params->mode == BROTLI_MODE_FONT) {
+      distance_postfix_bits = 1;
+      num_direct_distance_codes = 12;
+    } else {
+      distance_postfix_bits = params->dist.distance_postfix_bits;
+      num_direct_distance_codes = params->dist.num_direct_distance_codes;
+    }
+    /* Range-check before shifting: shifting a 32-bit value by 32 or more is
+       undefined behavior, so an out-of-range postfix must be rejected
+       before it is used as a shift count. */
+    if (distance_postfix_bits <= BROTLI_MAX_NPOSTFIX &&
+        num_direct_distance_codes <= BROTLI_MAX_NDIRECT) {
+      /* NDIRECT must be a 4-bit value shifted left by NPOSTFIX. */
+      const uint32_t ndirect_msb =
+          (num_direct_distance_codes >> distance_postfix_bits) & 0x0F;
+      is_valid = TO_BROTLI_BOOL(
+          (ndirect_msb << distance_postfix_bits) == num_direct_distance_codes);
+    }
+    if (!is_valid) {
+      distance_postfix_bits = 0;
+      num_direct_distance_codes = 0;
+    }
+  }
+
+  BrotliInitDistanceParams(&params->dist, distance_postfix_bits,
+                           num_direct_distance_codes, params->large_window);
+}
+
+/* Performs the one-time setup of encoder |s|: sanitizes the parameters,
+   derives lgblock and the distance parameters, sets up the ring buffer,
+   prepares the stream header bits and allocates the arena required by the
+   fast one-pass / two-pass qualities.
+
+   Returns BROTLI_TRUE when |s| is ready (including when it had been
+   initialized before), BROTLI_FALSE when the memory manager is in the
+   out-of-memory state or an arena could not be allocated. */
+static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
+  MemoryManager* m = &s->memory_manager_;
+  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+  if (s->is_initialized_) return BROTLI_TRUE;
+
+  s->last_bytes_bits_ = 0;
+  s->last_bytes_ = 0;
+  s->flint_ = BROTLI_FLINT_DONE;
+  s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
+
+  SanitizeParams(&s->params);
+  s->params.lgblock = ComputeLgBlock(&s->params);
+  ChooseDistanceParams(&s->params);
+
+  if (s->params.stream_offset != 0) {
+    s->flint_ = BROTLI_FLINT_NEEDS_2_BYTES;
+    /* Poison the distance cache. -16 +- 3 is still less than zero (invalid). */
+    s->dist_cache_[0] = -16;
+    s->dist_cache_[1] = -16;
+    s->dist_cache_[2] = -16;
+    s->dist_cache_[3] = -16;
+    memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
+  }
+
+  RingBufferSetup(&s->params, &s->ringbuffer_);
+
+  /* Initialize last byte with stream header. */
+  {
+    int lgwin = s->params.lgwin;
+    if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
+        s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
+      /* The fast qualities announce a window of at least 18 bits. */
+      lgwin = BROTLI_MAX(int, lgwin, 18);
+    }
+    if (s->params.stream_offset == 0) {
+      EncodeWindowBits(lgwin, s->params.large_window,
+                       &s->last_bytes_, &s->last_bytes_bits_);
+    } else {
+      /* Bigger values have the same effect, but could cause overflows. */
+      s->params.stream_offset = BROTLI_MIN(size_t,
+          s->params.stream_offset, BROTLI_MAX_BACKWARD_LIMIT(lgwin));
+    }
+  }
+
+  /* The NULL checks mirror the other allocation sites of this file, so a
+     failed allocation is never handed to InitCommandPrefixCodes(). */
+  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
+    s->one_pass_arena_ = BROTLI_ALLOC(m, BrotliOnePassArena, 1);
+    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->one_pass_arena_)) {
+      return BROTLI_FALSE;
+    }
+    InitCommandPrefixCodes(s->one_pass_arena_);
+  } else if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
+    s->two_pass_arena_ = BROTLI_ALLOC(m, BrotliTwoPassArena, 1);
+    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->two_pass_arena_)) {
+      return BROTLI_FALSE;
+    }
+  }
+
+  s->is_initialized_ = BROTLI_TRUE;
+  return BROTLI_TRUE;
+}
+
+/* Resets |params| to the encoder defaults: default mode, quality and
+   window, no large window, no stream offset, no size hint, literal context
+   modeling enabled, a freshly initialized shared dictionary and distance
+   parameters with zero NPOSTFIX / NDIRECT. */
+static void BrotliEncoderInitParams(BrotliEncoderParams* params) {
+  /* General compression settings. */
+  params->mode = BROTLI_DEFAULT_MODE;
+  params->quality = BROTLI_DEFAULT_QUALITY;
+  params->lgwin = BROTLI_DEFAULT_WINDOW;
+  params->lgblock = 0;
+  params->large_window = BROTLI_FALSE;
+  params->disable_literal_context_modeling = BROTLI_FALSE;
+
+  /* Stream-level hints. */
+  params->size_hint = 0;
+  params->stream_offset = 0;
+
+  BrotliInitSharedEncoderDictionary(&params->dictionary);
+
+  /* Distance coding. */
+  params->dist.distance_postfix_bits = 0;
+  params->dist.num_direct_distance_codes = 0;
+  params->dist.max_distance = BROTLI_MAX_DISTANCE;
+  params->dist.alphabet_size_max =
+      BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_MAX_DISTANCE_BITS);
+  params->dist.alphabet_size_limit = params->dist.alphabet_size_max;
+}
+
+/* Releases what |params| owns; currently that is only the shared encoder
+   dictionary, which is cleaned up through memory manager |m|. */
+static void BrotliEncoderCleanupParams(
+    MemoryManager* m, BrotliEncoderParams* params) {
+  BrotliCleanupSharedEncoderDictionary(m, &params->dictionary);
+}
+
+/* Puts a freshly allocated encoder state |s| into its initial condition:
+   default parameters, zeroed counters and positions, no buffers allocated,
+   the stream in the PROCESSING state and the distance cache holding its
+   starting values. The memory manager is not touched here. */
+static void BrotliEncoderInitState(BrotliEncoderState* s) {
+  BrotliEncoderInitParams(&s->params);
+
+  /* Positions and counters. */
+  s->input_pos_ = 0;
+  s->last_flush_pos_ = 0;
+  s->last_processed_pos_ = 0;
+  s->last_insert_len_ = 0;
+  s->num_commands_ = 0;
+  s->num_literals_ = 0;
+  s->prev_byte_ = 0;
+  s->prev_byte2_ = 0;
+  s->total_in_ = 0;
+  s->total_out_ = 0;
+
+  /* Lazily allocated buffers start out empty. */
+  s->storage_ = NULL;
+  s->storage_size_ = 0;
+  s->commands_ = NULL;
+  s->cmd_alloc_size_ = 0;
+  HasherInit(&s->hasher_);
+  s->large_table_ = NULL;
+  s->large_table_size_ = 0;
+  s->one_pass_arena_ = NULL;
+  s->two_pass_arena_ = NULL;
+  s->command_buf_ = NULL;
+  s->literal_buf_ = NULL;
+  RingBufferInit(&s->ringbuffer_);
+
+  /* Output stream bookkeeping. */
+  s->next_out_ = NULL;
+  s->available_out_ = 0;
+  s->stream_state_ = BROTLI_STREAM_PROCESSING;
+  s->is_last_block_emitted_ = BROTLI_FALSE;
+  s->is_initialized_ = BROTLI_FALSE;
+
+  /* Initial distance cache, plus the copy that is used to restore it when
+     an uncompressed block has to be emitted. */
+  s->dist_cache_[0] = 4;
+  s->dist_cache_[1] = 11;
+  s->dist_cache_[2] = 15;
+  s->dist_cache_[3] = 16;
+  memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
+}
+
+/* Allocates an encoder state through BrotliBootstrapAlloc() with the given
+   allocator callbacks and |opaque| pointer, then initializes its memory
+   manager and state. Returns NULL when the allocation fails. */
+BrotliEncoderState* BrotliEncoderCreateInstance(
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
+  BrotliEncoderState* encoder = (BrotliEncoderState*)BrotliBootstrapAlloc(
+      sizeof(BrotliEncoderState), alloc_func, free_func, opaque);
+  if (encoder != NULL) {
+    BrotliInitMemoryManager(
+        &encoder->memory_manager_, alloc_func, free_func, opaque);
+    BrotliEncoderInitState(encoder);
+  }
+  /* else: BROTLI_DUMP(); */
+  return encoder;
+}
+
+/* Hook invoked when an encoder is being cleaned up. When BROTLI_REPORTING is
+   defined, an extra reporting module has to be linked to provide
+   BrotliEncoderOnFinish(). Otherwise the hook merely evaluates its
+   argument, unless the build already supplied BROTLI_ENCODER_ON_FINISH. */
+#if defined(BROTLI_REPORTING)
+void BrotliEncoderOnFinish(const BrotliEncoderState* s);
+#define BROTLI_ENCODER_ON_FINISH(s) BrotliEncoderOnFinish(s);
+#elif !defined(BROTLI_ENCODER_ON_FINISH)
+#define BROTLI_ENCODER_ON_FINISH(s) (void)(s);
+#endif
+
+/* Releases everything encoder |s| owns, except the state object itself.
+   The finish hook runs first. If the memory manager is in the out-of-memory
+   state it is wiped out as a whole instead of freeing buffers one by one. */
+static void BrotliEncoderCleanupState(BrotliEncoderState* s) {
+  MemoryManager* memory = &s->memory_manager_;
+
+  BROTLI_ENCODER_ON_FINISH(s);
+
+  if (BROTLI_IS_OOM(memory)) {
+    BrotliWipeOutMemoryManager(memory);
+    return;
+  }
+
+  /* Output storage and command list. */
+  BROTLI_FREE(memory, s->storage_);
+  BROTLI_FREE(memory, s->commands_);
+  /* Input window and match finder. */
+  RingBufferFree(memory, &s->ringbuffer_);
+  DestroyHasher(memory, &s->hasher_);
+  /* Buffers used by the fast qualities. */
+  BROTLI_FREE(memory, s->large_table_);
+  BROTLI_FREE(memory, s->one_pass_arena_);
+  BROTLI_FREE(memory, s->two_pass_arena_);
+  BROTLI_FREE(memory, s->command_buf_);
+  BROTLI_FREE(memory, s->literal_buf_);
+  BrotliEncoderCleanupParams(memory, &s->params);
+}
+
+/* Deinitializes and frees a BrotliEncoderState instance. Passing NULL is
+   allowed and does nothing. */
+void BrotliEncoderDestroyInstance(BrotliEncoderState* state) {
+  if (state == NULL) return;
+  BrotliEncoderCleanupState(state);
+  BrotliBootstrapFree(state, &state->memory_manager_);
+}
+
+/*
+   Appends |input_size| bytes from |input_buffer| to the internal ring buffer
+   of the compressor and advances the input position accordingly.
+   No processing of the data occurs at this time and this function can be
+   called multiple times before calling WriteBrotliData() to process the
+   accumulated input. At most input_block_size() bytes of input data can be
+   copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
+   If the write runs out of memory the input position is left unchanged.
+ */
+static void CopyInputToRingBuffer(BrotliEncoderState* s,
+                                  const size_t input_size,
+                                  const uint8_t* input_buffer) {
+  MemoryManager* m = &s->memory_manager_;
+  RingBuffer* rb = &s->ringbuffer_;
+
+  RingBufferWrite(m, input_buffer, input_size, rb);
+  if (BROTLI_IS_OOM(m)) return;
+  s->input_pos_ += input_size;
+
+  /* Once the ring buffer has wrapped around (pos > mask) nothing more is
+     needed: the memory after the written data is already initialized,
+     because the ring buffer has a "tail" that holds a copy of its
+     beginning. Clearing at that point would also damage live data. */
+  if (rb->pos_ > rb->mask_) return;
+
+  /* First round of ring-buffer writes: zero the 7 bytes that follow the data
+     just copied, so that hashing does not depend on uninitialized memory.
+     This keeps compression deterministic and avoids uninitialized-memory
+     warnings in Valgrind. Even without it the output would be valid, just
+     nondeterministic.
+
+     Background: the compressor stores short (at most 8 bytes) substrings of
+     the input already read in a hash table and detects repetitions by
+     looking such substrings up. Every hit is verified against the ring
+     buffer, so a bogus hash table entry can only hurt the compression
+     ratio, never the validity of the output.
+
+     The hash table is populated from the ring buffer as new input arrives,
+     but at the last few positions there are not enough bytes to form a
+     full-length substring. For performance reasons hashing reads with a
+     LOAD64, which can reach 7 bytes beyond the bytes written so far
+     (possibly at an index larger than mask, i.e. inside the tail). The
+     zeros written here make those reads well defined. */
+  memset(rb->buffer_ + rb->pos_, 0, 7);
+}
+
+/* Marks all input as processed by moving the last-processed position up to
+   the input position.
+   Returns true if position wrapping occurs, i.e. the wrapped input position
+   is smaller than the wrapped position processed before. */
+static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) {
+  const uint32_t wrapped_before = WrapPosition(s->last_processed_pos_);
+  const uint32_t wrapped_now = WrapPosition(s->input_pos_);
+  s->last_processed_pos_ = s->input_pos_;
+  return TO_BROTLI_BOOL(wrapped_now < wrapped_before);
+}
+
+/* Tries to lengthen the copy of the most recent command of |s| with bytes
+   from the not yet processed input. For every byte absorbed, the command's
+   copy length and |*wrapped_last_processed_pos| grow by one and |*bytes|
+   shrinks by one.
+
+   Nothing happens unless the command's distance code is a short code or
+   matches the last distance in the distance cache. The source of the copy
+   is either the ring buffer (distance within the window) or the compound
+   dictionary (distance beyond it). The command prefix is recomputed
+   afterwards. There must be at least one command in |s|. */
+static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,
+                              uint32_t* wrapped_last_processed_pos) {
+  Command* cmd = &s->commands_[s->num_commands_ - 1];
+  const uint8_t* ring = s->ringbuffer_.buffer_;
+  const uint32_t ring_mask = s->ringbuffer_.mask_;
+  const uint64_t window_limit =
+      (((uint64_t)1) << s->params.lgwin) - BROTLI_WINDOW_GAP;
+  const uint64_t copy_len = cmd->copy_len_ & 0x1FFFFFF;
+  const uint64_t copy_start = s->last_processed_pos_ - copy_len;
+  const uint64_t max_distance =
+      copy_start < window_limit ? copy_start : window_limit;
+  const uint64_t cmd_dist = (uint64_t)s->dist_cache_[0];
+  const uint32_t distance_code =
+      CommandRestoreDistanceCode(cmd, &s->params.dist);
+  const CompoundDictionary* dict = &s->params.dictionary.compound;
+  const size_t dict_size = dict->total_size;
+
+  /* Only a command that refers to the last distance can be extended. */
+  if (distance_code >= BROTLI_NUM_DISTANCE_SHORT_CODES &&
+      distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) != cmd_dist) {
+    return;
+  }
+
+  if (cmd_dist <= max_distance) {
+    /* The copy source lies inside the ring buffer. */
+    while (*bytes != 0 && ring[*wrapped_last_processed_pos & ring_mask] ==
+           ring[(*wrapped_last_processed_pos - cmd_dist) & ring_mask]) {
+      cmd->copy_len_++;
+      (*bytes)--;
+      (*wrapped_last_processed_pos)++;
+    }
+  } else if ((cmd_dist - max_distance - 1) < dict_size &&
+             copy_len < cmd_dist - max_distance) {
+    /* The copy source lies inside the compound dictionary: find the chunk
+       that holds the first byte after the current copy, then walk forward
+       chunk by chunk. */
+    size_t address =
+        dict_size - (size_t)(cmd_dist - max_distance) + (size_t)copy_len;
+    size_t chunk_index = 0;
+    size_t chunk_pos;
+    const uint8_t* chunk;
+    size_t chunk_length;
+    while (address >= dict->chunk_offsets[chunk_index + 1]) chunk_index++;
+    chunk_pos = address - dict->chunk_offsets[chunk_index];
+    chunk = dict->chunk_source[chunk_index];
+    chunk_length = dict->chunk_offsets[chunk_index + 1] -
+        dict->chunk_offsets[chunk_index];
+    while (*bytes != 0 && ring[*wrapped_last_processed_pos & ring_mask] ==
+           chunk[chunk_pos]) {
+      cmd->copy_len_++;
+      (*bytes)--;
+      (*wrapped_last_processed_pos)++;
+      ++chunk_pos;
+      if (chunk_pos != chunk_length) continue;
+      /* The current chunk is exhausted: move on to the next one, if any. */
+      chunk_index++;
+      chunk_pos = 0;
+      if (chunk_index == dict->num_chunks) break;
+      chunk = dict->chunk_source[chunk_index];
+      chunk_length = dict->chunk_offsets[chunk_index + 1] -
+          dict->chunk_offsets[chunk_index];
+    }
+  }
+
+  /* The copy length is at most the metablock size, and thus expressible. */
+  GetLengthCode(cmd->insert_len_,
+                (size_t)((int)(cmd->copy_len_ & 0x1FFFFFF) +
+                         (int)(cmd->copy_len_ >> 25)),
+                TO_BROTLI_BOOL((cmd->dist_prefix_ & 0x3FF) == 0),
+                &cmd->cmd_prefix_);
+}
+
+/*
+   Processes the accumulated input data and sets |*out_size| to the length of
+   the new output meta-block, or to zero if no new output meta-block has been
+   created (in this case the processed input data is buffered internally).
+   If |*out_size| is positive, |*output| points to the start of the output
+   data. If |is_last| or |force_flush| is BROTLI_TRUE, an output meta-block is
+   always created. However, until |is_last| is BROTLI_TRUE encoder may retain up
+   to 7 bits of the last byte of output. To force encoder to dump the remaining
+   bits use WriteMetadata() to append an empty meta-data block.
+   Returns BROTLI_FALSE if the size of the input data is larger than
+   input_block_size().
+
+   BROTLI_FALSE is also returned when: the configured quality exceeds
+   |params.dictionary.max_quality|, a block marked |is_last| has already been
+   emitted, or the memory manager reports OOM.
+
+   The produced bytes live either in the encoder's storage buffer (see
+   GetBrotliStorage()) or, for the "empty finalized stream" case, in
+   |s->tiny_buf_|; the caller does not own them.
+ */
+static BROTLI_BOOL EncodeData(
+    BrotliEncoderState* s, const BROTLI_BOOL is_last,
+    const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) {
+  /* Amount of input that has been copied in but not yet processed. */
+  const uint64_t delta = UnprocessedInputSize(s);
+  /* Narrowed copy of |delta|; |delta| itself is range-checked against
+     InputBlockSize() further down, before |bytes| is used. */
+  uint32_t bytes = (uint32_t)delta;
+  uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
+  uint8_t* data;
+  uint32_t mask;
+  MemoryManager* m = &s->memory_manager_;
+  ContextType literal_context_mode;
+  ContextLut literal_context_lut;
+  BROTLI_BOOL fast_compress =
+      s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
+      s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY;
+
+  data = s->ringbuffer_.buffer_;
+  mask = s->ringbuffer_.mask_;
+
+  if (delta == 0) {  /* No new input; still might want to flush or finish. */
+    if (!data) {  /* No input has been processed so far. */
+      if (is_last) {  /* Emit complete finalized stream. */
+        /* Append two set bits after the pending bits and emit the (at most
+           two) resulting bytes from the tiny buffer. */
+        BROTLI_DCHECK(s->last_bytes_bits_ <= 14);
+        s->last_bytes_ |= (uint16_t)(3u << s->last_bytes_bits_);
+        s->last_bytes_bits_ = (uint8_t)(s->last_bytes_bits_ + 2u);
+        s->tiny_buf_.u8[0] = (uint8_t)s->last_bytes_;
+        s->tiny_buf_.u8[1] = (uint8_t)(s->last_bytes_ >> 8);
+        *output = s->tiny_buf_.u8;
+        *out_size = (s->last_bytes_bits_ + 7u) >> 3u;
+        return BROTLI_TRUE;
+      } else {  /* No data, not last -> no-op. */
+        *out_size = 0;
+        return BROTLI_TRUE;
+      }
+    } else {
+      /* Fast compress performs flush every block -> flush is no-op. */
+      if (!is_last && (!force_flush || fast_compress)) {  /* Another no-op. */
+        *out_size = 0;
+        return BROTLI_TRUE;
+      }
+    }
+  }
+  BROTLI_DCHECK(data);
+
+  /* Refuse to encode at a quality above the dictionary's |max_quality|. */
+  if (s->params.quality > s->params.dictionary.max_quality) return BROTLI_FALSE;
+  /* Adding more blocks after "last" block is forbidden. */
+  if (s->is_last_block_emitted_) return BROTLI_FALSE;
+  if (is_last) s->is_last_block_emitted_ = BROTLI_TRUE;
+
+  if (delta > InputBlockSize(s)) {
+    return BROTLI_FALSE;
+  }
+  /* Two-pass fast mode needs its command / literal scratch buffers; they are
+     allocated once, on first use, and kept in the encoder state. */
+  if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY &&
+      !s->command_buf_) {
+    s->command_buf_ =
+        BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
+    s->literal_buf_ =
+        BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
+    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->command_buf_) ||
+        BROTLI_IS_NULL(s->literal_buf_)) {
+      return BROTLI_FALSE;
+    }
+  }
+
+  /* Fast qualities: compress the unprocessed fragment straight into storage
+     and return; no command buffering or meta-block merging happens here. */
+  if (fast_compress) {
+    uint8_t* storage;
+    /* Output continues at the bit position left over from the last block. */
+    size_t storage_ix = s->last_bytes_bits_;
+    size_t table_size;
+    int* table;
+
+    storage = GetBrotliStorage(s, 2 * bytes + 503);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    /* Seed the output with the bits retained from the previous block. */
+    storage[0] = (uint8_t)s->last_bytes_;
+    storage[1] = (uint8_t)(s->last_bytes_ >> 8);
+    table = GetHashTable(s, s->params.quality, bytes, &table_size);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
+      BrotliCompressFragmentFast(
+          s->one_pass_arena_, &data[wrapped_last_processed_pos & mask],
+          bytes, is_last,
+          table, table_size,
+          &storage_ix, storage);
+      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    } else {
+      BrotliCompressFragmentTwoPass(
+          s->two_pass_arena_, &data[wrapped_last_processed_pos & mask],
+          bytes, is_last,
+          s->command_buf_, s->literal_buf_,
+          table, table_size,
+          &storage_ix, storage);
+      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    }
+    /* Keep the trailing partial byte for the next call and report only the
+       whole bytes (storage_ix >> 3) as output. */
+    s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
+    s->last_bytes_bits_ = storage_ix & 7u;
+    UpdateLastProcessedPos(s);
+    *output = &storage[0];
+    *out_size = storage_ix >> 3;
+    return BROTLI_TRUE;
+  }
+
+  /* Make sure the command array can hold the worst case for this block. */
+  {
+    /* Theoretical max number of commands is 1 per 2 bytes. */
+    size_t newsize = s->num_commands_ + bytes / 2 + 1;
+    if (newsize > s->cmd_alloc_size_) {
+      Command* new_commands;
+      /* Reserve a bit more memory to allow merging with a next block
+         without reallocation: that would impact speed. */
+      newsize += (bytes / 4) + 16;
+      s->cmd_alloc_size_ = newsize;
+      new_commands = BROTLI_ALLOC(m, Command, newsize);
+      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_commands)) return BROTLI_FALSE;
+      /* Carry over the commands accumulated so far, then drop the old array. */
+      if (s->commands_) {
+        memcpy(new_commands, s->commands_, sizeof(Command) * s->num_commands_);
+        BROTLI_FREE(m, s->commands_);
+      }
+      s->commands_ = new_commands;
+    }
+  }
+
+  InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params,
+      wrapped_last_processed_pos, bytes, is_last);
+
+  /* The context mode is chosen over everything since the last flush, i.e. the
+     whole pending meta-block, not just the newly added bytes. */
+  literal_context_mode = ChooseContextMode(
+      &s->params, data, WrapPosition(s->last_flush_pos_),
+      mask, (size_t)(s->input_pos_ - s->last_flush_pos_));
+  literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
+
+  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+
+  /* Only attempted when commands are pending and no literals are waiting to
+     be inserted; may adjust |bytes| and |wrapped_last_processed_pos|. */
+  if (s->num_commands_ && s->last_insert_len_ == 0) {
+    ExtendLastCommand(s, &bytes, &wrapped_last_processed_pos);
+  }
+
+  /* Generate backward references; new commands are appended after the
+     |num_commands_| already stored. */
+  if (s->params.quality == ZOPFLIFICATION_QUALITY) {
+    BROTLI_DCHECK(s->params.hasher.type == 10);
+    BrotliCreateZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
+        data, mask, literal_context_lut, &s->params,
+        &s->hasher_, s->dist_cache_,
+        &s->last_insert_len_, &s->commands_[s->num_commands_],
+        &s->num_commands_, &s->num_literals_);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+  } else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) {
+    BROTLI_DCHECK(s->params.hasher.type == 10);
+    BrotliCreateHqZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
+        data, mask, literal_context_lut, &s->params,
+        &s->hasher_, s->dist_cache_,
+        &s->last_insert_len_, &s->commands_[s->num_commands_],
+        &s->num_commands_, &s->num_literals_);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+  } else {
+    BrotliCreateBackwardReferences(bytes, wrapped_last_processed_pos,
+        data, mask, literal_context_lut, &s->params,
+        &s->hasher_, s->dist_cache_,
+        &s->last_insert_len_, &s->commands_[s->num_commands_],
+        &s->num_commands_, &s->num_literals_);
+  }
+
+  /* Decide whether to emit a meta-block now or keep accumulating. */
+  {
+    const size_t max_length = MaxMetablockSize(&s->params);
+    const size_t max_literals = max_length / 8;
+    const size_t max_commands = max_length / 8;
+    const size_t processed_bytes = (size_t)(s->input_pos_ - s->last_flush_pos_);
+    /* If maximal possible additional block doesn't fit metablock, flush now. */
+    /* TODO(eustas): Postpone decision until next block arrives? */
+    const BROTLI_BOOL next_input_fits_metablock = TO_BROTLI_BOOL(
+        processed_bytes + InputBlockSize(s) <= max_length);
+    /* If block splitting is not used, then flush as soon as there is some
+       amount of commands / literals produced. */
+    const BROTLI_BOOL should_flush = TO_BROTLI_BOOL(
+        s->params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT &&
+        s->num_literals_ + s->num_commands_ >= MAX_NUM_DELAYED_SYMBOLS);
+    if (!is_last && !force_flush && !should_flush &&
+        next_input_fits_metablock &&
+        s->num_literals_ < max_literals &&
+        s->num_commands_ < max_commands) {
+      /* Merge with next input block. Everything will happen later. */
+      if (UpdateLastProcessedPos(s)) {
+        HasherReset(&s->hasher_);
+      }
+      *out_size = 0;
+      return BROTLI_TRUE;
+    }
+  }
+
+  /* Create the last insert-only command. */
+  if (s->last_insert_len_ > 0) {
+    InitInsertCommand(&s->commands_[s->num_commands_++], s->last_insert_len_);
+    s->num_literals_ += s->last_insert_len_;
+    s->last_insert_len_ = 0;
+  }
+
+  if (!is_last && s->input_pos_ == s->last_flush_pos_) {
+    /* We have no new input data and we don't have to finish the stream, so
+       nothing to do. */
+    *out_size = 0;
+    return BROTLI_TRUE;
+  }
+  BROTLI_DCHECK(s->input_pos_ >= s->last_flush_pos_);
+  BROTLI_DCHECK(s->input_pos_ > s->last_flush_pos_ || is_last);
+  BROTLI_DCHECK(s->input_pos_ - s->last_flush_pos_ <= 1u << 24);
+  /* Emit one meta-block covering everything since the last flush. */
+  {
+    const uint32_t metablock_size =
+        (uint32_t)(s->input_pos_ - s->last_flush_pos_);
+    uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 503);
+    size_t storage_ix = s->last_bytes_bits_;
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    /* Seed the output with the bits retained from the previous block. */
+    storage[0] = (uint8_t)s->last_bytes_;
+    storage[1] = (uint8_t)(s->last_bytes_ >> 8);
+    WriteMetaBlockInternal(
+        m, data, mask, s->last_flush_pos_, metablock_size, is_last,
+        literal_context_mode, &s->params, s->prev_byte_, s->prev_byte2_,
+        s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_,
+        s->dist_cache_, &storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    /* Retain the trailing partial byte; only whole bytes are reported. */
+    s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
+    s->last_bytes_bits_ = storage_ix & 7u;
+    s->last_flush_pos_ = s->input_pos_;
+    if (UpdateLastProcessedPos(s)) {
+      HasherReset(&s->hasher_);
+    }
+    /* Remember the last two input bytes of the flushed data; they are passed
+       to WriteMetaBlockInternal() for the next meta-block. */
+    if (s->last_flush_pos_ > 0) {
+      s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask];
+    }
+    if (s->last_flush_pos_ > 1) {
+      s->prev_byte2_ = data[(uint32_t)(s->last_flush_pos_ - 2) & mask];
+    }
+    s->num_commands_ = 0;
+    s->num_literals_ = 0;
+    /* Save the state of the distance cache in case we need to restore it for
+       emitting an uncompressed block. */
+    memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
+    *output = &storage[0];
+    *out_size = storage_ix >> 3;
+    return BROTLI_TRUE;
+  }
+}
+
+/* Writes the bits still pending in the encoder followed by a metadata
+   meta-block header into |header|, and clears the encoder's pending bits.
+   The result is the number of bytes occupied in |header| (the final byte may
+   be only partially filled).
+   REQUIRED: |header| should be 8-byte aligned and at least 16 bytes long.
+   REQUIRED: |block_size| <= (1 << 24). */
+static size_t WriteMetadataHeader(
+    BrotliEncoderState* s, const size_t block_size, uint8_t* header) {
+  /* Bit cursor: starts right after the bits carried over from earlier output. */
+  size_t bit_pos = s->last_bytes_bits_;
+
+  /* Move the carried-over bits into the header and forget them. */
+  header[0] = (uint8_t)s->last_bytes_;
+  header[1] = (uint8_t)(s->last_bytes_ >> 8);
+  s->last_bytes_bits_ = 0;
+  s->last_bytes_ = 0;
+
+  /* Fixed prefix: one zero bit, the 2-bit value 3, one more zero bit. */
+  BrotliWriteBits(1, 0, &bit_pos, header);
+  BrotliWriteBits(2, 3, &bit_pos, header);
+  BrotliWriteBits(1, 0, &bit_pos, header);
+
+  if (block_size != 0) {
+    /* Length is stored as (block_size - 1) in the fewest whole bytes. */
+    uint32_t size_bits = 0;
+    uint32_t size_bytes;
+    if (block_size != 1) {
+      size_bits = Log2FloorNonZero((uint32_t)block_size - 1) + 1;
+    }
+    size_bytes = (size_bits + 7) / 8;
+    BrotliWriteBits(2, size_bytes, &bit_pos, header);
+    BrotliWriteBits(8 * size_bytes, block_size - 1, &bit_pos, header);
+  } else {
+    /* Empty metadata block: zero length bytes follow. */
+    BrotliWriteBits(2, 0, &bit_pos, header);
+  }
+
+  /* Round the bit cursor up to whole bytes. */
+  return (bit_pos + 7u) >> 3;
+}
+
+/* Upper bound on the encoded size of |input_size| bytes.
+   Yields 2 for empty input and 0 when the bound does not fit in size_t. */
+size_t BrotliEncoderMaxCompressedSize(size_t input_size) {
+  size_t bound;
+  if (input_size == 0) return 2;
+  /* [window bits / empty metadata] + N * [uncompressed] + [last empty]:
+     2 + 4 bytes per full 16 KiB of input + 3 + 1. */
+  bound = input_size + 4 * (input_size >> 14) + 6;
+  /* Unsigned wrap-around means the true bound is not representable. */
+  if (bound < input_size) return 0;
+  return bound;
+}
+
+/* Stores |input| as a brotli stream made only of uncompressed meta-blocks,
+   using the minimal window size.
+   |output| should point at region with at least BrotliEncoderMaxCompressedSize
+   addressable bytes.
+   Returns the length of stream. */
+static size_t MakeUncompressedStream(
+    const uint8_t* input, size_t input_size, uint8_t* output) {
+  const uint8_t* src = input;
+  uint8_t* dst = output;
+  size_t remaining;
+  if (input_size == 0) {
+    /* Empty input is encoded as the single byte 6. */
+    *dst = 6;
+    return 1;
+  }
+  *dst++ = 0x21;  /* window bits = 10, is_last = false */
+  *dst++ = 0x03;  /* empty metadata, padding */
+  for (remaining = input_size; remaining > 0;) {
+    /* Each meta-block carries at most 2^24 bytes. */
+    const uint32_t chunk =
+        (remaining > (1u << 24)) ? (1u << 24) : (uint32_t)remaining;
+    uint32_t nibbles = 0;
+    uint32_t header;
+    /* Larger chunks need one or two extra length nibbles. */
+    if (chunk > (1u << 20)) {
+      nibbles = 2;
+    } else if (chunk > (1u << 16)) {
+      nibbles = 1;
+    }
+    header = (nibbles << 1) | ((chunk - 1) << 3) | (1u << (19 + 4 * nibbles));
+    *dst++ = (uint8_t)header;
+    *dst++ = (uint8_t)(header >> 8);
+    *dst++ = (uint8_t)(header >> 16);
+    if (nibbles == 2) *dst++ = (uint8_t)(header >> 24);
+    memcpy(dst, src, chunk);
+    dst += chunk;
+    src += chunk;
+    remaining -= chunk;
+  }
+  /* Closing byte that terminates the stream. */
+  *dst++ = 3;
+  return (size_t)(dst - output);
+}
+
+/* One-shot compression of |input_buffer| into |encoded_buffer|.
+   On entry |*encoded_size| is the capacity of |encoded_buffer|; on success it
+   is replaced by the number of bytes produced. If streaming compression fails
+   or yields more than BrotliEncoderMaxCompressedSize(), the input is stored
+   as an uncompressed stream instead, provided the buffer is large enough.
+   Returns BROTLI_FALSE (with |*encoded_size| set to 0 on the fallback path)
+   when no valid stream could be written. */
+BROTLI_BOOL BrotliEncoderCompress(
+    int quality, int lgwin, BrotliEncoderMode mode, size_t input_size,
+    const uint8_t input_buffer[BROTLI_ARRAY_PARAM(input_size)],
+    size_t* encoded_size,
+    uint8_t encoded_buffer[BROTLI_ARRAY_PARAM(*encoded_size)]) {
+  const size_t capacity = *encoded_size;
+  const size_t worst_case = BrotliEncoderMaxCompressedSize(input_size);
+  BrotliEncoderState* enc;
+
+  /* Output buffer needs at least one byte. */
+  if (capacity == 0) return BROTLI_FALSE;
+
+  /* Handle the special case of empty input. */
+  if (input_size == 0) {
+    *encoded_size = 1;
+    *encoded_buffer = 6;
+    return BROTLI_TRUE;
+  }
+
+  enc = BrotliEncoderCreateInstance(0, 0, 0);
+  if (!enc) return BROTLI_FALSE;
+
+  {
+    size_t in_left = input_size;
+    const uint8_t* in_cursor = input_buffer;
+    size_t out_left = capacity;
+    uint8_t* out_cursor = encoded_buffer;
+    size_t produced = 0;
+    BROTLI_BOOL ok;
+    /* TODO(eustas): check that parameters are sane. */
+    BrotliEncoderSetParameter(enc, BROTLI_PARAM_QUALITY, (uint32_t)quality);
+    BrotliEncoderSetParameter(enc, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
+    BrotliEncoderSetParameter(enc, BROTLI_PARAM_MODE, (uint32_t)mode);
+    BrotliEncoderSetParameter(
+        enc, BROTLI_PARAM_SIZE_HINT, (uint32_t)input_size);
+    if (lgwin > BROTLI_MAX_WINDOW_BITS) {
+      BrotliEncoderSetParameter(enc, BROTLI_PARAM_LARGE_WINDOW, BROTLI_TRUE);
+    }
+    ok = BrotliEncoderCompressStream(enc, BROTLI_OPERATION_FINISH,
+        &in_left, &in_cursor, &out_left, &out_cursor, &produced);
+    /* A stream that did not reach the finished state is a failure. */
+    if (!BrotliEncoderIsFinished(enc)) ok = BROTLI_FALSE;
+    *encoded_size = produced;
+    BrotliEncoderDestroyInstance(enc);
+    /* Accept the result unless it failed or exceeded the known bound. */
+    if (ok && !(worst_case && *encoded_size > worst_case)) {
+      return BROTLI_TRUE;
+    }
+  }
+
+  /* Fallback: wrap the raw input, if the bound is known and it fits. */
+  *encoded_size = 0;
+  if (!worst_case || capacity < worst_case) return BROTLI_FALSE;
+  *encoded_size =
+      MakeUncompressedStream(input_buffer, input_size, encoded_buffer);
+  return BROTLI_TRUE;
+}
+
+/* Appends a 6-bit padding header to the bits still pending in the encoder
+   and queues the resulting byte(s) as output; the pending bits are cleared. */
+static void InjectBytePaddingBlock(BrotliEncoderState* s) {
+  uint8_t* out;
+  uint32_t bits = s->last_bytes_;
+  size_t nbits = s->last_bytes_bits_;
+
+  s->last_bytes_bits_ = 0;
+  s->last_bytes_ = 0;
+
+  /* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */
+  bits |= 0x6u << nbits;
+  nbits += 6;
+
+  /* If we have already created storage, then append to it.
+     Storage is valid until next block is being compressed. */
+  if (!s->next_out_) {
+    out = s->tiny_buf_.u8;
+    s->next_out_ = out;
+  } else {
+    out = s->next_out_ + s->available_out_;
+  }
+
+  /* Write only as many bytes as the bit count actually touches. */
+  out[0] = (uint8_t)bits;
+  if (nbits > 8) {
+    out[1] = (uint8_t)(bits >> 8);
+    if (nbits > 16) out[2] = (uint8_t)(bits >> 16);
+  }
+  s->available_out_ += (nbits + 7) >> 3;
+}
+
+/* Reports the running output byte count through |total_out|; does nothing
+   when |total_out| is NULL. */
+static void SetTotalOut(BrotliEncoderState* s, size_t* total_out) {
+  const size_t ceiling = (size_t)-1;
+  if (!total_out) return;
+  /* Saturating conversion uint64_t -> size_t */
+  *total_out = (s->total_out_ < ceiling) ? (size_t)s->total_out_ : ceiling;
+}
+
+/* Performs at most one step of output work: either injects the byte-padding
+   block needed by a pending flush, or moves buffered encoder output into the
+   caller's buffer.
+   Returns false if nothing is done. */
+static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s,
+    size_t* available_out, uint8_t** next_out, size_t* total_out) {
+  size_t chunk;
+
+  /* A requested flush with leftover bits must be byte-aligned first. */
+  if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
+      s->last_bytes_bits_ != 0) {
+    InjectBytePaddingBlock(s);
+    return BROTLI_TRUE;
+  }
+
+  /* Nothing buffered, or no room at the destination: no progress possible. */
+  if (s->available_out_ == 0 || *available_out == 0) {
+    return BROTLI_FALSE;
+  }
+
+  chunk = BROTLI_MIN(size_t, s->available_out_, *available_out);
+  memcpy(*next_out, s->next_out_, chunk);
+  /* Advance the caller's cursor. */
+  *next_out += chunk;
+  *available_out -= chunk;
+  /* Advance the encoder's cursor and bookkeeping. */
+  s->next_out_ += chunk;
+  s->available_out_ -= chunk;
+  s->total_out_ += chunk;
+  SetTotalOut(s, total_out);
+  return BROTLI_TRUE;
+}
+
+/* Ends a pending flush once all buffered output has been consumed: the
+   stream returns to the processing state and the output pointer is reset. */
+static void CheckFlushComplete(BrotliEncoderState* s) {
+  if (s->stream_state_ != BROTLI_STREAM_FLUSH_REQUESTED) return;
+  if (s->available_out_ != 0) return;
+  s->next_out_ = 0;
+  s->stream_state_ = BROTLI_STREAM_PROCESSING;
+}
+
+/* Streaming entry point for the two "fast" qualities
+   (FAST_ONE_PASS_COMPRESSION_QUALITY / FAST_TWO_PASS_COMPRESSION_QUALITY).
+   Input is compressed directly from |*next_in| in blocks of at most
+   (1 << lgwin) bytes, without going through the ring buffer. Output is
+   written straight into |*next_out| when the worst-case block size fits
+   there, otherwise into the encoder's storage and handed out later by
+   InjectFlushOrPushOutput().
+   Returns BROTLI_FALSE if the quality is not one of the fast ones or if an
+   allocation fails; BROTLI_TRUE otherwise. */
+static BROTLI_BOOL BrotliEncoderCompressStreamFast(
+    BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
+    const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
+    size_t* total_out) {
+  const size_t block_size_limit = (size_t)1 << s->params.lgwin;
+  /* Scratch size for two-pass mode: the smallest of the two-pass block size,
+     the input available right now, and the block size limit. */
+  const size_t buf_size = BROTLI_MIN(size_t, kCompressFragmentTwoPassBlockSize,
+      BROTLI_MIN(size_t, *available_in, block_size_limit));
+  /* tmp_* are call-local scratch buffers (freed before the final return);
+     command_buf / literal_buf point at whichever buffers are in use. */
+  uint32_t* tmp_command_buf = NULL;
+  uint32_t* command_buf = NULL;
+  uint8_t* tmp_literal_buf = NULL;
+  uint8_t* literal_buf = NULL;
+  MemoryManager* m = &s->memory_manager_;
+  if (s->params.quality != FAST_ONE_PASS_COMPRESSION_QUALITY &&
+      s->params.quality != FAST_TWO_PASS_COMPRESSION_QUALITY) {
+    return BROTLI_FALSE;
+  }
+  if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
+    /* Persistent full-size buffers are created only when a full-size block
+       is about to be processed; smaller inputs use temporary buffers. */
+    if (!s->command_buf_ && buf_size == kCompressFragmentTwoPassBlockSize) {
+      s->command_buf_ =
+          BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
+      s->literal_buf_ =
+          BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
+      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->command_buf_) ||
+          BROTLI_IS_NULL(s->literal_buf_)) {
+        return BROTLI_FALSE;
+      }
+    }
+    if (s->command_buf_) {
+      command_buf = s->command_buf_;
+      literal_buf = s->literal_buf_;
+    } else {
+      tmp_command_buf = BROTLI_ALLOC(m, uint32_t, buf_size);
+      tmp_literal_buf = BROTLI_ALLOC(m, uint8_t, buf_size);
+      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tmp_command_buf) ||
+          BROTLI_IS_NULL(tmp_literal_buf)) {
+        return BROTLI_FALSE;
+      }
+      command_buf = tmp_command_buf;
+      literal_buf = tmp_literal_buf;
+    }
+  }
+
+  /* NOTE(review): the OOM early returns inside the loop below skip the
+     BROTLI_FREE calls for the tmp_* buffers; presumably the memory manager
+     reclaims them when OOM handling is enabled - confirm. */
+  while (BROTLI_TRUE) {
+    /* Drain pending output / inject flush padding before compressing more. */
+    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
+      continue;
+    }
+
+    /* Compress block only when internal output buffer is empty, stream is not
+       finished, there is no pending flush request, and there is either
+       additional input or pending operation. */
+    if (s->available_out_ == 0 &&
+        s->stream_state_ == BROTLI_STREAM_PROCESSING &&
+        (*available_in != 0 || op != BROTLI_OPERATION_PROCESS)) {
+      size_t block_size = BROTLI_MIN(size_t, block_size_limit, *available_in);
+      /* The finish / flush request applies only to the block that consumes
+         all of the remaining input. */
+      BROTLI_BOOL is_last =
+          (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH);
+      BROTLI_BOOL force_flush =
+          (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH);
+      size_t max_out_size = 2 * block_size + 503;
+      BROTLI_BOOL inplace = BROTLI_TRUE;
+      uint8_t* storage = NULL;
+      /* Output continues at the bit position left over from the last block. */
+      size_t storage_ix = s->last_bytes_bits_;
+      size_t table_size;
+      int* table;
+
+      /* Flush with no input: nothing to compress, just request the flush. */
+      if (force_flush && block_size == 0) {
+        s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
+        continue;
+      }
+      /* Write straight into the caller's buffer when the worst case fits;
+         otherwise use the encoder's own storage. */
+      if (max_out_size <= *available_out) {
+        storage = *next_out;
+      } else {
+        inplace = BROTLI_FALSE;
+        storage = GetBrotliStorage(s, max_out_size);
+        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+      }
+      /* Seed the output with the bits retained from the previous block. */
+      storage[0] = (uint8_t)s->last_bytes_;
+      storage[1] = (uint8_t)(s->last_bytes_ >> 8);
+      table = GetHashTable(s, s->params.quality, block_size, &table_size);
+      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+
+      if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
+        BrotliCompressFragmentFast(s->one_pass_arena_, *next_in, block_size,
+            is_last, table, table_size, &storage_ix, storage);
+        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+      } else {
+        BrotliCompressFragmentTwoPass(s->two_pass_arena_, *next_in, block_size,
+            is_last, command_buf, literal_buf, table, table_size,
+            &storage_ix, storage);
+        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+      }
+      /* Consume the input that was just compressed. */
+      if (block_size != 0) {
+        *next_in += block_size;
+        *available_in -= block_size;
+        s->total_in_ += block_size;
+      }
+      if (inplace) {
+        /* Whole bytes are already in the caller's buffer: account for them. */
+        size_t out_bytes = storage_ix >> 3;
+        BROTLI_DCHECK(out_bytes <= *available_out);
+        BROTLI_DCHECK((storage_ix & 7) == 0 || out_bytes < *available_out);
+        *next_out += out_bytes;
+        *available_out -= out_bytes;
+        s->total_out_ += out_bytes;
+        SetTotalOut(s, total_out);
+      } else {
+        /* Queue the whole bytes; they are copied out on a later iteration. */
+        size_t out_bytes = storage_ix >> 3;
+        s->next_out_ = storage;
+        s->available_out_ = out_bytes;
+      }
+      /* Retain the trailing partial byte for the next block. */
+      s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
+      s->last_bytes_bits_ = storage_ix & 7u;
+
+      if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
+      if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
+      continue;
+    }
+    break;
+  }
+  BROTLI_FREE(m, tmp_command_buf);
+  BROTLI_FREE(m, tmp_literal_buf);
+  CheckFlushComplete(s);
+  return BROTLI_TRUE;
+}
+
+/* Emits the caller's input as a metadata block (BROTLI_OPERATION_EMIT_METADATA
+   workflow). May be called repeatedly until the whole block has been written.
+
+   s             - encoder state; its stream state tracks progress through
+                   the metadata head / body phases.
+   available_in  - in: number of metadata bytes offered (at most 1 << 24);
+                   out: bytes not yet consumed.
+   next_in       - cursor into the metadata bytes; advanced as they are used.
+   available_out - room left in the caller's output buffer; may be zero, in
+                   which case output is staged in the encoder's tiny buffer.
+   next_out      - cursor into the caller's output buffer.
+   total_out     - optional; receives the running output byte count.
+
+   Returns BROTLI_FALSE if the block is too large, the stream is in a state
+   that does not allow metadata, or flushing pending data fails. */
+static BROTLI_BOOL ProcessMetadata(
+    BrotliEncoderState* s, size_t* available_in, const uint8_t** next_in,
+    size_t* available_out, uint8_t** next_out, size_t* total_out) {
+  if (*available_in > (1u << 24)) return BROTLI_FALSE;
+  /* Switch to metadata block workflow, if required. */
+  if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
+    s->remaining_metadata_bytes_ = (uint32_t)*available_in;
+    s->stream_state_ = BROTLI_STREAM_METADATA_HEAD;
+  }
+  if (s->stream_state_ != BROTLI_STREAM_METADATA_HEAD &&
+      s->stream_state_ != BROTLI_STREAM_METADATA_BODY) {
+    return BROTLI_FALSE;
+  }
+
+  while (BROTLI_TRUE) {
+    /* Drain pending output / inject flush padding first. */
+    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
+      continue;
+    }
+    /* Staged output the caller has no room for: stop until it is taken. */
+    if (s->available_out_ != 0) break;
+
+    /* Regular data is still buffered: flush it out as its own meta-block
+       before the metadata block starts. */
+    if (s->input_pos_ != s->last_flush_pos_) {
+      BROTLI_BOOL result = EncodeData(s, BROTLI_FALSE, BROTLI_TRUE,
+          &s->available_out_, &s->next_out_);
+      if (!result) return BROTLI_FALSE;
+      continue;
+    }
+
+    if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD) {
+      /* Stage the metadata header in the tiny buffer. */
+      s->next_out_ = s->tiny_buf_.u8;
+      s->available_out_ =
+          WriteMetadataHeader(s, s->remaining_metadata_bytes_, s->next_out_);
+      s->stream_state_ = BROTLI_STREAM_METADATA_BODY;
+      continue;
+    } else {
+      /* Exit workflow only when there is no more input and no more output.
+         Otherwise client may continue producing empty metadata blocks. */
+      if (s->remaining_metadata_bytes_ == 0) {
+        s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
+        s->stream_state_ = BROTLI_STREAM_PROCESSING;
+        break;
+      }
+      if (*available_out) {
+        /* Directly copy input to output. */
+        uint32_t copy = (uint32_t)BROTLI_MIN(
+            size_t, s->remaining_metadata_bytes_, *available_out);
+        memcpy(*next_out, *next_in, copy);
+        *next_in += copy;
+        *available_in -= copy;
+        s->total_in_ += copy;  /* not actually data input, though */
+        s->remaining_metadata_bytes_ -= copy;
+        *next_out += copy;
+        *available_out -= copy;
+        /* These bytes bypass InjectFlushOrPushOutput(), so they have to be
+           counted here; otherwise the reported total would depend on whether
+           the body went through the tiny buffer or straight to the caller. */
+        s->total_out_ += copy;
+        SetTotalOut(s, total_out);
+      } else {
+        /* This guarantees progress in "TakeOutput" workflow. */
+        uint32_t copy = BROTLI_MIN(uint32_t, s->remaining_metadata_bytes_, 16);
+        s->next_out_ = s->tiny_buf_.u8;
+        memcpy(s->next_out_, *next_in, copy);
+        *next_in += copy;
+        *available_in -= copy;
+        s->total_in_ += copy;  /* not actually data input, though */
+        s->remaining_metadata_bytes_ -= copy;
+        s->available_out_ = copy;
+      }
+      continue;
+    }
+  }
+
+  return BROTLI_TRUE;
+}
+
+/* Fills in |params.size_hint| when it is still unset (zero), using the
+   unprocessed input plus |available_in|, capped at 1 << 30. */
+static void UpdateSizeHint(BrotliEncoderState* s, size_t available_in) {
+  const uint32_t cap = 1u << 30;
+  uint64_t pending;
+  uint64_t incoming;
+
+  /* An existing hint is never overwritten. */
+  if (s->params.size_hint != 0) return;
+
+  pending = UnprocessedInputSize(s);
+  incoming = available_in;
+  /* Each term is checked on its own before the sum is compared. */
+  if (pending < cap && incoming < cap && (pending + incoming) < cap) {
+    s->params.size_hint = (uint32_t)(pending + incoming);
+  } else {
+    s->params.size_hint = cap;
+  }
+}
+
+/* Main streaming entry point.
+   Consumes input from |*next_in| / |*available_in|, writes output to
+   |*next_out| / |*available_out|, and advances all four as it goes;
+   |total_out|, when not NULL, receives the running output byte count.
+   |op| selects plain processing, flush, finish, or metadata emission.
+   Metadata requests are delegated to ProcessMetadata() and the two fast
+   qualities to BrotliEncoderCompressStreamFast(); all other qualities run
+   the copy-in / push-out / encode loop below.
+   Returns BROTLI_FALSE when the encoder cannot be initialized, when the call
+   is inconsistent with the current stream state (e.g. input supplied while
+   not in the processing state, or an unfinished metadata block continued
+   with a different size or operation), or when encoding fails. */
+BROTLI_BOOL BrotliEncoderCompressStream(
+    BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
+    const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
+    size_t* total_out) {
+  if (!EnsureInitialized(s)) return BROTLI_FALSE;
+
+  /* Unfinished metadata block; check requirements. */
+  if (s->remaining_metadata_bytes_ != BROTLI_UINT32_MAX) {
+    if (*available_in != s->remaining_metadata_bytes_) return BROTLI_FALSE;
+    if (op != BROTLI_OPERATION_EMIT_METADATA) return BROTLI_FALSE;
+  }
+
+  if (op == BROTLI_OPERATION_EMIT_METADATA) {
+    UpdateSizeHint(s, 0);  /* First data metablock might be emitted here. */
+    return ProcessMetadata(
+        s, available_in, next_in, available_out, next_out, total_out);
+  }
+
+  /* A metadata block is in progress but the operation is not EMIT_METADATA. */
+  if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD ||
+      s->stream_state_ == BROTLI_STREAM_METADATA_BODY) {
+    return BROTLI_FALSE;
+  }
+
+  /* New input is accepted only in the processing state. */
+  if (s->stream_state_ != BROTLI_STREAM_PROCESSING && *available_in != 0) {
+    return BROTLI_FALSE;
+  }
+  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
+      s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
+    return BrotliEncoderCompressStreamFast(s, op, available_in, next_in,
+        available_out, next_out, total_out);
+  }
+  /* Each iteration performs one step - copy input in, push output out, or
+     encode a block - and loops until none of them can make progress. */
+  while (BROTLI_TRUE) {
+    size_t remaining_block_size = RemainingInputBlockSize(s);
+    /* Shorten input to flint size. */
+    if (s->flint_ >= 0 && remaining_block_size > (size_t)s->flint_) {
+      remaining_block_size = (size_t)s->flint_;
+    }
+
+    /* Step 1: move as much input as the current block allows into the ring
+       buffer. */
+    if (remaining_block_size != 0 && *available_in != 0) {
+      size_t copy_input_size =
+          BROTLI_MIN(size_t, remaining_block_size, *available_in);
+      CopyInputToRingBuffer(s, copy_input_size, *next_in);
+      *next_in += copy_input_size;
+      *available_in -= copy_input_size;
+      s->total_in_ += copy_input_size;
+      /* Count the copied bytes against the flint budget while it is positive. */
+      if (s->flint_ > 0) s->flint_ = (int8_t)(s->flint_ - (int)copy_input_size);
+      continue;
+    }
+
+    /* Step 2: hand pending output to the caller / inject flush padding. */
+    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
+      /* Exit the "emit flint" workflow. */
+      if (s->flint_ == BROTLI_FLINT_WAITING_FOR_FLUSHING) {
+        CheckFlushComplete(s);
+        if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
+          s->flint_ = BROTLI_FLINT_DONE;
+        }
+      }
+      continue;
+    }
+
+    /* Compress data only when internal output buffer is empty, stream is not
+       finished and there is no pending flush request. */
+    if (s->available_out_ == 0 &&
+        s->stream_state_ == BROTLI_STREAM_PROCESSING) {
+      /* Step 3: encode when the input block is full, or when the caller
+         asked for something other than plain processing. */
+      if (remaining_block_size == 0 || op != BROTLI_OPERATION_PROCESS) {
+        /* Finish / flush take effect only once all input is consumed. */
+        BROTLI_BOOL is_last = TO_BROTLI_BOOL(
+            (*available_in == 0) && op == BROTLI_OPERATION_FINISH);
+        BROTLI_BOOL force_flush = TO_BROTLI_BOOL(
+            (*available_in == 0) && op == BROTLI_OPERATION_FLUSH);
+        BROTLI_BOOL result;
+        /* Force emitting (uncompressed) piece containing flint. */
+        if (!is_last && s->flint_ == 0) {
+          s->flint_ = BROTLI_FLINT_WAITING_FOR_FLUSHING;
+          force_flush = BROTLI_TRUE;
+        }
+        UpdateSizeHint(s, *available_in);
+        result = EncodeData(s, is_last, force_flush,
+            &s->available_out_, &s->next_out_);
+        if (!result) return BROTLI_FALSE;
+        if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
+        if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
+        continue;
+      }
+    }
+    break;
+  }
+  CheckFlushComplete(s);
+  return BROTLI_TRUE;
+}
+
+/* True once the stream has reached the finished state and no buffered
+   output remains to be taken. */
+BROTLI_BOOL BrotliEncoderIsFinished(BrotliEncoderState* s) {
+  if (s->stream_state_ != BROTLI_STREAM_FINISHED) return BROTLI_FALSE;
+  return TO_BROTLI_BOOL(!BrotliEncoderHasMoreOutput(s));
+}
+
+/* True while the encoder still holds output bytes not yet handed out. */
+BROTLI_BOOL BrotliEncoderHasMoreOutput(BrotliEncoderState* s) {
+  const size_t pending = s->available_out_;
+  return TO_BROTLI_BOOL(pending != 0);
+}
+
+/* Hands out a pointer to buffered encoder output and marks it consumed.
+   On entry a non-zero |*size| caps how many bytes may be taken; zero means
+   "everything available". On return |*size| is the number of bytes taken.
+   Returns 0 (and sets |*size| to 0) when there is nothing to take. */
+const uint8_t* BrotliEncoderTakeOutput(BrotliEncoderState* s, size_t* size) {
+  const size_t limit = *size;
+  const size_t buffered = s->available_out_;
+  size_t taken = buffered;
+  uint8_t* chunk;
+
+  if (limit != 0 && limit < buffered) taken = limit;
+  if (taken == 0) {
+    *size = 0;
+    return 0;
+  }
+
+  /* Capture the start before the cursor moves (and before a completed flush
+     may reset it). */
+  chunk = s->next_out_;
+  s->next_out_ = chunk + taken;
+  s->available_out_ = buffered - taken;
+  s->total_out_ += taken;
+  CheckFlushComplete(s);
+  *size = taken;
+  return chunk;
+}
+
+/* Returns the value of BROTLI_VERSION the encoder was compiled with. */
+uint32_t BrotliEncoderVersion(void) {
+  const uint32_t version = BROTLI_VERSION;
+  return version;
+}
+
+/* Builds a managed, encoder-ready dictionary from |data|.
+   |type| must be BROTLI_SHARED_DICTIONARY_RAW or
+   BROTLI_SHARED_DICTIONARY_SERIALIZED; |quality| is only forwarded for the
+   serialized form. The allocator triple is used for the managed wrapper.
+   Returns NULL for an unsupported type or when any construction step fails;
+   in that case everything allocated here is released again. */
+BrotliEncoderPreparedDictionary* BrotliEncoderPrepareDictionary(
+    BrotliSharedDictionaryType type, size_t size,
+    const uint8_t data[BROTLI_ARRAY_PARAM(size)], int quality,
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
+  ManagedDictionary* managed = NULL;
+  const BROTLI_BOOL is_raw = TO_BROTLI_BOOL(type == BROTLI_SHARED_DICTIONARY_RAW);
+
+  if (!is_raw && type != BROTLI_SHARED_DICTIONARY_SERIALIZED) {
+    return NULL;
+  }
+
+  managed = BrotliCreateManagedDictionary(alloc_func, free_func, opaque);
+  if (managed == NULL) {
+    return NULL;
+  }
+
+  if (is_raw) {
+    managed->dictionary = (uint32_t*)CreatePreparedDictionary(
+        &managed->memory_manager_, data, size);
+  } else {
+    SharedEncoderDictionary* shared = (SharedEncoderDictionary*)BrotliAllocate(
+        &managed->memory_manager_, sizeof(SharedEncoderDictionary));
+    managed->dictionary = (uint32_t*)shared;
+    /* A failed initialization releases the freshly allocated struct. */
+    if (shared != NULL &&
+        !BrotliInitCustomSharedEncoderDictionary(
+            &managed->memory_manager_, data, size, quality, shared)) {
+      BrotliFree(&managed->memory_manager_, shared);
+      managed->dictionary = NULL;
+    }
+  }
+
+  if (managed->dictionary != NULL) {
+    return (BrotliEncoderPreparedDictionary*)managed;
+  }
+  /* Nothing usable was built: drop the wrapper as well. */
+  BrotliDestroyManagedDictionary(managed);
+  return NULL;
+}
+
+/* Releases a dictionary created by BrotliEncoderPrepareDictionary().
+   NULL and objects whose first field is not the managed-dictionary magic are
+   ignored. */
+void BrotliEncoderDestroyPreparedDictionary(
+    BrotliEncoderPreparedDictionary* dictionary) {
+  ManagedDictionary* managed = (ManagedDictionary*)dictionary;
+  if (!dictionary) return;
+  /* First field of dictionary structs. */
+  /* Only managed dictionaries are eligible for destruction by this method. */
+  if (managed->magic != kManagedDictionaryMagic) {
+    return;
+  }
+  /* A NULL payload should never ever happen; it is tolerated silently. */
+  if (managed->dictionary != NULL) {
+    const uint32_t payload_magic = *managed->dictionary;
+    if (payload_magic == kLeanPreparedDictionaryMagic) {
+      DestroyPreparedDictionary(
+          &managed->memory_manager_, (PreparedDictionary*)managed->dictionary);
+    } else if (payload_magic == kSharedDictionaryMagic) {
+      BrotliCleanupSharedEncoderDictionary(&managed->memory_manager_,
+          (SharedEncoderDictionary*)managed->dictionary);
+      BrotliFree(&managed->memory_manager_, managed->dictionary);
+    }
+    /* There is also kPreparedDictionaryMagic, but such instances should be
+     * constructed and destroyed by different means. */
+  }
+  managed->dictionary = NULL;
+  BrotliDestroyManagedDictionary(managed);
+}
+
+/* Attaches |dictionary| to the encoder |state|.
+   The dictionary kind is read from its leading magic word; a managed wrapper
+   is unwrapped first. Prepared (regular or lean) dictionaries are added to
+   the compound dictionary. Shared dictionaries can only be attached before
+   the encoder is initialized: they lower |max_quality|, contribute their
+   compound chunks and, unless they carry the default contextual dictionary,
+   replace the current contextual dictionary (which must still be the
+   default).
+   Returns BROTLI_FALSE for an unknown magic or when any of those steps is
+   refused. */
+BROTLI_BOOL BrotliEncoderAttachPreparedDictionary(BrotliEncoderState* state,
+    const BrotliEncoderPreparedDictionary* dictionary) {
+  /* First field of dictionary structs */
+  const BrotliEncoderPreparedDictionary* payload = dictionary;
+  uint32_t tag = *((const uint32_t*)payload);
+  SharedEncoderDictionary* target = NULL;
+
+  if (tag == kManagedDictionaryMagic) {
+    /* Unwrap managed dictionary. */
+    ManagedDictionary* wrapper = (ManagedDictionary*)payload;
+    tag = *wrapper->dictionary;
+    payload = (BrotliEncoderPreparedDictionary*)wrapper->dictionary;
+  }
+  target = &state->params.dictionary;
+
+  if (tag == kPreparedDictionaryMagic ||
+      tag == kLeanPreparedDictionaryMagic) {
+    const PreparedDictionary* prepared = (const PreparedDictionary*)payload;
+    if (!AttachPreparedDictionary(&target->compound, prepared)) {
+      return BROTLI_FALSE;
+    }
+    return BROTLI_TRUE;
+  }
+
+  if (tag != kSharedDictionaryMagic) {
+    return BROTLI_FALSE;
+  }
+
+  {
+    const SharedEncoderDictionary* incoming =
+        (const SharedEncoderDictionary*)payload;
+    /* "Default" means: not context based, exactly one dictionary, and that
+       dictionary uses the static hash tables. */
+    BROTLI_BOOL was_default = !target->contextual.context_based &&
+        target->contextual.num_dictionaries == 1 &&
+        target->contextual.dict[0]->hash_table_words ==
+        kStaticDictionaryHashWords &&
+        target->contextual.dict[0]->hash_table_lengths ==
+        kStaticDictionaryHashLengths;
+    BROTLI_BOOL new_default = !incoming->contextual.context_based &&
+        incoming->contextual.num_dictionaries == 1 &&
+        incoming->contextual.dict[0]->hash_table_words ==
+        kStaticDictionaryHashWords &&
+        incoming->contextual.dict[0]->hash_table_lengths ==
+        kStaticDictionaryHashLengths;
+    size_t chunk = 0;
+
+    if (state->is_initialized_) return BROTLI_FALSE;
+
+    target->max_quality =
+        BROTLI_MIN(int, target->max_quality, incoming->max_quality);
+
+    while (chunk < incoming->compound.num_chunks) {
+      if (!AttachPreparedDictionary(&target->compound,
+          incoming->compound.chunks[chunk])) {
+        return BROTLI_FALSE;
+      }
+      chunk++;
+    }
+
+    if (new_default) return BROTLI_TRUE;
+    if (!was_default) return BROTLI_FALSE;
+    /* Copy by value, but then set num_instances_ to 0 because their memory
+       is managed by attached, not by current */
+    target->contextual = incoming->contextual;
+    target->contextual.num_instances_ = 0;
+    return BROTLI_TRUE;
+  }
+}
+
+/* Returns a rough estimate, in bytes, of the memory needed to compress
+   |input_size| bytes with the given |quality| and |lgwin|. The parameters are
+   first normalized with SanitizeParams / ComputeLgBlock / ChooseHasher; the
+   two fast qualities and all other qualities are then summed up separately. */
+size_t BrotliEncoderEstimatePeakMemoryUsage(int quality, int lgwin,
+                                            size_t input_size) {
+  BrotliEncoderParams params;
+  int is_one_pass;
+  int is_two_pass;
+
+  BrotliEncoderInitParams(&params);
+  params.quality = quality;
+  params.lgwin = lgwin;
+  params.size_hint = input_size;
+  params.large_window = lgwin > BROTLI_MAX_WINDOW_BITS;
+  SanitizeParams(&params);
+  params.lgblock = ComputeLgBlock(&params);
+  ChooseHasher(&params, &params.hasher);
+
+  is_one_pass = (params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY);
+  is_two_pass = (params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY);
+
+  if (is_one_pass || is_two_pass) {
+    /* Fast paths: encoder state, one arena, optional hash table and, for the
+       two-pass variant only, a command buffer. */
+    const size_t block_size =
+        BROTLI_MIN(size_t, input_size, (1ul << params.lgwin));
+    const size_t hash_table_size =
+        HashTableSize(MaxHashTableSize(params.quality), block_size);
+    size_t total = sizeof(BrotliEncoderState);
+    if (is_one_pass) {
+      total += sizeof(BrotliOnePassArena);
+    } else {
+      total += sizeof(BrotliTwoPassArena);
+    }
+    /* Tables below 1 << 10 entries contribute nothing to the estimate. */
+    if (hash_table_size >= (1u << 10)) {
+      total += sizeof(int) * hash_table_size;
+    }
+    if (is_two_pass) {
+      total += 5 * BROTLI_MIN(size_t, block_size, 1ul << 17);
+    }
+    return total;
+  }
+
+  {
+    const size_t short_ringbuffer_size = (size_t)1 << params.lgblock;
+    const int ringbuffer_bits = ComputeRbBits(&params);
+    const size_t metablock_size =
+        BROTLI_MIN(size_t, input_size, MaxMetablockSize(&params));
+    const size_t inputblock_size =
+        BROTLI_MIN(size_t, input_size, short_ringbuffer_size);
+    const size_t outbuf_size = metablock_size * 2 + 503;
+    size_t hash_size[4] = {0};
+    size_t ringbuffer_size;
+    size_t cmdbuf_size = metablock_size * 2 + inputblock_size * 6;
+    size_t histogram_size = 0;
+
+    if (input_size < short_ringbuffer_size) {
+      ringbuffer_size = input_size;
+    } else {
+      ringbuffer_size = (1u << ringbuffer_bits) + short_ringbuffer_size;
+    }
+    HasherSize(&params, BROTLI_TRUE, input_size, hash_size);
+
+    if (params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
+      cmdbuf_size = BROTLI_MIN(size_t, cmdbuf_size,
+          MAX_NUM_DELAYED_SYMBOLS * sizeof(Command) + inputblock_size * 12);
+    }
+
+    if (params.quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
+      /* Only a very rough estimation, based on enwik8. */
+      histogram_size = 200 << 20;
+    } else if (params.quality >= MIN_QUALITY_FOR_BLOCK_SPLIT) {
+      /* Literal, command and distance histograms share the same count. */
+      const size_t histograms_per_kind =
+          BROTLI_MIN(size_t, metablock_size / 6144, 256);
+      histogram_size = histograms_per_kind *
+          (sizeof(HistogramLiteral) + sizeof(HistogramCommand) +
+           sizeof(HistogramDistance));
+    }
+
+    return (ringbuffer_size +
+            hash_size[0] + hash_size[1] + hash_size[2] + hash_size[3] +
+            cmdbuf_size +
+            outbuf_size +
+            histogram_size);
+  }
+}
+/* Estimates the number of bytes held by |prepared_dictionary|.
+
+   Managed wrappers are unwrapped and sizeof(ManagedDictionary) is added as
+   overhead. Prepared and lean prepared dictionaries are sized from their slot,
+   bucket and item counts; shared dictionaries add up their prepared instances
+   (recursively) and the buffers owned by each encoder dictionary instance.
+
+   Returns 0 on error: NULL input, managed wrapper without a payload, unknown
+   magic, or a nested prepared instance that itself reports an error. */
+size_t BrotliEncoderGetPreparedDictionarySize(
+    const BrotliEncoderPreparedDictionary* prepared_dictionary) {
+  /* First field of dictionary structs */
+  const BrotliEncoderPreparedDictionary* prepared = prepared_dictionary;
+  uint32_t magic;
+  size_t overhead = 0;
+  if (prepared == NULL) return 0;  /* error */
+  magic = *((const uint32_t*)prepared);
+  if (magic == kManagedDictionaryMagic) {
+    const ManagedDictionary* managed = (const ManagedDictionary*)prepared;
+    /* A wrapper without payload cannot be sized. */
+    if (managed->dictionary == NULL) return 0;  /* error */
+    overhead = sizeof(ManagedDictionary);
+    magic = *managed->dictionary;
+    prepared = (const BrotliEncoderPreparedDictionary*)managed->dictionary;
+  }
+
+  if (magic == kPreparedDictionaryMagic) {
+    const PreparedDictionary* dictionary =
+        (const PreparedDictionary*)prepared;
+    /* Keep in sync with step 3 of CreatePreparedDictionary */
+    return sizeof(PreparedDictionary) + dictionary->source_size +
+        (sizeof(uint32_t) << dictionary->slot_bits) +
+        (sizeof(uint16_t) << dictionary->bucket_bits) +
+        (sizeof(uint32_t) * dictionary->num_items) + overhead;
+  } else if (magic == kLeanPreparedDictionaryMagic) {
+    const PreparedDictionary* dictionary =
+        (const PreparedDictionary*)prepared;
+    /* Keep in sync with step 3 of CreatePreparedDictionary */
+    /* The lean flavour counts one pointer instead of the source bytes. */
+    return sizeof(PreparedDictionary) + sizeof(uint8_t*) +
+        (sizeof(uint32_t) << dictionary->slot_bits) +
+        (sizeof(uint16_t) << dictionary->bucket_bits) +
+        (sizeof(uint32_t) * dictionary->num_items) + overhead;
+  } else if (magic == kSharedDictionaryMagic) {
+    const SharedEncoderDictionary* dictionary =
+        (const SharedEncoderDictionary*)prepared;
+    const CompoundDictionary* compound = &dictionary->compound;
+    const ContextualEncoderDictionary* contextual = &dictionary->contextual;
+    size_t result = sizeof(*dictionary);
+    size_t i;
+    size_t num_instances;
+    const BrotliEncoderDictionary* instances;
+    for (i = 0; i < compound->num_prepared_instances_; i++) {
+      size_t size = BrotliEncoderGetPreparedDictionarySize(
+          (const BrotliEncoderPreparedDictionary*)
+          compound->prepared_instances_[i]);
+      if (!size) return 0;  /* error */
+      result += size;
+    }
+    if (contextual->context_based) {
+      num_instances = contextual->num_instances_;
+      instances = contextual->instances_;
+      result += sizeof(*instances) * num_instances;
+    } else {
+      /* The single embedded instance is already part of sizeof(*dictionary). */
+      num_instances = 1;
+      instances = &contextual->instance_;
+    }
+    for (i = 0; i < num_instances; i++) {
+      const BrotliEncoderDictionary* dict = &instances[i];
+      result += dict->trie.pool_capacity * sizeof(BrotliTrieNode);
+      if (dict->hash_table_data_words_) {
+        result += sizeof(kStaticDictionaryHashWords);
+      }
+      if (dict->hash_table_data_lengths_) {
+        result += sizeof(kStaticDictionaryHashLengths);
+      }
+      if (dict->buckets_data_) {
+        result += sizeof(*dict->buckets_data_) * dict->buckets_alloc_size_;
+      }
+      if (dict->dict_words_data_) {
+        result += sizeof(*dict->dict_words) * dict->dict_words_alloc_size_;
+      }
+      if (dict->words_instance_) {
+        result += sizeof(*dict->words_instance_);
+        /* data_size not added here: it is never allocated by the
+           SharedEncoderDictionary, instead it always points to the file
+           already loaded in memory. So if the caller wants to include
+           this memory as well, add the size of the loaded dictionary
+           file to this. */
+      }
+    }
+    return result + overhead;
+  }
+  return 0;  /* error */
+}
+
+#if defined(BROTLI_TEST)
+/* Test-only entry point, compiled only when BROTLI_TEST is defined. Forwards
+   its arguments unchanged to MakeUncompressedStream and returns that call's
+   result. The prototype precedes the definition so that the non-static
+   function has a visible declaration. */
+size_t MakeUncompressedStreamForTest(const uint8_t*, size_t, uint8_t*);
+size_t MakeUncompressedStreamForTest(
+    const uint8_t* input, size_t input_size, uint8_t* output) {
+  return MakeUncompressedStream(input, input_size, output);
+}
+#endif
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/encoder_dict.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/encoder_dict.c
new file mode 100644
index 0000000000..0c93e009a5
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/encoder_dict.c
@@ -0,0 +1,636 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "encoder_dict.h"
+
+#include <stdlib.h>  /* malloc, free */
+
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include "../common/shared_dictionary_internal.h"
+#include "../common/transform.h"
+#include "compound_dictionary.h"
+#include "dictionary_hash.h"
+#include "memory.h"
+#include "quality.h"
+#include "hash.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define NUM_HASH_BITS 15u
+#define NUM_HASH_BUCKETS (1u << NUM_HASH_BITS)
+
+/* Resets |trie| to the empty state: no node pool and a blank root node. */
+static void BrotliTrieInit(BrotliTrie* trie) {
+  BrotliTrieNode* root = &trie->root;
+
+  /* No pooled nodes have been allocated yet. */
+  trie->pool = 0;
+  trie->pool_size = 0;
+  trie->pool_capacity = 0;
+
+  /* Blank root: not in single-child mode, no payload, no children. */
+  root->sub = 0;
+  root->idx_ = 0;
+  root->len_ = 0;
+  root->single = 0;
+}
+
+/* Releases the node pool of |trie| through |m|. The remaining fields of
+   |trie| are left untouched, so the trie must be re-initialized before it is
+   used again. */
+static void BrotliTrieFree(MemoryManager* m, BrotliTrie* trie) {
+  BrotliFree(m, trie->pool);
+}
+
+/* Puts |dict| into its default state: it refers to the built-in (RFC 7932)
+   static dictionary, transforms and lookup tables, and owns no memory. */
+static void InitEncoderDictionary(BrotliEncoderDictionary* dict) {
+  const BrotliTransforms* builtin_transforms = BrotliGetTransforms();
+
+  /* Nothing is owned yet: every privately allocated buffer starts empty. */
+  dict->words_instance_ = 0;
+  dict->dict_words_data_ = 0;
+  dict->dict_words_alloc_size_ = 0;
+  dict->buckets_data_ = 0;
+  dict->buckets_alloc_size_ = 0;
+  dict->hash_table_data_lengths_ = 0;
+  dict->hash_table_data_words_ = 0;
+  dict->has_words_heavy = BROTLI_FALSE;
+  dict->parent = 0;
+  BrotliTrieInit(&dict->trie);
+
+  /* Words and transforms come from the built-in tables. */
+  dict->words = BrotliGetDictionary();
+  dict->num_transforms = (uint32_t)builtin_transforms->num_transforms;
+
+  /* Precomputed static lookup tables. */
+  dict->hash_table_words = kStaticDictionaryHashWords;
+  dict->hash_table_lengths = kStaticDictionaryHashLengths;
+  dict->buckets = kStaticDictionaryBuckets;
+  dict->dict_words = kStaticDictionaryWords;
+
+  /* Precomputed cut-off transform encoding. */
+  dict->cutoffTransforms = kCutoffTransforms;
+  dict->cutoffTransformsCount = kCutoffTransformsCount;
+}
+
+/* Releases, through |m|, every buffer that |dict| may own: the private hash
+   tables, bucket and word arrays, the private words instance and the trie
+   pool. |dict| itself is not freed. Fields that were never allocated still
+   hold the 0 written by InitEncoderDictionary.
+   NOTE(review): relies on BrotliFree accepting a null pointer - confirm. */
+static void BrotliDestroyEncoderDictionary(MemoryManager* m,
+    BrotliEncoderDictionary* dict) {
+  BrotliFree(m, dict->hash_table_data_words_);
+  BrotliFree(m, dict->hash_table_data_lengths_);
+  BrotliFree(m, dict->buckets_data_);
+  BrotliFree(m, dict->dict_words_data_);
+  BrotliFree(m, dict->words_instance_);
+  BrotliTrieFree(m, &dict->trie);
+}
+
+/* Hashes the first 4 bytes of |data| into a |bits|-bit value; |data| must
+   therefore hold at least 4 readable bytes. */
+static uint32_t Hash(const uint8_t* data, int bits) {
+  const uint32_t mixed = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
+  /* Keep only the top |bits| bits: the multiplication mixes the input best
+     into the high end of the product. */
+  const int discarded_bits = 32 - bits;
+  return mixed >> discarded_bits;
+}
+
+/* Theoretical max possible word size after transform */
+#define kTransformedBufferSize \
+    (256 + 256 + SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH)
+
+/* Applies transform number |transform| of |transforms| to word |word_idx| of
+   length |len| from |dict|, writing the bytes to |buffer| and the produced
+   length to |*size|. To be safe, |buffer| must have room for at least
+   kTransformedBufferSize bytes. */
+static void TransformedDictionaryWord(uint32_t word_idx, int len, int transform,
+    const BrotliTransforms* transforms,
+    const BrotliEncoderDictionary* dict,
+    uint8_t* buffer, size_t* size) {
+  const BrotliDictionary* words = dict->words;
+  /* Words of one length are stored back to back, |len| bytes each. */
+  const uint8_t* raw_word =
+      &words->data[words->offsets_by_length[len] + (uint32_t)len * word_idx];
+  *size = (size_t)BrotliTransformDictionaryWord(
+      buffer, raw_word, len, transforms, transform);
+}
+
+/* Builds a DictWord value from its three fields: word length |len|, transform
+   code |transform| and word index |idx|. */
+static DictWord MakeDictWord(uint8_t len, uint8_t transform, uint16_t idx) {
+  DictWord result;
+  result.len = len;
+  result.transform = transform;
+  result.idx = idx;
+  return result;
+}
+
+/* Appends |num| zero-filled nodes to the pool of |trie| and returns the pool
+   index of the first of them. Returns 0 when the memory manager reports OOM;
+   a successful call never returns 0, because slot 0 is reserved as the "null"
+   index.
+
+   Growing the pool may move it. When |keep| is non-NULL and |*keep| refers to
+   a pool node (i.e. not the root, which lives inside |trie| itself), |*keep|
+   is re-pointed at the same node in the possibly relocated pool. */
+static uint32_t BrotliTrieAlloc(MemoryManager* m, size_t num, BrotliTrie* trie,
+                                BrotliTrieNode** keep) {
+  const int rebase_keep = (keep && *keep != &trie->root);
+  uint32_t keep_index = 0;
+  uint32_t first_new;
+
+  if (rebase_keep) {
+    /* Remember the node by index; its address may change below. */
+    keep_index = (uint32_t)(*keep - trie->pool);
+  }
+  if (trie->pool_size == 0) {
+    /* Reserve a dummy node in front, so that a real node never gets index 0,
+       which stands for "null pointer". */
+    trie->pool_size = 1;
+  }
+  BROTLI_ENSURE_CAPACITY(m, BrotliTrieNode, trie->pool, trie->pool_capacity,
+                         trie->pool_size + num);
+  if (BROTLI_IS_OOM(m)) return 0;
+
+  /* New nodes start out empty. */
+  memset(trie->pool + trie->pool_size, 0, sizeof(*trie->pool) * num);
+  first_new = (uint32_t)trie->pool_size;
+  trie->pool_size += num;
+
+  if (rebase_keep) {
+    *keep = trie->pool + keep_index;
+  }
+  return first_new;
+}
+
+/**
+ * Recursively inserts word[index..size) below |node|, creating nodes on
+ * demand, and stores the payload in the node reached at the end of the word.
+ *
+ * len and idx: payload for last node; it is stored only when that node has no
+ *     payload yet (len_ == 0) or when idx is smaller than the stored idx_
+ * word, size: the string
+ * index: position in the string
+ * node: node to descend from; may be re-pointed internally when the pool moves
+ * trie: owner of the node pool
+ *
+ * Returns BROTLI_FALSE when an allocation reported OOM, BROTLI_TRUE otherwise.
+ */
+static BROTLI_BOOL BrotliTrieNodeAdd(MemoryManager* m, uint8_t len,
+    uint32_t idx, const uint8_t* word, size_t size, int index,
+    BrotliTrieNode* node, BrotliTrie* trie) {
+  BrotliTrieNode* child = 0;
+  uint8_t c;
+  if ((size_t)index == size) {
+    /* End of the word: record the payload on this node. */
+    if (!node->len_ || idx < node->idx_) {
+      node->len_ = len;
+      node->idx_ = idx;
+    }
+    return BROTLI_TRUE;
+  }
+  c = word[index];
+  if (node->single && c != node->c) {
+    /* The node has one child for a different byte: switch to the two-level
+       layout. 32 nodes are allocated; the first 16 are indexed by the high
+       nibble, the next 16 become the low-nibble group of the existing byte,
+       and the old child is copied into its slot there. */
+    BrotliTrieNode old = trie->pool[node->sub];
+    uint32_t new_nodes = BrotliTrieAlloc(m, 32, trie, &node);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    node->single = 0;
+    node->sub = new_nodes;
+    trie->pool[node->sub + (node->c >> 4)].sub = new_nodes + 16;
+    trie->pool[trie->pool[node->sub + (node->c & 15)].sub * 0 +
+        trie->pool[node->sub + (node->c >> 4)].sub + (node->c & 15)] =
+        old;
+  }
+  if (!node->sub) {
+    /* No children at all: start in single-child mode for byte c. */
+    uint32_t new_node = BrotliTrieAlloc(m, 1, trie, &node);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    node->single = 1;
+    node->c = c;
+    node->sub = new_node;
+  }
+  if (node->single) {
+    child = &trie->pool[node->sub];
+  } else {
+    if (!trie->pool[node->sub + (c >> 4)].sub) {
+      /* First byte with this high nibble: allocate its low-nibble group. */
+      uint32_t new_nodes = BrotliTrieAlloc(m, 16, trie, &node);
+      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+      trie->pool[node->sub + (c >> 4)].sub = new_nodes;
+    }
+    child = &trie->pool[trie->pool[node->sub + (c >> 4)].sub + (c & 15)];
+  }
+  return BrotliTrieNodeAdd(m, len, idx, word, size, index + 1, child, trie);
+}
+
+/* Inserts the |size|-byte string |word| with payload (|len|, |idx|) into
+   |trie|, starting at the root. Returns the result of BrotliTrieNodeAdd. */
+static BROTLI_BOOL BrotliTrieAdd(MemoryManager* m, uint8_t len, uint32_t idx,
+                          const uint8_t* word, size_t size, BrotliTrie* trie) {
+  return BrotliTrieNodeAdd(m, len, idx, word, size, 0, &trie->root, trie);
+}
+
+/* Returns the node that follows |node| on byte |c|, or 0 when no node has been
+   allocated for that byte. A node either has one child for one specific byte
+   (single mode) or a two-level table indexed by the high, then the low nibble
+   of |c|. */
+const BrotliTrieNode* BrotliTrieSub(const BrotliTrie* trie,
+                                    const BrotliTrieNode* node, uint8_t c) {
+  const BrotliTrieNode* nibble_group;
+
+  if (node->single) {
+    return (node->c == c) ? &trie->pool[node->sub] : 0;
+  }
+  if (node->sub == 0) {
+    return 0;
+  }
+  nibble_group = &trie->pool[node->sub + (c >> 4)];
+  if (nibble_group->sub == 0) {
+    return 0;
+  }
+  return &trie->pool[nibble_group->sub + (c & 15)];
+}
+
+/* Walks |trie| from the root along the |size| bytes of |word|. Returns the
+   node reached after the last byte, or 0 as soon as a step has no node. */
+static const BrotliTrieNode* BrotliTrieFind(const BrotliTrie* trie,
+                                            const uint8_t* word, size_t size) {
+  const BrotliTrieNode* cursor = &trie->root;
+  size_t pos;
+  for (pos = 0; cursor && pos < size; ++pos) {
+    cursor = BrotliTrieSub(trie, cursor, word[pos]);
+  }
+  return cursor;
+}
+
+/* Builds the dict_words / buckets lookup tables of |dict| from its words and
+   |transforms|.
+
+   Phase 1 collects words into per-hash lists (NUM_HASH_BITS-bit hash of the
+   first 4 bytes): every word under the first transform and, only when
+   |transforms| is the built-in set, the "uppercase first" and "uppercase all"
+   variants that the dedup trie does not already hold with a smaller index.
+   Phase 2 flattens the lists into one array (entry 0 is unused) and records
+   in buckets[] where each hash's run starts; 0 means "empty bucket".
+
+   On success the new arrays are owned by |dict| (dict_words_data_ and
+   buckets_data_). Returns BROTLI_FALSE when an allocation reports OOM or a
+   trie insertion fails. */
+static BROTLI_BOOL BuildDictionaryLut(MemoryManager* m,
+    const BrotliTransforms* transforms,
+    BrotliEncoderDictionary* dict) {
+  uint32_t i;
+  DictWord* dict_words;
+  uint16_t* buckets;
+  DictWord** words_by_hash;
+  size_t* words_by_hash_size;
+  size_t* words_by_hash_capacity;
+  BrotliTrie dedup;
+  uint8_t word[kTransformedBufferSize];
+  size_t word_size;
+  size_t total = 0;
+  uint8_t l;
+  uint16_t idx;
+
+  BrotliTrieInit(&dedup);
+
+  /* Temporary per-hash growable lists: data pointer, size and capacity. */
+  words_by_hash = (DictWord**)BrotliAllocate(m,
+      sizeof(*words_by_hash) * NUM_HASH_BUCKETS);
+  words_by_hash_size = (size_t*)BrotliAllocate(m,
+      sizeof(*words_by_hash_size) * NUM_HASH_BUCKETS);
+  words_by_hash_capacity = (size_t*)BrotliAllocate(m,
+      sizeof(*words_by_hash_capacity) * NUM_HASH_BUCKETS);
+  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+  memset(words_by_hash, 0, sizeof(*words_by_hash) * NUM_HASH_BUCKETS);
+  memset(words_by_hash_size, 0, sizeof(*words_by_hash_size) * NUM_HASH_BUCKETS);
+  memset(words_by_hash_capacity, 0,
+         sizeof(*words_by_hash_capacity) * NUM_HASH_BUCKETS);
+
+  if (transforms->num_transforms > 0) {
+    for (l = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
+        l <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++l) {
+      /* Number of words of length l; 0 size bits means "no such words". */
+      uint16_t n = dict->words->size_bits_by_length[l] ?
+          (uint16_t)(1 << dict->words->size_bits_by_length[l]) : 0u;
+      for (idx = 0; idx < n; ++idx) {
+        uint32_t key;
+        /* First transform (usually identity) */
+        TransformedDictionaryWord(idx, l, 0, transforms, dict, word,
+                                  &word_size);
+        /* Cannot hash words smaller than 4 bytes */
+        if (word_size < 4) {
+          /* Break instead of continue, all next words of this length will have
+             same length after transform */
+          break;
+        }
+        if (!BrotliTrieAdd(m, 0, idx, word, word_size, &dedup)) {
+          return BROTLI_FALSE;
+        }
+        key = Hash(word, NUM_HASH_BITS);
+        BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key],
+            words_by_hash_capacity[key], words_by_hash_size[key],
+            MakeDictWord(l, 0, idx));
+        ++total;
+      }
+    }
+  }
+
+  /* These LUT transforms only supported if no custom transforms. This is
+     ok, we will use the heavy trie instead. */
+  if (transforms == BrotliGetTransforms()) {
+    for (l = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
+        l <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++l) {
+      uint16_t n = dict->words->size_bits_by_length[l] ?
+          (uint16_t)(1 << dict->words->size_bits_by_length[l]) : 0u;
+      for (idx = 0; idx < n; ++idx) {
+        int k;
+        BROTLI_BOOL is_ascii = BROTLI_TRUE;
+        size_t offset = dict->words->offsets_by_length[l] + (size_t)l * idx;
+        const uint8_t* data = &dict->words->data[offset];
+        /* "Uppercase all" is only tried for words made of bytes < 128. */
+        for (k = 0; k < l; ++k) {
+          if (data[k] >= 128) is_ascii = BROTLI_FALSE;
+        }
+        /* "Uppercase first" only needs the first byte to be < 128. */
+        if (data[0] < 128) {
+          int transform = 9;  /* {empty, uppercase first, empty} */
+          uint32_t ix = idx + (uint32_t)transform * n;
+          const BrotliTrieNode* it;
+          TransformedDictionaryWord(idx, l, transform, transforms,
+                                   dict, word, &word_size);
+          it = BrotliTrieFind(&dedup, word, word_size);
+          /* Add only if the trie has no node for this string, or the node it
+             has carries a larger index. */
+          if (!it || it->idx_ > ix) {
+            uint32_t key = Hash(word, NUM_HASH_BITS);
+            if (!BrotliTrieAdd(m, 0, ix, word, word_size, &dedup)) {
+              return BROTLI_FALSE;
+            }
+            BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key],
+                words_by_hash_capacity[key], words_by_hash_size[key],
+                MakeDictWord(l, BROTLI_TRANSFORM_UPPERCASE_FIRST, idx));
+            ++total;
+          }
+        }
+        if (is_ascii) {
+          int transform = 44;  /* {empty, uppercase all, empty} */
+          uint32_t ix = idx + (uint32_t)transform * n;
+          const BrotliTrieNode* it;
+          TransformedDictionaryWord(idx, l, transform, transforms,
+                                    dict, word, &word_size);
+          it = BrotliTrieFind(&dedup, word, word_size);
+          if (!it || it->idx_ > ix) {
+            uint32_t key = Hash(word, NUM_HASH_BITS);
+            if (!BrotliTrieAdd(m, 0, ix, word, word_size, &dedup)) {
+              return BROTLI_FALSE;
+            }
+            BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key],
+                words_by_hash_capacity[key], words_by_hash_size[key],
+                MakeDictWord(l, BROTLI_TRANSFORM_UPPERCASE_ALL, idx));
+            ++total;
+          }
+        }
+      }
+    }
+  }
+
+  /* Final tables: one extra DictWord for the unused entry 0. */
+  dict_words = (DictWord*)BrotliAllocate(m,
+      sizeof(*dict->dict_words) * (total + 1));
+  buckets = (uint16_t*)BrotliAllocate(m,
+      sizeof(*dict->buckets) * NUM_HASH_BUCKETS);
+  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+  dict->dict_words_alloc_size_ = total + 1;
+  dict->dict_words = dict->dict_words_data_ = dict_words;
+  dict->buckets_alloc_size_ = NUM_HASH_BUCKETS;
+  dict->buckets = dict->buckets_data_ = buckets;
+
+  /* Unused; makes offsets start from 1. */
+  dict_words[0] = MakeDictWord(0, 0, 0);
+  total = 1;
+  for (i = 0; i < NUM_HASH_BUCKETS; ++i) {
+    size_t num_words = words_by_hash_size[i];
+    if (num_words > 0) {
+      /* NOTE(review): the start offset is narrowed to 16 bits here; this
+         presumes the total word count stays below 65536 - confirm. */
+      buckets[i] = (uint16_t)(total);
+      memcpy(&dict_words[total], &words_by_hash[i][0],
+          sizeof(dict_words[0]) * num_words);
+      total += num_words;
+      /* Set the high bit of len on the last word of the run; presumably the
+         end-of-bucket marker for readers of this table. */
+      dict_words[total - 1].len |= 0x80;
+    } else {
+      buckets[i] = 0;
+    }
+  }
+
+  /* Drop the temporary per-hash lists and the dedup trie. */
+  for (i = 0; i < NUM_HASH_BUCKETS; ++i) {
+    BrotliFree(m, words_by_hash[i]);
+  }
+  BrotliFree(m, words_by_hash);
+  BrotliFree(m, words_by_hash_size);
+  BrotliFree(m, words_by_hash_capacity);
+  BrotliTrieFree(m, &dedup);
+
+  return BROTLI_TRUE;
+}
+
+/* Fills |hash_table_words| and |hash_table_lengths| for the words of |dict|.
+   Both tables are cleared first, using the sizes of the built-in
+   kStaticDictionaryHashWords / kStaticDictionaryHashLengths arrays, so the
+   caller's buffers must be at least that large. Each word is hashed (14 bits
+   of its first 4 bytes) into a pair of slots; words shorter than 8 bytes use
+   the odd slot, longer ones the even slot. */
+static void BuildDictionaryHashTable(uint16_t* hash_table_words,
+    uint8_t* hash_table_lengths, const BrotliDictionary* dict) {
+  int j, len;
+  /* The order of the loops is such that in case of collision, words with
+     shorter length are preferred, and in case of same length, words with
+     smaller index. There is only a single word per bucket. */
+  /* TODO(lode): consider adding optional user-supplied frequency_map to use
+     for preferred words instead, this can make the encoder better for
+     quality 9 and below without affecting the decoder */
+  memset(hash_table_words, 0, sizeof(kStaticDictionaryHashWords));
+  memset(hash_table_lengths, 0, sizeof(kStaticDictionaryHashLengths));
+  /* Longest words and highest indices go first, so that the preferred
+     candidates are written last and overwrite the others. */
+  for (len = SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH;
+      len >= SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH; --len) {
+    const size_t num_words = dict->size_bits_by_length[len] ?
+        (1u << dict->size_bits_by_length[len]) : 0;
+    for (j = (int)num_words - 1; j >= 0; --j) {
+      size_t offset = dict->offsets_by_length[len] +
+          (size_t)len * (size_t)j;
+      const uint8_t* word = &dict->data[offset];
+      const uint32_t key = Hash(word, 14);
+      int idx = (int)(key << 1) + (len < 8 ? 1 : 0);
+      BROTLI_DCHECK(idx < (int)NUM_HASH_BUCKETS);
+      hash_table_words[idx] = (uint16_t)j;
+      hash_table_lengths[idx] = (uint8_t)len;
+    }
+  }
+}
+
+/* Fills dict->trie with every (word, transform) combination of |dict| and
+   |transforms| whose transformed form is at least 4 bytes long. Transforms are
+   visited from the last one down to the first. The payload stored for a
+   string is the untransformed length and "word index + num words * transform
+   index". Returns BROTLI_FALSE when a trie insertion fails. */
+static BROTLI_BOOL GenerateWordsHeavy(MemoryManager* m,
+    const BrotliTransforms* transforms,
+    BrotliEncoderDictionary* dict) {
+  int transform_idx = (int)transforms->num_transforms;
+  while (transform_idx-- > 0) {
+    int len;
+    for (len = 0; len < 32; ++len) {
+      /* Masking off bit 0 turns "0 size bits" (1 << 0) into zero words. */
+      const int num_words =
+          (int)((1u << dict->words->size_bits_by_length[len]) & ~1u);
+      int word_idx;
+      for (word_idx = 0; word_idx < num_words; ++word_idx) {
+        uint8_t transformed[kTransformedBufferSize];
+        size_t size;
+        TransformedDictionaryWord((uint32_t)word_idx, len, transform_idx,
+            transforms, dict, transformed, &size);
+        /* Results shorter than 4 bytes are skipped. */
+        if (size >= 4 &&
+            !BrotliTrieAdd(m, (uint8_t)len,
+                (uint32_t)(word_idx + num_words * transform_idx),
+                transformed, size, &dict->trie)) {
+          return BROTLI_FALSE;
+        }
+      }
+    }
+  }
+  return BROTLI_TRUE;
+}
+
+/* Computes cutoffTransformsCount (in |count|) and cutoffTransforms (in |data|)
+   for custom transforms, as far as the cutoffTransforms encoding allows. The
+   fast encoder uses this for quick lookup of the transforms that remove the N
+   last characters (OmitLast).
+
+   Encoding: the transform for N is stored in 6 bits at bit position N * 6 as
+   (transform index - (N << 2)). Hence the identity transform must have index
+   0-63, the N=1 transform index 4-67, ..., the N=9 transform index 36-99.
+   The scan stops at the first N whose transform is missing or out of range.
+   TODO(lode): consider a simple flexible uint8_t[10] instead of the uint64_t
+   for the cutoff transforms, so that shared dictionaries can have the
+   OmitLast transforms anywhere without loss. */
+static void ComputeCutoffTransforms(
+    const BrotliTransforms* transforms,
+    uint32_t* count, uint64_t* data) {
+  int cut;
+  *count = 0;
+  *data = 0;
+  for (cut = 0; cut <= BROTLI_TRANSFORMS_MAX_CUT_OFF; ++cut) {
+    const int idx = transforms->cutOffTransforms[cut];
+    const int lowest = cut << 2;
+    /* -1: not found; below |lowest| or 64+ above it: not encodable. */
+    if (idx == -1 || idx < lowest || idx >= lowest + 64) {
+      break;
+    }
+    ++(*count);
+    *data |= ((uint64_t)idx - ((uint64_t)cut << 2u)) << ((uint64_t)cut * 6u);
+  }
+}
+
+/* Computes the lookup data of |current| for its words and |transforms|.
+   Nothing is done when both are the built-in ones. Otherwise private hash
+   tables and the cut-off transform encoding are built; for |quality| at or
+   above ZOPFLIFICATION_QUALITY the word LUT is built too, plus the heavy trie
+   when the transforms are custom. Returns BROTLI_FALSE on failure. */
+static BROTLI_BOOL ComputeDictionary(MemoryManager* m, int quality,
+    const BrotliTransforms* transforms,
+    BrotliEncoderDictionary* current) {
+  const int builtin_words = (current->words == BrotliGetDictionary());
+  const int builtin_transforms = (transforms == BrotliGetTransforms());
+
+  if (builtin_words && builtin_transforms) {
+    /* hashes are already set to Brotli defaults */
+    return BROTLI_TRUE;
+  }
+
+  /* Private hash tables, sized like the built-in ones. */
+  current->hash_table_data_words_ = (uint16_t*)BrotliAllocate(
+      m, sizeof(kStaticDictionaryHashWords));
+  current->hash_table_data_lengths_ = (uint8_t*)BrotliAllocate(
+      m, sizeof(kStaticDictionaryHashLengths));
+  if (BROTLI_IS_OOM(m)) {
+    return BROTLI_FALSE;
+  }
+  current->hash_table_lengths = current->hash_table_data_lengths_;
+  current->hash_table_words = current->hash_table_data_words_;
+  BuildDictionaryHashTable(current->hash_table_data_words_,
+      current->hash_table_data_lengths_, current->words);
+
+  ComputeCutoffTransforms(transforms,
+      &current->cutoffTransformsCount, &current->cutoffTransforms);
+
+  /* The data below is only needed by the slow encoder. */
+  if (quality < ZOPFLIFICATION_QUALITY) {
+    return BROTLI_TRUE;
+  }
+  if (!BuildDictionaryLut(m, transforms, current)) {
+    return BROTLI_FALSE;
+  }
+
+  /* Built-in transforms are handled by a hard-coded function; custom ones
+     need the large hammer: a trie of all transformed words. */
+  current->has_words_heavy = !builtin_transforms;
+  if (current->has_words_heavy &&
+      !GenerateWordsHeavy(m, transforms, current)) {
+    return BROTLI_FALSE;
+  }
+  return BROTLI_TRUE;
+}
+
+/* Puts |dict| into the default state: tagged with the shared-dictionary
+   magic, an empty compound dictionary, and a single non-contextual encoder
+   dictionary (the embedded instance_) set up by InitEncoderDictionary. */
+void BrotliInitSharedEncoderDictionary(SharedEncoderDictionary* dict) {
+  CompoundDictionary* compound = &dict->compound;
+  ContextualEncoderDictionary* contextual = &dict->contextual;
+  BrotliEncoderDictionary* embedded = &contextual->instance_;
+
+  dict->magic = kSharedDictionaryMagic;
+  dict->max_quality = BROTLI_MAX_QUALITY;
+
+  /* No prefix chunks attached. */
+  compound->num_prepared_instances_ = 0;
+  compound->chunk_offsets[0] = 0;
+  compound->total_size = 0;
+  compound->num_chunks = 0;
+
+  /* One dictionary shared by all contexts. */
+  contextual->context_based = 0;
+  contextual->num_dictionaries = 1;
+  contextual->num_instances_ = 1;  /* The instance_ field */
+  contextual->instances_ = 0;
+  contextual->dict[0] = embedded;
+
+  /* The parent link is set after init, which clears it. */
+  InitEncoderDictionary(embedded);
+  embedded->parent = contextual;
+}
+
+/* TODO(eustas): make sure that tooling will warn user if not all the cutoff
+   transforms are available (for low-quality encoder). */
+/* Initializes |dict| from the decoded shared dictionary |decoded_dict|.
+
+   Every prefix of |decoded_dict| is turned into a prepared dictionary and
+   attached to the compound dictionary; then one encoder dictionary per
+   decoded dictionary is set up via ComputeDictionary for |quality|.
+
+   Returns BROTLI_FALSE when a prefix cannot be prepared or attached, when an
+   allocation reports OOM, or when ComputeDictionary fails. On failure |dict|
+   may be partially populated. */
+static BROTLI_BOOL InitCustomSharedEncoderDictionary(
+    MemoryManager* m, const BrotliSharedDictionary* decoded_dict,
+    int quality, SharedEncoderDictionary* dict) {
+  ContextualEncoderDictionary* contextual;
+  CompoundDictionary* compound;
+  BrotliEncoderDictionary* instances;
+  int i;
+  BrotliInitSharedEncoderDictionary(dict);
+
+  contextual = &dict->contextual;
+  compound = &dict->compound;
+
+  for (i = 0; i < (int)decoded_dict->num_prefix; i++) {
+    PreparedDictionary* prepared = CreatePreparedDictionary(m,
+        decoded_dict->prefix[i], decoded_dict->prefix_size[i]);
+    /* A prefix that is silently dropped would leave the encoder with fewer
+       chunks than the shared dictionary describes, so fail instead. */
+    if (prepared == NULL) return BROTLI_FALSE;
+    if (!AttachPreparedDictionary(compound, prepared)) {
+      /* Not recorded for cleanup yet, so release it here. */
+      DestroyPreparedDictionary(m, prepared);
+      return BROTLI_FALSE;
+    }
+    /* remember for cleanup */
+    compound->prepared_instances_[
+        compound->num_prepared_instances_++] = prepared;
+  }
+
+  dict->max_quality = quality;
+  contextual->context_based = decoded_dict->context_based;
+  if (decoded_dict->context_based) {
+    memcpy(contextual->context_map, decoded_dict->context_map,
+        SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS);
+  }
+
+  contextual->num_dictionaries = decoded_dict->num_dictionaries;
+  contextual->num_instances_ = decoded_dict->num_dictionaries;
+  if (contextual->num_instances_ == 1) {
+    /* A single dictionary lives in the embedded instance_. */
+    instances = &contextual->instance_;
+  } else {
+    contextual->instances_ = (BrotliEncoderDictionary*)
+        BrotliAllocate(m, sizeof(*contextual->instances_) *
+        contextual->num_instances_);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    instances = contextual->instances_;
+  }
+  for (i = 0; i < (int)contextual->num_instances_; i++) {
+    BrotliEncoderDictionary* current = &instances[i];
+    InitEncoderDictionary(current);
+    current->parent = &dict->contextual;
+    if (decoded_dict->words[i] == BrotliGetDictionary()) {
+      current->words = BrotliGetDictionary();
+    } else {
+      /* Custom words: keep a private copy of the BrotliDictionary struct. */
+      current->words_instance_ = (BrotliDictionary*)BrotliAllocate(
+          m, sizeof(BrotliDictionary));
+      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+      *current->words_instance_ = *decoded_dict->words[i];
+      current->words = current->words_instance_;
+    }
+    current->num_transforms =
+        (uint32_t)decoded_dict->transforms[i]->num_transforms;
+    if (!ComputeDictionary(
+        m, quality, decoded_dict->transforms[i], current)) {
+      return BROTLI_FALSE;
+    }
+
+    contextual->dict[i] = current;
+  }
+
+  return BROTLI_TRUE;  /* success */
+}
+
+/* Initializes |dict| from the serialized shared dictionary |encoded_dict| of
+   |size| bytes. The data is decoded into a temporary BrotliSharedDictionary,
+   created with the allocator callbacks of |m|, which is destroyed again before
+   returning. Returns BROTLI_FALSE when the temporary instance cannot be
+   created, the data cannot be attached to it, or the encoder-side
+   initialization fails. */
+BROTLI_BOOL BrotliInitCustomSharedEncoderDictionary(
+    MemoryManager* m, const uint8_t* encoded_dict, size_t size,
+    int quality, SharedEncoderDictionary* dict) {
+  BROTLI_BOOL ok;
+  BrotliSharedDictionary* decoded = BrotliSharedDictionaryCreateInstance(
+      m->alloc_func, m->free_func, m->opaque);
+  if (decoded == NULL) {
+    /* OOM */
+    return BROTLI_FALSE;
+  }
+
+  if (!BrotliSharedDictionaryAttach(
+      decoded, BROTLI_SHARED_DICTIONARY_SERIALIZED, size, encoded_dict)) {
+    ok = BROTLI_FALSE;
+  } else {
+    ok = InitCustomSharedEncoderDictionary(m, decoded, quality, dict);
+  }
+
+  BrotliSharedDictionaryDestroyInstance(decoded);
+  return ok;
+}
+
+/* Releases everything |dict| owns, through |m|: the prepared dictionaries
+   remembered in the compound dictionary and the encoder dictionary
+   instance(s). With exactly one instance only the embedded instance_ is
+   destroyed; with more, each element of instances_ and the array itself. With
+   zero instances nothing contextual is released. |dict| itself is not
+   freed. */
+void BrotliCleanupSharedEncoderDictionary(MemoryManager* m,
+                                          SharedEncoderDictionary* dict) {
+  CompoundDictionary* compound = &dict->compound;
+  ContextualEncoderDictionary* contextual = &dict->contextual;
+  size_t k;
+
+  for (k = 0; k < compound->num_prepared_instances_; ++k) {
+    DestroyPreparedDictionary(m,
+        (PreparedDictionary*)compound->prepared_instances_[k]);
+  }
+
+  if (contextual->num_instances_ == 1) {
+    BrotliDestroyEncoderDictionary(m, &contextual->instance_);
+    return;
+  }
+  if (contextual->num_instances_ > 1) {
+    for (k = 0; k < contextual->num_instances_; ++k) {
+      BrotliDestroyEncoderDictionary(m, &contextual->instances_[k]);
+    }
+    BrotliFree(m, contextual->instances_);
+  }
+}
+
+/* Allocates a ManagedDictionary with the given allocator callbacks. The new
+   wrapper is tagged with the managed-dictionary magic, gets its own memory
+   manager built from the same callbacks, and holds no dictionary yet.
+   Returns NULL when the allocation fails. */
+ManagedDictionary* BrotliCreateManagedDictionary(
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
+  ManagedDictionary* managed = (ManagedDictionary*)BrotliBootstrapAlloc(
+      sizeof(ManagedDictionary), alloc_func, free_func, opaque);
+  if (managed != NULL) {
+    managed->magic = kManagedDictionaryMagic;
+    managed->dictionary = NULL;
+    BrotliInitMemoryManager(
+        &managed->memory_manager_, alloc_func, free_func, opaque);
+  }
+  return managed;
+}
+
+/* Frees the wrapper |dictionary| itself, using its own memory manager. A NULL
+   argument is ignored. The wrapped dictionary is not touched here. */
+void BrotliDestroyManagedDictionary(ManagedDictionary* dictionary) {
+  if (dictionary != NULL) {
+    BrotliBootstrapFree(dictionary, &dictionary->memory_manager_);
+  }
+}
+
+/* Escalate internal functions visibility; for testing purposes only. */
+#if defined(BROTLI_TEST)
+/* Non-static forwarder to InitEncoderDictionary, compiled only when
+   BROTLI_TEST is defined. The prototype precedes the definition so that the
+   non-static function has a visible declaration. */
+void InitEncoderDictionaryForTest(BrotliEncoderDictionary*);
+void InitEncoderDictionaryForTest(BrotliEncoderDictionary* d) {
+  InitEncoderDictionary(d);
+}
+#endif
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/encoder_dict.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/encoder_dict.h
new file mode 100644
index 0000000000..b291f98b47
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/encoder_dict.h
@@ -0,0 +1,152 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#ifndef BROTLI_ENC_ENCODER_DICT_H_
+#define BROTLI_ENC_ENCODER_DICT_H_
+
+#include <brotli/shared_dictionary.h>
+#include <brotli/types.h>
+
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include "compound_dictionary.h"
+#include "memory.h"
+#include "static_dict_lut.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/*
+Dictionary hierarchy for Encoder:
+-SharedEncoderDictionary
+--CompoundDictionary
+---PreparedDictionary [up to 15x]
+   = prefix dictionary with precomputed hashes
+--ContextualEncoderDictionary
+---BrotliEncoderDictionary [up to 64x]
+   = for each context, precomputed static dictionary with words + transforms
+
+Dictionary hierarchy from common: similar, but without precomputed hashes
+-BrotliSharedDictionary
+--BrotliDictionary [up to 64x]
+--BrotliTransforms [up to 64x]
+--const uint8_t* prefix [up to 15x]: compound dictionaries
+*/
+
+typedef struct BrotliTrieNode {
+  uint8_t single;  /* if 1, sub is a single node for c instead of 256 */
+  uint8_t c;  /* byte that the single sub node stands for (see single) */
+  uint8_t len_;  /* untransformed length */
+  uint32_t idx_;  /* word index + num words * transform index */
+  uint32_t sub;  /* index of sub node(s) in the pool */
+} BrotliTrieNode;
+
+typedef struct BrotliTrie {
+  BrotliTrieNode* pool;  /* node storage that BrotliTrieNode.sub indexes */
+  size_t pool_capacity;  /* presumably allocated node slots - TODO confirm */
+  size_t pool_size;  /* presumably node slots in use - TODO confirm */
+  BrotliTrieNode root;  /* stored inline, not in pool */
+} BrotliTrie;
+
+BROTLI_INTERNAL const BrotliTrieNode* BrotliTrieSub(const BrotliTrie* trie,
+    const BrotliTrieNode* node, uint8_t c);
+/* Dictionary data (words and transforms) for 1 possible context */
+typedef struct BrotliEncoderDictionary {
+  const BrotliDictionary* words;  /* word list used for this context */
+  uint32_t num_transforms;  /* presumably the transform count - confirm */
+
+  /* cut off for fast encoder */
+  uint32_t cutoffTransformsCount;
+  uint64_t cutoffTransforms;
+
+  /* from dictionary_hash.h, for fast encoder */
+  const uint16_t* hash_table_words;
+  const uint8_t* hash_table_lengths;
+
+  /* from static_dict_lut.h, for slow encoder */
+  const uint16_t* buckets;
+  const DictWord* dict_words;
+  /* Heavy version, for use by slow encoder when there are custom transforms.
+     Contains every possible transformed dictionary word in a trie. It encodes
+     about as fast as the non-heavy encoder but consumes a lot of memory and
+     takes time to build. */
+  BrotliTrie trie;
+  BROTLI_BOOL has_words_heavy;  /* presumably set when trie is built */
+
+  /* Reference to other dictionaries. */
+  const struct ContextualEncoderDictionary* parent;
+
+  /* Allocated memory, used only when not using the Brotli defaults */
+  uint16_t* hash_table_data_words_;
+  uint8_t* hash_table_data_lengths_;
+  size_t buckets_alloc_size_;
+  uint16_t* buckets_data_;
+  size_t dict_words_alloc_size_;
+  DictWord* dict_words_data_;
+  BrotliDictionary* words_instance_;
+} BrotliEncoderDictionary;
+
+/* Dictionary data for all 64 contexts */
+typedef struct ContextualEncoderDictionary {
+  BROTLI_BOOL context_based;  /* presumably: dict varies by context - confirm */
+  uint8_t num_dictionaries;  /* presumably distinct dictionaries - confirm */
+  uint8_t context_map[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
+  const BrotliEncoderDictionary* dict[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
+
+  /* If num_instances_ is 1, instance_ is used, else dynamic allocation with
+     instances_ is used. */
+  size_t num_instances_;
+  BrotliEncoderDictionary instance_;  /* embedded storage for the 1 case */
+  BrotliEncoderDictionary* instances_;  /* array freed on cleanup if > 1 */
+} ContextualEncoderDictionary;
+
+typedef struct SharedEncoderDictionary {
+  /* Magic value to distinguish this struct from PreparedDictionary for
+     certain external usages. */
+  uint32_t magic;
+
+  /* LZ77 prefix, compound dictionary */
+  CompoundDictionary compound;  /* prepared instances freed on cleanup */
+
+  /* Custom static dictionary (optionally context-based) */
+  ContextualEncoderDictionary contextual;  /* instances freed on cleanup */
+
+  /* The maximum quality the dictionary was computed for */
+  int max_quality;
+} SharedEncoderDictionary;
+
+typedef struct ManagedDictionary {
+  uint32_t magic;  /* set to kManagedDictionaryMagic on creation */
+  MemoryManager memory_manager_;  /* built from the creator's callbacks */
+  uint32_t* dictionary;  /* NULL right after creation */
+} ManagedDictionary;
+
+/* Initializes to the brotli built-in dictionary */
+BROTLI_INTERNAL void BrotliInitSharedEncoderDictionary(
+    SharedEncoderDictionary* dict);
+
+/* Initializes to shared dictionary that will be parsed from
+   encoded_dict. Requires that you keep the encoded_dict buffer
+   around, parts of data will point to it. */
+BROTLI_INTERNAL BROTLI_BOOL BrotliInitCustomSharedEncoderDictionary(
+    MemoryManager* m, const uint8_t* encoded_dict, size_t size,
+    int quality, SharedEncoderDictionary* dict);
+
+BROTLI_INTERNAL void BrotliCleanupSharedEncoderDictionary(
+    MemoryManager* m, SharedEncoderDictionary* dict);
+
+BROTLI_INTERNAL ManagedDictionary* BrotliCreateManagedDictionary(
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
+
+BROTLI_INTERNAL void BrotliDestroyManagedDictionary(
+    ManagedDictionary* dictionary);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_ENCODER_DICT_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/entropy_encode.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/entropy_encode.c
new file mode 100644
index 0000000000..9aed43b6eb
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/entropy_encode.c
@@ -0,0 +1,504 @@
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Entropy encoding (Huffman) utilities. */
+
+#include "entropy_encode.h"
+
+#include <string.h>  /* memset */
+
+#include <brotli/types.h>
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+const size_t kBrotliShellGaps[] = {132, 57, 23, 10, 4, 1};
+
+BROTLI_BOOL BrotliSetDepth(
+    int p0, HuffmanTree* pool, uint8_t* depth, int max_depth) {
+  int stack[16];  /* stack[l]: pending right child at level l, -1 if none. */
+  int level = 0;  /* Depth of node p below the root p0. */
+  int p = p0;
+  BROTLI_DCHECK(max_depth <= 15);  /* Keeps level within stack[16]. */
+  stack[0] = -1;
+  while (BROTLI_TRUE) {
+    if (pool[p].index_left_ >= 0) {  /* Internal node: descend left. */
+      level++;
+      if (level > max_depth) return BROTLI_FALSE;  /* Tree is too deep. */
+      stack[level] = pool[p].index_right_or_value_;
+      p = pool[p].index_left_;
+      continue;
+    } else {  /* Leaf: record the depth of its symbol. */
+      depth[pool[p].index_right_or_value_] = (uint8_t)level;
+    }
+    while (level >= 0 && stack[level] == -1) level--;  /* Unwind. */
+    if (level < 0) return BROTLI_TRUE;  /* Every leaf has been visited. */
+    p = stack[level];
+    stack[level] = -1;  /* Mark this right subtree as taken. */
+  }
+}
+
+/* Orders root nodes by ascending count (least popular first). */
+static BROTLI_INLINE BROTLI_BOOL SortHuffmanTree(
+    const HuffmanTree* v0, const HuffmanTree* v1) {
+  /* Equal counts are ordered by index_right_or_value_, larger one first. */
+  return (v0->total_count_ == v1->total_count_)
+      ? TO_BROTLI_BOOL(v0->index_right_or_value_ > v1->index_right_or_value_)
+      : TO_BROTLI_BOOL(v0->total_count_ < v1->total_count_);
+}
+
+/* This function will create a Huffman tree.
+
+   The catch here is that the tree cannot be arbitrarily deep.
+   Brotli specifies a maximum depth of 15 bits for "code trees"
+   and 7 bits for "code length code trees."
+
+   count_limit is the value that is to be faked as the minimum value
+   and this minimum value is raised until the tree matches the
+   maximum length requirement.
+
+   This algorithm is not of excellent performance for very long data blocks,
+   especially when population counts are longer than 2**tree_limit, but
+   we are not planning to use this with extremely long blocks.
+   If every count is zero, there is nothing to build and depth is untouched.
+   See http://en.wikipedia.org/wiki/Huffman_coding */
+void BrotliCreateHuffmanTree(const uint32_t* data,
+                             const size_t length,
+                             const int tree_limit,
+                             HuffmanTree* tree,
+                             uint8_t* depth) {
+  uint32_t count_limit;
+  HuffmanTree sentinel;
+  InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1);
+  /* For block sizes below 64 kB, we never need to do a second iteration
+     of this loop. Probably all of our block sizes will be smaller than
+     that, so this loop is mostly of academic interest. If we actually
+     would need this, we would be better off with the Katajainen algorithm. */
+  for (count_limit = 1; ; count_limit *= 2) {
+    size_t n = 0;  /* Number of symbols with a non-zero count. */
+    size_t i;
+    size_t j;
+    size_t k;
+    for (i = length; i != 0;) {  /* Walk the symbols from last to first. */
+      --i;
+      if (data[i]) {
+        const uint32_t count = BROTLI_MAX(uint32_t, data[i], count_limit);
+        InitHuffmanTree(&tree[n++], count, -1, (int16_t)i);
+      }
+    }
+
+    if (n <= 1) {  /* Zero or one symbol: no merging (n - 1 wraps at 0). */
+      if (n == 1) depth[tree[0].index_right_or_value_] = 1;  /* Only one. */
+      break;
+    }
+
+    SortHuffmanTreeItems(tree, n, SortHuffmanTree);
+
+    /* The nodes are:
+       [0, n): the sorted leaf nodes that we start with.
+       [n]: we add a sentinel here.
+       [n + 1, 2n): new parent nodes are added here, starting from
+                    (n+1). These are naturally in ascending order.
+       [2n]: we add a sentinel at the end as well.
+       There will be (2n+1) elements at the end. */
+    tree[n] = sentinel;
+    tree[n + 1] = sentinel;
+
+    i = 0;      /* Points to the next leaf node. */
+    j = n + 1;  /* Points to the next non-leaf node. */
+    for (k = n - 1; k != 0; --k) {  /* n - 1 merges; n >= 2 holds here. */
+      size_t left, right;
+      if (tree[i].total_count_ <= tree[j].total_count_) {
+        left = i;
+        ++i;
+      } else {
+        left = j;
+        ++j;
+      }
+      if (tree[i].total_count_ <= tree[j].total_count_) {
+        right = i;
+        ++i;
+      } else {
+        right = j;
+        ++j;
+      }
+
+      {
+        /* The sentinel node becomes the parent node. */
+        size_t j_end = 2 * n - k;
+        tree[j_end].total_count_ =
+            tree[left].total_count_ + tree[right].total_count_;
+        tree[j_end].index_left_ = (int16_t)left;
+        tree[j_end].index_right_or_value_ = (int16_t)right;
+
+        /* Add back the last sentinel node. */
+        tree[j_end + 1] = sentinel;
+      }
+    }
+    if (BrotliSetDepth((int)(2 * n - 1), &tree[0], depth, tree_limit)) {
+      /* We need to pack the Huffman tree in tree_limit bits. If this was not
+         successful, add fake entities to the lowest values and retry. */
+      break;
+    }
+  }
+}
+
+static void Reverse(uint8_t* v, size_t start, size_t end) {
+  /* Swap v[lo] and v[hi] while walking inwards over [start, end). */
+  size_t lo = start;
+  size_t hi = end - 1;
+  for (; lo < hi; ++lo, --hi) {
+    const uint8_t held = v[lo];
+    v[lo] = v[hi];
+    v[hi] = held;
+  }
+}
+
+static void BrotliWriteHuffmanTreeRepetitions(
+    const uint8_t previous_value,
+    const uint8_t value,
+    size_t repetitions,
+    size_t* tree_size,
+    uint8_t* tree,
+    uint8_t* extra_bits_data) {
+  BROTLI_DCHECK(repetitions > 0);
+  if (previous_value != value) {  /* New value: emit it once literally. */
+    tree[*tree_size] = value;
+    extra_bits_data[*tree_size] = 0;
+    ++(*tree_size);
+    --repetitions;
+  }
+  if (repetitions == 7) {  /* Emit one literally so that 6 remain. */
+    tree[*tree_size] = value;
+    extra_bits_data[*tree_size] = 0;
+    ++(*tree_size);
+    --repetitions;
+  }
+  if (repetitions < 3) {  /* Fewer than 3 left: emit each one literally. */
+    size_t i;
+    for (i = 0; i < repetitions; ++i) {
+      tree[*tree_size] = value;
+      extra_bits_data[*tree_size] = 0;
+      ++(*tree_size);
+    }
+  } else {
+    size_t start = *tree_size;  /* First entry written by the loop below. */
+    repetitions -= 3;  /* The count is stored with a bias of 3. */
+    while (BROTLI_TRUE) {
+      tree[*tree_size] = BROTLI_REPEAT_PREVIOUS_CODE_LENGTH;
+      extra_bits_data[*tree_size] = repetitions & 0x3;  /* Low 2 bits. */
+      ++(*tree_size);
+      repetitions >>= 2;
+      if (repetitions == 0) {
+        break;
+      }
+      --repetitions;
+    }
+    Reverse(tree, start, *tree_size);  /* Entries were made low digit first. */
+    Reverse(extra_bits_data, start, *tree_size);
+  }
+}
+
+static void BrotliWriteHuffmanTreeRepetitionsZeros(
+    size_t repetitions,
+    size_t* tree_size,
+    uint8_t* tree,
+    uint8_t* extra_bits_data) {
+  if (repetitions == 11) {  /* Emit one zero literally so that 10 remain. */
+    tree[*tree_size] = 0;
+    extra_bits_data[*tree_size] = 0;
+    ++(*tree_size);
+    --repetitions;
+  }
+  if (repetitions < 3) {  /* Fewer than 3 left: emit each zero literally. */
+    size_t i;
+    for (i = 0; i < repetitions; ++i) {
+      tree[*tree_size] = 0;
+      extra_bits_data[*tree_size] = 0;
+      ++(*tree_size);
+    }
+  } else {
+    size_t start = *tree_size;  /* First entry written by the loop below. */
+    repetitions -= 3;  /* The count is stored with a bias of 3. */
+    while (BROTLI_TRUE) {
+      tree[*tree_size] = BROTLI_REPEAT_ZERO_CODE_LENGTH;
+      extra_bits_data[*tree_size] = repetitions & 0x7;  /* Low 3 bits. */
+      ++(*tree_size);
+      repetitions >>= 3;
+      if (repetitions == 0) {
+        break;
+      }
+      --repetitions;
+    }
+    Reverse(tree, start, *tree_size);  /* Entries were made low digit first. */
+    Reverse(extra_bits_data, start, *tree_size);
+  }
+}
+
+void BrotliOptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
+                                       uint8_t* good_for_rle) {
+  size_t nonzero_count = 0;
+  size_t stride;
+  size_t limit;
+  size_t sum;
+  const size_t streak_limit = 1240;  /* In 24.8 fixed point, like limit. */
+  /* Let's make the Huffman code more compatible with RLE encoding. */
+  size_t i;
+  for (i = 0; i < length; i++) {
+    if (counts[i]) {
+      ++nonzero_count;
+    }
+  }
+  if (nonzero_count < 16) {  /* Few used symbols: keep counts as is. */
+    return;
+  }
+  while (length != 0 && counts[length - 1] == 0) {
+    --length;
+  }
+  if (length == 0) {
+    return;  /* All zeros. */
+  }
+  /* Now counts[0..length - 1] does not have trailing zeros. */
+  {
+    size_t nonzeros = 0;
+    uint32_t smallest_nonzero = 1 << 30;
+    for (i = 0; i < length; ++i) {
+      if (counts[i] != 0) {
+        ++nonzeros;
+        if (smallest_nonzero > counts[i]) {
+          smallest_nonzero = counts[i];
+        }
+      }
+    }
+    if (nonzeros < 5) {
+      /* Small histogram will model it well. */
+      return;
+    }
+    if (smallest_nonzero < 4) {
+      size_t zeros = length - nonzeros;
+      if (zeros < 6) {
+        for (i = 1; i < length - 1; ++i) {
+          if (counts[i - 1] != 0 && counts[i] == 0 && counts[i + 1] != 0) {
+            counts[i] = 1;  /* Fill an isolated zero between non-zeros. */
+          }
+        }
+      }
+    }
+    if (nonzeros < 28) {  /* Too few used symbols for the passes below. */
+      return;
+    }
+  }
+  /* 2) Let's mark all population counts that already can be encoded
+     with an RLE code. */
+  memset(good_for_rle, 0, length);
+  {
+    /* Let's not spoil any of the existing good RLE codes.
+       Mark any run of 0's that is at least 5 long as good_for_rle.
+       Mark any run of equal non-0's that is at least 7 long as good_for_rle. */
+    uint32_t symbol = counts[0];
+    size_t step = 0;
+    for (i = 0; i <= length; ++i) {  /* i == length flushes the last run. */
+      if (i == length || counts[i] != symbol) {
+        if ((symbol == 0 && step >= 5) ||
+            (symbol != 0 && step >= 7)) {
+          size_t k;
+          for (k = 0; k < step; ++k) {
+            good_for_rle[i - k - 1] = 1;
+          }
+        }
+        step = 1;  /* counts[i] starts the next run. */
+        if (i != length) {
+          symbol = counts[i];
+        }
+      } else {
+        ++step;
+      }
+    }
+  }
+  /* 3) Let's replace those population counts that lead to more RLE codes.
+     Math here is in 24.8 fixed point representation. */
+  stride = 0;
+  limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
+  sum = 0;
+  for (i = 0; i <= length; ++i) {  /* i == length flushes the last stride. */
+    if (i == length || good_for_rle[i] ||
+        (i != 0 && good_for_rle[i - 1]) ||
+        (256 * counts[i] - limit + streak_limit) >= 2 * streak_limit) {
+      if (stride >= 4 || (stride >= 3 && sum == 0)) {
+        size_t k;
+        /* The stride must end, collapse what we have, if we have enough (4). */
+        size_t count = (sum + stride / 2) / stride;  /* Rounded mean. */
+        if (count == 0) {
+          count = 1;
+        }
+        if (sum == 0) {
+          /* Don't make an all zeros stride to be upgraded to ones. */
+          count = 0;
+        }
+        for (k = 0; k < stride; ++k) {
+          /* We don't want to change value at counts[i],
+             that is already belonging to the next stride. Thus - 1. */
+          counts[i - k - 1] = (uint32_t)count;
+        }
+      }
+      stride = 0;
+      sum = 0;
+      if (i < length - 2) {
+        /* All interesting strides have a count of at least 4, */
+        /* at least when non-zeros. */
+        limit = 256 * (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 420;
+      } else if (i < length) {
+        limit = 256 * counts[i];
+      } else {
+        limit = 0;
+      }
+    }
+    ++stride;
+    if (i != length) {
+      sum += counts[i];
+      if (stride >= 4) {
+        limit = (256 * sum + stride / 2) / stride;  /* Rounded stride mean. */
+      }
+      if (stride == 4) {
+        limit += 120;
+      }
+    }
+  }
+}
+
+static void DecideOverRleUse(const uint8_t* depth, const size_t length,
+                             BROTLI_BOOL* use_rle_for_non_zero,
+                             BROTLI_BOOL* use_rle_for_zero) {
+  size_t total_reps_zero = 0;  /* Summed length of qualifying zero runs. */
+  size_t total_reps_non_zero = 0;
+  size_t count_reps_zero = 1;  /* Number of qualifying zero runs, plus 1. */
+  size_t count_reps_non_zero = 1;
+  size_t i;
+  for (i = 0; i < length;) {  /* Advances by one whole run per iteration. */
+    const uint8_t value = depth[i];
+    size_t reps = 1;
+    size_t k;
+    for (k = i + 1; k < length && depth[k] == value; ++k) {
+      ++reps;
+    }
+    if (reps >= 3 && value == 0) {  /* Zero run of 3 or more. */
+      total_reps_zero += reps;
+      ++count_reps_zero;
+    }
+    if (reps >= 4 && value != 0) {  /* Non-zero run of 4 or more. */
+      total_reps_non_zero += reps;
+      ++count_reps_non_zero;
+    }
+    i += reps;
+  }
+  *use_rle_for_non_zero =
+      TO_BROTLI_BOOL(total_reps_non_zero > count_reps_non_zero * 2);
+  *use_rle_for_zero = TO_BROTLI_BOOL(total_reps_zero > count_reps_zero * 2);
+}
+
+void BrotliWriteHuffmanTree(const uint8_t* depth,
+                            size_t length,
+                            size_t* tree_size,
+                            uint8_t* tree,
+                            uint8_t* extra_bits_data) {
+  uint8_t previous_value = BROTLI_INITIAL_REPEATED_CODE_LENGTH;
+  size_t i;
+  BROTLI_BOOL use_rle_for_non_zero = BROTLI_FALSE;
+  BROTLI_BOOL use_rle_for_zero = BROTLI_FALSE;
+
+  /* Throw away trailing zeros. */
+  size_t new_length = length;
+  for (i = 0; i < length; ++i) {
+    if (depth[length - i - 1] == 0) {
+      --new_length;
+    } else {
+      break;
+    }
+  }
+
+  /* First gather statistics on if it is a good idea to do RLE. */
+  if (length > 50) {  /* Note: this tests length, not new_length. */
+    /* Find RLE coding for longer codes.
+       Shorter codes seem not to benefit from RLE. */
+    DecideOverRleUse(depth, new_length,
+                     &use_rle_for_non_zero, &use_rle_for_zero);
+  }
+
+  /* Actual RLE coding. */
+  for (i = 0; i < new_length;) {
+    const uint8_t value = depth[i];
+    size_t reps = 1;  /* Stays 1 when RLE is off for this kind of value. */
+    if ((value != 0 && use_rle_for_non_zero) ||
+        (value == 0 && use_rle_for_zero)) {
+      size_t k;
+      for (k = i + 1; k < new_length && depth[k] == value; ++k) {
+        ++reps;
+      }
+    }
+    if (value == 0) {
+      BrotliWriteHuffmanTreeRepetitionsZeros(
+          reps, tree_size, tree, extra_bits_data);
+    } else {
+      BrotliWriteHuffmanTreeRepetitions(previous_value,
+                                        value, reps, tree_size,
+                                        tree, extra_bits_data);
+      previous_value = value;  /* Only non-zero values update this. */
+    }
+    i += reps;
+  }
+}
+
+static uint16_t BrotliReverseBits(size_t num_bits, uint16_t bits) {
+  static const size_t kLut[16] = {  /* Pre-reversed 4-bit values. */
+    0x00, 0x08, 0x04, 0x0C, 0x02, 0x0A, 0x06, 0x0E,
+    0x01, 0x09, 0x05, 0x0D, 0x03, 0x0B, 0x07, 0x0F
+  };
+  size_t retval = kLut[bits & 0x0F];  /* Lowest nibble, reversed. */
+  size_t i;
+  for (i = 4; i < num_bits; i += 4) {  /* One further nibble per pass. */
+    retval <<= 4;
+    bits = (uint16_t)(bits >> 4);
+    retval |= kLut[bits & 0x0F];
+  }
+  retval >>= ((0 - num_bits) & 0x03);  /* Drop padding if num_bits % 4 != 0. */
+  return (uint16_t)retval;
+}
+
+/* 0..15 are values for bits */
+#define MAX_HUFFMAN_BITS 16
+
+void BrotliConvertBitDepthsToSymbols(const uint8_t* depth,
+                                     size_t len,
+                                     uint16_t* bits) {
+  /* In Brotli, all bit depths are [1..15]
+     0 bit depth means that the symbol does not exist. */
+  uint16_t bl_count[MAX_HUFFMAN_BITS] = { 0 };  /* Symbols per bit depth. */
+  uint16_t next_code[MAX_HUFFMAN_BITS];  /* Next unused code per depth. */
+  size_t i;
+  int code = 0;
+  for (i = 0; i < len; ++i) {
+    ++bl_count[depth[i]];
+  }
+  bl_count[0] = 0;  /* Depth 0 marks unused symbols; they get no codes. */
+  next_code[0] = 0;
+  for (i = 1; i < MAX_HUFFMAN_BITS; ++i) {
+    code = (code + bl_count[i - 1]) << 1;  /* First code of length i. */
+    next_code[i] = (uint16_t)code;
+  }
+  for (i = 0; i < len; ++i) {
+    if (depth[i]) {  /* bits[i] is left unwritten for unused symbols. */
+      bits[i] = BrotliReverseBits(depth[i], next_code[depth[i]]++);
+    }
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/entropy_encode.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/entropy_encode.h
new file mode 100644
index 0000000000..e1c779cc6f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/entropy_encode.h
@@ -0,0 +1,123 @@
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Entropy encoding (Huffman) utilities. */
+
+#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
+#define BROTLI_ENC_ENTROPY_ENCODE_H_
+
+#include <brotli/types.h>
+
+#include "../common/platform.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* A node of a Huffman tree. */
+typedef struct HuffmanTree {
+  uint32_t total_count_;  /* population count; parents hold the child sum */
+  int16_t index_left_;  /* left child index; negative for a leaf */
+  int16_t index_right_or_value_;  /* right child index; symbol for a leaf */
+} HuffmanTree;
+
+static BROTLI_INLINE void InitHuffmanTree(HuffmanTree* self, uint32_t count,
+    int16_t left, int16_t right) {  /* Stores the three fields as given. */
+  self->total_count_ = count;
+  self->index_left_ = left;  /* Callers in entropy_encode.c pass -1 here. */
+  self->index_right_or_value_ = right;
+}
+
+/* Returns 1 if the assignment of depths succeeded, otherwise 0. */
+BROTLI_INTERNAL BROTLI_BOOL BrotliSetDepth(
+    int p, HuffmanTree* pool, uint8_t* depth, int max_depth);
+
+/* This function will create a Huffman tree.
+
+   The (data,length) contains the population counts.
+   The tree_limit is the maximum bit depth of the Huffman codes.
+
+   The depth contains the tree, i.e., how many bits are used for
+   the symbol.
+
+   The actual Huffman tree is constructed in the tree[] array, which has to
+   be at least 2 * length + 1 long.
+
+   See http://en.wikipedia.org/wiki/Huffman_coding */
+BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t* data,
+                                             const size_t length,
+                                             const int tree_limit,
+                                             HuffmanTree* tree,
+                                             uint8_t* depth);
+
+/* Change the population counts in a way that the consequent
+   Huffman tree compression, especially its RLE-part will be more
+   likely to compress this data more efficiently.
+
+   length contains the size of the histogram.
+   counts contains the population counts.
+   good_for_rle is a buffer of at least length size */
+BROTLI_INTERNAL void BrotliOptimizeHuffmanCountsForRle(
+    size_t length, uint32_t* counts, uint8_t* good_for_rle);
+
+/* Write a Huffman tree from bit depths into the bit-stream representation
+   of a Huffman tree. The generated Huffman tree is to be compressed once
+   more using a Huffman tree */
+BROTLI_INTERNAL void BrotliWriteHuffmanTree(const uint8_t* depth,
+                                            size_t num,
+                                            size_t* tree_size,
+                                            uint8_t* tree,
+                                            uint8_t* extra_bits_data);
+
+/* Get the actual bit values for a tree of bit depths. */
+BROTLI_INTERNAL void BrotliConvertBitDepthsToSymbols(const uint8_t* depth,
+                                                     size_t len,
+                                                     uint16_t* bits);
+
+BROTLI_INTERNAL extern const size_t kBrotliShellGaps[6];
+/* Input size optimized Shell sort. */
+typedef BROTLI_BOOL (*HuffmanTreeComparator)(
+    const HuffmanTree*, const HuffmanTree*);
+static BROTLI_INLINE void SortHuffmanTreeItems(HuffmanTree* items,
+    const size_t n, HuffmanTreeComparator comparator) {
+  if (n < 13) {
+    /* Insertion sort. */
+    size_t i;
+    for (i = 1; i < n; ++i) {
+      HuffmanTree tmp = items[i];
+      size_t k = i;  /* Slot where tmp will finally be stored. */
+      size_t j = i - 1;
+      while (comparator(&tmp, &items[j])) {
+        items[k] = items[j];
+        k = j;
+        if (!j--) break;  /* Stop after comparing against items[0]. */
+      }
+      items[k] = tmp;
+    }
+    return;
+  } else {
+    /* Shell sort. */
+    int g = n < 57 ? 2 : 0;  /* Index of the first gap to use. */
+    for (; g < 6; ++g) {
+      size_t gap = kBrotliShellGaps[g];
+      size_t i;
+      for (i = gap; i < n; ++i) {  /* Gapped insertion of items[i]. */
+        size_t j = i;
+        HuffmanTree tmp = items[i];
+        for (; j >= gap && comparator(&tmp, &items[j - gap]); j -= gap) {
+          items[j] = items[j - gap];
+        }
+        items[j] = tmp;
+      }
+    }
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_ENTROPY_ENCODE_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/entropy_encode_static.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/entropy_encode_static.h
new file mode 100644
index 0000000000..ecff1fe9ee
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/entropy_encode_static.h
@@ -0,0 +1,542 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Static entropy codes used for faster meta-block encoding. */
+
+#ifndef BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
+#define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
+
+#include <brotli/types.h>
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include "write_bits.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static const uint8_t kCodeLengthDepth[18] = {
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 0, 4, 4,
+};  /* Read as bit lengths: 15 x 2^-4 + 2 x 2^-5 = 1 (Kraft); entry 15 is 0. */
+
+static const uint8_t kStaticCommandCodeDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+};  /* 448 entries of 9 then 256 of 11; as bit lengths the Kraft sum is 1. */
+
+static const uint8_t kStaticDistanceCodeDepth[64] = {
+  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+};  /* All 64 entries are 6: a flat 6-bit code if these are bit lengths. */
+
+/* GENERATED CODE START */
+static const uint32_t kCodeLengthBits[18] = {
+  0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 15, 31, 0, 11, 7,
+};  /* NOTE(review): matches bit-reversed canonical codes for kCodeLengthDepth. */
+
+static BROTLI_INLINE void StoreStaticCodeLengthCode(
+    size_t* storage_ix, uint8_t* storage) {
+  BrotliWriteBits(  /* Single fixed write; presumably 40 = bit count (TODO confirm). */
+      40, BROTLI_MAKE_UINT64_T(0x0000FFu, 0x55555554u), storage_ix, storage);
+}
+
+static const uint64_t kZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
+  0x00000000, 0x00000000, 0x00000000, 0x00000007, 0x00000017, 0x00000027,
+  0x00000037, 0x00000047, 0x00000057, 0x00000067, 0x00000077, 0x00000770,
+  0x00000b87, 0x00001387, 0x00001b87, 0x00002387, 0x00002b87, 0x00003387,
+  0x00003b87, 0x00000397, 0x00000b97, 0x00001397, 0x00001b97, 0x00002397,
+  0x00002b97, 0x00003397, 0x00003b97, 0x000003a7, 0x00000ba7, 0x000013a7,
+  0x00001ba7, 0x000023a7, 0x00002ba7, 0x000033a7, 0x00003ba7, 0x000003b7,
+  0x00000bb7, 0x000013b7, 0x00001bb7, 0x000023b7, 0x00002bb7, 0x000033b7,
+  0x00003bb7, 0x000003c7, 0x00000bc7, 0x000013c7, 0x00001bc7, 0x000023c7,
+  0x00002bc7, 0x000033c7, 0x00003bc7, 0x000003d7, 0x00000bd7, 0x000013d7,
+  0x00001bd7, 0x000023d7, 0x00002bd7, 0x000033d7, 0x00003bd7, 0x000003e7,
+  0x00000be7, 0x000013e7, 0x00001be7, 0x000023e7, 0x00002be7, 0x000033e7,
+  0x00003be7, 0x000003f7, 0x00000bf7, 0x000013f7, 0x00001bf7, 0x000023f7,
+  0x00002bf7, 0x000033f7, 0x00003bf7, 0x0001c387, 0x0005c387, 0x0009c387,
+  0x000dc387, 0x0011c387, 0x0015c387, 0x0019c387, 0x001dc387, 0x0001cb87,
+  0x0005cb87, 0x0009cb87, 0x000dcb87, 0x0011cb87, 0x0015cb87, 0x0019cb87,
+  0x001dcb87, 0x0001d387, 0x0005d387, 0x0009d387, 0x000dd387, 0x0011d387,
+  0x0015d387, 0x0019d387, 0x001dd387, 0x0001db87, 0x0005db87, 0x0009db87,
+  0x000ddb87, 0x0011db87, 0x0015db87, 0x0019db87, 0x001ddb87, 0x0001e387,
+  0x0005e387, 0x0009e387, 0x000de387, 0x0011e387, 0x0015e387, 0x0019e387,
+  0x001de387, 0x0001eb87, 0x0005eb87, 0x0009eb87, 0x000deb87, 0x0011eb87,
+  0x0015eb87, 0x0019eb87, 0x001deb87, 0x0001f387, 0x0005f387, 0x0009f387,
+  0x000df387, 0x0011f387, 0x0015f387, 0x0019f387, 0x001df387, 0x0001fb87,
+  0x0005fb87, 0x0009fb87, 0x000dfb87, 0x0011fb87, 0x0015fb87, 0x0019fb87,
+  0x001dfb87, 0x0001c397, 0x0005c397, 0x0009c397, 0x000dc397, 0x0011c397,
+  0x0015c397, 0x0019c397, 0x001dc397, 0x0001cb97, 0x0005cb97, 0x0009cb97,
+  0x000dcb97, 0x0011cb97, 0x0015cb97, 0x0019cb97, 0x001dcb97, 0x0001d397,
+  0x0005d397, 0x0009d397, 0x000dd397, 0x0011d397, 0x0015d397, 0x0019d397,
+  0x001dd397, 0x0001db97, 0x0005db97, 0x0009db97, 0x000ddb97, 0x0011db97,
+  0x0015db97, 0x0019db97, 0x001ddb97, 0x0001e397, 0x0005e397, 0x0009e397,
+  0x000de397, 0x0011e397, 0x0015e397, 0x0019e397, 0x001de397, 0x0001eb97,
+  0x0005eb97, 0x0009eb97, 0x000deb97, 0x0011eb97, 0x0015eb97, 0x0019eb97,
+  0x001deb97, 0x0001f397, 0x0005f397, 0x0009f397, 0x000df397, 0x0011f397,
+  0x0015f397, 0x0019f397, 0x001df397, 0x0001fb97, 0x0005fb97, 0x0009fb97,
+  0x000dfb97, 0x0011fb97, 0x0015fb97, 0x0019fb97, 0x001dfb97, 0x0001c3a7,
+  0x0005c3a7, 0x0009c3a7, 0x000dc3a7, 0x0011c3a7, 0x0015c3a7, 0x0019c3a7,
+  0x001dc3a7, 0x0001cba7, 0x0005cba7, 0x0009cba7, 0x000dcba7, 0x0011cba7,
+  0x0015cba7, 0x0019cba7, 0x001dcba7, 0x0001d3a7, 0x0005d3a7, 0x0009d3a7,
+  0x000dd3a7, 0x0011d3a7, 0x0015d3a7, 0x0019d3a7, 0x001dd3a7, 0x0001dba7,
+  0x0005dba7, 0x0009dba7, 0x000ddba7, 0x0011dba7, 0x0015dba7, 0x0019dba7,
+  0x001ddba7, 0x0001e3a7, 0x0005e3a7, 0x0009e3a7, 0x000de3a7, 0x0011e3a7,
+  0x0015e3a7, 0x0019e3a7, 0x001de3a7, 0x0001eba7, 0x0005eba7, 0x0009eba7,
+  0x000deba7, 0x0011eba7, 0x0015eba7, 0x0019eba7, 0x001deba7, 0x0001f3a7,
+  0x0005f3a7, 0x0009f3a7, 0x000df3a7, 0x0011f3a7, 0x0015f3a7, 0x0019f3a7,
+  0x001df3a7, 0x0001fba7, 0x0005fba7, 0x0009fba7, 0x000dfba7, 0x0011fba7,
+  0x0015fba7, 0x0019fba7, 0x001dfba7, 0x0001c3b7, 0x0005c3b7, 0x0009c3b7,
+  0x000dc3b7, 0x0011c3b7, 0x0015c3b7, 0x0019c3b7, 0x001dc3b7, 0x0001cbb7,
+  0x0005cbb7, 0x0009cbb7, 0x000dcbb7, 0x0011cbb7, 0x0015cbb7, 0x0019cbb7,
+  0x001dcbb7, 0x0001d3b7, 0x0005d3b7, 0x0009d3b7, 0x000dd3b7, 0x0011d3b7,
+  0x0015d3b7, 0x0019d3b7, 0x001dd3b7, 0x0001dbb7, 0x0005dbb7, 0x0009dbb7,
+  0x000ddbb7, 0x0011dbb7, 0x0015dbb7, 0x0019dbb7, 0x001ddbb7, 0x0001e3b7,
+  0x0005e3b7, 0x0009e3b7, 0x000de3b7, 0x0011e3b7, 0x0015e3b7, 0x0019e3b7,
+  0x001de3b7, 0x0001ebb7, 0x0005ebb7, 0x0009ebb7, 0x000debb7, 0x0011ebb7,
+  0x0015ebb7, 0x0019ebb7, 0x001debb7, 0x0001f3b7, 0x0005f3b7, 0x0009f3b7,
+  0x000df3b7, 0x0011f3b7, 0x0015f3b7, 0x0019f3b7, 0x001df3b7, 0x0001fbb7,
+  0x0005fbb7, 0x0009fbb7, 0x000dfbb7, 0x0011fbb7, 0x0015fbb7, 0x0019fbb7,
+  0x001dfbb7, 0x0001c3c7, 0x0005c3c7, 0x0009c3c7, 0x000dc3c7, 0x0011c3c7,
+  0x0015c3c7, 0x0019c3c7, 0x001dc3c7, 0x0001cbc7, 0x0005cbc7, 0x0009cbc7,
+  0x000dcbc7, 0x0011cbc7, 0x0015cbc7, 0x0019cbc7, 0x001dcbc7, 0x0001d3c7,
+  0x0005d3c7, 0x0009d3c7, 0x000dd3c7, 0x0011d3c7, 0x0015d3c7, 0x0019d3c7,
+  0x001dd3c7, 0x0001dbc7, 0x0005dbc7, 0x0009dbc7, 0x000ddbc7, 0x0011dbc7,
+  0x0015dbc7, 0x0019dbc7, 0x001ddbc7, 0x0001e3c7, 0x0005e3c7, 0x0009e3c7,
+  0x000de3c7, 0x0011e3c7, 0x0015e3c7, 0x0019e3c7, 0x001de3c7, 0x0001ebc7,
+  0x0005ebc7, 0x0009ebc7, 0x000debc7, 0x0011ebc7, 0x0015ebc7, 0x0019ebc7,
+  0x001debc7, 0x0001f3c7, 0x0005f3c7, 0x0009f3c7, 0x000df3c7, 0x0011f3c7,
+  0x0015f3c7, 0x0019f3c7, 0x001df3c7, 0x0001fbc7, 0x0005fbc7, 0x0009fbc7,
+  0x000dfbc7, 0x0011fbc7, 0x0015fbc7, 0x0019fbc7, 0x001dfbc7, 0x0001c3d7,
+  0x0005c3d7, 0x0009c3d7, 0x000dc3d7, 0x0011c3d7, 0x0015c3d7, 0x0019c3d7,
+  0x001dc3d7, 0x0001cbd7, 0x0005cbd7, 0x0009cbd7, 0x000dcbd7, 0x0011cbd7,
+  0x0015cbd7, 0x0019cbd7, 0x001dcbd7, 0x0001d3d7, 0x0005d3d7, 0x0009d3d7,
+  0x000dd3d7, 0x0011d3d7, 0x0015d3d7, 0x0019d3d7, 0x001dd3d7, 0x0001dbd7,
+  0x0005dbd7, 0x0009dbd7, 0x000ddbd7, 0x0011dbd7, 0x0015dbd7, 0x0019dbd7,
+  0x001ddbd7, 0x0001e3d7, 0x0005e3d7, 0x0009e3d7, 0x000de3d7, 0x0011e3d7,
+  0x0015e3d7, 0x0019e3d7, 0x001de3d7, 0x0001ebd7, 0x0005ebd7, 0x0009ebd7,
+  0x000debd7, 0x0011ebd7, 0x0015ebd7, 0x0019ebd7, 0x001debd7, 0x0001f3d7,
+  0x0005f3d7, 0x0009f3d7, 0x000df3d7, 0x0011f3d7, 0x0015f3d7, 0x0019f3d7,
+  0x001df3d7, 0x0001fbd7, 0x0005fbd7, 0x0009fbd7, 0x000dfbd7, 0x0011fbd7,
+  0x0015fbd7, 0x0019fbd7, 0x001dfbd7, 0x0001c3e7, 0x0005c3e7, 0x0009c3e7,
+  0x000dc3e7, 0x0011c3e7, 0x0015c3e7, 0x0019c3e7, 0x001dc3e7, 0x0001cbe7,
+  0x0005cbe7, 0x0009cbe7, 0x000dcbe7, 0x0011cbe7, 0x0015cbe7, 0x0019cbe7,
+  0x001dcbe7, 0x0001d3e7, 0x0005d3e7, 0x0009d3e7, 0x000dd3e7, 0x0011d3e7,
+  0x0015d3e7, 0x0019d3e7, 0x001dd3e7, 0x0001dbe7, 0x0005dbe7, 0x0009dbe7,
+  0x000ddbe7, 0x0011dbe7, 0x0015dbe7, 0x0019dbe7, 0x001ddbe7, 0x0001e3e7,
+  0x0005e3e7, 0x0009e3e7, 0x000de3e7, 0x0011e3e7, 0x0015e3e7, 0x0019e3e7,
+  0x001de3e7, 0x0001ebe7, 0x0005ebe7, 0x0009ebe7, 0x000debe7, 0x0011ebe7,
+  0x0015ebe7, 0x0019ebe7, 0x001debe7, 0x0001f3e7, 0x0005f3e7, 0x0009f3e7,
+  0x000df3e7, 0x0011f3e7, 0x0015f3e7, 0x0019f3e7, 0x001df3e7, 0x0001fbe7,
+  0x0005fbe7, 0x0009fbe7, 0x000dfbe7, 0x0011fbe7, 0x0015fbe7, 0x0019fbe7,
+  0x001dfbe7, 0x0001c3f7, 0x0005c3f7, 0x0009c3f7, 0x000dc3f7, 0x0011c3f7,
+  0x0015c3f7, 0x0019c3f7, 0x001dc3f7, 0x0001cbf7, 0x0005cbf7, 0x0009cbf7,
+  0x000dcbf7, 0x0011cbf7, 0x0015cbf7, 0x0019cbf7, 0x001dcbf7, 0x0001d3f7,
+  0x0005d3f7, 0x0009d3f7, 0x000dd3f7, 0x0011d3f7, 0x0015d3f7, 0x0019d3f7,
+  0x001dd3f7, 0x0001dbf7, 0x0005dbf7, 0x0009dbf7, 0x000ddbf7, 0x0011dbf7,
+  0x0015dbf7, 0x0019dbf7, 0x001ddbf7, 0x0001e3f7, 0x0005e3f7, 0x0009e3f7,
+  0x000de3f7, 0x0011e3f7, 0x0015e3f7, 0x0019e3f7, 0x001de3f7, 0x0001ebf7,
+  0x0005ebf7, 0x0009ebf7, 0x000debf7, 0x0011ebf7, 0x0015ebf7, 0x0019ebf7,
+  0x001debf7, 0x0001f3f7, 0x0005f3f7, 0x0009f3f7, 0x000df3f7, 0x0011f3f7,
+  0x0015f3f7, 0x0019f3f7, 0x001df3f7, 0x0001fbf7, 0x0005fbf7, 0x0009fbf7,
+  0x000dfbf7, 0x0011fbf7, 0x0015fbf7, 0x0019fbf7, 0x001dfbf7, 0x00e1c387,
+  0x02e1c387, 0x04e1c387, 0x06e1c387, 0x08e1c387, 0x0ae1c387, 0x0ce1c387,
+  0x0ee1c387, 0x00e5c387, 0x02e5c387, 0x04e5c387, 0x06e5c387, 0x08e5c387,
+  0x0ae5c387, 0x0ce5c387, 0x0ee5c387, 0x00e9c387, 0x02e9c387, 0x04e9c387,
+  0x06e9c387, 0x08e9c387, 0x0ae9c387, 0x0ce9c387, 0x0ee9c387, 0x00edc387,
+  0x02edc387, 0x04edc387, 0x06edc387, 0x08edc387, 0x0aedc387, 0x0cedc387,
+  0x0eedc387, 0x00f1c387, 0x02f1c387, 0x04f1c387, 0x06f1c387, 0x08f1c387,
+  0x0af1c387, 0x0cf1c387, 0x0ef1c387, 0x00f5c387, 0x02f5c387, 0x04f5c387,
+  0x06f5c387, 0x08f5c387, 0x0af5c387, 0x0cf5c387, 0x0ef5c387, 0x00f9c387,
+  0x02f9c387, 0x04f9c387, 0x06f9c387, 0x08f9c387, 0x0af9c387, 0x0cf9c387,
+  0x0ef9c387, 0x00fdc387, 0x02fdc387, 0x04fdc387, 0x06fdc387, 0x08fdc387,
+  0x0afdc387, 0x0cfdc387, 0x0efdc387, 0x00e1cb87, 0x02e1cb87, 0x04e1cb87,
+  0x06e1cb87, 0x08e1cb87, 0x0ae1cb87, 0x0ce1cb87, 0x0ee1cb87, 0x00e5cb87,
+  0x02e5cb87, 0x04e5cb87, 0x06e5cb87, 0x08e5cb87, 0x0ae5cb87, 0x0ce5cb87,
+  0x0ee5cb87, 0x00e9cb87, 0x02e9cb87, 0x04e9cb87, 0x06e9cb87, 0x08e9cb87,
+  0x0ae9cb87, 0x0ce9cb87, 0x0ee9cb87, 0x00edcb87, 0x02edcb87, 0x04edcb87,
+  0x06edcb87, 0x08edcb87, 0x0aedcb87, 0x0cedcb87, 0x0eedcb87, 0x00f1cb87,
+  0x02f1cb87, 0x04f1cb87, 0x06f1cb87, 0x08f1cb87, 0x0af1cb87, 0x0cf1cb87,
+  0x0ef1cb87, 0x00f5cb87, 0x02f5cb87, 0x04f5cb87, 0x06f5cb87, 0x08f5cb87,
+  0x0af5cb87, 0x0cf5cb87, 0x0ef5cb87, 0x00f9cb87, 0x02f9cb87, 0x04f9cb87,
+  0x06f9cb87, 0x08f9cb87,
+};
+
+static const uint32_t kZeroRepsDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
+   0,  4,  8,  7,  7,  7,  7,  7,  7,  7,  7, 11, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+};
+
+static const uint64_t kNonZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
+  0x0000000b, 0x0000001b, 0x0000002b, 0x0000003b, 0x000002cb, 0x000006cb,
+  0x00000acb, 0x00000ecb, 0x000002db, 0x000006db, 0x00000adb, 0x00000edb,
+  0x000002eb, 0x000006eb, 0x00000aeb, 0x00000eeb, 0x000002fb, 0x000006fb,
+  0x00000afb, 0x00000efb, 0x0000b2cb, 0x0001b2cb, 0x0002b2cb, 0x0003b2cb,
+  0x0000b6cb, 0x0001b6cb, 0x0002b6cb, 0x0003b6cb, 0x0000bacb, 0x0001bacb,
+  0x0002bacb, 0x0003bacb, 0x0000becb, 0x0001becb, 0x0002becb, 0x0003becb,
+  0x0000b2db, 0x0001b2db, 0x0002b2db, 0x0003b2db, 0x0000b6db, 0x0001b6db,
+  0x0002b6db, 0x0003b6db, 0x0000badb, 0x0001badb, 0x0002badb, 0x0003badb,
+  0x0000bedb, 0x0001bedb, 0x0002bedb, 0x0003bedb, 0x0000b2eb, 0x0001b2eb,
+  0x0002b2eb, 0x0003b2eb, 0x0000b6eb, 0x0001b6eb, 0x0002b6eb, 0x0003b6eb,
+  0x0000baeb, 0x0001baeb, 0x0002baeb, 0x0003baeb, 0x0000beeb, 0x0001beeb,
+  0x0002beeb, 0x0003beeb, 0x0000b2fb, 0x0001b2fb, 0x0002b2fb, 0x0003b2fb,
+  0x0000b6fb, 0x0001b6fb, 0x0002b6fb, 0x0003b6fb, 0x0000bafb, 0x0001bafb,
+  0x0002bafb, 0x0003bafb, 0x0000befb, 0x0001befb, 0x0002befb, 0x0003befb,
+  0x002cb2cb, 0x006cb2cb, 0x00acb2cb, 0x00ecb2cb, 0x002db2cb, 0x006db2cb,
+  0x00adb2cb, 0x00edb2cb, 0x002eb2cb, 0x006eb2cb, 0x00aeb2cb, 0x00eeb2cb,
+  0x002fb2cb, 0x006fb2cb, 0x00afb2cb, 0x00efb2cb, 0x002cb6cb, 0x006cb6cb,
+  0x00acb6cb, 0x00ecb6cb, 0x002db6cb, 0x006db6cb, 0x00adb6cb, 0x00edb6cb,
+  0x002eb6cb, 0x006eb6cb, 0x00aeb6cb, 0x00eeb6cb, 0x002fb6cb, 0x006fb6cb,
+  0x00afb6cb, 0x00efb6cb, 0x002cbacb, 0x006cbacb, 0x00acbacb, 0x00ecbacb,
+  0x002dbacb, 0x006dbacb, 0x00adbacb, 0x00edbacb, 0x002ebacb, 0x006ebacb,
+  0x00aebacb, 0x00eebacb, 0x002fbacb, 0x006fbacb, 0x00afbacb, 0x00efbacb,
+  0x002cbecb, 0x006cbecb, 0x00acbecb, 0x00ecbecb, 0x002dbecb, 0x006dbecb,
+  0x00adbecb, 0x00edbecb, 0x002ebecb, 0x006ebecb, 0x00aebecb, 0x00eebecb,
+  0x002fbecb, 0x006fbecb, 0x00afbecb, 0x00efbecb, 0x002cb2db, 0x006cb2db,
+  0x00acb2db, 0x00ecb2db, 0x002db2db, 0x006db2db, 0x00adb2db, 0x00edb2db,
+  0x002eb2db, 0x006eb2db, 0x00aeb2db, 0x00eeb2db, 0x002fb2db, 0x006fb2db,
+  0x00afb2db, 0x00efb2db, 0x002cb6db, 0x006cb6db, 0x00acb6db, 0x00ecb6db,
+  0x002db6db, 0x006db6db, 0x00adb6db, 0x00edb6db, 0x002eb6db, 0x006eb6db,
+  0x00aeb6db, 0x00eeb6db, 0x002fb6db, 0x006fb6db, 0x00afb6db, 0x00efb6db,
+  0x002cbadb, 0x006cbadb, 0x00acbadb, 0x00ecbadb, 0x002dbadb, 0x006dbadb,
+  0x00adbadb, 0x00edbadb, 0x002ebadb, 0x006ebadb, 0x00aebadb, 0x00eebadb,
+  0x002fbadb, 0x006fbadb, 0x00afbadb, 0x00efbadb, 0x002cbedb, 0x006cbedb,
+  0x00acbedb, 0x00ecbedb, 0x002dbedb, 0x006dbedb, 0x00adbedb, 0x00edbedb,
+  0x002ebedb, 0x006ebedb, 0x00aebedb, 0x00eebedb, 0x002fbedb, 0x006fbedb,
+  0x00afbedb, 0x00efbedb, 0x002cb2eb, 0x006cb2eb, 0x00acb2eb, 0x00ecb2eb,
+  0x002db2eb, 0x006db2eb, 0x00adb2eb, 0x00edb2eb, 0x002eb2eb, 0x006eb2eb,
+  0x00aeb2eb, 0x00eeb2eb, 0x002fb2eb, 0x006fb2eb, 0x00afb2eb, 0x00efb2eb,
+  0x002cb6eb, 0x006cb6eb, 0x00acb6eb, 0x00ecb6eb, 0x002db6eb, 0x006db6eb,
+  0x00adb6eb, 0x00edb6eb, 0x002eb6eb, 0x006eb6eb, 0x00aeb6eb, 0x00eeb6eb,
+  0x002fb6eb, 0x006fb6eb, 0x00afb6eb, 0x00efb6eb, 0x002cbaeb, 0x006cbaeb,
+  0x00acbaeb, 0x00ecbaeb, 0x002dbaeb, 0x006dbaeb, 0x00adbaeb, 0x00edbaeb,
+  0x002ebaeb, 0x006ebaeb, 0x00aebaeb, 0x00eebaeb, 0x002fbaeb, 0x006fbaeb,
+  0x00afbaeb, 0x00efbaeb, 0x002cbeeb, 0x006cbeeb, 0x00acbeeb, 0x00ecbeeb,
+  0x002dbeeb, 0x006dbeeb, 0x00adbeeb, 0x00edbeeb, 0x002ebeeb, 0x006ebeeb,
+  0x00aebeeb, 0x00eebeeb, 0x002fbeeb, 0x006fbeeb, 0x00afbeeb, 0x00efbeeb,
+  0x002cb2fb, 0x006cb2fb, 0x00acb2fb, 0x00ecb2fb, 0x002db2fb, 0x006db2fb,
+  0x00adb2fb, 0x00edb2fb, 0x002eb2fb, 0x006eb2fb, 0x00aeb2fb, 0x00eeb2fb,
+  0x002fb2fb, 0x006fb2fb, 0x00afb2fb, 0x00efb2fb, 0x002cb6fb, 0x006cb6fb,
+  0x00acb6fb, 0x00ecb6fb, 0x002db6fb, 0x006db6fb, 0x00adb6fb, 0x00edb6fb,
+  0x002eb6fb, 0x006eb6fb, 0x00aeb6fb, 0x00eeb6fb, 0x002fb6fb, 0x006fb6fb,
+  0x00afb6fb, 0x00efb6fb, 0x002cbafb, 0x006cbafb, 0x00acbafb, 0x00ecbafb,
+  0x002dbafb, 0x006dbafb, 0x00adbafb, 0x00edbafb, 0x002ebafb, 0x006ebafb,
+  0x00aebafb, 0x00eebafb, 0x002fbafb, 0x006fbafb, 0x00afbafb, 0x00efbafb,
+  0x002cbefb, 0x006cbefb, 0x00acbefb, 0x00ecbefb, 0x002dbefb, 0x006dbefb,
+  0x00adbefb, 0x00edbefb, 0x002ebefb, 0x006ebefb, 0x00aebefb, 0x00eebefb,
+  0x002fbefb, 0x006fbefb, 0x00afbefb, 0x00efbefb, 0x0b2cb2cb, 0x1b2cb2cb,
+  0x2b2cb2cb, 0x3b2cb2cb, 0x0b6cb2cb, 0x1b6cb2cb, 0x2b6cb2cb, 0x3b6cb2cb,
+  0x0bacb2cb, 0x1bacb2cb, 0x2bacb2cb, 0x3bacb2cb, 0x0becb2cb, 0x1becb2cb,
+  0x2becb2cb, 0x3becb2cb, 0x0b2db2cb, 0x1b2db2cb, 0x2b2db2cb, 0x3b2db2cb,
+  0x0b6db2cb, 0x1b6db2cb, 0x2b6db2cb, 0x3b6db2cb, 0x0badb2cb, 0x1badb2cb,
+  0x2badb2cb, 0x3badb2cb, 0x0bedb2cb, 0x1bedb2cb, 0x2bedb2cb, 0x3bedb2cb,
+  0x0b2eb2cb, 0x1b2eb2cb, 0x2b2eb2cb, 0x3b2eb2cb, 0x0b6eb2cb, 0x1b6eb2cb,
+  0x2b6eb2cb, 0x3b6eb2cb, 0x0baeb2cb, 0x1baeb2cb, 0x2baeb2cb, 0x3baeb2cb,
+  0x0beeb2cb, 0x1beeb2cb, 0x2beeb2cb, 0x3beeb2cb, 0x0b2fb2cb, 0x1b2fb2cb,
+  0x2b2fb2cb, 0x3b2fb2cb, 0x0b6fb2cb, 0x1b6fb2cb, 0x2b6fb2cb, 0x3b6fb2cb,
+  0x0bafb2cb, 0x1bafb2cb, 0x2bafb2cb, 0x3bafb2cb, 0x0befb2cb, 0x1befb2cb,
+  0x2befb2cb, 0x3befb2cb, 0x0b2cb6cb, 0x1b2cb6cb, 0x2b2cb6cb, 0x3b2cb6cb,
+  0x0b6cb6cb, 0x1b6cb6cb, 0x2b6cb6cb, 0x3b6cb6cb, 0x0bacb6cb, 0x1bacb6cb,
+  0x2bacb6cb, 0x3bacb6cb, 0x0becb6cb, 0x1becb6cb, 0x2becb6cb, 0x3becb6cb,
+  0x0b2db6cb, 0x1b2db6cb, 0x2b2db6cb, 0x3b2db6cb, 0x0b6db6cb, 0x1b6db6cb,
+  0x2b6db6cb, 0x3b6db6cb, 0x0badb6cb, 0x1badb6cb, 0x2badb6cb, 0x3badb6cb,
+  0x0bedb6cb, 0x1bedb6cb, 0x2bedb6cb, 0x3bedb6cb, 0x0b2eb6cb, 0x1b2eb6cb,
+  0x2b2eb6cb, 0x3b2eb6cb, 0x0b6eb6cb, 0x1b6eb6cb, 0x2b6eb6cb, 0x3b6eb6cb,
+  0x0baeb6cb, 0x1baeb6cb, 0x2baeb6cb, 0x3baeb6cb, 0x0beeb6cb, 0x1beeb6cb,
+  0x2beeb6cb, 0x3beeb6cb, 0x0b2fb6cb, 0x1b2fb6cb, 0x2b2fb6cb, 0x3b2fb6cb,
+  0x0b6fb6cb, 0x1b6fb6cb, 0x2b6fb6cb, 0x3b6fb6cb, 0x0bafb6cb, 0x1bafb6cb,
+  0x2bafb6cb, 0x3bafb6cb, 0x0befb6cb, 0x1befb6cb, 0x2befb6cb, 0x3befb6cb,
+  0x0b2cbacb, 0x1b2cbacb, 0x2b2cbacb, 0x3b2cbacb, 0x0b6cbacb, 0x1b6cbacb,
+  0x2b6cbacb, 0x3b6cbacb, 0x0bacbacb, 0x1bacbacb, 0x2bacbacb, 0x3bacbacb,
+  0x0becbacb, 0x1becbacb, 0x2becbacb, 0x3becbacb, 0x0b2dbacb, 0x1b2dbacb,
+  0x2b2dbacb, 0x3b2dbacb, 0x0b6dbacb, 0x1b6dbacb, 0x2b6dbacb, 0x3b6dbacb,
+  0x0badbacb, 0x1badbacb, 0x2badbacb, 0x3badbacb, 0x0bedbacb, 0x1bedbacb,
+  0x2bedbacb, 0x3bedbacb, 0x0b2ebacb, 0x1b2ebacb, 0x2b2ebacb, 0x3b2ebacb,
+  0x0b6ebacb, 0x1b6ebacb, 0x2b6ebacb, 0x3b6ebacb, 0x0baebacb, 0x1baebacb,
+  0x2baebacb, 0x3baebacb, 0x0beebacb, 0x1beebacb, 0x2beebacb, 0x3beebacb,
+  0x0b2fbacb, 0x1b2fbacb, 0x2b2fbacb, 0x3b2fbacb, 0x0b6fbacb, 0x1b6fbacb,
+  0x2b6fbacb, 0x3b6fbacb, 0x0bafbacb, 0x1bafbacb, 0x2bafbacb, 0x3bafbacb,
+  0x0befbacb, 0x1befbacb, 0x2befbacb, 0x3befbacb, 0x0b2cbecb, 0x1b2cbecb,
+  0x2b2cbecb, 0x3b2cbecb, 0x0b6cbecb, 0x1b6cbecb, 0x2b6cbecb, 0x3b6cbecb,
+  0x0bacbecb, 0x1bacbecb, 0x2bacbecb, 0x3bacbecb, 0x0becbecb, 0x1becbecb,
+  0x2becbecb, 0x3becbecb, 0x0b2dbecb, 0x1b2dbecb, 0x2b2dbecb, 0x3b2dbecb,
+  0x0b6dbecb, 0x1b6dbecb, 0x2b6dbecb, 0x3b6dbecb, 0x0badbecb, 0x1badbecb,
+  0x2badbecb, 0x3badbecb, 0x0bedbecb, 0x1bedbecb, 0x2bedbecb, 0x3bedbecb,
+  0x0b2ebecb, 0x1b2ebecb, 0x2b2ebecb, 0x3b2ebecb, 0x0b6ebecb, 0x1b6ebecb,
+  0x2b6ebecb, 0x3b6ebecb, 0x0baebecb, 0x1baebecb, 0x2baebecb, 0x3baebecb,
+  0x0beebecb, 0x1beebecb, 0x2beebecb, 0x3beebecb, 0x0b2fbecb, 0x1b2fbecb,
+  0x2b2fbecb, 0x3b2fbecb, 0x0b6fbecb, 0x1b6fbecb, 0x2b6fbecb, 0x3b6fbecb,
+  0x0bafbecb, 0x1bafbecb, 0x2bafbecb, 0x3bafbecb, 0x0befbecb, 0x1befbecb,
+  0x2befbecb, 0x3befbecb, 0x0b2cb2db, 0x1b2cb2db, 0x2b2cb2db, 0x3b2cb2db,
+  0x0b6cb2db, 0x1b6cb2db, 0x2b6cb2db, 0x3b6cb2db, 0x0bacb2db, 0x1bacb2db,
+  0x2bacb2db, 0x3bacb2db, 0x0becb2db, 0x1becb2db, 0x2becb2db, 0x3becb2db,
+  0x0b2db2db, 0x1b2db2db, 0x2b2db2db, 0x3b2db2db, 0x0b6db2db, 0x1b6db2db,
+  0x2b6db2db, 0x3b6db2db, 0x0badb2db, 0x1badb2db, 0x2badb2db, 0x3badb2db,
+  0x0bedb2db, 0x1bedb2db, 0x2bedb2db, 0x3bedb2db, 0x0b2eb2db, 0x1b2eb2db,
+  0x2b2eb2db, 0x3b2eb2db, 0x0b6eb2db, 0x1b6eb2db, 0x2b6eb2db, 0x3b6eb2db,
+  0x0baeb2db, 0x1baeb2db, 0x2baeb2db, 0x3baeb2db, 0x0beeb2db, 0x1beeb2db,
+  0x2beeb2db, 0x3beeb2db, 0x0b2fb2db, 0x1b2fb2db, 0x2b2fb2db, 0x3b2fb2db,
+  0x0b6fb2db, 0x1b6fb2db, 0x2b6fb2db, 0x3b6fb2db, 0x0bafb2db, 0x1bafb2db,
+  0x2bafb2db, 0x3bafb2db, 0x0befb2db, 0x1befb2db, 0x2befb2db, 0x3befb2db,
+  0x0b2cb6db, 0x1b2cb6db, 0x2b2cb6db, 0x3b2cb6db, 0x0b6cb6db, 0x1b6cb6db,
+  0x2b6cb6db, 0x3b6cb6db, 0x0bacb6db, 0x1bacb6db, 0x2bacb6db, 0x3bacb6db,
+  0x0becb6db, 0x1becb6db, 0x2becb6db, 0x3becb6db, 0x0b2db6db, 0x1b2db6db,
+  0x2b2db6db, 0x3b2db6db, 0x0b6db6db, 0x1b6db6db, 0x2b6db6db, 0x3b6db6db,
+  0x0badb6db, 0x1badb6db, 0x2badb6db, 0x3badb6db, 0x0bedb6db, 0x1bedb6db,
+  0x2bedb6db, 0x3bedb6db, 0x0b2eb6db, 0x1b2eb6db, 0x2b2eb6db, 0x3b2eb6db,
+  0x0b6eb6db, 0x1b6eb6db, 0x2b6eb6db, 0x3b6eb6db, 0x0baeb6db, 0x1baeb6db,
+  0x2baeb6db, 0x3baeb6db,
+};
+
+static const uint32_t kNonZeroRepsDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
+   6,  6,  6,  6, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+  18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+  18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+  18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+  18, 18, 18, 18, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+};
+
+static const uint16_t kStaticCommandCodeBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
+    0,  256,  128,  384,   64,  320,  192,  448,
+   32,  288,  160,  416,   96,  352,  224,  480,
+   16,  272,  144,  400,   80,  336,  208,  464,
+   48,  304,  176,  432,  112,  368,  240,  496,
+    8,  264,  136,  392,   72,  328,  200,  456,
+   40,  296,  168,  424,  104,  360,  232,  488,
+   24,  280,  152,  408,   88,  344,  216,  472,
+   56,  312,  184,  440,  120,  376,  248,  504,
+    4,  260,  132,  388,   68,  324,  196,  452,
+   36,  292,  164,  420,  100,  356,  228,  484,
+   20,  276,  148,  404,   84,  340,  212,  468,
+   52,  308,  180,  436,  116,  372,  244,  500,
+   12,  268,  140,  396,   76,  332,  204,  460,
+   44,  300,  172,  428,  108,  364,  236,  492,
+   28,  284,  156,  412,   92,  348,  220,  476,
+   60,  316,  188,  444,  124,  380,  252,  508,
+    2,  258,  130,  386,   66,  322,  194,  450,
+   34,  290,  162,  418,   98,  354,  226,  482,
+   18,  274,  146,  402,   82,  338,  210,  466,
+   50,  306,  178,  434,  114,  370,  242,  498,
+   10,  266,  138,  394,   74,  330,  202,  458,
+   42,  298,  170,  426,  106,  362,  234,  490,
+   26,  282,  154,  410,   90,  346,  218,  474,
+   58,  314,  186,  442,  122,  378,  250,  506,
+    6,  262,  134,  390,   70,  326,  198,  454,
+   38,  294,  166,  422,  102,  358,  230,  486,
+   22,  278,  150,  406,   86,  342,  214,  470,
+   54,  310,  182,  438,  118,  374,  246,  502,
+   14,  270,  142,  398,   78,  334,  206,  462,
+   46,  302,  174,  430,  110,  366,  238,  494,
+   30,  286,  158,  414,   94,  350,  222,  478,
+   62,  318,  190,  446,  126,  382,  254,  510,
+    1,  257,  129,  385,   65,  321,  193,  449,
+   33,  289,  161,  417,   97,  353,  225,  481,
+   17,  273,  145,  401,   81,  337,  209,  465,
+   49,  305,  177,  433,  113,  369,  241,  497,
+    9,  265,  137,  393,   73,  329,  201,  457,
+   41,  297,  169,  425,  105,  361,  233,  489,
+   25,  281,  153,  409,   89,  345,  217,  473,
+   57,  313,  185,  441,  121,  377,  249,  505,
+    5,  261,  133,  389,   69,  325,  197,  453,
+   37,  293,  165,  421,  101,  357,  229,  485,
+   21,  277,  149,  405,   85,  341,  213,  469,
+   53,  309,  181,  437,  117,  373,  245,  501,
+   13,  269,  141,  397,   77,  333,  205,  461,
+   45,  301,  173,  429,  109,  365,  237,  493,
+   29,  285,  157,  413,   93,  349,  221,  477,
+   61,  317,  189,  445,  125,  381,  253,  509,
+    3,  259,  131,  387,   67,  323,  195,  451,
+   35,  291,  163,  419,   99,  355,  227,  483,
+   19,  275,  147,  403,   83,  339,  211,  467,
+   51,  307,  179,  435,  115,  371,  243,  499,
+   11,  267,  139,  395,   75,  331,  203,  459,
+   43,  299,  171,  427,  107,  363,  235,  491,
+   27,  283,  155,  411,   91,  347,  219,  475,
+   59,  315,  187,  443,  123,  379,  251,  507,
+    7, 1031,  519, 1543,  263, 1287,  775, 1799,
+  135, 1159,  647, 1671,  391, 1415,  903, 1927,
+   71, 1095,  583, 1607,  327, 1351,  839, 1863,
+  199, 1223,  711, 1735,  455, 1479,  967, 1991,
+   39, 1063,  551, 1575,  295, 1319,  807, 1831,
+  167, 1191,  679, 1703,  423, 1447,  935, 1959,
+  103, 1127,  615, 1639,  359, 1383,  871, 1895,
+  231, 1255,  743, 1767,  487, 1511,  999, 2023,
+   23, 1047,  535, 1559,  279, 1303,  791, 1815,
+  151, 1175,  663, 1687,  407, 1431,  919, 1943,
+   87, 1111,  599, 1623,  343, 1367,  855, 1879,
+  215, 1239,  727, 1751,  471, 1495,  983, 2007,
+   55, 1079,  567, 1591,  311, 1335,  823, 1847,
+  183, 1207,  695, 1719,  439, 1463,  951, 1975,
+  119, 1143,  631, 1655,  375, 1399,  887, 1911,
+  247, 1271,  759, 1783,  503, 1527, 1015, 2039,
+   15, 1039,  527, 1551,  271, 1295,  783, 1807,
+  143, 1167,  655, 1679,  399, 1423,  911, 1935,
+   79, 1103,  591, 1615,  335, 1359,  847, 1871,
+  207, 1231,  719, 1743,  463, 1487,  975, 1999,
+   47, 1071,  559, 1583,  303, 1327,  815, 1839,
+  175, 1199,  687, 1711,  431, 1455,  943, 1967,
+  111, 1135,  623, 1647,  367, 1391,  879, 1903,
+  239, 1263,  751, 1775,  495, 1519, 1007, 2031,
+   31, 1055,  543, 1567,  287, 1311,  799, 1823,
+  159, 1183,  671, 1695,  415, 1439,  927, 1951,
+   95, 1119,  607, 1631,  351, 1375,  863, 1887,
+  223, 1247,  735, 1759,  479, 1503,  991, 2015,
+   63, 1087,  575, 1599,  319, 1343,  831, 1855,
+  191, 1215,  703, 1727,  447, 1471,  959, 1983,
+  127, 1151,  639, 1663,  383, 1407,  895, 1919,
+  255, 1279,  767, 1791,  511, 1535, 1023, 2047,
+};
+
+static BROTLI_INLINE void StoreStaticCommandHuffmanTree(
+    size_t* storage_ix, uint8_t* storage) {
+  BrotliWriteBits(  /* Two fixed writes; presumably 56 + 3 = 59 bits (TODO confirm). */
+      56, BROTLI_MAKE_UINT64_T(0x926244U, 0x16307003U), storage_ix, storage);
+  BrotliWriteBits(3, 0x00000000U, storage_ix, storage);
+}
+
+static const uint16_t kStaticDistanceCodeBits[64] = {
+   0, 32, 16, 48,  8, 40, 24, 56,  4, 36, 20, 52, 12, 44, 28, 60,
+   2, 34, 18, 50, 10, 42, 26, 58,  6, 38, 22, 54, 14, 46, 30, 62,
+   1, 33, 17, 49,  9, 41, 25, 57,  5, 37, 21, 53, 13, 45, 29, 61,
+   3, 35, 19, 51, 11, 43, 27, 59,  7, 39, 23, 55, 15, 47, 31, 63,
+};  /* Entry i is i with its low 6 bits reversed (1 -> 32, 2 -> 16, 3 -> 48). */
+
+static BROTLI_INLINE void StoreStaticDistanceHuffmanTree(
+    size_t* storage_ix, uint8_t* storage) {
+  BrotliWriteBits(28, 0x0369DC03u, storage_ix, storage);  /* Single fixed write; presumably 28 bits (TODO confirm). */
+}
+/* GENERATED CODE END */
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/fast_log.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/fast_log.c
new file mode 100644
index 0000000000..2fa0efcf86
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/fast_log.c
@@ -0,0 +1,105 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "fast_log.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
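+/* Generated with Python: ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]]) -- index 0 holds 0.0 and index i holds log2(i). NOTE(review): every literal carries an 'f' suffix although the table is double; confirm that this precision is intended. */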
+const double kBrotliLog2Table[BROTLI_LOG2_TABLE_SIZE] = {
+  0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
+  1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
+  2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
+  3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
+  3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
+  3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
+  4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
+  4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
+  4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
+  4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
+  4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
+  5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
+  5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
+  5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
+  5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
+  5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
+  5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
+  5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
+  5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
+  5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
+  5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
+  5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
+  6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
+  6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
+  6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
+  6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
+  6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
+  6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
+  6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
+  6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
+  6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
+  6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
+  6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
+  6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
+  6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
+  6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
+  6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
+  6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
+  6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
+  6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
+  6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
+  6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
+  6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
+  7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
+  7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
+  7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
+  7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
+  7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
+  7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
+  7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
+  7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
+  7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
+  7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
+  7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
+  7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
+  7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
+  7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
+  7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
+  7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
+  7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
+  7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
+  7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
+  7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
+  7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
+  7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
+  7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
+  7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
+  7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
+  7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
+  7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
+  7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
+  7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
+  7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
+  7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
+  7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
+  7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
+  7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
+  7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
+  7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
+  7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
+  7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
+  7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
+  7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
+  7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
+  7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
+  7.9943534368588578f
+};
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/fast_log.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/fast_log.h
new file mode 100644
index 0000000000..f82f4cffc8
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/fast_log.h
@@ -0,0 +1,67 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Utilities for fast computation of logarithms. */
+
+#ifndef BROTLI_ENC_FAST_LOG_H_
+#define BROTLI_ENC_FAST_LOG_H_
+
+#include <math.h>
+
+#include <brotli/types.h>
+
+#include "../common/platform.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static BROTLI_INLINE uint32_t Log2FloorNonZero(size_t n) {
+#if defined(BROTLI_BSR32)
+  return BROTLI_BSR32((uint32_t)n);  /* NB: the cast keeps only the low 32 bits of n */
+#else
+  uint32_t result = 0;
+  while (n >>= 1) result++;  /* shift count == floor(log2(n)); stays 0 for n <= 1 */
+  return result;
+#endif
+}
+
+#define BROTLI_LOG2_TABLE_SIZE 256
+
+/* A lookup table for small values of log2(int) to be used in entropy
+   computation. */
+BROTLI_INTERNAL extern const double kBrotliLog2Table[BROTLI_LOG2_TABLE_SIZE];
+
+/* Visual Studio 2012 and Android API levels < 18 do not have the log2()
+ * function defined, so we use log() and a multiplication instead. */
+#if !defined(BROTLI_HAVE_LOG2)
+#if ((defined(_MSC_VER) && _MSC_VER <= 1700) || \
+     (defined(__ANDROID_API__) && __ANDROID_API__ < 18))
+#define BROTLI_HAVE_LOG2 0
+#else
+#define BROTLI_HAVE_LOG2 1
+#endif
+#endif
+
+#define LOG_2_INV 1.4426950408889634
+
+/* log2(v): table lookup below BROTLI_LOG2_TABLE_SIZE (so log2(0) == 0), libm above. */
+static BROTLI_INLINE double FastLog2(size_t v) {
+  if (v < BROTLI_LOG2_TABLE_SIZE) {
+    return kBrotliLog2Table[v];
+  }
+#if !(BROTLI_HAVE_LOG2)
+  return log((double)v) * LOG_2_INV;  /* no log2(): natural log scaled by 1/ln(2) */
+#else
+  return log2((double)v);
+#endif
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_FAST_LOG_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/find_match_length.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/find_match_length.h
new file mode 100644
index 0000000000..dee0414abc
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/find_match_length.h
@@ -0,0 +1,80 @@
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function to find maximal matching prefixes of strings. */
+
+#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
+#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
+
+#include <brotli/types.h>
+
+#include "../common/platform.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Separate implementation for little-endian 64-bit targets, for speed. */
+#if defined(BROTLI_TZCNT64) && BROTLI_64_BITS && BROTLI_LITTLE_ENDIAN
+static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
+                                                     const uint8_t* s2,
+                                                     size_t limit) {
+  size_t matched = 0;  /* bytes found equal so far; this is the return value */
+  size_t limit2 = (limit >> 3) + 1;  /* + 1 is for pre-decrement in while */
+  while (BROTLI_PREDICT_TRUE(--limit2)) {  /* one 8-byte word per iteration */
+    if (BROTLI_PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64LE(s2) ==
+                      BROTLI_UNALIGNED_LOAD64LE(s1 + matched))) {
+      s2 += 8;
+      matched += 8;
+    } else {
+      uint64_t x = BROTLI_UNALIGNED_LOAD64LE(s2) ^
+          BROTLI_UNALIGNED_LOAD64LE(s1 + matched);  /* non-zero: set bits mark differences */
+      size_t matching_bits = (size_t)BROTLI_TZCNT64(x);  /* presumably a trailing-zero count */
+      matched += matching_bits >> 3;  /* equal low-order bits -> whole equal bytes */
+      return matched;
+    }
+  }
+  limit = (limit & 7) + 1;  /* remaining tail (< 8 bytes); + 1 is for pre-decrement in while */
+  while (--limit) {
+    if (BROTLI_PREDICT_TRUE(s1[matched] == *s2)) {
+      ++s2;
+      ++matched;
+    } else {
+      return matched;
+    }
+  }
+  return matched;
+}
+#else
+static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
+                                                     const uint8_t* s2,
+                                                     size_t limit) {
+  size_t matched = 0;
+  const uint8_t* s2_limit = s2 + limit;
+  const uint8_t* s2_ptr = s2;
+  /* Find out how long the match is. We compare the data 32 bits at a
+     time until a 32-bit block differs or fewer than 4 bytes remain below
+     the limit; the byte-wise loop that follows then extends the match one
+     byte at a time up to the first mismatch or the limit. */
+  while (s2_ptr <= s2_limit - 4 &&
+         BrotliUnalignedRead32(s2_ptr) ==
+         BrotliUnalignedRead32(s1 + matched)) {
+    s2_ptr += 4;
+    matched += 4;
+  }
+  while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) {
+    ++s2_ptr;
+    ++matched;
+  }
+  return matched;
+}
+#endif
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_FIND_MATCH_LENGTH_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash.h
new file mode 100644
index 0000000000..fc6e33400c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash.h
@@ -0,0 +1,729 @@
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+   help create backward references to previous data. */
+
+#ifndef BROTLI_ENC_HASH_H_
+#define BROTLI_ENC_HASH_H_
+
+#include <stdlib.h>  /* exit */
+#include <string.h>  /* memcmp, memset */
+
+#include <brotli/types.h>
+
+#include "../common/constants.h"
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include "compound_dictionary.h"
+#include "encoder_dict.h"
+#include "fast_log.h"
+#include "find_match_length.h"
+#include "memory.h"
+#include "quality.h"
+#include "static_dict.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct {
+  /**
+   * Dynamically allocated areas; regular hasher uses one or two allocations;
+   * "composite" hasher uses up to 4 allocations.
+   */
+  void* extra[4];
+
+  /**
+   * False before the first invocation of HasherSetup (where "extra" memory
+   * is allocated).
+   */
+  BROTLI_BOOL is_setup_;
+
+  size_t dict_num_lookups;  /* static-dictionary slots probed by SearchInStaticDictionary */
+  size_t dict_num_matches;  /* probes for which TestStaticDictionaryItem accepted a match */
+
+  BrotliHasherParams params;  /* copy of params->hasher taken in HasherSetup */
+
+  /**
+   * False if hasher needs to be "prepared" before use (before the first
+   * invocation of HasherSetup or after HasherReset). "preparation" is hasher
+   * data initialization (using input ringbuffer).
+   */
+  BROTLI_BOOL is_prepared_;
+} HasherCommon;
+
+#define score_t size_t
+
+static const uint32_t kCutoffTransformsCount = 10;
+/* Transform ids:  0,  12,   27,    23,    42,    63,    56,    48,    59,    64 */
+/* As 4*k + field: 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
+static const uint64_t kCutoffTransforms =  /* the ten fields above, 6 bits each, field k at bits 6k..6k+5 */
+    BROTLI_MAKE_UINT64_T(0x071B520A, 0xDA2D3200);
+
+typedef struct HasherSearchResult {
+  size_t len;  /* number of matched bytes */
+  size_t distance;  /* backward distance of the match */
+  score_t score;  /* candidates that score lower than this never replace the result */
+  int len_code_delta; /* == len_code - len */
+} HasherSearchResult;
+
+/* kHashMul32 multiplier has these properties:
+   * The multiplier must be odd. Otherwise we may lose the highest bit.
+   * No long streaks of ones or zeros.
+   * There is no effort to ensure that it is a prime, the oddity is enough
+     for this use.
+   * The number has been tuned heuristically against compression benchmarks. */
+static const uint32_t kHashMul32 = 0x1E35A7BD;
+static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD);
+static const uint64_t kHashMul64Long =
+    BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
+
+static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
+  uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;  /* presumably reads 4 bytes at data */
+  /* The higher bits contain more mixture from the multiplication,
+     so we take our results from there. */
+  return h >> (32 - 14);  /* top 14 bits: result is in 0..16383 */
+}
+
+static BROTLI_INLINE void PrepareDistanceCache(
+    int* BROTLI_RESTRICT distance_cache, const int num_distances) {
+  if (num_distances > 4) {  /* slots 4..9: distance_cache[0] -/+ 1, 2, 3 */
+    int last_distance = distance_cache[0];
+    distance_cache[4] = last_distance - 1;
+    distance_cache[5] = last_distance + 1;
+    distance_cache[6] = last_distance - 2;
+    distance_cache[7] = last_distance + 2;
+    distance_cache[8] = last_distance - 3;
+    distance_cache[9] = last_distance + 3;
+    if (num_distances > 10) {  /* slots 10..15: distance_cache[1] -/+ 1, 2, 3 */
+      int next_last_distance = distance_cache[1];
+      distance_cache[10] = next_last_distance - 1;
+      distance_cache[11] = next_last_distance + 1;
+      distance_cache[12] = next_last_distance - 2;
+      distance_cache[13] = next_last_distance + 2;
+      distance_cache[14] = next_last_distance - 3;
+      distance_cache[15] = next_last_distance + 3;
+    }
+  }
+}
+
+#define BROTLI_LITERAL_BYTE_SCORE 135
+#define BROTLI_DISTANCE_BIT_PENALTY 30
+/* Score must be positive after applying maximal penalty. */
+#define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t))
+
+/* Usually, we always choose the longest backward reference. This function
+   allows for the exception of that rule.
+
+   If we choose a backward reference that is further away, it will
+   usually be coded with more bits. We approximate this by assuming
+   log2(distance). If the distance can be expressed in terms of the
+   last four distances, we use some heuristic constants to estimate
+   the bits cost. For the first up to four literals we use the bit
+   cost of the literals from the literal cost model, after that we
+   use the average bit cost of the cost model.
+
+   This function is used to sometimes discard a longer backward reference
+   when it is not much longer and the bit cost for encoding it is more
+   than the saved literals.
+
+   backward_reference_offset MUST be positive. */
+static BROTLI_INLINE score_t BackwardReferenceScore(
+    size_t copy_length, size_t backward_reference_offset) {  /* base + per-byte reward - per-bit distance penalty */
+  return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length -
+      BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset);
+}
+
+static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance(
+    size_t copy_length) {  /* same per-byte reward, no distance term, flat +15 */
+  return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length +
+      BROTLI_SCORE_BASE + 15;
+}
+
+static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance(
+    size_t distance_short_code) {  /* 39 plus an even extra (0..12) picked by bits 1..3 of the code */
+  return (score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE);  /* codes 0-1: 39, 2-3: 43, 4-5: 39, 6-7: 47, 8-9: 49, 10-11: 41, 12-13: 51, 14-15: 45 */
+}
+
+static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
+    const BrotliEncoderDictionary* dictionary, size_t len, size_t word_idx,
+    const uint8_t* data, size_t max_length, size_t max_backward,
+    size_t max_distance, HasherSearchResult* out) {  /* BROTLI_TRUE iff *out was overwritten */
+  size_t offset;
+  size_t matchlen;
+  size_t backward;
+  score_t score;
+  offset = dictionary->words->offsets_by_length[len] + len * word_idx;  /* start of the word in words->data */
+  if (len > max_length) {
+    return BROTLI_FALSE;
+  }
+
+  matchlen =
+      FindMatchLengthWithLimit(data, &dictionary->words->data[offset], len);
+  if (matchlen + dictionary->cutoffTransformsCount <= len || matchlen == 0) {  /* unmatched tail too long, or no match */
+    return BROTLI_FALSE;
+  }
+  {
+    size_t cut = len - matchlen;  /* trailing bytes of the word that did not match */
+    size_t transform_id = (cut << 2) +
+        (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);  /* 6-bit field number "cut" */
+    backward = max_backward + 1 + word_idx +
+        (transform_id << dictionary->words->size_bits_by_length[len]);
+  }
+  if (backward > max_distance) {
+    return BROTLI_FALSE;
+  }
+  score = BackwardReferenceScore(matchlen, backward);
+  if (score < out->score) {  /* a tie still replaces the current result */
+    return BROTLI_FALSE;
+  }
+  out->len = matchlen;
+  out->len_code_delta = (int)len - (int)matchlen;  /* same value as "cut" above */
+  out->distance = backward;
+  out->score = score;
+  return BROTLI_TRUE;
+}
+
+static BROTLI_INLINE void SearchInStaticDictionary(
+    const BrotliEncoderDictionary* dictionary,
+    HasherCommon* common, const uint8_t* data, size_t max_length,
+    size_t max_backward, size_t max_distance,
+    HasherSearchResult* out, BROTLI_BOOL shallow) {  /* updates *out and the counters in *common */
+  size_t key;
+  size_t i;
+  if (common->dict_num_matches < (common->dict_num_lookups >> 7)) {  /* fewer than 1 match per 128 probes: skip */
+    return;
+  }
+  key = Hash14(data) << 1;  /* two consecutive table slots per hash value */
+  for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) {  /* shallow probes only the first slot */
+    common->dict_num_lookups++;
+    if (dictionary->hash_table_lengths[key] != 0) {  /* a zero length is skipped */
+      BROTLI_BOOL item_matches = TestStaticDictionaryItem(
+          dictionary, dictionary->hash_table_lengths[key],
+          dictionary->hash_table_words[key], data,
+          max_length, max_backward, max_distance, out);
+      if (item_matches) {
+        common->dict_num_matches++;
+      }
+    }
+  }
+}
+
+typedef struct BackwardMatch {
+  uint32_t distance;
+  uint32_t length_and_code;  /* length in bits 5 and up; low 5 bits hold a length code, 0 meaning "same as length" */
+} BackwardMatch;
+
+static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self,
+    size_t dist, size_t len) {
+  self->distance = (uint32_t)dist;
+  self->length_and_code = (uint32_t)(len << 5);  /* low 5 bits left at 0 */
+}
+
+static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self,
+    size_t dist, size_t len, size_t len_code) {
+  self->distance = (uint32_t)dist;
+  self->length_and_code =  /* len_code is OR-ed in unmasked; 0 is stored when it equals len */
+      (uint32_t)((len << 5) | (len == len_code ? 0 : len_code));
+}
+
+static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) {
+  return self->length_and_code >> 5;  /* drops the 5-bit length-code field */
+}
+
+static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
+  size_t code = self->length_and_code & 31;  /* low 5 bits */
+  return code ? code : BackwardMatchLength(self);  /* a stored 0 means the code equals the length */
+}
+
+#define EXPAND_CAT(a, b) CAT(a, b)
+#define CAT(a, b) a ## b
+#define FN(X) EXPAND_CAT(X, HASHER())
+
+#define HASHER() H10
+#define BUCKET_BITS 17
+#define MAX_TREE_SEARCH_DEPTH 64
+#define MAX_TREE_COMP_LENGTH 128
+#include "hash_to_binary_tree_inc.h"  /* NOLINT(build/include) */
+#undef MAX_TREE_SEARCH_DEPTH
+#undef MAX_TREE_COMP_LENGTH
+#undef BUCKET_BITS
+#undef HASHER
+/* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */
+#define MAX_NUM_MATCHES_H10 128
+
+/* For BUCKET_SWEEP_BITS == 0, enabling the dictionary lookup makes compression
+   a little faster (0.5% - 1%) and it compresses 0.15% better on small text
+   and HTML inputs. */
+
+#define HASHER() H2
+#define BUCKET_BITS 16
+#define BUCKET_SWEEP_BITS 0
+#define HASH_LEN 5
+#define USE_DICTIONARY 1
+#include "hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
+#undef BUCKET_SWEEP_BITS
+#undef USE_DICTIONARY
+#undef HASHER
+
+#define HASHER() H3
+#define BUCKET_SWEEP_BITS 1
+#define USE_DICTIONARY 0
+#include "hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
+#undef USE_DICTIONARY
+#undef BUCKET_SWEEP_BITS
+#undef BUCKET_BITS
+#undef HASHER
+
+#define HASHER() H4
+#define BUCKET_BITS 17
+#define BUCKET_SWEEP_BITS 2
+#define USE_DICTIONARY 1
+#include "hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
+#undef USE_DICTIONARY
+#undef HASH_LEN
+#undef BUCKET_SWEEP_BITS
+#undef BUCKET_BITS
+#undef HASHER
+
+#define HASHER() H5
+#include "hash_longest_match_inc.h"  /* NOLINT(build/include) */
+#undef HASHER
+
+#define HASHER() H6
+#include "hash_longest_match64_inc.h"  /* NOLINT(build/include) */
+#undef HASHER
+
+#define BUCKET_BITS 15
+
+#define NUM_LAST_DISTANCES_TO_CHECK 4
+#define NUM_BANKS 1
+#define BANK_BITS 16
+#define HASHER() H40
+#include "hash_forgetful_chain_inc.h"  /* NOLINT(build/include) */
+#undef HASHER
+#undef NUM_LAST_DISTANCES_TO_CHECK
+
+#define NUM_LAST_DISTANCES_TO_CHECK 10
+#define HASHER() H41
+#include "hash_forgetful_chain_inc.h"  /* NOLINT(build/include) */
+#undef HASHER
+#undef NUM_LAST_DISTANCES_TO_CHECK
+#undef NUM_BANKS
+#undef BANK_BITS
+
+#define NUM_LAST_DISTANCES_TO_CHECK 16
+#define NUM_BANKS 512
+#define BANK_BITS 9
+#define HASHER() H42
+#include "hash_forgetful_chain_inc.h"  /* NOLINT(build/include) */
+#undef HASHER
+#undef NUM_LAST_DISTANCES_TO_CHECK
+#undef NUM_BANKS
+#undef BANK_BITS
+
+#undef BUCKET_BITS
+
+#define HASHER() H54
+#define BUCKET_BITS 20
+#define BUCKET_SWEEP_BITS 2
+#define HASH_LEN 7
+#define USE_DICTIONARY 0
+#include "hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
+#undef USE_DICTIONARY
+#undef HASH_LEN
+#undef BUCKET_SWEEP_BITS
+#undef BUCKET_BITS
+#undef HASHER
+
+/* fast large window hashers */
+
+#define HASHER() HROLLING_FAST
+#define CHUNKLEN 32
+#define JUMP 4
+#define NUMBUCKETS 16777216
+#define MASK ((NUMBUCKETS * 64) - 1)
+#include "hash_rolling_inc.h"  /* NOLINT(build/include) */
+#undef JUMP
+#undef HASHER
+
+
+#define HASHER() HROLLING
+#define JUMP 1
+#include "hash_rolling_inc.h"  /* NOLINT(build/include) */
+#undef MASK
+#undef NUMBUCKETS
+#undef JUMP
+#undef CHUNKLEN
+#undef HASHER
+
+#define HASHER() H35
+#define HASHER_A H3
+#define HASHER_B HROLLING_FAST
+#include "hash_composite_inc.h"  /* NOLINT(build/include) */
+#undef HASHER_A
+#undef HASHER_B
+#undef HASHER
+
+#define HASHER() H55
+#define HASHER_A H54
+#define HASHER_B HROLLING_FAST
+#include "hash_composite_inc.h"  /* NOLINT(build/include) */
+#undef HASHER_A
+#undef HASHER_B
+#undef HASHER
+
+#define HASHER() H65
+#define HASHER_A H6
+#define HASHER_B HROLLING
+#include "hash_composite_inc.h"  /* NOLINT(build/include) */
+#undef HASHER_A
+#undef HASHER_B
+#undef HASHER
+
+#undef FN
+#undef CAT
+#undef EXPAND_CAT
+
+#define FOR_SIMPLE_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)
+#define FOR_COMPOSITE_HASHERS(H) H(35) H(55) H(65)
+#define FOR_GENERIC_HASHERS(H) FOR_SIMPLE_HASHERS(H) FOR_COMPOSITE_HASHERS(H)
+#define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)
+
+typedef struct {
+  HasherCommon common;
+
+  union {  /* one member per entry of FOR_ALL_HASHERS, named _H<N> with type H<N> */
+#define MEMBER_(N) \
+    H ## N _H ## N;
+    FOR_ALL_HASHERS(MEMBER_)
+#undef MEMBER_
+  } privat;
+} Hasher;
+
+/* MUST be invoked before any other method: marks the hasher as not set up and clears all four "extra" pointers. */
+static BROTLI_INLINE void HasherInit(Hasher* hasher) {
+  hasher->common.is_setup_ = BROTLI_FALSE;
+  hasher->common.extra[0] = NULL;
+  hasher->common.extra[1] = NULL;
+  hasher->common.extra[2] = NULL;
+  hasher->common.extra[3] = NULL;
+}
+
+static BROTLI_INLINE void DestroyHasher(MemoryManager* m, Hasher* hasher) {  /* releases every non-NULL "extra" area through m */
+  if (hasher->common.extra[0] != NULL) BROTLI_FREE(m, hasher->common.extra[0]);
+  if (hasher->common.extra[1] != NULL) BROTLI_FREE(m, hasher->common.extra[1]);
+  if (hasher->common.extra[2] != NULL) BROTLI_FREE(m, hasher->common.extra[2]);
+  if (hasher->common.extra[3] != NULL) BROTLI_FREE(m, hasher->common.extra[3]);
+}
+
+static BROTLI_INLINE void HasherReset(Hasher* hasher) {
+  hasher->common.is_prepared_ = BROTLI_FALSE;  /* makes the next HasherSetup run the Prepare step again */
+}
+
+static BROTLI_INLINE void HasherSize(const BrotliEncoderParams* params,
+    BROTLI_BOOL one_shot, const size_t input_size, size_t* alloc_size) {  /* dispatches on params->hasher.type */
+  switch (params->hasher.type) {
+#define SIZE_(N)                                                           \
+    case N:                                                                \
+      HashMemAllocInBytesH ## N(params, one_shot, input_size, alloc_size); \
+      break;
+    FOR_ALL_HASHERS(SIZE_)
+#undef SIZE_
+    default:  /* unknown type: alloc_size is left untouched */
+      break;
+  }
+}
+
+static BROTLI_INLINE void HasherSetup(MemoryManager* m, Hasher* hasher,
+    BrotliEncoderParams* params, const uint8_t* data, size_t position,
+    size_t input_size, BROTLI_BOOL is_last) {
+  BROTLI_BOOL one_shot = (position == 0 && is_last);  /* the whole input arrives in this single call */
+  if (!hasher->common.is_setup_) {  /* first call: choose the hasher, allocate, initialize */
+    size_t alloc_size[4] = {0};  /* byte size per "extra" slot; 0 means the slot is unused */
+    size_t i;
+    ChooseHasher(params, &params->hasher);
+    hasher->common.params = params->hasher;
+    hasher->common.dict_num_lookups = 0;
+    hasher->common.dict_num_matches = 0;
+    HasherSize(params, one_shot, input_size, alloc_size);
+    for (i = 0; i < 4; ++i) {
+      if (alloc_size[i] == 0) continue;
+      hasher->common.extra[i] = BROTLI_ALLOC(m, uint8_t, alloc_size[i]);
+      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(hasher->common.extra[i])) return;  /* is_setup_ stays false */
+    }
+    switch (hasher->common.params.type) {
+#define INITIALIZE_(N)                        \
+      case N:                                 \
+        InitializeH ## N(&hasher->common,     \
+            &hasher->privat._H ## N, params); \
+        break;
+      FOR_ALL_HASHERS(INITIALIZE_);
+#undef INITIALIZE_
+      default:
+        break;
+    }
+    HasherReset(hasher);  /* clears is_prepared_ so the block below runs */
+    hasher->common.is_setup_ = BROTLI_TRUE;
+  }
+
+  if (!hasher->common.is_prepared_) {  /* also re-entered after an external HasherReset */
+    switch (hasher->common.params.type) {
+#define PREPARE_(N)                      \
+      case N:                            \
+        PrepareH ## N(                   \
+            &hasher->privat._H ## N,     \
+            one_shot, input_size, data); \
+        break;
+      FOR_ALL_HASHERS(PREPARE_)
+#undef PREPARE_
+      default: break;
+    }
+    hasher->common.is_prepared_ = BROTLI_TRUE;
+  }
+}
+
+static BROTLI_INLINE void InitOrStitchToPreviousBlock(
+    MemoryManager* m, Hasher* hasher, const uint8_t* data, size_t mask,
+    BrotliEncoderParams* params, size_t position, size_t input_size,
+    BROTLI_BOOL is_last) {  /* HasherSetup, then the type-specific StitchToPreviousBlock */
+  HasherSetup(m, hasher, params, data, position, input_size, is_last);
+  if (BROTLI_IS_OOM(m)) return;  /* setup ran out of memory: nothing to stitch */
+  switch (hasher->common.params.type) {
+#define INIT_(N)                             \
+    case N:                                  \
+      StitchToPreviousBlockH ## N(           \
+          &hasher->privat._H ## N,           \
+          input_size, position, data, mask); \
+    break;
+    FOR_ALL_HASHERS(INIT_)
+#undef INIT_
+    default: break;
+  }
+}
+
+/* NB: when seamless dictionary-ring-buffer copies are implemented, don't forget
+       to add proper guards for non-zero-BROTLI_PARAM_STREAM_OFFSET. */
+static BROTLI_INLINE void FindCompoundDictionaryMatch(
+    const PreparedDictionary* self, const uint8_t* BROTLI_RESTRICT data,
+    const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
+    const size_t cur_ix, const size_t max_length, const size_t distance_offset,
+    const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) {  /* improves *out in place; never worsens it */
+  const uint32_t source_size = self->source_size;
+  const size_t boundary = distance_offset - source_size;  /* distances <= boundary fall outside this dictionary */
+  const uint32_t hash_bits = self->hash_bits;
+  const uint32_t bucket_bits = self->bucket_bits;
+  const uint32_t slot_bits = self->slot_bits;
+
+  const uint32_t hash_shift = 64u - bucket_bits;
+  const uint32_t slot_mask = (~((uint32_t)0U)) >> (32 - slot_bits);
+  const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
+
+  const uint32_t* slot_offsets = (uint32_t*)(&self[1]);  /* tables are laid out right after the header struct */
+  const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
+  const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
+  const uint8_t* source = NULL;
+
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  score_t best_score = out->score;
+  size_t best_len = out->len;
+  size_t i;
+  const uint64_t h =
+      (BROTLI_UNALIGNED_LOAD64LE(&data[cur_ix_masked]) & hash_mask) *
+      kPreparedDictionaryHashMul64Long;
+  const uint32_t key = (uint32_t)(h >> hash_shift);  /* top bucket_bits bits of the hash */
+  const uint32_t slot = key & slot_mask;
+  const uint32_t head = heads[key];
+  const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head];
+  uint32_t item = (head == 0xFFFF) ? 1 : 0;  /* head 0xFFFF: the chain walk below is skipped */
+
+  const void* tail = (void*)&items[self->num_items];
+  if (self->magic == kPreparedDictionaryMagic) {
+    source = (const uint8_t*)tail;  /* source bytes are stored inline after the items */
+  } else {
+    /* kLeanPreparedDictionaryMagic */
+    source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);  /* a pointer to the source is stored instead */
+  }
+
+  for (i = 0; i < 4; ++i) {  /* first try the four cached distances */
+    const size_t distance = (size_t)distance_cache[i];
+    size_t offset;
+    size_t limit;
+    size_t len;
+    if (distance <= boundary || distance > distance_offset) continue;  /* not inside this dictionary */
+    offset = distance_offset - distance;
+    limit = source_size - offset;
+    limit = limit > max_length ? max_length : limit;
+    len = FindMatchLengthWithLimit(&source[offset], &data[cur_ix_masked],
+                                   limit);
+    if (len >= 2) {
+      score_t score = BackwardReferenceScoreUsingLastDistance(len);
+      if (best_score < score) {  /* cheap pre-check before applying the penalty */
+        if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
+        if (best_score < score) {
+          best_score = score;
+          if (len > best_len) best_len = len;
+          out->len = len;
+          out->len_code_delta = 0;
+          out->distance = distance;
+          out->score = best_score;
+        }
+      }
+    }
+  }
+  while (item == 0) {  /* then walk the hash chain until an entry with the top bit set */
+    size_t offset;
+    size_t distance;
+    size_t limit;
+    item = *chain;
+    chain++;
+    offset = item & 0x7FFFFFFF;  /* low 31 bits: offset into source */
+    item &= 0x80000000;  /* top bit: non-zero ends the loop after this entry */
+    distance = distance_offset - offset;
+    limit = source_size - offset;
+    limit = (limit > max_length) ? max_length : limit;
+    if (distance > max_distance) continue;
+    if (cur_ix_masked + best_len > ring_buffer_mask ||
+        best_len >= limit ||
+        data[cur_ix_masked + best_len] != source[offset + best_len]) {  /* quick reject: byte at index best_len must match */
+      continue;
+    }
+    {
+      const size_t len = FindMatchLengthWithLimit(&source[offset],
+                                                  &data[cur_ix_masked],
+                                                  limit);
+      if (len >= 4) {
+        score_t score = BackwardReferenceScore(len, distance);
+        if (best_score < score) {
+          best_score = score;
+          best_len = len;
+          out->len = best_len;
+          out->len_code_delta = 0;
+          out->distance = distance;
+          out->score = best_score;
+        }
+      }
+    }
+  }
+}
+
+/* NB: when seamless dictionary-ring-buffer copies are implemented, don't forget
+       to add proper guards for non-zero-BROTLI_PARAM_STREAM_OFFSET. */
+static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches(
+    const PreparedDictionary* self, const uint8_t* BROTLI_RESTRICT data,
+    const size_t ring_buffer_mask, const size_t cur_ix, const size_t min_length,
+    const size_t max_length, const size_t distance_offset,
+    const size_t max_distance, BackwardMatch* matches, size_t match_limit) {
+  const uint32_t source_size = self->source_size;
+  const uint32_t hash_bits = self->hash_bits;
+  const uint32_t bucket_bits = self->bucket_bits;
+  const uint32_t slot_bits = self->slot_bits;
+  /* Shift / masks that turn the leading input bytes into a bucket key. */
+  const uint32_t hash_shift = 64u - bucket_bits;
+  const uint32_t slot_mask = (~((uint32_t)0U)) >> (32 - slot_bits);
+  const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
+  /* The lookup tables are stored back to back right after the header. */
+  const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
+  const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
+  const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
+  const uint8_t* source = NULL;
+  /* Only matches strictly longer than |min_length| get reported. */
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  size_t best_len = min_length;
+  const uint64_t h =
+      (BROTLI_UNALIGNED_LOAD64LE(&data[cur_ix_masked]) & hash_mask) *
+      kPreparedDictionaryHashMul64Long;
+  const uint32_t key = (uint32_t)(h >> hash_shift);
+  const uint32_t slot = key & slot_mask;
+  const uint32_t head = heads[key];
+  const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head];
+  uint32_t item = (head == 0xFFFF) ? 1 : 0;  /* 0xFFFF head: skip the walk. */
+  size_t found = 0;  /* Number of entries written to |matches| so far. */
+  /* The source bytes, or a pointer to them, follow the items table. */
+  const void* tail = (void*)&items[self->num_items];
+  if (self->magic == kPreparedDictionaryMagic) {
+    source = (const uint8_t*)tail;
+  } else {
+    /* kLeanPreparedDictionaryMagic */
+    source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
+  }
+  /* Walk the chain (the item with its top bit set is the last one), but
+     never write more than |match_limit| entries - not even when it is 0. */
+  while (item == 0 && found < match_limit) {
+    size_t offset;
+    size_t distance;
+    size_t limit;
+    size_t len;
+    item = *chain;
+    chain++;
+    offset = item & 0x7FFFFFFF;
+    item &= 0x80000000;
+    distance = distance_offset - offset;
+    limit = source_size - offset;
+    limit = (limit > max_length) ? max_length : limit;
+    if (distance > max_distance) continue;
+    if (cur_ix_masked + best_len > ring_buffer_mask ||
+        best_len >= limit ||
+        data[cur_ix_masked + best_len] != source[offset + best_len]) {
+      continue;
+    }
+    len = FindMatchLengthWithLimit(
+        &source[offset], &data[cur_ix_masked], limit);
+    if (len > best_len) {
+      best_len = len;
+      InitBackwardMatch(matches++, distance, len);
+      found++;
+    }
+  }
+  return found;
+}
+
+static BROTLI_INLINE void LookupCompoundDictionaryMatch(
+    const CompoundDictionary* addon, const uint8_t* BROTLI_RESTRICT data,
+    const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
+    const size_t cur_ix, const size_t max_length,
+    const size_t max_ring_buffer_distance, const size_t max_distance,
+    HasherSearchResult* sr) {
+  const size_t base_offset = max_ring_buffer_distance + addon->total_size;
+  size_t chunk;
+  /* Query every chunk; only one prepared dictionary type is supported. */
+  for (chunk = 0; chunk < addon->num_chunks; chunk++) {
+    const size_t chunk_base = base_offset - addon->chunk_offsets[chunk];
+    FindCompoundDictionaryMatch(
+        (const PreparedDictionary*)addon->chunks[chunk], data, ring_buffer_mask,
+        distance_cache, cur_ix, max_length, chunk_base, max_distance, sr);
+  }
+}
+
+static BROTLI_INLINE size_t LookupAllCompoundDictionaryMatches(
+    const CompoundDictionary* addon, const uint8_t* BROTLI_RESTRICT data,
+    const size_t ring_buffer_mask, const size_t cur_ix, size_t min_length,
+    const size_t max_length, const size_t max_ring_buffer_distance,
+    const size_t max_distance, BackwardMatch* matches,
+    size_t match_limit) {
+  size_t base_offset = max_ring_buffer_distance + 1 + addon->total_size - 1;
+  size_t d;  /* Index of the chunk being searched. */
+  size_t total_found = 0;  /* Number of entries written to |matches|. */
+  for (d = 0; d < addon->num_chunks; ++d) {
+    /* Only one prepared dictionary type is currently supported. */
+    total_found += FindAllCompoundDictionaryMatches(
+        (const PreparedDictionary*)addon->chunks[d], data, ring_buffer_mask,
+        cur_ix, min_length, max_length, base_offset - addon->chunk_offsets[d],
+        max_distance, matches + total_found, match_limit - total_found);
+    if (total_found == match_limit) break;  /* |matches| is full. */
+    if (total_found > 0) {  /* Later chunks must beat the latest match. */
+      min_length = BackwardMatchLength(&matches[total_found - 1]);
+    }
+  }
+  return total_found;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_HASH_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_composite_inc.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_composite_inc.h
new file mode 100644
index 0000000000..3923bc72c8
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_composite_inc.h
@@ -0,0 +1,140 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2018 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, HASHER_A, HASHER_B */
+
+/* Composite hasher: This hasher combines two other hashers, HASHER_A
+   and HASHER_B. */
+
+#define HashComposite HASHER()
+
+#define FN_A(X) EXPAND_CAT(X, HASHER_A)
+#define FN_B(X) EXPAND_CAT(X, HASHER_B)
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) {
+  const size_t len_a = FN_A(HashTypeLength)();
+  const size_t len_b = FN_B(HashTypeLength)();
+  return (len_b < len_a) ? len_a : len_b;  /* Larger of the two lengths. */
+}
+
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
+  const size_t ahead_a = FN_A(StoreLookahead)();
+  const size_t ahead_b = FN_B(StoreLookahead)();
+  return (ahead_b < ahead_a) ? ahead_a : ahead_b;  /* Larger lookahead. */
+}
+
+typedef struct HashComposite {
+  HASHER_A ha;  /* First wrapped hasher. */
+  HASHER_B hb;  /* Second wrapped hasher. */
+  HasherCommon ha_common;  /* Copy of |*common| handed to |ha|. */
+  HasherCommon hb_common;  /* Copy of |*common| handed to |hb|. */
+
+  /* Shortcuts. */
+  HasherCommon* common;
+
+  BROTLI_BOOL fresh;  /* BROTLI_TRUE until the first FN(Prepare) call. */
+  const BrotliEncoderParams* params;  /* Kept for deferred initialization. */
+} HashComposite;
+
+static void FN(Initialize)(HasherCommon* common,
+    HashComposite* BROTLI_RESTRICT self, const BrotliEncoderParams* params) {
+  self->common = common;
+  /* Both sub-hashers start from their own copy of the shared state. */
+  self->ha_common = *self->common;
+  self->hb_common = *self->common;
+  self->fresh = BROTLI_TRUE;  /* Makes FN(Prepare) finish the set-up. */
+  self->params = params;
+  /* TODO(lode): Initialization of the hashers is deferred to Prepare (and
+     params remembered here) because we don't get the one_shot and input_size
+     params here that are needed to know the memory size of them. Instead
+     provide those params to all hashers FN(Initialize) */
+}
+
+static void FN(Prepare)(
+    HashComposite* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
+    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
+  if (self->fresh) {  /* First call: run the deferred initialization. */
+    self->fresh = BROTLI_FALSE;
+    self->ha_common.extra[0] = self->common->extra[0];  /* A: slots 0-1. */
+    self->ha_common.extra[1] = self->common->extra[1];
+    self->ha_common.extra[2] = NULL;
+    self->ha_common.extra[3] = NULL;
+    self->hb_common.extra[0] = self->common->extra[2];  /* B: slots 2-3. */
+    self->hb_common.extra[1] = self->common->extra[3];
+    self->hb_common.extra[2] = NULL;
+    self->hb_common.extra[3] = NULL;
+
+    FN_A(Initialize)(&self->ha_common, &self->ha, self->params);
+    FN_B(Initialize)(&self->hb_common, &self->hb, self->params);
+  }
+  FN_A(Prepare)(&self->ha, one_shot, input_size, data);  /* On every call. */
+  FN_B(Prepare)(&self->hb, one_shot, input_size, data);
+}
+
+static BROTLI_INLINE void FN(HashMemAllocInBytes)(
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size, size_t* alloc_size) {
+  size_t alloc_size_a[4] = {0};
+  size_t alloc_size_b[4] = {0};
+  FN_A(HashMemAllocInBytes)(params, one_shot, input_size, alloc_size_a);
+  FN_B(HashMemAllocInBytes)(params, one_shot, input_size, alloc_size_b);
+  /* Should never happen: a sub-hasher may only request slots 0 and 1. */
+  if (alloc_size_a[2] != 0 || alloc_size_a[3] != 0) exit(EXIT_FAILURE);
+  if (alloc_size_b[2] != 0 || alloc_size_b[3] != 0) exit(EXIT_FAILURE);
+  alloc_size[0] = alloc_size_a[0];  /* Slots 0-1 belong to HASHER_A. */
+  alloc_size[1] = alloc_size_a[1];
+  alloc_size[2] = alloc_size_b[0];  /* Slots 2-3 belong to HASHER_B. */
+  alloc_size[3] = alloc_size_b[1];
+}
+
+static BROTLI_INLINE void FN(Store)(HashComposite* BROTLI_RESTRICT self,
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
+  FN_A(Store)(&self->ha, data, mask, ix);  /* Same position goes to A... */
+  FN_B(Store)(&self->hb, data, mask, ix);  /* ...and to B. */
+}
+
+static BROTLI_INLINE void FN(StoreRange)(
+    HashComposite* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
+    const size_t mask, const size_t ix_start,
+    const size_t ix_end) {
+  FN_A(StoreRange)(&self->ha, data, mask, ix_start, ix_end);  /* A first, */
+  FN_B(StoreRange)(&self->hb, data, mask, ix_start, ix_end);  /* then B. */
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(
+    HashComposite* BROTLI_RESTRICT self,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ring_buffer_mask) {
+  FN_A(StitchToPreviousBlock)(&self->ha, num_bytes, position,
+      ringbuffer, ring_buffer_mask);  /* Each sub-hasher stitches itself. */
+  FN_B(StitchToPreviousBlock)(&self->hb, num_bytes, position,
+      ringbuffer, ring_buffer_mask);
+}
+
+static BROTLI_INLINE void FN(PrepareDistanceCache)(
+    HashComposite* BROTLI_RESTRICT self, int* BROTLI_RESTRICT distance_cache) {
+  FN_A(PrepareDistanceCache)(&self->ha, distance_cache);  /* Same cache... */
+  FN_B(PrepareDistanceCache)(&self->hb, distance_cache);  /* ...for both. */
+}
+
+static BROTLI_INLINE void FN(FindLongestMatch)(
+    HashComposite* BROTLI_RESTRICT self,
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
+    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
+    const size_t max_length, const size_t max_backward,
+    const size_t dictionary_distance, const size_t max_distance,
+    HasherSearchResult* BROTLI_RESTRICT out) {
+  FN_A(FindLongestMatch)(&self->ha, dictionary, data, ring_buffer_mask,
+      distance_cache, cur_ix, max_length, max_backward, dictionary_distance,
+      max_distance, out);  /* A searches first and writes into |out|. */
+  FN_B(FindLongestMatch)(&self->hb, dictionary, data, ring_buffer_mask,
+      distance_cache, cur_ix, max_length, max_backward, dictionary_distance,
+      max_distance, out);  /* B then continues from the same |out|. */
+}
+
+#undef HashComposite
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_forgetful_chain_inc.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_forgetful_chain_inc.h
new file mode 100644
index 0000000000..48e1cdcdf2
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_forgetful_chain_inc.h
@@ -0,0 +1,295 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, BUCKET_BITS, NUM_BANKS, BANK_BITS,
+                        NUM_LAST_DISTANCES_TO_CHECK */
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+   help create backward references to previous data.
+
+   Hashes are stored in chains which are bucketed to groups. Group of chains
+   share a storage "bank". When more than "bank size" chain nodes are added,
+   oldest nodes are replaced; this way several chains may share a tail. */
+
+#define HashForgetfulChain HASHER()
+
+#define BANK_SIZE (1 << BANK_BITS)
+
+/* Number of hash buckets. */
+#define BUCKET_SIZE (1 << BUCKET_BITS)
+
+#define CAPPED_CHAINS 0
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }  /* bytes */
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }  /* bytes */
+
+/* HashBytes is the function that chooses the bucket to place the address in.*/
+static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data) {
+  const uint32_t word = BROTLI_UNALIGNED_LOAD32LE(data);
+  const uint32_t mixed = word * kHashMul32;
+  /* The multiplication mixes the higher bits best, so keep only those. */
+  return mixed >> (32 - BUCKET_BITS);
+}
+
+typedef struct FN(Slot) {
+  uint16_t delta;  /* Distance to the previously stored position, capped. */
+  uint16_t next;  /* Slot index of the next (older) node in the same bank. */
+} FN(Slot);
+
+typedef struct FN(Bank) {
+  FN(Slot) slots[BANK_SIZE];  /* Reused cyclically via free_slot_idx. */
+} FN(Bank);
+
+typedef struct HashForgetfulChain {
+  uint16_t free_slot_idx[NUM_BANKS];  /* Up to 1KiB. Move to dynamic? */
+  size_t max_hops;  /* Upper bound on chain nodes visited per lookup. */
+
+  /* Shortcuts. */
+  void* extra[2];  /* [0]: addr, head, tiny_hash; [1]: banks. */
+  HasherCommon* common;
+
+  /* --- Dynamic size members --- */
+
+  /* uint32_t addr[BUCKET_SIZE]; */
+
+  /* uint16_t head[BUCKET_SIZE]; */
+
+  /* Truncated hash used for quick rejection of "distance cache" candidates. */
+  /* uint8_t tiny_hash[65536];*/
+
+  /* FN(Bank) banks[NUM_BANKS]; */
+} HashForgetfulChain;
+
+static uint32_t* FN(Addr)(void* extra) {  /* addr[] starts the block. */
+  return (uint32_t*)extra;
+}
+
+static uint16_t* FN(Head)(void* extra) {  /* head[] follows addr[]. */
+  return (uint16_t*)(&FN(Addr)(extra)[BUCKET_SIZE]);
+}
+
+static uint8_t* FN(TinyHash)(void* extra) {  /* tiny_hash[] follows head[]. */
+  return (uint8_t*)(&FN(Head)(extra)[BUCKET_SIZE]);
+}
+
+static FN(Bank)* FN(Banks)(void* extra) {  /* banks[] starts its block. */
+  return (FN(Bank)*)(extra);
+}
+
+static void FN(Initialize)(
+    HasherCommon* common, HashForgetfulChain* BROTLI_RESTRICT self,
+    const BrotliEncoderParams* params) {
+  self->common = common;
+  self->extra[0] = common->extra[0];  /* Cache the two memory blocks. */
+  self->extra[1] = common->extra[1];
+  /* NOTE(review): the shift count below presumes quality >= 4 - confirm. */
+  self->max_hops = (params->quality > 6 ? 7u : 8u) << (params->quality - 4);
+}
+
+static void FN(Prepare)(
+    HashForgetfulChain* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
+    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
+  uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra[0]);
+  uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra[0]);
+  uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra[0]);
+  /* Partial preparation is 100 times slower (per socket). */
+  size_t partial_prepare_threshold = BUCKET_SIZE >> 6;
+  if (one_shot && input_size <= partial_prepare_threshold) {
+    size_t i;
+    for (i = 0; i < input_size; ++i) {
+      size_t bucket = FN(HashBytes)(&data[i]);
+      /* Same |addr| sentinel as in the full reset below, per used bucket. */
+      addr[bucket] = 0xCCCCCCCC;
+      head[bucket] = 0xCCCC;  /* NOTE(review): the full reset stores 0. */
+    }
+  } else {
+    /* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, position
+       processed by hasher never reaches 3GB + 64M; this makes all new chains
+       to be terminated after the first node. */
+    memset(addr, 0xCC, sizeof(uint32_t) * BUCKET_SIZE);
+    memset(head, 0, sizeof(uint16_t) * BUCKET_SIZE);
+  }
+  memset(tiny_hash, 0, sizeof(uint8_t) * 65536);  /* Always fully reset. */
+  memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));
+}
+
+static BROTLI_INLINE void FN(HashMemAllocInBytes)(
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size, size_t* alloc_size) {
+  BROTLI_UNUSED(params);  /* Both sizes depend on template macros only. */
+  BROTLI_UNUSED(one_shot);
+  BROTLI_UNUSED(input_size);
+  alloc_size[0] = sizeof(uint32_t) * BUCKET_SIZE +  /* addr + head + tiny */
+                  sizeof(uint16_t) * BUCKET_SIZE + sizeof(uint8_t) * 65536;
+  alloc_size[1] = sizeof(FN(Bank)) * NUM_BANKS;  /* banks */
+}
+
+/* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend
+   a node to the matching chain; also update tiny_hash for position |ix|. */
+static BROTLI_INLINE void FN(Store)(HashForgetfulChain* BROTLI_RESTRICT self,
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
+  uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra[0]);
+  uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra[0]);
+  uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra[0]);
+  FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra[1]);
+  const size_t key = FN(HashBytes)(&data[ix & mask]);
+  const size_t bank = key & (NUM_BANKS - 1);  /* Low key bits pick the bank. */
+  const size_t idx = self->free_slot_idx[bank]++ & (BANK_SIZE - 1);
+  size_t delta = ix - addr[key];  /* Distance to the previous chain head. */
+  tiny_hash[(uint16_t)ix] = (uint8_t)key;
+  if (delta > 0xFFFF) delta = CAPPED_CHAINS ? 0 : 0xFFFF;  /* 16-bit field. */
+  banks[bank].slots[idx].delta = (uint16_t)delta;
+  banks[bank].slots[idx].next = head[key];
+  addr[key] = (uint32_t)ix;  /* The new node becomes the chain head. */
+  head[key] = (uint16_t)idx;
+}
+
+static BROTLI_INLINE void FN(StoreRange)(
+    HashForgetfulChain* BROTLI_RESTRICT self,
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
+    const size_t ix_start, const size_t ix_end) {
+  size_t ix = ix_start;  /* Positions [ix_start, ix_end) are stored in order. */
+  for (; ix < ix_end; ix++) {
+    FN(Store)(self, data, mask, ix);
+  }
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(
+    HashForgetfulChain* BROTLI_RESTRICT self,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ring_buffer_mask) {
+  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
+    /* Store the hashes for the last three positions of the previous write.
+       They could not be computed earlier, because they require knowledge
+       of both the previous and the current block. */
+    FN(Store)(self, ringbuffer, ring_buffer_mask, position - 3);
+    FN(Store)(self, ringbuffer, ring_buffer_mask, position - 2);
+    FN(Store)(self, ringbuffer, ring_buffer_mask, position - 1);
+  }
+}
+
+static BROTLI_INLINE void FN(PrepareDistanceCache)(
+    HashForgetfulChain* BROTLI_RESTRICT self,
+    int* BROTLI_RESTRICT distance_cache) {
+  BROTLI_UNUSED(self);  /* The entry count is a template parameter. */
+  PrepareDistanceCache(distance_cache, NUM_LAST_DISTANCES_TO_CHECK);
+}
+
+/* Finds the longest backward match of &data[cur_ix], up to max_length bytes,
+   and stores the position cur_ix in the hash table.
+
+   REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
+             values; if this method is invoked repeatedly with the same distance
+             cache values, it is enough to invoke FN(PrepareDistanceCache) once.
+
+   Does not look for matches longer than max_length.
+   Does not look for matches further away than max_backward.
+   Writes the best match into |out|. |out|->len and |out|->len_code_delta are
+   cleared first; |out|->score is updated only if a better match is found. */
+static BROTLI_INLINE void FN(FindLongestMatch)(
+    HashForgetfulChain* BROTLI_RESTRICT self,
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
+    const int* BROTLI_RESTRICT distance_cache,
+    const size_t cur_ix, const size_t max_length, const size_t max_backward,
+    const size_t dictionary_distance, const size_t max_distance,
+    HasherSearchResult* BROTLI_RESTRICT out) {
+  uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra[0]);
+  uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra[0]);
+  uint8_t* BROTLI_RESTRICT tiny_hashes = FN(TinyHash)(self->extra[0]);
+  FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra[1]);
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  /* Don't accept a short copy from far away. */
+  score_t min_score = out->score;
+  score_t best_score = out->score;
+  size_t best_len = out->len;
+  size_t i;
+  const size_t key = FN(HashBytes)(&data[cur_ix_masked]);
+  const uint8_t tiny_hash = (uint8_t)(key);
+  out->len = 0;
+  out->len_code_delta = 0;
+  /* Try last distance first. */
+  for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {
+    const size_t backward = (size_t)distance_cache[i];
+    size_t prev_ix = (cur_ix - backward);
+    /* For distance code 0 we want to consider 2-byte matches. */
+    if (i > 0 && tiny_hashes[(uint16_t)prev_ix] != tiny_hash) continue;
+    if (prev_ix >= cur_ix || backward > max_backward) {
+      continue;  /* Candidate is not behind cur_ix, or is too far away. */
+    }
+    prev_ix &= ring_buffer_mask;
+    {
+      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                  &data[cur_ix_masked],
+                                                  max_length);
+      if (len >= 2) {
+        score_t score = BackwardReferenceScoreUsingLastDistance(len);
+        if (best_score < score) {
+          if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
+          if (best_score < score) {  /* Still better after the penalty. */
+            best_score = score;
+            best_len = len;
+            out->len = best_len;
+            out->distance = backward;
+            out->score = best_score;
+          }
+        }
+      }
+    }
+  }
+  {  /* Then walk the hash chain of |key|. */
+    const size_t bank = key & (NUM_BANKS - 1);
+    size_t backward = 0;  /* Sum of the deltas walked so far. */
+    size_t hops = self->max_hops;
+    size_t delta = cur_ix - addr[key];
+    size_t slot = head[key];
+    while (hops--) {  /* Visits at most max_hops nodes. */
+      size_t prev_ix;
+      size_t last = slot;
+      backward += delta;
+      if (backward > max_backward || (CAPPED_CHAINS && !delta)) break;
+      prev_ix = (cur_ix - backward) & ring_buffer_mask;
+      slot = banks[bank].slots[last].next;
+      delta = banks[bank].slots[last].delta;
+      if (cur_ix_masked + best_len > ring_buffer_mask ||
+          prev_ix + best_len > ring_buffer_mask ||
+          data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+        continue;  /* Cannot be longer than the best match so far. */
+      }
+      {
+        const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                    &data[cur_ix_masked],
+                                                    max_length);
+        if (len >= 4) {
+          /* Shorter matches are skipped up front: this saves a few unnecessary
+             binary logarithms in backward reference score, since we are not
+             interested in such short matches. */
+          score_t score = BackwardReferenceScore(len, backward);
+          if (best_score < score) {
+            best_score = score;
+            best_len = len;
+            out->len = best_len;
+            out->distance = backward;
+            out->score = best_score;
+          }
+        }
+      }
+    }
+    FN(Store)(self, data, ring_buffer_mask, cur_ix);  /* Record cur_ix. */
+  }
+  if (out->score == min_score) {  /* Nothing better was found above. */
+    SearchInStaticDictionary(dictionary,
+        self->common, &data[cur_ix_masked], max_length, dictionary_distance,
+        max_distance, out, BROTLI_FALSE);
+  }
+}
+
+#undef BANK_SIZE
+#undef BUCKET_SIZE
+#undef CAPPED_CHAINS
+
+#undef HashForgetfulChain
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_longest_match64_inc.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_longest_match64_inc.h
new file mode 100644
index 0000000000..d02435e768
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_longest_match64_inc.h
@@ -0,0 +1,267 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN */
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+   help create backward references to previous data.
+
+   This is a hash map of fixed size (bucket_size_) to a ring buffer of
+   fixed size (block_size_). The ring buffer contains the last block_size_
+   index positions of the given hash key in the compressed data. */
+
+#define HashLongestMatch HASHER()
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }  /* bytes */
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }  /* bytes */
+
+/* HashBytes is the function that chooses the bucket to place the address in. */
+static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data,
+                                            const uint64_t mask,
+                                            const int shift) {
+  const uint64_t masked = BROTLI_UNALIGNED_LOAD64LE(data) & mask;
+  const uint64_t mixed = masked * kHashMul64Long;
+  /* The multiplication mixes the higher bits best, so keep only those. */
+  return (uint32_t)(mixed >> shift);
+}
+
+typedef struct HashLongestMatch {
+  /* Number of hash buckets. */
+  size_t bucket_size_;
+  /* Only block_size_ newest backward references are kept,
+     and the older are forgotten. */
+  size_t block_size_;
+  /* Right-shift for computing hash bucket index from hash value. */
+  int hash_shift_;
+  /* Mask for selecting the next 4-8 bytes of input */
+  uint64_t hash_mask_;
+  /* Mask for accessing entries in a block (in a ring-buffer manner). */
+  uint32_t block_mask_;
+
+  int block_bits_;  /* log2 of block_size_. */
+  int num_last_distances_to_check_;  /* Distance cache entries to try. */
+
+  /* Shortcuts. */
+  HasherCommon* common_;
+
+  /* --- Dynamic size members --- */
+
+  /* Number of entries in a particular bucket. */
+  uint16_t* num_;  /* uint16_t[bucket_size]; */
+
+  /* Buckets containing block_size_ of backward references. */
+  uint32_t* buckets_;  /* uint32_t[bucket_size * block_size]; */
+} HashLongestMatch;
+
+static void FN(Initialize)(
+    HasherCommon* common, HashLongestMatch* BROTLI_RESTRICT self,
+    const BrotliEncoderParams* params) {
+  self->common_ = common;
+  /* All tunables are read from |common->params|, not from |params|. */
+  BROTLI_UNUSED(params);
+  self->hash_shift_ = 64 - common->params.bucket_bits;
+  self->hash_mask_ = (~((uint64_t)0U)) >> (64 - 8 * common->params.hash_len);
+  self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
+  self->block_bits_ = common->params.block_bits;
+  self->block_size_ = (size_t)1 << common->params.block_bits;
+  self->block_mask_ = (uint32_t)(self->block_size_ - 1);
+  self->num_last_distances_to_check_ =
+      common->params.num_last_distances_to_check;
+  self->num_ = (uint16_t*)common->extra[0];  /* First memory block. */
+  self->buckets_ = (uint32_t*)common->extra[1];  /* Second memory block. */
+}
+
+static void FN(Prepare)(
+    HashLongestMatch* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
+    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
+  uint16_t* BROTLI_RESTRICT num = self->num_;
+  /* Partial preparation is 100 times slower (per socket). */
+  size_t partial_prepare_threshold = self->bucket_size_ >> 6;
+  if (one_shot && input_size <= partial_prepare_threshold) {
+    size_t i;  /* NOTE(review): 8-byte loads near the end need slack. */
+    for (i = 0; i < input_size; ++i) {
+      const uint32_t key = FN(HashBytes)(&data[i], self->hash_mask_,
+                                         self->hash_shift_);
+      num[key] = 0;  /* Clear only the buckets hashed from the input. */
+    }
+  } else {
+    memset(num, 0, self->bucket_size_ * sizeof(num[0]));  /* Clear all. */
+  }
+}
+
+static BROTLI_INLINE void FN(HashMemAllocInBytes)(
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size, size_t* alloc_size) {
+  size_t bucket_size = (size_t)1 << params->hasher.bucket_bits;
+  size_t block_size = (size_t)1 << params->hasher.block_bits;
+  BROTLI_UNUSED(one_shot);
+  BROTLI_UNUSED(input_size);
+  alloc_size[0] = sizeof(uint16_t) * bucket_size;  /* num_ */
+  alloc_size[1] = sizeof(uint32_t) * bucket_size * block_size;  /* buckets_ */
+}
+
+/* Look at 8 bytes at &data[ix & mask] (masked with hash_mask_).
+   Compute a hash from these, and store the value of ix at that position. */
+static BROTLI_INLINE void FN(Store)(
+    HashLongestMatch* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
+    const size_t mask, const size_t ix) {
+  uint16_t* BROTLI_RESTRICT num = self->num_;
+  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
+  const uint32_t key = FN(HashBytes)(&data[ix & mask], self->hash_mask_,
+                                     self->hash_shift_);
+  const size_t minor_ix = num[key] & self->block_mask_;  /* Slot in block. */
+  const size_t offset = minor_ix + (key << self->block_bits_);
+  ++num[key];
+  buckets[offset] = (uint32_t)ix;
+}
+
+static BROTLI_INLINE void FN(StoreRange)(HashLongestMatch* BROTLI_RESTRICT self,
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
+    const size_t ix_start, const size_t ix_end) {
+  size_t ix = ix_start;  /* Positions [ix_start, ix_end) are stored in order. */
+  for (; ix < ix_end; ix++) {
+    FN(Store)(self, data, mask, ix);
+  }
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(
+    HashLongestMatch* BROTLI_RESTRICT self,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ringbuffer_mask) {
+  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
+    /* Store the hashes for the last three positions of the previous write.
+       They could not be computed earlier, because they require knowledge
+       of both the previous and the current block. */
+    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
+    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
+    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
+  }
+}
+
+static BROTLI_INLINE void FN(PrepareDistanceCache)(
+    HashLongestMatch* BROTLI_RESTRICT self,  /* Supplies the entry count. */
+    int* BROTLI_RESTRICT distance_cache) {
+  PrepareDistanceCache(distance_cache, self->num_last_distances_to_check_);
+}
+
+/* Finds the longest backward match of &data[cur_ix], up to max_length bytes,
+   and stores the position cur_ix in the hash table.
+
+   REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
+             values; if this method is invoked repeatedly with the same distance
+             cache values, it is enough to invoke FN(PrepareDistanceCache) once.
+
+   Does not look for matches longer than max_length.
+   Does not look for matches further away than max_backward.
+   Writes the best match into |out|. |out|->len and |out|->len_code_delta are
+   cleared first; |out|->score is updated only if a better match is found. */
+static BROTLI_INLINE void FN(FindLongestMatch)(
+    HashLongestMatch* BROTLI_RESTRICT self,
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
+    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
+    const size_t max_length, const size_t max_backward,
+    const size_t dictionary_distance, const size_t max_distance,
+    HasherSearchResult* BROTLI_RESTRICT out) {
+  uint16_t* BROTLI_RESTRICT num = self->num_;
+  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  /* Don't accept a short copy from far away. */
+  score_t min_score = out->score;
+  score_t best_score = out->score;
+  size_t best_len = out->len;
+  size_t i;
+  out->len = 0;
+  out->len_code_delta = 0;
+  /* Try last distance first. */
+  for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {
+    const size_t backward = (size_t)distance_cache[i];
+    size_t prev_ix = (size_t)(cur_ix - backward);
+    if (prev_ix >= cur_ix) {
+      continue;  /* Candidate is not behind cur_ix. */
+    }
+    if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
+      continue;  /* Candidate is too far away. */
+    }
+    prev_ix &= ring_buffer_mask;
+
+    if (cur_ix_masked + best_len > ring_buffer_mask ||
+        prev_ix + best_len > ring_buffer_mask ||
+        data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+      continue;  /* Cannot be longer than the best match so far. */
+    }
+    {
+      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                  &data[cur_ix_masked],
+                                                  max_length);
+      if (len >= 3 || (len == 2 && i < 2)) {
+        /* Shorter matches (and 2-byte matches for i >= 2) are skipped up
+           front: this saves a few unnecessary binary logarithms in backward
+           reference score, since we are not interested in such matches. */
+        score_t score = BackwardReferenceScoreUsingLastDistance(len);
+        if (best_score < score) {
+          if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
+          if (best_score < score) {  /* Still better after the penalty. */
+            best_score = score;
+            best_len = len;
+            out->len = best_len;
+            out->distance = backward;
+            out->score = best_score;
+          }
+        }
+      }
+    }
+  }
+  {  /* Then scan the bucket selected by the hash of the current bytes. */
+    const uint32_t key = FN(HashBytes)(
+        &data[cur_ix_masked], self->hash_mask_, self->hash_shift_);
+    uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
+    const size_t down =
+        (num[key] > self->block_size_) ?
+        (num[key] - self->block_size_) : 0u;
+    for (i = num[key]; i > down;) {  /* Most recently stored entry first. */
+      size_t prev_ix = bucket[--i & self->block_mask_];
+      const size_t backward = cur_ix - prev_ix;
+      if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
+        break;
+      }
+      prev_ix &= ring_buffer_mask;
+      if (cur_ix_masked + best_len > ring_buffer_mask ||
+          prev_ix + best_len > ring_buffer_mask ||
+          data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+        continue;  /* Cannot be longer than the best match so far. */
+      }
+      {
+        const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                    &data[cur_ix_masked],
+                                                    max_length);
+        if (len >= 4) {
+          /* Shorter matches are skipped up front: this saves a few unnecessary
+             binary logarithms in backward reference score, since we are not
+             interested in such short matches. */
+          score_t score = BackwardReferenceScore(len, backward);
+          if (best_score < score) {
+            best_score = score;
+            best_len = len;
+            out->len = best_len;
+            out->distance = backward;
+            out->score = best_score;
+          }
+        }
+      }
+    }
+    bucket[num[key] & self->block_mask_] = (uint32_t)cur_ix;
+    ++num[key];  /* cur_ix is now the newest entry of the bucket. */
+  }
+  if (min_score == out->score) {  /* Nothing better was found above. */
+    SearchInStaticDictionary(dictionary,
+        self->common_, &data[cur_ix_masked], max_length, dictionary_distance,
+        max_distance, out, BROTLI_FALSE);
+  }
+}
+
+#undef HashLongestMatch
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_longest_match_inc.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_longest_match_inc.h
new file mode 100644
index 0000000000..788e9ef993
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_longest_match_inc.h
@@ -0,0 +1,258 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN */
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+   help create backward references to previous data.
+
+   This is a hash map of fixed size (bucket_size_) to a ring buffer of
+   fixed size (block_size_). The ring buffer contains the last block_size_
+   index positions of the given hash key in the compressed data. */
+
+#define HashLongestMatch HASHER()
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }  /* HashBytes loads 4 bytes. */
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }  /* Store hashes 4 bytes starting at ix. */
+
+/* HashBytes chooses the bucket for an address: the top (32 - shift) bits of (32-bit load at data) * kHashMul32. */
+static uint32_t FN(HashBytes)(
+    const uint8_t* BROTLI_RESTRICT data, const int shift) {
+  uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
+  /* The higher bits contain more mixture from the multiplication,
+     so we take our results from there. */
+  return (uint32_t)(h >> shift);
+}
+
+typedef struct HashLongestMatch {
+  /* Number of hash buckets. */
+  size_t bucket_size_;
+  /* Only block_size_ newest backward references are kept,
+     and the older are forgotten. */
+  size_t block_size_;
+  /* Right-shift that reduces a 32-bit hash value to a bucket index. */
+  int hash_shift_;
+  /* Mask for accessing entries in a block (in a ring-buffer manner). */
+  uint32_t block_mask_;
+
+  int block_bits_;  /* log2(block_size_); a key shifted left by it gives its block start. */
+  int num_last_distances_to_check_;  /* Distance-cache entries FindLongestMatch tries first. */
+
+  /* Shortcuts. */
+  HasherCommon* common_;
+
+  /* --- Dynamic size members --- */
+
+  /* Positions stored so far per bucket; (count & block_mask_) is the next slot. */
+  uint16_t* num_;  /* uint16_t[bucket_size]; */
+
+  /* Buckets containing block_size_ of backward references. */
+  uint32_t* buckets_;  /* uint32_t[bucket_size * block_size]; */
+} HashLongestMatch;
+
+static void FN(Initialize)(  /* Caches hasher parameters and buffer pointers from |common| in |self|. */
+    HasherCommon* common, HashLongestMatch* BROTLI_RESTRICT self,
+    const BrotliEncoderParams* params) {
+  self->common_ = common;
+
+  BROTLI_UNUSED(params);  /* All settings are read from common->params. */
+  self->hash_shift_ = 32 - common->params.bucket_bits;  /* Keeps the top bucket_bits of the hash. */
+  self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
+  self->block_size_ = (size_t)1 << common->params.block_bits;
+  self->block_mask_ = (uint32_t)(self->block_size_ - 1);
+  self->num_ = (uint16_t*)common->extra[0];  /* Not allocated here; pointer taken from common. */
+  self->buckets_ = (uint32_t*)common->extra[1];
+  self->block_bits_ = common->params.block_bits;
+  self->num_last_distances_to_check_ =
+      common->params.num_last_distances_to_check;
+}
+
+static void FN(Prepare)(  /* Resets the per-bucket counters in num_ (all, or only those a small one-shot input hits). */
+    HashLongestMatch* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
+    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
+  uint16_t* BROTLI_RESTRICT num = self->num_;
+  /* Partial preparation is 100 times slower (per bucket). */
+  size_t partial_prepare_threshold = self->bucket_size_ >> 6;  /* bucket_size_ / 64 */
+  if (one_shot && input_size <= partial_prepare_threshold) {
+    size_t i;
+    for (i = 0; i < input_size; ++i) {
+      const uint32_t key = FN(HashBytes)(&data[i], self->hash_shift_);
+      num[key] = 0;  /* Reset only the buckets this input hashes to. */
+    }
+  } else {
+    memset(num, 0, self->bucket_size_ * sizeof(num[0]));  /* Reset every bucket. */
+  }
+}
+
+static BROTLI_INLINE void FN(HashMemAllocInBytes)(  /* Writes the byte sizes of two buffers into alloc_size[0..1]. */
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size, size_t* alloc_size) {
+  size_t bucket_size = (size_t)1 << params->hasher.bucket_bits;
+  size_t block_size = (size_t)1 << params->hasher.block_bits;
+  BROTLI_UNUSED(one_shot);
+  BROTLI_UNUSED(input_size);
+  alloc_size[0] = sizeof(uint16_t) * bucket_size;  /* Same shape as num_: uint16_t[bucket_size]. */
+  alloc_size[1] = sizeof(uint32_t) * bucket_size * block_size;  /* Same shape as buckets_. */
+}
+
+/* Hashes the 4 bytes at &data[ix & mask] and writes ix into that hash's
+   block at slot (num_[key] & block_mask_), then bumps the bucket's counter. */
+static BROTLI_INLINE void FN(Store)(
+    HashLongestMatch* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
+    const size_t mask, const size_t ix) {
+  const uint32_t key = FN(HashBytes)(&data[ix & mask], self->hash_shift_);
+  const size_t minor_ix = self->num_[key] & self->block_mask_;  /* Slot inside the block. */
+  const size_t offset = minor_ix + (key << self->block_bits_);  /* Block start + slot. */
+  self->buckets_[offset] = (uint32_t)ix;  /* Position is stored as uint32_t. */
+  ++self->num_[key];
+}
+
+static BROTLI_INLINE void FN(StoreRange)(HashLongestMatch* BROTLI_RESTRICT self,
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
+    const size_t ix_start, const size_t ix_end) {
+  size_t i;
+  for (i = ix_start; i < ix_end; ++i) {  /* Stores each ix in [ix_start, ix_end). */
+    FN(Store)(self, data, mask, i);
+  }
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(
+    HashLongestMatch* BROTLI_RESTRICT self,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ringbuffer_mask) {
+  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {  /* position >= 3 keeps position - 3 from underflowing. */
+    /* Store the hashes for the three positions just before |position|.
+       These could not be calculated before, since they require knowledge
+       of both the previous and the current block. */
+    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
+    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
+    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
+  }
+}
+
+static BROTLI_INLINE void FN(PrepareDistanceCache)(  /* Forwards to the unprefixed PrepareDistanceCache(). */
+    HashLongestMatch* BROTLI_RESTRICT self,
+    int* BROTLI_RESTRICT distance_cache) {
+  PrepareDistanceCache(distance_cache, self->num_last_distances_to_check_);
+}
+
+/* Finds the longest backward match of &data[cur_ix] up to the length of
+   max_length and stores the position cur_ix in the hash table.
+
+   REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
+             values; if this method is invoked repeatedly with the same distance
+             cache values, it is enough to invoke FN(PrepareDistanceCache) once.
+
+   Does not look for matches longer than max_length.
+   Does not look for matches further away than max_backward.
+   Writes the best match into |out|.
+   |out|->score is updated only if a better match is found. */
+static BROTLI_INLINE void FN(FindLongestMatch)(
+    HashLongestMatch* BROTLI_RESTRICT self,
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
+    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
+    const size_t max_length, const size_t max_backward,
+    const size_t dictionary_distance, const size_t max_distance,
+    HasherSearchResult* BROTLI_RESTRICT out) {
+  uint16_t* BROTLI_RESTRICT num = self->num_;
+  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  /* Don't accept a short copy from far away. */
+  score_t min_score = out->score;
+  score_t best_score = out->score;
+  size_t best_len = out->len;
+  size_t i;
+  out->len = 0;
+  out->len_code_delta = 0;
+  /* Try last distance first. */
+  for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {
+    const size_t backward = (size_t)distance_cache[i];
+    size_t prev_ix = (size_t)(cur_ix - backward);
+    if (prev_ix >= cur_ix) {  /* Zero or wrapped (non-positive) distance. */
+      continue;
+    }
+    if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
+      continue;
+    }
+    prev_ix &= ring_buffer_mask;
+
+    if (cur_ix_masked + best_len > ring_buffer_mask ||
+        prev_ix + best_len > ring_buffer_mask ||
+        data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+      continue;  /* Candidate cannot be longer than best_len. */
+    }
+    {
+      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                  &data[cur_ix_masked],
+                                                  max_length);
+      if (len >= 3 || (len == 2 && i < 2)) {
+        /* Only matches of length >= 3 (or length 2 for the first two cache
+           entries) are scored; this saves a few unnecessary binary logarithms
+           in backward reference score for matches we are not interested in. */
+        score_t score = BackwardReferenceScoreUsingLastDistance(len);
+        if (best_score < score) {
+          if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
+          if (best_score < score) {
+            best_score = score;
+            best_len = len;
+            out->len = best_len;
+            out->distance = backward;
+            out->score = best_score;
+          }
+        }
+      }
+    }
+  }
+  {
+    const uint32_t key =
+        FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
+    uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
+    const size_t down =
+        (num[key] > self->block_size_) ? (num[key] - self->block_size_) : 0u;
+    for (i = num[key]; i > down;) {  /* Most recently stored entry first. */
+      size_t prev_ix = bucket[--i & self->block_mask_];
+      const size_t backward = cur_ix - prev_ix;
+      if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
+        break;  /* Stop at the first entry that is too far back. */
+      }
+      prev_ix &= ring_buffer_mask;
+      if (cur_ix_masked + best_len > ring_buffer_mask ||
+          prev_ix + best_len > ring_buffer_mask ||
+          data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+        continue;
+      }
+      {
+        const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                    &data[cur_ix_masked],
+                                                    max_length);
+        if (len >= 4) {
+          /* Matches shorter than 4 bytes are skipped before scoring; this
+             saves a few unnecessary binary logarithms in backward reference
+             score, since we are not interested in such short matches. */
+          score_t score = BackwardReferenceScore(len, backward);
+          if (best_score < score) {
+            best_score = score;
+            best_len = len;
+            out->len = best_len;
+            out->distance = backward;
+            out->score = best_score;
+          }
+        }
+      }
+    }
+    bucket[num[key] & self->block_mask_] = (uint32_t)cur_ix;  /* Same write Store() performs for cur_ix. */
+    ++num[key];
+  }
+  if (min_score == out->score) {  /* Score unchanged: nothing better was found above. */
+    SearchInStaticDictionary(dictionary,
+        self->common_, &data[cur_ix_masked], max_length, dictionary_distance,
+        max_distance, out, BROTLI_FALSE);
+  }
+}
+
+#undef HashLongestMatch
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_longest_match_quickly_inc.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_longest_match_quickly_inc.h
new file mode 100644
index 0000000000..54397ef891
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_longest_match_quickly_inc.h
@@ -0,0 +1,266 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, BUCKET_BITS, BUCKET_SWEEP_BITS, HASH_LEN,
+                        USE_DICTIONARY
+ */
+
+#define HashLongestMatchQuickly HASHER()
+
+#define BUCKET_SIZE (1 << BUCKET_BITS)  /* Number of slots in buckets_. */
+#define BUCKET_MASK (BUCKET_SIZE - 1)
+#define BUCKET_SWEEP (1 << BUCKET_SWEEP_BITS)  /* Slots probed per key. */
+#define BUCKET_SWEEP_MASK ((BUCKET_SWEEP - 1) << 3)  /* Selects the sweep index from bits 3 and up of a position. */
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }  /* HashBytes loads 8 bytes. */
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }  /* Store's HashBytes call loads 8 bytes at ix. */
+
+/* HashBytes chooses the bucket to place the address in. It loads 8 bytes,
+   shifts out all but the low 8 * HASH_LEN bits, and multiplies by kHashMul64.
+   HashLongestMatch and HashLongestMatchQuickly hash differently. */
+static uint32_t FN(HashBytes)(const uint8_t* data) {
+  const uint64_t h = ((BROTLI_UNALIGNED_LOAD64LE(data) << (64 - 8 * HASH_LEN)) *
+                      kHashMul64);
+  /* The higher bits contain more mixture from the multiplication,
+     so we take our results from there. */
+  return (uint32_t)(h >> (64 - BUCKET_BITS));  /* Top BUCKET_BITS bits. */
+}
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+   help create backward references to previous data.
+
+   This is a hash map of fixed size (BUCKET_SIZE); each slot holds one position. */
+typedef struct HashLongestMatchQuickly {
+  /* Shortcuts. */
+  HasherCommon* common;
+
+  /* --- Dynamic size members --- */
+
+  uint32_t* buckets_;  /* uint32_t[BUCKET_SIZE]; positions are stored as uint32_t. */
+} HashLongestMatchQuickly;
+
+static void FN(Initialize)(  /* Binds |self| to |common| and to its bucket buffer. */
+    HasherCommon* common, HashLongestMatchQuickly* BROTLI_RESTRICT self,
+    const BrotliEncoderParams* params) {
+  self->common = common;
+
+  BROTLI_UNUSED(params);
+  self->buckets_ = (uint32_t*)common->extra[0];  /* Not allocated here; pointer taken from common. */
+}
+
+static void FN(Prepare)(  /* Zeroes bucket slots: all of them, or only those a small one-shot input can hit. */
+    HashLongestMatchQuickly* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
+    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
+  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
+  /* Partial preparation is 100 times slower (per bucket). */
+  size_t partial_prepare_threshold = BUCKET_SIZE >> 5;  /* BUCKET_SIZE / 32 */
+  if (one_shot && input_size <= partial_prepare_threshold) {
+    size_t i;
+    for (i = 0; i < input_size; ++i) {
+      const uint32_t key = FN(HashBytes)(&data[i]);
+      if (BUCKET_SWEEP == 1) {
+        buckets[key] = 0;
+      } else {
+        uint32_t j;
+        for (j = 0; j < BUCKET_SWEEP; ++j) {  /* Clear every slot in the key's sweep range. */
+          buckets[(key + (j << 3)) & BUCKET_MASK] = 0;
+        }
+      }
+    }
+  } else {
+    /* It is not strictly necessary to fill this buffer here, but
+       not filling will make the results of the compression stochastic
+       (but correct). This is because random data would cause the
+       system to find accidentally good backward references here and there. */
+    memset(buckets, 0, sizeof(uint32_t) * BUCKET_SIZE);
+  }
+}
+
+static BROTLI_INLINE void FN(HashMemAllocInBytes)(  /* Writes one buffer size into alloc_size[0]; inputs are ignored. */
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size, size_t* alloc_size) {
+  BROTLI_UNUSED(params);
+  BROTLI_UNUSED(one_shot);
+  BROTLI_UNUSED(input_size);
+  alloc_size[0] = sizeof(uint32_t) * BUCKET_SIZE;  /* Same shape as buckets_: uint32_t[BUCKET_SIZE]. */
+}
+
+/* Hashes the bytes at &data[ix & mask] (see HashBytes) and stores ix in the
+   slot for that hash. When BUCKET_SWEEP > 1 the slot is offset by
+   (ix & BUCKET_SWEEP_MASK), i.e. it lies within the key's sweep range. */
+static BROTLI_INLINE void FN(Store)(
+    HashLongestMatchQuickly* BROTLI_RESTRICT self,
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
+  const uint32_t key = FN(HashBytes)(&data[ix & mask]);
+  if (BUCKET_SWEEP == 1) {
+    self->buckets_[key] = (uint32_t)ix;
+  } else {
+    /* Wiggle the value with the bucket sweep range. */
+    const uint32_t off = ix & BUCKET_SWEEP_MASK;
+    self->buckets_[(key + off) & BUCKET_MASK] = (uint32_t)ix;
+  }
+}
+
+static BROTLI_INLINE void FN(StoreRange)(
+    HashLongestMatchQuickly* BROTLI_RESTRICT self,
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
+    const size_t ix_start, const size_t ix_end) {
+  size_t i;
+  for (i = ix_start; i < ix_end; ++i) {  /* Stores each ix in [ix_start, ix_end). */
+    FN(Store)(self, data, mask, i);
+  }
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(
+    HashLongestMatchQuickly* BROTLI_RESTRICT self,
+    size_t num_bytes, size_t position,
+    const uint8_t* ringbuffer, size_t ringbuffer_mask) {
+  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {  /* position >= 3 keeps position - 3 from underflowing. */
+    /* Store the hashes for the three positions just before |position|.
+       These could not be calculated before, since they require knowledge
+       of both the previous and the current block. */
+    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
+    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
+    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
+  }
+}
+
+static BROTLI_INLINE void FN(PrepareDistanceCache)(  /* No-op: both arguments are ignored. */
+    HashLongestMatchQuickly* BROTLI_RESTRICT self,
+    int* BROTLI_RESTRICT distance_cache) {
+  BROTLI_UNUSED(self);
+  BROTLI_UNUSED(distance_cache);
+}
+
+/* Finds the longest backward match of &data[cur_ix & ring_buffer_mask]
+   up to the length of max_length and stores the position cur_ix in the
+   hash table.
+
+   Does not look for matches longer than max_length.
+   Does not look for matches further away than max_backward.
+   Writes the best match into |out|.
+   |out|->score is updated only if a better match is found. */
+static BROTLI_INLINE void FN(FindLongestMatch)(
+    HashLongestMatchQuickly* BROTLI_RESTRICT self,
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* BROTLI_RESTRICT data,
+    const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
+    const size_t cur_ix, const size_t max_length, const size_t max_backward,
+    const size_t dictionary_distance, const size_t max_distance,
+    HasherSearchResult* BROTLI_RESTRICT out) {
+  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
+  const size_t best_len_in = out->len;
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  int compare_char = data[cur_ix_masked + best_len_in];  /* Byte a candidate must match at offset best_len. */
+  size_t key = FN(HashBytes)(&data[cur_ix_masked]);
+  size_t key_out;
+  score_t min_score = out->score;
+  score_t best_score = out->score;
+  size_t best_len = best_len_in;
+  size_t cached_backward = (size_t)distance_cache[0];  /* Try the last distance first. */
+  size_t prev_ix = cur_ix - cached_backward;
+  out->len_code_delta = 0;
+  if (prev_ix < cur_ix) {  /* Rejects zero and wrapped (non-positive) distances. */
+    prev_ix &= (uint32_t)ring_buffer_mask;
+    if (compare_char == data[prev_ix + best_len]) {
+      const size_t len = FindMatchLengthWithLimit(
+          &data[prev_ix], &data[cur_ix_masked], max_length);
+      if (len >= 4) {
+        const score_t score = BackwardReferenceScoreUsingLastDistance(len);
+        if (best_score < score) {
+          out->len = len;
+          out->distance = cached_backward;
+          out->score = score;
+          if (BUCKET_SWEEP == 1) {
+            buckets[key] = (uint32_t)cur_ix;
+            return;  /* Single-slot variant: accept this match without probing the table. */
+          } else {
+            best_len = len;
+            best_score = score;
+            compare_char = data[cur_ix_masked + len];
+          }
+        }
+      }
+    }
+  }
+  if (BUCKET_SWEEP == 1) {
+    size_t backward;
+    size_t len;
+    /* Only one to look for, don't bother to prepare for a loop. */
+    prev_ix = buckets[key];
+    buckets[key] = (uint32_t)cur_ix;  /* Replace the slot with the current position. */
+    backward = cur_ix - prev_ix;
+    prev_ix &= (uint32_t)ring_buffer_mask;
+    if (compare_char != data[prev_ix + best_len_in]) {
+      return;
+    }
+    if (BROTLI_PREDICT_FALSE(backward == 0 || backward > max_backward)) {
+      return;
+    }
+    len = FindMatchLengthWithLimit(&data[prev_ix],
+                                   &data[cur_ix_masked],
+                                   max_length);
+    if (len >= 4) {
+      const score_t score = BackwardReferenceScore(len, backward);
+      if (best_score < score) {
+        out->len = len;
+        out->distance = backward;
+        out->score = score;
+        return;
+      }
+    }
+  } else {
+    size_t keys[BUCKET_SWEEP];
+    size_t i;
+    for (i = 0; i < BUCKET_SWEEP; ++i) {
+      keys[i] = (key + (i << 3)) & BUCKET_MASK;
+    }
+    key_out = keys[(cur_ix & BUCKET_SWEEP_MASK) >> 3];  /* Same slot Store() would pick for cur_ix. */
+    for (i = 0; i < BUCKET_SWEEP; ++i) {
+      size_t len;
+      size_t backward;
+      prev_ix = buckets[keys[i]];
+      backward = cur_ix - prev_ix;
+      prev_ix &= (uint32_t)ring_buffer_mask;
+      if (compare_char != data[prev_ix + best_len]) {
+        continue;
+      }
+      if (BROTLI_PREDICT_FALSE(backward == 0 || backward > max_backward)) {
+        continue;
+      }
+      len = FindMatchLengthWithLimit(&data[prev_ix],
+                                     &data[cur_ix_masked],
+                                     max_length);
+      if (len >= 4) {
+        const score_t score = BackwardReferenceScore(len, backward);
+        if (best_score < score) {
+          best_len = len;
+          out->len = len;
+          compare_char = data[cur_ix_masked + len];
+          best_score = score;
+          out->score = score;
+          out->distance = backward;
+        }
+      }
+    }
+  }
+  if (USE_DICTIONARY && min_score == out->score) {  /* Score unchanged: nothing better was found above. */
+    SearchInStaticDictionary(dictionary,
+        self->common, &data[cur_ix_masked], max_length, dictionary_distance,
+        max_distance, out, BROTLI_TRUE);
+  }
+  if (BUCKET_SWEEP != 1) {  /* The BUCKET_SWEEP == 1 paths stored cur_ix earlier. */
+    buckets[key_out] = (uint32_t)cur_ix;
+  }
+}
+
+#undef BUCKET_SWEEP_MASK
+#undef BUCKET_SWEEP
+#undef BUCKET_MASK
+#undef BUCKET_SIZE
+
+#undef HashLongestMatchQuickly
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_rolling_inc.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_rolling_inc.h
new file mode 100644
index 0000000000..4c7a6b199a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_rolling_inc.h
@@ -0,0 +1,212 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2018 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */
+/* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */
+/* JUMP = skip bytes for speedup */
+
+/* Rolling hash for long distance long string matches. Stores one position
+   per bucket, bucket key is computed over a long region. */
+
+#define HashRolling HASHER()
+
+static const uint32_t FN(kRollingHashMul32) = 69069;  /* Multiplier used as the rolling-hash factor. */
+static const uint32_t FN(kInvalidPos) = 0xffffffff;  /* Marks a table slot with no stored position. */
+
+/* This hasher uses a longer forward length, but returning a higher value here
+   will hurt compression by the main hasher when combined with a composite
+   hasher. FindLongestMatch checks max_length against CHUNKLEN itself instead. */
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }  /* NOTE(review): Store is a no-op in this hasher; 4 presumably mirrors HashTypeLength - confirm. */
+
+/* Computes a code from a single byte. A lookup table of 256 values could be
+   used, but simply adding 1 works about as well. */
+static uint32_t FN(HashByte)(uint8_t byte) {
+  return (uint32_t)byte + 1u;  /* Result is in [1, 256], never 0. */
+}
+
+static uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add,  /* Folds |add| into the hash; nothing is removed. */
+                                               uint32_t factor) {
+  return (uint32_t)(factor * state + FN(HashByte)(add));
+}
+
+static uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add,  /* One rolling step: folds in |add|, subtracts |rem|'s term. */
+                                        uint8_t rem, uint32_t factor,
+                                        uint32_t factor_remove) {
+  return (uint32_t)(factor * state +
+      FN(HashByte)(add) - factor_remove * FN(HashByte)(rem));
+}
+
+typedef struct HashRolling {
+  uint32_t state;  /* Current rolling hash value. */
+  uint32_t* table;  /* uint32_t[NUMBUCKETS]; last position stored per hash code. */
+  size_t next_ix;  /* Next position FindLongestMatch will hash. */
+
+  uint32_t chunk_len;  /* NOTE(review): not assigned or read in the code visible here. */
+  uint32_t factor;  /* Multiplier applied to state on every step. */
+  uint32_t factor_remove;  /* factor raised to the number of steps per chunk, mod 2**32. */
+} HashRolling;
+
+static void FN(Initialize)(  /* Resets the hash state, derives factor_remove and marks every table slot empty. */
+    HasherCommon* common, HashRolling* BROTLI_RESTRICT self,
+    const BrotliEncoderParams* params) {
+  size_t i;
+  self->state = 0;
+  self->next_ix = 0;
+
+  self->factor = FN(kRollingHashMul32);
+
+  /* Compute the factor of the oldest byte to remove: factor**steps modulo
+     2**32 (the multiplications rely on 32-bit overflow) */
+  self->factor_remove = 1;
+  for (i = 0; i < CHUNKLEN; i += JUMP) {  /* One multiplication per JUMP-sized step of the chunk. */
+    self->factor_remove *= self->factor;
+  }
+
+  self->table = (uint32_t*)common->extra[0];  /* Not allocated here; pointer taken from common. */
+  for (i = 0; i < NUMBUCKETS; i++) {
+    self->table[i] = FN(kInvalidPos);
+  }
+
+  BROTLI_UNUSED(params);
+}
+
+static void FN(Prepare)(HashRolling* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
+    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
+  size_t i;
+  /* Fewer than CHUNKLEN bytes: cannot use this hasher, state is left as is. */
+  if (input_size < CHUNKLEN) return;
+  self->state = 0;
+  for (i = 0; i < CHUNKLEN; i += JUMP) {  /* Hash every JUMP-th byte of the first chunk. */
+    self->state = FN(HashRollingFunctionInitial)(
+        self->state, data[i], self->factor);
+  }
+  BROTLI_UNUSED(one_shot);
+}
+
+static BROTLI_INLINE void FN(HashMemAllocInBytes)(  /* Writes one buffer size into alloc_size[0]; inputs are ignored. */
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size, size_t* alloc_size) {
+  BROTLI_UNUSED(params);
+  BROTLI_UNUSED(one_shot);
+  BROTLI_UNUSED(input_size);
+  alloc_size[0] = NUMBUCKETS * sizeof(uint32_t);  /* Same size Initialize fills through table. */
+}
+
+static BROTLI_INLINE void FN(Store)(HashRolling* BROTLI_RESTRICT self,  /* No-op: every argument is ignored. */
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
+  BROTLI_UNUSED(self);
+  BROTLI_UNUSED(data);
+  BROTLI_UNUSED(mask);
+  BROTLI_UNUSED(ix);
+}
+
+static BROTLI_INLINE void FN(StoreRange)(HashRolling* BROTLI_RESTRICT self,  /* No-op: every argument is ignored. */
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
+    const size_t ix_start, const size_t ix_end) {
+  BROTLI_UNUSED(self);
+  BROTLI_UNUSED(data);
+  BROTLI_UNUSED(mask);
+  BROTLI_UNUSED(ix_start);
+  BROTLI_UNUSED(ix_end);
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(
+    HashRolling* BROTLI_RESTRICT self,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ring_buffer_mask) {
+  /* In this case we must re-initialize the hasher from scratch from the
+     current position. */
+  size_t position_masked;
+  size_t available = num_bytes;
+  if ((position & (JUMP - 1)) != 0) {  /* Round position up to a multiple of JUMP (mask assumes a power of two). */
+    size_t diff = JUMP - (position & (JUMP - 1));
+    available = (diff > available) ? 0 : (available - diff);
+    position += diff;
+  }
+  position_masked = position & ring_buffer_mask;
+  /* wrapping around ringbuffer not handled. */
+  if (available > ring_buffer_mask - position_masked) {
+    available = ring_buffer_mask - position_masked;  /* Clamp to the bytes before the buffer end. */
+  }
+
+  FN(Prepare)(self, BROTLI_FALSE, available,
+      ringbuffer + (position & ring_buffer_mask));
+  self->next_ix = position;
+  BROTLI_UNUSED(num_bytes);  /* Redundant: num_bytes is read above. */
+}
+
+static BROTLI_INLINE void FN(PrepareDistanceCache)(  /* No-op: both arguments are ignored. */
+    HashRolling* BROTLI_RESTRICT self,
+    int* BROTLI_RESTRICT distance_cache) {
+  BROTLI_UNUSED(self);
+  BROTLI_UNUSED(distance_cache);
+}
+
+static BROTLI_INLINE void FN(FindLongestMatch)(  /* Rolls the hash up to cur_ix and tries the position stored for its code. */
+    HashRolling* BROTLI_RESTRICT self,
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
+    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
+    const size_t max_length, const size_t max_backward,
+    const size_t dictionary_distance, const size_t max_distance,
+    HasherSearchResult* BROTLI_RESTRICT out) {
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  size_t pos;
+
+  if ((cur_ix & (JUMP - 1)) != 0) return;  /* Only multiples of JUMP are handled (mask assumes a power of two). */
+
+  /* Not enough lookahead */
+  if (max_length < CHUNKLEN) return;
+
+  for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) {  /* Catch the rolling hash up from next_ix to cur_ix. */
+    uint32_t code = self->state & MASK;
+
+    uint8_t rem = data[pos & ring_buffer_mask];
+    uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask];
+    size_t found_ix = FN(kInvalidPos);
+
+    self->state = FN(HashRollingFunction)(
+        self->state, add, rem, self->factor, self->factor_remove);
+
+    if (code < NUMBUCKETS) {  /* Other codes are neither stored nor looked up. */
+      found_ix = self->table[code];
+      self->table[code] = (uint32_t)pos;
+      if (pos == cur_ix && found_ix != FN(kInvalidPos)) {  /* Only the current position is tried as a match. */
+        /* The cast to 32-bit makes backward distances up to 4GB work even
+           if cur_ix is above 4GB, despite using 32-bit values in the table. */
+        size_t backward = (uint32_t)(cur_ix - found_ix);
+        if (backward <= max_backward) {
+          const size_t found_ix_masked = found_ix & ring_buffer_mask;
+          const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked],
+                                                      &data[cur_ix_masked],
+                                                      max_length);
+          if (len >= 4 && len > out->len) {
+            score_t score = BackwardReferenceScore(len, backward);
+            if (score > out->score) {
+              out->len = len;
+              out->distance = backward;
+              out->score = score;
+              out->len_code_delta = 0;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  self->next_ix = cur_ix + JUMP;
+
+  /* NOTE: this hasher does not search in the dictionary. It is used as
+     backup-hasher, the main hasher already searches in it. */
+  BROTLI_UNUSED(dictionary);
+  BROTLI_UNUSED(distance_cache);
+  BROTLI_UNUSED(dictionary_distance);
+  BROTLI_UNUSED(max_distance);
+}
+
+#undef HashRolling
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_to_binary_tree_inc.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_to_binary_tree_inc.h
new file mode 100644
index 0000000000..a639d2d4b0
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/hash_to_binary_tree_inc.h
@@ -0,0 +1,330 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, BUCKET_BITS, MAX_TREE_COMP_LENGTH,
+                        MAX_TREE_SEARCH_DEPTH */
+
+/* A (forgetful) hash table where each hash bucket contains a binary tree of
+   sequences whose first 4 bytes share the same hash code.
+   Each sequence is MAX_TREE_COMP_LENGTH long and is identified by its starting
+   position in the input data. The binary tree is sorted by the lexicographic
+   order of the sequences, and it is also a max-heap with respect to the
+   starting positions. */
+
+#define HashToBinaryTree HASHER()
+
+#define BUCKET_SIZE (1 << BUCKET_BITS)
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }  /* Same width as the 32-bit load in FN(HashBytes). */
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
+  return MAX_TREE_COMP_LENGTH;  /* FN(Store) requires this many bytes at and after ix. */
+}
+
+static uint32_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data) {  /* Maps the 4 bytes at data to a BUCKET_BITS-wide bucket index. */
+  uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
+  /* The higher bits contain more mixture from the multiplication,
+     so we take our results from there. */
+  return h >> (32 - BUCKET_BITS);
+}
+
+typedef struct HashToBinaryTree {
+  /* The window size minus 1 */
+  size_t window_mask_;  /* Set to (1u << params->lgwin) - 1u in FN(Initialize). */
+
+  /* Hash table that maps the 4-byte hashes of the sequence to the last
+     position where this hash was found, which is the root of the binary
+     tree of sequences that share this hash bucket. */
+  uint32_t* buckets_;  /* uint32_t[BUCKET_SIZE]; */
+
+  /* A position used to mark a non-existent sequence, i.e. a tree is empty if
+     its root is at invalid_pos_ and a node is a leaf if both its children
+     are at invalid_pos_. */
+  uint32_t invalid_pos_;  /* Set to (uint32_t)(0 - window_mask_) in FN(Initialize). */
+
+  /* --- Dynamic size members --- */
+
+  /* The union of the binary trees of each hash bucket. The root of the tree
+     corresponding to a hash is a sequence starting at buckets_[hash] and
+     the left and right children of a sequence starting at pos are
+     forest_[2 * pos] and forest_[2 * pos + 1]. */
+  uint32_t* forest_;  /* uint32_t[2 * num_nodes] */
+} HashToBinaryTree;
+
+static void FN(Initialize)(
+    HasherCommon* common, HashToBinaryTree* BROTLI_RESTRICT self,
+    const BrotliEncoderParams* params) {
+  self->buckets_ = (uint32_t*)common->extra[0];  /* Storage is borrowed from `common`; nothing is allocated here. */
+  self->forest_ = (uint32_t*)common->extra[1];
+
+  self->window_mask_ = (1u << params->lgwin) - 1u;
+  self->invalid_pos_ = (uint32_t)(0 - self->window_mask_);  /* Unsigned wrap-around, then truncated to 32 bits. */
+}
+
+static void FN(Prepare)
+    (HashToBinaryTree* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
+    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
+  uint32_t invalid_pos = self->invalid_pos_;
+  uint32_t i;
+  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
+  BROTLI_UNUSED(data);
+  BROTLI_UNUSED(one_shot);
+  BROTLI_UNUSED(input_size);
+  for (i = 0; i < BUCKET_SIZE; i++) {  /* Mark every bucket as an empty tree; forest_ is not touched. */
+    buckets[i] = invalid_pos;
+  }
+}
+
+static BROTLI_INLINE void FN(HashMemAllocInBytes)(
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size, size_t* alloc_size) {
+  size_t num_nodes = (size_t)1 << params->lgwin;
+  if (one_shot && input_size < num_nodes) {  /* One-shot input shorter than the window needs fewer nodes. */
+    num_nodes = input_size;
+  }
+  alloc_size[0] = sizeof(uint32_t) * BUCKET_SIZE;  /* Matches buckets_: uint32_t[BUCKET_SIZE]. */
+  alloc_size[1] = 2 * sizeof(uint32_t) * num_nodes;  /* Matches forest_: two child slots per node. */
+}
+
+static BROTLI_INLINE size_t FN(LeftChildIndex)(
+    HashToBinaryTree* BROTLI_RESTRICT self,
+    const size_t pos) {
+  return 2 * (pos & self->window_mask_);  /* Even forest_ slot of the pair for pos, wrapped to the window. */
+}
+
+static BROTLI_INLINE size_t FN(RightChildIndex)(
+    HashToBinaryTree* BROTLI_RESTRICT self,
+    const size_t pos) {
+  return 2 * (pos & self->window_mask_) + 1;  /* Odd forest_ slot of the pair for pos, wrapped to the window. */
+}
+
+/* Stores the hash of the next 4 bytes and in a single tree-traversal, the
+   hash bucket's binary tree is searched for matches and is re-rooted at the
+   current position. Returns `matches` advanced past the entries written;
+   with matches == NULL nothing is reported and best_len is not read.
+   If fewer than MAX_TREE_COMP_LENGTH bytes are available, the hash bucket of
+   the current position is searched for matches, but the state of the hash
+   table is not changed, since we cannot know the final sorting order of the
+   current (incomplete) sequence.
+
+   This function must be called with increasing cur_ix positions. */
+static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
+    HashToBinaryTree* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
+    const size_t cur_ix, const size_t ring_buffer_mask, const size_t max_length,
+    const size_t max_backward, size_t* const BROTLI_RESTRICT best_len,
+    BackwardMatch* BROTLI_RESTRICT matches) {
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  const size_t max_comp_len =
+      BROTLI_MIN(size_t, max_length, MAX_TREE_COMP_LENGTH);
+  const BROTLI_BOOL should_reroot_tree =
+      TO_BROTLI_BOOL(max_length >= MAX_TREE_COMP_LENGTH);
+  const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
+  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
+  uint32_t* BROTLI_RESTRICT forest = self->forest_;
+  size_t prev_ix = buckets[key];
+  /* The forest index of the rightmost node of the left subtree of the new
+     root, updated as we traverse and re-root the tree of the hash bucket. */
+  size_t node_left = FN(LeftChildIndex)(self, cur_ix);
+  /* The forest index of the leftmost node of the right subtree of the new
+     root, updated as we traverse and re-root the tree of the hash bucket. */
+  size_t node_right = FN(RightChildIndex)(self, cur_ix);
+  /* The match length of the rightmost node of the left subtree of the new
+     root, updated as we traverse and re-root the tree of the hash bucket. */
+  size_t best_len_left = 0;
+  /* The match length of the leftmost node of the right subtree of the new
+     root, updated as we traverse and re-root the tree of the hash bucket. */
+  size_t best_len_right = 0;
+  size_t depth_remaining;
+  if (should_reroot_tree) {
+    buckets[key] = (uint32_t)cur_ix;  /* cur_ix becomes the new root of this bucket's tree. */
+  }
+  for (depth_remaining = MAX_TREE_SEARCH_DEPTH; ; --depth_remaining) {
+    const size_t backward = cur_ix - prev_ix;
+    const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
+    if (backward == 0 || backward > max_backward || depth_remaining == 0) {  /* Out of range or depth budget spent. */
+      if (should_reroot_tree) {
+        forest[node_left] = self->invalid_pos_;  /* Terminate both open subtree ends. */
+        forest[node_right] = self->invalid_pos_;
+      }
+      break;
+    }
+    {
+      const size_t cur_len = BROTLI_MIN(size_t, best_len_left, best_len_right);  /* Bytes skipped before comparing. */
+      size_t len;
+      BROTLI_DCHECK(cur_len <= MAX_TREE_COMP_LENGTH);
+      len = cur_len +
+          FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],
+                                   &data[prev_ix_masked + cur_len],
+                                   max_length - cur_len);
+      BROTLI_DCHECK(
+          0 == memcmp(&data[cur_ix_masked], &data[prev_ix_masked], len));
+      if (matches && len > *best_len) {  /* Only strictly longer matches are reported. */
+        *best_len = len;
+        InitBackwardMatch(matches++, backward, len);
+      }
+      if (len >= max_comp_len) {  /* Equal up to the compare limit: cur_ix takes over prev_ix's children. */
+        if (should_reroot_tree) {
+          forest[node_left] = forest[FN(LeftChildIndex)(self, prev_ix)];
+          forest[node_right] = forest[FN(RightChildIndex)(self, prev_ix)];
+        }
+        break;
+      }
+      if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {  /* prev sorts before cur: it joins the left subtree. */
+        best_len_left = len;
+        if (should_reroot_tree) {
+          forest[node_left] = (uint32_t)prev_ix;
+        }
+        node_left = FN(RightChildIndex)(self, prev_ix);
+        prev_ix = forest[node_left];
+      } else {  /* prev sorts after cur: it joins the right subtree. */
+        best_len_right = len;
+        if (should_reroot_tree) {
+          forest[node_right] = (uint32_t)prev_ix;
+        }
+        node_right = FN(LeftChildIndex)(self, prev_ix);
+        prev_ix = forest[node_right];
+      }
+    }
+  }
+  return matches;
+}
+
+/* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
+   length of max_length and stores the position cur_ix in the hash table.
+
+   Returns the number of matches found, and stores the found matches in
+   matches[0] to matches[count - 1]. The matches will be
+   sorted by strictly increasing length and (non-strictly) increasing
+   distance. */
+static BROTLI_INLINE size_t FN(FindAllMatches)(
+    HashToBinaryTree* BROTLI_RESTRICT self,
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* BROTLI_RESTRICT data,
+    const size_t ring_buffer_mask, const size_t cur_ix,
+    const size_t max_length, const size_t max_backward,
+    const size_t dictionary_distance, const BrotliEncoderParams* params,
+    BackwardMatch* matches) {
+  BackwardMatch* const orig_matches = matches;
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  size_t best_len = 1;
+  const size_t short_match_max_backward =
+      params->quality != HQ_ZOPFLIFICATION_QUALITY ? 16 : 64;
+  size_t stop = cur_ix - short_match_max_backward;
+  uint32_t dict_matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];
+  size_t i;
+  if (cur_ix < short_match_max_backward) { stop = 0; }  /* Undo the unsigned wrap-around above. */
+  for (i = cur_ix - 1; i > stop && best_len <= 2; --i) {  /* Linear scan of the nearest positions while best_len <= 2. */
+    size_t prev_ix = i;
+    const size_t backward = cur_ix - prev_ix;
+    if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
+      break;
+    }
+    prev_ix &= ring_buffer_mask;
+    if (data[cur_ix_masked] != data[prev_ix] ||
+        data[cur_ix_masked + 1] != data[prev_ix + 1]) {
+      continue;
+    }
+    {
+      const size_t len =
+          FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
+                                   max_length);
+      if (len > best_len) {
+        best_len = len;
+        InitBackwardMatch(matches++, backward, len);
+      }
+    }
+  }
+  if (best_len < max_length) {  /* Tree search (and store) only if a longer match is still possible. */
+    matches = FN(StoreAndFindMatches)(self, data, cur_ix,
+        ring_buffer_mask, max_length, max_backward, &best_len, matches);
+  }
+  for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) {
+    dict_matches[i] = kInvalidMatch;
+  }
+  {
+    size_t minlen = BROTLI_MAX(size_t, 4, best_len + 1);  /* Dictionary matches must beat best_len and be >= 4. */
+    if (BrotliFindAllStaticDictionaryMatches(dictionary,
+        &data[cur_ix_masked], minlen, max_length, &dict_matches[0])) {
+      size_t maxlen = BROTLI_MIN(
+          size_t, BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN, max_length);
+      size_t l;
+      for (l = minlen; l <= maxlen; ++l) {
+        uint32_t dict_id = dict_matches[l];
+        if (dict_id < kInvalidMatch) {
+          size_t distance = dictionary_distance + (dict_id >> 5) + 1;  /* Upper bits of dict_id form the distance. */
+          if (distance <= params->dist.max_distance) {
+            InitDictionaryBackwardMatch(matches++, distance, l, dict_id & 31);  /* Low 5 bits passed separately. */
+          }
+        }
+      }
+    }
+  }
+  return (size_t)(matches - orig_matches);
+}
+
+/* Stores the hash of the next 4 bytes and re-roots the binary tree at the
+   current sequence, without returning any matches.
+   REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */
+static BROTLI_INLINE void FN(Store)(HashToBinaryTree* BROTLI_RESTRICT self,
+    const uint8_t* BROTLI_RESTRICT data,
+    const size_t mask, const size_t ix) {
+  /* Maximum distance is window size - 16, see section 9.1. of the spec. */
+  const size_t max_backward = self->window_mask_ - BROTLI_WINDOW_GAP + 1;
+  FN(StoreAndFindMatches)(self, data, ix, mask, MAX_TREE_COMP_LENGTH,
+      max_backward, NULL, NULL);  /* NULL, NULL: update the tree only, report no matches. */
+}
+
+static BROTLI_INLINE void FN(StoreRange)(HashToBinaryTree* BROTLI_RESTRICT self,
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
+    const size_t ix_start, const size_t ix_end) {
+  size_t i = ix_start;
+  size_t j = ix_start;
+  if (ix_start + 63 <= ix_end) {
+    i = ix_end - 63;  /* Only the last 63 positions are stored one by one. */
+  }
+  if (ix_start + 512 <= i) {
+    for (; j < i; j += 8) {  /* Long prefix: store every 8th position. */
+      FN(Store)(self, data, mask, j);
+    }
+  }  /* A prefix shorter than 512 positions is not stored at all. */
+  for (; i < ix_end; ++i) {
+    FN(Store)(self, data, mask, i);
+  }
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(
+    HashToBinaryTree* BROTLI_RESTRICT self,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ringbuffer_mask) {
+  if (num_bytes >= FN(HashTypeLength)() - 1 &&  /* i.e. at least 3 new bytes. */
+      position >= MAX_TREE_COMP_LENGTH) {
+    /* Store the last `MAX_TREE_COMP_LENGTH - 1` positions in the hasher.
+       These could not be calculated before, since they require knowledge
+       of both the previous and the current block. */
+    const size_t i_start = position - MAX_TREE_COMP_LENGTH + 1;
+    const size_t i_end = BROTLI_MIN(size_t, position, i_start + num_bytes);  /* Never past the start of the new block. */
+    size_t i;
+    for (i = i_start; i < i_end; ++i) {
+      /* Maximum distance is window size - 16, see section 9.1. of the spec.
+         Furthermore, we have to make sure that we don't look further back
+         from the start of the next block than the window size, otherwise we
+         could access already overwritten areas of the ring-buffer. */
+      const size_t max_backward =
+          self->window_mask_ - BROTLI_MAX(size_t,
+                                          BROTLI_WINDOW_GAP - 1,
+                                          position - i);
+      /* We know that i + MAX_TREE_COMP_LENGTH <= position + num_bytes, i.e. the
+         end of the current block and that we have at least
+         MAX_TREE_COMP_LENGTH tail in the ring-buffer. */
+      FN(StoreAndFindMatches)(self, ringbuffer, i, ringbuffer_mask,
+          MAX_TREE_COMP_LENGTH, max_backward, NULL, NULL);  /* Store only; no matches reported. */
+    }
+  }
+}
+
+#undef BUCKET_SIZE
+
+#undef HashToBinaryTree
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/histogram.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/histogram.c
new file mode 100644
index 0000000000..4dbb87f907
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/histogram.c
@@ -0,0 +1,100 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Build per-context histograms of literals, commands and distance codes. */
+
+#include "histogram.h"
+
+#include "../common/context.h"
+#include "block_splitter.h"
+#include "command.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct BlockSplitIterator {
+  const BlockSplit* split_;  /* Not owned. */
+  size_t idx_;  /* Index of the current block within split_. */
+  size_t type_;  /* Type of the current block; 0 until the first advance. */
+  size_t length_;  /* Symbols remaining in the current block. */
+} BlockSplitIterator;
+
+static void InitBlockSplitIterator(BlockSplitIterator* self,
+    const BlockSplit* split) {
+  self->split_ = split;
+  self->idx_ = 0;
+  self->type_ = 0;  /* The first block is given type 0; split->types[0] is not read. */
+  self->length_ = split->lengths ? split->lengths[0] : 0;  /* Tolerates a split without a lengths array. */
+}
+
+static void BlockSplitIteratorNext(BlockSplitIterator* self) {  /* Consumes one symbol, moving to the next block when needed. */
+  if (self->length_ == 0) {  /* Current block exhausted. */
+    ++self->idx_;  /* No bounds check: callers must not consume more symbols than the split holds. */
+    self->type_ = self->split_->types[self->idx_];
+    self->length_ = self->split_->lengths[self->idx_];
+  }
+  --self->length_;
+}
+
+void BrotliBuildHistogramsWithContext(  /* Adds every command, literal and distance symbol to the caller's histograms. */
+    const Command* cmds, const size_t num_commands,
+    const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
+    const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t start_pos,
+    size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
+    const ContextType* context_modes, HistogramLiteral* literal_histograms,
+    HistogramCommand* insert_and_copy_histograms,
+    HistogramDistance* copy_dist_histograms) {
+  size_t pos = start_pos;
+  BlockSplitIterator literal_it;
+  BlockSplitIterator insert_and_copy_it;
+  BlockSplitIterator dist_it;
+  size_t i;
+
+  InitBlockSplitIterator(&literal_it, literal_split);
+  InitBlockSplitIterator(&insert_and_copy_it, insert_and_copy_split);
+  InitBlockSplitIterator(&dist_it, dist_split);
+  for (i = 0; i < num_commands; ++i) {
+    const Command* cmd = &cmds[i];
+    size_t j;
+    BlockSplitIteratorNext(&insert_and_copy_it);  /* One command symbol per command. */
+    HistogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_],
+        cmd->cmd_prefix_);
+    /* TODO(eustas): unwrap iterator blocks. */
+    for (j = cmd->insert_len_; j != 0; --j) {  /* One literal symbol per inserted byte. */
+      size_t context;
+      BlockSplitIteratorNext(&literal_it);
+      context = literal_it.type_;
+      if (context_modes) {  /* NULL context_modes: histograms are indexed by block type alone. */
+        ContextLut lut = BROTLI_CONTEXT_LUT(context_modes[context]);
+        context = (context << BROTLI_LITERAL_CONTEXT_BITS) +
+            BROTLI_CONTEXT(prev_byte, prev_byte2, lut);
+      }
+      HistogramAddLiteral(&literal_histograms[context],
+          ringbuffer[pos & mask]);
+      prev_byte2 = prev_byte;
+      prev_byte = ringbuffer[pos & mask];
+      ++pos;
+    }
+    pos += CommandCopyLen(cmd);  /* Skip over the copied bytes. */
+    if (CommandCopyLen(cmd)) {
+      prev_byte2 = ringbuffer[(pos - 2) & mask];  /* Context bytes are the last two bytes of the copy. */
+      prev_byte = ringbuffer[(pos - 1) & mask];
+      if (cmd->cmd_prefix_ >= 128) {  /* NOTE(review): presumably prefixes below 128 carry no explicit distance - confirm. */
+        size_t context;
+        BlockSplitIteratorNext(&dist_it);
+        context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) +
+            CommandDistanceContext(cmd);
+        HistogramAddDistance(&copy_dist_histograms[context],
+            cmd->dist_prefix_ & 0x3FF);  /* Only the low 10 bits are the symbol. */
+      }
+    }
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/histogram.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/histogram.h
new file mode 100644
index 0000000000..d1abd973c1
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/histogram.h
@@ -0,0 +1,64 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Models the histograms of literals, commands and distance codes. */
+
+#ifndef BROTLI_ENC_HISTOGRAM_H_
+#define BROTLI_ENC_HISTOGRAM_H_
+
+#include <string.h>  /* memset */
+
+#include <brotli/types.h>
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/platform.h"
+#include "block_splitter.h"
+#include "command.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* The distance symbols effectively used by "Large Window Brotli" (32-bit). */
+#define BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS 544
+
+#define FN(X) X ## Literal
+#define DATA_SIZE BROTLI_NUM_LITERAL_SYMBOLS
+#define DataType uint8_t
+#include "histogram_inc.h"  /* NOLINT(build/include) */
+#undef DataType
+#undef DATA_SIZE
+#undef FN
+
+#define FN(X) X ## Command
+#define DataType uint16_t
+#define DATA_SIZE BROTLI_NUM_COMMAND_SYMBOLS
+#include "histogram_inc.h"  /* NOLINT(build/include) */
+#undef DATA_SIZE
+#undef FN
+
+#define FN(X) X ## Distance
+#define DATA_SIZE BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS
+#include "histogram_inc.h"  /* NOLINT(build/include) */
+#undef DataType
+#undef DATA_SIZE
+#undef FN
+
+BROTLI_INTERNAL void BrotliBuildHistogramsWithContext(
+    const Command* cmds, const size_t num_commands,
+    const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
+    const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t pos,
+    size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
+    const ContextType* context_modes, HistogramLiteral* literal_histograms,
+    HistogramCommand* insert_and_copy_histograms,
+    HistogramDistance* copy_dist_histograms);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_HISTOGRAM_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/histogram_inc.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/histogram_inc.h
new file mode 100644
index 0000000000..50eaf7468d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/histogram_inc.h
@@ -0,0 +1,51 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, DATA_SIZE, DataType */
+
+/* A simple container for histograms of data in blocks. */
+
+typedef struct FN(Histogram) {
+  uint32_t data_[DATA_SIZE];  /* Per-symbol counts. */
+  size_t total_count_;  /* Number of symbols added so far. */
+  double bit_cost_;  /* HUGE_VAL after FN(HistogramClear); not updated in this header. */
+} FN(Histogram);
+
+static BROTLI_INLINE void FN(HistogramClear)(FN(Histogram)* self) {  /* Resets all counts to zero. */
+  memset(self->data_, 0, sizeof(self->data_));
+  self->total_count_ = 0;
+  self->bit_cost_ = HUGE_VAL;  /* Cost is reset to HUGE_VAL, not to zero. */
+}
+
+static BROTLI_INLINE void FN(ClearHistograms)(  /* Clears `length` consecutive histograms starting at `array`. */
+    FN(Histogram)* array, size_t length) {
+  size_t i;
+  for (i = 0; i < length; ++i) FN(HistogramClear)(array + i);
+}
+
+static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) {  /* Counts one occurrence of symbol val. */
+  ++self->data_[val];  /* No range check: val must be below DATA_SIZE. */
+  ++self->total_count_;
+}
+
+static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self,  /* Counts the n symbols p[0..n). */
+    const DataType* p, size_t n) {
+  self->total_count_ += n;
+  n += 1;  /* Offset so the pre-decrement loop below runs exactly n times, including n == 0. */
+  while (--n) ++self->data_[*p++];
+}
+
+static BROTLI_INLINE void FN(HistogramAddHistogram)(FN(Histogram)* self,  /* Adds the counts of v into self. */
+    const FN(Histogram)* v) {
+  size_t i;
+  self->total_count_ += v->total_count_;
+  for (i = 0; i < DATA_SIZE; ++i) {
+    self->data_[i] += v->data_[i];
+  }  /* bit_cost_ is left unchanged. */
+}
+
+static BROTLI_INLINE size_t FN(HistogramDataSize)(void) { return DATA_SIZE; }  /* Number of entries in data_. */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/literal_cost.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/literal_cost.c
new file mode 100644
index 0000000000..2ac847f3c9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/literal_cost.c
@@ -0,0 +1,180 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Literal cost model to allow backward reference replacement to be efficient.
+*/
+
+#include "literal_cost.h"
+
+#include <string.h>  /* memset */
+
+#include <brotli/types.h>
+
+#include "../common/platform.h"
+#include "fast_log.h"
+#include "utf8_util.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static size_t UTF8Position(size_t last, size_t c, size_t clamp) {  /* Returns 0, 1 or 2, never more than clamp. */
+  if (c < 128) {
+    return 0;  /* Next one is the 'Byte 1' again. */
+  } else if (c >= 192) {  /* Next one is the 'Byte 2' of utf-8 encoding. */
+    return BROTLI_MIN(size_t, 1, clamp);
+  } else {
+    /* Let's decide over the last byte if this ends the sequence. */
+    if (last < 0xE0) {
+      return 0;  /* Completed two or three byte coding. */
+    } else {  /* Next one is the 'Byte 3' of utf-8 encoding. */
+      return BROTLI_MIN(size_t, 2, clamp);
+    }
+  }
+}
+
+static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,  /* Returns 0 or 1 as written. */
+                                        const uint8_t* data) {
+  size_t counts[3] = { 0 };
+  size_t max_utf8 = 1;  /* should be 2, but 1 compresses better. */
+  size_t last_c = 0;
+  size_t i;
+  for (i = 0; i < len; ++i) {
+    size_t c = data[(pos + i) & mask];
+    ++counts[UTF8Position(last_c, c, 2)];  /* Tally bytes by the position class computed after each byte. */
+    last_c = c;
+  }
+  if (counts[2] < 500) {
+    max_utf8 = 1;  /* No effect while the initial value above is already 1. */
+  }
+  if (counts[1] + counts[2] < 25) {
+    max_utf8 = 0;  /* Too few multi-byte positions: use single-byte modeling. */
+  }
+  return max_utf8;
+}
+
+static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,  /* Writes cost[0..len). */
+                                            const uint8_t* data,
+                                            size_t* histogram, float* cost) {
+  /* max_utf8 is 0 (normal ASCII single byte modeling),
+     1 (for 2-byte UTF-8 modeling), or 2 (for 3-byte UTF-8 modeling). */
+  const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
+  size_t window_half = 495;
+  size_t in_window = BROTLI_MIN(size_t, window_half, len);
+  size_t in_window_utf8[3] = { 0 };
+  size_t i;
+  memset(histogram, 0, 3 * 256 * sizeof(histogram[0]));  /* histogram must hold 3 * 256 entries. */
+
+  {  /* Bootstrap histograms. */
+    size_t last_c = 0;
+    size_t utf8_pos = 0;
+    for (i = 0; i < in_window; ++i) {
+      size_t c = data[(pos + i) & mask];
+      ++histogram[256 * utf8_pos + c];  /* One 256-entry table per UTF-8 position class. */
+      ++in_window_utf8[utf8_pos];
+      utf8_pos = UTF8Position(last_c, c, max_utf8);
+      last_c = c;
+    }
+  }
+
+  /* Compute bit costs with sliding window. */
+  for (i = 0; i < len; ++i) {
+    if (i >= window_half) {
+      /* Remove a byte in the past. */
+      size_t c =
+          i < window_half + 1 ? 0 : data[(pos + i - window_half - 1) & mask];
+      size_t last_c =
+          i < window_half + 2 ? 0 : data[(pos + i - window_half - 2) & mask];
+      size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);  /* Class of the byte that leaves the window. */
+      --histogram[256 * utf8_pos2 + data[(pos + i - window_half) & mask]];
+      --in_window_utf8[utf8_pos2];
+    }
+    if (i + window_half < len) {
+      /* Add a byte in the future. */
+      size_t c = data[(pos + i + window_half - 1) & mask];
+      size_t last_c = data[(pos + i + window_half - 2) & mask];
+      size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);  /* Class of the byte that enters the window. */
+      ++histogram[256 * utf8_pos2 + data[(pos + i + window_half) & mask]];
+      ++in_window_utf8[utf8_pos2];
+    }
+    {
+      size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
+      size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
+      size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
+      size_t masked_pos = (pos + i) & mask;
+      size_t histo = histogram[256 * utf8_pos + data[masked_pos]];
+      double lit_cost;
+      if (histo == 0) {
+        histo = 1;  /* Avoid taking the log of zero. */
+      }
+      lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
+      lit_cost += 0.02905;
+      if (lit_cost < 1.0) {
+        lit_cost *= 0.5;  /* Costs under 1.0 are halved and then offset by 0.5. */
+        lit_cost += 0.5;
+      }
+      /* Make the first bytes more expensive -- seems to help, not sure why.
+         Perhaps because the entropy source is changing its properties
+         rapidly in the beginning of the file, perhaps because the beginning
+         of the data is a statistical "anomaly". */
+      if (i < 2000) {
+        lit_cost += 0.7 - ((double)(2000 - i) / 2000.0 * 0.35);
+      }
+      cost[i] = (float)lit_cost;
+    }
+  }
+}
+
+void BrotliEstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,  /* Writes cost[0..len). */
+                                       const uint8_t* data,
+                                       size_t* histogram, float* cost) {
+  if (BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
+    EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, histogram, cost);  /* This path clears 3 * 256 histogram entries. */
+    return;
+  } else {
+    size_t window_half = 2000;
+    size_t in_window = BROTLI_MIN(size_t, window_half, len);
+    size_t i;
+    memset(histogram, 0, 256 * sizeof(histogram[0]));  /* This path uses only the first 256 entries. */
+
+    /* Bootstrap histogram. */
+    for (i = 0; i < in_window; ++i) {
+      ++histogram[data[(pos + i) & mask]];
+    }
+
+    /* Compute bit costs with sliding window. */
+    for (i = 0; i < len; ++i) {
+      size_t histo;
+      if (i >= window_half) {
+        /* Remove a byte in the past. */
+        --histogram[data[(pos + i - window_half) & mask]];
+        --in_window;
+      }
+      if (i + window_half < len) {
+        /* Add a byte in the future. */
+        ++histogram[data[(pos + i + window_half) & mask]];
+        ++in_window;
+      }
+      histo = histogram[data[(pos + i) & mask]];
+      if (histo == 0) {
+        histo = 1;  /* Avoid taking the log of zero. */
+      }
+      {
+        double lit_cost = FastLog2(in_window) - FastLog2(histo);
+        lit_cost += 0.029;
+        if (lit_cost < 1.0) {
+          lit_cost *= 0.5;  /* Costs under 1.0 are halved and then offset by 0.5. */
+          lit_cost += 0.5;
+        }
+        cost[i] = (float)lit_cost;
+      }
+    }
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/literal_cost.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/literal_cost.h
new file mode 100644
index 0000000000..284a8e5af7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/literal_cost.h
@@ -0,0 +1,32 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Literal cost model to allow backward reference replacement to be efficient.
+*/
+
+#ifndef BROTLI_ENC_LITERAL_COST_H_
+#define BROTLI_ENC_LITERAL_COST_H_
+
+#include <brotli/types.h>
+
+#include "../common/platform.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Estimates how many bits the literals in the interval [pos, pos + len) in the
+   ring-buffer (data, mask) will take entropy coded and writes these estimates
+   to the cost[0..len) array. */
+BROTLI_INTERNAL void BrotliEstimateBitCostsForLiterals(
+    size_t pos, size_t len, size_t mask, const uint8_t* data, size_t* histogram,
+    float* cost);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_LITERAL_COST_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/memory.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/memory.c
new file mode 100644
index 0000000000..51e1b7f18c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/memory.c
@@ -0,0 +1,193 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Memory manager for the encoder: allocation through user-supplied callbacks,
+   with optional tracking of live pointers so they can be freed in bulk. */
+
+#include "memory.h"
+
+#include <stdlib.h>  /* exit, free, malloc */
+#include <string.h>  /* memcpy */
+
+#include <brotli/types.h>
+
+#include "../common/platform.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define MAX_PERM_ALLOCATED 128
+#define MAX_NEW_ALLOCATED 64
+#define MAX_NEW_FREED 64
+
+#define PERM_ALLOCATED_OFFSET 0
+#define NEW_ALLOCATED_OFFSET MAX_PERM_ALLOCATED
+#define NEW_FREED_OFFSET (MAX_PERM_ALLOCATED + MAX_NEW_ALLOCATED)
+
+void BrotliInitMemoryManager(  /* Fills in *m; performs no allocation itself. */
+    MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
+    void* opaque) {
+  if (!alloc_func) {  /* No custom allocator: the free_func and opaque arguments are ignored. */
+    m->alloc_func = BrotliDefaultAllocFunc;
+    m->free_func = BrotliDefaultFreeFunc;
+    m->opaque = 0;
+  } else {
+    m->alloc_func = alloc_func;
+    m->free_func = free_func;  /* Stored as given, without a NULL check. */
+    m->opaque = opaque;
+  }
+#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
+  m->is_oom = BROTLI_FALSE;
+  m->perm_allocated = 0;
+  m->new_allocated = 0;
+  m->new_freed = 0;
+#endif  /* BROTLI_ENCODER_EXIT_ON_OOM */
+}
+
+#if defined(BROTLI_ENCODER_EXIT_ON_OOM)
+
+void* BrotliAllocate(MemoryManager* m, size_t n) {  /* Exit-on-OOM variant: never returns NULL. */
+  void* result = m->alloc_func(m->opaque, n);
+  if (!result) exit(EXIT_FAILURE);  /* Allocation failure terminates the process. */
+  return result;
+}
+
+void BrotliFree(MemoryManager* m, void* p) {  /* Exit-on-OOM variant: forwards to free_func, no tracking. */
+  m->free_func(m->opaque, p);
+}
+
+void BrotliWipeOutMemoryManager(MemoryManager* m) {  /* Exit-on-OOM variant: nothing is tracked, so nothing to free. */
+  BROTLI_UNUSED(m);
+}
+
+#else  /* BROTLI_ENCODER_EXIT_ON_OOM */
+
+static void SortPointers(void** items, const size_t n) {  /* Sorts items[0..n) ascending by pointer value, in place. */
+  /* Shell sort. */
+  static const size_t gaps[] = {23, 10, 4, 1};
+  int g = 0;
+  for (; g < 4; ++g) {  /* 4 == number of entries in gaps[]. */
+    size_t gap = gaps[g];
+    size_t i;
+    for (i = gap; i < n; ++i) {  /* Insertion sort over elements `gap` apart. */
+      size_t j = i;
+      void* tmp = items[i];
+      for (; j >= gap && tmp < items[j - gap]; j -= gap) {
+        items[j] = items[j - gap];
+      }
+      items[j] = tmp;
+    }
+  }
+}
+
+static size_t Annihilate(void** a, size_t a_len, void** b, size_t b_len) {  /* Returns how many pointers were removed from each array. */
+  size_t a_read_index = 0;
+  size_t b_read_index = 0;
+  size_t a_write_index = 0;
+  size_t b_write_index = 0;
+  size_t annihilated = 0;
+  while (a_read_index < a_len && b_read_index < b_len) {  /* Merge-style walk; the caller sorts both arrays first. */
+    if (a[a_read_index] == b[b_read_index]) {
+      a_read_index++;  /* Present in both arrays: drop it from both. */
+      b_read_index++;
+      annihilated++;
+    } else if (a[a_read_index] < b[b_read_index]) {
+      a[a_write_index++] = a[a_read_index++];  /* Keep, compacting towards the front. */
+    } else {
+      b[b_write_index++] = b[b_read_index++];
+    }
+  }
+  while (a_read_index < a_len) a[a_write_index++] = a[a_read_index++];  /* Keep whatever is left over. */
+  while (b_read_index < b_len) b[b_write_index++] = b[b_read_index++];
+  return annihilated;
+}
+
+static void CollectGarbagePointers(MemoryManager* m) {  /* Empties the "new" lists, folding live pointers into the perm list. */
+  size_t annihilated;
+  SortPointers(m->pointers + NEW_ALLOCATED_OFFSET, m->new_allocated);
+  SortPointers(m->pointers + NEW_FREED_OFFSET, m->new_freed);
+  annihilated = Annihilate(  /* Step 1: cancel pointers that were both allocated and freed recently. */
+      m->pointers + NEW_ALLOCATED_OFFSET, m->new_allocated,
+      m->pointers + NEW_FREED_OFFSET, m->new_freed);
+  m->new_allocated -= annihilated;
+  m->new_freed -= annihilated;
+
+  if (m->new_freed != 0) {  /* Step 2: remaining frees must refer to perm-allocated pointers. */
+    annihilated = Annihilate(
+        m->pointers + PERM_ALLOCATED_OFFSET, m->perm_allocated,
+        m->pointers + NEW_FREED_OFFSET, m->new_freed);
+    m->perm_allocated -= annihilated;
+    m->new_freed -= annihilated;
+    BROTLI_DCHECK(m->new_freed == 0);
+  }
+
+  if (m->new_allocated != 0) {  /* Step 3: promote surviving new allocations to the perm list. */
+    BROTLI_DCHECK(m->perm_allocated + m->new_allocated <= MAX_PERM_ALLOCATED);
+    memcpy(m->pointers + PERM_ALLOCATED_OFFSET + m->perm_allocated,
+           m->pointers + NEW_ALLOCATED_OFFSET,
+           sizeof(void*) * m->new_allocated);
+    m->perm_allocated += m->new_allocated;
+    m->new_allocated = 0;
+    SortPointers(m->pointers + PERM_ALLOCATED_OFFSET, m->perm_allocated);  /* Keep perm sorted for the next Annihilate. */
+  }
+}
+
+void* BrotliAllocate(MemoryManager* m, size_t n) {  /* Tracking variant: returns NULL and sets is_oom on failure. */
+  void* result = m->alloc_func(m->opaque, n);
+  if (!result) {
+    m->is_oom = BROTLI_TRUE;  /* Never reset in this file except by BrotliInitMemoryManager. */
+    return NULL;
+  }
+  if (m->new_allocated == MAX_NEW_ALLOCATED) CollectGarbagePointers(m);  /* Make room in the new-allocated list. */
+  m->pointers[NEW_ALLOCATED_OFFSET + (m->new_allocated++)] = result;
+  return result;
+}
+
+void BrotliFree(MemoryManager* m, void* p) {  /* Tracking variant: frees p and records it as freed. */
+  if (!p) return;  /* NULL is ignored and not recorded. */
+  m->free_func(m->opaque, p);
+  if (m->new_freed == MAX_NEW_FREED) CollectGarbagePointers(m);  /* Make room in the new-freed list. */
+  m->pointers[NEW_FREED_OFFSET + (m->new_freed++)] = p;
+}
+
+void BrotliWipeOutMemoryManager(MemoryManager* m) {  /* Tracking variant: frees every pointer still recorded as live. */
+  size_t i;
+  CollectGarbagePointers(m);
+  /* Now all unfreed pointers are in perm-allocated list. */
+  for (i = 0; i < m->perm_allocated; ++i) {
+    m->free_func(m->opaque, m->pointers[PERM_ALLOCATED_OFFSET + i]);
+  }
+  m->perm_allocated = 0;
+}
+
+#endif  /* BROTLI_ENCODER_EXIT_ON_OOM */
+
+void* BrotliBootstrapAlloc(size_t size,  /* Allocates without needing a MemoryManager. */
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
+  if (!alloc_func && !free_func) {
+    return malloc(size);  /* Neither callback given: plain malloc. */
+  } else if (alloc_func && free_func) {
+    return alloc_func(opaque, size);
+  }
+  return NULL;  /* Exactly one of the two callbacks was given. */
+}
+
+void BrotliBootstrapFree(void* address, MemoryManager* m) {  /* Frees address using the callbacks stored in m. */
+  if (!address) {
+    /* Should not happen! */
+    return;
+  } else {
+    /* Copy values, as those would be freed. */
+    brotli_free_func free_func = m->free_func;  /* NOTE(review): presumably *m lives inside `address` - confirm. */
+    void* opaque = m->opaque;
+    free_func(opaque, address);
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/memory.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/memory.h
new file mode 100644
index 0000000000..cbe4e309a5
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/memory.h
@@ -0,0 +1,121 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Macros for memory management. */
+
+#ifndef BROTLI_ENC_MEMORY_H_
+#define BROTLI_ENC_MEMORY_H_
+
+#include <string.h>  /* memcpy */
+
+#include <brotli/types.h>
+
+#include "../common/platform.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#if !defined(BROTLI_ENCODER_CLEANUP_ON_OOM) && \
+    !defined(BROTLI_ENCODER_EXIT_ON_OOM)
+#define BROTLI_ENCODER_EXIT_ON_OOM
+#endif
+
+typedef struct MemoryManager {
+  brotli_alloc_func alloc_func;  /* User callback used to obtain memory. */
+  brotli_free_func free_func;  /* User callback used to release memory. */
+  void* opaque;  /* Handed back as first argument of both callbacks. */
+#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
+  BROTLI_BOOL is_oom;  /* Set once alloc_func has returned NULL. */
+  size_t perm_allocated;  /* Entries in the perm-allocated pointer list. */
+  size_t new_allocated;  /* Allocations recorded since the last collection. */
+  size_t new_freed;  /* Frees recorded by BrotliFree. */
+  void* pointers[256];  /* Backing table for the three pointer lists. */
+#endif  /* BROTLI_ENCODER_EXIT_ON_OOM */
+} MemoryManager;
+
+BROTLI_INTERNAL void BrotliInitMemoryManager(
+    MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
+    void* opaque);
+
+BROTLI_INTERNAL void* BrotliAllocate(MemoryManager* m, size_t n);
+#define BROTLI_ALLOC(M, T, N)  /* N elements of T; NULL unless N > 0 */  \
+  ((N) > 0 ? ((T*)BrotliAllocate((M), (N) * sizeof(T))) : NULL)
+
+BROTLI_INTERNAL void BrotliFree(MemoryManager* m, void* p);
+#define BROTLI_FREE(M, P) {  /* frees P, then resets it to NULL */ \
+  BrotliFree((M), (P));     \
+  P = NULL;                 \
+}
+
+#if defined(BROTLI_ENCODER_EXIT_ON_OOM)
+#define BROTLI_IS_OOM(M) (!!0)  /* Always false in this mode. */
+#else  /* BROTLI_ENCODER_EXIT_ON_OOM */
+#define BROTLI_IS_OOM(M) (!!(M)->is_oom)  /* Reads the manager's OOM flag. */
+#endif  /* BROTLI_ENCODER_EXIT_ON_OOM */
+
+/*
+BROTLI_IS_NULL is a fake check; BROTLI_IS_OOM does the heavy lifting.
+Its only purpose is to explain the state of things to static analyzers, so it
+compares against NULL (valid in both C and C++) rather than C++-only nullptr.
+NB: use ONLY together with BROTLI_IS_OOM, for allocations in the current scope.
+ */
+#if defined(__clang_analyzer__) && !defined(BROTLI_ENCODER_EXIT_ON_OOM)
+#define BROTLI_IS_NULL(A) ((A) == NULL)
+#else  /* defined(__clang_analyzer__) */
+#define BROTLI_IS_NULL(A) (!!0)
+#endif  /* defined(__clang_analyzer__) */
+
+BROTLI_INTERNAL void BrotliWipeOutMemoryManager(MemoryManager* m);
+
+/*
+Grows capacity to >= R (R if C is 0, else C doubled); copies, frees old array
+M: MemoryManager
+T: data type
+A: array (lvalue; reassigned to the new storage)
+C: capacity in elements (lvalue; updated to the new capacity)
+R: requested size in elements
+*/
+#define BROTLI_ENSURE_CAPACITY(M, T, A, C, R) {                    \
+  if (C < (R)) {                                                   \
+    size_t _new_size = (C == 0) ? (R) : C;                         \
+    T* new_array;                                                  \
+    while (_new_size < (R)) _new_size *= 2;                        \
+    new_array = BROTLI_ALLOC((M), T, _new_size);                   \
+    if (!BROTLI_IS_OOM(M) && !BROTLI_IS_NULL(new_array) && C != 0) \
+      memcpy(new_array, A, C * sizeof(T));                         \
+    BROTLI_FREE((M), A);                                           \
+    A = new_array;                                                 \
+    C = _new_size;                                                 \
+  }                                                                \
+}
+
+/*
+Increments S, grows A via BROTLI_ENSURE_CAPACITY if needed, then stores V last
+M: MemoryManager
+T: data type of the array elements
+A: array (lvalue; may be reassigned)
+C: array capacity in elements (lvalue; may be updated)
+S: array size in elements (lvalue; incremented by one)
+V: value to append; stored at index S - 1 without an OOM check
+*/
+#define BROTLI_ENSURE_CAPACITY_APPEND(M, T, A, C, S, V) { \
+  (S)++;                                                  \
+  BROTLI_ENSURE_CAPACITY(M, T, A, C, S);                  \
+  A[(S) - 1] = (V);                                       \
+}
+
+/* "Bootstrap" allocations are not tracked by memory manager; should be used
+   only to allocate MemoryManager itself (or structure containing it). */
+BROTLI_INTERNAL void* BrotliBootstrapAlloc(size_t size,
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
+BROTLI_INTERNAL void BrotliBootstrapFree(void* address, MemoryManager* m);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_MEMORY_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/metablock.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/metablock.c
new file mode 100644
index 0000000000..0c5c078d05
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/metablock.c
@@ -0,0 +1,677 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Algorithms for distributing the literals and commands of a metablock between
+   block types and contexts. */
+
+#include "metablock.h"
+
+#include <brotli/types.h>
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/platform.h"
+#include "bit_cost.h"
+#include "block_splitter.h"
+#include "cluster.h"
+#include "entropy_encode.h"
+#include "histogram.h"
+#include "memory.h"
+#include "quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+void BrotliInitDistanceParams(BrotliDistanceParams* dist_params,
+    uint32_t npostfix, uint32_t ndirect, BROTLI_BOOL large_window) {
+  /* The two coding parameters are stored as given; the alphabet sizes and
+     the maximum distance are derived from them below. */
+  dist_params->distance_postfix_bits = npostfix;
+  dist_params->num_direct_distance_codes = ndirect;
+
+  if (large_window) {
+    /* Large window: the alphabet limit and the maximum distance both come
+       from the code limit computed for BROTLI_MAX_ALLOWED_DISTANCE. */
+    const BrotliDistanceCodeLimit limit = BrotliCalculateDistanceCodeLimit(
+        BROTLI_MAX_ALLOWED_DISTANCE, npostfix, ndirect);
+    dist_params->alphabet_size_max = BROTLI_DISTANCE_ALPHABET_SIZE(
+        npostfix, ndirect, BROTLI_LARGE_MAX_DISTANCE_BITS);
+    dist_params->alphabet_size_limit = limit.max_alphabet_size;
+    dist_params->max_distance = limit.max_distance;
+  } else {
+    /* Regular window: the limit equals the full alphabet size, and the
+       maximum distance is a closed-form expression of the parameters. */
+    const uint32_t shift = npostfix + 2;
+    const uint32_t alphabet_size = BROTLI_DISTANCE_ALPHABET_SIZE(
+        npostfix, ndirect, BROTLI_MAX_DISTANCE_BITS);
+    dist_params->alphabet_size_max = alphabet_size;
+    dist_params->alphabet_size_limit = alphabet_size;
+    dist_params->max_distance =
+        ndirect + (1U << (BROTLI_MAX_DISTANCE_BITS + shift)) - (1U << shift);
+  }
+}
+
+static void RecomputeDistancePrefixes(Command* cmds,
+                                      size_t num_commands,
+                                      const BrotliDistanceParams* orig_params,
+                                      const BrotliDistanceParams* new_params) {
+  size_t idx;
+  /* With identical postfix/direct settings the stored codes stay valid. */
+  const int params_differ =
+      orig_params->distance_postfix_bits != new_params->distance_postfix_bits ||
+      orig_params->num_direct_distance_codes !=
+          new_params->num_direct_distance_codes;
+  if (!params_differ) return;
+  /* Re-encode in place each copy command whose cmd_prefix_ is >= 128. */
+  for (idx = 0; idx < num_commands; ++idx) {
+    Command* const cmd = cmds + idx;
+    uint32_t distance_code;
+    if (!CommandCopyLen(cmd) || cmd->cmd_prefix_ < 128) continue;
+    distance_code = CommandRestoreDistanceCode(cmd, orig_params);
+    PrefixEncodeCopyDistance(distance_code,
+                             new_params->num_direct_distance_codes,
+                             new_params->distance_postfix_bits,
+                             &cmd->dist_prefix_, &cmd->dist_extra_);
+  }
+}
+
+static BROTLI_BOOL ComputeDistanceCost(const Command* cmds,
+                                       size_t num_commands,
+                                       const BrotliDistanceParams* orig_params,
+                                       const BrotliDistanceParams* new_params,
+                                       double* cost,
+                                       HistogramDistance* tmp) {
+  const int same_params =
+      orig_params->distance_postfix_bits == new_params->distance_postfix_bits &&
+      orig_params->num_direct_distance_codes ==
+          new_params->num_direct_distance_codes;
+  double extra_bit_total = 0.0;
+  size_t idx;
+
+  /* tmp is caller-provided scratch; start from an empty histogram. */
+  HistogramClearDistance(tmp);
+
+  for (idx = 0; idx < num_commands; ++idx) {
+    const Command* const cmd = cmds + idx;
+    uint16_t prefix;
+    uint32_t extra;  /* Output slot for the encoder; not used afterwards. */
+    if (!CommandCopyLen(cmd) || cmd->cmd_prefix_ < 128) continue;
+    if (same_params) {
+      /* Parameters are unchanged, so the stored prefix can be reused. */
+      prefix = cmd->dist_prefix_;
+    } else {
+      const uint32_t distance = CommandRestoreDistanceCode(cmd, orig_params);
+      /* Candidate parameters do not allow a distance this large: reject. */
+      if (distance > new_params->max_distance) return BROTLI_FALSE;
+      PrefixEncodeCopyDistance(distance,
+                               new_params->num_direct_distance_codes,
+                               new_params->distance_postfix_bits,
+                               &prefix, &extra);
+    }
+    /* The low 10 bits are the histogram symbol; the bits above them are
+       accumulated into the extra-bit total. */
+    HistogramAddDistance(tmp, prefix & 0x3FF);
+    extra_bit_total += prefix >> 10;
+  }
+
+  /* Total = population cost of the histogram + accumulated extra bits. */
+  *cost = BrotliPopulationCostDistance(tmp) + extra_bit_total;
+  return BROTLI_TRUE;
+}
+
+void BrotliBuildMetaBlock(MemoryManager* m,
+                          const uint8_t* ringbuffer,
+                          const size_t pos,
+                          const size_t mask,
+                          BrotliEncoderParams* params,
+                          uint8_t prev_byte,
+                          uint8_t prev_byte2,
+                          Command* cmds,
+                          size_t num_commands,
+                          ContextType literal_context_mode,
+                          MetaBlockSplit* mb) {
+  /* Histogram ids need to fit in one byte. */
+  static const size_t kMaxNumberOfHistograms = 256;
+  HistogramDistance* distance_histograms;
+  HistogramLiteral* literal_histograms;
+  ContextType* literal_context_modes = NULL;
+  size_t literal_histograms_size;
+  size_t distance_histograms_size;
+  size_t i;
+  size_t literal_context_multiplier = 1;
+  uint32_t npostfix;
+  uint32_t ndirect_msb = 0;
+  BROTLI_BOOL check_orig = BROTLI_TRUE;
+  double best_dist_cost = 1e99;
+  BrotliDistanceParams orig_params = params->dist;
+  BrotliDistanceParams new_params = params->dist;
+  HistogramDistance* tmp = BROTLI_ALLOC(m, HistogramDistance, 1);
+
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tmp)) return;
+  /* Try (npostfix, ndirect) candidates, keeping the cheapest distance cost. */
+  for (npostfix = 0; npostfix <= BROTLI_MAX_NPOSTFIX; npostfix++) {
+    for (; ndirect_msb < 16; ndirect_msb++) {
+      uint32_t ndirect = ndirect_msb << npostfix;
+      BROTLI_BOOL skip;
+      double dist_cost;
+      BrotliInitDistanceParams(&new_params, npostfix, ndirect,
+                               params->large_window);
+      if (npostfix == orig_params.distance_postfix_bits &&
+          ndirect == orig_params.num_direct_distance_codes) {
+        check_orig = BROTLI_FALSE;
+      }
+      skip = !ComputeDistanceCost(
+          cmds, num_commands, &orig_params, &new_params, &dist_cost, tmp);
+      if (skip || (dist_cost > best_dist_cost)) {
+        break;
+      }
+      best_dist_cost = dist_cost;
+      params->dist = new_params;
+    }
+    if (ndirect_msb > 0) ndirect_msb--;  /* Step back by one (if possible), */
+    ndirect_msb /= 2;  /* then halve: the next npostfix resumes from here. */
+  }
+  if (check_orig) {  /* The search never visited orig_params. */
+    double dist_cost;
+    ComputeDistanceCost(cmds, num_commands, &orig_params, &orig_params,
+                        &dist_cost, tmp);
+    if (dist_cost < best_dist_cost) {
+      /* NB: currently unused; uncomment when more param tuning is added. */
+      /* best_dist_cost = dist_cost; */
+      params->dist = orig_params;
+    }
+  }
+  BROTLI_FREE(m, tmp);
+  RecomputeDistancePrefixes(cmds, num_commands, &orig_params, &params->dist);
+  /* Partition literals, commands and distances into blocks. */
+  BrotliSplitBlock(m, cmds, num_commands,
+                   ringbuffer, pos, mask, params,
+                   &mb->literal_split,
+                   &mb->command_split,
+                   &mb->distance_split);
+  if (BROTLI_IS_OOM(m)) return;
+  /* With context modeling on, every literal block type gets the same mode. */
+  if (!params->disable_literal_context_modeling) {
+    literal_context_multiplier = 1 << BROTLI_LITERAL_CONTEXT_BITS;
+    literal_context_modes =
+        BROTLI_ALLOC(m, ContextType, mb->literal_split.num_types);
+    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literal_context_modes)) return;
+    for (i = 0; i < mb->literal_split.num_types; ++i) {
+      literal_context_modes[i] = literal_context_mode;
+    }
+  }
+  /* One literal histogram per (block type, context) combination. */
+  literal_histograms_size =
+      mb->literal_split.num_types * literal_context_multiplier;
+  literal_histograms =
+      BROTLI_ALLOC(m, HistogramLiteral, literal_histograms_size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literal_histograms)) return;
+  ClearHistogramsLiteral(literal_histograms, literal_histograms_size);
+  /* Distance histograms: num_types times 2^BROTLI_DISTANCE_CONTEXT_BITS. */
+  distance_histograms_size =
+      mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
+  distance_histograms =
+      BROTLI_ALLOC(m, HistogramDistance, distance_histograms_size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(distance_histograms)) return;
+  ClearHistogramsDistance(distance_histograms, distance_histograms_size);
+  /* Command histograms are allocated directly in mb, one per block type. */
+  BROTLI_DCHECK(mb->command_histograms == 0);
+  mb->command_histograms_size = mb->command_split.num_types;
+  mb->command_histograms =
+      BROTLI_ALLOC(m, HistogramCommand, mb->command_histograms_size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->command_histograms)) return;
+  ClearHistogramsCommand(mb->command_histograms, mb->command_histograms_size);
+  /* Populate all three histogram sets from the command stream. */
+  BrotliBuildHistogramsWithContext(cmds, num_commands,
+      &mb->literal_split, &mb->command_split, &mb->distance_split,
+      ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_modes,
+      literal_histograms, mb->command_histograms, distance_histograms);
+  BROTLI_FREE(m, literal_context_modes);
+  /* Cluster the literal histograms into mb's histograms and context map. */
+  BROTLI_DCHECK(mb->literal_context_map == 0);
+  mb->literal_context_map_size =
+      mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
+  mb->literal_context_map =
+      BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->literal_context_map)) return;
+
+  BROTLI_DCHECK(mb->literal_histograms == 0);
+  mb->literal_histograms_size = mb->literal_context_map_size;
+  mb->literal_histograms =
+      BROTLI_ALLOC(m, HistogramLiteral, mb->literal_histograms_size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->literal_histograms)) return;
+
+  BrotliClusterHistogramsLiteral(m, literal_histograms, literal_histograms_size,
+      kMaxNumberOfHistograms, mb->literal_histograms,
+      &mb->literal_histograms_size, mb->literal_context_map);
+  if (BROTLI_IS_OOM(m)) return;
+  BROTLI_FREE(m, literal_histograms);
+
+  if (params->disable_literal_context_modeling) {
+    /* Distribute assignment to all contexts. */
+    for (i = mb->literal_split.num_types; i != 0;) {
+      size_t j = 0;
+      i--;
+      for (; j < (1 << BROTLI_LITERAL_CONTEXT_BITS); j++) {
+        mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] =
+            mb->literal_context_map[i];
+      }
+    }
+  }
+  /* Cluster the distance histograms the same way. */
+  BROTLI_DCHECK(mb->distance_context_map == 0);
+  mb->distance_context_map_size =
+      mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
+  mb->distance_context_map =
+      BROTLI_ALLOC(m, uint32_t, mb->distance_context_map_size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->distance_context_map)) return;
+
+  BROTLI_DCHECK(mb->distance_histograms == 0);
+  mb->distance_histograms_size = mb->distance_context_map_size;
+  mb->distance_histograms =
+      BROTLI_ALLOC(m, HistogramDistance, mb->distance_histograms_size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->distance_histograms)) return;
+
+  BrotliClusterHistogramsDistance(m, distance_histograms,
+                                  mb->distance_context_map_size,
+                                  kMaxNumberOfHistograms,
+                                  mb->distance_histograms,
+                                  &mb->distance_histograms_size,
+                                  mb->distance_context_map);
+  if (BROTLI_IS_OOM(m)) return;
+  BROTLI_FREE(m, distance_histograms);
+}
+
+#define FN(X) X ## Literal
+#include "metablock_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Command
+#include "metablock_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Distance
+#include "metablock_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define BROTLI_MAX_STATIC_CONTEXTS 13
+
+/* Greedy block splitter for one block category (literal, command or distance).
+   Gathers histograms for all context buckets. */
+typedef struct ContextBlockSplitter {
+  /* Alphabet size of particular block category. */
+  size_t alphabet_size_;
+  size_t num_contexts_;  /* Histograms kept per block type. */
+  size_t max_block_types_;  /* Upper bound for split_->num_types. */
+  /* We collect at least this many symbols for each block. */
+  size_t min_block_size_;
+  /* We merge histograms A and B if
+       entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
+     where A is the current histogram and B is the histogram of the last or the
+     second last block type. */
+  double split_threshold_;
+
+  size_t num_blocks_;  /* Blocks emitted so far. */
+  BlockSplit* split_;  /* not owned */
+  HistogramLiteral* histograms_;  /* not owned */
+  size_t* histograms_size_;  /* not owned */
+
+  /* The number of symbols that we want to collect before deciding on whether
+     or not to merge the block with a previous one or emit a new block. */
+  size_t target_block_size_;
+  /* The number of symbols in the current histogram. */
+  size_t block_size_;
+  /* Offset of the current histogram. */
+  size_t curr_histogram_ix_;
+  /* Offset of the histograms of the previous two block types. */
+  size_t last_histogram_ix_[2];
+  /* Entropy of the previous two block types. */
+  double last_entropy_[2 * BROTLI_MAX_STATIC_CONTEXTS];
+  /* The number of times we merged the current block with the last one. */
+  size_t merge_last_count_;
+} ContextBlockSplitter;
+
+static void InitContextBlockSplitter(
+    MemoryManager* m, ContextBlockSplitter* self, size_t alphabet_size,
+    size_t num_contexts, size_t min_block_size, double split_threshold,
+    size_t num_symbols, BlockSplit* split, HistogramLiteral** histograms,
+    size_t* histograms_size) {
+  size_t max_num_blocks = num_symbols / min_block_size + 1;
+  size_t max_num_types;
+  BROTLI_DCHECK(num_contexts <= BROTLI_MAX_STATIC_CONTEXTS);
+  /* Reset the splitter state; the buffers are sized further below. */
+  self->alphabet_size_ = alphabet_size;
+  self->num_contexts_ = num_contexts;
+  self->max_block_types_ = BROTLI_MAX_NUMBER_OF_BLOCK_TYPES / num_contexts;
+  self->min_block_size_ = min_block_size;
+  self->split_threshold_ = split_threshold;
+  self->num_blocks_ = 0;
+  self->split_ = split;
+  self->histograms_size_ = histograms_size;
+  self->target_block_size_ = min_block_size;
+  self->block_size_ = 0;
+  self->curr_histogram_ix_ = 0;
+  self->merge_last_count_ = 0;
+
+  /* We have to allocate one more histogram than the maximum number of block
+     types for the current histogram when the meta-block is too big. */
+  max_num_types =
+      BROTLI_MIN(size_t, max_num_blocks, self->max_block_types_ + 1);
+  BROTLI_ENSURE_CAPACITY(m, uint8_t,
+      split->types, split->types_alloc_size, max_num_blocks);
+  BROTLI_ENSURE_CAPACITY(m, uint32_t,
+      split->lengths, split->lengths_alloc_size, max_num_blocks);
+  if (BROTLI_IS_OOM(m)) return;
+  split->num_blocks = max_num_blocks;
+  /* No second OOM check here: nothing has allocated since the one above. */
+  BROTLI_DCHECK(*histograms == 0);
+  *histograms_size = max_num_types * num_contexts;
+  *histograms = BROTLI_ALLOC(m, HistogramLiteral, *histograms_size);
+  self->histograms_ = *histograms;
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(*histograms)) return;
+  /* Clear only current histogram. */
+  ClearHistogramsLiteral(&self->histograms_[0], num_contexts);
+  self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
+}
+
+/* Finishes the current block (the very first one always as a new type) by:
+     (1) emitting the current block with a new block type; or
+     (2) emitting the current block with the type of the second last block; or
+     (3) merging the current block with the last block. */
+static void ContextBlockSplitterFinishBlock(
+    ContextBlockSplitter* self, MemoryManager* m, BROTLI_BOOL is_final) {
+  BlockSplit* split = self->split_;
+  const size_t num_contexts = self->num_contexts_;
+  double* last_entropy = self->last_entropy_;
+  HistogramLiteral* histograms = self->histograms_;
+  /* A block shorter than min_block_size_ is counted as min_block_size_. */
+  if (self->block_size_ < self->min_block_size_) {
+    self->block_size_ = self->min_block_size_;
+  }
+  if (self->num_blocks_ == 0) {
+    size_t i;
+    /* Create first block. */
+    split->lengths[0] = (uint32_t)self->block_size_;
+    split->types[0] = 0;
+
+    for (i = 0; i < num_contexts; ++i) {
+      last_entropy[i] =
+          BitsEntropy(histograms[i].data_, self->alphabet_size_);
+      last_entropy[num_contexts + i] = last_entropy[i];
+    }
+    ++self->num_blocks_;
+    ++split->num_types;
+    self->curr_histogram_ix_ += num_contexts;
+    if (self->curr_histogram_ix_ < *self->histograms_size_) {
+      ClearHistogramsLiteral(
+          &self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
+    }
+    self->block_size_ = 0;
+  } else if (self->block_size_ > 0) {
+    /* Try merging the set of histograms for the current block type with the
+       respective set of histograms for the last and second last block types.
+       Decide over the split based on the total reduction of entropy across
+       all contexts. */
+    double entropy[BROTLI_MAX_STATIC_CONTEXTS];
+    HistogramLiteral* combined_histo =
+        BROTLI_ALLOC(m, HistogramLiteral, 2 * num_contexts);
+    double combined_entropy[2 * BROTLI_MAX_STATIC_CONTEXTS];
+    double diff[2] = { 0.0 };
+    size_t i;
+    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(combined_histo)) return;
+    for (i = 0; i < num_contexts; ++i) {
+      size_t curr_histo_ix = self->curr_histogram_ix_ + i;
+      size_t j;
+      entropy[i] = BitsEntropy(histograms[curr_histo_ix].data_,
+                               self->alphabet_size_);
+      for (j = 0; j < 2; ++j) {
+        size_t jx = j * num_contexts + i;
+        size_t last_histogram_ix = self->last_histogram_ix_[j] + i;
+        combined_histo[jx] = histograms[curr_histo_ix];
+        HistogramAddHistogramLiteral(&combined_histo[jx],
+            &histograms[last_histogram_ix]);
+        combined_entropy[jx] = BitsEntropy(
+            &combined_histo[jx].data_[0], self->alphabet_size_);
+        diff[j] += combined_entropy[jx] - entropy[i] - last_entropy[jx];
+      }
+    }
+    /* diff[0] / diff[1]: entropy growth if merged with last / second last. */
+    if (split->num_types < self->max_block_types_ &&
+        diff[0] > self->split_threshold_ &&
+        diff[1] > self->split_threshold_) {
+      /* Create new block. */
+      split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
+      split->types[self->num_blocks_] = (uint8_t)split->num_types;
+      self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
+      self->last_histogram_ix_[0] = split->num_types * num_contexts;
+      for (i = 0; i < num_contexts; ++i) {
+        last_entropy[num_contexts + i] = last_entropy[i];
+        last_entropy[i] = entropy[i];
+      }
+      ++self->num_blocks_;
+      ++split->num_types;
+      self->curr_histogram_ix_ += num_contexts;
+      if (self->curr_histogram_ix_ < *self->histograms_size_) {
+        ClearHistogramsLiteral(
+            &self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
+      }
+      self->block_size_ = 0;
+      self->merge_last_count_ = 0;
+      self->target_block_size_ = self->min_block_size_;
+    } else if (diff[1] < diff[0] - 20.0) {
+      /* Combine this block with second last block. */
+      split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
+      split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
+      BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
+      for (i = 0; i < num_contexts; ++i) {
+        histograms[self->last_histogram_ix_[0] + i] =
+            combined_histo[num_contexts + i];
+        last_entropy[num_contexts + i] = last_entropy[i];
+        last_entropy[i] = combined_entropy[num_contexts + i];
+        HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
+      }
+      ++self->num_blocks_;
+      self->block_size_ = 0;
+      self->merge_last_count_ = 0;
+      self->target_block_size_ = self->min_block_size_;
+    } else {
+      /* Combine this block with last block. */
+      split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
+      for (i = 0; i < num_contexts; ++i) {
+        histograms[self->last_histogram_ix_[0] + i] = combined_histo[i];
+        last_entropy[i] = combined_entropy[i];
+        if (split->num_types == 1) {
+          last_entropy[num_contexts + i] = last_entropy[i];
+        }
+        HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
+      }
+      self->block_size_ = 0;
+      if (++self->merge_last_count_ > 1) {  /* From the second merge in a row. */
+        self->target_block_size_ += self->min_block_size_;
+      }
+    }
+    BROTLI_FREE(m, combined_histo);
+  }
+  if (is_final) {  /* Publish the final counts to the outputs. */
+    *self->histograms_size_ = split->num_types * num_contexts;
+    split->num_blocks = self->num_blocks_;
+  }
+}
+
+/* Records `symbol` in the histogram of the current block type for `context`.
+   Once block_size_ hits target_block_size_, the block is finished (non-final),
+   which decides whether it becomes a new block or is merged. */
+static void ContextBlockSplitterAddSymbol(
+    ContextBlockSplitter* self, MemoryManager* m,
+    size_t symbol, size_t context) {
+  HistogramLiteral* const slot =
+      &self->histograms_[self->curr_histogram_ix_ + context];
+  HistogramAddLiteral(slot, symbol);
+  self->block_size_ += 1;
+  if (self->block_size_ != self->target_block_size_) return;
+  ContextBlockSplitterFinishBlock(self, m, /* is_final = */ BROTLI_FALSE);
+}
+
+static void MapStaticContexts(MemoryManager* m,
+                              size_t num_contexts,
+                              const uint32_t* static_context_map,
+                              MetaBlockSplit* mb) {
+  const size_t contexts_per_type = 1u << BROTLI_LITERAL_CONTEXT_BITS;
+  size_t type;
+  BROTLI_DCHECK(mb->literal_context_map == 0);
+  mb->literal_context_map_size =
+      mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
+  mb->literal_context_map =
+      BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->literal_context_map)) return;
+  /* Row t is the static map with every id offset by t * num_contexts. */
+  for (type = 0; type < mb->literal_split.num_types; ++type) {
+    const uint32_t base = (uint32_t)(type * num_contexts);
+    uint32_t* const row = mb->literal_context_map + type * contexts_per_type;
+    size_t ctx;
+    for (ctx = 0; ctx < contexts_per_type; ++ctx)
+      row[ctx] = base + static_context_map[ctx];
+  }
+}
+
+typedef struct GreedyMetablockArena {
+  union {  /* Only one member is in use, selected by num_contexts. */
+    BlockSplitterLiteral plain;  /* Used when num_contexts == 1. */
+    ContextBlockSplitter ctx;  /* Used otherwise. */
+  } lit_blocks;
+  BlockSplitterCommand cmd_blocks;
+  BlockSplitterDistance dist_blocks;
+} GreedyMetablockArena;
+
+static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal(
+    MemoryManager* m, GreedyMetablockArena* arena, const uint8_t* ringbuffer,
+    size_t pos, size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
+    ContextLut literal_context_lut, const size_t num_contexts,
+    const uint32_t* static_context_map, const Command* commands,
+    size_t n_commands, MetaBlockSplit* mb) {
+  size_t num_literals = 0;
+  size_t i;
+  for (i = 0; i < n_commands; ++i) {
+    num_literals += commands[i].insert_len_;
+  }
+  /* Literal splitter: the plain variant for one context, else contextual. */
+  if (num_contexts == 1) {
+    InitBlockSplitterLiteral(m, &arena->lit_blocks.plain, 256, 512, 400.0,
+        num_literals, &mb->literal_split, &mb->literal_histograms,
+        &mb->literal_histograms_size);
+  } else {
+    InitContextBlockSplitter(m, &arena->lit_blocks.ctx, 256, num_contexts, 512,
+        400.0, num_literals, &mb->literal_split, &mb->literal_histograms,
+        &mb->literal_histograms_size);
+  }
+  if (BROTLI_IS_OOM(m)) return;
+  InitBlockSplitterCommand(m, &arena->cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS,
+      1024, 500.0, n_commands, &mb->command_split, &mb->command_histograms,
+      &mb->command_histograms_size);
+  if (BROTLI_IS_OOM(m)) return;
+  InitBlockSplitterDistance(m, &arena->dist_blocks, 64, 512, 100.0, n_commands,
+      &mb->distance_split, &mb->distance_histograms,
+      &mb->distance_histograms_size);
+  if (BROTLI_IS_OOM(m)) return;
+  /* Feed every command, its inserted literals and its distance symbol. */
+  for (i = 0; i < n_commands; ++i) {
+    const Command cmd = commands[i];
+    size_t j;
+    BlockSplitterAddSymbolCommand(&arena->cmd_blocks, cmd.cmd_prefix_);
+    for (j = cmd.insert_len_; j != 0; --j) {
+      uint8_t literal = ringbuffer[pos & mask];
+      if (num_contexts == 1) {
+        BlockSplitterAddSymbolLiteral(&arena->lit_blocks.plain, literal);
+      } else {
+        size_t context =
+            BROTLI_CONTEXT(prev_byte, prev_byte2, literal_context_lut);
+        ContextBlockSplitterAddSymbol(&arena->lit_blocks.ctx, m, literal,
+                                      static_context_map[context]);
+        if (BROTLI_IS_OOM(m)) return;
+      }
+      prev_byte2 = prev_byte;
+      prev_byte = literal;
+      ++pos;
+    }
+    pos += CommandCopyLen(&cmd);
+    if (CommandCopyLen(&cmd)) {  /* Context bytes follow the copied data. */
+      prev_byte2 = ringbuffer[(pos - 2) & mask];
+      prev_byte = ringbuffer[(pos - 1) & mask];
+      if (cmd.cmd_prefix_ >= 128) {
+        BlockSplitterAddSymbolDistance(
+            &arena->dist_blocks, cmd.dist_prefix_ & 0x3FF);
+      }
+    }
+  }
+  /* Close the last block of each category. */
+  if (num_contexts == 1) {
+    BlockSplitterFinishBlockLiteral(
+        &arena->lit_blocks.plain, /* is_final = */ BROTLI_TRUE);
+  } else {
+    ContextBlockSplitterFinishBlock(
+        &arena->lit_blocks.ctx, m, /* is_final = */ BROTLI_TRUE);
+    if (BROTLI_IS_OOM(m)) return;
+  }
+  BlockSplitterFinishBlockCommand(
+      &arena->cmd_blocks, /* is_final = */ BROTLI_TRUE);
+  BlockSplitterFinishBlockDistance(
+      &arena->dist_blocks, /* is_final = */ BROTLI_TRUE);
+  /* With several contexts, mb also needs the literal context map. */
+  if (num_contexts > 1) {
+    MapStaticContexts(m, num_contexts, static_context_map, mb);
+  }
+}
+
+void BrotliBuildMetaBlockGreedy(MemoryManager* m,
+                                const uint8_t* ringbuffer,
+                                size_t pos,
+                                size_t mask,
+                                uint8_t prev_byte,
+                                uint8_t prev_byte2,
+                                ContextLut literal_context_lut,
+                                size_t num_contexts,
+                                const uint32_t* static_context_map,
+                                const Command* commands,
+                                size_t n_commands,
+                                MetaBlockSplit* mb) {
+  GreedyMetablockArena* arena = BROTLI_ALLOC(m, GreedyMetablockArena, 1);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(arena)) return;
+  if (num_contexts == 1) {  /* presumably aids inlining -- TODO confirm */
+    BrotliBuildMetaBlockGreedyInternal(m, arena, ringbuffer, pos, mask,
+        prev_byte, prev_byte2, literal_context_lut, 1, NULL, commands,
+        n_commands, mb);
+  } else {
+    BrotliBuildMetaBlockGreedyInternal(m, arena, ringbuffer, pos, mask,
+        prev_byte, prev_byte2, literal_context_lut, num_contexts,
+        static_context_map, commands, n_commands, mb);
+  }
+  BROTLI_FREE(m, arena);  /* Splitter scratch only; results live in mb. */
+}
+
+void BrotliOptimizeHistograms(uint32_t num_distance_codes,
+                              MetaBlockSplit* mb) {
+  /* One scratch buffer, sized for the command alphabet, is passed to every
+     call below. */
+  uint8_t rle_scratch[BROTLI_NUM_COMMAND_SYMBOLS];
+  size_t k;
+  /* Literal histograms use a fixed alphabet of 256 symbols. */
+  for (k = 0; k < mb->literal_histograms_size; ++k)
+    BrotliOptimizeHuffmanCountsForRle(
+        256, mb->literal_histograms[k].data_, rle_scratch);
+  /* Command histograms use BROTLI_NUM_COMMAND_SYMBOLS. */
+  for (k = 0; k < mb->command_histograms_size; ++k)
+    BrotliOptimizeHuffmanCountsForRle(BROTLI_NUM_COMMAND_SYMBOLS,
+        mb->command_histograms[k].data_, rle_scratch);
+  /* Distance histograms use the caller-supplied alphabet size. */
+  for (k = 0; k < mb->distance_histograms_size; ++k)
+    BrotliOptimizeHuffmanCountsForRle(
+        num_distance_codes, mb->distance_histograms[k].data_, rle_scratch);
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/metablock.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/metablock.h
new file mode 100644
index 0000000000..db38f8fd21
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/metablock.h
@@ -0,0 +1,106 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Algorithms for distributing the literals and commands of a metablock between
+   block types and contexts. */
+
+#ifndef BROTLI_ENC_METABLOCK_H_
+#define BROTLI_ENC_METABLOCK_H_
+
+#include <brotli/types.h>
+
+#include "../common/context.h"
+#include "../common/platform.h"
+#include "block_splitter.h"
+#include "command.h"
+#include "histogram.h"
+#include "memory.h"
+#include "quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct MetaBlockSplit {
+  BlockSplit literal_split;   /* set up by BrotliInitBlockSplit, torn down */
+  BlockSplit command_split;   /* by BrotliDestroyBlockSplit (see the two */
+  BlockSplit distance_split;  /* inline helpers below) */
+  uint32_t* literal_context_map;  /* released with BROTLI_FREE */
+  size_t literal_context_map_size;
+  uint32_t* distance_context_map;  /* released with BROTLI_FREE */
+  size_t distance_context_map_size;
+  HistogramLiteral* literal_histograms;  /* literal_histograms_size items */
+  size_t literal_histograms_size;
+  HistogramCommand* command_histograms;  /* command_histograms_size items */
+  size_t command_histograms_size;
+  HistogramDistance* distance_histograms;  /* distance_histograms_size items */
+  size_t distance_histograms_size;
+} MetaBlockSplit;
+
+static BROTLI_INLINE void InitMetaBlockSplit(MetaBlockSplit* mb) {
+  BrotliInitBlockSplit(&mb->literal_split);  /* splits have their own init */
+  BrotliInitBlockSplit(&mb->command_split);
+  BrotliInitBlockSplit(&mb->distance_split);
+  mb->literal_context_map = 0;  /* every pointer and size below starts at 0 */
+  mb->literal_context_map_size = 0;
+  mb->distance_context_map = 0;
+  mb->distance_context_map_size = 0;
+  mb->literal_histograms = 0;
+  mb->literal_histograms_size = 0;
+  mb->command_histograms = 0;
+  mb->command_histograms_size = 0;
+  mb->distance_histograms = 0;
+  mb->distance_histograms_size = 0;
+}
+
+static BROTLI_INLINE void DestroyMetaBlockSplit(
+    MemoryManager* m, MetaBlockSplit* mb) {
+  BrotliDestroyBlockSplit(m, &mb->literal_split);  /* splits first, */
+  BrotliDestroyBlockSplit(m, &mb->command_split);
+  BrotliDestroyBlockSplit(m, &mb->distance_split);
+  BROTLI_FREE(m, mb->literal_context_map);  /* then the five arrays; */
+  BROTLI_FREE(m, mb->distance_context_map);
+  BROTLI_FREE(m, mb->literal_histograms);
+  BROTLI_FREE(m, mb->command_histograms);
+  BROTLI_FREE(m, mb->distance_histograms);  /* *_size fields are not reset */
+}
+
+/* Uses the slow shortest-path block splitter and does context clustering.
+   The distance parameters are dynamically selected based on the commands
+   which get recomputed under the new distance parameters. The new distance
+   parameters are stored into *params. */
+BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m,
+                                          const uint8_t* ringbuffer,
+                                          const size_t pos,
+                                          const size_t mask,
+                                          BrotliEncoderParams* params,
+                                          uint8_t prev_byte,
+                                          uint8_t prev_byte2,
+                                          Command* cmds,
+                                          size_t num_commands,
+                                          ContextType literal_context_mode,
+                                          MetaBlockSplit* mb);
+
+/* Uses a fast greedy block splitter that tries to merge current block with the
+   last or the second last block and uses a static context clustering which
+   is the same for all block types. */
+BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy(
+    MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
+    uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut,
+    size_t num_contexts, const uint32_t* static_context_map,
+    const Command* commands, size_t n_commands, MetaBlockSplit* mb);
+
+BROTLI_INTERNAL void BrotliOptimizeHistograms(uint32_t num_distance_codes,
+                                              MetaBlockSplit* mb);
+
+BROTLI_INTERNAL void BrotliInitDistanceParams(BrotliDistanceParams* params,
+    uint32_t npostfix, uint32_t ndirect, BROTLI_BOOL large_window);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_METABLOCK_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/metablock_inc.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/metablock_inc.h
new file mode 100644
index 0000000000..f9393869ab
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/metablock_inc.h
@@ -0,0 +1,185 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN */
+
+#define HistogramType FN(Histogram)
+
+/* Greedy block splitter for one block category (literal, command or distance).
+*/
+typedef struct FN(BlockSplitter) {
+  /* Alphabet size of particular block category. */
+  size_t alphabet_size_;
+  /* We collect at least this many symbols for each block. */
+  size_t min_block_size_;
+  /* We merge histograms A and B if
+       entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
+     where A is the current histogram and B is the histogram of the last or the
+     second last block type. */
+  double split_threshold_;
+
+  size_t num_blocks_;  /* blocks emitted so far into split_ */
+  BlockSplit* split_;  /* not owned */
+  HistogramType* histograms_;  /* not owned */
+  size_t* histograms_size_;  /* not owned; final block stores num_types here */
+
+  /* Temporary storage for BlockSplitterFinishBlock. */
+  HistogramType combined_histo[2];
+
+  /* The number of symbols that we want to collect before deciding on whether
+     or not to merge the block with a previous one or emit a new block. */
+  size_t target_block_size_;
+  /* The number of symbols in the current histogram. */
+  size_t block_size_;
+  /* Offset of the current histogram. */
+  size_t curr_histogram_ix_;
+  /* Offset of the histograms of the previous two block types. */
+  size_t last_histogram_ix_[2];
+  /* Entropy of the previous two block types. */
+  double last_entropy_[2];
+  /* The number of times we merged the current block with the last one. */
+  size_t merge_last_count_;
+} FN(BlockSplitter);
+
+static void FN(InitBlockSplitter)(
+    MemoryManager* m, FN(BlockSplitter)* self, size_t alphabet_size,
+    size_t min_block_size, double split_threshold, size_t num_symbols,
+    BlockSplit* split, HistogramType** histograms, size_t* histograms_size) {
+  size_t max_num_blocks = num_symbols / min_block_size + 1;  /* divisor != 0 */
+  /* We have to allocate one more histogram than the maximum number of block
+     types for the current histogram when the meta-block is too big. */
+  size_t max_num_types =
+      BROTLI_MIN(size_t, max_num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 1);
+  self->alphabet_size_ = alphabet_size;
+  self->min_block_size_ = min_block_size;
+  self->split_threshold_ = split_threshold;
+  self->num_blocks_ = 0;  /* 0 makes the first FinishBlock seed last_entropy_ */
+  self->split_ = split;
+  self->histograms_size_ = histograms_size;
+  self->target_block_size_ = min_block_size;
+  self->block_size_ = 0;
+  self->curr_histogram_ix_ = 0;
+  self->merge_last_count_ = 0;
+  BROTLI_ENSURE_CAPACITY(m, uint8_t,
+      split->types, split->types_alloc_size, max_num_blocks);
+  BROTLI_ENSURE_CAPACITY(m, uint32_t,
+      split->lengths, split->lengths_alloc_size, max_num_blocks);
+  if (BROTLI_IS_OOM(m)) return;  /* histograms are not allocated on this path */
+  self->split_->num_blocks = max_num_blocks;  /* upper bound, trimmed later */
+  BROTLI_DCHECK(*histograms == 0);
+  *histograms_size = max_num_types;  /* upper bound, trimmed later */
+  *histograms = BROTLI_ALLOC(m, HistogramType, *histograms_size);
+  self->histograms_ = *histograms;
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(*histograms)) return;
+  /* Clear only current histogram. */
+  FN(HistogramClear)(&self->histograms_[0]);
+  self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
+}
+
+/* Does one of three things:
+     (1) emits the current block with a new block type;
+     (2) emits the current block with the type of the second last block;
+     (3) merges the current block with the last block. */
+static void FN(BlockSplitterFinishBlock)(
+    FN(BlockSplitter)* self, BROTLI_BOOL is_final) {
+  BlockSplit* split = self->split_;
+  double* last_entropy = self->last_entropy_;
+  HistogramType* histograms = self->histograms_;
+  self->block_size_ =  /* never accounted below min_block_size_ */
+      BROTLI_MAX(size_t, self->block_size_, self->min_block_size_);
+  if (self->num_blocks_ == 0) {
+    /* Create first block. */
+    split->lengths[0] = (uint32_t)self->block_size_;
+    split->types[0] = 0;
+    last_entropy[0] =
+        BitsEntropy(histograms[0].data_, self->alphabet_size_);
+    last_entropy[1] = last_entropy[0];
+    ++self->num_blocks_;
+    ++split->num_types;
+    ++self->curr_histogram_ix_;
+    if (self->curr_histogram_ix_ < *self->histograms_size_)
+      FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
+    self->block_size_ = 0;
+  } else if (self->block_size_ > 0) {
+    double entropy = BitsEntropy(histograms[self->curr_histogram_ix_].data_,
+                                 self->alphabet_size_);
+    double combined_entropy[2];
+    double diff[2];
+    size_t j;
+    for (j = 0; j < 2; ++j) {  /* j = 0: last type, j = 1: second last type */
+      size_t last_histogram_ix = self->last_histogram_ix_[j];
+      self->combined_histo[j] = histograms[self->curr_histogram_ix_];
+      FN(HistogramAddHistogram)(&self->combined_histo[j],
+          &histograms[last_histogram_ix]);
+      combined_entropy[j] = BitsEntropy(
+          &self->combined_histo[j].data_[0], self->alphabet_size_);
+      diff[j] = combined_entropy[j] - entropy - last_entropy[j];
+    }
+
+    if (split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES &&
+        diff[0] > self->split_threshold_ &&
+        diff[1] > self->split_threshold_) {
+      /* Create new block. */
+      split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
+      split->types[self->num_blocks_] = (uint8_t)split->num_types;
+      self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
+      self->last_histogram_ix_[0] = (uint8_t)split->num_types;
+      last_entropy[1] = last_entropy[0];
+      last_entropy[0] = entropy;
+      ++self->num_blocks_;
+      ++split->num_types;
+      ++self->curr_histogram_ix_;
+      if (self->curr_histogram_ix_ < *self->histograms_size_)
+        FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
+      self->block_size_ = 0;
+      self->merge_last_count_ = 0;
+      self->target_block_size_ = self->min_block_size_;
+    } else if (diff[1] < diff[0] - 20.0) {  /* must win by more than 20.0 */
+      /* Combine this block with second last block. */
+      split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
+      split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
+      BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
+      histograms[self->last_histogram_ix_[0]] = self->combined_histo[1];
+      last_entropy[1] = last_entropy[0];
+      last_entropy[0] = combined_entropy[1];
+      ++self->num_blocks_;
+      self->block_size_ = 0;
+      FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
+      self->merge_last_count_ = 0;
+      self->target_block_size_ = self->min_block_size_;
+    } else {
+      /* Combine this block with last block. */
+      split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
+      histograms[self->last_histogram_ix_[0]] = self->combined_histo[0];
+      last_entropy[0] = combined_entropy[0];
+      if (split->num_types == 1) {
+        last_entropy[1] = last_entropy[0];
+      }
+      self->block_size_ = 0;
+      FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
+      if (++self->merge_last_count_ > 1) {  /* from the second merge in a row */
+        self->target_block_size_ += self->min_block_size_;
+      }
+    }
+  }
+  if (is_final) {  /* replace the upper bounds stored by InitBlockSplitter */
+    *self->histograms_size_ = split->num_types;
+    split->num_blocks = self->num_blocks_;
+  }
+}
+
+/* Adds the next symbol to the current histogram. When the current histogram
+   reaches the target size, decides on merging the block. */
+static void FN(BlockSplitterAddSymbol)(FN(BlockSplitter)* self, size_t symbol) {
+  FN(HistogramAdd)(&self->histograms_[self->curr_histogram_ix_], symbol);
+  ++self->block_size_;
+  if (self->block_size_ == self->target_block_size_) {  /* exact hit only */
+    FN(BlockSplitterFinishBlock)(self, /* is_final = */ BROTLI_FALSE);
+  }
+}
+
+#undef HistogramType
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/params.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/params.h
new file mode 100644
index 0000000000..baeb31967d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/params.h
@@ -0,0 +1,48 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Parameters for the Brotli encoder with chosen quality levels. */
+
+#ifndef BROTLI_ENC_PARAMS_H_
+#define BROTLI_ENC_PARAMS_H_
+
+#include <brotli/encode.h>
+
+#include "encoder_dict.h"
+
+typedef struct BrotliHasherParams {
+  int type;  /* hasher id; assigned by ChooseHasher in quality.h */
+  int bucket_bits;  /* ChooseHasher writes it only in its type 5/6 branches */
+  int block_bits;  /* likewise: type 5/6 branches only */
+  int hash_len;  /* ChooseHasher writes it only in its type 6 branch */
+  int num_last_distances_to_check;  /* type 5/6 branches only */
+} BrotliHasherParams;
+
+typedef struct BrotliDistanceParams {  /* see BrotliInitDistanceParams */
+  uint32_t distance_postfix_bits;  /* presumably its npostfix arg - confirm */
+  uint32_t num_direct_distance_codes;  /* presumably its ndirect - confirm */
+  uint32_t alphabet_size_max;
+  uint32_t alphabet_size_limit;
+  size_t max_distance;
+} BrotliDistanceParams;
+
+/* Encoding parameters */
+typedef struct BrotliEncoderParams {
+  BrotliEncoderMode mode;
+  int quality;  /* SanitizeParams clamps to BROTLI_MIN/MAX_QUALITY */
+  int lgwin;  /* SanitizeParams clamps to the supported window bits */
+  int lgblock;  /* 0 lets ComputeLgBlock choose (qualities >= 4) */
+  size_t stream_offset;
+  size_t size_hint;  /* ChooseHasher compares it against 1 << 20 */
+  BROTLI_BOOL disable_literal_context_modeling;
+  BROTLI_BOOL large_window;  /* SanitizeParams clears it for quality <= 2 */
+  BrotliHasherParams hasher;
+  BrotliDistanceParams dist;
+  /* TODO(eustas): rename to BrotliShared... */
+  SharedEncoderDictionary dictionary;
+} BrotliEncoderParams;
+
+#endif  /* BROTLI_ENC_PARAMS_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/prefix.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/prefix.h
new file mode 100644
index 0000000000..0f006f1614
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/prefix.h
@@ -0,0 +1,54 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions for encoding of integers into prefix codes, the amount of extra
+   bits, and the actual values of the extra bits. */
+
+#ifndef BROTLI_ENC_PREFIX_H_
+#define BROTLI_ENC_PREFIX_H_
+
+#include <brotli/types.h>
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include "fast_log.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Here distance_code is an intermediate code, i.e. one of the special codes or
+   the actual distance increased by BROTLI_NUM_DISTANCE_SHORT_CODES - 1.
+   Writes the prefix symbol to *code and the extra-bits value to *extra_bits. */
+static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code,
+                                                   size_t num_direct_codes,
+                                                   size_t postfix_bits,
+                                                   uint16_t* code,
+                                                   uint32_t* extra_bits) {
+  if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes) {
+    *code = (uint16_t)distance_code;  /* short/direct codes map to themselves */
+    *extra_bits = 0;
+    return;
+  } else {
+    size_t dist = ((size_t)1 << (postfix_bits + 2u)) +
+        (distance_code - BROTLI_NUM_DISTANCE_SHORT_CODES - num_direct_codes);
+    size_t bucket = Log2FloorNonZero(dist) - 1;
+    size_t postfix_mask = (1u << postfix_bits) - 1;
+    size_t postfix = dist & postfix_mask;
+    size_t prefix = (dist >> bucket) & 1;
+    size_t offset = (2 + prefix) << bucket;
+    size_t nbits = bucket - postfix_bits;
+    *code = (uint16_t)((nbits << 10) |  /* nbits is packed from bit 10 up */
+        (BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes +
+         ((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
+    *extra_bits = (uint32_t)((dist - offset) >> postfix_bits);
+  }
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_PREFIX_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/quality.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/quality.h
new file mode 100644
index 0000000000..99891b479c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/quality.h
@@ -0,0 +1,166 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Constants and formulas that affect speed-ratio trade-offs and thus define
+   quality levels. */
+
+#ifndef BROTLI_ENC_QUALITY_H_
+#define BROTLI_ENC_QUALITY_H_
+
+#include <brotli/encode.h>
+
+#include "../common/platform.h"
+#include "params.h"
+
+#define FAST_ONE_PASS_COMPRESSION_QUALITY 0
+#define FAST_TWO_PASS_COMPRESSION_QUALITY 1
+#define ZOPFLIFICATION_QUALITY 10
+#define HQ_ZOPFLIFICATION_QUALITY 11
+
+#define MAX_QUALITY_FOR_STATIC_ENTROPY_CODES 2
+#define MIN_QUALITY_FOR_BLOCK_SPLIT 4
+#define MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS 4
+#define MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS 4
+#define MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH 5
+#define MIN_QUALITY_FOR_CONTEXT_MODELING 5
+#define MIN_QUALITY_FOR_HQ_CONTEXT_MODELING 7
+#define MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING 10
+
+/* For quality below MIN_QUALITY_FOR_BLOCK_SPLIT there is no block splitting,
+   so we buffer at most this much literals and commands. */
+#define MAX_NUM_DELAYED_SYMBOLS 0x2FFF
+
+/* Returns hash-table size: 1 << 15 for quality 0, 1 << 17 otherwise. */
+static BROTLI_INLINE size_t MaxHashTableSize(int quality) {
+  return quality == FAST_ONE_PASS_COMPRESSION_QUALITY ? 1 << 15 : 1 << 17;
+}
+
+/* The maximum length for which the zopflification uses distinct distances. */
+#define MAX_ZOPFLI_LEN_QUALITY_10 150
+#define MAX_ZOPFLI_LEN_QUALITY_11 325
+
+/* Do not thoroughly search when a long copy is found. */
+#define BROTLI_LONG_COPY_QUICK_STEP 16384
+
+static BROTLI_INLINE size_t MaxZopfliLen(const BrotliEncoderParams* params) {
+  return params->quality <= 10 ?  /* 150 up to quality 10, 325 above */
+      MAX_ZOPFLI_LEN_QUALITY_10 :
+      MAX_ZOPFLI_LEN_QUALITY_11;
+}
+
+/* Number of best candidates to evaluate to expand Zopfli chain. */
+static BROTLI_INLINE size_t MaxZopfliCandidates(
+  const BrotliEncoderParams* params) {
+  return params->quality <= 10 ? 1 : 5;  /* 5 only above quality 10 */
+}
+
+static BROTLI_INLINE void SanitizeParams(BrotliEncoderParams* params) {
+  params->quality = BROTLI_MIN(int, BROTLI_MAX_QUALITY,  /* clamp quality */
+      BROTLI_MAX(int, BROTLI_MIN_QUALITY, params->quality));
+  if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
+    params->large_window = BROTLI_FALSE;  /* no large window at quality <= 2 */
+  }
+  if (params->lgwin < BROTLI_MIN_WINDOW_BITS) {  /* clamp lgwin from below */
+    params->lgwin = BROTLI_MIN_WINDOW_BITS;
+  } else {
+    int max_lgwin = params->large_window ? BROTLI_LARGE_MAX_WINDOW_BITS :
+                                           BROTLI_MAX_WINDOW_BITS;
+    if (params->lgwin > max_lgwin) params->lgwin = max_lgwin;  /* and above */
+  }
+}
+
+/* Returns optimized lg_block value; params itself is not modified. */
+static BROTLI_INLINE int ComputeLgBlock(const BrotliEncoderParams* params) {
+  int lgblock = params->lgblock;
+  if (params->quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
+      params->quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
+    lgblock = params->lgwin;  /* qualities 0 and 1 ignore the requested value */
+  } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
+    lgblock = 14;  /* qualities 2 and 3 ignore it as well */
+  } else if (lgblock == 0) {  /* 0 = no explicit request: pick a default */
+    lgblock = 16;
+    if (params->quality >= 9 && params->lgwin > lgblock) {
+      lgblock = BROTLI_MIN(int, 18, params->lgwin);
+    }
+  } else {  /* explicit request: clamp into the supported input block bits */
+    lgblock = BROTLI_MIN(int, BROTLI_MAX_INPUT_BLOCK_BITS,
+        BROTLI_MAX(int, BROTLI_MIN_INPUT_BLOCK_BITS, lgblock));
+  }
+  return lgblock;
+}
+
+/* Returns log2 of the size of main ring buffer area.
+   Allocate at least lgwin + 1 bits for the ring buffer so that the newly
+   added block fits there completely and we still get lgwin bits and at least
+   read_block_size_bits + 1 bits because the copy tail length needs to be
+   smaller than ring-buffer size. */
+static BROTLI_INLINE int ComputeRbBits(const BrotliEncoderParams* params) {
+  return 1 + BROTLI_MAX(int, params->lgwin, params->lgblock);  /* max + 1 */
+}
+
+static BROTLI_INLINE size_t MaxMetablockSize(
+    const BrotliEncoderParams* params) {
+  int bits =  /* ring-buffer bits, capped at BROTLI_MAX_INPUT_BLOCK_BITS */
+      BROTLI_MIN(int, ComputeRbBits(params), BROTLI_MAX_INPUT_BLOCK_BITS);
+  return (size_t)1 << bits;
+}
+
+/* When searching for backward references and have not seen matches for a long
+   time, we can skip some match lookups. Unsuccessful match lookups are very
+   expensive and this kind of a heuristic speeds up compression quite a lot.
+   At first 8 byte strides are taken and every second byte is put to hasher.
+   After 4x more literals stride by 16 bytes, every 4-th byte is put to hasher.
+   Applied only to qualities 2 to 9. */
+static BROTLI_INLINE size_t LiteralSpreeLengthForSparseSearch(
+    const BrotliEncoderParams* params) {
+  return params->quality < 9 ? 64 : 512;  /* 512 from quality 9 upwards */
+}
+
+static BROTLI_INLINE void ChooseHasher(const BrotliEncoderParams* params,
+                                       BrotliHasherParams* hparams) {
+  if (params->quality > 9) {  /* other fields of *hparams are left untouched */
+    hparams->type = 10;
+  } else if (params->quality == 4 && params->size_hint >= (1 << 20)) {
+    hparams->type = 54;
+  } else if (params->quality < 5) {
+    hparams->type = params->quality;  /* type equals the quality level */
+  } else if (params->lgwin <= 16) {
+    hparams->type = params->quality < 7 ? 40 : params->quality < 9 ? 41 : 42;
+  } else if (params->size_hint >= (1 << 20) && params->lgwin >= 19) {
+    hparams->type = 6;
+    hparams->block_bits = params->quality - 1;
+    hparams->bucket_bits = 15;
+    hparams->hash_len = 5;
+    hparams->num_last_distances_to_check =
+        params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16;
+  } else {
+    hparams->type = 5;  /* hash_len is not written in this branch */
+    hparams->block_bits = params->quality - 1;
+    hparams->bucket_bits = params->quality < 7 ? 14 : 15;
+    hparams->num_last_distances_to_check =
+        params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16;
+  }
+
+  if (params->lgwin > 24) {
+    /* Different hashers for large window brotli: not for qualities <= 2,
+       these are too fast for large window. Not for qualities >= 10: their
+       hasher already works well with large window. So the changes are:
+       H3 --> H35: for quality 3.
+       H54 --> H55: for quality 4 with size hint >= 1MiB
+       H6 --> H65: for qualities 5, 6, 7, 8, 9. */
+    if (hparams->type == 3) {
+      hparams->type = 35;
+    }
+    if (hparams->type == 54) {
+      hparams->type = 55;
+    }
+    if (hparams->type == 6) {
+      hparams->type = 65;
+    }
+  }
+}
+
+#endif  /* BROTLI_ENC_QUALITY_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/ringbuffer.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/ringbuffer.h
new file mode 100644
index 0000000000..27245b7f39
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/ringbuffer.h
@@ -0,0 +1,168 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Sliding window over the input data. */
+
+#ifndef BROTLI_ENC_RINGBUFFER_H_
+#define BROTLI_ENC_RINGBUFFER_H_
+
+#include <string.h>  /* memcpy */
+
+#include <brotli/types.h>
+
+#include "../common/platform.h"
+#include "memory.h"
+#include "quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
+   data in a circular manner: writing a byte writes it to:
+     `position() % (1 << window_bits)'.
+   For convenience, the RingBuffer array contains another copy of the
+   first `1 << tail_bits' bytes:
+     buffer_[i] == buffer_[i + (1 << window_bits)], if i < (1 << tail_bits),
+   and another copy of the last two bytes:
+     buffer_[-1] == buffer_[(1 << window_bits) - 1] and
+     buffer_[-2] == buffer_[(1 << window_bits) - 2]. */
+typedef struct RingBuffer {
+  /* Size of the ring-buffer is (1 << window_bits) + tail_size_. */
+  const uint32_t size_;  /* 1 << window_bits; written by RingBufferSetup */
+  const uint32_t mask_;  /* size_ - 1 */
+  const uint32_t tail_size_;  /* 1 << params->lgblock */
+  const uint32_t total_size_;  /* size_ + tail_size_ */
+
+  uint32_t cur_size_;  /* length given to the last RingBufferInitBuffer call */
+  /* Position to write in the ring buffer. */
+  uint32_t pos_;
+  /* The actual ring buffer containing the copy of the last two bytes, the data,
+     and the copy of the beginning as a tail. */
+  uint8_t* data_;
+  /* The start of the ring-buffer. */
+  uint8_t* buffer_;  /* always data_ + 2 once allocated */
+} RingBuffer;
+
+static BROTLI_INLINE void RingBufferInit(RingBuffer* rb) {
+  rb->cur_size_ = 0;  /* only the mutable fields are reset here; the const */
+  rb->pos_ = 0;       /* size fields are written by RingBufferSetup */
+  rb->data_ = 0;
+  rb->buffer_ = 0;
+}
+
+static BROTLI_INLINE void RingBufferSetup(
+    const BrotliEncoderParams* params, RingBuffer* rb) {
+  int window_bits = ComputeRbBits(params);
+  int tail_bits = params->lgblock;
+  *(uint32_t*)&rb->size_ = 1u << window_bits;  /* casts drop the field const */
+  *(uint32_t*)&rb->mask_ = (1u << window_bits) - 1;
+  *(uint32_t*)&rb->tail_size_ = 1u << tail_bits;
+  *(uint32_t*)&rb->total_size_ = rb->size_ + rb->tail_size_;
+}
+
+static BROTLI_INLINE void RingBufferFree(MemoryManager* m, RingBuffer* rb) {
+  BROTLI_FREE(m, rb->data_);  /* buffer_ points into data_; not freed itself */
+}
+
+/* Allocates or re-allocates data_ to the given length plus some slack
+   region before and after. Fills the slack regions with zeros. */
+static BROTLI_INLINE void RingBufferInitBuffer(
+    MemoryManager* m, const uint32_t buflen, RingBuffer* rb) {
+  static const size_t kSlackForEightByteHashingEverywhere = 7;
+  uint8_t* new_data = BROTLI_ALLOC(
+      m, uint8_t, 2 + buflen + kSlackForEightByteHashingEverywhere);
+  size_t i;
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_data)) return;  /* rb unchanged */
+  if (rb->data_) {
+    memcpy(new_data, rb->data_,  /* whole old block: buflen must not shrink */
+        2 + rb->cur_size_ + kSlackForEightByteHashingEverywhere);
+    BROTLI_FREE(m, rb->data_);
+  }
+  rb->data_ = new_data;
+  rb->cur_size_ = buflen;
+  rb->buffer_ = rb->data_ + 2;  /* two bytes in front hold buffer_[-2..-1] */
+  rb->buffer_[-2] = rb->buffer_[-1] = 0;
+  for (i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
+    rb->buffer_[rb->cur_size_ + i] = 0;
+  }
+}
+
+static BROTLI_INLINE void RingBufferWriteTail(
+    const uint8_t* bytes, size_t n, RingBuffer* rb) {
+  const size_t masked_pos = rb->pos_ & rb->mask_;
+  if (BROTLI_PREDICT_FALSE(masked_pos < rb->tail_size_)) {
+    /* Just fill the tail buffer with the beginning data. */
+    const size_t p = rb->size_ + masked_pos;  /* same offset past size_ */
+    memcpy(&rb->buffer_[p], bytes,
+        BROTLI_MIN(size_t, n, rb->tail_size_ - masked_pos));
+  }
+}
+
+/* Push bytes into the ring buffer. */
+static BROTLI_INLINE void RingBufferWrite(
+    MemoryManager* m, const uint8_t* bytes, size_t n, RingBuffer* rb) {
+  if (rb->pos_ == 0 && n < rb->tail_size_) {
+    /* Special case for the first write: to process the first block, we don't
+       need to allocate the whole ring-buffer and we don't need the tail
+       either. However, we do this memory usage optimization only if the
+       first write is less than the tail size, which is also the input block
+       size, otherwise it is likely that other blocks will follow and we
+       will need to reallocate to the full size anyway. */
+    rb->pos_ = (uint32_t)n;
+    RingBufferInitBuffer(m, rb->pos_, rb);
+    if (BROTLI_IS_OOM(m)) return;
+    memcpy(rb->buffer_, bytes, n);
+    return;
+  }
+  if (rb->cur_size_ < rb->total_size_) {
+    /* Lazily allocate the full buffer. */
+    RingBufferInitBuffer(m, rb->total_size_, rb);
+    if (BROTLI_IS_OOM(m)) return;
+    /* Initialize the last two bytes to zero, so that we don't have to worry
+       later when we copy the last two bytes to the first two positions. */
+    rb->buffer_[rb->size_ - 2] = 0;
+    rb->buffer_[rb->size_ - 1] = 0;
+    /* Initialize tail; might be touched by "best_len++" optimization when
+       ring buffer is "full". */
+    rb->buffer_[rb->size_] = 241;
+  }
+  {
+    const size_t masked_pos = rb->pos_ & rb->mask_;
+    /* The length of the writes is limited so that we do not need to worry
+       about a write wrapping around the ring buffer more than once. */
+    RingBufferWriteTail(bytes, n, rb);
+    if (BROTLI_PREDICT_TRUE(masked_pos + n <= rb->size_)) {
+      /* A single write fits. */
+      memcpy(&rb->buffer_[masked_pos], bytes, n);
+    } else {
+      /* Split into two writes.
+         Copy into the end of the buffer, including the tail buffer. */
+      memcpy(&rb->buffer_[masked_pos], bytes,
+             BROTLI_MIN(size_t, n, rb->total_size_ - masked_pos));
+      /* Copy into the beginning of the buffer */
+      memcpy(&rb->buffer_[0], bytes + (rb->size_ - masked_pos),
+             n - (rb->size_ - masked_pos));
+    }
+  }
+  {
+    BROTLI_BOOL not_first_lap = (rb->pos_ & (1u << 31)) != 0;
+    uint32_t rb_pos_mask = (1u << 31) - 1;  /* low 31 bits carry the position */
+    rb->buffer_[-2] = rb->buffer_[rb->size_ - 2];
+    rb->buffer_[-1] = rb->buffer_[rb->size_ - 1];
+    rb->pos_ = (rb->pos_ & rb_pos_mask) + (uint32_t)(n & rb_pos_mask);
+    if (not_first_lap) {
+      /* Wrap, but preserve not-a-first-lap feature. */
+      rb->pos_ |= 1u << 31;
+    }
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_RINGBUFFER_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/state.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/state.h
new file mode 100644
index 0000000000..cb82987701
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/state.h
@@ -0,0 +1,104 @@
+/* Copyright 2022 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Encoder state. */
+
+#ifndef BROTLI_ENC_STATE_H_
+#define BROTLI_ENC_STATE_H_
+
+#include <brotli/types.h>
+
+#include "command.h"
+#include "compress_fragment.h"
+#include "compress_fragment_two_pass.h"
+#include "hash.h"
+#include "memory.h"
+#include "params.h"
+#include "ringbuffer.h"
+
+typedef enum BrotliEncoderStreamState {  /* type of the stream_state_ field */
+  /* Default state. */
+  BROTLI_STREAM_PROCESSING = 0,
+  /* Intermediate state; after next block is emitted, byte-padding should be
+     performed before getting back to default state. */
+  BROTLI_STREAM_FLUSH_REQUESTED = 1,
+  /* Last metablock was produced; no more input is acceptable. */
+  BROTLI_STREAM_FINISHED = 2,
+  /* Flushing compressed block and writing meta-data block header. */
+  BROTLI_STREAM_METADATA_HEAD = 3,
+  /* Writing metadata block body. */
+  BROTLI_STREAM_METADATA_BODY = 4
+} BrotliEncoderStreamState;
+
+typedef enum BrotliEncoderFlintState {  /* stored in the int8_t flint_ field */
+  BROTLI_FLINT_NEEDS_2_BYTES = 2,  /* all values fit int8_t, incl. negatives */
+  BROTLI_FLINT_NEEDS_1_BYTE = 1,
+  BROTLI_FLINT_WAITING_FOR_PROCESSING = 0,
+  BROTLI_FLINT_WAITING_FOR_FLUSHING = -1,
+  BROTLI_FLINT_DONE = -2
+} BrotliEncoderFlintState;
+
+typedef struct BrotliEncoderStateStruct {
+  BrotliEncoderParams params;
+
+  MemoryManager memory_manager_;
+
+  uint64_t input_pos_;
+  RingBuffer ringbuffer_;
+  size_t cmd_alloc_size_;  /* presumably capacity of commands_ - confirm */
+  Command* commands_;
+  size_t num_commands_;
+  size_t num_literals_;
+  size_t last_insert_len_;
+  uint64_t last_flush_pos_;
+  uint64_t last_processed_pos_;
+  int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES];
+  int saved_dist_cache_[4];
+  uint16_t last_bytes_;
+  uint8_t last_bytes_bits_;
+  /* "Flint" is a tiny uncompressed block emitted before the continuation
+     block to unwire literal context from previous data. Despite being int8_t,
+     field is actually BrotliEncoderFlintState enum. */
+  int8_t flint_;
+  uint8_t prev_byte_;
+  uint8_t prev_byte2_;
+  size_t storage_size_;  /* presumably size of storage_ - confirm */
+  uint8_t* storage_;
+
+  Hasher hasher_;
+
+  /* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */
+  int small_table_[1 << 10];  /* 4KiB */
+  int* large_table_;          /* Allocated only when needed */
+  size_t large_table_size_;
+
+  BrotliOnePassArena* one_pass_arena_;
+  BrotliTwoPassArena* two_pass_arena_;
+
+  /* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */
+  uint32_t* command_buf_;
+  uint8_t* literal_buf_;
+
+  uint64_t total_in_;
+  uint8_t* next_out_;
+  size_t available_out_;
+  uint64_t total_out_;
+  /* Temporary buffer for padding flush bits or metadata block header / body. */
+  union {  /* two views of the same 16 bytes */
+    uint64_t u64[2];
+    uint8_t u8[16];
+  } tiny_buf_;
+  uint32_t remaining_metadata_bytes_;
+  BrotliEncoderStreamState stream_state_;
+
+  BROTLI_BOOL is_last_block_emitted_;
+  BROTLI_BOOL is_initialized_;
+} BrotliEncoderStateStruct;
+
+typedef struct BrotliEncoderStateStruct BrotliEncoderStateInternal;
+#define BrotliEncoderState BrotliEncoderStateInternal
+
+#endif  /* BROTLI_ENC_STATE_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/static_dict.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/static_dict.c
new file mode 100644
index 0000000000..9e6f270430
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/static_dict.c
@@ -0,0 +1,540 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "static_dict.h"
+
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include "../common/transform.h"
+#include "encoder_dict.h"
+#include "find_match_length.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Bucket index for the bytes at |data|. The multiplication mixes best into
+   the high bits, so the top kDictNumBits bits of the product are returned. */
+static BROTLI_INLINE uint32_t Hash(const uint8_t* data) {
+  const uint32_t mixed = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32;
+  return mixed >> (32 - kDictNumBits);
+}
+
+static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
+                                   uint32_t* matches) {
+  const uint32_t packed = (uint32_t)((distance << 5) + len_code);
+  if (packed < matches[len]) matches[len] = packed;  /* Keep the minimum. */
+}
+
+/* Compares |data| with word |id| of the |len|-byte words in |dictionary| over
+   at most min(len, maxlen) bytes; returns what FindMatchLengthWithLimit
+   reports for that comparison. */
+static BROTLI_INLINE size_t DictMatchLength(const BrotliDictionary* dictionary,
+    const uint8_t* data, size_t id, size_t len, size_t maxlen) {
+  const uint8_t* word =
+      &dictionary->data[dictionary->offsets_by_length[len] + len * id];
+  return FindMatchLengthWithLimit(word, data, BROTLI_MIN(size_t, len, maxlen));
+}
+
+/* Tests whether |data| starts with dictionary word |w| under the transform
+   stored in |w|: 0 compares the word as is, 10 expects its first letter
+   uppercased, any other value expects all of its letters uppercased.
+   A word longer than |max_length| never matches. */
+static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
+    DictWord w, const uint8_t* data, size_t max_length) {
+  const uint8_t* word;
+  size_t pos;
+  if (w.len > max_length) return BROTLI_FALSE;
+  word = &dictionary->data[dictionary->offsets_by_length[w.len] +
+                           (size_t)w.len * (size_t)w.idx];
+  switch (w.transform) {
+    case 0:
+      /* Identity: the whole base dictionary word must match. */
+      return TO_BROTLI_BOOL(
+          FindMatchLengthWithLimit(word, data, w.len) == w.len);
+    case 10:
+      /* Uppercase-first: word[0] must be a lowercase ASCII letter whose
+         case-flipped form (^ 32) equals data[0]; the rest compares as is. */
+      return TO_BROTLI_BOOL(word[0] >= 'a' && word[0] <= 'z' &&
+          (word[0] ^ 32) == data[0] &&
+          FindMatchLengthWithLimit(&word[1], &data[1], w.len - 1u) ==
+          w.len - 1u);
+    default:
+      /* Uppercase-all: lowercase ASCII letters are expected case-flipped
+         in |data|, every other byte is expected verbatim. */
+      for (pos = 0; pos < w.len; ++pos) {
+        const int is_lower = word[pos] >= 'a' && word[pos] <= 'z';
+        const int expected = is_lower ? (word[pos] ^ 32) : word[pos];
+        if (expected != data[pos]) return BROTLI_FALSE;
+      }
+      return BROTLI_TRUE;
+  }
+}
+
+/* Finds |data| matches in one static dictionary; BROTLI_TRUE if any found. */
+static BROTLI_BOOL BrotliFindAllStaticDictionaryMatchesFor(
+    const BrotliEncoderDictionary* dictionary, const uint8_t* data,
+    size_t min_length, size_t max_length, uint32_t* matches) {
+  BROTLI_BOOL has_found_match = BROTLI_FALSE;
+  if (dictionary->has_words_heavy) {  /* Walk the trie one byte at a time. */
+    const BrotliTrieNode* node = &dictionary->trie.root;
+    size_t l = 0;
+    while (node && l < max_length) {
+      uint8_t c;
+      if (l >= min_length && node->len_) {
+        AddMatch(node->idx_, l, node->len_, matches);
+        has_found_match = BROTLI_TRUE;
+      }
+      c = data[l++];
+      node = BrotliTrieSub(&dictionary->trie, node, c);
+    }
+    return has_found_match;
+  }
+  {  /* Scan the bucket selected by Hash(data); no prefix is involved. */
+    size_t offset = dictionary->buckets[Hash(data)];
+    BROTLI_BOOL end = !offset;  /* Offset 0: nothing to scan. */
+    while (!end) {
+      DictWord w = dictionary->dict_words[offset++];
+      const size_t l = w.len & 0x1F;  /* Word length. */
+      const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
+      const size_t id = w.idx;
+      end = !!(w.len & 0x80);  /* Bit 7 flags the bucket's last word. */
+      w.len = (uint8_t)l;
+      if (w.transform == 0) {
+        const size_t matchlen =
+            DictMatchLength(dictionary->words, data, id, l, max_length);
+        const uint8_t* s;
+        size_t minlen;
+        size_t maxlen;
+        size_t len;
+        /* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */
+        if (matchlen == l) {
+          AddMatch(id, l, l, matches);
+          has_found_match = BROTLI_TRUE;
+        }
+        /* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and
+                      "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */
+        if (matchlen >= l - 1) {
+          AddMatch(id + 12 * n, l - 1, l, matches);
+          if (l + 2 < max_length &&
+              data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&
+              data[l + 2] == ' ') {
+            AddMatch(id + 49 * n, l + 3, l, matches);
+          }
+          has_found_match = BROTLI_TRUE;
+        }
+        /* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */
+        minlen = min_length;
+        if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);  /* cut <= 9 */
+        maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
+        for (len = minlen; len <= maxlen; ++len) {
+          size_t cut = l - len;
+          size_t transform_id = (cut << 2) +
+              (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
+          AddMatch(id + transform_id * n, len, l, matches);
+          has_found_match = BROTLI_TRUE;
+        }
+        if (matchlen < l || l + 6 >= max_length) {
+          continue;  /* Suffixes need a full match and s[0..6]. */
+        }
+        s = &data[l];
+        /* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */
+        if (s[0] == ' ') {
+          AddMatch(id + n, l + 1, l, matches);
+          if (s[1] == 'a') {
+            if (s[2] == ' ') {
+              AddMatch(id + 28 * n, l + 3, l, matches);
+            } else if (s[2] == 's') {
+              if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);
+            } else if (s[2] == 't') {
+              if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);
+            } else if (s[2] == 'n') {
+              if (s[3] == 'd' && s[4] == ' ') {
+                AddMatch(id + 10 * n, l + 5, l, matches);
+              }
+            }
+          } else if (s[1] == 'b') {
+            if (s[2] == 'y' && s[3] == ' ') {
+              AddMatch(id + 38 * n, l + 4, l, matches);
+            }
+          } else if (s[1] == 'i') {
+            if (s[2] == 'n') {
+              if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
+            } else if (s[2] == 's') {
+              if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);
+            }
+          } else if (s[1] == 'f') {
+            if (s[2] == 'o') {
+              if (s[3] == 'r' && s[4] == ' ') {
+                AddMatch(id + 25 * n, l + 5, l, matches);
+              }
+            } else if (s[2] == 'r') {
+              if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {
+                AddMatch(id + 37 * n, l + 6, l, matches);
+              }
+            }
+          } else if (s[1] == 'o') {
+            if (s[2] == 'f') {
+              if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);
+            } else if (s[2] == 'n') {
+              if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);
+            }
+          } else if (s[1] == 'n') {
+            if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {
+              AddMatch(id + 80 * n, l + 5, l, matches);
+            }
+          } else if (s[1] == 't') {
+            if (s[2] == 'h') {
+              if (s[3] == 'e') {
+                if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);
+              } else if (s[3] == 'a') {
+                if (s[4] == 't' && s[5] == ' ') {
+                  AddMatch(id + 29 * n, l + 6, l, matches);
+                }
+              }
+            } else if (s[2] == 'o') {
+              if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);
+            }
+          } else if (s[1] == 'w') {
+            if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {
+              AddMatch(id + 35 * n, l + 6, l, matches);
+            }
+          }
+        } else if (s[0] == '"') {
+          AddMatch(id + 19 * n, l + 1, l, matches);
+          if (s[1] == '>') {
+            AddMatch(id + 21 * n, l + 2, l, matches);
+          }
+        } else if (s[0] == '.') {
+          AddMatch(id + 20 * n, l + 1, l, matches);
+          if (s[1] == ' ') {
+            AddMatch(id + 31 * n, l + 2, l, matches);
+            if (s[2] == 'T' && s[3] == 'h') {
+              if (s[4] == 'e') {
+                if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);
+              } else if (s[4] == 'i') {
+                if (s[5] == 's' && s[6] == ' ') {
+                  AddMatch(id + 75 * n, l + 7, l, matches);
+                }
+              }
+            }
+          }
+        } else if (s[0] == ',') {
+          AddMatch(id + 76 * n, l + 1, l, matches);
+          if (s[1] == ' ') {
+            AddMatch(id + 14 * n, l + 2, l, matches);
+          }
+        } else if (s[0] == '\n') {
+          AddMatch(id + 22 * n, l + 1, l, matches);
+          if (s[1] == '\t') {
+            AddMatch(id + 50 * n, l + 2, l, matches);
+          }
+        } else if (s[0] == ']') {
+          AddMatch(id + 24 * n, l + 1, l, matches);
+        } else if (s[0] == '\'') {
+          AddMatch(id + 36 * n, l + 1, l, matches);
+        } else if (s[0] == ':') {
+          AddMatch(id + 51 * n, l + 1, l, matches);
+        } else if (s[0] == '(') {
+          AddMatch(id + 57 * n, l + 1, l, matches);
+        } else if (s[0] == '=') {
+          if (s[1] == '"') {
+            AddMatch(id + 70 * n, l + 2, l, matches);
+          } else if (s[1] == '\'') {
+            AddMatch(id + 86 * n, l + 2, l, matches);
+          }
+        } else if (s[0] == 'a') {
+          if (s[1] == 'l' && s[2] == ' ') {
+            AddMatch(id + 84 * n, l + 3, l, matches);
+          }
+        } else if (s[0] == 'e') {
+          if (s[1] == 'd') {
+            if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);
+          } else if (s[1] == 'r') {
+            if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);
+          } else if (s[1] == 's') {
+            if (s[2] == 't' && s[3] == ' ') {
+              AddMatch(id + 95 * n, l + 4, l, matches);
+            }
+          }
+        } else if (s[0] == 'f') {
+          if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {
+            AddMatch(id + 90 * n, l + 4, l, matches);
+          }
+        } else if (s[0] == 'i') {
+          if (s[1] == 'v') {
+            if (s[2] == 'e' && s[3] == ' ') {
+              AddMatch(id + 92 * n, l + 4, l, matches);
+            }
+          } else if (s[1] == 'z') {
+            if (s[2] == 'e' && s[3] == ' ') {
+              AddMatch(id + 100 * n, l + 4, l, matches);
+            }
+          }
+        } else if (s[0] == 'l') {
+          if (s[1] == 'e') {
+            if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {
+              AddMatch(id + 93 * n, l + 5, l, matches);
+            }
+          } else if (s[1] == 'y') {
+            if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);
+          }
+        } else if (s[0] == 'o') {
+          if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {
+            AddMatch(id + 106 * n, l + 4, l, matches);
+          }
+        }
+      } else {
+        /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
+               is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
+           transform. */
+        const BROTLI_BOOL is_all_caps =
+            TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
+        const uint8_t* s;
+        if (!IsMatch(dictionary->words, w, data, max_length)) {
+          continue;
+        }
+        /* Transform "" + kUppercase{First,All} + "" */
+        AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches);
+        has_found_match = BROTLI_TRUE;
+        if (l + 1 >= max_length) {
+          continue;  /* Suffix checks read up to s[1]. */
+        }
+        /* Transforms "" + kUppercase{First,All} + <suffix> */
+        s = &data[l];
+        if (s[0] == ' ') {
+          AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches);
+        } else if (s[0] == '"') {
+          AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches);
+          if (s[1] == '>') {
+            AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches);
+          }
+        } else if (s[0] == '.') {
+          AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches);
+          if (s[1] == ' ') {
+            AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches);
+          }
+        } else if (s[0] == ',') {
+          AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches);
+          if (s[1] == ' ') {
+            AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches);
+          }
+        } else if (s[0] == '\'') {
+          AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches);
+        } else if (s[0] == '(') {
+          AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches);
+        } else if (s[0] == '=') {
+          if (s[1] == '"') {
+            AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches);
+          } else if (s[1] == '\'') {
+            AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches);
+          }
+        }
+      }
+    }
+  }
+  /* Transforms with prefixes " " and "."; words are hashed from data[1]. */
+  if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
+    BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
+    size_t offset = dictionary->buckets[Hash(&data[1])];
+    BROTLI_BOOL end = !offset;
+    while (!end) {
+      DictWord w = dictionary->dict_words[offset++];
+      const size_t l = w.len & 0x1F;
+      const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
+      const size_t id = w.idx;
+      end = !!(w.len & 0x80);
+      w.len = (uint8_t)l;
+      if (w.transform == 0) {
+        const uint8_t* s;
+        if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
+          continue;
+        }
+        /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and
+                      "." + BROTLI_TRANSFORM_IDENTITY + "" */
+        AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
+        has_found_match = BROTLI_TRUE;
+        if (l + 2 >= max_length) {
+          continue;  /* Suffix checks read up to s[1]. */
+        }
+        /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and
+                      "." + BROTLI_TRANSFORM_IDENTITY + <suffix>
+        */
+        s = &data[l + 1];
+        if (s[0] == ' ') {
+          AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
+        } else if (s[0] == '(') {
+          AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);
+        } else if (is_space) {
+          if (s[0] == ',') {
+            AddMatch(id + 103 * n, l + 2, l, matches);
+            if (s[1] == ' ') {
+              AddMatch(id + 33 * n, l + 3, l, matches);
+            }
+          } else if (s[0] == '.') {
+            AddMatch(id + 71 * n, l + 2, l, matches);
+            if (s[1] == ' ') {
+              AddMatch(id + 52 * n, l + 3, l, matches);
+            }
+          } else if (s[0] == '=') {
+            if (s[1] == '"') {
+              AddMatch(id + 81 * n, l + 3, l, matches);
+            } else if (s[1] == '\'') {
+              AddMatch(id + 98 * n, l + 3, l, matches);
+            }
+          }
+        }
+      } else if (is_space) {
+        /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
+               is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
+           transform. */
+        const BROTLI_BOOL is_all_caps =
+            TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
+        const uint8_t* s;
+        if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
+          continue;
+        }
+        /* Transforms " " + kUppercase{First,All} + "" */
+        AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches);
+        has_found_match = BROTLI_TRUE;
+        if (l + 2 >= max_length) {
+          continue;  /* Suffix checks read up to s[1]. */
+        }
+        /* Transforms " " + kUppercase{First,All} + <suffix> */
+        s = &data[l + 1];
+        if (s[0] == ' ') {
+          AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches);
+        } else if (s[0] == ',') {
+          if (!is_all_caps) {
+            AddMatch(id + 109 * n, l + 2, l, matches);
+          }
+          if (s[1] == ' ') {
+            AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches);
+          }
+        } else if (s[0] == '.') {
+          AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches);
+          if (s[1] == ' ') {
+            AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches);
+          }
+        } else if (s[0] == '=') {
+          if (s[1] == '"') {
+            AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches);
+          } else if (s[1] == '\'') {
+            AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches);
+          }
+        }
+      }
+    }
+  }
+  if (max_length >= 6) {
+    /* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */
+    if ((data[1] == ' ' &&
+         (data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
+        (data[0] == 0xC2 && data[1] == 0xA0)) {
+      size_t offset = dictionary->buckets[Hash(&data[2])];
+      BROTLI_BOOL end = !offset;
+      while (!end) {
+        DictWord w = dictionary->dict_words[offset++];
+        const size_t l = w.len & 0x1F;
+        const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
+        const size_t id = w.idx;
+        end = !!(w.len & 0x80);
+        w.len = (uint8_t)l;
+        if (w.transform == 0 &&
+            IsMatch(dictionary->words, w, &data[2], max_length - 2)) {
+          if (data[0] == 0xC2) {
+            AddMatch(id + 102 * n, l + 2, l, matches);
+            has_found_match = BROTLI_TRUE;
+          } else if (l + 2 < max_length && data[l + 2] == ' ') {
+            size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
+            AddMatch(id + t * n, l + 3, l, matches);
+            has_found_match = BROTLI_TRUE;
+          }
+        }
+      }
+    }
+  }
+  if (max_length >= 9) {
+    /* Transforms with prefixes " the " and ".com/" */
+    if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
+         data[3] == 'e' && data[4] == ' ') ||
+        (data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
+         data[3] == 'm' && data[4] == '/')) {
+      size_t offset = dictionary->buckets[Hash(&data[5])];
+      BROTLI_BOOL end = !offset;
+      while (!end) {
+        DictWord w = dictionary->dict_words[offset++];
+        const size_t l = w.len & 0x1F;
+        const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
+        const size_t id = w.idx;
+        end = !!(w.len & 0x80);
+        w.len = (uint8_t)l;
+        if (w.transform == 0 &&
+            IsMatch(dictionary->words, w, &data[5], max_length - 5)) {
+          AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
+          has_found_match = BROTLI_TRUE;
+          if (l + 5 < max_length) {
+            const uint8_t* s = &data[l + 5];
+            if (data[0] == ' ') {
+              if (l + 8 < max_length &&
+                  s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {
+                AddMatch(id + 62 * n, l + 9, l, matches);
+                if (l + 12 < max_length &&
+                    s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {
+                  AddMatch(id + 73 * n, l + 13, l, matches);
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return has_found_match;
+}
+
+/* Collects matches from |dictionary| and, when its parent contextual
+   dictionary holds more than one dictionary, from a second one as well. */
+BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
+    const BrotliEncoderDictionary* dictionary, const uint8_t* data,
+    size_t min_length, size_t max_length, uint32_t* matches) {
+  uint32_t other_matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];
+  const BrotliEncoderDictionary* other;
+  size_t len;
+  BROTLI_BOOL found = BrotliFindAllStaticDictionaryMatchesFor(
+      dictionary, data, min_length, max_length, matches);
+
+  /* Without a parent holding several dictionaries there is nothing to add. */
+  if (!dictionary->parent || dictionary->parent->num_dictionaries <= 1) {
+    return found;
+  }
+  /* The other dictionary is dict[0] of the parent, or dict[1] when dict[0]
+     is |dictionary| itself. */
+  other = dictionary->parent->dict[0];
+  if (other == dictionary) other = dictionary->parent->dict[1];
+  /* Search it into a scratch table that starts out as all kInvalidMatch. */
+  for (len = 0; len <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++len) {
+    other_matches[len] = kInvalidMatch;
+  }
+  found |= BrotliFindAllStaticDictionaryMatchesFor(
+      other, data, min_length, max_length, other_matches);
+  /* Merge every hit into |matches|; its distance grows by |dictionary|'s
+     (1 << size_bits) for that length code, bit 0 cleared, times transforms. */
+  for (len = 0; len <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++len) {
+    const uint32_t packed = other_matches[len];
+    const uint32_t len_code = packed & 31;
+    uint32_t skip;
+    if (packed == kInvalidMatch) continue;
+    skip = (uint32_t)(1 << dictionary->words->size_bits_by_length[len_code]);
+    skip = (skip & ~1u) * (uint32_t)dictionary->num_transforms;
+    /* TODO(lode): check for dist overflow */
+    AddMatch((uint32_t)((packed >> 5) + skip), len, len_code, matches);
+  }
+  return found;
+}
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/static_dict.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/static_dict.h
new file mode 100644
index 0000000000..ab832207d1
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/static_dict.h
@@ -0,0 +1,41 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Class to model the static dictionary. */
+
+#ifndef BROTLI_ENC_STATIC_DICT_H_
+#define BROTLI_ENC_STATIC_DICT_H_
+
+#include <brotli/types.h>
+
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include "encoder_dict.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN 37
+static const uint32_t kInvalidMatch = 0xFFFFFFF;
+
+/* Matches data against static dictionary words, and for each length l,
+   for which a match is found, updates matches[l] to be the minimum possible
+     (distance << 5) + len_code.
+   Returns 1 if matches have been found, otherwise 0.
+   Prerequisites:
+     matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long
+     all elements are initialized to kInvalidMatch */
+BROTLI_INTERNAL BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* data, size_t min_length, size_t max_length,
+    uint32_t* matches);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_STATIC_DICT_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/static_dict_lut.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/static_dict_lut.h
new file mode 100644
index 0000000000..a465ffde74
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/static_dict_lut.h
@@ -0,0 +1,5866 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Lookup table for static dictionary and transforms. */
+
+#ifndef BROTLI_ENC_STATIC_DICT_LUT_H_
+#define BROTLI_ENC_STATIC_DICT_LUT_H_
+
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct DictWord {
+  /* Highest bit is used to indicate end of bucket. */
+  uint8_t len;
+  uint8_t transform;  /* NOTE(review): presumably a transform id - confirm. */
+  uint16_t idx;  /* NOTE(review): presumably a dictionary word index - confirm. */
+} DictWord;
+
+/* GENERATED CODE START */
+static const int kDictNumBits = 15;  /* 1 << 15 == 32768 (bucket table size). */
+static const uint32_t kDictHashMul32 = 0x1E35A7BD;
+
+static const uint16_t kStaticDictionaryBuckets[32768] = {
+1,0,0,0,0,0,0,0,0,3,6,0,0,0,0,0,20,0,0,0,21,0,22,0,0,0,0,0,0,0,0,23,0,0,25,0,29,
+0,53,0,0,0,0,0,0,55,0,0,0,0,0,0,61,76,0,0,0,94,0,0,0,0,0,0,96,0,97,0,98,0,0,0,0,
+0,0,0,99,101,106,108,0,0,0,0,0,110,0,111,112,0,113,118,124,0,0,0,0,0,125,128,0,0
+,0,0,129,0,0,131,0,0,0,0,0,0,132,0,0,135,0,0,0,137,0,0,0,0,0,138,139,0,0,0,0,0,0
+,0,142,143,144,0,0,0,0,0,145,0,0,0,146,149,151,152,0,0,153,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,154,0,0,0,0,0,0,155,0,0,0,0,160,182,0,0,0,0,0,0,183,0,0,0,188,189,0,0,
+192,0,0,0,0,0,0,194,0,0,0,0,0,0,0,0,197,202,209,0,0,210,0,224,0,0,0,225,0,0,0,0,
+0,0,0,0,0,0,231,0,0,0,232,0,240,0,0,242,0,0,0,0,0,0,0,0,0,0,0,244,0,0,0,246,0,0,
+249,251,253,0,0,0,0,0,258,0,0,261,263,0,0,0,267,0,0,268,0,269,0,0,0,0,0,0,0,0,0,
+271,0,0,0,0,0,0,272,0,273,0,277,0,278,286,0,0,0,0,287,0,289,290,291,0,0,0,295,0,
+0,296,297,0,0,0,0,0,0,0,0,0,0,298,0,0,0,299,0,0,305,0,324,0,0,0,0,0,327,0,328,
+329,0,0,0,0,336,0,0,340,0,341,342,343,0,0,346,0,348,0,0,0,0,0,0,349,351,0,0,355,
+0,363,0,364,0,368,369,0,370,0,0,0,0,0,0,0,372,0,0,0,0,0,0,0,0,0,0,0,373,0,375,0,
+0,0,0,376,377,0,0,394,395,396,0,0,398,0,0,0,0,400,0,0,408,0,0,0,0,420,0,0,0,0,0,
+0,421,0,0,422,423,0,0,429,435,436,442,0,0,443,0,444,445,453,456,0,457,0,0,0,0,0,
+458,0,0,0,459,0,0,0,460,0,462,463,465,0,0,0,0,0,0,466,469,0,0,0,0,0,0,470,0,0,0,
+474,0,476,0,0,0,0,483,0,485,0,0,0,486,0,0,488,491,492,0,0,497,499,500,0,501,0,0,
+0,505,0,0,506,0,0,0,507,0,0,0,509,0,0,0,0,511,512,519,0,0,0,0,0,0,529,530,0,0,0,
+534,0,0,0,0,543,0,0,0,0,0,0,0,0,0,553,0,0,0,0,557,560,0,0,0,0,0,0,561,0,564,0,0,
+0,0,0,0,565,566,0,575,0,619,0,620,0,0,623,624,0,0,0,625,0,0,626,627,0,0,628,0,0,
+0,0,630,0,631,0,0,0,0,0,0,0,0,0,641,0,0,0,0,643,656,668,0,0,0,673,0,0,0,674,0,0,
+0,0,0,0,0,0,682,0,687,0,690,0,693,699,700,0,0,0,0,0,0,704,705,0,0,0,0,707,710,0,
+711,0,0,0,0,726,0,0,729,0,0,0,730,731,0,0,0,0,0,752,0,0,0,762,0,763,0,0,767,0,0,
+0,770,774,0,0,775,0,0,0,0,0,0,0,0,0,0,776,0,0,0,777,783,0,0,0,785,788,0,0,0,0,
+790,0,0,0,793,0,0,0,0,794,0,0,804,819,821,0,827,0,0,0,834,0,0,835,0,0,0,841,0,
+844,0,850,851,859,0,860,0,0,0,0,0,0,0,874,0,876,0,877,890,0,0,0,0,0,0,0,0,893,
+894,898,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,899,0,0,0,900,904,906,0,0,0,907,0,908,909,
+0,910,0,0,0,0,911,0,0,0,0,0,916,0,0,0,922,925,0,930,0,934,0,0,0,0,0,943,0,0,944,
+0,953,954,0,0,0,0,0,0,955,0,962,963,0,0,976,0,0,977,978,979,980,0,981,0,0,0,0,
+984,0,0,985,0,0,987,989,991,0,0,0,0,0,0,0,0,0,992,0,0,0,993,0,0,0,0,0,0,996,0,0,
+0,1000,0,0,0,0,0,1002,0,0,0,0,1005,1007,0,0,0,1009,0,0,0,1010,0,0,0,0,0,0,1011,0
+,1012,0,0,0,0,1014,1016,0,0,0,1020,0,1021,0,0,0,0,1022,0,0,0,1024,0,0,0,0,0,0,
+1025,0,0,1026,1027,0,0,0,0,0,1031,0,1033,0,0,0,0,1034,0,0,0,1037,1040,0,0,0,1042
+,1043,0,0,1053,0,1054,0,0,1057,0,0,0,1058,0,0,1060,0,0,0,0,0,0,0,1061,0,0,1062,0
+,0,0,0,1063,0,0,0,0,1064,0,0,0,0,0,1065,0,0,0,0,1066,1067,0,0,0,1069,1070,1072,0
+,0,0,0,0,0,1073,0,1075,0,0,0,0,0,0,1080,1084,0,0,0,0,1088,0,0,0,0,0,0,1094,0,
+1095,0,1107,0,0,0,1112,1114,0,1119,0,1122,0,0,1126,0,1129,0,1130,0,0,0,0,0,1132,
+0,0,0,0,0,0,1144,0,0,1145,1146,0,1148,1149,0,0,1150,1151,0,0,0,0,1152,0,1153,0,0
+,0,0,0,1154,0,1163,0,0,0,1164,0,0,0,0,0,1165,0,1167,0,1170,0,0,0,0,0,1171,1172,0
+,0,0,0,0,0,0,0,1173,1175,1177,0,1186,0,0,0,0,0,0,0,0,0,0,1195,0,0,1221,0,0,1224,
+0,0,1227,0,0,0,0,0,1228,1229,0,0,1230,0,0,0,0,0,0,0,0,0,1231,0,0,0,1233,0,0,1243
+,1244,1246,1248,0,0,0,0,1254,1255,1258,1259,0,0,0,1260,0,0,1261,0,0,0,1262,1264,
+0,0,1265,0,0,0,0,0,0,0,0,0,0,0,0,1266,0,1267,0,0,0,0,1273,1274,1276,1289,0,0,
+1291,1292,1293,0,0,1294,1295,1296,0,0,0,0,1302,0,1304,0,0,0,0,0,0,0,0,0,1311,
+1312,0,1314,0,1316,1320,1321,0,0,0,0,0,0,0,1322,1323,1324,0,1335,0,1336,0,0,0,0,
+1341,1342,0,1346,0,1357,0,0,0,1358,1360,0,0,0,0,0,0,1361,0,0,0,1362,1365,0,1366,
+0,0,0,0,0,0,0,1379,0,0,0,0,0,0,0,0,0,0,0,0,1386,0,1388,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,1395,0,0,0,0,1403,0,1405,0,0,1407,0,0,0,0,0,1408,1409,0,1410,0,0,0,1412,1413,
+1416,0,0,1429,1451,0,0,1454,0,0,0,0,0,0,0,1455,0,0,0,0,0,0,0,1456,0,0,0,0,1459,
+1460,1461,1475,0,0,0,0,0,0,1477,0,1480,0,1481,0,0,1486,0,0,1495,0,0,0,1496,0,0,
+1498,1499,1501,1520,1521,0,0,0,1526,0,0,0,0,1528,1529,0,1533,1536,0,0,0,1537,
+1538,1549,0,1550,1558,1559,1572,0,1573,0,0,0,0,0,0,0,0,0,1575,0,0,0,0,0,1579,0,
+1599,0,1603,0,1604,0,1605,0,0,0,0,0,1608,1610,0,0,0,0,1611,0,1615,0,1616,1618,0,
+1619,0,0,1622,0,0,0,0,1634,0,0,0,1635,0,0,0,1641,0,0,0,0,0,0,0,0,0,1643,0,0,0,
+1650,0,0,1652,0,0,0,0,0,1653,0,0,0,1654,0,0,0,0,1655,0,1662,0,0,1663,1664,0,0,
+1668,0,0,1669,1670,0,1672,1673,0,0,0,0,0,1674,0,0,0,1675,1676,1680,0,1682,0,0,
+1687,0,0,0,0,0,1704,0,0,1705,0,0,1721,0,0,0,0,1734,1735,0,0,0,0,1737,0,0,0,0,
+1739,0,0,1740,0,0,0,0,0,0,0,0,0,0,1741,1743,0,0,0,0,1745,0,0,0,1749,0,0,0,1751,0
+,0,0,0,0,0,1760,0,0,0,0,1765,0,0,0,0,0,1784,0,1785,1787,0,0,0,0,1788,1789,0,0,0,
+0,1790,1791,1793,0,1798,1799,0,0,0,0,1801,0,1803,1805,0,0,0,1806,1811,0,1812,
+1814,0,1821,0,0,0,0,0,1822,1833,0,0,0,0,0,0,1848,0,0,0,0,0,0,1857,0,0,0,1859,0,0
+,0,0,1861,0,0,0,0,0,0,0,1866,0,1921,1925,0,0,0,1929,1930,0,0,0,0,0,0,0,0,0,1931,
+0,0,0,0,1932,0,0,0,1934,0,0,0,0,0,0,0,0,1946,0,0,1948,0,0,0,0,1950,0,1957,0,1958
+,0,0,0,0,0,1965,1967,0,0,0,0,1968,0,1969,0,1971,1972,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,1973,0,0,0,0,1975,0,0,0,0,1976,1979,0,1982,0,0,0,0,1984,1988,0,0,0,0,1990,
+2004,2008,0,0,0,2012,2013,0,0,0,0,0,0,0,0,0,0,2015,0,2016,2017,0,0,0,0,2021,0,0,
+2025,0,0,0,0,0,2029,2036,2040,0,2042,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2043,0,0,0,0,0,
+2045,0,0,0,0,0,0,0,2046,2047,0,2048,2049,0,2059,0,0,2063,0,2064,2065,0,0,2066,0,
+0,0,0,0,0,2069,0,0,0,0,2070,0,2071,0,2072,0,0,0,0,2080,2082,2083,0,0,0,0,0,2085,
+0,2086,2088,2089,2105,0,0,0,0,2107,0,0,2116,2117,0,2120,0,0,2122,0,0,0,0,0,2123,
+0,0,2125,2127,2128,0,0,0,2130,0,0,0,2137,2139,2140,2141,0,0,0,0,0,0,0,0,0,2144,
+2145,0,0,2146,2149,0,0,0,0,2150,0,0,2151,2158,0,2159,0,2160,0,0,0,0,0,0,2161,
+2162,0,0,2194,2202,0,0,0,0,0,0,2205,2217,0,2220,0,2221,0,2222,2224,0,0,0,0,2237,
+0,0,0,0,0,2238,0,2239,2241,0,0,2242,0,0,0,0,0,2243,0,0,0,0,0,0,2252,0,0,2253,0,0
+,0,2257,2258,0,0,0,2260,0,0,0,0,0,0,0,2262,0,2264,0,0,0,0,0,2269,2270,0,0,0,0,0,
+0,0,0,0,2271,0,2273,0,0,0,0,2277,0,0,0,0,2278,0,0,0,0,2279,0,2280,0,2283,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2287,0,0,0,0,0,0,0,2289,2290,0,0,0,0,2291,0,2292,0,
+0,0,2293,2295,2296,0,0,0,0,0,0,0,2298,0,0,0,0,0,2303,0,2305,0,0,2306,0,2307,0,0,
+0,0,0,0,0,0,0,0,0,0,2313,2314,2315,2316,0,0,2318,0,2319,0,2322,0,0,2323,0,2324,0
+,2326,0,0,0,0,0,0,0,2335,0,2336,2338,2339,0,2340,0,0,0,2355,0,2375,0,2382,2386,0
+,2387,0,0,2394,0,0,0,0,2395,0,2397,0,0,0,0,0,2398,0,0,0,0,0,0,0,2399,2402,2404,
+2408,2411,0,0,0,2413,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2415,0,0,2416,2417,2419,0,2420,
+0,0,0,0,0,2425,0,0,0,2426,0,0,0,0,0,0,0,0,0,0,0,0,2427,2428,0,2429,0,0,2430,2434
+,0,2436,0,0,0,0,0,0,2441,2442,0,2445,0,0,2446,2457,0,2459,0,0,2462,0,2464,0,2477
+,0,2478,2486,0,0,0,2491,0,0,2493,0,0,2494,0,2495,0,2513,2523,0,0,0,0,2524,0,0,0,
+0,0,0,2528,2529,2530,0,0,2531,0,2533,0,0,2534,2535,0,2536,2537,0,2538,0,2539,
+2540,0,0,0,2545,2546,0,0,0,0,0,0,0,2548,0,0,2549,0,2550,2555,0,0,0,0,0,2557,0,
+2560,0,0,0,0,0,0,0,0,0,0,0,2561,0,2576,0,0,0,0,0,0,0,0,0,2577,2578,0,0,0,2579,0,
+0,0,0,0,0,0,2580,0,0,0,0,2581,0,0,0,0,2583,0,2584,0,2588,2590,0,0,0,2591,0,0,0,0
+,2593,2594,0,2595,0,2601,2602,0,0,2603,0,2605,0,0,0,2606,2607,2611,0,2615,0,0,0,
+2617,0,0,0,0,0,0,0,0,0,0,0,0,0,2619,0,0,2620,0,0,0,2621,0,2623,0,2625,0,0,2628,
+2629,0,0,2635,2636,2637,0,0,2639,0,0,0,2642,0,0,0,0,2643,0,2644,0,2649,0,0,0,0,0
+,0,2655,2656,0,0,2657,0,0,0,0,0,2658,0,0,0,0,0,2659,0,0,0,0,2664,2685,0,2687,0,
+2688,0,0,2689,0,0,2694,0,2695,0,0,2698,0,2701,2706,0,0,0,2707,0,2709,2710,2711,0
+,0,0,2720,2730,2735,0,0,0,0,2738,2740,0,0,0,0,2747,0,0,0,0,0,0,2748,0,0,2749,0,0
+,0,0,0,2750,0,0,2752,2754,0,0,0,0,0,2758,0,0,0,0,2762,0,0,0,0,2763,0,0,0,0,0,0,0
+,2764,2767,0,0,0,0,2768,0,0,2770,0,0,0,0,0,0,0,2771,0,0,0,0,0,0,0,0,0,2772,0,0,0
+,0,0,2773,2776,0,0,2783,0,0,2784,0,2789,0,2790,0,0,0,2792,0,0,0,0,0,0,0,0,0,0,
+2793,2795,0,0,0,0,0,0,2796,0,0,0,0,0,0,2797,2799,0,0,0,0,2803,0,0,0,0,2806,0,
+2807,2808,2817,2819,0,0,0,0,0,2821,0,0,0,0,2822,2823,0,0,0,0,0,0,0,2824,0,0,2828
+,0,2834,0,0,0,0,0,0,2836,0,2838,0,0,2839,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2841,
+0,0,0,2842,0,0,0,0,0,2843,2844,0,0,0,0,2846,0,0,2847,0,2849,0,2853,0,0,0,0,0,
+2857,0,0,0,0,2858,0,2859,0,0,2860,0,2862,2868,0,0,0,0,2875,0,2876,0,0,2877,2878,
+2884,2889,2890,0,0,2891,0,0,2892,0,0,0,2906,2912,0,2913,0,0,0,0,0,0,0,0,2916,0,
+2934,0,0,0,0,0,2935,0,0,0,0,2939,0,2940,0,0,0,0,0,0,0,2941,0,0,0,2946,0,2949,0,0
+,2950,2954,2955,0,0,0,2959,2961,0,0,2962,0,2963,0,0,0,0,0,0,2964,2965,2966,2967,
+0,0,0,0,0,0,0,2969,0,0,0,0,0,2970,2975,0,2982,2983,2984,0,0,0,0,0,2989,0,0,2990,
+0,0,0,0,0,0,0,2991,0,0,0,0,0,0,0,0,2998,0,3000,3001,0,0,3002,0,0,0,3003,0,0,3012
+,0,0,3022,0,0,3024,0,0,3025,3027,0,0,0,3030,0,0,0,0,3034,3035,0,0,3036,0,3039,0,
+3049,0,0,3050,0,0,0,0,0,0,3051,0,3053,0,0,0,0,3057,0,3058,0,0,0,0,0,0,0,0,3063,0
+,0,3073,3074,3078,3079,0,3080,3086,0,0,0,0,0,0,0,0,3087,0,3092,0,3095,0,3099,0,0
+,0,3100,0,3101,3102,0,3122,0,0,0,3124,0,3125,0,0,0,0,0,0,3132,3134,0,0,3136,0,0,
+0,0,0,0,0,3147,0,0,3149,0,0,0,0,0,3150,3151,3152,0,0,0,0,3158,0,0,3160,0,0,3161,
+0,0,3162,0,3163,3166,3168,0,0,3169,3170,0,0,3171,0,0,0,0,0,0,0,3182,0,3184,0,0,
+3188,0,0,3194,0,0,0,0,0,0,3204,0,0,0,0,3209,0,0,0,0,0,0,0,0,0,0,0,3216,3217,0,0,
+0,0,0,0,0,3219,0,0,3220,3222,0,3223,0,0,0,0,3224,0,3225,3226,0,3228,3233,0,3239,
+3241,3242,0,0,3251,3252,3253,3255,0,0,0,0,0,0,0,0,3260,0,0,3261,0,0,0,3267,0,0,0
+,0,0,0,0,0,3271,0,0,0,3278,0,3282,0,0,0,3284,0,0,0,3285,3286,0,0,0,0,0,0,0,3287,
+3292,0,0,0,0,3294,3296,0,0,3299,3300,3301,0,3302,0,0,0,0,0,3304,3306,0,0,0,0,0,0
+,3308,0,0,0,0,0,0,0,0,0,3311,0,0,0,0,0,0,0,0,3312,3314,3315,0,3318,0,0,0,0,0,0,0
+,0,3319,0,0,0,0,0,3321,0,0,0,0,0,0,0,0,0,3322,0,0,3324,3325,0,0,3326,0,0,3328,
+3329,3331,0,0,3335,0,0,3337,0,3338,0,0,0,0,3343,3347,0,0,0,3348,0,0,3351,0,0,0,0
+,0,0,3354,0,0,0,0,0,0,0,0,0,0,3355,0,0,3365,3366,3367,0,0,0,0,0,0,3368,3369,0,
+3370,0,0,3373,0,0,3376,0,0,3377,0,3379,3387,0,0,0,0,0,3390,0,0,0,0,0,0,0,3402,0,
+3403,3436,3437,3439,0,0,3441,0,0,0,3442,0,0,3449,0,0,0,3450,0,0,0,0,0,0,0,3451,0
+,0,3452,0,3453,3456,0,3457,0,0,3458,0,3459,0,0,0,0,0,0,0,0,0,3460,0,0,3469,3470,
+0,0,3475,0,0,0,3480,3487,3489,0,3490,0,0,3491,3499,0,3500,0,0,3501,0,0,0,3502,0,
+3514,0,0,0,3516,3517,0,0,0,3518,0,0,0,0,3520,3521,3522,0,0,3526,3530,0,0,0,0,
+3531,0,0,0,0,3536,0,0,0,0,0,0,0,3539,3541,0,0,3542,3544,0,3547,3548,0,0,3550,0,
+3553,0,0,0,0,0,0,0,3554,0,3555,0,3558,0,3559,0,0,0,0,0,0,0,0,3563,0,3581,0,0,0,
+3599,0,0,0,3600,0,3601,0,3602,3603,0,0,3606,3608,0,3610,3611,0,0,0,0,0,0,0,0,0,
+3612,3616,3619,0,0,0,0,0,0,0,0,0,0,0,0,0,3624,3628,0,3629,3634,3635,0,0,0,0,0,0,
+3636,0,3637,0,0,3638,3651,0,0,0,0,0,0,3652,3653,0,0,0,0,3656,3657,0,0,0,0,0,3658
+,0,0,0,0,3659,0,3661,3663,3664,0,3665,0,3692,0,0,0,3694,3696,0,0,0,0,0,0,0,0,0,0
+,0,0,3698,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3700,0,0,3701,0,0,0,3708,3709,0,0,0,3711
+,3712,0,0,0,0,0,3723,0,3724,3725,0,0,3726,0,0,0,0,0,0,3728,3729,0,3734,3735,3737
+,0,0,0,3743,0,3745,0,0,3746,0,0,3747,3748,0,3757,0,3759,3766,3767,0,3768,0,0,0,0
+,3769,0,0,3771,0,3774,0,0,0,0,0,0,3775,0,0,0,0,0,0,3776,0,3777,3786,0,3788,3789,
+0,0,0,0,0,0,0,0,0,3791,0,3811,0,0,0,0,0,3814,3815,3816,3820,0,0,0,0,0,0,0,3821,0
+,0,3825,0,0,0,0,3835,0,0,3848,3849,0,0,0,0,3850,3851,3853,0,0,0,0,3859,0,3860,
+3862,0,0,0,0,0,3863,0,0,0,0,0,0,0,0,3873,0,3874,0,3875,3886,0,3887,0,0,0,0,3892,
+3913,0,3914,0,0,0,3925,3931,0,0,0,0,3934,3941,3942,0,0,0,0,3943,0,0,0,3944,0,0,0
+,0,0,3945,0,3947,0,0,0,3956,3957,0,0,0,0,0,0,0,0,0,3958,0,3959,3965,0,0,0,0,3966
+,0,0,0,3967,0,0,0,3968,3974,0,0,0,0,0,3975,3977,3978,0,0,0,0,3980,0,3985,0,0,0,0
+,0,0,0,0,3986,4011,0,0,4017,0,0,0,0,0,0,0,0,0,0,0,4018,0,0,0,0,4019,0,4023,0,0,0
+,4027,4028,0,0,0,0,0,0,0,0,4031,4034,0,0,4035,4037,4039,4040,0,0,0,0,0,4059,0,
+4060,4061,0,4062,4063,4066,0,0,4072,0,0,0,0,0,0,0,0,0,0,0,0,0,4088,0,0,0,0,0,
+4091,0,0,0,0,4094,4095,0,0,4096,0,0,0,0,0,4098,4099,0,0,0,4101,0,4104,0,0,0,4105
+,4108,0,4113,0,0,4115,4116,0,4126,0,0,4127,0,0,0,0,0,0,0,4128,4132,4133,0,4134,0
+,0,0,4137,0,0,4141,0,0,0,0,4144,4146,4147,0,0,0,0,4148,0,0,4311,0,0,0,4314,4329,
+0,4331,4332,0,4333,0,4334,0,0,0,4335,0,4336,0,0,0,4337,0,0,0,4342,4345,4346,4350
+,0,4351,4352,0,4354,4355,0,0,4364,0,0,0,0,4369,0,0,0,4373,0,4374,0,0,0,0,4377,0,
+0,0,0,4378,0,0,0,4380,0,0,0,4381,4382,0,0,0,0,0,0,0,4384,0,0,0,0,4385,0,0,0,4386
+,0,0,0,4391,4398,0,0,0,0,4407,4409,0,0,0,0,4410,0,0,4411,0,4414,4415,4418,0,4427
+,4428,4430,0,4431,0,4448,0,0,0,0,0,4449,0,0,0,4451,4452,0,4453,4454,0,4456,0,0,0
+,0,0,0,0,4459,0,4463,0,0,0,0,0,4466,0,4467,0,4469,0,0,0,0,0,0,0,0,0,0,0,0,0,4470
+,4471,0,4473,0,0,4475,0,0,0,0,4477,4478,0,0,0,4479,4481,0,4482,0,4484,0,0,0,0,0,
+0,0,4486,0,0,4488,0,0,4497,0,4508,0,0,4510,4511,0,4520,4523,0,4524,0,4525,0,4527
+,0,0,4528,0,0,0,0,4530,0,4531,0,0,4532,0,0,0,4533,0,0,0,0,0,4535,0,0,0,4536,0,0,
+0,0,0,4541,4543,4544,4545,4547,0,4548,0,0,0,0,4550,4551,0,4553,0,0,0,0,4562,0,0,
+4571,0,0,0,4574,0,0,0,4575,0,4576,0,4577,0,0,0,4581,0,0,0,0,0,4582,0,0,4586,0,0,
+0,4588,0,0,4597,0,4598,0,0,0,0,4616,4617,0,4618,0,0,0,0,4619,0,4620,0,0,4621,0,
+4624,0,0,0,0,0,4625,0,0,0,0,4657,0,4659,0,4667,0,0,0,4668,4670,0,4672,0,0,0,0,0,
+4673,4676,0,0,0,0,4687,0,0,0,0,4697,0,0,0,0,4699,0,4701,0,0,0,0,4702,0,0,4706,0,
+0,4713,0,0,0,4714,4715,4716,0,0,0,0,0,0,0,0,0,0,0,0,4717,0,0,4720,0,4721,4729,
+4735,0,0,0,4737,0,0,0,4739,0,0,0,4740,0,0,0,4741,0,0,0,0,0,4742,0,4745,4746,4747
+,0,0,0,0,0,0,0,0,4748,0,0,0,4749,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4751,
+4786,0,4787,0,4788,4796,0,0,4797,4798,0,4799,4806,4807,0,0,0,0,4809,4810,0,0,0,0
+,0,0,4811,0,0,0,0,0,4812,0,4813,0,0,4815,0,4821,4822,0,0,0,0,4823,0,0,0,0,0,0,0,
+0,0,0,4824,0,0,0,0,4826,0,0,0,4828,0,4829,0,0,0,4843,0,0,4847,0,4853,4855,4858,0
+,0,0,0,0,4859,0,4864,0,0,4879,0,0,0,0,4880,0,0,0,0,4881,0,4882,0,0,0,0,0,0,0,0,0
+,4883,0,0,0,0,4884,0,0,0,0,0,4886,4887,4888,4894,4896,0,4902,0,0,4905,0,0,4915,0
+,0,0,0,0,0,0,4916,4917,4919,4921,0,0,0,0,0,4926,0,0,0,0,4927,0,0,0,0,0,0,0,0,
+4929,0,4930,4931,0,4938,0,4952,0,4953,4957,4960,4964,0,0,0,0,0,0,0,5019,5020,
+5022,0,0,0,0,0,5023,0,0,0,5024,0,0,0,5025,0,0,0,0,5028,0,0,0,0,5029,5030,5031,0,
+5033,0,0,0,0,0,0,0,0,0,5034,5035,0,5036,0,0,5037,0,0,0,0,5038,0,0,5039,0,0,0,
+5041,5042,0,0,0,0,5044,5049,5054,0,5055,0,5057,0,0,0,5060,0,0,0,0,0,5063,0,5064,
+5065,0,5067,0,0,0,5068,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5076,0,0,0,0,0,0,
+0,5077,0,0,5078,5080,0,0,5083,0,0,0,0,0,0,0,0,5085,0,0,0,0,0,0,5098,5099,5101,
+5105,5107,0,5108,0,5109,0,0,0,0,0,0,0,5110,0,0,0,0,0,5117,5118,0,5121,0,5122,0,0
+,5130,0,0,0,5137,0,0,0,5148,0,0,0,0,0,0,0,5151,5154,0,0,0,5155,0,0,5156,5159,
+5161,0,0,0,0,5162,0,0,0,0,5163,5164,0,5166,0,0,0,0,0,0,0,0,0,0,5167,0,0,0,5172,0
+,0,0,0,0,0,5178,5179,0,0,5190,0,0,5191,5192,5194,0,0,5198,5201,0,0,0,0,0,5203,0,
+5206,5209,0,0,0,0,0,0,5213,0,5214,5216,0,0,0,0,0,5217,0,0,0,0,0,0,0,0,5218,5219,
+0,5231,0,0,5244,5249,0,5254,0,5255,0,0,5257,0,0,0,0,0,5258,0,5260,5270,0,5277,0,
+0,0,0,0,0,5280,5281,5282,5283,0,0,0,0,0,5284,0,5285,0,0,0,0,0,5287,5288,0,0,0,0,
+0,0,0,0,0,0,5289,5291,0,0,5294,0,0,5295,0,0,0,0,0,0,0,5304,0,0,5306,5307,5308,0,
+5309,0,0,5310,0,0,0,0,5311,5312,0,5313,0,0,0,0,0,5316,0,0,0,5317,0,0,0,0,0,0,0,0
+,0,5325,0,0,0,0,0,0,5326,0,5327,5329,0,5332,0,0,0,0,5338,0,0,0,0,0,0,0,0,5340,0,
+0,5341,0,0,0,5342,0,5343,5344,0,0,5345,0,0,0,0,0,0,5347,5348,0,0,0,0,0,0,0,0,0,
+5349,0,5350,0,5354,0,0,0,0,5358,0,0,5359,0,0,5361,0,0,5365,0,5367,0,5373,0,0,0,
+5379,0,0,0,5380,0,0,0,5382,0,5384,0,0,0,0,0,0,5385,0,0,0,0,5387,0,0,0,0,0,0,5388
+,5390,5393,0,0,0,0,0,0,0,0,0,0,0,5396,0,0,0,0,5397,5402,0,0,0,0,0,5403,0,0,0,
+5404,5405,0,0,0,0,0,0,0,0,0,0,0,0,5406,0,0,0,0,5410,0,0,5411,0,5415,0,0,0,0,5416
+,5434,0,0,0,0,0,0,0,0,0,0,0,5438,0,5440,0,0,0,0,0,0,5441,5442,0,0,0,5443,5444,
+5447,0,0,5448,5449,5451,0,0,0,5456,5457,0,0,0,5459,0,0,0,5461,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,5464,0,5466,0,0,5467,0,5470,0,0,5473,0,0,5474,0,0,5476,0,0,0,0,0,0,0,0
+,0,0,0,5477,0,0,0,0,0,0,0,5484,0,0,5485,5486,0,0,0,0,0,5488,0,0,0,0,0,0,0,5489,0
+,0,0,0,0,5507,0,0,0,5510,0,5511,0,0,5512,0,0,0,5513,0,5515,0,0,5516,5517,0,5518,
+0,0,5522,0,0,0,0,0,5534,5535,0,0,5536,0,5538,0,0,5543,0,5544,0,0,5545,0,5547,0,
+5557,0,0,5558,0,5560,5567,0,0,0,0,5568,0,0,0,5571,5573,0,5574,0,5575,0,0,0,0,
+5577,0,0,5598,0,0,0,0,0,0,0,0,0,5600,5609,0,0,0,0,5610,0,0,5612,0,5624,0,5625,0,
+0,0,5629,0,5641,0,5642,5643,0,0,0,0,0,0,5651,0,0,0,5652,5653,0,5661,5662,5678,0,
+5679,0,0,0,0,5685,5686,0,0,0,0,0,5690,5692,0,5703,0,0,0,0,0,5706,0,0,0,0,5707,0,
+0,0,0,0,0,5708,0,0,5709,0,5710,0,0,0,5712,0,5733,0,5734,5735,0,0,5744,5751,0,0,0
+,0,0,0,0,0,0,0,0,0,5752,0,5754,0,0,0,0,0,0,5757,5758,0,5760,5761,0,0,0,0,5763,
+5764,5765,0,5766,0,5767,5768,0,5770,0,0,0,0,5776,5780,0,0,0,0,5782,0,0,0,0,5784,
+0,0,5788,0,0,0,0,0,0,0,0,0,0,0,5797,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5799,0,0,5801,
+0,0,0,5811,0,0,0,0,0,0,5816,0,0,5827,0,0,0,0,0,0,0,0,5830,5831,0,0,5832,0,0,5833
+,0,5835,5844,5845,0,5846,0,0,0,0,0,5850,0,0,0,0,0,5852,0,5855,5857,0,0,5859,0,
+5861,0,0,5863,0,5865,0,0,0,5873,5875,0,0,0,5877,0,5879,0,0,0,5888,0,0,5889,5891,
+0,5894,0,0,0,0,0,0,5895,0,5897,0,0,0,0,0,0,5907,0,5911,0,0,5912,0,5913,5922,5924
+,0,5927,5928,0,0,0,0,5929,5930,0,5933,0,0,0,0,5949,0,0,5951,0,0,0,0,0,0,0,0,5953
+,0,0,5954,0,5959,5960,5961,0,5964,0,0,0,5976,5978,5987,5990,0,0,0,0,0,5991,0,
+5992,0,0,0,5994,5995,0,0,5996,0,0,6001,6003,0,0,0,0,6007,0,0,0,0,0,6008,0,0,6009
+,0,6010,0,0,0,6011,6015,0,6017,0,6019,0,6023,0,0,0,0,0,0,0,6025,0,0,0,0,0,0,0,0,
+0,0,6026,0,6030,0,0,6032,0,0,0,6033,6038,6040,0,0,0,6041,6045,0,0,6046,0,0,6053,
+0,0,6054,0,6055,0,0,0,0,0,0,6057,0,6063,0,0,0,6064,0,6066,6071,6072,0,0,0,0,0,0,
+6075,6076,0,0,6077,0,0,0,0,0,0,0,0,0,6078,6079,0,0,0,0,0,0,0,0,6080,0,6083,0,0,0
+,0,0,6084,0,0,6088,0,6089,0,0,6093,6105,0,0,6107,0,6110,0,0,0,6111,6125,6126,0,0
+,0,6129,0,0,0,0,6130,0,0,0,6131,6134,0,0,0,0,0,0,6142,0,0,0,0,0,6144,0,0,6146,
+6151,6153,0,6156,0,6163,0,6180,6181,0,0,0,0,0,6182,0,0,0,0,6184,6195,0,0,6206,0,
+6208,0,0,6212,6213,6214,0,6215,0,0,0,6228,0,0,0,6234,0,0,0,0,0,0,6235,6240,0,
+6242,6243,6244,0,6250,6255,0,0,0,0,0,6257,0,0,0,6258,6278,0,6284,0,0,0,6285,0,0,
+0,0,0,0,0,0,6286,0,0,0,6320,0,0,6322,6332,0,0,0,0,0,0,0,0,6334,0,0,0,0,0,0,0,
+6335,0,0,6337,0,6338,0,6339,6340,0,0,6356,6357,6369,0,0,0,6370,6371,6372,0,6373,
+0,0,0,0,0,6376,0,0,0,0,0,6382,6383,6384,0,0,0,0,6386,0,6389,6397,6400,6411,0,
+6414,0,0,0,0,0,0,0,6415,6416,0,0,0,0,0,0,6417,0,0,0,0,6418,0,0,0,0,0,0,0,6420,0,
+6421,6423,6425,0,6429,6430,0,6433,6438,0,0,0,0,0,0,0,0,0,0,6439,6440,0,0,6441,0,
+0,6444,0,0,0,0,6446,0,0,0,0,6447,6448,0,0,6450,0,0,0,6454,0,0,6455,0,6461,0,0,0,
+0,0,0,6462,0,0,6463,0,6464,0,6465,6467,0,0,0,6468,0,6479,6480,0,0,0,0,0,0,0,6481
+,0,0,6485,6487,0,0,0,0,0,0,6493,0,0,0,0,0,0,0,0,6494,6495,6496,0,0,0,0,0,6498,0,
+0,0,6507,6508,0,0,0,0,0,0,0,0,0,0,6511,6512,0,0,0,0,6513,0,0,0,6514,0,0,0,0,0,
+6516,0,0,6517,6518,0,0,0,6519,6520,6521,0,6523,0,0,0,0,6524,6528,0,6530,0,0,6532
+,0,6578,0,0,0,6583,0,6584,0,0,0,6587,0,0,0,6590,0,6591,0,0,0,0,0,6592,0,0,0,0,
+6593,6594,0,0,0,0,0,6599,6600,0,0,6601,6602,6604,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+6608,0,0,0,0,0,0,0,0,6610,6611,0,6615,0,6616,6618,6620,0,6637,0,0,0,0,6639,0,0,0
+,0,6641,0,6642,0,0,0,6647,0,6660,6663,0,6664,0,6666,6669,0,6675,6676,6677,0,0,0,
+0,0,0,0,0,0,6678,0,0,0,6679,0,6680,0,0,0,0,0,0,0,6693,0,0,0,0,0,0,0,0,0,6704,
+6705,6706,0,0,6711,6713,0,0,0,0,0,6716,0,0,0,6717,0,6719,6724,0,0,0,0,0,0,0,0,
+6725,6726,0,0,0,0,0,6728,6729,6735,0,6737,6742,0,0,6743,6750,0,6751,0,0,6752,
+6753,0,0,0,0,0,0,6754,0,0,0,0,0,6756,0,0,0,0,0,0,6763,0,0,6764,6765,0,0,0,6770,0
+,0,0,6776,6780,0,6781,0,0,0,6783,0,6784,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+6785,0,0,0,6792,0,0,0,6793,0,0,6802,0,0,0,0,0,6803,0,0,0,6804,0,0,0,6812,0,0,
+6823,0,6824,6839,0,0,0,0,6852,0,0,6854,0,6856,6857,0,0,0,0,0,0,0,0,0,6867,0,6868
+,6870,6872,0,0,0,6873,6874,0,0,0,0,0,6875,0,0,6877,0,0,0,0,0,0,0,6878,0,0,0,6879
+,0,6880,0,0,0,0,0,0,0,0,0,0,6887,0,6888,6891,6893,0,6895,0,0,0,0,0,0,0,0,6899,0,
+0,0,0,6901,0,0,0,0,6910,0,6911,0,0,6912,0,0,6913,6914,0,0,0,6915,0,0,0,6916,6919
+,0,0,0,0,0,0,6924,0,6925,0,0,0,6926,6927,6928,0,6929,0,6930,0,0,6931,6935,0,6936
+,0,0,0,0,6939,6940,6941,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6942,6948,6949,0,0,0,0,0,0
+,0,6952,6954,6963,6965,6966,0,0,6967,6968,0,0,0,0,0,0,0,0,0,6969,0,0,6970,6979,0
+,0,6980,0,0,6983,0,0,0,0,0,6984,0,0,0,0,0,0,0,6988,6990,6992,0,0,0,0,0,0,0,6995,
+0,0,0,7012,0,0,0,0,0,0,0,0,0,7019,0,0,0,0,0,0,0,0,7021,0,0,7022,7023,7028,0,7030
+,7033,0,0,0,0,0,0,7038,0,0,0,0,0,0,0,0,0,0,7039,0,0,0,0,0,7046,0,7047,0,0,0,0,0,
+0,0,0,0,0,0,7048,7052,0,0,0,0,0,7054,0,7060,0,0,0,0,7061,0,7065,0,0,0,0,7067,
+7069,0,7070,7071,7072,0,0,7078,0,7080,7081,0,7083,0,0,0,7084,7087,7088,0,0,7090,
+0,7093,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7107,0,0,7108,0,0,0,0,0,0,0,0,7110,0,7114,0
+,0,0,0,0,0,0,7115,0,7116,0,0,0,0,0,7117,0,0,7118,0,0,7124,0,7125,0,0,7126,0,0,0,
+0,7128,0,0,0,0,0,7129,0,7130,0,7132,7133,0,0,7134,0,0,7139,0,7148,7150,0,0,0,0,
+7152,0,0,0,7153,7156,7157,0,0,0,0,0,7158,0,0,0,0,0,0,0,0,0,0,7163,7165,7169,0,
+7171,0,0,0,0,0,0,0,0,0,7172,0,7173,7181,0,0,0,0,0,7182,7185,0,0,0,0,7187,0,7201,
+7204,0,0,0,0,0,7206,7207,0,0,0,0,7211,7216,0,7218,0,0,0,0,7226,7228,7230,7232,
+7233,7235,7237,0,0,0,0,7238,7241,0,7242,0,0,7247,0,0,0,7266,0,0,0,0,0,0,0,7289,0
+,0,7290,7291,0,0,7292,0,7297,0,0,0,0,0,0,0,0,0,0,7300,0,7301,0,0,0,0,0,0,0,0,0,0
+,0,0,7302,0,0,0,0,7305,0,0,0,0,7307,0,7308,0,7310,0,7335,0,0,0,0,0,0,0,7337,0,
+7343,7347,0,0,0,0,0,7348,0,7349,7350,7352,7354,0,0,0,0,7357,0,7358,7366,0,7367,
+7368,0,0,7373,0,0,0,7374,0,0,0,0,0,0,0,7376,0,0,0,7377,0,0,0,0,0,7378,0,7379,
+7380,0,0,0,0,0,7383,0,0,7386,0,0,0,0,7398,0,0,0,7399,7400,0,7401,0,0,0,0,0,0,0,
+7402,0,0,0,0,0,7405,0,0,0,0,0,7406,0,0,0,0,0,0,0,0,7421,7427,7429,0,0,0,7435,0,0
+,7436,0,0,0,7437,0,0,0,0,0,0,7438,7443,0,7446,0,7448,0,0,0,0,0,0,0,0,0,0,7456,0,
+0,0,0,0,7457,0,0,7461,0,0,0,0,0,7462,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7463,7466,7472,
+0,7476,0,0,7490,0,7491,0,0,7493,0,0,0,7498,7499,0,0,7508,0,0,0,0,0,7512,0,0,0,
+7513,7514,7516,0,0,0,0,7518,0,0,7519,7521,7522,0,0,0,7526,0,0,7529,0,0,7531,0,
+7536,0,7538,0,7539,0,0,7541,7542,7546,0,0,0,0,0,7547,0,7548,0,0,0,0,0,7550,0,0,
+7552,7553,0,0,0,0,0,0,0,0,0,0,7554,7563,0,7573,0,0,0,0,0,0,7574,7576,0,7578,7581
+,7583,0,0,0,7584,0,7587,0,0,0,0,0,7589,0,0,0,7594,0,0,7595,0,0,7600,7602,7610,0,
+0,0,0,0,7612,0,7613,7614,0,0,7615,0,0,7616,0,7620,0,7621,7622,0,7623,0,0,0,0,
+7626,0,0,0,0,7627,7629,7631,0,0,7633,0,0,0,0,0,7639,0,7640,7642,0,0,7643,0,0,0,0
+,7644,0,0,0,0,0,0,0,7645,0,0,0,0,0,7661,7662,7663,7665,0,7666,0,7667,0,7684,7688
+,7690,0,7691,0,0,0,0,0,0,7692,0,0,7700,0,7707,0,7708,0,7709,0,7721,0,0,0,7722,0,
+7724,0,0,0,0,0,0,7729,7731,0,7732,0,7733,7735,0,0,0,0,0,0,0,7739,0,0,7741,7745,0
+,7748,0,0,0,7751,0,0,0,7752,0,0,0,0,0,0,0,7753,0,0,7756,0,7757,0,7759,0,7760,0,0
+,0,0,7761,7768,0,0,7769,0,0,7770,0,0,7771,0,0,7772,0,0,7773,0,0,0,0,0,7778,7783,
+0,0,0,0,0,7784,7785,0,7790,0,0,0,0,7792,0,7798,0,0,0,0,0,7799,0,7810,0,0,7813,0,
+7814,0,7816,0,7818,7824,7825,7826,0,7828,7830,0,0,0,7840,0,7842,0,7843,0,0,0,0,
+7844,0,0,0,0,0,0,0,7846,0,0,0,0,0,7856,7857,7858,7862,0,7865,0,0,7866,0,0,7913,0
+,0,0,0,7914,0,0,7915,7917,7918,7919,0,7920,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7921,
+7922,0,7924,0,0,7925,0,0,7927,0,7930,7935,0,0,7937,0,0,0,0,0,0,7939,0,7940,0,0,0
+,0,0,7941,0,0,0,0,7945,0,0,0,0,7949,0,0,0,0,0,0,0,0,7950,0,7953,0,0,0,0,0,0,0,
+7968,0,0,0,0,7969,7972,7992,0,7993,0,0,0,0,0,0,0,0,0,0,0,7994,0,0,0,0,8007,8008,
+0,0,0,0,0,0,0,0,0,0,0,0,8010,0,0,0,8012,0,0,0,0,0,0,0,0,8018,0,8028,8029,0,0,
+8030,0,0,8032,8033,0,0,8034,8036,0,0,0,0,0,0,0,0,0,0,8037,0,0,0,8043,8052,8059,
+8060,0,0,8061,0,0,0,8062,0,8063,0,8064,0,8066,8068,0,0,0,8080,8081,0,8089,0,0,0,
+0,0,8092,0,0,0,0,0,0,8093,8110,0,0,0,0,0,0,0,8111,0,0,0,0,0,8112,8115,0,8117,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8120,8121,8122,8128,8129,8130,8131,0,0,8139,0,0,
+8144,0,0,0,0,8145,8146,8153,0,0,0,0,0,0,0,0,8154,0,8157,8160,8162,0,8164,8165,0,
+0,0,0,8166,8167,0,0,8179,0,0,0,8185,0,0,0,8186,0,0,8187,0,0,0,8188,0,0,0,0,0,
+8204,0,0,0,0,8210,0,0,0,0,0,8213,0,8214,0,0,8215,0,0,0,0,0,0,8218,0,0,0,0,0,0,0,
+0,0,8219,0,8221,0,0,8222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8225,0,0,0,8233,0,0,
+8242,0,0,0,0,0,0,0,0,0,0,0,8247,0,8248,8252,0,8256,8257,0,0,8261,0,8264,8265,0,0
+,0,0,8267,0,0,0,8269,0,0,0,0,0,0,0,0,0,8270,0,0,0,8278,0,8279,8283,0,0,8285,8286
+,8289,8292,0,0,0,0,8293,8295,8299,8300,8301,0,0,0,0,0,0,8304,8307,0,0,0,0,0,0,0,
+8321,0,0,0,8322,8323,8325,8326,8327,0,0,8332,8338,0,0,8340,0,0,0,0,0,8350,0,0,
+8351,0,8354,8355,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8360,8372,0,0,0,0,0,0,0,0,8377,0,0,
+0,0,8380,0,0,0,8383,0,8384,0,0,0,0,8386,8392,0,0,8394,0,0,0,0,0,0,0,8396,8397,0,
+8398,0,8399,0,0,0,0,0,8400,0,8401,8410,8411,0,8412,8413,8422,0,0,0,0,8423,0,0,0,
+0,8424,0,0,8425,0,0,0,0,0,0,0,8441,8442,0,0,0,0,0,0,8443,0,0,8444,0,8447,0,0,0,0
+,8451,0,8458,0,8462,0,0,8468,0,8469,0,0,0,8470,0,8473,8479,8480,0,0,0,0,8481,
+8483,0,0,0,0,0,0,0,0,0,8484,0,0,8490,0,0,0,0,0,0,8491,8493,8494,0,8528,0,0,0,0,0
+,0,0,8530,0,0,0,0,0,0,0,0,8534,8538,8540,0,0,8541,0,0,8545,0,8557,0,0,8569,8570,
+0,0,8571,8574,8575,8579,0,8583,0,0,0,0,8591,0,0,0,0,0,0,0,0,8606,0,8607,0,0,0,0,
+0,0,0,0,0,8608,0,0,8609,0,0,0,8610,0,0,0,8611,0,0,8613,8617,8621,0,0,8622,0,8623
+,0,8624,8625,0,0,0,0,0,0,0,0,0,8637,8638,8639,8650,0,0,0,0,8652,8654,8655,0,0,0,
+0,0,0,0,0,0,0,8656,0,0,0,0,0,8657,0,0,0,0,0,0,0,0,0,8658,0,0,8659,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,8660,0,0,0,0,0,0,8661,8663,8664,0,0,0,0,8665,0,8669,0,
+0,0,0,0,0,0,8671,8674,0,8684,0,8686,0,0,0,8689,0,0,0,8690,0,8706,0,0,0,0,0,0,0,0
+,0,0,0,8710,0,8711,8713,8714,8724,8727,8728,8733,8736,0,8737,8739,0,0,0,0,8742,
+8743,8745,8754,0,0,0,0,8756,0,0,0,0,0,0,8757,8760,0,0,0,0,0,8762,8763,8764,0,
+8766,8769,8770,8773,0,8774,0,8779,0,0,0,0,8780,0,0,8781,0,0,8783,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8784,0,0,0,0,0,0,0,0,8785,0,0,0,0,8786,0,0,0,0,8788
+,8790,0,0,0,8803,0,8813,8814,0,0,0,0,0,8815,8816,0,0,0,0,8818,0,0,0,0,8822,8828,
+8829,0,8831,0,0,0,0,8833,0,0,0,8834,0,0,0,8835,0,8836,0,0,0,8837,0,0,0,0,0,0,
+8838,8839,0,0,0,0,0,0,0,0,0,0,0,8840,0,0,0,8841,0,8842,0,0,0,8846,0,0,0,0,0,0,0,
+8847,0,8848,0,0,8864,0,0,8866,0,0,8870,8872,0,0,8873,8874,0,0,0,0,0,0,8875,0,
+8876,0,0,0,0,8896,8900,0,0,0,0,8901,0,0,0,0,0,8904,0,8907,0,0,0,0,8911,8912,8913
+,0,0,0,8914,0,8915,0,0,0,0,0,0,0,0,0,0,0,0,8916,0,0,0,8929,0,0,0,0,0,0,0,0,0,0,
+8930,0,8932,0,8943,0,0,0,8945,8947,0,0,0,0,8949,0,8950,0,8954,8957,0,0,8970,0,0,
+0,0,8971,0,8996,0,0,0,0,8997,9000,0,0,0,0,9001,9002,0,9004,9009,9024,0,0,0,0,0,0
+,0,0,0,0,0,0,9027,9082,0,0,9083,9089,0,0,0,0,0,0,9090,0,0,0,9092,0,0,9093,0,9095
+,0,0,9096,9097,9101,9102,0,0,0,0,0,0,0,0,9112,0,0,0,0,0,0,9114,0,0,9120,0,9121,
+9122,0,0,0,9123,9124,0,0,9125,0,0,9126,0,9127,0,0,9129,9131,0,0,0,9132,0,0,9136,
+0,9144,0,0,9148,0,0,0,0,0,0,9149,0,9152,9163,0,0,9165,0,0,0,0,0,0,0,0,0,0,0,0,0,
+9166,0,9169,0,0,0,0,0,0,0,9170,0,0,0,0,9172,0,9174,9175,9176,0,9177,0,0,0,0,0,0,
+0,0,9186,0,9187,0,0,0,9188,9189,0,0,9190,0,0,0,0,9191,0,0,0,9193,0,0,0,0,9197,
+9198,0,0,0,9208,9211,0,0,0,0,9216,9217,0,9220,0,0,0,0,9221,9222,9223,0,9224,9225
+,0,0,9227,0,9228,9229,0,0,9230,0,9232,0,9233,0,0,0,0,0,9234,9235,0,0,9237,0,0,0,
+0,0,0,0,0,9238,9240,0,0,9241,0,0,0,0,9244,0,0,0,0,9247,0,0,0,0,0,0,0,0,0,0,9248,
+0,0,0,9249,0,0,0,0,0,9250,0,0,0,0,9251,0,0,9252,9255,0,0,0,9256,0,0,0,0,0,0,0,
+9257,0,0,9258,0,0,0,0,0,0,9259,0,0,0,0,0,9262,9263,0,0,9265,9266,0,0,0,0,0,0,0,0
+,9268,9271,0,0,0,0,0,0,0,0,0,9273,0,0,0,9276,9277,9279,0,0,0,0,0,0,0,9280,0,0,
+9293,0,0,0,0,0,9297,9301,0,0,0,0,0,0,0,0,0,0,0,9308,9309,9313,9321,9322,0,9326,
+9327,0,0,9477,0,9479,0,0,0,0,9482,0,0,0,9483,0,9484,0,0,0,0,0,0,0,0,0,9485,0,0,
+9486,0,0,0,9489,0,0,0,0,9490,9491,0,0,0,0,9493,0,9495,9496,0,0,0,0,0,0,0,0,9500,
+0,9502,0,0,0,0,0,9504,9507,0,9509,0,9511,0,0,9513,0,0,0,0,0,0,0,0,9515,0,0,0,0,0
+,0,9516,9517,0,0,0,0,9532,0,0,9533,0,0,9538,0,9539,9540,0,0,0,0,9541,0,0,0,9542,
+0,0,0,0,0,0,0,0,9544,9545,0,9546,0,0,0,0,0,0,9547,9548,0,0,0,9550,0,9557,0,9558,
+0,9561,0,9563,9570,0,9572,9574,9575,0,0,0,9577,9592,0,0,9596,0,0,0,9598,0,9600,0
+,9601,0,0,0,0,0,0,9608,0,9638,9639,0,0,0,0,0,0,0,9641,0,0,9643,9644,9645,9646,0,
+0,0,9648,0,0,0,0,0,0,0,9650,9654,0,0,0,0,0,0,0,0,9655,0,0,0,0,0,9656,0,9657,0,0,
+0,0,9658,0,0,9659,0,0,9664,0,0,9665,0,9667,9669,0,0,0,0,0,0,0,0,0,0,0,0,9671,0,
+9673,9681,0,0,0,0,9682,9683,9684,0,0,0,0,9686,9698,0,0,9700,9701,9702,0,9703,
+9717,0,0,0,0,9718,0,9726,0,0,0,0,9727,0,0,0,9728,0,9742,0,9744,0,0,0,9750,0,9754
+,9755,0,0,0,0,0,9756,0,9757,9768,0,9769,0,0,0,9770,9771,0,9773,0,9774,0,9775,0,0
+,0,9776,9777,9784,0,0,0,9786,0,9789,0,0,0,0,9793,9794,0,0,0,9808,0,0,0,0,0,9811,
+0,0,0,0,0,0,0,0,0,0,0,0,9812,0,9820,0,9823,0,9828,0,0,0,0,9830,0,0,9833,9836,0,0
+,0,9840,0,0,0,9841,0,0,9842,0,9845,0,0,0,9847,9848,0,0,9855,0,0,0,0,0,0,9856,
+9863,9865,0,0,0,0,0,0,0,0,9866,9867,9868,9873,9875,0,0,0,0,0,0,9880,0,9886,0,0,0
+,9887,0,0,9891,0,0,0,0,0,0,0,9906,9907,9908,0,0,0,9909,0,0,0,0,0,0,9910,0,0,0,0,
+9913,0,0,0,0,9914,0,0,0,0,0,9922,0,0,0,0,9923,9925,0,0,0,0,0,0,9930,0,0,0,9931,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9932,0,9939,0,0,9940,9962,9966,0,9969,9970,0,0,9974
+,0,9979,9981,9982,0,0,0,9985,0,0,0,0,0,0,9987,0,0,0,0,0,0,0,9988,9993,0,0,9994,0
+,0,0,9997,0,10004,0,0,0,0,0,10007,10019,10020,10022,0,0,0,10031,0,0,0,0,0,10032,
+0,0,10034,0,10036,0,0,0,0,10038,0,10039,10040,10041,10042,0,0,0,0,0,10043,0,0,0,
+0,0,10045,10054,0,0,0,0,10055,0,0,10057,10058,0,0,0,0,0,0,10059,0,0,0,0,0,0,0,
+10060,0,0,0,0,0,0,0,10063,0,10066,0,0,0,10070,0,10072,0,0,10076,10077,0,0,10084,
+0,10087,10090,10091,0,0,0,10094,10097,0,0,0,0,0,0,10098,0,0,0,0,0,0,10103,0,
+10104,0,10108,0,0,0,0,0,0,0,0,10120,0,0,0,10122,0,0,10125,0,0,0,0,10127,10128,0,
+0,10134,0,10135,10136,0,10137,0,0,10147,0,10149,10150,0,0,10156,0,10158,10159,
+10160,10168,0,0,10171,0,10173,0,0,0,10176,0,0,0,0,10177,0,0,0,0,10178,0,0,0,0,
+10194,0,10202,0,0,10203,10204,0,10205,10206,0,10207,0,0,0,0,10209,0,0,0,0,0,0,0,
+10213,0,0,0,0,0,0,10217,0,10229,0,10230,10231,0,0,10232,0,0,10237,10238,10244,0,
+0,0,0,0,10250,0,10252,0,0,0,0,0,0,10255,0,0,10257,0,0,0,0,0,0,10258,0,10259,0,0,
+0,0,0,0,0,0,10260,0,0,0,0,0,0,0,10284,10288,10289,0,0,0,10290,0,10296,0,0,0,0,0,
+10297,0,0,0,0,0,0,10298,0,0,0,0,10299,10303,0,0,0,0,0,10306,0,0,0,10307,0,10308,
+0,0,0,0,10311,0,0,0,0,0,0,0,10315,10317,0,0,0,10318,10319,0,10321,0,10326,0,
+10328,0,0,0,0,10329,0,0,10331,0,10332,0,0,0,0,0,0,10334,0,0,10335,10338,0,0,0,0,
+0,10339,10349,0,0,0,0,0,0,10351,0,10353,0,0,0,0,0,0,10362,0,10368,0,10369,0,0,0,
+10372,10373,0,0,0,0,0,10374,0,0,0,10375,0,10376,0,0,10386,10388,10390,0,0,0,0,0,
+0,0,10391,0,0,10392,10394,0,0,10396,0,10397,0,10403,0,0,0,0,0,0,0,0,10404,0,
+10405,10410,0,0,10411,0,10412,0,0,0,0,0,0,0,10421,10422,10423,0,0,0,0,0,0,0,0,0,
+10425,0,0,10427,0,0,10430,0,0,0,0,0,10432,0,10433,10434,0,0,0,0,10436,10437,0,
+10438,0,10439,0,10444,10446,0,0,0,0,0,10448,0,0,0,0,0,10449,0,0,0,0,0,0,0,10451,
+0,10453,0,0,0,10454,10457,0,0,10459,0,10469,0,0,0,0,0,10472,10481,0,0,0,0,0,
+10482,10483,0,10492,0,0,0,0,0,0,0,0,0,0,10499,0,0,0,10502,0,0,10510,0,10521,
+10524,0,0,10525,10526,10528,0,0,0,0,0,0,0,0,10530,0,0,0,0,10533,0,10534,0,0,0,0,
+0,0,0,0,0,0,10535,10536,0,0,10544,0,10553,10556,0,10557,10559,0,0,0,0,0,10562,
+10563,10564,0,10565,0,0,0,10566,0,10567,0,0,0,0,10575,0,0,10576,0,10578,0,0,0,0,
+0,0,0,0,0,0,10585,10586,10587,10589,0,10590,0,0,10594,0,0,0,0,0,10598,0,0,10601,
+0,0,0,10602,0,10603,0,10604,0,10605,0,0,10607,0,10626,0,10627,0,0,0,0,0,10629,
+10630,10631,0,0,0,10646,0,0,0,10647,0,10650,0,10651,0,0,0,10652,10653,10655,0,
+10658,0,0,10659,0,10667,0,0,0,0,10669,0,0,0,0,0,0,0,0,0,10670,0,0,0,10671,0,0,0,
+0,10672,10673,0,10674,0,0,0,10676,0,0,0,0,0,0,10678,0,10682,0,0,10692,0,10697,0,
+0,0,0,10698,0,0,0,10700,0,0,0,0,0,10703,0,10704,0,0,0,0,0,0,0,10705,0,10715,
+10718,10720,0,0,10722,0,0,0,0,0,0,0,0,10723,0,0,0,0,10726,0,0,0,0,0,10727,10730,
+10743,0,0,0,0,0,0,10744,0,0,10745,0,0,0,0,0,0,10748,0,0,0,0,10750,0,0,10752,
+10753,0,0,0,10756,0,0,0,0,0,0,10758,0,0,0,10759,0,10769,0,0,10772,0,0,0,0,0,0,
+10773,0,0,0,10777,0,0,10779,0,0,0,0,0,0,0,0,10780,10784,0,0,0,10789,0,0,0,10791,
+0,0,0,0,0,0,0,0,0,10795,0,0,10796,0,10808,0,10809,0,0,0,10810,0,0,0,10812,0,0,
+10814,0,0,0,0,0,0,0,0,0,10815,0,0,0,0,10816,10817,0,0,0,0,10819,0,10820,0,0,0,0,
+10821,10822,10823,0,10826,10849,0,0,0,0,10850,0,0,10852,0,10853,0,0,10856,0,0,
+10857,10858,10859,10860,0,0,0,0,0,0,10863,0,10866,10867,10872,10890,0,0,10891,
+10892,0,0,0,0,0,10893,0,0,0,10896,10899,0,0,10900,10902,0,0,0,0,0,10903,0,0,0,0,
+0,0,0,0,0,0,0,0,10905,0,10906,0,0,0,0,10908,10911,0,10912,0,0,10916,0,0,0,0,0,
+10917,0,10918,0,0,0,10923,0,0,0,0,0,10924,0,0,10928,10929,0,0,10930,0,0,0,10932,
+0,0,0,0,10939,0,0,10945,0,0,0,10947,0,0,10948,0,0,0,0,0,0,0,0,0,0,0,0,10958,0,
+10960,10962,0,0,10964,0,0,0,10966,0,0,0,0,0,0,0,0,0,0,10967,0,0,0,10968,0,0,0,
+10973,0,0,0,0,0,10975,0,0,0,10976,10978,0,0,10982,10984,10987,0,0,10988,0,10989,
+0,0,10991,0,0,0,0,10992,0,0,0,10993,0,10995,0,0,0,10996,10997,0,0,0,10998,0,
+10999,0,11001,0,0,0,0,0,0,11010,11012,0,11013,11016,11017,0,0,11019,11020,11021,
+0,0,0,0,0,0,0,0,0,0,0,0,11022,0,0,11023,11029,0,0,0,0,11031,0,0,0,11034,0,0,0,0,
+11055,0,0,0,0,0,11056,11060,0,0,0,0,0,0,11061,0,0,11064,11065,0,11066,0,11069,0,
+11085,0,0,0,0,0,11086,0,0,0,11088,0,0,0,11094,0,0,0,11095,11096,0,0,0,0,0,0,
+11097,11098,0,0,0,0,0,0,11099,0,0,11102,11108,0,0,0,11109,0,11114,11119,0,11131,
+0,0,0,11142,0,0,11143,0,11146,0,11147,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11148,0,
+11149,11152,11153,11154,0,11156,0,11157,0,0,0,11158,0,0,11159,11160,0,0,0,0,0,0,
+0,0,0,0,0,0,11163,0,0,11164,11166,0,0,0,11172,11174,0,0,0,11176,0,0,0,0,0,11182,
+11183,0,0,0,11184,11187,0,0,11188,11189,0,0,0,0,0,0,11194,0,0,0,0,0,0,0,11200,
+11202,0,0,0,0,0,0,11203,0,11204,0,0,0,0,0,11205,0,0,0,11206,0,11207,0,0,11209,0,
+11211,0,11214,0,0,11231,0,0,0,11293,11295,0,0,11296,11297,11302,0,0,0,11307,0,0,
+0,0,11309,11310,0,11311,0,0,0,11313,0,11314,0,0,0,0,11334,0,11338,0,0,0,11339,0,
+0,0,0,0,11340,0,11341,11342,0,11344,0,11345,0,0,0,11348,11349,0,0,11350,0,0,0,
+11355,0,0,0,0,0,0,11356,0,11357,11370,0,0,11371,0,11374,11376,0,0,0,11377,0,0,
+11378,11383,0,11386,11399,0,11400,11406,0,0,0,11408,0,0,11409,11412,0,0,0,0,
+11417,0,0,0,11418,0,11421,0,11426,11429,0,0,0,0,0,11430,0,11437,0,11438,0,0,0,0,
+0,11440,11453,0,0,0,0,0,0,11454,0,0,0,0,11455,0,0,11456,11460,11461,11463,0,
+11469,0,11473,0,0,0,0,11474,0,0,0,11475,0,11476,11477,11480,0,0,0,0,11481,0,0,
+11484,0,0,11487,0,0,0,0,0,0,0,0,0,0,11497,0,0,11502,0,11509,0,0,11510,11511,
+11513,0,0,0,0,0,0,0,0,0,0,11515,0,0,0,0,11516,0,11520,11521,0,0,0,0,0,0,0,0,0,0,
+0,11529,11530,11531,11534,0,0,11543,0,0,0,0,0,11547,0,11548,0,0,0,0,0,11552,
+11556,0,11557,0,0,11559,0,11560,0,0,0,0,0,0,11561,0,0,11563,11564,0,11565,0,0,0,
+0,11567,0,0,0,11569,0,11574,0,11575,0,0,0,11577,0,11578,0,0,0,11580,11581,0,0,0,
+11582,11584,0,0,0,0,0,0,0,11587,0,11588,11591,0,11595,0,0,0,0,0,0,0,0,11596,0,
+11597,0,0,0,0,11598,11601,0,0,0,11602,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11603,
+11604,0,11606,0,0,11608,0,0,0,0,11610,0,0,11611,0,0,0,0,11613,0,11622,0,0,0,
+11623,0,0,0,0,11625,0,0,11626,11627,11628,11630,0,0,0,0,0,0,11639,0,0,11646,0,
+11648,11649,0,11650,0,0,0,0,0,0,0,0,0,11651,0,0,11652,11653,11656,0,0,11677,
+11679,0,0,0,0,11680,0,0,11681,0,11685,0,0,0,0,0,0,0,0,11688,0,0,0,11716,0,11719,
+0,0,0,0,0,11721,0,0,11724,11743,0,0,0,0,0,0,0,0,11745,11748,11750,0,0,0,0,0,
+11751,0,0,0,11752,11754,0,11755,0,0,0,0,0,0,0,11759,0,0,0,0,0,0,11760,0,0,0,
+11761,0,0,0,0,0,0,11766,11767,0,11772,11773,0,11774,0,0,11775,0,11777,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,11778,11780,0,0,0,0,0,0,0,11783,0,11784,0,0,0,11785,
+0,0,0,11786,0,0,0,0,11788,0,0,11789,11791,11792,0,0,0,0,11795,11834,11835,11836,
+0,0,11837,0,0,0,11838,0,0,11846,11851,0,11852,0,11869,0,0,0,11871,0,0,0,11872,
+11874,0,0,0,0,0,0,11875,0,11876,11877,0,0,0,0,0,0,0,0,0,0,11883,0,0,0,0,0,0,0,
+11884,0,11885,0,11886,0,0,11887,0,11894,11895,11897,11909,11910,0,11912,11918,0,
+0,11920,0,11922,11924,11927,11928,0,0,0,0,11929,0,11934,0,0,0,0,0,11941,11943,
+11944,0,11945,0,0,0,0,11948,11949,0,0,0,0,11953,0,11954,0,11955,0,11956,0,0,0,0,
+0,11957,0,0,11959,0,0,0,0,0,0,0,0,11961,0,0,0,0,0,11978,0,0,0,11979,11980,11986,
+11987,0,11992,0,0,0,0,0,11993,0,0,0,11994,0,11999,12004,12005,12006,0,0,0,0,0,
+12011,0,0,12012,12014,0,0,12015,0,0,12019,12028,0,0,12029,0,0,12032,12033,0,0,0,
+0,12034,0,12041,12043,0,0,12044,0,0,0,0,0,0,0,12046,0,0,0,0,0,0,0,12054,12055,0,
+12056,0,0,0,12060,12064,0,0,0,0,0,12065,12067,12068,0,0,0,0,0,0,0,0,12074,0,0,0,
+12075,12076,0,0,0,12079,0,12081,12086,12087,0,0,12088,0,0,0,0,12089,0,12092,0,0,
+0,0,12097,0,0,0,0,0,0,0,0,12098,0,0,0,0,0,0,0,0,0,0,0,0,0,12102,12103,12104,
+12111,0,0,12114,12116,0,0,0,12118,0,0,0,12119,12120,12128,0,0,0,0,12130,0,0,0,0,
+0,0,12131,0,0,0,12132,12134,0,0,0,0,12137,0,12139,0,12141,0,0,12142,0,0,0,12144,
+0,0,0,0,0,12145,0,12148,0,12153,0,0,0,0,12154,12171,12173,0,0,0,12175,0,0,0,0,
+12178,0,0,0,0,0,0,0,12183,0,0,0,0,0,0,0,0,12184,0,0,0,12186,0,0,0,0,0,12187,
+12188,0,0,12189,0,12196,0,12197,0,0,12198,0,12201,0,0,0,0,12203,0,12209,0,0,0,0,
+12210,12211,12212,12213,0,12217,12218,0,0,0,0,0,0,0,0,0,12222,0,0,0,0,0,0,0,
+12223,0,0,12229,0,0,0,0,12233,0,0,0,0,12234,0,0,12236,12242,0,0,0,12243,0,0,0,
+12244,12253,0,12254,12256,0,12257,0,0,12275,0,0,0,0,0,12277,0,0,0,0,0,12278,0,
+12289,0,0,12290,0,12292,12293,0,0,12294,0,12295,0,0,12296,0,12297,0,12298,0,0,0,
+0,12301,0,0,0,0,0,0,0,0,0,0,0,0,0,12309,0,12338,12340,0,0,0,0,12341,0,0,0,0,0,0,
+0,0,12342,12343,0,12344,0,0,0,0,0,0,0,0,0,12345,0,0,0,0,0,0,0,0,12346,0,0,0,0,
+12348,0,0,0,0,0,0,0,0,0,0,0,0,12350,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12351,0,12355,
+12356,12357,0,0,12367,12370,12371,0,0,0,0,0,12372,12376,0,0,0,0,0,0,0,0,12379,0,
+12382,0,12383,0,0,12384,0,0,0,0,12393,0,0,12394,0,0,0,0,12398,12403,0,0,12404,0,
+0,0,0,0,0,0,0,0,0,0,0,0,12410,0,0,0,12411,0,0,0,12412,0,0,0,0,12420,0,12421,0,0,
+0,0,0,12423,0,12425,12429,0,0,0,12431,12432,0,0,0,0,0,0,0,0,0,0,0,0,12434,0,0,0,
+0,0,12435,12436,0,0,0,0,0,0,0,0,12437,0,0,0,0,0,12438,0,0,0,0,0,0,0,0,12445,0,0,
+0,12450,12451,0,0,0,0,0,0,0,0,12452,12475,0,0,12493,12494,0,0,0,12495,0,0,0,0,
+12496,12502,12509,0,0,0,0,12510,0,12512,12513,0,0,0,0,12514,0,0,0,12515,0,12520,
+0,0,0,12524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12527,0,0,0,12528,0,0,0,12529,0,0,0,
+0,0,12530,0,12535,0,0,12536,0,12538,0,0,0,0,0,0,0,0,0,0,0,0,12540,0,12548,0,0,0,
+0,0,12550,0,0,0,12551,12552,0,0,0,12554,0,0,0,0,0,0,0,0,12555,0,0,12562,0,12565,
+0,12566,0,0,0,0,0,0,0,0,0,0,0,0,12569,0,0,0,12571,12574,0,0,0,0,0,0,0,12577,0,0,
+0,0,0,0,0,12578,12579,12603,0,12608,0,0,12611,0,12612,0,12615,0,12625,0,0,0,0,
+12627,12646,0,12648,0,0,12657,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12670,0,0,12671,0,
+12673,12677,0,0,0,0,0,0,0,0,0,0,0,12679,0,12681,0,12682,12693,0,12694,0,12697,0,
+12701,0,0,0,12703,12704,0,0,0,0,12707,12737,0,0,12739,0,0,12740,0,0,12742,12743,
+0,0,0,0,0,0,0,0,0,12745,0,12746,12747,0,12748,0,0,12759,12767,0,0,0,0,12773,0,
+12774,12778,0,0,0,0,0,0,0,12779,0,0,0,0,0,12780,12793,0,12824,0,12825,0,12836,0,
+0,0,0,12839,0,12842,0,0,0,0,0,0,0,0,0,0,0,0,12843,12845,0,12846,0,0,0,0,12847,0,
+0,12850,12852,12853,0,0,0,12854,0,0,0,12855,0,12856,0,12858,0,0,12859,0,12862,0,
+12863,0,0,12866,0,12869,12872,12873,0,0,0,0,0,0,0,0,0,12875,0,12877,0,0,12878,0,
+0,0,0,0,0,0,0,0,12884,12885,12888,0,12889,0,0,0,0,12893,0,0,0,12895,12896,12898,
+0,0,0,0,0,0,0,12902,0,12909,12910,0,12926,0,12928,0,0,0,12929,0,12930,0,0,0,0,
+12931,0,12932,12933,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12934,0,12942,0,0,0,0,12944,
+0,0,0,0,0,0,0,0,12946,0,0,12948,0,0,12949,0,0,0,0,12950,0,0,0,0,12951,0,12952,0,
+12953,0,0,0,12954,12958,12959,0,0,0,0,0,12960,12964,0,0,0,0,0,12966,0,0,0,0,0,0,
+0,0,12970,0,12971,0,0,0,0,0,0,12972,0,0,12982,0,0,0,12984,12985,0,12986,12996,
+12997,13001,13002,0,0,0,0,13004,0,0,13005,0,0,13007,13009,0,13017,0,0,0,13020,0,
+13021,0,0,0,0,0,0,0,0,0,0,13022,0,0,0,0,0,0,0,0,13024,13027,0,0,0,0,0,13028,0,0,
+13029,0,0,0,0,0,0,0,13032,0,13037,0,0,0,0,0,0,13040,0,0,13041,0,0,0,13043,13044,
+13046,0,0,0,0,13047,0,0,0,0,0,0,0,13049,13054,0,13056,0,0,13060,13061,0,0,0,0,0,
+13067,0,0,13068,0,13071,0,0,0,0,0,13077,13078,0,0,0,0,0,13079,13080,13081,0,
+13082,0,0,0,13085,0,0,0,0,0,0,0,13086,0,13087,13088,0,0,0,0,0,13094,0,13099,0,
+13100,0,0,0,13101,0,13125,13126,13128,13129,0,0,13130,0,13131,0,0,0,0,0,0,13134,
+0,0,0,0,0,0,0,0,0,0,0,13150,0,13168,0,0,0,0,0,0,0,0,0,13169,0,0,13170,0,0,0,0,
+13174,0,0,0,13176,0,0,0,0,0,13177,0,13178,13183,13187,0,0,0,13189,0,0,13190,0,0,
+13191,0,0,13206,0,0,0,13207,0,0,0,0,0,0,0,0,0,0,13212,0,0,13219,13232,0,0,0,
+13241,0,13249,13253,0,0,0,0,0,13255,13259,0,13260,13261,0,13262,0,13272,0,0,0,0,
+13276,0,0,0,0,13277,13299,0,0,13301,13302,0,0,13303,0,0,13305,0,13310,0,0,0,
+13311,0,0,0,0,13325,0,13328,0,0,0,13329,0,0,0,0,0,0,13330,0,0,13331,0,13335,0,0,
+13342,0,0,0,0,0,13343,0,13354,0,13362,0,13366,13367,13369,0,0,13371,13372,0,
+13373,13374,0,13376,0,13380,13381,13386,0,13387,13388,0,13389,13391,13395,0,0,0,
+0,0,13401,13409,0,13410,0,0,0,0,13420,0,0,0,0,0,13422,0,0,0,0,13423,0,0,0,0,
+13425,0,0,0,0,0,13427,0,0,0,13428,0,0,13430,13438,0,13439,0,13445,0,13448,13449,
+0,0,0,0,0,0,13451,0,13457,0,0,0,0,13458,13459,0,13460,0,0,0,0,13464,13465,13466,
+13470,0,13471,13472,13474,13475,0,13476,0,0,13478,13479,0,13481,0,0,0,0,13487,0,
+13490,0,13493,0,0,13494,0,0,13495,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13496,13497,0,
+13500,0,0,13516,13522,0,0,13525,13528,0,0,0,13530,13535,0,13537,13539,0,13540,0,
+13543,0,13544,0,0,0,0,0,0,13545,0,0,0,0,0,0,13547,0,0,0,13549,13555,0,0,0,13556,
+13557,0,0,0,0,0,0,0,13558,0,13563,0,0,0,0,13564,0,0,0,0,0,0,0,0,13566,0,0,0,0,0,
+0,13569,0,0,13571,0,0,0,0,13573,0,0,0,0,0,0,13578,0,0,0,0,0,0,0,0,0,0,13581,0,
+13586,0,13595,0,13600,0,0,0,0,0,0,0,0,13601,13603,0,13604,13605,13606,13607,0,0,
+13617,13618,0,0,0,0,0,0,0,13623,0,13625,13627,0,0,0,0,0,0,0,0,13629,0,0,0,13634,
+0,0,0,13638,0,0,0,0,0,0,0,0,13654,0,0,0,0,0,0,0,0,0,0,13656,0,13659,0,0,13660,0,
+0,13662,0,0,0,13663,0,13664,0,0,0,0,0,13668,0,13669,13671,0,0,13672,0,0,0,0,0,0,
+13675,13685,0,13686,0,0,0,13687,0,0,0,13692,13694,13697,0,0,0,13702,0,0,0,0,0,
+13705,0,0,0,0,13707,0,0,0,13714,0,0,0,0,0,0,0,0,0,13715,0,13716,13717,0,0,13719,
+13724,13730,13731,0,0,0,0,0,0,0,0,13732,0,0,0,0,0,0,0,13734,0,13736,0,0,13737,
+13738,13747,0,13751,0,0,13752,0,0,0,13753,0,13757,0,0,13762,13763,0,13764,13765,
+0,13766,0,0,13767,0,0,0,13768,0,0,0,0,0,0,0,13769,0,0,13772,0,13775,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,13776,13778,13787,0,0,0,13797,0,13798,0,13801,0,13804,
+13806,0,0,0,0,13816,13817,0,0,0,0,0,0,0,0,0,0,0,0,0,13834,0,13836,0,0,13838,0,0,
+13839,0,13840,0,0,0,0,13842,0,0,0,0,0,0,13843,0,0,0,0,0,0,0,0,0,13845,0,0,0,0,0,
+13858,0,0,13860,0,0,13861,0,0,13862,13863,0,13868,0,13869,13870,0,0,0,0,0,0,0,0,
+0,0,13872,0,0,0,0,13873,13878,0,0,0,0,0,0,0,0,0,0,13886,0,13888,13889,13890,0,0,
+13891,13894,0,13897,13899,13900,13904,0,0,13906,0,0,0,13909,0,0,0,13910,0,0,0,
+13911,0,0,0,0,0,13912,13917,0,0,0,0,13918,0,13919,0,0,13920,0,0,0,13921,0,0,
+13922,0,0,0,0,0,0,0,13924,0,13927,0,0,0,0,0,13932,0,13933,0,13934,0,0,13935,0,
+13944,0,0,0,13954,0,0,13955,0,0,0,0,13956,0,13957,0,13967,13969,0,0,0,0,0,0,0,0,
+0,0,0,0,13970,13990,0,13991,13994,0,13995,0,0,0,0,13996,0,0,13999,0,0,0,14018,0,
+14019,0,14021,0,0,0,0,0,0,14041,0,0,0,0,0,0,0,0,14043,0,0,0,0,14046,0,0,0,14048,
+14049,0,0,0,0,0,0,0,0,0,0,14051,0,0,14052,14056,0,14063,0,14064,14066,0,0,14067,
+0,0,0,0,0,0,0,0,0,14068,0,0,0,14072,0,14074,14075,0,14076,14079,14085,14086,
+14087,14093,0,0,0,0,14095,0,0,0,0,0,0,14096,14097,0,0,0,0,0,0,0,14098,0,14102,0,
+0,0,0,0,14103,0,0,0,14104,0,0,14105,0,0,0,14107,14108,0,0,14109,0,0,0,0,0,0,0,0,
+14117,0,0,0,0,14118,0,0,0,0,14119,0,0,14120,0,0,14121,0,14122,14127,0,14128,
+14136,0,0,14138,0,14140,0,0,0,14141,14142,0,0,0,0,14146,0,0,14149,0,14151,0,0,0,
+14152,0,0,14153,0,0,0,0,0,0,0,0,0,14154,0,14156,14157,0,0,14159,0,14161,0,0,0,0,
+14162,0,0,0,0,0,0,14163,0,0,14173,0,0,0,0,0,0,14174,0,0,14176,0,0,14178,0,0,
+14179,14181,0,0,14182,14185,14187,0,14190,0,0,14197,0,0,0,0,0,0,0,0,0,0,0,0,
+14198,0,0,0,0,0,0,14199,14200,0,0,0,14204,0,0,14208,0,0,0,0,0,0,0,0,0,0,0,14231,
+0,0,0,0,0,0,0,0,0,14234,0,0,14235,0,0,0,14240,14241,0,0,0,14246,0,0,0,14247,0,
+14250,0,0,14251,0,0,14254,0,0,14256,0,0,0,14260,0,14261,0,0,0,0,14262,14267,
+14269,0,0,14277,0,0,14278,0,14279,14282,0,0,0,14283,0,0,0,14284,14285,0,0,0,0,
+14286,0,0,0,14288,0,0,0,14289,0,14290,0,14293,14301,14302,14304,14305,0,14307,0,
+14308,14309,0,0,0,0,0,0,0,0,0,0,0,14311,14312,0,0,14317,0,0,0,0,0,0,0,14318,0,0,
+0,0,14320,0,0,0,0,14321,14322,0,0,0,0,0,14326,14329,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+14330,14331,0,0,0,0,14332,0,0,0,14333,0,0,14337,14340,0,14341,0,0,14342,0,14345,
+14346,0,0,14347,0,14362,0,0,0,0,0,14364,14365,14371,0,14373,0,0,14374,0,14379,0,
+14400,0,0,0,0,0,14401,0,0,14405,0,14406,0,14408,14409,0,0,0,14417,0,0,14424,0,0,
+0,0,0,0,0,0,0,14430,0,0,0,14431,0,0,14435,0,14440,0,0,0,0,0,0,14442,0,0,14443,0,
+0,0,0,0,14446,0,0,0,0,0,0,0,14454,0,14457,0,14460,0,0,14466,0,0,0,0,0,14467,0,0,
+0,0,0,0,14469,0,14477,0,0,0,0,0,0,14478,14482,0,0,0,14483,0,0,0,14485,14486,0,0,
+0,14487,14488,14489,14492,14493,14494,14495,14496,14497,0,14499,0,14501,0,0,0,0,
+0,0,0,0,0,0,14502,0,14507,14512,14513,14514,0,0,0,0,0,0,0,0,0,0,0,14515,14526,
+14530,0,14537,0,14544,0,14547,0,0,14548,14550,14551,0,0,14552,0,0,0,14553,0,
+14554,0,0,0,0,14556,14564,0,0,14565,14566,0,0,0,0,0,0,14568,0,0,14569,0,0,0,
+14571,14576,0,0,14577,14578,14579,0,0,14580,0,0,0,0,14582,0,0,0,0,0,0,0,0,0,0,0,
+0,14583,0,0,0,0,0,14587,0,14588,0,0,14600,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,14601,0,0,14604,14605,14611,0,14613,0,0,0,0,14615,0,0,0,0,0,0,14627,0,14628,0,
+0,0,0,14631,0,14633,14634,0,0,0,0,14635,0,0,0,0,0,0,0,0,14636,0,0,14639,14642,0,
+0,0,0,14644,0,0,0,0,14645,14646,0,14653,0,0,14654,0,14658,0,14661,0,0,0,14665,0,
+0,0,14668,0,0,0,0,0,0,0,0,0,14669,0,0,14670,0,0,0,14680,0,0,14681,0,0,0,0,0,
+14682,14683,0,0,0,0,14686,0,0,0,0,14687,14697,0,0,0,0,14699,14705,14711,0,0,0,0,
+0,0,0,0,0,0,14712,0,0,0,14713,0,0,0,0,14719,0,14720,14721,14726,0,0,0,14728,
+14729,0,0,0,0,14731,0,0,0,0,0,0,0,14733,14736,14737,0,0,14740,14742,0,0,0,14744,
+14753,0,0,0,0,14755,14758,14760,0,0,0,0,0,14761,14762,14765,14771,0,14772,0,
+14773,14774,0,0,14775,0,0,14776,0,0,0,0,14777,0,14779,0,0,14782,0,0,14785,14786,
+14788,0,0,0,0,0,14795,0,0,0,0,0,0,14798,0,14803,14804,14806,0,0,0,14809,0,0,0,0,
+0,0,14810,0,0,0,0,14811,0,14812,0,0,0,0,0,14815,0,0,0,0,0,0,0,0,14816,0,14818,0,
+0,0,0,0,0,14819,0,14820,0,14823,0,0,0,14824,0,0,14826,14827,0,0,0,0,0,0,0,0,0,0,
+0,0,14830,0,0,0,0,0,14833,0,14845,0,0,0,0,0,14846,0,0,14847,14871,0,14873,0,
+14876,0,14877,14878,14880,0,0,0,0,0,14881,0,14882,14894,0,0,0,0,14895,0,14907,0,
+14908,0,0,0,0,0,0,0,14911,0,0,0,0,14920,0,0,14931,0,14932,14934,14935,0,0,14936,
+0,14945,0,0,0,0,0,0,0,14947,0,0,14948,14949,14951,0,0,14952,0,0,0,14964,14973,0,
+0,14990,0,0,0,0,14995,0,0,14998,15001,0,0,15002,15020,0,0,0,0,0,0,15021,0,15022,
+0,0,0,0,15023,0,0,15025,15029,15033,0,0,0,15034,0,0,0,15035,0,0,0,0,0,15043,
+15044,0,0,0,15045,15046,15048,15050,0,15065,0,0,0,0,15066,0,0,15075,15082,15084,
+0,0,15085,15086,0,0,0,0,0,0,0,0,15088,0,0,0,15089,0,0,0,0,15094,0,15096,0,15097,
+0,15100,0,0,15102,0,0,0,0,0,0,0,0,15105,0,0,15106,0,15109,15113,0,0,0,15115,0,
+15118,0,0,0,0,0,0,15119,0,0,15120,0,0,0,0,0,15123,15129,0,0,0,15130,0,15131,0,0,
+15134,0,15135,0,0,0,15137,15138,0,0,0,0,0,0,15139,0,0,0,0,0,15140,0,0,15154,
+15162,0,15169,15170,0,15175,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15177,0,15178,15179,0,
+0,0,0,0,15183,0,0,0,0,0,0,0,0,0,0,0,0,15185,15187,0,15194,15195,15196,0,0,0,0,0,
+0,0,15204,0,0,0,0,15206,0,0,0,0,0,15207,0,0,0,0,0,0,0,0,0,15213,0,15214,0,0,0,0,
+0,0,0,15232,0,0,0,0,15234,0,15238,15240,0,15248,0,0,0,0,15250,15251,0,0,0,0,0,0,
+0,15252,0,0,0,15255,15262,15266,0,0,0,15267,0,0,0,15277,15279,0,0,0,15280,15281,
+15282,0,0,0,0,0,15285,0,0,0,0,15289,0,0,15291,0,0,0,0,0,0,0,15296,15297,0,0,
+15304,0,0,0,0,15306,0,0,0,0,0,0,15307,15308,0,15309,0,0,15311,0,0,15312,15313,0,
+0,0,0,0,0,0,0,0,0,0,0,15314,15317,0,0,0,15318,15319,0,0,0,0,15320,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,15321,0,0,0,0,0,15324,0,15325,15326,0,15330,0,0,0,0,15334,0,
+15335,0,15341,0,0,15342,0,0,15343,15344,0,0,0,0,15345,0,0,0,0,15347,0,0,15348,
+15349,15350,0,15356,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15357,0,15358,0,0,0,0,0,0,0,
+15359,15360,15364,0,15380,0,0,0,0,0,15392,0,0,15393,0,15395,0,0,0,0,0,0,0,0,
+15396,0,0,15397,15398,0,0,0,0,0,0,0,0,0,15399,0,15400,0,0,0,15402,0,15405,15410,
+0,0,0,0,15411,0,0,0,15412,0,15416,0,0,0,0,0,0,0,15428,0,15435,0,0,15438,0,0,0,0,
+15439,0,0,0,15440,0,0,0,15441,15449,15451,0,0,0,0,0,0,0,15452,0,0,15455,0,0,0,
+15456,0,0,15458,0,15460,15461,0,0,0,0,0,15462,15464,0,15465,0,0,15466,0,0,15467,
+0,0,0,0,0,15468,0,0,0,0,15481,0,0,15484,0,15485,15486,0,0,0,15487,0,0,0,0,0,
+15488,0,15492,15498,0,0,0,15499,0,0,0,15500,0,15501,0,0,15512,0,15522,0,0,0,
+15524,0,15525,15526,0,0,15527,0,0,15545,15546,0,15548,15552,0,15553,0,0,0,15554,
+0,15555,0,15557,15565,15573,15577,15578,0,15582,0,15583,0,0,0,0,0,0,0,0,0,0,0,0,
+0,15586,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15588,0,0,0,0,0,15589,0,0,0,0,0,0,0,15593,
+15594,0,0,0,0,15595,0,0,0,0,0,0,15596,0,0,0,15597,0,0,0,0,15600,0,0,15601,0,0,0,
+0,15602,15603,0,0,0,0,0,0,15604,0,15609,0,0,15612,0,0,15613,0,0,15615,15617,
+15618,0,0,15620,0,15636,15637,0,0,15649,0,0,0,0,0,0,0,15650,0,0,15651,0,0,0,
+15656,0,15658,0,0,0,15664,0,0,15665,0,0,15668,0,0,0,0,0,15669,0,0,15674,0,0,
+15675,0,0,0,0,15676,0,0,0,0,0,0,0,0,0,0,0,15677,0,0,0,0,15678,0,0,0,0,0,15679,0,
+0,15681,0,15686,0,0,0,0,15687,0,15688,0,0,15690,0,0,0,15697,0,15699,15700,0,0,0,
+0,0,0,0,0,0,15701,0,15702,15703,0,15704,0,15705,0,15707,0,15709,0,15712,15716,0,
+15717,0,15718,15720,0,0,0,0,0,15724,0,0,0,15725,0,15726,0,0,0,15740,0,15745,
+15746,0,0,15747,0,15748,0,0,0,0,0,15749,0,0,0,15752,0,15753,0,0,0,0,0,0,15759,0,
+0,0,15765,0,0,0,0,0,0,0,0,0,15767,0,0,0,15771,0,0,15784,0,0,0,0,15785,15790,
+15791,0,0,15792,0,0,0,15807,0,15811,0,0,0,0,0,0,0,0,0,0,0,0,15818,0,0,0,15819,0,
+0,0,0,15821,0,0,0,0,0,15822,15824,0,0,15827,0,0,15829,15831,0,15832,0,0,15833,0,
+15835,15838,15839,15843,0,0,0,0,0,0,0,0,0,0,0,15844,0,0,0,0,15845,15851,15856,0,
+0,0,0,0,0,0,15858,15860,0,15861,0,0,0,15864,0,0,0,0,15865,0,0,0,0,0,0,15866,0,
+15872,0,0,15876,0,0,0,0,15877,15878,15883,15885,0,0,15888,0,0,0,0,0,15889,15890,
+0,0,0,0,0,0,0,0,15892,0,0,0,0,0,0,0,15893,0,0,15894,0,0,0,15895,0,15896,15897,0,
+15898,15901,15902,0,15911,15915,0,15916,0,15924,15935,0,15937,0,0,0,0,0,15950,0,
+0,0,0,0,0,0,15958,0,0,0,15961,0,0,15966,0,15967,0,0,15977,0,0,15978,0,0,15981,
+15982,15983,0,0,0,0,0,0,0,15986,0,0,0,15990,0,15991,15995,15998,0,15999,0,16000,
+0,0,0,0,16008,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16009,16011,0,16013,0,0,0,0,
+0,0,0,0,16014,0,0,16015,16023,16024,16025,0,0,16026,0,16030,0,16032,0,16033,0,0,
+0,0,0,0,16035,16036,16037,0,0,0,0,0,16039,0,0,0,0,16041,0,0,0,0,0,16043,16044,0,
+0,16047,0,0,0,16048,0,0,16049,16050,16052,0,0,0,0,0,16055,0,0,0,0,0,0,0,0,16056,
+0,0,0,0,0,0,0,16058,16060,16061,0,0,16063,0,0,16064,0,0,0,16067,16068,0,0,16069,
+16078,0,0,0,16079,0,0,0,16080,0,16081,0,0,0,16088,0,0,0,0,0,0,0,0,0,0,0,16089,
+16093,0,16097,0,16103,0,16104,16105,0,0,16256,0,0,16259,0,0,0,0,0,0,0,16260,
+16261,0,0,16262,0,0,16263,0,16268,0,0,0,0,0,0,0,16269,0,0,16270,16273,0,16274,0,
+0,0,0,16275,16276,16277,16280,0,0,0,16281,16284,0,0,0,16286,0,16289,0,0,0,0,0,0,
+0,0,0,16290,0,0,0,0,16291,0,0,0,0,0,0,0,16292,0,0,0,0,0,0,0,0,16293,16295,16297,
+0,16302,0,16304,0,16305,0,16306,0,0,0,0,0,0,0,0,0,0,0,0,16307,16308,16312,0,0,0,
+0,0,0,16313,16315,0,16318,0,0,0,16321,0,0,0,0,0,0,0,16326,16333,16336,0,0,0,0,
+16337,16340,0,0,0,0,0,16345,0,0,16346,0,0,0,0,0,0,0,0,0,16347,0,0,16348,0,0,0,0,
+16349,0,0,0,16350,0,16357,0,0,0,0,16359,16360,0,0,0,0,16362,16363,16364,16365,0,
+0,16366,0,0,0,0,16367,16368,0,16369,16374,0,0,0,0,0,0,0,16376,0,0,0,0,16378,
+16379,0,16380,0,0,0,16381,16383,0,0,0,0,0,16390,0,0,0,16399,0,16402,16404,16406,
+16407,0,0,0,16409,16411,0,0,0,0,16412,0,16413,16415,16423,0,0,0,0,0,16424,0,0,0,
+16428,16434,16435,16449,0,16450,16451,0,0,0,16453,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+16454,0,0,16456,16458,0,0,16459,0,0,16460,0,0,0,0,16462,0,16463,0,0,16466,0,0,0,
+0,0,16479,0,0,16480,0,16481,16484,0,0,0,0,0,0,0,0,0,0,16485,0,0,0,0,0,0,16489,0,
+0,0,0,0,16491,0,0,16498,0,0,16503,0,16505,0,0,0,0,0,0,0,0,16506,0,0,0,16508,
+16509,0,0,0,0,0,0,0,0,16511,16513,0,0,0,16516,0,16517,0,16519,0,16529,0,0,16531,
+0,0,0,0,0,0,16534,0,0,16541,16542,0,0,0,0,0,0,0,0,0,16543,16547,16548,0,0,0,
+16551,0,16552,0,0,0,16553,0,0,16558,0,0,16562,16565,0,0,0,16570,0,0,0,16573,
+16585,0,0,0,16586,16587,16595,0,16596,0,16598,0,0,0,16600,0,0,0,0,0,0,0,0,0,0,0,
+0,0,16601,0,0,0,0,16603,0,0,0,0,0,0,0,16604,16612,0,0,0,0,16613,0,16618,0,0,0,
+16640,0,0,16641,0,0,0,0,0,0,16645,0,0,0,0,16646,0,0,0,0,0,0,16651,0,0,0,0,16653,
+16654,0,0,0,16655,0,0,16656,16667,0,0,0,0,16671,0,16672,0,0,0,16673,0,0,0,0,0,
+16676,0,16686,0,0,0,0,16689,0,16690,0,16692,0,16693,0,16694,0,16696,0,0,0,16705,
+0,0,0,0,0,0,16707,0,0,0,16709,0,0,0,0,16711,0,16712,16713,0,0,0,16715,0,0,0,0,
+16716,0,0,0,0,0,0,0,0,0,16718,16724,0,0,16726,16727,0,0,0,0,0,0,0,16728,0,16729,
+0,0,16730,0,0,0,0,0,16731,0,0,0,16732,0,0,0,0,16734,16738,0,0,0,0,0,0,0,0,16743,
+0,0,16745,0,0,0,0,0,16749,0,16752,0,0,0,0,16756,0,0,16758,0,16759,0,0,0,0,0,
+16760,0,0,0,0,0,0,0,16762,0,16769,0,16770,0,16772,0,0,0,16777,16780,0,0,0,0,0,0,
+16781,0,0,16782,0,16784,0,0,16785,16787,16792,0,0,16794,0,0,0,16798,0,0,16809,0,
+0,16814,16816,16817,0,16819,0,0,0,0,0,0,0,0,0,0,16820,0,0,16836,16839,0,0,16841,
+16851,16857,0,0,16858,16859,0,0,16860,0,0,0,0,0,0,0,0,16862,0,16863,0,0,0,0,0,0,
+0,16864,0,0,0,0,0,0,0,16876,0,16881,16882,0,16885,16886,0,16887,0,0,0,16889,
+16891,0,0,0,0,0,16894,16895,0,0,0,0,0,0,0,0,0,0,0,16897,0,16898,0,0,0,0,0,16913,
+0,0,16924,16925,16926,0,0,16927,0,0,0,16937,16938,0,0,0,16940,16941,0,0,0,16942,
+16945,0,16946,16949,16950,0,0,0,16952,16955,0,0,0,16965,0,16969,0,0,16975,0,0,
+16976,0,0,0,0,16978,0,0,16981,0,16983,16989,0,0,0,0,16990,0,0,16991,0,0,0,16993,
+0,16994,16996,17000,0,0,0,0,0,17002,17004,0,17006,0,0,17007,0,0,0,0,17008,17013,
+17014,0,0,0,0,0,0,0,0,0,17021,0,17031,0,0,0,0,0,17033,17036,0,17038,0,0,17039,0,
+17045,0,0,17046,17047,0,0,0,0,17048,0,17049,17050,0,17051,17053,0,17054,0,17055,
+0,0,0,0,0,17063,0,0,17064,0,0,0,0,0,0,0,17065,0,0,17068,0,0,0,0,0,17072,0,0,0,0,
+0,0,17073,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17074,0,17080,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,17081,17083,17084,0,0,0,17085,0,0,0,0,17092,0,0,0,0,0,0,0,
+0,0,17093,0,17095,17102,0,0,0,0,0,0,17103,0,0,17105,0,17107,0,0,0,0,17114,0,0,0,
+0,0,17115,17125,17127,0,0,17128,0,0,0,17129,17130,0,17131,0,0,0,0,0,17132,17135,
+17145,0,0,0,0,0,0,0,0,17146,0,17147,0,17148,0,0,0,0,0,0,17149,17150,0,17151,
+17153,0,17155,0,0,0,0,17163,17171,0,17174,0,0,0,0,17179,0,0,17182,17185,0,0,0,0,
+0,17186,0,0,17188,0,0,0,0,0,0,0,17189,17191,0,17194,0,0,0,0,0,0,0,0,0,17195,
+17196,17203,17204,0,0,17205,17217,0,0,0,0,0,17218,0,0,0,0,17219,0,17220,0,17221,
+0,0,17230,0,0,0,0,0,17236,0,17238,17239,0,0,0,17241,17244,0,0,17245,0,17248,0,0,
+17251,0,17252,0,0,17264,0,17266,0,0,0,17268,0,0,0,0,17271,17272,0,17273,0,17295,
+0,17302,0,17305,0,0,0,17306,0,0,0,0,0,0,0,17308,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+17309,0,17310,17313,0,0,0,0,17314,17315,0,17317,0,0,0,0,17318,0,0,0,0,0,0,0,
+17320,0,0,0,0,0,0,17334,0,17344,17348,0,0,0,17350,17351,0,0,17353,0,0,17354,0,0,
+0,0,0,0,0,0,0,17355,0,0,0,0,0,0,17356,17357,0,0,17359,0,0,0,17371,0,17372,0,0,0,
+17393,0,0,0,0,17394,0,0,0,0,0,17395,0,0,17399,0,0,0,17401,17417,0,17418,0,17419,
+0,0,0,0,0,17422,17423,0,0,0,0,0,17424,0,0,0,0,0,17428,17429,17433,0,0,0,17437,0,
+0,17441,0,0,17442,0,0,17453,0,0,0,0,0,0,0,0,17454,17456,17462,0,0,17466,0,0,
+17468,0,0,17469,0,0,0,0,17470,0,17475,0,0,0,0,0,17479,0,0,0,17483,17484,0,17485,
+0,17486,0,17491,17492,0,0,17493,0,17494,17495,0,0,0,17496,0,0,0,17497,0,0,0,
+17502,0,0,0,0,0,17503,0,17505,0,17507,0,0,0,17512,17513,17514,0,0,17515,0,0,0,
+17519,0,0,0,17522,0,0,17523,0,0,0,0,0,0,0,0,0,17527,0,0,0,17528,0,0,0,17534,0,0,
+0,0,17536,0,0,0,17539,0,17540,17543,17549,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17556,
+0,0,17558,0,17559,0,0,17560,0,0,0,17563,0,0,0,0,0,0,17564,0,0,17565,17566,0,
+17567,0,0,0,0,0,0,17569,17570,0,17575,0,0,0,0,0,0,0,0,0,0,0,17581,0,0,0,17582,
+17583,0,17586,0,0,17587,0,0,0,0,0,0,0,17588,0,0,0,0,17596,17597,0,0,17598,17600,
+0,0,0,0,0,0,17601,0,0,0,17604,0,0,17605,0,0,17607,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,17612,0,0,17618,0,17621,17622,0,0,0,0,17623,0,0,17624,0,0,17630,0,0,
+17631,17633,17634,0,0,0,0,0,0,0,17635,0,0,17636,0,0,17637,0,17638,0,17640,0,0,0,
+0,0,0,0,0,0,0,17641,0,0,0,0,0,0,0,0,0,0,17643,0,0,0,0,17645,0,0,0,0,0,0,0,0,
+17646,17662,0,0,0,0,0,0,0,0,0,17663,17664,0,17665,17666,0,0,0,17669,17671,17673,
+0,17679,0,0,0,0,0,0,0,17684,0,0,0,17686,0,17714,0,0,17720,17722,17726,0,0,17728,
+0,0,17729,0,0,0,17732,0,17733,0,17734,0,0,0,17735,0,0,0,0,17737,0,0,0,0,17739,0,
+0,0,17741,17742,0,0,0,0,17743,17744,17745,0,0,0,17749,0,17750,17751,17752,17754,
+17761,17762,0,17763,0,17766,0,17772,0,0,0,0,0,17775,0,0,0,0,0,0,0,17776,0,0,
+17777,0,0,17778,17779,0,17782,17783,0,0,0,0,0,0,0,0,0,0,17784,0,0,0,0,0,0,0,
+17821,0,0,0,17822,0,0,0,17823,17825,0,0,0,0,0,17826,17831,17832,17833,0,0,17845,
+0,0,0,17846,0,0,0,17848,17850,17854,0,17855,0,0,17859,0,0,0,0,0,0,17860,17861,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17870,17871,0,0,0,0,0,0,17872,0,0,0,17879,0,
+0,0,17881,17883,0,17884,0,17885,0,0,17886,0,0,17887,17891,17953,0,0,0,0,17954,0,
+0,17955,0,17968,0,0,17972,0,0,0,0,0,17974,0,0,0,0,17976,17978,0,0,17983,0,0,0,0,
+18003,0,0,0,0,0,18007,0,0,0,0,0,18009,0,0,0,0,0,0,0,18010,0,0,0,0,0,0,18012,0,0,
+18014,0,0,0,18015,0,0,0,18016,0,18017,0,0,0,18030,0,0,0,0,0,0,0,18031,0,0,18036,
+18037,18038,0,0,18049,18056,0,18057,18058,0,18059,0,0,0,0,0,0,0,0,18062,0,0,0,0,
+18064,0,0,0,0,0,0,0,0,18067,0,0,0,18068,0,0,18075,0,0,18078,18093,18094,0,0,0,0,
+0,0,0,0,18097,0,0,0,0,0,18098,18100,0,0,0,18108,0,18111,0,0,18112,0,18113,0,0,
+18115,18116,0,18118,0,0,0,0,18121,0,0,0,0,18123,0,0,0,0,0,0,0,0,0,18124,0,0,0,0,
+18125,18126,0,18127,0,0,18128,18135,0,0,0,0,0,0,0,0,0,18150,0,0,0,0,0,18151,
+18152,0,0,18156,18164,0,18166,18171,0,0,0,0,0,0,0,0,0,18172,18183,0,18184,0,0,0,
+0,18185,0,18187,0,0,0,0,0,18188,0,0,0,0,0,0,0,0,18189,0,0,18190,0,0,18191,18192,
+0,0,18194,18195,18196,0,0,0,18197,0,18203,0,18204,0,0,0,0,18205,0,0,0,18207,
+18208,0,0,18214,0,0,0,18215,18216,0,0,0,18220,0,0,18222,0,0,0,0,0,18223,0,18225,
+18231,0,18234,0,18235,0,0,0,0,18240,0,0,18241,18242,0,0,0,0,0,18243,18251,0,
+18253,0,18254,0,0,0,18266,0,0,0,0,0,0,18269,18270,18271,18273,18281,0,0,0,0,0,0,
+0,0,0,0,0,0,18282,0,18283,0,18284,0,0,0,0,0,0,18285,0,18287,18289,0,0,18290,0,0,
+0,0,18308,0,0,0,18310,0,0,0,0,0,0,0,0,0,0,0,0,18311,0,18312,18313,0,18315,0,0,
+18316,18320,0,18331,0,18332,0,18336,0,0,0,0,18337,0,18340,0,0,0,0,0,0,0,0,0,
+18341,0,18344,18345,0,18346,0,0,0,0,0,18348,0,18351,0,0,18356,0,0,0,0,0,0,18357,
+0,0,0,0,0,18367,0,0,0,18368,0,18369,0,18370,18371,0,0,0,18437,18444,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,18445,18450,0,0,0,0,18451,0,18452,0,0,0,18453,0,0,0,0,0,18455,0,
+0,0,18456,0,18457,0,18460,0,0,18461,0,0,0,0,0,0,0,0,18466,0,0,18467,0,0,0,0,
+18473,0,0,0,18476,0,18477,0,0,0,18478,18479,18480,0,0,0,18485,0,0,0,18486,0,0,0,
+0,0,0,18488,18490,0,0,0,0,0,0,18491,0,0,0,0,0,18495,0,0,18496,0,0,0,0,0,0,18505,
+0,18521,0,18522,18523,0,0,0,18525,18526,0,0,0,0,0,18527,0,0,0,0,18532,18533,0,
+18534,0,0,0,0,0,0,18535,18537,0,18538,0,0,0,0,0,0,18540,18541,18542,18543,0,
+18546,0,0,0,0,18553,18556,0,0,18558,0,0,18569,18571,0,0,0,18572,0,18574,0,0,0,0,
+18586,0,0,0,0,0,18588,0,0,18589,0,0,0,0,0,0,18590,0,18592,0,0,0,0,18594,0,0,0,
+18596,0,0,18597,18598,0,0,18601,0,0,0,0,18602,0,0,0,18603,18604,0,18605,0,0,0,0,
+18608,0,0,18611,0,0,0,0,0,0,0,0,0,18612,0,18616,0,0,18617,18619,0,0,0,18628,0,0,
+0,18629,0,0,18630,0,0,0,0,0,0,0,18631,0,18632,0,0,18635,18637,0,0,0,0,0,0,18641,
+18643,18648,0,18652,0,0,18653,0,18655,18656,0,0,0,18657,0,0,18666,18674,0,0,0,0,
+18677,18684,18685,0,0,18686,0,0,18690,0,0,0,0,0,0,0,18695,18696,0,0,0,0,0,0,0,0,
+0,0,18697,0,0,18700,0,0,0,0,0,0,18702,0,18708,0,0,18709,0,18710,0,0,18711,0,
+18714,0,0,18718,0,0,0,0,0,0,18719,0,0,18722,0,18726,0,0,0,0,0,0,0,0,0,0,0,0,0,
+18731,0,0,0,0,0,18739,18741,0,0,18742,0,18743,18744,18746,18748,0,18752,18753,0,
+0,18754,18763,0,18765,0,0,0,18766,0,0,0,18769,0,0,0,0,0,18773,18778,18779,18781,
+0,0,18784,18787,0,18788,0,18793,0,0,0,0,0,0,18795,0,0,18800,0,0,0,0,0,18801,
+18804,0,0,0,0,0,0,0,18806,0,0,0,18811,18815,18816,0,0,0,0,18825,0,0,18827,18829,
+0,0,18830,0,0,0,0,18831,0,0,18832,0,0,0,0,18833,0,18840,0,18841,0,18842,0,0,0,0,
+18843,0,18844,0,0,0,0,0,0,18845,18846,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+18848,0,0,0,18853,18860,0,0,18862,18866,0,0,18867,18869,0,0,18874,18881,18891,0,
+0,0,0,0,0,0,0,0,0,18892,0,0,0,0,0,0,0,0,18895,0,18896,0,0,0,18900,0,0,0,18901,0,
+18902,18915,18916,0,0,0,0,0,0,0,0,18919,0,0,0,0,0,18920,0,0,0,18921,18929,0,0,0,
+0,18930,0,0,0,0,0,0,18932,0,0,0,0,18934,18942,0,0,0,18951,18957,0,0,0,0,18958,0,
+0,0,0,18959,18960,0,0,18961,0,0,18962,0,0,0,0,18963,18964,0,0,0,18965,0,18967,0,
+0,0,0,0,0,0,0,0,18968,0,18969,0,18970,18973,18976,0,0,0,0,0,0,18977,0,0,0,18981,
+0,0,0,18990,0,18998,0,0,0,0,0,18999,19003,0,0,19005,0,0,0,19006,0,0,0,0,0,0,
+19008,19011,0,0,19018,0,0,19019,0,19024,0,19031,19032,0,19039,0,19041,19050,0,0,
+0,19051,19055,19056,0,19059,19063,19064,0,0,19088,0,0,0,19093,19094,0,0,0,0,
+19095,0,19096,0,0,0,19097,0,0,19098,0,19099,19100,0,0,19103,0,0,0,0,0,0,0,19111,
+0,0,0,0,0,0,19112,0,0,0,19116,19117,0,19121,19122,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,19123,19124,0,0,0,0,0,0,0,19125,19126,0,19128,0,0,0,0,0,0,0,0,0,0,
+19129,19130,19131,19132,0,0,19146,0,0,19147,19156,19158,0,0,0,0,0,0,0,0,19182,
+19185,0,0,19187,0,0,0,19193,0,0,0,0,0,19194,0,19197,0,0,0,0,19198,0,0,0,0,0,0,0,
+0,0,0,19202,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19203,0,19205,19210,
+0,0,0,19213,0,19218,0,0,0,19223,19229,0,0,19230,0,0,19231,19232,19233,19239,0,0,
+0,0,0,19240,0,19248,19249,0,0,0,0,19254,0,19256,19258,19259,0,0,19261,0,19266,0,
+0,0,19272,0,19278,19281,19282,0,0,0,0,0,0,0,0,0,0,0,0,19283,0,0,19284,0,0,19285,
+19287,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19288,19291,0,19292,0,0,0,0,19297,0,19298,0,0,
+0,0,19302,19303,0,0,0,0,19304,19305,0,0,0,0,19314,0,0,19315,0,0,19321,0,0,0,0,0,
+0,0,19322,0,19333,0,19334,19335,0,19336,19337,0,0,0,0,0,0,0,0,0,0,0,19346,0,0,
+19353,0,19354,19362,0,19366,19367,0,0,19369,0,19375,0,19377,19380,19388,0,0,0,0,
+0,19389,19390,0,0,0,0,19392,0,0,0,0,0,19402,0,0,0,0,0,0,0,0,19412,0,0,19413,
+19422,0,19424,0,0,0,19425,0,0,0,19428,0,0,0,0,19431,0,0,0,0,0,19432,0,0,0,0,0,
+19448,19459,0,0,19461,0,19462,19463,0,19467,19474,19482,0,0,0,0,19494,0,0,0,0,
+19501,0,0,0,0,0,0,0,0,0,0,19502,19504,0,0,0,0,0,0,0,19505,0,0,0,0,19506,19507,0,
+0,0,19508,0,0,19511,0,0,19514,0,19515,0,19516,0,19518,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,19530,0,19537,19538,0,19543,19546,0,19547,19551,0,0,0,0,0,0,19552,
+19553,0,0,0,0,0,0,0,0,0,0,0,0,19555,0,0,19556,0,0,0,0,0,0,0,0,0,0,0,0,19560,
+19561,0,0,19562,0,0,0,0,0,0,19565,19567,0,19568,0,0,0,19569,19570,0,19578,0,0,0,
+0,19580,0,0,0,0,19581,19584,0,0,0,0,0,0,0,19585,19586,0,0,0,19587,19588,0,19589,
+0,0,0,0,0,0,19592,19593,19599,0,19600,0,0,19604,0,0,19605,0,19606,19608,19610,0,
+19613,19614,0,0,0,0,0,0,19616,19617,0,0,19618,0,0,19619,0,0,0,19620,19621,19631,
+0,0,19632,19634,19636,0,19643,0,0,19644,19658,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,19659,0,0,0,0,0,0,0,0,0,0,0,19675,19677,0,0,0,0,19679,0,19683,0,19684,0,0,
+0,0,0,0,19687,0,0,0,0,0,0,0,0,19688,19689,19692,0,0,0,0,0,0,0,19695,19697,0,0,0,
+0,0,19698,19699,0,0,19700,0,19702,0,0,19703,0,0,0,0,0,0,19704,19708,0,19710,0,
+19713,0,0,0,19715,0,0,0,0,19718,0,0,0,0,0,0,0,19720,0,19722,0,0,19725,0,0,0,0,0,
+0,0,0,0,0,0,0,0,19730,0,0,0,0,0,19731,0,19734,19735,19739,0,0,19740,0,19741,0,0,
+0,19746,0,0,19747,0,19771,0,0,0,0,0,0,0,0,19772,19775,0,0,0,0,0,0,19778,0,0,0,0,
+0,19779,0,0,19780,19790,0,19791,0,0,19792,0,0,0,19793,0,0,19796,19797,0,0,0,
+19799,0,0,0,19801,0,0,0,0,19803,0,19804,0,19805,0,0,19807,0,0,0,19808,0,0,0,0,0,
+0,19809,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19816,0,19821,0,19822,19830,19831,0,0,
+0,19833,0,0,0,0,0,0,0,0,0,0,19838,0,0,0,0,19839,0,0,19843,0,0,0,0,19845,0,0,0,0,
+19847,0,0,19848,0,19849,0,0,0,0,0,0,0,19851,0,0,0,19854,0,0,0,0,0,0,0,0,0,19864,
+0,19865,0,19866,0,0,0,0,0,0,0,19868,0,0,19870,0,0,19871,0,0,19872,19873,19875,0,
+19880,19882,19884,0,0,19885,19886,19888,0,0,0,0,0,0,0,0,0,0,0,0,19890,19892,
+19893,0,0,19894,0,0,0,19895,0,19896,19902,0,0,19903,0,0,19905,0,0,0,19906,0,
+19908,0,19909,19911,0,0,0,19913,19920,0,19938,19939,19940,0,0,0,0,0,0,0,19942,0,
+19943,0,19945,0,0,0,19951,19952,19954,19960,0,19965,0,19971,0,0,0,0,0,19975,0,
+19976,0,19990,0,0,19991,0,19993,0,19995,0,0,0,19998,19999,20001,0,20003,20005,0,
+20011,20012,0,0,0,0,0,0,20014,0,20020,0,0,0,0,20021,0,0,0,0,0,20023,20024,0,0,0,
+0,0,20025,0,0,20027,0,0,20029,0,0,20032,0,0,0,0,20044,20045,0,20048,20049,0,0,
+20050,0,20052,0,0,20054,20057,0,0,0,0,0,0,0,0,0,20059,0,0,20061,0,20062,0,20064,
+0,0,20066,0,0,20067,0,0,0,0,20069,0,0,0,0,0,0,20070,20071,0,0,0,0,0,0,0,0,0,0,0,
+20072,0,0,20073,20074,0,0,0,0,0,20075,0,20078,0,0,0,0,20080,0,20081,0,0,0,0,0,0,
+20095,0,20098,0,0,0,0,0,0,0,20107,0,0,0,0,0,0,0,0,20112,0,0,0,20113,20114,0,0,0,
+20115,20123,20124,0,0,0,20131,20133,20134,0,0,0,0,20136,0,0,20137,20138,20150,0,
+20152,0,0,0,20153,0,0,20154,0,0,0,20158,0,20163,0,0,20164,0,0,0,0,0,0,0,20166,0,
+20168,0,20170,0,20175,0,0,20178,0,0,0,0,20223,0,0,0,0,20224,0,20226,0,0,20230,0,
+20231,0,0,0,0,20232,0,0,20233,20234,0,20244,0,20247,0,0,0,0,0,0,20249,0,0,0,
+20250,0,0,0,0,20251,0,20253,0,20254,0,0,0,0,20256,0,0,20264,0,0,0,0,20266,0,0,0,
+20278,0,0,20279,20282,0,0,0,0,0,20283,0,20284,0,20285,0,20287,20290,0,0,0,0,
+20292,0,0,0,0,20293,20297,0,0,0,0,0,0,20299,0,20300,20303,0,0,0,0,0,0,20307,0,0,
+20308,0,20309,0,20310,0,0,0,0,0,0,20312,0,0,0,20314,0,0,0,0,20315,20316,0,20322,
+0,0,0,0,0,0,20339,0,0,0,20342,0,0,0,0,20352,0,0,0,0,0,0,0,0,0,0,20362,0,0,20365,
+0,20375,20377,0,0,0,0,0,0,0,0,0,0,0,20378,20379,0,20380,0,0,20381,0,20382,0,
+20383,0,20388,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20390,20392,20393,0,0,20395,0,0,0,0,0,
+20396,0,0,0,0,0,0,0,0,20398,20415,0,0,0,20417,0,0,20420,0,0,20426,20428,0,20431,
+0,0,20432,0,20433,20434,20435,0,0,0,0,20440,0,0,0,0,0,20442,0,20443,0,20446,0,0,
+0,0,20448,0,20451,0,0,0,0,0,0,0,0,0,20452,20453,0,0,20454,0,0,0,0,0,0,20457,0,
+20458,0,0,0,20465,0,0,0,0,0,20469,0,0,0,20473,0,20476,0,0,0,0,0,0,0,0,20477,0,0,
+20485,0,0,20486,0,0,20487,0,20496,0,20497,0,0,20498,0,0,0,0,0,0,0,0,0,0,20499,
+20500,0,20501,0,0,0,0,0,20520,20527,0,20529,0,0,0,0,20539,0,0,20540,0,0,0,20543,
+0,0,0,20546,0,0,0,0,0,20548,0,0,20563,0,0,20564,0,20566,0,0,0,0,0,20589,0,0,0,0,
+20590,0,0,20593,20594,0,0,0,0,20595,0,20597,20598,0,0,0,20618,20620,0,0,0,0,
+20621,0,0,0,0,20627,0,0,0,0,0,20628,0,0,0,20629,0,20630,0,0,20639,0,0,0,0,0,
+20707,0,0,20709,0,0,0,20713,20714,0,0,0,0,0,20724,20725,0,0,0,0,20726,20728,
+20729,0,20733,0,20734,0,20735,20736,0,20737,0,0,20744,0,20745,0,20748,0,0,20749,
+0,0,0,0,0,0,0,0,20750,0,0,0,0,20754,0,0,0,20761,0,0,20763,0,0,0,0,0,0,0,20766,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,20767,0,0,0,0,20768,0,20769,20777,0,0,0,0,0,0,20785,0,
+0,0,20786,20795,20801,0,20802,0,20807,0,0,20808,0,0,20810,0,0,20811,0,20812,0,0,
+0,0,0,20813,0,0,20818,20820,20821,0,0,0,20822,0,20823,0,0,0,20826,0,0,0,0,0,0,0,
+20829,20830,20831,0,20832,20836,0,0,20839,0,0,20840,20842,0,20843,0,20844,0,
+20854,0,0,0,20855,0,0,0,0,20856,0,0,0,20869,0,0,20871,0,0,0,0,0,0,0,20873,0,0,0,
+0,0,20876,0,0,0,0,0,20880,0,0,20882,0,0,0,0,20883,20884,0,0,20890,0,0,0,0,0,0,0,
+0,0,20891,0,0,0,0,0,20905,0,20906,20910,0,0,20912,20915,0,0,0,0,0,20916,0,20917,
+0,20919,20920,20922,0,20927,0,20928,20929,20930,0,0,20935,0,0,20939,0,0,20941,0,
+0,0,20943,0,0,0,20946,20947,0,0,0,0,0,20950,0,20954,0,0,20955,20964,0,0,20967,0,
+0,0,0,0,20973,20975,0,0,0,20984,0,20987,20988,0,0,0,0,0,20989,0,0,0,20995,0,
+20998,0,20999,0,0,0,0,21000,21001,0,0,0,0,21008,0,21010,0,21016,0,0,0,21017,
+21018,0,0,0,0,0,21021,21026,21027,21028,0,0,21029,0,0,0,0,0,21030,0,0,0,0,0,0,0,
+0,0,0,0,0,0,21031,21032,0,0,0,0,0,21037,0,0,21038,0,0,0,0,0,0,0,0,0,21039,0,
+21041,0,21046,21047,0,0,0,21049,21053,0,0,21057,21064,21065,0,0,21066,21067,0,0,
+0,21069,0,0,0,21071,21072,0,0,21073,0,21074,0,0,21078,0,0,0,0,21079,0,0,21080,
+21081,0,0,21086,21087,0,21089,0,0,0,0,0,0,0,21091,0,21093,0,21094,0,0,0,0,0,0,0,
+0,21095,0,0,0,0,0,21096,0,21098,0,0,0,0,0,0,0,21099,0,0,21100,21101,21102,0,0,0,
+0,0,21103,0,21104,0,0,0,0,0,21105,21108,21109,0,0,21112,21113,0,0,0,0,0,0,21115,
+21122,21123,0,0,0,0,0,21125,0,0,0,0,0,0,0,0,21129,21131,0,0,21134,0,0,0,21137,
+21142,0,21143,0,0,21144,0,21145,21146,0,21152,21154,21155,21156,0,0,0,21160,0,0,
+0,0,0,0,21161,0,21164,0,21166,0,0,0,0,21170,0,0,0,0,21171,0,0,21172,0,21174,0,
+21175,0,0,0,0,0,21176,21179,21188,0,0,0,21189,0,0,21190,0,0,0,21192,0,0,21193,0,
+0,0,21198,0,21212,0,0,21213,0,0,0,0,0,0,21215,21216,0,0,21223,21225,0,21226,0,0,
+0,0,21227,21228,0,0,21229,0,0,0,0,21230,21236,0,0,0,0,0,0,0,0,0,0,0,0,0,21237,0,
+0,21238,21239,0,0,0,0,21256,0,0,0,0,0,21257,0,0,0,0,0,0,0,21259,0,0,0,21263,0,
+21272,0,21274,0,21282,0,0,0,0,0,0,0,0,21283,0,0,0,0,0,0,0,0,21294,0,0,21297,0,0,
+0,0,21298,0,0,0,21299,0,21300,21302,0,21316,0,21318,21322,21323,0,21324,0,21326,
+0,0,0,21327,21328,0,0,0,21352,0,0,21354,21361,0,0,0,0,0,0,0,0,0,0,0,0,0,21362,0,
+0,0,21363,0,0,0,0,0,0,0,0,0,21366,0,0,21367,21372,21374,0,0,0,21375,21377,0,
+21378,0,0,0,21380,0,0,0,0,0,0,0,0,0,0,21381,0,0,0,0,0,0,21382,0,21383,0,0,21384,
+0,0,21385,0,0,0,0,21389,21390,0,0,0,0,0,0,0,0,0,0,0,0,0,21397,21398,0,0,0,0,0,0,
+0,0,0,0,21399,0,21400,0,0,0,0,21402,0,0,0,21403,21404,0,21405,21406,0,0,0,21407,
+0,0,0,0,0,0,0,0,0,0,0,0,21408,0,0,0,0,21409,0,21421,0,21422,0,0,0,21425,21428,0,
+0,0,0,21429,0,0,0,0,0,21433,0,0,0,0,0,0,0,0,0,0,21434,0,21443,0,21444,21449,0,
+21452,0,21453,21454,0,0,0,21457,0,0,21458,0,0,0,21460,21461,0,0,21464,0,0,0,
+21473,21478,0,0,21479,0,0,21481,21483,0,0,0,0,0,0,0,0,21484,0,0,21485,21486,0,0,
+21488,0,0,0,0,0,0,21523,0,0,21525,0,0,0,0,0,0,0,21526,0,0,0,0,0,0,21529,21530,0,
+0,21531,0,0,21533,0,0,21539,21564,0,21567,0,0,0,0,0,0,0,0,21575,0,0,0,0,21577,0,
+0,0,0,0,21591,0,0,21604,0,0,0,0,0,0,0,0,0,21605,0,21606,0,0,21617,21618,21619,
+21620,0,0,0,0,0,0,0,0,0,0,0,0,0,21623,0,0,0,0,21631,0,21635,0,0,0,0,21639,21646,
+21653,21662,0,0,21663,21664,0,21666,0,0,21667,0,21670,21672,21673,0,21674,21683,
+0,0,0,0,0,21684,0,21694,0,0,0,0,21695,21700,0,21703,0,21704,0,0,21709,0,0,0,
+21710,0,0,0,0,0,0,0,0,21711,0,0,0,21712,0,21717,0,21730,0,0,0,21731,21733,0,0,0,
+0,21737,21741,21742,0,21747,0,0,0,21749,0,0,0,0,0,0,0,0,0,0,0,0,0,21750,0,0,0,0,
+0,21752,0,0,0,0,21753,0,0,0,0,0,0,21755,21756,0,21757,0,0,0,0,0,0,21760,0,0,
+21763,0,0,0,0,0,0,0,0,0,21764,0,0,21766,0,0,21767,0,0,0,0,0,0,0,0,0,21773,0,
+21774,0,0,21775,0,0,0,0,21776,0,0,21777,0,0,0,0,0,0,0,0,0,21780,21787,21788,
+21791,0,0,0,21797,0,0,0,0,0,21805,0,0,0,0,21806,0,21807,21809,0,21810,21811,0,
+21817,21819,21820,0,21823,0,21824,0,0,21825,0,0,21826,21832,0,0,0,0,0,21833,
+21848,21849,0,0,21867,21870,21871,21873,0,0,0,21874,0,0,0,0,0,0,0,0,0,21875,0,
+21878,0,0,0,21879,0,21881,21886,0,0,0,0,21887,0,0,21888,21894,21895,21897,0,
+21901,0,21904,0,0,21906,0,0,0,21909,21910,21911,0,0,21912,0,0,21913,21914,21915,
+0,21919,0,0,0,0,0,0,0,21921,0,0,21922,21933,21939,0,0,0,0,0,0,0,0,0,0,0,21944,0,
+0,0,0,0,21945,0,21947,0,0,0,0,0,0,0,0,0,0,21949,0,0,0,21950,0,0,0,0,0,0,0,0,0,0,
+0,0,0,21951,0,21952,0,0,0,0,0,0,0,0,0,21954,21957,0,0,0,0,21958,0,21959,0,0,0,0,
+0,0,21962,21963,0,0,0,0,0,0,0,0,21964,21965,0,0,21969,21970,0,0,0,21974,0,0,
+21980,21981,0,21982,0,0,0,0,0,21985,0,21988,0,21992,0,21999,0,0,0,0,0,0,22001,0,
+22002,0,0,0,0,0,0,22003,0,0,0,0,0,22004,0,0,0,22008,0,22009,22015,0,0,22016,0,0,
+0,22017,22019,0,0,0,0,0,0,0,0,0,22020,0,0,0,0,0,0,0,0,0,0,22021,22037,0,22039,0,
+0,0,22040,0,0,0,22048,22049,0,0,22053,22055,22056,22059,0,0,22060,22061,0,0,
+22064,0,0,0,0,22066,0,0,0,0,0,0,0,22073,0,0,0,22074,22075,0,0,0,0,0,0,0,22076,0,
+0,0,0,22077,22084,22099,0,0,0,0,0,0,0,22104,0,0,22107,0,22108,0,22109,0,22110,0,
+0,0,0,0,0,0,22111,22119,0,22120,22122,0,0,0,0,22125,0,0,0,22128,22129,0,0,0,0,0,
+0,22141,0,0,0,22142,0,0,22144,22146,0,22148,22149,22151,22154,0,0,0,22162,0,0,0,
+0,22164,22177,0,0,0,0,22179,0,22182,22183,0,0,22184,22188,0,0,0,0,0,0,0,0,22190,
+0,22194,22201,0,0,22208,0,22209,0,22212,0,0,22215,0,22223,22231,0,0,22232,0,
+22234,0,0,22235,22236,0,22237,0,22240,0,0,0,0,0,22241,0,0,0,22242,22246,22247,0,
+0,0,22259,22268,0,22269,0,0,0,0,0,0,0,22270,0,0,0,0,22271,0,22272,0,22277,0,0,0,
+0,0,22278,22280,22283,22286,0,0,22287,22289,0,0,22290,0,22293,0,0,0,0,0,0,0,0,0,
+0,22295,0,22301,22302,0,0,0,22305,0,22308,0,0,0,0,0,0,0,0,0,0,22315,0,0,0,22317,
+0,22334,0,0,0,22335,0,0,0,0,0,22336,0,22338,22344,0,22347,22349,0,22350,0,0,0,0,
+0,0,0,22357,0,0,0,0,0,22358,0,0,0,0,0,0,0,0,0,0,22359,22360,0,0,0,0,0,0,0,0,
+22361,22366,0,0,22369,0,22370,22373,0,0,0,0,0,22375,0,22377,0,0,0,0,0,22378,0,0,
+0,0,22381,0,0,0,0,22382,0,22383,0,0,0,0,0,0,0,0,0,22391,0,0,22392,22395,22396,
+22402,0,0,0,0,0,0,0,0,0,0,0,0,0,22405,0,0,22406,0,0,22408,0,0,22409,22410,0,0,0,
+0,0,0,22424,0,0,0,0,22426,0,0,0,22427,0,22428,0,22432,0,22435,22442,22443,0,0,0,
+0,22444,0,0,0,0,0,22446,0,22454,0,22455,0,0,0,22465,0,22470,0,22471,0,0,0,0,
+22472,22473,0,22487,0,0,0,22488,0,0,0,0,22489,0,0,22499,0,0,0,0,0,0,22514,0,0,
+22515,0,0,0,0,0,0,0,22516,0,0,0,22517,22520,0,0,0,22534,0,0,22535,0,0,22536,0,
+22540,22553,0,22555,0,0,0,0,22561,0,0,22562,0,0,0,0,0,0,0,0,0,0,0,22566,0,0,0,0,
+22567,22568,0,0,22575,0,22579,0,22582,22583,22585,0,0,0,0,0,22586,0,0,22587,0,0,
+22590,0,0,0,0,0,22591,0,22592,0,0,0,0,0,22593,0,22602,0,0,22604,0,0,22609,0,0,
+22618,0,0,0,0,0,0,22619,0,22624,22625,0,0,22638,0,0,0,0,0,22639,0,0,22640,0,0,0,
+0,0,0,0,22644,0,22645,22647,0,0,0,0,22652,22653,0,0,0,22654,0,22655,0,0,0,22656,
+0,0,0,0,0,0,0,0,0,0,22673,22675,22676,0,0,22678,22679,0,22691,0,0,0,0,0,0,0,
+22693,0,0,22696,0,22699,22707,22708,0,0,0,0,0,0,0,0,22718,0,22719,0,0,0,0,22723,
+0,0,0,22724,22725,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22726,22728,0,0,0,0,0,0,0,0,22729,
+0,0,22731,0,0,0,0,22732,22735,22736,0,0,0,0,22739,0,22749,0,0,22751,0,0,0,0,0,0,
+0,0,0,0,0,22758,0,0,0,0,0,22760,0,0,0,0,0,22764,22765,22766,0,22768,0,0,0,0,0,
+22769,22770,0,0,0,0,0,0,22771,0,0,22772,22775,0,22776,22777,22780,0,0,22782,
+22784,0,22787,0,22789,22796,0,0,0,0,0,22798,0,0,0,0,0,0,22802,0,22803,22804,0,0,
+0,0,0,0,0,0,0,0,22805,0,0,22810,22811,22814,22816,0,22825,22826,0,22831,22833,0,
+0,0,0,0,0,0,0,0,22834,0,22836,22838,0,22839,0,0,0,0,0,22840,0,22847,0,0,0,0,0,
+22856,22857,0,22858,22859,0,0,22862,0,0,22864,0,0,0,0,22865,0,0,0,0,0,0,0,0,0,0,
+0,22866,0,22867,22868,0,0,0,0,22869,0,22871,0,22872,0,22873,22881,22882,22884,
+22885,0,0,0,0,0,0,0,22886,22887,0,22894,0,22895,0,0,0,22900,0,22901,0,0,0,0,
+22904,0,0,0,0,22905,22907,0,0,0,22915,22917,0,0,22918,0,0,0,22920,0,0,0,22929,
+22930,0,0,0,22941,22942,0,0,0,22943,0,0,0,22944,0,0,0,0,0,0,0,22946,0,22947,0,0,
+22954,0,22956,0,0,22962,0,0,0,0,0,0,0,22963,0,0,22964,0,0,0,0,0,0,0,22965,0,
+22968,0,0,0,22969,0,0,0,0,0,22970,0,22971,0,0,0,0,0,22978,0,0,22979,0,22987,0,0,
+22989,0,0,0,0,0,0,22990,0,23005,0,0,0,0,0,0,0,23006,23007,23008,0,0,23023,23024,
+23029,0,0,0,0,23030,0,0,0,0,0,23032,0,0,0,0,0,23035,0,0,0,0,23038,0,0,0,23048,0,
+23049,23052,23053,23060,23061,0,23063,0,0,0,0,23067,23068,0,0,0,23069,23073,0,0,
+0,23127,0,23128,0,0,0,0,0,23129,0,23138,23141,0,23149,0,0,23150,0,0,0,23152,0,0,
+0,0,0,0,0,0,23154,0,0,0,0,23157,23159,23160,0,0,0,0,0,0,0,0,0,0,0,0,23180,0,0,0,
+0,23181,0,0,23188,0,23189,0,0,0,0,0,0,0,0,0,0,0,0,23195,0,0,23196,23199,0,0,0,0,
+0,0,0,0,0,23202,0,23204,0,23207,0,23209,23210,0,0,0,0,0,0,23227,23229,0,0,23230,
+23234,23238,0,0,0,23245,23246,23248,0,0,0,0,23249,23254,0,0,0,23265,0,0,0,0,0,0,
+0,23268,0,23276,0,0,0,0,23277,0,23297,0,23298,0,0,0,0,23299,0,23302,0,0,23303,
+23312,0,0,23314,0,23320,0,0,0,0,23324,0,23325,0,23328,0,23334,0,0,0,23337,0,0,0,
+0,23343,23344,23346,0,23348,0,0,0,0,0,0,0,0,23353,0,0,0,0,23355,0,23356,23358,0,
+0,0,23359,23360,0,23361,0,23367,0,23369,0,0,23373,0,23378,23379,0,23382,23383,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,23387,0,0,0,0,0,0,23388,23390,0,0,23393,23398,0,0,0,
+23399,0,0,0,23400,0,0,0,0,23401,0,0,0,23415,0,0,0,0,0,0,0,0,23416,0,23422,0,
+23443,23444,0,0,0,0,23448,0,23454,0,0,0,0,0,0,23456,0,0,23458,23464,0,0,0,0,0,0,
+23465,0,0,0,23470,23471,0,0,23472,0,0,0,23473,23496,0,0,0,0,0,0,0,0,23497,0,
+23499,0,0,23502,0,0,23503,0,0,23513,0,0,23515,0,0,0,23517,0,0,0,0,23518,23519,
+23521,23524,0,23525,23528,23539,0,0,0,0,0,23541,0,0,23544,0,0,23556,0,0,23557,0,
+0,0,0,0,0,0,0,0,0,0,0,0,23559,0,23560,0,0,23561,0,0,23566,0,0,0,0,0,23568,23569,
+23570,0,0,0,0,23571,0,23574,0,0,0,0,0,0,0,0,0,0,0,23575,0,23579,0,0,23581,0,0,0,
+0,0,0,23587,0,0,0,0,0,0,0,23596,23598,0,0,0,0,23602,23606,0,0,23607,0,23608,0,0,
+0,23614,23616,0,0,0,0,0,23618,0,0,23619,0,0,0,0,23621,23626,0,23627,0,0,0,0,0,0,
+0,23629,0,23630,0,0,0,0,23634,0,23636,0,0,0,0,0,0,23638,0,0,0,0,23640,23667,0,
+23669,0,0,0,23681,0,0,0,0,0,0,0,23682,0,23683,0,0,0,0,0,23684,0,0,0,23685,23689,
+0,23693,23694,23700,0,23702,0,23709,0,0,0,0,0,0,0,23712,0,0,0,0,0,23714,0,0,
+23715,0,0,0,0,23718,0,0,23720,0,0,0,0,23722,0,0,0,23726,23729,0,23741,23746,0,
+23748,0,0,0,0,23749,0,0,0,0,0,23750,0,0,0,0,23751,0,23753,0,0,0,0,23757,23765,0,
+0,0,23770,0,0,0,0,0,0,0,23771,0,23772,23781,0,0,23796,0,0,0,0,23798,0,23799,0,0,
+0,23802,0,0,23806,0,23807,0,0,23808,0,23809,0,23819,0,0,0,23821,0,23827,0,0,0,
+23829,0,0,0,0,0,0,0,23830,0,0,0,0,0,0,23832,23833,23834,23835,0,0,0,0,23837,
+23838,0,0,0,0,0,23846,0,0,0,0,0,0,23847,0,0,0,0,0,23879,23881,0,0,23882,23883,
+23895,0,23899,0,0,0,0,23901,0,0,0,0,0,0,23902,0,0,0,0,0,23903,23905,0,23906,0,
+23907,23918,23919,23920,0,23922,0,23924,0,23927,0,23934,0,23937,23941,0,23942,
+23946,0,0,0,0,0,23955,23956,23958,0,0,0,0,0,0,23959,0,23962,23965,0,23966,0,0,0,
+0,23967,23968,0,0,23973,0,0,23974,0,0,0,0,23975,0,23976,0,0,0,0,0,0,0,0,0,0,0,0,
+0,23977,0,0,0,0,0,0,0,0,23980,0,0,23984,0,23985,0,0,23987,0,0,23988,23990,23991,
+0,0,0,0,0,0,23992,0,0,0,0,0,0,0,0,23994,0,0,0,23998,0,0,0,0,0,0,0,0,0,23999,0,0,
+24003,0,24004,0,24006,0,0,0,24007,0,0,24008,0,0,0,0,0,0,0,24009,0,0,24010,0,0,
+24011,0,0,24013,24014,0,0,24015,24016,24027,0,24028,24029,0,24030,0,0,0,0,0,
+24033,24034,0,24035,0,0,24036,0,0,24044,0,24048,24049,24063,24067,0,24068,24070,
+0,0,24071,24078,24087,0,24090,0,0,0,24095,0,24098,24101,24104,24106,0,24107,0,0,
+0,24108,0,0,0,0,24110,24111,0,24113,0,0,24115,24120,0,0,0,0,0,0,24124,0,24125,0,
+24126,0,24127,0,0,0,0,0,24135,0,0,24136,0,24137,24142,0,0,0,24146,0,0,24147,
+24149,24154,0,24163,0,0,0,24165,24166,24167,0,0,0,0,0,0,0,0,0,0,24169,24170,
+24175,0,0,0,24178,0,0,24179,0,0,24181,0,24184,24197,0,24201,24204,0,0,0,0,0,0,
+24206,24212,24220,0,0,0,24224,0,0,0,0,0,0,0,0,24226,0,24234,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,24235,0,24236,0,0,0,0,0,24239,24240,24241,0,0,24248,0,0,24249,0,
+24251,0,0,0,0,0,0,24253,0,24268,0,0,0,24269,0,24271,24272,0,0,0,0,24273,0,0,
+24274,0,0,24279,0,0,0,0,0,0,0,24280,0,24293,24294,0,0,0,0,0,0,24296,0,0,24323,0,
+0,0,24329,24330,24331,24339,0,24351,0,0,24369,24370,0,0,0,24371,0,0,0,0,24372,
+24373,24374,0,0,0,0,0,24378,0,0,0,0,24379,0,24381,0,24383,24389,0,24390,0,0,
+24394,24395,24400,0,0,0,24401,24402,0,24406,0,0,0,24411,0,0,0,24415,0,24416,0,0,
+0,0,0,24417,0,24419,0,24422,0,24423,24428,0,24435,0,0,0,24439,0,0,0,24440,24442,
+24446,0,0,0,24447,24448,24449,24452,0,0,0,0,24453,24457,0,0,24458,24459,24460,0,
+24465,0,0,0,0,0,0,0,24470,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24471,0,24473,
+24474,24475,24476,0,24478,0,0,0,0,24480,0,0,0,0,0,0,0,0,0,0,24481,0,0,0,0,0,0,0,
+0,0,0,24482,24485,0,0,0,0,24486,0,0,0,24488,0,0,0,24494,0,0,0,0,24497,0,0,24498,
+0,0,0,24499,24506,0,0,0,24507,0,0,24511,0,0,24513,24514,0,0,0,0,0,24517,0,24518,
+0,24520,0,24521,24524,24525,0,0,0,0,0,24527,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24528,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24537,24539,0,24540,0,0,0,24548,0,0,0,0,0,24549,
+24550,0,0,0,24553,24554,0,24555,0,24556,0,24558,0,0,0,0,0,24560,0,0,0,24561,0,0,
+0,0,0,24562,0,0,0,0,0,0,0,0,0,0,0,0,0,24567,0,0,0,0,0,24569,0,0,0,24574,0,24575,
+0,0,0,0,0,0,0,0,0,0,0,24577,24581,0,24584,0,0,0,0,0,24585,0,0,0,0,0,24586,0,0,
+24587,0,24588,0,0,0,0,0,0,0,0,0,0,24590,24591,0,0,0,0,24592,0,0,0,0,0,0,0,24594,
+0,0,0,0,0,0,0,24596,24597,0,0,0,0,24602,24603,0,0,0,0,24604,0,0,24605,0,24610,0,
+0,24611,0,0,0,0,24612,24615,24616,24624,0,0,0,24627,0,24638,24639,0,0,0,0,24640,
+0,0,0,24655,24656,24657,0,0,0,0,0,0,0,0,24662,0,24663,24664,0,0,0,0,0,24665,0,0,
+0,0,24667,0,0,0,0,0,0,24668,24669,0,24670,24674,0,0,0,24675,0,24678,0,0,24679,0,
+0,0,24681,0,24683,0,0,0,0,24684,0,24685,0,0,24686,0,0,24688,24689,0,0,0,0,24690,
+24691,0,0,0,0,0,0,0,24697,0,24698,0,0,0,0,0,0,0,0,24709,0,0,0,0,0,24710,0,24712,
+0,0,0,0,0,0,24713,24714,0,24715,0,24716,24718,0,24719,0,0,0,0,24720,0,0,24725,0,
+0,24738,0,24749,24750,0,0,0,24752,0,0,0,24753,0,0,0,24758,0,0,0,0,0,24762,0,
+24763,0,0,0,0,0,0,0,24764,0,0,0,0,0,24765,24767,24768,0,24772,0,0,0,0,24773,0,0,
+0,0,24777,0,0,0,0,0,24785,0,24786,24788,0,0,0,24789,0,0,0,0,24794,24798,0,24799,
+24800,0,0,0,24803,0,24804,24806,0,24807,0,0,0,24810,0,0,0,0,0,0,24827,24828,0,
+24835,0,0,0,0,0,0,24836,0,0,0,0,0,24839,0,24843,24844,0,0,0,0,0,0,0,0,0,0,24847,
+0,0,24848,0,0,0,0,0,0,24849,0,24850,24851,0,0,0,24852,0,24853,0,0,0,0,0,0,0,0,0,
+24854,0,24855,0,0,24868,0,0,0,24883,0,0,0,24884,0,24895,24897,0,0,0,0,0,24899,0,
+0,0,0,0,24900,0,24913,0,0,0,0,0,0,24914,0,0,24917,24930,24931,0,0,0,24932,0,0,
+24939,0,0,24942,0,0,0,0,0,0,0,0,0,24945,24950,0,24951,0,0,24953,0,0,0,24954,0,
+24959,0,0,0,24961,0,0,24962,0,24964,24968,24970,24972,0,0,0,0,0,24976,0,0,0,
+24977,0,24982,0,0,24983,0,0,24984,0,0,0,24993,0,0,0,24994,0,0,25001,0,0,0,25003,
+0,0,25018,0,0,25023,0,0,0,25034,0,0,25035,25036,0,25037,0,0,0,0,0,0,0,25039,0,0,
+0,0,0,25040,0,0,0,0,0,0,0,25042,0,0,25043,25045,0,0,0,0,0,0,25049,0,0,25051,0,
+25052,25053,0,0,25054,0,0,0,25055,0,0,0,0,25057,25059,0,0,25060,25064,0,25065,
+25069,25070,0,0,0,0,25072,0,25073,0,25090,0,0,25092,25093,25101,0,0,0,0,0,0,
+25105,25108,0,0,25113,0,0,25115,25116,0,0,0,0,0,0,25117,0,0,0,25120,25121,0,0,0,
+0,0,0,0,25125,0,0,0,25126,0,25130,25134,0,25139,0,25143,0,0,0,25151,0,25161,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25163,0,0,0,0,0,0,0,25174,0,25175,0,25207,0,0,
+0,25209,0,0,0,0,25213,0,25219,0,25223,0,25225,0,0,0,25227,0,0,0,25228,0,0,0,
+25229,0,0,0,0,0,0,0,25231,25233,0,0,0,0,25237,25239,0,0,0,25243,0,0,0,25252,0,
+25257,25258,0,0,0,0,25260,25265,0,25268,0,0,25273,25324,0,25325,0,25326,0,0,0,0,
+0,0,0,0,25327,0,0,0,0,0,25328,0,0,0,0,0,0,25332,0,0,0,25333,0,0,0,25336,25337,
+25338,0,0,25343,0,25350,0,0,0,0,0,0,0,25352,0,25354,0,25375,0,25379,0,0,0,0,
+25384,0,0,0,0,0,0,0,0,0,25386,0,25388,0,25390,0,0,25399,0,0,25401,0,0,0,25402,0,
+0,0,25407,0,0,0,0,0,0,0,0,0,0,0,25413,25415,0,0,25417,0,0,0,0,0,0,0,25419,0,0,0,
+25421,0,0,0,25424,0,0,0,0,25433,0,0,0,0,0,0,0,0,0,25435,0,0,0,0,0,0,25436,0,0,0,
+25437,0,0,25440,0,0,0,0,0,0,25442,0,0,25443,0,25446,0,0,25449,0,0,0,25450,0,0,0,
+0,25452,0,25453,25454,25455,0,0,0,25456,0,25457,0,0,0,25459,0,25461,0,25468,0,0,
+0,0,0,0,0,0,25469,0,0,0,0,0,25471,0,0,0,0,0,25474,0,0,0,0,0,0,0,0,25475,0,0,0,0,
+25477,0,0,0,0,25483,0,0,0,0,0,25484,0,0,0,0,0,0,0,0,0,0,0,0,25485,0,25497,0,0,
+25498,0,25504,0,25510,0,25512,0,0,25513,25514,0,0,0,0,0,0,25517,25518,25519,0,
+25520,0,0,0,0,0,0,0,25521,0,25522,25527,25534,0,25536,0,25537,0,0,25548,25550,0,
+0,25551,0,25552,0,0,0,0,0,25554,0,25555,0,25556,25557,25568,0,0,0,25570,25571,0,
+0,0,0,0,0,25574,0,0,0,0,25579,0,0,0,25581,0,0,0,25582,0,0,0,0,0,0,0,0,0,25588,0,
+0,0,0,25589,0,0,0,0,25590,0,25591,25592,25593,0,25594,0,0,0,25596,0,25597,25615,
+0,0,0,0,0,25618,0,0,0,0,25619,25623,0,0,25629,0,0,25631,0,0,0,25635,25636,0,0,
+25649,0,0,0,0,25654,0,0,0,25661,25663,0,0,25671,0,0,25678,25698,0,25699,25702,
+25703,0,0,0,0,0,0,0,0,25704,0,0,0,0,0,25706,0,0,25710,0,25711,0,25712,0,25715,
+25716,25717,0,0,25718,25728,25732,0,0,0,25734,0,0,0,0,0,0,0,0,0,25737,0,0,25739,
+0,0,0,25740,0,25741,25745,0,25746,0,25748,25772,25778,0,0,0,0,0,25780,0,0,0,0,
+25781,0,25782,25784,25785,0,0,0,25789,0,0,0,0,0,0,25797,25801,0,0,0,25808,25809,
+0,0,25811,25814,25815,0,0,25817,0,0,0,0,0,0,0,0,25820,0,0,0,0,25832,25833,0,0,0,
+25846,0,0,0,25847,25848,0,0,0,0,0,0,0,0,0,25849,25850,0,0,25851,0,0,25852,0,
+25862,0,0,0,25863,25865,0,0,0,0,0,0,0,25867,25868,0,25869,25874,0,25875,0,25876,
+25877,0,0,0,0,25878,25902,0,0,0,0,0,0,0,25903,25904,25905,0,0,0,25908,25909,0,0,
+0,0,25910,0,0,0,0,0,0,0,25912,0,25913,0,0,0,0,0,0,0,0,25914,0,0,25916,0,0,0,0,0,
+25917,25927,0,0,0,0,25928,0,0,25930,0,0,0,25933,0,0,25938,25942,0,0,0,0,0,0,0,
+25945,0,25950,0,25956,0,0,25961,25962,0,0,25963,0,25964,25965,25966,0,0,0,0,0,
+25967,0,0,0,0,25968,0,0,0,25969,25971,0,0,0,0,0,25973,25975,0,0,0,0,0,0,0,25978,
+0,25981,0,0,0,25982,0,0,0,25984,0,0,0,0,0,0,0,25993,0,0,0,0,0,0,0,0,0,0,0,0,0,
+26002,0,0,0,26005,0,0,0,26006,26007,0,0,26014,26015,26016,0,0,0,0,0,0,26017,
+26018,26020,0,26022,26023,0,0,0,26024,26028,0,26029,26033,26034,26044,0,0,0,0,0,
+26046,0,0,26047,0,0,26049,0,26050,0,26051,0,0,0,0,0,26053,0,0,0,0,26054,26059,0,
+0,0,0,0,0,26060,0,26066,0,0,0,0,0,0,0,0,0,0,0,0,26067,0,26069,0,0,26071,0,0,0,
+26073,0,26074,26077,0,0,0,0,26078,0,0,0,26079,0,26090,0,0,26094,0,0,0,0,0,0,0,0,
+26095,0,0,0,0,0,0,0,0,0,0,0,26096,26101,0,26107,26122,0,26124,0,0,26125,0,0,0,0,
+0,0,26136,26141,26155,0,0,0,0,0,0,0,0,0,26164,26166,0,0,0,26167,0,26170,26171,0,
+0,26172,0,0,26174,0,0,0,0,0,0,0,0,0,0,0,0,0,26175,0,0,0,26176,26177,0,26321,
+26322,0,26323,0,0,26324,0,0,0,0,0,0,0,26325,0,26331,0,0,0,0,0,0,26335,0,0,0,
+26350,0,0,0,26379,0,0,26382,26383,26385,0,0,26392,26406,0,0,0,0,26411,0,0,0,0,0,
+26412,0,0,26420,0,0,26423,0,26424,26426,26432,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+26435,0,26436,0,0,0,0,0,26441,0,26444,0,0,0,26446,0,0,0,0,26447,0,0,0,0,26449,0,
+26450,26452,0,26453,26454,0,0,0,26455,0,0,0,26456,0,0,26458,0,0,26460,0,26463,0,
+0,0,0,0,0,0,0,26464,26470,0,0,0,0,0,0,0,0,0,26473,0,0,26474,0,0,0,0,0,0,0,26475,
+0,0,0,0,0,0,0,26477,0,26485,0,0,26486,0,26487,0,0,26488,26493,26494,0,0,26495,0,
+26497,26504,26506,0,0,0,0,0,26507,0,0,0,0,0,26509,0,0,26510,0,0,0,0,0,0,0,0,0,0,
+0,0,0,26512,0,26513,26515,0,0,0,26518,0,0,0,26519,0,26524,26526,0,0,0,26527,0,
+26532,0,26533,26537,26558,0,0,0,26559,0,0,0,26571,0,0,26573,0,26588,0,26593,0,0,
+0,0,0,0,26603,0,26604,0,0,0,0,0,0,0,0,0,0,26606,0,0,0,0,0,0,0,26607,26609,26611,
+26614,0,0,0,26616,26620,0,26621,0,0,0,0,0,26627,0,26629,0,0,26630,0,0,26632,
+26643,0,0,0,26644,0,0,0,0,0,0,0,0,0,26646,26647,0,0,0,26650,0,0,26656,0,0,0,0,
+26663,26670,26671,0,0,0,26685,26686,26687,0,26689,0,0,0,0,26744,0,26745,0,26747,
+26748,0,26749,26750,26751,0,0,0,0,26752,26755,0,0,0,26756,26769,0,0,0,26774,0,0,
+0,0,0,26775,0,26777,26778,0,26786,0,0,0,26787,0,0,0,0,0,0,0,0,0,0,0,0,0,26788,0,
+0,26789,0,0,0,0,0,26791,0,26792,26793,0,0,0,26794,0,26797,26798,0,0,0,26800,0,0,
+26803,0,26804,0,0,0,0,0,0,0,0,0,26805,0,0,26808,0,0,26809,0,0,0,0,0,0,0,26812,0,
+26825,0,0,0,0,0,0,0,26826,0,0,26827,26829,26834,0,0,0,0,26835,0,0,26849,0,26851,
+0,0,0,0,0,0,0,0,0,26852,0,26853,26857,0,26858,0,26859,0,0,0,0,0,0,0,26876,0,
+26878,26882,26883,0,0,0,0,26890,26894,0,0,0,0,26895,26896,0,0,0,0,0,26900,0,0,0,
+0,0,0,0,26911,26913,26914,26915,26916,26919,0,0,0,26921,26922,0,0,26925,0,0,0,
+26928,0,0,26929,26930,0,0,0,26931,0,26932,0,0,0,0,0,26933,0,0,0,0,0,0,26937,0,0,
+26943,0,0,26944,0,0,0,26946,0,0,0,0,0,0,0,26956,0,26958,0,0,26963,0,0,0,0,0,0,0,
+26965,0,26969,26970,26972,0,0,0,0,0,26973,0,26974,0,26978,0,26980,0,0,0,0,0,0,
+26982,0,26986,26987,0,26990,0,0,0,0,27003,27006,0,0,27007,27010,27012,27013,0,0,
+0,0,0,0,0,0,27014,27015,27018,0,27019,0,0,0,0,0,27025,0,0,0,27026,0,0,0,0,27029,
+27030,27031,27034,0,0,27036,27037,0,0,0,27038,27042,0,0,0,27044,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,27045,0,0,0,0,0,0,0,27046,0,0,0,0,0,0,0,27047,27049,0,27050,0,0,0,
+27051,27052,0,27055,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27056,27058,27059,0,
+27061,0,27064,0,0,0,0,0,27069,0,0,27070,0,0,0,0,0,0,0,27072,0,0,0,0,0,0,0,0,
+27076,0,0,0,0,0,27078,0,27079,0,0,0,27081,0,0,0,0,0,0,27082,0,27083,27086,0,0,0,
+0,27087,0,0,0,0,0,27088,27090,0,27094,0,0,27095,0,27099,27102,0,0,0,27103,0,0,0,
+0,27105,0,0,0,27106,0,0,0,0,0,0,27107,0,0,0,0,27108,27117,0,0,0,0,27118,0,0,
+27124,0,27126,0,0,27130,27131,0,0,0,0,0,0,27147,0,0,0,0,27148,27149,0,0,0,0,
+27150,27151,0,27152,0,27159,0,0,0,27164,0,0,0,0,0,0,0,27175,0,27189,0,0,27191,0,
+27193,0,27195,0,27198,0,0,0,0,0,27200,0,0,0,0,27202,0,0,0,0,27203,0,0,27204,0,0,
+27206,0,27207,0,0,0,0,27209,0,0,0,27213,0,0,27216,27219,27220,27222,27223,0,
+27224,0,27225,27226,0,0,27233,0,0,0,0,27235,0,27237,0,27238,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,27239,0,27242,27243,0,27250,0,0,0,27251,0,27253,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,27254,27255,27258,0,0,0,27259,0,0,0,0,0,0,27267,0,27276,27278,
+0,0,0,0,0,0,0,0,0,27296,27297,27301,0,0,0,0,0,0,27302,0,0,0,0,0,0,27312,27313,0,
+0,0,0,0,27318,0,27320,0,27329,0,27330,27331,0,27332,0,0,0,0,27340,0,0,0,27348,0,
+0,0,0,0,0,27350,0,27351,0,0,0,0,27355,0,0,27358,27359,27361,0,0,0,27365,0,27367,
+0,27376,27378,0,0,27379,0,0,0,0,0,0,27396,0,27397,27404,0,0,0,0,0,27408,0,0,0,0,
+27453,0,0,0,27456,0,0,0,27458,0,0,0,0,0,0,0,27459,0,0,0,27460,0,0,27461,0,27465,
+27467,0,0,27469,0,27470,0,27471,0,27477,27482,0,0,0,0,0,0,27484,0,0,0,0,0,0,
+27485,0,0,0,0,0,27493,0,27494,27502,0,0,0,0,0,0,0,0,0,0,0,0,27511,27532,0,0,0,
+27533,27545,0,0,0,27546,0,0,0,0,0,0,0,0,0,0,27547,0,0,27549,27550,0,27551,0,0,0,
+0,0,0,0,27555,0,0,27571,0,27573,27574,27575,27577,0,27578,0,0,27579,27585,0,0,0,
+0,0,27586,0,0,27588,27589,0,0,0,0,27596,0,0,27600,0,0,0,0,0,0,0,0,0,0,0,27608,0,
+0,0,0,0,0,0,0,0,0,0,27610,0,0,0,27618,0,0,27620,0,0,0,27631,0,0,27632,27634,0,
+27636,27638,0,0,0,27643,0,27644,27649,0,0,0,0,0,0,0,0,0,0,0,0,0,27651,27660,0,
+27661,0,0,0,0,0,0,0,27662,0,0,27664,0,27665,0,0,0,27669,0,27671,0,0,0,27673,
+27674,0,0,0,27682,0,0,0,27711,0,27712,27713,27719,27720,0,0,27728,0,27729,0,0,0,
+0,0,0,0,0,0,27731,0,0,27732,0,27733,0,27738,0,0,0,27742,0,0,0,27743,27744,0,0,0,
+0,0,0,27745,27746,0,0,0,27747,27748,27751,27752,0,0,0,27768,27770,0,0,0,27774,
+27775,0,27776,27777,0,0,27781,0,27784,0,27786,0,0,27791,0,27792,27793,27804,0,
+27812,27813,0,0,0,0,0,0,0,0,27814,0,27825,0,27827,0,0,0,0,27828,27861,27862,0,0,
+0,27864,0,0,0,27865,27884,0,27889,0,0,0,0,0,27890,0,27891,0,0,0,27892,0,0,0,0,0,
+27897,27898,0,0,27899,0,0,0,27901,27905,0,0,27920,0,0,27921,0,27922,0,0,0,27931,
+27934,0,0,0,0,0,0,0,0,0,0,27941,0,27942,0,27945,0,27947,27954,0,0,0,0,27960,
+27963,0,0,0,0,0,0,0,0,27964,27965,0,0,0,27967,0,27969,27975,0,27976,27977,0,
+27981,0,27983,28051,28052,0,0,0,0,0,28056,0,0,0,0,0,0,28058,28059,0,0,28061,0,0,
+0,0,0,0,0,28063,0,0,0,0,0,0,28066,0,0,0,0,0,0,28069,28070,28072,0,28073,0,0,
+28074,0,0,0,0,28075,0,0,0,0,0,0,0,28078,0,0,0,0,28085,0,0,0,0,28086,0,0,0,0,0,0,
+28088,0,0,0,0,0,0,0,0,28090,0,28097,28114,28115,0,0,0,0,0,0,0,28116,0,0,0,0,0,
+28118,0,28129,0,28131,0,0,28135,0,0,0,28140,28141,0,0,0,28146,0,0,0,0,28152,0,0,
+0,0,28155,28157,28161,0,0,0,0,28166,0,28167,0,0,0,0,0,0,0,0,0,0,0,28172,0,0,0,0,
+0,0,28173,0,0,28175,0,0,0,0,0,0,0,0,0,28178,28188,0,28190,0,0,0,0,0,28191,0,
+28193,28206,0,0,28207,28209,0,28211,0,28213,0,0,0,28215,28216,28217,0,28222,0,
+28223,28225,0,0,0,28226,0,28227,28229,28232,0,0,0,0,0,0,0,0,0,28235,0,28241,0,0,
+28242,0,0,0,0,28243,0,0,0,28245,0,0,0,28248,28250,0,28251,28252,0,0,0,0,0,0,
+28253,0,0,28254,28255,0,0,28256,0,0,28258,0,0,0,0,0,28259,0,0,28260,0,0,28261,0,
+0,0,0,28262,28263,0,0,28264,0,0,0,28266,0,28268,28269,0,28270,28272,28274,0,
+28277,28278,0,0,0,28279,0,28280,28281,28283,0,28292,0,28294,0,28297,0,0,0,0,
+28299,0,0,0,0,0,28300,0,0,0,0,0,0,0,28301,0,0,0,0,0,0,0,0,0,0,0,0,0,28302,28303,
+0,0,0,0,28304,0,0,28305,0,28312,0,28313,28314,0,0,0,0,0,0,28315,0,0,0,28320,
+28321,0,0,28328,0,0,0,28329,28338,0,28339,0,0,28344,0,0,0,0,0,0,0,0,28347,0,0,0,
+0,0,0,0,0,28348,0,0,0,0,0,28411,0,28412,28413,0,28416,0,0,0,28420,0,0,0,0,0,
+28421,0,0,0,0,28423,0,0,0,28424,0,0,28428,0,0,0,0,0,28429,0,0,0,28431,28434,0,
+28458,0,0,0,0,0,0,0,0,0,0,0,28464,0,0,0,0,28465,0,28467,0,0,0,0,0,0,28471,0,0,0,
+0,28474,0,28480,0,28481,0,0,28485,0,0,0,0,28486,28488,0,0,28489,0,0,0,0,28492,0,
+0,0,28495,0,28497,0,28499,0,0,0,0,28500,0,0,28502,28503,0,0,0,28508,0,0,0,28510,
+0,0,28512,28513,28514,28521,0,28526,0,28527,28528,0,0,0,0,28529,0,0,28532,0,0,
+28537,28538,0,0,0,28539,0,28548,0,28553,28554,0,0,0,0,0,0,0,0,0,0,0,0,28560,
+28563,0,0,28564,0,0,0,0,28565,0,0,0,0,0,0,0,28566,28568,0,0,0,0,0,0,28569,0,0,0,
+28570,0,28572,28573,0,0,0,0,28575,0,0,0,0,28576,28581,28588,0,0,28589,0,0,0,
+28590,28595,0,28598,0,0,28601,0,0,28605,0,0,0,0,28614,28615,28619,0,0,0,0,0,0,
+28620,0,28626,0,0,28628,0,28631,0,28632,0,0,0,0,0,0,28635,0,0,0,28637,28638,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28639,0,28643,0,0,28652,0,0,0,28662,0,
+28670,28671,0,0,0,0,0,0,0,0,0,28672,28673,28675,28676,0,0,0,0,0,0,0,28691,0,0,0,
+28695,0,0,0,28696,0,28697,28698,0,28705,0,28707,28708,28710,0,0,0,0,0,0,0,28711,
+28728,0,0,0,28736,0,0,0,28737,0,0,0,0,0,0,0,0,0,28738,0,28739,0,28741,0,0,28742,
+0,0,0,0,0,0,0,0,0,0,0,28745,0,0,0,0,0,0,28749,28750,28752,28754,28756,0,28757,0,
+0,0,0,28759,28760,0,0,0,0,0,0,28762,0,0,0,28764,0,0,0,0,0,0,28766,0,28767,28768,
+0,0,0,0,28769,28770,0,0,0,0,0,0,0,0,0,0,0,0,0,28771,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,28772,0,28773,0,28782,0,0,0,0,0,0,28784,0,28785,0,28786,0,0,0,28787,0,0,0,
+28797,0,0,0,0,0,0,28799,0,0,28801,0,0,0,0,28802,0,28805,0,0,28806,0,0,28807,0,0,
+0,0,0,0,0,28808,0,0,0,0,0,28810,28812,0,0,28816,28819,0,0,28821,0,28826,0,0,0,
+28842,28852,0,0,28853,0,28854,28855,0,0,0,28857,0,0,0,28858,0,28867,28868,28869,
+0,0,0,28874,28880,28882,28890,28892,0,0,0,0,0,0,0,28895,0,0,0,28898,28899,0,0,0,
+28900,0,0,28904,0,28906,0,0,0,0,28907,0,0,0,0,0,0,28908,0,0,0,28910,0,28914,0,0,
+0,0,0,0,0,28915,28916,28919,0,0,28920,0,28921,0,0,0,0,0,0,0,0,28924,0,0,0,0,
+28926,28929,0,0,0,28930,0,28936,0,28939,0,0,0,0,28942,0,0,0,0,0,0,28956,0,0,0,
+28966,0,0,0,0,28967,0,0,0,0,0,0,0,0,0,28968,0,28971,0,28975,28976,0,28982,28983,
+0,0,28984,28989,28996,28997,28998,0,0,0,0,0,0,28999,0,0,0,0,0,29000,0,29001,0,0,
+0,29009,0,0,29011,0,0,29021,0,0,0,0,29024,0,29025,0,0,0,0,0,29026,0,0,0,29036,0,
+0,0,29037,0,0,0,0,29038,0,29045,0,29047,0,0,0,0,0,0,0,0,0,29051,0,0,0,29054,
+29056,29062,0,29070,29082,0,0,0,29083,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29084,0,0,
+0,0,29085,29088,0,0,0,0,0,0,0,29090,29097,0,0,0,29103,0,0,0,0,0,0,0,0,29105,0,0,
+0,0,0,29107,0,29109,0,0,0,29115,0,0,29120,0,0,29138,29140,0,0,0,0,0,0,0,0,0,
+29152,0,29160,29174,0,29176,0,0,29180,0,29181,0,0,0,0,0,0,0,0,29228,0,0,29229,0,
+0,29230,0,0,0,0,0,0,0,0,0,0,29234,0,0,0,29241,0,29245,0,29248,0,29250,29256,
+29280,0,29282,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29285,0,0,29286,29291,29292,0,0,0,0,
+29294,0,29295,0,0,0,0,0,29296,29297,29298,29300,0,29302,0,0,29304,29307,0,29312,
+0,0,0,29322,0,0,29323,0,0,29324,29326,29328,0,29335,0,0,0,0,0,0,0,29338,29339,0,
+0,0,0,0,29341,29343,0,0,0,0,29344,0,0,0,0,0,29345,0,0,0,0,29346,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,29347,29348,29349,0,0,29354,0,0,29355,0,0,0,0,0,0,0,0,29357,0,0,
+0,0,29364,0,29365,0,0,0,0,0,0,0,29366,0,0,29368,0,0,0,0,0,0,0,0,29378,0,29381,0,
+0,0,0,0,0,0,0,29386,0,0,0,0,0,0,29389,0,0,0,29390,0,0,29391,29397,0,29398,29412,
+29414,29418,29419,0,0,0,0,0,0,0,29420,0,0,0,0,0,0,0,29423,0,0,0,29435,0,0,0,
+29437,0,0,29439,0,29441,0,0,0,0,29443,0,29446,29450,29452,0,0,0,0,0,29456,0,0,0,
+0,0,29461,0,0,0,29464,0,0,0,0,0,0,0,0,29468,0,29473,0,0,0,29486,0,0,0,29490,0,0,
+0,29491,29492,0,0,29497,0,0,0,29498,0,29499,0,29502,29505,0,29509,0,0,0,29510,0,
+0,0,29512,0,0,0,29516,0,0,0,0,0,0,0,0,29518,0,29519,0,0,0,0,0,29520,29521,29529,
+0,0,0,0,0,0,0,0,29530,0,0,29531,29538,0,29540,0,0,0,29542,0,29543,29544,29547,0,
+0,29548,0,0,0,29549,0,0,0,29550,0,0,29552,0,0,0,0,29558,29561,0,29562,29564,0,0,
+29565,0,0,29566,0,0,0,0,0,0,0,0,0,0,29578,29584,29586,29591,0,0,0,0,29593,29594,
+0,0,29597,0,0,29613,0,29614,0,29615,0,0,0,0,29616,29617,0,0,29625,0,0,0,29632,0,
+0,0,0,0,0,0,29633,0,0,0,0,0,29634,29635,29637,0,29638,0,29641,29643,0,0,0,0,0,0,
+29644,0,29645,0,29649,0,0,0,29650,0,29653,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29656,
+29659,0,0,29660,0,0,0,29661,0,0,0,0,0,29664,0,0,0,29671,29673,0,0,0,0,0,0,0,
+29675,0,29677,29679,0,0,29684,0,0,0,0,0,29685,0,0,0,29687,0,0,0,29688,0,29689,
+29690,29700,0,29701,0,0,0,29702,0,29706,0,0,0,0,0,0,0,29720,0,29721,0,29727,0,
+29733,29734,0,29750,29761,0,29763,0,0,0,0,0,29764,0,0,29765,0,0,0,29771,0,0,0,0,
+0,0,0,0,0,0,0,0,29772,0,0,0,29773,29774,29775,0,0,0,0,0,0,0,0,0,0,0,29822,0,0,0,
+29824,0,29825,0,0,0,0,0,29827,0,0,0,0,0,0,0,0,29829,0,29832,29834,0,0,29835,0,0,
+29837,29838,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29843,0,0,0,0,29844,29845,0,0,0,
+0,0,0,0,0,0,29849,0,0,29869,29872,29890,29905,0,0,0,0,0,29907,29921,0,29922,0,0,
+29923,29926,29944,29946,0,0,0,0,0,0,0,29947,29948,0,0,0,29951,0,0,0,0,0,29953,0,
+0,29956,0,29957,0,0,29962,0,0,0,0,29971,0,0,0,29972,0,0,0,0,0,29978,0,29979,
+29992,30007,30008,30010,0,0,0,30013,0,0,0,0,30014,30016,0,0,0,0,0,0,0,0,0,0,0,
+30017,0,0,0,0,0,30023,30031,0,0,30033,0,0,0,0,0,0,0,0,0,0,30034,0,30038,0,30039,
+0,30040,0,0,0,0,0,0,30067,30068,0,0,0,30069,0,30072,0,0,0,30073,0,0,0,0,30075,0,
+0,0,0,0,0,30079,0,0,30080,0,0,0,0,0,30082,0,0,0,0,0,0,0,0,0,0,0,30084,30090,0,0,
+30091,0,0,0,0,30098,30118,0,30119,0,30121,30130,0,0,0,0,0,0,0,0,0,0,0,0,0,30131,
+30132,30133,0,0,0,0,0,0,30135,0,0,0,0,0,0,0,0,0,0,0,30136,0,0,30137,30138,0,0,0,
+30139,30146,0,0,0,0,0,30147,0,0,30148,30151,0,0,0,30168,0,30172,30173,0,0,0,0,0,
+0,0,0,30180,30181,0,30192,0,0,0,0,0,0,0,30194,30196,0,0,30199,0,0,30202,0,0,0,0,
+30203,0,0,0,0,0,0,0,0,0,0,30213,0,0,0,30216,0,0,30217,0,0,0,30218,0,0,0,0,30219,
+0,30220,0,30222,30227,0,0,0,0,0,30231,0,0,30233,30235,0,0,0,0,30238,0,30240,
+30243,30245,0,30250,30252,0,0,0,30269,0,0,30271,30272,0,0,0,30278,30280,0,0,
+30282,0,30284,0,30294,0,0,0,0,30295,30296,0,0,0,0,0,30298,30299,30302,30304,
+30306,0,0,0,0,0,0,30316,30317,0,0,0,30318,0,0,0,30319,0,30320,30322,30326,0,0,0,
+0,0,30327,0,30332,30348,30349,0,0,30356,0,0,0,0,0,0,0,0,30357,0,30358,0,30359,
+30360,0,0,30365,30366,30378,0,0,0,0,30379,0,0,30381,0,30385,0,30388,30397,0,0,0,
+30401,0,0,0,0,30403,0,0,0,0,0,30404,0,0,30405,0,30406,30408,0,30409,0,30410,0,0,
+0,30417,0,0,30418,30419,0,30420,0,30424,0,0,0,30427,30430,30432,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,30433,0,0,0,0,0,0,0,30436,0,30437,30438,0,30441,30442,0,0,
+0,30445,0,0,0,0,30452,30456,30457,0,0,0,30458,0,30464,0,0,0,0,0,0,30467,0,30469,
+0,0,0,0,0,30477,0,0,30484,0,0,0,0,0,30485,0,0,0,0,0,30486,30487,30497,30498,0,0,
+0,0,0,0,0,0,0,0,30505,0,30508,0,0,0,30509,30510,0,30514,30516,0,0,0,0,0,0,0,0,0,
+0,0,30523,0,30524,0,30525,0,0,0,0,30537,0,0,30538,0,0,0,0,0,30553,0,0,30555,
+30556,30558,30559,30560,0,0,30561,0,30562,0,0,0,0,0,0,0,0,30563,30570,30571,0,
+30586,30587,0,0,30590,0,0,30594,0,0,0,0,30611,30612,30623,30634,0,0,30636,30640,
+30655,30656,0,30657,0,0,30658,30669,0,30670,0,30676,30678,0,0,0,0,0,0,0,30679,0,
+0,0,0,0,0,0,0,0,0,0,30695,0,0,30698,0,0,0,0,30700,0,0,0,0,30701,0,30702,30703,0,
+0,0,0,30707,0,0,0,30709,0,0,30710,30719,30729,0,0,0,0,0,0,0,0,0,30731,0,0,30733,
+0,0,0,30734,0,0,0,0,0,30736,30737,0,0,0,30740,0,0,0,30743,0,30746,0,30747,30748,
+0,0,30751,30752,30753,0,0,0,30754,0,0,30760,0,0,0,0,0,0,0,30763,0,30764,0,0,
+30766,0,30769,30770,30771,30774,30777,0,0,30779,30780,30781,0,0,0,0,30790,0,0,0,
+30792,0,0,0,0,30810,0,0,0,0,0,0,0,30812,30819,0,0,30823,30824,0,30825,0,30827,0,
+0,0,0,0,0,30828,0,0,30830,0,0,0,30834,0,30835,0,30837,30838,0,30845,0,0,0,0,0,
+30846,30847,0,0,30849,0,30851,0,0,0,0,0,30852,30858,0,0,30859,0,30865,0,0,30866,
+0,0,30868,0,0,30869,0,0,0,30881,30883,0,0,0,0,0,30889,0,30891,0,0,0,0,30894,0,
+30895,0,30897,0,30898,0,0,0,30904,30906,0,30909,0,0,0,0,0,0,30910,0,0,0,30915,
+30933,30942,0,0,0,0,30943,0,0,30945,0,0,0,0,0,0,30946,0,0,30947,0,0,30955,30956,
+0,0,30960,0,0,30961,30962,30966,0,0,30969,30974,0,0,0,30976,0,0,30977,0,30978,
+30982,0,0,0,0,0,0,0,30994,30995,30998,0,31000,0,0,31001,0,0,31003,31005,0,0,
+31006,31011,0,0,31014,0,31016,0,0,0,0,31018,0,0,31020,31023,31024,31025,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,31027,31028,31029,0,0,0,0,0,0,31032,0,0,0,0,0,0,0,0,0,0,0,
+31036,31037,31038,0,0,0,31041,31043,31045,0,31047,0,0,0,31048,0,31049,0,0,0,
+31053,31054,31055,0,0,31063,0,0,0,0,0,31066,0,31068,31071,0,0,0,31072,31073,0,0,
+0,0,31075,0,0,31076,0,0,0,31077,31079,0,31080,0,0,0,0,0,0,0,0,0,0,31087,0,31142,
+0,31144,0,0,31145,31146,31147,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31149,0,31151,31152,0,
+0,0,0,0,0,0,31162,31171,31174,31175,0,0,0,31176,0,0,0,0,0,0,0,31179,0,0,0,31186,
+0,0,0,31192,31195,0,0,31196,0,0,0,0,0,0,0,0,31198,0,0,0,0,0,31199,0,0,0,31205,0,
+0,0,0,31211,31215,0,0,0,0,31231,0,31232,0,0,0,0,0,0,0,0,0,0,31233,31236,31253,0,
+31254,0,0,0,0,0,0,31255,0,0,31257,0,0,0,0,0,0,0,0,0,31258,31259,0,0,31260,0,
+31261,0,0,0,0,0,31262,31263,0,0,31264,0,31266,0,31267,0,0,0,0,0,31281,0,31282,0,
+31284,0,0,31285,31287,31288,0,0,31290,0,0,0,31292,31295,0,31299,0,31300,0,0,0,0,
+0,31302,0,0,0,0,31303,0,0,0,0,0,0,31304,0,0,0,0,0,31305,31308,31309,31315,0,
+31317,0,0,0,0,0,31323,0,31324,0,0,0,0,0,31325,31327,0,0,31331,0,0,0,0,0,31333,0,
+0,0,0,0,31336,0,0,31337,0,0,0,0,0,0,31338,0,0,0,0,0,0,0,0,0,0,0,0,31339,0,0,0,0,
+0,0,0,31342,0,0,0,0,31345,0,0,0,0,0,0,0,0,31347,0,0,0,0,0,0,31348,0,0,31350,
+31351,0,31352,0,0,31354,0,0,0,0,31355,0,0,31356,0,0,0,0,0,0,0,0,0,0,31363,0,
+31372,0,0,31373,0,0,0,0,0,0,0,0,0,31376,0,31388,0,31389,0,31392,0,31401,0,31405,
+31407,31408,0,31409,0,0,0,0,0,0,31413,31415,0,0,0,31416,31418,0,0,0,0,0,0,31422,
+31423,0,0,31424,0,31425,31432,0,0,0,0,0,0,0,0,0,31433,0,0,0,0,0,0,0,0,31434,0,0,
+0,0,0,0,31435,0,0,0,0,31438,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31442,0,31444,0,
+31448,0,0,31451,0,0,0,0,31452,0,31461,31465,0,0,31466,0,0,31467,0,0,31468,0,0,0,
+31469,31473,0,31476,0,0,0,0,31489,31490,0,0,0,0,0,0,0,31492,31493,31494,0,0,0,0,
+31501,31504,31505,0,0,0,0,0,0,0,0,0,31509,0,0,0,0,31510,0,0,31511,0,0,31513,0,0,
+0,0,0,0,0,0,0,31514,0,31522,31536,31539,31540,0,31541,0,0,0,0,0,0,31546,31553,
+31559,0,0,0,31560,31561,31562,0,0,31564,31567,0,31569,0,0,0,31570,0,0,0,0,31571,
+0,0,0,0,0,0,31572,31574,31580,31581,0,0,31582,31584,31585,31586,31595,0,31596,0,
+0,0,0,31597,0,31599,0,31600,31601,0,0,31603,31604,0,0,31608,31610,0,0,0,31611,0,
+31615,0,0,0,0,31616,0,0,0,0,0,0,31617,0,0,0,0,0,31618,0,0,0,0,0,0,31621,0,0,0,0,
+0,0,0,0,0,31622,31625,0,0,0,0,31627,0,31641,0,0,31642,0,0,31643,0,0,0,0,0,0,0,0,
+0,31644,0,31646,0,0,0,0,31648,0,0,0,31652,0,0,0,31657,0,0,31676,0,0,0,0,0,0,0,
+31689,31691,31692,0,31694,0,0,0,31696,0,31702,0,31703,0};
+
+static const DictWord kStaticDictionaryWords[31705] = {
+{0,0,0},{8,0,1002},{136,0,1015},{4,0,683},{4,10,325},{138,10,125},{7,11,572},{9,
+11,592},{11,11,680},{11,11,842},{11,11,924},{12,11,356},{12,11,550},{13,11,317},
+{13,11,370},{13,11,469},{13,11,471},{14,11,397},{18,11,69},{146,11,145},{134,0,
+1265},{136,11,534},{134,0,1431},{11,0,138},{140,0,40},{4,0,155},{7,0,1689},{4,10
+,718},{135,10,1216},{4,0,245},{5,0,151},{5,0,741},{6,0,1147},{7,0,498},{7,0,870}
+,{7,0,1542},{12,0,213},{14,0,36},{14,0,391},{17,0,111},{18,0,6},{18,0,46},{18,0,
+151},{19,0,36},{20,0,32},{20,0,56},{20,0,69},{20,0,102},{21,0,4},{22,0,8},{22,0,
+10},{22,0,14},{150,0,31},{4,0,624},{135,0,1752},{5,10,124},{5,10,144},{6,10,548}
+,{7,10,15},{7,10,153},{137,10,629},{6,0,503},{9,0,586},{13,0,468},{14,0,66},{16,
+0,58},{7,10,1531},{8,10,416},{9,10,275},{10,10,100},{11,10,658},{11,10,979},{12,
+10,86},{14,10,207},{15,10,20},{143,10,25},{5,0,603},{7,0,1212},{9,0,565},{14,0,
+301},{5,10,915},{6,10,1783},{7,10,211},{7,10,1353},{9,10,83},{10,10,376},{10,10,
+431},{11,10,543},{12,10,664},{13,10,280},{13,10,428},{14,10,128},{17,10,52},{145
+,10,81},{4,0,492},{133,0,451},{135,0,835},{141,0,70},{132,0,539},{7,11,748},{139
+,11,700},{7,11,1517},{11,11,597},{14,11,76},{14,11,335},{148,11,33},{6,0,113},{
+135,0,436},{4,10,338},{133,10,400},{136,0,718},{133,11,127},{133,11,418},{6,0,
+1505},{7,0,520},{6,11,198},{11,10,892},{140,11,83},{4,10,221},{5,10,659},{5,10,
+989},{7,10,697},{7,10,1211},{138,10,284},{135,0,1070},{5,11,276},{6,11,55},{135,
+11,1369},{134,0,1515},{6,11,1752},{136,11,726},{138,10,507},{15,0,78},{4,10,188}
+,{135,10,805},{5,10,884},{139,10,991},{133,11,764},{134,10,1653},{6,11,309},{7,
+11,331},{138,11,550},{135,11,1861},{132,11,348},{135,11,986},{135,11,1573},{12,0
+,610},{13,0,431},{144,0,59},{9,11,799},{140,10,166},{134,0,1530},{132,0,750},{
+132,0,307},{133,0,964},{6,11,194},{7,11,133},{10,11,493},{10,11,570},{139,11,664
+},{5,11,24},{5,11,569},{6,11,3},{6,11,119},{6,11,143},{6,11,440},{7,11,295},{7,
+11,599},{7,11,1686},{7,11,1854},{8,11,424},{9,11,43},{9,11,584},{9,11,760},{10,
+11,148},{10,11,328},{11,11,159},{11,11,253},{11,11,506},{12,11,487},{12,11,531},
+{144,11,33},{136,10,760},{5,11,14},{5,11,892},{6,11,283},{7,11,234},{136,11,537}
+,{135,11,1251},{4,11,126},{8,11,635},{147,11,34},{4,11,316},{135,11,1561},{6,0,
+999},{6,0,1310},{137,11,861},{4,11,64},{5,11,352},{5,11,720},{6,11,368},{139,11,
+359},{4,0,75},{5,0,180},{6,0,500},{7,0,58},{7,0,710},{10,0,645},{136,10,770},{
+133,0,649},{6,0,276},{7,0,282},{7,0,879},{7,0,924},{8,0,459},{9,0,599},{9,0,754}
+,{11,0,574},{12,0,128},{12,0,494},{13,0,52},{13,0,301},{15,0,30},{143,0,132},{
+132,0,200},{4,10,89},{5,10,489},{6,10,315},{7,10,553},{7,10,1745},{138,10,243},{
+135,11,1050},{7,0,1621},{6,10,1658},{9,10,3},{10,10,154},{11,10,641},{13,10,85},
+{13,10,201},{141,10,346},{6,11,175},{137,11,289},{5,11,432},{133,11,913},{6,0,
+225},{137,0,211},{7,0,718},{8,0,687},{139,0,374},{4,10,166},{133,10,505},{9,0,
+110},{134,10,1670},{8,0,58},{9,0,724},{11,0,809},{13,0,113},{145,0,72},{6,0,345}
+,{7,0,1247},{144,11,82},{5,11,931},{134,11,1698},{8,0,767},{8,0,803},{9,0,301},{
+137,0,903},{139,0,203},{134,0,1154},{7,0,1949},{136,0,674},{134,0,259},{135,0,
+1275},{5,11,774},{6,11,1637},{6,11,1686},{134,11,1751},{134,0,1231},{7,10,445},{
+8,10,307},{8,10,704},{10,10,41},{10,10,439},{11,10,237},{11,10,622},{140,10,201}
+,{136,0,254},{6,11,260},{135,11,1484},{139,0,277},{135,10,1977},{4,10,189},{5,10
+,713},{6,11,573},{136,10,57},{138,10,371},{132,10,552},{134,11,344},{133,0,248},
+{9,0,800},{10,0,693},{11,0,482},{11,0,734},{11,0,789},{134,11,240},{4,0,116},{5,
+0,95},{5,0,445},{7,0,1688},{8,0,29},{9,0,272},{11,0,509},{11,0,915},{4,11,292},{
+4,11,736},{5,11,871},{6,11,171},{6,11,1689},{7,11,1324},{7,11,1944},{9,11,415},{
+9,11,580},{14,11,230},{146,11,68},{7,0,490},{13,0,100},{143,0,75},{135,0,1641},{
+133,0,543},{7,11,209},{8,11,661},{10,11,42},{11,11,58},{12,11,58},{12,11,118},{
+141,11,32},{5,0,181},{8,0,41},{6,11,63},{135,11,920},{133,0,657},{133,11,793},{
+138,0,709},{7,0,25},{8,0,202},{138,0,536},{5,11,665},{135,10,1788},{145,10,49},{
+9,0,423},{140,0,89},{5,11,67},{6,11,62},{6,11,374},{135,11,1391},{8,0,113},{9,0,
+877},{10,0,554},{11,0,83},{12,0,136},{19,0,109},{9,11,790},{140,11,47},{138,10,
+661},{4,0,963},{10,0,927},{14,0,442},{135,10,1945},{133,0,976},{132,0,206},{4,11
+,391},{135,11,1169},{134,0,2002},{6,0,696},{134,0,1008},{134,0,1170},{132,11,271
+},{7,0,13},{8,0,226},{10,0,537},{11,0,570},{11,0,605},{11,0,799},{11,0,804},{12,
+0,85},{12,0,516},{12,0,623},{13,0,112},{13,0,361},{14,0,77},{14,0,78},{17,0,28},
+{19,0,110},{140,11,314},{132,0,769},{134,0,1544},{4,0,551},{137,0,678},{5,10,84}
+,{134,10,163},{9,0,57},{9,0,459},{10,0,425},{11,0,119},{12,0,184},{12,0,371},{13
+,0,358},{145,0,51},{5,0,188},{5,0,814},{8,0,10},{9,0,421},{9,0,729},{10,0,609},{
+11,0,689},{4,11,253},{5,10,410},{5,11,544},{7,11,300},{137,11,340},{134,0,624},{
+138,11,321},{135,0,1941},{18,0,130},{5,10,322},{8,10,186},{9,10,262},{10,10,187}
+,{142,10,208},{5,11,53},{5,11,541},{6,11,94},{6,11,499},{7,11,230},{139,11,321},
+{133,10,227},{4,0,378},{4,11,920},{5,11,25},{5,11,790},{6,11,457},{135,11,853},{
+137,0,269},{132,0,528},{134,0,1146},{7,10,1395},{8,10,486},{9,10,236},{9,10,878}
+,{10,10,218},{11,10,95},{19,10,17},{147,10,31},{7,10,2043},{8,10,672},{141,10,
+448},{134,0,1105},{134,0,1616},{134,11,1765},{140,11,163},{5,10,412},{133,11,822
+},{132,11,634},{6,0,656},{134,11,1730},{134,0,1940},{5,0,104},{6,0,173},{135,0,
+1631},{136,10,562},{6,11,36},{7,11,658},{8,11,454},{147,11,86},{5,0,457},{134,10
+,1771},{7,0,810},{8,0,138},{8,0,342},{9,0,84},{10,0,193},{11,0,883},{140,0,359},
+{9,0,620},{135,10,1190},{137,10,132},{7,11,975},{137,11,789},{6,0,95},{6,0,1934}
+,{136,0,967},{141,11,335},{6,0,406},{10,0,409},{10,0,447},{11,0,44},{140,0,100},
+{4,10,317},{135,10,1279},{132,0,477},{134,0,1268},{6,0,1941},{8,0,944},{5,10,63}
+,{133,10,509},{132,0,629},{132,11,104},{4,0,246},{133,0,375},{6,0,1636},{132,10,
+288},{135,11,1614},{9,0,49},{10,0,774},{8,10,89},{8,10,620},{11,10,628},{12,10,
+322},{143,10,124},{4,0,282},{7,0,1034},{11,0,398},{11,0,634},{12,0,1},{12,0,79},
+{12,0,544},{14,0,237},{17,0,10},{146,0,20},{132,0,824},{7,11,45},{9,11,542},{9,
+11,566},{138,11,728},{5,0,118},{5,0,499},{6,0,476},{6,0,665},{6,0,1176},{6,0,
+1196},{7,0,600},{7,0,888},{135,0,1096},{7,0,296},{7,0,596},{8,0,560},{8,0,586},{
+9,0,612},{11,0,304},{12,0,46},{13,0,89},{14,0,112},{145,0,122},{5,0,894},{6,0,
+1772},{9,0,1009},{138,10,120},{5,11,533},{7,11,755},{138,11,780},{151,10,1},{6,0
+,1474},{7,11,87},{142,11,288},{139,0,366},{137,10,461},{7,11,988},{7,11,1939},{9
+,11,64},{9,11,502},{12,11,7},{12,11,34},{13,11,12},{13,11,234},{147,11,77},{7,0,
+1599},{7,0,1723},{8,0,79},{8,0,106},{8,0,190},{8,0,302},{8,0,383},{8,0,713},{9,0
+,119},{9,0,233},{9,0,419},{9,0,471},{10,0,181},{10,0,406},{11,0,57},{11,0,85},{
+11,0,120},{11,0,177},{11,0,296},{11,0,382},{11,0,454},{11,0,758},{11,0,999},{12,
+0,27},{12,0,98},{12,0,131},{12,0,245},{12,0,312},{12,0,446},{12,0,454},{13,0,25}
+,{13,0,98},{13,0,426},{13,0,508},{14,0,70},{14,0,163},{14,0,272},{14,0,277},{14,
+0,370},{15,0,95},{15,0,138},{15,0,167},{17,0,38},{148,0,96},{135,10,1346},{10,0,
+200},{19,0,2},{151,0,22},{135,11,141},{134,10,85},{134,0,1759},{138,0,372},{145,
+0,16},{8,0,943},{132,11,619},{139,11,88},{5,11,246},{8,11,189},{9,11,355},{9,11,
+512},{10,11,124},{10,11,453},{11,11,143},{11,11,416},{11,11,859},{141,11,341},{5
+,0,258},{134,0,719},{6,0,1798},{6,0,1839},{8,0,900},{10,0,874},{10,0,886},{12,0,
+698},{12,0,732},{12,0,770},{16,0,106},{18,0,163},{18,0,170},{18,0,171},{152,0,20
+},{9,0,707},{11,0,326},{11,0,339},{12,0,423},{12,0,502},{20,0,62},{9,11,707},{11
+,11,326},{11,11,339},{12,11,423},{12,11,502},{148,11,62},{5,0,30},{7,0,495},{8,0
+,134},{9,0,788},{140,0,438},{133,11,678},{5,10,279},{6,10,235},{7,10,468},{8,10,
+446},{9,10,637},{10,10,717},{11,10,738},{140,10,514},{5,11,35},{6,11,287},{7,11,
+862},{7,11,1886},{138,11,179},{7,0,1948},{7,0,2004},{132,11,517},{5,10,17},{6,10
+,371},{137,10,528},{4,0,115},{5,0,669},{6,0,407},{8,0,311},{11,0,10},{141,0,5},{
+137,0,381},{5,0,50},{6,0,439},{7,0,780},{135,0,1040},{136,11,667},{11,11,403},{
+146,11,83},{5,0,1},{6,0,81},{138,0,520},{134,0,738},{5,0,482},{8,0,98},{9,0,172}
+,{10,0,360},{10,0,700},{10,0,822},{11,0,302},{11,0,778},{12,0,50},{12,0,127},{12
+,0,396},{13,0,62},{13,0,328},{14,0,122},{147,0,72},{9,11,157},{10,11,131},{140,
+11,72},{135,11,714},{135,11,539},{5,0,2},{6,0,512},{7,0,797},{7,0,1494},{8,0,253
+},{8,0,589},{9,0,77},{10,0,1},{10,0,129},{10,0,225},{11,0,118},{11,0,226},{11,0,
+251},{11,0,430},{11,0,701},{11,0,974},{11,0,982},{12,0,64},{12,0,260},{12,0,488}
+,{140,0,690},{5,11,394},{7,11,367},{7,11,487},{7,11,857},{7,11,1713},{8,11,246},
+{9,11,537},{10,11,165},{12,11,219},{140,11,561},{136,0,557},{5,10,779},{5,10,807
+},{6,10,1655},{134,10,1676},{4,10,196},{5,10,558},{133,10,949},{11,11,827},{12,
+11,56},{14,11,34},{143,11,148},{137,0,347},{133,0,572},{134,0,832},{4,0,12},{7,0
+,504},{7,0,522},{7,0,809},{8,0,797},{141,0,88},{4,10,752},{133,11,449},{7,11,86}
+,{8,11,103},{145,11,69},{7,11,2028},{138,11,641},{5,0,528},{6,11,1},{142,11,2},{
+134,0,861},{10,0,294},{4,10,227},{5,10,159},{5,10,409},{7,10,80},{10,10,479},{12
+,10,418},{14,10,50},{14,10,249},{142,10,295},{7,10,1470},{8,10,66},{8,10,137},{8
+,10,761},{9,10,638},{11,10,80},{11,10,212},{11,10,368},{11,10,418},{12,10,8},{13
+,10,15},{16,10,61},{17,10,59},{19,10,28},{148,10,84},{20,0,109},{135,11,1148},{6
+,11,277},{7,11,1274},{7,11,1386},{7,11,1392},{12,11,129},{146,11,87},{6,11,187},
+{7,11,39},{7,11,1203},{8,11,380},{8,11,542},{14,11,117},{149,11,28},{134,0,1187}
+,{5,0,266},{9,0,290},{9,0,364},{10,0,293},{11,0,606},{142,0,45},{6,11,297},{7,11
+,793},{139,11,938},{4,0,50},{6,0,594},{9,0,121},{10,0,49},{10,0,412},{139,0,834}
+,{136,0,748},{7,11,464},{8,11,438},{11,11,105},{11,11,363},{12,11,231},{14,11,
+386},{15,11,102},{148,11,75},{132,0,466},{13,0,399},{14,0,337},{6,10,38},{7,10,
+1220},{8,10,185},{8,10,256},{9,10,22},{9,10,331},{10,10,738},{11,10,205},{11,10,
+540},{11,10,746},{13,10,465},{142,10,194},{9,0,378},{141,0,162},{137,0,519},{4,
+10,159},{6,10,115},{7,10,252},{7,10,257},{7,10,1928},{8,10,69},{9,10,384},{10,10
+,91},{10,10,615},{12,10,375},{14,10,235},{18,10,117},{147,10,123},{5,11,604},{5,
+10,911},{136,10,278},{132,0,667},{8,0,351},{9,0,322},{4,10,151},{135,10,1567},{
+134,0,902},{133,10,990},{12,0,180},{5,10,194},{7,10,1662},{137,10,90},{4,0,869},
+{134,0,1996},{134,0,813},{133,10,425},{137,11,761},{132,0,260},{133,10,971},{5,
+11,20},{6,11,298},{7,11,659},{7,11,1366},{137,11,219},{4,0,39},{5,0,36},{7,0,
+1843},{8,0,407},{11,0,144},{140,0,523},{4,0,510},{10,0,587},{139,10,752},{7,0,29
+},{7,0,66},{7,0,1980},{10,0,487},{138,0,809},{13,0,260},{14,0,82},{18,0,63},{137
+,10,662},{5,10,72},{6,10,264},{7,10,21},{7,10,46},{7,10,2013},{8,10,215},{8,10,
+513},{10,10,266},{139,10,22},{134,0,570},{6,0,565},{7,0,1667},{4,11,439},{10,10,
+95},{11,10,603},{12,11,242},{13,10,443},{14,10,160},{143,10,4},{134,0,1464},{134
+,10,431},{9,0,372},{15,0,2},{19,0,10},{19,0,18},{5,10,874},{6,10,1677},{143,10,0
+},{132,0,787},{6,0,380},{12,0,399},{21,0,19},{7,10,939},{7,10,1172},{7,10,1671},
+{9,10,540},{10,10,696},{11,10,265},{11,10,732},{11,10,928},{11,10,937},{141,10,
+438},{137,0,200},{132,11,233},{132,0,516},{134,11,577},{132,0,844},{11,0,887},{
+14,0,365},{142,0,375},{132,11,482},{8,0,821},{140,0,44},{7,0,1655},{136,0,305},{
+5,10,682},{135,10,1887},{135,11,346},{132,10,696},{4,0,10},{7,0,917},{139,0,786}
+,{5,11,795},{6,11,1741},{8,11,417},{137,11,782},{4,0,1016},{134,0,2031},{5,0,684
+},{4,10,726},{133,10,630},{6,0,1021},{134,0,1480},{8,10,802},{136,10,838},{134,0
+,27},{134,0,395},{135,11,622},{7,11,625},{135,11,1750},{4,11,203},{135,11,1936},
+{6,10,118},{7,10,215},{7,10,1521},{140,10,11},{132,0,813},{136,0,511},{7,10,615}
+,{138,10,251},{135,10,1044},{145,0,56},{133,10,225},{6,0,342},{6,0,496},{8,0,275
+},{137,0,206},{4,0,909},{133,0,940},{132,0,891},{7,11,311},{9,11,308},{140,11,
+255},{4,10,370},{5,10,756},{135,10,1326},{4,0,687},{134,0,1596},{134,0,1342},{6,
+10,1662},{7,10,48},{8,10,771},{10,10,116},{13,10,104},{14,10,105},{14,10,184},{
+15,10,168},{19,10,92},{148,10,68},{138,10,209},{4,11,400},{5,11,267},{135,11,232
+},{151,11,12},{6,0,41},{141,0,160},{141,11,314},{134,0,1718},{136,0,778},{142,11
+,261},{134,0,1610},{133,0,115},{132,0,294},{14,0,314},{132,10,120},{132,0,983},{
+5,0,193},{140,0,178},{138,10,429},{5,10,820},{135,10,931},{6,0,994},{6,0,1051},{
+6,0,1439},{7,0,174},{133,11,732},{4,11,100},{7,11,679},{8,11,313},{138,10,199},{
+6,10,151},{6,10,1675},{7,10,383},{151,10,10},{6,0,1796},{8,0,848},{8,0,867},{8,0
+,907},{10,0,855},{140,0,703},{140,0,221},{4,0,122},{5,0,796},{5,0,952},{6,0,1660
+},{6,0,1671},{8,0,567},{9,0,687},{9,0,742},{10,0,686},{11,0,682},{11,0,909},{140
+,0,281},{5,11,362},{5,11,443},{6,11,318},{7,11,1019},{139,11,623},{5,11,463},{
+136,11,296},{11,0,583},{13,0,262},{6,10,1624},{12,10,422},{142,10,360},{5,0,179}
+,{7,0,1095},{135,0,1213},{4,10,43},{4,11,454},{5,10,344},{133,10,357},{4,0,66},{
+7,0,722},{135,0,904},{134,0,773},{7,0,352},{133,10,888},{5,11,48},{5,11,404},{6,
+11,557},{7,11,458},{8,11,597},{10,11,455},{10,11,606},{11,11,49},{11,11,548},{12
+,11,476},{13,11,18},{141,11,450},{134,11,418},{132,10,711},{5,11,442},{135,11,
+1984},{141,0,35},{137,0,152},{134,0,1197},{135,11,1093},{137,11,203},{137,10,440
+},{10,0,592},{10,0,753},{12,0,317},{12,0,355},{12,0,465},{12,0,469},{12,0,560},{
+12,0,578},{141,0,243},{133,0,564},{134,0,797},{5,10,958},{133,10,987},{5,11,55},
+{7,11,376},{140,11,161},{133,11,450},{134,0,556},{134,0,819},{11,10,276},{142,10
+,293},{7,0,544},{138,0,61},{8,0,719},{4,10,65},{5,10,479},{5,10,1004},{7,10,1913
+},{8,10,317},{9,10,302},{10,10,612},{141,10,22},{4,0,5},{5,0,498},{8,0,637},{9,0
+,521},{4,11,213},{4,10,261},{7,11,223},{7,10,510},{136,11,80},{5,0,927},{7,0,101
+},{4,10,291},{7,11,381},{7,11,806},{7,11,820},{8,11,354},{8,11,437},{8,11,787},{
+9,10,515},{9,11,657},{10,11,58},{10,11,339},{10,11,749},{11,11,914},{12,10,152},
+{12,11,162},{12,10,443},{13,11,75},{13,10,392},{14,11,106},{14,11,198},{14,11,
+320},{14,10,357},{14,11,413},{146,11,43},{6,0,1153},{7,0,1441},{136,11,747},{4,0
+,893},{5,0,780},{133,0,893},{138,11,654},{133,11,692},{133,0,238},{134,11,191},{
+4,10,130},{135,10,843},{6,0,1296},{5,10,42},{5,10,879},{7,10,245},{7,10,324},{7,
+10,1532},{11,10,463},{11,10,472},{13,10,363},{144,10,52},{134,0,1729},{6,0,1999}
+,{136,0,969},{4,10,134},{133,10,372},{4,0,60},{7,0,941},{7,0,1800},{8,0,314},{9,
+0,700},{139,0,487},{134,0,1144},{6,11,162},{7,11,1960},{136,11,831},{132,11,706}
+,{135,0,1147},{138,11,426},{138,11,89},{7,0,1853},{138,0,437},{136,0,419},{135,
+10,1634},{133,0,828},{5,0,806},{7,0,176},{7,0,178},{7,0,1240},{7,0,1976},{132,10
+,644},{135,11,1877},{5,11,420},{135,11,1449},{4,0,51},{5,0,39},{6,0,4},{7,0,591}
+,{7,0,849},{7,0,951},{7,0,1613},{7,0,1760},{7,0,1988},{9,0,434},{10,0,754},{11,0
+,25},{139,0,37},{10,11,57},{138,11,277},{135,10,540},{132,11,204},{135,0,159},{
+139,11,231},{133,0,902},{7,0,928},{7,11,366},{9,11,287},{12,11,199},{12,11,556},
+{140,11,577},{6,10,623},{136,10,789},{4,10,908},{5,10,359},{5,10,508},{6,10,1723
+},{7,10,343},{7,10,1996},{135,10,2026},{134,0,270},{4,10,341},{135,10,480},{5,11
+,356},{135,11,224},{11,11,588},{11,11,864},{11,11,968},{143,11,160},{132,0,556},
+{137,0,801},{132,0,416},{142,0,372},{5,0,152},{5,0,197},{7,0,340},{7,0,867},{10,
+0,548},{10,0,581},{11,0,6},{12,0,3},{12,0,19},{14,0,110},{142,0,289},{139,0,369}
+,{7,11,630},{9,11,567},{11,11,150},{11,11,444},{141,11,119},{134,11,539},{7,10,
+1995},{8,10,299},{11,10,890},{140,10,674},{7,0,34},{7,0,190},{8,0,28},{8,0,141},
+{8,0,444},{8,0,811},{9,0,468},{11,0,334},{12,0,24},{12,0,386},{140,0,576},{133,0
+,757},{7,0,1553},{136,0,898},{133,0,721},{136,0,1012},{4,0,789},{5,0,647},{135,0
+,1102},{132,0,898},{10,0,183},{4,10,238},{5,10,503},{6,10,179},{7,10,2003},{8,10
+,381},{8,10,473},{9,10,149},{10,10,788},{15,10,45},{15,10,86},{20,10,110},{150,
+10,57},{9,0,136},{19,0,107},{4,10,121},{5,10,156},{5,10,349},{10,10,605},{142,10
+,342},{4,11,235},{135,11,255},{4,11,194},{5,11,584},{6,11,384},{7,11,583},{10,11
+,761},{11,11,760},{139,11,851},{6,10,80},{6,10,1694},{7,10,173},{7,10,1974},{9,
+10,547},{10,10,730},{14,10,18},{150,10,39},{4,10,923},{134,10,1711},{5,0,277},{
+141,0,247},{132,0,435},{133,11,562},{134,0,1311},{5,11,191},{137,11,271},{132,10
+,595},{7,11,1537},{14,11,96},{143,11,73},{5,0,437},{7,0,502},{7,0,519},{7,0,1122
+},{7,0,1751},{14,0,211},{6,10,459},{7,10,1753},{7,10,1805},{8,10,658},{9,10,1},{
+11,10,959},{141,10,446},{6,0,814},{4,11,470},{5,11,473},{6,11,153},{7,11,1503},{
+7,11,1923},{10,11,701},{11,11,132},{11,11,168},{11,11,227},{11,11,320},{11,11,
+436},{11,11,525},{11,11,855},{12,11,41},{12,11,286},{13,11,103},{13,11,284},{14,
+11,255},{14,11,262},{15,11,117},{143,11,127},{5,0,265},{6,0,212},{135,0,28},{138
+,0,750},{133,11,327},{6,11,552},{7,11,1754},{137,11,604},{134,0,2012},{132,0,702
+},{5,11,80},{6,11,405},{7,11,403},{7,11,1502},{7,11,1626},{8,11,456},{9,11,487},
+{9,11,853},{9,11,889},{10,11,309},{11,11,721},{11,11,994},{12,11,430},{141,11,
+165},{5,0,808},{135,0,2045},{5,0,166},{8,0,739},{140,0,511},{134,10,490},{4,11,
+453},{5,11,887},{6,11,535},{8,11,6},{136,11,543},{4,0,119},{5,0,170},{5,0,447},{
+7,0,1708},{7,0,1889},{9,0,357},{9,0,719},{12,0,486},{140,0,596},{137,0,500},{7,
+10,250},{136,10,507},{132,10,158},{6,0,809},{134,0,1500},{9,0,327},{11,0,350},{
+11,0,831},{13,0,352},{4,10,140},{7,10,362},{8,10,209},{9,10,10},{9,10,503},{9,10
+,614},{10,10,689},{11,10,327},{11,10,725},{12,10,252},{12,10,583},{13,10,192},{
+14,10,269},{14,10,356},{148,10,50},{135,11,741},{4,0,450},{7,0,1158},{19,10,1},{
+19,10,26},{150,10,9},{6,0,597},{135,0,1318},{134,0,1602},{6,10,228},{7,10,1341},
+{9,10,408},{138,10,343},{7,0,1375},{7,0,1466},{138,0,331},{132,0,754},{132,10,
+557},{5,11,101},{6,11,88},{6,11,543},{7,11,1677},{9,11,100},{10,11,677},{14,11,
+169},{14,11,302},{14,11,313},{15,11,48},{143,11,84},{134,0,1368},{4,11,310},{9,
+11,795},{10,11,733},{11,11,451},{12,11,249},{14,11,115},{14,11,286},{143,11,100}
+,{132,10,548},{10,0,557},{7,10,197},{8,10,142},{8,10,325},{9,10,150},{9,10,596},
+{10,10,353},{11,10,74},{11,10,315},{12,10,662},{12,10,681},{14,10,423},{143,10,
+141},{133,11,587},{5,0,850},{136,0,799},{10,0,908},{12,0,701},{12,0,757},{142,0,
+466},{4,0,62},{5,0,275},{18,0,19},{6,10,399},{6,10,579},{7,10,692},{7,10,846},{7
+,10,1015},{7,10,1799},{8,10,403},{9,10,394},{10,10,133},{12,10,4},{12,10,297},{
+12,10,452},{16,10,81},{18,10,25},{21,10,14},{22,10,12},{151,10,18},{12,0,459},{7
+,10,1546},{11,10,299},{142,10,407},{132,10,177},{132,11,498},{7,11,217},{8,11,
+140},{138,11,610},{5,10,411},{135,10,653},{134,0,1802},{7,10,439},{10,10,727},{
+11,10,260},{139,10,684},{133,11,905},{11,11,580},{142,11,201},{134,0,1397},{5,10
+,208},{7,10,753},{135,10,1528},{7,0,238},{7,0,2033},{8,0,120},{8,0,188},{8,0,659
+},{9,0,598},{10,0,466},{12,0,342},{12,0,588},{13,0,503},{14,0,246},{143,0,92},{
+135,11,1041},{4,11,456},{7,11,105},{7,11,358},{7,11,1637},{8,11,643},{139,11,483
+},{6,0,1318},{134,0,1324},{4,0,201},{7,0,1744},{8,0,602},{11,0,247},{11,0,826},{
+17,0,65},{133,10,242},{8,0,164},{146,0,62},{133,10,953},{139,10,802},{133,0,615}
+,{7,11,1566},{8,11,269},{9,11,212},{9,11,718},{14,11,15},{14,11,132},{142,11,227
+},{133,10,290},{132,10,380},{5,10,52},{7,10,277},{9,10,368},{139,10,791},{135,0,
+1243},{133,11,539},{11,11,919},{141,11,409},{136,0,968},{133,11,470},{134,0,882}
+,{132,0,907},{5,0,100},{10,0,329},{12,0,416},{149,0,29},{10,10,138},{139,10,476}
+,{5,10,725},{5,10,727},{6,11,91},{7,11,435},{135,10,1811},{4,11,16},{5,11,316},{
+5,11,842},{6,11,370},{6,11,1778},{8,11,166},{11,11,812},{12,11,206},{12,11,351},
+{14,11,418},{16,11,15},{16,11,34},{18,11,3},{19,11,3},{19,11,7},{20,11,4},{149,
+11,21},{132,0,176},{5,0,636},{5,0,998},{7,0,9},{7,0,1508},{8,0,26},{9,0,317},{9,
+0,358},{10,0,210},{10,0,292},{10,0,533},{11,0,555},{12,0,526},{12,0,607},{13,0,
+263},{13,0,459},{142,0,271},{6,0,256},{8,0,265},{4,10,38},{7,10,307},{7,10,999},
+{7,10,1481},{7,10,1732},{7,10,1738},{9,10,414},{11,10,316},{12,10,52},{13,10,420
+},{147,10,100},{135,10,1296},{4,11,611},{133,11,606},{4,0,643},{142,11,21},{133,
+11,715},{133,10,723},{6,0,610},{135,11,597},{10,0,127},{141,0,27},{6,0,1995},{6,
+0,2001},{8,0,119},{136,0,973},{4,11,149},{138,11,368},{12,0,522},{4,11,154},{5,
+10,109},{6,10,1784},{7,11,1134},{7,10,1895},{8,11,105},{12,10,296},{140,10,302},
+{4,11,31},{6,11,429},{7,11,962},{9,11,458},{139,11,691},{10,0,553},{11,0,876},{
+13,0,193},{13,0,423},{14,0,166},{19,0,84},{4,11,312},{5,10,216},{7,10,1879},{9,
+10,141},{9,10,270},{9,10,679},{10,10,159},{11,10,197},{12,10,538},{12,10,559},{
+14,10,144},{14,10,167},{143,10,67},{134,0,1582},{7,0,1578},{135,11,1578},{137,10
+,81},{132,11,236},{134,10,391},{134,0,795},{7,10,322},{136,10,249},{5,11,836},{5
+,11,857},{6,11,1680},{7,11,59},{147,11,53},{135,0,432},{10,11,68},{139,11,494},{
+4,11,81},{139,11,867},{7,0,126},{136,0,84},{142,11,280},{5,11,282},{8,11,650},{9
+,11,295},{9,11,907},{138,11,443},{136,0,790},{5,10,632},{138,10,526},{6,0,64},{
+12,0,377},{13,0,309},{14,0,141},{14,0,429},{14,11,141},{142,11,429},{134,0,1529}
+,{6,0,321},{7,0,1857},{9,0,530},{19,0,99},{7,10,948},{7,10,1042},{8,10,235},{8,
+10,461},{9,10,453},{10,10,354},{145,10,77},{7,0,1104},{11,0,269},{11,0,539},{11,
+0,627},{11,0,706},{11,0,975},{12,0,248},{12,0,434},{12,0,600},{12,0,622},{13,0,
+297},{13,0,485},{14,0,69},{14,0,409},{143,0,108},{4,10,362},{7,10,52},{7,10,303}
+,{10,11,70},{12,11,26},{14,11,17},{14,11,178},{15,11,34},{149,11,12},{11,0,977},
+{141,0,507},{9,0,34},{139,0,484},{5,10,196},{6,10,486},{7,10,212},{8,10,309},{
+136,10,346},{6,0,1700},{7,0,26},{7,0,293},{7,0,382},{7,0,1026},{7,0,1087},{7,0,
+2027},{8,0,24},{8,0,114},{8,0,252},{8,0,727},{8,0,729},{9,0,30},{9,0,199},{9,0,
+231},{9,0,251},{9,0,334},{9,0,361},{9,0,712},{10,0,55},{10,0,60},{10,0,232},{10,
+0,332},{10,0,384},{10,0,396},{10,0,504},{10,0,542},{10,0,652},{11,0,20},{11,0,48
+},{11,0,207},{11,0,291},{11,0,298},{11,0,342},{11,0,365},{11,0,394},{11,0,620},{
+11,0,705},{11,0,1017},{12,0,123},{12,0,340},{12,0,406},{12,0,643},{13,0,61},{13,
+0,269},{13,0,311},{13,0,319},{13,0,486},{14,0,234},{15,0,62},{15,0,85},{16,0,71}
+,{18,0,119},{20,0,105},{135,10,1912},{4,11,71},{5,11,376},{7,11,119},{138,11,665
+},{10,0,918},{10,0,926},{4,10,686},{136,11,55},{138,10,625},{136,10,706},{132,11
+,479},{4,10,30},{133,10,43},{6,0,379},{7,0,270},{8,0,176},{8,0,183},{9,0,432},{9
+,0,661},{12,0,247},{12,0,617},{18,0,125},{7,11,607},{8,11,99},{152,11,4},{5,0,
+792},{133,0,900},{4,11,612},{133,11,561},{4,11,41},{4,10,220},{5,11,74},{7,10,
+1535},{7,11,1627},{11,11,871},{140,11,619},{135,0,1920},{7,11,94},{11,11,329},{
+11,11,965},{12,11,241},{14,11,354},{15,11,22},{148,11,63},{9,11,209},{137,11,300
+},{134,0,771},{135,0,1979},{4,0,901},{133,0,776},{142,0,254},{133,11,98},{9,11,
+16},{141,11,386},{133,11,984},{4,11,182},{6,11,205},{135,11,220},{7,10,1725},{7,
+10,1774},{138,10,393},{5,10,263},{134,10,414},{4,11,42},{9,11,205},{9,11,786},{
+138,11,659},{14,0,140},{148,0,41},{8,0,440},{10,0,359},{6,10,178},{6,11,289},{6,
+10,1750},{7,11,1670},{9,10,690},{10,10,155},{10,10,373},{11,10,698},{12,11,57},{
+13,10,155},{20,10,93},{151,11,4},{4,0,37},{5,0,334},{7,0,1253},{151,11,25},{4,0,
+508},{4,11,635},{5,10,97},{137,10,393},{139,11,533},{4,0,640},{133,0,513},{134,
+10,1639},{132,11,371},{4,11,272},{7,11,836},{7,11,1651},{145,11,89},{5,11,825},{
+6,11,444},{6,11,1640},{136,11,308},{4,10,191},{7,10,934},{8,10,647},{145,10,97},
+{12,0,246},{15,0,162},{19,0,64},{20,0,8},{20,0,95},{22,0,24},{152,0,17},{4,0,533
+},{5,10,165},{9,10,346},{138,10,655},{5,11,737},{139,10,885},{133,10,877},{8,10,
+128},{139,10,179},{137,11,307},{140,0,752},{133,0,920},{135,0,1048},{5,0,153},{6
+,0,580},{6,10,1663},{7,10,132},{7,10,1154},{7,10,1415},{7,10,1507},{12,10,493},{
+15,10,105},{151,10,15},{5,10,459},{7,10,1073},{8,10,241},{136,10,334},{138,0,391
+},{135,0,1952},{133,11,525},{8,11,641},{11,11,388},{140,11,580},{142,0,126},{134
+,0,640},{132,0,483},{7,0,1616},{9,0,69},{6,10,324},{6,10,520},{7,10,338},{7,10,
+1729},{8,10,228},{139,10,750},{5,11,493},{134,11,528},{135,0,734},{4,11,174},{
+135,11,911},{138,0,480},{9,0,495},{146,0,104},{135,10,705},{9,0,472},{4,10,73},{
+6,10,612},{7,10,927},{7,10,1330},{7,10,1822},{8,10,217},{9,10,765},{9,10,766},{
+10,10,408},{11,10,51},{11,10,793},{12,10,266},{15,10,158},{20,10,89},{150,10,32}
+,{7,11,548},{137,11,58},{4,11,32},{5,11,215},{6,11,269},{7,11,1782},{7,11,1892},
+{10,11,16},{11,11,822},{11,11,954},{141,11,481},{132,0,874},{9,0,229},{5,10,389}
+,{136,10,636},{7,11,1749},{136,11,477},{134,0,948},{5,11,308},{135,11,1088},{4,0
+,748},{139,0,1009},{136,10,21},{6,0,555},{135,0,485},{5,11,126},{8,11,297},{9,11
+,366},{9,11,445},{12,11,53},{12,11,374},{141,11,492},{7,11,1551},{139,11,361},{
+136,0,193},{136,0,472},{8,0,653},{13,0,93},{147,0,14},{132,0,984},{132,11,175},{
+5,0,172},{6,0,1971},{132,11,685},{149,11,8},{133,11,797},{13,0,83},{5,10,189},{7
+,10,442},{7,10,443},{8,10,281},{12,10,174},{141,10,261},{134,0,1568},{133,11,565
+},{139,0,384},{133,0,260},{7,0,758},{7,0,880},{7,0,1359},{9,0,164},{9,0,167},{10
+,0,156},{10,0,588},{12,0,101},{14,0,48},{15,0,70},{6,10,2},{7,10,1262},{7,10,
+1737},{8,10,22},{8,10,270},{8,10,612},{9,10,312},{9,10,436},{10,10,311},{10,10,
+623},{11,10,72},{11,10,330},{11,10,455},{12,10,321},{12,10,504},{12,10,530},{12,
+10,543},{13,10,17},{13,10,156},{13,10,334},{17,10,60},{148,10,64},{4,11,252},{7,
+11,1068},{10,11,434},{11,11,228},{11,11,426},{13,11,231},{18,11,106},{148,11,87}
+,{7,10,354},{10,10,410},{139,10,815},{6,0,367},{7,10,670},{7,10,1327},{8,10,411}
+,{8,10,435},{9,10,653},{9,10,740},{10,10,385},{11,10,222},{11,10,324},{11,10,829
+},{140,10,611},{7,0,1174},{6,10,166},{135,10,374},{146,0,121},{132,0,828},{5,11,
+231},{138,11,509},{7,11,601},{9,11,277},{9,11,674},{10,11,178},{10,11,257},{10,
+11,418},{11,11,531},{11,11,544},{11,11,585},{12,11,113},{12,11,475},{13,11,99},{
+142,11,428},{134,0,1541},{135,11,1779},{5,0,343},{134,10,398},{135,10,50},{135,
+11,1683},{4,0,440},{7,0,57},{8,0,167},{8,0,375},{9,0,82},{9,0,561},{9,0,744},{10
+,0,620},{137,11,744},{134,0,926},{6,10,517},{7,10,1159},{10,10,621},{139,10,192}
+,{137,0,827},{8,0,194},{136,0,756},{10,10,223},{139,10,645},{7,10,64},{136,10,
+245},{4,11,399},{5,11,119},{5,11,494},{7,11,751},{137,11,556},{132,0,808},{135,0
+,22},{7,10,1763},{140,10,310},{5,0,639},{7,0,1249},{11,0,896},{134,11,584},{134,
+0,1614},{135,0,860},{135,11,1121},{5,10,129},{6,10,61},{135,10,947},{4,0,102},{7
+,0,815},{7,0,1699},{139,0,964},{13,10,505},{141,10,506},{139,10,1000},{132,11,
+679},{132,0,899},{132,0,569},{5,11,694},{137,11,714},{136,0,795},{6,0,2045},{139
+,11,7},{6,0,52},{9,0,104},{9,0,559},{12,0,308},{147,0,87},{4,0,301},{132,0,604},
+{133,10,637},{136,0,779},{5,11,143},{5,11,769},{6,11,1760},{7,11,682},{7,11,1992
+},{136,11,736},{137,10,590},{147,0,32},{137,11,527},{5,10,280},{135,10,1226},{
+134,0,494},{6,0,677},{6,0,682},{134,0,1044},{133,10,281},{135,10,1064},{7,0,508}
+,{133,11,860},{6,11,422},{7,11,0},{7,11,1544},{9,11,577},{11,11,990},{12,11,141}
+,{12,11,453},{13,11,47},{141,11,266},{134,0,1014},{5,11,515},{137,11,131},{134,0
+,957},{132,11,646},{6,0,310},{7,0,1849},{8,0,72},{8,0,272},{8,0,431},{9,0,12},{9
+,0,376},{10,0,563},{10,0,630},{10,0,796},{10,0,810},{11,0,367},{11,0,599},{11,0,
+686},{140,0,672},{7,0,570},{4,11,396},{7,10,120},{7,11,728},{8,10,489},{9,11,117
+},{9,10,319},{10,10,820},{11,10,1004},{12,10,379},{12,10,679},{13,10,117},{13,11
+,202},{13,10,412},{14,10,25},{15,10,52},{15,10,161},{16,10,47},{20,11,51},{149,
+10,2},{6,11,121},{6,11,124},{6,11,357},{7,11,1138},{7,11,1295},{8,11,162},{139,
+11,655},{8,0,449},{4,10,937},{5,10,801},{136,11,449},{139,11,958},{6,0,181},{7,0
+,537},{8,0,64},{9,0,127},{10,0,496},{12,0,510},{141,0,384},{138,11,253},{4,0,244
+},{135,0,233},{133,11,237},{132,10,365},{6,0,1650},{10,0,702},{139,0,245},{5,10,
+7},{139,10,774},{13,0,463},{20,0,49},{13,11,463},{148,11,49},{4,10,734},{5,10,
+662},{134,10,430},{4,10,746},{135,10,1090},{5,10,360},{136,10,237},{137,0,338},{
+143,11,10},{7,11,571},{138,11,366},{134,0,1279},{9,11,513},{10,11,22},{10,11,39}
+,{12,11,122},{140,11,187},{133,0,896},{146,0,178},{134,0,695},{137,0,808},{134,
+11,587},{7,11,107},{7,11,838},{8,11,550},{138,11,401},{7,0,1117},{136,0,539},{4,
+10,277},{5,10,608},{6,10,493},{7,10,457},{140,10,384},{133,11,768},{12,0,257},{7
+,10,27},{135,10,316},{140,0,1003},{4,0,207},{5,0,586},{5,0,676},{6,0,448},{8,0,
+244},{11,0,1},{13,0,3},{16,0,54},{17,0,4},{18,0,13},{133,10,552},{4,10,401},{137
+,10,264},{5,0,516},{7,0,1883},{135,11,1883},{12,0,960},{132,11,894},{5,0,4},{5,0
+,810},{6,0,13},{6,0,538},{6,0,1690},{6,0,1726},{7,0,499},{7,0,1819},{8,0,148},{8
+,0,696},{8,0,791},{12,0,125},{143,0,9},{135,0,1268},{11,0,30},{14,0,315},{9,10,
+543},{10,10,524},{12,10,524},{16,10,18},{20,10,26},{148,10,65},{6,0,748},{4,10,
+205},{5,10,623},{7,10,104},{136,10,519},{11,0,542},{139,0,852},{140,0,6},{132,0,
+848},{7,0,1385},{11,0,582},{11,0,650},{11,0,901},{11,0,949},{12,0,232},{12,0,236
+},{13,0,413},{13,0,501},{18,0,116},{7,10,579},{9,10,41},{9,10,244},{9,10,669},{
+10,10,5},{11,10,861},{11,10,951},{139,10,980},{4,0,945},{6,0,1811},{6,0,1845},{6
+,0,1853},{6,0,1858},{8,0,862},{12,0,782},{12,0,788},{18,0,160},{148,0,117},{132,
+10,717},{4,0,925},{5,0,803},{8,0,698},{138,0,828},{134,0,1416},{132,0,610},{139,
+0,992},{6,0,878},{134,0,1477},{135,0,1847},{138,11,531},{137,11,539},{134,11,272
+},{133,0,383},{134,0,1404},{132,10,489},{4,11,9},{5,11,128},{7,11,368},{11,11,
+480},{148,11,3},{136,0,986},{9,0,660},{138,0,347},{135,10,892},{136,11,682},{7,0
+,572},{9,0,592},{11,0,680},{12,0,356},{140,0,550},{7,0,1411},{138,11,527},{4,11,
+2},{7,11,545},{135,11,894},{137,10,473},{11,0,64},{7,11,481},{7,10,819},{9,10,26
+},{9,10,392},{9,11,792},{10,10,152},{10,10,226},{12,10,276},{12,10,426},{12,10,
+589},{13,10,460},{15,10,97},{19,10,48},{148,10,104},{135,10,51},{136,11,445},{
+136,11,646},{135,0,606},{132,10,674},{6,0,1829},{134,0,1830},{132,10,770},{5,10,
+79},{7,10,1027},{7,10,1477},{139,10,52},{5,11,530},{142,11,113},{134,10,1666},{7
+,0,748},{139,0,700},{134,10,195},{133,10,789},{9,0,87},{10,0,365},{4,10,251},{4,
+10,688},{7,10,513},{135,10,1284},{136,11,111},{133,0,127},{6,0,198},{140,0,83},{
+133,11,556},{133,10,889},{4,10,160},{5,10,330},{7,10,1434},{136,10,174},{5,0,276
+},{6,0,55},{7,0,1369},{138,0,864},{8,11,16},{140,11,568},{6,0,1752},{136,0,726},
+{135,0,1066},{133,0,764},{6,11,186},{137,11,426},{11,0,683},{139,11,683},{6,0,
+309},{7,0,331},{138,0,550},{133,10,374},{6,0,1212},{6,0,1852},{7,0,1062},{8,0,
+874},{8,0,882},{138,0,936},{132,11,585},{134,0,1364},{7,0,986},{133,10,731},{6,0
+,723},{6,0,1408},{138,0,381},{135,0,1573},{134,0,1025},{4,10,626},{5,10,642},{6,
+10,425},{10,10,202},{139,10,141},{4,11,93},{5,11,252},{6,11,229},{7,11,291},{9,
+11,550},{139,11,644},{137,11,749},{137,11,162},{132,11,381},{135,0,1559},{6,0,
+194},{7,0,133},{10,0,493},{10,0,570},{139,0,664},{5,0,24},{5,0,569},{6,0,3},{6,0
+,119},{6,0,143},{6,0,440},{7,0,295},{7,0,599},{7,0,1686},{7,0,1854},{8,0,424},{9
+,0,43},{9,0,584},{9,0,760},{10,0,148},{10,0,328},{11,0,159},{11,0,253},{11,0,506
+},{12,0,487},{140,0,531},{6,0,661},{134,0,1517},{136,10,835},{151,10,17},{5,0,14
+},{5,0,892},{6,0,283},{7,0,234},{136,0,537},{139,0,541},{4,0,126},{8,0,635},{147
+,0,34},{4,0,316},{4,0,495},{135,0,1561},{4,11,187},{5,11,184},{5,11,690},{7,11,
+1869},{138,11,756},{139,11,783},{4,0,998},{137,0,861},{136,0,1009},{139,11,292},
+{5,11,21},{6,11,77},{6,11,157},{7,11,974},{7,11,1301},{7,11,1339},{7,11,1490},{7
+,11,1873},{137,11,628},{7,11,1283},{9,11,227},{9,11,499},{10,11,341},{11,11,325}
+,{11,11,408},{14,11,180},{15,11,144},{18,11,47},{147,11,49},{4,0,64},{5,0,352},{
+5,0,720},{6,0,368},{139,0,359},{5,10,384},{8,10,455},{140,10,48},{5,10,264},{134
+,10,184},{7,0,1577},{10,0,304},{10,0,549},{12,0,365},{13,0,220},{13,0,240},{142,
+0,33},{134,0,1107},{134,0,929},{135,0,1142},{6,0,175},{137,0,289},{5,0,432},{133
+,0,913},{6,0,279},{7,0,219},{5,10,633},{135,10,1323},{7,0,785},{7,10,359},{8,10,
+243},{140,10,175},{139,0,595},{132,10,105},{8,11,398},{9,11,681},{139,11,632},{
+140,0,80},{5,0,931},{134,0,1698},{142,11,241},{134,11,20},{134,0,1323},{11,0,526
+},{11,0,939},{141,0,290},{5,0,774},{6,0,780},{6,0,1637},{6,0,1686},{6,0,1751},{8
+,0,559},{141,0,109},{141,0,127},{7,0,1167},{11,0,934},{13,0,391},{17,0,76},{135,
+11,709},{135,0,963},{6,0,260},{135,0,1484},{134,0,573},{4,10,758},{139,11,941},{
+135,10,1649},{145,11,36},{4,0,292},{137,0,580},{4,0,736},{5,0,871},{6,0,1689},{
+135,0,1944},{7,11,945},{11,11,713},{139,11,744},{134,0,1164},{135,11,937},{6,0,
+1922},{9,0,982},{15,0,173},{15,0,178},{15,0,200},{18,0,189},{18,0,207},{21,0,47}
+,{135,11,1652},{7,0,1695},{139,10,128},{6,0,63},{135,0,920},{133,0,793},{143,11,
+134},{133,10,918},{5,0,67},{6,0,62},{6,0,374},{135,0,1391},{9,0,790},{12,0,47},{
+4,11,579},{5,11,226},{5,11,323},{135,11,960},{10,11,784},{141,11,191},{4,0,391},
+{135,0,1169},{137,0,443},{13,11,232},{146,11,35},{132,10,340},{132,0,271},{137,
+11,313},{5,11,973},{137,11,659},{134,0,1140},{6,11,135},{135,11,1176},{4,0,253},
+{5,0,544},{7,0,300},{137,0,340},{7,0,897},{5,10,985},{7,10,509},{145,10,96},{138
+,11,735},{135,10,1919},{138,0,890},{5,0,818},{134,0,1122},{5,0,53},{5,0,541},{6,
+0,94},{6,0,499},{7,0,230},{139,0,321},{4,0,920},{5,0,25},{5,0,790},{6,0,457},{7,
+0,853},{8,0,788},{142,11,31},{132,10,247},{135,11,314},{132,0,468},{7,0,243},{6,
+10,337},{7,10,494},{8,10,27},{8,10,599},{138,10,153},{4,10,184},{5,10,390},{7,10
+,618},{7,10,1456},{139,10,710},{134,0,870},{134,0,1238},{134,0,1765},{10,0,853},
+{10,0,943},{14,0,437},{14,0,439},{14,0,443},{14,0,446},{14,0,452},{14,0,469},{14
+,0,471},{14,0,473},{16,0,93},{16,0,102},{16,0,110},{148,0,121},{4,0,605},{7,0,
+518},{7,0,1282},{7,0,1918},{10,0,180},{139,0,218},{133,0,822},{4,0,634},{11,0,
+916},{142,0,419},{6,11,281},{7,11,6},{8,11,282},{8,11,480},{8,11,499},{9,11,198}
+,{10,11,143},{10,11,169},{10,11,211},{10,11,417},{10,11,574},{11,11,147},{11,11,
+395},{12,11,75},{12,11,407},{12,11,608},{13,11,500},{142,11,251},{134,0,898},{6,
+0,36},{7,0,658},{8,0,454},{150,11,48},{133,11,674},{135,11,1776},{4,11,419},{10,
+10,227},{11,10,497},{11,10,709},{140,10,415},{6,10,360},{7,10,1664},{136,10,478}
+,{137,0,806},{12,11,508},{14,11,102},{14,11,226},{144,11,57},{135,11,1123},{4,11
+,138},{7,11,1012},{7,11,1280},{137,11,76},{5,11,29},{140,11,638},{136,10,699},{
+134,0,1326},{132,0,104},{135,11,735},{132,10,739},{134,0,1331},{7,0,260},{135,11
+,260},{135,11,1063},{7,0,45},{9,0,542},{9,0,566},{10,0,728},{137,10,869},{4,10,
+67},{5,10,422},{7,10,1037},{7,10,1289},{7,10,1555},{9,10,741},{145,10,108},{139,
+0,263},{134,0,1516},{14,0,146},{15,0,42},{16,0,23},{17,0,86},{146,0,17},{138,0,
+468},{136,0,1005},{4,11,17},{5,11,23},{7,11,995},{11,11,383},{11,11,437},{12,11,
+460},{140,11,532},{7,0,87},{142,0,288},{138,10,96},{135,11,626},{144,10,26},{7,0
+,988},{7,0,1939},{9,0,64},{9,0,502},{12,0,22},{12,0,34},{13,0,12},{13,0,234},{
+147,0,77},{13,0,133},{8,10,203},{11,10,823},{11,10,846},{12,10,482},{13,10,277},
+{13,10,302},{13,10,464},{14,10,205},{142,10,221},{4,10,449},{133,10,718},{135,0,
+141},{6,0,1842},{136,0,872},{8,11,70},{12,11,171},{141,11,272},{4,10,355},{6,10,
+311},{9,10,256},{138,10,404},{132,0,619},{137,0,261},{10,11,233},{10,10,758},{
+139,11,76},{5,0,246},{8,0,189},{9,0,355},{9,0,512},{10,0,124},{10,0,453},{11,0,
+143},{11,0,416},{11,0,859},{141,0,341},{134,11,442},{133,10,827},{5,10,64},{140,
+10,581},{4,10,442},{7,10,1047},{7,10,1352},{135,10,1643},{134,11,1709},{5,0,678}
+,{6,0,305},{7,0,775},{7,0,1065},{133,10,977},{11,11,69},{12,11,105},{12,11,117},
+{13,11,213},{14,11,13},{14,11,62},{14,11,177},{14,11,421},{15,11,19},{146,11,141
+},{137,11,309},{5,0,35},{7,0,862},{7,0,1886},{138,0,179},{136,0,285},{132,0,517}
+,{7,11,976},{9,11,146},{10,11,206},{10,11,596},{13,11,218},{142,11,153},{132,10,
+254},{6,0,214},{12,0,540},{4,10,275},{7,10,1219},{140,10,376},{8,0,667},{11,0,
+403},{146,0,83},{12,0,74},{10,11,648},{11,11,671},{143,11,46},{135,0,125},{134,
+10,1753},{133,0,761},{6,0,912},{4,11,518},{6,10,369},{6,10,502},{7,10,1036},{7,
+11,1136},{8,10,348},{9,10,452},{10,10,26},{11,10,224},{11,10,387},{11,10,772},{
+12,10,95},{12,10,629},{13,10,195},{13,10,207},{13,10,241},{14,10,260},{14,10,270
+},{143,10,140},{10,0,131},{140,0,72},{132,10,269},{5,10,480},{7,10,532},{7,10,
+1197},{7,10,1358},{8,10,291},{11,10,349},{142,10,396},{8,11,689},{137,11,863},{8
+,0,333},{138,0,182},{4,11,18},{7,11,145},{7,11,444},{7,11,1278},{8,11,49},{8,11,
+400},{9,11,71},{9,11,250},{10,11,459},{12,11,160},{144,11,24},{14,11,35},{142,11
+,191},{135,11,1864},{135,0,1338},{148,10,15},{14,0,94},{15,0,65},{16,0,4},{16,0,
+77},{16,0,80},{145,0,5},{12,11,82},{143,11,36},{133,11,1010},{133,0,449},{133,0,
+646},{7,0,86},{8,0,103},{135,10,657},{7,0,2028},{138,0,641},{136,10,533},{134,0,
+1},{139,11,970},{5,11,87},{7,11,313},{7,11,1103},{10,11,112},{10,11,582},{11,11,
+389},{11,11,813},{12,11,385},{13,11,286},{14,11,124},{146,11,108},{6,0,869},{132
+,11,267},{6,0,277},{7,0,1274},{7,0,1386},{146,0,87},{6,0,187},{7,0,39},{7,0,1203
+},{8,0,380},{14,0,117},{149,0,28},{4,10,211},{4,10,332},{5,10,335},{6,10,238},{7
+,10,269},{7,10,811},{7,10,1797},{8,10,836},{9,10,507},{141,10,242},{4,0,785},{5,
+0,368},{6,0,297},{7,0,793},{139,0,938},{7,0,464},{8,0,558},{11,0,105},{12,0,231}
+,{14,0,386},{15,0,102},{148,0,75},{133,10,1009},{8,0,877},{140,0,731},{139,11,
+289},{10,11,249},{139,11,209},{132,11,561},{134,0,1608},{132,11,760},{134,0,1429
+},{9,11,154},{140,11,485},{5,10,228},{6,10,203},{7,10,156},{8,10,347},{137,10,
+265},{7,0,1010},{11,0,733},{11,0,759},{13,0,34},{14,0,427},{146,0,45},{7,10,1131
+},{135,10,1468},{136,11,255},{7,0,1656},{9,0,369},{10,0,338},{10,0,490},{11,0,
+154},{11,0,545},{11,0,775},{13,0,77},{141,0,274},{133,11,621},{134,0,1038},{4,11
+,368},{135,11,641},{6,0,2010},{8,0,979},{8,0,985},{10,0,951},{138,0,1011},{134,0
+,1005},{19,0,121},{5,10,291},{5,10,318},{7,10,765},{9,10,389},{140,10,548},{5,0,
+20},{6,0,298},{7,0,659},{137,0,219},{7,0,1440},{11,0,854},{11,0,872},{11,0,921},
+{12,0,551},{13,0,472},{142,0,367},{5,0,490},{6,0,615},{6,0,620},{135,0,683},{6,0
+,1070},{134,0,1597},{139,0,522},{132,0,439},{136,0,669},{6,0,766},{6,0,1143},{6,
+0,1245},{10,10,525},{139,10,82},{9,11,92},{147,11,91},{6,0,668},{134,0,1218},{6,
+11,525},{9,11,876},{140,11,284},{132,0,233},{136,0,547},{132,10,422},{5,10,355},
+{145,10,0},{6,11,300},{135,11,1515},{4,0,482},{137,10,905},{4,0,886},{7,0,346},{
+133,11,594},{133,10,865},{5,10,914},{134,10,1625},{135,0,334},{5,0,795},{6,0,
+1741},{133,10,234},{135,10,1383},{6,11,1641},{136,11,820},{135,0,371},{7,11,1313
+},{138,11,660},{135,10,1312},{135,0,622},{7,0,625},{135,0,1750},{135,0,339},{4,0
+,203},{135,0,1936},{15,0,29},{16,0,38},{15,11,29},{144,11,38},{5,0,338},{135,0,
+1256},{135,10,1493},{10,0,130},{6,10,421},{7,10,61},{7,10,1540},{138,10,501},{6,
+11,389},{7,11,149},{9,11,142},{138,11,94},{137,10,341},{11,0,678},{12,0,307},{
+142,10,98},{6,11,8},{7,11,1881},{136,11,91},{135,0,2044},{6,0,770},{6,0,802},{6,
+0,812},{7,0,311},{9,0,308},{12,0,255},{6,10,102},{7,10,72},{15,10,142},{147,10,
+67},{151,10,30},{135,10,823},{135,0,1266},{135,11,1746},{135,10,1870},{4,0,400},
+{5,0,267},{135,0,232},{7,11,24},{11,11,542},{139,11,852},{135,11,1739},{4,11,503
+},{135,11,1661},{5,11,130},{7,11,1314},{9,11,610},{10,11,718},{11,11,601},{11,11
+,819},{11,11,946},{140,11,536},{10,11,149},{11,11,280},{142,11,336},{7,0,739},{
+11,0,690},{7,11,1946},{8,10,48},{8,10,88},{8,10,582},{8,10,681},{9,10,373},{9,10
+,864},{11,10,157},{11,10,843},{148,10,27},{134,0,990},{4,10,88},{5,10,137},{5,10
+,174},{5,10,777},{6,10,1664},{6,10,1725},{7,10,77},{7,10,426},{7,10,1317},{7,10,
+1355},{8,10,126},{8,10,563},{9,10,523},{9,10,750},{10,10,310},{10,10,836},{11,10
+,42},{11,10,318},{11,10,731},{12,10,68},{12,10,92},{12,10,507},{12,10,692},{13,
+10,81},{13,10,238},{13,10,374},{14,10,436},{18,10,138},{19,10,78},{19,10,111},{
+20,10,55},{20,10,77},{148,10,92},{141,10,418},{7,0,1831},{132,10,938},{6,0,776},
+{134,0,915},{138,10,351},{5,11,348},{6,11,522},{6,10,1668},{7,10,1499},{8,10,117
+},{9,10,314},{138,10,174},{135,10,707},{132,0,613},{133,10,403},{132,11,392},{5,
+11,433},{9,11,633},{139,11,629},{133,0,763},{132,0,878},{132,0,977},{132,0,100},
+{6,0,463},{4,10,44},{5,10,311},{7,10,639},{7,10,762},{7,10,1827},{9,10,8},{9,10,
+462},{148,10,83},{134,11,234},{4,10,346},{7,10,115},{9,10,180},{9,10,456},{138,
+10,363},{5,0,362},{5,0,443},{6,0,318},{7,0,1019},{139,0,623},{5,0,463},{8,0,296}
+,{7,11,140},{7,11,1950},{8,11,680},{11,11,817},{147,11,88},{7,11,1222},{138,11,
+386},{142,0,137},{132,0,454},{7,0,1914},{6,11,5},{7,10,1051},{9,10,545},{11,11,
+249},{12,11,313},{16,11,66},{145,11,26},{135,0,1527},{145,0,58},{148,11,59},{5,0
+,48},{5,0,404},{6,0,557},{7,0,458},{8,0,597},{10,0,455},{10,0,606},{11,0,49},{11
+,0,548},{12,0,476},{13,0,18},{141,0,450},{5,11,963},{134,11,1773},{133,0,729},{
+138,11,586},{5,0,442},{135,0,1984},{134,0,449},{144,0,40},{4,0,853},{7,11,180},{
+8,11,509},{136,11,792},{6,10,185},{7,10,1899},{9,10,875},{139,10,673},{134,11,
+524},{12,0,227},{4,10,327},{5,10,478},{7,10,1332},{136,10,753},{6,0,1491},{5,10,
+1020},{133,10,1022},{4,10,103},{133,10,401},{132,11,931},{4,10,499},{135,10,1421
+},{5,0,55},{7,0,376},{140,0,161},{133,0,450},{6,0,1174},{134,0,1562},{10,0,62},{
+13,0,400},{135,11,1837},{140,0,207},{135,0,869},{4,11,773},{5,11,618},{137,11,
+756},{132,10,96},{4,0,213},{7,0,223},{8,0,80},{135,10,968},{4,11,90},{5,11,337},
+{5,11,545},{7,11,754},{9,11,186},{10,11,72},{10,11,782},{11,11,513},{11,11,577},
+{11,11,610},{11,11,889},{11,11,961},{12,11,354},{12,11,362},{12,11,461},{12,11,
+595},{13,11,79},{143,11,121},{7,0,381},{7,0,806},{7,0,820},{8,0,354},{8,0,437},{
+8,0,787},{9,0,657},{10,0,58},{10,0,339},{10,0,749},{11,0,914},{12,0,162},{13,0,
+75},{14,0,106},{14,0,198},{14,0,320},{14,0,413},{146,0,43},{136,0,747},{136,0,
+954},{134,0,1073},{135,0,556},{7,11,151},{9,11,329},{139,11,254},{5,0,692},{134,
+0,1395},{6,10,563},{137,10,224},{134,0,191},{132,0,804},{9,11,187},{10,11,36},{
+17,11,44},{146,11,64},{7,11,165},{7,11,919},{136,11,517},{4,11,506},{5,11,295},{
+7,11,1680},{15,11,14},{144,11,5},{4,0,706},{6,0,162},{7,0,1960},{136,0,831},{135
+,11,1376},{7,11,987},{9,11,688},{10,11,522},{11,11,788},{140,11,566},{150,0,35},
+{138,0,426},{135,0,1235},{135,11,1741},{7,11,389},{7,11,700},{7,11,940},{8,11,
+514},{9,11,116},{9,11,535},{10,11,118},{11,11,107},{11,11,148},{11,11,922},{12,
+11,254},{12,11,421},{142,11,238},{134,0,1234},{132,11,743},{4,10,910},{5,10,832}
+,{135,11,1335},{141,0,96},{135,11,185},{146,0,149},{4,0,204},{137,0,902},{4,11,
+784},{133,11,745},{136,0,833},{136,0,949},{7,0,366},{9,0,287},{12,0,199},{12,0,
+556},{12,0,577},{5,11,81},{7,11,146},{7,11,1342},{7,11,1446},{8,11,53},{8,11,561
+},{8,11,694},{8,11,754},{9,11,97},{9,11,115},{9,11,894},{10,11,462},{10,11,813},
+{11,11,230},{11,11,657},{11,11,699},{11,11,748},{12,11,119},{12,11,200},{12,11,
+283},{14,11,273},{145,11,15},{5,11,408},{137,11,747},{9,11,498},{140,11,181},{6,
+0,2020},{136,0,992},{5,0,356},{135,0,224},{134,0,784},{7,0,630},{9,0,567},{11,0,
+150},{11,0,444},{13,0,119},{8,10,528},{137,10,348},{134,0,539},{4,10,20},{133,10
+,616},{142,0,27},{7,11,30},{8,11,86},{8,11,315},{8,11,700},{9,11,576},{9,11,858}
+,{11,11,310},{11,11,888},{11,11,904},{12,11,361},{141,11,248},{138,11,839},{134,
+0,755},{134,0,1063},{7,10,1091},{135,10,1765},{134,11,428},{7,11,524},{8,11,169}
+,{8,11,234},{9,11,480},{138,11,646},{139,0,814},{7,11,1462},{139,11,659},{4,10,
+26},{5,10,429},{6,10,245},{7,10,704},{7,10,1379},{135,10,1474},{7,11,1205},{138,
+11,637},{139,11,803},{132,10,621},{136,0,987},{4,11,266},{8,11,4},{9,11,39},{10,
+11,166},{11,11,918},{12,11,635},{20,11,10},{22,11,27},{150,11,43},{4,0,235},{135
+,0,255},{4,0,194},{5,0,584},{6,0,384},{7,0,583},{10,0,761},{11,0,760},{139,0,851
+},{133,10,542},{134,0,1086},{133,10,868},{8,0,1016},{136,0,1018},{7,0,1396},{7,
+11,1396},{136,10,433},{135,10,1495},{138,10,215},{141,10,124},{7,11,157},{8,11,
+279},{9,11,759},{16,11,31},{16,11,39},{16,11,75},{18,11,24},{20,11,42},{152,11,1
+},{5,0,562},{134,11,604},{134,0,913},{5,0,191},{137,0,271},{4,0,470},{6,0,153},{
+7,0,1503},{7,0,1923},{10,0,701},{11,0,132},{11,0,227},{11,0,320},{11,0,436},{11,
+0,525},{11,0,855},{11,0,873},{12,0,41},{12,0,286},{13,0,103},{13,0,284},{14,0,
+255},{14,0,262},{15,0,117},{143,0,127},{7,0,475},{12,0,45},{147,10,112},{132,11,
+567},{137,11,859},{6,0,713},{6,0,969},{6,0,1290},{134,0,1551},{133,0,327},{6,0,
+552},{6,0,1292},{7,0,1754},{137,0,604},{4,0,223},{6,0,359},{11,0,3},{13,0,108},{
+14,0,89},{16,0,22},{5,11,762},{7,11,1880},{9,11,680},{139,11,798},{5,0,80},{6,0,
+405},{7,0,403},{7,0,1502},{8,0,456},{9,0,487},{9,0,853},{9,0,889},{10,0,309},{11
+,0,721},{11,0,994},{12,0,430},{141,0,165},{133,11,298},{132,10,647},{134,0,2016}
+,{18,10,10},{146,11,10},{4,0,453},{5,0,887},{6,0,535},{8,0,6},{8,0,543},{136,0,
+826},{136,0,975},{10,0,961},{138,0,962},{138,10,220},{6,0,1891},{6,0,1893},{9,0,
+916},{9,0,965},{9,0,972},{12,0,801},{12,0,859},{12,0,883},{15,0,226},{149,0,51},
+{132,10,109},{135,11,267},{7,11,92},{7,11,182},{8,11,453},{9,11,204},{11,11,950}
+,{12,11,94},{12,11,644},{16,11,20},{16,11,70},{16,11,90},{147,11,55},{134,10,
+1746},{6,11,71},{7,11,845},{7,11,1308},{8,11,160},{137,11,318},{5,0,101},{6,0,88
+},{7,0,263},{7,0,628},{7,0,1677},{8,0,349},{9,0,100},{10,0,677},{14,0,169},{14,0
+,302},{14,0,313},{15,0,48},{15,0,84},{7,11,237},{8,11,664},{9,11,42},{9,11,266},
+{9,11,380},{9,11,645},{10,11,177},{138,11,276},{138,11,69},{4,0,310},{7,0,708},{
+7,0,996},{9,0,795},{10,0,390},{10,0,733},{11,0,451},{12,0,249},{14,0,115},{14,0,
+286},{143,0,100},{5,0,587},{4,10,40},{10,10,67},{11,10,117},{11,10,768},{139,10,
+935},{6,0,1942},{7,0,512},{136,0,983},{7,10,992},{8,10,301},{9,10,722},{12,10,63
+},{13,10,29},{14,10,161},{143,10,18},{136,11,76},{139,10,923},{134,0,645},{134,0
+,851},{4,0,498},{132,11,293},{7,0,217},{8,0,140},{10,0,610},{14,11,352},{17,11,
+53},{18,11,146},{18,11,152},{19,11,11},{150,11,54},{134,0,1448},{138,11,841},{
+133,0,905},{4,11,605},{7,11,518},{7,11,1282},{7,11,1918},{10,11,180},{139,11,218
+},{139,11,917},{135,10,825},{140,10,328},{4,0,456},{7,0,105},{7,0,358},{7,0,1637
+},{8,0,643},{139,0,483},{134,0,792},{6,11,96},{135,11,1426},{137,11,691},{4,11,
+651},{133,11,289},{7,11,688},{8,11,35},{9,11,511},{10,11,767},{147,11,118},{150,
+0,56},{5,0,243},{5,0,535},{6,10,204},{10,10,320},{10,10,583},{13,10,502},{14,10,
+72},{14,10,274},{14,10,312},{14,10,344},{15,10,159},{16,10,62},{16,10,69},{17,10
+,30},{18,10,42},{18,10,53},{18,10,84},{18,10,140},{19,10,68},{19,10,85},{20,10,5
+},{20,10,45},{20,10,101},{22,10,7},{150,10,20},{4,10,558},{6,10,390},{7,10,162},
+{7,10,689},{9,10,360},{138,10,653},{146,11,23},{135,0,1748},{5,10,856},{6,10,
+1672},{6,10,1757},{134,10,1781},{5,0,539},{5,0,754},{6,0,876},{132,11,704},{135,
+11,1078},{5,10,92},{10,10,736},{140,10,102},{17,0,91},{5,10,590},{137,10,213},{
+134,0,1565},{6,0,91},{135,0,435},{4,0,939},{140,0,792},{134,0,1399},{4,0,16},{5,
+0,316},{5,0,842},{6,0,370},{6,0,1778},{8,0,166},{11,0,812},{12,0,206},{12,0,351}
+,{14,0,418},{16,0,15},{16,0,34},{18,0,3},{19,0,3},{19,0,7},{20,0,4},{21,0,21},{4
+,11,720},{133,11,306},{144,0,95},{133,11,431},{132,11,234},{135,0,551},{4,0,999}
+,{6,0,1966},{134,0,2042},{7,0,619},{10,0,547},{11,0,122},{12,0,601},{15,0,7},{
+148,0,20},{5,11,464},{6,11,236},{7,11,276},{7,11,696},{7,11,914},{7,11,1108},{7,
+11,1448},{9,11,15},{9,11,564},{10,11,14},{12,11,565},{13,11,449},{14,11,53},{15,
+11,13},{16,11,64},{145,11,41},{6,0,884},{6,0,1019},{134,0,1150},{6,11,1767},{12,
+11,194},{145,11,107},{136,10,503},{133,11,840},{7,0,671},{134,10,466},{132,0,888
+},{4,0,149},{138,0,368},{4,0,154},{7,0,1134},{136,0,105},{135,0,983},{9,11,642},
+{11,11,236},{142,11,193},{4,0,31},{6,0,429},{7,0,962},{9,0,458},{139,0,691},{6,0
+,643},{134,0,1102},{132,0,312},{4,11,68},{5,11,634},{6,11,386},{7,11,794},{8,11,
+273},{9,11,563},{10,11,105},{10,11,171},{11,11,94},{139,11,354},{133,0,740},{135
+,0,1642},{4,11,95},{7,11,416},{8,11,211},{139,11,830},{132,0,236},{138,10,241},{
+7,11,731},{13,11,20},{143,11,11},{5,0,836},{5,0,857},{6,0,1680},{135,0,59},{10,0
+,68},{11,0,494},{152,11,6},{4,0,81},{139,0,867},{135,0,795},{133,11,689},{4,0,
+1001},{5,0,282},{6,0,1932},{6,0,1977},{6,0,1987},{6,0,1992},{8,0,650},{8,0,919},
+{8,0,920},{8,0,923},{8,0,926},{8,0,927},{8,0,931},{8,0,939},{8,0,947},{8,0,956},
+{8,0,997},{9,0,907},{10,0,950},{10,0,953},{10,0,954},{10,0,956},{10,0,958},{10,0
+,959},{10,0,964},{10,0,970},{10,0,972},{10,0,973},{10,0,975},{10,0,976},{10,0,
+980},{10,0,981},{10,0,984},{10,0,988},{10,0,990},{10,0,995},{10,0,999},{10,0,
+1002},{10,0,1003},{10,0,1005},{10,0,1006},{10,0,1008},{10,0,1009},{10,0,1012},{
+10,0,1014},{10,0,1015},{10,0,1019},{10,0,1020},{10,0,1022},{12,0,959},{12,0,961}
+,{12,0,962},{12,0,963},{12,0,964},{12,0,965},{12,0,967},{12,0,968},{12,0,969},{
+12,0,970},{12,0,971},{12,0,972},{12,0,973},{12,0,974},{12,0,975},{12,0,976},{12,
+0,977},{12,0,979},{12,0,981},{12,0,982},{12,0,983},{12,0,984},{12,0,985},{12,0,
+986},{12,0,987},{12,0,989},{12,0,990},{12,0,992},{12,0,993},{12,0,995},{12,0,998
+},{12,0,999},{12,0,1000},{12,0,1001},{12,0,1002},{12,0,1004},{12,0,1005},{12,0,
+1006},{12,0,1007},{12,0,1008},{12,0,1009},{12,0,1010},{12,0,1011},{12,0,1012},{
+12,0,1014},{12,0,1015},{12,0,1016},{12,0,1017},{12,0,1018},{12,0,1019},{12,0,
+1022},{12,0,1023},{14,0,475},{14,0,477},{14,0,478},{14,0,479},{14,0,480},{14,0,
+482},{14,0,483},{14,0,484},{14,0,485},{14,0,486},{14,0,487},{14,0,488},{14,0,489
+},{14,0,490},{14,0,491},{14,0,492},{14,0,493},{14,0,494},{14,0,495},{14,0,496},{
+14,0,497},{14,0,498},{14,0,499},{14,0,500},{14,0,501},{14,0,502},{14,0,503},{14,
+0,504},{14,0,506},{14,0,507},{14,0,508},{14,0,509},{14,0,510},{14,0,511},{16,0,
+113},{16,0,114},{16,0,115},{16,0,117},{16,0,118},{16,0,119},{16,0,121},{16,0,122
+},{16,0,123},{16,0,124},{16,0,125},{16,0,126},{16,0,127},{18,0,242},{18,0,243},{
+18,0,244},{18,0,245},{18,0,248},{18,0,249},{18,0,250},{18,0,251},{18,0,252},{18,
+0,253},{18,0,254},{18,0,255},{20,0,125},{20,0,126},{148,0,127},{7,11,1717},{7,11
+,1769},{138,11,546},{7,11,1127},{7,11,1572},{10,11,297},{10,11,422},{11,11,764},
+{11,11,810},{12,11,264},{13,11,102},{13,11,300},{13,11,484},{14,11,147},{14,11,
+229},{17,11,71},{18,11,118},{147,11,120},{6,0,1148},{134,0,1586},{132,0,775},{
+135,10,954},{133,11,864},{133,11,928},{138,11,189},{135,10,1958},{6,10,549},{8,
+10,34},{8,10,283},{9,10,165},{138,10,475},{5,10,652},{5,10,701},{135,10,449},{
+135,11,695},{4,10,655},{7,10,850},{17,10,75},{146,10,137},{140,11,682},{133,11,
+523},{8,0,970},{136,10,670},{136,11,555},{7,11,76},{8,11,44},{9,11,884},{10,11,
+580},{11,11,399},{11,11,894},{15,11,122},{18,11,144},{147,11,61},{6,10,159},{6,
+10,364},{7,10,516},{7,10,1439},{137,10,518},{4,0,71},{5,0,376},{7,0,119},{138,0,
+665},{141,10,151},{11,0,827},{14,0,34},{143,0,148},{133,11,518},{4,0,479},{135,
+11,1787},{135,11,1852},{135,10,993},{7,0,607},{136,0,99},{134,0,1960},{132,0,793
+},{4,0,41},{5,0,74},{7,0,1627},{11,0,871},{140,0,619},{7,0,94},{11,0,329},{11,0,
+965},{12,0,241},{14,0,354},{15,0,22},{148,0,63},{7,10,501},{9,10,111},{10,10,141
+},{11,10,332},{13,10,43},{13,10,429},{14,10,130},{14,10,415},{145,10,102},{9,0,
+209},{137,0,300},{134,0,1497},{138,11,255},{4,11,934},{5,11,138},{136,11,610},{
+133,0,98},{6,0,1316},{10,11,804},{138,11,832},{8,11,96},{9,11,36},{10,11,607},{
+11,11,423},{11,11,442},{12,11,309},{14,11,199},{15,11,90},{145,11,110},{132,0,
+463},{5,10,149},{136,10,233},{133,10,935},{4,11,652},{8,11,320},{9,11,13},{9,11,
+398},{9,11,727},{10,11,75},{10,11,184},{10,11,230},{10,11,564},{10,11,569},{11,
+11,973},{12,11,70},{12,11,189},{13,11,57},{13,11,257},{22,11,6},{150,11,16},{142
+,0,291},{12,10,582},{146,10,131},{136,10,801},{133,0,984},{145,11,116},{4,11,692
+},{133,11,321},{4,0,182},{6,0,205},{135,0,220},{4,0,42},{9,0,205},{9,0,786},{138
+,0,659},{6,0,801},{11,11,130},{140,11,609},{132,0,635},{5,11,345},{135,11,1016},
+{139,0,533},{132,0,371},{4,0,272},{135,0,836},{6,0,1282},{135,11,1100},{5,0,825}
+,{134,0,1640},{135,11,1325},{133,11,673},{4,11,287},{133,11,1018},{135,0,357},{6
+,0,467},{137,0,879},{7,0,317},{135,0,569},{6,0,924},{134,0,1588},{5,11,34},{5,10
+,406},{10,11,724},{12,11,444},{13,11,354},{18,11,32},{23,11,24},{23,11,31},{152,
+11,5},{6,0,1795},{6,0,1835},{6,0,1836},{6,0,1856},{8,0,844},{8,0,849},{8,0,854},
+{8,0,870},{8,0,887},{10,0,852},{138,0,942},{6,10,69},{135,10,117},{137,0,307},{4
+,0,944},{6,0,1799},{6,0,1825},{10,0,848},{10,0,875},{10,0,895},{10,0,899},{10,0,
+902},{140,0,773},{11,0,43},{13,0,72},{141,0,142},{135,10,1830},{134,11,382},{4,
+10,432},{135,10,824},{132,11,329},{7,0,1820},{139,11,124},{133,10,826},{133,0,
+525},{132,11,906},{7,11,1940},{136,11,366},{138,11,10},{4,11,123},{4,11,649},{5,
+11,605},{7,11,1509},{136,11,36},{6,0,110},{135,0,1681},{133,0,493},{133,11,767},
+{4,0,174},{135,0,911},{138,11,786},{8,0,417},{137,0,782},{133,10,1000},{7,0,733}
+,{137,0,583},{4,10,297},{6,10,529},{7,10,152},{7,10,713},{7,10,1845},{8,10,710},
+{8,10,717},{12,10,639},{140,10,685},{4,0,32},{5,0,215},{6,0,269},{7,0,1782},{7,0
+,1892},{10,0,16},{11,0,822},{11,0,954},{141,0,481},{4,11,273},{5,11,658},{133,11
+,995},{136,0,477},{134,11,72},{135,11,1345},{5,0,308},{7,0,1088},{4,10,520},{135
+,10,575},{133,11,589},{5,0,126},{8,0,297},{9,0,366},{140,0,374},{7,0,1551},{139,
+0,361},{5,11,117},{6,11,514},{6,11,541},{7,11,1164},{7,11,1436},{8,11,220},{8,11
+,648},{10,11,688},{139,11,560},{133,11,686},{4,0,946},{6,0,1807},{8,0,871},{10,0
+,854},{10,0,870},{10,0,888},{10,0,897},{10,0,920},{12,0,722},{12,0,761},{12,0,
+763},{12,0,764},{14,0,454},{14,0,465},{16,0,107},{18,0,167},{18,0,168},{146,0,
+172},{132,0,175},{135,0,1307},{132,0,685},{135,11,1834},{133,0,797},{6,0,745},{6
+,0,858},{134,0,963},{133,0,565},{5,10,397},{6,10,154},{7,11,196},{7,10,676},{8,
+10,443},{8,10,609},{9,10,24},{9,10,325},{10,10,35},{10,11,765},{11,11,347},{11,
+10,535},{11,11,552},{11,11,576},{11,10,672},{11,11,790},{11,10,1018},{12,11,263}
+,{12,10,637},{13,11,246},{13,11,270},{13,11,395},{14,11,74},{14,11,176},{14,11,
+190},{14,11,398},{14,11,412},{15,11,32},{15,11,63},{16,10,30},{16,11,88},{147,11
+,105},{13,11,84},{141,11,122},{4,0,252},{7,0,1068},{10,0,434},{11,0,228},{11,0,
+426},{13,0,231},{18,0,106},{148,0,87},{137,0,826},{4,11,589},{139,11,282},{5,11,
+381},{135,11,1792},{132,0,791},{5,0,231},{10,0,509},{133,10,981},{7,0,601},{9,0,
+277},{9,0,674},{10,0,178},{10,0,418},{10,0,571},{11,0,531},{12,0,113},{12,0,475}
+,{13,0,99},{142,0,428},{4,10,56},{7,11,616},{7,10,1791},{8,10,607},{8,10,651},{
+10,11,413},{11,10,465},{11,10,835},{12,10,337},{141,10,480},{7,0,1591},{144,0,43
+},{9,10,158},{138,10,411},{135,0,1683},{8,0,289},{11,0,45},{12,0,278},{140,0,537
+},{6,11,120},{7,11,1188},{7,11,1710},{8,11,286},{9,11,667},{11,11,592},{139,11,
+730},{136,10,617},{135,0,1120},{135,11,1146},{139,10,563},{4,11,352},{4,10,369},
+{135,11,687},{143,11,38},{4,0,399},{5,0,119},{5,0,494},{7,0,751},{9,0,556},{14,
+11,179},{15,11,151},{150,11,11},{4,11,192},{5,11,49},{6,11,200},{6,11,293},{6,11
+,1696},{135,11,488},{4,0,398},{133,0,660},{7,0,1030},{134,10,622},{135,11,595},{
+141,0,168},{132,11,147},{7,0,973},{10,10,624},{142,10,279},{132,10,363},{132,0,
+642},{133,11,934},{134,0,1615},{7,11,505},{135,11,523},{7,0,594},{7,0,851},{7,0,
+1858},{9,0,411},{9,0,574},{9,0,666},{9,0,737},{10,0,346},{10,0,712},{11,0,246},{
+11,0,432},{11,0,517},{11,0,647},{11,0,679},{11,0,727},{12,0,304},{12,0,305},{12,
+0,323},{12,0,483},{12,0,572},{12,0,593},{12,0,602},{13,0,95},{13,0,101},{13,0,
+171},{13,0,315},{13,0,378},{13,0,425},{13,0,475},{14,0,63},{14,0,380},{14,0,384}
+,{15,0,133},{18,0,112},{148,0,72},{135,0,1093},{132,0,679},{8,0,913},{10,0,903},
+{10,0,915},{12,0,648},{12,0,649},{14,0,455},{16,0,112},{138,11,438},{137,0,203},
+{134,10,292},{134,0,1492},{7,0,1374},{8,0,540},{5,10,177},{6,10,616},{7,10,827},
+{9,10,525},{138,10,656},{135,0,1486},{9,0,714},{138,10,31},{136,0,825},{134,0,
+1511},{132,11,637},{134,0,952},{4,10,161},{133,10,631},{5,0,143},{5,0,769},{6,0,
+1760},{7,0,682},{7,0,1992},{136,0,736},{132,0,700},{134,0,1540},{132,11,777},{9,
+11,867},{138,11,837},{7,0,1557},{135,10,1684},{133,0,860},{6,0,422},{7,0,0},{7,0
+,1544},{9,0,605},{11,0,990},{12,0,235},{12,0,453},{13,0,47},{13,0,266},{9,10,469
+},{9,10,709},{12,10,512},{14,10,65},{145,10,12},{11,0,807},{10,10,229},{11,10,73
+},{139,10,376},{6,11,170},{7,11,1080},{8,11,395},{8,11,487},{11,11,125},{141,11,
+147},{5,0,515},{137,0,131},{7,0,1605},{11,0,962},{146,0,139},{132,0,646},{4,0,
+396},{7,0,728},{9,0,117},{13,0,202},{148,0,51},{6,0,121},{6,0,124},{6,0,357},{7,
+0,1138},{7,0,1295},{8,0,162},{8,0,508},{11,0,655},{4,11,535},{6,10,558},{7,10,
+651},{8,11,618},{9,10,0},{10,10,34},{139,10,1008},{135,11,1245},{138,0,357},{150
+,11,23},{133,0,237},{135,0,1784},{7,10,1832},{138,10,374},{132,0,713},{132,11,46
+},{6,0,1536},{10,0,348},{5,11,811},{6,11,1679},{6,11,1714},{135,11,2032},{11,11,
+182},{142,11,195},{6,0,523},{7,0,738},{7,10,771},{7,10,1731},{9,10,405},{138,10,
+421},{7,11,1458},{9,11,407},{139,11,15},{6,11,34},{7,11,69},{7,11,640},{7,11,
+1089},{8,11,708},{8,11,721},{9,11,363},{9,11,643},{10,11,628},{148,11,98},{133,0
+,434},{135,0,1877},{7,0,571},{138,0,366},{5,10,881},{133,10,885},{9,0,513},{10,0
+,25},{10,0,39},{12,0,122},{140,0,187},{132,0,580},{5,10,142},{134,10,546},{132,
+11,462},{137,0,873},{5,10,466},{11,10,571},{12,10,198},{13,10,283},{14,10,186},{
+15,10,21},{143,10,103},{7,0,171},{4,10,185},{5,10,257},{5,10,839},{5,10,936},{9,
+10,399},{10,10,258},{10,10,395},{10,10,734},{11,10,1014},{12,10,23},{13,10,350},
+{14,10,150},{147,10,6},{134,0,625},{7,0,107},{7,0,838},{8,0,550},{138,0,401},{5,
+11,73},{6,11,23},{134,11,338},{4,0,943},{6,0,1850},{12,0,713},{142,0,434},{11,0,
+588},{11,0,864},{11,0,936},{11,0,968},{12,0,73},{12,0,343},{12,0,394},{13,0,275}
+,{14,0,257},{15,0,160},{7,10,404},{7,10,1377},{7,10,1430},{7,10,2017},{8,10,149}
+,{8,10,239},{8,10,512},{8,10,793},{8,10,818},{9,10,474},{9,10,595},{10,10,122},{
+10,10,565},{10,10,649},{10,10,783},{11,10,239},{11,10,295},{11,10,447},{11,10,
+528},{11,10,639},{11,10,800},{12,10,25},{12,10,157},{12,10,316},{12,10,390},{12,
+10,391},{12,10,395},{12,10,478},{12,10,503},{12,10,592},{12,10,680},{13,10,50},{
+13,10,53},{13,10,132},{13,10,198},{13,10,322},{13,10,415},{13,10,511},{14,10,71}
+,{14,10,395},{15,10,71},{15,10,136},{17,10,123},{18,10,93},{147,10,58},{133,0,
+768},{11,0,103},{142,0,0},{136,10,712},{132,0,799},{132,0,894},{7,11,725},{8,11,
+498},{139,11,268},{135,11,1798},{135,11,773},{141,11,360},{4,10,377},{152,10,13}
+,{135,0,1673},{132,11,583},{134,0,1052},{133,11,220},{140,11,69},{132,11,544},{4
+,10,180},{135,10,1906},{134,0,272},{4,0,441},{134,0,1421},{4,0,9},{5,0,128},{7,0
+,368},{11,0,480},{148,0,3},{5,11,176},{6,11,437},{6,11,564},{11,11,181},{141,11,
+183},{132,10,491},{7,0,1182},{141,11,67},{6,0,1346},{4,10,171},{138,10,234},{4,
+10,586},{7,10,1186},{138,10,631},{136,0,682},{134,0,1004},{15,0,24},{143,11,24},
+{134,0,968},{4,0,2},{6,0,742},{6,0,793},{7,0,545},{7,0,894},{9,10,931},{10,10,
+334},{148,10,71},{136,11,600},{133,10,765},{9,0,769},{140,0,185},{4,11,790},{5,
+11,273},{134,11,394},{7,0,474},{137,0,578},{4,11,135},{6,11,127},{7,11,1185},{7,
+11,1511},{8,11,613},{11,11,5},{12,11,133},{12,11,495},{12,11,586},{14,11,385},{
+15,11,118},{17,11,20},{146,11,98},{133,10,424},{5,0,530},{142,0,113},{6,11,230},
+{7,11,961},{7,11,1085},{136,11,462},{7,11,1954},{137,11,636},{136,10,714},{149,
+11,6},{135,10,685},{9,10,420},{10,10,269},{10,10,285},{10,10,576},{11,10,397},{
+13,10,175},{145,10,90},{132,10,429},{5,0,556},{5,11,162},{136,11,68},{132,11,654
+},{4,11,156},{7,11,998},{7,11,1045},{7,11,1860},{9,11,48},{9,11,692},{11,11,419}
+,{139,11,602},{6,0,1317},{8,0,16},{9,0,825},{12,0,568},{7,11,1276},{8,11,474},{
+137,11,652},{18,0,97},{7,10,18},{7,10,699},{7,10,1966},{8,10,752},{9,10,273},{9,
+10,412},{9,10,703},{10,10,71},{10,10,427},{138,10,508},{10,0,703},{7,11,1454},{
+138,11,703},{4,10,53},{5,10,186},{135,10,752},{134,0,892},{134,0,1571},{8,10,575
+},{10,10,289},{139,10,319},{6,0,186},{137,0,426},{134,0,1101},{132,10,675},{132,
+0,585},{6,0,1870},{137,0,937},{152,11,10},{9,11,197},{10,11,300},{12,11,473},{13
+,11,90},{141,11,405},{4,0,93},{5,0,252},{6,0,229},{7,0,291},{9,0,550},{139,0,644
+},{137,0,749},{9,0,162},{6,10,209},{8,10,468},{9,10,210},{11,10,36},{12,10,28},{
+12,10,630},{13,10,21},{13,10,349},{14,10,7},{145,10,13},{132,0,381},{132,11,606}
+,{4,10,342},{135,10,1179},{7,11,1587},{7,11,1707},{10,11,528},{139,11,504},{12,
+11,39},{13,11,265},{141,11,439},{4,10,928},{133,10,910},{7,10,1838},{7,11,1978},
+{136,11,676},{6,0,762},{6,0,796},{134,0,956},{4,10,318},{4,10,496},{7,10,856},{
+139,10,654},{137,11,242},{4,11,361},{133,11,315},{132,11,461},{132,11,472},{132,
+0,857},{5,0,21},{6,0,77},{6,0,157},{7,0,974},{7,0,1301},{7,0,1339},{7,0,1490},{7
+,0,1873},{9,0,628},{7,10,915},{8,10,247},{147,10,0},{4,10,202},{5,10,382},{6,10,
+454},{7,10,936},{7,10,1803},{8,10,758},{9,10,375},{9,10,895},{10,10,743},{10,10,
+792},{11,10,978},{11,10,1012},{142,10,109},{7,11,617},{10,11,498},{11,11,501},{
+12,11,16},{140,11,150},{7,10,1150},{7,10,1425},{7,10,1453},{10,11,747},{140,10,
+513},{133,11,155},{11,0,919},{141,0,409},{138,10,791},{10,0,633},{139,11,729},{7
+,11,163},{8,11,319},{9,11,402},{10,11,24},{10,11,681},{11,11,200},{11,11,567},{
+12,11,253},{12,11,410},{142,11,219},{5,11,475},{7,11,1780},{9,11,230},{11,11,297
+},{11,11,558},{14,11,322},{147,11,76},{7,0,332},{6,10,445},{137,10,909},{135,11,
+1956},{136,11,274},{134,10,578},{135,0,1489},{135,11,1848},{5,11,944},{134,11,
+1769},{132,11,144},{136,10,766},{4,0,832},{135,10,541},{8,0,398},{9,0,681},{139,
+0,632},{136,0,645},{9,0,791},{10,0,93},{16,0,13},{17,0,23},{18,0,135},{19,0,12},
+{20,0,1},{20,0,12},{148,0,14},{6,11,247},{137,11,555},{134,0,20},{132,0,800},{
+135,0,1841},{139,10,983},{137,10,768},{132,10,584},{141,11,51},{6,0,1993},{4,11,
+620},{138,11,280},{136,0,769},{11,0,290},{11,0,665},{7,11,1810},{11,11,866},{12,
+11,103},{13,11,495},{17,11,67},{147,11,74},{134,0,1426},{139,0,60},{4,10,326},{
+135,10,1770},{7,0,1874},{9,0,641},{132,10,226},{6,0,644},{5,10,426},{8,10,30},{9
+,10,2},{11,10,549},{147,10,122},{5,11,428},{138,11,442},{135,11,1871},{135,0,
+1757},{147,10,117},{135,0,937},{135,0,1652},{6,0,654},{134,0,1476},{133,11,99},{
+135,0,527},{132,10,345},{4,10,385},{4,11,397},{7,10,265},{135,10,587},{4,0,579},
+{5,0,226},{5,0,323},{135,0,960},{134,0,1486},{8,11,502},{144,11,9},{4,10,347},{5
+,10,423},{5,10,996},{135,10,1329},{7,11,727},{146,11,73},{4,11,485},{7,11,353},{
+7,10,1259},{7,11,1523},{9,10,125},{139,10,65},{6,0,325},{5,10,136},{6,11,366},{7
+,11,1384},{7,11,1601},{136,10,644},{138,11,160},{6,0,1345},{137,11,282},{18,0,91
+},{147,0,70},{136,0,404},{4,11,157},{133,11,471},{133,0,973},{6,0,135},{135,0,
+1176},{8,11,116},{11,11,551},{142,11,159},{4,0,549},{4,10,433},{133,10,719},{136
+,0,976},{5,11,160},{7,11,363},{7,11,589},{10,11,170},{141,11,55},{144,0,21},{144
+,0,51},{135,0,314},{135,10,1363},{4,11,108},{7,11,405},{10,11,491},{139,11,498},
+{146,0,4},{4,10,555},{8,10,536},{10,10,288},{139,10,1005},{135,11,1005},{6,0,281
+},{7,0,6},{8,0,282},{8,0,480},{8,0,499},{9,0,198},{10,0,143},{10,0,169},{10,0,
+211},{10,0,417},{10,0,574},{11,0,147},{11,0,395},{12,0,75},{12,0,407},{12,0,608}
+,{13,0,500},{142,0,251},{6,0,1093},{6,0,1405},{9,10,370},{138,10,90},{4,11,926},
+{133,11,983},{135,0,1776},{134,0,1528},{132,0,419},{132,11,538},{6,11,294},{7,11
+,1267},{136,11,624},{135,11,1772},{138,11,301},{4,10,257},{135,10,2031},{4,0,138
+},{7,0,1012},{7,0,1280},{9,0,76},{135,10,1768},{132,11,757},{5,0,29},{140,0,638}
+,{7,11,655},{135,11,1844},{7,0,1418},{6,11,257},{135,11,1522},{8,11,469},{138,11
+,47},{142,11,278},{6,10,83},{6,10,1733},{135,10,1389},{11,11,204},{11,11,243},{
+140,11,293},{135,11,1875},{6,0,1710},{135,0,2038},{137,11,299},{4,0,17},{5,0,23}
+,{7,0,995},{11,0,383},{11,0,437},{12,0,460},{140,0,532},{133,0,862},{137,10,696}
+,{6,0,592},{138,0,946},{138,11,599},{7,10,1718},{9,10,95},{9,10,274},{10,10,279}
+,{10,10,317},{10,10,420},{11,10,303},{11,10,808},{12,10,134},{12,10,367},{13,10,
+149},{13,10,347},{14,10,349},{14,10,406},{18,10,22},{18,10,89},{18,10,122},{147,
+10,47},{8,0,70},{12,0,171},{141,0,272},{133,10,26},{132,10,550},{137,0,812},{10,
+0,233},{139,0,76},{134,0,988},{134,0,442},{136,10,822},{7,0,896},{4,10,902},{5,
+10,809},{134,10,122},{5,11,150},{7,11,106},{8,11,603},{9,11,593},{9,11,634},{10,
+11,44},{10,11,173},{11,11,462},{11,11,515},{13,11,216},{13,11,288},{142,11,400},
+{136,0,483},{135,10,262},{6,0,1709},{133,10,620},{4,10,34},{5,10,574},{7,10,279}
+,{7,10,1624},{136,10,601},{137,10,170},{147,0,119},{12,11,108},{141,11,291},{11,
+0,69},{12,0,105},{12,0,117},{13,0,213},{14,0,13},{14,0,62},{14,0,177},{14,0,421}
+,{15,0,19},{146,0,141},{137,0,309},{11,11,278},{142,11,73},{7,0,608},{7,0,976},{
+9,0,146},{10,0,206},{10,0,596},{13,0,218},{142,0,153},{133,10,332},{6,10,261},{8
+,10,182},{139,10,943},{4,11,493},{144,11,55},{134,10,1721},{132,0,768},{4,10,933
+},{133,10,880},{7,11,555},{7,11,1316},{7,11,1412},{7,11,1839},{9,11,192},{9,11,
+589},{11,11,241},{11,11,676},{11,11,811},{11,11,891},{12,11,140},{12,11,346},{12
+,11,479},{13,11,30},{13,11,49},{13,11,381},{14,11,188},{15,11,150},{16,11,76},{
+18,11,30},{148,11,52},{4,0,518},{135,0,1136},{6,11,568},{7,11,112},{7,11,1804},{
+8,11,362},{8,11,410},{8,11,830},{9,11,514},{11,11,649},{142,11,157},{135,11,673}
+,{8,0,689},{137,0,863},{4,0,18},{7,0,145},{7,0,444},{7,0,1278},{8,0,49},{8,0,400
+},{9,0,71},{9,0,250},{10,0,459},{12,0,160},{16,0,24},{132,11,625},{140,0,1020},{
+4,0,997},{6,0,1946},{6,0,1984},{134,0,1998},{6,11,16},{6,11,158},{7,11,43},{7,11
+,129},{7,11,181},{8,11,276},{8,11,377},{10,11,523},{11,11,816},{12,11,455},{13,
+11,303},{142,11,135},{133,10,812},{134,0,658},{4,11,1},{7,11,1143},{7,11,1463},{
+8,11,61},{9,11,207},{9,11,390},{9,11,467},{139,11,836},{150,11,26},{140,0,106},{
+6,0,1827},{10,0,931},{18,0,166},{20,0,114},{4,10,137},{7,10,1178},{7,11,1319},{
+135,10,1520},{133,0,1010},{4,11,723},{5,11,895},{7,11,1031},{8,11,199},{8,11,340
+},{9,11,153},{9,11,215},{10,11,21},{10,11,59},{10,11,80},{10,11,224},{11,11,229}
+,{11,11,652},{12,11,192},{13,11,146},{142,11,91},{132,11,295},{6,11,619},{7,11,
+898},{7,11,1092},{8,11,485},{18,11,28},{147,11,116},{137,11,51},{6,10,1661},{7,
+10,1975},{7,10,2009},{135,10,2011},{5,11,309},{140,11,211},{5,0,87},{7,0,313},{7
+,0,1103},{10,0,208},{10,0,582},{11,0,389},{11,0,813},{12,0,385},{13,0,286},{14,0
+,124},{146,0,108},{5,11,125},{8,11,77},{138,11,15},{132,0,267},{133,0,703},{137,
+11,155},{133,11,439},{11,11,164},{140,11,76},{9,0,496},{5,10,89},{7,10,1915},{9,
+10,185},{9,10,235},{10,10,64},{10,10,270},{10,10,403},{10,10,469},{10,10,529},{
+10,10,590},{11,10,140},{11,10,860},{13,10,1},{13,10,422},{14,10,341},{14,10,364}
+,{17,10,93},{18,10,113},{19,10,97},{147,10,113},{133,10,695},{135,0,1121},{5,10,
+6},{6,10,183},{7,10,680},{7,10,978},{7,10,1013},{7,10,1055},{12,10,230},{13,10,
+172},{146,10,29},{4,11,8},{7,11,1152},{7,11,1153},{7,11,1715},{9,11,374},{10,11,
+478},{139,11,648},{135,11,1099},{6,10,29},{139,10,63},{4,0,561},{10,0,249},{139,
+0,209},{132,0,760},{7,11,799},{138,11,511},{136,11,87},{9,0,154},{140,0,485},{
+136,0,255},{132,0,323},{140,0,419},{132,10,311},{134,10,1740},{4,0,368},{135,0,
+641},{7,10,170},{8,10,90},{8,10,177},{8,10,415},{11,10,714},{142,10,281},{4,11,
+69},{5,11,122},{9,11,656},{138,11,464},{5,11,849},{134,11,1633},{8,0,522},{142,0
+,328},{11,10,91},{13,10,129},{15,10,101},{145,10,125},{7,0,562},{8,0,551},{4,10,
+494},{6,10,74},{7,10,44},{11,11,499},{12,10,17},{15,10,5},{148,10,11},{4,10,276}
+,{133,10,296},{9,0,92},{147,0,91},{4,10,7},{5,10,90},{5,10,158},{6,10,542},{7,10
+,221},{7,10,1574},{9,10,490},{10,10,540},{11,10,443},{139,10,757},{6,0,525},{6,0
+,1976},{8,0,806},{9,0,876},{140,0,284},{5,11,859},{7,10,588},{7,11,1160},{8,11,
+107},{9,10,175},{9,11,291},{9,11,439},{10,10,530},{10,11,663},{11,11,609},{140,
+11,197},{7,11,168},{13,11,196},{141,11,237},{139,0,958},{133,0,594},{135,10,580}
+,{7,10,88},{136,10,627},{6,0,479},{6,0,562},{7,0,1060},{13,0,6},{5,10,872},{6,10
+,57},{7,10,471},{9,10,447},{137,10,454},{136,11,413},{145,11,19},{4,11,117},{6,
+11,372},{7,11,1905},{142,11,323},{4,11,722},{139,11,471},{17,0,61},{5,10,31},{
+134,10,614},{8,10,330},{140,10,477},{7,10,1200},{138,10,460},{6,10,424},{135,10,
+1866},{6,0,1641},{136,0,820},{6,0,1556},{134,0,1618},{9,11,5},{12,11,216},{12,11
+,294},{12,11,298},{12,11,400},{12,11,518},{13,11,229},{143,11,139},{15,11,155},{
+144,11,79},{4,0,302},{135,0,1766},{5,10,13},{134,10,142},{6,0,148},{7,0,1313},{7
+,10,116},{8,10,322},{8,10,755},{9,10,548},{10,10,714},{11,10,884},{141,10,324},{
+137,0,676},{9,11,88},{139,11,270},{5,11,12},{7,11,375},{137,11,438},{134,0,1674}
+,{7,10,1472},{135,10,1554},{11,0,178},{7,10,1071},{7,10,1541},{7,10,1767},{7,10,
+1806},{11,10,162},{11,10,242},{12,10,605},{15,10,26},{144,10,44},{6,0,389},{7,0,
+149},{9,0,142},{138,0,94},{140,11,71},{145,10,115},{6,0,8},{7,0,1881},{8,0,91},{
+11,11,966},{12,11,287},{13,11,342},{13,11,402},{15,11,110},{143,11,163},{4,11,
+258},{136,11,639},{6,11,22},{7,11,903},{138,11,577},{133,11,681},{135,10,1111},{
+135,11,1286},{9,0,112},{8,10,1},{138,10,326},{5,10,488},{6,10,527},{7,10,489},{7
+,10,1636},{8,10,121},{8,10,144},{8,10,359},{9,10,193},{9,10,241},{9,10,336},{9,
+10,882},{11,10,266},{11,10,372},{11,10,944},{12,10,401},{140,10,641},{4,11,664},
+{133,11,804},{6,0,747},{134,0,1015},{135,0,1746},{9,10,31},{10,10,244},{10,10,
+699},{12,10,149},{141,10,497},{133,10,377},{135,0,24},{6,0,1352},{5,11,32},{145,
+10,101},{7,0,1530},{10,0,158},{13,0,13},{13,0,137},{13,0,258},{14,0,111},{14,0,
+225},{14,0,253},{14,0,304},{14,0,339},{14,0,417},{146,0,33},{4,0,503},{135,0,
+1661},{5,0,130},{6,0,845},{7,0,1314},{9,0,610},{10,0,718},{11,0,601},{11,0,819},
+{11,0,946},{140,0,536},{10,0,149},{11,0,280},{142,0,336},{134,0,1401},{135,0,
+1946},{8,0,663},{144,0,8},{134,0,1607},{135,10,2023},{4,11,289},{7,11,629},{7,11
+,1698},{7,11,1711},{140,11,215},{6,11,450},{136,11,109},{10,0,882},{10,0,883},{
+10,0,914},{138,0,928},{133,10,843},{136,11,705},{132,10,554},{133,10,536},{5,0,
+417},{9,10,79},{11,10,625},{145,10,7},{7,11,1238},{142,11,37},{4,0,392},{135,0,
+1597},{5,0,433},{9,0,633},{11,0,629},{132,10,424},{7,10,336},{136,10,785},{134,
+11,355},{6,0,234},{7,0,769},{9,0,18},{138,0,358},{4,10,896},{134,10,1777},{138,
+11,323},{7,0,140},{7,0,1950},{8,0,680},{11,0,817},{147,0,88},{7,0,1222},{138,0,
+386},{139,11,908},{11,0,249},{12,0,313},{16,0,66},{145,0,26},{134,0,5},{7,10,750
+},{9,10,223},{11,10,27},{11,10,466},{12,10,624},{14,10,265},{146,10,61},{134,11,
+26},{134,0,1216},{5,0,963},{134,0,1773},{4,11,414},{5,11,467},{9,11,654},{10,11,
+451},{12,11,59},{141,11,375},{135,11,17},{4,10,603},{133,10,661},{4,10,11},{6,10
+,128},{7,10,231},{7,10,1533},{138,10,725},{135,11,955},{7,0,180},{8,0,509},{136,
+0,792},{132,10,476},{132,0,1002},{133,11,538},{135,10,1807},{132,0,931},{7,0,943
+},{11,0,614},{140,0,747},{135,0,1837},{9,10,20},{10,10,324},{10,10,807},{139,10,
+488},{134,0,641},{6,11,280},{10,11,502},{11,11,344},{140,11,38},{5,11,45},{7,11,
+1161},{11,11,448},{11,11,880},{13,11,139},{13,11,407},{15,11,16},{17,11,95},{18,
+11,66},{18,11,88},{18,11,123},{149,11,7},{9,0,280},{138,0,134},{22,0,22},{23,0,5
+},{151,0,29},{136,11,777},{4,0,90},{5,0,545},{7,0,754},{9,0,186},{10,0,72},{10,0
+,782},{11,0,577},{11,0,610},{11,0,960},{12,0,354},{12,0,362},{12,0,595},{4,11,
+410},{135,11,521},{135,11,1778},{5,10,112},{6,10,103},{134,10,150},{138,10,356},
+{132,0,742},{7,0,151},{9,0,329},{139,0,254},{8,0,853},{8,0,881},{8,0,911},{8,0,
+912},{10,0,872},{12,0,741},{12,0,742},{152,0,18},{4,11,573},{136,11,655},{6,0,
+921},{134,0,934},{9,0,187},{10,0,36},{11,0,1016},{17,0,44},{146,0,64},{7,0,833},
+{136,0,517},{4,0,506},{5,0,295},{135,0,1680},{4,10,708},{8,10,15},{9,10,50},{9,
+10,386},{11,10,18},{11,10,529},{140,10,228},{7,0,251},{7,0,1701},{8,0,436},{4,10
+,563},{7,10,592},{7,10,637},{7,10,770},{8,10,463},{9,10,60},{9,10,335},{9,10,904
+},{10,10,73},{11,10,434},{12,10,585},{13,10,331},{18,10,110},{148,10,60},{132,10
+,502},{136,0,584},{6,10,347},{138,10,161},{7,0,987},{9,0,688},{10,0,522},{11,0,
+788},{12,0,137},{12,0,566},{14,0,9},{14,0,24},{14,0,64},{7,11,899},{142,11,325},
+{4,0,214},{5,0,500},{5,10,102},{6,10,284},{7,10,1079},{7,10,1423},{7,10,1702},{8
+,10,470},{9,10,554},{9,10,723},{139,10,333},{7,10,246},{135,10,840},{6,10,10},{8
+,10,571},{9,10,739},{143,10,91},{133,10,626},{146,0,195},{134,0,1775},{7,0,389},
+{7,0,700},{7,0,940},{8,0,514},{9,0,116},{9,0,535},{10,0,118},{11,0,107},{11,0,
+148},{11,0,922},{12,0,254},{12,0,421},{142,0,238},{5,10,18},{6,10,526},{13,10,24
+},{13,10,110},{19,10,5},{147,10,44},{132,0,743},{11,0,292},{4,10,309},{5,10,462}
+,{7,10,970},{135,10,1097},{22,10,30},{150,10,33},{139,11,338},{135,11,1598},{7,0
+,1283},{9,0,227},{11,0,325},{11,0,408},{14,0,180},{146,0,47},{4,0,953},{6,0,1805
+},{6,0,1814},{6,0,1862},{140,0,774},{6,11,611},{135,11,1733},{135,11,1464},{5,0,
+81},{7,0,146},{7,0,1342},{8,0,53},{8,0,561},{8,0,694},{8,0,754},{9,0,115},{9,0,
+179},{9,0,894},{10,0,462},{10,0,813},{11,0,230},{11,0,657},{11,0,699},{11,0,748}
+,{12,0,119},{12,0,200},{12,0,283},{142,0,273},{5,0,408},{6,0,789},{6,0,877},{6,0
+,1253},{6,0,1413},{137,0,747},{134,10,1704},{135,11,663},{6,0,1910},{6,0,1915},{
+6,0,1923},{9,0,913},{9,0,928},{9,0,950},{9,0,954},{9,0,978},{9,0,993},{12,0,812}
+,{12,0,819},{12,0,831},{12,0,833},{12,0,838},{12,0,909},{12,0,928},{12,0,931},{
+12,0,950},{15,0,186},{15,0,187},{15,0,195},{15,0,196},{15,0,209},{15,0,215},{15,
+0,236},{15,0,241},{15,0,249},{15,0,253},{18,0,180},{18,0,221},{18,0,224},{18,0,
+227},{18,0,229},{149,0,60},{7,0,1826},{135,0,1938},{11,0,490},{18,0,143},{5,10,
+86},{7,10,743},{9,10,85},{10,10,281},{10,10,432},{12,10,251},{13,10,118},{142,10
+,378},{5,10,524},{133,10,744},{141,11,442},{10,10,107},{140,10,436},{135,11,503}
+,{134,0,1162},{132,10,927},{7,0,30},{8,0,86},{8,0,315},{8,0,700},{9,0,576},{9,0,
+858},{10,0,414},{11,0,310},{11,0,888},{11,0,904},{12,0,361},{13,0,248},{13,0,371
+},{14,0,142},{12,10,670},{146,10,94},{134,0,721},{4,11,113},{5,11,163},{5,11,735
+},{7,11,1009},{7,10,1149},{9,11,9},{9,10,156},{9,11,771},{12,11,90},{13,11,138},
+{13,11,410},{143,11,128},{138,0,839},{133,10,778},{137,0,617},{133,10,502},{8,10
+,196},{10,10,283},{139,10,406},{6,0,428},{7,0,524},{8,0,169},{8,0,234},{9,0,480}
+,{138,0,646},{133,10,855},{134,0,1648},{7,0,1205},{138,0,637},{7,0,1596},{4,11,
+935},{133,11,823},{5,11,269},{7,11,434},{7,11,891},{8,11,339},{9,11,702},{11,11,
+594},{11,11,718},{145,11,100},{7,11,878},{9,11,485},{141,11,264},{4,0,266},{8,0,
+4},{9,0,39},{10,0,166},{11,0,918},{12,0,635},{20,0,10},{22,0,27},{22,0,43},{22,0
+,52},{134,11,1713},{7,10,1400},{9,10,446},{138,10,45},{135,11,900},{132,0,862},{
+134,0,1554},{135,11,1033},{19,0,16},{147,11,16},{135,11,1208},{7,0,157},{136,0,
+279},{6,0,604},{136,0,391},{13,10,455},{15,10,99},{15,10,129},{144,10,68},{135,
+10,172},{7,0,945},{11,0,713},{139,0,744},{4,0,973},{10,0,877},{10,0,937},{10,0,
+938},{140,0,711},{139,0,1022},{132,10,568},{142,11,143},{4,0,567},{9,0,859},{132
+,10,732},{7,0,1846},{136,0,628},{136,10,733},{133,0,762},{4,10,428},{135,10,1789
+},{10,0,784},{13,0,191},{7,10,2015},{140,10,665},{133,0,298},{7,0,633},{7,0,905}
+,{7,0,909},{7,0,1538},{9,0,767},{140,0,636},{138,10,806},{132,0,795},{139,0,301}
+,{135,0,1970},{5,11,625},{135,11,1617},{135,11,275},{7,11,37},{8,11,425},{8,11,
+693},{9,11,720},{10,11,380},{10,11,638},{11,11,273},{11,11,307},{11,11,473},{12,
+11,61},{143,11,43},{135,11,198},{134,0,1236},{7,0,369},{12,0,644},{12,0,645},{
+144,0,90},{19,0,15},{149,0,27},{6,0,71},{7,0,845},{8,0,160},{9,0,318},{6,10,1623
+},{134,10,1681},{134,0,1447},{134,0,1255},{138,0,735},{8,0,76},{132,11,168},{6,
+10,1748},{8,10,715},{9,10,802},{10,10,46},{10,10,819},{13,10,308},{14,10,351},{
+14,10,363},{146,10,67},{135,11,91},{6,0,474},{4,10,63},{133,10,347},{133,10,749}
+,{138,0,841},{133,10,366},{6,0,836},{132,11,225},{135,0,1622},{135,10,89},{140,0
+,735},{134,0,1601},{138,11,145},{6,0,1390},{137,0,804},{142,0,394},{6,11,15},{7,
+11,70},{10,11,240},{147,11,93},{6,0,96},{135,0,1426},{4,0,651},{133,0,289},{7,11
+,956},{7,10,977},{7,11,1157},{7,11,1506},{7,11,1606},{7,11,1615},{7,11,1619},{7,
+11,1736},{7,11,1775},{8,11,590},{9,11,324},{9,11,736},{9,11,774},{9,11,776},{9,
+11,784},{10,11,567},{10,11,708},{11,11,518},{11,11,613},{11,11,695},{11,11,716},
+{11,11,739},{11,11,770},{11,11,771},{11,11,848},{11,11,857},{11,11,931},{11,11,
+947},{12,11,326},{12,11,387},{12,11,484},{12,11,528},{12,11,552},{12,11,613},{13
+,11,189},{13,11,256},{13,11,340},{13,11,432},{13,11,436},{13,11,440},{13,11,454}
+,{14,11,174},{14,11,220},{14,11,284},{14,11,390},{145,11,121},{7,0,688},{8,0,35}
+,{9,0,511},{10,0,767},{147,0,118},{134,0,667},{4,0,513},{5,10,824},{133,10,941},
+{7,10,440},{8,10,230},{139,10,106},{134,0,2034},{135,11,1399},{143,11,66},{135,
+11,1529},{4,11,145},{6,11,176},{7,11,395},{9,11,562},{144,11,28},{132,11,501},{
+132,0,704},{134,0,1524},{7,0,1078},{134,11,464},{6,11,509},{10,11,82},{20,11,91}
+,{151,11,13},{4,0,720},{133,0,306},{133,0,431},{7,0,1196},{4,10,914},{5,10,800},
+{133,10,852},{135,11,1189},{10,0,54},{141,10,115},{7,10,564},{142,10,168},{5,0,
+464},{6,0,236},{7,0,696},{7,0,914},{7,0,1108},{7,0,1448},{9,0,15},{9,0,564},{10,
+0,14},{12,0,565},{13,0,449},{14,0,53},{15,0,13},{16,0,64},{17,0,41},{4,10,918},{
+133,10,876},{6,0,1418},{134,10,1764},{4,10,92},{133,10,274},{134,0,907},{4,11,
+114},{8,10,501},{9,11,492},{13,11,462},{142,11,215},{4,11,77},{5,11,361},{6,11,
+139},{6,11,401},{6,11,404},{7,11,413},{7,11,715},{7,11,1716},{11,11,279},{12,11,
+179},{12,11,258},{13,11,244},{142,11,358},{6,0,1767},{12,0,194},{145,0,107},{134
+,11,1717},{5,10,743},{142,11,329},{4,10,49},{7,10,280},{135,10,1633},{5,0,840},{
+7,11,1061},{8,11,82},{11,11,250},{12,11,420},{141,11,184},{135,11,724},{134,0,
+900},{136,10,47},{134,0,1436},{144,11,0},{6,0,675},{7,0,1008},{7,0,1560},{9,0,
+642},{11,0,236},{14,0,193},{5,10,272},{5,10,908},{5,10,942},{8,10,197},{9,10,47}
+,{11,10,538},{139,10,742},{4,0,68},{5,0,628},{5,0,634},{6,0,386},{7,0,794},{8,0,
+273},{9,0,563},{10,0,105},{10,0,171},{11,0,94},{139,0,354},{135,10,1911},{137,10
+,891},{4,0,95},{6,0,1297},{6,0,1604},{7,0,416},{139,0,830},{6,11,513},{135,11,
+1052},{7,0,731},{13,0,20},{143,0,11},{137,11,899},{10,0,850},{140,0,697},{4,0,
+662},{7,11,1417},{12,11,382},{17,11,48},{152,11,12},{133,0,736},{132,0,861},{4,
+10,407},{132,10,560},{141,10,490},{6,11,545},{7,11,565},{7,11,1669},{10,11,114},
+{11,11,642},{140,11,618},{6,0,871},{134,0,1000},{5,0,864},{10,0,648},{11,0,671},
+{15,0,46},{133,11,5},{133,0,928},{11,0,90},{13,0,7},{4,10,475},{11,10,35},{13,10
+,71},{13,10,177},{142,10,422},{136,0,332},{135,11,192},{134,0,1055},{136,11,763}
+,{11,0,986},{140,0,682},{7,0,76},{8,0,44},{9,0,884},{10,0,580},{11,0,399},{11,0,
+894},{143,0,122},{135,11,1237},{135,10,636},{11,0,300},{6,10,222},{7,10,1620},{8
+,10,409},{137,10,693},{4,11,87},{5,11,250},{10,11,601},{13,11,298},{13,11,353},{
+141,11,376},{5,0,518},{10,0,340},{11,0,175},{149,0,16},{140,0,771},{6,0,1108},{
+137,0,831},{132,0,836},{135,0,1852},{4,0,957},{6,0,1804},{8,0,842},{8,0,843},{8,
+0,851},{8,0,855},{140,0,767},{135,11,814},{4,11,57},{7,11,1195},{7,11,1438},{7,
+11,1548},{7,11,1835},{7,11,1904},{9,11,757},{10,11,604},{139,11,519},{133,10,882
+},{138,0,246},{4,0,934},{5,0,202},{8,0,610},{7,11,1897},{12,11,290},{13,11,80},{
+13,11,437},{145,11,74},{8,0,96},{9,0,36},{10,0,607},{10,0,804},{10,0,832},{11,0,
+423},{11,0,442},{12,0,309},{14,0,199},{15,0,90},{145,0,110},{132,10,426},{7,0,
+654},{8,0,240},{6,10,58},{7,10,745},{7,10,1969},{8,10,675},{9,10,479},{9,10,731}
+,{10,10,330},{10,10,593},{10,10,817},{11,10,32},{11,10,133},{11,10,221},{145,10,
+68},{9,0,13},{9,0,398},{9,0,727},{10,0,75},{10,0,184},{10,0,230},{10,0,564},{10,
+0,569},{11,0,973},{12,0,70},{12,0,189},{13,0,57},{141,0,257},{4,11,209},{135,11,
+902},{7,0,391},{137,10,538},{134,0,403},{6,11,303},{7,11,335},{7,11,1437},{7,11,
+1668},{8,11,553},{8,11,652},{8,11,656},{9,11,558},{11,11,743},{149,11,18},{132,
+11,559},{11,0,75},{142,0,267},{6,0,815},{141,11,2},{141,0,366},{137,0,631},{133,
+11,1017},{5,0,345},{135,0,1016},{133,11,709},{134,11,1745},{133,10,566},{7,0,952
+},{6,10,48},{9,10,139},{10,10,399},{11,10,469},{12,10,634},{141,10,223},{133,0,
+673},{9,0,850},{7,11,8},{136,11,206},{6,0,662},{149,0,35},{4,0,287},{133,0,1018}
+,{6,10,114},{7,10,1224},{7,10,1556},{136,10,3},{8,10,576},{137,10,267},{4,0,884}
+,{5,0,34},{10,0,724},{12,0,444},{13,0,354},{18,0,32},{23,0,24},{23,0,31},{152,0,
+5},{133,10,933},{132,11,776},{138,0,151},{136,0,427},{134,0,382},{132,0,329},{9,
+0,846},{10,0,827},{138,11,33},{9,0,279},{10,0,407},{14,0,84},{22,0,18},{135,11,
+1297},{136,11,406},{132,0,906},{136,0,366},{134,0,843},{134,0,1443},{135,0,1372}
+,{138,0,992},{4,0,123},{5,0,605},{7,0,1509},{136,0,36},{132,0,649},{8,11,175},{
+10,11,168},{138,11,573},{133,0,767},{134,0,1018},{135,11,1305},{12,10,30},{13,10
+,148},{14,10,87},{14,10,182},{16,10,42},{148,10,70},{134,11,607},{4,0,273},{5,0,
+658},{133,0,995},{6,0,72},{139,11,174},{10,0,483},{12,0,368},{7,10,56},{7,10,
+1989},{8,10,337},{8,10,738},{9,10,600},{13,10,447},{142,10,92},{5,11,784},{138,
+10,666},{135,0,1345},{139,11,882},{134,0,1293},{133,0,589},{134,0,1988},{5,0,117
+},{6,0,514},{6,0,541},{7,0,1164},{7,0,1436},{8,0,220},{8,0,648},{10,0,688},{139,
+0,560},{136,0,379},{5,0,686},{7,10,866},{135,10,1163},{132,10,328},{9,11,14},{9,
+11,441},{10,11,306},{139,11,9},{4,10,101},{135,10,1171},{5,10,833},{136,10,744},
+{5,11,161},{7,11,839},{135,11,887},{7,0,196},{10,0,765},{11,0,347},{11,0,552},{
+11,0,790},{12,0,263},{13,0,246},{13,0,270},{13,0,395},{14,0,176},{14,0,190},{14,
+0,398},{14,0,412},{15,0,32},{15,0,63},{16,0,88},{147,0,105},{6,10,9},{6,10,397},
+{7,10,53},{7,10,1742},{10,10,632},{11,10,828},{140,10,146},{5,0,381},{135,0,1792
+},{134,0,1452},{135,11,429},{8,0,367},{10,0,760},{14,0,79},{20,0,17},{152,0,0},{
+7,0,616},{138,0,413},{11,10,417},{12,10,223},{140,10,265},{7,11,1611},{13,11,14}
+,{15,11,44},{19,11,13},{148,11,76},{135,0,1229},{6,0,120},{7,0,1188},{7,0,1710},
+{8,0,286},{9,0,667},{11,0,592},{139,0,730},{135,11,1814},{135,0,1146},{4,10,186}
+,{5,10,157},{8,10,168},{138,10,6},{4,0,352},{135,0,687},{4,0,192},{5,0,49},{6,0,
+200},{6,0,293},{6,0,1696},{135,0,1151},{133,10,875},{5,10,773},{5,10,991},{6,10,
+1635},{134,10,1788},{7,10,111},{136,10,581},{6,0,935},{134,0,1151},{134,0,1050},
+{132,0,650},{132,0,147},{11,0,194},{12,0,62},{12,0,88},{11,11,194},{12,11,62},{
+140,11,88},{6,0,339},{135,0,923},{134,10,1747},{7,11,643},{136,11,236},{133,0,
+934},{7,10,1364},{7,10,1907},{141,10,158},{132,10,659},{4,10,404},{135,10,675},{
+7,11,581},{9,11,644},{137,11,699},{13,0,211},{14,0,133},{14,0,204},{15,0,64},{15
+,0,69},{15,0,114},{16,0,10},{19,0,23},{19,0,35},{19,0,39},{19,0,51},{19,0,71},{
+19,0,75},{152,0,15},{133,10,391},{5,11,54},{135,11,1513},{7,0,222},{8,0,341},{5,
+10,540},{134,10,1697},{134,10,78},{132,11,744},{136,0,293},{137,11,701},{7,11,
+930},{10,11,402},{10,11,476},{13,11,452},{18,11,55},{147,11,104},{132,0,637},{
+133,10,460},{8,11,50},{137,11,624},{132,11,572},{134,0,1159},{4,10,199},{139,10,
+34},{134,0,847},{134,10,388},{6,11,43},{7,11,38},{8,11,248},{9,11,504},{138,11,
+513},{9,0,683},{4,10,511},{6,10,608},{9,10,333},{10,10,602},{11,10,441},{11,10,
+723},{11,10,976},{140,10,357},{9,0,867},{138,0,837},{6,0,944},{135,11,326},{135,
+0,1809},{5,10,938},{7,11,783},{136,10,707},{133,11,766},{133,11,363},{6,0,170},{
+7,0,1080},{8,0,395},{8,0,487},{141,0,147},{6,11,258},{140,11,409},{4,0,535},{8,0
+,618},{5,11,249},{148,11,82},{6,0,1379},{149,11,15},{135,0,1625},{150,0,23},{5,
+11,393},{6,11,378},{7,11,1981},{9,11,32},{9,11,591},{10,11,685},{10,11,741},{142
+,11,382},{133,11,788},{7,11,1968},{10,11,19},{139,11,911},{7,11,1401},{135,11,
+1476},{4,11,61},{5,11,58},{5,11,171},{5,11,635},{5,11,683},{5,11,700},{6,11,291}
+,{6,11,566},{7,11,1650},{11,11,523},{12,11,273},{12,11,303},{15,11,39},{143,11,
+111},{6,10,469},{7,10,1709},{138,10,515},{4,0,778},{134,11,589},{132,0,46},{5,0,
+811},{6,0,1679},{6,0,1714},{135,0,2032},{7,0,1458},{9,0,407},{11,0,15},{12,0,651
+},{149,0,37},{7,0,938},{132,10,500},{6,0,34},{7,0,69},{7,0,1089},{7,0,1281},{8,0
+,708},{8,0,721},{9,0,363},{148,0,98},{10,11,231},{147,11,124},{7,11,726},{152,11
+,9},{5,10,68},{134,10,383},{136,11,583},{4,11,917},{133,11,1005},{11,10,216},{
+139,10,340},{135,11,1675},{8,0,441},{10,0,314},{143,0,3},{132,11,919},{4,10,337}
+,{6,10,353},{7,10,1934},{8,10,488},{137,10,429},{7,0,889},{7,10,1795},{8,10,259}
+,{9,10,135},{9,10,177},{9,10,860},{10,10,825},{11,10,115},{11,10,370},{11,10,405
+},{11,10,604},{12,10,10},{12,10,667},{12,10,669},{13,10,76},{14,10,310},{15,10,
+76},{15,10,147},{148,10,23},{4,10,15},{4,11,255},{5,10,22},{5,11,302},{6,11,132}
+,{6,10,244},{7,10,40},{7,11,128},{7,10,200},{7,11,283},{7,10,906},{7,10,1199},{7
+,11,1299},{9,10,616},{10,11,52},{10,11,514},{10,10,716},{11,10,635},{11,10,801},
+{11,11,925},{12,10,458},{13,11,92},{142,11,309},{132,0,462},{137,11,173},{135,10
+,1735},{8,0,525},{5,10,598},{7,10,791},{8,10,108},{137,10,123},{5,0,73},{6,0,23}
+,{134,0,338},{132,0,676},{132,10,683},{7,0,725},{8,0,498},{139,0,268},{12,0,21},
+{151,0,7},{135,0,773},{4,10,155},{135,10,1689},{4,0,164},{5,0,730},{5,10,151},{5
+,10,741},{6,11,210},{7,10,498},{7,10,870},{7,10,1542},{12,10,213},{14,10,36},{14
+,10,391},{17,10,111},{18,10,6},{18,10,46},{18,10,151},{19,10,36},{20,10,32},{20,
+10,56},{20,10,69},{20,10,102},{21,10,4},{22,10,8},{22,10,10},{22,10,14},{150,10,
+31},{4,10,624},{135,10,1752},{4,0,583},{9,0,936},{15,0,214},{18,0,199},{24,0,26}
+,{134,11,588},{7,0,1462},{11,0,659},{4,11,284},{134,11,223},{133,0,220},{139,0,
+803},{132,0,544},{4,10,492},{133,10,451},{16,0,98},{148,0,119},{4,11,218},{7,11,
+526},{143,11,137},{135,10,835},{4,11,270},{5,11,192},{6,11,332},{7,11,1322},{13,
+11,9},{13,10,70},{14,11,104},{142,11,311},{132,10,539},{140,11,661},{5,0,176},{6
+,0,437},{6,0,564},{11,0,181},{141,0,183},{135,0,1192},{6,10,113},{135,10,436},{
+136,10,718},{135,10,520},{135,0,1878},{140,11,196},{7,11,379},{8,11,481},{137,11
+,377},{5,11,1003},{6,11,149},{137,11,746},{8,11,262},{9,11,627},{10,11,18},{11,
+11,214},{11,11,404},{11,11,457},{11,11,780},{11,11,849},{11,11,913},{13,11,330},
+{13,11,401},{142,11,200},{149,0,26},{136,11,304},{132,11,142},{135,0,944},{4,0,
+790},{5,0,273},{134,0,394},{134,0,855},{4,0,135},{6,0,127},{7,0,1185},{7,0,1511}
+,{8,0,613},{11,0,5},{12,0,336},{12,0,495},{12,0,586},{12,0,660},{12,0,668},{14,0
+,385},{15,0,118},{17,0,20},{146,0,98},{6,0,230},{9,0,752},{18,0,109},{12,10,610}
+,{13,10,431},{144,10,59},{7,0,1954},{135,11,925},{4,11,471},{5,11,51},{6,11,602}
+,{8,11,484},{10,11,195},{140,11,159},{132,10,307},{136,11,688},{132,11,697},{7,
+11,812},{7,11,1261},{7,11,1360},{9,11,632},{140,11,352},{5,0,162},{8,0,68},{133,
+10,964},{4,0,654},{136,11,212},{4,0,156},{7,0,998},{7,0,1045},{7,0,1860},{9,0,48
+},{9,0,692},{11,0,419},{139,0,602},{133,11,221},{4,11,373},{5,11,283},{6,11,480}
+,{135,11,609},{142,11,216},{132,0,240},{6,11,192},{9,11,793},{145,11,55},{4,10,
+75},{5,10,180},{6,10,500},{7,10,58},{7,10,710},{138,10,645},{4,11,132},{5,11,69}
+,{5,10,649},{135,11,1242},{6,10,276},{7,10,282},{7,10,879},{7,10,924},{8,10,459}
+,{9,10,599},{9,10,754},{11,10,574},{12,10,128},{12,10,494},{13,10,52},{13,10,301
+},{15,10,30},{143,10,132},{132,10,200},{4,11,111},{135,11,302},{9,0,197},{10,0,
+300},{12,0,473},{13,0,90},{141,0,405},{132,11,767},{6,11,42},{7,11,1416},{7,11,
+1590},{7,11,2005},{8,11,131},{8,11,466},{9,11,672},{13,11,252},{148,11,103},{8,0
+,958},{8,0,999},{10,0,963},{138,0,1001},{135,10,1621},{135,0,858},{4,0,606},{137
+,11,444},{6,11,44},{136,11,368},{139,11,172},{4,11,570},{133,11,120},{139,11,624
+},{7,0,1978},{8,0,676},{6,10,225},{137,10,211},{7,0,972},{11,0,102},{136,10,687}
+,{6,11,227},{135,11,1589},{8,10,58},{9,10,724},{11,10,809},{13,10,113},{145,10,
+72},{4,0,361},{133,0,315},{132,0,461},{6,10,345},{135,10,1247},{132,0,472},{8,10
+,767},{8,10,803},{9,10,301},{137,10,903},{135,11,1333},{135,11,477},{7,10,1949},
+{136,10,674},{6,0,905},{138,0,747},{133,0,155},{134,10,259},{7,0,163},{8,0,319},
+{9,0,402},{10,0,24},{10,0,681},{11,0,200},{12,0,253},{12,0,410},{142,0,219},{5,0
+,475},{7,0,1780},{9,0,230},{11,0,297},{11,0,558},{14,0,322},{19,0,76},{6,11,1667
+},{7,11,2036},{138,11,600},{136,10,254},{6,0,848},{135,0,1956},{6,11,511},{140,
+11,132},{5,11,568},{6,11,138},{135,11,1293},{6,0,631},{137,0,838},{149,0,36},{4,
+11,565},{8,11,23},{136,11,827},{5,0,944},{134,0,1769},{4,0,144},{6,0,842},{6,0,
+1400},{4,11,922},{133,11,1023},{133,10,248},{9,10,800},{10,10,693},{11,10,482},{
+11,10,734},{139,10,789},{7,11,1002},{139,11,145},{4,10,116},{5,10,95},{5,10,445}
+,{7,10,1688},{8,10,29},{9,10,272},{11,10,509},{139,10,915},{14,0,369},{146,0,72}
+,{135,10,1641},{132,11,740},{133,10,543},{140,11,116},{6,0,247},{9,0,555},{5,10,
+181},{136,10,41},{133,10,657},{136,0,996},{138,10,709},{7,0,189},{8,10,202},{138
+,10,536},{136,11,402},{4,11,716},{141,11,31},{10,0,280},{138,0,797},{9,10,423},{
+140,10,89},{8,10,113},{9,10,877},{10,10,554},{11,10,83},{12,10,136},{147,10,109}
+,{133,10,976},{7,0,746},{132,10,206},{136,0,526},{139,0,345},{136,0,1017},{8,11,
+152},{9,11,53},{9,11,268},{9,11,901},{10,11,518},{10,11,829},{11,11,188},{13,11,
+74},{14,11,46},{15,11,17},{15,11,33},{17,11,40},{18,11,36},{19,11,20},{22,11,1},
+{152,11,2},{133,11,736},{136,11,532},{5,0,428},{138,0,651},{135,11,681},{135,0,
+1162},{7,0,327},{13,0,230},{17,0,113},{8,10,226},{10,10,537},{11,10,570},{11,10,
+605},{11,10,799},{11,10,804},{12,10,85},{12,10,516},{12,10,623},{12,11,677},{13,
+10,361},{14,10,77},{14,10,78},{147,10,110},{4,0,792},{7,0,1717},{10,0,546},{132,
+10,769},{4,11,684},{136,11,384},{132,10,551},{134,0,1203},{9,10,57},{9,10,459},{
+10,10,425},{11,10,119},{12,10,184},{12,10,371},{13,10,358},{145,10,51},{5,0,672}
+,{5,10,814},{8,10,10},{9,10,421},{9,10,729},{10,10,609},{139,10,689},{138,0,189}
+,{134,10,624},{7,11,110},{7,11,188},{8,11,290},{8,11,591},{9,11,382},{9,11,649},
+{11,11,71},{11,11,155},{11,11,313},{12,11,5},{13,11,325},{142,11,287},{133,0,99}
+,{6,0,1053},{135,0,298},{7,11,360},{7,11,425},{9,11,66},{9,11,278},{138,11,644},
+{4,0,397},{136,0,555},{137,10,269},{132,10,528},{4,11,900},{133,11,861},{6,0,
+1157},{5,11,254},{7,11,985},{136,11,73},{7,11,1959},{136,11,683},{12,0,398},{20,
+0,39},{21,0,11},{150,0,41},{4,0,485},{7,0,353},{135,0,1523},{6,0,366},{7,0,1384}
+,{135,0,1601},{138,0,787},{137,0,282},{5,10,104},{6,10,173},{135,10,1631},{139,
+11,146},{4,0,157},{133,0,471},{134,0,941},{132,11,725},{7,0,1336},{8,10,138},{8,
+10,342},{9,10,84},{10,10,193},{11,10,883},{140,10,359},{134,11,196},{136,0,116},
+{133,11,831},{134,0,787},{134,10,95},{6,10,406},{10,10,409},{10,10,447},{11,10,
+44},{140,10,100},{5,0,160},{7,0,363},{7,0,589},{10,0,170},{141,0,55},{134,0,1815
+},{132,0,866},{6,0,889},{6,0,1067},{6,0,1183},{4,11,321},{134,11,569},{5,11,848}
+,{134,11,66},{4,11,36},{6,10,1636},{7,11,1387},{10,11,205},{11,11,755},{141,11,
+271},{132,0,689},{9,0,820},{4,10,282},{7,10,1034},{11,10,398},{11,10,634},{12,10
+,1},{12,10,79},{12,10,544},{14,10,237},{17,10,10},{146,10,20},{4,0,108},{7,0,804
+},{139,0,498},{132,11,887},{6,0,1119},{135,11,620},{6,11,165},{138,11,388},{5,0,
+244},{5,10,499},{6,10,476},{7,10,600},{7,10,888},{135,10,1096},{140,0,609},{135,
+0,1005},{4,0,412},{133,0,581},{4,11,719},{135,11,155},{7,10,296},{7,10,596},{8,
+10,560},{8,10,586},{9,10,612},{11,10,304},{12,10,46},{13,10,89},{14,10,112},{145
+,10,122},{4,0,895},{133,0,772},{142,11,307},{135,0,1898},{4,0,926},{133,0,983},{
+4,11,353},{6,11,146},{6,11,1789},{7,11,288},{7,11,990},{7,11,1348},{9,11,665},{9
+,11,898},{11,11,893},{142,11,212},{132,0,538},{133,11,532},{6,0,294},{7,0,1267},
+{8,0,624},{141,0,496},{7,0,1325},{4,11,45},{135,11,1257},{138,0,301},{9,0,298},{
+12,0,291},{13,0,276},{14,0,6},{17,0,18},{21,0,32},{7,10,1599},{7,10,1723},{8,10,
+79},{8,10,106},{8,10,190},{8,10,302},{8,10,383},{8,10,713},{9,10,119},{9,10,233}
+,{9,10,419},{9,10,471},{10,10,181},{10,10,406},{11,10,57},{11,10,85},{11,10,120}
+,{11,10,177},{11,10,296},{11,10,382},{11,10,454},{11,10,758},{11,10,999},{12,10,
+27},{12,10,131},{12,10,245},{12,10,312},{12,10,446},{12,10,454},{13,10,98},{13,
+10,426},{13,10,508},{14,10,163},{14,10,272},{14,10,277},{14,10,370},{15,10,95},{
+15,10,138},{15,10,167},{17,10,38},{148,10,96},{132,0,757},{134,0,1263},{4,0,820}
+,{134,10,1759},{133,0,722},{136,11,816},{138,10,372},{145,10,16},{134,0,1039},{4
+,0,991},{134,0,2028},{133,10,258},{7,0,1875},{139,0,124},{6,11,559},{6,11,1691},
+{135,11,586},{5,0,324},{7,0,881},{8,10,134},{9,10,788},{140,10,438},{7,11,1823},
+{139,11,693},{6,0,1348},{134,0,1545},{134,0,911},{132,0,954},{8,0,329},{8,0,414}
+,{7,10,1948},{135,10,2004},{5,0,517},{6,10,439},{7,10,780},{135,10,1040},{132,0,
+816},{5,10,1},{6,10,81},{138,10,520},{9,0,713},{10,0,222},{5,10,482},{8,10,98},{
+10,10,700},{10,10,822},{11,10,302},{11,10,778},{12,10,50},{12,10,127},{12,10,396
+},{13,10,62},{13,10,328},{14,10,122},{147,10,72},{137,0,33},{5,10,2},{7,10,1494}
+,{136,10,589},{6,10,512},{7,10,797},{8,10,253},{9,10,77},{10,10,1},{10,11,108},{
+10,10,129},{10,10,225},{11,11,116},{11,10,118},{11,10,226},{11,10,251},{11,10,
+430},{11,10,701},{11,10,974},{11,10,982},{12,10,64},{12,10,260},{12,10,488},{140
+,10,690},{134,11,456},{133,11,925},{5,0,150},{7,0,106},{7,0,774},{8,0,603},{9,0,
+593},{9,0,634},{10,0,44},{10,0,173},{11,0,462},{11,0,515},{13,0,216},{13,0,288},
+{142,0,400},{137,10,347},{5,0,748},{134,0,553},{12,0,108},{141,0,291},{7,0,420},
+{4,10,12},{7,10,522},{7,10,809},{8,10,797},{141,10,88},{6,11,193},{7,11,240},{7,
+11,1682},{10,11,51},{10,11,640},{11,11,410},{13,11,82},{14,11,247},{14,11,331},{
+142,11,377},{133,10,528},{135,0,1777},{4,0,493},{144,0,55},{136,11,633},{139,0,
+81},{6,0,980},{136,0,321},{148,10,109},{5,10,266},{9,10,290},{9,10,364},{10,10,
+293},{11,10,606},{142,10,45},{6,0,568},{7,0,112},{7,0,1804},{8,0,362},{8,0,410},
+{8,0,830},{9,0,514},{11,0,649},{142,0,157},{4,0,74},{6,0,510},{6,10,594},{9,10,
+121},{10,10,49},{10,10,412},{139,10,834},{134,0,838},{136,10,748},{132,10,466},{
+132,0,625},{135,11,1443},{4,11,237},{135,11,514},{9,10,378},{141,10,162},{6,0,16
+},{6,0,158},{7,0,43},{7,0,129},{7,0,181},{8,0,276},{8,0,377},{10,0,523},{11,0,
+816},{12,0,455},{13,0,303},{142,0,135},{135,0,281},{4,0,1},{7,0,1143},{7,0,1463}
+,{8,0,61},{9,0,207},{9,0,390},{9,0,467},{139,0,836},{6,11,392},{7,11,65},{135,11
+,2019},{132,10,667},{4,0,723},{5,0,895},{7,0,1031},{8,0,199},{8,0,340},{9,0,153}
+,{9,0,215},{10,0,21},{10,0,59},{10,0,80},{10,0,224},{10,0,838},{11,0,229},{11,0,
+652},{12,0,192},{13,0,146},{142,0,91},{132,0,295},{137,0,51},{9,11,222},{10,11,
+43},{139,11,900},{5,0,309},{140,0,211},{5,0,125},{8,0,77},{138,0,15},{136,11,604
+},{138,0,789},{5,0,173},{4,10,39},{7,10,1843},{8,10,407},{11,10,144},{140,10,523
+},{138,11,265},{133,0,439},{132,10,510},{7,0,648},{7,0,874},{11,0,164},{12,0,76}
+,{18,0,9},{7,10,1980},{10,10,487},{138,10,809},{12,0,111},{14,0,294},{19,0,45},{
+13,10,260},{146,10,63},{133,11,549},{134,10,570},{4,0,8},{7,0,1152},{7,0,1153},{
+7,0,1715},{9,0,374},{10,0,478},{139,0,648},{135,0,1099},{5,0,575},{6,0,354},{135
+,0,701},{7,11,36},{8,11,201},{136,11,605},{4,10,787},{136,11,156},{6,0,518},{149
+,11,13},{140,11,224},{134,0,702},{132,10,516},{5,11,724},{10,11,305},{11,11,151}
+,{12,11,33},{12,11,121},{12,11,381},{17,11,3},{17,11,27},{17,11,78},{18,11,18},{
+19,11,54},{149,11,5},{8,0,87},{4,11,523},{5,11,638},{11,10,887},{14,10,365},{142
+,10,375},{138,0,438},{136,10,821},{135,11,1908},{6,11,242},{7,11,227},{7,11,1581
+},{8,11,104},{9,11,113},{9,11,220},{9,11,427},{10,11,74},{10,11,239},{11,11,579}
+,{11,11,1023},{13,11,4},{13,11,204},{13,11,316},{18,11,95},{148,11,86},{4,0,69},
+{5,0,122},{5,0,849},{6,0,1633},{9,0,656},{138,0,464},{7,0,1802},{4,10,10},{139,
+10,786},{135,11,861},{139,0,499},{7,0,476},{7,0,1592},{138,0,87},{133,10,684},{4
+,0,840},{134,10,27},{142,0,283},{6,0,1620},{7,11,1328},{136,11,494},{5,0,859},{7
+,0,1160},{8,0,107},{9,0,291},{9,0,439},{10,0,663},{11,0,609},{140,0,197},{7,11,
+1306},{8,11,505},{9,11,482},{10,11,126},{11,11,225},{12,11,347},{12,11,449},{13,
+11,19},{142,11,218},{5,11,268},{10,11,764},{12,11,120},{13,11,39},{145,11,127},{
+145,10,56},{7,11,1672},{10,11,472},{11,11,189},{143,11,51},{6,10,342},{6,10,496}
+,{8,10,275},{137,10,206},{133,0,600},{4,0,117},{6,0,372},{7,0,1905},{142,0,323},
+{4,10,909},{5,10,940},{135,11,1471},{132,10,891},{4,0,722},{139,0,471},{4,11,384
+},{135,11,1022},{132,10,687},{9,0,5},{12,0,216},{12,0,294},{12,0,298},{12,0,400}
+,{12,0,518},{13,0,229},{143,0,139},{135,11,1703},{7,11,1602},{10,11,698},{12,11,
+212},{141,11,307},{6,10,41},{141,10,160},{135,11,1077},{9,11,159},{11,11,28},{
+140,11,603},{4,0,514},{7,0,1304},{138,0,477},{134,0,1774},{9,0,88},{139,0,270},{
+5,0,12},{7,0,375},{9,0,438},{134,10,1718},{132,11,515},{136,10,778},{8,11,632},{
+8,11,697},{137,11,854},{6,0,362},{6,0,997},{146,0,51},{7,0,816},{7,0,1241},{9,0,
+283},{9,0,520},{10,0,213},{10,0,307},{10,0,463},{10,0,671},{10,0,746},{11,0,401}
+,{11,0,794},{12,0,517},{18,0,107},{147,0,115},{133,10,115},{150,11,28},{4,11,136
+},{133,11,551},{142,10,314},{132,0,258},{6,0,22},{7,0,903},{7,0,1963},{8,0,639},
+{138,0,577},{5,0,681},{8,0,782},{13,0,130},{17,0,84},{5,10,193},{140,10,178},{9,
+11,17},{138,11,291},{7,11,1287},{9,11,44},{10,11,552},{10,11,642},{11,11,839},{
+12,11,274},{12,11,275},{12,11,372},{13,11,91},{142,11,125},{135,10,174},{4,0,664
+},{5,0,804},{139,0,1013},{134,0,942},{6,0,1349},{6,0,1353},{6,0,1450},{7,11,1518
+},{139,11,694},{11,0,356},{4,10,122},{5,10,796},{5,10,952},{6,10,1660},{6,10,
+1671},{8,10,567},{9,10,687},{9,10,742},{10,10,686},{11,10,682},{140,10,281},{5,0
+,32},{6,11,147},{7,11,886},{9,11,753},{138,11,268},{5,10,179},{7,10,1095},{135,
+10,1213},{4,10,66},{7,10,722},{135,10,904},{135,10,352},{9,11,245},{138,11,137},
+{4,0,289},{7,0,629},{7,0,1698},{7,0,1711},{12,0,215},{133,11,414},{6,0,1975},{
+135,11,1762},{6,0,450},{136,0,109},{141,10,35},{134,11,599},{136,0,705},{133,0,
+664},{134,11,1749},{11,11,402},{12,11,109},{12,11,431},{13,11,179},{13,11,206},{
+14,11,175},{14,11,217},{16,11,3},{148,11,53},{135,0,1238},{134,11,1627},{132,11,
+488},{13,0,318},{10,10,592},{10,10,753},{12,10,317},{12,10,355},{12,10,465},{12,
+10,469},{12,10,560},{140,10,578},{133,10,564},{132,11,83},{140,11,676},{6,0,1872
+},{6,0,1906},{6,0,1907},{9,0,934},{9,0,956},{9,0,960},{9,0,996},{12,0,794},{12,0
+,876},{12,0,880},{12,0,918},{15,0,230},{18,0,234},{18,0,238},{21,0,38},{149,0,62
+},{134,10,556},{134,11,278},{137,0,103},{7,10,544},{8,10,719},{138,10,61},{4,10,
+5},{5,10,498},{8,10,637},{137,10,521},{7,0,777},{12,0,229},{12,0,239},{15,0,12},
+{12,11,229},{12,11,239},{143,11,12},{6,0,26},{7,11,388},{7,11,644},{139,11,781},
+{7,11,229},{8,11,59},{9,11,190},{9,11,257},{10,11,378},{140,11,191},{133,10,927}
+,{135,10,1441},{4,10,893},{5,10,780},{133,10,893},{4,0,414},{5,0,467},{9,0,654},
+{10,0,451},{12,0,59},{141,0,375},{142,0,173},{135,0,17},{7,0,1350},{133,10,238},
+{135,0,955},{4,0,960},{10,0,887},{12,0,753},{18,0,161},{18,0,162},{152,0,19},{
+136,11,344},{6,10,1729},{137,11,288},{132,11,660},{4,0,217},{5,0,710},{7,0,760},
+{7,0,1926},{9,0,428},{9,0,708},{10,0,254},{10,0,296},{10,0,720},{11,0,109},{11,0
+,255},{12,0,165},{12,0,315},{13,0,107},{13,0,203},{14,0,54},{14,0,99},{14,0,114}
+,{14,0,388},{16,0,85},{17,0,9},{17,0,33},{20,0,25},{20,0,28},{20,0,29},{21,0,9},
+{21,0,10},{21,0,34},{22,0,17},{4,10,60},{7,10,1800},{8,10,314},{9,10,700},{139,
+10,487},{7,11,1035},{138,11,737},{7,11,690},{9,11,217},{9,11,587},{140,11,521},{
+6,0,919},{7,11,706},{7,11,1058},{138,11,538},{7,10,1853},{138,10,437},{136,10,
+419},{6,0,280},{10,0,502},{11,0,344},{140,0,38},{5,0,45},{7,0,1161},{11,0,448},{
+11,0,880},{13,0,139},{13,0,407},{15,0,16},{17,0,95},{18,0,66},{18,0,88},{18,0,
+123},{149,0,7},{11,11,92},{11,11,196},{11,11,409},{11,11,450},{11,11,666},{11,11
+,777},{12,11,262},{13,11,385},{13,11,393},{15,11,115},{16,11,45},{145,11,82},{
+136,0,777},{134,11,1744},{4,0,410},{7,0,521},{133,10,828},{134,0,673},{7,0,1110}
+,{7,0,1778},{7,10,176},{135,10,178},{5,10,806},{7,11,268},{7,10,1976},{136,11,
+569},{4,11,733},{9,11,194},{10,11,92},{11,11,198},{12,11,84},{12,11,87},{13,11,
+128},{144,11,74},{5,0,341},{7,0,1129},{11,0,414},{4,10,51},{6,10,4},{7,10,591},{
+7,10,849},{7,10,951},{7,10,1613},{7,10,1760},{7,10,1988},{9,10,434},{10,10,754},
+{11,10,25},{139,10,37},{133,10,902},{135,10,928},{135,0,787},{132,0,436},{134,10
+,270},{7,0,1587},{135,0,1707},{6,0,377},{7,0,1025},{9,0,613},{145,0,104},{7,11,
+982},{7,11,1361},{10,11,32},{143,11,56},{139,0,96},{132,0,451},{132,10,416},{142
+,10,372},{5,10,152},{5,10,197},{7,11,306},{7,10,340},{7,10,867},{10,10,548},{10,
+10,581},{11,10,6},{12,10,3},{12,10,19},{14,10,110},{142,10,289},{134,0,680},{134
+,11,609},{7,0,483},{7,10,190},{8,10,28},{8,10,141},{8,10,444},{8,10,811},{9,10,
+468},{11,10,334},{12,10,24},{12,10,386},{140,10,576},{10,0,916},{133,10,757},{5,
+10,721},{135,10,1553},{133,11,178},{134,0,937},{132,10,898},{133,0,739},{147,0,
+82},{135,0,663},{146,0,128},{5,10,277},{141,10,247},{134,0,1087},{132,10,435},{6
+,11,381},{7,11,645},{7,11,694},{136,11,546},{7,0,503},{135,0,1885},{6,0,1965},{8
+,0,925},{138,0,955},{4,0,113},{5,0,163},{5,0,735},{7,0,1009},{9,0,9},{9,0,771},{
+12,0,90},{13,0,138},{13,0,410},{143,0,128},{4,0,324},{138,0,104},{7,0,460},{5,10
+,265},{134,10,212},{133,11,105},{7,11,261},{7,11,1107},{7,11,1115},{7,11,1354},{
+7,11,1588},{7,11,1705},{7,11,1902},{9,11,465},{10,11,248},{10,11,349},{10,11,647
+},{11,11,527},{11,11,660},{11,11,669},{12,11,529},{141,11,305},{5,11,438},{9,11,
+694},{12,11,627},{141,11,210},{152,11,11},{4,0,935},{133,0,823},{132,10,702},{5,
+0,269},{7,0,434},{7,0,891},{8,0,339},{9,0,702},{11,0,594},{11,0,718},{17,0,100},
+{5,10,808},{135,10,2045},{7,0,1014},{9,0,485},{141,0,264},{134,0,1713},{7,0,1810
+},{11,0,866},{12,0,103},{13,0,495},{140,11,233},{4,0,423},{10,0,949},{138,0,1013
+},{135,0,900},{8,11,25},{138,11,826},{5,10,166},{8,10,739},{140,10,511},{134,0,
+2018},{7,11,1270},{139,11,612},{4,10,119},{5,10,170},{5,10,447},{7,10,1708},{7,
+10,1889},{9,10,357},{9,10,719},{12,10,486},{140,10,596},{12,0,574},{140,11,574},
+{132,11,308},{6,0,964},{6,0,1206},{134,0,1302},{4,10,450},{135,10,1158},{135,11,
+150},{136,11,649},{14,0,213},{148,0,38},{9,11,45},{9,11,311},{141,11,42},{134,11
+,521},{7,10,1375},{7,10,1466},{138,10,331},{132,10,754},{5,11,339},{7,11,1442},{
+14,11,3},{15,11,41},{147,11,66},{136,11,378},{134,0,1022},{5,10,850},{136,10,799
+},{142,0,143},{135,0,2029},{134,11,1628},{8,0,523},{150,0,34},{5,0,625},{135,0,
+1617},{7,0,275},{7,10,238},{7,10,2033},{8,10,120},{8,10,188},{8,10,659},{9,10,
+598},{10,10,466},{12,10,342},{12,10,588},{13,10,503},{14,10,246},{143,10,92},{7,
+0,37},{8,0,425},{8,0,693},{9,0,720},{10,0,380},{10,0,638},{11,0,273},{11,0,473},
+{12,0,61},{143,0,43},{135,11,829},{135,0,1943},{132,0,765},{5,11,486},{135,11,
+1349},{7,11,1635},{8,11,17},{10,11,217},{138,11,295},{4,10,201},{7,10,1744},{8,
+10,602},{11,10,247},{11,10,826},{145,10,65},{138,11,558},{11,0,551},{142,0,159},
+{8,10,164},{146,10,62},{139,11,176},{132,0,168},{136,0,1010},{134,0,1994},{135,0
+,91},{138,0,532},{135,10,1243},{135,0,1884},{132,10,907},{5,10,100},{10,10,329},
+{12,10,416},{149,10,29},{134,11,447},{132,10,176},{5,10,636},{5,10,998},{7,10,9}
+,{7,10,1508},{8,10,26},{9,10,317},{9,10,358},{10,10,210},{10,10,292},{10,10,533}
+,{11,10,555},{12,10,526},{12,10,607},{13,10,263},{13,10,459},{142,10,271},{4,11,
+609},{135,11,756},{6,0,15},{7,0,70},{10,0,240},{147,0,93},{4,11,930},{133,11,947
+},{134,0,1227},{134,0,1534},{133,11,939},{133,11,962},{5,11,651},{8,11,170},{9,
+11,61},{9,11,63},{10,11,23},{10,11,37},{10,11,834},{11,11,4},{11,11,187},{11,11,
+281},{11,11,503},{11,11,677},{12,11,96},{12,11,130},{12,11,244},{14,11,5},{14,11
+,40},{14,11,162},{14,11,202},{146,11,133},{4,11,406},{5,11,579},{12,11,492},{150
+,11,15},{139,0,392},{6,10,610},{10,10,127},{141,10,27},{7,0,655},{7,0,1844},{136
+,10,119},{4,0,145},{6,0,176},{7,0,395},{137,0,562},{132,0,501},{140,11,145},{136
+,0,1019},{134,0,509},{139,0,267},{6,11,17},{7,11,16},{7,11,1001},{7,11,1982},{9,
+11,886},{10,11,489},{10,11,800},{11,11,782},{12,11,320},{13,11,467},{14,11,145},
+{14,11,387},{143,11,119},{145,11,17},{6,0,1099},{133,11,458},{7,11,1983},{8,11,0
+},{8,11,171},{9,11,120},{9,11,732},{10,11,473},{11,11,656},{11,11,998},{18,11,0}
+,{18,11,2},{147,11,21},{12,11,427},{146,11,38},{10,0,948},{138,0,968},{7,10,126}
+,{136,10,84},{136,10,790},{4,0,114},{9,0,492},{13,0,462},{142,0,215},{6,10,64},{
+12,10,377},{141,10,309},{4,0,77},{5,0,361},{6,0,139},{6,0,401},{6,0,404},{7,0,
+413},{7,0,715},{7,0,1716},{11,0,279},{12,0,179},{12,0,258},{13,0,244},{142,0,358
+},{134,0,1717},{7,0,772},{7,0,1061},{7,0,1647},{8,0,82},{11,0,250},{11,0,607},{
+12,0,311},{12,0,420},{13,0,184},{13,0,367},{7,10,1104},{11,10,269},{11,10,539},{
+11,10,627},{11,10,706},{11,10,975},{12,10,248},{12,10,434},{12,10,600},{12,10,
+622},{13,10,297},{13,10,485},{14,10,69},{14,10,409},{143,10,108},{135,0,724},{4,
+11,512},{4,11,519},{133,11,342},{134,0,1133},{145,11,29},{11,10,977},{141,10,507
+},{6,0,841},{6,0,1042},{6,0,1194},{10,0,993},{140,0,1021},{6,11,31},{7,11,491},{
+7,11,530},{8,11,592},{9,10,34},{11,11,53},{11,10,484},{11,11,779},{12,11,167},{
+12,11,411},{14,11,14},{14,11,136},{15,11,72},{16,11,17},{144,11,72},{4,0,1021},{
+6,0,2037},{133,11,907},{7,0,373},{8,0,335},{8,0,596},{9,0,488},{6,10,1700},{7,10
+,293},{7,10,382},{7,10,1026},{7,10,1087},{7,10,2027},{8,10,252},{8,10,727},{8,10
+,729},{9,10,30},{9,10,199},{9,10,231},{9,10,251},{9,10,334},{9,10,361},{9,10,712
+},{10,10,55},{10,10,60},{10,10,232},{10,10,332},{10,10,384},{10,10,396},{10,10,
+504},{10,10,542},{10,10,652},{11,10,20},{11,10,48},{11,10,207},{11,10,291},{11,
+10,298},{11,10,342},{11,10,365},{11,10,394},{11,10,620},{11,10,705},{11,10,1017}
+,{12,10,123},{12,10,340},{12,10,406},{12,10,643},{13,10,61},{13,10,269},{13,10,
+311},{13,10,319},{13,10,486},{14,10,234},{15,10,62},{15,10,85},{16,10,71},{18,10
+,119},{148,10,105},{150,0,37},{4,11,208},{5,11,106},{6,11,531},{8,11,408},{9,11,
+188},{138,11,572},{132,0,564},{6,0,513},{135,0,1052},{132,0,825},{9,0,899},{140,
+11,441},{134,0,778},{133,11,379},{7,0,1417},{12,0,382},{17,0,48},{152,0,12},{132
+,11,241},{7,0,1116},{6,10,379},{7,10,270},{8,10,176},{8,10,183},{9,10,432},{9,10
+,661},{12,10,247},{12,10,617},{146,10,125},{5,10,792},{133,10,900},{6,0,545},{7,
+0,565},{7,0,1669},{10,0,114},{11,0,642},{140,0,618},{133,0,5},{138,11,7},{132,11
+,259},{135,0,192},{134,0,701},{136,0,763},{135,10,1979},{4,10,901},{133,10,776},
+{10,0,755},{147,0,29},{133,0,759},{4,11,173},{5,11,312},{5,11,512},{135,11,1285}
+,{7,11,1603},{7,11,1691},{9,11,464},{11,11,195},{12,11,279},{12,11,448},{14,11,
+11},{147,11,102},{7,0,370},{7,0,1007},{7,0,1177},{135,0,1565},{135,0,1237},{4,0,
+87},{5,0,250},{141,0,298},{4,11,452},{5,11,583},{5,11,817},{6,11,433},{7,11,593}
+,{7,11,720},{7,11,1378},{8,11,161},{9,11,284},{10,11,313},{139,11,886},{4,11,547
+},{135,11,1409},{136,11,722},{4,10,37},{5,10,334},{135,10,1253},{132,10,508},{12
+,0,107},{146,0,31},{8,11,420},{139,11,193},{135,0,814},{135,11,409},{140,0,991},
+{4,0,57},{7,0,1195},{7,0,1438},{7,0,1548},{7,0,1835},{7,0,1904},{9,0,757},{10,0,
+604},{139,0,519},{132,0,540},{138,11,308},{132,10,533},{136,0,608},{144,11,65},{
+4,0,1014},{134,0,2029},{4,0,209},{7,0,902},{5,11,1002},{136,11,745},{134,0,2030}
+,{6,0,303},{7,0,335},{7,0,1437},{7,0,1668},{8,0,553},{8,0,652},{8,0,656},{9,0,
+558},{11,0,743},{149,0,18},{5,11,575},{6,11,354},{135,11,701},{4,11,239},{6,11,
+477},{7,11,1607},{11,11,68},{139,11,617},{132,0,559},{8,0,527},{18,0,60},{147,0,
+24},{133,10,920},{138,0,511},{133,0,1017},{133,0,675},{138,10,391},{11,0,156},{
+135,10,1952},{138,11,369},{132,11,367},{133,0,709},{6,0,698},{134,0,887},{142,10
+,126},{134,0,1745},{132,10,483},{13,11,299},{142,11,75},{133,0,714},{7,0,8},{136
+,0,206},{138,10,480},{4,11,694},{9,10,495},{146,10,104},{7,11,1248},{11,11,621},
+{139,11,702},{140,11,687},{132,0,776},{139,10,1009},{135,0,1272},{134,0,1059},{8
+,10,653},{13,10,93},{147,10,14},{135,11,213},{136,0,406},{133,10,172},{132,0,947
+},{8,0,175},{10,0,168},{138,0,573},{132,0,870},{6,0,1567},{151,11,28},{134,11,
+472},{5,10,260},{136,11,132},{4,11,751},{11,11,390},{140,11,32},{4,11,409},{133,
+11,78},{12,0,554},{6,11,473},{145,11,105},{133,0,784},{8,0,908},{136,11,306},{
+139,0,882},{6,0,358},{7,0,1393},{8,0,396},{10,0,263},{14,0,154},{16,0,48},{17,0,
+8},{7,11,1759},{8,11,396},{10,11,263},{14,11,154},{16,11,48},{145,11,8},{13,11,
+163},{13,11,180},{18,11,78},{148,11,35},{14,0,32},{18,0,85},{20,0,2},{152,0,16},
+{7,0,228},{10,0,770},{8,10,167},{8,10,375},{9,10,82},{9,10,561},{138,10,620},{
+132,0,845},{9,0,14},{9,0,441},{10,0,306},{139,0,9},{11,0,966},{12,0,287},{13,0,
+342},{13,0,402},{15,0,110},{15,0,163},{8,10,194},{136,10,756},{134,0,1578},{4,0,
+967},{6,0,1820},{6,0,1847},{140,0,716},{136,0,594},{7,0,1428},{7,0,1640},{7,0,
+1867},{9,0,169},{9,0,182},{9,0,367},{9,0,478},{9,0,506},{9,0,551},{9,0,557},{9,0
+,648},{9,0,697},{9,0,705},{9,0,725},{9,0,787},{9,0,794},{10,0,198},{10,0,214},{
+10,0,267},{10,0,275},{10,0,456},{10,0,551},{10,0,561},{10,0,613},{10,0,627},{10,
+0,668},{10,0,675},{10,0,691},{10,0,695},{10,0,707},{10,0,715},{11,0,183},{11,0,
+201},{11,0,244},{11,0,262},{11,0,352},{11,0,439},{11,0,493},{11,0,572},{11,0,591
+},{11,0,608},{11,0,611},{11,0,646},{11,0,674},{11,0,711},{11,0,751},{11,0,761},{
+11,0,776},{11,0,785},{11,0,850},{11,0,853},{11,0,862},{11,0,865},{11,0,868},{11,
+0,875},{11,0,898},{11,0,902},{11,0,903},{11,0,910},{11,0,932},{11,0,942},{11,0,
+957},{11,0,967},{11,0,972},{12,0,148},{12,0,195},{12,0,220},{12,0,237},{12,0,318
+},{12,0,339},{12,0,393},{12,0,445},{12,0,450},{12,0,474},{12,0,505},{12,0,509},{
+12,0,533},{12,0,591},{12,0,594},{12,0,597},{12,0,621},{12,0,633},{12,0,642},{13,
+0,59},{13,0,60},{13,0,145},{13,0,239},{13,0,250},{13,0,329},{13,0,344},{13,0,365
+},{13,0,372},{13,0,387},{13,0,403},{13,0,414},{13,0,456},{13,0,470},{13,0,478},{
+13,0,483},{13,0,489},{14,0,55},{14,0,57},{14,0,81},{14,0,90},{14,0,148},{14,0,
+239},{14,0,266},{14,0,321},{14,0,326},{14,0,327},{14,0,330},{14,0,347},{14,0,355
+},{14,0,401},{14,0,404},{14,0,411},{14,0,414},{14,0,416},{14,0,420},{15,0,61},{
+15,0,74},{15,0,87},{15,0,88},{15,0,94},{15,0,96},{15,0,116},{15,0,149},{15,0,154
+},{16,0,50},{16,0,63},{16,0,73},{17,0,2},{17,0,66},{17,0,92},{17,0,103},{17,0,
+112},{17,0,120},{18,0,50},{18,0,54},{18,0,82},{18,0,86},{18,0,90},{18,0,111},{18
+,0,115},{18,0,156},{19,0,40},{19,0,79},{20,0,78},{21,0,22},{135,11,883},{5,0,161
+},{135,0,839},{4,0,782},{13,11,293},{142,11,56},{133,11,617},{139,11,50},{135,10
+,22},{145,0,64},{5,10,639},{7,10,1249},{139,10,896},{138,0,998},{135,11,2042},{4
+,11,546},{142,11,233},{6,0,1043},{134,0,1574},{134,0,1496},{4,10,102},{7,10,815}
+,{7,10,1699},{139,10,964},{12,0,781},{142,0,461},{4,11,313},{133,11,577},{6,0,
+639},{6,0,1114},{137,0,817},{8,11,184},{141,11,433},{7,0,1814},{135,11,935},{10,
+0,997},{140,0,958},{4,0,812},{137,11,625},{132,10,899},{136,10,795},{5,11,886},{
+6,11,46},{6,11,1790},{7,11,14},{7,11,732},{7,11,1654},{8,11,95},{8,11,327},{8,11
+,616},{10,11,598},{10,11,769},{11,11,134},{11,11,747},{12,11,378},{142,11,97},{
+136,0,139},{6,10,52},{9,10,104},{9,10,559},{12,10,308},{147,10,87},{133,11,1021}
+,{132,10,604},{132,10,301},{136,10,779},{7,0,643},{136,0,236},{132,11,153},{134,
+0,1172},{147,10,32},{133,11,798},{6,0,1338},{132,11,587},{6,11,598},{7,11,42},{8
+,11,695},{10,11,212},{11,11,158},{14,11,196},{145,11,85},{135,10,508},{5,11,957}
+,{5,11,1008},{135,11,249},{4,11,129},{135,11,465},{5,0,54},{7,11,470},{7,11,1057
+},{7,11,1201},{9,11,755},{11,11,906},{140,11,527},{7,11,908},{146,11,7},{5,11,
+148},{136,11,450},{144,11,1},{4,0,256},{135,0,1488},{9,0,351},{6,10,310},{7,10,
+1849},{8,10,72},{8,10,272},{8,10,431},{9,10,12},{10,10,563},{10,10,630},{10,10,
+796},{10,10,810},{11,10,367},{11,10,599},{11,10,686},{140,10,672},{6,0,1885},{6,
+0,1898},{6,0,1899},{140,0,955},{4,0,714},{133,0,469},{6,0,1270},{134,0,1456},{
+132,0,744},{6,0,313},{7,10,537},{8,10,64},{9,10,127},{10,10,496},{12,10,510},{
+141,10,384},{4,11,217},{4,10,244},{5,11,710},{7,10,233},{7,11,1926},{9,11,428},{
+9,11,708},{10,11,254},{10,11,296},{10,11,720},{11,11,109},{11,11,255},{12,11,165
+},{12,11,315},{13,11,107},{13,11,203},{14,11,54},{14,11,99},{14,11,114},{14,11,
+388},{16,11,85},{17,11,9},{17,11,33},{20,11,25},{20,11,28},{20,11,29},{21,11,9},
+{21,11,10},{21,11,34},{150,11,17},{138,0,402},{7,0,969},{146,0,55},{8,0,50},{137
+,0,624},{134,0,1355},{132,0,572},{134,10,1650},{10,10,702},{139,10,245},{10,0,
+847},{142,0,445},{6,0,43},{7,0,38},{8,0,248},{138,0,513},{133,0,369},{137,10,338
+},{133,0,766},{133,0,363},{133,10,896},{8,11,392},{11,11,54},{13,11,173},{13,11,
+294},{148,11,7},{134,0,678},{7,11,1230},{136,11,531},{6,0,258},{140,0,409},{5,0,
+249},{148,0,82},{7,10,1117},{136,10,539},{5,0,393},{6,0,378},{7,0,1981},{9,0,32}
+,{9,0,591},{10,0,685},{10,0,741},{142,0,382},{133,0,788},{134,0,1281},{134,0,
+1295},{7,0,1968},{141,0,509},{4,0,61},{5,0,58},{5,0,171},{5,0,683},{6,0,291},{6,
+0,566},{7,0,1650},{11,0,523},{12,0,273},{12,0,303},{15,0,39},{143,0,111},{6,0,
+706},{134,0,1283},{134,0,589},{135,11,1433},{133,11,435},{7,0,1059},{13,0,54},{5
+,10,4},{5,10,810},{6,10,13},{6,10,538},{6,10,1690},{6,10,1726},{7,10,1819},{8,10
+,148},{8,10,696},{8,10,791},{12,10,125},{143,10,9},{135,10,1268},{5,11,85},{6,11
+,419},{7,11,134},{7,11,305},{7,11,361},{7,11,1337},{8,11,71},{140,11,519},{137,0
+,824},{140,11,688},{5,11,691},{7,11,345},{7,10,1385},{9,11,94},{11,10,582},{11,
+10,650},{11,10,901},{11,10,949},{12,11,169},{12,10,232},{12,10,236},{13,10,413},
+{13,10,501},{146,10,116},{4,0,917},{133,0,1005},{7,0,1598},{5,11,183},{6,11,582}
+,{9,11,344},{10,11,679},{140,11,435},{4,10,925},{5,10,803},{8,10,698},{138,10,
+828},{132,0,919},{135,11,511},{139,10,992},{4,0,255},{5,0,302},{6,0,132},{7,0,
+128},{7,0,283},{7,0,1299},{10,0,52},{10,0,514},{11,0,925},{13,0,92},{142,0,309},
+{134,0,1369},{135,10,1847},{134,0,328},{7,11,1993},{136,11,684},{133,10,383},{
+137,0,173},{134,11,583},{134,0,1411},{19,0,65},{5,11,704},{8,11,357},{10,11,745}
+,{14,11,426},{17,11,94},{147,11,57},{9,10,660},{138,10,347},{4,11,179},{5,11,198
+},{133,11,697},{7,11,347},{7,11,971},{8,11,181},{138,11,711},{141,0,442},{11,0,
+842},{11,0,924},{13,0,317},{13,0,370},{13,0,469},{13,0,471},{14,0,397},{18,0,69}
+,{18,0,145},{7,10,572},{9,10,592},{11,10,680},{12,10,356},{140,10,550},{14,11,19
+},{14,11,28},{144,11,29},{136,0,534},{4,11,243},{5,11,203},{7,11,19},{7,11,71},{
+7,11,113},{10,11,405},{11,11,357},{142,11,240},{6,0,210},{10,0,845},{138,0,862},
+{7,11,1351},{9,11,581},{10,11,639},{11,11,453},{140,11,584},{7,11,1450},{139,11,
+99},{10,0,892},{12,0,719},{144,0,105},{4,0,284},{6,0,223},{134,11,492},{5,11,134
+},{6,11,408},{6,11,495},{135,11,1593},{136,0,529},{137,0,807},{4,0,218},{7,0,526
+},{143,0,137},{6,0,1444},{142,11,4},{132,11,665},{4,0,270},{5,0,192},{6,0,332},{
+7,0,1322},{4,11,248},{7,11,137},{137,11,349},{140,0,661},{7,0,1517},{11,0,597},{
+14,0,76},{14,0,335},{20,0,33},{7,10,748},{139,10,700},{5,11,371},{135,11,563},{
+146,11,57},{133,10,127},{133,0,418},{4,11,374},{7,11,547},{7,11,1700},{7,11,1833
+},{139,11,858},{6,10,198},{140,10,83},{7,11,1812},{13,11,259},{13,11,356},{14,11
+,242},{147,11,114},{7,0,379},{8,0,481},{9,0,377},{5,10,276},{6,10,55},{135,10,
+1369},{138,11,286},{5,0,1003},{6,0,149},{6,10,1752},{136,10,726},{8,0,262},{9,0,
+627},{10,0,18},{11,0,214},{11,0,404},{11,0,457},{11,0,780},{11,0,913},{13,0,401}
+,{14,0,200},{6,11,1647},{7,11,1552},{7,11,2010},{9,11,494},{137,11,509},{135,0,
+742},{136,0,304},{132,0,142},{133,10,764},{6,10,309},{7,10,331},{138,10,550},{
+135,10,1062},{6,11,123},{7,11,214},{7,10,986},{9,11,728},{10,11,157},{11,11,346}
+,{11,11,662},{143,11,106},{135,10,1573},{7,0,925},{137,0,799},{4,0,471},{5,0,51}
+,{6,0,602},{8,0,484},{138,0,195},{136,0,688},{132,0,697},{6,0,1169},{6,0,1241},{
+6,10,194},{7,10,133},{10,10,493},{10,10,570},{139,10,664},{140,0,751},{7,0,929},
+{10,0,452},{11,0,878},{16,0,33},{5,10,24},{5,10,569},{6,10,3},{6,10,119},{6,10,
+143},{6,10,440},{7,10,599},{7,10,1686},{7,10,1854},{8,10,424},{9,10,43},{9,10,
+584},{9,10,760},{10,10,328},{11,10,159},{11,10,253},{12,10,487},{140,10,531},{4,
+11,707},{13,11,106},{18,11,49},{147,11,41},{5,0,221},{5,11,588},{134,11,393},{
+134,0,1437},{6,11,211},{7,11,1690},{11,11,486},{140,11,369},{5,10,14},{5,10,892}
+,{6,10,283},{7,10,234},{136,10,537},{4,0,988},{136,0,955},{135,0,1251},{4,10,126
+},{8,10,635},{147,10,34},{4,10,316},{135,10,1561},{137,10,861},{4,10,64},{5,10,
+352},{5,10,720},{6,10,368},{139,10,359},{134,0,192},{4,0,132},{5,0,69},{135,0,
+1242},{7,10,1577},{10,10,304},{10,10,549},{12,10,365},{13,10,220},{13,10,240},{
+142,10,33},{4,0,111},{7,0,865},{134,11,219},{5,11,582},{6,11,1646},{7,11,99},{7,
+11,1962},{7,11,1986},{8,11,515},{8,11,773},{9,11,23},{9,11,491},{12,11,620},{14,
+11,52},{145,11,50},{132,0,767},{7,11,568},{148,11,21},{6,0,42},{7,0,1416},{7,0,
+2005},{8,0,131},{8,0,466},{9,0,672},{13,0,252},{20,0,103},{133,11,851},{135,0,
+1050},{6,10,175},{137,10,289},{5,10,432},{133,10,913},{6,0,44},{136,0,368},{135,
+11,784},{132,0,570},{133,0,120},{139,10,595},{140,0,29},{6,0,227},{135,0,1589},{
+4,11,98},{7,11,1365},{9,11,422},{9,11,670},{10,11,775},{11,11,210},{13,11,26},{
+13,11,457},{141,11,476},{140,10,80},{5,10,931},{134,10,1698},{133,0,522},{134,0,
+1120},{135,0,1529},{12,0,739},{14,0,448},{142,0,467},{11,10,526},{11,10,939},{
+141,10,290},{5,10,774},{6,10,1637},{6,10,1686},{134,10,1751},{6,0,1667},{135,0,
+2036},{7,10,1167},{11,10,934},{13,10,391},{145,10,76},{137,11,147},{6,10,260},{7
+,10,1484},{11,11,821},{12,11,110},{12,11,153},{18,11,41},{150,11,19},{6,0,511},{
+12,0,132},{134,10,573},{5,0,568},{6,0,138},{135,0,1293},{132,0,1020},{8,0,258},{
+9,0,208},{137,0,359},{4,0,565},{8,0,23},{136,0,827},{134,0,344},{4,0,922},{5,0,
+1023},{13,11,477},{14,11,120},{148,11,61},{134,0,240},{5,11,209},{6,11,30},{11,
+11,56},{139,11,305},{6,0,171},{7,0,1002},{7,0,1324},{9,0,415},{14,0,230},{18,0,
+68},{4,10,292},{4,10,736},{5,10,871},{6,10,1689},{7,10,1944},{137,10,580},{9,11,
+635},{139,11,559},{4,11,150},{5,11,303},{134,11,327},{6,10,63},{135,10,920},{133
+,10,793},{8,11,192},{10,11,78},{10,11,555},{11,11,308},{13,11,359},{147,11,95},{
+135,11,786},{135,11,1712},{136,0,402},{6,0,754},{6,11,1638},{7,11,79},{7,11,496}
+,{9,11,138},{10,11,336},{11,11,12},{12,11,412},{12,11,440},{142,11,305},{4,0,716
+},{141,0,31},{133,0,982},{8,0,691},{8,0,731},{5,10,67},{6,10,62},{6,10,374},{135
+,10,1391},{9,10,790},{140,10,47},{139,11,556},{151,11,1},{7,11,204},{7,11,415},{
+8,11,42},{10,11,85},{11,11,33},{11,11,564},{12,11,571},{149,11,1},{8,0,888},{7,
+11,610},{135,11,1501},{4,10,391},{135,10,1169},{5,0,847},{9,0,840},{138,0,803},{
+137,0,823},{134,0,785},{8,0,152},{9,0,53},{9,0,268},{9,0,901},{10,0,518},{10,0,
+829},{11,0,188},{13,0,74},{14,0,46},{15,0,17},{15,0,33},{17,0,40},{18,0,36},{19,
+0,20},{22,0,1},{152,0,2},{4,11,3},{5,11,247},{5,11,644},{7,11,744},{7,11,1207},{
+7,11,1225},{7,11,1909},{146,11,147},{136,0,532},{135,0,681},{132,10,271},{140,0,
+314},{140,0,677},{4,0,684},{136,0,384},{5,11,285},{9,11,67},{13,11,473},{143,11,
+82},{4,10,253},{5,10,544},{7,10,300},{137,10,340},{7,0,110},{7,0,447},{8,0,290},
+{8,0,591},{9,0,382},{9,0,649},{11,0,71},{11,0,155},{11,0,313},{12,0,5},{13,0,325
+},{142,0,287},{134,0,1818},{136,0,1007},{138,0,321},{7,0,360},{7,0,425},{9,0,66}
+,{9,0,278},{138,0,644},{133,10,818},{5,0,385},{5,10,541},{6,10,94},{6,10,499},{7
+,10,230},{139,10,321},{4,10,920},{5,10,25},{5,10,790},{6,10,457},{7,10,853},{136
+,10,788},{4,0,900},{133,0,861},{5,0,254},{7,0,985},{136,0,73},{7,0,1959},{136,0,
+683},{134,10,1765},{133,10,822},{132,10,634},{4,11,29},{6,11,532},{7,11,1628},{7
+,11,1648},{9,11,303},{9,11,350},{10,11,433},{11,11,97},{11,11,557},{11,11,745},{
+12,11,289},{12,11,335},{12,11,348},{12,11,606},{13,11,116},{13,11,233},{13,11,
+466},{14,11,181},{14,11,209},{14,11,232},{14,11,236},{14,11,300},{16,11,41},{148
+,11,97},{19,0,86},{6,10,36},{7,10,658},{136,10,454},{135,11,1692},{132,0,725},{5
+,11,501},{7,11,1704},{9,11,553},{11,11,520},{12,11,557},{141,11,249},{134,0,196}
+,{133,0,831},{136,0,723},{7,0,1897},{13,0,80},{13,0,437},{145,0,74},{4,0,992},{6
+,0,627},{136,0,994},{135,11,1294},{132,10,104},{5,0,848},{6,0,66},{136,0,764},{4
+,0,36},{7,0,1387},{10,0,205},{139,0,755},{6,0,1046},{134,0,1485},{134,0,950},{
+132,0,887},{14,0,450},{148,0,111},{7,0,620},{7,0,831},{9,10,542},{9,10,566},{138
+,10,728},{6,0,165},{138,0,388},{139,10,263},{4,0,719},{135,0,155},{138,10,468},{
+6,11,453},{144,11,36},{134,11,129},{5,0,533},{7,0,755},{138,0,780},{134,0,1465},
+{4,0,353},{6,0,146},{6,0,1789},{7,0,427},{7,0,990},{7,0,1348},{9,0,665},{9,0,898
+},{11,0,893},{142,0,212},{7,10,87},{142,10,288},{4,0,45},{135,0,1257},{12,0,7},{
+7,10,988},{7,10,1939},{9,10,64},{9,10,502},{12,10,34},{13,10,12},{13,10,234},{
+147,10,77},{4,0,607},{5,11,60},{6,11,504},{7,11,614},{7,11,1155},{140,11,0},{135
+,10,141},{8,11,198},{11,11,29},{140,11,534},{140,0,65},{136,0,816},{132,10,619},
+{139,0,88},{5,10,246},{8,10,189},{9,10,355},{9,10,512},{10,10,124},{10,10,453},{
+11,10,143},{11,10,416},{11,10,859},{141,10,341},{4,11,379},{135,11,1397},{4,0,
+600},{137,0,621},{133,0,367},{134,0,561},{6,0,559},{134,0,1691},{6,0,585},{134,
+11,585},{135,11,1228},{4,11,118},{5,10,678},{6,11,274},{6,11,361},{7,11,75},{141
+,11,441},{135,11,1818},{137,11,841},{5,0,573},{6,0,287},{7,10,862},{7,10,1886},{
+138,10,179},{132,10,517},{140,11,693},{5,11,314},{6,11,221},{7,11,419},{10,11,
+650},{11,11,396},{12,11,156},{13,11,369},{14,11,333},{145,11,47},{140,10,540},{
+136,10,667},{11,10,403},{146,10,83},{6,0,672},{133,10,761},{9,0,157},{10,10,131}
+,{140,10,72},{7,0,714},{134,11,460},{134,0,456},{133,0,925},{5,11,682},{135,11,
+1887},{136,11,510},{136,11,475},{133,11,1016},{9,0,19},{7,11,602},{8,11,179},{10
+,11,781},{140,11,126},{6,11,329},{138,11,111},{6,0,822},{134,0,1473},{144,11,86}
+,{11,0,113},{139,11,113},{5,11,821},{134,11,1687},{133,10,449},{7,0,463},{17,0,
+69},{136,10,103},{7,10,2028},{138,10,641},{6,0,193},{7,0,240},{7,0,1682},{10,0,
+51},{10,0,640},{11,0,410},{13,0,82},{14,0,247},{14,0,331},{142,0,377},{6,0,471},
+{11,0,411},{142,0,2},{5,11,71},{7,11,1407},{9,11,388},{9,11,704},{10,11,261},{10
+,11,619},{11,11,547},{11,11,619},{143,11,157},{136,0,633},{135,0,1148},{6,0,554}
+,{7,0,1392},{12,0,129},{7,10,1274},{7,10,1386},{7,11,2008},{9,11,337},{10,11,517
+},{146,10,87},{7,0,803},{8,0,542},{6,10,187},{7,10,1203},{8,10,380},{14,10,117},
+{149,10,28},{6,10,297},{7,10,793},{139,10,938},{8,0,438},{11,0,363},{7,10,464},{
+11,10,105},{12,10,231},{14,10,386},{15,10,102},{148,10,75},{5,11,16},{6,11,86},{
+6,11,603},{7,11,292},{7,11,561},{8,11,257},{8,11,382},{9,11,721},{9,11,778},{11,
+11,581},{140,11,466},{6,0,717},{4,11,486},{133,11,491},{132,0,875},{132,11,72},{
+6,11,265},{135,11,847},{4,0,237},{135,0,514},{6,0,392},{7,0,65},{135,0,2019},{
+140,11,261},{135,11,922},{137,11,404},{12,0,563},{14,0,101},{18,0,129},{7,10,
+1010},{11,10,733},{11,10,759},{13,10,34},{146,10,45},{7,10,1656},{9,10,369},{10,
+10,338},{10,10,490},{11,10,154},{11,10,545},{11,10,775},{13,10,77},{141,10,274},
+{4,0,444},{10,0,146},{140,0,9},{139,11,163},{7,0,1260},{135,0,1790},{9,0,222},{
+10,0,43},{139,0,900},{137,11,234},{138,0,971},{137,0,761},{134,0,699},{136,11,
+434},{6,0,1116},{7,0,1366},{5,10,20},{6,11,197},{6,10,298},{7,10,659},{8,11,205}
+,{137,10,219},{132,11,490},{11,11,820},{150,11,51},{7,10,1440},{11,10,854},{11,
+10,872},{11,10,921},{12,10,551},{13,10,472},{142,10,367},{140,11,13},{132,0,829}
+,{12,0,242},{132,10,439},{136,10,669},{6,0,593},{6,11,452},{7,11,312},{138,11,
+219},{4,11,333},{9,11,176},{12,11,353},{141,11,187},{7,0,36},{8,0,201},{136,0,
+605},{140,0,224},{132,10,233},{134,0,1430},{134,0,1806},{4,0,523},{133,0,638},{6
+,0,1889},{9,0,958},{9,0,971},{9,0,976},{12,0,796},{12,0,799},{12,0,808},{12,0,
+835},{12,0,836},{12,0,914},{12,0,946},{15,0,216},{15,0,232},{18,0,183},{18,0,187
+},{18,0,194},{18,0,212},{18,0,232},{149,0,49},{132,10,482},{6,0,827},{134,0,1434
+},{135,10,346},{134,0,2043},{6,0,242},{7,0,227},{7,0,1581},{8,0,104},{9,0,113},{
+9,0,220},{9,0,427},{10,0,136},{10,0,239},{11,0,579},{11,0,1023},{13,0,4},{13,0,
+204},{13,0,316},{148,0,86},{134,11,1685},{7,0,148},{8,0,284},{141,0,63},{142,0,
+10},{135,11,584},{134,0,1249},{7,0,861},{135,10,334},{5,10,795},{6,10,1741},{137
+,11,70},{132,0,807},{7,11,135},{8,11,7},{8,11,62},{9,11,243},{10,11,658},{10,11,
+697},{11,11,456},{139,11,756},{9,11,395},{138,11,79},{137,11,108},{147,0,94},{
+136,0,494},{135,11,631},{135,10,622},{7,0,1510},{135,10,1750},{4,10,203},{135,10
+,1936},{7,11,406},{7,11,459},{8,11,606},{139,11,726},{7,0,1306},{8,0,505},{9,0,
+482},{10,0,126},{11,0,225},{12,0,347},{12,0,449},{13,0,19},{14,0,218},{142,0,435
+},{5,0,268},{10,0,764},{12,0,120},{13,0,39},{145,0,127},{142,11,68},{11,10,678},
+{140,10,307},{12,11,268},{12,11,640},{142,11,119},{135,10,2044},{133,11,612},{4,
+11,372},{7,11,482},{8,11,158},{9,11,602},{9,11,615},{10,11,245},{10,11,678},{10,
+11,744},{11,11,248},{139,11,806},{7,10,311},{9,10,308},{140,10,255},{4,0,384},{
+135,0,1022},{5,11,854},{135,11,1991},{135,10,1266},{4,10,400},{5,10,267},{135,10
+,232},{135,0,1703},{9,0,159},{11,0,661},{140,0,603},{4,0,964},{14,0,438},{14,0,
+444},{14,0,456},{22,0,60},{22,0,63},{9,11,106},{9,11,163},{9,11,296},{10,11,167}
+,{10,11,172},{10,11,777},{139,11,16},{136,0,583},{132,0,515},{8,0,632},{8,0,697}
+,{137,0,854},{5,11,195},{135,11,1685},{6,0,1123},{134,0,1365},{134,11,328},{7,11
+,1997},{8,11,730},{139,11,1006},{4,0,136},{133,0,551},{134,0,1782},{7,0,1287},{9
+,0,44},{10,0,552},{10,0,642},{11,0,839},{12,0,274},{12,0,275},{12,0,372},{13,0,
+91},{142,0,125},{5,11,751},{11,11,797},{140,11,203},{133,0,732},{7,0,679},{8,0,
+313},{4,10,100},{135,11,821},{10,0,361},{142,0,316},{134,0,595},{6,0,147},{7,0,
+886},{9,0,753},{138,0,268},{5,10,362},{5,10,443},{6,10,318},{7,10,1019},{139,10,
+623},{5,10,463},{136,10,296},{4,10,454},{5,11,950},{5,11,994},{134,11,351},{138,
+0,137},{5,10,48},{5,10,404},{6,10,557},{7,10,458},{8,10,597},{10,10,455},{10,10,
+606},{11,10,49},{11,10,548},{12,10,476},{13,10,18},{141,10,450},{133,0,414},{135
+,0,1762},{5,11,421},{135,11,47},{5,10,442},{135,10,1984},{134,0,599},{134,0,1749
+},{134,0,1627},{4,0,488},{132,11,350},{137,11,751},{132,0,83},{140,0,676},{133,
+11,967},{7,0,1639},{5,10,55},{140,10,161},{4,11,473},{7,11,623},{8,11,808},{9,11
+,871},{9,11,893},{11,11,38},{11,11,431},{12,11,112},{12,11,217},{12,11,243},{12,
+11,562},{12,11,683},{13,11,141},{13,11,197},{13,11,227},{13,11,406},{13,11,487},
+{14,11,156},{14,11,203},{14,11,224},{14,11,256},{18,11,58},{150,11,0},{133,10,
+450},{7,11,736},{139,11,264},{134,0,278},{4,11,222},{7,11,286},{136,11,629},{135
+,10,869},{140,0,97},{144,0,14},{134,0,1085},{4,10,213},{7,10,223},{136,10,80},{7
+,0,388},{7,0,644},{139,0,781},{132,0,849},{7,0,229},{8,0,59},{9,0,190},{10,0,378
+},{140,0,191},{7,10,381},{7,10,806},{7,10,820},{8,10,354},{8,10,437},{8,10,787},
+{9,10,657},{10,10,58},{10,10,339},{10,10,749},{11,10,914},{12,10,162},{13,10,75}
+,{14,10,106},{14,10,198},{14,10,320},{14,10,413},{146,10,43},{141,11,306},{136,
+10,747},{134,0,1115},{16,0,94},{16,0,108},{136,11,146},{6,0,700},{6,0,817},{134,
+0,1002},{133,10,692},{4,11,465},{135,11,1663},{134,10,191},{6,0,1414},{135,11,
+913},{132,0,660},{7,0,1035},{138,0,737},{6,10,162},{7,10,1960},{136,10,831},{132
+,10,706},{7,0,690},{9,0,217},{9,0,587},{140,0,521},{138,10,426},{135,10,1235},{6
+,11,82},{7,11,138},{7,11,517},{9,11,673},{139,11,238},{138,0,272},{5,11,495},{7,
+11,834},{9,11,733},{139,11,378},{134,0,1744},{132,0,1011},{7,11,828},{142,11,116
+},{4,0,733},{9,0,194},{10,0,92},{11,0,198},{12,0,84},{13,0,128},{133,11,559},{10
+,0,57},{10,0,277},{6,11,21},{6,11,1737},{7,11,1444},{136,11,224},{4,10,204},{137
+,10,902},{136,10,833},{11,0,348},{12,0,99},{18,0,1},{18,0,11},{19,0,4},{7,10,366
+},{9,10,287},{12,10,199},{12,10,556},{140,10,577},{6,0,1981},{136,0,936},{21,0,
+33},{150,0,40},{5,11,519},{138,11,204},{5,10,356},{135,10,224},{134,0,775},{135,
+0,306},{7,10,630},{9,10,567},{11,10,150},{11,10,444},{141,10,119},{5,0,979},{134
+,10,539},{133,0,611},{4,11,402},{135,11,1679},{5,0,178},{7,11,2},{8,11,323},{136
+,11,479},{5,11,59},{135,11,672},{4,0,1010},{6,0,1969},{138,11,237},{133,11,412},
+{146,11,34},{7,11,1740},{146,11,48},{134,0,664},{139,10,814},{4,11,85},{135,11,
+549},{133,11,94},{133,11,457},{132,0,390},{134,0,1510},{4,10,235},{135,10,255},{
+4,10,194},{5,10,584},{6,11,11},{6,10,384},{7,11,187},{7,10,583},{10,10,761},{11,
+10,760},{139,10,851},{4,11,522},{139,11,802},{135,0,493},{10,11,776},{13,11,345}
+,{142,11,425},{146,0,37},{4,11,52},{135,11,661},{134,0,724},{134,0,829},{133,11,
+520},{133,10,562},{4,11,281},{5,11,38},{7,11,194},{7,11,668},{7,11,1893},{137,11
+,397},{5,10,191},{137,10,271},{7,0,1537},{14,0,96},{143,0,73},{5,0,473},{11,0,
+168},{4,10,470},{6,10,153},{7,10,1503},{7,10,1923},{10,10,701},{11,10,132},{11,
+10,227},{11,10,320},{11,10,436},{11,10,525},{11,10,855},{12,10,41},{12,10,286},{
+13,10,103},{13,10,284},{14,10,255},{14,10,262},{15,10,117},{143,10,127},{133,0,
+105},{5,0,438},{9,0,694},{12,0,627},{141,0,210},{133,10,327},{6,10,552},{7,10,
+1754},{137,10,604},{134,0,1256},{152,0,11},{5,11,448},{11,11,98},{139,11,524},{7
+,0,1626},{5,10,80},{6,10,405},{7,10,403},{7,10,1502},{8,10,456},{9,10,487},{9,10
+,853},{9,10,889},{10,10,309},{11,10,721},{11,10,994},{12,10,430},{13,10,165},{14
+,11,16},{146,11,44},{132,0,779},{8,0,25},{138,0,826},{4,10,453},{5,10,887},{6,10
+,535},{8,10,6},{8,10,543},{136,10,826},{137,11,461},{140,11,632},{132,0,308},{
+135,0,741},{132,0,671},{7,0,150},{8,0,649},{136,0,1020},{9,0,99},{6,11,336},{8,
+11,552},{9,11,285},{10,11,99},{139,11,568},{134,0,521},{5,0,339},{14,0,3},{15,0,
+41},{15,0,166},{147,0,66},{6,11,423},{7,11,665},{7,11,1210},{9,11,218},{141,11,
+222},{6,0,543},{5,10,101},{5,11,256},{6,10,88},{7,10,1677},{9,10,100},{10,10,677
+},{14,10,169},{14,10,302},{14,10,313},{15,10,48},{143,10,84},{4,10,310},{7,10,
+708},{7,10,996},{9,10,795},{10,10,390},{10,10,733},{11,10,451},{12,10,249},{14,
+10,115},{14,10,286},{143,10,100},{133,10,587},{13,11,417},{14,11,129},{143,11,15
+},{134,0,1358},{136,11,554},{132,10,498},{7,10,217},{8,10,140},{138,10,610},{135
+,11,989},{135,11,634},{6,0,155},{140,0,234},{135,11,462},{132,11,618},{134,0,
+1628},{132,0,766},{4,11,339},{5,10,905},{135,11,259},{135,0,829},{4,11,759},{141
+,11,169},{7,0,1445},{4,10,456},{7,10,358},{7,10,1637},{8,10,643},{139,10,483},{5
+,0,486},{135,0,1349},{5,11,688},{135,11,712},{7,0,1635},{8,0,17},{10,0,217},{10,
+0,295},{12,0,2},{140,11,2},{138,0,558},{150,10,56},{4,11,278},{5,11,465},{135,11
+,1367},{136,11,482},{133,10,535},{6,0,1362},{6,0,1461},{10,11,274},{10,11,625},{
+139,11,530},{5,0,599},{5,11,336},{6,11,341},{6,11,478},{6,11,1763},{136,11,386},
+{7,10,1748},{137,11,151},{134,0,1376},{133,10,539},{135,11,73},{135,11,1971},{
+139,11,283},{9,0,93},{139,0,474},{6,10,91},{135,10,435},{6,0,447},{5,11,396},{
+134,11,501},{4,10,16},{5,10,316},{5,10,842},{6,10,370},{6,10,1778},{8,10,166},{
+11,10,812},{12,10,206},{12,10,351},{14,10,418},{16,10,15},{16,10,34},{18,10,3},{
+19,10,3},{19,10,7},{20,10,4},{149,10,21},{7,0,577},{7,0,1432},{9,0,475},{9,0,505
+},{9,0,526},{9,0,609},{9,0,689},{9,0,726},{9,0,735},{9,0,738},{10,0,556},{10,0,
+674},{10,0,684},{11,0,89},{11,0,202},{11,0,272},{11,0,380},{11,0,415},{11,0,505}
+,{11,0,537},{11,0,550},{11,0,562},{11,0,640},{11,0,667},{11,0,688},{11,0,847},{
+11,0,927},{11,0,930},{11,0,940},{12,0,144},{12,0,325},{12,0,329},{12,0,389},{12,
+0,403},{12,0,451},{12,0,515},{12,0,604},{12,0,616},{12,0,626},{13,0,66},{13,0,
+131},{13,0,167},{13,0,236},{13,0,368},{13,0,411},{13,0,434},{13,0,453},{13,0,461
+},{13,0,474},{14,0,59},{14,0,60},{14,0,139},{14,0,152},{14,0,276},{14,0,353},{14
+,0,402},{15,0,28},{15,0,81},{15,0,123},{15,0,152},{18,0,136},{148,0,88},{4,11,
+929},{133,11,799},{136,11,46},{142,0,307},{4,0,609},{7,0,756},{9,0,544},{11,0,
+413},{144,0,25},{10,0,687},{7,10,619},{10,10,547},{11,10,122},{140,10,601},{4,0,
+930},{133,0,947},{133,0,939},{142,0,21},{4,11,892},{133,11,770},{133,0,962},{5,0
+,651},{8,0,170},{9,0,61},{9,0,63},{10,0,23},{10,0,37},{10,0,834},{11,0,4},{11,0,
+187},{11,0,281},{11,0,503},{11,0,677},{12,0,96},{12,0,130},{12,0,244},{14,0,5},{
+14,0,40},{14,0,162},{14,0,202},{146,0,133},{4,0,406},{5,0,579},{12,0,492},{150,0
+,15},{135,11,158},{135,0,597},{132,0,981},{132,10,888},{4,10,149},{138,10,368},{
+132,0,545},{4,10,154},{7,10,1134},{136,10,105},{135,11,2001},{134,0,1558},{4,10,
+31},{6,10,429},{7,10,962},{9,10,458},{139,10,691},{132,10,312},{135,10,1642},{6,
+0,17},{6,0,1304},{7,0,16},{7,0,1001},{9,0,886},{10,0,489},{10,0,800},{11,0,782},
+{12,0,320},{13,0,467},{14,0,145},{14,0,387},{143,0,119},{135,0,1982},{17,0,17},{
+7,11,1461},{140,11,91},{4,10,236},{132,11,602},{138,0,907},{136,0,110},{7,0,272}
+,{19,0,53},{5,10,836},{5,10,857},{134,10,1680},{5,0,458},{7,11,1218},{136,11,303
+},{7,0,1983},{8,0,0},{8,0,171},{9,0,120},{9,0,732},{10,0,473},{11,0,656},{11,0,
+998},{18,0,0},{18,0,2},{19,0,21},{10,10,68},{139,10,494},{137,11,662},{4,11,13},
+{5,11,567},{7,11,1498},{9,11,124},{11,11,521},{140,11,405},{4,10,81},{139,10,867
+},{135,11,1006},{7,11,800},{7,11,1783},{138,11,12},{9,0,295},{10,0,443},{5,10,
+282},{8,10,650},{137,10,907},{132,11,735},{4,11,170},{4,10,775},{135,11,323},{6,
+0,1844},{10,0,924},{11,11,844},{12,11,104},{140,11,625},{5,11,304},{7,11,1403},{
+140,11,498},{134,0,1232},{4,0,519},{10,0,70},{12,0,26},{14,0,17},{14,0,178},{15,
+0,34},{149,0,12},{132,0,993},{4,11,148},{133,11,742},{6,0,31},{7,0,491},{7,0,530
+},{8,0,592},{11,0,53},{11,0,779},{12,0,167},{12,0,411},{14,0,14},{14,0,136},{15,
+0,72},{16,0,17},{144,0,72},{133,0,907},{134,0,733},{133,11,111},{4,10,71},{5,10,
+376},{7,10,119},{138,10,665},{136,0,55},{8,0,430},{136,11,430},{4,0,208},{5,0,
+106},{6,0,531},{8,0,408},{9,0,188},{138,0,572},{12,0,56},{11,10,827},{14,10,34},
+{143,10,148},{134,0,1693},{133,11,444},{132,10,479},{140,0,441},{9,0,449},{10,0,
+192},{138,0,740},{134,0,928},{4,0,241},{7,10,607},{136,10,99},{8,11,123},{15,11,
+6},{144,11,7},{6,11,285},{8,11,654},{11,11,749},{12,11,190},{12,11,327},{13,11,
+120},{13,11,121},{13,11,327},{15,11,47},{146,11,40},{4,10,41},{5,10,74},{7,10,
+1627},{11,10,871},{140,10,619},{7,0,1525},{11,10,329},{11,10,965},{12,10,241},{
+14,10,354},{15,10,22},{148,10,63},{132,0,259},{135,11,183},{9,10,209},{137,10,
+300},{5,11,937},{135,11,100},{133,10,98},{4,0,173},{5,0,312},{5,0,512},{135,0,
+1285},{141,0,185},{7,0,1603},{7,0,1691},{9,0,464},{11,0,195},{12,0,279},{12,0,
+448},{14,0,11},{147,0,102},{135,0,1113},{133,10,984},{4,0,452},{5,0,583},{135,0,
+720},{4,0,547},{5,0,817},{6,0,433},{7,0,593},{7,0,1378},{8,0,161},{9,0,284},{10,
+0,313},{139,0,886},{8,0,722},{4,10,182},{6,10,205},{135,10,220},{150,0,13},{4,10
+,42},{9,10,205},{9,10,786},{138,10,659},{6,0,289},{7,0,1670},{12,0,57},{151,0,4}
+,{132,10,635},{14,0,43},{146,0,21},{139,10,533},{135,0,1694},{8,0,420},{139,0,
+193},{135,0,409},{132,10,371},{4,10,272},{135,10,836},{5,10,825},{134,10,1640},{
+5,11,251},{5,11,956},{8,11,268},{9,11,214},{146,11,142},{138,0,308},{6,0,1863},{
+141,11,37},{137,10,879},{7,10,317},{135,10,569},{132,11,294},{134,0,790},{5,0,
+1002},{136,0,745},{5,11,346},{5,11,711},{136,11,390},{135,0,289},{5,0,504},{11,0
+,68},{137,10,307},{4,0,239},{6,0,477},{7,0,1607},{139,0,617},{149,0,13},{133,0,
+609},{133,11,624},{5,11,783},{7,11,1998},{135,11,2047},{133,10,525},{132,0,367},
+{132,11,594},{6,0,528},{133,10,493},{4,10,174},{135,10,911},{8,10,417},{137,10,
+782},{132,0,694},{7,0,548},{137,0,58},{4,10,32},{5,10,215},{6,10,269},{7,10,1782
+},{7,10,1892},{10,10,16},{11,10,822},{11,10,954},{141,10,481},{140,0,687},{7,0,
+1749},{136,10,477},{132,11,569},{133,10,308},{135,10,1088},{4,0,661},{138,0,1004
+},{5,11,37},{6,11,39},{6,11,451},{7,11,218},{7,11,667},{7,11,1166},{7,11,1687},{
+8,11,662},{144,11,2},{9,0,445},{12,0,53},{13,0,492},{5,10,126},{8,10,297},{9,10,
+366},{140,10,374},{7,10,1551},{139,10,361},{148,0,74},{134,11,508},{135,0,213},{
+132,10,175},{132,10,685},{6,0,760},{6,0,834},{134,0,1248},{7,11,453},{7,11,635},
+{7,11,796},{8,11,331},{9,11,328},{9,11,330},{9,11,865},{10,11,119},{10,11,235},{
+11,11,111},{11,11,129},{11,11,240},{12,11,31},{12,11,66},{12,11,222},{12,11,269}
+,{12,11,599},{12,11,689},{13,11,186},{13,11,364},{142,11,345},{7,0,1672},{139,0,
+189},{133,10,797},{133,10,565},{6,0,1548},{6,11,98},{7,11,585},{135,11,702},{9,0
+,968},{15,0,192},{149,0,56},{4,10,252},{6,11,37},{7,11,299},{7,10,1068},{7,11,
+1666},{8,11,195},{8,11,316},{9,11,178},{9,11,276},{9,11,339},{9,11,536},{10,11,
+102},{10,11,362},{10,10,434},{10,11,785},{11,11,55},{11,11,149},{11,10,228},{11,
+10,426},{11,11,773},{13,10,231},{13,11,416},{13,11,419},{14,11,38},{14,11,41},{
+14,11,210},{18,10,106},{148,10,87},{4,0,751},{11,0,390},{140,0,32},{4,0,409},{
+133,0,78},{11,11,458},{12,11,15},{140,11,432},{7,0,1602},{10,0,257},{10,0,698},{
+11,0,544},{11,0,585},{12,0,212},{13,0,307},{5,10,231},{7,10,601},{9,10,277},{9,
+10,674},{10,10,178},{10,10,418},{10,10,509},{11,10,531},{12,10,113},{12,10,475},
+{13,10,99},{142,10,428},{6,0,473},{145,0,105},{6,0,1949},{15,0,156},{133,11,645}
+,{7,10,1591},{144,10,43},{135,0,1779},{135,10,1683},{4,11,290},{135,11,1356},{
+134,0,763},{6,11,70},{7,11,1292},{10,11,762},{139,11,288},{142,0,29},{140,11,428
+},{7,0,883},{7,11,131},{7,11,422},{8,11,210},{140,11,573},{134,0,488},{4,10,399}
+,{5,10,119},{5,10,494},{7,10,751},{137,10,556},{133,0,617},{132,11,936},{139,0,
+50},{7,0,1518},{139,0,694},{137,0,785},{4,0,546},{135,0,2042},{7,11,716},{13,11,
+97},{141,11,251},{132,11,653},{145,0,22},{134,0,1016},{4,0,313},{133,0,577},{136
+,11,657},{8,0,184},{141,0,433},{135,0,935},{6,0,720},{9,0,114},{146,11,80},{12,0
+,186},{12,0,292},{14,0,100},{18,0,70},{7,10,594},{7,10,851},{7,10,1858},{9,10,
+411},{9,10,574},{9,10,666},{9,10,737},{10,10,346},{10,10,712},{11,10,246},{11,10
+,432},{11,10,517},{11,10,647},{11,10,679},{11,10,727},{12,10,304},{12,10,305},{
+12,10,323},{12,10,483},{12,10,572},{12,10,593},{12,10,602},{13,10,95},{13,10,101
+},{13,10,171},{13,10,315},{13,10,378},{13,10,425},{13,10,475},{14,10,63},{14,10,
+380},{14,10,384},{15,10,133},{18,10,112},{148,10,72},{135,10,1093},{135,11,1836}
+,{132,10,679},{137,10,203},{11,0,402},{12,0,109},{12,0,431},{13,0,179},{13,0,206
+},{14,0,217},{16,0,3},{148,0,53},{7,11,1368},{8,11,232},{8,11,361},{10,11,682},{
+138,11,742},{137,10,714},{5,0,886},{6,0,46},{6,0,1790},{7,0,14},{7,0,732},{7,0,
+1654},{8,0,95},{8,0,327},{8,0,616},{9,0,892},{10,0,598},{10,0,769},{11,0,134},{
+11,0,747},{12,0,378},{14,0,97},{137,11,534},{4,0,969},{136,10,825},{137,11,27},{
+6,0,727},{142,11,12},{133,0,1021},{134,0,1190},{134,11,1657},{5,10,143},{5,10,
+769},{6,10,1760},{7,10,682},{7,10,1992},{136,10,736},{132,0,153},{135,11,127},{
+133,0,798},{132,0,587},{6,0,598},{7,0,42},{8,0,695},{10,0,212},{11,0,158},{14,0,
+196},{145,0,85},{133,10,860},{6,0,1929},{134,0,1933},{5,0,957},{5,0,1008},{9,0,
+577},{12,0,141},{6,10,422},{7,10,0},{7,10,1544},{8,11,364},{11,10,990},{12,10,
+453},{13,10,47},{141,10,266},{134,0,1319},{4,0,129},{135,0,465},{7,0,470},{7,0,
+1057},{7,0,1201},{9,0,755},{11,0,906},{140,0,527},{7,0,908},{146,0,7},{5,0,148},
+{136,0,450},{5,10,515},{137,10,131},{7,10,1605},{11,10,962},{146,10,139},{132,10
+,646},{134,0,1166},{4,10,396},{7,10,728},{9,10,117},{13,10,202},{148,10,51},{6,
+10,121},{6,10,124},{6,10,357},{7,10,1138},{7,10,1295},{8,10,162},{139,10,655},{
+14,0,374},{142,11,374},{138,0,253},{139,0,1003},{5,11,909},{9,11,849},{138,11,
+805},{133,10,237},{7,11,525},{7,11,1579},{8,11,497},{136,11,573},{137,0,46},{132
+,0,879},{134,0,806},{135,0,1868},{6,0,1837},{134,0,1846},{6,0,730},{134,0,881},{
+7,0,965},{7,0,1460},{7,0,1604},{7,11,193},{7,11,397},{7,11,1105},{8,11,124},{8,
+11,619},{9,11,305},{10,11,264},{11,11,40},{12,11,349},{13,11,134},{13,11,295},{
+14,11,155},{15,11,120},{146,11,105},{136,0,506},{143,0,10},{4,11,262},{7,11,342}
+,{7,10,571},{7,10,1877},{10,10,366},{141,11,23},{133,11,641},{10,0,22},{9,10,513
+},{10,10,39},{12,10,122},{140,10,187},{135,11,1431},{150,11,49},{4,11,99},{6,11,
+250},{6,11,346},{8,11,127},{138,11,81},{6,0,2014},{8,0,928},{10,0,960},{10,0,979
+},{140,0,996},{134,0,296},{132,11,915},{5,11,75},{9,11,517},{10,11,470},{12,11,
+155},{141,11,224},{137,10,873},{4,0,854},{140,11,18},{134,0,587},{7,10,107},{7,
+10,838},{8,10,550},{138,10,401},{11,0,636},{15,0,145},{17,0,34},{19,0,50},{23,0,
+20},{11,10,588},{11,10,864},{11,10,968},{143,10,160},{135,11,216},{7,0,982},{10,
+0,32},{143,0,56},{133,10,768},{133,11,954},{6,11,304},{7,11,1114},{8,11,418},{10
+,11,345},{11,11,341},{11,11,675},{141,11,40},{9,11,410},{139,11,425},{136,0,941}
+,{5,0,435},{132,10,894},{5,0,85},{6,0,419},{7,0,134},{7,0,305},{7,0,361},{7,0,
+1337},{8,0,71},{140,0,519},{140,0,688},{135,0,740},{5,0,691},{7,0,345},{9,0,94},
+{140,0,169},{5,0,183},{6,0,582},{10,0,679},{140,0,435},{134,11,14},{6,0,945},{
+135,0,511},{134,11,1708},{5,11,113},{6,11,243},{7,11,1865},{11,11,161},{16,11,37
+},{145,11,99},{132,11,274},{137,0,539},{7,0,1993},{8,0,684},{134,10,272},{6,0,
+659},{134,0,982},{4,10,9},{5,10,128},{7,10,368},{11,10,480},{148,10,3},{134,0,
+583},{132,0,803},{133,0,704},{4,0,179},{5,0,198},{133,0,697},{7,0,347},{7,0,971}
+,{8,0,181},{10,0,711},{135,11,166},{136,10,682},{4,10,2},{7,10,545},{7,10,894},{
+136,11,521},{135,0,481},{132,0,243},{5,0,203},{7,0,19},{7,0,71},{7,0,113},{10,0,
+405},{11,0,357},{142,0,240},{5,11,725},{5,11,727},{135,11,1811},{6,0,826},{137,
+11,304},{7,0,1450},{139,0,99},{133,11,654},{134,0,492},{5,0,134},{6,0,408},{6,0,
+495},{7,0,1593},{6,11,273},{10,11,188},{13,11,377},{146,11,77},{9,10,769},{140,
+10,185},{135,11,410},{142,0,4},{4,0,665},{134,11,1785},{4,0,248},{7,0,137},{137,
+0,349},{5,10,530},{142,10,113},{7,0,1270},{139,0,612},{132,11,780},{5,0,371},{
+135,0,563},{135,0,826},{6,0,1535},{23,0,21},{151,0,23},{4,0,374},{7,0,547},{7,0,
+1700},{7,0,1833},{139,0,858},{133,10,556},{7,11,612},{8,11,545},{8,11,568},{8,11
+,642},{9,11,717},{10,11,541},{10,11,763},{11,11,449},{12,11,489},{13,11,153},{13
+,11,296},{14,11,138},{14,11,392},{15,11,50},{16,11,6},{16,11,12},{148,11,9},{9,0
+,311},{141,0,42},{8,10,16},{140,10,568},{6,0,1968},{6,0,2027},{138,0,991},{6,0,
+1647},{7,0,1552},{7,0,2010},{9,0,494},{137,0,509},{133,11,948},{6,10,186},{137,
+10,426},{134,0,769},{134,0,642},{132,10,585},{6,0,123},{7,0,214},{9,0,728},{10,0
+,157},{11,0,346},{11,0,662},{143,0,106},{142,11,381},{135,0,1435},{4,11,532},{5,
+11,706},{135,11,662},{5,11,837},{134,11,1651},{4,10,93},{5,10,252},{6,10,229},{7
+,10,291},{9,10,550},{139,10,644},{148,0,79},{137,10,749},{134,0,1425},{137,10,
+162},{4,11,362},{7,11,52},{7,11,303},{140,11,166},{132,10,381},{4,11,330},{7,11,
+933},{7,11,2012},{136,11,292},{135,11,767},{4,0,707},{5,0,588},{6,0,393},{13,0,
+106},{18,0,49},{147,0,41},{6,0,211},{7,0,1690},{11,0,486},{140,0,369},{137,11,
+883},{4,11,703},{135,11,207},{4,0,187},{5,0,184},{5,0,690},{7,0,1869},{10,0,756}
+,{139,0,783},{132,11,571},{134,0,1382},{5,0,175},{6,10,77},{6,10,157},{7,10,974}
+,{7,10,1301},{7,10,1339},{7,10,1490},{7,10,1873},{137,10,628},{134,0,1493},{5,11
+,873},{133,11,960},{134,0,1007},{12,11,93},{12,11,501},{13,11,362},{14,11,151},{
+15,11,40},{15,11,59},{16,11,46},{17,11,25},{18,11,14},{18,11,134},{19,11,25},{19
+,11,69},{20,11,16},{20,11,19},{20,11,66},{21,11,23},{21,11,25},{150,11,42},{11,
+10,919},{141,10,409},{134,0,219},{5,0,582},{6,0,1646},{7,0,99},{7,0,1962},{7,0,
+1986},{8,0,515},{8,0,773},{9,0,23},{9,0,491},{12,0,620},{142,0,93},{133,0,851},{
+5,11,33},{134,11,470},{135,11,1291},{134,0,1278},{135,11,1882},{135,10,1489},{
+132,0,1000},{138,0,982},{8,0,762},{8,0,812},{137,0,910},{6,11,47},{7,11,90},{7,
+11,664},{7,11,830},{7,11,1380},{7,11,2025},{8,11,448},{136,11,828},{4,0,98},{4,0
+,940},{6,0,1819},{6,0,1834},{6,0,1841},{7,0,1365},{8,0,859},{8,0,897},{8,0,918},
+{9,0,422},{9,0,670},{10,0,775},{10,0,894},{10,0,909},{10,0,910},{10,0,935},{11,0
+,210},{12,0,750},{12,0,755},{13,0,26},{13,0,457},{13,0,476},{16,0,100},{16,0,109
+},{18,0,173},{18,0,175},{8,10,398},{9,10,681},{139,10,632},{9,11,417},{137,11,
+493},{136,10,645},{138,0,906},{134,0,1730},{134,10,20},{133,11,1019},{134,0,1185
+},{10,0,40},{136,10,769},{9,0,147},{134,11,208},{140,0,650},{5,0,209},{6,0,30},{
+11,0,56},{139,0,305},{132,0,553},{138,11,344},{6,11,68},{7,11,398},{7,11,448},{7
+,11,1629},{7,11,1813},{8,11,387},{8,11,442},{9,11,710},{10,11,282},{138,11,722},
+{5,0,597},{14,0,20},{142,11,20},{135,0,1614},{135,10,1757},{4,0,150},{5,0,303},{
+6,0,327},{135,10,937},{16,0,49},{7,10,1652},{144,11,49},{8,0,192},{10,0,78},{141
+,0,359},{135,0,786},{143,0,134},{6,0,1638},{7,0,79},{7,0,496},{9,0,138},{10,0,
+336},{11,0,12},{12,0,412},{12,0,440},{142,0,305},{136,11,491},{4,10,579},{5,10,
+226},{5,10,323},{135,10,960},{7,0,204},{7,0,415},{8,0,42},{10,0,85},{139,0,564},
+{132,0,614},{4,11,403},{5,11,441},{7,11,450},{11,11,101},{12,11,193},{141,11,430
+},{135,11,1927},{135,11,1330},{4,0,3},{5,0,247},{5,0,644},{7,0,744},{7,0,1207},{
+7,0,1225},{7,0,1909},{146,0,147},{136,0,942},{4,0,1019},{134,0,2023},{5,11,679},
+{133,10,973},{5,0,285},{9,0,67},{13,0,473},{143,0,82},{7,11,328},{137,11,326},{
+151,0,8},{6,10,135},{135,10,1176},{135,11,1128},{134,0,1309},{135,11,1796},{135,
+10,314},{4,11,574},{7,11,350},{7,11,1024},{8,11,338},{9,11,677},{10,11,808},{139
+,11,508},{7,11,818},{17,11,14},{17,11,45},{18,11,75},{148,11,18},{146,10,4},{135
+,11,1081},{4,0,29},{6,0,532},{7,0,1628},{7,0,1648},{9,0,350},{10,0,433},{11,0,97
+},{11,0,557},{11,0,745},{12,0,289},{12,0,335},{12,0,348},{12,0,606},{13,0,116},{
+13,0,233},{13,0,466},{14,0,181},{14,0,209},{14,0,232},{14,0,236},{14,0,300},{16,
+0,41},{148,0,97},{7,0,318},{6,10,281},{8,10,282},{8,10,480},{8,10,499},{9,10,198
+},{10,10,143},{10,10,169},{10,10,211},{10,10,417},{10,10,574},{11,10,147},{11,10
+,395},{12,10,75},{12,10,407},{12,10,608},{13,10,500},{142,10,251},{135,11,1676},
+{135,11,2037},{135,0,1692},{5,0,501},{7,0,1704},{9,0,553},{11,0,520},{12,0,557},
+{141,0,249},{6,0,1527},{14,0,324},{15,0,55},{15,0,80},{14,11,324},{15,11,55},{
+143,11,80},{135,10,1776},{8,0,988},{137,11,297},{132,10,419},{142,0,223},{139,11
+,234},{7,0,1123},{12,0,508},{14,0,102},{14,0,226},{144,0,57},{4,10,138},{7,10,
+1012},{7,10,1280},{137,10,76},{7,0,1764},{5,10,29},{140,10,638},{134,0,2015},{
+134,0,1599},{138,11,56},{6,11,306},{7,11,1140},{7,11,1340},{8,11,133},{138,11,
+449},{139,11,1011},{6,10,1710},{135,10,2038},{7,11,1763},{140,11,310},{6,0,129},
+{4,10,17},{5,10,23},{7,10,995},{11,10,383},{11,10,437},{12,10,460},{140,10,532},
+{5,11,329},{136,11,260},{133,10,862},{132,0,534},{6,0,811},{135,0,626},{132,11,
+657},{4,0,25},{5,0,60},{6,0,504},{7,0,614},{7,0,1155},{12,0,0},{152,11,7},{7,0,
+1248},{11,0,621},{139,0,702},{137,0,321},{8,10,70},{12,10,171},{141,10,272},{10,
+10,233},{139,10,76},{4,0,379},{7,0,1397},{134,10,442},{5,11,66},{7,11,1896},{136
+,11,288},{134,11,1643},{134,10,1709},{4,11,21},{5,11,91},{5,11,570},{5,11,648},{
+5,11,750},{5,11,781},{6,11,54},{6,11,112},{6,11,402},{6,11,1732},{7,11,315},{7,
+11,749},{7,11,1347},{7,11,1900},{9,11,78},{9,11,508},{10,11,611},{11,11,510},{11
+,11,728},{13,11,36},{14,11,39},{16,11,83},{17,11,124},{148,11,30},{4,0,118},{6,0
+,274},{6,0,361},{7,0,75},{141,0,441},{10,11,322},{10,11,719},{139,11,407},{147,
+10,119},{12,11,549},{14,11,67},{147,11,60},{11,10,69},{12,10,105},{12,10,117},{
+13,10,213},{14,10,13},{14,10,62},{14,10,177},{14,10,421},{15,10,19},{146,10,141}
+,{9,0,841},{137,10,309},{7,10,608},{7,10,976},{8,11,125},{8,11,369},{8,11,524},{
+9,10,146},{10,10,206},{10,11,486},{10,10,596},{11,11,13},{11,11,381},{11,11,736}
+,{11,11,766},{11,11,845},{13,11,114},{13,10,218},{13,11,292},{14,11,47},{142,10,
+153},{12,0,693},{135,11,759},{5,0,314},{6,0,221},{7,0,419},{10,0,650},{11,0,396}
+,{12,0,156},{13,0,369},{14,0,333},{145,0,47},{6,11,1684},{6,11,1731},{7,11,356},
+{7,11,1932},{8,11,54},{8,11,221},{9,11,225},{9,11,356},{10,11,77},{10,11,446},{
+10,11,731},{12,11,404},{141,11,491},{132,11,375},{4,10,518},{135,10,1136},{4,0,
+913},{4,11,411},{11,11,643},{140,11,115},{4,11,80},{133,11,44},{8,10,689},{137,
+10,863},{138,0,880},{4,10,18},{7,10,145},{7,10,444},{7,10,1278},{8,10,49},{8,10,
+400},{9,10,71},{9,10,250},{10,10,459},{12,10,160},{144,10,24},{136,0,475},{5,0,
+1016},{5,11,299},{135,11,1083},{7,0,602},{8,0,179},{10,0,781},{140,0,126},{6,0,
+329},{138,0,111},{135,0,1864},{4,11,219},{7,11,1761},{137,11,86},{6,0,1888},{6,0
+,1892},{6,0,1901},{6,0,1904},{9,0,953},{9,0,985},{9,0,991},{9,0,1001},{12,0,818}
+,{12,0,846},{12,0,847},{12,0,861},{12,0,862},{12,0,873},{12,0,875},{12,0,877},{
+12,0,879},{12,0,881},{12,0,884},{12,0,903},{12,0,915},{12,0,926},{12,0,939},{15,
+0,182},{15,0,219},{15,0,255},{18,0,191},{18,0,209},{18,0,211},{149,0,41},{5,11,
+328},{135,11,918},{137,0,780},{12,0,82},{143,0,36},{133,10,1010},{5,0,821},{134,
+0,1687},{133,11,514},{132,0,956},{134,0,1180},{10,0,112},{5,10,87},{7,10,313},{7
+,10,1103},{10,10,582},{11,10,389},{11,10,813},{12,10,385},{13,10,286},{14,10,124
+},{146,10,108},{5,0,71},{7,0,1407},{9,0,704},{10,0,261},{10,0,619},{11,0,547},{
+11,0,619},{143,0,157},{4,0,531},{5,0,455},{5,11,301},{6,11,571},{14,11,49},{146,
+11,102},{132,10,267},{6,0,385},{7,0,2008},{9,0,337},{138,0,517},{133,11,726},{
+133,11,364},{4,11,76},{7,11,1550},{9,11,306},{9,11,430},{9,11,663},{10,11,683},{
+11,11,427},{11,11,753},{12,11,334},{12,11,442},{14,11,258},{14,11,366},{143,11,
+131},{6,0,1865},{6,0,1879},{6,0,1881},{6,0,1894},{6,0,1908},{9,0,915},{9,0,926},
+{9,0,940},{9,0,943},{9,0,966},{9,0,980},{9,0,989},{9,0,1005},{9,0,1010},{12,0,
+813},{12,0,817},{12,0,840},{12,0,843},{12,0,855},{12,0,864},{12,0,871},{12,0,872
+},{12,0,899},{12,0,905},{12,0,924},{15,0,171},{15,0,181},{15,0,224},{15,0,235},{
+15,0,251},{146,0,184},{137,11,52},{5,0,16},{6,0,86},{6,0,603},{7,0,292},{7,0,561
+},{8,0,257},{8,0,382},{9,0,721},{9,0,778},{11,0,581},{140,0,466},{4,0,486},{5,0,
+491},{135,10,1121},{4,0,72},{6,0,265},{135,0,1300},{135,11,1183},{10,10,249},{
+139,10,209},{132,10,561},{137,11,519},{4,11,656},{4,10,760},{135,11,779},{9,10,
+154},{140,10,485},{135,11,1793},{135,11,144},{136,10,255},{133,0,621},{4,10,368}
+,{135,10,641},{135,11,1373},{7,11,554},{7,11,605},{141,11,10},{137,0,234},{5,0,
+815},{6,0,1688},{134,0,1755},{5,11,838},{5,11,841},{134,11,1649},{7,0,1987},{7,0
+,2040},{136,0,743},{133,11,1012},{6,0,197},{136,0,205},{6,0,314},{134,11,314},{
+144,11,53},{6,11,251},{7,11,365},{7,11,1357},{7,11,1497},{8,11,154},{141,11,281}
+,{133,11,340},{6,0,452},{7,0,312},{138,0,219},{138,0,589},{4,0,333},{9,0,176},{
+12,0,353},{141,0,187},{9,10,92},{147,10,91},{134,0,1110},{11,0,47},{139,11,495},
+{6,10,525},{8,10,806},{9,10,876},{140,10,284},{8,11,261},{9,11,144},{9,11,466},{
+10,11,370},{12,11,470},{13,11,144},{142,11,348},{137,11,897},{8,0,863},{8,0,864}
+,{8,0,868},{8,0,884},{10,0,866},{10,0,868},{10,0,873},{10,0,911},{10,0,912},{10,
+0,944},{12,0,727},{6,11,248},{9,11,546},{10,11,535},{11,11,681},{141,11,135},{6,
+0,300},{135,0,1515},{134,0,1237},{139,10,958},{133,10,594},{140,11,250},{134,0,
+1685},{134,11,567},{7,0,135},{8,0,7},{8,0,62},{9,0,243},{10,0,658},{10,0,697},{
+11,0,456},{139,0,756},{9,0,395},{138,0,79},{6,10,1641},{136,10,820},{4,10,302},{
+135,10,1766},{134,11,174},{135,10,1313},{135,0,631},{134,10,1674},{134,11,395},{
+138,0,835},{7,0,406},{7,0,459},{8,0,606},{139,0,726},{134,11,617},{134,0,979},{6
+,10,389},{7,10,149},{9,10,142},{138,10,94},{5,11,878},{133,11,972},{6,10,8},{7,
+10,1881},{8,10,91},{136,11,511},{133,0,612},{132,11,351},{4,0,372},{7,0,482},{8,
+0,158},{9,0,602},{9,0,615},{10,0,245},{10,0,678},{10,0,744},{11,0,248},{139,0,
+806},{5,0,854},{135,0,1991},{132,11,286},{135,11,344},{7,11,438},{7,11,627},{7,
+11,1516},{8,11,40},{9,11,56},{9,11,294},{10,11,30},{10,11,259},{11,11,969},{146,
+11,148},{135,0,1492},{5,11,259},{7,11,414},{7,11,854},{142,11,107},{135,10,1746}
+,{6,0,833},{134,0,998},{135,10,24},{6,0,750},{135,0,1739},{4,10,503},{135,10,
+1661},{5,10,130},{7,10,1314},{9,10,610},{10,10,718},{11,10,601},{11,10,819},{11,
+10,946},{140,10,536},{10,10,149},{11,10,280},{142,10,336},{132,11,738},{135,10,
+1946},{5,0,195},{135,0,1685},{7,0,1997},{8,0,730},{139,0,1006},{151,11,17},{133,
+11,866},{14,0,463},{14,0,470},{150,0,61},{5,0,751},{8,0,266},{11,0,578},{4,10,
+392},{135,10,1597},{5,10,433},{9,10,633},{139,10,629},{135,0,821},{6,0,715},{134
+,0,1325},{133,11,116},{6,0,868},{132,11,457},{134,0,959},{6,10,234},{138,11,199}
+,{7,0,1053},{7,10,1950},{8,10,680},{11,10,817},{147,10,88},{7,10,1222},{138,10,
+386},{5,0,950},{5,0,994},{6,0,351},{134,0,1124},{134,0,1081},{7,0,1595},{6,10,5}
+,{11,10,249},{12,10,313},{16,10,66},{145,10,26},{148,0,59},{5,11,527},{6,11,189}
+,{135,11,859},{5,10,963},{6,10,1773},{11,11,104},{11,11,554},{15,11,60},{143,11,
+125},{135,0,47},{137,0,684},{134,11,116},{134,0,1606},{134,0,777},{7,0,1020},{8,
+10,509},{136,10,792},{135,0,1094},{132,0,350},{133,11,487},{4,11,86},{5,11,667},
+{5,11,753},{6,11,316},{6,11,455},{135,11,946},{7,0,1812},{13,0,259},{13,0,356},{
+14,0,242},{147,0,114},{132,10,931},{133,0,967},{4,0,473},{7,0,623},{8,0,808},{9,
+0,871},{9,0,893},{11,0,38},{11,0,431},{12,0,112},{12,0,217},{12,0,243},{12,0,562
+},{12,0,663},{12,0,683},{13,0,141},{13,0,197},{13,0,227},{13,0,406},{13,0,487},{
+14,0,156},{14,0,203},{14,0,224},{14,0,256},{18,0,58},{150,0,0},{138,0,286},{7,10
+,943},{139,10,614},{135,10,1837},{150,11,45},{132,0,798},{4,0,222},{7,0,286},{
+136,0,629},{4,11,79},{7,11,1773},{10,11,450},{11,11,589},{13,11,332},{13,11,493}
+,{14,11,183},{14,11,334},{14,11,362},{14,11,368},{14,11,376},{14,11,379},{19,11,
+90},{19,11,103},{19,11,127},{148,11,90},{5,0,337},{11,0,513},{11,0,889},{11,0,
+961},{12,0,461},{13,0,79},{15,0,121},{4,10,90},{5,10,545},{7,10,754},{9,10,186},
+{10,10,72},{10,10,782},{11,10,577},{11,10,610},{12,10,354},{12,10,362},{140,10,
+595},{141,0,306},{136,0,146},{7,0,1646},{9,10,329},{11,10,254},{141,11,124},{4,0
+,465},{135,0,1663},{132,0,525},{133,11,663},{10,0,299},{18,0,74},{9,10,187},{11,
+10,1016},{145,10,44},{7,0,165},{7,0,919},{4,10,506},{136,10,517},{5,10,295},{135
+,10,1680},{133,11,846},{134,0,1064},{5,11,378},{7,11,1402},{7,11,1414},{8,11,465
+},{9,11,286},{10,11,185},{10,11,562},{10,11,635},{11,11,31},{11,11,393},{12,11,
+456},{13,11,312},{18,11,65},{18,11,96},{147,11,89},{132,0,596},{7,10,987},{9,10,
+688},{10,10,522},{11,10,788},{140,10,566},{6,0,82},{7,0,138},{7,0,517},{7,0,1741
+},{11,0,238},{4,11,648},{134,10,1775},{7,0,1233},{7,10,700},{7,10,940},{8,10,514
+},{9,10,116},{9,10,535},{10,10,118},{11,10,107},{11,10,148},{11,10,922},{12,10,
+254},{12,10,421},{142,10,238},{4,0,962},{6,0,1824},{8,0,894},{12,0,708},{12,0,
+725},{14,0,451},{20,0,94},{22,0,59},{150,0,62},{5,11,945},{6,11,1656},{6,11,1787
+},{7,11,167},{8,11,824},{9,11,391},{10,11,375},{139,11,185},{5,0,495},{7,0,834},
+{9,0,733},{139,0,378},{4,10,743},{135,11,1273},{6,0,1204},{7,11,1645},{8,11,352}
+,{137,11,249},{139,10,292},{133,0,559},{132,11,152},{9,0,499},{10,0,341},{15,0,
+144},{19,0,49},{7,10,1283},{9,10,227},{11,10,325},{11,10,408},{14,10,180},{146,
+10,47},{6,0,21},{6,0,1737},{7,0,1444},{136,0,224},{133,11,1006},{7,0,1446},{9,0,
+97},{17,0,15},{5,10,81},{7,10,146},{7,10,1342},{8,10,53},{8,10,561},{8,10,694},{
+8,10,754},{9,10,115},{9,10,894},{10,10,462},{10,10,813},{11,10,230},{11,10,657},
+{11,10,699},{11,10,748},{12,10,119},{12,10,200},{12,10,283},{142,10,273},{5,10,
+408},{137,10,747},{135,11,431},{135,11,832},{6,0,729},{134,0,953},{4,0,727},{8,0
+,565},{5,11,351},{7,11,264},{136,11,565},{134,0,1948},{5,0,519},{5,11,40},{7,11,
+598},{7,11,1638},{8,11,78},{9,11,166},{9,11,640},{9,11,685},{9,11,773},{11,11,
+215},{13,11,65},{14,11,172},{14,11,317},{145,11,6},{8,11,60},{9,11,343},{139,11,
+769},{137,11,455},{134,0,1193},{140,0,790},{7,11,1951},{8,11,765},{8,11,772},{
+140,11,671},{7,11,108},{8,11,219},{8,11,388},{9,11,639},{9,11,775},{11,11,275},{
+140,11,464},{132,11,468},{7,10,30},{8,10,86},{8,10,315},{8,10,700},{9,10,576},{9
+,10,858},{11,10,310},{11,10,888},{11,10,904},{12,10,361},{141,10,248},{5,11,15},
+{6,11,56},{7,11,1758},{8,11,500},{9,11,730},{11,11,331},{13,11,150},{142,11,282}
+,{4,0,402},{7,0,2},{8,0,323},{136,0,479},{138,10,839},{11,0,580},{142,0,201},{5,
+0,59},{135,0,672},{137,10,617},{146,0,34},{134,11,1886},{4,0,961},{136,0,896},{6
+,0,1285},{5,11,205},{6,11,438},{137,11,711},{134,10,428},{7,10,524},{8,10,169},{
+8,10,234},{9,10,480},{138,10,646},{148,0,46},{141,0,479},{133,11,534},{6,0,2019}
+,{134,10,1648},{4,0,85},{7,0,549},{7,10,1205},{138,10,637},{4,0,663},{5,0,94},{7
+,11,235},{7,11,1475},{15,11,68},{146,11,120},{6,11,443},{9,11,237},{9,11,571},{9
+,11,695},{10,11,139},{11,11,715},{12,11,417},{141,11,421},{132,0,783},{4,0,682},
+{8,0,65},{9,10,39},{10,10,166},{11,10,918},{12,10,635},{20,10,10},{22,10,27},{22
+,10,43},{150,10,52},{6,0,11},{135,0,187},{132,0,522},{4,0,52},{135,0,661},{4,0,
+383},{133,0,520},{135,11,546},{11,0,343},{142,0,127},{4,11,578},{7,10,157},{7,11
+,624},{7,11,916},{8,10,279},{10,11,256},{11,11,87},{139,11,703},{134,10,604},{4,
+0,281},{5,0,38},{7,0,194},{7,0,668},{7,0,1893},{137,0,397},{7,10,945},{11,10,713
+},{139,10,744},{139,10,1022},{9,0,635},{139,0,559},{5,11,923},{7,11,490},{12,11,
+553},{13,11,100},{14,11,118},{143,11,75},{132,0,975},{132,10,567},{137,10,859},{
+7,10,1846},{7,11,1846},{8,10,628},{136,11,628},{148,0,116},{138,11,750},{14,0,51
+},{14,11,51},{15,11,7},{148,11,20},{132,0,858},{134,0,1075},{4,11,924},{133,10,
+762},{136,0,535},{133,0,448},{10,10,784},{141,10,191},{133,10,298},{7,0,610},{
+135,0,1501},{7,10,633},{7,10,905},{7,10,909},{7,10,1538},{9,10,767},{140,10,636}
+,{4,11,265},{7,11,807},{135,11,950},{5,11,93},{12,11,267},{144,11,26},{136,0,191
+},{139,10,301},{135,10,1970},{135,0,267},{4,0,319},{5,0,699},{138,0,673},{6,0,
+336},{7,0,92},{7,0,182},{8,0,453},{8,0,552},{9,0,204},{9,0,285},{10,0,99},{11,0,
+568},{11,0,950},{12,0,94},{16,0,20},{16,0,70},{19,0,55},{12,10,644},{144,10,90},
+{6,0,551},{7,0,1308},{7,10,845},{7,11,994},{8,10,160},{137,10,318},{19,11,1},{19
+,11,26},{150,11,9},{7,0,1406},{9,0,218},{141,0,222},{5,0,256},{138,0,69},{5,11,
+233},{5,11,320},{6,11,140},{7,11,330},{136,11,295},{6,0,1980},{136,0,952},{4,0,
+833},{137,11,678},{133,11,978},{4,11,905},{6,11,1701},{137,11,843},{138,10,735},
+{136,10,76},{17,0,39},{148,0,36},{18,0,81},{146,11,81},{14,0,352},{17,0,53},{18,
+0,146},{18,0,152},{19,0,11},{150,0,54},{135,0,634},{138,10,841},{132,0,618},{4,0
+,339},{7,0,259},{17,0,73},{4,11,275},{140,11,376},{132,11,509},{7,11,273},{139,
+11,377},{4,0,759},{13,0,169},{137,10,804},{6,10,96},{135,10,1426},{4,10,651},{
+133,10,289},{7,0,1075},{8,10,35},{9,10,511},{10,10,767},{147,10,118},{6,0,649},{
+6,0,670},{136,0,482},{5,0,336},{6,0,341},{6,0,478},{6,0,1763},{136,0,386},{5,11,
+802},{7,11,2021},{8,11,805},{14,11,94},{15,11,65},{16,11,4},{16,11,77},{16,11,80
+},{145,11,5},{6,0,1035},{5,11,167},{5,11,899},{6,11,410},{137,11,777},{134,11,
+1705},{5,0,924},{133,0,969},{132,10,704},{135,0,73},{135,11,10},{135,10,1078},{5
+,11,11},{6,11,117},{6,11,485},{7,11,1133},{9,11,582},{9,11,594},{11,11,21},{11,
+11,818},{12,11,535},{141,11,86},{135,0,1971},{4,11,264},{7,11,1067},{8,11,204},{
+8,11,385},{139,11,953},{6,0,1458},{135,0,1344},{5,0,396},{134,0,501},{4,10,720},
+{133,10,306},{4,0,929},{5,0,799},{8,0,46},{8,0,740},{133,10,431},{7,11,646},{7,
+11,1730},{11,11,446},{141,11,178},{7,0,276},{5,10,464},{6,10,236},{7,10,696},{7,
+10,914},{7,10,1108},{7,10,1448},{9,10,15},{9,10,564},{10,10,14},{12,10,565},{13,
+10,449},{14,10,53},{15,10,13},{16,10,64},{145,10,41},{4,0,892},{133,0,770},{6,10
+,1767},{12,10,194},{145,10,107},{135,0,158},{5,10,840},{138,11,608},{134,0,1432}
+,{138,11,250},{8,11,794},{9,11,400},{10,11,298},{142,11,228},{151,0,25},{7,11,
+1131},{135,11,1468},{135,0,2001},{9,10,642},{11,10,236},{142,10,193},{4,10,68},{
+5,10,634},{6,10,386},{7,10,794},{8,10,273},{9,10,563},{10,10,105},{10,10,171},{
+11,10,94},{139,10,354},{136,11,724},{132,0,478},{11,11,512},{13,11,205},{19,11,
+30},{22,11,36},{151,11,19},{7,0,1461},{140,0,91},{6,11,190},{7,11,768},{135,11,
+1170},{4,0,602},{8,0,211},{4,10,95},{7,10,416},{139,10,830},{7,10,731},{13,10,20
+},{143,10,11},{6,0,1068},{135,0,1872},{4,0,13},{5,0,567},{7,0,1498},{9,0,124},{
+11,0,521},{12,0,405},{135,11,1023},{135,0,1006},{132,0,735},{138,0,812},{4,0,170
+},{135,0,323},{6,11,137},{9,11,75},{9,11,253},{10,11,194},{138,11,444},{5,0,304}
+,{7,0,1403},{5,10,864},{10,10,648},{11,10,671},{143,10,46},{135,11,1180},{133,10
+,928},{4,0,148},{133,0,742},{11,10,986},{140,10,682},{133,0,523},{135,11,1743},{
+7,0,730},{18,0,144},{19,0,61},{8,10,44},{9,10,884},{10,10,580},{11,10,399},{11,
+10,894},{143,10,122},{5,11,760},{7,11,542},{8,11,135},{136,11,496},{136,0,981},{
+133,0,111},{10,0,132},{11,0,191},{11,0,358},{139,0,460},{7,11,319},{7,11,355},{7
+,11,763},{10,11,389},{145,11,43},{134,0,890},{134,0,1420},{136,11,557},{133,10,
+518},{133,0,444},{135,0,1787},{135,10,1852},{8,0,123},{15,0,6},{144,0,7},{6,0,
+2041},{10,11,38},{139,11,784},{136,0,932},{5,0,937},{135,0,100},{6,0,995},{4,11,
+58},{5,11,286},{6,11,319},{7,11,402},{7,11,1254},{7,11,1903},{8,11,356},{140,11,
+408},{4,11,389},{9,11,181},{9,11,255},{10,11,8},{10,11,29},{10,11,816},{11,11,
+311},{11,11,561},{12,11,67},{141,11,181},{138,0,255},{5,0,138},{4,10,934},{136,
+10,610},{4,0,965},{10,0,863},{138,0,898},{10,10,804},{138,10,832},{12,0,631},{8,
+10,96},{9,10,36},{10,10,607},{11,10,423},{11,10,442},{12,10,309},{14,10,199},{15
+,10,90},{145,10,110},{134,0,1394},{4,0,652},{8,0,320},{22,0,6},{22,0,16},{9,10,
+13},{9,10,398},{9,10,727},{10,10,75},{10,10,184},{10,10,230},{10,10,564},{10,10,
+569},{11,10,973},{12,10,70},{12,10,189},{13,10,57},{141,10,257},{6,0,897},{134,0
+,1333},{4,0,692},{133,0,321},{133,11,373},{135,0,922},{5,0,619},{133,0,698},{137
+,10,631},{5,10,345},{135,10,1016},{9,0,957},{9,0,1018},{12,0,828},{12,0,844},{12
+,0,897},{12,0,901},{12,0,943},{15,0,180},{18,0,197},{18,0,200},{18,0,213},{18,0,
+214},{146,0,226},{5,0,917},{134,0,1659},{135,0,1100},{134,0,1173},{134,0,1930},{
+5,0,251},{5,0,956},{8,0,268},{9,0,214},{146,0,142},{133,10,673},{137,10,850},{4,
+10,287},{133,10,1018},{132,11,672},{5,0,346},{5,0,711},{8,0,390},{11,11,752},{
+139,11,885},{5,10,34},{10,10,724},{12,10,444},{13,10,354},{18,10,32},{23,10,24},
+{23,10,31},{152,10,5},{4,11,710},{134,11,606},{134,0,744},{134,10,382},{133,11,
+145},{4,10,329},{7,11,884},{140,11,124},{4,11,467},{5,11,405},{134,11,544},{9,10
+,846},{138,10,827},{133,0,624},{9,11,372},{15,11,2},{19,11,10},{147,11,18},{4,11
+,387},{135,11,1288},{5,0,783},{7,0,1998},{135,0,2047},{132,10,906},{136,10,366},
+{135,11,550},{4,10,123},{4,10,649},{5,10,605},{7,10,1509},{136,10,36},{134,0,
+1125},{132,0,594},{133,10,767},{135,11,1227},{136,11,467},{4,11,576},{135,11,
+1263},{4,0,268},{7,0,1534},{135,11,1534},{4,10,273},{5,10,658},{5,11,919},{5,10,
+995},{134,11,1673},{133,0,563},{134,10,72},{135,10,1345},{4,11,82},{5,11,333},{5
+,11,904},{6,11,207},{7,11,325},{7,11,1726},{8,11,101},{10,11,778},{139,11,220},{
+5,0,37},{6,0,39},{6,0,451},{7,0,218},{7,0,667},{7,0,1166},{7,0,1687},{8,0,662},{
+16,0,2},{133,10,589},{134,0,1332},{133,11,903},{134,0,508},{5,10,117},{6,10,514}
+,{6,10,541},{7,10,1164},{7,10,1436},{8,10,220},{8,10,648},{10,10,688},{11,10,560
+},{140,11,147},{6,11,555},{135,11,485},{133,10,686},{7,0,453},{7,0,635},{7,0,796
+},{8,0,331},{9,0,330},{9,0,865},{10,0,119},{10,0,235},{11,0,111},{11,0,129},{11,
+0,240},{12,0,31},{12,0,66},{12,0,222},{12,0,269},{12,0,599},{12,0,684},{12,0,689
+},{12,0,691},{142,0,345},{135,0,1834},{4,11,705},{7,11,615},{138,11,251},{136,11
+,345},{137,0,527},{6,0,98},{7,0,702},{135,0,991},{11,0,576},{14,0,74},{7,10,196}
+,{10,10,765},{11,10,347},{11,10,552},{11,10,790},{12,10,263},{13,10,246},{13,10,
+270},{13,10,395},{14,10,176},{14,10,190},{14,10,398},{14,10,412},{15,10,32},{15,
+10,63},{16,10,88},{147,10,105},{134,11,90},{13,0,84},{141,0,122},{6,0,37},{7,0,
+299},{7,0,1666},{8,0,195},{8,0,316},{9,0,178},{9,0,276},{9,0,339},{9,0,536},{10,
+0,102},{10,0,362},{10,0,785},{11,0,55},{11,0,149},{11,0,773},{13,0,416},{13,0,
+419},{14,0,38},{14,0,41},{142,0,210},{5,10,381},{135,10,1792},{7,11,813},{12,11,
+497},{141,11,56},{7,10,616},{138,10,413},{133,0,645},{6,11,125},{135,11,1277},{
+132,0,290},{6,0,70},{7,0,1292},{10,0,762},{139,0,288},{6,10,120},{7,10,1188},{7,
+10,1710},{8,10,286},{9,10,667},{11,10,592},{139,10,730},{135,11,1784},{7,0,1315}
+,{135,11,1315},{134,0,1955},{135,10,1146},{7,0,131},{7,0,422},{8,0,210},{140,0,
+573},{4,10,352},{135,10,687},{139,0,797},{143,0,38},{14,0,179},{15,0,151},{150,0
+,11},{7,0,488},{4,10,192},{5,10,49},{6,10,200},{6,10,293},{134,10,1696},{132,0,
+936},{135,11,703},{6,11,160},{7,11,1106},{9,11,770},{10,11,618},{11,11,112},{140
+,11,413},{5,0,453},{134,0,441},{135,0,595},{132,10,650},{132,10,147},{6,0,991},{
+6,0,1182},{12,11,271},{145,11,109},{133,10,934},{140,11,221},{132,0,653},{7,0,
+505},{135,0,523},{134,0,903},{135,11,479},{7,11,304},{9,11,646},{9,11,862},{10,
+11,262},{11,11,696},{12,11,208},{15,11,79},{147,11,108},{146,0,80},{135,11,981},
+{142,0,432},{132,0,314},{137,11,152},{7,0,1368},{8,0,232},{8,0,361},{10,0,682},{
+138,0,742},{135,11,1586},{9,0,534},{4,11,434},{11,11,663},{12,11,210},{13,11,166
+},{13,11,310},{14,11,373},{147,11,43},{7,11,1091},{135,11,1765},{6,11,550},{135,
+11,652},{137,0,27},{142,0,12},{4,10,637},{5,11,553},{7,11,766},{138,11,824},{7,
+11,737},{8,11,298},{136,11,452},{7,0,736},{139,0,264},{134,0,1657},{133,11,292},
+{138,11,135},{6,0,844},{134,0,1117},{135,0,127},{9,10,867},{138,10,837},{6,0,
+1184},{134,0,1208},{134,0,1294},{136,0,364},{6,0,1415},{7,0,1334},{11,0,125},{6,
+10,170},{7,11,393},{8,10,395},{8,10,487},{10,11,603},{11,11,206},{141,10,147},{
+137,11,748},{4,11,912},{137,11,232},{4,10,535},{136,10,618},{137,0,792},{7,11,
+1973},{136,11,716},{135,11,98},{5,0,909},{9,0,849},{138,0,805},{4,0,630},{132,0,
+699},{5,11,733},{14,11,103},{150,10,23},{12,11,158},{18,11,8},{19,11,62},{20,11,
+6},{22,11,4},{23,11,2},{151,11,9},{132,0,968},{132,10,778},{132,10,46},{5,10,811
+},{6,10,1679},{6,10,1714},{135,10,2032},{6,0,1446},{7,10,1458},{9,10,407},{139,
+10,15},{7,0,206},{7,0,397},{7,0,621},{7,0,640},{8,0,124},{8,0,619},{9,0,305},{9,
+0,643},{10,0,264},{10,0,628},{11,0,40},{12,0,349},{13,0,134},{13,0,295},{14,0,
+155},{15,0,120},{18,0,105},{6,10,34},{7,10,1089},{8,10,708},{8,10,721},{9,10,363
+},{148,10,98},{4,0,262},{5,0,641},{135,0,342},{137,11,72},{4,0,99},{6,0,250},{6,
+0,346},{8,0,127},{138,0,81},{132,0,915},{5,0,75},{9,0,517},{10,0,470},{12,0,155}
+,{141,0,224},{132,10,462},{11,11,600},{11,11,670},{141,11,245},{142,0,83},{5,10,
+73},{6,10,23},{134,10,338},{6,0,1031},{139,11,923},{7,11,164},{7,11,1571},{9,11,
+107},{140,11,225},{134,0,1470},{133,0,954},{6,0,304},{8,0,418},{10,0,345},{11,0,
+341},{139,0,675},{9,0,410},{139,0,425},{4,11,27},{5,11,484},{5,11,510},{6,11,434
+},{7,11,1000},{7,11,1098},{8,11,2},{136,11,200},{134,0,734},{140,11,257},{7,10,
+725},{8,10,498},{139,10,268},{134,0,1822},{135,0,1798},{135,10,773},{132,11,460}
+,{4,11,932},{133,11,891},{134,0,14},{132,10,583},{7,10,1462},{8,11,625},{139,10,
+659},{5,0,113},{6,0,243},{6,0,1708},{7,0,1865},{11,0,161},{16,0,37},{17,0,99},{
+133,10,220},{134,11,76},{5,11,461},{135,11,1925},{140,0,69},{8,11,92},{137,11,
+221},{139,10,803},{132,10,544},{4,0,274},{134,0,922},{132,0,541},{5,0,627},{6,10
+,437},{6,10,564},{11,10,181},{141,10,183},{135,10,1192},{7,0,166},{132,11,763},{
+133,11,253},{134,0,849},{9,11,73},{10,11,110},{14,11,185},{145,11,119},{5,11,212
+},{12,11,35},{141,11,382},{133,0,717},{137,0,304},{136,0,600},{133,0,654},{6,0,
+273},{10,0,188},{13,0,377},{146,0,77},{4,10,790},{5,10,273},{134,10,394},{132,0,
+543},{135,0,410},{11,0,98},{11,0,524},{141,0,87},{132,0,941},{135,11,1175},{4,0,
+250},{7,0,1612},{11,0,186},{12,0,133},{6,10,127},{7,10,1511},{8,10,613},{12,10,
+495},{12,10,586},{12,10,660},{12,10,668},{14,10,385},{15,10,118},{17,10,20},{146
+,10,98},{6,0,1785},{133,11,816},{134,0,1339},{7,0,961},{7,0,1085},{7,0,1727},{8,
+0,462},{6,10,230},{135,11,1727},{9,0,636},{135,10,1954},{132,0,780},{5,11,869},{
+5,11,968},{6,11,1626},{8,11,734},{136,11,784},{4,11,542},{6,11,1716},{6,11,1727}
+,{7,11,1082},{7,11,1545},{8,11,56},{8,11,118},{8,11,412},{8,11,564},{9,11,888},{
+9,11,908},{10,11,50},{10,11,423},{11,11,685},{11,11,697},{11,11,933},{12,11,299}
+,{13,11,126},{13,11,136},{13,11,170},{141,11,190},{134,11,226},{4,11,232},{9,11,
+202},{10,11,474},{140,11,433},{137,11,500},{5,0,529},{136,10,68},{132,10,654},{4
+,10,156},{7,10,998},{7,10,1045},{7,10,1860},{9,10,48},{9,10,692},{11,10,419},{
+139,10,602},{7,0,1276},{8,0,474},{9,0,652},{6,11,108},{7,11,1003},{7,11,1181},{
+136,11,343},{7,11,1264},{7,11,1678},{11,11,945},{12,11,341},{12,11,471},{140,11,
+569},{134,11,1712},{5,0,948},{12,0,468},{19,0,96},{148,0,24},{4,11,133},{7,11,
+711},{7,11,1298},{7,11,1585},{135,11,1929},{6,0,753},{140,0,657},{139,0,941},{6,
+11,99},{7,11,1808},{145,11,57},{6,11,574},{7,11,428},{7,11,1250},{10,11,669},{11
+,11,485},{11,11,840},{12,11,300},{142,11,250},{4,0,532},{5,0,706},{135,0,662},{5
+,0,837},{6,0,1651},{139,0,985},{7,0,1861},{9,10,197},{10,10,300},{12,10,473},{13
+,10,90},{141,10,405},{137,11,252},{6,11,323},{135,11,1564},{4,0,330},{4,0,863},{
+7,0,933},{7,0,2012},{8,0,292},{7,11,461},{8,11,775},{138,11,435},{132,10,606},{4
+,11,655},{7,11,850},{17,11,75},{146,11,137},{135,0,767},{7,10,1978},{136,10,676}
+,{132,0,641},{135,11,1559},{134,0,1233},{137,0,242},{17,0,114},{4,10,361},{133,
+10,315},{137,0,883},{132,10,461},{138,0,274},{134,0,2008},{134,0,1794},{4,0,703}
+,{135,0,207},{12,0,285},{132,10,472},{132,0,571},{5,0,873},{5,0,960},{8,0,823},{
+9,0,881},{136,11,577},{7,0,617},{10,0,498},{11,0,501},{12,0,16},{140,0,150},{138
+,10,747},{132,0,431},{133,10,155},{11,0,283},{11,0,567},{7,10,163},{8,10,319},{9
+,10,402},{10,10,24},{10,10,681},{11,10,200},{12,10,253},{12,10,410},{142,10,219}
+,{4,11,413},{5,11,677},{8,11,432},{140,11,280},{9,0,401},{5,10,475},{7,10,1780},
+{11,10,297},{11,10,558},{14,10,322},{147,10,76},{6,0,781},{9,0,134},{10,0,2},{10
+,0,27},{10,0,333},{11,0,722},{143,0,1},{5,0,33},{6,0,470},{139,0,424},{135,0,
+2006},{12,0,783},{135,10,1956},{136,0,274},{135,0,1882},{132,0,794},{135,0,1848}
+,{5,10,944},{134,10,1769},{6,0,47},{7,0,90},{7,0,664},{7,0,830},{7,0,1380},{7,0,
+2025},{8,0,448},{136,0,828},{132,10,144},{134,0,1199},{4,11,395},{139,11,762},{
+135,11,1504},{9,0,417},{137,0,493},{9,11,174},{10,11,164},{11,11,440},{11,11,841
+},{143,11,98},{134,11,426},{139,11,1002},{134,0,295},{134,0,816},{6,10,247},{137
+,10,555},{133,0,1019},{4,0,620},{5,11,476},{10,10,280},{138,10,797},{139,0,464},
+{5,11,76},{6,11,458},{6,11,497},{7,11,764},{7,11,868},{9,11,658},{10,11,594},{11
+,11,173},{11,11,566},{12,11,20},{12,11,338},{141,11,200},{134,0,208},{4,11,526},
+{7,11,1029},{135,11,1054},{132,11,636},{6,11,233},{7,11,660},{7,11,1124},{17,11,
+31},{19,11,22},{151,11,14},{10,0,442},{133,10,428},{10,0,930},{140,0,778},{6,0,
+68},{7,0,448},{7,0,1629},{7,0,1769},{7,0,1813},{8,0,442},{8,0,516},{9,0,710},{10
+,0,282},{10,0,722},{7,10,1717},{138,10,546},{134,0,1128},{11,0,844},{12,0,104},{
+140,0,625},{4,11,432},{135,11,824},{138,10,189},{133,0,787},{133,10,99},{4,11,
+279},{7,11,301},{137,11,362},{8,0,491},{4,10,397},{136,10,555},{4,11,178},{133,
+11,399},{134,0,711},{144,0,9},{4,0,403},{5,0,441},{7,0,450},{10,0,840},{11,0,101
+},{12,0,193},{141,0,430},{135,11,1246},{12,10,398},{20,10,39},{21,10,11},{150,10
+,41},{4,10,485},{7,10,353},{135,10,1523},{6,10,366},{7,10,1384},{7,10,1601},{135
+,11,1912},{7,0,396},{10,0,160},{135,11,396},{137,10,282},{134,11,1692},{4,10,157
+},{5,10,471},{6,11,202},{10,11,448},{11,11,208},{12,11,360},{17,11,117},{17,11,
+118},{18,11,27},{148,11,67},{133,0,679},{137,0,326},{136,10,116},{7,11,872},{10,
+11,516},{139,11,167},{132,11,224},{5,11,546},{7,11,35},{8,11,11},{8,11,12},{9,11
+,315},{9,11,533},{10,11,802},{11,11,166},{12,11,525},{142,11,243},{7,0,1128},{
+135,11,1920},{5,11,241},{8,11,242},{9,11,451},{10,11,667},{11,11,598},{140,11,
+429},{6,0,737},{5,10,160},{7,10,363},{7,10,589},{10,10,170},{141,10,55},{135,0,
+1796},{142,11,254},{4,0,574},{7,0,350},{7,0,1024},{8,0,338},{9,0,677},{138,0,808
+},{134,0,1096},{137,11,516},{7,0,405},{10,0,491},{4,10,108},{4,11,366},{139,10,
+498},{11,11,337},{142,11,303},{134,11,1736},{7,0,1081},{140,11,364},{7,10,1005},
+{140,10,609},{7,0,1676},{4,10,895},{133,10,772},{135,0,2037},{6,0,1207},{11,11,
+916},{142,11,419},{14,11,140},{148,11,41},{6,11,331},{136,11,623},{9,0,944},{9,0
+,969},{9,0,1022},{12,0,913},{12,0,936},{15,0,177},{15,0,193},{4,10,926},{133,10,
+983},{5,0,354},{135,11,506},{8,0,598},{9,0,664},{138,0,441},{4,11,640},{133,11,
+513},{137,0,297},{132,10,538},{6,10,294},{7,10,1267},{136,10,624},{7,0,1772},{7,
+11,1888},{8,11,289},{11,11,45},{12,11,278},{140,11,537},{135,10,1325},{138,0,751
+},{141,0,37},{134,0,1828},{132,10,757},{132,11,394},{6,0,257},{135,0,1522},{4,0,
+582},{9,0,191},{135,11,1931},{7,11,574},{7,11,1719},{137,11,145},{132,11,658},{
+10,0,790},{132,11,369},{9,11,781},{10,11,144},{11,11,385},{13,11,161},{13,11,228
+},{13,11,268},{148,11,107},{8,0,469},{10,0,47},{136,11,374},{6,0,306},{7,0,1140}
+,{7,0,1340},{8,0,133},{138,0,449},{139,0,1011},{7,10,1875},{139,10,124},{4,11,
+344},{6,11,498},{139,11,323},{137,0,299},{132,0,837},{133,11,906},{5,0,329},{8,0
+,260},{138,0,10},{134,0,1320},{4,0,657},{146,0,158},{135,0,1191},{152,0,7},{6,0,
+1939},{8,0,974},{138,0,996},{135,0,1665},{11,11,126},{139,11,287},{143,0,8},{14,
+11,149},{14,11,399},{143,11,57},{5,0,66},{7,0,1896},{136,0,288},{7,0,175},{10,0,
+494},{5,10,150},{8,10,603},{9,10,593},{9,10,634},{10,10,173},{11,10,462},{11,10,
+515},{13,10,216},{13,10,288},{142,10,400},{134,0,1643},{136,11,21},{4,0,21},{5,0
+,91},{5,0,648},{5,0,750},{5,0,781},{6,0,54},{6,0,112},{6,0,402},{6,0,1732},{7,0,
+315},{7,0,749},{7,0,1427},{7,0,1900},{9,0,78},{9,0,508},{10,0,611},{10,0,811},{
+11,0,510},{11,0,728},{13,0,36},{14,0,39},{16,0,83},{17,0,124},{148,0,30},{4,0,
+668},{136,0,570},{10,0,322},{10,0,719},{139,0,407},{135,11,1381},{136,11,193},{
+12,10,108},{141,10,291},{132,11,616},{136,11,692},{8,0,125},{8,0,369},{8,0,524},
+{10,0,486},{11,0,13},{11,0,381},{11,0,736},{11,0,766},{11,0,845},{13,0,114},{13,
+0,292},{142,0,47},{134,0,1247},{6,0,1684},{6,0,1731},{7,0,356},{8,0,54},{8,0,221
+},{9,0,225},{9,0,356},{10,0,77},{10,0,446},{10,0,731},{12,0,404},{141,0,491},{
+135,10,1777},{4,11,305},{4,10,493},{144,10,55},{4,0,951},{6,0,1809},{6,0,1849},{
+8,0,846},{8,0,866},{8,0,899},{10,0,896},{12,0,694},{142,0,468},{5,11,214},{7,11,
+603},{8,11,611},{9,11,686},{10,11,88},{11,11,459},{11,11,496},{12,11,463},{12,11
+,590},{13,11,0},{142,11,214},{132,0,411},{4,0,80},{133,0,44},{140,11,74},{143,0,
+31},{7,0,669},{6,10,568},{7,10,1804},{8,10,362},{8,10,410},{8,10,830},{9,10,514}
+,{11,10,649},{142,10,157},{7,0,673},{134,11,1703},{132,10,625},{134,0,1303},{5,0
+,299},{135,0,1083},{138,0,704},{6,0,275},{7,0,408},{6,10,158},{7,10,129},{7,10,
+181},{8,10,276},{8,10,377},{10,10,523},{11,10,816},{12,10,455},{13,10,303},{142,
+10,135},{4,0,219},{7,0,367},{7,0,1713},{7,0,1761},{9,0,86},{9,0,537},{10,0,165},
+{12,0,219},{140,0,561},{8,0,216},{4,10,1},{4,11,737},{6,11,317},{7,10,1143},{7,
+10,1463},{9,10,207},{9,10,390},{9,10,467},{10,11,98},{11,11,294},{11,10,836},{12
+,11,60},{12,11,437},{13,11,64},{13,11,380},{142,11,430},{6,11,1758},{8,11,520},{
+9,11,345},{9,11,403},{142,11,350},{5,11,47},{10,11,242},{138,11,579},{5,11,139},
+{7,11,1168},{138,11,539},{135,0,1319},{4,10,295},{4,10,723},{5,10,895},{7,10,
+1031},{8,10,199},{8,10,340},{9,10,153},{9,10,215},{10,10,21},{10,10,59},{10,10,
+80},{10,10,224},{10,10,838},{11,10,229},{11,10,652},{12,10,192},{13,10,146},{142
+,10,91},{140,0,428},{137,10,51},{133,0,514},{5,10,309},{140,10,211},{6,0,1010},{
+5,10,125},{8,10,77},{138,10,15},{4,0,55},{5,0,301},{6,0,571},{142,0,49},{146,0,
+102},{136,11,370},{4,11,107},{7,11,613},{8,11,358},{8,11,439},{8,11,504},{9,11,
+501},{10,11,383},{139,11,477},{132,11,229},{133,0,364},{133,10,439},{4,11,903},{
+135,11,1816},{11,0,379},{140,10,76},{4,0,76},{4,0,971},{7,0,1550},{9,0,306},{9,0
+,430},{9,0,663},{10,0,683},{10,0,921},{11,0,427},{11,0,753},{12,0,334},{12,0,442
+},{14,0,258},{14,0,366},{143,0,131},{137,0,52},{4,11,47},{6,11,373},{7,11,452},{
+7,11,543},{7,11,1714},{7,11,1856},{9,11,6},{11,11,257},{139,11,391},{4,10,8},{7,
+10,1152},{7,10,1153},{7,10,1715},{9,10,374},{10,10,478},{139,10,648},{4,11,785},
+{133,11,368},{135,10,1099},{135,11,860},{5,11,980},{134,11,1754},{134,0,1258},{6
+,0,1058},{6,0,1359},{7,11,536},{7,11,1331},{136,11,143},{4,0,656},{135,0,779},{
+136,10,87},{5,11,19},{6,11,533},{146,11,126},{7,0,144},{138,10,438},{5,11,395},{
+5,11,951},{134,11,1776},{135,0,1373},{7,0,554},{7,0,605},{141,0,10},{4,10,69},{5
+,10,122},{9,10,656},{138,10,464},{5,10,849},{134,10,1633},{5,0,838},{5,0,841},{
+134,0,1649},{133,0,1012},{139,10,499},{7,10,476},{7,10,1592},{138,10,87},{6,0,
+251},{7,0,365},{7,0,1357},{7,0,1497},{8,0,154},{141,0,281},{132,11,441},{132,11,
+695},{7,11,497},{9,11,387},{147,11,81},{133,0,340},{14,10,283},{142,11,283},{134
+,0,810},{135,11,1894},{139,0,495},{5,11,284},{6,11,49},{6,11,350},{7,11,1},{7,11
+,377},{7,11,1693},{8,11,18},{8,11,678},{9,11,161},{9,11,585},{9,11,671},{9,11,
+839},{11,11,912},{141,11,427},{5,10,859},{7,10,1160},{8,10,107},{9,10,291},{9,10
+,439},{10,10,663},{11,10,609},{140,10,197},{8,0,261},{9,0,144},{9,0,466},{10,0,
+370},{12,0,470},{13,0,144},{142,0,348},{137,0,897},{6,0,248},{9,0,546},{10,0,535
+},{11,0,681},{141,0,135},{4,0,358},{135,0,1496},{134,0,567},{136,0,445},{4,10,
+117},{6,10,372},{7,10,1905},{142,10,323},{4,10,722},{139,10,471},{6,0,697},{134,
+0,996},{7,11,2007},{9,11,101},{9,11,450},{10,11,66},{10,11,842},{11,11,536},{140
+,11,587},{132,0,577},{134,0,1336},{9,10,5},{12,10,216},{12,10,294},{12,10,298},{
+12,10,400},{12,10,518},{13,10,229},{143,10,139},{6,0,174},{138,0,917},{134,10,
+1774},{5,10,12},{7,10,375},{9,10,88},{9,10,438},{11,11,62},{139,10,270},{134,11,
+1766},{6,11,0},{7,11,84},{7,10,816},{7,10,1241},{9,10,283},{9,10,520},{10,10,213
+},{10,10,307},{10,10,463},{10,10,671},{10,10,746},{11,10,401},{11,10,794},{11,11
+,895},{12,10,517},{17,11,11},{18,10,107},{147,10,115},{5,0,878},{133,0,972},{6,
+11,1665},{7,11,256},{7,11,1388},{138,11,499},{4,10,258},{136,10,639},{4,11,22},{
+5,11,10},{6,10,22},{7,11,848},{7,10,903},{7,10,1963},{8,11,97},{138,10,577},{5,
+10,681},{136,10,782},{133,11,481},{132,0,351},{4,10,664},{5,10,804},{139,10,1013
+},{6,11,134},{7,11,437},{7,11,959},{9,11,37},{14,11,285},{14,11,371},{144,11,60}
+,{7,11,486},{8,11,155},{11,11,93},{140,11,164},{132,0,286},{7,0,438},{7,0,627},{
+7,0,1516},{8,0,40},{9,0,56},{9,0,294},{10,0,30},{11,0,969},{11,0,995},{146,0,148
+},{5,11,591},{135,11,337},{134,0,1950},{133,10,32},{138,11,500},{5,11,380},{5,11
+,650},{136,11,310},{4,11,364},{7,11,1156},{7,11,1187},{137,11,409},{4,0,738},{
+134,11,482},{4,11,781},{6,11,487},{7,11,926},{8,11,263},{139,11,500},{135,11,418
+},{6,0,2047},{10,0,969},{4,10,289},{7,10,629},{7,10,1698},{7,10,1711},{140,10,
+215},{6,10,450},{136,10,109},{134,0,818},{136,10,705},{133,0,866},{4,11,94},{135
+,11,1265},{132,11,417},{134,0,1467},{135,10,1238},{4,0,972},{6,0,1851},{134,0,
+1857},{134,0,355},{133,0,116},{132,0,457},{135,11,1411},{4,11,408},{4,11,741},{
+135,11,500},{134,10,26},{142,11,137},{5,0,527},{6,0,189},{7,0,859},{136,0,267},{
+11,0,104},{11,0,554},{15,0,60},{143,0,125},{134,0,1613},{4,10,414},{5,10,467},{9
+,10,654},{10,10,451},{12,10,59},{141,10,375},{135,10,17},{134,0,116},{135,11,541
+},{135,10,955},{6,11,73},{135,11,177},{133,11,576},{134,0,886},{133,0,487},{4,0,
+86},{5,0,667},{5,0,753},{6,0,316},{6,0,455},{135,0,946},{142,11,231},{150,0,45},
+{134,0,863},{134,0,1953},{6,10,280},{10,10,502},{11,10,344},{140,10,38},{4,0,79}
+,{7,0,1773},{10,0,450},{11,0,589},{13,0,332},{13,0,493},{14,0,183},{14,0,334},{
+14,0,362},{14,0,368},{14,0,376},{14,0,379},{19,0,90},{19,0,103},{19,0,127},{148,
+0,90},{5,10,45},{7,10,1161},{11,10,448},{11,10,880},{13,10,139},{13,10,407},{15,
+10,16},{17,10,95},{18,10,66},{18,10,88},{18,10,123},{149,10,7},{136,10,777},{4,
+10,410},{135,10,521},{135,10,1778},{135,11,538},{142,0,381},{133,11,413},{134,0,
+1142},{6,0,1189},{136,11,495},{5,0,663},{6,0,1962},{134,0,2003},{7,11,54},{8,11,
+312},{10,11,191},{10,11,614},{140,11,567},{132,10,436},{133,0,846},{10,0,528},{
+11,0,504},{7,10,1587},{135,10,1707},{5,0,378},{8,0,465},{9,0,286},{10,0,185},{10
+,0,562},{10,0,635},{11,0,31},{11,0,393},{13,0,312},{18,0,65},{18,0,96},{147,0,89
+},{7,0,899},{14,0,325},{6,11,468},{7,11,567},{7,11,1478},{8,11,530},{142,11,290}
+,{7,0,1880},{9,0,680},{139,0,798},{134,0,1770},{132,0,648},{150,11,35},{5,0,945}
+,{6,0,1656},{6,0,1787},{7,0,167},{8,0,824},{9,0,391},{10,0,375},{139,0,185},{6,
+11,484},{135,11,822},{134,0,2046},{7,0,1645},{8,0,352},{137,0,249},{132,0,152},{
+6,0,611},{135,0,1733},{6,11,1724},{135,11,2022},{133,0,1006},{141,11,96},{5,0,
+420},{135,0,1449},{146,11,149},{135,0,832},{135,10,663},{133,0,351},{5,0,40},{7,
+0,598},{7,0,1638},{8,0,78},{9,0,166},{9,0,640},{9,0,685},{9,0,773},{11,0,215},{
+13,0,65},{14,0,172},{14,0,317},{145,0,6},{8,0,60},{9,0,343},{139,0,769},{134,0,
+1354},{132,0,724},{137,0,745},{132,11,474},{7,0,1951},{8,0,765},{8,0,772},{140,0
+,671},{7,0,108},{8,0,219},{8,0,388},{9,0,775},{11,0,275},{140,0,464},{137,0,639}
+,{135,10,503},{133,11,366},{5,0,15},{6,0,56},{7,0,1758},{8,0,500},{9,0,730},{11,
+0,331},{13,0,150},{14,0,282},{5,11,305},{9,11,560},{141,11,208},{4,10,113},{5,10
+,163},{5,10,735},{7,10,1009},{9,10,9},{9,10,771},{12,10,90},{13,10,138},{13,10,
+410},{143,10,128},{4,10,324},{138,10,104},{135,11,466},{142,11,27},{134,0,1886},
+{5,0,205},{6,0,438},{9,0,711},{4,11,480},{6,11,167},{6,11,302},{6,11,1642},{7,11
+,130},{7,11,656},{7,11,837},{7,11,1547},{7,11,1657},{8,11,429},{9,11,228},{10,11
+,643},{13,11,289},{13,11,343},{147,11,101},{134,0,865},{6,0,2025},{136,0,965},{7
+,11,278},{10,11,739},{11,11,708},{141,11,348},{133,0,534},{135,11,1922},{137,0,
+691},{4,10,935},{133,10,823},{6,0,443},{9,0,237},{9,0,571},{9,0,695},{10,0,139},
+{11,0,715},{12,0,417},{141,0,421},{5,10,269},{7,10,434},{7,10,891},{8,10,339},{9
+,10,702},{11,10,594},{11,10,718},{145,10,100},{6,0,1555},{7,0,878},{9,10,485},{
+141,10,264},{134,10,1713},{7,10,1810},{11,10,866},{12,10,103},{141,10,495},{135,
+10,900},{6,0,1410},{9,11,316},{139,11,256},{4,0,995},{135,0,1033},{132,0,578},{
+10,0,881},{12,0,740},{12,0,743},{140,0,759},{132,0,822},{133,0,923},{142,10,143}
+,{135,11,1696},{6,11,363},{7,11,1955},{136,11,725},{132,0,924},{133,0,665},{135,
+10,2029},{135,0,1901},{4,0,265},{6,0,1092},{6,0,1417},{7,0,807},{135,0,950},{5,0
+,93},{12,0,267},{141,0,498},{135,0,1451},{5,11,813},{135,11,2046},{5,10,625},{
+135,10,1617},{135,0,747},{6,0,788},{137,0,828},{7,0,184},{11,0,307},{11,0,400},{
+15,0,130},{5,11,712},{7,11,1855},{8,10,425},{8,10,693},{9,10,720},{10,10,380},{
+10,10,638},{11,11,17},{11,10,473},{12,10,61},{13,11,321},{144,11,67},{135,0,198}
+,{6,11,320},{7,11,781},{7,11,1921},{9,11,55},{10,11,186},{10,11,273},{10,11,664}
+,{10,11,801},{11,11,996},{11,11,997},{13,11,157},{142,11,170},{136,11,271},{135,
+0,994},{7,11,103},{7,11,863},{11,11,184},{14,11,299},{145,11,62},{11,10,551},{
+142,10,159},{5,0,233},{5,0,320},{6,0,140},{8,0,295},{8,0,615},{136,11,615},{133,
+0,978},{4,0,905},{6,0,1701},{137,0,843},{132,10,168},{4,0,974},{8,0,850},{12,0,
+709},{12,0,768},{140,0,786},{135,10,91},{152,0,6},{138,10,532},{135,10,1884},{
+132,0,509},{6,0,1307},{135,0,273},{5,11,77},{7,11,1455},{10,11,843},{19,11,73},{
+150,11,5},{132,11,458},{135,11,1420},{6,11,109},{138,11,382},{6,0,201},{6,11,330
+},{7,10,70},{7,11,1084},{10,10,240},{11,11,142},{147,10,93},{7,0,1041},{140,11,
+328},{133,11,354},{134,0,1040},{133,0,693},{134,0,774},{139,0,234},{132,0,336},{
+7,0,1399},{139,10,392},{20,0,22},{148,11,22},{5,0,802},{7,0,2021},{136,0,805},{5
+,0,167},{5,0,899},{6,0,410},{137,0,777},{137,0,789},{134,0,1705},{7,10,655},{135
+,10,1844},{4,10,145},{6,10,176},{7,10,395},{137,10,562},{132,10,501},{135,0,10},
+{5,0,11},{6,0,117},{6,0,485},{7,0,1133},{9,0,582},{9,0,594},{10,0,82},{11,0,21},
+{11,0,818},{12,0,535},{13,0,86},{20,0,91},{23,0,13},{134,10,509},{4,0,264},{7,0,
+1067},{8,0,204},{8,0,385},{139,0,953},{139,11,737},{138,0,56},{134,0,1917},{133,
+0,470},{10,11,657},{14,11,297},{142,11,361},{135,11,412},{7,0,1198},{7,11,1198},
+{8,11,556},{14,11,123},{14,11,192},{143,11,27},{7,11,1985},{14,11,146},{15,11,42
+},{16,11,23},{17,11,86},{146,11,17},{11,0,1015},{136,11,122},{4,10,114},{9,10,
+492},{13,10,462},{142,10,215},{4,10,77},{5,10,361},{6,10,139},{6,10,401},{6,10,
+404},{7,10,413},{7,10,715},{7,10,1716},{11,10,279},{12,10,179},{12,10,258},{13,
+10,244},{142,10,358},{134,10,1717},{7,10,1061},{8,10,82},{11,10,250},{12,10,420}
+,{141,10,184},{133,0,715},{135,10,724},{9,0,919},{9,0,922},{9,0,927},{9,0,933},{
+9,0,962},{9,0,1000},{9,0,1002},{9,0,1021},{12,0,890},{12,0,907},{12,0,930},{15,0
+,207},{15,0,228},{15,0,238},{149,0,61},{8,0,794},{9,0,400},{10,0,298},{142,0,228
+},{5,11,430},{5,11,932},{6,11,131},{7,11,417},{9,11,522},{11,11,314},{141,11,390
+},{132,0,867},{8,0,724},{132,11,507},{137,11,261},{4,11,343},{133,11,511},{6,0,
+190},{7,0,768},{135,0,1170},{6,10,513},{135,10,1052},{7,11,455},{138,11,591},{
+134,0,1066},{137,10,899},{14,0,67},{147,0,60},{4,0,948},{18,0,174},{146,0,176},{
+135,0,1023},{7,10,1417},{12,10,382},{17,10,48},{152,10,12},{134,11,575},{132,0,
+764},{6,10,545},{7,10,565},{7,10,1669},{10,10,114},{11,10,642},{140,10,618},{6,0
+,137},{9,0,75},{9,0,253},{10,0,194},{138,0,444},{4,0,756},{133,10,5},{8,0,1008},
+{135,10,192},{132,0,842},{11,0,643},{12,0,115},{136,10,763},{139,0,67},{133,10,
+759},{4,0,821},{5,0,760},{7,0,542},{8,0,135},{8,0,496},{135,11,580},{7,10,370},{
+7,10,1007},{7,10,1177},{135,10,1565},{135,10,1237},{140,0,736},{7,0,319},{7,0,
+355},{7,0,763},{10,0,389},{145,0,43},{8,11,333},{138,11,182},{4,10,87},{5,10,250
+},{141,10,298},{138,0,786},{134,0,2044},{8,11,330},{140,11,477},{135,11,1338},{
+132,11,125},{134,0,1030},{134,0,1083},{132,11,721},{135,10,814},{7,11,776},{8,11
+,145},{147,11,56},{134,0,1226},{4,10,57},{7,10,1195},{7,10,1438},{7,10,1548},{7,
+10,1835},{7,10,1904},{9,10,757},{10,10,604},{139,10,519},{7,11,792},{8,11,147},{
+10,11,821},{139,11,1021},{137,11,797},{4,0,58},{5,0,286},{6,0,319},{7,0,402},{7,
+0,1254},{7,0,1903},{8,0,356},{140,0,408},{4,0,389},{4,0,815},{9,0,181},{9,0,255}
+,{10,0,8},{10,0,29},{10,0,816},{11,0,311},{11,0,561},{12,0,67},{141,0,181},{7,11
+,1472},{135,11,1554},{7,11,1071},{7,11,1541},{7,11,1767},{7,11,1806},{7,11,1999}
+,{9,11,248},{10,11,400},{11,11,162},{11,11,178},{11,11,242},{12,11,605},{15,11,
+26},{144,11,44},{5,11,168},{5,11,930},{8,11,74},{9,11,623},{12,11,500},{12,11,
+579},{13,11,41},{143,11,93},{6,11,220},{7,11,1101},{141,11,105},{5,0,474},{7,0,
+507},{4,10,209},{7,11,507},{135,10,902},{132,0,427},{6,0,413},{7,10,335},{7,10,
+1437},{7,10,1668},{8,10,553},{8,10,652},{8,10,656},{9,10,558},{11,10,743},{149,
+10,18},{132,0,730},{6,11,19},{7,11,1413},{139,11,428},{133,0,373},{132,10,559},{
+7,11,96},{8,11,401},{137,11,896},{7,0,799},{7,0,1972},{5,10,1017},{138,10,511},{
+135,0,1793},{7,11,1961},{7,11,1965},{8,11,702},{136,11,750},{8,11,150},{8,11,737
+},{140,11,366},{132,0,322},{133,10,709},{8,11,800},{9,11,148},{9,11,872},{9,11,
+890},{11,11,309},{11,11,1001},{13,11,267},{141,11,323},{134,10,1745},{7,0,290},{
+136,10,206},{7,0,1651},{145,0,89},{139,0,2},{132,0,672},{6,0,1860},{8,0,905},{10
+,0,844},{10,0,846},{10,0,858},{12,0,699},{12,0,746},{140,0,772},{135,11,424},{
+133,11,547},{133,0,737},{5,11,490},{6,11,615},{6,11,620},{135,11,683},{6,0,746},
+{134,0,1612},{132,10,776},{9,11,385},{149,11,17},{133,0,145},{135,10,1272},{7,0,
+884},{140,0,124},{4,0,387},{135,0,1288},{5,11,133},{136,10,406},{136,11,187},{6,
+0,679},{8,11,8},{138,11,0},{135,0,550},{135,11,798},{136,11,685},{7,11,1086},{
+145,11,46},{8,10,175},{10,10,168},{138,10,573},{135,0,1305},{4,0,576},{135,0,
+1263},{6,0,686},{134,0,1563},{134,0,607},{5,0,919},{134,0,1673},{148,0,37},{8,11
+,774},{10,11,670},{140,11,51},{133,10,784},{139,10,882},{4,0,82},{5,0,333},{5,0,
+904},{6,0,207},{7,0,325},{7,0,1726},{8,0,101},{10,0,778},{139,0,220},{135,11,371
+},{132,0,958},{133,0,903},{4,11,127},{5,11,350},{6,11,356},{8,11,426},{9,11,572}
+,{10,11,247},{139,11,312},{140,0,147},{6,11,59},{7,11,885},{9,11,603},{141,11,
+397},{10,0,367},{9,10,14},{9,10,441},{139,10,9},{11,10,966},{12,10,287},{13,10,
+342},{13,10,402},{15,10,110},{143,10,163},{134,0,690},{132,0,705},{9,0,651},{11,
+0,971},{13,0,273},{7,10,1428},{7,10,1640},{7,10,1867},{9,10,169},{9,10,182},{9,
+10,367},{9,10,478},{9,10,506},{9,10,551},{9,10,557},{9,10,648},{9,10,697},{9,10,
+705},{9,10,725},{9,10,787},{9,10,794},{10,10,198},{10,10,214},{10,10,267},{10,10
+,275},{10,10,456},{10,10,551},{10,10,561},{10,10,613},{10,10,627},{10,10,668},{
+10,10,675},{10,10,691},{10,10,695},{10,10,707},{10,10,715},{11,10,183},{11,10,
+201},{11,10,262},{11,10,352},{11,10,439},{11,10,493},{11,10,572},{11,10,591},{11
+,10,608},{11,10,611},{11,10,646},{11,10,674},{11,10,711},{11,10,751},{11,10,761}
+,{11,10,776},{11,10,785},{11,10,850},{11,10,853},{11,10,862},{11,10,865},{11,10,
+868},{11,10,875},{11,10,898},{11,10,902},{11,10,903},{11,10,910},{11,10,932},{11
+,10,942},{11,10,957},{11,10,967},{11,10,972},{12,10,148},{12,10,195},{12,10,220}
+,{12,10,237},{12,10,318},{12,10,339},{12,10,393},{12,10,445},{12,10,450},{12,10,
+474},{12,10,505},{12,10,509},{12,10,533},{12,10,591},{12,10,594},{12,10,597},{12
+,10,621},{12,10,633},{12,10,642},{13,10,59},{13,10,60},{13,10,145},{13,10,239},{
+13,10,250},{13,10,329},{13,10,344},{13,10,365},{13,10,372},{13,10,387},{13,10,
+403},{13,10,414},{13,10,456},{13,10,470},{13,10,478},{13,10,483},{13,10,489},{14
+,10,55},{14,10,57},{14,10,81},{14,10,90},{14,10,148},{14,10,239},{14,10,266},{14
+,10,321},{14,10,326},{14,10,327},{14,10,330},{14,10,347},{14,10,355},{14,10,401}
+,{14,10,404},{14,10,411},{14,10,414},{14,10,416},{14,10,420},{15,10,61},{15,10,
+74},{15,10,87},{15,10,88},{15,10,94},{15,10,96},{15,10,116},{15,10,149},{15,10,
+154},{16,10,50},{16,10,63},{16,10,73},{17,10,2},{17,10,66},{17,10,92},{17,10,103
+},{17,10,112},{17,10,120},{18,10,50},{18,10,54},{18,10,82},{18,10,86},{18,10,90}
+,{18,10,111},{18,10,115},{18,10,156},{19,10,40},{19,10,79},{20,10,78},{149,10,22
+},{7,0,887},{5,10,161},{135,10,839},{142,11,98},{134,0,90},{138,11,356},{135,11,
+441},{6,11,111},{7,11,4},{8,11,163},{8,11,776},{138,11,566},{134,0,908},{134,0,
+1261},{7,0,813},{12,0,497},{141,0,56},{134,0,1235},{135,0,429},{135,11,1994},{
+138,0,904},{6,0,125},{7,0,1277},{137,0,772},{151,0,12},{4,0,841},{5,0,386},{133,
+11,386},{5,11,297},{135,11,1038},{6,0,860},{6,0,1069},{135,11,309},{136,0,946},{
+135,10,1814},{141,11,418},{136,11,363},{10,0,768},{139,0,787},{22,11,30},{150,11
+,33},{6,0,160},{7,0,1106},{9,0,770},{11,0,112},{140,0,413},{11,11,216},{139,11,
+340},{136,10,139},{135,11,1390},{135,11,808},{132,11,280},{12,0,271},{17,0,109},
+{7,10,643},{136,10,236},{140,11,54},{4,11,421},{133,11,548},{11,0,719},{12,0,36}
+,{141,0,337},{7,0,581},{9,0,644},{137,0,699},{11,11,511},{13,11,394},{14,11,298}
+,{14,11,318},{146,11,103},{7,0,304},{9,0,646},{9,0,862},{11,0,696},{12,0,208},{
+15,0,79},{147,0,108},{4,0,631},{7,0,1126},{135,0,1536},{135,11,1527},{8,0,880},{
+10,0,869},{138,0,913},{7,0,1513},{5,10,54},{6,11,254},{9,11,109},{138,11,103},{
+135,0,981},{133,11,729},{132,10,744},{132,0,434},{134,0,550},{7,0,930},{10,0,476
+},{13,0,452},{19,0,104},{6,11,1630},{10,10,402},{146,10,55},{5,0,553},{138,0,824
+},{136,0,452},{8,0,151},{137,10,624},{132,10,572},{132,0,772},{133,11,671},{133,
+0,292},{138,0,135},{132,11,889},{140,11,207},{9,0,504},{6,10,43},{7,10,38},{8,10
+,248},{138,10,513},{6,0,1089},{135,11,1910},{4,11,627},{133,11,775},{135,0,783},
+{133,10,766},{133,10,363},{7,0,387},{135,11,387},{7,0,393},{10,0,603},{11,0,206}
+,{7,11,202},{11,11,362},{11,11,948},{140,11,388},{6,11,507},{7,11,451},{8,11,389
+},{12,11,490},{13,11,16},{13,11,215},{13,11,351},{18,11,132},{147,11,125},{4,0,
+912},{9,0,232},{135,11,841},{6,10,258},{140,10,409},{5,10,249},{148,10,82},{136,
+11,566},{6,0,977},{135,11,1214},{7,0,1973},{136,0,716},{135,0,98},{133,0,733},{5
+,11,912},{134,11,1695},{5,10,393},{6,10,378},{7,10,1981},{9,10,32},{9,10,591},{
+10,10,685},{10,10,741},{142,10,382},{133,10,788},{10,0,19},{11,0,911},{7,10,1968
+},{141,10,509},{5,0,668},{5,11,236},{6,11,572},{8,11,492},{11,11,618},{144,11,56
+},{135,11,1789},{4,0,360},{5,0,635},{5,0,700},{5,10,58},{5,10,171},{5,10,683},{6
+,10,291},{6,10,566},{7,10,1650},{11,10,523},{12,10,273},{12,10,303},{15,10,39},{
+143,10,111},{133,0,901},{134,10,589},{5,11,190},{136,11,318},{140,0,656},{7,0,
+726},{152,0,9},{4,10,917},{133,10,1005},{135,10,1598},{134,11,491},{4,10,919},{
+133,11,434},{137,0,72},{6,0,1269},{6,0,1566},{134,0,1621},{9,0,463},{10,0,595},{
+4,10,255},{5,10,302},{6,10,132},{7,10,128},{7,10,283},{7,10,1299},{10,10,52},{10
+,10,514},{11,10,925},{13,10,92},{142,10,309},{135,0,1454},{134,0,1287},{11,0,600
+},{13,0,245},{137,10,173},{136,0,989},{7,0,164},{7,0,1571},{9,0,107},{140,0,225}
+,{6,0,1061},{141,10,442},{4,0,27},{5,0,484},{5,0,510},{6,0,434},{7,0,1000},{7,0,
+1098},{136,0,2},{7,11,85},{7,11,247},{8,11,585},{10,11,163},{138,11,316},{11,11,
+103},{142,11,0},{134,0,1127},{4,0,460},{134,0,852},{134,10,210},{4,0,932},{133,0
+,891},{6,0,588},{147,11,83},{8,0,625},{4,10,284},{134,10,223},{134,0,76},{8,0,92
+},{137,0,221},{4,11,124},{10,11,457},{11,11,121},{11,11,169},{11,11,422},{11,11,
+870},{12,11,214},{13,11,389},{14,11,187},{143,11,77},{9,11,618},{138,11,482},{4,
+10,218},{7,10,526},{143,10,137},{13,0,9},{14,0,104},{14,0,311},{4,10,270},{5,10,
+192},{6,10,332},{135,10,1322},{140,10,661},{135,11,1193},{6,11,107},{7,11,638},{
+7,11,1632},{137,11,396},{132,0,763},{4,0,622},{5,11,370},{134,11,1756},{133,0,
+253},{135,0,546},{9,0,73},{10,0,110},{14,0,185},{17,0,119},{133,11,204},{7,0,624
+},{7,0,916},{10,0,256},{139,0,87},{7,10,379},{8,10,481},{137,10,377},{5,0,212},{
+12,0,35},{13,0,382},{5,11,970},{134,11,1706},{9,0,746},{5,10,1003},{134,10,149},
+{10,0,150},{11,0,849},{13,0,330},{8,10,262},{9,10,627},{11,10,214},{11,10,404},{
+11,10,457},{11,10,780},{11,10,913},{13,10,401},{142,10,200},{134,0,1466},{135,11
+,3},{6,0,1299},{4,11,35},{5,11,121},{5,11,483},{5,11,685},{6,11,489},{7,11,1204}
+,{136,11,394},{135,10,742},{4,10,142},{136,10,304},{4,11,921},{133,11,1007},{134
+,0,1518},{6,0,1229},{135,0,1175},{133,0,816},{12,0,159},{4,10,471},{4,11,712},{5
+,10,51},{6,10,602},{7,10,925},{8,10,484},{138,10,195},{134,11,1629},{5,0,869},{5
+,0,968},{6,0,1626},{8,0,734},{136,0,784},{4,0,542},{6,0,1716},{6,0,1727},{7,0,
+1082},{7,0,1545},{8,0,56},{8,0,118},{8,0,412},{8,0,564},{9,0,888},{9,0,908},{10,
+0,50},{10,0,423},{11,0,685},{11,0,697},{11,0,933},{12,0,299},{13,0,126},{13,0,
+136},{13,0,170},{13,0,190},{136,10,688},{132,10,697},{4,0,232},{9,0,202},{10,0,
+474},{140,0,433},{136,0,212},{6,0,108},{7,0,1003},{7,0,1181},{8,0,111},{136,0,
+343},{5,10,221},{135,11,1255},{133,11,485},{134,0,1712},{142,0,216},{5,0,643},{6
+,0,516},{4,11,285},{5,11,317},{6,11,301},{7,11,7},{8,11,153},{10,11,766},{11,11,
+468},{12,11,467},{141,11,143},{4,0,133},{7,0,711},{7,0,1298},{135,0,1585},{134,0
+,650},{135,11,512},{6,0,99},{7,0,1808},{145,0,57},{6,0,246},{6,0,574},{7,0,428},
+{9,0,793},{10,0,669},{11,0,485},{11,0,840},{12,0,300},{14,0,250},{145,0,55},{4,
+10,132},{5,10,69},{135,10,1242},{136,0,1023},{7,0,302},{132,10,111},{135,0,1871}
+,{132,0,728},{9,0,252},{132,10,767},{6,0,461},{7,0,1590},{7,10,1416},{7,10,2005}
+,{8,10,131},{8,10,466},{9,10,672},{13,10,252},{148,10,103},{6,0,323},{135,0,1564
+},{7,0,461},{136,0,775},{6,10,44},{136,10,368},{139,0,172},{132,0,464},{4,10,570
+},{133,10,120},{137,11,269},{6,10,227},{135,10,1589},{6,11,1719},{6,11,1735},{7,
+11,2016},{7,11,2020},{8,11,837},{137,11,852},{7,0,727},{146,0,73},{132,0,1023},{
+135,11,852},{135,10,1529},{136,0,577},{138,11,568},{134,0,1037},{8,11,67},{138,
+11,419},{4,0,413},{5,0,677},{8,0,432},{140,0,280},{10,0,600},{6,10,1667},{7,11,
+967},{7,10,2036},{141,11,11},{6,10,511},{140,10,132},{6,0,799},{5,10,568},{6,10,
+138},{135,10,1293},{8,0,159},{4,10,565},{136,10,827},{7,0,646},{7,0,1730},{11,0,
+446},{141,0,178},{4,10,922},{133,10,1023},{135,11,11},{132,0,395},{11,0,145},{
+135,10,1002},{9,0,174},{10,0,164},{11,0,440},{11,0,514},{11,0,841},{15,0,98},{
+149,0,20},{134,0,426},{10,0,608},{139,0,1002},{7,11,320},{8,11,51},{12,11,481},{
+12,11,570},{148,11,106},{9,0,977},{9,0,983},{132,11,445},{138,0,250},{139,0,100}
+,{6,0,1982},{136,10,402},{133,11,239},{4,10,716},{141,10,31},{5,0,476},{7,11,83}
+,{7,11,1990},{8,11,130},{139,11,720},{8,10,691},{136,10,731},{5,11,123},{6,11,
+530},{7,11,348},{135,11,1419},{5,0,76},{6,0,458},{6,0,497},{7,0,868},{9,0,658},{
+10,0,594},{11,0,173},{11,0,566},{12,0,20},{12,0,338},{141,0,200},{9,11,139},{10,
+11,399},{11,11,469},{12,11,634},{141,11,223},{9,10,840},{138,10,803},{133,10,847
+},{11,11,223},{140,11,168},{132,11,210},{8,0,447},{9,10,53},{9,10,268},{9,10,901
+},{10,10,518},{10,10,829},{11,10,188},{13,10,74},{14,10,46},{15,10,17},{15,10,33
+},{17,10,40},{18,10,36},{19,10,20},{22,10,1},{152,10,2},{4,0,526},{7,0,1029},{
+135,0,1054},{19,11,59},{150,11,2},{4,0,636},{6,0,1875},{6,0,1920},{9,0,999},{12,
+0,807},{12,0,825},{15,0,179},{15,0,190},{18,0,182},{136,10,532},{6,0,1699},{7,0,
+660},{7,0,1124},{17,0,31},{19,0,22},{151,0,14},{135,10,681},{132,11,430},{140,10
+,677},{4,10,684},{136,10,384},{132,11,756},{133,11,213},{7,0,188},{7,10,110},{8,
+10,290},{8,10,591},{9,10,382},{9,10,649},{11,10,71},{11,10,155},{11,10,313},{12,
+10,5},{13,10,325},{142,10,287},{7,10,360},{7,10,425},{9,10,66},{9,10,278},{138,
+10,644},{142,11,164},{4,0,279},{7,0,301},{137,0,362},{134,11,586},{135,0,1743},{
+4,0,178},{133,0,399},{4,10,900},{133,10,861},{5,10,254},{7,10,985},{136,10,73},{
+133,11,108},{7,10,1959},{136,10,683},{133,11,219},{4,11,193},{5,11,916},{7,11,
+364},{10,11,398},{10,11,726},{11,11,317},{11,11,626},{12,11,142},{12,11,288},{12
+,11,678},{13,11,313},{15,11,113},{18,11,114},{21,11,30},{150,11,53},{6,11,241},{
+7,11,907},{8,11,832},{9,11,342},{10,11,729},{11,11,284},{11,11,445},{11,11,651},
+{11,11,863},{13,11,398},{146,11,99},{132,0,872},{134,0,831},{134,0,1692},{6,0,
+202},{6,0,1006},{9,0,832},{10,0,636},{11,0,208},{12,0,360},{17,0,118},{18,0,27},
+{20,0,67},{137,11,734},{132,10,725},{7,11,993},{138,11,666},{134,0,1954},{134,10
+,196},{7,0,872},{10,0,516},{139,0,167},{133,10,831},{4,11,562},{9,11,254},{139,
+11,879},{137,0,313},{4,0,224},{132,11,786},{11,0,24},{12,0,170},{136,10,723},{5,
+0,546},{7,0,35},{8,0,11},{8,0,12},{9,0,315},{9,0,533},{10,0,802},{11,0,166},{12,
+0,525},{142,0,243},{7,0,1937},{13,10,80},{13,10,437},{145,10,74},{5,0,241},{8,0,
+242},{9,0,451},{10,0,667},{11,0,598},{140,0,429},{150,0,46},{6,0,1273},{137,0,
+830},{5,10,848},{6,10,66},{136,10,764},{6,0,825},{134,0,993},{4,0,1006},{10,0,
+327},{13,0,271},{4,10,36},{7,10,1387},{139,10,755},{134,0,1023},{135,0,1580},{4,
+0,366},{137,0,516},{132,10,887},{6,0,1736},{135,0,1891},{6,11,216},{7,11,901},{7
+,11,1343},{136,11,493},{6,10,165},{138,10,388},{7,11,341},{139,11,219},{4,10,719
+},{135,10,155},{134,0,1935},{132,0,826},{6,0,331},{6,0,1605},{8,0,623},{11,0,139
+},{139,0,171},{135,11,1734},{10,11,115},{11,11,420},{12,11,154},{13,11,404},{14,
+11,346},{15,11,54},{143,11,112},{7,0,288},{4,10,353},{6,10,146},{6,10,1789},{7,
+10,990},{7,10,1348},{9,10,665},{9,10,898},{11,10,893},{142,10,212},{6,0,916},{
+134,0,1592},{7,0,1888},{4,10,45},{135,10,1257},{5,11,1011},{136,11,701},{139,11,
+596},{4,11,54},{5,11,666},{7,11,1039},{7,11,1130},{9,11,195},{138,11,302},{134,0
+,1471},{134,0,1570},{132,0,394},{140,10,65},{136,10,816},{135,0,1931},{7,0,574},
+{135,0,1719},{134,11,467},{132,0,658},{9,0,781},{10,0,144},{11,0,385},{13,0,161}
+,{13,0,228},{13,0,268},{20,0,107},{134,11,1669},{136,0,374},{135,0,735},{4,0,344
+},{6,0,498},{139,0,323},{7,0,586},{7,0,1063},{6,10,559},{134,10,1691},{137,0,155
+},{133,0,906},{7,11,122},{9,11,259},{10,11,84},{11,11,470},{12,11,541},{141,11,
+379},{134,0,1139},{10,0,108},{139,0,116},{134,10,456},{133,10,925},{5,11,82},{5,
+11,131},{7,11,1755},{8,11,31},{9,11,168},{9,11,764},{139,11,869},{134,11,605},{5
+,11,278},{137,11,68},{4,11,163},{5,11,201},{5,11,307},{5,11,310},{6,11,335},{7,
+11,284},{136,11,165},{135,11,1660},{6,11,33},{135,11,1244},{4,0,616},{136,11,483
+},{8,0,857},{8,0,902},{8,0,910},{10,0,879},{12,0,726},{4,11,199},{139,11,34},{
+136,0,692},{6,10,193},{7,10,240},{7,10,1682},{10,10,51},{10,10,640},{11,10,410},
+{13,10,82},{14,10,247},{14,10,331},{142,10,377},{6,0,823},{134,0,983},{139,10,
+411},{132,0,305},{136,10,633},{138,11,203},{134,0,681},{6,11,326},{7,11,677},{
+137,11,425},{5,0,214},{7,0,603},{8,0,611},{9,0,686},{10,0,88},{11,0,459},{11,0,
+496},{12,0,463},{12,0,590},{141,0,0},{136,0,1004},{142,0,23},{134,0,1703},{147,
+11,8},{145,11,56},{135,0,1443},{4,10,237},{135,10,514},{6,0,714},{145,0,19},{5,
+11,358},{7,11,473},{7,11,1184},{10,11,662},{13,11,212},{13,11,304},{13,11,333},{
+145,11,98},{4,0,737},{10,0,98},{11,0,294},{12,0,60},{12,0,437},{13,0,64},{13,0,
+380},{142,0,430},{6,10,392},{7,10,65},{135,10,2019},{6,0,1758},{8,0,520},{9,0,
+345},{9,0,403},{142,0,350},{5,0,47},{10,0,242},{138,0,579},{5,0,139},{7,0,1168},
+{138,0,539},{134,0,1459},{13,0,388},{141,11,388},{134,0,253},{7,10,1260},{135,10
+,1790},{10,0,252},{9,10,222},{139,10,900},{140,0,745},{133,11,946},{4,0,107},{7,
+0,613},{8,0,439},{8,0,504},{9,0,501},{10,0,383},{139,0,477},{135,11,1485},{132,0
+,871},{7,11,411},{7,11,590},{8,11,631},{9,11,323},{10,11,355},{11,11,491},{12,11
+,143},{12,11,402},{13,11,73},{14,11,408},{15,11,107},{146,11,71},{132,0,229},{
+132,0,903},{140,0,71},{133,0,549},{4,0,47},{6,0,373},{7,0,452},{7,0,543},{7,0,
+1828},{7,0,1856},{9,0,6},{11,0,257},{139,0,391},{7,11,1467},{8,11,328},{10,11,
+544},{11,11,955},{13,11,320},{145,11,83},{5,0,980},{134,0,1754},{136,0,865},{5,0
+,705},{137,0,606},{7,0,161},{8,10,201},{136,10,605},{143,11,35},{5,11,835},{6,11
+,483},{140,10,224},{7,0,536},{7,0,1331},{136,0,143},{134,0,1388},{5,0,724},{10,0
+,305},{11,0,151},{12,0,33},{12,0,121},{12,0,381},{17,0,3},{17,0,27},{17,0,78},{
+18,0,18},{19,0,54},{149,0,5},{4,10,523},{133,10,638},{5,0,19},{134,0,533},{5,0,
+395},{5,0,951},{134,0,1776},{135,0,1908},{132,0,846},{10,0,74},{11,0,663},{12,0,
+210},{13,0,166},{13,0,310},{14,0,373},{18,0,95},{19,0,43},{6,10,242},{7,10,227},
+{7,10,1581},{8,10,104},{9,10,113},{9,10,220},{9,10,427},{10,10,239},{11,10,579},
+{11,10,1023},{13,10,4},{13,10,204},{13,10,316},{148,10,86},{9,11,716},{11,11,108
+},{13,11,123},{14,11,252},{19,11,38},{21,11,3},{151,11,11},{8,0,372},{9,0,122},{
+138,0,175},{132,11,677},{7,11,1374},{136,11,540},{135,10,861},{132,0,695},{7,0,
+497},{9,0,387},{147,0,81},{136,0,937},{134,0,718},{7,0,1328},{136,10,494},{132,
+11,331},{6,0,1581},{133,11,747},{5,0,284},{6,0,49},{6,0,350},{7,0,1},{7,0,377},{
+7,0,1693},{8,0,18},{8,0,678},{9,0,161},{9,0,585},{9,0,671},{9,0,839},{11,0,912},
+{141,0,427},{7,10,1306},{8,10,505},{9,10,482},{10,10,126},{11,10,225},{12,10,347
+},{12,10,449},{13,10,19},{14,10,218},{142,10,435},{10,10,764},{12,10,120},{13,10
+,39},{145,10,127},{4,0,597},{133,10,268},{134,0,1094},{4,0,1008},{134,0,1973},{
+132,0,811},{139,0,908},{135,0,1471},{133,11,326},{4,10,384},{135,10,1022},{7,0,
+1935},{8,0,324},{12,0,42},{4,11,691},{7,11,1935},{8,11,324},{9,11,35},{10,11,680
+},{11,11,364},{12,11,42},{13,11,357},{146,11,16},{135,0,2014},{7,0,2007},{9,0,
+101},{9,0,450},{10,0,66},{10,0,842},{11,0,536},{12,0,587},{6,11,32},{7,11,385},{
+7,11,757},{7,11,1916},{8,11,37},{8,11,94},{8,11,711},{9,11,541},{10,11,162},{10,
+11,795},{11,11,989},{11,11,1010},{12,11,14},{142,11,308},{139,0,586},{135,10,
+1703},{7,0,1077},{11,0,28},{9,10,159},{140,10,603},{6,0,1221},{136,10,583},{6,11
+,152},{6,11,349},{6,11,1682},{7,11,1252},{8,11,112},{9,11,435},{9,11,668},{10,11
+,290},{10,11,319},{10,11,815},{11,11,180},{11,11,837},{12,11,240},{13,11,152},{
+13,11,219},{142,11,158},{139,0,62},{132,10,515},{8,10,632},{8,10,697},{137,10,
+854},{134,0,1766},{132,11,581},{6,11,126},{7,11,573},{8,11,397},{142,11,44},{150
+,0,28},{11,0,670},{22,0,25},{4,10,136},{133,10,551},{6,0,1665},{7,0,256},{7,0,
+1388},{138,0,499},{4,0,22},{5,0,10},{7,0,1576},{136,0,97},{134,10,1782},{5,0,481
+},{7,10,1287},{9,10,44},{10,10,552},{10,10,642},{11,10,839},{12,10,274},{12,10,
+275},{12,10,372},{13,10,91},{142,10,125},{133,11,926},{7,11,1232},{137,11,531},{
+6,0,134},{7,0,437},{7,0,1824},{9,0,37},{14,0,285},{142,0,371},{7,0,486},{8,0,155
+},{11,0,93},{140,0,164},{6,0,1391},{134,0,1442},{133,11,670},{133,0,591},{6,10,
+147},{7,10,886},{7,11,1957},{9,10,753},{138,10,268},{5,0,380},{5,0,650},{7,0,
+1173},{136,0,310},{4,0,364},{7,0,1156},{7,0,1187},{137,0,409},{135,11,1621},{134
+,0,482},{133,11,506},{4,0,781},{6,0,487},{7,0,926},{8,0,263},{139,0,500},{138,10
+,137},{135,11,242},{139,11,96},{133,10,414},{135,10,1762},{134,0,804},{5,11,834}
+,{7,11,1202},{8,11,14},{9,11,481},{137,11,880},{134,10,599},{4,0,94},{135,0,1265
+},{4,0,415},{132,0,417},{5,0,348},{6,0,522},{6,10,1749},{7,11,1526},{138,11,465}
+,{134,10,1627},{132,0,1012},{132,10,488},{4,11,357},{6,11,172},{7,11,143},{137,
+11,413},{4,10,83},{4,11,590},{146,11,76},{140,10,676},{7,11,287},{8,11,355},{9,
+11,293},{137,11,743},{134,10,278},{6,0,1803},{18,0,165},{24,0,21},{5,11,169},{7,
+11,333},{136,11,45},{12,10,97},{140,11,97},{4,0,408},{4,0,741},{135,0,500},{132,
+11,198},{7,10,388},{7,10,644},{139,10,781},{4,11,24},{5,11,140},{5,11,185},{7,11
+,1500},{11,11,565},{139,11,838},{6,0,1321},{9,0,257},{7,10,229},{8,10,59},{9,10,
+190},{10,10,378},{140,10,191},{4,11,334},{133,11,593},{135,11,1885},{134,0,1138}
+,{4,0,249},{6,0,73},{135,0,177},{133,0,576},{142,0,231},{137,0,288},{132,10,660}
+,{7,10,1035},{138,10,737},{135,0,1487},{6,0,989},{9,0,433},{7,10,690},{9,10,587}
+,{140,10,521},{7,0,1264},{7,0,1678},{11,0,945},{12,0,341},{12,0,471},{140,0,569}
+,{132,11,709},{133,11,897},{5,11,224},{13,11,174},{146,11,52},{135,11,1840},{134
+,10,1744},{12,0,87},{16,0,74},{4,10,733},{9,10,194},{10,10,92},{11,10,198},{12,
+10,84},{141,10,128},{140,0,779},{135,0,538},{4,11,608},{133,11,497},{133,0,413},
+{7,11,1375},{7,11,1466},{138,11,331},{136,0,495},{6,11,540},{136,11,136},{7,0,54
+},{8,0,312},{10,0,191},{10,0,614},{140,0,567},{6,0,468},{7,0,567},{7,0,1478},{8,
+0,530},{14,0,290},{133,11,999},{4,11,299},{7,10,306},{135,11,1004},{142,11,296},
+{134,0,1484},{133,10,979},{6,0,609},{9,0,815},{12,11,137},{14,11,9},{14,11,24},{
+142,11,64},{133,11,456},{6,0,484},{135,0,822},{133,10,178},{136,11,180},{132,11,
+755},{137,0,900},{135,0,1335},{6,0,1724},{135,0,2022},{135,11,1139},{5,0,640},{
+132,10,390},{6,0,1831},{138,11,633},{135,11,566},{4,11,890},{5,11,805},{5,11,819
+},{5,11,961},{6,11,396},{6,11,1631},{6,11,1678},{7,11,1967},{7,11,2041},{9,11,
+630},{11,11,8},{11,11,1019},{12,11,176},{13,11,225},{14,11,292},{149,11,24},{132
+,0,474},{134,0,1103},{135,0,1504},{134,0,1576},{6,0,961},{6,0,1034},{140,0,655},
+{11,11,514},{149,11,20},{5,0,305},{135,11,1815},{7,11,1505},{10,11,190},{10,11,
+634},{11,11,792},{12,11,358},{140,11,447},{5,11,0},{6,11,536},{7,11,604},{13,11,
+445},{145,11,126},{7,0,1236},{133,10,105},{4,0,480},{6,0,217},{6,0,302},{6,0,
+1642},{7,0,130},{7,0,837},{7,0,1321},{7,0,1547},{7,0,1657},{8,0,429},{9,0,228},{
+13,0,289},{13,0,343},{19,0,101},{6,11,232},{6,11,412},{7,11,1074},{8,11,9},{8,11
+,157},{8,11,786},{9,11,196},{9,11,352},{9,11,457},{10,11,337},{11,11,232},{11,11
+,877},{12,11,480},{140,11,546},{5,10,438},{7,11,958},{9,10,694},{12,10,627},{13,
+11,38},{141,10,210},{4,11,382},{136,11,579},{7,0,278},{10,0,739},{11,0,708},{141
+,0,348},{4,11,212},{135,11,1206},{135,11,1898},{6,0,708},{6,0,1344},{152,10,11},
+{137,11,768},{134,0,1840},{140,0,233},{8,10,25},{138,10,826},{6,0,2017},{133,11,
+655},{6,0,1488},{139,11,290},{132,10,308},{134,0,1590},{134,0,1800},{134,0,1259}
+,{16,0,28},{6,11,231},{7,11,95},{136,11,423},{133,11,300},{135,10,150},{136,10,
+649},{7,11,1874},{137,11,641},{6,11,237},{7,11,611},{8,11,100},{9,11,416},{11,11
+,335},{12,11,173},{146,11,101},{137,0,45},{134,10,521},{17,0,36},{14,11,26},{146
+,11,150},{7,0,1442},{14,0,22},{5,10,339},{15,10,41},{15,10,166},{147,10,66},{8,0
+,378},{6,11,581},{135,11,1119},{134,0,1507},{147,11,117},{139,0,39},{134,0,1054}
+,{6,0,363},{7,0,1955},{136,0,725},{134,0,2036},{133,11,199},{6,0,1871},{9,0,935}
+,{9,0,961},{9,0,1004},{9,0,1016},{12,0,805},{12,0,852},{12,0,853},{12,0,869},{12
+,0,882},{12,0,896},{12,0,906},{12,0,917},{12,0,940},{15,0,170},{15,0,176},{15,0,
+188},{15,0,201},{15,0,205},{15,0,212},{15,0,234},{15,0,244},{18,0,181},{18,0,193
+},{18,0,196},{18,0,201},{18,0,202},{18,0,210},{18,0,217},{18,0,235},{18,0,236},{
+18,0,237},{21,0,54},{21,0,55},{21,0,58},{21,0,59},{152,0,22},{134,10,1628},{137,
+0,805},{5,0,813},{135,0,2046},{142,11,42},{5,0,712},{6,0,1240},{11,0,17},{13,0,
+321},{144,0,67},{132,0,617},{135,10,829},{6,0,320},{7,0,781},{7,0,1921},{9,0,55}
+,{10,0,186},{10,0,273},{10,0,664},{10,0,801},{11,0,996},{11,0,997},{13,0,157},{
+142,0,170},{136,0,271},{5,10,486},{135,10,1349},{18,11,91},{147,11,70},{10,0,445
+},{7,10,1635},{8,10,17},{138,10,295},{136,11,404},{7,0,103},{7,0,863},{11,0,184}
+,{145,0,62},{138,10,558},{137,0,659},{6,11,312},{6,11,1715},{10,11,584},{11,11,
+546},{11,11,692},{12,11,259},{12,11,295},{13,11,46},{141,11,154},{134,0,676},{
+132,11,588},{4,11,231},{5,11,61},{6,11,104},{7,11,729},{7,11,964},{7,11,1658},{
+140,11,414},{6,11,263},{138,11,757},{11,0,337},{142,0,303},{135,11,1363},{132,11
+,320},{140,0,506},{134,10,447},{5,0,77},{7,0,1455},{10,0,843},{147,0,73},{7,10,
+577},{7,10,1432},{9,10,475},{9,10,505},{9,10,526},{9,10,609},{9,10,689},{9,10,
+726},{9,10,735},{9,10,738},{10,10,556},{10,10,674},{10,10,684},{11,10,89},{11,10
+,202},{11,10,272},{11,10,380},{11,10,415},{11,10,505},{11,10,537},{11,10,550},{
+11,10,562},{11,10,640},{11,10,667},{11,10,688},{11,10,847},{11,10,927},{11,10,
+930},{11,10,940},{12,10,144},{12,10,325},{12,10,329},{12,10,389},{12,10,403},{12
+,10,451},{12,10,515},{12,10,604},{12,10,616},{12,10,626},{13,10,66},{13,10,131},
+{13,10,167},{13,10,236},{13,10,368},{13,10,411},{13,10,434},{13,10,453},{13,10,
+461},{13,10,474},{14,10,59},{14,10,60},{14,10,139},{14,10,152},{14,10,276},{14,
+10,353},{14,10,402},{15,10,28},{15,10,81},{15,10,123},{15,10,152},{18,10,136},{
+148,10,88},{132,0,458},{135,0,1420},{6,0,109},{10,0,382},{4,11,405},{4,10,609},{
+7,10,756},{7,11,817},{9,10,544},{11,10,413},{14,11,58},{14,10,307},{16,10,25},{
+17,11,37},{146,11,124},{6,0,330},{7,0,1084},{11,0,142},{133,11,974},{4,10,930},{
+133,10,947},{5,10,939},{142,11,394},{16,0,91},{145,0,87},{5,11,235},{5,10,962},{
+7,11,1239},{11,11,131},{140,11,370},{11,0,492},{5,10,651},{8,10,170},{9,10,61},{
+9,10,63},{10,10,23},{10,10,37},{10,10,834},{11,10,4},{11,10,281},{11,10,503},{11
+,10,677},{12,10,96},{12,10,130},{12,10,244},{14,10,5},{14,10,40},{14,10,162},{14
+,10,202},{146,10,133},{4,10,406},{5,10,579},{12,10,492},{150,10,15},{9,11,137},{
+138,11,221},{134,0,1239},{11,0,211},{140,0,145},{7,11,390},{138,11,140},{135,11,
+1418},{135,11,1144},{134,0,1049},{7,0,321},{6,10,17},{7,10,1001},{7,10,1982},{9,
+10,886},{10,10,489},{10,10,800},{11,10,782},{12,10,320},{13,10,467},{14,10,145},
+{14,10,387},{143,10,119},{145,10,17},{5,11,407},{11,11,489},{19,11,37},{20,11,73
+},{150,11,38},{133,10,458},{135,0,1985},{7,10,1983},{8,10,0},{8,10,171},{9,10,
+120},{9,10,732},{10,10,473},{11,10,656},{11,10,998},{18,10,0},{18,10,2},{147,10,
+21},{5,11,325},{7,11,1483},{8,11,5},{8,11,227},{9,11,105},{10,11,585},{140,11,
+614},{136,0,122},{132,0,234},{135,11,1196},{6,0,976},{6,0,1098},{134,0,1441},{7,
+0,253},{136,0,549},{6,11,621},{13,11,504},{144,11,19},{132,10,519},{5,0,430},{5,
+0,932},{6,0,131},{7,0,417},{9,0,522},{11,0,314},{141,0,390},{14,0,149},{14,0,399
+},{143,0,57},{5,10,907},{6,10,31},{6,11,218},{7,10,491},{7,10,530},{8,10,592},{
+11,10,53},{11,10,779},{12,10,167},{12,10,411},{14,10,14},{14,10,136},{15,10,72},
+{16,10,17},{144,10,72},{140,11,330},{7,11,454},{7,11,782},{136,11,768},{132,0,
+507},{10,11,676},{140,11,462},{6,0,630},{9,0,811},{4,10,208},{5,10,106},{6,10,
+531},{8,10,408},{9,10,188},{138,10,572},{4,0,343},{5,0,511},{134,10,1693},{134,
+11,164},{132,0,448},{7,0,455},{138,0,591},{135,0,1381},{12,10,441},{150,11,50},{
+9,10,449},{10,10,192},{138,10,740},{6,0,575},{132,10,241},{134,0,1175},{134,0,
+653},{134,0,1761},{134,0,1198},{132,10,259},{6,11,343},{7,11,195},{9,11,226},{10
+,11,197},{10,11,575},{11,11,502},{139,11,899},{7,0,1127},{7,0,1572},{10,0,297},{
+10,0,422},{11,0,764},{11,0,810},{12,0,264},{13,0,102},{13,0,300},{13,0,484},{14,
+0,147},{14,0,229},{17,0,71},{18,0,118},{147,0,120},{135,11,666},{132,0,678},{4,
+10,173},{5,10,312},{5,10,512},{135,10,1285},{7,10,1603},{7,10,1691},{9,10,464},{
+11,10,195},{12,10,279},{12,10,448},{14,10,11},{147,10,102},{16,0,99},{146,0,164}
+,{7,11,1125},{9,11,143},{11,11,61},{14,11,405},{150,11,21},{137,11,260},{4,10,
+452},{5,10,583},{5,10,817},{6,10,433},{7,10,593},{7,10,720},{7,10,1378},{8,10,
+161},{9,10,284},{10,10,313},{139,10,886},{132,10,547},{136,10,722},{14,0,35},{
+142,0,191},{141,0,45},{138,0,121},{132,0,125},{134,0,1622},{133,11,959},{8,10,
+420},{139,10,193},{132,0,721},{135,10,409},{136,0,145},{7,0,792},{8,0,147},{10,0
+,821},{11,0,970},{11,0,1021},{136,11,173},{134,11,266},{132,0,715},{7,0,1999},{
+138,10,308},{133,0,531},{5,0,168},{5,0,930},{8,0,74},{9,0,623},{12,0,500},{140,0
+,579},{144,0,65},{138,11,246},{6,0,220},{7,0,1101},{13,0,105},{142,11,314},{5,10
+,1002},{136,10,745},{134,0,960},{20,0,0},{148,11,0},{4,0,1005},{4,10,239},{6,10,
+477},{7,10,1607},{11,10,68},{139,10,617},{6,0,19},{7,0,1413},{139,0,428},{149,10
+,13},{7,0,96},{8,0,401},{8,0,703},{9,0,896},{136,11,300},{134,0,1595},{145,0,116
+},{136,0,1021},{7,0,1961},{7,0,1965},{7,0,2030},{8,0,150},{8,0,702},{8,0,737},{8
+,0,750},{140,0,366},{11,11,75},{142,11,267},{132,10,367},{8,0,800},{9,0,148},{9,
+0,872},{9,0,890},{11,0,309},{11,0,1001},{13,0,267},{13,0,323},{5,11,427},{5,11,
+734},{7,11,478},{136,11,52},{7,11,239},{11,11,217},{142,11,165},{132,11,323},{
+140,11,419},{13,0,299},{142,0,75},{6,11,87},{6,11,1734},{7,11,20},{7,11,1056},{8
+,11,732},{9,11,406},{9,11,911},{138,11,694},{134,0,1383},{132,10,694},{133,11,
+613},{137,0,779},{4,0,598},{140,10,687},{6,0,970},{135,0,424},{133,0,547},{7,11,
+32},{7,11,984},{8,11,85},{8,11,709},{9,11,579},{9,11,847},{9,11,856},{10,11,799}
+,{11,11,258},{11,11,1007},{12,11,331},{12,11,615},{13,11,188},{13,11,435},{14,11
+,8},{15,11,165},{16,11,27},{148,11,40},{6,0,1222},{134,0,1385},{132,0,876},{138,
+11,151},{135,10,213},{4,11,167},{135,11,82},{133,0,133},{6,11,24},{7,11,74},{7,
+11,678},{137,11,258},{5,11,62},{6,11,534},{7,11,684},{7,11,1043},{7,11,1072},{8,
+11,280},{8,11,541},{8,11,686},{10,11,519},{11,11,252},{140,11,282},{136,0,187},{
+8,0,8},{10,0,0},{10,0,818},{139,0,988},{132,11,359},{11,0,429},{15,0,51},{135,10
+,1672},{136,0,685},{5,11,211},{7,11,88},{136,11,627},{134,0,472},{136,0,132},{6,
+11,145},{141,11,336},{4,10,751},{11,10,390},{140,10,32},{6,0,938},{6,0,1060},{4,
+11,263},{4,10,409},{133,10,78},{137,0,874},{8,0,774},{10,0,670},{12,0,51},{4,11,
+916},{6,10,473},{7,10,1602},{10,10,698},{12,10,212},{13,10,307},{145,10,105},{
+146,0,92},{143,10,156},{132,0,830},{137,0,701},{4,11,599},{6,11,1634},{7,11,5},{
+7,11,55},{7,11,67},{7,11,97},{7,11,691},{7,11,979},{7,11,1697},{8,11,207},{8,11,
+214},{8,11,231},{8,11,294},{8,11,336},{8,11,428},{8,11,451},{8,11,460},{8,11,471
+},{8,11,622},{8,11,626},{8,11,679},{8,11,759},{8,11,829},{9,11,11},{9,11,246},{9
+,11,484},{9,11,573},{9,11,706},{9,11,762},{9,11,798},{9,11,855},{9,11,870},{9,11
+,912},{10,11,303},{10,11,335},{10,11,424},{10,11,461},{10,11,543},{10,11,759},{
+10,11,814},{11,11,59},{11,11,199},{11,11,235},{11,11,475},{11,11,590},{11,11,929
+},{11,11,963},{12,11,114},{12,11,182},{12,11,226},{12,11,332},{12,11,439},{12,11
+,575},{12,11,598},{13,11,8},{13,11,125},{13,11,194},{13,11,287},{14,11,197},{14,
+11,383},{15,11,53},{17,11,63},{19,11,46},{19,11,98},{19,11,106},{148,11,85},{4,0
+,127},{5,0,350},{6,0,356},{8,0,426},{9,0,572},{10,0,247},{139,0,312},{134,0,1215
+},{6,0,59},{9,0,603},{13,0,397},{7,11,1853},{138,11,437},{134,0,1762},{147,11,
+126},{135,10,883},{13,0,293},{142,0,56},{133,10,617},{139,10,50},{5,11,187},{7,
+10,1518},{139,10,694},{135,0,441},{6,0,111},{7,0,4},{8,0,163},{8,0,776},{138,0,
+566},{132,0,806},{4,11,215},{9,11,38},{10,11,3},{11,11,23},{11,11,127},{139,11,
+796},{14,0,233},{4,10,546},{135,10,2042},{135,0,1994},{134,0,1739},{135,11,1530}
+,{136,0,393},{5,0,297},{7,0,1038},{14,0,359},{19,0,52},{148,0,47},{135,0,309},{4
+,10,313},{133,10,577},{8,10,184},{141,10,433},{135,10,935},{12,10,186},{12,10,
+292},{14,10,100},{146,10,70},{136,0,363},{14,0,175},{11,10,402},{12,10,109},{12,
+10,431},{13,10,179},{13,10,206},{14,10,217},{16,10,3},{148,10,53},{5,10,886},{6,
+10,46},{6,10,1790},{7,10,14},{7,10,732},{7,10,1654},{8,10,95},{8,10,327},{8,10,
+616},{9,10,892},{10,10,598},{10,10,769},{11,10,134},{11,10,747},{12,10,378},{142
+,10,97},{136,0,666},{135,0,1675},{6,0,655},{134,0,1600},{135,0,808},{133,10,1021
+},{4,11,28},{5,11,440},{7,11,248},{11,11,833},{140,11,344},{134,11,1654},{132,0,
+280},{140,0,54},{4,0,421},{133,0,548},{132,10,153},{6,11,339},{135,11,923},{133,
+11,853},{133,10,798},{132,10,587},{6,11,249},{7,11,1234},{139,11,573},{6,10,598}
+,{7,10,42},{8,10,695},{10,10,212},{11,10,158},{14,10,196},{145,10,85},{7,0,249},
+{5,10,957},{133,10,1008},{4,10,129},{135,10,465},{6,0,254},{7,0,842},{7,0,1659},
+{9,0,109},{10,0,103},{7,10,908},{7,10,1201},{9,10,755},{11,10,906},{12,10,527},{
+146,10,7},{5,0,262},{136,10,450},{144,0,1},{10,11,201},{142,11,319},{7,11,49},{7
+,11,392},{8,11,20},{8,11,172},{8,11,690},{9,11,383},{9,11,845},{10,11,48},{11,11
+,293},{11,11,832},{11,11,920},{141,11,221},{5,11,858},{133,11,992},{134,0,805},{
+139,10,1003},{6,0,1630},{134,11,307},{7,11,1512},{135,11,1794},{6,11,268},{137,
+11,62},{135,10,1868},{133,0,671},{4,0,989},{8,0,972},{136,0,998},{132,11,423},{
+132,0,889},{135,0,1382},{135,0,1910},{7,10,965},{7,10,1460},{135,10,1604},{4,0,
+627},{5,0,775},{138,11,106},{134,11,348},{7,0,202},{11,0,362},{11,0,948},{140,0,
+388},{138,11,771},{6,11,613},{136,11,223},{6,0,560},{7,0,451},{8,0,389},{12,0,
+490},{13,0,16},{13,0,215},{13,0,351},{18,0,132},{147,0,125},{135,0,841},{136,0,
+566},{136,0,938},{132,11,670},{5,0,912},{6,0,1695},{140,11,55},{9,11,40},{139,11
+,136},{7,0,1361},{7,10,982},{10,10,32},{143,10,56},{11,11,259},{140,11,270},{5,0
+,236},{6,0,572},{8,0,492},{11,0,618},{144,0,56},{8,11,572},{9,11,310},{9,11,682}
+,{137,11,698},{134,0,1854},{5,0,190},{136,0,318},{133,10,435},{135,0,1376},{4,11
+,296},{6,11,352},{7,11,401},{7,11,1410},{7,11,1594},{7,11,1674},{8,11,63},{8,11,
+660},{137,11,74},{7,0,349},{5,10,85},{6,10,419},{7,10,305},{7,10,361},{7,10,1337
+},{8,10,71},{140,10,519},{4,11,139},{4,11,388},{140,11,188},{6,0,1972},{6,0,2013
+},{8,0,951},{10,0,947},{10,0,974},{10,0,1018},{142,0,476},{140,10,688},{135,10,
+740},{5,10,691},{7,10,345},{9,10,94},{140,10,169},{9,0,344},{5,10,183},{6,10,582
+},{10,10,679},{140,10,435},{135,10,511},{132,0,850},{8,11,441},{10,11,314},{143,
+11,3},{7,10,1993},{136,10,684},{4,11,747},{6,11,290},{6,10,583},{7,11,649},{7,11
+,1479},{135,11,1583},{133,11,232},{133,10,704},{134,0,910},{4,10,179},{5,10,198}
+,{133,10,697},{7,10,347},{7,10,971},{8,10,181},{138,10,711},{136,11,525},{14,0,
+19},{14,0,28},{144,0,29},{7,0,85},{7,0,247},{8,0,585},{138,0,163},{4,0,487},{7,
+11,472},{7,11,1801},{10,11,748},{141,11,458},{4,10,243},{5,10,203},{7,10,19},{7,
+10,71},{7,10,113},{10,10,405},{11,10,357},{142,10,240},{7,10,1450},{139,10,99},{
+132,11,425},{138,0,145},{147,0,83},{6,10,492},{137,11,247},{4,0,1013},{134,0,
+2033},{5,10,134},{6,10,408},{6,10,495},{135,10,1593},{135,0,1922},{134,11,1768},
+{4,0,124},{10,0,457},{11,0,121},{11,0,169},{11,0,870},{11,0,874},{12,0,214},{14,
+0,187},{143,0,77},{5,0,557},{135,0,1457},{139,0,66},{5,11,943},{6,11,1779},{142,
+10,4},{4,10,248},{4,10,665},{7,10,137},{137,10,349},{7,0,1193},{5,11,245},{6,11,
+576},{7,11,582},{136,11,225},{144,0,82},{7,10,1270},{139,10,612},{5,0,454},{10,0
+,352},{138,11,352},{18,0,57},{5,10,371},{135,10,563},{135,0,1333},{6,0,107},{7,0
+,638},{7,0,1632},{9,0,396},{134,11,610},{5,0,370},{134,0,1756},{4,10,374},{7,10,
+547},{7,10,1700},{7,10,1833},{139,10,858},{133,0,204},{6,0,1305},{9,10,311},{141
+,10,42},{5,0,970},{134,0,1706},{6,10,1647},{7,10,1552},{7,10,2010},{9,10,494},{
+137,10,509},{13,11,455},{15,11,99},{15,11,129},{144,11,68},{135,0,3},{4,0,35},{5
+,0,121},{5,0,483},{5,0,685},{6,0,489},{6,0,782},{6,0,1032},{7,0,1204},{136,0,394
+},{4,0,921},{133,0,1007},{8,11,360},{138,11,63},{135,0,1696},{134,0,1519},{132,
+11,443},{135,11,944},{6,10,123},{7,10,214},{9,10,728},{10,10,157},{11,10,346},{
+11,10,662},{143,10,106},{137,0,981},{135,10,1435},{134,0,1072},{132,0,712},{134,
+0,1629},{134,0,728},{4,11,298},{137,11,483},{6,0,1177},{6,0,1271},{5,11,164},{7,
+11,121},{142,11,189},{7,0,1608},{4,10,707},{5,10,588},{6,10,393},{13,10,106},{18
+,10,49},{147,10,41},{23,0,16},{151,11,16},{6,10,211},{7,10,1690},{11,10,486},{
+140,10,369},{133,0,485},{19,11,15},{149,11,27},{4,11,172},{9,11,611},{10,11,436}
+,{12,11,673},{141,11,255},{5,11,844},{10,11,484},{11,11,754},{12,11,457},{14,11,
+171},{14,11,389},{146,11,153},{4,0,285},{5,0,27},{5,0,317},{6,0,301},{7,0,7},{8,
+0,153},{10,0,766},{11,0,468},{12,0,467},{141,0,143},{134,0,1462},{9,11,263},{10,
+11,147},{138,11,492},{133,11,537},{6,0,1945},{6,0,1986},{6,0,1991},{134,0,2038},
+{134,10,219},{137,11,842},{14,0,52},{17,0,50},{5,10,582},{6,10,1646},{7,10,99},{
+7,10,1962},{7,10,1986},{8,10,515},{8,10,773},{9,10,23},{9,10,491},{12,10,620},{
+142,10,93},{138,11,97},{20,0,21},{20,0,44},{133,10,851},{136,0,819},{139,0,917},
+{5,11,230},{5,11,392},{6,11,420},{8,10,762},{8,10,812},{9,11,568},{9,10,910},{
+140,11,612},{135,0,784},{15,0,135},{143,11,135},{10,0,454},{140,0,324},{4,11,0},
+{5,11,41},{7,11,1459},{7,11,1469},{7,11,1618},{7,11,1859},{9,11,549},{139,11,905
+},{4,10,98},{7,10,1365},{9,10,422},{9,10,670},{10,10,775},{11,10,210},{13,10,26}
+,{13,10,457},{141,10,476},{6,0,1719},{6,0,1735},{7,0,2016},{7,0,2020},{8,0,837},
+{137,0,852},{133,11,696},{135,0,852},{132,0,952},{134,10,1730},{132,11,771},{138
+,0,568},{137,0,448},{139,0,146},{8,0,67},{138,0,419},{133,11,921},{137,10,147},{
+134,0,1826},{10,0,657},{14,0,297},{142,0,361},{6,0,666},{6,0,767},{134,0,1542},{
+139,0,729},{6,11,180},{7,11,1137},{8,11,751},{139,11,805},{4,11,183},{7,11,271},
+{11,11,824},{11,11,952},{13,11,278},{13,11,339},{13,11,482},{14,11,424},{148,11,
+99},{4,0,669},{5,11,477},{5,11,596},{6,11,505},{7,11,1221},{11,11,907},{12,11,
+209},{141,11,214},{135,11,1215},{5,0,402},{6,10,30},{11,10,56},{139,10,305},{7,
+11,564},{142,11,168},{139,0,152},{7,0,912},{135,10,1614},{4,10,150},{5,10,303},{
+134,10,327},{7,0,320},{8,0,51},{9,0,868},{10,0,833},{12,0,481},{12,0,570},{148,0
+,106},{132,0,445},{7,11,274},{11,11,263},{11,11,479},{11,11,507},{140,11,277},{
+10,0,555},{11,0,308},{19,0,95},{6,11,1645},{8,10,192},{10,10,78},{141,10,359},{
+135,10,786},{6,11,92},{6,11,188},{7,11,1269},{7,11,1524},{7,11,1876},{10,11,228}
+,{139,11,1020},{4,11,459},{133,11,966},{11,0,386},{6,10,1638},{7,10,79},{7,10,
+496},{9,10,138},{10,10,336},{12,10,412},{12,10,440},{142,10,305},{133,0,239},{7,
+0,83},{7,0,1990},{8,0,130},{139,0,720},{138,11,709},{4,0,143},{5,0,550},{133,0,
+752},{5,0,123},{6,0,530},{7,0,348},{135,0,1419},{135,0,2024},{6,11,18},{7,11,179
+},{7,11,721},{7,11,932},{8,11,548},{8,11,757},{9,11,54},{9,11,65},{9,11,532},{9,
+11,844},{10,11,113},{10,11,117},{10,11,236},{10,11,315},{10,11,430},{10,11,798},
+{11,11,153},{11,11,351},{11,11,375},{12,11,78},{12,11,151},{12,11,392},{14,11,
+248},{143,11,23},{7,10,204},{7,10,415},{8,10,42},{10,10,85},{139,10,564},{134,0,
+958},{133,11,965},{132,0,210},{135,11,1429},{138,11,480},{134,11,182},{139,11,
+345},{10,11,65},{10,11,488},{138,11,497},{4,10,3},{5,10,247},{5,10,644},{7,10,
+744},{7,10,1207},{7,10,1225},{7,10,1909},{146,10,147},{132,0,430},{5,10,285},{9,
+10,67},{13,10,473},{143,10,82},{144,11,16},{7,11,1162},{9,11,588},{10,11,260},{
+151,10,8},{133,0,213},{138,0,7},{135,0,801},{134,11,1786},{135,11,308},{6,0,936}
+,{134,0,1289},{133,0,108},{132,0,885},{133,0,219},{139,0,587},{4,0,193},{5,0,916
+},{6,0,1041},{7,0,364},{10,0,398},{10,0,726},{11,0,317},{11,0,626},{12,0,142},{
+12,0,288},{12,0,678},{13,0,313},{15,0,113},{146,0,114},{135,0,1165},{6,0,241},{9
+,0,342},{10,0,729},{11,0,284},{11,0,445},{11,0,651},{11,0,863},{13,0,398},{146,0
+,99},{7,0,907},{136,0,832},{9,0,303},{4,10,29},{6,10,532},{7,10,1628},{7,10,1648
+},{9,10,350},{10,10,433},{11,10,97},{11,10,557},{11,10,745},{12,10,289},{12,10,
+335},{12,10,348},{12,10,606},{13,10,116},{13,10,233},{13,10,466},{14,10,181},{14
+,10,209},{14,10,232},{14,10,236},{14,10,300},{16,10,41},{148,10,97},{7,11,423},{
+7,10,1692},{136,11,588},{6,0,931},{134,0,1454},{5,10,501},{7,10,1704},{9,10,553}
+,{11,10,520},{12,10,557},{141,10,249},{136,11,287},{4,0,562},{9,0,254},{139,0,
+879},{132,0,786},{14,11,32},{18,11,85},{20,11,2},{152,11,16},{135,0,1294},{7,11,
+723},{135,11,1135},{6,0,216},{7,0,901},{7,0,1343},{8,0,493},{134,11,403},{7,11,
+719},{8,11,809},{136,11,834},{5,11,210},{6,11,213},{7,11,60},{10,11,364},{139,11
+,135},{7,0,341},{11,0,219},{5,11,607},{8,11,326},{136,11,490},{4,11,701},{5,11,
+472},{5,11,639},{7,11,1249},{9,11,758},{139,11,896},{135,11,380},{135,11,1947},{
+139,0,130},{135,0,1734},{10,0,115},{11,0,420},{12,0,154},{13,0,404},{14,0,346},{
+143,0,54},{134,10,129},{4,11,386},{7,11,41},{8,11,405},{9,11,497},{11,11,110},{
+11,11,360},{15,11,37},{144,11,84},{141,11,282},{5,11,46},{7,11,1452},{7,11,1480}
+,{8,11,634},{140,11,472},{4,11,524},{136,11,810},{10,11,238},{141,11,33},{133,0,
+604},{5,0,1011},{136,0,701},{8,0,856},{8,0,858},{8,0,879},{12,0,702},{142,0,447}
+,{4,0,54},{5,0,666},{7,0,1039},{7,0,1130},{9,0,195},{138,0,302},{4,10,25},{5,10,
+60},{6,10,504},{7,10,614},{7,10,1155},{140,10,0},{7,10,1248},{11,10,621},{139,10
+,702},{133,11,997},{137,10,321},{134,0,1669},{134,0,1791},{4,10,379},{135,10,
+1397},{138,11,372},{5,11,782},{5,11,829},{134,11,1738},{135,0,1228},{4,10,118},{
+6,10,274},{6,10,361},{7,10,75},{141,10,441},{132,0,623},{9,11,279},{10,11,407},{
+14,11,84},{150,11,18},{137,10,841},{135,0,798},{140,10,693},{5,10,314},{6,10,221
+},{7,10,419},{10,10,650},{11,10,396},{12,10,156},{13,10,369},{14,10,333},{145,10
+,47},{135,11,1372},{7,0,122},{9,0,259},{10,0,84},{11,0,470},{12,0,541},{141,0,
+379},{134,0,837},{8,0,1013},{4,11,78},{5,11,96},{5,11,182},{7,11,1724},{7,11,
+1825},{10,11,394},{10,11,471},{11,11,532},{14,11,340},{145,11,88},{134,0,577},{
+135,11,1964},{132,10,913},{134,0,460},{8,0,891},{10,0,901},{10,0,919},{10,0,932}
+,{12,0,715},{12,0,728},{12,0,777},{14,0,457},{144,0,103},{5,0,82},{5,0,131},{7,0
+,1755},{8,0,31},{9,0,168},{9,0,764},{139,0,869},{136,10,475},{6,0,605},{5,10,
+1016},{9,11,601},{9,11,619},{10,11,505},{10,11,732},{11,11,355},{140,11,139},{7,
+10,602},{8,10,179},{10,10,781},{140,10,126},{134,0,1246},{6,10,329},{138,10,111}
+,{6,11,215},{7,11,1028},{7,11,1473},{7,11,1721},{9,11,424},{138,11,779},{5,0,278
+},{137,0,68},{6,0,932},{6,0,1084},{144,0,86},{4,0,163},{5,0,201},{5,0,307},{5,0,
+310},{6,0,335},{7,0,284},{7,0,1660},{136,0,165},{136,0,781},{134,0,707},{6,0,33}
+,{135,0,1244},{5,10,821},{6,11,67},{6,10,1687},{7,11,258},{7,11,1630},{9,11,354}
+,{9,11,675},{10,11,830},{14,11,80},{145,11,80},{6,11,141},{7,11,225},{9,11,59},{
+9,11,607},{10,11,312},{11,11,687},{12,11,555},{13,11,373},{13,11,494},{148,11,58
+},{134,0,1113},{9,0,388},{5,10,71},{7,10,1407},{9,10,704},{10,10,261},{10,10,619
+},{11,10,547},{11,10,619},{143,10,157},{7,0,1953},{136,0,720},{138,0,203},{7,10,
+2008},{9,10,337},{138,10,517},{6,0,326},{7,0,677},{137,0,425},{139,11,81},{7,0,
+1316},{7,0,1412},{7,0,1839},{9,0,589},{11,0,241},{11,0,676},{11,0,811},{11,0,891
+},{12,0,140},{12,0,346},{12,0,479},{13,0,140},{13,0,381},{14,0,188},{18,0,30},{
+148,0,108},{5,0,416},{6,10,86},{6,10,603},{7,10,292},{7,10,561},{8,10,257},{8,10
+,382},{9,10,721},{9,10,778},{11,10,581},{140,10,466},{4,10,486},{133,10,491},{
+134,0,1300},{132,10,72},{7,0,847},{6,10,265},{7,11,430},{139,11,46},{5,11,602},{
+6,11,106},{7,11,1786},{7,11,1821},{7,11,2018},{9,11,418},{137,11,763},{5,0,358},
+{7,0,535},{7,0,1184},{10,0,662},{13,0,212},{13,0,304},{13,0,333},{145,0,98},{5,
+11,65},{6,11,416},{7,11,1720},{7,11,1924},{8,11,677},{10,11,109},{11,11,14},{11,
+11,70},{11,11,569},{11,11,735},{15,11,153},{148,11,80},{6,0,1823},{8,0,839},{8,0
+,852},{8,0,903},{10,0,940},{12,0,707},{140,0,775},{135,11,1229},{6,0,1522},{140,
+0,654},{136,11,595},{139,0,163},{141,0,314},{132,0,978},{4,0,601},{6,0,2035},{
+137,10,234},{5,10,815},{6,10,1688},{134,10,1755},{133,0,946},{136,0,434},{6,10,
+197},{136,10,205},{7,0,411},{7,0,590},{8,0,631},{9,0,323},{10,0,355},{11,0,491},
+{12,0,143},{12,0,402},{13,0,73},{14,0,408},{15,0,107},{146,0,71},{7,0,1467},{8,0
+,328},{10,0,544},{11,0,955},{12,0,13},{13,0,320},{145,0,83},{142,0,410},{11,0,
+511},{13,0,394},{14,0,298},{14,0,318},{146,0,103},{6,10,452},{7,10,312},{138,10,
+219},{138,10,589},{4,10,333},{9,10,176},{12,10,353},{141,10,187},{135,11,329},{
+132,11,469},{5,0,835},{134,0,483},{134,11,1743},{5,11,929},{6,11,340},{8,11,376}
+,{136,11,807},{134,10,1685},{132,0,677},{5,11,218},{7,11,1610},{138,11,83},{5,11
+,571},{135,11,1842},{132,11,455},{137,0,70},{135,0,1405},{7,10,135},{8,10,7},{8,
+10,62},{9,10,243},{10,10,658},{10,10,697},{11,10,456},{139,10,756},{9,10,395},{
+138,10,79},{137,0,108},{6,11,161},{7,11,372},{137,11,597},{132,11,349},{132,0,
+777},{132,0,331},{135,10,631},{133,0,747},{6,11,432},{6,11,608},{139,11,322},{
+138,10,835},{5,11,468},{7,11,1809},{10,11,325},{11,11,856},{12,11,345},{143,11,
+104},{133,11,223},{7,10,406},{7,10,459},{8,10,606},{139,10,726},{132,11,566},{
+142,0,68},{4,11,59},{135,11,1394},{6,11,436},{139,11,481},{4,11,48},{5,11,271},{
+135,11,953},{139,11,170},{5,11,610},{136,11,457},{133,11,755},{135,11,1217},{133
+,10,612},{132,11,197},{132,0,505},{4,10,372},{7,10,482},{8,10,158},{9,10,602},{9
+,10,615},{10,10,245},{10,10,678},{10,10,744},{11,10,248},{139,10,806},{133,0,326
+},{5,10,854},{135,10,1991},{4,0,691},{146,0,16},{6,0,628},{9,0,35},{10,0,680},{
+10,0,793},{11,0,364},{13,0,357},{143,0,164},{138,0,654},{6,0,32},{7,0,385},{7,0,
+757},{7,0,1916},{8,0,37},{8,0,94},{8,0,711},{9,0,541},{10,0,162},{10,0,795},{11,
+0,989},{11,0,1010},{12,0,14},{142,0,308},{133,11,217},{6,0,152},{6,0,349},{6,0,
+1682},{7,0,1252},{8,0,112},{9,0,435},{9,0,668},{10,0,290},{10,0,319},{10,0,815},
+{11,0,180},{11,0,837},{12,0,240},{13,0,152},{13,0,219},{142,0,158},{4,0,581},{
+134,0,726},{5,10,195},{135,10,1685},{6,0,126},{7,0,573},{8,0,397},{142,0,44},{
+138,0,89},{7,10,1997},{8,10,730},{139,10,1006},{134,0,1531},{134,0,1167},{5,0,
+926},{12,0,203},{133,10,751},{4,11,165},{7,11,1398},{135,11,1829},{7,0,1232},{
+137,0,531},{135,10,821},{134,0,943},{133,0,670},{4,0,880},{139,0,231},{134,0,
+1617},{135,0,1957},{5,11,9},{7,11,297},{7,11,966},{140,11,306},{6,0,975},{134,0,
+985},{5,10,950},{5,10,994},{134,10,351},{12,11,21},{151,11,7},{5,11,146},{6,11,
+411},{138,11,721},{7,0,242},{135,0,1942},{6,11,177},{135,11,467},{5,0,421},{7,10
+,47},{137,10,684},{5,0,834},{7,0,1202},{8,0,14},{9,0,481},{137,0,880},{138,0,465
+},{6,0,688},{9,0,834},{132,10,350},{132,0,855},{4,0,357},{6,0,172},{7,0,143},{
+137,0,413},{133,11,200},{132,0,590},{7,10,1812},{13,10,259},{13,10,356},{14,10,
+242},{147,10,114},{133,10,967},{11,0,114},{4,10,473},{7,10,623},{8,10,808},{9,10
+,871},{9,10,893},{11,10,431},{12,10,112},{12,10,217},{12,10,243},{12,10,562},{12
+,10,663},{12,10,683},{13,10,141},{13,10,197},{13,10,227},{13,10,406},{13,10,487}
+,{14,10,156},{14,10,203},{14,10,224},{14,10,256},{18,10,58},{150,10,0},{138,10,
+286},{4,10,222},{7,10,286},{136,10,629},{5,0,169},{7,0,333},{136,0,45},{134,11,
+481},{132,0,198},{4,0,24},{5,0,140},{5,0,185},{7,0,1500},{11,0,565},{11,0,838},{
+4,11,84},{7,11,1482},{10,11,76},{138,11,142},{133,0,585},{141,10,306},{133,11,
+1015},{4,11,315},{5,11,507},{135,11,1370},{136,10,146},{6,0,691},{134,0,1503},{4
+,0,334},{133,0,593},{4,10,465},{135,10,1663},{142,11,173},{135,0,913},{12,0,116}
+,{134,11,1722},{134,0,1360},{132,0,802},{8,11,222},{8,11,476},{9,11,238},{11,11,
+516},{11,11,575},{15,11,109},{146,11,100},{6,0,308},{9,0,673},{7,10,138},{7,10,
+517},{139,10,238},{132,0,709},{6,0,1876},{6,0,1895},{9,0,994},{9,0,1006},{12,0,
+829},{12,0,888},{12,0,891},{146,0,185},{148,10,94},{4,0,228},{133,0,897},{7,0,
+1840},{5,10,495},{7,10,834},{9,10,733},{139,10,378},{133,10,559},{6,10,21},{6,10
+,1737},{7,10,1444},{136,10,224},{4,0,608},{133,0,497},{6,11,40},{135,11,1781},{
+134,0,1573},{135,0,2039},{6,0,540},{136,0,136},{4,0,897},{5,0,786},{133,10,519},
+{6,0,1878},{6,0,1884},{9,0,938},{9,0,948},{9,0,955},{9,0,973},{9,0,1012},{12,0,
+895},{12,0,927},{143,0,254},{134,0,1469},{133,0,999},{4,0,299},{135,0,1004},{4,0
+,745},{133,0,578},{136,11,574},{133,0,456},{134,0,1457},{7,0,1679},{132,10,402},
+{7,0,693},{8,0,180},{12,0,163},{8,10,323},{136,10,479},{11,10,580},{142,10,201},
+{5,10,59},{135,10,672},{132,11,354},{146,10,34},{4,0,755},{135,11,1558},{7,0,
+1740},{146,0,48},{4,10,85},{135,10,549},{139,0,338},{133,10,94},{134,0,1091},{
+135,11,469},{12,0,695},{12,0,704},{20,0,113},{5,11,830},{14,11,338},{148,11,81},
+{135,0,1464},{6,10,11},{135,10,187},{135,0,975},{13,0,335},{132,10,522},{134,0,
+1979},{5,11,496},{135,11,203},{4,10,52},{135,10,661},{7,0,1566},{8,0,269},{9,0,
+212},{9,0,718},{14,0,15},{14,0,132},{142,0,227},{4,0,890},{5,0,805},{5,0,819},{5
+,0,961},{6,0,396},{6,0,1631},{6,0,1678},{7,0,1967},{7,0,2041},{9,0,630},{11,0,8}
+,{11,0,1019},{12,0,176},{13,0,225},{14,0,292},{21,0,24},{4,10,383},{133,10,520},
+{134,11,547},{135,11,1748},{5,11,88},{137,11,239},{146,11,128},{7,11,650},{135,
+11,1310},{4,10,281},{5,10,38},{7,10,194},{7,10,668},{7,10,1893},{137,10,397},{
+135,0,1815},{9,10,635},{139,10,559},{7,0,1505},{10,0,190},{10,0,634},{11,0,792},
+{12,0,358},{140,0,447},{5,0,0},{6,0,536},{7,0,604},{13,0,445},{145,0,126},{7,11,
+1076},{9,11,80},{11,11,78},{11,11,421},{11,11,534},{140,11,545},{8,0,966},{10,0,
+1023},{14,11,369},{146,11,72},{135,11,1641},{6,0,232},{6,0,412},{7,0,1074},{8,0,
+9},{8,0,157},{8,0,786},{9,0,196},{9,0,352},{9,0,457},{10,0,337},{11,0,232},{11,0
+,877},{12,0,480},{140,0,546},{135,0,958},{4,0,382},{136,0,579},{4,0,212},{135,0,
+1206},{4,11,497},{5,11,657},{135,11,1584},{132,0,681},{8,0,971},{138,0,965},{5,
+10,448},{136,10,535},{14,0,16},{146,0,44},{11,0,584},{11,0,616},{14,0,275},{11,
+11,584},{11,11,616},{142,11,275},{136,11,13},{7,10,610},{135,10,1501},{7,11,642}
+,{8,11,250},{11,11,123},{11,11,137},{13,11,48},{142,11,95},{133,0,655},{17,0,67}
+,{147,0,74},{134,0,751},{134,0,1967},{6,0,231},{136,0,423},{5,0,300},{138,0,1016
+},{4,10,319},{5,10,699},{138,10,673},{6,0,237},{7,0,611},{8,0,100},{9,0,416},{11
+,0,335},{12,0,173},{18,0,101},{6,10,336},{8,10,552},{9,10,285},{10,10,99},{139,
+10,568},{134,0,1370},{7,10,1406},{9,10,218},{141,10,222},{133,10,256},{135,0,
+1208},{14,11,213},{148,11,38},{6,0,1219},{135,11,1642},{13,0,417},{14,0,129},{
+143,0,15},{10,11,545},{140,11,301},{17,10,39},{148,10,36},{133,0,199},{4,11,904}
+,{133,11,794},{12,0,427},{146,0,38},{134,0,949},{8,0,665},{135,10,634},{132,10,
+618},{135,10,259},{132,10,339},{133,11,761},{141,10,169},{132,10,759},{5,0,688},
+{7,0,539},{135,0,712},{7,11,386},{138,11,713},{134,0,1186},{6,11,7},{6,11,35},{7
+,11,147},{7,11,1069},{7,11,1568},{7,11,1575},{7,11,1917},{8,11,43},{8,11,208},{9
+,11,128},{9,11,866},{10,11,20},{11,11,981},{147,11,33},{7,11,893},{8,10,482},{
+141,11,424},{6,0,312},{6,0,1715},{10,0,584},{11,0,546},{11,0,692},{12,0,259},{12
+,0,295},{13,0,46},{141,0,154},{5,10,336},{6,10,341},{6,10,478},{6,10,1763},{136,
+10,386},{137,0,151},{132,0,588},{152,0,4},{6,11,322},{9,11,552},{11,11,274},{13,
+11,209},{13,11,499},{14,11,85},{15,11,126},{145,11,70},{135,10,73},{4,0,231},{5,
+0,61},{6,0,104},{7,0,729},{7,0,964},{7,0,1658},{140,0,414},{6,0,263},{138,0,757}
+,{135,10,1971},{4,0,612},{133,0,561},{132,0,320},{135,10,1344},{8,11,83},{8,11,
+817},{9,11,28},{9,11,29},{9,11,885},{10,11,387},{11,11,633},{11,11,740},{13,11,
+235},{13,11,254},{15,11,143},{143,11,146},{5,10,396},{134,10,501},{140,11,49},{
+132,0,225},{4,10,929},{5,10,799},{8,10,46},{136,10,740},{4,0,405},{7,0,817},{14,
+0,58},{17,0,37},{146,0,124},{133,0,974},{4,11,412},{133,11,581},{4,10,892},{133,
+10,770},{4,0,996},{134,0,2026},{4,0,527},{5,0,235},{7,0,1239},{11,0,131},{140,0,
+370},{9,0,16},{13,0,386},{135,11,421},{7,0,956},{7,0,1157},{7,0,1506},{7,0,1606}
+,{7,0,1615},{7,0,1619},{7,0,1736},{7,0,1775},{8,0,590},{9,0,324},{9,0,736},{9,0,
+774},{9,0,776},{9,0,784},{10,0,567},{10,0,708},{11,0,518},{11,0,613},{11,0,695},
+{11,0,716},{11,0,739},{11,0,770},{11,0,771},{11,0,848},{11,0,857},{11,0,931},{11
+,0,947},{12,0,326},{12,0,387},{12,0,484},{12,0,528},{12,0,552},{12,0,613},{13,0,
+189},{13,0,256},{13,0,340},{13,0,432},{13,0,436},{13,0,440},{13,0,454},{14,0,174
+},{14,0,220},{14,0,284},{14,0,390},{145,0,121},{135,10,158},{9,0,137},{138,0,221
+},{4,11,110},{10,11,415},{10,11,597},{142,11,206},{141,11,496},{135,11,205},{151
+,10,25},{135,11,778},{7,11,1656},{7,10,2001},{9,11,369},{10,11,338},{10,11,490},
+{11,11,154},{11,11,545},{11,11,775},{13,11,77},{141,11,274},{4,11,444},{10,11,
+146},{140,11,9},{7,0,390},{138,0,140},{135,0,1144},{134,0,464},{7,10,1461},{140,
+10,91},{132,10,602},{4,11,283},{135,11,1194},{5,0,407},{11,0,204},{11,0,243},{11
+,0,489},{12,0,293},{19,0,37},{20,0,73},{150,0,38},{7,0,1218},{136,0,303},{5,0,
+325},{8,0,5},{8,0,227},{9,0,105},{10,0,585},{12,0,614},{4,10,13},{5,10,567},{7,
+10,1498},{9,10,124},{11,10,521},{140,10,405},{135,10,1006},{7,0,800},{10,0,12},{
+134,11,1720},{135,0,1783},{132,10,735},{138,10,812},{4,10,170},{135,10,323},{6,0
+,621},{13,0,504},{144,0,89},{5,10,304},{135,10,1403},{137,11,216},{6,0,920},{6,0
+,1104},{9,11,183},{139,11,286},{4,0,376},{133,10,742},{134,0,218},{8,0,641},{11,
+0,388},{140,0,580},{7,0,454},{7,0,782},{8,0,768},{140,0,686},{137,11,33},{133,10
+,111},{144,0,0},{10,0,676},{140,0,462},{6,0,164},{136,11,735},{133,10,444},{150,
+0,50},{7,11,1862},{12,11,491},{12,11,520},{13,11,383},{14,11,244},{146,11,12},{5
+,11,132},{9,11,486},{9,11,715},{10,11,458},{11,11,373},{11,11,668},{11,11,795},{
+11,11,897},{12,11,272},{12,11,424},{12,11,539},{12,11,558},{14,11,245},{14,11,
+263},{14,11,264},{14,11,393},{142,11,403},{8,10,123},{15,10,6},{144,10,7},{6,0,
+285},{8,0,654},{11,0,749},{12,0,190},{12,0,327},{13,0,120},{13,0,121},{13,0,327}
+,{15,0,47},{146,0,40},{5,11,8},{6,11,89},{6,11,400},{7,11,1569},{7,11,1623},{7,
+11,1850},{8,11,218},{8,11,422},{9,11,570},{138,11,626},{6,11,387},{7,11,882},{
+141,11,111},{6,0,343},{7,0,195},{9,0,226},{10,0,197},{10,0,575},{11,0,502},{11,0
+,899},{6,11,224},{7,11,877},{137,11,647},{5,10,937},{135,10,100},{135,11,790},{
+150,0,29},{147,0,8},{134,0,1812},{149,0,8},{135,11,394},{7,0,1125},{9,0,143},{11
+,0,61},{14,0,405},{150,0,21},{10,11,755},{147,11,29},{9,11,378},{141,11,162},{
+135,10,922},{5,10,619},{133,10,698},{134,0,1327},{6,0,1598},{137,0,575},{9,11,
+569},{12,11,12},{12,11,81},{12,11,319},{13,11,69},{14,11,259},{16,11,87},{17,11,
+1},{17,11,21},{17,11,24},{18,11,15},{18,11,56},{18,11,59},{18,11,127},{18,11,154
+},{19,11,19},{148,11,31},{6,0,895},{135,11,1231},{5,0,959},{7,11,124},{136,11,38
+},{5,11,261},{7,11,78},{7,11,199},{8,11,815},{9,11,126},{138,11,342},{5,10,917},
+{134,10,1659},{7,0,1759},{5,11,595},{135,11,1863},{136,0,173},{134,0,266},{142,0
+,261},{132,11,628},{5,10,251},{5,10,956},{8,10,268},{9,10,214},{146,10,142},{7,
+11,266},{136,11,804},{135,11,208},{6,11,79},{7,11,1021},{135,11,1519},{11,11,704
+},{141,11,396},{5,10,346},{5,10,711},{136,10,390},{136,11,741},{134,11,376},{134
+,0,1427},{6,0,1033},{6,0,1217},{136,0,300},{133,10,624},{6,11,100},{7,11,244},{7
+,11,632},{7,11,1609},{8,11,178},{8,11,638},{141,11,58},{6,0,584},{5,10,783},{7,
+10,1998},{135,10,2047},{5,0,427},{5,0,734},{7,0,478},{136,0,52},{7,0,239},{11,0,
+217},{142,0,165},{134,0,1129},{6,0,168},{6,0,1734},{7,0,20},{7,0,1056},{8,0,732}
+,{9,0,406},{9,0,911},{138,0,694},{132,10,594},{133,11,791},{7,11,686},{8,11,33},
+{8,11,238},{10,11,616},{11,11,467},{11,11,881},{13,11,217},{13,11,253},{142,11,
+268},{137,11,476},{134,0,418},{133,0,613},{132,0,632},{132,11,447},{7,0,32},{7,0
+,984},{8,0,85},{8,0,709},{9,0,579},{9,0,847},{9,0,856},{10,0,799},{11,0,258},{11
+,0,1007},{12,0,331},{12,0,615},{13,0,188},{13,0,435},{14,0,8},{15,0,165},{16,0,
+27},{20,0,40},{144,11,35},{4,11,128},{5,11,415},{6,11,462},{7,11,294},{7,11,578}
+,{10,11,710},{139,11,86},{5,0,694},{136,0,909},{7,0,1109},{11,0,7},{5,10,37},{6,
+10,39},{6,10,451},{7,10,218},{7,10,1166},{7,10,1687},{8,10,662},{144,10,2},{136,
+11,587},{6,11,427},{7,11,1018},{138,11,692},{4,11,195},{6,10,508},{135,11,802},{
+4,0,167},{135,0,82},{5,0,62},{6,0,24},{6,0,534},{7,0,74},{7,0,678},{7,0,684},{7,
+0,1043},{7,0,1072},{8,0,280},{8,0,541},{8,0,686},{9,0,258},{10,0,519},{11,0,252}
+,{140,0,282},{138,0,33},{4,0,359},{133,11,738},{7,0,980},{9,0,328},{13,0,186},{
+13,0,364},{7,10,635},{7,10,796},{8,10,331},{9,10,330},{9,10,865},{10,10,119},{10
+,10,235},{11,10,111},{11,10,129},{11,10,240},{12,10,31},{12,10,66},{12,10,222},{
+12,10,269},{12,10,599},{12,10,684},{12,10,689},{12,10,691},{142,10,345},{137,10,
+527},{6,0,596},{7,0,585},{135,10,702},{134,11,1683},{133,0,211},{6,0,145},{141,0
+,336},{134,0,1130},{7,0,873},{6,10,37},{7,10,1666},{8,10,195},{8,10,316},{9,10,
+178},{9,10,276},{9,10,339},{9,10,536},{10,10,102},{10,10,362},{10,10,785},{11,10
+,55},{11,10,149},{11,10,773},{13,10,416},{13,10,419},{14,10,38},{14,10,41},{142,
+10,210},{8,0,840},{136,0,841},{132,0,263},{5,11,3},{8,11,578},{9,11,118},{10,11,
+705},{12,11,383},{141,11,279},{132,0,916},{133,11,229},{133,10,645},{15,0,155},{
+16,0,79},{8,11,102},{10,11,578},{10,11,672},{12,11,496},{13,11,408},{14,11,121},
+{145,11,106},{4,0,599},{5,0,592},{6,0,1634},{7,0,5},{7,0,55},{7,0,67},{7,0,97},{
+7,0,691},{7,0,979},{7,0,1600},{7,0,1697},{8,0,207},{8,0,214},{8,0,231},{8,0,294}
+,{8,0,336},{8,0,428},{8,0,471},{8,0,622},{8,0,626},{8,0,679},{8,0,759},{8,0,829}
+,{9,0,11},{9,0,246},{9,0,484},{9,0,573},{9,0,706},{9,0,762},{9,0,798},{9,0,855},
+{9,0,870},{9,0,912},{10,0,303},{10,0,335},{10,0,424},{10,0,461},{10,0,543},{10,0
+,759},{10,0,814},{11,0,59},{11,0,199},{11,0,235},{11,0,590},{11,0,631},{11,0,929
+},{11,0,963},{11,0,987},{12,0,114},{12,0,182},{12,0,226},{12,0,332},{12,0,439},{
+12,0,575},{12,0,598},{12,0,675},{13,0,8},{13,0,125},{13,0,194},{13,0,287},{14,0,
+197},{14,0,383},{15,0,53},{17,0,63},{19,0,46},{19,0,98},{19,0,106},{148,0,85},{7
+,0,1356},{132,10,290},{6,10,70},{7,10,1292},{10,10,762},{139,10,288},{150,11,55}
+,{4,0,593},{8,11,115},{8,11,350},{9,11,489},{10,11,128},{11,11,306},{12,11,373},
+{14,11,30},{17,11,79},{147,11,80},{135,11,1235},{134,0,1392},{4,11,230},{133,11,
+702},{147,0,126},{7,10,131},{7,10,422},{8,10,210},{140,10,573},{134,0,1179},{139
+,11,435},{139,10,797},{134,11,1728},{4,0,162},{18,11,26},{19,11,42},{20,11,43},{
+21,11,0},{23,11,27},{152,11,14},{132,10,936},{6,0,765},{5,10,453},{134,10,441},{
+133,0,187},{135,0,1286},{6,0,635},{6,0,904},{6,0,1210},{134,0,1489},{4,0,215},{8
+,0,890},{9,0,38},{10,0,923},{11,0,23},{11,0,127},{139,0,796},{6,0,1165},{134,0,
+1306},{7,0,716},{13,0,97},{141,0,251},{132,10,653},{136,0,657},{146,10,80},{5,11
+,622},{7,11,1032},{11,11,26},{11,11,213},{11,11,707},{12,11,380},{13,11,226},{
+141,11,355},{6,0,299},{5,11,70},{6,11,334},{9,11,171},{11,11,637},{12,11,202},{
+14,11,222},{145,11,42},{142,0,134},{4,11,23},{5,11,313},{5,11,1014},{6,11,50},{6
+,11,51},{7,11,142},{7,11,384},{9,11,783},{139,11,741},{4,11,141},{7,11,559},{8,
+11,640},{9,11,460},{12,11,183},{141,11,488},{136,11,614},{7,10,1368},{8,10,232},
+{8,10,361},{10,10,682},{138,10,742},{137,10,534},{6,0,1082},{140,0,658},{137,10,
+27},{135,0,2002},{142,10,12},{4,0,28},{5,0,440},{7,0,248},{11,0,833},{140,0,344}
+,{7,10,736},{139,10,264},{134,10,1657},{134,0,1654},{138,0,531},{5,11,222},{9,11
+,140},{138,11,534},{6,0,634},{6,0,798},{134,0,840},{138,11,503},{135,10,127},{
+133,0,853},{5,11,154},{7,11,1491},{10,11,379},{138,11,485},{6,0,249},{7,0,1234},
+{139,0,573},{133,11,716},{7,11,1570},{140,11,542},{136,10,364},{138,0,527},{4,11
+,91},{5,11,388},{5,11,845},{6,11,206},{6,11,252},{6,11,365},{7,11,136},{7,11,531
+},{8,11,264},{136,11,621},{134,0,1419},{135,11,1441},{7,0,49},{7,0,392},{8,0,20}
+,{8,0,172},{8,0,690},{9,0,383},{9,0,845},{10,0,48},{11,0,293},{11,0,832},{11,0,
+920},{11,0,984},{141,0,221},{5,0,858},{133,0,992},{5,0,728},{137,10,792},{5,10,
+909},{9,10,849},{138,10,805},{7,0,525},{7,0,1579},{8,0,497},{136,0,573},{6,0,268
+},{137,0,62},{135,11,576},{134,0,1201},{5,11,771},{5,11,863},{5,11,898},{6,11,
+1632},{6,11,1644},{134,11,1780},{133,11,331},{7,0,193},{7,0,1105},{10,0,495},{7,
+10,397},{8,10,124},{8,10,619},{9,10,305},{11,10,40},{12,10,349},{13,10,134},{13,
+10,295},{14,10,155},{15,10,120},{146,10,105},{138,0,106},{6,0,859},{5,11,107},{7
+,11,201},{136,11,518},{6,11,446},{135,11,1817},{13,0,23},{4,10,262},{135,10,342}
+,{133,10,641},{137,11,851},{6,0,925},{137,0,813},{132,11,504},{6,0,613},{136,0,
+223},{4,10,99},{6,10,250},{6,10,346},{8,10,127},{138,10,81},{136,0,953},{132,10,
+915},{139,11,892},{5,10,75},{9,10,517},{10,10,470},{12,10,155},{141,10,224},{4,0
+,666},{7,0,1017},{7,11,996},{138,11,390},{5,11,883},{133,11,975},{14,10,83},{142
+,11,83},{4,0,670},{5,11,922},{134,11,1707},{135,0,216},{9,0,40},{11,0,136},{135,
+11,787},{5,10,954},{5,11,993},{7,11,515},{137,11,91},{139,0,259},{7,0,1114},{9,0
+,310},{9,0,682},{10,0,440},{13,0,40},{6,10,304},{8,10,418},{11,10,341},{139,10,
+675},{14,0,296},{9,10,410},{139,10,425},{10,11,377},{12,11,363},{13,11,68},{13,
+11,94},{14,11,108},{142,11,306},{7,0,1401},{135,0,1476},{4,0,296},{6,0,475},{7,0
+,401},{7,0,1410},{7,0,1594},{7,0,1674},{8,0,63},{8,0,660},{137,0,74},{4,0,139},{
+4,0,388},{140,0,188},{132,0,797},{132,11,766},{5,11,103},{7,11,921},{8,11,580},{
+8,11,593},{8,11,630},{138,11,28},{4,11,911},{5,11,867},{133,11,1013},{134,10,14}
+,{134,0,1572},{134,10,1708},{21,0,39},{5,10,113},{6,10,243},{7,10,1865},{11,10,
+161},{16,10,37},{145,10,99},{7,11,1563},{141,11,182},{5,11,135},{6,11,519},{7,11
+,1722},{10,11,271},{11,11,261},{145,11,54},{132,10,274},{134,0,1594},{4,11,300},
+{5,11,436},{135,11,484},{4,0,747},{6,0,290},{7,0,649},{7,0,1479},{135,0,1583},{
+133,11,535},{147,11,82},{133,0,232},{137,0,887},{135,10,166},{136,0,521},{4,0,14
+},{7,0,472},{7,0,1801},{10,0,748},{141,0,458},{134,0,741},{134,0,992},{16,0,111}
+,{137,10,304},{4,0,425},{5,11,387},{7,11,557},{12,11,547},{142,11,86},{135,11,
+1747},{5,10,654},{135,11,1489},{7,0,789},{4,11,6},{5,11,708},{136,11,75},{6,10,
+273},{10,10,188},{13,10,377},{146,10,77},{6,0,1593},{4,11,303},{7,11,619},{10,11
+,547},{10,11,687},{11,11,122},{140,11,601},{134,0,1768},{135,10,410},{138,11,772
+},{11,0,233},{139,10,524},{5,0,943},{134,0,1779},{134,10,1785},{136,11,529},{132
+,0,955},{5,0,245},{6,0,576},{7,0,582},{136,0,225},{132,10,780},{142,0,241},{134,
+0,1943},{4,11,106},{7,11,310},{7,11,1785},{10,11,690},{139,11,717},{134,0,1284},
+{5,11,890},{133,11,988},{6,11,626},{142,11,431},{10,11,706},{145,11,32},{137,11,
+332},{132,11,698},{135,0,709},{5,10,948},{138,11,17},{136,0,554},{134,0,1564},{
+139,10,941},{132,0,443},{134,0,909},{134,11,84},{142,0,280},{4,10,532},{5,10,706
+},{135,10,662},{132,0,729},{5,10,837},{6,10,1651},{139,10,985},{135,10,1861},{4,
+0,348},{152,11,3},{5,11,986},{6,11,130},{7,11,1582},{8,11,458},{10,11,101},{10,
+11,318},{138,11,823},{134,0,758},{4,0,298},{137,0,848},{4,10,330},{7,10,933},{7,
+10,2012},{136,10,292},{7,11,1644},{137,11,129},{6,0,1422},{9,0,829},{135,10,767}
+,{5,0,164},{7,0,121},{142,0,189},{7,0,812},{7,0,1261},{7,0,1360},{9,0,632},{140,
+0,352},{135,11,1788},{139,0,556},{135,11,997},{145,10,114},{4,0,172},{9,0,611},{
+10,0,436},{12,0,673},{13,0,255},{137,10,883},{11,0,530},{138,10,274},{133,0,844}
+,{134,0,984},{13,0,232},{18,0,35},{4,10,703},{135,10,207},{132,10,571},{9,0,263}
+,{10,0,147},{138,0,492},{7,11,1756},{137,11,98},{5,10,873},{5,10,960},{8,10,823}
+,{137,10,881},{133,0,537},{132,0,859},{7,11,1046},{139,11,160},{137,0,842},{139,
+10,283},{5,10,33},{6,10,470},{139,10,424},{6,11,45},{7,11,433},{8,11,129},{9,11,
+21},{10,11,392},{11,11,79},{12,11,499},{13,11,199},{141,11,451},{135,0,1291},{
+135,10,1882},{7,11,558},{136,11,353},{134,0,1482},{5,0,230},{5,0,392},{6,0,420},
+{9,0,568},{140,0,612},{6,0,262},{7,10,90},{7,10,664},{7,10,830},{7,10,1380},{7,
+10,2025},{8,11,81},{8,10,448},{8,10,828},{9,11,189},{9,11,201},{11,11,478},{11,
+11,712},{141,11,338},{142,0,31},{5,11,353},{151,11,26},{132,0,753},{4,0,0},{5,0,
+41},{7,0,1459},{7,0,1469},{7,0,1859},{9,0,549},{139,0,905},{9,10,417},{137,10,
+493},{135,11,1113},{133,0,696},{141,11,448},{134,10,295},{132,0,834},{4,0,771},{
+5,10,1019},{6,11,25},{7,11,855},{7,11,1258},{144,11,32},{134,0,1076},{133,0,921}
+,{133,0,674},{4,11,4},{7,11,1118},{7,11,1320},{7,11,1706},{8,11,277},{9,11,622},
+{10,11,9},{11,11,724},{12,11,350},{12,11,397},{13,11,28},{13,11,159},{15,11,89},
+{18,11,5},{19,11,9},{20,11,34},{150,11,47},{134,10,208},{6,0,444},{136,0,308},{6
+,0,180},{7,0,1137},{8,0,751},{139,0,805},{4,0,183},{7,0,271},{11,0,824},{11,0,
+952},{13,0,278},{13,0,339},{13,0,482},{14,0,424},{148,0,99},{7,11,317},{135,11,
+569},{4,0,19},{5,0,477},{5,0,596},{6,0,505},{7,0,1221},{11,0,907},{12,0,209},{
+141,0,214},{135,0,1215},{6,0,271},{7,0,398},{8,0,387},{10,0,344},{7,10,448},{7,
+10,1629},{7,10,1813},{8,10,442},{9,10,710},{10,10,282},{138,10,722},{11,10,844},
+{12,10,104},{140,10,625},{134,11,255},{133,10,787},{134,0,1645},{11,11,956},{151
+,11,3},{6,0,92},{6,0,188},{7,0,209},{7,0,1269},{7,0,1524},{7,0,1876},{8,0,661},{
+10,0,42},{10,0,228},{11,0,58},{11,0,1020},{12,0,58},{12,0,118},{141,0,32},{4,0,
+459},{133,0,966},{4,11,536},{7,11,1141},{10,11,723},{139,11,371},{140,0,330},{
+134,0,1557},{7,11,285},{135,11,876},{136,10,491},{135,11,560},{6,0,18},{7,0,179}
+,{7,0,932},{8,0,548},{8,0,757},{9,0,54},{9,0,65},{9,0,532},{9,0,844},{10,0,113},
+{10,0,117},{10,0,315},{10,0,560},{10,0,622},{10,0,798},{11,0,153},{11,0,351},{11
+,0,375},{12,0,78},{12,0,151},{12,0,392},{12,0,666},{14,0,248},{143,0,23},{6,0,
+1742},{132,11,690},{4,10,403},{5,10,441},{7,10,450},{10,10,840},{11,10,101},{12,
+10,193},{141,10,430},{133,0,965},{134,0,182},{10,0,65},{10,0,488},{138,0,497},{
+135,11,1346},{6,0,973},{6,0,1158},{10,11,200},{19,11,2},{151,11,22},{4,11,190},{
+133,11,554},{133,10,679},{7,0,328},{137,10,326},{133,11,1001},{9,0,588},{138,0,
+260},{133,11,446},{135,10,1128},{135,10,1796},{147,11,119},{134,0,1786},{6,0,
+1328},{6,0,1985},{8,0,962},{138,0,1017},{135,0,308},{11,0,508},{4,10,574},{7,10,
+350},{7,10,1024},{8,10,338},{9,10,677},{138,10,808},{138,11,752},{135,10,1081},{
+137,11,96},{7,10,1676},{135,10,2037},{136,0,588},{132,11,304},{133,0,614},{140,0
+,793},{136,0,287},{137,10,297},{141,10,37},{6,11,53},{6,11,199},{7,11,1408},{8,
+11,32},{8,11,93},{9,11,437},{10,11,397},{10,11,629},{11,11,593},{11,11,763},{13,
+11,326},{145,11,35},{134,11,105},{9,11,320},{10,11,506},{138,11,794},{5,11,114},
+{5,11,255},{141,11,285},{140,0,290},{7,11,2035},{8,11,19},{9,11,89},{138,11,831}
+,{134,0,1136},{7,0,719},{8,0,796},{8,0,809},{8,0,834},{6,10,306},{7,10,1140},{7,
+10,1340},{8,10,133},{138,10,449},{139,10,1011},{5,0,210},{6,0,213},{7,0,60},{10,
+0,364},{139,0,135},{5,0,607},{8,0,326},{136,0,490},{138,11,176},{132,0,701},{5,0
+,472},{7,0,380},{137,0,758},{135,0,1947},{6,0,1079},{138,0,278},{138,11,391},{5,
+10,329},{8,10,260},{139,11,156},{4,0,386},{7,0,41},{8,0,405},{8,0,728},{9,0,497}
+,{11,0,110},{11,0,360},{15,0,37},{144,0,84},{5,0,46},{7,0,1452},{7,0,1480},{8,0,
+634},{140,0,472},{136,0,961},{4,0,524},{136,0,810},{10,0,238},{141,0,33},{132,10
+,657},{152,10,7},{133,0,532},{5,0,997},{135,10,1665},{7,11,594},{7,11,851},{7,11
+,1858},{9,11,411},{9,11,574},{9,11,666},{9,11,737},{10,11,346},{10,11,712},{11,
+11,246},{11,11,432},{11,11,517},{11,11,647},{11,11,679},{11,11,727},{12,11,304},
+{12,11,305},{12,11,323},{12,11,483},{12,11,572},{12,11,593},{12,11,602},{13,11,
+95},{13,11,101},{13,11,171},{13,11,315},{13,11,378},{13,11,425},{13,11,475},{14,
+11,63},{14,11,380},{14,11,384},{15,11,133},{18,11,112},{148,11,72},{5,11,955},{
+136,11,814},{134,0,1301},{5,10,66},{7,10,1896},{136,10,288},{133,11,56},{134,10,
+1643},{6,0,1298},{148,11,100},{5,0,782},{5,0,829},{6,0,671},{6,0,1156},{6,0,1738
+},{137,11,621},{4,0,306},{5,0,570},{7,0,1347},{5,10,91},{5,10,648},{5,10,750},{5
+,10,781},{6,10,54},{6,10,112},{6,10,402},{6,10,1732},{7,10,315},{7,10,749},{7,10
+,1900},{9,10,78},{9,10,508},{10,10,611},{10,10,811},{11,10,510},{11,10,728},{13,
+10,36},{14,10,39},{16,10,83},{17,10,124},{148,10,30},{8,10,570},{9,11,477},{141,
+11,78},{4,11,639},{10,11,4},{10,10,322},{10,10,719},{11,10,407},{11,11,638},{12,
+11,177},{148,11,57},{7,0,1823},{139,0,693},{7,0,759},{5,11,758},{8,10,125},{8,10
+,369},{8,10,524},{10,10,486},{11,10,13},{11,10,381},{11,10,736},{11,10,766},{11,
+10,845},{13,10,114},{13,10,292},{142,10,47},{7,0,1932},{6,10,1684},{6,10,1731},{
+7,10,356},{8,10,54},{8,10,221},{9,10,225},{9,10,356},{10,10,77},{10,10,446},{10,
+10,731},{12,10,404},{141,10,491},{135,11,552},{135,11,1112},{4,0,78},{5,0,96},{5
+,0,182},{6,0,1257},{7,0,1724},{7,0,1825},{10,0,394},{10,0,471},{11,0,532},{14,0,
+340},{145,0,88},{139,11,328},{135,0,1964},{132,10,411},{4,10,80},{5,10,44},{137,
+11,133},{5,11,110},{6,11,169},{6,11,1702},{7,11,400},{8,11,538},{9,11,184},{9,11
+,524},{140,11,218},{4,0,521},{5,10,299},{7,10,1083},{140,11,554},{6,11,133},{9,
+11,353},{12,11,628},{146,11,79},{6,0,215},{7,0,584},{7,0,1028},{7,0,1473},{7,0,
+1721},{9,0,424},{138,0,779},{7,0,857},{7,0,1209},{7,10,1713},{9,10,537},{10,10,
+165},{12,10,219},{140,10,561},{4,10,219},{6,11,93},{7,11,1422},{7,10,1761},{7,11
+,1851},{8,11,673},{9,10,86},{9,11,529},{140,11,43},{137,11,371},{136,0,671},{5,0
+,328},{135,0,918},{132,0,529},{9,11,25},{10,11,467},{138,11,559},{4,11,335},{135
+,11,942},{134,0,716},{134,0,1509},{6,0,67},{7,0,258},{7,0,1630},{9,0,354},{9,0,
+675},{10,0,830},{14,0,80},{17,0,80},{140,10,428},{134,0,1112},{6,0,141},{7,0,225
+},{9,0,59},{9,0,607},{10,0,312},{11,0,687},{12,0,555},{13,0,373},{13,0,494},{148
+,0,58},{133,10,514},{8,11,39},{10,11,773},{11,11,84},{12,11,205},{142,11,1},{8,0
+,783},{5,11,601},{133,11,870},{136,11,594},{4,10,55},{5,10,301},{6,10,571},{14,
+10,49},{146,10,102},{132,11,181},{134,11,1652},{133,10,364},{4,11,97},{5,11,147}
+,{6,11,286},{7,11,1362},{141,11,176},{4,10,76},{7,10,1550},{9,10,306},{9,10,430}
+,{9,10,663},{10,10,683},{11,10,427},{11,10,753},{12,10,334},{12,10,442},{14,10,
+258},{14,10,366},{143,10,131},{137,10,52},{6,0,955},{134,0,1498},{6,11,375},{7,
+11,169},{7,11,254},{136,11,780},{7,0,430},{11,0,46},{14,0,343},{142,11,343},{135
+,0,1183},{5,0,602},{7,0,2018},{9,0,418},{9,0,803},{135,11,1447},{8,0,677},{135,
+11,1044},{139,11,285},{4,10,656},{135,10,779},{135,10,144},{5,11,629},{135,11,
+1549},{135,10,1373},{138,11,209},{7,10,554},{7,10,605},{141,10,10},{5,10,838},{5
+,10,841},{134,10,1649},{133,10,1012},{6,0,1357},{134,0,1380},{144,0,53},{6,0,590
+},{7,10,365},{7,10,1357},{7,10,1497},{8,10,154},{141,10,281},{133,10,340},{132,
+11,420},{135,0,329},{147,11,32},{4,0,469},{10,11,429},{139,10,495},{8,10,261},{9
+,10,144},{9,10,466},{10,10,370},{12,10,470},{13,10,144},{142,10,348},{142,0,460}
+,{4,11,325},{9,10,897},{138,11,125},{6,0,1743},{6,10,248},{9,10,546},{10,10,535}
+,{11,10,681},{141,10,135},{4,0,990},{5,0,929},{6,0,340},{8,0,376},{8,0,807},{8,0
+,963},{8,0,980},{138,0,1007},{134,0,1603},{140,0,250},{4,11,714},{133,11,469},{
+134,10,567},{136,10,445},{5,0,218},{7,0,1610},{8,0,646},{10,0,83},{11,11,138},{
+140,11,40},{7,0,1512},{135,0,1794},{135,11,1216},{11,0,0},{16,0,78},{132,11,718}
+,{133,0,571},{132,0,455},{134,0,1012},{5,11,124},{5,11,144},{6,11,548},{7,11,15}
+,{7,11,153},{137,11,629},{142,11,10},{6,11,75},{7,11,1531},{8,11,416},{9,11,240}
+,{9,11,275},{10,11,100},{11,11,658},{11,11,979},{12,11,86},{13,11,468},{14,11,66
+},{14,11,207},{15,11,20},{15,11,25},{144,11,58},{132,10,577},{5,11,141},{5,11,
+915},{6,11,1783},{7,11,211},{7,11,698},{7,11,1353},{9,11,83},{9,11,281},{10,11,
+376},{10,11,431},{11,11,543},{12,11,664},{13,11,280},{13,11,428},{14,11,61},{14,
+11,128},{17,11,52},{145,11,81},{6,0,161},{7,0,372},{137,0,597},{132,0,349},{10,
+11,702},{139,11,245},{134,0,524},{134,10,174},{6,0,432},{9,0,751},{139,0,322},{
+147,11,94},{4,11,338},{133,11,400},{5,0,468},{10,0,325},{11,0,856},{12,0,345},{
+143,0,104},{133,0,223},{132,0,566},{4,11,221},{5,11,659},{5,11,989},{7,11,697},{
+7,11,1211},{138,11,284},{135,11,1070},{4,0,59},{135,0,1394},{6,0,436},{11,0,481}
+,{5,10,878},{133,10,972},{4,0,48},{5,0,271},{135,0,953},{5,0,610},{136,0,457},{4
+,0,773},{5,0,618},{137,0,756},{133,0,755},{135,0,1217},{138,11,507},{132,10,351}
+,{132,0,197},{143,11,78},{4,11,188},{7,11,805},{11,11,276},{142,11,293},{5,11,
+884},{139,11,991},{132,10,286},{10,0,259},{10,0,428},{7,10,438},{7,10,627},{7,10
+,1516},{8,10,40},{9,10,56},{9,10,294},{11,10,969},{11,10,995},{146,10,148},{4,0,
+356},{5,0,217},{5,0,492},{5,0,656},{8,0,544},{136,11,544},{5,0,259},{6,0,1230},{
+7,0,414},{7,0,854},{142,0,107},{132,0,1007},{15,0,14},{144,0,5},{6,0,1580},{132,
+10,738},{132,11,596},{132,0,673},{133,10,866},{6,0,1843},{135,11,1847},{4,0,165}
+,{7,0,1398},{135,0,1829},{135,11,1634},{147,11,65},{6,0,885},{6,0,1009},{137,0,
+809},{133,10,116},{132,10,457},{136,11,770},{9,0,498},{12,0,181},{10,11,361},{
+142,11,316},{134,11,595},{5,0,9},{7,0,297},{7,0,966},{140,0,306},{4,11,89},{5,11
+,489},{6,11,315},{7,11,553},{7,11,1745},{138,11,243},{134,0,1487},{132,0,437},{5
+,0,146},{6,0,411},{138,0,721},{5,10,527},{6,10,189},{135,10,859},{11,10,104},{11
+,10,554},{15,10,60},{143,10,125},{6,11,1658},{9,11,3},{10,11,154},{11,11,641},{
+13,11,85},{13,11,201},{141,11,346},{6,0,177},{135,0,467},{134,0,1377},{134,10,
+116},{136,11,645},{4,11,166},{5,11,505},{6,11,1670},{137,11,110},{133,10,487},{4
+,10,86},{5,10,667},{5,10,753},{6,10,316},{6,10,455},{135,10,946},{133,0,200},{
+132,0,959},{6,0,1928},{134,0,1957},{139,11,203},{150,10,45},{4,10,79},{7,10,1773
+},{10,10,450},{11,10,589},{13,10,332},{13,10,493},{14,10,183},{14,10,334},{14,10
+,362},{14,10,368},{14,10,376},{14,10,379},{19,10,90},{19,10,103},{19,10,127},{
+148,10,90},{6,0,1435},{135,11,1275},{134,0,481},{7,11,445},{8,11,307},{8,11,704}
+,{10,11,41},{10,11,439},{11,11,237},{11,11,622},{140,11,201},{135,11,869},{4,0,
+84},{7,0,1482},{10,0,76},{138,0,142},{11,11,277},{144,11,14},{135,11,1977},{4,11
+,189},{5,11,713},{136,11,57},{133,0,1015},{138,11,371},{4,0,315},{5,0,507},{135,
+0,1370},{4,11,552},{142,10,381},{9,0,759},{16,0,31},{16,0,39},{16,0,75},{18,0,24
+},{20,0,42},{152,0,1},{134,0,712},{134,0,1722},{133,10,663},{133,10,846},{8,0,
+222},{8,0,476},{9,0,238},{11,0,516},{11,0,575},{15,0,109},{146,0,100},{7,0,1402}
+,{7,0,1414},{12,0,456},{5,10,378},{8,10,465},{9,10,286},{10,10,185},{10,10,562},
+{10,10,635},{11,10,31},{11,10,393},{13,10,312},{18,10,65},{18,10,96},{147,10,89}
+,{4,0,986},{6,0,1958},{6,0,2032},{8,0,934},{138,0,985},{7,10,1880},{9,10,680},{
+139,10,798},{134,10,1770},{145,11,49},{132,11,614},{132,10,648},{5,10,945},{6,10
+,1656},{6,10,1787},{7,10,167},{8,10,824},{9,10,391},{10,10,375},{139,10,185},{
+138,11,661},{7,0,1273},{135,11,1945},{7,0,706},{7,0,1058},{138,0,538},{7,10,1645
+},{8,10,352},{137,10,249},{132,10,152},{11,0,92},{11,0,196},{11,0,409},{11,0,450
+},{11,0,666},{11,0,777},{12,0,262},{13,0,385},{13,0,393},{15,0,115},{16,0,45},{
+145,0,82},{133,10,1006},{6,0,40},{135,0,1781},{9,11,614},{139,11,327},{5,10,420}
+,{135,10,1449},{135,0,431},{10,0,97},{135,10,832},{6,0,423},{7,0,665},{135,0,
+1210},{7,0,237},{8,0,664},{9,0,42},{9,0,266},{9,0,380},{9,0,645},{10,0,177},{138
+,0,276},{7,0,264},{133,10,351},{8,0,213},{5,10,40},{7,10,598},{7,10,1638},{9,10,
+166},{9,10,640},{9,10,685},{9,10,773},{11,10,215},{13,10,65},{14,10,172},{14,10,
+317},{145,10,6},{5,11,84},{134,11,163},{8,10,60},{9,10,343},{139,10,769},{137,0,
+455},{133,11,410},{8,0,906},{12,0,700},{12,0,706},{140,0,729},{21,11,33},{150,11
+,40},{7,10,1951},{8,10,765},{8,10,772},{140,10,671},{7,10,108},{8,10,219},{8,10,
+388},{9,10,639},{9,10,775},{11,10,275},{140,10,464},{5,11,322},{7,11,1941},{8,11
+,186},{9,11,262},{10,11,187},{14,11,208},{146,11,130},{139,0,624},{8,0,574},{5,
+11,227},{140,11,29},{7,11,1546},{11,11,299},{142,11,407},{5,10,15},{6,10,56},{7,
+10,1758},{8,10,500},{9,10,730},{11,10,331},{13,10,150},{142,10,282},{7,11,1395},
+{8,11,486},{9,11,236},{9,11,878},{10,11,218},{11,11,95},{19,11,17},{147,11,31},{
+135,11,2043},{4,0,354},{146,11,4},{140,11,80},{135,0,1558},{134,10,1886},{5,10,
+205},{6,10,438},{137,10,711},{133,11,522},{133,10,534},{7,0,235},{7,0,1475},{15,
+0,68},{146,0,120},{137,10,691},{4,0,942},{6,0,1813},{8,0,917},{10,0,884},{12,0,
+696},{12,0,717},{12,0,723},{12,0,738},{12,0,749},{12,0,780},{16,0,97},{146,0,169
+},{6,10,443},{8,11,562},{9,10,237},{9,10,571},{9,10,695},{10,10,139},{11,10,715}
+,{12,10,417},{141,10,421},{135,0,957},{133,0,830},{134,11,1771},{146,0,23},{5,0,
+496},{6,0,694},{7,0,203},{7,11,1190},{137,11,620},{137,11,132},{6,0,547},{134,0,
+1549},{8,11,258},{9,11,208},{137,11,359},{4,0,864},{5,0,88},{137,0,239},{135,11,
+493},{4,11,317},{135,11,1279},{132,11,477},{4,10,578},{5,11,63},{133,11,509},{7,
+0,650},{135,0,1310},{7,0,1076},{9,0,80},{11,0,78},{11,0,421},{11,0,534},{140,0,
+545},{132,11,288},{12,0,553},{14,0,118},{133,10,923},{7,0,274},{11,0,479},{139,0
+,507},{8,11,89},{8,11,620},{9,11,49},{10,11,774},{11,11,628},{12,11,322},{143,11
+,124},{4,0,497},{135,0,1584},{7,0,261},{7,0,1115},{7,0,1354},{7,0,1404},{7,0,
+1588},{7,0,1705},{7,0,1902},{9,0,465},{10,0,248},{10,0,349},{10,0,647},{11,0,527
+},{11,0,660},{11,0,669},{12,0,529},{13,0,305},{132,10,924},{133,10,665},{136,0,
+13},{6,0,791},{138,11,120},{7,0,642},{8,0,250},{11,0,123},{11,0,137},{13,0,48},{
+142,0,95},{4,10,265},{7,10,807},{135,10,950},{5,10,93},{140,10,267},{135,0,1429}
+,{4,0,949},{10,0,885},{10,0,891},{10,0,900},{10,0,939},{12,0,760},{142,0,449},{
+139,11,366},{132,0,818},{134,11,85},{135,10,994},{7,0,330},{5,10,233},{5,10,320}
+,{6,10,140},{136,10,295},{4,0,1004},{8,0,982},{136,0,993},{133,10,978},{4,10,905
+},{6,10,1701},{137,10,843},{10,0,545},{140,0,301},{6,0,947},{134,0,1062},{134,0,
+1188},{4,0,904},{5,0,794},{152,10,6},{134,0,1372},{135,11,608},{5,11,279},{6,11,
+235},{7,11,468},{8,11,446},{9,11,637},{10,11,717},{11,11,738},{140,11,514},{132,
+10,509},{5,11,17},{6,11,371},{137,11,528},{132,0,693},{4,11,115},{5,11,669},{6,
+11,407},{8,11,311},{11,11,10},{141,11,5},{11,0,377},{7,10,273},{137,11,381},{135
+,0,695},{7,0,386},{138,0,713},{135,10,1041},{134,0,1291},{6,0,7},{6,0,35},{7,0,
+147},{7,0,1069},{7,0,1568},{7,0,1575},{7,0,1917},{8,0,43},{8,0,208},{9,0,128},{9
+,0,866},{10,0,20},{11,0,981},{147,0,33},{7,0,893},{141,0,424},{139,10,234},{150,
+11,56},{5,11,779},{5,11,807},{6,11,1655},{134,11,1676},{5,10,802},{7,10,2021},{
+136,10,805},{4,11,196},{5,10,167},{5,11,558},{5,10,899},{5,11,949},{6,10,410},{
+137,10,777},{137,10,789},{134,10,1705},{8,0,904},{140,0,787},{6,0,322},{9,0,552}
+,{11,0,274},{13,0,209},{13,0,499},{14,0,85},{15,0,126},{145,0,70},{135,10,10},{5
+,10,11},{6,10,117},{6,10,485},{7,10,1133},{9,10,582},{9,10,594},{11,10,21},{11,
+10,818},{12,10,535},{141,10,86},{4,10,264},{7,10,1067},{8,10,204},{8,10,385},{
+139,10,953},{132,11,752},{138,10,56},{133,10,470},{6,0,1808},{8,0,83},{8,0,742},
+{8,0,817},{9,0,28},{9,0,29},{9,0,885},{10,0,387},{11,0,633},{11,0,740},{13,0,235
+},{13,0,254},{15,0,143},{143,0,146},{140,0,49},{134,0,1832},{4,11,227},{5,11,159
+},{5,11,409},{7,11,80},{10,11,294},{10,11,479},{12,11,418},{14,11,50},{14,11,249
+},{142,11,295},{7,11,1470},{8,11,66},{8,11,137},{8,11,761},{9,11,638},{11,11,80}
+,{11,11,212},{11,11,368},{11,11,418},{12,11,8},{13,11,15},{16,11,61},{17,11,59},
+{19,11,28},{148,11,84},{139,10,1015},{138,11,468},{135,0,421},{6,0,415},{7,0,
+1049},{137,0,442},{6,11,38},{7,11,1220},{8,11,185},{8,11,256},{9,11,22},{9,11,
+331},{10,11,738},{11,11,205},{11,11,540},{11,11,746},{13,11,399},{13,11,465},{14
+,11,88},{142,11,194},{139,0,289},{133,10,715},{4,0,110},{10,0,415},{10,0,597},{
+142,0,206},{4,11,159},{6,11,115},{7,11,252},{7,11,257},{7,11,1928},{8,11,69},{9,
+11,384},{10,11,91},{10,11,615},{12,11,375},{14,11,235},{18,11,117},{147,11,123},
+{5,11,911},{136,11,278},{7,0,205},{7,0,2000},{8,10,794},{9,10,400},{10,10,298},{
+142,10,228},{135,11,1774},{4,11,151},{7,11,1567},{8,11,351},{137,11,322},{136,10
+,724},{133,11,990},{7,0,1539},{11,0,512},{13,0,205},{19,0,30},{22,0,36},{23,0,19
+},{135,11,1539},{5,11,194},{7,11,1662},{9,11,90},{140,11,180},{6,10,190},{7,10,
+768},{135,10,1170},{134,0,1340},{4,0,283},{135,0,1194},{133,11,425},{133,11,971}
+,{12,0,549},{14,10,67},{147,10,60},{135,10,1023},{134,0,1720},{138,11,587},{5,11
+,72},{6,11,264},{7,11,21},{7,11,46},{7,11,2013},{8,11,215},{8,11,513},{10,11,266
+},{139,11,22},{5,0,319},{135,0,534},{6,10,137},{9,10,75},{9,10,253},{10,10,194},
+{138,10,444},{7,0,1180},{20,0,112},{6,11,239},{7,11,118},{10,11,95},{11,11,603},
+{13,11,443},{14,11,160},{143,11,4},{134,11,431},{5,11,874},{6,11,1677},{11,10,
+643},{12,10,115},{143,11,0},{134,0,967},{6,11,65},{7,11,939},{7,11,1172},{7,11,
+1671},{9,11,540},{10,11,696},{11,11,265},{11,11,732},{11,11,928},{11,11,937},{12
+,11,399},{13,11,438},{149,11,19},{137,11,200},{135,0,1940},{5,10,760},{7,10,542}
+,{8,10,135},{136,10,496},{140,11,44},{7,11,1655},{136,11,305},{7,10,319},{7,10,
+355},{7,10,763},{10,10,389},{145,10,43},{136,0,735},{138,10,786},{137,11,19},{
+132,11,696},{5,0,132},{9,0,486},{9,0,715},{10,0,458},{11,0,373},{11,0,668},{11,0
+,795},{11,0,897},{12,0,272},{12,0,424},{12,0,539},{12,0,558},{14,0,245},{14,0,
+263},{14,0,264},{14,0,393},{142,0,403},{10,0,38},{139,0,784},{132,0,838},{4,11,
+302},{135,11,1766},{133,0,379},{5,0,8},{6,0,89},{6,0,400},{7,0,1569},{7,0,1623},
+{7,0,1850},{8,0,218},{8,0,422},{9,0,570},{10,0,626},{4,11,726},{133,11,630},{4,0
+,1017},{138,0,660},{6,0,387},{7,0,882},{141,0,111},{6,0,224},{7,0,877},{137,0,
+647},{4,10,58},{5,10,286},{6,10,319},{7,10,402},{7,10,1254},{7,10,1903},{8,10,
+356},{140,10,408},{135,0,790},{9,0,510},{10,0,53},{4,10,389},{9,10,181},{10,10,
+29},{10,10,816},{11,10,311},{11,10,561},{12,10,67},{141,10,181},{142,0,458},{6,
+11,118},{7,11,215},{7,11,1521},{140,11,11},{134,0,954},{135,0,394},{134,0,1367},
+{5,11,225},{133,10,373},{132,0,882},{7,0,1409},{135,10,1972},{135,10,1793},{4,11
+,370},{5,11,756},{135,11,1326},{150,11,13},{7,11,354},{10,11,410},{139,11,815},{
+6,11,1662},{7,11,48},{8,11,771},{10,11,116},{13,11,104},{14,11,105},{14,11,184},
+{15,11,168},{19,11,92},{148,11,68},{7,0,124},{136,0,38},{5,0,261},{7,0,78},{7,0,
+199},{8,0,815},{9,0,126},{10,0,342},{140,0,647},{4,0,628},{140,0,724},{7,0,266},
+{8,0,804},{7,10,1651},{145,10,89},{135,0,208},{134,0,1178},{6,0,79},{135,0,1519}
+,{132,10,672},{133,10,737},{136,0,741},{132,11,120},{4,0,710},{6,0,376},{134,0,
+606},{134,0,1347},{134,0,1494},{6,0,850},{6,0,1553},{137,0,821},{5,10,145},{134,
+11,593},{7,0,1311},{140,0,135},{4,0,467},{5,0,405},{134,0,544},{5,11,820},{135,
+11,931},{6,0,100},{7,0,244},{7,0,632},{7,0,1609},{8,0,178},{8,0,638},{141,0,58},
+{4,10,387},{135,10,1288},{6,11,151},{6,11,1675},{7,11,383},{151,11,10},{132,0,
+481},{135,10,550},{134,0,1378},{6,11,1624},{11,11,11},{12,11,422},{13,11,262},{
+142,11,360},{133,0,791},{4,11,43},{5,11,344},{133,11,357},{7,0,1227},{140,0,978}
+,{7,0,686},{8,0,33},{8,0,238},{10,0,616},{11,0,467},{11,0,881},{13,0,217},{13,0,
+253},{142,0,268},{137,0,857},{8,0,467},{8,0,1006},{7,11,148},{8,11,284},{141,11,
+63},{4,10,576},{135,10,1263},{133,11,888},{5,10,919},{134,10,1673},{20,10,37},{
+148,11,37},{132,0,447},{132,11,711},{4,0,128},{5,0,415},{6,0,462},{7,0,294},{7,0
+,578},{10,0,710},{139,0,86},{4,10,82},{5,10,333},{5,10,904},{6,10,207},{7,10,325
+},{7,10,1726},{8,10,101},{10,10,778},{139,10,220},{136,0,587},{137,11,440},{133,
+10,903},{6,0,427},{7,0,1018},{138,0,692},{4,0,195},{135,0,802},{140,10,147},{134
+,0,1546},{134,0,684},{132,10,705},{136,0,345},{11,11,678},{140,11,307},{133,0,
+365},{134,0,1683},{4,11,65},{5,11,479},{5,11,1004},{7,11,1913},{8,11,317},{9,11,
+302},{10,11,612},{141,11,22},{138,0,472},{4,11,261},{135,11,510},{134,10,90},{
+142,0,433},{151,0,28},{4,11,291},{7,11,101},{9,11,515},{12,11,152},{12,11,443},{
+13,11,392},{142,11,357},{140,0,997},{5,0,3},{8,0,578},{9,0,118},{10,0,705},{141,
+0,279},{135,11,1266},{7,10,813},{12,10,497},{141,10,56},{133,0,229},{6,10,125},{
+135,10,1277},{8,0,102},{10,0,578},{10,0,672},{12,0,496},{13,0,408},{14,0,121},{
+17,0,106},{151,10,12},{6,0,866},{134,0,1080},{136,0,1022},{4,11,130},{135,11,843
+},{5,11,42},{5,11,879},{7,11,245},{7,11,324},{7,11,1532},{11,11,463},{11,11,472}
+,{13,11,363},{144,11,52},{150,0,55},{8,0,115},{8,0,350},{9,0,489},{10,0,128},{11
+,0,306},{12,0,373},{14,0,30},{17,0,79},{19,0,80},{4,11,134},{133,11,372},{134,0,
+657},{134,0,933},{135,11,1147},{4,0,230},{133,0,702},{134,0,1728},{4,0,484},{18,
+0,26},{19,0,42},{20,0,43},{21,0,0},{23,0,27},{152,0,14},{7,0,185},{135,0,703},{6
+,0,417},{10,0,618},{7,10,1106},{9,10,770},{11,10,112},{140,10,413},{134,0,803},{
+132,11,644},{134,0,1262},{7,11,540},{12,10,271},{145,10,109},{135,11,123},{132,0
+,633},{134,11,623},{4,11,908},{5,11,359},{5,11,508},{6,11,1723},{7,11,343},{7,11
+,1996},{135,11,2026},{135,0,479},{10,0,262},{7,10,304},{9,10,646},{9,10,862},{11
+,10,696},{12,10,208},{15,10,79},{147,10,108},{4,11,341},{135,11,480},{134,0,830}
+,{5,0,70},{5,0,622},{6,0,334},{7,0,1032},{9,0,171},{11,0,26},{11,0,213},{11,0,
+637},{11,0,707},{12,0,202},{12,0,380},{13,0,226},{13,0,355},{14,0,222},{145,0,42
+},{135,10,981},{143,0,217},{137,11,114},{4,0,23},{4,0,141},{5,0,313},{5,0,1014},
+{6,0,50},{6,0,51},{7,0,142},{7,0,384},{7,0,559},{8,0,640},{9,0,460},{9,0,783},{
+11,0,741},{12,0,183},{141,0,488},{141,0,360},{7,0,1586},{7,11,1995},{8,11,299},{
+11,11,890},{140,11,674},{132,10,434},{7,0,652},{134,10,550},{7,0,766},{5,10,553}
+,{138,10,824},{7,0,737},{8,0,298},{136,10,452},{4,11,238},{5,11,503},{6,11,179},
+{7,11,2003},{8,11,381},{8,11,473},{9,11,149},{10,11,183},{15,11,45},{143,11,86},
+{133,10,292},{5,0,222},{9,0,655},{138,0,534},{138,10,135},{4,11,121},{5,11,156},
+{5,11,349},{9,11,136},{10,11,605},{14,11,342},{147,11,107},{137,0,906},{6,0,1013
+},{134,0,1250},{6,0,1956},{6,0,2009},{8,0,991},{144,0,120},{135,11,1192},{138,0,
+503},{5,0,154},{7,0,1491},{10,0,379},{138,0,485},{6,0,1867},{6,0,1914},{6,0,1925
+},{9,0,917},{9,0,925},{9,0,932},{9,0,951},{9,0,1007},{9,0,1013},{12,0,806},{12,0
+,810},{12,0,814},{12,0,816},{12,0,824},{12,0,832},{12,0,837},{12,0,863},{12,0,
+868},{12,0,870},{12,0,889},{12,0,892},{12,0,900},{12,0,902},{12,0,908},{12,0,933
+},{12,0,942},{12,0,949},{12,0,954},{15,0,175},{15,0,203},{15,0,213},{15,0,218},{
+15,0,225},{15,0,231},{15,0,239},{15,0,248},{15,0,252},{18,0,190},{18,0,204},{18,
+0,215},{18,0,216},{18,0,222},{18,0,225},{18,0,230},{18,0,239},{18,0,241},{21,0,
+42},{21,0,43},{21,0,44},{21,0,45},{21,0,46},{21,0,53},{24,0,27},{152,0,31},{133,
+0,716},{135,0,844},{4,0,91},{5,0,388},{5,0,845},{6,0,206},{6,0,252},{6,0,365},{7
+,0,136},{7,0,531},{136,0,621},{7,10,393},{10,10,603},{139,10,206},{6,11,80},{6,
+11,1694},{7,11,173},{7,11,1974},{9,11,547},{10,11,730},{14,11,18},{150,11,39},{
+137,0,748},{4,11,923},{134,11,1711},{4,10,912},{137,10,232},{7,10,98},{7,10,1973
+},{136,10,716},{14,0,103},{133,10,733},{132,11,595},{12,0,158},{18,0,8},{19,0,62
+},{20,0,6},{22,0,4},{23,0,2},{23,0,9},{5,11,240},{6,11,459},{7,11,12},{7,11,114}
+,{7,11,502},{7,11,1751},{7,11,1753},{7,11,1805},{8,11,658},{9,11,1},{11,11,959},
+{13,11,446},{142,11,211},{135,0,576},{5,0,771},{5,0,863},{5,0,898},{6,0,648},{6,
+0,1632},{6,0,1644},{134,0,1780},{133,0,331},{7,11,633},{7,11,905},{7,11,909},{7,
+11,1538},{9,11,767},{140,11,636},{140,0,632},{5,0,107},{7,0,201},{136,0,518},{6,
+0,446},{7,0,1817},{134,11,490},{9,0,851},{141,0,510},{7,11,250},{8,11,506},{136,
+11,507},{4,0,504},{137,10,72},{132,11,158},{4,11,140},{7,11,362},{8,11,209},{9,
+11,10},{9,11,160},{9,11,503},{10,11,689},{11,11,350},{11,11,553},{11,11,725},{12
+,11,252},{12,11,583},{13,11,192},{13,11,352},{14,11,269},{14,11,356},{148,11,50}
+,{6,11,597},{135,11,1318},{135,10,1454},{5,0,883},{5,0,975},{8,0,392},{148,0,7},
+{6,11,228},{7,11,1341},{9,11,408},{138,11,343},{11,11,348},{11,10,600},{12,11,99
+},{13,10,245},{18,11,1},{18,11,11},{147,11,4},{134,11,296},{5,0,922},{134,0,1707
+},{132,11,557},{4,11,548},{7,10,164},{7,10,1571},{9,10,107},{140,10,225},{7,11,
+197},{8,11,142},{8,11,325},{9,11,150},{9,11,596},{10,11,350},{10,11,353},{11,11,
+74},{11,11,315},{14,11,423},{143,11,141},{5,0,993},{7,0,515},{137,0,91},{4,0,131
+},{8,0,200},{5,10,484},{5,10,510},{6,10,434},{7,10,1000},{7,10,1098},{136,10,2},
+{152,0,10},{4,11,62},{5,11,83},{6,11,399},{6,11,579},{7,11,692},{7,11,846},{7,11
+,1015},{7,11,1799},{8,11,403},{9,11,394},{10,11,133},{12,11,4},{12,11,297},{12,
+11,452},{16,11,81},{18,11,19},{18,11,25},{21,11,14},{22,11,12},{151,11,18},{140,
+11,459},{132,11,177},{7,0,1433},{9,0,365},{137,11,365},{132,10,460},{5,0,103},{6
+,0,2004},{7,0,921},{8,0,580},{8,0,593},{8,0,630},{10,0,28},{5,11,411},{135,11,
+653},{4,10,932},{133,10,891},{4,0,911},{5,0,867},{5,0,1013},{7,0,2034},{8,0,798}
+,{136,0,813},{7,11,439},{10,11,727},{11,11,260},{139,11,684},{136,10,625},{5,11,
+208},{7,11,753},{135,11,1528},{5,0,461},{7,0,1925},{12,0,39},{13,0,265},{13,0,
+439},{134,10,76},{6,0,853},{8,10,92},{137,10,221},{5,0,135},{6,0,519},{7,0,1722}
+,{10,0,271},{11,0,261},{145,0,54},{139,11,814},{14,0,338},{148,0,81},{4,0,300},{
+133,0,436},{5,0,419},{5,0,687},{7,0,864},{9,0,470},{135,11,864},{9,0,836},{133,
+11,242},{134,0,1937},{4,10,763},{133,11,953},{132,10,622},{132,0,393},{133,10,
+253},{8,0,357},{10,0,745},{14,0,426},{17,0,94},{19,0,57},{135,10,546},{5,11,615}
+,{146,11,37},{9,10,73},{10,10,110},{14,10,185},{145,10,119},{11,0,703},{7,10,624
+},{7,10,916},{10,10,256},{139,10,87},{133,11,290},{5,10,212},{12,10,35},{141,10,
+382},{132,11,380},{5,11,52},{7,11,277},{9,11,368},{139,11,791},{133,0,387},{10,
+11,138},{139,11,476},{4,0,6},{5,0,708},{136,0,75},{7,0,1351},{9,0,581},{10,0,639
+},{11,0,453},{140,0,584},{132,0,303},{138,0,772},{135,10,1175},{4,0,749},{5,10,
+816},{6,11,256},{7,11,307},{7,11,999},{7,11,1481},{7,11,1732},{7,11,1738},{8,11,
+265},{9,11,414},{11,11,316},{12,11,52},{13,11,420},{147,11,100},{135,11,1296},{6
+,0,1065},{5,10,869},{5,10,968},{6,10,1626},{8,10,734},{136,10,784},{4,10,542},{6
+,10,1716},{6,10,1727},{7,10,1082},{7,10,1545},{8,10,56},{8,10,118},{8,10,412},{8
+,10,564},{9,10,888},{9,10,908},{10,10,50},{10,10,423},{11,10,685},{11,10,697},{
+11,10,933},{12,10,299},{13,10,126},{13,10,136},{13,10,170},{141,10,190},{134,0,
+226},{4,0,106},{7,0,310},{11,0,717},{133,11,723},{5,0,890},{5,0,988},{4,10,232},
+{9,10,202},{10,10,474},{140,10,433},{6,0,626},{142,0,431},{10,0,706},{150,0,44},
+{13,0,51},{6,10,108},{7,10,1003},{7,10,1181},{8,10,111},{136,10,343},{132,0,698}
+,{5,11,109},{6,11,1784},{7,11,1895},{12,11,296},{140,11,302},{134,0,828},{134,10
+,1712},{138,0,17},{7,0,1929},{4,10,133},{5,11,216},{7,10,711},{7,10,1298},{7,10,
+1585},{7,11,1879},{9,11,141},{9,11,270},{9,11,679},{10,11,159},{10,11,553},{11,
+11,197},{11,11,438},{12,11,538},{12,11,559},{13,11,193},{13,11,423},{14,11,144},
+{14,11,166},{14,11,167},{15,11,67},{147,11,84},{141,11,127},{7,11,1872},{137,11,
+81},{6,10,99},{7,10,1808},{145,10,57},{134,11,391},{5,0,689},{6,0,84},{7,0,1250}
+,{6,10,574},{7,10,428},{10,10,669},{11,10,485},{11,10,840},{12,10,300},{142,10,
+250},{7,11,322},{136,11,249},{7,11,432},{135,11,1649},{135,10,1871},{137,10,252}
+,{6,11,155},{140,11,234},{7,0,871},{19,0,27},{147,11,27},{140,0,498},{5,0,986},{
+6,0,130},{138,0,823},{6,0,1793},{7,0,1582},{8,0,458},{10,0,101},{10,0,318},{10,0
+,945},{12,0,734},{16,0,104},{18,0,177},{6,10,323},{135,10,1564},{5,11,632},{138,
+11,526},{10,0,435},{7,10,461},{136,10,775},{6,11,144},{7,11,948},{7,11,1042},{7,
+11,1857},{8,11,235},{8,11,461},{9,11,453},{9,11,530},{10,11,354},{17,11,77},{19,
+11,99},{148,11,79},{138,0,966},{7,0,1644},{137,0,129},{135,0,997},{136,0,502},{5
+,11,196},{6,11,486},{7,11,212},{8,11,309},{136,11,346},{7,10,727},{146,10,73},{
+132,0,823},{132,11,686},{135,0,1927},{4,0,762},{7,0,1756},{137,0,98},{136,10,577
+},{24,0,8},{4,11,30},{5,11,43},{152,11,8},{7,0,1046},{139,0,160},{7,0,492},{4,10
+,413},{5,10,677},{7,11,492},{8,10,432},{140,10,280},{6,0,45},{7,0,433},{8,0,129}
+,{9,0,21},{10,0,392},{11,0,79},{12,0,499},{13,0,199},{141,0,451},{7,0,558},{136,
+0,353},{4,11,220},{7,11,1535},{9,11,93},{139,11,474},{7,10,646},{7,10,1730},{11,
+10,446},{141,10,178},{133,0,785},{134,0,1145},{8,0,81},{9,0,189},{9,0,201},{11,0
+,478},{11,0,712},{141,0,338},{5,0,353},{151,0,26},{11,0,762},{132,10,395},{134,0
+,2024},{4,0,611},{133,0,606},{9,10,174},{10,10,164},{11,10,440},{11,10,841},{143
+,10,98},{134,10,426},{10,10,608},{139,10,1002},{138,10,250},{6,0,25},{7,0,855},{
+7,0,1258},{144,0,32},{7,11,1725},{138,11,393},{5,11,263},{134,11,414},{6,0,2011}
+,{133,10,476},{4,0,4},{7,0,1118},{7,0,1320},{7,0,1706},{8,0,277},{9,0,622},{10,0
+,9},{11,0,724},{12,0,350},{12,0,397},{13,0,28},{13,0,159},{15,0,89},{18,0,5},{19
+,0,9},{20,0,34},{22,0,47},{6,11,178},{6,11,1750},{8,11,251},{9,11,690},{10,11,
+155},{10,11,196},{10,11,373},{11,11,698},{13,11,155},{148,11,93},{5,11,97},{137,
+11,393},{7,0,764},{11,0,461},{12,0,172},{5,10,76},{6,10,458},{6,10,497},{7,10,
+868},{9,10,658},{10,10,594},{11,10,566},{12,10,338},{141,10,200},{134,0,1449},{
+138,11,40},{134,11,1639},{134,0,1445},{6,0,1168},{4,10,526},{7,10,1029},{135,10,
+1054},{4,11,191},{7,11,934},{8,11,647},{145,11,97},{132,10,636},{6,0,233},{7,10,
+660},{7,10,1124},{17,10,31},{19,10,22},{151,10,14},{6,10,1699},{136,11,110},{12,
+11,246},{15,11,162},{19,11,64},{20,11,8},{20,11,95},{22,11,24},{152,11,17},{5,11
+,165},{9,11,346},{138,11,655},{5,11,319},{135,11,534},{134,0,255},{9,0,216},{8,
+11,128},{139,11,179},{9,0,183},{139,0,286},{11,0,956},{151,0,3},{4,0,536},{7,0,
+1141},{10,0,723},{139,0,371},{4,10,279},{7,10,301},{137,10,362},{7,0,285},{5,11,
+57},{6,11,101},{6,11,1663},{7,11,132},{7,11,1048},{7,11,1154},{7,11,1415},{7,11,
+1507},{12,11,493},{15,11,105},{151,11,15},{5,11,459},{7,11,1073},{7,10,1743},{8,
+11,241},{136,11,334},{4,10,178},{133,10,399},{135,0,560},{132,0,690},{135,0,1246
+},{18,0,157},{147,0,63},{10,0,599},{11,0,33},{12,0,571},{149,0,1},{6,11,324},{6,
+11,520},{7,11,338},{7,11,1616},{7,11,1729},{8,11,228},{9,11,69},{139,11,750},{7,
+0,1862},{12,0,491},{12,0,520},{13,0,383},{142,0,244},{135,11,734},{134,10,1692},
+{10,0,448},{11,0,630},{17,0,117},{6,10,202},{7,11,705},{12,10,360},{17,10,118},{
+18,10,27},{148,10,67},{4,11,73},{6,11,612},{7,11,927},{7,11,1822},{8,11,217},{9,
+11,472},{9,11,765},{9,11,766},{10,11,408},{11,11,51},{11,11,793},{12,11,266},{15
+,11,158},{20,11,89},{150,11,32},{4,0,190},{133,0,554},{133,0,1001},{5,11,389},{8
+,11,636},{137,11,229},{5,0,446},{7,10,872},{10,10,516},{139,10,167},{137,10,313}
+,{132,10,224},{134,0,1313},{5,10,546},{7,10,35},{8,10,11},{8,10,12},{9,10,315},{
+9,10,533},{10,10,802},{11,10,166},{12,10,525},{142,10,243},{6,0,636},{137,0,837}
+,{5,10,241},{8,10,242},{9,10,451},{10,10,667},{11,10,598},{140,10,429},{22,10,46
+},{150,11,46},{136,11,472},{11,0,278},{142,0,73},{141,11,185},{132,0,868},{134,0
+,972},{4,10,366},{137,10,516},{138,0,1010},{5,11,189},{6,10,1736},{7,11,442},{7,
+11,443},{8,11,281},{12,11,174},{13,11,83},{141,11,261},{139,11,384},{6,11,2},{7,
+11,191},{7,11,446},{7,11,758},{7,11,1262},{7,11,1737},{8,11,22},{8,11,270},{8,11
+,612},{9,11,4},{9,11,167},{9,11,312},{9,11,436},{10,11,156},{10,11,216},{10,11,
+311},{10,11,623},{11,11,72},{11,11,330},{11,11,455},{12,11,101},{12,11,321},{12,
+11,504},{12,11,530},{12,11,543},{13,11,17},{13,11,156},{13,11,334},{14,11,48},{
+15,11,70},{17,11,60},{148,11,64},{6,10,331},{136,10,623},{135,0,1231},{132,0,304
+},{6,11,60},{7,11,670},{7,11,1327},{8,11,411},{8,11,435},{9,11,653},{9,11,740},{
+10,11,385},{11,11,222},{11,11,324},{11,11,829},{140,11,611},{7,0,506},{6,11,166}
+,{7,11,374},{135,11,1174},{14,11,43},{146,11,21},{135,11,1694},{135,10,1888},{5,
+11,206},{134,11,398},{135,11,50},{150,0,26},{6,0,53},{6,0,199},{7,0,1408},{8,0,
+32},{8,0,93},{10,0,397},{10,0,629},{11,0,593},{11,0,763},{13,0,326},{145,0,35},{
+134,0,105},{132,10,394},{4,0,843},{138,0,794},{11,0,704},{141,0,396},{5,0,114},{
+5,0,255},{141,0,285},{6,0,619},{7,0,898},{7,0,1092},{8,0,485},{18,0,28},{19,0,
+116},{135,10,1931},{9,0,145},{7,10,574},{135,10,1719},{7,0,2035},{8,0,19},{9,0,
+89},{138,0,831},{132,10,658},{6,11,517},{7,11,1159},{10,11,621},{139,11,192},{7,
+0,1933},{7,11,1933},{9,10,781},{10,10,144},{11,10,385},{13,10,161},{13,10,228},{
+13,10,268},{148,10,107},{136,10,374},{10,11,223},{139,11,645},{135,0,1728},{7,11
+,64},{7,11,289},{136,11,245},{4,10,344},{6,10,498},{139,10,323},{136,0,746},{135
+,10,1063},{137,10,155},{4,0,987},{6,0,1964},{6,0,1974},{6,0,1990},{136,0,995},{
+133,11,609},{133,10,906},{134,0,1550},{134,0,874},{5,11,129},{6,11,61},{135,11,
+947},{4,0,1018},{6,0,1938},{6,0,2021},{134,0,2039},{132,0,814},{11,0,126},{139,0
+,287},{134,0,1264},{5,0,955},{136,0,814},{141,11,506},{132,11,314},{6,0,981},{
+139,11,1000},{5,0,56},{8,0,892},{8,0,915},{140,0,776},{148,0,100},{10,0,4},{10,0
+,13},{11,0,638},{148,0,57},{148,11,74},{5,0,738},{132,10,616},{133,11,637},{136,
+10,692},{133,0,758},{132,10,305},{137,11,590},{5,11,280},{135,11,1226},{134,11,
+494},{135,0,1112},{133,11,281},{13,0,44},{14,0,214},{5,10,214},{7,10,603},{8,10,
+611},{9,10,686},{10,10,88},{11,10,459},{11,10,496},{12,10,463},{140,10,590},{139
+,0,328},{135,11,1064},{137,0,133},{7,0,168},{13,0,196},{141,0,237},{134,10,1703}
+,{134,0,1152},{135,0,1245},{5,0,110},{6,0,169},{6,0,1702},{7,0,400},{8,0,538},{9
+,0,184},{9,0,524},{140,0,218},{6,0,1816},{10,0,871},{12,0,769},{140,0,785},{132,
+11,630},{7,11,33},{7,11,120},{8,11,489},{9,11,319},{10,11,820},{11,11,1004},{12,
+11,379},{13,11,117},{13,11,412},{14,11,25},{15,11,52},{15,11,161},{16,11,47},{
+149,11,2},{6,0,133},{8,0,413},{9,0,353},{139,0,993},{145,10,19},{4,11,937},{133,
+11,801},{134,0,978},{6,0,93},{6,0,1508},{7,0,1422},{7,0,1851},{8,0,673},{9,0,529
+},{140,0,43},{6,0,317},{10,0,512},{4,10,737},{11,10,294},{12,10,60},{12,10,437},
+{13,10,64},{13,10,380},{142,10,430},{9,0,371},{7,11,1591},{144,11,43},{6,10,1758
+},{8,10,520},{9,10,345},{9,10,403},{142,10,350},{5,0,526},{10,10,242},{138,10,
+579},{9,0,25},{10,0,467},{138,0,559},{5,10,139},{7,10,1168},{138,10,539},{4,0,
+335},{135,0,942},{140,0,754},{132,11,365},{11,0,182},{142,0,195},{142,11,29},{5,
+11,7},{139,11,774},{4,11,746},{135,11,1090},{8,0,39},{10,0,773},{11,0,84},{12,0,
+205},{142,0,1},{5,0,601},{5,0,870},{5,11,360},{136,11,237},{132,0,181},{136,0,
+370},{134,0,1652},{8,0,358},{4,10,107},{7,10,613},{8,10,439},{8,10,504},{9,10,
+501},{10,10,383},{139,10,477},{132,10,229},{137,11,785},{4,0,97},{5,0,147},{6,0,
+286},{7,0,1362},{141,0,176},{6,0,537},{7,0,788},{7,0,1816},{132,10,903},{140,10,
+71},{6,0,743},{134,0,1223},{6,0,375},{7,0,169},{7,0,254},{8,0,780},{135,11,1493}
+,{7,0,1714},{4,10,47},{6,10,373},{7,10,452},{7,10,543},{7,10,1856},{9,10,6},{11,
+10,257},{139,10,391},{6,0,896},{136,0,1003},{135,0,1447},{137,11,341},{5,10,980}
+,{134,10,1754},{145,11,22},{4,11,277},{5,11,608},{6,11,493},{7,11,457},{140,11,
+384},{7,10,536},{7,10,1331},{136,10,143},{140,0,744},{7,11,27},{135,11,316},{18,
+0,126},{5,10,19},{134,10,533},{4,0,788},{11,0,41},{5,11,552},{5,11,586},{5,11,
+676},{6,11,448},{8,11,244},{11,11,1},{11,11,41},{13,11,3},{16,11,54},{17,11,4},{
+146,11,13},{4,0,985},{6,0,1801},{4,11,401},{137,11,264},{5,10,395},{5,10,951},{
+134,10,1776},{5,0,629},{135,0,1549},{11,10,663},{12,10,210},{13,10,166},{13,10,
+310},{14,10,373},{147,10,43},{9,11,543},{10,11,524},{11,11,30},{12,11,524},{14,
+11,315},{16,11,18},{20,11,26},{148,11,65},{4,11,205},{5,11,623},{7,11,104},{136,
+11,519},{5,0,293},{134,0,601},{7,11,579},{9,11,41},{9,11,244},{9,11,669},{10,11,
+5},{11,11,861},{11,11,951},{139,11,980},{132,11,717},{132,10,695},{7,10,497},{9,
+10,387},{147,10,81},{132,0,420},{142,0,37},{6,0,1134},{6,0,1900},{12,0,830},{12,
+0,878},{12,0,894},{15,0,221},{143,0,245},{132,11,489},{7,0,1570},{140,0,542},{8,
+0,933},{136,0,957},{6,0,1371},{7,0,31},{8,0,373},{5,10,284},{6,10,49},{6,10,350}
+,{7,10,377},{7,10,1693},{8,10,678},{9,10,161},{9,10,585},{9,10,671},{9,10,839},{
+11,10,912},{141,10,427},{135,11,892},{4,0,325},{138,0,125},{139,11,47},{132,10,
+597},{138,0,323},{6,0,1547},{7,11,1605},{9,11,473},{11,11,962},{146,11,139},{139
+,10,908},{7,11,819},{9,11,26},{9,11,392},{10,11,152},{10,11,226},{11,11,19},{12,
+11,276},{12,11,426},{12,11,589},{13,11,460},{15,11,97},{19,11,48},{148,11,104},{
+135,11,51},{4,0,718},{135,0,1216},{6,0,1896},{6,0,1905},{6,0,1912},{9,0,947},{9,
+0,974},{12,0,809},{12,0,850},{12,0,858},{12,0,874},{12,0,887},{12,0,904},{12,0,
+929},{12,0,948},{12,0,952},{15,0,198},{15,0,206},{15,0,220},{15,0,227},{15,0,247
+},{18,0,188},{21,0,48},{21,0,50},{24,0,25},{24,0,29},{7,11,761},{7,11,1051},{137
+,11,545},{5,0,124},{5,0,144},{6,0,548},{7,0,15},{7,0,153},{137,0,629},{135,11,
+606},{135,10,2014},{7,10,2007},{9,11,46},{9,10,101},{9,10,450},{10,10,66},{10,10
+,842},{11,10,536},{140,10,587},{6,0,75},{7,0,1531},{8,0,416},{9,0,240},{9,0,275}
+,{10,0,100},{11,0,658},{11,0,979},{12,0,86},{14,0,207},{15,0,20},{143,0,25},{5,0
+,141},{5,0,915},{6,0,1783},{7,0,211},{7,0,698},{7,0,1353},{9,0,83},{9,0,281},{10
+,0,376},{10,0,431},{11,0,543},{12,0,664},{13,0,280},{13,0,428},{14,0,61},{14,0,
+128},{17,0,52},{145,0,81},{132,11,674},{135,0,533},{149,0,6},{132,11,770},{133,0
+,538},{5,11,79},{7,11,1027},{7,11,1477},{139,11,52},{139,10,62},{4,0,338},{133,0
+,400},{5,11,789},{134,11,195},{4,11,251},{4,11,688},{7,11,513},{7,11,1284},{9,11
+,87},{138,11,365},{134,10,1766},{6,0,0},{7,0,84},{11,0,895},{145,0,11},{139,0,
+892},{4,0,221},{5,0,659},{7,0,697},{7,0,1211},{138,0,284},{133,0,989},{133,11,
+889},{4,11,160},{5,11,330},{7,11,1434},{136,11,174},{6,10,1665},{7,10,256},{7,10
+,1388},{10,10,499},{139,10,670},{7,0,848},{4,10,22},{5,10,10},{136,10,97},{138,0
+,507},{133,10,481},{4,0,188},{135,0,805},{5,0,884},{6,0,732},{139,0,991},{135,11
+,968},{11,11,636},{15,11,145},{17,11,34},{19,11,50},{151,11,20},{7,0,959},{16,0,
+60},{6,10,134},{7,10,437},{9,10,37},{14,10,285},{142,10,371},{7,10,486},{8,10,
+155},{11,10,93},{140,10,164},{134,0,1653},{7,0,337},{133,10,591},{6,0,1989},{8,0
+,922},{8,0,978},{133,11,374},{132,0,638},{138,0,500},{133,11,731},{5,10,380},{5,
+10,650},{136,10,310},{138,11,381},{4,10,364},{7,10,1156},{7,10,1187},{137,10,409
+},{137,11,224},{140,0,166},{134,10,482},{4,11,626},{5,11,642},{6,11,425},{10,11,
+202},{139,11,141},{4,10,781},{6,10,487},{7,10,926},{8,10,263},{139,10,500},{135,
+0,418},{4,10,94},{135,10,1265},{136,0,760},{132,10,417},{136,11,835},{5,10,348},
+{134,10,522},{6,0,1277},{134,0,1538},{139,11,541},{135,11,1597},{5,11,384},{8,11
+,455},{140,11,48},{136,0,770},{5,11,264},{134,11,184},{4,0,89},{5,0,489},{6,0,
+315},{7,0,553},{7,0,1745},{138,0,243},{4,10,408},{4,10,741},{135,10,500},{134,0,
+1396},{133,0,560},{6,0,1658},{9,0,3},{10,0,154},{11,0,641},{13,0,85},{13,0,201},
+{141,0,346},{135,11,1595},{5,11,633},{6,11,28},{7,11,219},{135,11,1323},{9,11,
+769},{140,11,185},{135,11,785},{7,11,359},{8,11,243},{140,11,175},{138,0,586},{7
+,0,1271},{134,10,73},{132,11,105},{4,0,166},{5,0,505},{134,0,1670},{133,10,576},
+{4,11,324},{138,11,104},{142,10,231},{6,0,637},{7,10,1264},{7,10,1678},{11,10,
+945},{12,10,341},{12,10,471},{12,10,569},{23,11,21},{151,11,23},{8,11,559},{141,
+11,109},{134,0,1947},{7,0,445},{8,0,307},{8,0,704},{10,0,41},{10,0,439},{11,0,
+237},{11,0,622},{140,0,201},{135,11,963},{135,0,1977},{4,0,189},{5,0,713},{136,0
+,57},{138,0,371},{135,10,538},{132,0,552},{6,0,883},{133,10,413},{6,0,923},{132,
+11,758},{138,11,215},{136,10,495},{7,10,54},{8,10,312},{10,10,191},{10,10,614},{
+140,10,567},{7,11,351},{139,11,128},{7,0,875},{6,10,468},{7,10,1478},{8,10,530},
+{142,10,290},{135,0,1788},{17,0,49},{133,11,918},{12,11,398},{20,11,39},{21,11,
+11},{150,11,41},{10,0,661},{6,10,484},{135,10,822},{135,0,1945},{134,0,794},{137
+,10,900},{135,10,1335},{6,10,1724},{135,10,2022},{132,11,340},{134,0,1135},{4,0,
+784},{133,0,745},{5,0,84},{134,0,163},{133,0,410},{4,0,976},{5,11,985},{7,11,509
+},{7,11,529},{145,11,96},{132,10,474},{134,0,703},{135,11,1919},{5,0,322},{8,0,
+186},{9,0,262},{10,0,187},{142,0,208},{135,10,1504},{133,0,227},{9,0,560},{13,0,
+208},{133,10,305},{132,11,247},{7,0,1395},{8,0,486},{9,0,236},{9,0,878},{10,0,
+218},{11,0,95},{19,0,17},{147,0,31},{7,0,2043},{8,0,672},{141,0,448},{4,11,184},
+{5,11,390},{6,11,337},{7,11,23},{7,11,494},{7,11,618},{7,11,1456},{8,11,27},{8,
+11,599},{10,11,153},{139,11,710},{135,0,466},{135,10,1236},{6,0,167},{7,0,186},{
+7,0,656},{10,0,643},{4,10,480},{6,10,302},{6,10,1642},{7,10,837},{7,10,1547},{7,
+10,1657},{8,10,429},{9,10,228},{13,10,289},{13,10,343},{147,10,101},{134,0,1428}
+,{134,0,1440},{5,0,412},{7,10,278},{10,10,739},{11,10,708},{141,10,348},{134,0,
+1118},{136,0,562},{148,11,46},{9,0,316},{139,0,256},{134,0,1771},{135,0,1190},{
+137,0,132},{10,11,227},{11,11,497},{11,11,709},{140,11,415},{143,0,66},{6,11,360
+},{7,11,1664},{136,11,478},{144,10,28},{4,0,317},{135,0,1279},{5,0,63},{133,0,
+509},{136,11,699},{145,10,36},{134,0,1475},{11,11,343},{142,11,127},{132,11,739}
+,{132,0,288},{135,11,1757},{8,0,89},{8,0,620},{9,0,608},{11,0,628},{12,0,322},{
+143,0,124},{134,0,1225},{7,0,1189},{4,11,67},{5,11,422},{6,10,363},{7,11,1037},{
+7,11,1289},{7,11,1555},{7,10,1955},{8,10,725},{9,11,741},{145,11,108},{134,0,
+1468},{6,0,689},{134,0,1451},{138,0,120},{151,0,1},{137,10,805},{142,0,329},{5,
+10,813},{135,10,2046},{135,0,226},{138,11,96},{7,0,1855},{5,10,712},{11,10,17},{
+13,10,321},{144,10,67},{9,0,461},{6,10,320},{7,10,781},{7,10,1921},{9,10,55},{10
+,10,186},{10,10,273},{10,10,664},{10,10,801},{11,10,996},{11,10,997},{13,10,157}
+,{142,10,170},{8,11,203},{8,10,271},{11,11,823},{11,11,846},{12,11,482},{13,11,
+133},{13,11,277},{13,11,302},{13,11,464},{14,11,205},{142,11,221},{135,0,1346},{
+4,11,449},{133,11,718},{134,0,85},{14,0,299},{7,10,103},{7,10,863},{11,10,184},{
+145,10,62},{4,11,355},{6,11,311},{9,11,256},{138,11,404},{137,10,659},{138,11,
+758},{133,11,827},{5,11,64},{140,11,581},{134,0,1171},{4,11,442},{7,11,1047},{7,
+11,1352},{135,11,1643},{132,0,980},{5,11,977},{6,11,288},{7,11,528},{135,11,1065
+},{5,0,279},{6,0,235},{7,0,468},{8,0,446},{9,0,637},{10,0,717},{11,0,738},{140,0
+,514},{132,0,293},{11,10,337},{142,10,303},{136,11,285},{5,0,17},{6,0,371},{9,0,
+528},{12,0,364},{132,11,254},{5,10,77},{7,10,1455},{10,10,843},{147,10,73},{150,
+0,5},{132,10,458},{6,11,12},{7,11,1219},{145,11,73},{135,10,1420},{6,10,109},{
+138,10,382},{135,11,125},{6,10,330},{7,10,1084},{139,10,142},{6,11,369},{6,11,
+502},{7,11,1036},{8,11,348},{9,11,452},{10,11,26},{11,11,224},{11,11,387},{11,11
+,772},{12,11,95},{12,11,629},{13,11,195},{13,11,207},{13,11,241},{14,11,260},{14
+,11,270},{143,11,140},{132,11,269},{5,11,480},{7,11,532},{7,11,1197},{7,11,1358}
+,{8,11,291},{11,11,349},{142,11,396},{150,0,48},{10,0,601},{13,0,353},{141,0,376
+},{5,0,779},{5,0,807},{6,0,1655},{134,0,1676},{142,11,223},{4,0,196},{5,0,558},{
+133,0,949},{148,11,15},{135,11,1764},{134,0,1322},{132,0,752},{139,0,737},{135,
+11,657},{136,11,533},{135,0,412},{4,0,227},{5,0,159},{5,0,409},{7,0,80},{8,0,556
+},{10,0,479},{12,0,418},{14,0,50},{14,0,123},{14,0,192},{14,0,249},{14,0,295},{
+143,0,27},{7,0,1470},{8,0,66},{8,0,137},{8,0,761},{9,0,638},{11,0,80},{11,0,212}
+,{11,0,368},{11,0,418},{12,0,8},{13,0,15},{16,0,61},{17,0,59},{19,0,28},{148,0,
+84},{135,10,1985},{4,11,211},{4,11,332},{5,11,335},{6,11,238},{7,11,269},{7,11,
+811},{7,11,1797},{8,10,122},{8,11,836},{9,11,507},{141,11,242},{6,0,683},{134,0,
+1252},{4,0,873},{132,10,234},{134,0,835},{6,0,38},{7,0,1220},{8,0,185},{8,0,256}
+,{9,0,22},{9,0,331},{10,0,738},{11,0,205},{11,0,540},{11,0,746},{13,0,465},{14,0
+,88},{142,0,194},{138,0,986},{5,11,1009},{12,11,582},{146,11,131},{4,0,159},{6,0
+,115},{7,0,252},{7,0,257},{7,0,1928},{8,0,69},{9,0,384},{10,0,91},{10,0,615},{12
+,0,375},{14,0,235},{18,0,117},{147,0,123},{133,0,911},{136,0,278},{5,10,430},{5,
+10,932},{6,10,131},{7,10,417},{9,10,522},{11,10,314},{141,10,390},{14,10,149},{
+14,10,399},{143,10,57},{4,0,151},{7,0,1567},{136,0,749},{5,11,228},{6,11,203},{7
+,11,156},{8,11,347},{137,11,265},{132,10,507},{10,0,989},{140,0,956},{133,0,990}
+,{5,0,194},{6,0,927},{7,0,1662},{9,0,90},{140,0,564},{4,10,343},{133,10,511},{
+133,0,425},{7,10,455},{138,10,591},{4,0,774},{7,11,476},{7,11,1592},{138,11,87},
+{5,0,971},{135,10,1381},{5,11,318},{147,11,121},{5,11,291},{7,11,765},{9,11,389}
+,{140,11,548},{134,10,575},{4,0,827},{12,0,646},{12,0,705},{12,0,712},{140,0,714
+},{139,0,752},{137,0,662},{5,0,72},{6,0,264},{7,0,21},{7,0,46},{7,0,2013},{8,0,
+215},{8,0,513},{10,0,266},{139,0,22},{139,11,522},{6,0,239},{7,0,118},{10,0,95},
+{11,0,603},{13,0,443},{14,0,160},{143,0,4},{6,0,431},{134,0,669},{7,10,1127},{7,
+10,1572},{10,10,297},{10,10,422},{11,10,764},{11,10,810},{12,10,264},{13,10,102}
+,{13,10,300},{13,10,484},{14,10,147},{14,10,229},{17,10,71},{18,10,118},{147,10,
+120},{5,0,874},{6,0,1677},{15,0,0},{10,11,525},{139,11,82},{6,0,65},{7,0,939},{7
+,0,1172},{7,0,1671},{9,0,540},{10,0,696},{11,0,265},{11,0,732},{11,0,928},{11,0,
+937},{141,0,438},{134,0,1350},{136,11,547},{132,11,422},{5,11,355},{145,11,0},{
+137,11,905},{5,0,682},{135,0,1887},{132,0,809},{4,0,696},{133,11,865},{6,0,1074}
+,{6,0,1472},{14,10,35},{142,10,191},{5,11,914},{134,11,1625},{133,11,234},{135,
+11,1383},{137,11,780},{132,10,125},{4,0,726},{133,0,630},{8,0,802},{136,0,838},{
+132,10,721},{6,0,1337},{7,0,776},{19,0,56},{136,10,145},{132,0,970},{7,10,792},{
+8,10,147},{10,10,821},{139,10,1021},{139,10,970},{8,0,940},{137,0,797},{135,11,
+1312},{9,0,248},{10,0,400},{7,11,816},{7,11,1241},{7,10,1999},{9,11,283},{9,11,
+520},{10,11,213},{10,11,307},{10,11,463},{10,11,671},{10,11,746},{11,11,401},{11
+,11,794},{12,11,517},{18,11,107},{147,11,115},{6,0,1951},{134,0,2040},{135,11,
+339},{13,0,41},{15,0,93},{5,10,168},{5,10,930},{8,10,74},{9,10,623},{12,10,500},
+{140,10,579},{6,0,118},{7,0,215},{7,0,1521},{140,0,11},{6,10,220},{7,10,1101},{
+141,10,105},{6,11,421},{7,11,61},{7,11,1540},{10,11,11},{138,11,501},{7,0,615},{
+138,0,251},{140,11,631},{135,0,1044},{6,10,19},{7,10,1413},{139,10,428},{133,0,
+225},{7,10,96},{8,10,401},{8,10,703},{137,10,896},{145,10,116},{6,11,102},{7,11,
+72},{15,11,142},{147,11,67},{7,10,1961},{7,10,1965},{8,10,702},{136,10,750},{7,
+10,2030},{8,10,150},{8,10,737},{12,10,366},{151,11,30},{4,0,370},{5,0,756},{7,0,
+1326},{135,11,823},{8,10,800},{9,10,148},{9,10,872},{9,10,890},{11,10,309},{11,
+10,1001},{13,10,267},{141,10,323},{6,0,1662},{7,0,48},{8,0,771},{10,0,116},{13,0
+,104},{14,0,105},{14,0,184},{15,0,168},{19,0,92},{148,0,68},{10,0,209},{135,11,
+1870},{7,11,68},{8,11,48},{8,11,88},{8,11,582},{8,11,681},{9,11,373},{9,11,864},
+{11,11,157},{11,11,336},{11,11,843},{148,11,27},{134,0,930},{4,11,88},{5,11,137}
+,{5,11,174},{5,11,777},{6,11,1664},{6,11,1725},{7,11,77},{7,11,426},{7,11,1317},
+{7,11,1355},{8,11,126},{8,11,563},{9,11,523},{9,11,750},{10,11,310},{10,11,836},
+{11,11,42},{11,11,318},{11,11,731},{12,11,68},{12,11,92},{12,11,507},{12,11,692}
+,{13,11,81},{13,11,238},{13,11,374},{18,11,138},{19,11,78},{19,11,111},{20,11,55
+},{20,11,77},{148,11,92},{4,11,938},{135,11,1831},{5,10,547},{7,10,424},{8,11,
+617},{138,11,351},{6,0,1286},{6,11,1668},{7,11,1499},{8,11,117},{9,11,314},{138,
+11,174},{6,0,759},{6,0,894},{7,11,707},{139,11,563},{4,0,120},{135,0,1894},{9,0,
+385},{149,0,17},{138,0,429},{133,11,403},{5,0,820},{135,0,931},{10,0,199},{133,
+10,133},{6,0,151},{6,0,1675},{7,0,383},{151,0,10},{6,0,761},{136,10,187},{8,0,
+365},{10,10,0},{10,10,818},{139,10,988},{4,11,44},{5,11,311},{6,11,156},{7,11,
+639},{7,11,762},{7,11,1827},{9,11,8},{9,11,462},{148,11,83},{4,11,346},{7,11,115
+},{9,11,180},{9,11,456},{138,11,363},{136,10,685},{7,0,1086},{145,0,46},{6,0,
+1624},{11,0,11},{12,0,422},{13,0,444},{142,0,360},{6,0,1020},{6,0,1260},{134,0,
+1589},{4,0,43},{5,0,344},{5,0,357},{14,0,472},{150,0,58},{6,0,1864},{6,0,1866},{
+6,0,1868},{6,0,1869},{6,0,1874},{6,0,1877},{6,0,1903},{6,0,1911},{9,0,920},{9,0,
+921},{9,0,924},{9,0,946},{9,0,959},{9,0,963},{9,0,970},{9,0,997},{9,0,1008},{9,0
+,1017},{12,0,795},{12,0,797},{12,0,798},{12,0,800},{12,0,803},{12,0,811},{12,0,
+820},{12,0,821},{12,0,839},{12,0,841},{12,0,848},{12,0,911},{12,0,921},{12,0,922
+},{12,0,925},{12,0,937},{12,0,944},{12,0,945},{12,0,953},{15,0,184},{15,0,191},{
+15,0,199},{15,0,237},{15,0,240},{15,0,243},{15,0,246},{18,0,203},{21,0,40},{21,0
+,52},{21,0,57},{24,0,23},{24,0,28},{152,0,30},{134,0,725},{145,11,58},{133,0,888
+},{137,10,874},{4,0,711},{8,10,774},{10,10,670},{140,10,51},{144,11,40},{6,11,
+185},{7,11,1899},{139,11,673},{137,10,701},{137,0,440},{4,11,327},{5,11,478},{7,
+11,1332},{8,11,753},{140,11,227},{4,10,127},{5,10,350},{6,10,356},{8,10,426},{9,
+10,572},{10,10,247},{139,10,312},{5,11,1020},{133,11,1022},{4,11,103},{133,11,
+401},{6,0,1913},{6,0,1926},{6,0,1959},{9,0,914},{9,0,939},{9,0,952},{9,0,979},{9
+,0,990},{9,0,998},{9,0,1003},{9,0,1023},{12,0,827},{12,0,834},{12,0,845},{12,0,
+912},{12,0,935},{12,0,951},{15,0,172},{15,0,174},{18,0,198},{149,0,63},{5,0,958}
+,{5,0,987},{4,11,499},{135,11,1421},{7,0,885},{6,10,59},{6,10,1762},{9,10,603},{
+141,10,397},{10,11,62},{141,11,164},{4,0,847},{135,0,326},{11,0,276},{142,0,293}
+,{4,0,65},{5,0,479},{5,0,1004},{7,0,1913},{8,0,317},{9,0,302},{10,0,612},{13,0,
+22},{132,11,96},{4,0,261},{135,0,510},{135,0,1514},{6,10,111},{7,10,4},{8,10,163
+},{8,10,776},{138,10,566},{4,0,291},{9,0,515},{12,0,152},{12,0,443},{13,0,392},{
+142,0,357},{7,11,399},{135,11,1492},{4,0,589},{139,0,282},{6,11,563},{135,10,
+1994},{5,10,297},{135,10,1038},{4,0,130},{7,0,843},{135,0,1562},{5,0,42},{5,0,
+879},{7,0,245},{7,0,324},{7,0,1532},{11,0,463},{11,0,472},{13,0,363},{144,0,52},
+{4,0,134},{133,0,372},{133,0,680},{136,10,363},{6,0,1997},{8,0,935},{136,0,977},
+{4,0,810},{135,0,1634},{135,10,1675},{7,0,1390},{4,11,910},{133,11,832},{7,10,
+808},{8,11,266},{139,11,578},{132,0,644},{4,0,982},{138,0,867},{132,10,280},{135
+,0,540},{140,10,54},{135,0,123},{134,0,1978},{4,10,421},{133,10,548},{6,0,623},{
+136,0,789},{4,0,908},{5,0,359},{5,0,508},{6,0,1723},{7,0,343},{7,0,1996},{135,0,
+2026},{134,0,1220},{4,0,341},{135,0,480},{6,10,254},{9,10,109},{138,10,103},{134
+,0,888},{8,11,528},{137,11,348},{7,0,1995},{8,0,299},{11,0,890},{12,0,674},{4,11
+,20},{133,11,616},{135,11,1094},{134,10,1630},{4,0,238},{5,0,503},{6,0,179},{7,0
+,2003},{8,0,381},{8,0,473},{9,0,149},{10,0,788},{15,0,45},{15,0,86},{20,0,110},{
+150,0,57},{133,10,671},{4,11,26},{5,11,429},{6,11,245},{7,11,704},{7,11,1379},{
+135,11,1474},{4,0,121},{5,0,156},{5,0,349},{9,0,431},{10,0,605},{142,0,342},{7,
+11,943},{139,11,614},{132,10,889},{132,11,621},{7,10,1382},{7,11,1382},{135,10,
+1910},{132,10,627},{133,10,775},{133,11,542},{133,11,868},{136,11,433},{6,0,1373
+},{7,0,1011},{11,10,362},{11,10,948},{140,10,388},{6,0,80},{7,0,173},{9,0,547},{
+10,0,730},{14,0,18},{22,0,39},{135,11,1495},{6,0,1694},{135,0,1974},{140,0,196},
+{4,0,923},{6,0,507},{6,0,1711},{7,10,451},{8,10,389},{12,10,490},{13,10,16},{13,
+10,215},{13,10,351},{18,10,132},{147,10,125},{6,0,646},{134,0,1047},{135,10,841}
+,{136,10,566},{6,0,1611},{135,0,1214},{139,0,926},{132,11,525},{132,0,595},{5,0,
+240},{6,0,459},{7,0,12},{7,0,114},{7,0,949},{7,0,1753},{7,0,1805},{8,0,658},{9,0
+,1},{11,0,959},{141,0,446},{5,10,912},{134,10,1695},{132,0,446},{7,11,62},{12,11
+,45},{147,11,112},{5,10,236},{6,10,572},{8,10,492},{11,10,618},{144,10,56},{5,10
+,190},{136,10,318},{135,10,1376},{4,11,223},{6,11,359},{11,11,3},{13,11,108},{14
+,11,89},{144,11,22},{132,11,647},{134,0,490},{134,0,491},{134,0,1584},{135,11,
+685},{138,11,220},{7,0,250},{136,0,507},{132,0,158},{4,0,140},{7,0,362},{8,0,209
+},{9,0,10},{9,0,160},{9,0,503},{9,0,614},{10,0,689},{11,0,327},{11,0,553},{11,0,
+725},{11,0,767},{12,0,252},{12,0,583},{13,0,192},{14,0,269},{14,0,356},{148,0,50
+},{19,0,1},{19,0,26},{150,0,9},{132,11,109},{6,0,228},{7,0,1341},{9,0,408},{138,
+0,343},{4,0,373},{5,0,283},{6,0,480},{7,0,609},{10,0,860},{138,0,878},{6,0,779},
+{134,0,1209},{4,0,557},{7,11,263},{7,11,628},{136,11,349},{132,0,548},{7,0,197},
+{8,0,142},{8,0,325},{9,0,150},{9,0,596},{10,0,350},{10,0,353},{11,0,74},{11,0,
+315},{12,0,662},{12,0,681},{14,0,423},{143,0,141},{4,11,40},{10,11,67},{11,11,
+117},{11,11,768},{139,11,935},{7,11,992},{8,11,301},{9,11,722},{12,11,63},{13,11
+,29},{14,11,161},{143,11,18},{6,0,1490},{138,11,532},{5,0,580},{7,0,378},{7,0,
+674},{7,0,1424},{15,0,83},{16,0,11},{15,11,83},{144,11,11},{6,0,1057},{6,0,1335}
+,{10,0,316},{7,10,85},{7,10,247},{8,10,585},{138,10,163},{4,0,169},{5,0,83},{6,0
+,399},{6,0,579},{6,0,1513},{7,0,692},{7,0,846},{7,0,1015},{7,0,1799},{8,0,403},{
+9,0,394},{10,0,133},{12,0,4},{12,0,297},{12,0,452},{16,0,81},{18,0,25},{21,0,14}
+,{22,0,12},{151,0,18},{134,0,1106},{7,0,1546},{11,0,299},{142,0,407},{134,0,1192
+},{132,0,177},{5,0,411},{135,0,653},{7,0,439},{10,0,727},{11,0,260},{139,0,684},
+{138,10,145},{147,10,83},{5,0,208},{7,0,753},{135,0,1528},{137,11,617},{135,10,
+1922},{135,11,825},{11,0,422},{13,0,389},{4,10,124},{10,10,457},{11,10,121},{11,
+10,169},{11,10,870},{12,10,214},{14,10,187},{143,10,77},{11,0,615},{15,0,58},{11
+,11,615},{143,11,58},{9,0,618},{138,0,482},{6,0,1952},{6,0,1970},{142,0,505},{7,
+10,1193},{135,11,1838},{133,0,242},{135,10,1333},{6,10,107},{7,10,638},{7,10,
+1632},{137,10,396},{133,0,953},{5,10,370},{134,10,1756},{5,11,28},{6,11,204},{10
+,11,320},{10,11,583},{13,11,502},{14,11,72},{14,11,274},{14,11,312},{14,11,344},
+{15,11,159},{16,11,62},{16,11,69},{17,11,30},{18,11,42},{18,11,53},{18,11,84},{
+18,11,140},{19,11,68},{19,11,85},{20,11,5},{20,11,45},{20,11,101},{22,11,7},{150
+,11,20},{4,11,558},{6,11,390},{7,11,162},{7,11,689},{9,11,360},{138,11,653},{11,
+0,802},{141,0,67},{133,10,204},{133,0,290},{5,10,970},{134,10,1706},{132,0,380},
+{5,0,52},{7,0,277},{9,0,368},{139,0,791},{5,11,856},{6,11,1672},{6,11,1757},{6,
+11,1781},{7,11,1150},{7,11,1425},{7,11,1453},{140,11,513},{5,11,92},{7,10,3},{10
+,11,736},{140,11,102},{4,0,112},{5,0,653},{5,10,483},{5,10,685},{6,10,489},{7,10
+,1204},{136,10,394},{132,10,921},{6,0,1028},{133,10,1007},{5,11,590},{9,11,213},
+{145,11,91},{135,10,1696},{10,0,138},{139,0,476},{5,0,725},{5,0,727},{135,0,1811
+},{4,0,979},{6,0,1821},{6,0,1838},{8,0,876},{8,0,883},{8,0,889},{8,0,893},{8,0,
+895},{10,0,934},{12,0,720},{14,0,459},{148,0,123},{135,11,551},{4,0,38},{6,0,435
+},{7,0,307},{7,0,999},{7,0,1481},{7,0,1732},{7,0,1738},{8,0,371},{9,0,414},{11,0
+,316},{12,0,52},{13,0,420},{147,0,100},{135,0,1296},{132,10,712},{134,10,1629},{
+133,0,723},{134,0,651},{136,11,191},{9,11,791},{10,11,93},{11,11,301},{16,11,13}
+,{17,11,23},{18,11,135},{19,11,12},{20,11,1},{20,11,12},{148,11,14},{136,11,503}
+,{6,11,466},{135,11,671},{6,0,1200},{134,0,1330},{135,0,1255},{134,0,986},{5,0,
+109},{6,0,1784},{7,0,1895},{12,0,296},{140,0,302},{135,11,983},{133,10,485},{134
+,0,660},{134,0,800},{5,0,216},{5,0,294},{6,0,591},{7,0,1879},{9,0,141},{9,0,270}
+,{9,0,679},{10,0,159},{11,0,197},{11,0,438},{12,0,538},{12,0,559},{14,0,144},{14
+,0,167},{15,0,67},{4,10,285},{5,10,317},{6,10,301},{7,10,7},{8,10,153},{10,10,
+766},{11,10,468},{12,10,467},{141,10,143},{136,0,945},{134,0,1090},{137,0,81},{
+12,11,468},{19,11,96},{148,11,24},{134,0,391},{138,11,241},{7,0,322},{136,0,249}
+,{134,0,1412},{135,11,795},{5,0,632},{138,0,526},{136,10,819},{6,0,144},{7,0,948
+},{7,0,1042},{8,0,235},{8,0,461},{9,0,453},{9,0,796},{10,0,354},{17,0,77},{135,
+11,954},{139,10,917},{6,0,940},{134,0,1228},{4,0,362},{7,0,52},{135,0,303},{6,11
+,549},{8,11,34},{8,11,283},{9,11,165},{138,11,475},{7,11,370},{7,11,1007},{7,11,
+1177},{135,11,1565},{5,11,652},{5,11,701},{135,11,449},{5,0,196},{6,0,486},{7,0,
+212},{8,0,309},{136,0,346},{6,10,1719},{6,10,1735},{7,10,2016},{7,10,2020},{8,10
+,837},{137,10,852},{6,11,159},{6,11,364},{7,11,516},{7,11,1439},{137,11,518},{
+135,0,1912},{135,0,1290},{132,0,686},{141,11,151},{138,0,625},{136,0,706},{138,
+10,568},{139,0,412},{4,0,30},{133,0,43},{8,10,67},{138,10,419},{7,0,967},{141,0,
+11},{12,0,758},{14,0,441},{142,0,462},{10,10,657},{14,10,297},{142,10,361},{139,
+10,729},{4,0,220},{135,0,1535},{7,11,501},{9,11,111},{10,11,141},{11,11,332},{13
+,11,43},{13,11,429},{14,11,130},{14,11,415},{145,11,102},{4,0,950},{6,0,1859},{7
+,0,11},{8,0,873},{12,0,710},{12,0,718},{12,0,748},{12,0,765},{148,0,124},{5,11,
+149},{5,11,935},{136,11,233},{142,11,291},{134,0,1579},{7,0,890},{8,10,51},{9,10
+,868},{10,10,833},{12,10,481},{12,10,570},{148,10,106},{141,0,2},{132,10,445},{
+136,11,801},{135,0,1774},{7,0,1725},{138,0,393},{5,0,263},{134,0,414},{132,11,
+322},{133,10,239},{7,0,456},{7,10,1990},{8,10,130},{139,10,720},{137,0,818},{5,
+10,123},{6,10,530},{7,10,348},{135,10,1419},{135,10,2024},{6,0,178},{6,0,1750},{
+8,0,251},{9,0,690},{10,0,155},{10,0,196},{10,0,373},{11,0,698},{13,0,155},{148,0
+,93},{5,0,97},{137,0,393},{134,0,674},{11,0,223},{140,0,168},{132,10,210},{139,
+11,464},{6,0,1639},{146,0,159},{139,11,2},{7,0,934},{8,0,647},{17,0,97},{19,0,59
+},{150,0,2},{132,0,191},{5,0,165},{9,0,346},{10,0,655},{11,0,885},{4,10,430},{
+135,11,357},{133,0,877},{5,10,213},{133,11,406},{8,0,128},{139,0,179},{6,11,69},
+{135,11,117},{135,0,1297},{11,11,43},{13,11,72},{141,11,142},{135,11,1830},{142,
+0,164},{5,0,57},{6,0,101},{6,0,586},{6,0,1663},{7,0,132},{7,0,1154},{7,0,1415},{
+7,0,1507},{12,0,493},{15,0,105},{151,0,15},{5,0,459},{7,0,1073},{8,0,241},{136,0
+,334},{133,11,826},{133,10,108},{5,10,219},{10,11,132},{11,11,191},{11,11,358},{
+139,11,460},{6,0,324},{6,0,520},{7,0,338},{7,0,1729},{8,0,228},{139,0,750},{21,0
+,30},{22,0,53},{4,10,193},{5,10,916},{7,10,364},{10,10,398},{10,10,726},{11,10,
+317},{11,10,626},{12,10,142},{12,10,288},{12,10,678},{13,10,313},{15,10,113},{
+146,10,114},{6,11,110},{135,11,1681},{135,0,910},{6,10,241},{7,10,907},{8,10,832
+},{9,10,342},{10,10,729},{11,10,284},{11,10,445},{11,10,651},{11,10,863},{13,10,
+398},{146,10,99},{7,0,705},{9,0,734},{5,11,1000},{7,11,733},{137,11,583},{4,0,73
+},{6,0,612},{7,0,927},{7,0,1822},{8,0,217},{9,0,765},{9,0,766},{10,0,408},{11,0,
+51},{11,0,793},{12,0,266},{15,0,158},{20,0,89},{150,0,32},{7,0,1330},{4,11,297},
+{6,11,529},{7,11,152},{7,11,713},{7,11,1845},{8,11,710},{8,11,717},{140,11,639},
+{5,0,389},{136,0,636},{134,0,1409},{4,10,562},{9,10,254},{139,10,879},{134,0,893
+},{132,10,786},{4,11,520},{135,11,575},{136,0,21},{140,0,721},{136,0,959},{7,11,
+1428},{7,11,1640},{9,11,169},{9,11,182},{9,11,367},{9,11,478},{9,11,506},{9,11,
+551},{9,11,648},{9,11,651},{9,11,697},{9,11,705},{9,11,725},{9,11,787},{9,11,794
+},{10,11,198},{10,11,214},{10,11,267},{10,11,275},{10,11,456},{10,11,551},{10,11
+,561},{10,11,613},{10,11,627},{10,11,668},{10,11,675},{10,11,691},{10,11,695},{
+10,11,707},{10,11,715},{11,11,183},{11,11,201},{11,11,244},{11,11,262},{11,11,
+352},{11,11,439},{11,11,493},{11,11,572},{11,11,591},{11,11,608},{11,11,611},{11
+,11,646},{11,11,674},{11,11,711},{11,11,751},{11,11,761},{11,11,776},{11,11,785}
+,{11,11,850},{11,11,853},{11,11,862},{11,11,865},{11,11,868},{11,11,898},{11,11,
+902},{11,11,903},{11,11,910},{11,11,932},{11,11,942},{11,11,957},{11,11,967},{11
+,11,972},{12,11,148},{12,11,195},{12,11,220},{12,11,237},{12,11,318},{12,11,339}
+,{12,11,393},{12,11,445},{12,11,450},{12,11,474},{12,11,509},{12,11,533},{12,11,
+591},{12,11,594},{12,11,597},{12,11,621},{12,11,633},{12,11,642},{13,11,59},{13,
+11,60},{13,11,145},{13,11,239},{13,11,250},{13,11,273},{13,11,329},{13,11,344},{
+13,11,365},{13,11,372},{13,11,387},{13,11,403},{13,11,414},{13,11,456},{13,11,
+478},{13,11,483},{13,11,489},{14,11,55},{14,11,57},{14,11,81},{14,11,90},{14,11,
+148},{14,11,239},{14,11,266},{14,11,321},{14,11,326},{14,11,327},{14,11,330},{14
+,11,347},{14,11,355},{14,11,401},{14,11,411},{14,11,414},{14,11,416},{14,11,420}
+,{15,11,61},{15,11,74},{15,11,87},{15,11,88},{15,11,94},{15,11,96},{15,11,116},{
+15,11,149},{15,11,154},{16,11,50},{16,11,63},{16,11,73},{17,11,2},{17,11,66},{17
+,11,92},{17,11,103},{17,11,112},{18,11,50},{18,11,54},{18,11,82},{18,11,86},{18,
+11,90},{18,11,111},{18,11,115},{18,11,156},{19,11,40},{19,11,79},{20,11,78},{149
+,11,22},{137,11,170},{134,0,1433},{135,11,1307},{139,11,411},{5,0,189},{7,0,442}
+,{7,0,443},{8,0,281},{12,0,174},{141,0,261},{6,10,216},{7,10,901},{7,10,1343},{
+136,10,493},{5,11,397},{6,11,154},{7,10,341},{7,11,676},{8,11,443},{8,11,609},{9
+,11,24},{9,11,325},{10,11,35},{11,10,219},{11,11,535},{11,11,672},{11,11,1018},{
+12,11,637},{144,11,30},{6,0,2},{7,0,191},{7,0,446},{7,0,1262},{7,0,1737},{8,0,22
+},{8,0,270},{8,0,612},{9,0,4},{9,0,312},{9,0,436},{9,0,626},{10,0,216},{10,0,311
+},{10,0,521},{10,0,623},{11,0,72},{11,0,330},{11,0,455},{12,0,321},{12,0,504},{
+12,0,530},{12,0,543},{13,0,17},{13,0,156},{13,0,334},{14,0,131},{17,0,60},{148,0
+,64},{7,0,354},{10,0,410},{139,0,815},{139,10,130},{7,10,1734},{137,11,631},{12,
+0,425},{15,0,112},{10,10,115},{11,10,420},{13,10,404},{14,10,346},{143,10,54},{6
+,0,60},{6,0,166},{7,0,374},{7,0,670},{7,0,1327},{8,0,411},{8,0,435},{9,0,653},{9
+,0,740},{10,0,385},{11,0,222},{11,0,324},{11,0,829},{140,0,611},{7,0,1611},{13,0
+,14},{15,0,44},{19,0,13},{148,0,76},{133,11,981},{4,11,56},{7,11,1791},{8,11,607
+},{8,11,651},{11,11,465},{11,11,835},{12,11,337},{141,11,480},{6,0,1478},{5,10,
+1011},{136,10,701},{139,0,596},{5,0,206},{134,0,398},{4,10,54},{5,10,666},{7,10,
+1039},{7,10,1130},{9,10,195},{138,10,302},{7,0,50},{9,11,158},{138,11,411},{135,
+11,1120},{6,0,517},{7,0,1159},{10,0,621},{11,0,192},{134,10,1669},{4,0,592},{6,0
+,600},{135,0,1653},{10,0,223},{139,0,645},{136,11,139},{7,0,64},{136,0,245},{142
+,0,278},{6,11,622},{135,11,1030},{136,0,604},{134,0,1502},{138,0,265},{141,11,
+168},{7,0,1763},{140,0,310},{7,10,798},{139,11,719},{7,11,160},{10,11,624},{142,
+11,279},{132,11,363},{7,10,122},{9,10,259},{10,10,84},{11,10,470},{12,10,541},{
+141,10,379},{5,0,129},{6,0,61},{135,0,947},{134,0,1356},{135,11,1191},{13,0,505}
+,{141,0,506},{11,0,1000},{5,10,82},{5,10,131},{7,10,1755},{8,10,31},{9,10,168},{
+9,10,764},{139,10,869},{134,0,966},{134,10,605},{134,11,292},{5,11,177},{6,11,
+616},{7,11,827},{9,11,525},{138,11,656},{135,11,1486},{138,11,31},{5,10,278},{
+137,10,68},{4,10,163},{5,10,201},{5,10,307},{5,10,310},{6,10,335},{7,10,284},{
+136,10,165},{6,0,839},{135,10,1660},{136,10,781},{6,10,33},{135,10,1244},{133,0,
+637},{4,11,161},{133,11,631},{137,0,590},{7,10,1953},{136,10,720},{5,0,280},{7,0
+,1226},{138,10,203},{134,0,1386},{5,0,281},{6,0,1026},{6,10,326},{7,10,677},{137
+,10,425},{7,11,1557},{135,11,1684},{135,0,1064},{9,11,469},{9,11,709},{12,11,512
+},{14,11,65},{145,11,12},{134,0,917},{10,11,229},{11,11,73},{11,11,376},{139,11,
+433},{7,0,555},{9,0,192},{13,0,30},{13,0,49},{15,0,150},{16,0,76},{20,0,52},{7,
+10,1316},{7,10,1412},{7,10,1839},{9,10,589},{11,10,241},{11,10,676},{11,10,811},
+{11,10,891},{12,10,140},{12,10,346},{12,10,479},{13,10,381},{14,10,188},{146,10,
+30},{149,0,15},{6,0,1882},{6,0,1883},{6,0,1897},{9,0,945},{9,0,1014},{9,0,1020},
+{12,0,823},{12,0,842},{12,0,866},{12,0,934},{15,0,242},{146,0,208},{6,0,965},{
+134,0,1499},{7,0,33},{7,0,120},{8,0,489},{9,0,319},{10,0,820},{11,0,1004},{12,0,
+379},{12,0,679},{13,0,117},{13,0,412},{14,0,25},{15,0,52},{15,0,161},{16,0,47},{
+149,0,2},{6,11,558},{7,11,651},{8,11,421},{9,11,0},{138,11,34},{4,0,937},{5,0,
+801},{7,0,473},{5,10,358},{7,10,1184},{10,10,662},{13,10,212},{13,10,304},{13,10
+,333},{145,10,98},{132,0,877},{6,0,693},{134,0,824},{132,0,365},{7,11,1832},{138
+,11,374},{5,0,7},{139,0,774},{4,0,734},{5,0,662},{134,0,430},{4,0,746},{135,0,
+1090},{5,0,360},{8,0,237},{10,0,231},{147,0,124},{138,11,348},{6,11,6},{7,11,81}
+,{7,11,771},{7,11,1731},{9,11,405},{138,11,421},{6,0,740},{137,0,822},{133,10,
+946},{7,0,1485},{136,0,929},{7,10,411},{8,10,631},{9,10,323},{10,10,355},{11,10,
+491},{12,10,143},{12,10,402},{13,10,73},{14,10,408},{15,10,107},{146,10,71},{135
+,10,590},{5,11,881},{133,11,885},{150,11,25},{4,0,852},{5,11,142},{134,11,546},{
+7,10,1467},{8,10,328},{10,10,544},{11,10,955},{13,10,320},{145,10,83},{9,0,17},{
+10,0,291},{11,10,511},{13,10,394},{14,10,298},{14,10,318},{146,10,103},{5,11,466
+},{11,11,571},{12,11,198},{13,11,283},{14,11,186},{15,11,21},{143,11,103},{134,0
+,1001},{4,11,185},{5,11,257},{5,11,839},{5,11,936},{7,11,171},{9,11,399},{10,11,
+258},{10,11,395},{10,11,734},{11,11,1014},{12,11,23},{13,11,350},{14,11,150},{
+147,11,6},{143,0,35},{132,0,831},{5,10,835},{134,10,483},{4,0,277},{5,0,608},{6,
+0,493},{7,0,457},{12,0,384},{7,11,404},{7,11,1377},{7,11,1430},{7,11,2017},{8,11
+,149},{8,11,239},{8,11,512},{8,11,793},{8,11,818},{9,11,474},{9,11,595},{10,11,
+122},{10,11,565},{10,11,649},{10,11,783},{11,11,239},{11,11,295},{11,11,447},{11
+,11,528},{11,11,639},{11,11,800},{11,11,936},{12,11,25},{12,11,73},{12,11,77},{
+12,11,157},{12,11,316},{12,11,390},{12,11,391},{12,11,394},{12,11,395},{12,11,
+478},{12,11,503},{12,11,592},{12,11,680},{13,11,50},{13,11,53},{13,11,132},{13,
+11,198},{13,11,275},{13,11,322},{13,11,415},{14,11,71},{14,11,257},{14,11,395},{
+15,11,71},{15,11,136},{17,11,123},{18,11,93},{147,11,58},{134,0,1351},{7,0,27},{
+135,0,316},{136,11,712},{136,0,984},{133,0,552},{137,0,264},{132,0,401},{6,0,710
+},{6,0,1111},{134,0,1343},{134,0,1211},{9,0,543},{10,0,524},{11,0,108},{11,0,653
+},{12,0,524},{13,0,123},{14,0,252},{16,0,18},{19,0,38},{20,0,26},{20,0,65},{21,0
+,3},{151,0,11},{4,0,205},{5,0,623},{7,0,104},{8,0,519},{137,0,716},{132,10,677},
+{4,11,377},{152,11,13},{135,11,1673},{7,0,579},{9,0,41},{9,0,244},{9,0,669},{10,
+0,5},{11,0,861},{11,0,951},{139,0,980},{132,0,717},{136,0,1011},{132,0,805},{4,
+11,180},{135,11,1906},{132,10,777},{132,10,331},{132,0,489},{6,0,1024},{4,11,491
+},{133,10,747},{135,11,1182},{4,11,171},{138,11,234},{4,11,586},{7,11,1186},{138
+,11,631},{135,0,892},{135,11,336},{9,11,931},{10,11,334},{148,11,71},{137,0,473}
+,{6,0,864},{12,0,659},{139,11,926},{7,0,819},{9,0,26},{9,0,392},{10,0,152},{10,0
+,226},{11,0,19},{12,0,276},{12,0,426},{12,0,589},{13,0,460},{15,0,97},{19,0,48},
+{148,0,104},{135,0,51},{133,10,326},{4,10,691},{146,10,16},{9,0,130},{11,0,765},
+{10,10,680},{10,10,793},{141,10,357},{133,11,765},{8,0,229},{6,10,32},{7,10,385}
+,{7,10,757},{7,10,1916},{8,10,94},{8,10,711},{9,10,541},{10,10,162},{10,10,795},
+{11,10,989},{11,10,1010},{12,10,14},{142,10,308},{7,11,474},{137,11,578},{132,0,
+674},{132,0,770},{5,0,79},{7,0,1027},{7,0,1477},{139,0,52},{133,11,424},{134,0,
+1666},{6,0,409},{6,10,349},{6,10,1682},{7,10,1252},{8,10,112},{8,11,714},{9,10,
+435},{9,10,668},{10,10,290},{10,10,319},{10,10,815},{11,10,180},{11,10,837},{12,
+10,240},{13,10,152},{13,10,219},{142,10,158},{5,0,789},{134,0,195},{4,0,251},{4,
+0,688},{7,0,513},{135,0,1284},{132,10,581},{9,11,420},{10,11,269},{10,11,285},{
+10,11,576},{11,11,397},{13,11,175},{145,11,90},{6,10,126},{7,10,573},{8,10,397},
+{142,10,44},{132,11,429},{133,0,889},{4,0,160},{5,0,330},{7,0,1434},{136,0,174},
+{7,11,18},{7,11,699},{7,11,1966},{8,11,752},{9,11,273},{9,11,412},{9,11,703},{10
+,11,71},{10,11,427},{10,11,508},{146,11,97},{6,0,872},{134,0,899},{133,10,926},{
+134,0,1126},{134,0,918},{4,11,53},{5,11,186},{135,11,752},{7,0,268},{136,0,569},
+{134,0,1224},{6,0,1361},{7,10,1232},{137,10,531},{8,11,575},{10,11,289},{139,11,
+319},{133,10,670},{132,11,675},{133,0,374},{135,10,1957},{133,0,731},{11,0,190},
+{15,0,49},{11,11,190},{143,11,49},{4,0,626},{5,0,506},{5,0,642},{6,0,425},{10,0,
+202},{139,0,141},{137,0,444},{7,10,242},{135,10,1942},{6,11,209},{8,11,468},{9,
+11,210},{11,11,36},{12,11,28},{12,11,630},{13,11,21},{13,11,349},{14,11,7},{145,
+11,13},{4,11,342},{135,11,1179},{5,10,834},{7,10,1202},{8,10,14},{9,10,481},{137
+,10,880},{4,11,928},{133,11,910},{4,11,318},{4,11,496},{7,11,856},{139,11,654},{
+136,0,835},{7,0,1526},{138,10,465},{151,0,17},{135,0,477},{4,10,357},{6,10,172},
+{7,10,143},{137,10,413},{6,0,1374},{138,0,994},{18,0,76},{132,10,590},{7,0,287},
+{8,0,355},{9,0,293},{137,0,743},{134,0,1389},{7,11,915},{8,11,247},{147,11,0},{4
+,11,202},{5,11,382},{6,11,454},{7,11,936},{7,11,1803},{8,11,758},{9,11,375},{9,
+11,895},{10,11,743},{10,11,792},{11,11,978},{11,11,1012},{142,11,109},{5,0,384},
+{8,0,455},{140,0,48},{132,11,390},{5,10,169},{7,10,333},{136,10,45},{5,0,264},{
+134,0,184},{138,11,791},{133,11,717},{132,10,198},{6,11,445},{7,11,332},{137,11,
+909},{136,0,1001},{4,10,24},{5,10,140},{5,10,185},{7,10,1500},{11,10,565},{139,
+10,838},{134,11,578},{5,0,633},{6,0,28},{135,0,1323},{132,0,851},{136,11,267},{7
+,0,359},{8,0,243},{140,0,175},{4,10,334},{133,10,593},{141,11,87},{136,11,766},{
+10,0,287},{12,0,138},{10,11,287},{140,11,138},{4,0,105},{132,0,740},{140,10,116}
+,{134,0,857},{135,11,1841},{6,0,1402},{137,0,819},{132,11,584},{132,10,709},{133
+,10,897},{5,0,224},{13,0,174},{146,0,52},{135,10,1840},{4,10,608},{133,10,497},{
+139,11,60},{4,0,758},{135,0,1649},{4,11,226},{4,11,326},{135,11,1770},{5,11,426}
+,{8,11,30},{9,11,2},{11,11,549},{147,11,122},{135,10,2039},{6,10,540},{136,10,
+136},{4,0,573},{8,0,655},{4,10,897},{133,10,786},{7,0,351},{139,0,128},{133,10,
+999},{4,10,299},{135,10,1004},{133,0,918},{132,11,345},{4,11,385},{7,11,265},{
+135,11,587},{133,10,456},{136,10,180},{6,0,687},{134,0,1537},{4,11,347},{5,11,
+423},{5,11,996},{135,11,1329},{132,10,755},{7,11,1259},{9,11,125},{11,11,65},{
+140,11,285},{5,11,136},{6,11,136},{136,11,644},{134,0,1525},{4,0,1009},{135,0,
+1139},{139,10,338},{132,0,340},{135,10,1464},{8,0,847},{10,0,861},{10,0,876},{10
+,0,889},{10,0,922},{10,0,929},{10,0,933},{12,0,784},{140,0,791},{139,0,176},{9,
+11,134},{10,11,2},{10,11,27},{10,11,333},{11,11,722},{143,11,1},{4,11,433},{133,
+11,719},{5,0,985},{7,0,509},{7,0,529},{145,0,96},{132,0,615},{4,10,890},{5,10,
+805},{5,10,819},{5,10,961},{6,10,396},{6,10,1631},{6,10,1678},{7,10,1967},{7,10,
+2041},{9,10,630},{11,10,8},{11,10,1019},{12,10,176},{13,10,225},{14,10,292},{149
+,10,24},{135,0,1919},{134,0,1131},{144,11,21},{144,11,51},{135,10,1815},{4,0,247
+},{7,10,1505},{10,10,190},{10,10,634},{11,10,792},{12,10,358},{140,10,447},{5,10
+,0},{6,10,536},{7,10,604},{13,10,445},{145,10,126},{4,0,184},{5,0,390},{6,0,337}
+,{7,0,23},{7,0,494},{7,0,618},{7,0,1456},{8,0,27},{8,0,599},{10,0,153},{139,0,
+710},{6,10,232},{6,10,412},{7,10,1074},{8,10,9},{8,10,157},{8,10,786},{9,10,196}
+,{9,10,352},{9,10,457},{10,10,337},{11,10,232},{11,10,877},{12,10,480},{140,10,
+546},{13,0,38},{135,10,958},{4,10,382},{136,10,579},{4,10,212},{135,10,1206},{4,
+11,555},{8,11,536},{138,11,288},{11,11,139},{139,11,171},{9,11,370},{138,11,90},
+{132,0,1015},{134,0,1088},{5,10,655},{135,11,977},{134,0,1585},{17,10,67},{147,
+10,74},{10,0,227},{11,0,497},{11,0,709},{140,0,415},{6,0,360},{7,0,1664},{136,0,
+478},{7,0,95},{6,10,231},{136,10,423},{140,11,65},{4,11,257},{135,11,2031},{135,
+11,1768},{133,10,300},{139,11,211},{136,0,699},{6,10,237},{7,10,611},{8,10,100},
+{9,10,416},{11,10,335},{12,10,173},{146,10,101},{14,0,26},{146,0,150},{6,0,581},
+{135,0,1119},{135,10,1208},{132,0,739},{6,11,83},{6,11,1733},{135,11,1389},{137,
+0,869},{4,0,67},{5,0,422},{7,0,1037},{7,0,1289},{7,0,1555},{9,0,741},{145,0,108}
+,{133,10,199},{12,10,427},{146,10,38},{136,0,464},{142,0,42},{10,0,96},{8,11,501
+},{137,11,696},{134,11,592},{4,0,512},{4,0,966},{5,0,342},{6,0,1855},{8,0,869},{
+8,0,875},{8,0,901},{144,0,26},{8,0,203},{11,0,823},{11,0,846},{12,0,482},{13,0,
+277},{13,0,302},{13,0,464},{14,0,205},{142,0,221},{4,0,449},{133,0,718},{7,11,
+1718},{9,11,95},{9,11,274},{10,11,279},{10,11,317},{10,11,420},{11,11,303},{11,
+11,808},{12,11,134},{12,11,367},{13,11,149},{13,11,347},{14,11,349},{14,11,406},
+{18,11,22},{18,11,89},{18,11,122},{147,11,47},{133,11,26},{4,0,355},{6,0,311},{9
+,0,256},{138,0,404},{132,11,550},{10,0,758},{6,10,312},{6,10,1715},{10,10,584},{
+11,10,546},{11,10,692},{12,10,259},{12,10,295},{13,10,46},{141,10,154},{136,11,
+822},{5,0,827},{4,11,902},{5,11,809},{6,11,122},{135,11,896},{5,0,64},{140,0,581
+},{4,0,442},{6,0,739},{7,0,1047},{7,0,1352},{7,0,1643},{7,11,1911},{9,11,449},{
+10,11,192},{138,11,740},{135,11,262},{132,10,588},{133,11,620},{5,0,977},{6,0,
+288},{7,0,528},{4,11,34},{5,11,574},{7,11,279},{7,11,1624},{136,11,601},{6,0,
+1375},{4,10,231},{5,10,61},{6,10,104},{7,10,729},{7,10,964},{7,10,1658},{140,10,
+414},{6,10,263},{138,10,757},{132,10,320},{4,0,254},{7,0,1309},{5,11,332},{135,
+11,1309},{6,11,261},{8,11,182},{139,11,943},{132,10,225},{6,0,12},{135,0,1219},{
+4,0,275},{12,0,376},{6,11,1721},{141,11,490},{4,11,933},{133,11,880},{6,0,951},{
+6,0,1109},{6,0,1181},{7,0,154},{4,10,405},{7,10,817},{14,10,58},{17,10,37},{146,
+10,124},{6,0,1520},{133,10,974},{134,0,1753},{6,0,369},{6,0,502},{7,0,1036},{8,0
+,348},{9,0,452},{10,0,26},{11,0,224},{11,0,387},{11,0,772},{12,0,95},{12,0,629},
+{13,0,195},{13,0,207},{13,0,241},{14,0,260},{14,0,270},{143,0,140},{132,0,269},{
+5,0,480},{7,0,532},{7,0,1197},{7,0,1358},{8,0,291},{11,0,349},{142,0,396},{5,10,
+235},{7,10,1239},{11,10,131},{140,10,370},{7,10,956},{7,10,1157},{7,10,1506},{7,
+10,1606},{7,10,1615},{7,10,1619},{7,10,1736},{7,10,1775},{8,10,590},{9,10,324},{
+9,10,736},{9,10,774},{9,10,776},{9,10,784},{10,10,567},{10,10,708},{11,10,518},{
+11,10,613},{11,10,695},{11,10,716},{11,10,739},{11,10,770},{11,10,771},{11,10,
+848},{11,10,857},{11,10,931},{11,10,947},{12,10,326},{12,10,387},{12,10,484},{12
+,10,528},{12,10,552},{12,10,613},{13,10,189},{13,10,256},{13,10,340},{13,10,432}
+,{13,10,436},{13,10,440},{13,10,454},{14,10,174},{14,10,220},{14,10,284},{14,10,
+390},{145,10,121},{8,11,598},{9,11,664},{138,11,441},{9,10,137},{138,10,221},{
+133,11,812},{148,0,15},{134,0,1341},{6,0,1017},{4,11,137},{7,11,1178},{135,11,
+1520},{7,10,390},{138,10,140},{7,11,1260},{135,11,1790},{137,11,191},{135,10,
+1144},{6,0,1810},{7,0,657},{8,0,886},{10,0,857},{14,0,440},{144,0,96},{8,0,533},
+{6,11,1661},{7,11,1975},{7,11,2009},{135,11,2011},{6,0,1453},{134,10,464},{132,
+11,715},{5,10,407},{11,10,204},{11,10,243},{11,10,489},{12,10,293},{19,10,37},{
+20,10,73},{150,10,38},{133,11,703},{4,0,211},{7,0,1483},{5,10,325},{8,10,5},{8,
+10,227},{9,10,105},{10,10,585},{140,10,614},{4,0,332},{5,0,335},{6,0,238},{7,0,
+269},{7,0,811},{7,0,1797},{8,0,836},{9,0,507},{141,0,242},{5,11,89},{7,11,1915},
+{9,11,185},{9,11,235},{9,11,496},{10,11,64},{10,11,270},{10,11,403},{10,11,469},
+{10,11,529},{10,11,590},{11,11,140},{11,11,860},{13,11,1},{13,11,422},{14,11,341
+},{14,11,364},{17,11,93},{18,11,113},{19,11,97},{147,11,113},{133,11,695},{16,0,
+19},{5,11,6},{6,11,183},{6,10,621},{7,11,680},{7,11,978},{7,11,1013},{7,11,1055}
+,{12,11,230},{13,11,172},{13,10,504},{146,11,29},{136,0,156},{133,0,1009},{6,11,
+29},{139,11,63},{134,0,820},{134,10,218},{7,10,454},{7,10,782},{8,10,768},{140,
+10,686},{5,0,228},{6,0,203},{7,0,156},{8,0,347},{9,0,265},{18,0,39},{20,0,54},{
+21,0,31},{22,0,3},{23,0,0},{15,11,8},{18,11,39},{20,11,54},{21,11,31},{22,11,3},
+{151,11,0},{7,0,1131},{135,0,1468},{144,10,0},{134,0,1276},{10,10,676},{140,10,
+462},{132,11,311},{134,11,1740},{7,11,170},{8,11,90},{8,11,177},{8,11,415},{11,
+11,714},{142,11,281},{134,10,164},{6,0,1792},{138,0,849},{150,10,50},{5,0,291},{
+5,0,318},{7,0,765},{9,0,389},{12,0,548},{8,11,522},{142,11,328},{11,11,91},{13,
+11,129},{15,11,101},{145,11,125},{4,11,494},{6,11,74},{7,11,44},{7,11,407},{8,11
+,551},{12,11,17},{15,11,5},{148,11,11},{4,11,276},{133,11,296},{6,10,343},{7,10,
+195},{7,11,1777},{9,10,226},{10,10,197},{10,10,575},{11,10,502},{139,10,899},{10
+,0,525},{139,0,82},{14,0,453},{4,11,7},{5,11,90},{5,11,158},{6,11,542},{7,11,221
+},{7,11,1574},{9,11,490},{10,11,540},{11,11,443},{139,11,757},{135,0,666},{22,10
+,29},{150,11,29},{4,0,422},{147,10,8},{5,0,355},{145,0,0},{6,0,1873},{9,0,918},{
+7,11,588},{9,11,175},{138,11,530},{143,11,31},{11,0,165},{7,10,1125},{9,10,143},
+{14,10,405},{150,10,21},{9,0,260},{137,0,905},{5,11,872},{6,11,57},{6,11,479},{6
+,11,562},{7,11,471},{7,11,1060},{9,11,447},{9,11,454},{141,11,6},{138,11,704},{
+133,0,865},{5,0,914},{134,0,1625},{133,0,234},{7,0,1383},{5,11,31},{6,11,614},{
+145,11,61},{7,11,1200},{138,11,460},{6,11,424},{135,11,1866},{136,0,306},{5,10,
+959},{12,11,30},{13,11,148},{14,11,87},{14,11,182},{16,11,42},{18,11,92},{148,11
+,70},{6,0,1919},{6,0,1921},{9,0,923},{9,0,930},{9,0,941},{9,0,949},{9,0,987},{9,
+0,988},{9,0,992},{12,0,802},{12,0,815},{12,0,856},{12,0,885},{12,0,893},{12,0,
+898},{12,0,919},{12,0,920},{12,0,941},{12,0,947},{15,0,183},{15,0,185},{15,0,189
+},{15,0,197},{15,0,202},{15,0,233},{18,0,218},{18,0,219},{18,0,233},{143,11,156}
+,{135,10,1759},{136,10,173},{13,0,163},{13,0,180},{18,0,78},{20,0,35},{5,11,13},
+{134,11,142},{134,10,266},{6,11,97},{7,11,116},{8,11,322},{8,11,755},{9,11,548},
+{10,11,714},{11,11,884},{141,11,324},{135,0,1312},{9,0,814},{137,11,676},{133,0,
+707},{135,0,1493},{6,0,421},{7,0,61},{7,0,1540},{10,0,11},{138,0,501},{12,0,733}
+,{12,0,766},{7,11,866},{135,11,1163},{137,0,341},{142,0,98},{145,11,115},{135,11
+,1111},{136,10,300},{136,0,1014},{8,11,1},{9,11,112},{138,11,326},{132,11,730},{
+5,11,488},{6,11,527},{7,11,489},{7,11,1636},{8,11,121},{8,11,144},{8,11,359},{9,
+11,193},{9,11,241},{9,11,336},{9,11,882},{11,11,266},{11,11,372},{11,11,944},{12
+,11,401},{140,11,641},{6,0,971},{134,0,1121},{6,0,102},{7,0,72},{15,0,142},{147,
+0,67},{151,0,30},{135,0,823},{134,0,1045},{5,10,427},{5,10,734},{7,10,478},{136,
+10,52},{7,0,1930},{11,10,217},{142,10,165},{6,0,1512},{135,0,1870},{9,11,31},{10
+,11,244},{10,11,699},{12,11,149},{141,11,497},{133,11,377},{145,11,101},{10,11,
+158},{13,11,13},{13,11,137},{13,11,258},{14,11,111},{14,11,225},{14,11,253},{14,
+11,304},{14,11,339},{14,11,417},{146,11,33},{6,0,87},{6,10,1734},{7,10,20},{7,10
+,1056},{8,10,732},{9,10,406},{9,10,911},{138,10,694},{134,0,1243},{137,0,245},{7
+,0,68},{8,0,48},{8,0,88},{8,0,582},{8,0,681},{9,0,373},{9,0,864},{11,0,157},{11,
+0,336},{11,0,843},{148,0,27},{8,11,663},{144,11,8},{133,10,613},{4,0,88},{5,0,
+137},{5,0,174},{5,0,777},{6,0,1664},{6,0,1725},{7,0,77},{7,0,426},{7,0,1317},{7,
+0,1355},{8,0,126},{8,0,563},{9,0,523},{9,0,750},{10,0,310},{10,0,836},{11,0,42},
+{11,0,318},{11,0,731},{12,0,68},{12,0,92},{12,0,507},{12,0,692},{13,0,81},{13,0,
+238},{13,0,374},{14,0,436},{18,0,138},{19,0,78},{19,0,111},{20,0,55},{20,0,77},{
+148,0,92},{141,0,418},{4,0,938},{137,0,625},{138,0,351},{5,11,843},{7,10,32},{7,
+10,984},{8,10,85},{8,10,709},{9,10,579},{9,10,847},{9,10,856},{10,10,799},{11,10
+,258},{11,10,1007},{12,10,331},{12,10,615},{13,10,188},{13,10,435},{14,10,8},{15
+,10,165},{16,10,27},{148,10,40},{6,0,1668},{7,0,1499},{8,0,117},{9,0,314},{138,0
+,174},{135,0,707},{132,11,554},{133,11,536},{5,0,403},{5,11,207},{9,11,79},{11,
+11,625},{145,11,7},{132,11,424},{136,11,785},{4,10,167},{135,10,82},{9,0,7},{23,
+0,6},{9,11,7},{151,11,6},{6,0,282},{5,10,62},{6,10,534},{7,10,74},{7,10,678},{7,
+10,684},{7,10,1043},{7,10,1072},{8,10,280},{8,10,541},{8,10,686},{9,10,258},{10,
+10,519},{11,10,252},{140,10,282},{138,10,33},{132,10,359},{4,0,44},{5,0,311},{6,
+0,156},{7,0,639},{7,0,762},{7,0,1827},{9,0,8},{9,0,462},{148,0,83},{7,11,769},{9
+,11,18},{138,11,358},{4,0,346},{7,0,115},{9,0,180},{9,0,456},{10,0,363},{4,11,
+896},{134,11,1777},{133,10,211},{7,0,761},{7,0,1051},{137,0,545},{6,10,145},{141
+,10,336},{7,11,750},{9,11,223},{11,11,27},{11,11,466},{12,11,624},{14,11,265},{
+146,11,61},{6,0,752},{6,0,768},{6,0,1195},{6,0,1254},{6,0,1619},{137,0,835},{6,0
+,1936},{8,0,930},{136,0,960},{132,10,263},{132,11,249},{12,0,653},{132,10,916},{
+4,11,603},{133,11,661},{8,0,344},{4,11,11},{6,11,128},{7,11,231},{7,11,1533},{
+138,11,725},{134,0,1483},{134,0,875},{6,0,185},{7,0,1899},{9,0,875},{139,0,673},
+{15,10,155},{144,10,79},{7,0,93},{7,0,210},{7,0,1223},{8,0,451},{8,0,460},{11,0,
+353},{11,0,475},{4,10,599},{6,10,1634},{7,10,67},{7,10,691},{7,10,979},{7,10,
+1697},{8,10,207},{8,10,214},{8,10,231},{8,10,294},{8,10,336},{8,10,428},{8,10,
+471},{8,10,622},{8,10,626},{8,10,679},{8,10,759},{8,10,829},{9,10,11},{9,10,246}
+,{9,10,484},{9,10,573},{9,10,706},{9,10,762},{9,10,798},{9,10,855},{9,10,870},{9
+,10,912},{10,10,303},{10,10,335},{10,10,424},{10,10,461},{10,10,543},{10,10,759}
+,{10,10,814},{11,10,59},{11,10,235},{11,10,590},{11,10,929},{11,10,963},{11,10,
+987},{12,10,114},{12,10,182},{12,10,226},{12,10,332},{12,10,439},{12,10,575},{12
+,10,598},{12,10,675},{13,10,8},{13,10,125},{13,10,194},{13,10,287},{14,10,197},{
+14,10,383},{15,10,53},{17,10,63},{19,10,46},{19,10,98},{19,10,106},{148,10,85},{
+132,11,476},{4,0,327},{5,0,478},{7,0,1332},{136,0,753},{5,0,1020},{133,0,1022},{
+135,11,1807},{4,0,103},{133,0,401},{4,0,499},{135,0,1421},{10,0,207},{13,0,164},
+{147,10,126},{9,11,20},{10,11,324},{139,11,488},{132,0,96},{9,11,280},{138,11,
+134},{135,0,968},{133,10,187},{135,10,1286},{5,11,112},{6,11,103},{134,11,150},{
+8,0,914},{10,0,3},{4,10,215},{9,10,38},{11,10,23},{11,10,127},{139,10,796},{135,
+0,399},{6,0,563},{137,0,224},{6,0,704},{134,0,1214},{4,11,708},{8,11,15},{9,11,
+50},{9,11,386},{11,11,18},{11,11,529},{140,11,228},{4,11,563},{7,11,109},{7,11,
+592},{7,11,637},{7,11,770},{7,11,1701},{8,11,436},{8,11,463},{9,11,60},{9,11,335
+},{9,11,904},{10,11,73},{11,11,434},{12,11,585},{13,11,331},{18,11,110},{148,11,
+60},{134,0,1559},{132,11,502},{6,11,347},{138,11,161},{4,11,33},{5,11,102},{5,11
+,500},{6,11,284},{7,11,1079},{7,11,1423},{7,11,1702},{8,11,470},{9,11,554},{9,11
+,723},{139,11,333},{7,11,246},{135,11,840},{6,11,10},{8,11,571},{9,11,739},{143,
+11,91},{8,0,861},{10,0,905},{12,0,730},{12,0,789},{133,11,626},{134,0,946},{5,0,
+746},{12,0,333},{14,0,332},{12,11,333},{142,11,332},{5,11,18},{6,11,526},{13,11,
+24},{13,11,110},{19,11,5},{147,11,44},{4,0,910},{5,0,832},{135,10,2002},{10,11,
+768},{139,11,787},{4,11,309},{5,11,462},{7,11,970},{135,11,1097},{4,10,28},{5,10
+,440},{7,10,248},{11,10,833},{140,10,344},{134,10,1654},{6,0,632},{6,0,652},{6,0
+,1272},{6,0,1384},{134,0,1560},{134,11,1704},{6,0,1393},{133,10,853},{6,10,249},
+{7,10,1234},{139,10,573},{5,11,86},{7,11,743},{9,11,85},{10,11,281},{10,11,432},
+{11,11,490},{12,11,251},{13,11,118},{14,11,378},{146,11,143},{5,11,524},{133,11,
+744},{134,0,1514},{10,0,201},{142,0,319},{7,0,717},{10,0,510},{7,10,392},{8,10,
+20},{8,10,172},{8,10,690},{9,10,383},{9,10,845},{11,10,293},{11,10,832},{11,10,
+920},{11,10,984},{141,10,221},{134,0,1381},{5,10,858},{133,10,992},{8,0,528},{
+137,0,348},{10,11,107},{140,11,436},{4,0,20},{133,0,616},{134,0,1251},{132,11,
+927},{10,11,123},{12,11,670},{13,11,371},{14,11,142},{146,11,94},{134,0,1163},{7
+,11,1149},{137,11,156},{134,0,307},{133,11,778},{7,0,1091},{135,0,1765},{5,11,
+502},{6,10,268},{137,10,62},{8,11,196},{10,11,283},{139,11,406},{4,0,26},{5,0,
+429},{6,0,245},{7,0,704},{7,0,1379},{135,0,1474},{133,11,855},{132,0,881},{4,0,
+621},{135,11,1596},{7,11,1400},{9,11,446},{138,11,45},{6,0,736},{138,10,106},{
+133,0,542},{134,0,348},{133,0,868},{136,0,433},{135,0,1495},{138,0,771},{6,10,
+613},{136,10,223},{138,0,215},{141,0,124},{136,11,391},{135,11,172},{132,10,670}
+,{140,0,55},{9,10,40},{139,10,136},{7,0,62},{147,0,112},{132,0,856},{132,11,568}
+,{12,0,270},{139,10,259},{8,0,572},{137,0,698},{4,11,732},{9,10,310},{137,10,682
+},{142,10,296},{134,0,939},{136,11,733},{135,11,1435},{7,10,1401},{135,10,1476},
+{6,0,352},{4,10,296},{7,10,401},{7,10,1410},{7,10,1594},{7,10,1674},{8,10,63},{8
+,10,660},{137,10,74},{4,11,428},{133,11,668},{4,10,139},{4,10,388},{140,10,188},
+{7,11,2015},{140,11,665},{132,0,647},{146,0,10},{138,0,220},{142,0,464},{132,0,
+109},{134,0,1746},{6,0,515},{4,10,747},{6,11,1623},{6,11,1681},{7,10,649},{7,10,
+1479},{135,10,1583},{133,10,232},{135,0,566},{137,10,887},{4,0,40},{10,0,67},{11
+,0,117},{11,0,768},{139,0,935},{132,0,801},{7,0,992},{8,0,301},{9,0,722},{12,0,
+63},{13,0,29},{14,0,161},{143,0,18},{139,0,923},{6,11,1748},{8,11,715},{9,11,802
+},{10,11,46},{10,11,819},{13,11,308},{14,11,351},{14,11,363},{146,11,67},{137,11
+,745},{7,0,1145},{4,10,14},{7,10,1801},{10,10,748},{141,10,458},{4,11,63},{5,11,
+347},{134,11,474},{135,0,568},{4,10,425},{7,11,577},{7,11,1432},{9,11,475},{9,11
+,505},{9,11,526},{9,11,609},{9,11,689},{9,11,726},{9,11,735},{9,11,738},{10,11,
+556},{10,11,674},{10,11,684},{11,11,89},{11,11,202},{11,11,272},{11,11,380},{11,
+11,415},{11,11,505},{11,11,537},{11,11,550},{11,11,562},{11,11,640},{11,11,667},
+{11,11,688},{11,11,847},{11,11,927},{11,11,930},{11,11,940},{12,11,144},{12,11,
+325},{12,11,329},{12,11,389},{12,11,403},{12,11,451},{12,11,515},{12,11,604},{12
+,11,616},{12,11,626},{13,11,66},{13,11,131},{13,11,167},{13,11,236},{13,11,368},
+{13,11,411},{13,11,434},{13,11,453},{13,11,461},{13,11,474},{14,11,59},{14,11,60
+},{14,11,139},{14,11,152},{14,11,276},{14,11,353},{14,11,402},{15,11,28},{15,11,
+81},{15,11,123},{15,11,152},{18,11,136},{148,11,88},{137,0,247},{135,11,1622},{9
+,11,544},{11,11,413},{144,11,25},{4,0,645},{7,0,825},{6,10,1768},{135,11,89},{
+140,0,328},{5,10,943},{134,10,1779},{134,0,1363},{5,10,245},{6,10,576},{7,10,582
+},{136,10,225},{134,0,1280},{5,11,824},{133,11,941},{7,11,440},{8,11,230},{139,
+11,106},{5,0,28},{6,0,204},{10,0,320},{10,0,583},{13,0,502},{14,0,72},{14,0,274}
+,{14,0,312},{14,0,344},{15,0,159},{16,0,62},{16,0,69},{17,0,30},{18,0,42},{18,0,
+53},{18,0,84},{18,0,140},{19,0,68},{19,0,85},{20,0,5},{20,0,45},{20,0,101},{22,0
+,7},{150,0,20},{4,0,558},{6,0,390},{7,0,162},{7,0,689},{9,0,360},{138,0,653},{
+134,0,764},{6,0,862},{137,0,833},{5,0,856},{6,0,1672},{6,0,1757},{134,0,1781},{5
+,0,92},{10,0,736},{140,0,102},{6,0,1927},{6,0,1944},{8,0,924},{8,0,948},{10,0,
+967},{138,0,978},{134,0,1479},{5,0,590},{8,0,360},{9,0,213},{138,0,63},{134,0,
+1521},{6,0,709},{134,0,891},{132,10,443},{13,0,477},{14,0,120},{148,0,61},{4,11,
+914},{5,11,800},{133,11,852},{10,11,54},{141,11,115},{4,11,918},{133,11,876},{
+139,11,152},{4,11,92},{133,11,274},{135,11,1901},{9,11,800},{10,11,693},{11,11,
+482},{11,11,734},{139,11,789},{9,0,483},{132,10,298},{6,0,1213},{141,11,498},{
+135,11,1451},{133,11,743},{4,0,1022},{10,0,1000},{12,0,957},{12,0,980},{12,0,
+1013},{14,0,481},{144,0,116},{8,0,503},{17,0,29},{4,11,49},{7,11,280},{135,11,
+1633},{135,0,1712},{134,0,466},{136,11,47},{5,10,164},{7,10,121},{142,10,189},{7
+,10,812},{7,10,1261},{7,10,1360},{9,10,632},{140,10,352},{139,10,556},{132,0,731
+},{5,11,272},{5,11,908},{5,11,942},{7,11,1008},{7,11,1560},{8,11,197},{9,11,47},
+{11,11,538},{139,11,742},{4,10,172},{9,10,611},{10,10,436},{12,10,673},{141,10,
+255},{133,10,844},{10,0,484},{11,0,754},{12,0,457},{14,0,171},{14,0,389},{146,0,
+153},{9,10,263},{10,10,147},{138,10,492},{137,11,891},{138,0,241},{133,10,537},{
+6,0,2005},{136,0,964},{137,10,842},{151,11,8},{4,11,407},{132,11,560},{135,11,
+1884},{6,0,1100},{134,0,1242},{135,0,954},{5,10,230},{5,10,392},{6,10,420},{9,10
+,568},{140,10,612},{4,11,475},{11,11,35},{11,11,90},{13,11,7},{13,11,71},{13,11,
+177},{142,11,422},{136,11,332},{135,0,1958},{6,0,549},{8,0,34},{8,0,283},{9,0,
+165},{138,0,475},{10,0,952},{12,0,966},{140,0,994},{5,0,652},{5,0,701},{135,0,
+449},{4,0,655},{7,0,850},{17,0,75},{146,0,137},{4,0,146},{7,0,1618},{8,0,670},{5
+,10,41},{7,10,1459},{7,10,1469},{7,10,1859},{9,10,549},{139,10,905},{133,10,696}
+,{6,0,159},{6,0,364},{7,0,516},{137,0,518},{135,0,1439},{6,11,222},{7,11,636},{7
+,11,1620},{8,11,409},{9,11,693},{139,11,77},{13,0,151},{141,11,45},{6,0,1027},{4
+,11,336},{132,10,771},{139,11,392},{10,11,121},{11,11,175},{149,11,16},{8,0,950}
+,{138,0,983},{133,10,921},{135,0,993},{6,10,180},{7,10,1137},{8,10,751},{139,10,
+805},{7,0,501},{9,0,111},{10,0,141},{11,0,332},{13,0,43},{13,0,429},{14,0,130},{
+14,0,415},{145,0,102},{4,10,183},{5,11,882},{7,10,271},{11,10,824},{11,10,952},{
+13,10,278},{13,10,339},{13,10,482},{14,10,424},{148,10,99},{4,10,19},{5,10,477},
+{5,10,596},{6,10,505},{7,10,1221},{11,10,907},{12,10,209},{141,10,214},{135,10,
+1215},{133,0,452},{132,11,426},{5,0,149},{136,0,233},{133,0,935},{6,11,58},{7,11
+,654},{7,11,745},{7,11,1969},{8,11,240},{8,11,675},{9,11,479},{9,11,731},{10,11,
+330},{10,11,593},{10,11,817},{11,11,32},{11,11,133},{11,11,221},{145,11,68},{12,
+0,582},{18,0,131},{7,11,102},{137,11,538},{136,0,801},{134,10,1645},{132,0,70},{
+6,10,92},{6,10,188},{7,10,1269},{7,10,1524},{7,10,1876},{10,10,228},{139,10,1020
+},{4,10,459},{133,10,966},{138,0,369},{16,0,36},{140,10,330},{141,11,366},{7,0,
+721},{10,0,236},{12,0,204},{6,10,18},{7,10,932},{8,10,757},{9,10,54},{9,10,65},{
+9,10,844},{10,10,113},{10,10,315},{10,10,798},{11,10,153},{12,10,151},{12,10,392
+},{12,10,666},{142,10,248},{7,0,241},{10,0,430},{8,10,548},{9,10,532},{10,10,117
+},{11,10,351},{11,10,375},{143,10,23},{134,10,1742},{133,10,965},{133,11,566},{6
+,11,48},{135,11,63},{134,10,182},{10,10,65},{10,10,488},{138,10,497},{6,11,114},
+{7,11,1224},{7,11,1556},{136,11,3},{134,0,1817},{8,11,576},{137,11,267},{6,0,
+1078},{144,0,16},{9,10,588},{138,10,260},{138,0,1021},{5,0,406},{134,0,2022},{
+133,11,933},{6,0,69},{135,0,117},{7,0,1830},{136,11,427},{4,0,432},{135,0,824},{
+134,10,1786},{133,0,826},{139,11,67},{133,11,759},{135,10,308},{137,0,816},{133,
+0,1000},{4,0,297},{6,0,529},{7,0,152},{7,0,713},{7,0,1845},{8,0,710},{8,0,717},{
+12,0,639},{140,0,685},{7,0,423},{136,10,588},{136,10,287},{136,0,510},{134,0,
+1048},{6,0,618},{7,11,56},{7,11,1989},{8,11,337},{8,11,738},{9,11,600},{10,11,
+483},{12,11,37},{13,11,447},{142,11,92},{4,0,520},{135,0,575},{8,0,990},{138,0,
+977},{135,11,774},{9,11,347},{11,11,24},{140,11,170},{136,11,379},{140,10,290},{
+132,11,328},{4,0,321},{134,0,569},{4,11,101},{135,11,1171},{7,0,723},{7,0,1135},
+{5,11,833},{136,11,744},{7,10,719},{8,10,809},{136,10,834},{8,0,921},{136,10,796
+},{5,10,210},{6,10,213},{7,10,60},{10,10,364},{139,10,135},{5,0,397},{6,0,154},{
+7,0,676},{8,0,443},{8,0,609},{9,0,24},{9,0,325},{10,0,35},{11,0,535},{11,0,672},
+{11,0,1018},{12,0,637},{16,0,30},{5,10,607},{8,10,326},{136,10,490},{4,10,701},{
+5,10,472},{6,11,9},{6,11,397},{7,11,53},{7,11,1742},{9,10,758},{10,11,632},{11,
+11,828},{140,11,146},{135,10,380},{135,10,1947},{148,11,109},{10,10,278},{138,11
+,278},{134,0,856},{7,0,139},{4,10,386},{8,10,405},{8,10,728},{9,10,497},{11,10,
+110},{11,10,360},{15,10,37},{144,10,84},{141,0,282},{133,0,981},{5,0,288},{7,10,
+1452},{7,10,1480},{8,10,634},{140,10,472},{7,0,1890},{8,11,367},{10,11,760},{14,
+11,79},{20,11,17},{152,11,0},{4,10,524},{136,10,810},{4,0,56},{7,0,1791},{8,0,
+607},{8,0,651},{11,0,465},{11,0,835},{12,0,337},{141,0,480},{10,10,238},{141,10,
+33},{11,11,417},{12,11,223},{140,11,265},{9,0,158},{10,0,411},{140,0,261},{133,
+10,532},{133,10,997},{12,11,186},{12,11,292},{14,11,100},{146,11,70},{6,0,1403},
+{136,0,617},{134,0,1205},{139,0,563},{4,0,242},{134,0,333},{4,11,186},{5,11,157}
+,{8,11,168},{138,11,6},{132,0,369},{133,11,875},{5,10,782},{5,10,829},{134,10,
+1738},{134,0,622},{135,11,1272},{6,0,1407},{7,11,111},{136,11,581},{7,10,1823},{
+139,10,693},{7,0,160},{10,0,624},{142,0,279},{132,0,363},{10,11,589},{12,11,111}
+,{13,11,260},{14,11,82},{18,11,63},{147,11,45},{7,11,1364},{7,11,1907},{141,11,
+158},{4,11,404},{4,11,659},{135,11,675},{13,11,211},{14,11,133},{14,11,204},{15,
+11,64},{15,11,69},{15,11,114},{16,11,10},{19,11,23},{19,11,35},{19,11,39},{19,11
+,51},{19,11,71},{19,11,75},{152,11,15},{4,10,78},{5,10,96},{5,10,182},{7,10,1724
+},{7,10,1825},{10,10,394},{10,10,471},{11,10,532},{14,10,340},{145,10,88},{135,
+10,1964},{133,11,391},{11,11,887},{14,11,365},{142,11,375},{5,11,540},{6,11,1697
+},{7,11,222},{136,11,341},{134,11,78},{9,0,601},{9,0,619},{10,0,505},{10,0,732},
+{11,0,355},{140,0,139},{134,0,292},{139,0,174},{5,0,177},{6,0,616},{7,0,827},{9,
+0,525},{138,0,656},{10,0,31},{6,10,215},{7,10,1028},{7,10,1473},{7,10,1721},{9,
+10,424},{138,10,779},{135,10,584},{136,11,293},{134,0,685},{135,11,1868},{133,11
+,460},{7,0,647},{6,10,67},{7,10,1630},{9,10,354},{9,10,675},{10,10,830},{14,10,
+80},{145,10,80},{4,0,161},{133,0,631},{6,10,141},{7,10,225},{9,10,59},{9,10,607}
+,{10,10,312},{11,10,687},{12,10,555},{13,10,373},{13,10,494},{148,10,58},{7,11,
+965},{7,11,1460},{135,11,1604},{136,10,783},{134,11,388},{6,0,722},{6,0,1267},{4
+,11,511},{9,11,333},{9,11,379},{10,11,602},{11,11,441},{11,11,723},{11,11,976},{
+140,11,357},{134,0,1797},{135,0,1684},{9,0,469},{9,0,709},{12,0,512},{14,0,65},{
+17,0,12},{5,11,938},{136,11,707},{7,0,1230},{136,0,531},{10,0,229},{11,0,73},{11
+,0,376},{139,0,433},{12,0,268},{12,0,640},{142,0,119},{7,10,430},{139,10,46},{6,
+0,558},{7,0,651},{8,0,421},{9,0,0},{10,0,34},{139,0,1008},{6,0,106},{7,0,1786},{
+7,0,1821},{9,0,102},{9,0,763},{5,10,602},{7,10,2018},{137,10,418},{5,0,65},{6,0,
+416},{7,0,1720},{7,0,1924},{10,0,109},{11,0,14},{11,0,70},{11,0,569},{11,0,735},
+{15,0,153},{20,0,80},{136,10,677},{135,11,1625},{137,11,772},{136,0,595},{6,11,
+469},{7,11,1709},{138,11,515},{7,0,1832},{138,0,374},{9,0,106},{9,0,163},{9,0,
+296},{10,0,167},{10,0,172},{10,0,777},{139,0,16},{6,0,6},{7,0,81},{7,0,771},{7,0
+,1731},{9,0,405},{138,0,421},{4,11,500},{135,11,938},{5,11,68},{134,11,383},{5,0
+,881},{133,0,885},{6,0,854},{6,0,1132},{6,0,1495},{6,0,1526},{6,0,1533},{134,0,
+1577},{4,11,337},{6,11,353},{7,11,1934},{8,11,488},{137,11,429},{7,11,236},{7,11
+,1795},{8,11,259},{9,11,135},{9,11,177},{10,11,825},{11,11,115},{11,11,370},{11,
+11,405},{11,11,604},{12,11,10},{12,11,667},{12,11,669},{13,11,76},{14,11,310},{
+15,11,76},{15,11,147},{148,11,23},{5,0,142},{134,0,546},{4,11,15},{5,11,22},{6,
+11,244},{7,11,40},{7,11,200},{7,11,906},{7,11,1199},{9,11,616},{10,11,716},{11,
+11,635},{11,11,801},{140,11,458},{5,0,466},{11,0,571},{12,0,198},{13,0,283},{14,
+0,186},{15,0,21},{15,0,103},{135,10,329},{4,0,185},{5,0,257},{5,0,839},{5,0,936}
+,{9,0,399},{10,0,258},{10,0,395},{10,0,734},{11,0,1014},{12,0,23},{13,0,350},{14
+,0,150},{19,0,6},{135,11,1735},{12,11,36},{141,11,337},{5,11,598},{7,11,791},{8,
+11,108},{137,11,123},{132,10,469},{7,0,404},{7,0,1377},{7,0,1430},{7,0,2017},{8,
+0,149},{8,0,239},{8,0,512},{8,0,793},{8,0,818},{9,0,474},{9,0,595},{10,0,122},{
+10,0,565},{10,0,649},{10,0,783},{11,0,239},{11,0,295},{11,0,447},{11,0,528},{11,
+0,639},{11,0,800},{12,0,25},{12,0,77},{12,0,157},{12,0,256},{12,0,316},{12,0,390
+},{12,0,391},{12,0,395},{12,0,478},{12,0,503},{12,0,592},{12,0,680},{13,0,50},{
+13,0,53},{13,0,132},{13,0,198},{13,0,322},{13,0,415},{13,0,511},{14,0,71},{14,0,
+395},{15,0,71},{15,0,136},{17,0,123},{18,0,93},{147,0,58},{136,0,712},{134,10,
+1743},{5,10,929},{6,10,340},{8,10,376},{136,10,807},{6,0,1848},{8,0,860},{10,0,
+856},{10,0,859},{10,0,925},{10,0,941},{140,0,762},{6,0,629},{6,0,906},{9,0,810},
+{140,0,652},{5,10,218},{7,10,1610},{138,10,83},{7,10,1512},{135,10,1794},{4,0,
+377},{24,0,13},{4,11,155},{7,11,1689},{11,10,0},{144,10,78},{4,11,164},{5,11,151
+},{5,11,730},{5,11,741},{7,11,498},{7,11,870},{7,11,1542},{12,11,213},{14,11,36}
+,{14,11,391},{17,11,111},{18,11,6},{18,11,46},{18,11,151},{19,11,36},{20,11,32},
+{20,11,56},{20,11,69},{20,11,102},{21,11,4},{22,11,8},{22,11,10},{22,11,14},{150
+,11,31},{7,0,1842},{133,10,571},{4,10,455},{4,11,624},{135,11,1752},{134,0,1501}
+,{4,11,492},{5,11,451},{6,10,161},{7,10,372},{137,10,597},{132,10,349},{4,0,180}
+,{135,0,1906},{135,11,835},{141,11,70},{132,0,491},{137,10,751},{6,10,432},{139,
+10,322},{4,0,171},{138,0,234},{6,11,113},{135,11,436},{4,0,586},{7,0,1186},{138,
+0,631},{5,10,468},{10,10,325},{11,10,856},{12,10,345},{143,10,104},{5,10,223},{
+10,11,592},{10,11,753},{12,11,317},{12,11,355},{12,11,465},{12,11,469},{12,11,
+560},{12,11,578},{141,11,243},{132,10,566},{135,11,520},{4,10,59},{135,10,1394},
+{6,10,436},{139,10,481},{9,0,931},{10,0,334},{20,0,71},{4,10,48},{5,10,271},{7,
+10,953},{135,11,1878},{11,0,170},{5,10,610},{136,10,457},{133,10,755},{6,0,1587}
+,{135,10,1217},{4,10,197},{149,11,26},{133,11,585},{137,11,521},{133,0,765},{133
+,10,217},{139,11,586},{133,0,424},{9,11,752},{12,11,610},{13,11,431},{16,11,59},
+{146,11,109},{136,0,714},{7,0,685},{132,11,307},{9,0,420},{10,0,269},{10,0,285},
+{10,0,576},{11,0,397},{13,0,175},{145,0,90},{132,0,429},{133,11,964},{9,11,463},
+{138,11,595},{7,0,18},{7,0,699},{7,0,1966},{8,0,752},{9,0,273},{9,0,412},{9,0,
+703},{10,0,71},{10,0,427},{138,0,508},{4,10,165},{7,10,1398},{135,10,1829},{4,0,
+53},{5,0,186},{7,0,752},{7,0,828},{142,0,116},{8,0,575},{10,0,289},{139,0,319},{
+132,0,675},{134,0,1424},{4,11,75},{5,11,180},{6,11,500},{7,11,58},{7,11,710},{
+138,11,645},{133,11,649},{6,11,276},{7,11,282},{7,11,879},{7,11,924},{8,11,459},
+{9,11,599},{9,11,754},{11,11,574},{12,11,128},{12,11,494},{13,11,52},{13,11,301}
+,{15,11,30},{143,11,132},{6,0,647},{134,0,1095},{5,10,9},{7,10,297},{7,10,966},{
+140,10,306},{132,11,200},{134,0,1334},{5,10,146},{6,10,411},{138,10,721},{6,0,
+209},{6,0,1141},{6,0,1288},{8,0,468},{9,0,210},{11,0,36},{12,0,28},{12,0,630},{
+13,0,21},{13,0,349},{14,0,7},{145,0,13},{6,10,177},{135,10,467},{4,0,342},{135,0
+,1179},{10,11,454},{140,11,324},{4,0,928},{133,0,910},{7,0,1838},{6,11,225},{137
+,11,211},{16,0,101},{20,0,115},{20,0,118},{148,0,122},{4,0,496},{135,0,856},{4,0
+,318},{11,0,654},{7,11,718},{139,11,102},{8,11,58},{9,11,724},{11,11,809},{13,11
+,113},{145,11,72},{5,10,200},{6,11,345},{135,11,1247},{8,11,767},{8,11,803},{9,
+11,301},{137,11,903},{7,0,915},{8,0,247},{19,0,0},{7,11,1949},{136,11,674},{4,0,
+202},{5,0,382},{6,0,454},{7,0,936},{7,0,1803},{8,0,758},{9,0,375},{9,0,895},{10,
+0,743},{10,0,792},{11,0,978},{11,0,1012},{142,0,109},{7,0,1150},{7,0,1425},{7,0,
+1453},{140,0,513},{134,11,259},{138,0,791},{11,0,821},{12,0,110},{12,0,153},{18,
+0,41},{150,0,19},{134,10,481},{132,0,796},{6,0,445},{9,0,909},{136,11,254},{10,0
+,776},{13,0,345},{142,0,425},{4,10,84},{7,10,1482},{10,10,76},{138,10,142},{135,
+11,742},{6,0,578},{133,10,1015},{6,0,1387},{4,10,315},{5,10,507},{135,10,1370},{
+4,0,438},{133,0,555},{136,0,766},{133,11,248},{134,10,1722},{4,11,116},{5,11,95}
+,{5,11,445},{7,11,1688},{8,11,29},{9,11,272},{11,11,509},{139,11,915},{135,0,541
+},{133,11,543},{8,10,222},{8,10,476},{9,10,238},{11,10,516},{11,10,575},{15,10,
+109},{146,10,100},{6,0,880},{134,0,1191},{5,11,181},{136,11,41},{134,0,1506},{
+132,11,681},{7,11,25},{8,11,202},{138,11,536},{139,0,983},{137,0,768},{132,0,584
+},{9,11,423},{140,11,89},{8,11,113},{9,11,877},{10,11,554},{11,11,83},{12,11,136
+},{147,11,109},{7,10,706},{7,10,1058},{138,10,538},{133,11,976},{4,11,206},{135,
+11,746},{136,11,526},{140,0,737},{11,10,92},{11,10,196},{11,10,409},{11,10,450},
+{11,10,666},{11,10,777},{12,10,262},{13,10,385},{13,10,393},{15,10,115},{16,10,
+45},{145,10,82},{4,0,226},{4,0,326},{7,0,1770},{4,11,319},{5,11,699},{138,11,673
+},{6,10,40},{135,10,1781},{5,0,426},{8,0,30},{9,0,2},{11,0,549},{147,0,122},{6,0
+,1161},{134,0,1329},{138,10,97},{6,10,423},{7,10,665},{135,10,1210},{7,11,13},{8
+,11,226},{10,11,537},{11,11,570},{11,11,605},{11,11,799},{11,11,804},{12,11,85},
+{12,11,516},{12,11,623},{13,11,112},{13,11,361},{14,11,77},{14,11,78},{17,11,28}
+,{147,11,110},{132,11,769},{132,11,551},{132,11,728},{147,0,117},{9,11,57},{9,11
+,459},{10,11,425},{11,11,119},{12,11,184},{12,11,371},{13,11,358},{145,11,51},{5
+,11,188},{5,11,814},{8,11,10},{9,11,421},{9,11,729},{10,11,609},{139,11,689},{
+134,11,624},{135,11,298},{135,0,462},{4,0,345},{139,10,624},{136,10,574},{4,0,
+385},{7,0,265},{135,0,587},{6,0,808},{132,11,528},{133,0,398},{132,10,354},{4,0,
+347},{5,0,423},{5,0,996},{135,0,1329},{135,10,1558},{7,0,1259},{9,0,125},{139,0,
+65},{5,0,136},{6,0,136},{136,0,644},{5,11,104},{6,11,173},{135,11,1631},{135,0,
+469},{133,10,830},{4,0,278},{5,0,465},{135,0,1367},{7,11,810},{8,11,138},{8,11,
+342},{9,11,84},{10,11,193},{11,11,883},{140,11,359},{5,10,496},{135,10,203},{4,0
+,433},{133,0,719},{6,11,95},{134,10,547},{5,10,88},{137,10,239},{6,11,406},{10,
+11,409},{10,11,447},{11,11,44},{140,11,100},{134,0,1423},{7,10,650},{135,10,1310
+},{134,0,749},{135,11,1243},{135,0,1363},{6,0,381},{7,0,645},{7,0,694},{8,0,546}
+,{7,10,1076},{9,10,80},{11,10,78},{11,10,421},{11,10,534},{140,10,545},{134,11,
+1636},{135,11,1344},{12,0,277},{7,10,274},{11,10,479},{139,10,507},{6,0,705},{6,
+0,783},{6,0,1275},{6,0,1481},{4,11,282},{7,11,1034},{11,11,398},{11,11,634},{12,
+11,1},{12,11,79},{12,11,544},{14,11,237},{17,11,10},{146,11,20},{134,0,453},{4,0
+,555},{8,0,536},{10,0,288},{11,0,1005},{4,10,497},{135,10,1584},{5,11,118},{5,11
+,499},{6,11,476},{7,11,600},{7,11,888},{135,11,1096},{138,0,987},{7,0,1107},{7,
+10,261},{7,10,1115},{7,10,1354},{7,10,1588},{7,10,1705},{7,10,1902},{9,10,465},{
+10,10,248},{10,10,349},{10,10,647},{11,10,527},{11,10,660},{11,10,669},{12,10,
+529},{141,10,305},{7,11,296},{7,11,596},{8,11,560},{8,11,586},{9,11,612},{11,11,
+100},{11,11,304},{12,11,46},{13,11,89},{14,11,112},{145,11,122},{9,0,370},{138,0
+,90},{136,10,13},{132,0,860},{7,10,642},{8,10,250},{11,10,123},{11,10,137},{13,
+10,48},{142,10,95},{135,10,1429},{137,11,321},{132,0,257},{135,0,2031},{7,0,1768
+},{7,11,1599},{7,11,1723},{8,11,79},{8,11,106},{8,11,190},{8,11,302},{8,11,383},
+{9,11,119},{9,11,233},{9,11,298},{9,11,419},{9,11,471},{10,11,181},{10,11,406},{
+11,11,57},{11,11,85},{11,11,120},{11,11,177},{11,11,296},{11,11,382},{11,11,454}
+,{11,11,758},{11,11,999},{12,11,27},{12,11,98},{12,11,131},{12,11,245},{12,11,
+312},{12,11,446},{12,11,454},{13,11,25},{13,11,98},{13,11,426},{13,11,508},{14,
+11,6},{14,11,163},{14,11,272},{14,11,277},{14,11,370},{15,11,95},{15,11,138},{15
+,11,167},{17,11,18},{17,11,38},{20,11,96},{149,11,32},{5,11,722},{134,11,1759},{
+145,11,16},{6,0,1071},{134,0,1561},{10,10,545},{140,10,301},{6,0,83},{6,0,1733},
+{135,0,1389},{4,0,835},{135,0,1818},{133,11,258},{4,10,904},{133,10,794},{134,0,
+2006},{5,11,30},{7,11,495},{8,11,134},{9,11,788},{140,11,438},{135,11,2004},{137
+,0,696},{5,11,50},{6,11,439},{7,11,780},{135,11,1040},{7,11,772},{7,11,1104},{7,
+11,1647},{11,11,269},{11,11,539},{11,11,607},{11,11,627},{11,11,706},{11,11,975}
+,{12,11,248},{12,11,311},{12,11,434},{12,11,600},{12,11,622},{13,11,297},{13,11,
+367},{13,11,485},{14,11,69},{14,11,409},{143,11,108},{5,11,1},{6,11,81},{138,11,
+520},{7,0,1718},{9,0,95},{9,0,274},{10,0,279},{10,0,317},{10,0,420},{11,0,303},{
+11,0,808},{12,0,134},{12,0,367},{13,0,149},{13,0,347},{14,0,349},{14,0,406},{18,
+0,22},{18,0,89},{18,0,122},{147,0,47},{5,11,482},{8,11,98},{9,11,172},{10,11,222
+},{10,11,700},{10,11,822},{11,11,302},{11,11,778},{12,11,50},{12,11,127},{12,11,
+396},{13,11,62},{13,11,328},{14,11,122},{147,11,72},{7,10,386},{138,10,713},{6,
+10,7},{6,10,35},{7,10,147},{7,10,1069},{7,10,1568},{7,10,1575},{7,10,1917},{8,10
+,43},{8,10,208},{9,10,128},{9,10,866},{10,10,20},{11,10,981},{147,10,33},{133,0,
+26},{132,0,550},{5,11,2},{7,11,1494},{136,11,589},{6,11,512},{7,11,797},{8,11,
+253},{9,11,77},{10,11,1},{10,11,129},{10,11,225},{11,11,118},{11,11,226},{11,11,
+251},{11,11,430},{11,11,701},{11,11,974},{11,11,982},{12,11,64},{12,11,260},{12,
+11,488},{140,11,690},{7,10,893},{141,10,424},{134,0,901},{136,0,822},{4,0,902},{
+5,0,809},{134,0,122},{6,0,807},{134,0,1366},{7,0,262},{5,11,748},{134,11,553},{
+133,0,620},{4,0,34},{5,0,574},{7,0,279},{7,0,1624},{136,0,601},{9,0,170},{6,10,
+322},{9,10,552},{11,10,274},{13,10,209},{13,10,499},{14,10,85},{15,10,126},{145,
+10,70},{132,0,537},{4,11,12},{7,11,420},{7,11,522},{7,11,809},{8,11,797},{141,11
+,88},{133,0,332},{8,10,83},{8,10,742},{8,10,817},{9,10,28},{9,10,29},{9,10,885},
+{10,10,387},{11,10,633},{11,10,740},{13,10,235},{13,10,254},{15,10,143},{143,10,
+146},{6,0,1909},{9,0,964},{12,0,822},{12,0,854},{12,0,865},{12,0,910},{12,0,938}
+,{15,0,169},{15,0,208},{15,0,211},{18,0,205},{18,0,206},{18,0,220},{18,0,223},{
+152,0,24},{140,10,49},{5,11,528},{135,11,1580},{6,0,261},{8,0,182},{139,0,943},{
+134,0,1721},{4,0,933},{133,0,880},{136,11,321},{5,11,266},{9,11,290},{9,11,364},
+{10,11,293},{11,11,606},{142,11,45},{6,0,1609},{4,11,50},{6,11,510},{6,11,594},{
+9,11,121},{10,11,49},{10,11,412},{139,11,834},{7,0,895},{136,11,748},{132,11,466
+},{4,10,110},{10,10,415},{10,10,597},{142,10,206},{133,0,812},{135,11,281},{6,0,
+1890},{6,0,1902},{6,0,1916},{9,0,929},{9,0,942},{9,0,975},{9,0,984},{9,0,986},{9
+,0,1011},{9,0,1019},{12,0,804},{12,0,851},{12,0,867},{12,0,916},{12,0,923},{15,0
+,194},{15,0,204},{15,0,210},{15,0,222},{15,0,223},{15,0,229},{15,0,250},{18,0,
+179},{18,0,186},{18,0,192},{7,10,205},{135,10,2000},{132,11,667},{135,0,778},{4,
+0,137},{7,0,1178},{135,0,1520},{134,0,1314},{4,11,242},{134,11,333},{6,0,1661},{
+7,0,1975},{7,0,2009},{135,0,2011},{134,0,1591},{4,10,283},{135,10,1194},{11,0,
+820},{150,0,51},{4,11,39},{5,11,36},{7,11,1843},{8,11,407},{11,11,144},{140,11,
+523},{134,10,1720},{4,11,510},{7,11,29},{7,11,66},{7,11,1980},{10,11,487},{10,11
+,809},{146,11,9},{5,0,89},{7,0,1915},{9,0,185},{9,0,235},{10,0,64},{10,0,270},{
+10,0,403},{10,0,469},{10,0,529},{10,0,590},{11,0,140},{11,0,860},{13,0,1},{13,0,
+422},{14,0,341},{14,0,364},{17,0,93},{18,0,113},{19,0,97},{147,0,113},{133,0,695
+},{6,0,987},{134,0,1160},{5,0,6},{6,0,183},{7,0,680},{7,0,978},{7,0,1013},{7,0,
+1055},{12,0,230},{13,0,172},{146,0,29},{134,11,570},{132,11,787},{134,11,518},{6
+,0,29},{139,0,63},{132,11,516},{136,11,821},{132,0,311},{134,0,1740},{7,0,170},{
+8,0,90},{8,0,177},{8,0,415},{11,0,714},{14,0,281},{136,10,735},{134,0,1961},{135
+,11,1405},{4,11,10},{7,11,917},{139,11,786},{5,10,132},{9,10,486},{9,10,715},{10
+,10,458},{11,10,373},{11,10,668},{11,10,795},{11,10,897},{12,10,272},{12,10,424}
+,{12,10,539},{12,10,558},{14,10,245},{14,10,263},{14,10,264},{14,10,393},{142,10
+,403},{11,0,91},{13,0,129},{15,0,101},{145,0,125},{135,0,1132},{4,0,494},{6,0,74
+},{7,0,44},{7,0,407},{12,0,17},{15,0,5},{148,0,11},{133,10,379},{5,0,270},{5,11,
+684},{6,10,89},{6,10,400},{7,10,1569},{7,10,1623},{7,10,1850},{8,10,218},{8,10,
+422},{9,10,570},{138,10,626},{4,0,276},{133,0,296},{6,0,1523},{134,11,27},{6,10,
+387},{7,10,882},{141,10,111},{6,10,224},{7,10,877},{137,10,647},{135,10,790},{4,
+0,7},{5,0,90},{5,0,158},{6,0,542},{7,0,221},{7,0,1574},{9,0,490},{10,0,540},{11,
+0,443},{139,0,757},{7,0,588},{9,0,175},{138,0,530},{135,10,394},{142,11,23},{134
+,0,786},{135,0,580},{7,0,88},{136,0,627},{5,0,872},{6,0,57},{7,0,471},{9,0,447},
+{137,0,454},{6,11,342},{6,11,496},{8,11,275},{137,11,206},{4,11,909},{133,11,940
+},{6,0,735},{132,11,891},{8,0,845},{8,0,916},{135,10,1409},{5,0,31},{134,0,614},
+{11,0,458},{12,0,15},{140,0,432},{8,0,330},{140,0,477},{4,0,530},{5,0,521},{7,0,
+1200},{10,0,460},{132,11,687},{6,0,424},{135,0,1866},{9,0,569},{12,0,12},{12,0,
+81},{12,0,319},{13,0,69},{14,0,259},{16,0,87},{17,0,1},{17,0,21},{17,0,24},{18,0
+,15},{18,0,56},{18,0,59},{18,0,127},{18,0,154},{19,0,19},{148,0,31},{7,0,1302},{
+136,10,38},{134,11,253},{5,10,261},{7,10,78},{7,10,199},{8,10,815},{9,10,126},{
+138,10,342},{5,0,595},{135,0,1863},{6,11,41},{141,11,160},{5,0,13},{134,0,142},{
+6,0,97},{7,0,116},{8,0,322},{8,0,755},{9,0,548},{10,0,714},{11,0,884},{13,0,324}
+,{7,11,1304},{138,11,477},{132,10,628},{134,11,1718},{7,10,266},{136,10,804},{
+135,10,208},{7,0,1021},{6,10,79},{135,10,1519},{7,0,1472},{135,0,1554},{6,11,362
+},{146,11,51},{7,0,1071},{7,0,1541},{7,0,1767},{7,0,1806},{11,0,162},{11,0,242},
+{11,0,452},{12,0,605},{15,0,26},{144,0,44},{136,10,741},{133,11,115},{145,0,115}
+,{134,10,376},{6,0,1406},{134,0,1543},{5,11,193},{12,11,178},{13,11,130},{145,11
+,84},{135,0,1111},{8,0,1},{9,0,650},{10,0,326},{5,11,705},{137,11,606},{5,0,488}
+,{6,0,527},{7,0,489},{7,0,1636},{8,0,121},{8,0,144},{8,0,359},{9,0,193},{9,0,241
+},{9,0,336},{9,0,882},{11,0,266},{11,0,372},{11,0,944},{12,0,401},{140,0,641},{
+135,11,174},{6,0,267},{7,10,244},{7,10,632},{7,10,1609},{8,10,178},{8,10,638},{
+141,10,58},{134,0,1983},{134,0,1155},{134,0,1575},{134,0,1438},{9,0,31},{10,0,
+244},{10,0,699},{12,0,149},{141,0,497},{133,0,377},{4,11,122},{5,11,796},{5,11,
+952},{6,11,1660},{6,11,1671},{8,11,567},{9,11,687},{9,11,742},{10,11,686},{11,11
+,356},{11,11,682},{140,11,281},{145,0,101},{11,11,0},{144,11,78},{5,11,179},{5,
+10,791},{7,11,1095},{135,11,1213},{8,11,372},{9,11,122},{138,11,175},{7,10,686},
+{8,10,33},{8,10,238},{10,10,616},{11,10,467},{11,10,881},{13,10,217},{13,10,253}
+,{142,10,268},{9,0,476},{4,11,66},{7,11,722},{135,11,904},{7,11,352},{137,11,684
+},{135,0,2023},{135,0,1836},{132,10,447},{5,0,843},{144,0,35},{137,11,779},{141,
+11,35},{4,10,128},{5,10,415},{6,10,462},{7,10,294},{7,10,578},{10,10,710},{139,
+10,86},{132,0,554},{133,0,536},{136,10,587},{5,0,207},{9,0,79},{11,0,625},{145,0
+,7},{7,0,1371},{6,10,427},{138,10,692},{4,0,424},{4,10,195},{135,10,802},{8,0,
+785},{133,11,564},{135,0,336},{4,0,896},{6,0,1777},{134,11,556},{137,11,103},{
+134,10,1683},{7,11,544},{8,11,719},{138,11,61},{138,10,472},{4,11,5},{5,11,498},
+{136,11,637},{7,0,750},{9,0,223},{11,0,27},{11,0,466},{12,0,624},{14,0,265},{146
+,0,61},{12,0,238},{18,0,155},{12,11,238},{146,11,155},{151,10,28},{133,11,927},{
+12,0,383},{5,10,3},{8,10,578},{9,10,118},{10,10,705},{141,10,279},{4,11,893},{5,
+11,780},{133,11,893},{4,0,603},{133,0,661},{4,0,11},{6,0,128},{7,0,231},{7,0,
+1533},{10,0,725},{5,10,229},{5,11,238},{135,11,1350},{8,10,102},{10,10,578},{10,
+10,672},{12,10,496},{13,10,408},{14,10,121},{145,10,106},{132,0,476},{134,0,1552
+},{134,11,1729},{8,10,115},{8,10,350},{9,10,489},{10,10,128},{11,10,306},{12,10,
+373},{14,10,30},{17,10,79},{19,10,80},{150,10,55},{135,0,1807},{4,0,680},{4,11,
+60},{7,11,760},{7,11,1800},{8,11,314},{9,11,700},{139,11,487},{4,10,230},{5,10,
+702},{148,11,94},{132,11,228},{139,0,435},{9,0,20},{10,0,324},{10,0,807},{139,0,
+488},{6,10,1728},{136,11,419},{4,10,484},{18,10,26},{19,10,42},{20,10,43},{21,10
+,0},{23,10,27},{152,10,14},{135,0,1431},{133,11,828},{5,0,112},{6,0,103},{6,0,
+150},{7,0,1303},{9,0,292},{10,0,481},{20,0,13},{7,11,176},{7,11,178},{7,11,1110}
+,{10,11,481},{148,11,13},{138,0,356},{4,11,51},{5,11,39},{6,11,4},{7,11,591},{7,
+11,849},{7,11,951},{7,11,1129},{7,11,1613},{7,11,1760},{7,11,1988},{9,11,434},{
+10,11,754},{11,11,25},{11,11,37},{139,11,414},{6,0,1963},{134,0,2000},{132,10,
+633},{6,0,1244},{133,11,902},{135,11,928},{140,0,18},{138,0,204},{135,11,1173},{
+134,0,867},{4,0,708},{8,0,15},{9,0,50},{9,0,386},{11,0,18},{11,0,529},{140,0,228
+},{134,11,270},{4,0,563},{7,0,109},{7,0,592},{7,0,637},{7,0,770},{8,0,463},{9,0,
+60},{9,0,335},{9,0,904},{10,0,73},{11,0,434},{12,0,585},{13,0,331},{18,0,110},{
+148,0,60},{132,0,502},{14,11,359},{19,11,52},{148,11,47},{6,11,377},{7,11,1025},
+{9,11,613},{145,11,104},{6,0,347},{10,0,161},{5,10,70},{5,10,622},{6,10,334},{7,
+10,1032},{9,10,171},{11,10,26},{11,10,213},{11,10,637},{11,10,707},{12,10,202},{
+12,10,380},{13,10,226},{13,10,355},{14,10,222},{145,10,42},{132,11,416},{4,0,33}
+,{5,0,102},{6,0,284},{7,0,1079},{7,0,1423},{7,0,1702},{8,0,470},{9,0,554},{9,0,
+723},{11,0,333},{142,11,372},{5,11,152},{5,11,197},{7,11,340},{7,11,867},{10,11,
+548},{10,11,581},{11,11,6},{12,11,3},{12,11,19},{14,11,110},{142,11,289},{7,0,
+246},{135,0,840},{6,0,10},{8,0,571},{9,0,739},{143,0,91},{6,0,465},{7,0,1465},{4
+,10,23},{4,10,141},{5,10,313},{5,10,1014},{6,10,50},{7,10,142},{7,10,559},{8,10,
+640},{9,10,460},{9,10,783},{11,10,741},{12,10,183},{141,10,488},{133,0,626},{136
+,0,614},{138,0,237},{7,11,34},{7,11,190},{8,11,28},{8,11,141},{8,11,444},{8,11,
+811},{9,11,468},{11,11,334},{12,11,24},{12,11,386},{140,11,576},{133,11,757},{5,
+0,18},{6,0,526},{13,0,24},{13,0,110},{19,0,5},{147,0,44},{6,0,506},{134,11,506},
+{135,11,1553},{4,0,309},{5,0,462},{7,0,970},{7,0,1097},{22,0,30},{22,0,33},{7,11
+,1385},{11,11,582},{11,11,650},{11,11,901},{11,11,949},{12,11,232},{12,11,236},{
+13,11,413},{13,11,501},{146,11,116},{9,0,140},{5,10,222},{138,10,534},{6,0,1056}
+,{137,10,906},{134,0,1704},{138,10,503},{134,0,1036},{5,10,154},{7,10,1491},{10,
+10,379},{138,10,485},{4,11,383},{133,10,716},{134,0,1315},{5,0,86},{7,0,743},{9,
+0,85},{10,0,281},{10,0,432},{11,0,825},{12,0,251},{13,0,118},{142,0,378},{8,0,
+264},{4,10,91},{5,10,388},{5,10,845},{6,10,206},{6,10,252},{6,10,365},{7,10,136}
+,{7,10,531},{136,10,621},{5,0,524},{133,0,744},{5,11,277},{141,11,247},{132,11,
+435},{10,0,107},{140,0,436},{132,0,927},{10,0,123},{12,0,670},{146,0,94},{7,0,
+1149},{9,0,156},{138,0,957},{5,11,265},{6,11,212},{135,11,28},{133,0,778},{133,0
+,502},{8,0,196},{10,0,283},{139,0,406},{135,10,576},{136,11,535},{134,0,1312},{5
+,10,771},{5,10,863},{5,10,898},{6,10,1632},{6,10,1644},{134,10,1780},{5,0,855},{
+5,10,331},{135,11,1487},{132,11,702},{5,11,808},{135,11,2045},{7,0,1400},{9,0,
+446},{138,0,45},{140,10,632},{132,0,1003},{5,11,166},{8,11,739},{140,11,511},{5,
+10,107},{7,10,201},{136,10,518},{6,10,446},{135,10,1817},{134,0,1532},{134,0,
+1097},{4,11,119},{5,11,170},{5,11,447},{7,11,1708},{7,11,1889},{9,11,357},{9,11,
+719},{12,11,486},{140,11,596},{9,10,851},{141,10,510},{7,0,612},{8,0,545},{8,0,
+568},{8,0,642},{9,0,717},{10,0,541},{10,0,763},{11,0,449},{12,0,489},{13,0,153},
+{13,0,296},{14,0,138},{14,0,392},{15,0,50},{16,0,6},{16,0,12},{20,0,9},{132,10,
+504},{4,11,450},{135,11,1158},{11,0,54},{13,0,173},{13,0,294},{5,10,883},{5,10,
+975},{8,10,392},{148,10,7},{13,0,455},{15,0,99},{15,0,129},{144,0,68},{135,0,172
+},{132,11,754},{5,10,922},{134,10,1707},{134,0,1029},{17,11,39},{148,11,36},{4,0
+,568},{5,10,993},{7,10,515},{137,10,91},{132,0,732},{10,0,617},{138,11,617},{134
+,0,974},{7,0,989},{10,0,377},{12,0,363},{13,0,68},{13,0,94},{14,0,108},{142,0,
+306},{136,0,733},{132,0,428},{7,0,1789},{135,11,1062},{7,0,2015},{140,0,665},{
+135,10,1433},{5,0,287},{7,10,921},{8,10,580},{8,10,593},{8,10,630},{138,10,28},{
+138,0,806},{4,10,911},{5,10,867},{5,10,1013},{7,10,2034},{8,10,798},{136,10,813}
+,{134,0,1539},{8,11,523},{150,11,34},{135,11,740},{7,11,238},{7,11,2033},{8,11,
+120},{8,11,188},{8,11,659},{9,11,598},{10,11,466},{12,11,342},{12,11,588},{13,11
+,503},{14,11,246},{143,11,92},{7,0,1563},{141,0,182},{5,10,135},{6,10,519},{7,10
+,1722},{10,10,271},{11,10,261},{145,10,54},{14,10,338},{148,10,81},{7,0,484},{4,
+10,300},{133,10,436},{145,11,114},{6,0,1623},{134,0,1681},{133,11,640},{4,11,201
+},{7,11,1744},{8,11,602},{11,11,247},{11,11,826},{145,11,65},{8,11,164},{146,11,
+62},{6,0,1833},{6,0,1861},{136,0,878},{134,0,1569},{8,10,357},{10,10,745},{14,10
+,426},{17,10,94},{147,10,57},{12,0,93},{12,0,501},{13,0,362},{14,0,151},{15,0,40
+},{15,0,59},{16,0,46},{17,0,25},{18,0,14},{18,0,134},{19,0,25},{19,0,69},{20,0,
+16},{20,0,19},{20,0,66},{21,0,23},{21,0,25},{150,0,42},{6,0,1748},{8,0,715},{9,0
+,802},{10,0,46},{10,0,819},{13,0,308},{14,0,351},{14,0,363},{146,0,67},{132,0,
+994},{4,0,63},{133,0,347},{132,0,591},{133,0,749},{7,11,1577},{10,11,304},{10,11
+,549},{11,11,424},{12,11,365},{13,11,220},{13,11,240},{142,11,33},{133,0,366},{7
+,0,557},{12,0,547},{14,0,86},{133,10,387},{135,0,1747},{132,11,907},{5,11,100},{
+10,11,329},{12,11,416},{149,11,29},{4,10,6},{5,10,708},{136,10,75},{7,10,1351},{
+9,10,581},{10,10,639},{11,10,453},{140,10,584},{7,0,89},{132,10,303},{138,10,772
+},{132,11,176},{5,11,636},{5,11,998},{8,11,26},{137,11,358},{7,11,9},{7,11,1508}
+,{9,11,317},{10,11,210},{10,11,292},{10,11,533},{11,11,555},{12,11,526},{12,11,
+607},{13,11,263},{13,11,459},{142,11,271},{134,0,1463},{6,0,772},{6,0,1137},{139
+,11,595},{7,0,977},{139,11,66},{138,0,893},{20,0,48},{148,11,48},{5,0,824},{133,
+0,941},{134,11,295},{7,0,1543},{7,0,1785},{10,0,690},{4,10,106},{139,10,717},{7,
+0,440},{8,0,230},{139,0,106},{5,10,890},{133,10,988},{6,10,626},{142,10,431},{10
+,11,127},{141,11,27},{17,0,32},{10,10,706},{150,10,44},{132,0,216},{137,0,332},{
+4,10,698},{136,11,119},{139,11,267},{138,10,17},{11,11,526},{11,11,939},{141,11,
+290},{7,11,1167},{11,11,934},{13,11,391},{145,11,76},{139,11,39},{134,10,84},{4,
+0,914},{5,0,800},{133,0,852},{10,0,416},{141,0,115},{7,0,564},{142,0,168},{4,0,
+918},{133,0,876},{134,0,1764},{152,0,3},{4,0,92},{5,0,274},{7,11,126},{136,11,84
+},{140,10,498},{136,11,790},{8,0,501},{5,10,986},{6,10,130},{7,10,1582},{8,10,
+458},{10,10,101},{10,10,318},{138,10,823},{6,11,64},{12,11,377},{141,11,309},{5,
+0,743},{138,0,851},{4,0,49},{7,0,280},{135,0,1633},{134,0,879},{136,0,47},{7,10,
+1644},{137,10,129},{132,0,865},{134,0,1202},{9,11,34},{139,11,484},{135,10,997},
+{5,0,272},{5,0,908},{5,0,942},{8,0,197},{9,0,47},{11,0,538},{139,0,742},{6,11,
+1700},{7,11,26},{7,11,293},{7,11,382},{7,11,1026},{7,11,1087},{7,11,2027},{8,11,
+24},{8,11,114},{8,11,252},{8,11,727},{8,11,729},{9,11,30},{9,11,199},{9,11,231},
+{9,11,251},{9,11,334},{9,11,361},{9,11,488},{9,11,712},{10,11,55},{10,11,60},{10
+,11,232},{10,11,332},{10,11,384},{10,11,396},{10,11,504},{10,11,542},{10,11,652}
+,{11,11,20},{11,11,48},{11,11,207},{11,11,291},{11,11,298},{11,11,342},{11,11,
+365},{11,11,394},{11,11,620},{11,11,705},{11,11,1017},{12,11,123},{12,11,340},{
+12,11,406},{12,11,643},{13,11,61},{13,11,269},{13,11,311},{13,11,319},{13,11,486
+},{14,11,234},{15,11,62},{15,11,85},{16,11,71},{18,11,119},{148,11,105},{6,0,
+1455},{150,11,37},{135,10,1927},{135,0,1911},{137,0,891},{7,10,1756},{137,10,98}
+,{7,10,1046},{139,10,160},{132,0,761},{6,11,379},{7,11,270},{7,11,1116},{8,11,
+176},{8,11,183},{9,11,432},{9,11,661},{12,11,247},{12,11,617},{146,11,125},{6,10
+,45},{7,10,433},{8,10,129},{9,10,21},{10,10,392},{11,10,79},{12,10,499},{13,10,
+199},{141,10,451},{4,0,407},{5,11,792},{133,11,900},{132,0,560},{135,0,183},{13,
+0,490},{7,10,558},{136,10,353},{4,0,475},{6,0,731},{11,0,35},{13,0,71},{13,0,177
+},{14,0,422},{133,10,785},{8,10,81},{9,10,189},{9,10,201},{11,10,478},{11,10,712
+},{141,10,338},{4,0,418},{4,0,819},{133,10,353},{151,10,26},{4,11,901},{133,11,
+776},{132,0,575},{7,0,818},{16,0,92},{17,0,14},{17,0,45},{18,0,75},{148,0,18},{6
+,0,222},{7,0,636},{7,0,1620},{8,0,409},{9,0,693},{139,0,77},{6,10,25},{7,10,855}
+,{7,10,1258},{144,10,32},{6,0,1880},{6,0,1887},{6,0,1918},{6,0,1924},{9,0,967},{
+9,0,995},{9,0,1015},{12,0,826},{12,0,849},{12,0,857},{12,0,860},{12,0,886},{12,0
+,932},{18,0,228},{18,0,231},{146,0,240},{134,0,633},{134,0,1308},{4,11,37},{5,11
+,334},{135,11,1253},{10,0,86},{4,10,4},{7,10,1118},{7,10,1320},{7,10,1706},{8,10
+,277},{9,10,622},{11,10,724},{12,10,350},{12,10,397},{13,10,28},{13,10,159},{15,
+10,89},{18,10,5},{19,10,9},{20,10,34},{150,10,47},{132,11,508},{137,11,448},{12,
+11,107},{146,11,31},{132,0,817},{134,0,663},{133,0,882},{134,0,914},{132,11,540}
+,{132,11,533},{136,11,608},{8,0,885},{138,0,865},{132,0,426},{6,0,58},{7,0,745},
+{7,0,1969},{8,0,399},{8,0,675},{9,0,479},{9,0,731},{10,0,330},{10,0,593},{10,0,
+817},{11,0,32},{11,0,133},{11,0,221},{145,0,68},{134,10,255},{7,0,102},{137,0,
+538},{137,10,216},{7,11,253},{136,11,549},{135,11,912},{9,10,183},{139,10,286},{
+11,10,956},{151,10,3},{8,11,527},{18,11,60},{147,11,24},{4,10,536},{7,10,1141},{
+10,10,723},{139,10,371},{133,11,920},{7,0,876},{135,10,285},{135,10,560},{132,10
+,690},{142,11,126},{11,10,33},{12,10,571},{149,10,1},{133,0,566},{9,0,139},{10,0
+,399},{11,0,469},{12,0,634},{13,0,223},{132,11,483},{6,0,48},{135,0,63},{18,0,12
+},{7,10,1862},{12,10,491},{12,10,520},{13,10,383},{142,10,244},{135,11,1665},{
+132,11,448},{9,11,495},{146,11,104},{6,0,114},{7,0,1224},{7,0,1556},{136,0,3},{4
+,10,190},{133,10,554},{8,0,576},{9,0,267},{133,10,1001},{133,10,446},{133,0,933}
+,{139,11,1009},{8,11,653},{13,11,93},{147,11,14},{6,0,692},{6,0,821},{134,0,1077
+},{5,11,172},{135,11,801},{138,0,752},{4,0,375},{134,0,638},{134,0,1011},{140,11
+,540},{9,0,96},{133,11,260},{139,11,587},{135,10,1231},{12,0,30},{13,0,148},{14,
+0,87},{14,0,182},{16,0,42},{20,0,70},{132,10,304},{6,0,1398},{7,0,56},{7,0,1989}
+,{8,0,337},{8,0,738},{9,0,600},{12,0,37},{13,0,447},{142,0,92},{138,0,666},{5,0,
+394},{7,0,487},{136,0,246},{9,0,437},{6,10,53},{6,10,199},{7,10,1408},{8,10,32},
+{8,10,93},{10,10,397},{10,10,629},{11,10,593},{11,10,763},{13,10,326},{145,10,35
+},{134,10,105},{9,0,320},{10,0,506},{138,10,794},{7,11,57},{8,11,167},{8,11,375}
+,{9,11,82},{9,11,561},{10,11,620},{10,11,770},{11,10,704},{141,10,396},{6,0,1003
+},{5,10,114},{5,10,255},{141,10,285},{7,0,866},{135,0,1163},{133,11,531},{132,0,
+328},{7,10,2035},{8,10,19},{9,10,89},{138,10,831},{8,11,194},{136,11,756},{136,0
+,1000},{5,11,453},{134,11,441},{4,0,101},{5,0,833},{7,0,1171},{136,0,744},{133,0
+,726},{136,10,746},{138,0,176},{6,0,9},{6,0,397},{7,0,53},{7,0,1742},{10,0,632},
+{11,0,828},{140,0,146},{135,11,22},{145,11,64},{132,0,839},{11,0,417},{12,0,223}
+,{140,0,265},{4,11,102},{7,11,815},{7,11,1699},{139,11,964},{5,10,955},{136,10,
+814},{6,0,1931},{6,0,2007},{18,0,246},{146,0,247},{8,0,198},{11,0,29},{140,0,534
+},{135,0,1771},{6,0,846},{7,11,1010},{11,11,733},{11,11,759},{12,11,563},{13,11,
+34},{14,11,101},{18,11,45},{146,11,129},{4,0,186},{5,0,157},{8,0,168},{138,0,6},
+{132,11,899},{133,10,56},{148,10,100},{133,0,875},{5,0,773},{5,0,991},{6,0,1635}
+,{134,0,1788},{6,0,1274},{9,0,477},{141,0,78},{4,0,639},{7,0,111},{8,0,581},{12,
+0,177},{6,11,52},{9,11,104},{9,11,559},{10,10,4},{10,10,13},{11,10,638},{12,11,
+308},{19,11,87},{148,10,57},{132,11,604},{4,11,301},{133,10,738},{133,10,758},{
+134,0,1747},{7,11,1440},{11,11,854},{11,11,872},{11,11,921},{12,11,551},{13,11,
+472},{142,11,367},{7,0,1364},{7,0,1907},{141,0,158},{134,0,873},{4,0,404},{4,0,
+659},{7,0,552},{135,0,675},{135,10,1112},{139,10,328},{7,11,508},{137,10,133},{
+133,0,391},{5,10,110},{6,10,169},{6,10,1702},{7,10,400},{8,10,538},{9,10,184},{9
+,10,524},{140,10,218},{6,11,310},{7,11,1849},{8,11,72},{8,11,272},{8,11,431},{9,
+11,12},{9,11,351},{10,11,563},{10,11,630},{10,11,810},{11,11,367},{11,11,599},{
+11,11,686},{140,11,672},{5,0,540},{6,0,1697},{136,0,668},{132,0,883},{134,0,78},
+{12,0,628},{18,0,79},{6,10,133},{9,10,353},{139,10,993},{6,11,181},{7,11,537},{8
+,11,64},{9,11,127},{10,11,496},{12,11,510},{141,11,384},{6,10,93},{7,10,1422},{7
+,10,1851},{8,10,673},{9,10,529},{140,10,43},{137,10,371},{134,0,1460},{134,0,962
+},{4,11,244},{135,11,233},{9,10,25},{10,10,467},{138,10,559},{4,10,335},{135,10,
+942},{133,0,460},{135,11,334},{134,11,1650},{4,0,199},{139,0,34},{5,10,601},{8,
+10,39},{10,10,773},{11,10,84},{12,10,205},{142,10,1},{133,10,870},{134,0,388},{
+14,0,474},{148,0,120},{133,11,369},{139,0,271},{4,0,511},{9,0,333},{9,0,379},{10
+,0,602},{11,0,441},{11,0,723},{11,0,976},{12,0,357},{132,10,181},{134,0,608},{
+134,10,1652},{22,0,49},{137,11,338},{140,0,988},{134,0,617},{5,0,938},{136,0,707
+},{132,10,97},{5,10,147},{6,10,286},{7,10,1362},{141,10,176},{6,0,756},{134,0,
+1149},{133,11,896},{6,10,375},{7,10,169},{7,10,254},{136,10,780},{134,0,1583},{
+135,10,1447},{139,0,285},{7,11,1117},{8,11,393},{136,11,539},{135,0,344},{6,0,
+469},{7,0,1709},{138,0,515},{5,10,629},{135,10,1549},{5,11,4},{5,11,810},{6,11,
+13},{6,11,538},{6,11,1690},{6,11,1726},{7,11,499},{7,11,1819},{8,11,148},{8,11,
+696},{8,11,791},{12,11,125},{13,11,54},{143,11,9},{135,11,1268},{137,0,404},{132
+,0,500},{5,0,68},{134,0,383},{11,0,216},{139,0,340},{4,11,925},{5,11,803},{8,11,
+698},{138,11,828},{4,0,337},{6,0,353},{7,0,1934},{8,0,488},{137,0,429},{7,0,236}
+,{7,0,1795},{8,0,259},{9,0,135},{9,0,177},{9,0,860},{10,0,825},{11,0,115},{11,0,
+370},{11,0,405},{11,0,604},{12,0,10},{12,0,667},{12,0,669},{13,0,76},{14,0,310},
+{15,0,76},{15,0,147},{148,0,23},{4,0,15},{4,0,490},{5,0,22},{6,0,244},{7,0,40},{
+7,0,200},{7,0,906},{7,0,1199},{9,0,616},{10,0,716},{11,0,635},{11,0,801},{140,0,
+458},{12,0,756},{132,10,420},{134,0,1504},{6,0,757},{133,11,383},{6,0,1266},{135
+,0,1735},{5,0,598},{7,0,791},{8,0,108},{9,0,123},{7,10,1570},{140,10,542},{142,
+11,410},{9,11,660},{138,11,347}
+};
+/* GENERATED CODE END */
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_STATIC_DICT_LUT_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/utf8_util.c b/third-party/libjxl/libjxl/third_party/brotli/c/enc/utf8_util.c
new file mode 100644
index 0000000000..65ec3f5c8d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/utf8_util.c
@@ -0,0 +1,85 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Heuristics for deciding about the UTF8-ness of strings. */
+
+#include "utf8_util.h"
+
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static size_t BrotliParseAsUTF8(  /* Parses one symbol; returns bytes used. */
+    int* symbol, const uint8_t* input, size_t size) {
+  /* ASCII: high bit clear. A zero byte is not accepted and falls through. */
+  if ((input[0] & 0x80) == 0) {
+    *symbol = input[0];
+    if (*symbol > 0) {
+      return 1;
+    }
+  }
+  /* 2-byte UTF8: 110xxxxx 10xxxxxx */
+  if (size > 1u &&
+      (input[0] & 0xE0) == 0xC0 &&
+      (input[1] & 0xC0) == 0x80) {
+    *symbol = (((input[0] & 0x1F) << 6) |
+               (input[1] & 0x3F));
+    if (*symbol > 0x7F) {  /* Reject values that fit in 1 byte. */
+      return 2;
+    }
+  }
+  /* 3-byte UTF8: 1110xxxx 10xxxxxx 10xxxxxx */
+  if (size > 2u &&
+      (input[0] & 0xF0) == 0xE0 &&
+      (input[1] & 0xC0) == 0x80 &&
+      (input[2] & 0xC0) == 0x80) {
+    *symbol = (((input[0] & 0x0F) << 12) |
+               ((input[1] & 0x3F) << 6) |
+               (input[2] & 0x3F));
+    if (*symbol > 0x7FF) {  /* Reject values that fit in 2 bytes. */
+      return 3;
+    }
+  }
+  /* 4-byte UTF8: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+  if (size > 3u &&
+      (input[0] & 0xF8) == 0xF0 &&
+      (input[1] & 0xC0) == 0x80 &&
+      (input[2] & 0xC0) == 0x80 &&
+      (input[3] & 0xC0) == 0x80) {
+    *symbol = (((input[0] & 0x07) << 18) |
+               ((input[1] & 0x3F) << 12) |
+               ((input[2] & 0x3F) << 6) |
+               (input[3] & 0x3F));
+    if (*symbol > 0xFFFF && *symbol <= 0x10FFFF) {  /* 4-byte range only. */
+      return 4;
+    }
+  }
+  /* Not UTF8: emit a special symbol (>= 0x110000) above the UTF8 range. */
+  *symbol = 0x110000 | input[0];
+  return 1;  /* Consume exactly one byte of the invalid input. */
+}
+
+/* Returns 1 if more than min_fraction of the |length| bytes parse as UTF8. */
+BROTLI_BOOL BrotliIsMostlyUTF8(
+    const uint8_t* data, const size_t pos, const size_t mask,
+    const size_t length, const double min_fraction) {
+  size_t size_utf8 = 0;  /* Number of bytes that parsed as valid UTF8. */
+  size_t i = 0;
+  while (i < length) {
+    int symbol;
+    size_t bytes_read =
+        BrotliParseAsUTF8(&symbol, &data[(pos + i) & mask], length - i);
+    i += bytes_read;
+    if (symbol < 0x110000) size_utf8 += bytes_read;  /* Count valid bytes. */
+  }
+  return TO_BROTLI_BOOL((double)size_utf8 > min_fraction * (double)length);
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/utf8_util.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/utf8_util.h
new file mode 100644
index 0000000000..a38a95383c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/utf8_util.h
@@ -0,0 +1,33 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Heuristics for deciding about the UTF8-ness of strings. */
+
+#ifndef BROTLI_ENC_UTF8_UTIL_H_
+#define BROTLI_ENC_UTF8_UTIL_H_
+
+#include <brotli/types.h>
+
+#include "../common/platform.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static const double kMinUTF8Ratio = 0.75;
+
+/* Returns 1 if more than min_fraction of the bytes between pos and
+   pos + length in the (data, mask) ring-buffer are UTF8-encoded, otherwise
+   returns 0. */
+BROTLI_INTERNAL BROTLI_BOOL BrotliIsMostlyUTF8(
+    const uint8_t* data, const size_t pos, const size_t mask,
+    const size_t length, const double min_fraction);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_UTF8_UTIL_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/enc/write_bits.h b/third-party/libjxl/libjxl/third_party/brotli/c/enc/write_bits.h
new file mode 100644
index 0000000000..242754b0ee
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/enc/write_bits.h
@@ -0,0 +1,88 @@
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Write bits into a byte array. */
+
+#ifndef BROTLI_ENC_WRITE_BITS_H_
+#define BROTLI_ENC_WRITE_BITS_H_
+
+#include <brotli/types.h>
+
+#include "../common/platform.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Writes the n_bits-wide value bits into array at bit offset *pos, then
+   advances *pos by n_bits. Bytes are filled in increasing addresses, and
+   within a byte least-significant-bit first.
+
+   At most 56 bits can be written in one call (see BROTLI_DCHECK below).
+   Example: let's assume that 3 bits (Rs below) have been written already:
+
+   BYTE-0     BYTE+1       BYTE+2
+
+   0000 0RRR    0000 0000    0000 0000
+
+   Now, we could write 5 or fewer bits in MSB by just shifting by 3
+   and OR'ing to BYTE-0.
+   For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
+   and locate the rest in BYTE+1, BYTE+2, etc. */
+static BROTLI_INLINE void BrotliWriteBits(size_t n_bits,
+                                          uint64_t bits,
+                                          size_t* BROTLI_RESTRICT pos,
+                                          uint8_t* BROTLI_RESTRICT array) {
+  BROTLI_LOG(("WriteBits  %2d  0x%08x%08x  %10d\n", (int)n_bits,
+      (uint32_t)(bits >> 32), (uint32_t)(bits & 0xFFFFFFFF),
+      (int)*pos));
+  BROTLI_DCHECK((bits >> n_bits) == 0);  /* No bits set above n_bits. */
+  BROTLI_DCHECK(n_bits <= 56);
+#if defined(BROTLI_LITTLE_ENDIAN)
+  /* This branch of the code can write up to 56 bits at a time,
+     7 bits are lost by being perhaps already in *p and at least
+     1 bit is needed to initialize the bit-stream ahead (i.e. if 7
+     bits are in *p and we write 57 bits, then the next write will
+     access a byte that was never initialized). */
+  {
+    uint8_t* p = &array[*pos >> 3];  /* Byte holding the current bit. */
+    uint64_t v = (uint64_t)(*p);  /* Zero-extend 8 to 64 bits. */
+    v |= bits << (*pos & 7);  /* Place new bits above the ones already set. */
+    BROTLI_UNALIGNED_STORE64LE(p, v);  /* Set some bits. */
+    *pos += n_bits;
+  }
+#else
+  /* implicit & 0xFF is assumed for uint8_t arithmetics */
+  {
+    uint8_t* array_pos = &array[*pos >> 3];
+    const size_t bits_reserved_in_first_byte = (*pos & 7);
+    size_t bits_left_to_write;
+    bits <<= bits_reserved_in_first_byte;
+    *array_pos++ |= (uint8_t)bits;  /* Merge with bits already in the byte. */
+    for (bits_left_to_write = n_bits + bits_reserved_in_first_byte;
+         bits_left_to_write >= 9;
+         bits_left_to_write -= 8) {
+      bits >>= 8;
+      *array_pos++ = (uint8_t)bits;
+    }
+    *array_pos = 0;  /* Clear the following byte so a later call can OR in. */
+    *pos += n_bits;
+  }
+#endif
+}
+
+static BROTLI_INLINE void BrotliWriteBitsPrepareStorage(
+    size_t pos, uint8_t* array) {  /* pos is a bit offset into array. */
+  BROTLI_LOG(("WriteBitsPrepareStorage            %10d\n", (int)pos));
+  BROTLI_DCHECK((pos & 7) == 0);  /* pos must be byte-aligned. */
+  array[pos >> 3] = 0;  /* Zero the byte that BrotliWriteBits will OR into. */
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_WRITE_BITS_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/fuzz/decode_fuzzer.c b/third-party/libjxl/libjxl/third_party/brotli/c/fuzz/decode_fuzzer.c
new file mode 100644
index 0000000000..46144e07eb
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/fuzz/decode_fuzzer.c
@@ -0,0 +1,67 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <brotli/decode.h>
+
+// Entry point for LibFuzzer.
+//
+// Feeds |data| to a streaming Brotli decoder. The low three bits of the last
+// input byte select the input chunk size: 0 means "whole input at once",
+// 1..7 means that many bytes per call. Always returns 0.
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  size_t addend = 0;
+  if (size > 0)
+    addend = data[size - 1] & 7;
+  const uint8_t* next_in = data;
+
+  const int kBufferSize = 1024;
+  uint8_t* buffer = (uint8_t*) malloc(kBufferSize);
+  if (!buffer) {
+    // OOM is out-of-scope here.
+    return 0;
+  }
+  /* The biggest "magic number" in brotli is 16MiB - 16, so no need to check
+     the cases with much longer output. */
+  const size_t total_out_limit = (addend == 0) ? (1 << 26) : (1 << 24);
+  size_t total_out = 0;
+
+  BrotliDecoderState* state = BrotliDecoderCreateInstance(0, 0, 0);
+  if (!state) {
+    // Instance creation can fail (returns 0); OOM is out-of-scope here.
+    free(buffer);
+    return 0;
+  }
+
+  if (addend == 0)
+    addend = size;
+  /* Test both fast (addend == size) and slow (addend <= 7) decoding paths. */
+  for (size_t i = 0; i < size;) {
+    size_t next_i = i + addend;
+    if (next_i > size)
+      next_i = size;
+    size_t avail_in = next_i - i;
+    i = next_i;
+    BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT;
+    while (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
+      size_t avail_out = kBufferSize;
+      uint8_t* next_out = buffer;
+      result = BrotliDecoderDecompressStream(
+          state, &avail_in, &next_in, &avail_out, &next_out, &total_out);
+      if (total_out > total_out_limit)
+        break;
+    }
+    if (total_out > total_out_limit)
+      break;
+    if (result != BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT)
+      break;
+  }
+
+  BrotliDecoderDestroyInstance(state);
+  free(buffer);
+  return 0;
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/fuzz/run_decode_fuzzer.c b/third-party/libjxl/libjxl/third_party/brotli/c/fuzz/run_decode_fuzzer.c
new file mode 100644
index 0000000000..c84f98a32b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/fuzz/run_decode_fuzzer.c
@@ -0,0 +1,61 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Simple runner for decode_fuzzer.c */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+/* Defined in decode_fuzzer.c; the return type must match that definition. */
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size);
+
+/* Reads at most 1 MiB from the file named by argv[1] and passes it to the
+   fuzzer entry point. Exits with EXIT_FAILURE on usage, I/O or allocation
+   errors, and with EXIT_SUCCESS otherwise. */
+int main(int argc, char* *argv) {
+  if (argc != 2) {
+    fprintf(stderr, "Exactly one argument is expected.\n");
+    exit(EXIT_FAILURE);
+  }
+
+  /* Binary mode: the corpus is compressed data, not text. */
+  FILE* f = fopen(argv[1], "rb");
+  if (!f) {
+    fprintf(stderr, "Failed to open input file.");
+    exit(EXIT_FAILURE);
+  }
+
+  size_t max_len = 1 << 20;
+  unsigned char* tmp = (unsigned char*)malloc(max_len);
+  if (!tmp) {
+    fclose(f);
+    fprintf(stderr, "Failed to allocate read buffer.\n");
+    exit(EXIT_FAILURE);
+  }
+  size_t len = fread(tmp, 1, max_len, f);
+  if (ferror(f)) {
+    fclose(f);
+    fprintf(stderr, "Failed read input file.");
+    exit(EXIT_FAILURE);
+  }
+  fclose(f);
+
+  /* Make data after the end "inaccessible". Allocate at least one byte so
+     that a NULL result always means failure, even for an empty input. */
+  unsigned char* data = (unsigned char*)malloc(len > 0 ? len : 1);
+  if (!data) {
+    fprintf(stderr, "Failed to allocate input buffer.\n");
+    exit(EXIT_FAILURE);
+  }
+  memcpy(data, tmp, len);
+  free(tmp);
+
+  LLVMFuzzerTestOneInput(data, len);
+  free(data);
+  exit(EXIT_SUCCESS);
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/fuzz/test_fuzzer.sh b/third-party/libjxl/libjxl/third_party/brotli/c/fuzz/test_fuzzer.sh
new file mode 100755
index 0000000000..4b99947268
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/fuzz/test_fuzzer.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+# Builds the decoder with AddressSanitizer and replays the bundled fuzz corpus
+# through run_decode_fuzzer. Any failing step aborts the script (set -e).
+set -e
+
+export CC=${CC:-cc}
+
+BROTLI="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../.." && pwd )"
+SRC="$BROTLI/c"
+
+cd "$BROTLI"
+
+rm -rf bin
+mkdir bin
+cd bin
+
+# nproc is not available everywhere (e.g. macOS); fall back to sysctl, then 1.
+JOBS="$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1)"
+
+cmake "$BROTLI" -DCMAKE_C_COMPILER="$CC" \
+    -DBUILD_TESTING=OFF -DBUILD_SHARED_LIBS=OFF -DENABLE_SANITIZER=address
+make -j"$JOBS" brotlidec
+
+${CC} -o run_decode_fuzzer -std=c99 -fsanitize=address -I"$SRC/include" \
+    "$SRC/fuzz/decode_fuzzer.c" "$SRC/fuzz/run_decode_fuzzer.c" \
+    ./libbrotlidec.a ./libbrotlicommon.a
+
+mkdir decode_corpora
+unzip "$BROTLI/java/org/brotli/integration/fuzz_data.zip" -d decode_corpora
+
+# Glob instead of parsing `ls` output so unusual file names are handled.
+for f in decode_corpora/*
+do
+ [ -e "$f" ] || continue
+ echo "Testing $(basename "$f")"
+ ./run_decode_fuzzer "$f"
+done
+
+cd "$BROTLI"
+rm -rf bin
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/include/brotli/decode.h b/third-party/libjxl/libjxl/third_party/brotli/c/include/brotli/decode.h
new file mode 100644
index 0000000000..9b580d22a0
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/include/brotli/decode.h
@@ -0,0 +1,368 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/**
+ * @file
+ * API for Brotli decompression.
+ */
+
+#ifndef BROTLI_DEC_DECODE_H_
+#define BROTLI_DEC_DECODE_H_
+
+#include <brotli/port.h>
+#include <brotli/shared_dictionary.h>
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/**
+ * Opaque structure that holds decoder state.
+ *
+ * Allocated and initialized with ::BrotliDecoderCreateInstance.
+ * Cleaned up and deallocated with ::BrotliDecoderDestroyInstance.
+ */
+typedef struct BrotliDecoderStateStruct BrotliDecoderState;
+
+/**
+ * Result type for ::BrotliDecoderDecompress and
+ * ::BrotliDecoderDecompressStream functions.
+ */
+typedef enum {
+  /** Decoding error, e.g. corrupted input or memory allocation problem. */
+  BROTLI_DECODER_RESULT_ERROR = 0,
+  /** Decoding successfully completed. */
+  BROTLI_DECODER_RESULT_SUCCESS = 1,
+  /** Partially done; should be called again with more input. */
+  BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT = 2,
+  /** Partially done; should be called again with more output. */
+  BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT = 3
+} BrotliDecoderResult;
+
+/**
+ * Template that evaluates items of ::BrotliDecoderErrorCode.
+ *
+ * Example: @code {.cpp}
+ * // Log Brotli error code.
+ * switch (brotliDecoderErrorCode) {
+ * #define CASE_(PREFIX, NAME, CODE) \
+ *   case BROTLI_DECODER ## PREFIX ## NAME: \
+ *     LOG(INFO) << "error code:" << #NAME; \
+ *     break;
+ * #define NEWLINE_
+ * BROTLI_DECODER_ERROR_CODES_LIST(CASE_, NEWLINE_)
+ * #undef CASE_
+ * #undef NEWLINE_
+ *   default: LOG(FATAL) << "unknown brotli error code";
+ * }
+ * @endcode
+ */
+#define BROTLI_DECODER_ERROR_CODES_LIST(BROTLI_ERROR_CODE, SEPARATOR)      \
+  BROTLI_ERROR_CODE(_, NO_ERROR, 0) SEPARATOR                              \
+  /* Same as BrotliDecoderResult values */                                 \
+  BROTLI_ERROR_CODE(_, SUCCESS, 1) SEPARATOR                               \
+  BROTLI_ERROR_CODE(_, NEEDS_MORE_INPUT, 2) SEPARATOR                      \
+  BROTLI_ERROR_CODE(_, NEEDS_MORE_OUTPUT, 3) SEPARATOR                     \
+                                                                           \
+  /* Errors caused by invalid input */                                     \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, EXUBERANT_NIBBLE, -1) SEPARATOR        \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, RESERVED, -2) SEPARATOR                \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, EXUBERANT_META_NIBBLE, -3) SEPARATOR   \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, SIMPLE_HUFFMAN_ALPHABET, -4) SEPARATOR \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, SIMPLE_HUFFMAN_SAME, -5) SEPARATOR     \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, CL_SPACE, -6) SEPARATOR                \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, HUFFMAN_SPACE, -7) SEPARATOR           \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, CONTEXT_MAP_REPEAT, -8) SEPARATOR      \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, BLOCK_LENGTH_1, -9) SEPARATOR          \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, BLOCK_LENGTH_2, -10) SEPARATOR         \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, TRANSFORM, -11) SEPARATOR              \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, DICTIONARY, -12) SEPARATOR             \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, WINDOW_BITS, -13) SEPARATOR            \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, PADDING_1, -14) SEPARATOR              \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, PADDING_2, -15) SEPARATOR              \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, DISTANCE, -16) SEPARATOR               \
+                                                                           \
+  /* -17 code is reserved */                                               \
+                                                                           \
+  BROTLI_ERROR_CODE(_ERROR_, COMPOUND_DICTIONARY, -18) SEPARATOR           \
+  BROTLI_ERROR_CODE(_ERROR_, DICTIONARY_NOT_SET, -19) SEPARATOR            \
+  BROTLI_ERROR_CODE(_ERROR_, INVALID_ARGUMENTS, -20) SEPARATOR             \
+                                                                           \
+  /* Memory allocation problems */                                         \
+  BROTLI_ERROR_CODE(_ERROR_ALLOC_, CONTEXT_MODES, -21) SEPARATOR           \
+  /* Literal, insert and distance trees together */                        \
+  BROTLI_ERROR_CODE(_ERROR_ALLOC_, TREE_GROUPS, -22) SEPARATOR             \
+  /* -23..-24 codes are reserved for distinct tree groups */               \
+  BROTLI_ERROR_CODE(_ERROR_ALLOC_, CONTEXT_MAP, -25) SEPARATOR             \
+  BROTLI_ERROR_CODE(_ERROR_ALLOC_, RING_BUFFER_1, -26) SEPARATOR           \
+  BROTLI_ERROR_CODE(_ERROR_ALLOC_, RING_BUFFER_2, -27) SEPARATOR           \
+  /* -28..-29 codes are reserved for dynamic ring-buffer allocation */     \
+  BROTLI_ERROR_CODE(_ERROR_ALLOC_, BLOCK_TYPE_TREES, -30) SEPARATOR        \
+                                                                           \
+  /* "Impossible" states */                                                \
+  BROTLI_ERROR_CODE(_ERROR_, UNREACHABLE, -31)
+
+/**
+ * Error code for detailed logging / production debugging.
+ *
+ * See ::BrotliDecoderGetErrorCode and ::BROTLI_LAST_ERROR_CODE.
+ */
+typedef enum {
+#define BROTLI_COMMA_ ,
+#define BROTLI_ERROR_CODE_ENUM_ITEM_(PREFIX, NAME, CODE) \
+    BROTLI_DECODER ## PREFIX ## NAME = CODE
+  BROTLI_DECODER_ERROR_CODES_LIST(BROTLI_ERROR_CODE_ENUM_ITEM_, BROTLI_COMMA_)
+} BrotliDecoderErrorCode;
+#undef BROTLI_ERROR_CODE_ENUM_ITEM_
+#undef BROTLI_COMMA_
+
+/**
+ * The value of the last error code, negative integer.
+ *
+ * All other error code values are in the range from ::BROTLI_LAST_ERROR_CODE
+ * to @c -1. There are also 4 other possible non-error codes @c 0 .. @c 3 in
+ * ::BrotliDecoderErrorCode enumeration.
+ */
+#define BROTLI_LAST_ERROR_CODE BROTLI_DECODER_ERROR_UNREACHABLE
+
+/** Options to be used with ::BrotliDecoderSetParameter. */
+typedef enum BrotliDecoderParameter {
+  /**
+   * Disable "canny" ring buffer allocation strategy.
+   *
+   * Ring buffer is allocated according to window size, despite the real size of
+   * the content.
+   */
+  BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION = 0,
+  /**
+   * Flag that determines if "Large Window Brotli" is used.
+   */
+  BROTLI_DECODER_PARAM_LARGE_WINDOW = 1
+} BrotliDecoderParameter;
+
+/**
+ * Sets the specified parameter to the given decoder instance.
+ *
+ * @param state decoder instance
+ * @param param parameter to set
+ * @param value new parameter value
+ * @returns ::BROTLI_FALSE if parameter is unrecognized, or value is invalid
+ * @returns ::BROTLI_TRUE if value is accepted
+ */
+BROTLI_DEC_API BROTLI_BOOL BrotliDecoderSetParameter(
+    BrotliDecoderState* state, BrotliDecoderParameter param, uint32_t value);
+
+/**
+ * Adds LZ77 prefix dictionary, adds or replaces built-in static dictionary and
+ * transforms.
+ *
+ * Attached dictionary ownership is not transferred.
+ * Data provided to this method should be kept accessible until
+ * decoding is finished and decoder instance is destroyed.
+ *
+ * @note Dictionaries can NOT be attached after actual decoding is started.
+ *
+ * @param state decoder instance
+ * @param type dictionary data format
+ * @param data_size length of memory region pointed by @p data
+ * @param data dictionary data in format corresponding to @p type
+ * @returns ::BROTLI_FALSE if dictionary is corrupted,
+ *          or dictionary count limit is reached
+ * @returns ::BROTLI_TRUE if dictionary is accepted / attached
+ */
+BROTLI_DEC_API BROTLI_BOOL BrotliDecoderAttachDictionary(
+    BrotliDecoderState* state, BrotliSharedDictionaryType type,
+    size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)]);
+
+/**
+ * Creates an instance of ::BrotliDecoderState and initializes it.
+ *
+ * The instance can be used once for decoding and should then be destroyed with
+ * ::BrotliDecoderDestroyInstance, it cannot be reused for a new decoding
+ * session.
+ *
+ * @p alloc_func and @p free_func @b MUST be both zero or both non-zero. In the
+ * case they are both zero, default memory allocators are used. @p opaque is
+ * passed to @p alloc_func and @p free_func when they are called. @p free_func
+ * has to return without doing anything when asked to free a NULL pointer.
+ *
+ * @param alloc_func custom memory allocation function
+ * @param free_func custom memory free function
+ * @param opaque custom memory manager handle
+ * @returns @c 0 if instance can not be allocated or initialized
+ * @returns pointer to initialized ::BrotliDecoderState otherwise
+ */
+BROTLI_DEC_API BrotliDecoderState* BrotliDecoderCreateInstance(
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
+
+/**
+ * Deinitializes and frees ::BrotliDecoderState instance.
+ *
+ * @param state decoder instance to be cleaned up and deallocated
+ */
+BROTLI_DEC_API void BrotliDecoderDestroyInstance(BrotliDecoderState* state);
+
+/**
+ * Performs one-shot memory-to-memory decompression.
+ *
+ * Decompresses the data in @p encoded_buffer into @p decoded_buffer, and sets
+ * @p *decoded_size to the decompressed length.
+ *
+ * @param encoded_size size of @p encoded_buffer
+ * @param encoded_buffer compressed data buffer with at least @p encoded_size
+ *        addressable bytes
+ * @param[in, out] decoded_size @b in: size of @p decoded_buffer; \n
+ *                 @b out: length of decompressed data written to
+ *                 @p decoded_buffer
+ * @param decoded_buffer decompressed data destination buffer
+ * @returns ::BROTLI_DECODER_RESULT_ERROR if input is corrupted, memory
+ *          allocation failed, or @p decoded_buffer is not large enough;
+ * @returns ::BROTLI_DECODER_RESULT_SUCCESS otherwise
+ */
+BROTLI_DEC_API BrotliDecoderResult BrotliDecoderDecompress(
+    size_t encoded_size,
+    const uint8_t encoded_buffer[BROTLI_ARRAY_PARAM(encoded_size)],
+    size_t* decoded_size,
+    uint8_t decoded_buffer[BROTLI_ARRAY_PARAM(*decoded_size)]);
+
+/**
+ * Decompresses the input stream to the output stream.
+ *
+ * The values @p *available_in and @p *available_out must specify the number of
+ * bytes addressable at @p *next_in and @p *next_out respectively.
+ * When @p *available_out is @c 0, @p next_out is allowed to be @c NULL.
+ *
+ * After each call, @p *available_in will be decremented by the amount of input
+ * bytes consumed, and the @p *next_in pointer will be incremented by that
+ * amount. Similarly, @p *available_out will be decremented by the amount of
+ * output bytes written, and the @p *next_out pointer will be incremented by
+ * that amount.
+ *
+ * @p total_out, if it is not a null-pointer, will be set to the number
+ * of bytes decompressed since the last @p state initialization.
+ *
+ * @note Input is never overconsumed, so @p next_in and @p available_in could be
+ * passed to the next consumer after decoding is complete.
+ *
+ * @param state decoder instance
+ * @param[in, out] available_in @b in: amount of available input; \n
+ *                 @b out: amount of unused input
+ * @param[in, out] next_in pointer to the next compressed byte
+ * @param[in, out] available_out @b in: length of output buffer; \n
+ *                 @b out: remaining size of output buffer
+ * @param[in, out] next_out output buffer cursor;
+ *                 can be @c NULL if @p available_out is @c 0
+ * @param[out] total_out number of bytes decompressed so far; can be @c NULL
+ * @returns ::BROTLI_DECODER_RESULT_ERROR if input is corrupted, memory
+ *          allocation failed, arguments were invalid, etc.;
+ *          use ::BrotliDecoderGetErrorCode to get detailed error code
+ * @returns ::BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT decoding is blocked until
+ *          more input data is provided
+ * @returns ::BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT decoding is blocked until
+ *          more output space is provided
+ * @returns ::BROTLI_DECODER_RESULT_SUCCESS decoding is finished, no more
+ *          input might be consumed and no more output will be produced
+ */
+BROTLI_DEC_API BrotliDecoderResult BrotliDecoderDecompressStream(
+  BrotliDecoderState* state, size_t* available_in, const uint8_t** next_in,
+  size_t* available_out, uint8_t** next_out, size_t* total_out);
+
+/**
+ * Checks if decoder has more output.
+ *
+ * @param state decoder instance
+ * @returns ::BROTLI_TRUE, if decoder has some unconsumed output
+ * @returns ::BROTLI_FALSE otherwise
+ */
+BROTLI_DEC_API BROTLI_BOOL BrotliDecoderHasMoreOutput(
+    const BrotliDecoderState* state);
+
+/**
+ * Acquires pointer to internal output buffer.
+ *
+ * This method is used to make language bindings easier and more efficient:
+ *  -# push data to ::BrotliDecoderDecompressStream,
+ *     until ::BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT is reported
+ *  -# use ::BrotliDecoderTakeOutput to peek bytes and copy to language-specific
+ *     entity
+ *
+ * Also this could be useful if there is an output stream that is able to
+ * consume all the provided data (e.g. when data is saved to file system).
+ *
+ * @attention After every call to ::BrotliDecoderTakeOutput @p *size bytes of
+ *            output are considered consumed for all consecutive calls to the
+ *            instance methods; returned pointer becomes invalidated as well.
+ *
+ * @note Decoder output is not guaranteed to be contiguous. This means that
+ *       after the size-unrestricted call to ::BrotliDecoderTakeOutput,
+ *       immediate next call to ::BrotliDecoderTakeOutput may return more data.
+ *
+ * @param state decoder instance
+ * @param[in, out] size @b in: number of bytes caller is ready to take, @c 0 if
+ *                 any amount could be handled; \n
+ *                 @b out: amount of data pointed by returned pointer and
+ *                 considered consumed; \n
+ *                 out value is never greater than in value, unless it is @c 0
+ * @returns pointer to output data
+ */
+BROTLI_DEC_API const uint8_t* BrotliDecoderTakeOutput(
+    BrotliDecoderState* state, size_t* size);
+
+/**
+ * Checks if instance has already consumed input.
+ *
+ * Instance that returns ::BROTLI_FALSE is considered "fresh" and could be
+ * reused.
+ *
+ * @param state decoder instance
+ * @returns ::BROTLI_TRUE if decoder has already used some input bytes
+ * @returns ::BROTLI_FALSE otherwise
+ */
+BROTLI_DEC_API BROTLI_BOOL BrotliDecoderIsUsed(const BrotliDecoderState* state);
+
+/**
+ * Checks if decoder instance reached the final state.
+ *
+ * @param state decoder instance
+ * @returns ::BROTLI_TRUE if decoder is in a state where it reached the end of
+ *          the input and produced all of the output
+ * @returns ::BROTLI_FALSE otherwise
+ */
+BROTLI_DEC_API BROTLI_BOOL BrotliDecoderIsFinished(
+    const BrotliDecoderState* state);
+
+/**
+ * Acquires a detailed error code.
+ *
+ * Should be used only after ::BrotliDecoderDecompressStream returns
+ * ::BROTLI_DECODER_RESULT_ERROR.
+ *
+ * See also ::BrotliDecoderErrorString
+ *
+ * @param state decoder instance
+ * @returns last saved error code
+ */
+BROTLI_DEC_API BrotliDecoderErrorCode BrotliDecoderGetErrorCode(
+    const BrotliDecoderState* state);
+
+/**
+ * Converts error code to a c-string.
+ */
+BROTLI_DEC_API const char* BrotliDecoderErrorString(BrotliDecoderErrorCode c);
+
+/**
+ * Gets a decoder library version.
+ *
+ * Look at BROTLI_VERSION for more information.
+ */
+BROTLI_DEC_API uint32_t BrotliDecoderVersion(void);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
+
+#endif  /* BROTLI_DEC_DECODE_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/include/brotli/encode.h b/third-party/libjxl/libjxl/third_party/brotli/c/include/brotli/encode.h
new file mode 100644
index 0000000000..7247d3d698
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/include/brotli/encode.h
@@ -0,0 +1,501 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/**
+ * @file
+ * API for Brotli compression.
+ */
+
+#ifndef BROTLI_ENC_ENCODE_H_
+#define BROTLI_ENC_ENCODE_H_
+
+#include <brotli/port.h>
+#include <brotli/shared_dictionary.h>
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/** Minimal value for ::BROTLI_PARAM_LGWIN parameter. */
+#define BROTLI_MIN_WINDOW_BITS 10
+/**
+ * Maximal value for ::BROTLI_PARAM_LGWIN parameter.
+ *
+ * @note equal to @c BROTLI_MAX_DISTANCE_BITS constant.
+ */
+#define BROTLI_MAX_WINDOW_BITS 24
+/**
+ * Maximal value for ::BROTLI_PARAM_LGWIN parameter
+ * in "Large Window Brotli" (32-bit).
+ */
+#define BROTLI_LARGE_MAX_WINDOW_BITS 30
+/** Minimal value for ::BROTLI_PARAM_LGBLOCK parameter. */
+#define BROTLI_MIN_INPUT_BLOCK_BITS 16
+/** Maximal value for ::BROTLI_PARAM_LGBLOCK parameter. */
+#define BROTLI_MAX_INPUT_BLOCK_BITS 24
+/** Minimal value for ::BROTLI_PARAM_QUALITY parameter. */
+#define BROTLI_MIN_QUALITY 0
+/** Maximal value for ::BROTLI_PARAM_QUALITY parameter. */
+#define BROTLI_MAX_QUALITY 11
+
+/** Options for ::BROTLI_PARAM_MODE parameter. */
+typedef enum BrotliEncoderMode {
+  /**
+   * Default compression mode.
+   *
+   * In this mode compressor does not know anything in advance about the
+   * properties of the input.
+   */
+  BROTLI_MODE_GENERIC = 0,
+  /** Compression mode for UTF-8 formatted text input. */
+  BROTLI_MODE_TEXT = 1,
+  /** Compression mode used in WOFF 2.0. */
+  BROTLI_MODE_FONT = 2
+} BrotliEncoderMode;
+
+/** Default value for ::BROTLI_PARAM_QUALITY parameter. */
+#define BROTLI_DEFAULT_QUALITY 11
+/** Default value for ::BROTLI_PARAM_LGWIN parameter. */
+#define BROTLI_DEFAULT_WINDOW 22
+/** Default value for ::BROTLI_PARAM_MODE parameter. */
+#define BROTLI_DEFAULT_MODE BROTLI_MODE_GENERIC
+
+/** Operations that can be performed by streaming encoder. */
+typedef enum BrotliEncoderOperation {
+  /**
+   * Process input.
+   *
+   * Encoder may postpone producing output, until it has processed enough input.
+   */
+  BROTLI_OPERATION_PROCESS = 0,
+  /**
+   * Produce output for all processed input.
+   *
+   * Actual flush is performed when input stream is depleted and there is enough
+   * space in output stream. This means that client should repeat
+   * ::BROTLI_OPERATION_FLUSH operation until @p available_in becomes @c 0, and
+   * ::BrotliEncoderHasMoreOutput returns ::BROTLI_FALSE. If output is acquired
+   * via ::BrotliEncoderTakeOutput, then operation should be repeated after
+   * output buffer is drained.
+   *
+   * @warning Until flush is complete, client @b SHOULD @b NOT swap,
+   *          reduce or extend input stream.
+   *
+   * When flush is complete, output data will be sufficient for decoder to
+   * reproduce all the given input.
+   */
+  BROTLI_OPERATION_FLUSH = 1,
+  /**
+   * Finalize the stream.
+   *
+   * Actual finalization is performed when input stream is depleted and there is
+   * enough space in output stream. This means that client should repeat
+   * ::BROTLI_OPERATION_FINISH operation until @p available_in becomes @c 0, and
+   * ::BrotliEncoderHasMoreOutput returns ::BROTLI_FALSE. If output is acquired
+   * via ::BrotliEncoderTakeOutput, then operation should be repeated after
+   * output buffer is drained.
+   *
+   * @warning Until finalization is complete, client @b SHOULD @b NOT swap,
+   *          reduce or extend input stream.
+   *
+   * Helper function ::BrotliEncoderIsFinished checks if stream is finalized and
+   * output fully dumped.
+   *
+   * Adding more input data to finalized stream is impossible.
+   */
+  BROTLI_OPERATION_FINISH = 2,
+  /**
+   * Emit metadata block to stream.
+   *
+   * Metadata is opaque to Brotli: neither encoder, nor decoder processes this
+   * data or relies on it. It may be used to pass some extra information from
+   * encoder client to decoder client without interfering with main data stream.
+   *
+   * @note Encoder may emit empty metadata blocks internally, to pad encoded
+   *       stream to byte boundary.
+   *
+   * @warning Until emitting metadata is complete client @b SHOULD @b NOT swap,
+   *          reduce or extend input stream.
+   *
+   * @warning The whole content of input buffer is considered to be the content
+   *          of metadata block. Do @b NOT @e append metadata to input stream,
+   *          before it is depleted with other operations.
+   *
+   * Stream is soft-flushed before metadata block is emitted. Metadata block
+   * @b MUST be no longer than 16MiB.
+   */
+  BROTLI_OPERATION_EMIT_METADATA = 3
+} BrotliEncoderOperation;
+
+/** Options to be used with ::BrotliEncoderSetParameter. */
+typedef enum BrotliEncoderParameter {
+  /**
+   * Tune encoder for specific input.
+   *
+   * ::BrotliEncoderMode enumerates all available values.
+   */
+  BROTLI_PARAM_MODE = 0,
+  /**
+   * The main compression speed-density lever.
+   *
+   * The higher the quality, the slower the compression. Range is
+   * from ::BROTLI_MIN_QUALITY to ::BROTLI_MAX_QUALITY.
+   */
+  BROTLI_PARAM_QUALITY = 1,
+  /**
+   * Recommended sliding LZ77 window size.
+   *
+   * Encoder may reduce this value, e.g. if input is much smaller than
+   * window size.
+   *
+   * Window size is `(1 << value) - 16`.
+   *
+   * Range is from ::BROTLI_MIN_WINDOW_BITS to ::BROTLI_MAX_WINDOW_BITS.
+   */
+  BROTLI_PARAM_LGWIN = 2,
+  /**
+   * Recommended input block size.
+   *
+   * Encoder may reduce this value, e.g. if input is much smaller than input
+   * block size.
+   *
+   * Range is from ::BROTLI_MIN_INPUT_BLOCK_BITS to
+   * ::BROTLI_MAX_INPUT_BLOCK_BITS.
+   *
+   * @note Bigger input block size allows better compression, but consumes more
+   *       memory. \n The rough formula of memory used for temporary input
+   *       storage is `3 << lgBlock`.
+   */
+  BROTLI_PARAM_LGBLOCK = 3,
+  /**
+   * Flag that affects usage of "literal context modeling" format feature.
+   *
+   * This flag is a "decoding-speed vs compression ratio" trade-off.
+   */
+  BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING = 4,
+  /**
+   * Estimated total input size for all ::BrotliEncoderCompressStream calls.
+   *
+   * The default value is 0, which means that the total input size is unknown.
+   */
+  BROTLI_PARAM_SIZE_HINT = 5,
+  /**
+   * Flag that determines if "Large Window Brotli" is used.
+   */
+  BROTLI_PARAM_LARGE_WINDOW = 6,
+  /**
+   * Recommended number of postfix bits (NPOSTFIX).
+   *
+   * Encoder may change this value.
+   *
+   * Range is from 0 to ::BROTLI_MAX_NPOSTFIX.
+   */
+  BROTLI_PARAM_NPOSTFIX = 7,
+  /**
+   * Recommended number of direct distance codes (NDIRECT).
+   *
+   * Encoder may change this value.
+   *
+   * Range is from 0 to (15 << NPOSTFIX) in steps of (1 << NPOSTFIX).
+   */
+  BROTLI_PARAM_NDIRECT = 8,
+  /**
+   * Number of bytes of input stream already processed by a different instance.
+   *
+   * @note It is important to configure all the encoder instances with same
+   *       parameters (except this one) in order to allow all the encoded parts
+   *       obey the same restrictions implied by header.
+   *
+   * If offset is not 0, then stream header is omitted.
+   * In any case output start is byte aligned, so for proper streams stitching
+   * "predecessor" stream must be flushed.
+   *
+   * Range is not artificially limited, but all the values greater than or
+   * equal to maximal window size have the same effect. Values greater than
+   * 2**30 are not allowed.
+   */
+  BROTLI_PARAM_STREAM_OFFSET = 9
+} BrotliEncoderParameter;
+
+/**
+ * Opaque structure that holds encoder state.
+ *
+ * Allocated and initialized with ::BrotliEncoderCreateInstance.
+ * Cleaned up and deallocated with ::BrotliEncoderDestroyInstance.
+ */
+typedef struct BrotliEncoderStateStruct BrotliEncoderState;
+
+/**
+ * Sets the specified parameter to the given encoder instance.
+ *
+ * @param state encoder instance
+ * @param param parameter to set
+ * @param value new parameter value
+ * @returns ::BROTLI_FALSE if parameter is unrecognized, or value is invalid
+ * @returns ::BROTLI_FALSE if value of parameter can not be changed at current
+ *          encoder state (e.g. when encoding is started, window size might be
+ *          already encoded and therefore it is impossible to change it)
+ * @returns ::BROTLI_TRUE if value is accepted
+ * @warning invalid values might be accepted in case they would not break
+ *          encoding process.
+ */
+BROTLI_ENC_API BROTLI_BOOL BrotliEncoderSetParameter(
+    BrotliEncoderState* state, BrotliEncoderParameter param, uint32_t value);
+
+/**
+ * Creates an instance of ::BrotliEncoderState and initializes it.
+ *
+ * @p alloc_func and @p free_func @b MUST be both zero or both non-zero. In the
+ * case they are both zero, default memory allocators are used. @p opaque is
+ * passed to @p alloc_func and @p free_func when they are called. @p free_func
+ * has to return without doing anything when asked to free a NULL pointer.
+ *
+ * @param alloc_func custom memory allocation function
+ * @param free_func custom memory free function
+ * @param opaque custom memory manager handle
+ * @returns @c 0 if instance can not be allocated or initialized
+ * @returns pointer to initialized ::BrotliEncoderState otherwise
+ */
+BROTLI_ENC_API BrotliEncoderState* BrotliEncoderCreateInstance(
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
+
+/**
+ * Deinitializes and frees ::BrotliEncoderState instance.
+ *
+ * @param state encoder instance to be cleaned up and deallocated
+ */
+BROTLI_ENC_API void BrotliEncoderDestroyInstance(BrotliEncoderState* state);
+
+/* Opaque type for pointer to different possible internal structures containing
+   dictionary prepared for the encoder */
+typedef struct BrotliEncoderPreparedDictionaryStruct
+    BrotliEncoderPreparedDictionary;
+
+/**
+ * Prepares a shared dictionary from the given file format for the encoder.
+ *
+ * @p alloc_func and @p free_func @b MUST be both zero or both non-zero. In the
+ * case they are both zero, default memory allocators are used. @p opaque is
+ * passed to @p alloc_func and @p free_func when they are called. @p free_func
+ * has to return without doing anything when asked to free a NULL pointer.
+ *
+ * @param type type of dictionary stored in data
+ * @param data_size size of @p data buffer
+ * @param data pointer to the dictionary data
+ * @param quality the maximum Brotli quality to prepare the dictionary for,
+ *        use BROTLI_MAX_QUALITY by default
+ * @param alloc_func custom memory allocation function
+ * @param free_func custom memory free function
+ * @param opaque custom memory manager handle
+ */
+BROTLI_ENC_API BrotliEncoderPreparedDictionary*
+BrotliEncoderPrepareDictionary(BrotliSharedDictionaryType type,
+    size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)],
+    int quality,
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
+
+BROTLI_ENC_API void BrotliEncoderDestroyPreparedDictionary(
+    BrotliEncoderPreparedDictionary* dictionary);
+
+/**
+ * Attaches a prepared dictionary of any type to the encoder. Can be used
+ * multiple times to attach multiple dictionaries. The dictionary type was
+ * determined by BrotliEncoderPrepareDictionary. Multiple raw prefix
+ * dictionaries and/or max 1 serialized dictionary with custom words can be
+ * attached.
+ *
+ * @returns ::BROTLI_FALSE in case of error
+ * @returns ::BROTLI_TRUE otherwise
+ */
+BROTLI_ENC_API BROTLI_BOOL BrotliEncoderAttachPreparedDictionary(
+    BrotliEncoderState* state,
+    const BrotliEncoderPreparedDictionary* dictionary);
+
+/**
+ * Calculates the output size bound for the given @p input_size.
+ *
+ * @warning Result is only valid if quality is at least @c 2 and, in
+ *          case ::BrotliEncoderCompressStream was used, no flushes
+ *          (::BROTLI_OPERATION_FLUSH) were performed.
+ *
+ * @param input_size size of projected input
+ * @returns @c 0 if result does not fit @c size_t
+ */
+BROTLI_ENC_API size_t BrotliEncoderMaxCompressedSize(size_t input_size);
+
+/**
+ * Performs one-shot memory-to-memory compression.
+ *
+ * Compresses the data in @p input_buffer into @p encoded_buffer, and sets
+ * @p *encoded_size to the compressed length.
+ *
+ * @note If ::BrotliEncoderMaxCompressedSize(@p input_size) returns non-zero
+ *       value, then output is guaranteed to be no longer than that.
+ *
+ * @note If @p lgwin is greater than ::BROTLI_MAX_WINDOW_BITS then resulting
+ *       stream might be incompatible with RFC 7932; to decode such streams,
+ *       decoder should be configured with
+ *       ::BROTLI_DECODER_PARAM_LARGE_WINDOW = @c 1
+ *
+ * @param quality quality parameter value, e.g. ::BROTLI_DEFAULT_QUALITY
+ * @param lgwin lgwin parameter value, e.g. ::BROTLI_DEFAULT_WINDOW
+ * @param mode mode parameter value, e.g. ::BROTLI_DEFAULT_MODE
+ * @param input_size size of @p input_buffer
+ * @param input_buffer input data buffer with at least @p input_size
+ *        addressable bytes
+ * @param[in, out] encoded_size @b in: size of @p encoded_buffer; \n
+ *                 @b out: length of compressed data written to
+ *                 @p encoded_buffer, or @c 0 if compression fails
+ * @param encoded_buffer compressed data destination buffer
+ * @returns ::BROTLI_FALSE in case of compression error
+ * @returns ::BROTLI_FALSE if output buffer is too small
+ * @returns ::BROTLI_TRUE otherwise
+ */
+BROTLI_ENC_API BROTLI_BOOL BrotliEncoderCompress(
+    int quality, int lgwin, BrotliEncoderMode mode, size_t input_size,
+    const uint8_t input_buffer[BROTLI_ARRAY_PARAM(input_size)],
+    size_t* encoded_size,
+    uint8_t encoded_buffer[BROTLI_ARRAY_PARAM(*encoded_size)]);
+
+/**
+ * Compresses input stream to output stream.
+ *
+ * The values @p *available_in and @p *available_out must specify the number of
+ * bytes addressable at @p *next_in and @p *next_out respectively.
+ * When @p *available_out is @c 0, @p next_out is allowed to be @c NULL.
+ *
+ * After each call, @p *available_in will be decremented by the amount of input
+ * bytes consumed, and the @p *next_in pointer will be incremented by that
+ * amount. Similarly, @p *available_out will be decremented by the amount of
+ * output bytes written, and the @p *next_out pointer will be incremented by
+ * that amount.
+ *
+ * @p total_out, if it is not a null-pointer, will be set to the number
+ * of bytes compressed since the last @p state initialization.
+ *
+ *
+ *
+ * Internally, the workflow consists of 3 tasks:
+ *  -# (optionally) copy input data to internal buffer
+ *  -# actually compress data and (optionally) store it to internal buffer
+ *  -# (optionally) copy compressed bytes from internal buffer to output stream
+ *
+ * Whenever all 3 tasks can't move forward anymore, or error occurs, this
+ * method returns the control flow to caller.
+ *
+ * @p op is used to perform flush, finish the stream, or inject metadata block.
+ * See ::BrotliEncoderOperation for more information.
+ *
+ * Flushing the stream means forcing encoding of all input passed to encoder and
+ * completing the current output block, so it could be fully decoded by stream
+ * decoder. To perform flush set @p op to ::BROTLI_OPERATION_FLUSH.
+ * Under some circumstances (e.g. lack of output stream capacity) this operation
+ * would require several calls to ::BrotliEncoderCompressStream. The method must
+ * be called again until both input stream is depleted and encoder has no more
+ * output (see ::BrotliEncoderHasMoreOutput) after the method is called.
+ *
+ * Finishing the stream means encoding of all input passed to encoder and
+ * adding specific "final" marks, so stream decoder could determine that stream
+ * is complete. To perform finish set @p op to ::BROTLI_OPERATION_FINISH.
+ * Under some circumstances (e.g. lack of output stream capacity) this operation
+ * would require several calls to ::BrotliEncoderCompressStream. The method must
+ * be called again until both input stream is depleted and encoder has no more
+ * output (see ::BrotliEncoderHasMoreOutput) after the method is called.
+ *
+ * @warning When flushing and finishing, @p op should not change until operation
+ *          is complete; input stream should not be swapped, reduced or
+ *          extended as well.
+ *
+ * @param state encoder instance
+ * @param op requested operation
+ * @param[in, out] available_in @b in: amount of available input; \n
+ *                 @b out: amount of unused input
+ * @param[in, out] next_in pointer to the next input byte
+ * @param[in, out] available_out @b in: length of output buffer; \n
+ *                 @b out: remaining size of output buffer
+ * @param[in, out] next_out compressed output buffer cursor;
+ *                 can be @c NULL if @p available_out is @c 0
+ * @param[out] total_out number of bytes produced so far; can be @c NULL
+ * @returns ::BROTLI_FALSE if there was an error
+ * @returns ::BROTLI_TRUE otherwise
+ */
+BROTLI_ENC_API BROTLI_BOOL BrotliEncoderCompressStream(
+    BrotliEncoderState* state, BrotliEncoderOperation op, size_t* available_in,
+    const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
+    size_t* total_out);
+
+/**
+ * Checks if encoder instance reached the final state.
+ *
+ * @param state encoder instance
+ * @returns ::BROTLI_TRUE if encoder is in a state where it reached the end of
+ *          the input and produced all of the output
+ * @returns ::BROTLI_FALSE otherwise
+ */
+BROTLI_ENC_API BROTLI_BOOL BrotliEncoderIsFinished(BrotliEncoderState* state);
+
+/**
+ * Checks if encoder has more output.
+ *
+ * @param state encoder instance
+ * @returns ::BROTLI_TRUE, if encoder has some unconsumed output
+ * @returns ::BROTLI_FALSE otherwise
+ */
+BROTLI_ENC_API BROTLI_BOOL BrotliEncoderHasMoreOutput(
+    BrotliEncoderState* state);
+
+/**
+ * Acquires pointer to internal output buffer.
+ *
+ * This method is used to make language bindings easier and more efficient:
+ *  -# push data to ::BrotliEncoderCompressStream,
+ *     until ::BrotliEncoderHasMoreOutput returns BROTLI_TRUE
+ *  -# use ::BrotliEncoderTakeOutput to peek bytes and copy to language-specific
+ *     entity
+ *
+ * Also this could be useful if there is an output stream that is able to
+ * consume all the provided data (e.g. when data is saved to file system).
+ *
+ * @attention After every call to ::BrotliEncoderTakeOutput @p *size bytes of
+ *            output are considered consumed for all consecutive calls to the
+ *            instance methods; returned pointer becomes invalidated as well.
+ *
+ * @note Encoder output is not guaranteed to be contiguous. This means that
+ *       after the size-unrestricted call to ::BrotliEncoderTakeOutput,
+ *       immediate next call to ::BrotliEncoderTakeOutput may return more data.
+ *
+ * @param state encoder instance
+ * @param[in, out] size @b in: number of bytes caller is ready to take, @c 0 if
+ *                 any amount could be handled; \n
+ *                 @b out: amount of data pointed by returned pointer and
+ *                 considered consumed; \n
+ *                 out value is never greater than in value, unless it is @c 0
+ * @returns pointer to output data
+ */
+BROTLI_ENC_API const uint8_t* BrotliEncoderTakeOutput(
+    BrotliEncoderState* state, size_t* size);
+
+/* Returns the estimated peak memory usage (in bytes) of the BrotliCompress()
+   function, not counting the memory needed for the input and output. */
+BROTLI_ENC_EXTRA_API size_t BrotliEncoderEstimatePeakMemoryUsage(
+    int quality, int lgwin, size_t input_size);
+/* Returns 0 if dictionary is not valid; otherwise returns allocation size. */
+BROTLI_ENC_EXTRA_API size_t BrotliEncoderGetPreparedDictionarySize(
+    const BrotliEncoderPreparedDictionary* dictionary);
+
+/**
+ * Gets an encoder library version.
+ *
+ * Look at BROTLI_VERSION for more information.
+ */
+BROTLI_ENC_API uint32_t BrotliEncoderVersion(void);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_ENCODE_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/include/brotli/port.h b/third-party/libjxl/libjxl/third_party/brotli/c/include/brotli/port.h
new file mode 100644
index 0000000000..0d50019042
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/include/brotli/port.h
@@ -0,0 +1,305 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Macros for compiler / platform specific API declarations. */
+
+#ifndef BROTLI_COMMON_PORT_H_
+#define BROTLI_COMMON_PORT_H_
+
+/* The following macros were borrowed from https://github.com/nemequ/hedley
+ * with permission of original author - Evan Nemerson <evan@nemerson.com> */
+
+/* >>> >>> >>> hedley macros */
+
+#define BROTLI_MAKE_VERSION(major, minor, revision) \
+  (((major) * 1000000) + ((minor) * 1000) + (revision))
+
+#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__)
+#define BROTLI_GNUC_VERSION \
+  BROTLI_MAKE_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__)
+#elif defined(__GNUC__)
+#define BROTLI_GNUC_VERSION BROTLI_MAKE_VERSION(__GNUC__, __GNUC_MINOR__, 0)
+#endif
+
+#if defined(BROTLI_GNUC_VERSION)
+#define BROTLI_GNUC_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_GNUC_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_GNUC_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000)
+#define BROTLI_MSVC_VERSION                                \
+  BROTLI_MAKE_VERSION((_MSC_FULL_VER / 10000000),          \
+                      (_MSC_FULL_VER % 10000000) / 100000, \
+                      (_MSC_FULL_VER % 100000) / 100)
+#elif defined(_MSC_FULL_VER)
+#define BROTLI_MSVC_VERSION                              \
+  BROTLI_MAKE_VERSION((_MSC_FULL_VER / 1000000),         \
+                      (_MSC_FULL_VER % 1000000) / 10000, \
+                      (_MSC_FULL_VER % 10000) / 10)
+#elif defined(_MSC_VER)
+#define BROTLI_MSVC_VERSION \
+  BROTLI_MAKE_VERSION(_MSC_VER / 100, _MSC_VER % 100, 0)
+#endif
+
+#if !defined(_MSC_VER)
+#define BROTLI_MSVC_VERSION_CHECK(major, minor, patch) (0)
+#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
+#define BROTLI_MSVC_VERSION_CHECK(major, minor, patch) \
+  (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch)))
+#elif defined(_MSC_VER) && (_MSC_VER >= 1200)
+#define BROTLI_MSVC_VERSION_CHECK(major, minor, patch) \
+  (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch)))
+#else
+#define BROTLI_MSVC_VERSION_CHECK(major, minor, patch) \
+  (_MSC_VER >= ((major * 100) + (minor)))
+#endif
+
+#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE)
+#define BROTLI_INTEL_VERSION                   \
+  BROTLI_MAKE_VERSION(__INTEL_COMPILER / 100,  \
+                      __INTEL_COMPILER % 100,  \
+                      __INTEL_COMPILER_UPDATE)
+#elif defined(__INTEL_COMPILER)
+#define BROTLI_INTEL_VERSION \
+  BROTLI_MAKE_VERSION(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0)
+#endif
+
+#if defined(BROTLI_INTEL_VERSION)
+#define BROTLI_INTEL_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_INTEL_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_INTEL_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__PGI) && \
+    defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__)
+#define BROTLI_PGI_VERSION \
+  BROTLI_MAKE_VERSION(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__)
+#endif
+
+#if defined(BROTLI_PGI_VERSION)
+#define BROTLI_PGI_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_PGI_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_PGI_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000)
+#define BROTLI_SUNPRO_VERSION                                       \
+  BROTLI_MAKE_VERSION(                                              \
+    (((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), \
+    (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf),   \
+    (__SUNPRO_C & 0xf) * 10)
+#elif defined(__SUNPRO_C)
+#define BROTLI_SUNPRO_VERSION                  \
+  BROTLI_MAKE_VERSION((__SUNPRO_C >> 8) & 0xf, \
+                      (__SUNPRO_C >> 4) & 0xf, \
+                      (__SUNPRO_C) & 0xf)
+#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000)
+#define BROTLI_SUNPRO_VERSION                                         \
+  BROTLI_MAKE_VERSION(                                                \
+    (((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), \
+    (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf),   \
+    (__SUNPRO_CC & 0xf) * 10)
+#elif defined(__SUNPRO_CC)
+#define BROTLI_SUNPRO_VERSION                   \
+  BROTLI_MAKE_VERSION((__SUNPRO_CC >> 8) & 0xf, \
+                      (__SUNPRO_CC >> 4) & 0xf, \
+                      (__SUNPRO_CC) & 0xf)
+#endif
+
+#if defined(BROTLI_SUNPRO_VERSION)
+#define BROTLI_SUNPRO_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_SUNPRO_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_SUNPRO_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION)
+#define BROTLI_ARM_VERSION                                       \
+  BROTLI_MAKE_VERSION((__ARMCOMPILER_VERSION / 1000000),         \
+                      (__ARMCOMPILER_VERSION % 1000000) / 10000, \
+                      (__ARMCOMPILER_VERSION % 10000) / 100)
+#elif defined(__CC_ARM) && defined(__ARMCC_VERSION)
+#define BROTLI_ARM_VERSION                                 \
+  BROTLI_MAKE_VERSION((__ARMCC_VERSION / 1000000),         \
+                      (__ARMCC_VERSION % 1000000) / 10000, \
+                      (__ARMCC_VERSION % 10000) / 100)
+#endif
+
+#if defined(BROTLI_ARM_VERSION)
+#define BROTLI_ARM_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_ARM_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_ARM_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__ibmxl__)
+#define BROTLI_IBM_VERSION                    \
+  BROTLI_MAKE_VERSION(__ibmxl_version__,      \
+                      __ibmxl_release__,      \
+                      __ibmxl_modification__)
+#elif defined(__xlC__) && defined(__xlC_ver__)
+#define BROTLI_IBM_VERSION \
+  BROTLI_MAKE_VERSION(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff)
+#elif defined(__xlC__)
+#define BROTLI_IBM_VERSION BROTLI_MAKE_VERSION(__xlC__ >> 8, __xlC__ & 0xff, 0)
+#endif
+
+#if defined(BROTLI_IBM_VERSION)
+#define BROTLI_IBM_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_IBM_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_IBM_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__TI_COMPILER_VERSION__)
+#define BROTLI_TI_VERSION                                         \
+  BROTLI_MAKE_VERSION((__TI_COMPILER_VERSION__ / 1000000),        \
+                      (__TI_COMPILER_VERSION__ % 1000000) / 1000, \
+                      (__TI_COMPILER_VERSION__ % 1000))
+#endif
+
+#if defined(BROTLI_TI_VERSION)
+#define BROTLI_TI_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_TI_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_TI_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__IAR_SYSTEMS_ICC__)
+#if __VER__ > 1000  /* decoded as major * 1000000 + minor * 1000 + patch */
+#define BROTLI_IAR_VERSION                     \
+  BROTLI_MAKE_VERSION((__VER__ / 1000000),     \
+                      (__VER__ / 1000) % 1000, \
+                      (__VER__ % 1000))
+#else  /* decoded as major * 100 + minor; no patch component */
+#define BROTLI_IAR_VERSION BROTLI_MAKE_VERSION(__VER__ / 100, __VER__ % 100, 0)
+#endif  /* __VER__ > 1000 */
+#endif  /* __IAR_SYSTEMS_ICC__ */
+
+#if defined(BROTLI_IAR_VERSION)
+#define BROTLI_IAR_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_IAR_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_IAR_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__TINYC__)
+#define BROTLI_TINYC_VERSION \
+  BROTLI_MAKE_VERSION(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100)
+#endif
+
+#if defined(BROTLI_TINYC_VERSION)
+#define BROTLI_TINYC_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_TINYC_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_TINYC_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__has_attribute)
+#define BROTLI_GNUC_HAS_ATTRIBUTE(attribute, major, minor, patch) \
+  __has_attribute(attribute)
+#else
+#define BROTLI_GNUC_HAS_ATTRIBUTE(attribute, major, minor, patch) \
+  BROTLI_GNUC_VERSION_CHECK(major, minor, patch)
+#endif
+
+#if defined(__has_builtin)
+#define BROTLI_GNUC_HAS_BUILTIN(builtin, major, minor, patch) \
+  __has_builtin(builtin)
+#else
+#define BROTLI_GNUC_HAS_BUILTIN(builtin, major, minor, patch) \
+  BROTLI_GNUC_VERSION_CHECK(major, minor, patch)
+#endif
+
+#if defined(__has_feature)
+#define BROTLI_HAS_FEATURE(feature) __has_feature(feature)
+#else
+#define BROTLI_HAS_FEATURE(feature) (0)
+#endif
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#define BROTLI_PUBLIC
+#elif BROTLI_GNUC_VERSION_CHECK(3, 3, 0) ||                         \
+    BROTLI_TI_VERSION_CHECK(8, 0, 0) ||                             \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0) ||                         \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) ||                            \
+    BROTLI_IBM_VERSION_CHECK(13, 1, 0) ||                           \
+    BROTLI_SUNPRO_VERSION_CHECK(5, 11, 0) ||                        \
+    (BROTLI_TI_VERSION_CHECK(7, 3, 0) &&                            \
+     defined(__TI_GNU_ATTRIBUTE_SUPPORT__) && defined(__TI_EABI__))
+#define BROTLI_PUBLIC __attribute__ ((visibility ("default")))
+#else
+#define BROTLI_PUBLIC
+#endif
+
+/* BROTLI_INTERNAL could be defined to override visibility, e.g. for tests. */
+#if !defined(BROTLI_INTERNAL)
+#if defined(_WIN32) || defined(__CYGWIN__)
+#define BROTLI_INTERNAL
+#elif BROTLI_GNUC_VERSION_CHECK(3, 3, 0) ||                         \
+    BROTLI_TI_VERSION_CHECK(8, 0, 0) ||                             \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0) ||                         \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) ||                            \
+    BROTLI_IBM_VERSION_CHECK(13, 1, 0) ||                           \
+    BROTLI_SUNPRO_VERSION_CHECK(5, 11, 0) ||                        \
+    (BROTLI_TI_VERSION_CHECK(7, 3, 0) &&                            \
+     defined(__TI_GNU_ATTRIBUTE_SUPPORT__) && defined(__TI_EABI__))
+#define BROTLI_INTERNAL __attribute__ ((visibility ("hidden")))
+#else
+#define BROTLI_INTERNAL
+#endif
+#endif
+
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) &&   \
+    !defined(__STDC_NO_VLA__) && !defined(__cplusplus) &&           \
+    !defined(__PGI) && !defined(__PGIC__) && !defined(__TINYC__) && \
+    !defined(__clang__)
+#define BROTLI_ARRAY_PARAM(name) (name)
+#else
+#define BROTLI_ARRAY_PARAM(name)
+#endif
+
+/* <<< <<< <<< end of hedley macros. */
+
+#if defined(BROTLI_SHARED_COMPILATION)
+#if defined(_WIN32)
+#if defined(BROTLICOMMON_SHARED_COMPILATION)
+#define BROTLI_COMMON_API __declspec(dllexport)
+#else
+#define BROTLI_COMMON_API __declspec(dllimport)
+#endif  /* BROTLICOMMON_SHARED_COMPILATION */
+#if defined(BROTLIDEC_SHARED_COMPILATION)
+#define BROTLI_DEC_API __declspec(dllexport)
+#else
+#define BROTLI_DEC_API __declspec(dllimport)
+#endif  /* BROTLIDEC_SHARED_COMPILATION */
+#if defined(BROTLIENC_SHARED_COMPILATION)
+#define BROTLI_ENC_API __declspec(dllexport)
+#else
+#define BROTLI_ENC_API __declspec(dllimport)
+#endif  /* BROTLIENC_SHARED_COMPILATION */
+#else  /* _WIN32 */
+#define BROTLI_COMMON_API BROTLI_PUBLIC
+#define BROTLI_DEC_API BROTLI_PUBLIC
+#define BROTLI_ENC_API BROTLI_PUBLIC
+#endif  /* _WIN32 */
+#else  /* BROTLI_SHARED_COMPILATION */
+#define BROTLI_COMMON_API
+#define BROTLI_DEC_API
+#define BROTLI_ENC_API
+#endif
+
+#if defined(BROTLI_BUILD_ENC_EXTRA_API)
+#define BROTLI_ENC_EXTRA_API BROTLI_ENC_API
+#else
+#define BROTLI_ENC_EXTRA_API BROTLI_INTERNAL
+#endif
+
+#endif  /* BROTLI_COMMON_PORT_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/include/brotli/shared_dictionary.h b/third-party/libjxl/libjxl/third_party/brotli/c/include/brotli/shared_dictionary.h
new file mode 100644
index 0000000000..ceb6cf1cd2
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/include/brotli/shared_dictionary.h
@@ -0,0 +1,97 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* (Opaque) Shared Dictionary definition and utilities. */
+
+#ifndef BROTLI_COMMON_SHARED_DICTIONARY_H_
+#define BROTLI_COMMON_SHARED_DICTIONARY_H_
+
+#include <brotli/port.h>
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH 4
+#define SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH 31
+#define SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS 64
+#define SHARED_BROTLI_MAX_COMPOUND_DICTS 15
+
+/**
+ * Opaque structure that holds shared dictionary data.
+ *
+ * Allocated and initialized with ::BrotliSharedDictionaryCreateInstance.
+ * Cleaned up and deallocated with ::BrotliSharedDictionaryDestroyInstance.
+ */
+typedef struct BrotliSharedDictionaryStruct BrotliSharedDictionary;
+
+/**
+ * Input data type for ::BrotliSharedDictionaryAttach.
+ */
+typedef enum BrotliSharedDictionaryType {
+  /** Raw LZ77 prefix dictionary. */
+  BROTLI_SHARED_DICTIONARY_RAW = 0,
+  /** Serialized shared dictionary. */
+  BROTLI_SHARED_DICTIONARY_SERIALIZED = 1
+} BrotliSharedDictionaryType;
+
+/**
+ * Creates an instance of ::BrotliSharedDictionary.
+ *
+ * Fresh instance has default word dictionary and transforms
+ * and no LZ77 prefix dictionary.
+ *
+ * @p alloc_func and @p free_func @b MUST be both zero or both non-zero. In the
+ * case they are both zero, default memory allocators are used. @p opaque is
+ * passed to @p alloc_func and @p free_func when they are called. @p free_func
+ * has to return without doing anything when asked to free a NULL pointer.
+ *
+ * @param alloc_func custom memory allocation function
+ * @param free_func custom memory free function
+ * @param opaque custom memory manager handle
+ * @returns @c 0 if instance can not be allocated or initialized
+ * @returns pointer to initialized ::BrotliSharedDictionary otherwise
+ */
+BROTLI_COMMON_API BrotliSharedDictionary* BrotliSharedDictionaryCreateInstance(
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
+
+/**
+ * Deinitializes and frees ::BrotliSharedDictionary instance.
+ *
+ * @param dict shared dictionary instance to be cleaned up and deallocated
+ */
+BROTLI_COMMON_API void BrotliSharedDictionaryDestroyInstance(
+    BrotliSharedDictionary* dict);
+
+/**
+ * Attaches dictionary to a given instance of ::BrotliSharedDictionary.
+ *
+ * Dictionary to be attached is represented in a serialized format as a region
+ * of memory.
+ *
+ * Provided data is partially referenced by a resulting (compound) dictionary,
+ * and should be kept untouched, while at least one compound dictionary uses it.
+ * This way memory overhead is kept minimal by the cost of additional resource
+ * management.
+ *
+ * @param dict dictionary to extend
+ * @param type type of dictionary to attach
+ * @param data_size size of @p data
+ * @param data serialized dictionary of type @p type, with at least @p data_size
+ *        addressable bytes
+ * @returns ::BROTLI_TRUE if provided dictionary is successfully attached
+ * @returns ::BROTLI_FALSE otherwise
+ */
+BROTLI_COMMON_API BROTLI_BOOL BrotliSharedDictionaryAttach(
+    BrotliSharedDictionary* dict, BrotliSharedDictionaryType type,
+    size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)]);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_COMMON_SHARED_DICTIONARY_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/include/brotli/types.h b/third-party/libjxl/libjxl/third_party/brotli/c/include/brotli/types.h
new file mode 100644
index 0000000000..eff1a3cd07
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/include/brotli/types.h
@@ -0,0 +1,83 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/**
+ * @file
+ * Common types used in decoder and encoder API.
+ */
+
+#ifndef BROTLI_COMMON_TYPES_H_
+#define BROTLI_COMMON_TYPES_H_
+
+#include <stddef.h>  /* for size_t */
+
+#if defined(_MSC_VER) && (_MSC_VER < 1600)
+typedef __int8 int8_t;
+typedef unsigned __int8 uint8_t;
+typedef __int16 int16_t;
+typedef unsigned __int16 uint16_t;
+typedef __int32 int32_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+typedef __int64 int64_t;
+#else
+#include <stdint.h>
+#endif  /* defined(_MSC_VER) && (_MSC_VER < 1600) */
+
+/**
+ * A portable @c bool replacement.
+ *
+ * ::BROTLI_BOOL is a "documentation" type: actually it is @c int, but in API it
+ * denotes a type, whose only values are ::BROTLI_TRUE and ::BROTLI_FALSE.
+ *
+ * ::BROTLI_BOOL values passed to Brotli should either be ::BROTLI_TRUE or
+ * ::BROTLI_FALSE, or be a result of ::TO_BROTLI_BOOL macros.
+ *
+ * ::BROTLI_BOOL values returned by Brotli should not be tested for equality
+ * with @c true, @c false, ::BROTLI_TRUE, ::BROTLI_FALSE, but rather should be
+ * evaluated, for example: @code{.cpp}
+ * if (SomeBrotliFunction(encoder, BROTLI_TRUE) &&
+ *     !OtherBrotliFunction(decoder, BROTLI_FALSE)) {
+ *   bool x = !!YetAnotherBrotliFunction(encoder, TO_BROTLI_BOOL(2 * 2 == 4));
+ *   DoSomething(x);
+ * }
+ * @endcode
+ */
+#define BROTLI_BOOL int
+/** Portable @c true replacement. */
+#define BROTLI_TRUE 1
+/** Portable @c false replacement. */
+#define BROTLI_FALSE 0
+/** @c bool to ::BROTLI_BOOL conversion macros. */
+#define TO_BROTLI_BOOL(X) (!!(X) ? BROTLI_TRUE : BROTLI_FALSE)
+
+#define BROTLI_MAKE_UINT64_T(high, low) ((((uint64_t)(high)) << 32) | (low))
+
+#define BROTLI_UINT32_MAX (~((uint32_t)0))
+#define BROTLI_SIZE_MAX (~((size_t)0))
+
+/**
+ * Allocating function pointer type.
+ *
+ * @param opaque custom memory manager handle provided by client
+ * @param size requested memory region size; can not be @c 0
+ * @returns @c 0 in the case of failure
+ * @returns a valid pointer to a memory region of at least @p size bytes
+ *          long otherwise
+ */
+typedef void* (*brotli_alloc_func)(void* opaque, size_t size);
+
+/**
+ * Deallocating function pointer type.
+ *
+ * This function @b SHOULD do nothing if @p address is @c 0.
+ *
+ * @param opaque custom memory manager handle provided by client
+ * @param address memory region pointer returned by ::brotli_alloc_func, or @c 0
+ */
+typedef void (*brotli_free_func)(void* opaque, void* address);
+
+#endif  /* BROTLI_COMMON_TYPES_H_ */
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/tools/brotli.c b/third-party/libjxl/libjxl/third_party/brotli/c/tools/brotli.c
new file mode 100644
index 0000000000..102a87a76e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/tools/brotli.c
@@ -0,0 +1,1245 @@
+/* Copyright 2014 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Command line interface for Brotli library. */
+
+/* Mute strerror/strcpy warnings. */
+#if !defined(_CRT_SECURE_NO_WARNINGS)
+#define _CRT_SECURE_NO_WARNINGS
+#endif
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <time.h>
+
+#include <brotli/decode.h>
+#include <brotli/encode.h>
+
+#include "../common/constants.h"
+#include "../common/version.h"
+
+#if defined(_WIN32)
+#include <io.h>
+#include <share.h>
+#include <sys/utime.h>
+
+#define MAKE_BINARY(FILENO) (_setmode((FILENO), _O_BINARY), (FILENO))
+
+#if !defined(__MINGW32__)
+#define STDIN_FILENO _fileno(stdin)
+#define STDOUT_FILENO _fileno(stdout)
+#define S_IRUSR S_IREAD
+#define S_IWUSR S_IWRITE
+#endif
+
+#define fdopen _fdopen
+#define isatty _isatty
+#define unlink _unlink
+#define utimbuf _utimbuf
+#define utime _utime
+
+#define fopen ms_fopen
+#define open ms_open
+
+#define chmod(F, P) (0)
+#define chown(F, O, G) (0)
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1400)
+#define fseek _fseeki64
+#define ftell _ftelli64
+#endif
+
+static FILE* ms_fopen(const char* filename, const char* mode) {
+  FILE* stream = NULL;  /* Returned as is, unless fopen_s stores a stream. */
+  (void)fopen_s(&stream, filename, mode);
+  return stream;
+}
+
+static int ms_open(const char* filename, int oflag, int pmode) {
+  int fd = -1;  /* Returned as is, unless _sopen_s stores a descriptor. */
+  (void)_sopen_s(&fd, filename, oflag | O_BINARY, _SH_DENYNO, pmode);
+  return fd;
+}
+#else  /* !defined(_WIN32) */
+#include <unistd.h>
+#include <utime.h>
+#define MAKE_BINARY(FILENO) (FILENO)
+#endif  /* defined(_WIN32) */
+
+#if defined(__APPLE__) && !defined(_POSIX_C_SOURCE)
+#define HAVE_UTIMENSAT 1
+#define ATIME_NSEC(S) ((S)->st_atimespec.tv_nsec)
+#define MTIME_NSEC(S) ((S)->st_mtimespec.tv_nsec)
+#elif defined(_WIN32) || !defined(AT_SYMLINK_NOFOLLOW)
+#define HAVE_UTIMENSAT 0
+#else
+#define HAVE_UTIMENSAT 1
+#define ATIME_NSEC(S) ((S)->st_atim.tv_nsec)
+#define MTIME_NSEC(S) ((S)->st_mtim.tv_nsec)
+#endif
+
+typedef enum {
+  COMMAND_COMPRESS,  /* default, unless the program name is "unbrotli" */
+  COMMAND_DECOMPRESS,  /* -d / --decompress, or the "unbrotli" alias */
+  COMMAND_HELP,  /* -h / --help */
+  COMMAND_INVALID,  /* the command line was rejected */
+  COMMAND_TEST_INTEGRITY,  /* -t / --test */
+  COMMAND_NOOP,  /* NOTE(review): never returned by ParseParams */
+  COMMAND_VERSION  /* -V / --version */
+} Command;
+
+#define DEFAULT_LGWIN 24
+#define DEFAULT_SUFFIX ".br"
+#define MAX_OPTIONS 20
+
+typedef struct {
+  /* Parameters */
+  int quality;  /* -0..-9, -q / --quality, -Z / --best */
+  int lgwin;  /* -w / --lgwin / --large_window; 0: compressor chooses */
+  int verbosity;  /* set to 1 by -v / --verbose */
+  BROTLI_BOOL force_overwrite;
+  BROTLI_BOOL junk_source;  /* -j / --rm: unlink the input after success */
+  BROTLI_BOOL copy_stat;  /* cleared by -n / --no-copy-stat */
+  BROTLI_BOOL write_to_stdout;
+  BROTLI_BOOL test_integrity;
+  BROTLI_BOOL decompress;
+  BROTLI_BOOL large_window;
+  const char* output_path;
+  const char* dictionary_path;
+  const char* suffix;
+  int not_input_indices[MAX_OPTIONS];  /* argv indices of non-file args */
+  size_t longest_path_len;
+  size_t input_count;
+
+  /* Inner state */
+  int argc;
+  char** argv;
+  uint8_t* dictionary;
+  size_t dictionary_size;
+  BrotliEncoderPreparedDictionary* prepared_dictionary;
+  char* modified_path;  /* Storage for path with appended / cut suffix */
+  int iterator;  /* argv index of the last visited argument (see NextFile) */
+  int ignore;  /* next |not_input_indices| entry for NextFile to skip */
+  BROTLI_BOOL iterator_error;
+  uint8_t* buffer;
+  uint8_t* input;
+  uint8_t* output;
+  const char* current_input_path;  /* NULL: read from console */
+  const char* current_output_path;
+  int64_t input_file_length;  /* -1, if impossible to calculate */
+  FILE* fin;
+  FILE* fout;
+
+  /* I/O buffers */
+  size_t available_in;
+  const uint8_t* next_in;
+  size_t available_out;
+  uint8_t* next_out;
+
+  /* Reporting */
+  /* size_t would be large enough,
+     until 4GiB+ files are compressed / decompressed on 32-bit CPUs. */
+  size_t total_in;
+  size_t total_out;
+  clock_t start_time;
+  clock_t end_time;
+} Context;
+
+/* Parses 1..5 decimal digits without leading zeros into [low, high]. */
+static BROTLI_BOOL ParseInt(const char* s, int low, int high, int* result) {
+  int parsed = 0;
+  int len = 0;
+  while (len < 5 && s[len] != 0) {
+    const char digit = s[len];
+    if (digit < '0' || digit > '9') return BROTLI_FALSE;
+    parsed = (10 * parsed) + (digit - '0');
+    ++len;
+  }
+  if (len == 0) return BROTLI_FALSE;  /* Nothing to parse. */
+  if (s[len] != 0) return BROTLI_FALSE;  /* Input longer than 5 chars. */
+  if (len > 1 && s[0] == '0') return BROTLI_FALSE;  /* Leading zero. */
+  if (parsed < low || parsed > high) return BROTLI_FALSE;
+  *result = parsed;
+  return BROTLI_TRUE;
+}
+
+/* Drops everything up to the last '/', then up to the last '\' after it. */
+static const char* FileName(const char* path) {
+  const char* slash = strrchr(path, '/');
+  const char* tail = slash ? slash + 1 : path;
+  const char* backslash = strrchr(tail, '\\');
+  if (backslash != NULL) tail = backslash + 1;
+  return tail;
+}
+
+/* Maps the program name to the default command ("unbrotli" decompresses). */
+static Command ParseAlias(const char* name) {
+  /* TODO: cast name to lower case? */
+  static const char kAlias[] = "unbrotli";
+  const size_t alias_len = sizeof(kAlias) - 1;
+  const char* base = FileName(name);
+  char terminator;
+  /* Prefix match only: on Windows the name could end with ".exe". */
+  if (strncmp(base, kAlias, alias_len) != 0) return COMMAND_COMPRESS;
+  terminator = base[alias_len];
+  if (terminator == 0 || terminator == '.') return COMMAND_DECOMPRESS;
+  return COMMAND_COMPRESS;
+}
+
+static Command ParseParams(Context* params) {
+  int argc = params->argc;
+  char** argv = params->argv;
+  int i;
+  int next_option_index = 0;
+  size_t input_count = 0;
+  size_t longest_path_len = 1;
+  BROTLI_BOOL command_set = BROTLI_FALSE;
+  BROTLI_BOOL quality_set = BROTLI_FALSE;
+  BROTLI_BOOL output_set = BROTLI_FALSE;
+  BROTLI_BOOL keep_set = BROTLI_FALSE;
+  BROTLI_BOOL lgwin_set = BROTLI_FALSE;
+  BROTLI_BOOL suffix_set = BROTLI_FALSE;
+  BROTLI_BOOL after_dash_dash = BROTLI_FALSE;
+  Command command = ParseAlias(argv[0]);
+
+  for (i = 1; i < argc; ++i) {
+    const char* arg = argv[i];
+    /* C99 5.1.2.2.1: "members argv[0] through argv[argc-1] inclusive shall
+       contain pointers to strings"; NULL and 0-length are not forbidden. */
+    size_t arg_len = arg ? strlen(arg) : 0;
+
+    /* Too many options. The expected longest option list is:
+       "-q 0 -w 10 -o f -D d -S b -d -f -k -n -v --", i.e. 16 items in total.
+       Checked before anything is stored: otherwise a run of empty / NULL
+       arguments could write past the end of |not_input_indices|. */
+    if (next_option_index > (MAX_OPTIONS - 2)) {
+      fprintf(stderr, "too many options passed\n");
+      return COMMAND_INVALID;
+    }
+
+    if (arg_len == 0) {
+      params->not_input_indices[next_option_index++] = i;
+      continue;
+    }
+
+    /* Input file entry. */
+    if (after_dash_dash || arg[0] != '-' || arg_len == 1) {
+      input_count++;
+      if (longest_path_len < arg_len) longest_path_len = arg_len;
+      continue;
+    }
+
+    /* Not a file entry. */
+    params->not_input_indices[next_option_index++] = i;
+
+    /* '--' entry stop parsing arguments. */
+    if (arg_len == 2 && arg[1] == '-') {
+      after_dash_dash = BROTLI_TRUE;
+      continue;
+    }
+
+    /* Simple / coalesced options. */
+    if (arg[1] != '-') {
+      size_t j;
+      for (j = 1; j < arg_len; ++j) {
+        char c = arg[j];
+        if (c >= '0' && c <= '9') {
+          if (quality_set) {
+            fprintf(stderr, "quality already set\n");
+            return COMMAND_INVALID;
+          }
+          quality_set = BROTLI_TRUE;
+          params->quality = c - '0';
+          continue;
+        } else if (c == 'c') {
+          if (output_set) {
+            fprintf(stderr, "write to standard output already set\n");
+            return COMMAND_INVALID;
+          }
+          output_set = BROTLI_TRUE;
+          params->write_to_stdout = BROTLI_TRUE;
+          continue;
+        } else if (c == 'd') {
+          if (command_set) {
+            fprintf(stderr, "command already set when parsing -d\n");
+            return COMMAND_INVALID;
+          }
+          command_set = BROTLI_TRUE;
+          command = COMMAND_DECOMPRESS;
+          continue;
+        } else if (c == 'f') {
+          if (params->force_overwrite) {
+            fprintf(stderr, "force output overwrite already set\n");
+            return COMMAND_INVALID;
+          }
+          params->force_overwrite = BROTLI_TRUE;
+          continue;
+        } else if (c == 'h') {
+          /* Don't parse further. */
+          return COMMAND_HELP;
+        } else if (c == 'j' || c == 'k') {
+          if (keep_set) {
+            fprintf(stderr, "argument --rm / -j or --keep / -k already set\n");
+            return COMMAND_INVALID;
+          }
+          keep_set = BROTLI_TRUE;
+          params->junk_source = TO_BROTLI_BOOL(c == 'j');
+          continue;
+        } else if (c == 'n') {
+          if (!params->copy_stat) {
+            fprintf(stderr, "argument --no-copy-stat / -n already set\n");
+            return COMMAND_INVALID;
+          }
+          params->copy_stat = BROTLI_FALSE;
+          continue;
+        } else if (c == 't') {
+          if (command_set) {
+            fprintf(stderr, "command already set when parsing -t\n");
+            return COMMAND_INVALID;
+          }
+          command_set = BROTLI_TRUE;
+          command = COMMAND_TEST_INTEGRITY;
+          continue;
+        } else if (c == 'v') {
+          if (params->verbosity > 0) {
+            fprintf(stderr, "argument --verbose / -v already set\n");
+            return COMMAND_INVALID;
+          }
+          params->verbosity = 1;
+          continue;
+        } else if (c == 'V') {
+          /* Don't parse further. */
+          return COMMAND_VERSION;
+        } else if (c == 'Z') {
+          if (quality_set) {
+            fprintf(stderr, "quality already set\n");
+            return COMMAND_INVALID;
+          }
+          quality_set = BROTLI_TRUE;
+          params->quality = 11;
+          continue;
+        }
+        /* o/q/w/D/S with parameter is expected */
+        if (c != 'o' && c != 'q' && c != 'w' && c != 'D' && c != 'S') {
+          fprintf(stderr, "invalid argument -%c\n", c);
+          return COMMAND_INVALID;
+        }
+        if (j + 1 != arg_len) {
+          fprintf(stderr, "expected parameter for argument -%c\n", c);
+          return COMMAND_INVALID;
+        }
+        i++;
+        if (i == argc || !argv[i] || argv[i][0] == 0) {
+          fprintf(stderr, "expected parameter for argument -%c\n", c);
+          return COMMAND_INVALID;
+        }
+        params->not_input_indices[next_option_index++] = i;
+        if (c == 'o') {
+          if (output_set) {
+            fprintf(stderr, "write to standard output already set (-o)\n");
+            return COMMAND_INVALID;
+          }
+          params->output_path = argv[i];
+        } else if (c == 'q') {
+          if (quality_set) {
+            fprintf(stderr, "quality already set\n");
+            return COMMAND_INVALID;
+          }
+          quality_set = ParseInt(argv[i], BROTLI_MIN_QUALITY,
+                                 BROTLI_MAX_QUALITY, &params->quality);
+          if (!quality_set) {
+            fprintf(stderr, "error parsing quality value [%s]\n", argv[i]);
+            return COMMAND_INVALID;
+          }
+        } else if (c == 'w') {
+          if (lgwin_set) {
+            fprintf(stderr, "lgwin parameter already set\n");
+            return COMMAND_INVALID;
+          }
+          lgwin_set = ParseInt(argv[i], 0,
+                               BROTLI_MAX_WINDOW_BITS, &params->lgwin);
+          if (!lgwin_set) {
+            fprintf(stderr, "error parsing lgwin value [%s]\n", argv[i]);
+            return COMMAND_INVALID;
+          }
+          if (params->lgwin != 0 && params->lgwin < BROTLI_MIN_WINDOW_BITS) {
+            fprintf(stderr,
+                    "lgwin parameter (%d) smaller than the minimum (%d)\n",
+                    params->lgwin, BROTLI_MIN_WINDOW_BITS);
+            return COMMAND_INVALID;
+          }
+        } else if (c == 'D') {
+          if (params->dictionary_path) {
+            fprintf(stderr, "dictionary path already set\n");
+            return COMMAND_INVALID;
+          }
+          params->dictionary_path = argv[i];
+        } else if (c == 'S') {
+          if (suffix_set) {
+            fprintf(stderr, "suffix already set\n");
+            return COMMAND_INVALID;
+          }
+          suffix_set = BROTLI_TRUE;
+          params->suffix = argv[i];
+        }
+      }
+    } else {  /* Double-dash. */
+      arg = &arg[2];
+      if (strcmp("best", arg) == 0) {
+        if (quality_set) {
+          fprintf(stderr, "quality already set\n");
+          return COMMAND_INVALID;
+        }
+        quality_set = BROTLI_TRUE;
+        params->quality = 11;
+      } else if (strcmp("decompress", arg) == 0) {
+        if (command_set) {
+          fprintf(stderr, "command already set when parsing --decompress\n");
+          return COMMAND_INVALID;
+        }
+        command_set = BROTLI_TRUE;
+        command = COMMAND_DECOMPRESS;
+      } else if (strcmp("force", arg) == 0) {
+        if (params->force_overwrite) {
+          fprintf(stderr, "force output overwrite already set\n");
+          return COMMAND_INVALID;
+        }
+        params->force_overwrite = BROTLI_TRUE;
+      } else if (strcmp("help", arg) == 0) {
+        /* Don't parse further. */
+        return COMMAND_HELP;
+      } else if (strcmp("keep", arg) == 0) {
+        if (keep_set) {
+          fprintf(stderr, "argument --rm / -j or --keep / -k already set\n");
+          return COMMAND_INVALID;
+        }
+        keep_set = BROTLI_TRUE;
+        params->junk_source = BROTLI_FALSE;
+      } else if (strcmp("no-copy-stat", arg) == 0) {
+        if (!params->copy_stat) {
+          fprintf(stderr, "argument --no-copy-stat / -n already set\n");
+          return COMMAND_INVALID;
+        }
+        params->copy_stat = BROTLI_FALSE;
+      } else if (strcmp("rm", arg) == 0) {
+        if (keep_set) {
+          fprintf(stderr, "argument --rm / -j or --keep / -k already set\n");
+          return COMMAND_INVALID;
+        }
+        keep_set = BROTLI_TRUE;
+        params->junk_source = BROTLI_TRUE;
+      } else if (strcmp("stdout", arg) == 0) {
+        if (output_set) {
+          fprintf(stderr, "write to standard output already set\n");
+          return COMMAND_INVALID;
+        }
+        output_set = BROTLI_TRUE;
+        params->write_to_stdout = BROTLI_TRUE;
+      } else if (strcmp("test", arg) == 0) {
+        if (command_set) {
+          fprintf(stderr, "command already set when parsing --test\n");
+          return COMMAND_INVALID;
+        }
+        command_set = BROTLI_TRUE;
+        command = COMMAND_TEST_INTEGRITY;
+      } else if (strcmp("verbose", arg) == 0) {
+        if (params->verbosity > 0) {
+          fprintf(stderr, "argument --verbose / -v already set\n");
+          return COMMAND_INVALID;
+        }
+        params->verbosity = 1;
+      } else if (strcmp("version", arg) == 0) {
+        /* Don't parse further. */
+        return COMMAND_VERSION;
+      } else {
+        /* key=value */
+        const char* value = strrchr(arg, '=');
+        size_t key_len;
+        if (!value || value[1] == 0) {
+          fprintf(stderr, "must pass the parameter as --%s=value\n", arg);
+          return COMMAND_INVALID;
+        }
+        key_len = (size_t)(value - arg);
+        value++;
+        if (strncmp("dictionary", arg, key_len) == 0) {
+          if (params->dictionary_path) {
+            fprintf(stderr, "dictionary path already set\n");
+            return COMMAND_INVALID;
+          }
+          params->dictionary_path = value;
+        } else if (strncmp("lgwin", arg, key_len) == 0) {
+          if (lgwin_set) {
+            fprintf(stderr, "lgwin parameter already set\n");
+            return COMMAND_INVALID;
+          }
+          lgwin_set = ParseInt(value, 0,
+                               BROTLI_MAX_WINDOW_BITS, &params->lgwin);
+          if (!lgwin_set) {
+            fprintf(stderr, "error parsing lgwin value [%s]\n", value);
+            return COMMAND_INVALID;
+          }
+          if (params->lgwin != 0 && params->lgwin < BROTLI_MIN_WINDOW_BITS) {
+            fprintf(stderr,
+                    "lgwin parameter (%d) smaller than the minimum (%d)\n",
+                    params->lgwin, BROTLI_MIN_WINDOW_BITS);
+            return COMMAND_INVALID;
+          }
+        } else if (strncmp("large_window", arg, key_len) == 0) {
+          /* This option is intentionally not mentioned in help. */
+          if (lgwin_set) {
+            fprintf(stderr, "lgwin parameter already set\n");
+            return COMMAND_INVALID;
+          }
+          lgwin_set = ParseInt(value, 0,
+                               BROTLI_LARGE_MAX_WINDOW_BITS, &params->lgwin);
+          if (!lgwin_set) {
+            fprintf(stderr, "error parsing lgwin value [%s]\n", value);
+            return COMMAND_INVALID;
+          }
+          if (params->lgwin != 0 && params->lgwin < BROTLI_MIN_WINDOW_BITS) {
+            fprintf(stderr,
+                    "lgwin parameter (%d) smaller than the minimum (%d)\n",
+                    params->lgwin, BROTLI_MIN_WINDOW_BITS);
+            return COMMAND_INVALID;
+          }
+        } else if (strncmp("output", arg, key_len) == 0) {
+          if (output_set) {
+            fprintf(stderr,
+                    "write to standard output already set (--output)\n");
+            return COMMAND_INVALID;
+          }
+          params->output_path = value;
+        } else if (strncmp("quality", arg, key_len) == 0) {
+          if (quality_set) {
+            fprintf(stderr, "quality already set\n");
+            return COMMAND_INVALID;
+          }
+          quality_set = ParseInt(value, BROTLI_MIN_QUALITY,
+                                 BROTLI_MAX_QUALITY, &params->quality);
+          if (!quality_set) {
+            fprintf(stderr, "error parsing quality value [%s]\n", value);
+            return COMMAND_INVALID;
+          }
+        } else if (strncmp("suffix", arg, key_len) == 0) {
+          if (suffix_set) {
+            fprintf(stderr, "suffix already set\n");
+            return COMMAND_INVALID;
+          }
+          suffix_set = BROTLI_TRUE;
+          params->suffix = value;
+        } else {
+          fprintf(stderr, "invalid parameter: [%s]\n", arg);
+          return COMMAND_INVALID;
+        }
+      }
+    }
+  }
+
+  params->input_count = input_count;
+  params->longest_path_len = longest_path_len;
+  params->decompress = (command == COMMAND_DECOMPRESS);
+  params->test_integrity = (command == COMMAND_TEST_INTEGRITY);
+
+  if (input_count > 1 && output_set) return COMMAND_INVALID;
+  if (params->test_integrity) {
+    if (params->output_path) return COMMAND_INVALID;
+    if (params->write_to_stdout) return COMMAND_INVALID;
+  }
+  if (strchr(params->suffix, '/') || strchr(params->suffix, '\\')) {
+    return COMMAND_INVALID;
+  }
+
+  return command;
+}
+
+static void PrintVersion(void) {
+  /* Unpacked as 8 bits of major, then 12 bits of minor and 12 of patch. */
+  const int version = BROTLI_VERSION;
+  fprintf(stdout, "brotli %d.%d.%d\n",
+          version >> 24, (version >> 12) & 0xFFF, version & 0xFFF);
+}
+
+static void PrintHelp(const char* name, BROTLI_BOOL error) {
+  FILE* media = error ? stderr : stdout;  /* stderr when reporting an error */
+  /* The text is split into literals shorter than 509 chars (C90 limit). */
+  fprintf(media,
+"Usage: %s [OPTION]... [FILE]...\n",
+          name);
+  fprintf(media,
+"Options:\n"
+"  -#                          compression level (0-9)\n"
+"  -c, --stdout                write on standard output\n"
+"  -d, --decompress            decompress\n"
+"  -f, --force                 force output file overwrite\n"
+"  -h, --help                  display this help and exit\n");
+  fprintf(media,
+"  -j, --rm                    remove source file(s)\n"
+"  -k, --keep                  keep source file(s) (default)\n"
+"  -n, --no-copy-stat          do not copy source file(s) attributes\n"
+"  -o FILE, --output=FILE      output file (only if 1 input file)\n");
+  fprintf(media,
+"  -q NUM, --quality=NUM       compression level (%d-%d)\n",
+          BROTLI_MIN_QUALITY, BROTLI_MAX_QUALITY);
+  fprintf(media,
+"  -t, --test                  test compressed file integrity\n"
+"  -v, --verbose               verbose mode\n");
+  fprintf(media,
+"  -w NUM, --lgwin=NUM         set LZ77 window size (0, %d-%d)\n"
+"                              window size = 2**NUM - 16\n"
+"                              0 lets compressor choose the optimal value\n",
+          BROTLI_MIN_WINDOW_BITS, BROTLI_MAX_WINDOW_BITS);
+  fprintf(media,
+"  --large_window=NUM          use incompatible large-window brotli\n"
+"                              bitstream with window size (0, %d-%d)\n"
+"                              WARNING: this format is not compatible\n"
+"                              with brotli RFC 7932 and may not be\n"
+"                              decodable with regular brotli decoders\n",
+          BROTLI_MIN_WINDOW_BITS, BROTLI_LARGE_MAX_WINDOW_BITS);
+  fprintf(media,
+"  -D FILE, --dictionary=FILE  use FILE as raw (LZ77) dictionary\n");
+  fprintf(media,
+"  -S SUF, --suffix=SUF        output file suffix (default:'%s')\n",
+          DEFAULT_SUFFIX);
+  fprintf(media,
+"  -V, --version               display version and exit\n"
+"  -Z, --best                  use best compression level (11) (default)\n"
+"Simple options could be coalesced, i.e. '-9kf' is equivalent to '-9 -k -f'.\n"
+"With no FILE, or when FILE is -, read standard input.\n"
+"All arguments after '--' are treated as files.\n");
+}
+
+static const char* PrintablePath(const char* path) {
+  return (path != NULL) ? path : "con";  /* NULL path denotes the console. */
+}
+
+static BROTLI_BOOL OpenInputFile(const char* input_path, FILE** f) {
+  FILE* stream;
+  if (input_path == NULL) {
+    /* No path: wrap standard input; the fdopen result is not checked. */
+    *f = fdopen(MAKE_BINARY(STDIN_FILENO), "rb");
+    return BROTLI_TRUE;
+  }
+  stream = fopen(input_path, "rb");
+  *f = stream;
+  if (stream != NULL) return BROTLI_TRUE;
+  fprintf(stderr, "failed to open input file [%s]: %s\n",
+          PrintablePath(input_path), strerror(errno));
+  return BROTLI_FALSE;
+}
+
+static BROTLI_BOOL OpenOutputFile(const char* output_path, FILE** f,
+                                  BROTLI_BOOL force) {
+  int flags = O_CREAT | O_WRONLY | O_TRUNC;
+  int fd;
+
+  if (output_path == NULL) {
+    /* No path: wrap standard output; the fdopen result is not checked. */
+    *f = fdopen(MAKE_BINARY(STDOUT_FILENO), "wb");
+    return BROTLI_TRUE;
+  }
+  /* Unless forced, O_EXCL is requested on top of O_CREAT. */
+  if (!force) flags |= O_EXCL;
+  fd = open(output_path, flags, S_IRUSR | S_IWUSR);
+  /* A failed open() skips fdopen(); both failures are reported alike. */
+  *f = (fd < 0) ? NULL : fdopen(fd, "wb");
+  if (*f == NULL) {
+    /* NOTE(review): |fd| is not closed here when only fdopen() failed. */
+    fprintf(stderr, "failed to open output file [%s]: %s\n",
+            PrintablePath(output_path), strerror(errno));
+    return BROTLI_FALSE;
+  }
+  return BROTLI_TRUE;
+}
+
+static int64_t FileSize(const char* path) {
+  /* -1 is reported for every failure: open, seek, tell or close. */
+  int64_t length = -1;
+  FILE* f = fopen(path, "rb");
+  if (f == NULL) {
+    return length;
+  }
+  /* After seeking to the end, the stream position is the file length. */
+  if (fseek(f, 0L, SEEK_END) == 0) {
+    length = ftell(f);
+  }
+  if (fclose(f) != 0) {
+    length = -1;
+  }
+  return length;
+}
+
+static int CopyTimeStat(const struct stat* statbuf, const char* output_path) {
+#if HAVE_UTIMENSAT
+  struct timespec times[2];  /* [0]: access time, [1]: modification time */
+  times[0].tv_sec = statbuf->st_atime;
+  times[0].tv_nsec = ATIME_NSEC(statbuf);
+  times[1].tv_sec = statbuf->st_mtime;
+  times[1].tv_nsec = MTIME_NSEC(statbuf);
+  return utimensat(AT_FDCWD, output_path, times, AT_SYMLINK_NOFOLLOW);
+#else
+  struct utimbuf times;  /* Fallback: no nanosecond part is copied. */
+  times.actime = statbuf->st_atime;
+  times.modtime = statbuf->st_mtime;
+  return utime(output_path, &times);
+#endif
+}
+
+/* Copies file times, access bits, group and owner from |input_path| to
+   |output_path|; does nothing if either path is NULL or stat() fails.
+   TODO(eustas): this is "best effort" only; add more hacks by request. */
+static void CopyStat(const char* input_path, const char* output_path) {
+  struct stat statbuf;
+  int res;
+  if (input_path == 0 || output_path == 0) {
+    return;
+  }
+  if (stat(input_path, &statbuf) != 0) {
+    return;
+  }
+  res = CopyTimeStat(&statbuf, output_path);  /* Failure is not reported. */
+  res = chmod(output_path, statbuf.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO));
+  if (res != 0) {
+    fprintf(stderr, "setting access bits failed for [%s]: %s\n",
+            PrintablePath(output_path), strerror(errno));
+  }
+  res = chown(output_path, (uid_t)-1, statbuf.st_gid);  /* Group only. */
+  if (res != 0) {
+    fprintf(stderr, "setting group failed for [%s]: %s\n",
+            PrintablePath(output_path), strerror(errno));
+  }
+  res = chown(output_path, statbuf.st_uid, (gid_t)-1);  /* Owner only. */
+  if (res != 0) {
+    fprintf(stderr, "setting user failed for [%s]: %s\n",
+            PrintablePath(output_path), strerror(errno));
+  }
+}
+
+/* Loads |context->dictionary_path| (if set) into |context->dictionary| and,
+   for COMMAND_COMPRESS, prepares it; |dictionary_size| gets the byte count. */
+static BROTLI_BOOL ReadDictionary(Context* context, Command command) {
+  static const int kMaxDictionarySize =
+      BROTLI_MAX_DISTANCE - BROTLI_MAX_BACKWARD_LIMIT(24);
+  FILE* f;
+  int64_t file_size_64;
+  uint8_t* buffer;
+  size_t bytes_read;
+
+  if (context->dictionary_path == NULL) return BROTLI_TRUE;
+  f = fopen(context->dictionary_path, "rb");
+  if (f == NULL) {
+    fprintf(stderr, "failed to open dictionary file [%s]: %s\n",
+            PrintablePath(context->dictionary_path), strerror(errno));
+    return BROTLI_FALSE;
+  }
+
+  file_size_64 = FileSize(context->dictionary_path);
+  if (file_size_64 == -1) {
+    fprintf(stderr, "could not get size of dictionary file [%s]\n",
+            PrintablePath(context->dictionary_path));
+    fclose(f);
+    return BROTLI_FALSE;
+  }
+
+  if (file_size_64 > kMaxDictionarySize) {
+    fprintf(stderr, "dictionary [%s] is larger than maximum allowed: %d\n",
+            PrintablePath(context->dictionary_path), kMaxDictionarySize);
+    fclose(f);
+    return BROTLI_FALSE;
+  }
+  context->dictionary_size = (size_t)file_size_64;
+
+  buffer = (uint8_t*)malloc(context->dictionary_size);
+  if (!buffer) {
+    fprintf(stderr, "could not read dictionary: out of memory\n");
+    fclose(f);
+    return BROTLI_FALSE;
+  }
+  bytes_read = fread(buffer, sizeof(uint8_t), context->dictionary_size, f);
+  if (bytes_read != context->dictionary_size) {
+    free(buffer);
+    fprintf(stderr, "failed to read dictionary [%s]: %s\n",
+            PrintablePath(context->dictionary_path), strerror(errno));
+    fclose(f);
+    return BROTLI_FALSE;
+  }
+  fclose(f);
+  context->dictionary = buffer;
+  if (command == COMMAND_COMPRESS) {
+    context->prepared_dictionary = BrotliEncoderPrepareDictionary(
+        BROTLI_SHARED_DICTIONARY_RAW, context->dictionary_size,
+        context->dictionary, BROTLI_MAX_QUALITY, NULL, NULL, NULL);
+    if (context->prepared_dictionary == NULL) {
+      fprintf(stderr, "failed to prepare dictionary [%s]\n",
+              PrintablePath(context->dictionary_path));
+      return BROTLI_FALSE;
+    }
+  }
+  return BROTLI_TRUE;
+}
+
+static BROTLI_BOOL NextFile(Context* context) {
+  const char* arg;
+  size_t arg_len;
+
+  /* Iterator points to last used arg; increment to search for the next one. */
+  context->iterator++;
+
+  context->input_file_length = -1;
+
+  /* No input path; read from console. */
+  if (context->input_count == 0) {
+    if (context->iterator > 1) return BROTLI_FALSE;
+    context->current_input_path = NULL;
+    /* Either write to the specified path, or to console. */
+    context->current_output_path = context->output_path;
+    return BROTLI_TRUE;
+  }
+
+  /* Skip option arguments; never read past the end of |not_input_indices|. */
+  while (context->ignore < MAX_OPTIONS &&
+         context->iterator == context->not_input_indices[context->ignore]) {
+    context->iterator++;
+    context->ignore++;
+  }
+
+  /* All args are scanned already. */
+  if (context->iterator >= context->argc) return BROTLI_FALSE;
+
+  /* Iterator now points to the input file name. */
+  arg = context->argv[context->iterator];
+  arg_len = strlen(arg);
+  /* Read from console. */
+  if (arg_len == 1 && arg[0] == '-') {
+    context->current_input_path = NULL;
+    context->current_output_path = context->output_path;
+    return BROTLI_TRUE;
+  }
+
+  context->current_input_path = arg;
+  context->input_file_length = FileSize(arg);
+  context->current_output_path = context->output_path;
+
+  if (context->output_path) return BROTLI_TRUE;
+  if (context->write_to_stdout) return BROTLI_TRUE;
+
+  strcpy(context->modified_path, arg);
+  context->current_output_path = context->modified_path;
+  /* If output is not specified, input path suffix should match. */
+  if (context->decompress) {
+    size_t suffix_len = strlen(context->suffix);
+    char* name = (char*)FileName(context->modified_path);
+    char* name_suffix;
+    size_t name_len = strlen(name);
+    if (name_len < suffix_len + 1) {
+      fprintf(stderr, "empty output file name for [%s] input file\n",
+              PrintablePath(arg));
+      context->iterator_error = BROTLI_TRUE;
+      return BROTLI_FALSE;
+    }
+    name_suffix = name + name_len - suffix_len;
+    if (strcmp(context->suffix, name_suffix) != 0) {
+      fprintf(stderr, "input file [%s] suffix mismatch\n", PrintablePath(arg));
+      context->iterator_error = BROTLI_TRUE;
+      return BROTLI_FALSE;
+    }
+    name_suffix[0] = 0;  /* Cut the suffix: the rest is the output path. */
+    return BROTLI_TRUE;
+  } else {
+    strcpy(context->modified_path + arg_len, context->suffix);
+    return BROTLI_TRUE;
+  }
+}
+
+static BROTLI_BOOL OpenFiles(Context* context) {
+  if (!OpenInputFile(context->current_input_path, &context->fin)) {
+    return BROTLI_FALSE;
+  }
+  if (context->test_integrity) return BROTLI_TRUE;  /* Test mode: no output. */
+  return OpenOutputFile(context->current_output_path, &context->fout,
+                        context->force_overwrite);
+}
+
+static BROTLI_BOOL CloseFiles(Context* context, BROTLI_BOOL success) {
+  BROTLI_BOOL is_ok = BROTLI_TRUE;
+  if (!context->test_integrity && context->fout) {
+    if (!success && context->current_output_path) {
+      unlink(context->current_output_path);  /* Drop the failed output. */
+    }
+    if (fclose(context->fout) != 0) {
+      if (success) {
+        fprintf(stderr, "fclose failed [%s]: %s\n",
+                PrintablePath(context->current_output_path), strerror(errno));
+      }
+      is_ok = BROTLI_FALSE;
+    }
+
+    /* TOCTOU violation, but otherwise it is impossible to set file times. */
+    if (success && is_ok && context->copy_stat) {
+      CopyStat(context->current_input_path, context->current_output_path);
+    }
+  }
+
+  if (context->fin) {
+    if (fclose(context->fin) != 0) {
+      if (is_ok) {  /* Stay silent if a failure was already detected. */
+        fprintf(stderr, "fclose failed [%s]: %s\n",
+                PrintablePath(context->current_input_path), strerror(errno));
+      }
+      is_ok = BROTLI_FALSE;
+    }
+  }
+  if (success && context->junk_source && context->current_input_path) {
+    unlink(context->current_input_path);  /* -j / --rm was requested. */
+  }
+
+  context->fin = NULL;
+  context->fout = NULL;
+
+  return is_ok;
+}
+
+static const size_t kFileBufferSize = 1 << 19;  /* 512 KiB */
+
+static void InitializeBuffers(Context* context) {
+  /* No input is pending; kFileBufferSize bytes are free at |output|. */
+  context->next_in = NULL;
+  context->available_in = 0;
+  context->next_out = context->output;
+  context->available_out = kFileBufferSize;
+  /* Restart the per-file byte counters (and the timer, if verbose). */
+  context->total_out = 0;
+  context->total_in = 0;
+  if (context->verbosity > 0) context->start_time = clock();
+}
+
+/* Reports BROTLI_TRUE until the end-of-file indicator of |fin| is set, so it
+   is trustworthy only after an empty / incomplete read. */
+static BROTLI_BOOL HasMoreInput(Context* context) {
+  return TO_BROTLI_BOOL(!feof(context->fin));
+}
+
+static BROTLI_BOOL ProvideInput(Context* context) {
+  /* Refill |input| from its start; only ferror() makes the call fail. */
+  size_t got = fread(context->input, 1, kFileBufferSize, context->fin);
+  context->next_in = context->input;
+  context->available_in = got;
+  context->total_in += got;
+
+  if (!ferror(context->fin)) return BROTLI_TRUE;
+  fprintf(stderr, "failed to read input [%s]: %s\n",
+          PrintablePath(context->current_input_path), strerror(errno));
+  return BROTLI_FALSE;
+}
+
+/* Internal: should be used only in Provide-/Flush-Output.
+   Adds the bytes accumulated in the output buffer to the output counter and,
+   unless integrity-test mode is active, writes them to the output file.
+   Returns BROTLI_FALSE, after printing a diagnostic to stderr, when the
+   output stream's error indicator is set. */
+static BROTLI_BOOL WriteOutput(Context* context) {
+  const size_t pending = (size_t)(context->next_out - context->output);
+  context->total_out += pending;
+  /* Nothing buffered, or the data is discarded in integrity-test mode. */
+  if (pending == 0 || context->test_integrity) return BROTLI_TRUE;
+
+  fwrite(context->output, 1, pending, context->fout);
+  if (!ferror(context->fout)) return BROTLI_TRUE;
+  fprintf(stderr, "failed to write output [%s]: %s\n",
+          PrintablePath(context->current_output_path), strerror(errno));
+  return BROTLI_FALSE;
+}
+
+/* Writes out whatever is buffered and then makes the entire output buffer
+   available again. Returns BROTLI_FALSE if the write failed; in that case the
+   output cursor is left untouched. */
+static BROTLI_BOOL ProvideOutput(Context* context) {
+  if (WriteOutput(context)) {
+    context->next_out = context->output;
+    context->available_out = kFileBufferSize;
+    return BROTLI_TRUE;
+  }
+  return BROTLI_FALSE;
+}
+
+/* Writes out whatever is buffered and marks the output buffer as having no
+   free space left. Returns BROTLI_FALSE if the write failed. */
+static BROTLI_BOOL FlushOutput(Context* context) {
+  if (WriteOutput(context)) {
+    context->available_out = 0;
+    return BROTLI_TRUE;
+  }
+  return BROTLI_FALSE;
+}
+
+/* Prints |value| to stderr as a human-readable byte count: plain bytes below
+   1024, otherwise KiB, MiB or GiB with three decimal places. */
+static void PrintBytes(size_t value) {
+  const double bytes = (double)value;
+  /* Test the largest unit first and leave as soon as one matches. */
+  if (value >= 1073741824) {
+    fprintf(stderr, "%0.3f GiB", bytes / 1073741824.0);
+    return;
+  }
+  if (value >= 1048576) {
+    fprintf(stderr, "%0.3f MiB", bytes / 1048576.0);
+    return;
+  }
+  if (value >= 1024) {
+    fprintf(stderr, "%0.3f KiB", bytes / 1024.0);
+    return;
+  }
+  fprintf(stderr, "%d B", (int)value);
+}
+
+/* Prints a one-line summary for the current file to stderr:
+   "[path]: <bytes in> -> <bytes out> in <seconds> sec" (no trailing newline).
+   The duration is derived from the start/end clock() values in |context|. */
+static void PrintFileProcessingProgress(Context* context) {
+  const double elapsed_sec =
+      (double)(context->end_time - context->start_time) / CLOCKS_PER_SEC;
+  fprintf(stderr, "[%s]: ", PrintablePath(context->current_input_path));
+  PrintBytes(context->total_in);
+  fprintf(stderr, " -> ");
+  PrintBytes(context->total_out);
+  fprintf(stderr, " in %1.2f sec", elapsed_sec);
+}
+
+/* Streams the current input file through decoder |s|.
+   Each iteration reacts to the previous decoder result (refill input, drain
+   output, finish, or fail) and then runs the decoder again.
+   Returns BROTLI_TRUE once the stream decoded completely with no trailing
+   bytes; returns BROTLI_FALSE on I/O failure, truncated input, trailing data
+   or a decoder error (a diagnostic is printed to stderr). */
+static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) {
+  BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
+  InitializeBuffers(context);
+  for (;;) {
+    switch (result) {
+      case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT:
+        /* The decoder wants more data but the file has ended: truncated. */
+        if (!HasMoreInput(context)) {
+          fprintf(stderr, "corrupt input [%s]\n",
+                  PrintablePath(context->current_input_path));
+          return BROTLI_FALSE;
+        }
+        if (!ProvideInput(context)) return BROTLI_FALSE;
+        break;
+
+      case BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT:
+        if (!ProvideOutput(context)) return BROTLI_FALSE;
+        break;
+
+      case BROTLI_DECODER_RESULT_SUCCESS: {
+        int trailing_data;
+        if (!FlushOutput(context)) return BROTLI_FALSE;
+        /* Anything left after the end of the stream is treated as corruption:
+           either unread bytes in the buffer or more bytes in the file. */
+        trailing_data =
+            (context->available_in != 0) || (fgetc(context->fin) != EOF);
+        if (trailing_data) {
+          fprintf(stderr, "corrupt input [%s]\n",
+                  PrintablePath(context->current_input_path));
+          return BROTLI_FALSE;
+        }
+        if (context->verbosity > 0) {
+          context->end_time = clock();
+          fprintf(stderr, "Decompressed ");
+          PrintFileProcessingProgress(context);
+          fprintf(stderr, "\n");
+        }
+        return BROTLI_TRUE;
+      }
+
+      default:
+        /* Any other result is reported as corrupt input. */
+        fprintf(stderr, "corrupt input [%s]\n",
+                PrintablePath(context->current_input_path));
+        return BROTLI_FALSE;
+    }
+
+    result = BrotliDecoderDecompressStream(s, &context->available_in,
+        &context->next_in, &context->available_out, &context->next_out, 0);
+  }
+}
+
+/* Decompresses every file produced by NextFile(), using a fresh decoder
+   instance per file. Processing stops at the first failure.
+   Returns BROTLI_TRUE only if all files were handled successfully. */
+static BROTLI_BOOL DecompressFiles(Context* context) {
+  while (NextFile(context)) {
+    BROTLI_BOOL file_ok;
+    BrotliDecoderState* decoder =
+        BrotliDecoderCreateInstance(NULL, NULL, NULL);
+    if (decoder == NULL) {
+      fprintf(stderr, "out of memory\n");
+      return BROTLI_FALSE;
+    }
+    /* This allows decoding "large-window" streams. Though it creates
+       fragmentation (new builds decode streams that old builds don't),
+       it is better from user experience perspective. */
+    BrotliDecoderSetParameter(decoder, BROTLI_DECODER_PARAM_LARGE_WINDOW, 1u);
+    if (context->dictionary != NULL) {
+      BrotliDecoderAttachDictionary(decoder, BROTLI_SHARED_DICTIONARY_RAW,
+          context->dictionary_size, context->dictionary);
+    }
+    file_ok = OpenFiles(context);
+    /* Refuse to read compressed data from an interactive terminal unless the
+       user forced it. */
+    if (file_ok && context->current_input_path == NULL &&
+        !context->force_overwrite && isatty(STDIN_FILENO)) {
+      fprintf(stderr, "Use -h help. Use -f to force input from a terminal.\n");
+      file_ok = BROTLI_FALSE;
+    }
+    if (file_ok) {
+      file_ok = DecompressFile(context, decoder);
+    }
+    BrotliDecoderDestroyInstance(decoder);
+    /* Files are always closed; CloseFiles is told whether decoding worked. */
+    if (!CloseFiles(context, file_ok)) {
+      file_ok = BROTLI_FALSE;
+    }
+    if (!file_ok) return BROTLI_FALSE;
+  }
+  return BROTLI_TRUE;
+}
+
+/* Streams the current input file through encoder |s|.
+   Input is refilled whenever the buffer is empty and the end of the file has
+   not been seen; once it has, the encoder is asked to finish the stream.
+   Returns BROTLI_TRUE when the encoder reports completion and all output was
+   flushed; returns BROTLI_FALSE on I/O or encoder failure. */
+static BROTLI_BOOL CompressFile(Context* context, BrotliEncoderState* s) {
+  BROTLI_BOOL input_done = BROTLI_FALSE;
+  InitializeBuffers(context);
+  for (;;) {
+    BrotliEncoderOperation op;
+    if (context->available_in == 0 && !input_done) {
+      if (!ProvideInput(context)) return BROTLI_FALSE;
+      input_done = !HasMoreInput(context);
+    }
+
+    op = input_done ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS;
+    if (!BrotliEncoderCompressStream(s, op,
+        &context->available_in, &context->next_in,
+        &context->available_out, &context->next_out, NULL)) {
+      /* Should detect OOM? */
+      fprintf(stderr, "failed to compress data [%s]\n",
+              PrintablePath(context->current_input_path));
+      return BROTLI_FALSE;
+    }
+
+    /* Output buffer is full: write it out and start over. */
+    if (context->available_out == 0 && !ProvideOutput(context)) {
+      return BROTLI_FALSE;
+    }
+
+    if (!BrotliEncoderIsFinished(s)) continue;
+
+    if (!FlushOutput(context)) return BROTLI_FALSE;
+    if (context->verbosity > 0) {
+      context->end_time = clock();
+      fprintf(stderr, "Compressed ");
+      PrintFileProcessingProgress(context);
+      fprintf(stderr, "\n");
+    }
+    return BROTLI_TRUE;
+  }
+}
+
+/* Compresses every file produced by NextFile(), using a fresh encoder
+   instance per file that is configured from the options in |context|
+   (quality, window size, size hint, dictionary). Processing stops at the
+   first failure. Returns BROTLI_TRUE only if all files were handled
+   successfully. */
+static BROTLI_BOOL CompressFiles(Context* context) {
+  while (NextFile(context)) {
+    BROTLI_BOOL file_ok;
+    BrotliEncoderState* encoder =
+        BrotliEncoderCreateInstance(NULL, NULL, NULL);
+    if (encoder == NULL) {
+      fprintf(stderr, "out of memory\n");
+      return BROTLI_FALSE;
+    }
+    BrotliEncoderSetParameter(encoder,
+        BROTLI_PARAM_QUALITY, (uint32_t)context->quality);
+    if (context->lgwin <= 0) {
+      /* 0, or not specified by user; could be chosen by compressor. */
+      uint32_t lgwin = DEFAULT_LGWIN;
+      /* Use file size to limit lgwin. */
+      if (context->input_file_length >= 0) {
+        lgwin = BROTLI_MIN_WINDOW_BITS;
+        while (BROTLI_MAX_BACKWARD_LIMIT(lgwin) <
+               (uint64_t)context->input_file_length) {
+          lgwin++;
+          if (lgwin == BROTLI_MAX_WINDOW_BITS) break;
+        }
+      }
+      BrotliEncoderSetParameter(encoder, BROTLI_PARAM_LGWIN, lgwin);
+    } else {
+      /* Specified by user. */
+      /* Do not enable "large-window" extension, if not required. */
+      if (context->lgwin > BROTLI_MAX_WINDOW_BITS) {
+        BrotliEncoderSetParameter(encoder, BROTLI_PARAM_LARGE_WINDOW, 1u);
+      }
+      BrotliEncoderSetParameter(encoder,
+          BROTLI_PARAM_LGWIN, (uint32_t)context->lgwin);
+    }
+    if (context->input_file_length > 0) {
+      /* The size hint is capped at 1 GiB. */
+      uint32_t size_hint = (1u << 30);
+      if (context->input_file_length < (1 << 30)) {
+        size_hint = (uint32_t)context->input_file_length;
+      }
+      BrotliEncoderSetParameter(encoder, BROTLI_PARAM_SIZE_HINT, size_hint);
+    }
+    if (context->dictionary != NULL) {
+      BrotliEncoderAttachPreparedDictionary(encoder,
+          context->prepared_dictionary);
+    }
+    file_ok = OpenFiles(context);
+    /* Refuse to write compressed data to an interactive terminal unless the
+       user forced it. */
+    if (file_ok && context->current_output_path == NULL &&
+        !context->force_overwrite && isatty(STDOUT_FILENO)) {
+      fprintf(stderr, "Use -h help. Use -f to force output to a terminal.\n");
+      file_ok = BROTLI_FALSE;
+    }
+    if (file_ok) {
+      file_ok = CompressFile(context, encoder);
+    }
+    BrotliEncoderDestroyInstance(encoder);
+    /* Files are always closed; CloseFiles is told whether encoding worked. */
+    if (!CloseFiles(context, file_ok)) {
+      file_ok = BROTLI_FALSE;
+    }
+    if (!file_ok) return BROTLI_FALSE;
+  }
+  return BROTLI_TRUE;
+}
+
+/* Entry point of the command line tool.
+   Fills |context| with default option values, lets ParseParams() pick the
+   command, allocates the working buffers for the (de)compression commands,
+   dispatches the command and finally releases all resources.
+   Returns 0 on success; terminates with exit status 1 on any failure. */
+int main(int argc, char** argv) {
+  Command command;
+  Context context;
+  BROTLI_BOOL is_ok = BROTLI_TRUE;
+  int i;
+
+  /* Default option values; ParseParams() is given the context afterwards. */
+  context.quality = 11;
+  context.lgwin = -1;
+  context.verbosity = 0;
+  context.force_overwrite = BROTLI_FALSE;
+  context.junk_source = BROTLI_FALSE;
+  context.copy_stat = BROTLI_TRUE;
+  context.test_integrity = BROTLI_FALSE;
+  context.write_to_stdout = BROTLI_FALSE;
+  context.decompress = BROTLI_FALSE;
+  context.large_window = BROTLI_FALSE;
+  context.output_path = NULL;
+  context.dictionary_path = NULL;
+  context.suffix = DEFAULT_SUFFIX;
+  for (i = 0; i < MAX_OPTIONS; ++i) context.not_input_indices[i] = 0;
+  context.longest_path_len = 1;
+  context.input_count = 0;
+
+  /* Runtime state: nothing allocated and no file open yet. The pointers are
+     set to NULL so the unconditional cleanup at the end is safe. */
+  context.argc = argc;
+  context.argv = argv;
+  context.dictionary = NULL;
+  context.dictionary_size = 0;
+  context.prepared_dictionary = NULL;
+  context.modified_path = NULL;
+  context.iterator = 0;
+  context.ignore = 0;
+  context.iterator_error = BROTLI_FALSE;
+  context.buffer = NULL;
+  context.current_input_path = NULL;
+  context.current_output_path = NULL;
+  context.fin = NULL;
+  context.fout = NULL;
+
+  command = ParseParams(&context);
+
+  /* Only commands that process files need the dictionary and the buffers. */
+  if (command == COMMAND_COMPRESS || command == COMMAND_DECOMPRESS ||
+      command == COMMAND_TEST_INTEGRITY) {
+    if (!ReadDictionary(&context, command)) is_ok = BROTLI_FALSE;
+    if (is_ok) {
+      /* Room for the longest path plus the suffix and one extra byte. */
+      size_t modified_path_len =
+          context.longest_path_len + strlen(context.suffix) + 1;
+      context.modified_path = (char*)malloc(modified_path_len);
+      /* One allocation holds both buffers: input first, output second. */
+      context.buffer = (uint8_t*)malloc(kFileBufferSize * 2);
+      if (!context.modified_path || !context.buffer) {
+        fprintf(stderr, "out of memory\n");
+        is_ok = BROTLI_FALSE;
+      } else {
+        context.input = context.buffer;
+        context.output = context.buffer + kFileBufferSize;
+      }
+    }
+  }
+
+  /* After a setup failure nothing is executed, but cleanup still runs. */
+  if (!is_ok) command = COMMAND_NOOP;
+
+  switch (command) {
+    case COMMAND_NOOP:
+      break;
+
+    case COMMAND_VERSION:
+      PrintVersion();
+      break;
+
+    case COMMAND_COMPRESS:
+      is_ok = CompressFiles(&context);
+      break;
+
+    /* Integrity testing shares the decompression path. */
+    case COMMAND_DECOMPRESS:
+    case COMMAND_TEST_INTEGRITY:
+      is_ok = DecompressFiles(&context);
+      break;
+
+    /* Help is printed in both cases; only an explicit help request counts as
+       success. */
+    case COMMAND_HELP:
+    case COMMAND_INVALID:
+    default:
+      is_ok = (command == COMMAND_HELP);
+      PrintHelp(FileName(argv[0]), is_ok);
+      break;
+  }
+
+  if (context.iterator_error) is_ok = BROTLI_FALSE;
+
+  /* Release everything that may have been allocated above. */
+  BrotliEncoderDestroyPreparedDictionary(context.prepared_dictionary);
+  free(context.dictionary);
+  free(context.modified_path);
+  free(context.buffer);
+
+  if (!is_ok) exit(1);
+  return 0;
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/tools/brotli.md b/third-party/libjxl/libjxl/third_party/brotli/c/tools/brotli.md
new file mode 100644
index 0000000000..cb6d6f3813
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/c/tools/brotli.md
@@ -0,0 +1,108 @@
+# NAME
+
+brotli(1) -- brotli, unbrotli - compress or decompress files
+
+# SYNOPSIS
+
+`brotli` [*OPTION|FILE*]...
+
+`unbrotli` is equivalent to `brotli --decompress`
+
+# DESCRIPTION
+
+`brotli` is a generic-purpose lossless compression algorithm that compresses
+data using a combination of a modern variant of the **LZ77** algorithm, Huffman
+coding and 2-nd order context modeling, with a compression ratio comparable to
+the best currently available general-purpose compression methods. It is similar
+in speed to deflate but offers denser compression.
+
+`brotli` command line syntax is similar to `gzip (1)` and `zstd (1)`.
+Unlike `gzip (1)`, source files are preserved by default. It is possible to
+remove them after processing by using the `--rm` _option_.
+
+Arguments that look like "`--name`" or "`--name=value`" are _options_. Every
+_option_ has a short form "`-x`" or "`-x value`". Multiple short form _options_
+could be coalesced:
+
+* "`--decompress --stdout --suffix=.b`" works the same as
+* "`-d -s -S .b`" and
+* "`-dsS .b`"
+
+`brotli` has 3 operation modes:
+
+* default mode is compression;
+* `--decompress` option activates decompression mode;
+* `--test` option switches to integrity test mode; this option is equivalent to
+  "`--decompress --stdout`" except that the decompressed data is discarded
+  instead of being written to standard output.
+
+Every non-option argument is a _file_ entry. If no _files_ are given or _file_
+is "`-`", `brotli` reads from standard input. All arguments after "`--`" are
+_file_ entries.
+
+Unless `--stdout` or `--output` is specified, _files_ are written to a new file
+whose name is derived from the source _file_ name:
+
+* when compressing, a suffix is appended to the source filename to
+  get the target filename
+* when decompressing, a suffix is removed from the source filename to
+  get the target filename
+
+Default suffix is `.br`, but it could be specified with `--suffix` option.
+
+Conflicting or duplicate _options_ are not allowed.
+
+# OPTIONS
+
+* `-#`:
+    compression level (0-9); bigger values cause denser, but slower compression
+* `-c`, `--stdout`:
+    write on standard output
+* `-d`, `--decompress`:
+    decompress mode
+* `-f`, `--force`:
+    force output file overwrite
+* `-h`, `--help`:
+    display this help and exit
+* `-j`, `--rm`:
+    remove source file(s); `gzip (1)`-like behaviour
+* `-k`, `--keep`:
+    keep source file(s); `zstd (1)`-like behaviour
+* `-n`, `--no-copy-stat`:
+    do not copy source file(s) attributes
+* `-o FILE`, `--output=FILE`:
+    output file; valid only if there is a single input entry
+* `-q NUM`, `--quality=NUM`:
+    compression level (0-11); bigger values cause denser, but slower compression
+* `-t`, `--test`:
+    test file integrity mode
+* `-v`, `--verbose`:
+    increase output verbosity
+* `-w NUM`, `--lgwin=NUM`:
+    set LZ77 window size (0, 10-24) (default: 24); window size is
+    `(pow(2, NUM) - 16)`; 0 lets compressor decide over the optimal value;
+    a bigger window size improves density; decoder might require up to window size
+    memory to operate
+* `-D FILE`, `--dictionary=FILE`:
+    use FILE as raw (LZ77) dictionary; same dictionary MUST be used both for
+    compression and decompression
+* `-S SUF`, `--suffix=SUF`:
+    output file suffix (default: `.br`)
+* `-V`, `--version`:
+    display version and exit
+* `-Z`, `--best`:
+    use best compression level (default); same as "`-q 11`"
+
+# SEE ALSO
+
+`brotli` file format is defined in
+[RFC 7932](https://www.ietf.org/rfc/rfc7932.txt).
+
+`brotli` is open-sourced under the
+[MIT License](https://opensource.org/licenses/MIT).
+
+Mailing list: https://groups.google.com/forum/#!forum/brotli
+
+# BUGS
+
+Report bugs at: https://github.com/google/brotli/issues
diff --git a/third-party/libjxl/libjxl/third_party/brotli/compiler_config_setting.bzl b/third-party/libjxl/libjxl/third_party/brotli/compiler_config_setting.bzl
new file mode 100644
index 0000000000..572032bf7e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/compiler_config_setting.bzl
@@ -0,0 +1,28 @@
+# Copyright 2018 Google Inc. All Rights Reserved.
+#
+# Distributed under MIT license.
+#  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+
+"""Creates config_setting that allows selecting based on 'compiler' value."""
+
+def create_msvc_config():
+  """Declares the public "msvc" config_setting that matches the msvc-cl compiler."""
+
+  # The "do_not_use_tools_cpp_compiler_present" attribute exists to
+  # distinguish between older versions of Bazel that do not support
+  # "@bazel_tools//tools/cpp:compiler" flag_value, and newer ones that do.
+  # In the future, the only way to select on the compiler will be through
+  # flag_values{"@bazel_tools//tools/cpp:compiler"} and the else branch can
+  # be removed.
+  if hasattr(cc_common, "do_not_use_tools_cpp_compiler_present"):
+    selector = {
+        "flag_values": {
+            "@bazel_tools//tools/cpp:compiler": "msvc-cl",
+        },
+    }
+  else:
+    selector = {"values": {"compiler": "msvc-cl"}}
+
+  # Both variants share the name and visibility; only the selector differs.
+  native.config_setting(
+    name = "msvc",
+    visibility = ["//visibility:public"],
+    **selector
+  )
diff --git a/third-party/libjxl/libjxl/third_party/brotli/configure b/third-party/libjxl/libjxl/third_party/brotli/configure
new file mode 100755
index 0000000000..d96129a444
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/configure
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+# Placeholder script: it configures nothing and only prints a pointer to the
+# supported build systems, one message per line.
+printf '%s\n' \
+    "Use Autotools, Bazel, CMake or Premake5 to generate projects / build files." \
+    "  Bazel: http://www.bazel.build/" \
+    "  CMake: https://cmake.org/" \
+    "  Premake5: https://premake.github.io/" \
+    "To generate Autotools 'configure' file run './bootstrap'." \
+    "Run './configure-cmake' for Autotools-like CMake configuration." \
+    "Or simply run 'make' to build and test command line tool."
diff --git a/third-party/libjxl/libjxl/third_party/brotli/configure-cmake b/third-party/libjxl/libjxl/third_party/brotli/configure-cmake
new file mode 100644
index 0000000000..6dfb92c4e4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/configure-cmake
@@ -0,0 +1,318 @@
+#!/usr/bin/env bash
+
+# Autotools-style (./configure) wrapper for CMake
+# <https://github.com/nemequ/configure-cmake>
+#
+#   *** IMPORTANT ***
+#
+#   You must include the GNUInstallDirs module (which comes with
+#   CMake) in your project.  Just put "include (GNUInstallDirs)" in
+#   your CMakeLists.txt and you should be good.
+#
+# This script was originally written for Squash
+# <https://quixdb.github.io/squash/> by Evan Nemerson
+# <evan@nemerson.com>, but has been spun off into a separate
+# repository.  Please feel free to copy it into your own repository,
+# though I would appreciate it if you would post improvements, bugs,
+# feature requests, etc. to the issue tracker at
+# <https://github.com/nemequ/configure-cmake/issues>.
+#
+# To the extent possible under law, the author(s) hereby waive all
+# copyright and related or neighboring rights to this work.  For
+# details, see <https://creativecommons.org/publicdomain/zero/1.0/>
+
+# Directory containing this script; used as the top of the source tree.
+# "$0" is quoted so that a script path containing spaces is not split into
+# several arguments to dirname.
+TOP_SRCDIR="$(dirname "$0")"
+
+# CMake executable to run; a non-empty CMAKE_CMD from the environment wins.
+if [ "${CMAKE_CMD}" = "" ]; then
+    CMAKE_CMD="cmake"
+fi
+
+# Defaults that the command-line options parsed further down may override.
+BUILD_TYPE="Debug"
+PREFIX=/usr/local
+LIBDIR=
+CMAKE_ARGS=
+
+# Optional project-specific settings, sourced when the file exists.
+if [ -e "${TOP_SRCDIR}/scripts/.configure-custom.sh" ]; then
+    . "${TOP_SRCDIR}/scripts/.configure-custom.sh"
+fi
+
+# Prints $1 wrapped in single quotes so that it survives a later `eval`
+# unchanged: every embedded single quote becomes '\'' and the whole value is
+# enclosed in '...'.
+# printf is used instead of echo because echo would treat values such as
+# "-n" or "-e" as its own options and print nothing, silently dropping the
+# argument.
+quote() {
+    printf '%s\n' "$1" | sed -e "s|'|'\\\\''|g; 1s/^/'/; \$s/\$/'/"
+}
+
+# Splits one variable description into the globals VAR_NAME, VAR_VALUE and
+# VAR_UC_NAME.
+#   $1  option name; trailing blanks are stripped
+#   $2  value to use, "yes" when empty or missing
+#   $3  CMake variable name; when empty or missing it is derived from $1 by
+#       upper-casing it and turning every non-alphanumeric character into "_"
+extract_var_string() {
+    VAR_NAME=$(echo $1 | sed -e 's/[ \t]*$//')
+    VAR_VALUE=${2:-yes}
+
+    if [ -n "$3" ]; then
+        VAR_UC_NAME=$3
+    else
+        VAR_UC_NAME=$(echo "$1" | tr '[:lower:]' '[:upper:]' | tr -c '[:alnum:]' '_' | sed 's/_$//g')
+    fi
+}
+
+set_config_var() {
+    is_with=n
+    case "$1" in
+        "--enable-"*)
+            name="${1#--enable-}"
+            cfg="${ENABLE_VARS}"
+            ;;
+        "--disable-"*)
+            name="${1#--disable-}";
+            cfg="${DISABLE_VARS}";
+            ;;
+        "--with-"*)
+            # IFS="=" read -ra WITHARGS <<< "${1}"
+            name="${1#--with-}"
+            cfg="${WITH_VARS}"
+            is_with=y
+            ;;
+    esac
+
+    found=n
+    for varstring in $cfg; do
+        extract_var_string $(echo "${varstring}" | tr '|' ' ')
+        if [ "x$VAR_NAME" = "x$name" ]; then
+            found=y
+            break;
+        fi
+    done
+
+    if [ "$found" = "y" ]; then
+        if [ "x$is_with" = "xy" ]; then
+            CMAKE_ARGS="$CMAKE_ARGS -D${VAR_UC_NAME}=$(quote "$2")"
+        else
+            CMAKE_ARGS="$CMAKE_ARGS -D${VAR_UC_NAME}=$(quote "${VAR_VALUE}")"
+        fi
+    else
+        echo "Unknown parameter: ${1}"
+        exit 1
+    fi
+}
+
+# Prints the length of $1, as reported by awk, plus one.
+prefix_to_offset() {
+    local prefix_len
+    prefix_len=$(echo "${1}" | awk '{ print length }')
+    expr ${prefix_len} + 1
+}
+
+# Prints the usage text and exits with status 0.
+# The fixed option list is written to stderr. After it, one line is printed
+# to stdout for every entry in ENABLE_VARS, DISABLE_VARS and WITH_VARS.
+# Each of those lines uses the text from the matching
+# <KIND>_<VAR_UC_NAME>_DOC variable when it is non-empty, and a generic
+# description otherwise.
+print_help() {
+    cat <<EOF >&2
+  -h, --help              display this help and exit
+  --disable-debug         disable debugging mode
+  --pass-thru             pass remaining arguments through to CMake
+
+  --prefix=PREFIX         install architecture-independent files in PREFIX
+                          [$PREFIX]
+  --bindir=DIR            user executables [PREFIX/bin]
+  --sbindir=DIR           system admin executables [PREFIX/sbin]
+  --libexecdir=DIR        program executables [PREFIX/libexec]
+  --sysconfdir=DIR        read-only single-machine data [PREFIX/etc]
+  --sharedstatedir=DIR    modifiable architecture-independent data [PREFIX/com]
+  --localstatedir=DIR     modifiable single-machine data [PREFIX/var]
+  --libdir=DIR            object code libraries [PREFIX/lib]
+  --includedir=DIR        C header files [PREFIX/include]
+  --oldincludedir=DIR     C header files for non-gcc [/usr/include]
+  --datarootdir=DIR       read-only arch.-independent data root [PREFIX/share]
+  --datadir=DIR           read-only architecture-independent data [DATAROOTDIR]
+  --infodir=DIR           info documentation [DATAROOTDIR/info]
+  --localedir=DIR         locale-dependent data [DATAROOTDIR/locale]
+  --mandir=DIR            man documentation [DATAROOTDIR/man]
+  --docdir=DIR            documentation root [DATAROOTDIR/doc/PROJECT_NAME]
+EOF
+
+    # --enable-* options; a blank separator line is printed before the first.
+    first=y
+    for varstring in ${ENABLE_VARS}; do
+        if [ $first = 'y' ]; then
+            echo ""
+            first=n
+        fi
+        extract_var_string $(echo "${varstring}" | tr '|' ' ')
+        # Indirect lookup of ENABLE_<NAME>_DOC through eval.
+        var_doc_name="ENABLE_${VAR_UC_NAME}_DOC"
+        eval "docstring=\$$var_doc_name"
+        if [ "x${docstring}" = "x" ]; then
+            printf "  --enable-%-14s enable %s support\n" "${VAR_NAME}" "$(echo -n "${VAR_NAME}" | tr '-' ' ')"
+        else
+            printf "  --enable-%-14s %s\n" "${VAR_NAME}" "$docstring"
+        fi
+    done
+
+    # --disable-* options, same scheme with DISABLE_<NAME>_DOC.
+    first=y
+    for varstring in ${DISABLE_VARS}; do
+        if [ $first = 'y' ]; then
+            echo ""
+            first=n
+        fi
+        extract_var_string $(echo "${varstring}" | tr '|' ' ')
+        var_doc_name="DISABLE_${VAR_UC_NAME}_DOC"
+        eval "docstring=\$$var_doc_name"
+        if [ "x${docstring}" = "x" ]; then
+            printf "  --disable-%-13s disable %s support\n" "${VAR_NAME}" "$(echo -n "${VAR_NAME}" | tr '-' ' ')"
+        else
+            printf "  --disable-%-13s %s\n" "${VAR_NAME}" "$docstring"
+        fi
+    done
+
+    # --with-* options, shown as NAME=VALUE, with WITH_<NAME>_DOC.
+    first=y
+    for varstring in ${WITH_VARS}; do
+        if [ $first = 'y' ]; then
+            echo ""
+            first=n
+        fi
+        extract_var_string $(echo "${varstring}" | tr '|' ' ')
+        var_doc_name="WITH_${VAR_UC_NAME}_DOC"
+        eval "docstring=\$$var_doc_name"
+        paraminfo="${VAR_NAME}=${VAR_VALUE}"
+        if [ "x${docstring}" = "x" ]; then
+            printf "  --with-%-16s enable %s support\n" "$paraminfo" "$(echo -n "${VAR_NAME}" | tr '-' ' ')"
+        else
+            printf "  --with-%-16s %s\n" "$paraminfo" "$docstring"
+        fi
+    done
+
+    exit 0
+}
+
+# Command-line parsing: consumes the arguments one by one.
+# Directory options are accepted both as "--opt=VALUE" and as "--opt VALUE";
+# the second form consumes an extra argument via the additional "shift".
+# Most options are appended to CMAKE_ARGS as shell-quoted -D definitions;
+# --prefix and --libdir are kept in PREFIX / LIBDIR for later use.
+while [ $# != 0 ]; do
+    case "$1" in
+        "--prefix="*)
+            PREFIX="${1#*=}";;
+        "--prefix")
+            PREFIX="${2}"; shift;;
+        "--bindir="*)
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_BINDIR=$(quote "${1#*=}")";;
+        "--bindir")
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_BINDIR=$(quote "$2")"; shift;;
+        "--sbindir="*)
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_SBINDIR=$(quote "${1#*=}")";;
+        "--sbindir")
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_SBINDIR=$(quote "$2")"; shift;;
+        "--libexecdir="*)
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_LIBEXECDIR=$(quote "${1#*=}")";;
+        "--libexecdir")
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_LIBEXECDIR=$(quote "$2")"; shift;;
+        "--sysconfdir="*)
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_SYSCONFDIR=$(quote "${1#*=}")";;
+        "--sysconfdir")
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_SYSCONFDIR=$(quote "$2")"; shift;;
+        "--sharedstatedir="*)
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_SHAREDSTATEDIR=$(quote "${1#*=}")";;
+        "--sharedstatedir")
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_SHAREDSTATEDIR=$(quote "$2")"; shift;;
+        "--localstatedir="*)
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_LOCALSTATEDIR=$(quote "${1#*=}")";;
+        "--localstatedir")
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_LOCALSTATEDIR=$(quote "$2")"; shift;;
+        "--libdir="*)
+            LIBDIR="${1#*=}";;
+        "--libdir")
+            LIBDIR="${2}"; shift;;
+        "--includedir="*)
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_INCLUDEDIR=$(quote "${1#*=}")";;
+        "--includedir")
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_INCLUDEDIR=$(quote "$2")"; shift;;
+        "--oldincludedir="*)
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_OLDINCLUDEDIR=$(quote "${1#*=}")";;
+        "--oldincludedir")
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_OLDINCLUDEDIR=$(quote "$2")"; shift;;
+        "--datarootdir="*)
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_DATAROOTDIR=$(quote "${1#*=}")";;
+        "--datarootdir")
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_DATAROOTDIR=$(quote "$2")"; shift;;
+        "--datadir="*)
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_DATADIR=$(quote "${1#*=}")";;
+        "--datadir")
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_DATADIR=$(quote "$2")"; shift;;
+        "--infodir="*)
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_INFODIR=$(quote "${1#*=}")";;
+        "--infodir")
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_INFODIR=$(quote "$2")"; shift;;
+        "--localedir="*)
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_LOCALEDIR=$(quote "${1#*=}")";;
+        "--localedir")
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_LOCALEDIR=$(quote "$2")"; shift;;
+        "--mandir="*)
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_MANDIR=$(quote "${1#*=}")";;
+        "--mandir")
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_MANDIR=$(quote "$2")"; shift;;
+        "--docdir="*)
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_DOCDIR=$(quote "${1#*=}")";;
+        "--docdir")
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_INSTALL_DOCDIR=$(quote "$2")"; shift;;
+
+        # Autotools-style VAR=VALUE arguments for the toolchain. LDFLAGS is
+        # only accumulated here and turned into CMake options after the loop.
+        "CC="*)
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_C_COMPILER=$(quote "${1#*=}")";;
+        "CXX="*)
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_CXX_COMPILER=$(quote "${1#*=}")";;
+        "CFLAGS="*)
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_C_FLAGS=$(quote "${1#*=}")";;
+        "CXXFLAGS="*)
+            CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_CXX_FLAGS=$(quote "${1#*=}")";;
+        "LDFLAGS="*)
+            LDFLAGS="$LDFLAGS ${1#*=}";;
+
+        # print_help exits the script itself.
+        "--help")
+            print_help;;
+        "-h")
+            print_help;;
+
+        # This flag is the only one which may be a bit surprising to
+        # people.  Autotools always builds with debugging symbols enabled
+        # (AFAIK), but for cmake you have to do -DCMAKE_BUILD_TYPE=Debug.
+        # Unfortunately this can change other things as well, so although
+        # I realize there is no --disable-debug flag I thought it would be
+        # prudent to support one here.
+        "--disable-debug")
+            BUILD_TYPE="Release";;
+
+        # Everything after --pass-thru goes to CMake as is (shell-quoted);
+        # the inner loop consumes all remaining arguments.
+        "--pass-thru")
+            shift;
+            while [ $# != 0 ]; do
+                CMAKE_ARGS="$CMAKE_ARGS $(quote "${1}")";
+                shift;
+            done;;
+
+        "--enable-"*)
+            set_config_var "$1"
+            ;;
+
+        "--disable-"*)
+            set_config_var "$1"
+            ;;
+
+        # --with-NAME=VALUE or --with-NAME VALUE; the name is the part of the
+        # option before the first "=".
+        "--with-"*)
+            name=$(echo "${1#--with-}" | awk '{split($1,v,"="); print v[1]}')
+            case "${1}" in
+                "--with-${name}="*)
+                    set_config_var "--with-${name}" "${1#--with-${name}=}";;
+                "--with-${name}")
+                    set_config_var "$1" "$2";
+                    shift;;
+            esac
+            ;;
+
+        *)
+            echo "$0: error: unrecognized option: \`$1'" >&2
+            echo "Try \`$0 --help' for more information" >&2
+            exit -1
+    esac;
+    shift
+done
+
+# Default the library directory to <prefix>/lib when --libdir was not given.
+if [ "x${LIBDIR}" = "x" ]; then
+    LIBDIR="${PREFIX}/lib"
+fi
+
+# Unlike CFLAGS/CXXFLAGS/CC/CXX, LDFLAGS isn't handled by CMake, so we
+# need to parse it here.
+if [ "x${LDFLAGS}" != "x" ]; then
+    for varname in EXE MODULE SHARED STATIC; do
+        CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_${varname}_LINKER_FLAGS=$(quote "$LDFLAGS")"
+    done
+fi
+
+# eval re-parses its arguments, which is what turns the shell-quoted entries
+# collected in CMAKE_ARGS back into separate words. Every other value is
+# passed through quote() too, so that a source directory, prefix or libdir
+# containing spaces or shell metacharacters survives that second parse.
+# CMAKE_ARGS is expanded inside double quotes so that it is not glob-expanded
+# or whitespace-collapsed before eval sees it. CMAKE_CMD stays unquoted for
+# eval on purpose, so it may carry extra words.
+eval "${CMAKE_CMD}" "$(quote "${TOP_SRCDIR}")" -DCMAKE_BUILD_TYPE="$(quote "${BUILD_TYPE}")" -DCMAKE_INSTALL_PREFIX="$(quote "${PREFIX}")" -DCMAKE_INSTALL_LIBDIR="$(quote "${LIBDIR}")" "${CMAKE_ARGS}"
diff --git a/third-party/libjxl/libjxl/third_party/brotli/configure.ac b/third-party/libjxl/libjxl/third_party/brotli/configure.ac
new file mode 100644
index 0000000000..9a3b285979
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/configure.ac
@@ -0,0 +1,14 @@
+dnl Minimum Autoconf version needed to process this file.
+AC_PREREQ(2.57)
+
+dnl Actual version is substituted by bootstrap
+AC_INIT([brotli], [0.0.0], [https://groups.google.com/forum/#!forum/brotli])
+
+dnl Set up Automake and name m4 as the directory for local macros.
+AM_INIT_AUTOMAKE()
+AC_CONFIG_MACRO_DIR([m4])
+
+dnl Find a C compiler and set up Libtool.
+AC_PROG_CC
+LT_INIT
+
+dnl Output files: the Makefile and one pkg-config file per library.
+AC_CONFIG_FILES([Makefile scripts/libbrotlicommon.pc scripts/libbrotlidec.pc scripts/libbrotlienc.pc])
+
+dnl Generate the files registered above.
+AC_OUTPUT
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/injected_code.txt b/third-party/libjxl/libjxl/third_party/brotli/csharp/injected_code.txt
new file mode 100644
index 0000000000..64f129b0e1
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/injected_code.txt
@@ -0,0 +1,32 @@
+// <{[INJECTED CODE]}>
+		// Member overrides injected into a stream class (presumably a
+		// System.IO.Stream subclass; the enclosing class is not part of this
+		// fragment). Together they describe a read-only, non-seekable stream.
+
+		// Reading is supported.
+		public override bool CanRead {
+			get {return true;}
+		}
+
+		// Seeking is not supported, so every length / position related member
+		// below throws NotSupportedException.
+		public override bool CanSeek {
+			get {return false;}
+		}
+		public override long Length {
+			get {throw new System.NotSupportedException();}
+		}
+		public override long Position {
+			get {throw new System.NotSupportedException();}
+			set {throw new System.NotSupportedException();}
+		}
+		public override long Seek(long offset, System.IO.SeekOrigin origin) {
+			throw new System.NotSupportedException();
+		}
+		public override void SetLength(long value){
+			throw new System.NotSupportedException();
+		}
+
+		// Writing is not supported: both write entry points throw
+		// NotSupportedException.
+		public override bool CanWrite{get{return false;}}
+		public override System.IAsyncResult BeginWrite(byte[] buffer, int offset,
+				int count, System.AsyncCallback callback, object state) {
+			throw new System.NotSupportedException();
+		}
+		public override void Write(byte[] buffer, int offset, int count) {
+			throw new System.NotSupportedException();
+		}
+
+		// Flush does nothing.
+		public override void Flush() {}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/BitReader.cs b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/BitReader.cs
new file mode 100644
index 0000000000..d3f1a3ffff
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/BitReader.cs
@@ -0,0 +1,271 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Distributed under MIT license.
+See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+namespace Org.Brotli.Dec
+{
+	/// <summary>Bit reading helpers.</summary>
+	internal sealed class BitReader
+	{
+		/// <summary>
+		/// Input byte buffer, consist of a ring-buffer and a "slack" region where bytes from the start of
+		/// the ring-buffer are copied.
+		/// </summary>
+		private const int Capacity = 1024;
+
+		private const int Slack = 16;
+
+		private const int IntBufferSize = Capacity + Slack;
+
+		private const int ByteReadSize = Capacity << 2;
+
+		private const int ByteBufferSize = IntBufferSize << 2;
+
+		private readonly byte[] byteBuffer = new byte[ByteBufferSize];
+
+		private readonly int[] intBuffer = new int[IntBufferSize];
+
+		private readonly Org.Brotli.Dec.IntReader intReader = new Org.Brotli.Dec.IntReader();
+
+		private System.IO.Stream input;
+
+		/// <summary>Input stream is finished.</summary>
+		private bool endOfStreamReached;
+
+		/// <summary>Pre-fetched bits.</summary>
+		internal long accumulator;
+
+		/// <summary>Current bit-reading position in accumulator.</summary>
+		internal int bitOffset;
+
+		/// <summary>Offset of next item in intBuffer.</summary>
+		private int intOffset;
+
+		private int tailBytes = 0;
+
+		/* Number of bytes in unfinished "int" item. */
+		/// <summary>Fills up the input buffer.</summary>
+		/// <remarks>
+		/// Fills up the input buffer.
+		/// <p> No-op if there are at least 36 bytes present after current position.
+		/// <p> After encountering the end of the input stream, 64 additional zero bytes are copied to the
+		/// buffer.
+		/// </remarks>
+		internal static void ReadMoreInput(Org.Brotli.Dec.BitReader br)
+		{
+			// TODO: Split to check and read; move read outside of decoding loop.
+			if (br.intOffset <= Capacity - 9)
+			{
+				return;
+			}
+			if (br.endOfStreamReached)
+			{
+				if (IntAvailable(br) >= -2)
+				{
+					return;
+				}
+				throw new Org.Brotli.Dec.BrotliRuntimeException("No more input");
+			}
+			int readOffset = br.intOffset << 2;
+			int bytesRead = ByteReadSize - readOffset;
+			System.Array.Copy(br.byteBuffer, readOffset, br.byteBuffer, 0, bytesRead);
+			br.intOffset = 0;
+			try
+			{
+				while (bytesRead < ByteReadSize)
+				{
+					int len = br.input.Read(br.byteBuffer, bytesRead, ByteReadSize - bytesRead);
+					// EOF is -1 in Java, but 0 in C#.
+					if (len <= 0)
+					{
+						br.endOfStreamReached = true;
+						br.tailBytes = bytesRead;
+						bytesRead += 3;
+						break;
+					}
+					bytesRead += len;
+				}
+			}
+			catch (System.IO.IOException e)
+			{
+				throw new Org.Brotli.Dec.BrotliRuntimeException("Failed to read input", e);
+			}
+			Org.Brotli.Dec.IntReader.Convert(br.intReader, bytesRead >> 2);
+		}
+
+		internal static void CheckHealth(Org.Brotli.Dec.BitReader br, bool endOfStream)
+		{
+			if (!br.endOfStreamReached)
+			{
+				return;
+			}
+			int byteOffset = (br.intOffset << 2) + ((br.bitOffset + 7) >> 3) - 8;
+			if (byteOffset > br.tailBytes)
+			{
+				throw new Org.Brotli.Dec.BrotliRuntimeException("Read after end");
+			}
+			if (endOfStream && (byteOffset != br.tailBytes))
+			{
+				throw new Org.Brotli.Dec.BrotliRuntimeException("Unused bytes after end");
+			}
+		}
+
+		/// <summary>Once 32 or more accumulator bits are consumed, shifts the accumulator right by 32 and loads the next int from intBuffer into its upper half.</summary>
+		internal static void FillBitWindow(Org.Brotli.Dec.BitReader br)
+		{
+			if (br.bitOffset >= 32)
+			{
+				br.accumulator = ((long)br.intBuffer[br.intOffset++] << 32) | ((long)(((ulong)br.accumulator) >> 32));
+				br.bitOffset -= 32;
+			}
+		}
+
+		/// <summary>Reads the specified number of bits from Read Buffer.</summary>
+		internal static int ReadBits(Org.Brotli.Dec.BitReader br, int n)
+		{
+			FillBitWindow(br);
+			int val = (int)((long)(((ulong)br.accumulator) >> br.bitOffset)) & ((1 << n) - 1);
+			br.bitOffset += n;
+			return val;
+		}
+
+		/// <summary>Initialize bit reader.</summary>
+		/// <remarks>
+		/// Initialize bit reader.
+		/// <p> Initialisation turns bit reader to a ready state. Also a number of bytes is prefetched to
+		/// accumulator. Because of that this method may block until enough data could be read from input.
+		/// </remarks>
+		/// <param name="br">BitReader POJO</param>
+		/// <param name="input">data source</param>
+		internal static void Init(Org.Brotli.Dec.BitReader br, System.IO.Stream input)
+		{
+			if (br.input != null)
+			{
+				throw new System.InvalidOperationException("Bit reader already has associated input stream");
+			}
+			Org.Brotli.Dec.IntReader.Init(br.intReader, br.byteBuffer, br.intBuffer);
+			br.input = input;
+			br.accumulator = 0;
+			br.bitOffset = 64;
+			br.intOffset = Capacity;
+			br.endOfStreamReached = false;
+			Prepare(br);
+		}
+
+		private static void Prepare(Org.Brotli.Dec.BitReader br)
+		{
+			ReadMoreInput(br);
+			CheckHealth(br, false);
+			FillBitWindow(br);
+			FillBitWindow(br);
+		}
+
+		internal static void Reload(Org.Brotli.Dec.BitReader br)
+		{
+			if (br.bitOffset == 64)
+			{
+				Prepare(br);
+			}
+		}
+
+		/// <exception cref="System.IO.IOException"/>
+		internal static void Close(Org.Brotli.Dec.BitReader br)
+		{
+			System.IO.Stream @is = br.input;
+			br.input = null;
+			if (@is != null)
+			{
+				@is.Close();
+			}
+		}
+
+		internal static void JumpToByteBoundary(Org.Brotli.Dec.BitReader br)
+		{
+			int padding = (64 - br.bitOffset) & 7;
+			if (padding != 0)
+			{
+				int paddingBits = Org.Brotli.Dec.BitReader.ReadBits(br, padding);
+				if (paddingBits != 0)
+				{
+					throw new Org.Brotli.Dec.BrotliRuntimeException("Corrupted padding bits");
+				}
+			}
+		}
+
+		/// <summary>Ints left between intOffset and the end of buffered data; negative once intOffset has moved past it.</summary>
+		internal static int IntAvailable(Org.Brotli.Dec.BitReader br)
+		{
+			// Before end of stream the whole ring buffer counts; afterwards only the
+			// ints that cover tailBytes, rounded up to a whole int.
+			int validInts = br.endOfStreamReached
+				? ((br.tailBytes + 3) >> 2) : Capacity;
+			return validInts - br.intOffset;
+		}
+
+		/// <summary>Copies <c>length</c> bytes into <c>data</c> at <c>offset</c>: first from the accumulator, then from the int buffer, then straight from the input stream. Requires a byte-aligned bit position; throws BrotliRuntimeException on misalignment, premature end of input, or a read failure.</summary>
+		internal static void CopyBytes(Org.Brotli.Dec.BitReader br, byte[] data, int offset, int length)
+		{
+			if ((br.bitOffset & 7) != 0)
+			{
+				throw new Org.Brotli.Dec.BrotliRuntimeException("Unaligned copyBytes");
+			}
+			// Drain accumulator.
+			while ((br.bitOffset != 64) && (length != 0))
+			{
+				data[offset++] = unchecked((byte)((long)(((ulong)br.accumulator) >> br.bitOffset)));
+				br.bitOffset += 8;
+				length--;
+			}
+			if (length == 0)
+			{
+				return;
+			}
+			// Get data from shadow buffer with "sizeof(int)" granularity.
+			int copyInts = System.Math.Min(IntAvailable(br), length >> 2);
+			if (copyInts > 0)
+			{
+				System.Array.Copy(br.byteBuffer, br.intOffset << 2, data, offset, copyInts << 2);
+				offset += copyInts << 2;
+				length -= copyInts << 2;
+				br.intOffset += copyInts;
+			}
+			if (length == 0)
+			{
+				return;
+			}
+			// Read tail bytes; when buffered ints remain, length is 1..3 here.
+			if (IntAvailable(br) > 0)
+			{
+				FillBitWindow(br);
+				while (length != 0)
+				{
+					data[offset++] = unchecked((byte)((long)(((ulong)br.accumulator) >> br.bitOffset)));
+					br.bitOffset += 8;
+					length--;
+				}
+				CheckHealth(br, false);
+				return;
+			}
+			// Now it is possible to copy bytes directly.
+			try
+			{
+				while (length > 0)
+				{
+					int len = br.input.Read(data, offset, length);
+					// EOF is -1 in Java, but 0 in C#; testing only for -1 would loop forever on truncated input.
+					if (len <= 0)
+					{
+						throw new Org.Brotli.Dec.BrotliRuntimeException("Unexpected end of input");
+					}
+					offset += len;
+					length -= len;
+				}
+			}
+			catch (System.IO.IOException e)
+			{
+				throw new Org.Brotli.Dec.BrotliRuntimeException("Failed to read input", e);
+			}
+		}
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/BitReaderTest.cs b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/BitReaderTest.cs
new file mode 100644
index 0000000000..c5403edf7b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/BitReaderTest.cs
@@ -0,0 +1,33 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Distributed under MIT license.
+See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+namespace Org.Brotli.Dec
+{
+	/// <summary>
+	/// Tests for
+	/// <see cref="BitReader"/>
+	/// .
+	/// </summary>
+	public class BitReaderTest
+	{
+		[NUnit.Framework.Test]
+		public virtual void TestReadAfterEos()
+		{
+			Org.Brotli.Dec.BitReader reader = new Org.Brotli.Dec.BitReader();
+			Org.Brotli.Dec.BitReader.Init(reader, new System.IO.MemoryStream(new byte[1]));
+			Org.Brotli.Dec.BitReader.ReadBits(reader, 9);
+			try
+			{
+				Org.Brotli.Dec.BitReader.CheckHealth(reader, false);
+			}
+			catch (Org.Brotli.Dec.BrotliRuntimeException)
+			{
+				// This exception is expected.
+				return;
+			}
+			NUnit.Framework.Assert.Fail("BrotliRuntimeException should have been thrown by BitReader.checkHealth");
+		}
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/BrotliInputStream.cs b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/BrotliInputStream.cs
new file mode 100644
index 0000000000..36f8128eaf
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/BrotliInputStream.cs
@@ -0,0 +1,223 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Distributed under MIT license.
+See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+namespace Org.Brotli.Dec
+{
+	/// <summary>
+	/// <see cref="System.IO.Stream"/>
+	/// decorator that decompresses brotli data.
+	/// <p> Not thread-safe.
+	/// </summary>
+	public class BrotliInputStream : System.IO.Stream
+	{
+		public const int DefaultInternalBufferSize = 16384;
+
+		/// <summary>Internal buffer used for efficient byte-by-byte reading.</summary>
+		private byte[] buffer;
+
+		/// <summary>Number of decoded but still unused bytes in internal buffer.</summary>
+		private int remainingBufferBytes;
+
+		/// <summary>Next unused byte offset.</summary>
+		private int bufferOffset;
+
+		/// <summary>Decoder state.</summary>
+		private readonly Org.Brotli.Dec.State state = new Org.Brotli.Dec.State();
+
+		/// <summary>
+		/// Creates a
+		/// <see cref="System.IO.Stream"/>
+		/// wrapper that decompresses brotli data.
+		/// <p> For byte-by-byte reading (
+		/// <see cref="ReadByte()"/>
+		/// ) internal buffer with
+		/// <see cref="DefaultInternalBufferSize"/>
+		/// size is allocated and used.
+		/// <p> Will block the thread until first kilobyte of data of source is available.
+		/// </summary>
+		/// <param name="source">underlying data source</param>
+		/// <exception cref="System.IO.IOException">in case of corrupted data or source stream problems</exception>
+		public BrotliInputStream(System.IO.Stream source)
+			: this(source, DefaultInternalBufferSize, null)
+		{
+		}
+
+		/// <summary>
+		/// Creates a
+		/// <see cref="System.IO.Stream"/>
+		/// wrapper that decompresses brotli data.
+		/// <p> For byte-by-byte reading (
+		/// <see cref="ReadByte()"/>
+		/// ) internal buffer of specified size is
+		/// allocated and used.
+		/// <p> Will block the thread until first kilobyte of data of source is available.
+		/// </summary>
+		/// <param name="source">compressed data source</param>
+		/// <param name="byteReadBufferSize">
+		/// size of internal buffer used in case of
+		/// byte-by-byte reading
+		/// </param>
+		/// <exception cref="System.IO.IOException">in case of corrupted data or source stream problems</exception>
+		public BrotliInputStream(System.IO.Stream source, int byteReadBufferSize)
+			: this(source, byteReadBufferSize, null)
+		{
+		}
+
+		/// <summary>
+		/// Creates a
+		/// <see cref="System.IO.Stream"/>
+		/// wrapper that decompresses brotli data.
+		/// <p> For byte-by-byte reading (
+		/// <see cref="ReadByte()"/>
+		/// ) internal buffer of specified size is
+		/// allocated and used.
+		/// <p> Will block the thread until first kilobyte of data of source is available.
+		/// </summary>
+		/// <param name="source">compressed data source</param>
+		/// <param name="byteReadBufferSize">
+		/// size of internal buffer used in case of
+		/// byte-by-byte reading
+		/// </param>
+		/// <param name="customDictionary">
+		/// custom dictionary data;
+		/// <see langword="null"/>
+		/// if not used
+		/// </param>
+		/// <exception cref="System.IO.IOException">in case of corrupted data or source stream problems</exception>
+		public BrotliInputStream(System.IO.Stream source, int byteReadBufferSize, byte[] customDictionary)
+		{
+			if (byteReadBufferSize <= 0)
+			{
+				throw new System.ArgumentException("Bad buffer size:" + byteReadBufferSize);
+			}
+			else if (source == null)
+			{
+				throw new System.ArgumentException("source is null");
+			}
+			this.buffer = new byte[byteReadBufferSize];
+			this.remainingBufferBytes = 0;
+			this.bufferOffset = 0;
+			try
+			{
+				Org.Brotli.Dec.State.SetInput(state, source);
+			}
+			catch (Org.Brotli.Dec.BrotliRuntimeException ex)
+			{
+				throw new System.IO.IOException("Brotli decoder initialization failed", ex);
+			}
+			if (customDictionary != null)
+			{
+				Org.Brotli.Dec.Decode.SetCustomDictionary(state, customDictionary);
+			}
+		}
+
+		/// <summary>Closes this stream by delegating to <c>State.Close</c> for the decoder state.</summary>
+		/// <exception cref="System.IO.IOException"/>
+		public override void Close()
+		{
+			Org.Brotli.Dec.State.Close(state);
+		}
+
+		/// <summary>Returns the next decompressed byte (0..255), refilling the internal buffer through <c>Read</c> when it is exhausted; returns -1 when <c>Read</c> reports -1.</summary>
+		/// <exception cref="System.IO.IOException"/>
+		public override int ReadByte()
+		{
+			if (bufferOffset >= remainingBufferBytes)
+			{
+				remainingBufferBytes = Read(buffer, 0, buffer.Length);
+				bufferOffset = 0;
+				if (remainingBufferBytes == -1)
+				{
+					return -1;
+				}
+			}
+			return buffer[bufferOffset++] & unchecked((int)(0xFF));
+		}
+
+		/// <summary>Decompresses up to <c>destLen</c> bytes into <c>destBuffer</c> at <c>destOffset</c>, serving bytes left in the internal buffer first. Returns the number of bytes delivered, 0 when <c>destLen</c> is 0, or -1 when nothing could be delivered.</summary>
+		/// <exception cref="System.IO.IOException"/>
+		public override int Read(byte[] destBuffer, int destOffset, int destLen)
+		{
+			if (destOffset < 0)
+			{
+				throw new System.ArgumentException("Bad offset: " + destOffset);
+			}
+			else if (destLen < 0)
+			{
+				throw new System.ArgumentException("Bad length: " + destLen);
+			}
+			else if (destOffset + destLen > destBuffer.Length)
+			{
+				throw new System.ArgumentException("Buffer overflow: " + (destOffset + destLen) + " > " + destBuffer.Length);
+			}
+			else if (destLen == 0)
+			{
+				return 0;
+			}
+			int copyLen = System.Math.Max(remainingBufferBytes - bufferOffset, 0);
+			if (copyLen != 0)
+			{
+				copyLen = System.Math.Min(copyLen, destLen);
+				System.Array.Copy(buffer, bufferOffset, destBuffer, destOffset, copyLen);
+				bufferOffset += copyLen;
+				destOffset += copyLen;
+				destLen -= copyLen;
+				if (destLen == 0)
+				{
+					return copyLen;
+				}
+			}
+			try
+			{
+				state.output = destBuffer;
+				state.outputOffset = destOffset;
+				state.outputLength = destLen;
+				state.outputUsed = 0;
+				Org.Brotli.Dec.Decode.Decompress(state);
+				if (state.outputUsed + copyLen == 0) // -1 only when nothing was delivered; bytes copied from the internal buffer above still count
+				{
+					return -1;
+				}
+				return state.outputUsed + copyLen;
+			}
+			catch (Org.Brotli.Dec.BrotliRuntimeException ex)
+			{
+				throw new System.IO.IOException("Brotli stream decoding failed", ex);
+			}
+		}
+		// <{[INJECTED CODE]}>
+		public override bool CanRead {
+			get {return true;}
+		}
+
+		public override bool CanSeek {
+			get {return false;}
+		}
+		public override long Length {
+			get {throw new System.NotSupportedException();}
+		}
+		public override long Position {
+			get {throw new System.NotSupportedException();}
+			set {throw new System.NotSupportedException();}
+		}
+		public override long Seek(long offset, System.IO.SeekOrigin origin) {
+			throw new System.NotSupportedException();
+		}
+		public override void SetLength(long value){
+			throw new System.NotSupportedException();
+		}
+
+		public override bool CanWrite{get{return false;}}
+		public override System.IAsyncResult BeginWrite(byte[] buffer, int offset,
+				int count, System.AsyncCallback callback, object state) {
+			throw new System.NotSupportedException();
+		}
+		public override void Write(byte[] buffer, int offset, int count) {
+			throw new System.NotSupportedException();
+		}
+
+		public override void Flush() {}
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/BrotliRuntimeException.cs b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/BrotliRuntimeException.cs
new file mode 100644
index 0000000000..1e0aef083b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/BrotliRuntimeException.cs
@@ -0,0 +1,22 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Distributed under MIT license.
+See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+namespace Org.Brotli.Dec
+{
+	/// <summary>Unchecked exception used internally.</summary>
+	[System.Serializable]
+	internal class BrotliRuntimeException : System.Exception
+	{
+		internal BrotliRuntimeException(string message)
+			: base(message)
+		{
+		}
+
+		internal BrotliRuntimeException(string message, System.Exception cause)
+			: base(message, cause)
+		{
+		}
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Context.cs b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Context.cs
new file mode 100644
index 0000000000..ad900e4900
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Context.cs
@@ -0,0 +1,57 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Distributed under MIT license.
+See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+namespace Org.Brotli.Dec
+{
+	/// <summary>Common context lookup table for all context modes.</summary>
+	internal sealed class Context
+	{
+		internal static readonly int[] Lookup = new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12, 44, 44, 44, 44, 44, 44, 44, 44
+			, 44, 44, 32, 32, 24, 40, 28, 12, 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12, 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56, 60, 60, 60, 60
+			, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 
+			2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+			0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 
+			1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+			0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
+			0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
+			3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+			4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+			6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
+			16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
+			24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
+			32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 
+			40, 40, 40, 40, 40, 40, 40, 40, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38
+			, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 
+			37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35
+			, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 
+			34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 
+			10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 
+			25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, 
+			40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 
+			55, 55, 55, 55, 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+			0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+			0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+			0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+		internal static readonly int[] LookupOffsets = new int[] { 1024, 1536, 1280, 1536, 0, 256, 768, 512 };
+		// CONTEXT_UTF8, last byte.
+		// ASCII range.
+		// UTF8 continuation byte range.
+		// UTF8 lead byte range.
+		// CONTEXT_UTF8 second last byte.
+		// ASCII range.
+		// UTF8 continuation byte range.
+		// UTF8 lead byte range.
+		// CONTEXT_SIGNED, second last byte.
+		// CONTEXT_SIGNED, last byte, same as the above values shifted by 3 bits.
+		// CONTEXT_LSB6, last byte.
+		// CONTEXT_MSB6, last byte.
+		// CONTEXT_{M,L}SB6, second last byte,
+		// CONTEXT_LSB6
+		// CONTEXT_MSB6
+		// CONTEXT_UTF8
+		// CONTEXT_SIGNED
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Decode.cs b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Decode.cs
new file mode 100644
index 0000000000..bdc8709ff7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Decode.cs
@@ -0,0 +1,992 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Distributed under MIT license.
+See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+namespace Org.Brotli.Dec
+{
+	/// <summary>API for Brotli decompression.</summary>
+	internal sealed class Decode
+	{
+		private const int DefaultCodeLength = 8;
+
+		private const int CodeLengthRepeatCode = 16;
+
+		private const int NumLiteralCodes = 256;
+
+		private const int NumInsertAndCopyCodes = 704;
+
+		private const int NumBlockLengthCodes = 26;
+
+		private const int LiteralContextBits = 6;
+
+		private const int DistanceContextBits = 2;
+
+		private const int HuffmanTableBits = 8;
+
+		private const int HuffmanTableMask = unchecked((int)(0xFF));
+
+		private const int CodeLengthCodes = 18;
+
+		private static readonly int[] CodeLengthCodeOrder = new int[] { 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+
+		private const int NumDistanceShortCodes = 16;
+
+		private static readonly int[] DistanceShortCodeIndexOffset = new int[] { 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2 };
+
+		private static readonly int[] DistanceShortCodeValueOffset = new int[] { 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3 };
+
+		/// <summary>Static Huffman code for the code length code lengths.</summary>
+		private static readonly int[] FixedTable = new int[] { unchecked((int)(0x020000)), unchecked((int)(0x020004)), unchecked((int)(0x020003)), unchecked((int)(0x030002)), unchecked((int)(0x020000)), unchecked((int)(0x020004)), unchecked((int)(0x020003
+			)), unchecked((int)(0x040001)), unchecked((int)(0x020000)), unchecked((int)(0x020004)), unchecked((int)(0x020003)), unchecked((int)(0x030002)), unchecked((int)(0x020000)), unchecked((int)(0x020004)), unchecked((int)(0x020003)), unchecked((int
+			)(0x040005)) };
+
+		/// <summary>Reads a variable-length value in the range [0..255], consuming 1 to 11 bits.</summary>
+		private static int DecodeVarLenUnsignedByte(Org.Brotli.Dec.BitReader br)
+		{
+			// A cleared flag bit encodes zero directly.
+			if (Org.Brotli.Dec.BitReader.ReadBits(br, 1) == 0)
+			{
+				return 0;
+			}
+			// The next 3 bits give the number of extra bits that follow.
+			int extraBitCount = Org.Brotli.Dec.BitReader.ReadBits(br, 3);
+			if (extraBitCount == 0)
+			{
+				return 1;
+			}
+			int extra = Org.Brotli.Dec.BitReader.ReadBits(br, extraBitCount);
+			return extra + (1 << extraBitCount);
+		}
+
+		private static void DecodeMetaBlockLength(Org.Brotli.Dec.BitReader br, Org.Brotli.Dec.State state)
+		{
+			state.inputEnd = Org.Brotli.Dec.BitReader.ReadBits(br, 1) == 1;
+			state.metaBlockLength = 0;
+			state.isUncompressed = false;
+			state.isMetadata = false;
+			if (state.inputEnd && Org.Brotli.Dec.BitReader.ReadBits(br, 1) != 0)
+			{
+				return;
+			}
+			int sizeNibbles = Org.Brotli.Dec.BitReader.ReadBits(br, 2) + 4;
+			if (sizeNibbles == 7)
+			{
+				state.isMetadata = true;
+				if (Org.Brotli.Dec.BitReader.ReadBits(br, 1) != 0)
+				{
+					throw new Org.Brotli.Dec.BrotliRuntimeException("Corrupted reserved bit");
+				}
+				int sizeBytes = Org.Brotli.Dec.BitReader.ReadBits(br, 2);
+				if (sizeBytes == 0)
+				{
+					return;
+				}
+				for (int i = 0; i < sizeBytes; i++)
+				{
+					int bits = Org.Brotli.Dec.BitReader.ReadBits(br, 8);
+					if (bits == 0 && i + 1 == sizeBytes && sizeBytes > 1)
+					{
+						throw new Org.Brotli.Dec.BrotliRuntimeException("Exuberant nibble");
+					}
+					state.metaBlockLength |= bits << (i * 8);
+				}
+			}
+			else
+			{
+				for (int i = 0; i < sizeNibbles; i++)
+				{
+					int bits = Org.Brotli.Dec.BitReader.ReadBits(br, 4);
+					if (bits == 0 && i + 1 == sizeNibbles && sizeNibbles > 4)
+					{
+						throw new Org.Brotli.Dec.BrotliRuntimeException("Exuberant nibble");
+					}
+					state.metaBlockLength |= bits << (i * 4);
+				}
+			}
+			state.metaBlockLength++;
+			if (!state.inputEnd)
+			{
+				state.isUncompressed = Org.Brotli.Dec.BitReader.ReadBits(br, 1) == 1;
+			}
+		}
+
+		/// <summary>Decodes the next Huffman code from bit-stream.</summary>
+		private static int ReadSymbol(int[] table, int offset, Org.Brotli.Dec.BitReader br)
+		{
+			int val = (int)((long)(((ulong)br.accumulator) >> br.bitOffset));
+			offset += val & HuffmanTableMask;
+			int bits = table[offset] >> 16;
+			int sym = table[offset] & unchecked((int)(0xFFFF));
+			if (bits <= HuffmanTableBits)
+			{
+				br.bitOffset += bits;
+				return sym;
+			}
+			offset += sym;
+			int mask = (1 << bits) - 1;
+			offset += (int)(((uint)(val & mask)) >> HuffmanTableBits);
+			br.bitOffset += ((table[offset] >> 16) + HuffmanTableBits);
+			return table[offset] & unchecked((int)(0xFFFF));
+		}
+
+		private static int ReadBlockLength(int[] table, int offset, Org.Brotli.Dec.BitReader br)
+		{
+			Org.Brotli.Dec.BitReader.FillBitWindow(br);
+			int code = ReadSymbol(table, offset, br);
+			int n = Org.Brotli.Dec.Prefix.BlockLengthNBits[code];
+			return Org.Brotli.Dec.Prefix.BlockLengthOffset[code] + Org.Brotli.Dec.BitReader.ReadBits(br, n);
+		}
+
+		private static int TranslateShortCodes(int code, int[] ringBuffer, int index)
+		{
+			if (code < NumDistanceShortCodes)
+			{
+				index += DistanceShortCodeIndexOffset[code];
+				index &= 3;
+				return ringBuffer[index] + DistanceShortCodeValueOffset[code];
+			}
+			return code - NumDistanceShortCodes + 1;
+		}
+
+		/// <summary>Moves v[index] to position 0, shifting v[0..index-1] one slot towards the end.</summary>
+		private static void MoveToFront(int[] v, int index)
+		{
+			int front = v[index];
+			// Array.Copy copes with overlapping source and destination ranges, so this
+			// is equivalent to the backwards element-by-element shift.
+			System.Array.Copy(v, 0, v, 1, index);
+			v[0] = front;
+		}
+
+		private static void InverseMoveToFrontTransform(byte[] v, int vLen)
+		{
+			int[] mtf = new int[256];
+			for (int i = 0; i < 256; i++)
+			{
+				mtf[i] = i;
+			}
+			for (int i = 0; i < vLen; i++)
+			{
+				int index = v[i] & unchecked((int)(0xFF));
+				v[i] = unchecked((byte)mtf[index]);
+				if (index != 0)
+				{
+					MoveToFront(mtf, index);
+				}
+			}
+		}
+
+		private static void ReadHuffmanCodeLengths(int[] codeLengthCodeLengths, int numSymbols, int[] codeLengths, Org.Brotli.Dec.BitReader br)
+		{
+			int symbol = 0; // next index of codeLengths to fill
+			int prevCodeLen = DefaultCodeLength; // last non-zero length seen; reused by CodeLengthRepeatCode
+			int repeat = 0;
+			int repeatCodeLen = 0;
+			int space = 32768; // unassigned code space; a code of length L consumes 32768 >> L of it
+			int[] table = new int[32]; // 5-bit lookup table for the code-length alphabet
+			Org.Brotli.Dec.Huffman.BuildHuffmanTable(table, 0, 5, codeLengthCodeLengths, CodeLengthCodes);
+			while (symbol < numSymbols && space > 0)
+			{
+				Org.Brotli.Dec.BitReader.ReadMoreInput(br);
+				Org.Brotli.Dec.BitReader.FillBitWindow(br);
+				int p = (int)(((long)(((ulong)br.accumulator) >> br.bitOffset))) & 31; // peek at the next 5 bits
+				br.bitOffset += table[p] >> 16; // high 16 bits of a table entry: bits to consume
+				int codeLen = table[p] & unchecked((int)(0xFFFF)); // low 16 bits of a table entry: decoded symbol
+				if (codeLen < CodeLengthRepeatCode) // plain code length for a single symbol
+				{
+					repeat = 0;
+					codeLengths[symbol++] = codeLen;
+					if (codeLen != 0)
+					{
+						prevCodeLen = codeLen;
+						space -= 32768 >> codeLen;
+					}
+				}
+				else
+				{
+					int extraBits = codeLen - 14; // width of the repeat-count field read below
+					int newLen = 0; // length being repeated; stays 0 unless this is CodeLengthRepeatCode
+					if (codeLen == CodeLengthRepeatCode)
+					{
+						newLen = prevCodeLen;
+					}
+					if (repeatCodeLen != newLen) // a different repeated length starts a fresh run
+					{
+						repeat = 0;
+						repeatCodeLen = newLen;
+					}
+					int oldRepeat = repeat;
+					if (repeat > 0) // a run is already in progress: scale it before adding the new count
+					{
+						repeat -= 2;
+						repeat <<= extraBits;
+					}
+					repeat += Org.Brotli.Dec.BitReader.ReadBits(br, extraBits) + 3;
+					int repeatDelta = repeat - oldRepeat; // entries to emit for this repeat code
+					if (symbol + repeatDelta > numSymbols)
+					{
+						throw new Org.Brotli.Dec.BrotliRuntimeException("symbol + repeatDelta > numSymbols");
+					}
+					// COV_NF_LINE
+					for (int i = 0; i < repeatDelta; i++)
+					{
+						codeLengths[symbol++] = repeatCodeLen;
+					}
+					if (repeatCodeLen != 0)
+					{
+						space -= repeatDelta << (15 - repeatCodeLen); // repeatDelta codes, each costing 32768 >> repeatCodeLen
+					}
+				}
+			}
+			if (space != 0) // the lengths must use up the code space exactly
+			{
+				throw new Org.Brotli.Dec.BrotliRuntimeException("Unused space");
+			}
+			// COV_NF_LINE
+			// TODO: Pass max_symbol to Huffman table builder instead?
+			Org.Brotli.Dec.Utils.FillWithZeroes(codeLengths, symbol, numSymbols - symbol); // presumably zeroes the entries not assigned above - confirm against Utils
+		}
+
+		// TODO: Use specialized versions for smaller tables.
+		internal static void ReadHuffmanCode(int alphabetSize, int[] table, int offset, Org.Brotli.Dec.BitReader br)
+		{
+			bool ok = true; // cleared when the encoded code description is found to be invalid
+			int simpleCodeOrSkip;
+			Org.Brotli.Dec.BitReader.ReadMoreInput(br);
+			// TODO: Avoid allocation.
+			int[] codeLengths = new int[alphabetSize];
+			simpleCodeOrSkip = Org.Brotli.Dec.BitReader.ReadBits(br, 2); // 1: simple code; otherwise index of the first code-length code to read
+			if (simpleCodeOrSkip == 1)
+			{
+				// Read symbols, codes & code lengths directly.
+				int maxBitsCounter = alphabetSize - 1;
+				int maxBits = 0; // becomes the bit width needed to represent alphabetSize - 1
+				int[] symbols = new int[4];
+				int numSymbols = Org.Brotli.Dec.BitReader.ReadBits(br, 2) + 1; // 1..4 explicitly listed symbols
+				while (maxBitsCounter != 0)
+				{
+					maxBitsCounter >>= 1;
+					maxBits++;
+				}
+				// TODO: uncomment when codeLengths is reused.
+				// Utils.fillWithZeroes(codeLengths, 0, alphabetSize);
+				for (int i = 0; i < numSymbols; i++)
+				{
+					symbols[i] = Org.Brotli.Dec.BitReader.ReadBits(br, maxBits) % alphabetSize; // modulo keeps the index inside codeLengths
+					codeLengths[symbols[i]] = 2;
+				}
+				codeLengths[symbols[0]] = 1;
+				switch (numSymbols)
+				{
+					case 1:
+					{
+						break;
+					}
+
+					case 2:
+					{
+						ok = symbols[0] != symbols[1]; // listed symbols must be distinct
+						codeLengths[symbols[1]] = 1;
+						break;
+					}
+
+					case 3:
+					{
+						ok = symbols[0] != symbols[1] && symbols[0] != symbols[2] && symbols[1] != symbols[2];
+						break;
+					}
+
+					case 4:
+					default:
+					{
+						ok = symbols[0] != symbols[1] && symbols[0] != symbols[2] && symbols[0] != symbols[3] && symbols[1] != symbols[2] && symbols[1] != symbols[3] && symbols[2] != symbols[3];
+						if (Org.Brotli.Dec.BitReader.ReadBits(br, 1) == 1) // one more bit picks lengths 1,2,3,3 or 2,2,2,2
+						{
+							codeLengths[symbols[2]] = 3;
+							codeLengths[symbols[3]] = 3;
+						}
+						else
+						{
+							codeLengths[symbols[0]] = 2;
+						}
+						break;
+					}
+				}
+			}
+			else
+			{
+				// Decode Huffman-coded code lengths.
+				int[] codeLengthCodeLengths = new int[CodeLengthCodes];
+				int space = 32; // unassigned code space; a length v consumes 32 >> v of it
+				int numCodes = 0; // number of non-zero code-length code lengths read
+				for (int i = simpleCodeOrSkip; i < CodeLengthCodes && space > 0; i++)
+				{
+					int codeLenIdx = CodeLengthCodeOrder[i];
+					Org.Brotli.Dec.BitReader.FillBitWindow(br);
+					int p = (int)((long)(((ulong)br.accumulator) >> br.bitOffset)) & 15; // peek at the next 4 bits
+					// TODO: Demultiplex FIXED_TABLE.
+					br.bitOffset += FixedTable[p] >> 16; // high 16 bits of an entry: bits to consume
+					int v = FixedTable[p] & unchecked((int)(0xFFFF)); // low 16 bits of an entry: decoded length
+					codeLengthCodeLengths[codeLenIdx] = v;
+					if (v != 0)
+					{
+						space -= (32 >> v);
+						numCodes++;
+					}
+				}
+				ok = (numCodes == 1 || space == 0); // valid only with a single code or fully used code space
+				ReadHuffmanCodeLengths(codeLengthCodeLengths, alphabetSize, codeLengths, br);
+			}
+			if (!ok)
+			{
+				throw new Org.Brotli.Dec.BrotliRuntimeException("Can't readHuffmanCode");
+			}
+			// COV_NF_LINE
+			Org.Brotli.Dec.Huffman.BuildHuffmanTable(table, offset, HuffmanTableBits, codeLengths, alphabetSize);
+		}
+
+		private static int DecodeContextMap(int contextMapSize, byte[] contextMap, Org.Brotli.Dec.BitReader br)
+		{
+			Org.Brotli.Dec.BitReader.ReadMoreInput(br);
+			int numTrees = DecodeVarLenUnsignedByte(br) + 1; // this count is also the return value
+			if (numTrees == 1) // a single tree: no map is encoded, every entry is 0
+			{
+				Org.Brotli.Dec.Utils.FillWithZeroes(contextMap, 0, contextMapSize);
+				return numTrees;
+			}
+			bool useRleForZeros = Org.Brotli.Dec.BitReader.ReadBits(br, 1) == 1;
+			int maxRunLengthPrefix = 0; // codes 1..maxRunLengthPrefix denote runs of zeros; 0 disables runs
+			if (useRleForZeros)
+			{
+				maxRunLengthPrefix = Org.Brotli.Dec.BitReader.ReadBits(br, 4) + 1;
+			}
+			int[] table = new int[Org.Brotli.Dec.Huffman.HuffmanMaxTableSize];
+			ReadHuffmanCode(numTrees + maxRunLengthPrefix, table, 0, br);
+			for (int i = 0; i < contextMapSize; ) // i advances inside the body, by one entry or by a whole run
+			{
+				Org.Brotli.Dec.BitReader.ReadMoreInput(br);
+				Org.Brotli.Dec.BitReader.FillBitWindow(br);
+				int code = ReadSymbol(table, 0, br);
+				if (code == 0) // a single literal zero
+				{
+					contextMap[i] = 0;
+					i++;
+				}
+				else if (code <= maxRunLengthPrefix) // run of zeros
+				{
+					int reps = (1 << code) + Org.Brotli.Dec.BitReader.ReadBits(br, code); // run length: 2^code plus code extra bits
+					while (reps != 0)
+					{
+						if (i >= contextMapSize) // the run would write past the end of the map
+						{
+							throw new Org.Brotli.Dec.BrotliRuntimeException("Corrupted context map");
+						}
+						// COV_NF_LINE
+						contextMap[i] = 0;
+						i++;
+						reps--;
+					}
+				}
+				else
+				{
+					contextMap[i] = unchecked((byte)(code - maxRunLengthPrefix)); // remaining codes carry the value, shifted past the run codes
+					i++;
+				}
+			}
+			if (Org.Brotli.Dec.BitReader.ReadBits(br, 1) == 1) // trailing flag: the map was move-to-front transformed
+			{
+				InverseMoveToFrontTransform(contextMap, contextMapSize);
+			}
+			return numTrees;
+		}
+
+		private static void DecodeBlockTypeAndLength(Org.Brotli.Dec.State state, int treeType)
+		{
+			Org.Brotli.Dec.BitReader br = state.br;
+			int[] ringBuffers = state.blockTypeRb;
+			int offset = treeType * 2; // two history slots per tree type: [offset] older, [offset + 1] most recent
+			Org.Brotli.Dec.BitReader.FillBitWindow(br);
+			int blockType = ReadSymbol(state.blockTypeTrees, treeType * Org.Brotli.Dec.Huffman.HuffmanMaxTableSize, br);
+			state.blockLength[treeType] = ReadBlockLength(state.blockLenTrees, treeType * Org.Brotli.Dec.Huffman.HuffmanMaxTableSize, br);
+			if (blockType == 1) // symbol 1: most recent block type plus one
+			{
+				blockType = ringBuffers[offset + 1] + 1;
+			}
+			else if (blockType == 0) // symbol 0: the older of the two remembered block types
+			{
+				blockType = ringBuffers[offset];
+			}
+			else
+			{
+				blockType -= 2; // symbols >= 2 encode the block type directly
+			}
+			if (blockType >= state.numBlockTypes[treeType]) // wrap around the number of block types
+			{
+				blockType -= state.numBlockTypes[treeType];
+			}
+			ringBuffers[offset] = ringBuffers[offset + 1]; // shift history: most recent becomes older
+			ringBuffers[offset + 1] = blockType;
+		}
+
+		private static void DecodeLiteralBlockSwitch(Org.Brotli.Dec.State state)
+		{
+			DecodeBlockTypeAndLength(state, 0); // tree type 0 is used for literals
+			int literalBlockType = state.blockTypeRb[1]; // slot 1 holds the block type just stored for tree type 0
+			state.contextMapSlice = literalBlockType << LiteralContextBits; // start of this block type's section of contextMap
+			state.literalTreeIndex = state.contextMap[state.contextMapSlice] & unchecked((int)(0xFF));
+			state.literalTree = state.hGroup0.trees[state.literalTreeIndex];
+			int contextMode = state.contextModes[literalBlockType];
+			state.contextLookupOffset1 = Org.Brotli.Dec.Context.LookupOffsets[contextMode]; // paired with the previous byte in the insert loop
+			state.contextLookupOffset2 = Org.Brotli.Dec.Context.LookupOffsets[contextMode + 1]; // paired with the byte before that
+		}
+
+		private static void DecodeCommandBlockSwitch(Org.Brotli.Dec.State state)
+		{
+			DecodeBlockTypeAndLength(state, 1); // tree type 1 is used for commands
+			state.treeCommandOffset = state.hGroup1.trees[state.blockTypeRb[3]]; // slot 3 holds the block type just stored for tree type 1
+		}
+
+		private static void DecodeDistanceBlockSwitch(Org.Brotli.Dec.State state)
+		{
+			DecodeBlockTypeAndLength(state, 2); // tree type 2 is used for distances
+			state.distContextMapSlice = state.blockTypeRb[5] << DistanceContextBits; // slot 5 holds the block type just stored for tree type 2
+		}
+
+		private static void MaybeReallocateRingBuffer(Org.Brotli.Dec.State state)
+		{
+			int newSize = state.maxRingBufferSize; // start from the maximum and shrink if the expected output is smaller
+			if ((long)newSize > state.expectedTotalSize)
+			{
+				/* TODO: Handle 2GB+ cases more gracefully. */
+				int minimalNewSize = (int)state.expectedTotalSize + state.customDictionary.Length;
+				while ((newSize >> 1) > minimalNewSize) // halve while the halved size still exceeds what is needed
+				{
+					newSize >>= 1;
+				}
+				if (!state.inputEnd && newSize < 16384 && state.maxRingBufferSize >= 16384) // inputEnd not set yet: do not go below 16384
+				{
+					newSize = 16384;
+				}
+			}
+			if (newSize <= state.ringBufferSize) // current buffer is already large enough; never shrink
+			{
+				return;
+			}
+			int ringBufferSizeWithSlack = newSize + Org.Brotli.Dec.Dictionary.MaxTransformedWordLength; // extra room past the logical end of the buffer
+			byte[] newBuffer = new byte[ringBufferSizeWithSlack];
+			if (state.ringBuffer != null) // growing an existing buffer: carry over its contents
+			{
+				System.Array.Copy(state.ringBuffer, 0, newBuffer, 0, state.ringBufferSize);
+			}
+			else if (state.customDictionary.Length != 0)
+			{
+				/* Prepend custom dictionary, if any. */
+				int length = state.customDictionary.Length;
+				int offset = 0;
+				if (length > state.maxBackwardDistance) // keep only the last maxBackwardDistance bytes of the dictionary
+				{
+					offset = length - state.maxBackwardDistance;
+					length = state.maxBackwardDistance;
+				}
+				System.Array.Copy(state.customDictionary, offset, newBuffer, 0, length);
+				state.pos = length; // decoded output starts after the dictionary bytes
+				state.bytesToIgnore = length; // WriteRingBuffer skips these bytes when producing output
+			}
+			state.ringBuffer = newBuffer;
+			state.ringBufferSize = newSize; // logical size excludes the slack
+		}
+
+		/// <summary>Reads next metablock header and selects the running state that handles its body.</summary>
+		/// <param name="state">decoding state</param>
+		private static void ReadMetablockInfo(Org.Brotli.Dec.State state)
+		{
+			Org.Brotli.Dec.BitReader br = state.br;
+			if (state.inputEnd) // no further metablocks: flush what is buffered, then finish
+			{
+				state.nextRunningState = Org.Brotli.Dec.RunningState.Finished;
+				state.bytesToWrite = state.pos;
+				state.bytesWritten = 0;
+				state.runningState = Org.Brotli.Dec.RunningState.Write;
+				return;
+			}
+			// TODO: Reset? Do we need this?
+			state.hGroup0.codes = null;
+			state.hGroup0.trees = null;
+			state.hGroup1.codes = null;
+			state.hGroup1.trees = null;
+			state.hGroup2.codes = null;
+			state.hGroup2.trees = null;
+			Org.Brotli.Dec.BitReader.ReadMoreInput(br);
+			DecodeMetaBlockLength(br, state);
+			if (state.metaBlockLength == 0 && !state.isMetadata) // empty non-metadata block: running state is left unchanged
+			{
+				return;
+			}
+			if (state.isUncompressed || state.isMetadata)
+			{
+				Org.Brotli.Dec.BitReader.JumpToByteBoundary(br);
+				state.runningState = state.isMetadata ? Org.Brotli.Dec.RunningState.ReadMetadata : Org.Brotli.Dec.RunningState.CopyUncompressed;
+			}
+			else
+			{
+				state.runningState = Org.Brotli.Dec.RunningState.CompressedBlockStart;
+			}
+			if (state.isMetadata) // metadata does not count towards expectedTotalSize or ring-buffer sizing
+			{
+				return;
+			}
+			state.expectedTotalSize += state.metaBlockLength;
+			if (state.ringBufferSize < state.maxRingBufferSize) // buffer may still grow
+			{
+				MaybeReallocateRingBuffer(state);
+			}
+		}
+
+		private static void ReadMetablockHuffmanCodesAndContextMaps(Org.Brotli.Dec.State state)
+		{
+			Org.Brotli.Dec.BitReader br = state.br;
+			for (int i = 0; i < 3; i++) // one pass per tree type: 0, 1, 2
+			{
+				state.numBlockTypes[i] = DecodeVarLenUnsignedByte(br) + 1;
+				state.blockLength[i] = 1 << 28; // default length used when there is only one block type
+				if (state.numBlockTypes[i] > 1)
+				{
+					ReadHuffmanCode(state.numBlockTypes[i] + 2, state.blockTypeTrees, i * Org.Brotli.Dec.Huffman.HuffmanMaxTableSize, br);
+					ReadHuffmanCode(NumBlockLengthCodes, state.blockLenTrees, i * Org.Brotli.Dec.Huffman.HuffmanMaxTableSize, br);
+					state.blockLength[i] = ReadBlockLength(state.blockLenTrees, i * Org.Brotli.Dec.Huffman.HuffmanMaxTableSize, br);
+				}
+			}
+			Org.Brotli.Dec.BitReader.ReadMoreInput(br);
+			state.distancePostfixBits = Org.Brotli.Dec.BitReader.ReadBits(br, 2);
+			state.numDirectDistanceCodes = NumDistanceShortCodes + (Org.Brotli.Dec.BitReader.ReadBits(br, 4) << state.distancePostfixBits);
+			state.distancePostfixMask = (1 << state.distancePostfixBits) - 1;
+			int numDistanceCodes = state.numDirectDistanceCodes + (48 << state.distancePostfixBits); // alphabet size for the distance trees
+			// TODO: Reuse?
+			state.contextModes = new byte[state.numBlockTypes[0]];
+			for (int i = 0; i < state.numBlockTypes[0]; )
+			{
+				/* Ensure that less than 256 bits read between readMoreInput. */
+				int limit = System.Math.Min(i + 96, state.numBlockTypes[0]);
+				for (; i < limit; ++i)
+				{
+					state.contextModes[i] = unchecked((byte)(Org.Brotli.Dec.BitReader.ReadBits(br, 2) << 1)); // stored doubled: used as an index into pairs in Context.LookupOffsets
+				}
+				Org.Brotli.Dec.BitReader.ReadMoreInput(br);
+			}
+			// TODO: Reuse?
+			state.contextMap = new byte[state.numBlockTypes[0] << LiteralContextBits];
+			int numLiteralTrees = DecodeContextMap(state.numBlockTypes[0] << LiteralContextBits, state.contextMap, br);
+			state.trivialLiteralContext = true; // stays true only if every entry equals its block-type index
+			for (int j = 0; j < state.numBlockTypes[0] << LiteralContextBits; j++)
+			{
+				if (state.contextMap[j] != j >> LiteralContextBits)
+				{
+					state.trivialLiteralContext = false;
+					break;
+				}
+			}
+			// TODO: Reuse?
+			state.distContextMap = new byte[state.numBlockTypes[2] << DistanceContextBits];
+			int numDistTrees = DecodeContextMap(state.numBlockTypes[2] << DistanceContextBits, state.distContextMap, br);
+			Org.Brotli.Dec.HuffmanTreeGroup.Init(state.hGroup0, NumLiteralCodes, numLiteralTrees); // hGroup0: literal trees
+			Org.Brotli.Dec.HuffmanTreeGroup.Init(state.hGroup1, NumInsertAndCopyCodes, state.numBlockTypes[1]); // hGroup1: insert-and-copy command trees
+			Org.Brotli.Dec.HuffmanTreeGroup.Init(state.hGroup2, numDistanceCodes, numDistTrees); // hGroup2: distance trees
+			Org.Brotli.Dec.HuffmanTreeGroup.Decode(state.hGroup0, br);
+			Org.Brotli.Dec.HuffmanTreeGroup.Decode(state.hGroup1, br);
+			Org.Brotli.Dec.HuffmanTreeGroup.Decode(state.hGroup2, br);
+			state.contextMapSlice = 0;
+			state.distContextMapSlice = 0;
+			state.contextLookupOffset1 = Org.Brotli.Dec.Context.LookupOffsets[state.contextModes[0]];
+			state.contextLookupOffset2 = Org.Brotli.Dec.Context.LookupOffsets[state.contextModes[0] + 1];
+			state.literalTreeIndex = 0;
+			state.literalTree = state.hGroup0.trees[0];
+			state.treeCommandOffset = state.hGroup1.trees[0];
+			// TODO: == 0?
+			state.blockTypeRb[0] = state.blockTypeRb[2] = state.blockTypeRb[4] = 1; // older history slot of each tree type
+			state.blockTypeRb[1] = state.blockTypeRb[3] = state.blockTypeRb[5] = 0; // most recent block type of each tree type
+		}
+
+		private static void CopyUncompressedData(Org.Brotli.Dec.State state)
+		{
+			Org.Brotli.Dec.BitReader br = state.br;
+			byte[] ringBuffer = state.ringBuffer;
+			// Could happen if block ends at ring buffer end.
+			if (state.metaBlockLength <= 0)
+			{
+				Org.Brotli.Dec.BitReader.Reload(br);
+				state.runningState = Org.Brotli.Dec.RunningState.BlockStart;
+				return;
+			}
+			int chunkLength = System.Math.Min(state.ringBufferSize - state.pos, state.metaBlockLength); // copy at most up to the end of the ring buffer
+			Org.Brotli.Dec.BitReader.CopyBytes(br, ringBuffer, state.pos, chunkLength);
+			state.metaBlockLength -= chunkLength;
+			state.pos += chunkLength;
+			if (state.pos == state.ringBufferSize) // buffer full: flush via Write, then come back to this state
+			{
+				state.nextRunningState = Org.Brotli.Dec.RunningState.CopyUncompressed;
+				state.bytesToWrite = state.ringBufferSize;
+				state.bytesWritten = 0;
+				state.runningState = Org.Brotli.Dec.RunningState.Write;
+				return;
+			}
+			Org.Brotli.Dec.BitReader.Reload(br);
+			state.runningState = Org.Brotli.Dec.RunningState.BlockStart; // the min() above means the whole metablock was copied
+		}
+
+		private static bool WriteRingBuffer(Org.Brotli.Dec.State state)
+		{
+			/* Ignore custom dictionary bytes. */
+			if (state.bytesToIgnore != 0)
+			{
+				state.bytesWritten += state.bytesToIgnore; // count them as written without copying them out
+				state.bytesToIgnore = 0;
+			}
+			int toWrite = System.Math.Min(state.outputLength - state.outputUsed, state.bytesToWrite - state.bytesWritten); // limited by free output space and by pending bytes
+			if (toWrite != 0)
+			{
+				System.Array.Copy(state.ringBuffer, state.bytesWritten, state.output, state.outputOffset + state.outputUsed, toWrite);
+				state.outputUsed += toWrite;
+				state.bytesWritten += toWrite;
+			}
+			return state.outputUsed < state.outputLength; // false once the output buffer is full
+		}
+
+		internal static void SetCustomDictionary(Org.Brotli.Dec.State state, byte[] data)
+		{
+			state.customDictionary = (data == null) ? new byte[0] : data; // null is stored as an empty array so .Length reads stay safe
+		}
+
+		/// <summary>Actual decompress implementation: runs the state machine until Finished or until the output buffer is full.</summary>
+		internal static void Decompress(Org.Brotli.Dec.State state)
+		{
+			if (state.runningState == Org.Brotli.Dec.RunningState.Uninitialized)
+			{
+				throw new System.InvalidOperationException("Can't decompress until initialized");
+			}
+			if (state.runningState == Org.Brotli.Dec.RunningState.Closed)
+			{
+				throw new System.InvalidOperationException("Can't decompress after close");
+			}
+			Org.Brotli.Dec.BitReader br = state.br;
+			int ringBufferMask = state.ringBufferSize - 1; // used as an index mask; assumes ringBufferSize is a power of two - TODO confirm
+			byte[] ringBuffer = state.ringBuffer;
+			while (state.runningState != Org.Brotli.Dec.RunningState.Finished)
+			{
+				switch (state.runningState)
+				{
+					case Org.Brotli.Dec.RunningState.BlockStart:
+					{
+						// TODO: extract cases to methods for the better readability.
+						if (state.metaBlockLength < 0)
+						{
+							throw new Org.Brotli.Dec.BrotliRuntimeException("Invalid metablock length");
+						}
+						ReadMetablockInfo(state);
+						/* Ring-buffer would be reallocated here. */
+						ringBufferMask = state.ringBufferSize - 1;
+						ringBuffer = state.ringBuffer;
+						continue;
+					}
+
+					case Org.Brotli.Dec.RunningState.CompressedBlockStart:
+					{
+						ReadMetablockHuffmanCodesAndContextMaps(state);
+						state.runningState = Org.Brotli.Dec.RunningState.MainLoop;
+						goto case Org.Brotli.Dec.RunningState.MainLoop;
+					}
+
+					case Org.Brotli.Dec.RunningState.MainLoop:
+					{
+						// Also entered directly from CompressedBlockStart via goto case.
+						if (state.metaBlockLength <= 0)
+						{
+							state.runningState = Org.Brotli.Dec.RunningState.BlockStart;
+							continue;
+						}
+						Org.Brotli.Dec.BitReader.ReadMoreInput(br);
+						if (state.blockLength[1] == 0)
+						{
+							DecodeCommandBlockSwitch(state);
+						}
+						state.blockLength[1]--;
+						Org.Brotli.Dec.BitReader.FillBitWindow(br);
+						int cmdCode = ReadSymbol(state.hGroup1.codes, state.treeCommandOffset, br);
+						int rangeIdx = (int)(((uint)cmdCode) >> 6);
+						state.distanceCode = 0; // 0 unless rangeIdx >= 2, in which case a distance symbol is read later
+						if (rangeIdx >= 2)
+						{
+							rangeIdx -= 2;
+							state.distanceCode = -1; // negative marks "distance code must be read from the stream"
+						}
+						int insertCode = Org.Brotli.Dec.Prefix.InsertRangeLut[rangeIdx] + (((int)(((uint)cmdCode) >> 3)) & 7);
+						int copyCode = Org.Brotli.Dec.Prefix.CopyRangeLut[rangeIdx] + (cmdCode & 7);
+						state.insertLength = Org.Brotli.Dec.Prefix.InsertLengthOffset[insertCode] + Org.Brotli.Dec.BitReader.ReadBits(br, Org.Brotli.Dec.Prefix.InsertLengthNBits[insertCode]);
+						state.copyLength = Org.Brotli.Dec.Prefix.CopyLengthOffset[copyCode] + Org.Brotli.Dec.BitReader.ReadBits(br, Org.Brotli.Dec.Prefix.CopyLengthNBits[copyCode]);
+						state.j = 0; // progress counter within the insert, then the copy
+						state.runningState = Org.Brotli.Dec.RunningState.InsertLoop;
+						goto case Org.Brotli.Dec.RunningState.InsertLoop;
+					}
+
+					case Org.Brotli.Dec.RunningState.InsertLoop:
+					{
+						// Also entered directly from MainLoop via goto case.
+						if (state.trivialLiteralContext)
+						{
+							while (state.j < state.insertLength)
+							{
+								Org.Brotli.Dec.BitReader.ReadMoreInput(br);
+								if (state.blockLength[0] == 0)
+								{
+									DecodeLiteralBlockSwitch(state);
+								}
+								state.blockLength[0]--;
+								Org.Brotli.Dec.BitReader.FillBitWindow(br);
+								ringBuffer[state.pos] = unchecked((byte)ReadSymbol(state.hGroup0.codes, state.literalTree, br));
+								state.j++;
+								if (state.pos++ == ringBufferMask) // last slot written: flush via Write, then resume InsertLoop
+								{
+									state.nextRunningState = Org.Brotli.Dec.RunningState.InsertLoop;
+									state.bytesToWrite = state.ringBufferSize;
+									state.bytesWritten = 0;
+									state.runningState = Org.Brotli.Dec.RunningState.Write;
+									break;
+								}
+							}
+						}
+						else
+						{
+							int prevByte1 = ringBuffer[(state.pos - 1) & ringBufferMask] & unchecked((int)(0xFF)); // last byte written
+							int prevByte2 = ringBuffer[(state.pos - 2) & ringBufferMask] & unchecked((int)(0xFF)); // byte before that
+							while (state.j < state.insertLength)
+							{
+								Org.Brotli.Dec.BitReader.ReadMoreInput(br);
+								if (state.blockLength[0] == 0)
+								{
+									DecodeLiteralBlockSwitch(state);
+								}
+								int literalTreeIndex = state.contextMap[state.contextMapSlice + (Org.Brotli.Dec.Context.Lookup[state.contextLookupOffset1 + prevByte1] | Org.Brotli.Dec.Context.Lookup[state.contextLookupOffset2 + prevByte2])] & unchecked((int)(0xFF));
+								state.blockLength[0]--;
+								prevByte2 = prevByte1;
+								Org.Brotli.Dec.BitReader.FillBitWindow(br);
+								prevByte1 = ReadSymbol(state.hGroup0.codes, state.hGroup0.trees[literalTreeIndex], br);
+								ringBuffer[state.pos] = unchecked((byte)prevByte1);
+								state.j++;
+								if (state.pos++ == ringBufferMask) // last slot written: flush via Write, then resume InsertLoop
+								{
+									state.nextRunningState = Org.Brotli.Dec.RunningState.InsertLoop;
+									state.bytesToWrite = state.ringBufferSize;
+									state.bytesWritten = 0;
+									state.runningState = Org.Brotli.Dec.RunningState.Write;
+									break;
+								}
+							}
+						}
+						if (state.runningState != Org.Brotli.Dec.RunningState.InsertLoop) // a flush was scheduled above
+						{
+							continue;
+						}
+						state.metaBlockLength -= state.insertLength;
+						if (state.metaBlockLength <= 0)
+						{
+							state.runningState = Org.Brotli.Dec.RunningState.MainLoop;
+							continue;
+						}
+						if (state.distanceCode < 0) // distance symbol has to be read from the stream
+						{
+							Org.Brotli.Dec.BitReader.ReadMoreInput(br);
+							if (state.blockLength[2] == 0)
+							{
+								DecodeDistanceBlockSwitch(state);
+							}
+							state.blockLength[2]--;
+							Org.Brotli.Dec.BitReader.FillBitWindow(br);
+							state.distanceCode = ReadSymbol(state.hGroup2.codes, state.hGroup2.trees[state.distContextMap[state.distContextMapSlice + (state.copyLength > 4 ? 3 : state.copyLength - 2)] & unchecked((int)(0xFF))], br);
+							if (state.distanceCode >= state.numDirectDistanceCodes) // not a direct code: combine postfix, offset and extra bits
+							{
+								state.distanceCode -= state.numDirectDistanceCodes;
+								int postfix = state.distanceCode & state.distancePostfixMask;
+								state.distanceCode = (int)(((uint)state.distanceCode) >> state.distancePostfixBits);
+								int n = ((int)(((uint)state.distanceCode) >> 1)) + 1;
+								int offset = ((2 + (state.distanceCode & 1)) << n) - 4;
+								state.distanceCode = state.numDirectDistanceCodes + postfix + ((offset + Org.Brotli.Dec.BitReader.ReadBits(br, n)) << state.distancePostfixBits);
+							}
+						}
+						// Convert the distance code to the actual distance by possibly looking up past distances
+						// from the ringBuffer.
+						state.distance = TranslateShortCodes(state.distanceCode, state.distRb, state.distRbIdx);
+						if (state.distance < 0)
+						{
+							throw new Org.Brotli.Dec.BrotliRuntimeException("Negative distance");
+						}
+						// COV_NF_LINE
+						if (state.maxDistance != state.maxBackwardDistance && state.pos < state.maxBackwardDistance)
+						{
+							state.maxDistance = state.pos;
+						}
+						else
+						{
+							state.maxDistance = state.maxBackwardDistance;
+						}
+						state.copyDst = state.pos;
+						if (state.distance > state.maxDistance) // beyond the reachable window: handled by the Transform state
+						{
+							state.runningState = Org.Brotli.Dec.RunningState.Transform;
+							continue;
+						}
+						if (state.distanceCode > 0) // code 0 does not update the distance history
+						{
+							state.distRb[state.distRbIdx & 3] = state.distance;
+							state.distRbIdx++;
+						}
+						if (state.copyLength > state.metaBlockLength)
+						{
+							throw new Org.Brotli.Dec.BrotliRuntimeException("Invalid backward reference");
+						}
+						// COV_NF_LINE
+						state.j = 0;
+						state.runningState = Org.Brotli.Dec.RunningState.CopyLoop;
+						goto case Org.Brotli.Dec.RunningState.CopyLoop;
+					}
+
+					case Org.Brotli.Dec.RunningState.CopyLoop:
+					{
+						// Also entered directly from InsertLoop via goto case.
+						int src = (state.pos - state.distance) & ringBufferMask;
+						int dst = state.pos;
+						int copyLength = state.copyLength - state.j; // bytes still to copy
+						if ((src + copyLength < ringBufferMask) && (dst + copyLength < ringBufferMask)) // neither range reaches the buffer end: copy without per-byte wrap checks
+						{
+							for (int k = 0; k < copyLength; ++k)
+							{
+								ringBuffer[dst++] = ringBuffer[src++];
+							}
+							state.j += copyLength;
+							state.metaBlockLength -= copyLength;
+							state.pos += copyLength;
+						}
+						else
+						{
+							for (; state.j < state.copyLength; )
+							{
+								ringBuffer[state.pos] = ringBuffer[(state.pos - state.distance) & ringBufferMask];
+								state.metaBlockLength--;
+								state.j++;
+								if (state.pos++ == ringBufferMask) // last slot written: flush via Write, then resume CopyLoop
+								{
+									state.nextRunningState = Org.Brotli.Dec.RunningState.CopyLoop;
+									state.bytesToWrite = state.ringBufferSize;
+									state.bytesWritten = 0;
+									state.runningState = Org.Brotli.Dec.RunningState.Write;
+									break;
+								}
+							}
+						}
+						if (state.runningState == Org.Brotli.Dec.RunningState.CopyLoop) // no flush scheduled: the copy is complete
+						{
+							state.runningState = Org.Brotli.Dec.RunningState.MainLoop;
+						}
+						continue;
+					}
+
+					case Org.Brotli.Dec.RunningState.Transform:
+					{
+						if (state.copyLength >= Org.Brotli.Dec.Dictionary.MinWordLength && state.copyLength <= Org.Brotli.Dec.Dictionary.MaxWordLength)
+						{
+							int offset = Org.Brotli.Dec.Dictionary.OffsetsByLength[state.copyLength];
+							int wordId = state.distance - state.maxDistance - 1;
+							int shift = Org.Brotli.Dec.Dictionary.SizeBitsByLength[state.copyLength];
+							int mask = (1 << shift) - 1;
+							int wordIdx = wordId & mask; // low bits: word index among words of this length
+							int transformIdx = (int)(((uint)wordId) >> shift); // high bits: index into Transform.Transforms
+							offset += wordIdx * state.copyLength;
+							if (transformIdx < Org.Brotli.Dec.Transform.Transforms.Length)
+							{
+								int len = Org.Brotli.Dec.Transform.TransformDictionaryWord(ringBuffer, state.copyDst, Org.Brotli.Dec.Dictionary.GetData(), offset, state.copyLength, Org.Brotli.Dec.Transform.Transforms[transformIdx]);
+								state.copyDst += len;
+								state.pos += len;
+								state.metaBlockLength -= len;
+								if (state.copyDst >= state.ringBufferSize) // word ran past the logical end: flush, then CopyWrapBuffer moves the overflow to the start
+								{
+									state.nextRunningState = Org.Brotli.Dec.RunningState.CopyWrapBuffer;
+									state.bytesToWrite = state.ringBufferSize;
+									state.bytesWritten = 0;
+									state.runningState = Org.Brotli.Dec.RunningState.Write;
+									continue;
+								}
+							}
+							else
+							{
+								throw new Org.Brotli.Dec.BrotliRuntimeException("Invalid backward reference");
+							}
+						}
+						else
+						{
+							// COV_NF_LINE
+							throw new Org.Brotli.Dec.BrotliRuntimeException("Invalid backward reference");
+						}
+						// COV_NF_LINE
+						state.runningState = Org.Brotli.Dec.RunningState.MainLoop;
+						continue;
+					}
+
+					case Org.Brotli.Dec.RunningState.CopyWrapBuffer:
+					{
+						System.Array.Copy(ringBuffer, state.ringBufferSize, ringBuffer, 0, state.copyDst - state.ringBufferSize); // move bytes written past ringBufferSize to the front
+						state.runningState = Org.Brotli.Dec.RunningState.MainLoop;
+						continue;
+					}
+
+					case Org.Brotli.Dec.RunningState.ReadMetadata:
+					{
+						while (state.metaBlockLength > 0)
+						{
+							Org.Brotli.Dec.BitReader.ReadMoreInput(br);
+							// TODO: optimize; metadata is read and discarded one byte at a time.
+							Org.Brotli.Dec.BitReader.ReadBits(br, 8);
+							state.metaBlockLength--;
+						}
+						state.runningState = Org.Brotli.Dec.RunningState.BlockStart;
+						continue;
+					}
+
+					case Org.Brotli.Dec.RunningState.CopyUncompressed:
+					{
+						CopyUncompressedData(state);
+						continue;
+					}
+
+					case Org.Brotli.Dec.RunningState.Write:
+					{
+						if (!WriteRingBuffer(state))
+						{
+							// Output buffer is full.
+							return;
+						}
+						if (state.pos >= state.maxBackwardDistance)
+						{
+							state.maxDistance = state.maxBackwardDistance;
+						}
+						state.pos &= ringBufferMask; // wrap the position after the flush
+						state.runningState = state.nextRunningState; // resume the state that requested the flush
+						continue;
+					}
+
+					default:
+					{
+						throw new Org.Brotli.Dec.BrotliRuntimeException("Unexpected state " + state.runningState);
+					}
+				}
+			}
+			if (state.runningState == Org.Brotli.Dec.RunningState.Finished)
+			{
+				if (state.metaBlockLength < 0)
+				{
+					throw new Org.Brotli.Dec.BrotliRuntimeException("Invalid metablock length");
+				}
+				Org.Brotli.Dec.BitReader.JumpToByteBoundary(br);
+				Org.Brotli.Dec.BitReader.CheckHealth(state.br, true);
+			}
+		}
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/DecodeTest.cs b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/DecodeTest.cs
new file mode 100644
index 0000000000..f6fad8c88f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/DecodeTest.cs
@@ -0,0 +1,171 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Distributed under MIT license.
+See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+namespace Org.Brotli.Dec
+{
+	/// <summary>
+	/// Tests for
+	/// <see cref="Decode"/>
+	/// .
+	/// </summary>
+	public class DecodeTest
+	{
+		/// <summary>Decodes <paramref name="data"/> through a BrotliInputStream and returns every byte it yields.</summary>
+		/// <param name="data">Compressed input, wrapped in a MemoryStream.</param>
+		/// <param name="byByte">true pulls output with ReadByte(); false pulls 65536-byte chunks with Read().</param>
+		/// <returns>The decoded bytes accumulated from the stream.</returns>
+		/// <exception cref="System.IO.IOException"/>
+		private byte[] Decompress(byte[] data, bool byByte)
+		{
+			System.IO.MemoryStream output = new System.IO.MemoryStream();
+			Org.Brotli.Dec.BrotliInputStream brotliInput = new Org.Brotli.Dec.BrotliInputStream(new System.IO.MemoryStream(data));
+			try
+			{
+				if (byByte)
+				{
+					// -1 from ReadByte() marks the end of the decoded stream.
+					for (int next = brotliInput.ReadByte(); next != -1; next = brotliInput.ReadByte())
+					{
+						output.WriteByte(unchecked((byte)next));
+					}
+				}
+				else
+				{
+					// Allocated here because only the chunked path needs it.
+					byte[] buffer = new byte[65536];
+					for (int len = brotliInput.Read(buffer, 0, buffer.Length); len > 0; len = brotliInput.Read(buffer, 0, buffer.Length))
+					{
+						output.Write(buffer, 0, len);
+					}
+				}
+			}
+			finally
+			{
+				// Close the decoder on the failure path too, not only after a clean read.
+				brotliInput.Close();
+			}
+			return output.ToArray();
+		}
+
+		/// <summary>Chunked decode of <paramref name="data"/> with <paramref name="dictionary"/> handed to the BrotliInputStream constructor.</summary> <exception cref="System.IO.IOException"/>
+		private byte[] DecompressWithDictionary(byte[] data, byte[] dictionary)
+		{
+			byte[] chunk = new byte[65536];
+			System.IO.MemoryStream output = new System.IO.MemoryStream();
+			Org.Brotli.Dec.BrotliInputStream brotliInput = new Org.Brotli.Dec.BrotliInputStream(new System.IO.MemoryStream(data), Org.Brotli.Dec.BrotliInputStream.DefaultInternalBufferSize, dictionary);
+			try
+			{
+				for (int len = brotliInput.Read(chunk, 0, chunk.Length); len > 0; len = brotliInput.Read(chunk, 0, chunk.Length))
+				{
+					output.Write(chunk, 0, len);
+				}
+			}
+			finally
+			{
+				brotliInput.Close(); // Runs on the failure path too, so the decoder is always closed.
+			}
+			return output.ToArray();
+		}
+
+		/// <summary>Converts all three strings with Transform.ReadUniBytes, decodes with the dictionary, and asserts the output equals the expected bytes.</summary>
+		/// <exception cref="System.IO.IOException"/>
+		private void CheckDecodeResourceWithDictionary(string expected, string compressed, string dictionary)
+		{
+			byte[] wantBytes = Org.Brotli.Dec.Transform.ReadUniBytes(expected);
+			byte[] packedBytes = Org.Brotli.Dec.Transform.ReadUniBytes(compressed);
+			byte[] dictBytes = Org.Brotli.Dec.Transform.ReadUniBytes(dictionary);
+			NUnit.Framework.Assert.AreEqual(wantBytes, DecompressWithDictionary(packedBytes, dictBytes));
+		}
+
+		/// <summary>Asserts that <paramref name="compressed"/> decodes to <paramref name="expected"/> in both read modes of Decompress.</summary>
+		/// <exception cref="System.IO.IOException"/>
+		private void CheckDecodeResource(string expected, string compressed)
+		{
+			byte[] wantBytes = Org.Brotli.Dec.Transform.ReadUniBytes(expected);
+			byte[] packedBytes = Org.Brotli.Dec.Transform.ReadUniBytes(compressed);
+			// Chunked reads are checked first, then byte-at-a-time reads; each must match on its own.
+			NUnit.Framework.Assert.AreEqual(wantBytes, Decompress(packedBytes, false));
+			NUnit.Framework.Assert.AreEqual(wantBytes, Decompress(packedBytes, true));
+		}
+
+		/// <summary>The one-character stream "\u0006" must decode to empty output.</summary> <exception cref="System.IO.IOException"/>
+		[NUnit.Framework.Test]
+		public virtual void TestEmpty()
+		{
+			CheckDecodeResource(expected: string.Empty, compressed: "\u0006");
+		}
+
+		/// <summary>A five-character stream must decode to the single letter "X".</summary> <exception cref="System.IO.IOException"/>
+		[NUnit.Framework.Test]
+		public virtual void TestX()
+		{
+			CheckDecodeResource(expected: "X", compressed: "\u000B\u0000\u0080X\u0003");
+		}
+
+		/// <summary>A run of "X" followed by a run of "Y" must be recovered from its compressed form.</summary> <exception cref="System.IO.IOException"/>
+		[NUnit.Framework.Test]
+		public virtual void TestX10Y10()
+		{
+			CheckDecodeResource(expected: "XXXXXXXXXXYYYYYYYYYY", compressed: "\u001B\u0013\u0000\u0000\u00A4\u00B0\u00B2\u00EA\u0081G\u0002\u008A");
+		}
+
+		/// <summary>A long run of the single letter "X" must be recovered from its compressed form.</summary> <exception cref="System.IO.IOException"/>
+		[NUnit.Framework.Test]
+		public virtual void TestX64()
+		{
+			CheckDecodeResource(expected: "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", compressed: "\u001B\u003F\u0000\u0000$\u00B0\u00E2\u0099\u0080\u0012");
+		}
+
+		/// <summary>Decodes a two-sentence text containing repeated phrases and compares it with the expected plain text.</summary> <exception cref="System.IO.IOException"/>
+		[NUnit.Framework.Test]
+		public virtual void TestUkkonooa()
+		{
+			CheckDecodeResource("ukko nooa, ukko nooa oli kunnon mies, kun han meni saunaan, " + "pisti laukun naulaan, ukko nooa, ukko nooa oli kunnon mies.", "\u001Bv\u0000\u0000\u0014J\u00AC\u009Bz\u00BD\u00E1\u0097\u009D\u007F\u008E\u00C2\u0082" + "6\u000E\u009C\u00E0\u0090\u0003\u00F7\u008B\u009E8\u00E6\u00B6\u0000\u00AB\u00C3\u00CA"
+				 + "\u00A0\u00C2\u00DAf6\u00DC\u00CD\u0080\u008D.!\u00D7n\u00E3\u00EAL\u00B8\u00F0\u00D2" + "\u00B8\u00C7\u00C2pM:\u00F0i~\u00A1\u00B8Es\u00AB\u00C4W\u001E");
+		}
+
+		/// <summary>Decodes the largest vector in this class: a long stretch of keyboard-mash text given as concatenated literals.</summary> <exception cref="System.IO.IOException"/>
+		[NUnit.Framework.Test]
+		public virtual void TestMonkey()
+		{
+			CheckDecodeResource("znxcvnmz,xvnm.,zxcnv.,xcn.z,vn.zvn.zxcvn.,zxcn.vn.v,znm.,vnzx.,vnzxc.vn.z,vnz.,nv.z,nvmz" + "xc,nvzxcvcnm.,vczxvnzxcnvmxc.zmcnvzm.,nvmc,nzxmc,vn.mnnmzxc,vnxcnmv,znvzxcnmv,.xcnvm,zxc" + "nzxv.zx,qweryweurqioweupropqwutioweupqrioweutiopweuriopweuriopqwurioputiopqwuriowuqeriou"
+				 + "pqweropuweropqwurweuqriopuropqwuriopuqwriopuqweopruioqweurqweuriouqweopruioupqiytioqtyio" + "wtyqptypryoqweutioioqtweqruowqeytiowquiourowetyoqwupiotweuqiorweuqroipituqwiorqwtioweuri" + "ouytuioerytuioweryuitoweytuiweyuityeruirtyuqriqweuropqweiruioqweurioqwuerioqwyuituierwot"
+				 + "ueryuiotweyrtuiwertyioweryrueioqptyioruyiopqwtjkasdfhlafhlasdhfjklashjkfhasjklfhklasjdfh" + "klasdhfjkalsdhfklasdhjkflahsjdkfhklasfhjkasdfhasfjkasdhfklsdhalghhaf;hdklasfhjklashjklfa" + "sdhfasdjklfhsdjklafsd;hkldadfjjklasdhfjasddfjklfhakjklasdjfkl;asdjfasfljasdfhjklasdfhjka"
+				 + "ghjkashf;djfklasdjfkljasdklfjklasdjfkljasdfkljaklfj", "\u001BJ\u0003\u0000\u008C\u0094n\u00DE\u00B4\u00D7\u0096\u00B1x\u0086\u00F2-\u00E1\u001A" + "\u00BC\u000B\u001C\u00BA\u00A9\u00C7\u00F7\u00CCn\u00B2B4QD\u008BN\u0013\b\u00A0\u00CDn"
+				 + "\u00E8,\u00A5S\u00A1\u009C],\u001D#\u001A\u00D2V\u00BE\u00DB\u00EB&\u00BA\u0003e|\u0096j" + "\u00A2v\u00EC\u00EF\u0087G3\u00D6\'\u000Ec\u0095\u00E2\u001D\u008D,\u00C5\u00D1(\u009F`" + "\u0094o\u0002\u008B\u00DD\u00AAd\u0094,\u001E;e|\u0007EZ\u00B2\u00E2\u00FCI\u0081,\u009F"
+				 + "@\u00AE\u00EFh\u0081\u00AC\u0016z\u000F\u00F5;m\u001C\u00B9\u001E-_\u00D5\u00C8\u00AF^" + "\u0085\u00AA\u0005\u00BESu\u00C2\u00B0\"\u008A\u0015\u00C6\u00A3\u00B1\u00E6B\u0014" + "\u00F4\u0084TS\u0019_\u00BE\u00C3\u00F2\u001D\u00D1\u00B7\u00E5\u00DD\u00B6\u00D9#\u00C6"
+				 + "\u00F6\u009F\u009E\u00F6Me0\u00FB\u00C0qE\u0004\u00AD\u0003\u00B5\u00BE\u00C9\u00CB" + "\u00FD\u00E2PZFt\u0004\r\u00FF \u0004w\u00B2m\'\u00BFG\u00A9\u009D\u001B\u0096,b\u0090#" + "\u008B\u00E0\u00F8\u001D\u00CF\u00AF\u001D=\u00EE\u008A\u00C8u#f\u00DD\u00DE\u00D6m"
+				 + "\u00E3*\u0082\u008Ax\u008A\u00DB\u00E6 L\u00B7\\c\u00BA0\u00E3?\u00B6\u00EE\u008C\"" + "\u00A2*\u00B0\"\n\u0099\u00FF=bQ\u00EE\b\u00F6=J\u00E4\u00CC\u00EF\"\u0087\u0011\u00E2" + "\u0083(\u00E4\u00F5\u008F5\u0019c[\u00E1Z\u0092s\u00DD\u00A1P\u009D8\\\u00EB\u00B5\u0003"
+				 + "jd\u0090\u0094\u00C8\u008D\u00FB/\u008A\u0086\"\u00CC\u001D\u0087\u00E0H\n\u0096w\u00909" + "\u00C6##H\u00FB\u0011GV\u00CA \u00E3B\u0081\u00F7w2\u00C1\u00A5\\@!e\u0017@)\u0017\u0017" + "lV2\u00988\u0006\u00DC\u0099M3)\u00BB\u0002\u00DFL&\u0093l\u0017\u0082\u0086 \u00D7"
+				 + "\u0003y}\u009A\u0000\u00D7\u0087\u0000\u00E7\u000Bf\u00E3Lfqg\b2\u00F9\b>\u00813\u00CD" + "\u0017r1\u00F0\u00B8\u0094RK\u00901\u008Eh\u00C1\u00EF\u0090\u00C9\u00E5\u00F2a\tr%" + "\u00AD\u00EC\u00C5b\u00C0\u000B\u0012\u0005\u00F7\u0091u\r\u00EEa..\u0019\t\u00C2\u0003"
+				);
+		}
+
+		/// <summary>Decodes the "quick brown fox" sentence through CheckDecodeResource, i.e. without passing a dictionary.</summary> <exception cref="System.IO.IOException"/>
+		[NUnit.Framework.Test]
+		public virtual void TestFox()
+		{
+			CheckDecodeResource("The quick brown fox jumps over the lazy dog", "\u001B*\u0000\u0000\u0004\u0004\u00BAF:\u0085\u0003\u00E9\u00FA\f\u0091\u0002H\u0011," + "\u00F3\u008A:\u00A3V\u007F\u001A\u00AE\u00BF\u00A4\u00AB\u008EM\u00BF\u00ED\u00E2\u0004K"
+				 + "\u0091\u00FF\u0087\u00E9\u001E");
+		}
+
+		/// <summary>Decodes the "quick brown fox" sentence while supplying that same sentence as the dictionary.</summary> <exception cref="System.IO.IOException"/>
+		[NUnit.Framework.Test]
+		public virtual void TestFoxFox()
+		{
+			CheckDecodeResourceWithDictionary(expected: "The quick brown fox jumps over the lazy dog", compressed: "\u001B*\u0000\u0000 \u0000\u00C2\u0098\u00B0\u00CA\u0001", dictionary: "The quick brown fox jumps over the lazy dog");
+		}
+
+		/// <summary>Constructs one instance of each listed helper class; there are no assertions, so it fails only if a constructor throws.</summary>
+		[NUnit.Framework.Test]
+		public virtual void TestUtils()
+		{
+			object[] created = new object[]
+			{
+				new Org.Brotli.Dec.Context(), new Org.Brotli.Dec.Decode(), new Org.Brotli.Dec.Dictionary(), new Org.Brotli.Dec.Huffman(), new Org.Brotli.Dec.Prefix()
+			};
+		}
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Dictionary.cs b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Dictionary.cs
new file mode 100644
index 0000000000..3445f80f67
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Dictionary.cs
@@ -0,0 +1,97 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Distributed under MIT license.
+See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+namespace Org.Brotli.Dec
+{
+	/// <summary>Collection of static dictionary words.</summary>
+	/// <remarks>
+	/// Collection of static dictionary words.
+	/// <para>Dictionary content is loaded from a binary resource when
+	/// <see cref="GetData()"/>
+	/// is executed for the
+	/// first time. Consequently, it saves memory and CPU when the dictionary is not required.</para>
+	/// <para>One possible drawback is that multiple threads that need dictionary data may be blocked (only
+	/// once in each classworld). To avoid this, it is enough to call
+	/// <see cref="GetData()"/>
+	/// proactively.</para>
+	/// </remarks>
+	internal sealed class Dictionary
+	{
+		/// <summary>"Initialization-on-demand holder idiom" implementation.</summary>
+		/// <remarks>
+		/// "Initialization-on-demand holder idiom" implementation.
+		/// <p>This static class definition is not initialized until the JVM determines that it must be
+		/// executed (when the static method
+		/// <see cref="GetData()"/>
+		/// is invoked).
+		/// </remarks>
+		private class DataHolder0
+		{
+			internal static string GetData()
+			{
+				return "timedownlifeleftbackcodedatashowonlysitecityopenjustlikefreeworktextyearoverbodyloveformbookplaylivelinehelphomesidemorewordlongthemviewfindpagedaysfullheadtermeachareafromtruemarkableuponhighdatelandnewsevennextcasebothpostusedmadehandherewhatnameLinkblogsizebaseheldmakemainuser') +holdendswithNewsreadweresigntakehavegameseencallpathwellplusmenufilmpartjointhislistgoodneedwayswestjobsmindalsologorichuseslastteamarmyfoodkingwilleastwardbestfirePageknowaway.pngmovethanloadgiveselfnotemuchfeedmanyrockicononcelookhidediedHomerulehostajaxinfoclublawslesshalfsomesuchzone100%onescareTimeracebluefourweekfacehopegavehardlostwhenparkkeptpassshiproomHTMLplanTypedonesavekeepflaglinksoldfivetookratetownjumpthusdarkcardfilefearstaykillthatfallautoever.comtalkshopvotedeepmoderestturnbornbandfellroseurl(skinrolecomeactsagesmeetgold.jpgitemvaryfeltthensenddropViewcopy1.0\"</a>stopelseliestourpack.gifpastcss?graymean&gt;rideshotlatesaidroadvar feeljohnrickportfast'UA-dead</b>poorbilltypeU.S.woodmust2px;Inforankwidewantwalllead[0];paulwavesure$('#waitmassarmsgoesgainlangpaid!-- lockunitrootwalkfirmwifexml\"songtest20pxkindrowstoolfontmailsafestarmapscorerainflowbabyspansays4px;6px;artsfootrealwikiheatsteptriporg/lakeweaktoldFormcastfansbankveryrunsjulytask1px;goalgrewslowedgeid=\"sets5px;.js?40pxif (soonseatnonetubezerosentreedfactintogiftharm18pxcamehillboldzoomvoideasyringfillpeakinitcost3px;jacktagsbitsrolleditknewnear<!--growJSONdutyNamesaleyou lotspainjazzcoldeyesfishwww.risktabsprev10pxrise25pxBlueding300,ballfordearnwildbox.fairlackverspairjunetechif(!pickevil$(\"#warmlorddoespull,000ideadrawhugespotfundburnhrefcellkeystickhourlossfuel12pxsuitdealRSS\"agedgreyGET\"easeaimsgirlaids8px;navygridtips#999warsladycars); }php?helltallwhomzh:\u00E5*/\r\n 100hall.\n\nA7px;pushchat0px;crew*/</hash75pxflatrare && 
tellcampontolaidmissskiptentfinemalegetsplot400,\r\n\r\ncoolfeet.php<br>ericmostguidbelldeschairmathatom/img&#82luckcent000;tinygonehtmlselldrugFREEnodenick?id=losenullvastwindRSS wearrelybeensamedukenasacapewishgulfT23:hitsslotgatekickblurthey15px''););\">msiewinsbirdsortbetaseekT18:ordstreemall60pxfarm\u00E2\u0080\u0099sboys[0].');\"POSTbearkids);}}marytend(UK)quadzh:\u00E6-siz----prop');\rliftT19:viceandydebt>RSSpoolneckblowT16:doorevalT17:letsfailoralpollnovacolsgene \u00E2\u0080\u0094softrometillross<h3>pourfadepink<tr>mini)|!(minezh:\u00E8barshear00);milk -->ironfreddiskwentsoilputs/js/holyT22:ISBNT20:adamsees<h2>json', 'contT21: RSSloopasiamoon</p>soulLINEfortcartT14:<h1>80px!--<9px;T04:mike:46ZniceinchYorkricezh:\u00E4'));puremageparatonebond:37Z_of_']);000,zh:\u00E7tankyardbowlbush:56ZJava30px\n|}\n%C3%:34ZjeffEXPIcashvisagolfsnowzh:\u00E9quer.csssickmeatmin.binddellhirepicsrent:36ZHTTP-201fotowolfEND xbox:54ZBODYdick;\n}\nexit:35Zvarsbeat'});diet999;anne}}</[i].Langkm\u00C2\u00B2wiretoysaddssealalex;\n\t}echonine.org005)tonyjewssandlegsroof000) 200winegeardogsbootgarycutstyletemption.xmlcockgang$('.50pxPh.Dmiscalanloandeskmileryanunixdisc);}\ndustclip).\n\n70px-200DVDs7]><tapedemoi++)wageeurophiloptsholeFAQsasin-26TlabspetsURL bulkcook;}\r\nHEAD[0])abbrjuan(198leshtwin</i>sonyguysfuckpipe|-\n!002)ndow[1];[];\nLog salt\r\n\t\tbangtrimbath){\r\n00px\n});ko:\u00ECfeesad>\rs:// [];tollplug(){\n{\r\n 
.js'200pdualboat.JPG);\n}quot);\n\n');\n\r\n}\r201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037201320122011201020092008200720062005200420032002200120001999199819971996199519941993199219911990198919881987198619851984198319821981198019791978197719761975197419731972197119701969196819671966196519641963196219611960195919581957195619551954195319521951195010001024139400009999comom\u00C3\u00A1sesteestaperotodohacecadaa\u00C3\u00B1obiend\u00C3\u00ADaas\u00C3\u00ADvidacasootroforosolootracualdijosidograntipotemadebealgoqu\u00C3\u00A9estonadatrespococasabajotodasinoaguapuesunosantediceluisellamayozonaamorpisoobraclicellodioshoracasi\u00D0\u00B7\u00D0\u00B0\u00D0\u00BD\u00D0\u00B0\u00D0\u00BE\u00D0\u00BC\u00D1\u0080\u00D0\u00B0\u00D1\u0080\u00D1\u0083\u00D1\u0082\u00D0\u00B0\u00D0\u00BD\u00D0\u00B5\u00D0\u00BF\u00D0\u00BE\u00D0\u00BE\u00D1\u0082\u00D0\u00B8\u00D0\u00B7\u00D0\u00BD\u00D0\u00BE\u00D0\u00B4\u00D0\u00BE\u00D1\u0082\u00D0\u00BE\u00D0\u00B6\u00D0\u00B5\u00D0\u00BE\u00D0\u00BD\u00D0\u00B8\u00D1\u0085\u00D0\u009D\u00D0\u00B0\u00D0\u00B5\u00D0\u00B5\u00D0\u00B1\u00D1\u008B\u00D0\u00BC\u00D1\u008B\u00D0\u0092\u00D1\u008B\u00D1\u0081\u00D0\u00BE\u00D0\u00B2\u00D1\u008B\u00D0\u00B2\u00D0\u00BE\u00D0\u009D\u00D0\u00BE\u00D0\u00BE\u00D0\u00B1\u00D0\u009F\u00D0\u00BE\u00D0\u00BB\u00D0\u00B8\u00D0\u00BD\u00D0\u00B8\u00D0\u00A0\u00D0\u00A4\u00D0\u009D\u00D0\u00B5\u00D0\u009C\u00D1\u008B\u00D1\u0082\u00D1\u008B\u00D0\u009E\u00D0\u00BD\u00D0\u00B8\u00D0\u00BC\u00D0\u00B4\u00D0\u00B0\u00D0\u0097\u00D0\u00B0\u00D0\u0094\u00D0\u00B0\u00D0\u009D\u00D1\u0083\u00D0\u009E\u00D0\u00B1\u00D1\u0082\u00D0\u00B5\u00D0\u0098\u00D0\u00B7\u00D0\u00B5\u00D0\u00B9\u00D0\u00BD\u00D1\u0083\u00D0\u00BC\u00D0\u00BC\u00D0\u00A2\u00D1\u008B\u00D1\u0083\u00D0\u00B6\u00D9\u0081\u00D9\u008A\u00D8\u00A3\u00D9\u0086\u00D9\u0085\u00D8\u00A7\u00D9\u0085\u00D8\u00B9\u00D9\u0083\u00D9\u0084\u00D8\u00A3\u00D9\u0088\u00D8\u00B1\u00D8\u00AF\u00D9\u008A\u00D8\u00A7\u0
0D9\u0081\u00D9\u0089\u00D9\u0087\u00D9\u0088\u00D9\u0084\u00D9\u0085\u00D9\u0084\u00D9\u0083\u00D8\u00A7\u00D9\u0088\u00D9\u0084\u00D9\u0087\u00D8\u00A8\u00D8\u00B3\u00D8\u00A7\u00D9\u0084\u00D8\u00A5\u00D9\u0086\u00D9\u0087\u00D9\u008A\u00D8\u00A3\u00D9\u008A\u00D9\u0082\u00D8\u00AF\u00D9\u0087\u00D9\u0084\u00D8\u00AB\u00D9\u0085\u00D8\u00A8\u00D9\u0087\u00D9\u0084\u00D9\u0088\u00D9\u0084\u00D9\u008A\u00D8\u00A8\u00D9\u0084\u00D8\u00A7\u00D9\u008A\u00D8\u00A8\u00D9\u0083\u00D8\u00B4\u00D9\u008A\u00D8\u00A7\u00D9\u0085\u00D8\u00A3\u00D9\u0085\u00D9\u0086\u00D8\u00AA\u00D8\u00A8\u00D9\u008A\u00D9\u0084\u00D9\u0086\u00D8\u00AD\u00D8\u00A8\u00D9\u0087\u00D9\u0085\u00D9\u0085\u00D8\u00B4\u00D9\u0088\u00D8\u00B4firstvideolightworldmediawhitecloseblackrightsmallbooksplacemusicfieldorderpointvalueleveltableboardhousegroupworksyearsstatetodaywaterstartstyledeathpowerphonenighterrorinputabouttermstitletoolseventlocaltimeslargewordsgamesshortspacefocusclearmodelblockguideradiosharewomenagainmoneyimagenamesyounglineslatercolorgreenfront&amp;watchforcepricerulesbeginaftervisitissueareasbelowindextotalhourslabelprintpressbuiltlinksspeedstudytradefoundsenseundershownformsrangeaddedstillmovedtakenaboveflashfixedoftenotherviewschecklegalriveritemsquickshapehumanexistgoingmoviethirdbasicpeacestagewidthloginideaswrotepagesusersdrivestorebreaksouthvoicesitesmonthwherebuildwhichearthforumthreesportpartyClicklowerlivesclasslayerentrystoryusagesoundcourtyour birthpopuptypesapplyImagebeinguppernoteseveryshowsmeansextramatchtrackknownearlybegansuperpapernorthlearngivennamedendedTermspartsGroupbrandusingwomanfalsereadyaudiotakeswhile.com/livedcasesdailychildgreatjudgethoseunitsneverbroadcoastcoverapplefilescyclesceneplansclickwritequeenpieceemailframeolderphotolimitcachecivilscaleenterthemetheretouchboundroyalaskedwholesincestock 
namefaithheartemptyofferscopeownedmightalbumthinkbloodarraymajortrustcanonunioncountvalidstoneStyleLoginhappyoccurleft:freshquitefilmsgradeneedsurbanfightbasishoverauto;route.htmlmixedfinalYour slidetopicbrownalonedrawnsplitreachRightdatesmarchquotegoodsLinksdoubtasyncthumballowchiefyouthnovel10px;serveuntilhandsCheckSpacequeryjamesequaltwice0,000Startpanelsongsroundeightshiftworthpostsleadsweeksavoidthesemilesplanesmartalphaplantmarksratesplaysclaimsalestextsstarswrong</h3>thing.org/multiheardPowerstandtokensolid(thisbringshipsstafftriedcallsfullyfactsagentThis //-->adminegyptEvent15px;Emailtrue\"crossspentblogsbox\">notedleavechinasizesguest</h4>robotheavytrue,sevengrandcrimesignsawaredancephase><!--en_US&#39;200px_namelatinenjoyajax.ationsmithU.S. holdspeterindianav\">chainscorecomesdoingpriorShare1990sromanlistsjapanfallstrialowneragree</h2>abusealertopera\"-//WcardshillsteamsPhototruthclean.php?saintmetallouismeantproofbriefrow\">genretrucklooksValueFrame.net/-->\n<try {\nvar makescostsplainadultquesttrainlaborhelpscausemagicmotortheir250pxleaststepsCountcouldglasssidesfundshotelawardmouthmovesparisgivesdutchtexasfruitnull,||[];top\">\n<!--POST\"ocean<br/>floorspeakdepth sizebankscatchchart20px;aligndealswould50px;url=\"parksmouseMost ...</amongbrainbody none;basedcarrydraftreferpage_home.meterdelaydreamprovejoint</tr>drugs<!-- aprilidealallenexactforthcodeslogicView seemsblankports (200saved_linkgoalsgrantgreekhomesringsrated30px;whoseparse();\" Blocklinuxjonespixel');\">);if(-leftdavidhorseFocusraiseboxesTrackement</em>bar\">.src=toweralt=\"cablehenry24px;setupitalysharpminortastewantsthis.resetwheelgirls/css/100%;clubsstuffbiblevotes 1000korea});\r\nbandsqueue= {};80px;cking{\r\n\t\taheadclockirishlike ratiostatsForm\"yahoo)[0];Aboutfinds</h1>debugtasksURL =cells})();12px;primetellsturns0x600.jpg\"spainbeachtaxesmicroangel--></giftssteve-linkbody.});\n\tmount (199FAQ</rogerfrankClass28px;feeds<h1><scotttests22px;drink) || lewisshall#039; for 
lovedwaste00px;ja:\u00E3\u0082simon<fontreplymeetsuntercheaptightBrand) != dressclipsroomsonkeymobilmain.Name platefunnytreescom/\"1.jpgwmodeparamSTARTleft idden, 201);\n}\nform.viruschairtransworstPagesitionpatch<!--\no-cacfirmstours,000 asiani++){adobe')[0]id=10both;menu .2.mi.png\"kevincoachChildbruce2.jpgURL)+.jpg|suitesliceharry120\" sweettr>\r\nname=diegopage swiss-->\n\n#fff;\">Log.com\"treatsheet) && 14px;sleepntentfiledja:\u00E3\u0083id=\"cName\"worseshots-box-delta\n&lt;bears:48Z<data-rural</a> spendbakershops= \"\";php\">ction13px;brianhellosize=o=%2F joinmaybe<img img\">, fjsimg\" \")[0]MTopBType\"newlyDanskczechtrailknows</h5>faq\">zh-cn10);\n-1\");type=bluestrulydavis.js';>\r\n<!steel you h2>\r\nform jesus100% menu.\r\n\t\r\nwalesrisksumentddingb-likteachgif\" vegasdanskeestishqipsuomisobredesdeentretodospuedea\u00C3\u00B1osest\u00C3\u00A1tienehastaotrospartedondenuevohacerformamismomejormundoaqu\u00C3\u00ADd\u00C3\u00ADass\u00C3\u00B3loayudafechatodastantomenosdatosotrassitiomuchoahoralugarmayorestoshorastenerantesfotosestaspa\u00C3\u00ADsnuevasaludforosmedioquienmesespoderchileser\u00C3\u00A1vecesdecirjos\u00C3\u00A9estarventagrupohechoellostengoamigocosasnivelgentemismaairesjuliotemashaciafavorjuniolibrepuntobuenoautorabrilbuenatextomarzosaberlistaluegoc\u00C3\u00B3moenerojuegoper\u00C3\u00BAhaberestoynuncamujervalorfueralibrogustaigualvotoscasosgu\u00C3\u00ADapuedosomosavisousteddebennochebuscafaltaeurosseriedichocursoclavecasasle\u00C3\u00B3nplazolargoobrasvistaapoyojuntotratavistocrearcampohemoscincocargopisosordenhacen\u00C3\u00A1readiscopedrocercapuedapapelmenor\u00C3\u00BAtilclarojorgecalleponertardenadiemarcasigueellassiglocochemotosmadreclaserestoni\u00C3\u00B1oquedapasarbancohijosviajepablo\u00C3\u00A9stevienereinodejarfondocanalnorteletracausatomarmanoslunesautosvillavendopesartipostengamarcollevapadreunidovamoszonasambosbandamariaabusomuchasubirriojavivirgradochicaall\u00C3\u00ADjovendichaestantalessalirsuelopesosfinesllamabusco\u00C3\u00
A9stalleganegroplazahumorpagarjuntadobleislasbolsaba\u00C3\u00B1ohablalucha\u00C3\u0081readicenjugarnotasvalleall\u00C3\u00A1cargadolorabajoest\u00C3\u00A9gustomentemariofirmacostofichaplatahogarartesleyesaquelmuseobasespocosmitadcielochicomiedoganarsantoetapadebesplayaredessietecortecoreadudasdeseoviejodeseaaguas&quot;domaincommonstatuseventsmastersystemactionbannerremovescrollupdateglobalmediumfilternumberchangeresultpublicscreenchoosenormaltravelissuessourcetargetspringmodulemobileswitchphotosborderregionitselfsocialactivecolumnrecordfollowtitle>eitherlengthfamilyfriendlayoutauthorcreatereviewsummerserverplayedplayerexpandpolicyformatdoublepointsseriespersonlivingdesignmonthsforcesuniqueweightpeopleenergynaturesearchfigurehavingcustomoffsetletterwindowsubmitrendergroupsuploadhealthmethodvideosschoolfutureshadowdebatevaluesObjectothersrightsleaguechromesimplenoticesharedendingseasonreportonlinesquarebuttonimagesenablemovinglatestwinterFranceperiodstrongrepeatLondondetailformeddemandsecurepassedtoggleplacesdevicestaticcitiesstreamyellowattackstreetflighthiddeninfo\">openedusefulvalleycausesleadersecretseconddamagesportsexceptratingsignedthingseffectfieldsstatesofficevisualeditorvolumeReportmuseummoviesparentaccessmostlymother\" id=\"marketgroundchancesurveybeforesymbolmomentspeechmotioninsidematterCenterobjectexistsmiddleEuropegrowthlegacymannerenoughcareeransweroriginportalclientselectrandomclosedtopicscomingfatheroptionsimplyraisedescapechosenchurchdefinereasoncorneroutputmemoryiframepolicemodelsNumberduringoffersstyleskilledlistedcalledsilvermargindeletebetterbrowselimitsGlobalsinglewidgetcenterbudgetnowrapcreditclaimsenginesafetychoicespirit-stylespreadmakingneededrussiapleaseextentScriptbrokenallowschargedividefactormember-basedtheoryconfigaroundworkedhelpedChurchimpactshouldalwayslogo\" bottomlist\">){var 
prefixorangeHeader.push(couplegardenbridgelaunchReviewtakingvisionlittledatingButtonbeautythemesforgotSearchanchoralmostloadedChangereturnstringreloadMobileincomesupplySourceordersviewed&nbsp;courseAbout island<html cookiename=\"amazonmodernadvicein</a>: The dialoghousesBEGIN MexicostartscentreheightaddingIslandassetsEmpireSchooleffortdirectnearlymanualSelect.\n\nOnejoinedmenu\">PhilipawardshandleimportOfficeregardskillsnationSportsdegreeweekly (e.g.behinddoctorloggedunited</b></beginsplantsassistartistissued300px|canadaagencyschemeremainBrazilsamplelogo\">beyond-scaleacceptservedmarineFootercamera</h1>\n_form\"leavesstress\" />\r\n.gif\" onloadloaderOxfordsistersurvivlistenfemaleDesignsize=\"appealtext\">levelsthankshigherforcedanimalanyoneAfricaagreedrecentPeople<br />wonderpricesturned|| {};main\">inlinesundaywrap\">failedcensusminutebeaconquotes150px|estateremoteemail\"linkedright;signalformal1.htmlsignupprincefloat:.png\" forum.AccesspaperssoundsextendHeightsliderUTF-8\"&amp; Before. 
WithstudioownersmanageprofitjQueryannualparamsboughtfamousgooglelongeri++) {israelsayingdecidehome\">headerensurebranchpiecesblock;statedtop\"><racingresize--&gt;pacitysexualbureau.jpg\" 10,000obtaintitlesamount, Inc.comedymenu\" lyricstoday.indeedcounty_logo.FamilylookedMarketlse ifPlayerturkey);var forestgivingerrorsDomain}else{insertBlog</footerlogin.fasteragents<body 10px 0pragmafridayjuniordollarplacedcoversplugin5,000 page\">boston.test(avatartested_countforumsschemaindex,filledsharesreaderalert(appearSubmitline\">body\">\n* TheThoughseeingjerseyNews</verifyexpertinjurywidth=CookieSTART across_imagethreadnativepocketbox\">\nSystem DavidcancertablesprovedApril reallydriveritem\">more\">boardscolorscampusfirst || [];media.guitarfinishwidth:showedOther .php\" assumelayerswilsonstoresreliefswedenCustomeasily your String\n\nWhiltaylorclear:resortfrenchthough\") + \"<body>buyingbrandsMembername\">oppingsector5px;\">vspacepostermajor coffeemartinmaturehappen</nav>kansaslink\">Images=falsewhile hspace0&amp; \n\nIn  powerPolski-colorjordanBottomStart -count2.htmlnews\">01.jpgOnline-rightmillerseniorISBN 00,000 guidesvalue)ectionrepair.xml\"  rights.html-blockregExp:hoverwithinvirginphones</tr>\rusing \n\tvar 
>');\n\t</td>\n</tr>\nbahasabrasilgalegomagyarpolskisrpski\u00D8\u00B1\u00D8\u00AF\u00D9\u0088\u00E4\u00B8\u00AD\u00E6\u0096\u0087\u00E7\u00AE\u0080\u00E4\u00BD\u0093\u00E7\u00B9\u0081\u00E9\u00AB\u0094\u00E4\u00BF\u00A1\u00E6\u0081\u00AF\u00E4\u00B8\u00AD\u00E5\u009B\u00BD\u00E6\u0088\u0091\u00E4\u00BB\u00AC\u00E4\u00B8\u0080\u00E4\u00B8\u00AA\u00E5\u0085\u00AC\u00E5\u008F\u00B8\u00E7\u00AE\u00A1\u00E7\u0090\u0086\u00E8\u00AE\u00BA\u00E5\u009D\u009B\u00E5\u008F\u00AF\u00E4\u00BB\u00A5\u00E6\u009C\u008D\u00E5\u008A\u00A1\u00E6\u0097\u00B6\u00E9\u0097\u00B4\u00E4\u00B8\u00AA\u00E4\u00BA\u00BA\u00E4\u00BA\u00A7\u00E5\u0093\u0081\u00E8\u0087\u00AA\u00E5\u00B7\u00B1\u00E4\u00BC\u0081\u00E4\u00B8\u009A\u00E6\u009F\u00A5\u00E7\u009C\u008B\u00E5\u00B7\u00A5\u00E4\u00BD\u009C\u00E8\u0081\u0094\u00E7\u00B3\u00BB\u00E6\u00B2\u00A1\u00E6\u009C\u0089\u00E7\u00BD\u0091\u00E7\u00AB\u0099\u00E6\u0089\u0080\u00E6\u009C\u0089\u00E8\u00AF\u0084\u00E8\u00AE\u00BA\u00E4\u00B8\u00AD\u00E5\u00BF\u0083\u00E6\u0096\u0087\u00E7\u00AB\u00A0\u00E7\u0094\u00A8\u00E6\u0088\u00B7\u00E9\u00A6\u0096\u00E9\u00A1\u00B5\u00E4\u00BD\u009C\u00E8\u0080\u0085\u00E6\u008A\u0080\u00E6\u009C\u00AF\u00E9\u0097\u00AE\u00E9\u00A2\u0098\u00E7\u009B\u00B8\u00E5\u0085\u00B3\u00E4\u00B8\u008B\u00E8\u00BD\u00BD\u00E6\u0090\u009C\u00E7\u00B4\u00A2\u00E4\u00BD\u00BF\u00E7\u0094\u00A8\u00E8\u00BD\u00AF\u00E4\u00BB\u00B6\u00E5\u009C\u00A8\u00E7\u00BA\u00BF\u00E4\u00B8\u00BB\u00E9\u00A2\u0098\u00E8\u00B5\u0084\u00E6\u0096\u0099\u00E8\u00A7\u0086\u00E9\u00A2\u0091\u00E5\u009B\u009E\u00E5\u00A4\u008D\u00E6\u00B3\u00A8\u00E5\u0086\u008C\u00E7\u00BD\u0091\u00E7\u00BB\u009C\u00E6\u0094\u00B6\u00E8\u0097\u008F\u00E5\u0086\u0085\u00E5\u00AE\u00B9\u00E6\u008E\u00A8\u00E8\u008D\u0090\u00E5\u00B8\u0082\u00E5\u009C\u00BA\u00E6\u00B6\u0088\u00E6\u0081\u00AF\u00E7\u00A9\u00BA\u00E9\u0097\u00B4\u00E5\u008F\u0091\u00E5\u00B8\u0083\u00E4\u00BB\u0080\u00E4\u00B9\u0088\u00E5\u00A5\u00BD\u00E5\u008F\u008B\u00E7\u0094\u009F\u00E6\u00B4\u00
BB\u00E5\u009B\u00BE\u00E7\u0089\u0087\u00E5\u008F\u0091\u00E5\u00B1\u0095\u00E5\u00A6\u0082\u00E6\u009E\u009C\u00E6\u0089\u008B\u00E6\u009C\u00BA\u00E6\u0096\u00B0\u00E9\u0097\u00BB\u00E6\u009C\u0080\u00E6\u0096\u00B0\u00E6\u0096\u00B9\u00E5\u00BC\u008F\u00E5\u008C\u0097\u00E4\u00BA\u00AC\u00E6\u008F\u0090\u00E4\u00BE\u009B\u00E5\u0085\u00B3\u00E4\u00BA\u008E\u00E6\u009B\u00B4\u00E5\u00A4\u009A\u00E8\u00BF\u0099\u00E4\u00B8\u00AA\u00E7\u00B3\u00BB\u00E7\u00BB\u009F\u00E7\u009F\u00A5\u00E9\u0081\u0093\u00E6\u00B8\u00B8\u00E6\u0088\u008F\u00E5\u00B9\u00BF\u00E5\u0091\u008A\u00E5\u0085\u00B6\u00E4\u00BB\u0096\u00E5\u008F\u0091\u00E8\u00A1\u00A8\u00E5\u00AE\u0089\u00E5\u0085\u00A8\u00E7\u00AC\u00AC\u00E4\u00B8\u0080\u00E4\u00BC\u009A\u00E5\u0091\u0098\u00E8\u00BF\u009B\u00E8\u00A1\u008C\u00E7\u0082\u00B9\u00E5\u0087\u00BB\u00E7\u0089\u0088\u00E6\u009D\u0083\u00E7\u0094\u00B5\u00E5\u00AD\u0090\u00E4\u00B8\u0096\u00E7\u0095\u008C\u00E8\u00AE\u00BE\u00E8\u00AE\u00A1\u00E5\u0085\u008D\u00E8\u00B4\u00B9\u00E6\u0095\u0099\u00E8\u0082\u00B2\u00E5\u008A\u00A0\u00E5\u0085\u00A5\u00E6\u00B4\u00BB\u00E5\u008A\u00A8\u00E4\u00BB\u0096\u00E4\u00BB\u00AC\u00E5\u0095\u0086\u00E5\u0093\u0081\u00E5\u008D\u009A\u00E5\u00AE\u00A2\u00E7\u008E\u00B0\u00E5\u009C\u00A8\u00E4\u00B8\u008A\u00E6\u00B5\u00B7\u00E5\u00A6\u0082\u00E4\u00BD\u0095\u00E5\u00B7\u00B2\u00E7\u00BB\u008F\u00E7\u0095\u0099\u00E8\u00A8\u0080\u00E8\u00AF\u00A6\u00E7\u00BB\u0086\u00E7\u00A4\u00BE\u00E5\u008C\u00BA\u00E7\u0099\u00BB\u00E5\u00BD\u0095\u00E6\u009C\u00AC\u00E7\u00AB\u0099\u00E9\u009C\u0080\u00E8\u00A6\u0081\u00E4\u00BB\u00B7\u00E6\u00A0\u00BC\u00E6\u0094\u00AF\u00E6\u008C\u0081\u00E5\u009B\u00BD\u00E9\u0099\u0085\u00E9\u0093\u00BE\u00E6\u008E\u00A5\u00E5\u009B\u00BD\u00E5\u00AE\u00B6\u00E5\u00BB\u00BA\u00E8\u00AE\u00BE\u00E6\u009C\u008B\u00E5\u008F\u008B\u00E9\u0098\u0085\u00E8\u00AF\u00BB\u00E6\u00B3\u0095\u00E5\u00BE\u008B\u00E4\u00BD\u008D\u00E7\u00BD\u00AE\u00E7\u00BB\u008F\u00E6\u00B5\u008E\u00E9\u0080\u0089
\u00E6\u008B\u00A9\u00E8\u00BF\u0099\u00E6\u00A0\u00B7\u00E5\u00BD\u0093\u00E5\u0089\u008D\u00E5\u0088\u0086\u00E7\u00B1\u00BB\u00E6\u008E\u0092\u00E8\u00A1\u008C\u00E5\u009B\u00A0\u00E4\u00B8\u00BA\u00E4\u00BA\u00A4\u00E6\u0098\u0093\u00E6\u009C\u0080\u00E5\u0090\u008E\u00E9\u009F\u00B3\u00E4\u00B9\u0090\u00E4\u00B8\u008D\u00E8\u0083\u00BD\u00E9\u0080\u009A\u00E8\u00BF\u0087\u00E8\u00A1\u008C\u00E4\u00B8\u009A\u00E7\u00A7\u0091\u00E6\u008A\u0080\u00E5\u008F\u00AF\u00E8\u0083\u00BD\u00E8\u00AE\u00BE\u00E5\u00A4\u0087\u00E5\u0090\u0088\u00E4\u00BD\u009C\u00E5\u00A4\u00A7\u00E5\u00AE\u00B6\u00E7\u00A4\u00BE\u00E4\u00BC\u009A\u00E7\u00A0\u0094\u00E7\u00A9\u00B6\u00E4\u00B8\u0093\u00E4\u00B8\u009A\u00E5\u0085\u00A8\u00E9\u0083\u00A8\u00E9\u00A1\u00B9\u00E7\u009B\u00AE\u00E8\u00BF\u0099\u00E9\u0087\u008C\u00E8\u00BF\u0098\u00E6\u0098\u00AF\u00E5\u00BC\u0080\u00E5\u00A7\u008B\u00E6\u0083\u0085\u00E5\u0086\u00B5\u00E7\u0094\u00B5\u00E8\u0084\u0091\u00E6\u0096\u0087\u00E4\u00BB\u00B6\u00E5\u0093\u0081\u00E7\u0089\u008C\u00E5\u00B8\u00AE\u00E5\u008A\u00A9\u00E6\u0096\u0087\u00E5\u008C\u0096\u00E8\u00B5\u0084\u00E6\u00BA\u0090\u00E5\u00A4\u00A7\u00E5\u00AD\u00A6\u00E5\u00AD\u00A6\u00E4\u00B9\u00A0\u00E5\u009C\u00B0\u00E5\u009D\u0080\u00E6\u00B5\u008F\u00E8\u00A7\u0088\u00E6\u008A\u0095\u00E8\u00B5\u0084\u00E5\u00B7\u00A5\u00E7\u00A8\u008B\u00E8\u00A6\u0081\u00E6\u00B1\u0082\u00E6\u0080\u008E\u00E4\u00B9\u0088\u00E6\u0097\u00B6\u00E5\u0080\u0099\u00E5\u008A\u009F\u00E8\u0083\u00BD\u00E4\u00B8\u00BB\u00E8\u00A6\u0081\u00E7\u009B\u00AE\u00E5\u0089\u008D\u00E8\u00B5\u0084\u00E8\u00AE\u00AF\u00E5\u009F\u008E\u00E5\u00B8\u0082\u00E6\u0096\u00B9\u00E6\u00B3\u0095\u00E7\u0094\u00B5\u00E5\u00BD\u00B1\u00E6\u008B\u009B\u00E8\u0081\u0098\u00E5\u00A3\u00B0\u00E6\u0098\u008E\u00E4\u00BB\u00BB\u00E4\u00BD\u0095\u00E5\u0081\u00A5\u00E5\u00BA\u00B7\u00E6\u0095\u00B0\u00E6\u008D\u00AE\u00E7\u00BE\u008E\u00E5\u009B\u00BD\u00E6\u00B1\u00BD\u00E8\u00BD\u00A6\u00E4\u00BB\u008B\u00E7\u00BB\u008D\u
00E4\u00BD\u0086\u00E6\u0098\u00AF\u00E4\u00BA\u00A4\u00E6\u00B5\u0081\u00E7\u0094\u009F\u00E4\u00BA\u00A7\u00E6\u0089\u0080\u00E4\u00BB\u00A5\u00E7\u0094\u00B5\u00E8\u00AF\u009D\u00E6\u0098\u00BE\u00E7\u00A4\u00BA\u00E4\u00B8\u0080\u00E4\u00BA\u009B\u00E5\u008D\u0095\u00E4\u00BD\u008D\u00E4\u00BA\u00BA\u00E5\u0091\u0098\u00E5\u0088\u0086\u00E6\u009E\u0090\u00E5\u009C\u00B0\u00E5\u009B\u00BE\u00E6\u0097\u0085\u00E6\u00B8\u00B8\u00E5\u00B7\u00A5\u00E5\u0085\u00B7\u00E5\u00AD\u00A6\u00E7\u0094\u009F\u00E7\u00B3\u00BB\u00E5\u0088\u0097\u00E7\u00BD\u0091\u00E5\u008F\u008B\u00E5\u00B8\u0096\u00E5\u00AD\u0090\u00E5\u00AF\u0086\u00E7\u00A0\u0081\u00E9\u00A2\u0091\u00E9\u0081\u0093\u00E6\u008E\u00A7\u00E5\u0088\u00B6\u00E5\u009C\u00B0\u00E5\u008C\u00BA\u00E5\u009F\u00BA\u00E6\u009C\u00AC\u00E5\u0085\u00A8\u00E5\u009B\u00BD\u00E7\u00BD\u0091\u00E4\u00B8\u008A\u00E9\u0087\u008D\u00E8\u00A6\u0081\u00E7\u00AC\u00AC\u00E4\u00BA\u008C\u00E5\u0096\u009C\u00E6\u00AC\u00A2\u00E8\u00BF\u009B\u00E5\u0085\u00A5\u00E5\u008F\u008B\u00E6\u0083\u0085\u00E8\u00BF\u0099\u00E4\u00BA\u009B\u00E8\u0080\u0083\u00E8\u00AF\u0095\u00E5\u008F\u0091\u00E7\u008E\u00B0\u00E5\u009F\u00B9\u00E8\u00AE\u00AD\u00E4\u00BB\u00A5\u00E4\u00B8\u008A\u00E6\u0094\u00BF\u00E5\u00BA\u009C\u00E6\u0088\u0090\u00E4\u00B8\u00BA\u00E7\u008E\u00AF\u00E5\u00A2\u0083\u00E9\u00A6\u0099\u00E6\u00B8\u00AF\u00E5\u0090\u008C\u00E6\u0097\u00B6\u00E5\u00A8\u00B1\u00E4\u00B9\u0090\u00E5\u008F\u0091\u00E9\u0080\u0081\u00E4\u00B8\u0080\u00E5\u00AE\u009A\u00E5\u00BC\u0080\u00E5\u008F\u0091\u00E4\u00BD\u009C\u00E5\u0093\u0081\u00E6\u00A0\u0087\u00E5\u0087\u0086\u00E6\u00AC\u00A2\u00E8\u00BF\u008E\u00E8\u00A7\u00A3\u00E5\u0086\u00B3\u00E5\u009C\u00B0\u00E6\u0096\u00B9\u00E4\u00B8\u0080\u00E4\u00B8\u008B\u00E4\u00BB\u00A5\u00E5\u008F\u008A\u00E8\u00B4\u00A3\u00E4\u00BB\u00BB\u00E6\u0088\u0096\u00E8\u0080\u0085\u00E5\u00AE\u00A2\u00E6\u0088\u00B7\u00E4\u00BB\u00A3\u00E8\u00A1\u00A8\u00E7\u00A7\u00AF\u00E5\u0088\u0086\u00E5\u00A5\u00B3\u00
E4\u00BA\u00BA\u00E6\u0095\u00B0\u00E7\u00A0\u0081\u00E9\u0094\u0080\u00E5\u0094\u00AE\u00E5\u0087\u00BA\u00E7\u008E\u00B0\u00E7\u00A6\u00BB\u00E7\u00BA\u00BF\u00E5\u00BA\u0094\u00E7\u0094\u00A8\u00E5\u0088\u0097\u00E8\u00A1\u00A8\u00E4\u00B8\u008D\u00E5\u0090\u008C\u00E7\u00BC\u0096\u00E8\u00BE\u0091\u00E7\u00BB\u009F\u00E8\u00AE\u00A1\u00E6\u009F\u00A5\u00E8\u00AF\u00A2\u00E4\u00B8\u008D\u00E8\u00A6\u0081\u00E6\u009C\u0089\u00E5\u0085\u00B3\u00E6\u009C\u00BA\u00E6\u009E\u0084\u00E5\u00BE\u0088\u00E5\u00A4\u009A\u00E6\u0092\u00AD\u00E6\u0094\u00BE\u00E7\u00BB\u0084\u00E7\u00BB\u0087\u00E6\u0094\u00BF\u00E7\u00AD\u0096\u00E7\u009B\u00B4\u00E6\u008E\u00A5\u00E8\u0083\u00BD\u00E5\u008A\u009B\u00E6\u009D\u00A5\u00E6\u00BA\u0090\u00E6\u0099\u0082\u00E9\u0096\u0093\u00E7\u009C\u008B\u00E5\u0088\u00B0\u00E7\u0083\u00AD\u00E9\u0097\u00A8\u00E5\u0085\u00B3\u00E9\u0094\u00AE\u00E4\u00B8\u0093\u00E5\u008C\u00BA\u00E9\u009D\u009E\u00E5\u00B8\u00B8\u00E8\u008B\u00B1\u00E8\u00AF\u00AD\u00E7\u0099\u00BE\u00E5\u00BA\u00A6\u00E5\u00B8\u008C\u00E6\u009C\u009B\u00E7\u00BE\u008E\u00E5\u00A5\u00B3\u00E6\u00AF\u0094\u00E8\u00BE\u0083\u00E7\u009F\u00A5\u00E8\u00AF\u0086\u00E8\u00A7\u0084\u00E5\u00AE\u009A\u00E5\u00BB\u00BA\u00E8\u00AE\u00AE\u00E9\u0083\u00A8\u00E9\u0097\u00A8\u00E6\u0084\u008F\u00E8\u00A7\u0081\u00E7\u00B2\u00BE\u00E5\u00BD\u00A9\u00E6\u0097\u00A5\u00E6\u009C\u00AC\u00E6\u008F\u0090\u00E9\u00AB\u0098\u00E5\u008F\u0091\u00E8\u00A8\u0080\u00E6\u0096\u00B9\u00E9\u009D\u00A2\u00E5\u009F\u00BA\u00E9\u0087\u0091\u00E5\u00A4\u0084\u00E7\u0090\u0086\u00E6\u009D\u0083\u00E9\u0099\u0090\u00E5\u00BD\u00B1\u00E7\u0089\u0087\u00E9\u0093\u00B6\u00E8\u00A1\u008C\u00E8\u00BF\u0098\u00E6\u009C\u0089\u00E5\u0088\u0086\u00E4\u00BA\u00AB\u00E7\u0089\u00A9\u00E5\u0093\u0081\u00E7\u00BB\u008F\u00E8\u0090\u00A5\u00E6\u00B7\u00BB\u00E5\u008A\u00A0\u00E4\u00B8\u0093\u00E5\u00AE\u00B6\u00E8\u00BF\u0099\u00E7\u00A7\u008D\u00E8\u00AF\u009D\u00E9\u00A2\u0098\u00E8\u00B5\u00B7\u00E6\u009D\u00A5\u00E4
\u00B8\u009A\u00E5\u008A\u00A1\u00E5\u0085\u00AC\u00E5\u0091\u008A\u00E8\u00AE\u00B0\u00E5\u00BD\u0095\u00E7\u00AE\u0080\u00E4\u00BB\u008B\u00E8\u00B4\u00A8\u00E9\u0087\u008F\u00E7\u0094\u00B7\u00E4\u00BA\u00BA\u00E5\u00BD\u00B1\u00E5\u0093\u008D\u00E5\u00BC\u0095\u00E7\u0094\u00A8\u00E6\u008A\u00A5\u00E5\u0091\u008A\u00E9\u0083\u00A8\u00E5\u0088\u0086\u00E5\u00BF\u00AB\u00E9\u0080\u009F\u00E5\u0092\u00A8\u00E8\u00AF\u00A2\u00E6\u0097\u00B6\u00E5\u00B0\u009A\u00E6\u00B3\u00A8\u00E6\u0084\u008F\u00E7\u0094\u00B3\u00E8\u00AF\u00B7\u00E5\u00AD\u00A6\u00E6\u00A0\u00A1\u00E5\u00BA\u0094\u00E8\u00AF\u00A5\u00E5\u008E\u0086\u00E5\u008F\u00B2\u00E5\u008F\u00AA\u00E6\u0098\u00AF\u00E8\u00BF\u0094\u00E5\u009B\u009E\u00E8\u00B4\u00AD\u00E4\u00B9\u00B0\u00E5\u0090\u008D\u00E7\u00A7\u00B0\u00E4\u00B8\u00BA\u00E4\u00BA\u0086\u00E6\u0088\u0090\u00E5\u008A\u009F\u00E8\u00AF\u00B4\u00E6\u0098\u008E\u00E4\u00BE\u009B\u00E5\u00BA\u0094\u00E5\u00AD\u00A9\u00E5\u00AD\u0090\u00E4\u00B8\u0093\u00E9\u00A2\u0098\u00E7\u00A8\u008B\u00E5\u00BA\u008F\u00E4\u00B8\u0080\u00E8\u0088\u00AC\u00E6\u009C\u0083\u00E5\u0093\u00A1\u00E5\u008F\u00AA\u00E6\u009C\u0089\u00E5\u0085\u00B6\u00E5\u00AE\u0083\u00E4\u00BF\u009D\u00E6\u008A\u00A4\u00E8\u0080\u008C\u00E4\u00B8\u0094\u00E4\u00BB\u008A\u00E5\u00A4\u00A9\u00E7\u00AA\u0097\u00E5\u008F\u00A3\u00E5\u008A\u00A8\u00E6\u0080\u0081\u00E7\u008A\u00B6\u00E6\u0080\u0081\u00E7\u0089\u00B9\u00E5\u0088\u00AB\u00E8\u00AE\u00A4\u00E4\u00B8\u00BA\u00E5\u00BF\u0085\u00E9\u00A1\u00BB\u00E6\u009B\u00B4\u00E6\u0096\u00B0\u00E5\u00B0\u008F\u00E8\u00AF\u00B4\u00E6\u0088\u0091\u00E5\u0080\u0091\u00E4\u00BD\u009C\u00E4\u00B8\u00BA\u00E5\u00AA\u0092\u00E4\u00BD\u0093\u00E5\u008C\u0085\u00E6\u008B\u00AC\u00E9\u0082\u00A3\u00E4\u00B9\u0088\u00E4\u00B8\u0080\u00E6\u00A0\u00B7\u00E5\u009B\u00BD\u00E5\u0086\u0085\u00E6\u0098\u00AF\u00E5\u0090\u00A6\u00E6\u00A0\u00B9\u00E6\u008D\u00AE\u00E7\u0094\u00B5\u00E8\u00A7\u0086\u00E5\u00AD\u00A6\u00E9\u0099\u00A2\u00E5\u0085\u00B7\u00E6\u
009C\u0089\u00E8\u00BF\u0087\u00E7\u00A8\u008B\u00E7\u0094\u00B1\u00E4\u00BA\u008E\u00E4\u00BA\u00BA\u00E6\u0089\u008D\u00E5\u0087\u00BA\u00E6\u009D\u00A5\u00E4\u00B8\u008D\u00E8\u00BF\u0087\u00E6\u00AD\u00A3\u00E5\u009C\u00A8\u00E6\u0098\u008E\u00E6\u0098\u009F\u00E6\u0095\u0085\u00E4\u00BA\u008B\u00E5\u0085\u00B3\u00E7\u00B3\u00BB\u00E6\u00A0\u0087\u00E9\u00A2\u0098\u00E5\u0095\u0086\u00E5\u008A\u00A1\u00E8\u00BE\u0093\u00E5\u0085\u00A5\u00E4\u00B8\u0080\u00E7\u009B\u00B4\u00E5\u009F\u00BA\u00E7\u00A1\u0080\u00E6\u0095\u0099\u00E5\u00AD\u00A6\u00E4\u00BA\u0086\u00E8\u00A7\u00A3\u00E5\u00BB\u00BA\u00E7\u00AD\u0091\u00E7\u00BB\u0093\u00E6\u009E\u009C\u00E5\u0085\u00A8\u00E7\u0090\u0083\u00E9\u0080\u009A\u00E7\u009F\u00A5\u00E8\u00AE\u00A1\u00E5\u0088\u0092\u00E5\u00AF\u00B9\u00E4\u00BA\u008E\u00E8\u0089\u00BA\u00E6\u009C\u00AF\u00E7\u009B\u00B8\u00E5\u0086\u008C\u00E5\u008F\u0091\u00E7\u0094\u009F\u00E7\u009C\u009F\u00E7\u009A\u0084\u00E5\u00BB\u00BA\u00E7\u00AB\u008B\u00E7\u00AD\u0089\u00E7\u00BA\u00A7\u00E7\u00B1\u00BB\u00E5\u009E\u008B\u00E7\u00BB\u008F\u00E9\u00AA\u008C\u00E5\u00AE\u009E\u00E7\u008E\u00B0\u00E5\u0088\u00B6\u00E4\u00BD\u009C\u00E6\u009D\u00A5\u00E8\u0087\u00AA\u00E6\u00A0\u0087\u00E7\u00AD\u00BE\u00E4\u00BB\u00A5\u00E4\u00B8\u008B\u00E5\u008E\u009F\u00E5\u0088\u009B\u00E6\u0097\u00A0\u00E6\u00B3\u0095\u00E5\u0085\u00B6\u00E4\u00B8\u00AD\u00E5\u0080\u008B\u00E4\u00BA\u00BA\u00E4\u00B8\u0080\u00E5\u0088\u0087\u00E6\u008C\u0087\u00E5\u008D\u0097\u00E5\u0085\u00B3\u00E9\u0097\u00AD\u00E9\u009B\u0086\u00E5\u009B\u00A2\u00E7\u00AC\u00AC\u00E4\u00B8\u0089\u00E5\u0085\u00B3\u00E6\u00B3\u00A8\u00E5\u009B\u00A0\u00E6\u00AD\u00A4\u00E7\u0085\u00A7\u00E7\u0089\u0087\u00E6\u00B7\u00B1\u00E5\u009C\u00B3\u00E5\u0095\u0086\u00E4\u00B8\u009A\u00E5\u00B9\u00BF\u00E5\u00B7\u009E\u00E6\u0097\u00A5\u00E6\u009C\u009F\u00E9\u00AB\u0098\u00E7\u00BA\u00A7\u00E6\u009C\u0080\u00E8\u00BF\u0091\u00E7\u00BB\u00BC\u00E5\u0090\u0088\u00E8\u00A1\u00A8\u00E7\u00A4\u00BA\u00E4\u00
B8\u0093\u00E8\u00BE\u0091\u00E8\u00A1\u008C\u00E4\u00B8\u00BA\u00E4\u00BA\u00A4\u00E9\u0080\u009A\u00E8\u00AF\u0084\u00E4\u00BB\u00B7\u00E8\u00A7\u0089\u00E5\u00BE\u0097\u00E7\u00B2\u00BE\u00E5\u008D\u008E\u00E5\u00AE\u00B6\u00E5\u00BA\u00AD\u00E5\u00AE\u008C\u00E6\u0088\u0090\u00E6\u0084\u009F\u00E8\u00A7\u0089\u00E5\u00AE\u0089\u00E8\u00A3\u0085\u00E5\u00BE\u0097\u00E5\u0088\u00B0\u00E9\u0082\u00AE\u00E4\u00BB\u00B6\u00E5\u0088\u00B6\u00E5\u00BA\u00A6\u00E9\u00A3\u009F\u00E5\u0093\u0081\u00E8\u0099\u00BD\u00E7\u0084\u00B6\u00E8\u00BD\u00AC\u00E8\u00BD\u00BD\u00E6\u008A\u00A5\u00E4\u00BB\u00B7\u00E8\u00AE\u00B0\u00E8\u0080\u0085\u00E6\u0096\u00B9\u00E6\u00A1\u0088\u00E8\u00A1\u008C\u00E6\u0094\u00BF\u00E4\u00BA\u00BA\u00E6\u00B0\u0091\u00E7\u0094\u00A8\u00E5\u0093\u0081\u00E4\u00B8\u009C\u00E8\u00A5\u00BF\u00E6\u008F\u0090\u00E5\u0087\u00BA\u00E9\u0085\u0092\u00E5\u00BA\u0097\u00E7\u0084\u00B6\u00E5\u0090\u008E\u00E4\u00BB\u0098\u00E6\u00AC\u00BE\u00E7\u0083\u00AD\u00E7\u0082\u00B9\u00E4\u00BB\u00A5\u00E5\u0089\u008D\u00E5\u00AE\u008C\u00E5\u0085\u00A8\u00E5\u008F\u0091\u00E5\u00B8\u0096\u00E8\u00AE\u00BE\u00E7\u00BD\u00AE\u00E9\u00A2\u0086\u00E5\u00AF\u00BC\u00E5\u00B7\u00A5\u00E4\u00B8\u009A\u00E5\u008C\u00BB\u00E9\u0099\u00A2\u00E7\u009C\u008B\u00E7\u009C\u008B\u00E7\u00BB\u008F\u00E5\u0085\u00B8\u00E5\u008E\u009F\u00E5\u009B\u00A0\u00E5\u00B9\u00B3\u00E5\u008F\u00B0\u00E5\u0090\u0084\u00E7\u00A7\u008D\u00E5\u00A2\u009E\u00E5\u008A\u00A0\u00E6\u009D\u0090\u00E6\u0096\u0099\u00E6\u0096\u00B0\u00E5\u00A2\u009E\u00E4\u00B9\u008B\u00E5\u0090\u008E\u00E8\u0081\u008C\u00E4\u00B8\u009A\u00E6\u0095\u0088\u00E6\u009E\u009C\u00E4\u00BB\u008A\u00E5\u00B9\u00B4\u00E8\u00AE\u00BA\u00E6\u0096\u0087\u00E6\u0088\u0091\u00E5\u009B\u00BD\u00E5\u0091\u008A\u00E8\u00AF\u0089\u00E7\u0089\u0088\u00E4\u00B8\u00BB\u00E4\u00BF\u00AE\u00E6\u0094\u00B9\u00E5\u008F\u0082\u00E4\u00B8\u008E\u00E6\u0089\u0093\u00E5\u008D\u00B0\u00E5\u00BF\u00AB\u00E4\u00B9\u0090\u00E6\u009C\u00BA\u00E6\u00A2
\u00B0\u00E8\u00A7\u0082\u00E7\u0082\u00B9\u00E5\u00AD\u0098\u00E5\u009C\u00A8\u00E7\u00B2\u00BE\u00E7\u00A5\u009E\u00E8\u008E\u00B7\u00E5\u00BE\u0097\u00E5\u0088\u00A9\u00E7\u0094\u00A8\u00E7\u00BB\u00A7\u00E7\u00BB\u00AD\u00E4\u00BD\u00A0\u00E4\u00BB\u00AC\u00E8\u00BF\u0099\u00E4\u00B9\u0088\u00E6\u00A8\u00A1\u00E5\u00BC\u008F\u00E8\u00AF\u00AD\u00E8\u00A8\u0080\u00E8\u0083\u00BD\u00E5\u00A4\u009F\u00E9\u009B\u0085\u00E8\u0099\u008E\u00E6\u0093\u008D\u00E4\u00BD\u009C\u00E9\u00A3\u008E\u00E6\u00A0\u00BC\u00E4\u00B8\u0080\u00E8\u00B5\u00B7\u00E7\u00A7\u0091\u00E5\u00AD\u00A6\u00E4\u00BD\u0093\u00E8\u0082\u00B2\u00E7\u009F\u00AD\u00E4\u00BF\u00A1\u00E6\u009D\u00A1\u00E4\u00BB\u00B6\u00E6\u00B2\u00BB\u00E7\u0096\u0097\u00E8\u00BF\u0090\u00E5\u008A\u00A8\u00E4\u00BA\u00A7\u00E4\u00B8\u009A\u00E4\u00BC\u009A\u00E8\u00AE\u00AE\u00E5\u00AF\u00BC\u00E8\u0088\u00AA\u00E5\u0085\u0088\u00E7\u0094\u009F\u00E8\u0081\u0094\u00E7\u009B\u009F\u00E5\u008F\u00AF\u00E6\u0098\u00AF\u00E5\u0095\u008F\u00E9\u00A1\u008C\u00E7\u00BB\u0093\u00E6\u009E\u0084\u00E4\u00BD\u009C\u00E7\u0094\u00A8\u00E8\u00B0\u0083\u00E6\u009F\u00A5\u00E8\u00B3\u0087\u00E6\u0096\u0099\u00E8\u0087\u00AA\u00E5\u008A\u00A8\u00E8\u00B4\u009F\u00E8\u00B4\u00A3\u00E5\u0086\u009C\u00E4\u00B8\u009A\u00E8\u00AE\u00BF\u00E9\u0097\u00AE\u00E5\u00AE\u009E\u00E6\u0096\u00BD\u00E6\u008E\u00A5\u00E5\u008F\u0097\u00E8\u00AE\u00A8\u00E8\u00AE\u00BA\u00E9\u0082\u00A3\u00E4\u00B8\u00AA\u00E5\u008F\u008D\u00E9\u00A6\u0088\u00E5\u008A\u00A0\u00E5\u00BC\u00BA\u00E5\u00A5\u00B3\u00E6\u0080\u00A7\u00E8\u008C\u0083\u00E5\u009B\u00B4\u00E6\u009C\u008D\u00E5\u008B\u0099\u00E4\u00BC\u0091\u00E9\u0097\u00B2\u00E4\u00BB\u008A\u00E6\u0097\u00A5\u00E5\u00AE\u00A2\u00E6\u009C\u008D\u00E8\u00A7\u0080\u00E7\u009C\u008B\u00E5\u008F\u0082\u00E5\u008A\u00A0\u00E7\u009A\u0084\u00E8\u00AF\u009D\u00E4\u00B8\u0080\u00E7\u0082\u00B9\u00E4\u00BF\u009D\u00E8\u00AF\u0081\u00E5\u009B\u00BE\u00E4\u00B9\u00A6\u00E6\u009C\u0089\u00E6\u0095\u0088\u00E6\u00B5\u
008B\u00E8\u00AF\u0095\u00E7\u00A7\u00BB\u00E5\u008A\u00A8\u00E6\u0089\u008D\u00E8\u0083\u00BD\u00E5\u0086\u00B3\u00E5\u00AE\u009A\u00E8\u0082\u00A1\u00E7\u00A5\u00A8\u00E4\u00B8\u008D\u00E6\u0096\u00AD\u00E9\u009C\u0080\u00E6\u00B1\u0082\u00E4\u00B8\u008D\u00E5\u00BE\u0097\u00E5\u008A\u009E\u00E6\u00B3\u0095\u00E4\u00B9\u008B\u00E9\u0097\u00B4\u00E9\u0087\u0087\u00E7\u0094\u00A8\u00E8\u0090\u00A5\u00E9\u0094\u0080\u00E6\u008A\u0095\u00E8\u00AF\u0089\u00E7\u009B\u00AE\u00E6\u00A0\u0087\u00E7\u0088\u00B1\u00E6\u0083\u0085\u00E6\u0091\u0084\u00E5\u00BD\u00B1\u00E6\u009C\u0089\u00E4\u00BA\u009B\u00E8\u00A4\u0087\u00E8\u00A3\u00BD\u00E6\u0096\u0087\u00E5\u00AD\u00A6\u00E6\u009C\u00BA\u00E4\u00BC\u009A\u00E6\u0095\u00B0\u00E5\u00AD\u0097\u00E8\u00A3\u0085\u00E4\u00BF\u00AE\u00E8\u00B4\u00AD\u00E7\u0089\u00A9\u00E5\u0086\u009C\u00E6\u009D\u0091\u00E5\u0085\u00A8\u00E9\u009D\u00A2\u00E7\u00B2\u00BE\u00E5\u0093\u0081\u00E5\u0085\u00B6\u00E5\u00AE\u009E\u00E4\u00BA\u008B\u00E6\u0083\u0085\u00E6\u00B0\u00B4\u00E5\u00B9\u00B3\u00E6\u008F\u0090\u00E7\u00A4\u00BA\u00E4\u00B8\u008A\u00E5\u00B8\u0082\u00E8\u00B0\u00A2\u00E8\u00B0\u00A2\u00E6\u0099\u00AE\u00E9\u0080\u009A\u00E6\u0095\u0099\u00E5\u00B8\u0088\u00E4\u00B8\u008A\u00E4\u00BC\u00A0\u00E7\u00B1\u00BB\u00E5\u0088\u00AB\u00E6\u00AD\u008C\u00E6\u009B\u00B2\u00E6\u008B\u00A5\u00E6\u009C\u0089\u00E5\u0088\u009B\u00E6\u0096\u00B0\u00E9\u0085\u008D\u00E4\u00BB\u00B6\u00E5\u008F\u00AA\u00E8\u00A6\u0081\u00E6\u0097\u00B6\u00E4\u00BB\u00A3\u00E8\u00B3\u0087\u00E8\u00A8\u008A\u00E8\u00BE\u00BE\u00E5\u0088\u00B0\u00E4\u00BA\u00BA\u00E7\u0094\u009F\u00E8\u00AE\u00A2\u00E9\u0098\u0085\u00E8\u0080\u0081\u00E5\u00B8\u0088\u00E5\u00B1\u0095\u00E7\u00A4\u00BA\u00E5\u00BF\u0083\u00E7\u0090\u0086\u00E8\u00B4\u00B4\u00E5\u00AD\u0090\u00E7\u00B6\u00B2\u00E7\u00AB\u0099\u00E4\u00B8\u00BB\u00E9\u00A1\u008C\u00E8\u0087\u00AA\u00E7\u0084\u00B6\u00E7\u00BA\u00A7\u00E5\u0088\u00AB\u00E7\u00AE\u0080\u00E5\u008D\u0095\u00E6\u0094\u00B9\u00E9\u009D\u00
A9\u00E9\u0082\u00A3\u00E4\u00BA\u009B\u00E6\u009D\u00A5\u00E8\u00AF\u00B4\u00E6\u0089\u0093\u00E5\u00BC\u0080\u00E4\u00BB\u00A3\u00E7\u00A0\u0081\u00E5\u0088\u00A0\u00E9\u0099\u00A4\u00E8\u00AF\u0081\u00E5\u0088\u00B8\u00E8\u008A\u0082\u00E7\u009B\u00AE\u00E9\u0087\u008D\u00E7\u0082\u00B9\u00E6\u00AC\u00A1\u00E6\u0095\u00B8\u00E5\u00A4\u009A\u00E5\u00B0\u0091\u00E8\u00A7\u0084\u00E5\u0088\u0092\u00E8\u00B5\u0084\u00E9\u0087\u0091\u00E6\u0089\u00BE\u00E5\u0088\u00B0\u00E4\u00BB\u00A5\u00E5\u0090\u008E\u00E5\u00A4\u00A7\u00E5\u0085\u00A8\u00E4\u00B8\u00BB\u00E9\u00A1\u00B5\u00E6\u009C\u0080\u00E4\u00BD\u00B3\u00E5\u009B\u009E\u00E7\u00AD\u0094\u00E5\u00A4\u00A9\u00E4\u00B8\u008B\u00E4\u00BF\u009D\u00E9\u009A\u009C\u00E7\u008E\u00B0\u00E4\u00BB\u00A3\u00E6\u00A3\u0080\u00E6\u009F\u00A5\u00E6\u008A\u0095\u00E7\u00A5\u00A8\u00E5\u00B0\u008F\u00E6\u0097\u00B6\u00E6\u00B2\u0092\u00E6\u009C\u0089\u00E6\u00AD\u00A3\u00E5\u00B8\u00B8\u00E7\u0094\u009A\u00E8\u0087\u00B3\u00E4\u00BB\u00A3\u00E7\u0090\u0086\u00E7\u009B\u00AE\u00E5\u00BD\u0095\u00E5\u0085\u00AC\u00E5\u00BC\u0080\u00E5\u00A4\u008D\u00E5\u0088\u00B6\u00E9\u0087\u0091\u00E8\u009E\u008D\u00E5\u00B9\u00B8\u00E7\u00A6\u008F\u00E7\u0089\u0088\u00E6\u009C\u00AC\u00E5\u00BD\u00A2\u00E6\u0088\u0090\u00E5\u0087\u0086\u00E5\u00A4\u0087\u00E8\u00A1\u008C\u00E6\u0083\u0085\u00E5\u009B\u009E\u00E5\u0088\u00B0\u00E6\u0080\u009D\u00E6\u0083\u00B3\u00E6\u0080\u008E\u00E6\u00A0\u00B7\u00E5\u008D\u008F\u00E8\u00AE\u00AE\u00E8\u00AE\u00A4\u00E8\u00AF\u0081\u00E6\u009C\u0080\u00E5\u00A5\u00BD\u00E4\u00BA\u00A7\u00E7\u0094\u009F\u00E6\u008C\u0089\u00E7\u0085\u00A7\u00E6\u009C\u008D\u00E8\u00A3\u0085\u00E5\u00B9\u00BF\u00E4\u00B8\u009C\u00E5\u008A\u00A8\u00E6\u00BC\u00AB\u00E9\u0087\u0087\u00E8\u00B4\u00AD\u00E6\u0096\u00B0\u00E6\u0089\u008B\u00E7\u00BB\u0084\u00E5\u009B\u00BE\u00E9\u009D\u00A2\u00E6\u009D\u00BF\u00E5\u008F\u0082\u00E8\u0080\u0083\u00E6\u0094\u00BF\u00E6\u00B2\u00BB\u00E5\u00AE\u00B9\u00E6\u0098\u0093\u00E5\u00A4\u00A9
\u00E5\u009C\u00B0\u00E5\u008A\u00AA\u00E5\u008A\u009B\u00E4\u00BA\u00BA\u00E4\u00BB\u00AC\u00E5\u008D\u0087\u00E7\u00BA\u00A7\u00E9\u0080\u009F\u00E5\u00BA\u00A6\u00E4\u00BA\u00BA\u00E7\u0089\u00A9\u00E8\u00B0\u0083\u00E6\u0095\u00B4\u00E6\u00B5\u0081\u00E8\u00A1\u008C\u00E9\u0080\u00A0\u00E6\u0088\u0090\u00E6\u0096\u0087\u00E5\u00AD\u0097\u00E9\u009F\u00A9\u00E5\u009B\u00BD\u00E8\u00B4\u00B8\u00E6\u0098\u0093\u00E5\u00BC\u0080\u00E5\u00B1\u0095\u00E7\u009B\u00B8\u00E9\u0097\u009C\u00E8\u00A1\u00A8\u00E7\u008E\u00B0\u00E5\u00BD\u00B1\u00E8\u00A7\u0086\u00E5\u00A6\u0082\u00E6\u00AD\u00A4\u00E7\u00BE\u008E\u00E5\u00AE\u00B9\u00E5\u00A4\u00A7\u00E5\u00B0\u008F\u00E6\u008A\u00A5\u00E9\u0081\u0093\u00E6\u009D\u00A1\u00E6\u00AC\u00BE\u00E5\u00BF\u0083\u00E6\u0083\u0085\u00E8\u00AE\u00B8\u00E5\u00A4\u009A\u00E6\u00B3\u0095\u00E8\u00A7\u0084\u00E5\u00AE\u00B6\u00E5\u00B1\u0085\u00E4\u00B9\u00A6\u00E5\u00BA\u0097\u00E8\u00BF\u009E\u00E6\u008E\u00A5\u00E7\u00AB\u008B\u00E5\u008D\u00B3\u00E4\u00B8\u00BE\u00E6\u008A\u00A5\u00E6\u008A\u0080\u00E5\u00B7\u00A7\u00E5\u00A5\u00A5\u00E8\u00BF\u0090\u00E7\u0099\u00BB\u00E5\u0085\u00A5\u00E4\u00BB\u00A5\u00E6\u009D\u00A5\u00E7\u0090\u0086\u00E8\u00AE\u00BA\u00E4\u00BA\u008B\u00E4\u00BB\u00B6\u00E8\u0087\u00AA\u00E7\u0094\u00B1\u00E4\u00B8\u00AD\u00E5\u008D\u008E\u00E5\u008A\u009E\u00E5\u0085\u00AC\u00E5\u00A6\u0088\u00E5\u00A6\u0088\u00E7\u009C\u009F\u00E6\u00AD\u00A3\u00E4\u00B8\u008D\u00E9\u0094\u0099\u00E5\u0085\u00A8\u00E6\u0096\u0087\u00E5\u0090\u0088\u00E5\u0090\u008C\u00E4\u00BB\u00B7\u00E5\u0080\u00BC\u00E5\u0088\u00AB\u00E4\u00BA\u00BA\u00E7\u009B\u0091\u00E7\u009D\u00A3\u00E5\u0085\u00B7\u00E4\u00BD\u0093\u00E4\u00B8\u0096\u00E7\u00BA\u00AA\u00E5\u009B\u00A2\u00E9\u0098\u009F\u00E5\u0088\u009B\u00E4\u00B8\u009A\u00E6\u0089\u00BF\u00E6\u008B\u0085\u00E5\u00A2\u009E\u00E9\u0095\u00BF\u00E6\u009C\u0089\u00E4\u00BA\u00BA\u00E4\u00BF\u009D\u00E6\u008C\u0081\u00E5\u0095\u0086\u00E5\u00AE\u00B6\u00E7\u00BB\u00B4\u00E4\u00BF\u00AE\u
00E5\u008F\u00B0\u00E6\u00B9\u00BE\u00E5\u00B7\u00A6\u00E5\u008F\u00B3\u00E8\u0082\u00A1\u00E4\u00BB\u00BD\u00E7\u00AD\u0094\u00E6\u00A1\u0088\u00E5\u00AE\u009E\u00E9\u0099\u0085\u00E7\u0094\u00B5\u00E4\u00BF\u00A1\u00E7\u00BB\u008F\u00E7\u0090\u0086\u00E7\u0094\u009F\u00E5\u0091\u00BD\u00E5\u00AE\u00A3\u00E4\u00BC\u00A0\u00E4\u00BB\u00BB\u00E5\u008A\u00A1\u00E6\u00AD\u00A3\u00E5\u00BC\u008F\u00E7\u0089\u00B9\u00E8\u0089\u00B2\u00E4\u00B8\u008B\u00E6\u009D\u00A5\u00E5\u008D\u008F\u00E4\u00BC\u009A\u00E5\u008F\u00AA\u00E8\u0083\u00BD\u00E5\u00BD\u0093\u00E7\u0084\u00B6\u00E9\u0087\u008D\u00E6\u0096\u00B0\u00E5\u0085\u00A7\u00E5\u00AE\u00B9\u00E6\u008C\u0087\u00E5\u00AF\u00BC\u00E8\u00BF\u0090\u00E8\u00A1\u008C\u00E6\u0097\u00A5\u00E5\u00BF\u0097\u00E8\u00B3\u00A3\u00E5\u00AE\u00B6\u00E8\u00B6\u0085\u00E8\u00BF\u0087\u00E5\u009C\u009F\u00E5\u009C\u00B0\u00E6\u00B5\u0099\u00E6\u00B1\u009F\u00E6\u0094\u00AF\u00E4\u00BB\u0098\u00E6\u008E\u00A8\u00E5\u0087\u00BA\u00E7\u00AB\u0099\u00E9\u0095\u00BF\u00E6\u009D\u00AD\u00E5\u00B7\u009E\u00E6\u0089\u00A7\u00E8\u00A1\u008C\u00E5\u0088\u00B6\u00E9\u0080\u00A0\u00E4\u00B9\u008B\u00E4\u00B8\u0080\u00E6\u008E\u00A8\u00E5\u00B9\u00BF\u00E7\u008E\u00B0\u00E5\u009C\u00BA\u00E6\u008F\u008F\u00E8\u00BF\u00B0\u00E5\u008F\u0098\u00E5\u008C\u0096\u00E4\u00BC\u00A0\u00E7\u00BB\u009F\u00E6\u00AD\u008C\u00E6\u0089\u008B\u00E4\u00BF\u009D\u00E9\u0099\u00A9\u00E8\u00AF\u00BE\u00E7\u00A8\u008B\u00E5\u008C\u00BB\u00E7\u0096\u0097\u00E7\u00BB\u008F\u00E8\u00BF\u0087\u00E8\u00BF\u0087\u00E5\u008E\u00BB\u00E4\u00B9\u008B\u00E5\u0089\u008D\u00E6\u0094\u00B6\u00E5\u0085\u00A5\u00E5\u00B9\u00B4\u00E5\u00BA\u00A6\u00E6\u009D\u0082\u00E5\u00BF\u0097\u00E7\u00BE\u008E\u00E4\u00B8\u00BD\u00E6\u009C\u0080\u00E9\u00AB\u0098\u00E7\u0099\u00BB\u00E9\u0099\u0086\u00E6\u009C\u00AA\u00E6\u009D\u00A5\u00E5\u008A\u00A0\u00E5\u00B7\u00A5\u00E5\u0085\u008D\u00E8\u00B4\u00A3\u00E6\u0095\u0099\u00E7\u00A8\u008B\u00E7\u0089\u0088\u00E5\u009D\u0097\u00E8\u00BA\u00AB\u00
E4\u00BD\u0093\u00E9\u0087\u008D\u00E5\u00BA\u0086\u00E5\u0087\u00BA\u00E5\u0094\u00AE\u00E6\u0088\u0090\u00E6\u009C\u00AC\u00E5\u00BD\u00A2\u00E5\u00BC\u008F\u00E5\u009C\u009F\u00E8\u00B1\u0086\u00E5\u0087\u00BA\u00E5\u0083\u00B9\u00E4\u00B8\u009C\u00E6\u0096\u00B9\u00E9\u0082\u00AE\u00E7\u00AE\u00B1\u00E5\u008D\u0097\u00E4\u00BA\u00AC\u00E6\u00B1\u0082\u00E8\u0081\u008C\u00E5\u008F\u0096\u00E5\u00BE\u0097\u00E8\u0081\u008C\u00E4\u00BD\u008D\u00E7\u009B\u00B8\u00E4\u00BF\u00A1\u00E9\u00A1\u00B5\u00E9\u009D\u00A2\u00E5\u0088\u0086\u00E9\u0092\u009F\u00E7\u00BD\u0091\u00E9\u00A1\u00B5\u00E7\u00A1\u00AE\u00E5\u00AE\u009A\u00E5\u009B\u00BE\u00E4\u00BE\u008B\u00E7\u00BD\u0091\u00E5\u009D\u0080\u00E7\u00A7\u00AF\u00E6\u009E\u0081\u00E9\u0094\u0099\u00E8\u00AF\u00AF\u00E7\u009B\u00AE\u00E7\u009A\u0084\u00E5\u00AE\u009D\u00E8\u00B4\u009D\u00E6\u009C\u00BA\u00E5\u0085\u00B3\u00E9\u00A3\u008E\u00E9\u0099\u00A9\u00E6\u008E\u0088\u00E6\u009D\u0083\u00E7\u0097\u0085\u00E6\u00AF\u0092\u00E5\u00AE\u00A0\u00E7\u0089\u00A9\u00E9\u0099\u00A4\u00E4\u00BA\u0086\u00E8\u00A9\u0095\u00E8\u00AB\u0096\u00E7\u0096\u00BE\u00E7\u0097\u0085\u00E5\u008F\u008A\u00E6\u0097\u00B6\u00E6\u00B1\u0082\u00E8\u00B4\u00AD\u00E7\u00AB\u0099\u00E7\u0082\u00B9\u00E5\u0084\u00BF\u00E7\u00AB\u00A5\u00E6\u00AF\u008F\u00E5\u00A4\u00A9\u00E4\u00B8\u00AD\u00E5\u00A4\u00AE\u00E8\u00AE\u00A4\u00E8\u00AF\u0086\u00E6\u00AF\u008F\u00E4\u00B8\u00AA\u00E5\u00A4\u00A9\u00E6\u00B4\u00A5\u00E5\u00AD\u0097\u00E4\u00BD\u0093\u00E5\u008F\u00B0\u00E7\u0081\u00A3\u00E7\u00BB\u00B4\u00E6\u008A\u00A4\u00E6\u009C\u00AC\u00E9\u00A1\u00B5\u00E4\u00B8\u00AA\u00E6\u0080\u00A7\u00E5\u00AE\u0098\u00E6\u0096\u00B9\u00E5\u00B8\u00B8\u00E8\u00A7\u0081\u00E7\u009B\u00B8\u00E6\u009C\u00BA\u00E6\u0088\u0098\u00E7\u0095\u00A5\u00E5\u00BA\u0094\u00E5\u00BD\u0093\u00E5\u00BE\u008B\u00E5\u00B8\u0088\u00E6\u0096\u00B9\u00E4\u00BE\u00BF\u00E6\u00A0\u00A1\u00E5\u009B\u00AD\u00E8\u0082\u00A1\u00E5\u00B8\u0082\u00E6\u0088\u00BF\u00E5\u00B1\u008B\u00E6
\u00A0\u008F\u00E7\u009B\u00AE\u00E5\u0091\u0098\u00E5\u00B7\u00A5\u00E5\u00AF\u00BC\u00E8\u0087\u00B4\u00E7\u00AA\u0081\u00E7\u0084\u00B6\u00E9\u0081\u0093\u00E5\u0085\u00B7\u00E6\u009C\u00AC\u00E7\u00BD\u0091\u00E7\u00BB\u0093\u00E5\u0090\u0088\u00E6\u00A1\u00A3\u00E6\u00A1\u0088\u00E5\u008A\u00B3\u00E5\u008A\u00A8\u00E5\u008F\u00A6\u00E5\u00A4\u0096\u00E7\u00BE\u008E\u00E5\u0085\u0083\u00E5\u00BC\u0095\u00E8\u00B5\u00B7\u00E6\u0094\u00B9\u00E5\u008F\u0098\u00E7\u00AC\u00AC\u00E5\u009B\u009B\u00E4\u00BC\u009A\u00E8\u00AE\u00A1\u00E8\u00AA\u00AA\u00E6\u0098\u008E\u00E9\u009A\u0090\u00E7\u00A7\u0081\u00E5\u00AE\u009D\u00E5\u00AE\u009D\u00E8\u00A7\u0084\u00E8\u008C\u0083\u00E6\u00B6\u0088\u00E8\u00B4\u00B9\u00E5\u0085\u00B1\u00E5\u0090\u008C\u00E5\u00BF\u0098\u00E8\u00AE\u00B0\u00E4\u00BD\u0093\u00E7\u00B3\u00BB\u00E5\u00B8\u00A6\u00E6\u009D\u00A5\u00E5\u0090\u008D\u00E5\u00AD\u0097\u00E7\u0099\u00BC\u00E8\u00A1\u00A8\u00E5\u00BC\u0080\u00E6\u0094\u00BE\u00E5\u008A\u00A0\u00E7\u009B\u009F\u00E5\u008F\u0097\u00E5\u0088\u00B0\u00E4\u00BA\u008C\u00E6\u0089\u008B\u00E5\u00A4\u00A7\u00E9\u0087\u008F\u00E6\u0088\u0090\u00E4\u00BA\u00BA\u00E6\u0095\u00B0\u00E9\u0087\u008F\u00E5\u0085\u00B1\u00E4\u00BA\u00AB\u00E5\u008C\u00BA\u00E5\u009F\u009F\u00E5\u00A5\u00B3\u00E5\u00AD\u00A9\u00E5\u008E\u009F\u00E5\u0088\u0099\u00E6\u0089\u0080\u00E5\u009C\u00A8\u00E7\u00BB\u0093\u00E6\u009D\u009F\u00E9\u0080\u009A\u00E4\u00BF\u00A1\u00E8\u00B6\u0085\u00E7\u00BA\u00A7\u00E9\u0085\u008D\u00E7\u00BD\u00AE\u00E5\u00BD\u0093\u00E6\u0097\u00B6\u00E4\u00BC\u0098\u00E7\u00A7\u0080\u00E6\u0080\u00A7\u00E6\u0084\u009F\u00E6\u0088\u00BF\u00E4\u00BA\u00A7\u00E9\u0081\u008A\u00E6\u0088\u00B2\u00E5\u0087\u00BA\u00E5\u008F\u00A3\u00E6\u008F\u0090\u00E4\u00BA\u00A4\u00E5\u00B0\u00B1\u00E4\u00B8\u009A\u00E4\u00BF\u009D\u00E5\u0081\u00A5\u00E7\u00A8\u008B\u00E5\u00BA\u00A6\u00E5\u008F\u0082\u00E6\u0095\u00B0\u00E4\u00BA\u008B\u00E4\u00B8\u009A\u00E6\u0095\u00B4\u00E4\u00B8\u00AA\u00E5\u00B1\u00B1\u00E4\u
00B8\u009C\u00E6\u0083\u0085\u00E6\u0084\u009F\u00E7\u0089\u00B9\u00E6\u00AE\u008A\u00E5\u0088\u0086\u00E9\u00A1\u009E\u00E6\u0090\u009C\u00E5\u00B0\u008B\u00E5\u00B1\u009E\u00E4\u00BA\u008E\u00E9\u0097\u00A8\u00E6\u0088\u00B7\u00E8\u00B4\u00A2\u00E5\u008A\u00A1\u00E5\u00A3\u00B0\u00E9\u009F\u00B3\u00E5\u008F\u008A\u00E5\u0085\u00B6\u00E8\u00B4\u00A2\u00E7\u00BB\u008F\u00E5\u009D\u009A\u00E6\u008C\u0081\u00E5\u00B9\u00B2\u00E9\u0083\u00A8\u00E6\u0088\u0090\u00E7\u00AB\u008B\u00E5\u0088\u00A9\u00E7\u009B\u008A\u00E8\u0080\u0083\u00E8\u0099\u0091\u00E6\u0088\u0090\u00E9\u0083\u00BD\u00E5\u008C\u0085\u00E8\u00A3\u0085\u00E7\u0094\u00A8\u00E6\u0088\u00B6\u00E6\u00AF\u0094\u00E8\u00B5\u009B\u00E6\u0096\u0087\u00E6\u0098\u008E\u00E6\u008B\u009B\u00E5\u0095\u0086\u00E5\u00AE\u008C\u00E6\u0095\u00B4\u00E7\u009C\u009F\u00E6\u0098\u00AF\u00E7\u009C\u00BC\u00E7\u009D\u009B\u00E4\u00BC\u0099\u00E4\u00BC\u00B4\u00E5\u00A8\u0081\u00E6\u009C\u009B\u00E9\u00A2\u0086\u00E5\u009F\u009F\u00E5\u008D\u00AB\u00E7\u0094\u009F\u00E4\u00BC\u0098\u00E6\u0083\u00A0\u00E8\u00AB\u0096\u00E5\u00A3\u0087\u00E5\u0085\u00AC\u00E5\u0085\u00B1\u00E8\u0089\u00AF\u00E5\u00A5\u00BD\u00E5\u0085\u0085\u00E5\u0088\u0086\u00E7\u00AC\u00A6\u00E5\u0090\u0088\u00E9\u0099\u0084\u00E4\u00BB\u00B6\u00E7\u0089\u00B9\u00E7\u0082\u00B9\u00E4\u00B8\u008D\u00E5\u008F\u00AF\u00E8\u008B\u00B1\u00E6\u0096\u0087\u00E8\u00B5\u0084\u00E4\u00BA\u00A7\u00E6\u00A0\u00B9\u00E6\u009C\u00AC\u00E6\u0098\u008E\u00E6\u0098\u00BE\u00E5\u00AF\u0086\u00E7\u00A2\u00BC\u00E5\u0085\u00AC\u00E4\u00BC\u0097\u00E6\u00B0\u0091\u00E6\u0097\u008F\u00E6\u009B\u00B4\u00E5\u008A\u00A0\u00E4\u00BA\u00AB\u00E5\u008F\u0097\u00E5\u0090\u008C\u00E5\u00AD\u00A6\u00E5\u0090\u00AF\u00E5\u008A\u00A8\u00E9\u0080\u0082\u00E5\u0090\u0088\u00E5\u008E\u009F\u00E6\u009D\u00A5\u00E9\u0097\u00AE\u00E7\u00AD\u0094\u00E6\u009C\u00AC\u00E6\u0096\u0087\u00E7\u00BE\u008E\u00E9\u00A3\u009F\u00E7\u00BB\u00BF\u00E8\u0089\u00B2\u00E7\u00A8\u00B3\u00E5\u00AE\u009A\u00E7\u00
BB\u0088\u00E4\u00BA\u008E\u00E7\u0094\u009F\u00E7\u0089\u00A9\u00E4\u00BE\u009B\u00E6\u00B1\u0082\u00E6\u0090\u009C\u00E7\u008B\u0090\u00E5\u008A\u009B\u00E9\u0087\u008F\u00E4\u00B8\u00A5\u00E9\u0087\u008D\u00E6\u00B0\u00B8\u00E8\u00BF\u009C\u00E5\u0086\u0099\u00E7\u009C\u009F\u00E6\u009C\u0089\u00E9\u0099\u0090\u00E7\u00AB\u009E\u00E4\u00BA\u0089\u00E5\u00AF\u00B9\u00E8\u00B1\u00A1\u00E8\u00B4\u00B9\u00E7\u0094\u00A8\u00E4\u00B8\u008D\u00E5\u00A5\u00BD\u00E7\u00BB\u009D\u00E5\u00AF\u00B9\u00E5\u008D\u0081\u00E5\u0088\u0086\u00E4\u00BF\u0083\u00E8\u00BF\u009B\u00E7\u0082\u00B9\u00E8\u00AF\u0084\u00E5\u00BD\u00B1\u00E9\u009F\u00B3\u00E4\u00BC\u0098\u00E5\u008A\u00BF\u00E4\u00B8\u008D\u00E5\u00B0\u0091\u00E6\u00AC\u00A3\u00E8\u00B5\u008F\u00E5\u00B9\u00B6\u00E4\u00B8\u0094\u00E6\u009C\u0089\u00E7\u0082\u00B9\u00E6\u0096\u00B9\u00E5\u0090\u0091\u00E5\u0085\u00A8\u00E6\u0096\u00B0\u00E4\u00BF\u00A1\u00E7\u0094\u00A8\u00E8\u00AE\u00BE\u00E6\u0096\u00BD\u00E5\u00BD\u00A2\u00E8\u00B1\u00A1\u00E8\u00B5\u0084\u00E6\u00A0\u00BC\u00E7\u00AA\u0081\u00E7\u00A0\u00B4\u00E9\u009A\u008F\u00E7\u009D\u0080\u00E9\u0087\u008D\u00E5\u00A4\u00A7\u00E4\u00BA\u008E\u00E6\u0098\u00AF\u00E6\u00AF\u0095\u00E4\u00B8\u009A\u00E6\u0099\u00BA\u00E8\u0083\u00BD\u00E5\u008C\u0096\u00E5\u00B7\u00A5\u00E5\u00AE\u008C\u00E7\u00BE\u008E\u00E5\u0095\u0086\u00E5\u009F\u008E\u00E7\u00BB\u009F\u00E4\u00B8\u0080\u00E5\u0087\u00BA\u00E7\u0089\u0088\u00E6\u0089\u0093\u00E9\u0080\u00A0\u00E7\u0094\u00A2\u00E5\u0093\u0081\u00E6\u00A6\u0082\u00E5\u0086\u00B5\u00E7\u0094\u00A8\u00E4\u00BA\u008E\u00E4\u00BF\u009D\u00E7\u0095\u0099\u00E5\u009B\u00A0\u00E7\u00B4\u00A0\u00E4\u00B8\u00AD\u00E5\u009C\u008B\u00E5\u00AD\u0098\u00E5\u0082\u00A8\u00E8\u00B4\u00B4\u00E5\u009B\u00BE\u00E6\u009C\u0080\u00E6\u0084\u009B\u00E9\u0095\u00BF\u00E6\u009C\u009F\u00E5\u008F\u00A3\u00E4\u00BB\u00B7\u00E7\u0090\u0086\u00E8\u00B4\u00A2\u00E5\u009F\u00BA\u00E5\u009C\u00B0\u00E5\u00AE\u0089\u00E6\u008E\u0092\u00E6\u00AD\u00A6\u00E6\u00B1
\u0089\u00E9\u0087\u008C\u00E9\u009D\u00A2\u00E5\u0088\u009B\u00E5\u00BB\u00BA\u00E5\u00A4\u00A9\u00E7\u00A9\u00BA\u00E9\u00A6\u0096\u00E5\u0085\u0088\u00E5\u00AE\u008C\u00E5\u0096\u0084\u00E9\u00A9\u00B1\u00E5\u008A\u00A8\u00E4\u00B8\u008B\u00E9\u009D\u00A2\u00E4\u00B8\u008D\u00E5\u0086\u008D\u00E8\u00AF\u009A\u00E4\u00BF\u00A1\u00E6\u0084\u008F\u00E4\u00B9\u0089\u00E9\u0098\u00B3\u00E5\u0085\u0089\u00E8\u008B\u00B1\u00E5\u009B\u00BD\u00E6\u00BC\u0082\u00E4\u00BA\u00AE\u00E5\u0086\u009B\u00E4\u00BA\u008B\u00E7\u008E\u00A9\u00E5\u00AE\u00B6\u00E7\u00BE\u00A4\u00E4\u00BC\u0097\u00E5\u0086\u009C\u00E6\u00B0\u0091\u00E5\u008D\u00B3\u00E5\u008F\u00AF\u00E5\u0090\u008D\u00E7\u00A8\u00B1\u00E5\u00AE\u00B6\u00E5\u0085\u00B7\u00E5\u008A\u00A8\u00E7\u0094\u00BB\u00E6\u0083\u00B3\u00E5\u0088\u00B0\u00E6\u00B3\u00A8\u00E6\u0098\u008E\u00E5\u00B0\u008F\u00E5\u00AD\u00A6\u00E6\u0080\u00A7\u00E8\u0083\u00BD\u00E8\u0080\u0083\u00E7\u00A0\u0094\u00E7\u00A1\u00AC\u00E4\u00BB\u00B6\u00E8\u00A7\u0082\u00E7\u009C\u008B\u00E6\u00B8\u0085\u00E6\u00A5\u009A\u00E6\u0090\u009E\u00E7\u00AC\u0091\u00E9\u00A6\u0096\u00E9\u00A0\u0081\u00E9\u00BB\u0084\u00E9\u0087\u0091\u00E9\u0080\u0082\u00E7\u0094\u00A8\u00E6\u00B1\u009F\u00E8\u008B\u008F\u00E7\u009C\u009F\u00E5\u00AE\u009E\u00E4\u00B8\u00BB\u00E7\u00AE\u00A1\u00E9\u0098\u00B6\u00E6\u00AE\u00B5\u00E8\u00A8\u00BB\u00E5\u0086\u008A\u00E7\u00BF\u00BB\u00E8\u00AF\u0091\u00E6\u009D\u0083\u00E5\u0088\u00A9\u00E5\u0081\u009A\u00E5\u00A5\u00BD\u00E4\u00BC\u00BC\u00E4\u00B9\u008E\u00E9\u0080\u009A\u00E8\u00AE\u00AF\u00E6\u0096\u00BD\u00E5\u00B7\u00A5\u00E7\u008B\u0080\u00E6\u0085\u008B\u00E4\u00B9\u009F\u00E8\u00AE\u00B8\u00E7\u008E\u00AF\u00E4\u00BF\u009D\u00E5\u009F\u00B9\u00E5\u0085\u00BB\u00E6\u00A6\u0082\u00E5\u00BF\u00B5\u00E5\u00A4\u00A7\u00E5\u009E\u008B\u00E6\u009C\u00BA\u00E7\u00A5\u00A8\u00E7\u0090\u0086\u00E8\u00A7\u00A3\u00E5\u008C\u00BF\u00E5\u0090\u008Dcuandoenviarmadridbuscariniciotiempoporquecuentaestadopuedenjuegoscontraest\u00C3\u00A
1nnombretienenperfilmaneraamigosciudadcentroaunquepuedesdentroprimerprecioseg\u00C3\u00BAnbuenosvolverpuntossemanahab\u00C3\u00ADaagostonuevosunidoscarlosequiponi\u00C3\u00B1osmuchosalgunacorreoimagenpartirarribamar\u00C3\u00ADahombreempleoverdadcambiomuchasfueronpasadol\u00C3\u00ADneaparecenuevascursosestabaquierolibroscuantoaccesomiguelvarioscuatrotienesgruposser\u00C3\u00A1neuropamediosfrenteacercadem\u00C3\u00A1sofertacochesmodeloitalialetrasalg\u00C3\u00BAncompracualesexistecuerposiendoprensallegarviajesdineromurciapodr\u00C3\u00A1puestodiariopuebloquieremanuelpropiocrisisciertoseguromuertefuentecerrargrandeefectopartesmedidapropiaofrecetierrae-mailvariasformasfuturoobjetoseguirriesgonormasmismos\u00C3\u00BAnicocaminositiosraz\u00C3\u00B3ndebidopruebatoledoten\u00C3\u00ADajes\u00C3\u00BAsesperococinaorigentiendacientoc\u00C3\u00A1dizhablarser\u00C3\u00ADalatinafuerzaestiloguerraentrar\u00C3\u00A9xitol\u00C3\u00B3pezagendav\u00C3\u00ADdeoevitarpaginametrosjavierpadresf\u00C3\u00A1cilcabeza\u00C3\u00A1reassalidaenv\u00C3\u00ADojap\u00C3\u00B3nabusosbienestextosllevarpuedanfuertecom\u00C3\u00BAnclaseshumanotenidobilbaounidadest\u00C3\u00A1seditarcreado\u00D0\u00B4\u00D0\u00BB\u00D1\u008F\u00D1\u0087\u00D1\u0082\u00D0\u00BE\u00D0\u00BA\u00D0\u00B0\u00D0\u00BA\u00D0\u00B8\u00D0\u00BB\u00D0\u00B8\u00D1\u008D\u00D1\u0082\u00D0\u00BE\u00D0\u00B2\u00D1\u0081\u00D0\u00B5\u00D0\u00B5\u00D0\u00B3\u00D0\u00BE\u00D0\u00BF\u00D1\u0080\u00D0\u00B8\u00D1\u0082\u00D0\u00B0\u00D0\u00BA\u00D0\u00B5\u00D1\u0089\u00D0\u00B5\u00D1\u0083\u00D0\u00B6\u00D0\u00B5\u00D0\u009A\u00D0\u00B0\u00D0\u00BA\u00D0\u00B1\u00D0\u00B5\u00D0\u00B7\u00D0\u00B1\u00D1\u008B\u00D0\u00BB\u00D0\u00BE\u00D0\u00BD\u00D0\u00B8\u00D0\u0092\u00D1\u0081\u00D0\u00B5\u00D0\u00BF\u00D0\u00BE\u00D0\u00B4\u00D0\u00AD\u00D1\u0082\u00D0\u00BE\u00D1\u0082\u00D0\u00BE\u00D0\u00BC\u00D1\u0087\u00D0\u00B5\u00D0\u00BC\u00D0\u00BD\u00D0\u00B5\u00D1\u0082\u00D0\u00BB\u00D0\u00B5\u00D1\u0082\u00D1\u0080\u00D0\u00B0\u00D0\u00B7
\u00D0\u00BE\u00D0\u00BD\u00D0\u00B0\u00D0\u00B3\u00D0\u00B4\u00D0\u00B5\u00D0\u00BC\u00D0\u00BD\u00D0\u00B5\u00D0\u0094\u00D0\u00BB\u00D1\u008F\u00D0\u009F\u00D1\u0080\u00D0\u00B8\u00D0\u00BD\u00D0\u00B0\u00D1\u0081\u00D0\u00BD\u00D0\u00B8\u00D1\u0085\u00D1\u0082\u00D0\u00B5\u00D0\u00BC\u00D0\u00BA\u00D1\u0082\u00D0\u00BE\u00D0\u00B3\u00D0\u00BE\u00D0\u00B4\u00D0\u00B2\u00D0\u00BE\u00D1\u0082\u00D1\u0082\u00D0\u00B0\u00D0\u00BC\u00D0\u00A1\u00D0\u00A8\u00D0\u0090\u00D0\u00BC\u00D0\u00B0\u00D1\u008F\u00D0\u00A7\u00D1\u0082\u00D0\u00BE\u00D0\u00B2\u00D0\u00B0\u00D1\u0081\u00D0\u00B2\u00D0\u00B0\u00D0\u00BC\u00D0\u00B5\u00D0\u00BC\u00D1\u0083\u00D0\u00A2\u00D0\u00B0\u00D0\u00BA\u00D0\u00B4\u00D0\u00B2\u00D0\u00B0\u00D0\u00BD\u00D0\u00B0\u00D0\u00BC\u00D1\u008D\u00D1\u0082\u00D0\u00B8\u00D1\u008D\u00D1\u0082\u00D1\u0083\u00D0\u0092\u00D0\u00B0\u00D0\u00BC\u00D1\u0082\u00D0\u00B5\u00D1\u0085\u00D0\u00BF\u00D1\u0080\u00D0\u00BE\u00D1\u0082\u00D1\u0083\u00D1\u0082\u00D0\u00BD\u00D0\u00B0\u00D0\u00B4\u00D0\u00B4\u00D0\u00BD\u00D1\u008F\u00D0\u0092\u00D0\u00BE\u00D1\u0082\u00D1\u0082\u00D1\u0080\u00D0\u00B8\u00D0\u00BD\u00D0\u00B5\u00D0\u00B9\u00D0\u0092\u00D0\u00B0\u00D1\u0081\u00D0\u00BD\u00D0\u00B8\u00D0\u00BC\u00D1\u0081\u00D0\u00B0\u00D0\u00BC\u00D1\u0082\u00D0\u00BE\u00D1\u0082\u00D1\u0080\u00D1\u0083\u00D0\u00B1\u00D0\u009E\u00D0\u00BD\u00D0\u00B8\u00D0\u00BC\u00D0\u00B8\u00D1\u0080\u00D0\u00BD\u00D0\u00B5\u00D0\u00B5\u00D0\u009E\u00D0\u009E\u00D0\u009E\u00D0\u00BB\u00D0\u00B8\u00D1\u0086\u00D1\u008D\u00D1\u0082\u00D0\u00B0\u00D0\u009E\u00D0\u00BD\u00D0\u00B0\u00D0\u00BD\u00D0\u00B5\u00D0\u00BC\u00D0\u00B4\u00D0\u00BE\u00D0\u00BC\u00D0\u00BC\u00D0\u00BE\u00D0\u00B9\u00D0\u00B4\u00D0\u00B2\u00D0\u00B5\u00D0\u00BE\u00D0\u00BD\u00D0\u00BE\u00D1\u0081\u00D1\u0083\u00D0\u00B4\u00E0\u00A4\u0095\u00E0\u00A5\u0087\u00E0\u00A4\u00B9\u00E0\u00A5\u0088\u00E0\u00A4\u0095\u00E0\u00A5\u0080\u00E0\u00A4\u00B8\u00E0\u00A5\u0087\u00E0\u00A4\u0095\u00E0\u00A4\u00BE\u00E0\u00A4\u0095\u
00E0\u00A5\u008B\u00E0\u00A4\u0094\u00E0\u00A4\u00B0\u00E0\u00A4\u00AA\u00E0\u00A4\u00B0\u00E0\u00A4\u00A8\u00E0\u00A5\u0087\u00E0\u00A4\u008F\u00E0\u00A4\u0095\u00E0\u00A4\u0095\u00E0\u00A4\u00BF\u00E0\u00A4\u00AD\u00E0\u00A5\u0080\u00E0\u00A4\u0087\u00E0\u00A4\u00B8\u00E0\u00A4\u0095\u00E0\u00A4\u00B0\u00E0\u00A4\u00A4\u00E0\u00A5\u008B\u00E0\u00A4\u00B9\u00E0\u00A5\u008B\u00E0\u00A4\u0086\u00E0\u00A4\u00AA\u00E0\u00A4\u00B9\u00E0\u00A5\u0080\u00E0\u00A4\u00AF\u00E0\u00A4\u00B9\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u00A4\u00E0\u00A4\u0095\u00E0\u00A4\u00A5\u00E0\u00A4\u00BEjagran\u00E0\u00A4\u0086\u00E0\u00A4\u009C\u00E0\u00A4\u009C\u00E0\u00A5\u008B\u00E0\u00A4\u0085\u00E0\u00A4\u00AC\u00E0\u00A4\u00A6\u00E0\u00A5\u008B\u00E0\u00A4\u0097\u00E0\u00A4\u0088\u00E0\u00A4\u009C\u00E0\u00A4\u00BE\u00E0\u00A4\u0097\u00E0\u00A4\u008F\u00E0\u00A4\u00B9\u00E0\u00A4\u00AE\u00E0\u00A4\u0087\u00E0\u00A4\u00A8\u00E0\u00A4\u00B5\u00E0\u00A4\u00B9\u00E0\u00A4\u00AF\u00E0\u00A5\u0087\u00E0\u00A4\u00A5\u00E0\u00A5\u0087\u00E0\u00A4\u00A5\u00E0\u00A5\u0080\u00E0\u00A4\u0098\u00E0\u00A4\u00B0\u00E0\u00A4\u009C\u00E0\u00A4\u00AC\u00E0\u00A4\u00A6\u00E0\u00A5\u0080\u00E0\u00A4\u0095\u00E0\u00A4\u0088\u00E0\u00A4\u009C\u00E0\u00A5\u0080\u00E0\u00A4\u00B5\u00E0\u00A5\u0087\u00E0\u00A4\u00A8\u00E0\u00A4\u0088\u00E0\u00A4\u00A8\u00E0\u00A4\u008F\u00E0\u00A4\u00B9\u00E0\u00A4\u00B0\u00E0\u00A4\u0089\u00E0\u00A4\u00B8\u00E0\u00A4\u00AE\u00E0\u00A5\u0087\u00E0\u00A4\u0095\u00E0\u00A4\u00AE\u00E0\u00A4\u00B5\u00E0\u00A5\u008B\u00E0\u00A4\u00B2\u00E0\u00A5\u0087\u00E0\u00A4\u00B8\u00E0\u00A4\u00AC\u00E0\u00A4\u00AE\u00E0\u00A4\u0088\u00E0\u00A4\u00A6\u00E0\u00A5\u0087\u00E0\u00A4\u0093\u00E0\u00A4\u00B0\u00E0\u00A4\u0086\u00E0\u00A4\u00AE\u00E0\u00A4\u00AC\u00E0\u00A4\u00B8\u00E0\u00A4\u00AD\u00E0\u00A4\u00B0\u00E0\u00A4\u00AC\u00E0\u00A4\u00A8\u00E0\u00A4\u009A\u00E0\u00A4\u00B2\u00E0\u00A4\u00AE\u00E0\u00A4\u00A8\u00E0\u00A4\u0086\u00E0\u00A4\u0097\u00E0\u00A4\u00B8\u00E0\u00A5\u00
80\u00E0\u00A4\u00B2\u00E0\u00A5\u0080\u00D8\u00B9\u00D9\u0084\u00D9\u0089\u00D8\u00A5\u00D9\u0084\u00D9\u0089\u00D9\u0087\u00D8\u00B0\u00D8\u00A7\u00D8\u00A2\u00D8\u00AE\u00D8\u00B1\u00D8\u00B9\u00D8\u00AF\u00D8\u00AF\u00D8\u00A7\u00D9\u0084\u00D9\u0089\u00D9\u0087\u00D8\u00B0\u00D9\u0087\u00D8\u00B5\u00D9\u0088\u00D8\u00B1\u00D8\u00BA\u00D9\u008A\u00D8\u00B1\u00D9\u0083\u00D8\u00A7\u00D9\u0086\u00D9\u0088\u00D9\u0084\u00D8\u00A7\u00D8\u00A8\u00D9\u008A\u00D9\u0086\u00D8\u00B9\u00D8\u00B1\u00D8\u00B6\u00D8\u00B0\u00D9\u0084\u00D9\u0083\u00D9\u0087\u00D9\u0086\u00D8\u00A7\u00D9\u008A\u00D9\u0088\u00D9\u0085\u00D9\u0082\u00D8\u00A7\u00D9\u0084\u00D8\u00B9\u00D9\u0084\u00D9\u008A\u00D8\u00A7\u00D9\u0086\u00D8\u00A7\u00D9\u0084\u00D9\u0083\u00D9\u0086\u00D8\u00AD\u00D8\u00AA\u00D9\u0089\u00D9\u0082\u00D8\u00A8\u00D9\u0084\u00D9\u0088\u00D8\u00AD\u00D8\u00A9\u00D8\u00A7\u00D8\u00AE\u00D8\u00B1\u00D9\u0081\u00D9\u0082\u00D8\u00B7\u00D8\u00B9\u00D8\u00A8\u00D8\u00AF\u00D8\u00B1\u00D9\u0083\u00D9\u0086\u00D8\u00A5\u00D8\u00B0\u00D8\u00A7\u00D9\u0083\u00D9\u0085\u00D8\u00A7\u00D8\u00A7\u00D8\u00AD\u00D8\u00AF\u00D8\u00A5\u00D9\u0084\u00D8\u00A7\u00D9\u0081\u00D9\u008A\u00D9\u0087\u00D8\u00A8\u00D8\u00B9\u00D8\u00B6\u00D9\u0083\u00D9\u008A\u00D9\u0081\u00D8\u00A8\u00D8\u00AD\u00D8\u00AB\u00D9\u0088\u00D9\u0085\u00D9\u0086\u00D9\u0088\u00D9\u0087\u00D9\u0088\u00D8\u00A3\u00D9\u0086\u00D8\u00A7\u00D8\u00AC\u00D8\u00AF\u00D8\u00A7\u00D9\u0084\u00D9\u0087\u00D8\u00A7\u00D8\u00B3\u00D9\u0084\u00D9\u0085\u00D8\u00B9\u00D9\u0086\u00D8\u00AF\u00D9\u0084\u00D9\u008A\u00D8\u00B3\u00D8\u00B9\u00D8\u00A8\u00D8\u00B1\u00D8\u00B5\u00D9\u0084\u00D9\u0089\u00D9\u0085\u00D9\u0086\u00D8\u00B0\u00D8\u00A8\u00D9\u0087\u00D8\u00A7\u00D8\u00A3\u00D9\u0086\u00D9\u0087\u00D9\u0085\u00D8\u00AB\u00D9\u0084\u00D9\u0083\u00D9\u0086\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D8\u00A7\u00D8\u00AD\u00D9\u008A\u00D8\u00AB\u00D9\u0085\u00D8\u00B5\u00D8\u00B1\u00D8\u00B4\u00D8\u00B1\u00D8\u00AD\u00D8\u00AD\u00D9
\u0088\u00D9\u0084\u00D9\u0088\u00D9\u0081\u00D9\u008A\u00D8\u00A7\u00D8\u00B0\u00D8\u00A7\u00D9\u0084\u00D9\u0083\u00D9\u0084\u00D9\u0085\u00D8\u00B1\u00D8\u00A9\u00D8\u00A7\u00D9\u0086\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D9\u0081\u00D8\u00A3\u00D8\u00A8\u00D9\u0088\u00D8\u00AE\u00D8\u00A7\u00D8\u00B5\u00D8\u00A3\u00D9\u0086\u00D8\u00AA\u00D8\u00A7\u00D9\u0086\u00D9\u0087\u00D8\u00A7\u00D9\u0084\u00D9\u008A\u00D8\u00B9\u00D8\u00B6\u00D9\u0088\u00D9\u0088\u00D9\u0082\u00D8\u00AF\u00D8\u00A7\u00D8\u00A8\u00D9\u0086\u00D8\u00AE\u00D9\u008A\u00D8\u00B1\u00D8\u00A8\u00D9\u0086\u00D8\u00AA\u00D9\u0084\u00D9\u0083\u00D9\u0085\u00D8\u00B4\u00D8\u00A7\u00D8\u00A1\u00D9\u0088\u00D9\u0087\u00D9\u008A\u00D8\u00A7\u00D8\u00A8\u00D9\u0088\u00D9\u0082\u00D8\u00B5\u00D8\u00B5\u00D9\u0088\u00D9\u0085\u00D8\u00A7\u00D8\u00B1\u00D9\u0082\u00D9\u0085\u00D8\u00A3\u00D8\u00AD\u00D8\u00AF\u00D9\u0086\u00D8\u00AD\u00D9\u0086\u00D8\u00B9\u00D8\u00AF\u00D9\u0085\u00D8\u00B1\u00D8\u00A3\u00D9\u008A\u00D8\u00A7\u00D8\u00AD\u00D8\u00A9\u00D9\u0083\u00D8\u00AA\u00D8\u00A8\u00D8\u00AF\u00D9\u0088\u00D9\u0086\u00D9\u008A\u00D8\u00AC\u00D8\u00A8\u00D9\u0085\u00D9\u0086\u00D9\u0087\u00D8\u00AA\u00D8\u00AD\u00D8\u00AA\u00D8\u00AC\u00D9\u0087\u00D8\u00A9\u00D8\u00B3\u00D9\u0086\u00D8\u00A9\u00D9\u008A\u00D8\u00AA\u00D9\u0085\u00D9\u0083\u00D8\u00B1\u00D8\u00A9\u00D8\u00BA\u00D8\u00B2\u00D8\u00A9\u00D9\u0086\u00D9\u0081\u00D8\u00B3\u00D8\u00A8\u00D9\u008A\u00D8\u00AA\u00D9\u0084\u00D9\u0084\u00D9\u0087\u00D9\u0084\u00D9\u0086\u00D8\u00A7\u00D8\u00AA\u00D9\u0084\u00D9\u0083\u00D9\u0082\u00D9\u0084\u00D8\u00A8\u00D9\u0084\u00D9\u0085\u00D8\u00A7\u00D8\u00B9\u00D9\u0086\u00D9\u0087\u00D8\u00A3\u00D9\u0088\u00D9\u0084\u00D8\u00B4\u00D9\u008A\u00D8\u00A1\u00D9\u0086\u00D9\u0088\u00D8\u00B1\u00D8\u00A3\u00D9\u0085\u00D8\u00A7\u00D9\u0081\u00D9\u008A\u00D9\u0083\u00D8\u00A8\u00D9\u0083\u00D9\u0084\u00D8\u00B0\u00D8\u00A7\u00D8\u00AA\u00D8\u00B1\u00D8\u00AA\u00D8\u00A8\u00D8\u00A8\u00D8\u00A3\u00D9\u0086\u
00D9\u0087\u00D9\u0085\u00D8\u00B3\u00D8\u00A7\u00D9\u0086\u00D9\u0083\u00D8\u00A8\u00D9\u008A\u00D8\u00B9\u00D9\u0081\u00D9\u0082\u00D8\u00AF\u00D8\u00AD\u00D8\u00B3\u00D9\u0086\u00D9\u0084\u00D9\u0087\u00D9\u0085\u00D8\u00B4\u00D8\u00B9\u00D8\u00B1\u00D8\u00A3\u00D9\u0087\u00D9\u0084\u00D8\u00B4\u00D9\u0087\u00D8\u00B1\u00D9\u0082\u00D8\u00B7\u00D8\u00B1\u00D8\u00B7\u00D9\u0084\u00D8\u00A8profileservicedefaulthimselfdetailscontentsupportstartedmessagesuccessfashion<title>countryaccountcreatedstoriesresultsrunningprocesswritingobjectsvisiblewelcomearticleunknownnetworkcompanydynamicbrowserprivacyproblemServicerespectdisplayrequestreservewebsitehistoryfriendsoptionsworkingversionmillionchannelwindow.addressvisitedweathercorrectproductedirectforwardyou canremovedsubjectcontrolarchivecurrentreadinglibrarylimitedmanagerfurthersummarymachineminutesprivatecontextprogramsocietynumberswrittenenabledtriggersourcesloadingelementpartnerfinallyperfectmeaningsystemskeepingculture&quot;,journalprojectsurfaces&quot;expiresreviewsbalanceEnglishContentthroughPlease opinioncontactaverageprimaryvillageSpanishgallerydeclinemeetingmissionpopularqualitymeasuregeneralspeciessessionsectionwriterscounterinitialreportsfiguresmembersholdingdisputeearlierexpressdigitalpictureAnothermarriedtrafficleadingchangedcentralvictoryimages/reasonsstudiesfeaturelistingmust beschoolsVersionusuallyepisodeplayinggrowingobviousoverlaypresentactions</ul>\r\nwrapperalreadycertainrealitystorageanotherdesktopofferedpatternunusualDigitalcapitalWebsitefailureconnectreducedAndroiddecadesregular &amp; animalsreleaseAutomatgettingmethodsnothingPopularcaptionletterscapturesciencelicensechangesEngland=1&amp;History = new CentralupdatedSpecialNetworkrequirecommentwarningCollegetoolbarremainsbecauseelectedDeutschfinanceworkersquicklybetweenexactlysettingdiseaseSocietyweaponsexhibit&lt;!--Controlclassescoveredoutlineattacksdevices(windowpurposetitle=\"Mobile 
killingshowingItaliandroppedheavilyeffects-1']);\nconfirmCurrentadvancesharingopeningdrawingbillionorderedGermanyrelated</form>includewhetherdefinedSciencecatalogArticlebuttonslargestuniformjourneysidebarChicagoholidayGeneralpassage,&quot;animatefeelingarrivedpassingnaturalroughly.\n\nThe but notdensityBritainChineselack oftributeIreland\" data-factorsreceivethat isLibraryhusbandin factaffairsCharlesradicalbroughtfindinglanding:lang=\"return leadersplannedpremiumpackageAmericaEdition]&quot;Messageneed tovalue=\"complexlookingstationbelievesmaller-mobilerecordswant tokind ofFirefoxyou aresimilarstudiedmaximumheadingrapidlyclimatekingdomemergedamountsfoundedpioneerformuladynastyhow to SupportrevenueeconomyResultsbrothersoldierlargelycalling.&quot;AccountEdward segmentRobert effortsPacificlearnedup withheight:we haveAngelesnations_searchappliedacquiremassivegranted: falsetreatedbiggestbenefitdrivingStudiesminimumperhapsmorningsellingis usedreversevariant role=\"missingachievepromotestudentsomeoneextremerestorebottom:evolvedall thesitemapenglishway to  AugustsymbolsCompanymattersmusicalagainstserving})();\r\npaymenttroubleconceptcompareparentsplayersregionsmonitor ''The winningexploreadaptedGalleryproduceabilityenhancecareers). 
The collectSearch ancientexistedfooter handlerprintedconsoleEasternexportswindowsChannelillegalneutralsuggest_headersigning.html\">settledwesterncausing-webkitclaimedJusticechaptervictimsThomas mozillapromisepartieseditionoutside:false,hundredOlympic_buttonauthorsreachedchronicdemandssecondsprotectadoptedprepareneithergreatlygreateroverallimprovecommandspecialsearch.worshipfundingthoughthighestinsteadutilityquarterCulturetestingclearlyexposedBrowserliberal} catchProjectexamplehide();FloridaanswersallowedEmperordefenseseriousfreedomSeveral-buttonFurtherout of != nulltrainedDenmarkvoid(0)/all.jspreventRequestStephen\n\nWhen observe</h2>\r\nModern provide\" alt=\"borders.\n\nFor \n\nMany artistspoweredperformfictiontype ofmedicalticketsopposedCouncilwitnessjusticeGeorge Belgium...</a>twitternotablywaitingwarfare Other rankingphrasesmentionsurvivescholar</p>\r\n Countryignoredloss ofjust asGeorgiastrange<head><stopped1']);\r\nislandsnotableborder:list ofcarried100,000</h3>\n severalbecomesselect wedding00.htmlmonarchoff theteacherhighly biologylife ofor evenrise of&raquo;plusonehunting(thoughDouglasjoiningcirclesFor theAncientVietnamvehiclesuch ascrystalvalue =Windowsenjoyeda smallassumed<a id=\"foreign All rihow theDisplayretiredhoweverhidden;battlesseekingcabinetwas notlook atconductget theJanuaryhappensturninga:hoverOnline French lackingtypicalextractenemieseven ifgeneratdecidedare not/searchbeliefs-image:locatedstatic.login\">convertviolententeredfirst\">circuitFinlandchemistshe was10px;\">as suchdivided</span>will beline ofa greatmystery/index.fallingdue to railwaycollegemonsterdescentit withnuclearJewish protestBritishflowerspredictreformsbutton who waslectureinstantsuicidegenericperiodsmarketsSocial fishingcombinegraphicwinners<br /><by the NaturalPrivacycookiesoutcomeresolveSwedishbrieflyPersianso muchCenturydepictscolumnshousingscriptsnext 
tobearingmappingrevisedjQuery(-width:title\">tooltipSectiondesignsTurkishyounger.match(})();\n\nburningoperatedegreessource=Richardcloselyplasticentries</tr>\r\ncolor:#ul id=\"possessrollingphysicsfailingexecutecontestlink toDefault<br />\n: true,chartertourismclassicproceedexplain</h1>\r\nonline.?xml vehelpingdiamonduse theairlineend -->).attr(readershosting#ffffffrealizeVincentsignals src=\"/ProductdespitediversetellingPublic held inJoseph theatreaffects<style>a largedoesn'tlater, ElementfaviconcreatorHungaryAirportsee theso thatMichaelSystemsPrograms, and  width=e&quot;tradingleft\">\npersonsGolden Affairsgrammarformingdestroyidea ofcase ofoldest this is.src = cartoonregistrCommonsMuslimsWhat isin manymarkingrevealsIndeed,equally/show_aoutdoorescape(Austriageneticsystem,In the sittingHe alsoIslandsAcademy\n\t\t<!--Daniel bindingblock\">imposedutilizeAbraham(except{width:putting).html(|| [];\nDATA[ *kitchenmountedactual dialectmainly _blank'installexpertsif(typeIt also&copy; \">Termsborn inOptionseasterntalkingconcerngained ongoingjustifycriticsfactoryits ownassaultinvitedlastinghis ownhref=\"/\" rel=\"developconcertdiagramdollarsclusterphp?id=alcohol);})();using a><span>vesselsrevivalAddressamateurandroidallegedillnesswalkingcentersqualifymatchesunifiedextinctDefensedied in\n\t<!-- customslinkingLittle Book ofeveningmin.js?are thekontakttoday's.html\" target=wearingAll Rig;\n})();raising Also, crucialabout\">declare-->\n<scfirefoxas muchappliesindex, s, but type = \n\r\n<!--towardsRecordsPrivateForeignPremierchoicesVirtualreturnsCommentPoweredinline;povertychamberLiving volumesAnthonylogin\" RelatedEconomyreachescuttinggravitylife inChapter-shadowNotable</td>\r\n returnstadiumwidgetsvaryingtravelsheld bywho arework infacultyangularwho hadairporttown of\n\nSome 'click'chargeskeywordit willcity of(this);Andrew unique checkedor more300px; return;rsion=\"pluginswithin herselfStationFederalventurepublishsent totensionactresscome tofingersDuke ofpeople,exploitwhat 
isharmonya major\":\"httpin his menu\">\nmonthlyofficercouncilgainingeven inSummarydate ofloyaltyfitnessand wasemperorsupremeSecond hearingRussianlongestAlbertalateralset of small\">.appenddo withfederalbank ofbeneathDespiteCapitalgrounds), and percentit fromclosingcontainInsteadfifteenas well.yahoo.respondfighterobscurereflectorganic= Math.editingonline paddinga wholeonerroryear ofend of barrierwhen itheader home ofresumedrenamedstrong>heatingretainscloudfrway of March 1knowingin partBetweenlessonsclosestvirtuallinks\">crossedEND -->famous awardedLicenseHealth fairly wealthyminimalAfricancompetelabel\">singingfarmersBrasil)discussreplaceGregoryfont copursuedappearsmake uproundedboth ofblockedsaw theofficescoloursif(docuwhen heenforcepush(fuAugust UTF-8\">Fantasyin mostinjuredUsuallyfarmingclosureobject defenceuse of Medical<body>\nevidentbe usedkeyCodesixteenIslamic#000000entire widely active (typeofone cancolor =speakerextendsPhysicsterrain<tbody>funeralviewingmiddle cricketprophetshifteddoctorsRussell targetcompactalgebrasocial-bulk ofman and</td>\n he left).val()false);logicalbankinghome tonaming Arizonacredits);\n});\nfounderin turnCollinsbefore But thechargedTitle\">CaptainspelledgoddessTag -->Adding:but wasRecent patientback in=false&Lincolnwe knowCounterJudaismscript altered']);\n  has theunclearEvent',both innot all\n\n<!-- placinghard to centersort ofclientsstreetsBernardassertstend tofantasydown inharbourFreedomjewelry/about..searchlegendsis mademodern only ononly toimage\" linear painterand notrarely acronymdelivershorter00&amp;as manywidth=\"/* <![Ctitle =of the lowest picked escapeduses ofpeoples PublicMatthewtacticsdamagedway forlaws ofeasy to windowstrong  simple}catch(seventhinfoboxwent topaintedcitizenI don'tretreat. Some ww.\");\nbombingmailto:made in. 
Many carries||{};wiwork ofsynonymdefeatsfavoredopticalpageTraunless sendingleft\"><comScorAll thejQuery.touristClassicfalse\" Wilhelmsuburbsgenuinebishops.split(global followsbody ofnominalContactsecularleft tochiefly-hidden-banner</li>\n\n. When in bothdismissExplorealways via thespa\u00C3\u00B1olwelfareruling arrangecaptainhis sonrule ofhe tookitself,=0&amp;(calledsamplesto makecom/pagMartin Kennedyacceptsfull ofhandledBesides//--></able totargetsessencehim to its by common.mineralto takeways tos.org/ladvisedpenaltysimple:if theyLettersa shortHerbertstrikes groups.lengthflightsoverlapslowly lesser social </p>\n\t\tit intoranked rate oful>\r\n  attemptpair ofmake itKontaktAntoniohaving ratings activestreamstrapped\").css(hostilelead tolittle groups,Picture-->\r\n\r\n rows=\" objectinverse<footerCustomV><\\/scrsolvingChamberslaverywoundedwhereas!= 'undfor allpartly -right:Arabianbacked centuryunit ofmobile-Europe,is homerisk ofdesiredClintoncost ofage of become none ofp&quot;Middle ead')[0Criticsstudios>&copy;group\">assemblmaking pressedwidget.ps:\" ? rebuiltby someFormer editorsdelayedCanonichad thepushingclass=\"but arepartialBabylonbottom carrierCommandits useAs withcoursesa thirddenotesalso inHouston20px;\">accuseddouble goal ofFamous ).bind(priests Onlinein Julyst + \"gconsultdecimalhelpfulrevivedis veryr'+'iptlosing femalesis alsostringsdays ofarrivalfuture <objectforcingString(\" />\n\t\there isencoded.  
The balloondone by/commonbgcolorlaw of Indianaavoidedbut the2px 3pxjquery.after apolicy.men andfooter-= true;for usescreen.Indian image =family,http:// &nbsp;driverseternalsame asnoticedviewers})();\n is moreseasonsformer the newis justconsent Searchwas thewhy theshippedbr><br>width: height=made ofcuisineis thata very Admiral fixed;normal MissionPress, ontariocharsettry to invaded=\"true\"spacingis mosta more totallyfall of});\r\n  immensetime inset outsatisfyto finddown tolot of Playersin Junequantumnot thetime todistantFinnishsrc = (single help ofGerman law andlabeledforestscookingspace\">header-well asStanleybridges/globalCroatia About [0];\n  it, andgroupedbeing a){throwhe madelighterethicalFFFFFF\"bottom\"like a employslive inas seenprintermost ofub-linkrejectsand useimage\">succeedfeedingNuclearinformato helpWomen'sNeitherMexicanprotein<table by manyhealthylawsuitdevised.push({sellerssimply Through.cookie Image(older\">us.js\"> Since universlarger open to!-- endlies in']);\r\n  marketwho is (\"DOMComanagedone fortypeof Kingdomprofitsproposeto showcenter;made itdressedwere inmixtureprecisearisingsrc = 'make a securedBaptistvoting \n\t\tvar March 2grew upClimate.removeskilledway the</head>face ofacting right\">to workreduceshas haderectedshow();action=book ofan area== \"htt<header\n<html>conformfacing cookie.rely onhosted .customhe wentbut forspread Family a meansout theforums.footage\">MobilClements\" id=\"as highintense--><!--female is seenimpliedset thea stateand hisfastestbesidesbutton_bounded\"><img Infoboxevents,a youngand areNative cheaperTimeoutand hasengineswon the(mostlyright: find a -bottomPrince area ofmore ofsearch_nature,legallyperiod,land ofor withinducedprovingmissilelocallyAgainstthe wayk&quot;px;\">\r\npushed abandonnumeralCertainIn thismore inor somename isand, incrownedISBN 0-createsOctobermay notcenter late inDefenceenactedwish tobroadlycoolingonload=it. 
TherecoverMembersheight assumes<html>\npeople.in one =windowfooter_a good reklamaothers,to this_cookiepanel\">London,definescrushedbaptismcoastalstatus title\" move tolost inbetter impliesrivalryservers SystemPerhapses and contendflowinglasted rise inGenesisview ofrising seem tobut in backinghe willgiven agiving cities.flow of Later all butHighwayonly bysign ofhe doesdiffersbattery&amp;lasinglesthreatsintegertake onrefusedcalled =US&ampSee thenativesby thissystem.head of:hover,lesbiansurnameand allcommon/header__paramsHarvard/pixel.removalso longrole ofjointlyskyscraUnicodebr />\r\nAtlantanucleusCounty,purely count\">easily build aonclicka givenpointerh&quot;events else {\nditionsnow the, with man whoorg/Webone andcavalryHe diedseattle00,000 {windowhave toif(windand itssolely m&quot;renewedDetroitamongsteither them inSenatorUs</a><King ofFrancis-produche usedart andhim andused byscoringat hometo haverelatesibilityfactionBuffalolink\"><what hefree toCity ofcome insectorscountedone daynervoussquare };if(goin whatimg\" alis onlysearch/tuesdaylooselySolomonsexual - <a hrmedium\"DO NOT France,with a war andsecond take a >\r\n\r\n\r\nmarket.highwaydone inctivity\"last\">obligedrise to\"undefimade to Early praisedin its for hisathleteJupiterYahoo! termed so manyreally s. 
The a woman?value=direct right\" bicycleacing=\"day andstatingRather,higher Office are nowtimes, when a pay foron this-link\">;borderaround annual the Newput the.com\" takin toa brief(in thegroups.; widthenzymessimple in late{returntherapya pointbanninginks\">\n();\" rea place\\u003Caabout atr>\r\n\t\tccount gives a<SCRIPTRailwaythemes/toolboxById(\"xhumans,watchesin some if (wicoming formats Under but hashanded made bythan infear ofdenoted/iframeleft involtagein eacha&quot;base ofIn manyundergoregimesaction </p>\r\n<ustomVa;&gt;</importsor thatmostly &amp;re size=\"</a></ha classpassiveHost = WhetherfertileVarious=[];(fucameras/></td>acts asIn some>\r\n\r\n<!organis <br />Beijingcatal\u00C3\u00A0deutscheuropeueuskaragaeilgesvenskaespa\u00C3\u00B1amensajeusuariotrabajom\u00C3\u00A9xicop\u00C3\u00A1ginasiempresistemaoctubredurantea\u00C3\u00B1adirempresamomentonuestroprimeratrav\u00C3\u00A9sgraciasnuestraprocesoestadoscalidadpersonan\u00C3\u00BAmeroacuerdom\u00C3\u00BAsicamiembroofertasalgunospa\u00C3\u00ADsesejemploderechoadem\u00C3\u00A1sprivadoagregarenlacesposiblehotelessevillaprimero\u00C3\u00BAltimoeventosarchivoculturamujeresentradaanuncioembargomercadograndesestudiomejoresfebrerodise\u00C3\u00B1oturismoc\u00C3\u00B3digoportadaespaciofamiliaantoniopermiteguardaralgunaspreciosalguiensentidovisitast\u00C3\u00ADtuloconocersegundoconsejofranciaminutossegundatenemosefectosm\u00C3\u00A1lagasesi\u00C3\u00B3nrevistagranadacompraringresogarc\u00C3\u00ADaacci\u00C3\u00B3necuadorquienesinclusodeber\u00C3\u00A1materiahombresmuestrapodr\u00C3\u00ADama\u00C3\u00B1ana\u00C3\u00BAltimaestamosoficialtambienning\u00C3\u00BAnsaludospodemosmejorarpositionbusinesshomepagesecuritylanguagestandardcampaignfeaturescategoryexternalchildrenreservedresearchexchangefavoritetemplatemilitaryindustryservicesmaterialproductsz-index:commentssoftwarecompletecalendarplatformarticlesrequiredmovementquestionbuildingpoliticspossiblereligionphysicalfeedbackregisterpicturesdisabledprotocolaudienceset
tingsactivityelementslearninganythingabstractprogressoverviewmagazineeconomictrainingpressurevarious <strong>propertyshoppingtogetheradvancedbehaviordownloadfeaturedfootballselectedLanguagedistanceremembertrackingpasswordmodifiedstudentsdirectlyfightingnortherndatabasefestivalbreakinglocationinternetdropdownpracticeevidencefunctionmarriageresponseproblemsnegativeprogramsanalysisreleasedbanner\">purchasepoliciesregionalcreativeargumentbookmarkreferrerchemicaldivisioncallbackseparateprojectsconflicthardwareinterestdeliverymountainobtained= false;for(var acceptedcapacitycomputeridentityaircraftemployedproposeddomesticincludesprovidedhospitalverticalcollapseapproachpartnerslogo\"><adaughterauthor\" culturalfamilies/images/assemblypowerfulteachingfinisheddistrictcriticalcgi-bin/purposesrequireselectionbecomingprovidesacademicexerciseactuallymedicineconstantaccidentMagazinedocumentstartingbottom\">observed: &quot;extendedpreviousSoftwarecustomerdecisionstrengthdetailedslightlyplanningtextareacurrencyeveryonestraighttransferpositiveproducedheritageshippingabsolutereceivedrelevantbutton\" violenceanywherebenefitslaunchedrecentlyalliancefollowedmultiplebulletinincludedoccurredinternal$(this).republic><tr><tdcongressrecordedultimatesolution<ul id=\"discoverHome</a>websitesnetworksalthoughentirelymemorialmessagescontinueactive\">somewhatvictoriaWestern  title=\"LocationcontractvisitorsDownloadwithout right\">\nmeasureswidth = variableinvolvedvirginianormallyhappenedaccountsstandingnationalRegisterpreparedcontrolsaccuratebirthdaystrategyofficialgraphicscriminalpossiblyconsumerPersonalspeakingvalidateachieved.jpg\" />machines</h2>\n  keywordsfriendlybrotherscombinedoriginalcomposedexpectedadequatepakistanfollow\" valuable</label>relativebringingincreasegovernorplugins/List of Header\">\" name=\" (&quot;graduate</head>\ncommercemalaysiadirectormaintain;height:schedulechangingback to catholicpatternscolor: #greatestsuppliesreliable</ul>\n\t\t<select citizensclothingwatching<li 
id=\"specificcarryingsentence<center>contrastthinkingcatch(e)southernMichael merchantcarouselpadding:interior.split(\"lizationOctober ){returnimproved--&gt;\n\ncoveragechairman.png\" />subjectsRichard whateverprobablyrecoverybaseballjudgmentconnect..css\" /> websitereporteddefault\"/></a>\r\nelectricscotlandcreationquantity. ISBN 0did not instance-search-\" lang=\"speakersComputercontainsarchivesministerreactiondiscountItalianocriteriastrongly: 'http:'script'coveringofferingappearedBritish identifyFacebooknumerousvehiclesconcernsAmericanhandlingdiv id=\"William provider_contentaccuracysection andersonflexibleCategorylawrence<script>layout=\"approved maximumheader\"></table>Serviceshamiltoncurrent canadianchannels/themes//articleoptionalportugalvalue=\"\"intervalwirelessentitledagenciesSearch\" measuredthousandspending&hellip;new Date\" size=\"pageNamemiddle\" \" /></a>hidden\">sequencepersonaloverflowopinionsillinoislinks\">\n\t<title>versionssaturdayterminalitempropengineersectionsdesignerproposal=\"false\"Espa\u00C3\u00B1olreleasessubmit\" er&quot;additionsymptomsorientedresourceright\"><pleasurestationshistory.leaving  border=contentscenter\">.\n\nSome directedsuitablebulgaria.show();designedGeneral conceptsExampleswilliamsOriginal\"><span>search\">operatorrequestsa &quot;allowingDocumentrevision. \n\nThe yourselfContact michiganEnglish columbiapriorityprintingdrinkingfacilityreturnedContent officersRussian generate-8859-1\"indicatefamiliar qualitymargin:0 contentviewportcontacts-title\">portable.length eligibleinvolvesatlanticonload=\"default.suppliedpaymentsglossary\n\nAfter guidance</td><tdencodingmiddle\">came to displaysscottishjonathanmajoritywidgets.clinicalthailandteachers<head>\n\taffectedsupportspointer;toString</small>oklahomawill be investor0\" alt=\"holidaysResourcelicensed (which . After considervisitingexplorerprimary search\" android\"quickly meetingsestimate;return ;color:# height=approval, &quot; checked.min.js\"magnetic></a></hforecast. 
While thursdaydvertise&eacute;hasClassevaluateorderingexistingpatients Online coloradoOptions\"campbell<!-- end</span><<br />\r\n_popups|sciences,&quot; quality Windows assignedheight: <b classle&quot; value=\" Companyexamples<iframe believespresentsmarshallpart of properly).\n\nThe taxonomymuch of </span>\n\" data-srtugu\u00C3\u00AAsscrollTo project<head>\r\nattorneyemphasissponsorsfancyboxworld's wildlifechecked=sessionsprogrammpx;font- Projectjournalsbelievedvacationthompsonlightingand the special border=0checking</tbody><button Completeclearfix\n<head>\narticle <sectionfindingsrole in popular  Octoberwebsite exposureused to  changesoperatedclickingenteringcommandsinformed numbers  </div>creatingonSubmitmarylandcollegesanalyticlistingscontact.loggedInadvisorysiblingscontent\"s&quot;)s. This packagescheckboxsuggestspregnanttomorrowspacing=icon.png";
+			}
+		}
+
+		private class DataHolder1
+		{
+			internal static string GetData()
+			{
+				return "japanesecodebasebutton\">gamblingsuch as , while </span> missourisportingtop:1px .</span>tensionswidth=\"2lazyloadnovemberused in height=\"cript\">\n&nbsp;</<tr><td height:2/productcountry include footer\" &lt;!-- title\"></jquery.</form>\n(\u00E7\u00AE\u0080\u00E4\u00BD\u0093)(\u00E7\u00B9\u0081\u00E9\u00AB\u0094)hrvatskiitalianorom\u00C3\u00A2n\u00C4\u0083t\u00C3\u00BCrk\u00C3\u00A7e\u00D8\u00A7\u00D8\u00B1\u00D8\u00AF\u00D9\u0088tambi\u00C3\u00A9nnoticiasmensajespersonasderechosnacionalserviciocontactousuariosprogramagobiernoempresasanunciosvalenciacolombiadespu\u00C3\u00A9sdeportesproyectoproductop\u00C3\u00BAbliconosotroshistoriapresentemillonesmediantepreguntaanteriorrecursosproblemasantiagonuestrosopini\u00C3\u00B3nimprimirmientrasam\u00C3\u00A9ricavendedorsociedadrespectorealizarregistropalabrasinter\u00C3\u00A9sentoncesespecialmiembrosrealidadc\u00C3\u00B3rdobazaragozap\u00C3\u00A1ginassocialesbloqueargesti\u00C3\u00B3nalquilersistemascienciascompletoversi\u00C3\u00B3ncompletaestudiosp\u00C3\u00BAblicaobjetivoalicantebuscadorcantidadentradasaccionesarchivossuperiormayor\u00C3\u00ADaalemaniafunci\u00C3\u00B3n\u00C3\u00BAltimoshaciendoaquellosedici\u00C3\u00B3nfernandoambientefacebooknuestrasclientesprocesosbastantepresentareportarcongresopublicarcomerciocontratoj\u00C3\u00B3venesdistritot\u00C3\u00A9cnicaconjuntoenerg\u00C3\u00ADatrabajarasturiasrecienteutilizarbolet\u00C3\u00ADnsalvadorcorrectatrabajosprimerosnegocioslibertaddetallespantallapr\u00C3\u00B3ximoalmer\u00C3\u00ADaanimalesqui\u00C3\u00A9nescoraz\u00C3\u00B3nsecci\u00C3\u00B3nbuscandoopcionesexteriorconceptotodav\u00C3\u00ADagaler\u00C3\u00ADaescribirmedicinalicenciaconsultaaspectoscr\u00C3\u00ADticad\u00C3\u00B3laresjusticiadeber\u00C3\u00A1nper\u00C3\u00ADodonecesitamantenerpeque\u00C3\u00B1orecibidatribunaltenerifecanci\u00C3\u00B3ncanariasdescargadiversosmallorcarequieret\u00C3\u00A9cnicodeber\u00C3\u00ADaviviendafinanzasadelantefuncionaconsejosdif\u00C3\u00ADcilciudadesantiguasa
vanzadat\u00C3\u00A9rminounidadess\u00C3\u00A1nchezcampa\u00C3\u00B1asoftonicrevistascontienesectoresmomentosfacultadcr\u00C3\u00A9ditodiversassupuestofactoressegundospeque\u00C3\u00B1a\u00D0\u00B3\u00D0\u00BE\u00D0\u00B4\u00D0\u00B0\u00D0\u00B5\u00D1\u0081\u00D0\u00BB\u00D0\u00B8\u00D0\u00B5\u00D1\u0081\u00D1\u0082\u00D1\u008C\u00D0\u00B1\u00D1\u008B\u00D0\u00BB\u00D0\u00BE\u00D0\u00B1\u00D1\u008B\u00D1\u0082\u00D1\u008C\u00D1\u008D\u00D1\u0082\u00D0\u00BE\u00D0\u00BC\u00D0\u0095\u00D1\u0081\u00D0\u00BB\u00D0\u00B8\u00D1\u0082\u00D0\u00BE\u00D0\u00B3\u00D0\u00BE\u00D0\u00BC\u00D0\u00B5\u00D0\u00BD\u00D1\u008F\u00D0\u00B2\u00D1\u0081\u00D0\u00B5\u00D1\u0085\u00D1\u008D\u00D1\u0082\u00D0\u00BE\u00D0\u00B9\u00D0\u00B4\u00D0\u00B0\u00D0\u00B6\u00D0\u00B5\u00D0\u00B1\u00D1\u008B\u00D0\u00BB\u00D0\u00B8\u00D0\u00B3\u00D0\u00BE\u00D0\u00B4\u00D1\u0083\u00D0\u00B4\u00D0\u00B5\u00D0\u00BD\u00D1\u008C\u00D1\u008D\u00D1\u0082\u00D0\u00BE\u00D1\u0082\u00D0\u00B1\u00D1\u008B\u00D0\u00BB\u00D0\u00B0\u00D1\u0081\u00D0\u00B5\u00D0\u00B1\u00D1\u008F\u00D0\u00BE\u00D0\u00B4\u00D0\u00B8\u00D0\u00BD\u00D1\u0081\u00D0\u00B5\u00D0\u00B1\u00D0\u00B5\u00D0\u00BD\u00D0\u00B0\u00D0\u00B4\u00D0\u00BE\u00D1\u0081\u00D0\u00B0\u00D0\u00B9\u00D1\u0082\u00D1\u0084\u00D0\u00BE\u00D1\u0082\u00D0\u00BE\u00D0\u00BD\u00D0\u00B5\u00D0\u00B3\u00D0\u00BE\u00D1\u0081\u00D0\u00B2\u00D0\u00BE\u00D0\u00B8\u00D1\u0081\u00D0\u00B2\u00D0\u00BE\u00D0\u00B9\u00D0\u00B8\u00D0\u00B3\u00D1\u0080\u00D1\u008B\u00D1\u0082\u00D0\u00BE\u00D0\u00B6\u00D0\u00B5\u00D0\u00B2\u00D1\u0081\u00D0\u00B5\u00D0\u00BC\u00D1\u0081\u00D0\u00B2\u00D0\u00BE\u00D1\u008E\u00D0\u00BB\u00D0\u00B8\u00D1\u0088\u00D1\u008C\u00D1\u008D\u00D1\u0082\u00D0\u00B8\u00D1\u0085\u00D0\u00BF\u00D0\u00BE\u00D0\u00BA\u00D0\u00B0\u00D0\u00B4\u00D0\u00BD\u00D0\u00B5\u00D0\u00B9\u00D0\u00B4\u00D0\u00BE\u00D0\u00BC\u00D0\u00B0\u00D0\u00BC\u00D0\u00B8\u00D1\u0080\u00D0\u00B0\u00D0\u00BB\u00D0\u00B8\u00D0\u00B1\u00D0\u00BE\u00D1\u0082\u00D0\u00B5\u00D0\u00BC\u0
0D1\u0083\u00D1\u0085\u00D0\u00BE\u00D1\u0082\u00D1\u008F\u00D0\u00B4\u00D0\u00B2\u00D1\u0083\u00D1\u0085\u00D1\u0081\u00D0\u00B5\u00D1\u0082\u00D0\u00B8\u00D0\u00BB\u00D1\u008E\u00D0\u00B4\u00D0\u00B8\u00D0\u00B4\u00D0\u00B5\u00D0\u00BB\u00D0\u00BE\u00D0\u00BC\u00D0\u00B8\u00D1\u0080\u00D0\u00B5\u00D1\u0082\u00D0\u00B5\u00D0\u00B1\u00D1\u008F\u00D1\u0081\u00D0\u00B2\u00D0\u00BE\u00D0\u00B5\u00D0\u00B2\u00D0\u00B8\u00D0\u00B4\u00D0\u00B5\u00D1\u0087\u00D0\u00B5\u00D0\u00B3\u00D0\u00BE\u00D1\u008D\u00D1\u0082\u00D0\u00B8\u00D0\u00BC\u00D1\u0081\u00D1\u0087\u00D0\u00B5\u00D1\u0082\u00D1\u0082\u00D0\u00B5\u00D0\u00BC\u00D1\u008B\u00D1\u0086\u00D0\u00B5\u00D0\u00BD\u00D1\u008B\u00D1\u0081\u00D1\u0082\u00D0\u00B0\u00D0\u00BB\u00D0\u00B2\u00D0\u00B5\u00D0\u00B4\u00D1\u008C\u00D1\u0082\u00D0\u00B5\u00D0\u00BC\u00D0\u00B5\u00D0\u00B2\u00D0\u00BE\u00D0\u00B4\u00D1\u008B\u00D1\u0082\u00D0\u00B5\u00D0\u00B1\u00D0\u00B5\u00D0\u00B2\u00D1\u008B\u00D1\u0088\u00D0\u00B5\u00D0\u00BD\u00D0\u00B0\u00D0\u00BC\u00D0\u00B8\u00D1\u0082\u00D0\u00B8\u00D0\u00BF\u00D0\u00B0\u00D1\u0082\u00D0\u00BE\u00D0\u00BC\u00D1\u0083\u00D0\u00BF\u00D1\u0080\u00D0\u00B0\u00D0\u00B2\u00D0\u00BB\u00D0\u00B8\u00D1\u0086\u00D0\u00B0\u00D0\u00BE\u00D0\u00B4\u00D0\u00BD\u00D0\u00B0\u00D0\u00B3\u00D0\u00BE\u00D0\u00B4\u00D1\u008B\u00D0\u00B7\u00D0\u00BD\u00D0\u00B0\u00D1\u008E\u00D0\u00BC\u00D0\u00BE\u00D0\u00B3\u00D1\u0083\u00D0\u00B4\u00D1\u0080\u00D1\u0083\u00D0\u00B3\u00D0\u00B2\u00D1\u0081\u00D0\u00B5\u00D0\u00B9\u00D0\u00B8\u00D0\u00B4\u00D0\u00B5\u00D1\u0082\u00D0\u00BA\u00D0\u00B8\u00D0\u00BD\u00D0\u00BE\u00D0\u00BE\u00D0\u00B4\u00D0\u00BD\u00D0\u00BE\u00D0\u00B4\u00D0\u00B5\u00D0\u00BB\u00D0\u00B0\u00D0\u00B4\u00D0\u00B5\u00D0\u00BB\u00D0\u00B5\u00D1\u0081\u00D1\u0080\u00D0\u00BE\u00D0\u00BA\u00D0\u00B8\u00D1\u008E\u00D0\u00BD\u00D1\u008F\u00D0\u00B2\u00D0\u00B5\u00D1\u0081\u00D1\u008C\u00D0\u0095\u00D1\u0081\u00D1\u0082\u00D1\u008C\u00D1\u0080\u00D0\u00B0\u00D0\u00B7\u00D0\u00B0\u00D0\u00BD\u00D0\u00B
0\u00D1\u0088\u00D0\u00B8\u00D8\u00A7\u00D9\u0084\u00D9\u0084\u00D9\u0087\u00D8\u00A7\u00D9\u0084\u00D8\u00AA\u00D9\u008A\u00D8\u00AC\u00D9\u0085\u00D9\u008A\u00D8\u00B9\u00D8\u00AE\u00D8\u00A7\u00D8\u00B5\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00B0\u00D9\u008A\u00D8\u00B9\u00D9\u0084\u00D9\u008A\u00D9\u0087\u00D8\u00AC\u00D8\u00AF\u00D9\u008A\u00D8\u00AF\u00D8\u00A7\u00D9\u0084\u00D8\u00A2\u00D9\u0086\u00D8\u00A7\u00D9\u0084\u00D8\u00B1\u00D8\u00AF\u00D8\u00AA\u00D8\u00AD\u00D9\u0083\u00D9\u0085\u00D8\u00B5\u00D9\u0081\u00D8\u00AD\u00D8\u00A9\u00D9\u0083\u00D8\u00A7\u00D9\u0086\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D9\u0084\u00D9\u008A\u00D9\u008A\u00D9\u0083\u00D9\u0088\u00D9\u0086\u00D8\u00B4\u00D8\u00A8\u00D9\u0083\u00D8\u00A9\u00D9\u0081\u00D9\u008A\u00D9\u0087\u00D8\u00A7\u00D8\u00A8\u00D9\u0086\u00D8\u00A7\u00D8\u00AA\u00D8\u00AD\u00D9\u0088\u00D8\u00A7\u00D8\u00A1\u00D8\u00A3\u00D9\u0083\u00D8\u00AB\u00D8\u00B1\u00D8\u00AE\u00D9\u0084\u00D8\u00A7\u00D9\u0084\u00D8\u00A7\u00D9\u0084\u00D8\u00AD\u00D8\u00A8\u00D8\u00AF\u00D9\u0084\u00D9\u008A\u00D9\u0084\u00D8\u00AF\u00D8\u00B1\u00D9\u0088\u00D8\u00B3\u00D8\u00A7\u00D8\u00B6\u00D8\u00BA\u00D8\u00B7\u00D8\u00AA\u00D9\u0083\u00D9\u0088\u00D9\u0086\u00D9\u0087\u00D9\u0086\u00D8\u00A7\u00D9\u0083\u00D8\u00B3\u00D8\u00A7\u00D8\u00AD\u00D8\u00A9\u00D9\u0086\u00D8\u00A7\u00D8\u00AF\u00D9\u008A\u00D8\u00A7\u00D9\u0084\u00D8\u00B7\u00D8\u00A8\u00D8\u00B9\u00D9\u0084\u00D9\u008A\u00D9\u0083\u00D8\u00B4\u00D9\u0083\u00D8\u00B1\u00D8\u00A7\u00D9\u008A\u00D9\u0085\u00D9\u0083\u00D9\u0086\u00D9\u0085\u00D9\u0086\u00D9\u0087\u00D8\u00A7\u00D8\u00B4\u00D8\u00B1\u00D9\u0083\u00D8\u00A9\u00D8\u00B1\u00D8\u00A6\u00D9\u008A\u00D8\u00B3\u00D9\u0086\u00D8\u00B4\u00D9\u008A\u00D8\u00B7\u00D9\u0085\u00D8\u00A7\u00D8\u00B0\u00D8\u00A7\u00D8\u00A7\u00D9\u0084\u00D9\u0081\u00D9\u0086\u00D8\u00B4\u00D8\u00A8\u00D8\u00A7\u00D8\u00A8\u00D8\u00AA\u00D8\u00B9\u00D8\u00A8\u00D8\u00B1\u00D8\u00B1\u00D8\u00AD\u00D9\u0085\u00D8\u00A9\u00D9\
u0083\u00D8\u00A7\u00D9\u0081\u00D8\u00A9\u00D9\u008A\u00D9\u0082\u00D9\u0088\u00D9\u0084\u00D9\u0085\u00D8\u00B1\u00D9\u0083\u00D8\u00B2\u00D9\u0083\u00D9\u0084\u00D9\u0085\u00D8\u00A9\u00D8\u00A3\u00D8\u00AD\u00D9\u0085\u00D8\u00AF\u00D9\u0082\u00D9\u0084\u00D8\u00A8\u00D9\u008A\u00D9\u008A\u00D8\u00B9\u00D9\u0086\u00D9\u008A\u00D8\u00B5\u00D9\u0088\u00D8\u00B1\u00D8\u00A9\u00D8\u00B7\u00D8\u00B1\u00D9\u008A\u00D9\u0082\u00D8\u00B4\u00D8\u00A7\u00D8\u00B1\u00D9\u0083\u00D8\u00AC\u00D9\u0088\u00D8\u00A7\u00D9\u0084\u00D8\u00A3\u00D8\u00AE\u00D8\u00B1\u00D9\u0089\u00D9\u0085\u00D8\u00B9\u00D9\u0086\u00D8\u00A7\u00D8\u00A7\u00D8\u00A8\u00D8\u00AD\u00D8\u00AB\u00D8\u00B9\u00D8\u00B1\u00D9\u0088\u00D8\u00B6\u00D8\u00A8\u00D8\u00B4\u00D9\u0083\u00D9\u0084\u00D9\u0085\u00D8\u00B3\u00D8\u00AC\u00D9\u0084\u00D8\u00A8\u00D9\u0086\u00D8\u00A7\u00D9\u0086\u00D8\u00AE\u00D8\u00A7\u00D9\u0084\u00D8\u00AF\u00D9\u0083\u00D8\u00AA\u00D8\u00A7\u00D8\u00A8\u00D9\u0083\u00D9\u0084\u00D9\u008A\u00D8\u00A9\u00D8\u00A8\u00D8\u00AF\u00D9\u0088\u00D9\u0086\u00D8\u00A3\u00D9\u008A\u00D8\u00B6\u00D8\u00A7\u00D9\u008A\u00D9\u0088\u00D8\u00AC\u00D8\u00AF\u00D9\u0081\u00D8\u00B1\u00D9\u008A\u00D9\u0082\u00D9\u0083\u00D8\u00AA\u00D8\u00A8\u00D8\u00AA\u00D8\u00A3\u00D9\u0081\u00D8\u00B6\u00D9\u0084\u00D9\u0085\u00D8\u00B7\u00D8\u00A8\u00D8\u00AE\u00D8\u00A7\u00D9\u0083\u00D8\u00AB\u00D8\u00B1\u00D8\u00A8\u00D8\u00A7\u00D8\u00B1\u00D9\u0083\u00D8\u00A7\u00D9\u0081\u00D8\u00B6\u00D9\u0084\u00D8\u00A7\u00D8\u00AD\u00D9\u0084\u00D9\u0089\u00D9\u0086\u00D9\u0081\u00D8\u00B3\u00D9\u0087\u00D8\u00A3\u00D9\u008A\u00D8\u00A7\u00D9\u0085\u00D8\u00B1\u00D8\u00AF\u00D9\u0088\u00D8\u00AF\u00D8\u00A3\u00D9\u0086\u00D9\u0087\u00D8\u00A7\u00D8\u00AF\u00D9\u008A\u00D9\u0086\u00D8\u00A7\u00D8\u00A7\u00D9\u0084\u00D8\u00A7\u00D9\u0086\u00D9\u0085\u00D8\u00B9\u00D8\u00B1\u00D8\u00B6\u00D8\u00AA\u00D8\u00B9\u00D9\u0084\u00D9\u0085\u00D8\u00AF\u00D8\u00A7\u00D8\u00AE\u00D9\u0084\u00D9\u0085\u00D9\u0085\u00D9\u0083\u0
0D9\u0086\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0001\u0000\u0001\u0000\u0001\u0000\u0002\u0000\u0002\u0000\u0002\u0000\u0002\u0000\u0004\u0000\u0004\u0000\u0004\u0000\u0004\u0000\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0007\u0006\u0005\u0004\u0003\u0002\u0001\u0000\u0008\t\n\u000B\u000C\r\u000E\u000F\u000F\u000E\r\u000C\u000B\n\t\u0008\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0017\u0016\u0015\u0014\u0013\u0012\u0011\u0010\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F\u001F\u001E\u001D\u001C\u001B\u001A\u0019\u0018\u00FF\u00FF\u00FF\u00FF\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00FF\u00FF\u00FF\u00FF\u0001\u0000\u0000\u0000\u0002\u0000\u0000\u0000\u0002\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0003\u0000\u0000\u0000\u00FF\u00FF\u0000\u0001\u0000\u0000\u0000\u0001\u0000\u0000\u00FF\u00FF\u0000\u0001\u0000\u0000\u0000\u0008\u0000\u0008\u0000\u0008\u0000\u0008\u0000\u0000\u0000\u0001\u0000\u0002\u0000\u0003\u0000\u0004\u0000\u0005\u0000\u0006\u0000\u0007resourcescountriesquestionsequipmentcommunityavailablehighlightDTD/xhtmlmarketingknowledgesomethingcontainerdirectionsubscribeadvertisecharacter\" value=\"</select>Australia\" class=\"situationauthorityfollowingprimarilyoperationchallengedevelopedanonymousfunction functionscompaniesstructureagreement\" title=\"potentialeducationargumentssecondarycopyrightlanguagesexclusivecondition</form>\r\nstatementattentionBiography} else {\nsolutionswhen the Analyticstemplatesdangeroussatellitedocumentspublisherimportantprototypeinfluence&raquo;</effectivegenerallytransformbeautifultransportorganizedpublishedprominentuntil thethumbnailNational .focus();over the migrationannouncedfooter\">\nexceptionless 
thanexpensiveformationframeworkterritoryndicationcurrentlyclassNamecriticismtraditionelsewhereAlexanderappointedmaterialsbroadcastmentionedaffiliate</option>treatmentdifferent/default.Presidentonclick=\"biographyotherwisepermanentFran\u00C3\u00A7aisHollywoodexpansionstandards</style>\nreductionDecember preferredCambridgeopponentsBusiness confusion>\n<title>presentedexplaineddoes not worldwideinterfacepositionsnewspaper</table>\nmountainslike the essentialfinancialselectionaction=\"/abandonedEducationparseInt(stabilityunable to</title>\nrelationsNote thatefficientperformedtwo yearsSince thethereforewrapper\">alternateincreasedBattle ofperceivedtrying tonecessaryportrayedelectionsElizabeth</iframe>discoveryinsurances.length;legendaryGeographycandidatecorporatesometimesservices.inherited</strong>CommunityreligiouslocationsCommitteebuildingsthe worldno longerbeginningreferencecannot befrequencytypicallyinto the relative;recordingpresidentinitiallytechniquethe otherit can beexistenceunderlinethis timetelephoneitemscopepracticesadvantage);return For otherprovidingdemocracyboth the extensivesufferingsupportedcomputers functionpracticalsaid thatit may beEnglish</from the scheduleddownloads</label>\nsuspectedmargin: 0spiritual</head>\n\nmicrosoftgraduallydiscussedhe becameexecutivejquery.jshouseholdconfirmedpurchasedliterallydestroyedup to thevariationremainingit is notcenturiesJapanese among thecompletedalgorithminterestsrebellionundefinedencourageresizableinvolvingsensitiveuniversalprovision(althoughfeaturingconducted), which continued-header\">February numerous overflow:componentfragmentsexcellentcolspan=\"technicalnear the Advanced source ofexpressedHong Kong Facebookmultiple mechanismelevationoffensive</form>\n\tsponsoreddocument.or &quot;there arethose whomovementsprocessesdifficultsubmittedrecommendconvincedpromoting\" width=\".replace(classicalcoalitionhis firstdecisionsassistantindicatedevolution-wrapper\"enough toalong thedelivered-->\r\n<!--American 
protectedNovember </style><furnitureInternet  onblur=\"suspendedrecipientbased on Moreover,abolishedcollectedwere madeemotionalemergencynarrativeadvocatespx;bordercommitteddir=\"ltr\"employeesresearch. selectedsuccessorcustomersdisplayedSeptemberaddClass(Facebook suggestedand lateroperatingelaborateSometimesInstitutecertainlyinstalledfollowersJerusalemthey havecomputinggeneratedprovincesguaranteearbitraryrecognizewanted topx;width:theory ofbehaviourWhile theestimatedbegan to it becamemagnitudemust havemore thanDirectoryextensionsecretarynaturallyoccurringvariablesgiven theplatform.</label><failed tocompoundskinds of societiesalongside --&gt;\n\nsouthwestthe rightradiationmay have unescape(spoken in\" href=\"/programmeonly the come fromdirectoryburied ina similarthey were</font></Norwegianspecifiedproducingpassenger(new DatetemporaryfictionalAfter theequationsdownload.regularlydeveloperabove thelinked tophenomenaperiod oftooltip\">substanceautomaticaspect ofAmong theconnectedestimatesAir Forcesystem ofobjectiveimmediatemaking itpaintingsconqueredare stillproceduregrowth ofheaded byEuropean divisionsmoleculesfranchiseintentionattractedchildhoodalso useddedicatedsingaporedegree offather ofconflicts</a></p>\ncame fromwere usednote thatreceivingExecutiveeven moreaccess tocommanderPoliticalmusiciansdeliciousprisonersadvent ofUTF-8\" /><![CDATA[\">ContactSouthern bgcolor=\"series of. 
It was in Europepermittedvalidate.appearingofficialsseriously-languageinitiatedextendinglong-terminflationsuch thatgetCookiemarked by</button>implementbut it isincreasesdown the requiringdependent-->\n<!-- interviewWith the copies ofconsensuswas builtVenezuela(formerlythe statepersonnelstrategicfavour ofinventionWikipediacontinentvirtuallywhich wasprincipleComplete identicalshow thatprimitiveaway frommolecularpreciselydissolvedUnder theversion=\">&nbsp;</It is the This is will haveorganismssome timeFriedrichwas firstthe only fact thatform id=\"precedingTechnicalphysicistoccurs innavigatorsection\">span id=\"sought tobelow thesurviving}</style>his deathas in thecaused bypartiallyexisting using thewas givena list oflevels ofnotion ofOfficial dismissedscientistresemblesduplicateexplosiverecoveredall othergalleries{padding:people ofregion ofaddressesassociateimg alt=\"in modernshould bemethod ofreportingtimestampneeded tothe Greatregardingseemed toviewed asimpact onidea thatthe Worldheight ofexpandingThese arecurrent\">carefullymaintainscharge ofClassicaladdressedpredictedownership<div id=\"right\">\r\nresidenceleave thecontent\">are often  })();\r\nprobably Professor-button\" respondedsays thathad to beplaced inHungarianstatus ofserves asUniversalexecutionaggregatefor whichinfectionagreed tohowever, popular\">placed onconstructelectoralsymbol ofincludingreturn toarchitectChristianprevious living ineasier toprofessor\n&lt;!-- effect ofanalyticswas takenwhere thetook overbelief inAfrikaansas far aspreventedwork witha special<fieldsetChristmasRetrieved\n\nIn the back intonortheastmagazines><strong>committeegoverninggroups ofstored inestablisha generalits firsttheir ownpopulatedan objectCaribbeanallow thedistrictswisconsinlocation.; width: inhabitedSocialistJanuary 1</footer>similarlychoice ofthe same specific business The first.length; desire todeal withsince theuserAgentconceivedindex.phpas &quot;engage inrecently,few yearswere also\n<head>\n<edited byare knowncities 
inaccesskeycondemnedalso haveservices,family ofSchool ofconvertednature of languageministers</object>there is a popularsequencesadvocatedThey wereany otherlocation=enter themuch morereflectedwas namedoriginal a typicalwhen theyengineerscould notresidentswednesdaythe third productsJanuary 2what theya certainreactionsprocessorafter histhe last contained\"></div>\n</a></td>depend onsearch\">\npieces ofcompetingReferencetennesseewhich has version=</span> <</header>gives thehistorianvalue=\"\">padding:0view thattogether,the most was foundsubset ofattack onchildren,points ofpersonal position:allegedlyClevelandwas laterand afterare givenwas stillscrollingdesign ofmakes themuch lessAmericans.\n\nAfter , but theMuseum oflouisiana(from theminnesotaparticlesa processDominicanvolume ofreturningdefensive00px|righmade frommouseover\" style=\"states of(which iscontinuesFranciscobuilding without awith somewho woulda form ofa part ofbefore itknown as  Serviceslocation and oftenmeasuringand it ispaperbackvalues of\r\n<title>= window.determineer&quot; played byand early</center>from thisthe threepower andof &quot;innerHTML<a href=\"y:inline;Church ofthe eventvery highofficial -height: content=\"/cgi-bin/to 
createafrikaansesperantofran\u00C3\u00A7aislatvie\u00C5\u00A1ulietuvi\u00C5\u00B3\u00C4\u008Ce\u00C5\u00A1tina\u00C4\u008De\u00C5\u00A1tina\u00E0\u00B9\u0084\u00E0\u00B8\u0097\u00E0\u00B8\u00A2\u00E6\u0097\u00A5\u00E6\u009C\u00AC\u00E8\u00AA\u009E\u00E7\u00AE\u0080\u00E4\u00BD\u0093\u00E5\u00AD\u0097\u00E7\u00B9\u0081\u00E9\u00AB\u0094\u00E5\u00AD\u0097\u00ED\u0095\u009C\u00EA\u00B5\u00AD\u00EC\u0096\u00B4\u00E4\u00B8\u00BA\u00E4\u00BB\u0080\u00E4\u00B9\u0088\u00E8\u00AE\u00A1\u00E7\u00AE\u0097\u00E6\u009C\u00BA\u00E7\u00AC\u0094\u00E8\u00AE\u00B0\u00E6\u009C\u00AC\u00E8\u00A8\u008E\u00E8\u00AB\u0096\u00E5\u008D\u0080\u00E6\u009C\u008D\u00E5\u008A\u00A1\u00E5\u0099\u00A8\u00E4\u00BA\u0092\u00E8\u0081\u0094\u00E7\u00BD\u0091\u00E6\u0088\u00BF\u00E5\u009C\u00B0\u00E4\u00BA\u00A7\u00E4\u00BF\u00B1\u00E4\u00B9\u0090\u00E9\u0083\u00A8\u00E5\u0087\u00BA\u00E7\u0089\u0088\u00E7\u00A4\u00BE\u00E6\u008E\u0092\u00E8\u00A1\u008C\u00E6\u00A6\u009C\u00E9\u0083\u00A8\u00E8\u0090\u00BD\u00E6\u00A0\u00BC\u00E8\u00BF\u009B\u00E4\u00B8\u0080\u00E6\u00AD\u00A5\u00E6\u0094\u00AF\u00E4\u00BB\u0098\u00E5\u00AE\u009D\u00E9\u00AA\u008C\u00E8\u00AF\u0081\u00E7\u00A0\u0081\u00E5\u00A7\u0094\u00E5\u0091\u0098\u00E4\u00BC\u009A\u00E6\u0095\u00B0\u00E6\u008D\u00AE\u00E5\u00BA\u0093\u00E6\u00B6\u0088\u00E8\u00B4\u00B9\u00E8\u0080\u0085\u00E5\u008A\u009E\u00E5\u0085\u00AC\u00E5\u00AE\u00A4\u00E8\u00AE\u00A8\u00E8\u00AE\u00BA\u00E5\u008C\u00BA\u00E6\u00B7\u00B1\u00E5\u009C\u00B3\u00E5\u00B8\u0082\u00E6\u0092\u00AD\u00E6\u0094\u00BE\u00E5\u0099\u00A8\u00E5\u008C\u0097\u00E4\u00BA\u00AC\u00E5\u00B8\u0082\u00E5\u00A4\u00A7\u00E5\u00AD\u00A6\u00E7\u0094\u009F\u00E8\u00B6\u008A\u00E6\u009D\u00A5\u00E8\u00B6\u008A\u00E7\u00AE\u00A1\u00E7\u0090\u0086\u00E5\u0091\u0098\u00E4\u00BF\u00A1\u00E6\u0081\u00AF\u00E7\u00BD\u0091serviciosart\u00C3\u00ADculoargentinabarcelonacualquierpublicadoproductospol\u00C3\u00ADticarespuestawikipediasiguienteb\u00C3\u00BAsquedacomunidadseguridadprincipalpreguntascontenidoresp
ondervenezuelaproblemasdiciembrerelaci\u00C3\u00B3nnoviembresimilaresproyectosprogramasinstitutoactividadencuentraeconom\u00C3\u00ADaim\u00C3\u00A1genescontactardescargarnecesarioatenci\u00C3\u00B3ntel\u00C3\u00A9fonocomisi\u00C3\u00B3ncancionescapacidadencontraran\u00C3\u00A1lisisfavoritost\u00C3\u00A9rminosprovinciaetiquetaselementosfuncionesresultadocar\u00C3\u00A1cterpropiedadprincipionecesidadmunicipalcreaci\u00C3\u00B3ndescargaspresenciacomercialopinionesejercicioeditorialsalamancagonz\u00C3\u00A1lezdocumentopel\u00C3\u00ADcularecientesgeneralestarragonapr\u00C3\u00A1cticanovedadespropuestapacientest\u00C3\u00A9cnicasobjetivoscontactos\u00E0\u00A4\u00AE\u00E0\u00A5\u0087\u00E0\u00A4\u0082\u00E0\u00A4\u00B2\u00E0\u00A4\u00BF\u00E0\u00A4\u008F\u00E0\u00A4\u00B9\u00E0\u00A5\u0088\u00E0\u00A4\u0082\u00E0\u00A4\u0097\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u00B8\u00E0\u00A4\u00BE\u00E0\u00A4\u00A5\u00E0\u00A4\u008F\u00E0\u00A4\u00B5\u00E0\u00A4\u0082\u00E0\u00A4\u00B0\u00E0\u00A4\u00B9\u00E0\u00A5\u0087\u00E0\u00A4\u0095\u00E0\u00A5\u008B\u00E0\u00A4\u0088\u00E0\u00A4\u0095\u00E0\u00A5\u0081\u00E0\u00A4\u009B\u00E0\u00A4\u00B0\u00E0\u00A4\u00B9\u00E0\u00A4\u00BE\u00E0\u00A4\u00AC\u00E0\u00A4\u00BE\u00E0\u00A4\u00A6\u00E0\u00A4\u0095\u00E0\u00A4\u00B9\u00E0\u00A4\u00BE\u00E0\u00A4\u00B8\u00E0\u00A4\u00AD\u00E0\u00A5\u0080\u00E0\u00A4\u00B9\u00E0\u00A5\u0081\u00E0\u00A4\u008F\u00E0\u00A4\u00B0\u00E0\u00A4\u00B9\u00E0\u00A5\u0080\u00E0\u00A4\u00AE\u00E0\u00A5\u0088\u00E0\u00A4\u0082\u00E0\u00A4\u00A6\u00E0\u00A4\u00BF\u00E0\u00A4\u00A8\u00E0\u00A4\u00AC\u00E0\u00A4\u00BE\u00E0\u00A4\u00A4diplodocs\u00E0\u00A4\u00B8\u00E0\u00A4\u00AE\u00E0\u00A4\u00AF\u00E0\u00A4\u00B0\u00E0\u00A5\u0082\u00E0\u00A4\u00AA\u00E0\u00A4\u00A8\u00E0\u00A4\u00BE\u00E0\u00A4\u00AE\u00E0\u00A4\u00AA\u00E0\u00A4\u00A4\u00E0\u00A4\u00BE\u00E0\u00A4\u00AB\u00E0\u00A4\u00BF\u00E0\u00A4\u00B0\u00E0\u00A4\u0094\u00E0\u00A4\u00B8\u00E0\u00A4\u00A4\u00E0\u00A4\u00A4\u00E0\u00A4\u00B0\u00E0\u00
A4\u00B9\u00E0\u00A4\u00B2\u00E0\u00A5\u008B\u00E0\u00A4\u0097\u00E0\u00A4\u00B9\u00E0\u00A5\u0081\u00E0\u00A4\u0086\u00E0\u00A4\u00AC\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u00A6\u00E0\u00A5\u0087\u00E0\u00A4\u00B6\u00E0\u00A4\u00B9\u00E0\u00A5\u0081\u00E0\u00A4\u0088\u00E0\u00A4\u0096\u00E0\u00A5\u0087\u00E0\u00A4\u00B2\u00E0\u00A4\u00AF\u00E0\u00A4\u00A6\u00E0\u00A4\u00BF\u00E0\u00A4\u0095\u00E0\u00A4\u00BE\u00E0\u00A4\u00AE\u00E0\u00A4\u00B5\u00E0\u00A5\u0087\u00E0\u00A4\u00AC\u00E0\u00A4\u00A4\u00E0\u00A5\u0080\u00E0\u00A4\u00A8\u00E0\u00A4\u00AC\u00E0\u00A5\u0080\u00E0\u00A4\u009A\u00E0\u00A4\u00AE\u00E0\u00A5\u008C\u00E0\u00A4\u00A4\u00E0\u00A4\u00B8\u00E0\u00A4\u00BE\u00E0\u00A4\u00B2\u00E0\u00A4\u00B2\u00E0\u00A5\u0087\u00E0\u00A4\u0096\u00E0\u00A4\u009C\u00E0\u00A5\u0089\u00E0\u00A4\u00AC\u00E0\u00A4\u00AE\u00E0\u00A4\u00A6\u00E0\u00A4\u00A6\u00E0\u00A4\u00A4\u00E0\u00A4\u00A5\u00E0\u00A4\u00BE\u00E0\u00A4\u00A8\u00E0\u00A4\u00B9\u00E0\u00A5\u0080\u00E0\u00A4\u00B6\u00E0\u00A4\u00B9\u00E0\u00A4\u00B0\u00E0\u00A4\u0085\u00E0\u00A4\u00B2\u00E0\u00A4\u0097\u00E0\u00A4\u0095\u00E0\u00A4\u00AD\u00E0\u00A5\u0080\u00E0\u00A4\u00A8\u00E0\u00A4\u0097\u00E0\u00A4\u00B0\u00E0\u00A4\u00AA\u00E0\u00A4\u00BE\u00E0\u00A4\u00B8\u00E0\u00A4\u00B0\u00E0\u00A4\u00BE\u00E0\u00A4\u00A4\u00E0\u00A4\u0095\u00E0\u00A4\u00BF\u00E0\u00A4\u008F\u00E0\u00A4\u0089\u00E0\u00A4\u00B8\u00E0\u00A5\u0087\u00E0\u00A4\u0097\u00E0\u00A4\u00AF\u00E0\u00A5\u0080\u00E0\u00A4\u00B9\u00E0\u00A5\u0082\u00E0\u00A4\u0081\u00E0\u00A4\u0086\u00E0\u00A4\u0097\u00E0\u00A5\u0087\u00E0\u00A4\u009F\u00E0\u00A5\u0080\u00E0\u00A4\u00AE\u00E0\u00A4\u0096\u00E0\u00A5\u008B\u00E0\u00A4\u009C\u00E0\u00A4\u0095\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u0085\u00E0\u00A4\u00AD\u00E0\u00A5\u0080\u00E0\u00A4\u0097\u00E0\u00A4\u00AF\u00E0\u00A5\u0087\u00E0\u00A4\u00A4\u00E0\u00A5\u0081\u00E0\u00A4\u00AE\u00E0\u00A4\u00B5\u00E0\u00A5\u008B\u00E0\u00A4\u009F\u00E0\u00A4\u00A6\u00E0\u00A5\u0087\u00E0\u00A4
\u0082\u00E0\u00A4\u0085\u00E0\u00A4\u0097\u00E0\u00A4\u00B0\u00E0\u00A4\u0090\u00E0\u00A4\u00B8\u00E0\u00A5\u0087\u00E0\u00A4\u00AE\u00E0\u00A5\u0087\u00E0\u00A4\u00B2\u00E0\u00A4\u00B2\u00E0\u00A4\u0097\u00E0\u00A4\u00BE\u00E0\u00A4\u00B9\u00E0\u00A4\u00BE\u00E0\u00A4\u00B2\u00E0\u00A4\u008A\u00E0\u00A4\u00AA\u00E0\u00A4\u00B0\u00E0\u00A4\u009A\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u0090\u00E0\u00A4\u00B8\u00E0\u00A4\u00BE\u00E0\u00A4\u00A6\u00E0\u00A5\u0087\u00E0\u00A4\u00B0\u00E0\u00A4\u009C\u00E0\u00A4\u00BF\u00E0\u00A4\u00B8\u00E0\u00A4\u00A6\u00E0\u00A4\u00BF\u00E0\u00A4\u00B2\u00E0\u00A4\u00AC\u00E0\u00A4\u0082\u00E0\u00A4\u00A6\u00E0\u00A4\u00AC\u00E0\u00A4\u00A8\u00E0\u00A4\u00BE\u00E0\u00A4\u00B9\u00E0\u00A5\u0082\u00E0\u00A4\u0082\u00E0\u00A4\u00B2\u00E0\u00A4\u00BE\u00E0\u00A4\u0096\u00E0\u00A4\u009C\u00E0\u00A5\u0080\u00E0\u00A4\u00A4\u00E0\u00A4\u00AC\u00E0\u00A4\u009F\u00E0\u00A4\u00A8\u00E0\u00A4\u00AE\u00E0\u00A4\u00BF\u00E0\u00A4\u00B2\u00E0\u00A4\u0087\u00E0\u00A4\u00B8\u00E0\u00A5\u0087\u00E0\u00A4\u0086\u00E0\u00A4\u00A8\u00E0\u00A5\u0087\u00E0\u00A4\u00A8\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u0095\u00E0\u00A5\u0081\u00E0\u00A4\u00B2\u00E0\u00A4\u00B2\u00E0\u00A5\u0089\u00E0\u00A4\u0097\u00E0\u00A4\u00AD\u00E0\u00A4\u00BE\u00E0\u00A4\u0097\u00E0\u00A4\u00B0\u00E0\u00A5\u0087\u00E0\u00A4\u00B2\u00E0\u00A4\u009C\u00E0\u00A4\u0097\u00E0\u00A4\u00B9\u00E0\u00A4\u00B0\u00E0\u00A4\u00BE\u00E0\u00A4\u00AE\u00E0\u00A4\u00B2\u00E0\u00A4\u0097\u00E0\u00A5\u0087\u00E0\u00A4\u00AA\u00E0\u00A5\u0087\u00E0\u00A4\u009C\u00E0\u00A4\u00B9\u00E0\u00A4\u00BE\u00E0\u00A4\u00A5\u00E0\u00A4\u0087\u00E0\u00A4\u00B8\u00E0\u00A5\u0080\u00E0\u00A4\u00B8\u00E0\u00A4\u00B9\u00E0\u00A5\u0080\u00E0\u00A4\u0095\u00E0\u00A4\u00B2\u00E0\u00A4\u00BE\u00E0\u00A4\u00A0\u00E0\u00A5\u0080\u00E0\u00A4\u0095\u00E0\u00A4\u00B9\u00E0\u00A4\u00BE\u00E0\u00A4\u0081\u00E0\u00A4\u00A6\u00E0\u00A5\u0082\u00E0\u00A4\u00B0\u00E0\u00A4\u00A4\u00E0\u00A4\u00B9\u00E0\u00A4\u
00A4\u00E0\u00A4\u00B8\u00E0\u00A4\u00BE\u00E0\u00A4\u00A4\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u00A6\u00E0\u00A4\u0086\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u00AA\u00E0\u00A4\u00BE\u00E0\u00A4\u0095\u00E0\u00A4\u0095\u00E0\u00A5\u008C\u00E0\u00A4\u00A8\u00E0\u00A4\u00B6\u00E0\u00A4\u00BE\u00E0\u00A4\u00AE\u00E0\u00A4\u00A6\u00E0\u00A5\u0087\u00E0\u00A4\u0096\u00E0\u00A4\u00AF\u00E0\u00A4\u00B9\u00E0\u00A5\u0080\u00E0\u00A4\u00B0\u00E0\u00A4\u00BE\u00E0\u00A4\u00AF\u00E0\u00A4\u0096\u00E0\u00A5\u0081\u00E0\u00A4\u00A6\u00E0\u00A4\u00B2\u00E0\u00A4\u0097\u00E0\u00A5\u0080categoriesexperience</title>\r\nCopyright javascriptconditionseverything<p class=\"technologybackground<a class=\"management&copy; 201javaScriptcharactersbreadcrumbthemselveshorizontalgovernmentCaliforniaactivitiesdiscoveredNavigationtransitionconnectionnavigationappearance</title><mcheckbox\" techniquesprotectionapparentlyas well asunt', 'UA-resolutionoperationstelevisiontranslatedWashingtonnavigator. 
= window.impression&lt;br&gt;literaturepopulationbgcolor=\"#especially content=\"productionnewsletterpropertiesdefinitionleadershipTechnologyParliamentcomparisonul class=\".indexOf(\"conclusiondiscussioncomponentsbiologicalRevolution_containerunderstoodnoscript><permissioneach otheratmosphere onfocus=\"<form id=\"processingthis.valuegenerationConferencesubsequentwell-knownvariationsreputationphenomenondisciplinelogo.png\" (document,boundariesexpressionsettlementBackgroundout of theenterprise(\"https:\" unescape(\"password\" democratic<a href=\"/wrapper\">\nmembershiplinguisticpx;paddingphilosophyassistanceuniversityfacilitiesrecognizedpreferenceif (typeofmaintainedvocabularyhypothesis.submit();&amp;nbsp;annotationbehind theFoundationpublisher\"assumptionintroducedcorruptionscientistsexplicitlyinstead ofdimensions onClick=\"considereddepartmentoccupationsoon afterinvestmentpronouncedidentifiedexperimentManagementgeographic\" height=\"link rel=\".replace(/depressionconferencepunishmenteliminatedresistanceadaptationoppositionwell knownsupplementdeterminedh1 class=\"0px;marginmechanicalstatisticscelebratedGovernment\n\nDuring tdevelopersartificialequivalentoriginatedCommissionattachment<span id=\"there wereNederlandsbeyond theregisteredjournalistfrequentlyall of thelang=\"en\" </style>\r\nabsolute; supportingextremely mainstream</strong> popularityemployment</table>\r\n colspan=\"</form>\n  conversionabout the </p></div>integrated\" lang=\"enPortuguesesubstituteindividualimpossiblemultimediaalmost allpx solid #apart fromsubject toin Englishcriticizedexcept forguidelinesoriginallyremarkablethe secondh2 class=\"<a title=\"(includingparametersprohibited= \"http://dictionaryperceptionrevolutionfoundationpx;height:successfulsupportersmillenniumhis fatherthe &quot;no-repeat;commercialindustrialencouragedamount of unofficialefficiencyReferencescoordinatedisclaimerexpeditiondevelopingcalculatedsimplifiedlegitimatesubstring(0\" class=\"completelyillustratefive 
yearsinstrumentPublishing1\" class=\"psychologyconfidencenumber of absence offocused onjoined thestructurespreviously></iframe>once againbut ratherimmigrantsof course,a group ofLiteratureUnlike the</a>&nbsp;\nfunction it was theConventionautomobileProtestantaggressiveafter the Similarly,\" /></div>collection\r\nfunctionvisibilitythe use ofvolunteersattractionunder the threatened*<![CDATA[importancein generalthe latter</form>\n</.indexOf('i = 0; i <differencedevoted totraditionssearch forultimatelytournamentattributesso-called }\n</style>evaluationemphasizedaccessible</section>successionalong withMeanwhile,industries</a><br />has becomeaspects ofTelevisionsufficientbasketballboth sidescontinuingan article<img alt=\"adventureshis mothermanchesterprinciplesparticularcommentaryeffects ofdecided to\"><strong>publishersJournal ofdifficultyfacilitateacceptablestyle.css\"\tfunction innovation>Copyrightsituationswould havebusinessesDictionarystatementsoften usedpersistentin Januarycomprising</title>\n\tdiplomaticcontainingperformingextensionsmay not beconcept of onclick=\"It is alsofinancial making theLuxembourgadditionalare calledengaged in\"script\");but it waselectroniconsubmit=\"\n<!-- End electricalofficiallysuggestiontop of theunlike theAustralianOriginallyreferences\n</head>\r\nrecognisedinitializelimited toAlexandriaretirementAdventuresfour years\n\n&lt;!-- increasingdecorationh3 class=\"origins ofobligationregulationclassified(function(advantagesbeing the historians<base hrefrepeatedlywilling tocomparabledesignatednominationfunctionalinside therevelationend of thes for the authorizedrefused totake placeautonomouscompromisepolitical restauranttwo of theFebruary 2quality ofswfobject.understandnearly allwritten byinterviews\" width=\"1withdrawalfloat:leftis usuallycandidatesnewspapersmysteriousDepartmentbest knownparliamentsuppressedconvenientremembereddifferent systematichas led topropagandacontrolledinfluencesceremonialproclaimedProtectionli 
class=\"Scientificclass=\"no-trademarksmore than widespreadLiberationtook placeday of theas long asimprisonedAdditional\n<head>\n<mLaboratoryNovember 2exceptionsIndustrialvariety offloat: lefDuring theassessmenthave been deals withStatisticsoccurrence/ul></div>clearfix\">the publicmany yearswhich wereover time,synonymouscontent\">\npresumablyhis familyuserAgent.unexpectedincluding challengeda minorityundefined\"belongs totaken fromin Octoberposition: said to bereligious Federation rowspan=\"only a fewmeant thatled to the-->\r\n<div <fieldset>Archbishop class=\"nobeing usedapproachesprivilegesnoscript>\nresults inmay be theEaster eggmechanismsreasonablePopulationCollectionselected\">noscript>\r/index.phparrival of-jssdk'));managed toincompletecasualtiescompletionChristiansSeptember arithmeticproceduresmight haveProductionit appearsPhilosophyfriendshipleading togiving thetoward theguaranteeddocumentedcolor:#000video gamecommissionreflectingchange theassociatedsans-serifonkeypress; padding:He was theunderlyingtypically , and the srcElementsuccessivesince the should be networkingaccountinguse of thelower thanshows that</span>\n\t\tcomplaintscontinuousquantitiesastronomerhe did notdue to itsapplied toan averageefforts tothe futureattempt toTherefore,capabilityRepublicanwas formedElectronickilometerschallengespublishingthe formerindigenousdirectionssubsidiaryconspiracydetails ofand in theaffordablesubstancesreason forconventionitemtype=\"absolutelysupposedlyremained aattractivetravellingseparatelyfocuses onelementaryapplicablefound thatstylesheetmanuscriptstands for no-repeat(sometimesCommercialin Americaundertakenquarter ofan examplepersonallyindex.php?</button>\npercentagebest-knowncreating a\" dir=\"ltrLieutenant\n<div id=\"they wouldability ofmade up ofnoted thatclear thatargue thatto anotherchildren'spurpose offormulatedbased uponthe regionsubject ofpassengerspossession.\n\nIn the Before theafterwardscurrently across thescientificcommunity.capitalismin 
Germanyright-wingthe systemSociety ofpoliticiandirection:went on toremoval of New York apartmentsindicationduring theunless thehistoricalhad been adefinitiveingredientattendanceCenter forprominencereadyStatestrategiesbut in theas part ofconstituteclaim thatlaboratorycompatiblefailure of, such as began withusing the to providefeature offrom which/\" class=\"geologicalseveral ofdeliberateimportant holds thating&quot; valign=topthe Germanoutside ofnegotiatedhis careerseparationid=\"searchwas calledthe fourthrecreationother thanpreventionwhile the education,connectingaccuratelywere builtwas killedagreementsmuch more Due to thewidth: 100some otherKingdom ofthe entirefamous forto connectobjectivesthe Frenchpeople andfeatured\">is said tostructuralreferendummost oftena separate->\n<div id Official worldwide.aria-labelthe planetand it wasd\" value=\"looking atbeneficialare in themonitoringreportedlythe modernworking onallowed towhere the innovative</a></div>soundtracksearchFormtend to beinput id=\"opening ofrestrictedadopted byaddressingtheologianmethods ofvariant ofChristian very largeautomotiveby far therange frompursuit offollow thebrought toin Englandagree thataccused ofcomes frompreventingdiv style=his or hertremendousfreedom ofconcerning0 1em 1em;Basketball/style.cssan earliereven after/\" title=\".com/indextaking thepittsburghcontent\">\r<script>(fturned outhaving the</span>\r\n occasionalbecause itstarted tophysically></div>\n  created byCurrently, bgcolor=\"tabindex=\"disastrousAnalytics also has a><div id=\"</style>\n<called forsinger and.src = \"//violationsthis pointconstantlyis locatedrecordingsd from 
thenederlandsportugu\u00C3\u00AAs\u00D7\u00A2\u00D7\u0091\u00D7\u00A8\u00D7\u0099\u00D7\u00AA\u00D9\u0081\u00D8\u00A7\u00D8\u00B1\u00D8\u00B3\u00DB\u008Cdesarrollocomentarioeducaci\u00C3\u00B3nseptiembreregistradodirecci\u00C3\u00B3nubicaci\u00C3\u00B3npublicidadrespuestasresultadosimportantereservadosart\u00C3\u00ADculosdiferentessiguientesrep\u00C3\u00BAblicasituaci\u00C3\u00B3nministerioprivacidaddirectorioformaci\u00C3\u00B3npoblaci\u00C3\u00B3npresidentecontenidosaccesoriostechnoratipersonalescategor\u00C3\u00ADaespecialesdisponibleactualidadreferenciavalladolidbibliotecarelacionescalendariopol\u00C3\u00ADticasanterioresdocumentosnaturalezamaterialesdiferenciaecon\u00C3\u00B3micatransporterodr\u00C3\u00ADguezparticiparencuentrandiscusi\u00C3\u00B3nestructurafundaci\u00C3\u00B3nfrecuentespermanentetotalmente\u00D0\u00BC\u00D0\u00BE\u00D0\u00B6\u00D0\u00BD\u00D0\u00BE\u00D0\u00B1\u00D1\u0083\u00D0\u00B4\u00D0\u00B5\u00D1\u0082\u00D0\u00BC\u00D0\u00BE\u00D0\u00B6\u00D0\u00B5\u00D1\u0082\u00D0\u00B2\u00D1\u0080\u00D0\u00B5\u00D0\u00BC\u00D1\u008F\u00D1\u0082\u00D0\u00B0\u00D0\u00BA\u00D0\u00B6\u00D0\u00B5\u00D1\u0087\u00D1\u0082\u00D0\u00BE\u00D0\u00B1\u00D1\u008B\u00D0\u00B1\u00D0\u00BE\u00D0\u00BB\u00D0\u00B5\u00D0\u00B5\u00D0\u00BE\u00D1\u0087\u00D0\u00B5\u00D0\u00BD\u00D1\u008C\u00D1\u008D\u00D1\u0082\u00D0\u00BE\u00D0\u00B3\u00D0\u00BE\u00D0\u00BA\u00D0\u00BE\u00D0\u00B3\u00D0\u00B4\u00D0\u00B0\u00D0\u00BF\u00D0\u00BE\u00D1\u0081\u00D0\u00BB\u00D0\u00B5\u00D0\u00B2\u00D1\u0081\u00D0\u00B5\u00D0\u00B3\u00D0\u00BE\u00D1\u0081\u00D0\u00B0\u00D0\u00B9\u00D1\u0082\u00D0\u00B5\u00D1\u0087\u00D0\u00B5\u00D1\u0080\u00D0\u00B5\u00D0\u00B7\u00D0\u00BC\u00D0\u00BE\u00D0\u00B3\u00D1\u0083\u00D1\u0082\u00D1\u0081\u00D0\u00B0\u00D0\u00B9\u00D1\u0082\u00D0\u00B0\u00D0\u00B6\u00D0\u00B8\u00D0\u00B7\u00D0\u00BD\u00D0\u00B8\u00D0\u00BC\u00D0\u00B5\u00D0\u00B6\u00D0\u00B4\u00D1\u0083\u00D0\u00B1\u00D1\u0083\u00D0\u00B4\u00D1\u0083\u00D1\u0082\u00D0\u009F\u00D0\u00BE\u00D0\u00B8\
u00D1\u0081\u00D0\u00BA\u00D0\u00B7\u00D0\u00B4\u00D0\u00B5\u00D1\u0081\u00D1\u008C\u00D0\u00B2\u00D0\u00B8\u00D0\u00B4\u00D0\u00B5\u00D0\u00BE\u00D1\u0081\u00D0\u00B2\u00D1\u008F\u00D0\u00B7\u00D0\u00B8\u00D0\u00BD\u00D1\u0083\u00D0\u00B6\u00D0\u00BD\u00D0\u00BE\u00D1\u0081\u00D0\u00B2\u00D0\u00BE\u00D0\u00B5\u00D0\u00B9\u00D0\u00BB\u00D1\u008E\u00D0\u00B4\u00D0\u00B5\u00D0\u00B9\u00D0\u00BF\u00D0\u00BE\u00D1\u0080\u00D0\u00BD\u00D0\u00BE\u00D0\u00BC\u00D0\u00BD\u00D0\u00BE\u00D0\u00B3\u00D0\u00BE\u00D0\u00B4\u00D0\u00B5\u00D1\u0082\u00D0\u00B5\u00D0\u00B9\u00D1\u0081\u00D0\u00B2\u00D0\u00BE\u00D0\u00B8\u00D1\u0085\u00D0\u00BF\u00D1\u0080\u00D0\u00B0\u00D0\u00B2\u00D0\u00B0\u00D1\u0082\u00D0\u00B0\u00D0\u00BA\u00D0\u00BE\u00D0\u00B9\u00D0\u00BC\u00D0\u00B5\u00D1\u0081\u00D1\u0082\u00D0\u00BE\u00D0\u00B8\u00D0\u00BC\u00D0\u00B5\u00D0\u00B5\u00D1\u0082\u00D0\u00B6\u00D0\u00B8\u00D0\u00B7\u00D0\u00BD\u00D1\u008C\u00D0\u00BE\u00D0\u00B4\u00D0\u00BD\u00D0\u00BE\u00D0\u00B9\u00D0\u00BB\u00D1\u0083\u00D1\u0087\u00D1\u0088\u00D0\u00B5\u00D0\u00BF\u00D0\u00B5\u00D1\u0080\u00D0\u00B5\u00D0\u00B4\u00D1\u0087\u00D0\u00B0\u00D1\u0081\u00D1\u0082\u00D0\u00B8\u00D1\u0087\u00D0\u00B0\u00D1\u0081\u00D1\u0082\u00D1\u008C\u00D1\u0080\u00D0\u00B0\u00D0\u00B1\u00D0\u00BE\u00D1\u0082\u00D0\u00BD\u00D0\u00BE\u00D0\u00B2\u00D1\u008B\u00D1\u0085\u00D0\u00BF\u00D1\u0080\u00D0\u00B0\u00D0\u00B2\u00D0\u00BE\u00D1\u0081\u00D0\u00BE\u00D0\u00B1\u00D0\u00BE\u00D0\u00B9\u00D0\u00BF\u00D0\u00BE\u00D1\u0082\u00D0\u00BE\u00D0\u00BC\u00D0\u00BC\u00D0\u00B5\u00D0\u00BD\u00D0\u00B5\u00D0\u00B5\u00D1\u0087\u00D0\u00B8\u00D1\u0081\u00D0\u00BB\u00D0\u00B5\u00D0\u00BD\u00D0\u00BE\u00D0\u00B2\u00D1\u008B\u00D0\u00B5\u00D1\u0083\u00D1\u0081\u00D0\u00BB\u00D1\u0083\u00D0\u00B3\u00D0\u00BE\u00D0\u00BA\u00D0\u00BE\u00D0\u00BB\u00D0\u00BE\u00D0\u00BD\u00D0\u00B0\u00D0\u00B7\u00D0\u00B0\u00D0\u00B4\u00D1\u0082\u00D0\u00B0\u00D0\u00BA\u00D0\u00BE\u00D0\u00B5\u00D1\u0082\u00D0\u00BE\u00D0\u00B3\u00D0\u00B4\u00D0\u0
0B0\u00D0\u00BF\u00D0\u00BE\u00D1\u0087\u00D1\u0082\u00D0\u00B8\u00D0\u009F\u00D0\u00BE\u00D1\u0081\u00D0\u00BB\u00D0\u00B5\u00D1\u0082\u00D0\u00B0\u00D0\u00BA\u00D0\u00B8\u00D0\u00B5\u00D0\u00BD\u00D0\u00BE\u00D0\u00B2\u00D1\u008B\u00D0\u00B9\u00D1\u0081\u00D1\u0082\u00D0\u00BE\u00D0\u00B8\u00D1\u0082\u00D1\u0082\u00D0\u00B0\u00D0\u00BA\u00D0\u00B8\u00D1\u0085\u00D1\u0081\u00D1\u0080\u00D0\u00B0\u00D0\u00B7\u00D1\u0083\u00D0\u00A1\u00D0\u00B0\u00D0\u00BD\u00D0\u00BA\u00D1\u0082\u00D1\u0084\u00D0\u00BE\u00D1\u0080\u00D1\u0083\u00D0\u00BC\u00D0\u009A\u00D0\u00BE\u00D0\u00B3\u00D0\u00B4\u00D0\u00B0\u00D0\u00BA\u00D0\u00BD\u00D0\u00B8\u00D0\u00B3\u00D0\u00B8\u00D1\u0081\u00D0\u00BB\u00D0\u00BE\u00D0\u00B2\u00D0\u00B0\u00D0\u00BD\u00D0\u00B0\u00D1\u0088\u00D0\u00B5\u00D0\u00B9\u00D0\u00BD\u00D0\u00B0\u00D0\u00B9\u00D1\u0082\u00D0\u00B8\u00D1\u0081\u00D0\u00B2\u00D0\u00BE\u00D0\u00B8\u00D0\u00BC\u00D1\u0081\u00D0\u00B2\u00D1\u008F\u00D0\u00B7\u00D1\u008C\u00D0\u00BB\u00D1\u008E\u00D0\u00B1\u00D0\u00BE\u00D0\u00B9\u00D1\u0087\u00D0\u00B0\u00D1\u0081\u00D1\u0082\u00D0\u00BE\u00D1\u0081\u00D1\u0080\u00D0\u00B5\u00D0\u00B4\u00D0\u00B8\u00D0\u009A\u00D1\u0080\u00D0\u00BE\u00D0\u00BC\u00D0\u00B5\u00D0\u00A4\u00D0\u00BE\u00D1\u0080\u00D1\u0083\u00D0\u00BC\u00D1\u0080\u00D1\u008B\u00D0\u00BD\u00D0\u00BA\u00D0\u00B5\u00D1\u0081\u00D1\u0082\u00D0\u00B0\u00D0\u00BB\u00D0\u00B8\u00D0\u00BF\u00D0\u00BE\u00D0\u00B8\u00D1\u0081\u00D0\u00BA\u00D1\u0082\u00D1\u008B\u00D1\u0081\u00D1\u008F\u00D1\u0087\u00D0\u00BC\u00D0\u00B5\u00D1\u0081\u00D1\u008F\u00D1\u0086\u00D1\u0086\u00D0\u00B5\u00D0\u00BD\u00D1\u0082\u00D1\u0080\u00D1\u0082\u00D1\u0080\u00D1\u0083\u00D0\u00B4\u00D0\u00B0\u00D1\u0081\u00D0\u00B0\u00D0\u00BC\u00D1\u008B\u00D1\u0085\u00D1\u0080\u00D1\u008B\u00D0\u00BD\u00D0\u00BA\u00D0\u00B0\u00D0\u009D\u00D0\u00BE\u00D0\u00B2\u00D1\u008B\u00D0\u00B9\u00D1\u0087\u00D0\u00B0\u00D1\u0081\u00D0\u00BE\u00D0\u00B2\u00D0\u00BC\u00D0\u00B5\u00D1\u0081\u00D1\u0082\u00D0\u00B0\u00D1\u0084\u00D
0\u00B8\u00D0\u00BB\u00D1\u008C\u00D0\u00BC\u00D0\u00BC\u00D0\u00B0\u00D1\u0080\u00D1\u0082\u00D0\u00B0\u00D1\u0081\u00D1\u0082\u00D1\u0080\u00D0\u00B0\u00D0\u00BD\u00D0\u00BC\u00D0\u00B5\u00D1\u0081\u00D1\u0082\u00D0\u00B5\u00D1\u0082\u00D0\u00B5\u00D0\u00BA\u00D1\u0081\u00D1\u0082\u00D0\u00BD\u00D0\u00B0\u00D1\u0088\u00D0\u00B8\u00D1\u0085\u00D0\u00BC\u00D0\u00B8\u00D0\u00BD\u00D1\u0083\u00D1\u0082\u00D0\u00B8\u00D0\u00BC\u00D0\u00B5\u00D0\u00BD\u00D0\u00B8\u00D0\u00B8\u00D0\u00BC\u00D0\u00B5\u00D1\u008E\u00D1\u0082\u00D0\u00BD\u00D0\u00BE\u00D0\u00BC\u00D0\u00B5\u00D1\u0080\u00D0\u00B3\u00D0\u00BE\u00D1\u0080\u00D0\u00BE\u00D0\u00B4\u00D1\u0081\u00D0\u00B0\u00D0\u00BC\u00D0\u00BE\u00D0\u00BC\u00D1\u008D\u00D1\u0082\u00D0\u00BE\u00D0\u00BC\u00D1\u0083\u00D0\u00BA\u00D0\u00BE\u00D0\u00BD\u00D1\u0086\u00D0\u00B5\u00D1\u0081\u00D0\u00B2\u00D0\u00BE\u00D0\u00B5\u00D0\u00BC\u00D0\u00BA\u00D0\u00B0\u00D0\u00BA\u00D0\u00BE\u00D0\u00B9\u00D0\u0090\u00D1\u0080\u00D1\u0085\u00D0\u00B8\u00D0\u00B2\u00D9\u0085\u00D9\u0086\u00D8\u00AA\u00D8\u00AF\u00D9\u0089\u00D8\u00A5\u00D8\u00B1\u00D8\u00B3\u00D8\u00A7\u00D9\u0084\u00D8\u00B1\u00D8\u00B3\u00D8\u00A7\u00D9\u0084\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00B9\u00D8\u00A7\u00D9\u0085\u00D9\u0083\u00D8\u00AA\u00D8\u00A8\u00D9\u0087\u00D8\u00A7\u00D8\u00A8\u00D8\u00B1\u00D8\u00A7\u00D9\u0085\u00D8\u00AC\u00D8\u00A7\u00D9\u0084\u00D9\u008A\u00D9\u0088\u00D9\u0085\u00D8\u00A7\u00D9\u0084\u00D8\u00B5\u00D9\u0088\u00D8\u00B1\u00D8\u00AC\u00D8\u00AF\u00D9\u008A\u00D8\u00AF\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00B9\u00D8\u00B6\u00D9\u0088\u00D8\u00A5\u00D8\u00B6\u00D8\u00A7\u00D9\u0081\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D9\u0082\u00D8\u00B3\u00D9\u0085\u00D8\u00A7\u00D9\u0084\u00D8\u00B9\u00D8\u00A7\u00D8\u00A8\u00D8\u00AA\u00D8\u00AD\u00D9\u0085\u00D9\u008A\u00D9\u0084\u00D9\u0085\u00D9\u0084\u00D9\u0081\u00D8\u00A7\u00D8\u00AA\u00D9\u0085\u00D9\u0084\u00D8\u00AA\u00D9\u0082\u00D9\u0089\u00D8\u00AA\u00D8\u00B9\u00D8\u00AF\
u00D9\u008A\u00D9\u0084\u00D8\u00A7\u00D9\u0084\u00D8\u00B4\u00D8\u00B9\u00D8\u00B1\u00D8\u00A3\u00D8\u00AE\u00D8\u00A8\u00D8\u00A7\u00D8\u00B1\u00D8\u00AA\u00D8\u00B7\u00D9\u0088\u00D9\u008A\u00D8\u00B1\u00D8\u00B9\u00D9\u0084\u00D9\u008A\u00D9\u0083\u00D9\u0085\u00D8\u00A5\u00D8\u00B1\u00D9\u0081\u00D8\u00A7\u00D9\u0082\u00D8\u00B7\u00D9\u0084\u00D8\u00A8\u00D8\u00A7\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D9\u0084\u00D8\u00BA\u00D8\u00A9\u00D8\u00AA\u00D8\u00B1\u00D8\u00AA\u00D9\u008A\u00D8\u00A8\u00D8\u00A7\u00D9\u0084\u00D9\u0086\u00D8\u00A7\u00D8\u00B3\u00D8\u00A7\u00D9\u0084\u00D8\u00B4\u00D9\u008A\u00D8\u00AE\u00D9\u0085\u00D9\u0086\u00D8\u00AA\u00D8\u00AF\u00D9\u008A\u00D8\u00A7\u00D9\u0084\u00D8\u00B9\u00D8\u00B1\u00D8\u00A8\u00D8\u00A7\u00D9\u0084\u00D9\u0082\u00D8\u00B5\u00D8\u00B5\u00D8\u00A7\u00D9\u0081\u00D9\u0084\u00D8\u00A7\u00D9\u0085\u00D8\u00B9\u00D9\u0084\u00D9\u008A\u00D9\u0087\u00D8\u00A7\u00D8\u00AA\u00D8\u00AD\u00D8\u00AF\u00D9\u008A\u00D8\u00AB\u00D8\u00A7\u00D9\u0084\u00D9\u0084\u00D9\u0087\u00D9\u0085\u00D8\u00A7\u00D9\u0084\u00D8\u00B9\u00D9\u0085\u00D9\u0084\u00D9\u0085\u00D9\u0083\u00D8\u00AA\u00D8\u00A8\u00D8\u00A9\u00D9\u008A\u00D9\u0085\u00D9\u0083\u00D9\u0086\u00D9\u0083\u00D8\u00A7\u00D9\u0084\u00D8\u00B7\u00D9\u0081\u00D9\u0084\u00D9\u0081\u00D9\u008A\u00D8\u00AF\u00D9\u008A\u00D9\u0088\u00D8\u00A5\u00D8\u00AF\u00D8\u00A7\u00D8\u00B1\u00D8\u00A9\u00D8\u00AA\u00D8\u00A7\u00D8\u00B1\u00D9\u008A\u00D8\u00AE\u00D8\u00A7\u00D9\u0084\u00D8\u00B5\u00D8\u00AD\u00D8\u00A9\u00D8\u00AA\u00D8\u00B3\u00D8\u00AC\u00D9\u008A\u00D9\u0084\u00D8\u00A7\u00D9\u0084\u00D9\u0088\u00D9\u0082\u00D8\u00AA\u00D8\u00B9\u00D9\u0086\u00D8\u00AF\u00D9\u0085\u00D8\u00A7\u00D9\u0085\u00D8\u00AF\u00D9\u008A\u00D9\u0086\u00D8\u00A9\u00D8\u00AA\u00D8\u00B5\u00D9\u0085\u00D9\u008A\u00D9\u0085\u00D8\u00A3\u00D8\u00B1\u00D8\u00B4\u00D9\u008A\u00D9\u0081\u00D8\u00A7\u00D9\u0084\u00D8\u00B0\u00D9\u008A\u00D9\u0086\u00D8\u00B9\u00D8\u00B1\u00D8\u00A8\u00D9\u008A\u00D8\u0
0A9\u00D8\u00A8\u00D9\u0088\u00D8\u00A7\u00D8\u00A8\u00D8\u00A9\u00D8\u00A3\u00D9\u0084\u00D8\u00B9\u00D8\u00A7\u00D8\u00A8\u00D8\u00A7\u00D9\u0084\u00D8\u00B3\u00D9\u0081\u00D8\u00B1\u00D9\u0085\u00D8\u00B4\u00D8\u00A7\u00D9\u0083\u00D9\u0084\u00D8\u00AA\u00D8\u00B9\u00D8\u00A7\u00D9\u0084\u00D9\u0089\u00D8\u00A7\u00D9\u0084\u00D8\u00A3\u00D9\u0088\u00D9\u0084\u00D8\u00A7\u00D9\u0084\u00D8\u00B3\u00D9\u0086\u00D8\u00A9\u00D8\u00AC\u00D8\u00A7\u00D9\u0085\u00D8\u00B9\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00B5\u00D8\u00AD\u00D9\u0081\u00D8\u00A7\u00D9\u0084\u00D8\u00AF\u00D9\u008A\u00D9\u0086\u00D9\u0083\u00D9\u0084\u00D9\u0085\u00D8\u00A7\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D8\u00AE\u00D8\u00A7\u00D8\u00B5\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D9\u0084\u00D9\u0081\u00D8\u00A3\u00D8\u00B9\u00D8\u00B6\u00D8\u00A7\u00D8\u00A1\u00D9\u0083\u00D8\u00AA\u00D8\u00A7\u00D8\u00A8\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00AE\u00D9\u008A\u00D8\u00B1\u00D8\u00B1\u00D8\u00B3\u00D8\u00A7\u00D8\u00A6\u00D9\u0084\u00D8\u00A7\u00D9\u0084\u00D9\u0082\u00D9\u0084\u00D8\u00A8\u00D8\u00A7\u00D9\u0084\u00D8\u00A3\u00D8\u00AF\u00D8\u00A8\u00D9\u0085\u00D9\u0082\u00D8\u00A7\u00D8\u00B7\u00D8\u00B9\u00D9\u0085\u00D8\u00B1\u00D8\u00A7\u00D8\u00B3\u00D9\u0084\u00D9\u0085\u00D9\u0086\u00D8\u00B7\u00D9\u0082\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D9\u0083\u00D8\u00AA\u00D8\u00A8\u00D8\u00A7\u00D9\u0084\u00D8\u00B1\u00D8\u00AC\u00D9\u0084\u00D8\u00A7\u00D8\u00B4\u00D8\u00AA\u00D8\u00B1\u00D9\u0083\u00D8\u00A7\u00D9\u0084\u00D9\u0082\u00D8\u00AF\u00D9\u0085\u00D9\u008A\u00D8\u00B9\u00D8\u00B7\u00D9\u008A\u00D9\u0083sByTagName(.jpg\" alt=\"1px solid #.gif\" alt=\"transparentinformationapplication\" onclick=\"establishedadvertising.png\" alt=\"environmentperformanceappropriate&amp;mdash;immediately</strong></rather thantemperaturedevelopmentcompetitionplaceholdervisibility:copyright\">0\" height=\"even thoughreplacementdestinationCorporation<ul 
class=\"AssociationindividualsperspectivesetTimeout(url(http://mathematicsmargin-top:eventually description) no-repeatcollections.JPG|thumb|participate/head><bodyfloat:left;<li class=\"hundreds of\n\nHowever, compositionclear:both;cooperationwithin the label for=\"border-top:New Zealandrecommendedphotographyinteresting&lt;sup&gt;controversyNetherlandsalternativemaxlength=\"switzerlandDevelopmentessentially\n\nAlthough </textarea>thunderbirdrepresented&amp;ndash;speculationcommunitieslegislationelectronics\n\t<div id=\"illustratedengineeringterritoriesauthoritiesdistributed6\" height=\"sans-serif;capable of disappearedinteractivelooking forit would beAfghanistanwas createdMath.floor(surroundingcan also beobservationmaintenanceencountered<h2 class=\"more recentit has beeninvasion of).getTime()fundamentalDespite the\"><div id=\"inspirationexaminationpreparationexplanation<input id=\"</a></span>versions ofinstrumentsbefore the  = 'http://Descriptionrelatively .substring(each of theexperimentsinfluentialintegrationmany peopledue to the combinationdo not haveMiddle East<noscript><copyright\" perhaps theinstitutionin Decemberarrangementmost famouspersonalitycreation oflimitationsexclusivelysovereignty-content\">\n<td class=\"undergroundparallel todoctrine ofoccupied byterminologyRenaissancea number ofsupport forexplorationrecognitionpredecessor<img src=\"/<h1 class=\"publicationmay also bespecialized</fieldset>progressivemillions ofstates thatenforcementaround the one another.parentNodeagricultureAlternativeresearcherstowards theMost of themany other (especially<td width=\";width:100%independent<h3 class=\" onchange=\").addClass(interactionOne of the daughter ofaccessoriesbranches of\r\n<div id=\"the largestdeclarationregulationsInformationtranslationdocumentaryin order to\">\n<head>\n<\" height=\"1across the orientation);</script>implementedcan be seenthere was ademonstratecontainer\">connectionsthe Britishwas written!important;px; margin-followed byability to 
complicatedduring the immigrationalso called<h4 class=\"distinctionreplaced bygovernmentslocation ofin Novemberwhether the</p>\n</div>acquisitioncalled the persecutiondesignation{font-size:appeared ininvestigateexperiencedmost likelywidely useddiscussionspresence of (document.extensivelyIt has beenit does notcontrary toinhabitantsimprovementscholarshipconsumptioninstructionfor exampleone or morepx; paddingthe currenta series ofare usuallyrole in thepreviously derivativesevidence ofexperiencescolorschemestated thatcertificate</a></div>\n selected=\"high schoolresponse tocomfortableadoption ofthree yearsthe countryin Februaryso that thepeople who provided by<param nameaffected byin terms ofappointmentISO-8859-1\"was born inhistorical regarded asmeasurementis based on and other : function(significantcelebrationtransmitted/js/jquery.is known astheoretical tabindex=\"it could be<noscript>\nhaving been\r\n<head>\r\n< &quot;The compilationhe had beenproduced byphilosopherconstructedintended toamong othercompared toto say thatEngineeringa differentreferred todifferencesbelief thatphotographsidentifyingHistory of Republic ofnecessarilyprobabilitytechnicallyleaving thespectacularfraction ofelectricityhead of therestaurantspartnershipemphasis onmost recentshare with saying thatfilled withdesigned toit is often\"></iframe>as follows:merged withthrough thecommercial pointed outopportunityview of therequirementdivision ofprogramminghe receivedsetInterval\"></span></in New Yorkadditional compression\n\n<div id=\"incorporate;</script><attachEventbecame the \" target=\"_carried outSome of thescience andthe time ofContainer\">maintainingChristopherMuch of thewritings of\" height=\"2size of theversion of mixture of between theExamples ofeducationalcompetitive onsubmit=\"director ofdistinctive/DTD XHTML relating totendency toprovince ofwhich woulddespite thescientific legislature.innerHTML allegationsAgriculturewas used inapproach tointelligentyears 
later,sans-serifdeterminingPerformanceappearances, which is foundationsabbreviatedhigher thans from the individual composed ofsupposed toclaims thatattributionfont-size:1elements ofHistorical his brotherat the timeanniversarygoverned byrelated to ultimately innovationsit is stillcan only bedefinitionstoGMTStringA number ofimg class=\"Eventually,was changedoccurred inneighboringdistinguishwhen he wasintroducingterrestrialMany of theargues thatan Americanconquest ofwidespread were killedscreen and In order toexpected todescendantsare locatedlegislativegenerations backgroundmost peopleyears afterthere is nothe highestfrequently they do notargued thatshowed thatpredominanttheologicalby the timeconsideringshort-lived</span></a>can be usedvery littleone of the had alreadyinterpretedcommunicatefeatures ofgovernment,</noscript>entered the\" height=\"3Independentpopulationslarge-scale. Although used in thedestructionpossibilitystarting intwo or moreexpressionssubordinatelarger thanhistory and</option>\r\nContinentaleliminatingwill not bepractice ofin front ofsite of theensure thatto create amississippipotentiallyoutstandingbetter thanwhat is nowsituated inmeta name=\"TraditionalsuggestionsTranslationthe form ofatmosphericideologicalenterprisescalculatingeast of theremnants ofpluginspage/index.php?remained intransformedHe was alsowas alreadystatisticalin favor ofMinistry ofmovement offormulationis required<link rel=\"This is the <a href=\"/popularizedinvolved inare used toand severalmade by theseems to belikely thatPalestiniannamed afterit had beenmost commonto refer tobut this isconsecutivetemporarilyIn general,conventionstakes placesubdivisionterritorialoperationalpermanentlywas largelyoutbreak ofin the pastfollowing a xmlns:og=\"><a class=\"class=\"textConversion may be usedmanufactureafter beingclearfix\">\nquestion ofwas electedto become abecause of some peopleinspired bysuccessful a time whenmore commonamongst thean officialwidth:100%;technology,was adoptedto keep 
thesettlementslive birthsindex.html\"Connecticutassigned to&amp;times;account foralign=rightthe companyalways beenreturned toinvolvementBecause thethis period\" name=\"q\" confined toa result ofvalue=\"\" />is actuallyEnvironment\r\n</head>\r\nConversely,>\n<div id=\"0\" width=\"1is probablyhave becomecontrollingthe problemcitizens ofpoliticiansreached theas early as:none; over<table cellvalidity ofdirectly toonmousedownwhere it iswhen it wasmembers of relation toaccommodatealong with In the latethe Englishdelicious\">this is notthe presentif they areand finallya matter of\r\n\t</div>\r\n\r\n</script>faster thanmajority ofafter whichcomparativeto maintainimprove theawarded theer\" class=\"frameborderrestorationin the sameanalysis oftheir firstDuring the continentalsequence offunction(){font-size: work on the</script>\n<begins withjavascript:constituentwas foundedequilibriumassume thatis given byneeds to becoordinatesthe variousare part ofonly in thesections ofis a commontheories ofdiscoveriesassociationedge of thestrength ofposition inpresent-dayuniversallyto form thebut insteadcorporationattached tois commonlyreasons for &quot;the can be madewas able towhich meansbut did notonMouseOveras possibleoperated bycoming fromthe primaryaddition offor severaltransferreda period ofare able tohowever, itshould havemuch larger\n\t</script>adopted theproperty ofdirected byeffectivelywas broughtchildren ofProgramminglonger thanmanuscriptswar againstby means ofand most ofsimilar to proprietaryoriginatingprestigiousgrammaticalexperience.to make theIt was alsois found incompetitorsin the U.S.replace thebrought thecalculationfall of thethe generalpracticallyin honor ofreleased inresidentialand some ofking of thereaction to1st Earl ofculture andprincipally</title>\n  they can beback to thesome of hisexposure toare similarform of theaddFavoritecitizenshippart in thepeople within practiceto continue&amp;minus;approved by the first allowed theand for thefunctioningplaying thesolution 
toheight=\"0\" in his bookmore than afollows thecreated thepresence in&nbsp;</td>nationalistthe idea ofa characterwere forced class=\"btndays of thefeatured inshowing theinterest inin place ofturn of thethe head ofLord of thepoliticallyhas its ownEducationalapproval ofsome of theeach other,behavior ofand becauseand anotherappeared onrecorded inblack&quot;may includethe world'scan lead torefers to aborder=\"0\" government winning theresulted in while the Washington,the subjectcity in the></div>\r\n\t\treflect theto completebecame moreradioactiverejected bywithout anyhis father,which couldcopy of theto indicatea politicalaccounts ofconstitutesworked wither</a></li>of his lifeaccompaniedclientWidthprevent theLegislativedifferentlytogether inhas severalfor anothertext of thefounded thee with the is used forchanged theusually theplace wherewhereas the> <a href=\"\"><a href=\"themselves,although hethat can betraditionalrole of theas a resultremoveChilddesigned bywest of theSome peopleproduction,side of thenewslettersused by thedown to theaccepted bylive in theattempts tooutside thefrequenciesHowever, inprogrammersat least inapproximatealthough itwas part ofand variousGovernor ofthe articleturned into><a href=\"/the economyis the mostmost widelywould laterand perhapsrise to theoccurs whenunder whichconditions.the westerntheory thatis producedthe city ofin which heseen in thethe centralbuilding ofmany of hisarea of theis the onlymost of themany of thethe WesternThere is noextended toStatisticalcolspan=2 |short storypossible totopologicalcritical ofreported toa Christiandecision tois equal toproblems ofThis can bemerchandisefor most ofno evidenceeditions ofelements in&quot;. 
Thecom/images/which makesthe processremains theliterature,is a memberthe popularthe ancientproblems intime of thedefeated bybody of thea few yearsmuch of thethe work ofCalifornia,served as agovernment.concepts ofmovement in\t\t<div id=\"it\" value=\"language ofas they areproduced inis that theexplain thediv></div>\nHowever thelead to the\t<a href=\"/was grantedpeople havecontinuallywas seen asand relatedthe role ofproposed byof the besteach other.Constantinepeople fromdialects ofto revisionwas renameda source ofthe initiallaunched inprovide theto the westwhere thereand similarbetween twois also theEnglish andconditions,that it wasentitled tothemselves.quantity ofransparencythe same asto join thecountry andthis is theThis led toa statementcontrast tolastIndexOfthrough hisis designedthe term isis providedprotect theng</a></li>The currentthe site ofsubstantialexperience,in the Westthey shouldsloven\u00C4\u008Dinacomentariosuniversidadcondicionesactividadesexperienciatecnolog\u00C3\u00ADaproducci\u00C3\u00B3npuntuaci\u00C3\u00B3naplicaci\u00C3\u00B3ncontrase\u00C3\u00B1acategor\u00C3\u00ADasregistrarseprofesionaltratamientoreg\u00C3\u00ADstratesecretar\u00C3\u00ADaprincipalesprotecci\u00C3\u00B3nimportantesimportanciaposibilidadinteresantecrecimientonecesidadessuscribirseasociaci\u00C3\u00B3ndisponiblesevaluaci\u00C3\u00B3nestudiantesresponsableresoluci\u00C3\u00B3nguadalajararegistradosoportunidadcomercialesfotograf\u00C3\u00ADaautoridadesingenier\u00C3\u00ADatelevisi\u00C3\u00B3ncompetenciaoperacionesestablecidosimplementeactualmentenavegaci\u00C3\u00B3nconformidadline-height:font-family:\" : \"http://applicationslink\" href=\"specifically//<![CDATA[\nOrganizationdistribution0px; height:relationshipdevice-width<div class=\"<label for=\"registration</noscript>\n/index.html\"window.open( !important;application/independence//www.googleorganizationautocompleterequirementsconservative<form name=\"intellectualmargin-left:18th centuryan importantinstitutionsabbreviation<img 
class=\"organisationcivilization19th centuryarchitectureincorporated20th century-container\">most notably/></a></div>notification'undefined')Furthermore,believe thatinnerHTML = prior to thedramaticallyreferring tonegotiationsheadquartersSouth AfricaunsuccessfulPennsylvaniaAs a result,<html lang=\"&lt;/sup&gt;dealing withphiladelphiahistorically);</script>\npadding-top:experimentalgetAttributeinstructionstechnologiespart of the =function(){subscriptionl.dtd\">\r\n<htgeographicalConstitution', function(supported byagriculturalconstructionpublicationsfont-size: 1a variety of<div style=\"Encyclopediaiframe src=\"demonstratedaccomplisheduniversitiesDemographics);</script><dedicated toknowledge ofsatisfactionparticularly</div></div>English (US)appendChild(transmissions. However, intelligence\" tabindex=\"float:right;Commonwealthranging fromin which theat least onereproductionencyclopedia;font-size:1jurisdictionat that time\"><a class=\"In addition,description+conversationcontact withis generallyr\" content=\"representing&lt;math&gt;presentationoccasionally<img width=\"navigation\">compensationchampionshipmedia=\"all\" violation ofreference toreturn true;Strict//EN\" transactionsinterventionverificationInformation difficultiesChampionshipcapabilities<![endif]-->}\n</script>\nChristianityfor example,Professionalrestrictionssuggest thatwas released(such as theremoveClass(unemploymentthe Americanstructure of/index.html published inspan class=\"\"><a href=\"/introductionbelonging toclaimed thatconsequences<meta name=\"Guide to theoverwhelmingagainst the concentrated,\n.nontouch observations</a>\n</div>\nf (document.border: 1px {font-size:1treatment of0\" height=\"1modificationIndependencedivided intogreater thanachievementsestablishingJavaScript\" neverthelesssignificanceBroadcasting>&nbsp;</td>container\">\nsuch as the influence ofa particularsrc='http://navigation\" half of the substantial &nbsp;</div>advantage ofdiscovery offundamental metropolitanthe opposite\" 
xml:lang=\"deliberatelyalign=centerevolution ofpreservationimprovementsbeginning inJesus ChristPublicationsdisagreementtext-align:r, function()similaritiesbody></html>is currentlyalphabeticalis sometimestype=\"image/many of the flow:hidden;available indescribe theexistence ofall over thethe Internet\t<ul class=\"installationneighborhoodarmed forcesreducing thecontinues toNonetheless,temperatures\n\t\t<a href=\"close to theexamples of is about the(see below).\" id=\"searchprofessionalis availablethe official\t\t</script>\n\n\t\t<div id=\"accelerationthrough the Hall of Famedescriptionstranslationsinterference type='text/recent yearsin the worldvery popular{background:traditional some of the connected toexploitationemergence ofconstitutionA History ofsignificant manufacturedexpectations><noscript><can be foundbecause the has not beenneighbouringwithout the added to the\t<li class=\"instrumentalSoviet Unionacknowledgedwhich can bename for theattention toattempts to developmentsIn fact, the<li class=\"aimplicationssuitable formuch of the colonizationpresidentialcancelBubble Informationmost of the is describedrest of the more or lessin SeptemberIntelligencesrc=\"http://px; height: available tomanufacturerhuman rightslink href=\"/availabilityproportionaloutside the astronomicalhuman beingsname of the are found inare based onsmaller thana person whoexpansion ofarguing thatnow known asIn the earlyintermediatederived fromScandinavian</a></div>\r\nconsider thean estimatedthe National<div id=\"pagresulting incommissionedanalogous toare required/ul>\n</div>\nwas based onand became a&nbsp;&nbsp;t\" value=\"\" was capturedno more thanrespectivelycontinue to >\r\n<head>\r\n<were createdmore generalinformation used for theindependent the Imperialcomponent ofto the northinclude the Constructionside of the would not befor instanceinvention ofmore complexcollectivelybackground: text-align: its originalinto accountthis processan extensivehowever, thethey are notrejected thecriticism 
ofduring whichprobably thethis article(function(){It should bean agreementaccidentallydiffers fromArchitecturebetter knownarrangementsinfluence onattended theidentical tosouth of thepass throughxml\" title=\"weight:bold;creating thedisplay:nonereplaced the<img src=\"/ihttps://www.World War IItestimonialsfound in therequired to and that thebetween the was designedconsists of considerablypublished bythe languageConservationconsisted ofrefer to theback to the css\" media=\"People from available onproved to besuggestions\"was known asvarieties oflikely to becomprised ofsupport the hands of thecoupled withconnect and border:none;performancesbefore beinglater becamecalculationsoften calledresidents ofmeaning that><li class=\"evidence forexplanationsenvironments\"></a></div>which allowsIntroductiondeveloped bya wide rangeon behalf ofvalign=\"top\"principle ofat the time,</noscript>\rsaid to havein the firstwhile othershypotheticalphilosopherspower of thecontained inperformed byinability towere writtenspan style=\"input name=\"the questionintended forrejection ofimplies thatinvented thethe standardwas probablylink betweenprofessor ofinteractionschanging theIndian Ocean class=\"lastworking with'http://www.years beforeThis was therecreationalentering themeasurementsan extremelyvalue of thestart of the\n</script>\n\nan effort toincrease theto the southspacing=\"0\">sufficientlythe Europeanconverted toclearTimeoutdid not haveconsequentlyfor the nextextension ofeconomic andalthough theare producedand with theinsufficientgiven by thestating thatexpenditures</span></a>\nthought thaton the basiscellpadding=image of thereturning toinformation,separated byassassinateds\" content=\"authority ofnorthwestern</div>\n<div \"></div>\r\n  consultationcommunity ofthe nationalit should beparticipants align=\"leftthe greatestselection ofsupernaturaldependent onis mentionedallowing thewas inventedaccompanyinghis personalavailable atstudy of theon the otherexecution ofHuman Rightsterms of 
theassociationsresearch andsucceeded bydefeated theand from thebut they arecommander ofstate of theyears of agethe study of<ul class=\"splace in thewhere he was<li class=\"fthere are nowhich becamehe publishedexpressed into which thecommissionerfont-weight:territory ofextensions\">Roman Empireequal to theIn contrast,however, andis typicallyand his wife(also called><ul class=\"effectively evolved intoseem to havewhich is thethere was noan excellentall of thesedescribed byIn practice,broadcastingcharged withreflected insubjected tomilitary andto the pointeconomicallysetTargetingare actuallyvictory over();</script>continuouslyrequired forevolutionaryan effectivenorth of the, which was front of theor otherwisesome form ofhad not beengenerated byinformation.permitted toincludes thedevelopment,entered intothe previous";
+			}
+		}
+
+		private class DataHolder2
+		{
+			internal static string GetData()
+			{
+				return "consistentlyare known asthe field ofthis type ofgiven to thethe title ofcontains theinstances ofin the northdue to theirare designedcorporationswas that theone of thesemore popularsucceeded insupport fromin differentdominated bydesigned forownership ofand possiblystandardizedresponseTextwas intendedreceived theassumed thatareas of theprimarily inthe basis ofin the senseaccounts fordestroyed byat least twowas declaredcould not beSecretary ofappear to bemargin-top:1/^\\s+|\\s+$/ge){throw e};the start oftwo separatelanguage andwho had beenoperation ofdeath of thereal numbers\t<link rel=\"provided thethe story ofcompetitionsenglish (UK)english (US)\u00D0\u009C\u00D0\u00BE\u00D0\u00BD\u00D0\u00B3\u00D0\u00BE\u00D0\u00BB\u00D0\u00A1\u00D1\u0080\u00D0\u00BF\u00D1\u0081\u00D0\u00BA\u00D0\u00B8\u00D1\u0081\u00D1\u0080\u00D0\u00BF\u00D1\u0081\u00D0\u00BA\u00D0\u00B8\u00D1\u0081\u00D1\u0080\u00D0\u00BF\u00D1\u0081\u00D0\u00BA\u00D0\u00BE\u00D9\u0084\u00D8\u00B9\u00D8\u00B1\u00D8\u00A8\u00D9\u008A\u00D8\u00A9\u00E6\u00AD\u00A3\u00E9\u00AB\u0094\u00E4\u00B8\u00AD\u00E6\u0096\u0087\u00E7\u00AE\u0080\u00E4\u00BD\u0093\u00E4\u00B8\u00AD\u00E6\u0096\u0087\u00E7\u00B9\u0081\u00E4\u00BD\u0093\u00E4\u00B8\u00AD\u00E6\u0096\u0087\u00E6\u009C\u0089\u00E9\u0099\u0090\u00E5\u0085\u00AC\u00E5\u008F\u00B8\u00E4\u00BA\u00BA\u00E6\u00B0\u0091\u00E6\u0094\u00BF\u00E5\u00BA\u009C\u00E9\u0098\u00BF\u00E9\u0087\u008C\u00E5\u00B7\u00B4\u00E5\u00B7\u00B4\u00E7\u00A4\u00BE\u00E4\u00BC\u009A\u00E4\u00B8\u00BB\u00E4\u00B9\u0089\u00E6\u0093\u008D\u00E4\u00BD\u009C\u00E7\u00B3\u00BB\u00E7\u00BB\u009F\u00E6\u0094\u00BF\u00E7\u00AD\u0096\u00E6\u00B3\u0095\u00E8\u00A7\u0084informaci\u00C3\u00B3nherramientaselectr\u00C3\u00B3nicodescripci\u00C3\u00B3nclasificadosconocimientopublicaci\u00C3\u00B3nrelacionadasinform\u00C3\u00A1ticarelacionadosdepartamentotrabajadoresdirectamenteayuntamientomercadoLibrecont\u00C3\u00A1ctenoshabitacionescumplimientorestaurantesdisposici\u00C3\u00B3nconsecuenciaelect
r\u00C3\u00B3nicaaplicacionesdesconectadoinstalaci\u00C3\u00B3nrealizaci\u00C3\u00B3nutilizaci\u00C3\u00B3nenciclopediaenfermedadesinstrumentosexperienciasinstituci\u00C3\u00B3nparticularessubcategoria\u00D1\u0082\u00D0\u00BE\u00D0\u00BB\u00D1\u008C\u00D0\u00BA\u00D0\u00BE\u00D0\u00A0\u00D0\u00BE\u00D1\u0081\u00D1\u0081\u00D0\u00B8\u00D0\u00B8\u00D1\u0080\u00D0\u00B0\u00D0\u00B1\u00D0\u00BE\u00D1\u0082\u00D1\u008B\u00D0\u00B1\u00D0\u00BE\u00D0\u00BB\u00D1\u008C\u00D1\u0088\u00D0\u00B5\u00D0\u00BF\u00D1\u0080\u00D0\u00BE\u00D1\u0081\u00D1\u0082\u00D0\u00BE\u00D0\u00BC\u00D0\u00BE\u00D0\u00B6\u00D0\u00B5\u00D1\u0082\u00D0\u00B5\u00D0\u00B4\u00D1\u0080\u00D1\u0083\u00D0\u00B3\u00D0\u00B8\u00D1\u0085\u00D1\u0081\u00D0\u00BB\u00D1\u0083\u00D1\u0087\u00D0\u00B0\u00D0\u00B5\u00D1\u0081\u00D0\u00B5\u00D0\u00B9\u00D1\u0087\u00D0\u00B0\u00D1\u0081\u00D0\u00B2\u00D1\u0081\u00D0\u00B5\u00D0\u00B3\u00D0\u00B4\u00D0\u00B0\u00D0\u00A0\u00D0\u00BE\u00D1\u0081\u00D1\u0081\u00D0\u00B8\u00D1\u008F\u00D0\u009C\u00D0\u00BE\u00D1\u0081\u00D0\u00BA\u00D0\u00B2\u00D0\u00B5\u00D0\u00B4\u00D1\u0080\u00D1\u0083\u00D0\u00B3\u00D0\u00B8\u00D0\u00B5\u00D0\u00B3\u00D0\u00BE\u00D1\u0080\u00D0\u00BE\u00D0\u00B4\u00D0\u00B0\u00D0\u00B2\u00D0\u00BE\u00D0\u00BF\u00D1\u0080\u00D0\u00BE\u00D1\u0081\u00D0\u00B4\u00D0\u00B0\u00D0\u00BD\u00D0\u00BD\u00D1\u008B\u00D1\u0085\u00D0\u00B4\u00D0\u00BE\u00D0\u00BB\u00D0\u00B6\u00D0\u00BD\u00D1\u008B\u00D0\u00B8\u00D0\u00BC\u00D0\u00B5\u00D0\u00BD\u00D0\u00BD\u00D0\u00BE\u00D0\u009C\u00D0\u00BE\u00D1\u0081\u00D0\u00BA\u00D0\u00B2\u00D1\u008B\u00D1\u0080\u00D1\u0083\u00D0\u00B1\u00D0\u00BB\u00D0\u00B5\u00D0\u00B9\u00D0\u009C\u00D0\u00BE\u00D1\u0081\u00D0\u00BA\u00D0\u00B2\u00D0\u00B0\u00D1\u0081\u00D1\u0082\u00D1\u0080\u00D0\u00B0\u00D0\u00BD\u00D1\u008B\u00D0\u00BD\u00D0\u00B8\u00D1\u0087\u00D0\u00B5\u00D0\u00B3\u00D0\u00BE\u00D1\u0080\u00D0\u00B0\u00D0\u00B1\u00D0\u00BE\u00D1\u0082\u00D0\u00B5\u00D0\u00B4\u00D0\u00BE\u00D0\u00BB\u00D0\u00B6\u00D0\u00B5\u00D0\u00B
D\u00D1\u0083\u00D1\u0081\u00D0\u00BB\u00D1\u0083\u00D0\u00B3\u00D0\u00B8\u00D1\u0082\u00D0\u00B5\u00D0\u00BF\u00D0\u00B5\u00D1\u0080\u00D1\u008C\u00D0\u009E\u00D0\u00B4\u00D0\u00BD\u00D0\u00B0\u00D0\u00BA\u00D0\u00BE\u00D0\u00BF\u00D0\u00BE\u00D1\u0082\u00D0\u00BE\u00D0\u00BC\u00D1\u0083\u00D1\u0080\u00D0\u00B0\u00D0\u00B1\u00D0\u00BE\u00D1\u0082\u00D1\u0083\u00D0\u00B0\u00D0\u00BF\u00D1\u0080\u00D0\u00B5\u00D0\u00BB\u00D1\u008F\u00D0\u00B2\u00D0\u00BE\u00D0\u00BE\u00D0\u00B1\u00D1\u0089\u00D0\u00B5\u00D0\u00BE\u00D0\u00B4\u00D0\u00BD\u00D0\u00BE\u00D0\u00B3\u00D0\u00BE\u00D1\u0081\u00D0\u00B2\u00D0\u00BE\u00D0\u00B5\u00D0\u00B3\u00D0\u00BE\u00D1\u0081\u00D1\u0082\u00D0\u00B0\u00D1\u0082\u00D1\u008C\u00D0\u00B8\u00D0\u00B4\u00D1\u0080\u00D1\u0083\u00D0\u00B3\u00D0\u00BE\u00D0\u00B9\u00D1\u0084\u00D0\u00BE\u00D1\u0080\u00D1\u0083\u00D0\u00BC\u00D0\u00B5\u00D1\u0085\u00D0\u00BE\u00D1\u0080\u00D0\u00BE\u00D1\u0088\u00D0\u00BE\u00D0\u00BF\u00D1\u0080\u00D0\u00BE\u00D1\u0082\u00D0\u00B8\u00D0\u00B2\u00D1\u0081\u00D1\u0081\u00D1\u008B\u00D0\u00BB\u00D0\u00BA\u00D0\u00B0\u00D0\u00BA\u00D0\u00B0\u00D0\u00B6\u00D0\u00B4\u00D1\u008B\u00D0\u00B9\u00D0\u00B2\u00D0\u00BB\u00D0\u00B0\u00D1\u0081\u00D1\u0082\u00D0\u00B8\u00D0\u00B3\u00D1\u0080\u00D1\u0083\u00D0\u00BF\u00D0\u00BF\u00D1\u008B\u00D0\u00B2\u00D0\u00BC\u00D0\u00B5\u00D1\u0081\u00D1\u0082\u00D0\u00B5\u00D1\u0080\u00D0\u00B0\u00D0\u00B1\u00D0\u00BE\u00D1\u0082\u00D0\u00B0\u00D1\u0081\u00D0\u00BA\u00D0\u00B0\u00D0\u00B7\u00D0\u00B0\u00D0\u00BB\u00D0\u00BF\u00D0\u00B5\u00D1\u0080\u00D0\u00B2\u00D1\u008B\u00D0\u00B9\u00D0\u00B4\u00D0\u00B5\u00D0\u00BB\u00D0\u00B0\u00D1\u0082\u00D1\u008C\u00D0\u00B4\u00D0\u00B5\u00D0\u00BD\u00D1\u008C\u00D0\u00B3\u00D0\u00B8\u00D0\u00BF\u00D0\u00B5\u00D1\u0080\u00D0\u00B8\u00D0\u00BE\u00D0\u00B4\u00D0\u00B1\u00D0\u00B8\u00D0\u00B7\u00D0\u00BD\u00D0\u00B5\u00D1\u0081\u00D0\u00BE\u00D1\u0081\u00D0\u00BD\u00D0\u00BE\u00D0\u00B2\u00D0\u00B5\u00D0\u00BC\u00D0\u00BE\u00D0\u00BC\u00D0\u00B5\u00D0\
u00BD\u00D1\u0082\u00D0\u00BA\u00D1\u0083\u00D0\u00BF\u00D0\u00B8\u00D1\u0082\u00D1\u008C\u00D0\u00B4\u00D0\u00BE\u00D0\u00BB\u00D0\u00B6\u00D0\u00BD\u00D0\u00B0\u00D1\u0080\u00D0\u00B0\u00D0\u00BC\u00D0\u00BA\u00D0\u00B0\u00D1\u0085\u00D0\u00BD\u00D0\u00B0\u00D1\u0087\u00D0\u00B0\u00D0\u00BB\u00D0\u00BE\u00D0\u00A0\u00D0\u00B0\u00D0\u00B1\u00D0\u00BE\u00D1\u0082\u00D0\u00B0\u00D0\u00A2\u00D0\u00BE\u00D0\u00BB\u00D1\u008C\u00D0\u00BA\u00D0\u00BE\u00D1\u0081\u00D0\u00BE\u00D0\u00B2\u00D1\u0081\u00D0\u00B5\u00D0\u00BC\u00D0\u00B2\u00D1\u0082\u00D0\u00BE\u00D1\u0080\u00D0\u00BE\u00D0\u00B9\u00D0\u00BD\u00D0\u00B0\u00D1\u0087\u00D0\u00B0\u00D0\u00BB\u00D0\u00B0\u00D1\u0081\u00D0\u00BF\u00D0\u00B8\u00D1\u0081\u00D0\u00BE\u00D0\u00BA\u00D1\u0081\u00D0\u00BB\u00D1\u0083\u00D0\u00B6\u00D0\u00B1\u00D1\u008B\u00D1\u0081\u00D0\u00B8\u00D1\u0081\u00D1\u0082\u00D0\u00B5\u00D0\u00BC\u00D0\u00BF\u00D0\u00B5\u00D1\u0087\u00D0\u00B0\u00D1\u0082\u00D0\u00B8\u00D0\u00BD\u00D0\u00BE\u00D0\u00B2\u00D0\u00BE\u00D0\u00B3\u00D0\u00BE\u00D0\u00BF\u00D0\u00BE\u00D0\u00BC\u00D0\u00BE\u00D1\u0089\u00D0\u00B8\u00D1\u0081\u00D0\u00B0\u00D0\u00B9\u00D1\u0082\u00D0\u00BE\u00D0\u00B2\u00D0\u00BF\u00D0\u00BE\u00D1\u0087\u00D0\u00B5\u00D0\u00BC\u00D1\u0083\u00D0\u00BF\u00D0\u00BE\u00D0\u00BC\u00D0\u00BE\u00D1\u0089\u00D1\u008C\u00D0\u00B4\u00D0\u00BE\u00D0\u00BB\u00D0\u00B6\u00D0\u00BD\u00D0\u00BE\u00D1\u0081\u00D1\u0081\u00D1\u008B\u00D0\u00BB\u00D0\u00BA\u00D0\u00B8\u00D0\u00B1\u00D1\u008B\u00D1\u0081\u00D1\u0082\u00D1\u0080\u00D0\u00BE\u00D0\u00B4\u00D0\u00B0\u00D0\u00BD\u00D0\u00BD\u00D1\u008B\u00D0\u00B5\u00D0\u00BC\u00D0\u00BD\u00D0\u00BE\u00D0\u00B3\u00D0\u00B8\u00D0\u00B5\u00D0\u00BF\u00D1\u0080\u00D0\u00BE\u00D0\u00B5\u00D0\u00BA\u00D1\u0082\u00D0\u00A1\u00D0\u00B5\u00D0\u00B9\u00D1\u0087\u00D0\u00B0\u00D1\u0081\u00D0\u00BC\u00D0\u00BE\u00D0\u00B4\u00D0\u00B5\u00D0\u00BB\u00D0\u00B8\u00D1\u0082\u00D0\u00B0\u00D0\u00BA\u00D0\u00BE\u00D0\u00B3\u00D0\u00BE\u00D0\u00BE\u00D0\u00BD\u00D0\u00BB\u0
0D0\u00B0\u00D0\u00B9\u00D0\u00BD\u00D0\u00B3\u00D0\u00BE\u00D1\u0080\u00D0\u00BE\u00D0\u00B4\u00D0\u00B5\u00D0\u00B2\u00D0\u00B5\u00D1\u0080\u00D1\u0081\u00D0\u00B8\u00D1\u008F\u00D1\u0081\u00D1\u0082\u00D1\u0080\u00D0\u00B0\u00D0\u00BD\u00D0\u00B5\u00D1\u0084\u00D0\u00B8\u00D0\u00BB\u00D1\u008C\u00D0\u00BC\u00D1\u008B\u00D1\u0083\u00D1\u0080\u00D0\u00BE\u00D0\u00B2\u00D0\u00BD\u00D1\u008F\u00D1\u0080\u00D0\u00B0\u00D0\u00B7\u00D0\u00BD\u00D1\u008B\u00D1\u0085\u00D0\u00B8\u00D1\u0081\u00D0\u00BA\u00D0\u00B0\u00D1\u0082\u00D1\u008C\u00D0\u00BD\u00D0\u00B5\u00D0\u00B4\u00D0\u00B5\u00D0\u00BB\u00D1\u008E\u00D1\u008F\u00D0\u00BD\u00D0\u00B2\u00D0\u00B0\u00D1\u0080\u00D1\u008F\u00D0\u00BC\u00D0\u00B5\u00D0\u00BD\u00D1\u008C\u00D1\u0088\u00D0\u00B5\u00D0\u00BC\u00D0\u00BD\u00D0\u00BE\u00D0\u00B3\u00D0\u00B8\u00D1\u0085\u00D0\u00B4\u00D0\u00B0\u00D0\u00BD\u00D0\u00BD\u00D0\u00BE\u00D0\u00B9\u00D0\u00B7\u00D0\u00BD\u00D0\u00B0\u00D1\u0087\u00D0\u00B8\u00D1\u0082\u00D0\u00BD\u00D0\u00B5\u00D0\u00BB\u00D1\u008C\u00D0\u00B7\u00D1\u008F\u00D1\u0084\u00D0\u00BE\u00D1\u0080\u00D1\u0083\u00D0\u00BC\u00D0\u00B0\u00D0\u00A2\u00D0\u00B5\u00D0\u00BF\u00D0\u00B5\u00D1\u0080\u00D1\u008C\u00D0\u00BC\u00D0\u00B5\u00D1\u0081\u00D1\u008F\u00D1\u0086\u00D0\u00B0\u00D0\u00B7\u00D0\u00B0\u00D1\u0089\u00D0\u00B8\u00D1\u0082\u00D1\u008B\u00D0\u009B\u00D1\u0083\u00D1\u0087\u00D1\u0088\u00D0\u00B8\u00D0\u00B5\u00E0\u00A4\u00A8\u00E0\u00A4\u00B9\u00E0\u00A5\u0080\u00E0\u00A4\u0082\u00E0\u00A4\u0095\u00E0\u00A4\u00B0\u00E0\u00A4\u00A8\u00E0\u00A5\u0087\u00E0\u00A4\u0085\u00E0\u00A4\u00AA\u00E0\u00A4\u00A8\u00E0\u00A5\u0087\u00E0\u00A4\u0095\u00E0\u00A4\u00BF\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u0095\u00E0\u00A4\u00B0\u00E0\u00A5\u0087\u00E0\u00A4\u0082\u00E0\u00A4\u0085\u00E0\u00A4\u00A8\u00E0\u00A5\u008D\u00E0\u00A4\u00AF\u00E0\u00A4\u0095\u00E0\u00A5\u008D\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u0097\u00E0\u00A4\u00BE\u00E0\u00A4\u0087\u00E0\u00A4\u00A1\u00E0\u00A4\u00AC\u00E
0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A5\u0087\u00E0\u00A4\u0095\u00E0\u00A4\u00BF\u00E0\u00A4\u00B8\u00E0\u00A5\u0080\u00E0\u00A4\u00A6\u00E0\u00A4\u00BF\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u00AA\u00E0\u00A4\u00B9\u00E0\u00A4\u00B2\u00E0\u00A5\u0087\u00E0\u00A4\u00B8\u00E0\u00A4\u00BF\u00E0\u00A4\u0082\u00E0\u00A4\u00B9\u00E0\u00A4\u00AD\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u00A4\u00E0\u00A4\u0085\u00E0\u00A4\u00AA\u00E0\u00A4\u00A8\u00E0\u00A5\u0080\u00E0\u00A4\u00B5\u00E0\u00A4\u00BE\u00E0\u00A4\u00B2\u00E0\u00A5\u0087\u00E0\u00A4\u00B8\u00E0\u00A5\u0087\u00E0\u00A4\u00B5\u00E0\u00A4\u00BE\u00E0\u00A4\u0095\u00E0\u00A4\u00B0\u00E0\u00A4\u00A4\u00E0\u00A5\u0087\u00E0\u00A4\u00AE\u00E0\u00A5\u0087\u00E0\u00A4\u00B0\u00E0\u00A5\u0087\u00E0\u00A4\u00B9\u00E0\u00A5\u008B\u00E0\u00A4\u00A8\u00E0\u00A5\u0087\u00E0\u00A4\u00B8\u00E0\u00A4\u0095\u00E0\u00A4\u00A4\u00E0\u00A5\u0087\u00E0\u00A4\u00AC\u00E0\u00A4\u00B9\u00E0\u00A5\u0081\u00E0\u00A4\u00A4\u00E0\u00A4\u00B8\u00E0\u00A4\u00BE\u00E0\u00A4\u0087\u00E0\u00A4\u009F\u00E0\u00A4\u00B9\u00E0\u00A5\u008B\u00E0\u00A4\u0097\u00E0\u00A4\u00BE\u00E0\u00A4\u009C\u00E0\u00A4\u00BE\u00E0\u00A4\u00A8\u00E0\u00A5\u0087\u00E0\u00A4\u00AE\u00E0\u00A4\u00BF\u00E0\u00A4\u00A8\u00E0\u00A4\u009F\u00E0\u00A4\u0095\u00E0\u00A4\u00B0\u00E0\u00A4\u00A4\u00E0\u00A4\u00BE\u00E0\u00A4\u0095\u00E0\u00A4\u00B0\u00E0\u00A4\u00A8\u00E0\u00A4\u00BE\u00E0\u00A4\u0089\u00E0\u00A4\u00A8\u00E0\u00A4\u0095\u00E0\u00A5\u0087\u00E0\u00A4\u00AF\u00E0\u00A4\u00B9\u00E0\u00A4\u00BE\u00E0\u00A4\u0081\u00E0\u00A4\u00B8\u00E0\u00A4\u00AC\u00E0\u00A4\u00B8\u00E0\u00A5\u0087\u00E0\u00A4\u00AD\u00E0\u00A4\u00BE\u00E0\u00A4\u00B7\u00E0\u00A4\u00BE\u00E0\u00A4\u0086\u00E0\u00A4\u00AA\u00E0\u00A4\u0095\u00E0\u00A5\u0087\u00E0\u00A4\u00B2\u00E0\u00A4\u00BF\u00E0\u00A4\u00AF\u00E0\u00A5\u0087\u00E0\u00A4\u00B6\u00E0\u00A5\u0081\u00E0\u00A4\u00B0\u00E0\u00A5\u0082\u00E0\u00A4\u0087\u00E0\u00A4\u00B8\u00E0\u00A4\u0095\u00E0\u00A5\u0087\u00E0\
u00A4\u0098\u00E0\u00A4\u0082\u00E0\u00A4\u009F\u00E0\u00A5\u0087\u00E0\u00A4\u00AE\u00E0\u00A5\u0087\u00E0\u00A4\u00B0\u00E0\u00A5\u0080\u00E0\u00A4\u00B8\u00E0\u00A4\u0095\u00E0\u00A4\u00A4\u00E0\u00A4\u00BE\u00E0\u00A4\u00AE\u00E0\u00A5\u0087\u00E0\u00A4\u00B0\u00E0\u00A4\u00BE\u00E0\u00A4\u00B2\u00E0\u00A5\u0087\u00E0\u00A4\u0095\u00E0\u00A4\u00B0\u00E0\u00A4\u0085\u00E0\u00A4\u00A7\u00E0\u00A4\u00BF\u00E0\u00A4\u0095\u00E0\u00A4\u0085\u00E0\u00A4\u00AA\u00E0\u00A4\u00A8\u00E0\u00A4\u00BE\u00E0\u00A4\u00B8\u00E0\u00A4\u00AE\u00E0\u00A4\u00BE\u00E0\u00A4\u009C\u00E0\u00A4\u00AE\u00E0\u00A5\u0081\u00E0\u00A4\u009D\u00E0\u00A5\u0087\u00E0\u00A4\u0095\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u00A3\u00E0\u00A4\u00B9\u00E0\u00A5\u008B\u00E0\u00A4\u00A4\u00E0\u00A4\u00BE\u00E0\u00A4\u0095\u00E0\u00A4\u00A1\u00E0\u00A4\u00BC\u00E0\u00A5\u0080\u00E0\u00A4\u00AF\u00E0\u00A4\u00B9\u00E0\u00A4\u00BE\u00E0\u00A4\u0082\u00E0\u00A4\u00B9\u00E0\u00A5\u008B\u00E0\u00A4\u009F\u00E0\u00A4\u00B2\u00E0\u00A4\u00B6\u00E0\u00A4\u00AC\u00E0\u00A5\u008D\u00E0\u00A4\u00A6\u00E0\u00A4\u00B2\u00E0\u00A4\u00BF\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u009C\u00E0\u00A5\u0080\u00E0\u00A4\u00B5\u00E0\u00A4\u00A8\u00E0\u00A4\u009C\u00E0\u00A4\u00BE\u00E0\u00A4\u00A4\u00E0\u00A4\u00BE\u00E0\u00A4\u0095\u00E0\u00A5\u0088\u00E0\u00A4\u00B8\u00E0\u00A5\u0087\u00E0\u00A4\u0086\u00E0\u00A4\u00AA\u00E0\u00A4\u0095\u00E0\u00A4\u00BE\u00E0\u00A4\u00B5\u00E0\u00A4\u00BE\u00E0\u00A4\u00B2\u00E0\u00A5\u0080\u00E0\u00A4\u00A6\u00E0\u00A5\u0087\u00E0\u00A4\u00A8\u00E0\u00A5\u0087\u00E0\u00A4\u00AA\u00E0\u00A5\u0082\u00E0\u00A4\u00B0\u00E0\u00A5\u0080\u00E0\u00A4\u00AA\u00E0\u00A4\u00BE\u00E0\u00A4\u00A8\u00E0\u00A5\u0080\u00E0\u00A4\u0089\u00E0\u00A4\u00B8\u00E0\u00A4\u0095\u00E0\u00A5\u0087\u00E0\u00A4\u00B9\u00E0\u00A5\u008B\u00E0\u00A4\u0097\u00E0\u00A5\u0080\u00E0\u00A4\u00AC\u00E0\u00A5\u0088\u00E0\u00A4\u00A0\u00E0\u00A4\u0095\u00E0\u00A4\u0086\u00E0\u00A4\u00AA\u00E0\u00A4\u0095\u00E0\u0
0A5\u0080\u00E0\u00A4\u00B5\u00E0\u00A4\u00B0\u00E0\u00A5\u008D\u00E0\u00A4\u00B7\u00E0\u00A4\u0097\u00E0\u00A4\u00BE\u00E0\u00A4\u0082\u00E0\u00A4\u00B5\u00E0\u00A4\u0086\u00E0\u00A4\u00AA\u00E0\u00A4\u0095\u00E0\u00A5\u008B\u00E0\u00A4\u009C\u00E0\u00A4\u00BF\u00E0\u00A4\u00B2\u00E0\u00A4\u00BE\u00E0\u00A4\u009C\u00E0\u00A4\u00BE\u00E0\u00A4\u00A8\u00E0\u00A4\u00BE\u00E0\u00A4\u00B8\u00E0\u00A4\u00B9\u00E0\u00A4\u00AE\u00E0\u00A4\u00A4\u00E0\u00A4\u00B9\u00E0\u00A4\u00AE\u00E0\u00A5\u0087\u00E0\u00A4\u0082\u00E0\u00A4\u0089\u00E0\u00A4\u00A8\u00E0\u00A4\u0095\u00E0\u00A5\u0080\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u00B9\u00E0\u00A5\u0082\u00E0\u00A4\u00A6\u00E0\u00A4\u00B0\u00E0\u00A5\u008D\u00E0\u00A4\u009C\u00E0\u00A4\u00B8\u00E0\u00A5\u0082\u00E0\u00A4\u009A\u00E0\u00A5\u0080\u00E0\u00A4\u00AA\u00E0\u00A4\u00B8\u00E0\u00A4\u0082\u00E0\u00A4\u00A6\u00E0\u00A4\u00B8\u00E0\u00A4\u00B5\u00E0\u00A4\u00BE\u00E0\u00A4\u00B2\u00E0\u00A4\u00B9\u00E0\u00A5\u008B\u00E0\u00A4\u00A8\u00E0\u00A4\u00BE\u00E0\u00A4\u00B9\u00E0\u00A5\u008B\u00E0\u00A4\u00A4\u00E0\u00A5\u0080\u00E0\u00A4\u009C\u00E0\u00A5\u0088\u00E0\u00A4\u00B8\u00E0\u00A5\u0087\u00E0\u00A4\u00B5\u00E0\u00A4\u00BE\u00E0\u00A4\u00AA\u00E0\u00A4\u00B8\u00E0\u00A4\u009C\u00E0\u00A4\u00A8\u00E0\u00A4\u00A4\u00E0\u00A4\u00BE\u00E0\u00A4\u00A8\u00E0\u00A5\u0087\u00E0\u00A4\u00A4\u00E0\u00A4\u00BE\u00E0\u00A4\u009C\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A5\u0080\u00E0\u00A4\u0098\u00E0\u00A4\u00BE\u00E0\u00A4\u00AF\u00E0\u00A4\u00B2\u00E0\u00A4\u009C\u00E0\u00A4\u00BF\u00E0\u00A4\u00B2\u00E0\u00A5\u0087\u00E0\u00A4\u00A8\u00E0\u00A5\u0080\u00E0\u00A4\u009A\u00E0\u00A5\u0087\u00E0\u00A4\u009C\u00E0\u00A4\u00BE\u00E0\u00A4\u0082\u00E0\u00A4\u009A\u00E0\u00A4\u00AA\u00E0\u00A4\u00A4\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A4\u0097\u00E0\u00A5\u0082\u00E0\u00A4\u0097\u00E0\u00A4\u00B2\u00E0\u00A4\u009C\u00E0\u00A4\u00BE\u00E0\u00A4\u00A4\u00E0\u00A5\u0087\u00E0\u00A4\u00AC\u00E0\u00A4\u00BE\u00E0\u00A
4\u00B9\u00E0\u00A4\u00B0\u00E0\u00A4\u0086\u00E0\u00A4\u00AA\u00E0\u00A4\u00A8\u00E0\u00A5\u0087\u00E0\u00A4\u00B5\u00E0\u00A4\u00BE\u00E0\u00A4\u00B9\u00E0\u00A4\u00A8\u00E0\u00A4\u0087\u00E0\u00A4\u00B8\u00E0\u00A4\u0095\u00E0\u00A4\u00BE\u00E0\u00A4\u00B8\u00E0\u00A5\u0081\u00E0\u00A4\u00AC\u00E0\u00A4\u00B9\u00E0\u00A4\u00B0\u00E0\u00A4\u00B9\u00E0\u00A4\u00A8\u00E0\u00A5\u0087\u00E0\u00A4\u0087\u00E0\u00A4\u00B8\u00E0\u00A4\u00B8\u00E0\u00A5\u0087\u00E0\u00A4\u00B8\u00E0\u00A4\u00B9\u00E0\u00A4\u00BF\u00E0\u00A4\u00A4\u00E0\u00A4\u00AC\u00E0\u00A4\u00A1\u00E0\u00A4\u00BC\u00E0\u00A5\u0087\u00E0\u00A4\u0098\u00E0\u00A4\u009F\u00E0\u00A4\u00A8\u00E0\u00A4\u00BE\u00E0\u00A4\u00A4\u00E0\u00A4\u00B2\u00E0\u00A4\u00BE\u00E0\u00A4\u00B6\u00E0\u00A4\u00AA\u00E0\u00A4\u00BE\u00E0\u00A4\u0082\u00E0\u00A4\u009A\u00E0\u00A4\u00B6\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A5\u0080\u00E0\u00A4\u00AC\u00E0\u00A4\u00A1\u00E0\u00A4\u00BC\u00E0\u00A5\u0080\u00E0\u00A4\u00B9\u00E0\u00A5\u008B\u00E0\u00A4\u00A4\u00E0\u00A5\u0087\u00E0\u00A4\u00B8\u00E0\u00A4\u00BE\u00E0\u00A4\u0088\u00E0\u00A4\u009F\u00E0\u00A4\u00B6\u00E0\u00A4\u00BE\u00E0\u00A4\u00AF\u00E0\u00A4\u00A6\u00E0\u00A4\u00B8\u00E0\u00A4\u0095\u00E0\u00A4\u00A4\u00E0\u00A5\u0080\u00E0\u00A4\u009C\u00E0\u00A4\u00BE\u00E0\u00A4\u00A4\u00E0\u00A5\u0080\u00E0\u00A4\u00B5\u00E0\u00A4\u00BE\u00E0\u00A4\u00B2\u00E0\u00A4\u00BE\u00E0\u00A4\u00B9\u00E0\u00A4\u009C\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u00AA\u00E0\u00A4\u009F\u00E0\u00A4\u00A8\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u0096\u00E0\u00A4\u00A8\u00E0\u00A5\u0087\u00E0\u00A4\u00B8\u00E0\u00A4\u00A1\u00E0\u00A4\u00BC\u00E0\u00A4\u0095\u00E0\u00A4\u00AE\u00E0\u00A4\u00BF\u00E0\u00A4\u00B2\u00E0\u00A4\u00BE\u00E0\u00A4\u0089\u00E0\u00A4\u00B8\u00E0\u00A4\u0095\u00E0\u00A5\u0080\u00E0\u00A4\u0095\u00E0\u00A5\u0087\u00E0\u00A4\u00B5\u00E0\u00A4\u00B2\u00E0\u00A4\u00B2\u00E0\u00A4\u0097\u00E0\u00A4\u00A4\u00E0\u00A4\u00BE\u00E0\u00A4\u0096\u00E0\u00A4\
u00BE\u00E0\u00A4\u00A8\u00E0\u00A4\u00BE\u00E0\u00A4\u0085\u00E0\u00A4\u00B0\u00E0\u00A5\u008D\u00E0\u00A4\u00A5\u00E0\u00A4\u009C\u00E0\u00A4\u00B9\u00E0\u00A4\u00BE\u00E0\u00A4\u0082\u00E0\u00A4\u00A6\u00E0\u00A5\u0087\u00E0\u00A4\u0096\u00E0\u00A4\u00BE\u00E0\u00A4\u00AA\u00E0\u00A4\u00B9\u00E0\u00A4\u00B2\u00E0\u00A5\u0080\u00E0\u00A4\u00A8\u00E0\u00A4\u00BF\u00E0\u00A4\u00AF\u00E0\u00A4\u00AE\u00E0\u00A4\u00AC\u00E0\u00A4\u00BF\u00E0\u00A4\u00A8\u00E0\u00A4\u00BE\u00E0\u00A4\u00AC\u00E0\u00A5\u0088\u00E0\u00A4\u0082\u00E0\u00A4\u0095\u00E0\u00A4\u0095\u00E0\u00A4\u00B9\u00E0\u00A5\u0080\u00E0\u00A4\u0082\u00E0\u00A4\u0095\u00E0\u00A4\u00B9\u00E0\u00A4\u00A8\u00E0\u00A4\u00BE\u00E0\u00A4\u00A6\u00E0\u00A5\u0087\u00E0\u00A4\u00A4\u00E0\u00A4\u00BE\u00E0\u00A4\u00B9\u00E0\u00A4\u00AE\u00E0\u00A4\u00B2\u00E0\u00A5\u0087\u00E0\u00A4\u0095\u00E0\u00A4\u00BE\u00E0\u00A4\u00AB\u00E0\u00A5\u0080\u00E0\u00A4\u009C\u00E0\u00A4\u00AC\u00E0\u00A4\u0095\u00E0\u00A4\u00BF\u00E0\u00A4\u00A4\u00E0\u00A5\u0081\u00E0\u00A4\u00B0\u00E0\u00A4\u00A4\u00E0\u00A4\u00AE\u00E0\u00A4\u00BE\u00E0\u00A4\u0082\u00E0\u00A4\u0097\u00E0\u00A4\u00B5\u00E0\u00A4\u00B9\u00E0\u00A5\u0080\u00E0\u00A4\u0082\u00E0\u00A4\u00B0\u00E0\u00A5\u008B\u00E0\u00A4\u009C\u00E0\u00A4\u00BC\u00E0\u00A4\u00AE\u00E0\u00A4\u00BF\u00E0\u00A4\u00B2\u00E0\u00A5\u0080\u00E0\u00A4\u0086\u00E0\u00A4\u00B0\u00E0\u00A5\u008B\u00E0\u00A4\u00AA\u00E0\u00A4\u00B8\u00E0\u00A5\u0087\u00E0\u00A4\u00A8\u00E0\u00A4\u00BE\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u00A6\u00E0\u00A4\u00B5\u00E0\u00A4\u00B2\u00E0\u00A5\u0087\u00E0\u00A4\u00A8\u00E0\u00A5\u0087\u00E0\u00A4\u0096\u00E0\u00A4\u00BE\u00E0\u00A4\u00A4\u00E0\u00A4\u00BE\u00E0\u00A4\u0095\u00E0\u00A4\u00B0\u00E0\u00A5\u0080\u00E0\u00A4\u00AC\u00E0\u00A4\u0089\u00E0\u00A4\u00A8\u00E0\u00A4\u0095\u00E0\u00A4\u00BE\u00E0\u00A4\u009C\u00E0\u00A4\u00B5\u00E0\u00A4\u00BE\u00E0\u00A4\u00AC\u00E0\u00A4\u00AA\u00E0\u00A5\u0082\u00E0\u00A4\u00B0\u00E0\u00A4\u00BE\u00E0\u00A4\u0
0AC\u00E0\u00A4\u00A1\u00E0\u00A4\u00BC\u00E0\u00A4\u00BE\u00E0\u00A4\u00B8\u00E0\u00A5\u008C\u00E0\u00A4\u00A6\u00E0\u00A4\u00BE\u00E0\u00A4\u00B6\u00E0\u00A5\u0087\u00E0\u00A4\u00AF\u00E0\u00A4\u00B0\u00E0\u00A4\u0095\u00E0\u00A4\u00BF\u00E0\u00A4\u00AF\u00E0\u00A5\u0087\u00E0\u00A4\u0095\u00E0\u00A4\u00B9\u00E0\u00A4\u00BE\u00E0\u00A4\u0082\u00E0\u00A4\u0085\u00E0\u00A4\u0095\u00E0\u00A4\u00B8\u00E0\u00A4\u00B0\u00E0\u00A4\u00AC\u00E0\u00A4\u00A8\u00E0\u00A4\u00BE\u00E0\u00A4\u008F\u00E0\u00A4\u00B5\u00E0\u00A4\u00B9\u00E0\u00A4\u00BE\u00E0\u00A4\u0082\u00E0\u00A4\u00B8\u00E0\u00A5\u008D\u00E0\u00A4\u00A5\u00E0\u00A4\u00B2\u00E0\u00A4\u00AE\u00E0\u00A4\u00BF\u00E0\u00A4\u00B2\u00E0\u00A5\u0087\u00E0\u00A4\u00B2\u00E0\u00A5\u0087\u00E0\u00A4\u0096\u00E0\u00A4\u0095\u00E0\u00A4\u00B5\u00E0\u00A4\u00BF\u00E0\u00A4\u00B7\u00E0\u00A4\u00AF\u00E0\u00A4\u0095\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A4\u0082\u00E0\u00A4\u00B8\u00E0\u00A4\u00AE\u00E0\u00A5\u0082\u00E0\u00A4\u00B9\u00E0\u00A4\u00A5\u00E0\u00A4\u00BE\u00E0\u00A4\u00A8\u00E0\u00A4\u00BE\u00D8\u00AA\u00D8\u00B3\u00D8\u00AA\u00D8\u00B7\u00D9\u008A\u00D8\u00B9\u00D9\u0085\u00D8\u00B4\u00D8\u00A7\u00D8\u00B1\u00D9\u0083\u00D8\u00A9\u00D8\u00A8\u00D9\u0088\u00D8\u00A7\u00D8\u00B3\u00D8\u00B7\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00B5\u00D9\u0081\u00D8\u00AD\u00D8\u00A9\u00D9\u0085\u00D9\u0088\u00D8\u00A7\u00D8\u00B6\u00D9\u008A\u00D8\u00B9\u00D8\u00A7\u00D9\u0084\u00D8\u00AE\u00D8\u00A7\u00D8\u00B5\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D8\u00B2\u00D9\u008A\u00D8\u00AF\u00D8\u00A7\u00D9\u0084\u00D8\u00B9\u00D8\u00A7\u00D9\u0085\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D9\u0083\u00D8\u00A7\u00D8\u00AA\u00D8\u00A8\u00D8\u00A7\u00D9\u0084\u00D8\u00B1\u00D8\u00AF\u00D9\u0088\u00D8\u00AF\u00D8\u00A8\u00D8\u00B1\u00D9\u0086\u00D8\u00A7\u00D9\u0085\u00D8\u00AC\u00D8\u00A7\u00D9\u0084\u00D8\u00AF\u00D9\u0088\u00D9\u0084\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00B9\u00D8\u00A7\u00D9\u0084\u00D9\u008
5\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D9\u0088\u00D9\u0082\u00D8\u00B9\u00D8\u00A7\u00D9\u0084\u00D8\u00B9\u00D8\u00B1\u00D8\u00A8\u00D9\u008A\u00D8\u00A7\u00D9\u0084\u00D8\u00B3\u00D8\u00B1\u00D9\u008A\u00D8\u00B9\u00D8\u00A7\u00D9\u0084\u00D8\u00AC\u00D9\u0088\u00D8\u00A7\u00D9\u0084\u00D8\u00A7\u00D9\u0084\u00D8\u00B0\u00D9\u0087\u00D8\u00A7\u00D8\u00A8\u00D8\u00A7\u00D9\u0084\u00D8\u00AD\u00D9\u008A\u00D8\u00A7\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00AD\u00D9\u0082\u00D9\u0088\u00D9\u0082\u00D8\u00A7\u00D9\u0084\u00D9\u0083\u00D8\u00B1\u00D9\u008A\u00D9\u0085\u00D8\u00A7\u00D9\u0084\u00D8\u00B9\u00D8\u00B1\u00D8\u00A7\u00D9\u0082\u00D9\u0085\u00D8\u00AD\u00D9\u0081\u00D9\u0088\u00D8\u00B8\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00AB\u00D8\u00A7\u00D9\u0086\u00D9\u008A\u00D9\u0085\u00D8\u00B4\u00D8\u00A7\u00D9\u0087\u00D8\u00AF\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D8\u00B1\u00D8\u00A3\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D9\u0082\u00D8\u00B1\u00D8\u00A2\u00D9\u0086\u00D8\u00A7\u00D9\u0084\u00D8\u00B4\u00D8\u00A8\u00D8\u00A7\u00D8\u00A8\u00D8\u00A7\u00D9\u0084\u00D8\u00AD\u00D9\u0088\u00D8\u00A7\u00D8\u00B1\u00D8\u00A7\u00D9\u0084\u00D8\u00AC\u00D8\u00AF\u00D9\u008A\u00D8\u00AF\u00D8\u00A7\u00D9\u0084\u00D8\u00A3\u00D8\u00B3\u00D8\u00B1\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00B9\u00D9\u0084\u00D9\u0088\u00D9\u0085\u00D9\u0085\u00D8\u00AC\u00D9\u0085\u00D9\u0088\u00D8\u00B9\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00B1\u00D8\u00AD\u00D9\u0085\u00D9\u0086\u00D8\u00A7\u00D9\u0084\u00D9\u0086\u00D9\u0082\u00D8\u00A7\u00D8\u00B7\u00D9\u0081\u00D9\u0084\u00D8\u00B3\u00D8\u00B7\u00D9\u008A\u00D9\u0086\u00D8\u00A7\u00D9\u0084\u00D9\u0083\u00D9\u0088\u00D9\u008A\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D8\u00AF\u00D9\u0086\u00D9\u008A\u00D8\u00A7\u00D8\u00A8\u00D8\u00B1\u00D9\u0083\u00D8\u00A7\u00D8\u00AA\u00D9\u0087\u00D8\u00A7\u00D9\u0084\u00D8\u00B1\u00D9\u008A\u00D8\u00A7\u00D8\u00B6\u00D8\u00AA\u00D8\u00AD\u00D9\u008A\u00D8\u00A7\u00D8\
u00AA\u00D9\u008A\u00D8\u00A8\u00D8\u00AA\u00D9\u0088\u00D9\u0082\u00D9\u008A\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D8\u00A3\u00D9\u0088\u00D9\u0084\u00D9\u0089\u00D8\u00A7\u00D9\u0084\u00D8\u00A8\u00D8\u00B1\u00D9\u008A\u00D8\u00AF\u00D8\u00A7\u00D9\u0084\u00D9\u0083\u00D9\u0084\u00D8\u00A7\u00D9\u0085\u00D8\u00A7\u00D9\u0084\u00D8\u00B1\u00D8\u00A7\u00D8\u00A8\u00D8\u00B7\u00D8\u00A7\u00D9\u0084\u00D8\u00B4\u00D8\u00AE\u00D8\u00B5\u00D9\u008A\u00D8\u00B3\u00D9\u008A\u00D8\u00A7\u00D8\u00B1\u00D8\u00A7\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D8\u00AB\u00D8\u00A7\u00D9\u0084\u00D8\u00AB\u00D8\u00A7\u00D9\u0084\u00D8\u00B5\u00D9\u0084\u00D8\u00A7\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00AD\u00D8\u00AF\u00D9\u008A\u00D8\u00AB\u00D8\u00A7\u00D9\u0084\u00D8\u00B2\u00D9\u0088\u00D8\u00A7\u00D8\u00B1\u00D8\u00A7\u00D9\u0084\u00D8\u00AE\u00D9\u0084\u00D9\u008A\u00D8\u00AC\u00D8\u00A7\u00D9\u0084\u00D8\u00AC\u00D9\u0085\u00D9\u008A\u00D8\u00B9\u00D8\u00A7\u00D9\u0084\u00D8\u00B9\u00D8\u00A7\u00D9\u0085\u00D9\u0087\u00D8\u00A7\u00D9\u0084\u00D8\u00AC\u00D9\u0085\u00D8\u00A7\u00D9\u0084\u00D8\u00A7\u00D9\u0084\u00D8\u00B3\u00D8\u00A7\u00D8\u00B9\u00D8\u00A9\u00D9\u0085\u00D8\u00B4\u00D8\u00A7\u00D9\u0087\u00D8\u00AF\u00D9\u0087\u00D8\u00A7\u00D9\u0084\u00D8\u00B1\u00D8\u00A6\u00D9\u008A\u00D8\u00B3\u00D8\u00A7\u00D9\u0084\u00D8\u00AF\u00D8\u00AE\u00D9\u0088\u00D9\u0084\u00D8\u00A7\u00D9\u0084\u00D9\u0081\u00D9\u0086\u00D9\u008A\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D9\u0083\u00D8\u00AA\u00D8\u00A7\u00D8\u00A8\u00D8\u00A7\u00D9\u0084\u00D8\u00AF\u00D9\u0088\u00D8\u00B1\u00D9\u008A\u00D8\u00A7\u00D9\u0084\u00D8\u00AF\u00D8\u00B1\u00D9\u0088\u00D8\u00B3\u00D8\u00A7\u00D8\u00B3\u00D8\u00AA\u00D8\u00BA\u00D8\u00B1\u00D9\u0082\u00D8\u00AA\u00D8\u00B5\u00D8\u00A7\u00D9\u0085\u00D9\u008A\u00D9\u0085\u00D8\u00A7\u00D9\u0084\u00D8\u00A8\u00D9\u0086\u00D8\u00A7\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D8\u00B9\u00D8\u00B8\u00D9\u008A\u00D9\u0085entertainmentunderstanding = 
function().jpg\" width=\"configuration.png\" width=\"<body class=\"Math.random()contemporary United Statescircumstances.appendChild(organizations<span class=\"\"><img src=\"/distinguishedthousands of communicationclear\"></div>investigationfavicon.ico\" margin-right:based on the Massachusettstable border=internationalalso known aspronunciationbackground:#fpadding-left:For example, miscellaneous&lt;/math&gt;psychologicalin particularearch\" type=\"form method=\"as opposed toSupreme Courtoccasionally Additionally,North Americapx;backgroundopportunitiesEntertainment.toLowerCase(manufacturingprofessional combined withFor instance,consisting of\" maxlength=\"return false;consciousnessMediterraneanextraordinaryassassinationsubsequently button type=\"the number ofthe original comprehensiverefers to the</ul>\n</div>\nphilosophicallocation.hrefwas publishedSan Francisco(function(){\n<div id=\"mainsophisticatedmathematical /head>\r\n<bodysuggests thatdocumentationconcentrationrelationshipsmay have been(for example,This article in some casesparts of the definition ofGreat Britain cellpadding=equivalent toplaceholder=\"; font-size: justificationbelieved thatsuffered fromattempted to leader of thecript\" src=\"/(function() {are available\n\t<link rel=\" src='http://interested inconventional \" alt=\"\" /></are generallyhas also beenmost popular correspondingcredited withtyle=\"border:</a></span></.gif\" width=\"<iframe src=\"table class=\"inline-block;according to together withapproximatelyparliamentarymore and moredisplay:none;traditionallypredominantly&nbsp;|&nbsp;&nbsp;</span> cellspacing=<input name=\"or\" content=\"controversialproperty=\"og:/x-shockwave-demonstrationsurrounded byNevertheless,was the firstconsiderable Although the collaborationshould not beproportion of<span style=\"known as the shortly afterfor instance,described as /head>\n<body starting withincreasingly the fact thatdiscussion ofmiddle of thean individualdifficult to point of viewhomosexualityacceptance 
of</span></div>manufacturersorigin of thecommonly usedimportance ofdenominationsbackground: #length of thedeterminationa significant\" border=\"0\">revolutionaryprinciples ofis consideredwas developedIndo-Europeanvulnerable toproponents ofare sometimescloser to theNew York City name=\"searchattributed tocourse of themathematicianby the end ofat the end of\" border=\"0\" technological.removeClass(branch of theevidence that![endif]-->\r\nInstitute of into a singlerespectively.and thereforeproperties ofis located insome of whichThere is alsocontinued to appearance of &amp;ndash; describes theconsiderationauthor of theindependentlyequipped withdoes not have</a><a href=\"confused with<link href=\"/at the age ofappear in theThese includeregardless ofcould be used style=&quot;several timesrepresent thebody>\n</html>thought to bepopulation ofpossibilitiespercentage ofaccess to thean attempt toproduction ofjquery/jquerytwo differentbelong to theestablishmentreplacing thedescription\" determine theavailable forAccording to wide range of\t<div class=\"more commonlyorganisationsfunctionalitywas completed &amp;mdash; participationthe characteran additionalappears to befact that thean example ofsignificantlyonmouseover=\"because they async = true;problems withseems to havethe result of src=\"http://familiar withpossession offunction () {took place inand sometimessubstantially<span></span>is often usedin an attemptgreat deal ofEnvironmentalsuccessfully virtually all20th century,professionalsnecessary to determined bycompatibilitybecause it isDictionary ofmodificationsThe followingmay refer to:Consequently,Internationalalthough somethat would beworld's firstclassified asbottom of the(particularlyalign=\"left\" most commonlybasis for thefoundation ofcontributionspopularity ofcenter of theto reduce thejurisdictionsapproximation onmouseout=\"New Testamentcollection of</span></a></in the Unitedfilm director-strict.dtd\">has been usedreturn to thealthough thischange in theseveral 
otherbut there areunprecedentedis similar toespecially inweight: bold;is called thecomputationalindicate thatrestricted to\t<meta name=\"are typicallyconflict withHowever, the An example ofcompared withquantities ofrather than aconstellationnecessary forreported thatspecificationpolitical and&nbsp;&nbsp;<references tothe same yearGovernment ofgeneration ofhave not beenseveral yearscommitment to\t\t<ul class=\"visualization19th century,practitionersthat he wouldand continuedoccupation ofis defined ascentre of thethe amount of><div style=\"equivalent ofdifferentiatebrought aboutmargin-left: automaticallythought of asSome of these\n<div class=\"input class=\"replaced withis one of theeducation andinfluenced byreputation as\n<meta name=\"accommodation</div>\n</div>large part ofInstitute forthe so-called against the In this case,was appointedclaimed to beHowever, thisDepartment ofthe remainingeffect on theparticularly deal with the\n<div style=\"almost alwaysare currentlyexpression ofphilosophy offor more thancivilizationson the islandselectedIndexcan result in\" value=\"\" />the structure /></a></div>Many of thesecaused by theof the Unitedspan class=\"mcan be tracedis related tobecame one ofis frequentlyliving in thetheoreticallyFollowing theRevolutionarygovernment inis determinedthe politicalintroduced insufficient todescription\">short storiesseparation ofas to whetherknown for itswas initiallydisplay:blockis an examplethe principalconsists of arecognized as/body></html>a substantialreconstructedhead of stateresistance toundergraduateThere are twogravitationalare describedintentionallyserved as theclass=\"headeropposition tofundamentallydominated theand the otheralliance withwas forced torespectively,and politicalin support ofpeople in the20th century.and publishedloadChartbeatto understandmember statesenvironmentalfirst half ofcountries andarchitecturalbe consideredcharacterizedclearIntervalauthoritativeFederation ofwas succeededand there area consequencethe 
Presidentalso includedfree softwaresuccession ofdeveloped thewas destroyedaway from the;\n</script>\n<although theyfollowed by amore powerfulresulted in aUniversity ofHowever, manythe presidentHowever, someis thought tountil the endwas announcedare importantalso includes><input type=the center of DO NOT ALTERused to referthemes/?sort=that had beenthe basis forhas developedin the summercomparativelydescribed thesuch as thosethe resultingis impossiblevarious otherSouth Africanhave the sameeffectivenessin which case; text-align:structure and; background:regarding thesupported theis also knownstyle=\"marginincluding thebahasa Melayunorsk bokm\u00C3\u00A5lnorsk nynorsksloven\u00C5\u00A1\u00C4\u008Dinainternacionalcalificaci\u00C3\u00B3ncomunicaci\u00C3\u00B3nconstrucci\u00C3\u00B3n\"><div class=\"disambiguationDomainName', 'administrationsimultaneouslytransportationInternational margin-bottom:responsibility<![endif]-->\n</><meta name=\"implementationinfrastructurerepresentationborder-bottom:</head>\n<body>=http%3A%2F%2F<form method=\"method=\"post\" /favicon.ico\" });\n</script>\n.setAttribute(Administration= new Array();<![endif]-->\r\ndisplay:block;Unfortunately,\">&nbsp;</div>/favicon.ico\">='stylesheet' identification, for example,<li><a href=\"/an alternativeas a result ofpt\"></script>\ntype=\"submit\" \n(function() {recommendationform action=\"/transformationreconstruction.style.display According to hidden\" name=\"along with thedocument.body.approximately Communicationspost\" action=\"meaning &quot;--<![endif]-->Prime Ministercharacteristic</a> <a class=the history of onmouseover=\"the governmenthref=\"https://was originallywas introducedclassificationrepresentativeare considered<![endif]-->\n\ndepends on theUniversity of in contrast to placeholder=\"in the case ofinternational constitutionalstyle=\"border-: function() {Because of the-strict.dtd\">\n<table class=\"accompanied byaccount of the<script src=\"/nature of the the people in in addition tos); js.id = 
id\" width=\"100%\"regarding the Roman Catholican independentfollowing the .gif\" width=\"1the following discriminationarchaeologicalprime minister.js\"></script>combination of marginwidth=\"createElement(w.attachEvent(</a></td></tr>src=\"https://aIn particular, align=\"left\" Czech RepublicUnited Kingdomcorrespondenceconcluded that.html\" title=\"(function () {comes from theapplication of<span class=\"sbelieved to beement('script'</a>\n</li>\n<livery different><span class=\"option value=\"(also known as\t<li><a href=\"><input name=\"separated fromreferred to as valign=\"top\">founder of theattempting to carbon dioxide\n\n<div class=\"class=\"search-/body>\n</html>opportunity tocommunications</head>\r\n<body style=\"width:Ti\u00E1\u00BA\u00BFng Vi\u00E1\u00BB\u0087tchanges in theborder-color:#0\" border=\"0\" </span></div><was discovered\" type=\"text\" );\n</script>\n\nDepartment of ecclesiasticalthere has beenresulting from</body></html>has never beenthe first timein response toautomatically </div>\n\n<div iwas consideredpercent of the\" /></a></div>collection of descended fromsection of theaccept-charsetto be confusedmember of the padding-right:translation ofinterpretation href='http://whether or notThere are alsothere are manya small numberother parts ofimpossible to  class=\"buttonlocated in the. 
However, theand eventuallyAt the end of because of itsrepresents the<form action=\" method=\"post\"it is possiblemore likely toan increase inhave also beencorresponds toannounced thatalign=\"right\">many countriesfor many yearsearliest knownbecause it waspt\"></script>\r valign=\"top\" inhabitants offollowing year\r\n<div class=\"million peoplecontroversial concerning theargue that thegovernment anda reference totransferred todescribing the style=\"color:although therebest known forsubmit\" name=\"multiplicationmore than one recognition ofCouncil of theedition of the  <meta name=\"Entertainment away from the ;margin-right:at the time ofinvestigationsconnected withand many otheralthough it isbeginning with <span class=\"descendants of<span class=\"i align=\"right\"</head>\n<body aspects of thehas since beenEuropean Unionreminiscent ofmore difficultVice Presidentcomposition ofpassed throughmore importantfont-size:11pxexplanation ofthe concept ofwritten in the\t<span class=\"is one of the resemblance toon the groundswhich containsincluding the defined by thepublication ofmeans that theoutside of thesupport of the<input class=\"<span class=\"t(Math.random()most prominentdescription ofConstantinoplewere published<div class=\"seappears in the1\" height=\"1\" most importantwhich includeswhich had beendestruction ofthe population\n\t<div class=\"possibility ofsometimes usedappear to havesuccess of theintended to bepresent in thestyle=\"clear:b\r\n</script>\r\n<was founded ininterview with_id\" content=\"capital of the\r\n<link rel=\"srelease of thepoint out thatxMLHttpRequestand subsequentsecond largestvery importantspecificationssurface of theapplied to theforeign policy_setDomainNameestablished inis believed toIn addition tomeaning of theis named afterto protect theis representedDeclaration ofmore efficientClassificationother forms ofhe returned to<span class=\"cperformance of(function() {\rif and only ifregions of theleading to therelations withUnited 
Nationsstyle=\"height:other than theype\" content=\"Association of\n</head>\n<bodylocated on theis referred to(including theconcentrationsthe individualamong the mostthan any other/>\n<link rel=\" return false;the purpose ofthe ability to;color:#fff}\n.\n<span class=\"the subject ofdefinitions of>\r\n<link rel=\"claim that thehave developed<table width=\"celebration ofFollowing the to distinguish<span class=\"btakes place inunder the namenoted that the><![endif]-->\nstyle=\"margin-instead of theintroduced thethe process ofincreasing thedifferences inestimated thatespecially the/div><div id=\"was eventuallythroughout histhe differencesomething thatspan></span></significantly ></script>\r\n\r\nenvironmental to prevent thehave been usedespecially forunderstand theis essentiallywere the firstis the largesthave been made\" src=\"http://interpreted assecond half ofcrolling=\"no\" is composed ofII, Holy Romanis expected tohave their owndefined as thetraditionally have differentare often usedto ensure thatagreement withcontaining theare frequentlyinformation onexample is theresulting in a</a></li></ul> class=\"footerand especiallytype=\"button\" </span></span>which included>\n<meta name=\"considered thecarried out byHowever, it isbecame part ofin relation topopular in thethe capital ofwas officiallywhich has beenthe History ofalternative todifferent fromto support thesuggested thatin the process  <div class=\"the foundationbecause of hisconcerned withthe universityopposed to thethe context of<span class=\"ptext\" name=\"q\"\t\t<div class=\"the scientificrepresented bymathematicianselected by thethat have been><div class=\"cdiv id=\"headerin particular,converted into);\n</script>\n<philosophical srpskohrvatskiti\u00E1\u00BA\u00BFng 
Vi\u00E1\u00BB\u0087t\u00D0\u00A0\u00D1\u0083\u00D1\u0081\u00D1\u0081\u00D0\u00BA\u00D0\u00B8\u00D0\u00B9\u00D1\u0080\u00D1\u0083\u00D1\u0081\u00D1\u0081\u00D0\u00BA\u00D0\u00B8\u00D0\u00B9investigaci\u00C3\u00B3nparticipaci\u00C3\u00B3n\u00D0\u00BA\u00D0\u00BE\u00D1\u0082\u00D0\u00BE\u00D1\u0080\u00D1\u008B\u00D0\u00B5\u00D0\u00BE\u00D0\u00B1\u00D0\u00BB\u00D0\u00B0\u00D1\u0081\u00D1\u0082\u00D0\u00B8\u00D0\u00BA\u00D0\u00BE\u00D1\u0082\u00D0\u00BE\u00D1\u0080\u00D1\u008B\u00D0\u00B9\u00D1\u0087\u00D0\u00B5\u00D0\u00BB\u00D0\u00BE\u00D0\u00B2\u00D0\u00B5\u00D0\u00BA\u00D1\u0081\u00D0\u00B8\u00D1\u0081\u00D1\u0082\u00D0\u00B5\u00D0\u00BC\u00D1\u008B\u00D0\u009D\u00D0\u00BE\u00D0\u00B2\u00D0\u00BE\u00D1\u0081\u00D1\u0082\u00D0\u00B8\u00D0\u00BA\u00D0\u00BE\u00D1\u0082\u00D0\u00BE\u00D1\u0080\u00D1\u008B\u00D1\u0085\u00D0\u00BE\u00D0\u00B1\u00D0\u00BB\u00D0\u00B0\u00D1\u0081\u00D1\u0082\u00D1\u008C\u00D0\u00B2\u00D1\u0080\u00D0\u00B5\u00D0\u00BC\u00D0\u00B5\u00D0\u00BD\u00D0\u00B8\u00D0\u00BA\u00D0\u00BE\u00D1\u0082\u00D0\u00BE\u00D1\u0080\u00D0\u00B0\u00D1\u008F\u00D1\u0081\u00D0\u00B5\u00D0\u00B3\u00D0\u00BE\u00D0\u00B4\u00D0\u00BD\u00D1\u008F\u00D1\u0081\u00D0\u00BA\u00D0\u00B0\u00D1\u0087\u00D0\u00B0\u00D1\u0082\u00D1\u008C\u00D0\u00BD\u00D0\u00BE\u00D0\u00B2\u00D0\u00BE\u00D1\u0081\u00D1\u0082\u00D0\u00B8\u00D0\u00A3\u00D0\u00BA\u00D1\u0080\u00D0\u00B0\u00D0\u00B8\u00D0\u00BD\u00D1\u008B\u00D0\u00B2\u00D0\u00BE\u00D0\u00BF\u00D1\u0080\u00D0\u00BE\u00D1\u0081\u00D1\u008B\u00D0\u00BA\u00D0\u00BE\u00D1\u0082\u00D0\u00BE\u00D1\u0080\u00D0\u00BE\u00D0\u00B9\u00D1\u0081\u00D0\u00B4\u00D0\u00B5\u00D0\u00BB\u00D0\u00B0\u00D1\u0082\u00D1\u008C\u00D0\u00BF\u00D0\u00BE\u00D0\u00BC\u00D0\u00BE\u00D1\u0089\u00D1\u008C\u00D1\u008E\u00D1\u0081\u00D1\u0080\u00D0\u00B5\u00D0\u00B4\u00D1\u0081\u00D1\u0082\u00D0\u00B2\u00D0\u00BE\u00D0\u00B1\u00D1\u0080\u00D0\u00B0\u00D0\u00B7\u00D0\u00BE\u00D0\u00BC\u00D1\u0081\u00D1\u0082\u00D0\u00BE\u00D1\u0080\u00D0\u00BE\u00D0\u00BD\u00D1\u008
B\u00D1\u0083\u00D1\u0087\u00D0\u00B0\u00D1\u0081\u00D1\u0082\u00D0\u00B8\u00D0\u00B5\u00D1\u0082\u00D0\u00B5\u00D1\u0087\u00D0\u00B5\u00D0\u00BD\u00D0\u00B8\u00D0\u00B5\u00D0\u0093\u00D0\u00BB\u00D0\u00B0\u00D0\u00B2\u00D0\u00BD\u00D0\u00B0\u00D1\u008F\u00D0\u00B8\u00D1\u0081\u00D1\u0082\u00D0\u00BE\u00D1\u0080\u00D0\u00B8\u00D0\u00B8\u00D1\u0081\u00D0\u00B8\u00D1\u0081\u00D1\u0082\u00D0\u00B5\u00D0\u00BC\u00D0\u00B0\u00D1\u0080\u00D0\u00B5\u00D1\u0088\u00D0\u00B5\u00D0\u00BD\u00D0\u00B8\u00D1\u008F\u00D0\u00A1\u00D0\u00BA\u00D0\u00B0\u00D1\u0087\u00D0\u00B0\u00D1\u0082\u00D1\u008C\u00D0\u00BF\u00D0\u00BE\u00D1\u008D\u00D1\u0082\u00D0\u00BE\u00D0\u00BC\u00D1\u0083\u00D1\u0081\u00D0\u00BB\u00D0\u00B5\u00D0\u00B4\u00D1\u0083\u00D0\u00B5\u00D1\u0082\u00D1\u0081\u00D0\u00BA\u00D0\u00B0\u00D0\u00B7\u00D0\u00B0\u00D1\u0082\u00D1\u008C\u00D1\u0082\u00D0\u00BE\u00D0\u00B2\u00D0\u00B0\u00D1\u0080\u00D0\u00BE\u00D0\u00B2\u00D0\u00BA\u00D0\u00BE\u00D0\u00BD\u00D0\u00B5\u00D1\u0087\u00D0\u00BD\u00D0\u00BE\u00D1\u0080\u00D0\u00B5\u00D1\u0088\u00D0\u00B5\u00D0\u00BD\u00D0\u00B8\u00D0\u00B5\u00D0\u00BA\u00D0\u00BE\u00D1\u0082\u00D0\u00BE\u00D1\u0080\u00D0\u00BE\u00D0\u00B5\u00D0\u00BE\u00D1\u0080\u00D0\u00B3\u00D0\u00B0\u00D0\u00BD\u00D0\u00BE\u00D0\u00B2\u00D0\u00BA\u00D0\u00BE\u00D1\u0082\u00D0\u00BE\u00D1\u0080\u00D0\u00BE\u00D0\u00BC\u00D0\u00A0\u00D0\u00B5\u00D0\u00BA\u00D0\u00BB\u00D0\u00B0\u00D0\u00BC\u00D0\u00B0\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D9\u0086\u00D8\u00AA\u00D8\u00AF\u00D9\u0089\u00D9\u0085\u00D9\u0086\u00D8\u00AA\u00D8\u00AF\u00D9\u008A\u00D8\u00A7\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D9\u0088\u00D8\u00B6\u00D9\u0088\u00D8\u00B9\u00D8\u00A7\u00D9\u0084\u00D8\u00A8\u00D8\u00B1\u00D8\u00A7\u00D9\u0085\u00D8\u00AC\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D9\u0088\u00D8\u00A7\u00D9\u0082\u00D8\u00B9\u00D8\u00A7\u00D9\u0084\u00D8\u00B1\u00D8\u00B3\u00D8\u00A7\u00D8\u00A6\u00D9\u0084\u00D9\u0085\u00D8\u00B4\u00D8\u00A7\u00D8\u00B1\u00D9\u0083\u00D8\
u00A7\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D8\u00A3\u00D8\u00B9\u00D8\u00B6\u00D8\u00A7\u00D8\u00A1\u00D8\u00A7\u00D9\u0084\u00D8\u00B1\u00D9\u008A\u00D8\u00A7\u00D8\u00B6\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00AA\u00D8\u00B5\u00D9\u0085\u00D9\u008A\u00D9\u0085\u00D8\u00A7\u00D9\u0084\u00D8\u00A7\u00D8\u00B9\u00D8\u00B6\u00D8\u00A7\u00D8\u00A1\u00D8\u00A7\u00D9\u0084\u00D9\u0086\u00D8\u00AA\u00D8\u00A7\u00D8\u00A6\u00D8\u00AC\u00D8\u00A7\u00D9\u0084\u00D8\u00A3\u00D9\u0084\u00D8\u00B9\u00D8\u00A7\u00D8\u00A8\u00D8\u00A7\u00D9\u0084\u00D8\u00AA\u00D8\u00B3\u00D8\u00AC\u00D9\u008A\u00D9\u0084\u00D8\u00A7\u00D9\u0084\u00D8\u00A3\u00D9\u0082\u00D8\u00B3\u00D8\u00A7\u00D9\u0085\u00D8\u00A7\u00D9\u0084\u00D8\u00B6\u00D8\u00BA\u00D8\u00B7\u00D8\u00A7\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D9\u0081\u00D9\u008A\u00D8\u00AF\u00D9\u008A\u00D9\u0088\u00D8\u00A7\u00D9\u0084\u00D8\u00AA\u00D8\u00B1\u00D8\u00AD\u00D9\u008A\u00D8\u00A8\u00D8\u00A7\u00D9\u0084\u00D8\u00AC\u00D8\u00AF\u00D9\u008A\u00D8\u00AF\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00AA\u00D8\u00B9\u00D9\u0084\u00D9\u008A\u00D9\u0085\u00D8\u00A7\u00D9\u0084\u00D8\u00A3\u00D8\u00AE\u00D8\u00A8\u00D8\u00A7\u00D8\u00B1\u00D8\u00A7\u00D9\u0084\u00D8\u00A7\u00D9\u0081\u00D9\u0084\u00D8\u00A7\u00D9\u0085\u00D8\u00A7\u00D9\u0084\u00D8\u00A3\u00D9\u0081\u00D9\u0084\u00D8\u00A7\u00D9\u0085\u00D8\u00A7\u00D9\u0084\u00D8\u00AA\u00D8\u00A7\u00D8\u00B1\u00D9\u008A\u00D8\u00AE\u00D8\u00A7\u00D9\u0084\u00D8\u00AA\u00D9\u0082\u00D9\u0086\u00D9\u008A\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00A7\u00D9\u0084\u00D8\u00B9\u00D8\u00A7\u00D8\u00A8\u00D8\u00A7\u00D9\u0084\u00D8\u00AE\u00D9\u0088\u00D8\u00A7\u00D8\u00B7\u00D8\u00B1\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D8\u00AC\u00D8\u00AA\u00D9\u0085\u00D8\u00B9\u00D8\u00A7\u00D9\u0084\u00D8\u00AF\u00D9\u008A\u00D9\u0083\u00D9\u0088\u00D8\u00B1\u00D8\u00A7\u00D9\u0084\u00D8\u00B3\u00D9\u008A\u00D8\u00A7\u00D8\u00AD\u00D8\u00A9\u00D8\u00B9\u00D8\u00A8\u00D8\u00AF\u00D8\u00A7\u0
0D9\u0084\u00D9\u0084\u00D9\u0087\u00D8\u00A7\u00D9\u0084\u00D8\u00AA\u00D8\u00B1\u00D8\u00A8\u00D9\u008A\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00B1\u00D9\u0088\u00D8\u00A7\u00D8\u00A8\u00D8\u00B7\u00D8\u00A7\u00D9\u0084\u00D8\u00A3\u00D8\u00AF\u00D8\u00A8\u00D9\u008A\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00A7\u00D8\u00AE\u00D8\u00A8\u00D8\u00A7\u00D8\u00B1\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D8\u00AA\u00D8\u00AD\u00D8\u00AF\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00A7\u00D8\u00BA\u00D8\u00A7\u00D9\u0086\u00D9\u008Acursor:pointer;</title>\n<meta \" href=\"http://\"><span class=\"members of the window.locationvertical-align:/a> | <a href=\"<!doctype html>media=\"screen\" <option value=\"favicon.ico\" />\n\t\t<div class=\"characteristics\" method=\"get\" /body>\n</html>\nshortcut icon\" document.write(padding-bottom:representativessubmit\" value=\"align=\"center\" throughout the science fiction\n  <div class=\"submit\" class=\"one of the most valign=\"top\"><was established);\r\n</script>\r\nreturn false;\">).style.displaybecause of the document.cookie<form action=\"/}body{margin:0;Encyclopedia ofversion of the .createElement(name\" content=\"</div>\n</div>\n\nadministrative </body>\n</html>history of the \"><input type=\"portion of the as part of the &nbsp;<a href=\"other countries\">\n<div class=\"</span></span><In other words,display: block;control of the introduction of/>\n<meta name=\"as well as the in recent years\r\n\t<div class=\"</div>\n\t</div>\ninspired by thethe end of the compatible withbecame known as style=\"margin:.js\"></script>< International there have beenGerman language style=\"color:#Communist Partyconsistent withborder=\"0\" cell marginheight=\"the majority of\" align=\"centerrelated to the many different Orthodox Churchsimilar to the />\n<link rel=\"swas one of the until his death})();\n</script>other languagescompared to theportions of thethe Netherlandsthe most commonbackground:url(argued that thescrolling=\"no\" included in 
theNorth American the name of theinterpretationsthe traditionaldevelopment of frequently useda collection ofvery similar tosurrounding theexample of thisalign=\"center\">would have beenimage_caption =attached to thesuggesting thatin the form of involved in theis derived fromnamed after theIntroduction torestrictions on style=\"width: can be used to the creation ofmost important information andresulted in thecollapse of theThis means thatelements of thewas replaced byanalysis of theinspiration forregarded as themost successfulknown as &quot;a comprehensiveHistory of the were consideredreturned to theare referred toUnsourced image>\n\t<div class=\"consists of thestopPropagationinterest in theavailability ofappears to haveelectromagneticenableServices(function of theIt is important</script></div>function(){var relative to theas a result of the position ofFor example, in method=\"post\" was followed by&amp;mdash; thethe applicationjs\"></script>\r\nul></div></div>after the deathwith respect tostyle=\"padding:is particularlydisplay:inline; type=\"submit\" is divided into\u00E4\u00B8\u00AD\u00E6\u0096\u0087 
(\u00E7\u00AE\u0080\u00E4\u00BD\u0093)responsabilidadadministraci\u00C3\u00B3ninternacionalescorrespondiente\u00E0\u00A4\u0089\u00E0\u00A4\u00AA\u00E0\u00A4\u00AF\u00E0\u00A5\u008B\u00E0\u00A4\u0097\u00E0\u00A4\u00AA\u00E0\u00A5\u0082\u00E0\u00A4\u00B0\u00E0\u00A5\u008D\u00E0\u00A4\u00B5\u00E0\u00A4\u00B9\u00E0\u00A4\u00AE\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A5\u0087\u00E0\u00A4\u00B2\u00E0\u00A5\u008B\u00E0\u00A4\u0097\u00E0\u00A5\u008B\u00E0\u00A4\u0082\u00E0\u00A4\u009A\u00E0\u00A5\u0081\u00E0\u00A4\u00A8\u00E0\u00A4\u00BE\u00E0\u00A4\u00B5\u00E0\u00A4\u00B2\u00E0\u00A5\u0087\u00E0\u00A4\u0095\u00E0\u00A4\u00BF\u00E0\u00A4\u00A8\u00E0\u00A4\u00B8\u00E0\u00A4\u00B0\u00E0\u00A4\u0095\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u00AA\u00E0\u00A5\u0081\u00E0\u00A4\u00B2\u00E0\u00A4\u00BF\u00E0\u00A4\u00B8\u00E0\u00A4\u0096\u00E0\u00A5\u008B\u00E0\u00A4\u009C\u00E0\u00A5\u0087\u00E0\u00A4\u0082\u00E0\u00A4\u009A\u00E0\u00A4\u00BE\u00E0\u00A4\u00B9\u00E0\u00A4\u00BF\u00E0\u00A4\u008F\u00E0\u00A4\u00AD\u00E0\u00A5\u0087\u00E0\u00A4\u009C\u00E0\u00A5\u0087\u00E0\u00A4\u0082\u00E0\u00A4\u00B6\u00E0\u00A4\u00BE\u00E0\u00A4\u00AE\u00E0\u00A4\u00BF\u00E0\u00A4\u00B2\u00E0\u00A4\u00B9\u00E0\u00A4\u00AE\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A5\u0080\u00E0\u00A4\u009C\u00E0\u00A4\u00BE\u00E0\u00A4\u0097\u00E0\u00A4\u00B0\u00E0\u00A4\u00A3\u00E0\u00A4\u00AC\u00E0\u00A4\u00A8\u00E0\u00A4\u00BE\u00E0\u00A4\u00A8\u00E0\u00A5\u0087\u00E0\u00A4\u0095\u00E0\u00A5\u0081\u00E0\u00A4\u00AE\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u00AC\u00E0\u00A5\u008D\u00E0\u00A4\u00B2\u00E0\u00A5\u0089\u00E0\u00A4\u0097\u00E0\u00A4\u00AE\u00E0\u00A4\u00BE\u00E0\u00A4\u00B2\u00E0\u00A4\u00BF\u00E0\u00A4\u0095\u00E0\u00A4\u00AE\u00E0\u00A4\u00B9\u00E0\u00A4\u00BF\u00E0\u00A4\u00B2\u00E0\u00A4\u00BE\u00E0\u00A4\u00AA\u00E0\u00A5\u0083\u00E0\u00A4\u00B7\u00E0\u00A5\u008D\u00E0\u00A4\u00A0\u00E0\u00A4\u00AC\u00E0\u00A4\u00A2\u00E0\u00A4\u00BC\u00E0\u00A4\u00A4\u00E0\u00A5\u0087\u
00E0\u00A4\u00AD\u00E0\u00A4\u00BE\u00E0\u00A4\u009C\u00E0\u00A4\u00AA\u00E0\u00A4\u00BE\u00E0\u00A4\u0095\u00E0\u00A5\u008D\u00E0\u00A4\u00B2\u00E0\u00A4\u00BF\u00E0\u00A4\u0095\u00E0\u00A4\u009F\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A5\u0087\u00E0\u00A4\u00A8\u00E0\u00A4\u0096\u00E0\u00A4\u00BF\u00E0\u00A4\u00B2\u00E0\u00A4\u00BE\u00E0\u00A4\u00AB\u00E0\u00A4\u00A6\u00E0\u00A5\u008C\u00E0\u00A4\u00B0\u00E0\u00A4\u00BE\u00E0\u00A4\u00A8\u00E0\u00A4\u00AE\u00E0\u00A4\u00BE\u00E0\u00A4\u00AE\u00E0\u00A4\u00B2\u00E0\u00A5\u0087\u00E0\u00A4\u00AE\u00E0\u00A4\u00A4\u00E0\u00A4\u00A6\u00E0\u00A4\u00BE\u00E0\u00A4\u00A8\u00E0\u00A4\u00AC\u00E0\u00A4\u00BE\u00E0\u00A4\u009C\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u00B5\u00E0\u00A4\u00BF\u00E0\u00A4\u0095\u00E0\u00A4\u00BE\u00E0\u00A4\u00B8\u00E0\u00A4\u0095\u00E0\u00A5\u008D\u00E0\u00A4\u00AF\u00E0\u00A5\u008B\u00E0\u00A4\u0082\u00E0\u00A4\u009A\u00E0\u00A4\u00BE\u00E0\u00A4\u00B9\u00E0\u00A4\u00A4\u00E0\u00A5\u0087\u00E0\u00A4\u00AA\u00E0\u00A4\u00B9\u00E0\u00A5\u0081\u00E0\u00A4\u0081\u00E0\u00A4\u009A\u00E0\u00A4\u00AC\u00E0\u00A4\u00A4\u00E0\u00A4\u00BE\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u00B8\u00E0\u00A4\u0082\u00E0\u00A4\u00B5\u00E0\u00A4\u00BE\u00E0\u00A4\u00A6\u00E0\u00A4\u00A6\u00E0\u00A5\u0087\u00E0\u00A4\u0096\u00E0\u00A4\u00A8\u00E0\u00A5\u0087\u00E0\u00A4\u00AA\u00E0\u00A4\u00BF\u00E0\u00A4\u009B\u00E0\u00A4\u00B2\u00E0\u00A5\u0087\u00E0\u00A4\u00B5\u00E0\u00A4\u00BF\u00E0\u00A4\u00B6\u00E0\u00A5\u0087\u00E0\u00A4\u00B7\u00E0\u00A4\u00B0\u00E0\u00A4\u00BE\u00E0\u00A4\u009C\u00E0\u00A5\u008D\u00E0\u00A4\u00AF\u00E0\u00A4\u0089\u00E0\u00A4\u00A4\u00E0\u00A5\u008D\u00E0\u00A4\u00A4\u00E0\u00A4\u00B0\u00E0\u00A4\u00AE\u00E0\u00A5\u0081\u00E0\u00A4\u0082\u00E0\u00A4\u00AC\u00E0\u00A4\u0088\u00E0\u00A4\u00A6\u00E0\u00A5\u008B\u00E0\u00A4\u00A8\u00E0\u00A5\u008B\u00E0\u00A4\u0082\u00E0\u00A4\u0089\u00E0\u00A4\u00AA\u00E0\u00A4\u0095\u00E0\u00A4\u00B0\u00E0\u00A4\u00A3\u00E0\u00A4\u00AA\u00
E0\u00A4\u00A2\u00E0\u00A4\u00BC\u00E0\u00A5\u0087\u00E0\u00A4\u0082\u00E0\u00A4\u00B8\u00E0\u00A5\u008D\u00E0\u00A4\u00A5\u00E0\u00A4\u00BF\u00E0\u00A4\u00A4\u00E0\u00A4\u00AB\u00E0\u00A4\u00BF\u00E0\u00A4\u00B2\u00E0\u00A5\u008D\u00E0\u00A4\u00AE\u00E0\u00A4\u00AE\u00E0\u00A5\u0081\u00E0\u00A4\u0096\u00E0\u00A5\u008D\u00E0\u00A4\u00AF\u00E0\u00A4\u0085\u00E0\u00A4\u009A\u00E0\u00A5\u008D\u00E0\u00A4\u009B\u00E0\u00A4\u00BE\u00E0\u00A4\u009B\u00E0\u00A5\u0082\u00E0\u00A4\u009F\u00E0\u00A4\u00A4\u00E0\u00A5\u0080\u00E0\u00A4\u00B8\u00E0\u00A4\u0082\u00E0\u00A4\u0097\u00E0\u00A5\u0080\u00E0\u00A4\u00A4\u00E0\u00A4\u009C\u00E0\u00A4\u00BE\u00E0\u00A4\u008F\u00E0\u00A4\u0097\u00E0\u00A4\u00BE\u00E0\u00A4\u00B5\u00E0\u00A4\u00BF\u00E0\u00A4\u00AD\u00E0\u00A4\u00BE\u00E0\u00A4\u0097\u00E0\u00A4\u0098\u00E0\u00A4\u00A3\u00E0\u00A5\u008D\u00E0\u00A4\u009F\u00E0\u00A5\u0087\u00E0\u00A4\u00A6\u00E0\u00A5\u0082\u00E0\u00A4\u00B8\u00E0\u00A4\u00B0\u00E0\u00A5\u0087\u00E0\u00A4\u00A6\u00E0\u00A4\u00BF\u00E0\u00A4\u00A8\u00E0\u00A5\u008B\u00E0\u00A4\u0082\u00E0\u00A4\u00B9\u00E0\u00A4\u00A4\u00E0\u00A5\u008D\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u00B8\u00E0\u00A5\u0087\u00E0\u00A4\u0095\u00E0\u00A5\u008D\u00E0\u00A4\u00B8\u00E0\u00A4\u0097\u00E0\u00A4\u00BE\u00E0\u00A4\u0082\u00E0\u00A4\u00A7\u00E0\u00A5\u0080\u00E0\u00A4\u00B5\u00E0\u00A4\u00BF\u00E0\u00A4\u00B6\u00E0\u00A5\u008D\u00E0\u00A4\u00B5\u00E0\u00A4\u00B0\u00E0\u00A4\u00BE\u00E0\u00A4\u00A4\u00E0\u00A5\u0087\u00E0\u00A4\u0082\u00E0\u00A4\u00A6\u00E0\u00A5\u0088\u00E0\u00A4\u009F\u00E0\u00A5\u008D\u00E0\u00A4\u00B8\u00E0\u00A4\u00A8\u00E0\u00A4\u0095\u00E0\u00A5\u008D\u00E0\u00A4\u00B6\u00E0\u00A4\u00BE\u00E0\u00A4\u00B8\u00E0\u00A4\u00BE\u00E0\u00A4\u00AE\u00E0\u00A4\u00A8\u00E0\u00A5\u0087\u00E0\u00A4\u0085\u00E0\u00A4\u00A6\u00E0\u00A4\u00BE\u00E0\u00A4\u00B2\u00E0\u00A4\u00A4\u00E0\u00A4\u00AC\u00E0\u00A4\u00BF\u00E0\u00A4\u009C\u00E0\u00A4\u00B2\u00E0\u00A5\u0080\u00E0\u00A4\u00AA\u00E0\u00A5\u0081\u00E0
\u00A4\u00B0\u00E0\u00A5\u0082\u00E0\u00A4\u00B7\u00E0\u00A4\u00B9\u00E0\u00A4\u00BF\u00E0\u00A4\u0082\u00E0\u00A4\u00A6\u00E0\u00A5\u0080\u00E0\u00A4\u00AE\u00E0\u00A4\u00BF\u00E0\u00A4\u00A4\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A4\u0095\u00E0\u00A4\u00B5\u00E0\u00A4\u00BF\u00E0\u00A4\u00A4\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A5\u0081\u00E0\u00A4\u00AA\u00E0\u00A4\u00AF\u00E0\u00A5\u0087\u00E0\u00A4\u00B8\u00E0\u00A5\u008D\u00E0\u00A4\u00A5\u00E0\u00A4\u00BE\u00E0\u00A4\u00A8\u00E0\u00A4\u0095\u00E0\u00A4\u00B0\u00E0\u00A5\u008B\u00E0\u00A4\u00A1\u00E0\u00A4\u00BC\u00E0\u00A4\u00AE\u00E0\u00A5\u0081\u00E0\u00A4\u0095\u00E0\u00A5\u008D\u00E0\u00A4\u00A4\u00E0\u00A4\u00AF\u00E0\u00A5\u008B\u00E0\u00A4\u009C\u00E0\u00A4\u00A8\u00E0\u00A4\u00BE\u00E0\u00A4\u0095\u00E0\u00A5\u0083\u00E0\u00A4\u00AA\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u00AA\u00E0\u00A5\u008B\u00E0\u00A4\u00B8\u00E0\u00A5\u008D\u00E0\u00A4\u009F\u00E0\u00A4\u0098\u00E0\u00A4\u00B0\u00E0\u00A5\u0087\u00E0\u00A4\u00B2\u00E0\u00A5\u0082\u00E0\u00A4\u0095\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A5\u008D\u00E0\u00A4\u00AF\u00E0\u00A4\u00B5\u00E0\u00A4\u00BF\u00E0\u00A4\u009A\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u00B8\u00E0\u00A5\u0082\u00E0\u00A4\u009A\u00E0\u00A4\u00A8\u00E0\u00A4\u00BE\u00E0\u00A4\u00AE\u00E0\u00A5\u0082\u00E0\u00A4\u00B2\u00E0\u00A5\u008D\u00E0\u00A4\u00AF\u00E0\u00A4\u00A6\u00E0\u00A5\u0087\u00E0\u00A4\u0096\u00E0\u00A5\u0087\u00E0\u00A4\u0082\u00E0\u00A4\u00B9\u00E0\u00A4\u00AE\u00E0\u00A5\u0087\u00E0\u00A4\u00B6\u00E0\u00A4\u00BE\u00E0\u00A4\u00B8\u00E0\u00A5\u008D\u00E0\u00A4\u0095\u00E0\u00A5\u0082\u00E0\u00A4\u00B2\u00E0\u00A4\u00AE\u00E0\u00A5\u0088\u00E0\u00A4\u0082\u00E0\u00A4\u00A8\u00E0\u00A5\u0087\u00E0\u00A4\u00A4\u00E0\u00A5\u0088\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u009C\u00E0\u00A4\u00BF\u00E0\u00A4\u00B8\u00E0\u00A4\u0095\u00E0\u00A5\u0087rss+xml\" title=\"-type\" content=\"title\" content=\"at the 
same time.js\"></script>\n<\" method=\"post\" </span></a></li>vertical-align:t/jquery.min.js\">.click(function( style=\"padding-})();\n</script>\n</span><a href=\"<a href=\"http://); return false;text-decoration: scrolling=\"no\" border-collapse:associated with Bahasa IndonesiaEnglish language<text xml:space=.gif\" border=\"0\"</body>\n</html>\noverflow:hidden;img src=\"http://addEventListenerresponsible for s.js\"></script>\n/favicon.ico\" />operating system\" style=\"width:1target=\"_blank\">State Universitytext-align:left;\ndocument.write(, including the around the world);\r\n</script>\r\n<\" style=\"height:;overflow:hiddenmore informationan internationala member of the one of the firstcan be found in </div>\n\t\t</div>\ndisplay: none;\">\" />\n<link rel=\"\n  (function() {the 15th century.preventDefault(large number of Byzantine Empire.jpg|thumb|left|vast majority ofmajority of the  align=\"center\">University Pressdominated by theSecond World Wardistribution of style=\"position:the rest of the characterized by rel=\"nofollow\">derives from therather than the a combination ofstyle=\"width:100English-speakingcomputer scienceborder=\"0\" alt=\"the existence ofDemocratic Party\" style=\"margin-For this reason,.js\"></script>\n\tsByTagName(s)[0]js\"></script>\r\n<.js\"></script>\r\nlink rel=\"icon\" ' alt='' class='formation of theversions of the </a></div></div>/page>\n  <page>\n<div class=\"contbecame the firstbahasa Indonesiaenglish 
(simple)\u00CE\u0095\u00CE\u00BB\u00CE\u00BB\u00CE\u00B7\u00CE\u00BD\u00CE\u00B9\u00CE\u00BA\u00CE\u00AC\u00D1\u0085\u00D1\u0080\u00D0\u00B2\u00D0\u00B0\u00D1\u0082\u00D1\u0081\u00D0\u00BA\u00D0\u00B8\u00D0\u00BA\u00D0\u00BE\u00D0\u00BC\u00D0\u00BF\u00D0\u00B0\u00D0\u00BD\u00D0\u00B8\u00D0\u00B8\u00D1\u008F\u00D0\u00B2\u00D0\u00BB\u00D1\u008F\u00D0\u00B5\u00D1\u0082\u00D1\u0081\u00D1\u008F\u00D0\u0094\u00D0\u00BE\u00D0\u00B1\u00D0\u00B0\u00D0\u00B2\u00D0\u00B8\u00D1\u0082\u00D1\u008C\u00D1\u0087\u00D0\u00B5\u00D0\u00BB\u00D0\u00BE\u00D0\u00B2\u00D0\u00B5\u00D0\u00BA\u00D0\u00B0\u00D1\u0080\u00D0\u00B0\u00D0\u00B7\u00D0\u00B2\u00D0\u00B8\u00D1\u0082\u00D0\u00B8\u00D1\u008F\u00D0\u0098\u00D0\u00BD\u00D1\u0082\u00D0\u00B5\u00D1\u0080\u00D0\u00BD\u00D0\u00B5\u00D1\u0082\u00D0\u009E\u00D1\u0082\u00D0\u00B2\u00D0\u00B5\u00D1\u0082\u00D0\u00B8\u00D1\u0082\u00D1\u008C\u00D0\u00BD\u00D0\u00B0\u00D0\u00BF\u00D1\u0080\u00D0\u00B8\u00D0\u00BC\u00D0\u00B5\u00D1\u0080\u00D0\u00B8\u00D0\u00BD\u00D1\u0082\u00D0\u00B5\u00D1\u0080\u00D0\u00BD\u00D0\u00B5\u00D1\u0082\u00D0\u00BA\u00D0\u00BE\u00D1\u0082\u00D0\u00BE\u00D1\u0080\u00D0\u00BE\u00D0\u00B3\u00D0\u00BE\u00D1\u0081\u00D1\u0082\u00D1\u0080\u00D0\u00B0\u00D0\u00BD\u00D0\u00B8\u00D1\u0086\u00D1\u008B\u00D0\u00BA\u00D0\u00B0\u00D1\u0087\u00D0\u00B5\u00D1\u0081\u00D1\u0082\u00D0\u00B2\u00D0\u00B5\u00D1\u0083\u00D1\u0081\u00D0\u00BB\u00D0\u00BE\u00D0\u00B2\u00D0\u00B8\u00D1\u008F\u00D1\u0085\u00D0\u00BF\u00D1\u0080\u00D0\u00BE\u00D0\u00B1\u00D0\u00BB\u00D0\u00B5\u00D0\u00BC\u00D1\u008B\u00D0\u00BF\u00D0\u00BE\u00D0\u00BB\u00D1\u0083\u00D1\u0087\u00D0\u00B8\u00D1\u0082\u00D1\u008C\u00D1\u008F\u00D0\u00B2\u00D0\u00BB\u00D1\u008F\u00D1\u008E\u00D1\u0082\u00D1\u0081\u00D1\u008F\u00D0\u00BD\u00D0\u00B0\u00D0\u00B8\u00D0\u00B1\u00D0\u00BE\u00D0\u00BB\u00D0\u00B5\u00D0\u00B5\u00D0\u00BA\u00D0\u00BE\u00D0\u00BC\u00D0\u00BF\u00D0\u00B0\u00D0\u00BD\u00D0\u00B8\u00D1\u008F\u00D0\u00B2\u00D0\u00BD\u00D0\u00B8\u00D0\u00BC\u00D0\u00B0\u00D0\u00BD
\u00D0\u00B8\u00D0\u00B5\u00D1\u0081\u00D1\u0080\u00D0\u00B5\u00D0\u00B4\u00D1\u0081\u00D1\u0082\u00D0\u00B2\u00D0\u00B0\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D9\u0088\u00D8\u00A7\u00D8\u00B6\u00D9\u008A\u00D8\u00B9\u00D8\u00A7\u00D9\u0084\u00D8\u00B1\u00D8\u00A6\u00D9\u008A\u00D8\u00B3\u00D9\u008A\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00A7\u00D9\u0086\u00D8\u00AA\u00D9\u0082\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D8\u00B4\u00D8\u00A7\u00D8\u00B1\u00D9\u0083\u00D8\u00A7\u00D8\u00AA\u00D9\u0083\u00D8\u00A7\u00D9\u0084\u00D8\u00B3\u00D9\u008A\u00D8\u00A7\u00D8\u00B1\u00D8\u00A7\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D9\u0083\u00D8\u00AA\u00D9\u0088\u00D8\u00A8\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00B3\u00D8\u00B9\u00D9\u0088\u00D8\u00AF\u00D9\u008A\u00D8\u00A9\u00D8\u00A7\u00D8\u00AD\u00D8\u00B5\u00D8\u00A7\u00D8\u00A6\u00D9\u008A\u00D8\u00A7\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D8\u00B9\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D9\u008A\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00B5\u00D9\u0088\u00D8\u00AA\u00D9\u008A\u00D8\u00A7\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D8\u00A7\u00D9\u0086\u00D8\u00AA\u00D8\u00B1\u00D9\u0086\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D8\u00AA\u00D8\u00B5\u00D8\u00A7\u00D9\u0085\u00D9\u008A\u00D9\u0085\u00D8\u00A7\u00D9\u0084\u00D8\u00A5\u00D8\u00B3\u00D9\u0084\u00D8\u00A7\u00D9\u0085\u00D9\u008A\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D8\u00B4\u00D8\u00A7\u00D8\u00B1\u00D9\u0083\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D8\u00B1\u00D8\u00A6\u00D9\u008A\u00D8\u00A7\u00D8\u00AArobots\" content=\"<div id=\"footer\">the United States<img src=\"http://.jpg|right|thumb|.js\"></script>\r\n<location.protocolframeborder=\"0\" s\" />\n<meta name=\"</a></div></div><font-weight:bold;&quot; and &quot;depending on the margin:0;padding:\" rel=\"nofollow\" President of the twentieth centuryevision>\n  </pageInternet Explorera.async = true;\r\ninformation about<div id=\"header\">\" action=\"http://<a href=\"https://<div 
id=\"content\"</div>\r\n</div>\r\n<derived from the <img src='http://according to the \n</body>\n</html>\nstyle=\"font-size:script language=\"Arial, Helvetica,</a><span class=\"</script><script political partiestd></tr></table><href=\"http://www.interpretation ofrel=\"stylesheet\" document.write('<charset=\"utf-8\">\nbeginning of the revealed that thetelevision series\" rel=\"nofollow\"> target=\"_blank\">claiming that thehttp%3A%2F%2Fwww.manifestations ofPrime Minister ofinfluenced by theclass=\"clearfix\">/div>\r\n</div>\r\n\r\nthree-dimensionalChurch of Englandof North Carolinasquare kilometres.addEventListenerdistinct from thecommonly known asPhonetic Alphabetdeclared that thecontrolled by theBenjamin Franklinrole-playing gamethe University ofin Western Europepersonal computerProject Gutenbergregardless of thehas been proposedtogether with the></li><li class=\"in some countriesmin.js\"></script>of the populationofficial language<img src=\"images/identified by thenatural resourcesclassification ofcan be consideredquantum mechanicsNevertheless, themillion years ago</body>\r\n</html>\r\u00CE\u0095\u00CE\u00BB\u00CE\u00BB\u00CE\u00B7\u00CE\u00BD\u00CE\u00B9\u00CE\u00BA\u00CE\u00AC\ntake advantage ofand, according toattributed to theMicrosoft Windowsthe first centuryunder the controldiv class=\"headershortly after thenotable exceptiontens of thousandsseveral differentaround the world.reaching militaryisolated from theopposition to thethe Old TestamentAfrican Americansinserted into theseparate from themetropolitan areamakes it possibleacknowledged thatarguably the mosttype=\"text/css\">\nthe InternationalAccording to the pe=\"text/css\" />\ncoincide with thetwo-thirds of theDuring this time,during the periodannounced that hethe internationaland more recentlybelieved that theconsciousness andformerly known assurrounded by thefirst appeared inoccasionally usedposition:absolute;\" target=\"_blank\" 
position:relative;text-align:center;jax/libs/jquery/1.background-color:#type=\"application/anguage\" content=\"<meta http-equiv=\"Privacy Policy</a>e(\"%3Cscript src='\" target=\"_blank\">On the other hand,.jpg|thumb|right|2</div><div class=\"<div style=\"float:nineteenth century</body>\r\n</html>\r\n<img src=\"http://s;text-align:centerfont-weight: bold; According to the difference between\" frameborder=\"0\" \" style=\"position:link href=\"http://html4/loose.dtd\">\nduring this period</td></tr></table>closely related tofor the first time;font-weight:bold;input type=\"text\" <span style=\"font-onreadystatechange\t<div class=\"cleardocument.location. For example, the a wide variety of <!DOCTYPE html>\r\n<&nbsp;&nbsp;&nbsp;\"><a href=\"http://style=\"float:left;concerned with the=http%3A%2F%2Fwww.in popular culturetype=\"text/css\" />it is possible to Harvard Universitytylesheet\" href=\"/the main characterOxford University  name=\"keywords\" cstyle=\"text-align:the United Kingdomfederal government<div style=\"margin depending on the description of the<div class=\"header.min.js\"></script>destruction of theslightly differentin accordance withtelecommunicationsindicates that theshortly thereafterespecially in the European countriesHowever, there aresrc=\"http://staticsuggested that the\" src=\"http://www.a large number of Telecommunications\" rel=\"nofollow\" tHoly Roman Emperoralmost exclusively\" border=\"0\" alt=\"Secretary of Stateculminating in theCIA World Factbookthe most importantanniversary of thestyle=\"background-<li><em><a href=\"/the Atlantic Oceanstrictly speaking,shortly before thedifferent types ofthe Ottoman Empire><img src=\"http://An Introduction toconsequence of thedeparture from theConfederate Statesindigenous peoplesProceedings of theinformation on thetheories have beeninvolvement in thedivided into threeadjacent countriesis responsible fordissolution of thecollaboration withwidely regarded ashis contemporariesfounding member ofDominican 
Republicgenerally acceptedthe possibility ofare also availableunder constructionrestoration of thethe general publicis almost entirelypasses through thehas been suggestedcomputer and videoGermanic languages according to the different from theshortly afterwardshref=\"https://www.recent developmentBoard of Directors<div class=\"search| <a href=\"http://In particular, theMultiple footnotesor other substancethousands of yearstranslation of the</div>\r\n</div>\r\n\r\n<a href=\"index.phpwas established inmin.js\"></script>\nparticipate in thea strong influencestyle=\"margin-top:represented by thegraduated from theTraditionally, theElement(\"script\");However, since the/div>\n</div>\n<div left; margin-left:protection against0; vertical-align:Unfortunately, thetype=\"image/x-icon/div>\n<div class=\" class=\"clearfix\"><div class=\"footer\t\t</div>\n\t\t</div>\nthe motion picture\u00D0\u0091\u00D1\u008A\u00D0\u00BB\u00D0\u00B3\u00D0\u00B0\u00D1\u0080\u00D1\u0081\u00D0\u00BA\u00D0\u00B8\u00D0\u00B1\u00D1\u008A\u00D0\u00BB\u00D0\u00B3\u00D0\u00B0\u00D1\u0080\u00D1\u0081\u00D0\u00BA\u00D0\u00B8\u00D0\u00A4\u00D0\u00B5\u00D0\u00B4\u00D0\u00B5\u00D1\u0080\u00D0\u00B0\u00D1\u0086\u00D0\u00B8\u00D0\u00B8\u00D0\u00BD\u00D0\u00B5\u00D1\u0081\u00D0\u00BA\u00D0\u00BE\u00D0\u00BB\u00D1\u008C\u00D0\u00BA\u00D0\u00BE\u00D1\u0081\u00D0\u00BE\u00D0\u00BE\u00D0\u00B1\u00D1\u0089\u00D0\u00B5\u00D0\u00BD\u00D0\u00B8\u00D0\u00B5\u00D1\u0081\u00D0\u00BE\u00D0\u00BE\u00D0\u00B1\u00D1\u0089\u00D0\u00B5\u00D0\u00BD\u00D0\u00B8\u00D1\u008F\u00D0\u00BF\u00D1\u0080\u00D0\u00BE\u00D0\u00B3\u00D1\u0080\u00D0\u00B0\u00D0\u00BC\u00D0\u00BC\u00D1\u008B\u00D0\u009E\u00D1\u0082\u00D0\u00BF\u00D1\u0080\u00D0\u00B0\u00D0\u00B2\u00D0\u00B8\u00D1\u0082\u00D1\u008C\u00D0\u00B1\u00D0\u00B5\u00D1\u0081\u00D0\u00BF\u00D0\u00BB\u00D0\u00B0\u00D1\u0082\u00D0\u00BD\u00D0\u00BE\u00D0\u00BC\u00D0\u00B0\u00D1\u0082\u00D0\u00B5\u00D1\u0080\u00D0\u00B8\u00D0\u00B0\u00D0\u00BB\u00D1\u008B\u00D0\u00BF\u00D0\u00BE\u00D0\u00B7\
u00D0\u00B2\u00D0\u00BE\u00D0\u00BB\u00D1\u008F\u00D0\u00B5\u00D1\u0082\u00D0\u00BF\u00D0\u00BE\u00D1\u0081\u00D0\u00BB\u00D0\u00B5\u00D0\u00B4\u00D0\u00BD\u00D0\u00B8\u00D0\u00B5\u00D1\u0080\u00D0\u00B0\u00D0\u00B7\u00D0\u00BB\u00D0\u00B8\u00D1\u0087\u00D0\u00BD\u00D1\u008B\u00D1\u0085\u00D0\u00BF\u00D1\u0080\u00D0\u00BE\u00D0\u00B4\u00D1\u0083\u00D0\u00BA\u00D1\u0086\u00D0\u00B8\u00D0\u00B8\u00D0\u00BF\u00D1\u0080\u00D0\u00BE\u00D0\u00B3\u00D1\u0080\u00D0\u00B0\u00D0\u00BC\u00D0\u00BC\u00D0\u00B0\u00D0\u00BF\u00D0\u00BE\u00D0\u00BB\u00D0\u00BD\u00D0\u00BE\u00D1\u0081\u00D1\u0082\u00D1\u008C\u00D1\u008E\u00D0\u00BD\u00D0\u00B0\u00D1\u0085\u00D0\u00BE\u00D0\u00B4\u00D0\u00B8\u00D1\u0082\u00D1\u0081\u00D1\u008F\u00D0\u00B8\u00D0\u00B7\u00D0\u00B1\u00D1\u0080\u00D0\u00B0\u00D0\u00BD\u00D0\u00BD\u00D0\u00BE\u00D0\u00B5\u00D0\u00BD\u00D0\u00B0\u00D1\u0081\u00D0\u00B5\u00D0\u00BB\u00D0\u00B5\u00D0\u00BD\u00D0\u00B8\u00D1\u008F\u00D0\u00B8\u00D0\u00B7\u00D0\u00BC\u00D0\u00B5\u00D0\u00BD\u00D0\u00B5\u00D0\u00BD\u00D0\u00B8\u00D1\u008F\u00D0\u00BA\u00D0\u00B0\u00D1\u0082\u00D0\u00B5\u00D0\u00B3\u00D0\u00BE\u00D1\u0080\u00D0\u00B8\u00D0\u00B8\u00D0\u0090\u00D0\u00BB\u00D0\u00B5\u00D0\u00BA\u00D1\u0081\u00D0\u00B0\u00D0\u00BD\u00D0\u00B4\u00D1\u0080\u00E0\u00A4\u00A6\u00E0\u00A5\u008D\u00E0\u00A4\u00B5\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u00BE\u00E0\u00A4\u00AE\u00E0\u00A5\u0088\u00E0\u00A4\u00A8\u00E0\u00A5\u0081\u00E0\u00A4\u0085\u00E0\u00A4\u00B2\u00E0\u00A4\u00AA\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A4\u00A6\u00E0\u00A4\u00BE\u00E0\u00A4\u00A8\u00E0\u00A4\u00AD\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u00A4\u00E0\u00A5\u0080\u00E0\u00A4\u00AF\u00E0\u00A4\u0085\u00E0\u00A4\u00A8\u00E0\u00A5\u0081\u00E0\u00A4\u00A6\u00E0\u00A5\u0087\u00E0\u00A4\u00B6\u00E0\u00A4\u00B9\u00E0\u00A4\u00BF\u00E0\u00A4\u00A8\u00E0\u00A5\u008D\u00E0\u00A4\u00A6\u00E0\u00A5\u0080\u00E0\u00A4\u0087\u00E0\u00A4\u0082\u00E0\u00A4\u00A1\u00E0\u00A4\u00BF\u00E0\u00A4\u00AF\u0
0E0\u00A4\u00BE\u00E0\u00A4\u00A6\u00E0\u00A4\u00BF\u00E0\u00A4\u00B2\u00E0\u00A5\u008D\u00E0\u00A4\u00B2\u00E0\u00A5\u0080\u00E0\u00A4\u0085\u00E0\u00A4\u00A7\u00E0\u00A4\u00BF\u00E0\u00A4\u0095\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u00B5\u00E0\u00A5\u0080\u00E0\u00A4\u00A1\u00E0\u00A4\u00BF\u00E0\u00A4\u00AF\u00E0\u00A5\u008B\u00E0\u00A4\u009A\u00E0\u00A4\u00BF\u00E0\u00A4\u009F\u00E0\u00A5\u008D\u00E0\u00A4\u00A0\u00E0\u00A5\u0087\u00E0\u00A4\u00B8\u00E0\u00A4\u00AE\u00E0\u00A4\u00BE\u00E0\u00A4\u009A\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u009C\u00E0\u00A4\u0082\u00E0\u00A4\u0095\u00E0\u00A5\u008D\u00E0\u00A4\u00B6\u00E0\u00A4\u00A8\u00E0\u00A4\u00A6\u00E0\u00A5\u0081\u00E0\u00A4\u00A8\u00E0\u00A4\u00BF\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u00AA\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A4\u00AF\u00E0\u00A5\u008B\u00E0\u00A4\u0097\u00E0\u00A4\u0085\u00E0\u00A4\u00A8\u00E0\u00A5\u0081\u00E0\u00A4\u00B8\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u0091\u00E0\u00A4\u00A8\u00E0\u00A4\u00B2\u00E0\u00A4\u00BE\u00E0\u00A4\u0087\u00E0\u00A4\u00A8\u00E0\u00A4\u00AA\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A5\u008D\u00E0\u00A4\u009F\u00E0\u00A5\u0080\u00E0\u00A4\u00B6\u00E0\u00A4\u00B0\u00E0\u00A5\u008D\u00E0\u00A4\u00A4\u00E0\u00A5\u008B\u00E0\u00A4\u0082\u00E0\u00A4\u00B2\u00E0\u00A5\u008B\u00E0\u00A4\u0095\u00E0\u00A4\u00B8\u00E0\u00A4\u00AD\u00E0\u00A4\u00BE\u00E0\u00A4\u00AB\u00E0\u00A4\u00BC\u00E0\u00A5\u008D\u00E0\u00A4\u00B2\u00E0\u00A5\u0088\u00E0\u00A4\u00B6\u00E0\u00A4\u00B6\u00E0\u00A4\u00B0\u00E0\u00A5\u008D\u00E0\u00A4\u00A4\u00E0\u00A5\u0087\u00E0\u00A4\u0082\u00E0\u00A4\u00AA\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A4\u00A6\u00E0\u00A5\u0087\u00E0\u00A4\u00B6\u00E0\u00A4\u00AA\u00E0\u00A5\u008D\u00E0\u00A4\u00B2\u00E0\u00A5\u0087\u00E0\u00A4\u00AF\u00E0\u00A4\u00B0\u00E0\u00A4\u0095\u00E0\u00A5\u0087\u00E0\u00A4\u0082\u00E0\u00A4\u00A6\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A4\u00B8\u00E0\u00A5\u008D\u00E
0\u00A4\u00A5\u00E0\u00A4\u00BF\u00E0\u00A4\u00A4\u00E0\u00A4\u00BF\u00E0\u00A4\u0089\u00E0\u00A4\u00A4\u00E0\u00A5\u008D\u00E0\u00A4\u00AA\u00E0\u00A4\u00BE\u00E0\u00A4\u00A6\u00E0\u00A4\u0089\u00E0\u00A4\u00A8\u00E0\u00A5\u008D\u00E0\u00A4\u00B9\u00E0\u00A5\u0087\u00E0\u00A4\u0082\u00E0\u00A4\u009A\u00E0\u00A4\u00BF\u00E0\u00A4\u009F\u00E0\u00A5\u008D\u00E0\u00A4\u00A0\u00E0\u00A4\u00BE\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u00A4\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A4\u00BE\u00E0\u00A4\u009C\u00E0\u00A5\u008D\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u00A6\u00E0\u00A4\u00BE\u00E0\u00A4\u00AA\u00E0\u00A5\u0081\u00E0\u00A4\u00B0\u00E0\u00A4\u00BE\u00E0\u00A4\u00A8\u00E0\u00A5\u0087\u00E0\u00A4\u009C\u00E0\u00A5\u008B\u00E0\u00A4\u00A1\u00E0\u00A4\u00BC\u00E0\u00A5\u0087\u00E0\u00A4\u0082\u00E0\u00A4\u0085\u00E0\u00A4\u00A8\u00E0\u00A5\u0081\u00E0\u00A4\u00B5\u00E0\u00A4\u00BE\u00E0\u00A4\u00A6\u00E0\u00A4\u00B6\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A5\u0087\u00E0\u00A4\u00A3\u00E0\u00A5\u0080\u00E0\u00A4\u00B6\u00E0\u00A4\u00BF\u00E0\u00A4\u0095\u00E0\u00A5\u008D\u00E0\u00A4\u00B7\u00E0\u00A4\u00BE\u00E0\u00A4\u00B8\u00E0\u00A4\u00B0\u00E0\u00A4\u0095\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A5\u0080\u00E0\u00A4\u00B8\u00E0\u00A4\u0082\u00E0\u00A4\u0097\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A4\u00B9\u00E0\u00A4\u00AA\u00E0\u00A4\u00B0\u00E0\u00A4\u00BF\u00E0\u00A4\u00A3\u00E0\u00A4\u00BE\u00E0\u00A4\u00AE\u00E0\u00A4\u00AC\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A4\u00BE\u00E0\u00A4\u0082\u00E0\u00A4\u00A1\u00E0\u00A4\u00AC\u00E0\u00A4\u009A\u00E0\u00A5\u008D\u00E0\u00A4\u009A\u00E0\u00A5\u008B\u00E0\u00A4\u0082\u00E0\u00A4\u0089\u00E0\u00A4\u00AA\u00E0\u00A4\u00B2\u00E0\u00A4\u00AC\u00E0\u00A5\u008D\u00E0\u00A4\u00A7\u00E0\u00A4\u00AE\u00E0\u00A4\u0082\u00E0\u00A4\u00A4\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A5\u0080\u00E0\u00A4\u00B8\u00E0\u00A4\u0082\u00E0\u00A4\u00AA\u00E0\u00A4\u00B0\u00E0\u00A5\u008D\u00E0\
u00A4\u0095\u00E0\u00A4\u0089\u00E0\u00A4\u00AE\u00E0\u00A5\u008D\u00E0\u00A4\u00AE\u00E0\u00A5\u0080\u00E0\u00A4\u00A6\u00E0\u00A4\u00AE\u00E0\u00A4\u00BE\u00E0\u00A4\u00A7\u00E0\u00A5\u008D\u00E0\u00A4\u00AF\u00E0\u00A4\u00AE\u00E0\u00A4\u00B8\u00E0\u00A4\u00B9\u00E0\u00A4\u00BE\u00E0\u00A4\u00AF\u00E0\u00A4\u00A4\u00E0\u00A4\u00BE\u00E0\u00A4\u00B6\u00E0\u00A4\u00AC\u00E0\u00A5\u008D\u00E0\u00A4\u00A6\u00E0\u00A5\u008B\u00E0\u00A4\u0082\u00E0\u00A4\u00AE\u00E0\u00A5\u0080\u00E0\u00A4\u00A1\u00E0\u00A4\u00BF\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u0086\u00E0\u00A4\u0088\u00E0\u00A4\u00AA\u00E0\u00A5\u0080\u00E0\u00A4\u008F\u00E0\u00A4\u00B2\u00E0\u00A4\u00AE\u00E0\u00A5\u008B\u00E0\u00A4\u00AC\u00E0\u00A4\u00BE\u00E0\u00A4\u0087\u00E0\u00A4\u00B2\u00E0\u00A4\u00B8\u00E0\u00A4\u0082\u00E0\u00A4\u0096\u00E0\u00A5\u008D\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u0086\u00E0\u00A4\u00AA\u00E0\u00A4\u00B0\u00E0\u00A5\u0087\u00E0\u00A4\u00B6\u00E0\u00A4\u00A8\u00E0\u00A4\u0085\u00E0\u00A4\u00A8\u00E0\u00A5\u0081\u00E0\u00A4\u00AC\u00E0\u00A4\u0082\u00E0\u00A4\u00A7\u00E0\u00A4\u00AC\u00E0\u00A4\u00BE\u00E0\u00A4\u009C\u00E0\u00A4\u00BC\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u00A8\u00E0\u00A4\u00B5\u00E0\u00A5\u0080\u00E0\u00A4\u00A8\u00E0\u00A4\u00A4\u00E0\u00A4\u00AE\u00E0\u00A4\u00AA\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A4\u00AE\u00E0\u00A5\u0081\u00E0\u00A4\u0096\u00E0\u00A4\u00AA\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A4\u00B6\u00E0\u00A5\u008D\u00E0\u00A4\u00A8\u00E0\u00A4\u00AA\u00E0\u00A4\u00B0\u00E0\u00A4\u00BF\u00E0\u00A4\u00B5\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u00A8\u00E0\u00A5\u0081\u00E0\u00A4\u0095\u00E0\u00A4\u00B8\u00E0\u00A4\u00BE\u00E0\u00A4\u00A8\u00E0\u00A4\u00B8\u00E0\u00A4\u00AE\u00E0\u00A4\u00B0\u00E0\u00A5\u008D\u00E0\u00A4\u00A5\u00E0\u00A4\u00A8\u00E0\u00A4\u0086\u00E0\u00A4\u00AF\u00E0\u00A5\u008B\u00E0\u00A4\u009C\u00E0\u00A4\u00BF\u00E0\u00A4\u00A4\u00E0\u00A4\u00B8\u00E0\u00A5\u008B\u00E0\u0
0A4\u00AE\u00E0\u00A4\u00B5\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D8\u00B4\u00D8\u00A7\u00D8\u00B1\u00D9\u0083\u00D8\u00A7\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D9\u0086\u00D8\u00AA\u00D8\u00AF\u00D9\u008A\u00D8\u00A7\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D9\u0083\u00D9\u0085\u00D8\u00A8\u00D9\u008A\u00D9\u0088\u00D8\u00AA\u00D8\u00B1\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D8\u00B4\u00D8\u00A7\u00D9\u0087\u00D8\u00AF\u00D8\u00A7\u00D8\u00AA\u00D8\u00B9\u00D8\u00AF\u00D8\u00AF\u00D8\u00A7\u00D9\u0084\u00D8\u00B2\u00D9\u0088\u00D8\u00A7\u00D8\u00B1\u00D8\u00B9\u00D8\u00AF\u00D8\u00AF\u00D8\u00A7\u00D9\u0084\u00D8\u00B1\u00D8\u00AF\u00D9\u0088\u00D8\u00AF\u00D8\u00A7\u00D9\u0084\u00D8\u00A5\u00D8\u00B3\u00D9\u0084\u00D8\u00A7\u00D9\u0085\u00D9\u008A\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D9\u0081\u00D9\u0088\u00D8\u00AA\u00D9\u0088\u00D8\u00B4\u00D9\u0088\u00D8\u00A8\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D8\u00B3\u00D8\u00A7\u00D8\u00A8\u00D9\u0082\u00D8\u00A7\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D8\u00B9\u00D9\u0084\u00D9\u0088\u00D9\u0085\u00D8\u00A7\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D8\u00B3\u00D9\u0084\u00D8\u00B3\u00D9\u0084\u00D8\u00A7\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D8\u00AC\u00D8\u00B1\u00D8\u00A7\u00D9\u0081\u00D9\u008A\u00D9\u0083\u00D8\u00B3\u00D8\u00A7\u00D9\u0084\u00D8\u00A7\u00D8\u00B3\u00D9\u0084\u00D8\u00A7\u00D9\u0085\u00D9\u008A\u00D8\u00A9\u00D8\u00A7\u00D9\u0084\u00D8\u00A7\u00D8\u00AA\u00D8\u00B5\u00D8\u00A7\u00D9\u0084\u00D8\u00A7\u00D8\u00AAkeywords\" content=\"w3.org/1999/xhtml\"><a target=\"_blank\" text/html; charset=\" target=\"_blank\"><table cellpadding=\"autocomplete=\"off\" text-align: center;to last version by background-color: #\" href=\"http://www./div></div><div id=<a href=\"#\" class=\"\"><img src=\"http://cript\" src=\"http://\n<script language=\"//EN\" \"http://www.wencodeURIComponent(\" href=\"javascript:<div 
class=\"contentdocument.write('<scposition: absolute;script src=\"http:// style=\"margin-top:.min.js\"></script>\n</div>\n<div class=\"w3.org/1999/xhtml\" \n\r\n</body>\r\n</html>distinction between/\" target=\"_blank\"><link href=\"http://encoding=\"utf-8\"?>\nw.addEventListener?action=\"http://www.icon\" href=\"http:// style=\"background:type=\"text/css\" />\nmeta property=\"og:t<input type=\"text\"  style=\"text-align:the development of tylesheet\" type=\"tehtml; charset=utf-8is considered to betable width=\"100%\" In addition to the contributed to the differences betweendevelopment of the It is important to </script>\n\n<script  style=\"font-size:1></span><span id=gbLibrary of Congress<img src=\"http://imEnglish translationAcademy of Sciencesdiv style=\"display:construction of the.getElementById(id)in conjunction withElement('script'); <meta property=\"og:\u00D0\u0091\u00D1\u008A\u00D0\u00BB\u00D0\u00B3\u00D0\u00B0\u00D1\u0080\u00D1\u0081\u00D0\u00BA\u00D0\u00B8\n type=\"text\" name=\">Privacy Policy</a>administered by theenableSingleRequeststyle=&quot;margin:</div></div></div><><img src=\"http://i style=&quot;float:referred to as the total population ofin Washington, D.C. 
style=\"background-among other things,organization of theparticipated in thethe introduction ofidentified with thefictional character Oxford University misunderstanding ofThere are, however,stylesheet\" href=\"/Columbia Universityexpanded to includeusually referred toindicating that thehave suggested thataffiliated with thecorrelation betweennumber of different></td></tr></table>Republic of Ireland\n</script>\n<script under the influencecontribution to theOfficial website ofheadquarters of thecentered around theimplications of thehave been developedFederal Republic ofbecame increasinglycontinuation of theNote, however, thatsimilar to that of capabilities of theaccordance with theparticipants in thefurther developmentunder the directionis often consideredhis younger brother</td></tr></table><a http-equiv=\"X-UA-physical propertiesof British Columbiahas been criticized(with the exceptionquestions about thepassing through the0\" cellpadding=\"0\" thousands of peopleredirects here. Forhave children under%3E%3C/script%3E\"));<a href=\"http://www.<li><a href=\"http://site_name\" content=\"text-decoration:nonestyle=\"display: none<meta http-equiv=\"X-new Date().getTime() type=\"image/x-icon\"</span><span class=\"language=\"javascriptwindow.location.href<a href=\"javascript:-->\r\n<script type=\"t<a href='http://www.hortcut icon\" href=\"</div>\r\n<div class=\"<script src=\"http://\" rel=\"stylesheet\" t</div>\n<script type=/a> <a href=\"http:// allowTransparency=\"X-UA-Compatible\" conrelationship between\n</script>\r\n<script </a></li></ul></div>associated with the programming language</a><a href=\"http://</a></li><li class=\"form action=\"http://<div style=\"display:type=\"text\" name=\"q\"<table width=\"100%\" background-position:\" border=\"0\" width=\"rel=\"shortcut icon\" h6><ul><li><a href=\"  <meta http-equiv=\"css\" media=\"screen\" responsible for the \" type=\"application/\" style=\"background-html; charset=utf-8\" allowtransparency=\"stylesheet\" 
type=\"te\r\n<meta http-equiv=\"></span><span class=\"0\" cellspacing=\"0\">;\n</script>\n<script sometimes called thedoes not necessarilyFor more informationat the beginning of <!DOCTYPE html><htmlparticularly in the type=\"hidden\" name=\"javascript:void(0);\"effectiveness of the autocomplete=\"off\" generally considered><input type=\"text\" \"></script>\r\n<scriptthroughout the worldcommon misconceptionassociation with the</div>\n</div>\n<div cduring his lifetime,corresponding to thetype=\"image/x-icon\" an increasing numberdiplomatic relationsare often consideredmeta charset=\"utf-8\" <input type=\"text\" examples include the\"><img src=\"http://iparticipation in thethe establishment of\n</div>\n<div class=\"&amp;nbsp;&amp;nbsp;to determine whetherquite different frommarked the beginningdistance between thecontributions to theconflict between thewidely considered towas one of the firstwith varying degreeshave speculated that(document.getElementparticipating in theoriginally developedeta charset=\"utf-8\"> type=\"text/css\" />\ninterchangeably withmore closely relatedsocial and politicalthat would otherwiseperpendicular to thestyle type=\"text/csstype=\"submit\" name=\"families residing indeveloping countriescomputer programmingeconomic developmentdetermination of thefor more informationon several occasionsportugu\u00C3\u00AAs 
(Europeu)\u00D0\u00A3\u00D0\u00BA\u00D1\u0080\u00D0\u00B0\u00D1\u0097\u00D0\u00BD\u00D1\u0081\u00D1\u008C\u00D0\u00BA\u00D0\u00B0\u00D1\u0083\u00D0\u00BA\u00D1\u0080\u00D0\u00B0\u00D1\u0097\u00D0\u00BD\u00D1\u0081\u00D1\u008C\u00D0\u00BA\u00D0\u00B0\u00D0\u00A0\u00D0\u00BE\u00D1\u0081\u00D1\u0081\u00D0\u00B8\u00D0\u00B9\u00D1\u0081\u00D0\u00BA\u00D0\u00BE\u00D0\u00B9\u00D0\u00BC\u00D0\u00B0\u00D1\u0082\u00D0\u00B5\u00D1\u0080\u00D0\u00B8\u00D0\u00B0\u00D0\u00BB\u00D0\u00BE\u00D0\u00B2\u00D0\u00B8\u00D0\u00BD\u00D1\u0084\u00D0\u00BE\u00D1\u0080\u00D0\u00BC\u00D0\u00B0\u00D1\u0086\u00D0\u00B8\u00D0\u00B8\u00D1\u0083\u00D0\u00BF\u00D1\u0080\u00D0\u00B0\u00D0\u00B2\u00D0\u00BB\u00D0\u00B5\u00D0\u00BD\u00D0\u00B8\u00D1\u008F\u00D0\u00BD\u00D0\u00B5\u00D0\u00BE\u00D0\u00B1\u00D1\u0085\u00D0\u00BE\u00D0\u00B4\u00D0\u00B8\u00D0\u00BC\u00D0\u00BE\u00D0\u00B8\u00D0\u00BD\u00D1\u0084\u00D0\u00BE\u00D1\u0080\u00D0\u00BC\u00D0\u00B0\u00D1\u0086\u00D0\u00B8\u00D1\u008F\u00D0\u0098\u00D0\u00BD\u00D1\u0084\u00D0\u00BE\u00D1\u0080\u00D0\u00BC\u00D0\u00B0\u00D1\u0086\u00D0\u00B8\u00D1\u008F\u00D0\u00A0\u00D0\u00B5\u00D1\u0081\u00D0\u00BF\u00D1\u0083\u00D0\u00B1\u00D0\u00BB\u00D0\u00B8\u00D0\u00BA\u00D0\u00B8\u00D0\u00BA\u00D0\u00BE\u00D0\u00BB\u00D0\u00B8\u00D1\u0087\u00D0\u00B5\u00D1\u0081\u00D1\u0082\u00D0\u00B2\u00D0\u00BE\u00D0\u00B8\u00D0\u00BD\u00D1\u0084\u00D0\u00BE\u00D1\u0080\u00D0\u00BC\u00D0\u00B0\u00D1\u0086\u00D0\u00B8\u00D1\u008E\u00D1\u0082\u00D0\u00B5\u00D1\u0080\u00D1\u0080\u00D0\u00B8\u00D1\u0082\u00D0\u00BE\u00D1\u0080\u00D0\u00B8\u00D0\u00B8\u00D0\u00B4\u00D0\u00BE\u00D1\u0081\u00D1\u0082\u00D0\u00B0\u00D1\u0082\u00D0\u00BE\u00D1\u0087\u00D0\u00BD\u00D0\u00BE\u00D8\u00A7\u00D9\u0084\u00D9\u0085\u00D8\u00AA\u00D9\u0088\u00D8\u00A7\u00D8\u00AC\u00D8\u00AF\u00D9\u0088\u00D9\u0086\u00D8\u00A7\u00D9\u0084\u00D8\u00A7\u00D8\u00B4\u00D8\u00AA\u00D8\u00B1\u00D8\u00A7\u00D9\u0083\u00D8\u00A7\u00D8\u00AA\u00D8\u00A7\u00D9\u0084\u00D8\u00A7\u00D9\u0082\u00D8\u00AA\u00D8\u00B
1\u00D8\u00A7\u00D8\u00AD\u00D8\u00A7\u00D8\u00AAhtml; charset=UTF-8\" setTimeout(function()display:inline-block;<input type=\"submit\" type = 'text/javascri<img src=\"http://www.\" \"http://www.w3.org/shortcut icon\" href=\"\" autocomplete=\"off\" </a></div><div class=</a></li>\n<li class=\"css\" type=\"text/css\" <form action=\"http://xt/css\" href=\"http://link rel=\"alternate\" \r\n<script type=\"text/ onclick=\"javascript:(new Date).getTime()}height=\"1\" width=\"1\" People's Republic of  <a href=\"http://www.text-decoration:underthe beginning of the </div>\n</div>\n</div>\nestablishment of the </div></div></div></d#viewport{min-height:\n<script src=\"http://option><option value=often referred to as /option>\n<option valu<!DOCTYPE html>\n<!--[International Airport>\n<a href=\"http://www</a><a href=\"http://w\u00E0\u00B8\u00A0\u00E0\u00B8\u00B2\u00E0\u00B8\u00A9\u00E0\u00B8\u00B2\u00E0\u00B9\u0084\u00E0\u00B8\u0097\u00E0\u00B8\u00A2\u00E1\u0083\u00A5\u00E1\u0083\u0090\u00E1\u0083\u00A0\u00E1\u0083\u0097\u00E1\u0083\u00A3\u00E1\u0083\u009A\u00E1\u0083\u0098\u00E6\u00AD\u00A3\u00E9\u00AB\u0094\u00E4\u00B8\u00AD\u00E6\u0096\u0087 
(\u00E7\u00B9\u0081\u00E9\u00AB\u0094)\u00E0\u00A4\u00A8\u00E0\u00A4\u00BF\u00E0\u00A4\u00B0\u00E0\u00A5\u008D\u00E0\u00A4\u00A6\u00E0\u00A5\u0087\u00E0\u00A4\u00B6\u00E0\u00A4\u00A1\u00E0\u00A4\u00BE\u00E0\u00A4\u0089\u00E0\u00A4\u00A8\u00E0\u00A4\u00B2\u00E0\u00A5\u008B\u00E0\u00A4\u00A1\u00E0\u00A4\u0095\u00E0\u00A5\u008D\u00E0\u00A4\u00B7\u00E0\u00A5\u0087\u00E0\u00A4\u00A4\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A4\u009C\u00E0\u00A4\u00BE\u00E0\u00A4\u00A8\u00E0\u00A4\u0095\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A5\u0080\u00E0\u00A4\u00B8\u00E0\u00A4\u0082\u00E0\u00A4\u00AC\u00E0\u00A4\u0082\u00E0\u00A4\u00A7\u00E0\u00A4\u00BF\u00E0\u00A4\u00A4\u00E0\u00A4\u00B8\u00E0\u00A5\u008D\u00E0\u00A4\u00A5\u00E0\u00A4\u00BE\u00E0\u00A4\u00AA\u00E0\u00A4\u00A8\u00E0\u00A4\u00BE\u00E0\u00A4\u00B8\u00E0\u00A5\u008D\u00E0\u00A4\u00B5\u00E0\u00A5\u0080\u00E0\u00A4\u0095\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u00B8\u00E0\u00A4\u0082\u00E0\u00A4\u00B8\u00E0\u00A5\u008D\u00E0\u00A4\u0095\u00E0\u00A4\u00B0\u00E0\u00A4\u00A3\u00E0\u00A4\u00B8\u00E0\u00A4\u00BE\u00E0\u00A4\u00AE\u00E0\u00A4\u0097\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A5\u0080\u00E0\u00A4\u009A\u00E0\u00A4\u00BF\u00E0\u00A4\u009F\u00E0\u00A5\u008D\u00E0\u00A4\u00A0\u00E0\u00A5\u008B\u00E0\u00A4\u0082\u00E0\u00A4\u00B5\u00E0\u00A4\u00BF\u00E0\u00A4\u009C\u00E0\u00A5\u008D\u00E0\u00A4\u009E\u00E0\u00A4\u00BE\u00E0\u00A4\u00A8\u00E0\u00A4\u0085\u00E0\u00A4\u00AE\u00E0\u00A5\u0087\u00E0\u00A4\u00B0\u00E0\u00A4\u00BF\u00E0\u00A4\u0095\u00E0\u00A4\u00BE\u00E0\u00A4\u00B5\u00E0\u00A4\u00BF\u00E0\u00A4\u00AD\u00E0\u00A4\u00BF\u00E0\u00A4\u00A8\u00E0\u00A5\u008D\u00E0\u00A4\u00A8\u00E0\u00A4\u0097\u00E0\u00A4\u00BE\u00E0\u00A4\u00A1\u00E0\u00A4\u00BF\u00E0\u00A4\u00AF\u00E0\u00A4\u00BE\u00E0\u00A4\u0081\u00E0\u00A4\u0095\u00E0\u00A5\u008D\u00E0\u00A4\u00AF\u00E0\u00A5\u008B\u00E0\u00A4\u0082\u00E0\u00A4\u0095\u00E0\u00A4\u00BF\u00E0\u00A4\u00B8\u00E0\u00A5\u0081\u00E0\u00A4\u00B0\u00E0\u00A4\u0095
\u00E0\u00A5\u008D\u00E0\u00A4\u00B7\u00E0\u00A4\u00BE\u00E0\u00A4\u00AA\u00E0\u00A4\u00B9\u00E0\u00A5\u0081\u00E0\u00A4\u0081\u00E0\u00A4\u009A\u00E0\u00A4\u00A4\u00E0\u00A5\u0080\u00E0\u00A4\u00AA\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A4\u00AC\u00E0\u00A4\u0082\u00E0\u00A4\u00A7\u00E0\u00A4\u00A8\u00E0\u00A4\u009F\u00E0\u00A4\u00BF\u00E0\u00A4\u00AA\u00E0\u00A5\u008D\u00E0\u00A4\u00AA\u00E0\u00A4\u00A3\u00E0\u00A5\u0080\u00E0\u00A4\u0095\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A4\u00BF\u00E0\u00A4\u0095\u00E0\u00A5\u0087\u00E0\u00A4\u009F\u00E0\u00A4\u00AA\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u0082\u00E0\u00A4\u00AD\u00E0\u00A4\u00AA\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A4\u00BE\u00E0\u00A4\u00AA\u00E0\u00A5\u008D\u00E0\u00A4\u00A4\u00E0\u00A4\u00AE\u00E0\u00A4\u00BE\u00E0\u00A4\u00B2\u00E0\u00A4\u00BF\u00E0\u00A4\u0095\u00E0\u00A5\u008B\u00E0\u00A4\u0082\u00E0\u00A4\u00B0\u00E0\u00A4\u00AB\u00E0\u00A4\u00BC\u00E0\u00A5\u008D\u00E0\u00A4\u00A4\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A4\u00A8\u00E0\u00A4\u00BF\u00E0\u00A4\u00B0\u00E0\u00A5\u008D\u00E0\u00A4\u00AE\u00E0\u00A4\u00BE\u00E0\u00A4\u00A3\u00E0\u00A4\u00B2\u00E0\u00A4\u00BF\u00E0\u00A4\u00AE\u00E0\u00A4\u00BF\u00E0\u00A4\u009F\u00E0\u00A5\u0087\u00E0\u00A4\u00A1description\" content=\"document.location.prot.getElementsByTagName(<!DOCTYPE html>\n<html <meta charset=\"utf-8\">:url\" content=\"http://.css\" rel=\"stylesheet\"style type=\"text/css\">type=\"text/css\" href=\"w3.org/1999/xhtml\" xmltype=\"text/javascript\" method=\"get\" action=\"link rel=\"stylesheet\"  = document.getElementtype=\"image/x-icon\" />cellpadding=\"0\" cellsp.css\" type=\"text/css\" </a></li><li><a href=\"\" width=\"1\" height=\"1\"\"><a href=\"http://www.style=\"display:none;\">alternate\" type=\"appli-//W3C//DTD XHTML 1.0 ellspacing=\"0\" cellpad type=\"hidden\" value=\"/a>&nbsp;<span role=\"s\n<input type=\"hidden\" language=\"JavaScript\"  
document.getElementsBg=\"0\" cellspacing=\"0\" ype=\"text/css\" media=\"type='text/javascript'with the exception of ype=\"text/css\" rel=\"st height=\"1\" width=\"1\" ='+encodeURIComponent(<link rel=\"alternate\" \nbody, tr, input, textmeta name=\"robots\" conmethod=\"post\" action=\">\n<a href=\"http://www.css\" rel=\"stylesheet\" </div></div><div classlanguage=\"javascript\">aria-hidden=\"true\">\u00C2\u00B7<ript\" type=\"text/javasl=0;})();\n(function(){background-image: url(/a></li><li><a href=\"h\t\t<li><a href=\"http://ator\" aria-hidden=\"tru> <a href=\"http://www.language=\"javascript\" /option>\n<option value/div></div><div class=rator\" aria-hidden=\"tre=(new Date).getTime()portugu\u00C3\u00AAs (do Brasil)\u00D0\u00BE\u00D1\u0080\u00D0\u00B3\u00D0\u00B0\u00D0\u00BD\u00D0\u00B8\u00D0\u00B7\u00D0\u00B0\u00D1\u0086\u00D0\u00B8\u00D0\u00B8\u00D0\u00B2\u00D0\u00BE\u00D0\u00B7\u00D0\u00BC\u00D0\u00BE\u00D0\u00B6\u00D0\u00BD\u00D0\u00BE\u00D1\u0081\u00D1\u0082\u00D1\u008C\u00D0\u00BE\u00D0\u00B1\u00D1\u0080\u00D0\u00B0\u00D0\u00B7\u00D0\u00BE\u00D0\u00B2\u00D0\u00B0\u00D0\u00BD\u00D0\u00B8\u00D1\u008F\u00D1\u0080\u00D0\u00B5\u00D0\u00B3\u00D0\u00B8\u00D1\u0081\u00D1\u0082\u00D1\u0080\u00D0\u00B0\u00D1\u0086\u00D0\u00B8\u00D0\u00B8\u00D0\u00B2\u00D0\u00BE\u00D0\u00B7\u00D0\u00BC\u00D0\u00BE\u00D0\u00B6\u00D0\u00BD\u00D0\u00BE\u00D1\u0081\u00D1\u0082\u00D0\u00B8\u00D0\u00BE\u00D0\u00B1\u00D1\u008F\u00D0\u00B7\u00D0\u00B0\u00D1\u0082\u00D0\u00B5\u00D0\u00BB\u00D1\u008C\u00D0\u00BD\u00D0\u00B0<!DOCTYPE html PUBLIC \"nt-Type\" content=\"text/<meta http-equiv=\"Conteransitional//EN\" \"http:<html xmlns=\"http://www-//W3C//DTD XHTML 1.0 TDTD/xhtml1-transitional//www.w3.org/TR/xhtml1/pe = 'text/javascript';<meta name=\"descriptionparentNode.insertBefore<input type=\"hidden\" najs\" type=\"text/javascri(document).ready(functiscript type=\"text/javasimage\" content=\"http://UA-Compatible\" content=tml; charset=utf-8\" />\nlink rel=\"shortcut icon<link rel=\"stylesheet\" 
</script>\n<script type== document.createElemen<a target=\"_blank\" href= document.getElementsBinput type=\"text\" name=a.type = 'text/javascrinput type=\"hidden\" namehtml; charset=utf-8\" />dtd\">\n<html xmlns=\"http-//W3C//DTD HTML 4.01 TentsByTagName('script')input type=\"hidden\" nam<script type=\"text/javas\" style=\"display:none;\">document.getElementById(=document.createElement(' type='text/javascript'input type=\"text\" name=\"d.getElementsByTagName(snical\" href=\"http://www.C//DTD HTML 4.01 Transit<style type=\"text/css\">\n\n<style type=\"text/css\">ional.dtd\">\n<html xmlns=http-equiv=\"Content-Typeding=\"0\" cellspacing=\"0\"html; charset=utf-8\" />\n style=\"display:none;\"><<li><a href=\"http://www. type='text/javascript'>\u00D0\u00B4\u00D0\u00B5\u00D1\u008F\u00D1\u0082\u00D0\u00B5\u00D0\u00BB\u00D1\u008C\u00D0\u00BD\u00D0\u00BE\u00D1\u0081\u00D1\u0082\u00D0\u00B8\u00D1\u0081\u00D0\u00BE\u00D0\u00BE\u00D1\u0082\u00D0\u00B2\u00D0\u00B5\u00D1\u0082\u00D1\u0081\u00D1\u0082\u00D0\u00B2\u00D0\u00B8\u00D0\u00B8\u00D0\u00BF\u00D1\u0080\u00D0\u00BE\u00D0\u00B8\u00D0\u00B7\u00D0\u00B2\u00D0\u00BE\u00D0\u00B4\u00D1\u0081\u00D1\u0082\u00D0\u00B2\u00D0\u00B0\u00D0\u00B1\u00D0\u00B5\u00D0\u00B7\u00D0\u00BE\u00D0\u00BF\u00D0\u00B0\u00D1\u0081\u00D0\u00BD\u00D0\u00BE\u00D1\u0081\u00D1\u0082\u00D0\u00B8\u00E0\u00A4\u00AA\u00E0\u00A5\u0081\u00E0\u00A4\u00B8\u00E0\u00A5\u008D\u00E0\u00A4\u00A4\u00E0\u00A4\u00BF\u00E0\u00A4\u0095\u00E0\u00A4\u00BE\u00E0\u00A4\u0095\u00E0\u00A4\u00BE\u00E0\u00A4\u0082\u00E0\u00A4\u0097\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A5\u0087\u00E0\u00A4\u00B8\u00E0\u00A4\u0089\u00E0\u00A4\u00A8\u00E0\u00A5\u008D\u00E0\u00A4\u00B9\u00E0\u00A5\u008B\u00E0\u00A4\u0082\u00E0\u00A4\u00A8\u00E0\u00A5\u0087\u00E0\u00A4\u00B5\u00E0\u00A4\u00BF\u00E0\u00A4\u00A7\u00E0\u00A4\u00BE\u00E0\u00A4\u00A8\u00E0\u00A4\u00B8\u00E0\u00A4\u00AD\u00E0\u00A4\u00BE\u00E0\u00A4\u00AB\u00E0\u00A4\u00BF\u00E0\u00A4\u0095\u00E0\u00A5\u008D\u00E0\u00A4\u00B8\u00E0\u00
A4\u00BF\u00E0\u00A4\u0082\u00E0\u00A4\u0097\u00E0\u00A4\u00B8\u00E0\u00A5\u0081\u00E0\u00A4\u00B0\u00E0\u00A4\u0095\u00E0\u00A5\u008D\u00E0\u00A4\u00B7\u00E0\u00A4\u00BF\u00E0\u00A4\u00A4\u00E0\u00A4\u0095\u00E0\u00A5\u0089\u00E0\u00A4\u00AA\u00E0\u00A5\u0080\u00E0\u00A4\u00B0\u00E0\u00A4\u00BE\u00E0\u00A4\u0087\u00E0\u00A4\u009F\u00E0\u00A4\u00B5\u00E0\u00A4\u00BF\u00E0\u00A4\u009C\u00E0\u00A5\u008D\u00E0\u00A4\u009E\u00E0\u00A4\u00BE\u00E0\u00A4\u00AA\u00E0\u00A4\u00A8\u00E0\u00A4\u0095\u00E0\u00A4\u00BE\u00E0\u00A4\u00B0\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A4\u00B5\u00E0\u00A4\u00BE\u00E0\u00A4\u0088\u00E0\u00A4\u00B8\u00E0\u00A4\u0095\u00E0\u00A5\u008D\u00E0\u00A4\u00B0\u00E0\u00A4\u00BF\u00E0\u00A4\u00AF\u00E0\u00A4\u00A4\u00E0\u00A4\u00BE";
+			}
+		}
+
+		private class DataHolder // Assembles the dictionary byte array from three string chunks.
+		{
+			internal static readonly byte[] Data; // Assigned and filled only by the static constructor below.
+
+			static DataHolder()
+			{
+				Data = new byte[122784]; // The chunk lengths must add up to exactly this size (checked below).
+				string[] chunks = new string[] { Org.Brotli.Dec.Dictionary.DataHolder0.GetData(), Org.Brotli.Dec.Dictionary.DataHolder1.GetData(), Org.Brotli.Dec.Dictionary.DataHolder2.GetData() };
+				int sum = 0; // Pass 1: total number of characters across all chunks.
+				foreach (string chunk in chunks)
+				{
+					sum += chunk.Length;
+				}
+				if (sum != Data.Length) // Guard: refuse to build the array if the total length is wrong.
+				{
+					throw new System.Exception("Corrupted brotli dictionary");
+				}
+				sum = 0; // Pass 2: sum is reused as the write index into Data.
+				foreach (string chunk in chunks)
+				{
+					for (int j = 0; j < chunk.Length; ++j)
+					{
+						Data[sum++] = unchecked((byte)chunk[j]); // Narrowing cast keeps the low 8 bits of each char.
+					}
+				}
+			}
+		}
+
+		internal static byte[] GetData() // Accessor for the dictionary bytes held by DataHolder.
+		{
+			return Org.Brotli.Dec.Dictionary.DataHolder.Data; // Hands out the array itself, not a copy.
+		}
+
+		internal static readonly int[] OffsetsByLength = new int[] { 0, 0, 0, 0, 0, 4096, 9216, 21504, 35840, 44032, 53248, 63488, 74752, 87040, 93696, 100864, 104704, 106752, 108928, 113536, 115968, 118528, 119872, 121280, 122016 }; // 25 entries; presumably indexed by word length - confirm. Steps match length * 2^SizeBitsByLength[length], e.g. 4096 = 4 * 2^10.
+
+		internal static readonly int[] SizeBitsByLength = new int[] { 0, 0, 0, 0, 10, 10, 11, 11, 10, 10, 10, 10, 10, 9, 9, 8, 7, 7, 8, 7, 7, 6, 6, 5, 5 }; // 25 entries, parallel to OffsetsByLength.
+
+		internal const int MinWordLength = 4; // First index with a non-zero entry in SizeBitsByLength.
+
+		internal const int MaxWordLength = 24; // Last valid index of the two tables above.
+
+		internal const int MaxTransformedWordLength = 5 + MaxWordLength + 8; // Evaluates to 37.
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/DictionaryTest.cs b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/DictionaryTest.cs
new file mode 100644
index 0000000000..84c35ab77d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/DictionaryTest.cs
@@ -0,0 +1,36 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Distributed under MIT license.
+See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+namespace Org.Brotli.Dec
+{
+	/// <summary>
+	/// Integrity test for the byte data exposed by
+	/// <see cref="Dictionary"/>
+	/// : the checksum of the data must equal a fixed expected value.
+	/// </summary>
+	public class DictionaryTest
+	{
+		private static long Crc64(byte[] data) // Bit-by-bit 64-bit CRC over data; used only by the test below.
+		{
+			long crc = -1; // Start with all 64 bits set.
+			for (int i = 0; i < data.Length; ++i)
+			{
+				long c = (crc ^ (long)(data[i] & unchecked((int)(0xFF)))) & unchecked((int)(0xFF)); // Low 8 bits of (crc XOR current byte).
+				for (int k = 0; k < 8; k++)
+				{
+					c = ((long)(((ulong)c) >> 1)) ^ (-(c & 1L) & -3932672073523589310L); // Logical shift right; XOR in 0xC96C5795D7870F42 (as a signed long) when the dropped bit was 1.
+				}
+				crc = c ^ ((long)(((ulong)crc) >> 8)); // Fold the per-byte result into the crc shifted right by 8 (logical shift).
+			}
+			return ~crc; // Final bitwise inversion.
+		}
+
+		[NUnit.Framework.Test]
+		public virtual void TestGetData()
+		{
+			NUnit.Framework.Assert.AreEqual(37084801881332636L, Crc64(Org.Brotli.Dec.Dictionary.GetData())); // Fails if any dictionary byte differs from the expected data.
+		}
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Huffman.cs b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Huffman.cs
new file mode 100644
index 0000000000..4f10c7152a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Huffman.cs
@@ -0,0 +1,149 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Distributed under MIT license.
+See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+namespace Org.Brotli.Dec
+{
+	/// <summary>Utilities for building Huffman decoding tables.</summary>
+	internal sealed class Huffman
+	{
+		/// <summary>
+		/// Maximum possible Huffman table size for an alphabet size of 704, max code length 15 and root
+		/// table bits 8.
+		/// </summary>
+		internal const int HuffmanMaxTableSize = 1080;
+
+		private const int MaxLength = 15; // Longest code length handled; also sizes the per-length arrays below.
+
+		/// <summary>Returns reverse(reverse(key, len) + 1, len).</summary>
+		/// <remarks>
+		/// In effect this increments a len-bit counter whose bits are stored in reversed order.
+		/// <para>reverse(key, len) is the bit-wise reversal of the len least significant bits of key.</para>
+		/// </remarks>
+		private static int GetNextKey(int key, int len)
+		{
+			int step = 1 << (len - 1); // Start at the most significant of the len bits.
+			while ((key & step) != 0)
+			{
+				step >>= 1; // Skip over the run of set bits, moving towards bit 0.
+			}
+			return (key & (step - 1)) + step; // Keep the bits below step, set step, clear everything above it.
+		}
+
+		/// <summary>
+		/// Stores
+		/// <paramref name="item"/>
+		/// in
+		/// <c>table[offset], table[offset + step], .., table[offset + end - step]</c>
+		/// , writing from the highest index down to the lowest.
+		/// <para>Assumes that end is a positive integer multiple of step.</para>
+		/// </summary>
+		private static void ReplicateValue(int[] table, int offset, int step, int end, int item)
+		{
+			do
+			{
+				end -= step; // Decrement first, so table[offset + end] for the incoming end is never written.
+				table[offset + end] = item;
+			}
+			while (end > 0);
+		}
+
+		/// <param name="count">histogram of bit lengths for the remaining symbols,</param>
+		/// <param name="len">code length of the next processed symbol.</param> <param name="rootBits">index width, in bits, of the root table.</param>
+		/// <returns>table width of the next 2nd level table.</returns>
+		private static int NextTableBitSize(int[] count, int len, int rootBits)
+		{
+			int left = 1 << (len - rootBits); // Entries a sub-table of the current width would provide.
+			while (len < MaxLength)
+			{
+				left -= count[len]; // Consume one entry per remaining code of this length.
+				if (left <= 0)
+				{
+					break; // Codes of this length already fill the sub-table; stop widening.
+				}
+				len++;
+				left <<= 1; // One more bit of width doubles the free entries.
+			}
+			return len - rootBits;
+		}
+
+		/// <summary>Builds Huffman lookup table assuming code lengths are in symbol order.</summary>
+		internal static void BuildHuffmanTable(int[] rootTable, int tableOffset, int rootBits, int[] codeLengths, int codeLengthsSize)
+		{
+			int key;
+			// key: the reversed prefix code.
+			int[] sorted = new int[codeLengthsSize];
+			// sorted: symbols sorted by code length.
+			// C# zero-initializes new arrays, so count needs no explicit fill.
+			int[] count = new int[MaxLength + 1];
+			// count: number of codes of each length.
+			int[] offset = new int[MaxLength + 1];
+			// offset: offsets in the sorted table for each length.
+			int symbol;
+			// Build histogram of code lengths.
+			for (symbol = 0; symbol < codeLengthsSize; symbol++)
+			{
+				count[codeLengths[symbol]]++;
+			}
+			// Generate offsets into sorted symbol table by code length.
+			offset[1] = 0;
+			for (int len = 1; len < MaxLength; len++)
+			{
+				offset[len + 1] = offset[len] + count[len];
+			}
+			// Sort symbols by length, by symbol order within each length.
+			for (symbol = 0; symbol < codeLengthsSize; symbol++)
+			{
+				if (codeLengths[symbol] != 0)
+				{
+					sorted[offset[codeLengths[symbol]]++] = symbol; // Post-increment advances that length's write cursor.
+				}
+			}
+			int tableBits = rootBits;
+			int tableSize = 1 << tableBits;
+			int totalSize = tableSize;
+			// Special case code with only one value.
+			if (offset[MaxLength] == 1)
+			{
+				for (key = 0; key < totalSize; key++)
+				{
+					rootTable[tableOffset + key] = sorted[0]; // Bare symbol: no length is packed into the upper bits here.
+				}
+				return;
+			}
+			// Fill in root table.
+			key = 0;
+			symbol = 0;
+			for (int len = 1, step = 2; len <= rootBits; len++, step <<= 1)
+			{
+				for (; count[len] > 0; count[len]--)
+				{
+					ReplicateValue(rootTable, tableOffset + key, step, tableSize, len << 16 | sorted[symbol++]); // Entry packs the code length above bit 16 and the symbol below it.
+					key = GetNextKey(key, len);
+				}
+			}
+			// Fill in 2nd level tables and add pointers to root table.
+			int mask = totalSize - 1;
+			int low = -1;
+			int currentOffset = tableOffset;
+			for (int len = rootBits + 1, step = 2; len <= MaxLength; len++, step <<= 1)
+			{
+				for (; count[len] > 0; count[len]--)
+				{
+					if ((key & mask) != low) // The low rootBits bits of key changed: start a new 2nd level table.
+					{
+						currentOffset += tableSize;
+						tableBits = NextTableBitSize(count, len, rootBits);
+						tableSize = 1 << tableBits;
+						totalSize += tableSize;
+						low = key & mask;
+						rootTable[tableOffset + low] = (tableBits + rootBits) << 16 | (currentOffset - tableOffset - low); // Pointer entry: total bit width above bit 16, offset relative to this root slot below it.
+					}
+					ReplicateValue(rootTable, currentOffset + (key >> rootBits), step, tableSize, (len - rootBits) << 16 | sorted[symbol++]); // Sub-table entry stores the length beyond rootBits plus the symbol.
+					key = GetNextKey(key, len);
+				}
+			}
+		}
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/HuffmanTreeGroup.cs b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/HuffmanTreeGroup.cs
new file mode 100644
index 0000000000..956ad26400
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/HuffmanTreeGroup.cs
@@ -0,0 +1,50 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Distributed under MIT license.
+See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+namespace Org.Brotli.Dec
+{
+	/// <summary>Contains a collection of huffman trees with the same alphabet size.</summary>
+	internal sealed class HuffmanTreeGroup
+	{
+		/// <summary>The maximal alphabet size in this group.</summary>
+		private int alphabetSize;
+
+		/// <summary>Storage for Huffman lookup tables.</summary>
+		internal int[] codes;
+
+		/// <summary>
+		/// Offsets of distinct lookup tables in
+		/// <see cref="codes"/>
+		/// storage.
+		/// </summary>
+		internal int[] trees;
+
+		/// <summary>Initializes the Huffman tree group.</summary>
+		/// <param name="group">group instance whose fields are assigned and whose arrays are allocated</param>
+		/// <param name="alphabetSize">the maximal alphabet size in this group</param>
+		/// <param name="n">number of Huffman codes</param>
+		internal static void Init(Org.Brotli.Dec.HuffmanTreeGroup group, int alphabetSize, int n)
+		{
+			group.alphabetSize = alphabetSize;
+			group.codes = new int[n * Org.Brotli.Dec.Huffman.HuffmanMaxTableSize]; // One maximum-size table slot per code.
+			group.trees = new int[n];
+		}
+
+		/// <summary>Decodes Huffman trees from input stream and constructs lookup tables.</summary>
+		/// <param name="group">group to fill; its arrays must already have been allocated by Init</param>
+		/// <param name="br">data source</param>
+		internal static void Decode(Org.Brotli.Dec.HuffmanTreeGroup group, Org.Brotli.Dec.BitReader br)
+		{
+			int next = 0; // Offset in codes at which the next table starts.
+			int n = group.trees.Length;
+			for (int i = 0; i < n; i++)
+			{
+				group.trees[i] = next;
+				Org.Brotli.Dec.Decode.ReadHuffmanCode(group.alphabetSize, group.codes, next, br);
+				next += Org.Brotli.Dec.Huffman.HuffmanMaxTableSize; // Fixed stride: every table gets a full-size slot.
+			}
+		}
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/IntReader.cs b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/IntReader.cs
new file mode 100644
index 0000000000..4363e0d308
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/IntReader.cs
@@ -0,0 +1,36 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+Distributed under MIT license.
+See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+namespace Org.Brotli.Dec
+{
+	/// <summary>Byte-to-int conversion: packs groups of 4 bytes from a byte buffer into an int buffer.</summary>
+	internal sealed class IntReader
+	{
+		private byte[] byteBuffer; // Source bytes; assigned by Init.
+
+		private int[] intBuffer; // Destination ints; assigned by Init.
+
+		internal static void Init(Org.Brotli.Dec.IntReader ir, byte[] byteBuffer, int[] intBuffer)
+		{
+			ir.byteBuffer = byteBuffer;
+			ir.intBuffer = intBuffer;
+		}
+
+		/// <summary>Translates bytes to ints.</summary>
+		/// <remarks>
+		/// Each int is assembled from 4 consecutive bytes, least significant byte first.
+		/// NB: 4 * intLen bytes are read, so byteBuffer must hold at least that many.
+		/// NB: intLen should be less or equal to intBuffer length.
+		/// </remarks>
+		internal static void Convert(Org.Brotli.Dec.IntReader ir, int intLen)
+		{
+			for (int i = 0; i < intLen; ++i)
+			{
+				ir.intBuffer[i] = ((ir.byteBuffer[i * 4] & unchecked((int)(0xFF)))) | ((ir.byteBuffer[(i * 4) + 1] & unchecked((int)(0xFF))) << 8) | ((ir.byteBuffer[(i * 4) + 2] & unchecked((int)(0xFF))) << 16) | ((ir.byteBuffer[(i * 4) + 3] & unchecked((int
+					)(0xFF))) << 24);
+			}
+		}
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Prefix.cs b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Prefix.cs
new file mode 100644
index 0000000000..abb6ccb654
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Prefix.cs
@@ -0,0 +1,33 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Distributed under MIT license.
+See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+namespace Org.Brotli.Dec
+{
+	/// <summary>Lookup tables to map prefix codes to value ranges.</summary>
+	/// <remarks>
+	/// Lookup tables to map prefix codes to value ranges.
+	/// <para>This is used during decoding of the block lengths, literal insertion lengths and copy
+	/// lengths.</para>
+	/// <para>Range represents values: [offset, offset + 2 ^ n_bits)</para>
+	/// </remarks>
+	internal sealed class Prefix
+	{
+		internal static readonly int[] BlockLengthOffset = new int[] { 1, 5, 9, 13, 17, 25, 33, 41, 49, 65, 81, 97, 113, 145, 177, 209, 241, 305, 369, 497, 753, 1265, 2289, 4337, 8433, 16625 };
+
+		internal static readonly int[] BlockLengthNBits = new int[] { 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 11, 12, 13, 24 };
+
+		internal static readonly int[] InsertLengthOffset = new int[] { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
+
+		internal static readonly int[] InsertLengthNBits = new int[] { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24 };
+
+		internal static readonly int[] CopyLengthOffset = new int[] { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30, 38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118 };
+
+		internal static readonly int[] CopyLengthNBits = new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24 };
+
+		internal static readonly int[] InsertRangeLut = new int[] { 0, 0, 8, 8, 0, 16, 8, 16, 16 };
+
+		internal static readonly int[] CopyRangeLut = new int[] { 0, 8, 0, 8, 16, 0, 16, 8, 16 };
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/RunningState.cs b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/RunningState.cs
new file mode 100644
index 0000000000..81a374354c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/RunningState.cs
@@ -0,0 +1,37 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Distributed under MIT license.
+See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+namespace Org.Brotli.Dec
+{
+	/// <summary>Integer constants naming the states of the decoding state-machine (an int-based enumeration).</summary>
+	internal sealed class RunningState
+	{
+		internal const int Uninitialized = 0;
+
+		internal const int BlockStart = 1;
+
+		internal const int CompressedBlockStart = 2;
+
+		internal const int MainLoop = 3;
+
+		internal const int ReadMetadata = 4;
+
+		internal const int CopyUncompressed = 5;
+
+		internal const int InsertLoop = 6;
+
+		internal const int CopyLoop = 7;
+
+		internal const int CopyWrapBuffer = 8;
+
+		internal const int Transform = 9;
+
+		internal const int Finished = 10;
+
+		internal const int Closed = 11;
+
+		internal const int Write = 12;
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/State.cs b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/State.cs
new file mode 100644
index 0000000000..b35c39e1b6
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/State.cs
@@ -0,0 +1,171 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Distributed under MIT license.
+See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+namespace Org.Brotli.Dec
+{
+	internal sealed class State // Decoder fields plus input set-up (SetInput) and tear-down (Close).
+	{
+		internal int runningState = Org.Brotli.Dec.RunningState.Uninitialized; // Holds one of the RunningState constants.
+
+		internal int nextRunningState;
+
+		internal readonly Org.Brotli.Dec.BitReader br = new Org.Brotli.Dec.BitReader();
+
+		internal byte[] ringBuffer;
+
+		internal readonly int[] blockTypeTrees = new int[3 * Org.Brotli.Dec.Huffman.HuffmanMaxTableSize];
+
+		internal readonly int[] blockLenTrees = new int[3 * Org.Brotli.Dec.Huffman.HuffmanMaxTableSize];
+
+		internal int metaBlockLength;
+
+		internal bool inputEnd;
+
+		internal bool isUncompressed;
+
+		internal bool isMetadata;
+
+		internal readonly Org.Brotli.Dec.HuffmanTreeGroup hGroup0 = new Org.Brotli.Dec.HuffmanTreeGroup();
+
+		internal readonly Org.Brotli.Dec.HuffmanTreeGroup hGroup1 = new Org.Brotli.Dec.HuffmanTreeGroup();
+
+		internal readonly Org.Brotli.Dec.HuffmanTreeGroup hGroup2 = new Org.Brotli.Dec.HuffmanTreeGroup();
+
+		internal readonly int[] blockLength = new int[3];
+
+		internal readonly int[] numBlockTypes = new int[3];
+
+		internal readonly int[] blockTypeRb = new int[6];
+
+		internal readonly int[] distRb = new int[] { 16, 15, 11, 4 };
+
+		internal int pos = 0;
+
+		internal int maxDistance = 0;
+
+		internal int distRbIdx = 0;
+
+		internal bool trivialLiteralContext = false;
+
+		internal int literalTreeIndex = 0;
+
+		internal int literalTree;
+
+		internal int j;
+
+		internal int insertLength;
+
+		internal byte[] contextModes;
+
+		internal byte[] contextMap;
+
+		internal int contextMapSlice;
+
+		internal int distContextMapSlice;
+
+		internal int contextLookupOffset1;
+
+		internal int contextLookupOffset2;
+
+		internal int treeCommandOffset;
+
+		internal int distanceCode;
+
+		internal byte[] distContextMap;
+
+		internal int numDirectDistanceCodes;
+
+		internal int distancePostfixMask;
+
+		internal int distancePostfixBits;
+
+		internal int distance;
+
+		internal int copyLength;
+
+		internal int copyDst;
+
+		internal int maxBackwardDistance;
+
+		internal int maxRingBufferSize;
+
+		internal int ringBufferSize = 0;
+
+		internal long expectedTotalSize = 0;
+
+		internal byte[] customDictionary = new byte[0];
+
+		internal int bytesToIgnore = 0;
+
+		internal int outputOffset;
+
+		internal int outputLength;
+
+		internal int outputUsed;
+
+		internal int bytesWritten;
+
+		internal int bytesToWrite;
+
+		internal byte[] output;
+
+		// Reads the variable-length window-bits code; SetInput calls this right after initializing the bit reader.
+		// TODO: Update to current spec.
+		private static int DecodeWindowBits(Org.Brotli.Dec.BitReader br)
+		{
+			if (Org.Brotli.Dec.BitReader.ReadBits(br, 1) == 0)
+			{
+				return 16; // A single 0 bit selects 16.
+			}
+			int n = Org.Brotli.Dec.BitReader.ReadBits(br, 3);
+			if (n != 0)
+			{
+				return 17 + n; // Non-zero 3-bit value: 18..24.
+			}
+			n = Org.Brotli.Dec.BitReader.ReadBits(br, 3);
+			if (n != 0)
+			{
+				return 8 + n; // Second non-zero 3-bit value: 9..15 (9 is rejected by SetInput).
+			}
+			return 17; // Both 3-bit groups were zero.
+		}
+
+		/// <summary>Associate input with decoder state.</summary>
+		/// <param name="state">uninitialized state without associated input</param>
+		/// <param name="input">compressed data source</param>
+		internal static void SetInput(Org.Brotli.Dec.State state, System.IO.Stream input)
+		{
+			if (state.runningState != Org.Brotli.Dec.RunningState.Uninitialized)
+			{
+				throw new System.InvalidOperationException("State MUST be uninitialized");
+			}
+			Org.Brotli.Dec.BitReader.Init(state.br, input);
+			int windowBits = DecodeWindowBits(state.br);
+			if (windowBits == 9)
+			{
+				/* Reserved case for future expansion. */
+				throw new Org.Brotli.Dec.BrotliRuntimeException("Invalid 'windowBits' code");
+			}
+			state.maxRingBufferSize = 1 << windowBits;
+			state.maxBackwardDistance = state.maxRingBufferSize - 16; // Always 16 less than the ring buffer size.
+			state.runningState = Org.Brotli.Dec.RunningState.BlockStart;
+		}
+
+		/// <exception cref="System.IO.IOException"/>
+		internal static void Close(Org.Brotli.Dec.State state)
+		{
+			if (state.runningState == Org.Brotli.Dec.RunningState.Uninitialized)
+			{
+				throw new System.InvalidOperationException("State MUST be initialized");
+			}
+			if (state.runningState == Org.Brotli.Dec.RunningState.Closed)
+			{
+				return; // Already closed: repeated calls are a no-op.
+			}
+			state.runningState = Org.Brotli.Dec.RunningState.Closed; // Marked closed before the reader is closed.
+			Org.Brotli.Dec.BitReader.Close(state.br);
+		}
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/SynthTest.cs b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/SynthTest.cs
new file mode 100644
index 0000000000..92a03ae6a5
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/SynthTest.cs
@@ -0,0 +1,2174 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+Distributed under MIT license.
+See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+namespace Org.Brotli.Dec
+{
+	/// <summary>
+	/// Tests for
+	/// <see cref="Decode"/>
+	/// .
+	/// </summary>
+	public class SynthTest
+	{
+		/// <summary>Decodes <c>data</c> through a BrotliInputStream and returns every byte it yields.</summary>
+		/// <param name="data">compressed bytes to decode</param>
+		/// <exception cref="System.IO.IOException"/>
+		private byte[] Decompress(byte[] data)
+		{
+			byte[] buffer = new byte[65536];
+			System.IO.MemoryStream output = new System.IO.MemoryStream();
+			Org.Brotli.Dec.BrotliInputStream brotliInput = new Org.Brotli.Dec.BrotliInputStream(new System.IO.MemoryStream(data));
+			try
+			{
+				int len; // a non-positive count from Read is treated as end of stream
+				while ((len = brotliInput.Read(buffer, 0, buffer.Length)) > 0)
+				{
+					output.Write(buffer, 0, len);
+				}
+			}
+			finally { brotliInput.Close(); } // CheckSynth feeds corrupt streams on purpose, so close on the failure path too
+			return output.ToArray();
+		}
+
+		/// <summary>Decodes <c>compressed</c> and checks the result against the stated expectation.</summary>
+		private void CheckSynth(byte[] compressed, bool expectSuccess, string expectedOutput)
+		{
+			byte[] wanted = Org.Brotli.Dec.Transform.ReadUniBytes(expectedOutput);
+			bool mustFail = !expectSuccess;
+			try
+			{
+				byte[] produced = Decompress(compressed);
+				if (mustFail)
+				{
+					NUnit.Framework.Assert.Fail("expected to fail decoding, but succeeded");
+				}
+				NUnit.Framework.Assert.AreEqual(wanted, produced);
+			}
+			catch (System.IO.IOException)
+			{
+				if (mustFail) return; // an IOException is the failure signal the caller asked for
+				NUnit.Framework.Assert.Fail("expected to succeed decoding, but failed");
+			}
+		}
+
+		/* GENERATED CODE START */
+		[NUnit.Framework.Test] // Stream consisting of one base dictionary word; its generator recipe is the block comment AFTER this method.
+		public virtual void TestBaseDictWord()
+		{
+			byte[] compressed = new byte[] { unchecked((byte)unchecked((int)(0x1b))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0xe3))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x0d))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x07))), unchecked((byte)unchecked((int)(0x5b))), unchecked((byte)unchecked((int)(0x26))), unchecked((byte)unchecked((int)(0x31))), unchecked((byte)unchecked((int)(0x40))), unchecked(
+				(byte)unchecked((int)(0x02))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xe0))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0x1b))), unchecked((byte)unchecked((int)(0x41))), unchecked(
+				(byte)unchecked((int)(0x02))) };
+			CheckSynth(compressed, true, string.Empty + "time"); // decoding must succeed and yield exactly "time"
+		}
+
+		/*
+		// The stream consists of a base dictionary word.
+		main_header
+		metablock_header_easy: 4, 1
+		command_inscopy_easy: 0, 4
+		command_dist_easy: 1
+		*/
+		[NUnit.Framework.Test] // 32-byte meta-block (the minimal ring-buffer size per the recipe AFTER this method) that ends with a dictionary word.
+		public virtual void TestBaseDictWordFinishBlockOnRingbufferWrap()
+		{
+			byte[] compressed = new byte[] { unchecked((byte)unchecked((int)(0x1b))), unchecked((byte)unchecked((int)(0x1f))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0xe3))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x0d))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x07))), unchecked((byte)unchecked((int)(0x5b))), unchecked((byte)unchecked((int)(0x26))), unchecked((byte)unchecked((int)(0x31))), unchecked((byte)unchecked((int)(0x40))), unchecked(
+				(byte)unchecked((int)(0x02))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xe0))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0x9b))), unchecked((byte)unchecked((int)(0x58))), unchecked(
+				(byte)unchecked((int)(0x32))), unchecked((byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked(
+				(byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked(
+				(byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked(
+				(byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked(
+				(byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0x34))), unchecked((byte)unchecked((int)(0xd4))), unchecked((byte)unchecked((int)(0x00))) };
+			CheckSynth(compressed, true, string.Empty + "aaaaaaaaaaaaaaaaaaaaaaaaaaaatime"); // decoding must succeed: a run of 'a' literals followed by "time"
+		}
+
+		/*
+		main_header
+		metablock_header_easy: 32, 1 // 32 = minimal ringbuffer size
+		command_easy: 4, "aaaaaaaaaaaaaaaaaaaaaaaaaaaa", 29
+		*/
+		[NUnit.Framework.Test] // Dictionary word that runs past the end of the meta-block; the generator recipe is the block comment AFTER this method.
+		public virtual void TestBaseDictWordTooLong()
+		{
+			byte[] compressed = new byte[] { unchecked((byte)unchecked((int)(0x1b))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0xe3))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x0d))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x07))), unchecked((byte)unchecked((int)(0x5b))), unchecked((byte)unchecked((int)(0x26))), unchecked((byte)unchecked((int)(0x31))), unchecked((byte)unchecked((int)(0x40))), unchecked(
+				(byte)unchecked((int)(0x02))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xe0))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0x1b))), unchecked((byte)unchecked((int)(0x41))), unchecked(
+				(byte)unchecked((int)(0x02))) };
+			CheckSynth(compressed, false, string.Empty); // expectSuccess is false: the test passes only when decoding raises IOException
+		}
+
+		/*
+		// Has an unmodified dictionary word that goes over the end of the
+		// meta-block. Same as BaseDictWord, but with a shorter meta-block length.
+		main_header
+		metablock_header_easy: 1, 1
+		command_inscopy_easy: 0, 4
+		command_dist_easy: 1
+		*/
+		[NUnit.Framework.Test] // Variant of TestBlockSwitchMessage that also uses 0-bit block-type commands; recipe is the block comment AFTER this method.
+		public virtual void TestBlockCountMessage()
+		{
+			byte[] compressed = new byte[] { unchecked((byte)unchecked((int)(0x1b))), unchecked((byte)unchecked((int)(0x0b))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x01))), unchecked(
+				(byte)unchecked((int)(0x8c))), unchecked((byte)unchecked((int)(0xc1))), unchecked((byte)unchecked((int)(0xc5))), unchecked((byte)unchecked((int)(0x0d))), unchecked((byte)unchecked((int)(0x08))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x22))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0xe1))), unchecked((byte)unchecked((int)(0xfc))), unchecked((byte)unchecked((int)(0xfd))), unchecked((byte)unchecked((int)(0x22))), unchecked(
+				(byte)unchecked((int)(0x2c))), unchecked((byte)unchecked((int)(0xc4))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0xd8))), unchecked(
+				(byte)unchecked((int)(0x32))), unchecked((byte)unchecked((int)(0x89))), unchecked((byte)unchecked((int)(0x01))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x77))), unchecked((byte)unchecked((int)(0xda))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x10))), unchecked((byte)unchecked((int)(0x42))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))) };
+			CheckSynth(compressed, true, string.Empty + "aabbaaaaabab"); // decoding must succeed and yield exactly "aabbaaaaabab"
+		}
+
+		/*
+		// Same as BlockSwitchMessage but also uses 0-bit block-type commands.
+		main_header
+		metablock_header_begin: 1, 0, 12, 0
+		// two literal block types
+		vlq_blocktypes: 2
+		huffman_simple: 1,1,4, 1  // literal blocktype prefix code
+		huffman_fixed: 26  // literal blockcount prefix code
+		blockcount_easy: 2  // 2 a's
+		// one ins/copy and dist block type
+		vlq_blocktypes: 1
+		vlq_blocktypes: 1
+		ndirect: 0 0
+		// two MSB6 literal context modes
+		bits: "00", "00"
+		// two literal prefix codes
+		vlq_blocktypes: 2
+		// literal context map
+		vlq_rlemax: 5
+		huffman_simple: 0,3,7, 5,0,6  // context map rle huffman code
+		// context map rle: repeat 0 64 times, 1+5 64 times
+		bits: "01", "0", "11111", "11", "0", "11111"
+		bit: 1  // MTF enabled
+		// one distance prefix code
+		vlq_blocktypes: 1
+		huffman_simple: 0,1,256, 97  // only a's
+		huffman_simple: 0,1,256, 98  // only b's
+		huffman_fixed: 704
+		huffman_fixed: 64
+		// now comes the data
+		command_inscopy_easy: 12, 0
+		blockcount_easy: 2  // switch to other block type; 2 b's
+		blockcount_easy: 5  // switch to other block type; 5 a's
+		blockcount_easy: 1  // switch to other block type; 1 b
+		blockcount_easy: 1  // switch to other block type; 1 a
+		blockcount_easy: 1  // switch to other block type; 1 b
+		*/
+		[NUnit.Framework.Test] // Message carried by block-switch commands over 0-bit, 1-symbol huffman codes; recipe is the block comment AFTER this method.
+		public virtual void TestBlockSwitchMessage()
+		{
+			byte[] compressed = new byte[] { unchecked((byte)unchecked((int)(0x1b))), unchecked((byte)unchecked((int)(0x0b))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xd1))), unchecked((byte)unchecked((int)(0xe1))), unchecked(
+				(byte)unchecked((int)(0x01))), unchecked((byte)unchecked((int)(0xc6))), unchecked((byte)unchecked((int)(0xe0))), unchecked((byte)unchecked((int)(0xe2))), unchecked((byte)unchecked((int)(0x06))), unchecked((byte)unchecked((int)(0x04))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x91))), unchecked((byte)unchecked((int)(0xb2))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xfe))), unchecked((byte)unchecked((int)(0x7e))), unchecked(
+				(byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x16))), unchecked((byte)unchecked((int)(0x62))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x1c))), unchecked(
+				(byte)unchecked((int)(0x6c))), unchecked((byte)unchecked((int)(0x99))), unchecked((byte)unchecked((int)(0xc4))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x09))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x3b))), unchecked((byte)unchecked((int)(0x6d))), unchecked((byte)unchecked((int)(0x02))), unchecked((byte)unchecked((int)(0x08))), unchecked((byte)unchecked((int)(0x82))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))) };
+			CheckSynth(compressed, true, string.Empty + "aabbaaaaabab"); // decoding must succeed and yield exactly "aabbaaaaabab"
+		}
+
+		/*
+		// Uses blocks with 1-symbol huffman codes that take 0 bits, so that it
+		// is the blockswitch commands that encode the message rather than actual
+		// literals.
+		main_header
+		metablock_header_begin: 1, 0, 12, 0
+		// two literal block types
+		vlq_blocktypes: 2
+		huffman_simple: 1,4,4, 1,0,2,3  // literal blocktype prefix code
+		huffman_fixed: 26  // literal blockcount prefix code
+		blockcount_easy: 2  // 2 a's
+		// one ins/copy and dist block type
+		vlq_blocktypes: 1
+		vlq_blocktypes: 1
+		ndirect: 0 0
+		// two MSB6 literal context modes
+		bits: "00", "00"
+		// two literal prefix codes
+		vlq_blocktypes: 2
+		// literal context map
+		vlq_rlemax: 5
+		huffman_simple: 0,3,7, 5,0,6  // context map rle huffman code
+		// context map rle: repeat 0 64 times, 1+5 64 times
+		bits: "01", "0", "11111", "11", "0", "11111"
+		bit: 1  // MTF enabled
+		// one distance prefix code
+		vlq_blocktypes: 1
+		huffman_simple: 0,1,256, 97  // only a's
+		huffman_simple: 0,1,256, 98  // only b's
+		huffman_fixed: 704
+		huffman_fixed: 64
+		// now comes the data
+		command_inscopy_easy: 12, 0
+		bits: "0"; blockcount_easy: 2  // switch to other block type; 2 b's
+		bits: "0"; blockcount_easy: 5  // switch to other block type; 5 a's
+		bits: "0"; blockcount_easy: 1  // switch to other block type; 1 b
+		bits: "0"; blockcount_easy: 1  // switch to other block type; 1 a
+		bits: "0"; blockcount_easy: 1  // switch to other block type; 1 b
+		*/
+		[NUnit.Framework.Test] // Copy of TooManySymbolsRepeated with a changed clcl table; recipe is the block comment AFTER this method.
+		public virtual void TestClClTreeDeficiency()
+		{
+			byte[] compressed = new byte[] { unchecked((byte)unchecked((int)(0x1b))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x01))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x43))), unchecked((byte)unchecked((int)(0x01))), unchecked((byte)unchecked((int)(0xe0))), unchecked((byte)unchecked((int)(0x05))), unchecked(
+				(byte)unchecked((int)(0x88))), unchecked((byte)unchecked((int)(0x55))), unchecked((byte)unchecked((int)(0x90))), unchecked((byte)unchecked((int)(0x01))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0xd8))), unchecked((byte)unchecked((int)(0x32))), unchecked((byte)unchecked((int)(0x89))), unchecked((byte)unchecked((int)(0x01))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x77))), unchecked((byte)unchecked((int)(0xda))), unchecked((byte)unchecked((int)(0x28))), unchecked((byte)unchecked((int)(0x40))), unchecked((byte)unchecked((int)(0x23))) };
+			CheckSynth(compressed, false, string.Empty + "aaab"); // decoding must fail; "aaab" is not compared on the passing (IOException) path
+		}
+
+		/*
+		// This test is a copy of TooManySymbolsRepeated, with changed clcl table.
+		main_header
+		metablock_header_begin: 1, 0, 4, 0
+		metablock_header_trivial_context
+		hskip: 0
+		clcl_ordered: 0,3,0,0,0,0,0,0,3,3,0,0,0,0,0,0,1,0
+		set_prefix_cl_rle: "", "110", "", "", "", "", "", "", "111", "101",\
+		"", "", "", "", "", "", "0", ""
+		cl_rle: 8
+		cl_rle_rep: 9, 96
+		cl_rle: 1
+		cl_rle_rep: 9, 159 // 1 + 96 + 1 + 159 = 257 > 256 = alphabet size
+		huffman_fixed: 704
+		huffman_fixed: 64
+		command_inscopy_easy: 4, 0
+		command_literal_bits: 0, 0, 0, 101100010
+		*/
+		[NUnit.Framework.Test] // Copy of ClClTreeDeficiency with a changed clcl table; recipe is the block comment AFTER this method.
+		public virtual void TestClClTreeExcess()
+		{
+			byte[] compressed = new byte[] { unchecked((byte)unchecked((int)(0x1b))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x01))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0xc3))), unchecked((byte)unchecked((int)(0x7b))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x58))), unchecked(
+				(byte)unchecked((int)(0x41))), unchecked((byte)unchecked((int)(0x06))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xe0))), unchecked((byte)unchecked((int)(0x60))), unchecked((byte)unchecked((int)(0xcb))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x06))), unchecked((byte)unchecked((int)(0x48))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xdc))), unchecked(
+				(byte)unchecked((int)(0x69))), unchecked((byte)unchecked((int)(0xa3))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x8d))), unchecked((byte)unchecked((int)(0x00))) };
+			CheckSynth(compressed, false, string.Empty + "aaab"); // decoding must fail; "aaab" is not compared on the passing (IOException) path
+		}
+
+		/*
+		// This test is a copy of ClClTreeDeficiency, with changed clcl table.
+		main_header
+		metablock_header_begin: 1, 0, 4, 0
+		metablock_header_trivial_context
+		hskip: 0
+		clcl_ordered: 0,3,0,0,0,0,0,0,3,1,0,0,0,0,0,0,1,0
+		set_prefix_cl_rle: "", "110", "", "", "", "", "", "", "111", "1",\
+		"", "", "", "", "", "", "0", ""
+		cl_rle: 8
+		cl_rle_rep: 9, 96
+		cl_rle: 1
+		cl_rle_rep: 9, 159 // 1 + 96 + 1 + 159 = 257 > 256 = alphabet size
+		huffman_fixed: 704
+		huffman_fixed: 64
+		command_inscopy_easy: 4, 0
+		command_literal_bits: 0, 0, 0, 101100010
+		*/
+		[NUnit.Framework.Test] // Complex huffman code with only two symbols, then a tiny payload; recipe is the block comment AFTER this method.
+		public virtual void TestComplexHuffmanCodeTwoSymbols()
+		{
+			byte[] compressed = new byte[] { unchecked((byte)unchecked((int)(0x1b))), unchecked((byte)unchecked((int)(0x01))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked(
+				(byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0xe0))), unchecked((byte)unchecked((int)(0xa2))), unchecked((byte)unchecked((int)(0x1a))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x0e))), unchecked((byte)unchecked((int)(0xb6))), unchecked((byte)unchecked((int)(0x4c))), unchecked((byte)unchecked((int)(0x62))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x04))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xc0))), unchecked((byte)unchecked((int)(0x9d))), unchecked((byte)unchecked((int)(0x36))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x04))) };
+			CheckSynth(compressed, true, string.Empty + "ab"); // decoding must succeed and yield exactly "ab"
+		}
+
+		/*
+		// This tests a complex huffman code with only two symbols followed by a
+		// tiny amount of content.
+		main_header
+		metablock_header_begin: 1, 0, 2, 0
+		metablock_header_trivial_context
+		// begin of literal huffman tree. The tree has symbol length 1 for "a",
+		// symbol length 1 for "b" and symbol length 0 for all others.
+		hskip: 0
+		clcl_ordered: 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
+		set_prefix_cl_rle: "", "0", "", "", "", "", "", "", "", "",\
+		"", "", "", "", "", "", "", "1"
+		cl_rle_rep_0: 97
+		cl_rle: 1  // literal number 97, that is, the letter 'a'
+		cl_rle: 1  // literal number 98, that is, the letter 'b'
+		// end of literal huffman tree
+		huffman_fixed: 704
+		huffman_fixed: 64
+		command_inscopy_easy: 2, 0
+		command_literal_bits: 0, 1  // a followed by b
+		*/
+		[NUnit.Framework.Test] // Compressed, uncompressed, then short compressed meta-block (main_header: 22); recipe is the block comment AFTER this method.
+		public virtual void TestCompressedUncompressedShortCompressed()
+		{
+			byte[] compressed = new byte[] { unchecked((byte)unchecked((int)(0x8b))), unchecked((byte)unchecked((int)(0xfe))), unchecked((byte)unchecked((int)(0x01))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0xe3))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x0d))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x07))), unchecked((byte)unchecked((int)(0x5b))), unchecked((byte)unchecked((int)(0x26))), unchecked((byte)unchecked((int)(0x31))), unchecked((byte)unchecked((int)(0x40))), unchecked(
+				(byte)unchecked((int)(0x02))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xe0))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0x9b))), unchecked((byte)unchecked((int)(0x66))), unchecked(
+				(byte)unchecked((int)(0x6f))), unchecked((byte)unchecked((int)(0x1b))), unchecked((byte)unchecked((int)(0x0a))), unchecked((byte)unchecked((int)(0x50))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x10))), unchecked(
+				(byte)unchecked((int)(0x62))), unchecked((byte)unchecked((int)(0x62))), unchecked((byte)unchecked((int)(0x62))), unchecked((byte)unchecked((int)(0x62))), unchecked((byte)unchecked((int)(0x62))), unchecked((byte)unchecked((int)(0x62))), unchecked(
+				(byte)unchecked((int)(0x31))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked(
+				(byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))) };
+			CheckSynth(compressed, true, string.Empty + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+				 + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+				 + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+				 + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaabbbbbbbbbb"
+				); // decoding must succeed: the run of 'a' from the first meta-block, then the 'b's from the uncompressed and final meta-blocks
+		}
+
+		/*
+		main_header: 22
+		metablock_header_easy: 1022, 0
+		command_easy: 1021, "a", 1 // 1022 x "a"
+		metablock_uncompressed: "bbbbbb"
+		metablock_header_easy: 4, 1
+		command_easy: 4, "", 1 // 6 + 4 = 10 x "b"
+		*/
+		[NUnit.Framework.Test] // Same meta-block sequence as the non-SmallWindow test but with main_header: 10; recipe is the block comment AFTER this method.
+		public virtual void TestCompressedUncompressedShortCompressedSmallWindow()
+		{
+			byte[] compressed = new byte[] { unchecked((byte)unchecked((int)(0x21))), unchecked((byte)unchecked((int)(0xf4))), unchecked((byte)unchecked((int)(0x0f))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x1c))), unchecked((byte)unchecked((int)(0xa7))), unchecked((byte)unchecked((int)(0x6d))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0xd8))), unchecked((byte)unchecked((int)(0x32))), unchecked((byte)unchecked((int)(0x89))), unchecked((byte)unchecked((int)(0x01))), unchecked(
+				(byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x77))), unchecked((byte)unchecked((int)(0xda))), unchecked((byte)unchecked((int)(0x34))), unchecked(
+				(byte)unchecked((int)(0x7b))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x50))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x02))), unchecked((byte)unchecked((int)(0x80))), unchecked(
+				(byte)unchecked((int)(0x62))), unchecked((byte)unchecked((int)(0x62))), unchecked((byte)unchecked((int)(0x62))), unchecked((byte)unchecked((int)(0x62))), unchecked((byte)unchecked((int)(0x62))), unchecked((byte)unchecked((int)(0x62))), unchecked(
+				(byte)unchecked((int)(0x31))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked(
+				(byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))) };
+			CheckSynth(compressed, true, string.Empty + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+				 + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+				 + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+				 + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaabbbbbbbbbb"
+				); // decoding must succeed: the run of 'a' from the first meta-block, then the 'b's from the uncompressed and final meta-blocks
+		}
+
+		/*
+		main_header: 10
+		metablock_header_easy: 1022, 0
+		command_easy: 1021, "a", 1 // 1022 x "a"
+		metablock_uncompressed: "bbbbbb"
+		metablock_header_easy: 4, 1
+		command_easy: 4, "", 1 // 6 + 4 = 10 x "b"
+		*/
+		[NUnit.Framework.Test] // Copy length that runs past the end of the meta-block; recipe is the block comment AFTER this method.
+		public virtual void TestCopyLengthTooLong()
+		{
+			byte[] compressed = new byte[] { unchecked((byte)unchecked((int)(0x1b))), unchecked((byte)unchecked((int)(0x01))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0xe3))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x0d))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x07))), unchecked((byte)unchecked((int)(0x5b))), unchecked((byte)unchecked((int)(0x26))), unchecked((byte)unchecked((int)(0x31))), unchecked((byte)unchecked((int)(0x40))), unchecked(
+				(byte)unchecked((int)(0x02))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xe0))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0x1b))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x86))), unchecked((byte)unchecked((int)(0x02))) };
+			CheckSynth(compressed, false, string.Empty); // expectSuccess is false: the test passes only when decoding raises IOException
+		}
+
+		/*
+		// Has a copy length that goes over the end of the meta-block.
+		// Same as OneCommand, but with a shorter meta-block length.
+		main_header
+		metablock_header_easy: 2, 1
+		command_easy: 2, "a", 1
+		*/
+		[NUnit.Framework.Test] // Small hand-crafted huffman code followed by a tiny payload; recipe is the block comment AFTER this method.
+		public virtual void TestCustomHuffmanCode()
+		{
+			byte[] compressed = new byte[] { unchecked((byte)unchecked((int)(0x1b))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x01))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0xc3))), unchecked((byte)unchecked((int)(0x3d))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x58))), unchecked(
+				(byte)unchecked((int)(0x82))), unchecked((byte)unchecked((int)(0x08))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xc0))), unchecked((byte)unchecked((int)(0xc1))), unchecked((byte)unchecked((int)(0x96))), unchecked(
+				(byte)unchecked((int)(0x49))), unchecked((byte)unchecked((int)(0x0c))), unchecked((byte)unchecked((int)(0x90))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xb8))), unchecked(
+				(byte)unchecked((int)(0xd3))), unchecked((byte)unchecked((int)(0x46))), unchecked((byte)unchecked((int)(0x01))), unchecked((byte)unchecked((int)(0x1a))), unchecked((byte)unchecked((int)(0x01))) };
+			CheckSynth(compressed, true, string.Empty + "aaab"); // decoding must succeed and yield exactly "aaab"
+		}
+
+		/*
+		// This tests a small hand crafted huffman code followed by a tiny amount
+		// of content. This tests if the bit reader detects the end correctly even
+		// with tiny content after a larger huffman tree encoding.
+		main_header
+		metablock_header_begin: 1, 0, 4, 0
+		metablock_header_trivial_context
+		// begin of literal huffman tree. The tree has symbol length 1 for "a",
+		// symbol length 8 for null, symbol length 9 for all others. The length 1
+		// for a is chosen on purpose here, the others must be like that to
+		// fulfill the requirement that the sum of 32768>>length is 32768.
+		hskip: 0
+		clcl_ordered: 0,3,0,0,0,0,0,0,3,2,0,0,0,0,0,0,1,0
+		set_prefix_cl_rle: "", "110", "", "", "", "", "", "", "111", "10",\
+		"", "", "", "", "", "", "0", ""
+		cl_rle: 8
+		cl_rle_rep: 9, 96
+		cl_rle: 1  // literal number 97, that is, the letter 'a'
+		cl_rle_rep: 9, 158
+		// end of literal huffman tree
+		huffman_fixed: 704
+		huffman_fixed: 64
+		command_inscopy_easy: 4, 0
+		// Here is how the code "101100010" for b is derived: remember that a has
+		// symbol length 1, null has symbol length 8, the rest 9. So in the
+		// canonical huffman code, the code for "a" is "0", for null is
+		// "10000000". The next value has "100000010" (cfr. the rules of canonical
+		// prefix code). Counting upwards +95 from there, the value "@" (ascii 96,
+		// before "a") has "101100001", and so b, the next 9-bit symbol, has the
+		// next binary value "101100010".
+		command_literal_bits: 0, 0, 0, 101100010  // 3 a's followed by a b
+		*/
+		[NUnit.Framework.Test] // One-byte stream: main header plus a last, empty meta-block; recipe is the block comment AFTER this method.
+		public virtual void TestEmpty()
+		{
+			byte[] compressed = new byte[] { unchecked((byte)unchecked((int)(0x3b))) };
+			CheckSynth(compressed, true, string.Empty); // decoding must succeed and produce no output bytes
+		}
+
+		/*
+		main_header
+		metablock_lastempty
+		*/
+		// Hands a hand-assembled stream to CheckSynth with flag `true` and the
+		// expected text "hello world". NOTE(review): the flag presumably means
+		// "decoding must succeed"; CheckSynth is declared outside this excerpt.
+		// The stream recipe appears to be the block comment right after this method.
+		[NUnit.Framework.Test]
+		public virtual void TestHelloWorld()
+		{
+			byte[] payload =
+			{
+				0x1b, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x80, 0xe3,
+				0xb4, 0x0d, 0x00, 0x00, 0x07, 0x5b, 0x26, 0x31,
+				0x40, 0x02, 0x00, 0xe0, 0x4e, 0x9b, 0x00, 0x59,
+				0x98, 0xda, 0xd8, 0xd8, 0x13, 0xb8, 0xdb, 0x3b,
+				0xd9, 0x98, 0x00
+			};
+			CheckSynth(payload, true, "hello world");
+		}
+
+		/*
+		main_header
+		metablock_fixed: "hello world", 1
+		*/
+		// Hands a hand-assembled stream to CheckSynth with flag `false` and an empty
+		// expected string. NOTE(review): the flag presumably means "decoding must
+		// fail"; CheckSynth is declared outside this excerpt -- confirm there.
+		// The stream recipe appears to be the block comment right after this method.
+		[NUnit.Framework.Test]
+		public virtual void TestInsertTooLong()
+		{
+			byte[] payload =
+			{
+				0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xe3,
+				0xb4, 0x0d, 0x00, 0x00, 0x07, 0x5b, 0x26, 0x31,
+				0x40, 0x02, 0x00, 0xe0, 0x4e, 0x1b, 0x09, 0x86,
+				0x46
+			};
+			CheckSynth(payload, false, "");
+		}
+
+		/*
+		// Has an insert length that goes over the end of the meta-block.
+		// Same as OneInsert, but with a shorter meta-block length.
+		main_header
+		metablock_header_easy: 1, 1
+		command_easy: 0, "ab"
+		*/
+		// Hands a hand-assembled stream to CheckSynth with flag `false` and the
+		// text "hello world". NOTE(review): the flag presumably means "decoding
+		// must fail"; how the text is used in that case is decided by CheckSynth,
+		// which is declared outside this excerpt -- confirm there.
+		// The stream recipe appears to be the block comment right after this method.
+		[NUnit.Framework.Test]
+		public virtual void TestInvalidNoLastMetablock()
+		{
+			byte[] payload =
+			{
+				0x0b, 0x06, 0x00, 0x00, 0x00, 0x00, 0x80, 0xe3,
+				0xb4, 0x0d, 0x00, 0x00, 0x07, 0x5b, 0x26, 0x31,
+				0x40, 0x02, 0x00, 0xe0, 0x4e, 0x9b, 0x00, 0x13,
+				0x59, 0x98, 0xda, 0xd8, 0xd8, 0x13, 0xb8, 0xdb,
+				0x3b, 0xd9, 0x98, 0xe8, 0x00
+			};
+			CheckSynth(payload, false, "hello world");
+		}
+
+		/*
+		main_header
+		metablock_fixed: "hello world", 0
+		*/
+		// Hands the single byte 0x0b to CheckSynth with flag `false` and an empty
+		// expected string. NOTE(review): the flag presumably means "decoding must
+		// fail"; CheckSynth is declared outside this excerpt -- confirm there.
+		// The stream recipe appears to be the block comment right after this method.
+		[NUnit.Framework.Test]
+		public virtual void TestInvalidNoMetaBlocks()
+		{
+			byte[] payload = { 0x0b };
+			CheckSynth(payload, false, "");
+		}
+
+		/*
+		main_header
+		*/
+		// Hands a hand-assembled stream to CheckSynth with flag `false` and an empty
+		// expected string. NOTE(review): the flag presumably means "decoding must
+		// fail"; CheckSynth is declared outside this excerpt -- confirm there.
+		// The stream recipe appears to be the block comment right after this method.
+		[NUnit.Framework.Test]
+		public virtual void TestInvalidTooFarDist()
+		{
+			byte[] payload =
+			{
+				0xa1, 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c,
+				0xa7, 0x6d, 0x00, 0x00, 0x38, 0xd8, 0x32, 0x89,
+				0x01, 0x12, 0x00, 0x00, 0x77, 0xda, 0xe8, 0xe0,
+				0x62, 0x6f, 0x4f, 0x60, 0x66, 0xe8, 0x44, 0x38,
+				0x0f, 0x09, 0x0d
+			};
+			CheckSynth(payload, false, "");
+		}
+
+		/*
+		main_header: 10
+		metablock_header_begin: 1, 0, 10, 0
+		metablock_header_trivial_context
+		huffman_fixed: 256
+		huffman_fixed: 704
+		huffman_fixed: 64
+		command_easy: 2, "too far!", 1000000  // distance too far for 10 wbits
+		*/
+		// Hands a hand-assembled stream to CheckSynth with flag `false` and the
+		// text "a". NOTE(review): the flag presumably means "decoding must fail";
+		// how the text is used in that case is decided by CheckSynth, which is
+		// declared outside this excerpt -- confirm there.
+		// The stream recipe appears to be the block comment right after this method.
+		[NUnit.Framework.Test]
+		public virtual void TestInvalidTooLargeContextMap()
+		{
+			byte[] payload =
+			{
+				0x1b, 0x00, 0x00, 0xd1, 0xe1, 0x01, 0xc6, 0xe0,
+				0xe2, 0x06, 0x00, 0x00, 0x91, 0xb2, 0x70, 0xfe,
+				0xfb, 0x45, 0x58, 0x88, 0x01, 0x00, 0x70, 0xb0,
+				0x65, 0x12, 0x03, 0x24, 0x00, 0x00, 0xee, 0xb4,
+				0x11, 0x01
+			};
+			CheckSynth(payload, false, "a");
+		}
+
+		/*
+		// Has a repeat code in a context map that makes the size too big -> invalid.
+		main_header
+		metablock_header_begin: 1, 0, 1, 0
+		// two literal block types
+		vlq_blocktypes: 2
+		huffman_simple: 1,4,4, 1,0,2,3  // literal blocktype prefix code
+		huffman_fixed: 26  // literal blockcount prefix code
+		blockcount_easy: 1
+		// one ins/copy and dist block type
+		vlq_blocktypes: 1
+		vlq_blocktypes: 1
+		ndirect: 0 0
+		// two MSB6 literal context modes
+		bits: "00", "00"
+		// two literal prefix codes
+		vlq_blocktypes: 2
+		// literal context map
+		vlq_rlemax: 5
+		huffman_simple: 0,3,7, 5,0,6  // context map rle huffman code
+		// Too long context map rle: repeat 0 64 times, 1+5 65 times, that is 129
+		// values which is 1 too much.
+		bits: "01", "0", "11111", "11", "11", "0", "11111"
+		bit: 1  // MTF enabled
+		// one distance prefix code
+		vlq_blocktypes: 1
+		huffman_simple: 0,1,256, 97  // only a's
+		huffman_simple: 0,1,256, 98  // only b's
+		huffman_fixed: 704
+		huffman_fixed: 64
+		// now comes the data
+		command_inscopy_easy: 1, 0
+		*/
+		// Hands a hand-assembled stream to CheckSynth with flag `false` and an empty
+		// expected string. NOTE(review): the flag presumably means "decoding must
+		// fail"; CheckSynth is declared outside this excerpt -- confirm there.
+		// The stream recipe appears to be the block comment right after this method.
+		[NUnit.Framework.Test]
+		public virtual void TestInvalidTransformType()
+		{
+			byte[] payload =
+			{
+				0x1b, 0x03, 0x00, 0x00, 0x00, 0x00, 0x80, 0xe3,
+				0xb4, 0x0d, 0x00, 0x00, 0x07, 0x5b, 0x26, 0x31,
+				0x40, 0x02, 0x00, 0xe0, 0x4e, 0x1b, 0x41, 0x2d,
+				0x01, 0x19
+			};
+			CheckSynth(payload, false, "");
+		}
+
+		/*
+		main_header
+		metablock_header_easy: 4, 1
+		command_inscopy_easy: 0, 4
+		command_dist_easy: 123905 // = 121 << 10 + 1
+		*/
+		// Hands a hand-assembled stream to CheckSynth with flag `false` and the
+		// text "a". NOTE(review): the flag presumably means "decoding must fail";
+		// how the text is used in that case is decided by CheckSynth, which is
+		// declared outside this excerpt -- confirm there.
+		// The stream recipe appears to be the block comment right after this method.
+		[NUnit.Framework.Test]
+		public virtual void TestInvalidWindowBits9()
+		{
+			byte[] payload =
+			{
+				0x91, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c,
+				0xa7, 0x6d, 0x00, 0x00, 0x38, 0xd8, 0x32, 0x89,
+				0x01, 0x12, 0x00, 0x00, 0x77, 0xda, 0xc8, 0x20,
+				0x32, 0xd4, 0x01
+			};
+			CheckSynth(payload, false, "a");
+		}
+
+		/*
+		main_header: 9
+		metablock_fixed: "a", 1
+		*/
+		[NUnit.Framework.Test]
+		public virtual void TestManyTinyMetablocks()
+		{
+			byte[] compressed = new byte[] { unchecked((byte)unchecked((int)(0x0b))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked(
+				(byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked(
+				(byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked((byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x04))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x80))), unchecked((byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x38))), unchecked((byte)unchecked((int)(0x4e))), unchecked((byte)unchecked((int)(0xdb))), unchecked((byte)unchecked((int)(0x00))), unchecked(
+				(byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x70))), unchecked((byte)unchecked((int)(0xb0))), unchecked((byte)unchecked((int)(0x65))), unchecked((byte)unchecked((int)(0x12))), unchecked((byte)unchecked((int)(0x03))), unchecked(
+				(byte)unchecked((int)(0x24))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0x00))), unchecked((byte)unchecked((int)(0xee))), unchecked((byte)unchecked((int)(0xb4))), unchecked((byte)unchecked((int)(0x11))), unchecked(
+				(byte)unchecked((int)(0x61))), unchecked((byte)unchecked((int)(0x34))) };
+			CheckSynth(compressed, true, string.Empty + "abababababababababababababababababababababababababababababababababababab" + "abababababababababababababababababababababababababababababababababababab" + "abababababababababababababababababababababababababababababababababababab"
+				 + "abababababababababababababababababababababababababababababababababababab" + "abababababababababababababababababababababababababababababababababababab" + "abababababababababababababababababababababababababababababababababababab" + "abababababababababababababababababababababababababababababababababababab"
+				 + "abababababababababababababababababababababababababababababababababababab" + "abababababababababababab");
+		}
+
+		/*
+		main_header
+		repeat: 300
+		metablock_uncompressed: "a"
+		metablock_fixed: "b"
+		end_repeat
+		metablock_lastempty
+		*/
+		/// <summary>Stream ending in distance symbol 15 (<c>rb[idx + 2] - 3</c>) after every ring-buffer slot was set to 2; see the listing
+		/// that follows this method. CheckSynth receives <c>false</c> (presumably "decode must fail" - confirm against CheckSynth).</summary>
+		[NUnit.Framework.Test]
+		public virtual void TestNegativeDistance()
+		{
+			byte[] compressed = new byte[] {
+				0x1b, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x80, 0xe3, 0xb4, 0x0d, 0x00, 0x00,
+				0x07, 0x5b, 0x26, 0x31, 0x40, 0x02, 0x00, 0xe0, 0x4e, 0x1b, 0x41, 0x02,
+				0x01, 0x42, 0x01, 0x42, 0x01, 0x42, 0x01, 0x42, 0x01, 0x42, 0x01, 0x1c
+			};
+			CheckSynth(compressed, false, "timemememememeXX");
+		}
+
+		/*
+		main_header
+		metablock_header_easy: 16, 1
+		command_inscopy_easy: 0, 4 // time
+		command_dist_easy: 1
+		command_inscopy_easy: 0, 2 // me
+		command_dist_easy: 2
+		command_inscopy_easy: 0, 2 // me
+		command_dist_easy: 2
+		command_inscopy_easy: 0, 2 // me
+		command_dist_easy: 2
+		command_inscopy_easy: 0, 2 // me
+		command_dist_easy: 2
+		command_inscopy_easy: 0, 2 // me
+		command_dist_easy: 2 // All rb items are 2 now
+		command_inscopy_easy: 0, 2
+		bits: "011100" // 15 -> distance = rb[idx + 2] - 3
+		*/
+		/// <summary>First meta-block declares length 1 but its command inserts "ab", so the remaining length becomes -1 (marked as an
+		/// invalid stream in the listing that follows this method); a second meta-block follows. CheckSynth receives <c>false</c>.</summary>
+		[NUnit.Framework.Test]
+		public virtual void TestNegativeRemainingLenBetweenMetablocks()
+		{
+			byte[] compressed = new byte[] {
+				0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xe3, 0xb4, 0x0d,
+				0x00, 0x00, 0x07, 0x5b, 0x26, 0x31, 0x40, 0x02, 0x00, 0xe0,
+				0x4e, 0x1b, 0x09, 0x86, 0x46, 0x11, 0x00, 0x00, 0x00, 0x00,
+				0x00, 0x38, 0x4e, 0xdb, 0x00, 0x00, 0x70, 0xb0, 0x65, 0x12,
+				0x03, 0x24, 0x00, 0x00, 0xee, 0xb4, 0x91, 0x60, 0x68, 0x04
+			};
+			CheckSynth(compressed, false, "abab");
+		}
+
+		/*
+		main_header
+		metablock_header_easy: 1, 0
+		command_easy: 0, "ab"  // remaining length == -1 -> invalid stream
+		metablock_header_easy: 2, 1
+		command_easy: 0, "ab"
+		*/
+		/// <summary>Stream made of a single command with both insert and copy (listing follows this method); expected output is "aaa".</summary>
+		[NUnit.Framework.Test]
+		public virtual void TestOneCommand()
+		{
+			byte[] compressed = new byte[] {
+				0x1b, 0x02, 0x00, 0x00, 0x00, 0x00, 0x80, 0xe3, 0xb4, 0x0d, 0x00, 0x00, 0x07,
+				0x5b, 0x26, 0x31, 0x40, 0x02, 0x00, 0xe0, 0x4e, 0x1b, 0x11, 0x86, 0x02
+			};
+			CheckSynth(compressed, true, "aaa");
+		}
+
+		/*
+		// The stream consists of one command with insert and copy.
+		main_header
+		metablock_header_easy: 3, 1
+		command_easy: 2, "a", 1
+		*/
+		/// <summary>Stream made of one half command with insert only (listing follows this method); expected output is "ab".</summary>
+		[NUnit.Framework.Test]
+		public virtual void TestOneInsert()
+		{
+			byte[] compressed = new byte[] {
+				0x1b, 0x01, 0x00, 0x00, 0x00, 0x00, 0x80, 0xe3, 0xb4, 0x0d, 0x00, 0x00, 0x07,
+				0x5b, 0x26, 0x31, 0x40, 0x02, 0x00, 0xe0, 0x4e, 0x1b, 0x09, 0x86, 0x46
+			};
+			CheckSynth(compressed, true, "ab");
+		}
+
+		/*
+		// The stream consists of one half command with insert only.
+		main_header
+		metablock_header_easy: 2, 1
+		command_easy: 0, "ab"
+		*/
+		/// <summary>Literals use a simple prefix code over symbols 97, 98, 99, 100 (listing follows this method); expected output is "abcd".</summary>
+		[NUnit.Framework.Test]
+		public virtual void TestSimplePrefix()
+		{
+			byte[] compressed = new byte[] {
+				0x1b, 0x03, 0x00, 0x00, 0xa0, 0xc3, 0xc4, 0xc6, 0xc8, 0x02, 0x00, 0x70,
+				0xb0, 0x65, 0x12, 0x03, 0x24, 0x00, 0x00, 0xee, 0xb4, 0x51, 0xa0, 0x1d
+			};
+			CheckSynth(compressed, true, "abcd");
+		}
+
+		/*
+		main_header
+		metablock_header_begin: 1, 0, 4, 0
+		metablock_header_trivial_context
+		huffman_simple: 1,4,256, 97,98,99,100  // ascii codes for a, b, c, d
+		huffman_fixed: 704
+		huffman_fixed: 64
+		command_inscopy_easy: 4, 0
+		command_literal_bits: 0, 10, 110, 111  // a, b, c, d
+		*/
+		/// <summary>Simple prefix code whose symbol list is 97, 98, 97, 98, i.e. contains duplicates (listing follows); CheckSynth receives <c>false</c>.</summary>
+		[NUnit.Framework.Test]
+		public virtual void TestSimplePrefixDuplicateSymbols()
+		{
+			byte[] compressed = new byte[] {
+				0x1b, 0x03, 0x00, 0x00, 0xa0, 0xc3, 0xc4, 0xc2, 0xc4, 0x02, 0x00, 0x70,
+				0xb0, 0x65, 0x12, 0x03, 0x24, 0x00, 0x00, 0xee, 0xb4, 0x51, 0xa0, 0x1d
+			};
+			CheckSynth(compressed, false, "abab");
+		}
+
+		/*
+		main_header
+		metablock_header_begin: 1, 0, 4, 0
+		metablock_header_trivial_context
+		huffman_simple: 1,4,256, 97,98,97,98  // ascii codes for a, b, a, b
+		huffman_fixed: 704
+		huffman_fixed: 64
+		command_inscopy_easy: 4, 0
+		command_literal_bits: 0, 10, 110, 111  // a, b, a, b
+		*/
+		/// <summary>huffman_simple entry with alphabet parameter 704 lists symbols 1023, 1022, 1021, 1020 (listing follows); CheckSynth receives <c>false</c>.</summary>
+		[NUnit.Framework.Test]
+		public virtual void TestSimplePrefixOutOfRangeSymbols()
+		{
+			byte[] compressed = new byte[] {
+				0x1b, 0x03, 0x00, 0x00, 0x00, 0x00, 0x80, 0xe3, 0xb4, 0x4d, 0xff, 0xef, 0x7f, 0xff, 0xfc, 0x07, 0x00, 0xb8, 0xd3, 0x06
+			};
+			CheckSynth(compressed, false, string.Empty);
+		}
+
+		/*
+		main_header
+		metablock_header_begin: 1, 0, 4, 0
+		metablock_header_trivial_context
+		huffman_fixed: 256
+		huffman_simple: 1,4,704, 1023,1022,1021,1020
+		huffman_fixed: 64
+		*/
+		/// <summary>Code-length RLE repeats total 257 entries, one more than the alphabet size of 256 (listing follows); CheckSynth receives <c>false</c>.</summary>
+		[NUnit.Framework.Test]
+		public virtual void TestTooManySymbolsRepeated()
+		{
+			byte[] compressed = new byte[] {
+				0x1b, 0x03, 0x00, 0x00, 0x00, 0x01, 0x80, 0xc3, 0x3d, 0x80, 0x58, 0x82, 0x0c, 0x00,
+				0xc0, 0xc1, 0x96, 0x49, 0x0c, 0x90, 0x00, 0x00, 0xb8, 0xd3, 0x46, 0x01, 0x1a, 0x01
+			};
+			CheckSynth(compressed, false, "aaab");
+		}
+
+		/*
+		// This test is a copy of CustomHuffmanCode, with changed repeat count.
+		main_header
+		metablock_header_begin: 1, 0, 4, 0
+		metablock_header_trivial_context
+		hskip: 0
+		clcl_ordered: 0,3,0,0,0,0,0,0,3,2,0,0,0,0,0,0,1,0
+		set_prefix_cl_rle: "", "110", "", "", "", "", "", "", "111", "10",\
+		"", "", "", "", "", "", "0", ""
+		cl_rle: 8
+		cl_rle_rep: 9, 96
+		cl_rle: 1
+		cl_rle_rep: 9, 159 // 1 + 96 + 1 + 159 = 257 > 256 = alphabet size
+		huffman_fixed: 704
+		huffman_fixed: 64
+		command_inscopy_easy: 4, 0
+		command_literal_bits: 0, 0, 0, 101100010
+		*/
+		/// <summary>Stream consisting of a transformed dictionary word (listing follows this method); expected output is "time the ".</summary>
+		[NUnit.Framework.Test]
+		public virtual void TestTransformedDictWord()
+		{
+			byte[] compressed = new byte[] {
+				0x1b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x80, 0xe3, 0xb4, 0x0d, 0x00, 0x00, 0x07,
+				0x5b, 0x26, 0x31, 0x40, 0x02, 0x00, 0xe0, 0x4e, 0x1b, 0x41, 0x09, 0x01, 0x01
+			};
+			CheckSynth(compressed, true, "time the ");
+		}
+
+		/*
+		// The stream consists of a transformed dictionary word.
+		main_header
+		metablock_header_easy: 9, 1
+		command_inscopy_easy: 0, 4
+		command_dist_easy: 5121
+		*/
+		/// <summary>Same command as TestTransformedDictWord with meta-block length 4, so the transformed word overruns it; CheckSynth receives <c>false</c>.</summary>
+		[NUnit.Framework.Test]
+		public virtual void TestTransformedDictWordTooLong()
+		{
+			byte[] compressed = new byte[] {
+				0x1b, 0x03, 0x00, 0x00, 0x00, 0x00, 0x80, 0xe3, 0xb4, 0x0d, 0x00, 0x00, 0x07,
+				0x5b, 0x26, 0x31, 0x40, 0x02, 0x00, 0xe0, 0x4e, 0x1b, 0x41, 0x09, 0x01, 0x01
+			};
+			CheckSynth(compressed, false, string.Empty);
+		}
+		/*
+		// Has a transformed dictionary word that goes over the end of the
+		// meta-block, but the base dictionary word fits in the meta-block.
+		// Same as TransformedDictWord, but with a shorter meta-block length.
+		main_header
+		metablock_header_easy: 4, 1
+		command_inscopy_easy: 0, 4
+		command_dist_easy: 5121
+		*/
+		/* GENERATED CODE END */
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Transform.cs b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Transform.cs
new file mode 100644
index 0000000000..9869d6ec2c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Transform.cs
@@ -0,0 +1,154 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Distributed under MIT license.
+See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+namespace Org.Brotli.Dec
+{
+	/// <summary>Transformations on dictionary words.</summary>
+	internal sealed class Transform
+	{
+		private readonly byte[] prefix; // Bytes written to the output ahead of the word.
+
+		private readonly int type; // WordTransformType constant selecting the trimming / upper-casing to apply.
+
+		private readonly byte[] suffix; // Bytes written to the output after the word.
+		/// <summary>Stores <paramref name="type"/> and the byte forms of <paramref name="prefix"/> and <paramref name="suffix"/>.</summary>
+		internal Transform(string prefix, int type, string suffix)
+		{
+			this.type = type;
+			this.prefix = Transform.ReadUniBytes(prefix);
+			this.suffix = Transform.ReadUniBytes(suffix);
+		}
+
+		/// <summary>Returns one byte per char of <paramref name="uniBytes"/>, keeping only the low 8 bits of each char.</summary>
+		internal static byte[] ReadUniBytes(string uniBytes)
+		{
+			byte[] bytes = new byte[uniBytes.Length];
+			int pos = 0;
+			foreach (char unit in uniBytes)
+				bytes[pos++] = unchecked((byte)unit);
+			return bytes;
+		}
+		/// <summary>Static table of word transforms; each entry is built from a prefix, a WordTransformType constant and a suffix.</summary>
+		internal static readonly Org.Brotli.Dec.Transform[] Transforms = new Org.Brotli.Dec.Transform[] { new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, string.Empty), new Org.Brotli.Dec.Transform(string.Empty, 
+			Org.Brotli.Dec.WordTransformType.Identity, " "), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType.Identity, " "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.OmitFirst1, string.Empty), new Org.Brotli.Dec.Transform
+			(string.Empty, Org.Brotli.Dec.WordTransformType.UppercaseFirst, " "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, " the "), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType.Identity
+			, string.Empty), new Org.Brotli.Dec.Transform("s ", Org.Brotli.Dec.WordTransformType.Identity, " "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, " of "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType
+			.UppercaseFirst, string.Empty), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, " and "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.OmitFirst2, string.Empty), new Org.Brotli.Dec.Transform
+			(string.Empty, Org.Brotli.Dec.WordTransformType.OmitLast1, string.Empty), new Org.Brotli.Dec.Transform(", ", Org.Brotli.Dec.WordTransformType.Identity, " "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity
+			, ", "), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType.UppercaseFirst, " "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, " in "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType
+			.Identity, " to "), new Org.Brotli.Dec.Transform("e ", Org.Brotli.Dec.WordTransformType.Identity, " "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, "\""), new Org.Brotli.Dec.Transform(string.Empty, 
+			Org.Brotli.Dec.WordTransformType.Identity, "."), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, "\">"), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, "\n"), new 
+			Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.OmitLast3, string.Empty), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, "]"), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType
+			.Identity, " for "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.OmitFirst3, string.Empty), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.OmitLast2, string.Empty), new Org.Brotli.Dec.Transform
+			(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, " a "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, " that "), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType.UppercaseFirst
+			, string.Empty), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, ". "), new Org.Brotli.Dec.Transform(".", Org.Brotli.Dec.WordTransformType.Identity, string.Empty), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType
+			.Identity, ", "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.OmitFirst4, string.Empty), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, " with "), new Org.Brotli.Dec.Transform
+			(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, "'"), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, " from "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity
+			, " by "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.OmitFirst5, string.Empty), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.OmitFirst6, string.Empty), new Org.Brotli.Dec.Transform
+			(" the ", Org.Brotli.Dec.WordTransformType.Identity, string.Empty), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.OmitLast4, string.Empty), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType
+			.Identity, ". The "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.UppercaseAll, string.Empty), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, " on "), new Org.Brotli.Dec.Transform
+			(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, " as "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, " is "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.OmitLast7
+			, string.Empty), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.OmitLast1, "ing "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, "\n\t"), new Org.Brotli.Dec.Transform(string.Empty
+			, Org.Brotli.Dec.WordTransformType.Identity, ":"), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType.Identity, ". "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, "ed "), new Org.Brotli.Dec.Transform
+			(string.Empty, Org.Brotli.Dec.WordTransformType.OmitFirst9, string.Empty), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.OmitFirst7, string.Empty), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType
+			.OmitLast6, string.Empty), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, "("), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.UppercaseFirst, ", "), new Org.Brotli.Dec.Transform
+			(string.Empty, Org.Brotli.Dec.WordTransformType.OmitLast8, string.Empty), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, " at "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType
+			.Identity, "ly "), new Org.Brotli.Dec.Transform(" the ", Org.Brotli.Dec.WordTransformType.Identity, " of "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.OmitLast5, string.Empty), new Org.Brotli.Dec.Transform(
+			string.Empty, Org.Brotli.Dec.WordTransformType.OmitLast9, string.Empty), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType.UppercaseFirst, ", "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.UppercaseFirst
+			, "\""), new Org.Brotli.Dec.Transform(".", Org.Brotli.Dec.WordTransformType.Identity, "("), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.UppercaseAll, " "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType
+			.UppercaseFirst, "\">"), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, "=\""), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType.Identity, "."), new Org.Brotli.Dec.Transform(".com/", 
+			Org.Brotli.Dec.WordTransformType.Identity, string.Empty), new Org.Brotli.Dec.Transform(" the ", Org.Brotli.Dec.WordTransformType.Identity, " of the "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.UppercaseFirst
+			, "'"), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, ". This "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, ","), new Org.Brotli.Dec.Transform(".", Org.Brotli.Dec.WordTransformType
+			.Identity, " "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.UppercaseFirst, "("), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.UppercaseFirst, "."), new Org.Brotli.Dec.Transform
+			(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, " not "), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType.Identity, "=\""), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, "er "
+			), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType.UppercaseAll, " "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, "al "), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType
+			.UppercaseAll, string.Empty), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, "='"), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.UppercaseAll, "\""), new Org.Brotli.Dec.Transform
+			(string.Empty, Org.Brotli.Dec.WordTransformType.UppercaseFirst, ". "), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType.Identity, "("), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, 
+			"ful "), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType.UppercaseFirst, ". "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, "ive "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType
+			.Identity, "less "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.UppercaseAll, "'"), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, "est "), new Org.Brotli.Dec.Transform
+			(" ", Org.Brotli.Dec.WordTransformType.UppercaseFirst, "."), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.UppercaseAll, "\">"), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType.Identity, "='"
+			), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.UppercaseFirst, ","), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity, "ize "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType
+			.UppercaseAll, "."), new Org.Brotli.Dec.Transform("\u00c2\u00a0", Org.Brotli.Dec.WordTransformType.Identity, string.Empty), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType.Identity, ","), new Org.Brotli.Dec.Transform(string.Empty
+			, Org.Brotli.Dec.WordTransformType.UppercaseFirst, "=\""), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.UppercaseAll, "=\""), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.Identity
+			, "ous "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.UppercaseAll, ", "), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.UppercaseFirst, "='"), new Org.Brotli.Dec.Transform(" ", 
+			Org.Brotli.Dec.WordTransformType.UppercaseFirst, ","), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType.UppercaseAll, "=\""), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType.UppercaseAll, ", "), new Org.Brotli.Dec.Transform
+			(string.Empty, Org.Brotli.Dec.WordTransformType.UppercaseAll, ","), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.UppercaseAll, "("), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.
+			UppercaseAll, ". "), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType.UppercaseAll, "."), new Org.Brotli.Dec.Transform(string.Empty, Org.Brotli.Dec.WordTransformType.UppercaseAll, "='"), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType
+			.UppercaseAll, ". "), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType.UppercaseFirst, "=\""), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType.UppercaseAll, "='"), new Org.Brotli.Dec.Transform(" ", Org.Brotli.Dec.WordTransformType
+			.UppercaseFirst, "='") };
+
+		/// <summary>Writes the transform's prefix, the trimmed and optionally upper-cased word, then its suffix into <paramref name="dst"/>.</summary>
+		/// <param name="dst">Destination buffer; this method performs no explicit capacity check.</param>
+		/// <param name="dstOffset">Index in <paramref name="dst"/> where writing starts.</param>
+		/// <param name="word">Buffer holding the source word.</param>
+		/// <param name="wordOffset">Index in <paramref name="word"/> of the word's first byte.</param>
+		/// <param name="len">Length of the source word in bytes, before any trimming.</param>
+		/// <param name="transform">Supplies the prefix, the operation and the suffix.</param>
+		/// <returns>Number of bytes written, i.e. final write position minus <paramref name="dstOffset"/>.</returns>
+		internal static int TransformDictionaryWord(byte[] dst, int dstOffset, byte[] word, int wordOffset, int len, Org.Brotli.Dec.Transform transform)
+		{
+			int cursor = dstOffset;
+			// Copy prefix byte by byte (the original notes it is usually too short to benefit from a bulk copy).
+			foreach (byte b in transform.prefix)
+			{
+				dst[cursor++] = b;
+			}
+			// Copy the word minus the leading / trailing bytes the operation asks to omit.
+			int op = transform.type;
+			int skip = Org.Brotli.Dec.WordTransformType.GetOmitFirst(op);
+			if (skip > len)
+			{
+				skip = len;
+			}
+			wordOffset += skip;
+			len -= skip;
+			len -= Org.Brotli.Dec.WordTransformType.GetOmitLast(op);
+			for (int remaining = len; remaining > 0; remaining--)
+			{
+				dst[cursor++] = word[wordOffset++];
+			}
+			// Optional case change, applied in place to the bytes just copied.
+			bool upperFirst = op == Org.Brotli.Dec.WordTransformType.UppercaseFirst;
+			if (upperFirst || op == Org.Brotli.Dec.WordTransformType.UppercaseAll)
+			{
+				int pos = cursor - len;
+				int left = upperFirst ? 1 : len; // UppercaseFirst budgets a single unit, UppercaseAll the whole copied word.
+				while (left > 0)
+				{
+					int lead = dst[pos] & 0xFF;
+					if (lead >= 0xe0)
+					{
+						// Values from 0xe0 up: toggle bits 0 and 2 of the byte two positions ahead, step over 3 bytes.
+						dst[pos + 2] ^= (byte)5;
+						pos += 3;
+						left -= 3;
+					}
+					else if (lead >= 0xc0)
+					{
+						// Values 0xc0..0xdf: toggle bit 5 of the following byte, step over 2 bytes.
+						dst[pos + 1] ^= (byte)32;
+						pos += 2;
+						left -= 2;
+					}
+					else
+					{
+						if (lead >= 'a' && lead <= 'z')
+						{
+							dst[pos] ^= (byte)32;
+						}
+						pos += 1;
+						left -= 1;
+					}
+				}
+			}
+			// Copy suffix.
+			foreach (byte b in transform.suffix)
+			{
+				dst[cursor++] = b;
+			}
+			return cursor - dstOffset;
+		}
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/TransformTest.cs b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/TransformTest.cs
new file mode 100644
index 0000000000..0f6845fde0
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/TransformTest.cs
@@ -0,0 +1,74 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Distributed under MIT license.
+See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+namespace Org.Brotli.Dec
+{
+	/// <summary>
+	/// Tests for <see cref="Transform"/>.
+	/// </summary>
+	public class TransformTest
+	{
+		// 64-bit CRC computed bit by bit (no lookup table): starts from all ones, shifts right,
+		// and returns the inverted register. Used only to fingerprint the expected output below.
+		private static long Crc64(byte[] data)
+		{
+			long checksum = -1;
+			foreach (byte octet in data)
+			{
+				long acc = (checksum ^ octet) & 0xFFL;
+				for (int bit = 0; bit < 8; bit++)
+				{
+					acc = (long)((ulong)acc >> 1) ^ (-(acc & 1L) & -3932672073523589310L);
+				}
+				checksum = acc ^ (long)((ulong)checksum >> 8);
+			}
+			return ~checksum;
+		}
+
+		[NUnit.Framework.Test]
+		public virtual void TestTrimAll()
+		{
+			// "word"
+			byte[] word = new byte[] { 119, 111, 114, 100 };
+			// "[]" - OmitFirst5 asks for more symbols than the word has, so only prefix and suffix remain.
+			byte[] expected = new byte[] { 91, 93 };
+			byte[] actual = new byte[2];
+			Org.Brotli.Dec.Transform bracketTrim = new Org.Brotli.Dec.Transform("[", Org.Brotli.Dec.WordTransformType.OmitFirst5, "]");
+			Org.Brotli.Dec.Transform.TransformDictionaryWord(actual, 0, word, 0, word.Length, bracketTrim);
+			NUnit.Framework.Assert.AreEqual(expected, actual);
+		}
+
+		[NUnit.Framework.Test]
+		public virtual void TestCapitalize()
+		{
+			// "qæप" as UTF-8: one 1-byte, one 2-byte and one 3-byte character.
+			byte[] word = new byte[] { 113, unchecked((byte)(-61)), unchecked((byte)(-90)), unchecked((byte)(-32)), unchecked((byte)(-92)), unchecked((byte)(-86)) };
+			// "[QÆय]"
+			byte[] expected = new byte[] { 91, 81, unchecked((byte)(-61)), unchecked((byte)(-122)), unchecked((byte)(-32)), unchecked((byte)(-92)), unchecked((byte)(-81)), 93 };
+			byte[] actual = new byte[8];
+			Org.Brotli.Dec.Transform bracketUpper = new Org.Brotli.Dec.Transform("[", Org.Brotli.Dec.WordTransformType.UppercaseAll, "]");
+			Org.Brotli.Dec.Transform.TransformDictionaryWord(actual, 0, word, 0, word.Length, bracketUpper);
+			NUnit.Framework.Assert.AreEqual(expected, actual);
+		}
+
+		[NUnit.Framework.Test]
+		public virtual void TestAllTransforms()
+		{
+			// "o123456789abcdef" can take every transform (head and tail cutting, capitalization,
+			// upper-casing), and all of the results are mutually different.
+			byte[] sample = new byte[] { 111, 49, 50, 51, 52, 53, 54, 55, 56, 57, 97, 98, 99, 100, 101, 102 };
+			byte[] sink = new byte[2259];
+			int written = 0;
+			foreach (Org.Brotli.Dec.Transform transform in Org.Brotli.Dec.Transform.Transforms)
+			{
+				// Append the transformed word, then a 0xFF separator byte.
+				written += Org.Brotli.Dec.Transform.TransformDictionaryWord(sink, written, sample, 0, sample.Length, transform);
+				sink[written++] = unchecked((byte)(-1));
+			}
+			NUnit.Framework.Assert.AreEqual(sink.Length, written);
+			NUnit.Framework.Assert.AreEqual(8929191060211225186L, Crc64(sink));
+		}
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Utils.cs b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Utils.cs
new file mode 100644
index 0000000000..e95f87efee
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/Utils.cs
@@ -0,0 +1,59 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Distributed under MIT license.
+See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+namespace Org.Brotli.Dec
+{
+	/// <summary>Helpers that zero-fill ranges of arrays.</summary>
+	internal sealed class Utils
+	{
+		// All-zero source buffers; both FillWithZeroes overloads copy from them in chunks of up to 1024 items.
+		private static readonly byte[] ByteZeroes = new byte[1024];
+		private static readonly int[] IntZeroes = new int[1024];
+
+		/// <summary>Zeroes a range of a byte array.</summary>
+		/// <remarks>
+		/// Clears <paramref name="length"/> bytes of <paramref name="dest"/> starting at <paramref name="offset"/>.
+		/// <para>The range is filled by copying from a preallocated zero buffer with
+		/// <see cref="System.Array.Copy(System.Array, int, System.Array, int, int)"/>
+		/// in chunks of at most 1024 bytes, so it is intended for lengths of at least 16;
+		/// nothing is written when <paramref name="length"/> is not positive.</para>
+		/// </remarks>
+		/// <param name="dest">array to fill with zeroes</param>
+		/// <param name="offset">index of the first byte to clear</param>
+		/// <param name="length">number of bytes to clear</param>
+		internal static void FillWithZeroes(byte[] dest, int offset, int length)
+		{
+			for (int done = 0; done < length; )
+			{
+				// End of the current chunk, capped at the requested length.
+				int chunkEnd = System.Math.Min(done + 1024, length);
+				System.Array.Copy(ByteZeroes, 0, dest, offset + done, chunkEnd - done);
+				done = chunkEnd;
+			}
+		}
+
+		/// <summary>Zeroes a range of an int array.</summary>
+		/// <remarks>
+		/// Clears <paramref name="length"/> items of <paramref name="dest"/> starting at <paramref name="offset"/>.
+		/// <para>The range is filled by copying from a preallocated zero buffer with
+		/// <see cref="System.Array.Copy(System.Array, int, System.Array, int, int)"/>
+		/// in chunks of at most 1024 items, so it is intended for lengths of at least 16;
+		/// nothing is written when <paramref name="length"/> is not positive.</para>
+		/// </remarks>
+		/// <param name="dest">array to fill with zeroes</param>
+		/// <param name="offset">index of the first item to clear</param>
+		/// <param name="length">number of items to clear</param>
+		internal static void FillWithZeroes(int[] dest, int offset, int length)
+		{
+			for (int done = 0; done < length; )
+			{
+				// End of the current chunk, capped at the requested length.
+				int chunkEnd = System.Math.Min(done + 1024, length);
+				System.Array.Copy(IntZeroes, 0, dest, offset + done, chunkEnd - done);
+				done = chunkEnd;
+			}
+		}
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/WordTransformType.cs b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/WordTransformType.cs
new file mode 100644
index 0000000000..777a5f5a55
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/org/brotli/dec/WordTransformType.cs
@@ -0,0 +1,68 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Distributed under MIT license.
+See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+namespace Org.Brotli.Dec
+{
+	/// <summary>Integer codes for every supported word transformation.</summary>
+	/// <remarks>
+	/// <para>The codes fall into four groups: the identity transform, "omit the last N symbols" (N = 1..9),
+	/// two character-case transforms, and "omit the first N symbols" (N = 1..9).</para>
+	/// </remarks>
+	internal sealed class WordTransformType
+	{
+		// No trimming and no case change.
+		internal const int Identity = 0;
+
+		// Drop N symbols from the end of the word; the code equals N.
+		internal const int OmitLast1 = 1;
+		internal const int OmitLast2 = 2;
+		internal const int OmitLast3 = 3;
+		internal const int OmitLast4 = 4;
+		internal const int OmitLast5 = 5;
+		internal const int OmitLast6 = 6;
+		internal const int OmitLast7 = 7;
+		internal const int OmitLast8 = 8;
+		internal const int OmitLast9 = 9;
+
+		// Character-case transforms.
+		internal const int UppercaseFirst = 10;
+		internal const int UppercaseAll = 11;
+
+		// Drop N symbols from the start of the word; the code equals N + 11.
+		internal const int OmitFirst1 = 12;
+		internal const int OmitFirst2 = 13;
+		internal const int OmitFirst3 = 14;
+		internal const int OmitFirst4 = 15;
+		internal const int OmitFirst5 = 16;
+		internal const int OmitFirst6 = 17;
+		internal const int OmitFirst7 = 18;
+		internal const int OmitFirst8 = 19;
+		internal const int OmitFirst9 = 20;
+
+		/// <summary>Number of leading symbols a transform code removes.</summary>
+		/// <param name="type">transform code, one of the constants of this class</param>
+		/// <returns>1..9 for OmitFirst1..OmitFirst9; 0 for every smaller code</returns>
+		internal static int GetOmitFirst(int type)
+		{
+			if (type < OmitFirst1)
+			{
+				return 0;
+			}
+			return type - OmitFirst1 + 1;
+		}
+
+		/// <summary>Number of trailing symbols a transform code removes.</summary>
+		/// <param name="type">transform code, one of the constants of this class</param>
+		/// <returns>the code itself for Identity and OmitLast1..OmitLast9 (0..9); 0 for every larger code</returns>
+		internal static int GetOmitLast(int type)
+		{
+			if (type > OmitLast9)
+			{
+				return 0;
+			}
+			return type - OmitLast1 + 1;
+		}
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/sharpen.cfg b/third-party/libjxl/libjxl/third_party/brotli/csharp/sharpen.cfg
new file mode 100644
index 0000000000..b69cf43161
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/sharpen.cfg
@@ -0,0 +1,18 @@
+-pascalCase+
+-nativeTypeSystem
+-separateInterfaceConstants
+-maxColumns 240
+-copySharpenCs false
+-sharpenNamespace nonamespace
+-outputFolder build/generated
+
+-namespaceMapping java.io System.IO
+
+-typeMapping java.io.ByteArrayInputStream System.IO.MemoryStream
+-typeMapping java.io.ByteArrayOutputStream System.IO.MemoryStream
+-typeMapping java.io.InputStream System.IO.Stream
+
+-methodMapping java.io.ByteArrayOutputStream.toByteArray() ToArray
+-methodMapping java.io.InputStream.read() ReadByte
+-methodMapping org.brotli.dec.BrotliInputStream.read() ReadByte
+-methodMapping org.junit.Assert.assertArrayEquals NUnit.Framework.Assert.AreEqual
diff --git a/third-party/libjxl/libjxl/third_party/brotli/csharp/transpile.sh b/third-party/libjxl/libjxl/third_party/brotli/csharp/transpile.sh
new file mode 100644
index 0000000000..3fefb8432b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/csharp/transpile.sh
@@ -0,0 +1,94 @@
+if ! which mvn >/dev/null; then
+  echo -e '\033[01;31mMaven is not installed / configured.\033[00m'
+  exit 1
+fi
+
+if ! which mono >/dev/null; then
+  echo -e '\033[01;31mMono platform is not installed / configured.\033[00m'
+  exit 1
+fi
+
+if ! which nuget >/dev/null; then
+  echo -e '\033[01;31mNuGet compiler is not installed / configured.\033[00m'
+  exit 1
+fi
+
+if ! which mcs >/dev/null; then
+  echo -e '\033[01;31mC# compiler is not installed / configured.\033[00m'
+  exit 1
+fi
+
+rm -rf build
+mkdir build
+cd build
+
+#-------------------------------------------------------------------------------
+
+echo -e '\033[01;33mFetching Sharpen sources.\033[00m'
+
+git clone https://github.com/stanislaw89/sharpen.git
+cd sharpen
+git checkout 4f609ed42862a1f9aab1be00374ff86534a5e6d6 || exit 1
+
+#-------------------------------------------------------------------------------
+
+echo -e '\n\033[01;33mCompiling Sharpen.\033[00m'
+
+mvn clean package -DskipTests
+mvn dependency:copy -Dartifact=junit:junit:4.12 -DoutputDirectory=..
+cd ..
+cp sharpen/target/sharpencore-0.0.1-SNAPSHOT-jar-with-dependencies.jar ./sharpen.jar
+
+#-------------------------------------------------------------------------------
+
+echo -e '\n\033[01;33mTranspiling.\033[00m'
+
+cd ..
+java -jar build/sharpen.jar ../java/org/brotli/dec/ -cp build/junit-4.12.jar @sharpen.cfg
+
+#-------------------------------------------------------------------------------
+
+echo -e '\n\033[01;33mPatching.\033[00m'
+
+# TODO: detect "dead" files, that are not generated by sharpen anymore.
+cp -r build/generated/* ./
+
+# Reflection does not work without Sharpen.cs
+rm org/brotli/dec/EnumTest.cs
+
+PATTERN='\/\/ \<\{\[INJECTED CODE\]\}\>'
+CODE=$(<org/brotli/dec/BrotliInputStream.cs)
+REPLACEMENT=$(<injected_code.txt)
+echo "${CODE//$PATTERN/$REPLACEMENT}" > org/brotli/dec/BrotliInputStream.cs
+
+#-------------------------------------------------------------------------------
+
+echo -e '\n\033[01;33mDowloading dependencies.\033[00m'
+
+cd build
+nuget install NUnit -Version 3.6.1
+nuget install NUnit.ConsoleRunner -Version 3.6.1
+cd ..
+
+#-------------------------------------------------------------------------------
+
+echo -e '\n\033[01;33mCompiling generated code.\033[00m'
+
+SOURCES=`find org/brotli -type file ! -path "*Test.cs"`
+TESTS_SOURCES=`find org/brotli -type file -path "*Test.cs"`
+
+mcs $SOURCES -target:library -out:build/brotlidec.dll
+mcs $SOURCES $TESTS_SOURCES -target:library -out:build/brotlidec_test.dll -r:build/NUnit.3.6.1/lib/net45/nunit.framework.dll
+
+#-------------------------------------------------------------------------------
+
+echo -e '\n\033[01;33mRunning tests.\033[00m'
+
+export MONO_PATH=$MONO_PATH:`pwd`/build/NUnit.3.6.1/lib/net45
+mono --debug build/NUnit.ConsoleRunner.3.6.1/tools/nunit3-console.exe build/brotlidec_test.dll
+
+#-------------------------------------------------------------------------------
+
+echo -e '\n\033[01;33mCleanup.\033[00m'
+rm TestResult.xml
+rm -rf build
diff --git a/third-party/libjxl/libjxl/third_party/brotli/docs/brotli-comparison-study-2015-09-22.pdf b/third-party/libjxl/libjxl/third_party/brotli/docs/brotli-comparison-study-2015-09-22.pdf
new file mode 100644
index 0000000000..040f179e2b
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/brotli/docs/brotli-comparison-study-2015-09-22.pdf differ
diff --git a/third-party/libjxl/libjxl/third_party/brotli/docs/brotli.1 b/third-party/libjxl/libjxl/third_party/brotli/docs/brotli.1
new file mode 100644
index 0000000000..7ca135572c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/docs/brotli.1
@@ -0,0 +1,129 @@
+.\" Automatically generated by Pandoc 2.7.3
+.\"
+.TH "brotli" "1" "August 14 2021" "brotli 1.0.9" "User Manual"
+.hy
+.SH NAME
+.PP
+brotli, unbrotli \- compress or decompress files
+.SH SYNOPSIS
+.PP
+\f[B]brotli\f[R] [\f[I]OPTION|FILE\f[R]]\&...
+.PP
+\f[B]unbrotli\f[R] is equivalent to \f[B]brotli --decompress\f[R]
+.SH DESCRIPTION
+.PP
+\f[B]brotli\f[R] is a generic-purpose lossless compression algorithm
+that compresses data using a combination of a modern variant of the
+\f[B]LZ77\f[R] algorithm, Huffman coding and 2-nd order context
+modeling, with a compression ratio comparable to the best currently
+available general-purpose compression methods.
+It is similar in speed to deflate but offers denser compression.
+.PP
+\f[B]brotli\f[R] command line syntax is similar to \f[B]gzip (1)\f[R] and
+\f[B]zstd (1)\f[R].
+Unlike \f[B]gzip (1)\f[R], source files are preserved by default.
+It is possible to remove them after processing by using the
+\f[B]--rm\f[R] \f[I]option\f[R].
+.PP
+Arguments that look like \[lq]\f[B]--name\f[R]\[rq] or
+\[lq]\f[B]--name=value\f[R]\[rq] are \f[I]options\f[R].
+Every \f[I]option\f[R] has a short form \[lq]\f[B]-x\f[R]\[rq] or
+\[lq]\f[B]-x value\f[R]\[rq].
+Multiple short form \f[I]options\f[R] could be coalesced:
+.IP \[bu] 2
+\[lq]\f[B]--decompress --stdout --suffix=.b\f[R]\[rq] works the same as
+.IP \[bu] 2
+\[lq]\f[B]-d -s -S .b\f[R]\[rq] and
+.IP \[bu] 2
+\[lq]\f[B]-dsS .b\f[R]\[rq]
+.PP
+\f[B]brotli\f[R] has 3 operation modes:
+.IP \[bu] 2
+default mode is compression;
+.IP \[bu] 2
+\f[B]--decompress\f[R] option activates decompression mode;
+.IP \[bu] 2
+\f[B]--test\f[R] option switches to integrity test mode; this option is
+equivalent to \[lq]\f[B]--decompress --stdout\f[R]\[rq] except that the
+decompressed data is discarded instead of being written to standard
+output.
+.PP
+Every non-option argument is a \f[I]file\f[R] entry.
+If no \f[I]files\f[R] are given or \f[I]file\f[R] is
+\[lq]\f[B]-\f[R]\[rq], \f[B]brotli\f[R] reads from standard input.
+All arguments after \[lq]\f[B]--\f[R]\[rq] are \f[I]file\f[R] entries.
+.PP
+Unless \f[B]--stdout\f[R] or \f[B]--output\f[R] is specified,
+\f[I]files\f[R] are written to a new file whose name is derived from the
+source \f[I]file\f[R] name:
+.IP \[bu] 2
+when compressing, a suffix is appended to the source filename to get the
+target filename
+.IP \[bu] 2
+when decompressing, a suffix is removed from the source filename to get
+the target filename
+.PP
+Default suffix is \f[B].br\f[R], but it could be specified with
+\f[B]--suffix\f[R] option.
+.PP
+Conflicting or duplicate \f[I]options\f[R] are not allowed.
+.SH OPTIONS
+.IP \[bu] 2
+\f[B]-#\f[R]: compression level (0-9); bigger values cause denser, but
+slower compression
+.IP \[bu] 2
+\f[B]-c\f[R], \f[B]--stdout\f[R]: write on standard output
+.IP \[bu] 2
+\f[B]-d\f[R], \f[B]--decompress\f[R]: decompress mode
+.IP \[bu] 2
+\f[B]-f\f[R], \f[B]--force\f[R]: force output file overwrite
+.IP \[bu] 2
+\f[B]-h\f[R], \f[B]--help\f[R]: display this help and exit
+.IP \[bu] 2
+\f[B]-j\f[R], \f[B]--rm\f[R]: remove source file(s); \f[B]gzip
+(1)\f[R]-like behaviour
+.IP \[bu] 2
+\f[B]-k\f[R], \f[B]--keep\f[R]: keep source file(s); \f[B]zstd
+(1)\f[R]-like behaviour
+.IP \[bu] 2
+\f[B]-n\f[R], \f[B]--no-copy-stat\f[R]: do not copy source file(s)
+attributes
+.IP \[bu] 2
+\f[B]-o FILE\f[R], \f[B]--output=FILE\f[R]: output file; valid only if
+there is a single input entry
+.IP \[bu] 2
+\f[B]-q NUM\f[R], \f[B]--quality=NUM\f[R]: compression level (0-11);
+bigger values cause denser, but slower compression
+.IP \[bu] 2
+\f[B]-t\f[R], \f[B]--test\f[R]: test file integrity mode
+.IP \[bu] 2
+\f[B]-v\f[R], \f[B]--verbose\f[R]: increase output verbosity
+.IP \[bu] 2
+\f[B]-w NUM\f[R], \f[B]--lgwin=NUM\f[R]: set LZ77 window size (0, 10-24)
+(default: 24); window size is \f[B](pow(2, NUM) - 16)\f[R]; 0 lets
+compressor decide on the optimal value; bigger window sizes improve
+density; decoder might require up to window size memory to operate
+.IP \[bu] 2
+\f[B]-D FILE\f[R], \f[B]--dictionary=FILE\f[R]: use FILE as raw (LZ77)
+dictionary; same dictionary MUST be used both for compression and
+decompression
+.IP \[bu] 2
+\f[B]-S SUF\f[R], \f[B]--suffix=SUF\f[R]: output file suffix (default:
+\f[B].br\f[R])
+.IP \[bu] 2
+\f[B]-V\f[R], \f[B]--version\f[R]: display version and exit
+.IP \[bu] 2
+\f[B]-Z\f[R], \f[B]--best\f[R]: use best compression level (default);
+same as \[lq]\f[B]-q 11\f[R]\[rq]
+.SH SEE ALSO
+.PP
+\f[B]brotli\f[R] file format is defined in RFC
+7932 (https://www.ietf.org/rfc/rfc7932.txt).
+.PP
+\f[B]brotli\f[R] is open-sourced under the MIT
+License (https://opensource.org/licenses/MIT).
+.PP
+Mailing list: https://groups.google.com/forum/#!forum/brotli
+.SH BUGS
+.PP
+Report bugs at: https://github.com/google/brotli/issues
diff --git a/third-party/libjxl/libjxl/third_party/brotli/docs/brotli.svg b/third-party/libjxl/libjxl/third_party/brotli/docs/brotli.svg
new file mode 100644
index 0000000000..3732d96c99
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/docs/brotli.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" fill="#fff" viewBox="0 0 512 512"><g transform="rotate(15 -844 1100)scale(24)"><circle fill="#ea3" r="10"/><ellipse rx="2" ry="8"/><ellipse rx="1" ry="4" cx="5"/><ellipse rx="1" ry="4" cx="-5"/></g></svg>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/brotli/docs/constants.h.3 b/third-party/libjxl/libjxl/third_party/brotli/docs/constants.h.3
new file mode 100644
index 0000000000..1b29f28c98
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/docs/constants.h.3
@@ -0,0 +1,47 @@
+.TH "constants.h" 3 "August 2021" "Brotli" \" -*- nroff -*-
+.ad l
+.nh
+.SH NAME
+constants.h \- Common constants used in decoder and encoder API\&.  
+
+.SH SYNOPSIS
+.br
+.PP
+.SS "Macros"
+
+.in +1c
+.ti -1c
+.RI "#define \fBBROTLI_LARGE_MAX_DISTANCE_BITS\fP   62U"
+.br
+.RI "\fIThe theoretical maximum number of distance bits specified for large window brotli, for 64-bit encoders and decoders\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_LARGE_MAX_WBITS\fP   30"
+.br
+.RI "\fIThe maximum supported large brotli window bits by the encoder and decoder\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_MAX_NPOSTFIX\fP   3"
+.br
+.RI "\fIMaximal number of 'postfix' bits\&. \fP"
+.in -1c
+.SH "Detailed Description"
+.PP 
+Common constants used in decoder and encoder API\&. 
+
+
+.SH "Macro Definition Documentation"
+.PP 
+.SS "#define BROTLI_LARGE_MAX_DISTANCE_BITS   62U"
+
+.PP
+The theoretical maximum number of distance bits specified for large window brotli, for 64-bit encoders and decoders\&. Even when in practice 32-bit encoders and decoders only support up to 30 max distance bits, the value is set to 62 because it affects the large window brotli file format\&. Specifically, it affects the encoding of simple huffman tree for distances, see Specification RFC 7932 chapter 3\&.4\&. 
+.SS "#define BROTLI_LARGE_MAX_WBITS   30"
+
+.PP
+The maximum supported large brotli window bits by the encoder and decoder\&. Large window brotli allows up to 62 bits, however the current encoder and decoder, designed for 32-bit integers, only support up to 30 bits maximum\&. 
+.SS "#define BROTLI_MAX_NPOSTFIX   3"
+
+.PP
+Maximal number of 'postfix' bits\&. Number of 'postfix' bits is stored as 2 bits in meta-block header\&. 
+.SH "Author"
+.PP 
+Generated automatically by Doxygen for Brotli from the source code\&.
diff --git a/third-party/libjxl/libjxl/third_party/brotli/docs/decode.h.3 b/third-party/libjxl/libjxl/third_party/brotli/docs/decode.h.3
new file mode 100644
index 0000000000..3d4e538a0e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/docs/decode.h.3
@@ -0,0 +1,448 @@
+.TH "decode.h" 3 "August 2021" "Brotli" \" -*- nroff -*-
+.ad l
+.nh
+.SH NAME
+decode.h \- API for Brotli decompression\&.  
+
+.SH SYNOPSIS
+.br
+.PP
+.SS "Macros"
+
+.in +1c
+.ti -1c
+.RI "#define \fBBROTLI_DECODER_ERROR_CODES_LIST\fP(BROTLI_ERROR_CODE,  SEPARATOR)        "
+.br
+.RI "\fITemplate that evaluates items of \fBBrotliDecoderErrorCode\fP\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_LAST_ERROR_CODE\fP   BROTLI_DECODER_ERROR_UNREACHABLE"
+.br
+.RI "\fIThe value of the last error code, negative integer\&. \fP"
+.in -1c
+.SS "Typedefs"
+
+.in +1c
+.ti -1c
+.RI "typedef enum \fBBrotliDecoderParameter\fP \fBBrotliDecoderParameter\fP"
+.br
+.RI "\fIOptions to be used with \fBBrotliDecoderSetParameter\fP\&. \fP"
+.ti -1c
+.RI "typedef struct BrotliDecoderStateStruct \fBBrotliDecoderState\fP"
+.br
+.RI "\fIOpaque structure that holds decoder state\&. \fP"
+.in -1c
+.SS "Enumerations"
+.SS "Functions"
+
+.in +1c
+.ti -1c
+.RI "\fBBROTLI_BOOL\fP \fBBrotliDecoderAttachDictionary\fP (\fBBrotliDecoderState\fP *state, BrotliSharedDictionaryType type, size_t data_size, const uint8_t data[data_size])"
+.br
+.RI "\fIAdds LZ77 prefix dictionary, adds or replaces built-in static dictionary and transforms\&. \fP"
+.ti -1c
+.RI "\fBBrotliDecoderState\fP * \fBBrotliDecoderCreateInstance\fP (\fBbrotli_alloc_func\fP alloc_func, \fBbrotli_free_func\fP free_func, void *opaque)"
+.br
+.RI "\fICreates an instance of \fBBrotliDecoderState\fP and initializes it\&. \fP"
+.ti -1c
+.RI "\fBBrotliDecoderResult\fP \fBBrotliDecoderDecompress\fP (size_t encoded_size, const uint8_t encoded_buffer[encoded_size], size_t *decoded_size, uint8_t decoded_buffer[*decoded_size])"
+.br
+.RI "\fIPerforms one-shot memory-to-memory decompression\&. \fP"
+.ti -1c
+.RI "\fBBrotliDecoderResult\fP \fBBrotliDecoderDecompressStream\fP (\fBBrotliDecoderState\fP *state, size_t *available_in, const uint8_t **next_in, size_t *available_out, uint8_t **next_out, size_t *total_out)"
+.br
+.RI "\fIDecompresses the input stream to the output stream\&. \fP"
+.ti -1c
+.RI "void \fBBrotliDecoderDestroyInstance\fP (\fBBrotliDecoderState\fP *state)"
+.br
+.RI "\fIDeinitializes and frees \fBBrotliDecoderState\fP instance\&. \fP"
+.ti -1c
+.RI "const char * \fBBrotliDecoderErrorString\fP (\fBBrotliDecoderErrorCode\fP c)"
+.br
+.RI "\fIConverts error code to a c-string\&. \fP"
+.ti -1c
+.RI "\fBBrotliDecoderErrorCode\fP \fBBrotliDecoderGetErrorCode\fP (const \fBBrotliDecoderState\fP *state)"
+.br
+.RI "\fIAcquires a detailed error code\&. \fP"
+.ti -1c
+.RI "\fBBROTLI_BOOL\fP \fBBrotliDecoderHasMoreOutput\fP (const \fBBrotliDecoderState\fP *state)"
+.br
+.RI "\fIChecks if decoder has more output\&. \fP"
+.ti -1c
+.RI "\fBBROTLI_BOOL\fP \fBBrotliDecoderIsFinished\fP (const \fBBrotliDecoderState\fP *state)"
+.br
+.RI "\fIChecks if decoder instance reached the final state\&. \fP"
+.ti -1c
+.RI "\fBBROTLI_BOOL\fP \fBBrotliDecoderIsUsed\fP (const \fBBrotliDecoderState\fP *state)"
+.br
+.RI "\fIChecks if instance has already consumed input\&. \fP"
+.ti -1c
+.RI "\fBBROTLI_BOOL\fP \fBBrotliDecoderSetParameter\fP (\fBBrotliDecoderState\fP *state, \fBBrotliDecoderParameter\fP param, uint32_t value)"
+.br
+.RI "\fISets the specified parameter to the given decoder instance\&. \fP"
+.ti -1c
+.RI "const uint8_t * \fBBrotliDecoderTakeOutput\fP (\fBBrotliDecoderState\fP *state, size_t *size)"
+.br
+.RI "\fIAcquires pointer to internal output buffer\&. \fP"
+.ti -1c
+.RI "uint32_t \fBBrotliDecoderVersion\fP (void)"
+.br
+.RI "\fIGets a decoder library version\&. \fP"
+.in -1c
+.SH "Detailed Description"
+.PP 
+API for Brotli decompression\&. 
+
+
+.SH "Macro Definition Documentation"
+.PP 
+.SS "#define BROTLI_DECODER_ERROR_CODES_LIST(BROTLI_ERROR_CODE, SEPARATOR)"
+
+.PP
+Template that evaluates items of \fBBrotliDecoderErrorCode\fP\&. Example:
+.PP
+.nf
+// Log Brotli error code\&.
+switch (brotliDecoderErrorCode) {
+#define CASE_(PREFIX, NAME, CODE) \
+  case BROTLI_DECODER ## PREFIX ## NAME: \
+    LOG(INFO) << "error code:" << #NAME; \
+    break;
+#define NEWLINE_
+BROTLI_DECODER_ERROR_CODES_LIST(CASE_, NEWLINE_)
+#undef CASE_
+#undef NEWLINE_
+  default: LOG(FATAL) << "unknown brotli error code";
+}
+
+.fi
+.PP
+ 
+.SS "#define BROTLI_LAST_ERROR_CODE   BROTLI_DECODER_ERROR_UNREACHABLE"
+
+.PP
+The value of the last error code, negative integer\&. All other error code values are in the range from \fBBROTLI_LAST_ERROR_CODE\fP to \fC-1\fP\&. There are also 4 other possible non-error codes \fC0\fP \&.\&. \fC3\fP in \fBBrotliDecoderErrorCode\fP enumeration\&. 
+.SH "Typedef Documentation"
+.PP 
+.SS "typedef enum \fBBrotliDecoderParameter\fP  \fBBrotliDecoderParameter\fP"
+
+.PP
+Options to be used with \fBBrotliDecoderSetParameter\fP\&. 
+.SS "typedef struct BrotliDecoderStateStruct \fBBrotliDecoderState\fP"
+
+.PP
+Opaque structure that holds decoder state\&. Allocated and initialized with \fBBrotliDecoderCreateInstance\fP\&. Cleaned up and deallocated with \fBBrotliDecoderDestroyInstance\fP\&. 
+.SH "Enumeration Type Documentation"
+.PP 
+.SS "enum \fBBrotliDecoderErrorCode\fP"
+
+.PP
+Error code for detailed logging / production debugging\&. See \fBBrotliDecoderGetErrorCode\fP and \fBBROTLI_LAST_ERROR_CODE\fP\&. 
+.SS "enum \fBBrotliDecoderParameter\fP"
+
+.PP
+Options to be used with \fBBrotliDecoderSetParameter\fP\&. 
+.PP
+\fBEnumerator\fP
+.in +1c
+.TP
+\fB\fIBROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION \fP\fP
+Disable 'canny' ring buffer allocation strategy\&. Ring buffer is allocated according to window size, despite the real size of the content\&. 
+.TP
+\fB\fIBROTLI_DECODER_PARAM_LARGE_WINDOW \fP\fP
+Flag that determines if 'Large Window Brotli' is used\&. 
+.SS "enum \fBBrotliDecoderResult\fP"
+
+.PP
+Result type for \fBBrotliDecoderDecompress\fP and \fBBrotliDecoderDecompressStream\fP functions\&. 
+.PP
+\fBEnumerator\fP
+.in +1c
+.TP
+\fB\fIBROTLI_DECODER_RESULT_ERROR \fP\fP
+Decoding error, e\&.g\&. corrupted input or memory allocation problem\&. 
+.TP
+\fB\fIBROTLI_DECODER_RESULT_SUCCESS \fP\fP
+Decoding successfully completed\&. 
+.TP
+\fB\fIBROTLI_DECODER_RESULT_NEEDS_MORE_INPUT \fP\fP
+Partially done; should be called again with more input\&. 
+.TP
+\fB\fIBROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT \fP\fP
+Partially done; should be called again with more output\&. 
+.SH "Function Documentation"
+.PP 
+.SS "\fBBROTLI_BOOL\fP BrotliDecoderAttachDictionary (\fBBrotliDecoderState\fP * state, BrotliSharedDictionaryType type, size_t data_size, const uint8_t data[data_size])"
+
+.PP
+Adds LZ77 prefix dictionary, adds or replaces built-in static dictionary and transforms\&. Attached dictionary ownership is not transferred\&. Data provided to this method should be kept accessible until decoding is finished and decoder instance is destroyed\&.
+.PP
+\fBNote:\fP
+.RS 4
+Dictionaries can NOT be attached after actual decoding is started\&.
+.RE
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP decoder instance 
+.br
+\fItype\fP dictionary data format 
+.br
+\fIdata_size\fP length of memory region pointed to by \fCdata\fP 
+.br
+\fIdata\fP dictionary data in format corresponding to \fCtype\fP 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_FALSE\fP if dictionary is corrupted, or dictionary count limit is reached 
+.PP
+\fBBROTLI_TRUE\fP if dictionary is accepted / attached 
+.RE
+.PP
+
+.SS "\fBBrotliDecoderState\fP* BrotliDecoderCreateInstance (\fBbrotli_alloc_func\fP alloc_func, \fBbrotli_free_func\fP free_func, void * opaque)"
+
+.PP
+Creates an instance of \fBBrotliDecoderState\fP and initializes it\&. The instance can be used once for decoding and should then be destroyed with \fBBrotliDecoderDestroyInstance\fP, it cannot be reused for a new decoding session\&.
+.PP
+\fCalloc_func\fP and \fCfree_func\fP \fBMUST\fP be both zero or both non-zero\&. In the case they are both zero, default memory allocators are used\&. \fCopaque\fP is passed to \fCalloc_func\fP and \fCfree_func\fP when they are called\&. \fCfree_func\fP has to return without doing anything when asked to free a NULL pointer\&.
+.PP
+\fBParameters:\fP
+.RS 4
+\fIalloc_func\fP custom memory allocation function 
+.br
+\fIfree_func\fP custom memory free function 
+.br
+\fIopaque\fP custom memory manager handle 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fC0\fP if instance can not be allocated or initialized 
+.PP
+pointer to initialized \fBBrotliDecoderState\fP otherwise 
+.RE
+.PP
+
+.SS "\fBBrotliDecoderResult\fP BrotliDecoderDecompress (size_t encoded_size, const uint8_t encoded_buffer[encoded_size], size_t * decoded_size, uint8_t decoded_buffer[*decoded_size])"
+
+.PP
+Performs one-shot memory-to-memory decompression\&. Decompresses the data in \fCencoded_buffer\fP into \fCdecoded_buffer\fP, and sets \fC*decoded_size\fP to the decompressed length\&.
+.PP
+\fBParameters:\fP
+.RS 4
+\fIencoded_size\fP size of \fCencoded_buffer\fP 
+.br
+\fIencoded_buffer\fP compressed data buffer with at least \fCencoded_size\fP addressable bytes 
+.br
+\fIdecoded_size\fP \fBin:\fP size of \fCdecoded_buffer\fP; 
+.br
+ \fBout:\fP length of decompressed data written to \fCdecoded_buffer\fP 
+.br
+\fIdecoded_buffer\fP decompressed data destination buffer 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_DECODER_RESULT_ERROR\fP if input is corrupted, memory allocation failed, or \fCdecoded_buffer\fP is not large enough; 
+.PP
+\fBBROTLI_DECODER_RESULT_SUCCESS\fP otherwise 
+.RE
+.PP
+
+.SS "\fBBrotliDecoderResult\fP BrotliDecoderDecompressStream (\fBBrotliDecoderState\fP * state, size_t * available_in, const uint8_t ** next_in, size_t * available_out, uint8_t ** next_out, size_t * total_out)"
+
+.PP
+Decompresses the input stream to the output stream\&. The values \fC*available_in\fP and \fC*available_out\fP must specify the number of bytes addressable at \fC*next_in\fP and \fC*next_out\fP respectively\&. When \fC*available_out\fP is \fC0\fP, \fCnext_out\fP is allowed to be \fCNULL\fP\&.
+.PP
+After each call, \fC*available_in\fP will be decremented by the amount of input bytes consumed, and the \fC*next_in\fP pointer will be incremented by that amount\&. Similarly, \fC*available_out\fP will be decremented by the amount of output bytes written, and the \fC*next_out\fP pointer will be incremented by that amount\&.
+.PP
+\fCtotal_out\fP, if it is not a null-pointer, will be set to the number of bytes decompressed since the last \fCstate\fP initialization\&.
+.PP
+\fBNote:\fP
+.RS 4
+Input is never overconsumed, so \fCnext_in\fP and \fCavailable_in\fP could be passed to the next consumer after decoding is complete\&.
+.RE
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP decoder instance 
+.br
+\fIavailable_in\fP \fBin:\fP amount of available input; 
+.br
+ \fBout:\fP amount of unused input 
+.br
+\fInext_in\fP pointer to the next compressed byte 
+.br
+\fIavailable_out\fP \fBin:\fP length of output buffer; 
+.br
+ \fBout:\fP remaining size of output buffer 
+.br
+\fInext_out\fP output buffer cursor; can be \fCNULL\fP if \fCavailable_out\fP is \fC0\fP 
+.br
+\fItotal_out\fP number of bytes decompressed so far; can be \fCNULL\fP 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_DECODER_RESULT_ERROR\fP if input is corrupted, memory allocation failed, arguments were invalid, etc\&.; use \fBBrotliDecoderGetErrorCode\fP to get detailed error code 
+.PP
+\fBBROTLI_DECODER_RESULT_NEEDS_MORE_INPUT\fP decoding is blocked until more input data is provided 
+.PP
+\fBBROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT\fP decoding is blocked until more output space is provided 
+.PP
+\fBBROTLI_DECODER_RESULT_SUCCESS\fP decoding is finished, no more input might be consumed and no more output will be produced 
+.RE
+.PP
+
+.SS "void BrotliDecoderDestroyInstance (\fBBrotliDecoderState\fP * state)"
+
+.PP
+Deinitializes and frees \fBBrotliDecoderState\fP instance\&. 
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP decoder instance to be cleaned up and deallocated 
+.RE
+.PP
+
+.SS "\fBBrotliDecoderErrorCode\fP BrotliDecoderGetErrorCode (const \fBBrotliDecoderState\fP * state)"
+
+.PP
+Acquires a detailed error code\&. Should be used only after \fBBrotliDecoderDecompressStream\fP returns \fBBROTLI_DECODER_RESULT_ERROR\fP\&.
+.PP
+See also \fBBrotliDecoderErrorString\fP
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP decoder instance 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+last saved error code 
+.RE
+.PP
+
+.SS "\fBBROTLI_BOOL\fP BrotliDecoderHasMoreOutput (const \fBBrotliDecoderState\fP * state)"
+
+.PP
+Checks if decoder has more output\&. 
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP decoder instance 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_TRUE\fP, if decoder has some unconsumed output 
+.PP
+\fBBROTLI_FALSE\fP otherwise 
+.RE
+.PP
+
+.SS "\fBBROTLI_BOOL\fP BrotliDecoderIsFinished (const \fBBrotliDecoderState\fP * state)"
+
+.PP
+Checks if decoder instance reached the final state\&. 
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP decoder instance 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_TRUE\fP if decoder is in a state where it reached the end of the input and produced all of the output 
+.PP
+\fBBROTLI_FALSE\fP otherwise 
+.RE
+.PP
+
+.SS "\fBBROTLI_BOOL\fP BrotliDecoderIsUsed (const \fBBrotliDecoderState\fP * state)"
+
+.PP
+Checks if instance has already consumed input\&. Instance that returns \fBBROTLI_FALSE\fP is considered 'fresh' and could be reused\&.
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP decoder instance 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_TRUE\fP if decoder has already used some input bytes 
+.PP
+\fBBROTLI_FALSE\fP otherwise 
+.RE
+.PP
+
+.SS "\fBBROTLI_BOOL\fP BrotliDecoderSetParameter (\fBBrotliDecoderState\fP * state, \fBBrotliDecoderParameter\fP param, uint32_t value)"
+
+.PP
+Sets the specified parameter to the given decoder instance\&. 
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP decoder instance 
+.br
+\fIparam\fP parameter to set 
+.br
+\fIvalue\fP new parameter value 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_FALSE\fP if parameter is unrecognized, or value is invalid 
+.PP
+\fBBROTLI_TRUE\fP if value is accepted 
+.RE
+.PP
+
+.SS "const uint8_t* BrotliDecoderTakeOutput (\fBBrotliDecoderState\fP * state, size_t * size)"
+
+.PP
+Acquires pointer to internal output buffer\&. This method is used to make language bindings easier and more efficient:
+.IP "1." 4
+push data to \fBBrotliDecoderDecompressStream\fP, until \fBBROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT\fP is reported
+.IP "2." 4
+use \fBBrotliDecoderTakeOutput\fP to peek bytes and copy to language-specific entity
+.PP
+.PP
+Also this could be useful if there is an output stream that is able to consume all the provided data (e\&.g\&. when data is saved to file system)\&.
+.PP
+\fBAttention:\fP
+.RS 4
+After every call to \fBBrotliDecoderTakeOutput\fP \fC*size\fP bytes of output are considered consumed for all consecutive calls to the instance methods; returned pointer becomes invalidated as well\&.
+.RE
+.PP
+\fBNote:\fP
+.RS 4
+Decoder output is not guaranteed to be contiguous\&. This means that after the size-unrestricted call to \fBBrotliDecoderTakeOutput\fP, immediate next call to \fBBrotliDecoderTakeOutput\fP may return more data\&.
+.RE
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP decoder instance 
+.br
+\fIsize\fP \fBin:\fP number of bytes caller is ready to take, \fC0\fP if any amount could be handled; 
+.br
+ \fBout:\fP amount of data pointed by returned pointer and considered consumed; 
+.br
+ out value is never greater than in value, unless it is \fC0\fP 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+pointer to output data 
+.RE
+.PP
+
+.SS "uint32_t BrotliDecoderVersion (void)"
+
+.PP
+Gets a decoder library version\&. Look at BROTLI_VERSION for more information\&. 
+.SH "Author"
+.PP 
+Generated automatically by Doxygen for Brotli from the source code\&.
diff --git a/third-party/libjxl/libjxl/third_party/brotli/docs/encode.h.3 b/third-party/libjxl/libjxl/third_party/brotli/docs/encode.h.3
new file mode 100644
index 0000000000..f3430fa9ed
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/docs/encode.h.3
@@ -0,0 +1,630 @@
+.TH "encode.h" 3 "August 2021" "Brotli" \" -*- nroff -*-
+.ad l
+.nh
+.SH NAME
+encode.h \- API for Brotli compression\&.  
+
+.SH SYNOPSIS
+.br
+.PP
+.SS "Macros"
+
+.in +1c
+.ti -1c
+.RI "#define \fBBROTLI_DEFAULT_MODE\fP   \fBBROTLI_MODE_GENERIC\fP"
+.br
+.RI "\fIDefault value for \fBBROTLI_PARAM_MODE\fP parameter\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_DEFAULT_QUALITY\fP   11"
+.br
+.RI "\fIDefault value for \fBBROTLI_PARAM_QUALITY\fP parameter\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_DEFAULT_WINDOW\fP   22"
+.br
+.RI "\fIDefault value for \fBBROTLI_PARAM_LGWIN\fP parameter\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_LARGE_MAX_WINDOW_BITS\fP   30"
+.br
+.RI "\fIMaximal value for \fBBROTLI_PARAM_LGWIN\fP parameter in 'Large Window Brotli' (32-bit)\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_MAX_INPUT_BLOCK_BITS\fP   24"
+.br
+.RI "\fIMaximal value for \fBBROTLI_PARAM_LGBLOCK\fP parameter\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_MAX_QUALITY\fP   11"
+.br
+.RI "\fIMaximal value for \fBBROTLI_PARAM_QUALITY\fP parameter\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_MAX_WINDOW_BITS\fP   24"
+.br
+.RI "\fIMaximal value for \fBBROTLI_PARAM_LGWIN\fP parameter\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_MIN_INPUT_BLOCK_BITS\fP   16"
+.br
+.RI "\fIMinimal value for \fBBROTLI_PARAM_LGBLOCK\fP parameter\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_MIN_QUALITY\fP   0"
+.br
+.RI "\fIMinimal value for \fBBROTLI_PARAM_QUALITY\fP parameter\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_MIN_WINDOW_BITS\fP   10"
+.br
+.RI "\fIMinimal value for \fBBROTLI_PARAM_LGWIN\fP parameter\&. \fP"
+.in -1c
+.SS "Typedefs"
+
+.in +1c
+.ti -1c
+.RI "typedef enum \fBBrotliEncoderMode\fP \fBBrotliEncoderMode\fP"
+.br
+.RI "\fIOptions for \fBBROTLI_PARAM_MODE\fP parameter\&. \fP"
+.ti -1c
+.RI "typedef enum \fBBrotliEncoderOperation\fP \fBBrotliEncoderOperation\fP"
+.br
+.RI "\fIOperations that can be performed by streaming encoder\&. \fP"
+.ti -1c
+.RI "typedef enum \fBBrotliEncoderParameter\fP \fBBrotliEncoderParameter\fP"
+.br
+.RI "\fIOptions to be used with \fBBrotliEncoderSetParameter\fP\&. \fP"
+.ti -1c
+.RI "typedef struct BrotliEncoderStateStruct \fBBrotliEncoderState\fP"
+.br
+.RI "\fIOpaque structure that holds encoder state\&. \fP"
+.in -1c
+.SS "Enumerations"
+.SS "Functions"
+
+.in +1c
+.ti -1c
+.RI "\fBBROTLI_BOOL\fP \fBBrotliEncoderAttachPreparedDictionary\fP (\fBBrotliEncoderState\fP *state, const BrotliEncoderPreparedDictionary *dictionary)"
+.br
+.RI "\fIAttaches a prepared dictionary of any type to the encoder\&. \fP"
+.ti -1c
+.RI "\fBBROTLI_BOOL\fP \fBBrotliEncoderCompress\fP (int quality, int lgwin, \fBBrotliEncoderMode\fP mode, size_t input_size, const uint8_t input_buffer[input_size], size_t *encoded_size, uint8_t encoded_buffer[*encoded_size])"
+.br
+.RI "\fIPerforms one-shot memory-to-memory compression\&. \fP"
+.ti -1c
+.RI "\fBBROTLI_BOOL\fP \fBBrotliEncoderCompressStream\fP (\fBBrotliEncoderState\fP *state, \fBBrotliEncoderOperation\fP op, size_t *available_in, const uint8_t **next_in, size_t *available_out, uint8_t **next_out, size_t *total_out)"
+.br
+.RI "\fICompresses input stream to output stream\&. \fP"
+.ti -1c
+.RI "\fBBrotliEncoderState\fP * \fBBrotliEncoderCreateInstance\fP (\fBbrotli_alloc_func\fP alloc_func, \fBbrotli_free_func\fP free_func, void *opaque)"
+.br
+.RI "\fICreates an instance of \fBBrotliEncoderState\fP and initializes it\&. \fP"
+.ti -1c
+.RI "void \fBBrotliEncoderDestroyInstance\fP (\fBBrotliEncoderState\fP *state)"
+.br
+.RI "\fIDeinitializes and frees \fBBrotliEncoderState\fP instance\&. \fP"
+.ti -1c
+.RI "\fBBROTLI_BOOL\fP \fBBrotliEncoderHasMoreOutput\fP (\fBBrotliEncoderState\fP *state)"
+.br
+.RI "\fIChecks if encoder has more output\&. \fP"
+.ti -1c
+.RI "\fBBROTLI_BOOL\fP \fBBrotliEncoderIsFinished\fP (\fBBrotliEncoderState\fP *state)"
+.br
+.RI "\fIChecks if encoder instance reached the final state\&. \fP"
+.ti -1c
+.RI "size_t \fBBrotliEncoderMaxCompressedSize\fP (size_t input_size)"
+.br
+.RI "\fICalculates the output size bound for the given \fCinput_size\fP\&. \fP"
+.ti -1c
+.RI "BrotliEncoderPreparedDictionary * \fBBrotliEncoderPrepareDictionary\fP (BrotliSharedDictionaryType type, size_t data_size, const uint8_t data[data_size], int quality, \fBbrotli_alloc_func\fP alloc_func, \fBbrotli_free_func\fP free_func, void *opaque)"
+.br
+.RI "\fIPrepares a shared dictionary from the given file format for the encoder\&. \fP"
+.ti -1c
+.RI "\fBBROTLI_BOOL\fP \fBBrotliEncoderSetParameter\fP (\fBBrotliEncoderState\fP *state, \fBBrotliEncoderParameter\fP param, uint32_t value)"
+.br
+.RI "\fISets the specified parameter to the given encoder instance\&. \fP"
+.ti -1c
+.RI "const uint8_t * \fBBrotliEncoderTakeOutput\fP (\fBBrotliEncoderState\fP *state, size_t *size)"
+.br
+.RI "\fIAcquires pointer to internal output buffer\&. \fP"
+.ti -1c
+.RI "uint32_t \fBBrotliEncoderVersion\fP (void)"
+.br
+.RI "\fIGets an encoder library version\&. \fP"
+.in -1c
+.SH "Detailed Description"
+.PP 
+API for Brotli compression\&. 
+
+
+.SH "Macro Definition Documentation"
+.PP 
+.SS "#define BROTLI_DEFAULT_MODE   \fBBROTLI_MODE_GENERIC\fP"
+
+.PP
+Default value for \fBBROTLI_PARAM_MODE\fP parameter\&. 
+.SS "#define BROTLI_DEFAULT_QUALITY   11"
+
+.PP
+Default value for \fBBROTLI_PARAM_QUALITY\fP parameter\&. 
+.SS "#define BROTLI_DEFAULT_WINDOW   22"
+
+.PP
+Default value for \fBBROTLI_PARAM_LGWIN\fP parameter\&. 
+.SS "#define BROTLI_MAX_INPUT_BLOCK_BITS   24"
+
+.PP
+Maximal value for \fBBROTLI_PARAM_LGBLOCK\fP parameter\&. 
+.SS "#define BROTLI_MAX_QUALITY   11"
+
+.PP
+Maximal value for \fBBROTLI_PARAM_QUALITY\fP parameter\&. 
+.SS "#define BROTLI_MAX_WINDOW_BITS   24"
+
+.PP
+Maximal value for \fBBROTLI_PARAM_LGWIN\fP parameter\&. 
+.PP
+\fBNote:\fP
+.RS 4
+equal to \fCBROTLI_MAX_DISTANCE_BITS\fP constant\&. 
+.RE
+.PP
+
+.SS "#define BROTLI_MIN_INPUT_BLOCK_BITS   16"
+
+.PP
+Minimal value for \fBBROTLI_PARAM_LGBLOCK\fP parameter\&. 
+.SS "#define BROTLI_MIN_QUALITY   0"
+
+.PP
+Minimal value for \fBBROTLI_PARAM_QUALITY\fP parameter\&. 
+.SS "#define BROTLI_MIN_WINDOW_BITS   10"
+
+.PP
+Minimal value for \fBBROTLI_PARAM_LGWIN\fP parameter\&. 
+.SH "Typedef Documentation"
+.PP 
+.SS "typedef enum \fBBrotliEncoderMode\fP  \fBBrotliEncoderMode\fP"
+
+.PP
+Options for \fBBROTLI_PARAM_MODE\fP parameter\&. 
+.SS "typedef enum \fBBrotliEncoderOperation\fP  \fBBrotliEncoderOperation\fP"
+
+.PP
+Operations that can be performed by streaming encoder\&. 
+.SS "typedef enum \fBBrotliEncoderParameter\fP  \fBBrotliEncoderParameter\fP"
+
+.PP
+Options to be used with \fBBrotliEncoderSetParameter\fP\&. 
+.SS "typedef struct BrotliEncoderStateStruct \fBBrotliEncoderState\fP"
+
+.PP
+Opaque structure that holds encoder state\&. Allocated and initialized with \fBBrotliEncoderCreateInstance\fP\&. Cleaned up and deallocated with \fBBrotliEncoderDestroyInstance\fP\&. 
+.SH "Enumeration Type Documentation"
+.PP 
+.SS "enum \fBBrotliEncoderMode\fP"
+
+.PP
+Options for \fBBROTLI_PARAM_MODE\fP parameter\&. 
+.PP
+\fBEnumerator\fP
+.in +1c
+.TP
+\fB\fIBROTLI_MODE_GENERIC \fP\fP
+Default compression mode\&. In this mode compressor does not know anything in advance about the properties of the input\&. 
+.TP
+\fB\fIBROTLI_MODE_TEXT \fP\fP
+Compression mode for UTF-8 formatted text input\&. 
+.TP
+\fB\fIBROTLI_MODE_FONT \fP\fP
+Compression mode used in WOFF 2\&.0\&. 
+.SS "enum \fBBrotliEncoderOperation\fP"
+
+.PP
+Operations that can be performed by streaming encoder\&. 
+.PP
+\fBEnumerator\fP
+.in +1c
+.TP
+\fB\fIBROTLI_OPERATION_PROCESS \fP\fP
+Process input\&. Encoder may postpone producing output, until it has processed enough input\&. 
+.TP
+\fB\fIBROTLI_OPERATION_FLUSH \fP\fP
+Produce output for all processed input\&. Actual flush is performed when input stream is depleted and there is enough space in output stream\&. This means that client should repeat \fBBROTLI_OPERATION_FLUSH\fP operation until \fCavailable_in\fP becomes \fC0\fP, and \fBBrotliEncoderHasMoreOutput\fP returns \fBBROTLI_FALSE\fP\&. If output is acquired via \fBBrotliEncoderTakeOutput\fP, then operation should be repeated after output buffer is drained\&.
+.PP
+\fBWarning:\fP
+.RS 4
+Until flush is complete, client \fBSHOULD\fP \fBNOT\fP swap, reduce or extend input stream\&.
+.RE
+.PP
+When flush is complete, output data will be sufficient for decoder to reproduce all the given input\&. 
+.TP
+\fB\fIBROTLI_OPERATION_FINISH \fP\fP
+Finalize the stream\&. Actual finalization is performed when input stream is depleted and there is enough space in output stream\&. This means that client should repeat \fBBROTLI_OPERATION_FINISH\fP operation until \fCavailable_in\fP becomes \fC0\fP, and \fBBrotliEncoderHasMoreOutput\fP returns \fBBROTLI_FALSE\fP\&. If output is acquired via \fBBrotliEncoderTakeOutput\fP, then operation should be repeated after output buffer is drained\&.
+.PP
+\fBWarning:\fP
+.RS 4
+Until finalization is complete, client \fBSHOULD\fP \fBNOT\fP swap, reduce or extend input stream\&.
+.RE
+.PP
+Helper function \fBBrotliEncoderIsFinished\fP checks if stream is finalized and output fully dumped\&.
+.PP
+Adding more input data to finalized stream is impossible\&. 
+.TP
+\fB\fIBROTLI_OPERATION_EMIT_METADATA \fP\fP
+Emit metadata block to stream\&. Metadata is opaque to Brotli: neither encoder, nor decoder processes this data or relies on it\&. It may be used to pass some extra information from encoder client to decoder client without interfering with main data stream\&.
+.PP
+\fBNote:\fP
+.RS 4
+Encoder may emit empty metadata blocks internally, to pad encoded stream to byte boundary\&.
+.RE
+.PP
+\fBWarning:\fP
+.RS 4
+Until emitting metadata is complete client \fBSHOULD\fP \fBNOT\fP swap, reduce or extend input stream\&.
+.PP
+The whole content of input buffer is considered to be the content of metadata block\&. Do \fBNOT\fP \fIappend\fP metadata to input stream, before it is depleted with other operations\&.
+.RE
+.PP
+Stream is soft-flushed before metadata block is emitted\&. Metadata block \fBMUST\fP be no longer than 16MiB\&. 
+.SS "enum \fBBrotliEncoderParameter\fP"
+
+.PP
+Options to be used with \fBBrotliEncoderSetParameter\fP\&. 
+.PP
+\fBEnumerator\fP
+.in +1c
+.TP
+\fB\fIBROTLI_PARAM_MODE \fP\fP
+Tune encoder for specific input\&. \fBBrotliEncoderMode\fP enumerates all available values\&. 
+.TP
+\fB\fIBROTLI_PARAM_QUALITY \fP\fP
+The main compression speed-density lever\&. The higher the quality, the slower the compression\&. Range is from \fBBROTLI_MIN_QUALITY\fP to \fBBROTLI_MAX_QUALITY\fP\&. 
+.TP
+\fB\fIBROTLI_PARAM_LGWIN \fP\fP
+Recommended sliding LZ77 window size\&. Encoder may reduce this value, e\&.g\&. if input is much smaller than window size\&.
+.PP
+Window size is \fC(1 << value) - 16\fP\&.
+.PP
+Range is from \fBBROTLI_MIN_WINDOW_BITS\fP to \fBBROTLI_MAX_WINDOW_BITS\fP\&. 
+.TP
+\fB\fIBROTLI_PARAM_LGBLOCK \fP\fP
+Recommended input block size\&. Encoder may reduce this value, e\&.g\&. if input is much smaller than input block size\&.
+.PP
+Range is from \fBBROTLI_MIN_INPUT_BLOCK_BITS\fP to \fBBROTLI_MAX_INPUT_BLOCK_BITS\fP\&.
+.PP
+\fBNote:\fP
+.RS 4
+Bigger input block size allows better compression, but consumes more memory\&. 
+.br
+ The rough formula of memory used for temporary input storage is \fC3 << lgBlock\fP\&. 
+.RE
+.PP
+
+.TP
+\fB\fIBROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING \fP\fP
+Flag that affects usage of 'literal context modeling' format feature\&. This flag is a 'decoding-speed vs compression ratio' trade-off\&. 
+.TP
+\fB\fIBROTLI_PARAM_SIZE_HINT \fP\fP
+Estimated total input size for all \fBBrotliEncoderCompressStream\fP calls\&. The default value is 0, which means that the total input size is unknown\&. 
+.TP
+\fB\fIBROTLI_PARAM_LARGE_WINDOW \fP\fP
+Flag that determines if 'Large Window Brotli' is used\&. 
+.TP
+\fB\fIBROTLI_PARAM_NPOSTFIX \fP\fP
+Recommended number of postfix bits (NPOSTFIX)\&. Encoder may change this value\&.
+.PP
+Range is from 0 to \fBBROTLI_MAX_NPOSTFIX\fP\&. 
+.TP
+\fB\fIBROTLI_PARAM_NDIRECT \fP\fP
+Recommended number of direct distance codes (NDIRECT)\&. Encoder may change this value\&.
+.PP
+Range is from 0 to (15 << NPOSTFIX) in steps of (1 << NPOSTFIX)\&. 
+.TP
+\fB\fIBROTLI_PARAM_STREAM_OFFSET \fP\fP
+Number of bytes of input stream already processed by a different instance\&. 
+.PP
+\fBNote:\fP
+.RS 4
+It is important to configure all the encoder instances with same parameters (except this one) in order to allow all the encoded parts to obey the same restrictions implied by the header\&.
+.RE
+.PP
+If offset is not 0, then stream header is omitted\&. In any case output start is byte aligned, so for proper streams stitching 'predecessor' stream must be flushed\&.
+.PP
+Range is not artificially limited, but all the values greater or equal to maximal window size have the same effect\&. Values greater than 2**30 are not allowed\&. 
+.SH "Function Documentation"
+.PP 
+.SS "\fBBROTLI_BOOL\fP BrotliEncoderAttachPreparedDictionary (\fBBrotliEncoderState\fP * state, const BrotliEncoderPreparedDictionary * dictionary)"
+
+.PP
+Attaches a prepared dictionary of any type to the encoder\&. Can be used multiple times to attach multiple dictionaries\&. The dictionary type was determined by BrotliEncoderPrepareDictionary\&. Multiple raw prefix dictionaries and/or max 1 serialized dictionary with custom words can be attached\&.
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_FALSE\fP in case of error 
+.PP
+\fBBROTLI_TRUE\fP otherwise 
+.RE
+.PP
+
+.SS "\fBBROTLI_BOOL\fP BrotliEncoderCompress (int quality, int lgwin, \fBBrotliEncoderMode\fP mode, size_t input_size, const uint8_t input_buffer[input_size], size_t * encoded_size, uint8_t encoded_buffer[*encoded_size])"
+
+.PP
+Performs one-shot memory-to-memory compression\&. Compresses the data in \fCinput_buffer\fP into \fCencoded_buffer\fP, and sets \fC*encoded_size\fP to the compressed length\&.
+.PP
+\fBNote:\fP
+.RS 4
+If \fBBrotliEncoderMaxCompressedSize\fP(\fCinput_size\fP) returns non-zero value, then output is guaranteed to be no longer than that\&.
+.PP
+If \fClgwin\fP is greater than \fBBROTLI_MAX_WINDOW_BITS\fP then resulting stream might be incompatible with RFC 7932; to decode such streams, decoder should be configured with \fBBROTLI_DECODER_PARAM_LARGE_WINDOW\fP = \fC1\fP 
+.RE
+.PP
+\fBParameters:\fP
+.RS 4
+\fIquality\fP quality parameter value, e\&.g\&. \fBBROTLI_DEFAULT_QUALITY\fP 
+.br
+\fIlgwin\fP lgwin parameter value, e\&.g\&. \fBBROTLI_DEFAULT_WINDOW\fP 
+.br
+\fImode\fP mode parameter value, e\&.g\&. \fBBROTLI_DEFAULT_MODE\fP 
+.br
+\fIinput_size\fP size of \fCinput_buffer\fP 
+.br
+\fIinput_buffer\fP input data buffer with at least \fCinput_size\fP addressable bytes 
+.br
+\fIencoded_size\fP \fBin:\fP size of \fCencoded_buffer\fP; 
+.br
+ \fBout:\fP length of compressed data written to \fCencoded_buffer\fP, or \fC0\fP if compression fails 
+.br
+\fIencoded_buffer\fP compressed data destination buffer 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_FALSE\fP in case of compression error 
+.PP
+\fBBROTLI_FALSE\fP if output buffer is too small 
+.PP
+\fBBROTLI_TRUE\fP otherwise 
+.RE
+.PP
+
+.SS "\fBBROTLI_BOOL\fP BrotliEncoderCompressStream (\fBBrotliEncoderState\fP * state, \fBBrotliEncoderOperation\fP op, size_t * available_in, const uint8_t ** next_in, size_t * available_out, uint8_t ** next_out, size_t * total_out)"
+
+.PP
+Compresses input stream to output stream\&. The values \fC*available_in\fP and \fC*available_out\fP must specify the number of bytes addressable at \fC*next_in\fP and \fC*next_out\fP respectively\&. When \fC*available_out\fP is \fC0\fP, \fCnext_out\fP is allowed to be \fCNULL\fP\&.
+.PP
+After each call, \fC*available_in\fP will be decremented by the amount of input bytes consumed, and the \fC*next_in\fP pointer will be incremented by that amount\&. Similarly, \fC*available_out\fP will be decremented by the amount of output bytes written, and the \fC*next_out\fP pointer will be incremented by that amount\&.
+.PP
+\fCtotal_out\fP, if it is not a null-pointer, will be set to the number of bytes compressed since the last \fCstate\fP initialization\&.
+.PP
+Internally workflow consists of 3 tasks:
+.IP "1." 4
+(optionally) copy input data to internal buffer
+.IP "2." 4
+actually compress data and (optionally) store it to internal buffer
+.IP "3." 4
+(optionally) copy compressed bytes from internal buffer to output stream
+.PP
+.PP
+Whenever all 3 tasks can't move forward anymore, or error occurs, this method returns the control flow to caller\&.
+.PP
+\fCop\fP is used to perform flush, finish the stream, or inject metadata block\&. See \fBBrotliEncoderOperation\fP for more information\&.
+.PP
+Flushing the stream means forcing encoding of all input passed to encoder and completing the current output block, so it could be fully decoded by stream decoder\&. To perform flush set \fCop\fP to \fBBROTLI_OPERATION_FLUSH\fP\&. Under some circumstances (e\&.g\&. lack of output stream capacity) this operation would require several calls to \fBBrotliEncoderCompressStream\fP\&. The method must be called again until both input stream is depleted and encoder has no more output (see \fBBrotliEncoderHasMoreOutput\fP) after the method is called\&.
+.PP
+Finishing the stream means encoding of all input passed to encoder and adding specific 'final' marks, so stream decoder could determine that stream is complete\&. To perform finish set \fCop\fP to \fBBROTLI_OPERATION_FINISH\fP\&. Under some circumstances (e\&.g\&. lack of output stream capacity) this operation would require several calls to \fBBrotliEncoderCompressStream\fP\&. The method must be called again until both input stream is depleted and encoder has no more output (see \fBBrotliEncoderHasMoreOutput\fP) after the method is called\&.
+.PP
+\fBWarning:\fP
+.RS 4
+When flushing and finishing, \fCop\fP should not change until operation is complete; input stream should not be swapped, reduced or extended as well\&.
+.RE
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP encoder instance 
+.br
+\fIop\fP requested operation 
+.br
+\fIavailable_in\fP \fBin:\fP amount of available input; 
+.br
+ \fBout:\fP amount of unused input 
+.br
+\fInext_in\fP pointer to the next input byte 
+.br
+\fIavailable_out\fP \fBin:\fP length of output buffer; 
+.br
+ \fBout:\fP remaining size of output buffer 
+.br
+\fInext_out\fP compressed output buffer cursor; can be \fCNULL\fP if \fCavailable_out\fP is \fC0\fP 
+.br
+\fItotal_out\fP number of bytes produced so far; can be \fCNULL\fP 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_FALSE\fP if there was an error 
+.PP
+\fBBROTLI_TRUE\fP otherwise 
+.RE
+.PP
+
+.SS "\fBBrotliEncoderState\fP* BrotliEncoderCreateInstance (\fBbrotli_alloc_func\fP alloc_func, \fBbrotli_free_func\fP free_func, void * opaque)"
+
+.PP
+Creates an instance of \fBBrotliEncoderState\fP and initializes it\&. \fCalloc_func\fP and \fCfree_func\fP \fBMUST\fP be both zero or both non-zero\&. In the case they are both zero, default memory allocators are used\&. \fCopaque\fP is passed to \fCalloc_func\fP and \fCfree_func\fP when they are called\&. \fCfree_func\fP has to return without doing anything when asked to free a NULL pointer\&.
+.PP
+\fBParameters:\fP
+.RS 4
+\fIalloc_func\fP custom memory allocation function 
+.br
+\fIfree_func\fP custom memory free function 
+.br
+\fIopaque\fP custom memory manager handle 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fC0\fP if instance can not be allocated or initialized 
+.PP
+pointer to initialized \fBBrotliEncoderState\fP otherwise 
+.RE
+.PP
+
+.SS "void BrotliEncoderDestroyInstance (\fBBrotliEncoderState\fP * state)"
+
+.PP
+Deinitializes and frees \fBBrotliEncoderState\fP instance\&. 
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP encoder instance to be cleaned up and deallocated 
+.RE
+.PP
+
+.SS "\fBBROTLI_BOOL\fP BrotliEncoderHasMoreOutput (\fBBrotliEncoderState\fP * state)"
+
+.PP
+Checks if encoder has more output\&. 
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP encoder instance 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_TRUE\fP, if encoder has some unconsumed output 
+.PP
+\fBBROTLI_FALSE\fP otherwise 
+.RE
+.PP
+
+.SS "\fBBROTLI_BOOL\fP BrotliEncoderIsFinished (\fBBrotliEncoderState\fP * state)"
+
+.PP
+Checks if encoder instance reached the final state\&. 
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP encoder instance 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_TRUE\fP if encoder is in a state where it reached the end of the input and produced all of the output 
+.PP
+\fBBROTLI_FALSE\fP otherwise 
+.RE
+.PP
+
+.SS "size_t BrotliEncoderMaxCompressedSize (size_t input_size)"
+
+.PP
+Calculates the output size bound for the given \fCinput_size\fP\&. 
+.PP
+\fBWarning:\fP
+.RS 4
+Result is only valid if quality is at least \fC2\fP and, in case \fBBrotliEncoderCompressStream\fP was used, no flushes (\fBBROTLI_OPERATION_FLUSH\fP) were performed\&.
+.RE
+.PP
+\fBParameters:\fP
+.RS 4
+\fIinput_size\fP size of projected input 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fC0\fP if result does not fit \fCsize_t\fP 
+.RE
+.PP
+
+.SS "BrotliEncoderPreparedDictionary* BrotliEncoderPrepareDictionary (BrotliSharedDictionaryType type, size_t data_size, const uint8_t data[data_size], int quality, \fBbrotli_alloc_func\fP alloc_func, \fBbrotli_free_func\fP free_func, void * opaque)"
+
+.PP
+Prepares a shared dictionary from the given file format for the encoder\&. \fCalloc_func\fP and \fCfree_func\fP \fBMUST\fP be both zero or both non-zero\&. In the case they are both zero, default memory allocators are used\&. \fCopaque\fP is passed to \fCalloc_func\fP and \fCfree_func\fP when they are called\&. \fCfree_func\fP has to return without doing anything when asked to free a NULL pointer\&.
+.PP
+\fBParameters:\fP
+.RS 4
+\fItype\fP type of dictionary stored in data 
+.br
+\fIdata_size\fP size of \fCdata\fP buffer 
+.br
+\fIdata\fP pointer to the dictionary data 
+.br
+\fIquality\fP the maximum Brotli quality to prepare the dictionary for, use BROTLI_MAX_QUALITY by default 
+.br
+\fIalloc_func\fP custom memory allocation function 
+.br
+\fIfree_func\fP custom memory free function 
+.br
+\fIopaque\fP custom memory manager handle 
+.RE
+.PP
+
+.SS "\fBBROTLI_BOOL\fP BrotliEncoderSetParameter (\fBBrotliEncoderState\fP * state, \fBBrotliEncoderParameter\fP param, uint32_t value)"
+
+.PP
+Sets the specified parameter to the given encoder instance\&. 
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP encoder instance 
+.br
+\fIparam\fP parameter to set 
+.br
+\fIvalue\fP new parameter value 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_FALSE\fP if parameter is unrecognized, or value is invalid 
+.PP
+\fBBROTLI_FALSE\fP if value of parameter can not be changed at current encoder state (e\&.g\&. when encoding is started, window size might be already encoded and therefore it is impossible to change it) 
+.PP
+\fBBROTLI_TRUE\fP if value is accepted 
+.RE
+.PP
+\fBWarning:\fP
+.RS 4
+invalid values might be accepted in case they would not break encoding process\&. 
+.RE
+.PP
+
+.SS "const uint8_t* BrotliEncoderTakeOutput (\fBBrotliEncoderState\fP * state, size_t * size)"
+
+.PP
+Acquires pointer to internal output buffer\&. This method is used to make language bindings easier and more efficient:
+.IP "1." 4
+push data to \fBBrotliEncoderCompressStream\fP, until \fBBrotliEncoderHasMoreOutput\fP returns BROTLI_TRUE
+.IP "2." 4
+use \fBBrotliEncoderTakeOutput\fP to peek bytes and copy to language-specific entity
+.PP
+.PP
+Also this could be useful if there is an output stream that is able to consume all the provided data (e\&.g\&. when data is saved to file system)\&.
+.PP
+\fBAttention:\fP
+.RS 4
+After every call to \fBBrotliEncoderTakeOutput\fP \fC*size\fP bytes of output are considered consumed for all consecutive calls to the instance methods; returned pointer becomes invalidated as well\&.
+.RE
+.PP
+\fBNote:\fP
+.RS 4
+Encoder output is not guaranteed to be contiguous\&. This means that after the size-unrestricted call to \fBBrotliEncoderTakeOutput\fP, immediate next call to \fBBrotliEncoderTakeOutput\fP may return more data\&.
+.RE
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP encoder instance 
+.br
+\fIsize\fP \fBin:\fP number of bytes caller is ready to take, \fC0\fP if any amount could be handled; 
+.br
+ \fBout:\fP amount of data pointed by returned pointer and considered consumed; 
+.br
+ out value is never greater than in value, unless it is \fC0\fP 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+pointer to output data 
+.RE
+.PP
+
+.SS "uint32_t BrotliEncoderVersion (void)"
+
+.PP
+Gets an encoder library version\&. Look at BROTLI_VERSION for more information\&. 
+.SH "Author"
+.PP 
+Generated automatically by Doxygen for Brotli from the source code\&.
diff --git a/third-party/libjxl/libjxl/third_party/brotli/docs/types.h.3 b/third-party/libjxl/libjxl/third_party/brotli/docs/types.h.3
new file mode 100644
index 0000000000..3d8a98cbc4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/docs/types.h.3
@@ -0,0 +1,117 @@
+.TH "types.h" 3 "August 2021" "Brotli" \" -*- nroff -*-
+.ad l
+.nh
+.SH NAME
+types.h \- Common types used in decoder and encoder API\&.  
+
+.SH SYNOPSIS
+.br
+.PP
+.SS "Macros"
+
+.in +1c
+.ti -1c
+.RI "#define \fBBROTLI_BOOL\fP   int"
+.br
+.RI "\fIA portable \fCbool\fP replacement\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_FALSE\fP   0"
+.br
+.RI "\fIPortable \fCfalse\fP replacement\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_TRUE\fP   1"
+.br
+.RI "\fIPortable \fCtrue\fP replacement\&. \fP"
+.ti -1c
+.RI "#define \fBTO_BROTLI_BOOL\fP(X)   (!!(X) ? \fBBROTLI_TRUE\fP : \fBBROTLI_FALSE\fP)"
+.br
+.RI "\fI\fCbool\fP to \fBBROTLI_BOOL\fP conversion macros\&. \fP"
+.in -1c
+.SS "Typedefs"
+
+.in +1c
+.ti -1c
+.RI "typedef void *(* \fBbrotli_alloc_func\fP) (void *opaque, size_t size)"
+.br
+.RI "\fIAllocating function pointer type\&. \fP"
+.ti -1c
+.RI "typedef void(* \fBbrotli_free_func\fP) (void *opaque, void *address)"
+.br
+.RI "\fIDeallocating function pointer type\&. \fP"
+.in -1c
+.SH "Detailed Description"
+.PP 
+Common types used in decoder and encoder API\&. 
+
+
+.SH "Macro Definition Documentation"
+.PP 
+.SS "#define BROTLI_BOOL   int"
+
+.PP
+A portable \fCbool\fP replacement\&. \fBBROTLI_BOOL\fP is a 'documentation' type: actually it is \fCint\fP, but in API it denotes a type, whose only values are \fBBROTLI_TRUE\fP and \fBBROTLI_FALSE\fP\&.
+.PP
+\fBBROTLI_BOOL\fP values passed to Brotli should either be \fBBROTLI_TRUE\fP or \fBBROTLI_FALSE\fP, or be a result of \fBTO_BROTLI_BOOL\fP macros\&.
+.PP
+\fBBROTLI_BOOL\fP values returned by Brotli should not be tested for equality with \fCtrue\fP, \fCfalse\fP, \fBBROTLI_TRUE\fP, \fBBROTLI_FALSE\fP, but rather should be evaluated, for example:
+.PP
+.nf
+if (SomeBrotliFunction(encoder, BROTLI_TRUE) &&
+    !OtherBrotliFunction(decoder, BROTLI_FALSE)) {
+  bool x = !!YetAnotherBrotliFunction(encoder, TO_BROTLI_BOOL(2 * 2 == 4));
+  DoSomething(x);
+}
+
+.fi
+.PP
+ 
+.SS "#define BROTLI_FALSE   0"
+
+.PP
+Portable \fCfalse\fP replacement\&. 
+.SS "#define BROTLI_TRUE   1"
+
+.PP
+Portable \fCtrue\fP replacement\&. 
+.SS "#define TO_BROTLI_BOOL(X)   (!!(X) ? \fBBROTLI_TRUE\fP : \fBBROTLI_FALSE\fP)"
+
+.PP
+\fCbool\fP to \fBBROTLI_BOOL\fP conversion macros\&. 
+.SH "Typedef Documentation"
+.PP 
+.SS "typedef void*(* brotli_alloc_func) (void *opaque, size_t size)"
+
+.PP
+Allocating function pointer type\&. 
+.PP
+\fBParameters:\fP
+.RS 4
+\fIopaque\fP custom memory manager handle provided by client 
+.br
+\fIsize\fP requested memory region size; can not be \fC0\fP 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fC0\fP in the case of failure 
+.PP
+a valid pointer to a memory region of at least \fCsize\fP bytes long otherwise 
+.RE
+.PP
+
+.SS "typedef void(* brotli_free_func) (void *opaque, void *address)"
+
+.PP
+Deallocating function pointer type\&. This function \fBSHOULD\fP do nothing if \fCaddress\fP is \fC0\fP\&.
+.PP
+\fBParameters:\fP
+.RS 4
+\fIopaque\fP custom memory manager handle provided by client 
+.br
+\fIaddress\fP memory region pointer returned by \fBbrotli_alloc_func\fP, or \fC0\fP 
+.RE
+.PP
+
+.SH "Author"
+.PP 
+Generated automatically by Doxygen for Brotli from the source code\&.
diff --git a/third-party/libjxl/libjxl/third_party/brotli/fetch-spec/shared-brotli-fetch-spec.txt b/third-party/libjxl/libjxl/third_party/brotli/fetch-spec/shared-brotli-fetch-spec.txt
new file mode 100644
index 0000000000..ea2d11712b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/fetch-spec/shared-brotli-fetch-spec.txt
@@ -0,0 +1,116 @@
+[DRAFT]
+
+Introduction:
+
+This document is a draft proposal for Shared Brotli dictionaries in the fetch spec
+(https://fetch.spec.whatwg.org/).
+
+The goal is to add support for custom dictionaries for Brotli. A dictionary is used
+to improve compression. A client can download a dictionary from a server and then
+use it to decompress resources compressed with this dictionary.
+
+This document specifies how the client and server negotiate the dictionary over HTTP.
+A high level overview is as follows: The server adds an HTTP header to the response
+with a URL of the dictionary. The browser downloads the dictionary from the URL and
+then caches it so it can be reused. The server also adds a checksum to an HTTP header
+which the client uses to verify the dictionary. Caching, CORS, and other existing
+mechanisms are used. A dictionary can be a pre-made static dictionary, but does not
+have to be, for example a previous page loaded from this server, or an old version
+of a page, can be used as well.
+
+Below are changes and additions to add Shared Brotli dictionaries to the fetch spec
+at https://fetch.spec.whatwg.org/:
+
+Additions to `4.5. HTTP-network-or-cache fetch`
+
+Add to point `15. Modify httpRequest’s header list per HTTP.`:
+
+  If the recursive-sbr flag is enabled, `Accept-Encoding` may not contain `sbr`
+  [NOTE-BOX] When sbr can be used, it is possible to add a header Available-Dict
+  with the URL and hash code of a cached resource. The server may then use it as
+  shared dictionary.
+
+Additions to `4.6. HTTP-network fetch`
+
+Add after point `10. Run these steps, but abort if the ongoing fetch is terminated`:
+
+  11. Let codings be the result of extracting header list values given
+      `Content-Encoding` and response’s header list.
+  12. If codings contains `sbr`
+    1. If the header list does not contain `Sbr-Dict`, return a network error
+    2. Let dictionaryId be the result of extracting header list values given
+       `Sbr-Dict` and response’s header list.
+
+To point `12. Run these substeps in parallel:`, add new first sub-point:
+
+  1. If codings contains `sbr`, run these subsubsteps:
+      1. Let dictionaryResponse be the result of performing a
+         Shared-Brotli-dictionary fetch given dictionaryId and request.
+      2. If dictionaryResponse is a network error, return a network error.
+
+Change point `12.4. Set bytes to the result of handling content codings given codings and bytes.` to:
+
+  4. Set bytes to the result of handling content codings given codings, bytes
+     and, if codings contains `sbr`, also dictionaryResponse's body.
+     [NOTE-BOX] If the dictionary is still being fetched, which happens in
+     parallel, enqueue bytes in a compressed buffer and handle content coding
+     once the dictionary is fetched
+
+Additions to `2.2.4. Bodies`
+
+
+Change last section `To handle content codings ...` to:
+
+  To handle content codings given codings, bytes and optionally a dictionary, run these substeps:
+    1. If codings are not supported, return bytes.
+    2. If the codings has `sbr`, run these subsubsteps:
+       a. Return the result of decoding bytes and dictionary with the Shared
+          Brotli decoder.
+          [Shared Brotli Spec] [IANA Brotli](https://www.iana.org/assignments/http-parameters/http-parameters.xhtml)
+    3. Else:
+       a. Return the result of decoding bytes with the given codings, as
+          explained in HTTP. [HTTP] [HTTP-SEMANTICS] [HTTP-COND] [HTTP-CACHING]
+          [HTTP-AUTH]
+
+New section `4.10. Shared-Brotli-dictionary fetch`
+
+  To perform a Shared-Brotli-dictionary fetch using dictionaryId, and parentRequest, perform these steps:
+
+  1. Let dictionaryURL be the URL extracted from dictionaryId
+  2. Let dictionaryHash be the hash id extracted from dictionaryId
+  3. Let dictionaryRequest be a new request whose method is `GET`, url is
+     dictionaryURL, mode is "cors", and client is parentRequest's client.
+  4. Let dictionaryResponse be the result of performing an
+     [HTTP-network-or-cache](https://fetch.spec.whatwg.org/#concept-http-network-or-cache-fetch)
+     fetch using dictionaryRequest with the recursive-sbr flag set to true.
+     [NOTE-BOX] For compression benefits, the dictionary should be reused to
+     decode multiple different responses. We rely on caching to achieve this.
+     It is suggested for servers to not add any "no-cache" or short "max-age"
+     Cache-Control directives, and it is suggested for the client to effectively
+     support caching it.
+     [NOTE-BOX] Since the same dictionary can be identified by a hash code, a
+     browser can avoid fetching a dictionary if it already has one with the same
+     hash cached from a different source URL.
+     [NOTE-BOX] It is suggested that a server does not reuse the same URL
+     to host an updated or different dictionary. Instead the same dictionary URL
+     should contain a dictionary with the same content and same hash.
+  5. If dictionaryResponse is a network error, return a network error.
+  6. If dictionaryResponse's status is not an ok status, return a network error.
+  7. Let tokens be the result of
+     [parsing metadata](https://w3c.github.io/webappsec-subresource-integrity/#parse-metadata)
+     given dictionaryHash.
+     [Subresource Integrity](https://w3c.github.io/webappsec-subresource-integrity/)
+  8. If tokens is no metadata or the length of tokens is not 1, return a network
+     error
+  9. Let algorithm be the alg component of tokens[0]. If alg is 'hw3', set
+     algorithm to 256-bit HighwayHash
+  10. Let digest be the val component of tokens[0].
+  11. Let hashValue be the result of base64 decoding digest
+      [base64](https://tools.ietf.org/html/rfc4648)
+  12. If digest is not a valid base64 encoding, return a network error
+      [NOTE-BOX] All of the supported hashing algorithms are cryptographically
+      secure.
+  13. Compute the hash code of dictionaryResponse's body using algorithm and
+      compare this checksum for equality with hashValue. If the computed
+      checksum does not match hashValue, return a network error.
+  14. Return dictionaryResponse.
diff --git a/third-party/libjxl/libjxl/third_party/brotli/pyproject.toml b/third-party/libjxl/libjxl/third_party/brotli/pyproject.toml
new file mode 100644
index 0000000000..fed528d4a7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/pyproject.toml
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
diff --git a/third-party/libjxl/libjxl/third_party/brotli/python/Makefile b/third-party/libjxl/libjxl/third_party/brotli/python/Makefile
new file mode 100644
index 0000000000..d5d88f87f3
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/python/Makefile
@@ -0,0 +1,57 @@
+# Copyright 2016 The Brotli Authors. All rights reserved.
+#
+# Distributed under MIT license.
+# See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+
+
+# Default (Build)
+.PHONY: all
+# Build
+.PHONY: build
+# Develop
+.PHONY: develop
+# Install
+.PHONY: install
+# Test
+.PHONY: test
+# Clean
+.PHONY: clean
+# Format
+.PHONY: fix
+
+
+PYTHON ?= python
+YAPF ?= yapf
+
+EXT_SUFFIX=$(shell $(PYTHON) -c 'import sysconfig; print(sysconfig.get_config_var("EXT_SUFFIX"))')
+EXT_SOURCES=$(shell find . -name '*.cc')
+EXTENSIONS=$(EXT_SOURCES:%.cc=%$(EXT_SUFFIX))
+
+
+all: build
+
+build: $(EXTENSIONS)
+
+$(EXTENSIONS): $(EXT_SOURCES)
+	@cd .. && $(PYTHON) setup.py build_ext --inplace
+
+develop:
+	@cd .. && $(PYTHON) setup.py develop
+
+install:
+	@cd .. && $(PYTHON) setup.py install
+
+test: build
+	@echo 'running tests'
+	@$(PYTHON) -m unittest discover -v -p '*_test.py'
+
+clean:  # uses "find -exec ... {} +" so the target succeeds when nothing matches and handles odd file names
+	@cd .. && $(PYTHON) setup.py clean
+	@find .. -name '*.pyc' -exec rm -v {} +
+	@find .. -name '*.so' -exec rm -v {} +
+	@find .. -type d -name '__pycache__' -prune -exec rm -v -r {} +
+	@find .. -type d -name '*.egg-info' -prune -exec rm -v -r {} +
+
+fix:
+	@echo 'formatting code'
+	-@$(YAPF) --in-place --recursive --verify .
diff --git a/third-party/libjxl/libjxl/third_party/brotli/python/README.md b/third-party/libjxl/libjxl/third_party/brotli/python/README.md
new file mode 100644
index 0000000000..4b6f63f1dd
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/python/README.md
@@ -0,0 +1,54 @@
+This directory contains the code for the Python `brotli` module,
+`bro.py` tool, and roundtrip tests.
+
+Only Python 2.7+ is supported.
+
+We provide a `Makefile` to simplify common development commands.
+
+### Installation
+
+If you just want to install the latest release of the Python `brotli`
+module, we recommend installing from [PyPI][]:
+
+    $ pip install brotli
+
+Alternatively, you may install directly from source by running the
+following command from this directory:
+
+    $ make install
+
+### Development
+
+You may run the following commands from this directory:
+
+    $ make          # Build the module in-place
+
+    $ make test     # Test the module
+
+    $ make clean    # Remove all temporary files and build output
+
+If you wish to make the module available while still being
+able to edit the source files, you can use the `setuptools`
+"[development mode][]":
+
+    $ make develop  # Install the module in "development mode"
+
+### Code Style
+
+Brotli's code follows the [Google Python Style Guide][].  To
+automatically format your code, first install [YAPF][]:
+
+    $ pip install yapf
+
+Then, to format all files in the project, you can run:
+
+    $ make fix      # Automatically format code
+
+See the [YAPF usage][] documentation for more information.
+
+
+[PyPI]: https://pypi.org/project/Brotli/
+[development mode]: https://setuptools.readthedocs.io/en/latest/setuptools.html#development-mode
+[Google Python Style Guide]: https://google.github.io/styleguide/pyguide.html
+[YAPF]: https://github.com/google/yapf
+[YAPF usage]: https://github.com/google/yapf#usage
diff --git a/third-party/libjxl/libjxl/third_party/brotli/python/_brotli.c b/third-party/libjxl/libjxl/third_party/brotli/python/_brotli.c
new file mode 100644
index 0000000000..c6a0da03d0
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/python/_brotli.c
@@ -0,0 +1,985 @@
+#define PY_SSIZE_T_CLEAN 1
+#include <Python.h>
+#include <bytesobject.h>
+#include <structmember.h>
+
+#include <brotli/decode.h>
+#include <brotli/encode.h>
+
+#if PY_MAJOR_VERSION >= 3
+#define PyInt_Check PyLong_Check
+#define PyInt_AsLong PyLong_AsLong
+#else
+#define Py_ARRAY_LENGTH(array)  (sizeof(array) / sizeof((array)[0]))
+#endif
+
+static PyObject *BrotliError;
+
+/* -----------------------------------
+     BlocksOutputBuffer code
+   ----------------------------------- */
+typedef struct {
+    /* Python list of the bytes-object blocks allocated so far (NULL before InitAndGrow) */
+    PyObject *list;
+    /* Sum of the sizes, in bytes, of all blocks currently held in the list. */
+    Py_ssize_t allocated;
+} BlocksOutputBuffer;
+
+static const char unable_allocate_msg[] = "Unable to allocate output buffer.";
+
+/* Block size sequence */
+#define KB (1024)
+#define MB (1024*1024)
+static const Py_ssize_t BUFFER_BLOCK_SIZE[] =
+    { 32*KB, 64*KB, 256*KB, 1*MB, 4*MB, 8*MB, 16*MB, 16*MB,
+      32*MB, 32*MB, 32*MB, 32*MB, 64*MB, 64*MB, 128*MB, 128*MB,
+      256*MB };
+#undef KB
+#undef MB
+
+/* According to the block sizes defined by BUFFER_BLOCK_SIZE, the whole
+   allocated size growth step is:
+    1   32 KB       +32 KB
+    2   96 KB       +64 KB
+    3   352 KB      +256 KB
+    4   1.34 MB     +1 MB
+    5   5.34 MB     +4 MB
+    6   13.34 MB    +8 MB
+    7   29.34 MB    +16 MB
+    8   45.34 MB    +16 MB
+    9   77.34 MB    +32 MB
+    10  109.34 MB   +32 MB
+    11  141.34 MB   +32 MB
+    12  173.34 MB   +32 MB
+    13  237.34 MB   +64 MB
+    14  301.34 MB   +64 MB
+    15  429.34 MB   +128 MB
+    16  557.34 MB   +128 MB
+    17  813.34 MB   +256 MB
+    18  1069.34 MB  +256 MB
+    19  1325.34 MB  +256 MB
+    20  1581.34 MB  +256 MB
+    21  1837.34 MB  +256 MB
+    22  2093.34 MB  +256 MB
+    ...
+*/
+
+/* Initialize the buffer, and grow the buffer.
+   Return 0 on success
+   Return -1 on failure
+*/
+static inline int
+BlocksOutputBuffer_InitAndGrow(BlocksOutputBuffer *buffer,
+                               size_t *avail_out, uint8_t **next_out)
+{
+    PyObject *b;
+    const Py_ssize_t block_size = BUFFER_BLOCK_SIZE[0];
+
+    // Ensure .list was set to NULL, for BlocksOutputBuffer_OnError().
+    assert(buffer->list == NULL);
+
+    // The first block
+    b = PyBytes_FromStringAndSize(NULL, block_size);
+    if (b == NULL) {
+        return -1;
+    }
+
+    // Create list
+    buffer->list = PyList_New(1);
+    if (buffer->list == NULL) {
+        Py_DECREF(b);
+        return -1;
+    }
+    PyList_SET_ITEM(buffer->list, 0, b);
+
+    // Set variables
+    buffer->allocated = block_size;
+
+    *avail_out = (size_t) block_size;
+    *next_out = (uint8_t*) PyBytes_AS_STRING(b);
+    return 0;
+}
+
+/* Grow the buffer. The avail_out must be 0, please check it before calling.
+   Return 0 on success
+   Return -1 on failure
+*/
+static inline int
+BlocksOutputBuffer_Grow(BlocksOutputBuffer *buffer,
+                        size_t *avail_out, uint8_t **next_out)
+{
+    PyObject *b;
+    const Py_ssize_t list_len = Py_SIZE(buffer->list);
+    Py_ssize_t block_size;
+
+    // Ensure no gaps in the data
+    assert(*avail_out == 0);
+
+    // Get block size
+    if (list_len < (Py_ssize_t) Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE)) {
+        block_size = BUFFER_BLOCK_SIZE[list_len];
+    } else {
+        block_size = BUFFER_BLOCK_SIZE[Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE) - 1];
+    }
+
+    // Check buffer->allocated overflow
+    if (block_size > PY_SSIZE_T_MAX - buffer->allocated) {
+        PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
+        return -1;
+    }
+
+    // Create the block
+    b = PyBytes_FromStringAndSize(NULL, block_size);
+    if (b == NULL) {
+        PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
+        return -1;
+    }
+    if (PyList_Append(buffer->list, b) < 0) {
+        Py_DECREF(b);
+        return -1;
+    }
+    Py_DECREF(b);
+
+    // Set variables
+    buffer->allocated += block_size;
+
+    *avail_out = (size_t) block_size;
+    *next_out = (uint8_t*) PyBytes_AS_STRING(b);
+    return 0;
+}
+
+/* Finish the buffer.
+   Return a bytes object on success
+   Return NULL on failure
+*/
+static inline PyObject *
+BlocksOutputBuffer_Finish(BlocksOutputBuffer *buffer, size_t avail_out)
+{
+    PyObject *result, *block;
+    const Py_ssize_t list_len = Py_SIZE(buffer->list);
+
+    // Fast path for single block
+    if ((list_len == 1 && avail_out == 0) ||
+        (list_len == 2 && Py_SIZE(PyList_GET_ITEM(buffer->list, 1)) == (Py_ssize_t) avail_out))
+    {
+        block = PyList_GET_ITEM(buffer->list, 0);
+        Py_INCREF(block);
+
+        Py_CLEAR(buffer->list);
+        return block;
+    }
+
+    // Final bytes object
+    result = PyBytes_FromStringAndSize(NULL, buffer->allocated - avail_out);
+    if (result == NULL) {
+        PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
+        return NULL;
+    }
+
+    // Memory copy
+    if (list_len > 0) {
+        char *posi = PyBytes_AS_STRING(result);
+
+        // Blocks except the last one
+        Py_ssize_t i = 0;
+        for (; i < list_len-1; i++) {
+            block = PyList_GET_ITEM(buffer->list, i);
+            memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block));
+            posi += Py_SIZE(block);
+        }
+        // The last block
+        block = PyList_GET_ITEM(buffer->list, i);
+        memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block) - avail_out);
+    } else {
+        assert(Py_SIZE(result) == 0);
+    }
+
+    Py_CLEAR(buffer->list);
+    return result;
+}
+
+/* Clean up the buffer */
+static inline void
+BlocksOutputBuffer_OnError(BlocksOutputBuffer *buffer)
+{
+    Py_CLEAR(buffer->list);
+}
+
+
+/* Converts o with PyInt_AsLong and, when the value lies within [lower_bound, upper_bound],
+   stores it in *result and returns 1; otherwise returns 0 and leaves *result untouched. */
+static int as_bounded_int(PyObject *o, int* result, int lower_bound, int upper_bound) {
+  const long parsed = PyInt_AsLong(o);
+  const int in_range = (parsed >= (long) lower_bound) && (parsed <= (long) upper_bound);
+  if (in_range) *result = (int) parsed;
+  return in_range;
+}
+
+/* "O&" converter for the mode argument: accepts only the three BROTLI_MODE_* values below. */
+static int mode_convertor(PyObject *o, BrotliEncoderMode *mode) {
+  int raw_mode = -1;
+
+  if (!PyInt_Check(o) || !as_bounded_int(o, &raw_mode, 0, 255)) {
+    PyErr_SetString(BrotliError, "Invalid mode");
+    return 0;
+  }
+  /* *mode is stored before validation, so it is overwritten even when the value is rejected. */
+  *mode = (BrotliEncoderMode) raw_mode;
+  switch (*mode) {
+    case BROTLI_MODE_GENERIC:
+    case BROTLI_MODE_TEXT:
+    case BROTLI_MODE_FONT:
+      return 1;
+    default:
+      break;
+  }
+  PyErr_SetString(BrotliError, "Invalid mode");
+  return 0;
+}
+
+/* "O&" converter for quality: accepts an integer in 0..11; otherwise sets BrotliError. */
+static int quality_convertor(PyObject *o, int *quality) {
+  const char *problem = NULL;
+
+  if (!PyInt_Check(o)) {
+    problem = "Invalid quality";
+  } else if (!as_bounded_int(o, quality, 0, 11)) {
+    problem = "Invalid quality. Range is 0 to 11.";
+  }
+  if (problem == NULL) return 1;
+  PyErr_SetString(BrotliError, problem);
+  return 0;
+}
+
+/* "O&" converter for lgwin: accepts an integer in 10..24; otherwise sets BrotliError. */
+static int lgwin_convertor(PyObject *o, int *lgwin) {
+  const char *problem = NULL;
+
+  if (!PyInt_Check(o)) {
+    problem = "Invalid lgwin";
+  } else if (!as_bounded_int(o, lgwin, 10, 24)) {
+    problem = "Invalid lgwin. Range is 10 to 24.";
+  }
+  if (problem == NULL) return 1;
+  PyErr_SetString(BrotliError, problem);
+  return 0;
+}
+
+/* "O&" converter for lgblock: accepts 0 or an integer in 16..24; otherwise sets BrotliError. */
+static int lgblock_convertor(PyObject *o, int *lgblock) {
+  const char *problem = NULL;
+
+  if (!PyInt_Check(o)) {
+    problem = "Invalid lgblock";
+  } else if (!as_bounded_int(o, lgblock, 0, 24) || (*lgblock != 0 && *lgblock < 16)) {
+    problem = "Invalid lgblock. Can be 0 or in range 16 to 24.";
+  }
+  if (problem == NULL) return 1;
+  PyErr_SetString(BrotliError, problem);
+  return 0;
+}
+
+static PyObject* compress_stream(BrotliEncoderState* enc, BrotliEncoderOperation op,
+                                 uint8_t* input, size_t input_length) {
+  BROTLI_BOOL ok;
+  /* Runs op on enc over input; returns the collected output as a bytes object, or NULL on failure. */
+  size_t available_in = input_length;
+  const uint8_t* next_in = input;
+
+  size_t available_out;
+  uint8_t* next_out;
+  BlocksOutputBuffer buffer = {.list=NULL};  /* list must start NULL: InitAndGrow asserts it */
+  PyObject *ret;
+
+  if (BlocksOutputBuffer_InitAndGrow(&buffer, &available_out, &next_out) < 0) {
+    goto error;
+  }
+
+  while (1) {
+    Py_BEGIN_ALLOW_THREADS  /* the encoder call below runs between the ALLOW_THREADS macros */
+    ok = BrotliEncoderCompressStream(enc, op,
+                                     &available_in, &next_in,
+                                     &available_out, &next_out, NULL);
+    Py_END_ALLOW_THREADS
+    if (!ok) {
+      goto error;
+    }
+    /* Keep looping while input remains unconsumed or the encoder still holds pending output. */
+    if (available_in || BrotliEncoderHasMoreOutput(enc)) {
+      if (available_out == 0) {  /* current block is full: append another one */
+        if (BlocksOutputBuffer_Grow(&buffer, &available_out, &next_out) < 0) {
+          goto error;
+        }
+      }
+      continue;
+    }
+
+    break;
+  }
+  /* Finish joins the blocks, dropping the available_out unused bytes of the last block. */
+  ret = BlocksOutputBuffer_Finish(&buffer, available_out);
+  if (ret != NULL) {
+    return ret;
+  }
+
+error:  /* releases the block list; the process/flush/finish callers set BrotliError on NULL */
+  BlocksOutputBuffer_OnError(&buffer);
+  return NULL;
+}
+
+PyDoc_STRVAR(brotli_Compressor_doc,
+"An object to compress a byte string.\n"
+"\n"
+"Signature:\n"
+"  Compressor(mode=MODE_GENERIC, quality=11, lgwin=22, lgblock=0)\n"
+"\n"
+"Args:\n"
+"  mode (int, optional): The compression mode can be MODE_GENERIC (default),\n"
+"    MODE_TEXT (for UTF-8 format text input) or MODE_FONT (for WOFF 2.0). \n"
+"  quality (int, optional): Controls the compression-speed vs compression-\n"
+"    density tradeoff. The higher the quality, the slower the compression.\n"
+"    Range is 0 to 11. Defaults to 11.\n"
+"  lgwin (int, optional): Base 2 logarithm of the sliding window size. Range\n"
+"    is 10 to 24. Defaults to 22.\n"
+"  lgblock (int, optional): Base 2 logarithm of the maximum input block size.\n"
+"    Range is 16 to 24. If set to 0, the value will be set based on the\n"
+"    quality. Defaults to 0.\n"
+"\n"
+"Raises:\n"
+"  brotli.error: If arguments are invalid.\n");
+
+typedef struct {
+  PyObject_HEAD
+  BrotliEncoderState* enc;
+} brotli_Compressor;
+
+static void brotli_Compressor_dealloc(brotli_Compressor* self) {
+  BrotliEncoderDestroyInstance(self->enc);
+  #if PY_MAJOR_VERSION >= 3
+  Py_TYPE(self)->tp_free((PyObject*)self);
+  #else
+  self->ob_type->tp_free((PyObject*)self);
+  #endif
+}
+
+static PyObject* brotli_Compressor_new(PyTypeObject *type, PyObject *args, PyObject *keywds) {
+  brotli_Compressor *self;
+  self = (brotli_Compressor *)type->tp_alloc(type, 0);
+
+  if (self != NULL) {
+    self->enc = BrotliEncoderCreateInstance(0, 0, 0);
+  }
+
+  return (PyObject *)self;
+}
+
+/* tp_init for Compressor: parses optional mode/quality/lgwin/lgblock and applies those given. */
+static int brotli_Compressor_init(brotli_Compressor *self, PyObject *args, PyObject *keywds) {
+  BrotliEncoderMode mode = (BrotliEncoderMode) -1;  /* -1 marks "argument not supplied" */
+  int quality = -1;
+  int lgwin = -1;
+  int lgblock = -1;
+  int ok;
+  static const char *kwlist[] = {"mode", "quality", "lgwin", "lgblock", NULL};
+
+  ok = PyArg_ParseTupleAndKeywords(args, keywds, "|O&O&O&O&:Compressor",
+                    (char **) kwlist,
+                    &mode_convertor, &mode,
+                    &quality_convertor, &quality,
+                    &lgwin_convertor, &lgwin,
+                    &lgblock_convertor, &lgblock);
+  if (!ok)
+    return -1;
+  if (!self->enc) {  /* tp_new got no encoder; a bare -1 without an exception raises SystemError */
+    PyErr_SetString(BrotliError, "BrotliEncoderState is NULL while initializing Compressor");
+    return -1;
+  }
+  if ((int) mode != -1)
+    BrotliEncoderSetParameter(self->enc, BROTLI_PARAM_MODE, (uint32_t)mode);
+  if (quality != -1)
+    BrotliEncoderSetParameter(self->enc, BROTLI_PARAM_QUALITY, (uint32_t)quality);
+  if (lgwin != -1)
+    BrotliEncoderSetParameter(self->enc, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
+  if (lgblock != -1)
+    BrotliEncoderSetParameter(self->enc, BROTLI_PARAM_LGBLOCK, (uint32_t)lgblock);
+  return 0;
+}
+
+PyDoc_STRVAR(brotli_Compressor_process_doc,  /* docstring registered for Compressor.process() */
+"Process \"string\" for compression, returning a string that contains \n"
+"compressed output data.  This data should be concatenated to the output \n"
+"produced by any preceding calls to the \"process()\" or \"flush()\" methods. \n"
+"Some or all of the input may be kept in internal buffers for later \n"
+"processing, and the compressed output data may be empty until enough input \n"
+"has been accumulated.\n"
+"\n"
+"Signature:\n"
+"  process(string)\n"
+"\n"
+"Args:\n"
+"  string (bytes): The input data\n"
+"\n"
+"Returns:\n"
+"  The compressed output data (bytes)\n"
+"\n"
+"Raises:\n"
+"  brotli.error: If compression fails\n");
+
+static PyObject* brotli_Compressor_process(brotli_Compressor *self, PyObject *args) {
+  PyObject* ret;
+  Py_buffer input;
+  int ok;
+
+#if PY_MAJOR_VERSION >= 3
+  ok = PyArg_ParseTuple(args, "y*:process", &input);
+#else
+  ok = PyArg_ParseTuple(args, "s*:process", &input);
+#endif
+
+  if (!ok) {
+    return NULL;
+  }
+
+  if (!self->enc) {
+    goto error;
+  }
+
+  ret = compress_stream(self->enc, BROTLI_OPERATION_PROCESS,
+                        (uint8_t*) input.buf, input.len);
+  if (ret != NULL) {
+    goto finally;
+  }
+
+error:
+  PyErr_SetString(BrotliError,
+                  "BrotliEncoderCompressStream failed while processing the stream");
+  ret = NULL;
+
+finally:
+  PyBuffer_Release(&input);
+  return ret;
+}
+
+PyDoc_STRVAR(brotli_Compressor_flush_doc,
+"Process all pending input, returning a string containing the remaining\n"
+"compressed data. This data should be concatenated to the output produced by\n"
+"any preceding calls to the \"process()\" or \"flush()\" methods.\n"
+"\n"
+"Signature:\n"
+"  flush()\n"
+"\n"
+"Returns:\n"
+"  The compressed output data (bytes)\n"
+"\n"
+"Raises:\n"
+"  brotli.error: If compression fails\n");
+
+static PyObject* brotli_Compressor_flush(brotli_Compressor *self) {
+  PyObject *ret;
+
+  if (!self->enc) {
+    goto error;
+  }
+
+  ret = compress_stream(self->enc, BROTLI_OPERATION_FLUSH,
+                        NULL, 0);
+  if (ret != NULL) {
+    goto finally;
+  }
+
+error:
+  PyErr_SetString(BrotliError,
+                  "BrotliEncoderCompressStream failed while flushing the stream");
+  ret = NULL;
+finally:
+  return ret;
+}
+
+PyDoc_STRVAR(brotli_Compressor_finish_doc,  /* docstring registered for Compressor.finish() */
+"Process all pending input and complete all compression, returning a string\n"
+"containing the remaining compressed data. This data should be concatenated\n"
+"to the output produced by any preceding calls to the \"process()\" or\n"
+"\"flush()\" methods.\n"
+"After calling \"finish()\", the \"process()\" and \"flush()\" methods\n"
+"cannot be called again, and a new \"Compressor\" object should be created.\n"
+"\n"
+"Signature:\n"
+"  finish()\n"
+"\n"
+"Returns:\n"
+"  The compressed output data (bytes)\n"
+"\n"
+"Raises:\n"
+"  brotli.error: If compression fails\n");
+
+static PyObject* brotli_Compressor_finish(brotli_Compressor *self) {
+  PyObject *ret;
+
+  if (!self->enc) {
+    goto error;
+  }
+
+  ret = compress_stream(self->enc, BROTLI_OPERATION_FINISH,
+                        NULL, 0);
+
+  if (ret == NULL || !BrotliEncoderIsFinished(self->enc)) {
+    goto error;
+  }
+  goto finally;
+
+error:
+  PyErr_SetString(BrotliError,
+                  "BrotliEncoderCompressStream failed while finishing the stream");
+  ret = NULL;
+finally:
+  return ret;
+}
+
+static PyMemberDef brotli_Compressor_members[] = {
+  {NULL}  /* Sentinel */
+};
+
+static PyMethodDef brotli_Compressor_methods[] = {
+  {"process", (PyCFunction)brotli_Compressor_process, METH_VARARGS, brotli_Compressor_process_doc},
+  {"flush", (PyCFunction)brotli_Compressor_flush, METH_NOARGS, brotli_Compressor_flush_doc},
+  {"finish", (PyCFunction)brotli_Compressor_finish, METH_NOARGS, brotli_Compressor_finish_doc},
+  {NULL}  /* Sentinel */
+};
+
+static PyTypeObject brotli_CompressorType = {
+  #if PY_MAJOR_VERSION >= 3
+  PyVarObject_HEAD_INIT(NULL, 0)
+  #else
+  PyObject_HEAD_INIT(NULL)
+  0,                                     /* ob_size*/
+  #endif
+  "brotli.Compressor",                   /* tp_name */
+  sizeof(brotli_Compressor),             /* tp_basicsize */
+  0,                                     /* tp_itemsize */
+  (destructor)brotli_Compressor_dealloc, /* tp_dealloc */
+  0,                                     /* tp_print */
+  0,                                     /* tp_getattr */
+  0,                                     /* tp_setattr */
+  0,                                     /* tp_compare */
+  0,                                     /* tp_repr */
+  0,                                     /* tp_as_number */
+  0,                                     /* tp_as_sequence */
+  0,                                     /* tp_as_mapping */
+  0,                                     /* tp_hash  */
+  0,                                     /* tp_call */
+  0,                                     /* tp_str */
+  0,                                     /* tp_getattro */
+  0,                                     /* tp_setattro */
+  0,                                     /* tp_as_buffer */
+  Py_TPFLAGS_DEFAULT,                    /* tp_flags */
+  brotli_Compressor_doc,                 /* tp_doc */
+  0,                                     /* tp_traverse */
+  0,                                     /* tp_clear */
+  0,                                     /* tp_richcompare */
+  0,                                     /* tp_weaklistoffset */
+  0,                                     /* tp_iter */
+  0,                                     /* tp_iternext */
+  brotli_Compressor_methods,             /* tp_methods */
+  brotli_Compressor_members,             /* tp_members */
+  0,                                     /* tp_getset */
+  0,                                     /* tp_base */
+  0,                                     /* tp_dict */
+  0,                                     /* tp_descr_get */
+  0,                                     /* tp_descr_set */
+  0,                                     /* tp_dictoffset */
+  (initproc)brotli_Compressor_init,      /* tp_init */
+  0,                                     /* tp_alloc */
+  brotli_Compressor_new,                 /* tp_new */
+};
+
+static PyObject* decompress_stream(BrotliDecoderState* dec,
+                                   uint8_t* input, size_t input_length) {
+  BrotliDecoderResult result;
+  /* Feeds input to dec; returns the decoded output as a bytes object, or NULL on failure. */
+  size_t available_in = input_length;
+  const uint8_t* next_in = input;
+
+  size_t available_out;
+  uint8_t* next_out;
+  BlocksOutputBuffer buffer = {.list=NULL};  /* list must start NULL: InitAndGrow asserts it */
+  PyObject *ret;
+
+  if (BlocksOutputBuffer_InitAndGrow(&buffer, &available_out, &next_out) < 0) {
+    goto error;
+  }
+
+  while (1) {
+    Py_BEGIN_ALLOW_THREADS  /* the decoder call below runs between the ALLOW_THREADS macros */
+    result = BrotliDecoderDecompressStream(dec,
+                                           &available_in, &next_in,
+                                           &available_out, &next_out, NULL);
+    Py_END_ALLOW_THREADS
+    /* Only "needs more output" keeps the loop going; every other result ends it. */
+    if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
+      if (available_out == 0) {  /* current block is full: append another one */
+        if (BlocksOutputBuffer_Grow(&buffer, &available_out, &next_out) < 0) {
+          goto error;
+        }
+      }
+      continue;
+    }
+
+    break;
+  }
+  /* A decoder error, or input left unconsumed after the loop, is reported as failure. */
+  if (result == BROTLI_DECODER_RESULT_ERROR || available_in != 0) {
+    goto error;
+  }
+  /* Finish joins the blocks, dropping the available_out unused bytes of the last block. */
+  ret = BlocksOutputBuffer_Finish(&buffer, available_out);
+  if (ret != NULL) {
+    goto finally;
+  }
+
+error:  /* releases the block list; brotli_Decompressor_process sets BrotliError on NULL */
+  BlocksOutputBuffer_OnError(&buffer);
+  ret = NULL;
+finally:
+  return ret;
+}
+
+PyDoc_STRVAR(brotli_Decompressor_doc,
+"An object to decompress a byte string.\n"
+"\n"
+"Signature:\n"
+"  Decompressor()\n"
+"\n"
+"Raises:\n"
+"  brotli.error: If arguments are invalid.\n");
+
+typedef struct {
+  PyObject_HEAD
+  BrotliDecoderState* dec;
+} brotli_Decompressor;
+
+static void brotli_Decompressor_dealloc(brotli_Decompressor* self) {
+  BrotliDecoderDestroyInstance(self->dec);
+  #if PY_MAJOR_VERSION >= 3
+  Py_TYPE(self)->tp_free((PyObject*)self);
+  #else
+  self->ob_type->tp_free((PyObject*)self);
+  #endif
+}
+
+static PyObject* brotli_Decompressor_new(PyTypeObject *type, PyObject *args, PyObject *keywds) {
+  brotli_Decompressor *self;
+  self = (brotli_Decompressor *)type->tp_alloc(type, 0);
+
+  if (self != NULL) {
+    self->dec = BrotliDecoderCreateInstance(0, 0, 0);
+  }
+
+  return (PyObject *)self;
+}
+
+/* tp_init for Decompressor: accepts no arguments; fails if the decoder state is missing. */
+static int brotli_Decompressor_init(brotli_Decompressor *self, PyObject *args, PyObject *keywds) {
+  int ok;
+  static const char *kwlist[] = {NULL};
+  ok = PyArg_ParseTupleAndKeywords(args, keywds, "|:Decompressor",
+                                   (char **) kwlist);
+  if (!ok)
+    return -1;
+  if (!self->dec) {  /* tp_new got no decoder; a bare -1 without an exception raises SystemError */
+    PyErr_SetString(BrotliError, "BrotliDecoderState is NULL while initializing Decompressor");
+    return -1;
+  }
+  return 0;
+}
+
+PyDoc_STRVAR(brotli_Decompressor_process_doc,  /* docstring registered for Decompressor.process() */
+"Process \"string\" for decompression, returning a string that contains \n"
+"decompressed output data.  This data should be concatenated to the output \n"
+"produced by any preceding calls to the \"process()\" method. \n"
+"Some or all of the input may be kept in internal buffers for later \n"
+"processing, and the decompressed output data may be empty until enough input \n"
+"has been accumulated.\n"
+"\n"
+"Signature:\n"
+"  process(string)\n"
+"\n"
+"Args:\n"
+"  string (bytes): The input data\n"
+"\n"
+"Returns:\n"
+"  The decompressed output data (bytes)\n"
+"\n"
+"Raises:\n"
+"  brotli.error: If decompression fails\n");
+
+static PyObject* brotli_Decompressor_process(brotli_Decompressor *self, PyObject *args) {
+  PyObject* ret;
+  Py_buffer input;
+  int ok;
+
+#if PY_MAJOR_VERSION >= 3
+  ok = PyArg_ParseTuple(args, "y*:process", &input);
+#else
+  ok = PyArg_ParseTuple(args, "s*:process", &input);
+#endif
+
+  if (!ok) {
+    return NULL;
+  }
+
+  if (!self->dec) {
+    goto error;
+  }
+
+  ret = decompress_stream(self->dec, (uint8_t*) input.buf, input.len);
+  if (ret != NULL) {
+    goto finally;
+  }
+
+error:
+  PyErr_SetString(BrotliError,
+                  "BrotliDecoderDecompressStream failed while processing the stream");
+  ret = NULL;
+
+finally:
+  PyBuffer_Release(&input);
+  return ret;
+}
+
+PyDoc_STRVAR(brotli_Decompressor_is_finished_doc,
+"Checks if decoder instance reached the final state.\n"
+"\n"
+"Signature:\n"
+"  is_finished()\n"
+"\n"
+"Returns:\n"
+"  True  if the decoder is in a state where it reached the end of the input\n"
+"        and produced all of the output\n"
+"  False otherwise\n"
+"\n"
+"Raises:\n"
+"  brotli.error: If decompression fails\n");
+
+static PyObject* brotli_Decompressor_is_finished(brotli_Decompressor *self) {
+  /* Decompressor.is_finished(): True/False from BrotliDecoderIsFinished();
+   * raises brotli.error when the instance has no decoder state. */
+  PyObject* answer;
+  if (self->dec == NULL) {
+    PyErr_SetString(BrotliError, "BrotliDecoderState is NULL while checking is_finished");
+    return NULL;
+  }
+  answer = BrotliDecoderIsFinished(self->dec) ? Py_True : Py_False;
+  Py_INCREF(answer);  /* the singleton is returned as a new reference */
+  return answer;
+}
+
+static PyMemberDef brotli_Decompressor_members[] = {
+  {NULL}  /* Sentinel */
+};
+
+static PyMethodDef brotli_Decompressor_methods[] = {
+  {"process", (PyCFunction)brotli_Decompressor_process, METH_VARARGS, brotli_Decompressor_process_doc},
+  {"is_finished", (PyCFunction)brotli_Decompressor_is_finished, METH_NOARGS, brotli_Decompressor_is_finished_doc},
+  {NULL}  /* Sentinel */
+};
+
+static PyTypeObject brotli_DecompressorType = {
+  #if PY_MAJOR_VERSION >= 3
+  PyVarObject_HEAD_INIT(NULL, 0)
+  #else
+  PyObject_HEAD_INIT(NULL)
+  0,                                     /* ob_size*/
+  #endif
+  "brotli.Decompressor",                   /* tp_name */
+  sizeof(brotli_Decompressor),             /* tp_basicsize */
+  0,                                       /* tp_itemsize */
+  (destructor)brotli_Decompressor_dealloc, /* tp_dealloc */
+  0,                                       /* tp_print */
+  0,                                       /* tp_getattr */
+  0,                                       /* tp_setattr */
+  0,                                       /* tp_compare */
+  0,                                       /* tp_repr */
+  0,                                       /* tp_as_number */
+  0,                                       /* tp_as_sequence */
+  0,                                       /* tp_as_mapping */
+  0,                                       /* tp_hash  */
+  0,                                       /* tp_call */
+  0,                                       /* tp_str */
+  0,                                       /* tp_getattro */
+  0,                                       /* tp_setattro */
+  0,                                       /* tp_as_buffer */
+  Py_TPFLAGS_DEFAULT,                      /* tp_flags */
+  brotli_Decompressor_doc,                 /* tp_doc */
+  0,                                       /* tp_traverse */
+  0,                                       /* tp_clear */
+  0,                                       /* tp_richcompare */
+  0,                                       /* tp_weaklistoffset */
+  0,                                       /* tp_iter */
+  0,                                       /* tp_iternext */
+  brotli_Decompressor_methods,             /* tp_methods */
+  brotli_Decompressor_members,             /* tp_members */
+  0,                                       /* tp_getset */
+  0,                                       /* tp_base */
+  0,                                       /* tp_dict */
+  0,                                       /* tp_descr_get */
+  0,                                       /* tp_descr_set */
+  0,                                       /* tp_dictoffset */
+  (initproc)brotli_Decompressor_init,      /* tp_init */
+  0,                                       /* tp_alloc */
+  brotli_Decompressor_new,                 /* tp_new */
+};
+
+PyDoc_STRVAR(brotli_decompress__doc__,
+"Decompress a compressed byte string.\n"
+"\n"
+"Signature:\n"
+"  decompress(string)\n"
+"\n"
+"Args:\n"
+"  string (bytes): The compressed input data.\n"
+"\n"
+"Returns:\n"
+"  The decompressed byte string.\n"
+"\n"
+"Raises:\n"
+"  brotli.error: If decompressor fails.\n");
+
+static PyObject* brotli_decompress(PyObject *self, PyObject *args, PyObject *keywds) {
+  BrotliDecoderState* state;
+  BrotliDecoderResult result;
+  /* One-shot decompress(string): returns the output, or NULL with brotli.error set. */
+  const uint8_t* next_in;
+  size_t available_in;
+
+  uint8_t* next_out;
+  size_t available_out;
+  BlocksOutputBuffer buffer = {.list=NULL};
+  PyObject *ret;
+
+  static const char *kwlist[] = {"string", NULL};
+  Py_buffer input;
+  int ok;
+
+#if PY_MAJOR_VERSION >= 3
+  ok = PyArg_ParseTupleAndKeywords(args, keywds, "y*|:decompress",
+                                   (char**) kwlist, &input);
+#else
+  ok = PyArg_ParseTupleAndKeywords(args, keywds, "s*|:decompress",
+                                   (char**) kwlist, &input);
+#endif
+
+  if (!ok) {
+    return NULL;
+  }
+  state = BrotliDecoderCreateInstance(0, 0, 0);
+  if (state == NULL) {  /* allocation failed; the decoder must never see NULL */
+    goto error;
+  }
+  next_in = (uint8_t*) input.buf;
+  available_in = input.len;
+  if (BlocksOutputBuffer_InitAndGrow(&buffer, &available_out, &next_out) < 0) {
+    goto error;
+  }
+
+  while (1) {
+    Py_BEGIN_ALLOW_THREADS
+    result = BrotliDecoderDecompressStream(state, &available_in, &next_in,
+                                           &available_out, &next_out, 0);
+    Py_END_ALLOW_THREADS
+    /* Output exhausted: grow the buffer and let the decoder continue. */
+    if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
+      if (available_out == 0) {
+        if (BlocksOutputBuffer_Grow(&buffer, &available_out, &next_out) < 0) {
+          goto error;
+        }
+      }
+      continue;
+    }
+
+    break;
+  }
+  /* Anything but a clean finish, or input left over after it, is an error. */
+  if (result != BROTLI_DECODER_RESULT_SUCCESS || available_in != 0) {
+    goto error;
+  }
+
+  ret = BlocksOutputBuffer_Finish(&buffer, available_out);
+  if (ret != NULL) {
+    goto finally;
+  }
+  /* Failure exit: notify the output buffer, then raise brotli.error. */
+error:
+  BlocksOutputBuffer_OnError(&buffer);
+  PyErr_SetString(BrotliError, "BrotliDecompress failed");
+  ret = NULL;
+
+finally:
+  BrotliDecoderDestroyInstance(state);
+  PyBuffer_Release(&input);
+  return ret;
+}
+
+static PyMethodDef brotli_methods[] = {
+  {"decompress", (PyCFunction)brotli_decompress, METH_VARARGS | METH_KEYWORDS, brotli_decompress__doc__},
+  {NULL, NULL, 0, NULL}
+};
+
+PyDoc_STRVAR(brotli_doc, "Implementation module for the Brotli library.");
+
+#if PY_MAJOR_VERSION >= 3
+#define INIT_BROTLI   PyInit__brotli
+#define CREATE_BROTLI PyModule_Create(&brotli_module)
+#define RETURN_BROTLI return m
+#define RETURN_NULL return NULL
+
+static struct PyModuleDef brotli_module = {
+  PyModuleDef_HEAD_INIT,
+  "_brotli",      /* m_name */
+  brotli_doc,     /* m_doc */
+  0,              /* m_size */
+  brotli_methods, /* m_methods */
+  NULL,           /* m_reload */
+  NULL,           /* m_traverse */
+  NULL,           /* m_clear */
+  NULL            /* m_free */
+};
+#else
+#define INIT_BROTLI   init_brotli
+#define CREATE_BROTLI Py_InitModule3("_brotli", brotli_methods, brotli_doc)
+#define RETURN_BROTLI return
+#define RETURN_NULL return
+#endif
+
+PyMODINIT_FUNC INIT_BROTLI(void) {
+  PyObject *m = CREATE_BROTLI;  /* module object everything below is added to */
+  if (m == NULL) {  /* module creation failed; there is nothing to populate */
+    RETURN_NULL;
+  }
+  /* Populate the module: error type, both classes, mode constants, version. */
+  BrotliError = PyErr_NewException((char*) "brotli.error", NULL, NULL);
+  if (BrotliError != NULL) {
+    Py_INCREF(BrotliError);
+    PyModule_AddObject(m, "error", BrotliError);
+  }
+  if (PyType_Ready(&brotli_CompressorType) < 0) {
+    RETURN_NULL;
+  }
+  Py_INCREF(&brotli_CompressorType);
+  PyModule_AddObject(m, "Compressor", (PyObject *)&brotli_CompressorType);
+  if (PyType_Ready(&brotli_DecompressorType) < 0) {
+    RETURN_NULL;
+  }
+  Py_INCREF(&brotli_DecompressorType);
+  PyModule_AddObject(m, "Decompressor", (PyObject *)&brotli_DecompressorType);
+
+  PyModule_AddIntConstant(m, "MODE_GENERIC", (int) BROTLI_MODE_GENERIC);
+  PyModule_AddIntConstant(m, "MODE_TEXT", (int) BROTLI_MODE_TEXT);
+  PyModule_AddIntConstant(m, "MODE_FONT", (int) BROTLI_MODE_FONT);
+  /* __version__ is "a.b.c", unpacked from the decoder version word below. */
+  char version[16];
+  uint32_t decoderVersion = BrotliDecoderVersion();
+  snprintf(version, sizeof(version), "%d.%d.%d",
+      decoderVersion >> 24, (decoderVersion >> 12) & 0xFFF, decoderVersion & 0xFFF);
+  PyModule_AddStringConstant(m, "__version__", version);
+  RETURN_BROTLI;
+}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/python/bro.py b/third-party/libjxl/libjxl/third_party/brotli/python/bro.py
new file mode 100755
index 0000000000..6d715498a1
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/python/bro.py
@@ -0,0 +1,160 @@
+#! /usr/bin/env python
+"""Compression/decompression utility using the Brotli algorithm."""
+
+from __future__ import print_function
+import argparse
+import sys
+import os
+import platform
+
+import brotli
+
+# default values of encoder parameters
+DEFAULT_PARAMS = {
+    'mode': brotli.MODE_GENERIC,
+    'quality': 11,
+    'lgwin': 22,
+    'lgblock': 0,
+}
+
+
+def get_binary_stdio(stream):
+    """Fetch sys.<stream> as an object suited to reading/writing raw bytes.
+
+    'stream' must be 'stdin', 'stdout' or 'stderr' (checked with an assert).
+    """
+    assert stream in ('stdin', 'stdout', 'stderr'), 'invalid stream name'
+    handle = getattr(sys, stream)
+    if sys.version_info[0] >= 3:
+        # python3.x: binary data goes through the 'buffer' attribute; if the
+        # current object has none, take it from the original sys.__<stream>__
+        if not hasattr(handle, 'buffer'):
+            handle = getattr(sys, '__%s__' % stream)
+        return handle.buffer
+    if sys.platform != 'win32':
+        # python2.x outside Windows: the stream is returned unchanged
+        return handle
+    # python2.x on Windows: the I/O stream needs its binary flag set
+    runtime = platform.python_implementation()
+    if runtime == 'PyPy':
+        # the msvcrt trick doesn't work in pypy, so the descriptor is reopened
+        open_mode = 'rb' if stream == 'stdin' else 'wb'
+        return os.fdopen(handle.fileno(), open_mode, 0)
+    # this works with CPython -- untested on other implementations
+    import msvcrt
+    msvcrt.setmode(handle.fileno(), os.O_BINARY)
+    return handle
+
+
+def main(args=None):
+    """Compress or decompress a file/stdin to a file/stdout per CLI options."""
+    parser = argparse.ArgumentParser(
+        prog=os.path.basename(__file__), description=__doc__)
+    parser.add_argument(
+        '--version', action='version', version=brotli.version)
+    parser.add_argument(
+        '-i',
+        '--input',
+        metavar='FILE',
+        type=str,
+        dest='infile',
+        help='Input file',
+        default=None)
+    parser.add_argument(
+        '-o',
+        '--output',
+        metavar='FILE',
+        type=str,
+        dest='outfile',
+        help='Output file',
+        default=None)
+    parser.add_argument(
+        '-f',
+        '--force',
+        action='store_true',
+        help='Overwrite existing output file',
+        default=False)
+    parser.add_argument(
+        '-d',
+        '--decompress',
+        action='store_true',
+        help='Decompress input file',
+        default=False)
+    params = parser.add_argument_group('optional encoder parameters')
+    params.add_argument(
+        '-m',
+        '--mode',
+        metavar='MODE',
+        type=int,
+        choices=[0, 1, 2],
+        help='The compression mode can be 0 for generic input, '
+        '1 for UTF-8 encoded text, or 2 for WOFF 2.0 font data. '
+        'Defaults to 0.')
+    params.add_argument(
+        '-q',
+        '--quality',
+        metavar='QUALITY',
+        type=int,
+        choices=list(range(0, 12)),
+        help='Controls the compression-speed vs compression-density '
+        'tradeoff. The higher the quality, the slower the '
+        'compression. Range is 0 to 11. Defaults to 11.')
+    params.add_argument(
+        '--lgwin',
+        metavar='LGWIN',
+        type=int,
+        choices=list(range(10, 25)),
+        help='Base 2 logarithm of the sliding window size. Range is '
+        '10 to 24. Defaults to 22.')
+    params.add_argument(
+        '--lgblock',
+        metavar='LGBLOCK',
+        type=int,
+        choices=[0] + list(range(16, 25)),
+        help='Base 2 logarithm of the maximum input block size. '
+        'Range is 16 to 24. If set to 0, the value will be set based '
+        'on the quality. Defaults to 0.')
+    # set default values using global DEFAULT_PARAMS dictionary
+    parser.set_defaults(**DEFAULT_PARAMS)
+
+    options = parser.parse_args(args=args)
+
+    if options.infile:
+        if not os.path.isfile(options.infile):
+            parser.error('file "%s" not found' % options.infile)
+        with open(options.infile, 'rb') as infile:
+            data = infile.read()
+    else:
+        if sys.stdin.isatty():
+            # interactive console, just quit
+            parser.error('no input')
+        infile = get_binary_stdio('stdin')
+        data = infile.read()
+
+    if options.outfile:
+        if os.path.isfile(options.outfile) and not options.force:
+            parser.error('output file exists')
+
+    try:
+        if options.decompress:
+            data = brotli.decompress(data)
+        else:
+            data = brotli.compress(
+                data,
+                mode=options.mode,
+                quality=options.quality,
+                lgwin=options.lgwin,
+                lgblock=options.lgblock)
+    except brotli.error as e:
+        parser.exit(1, 'bro: error: %s: %s\n' %
+                    (e, options.infile or 'sys.stdin'))
+
+    # Open the destination only now, so a failed run never truncates it.
+    outfile = (open(options.outfile, 'wb') if options.outfile
+               else get_binary_stdio('stdout'))
+    outfile.write(data)
+    outfile.close()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/third-party/libjxl/libjxl/third_party/brotli/python/brotli.py b/third-party/libjxl/libjxl/third_party/brotli/python/brotli.py
new file mode 100644
index 0000000000..9be4ed4bef
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/python/brotli.py
@@ -0,0 +1,55 @@
+# Copyright 2016 The Brotli Authors. All rights reserved.
+#
+# Distributed under MIT license.
+# See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+
+"""Functions to compress and decompress data using the Brotli library."""
+
+import _brotli
+
+# The library version.
+version = __version__ = _brotli.__version__
+
+# The compression mode.
+MODE_GENERIC = _brotli.MODE_GENERIC
+MODE_TEXT = _brotli.MODE_TEXT
+MODE_FONT = _brotli.MODE_FONT
+
+# The Compressor object.
+Compressor = _brotli.Compressor
+
+# The Decompressor object.
+Decompressor = _brotli.Decompressor
+
+# Compress a byte string.
+def compress(string, mode=MODE_GENERIC, quality=11, lgwin=22, lgblock=0):
+    """One-shot Brotli compression of a byte string.
+
+    Runs a fresh Compressor over the whole input and finishes the stream.
+
+    Args:
+      string (bytes): Data to compress.
+      mode (int, optional): MODE_GENERIC (default), MODE_TEXT (UTF-8 text
+        input) or MODE_FONT (WOFF 2.0).
+      quality (int, optional): Speed/density tradeoff, 0 to 11; higher values
+        compress more slowly. Defaults to 11.
+      lgwin (int, optional): Base 2 logarithm of the sliding window size, 10
+        to 24. Defaults to 22.
+      lgblock (int, optional): Base 2 logarithm of the maximum input block
+        size, 16 to 24; 0 (default) derives the value from the quality.
+
+    Returns:
+      The compressed byte string.
+
+    Raises:
+      brotli.error: If arguments are invalid, or compressor fails.
+    """
+    encoder = Compressor(
+        mode=mode, quality=quality, lgwin=lgwin, lgblock=lgblock)
+    return encoder.process(string) + encoder.finish()
+
+# Decompress a compressed byte string.
+decompress = _brotli.decompress
+
+# Raised if compression or decompression fails.
+error = _brotli.error
diff --git a/third-party/libjxl/libjxl/third_party/brotli/python/tests/__init__.py b/third-party/libjxl/libjxl/third_party/brotli/python/tests/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/third-party/libjxl/libjxl/third_party/brotli/python/tests/_test_utils.py b/third-party/libjxl/libjxl/third_party/brotli/python/tests/_test_utils.py
new file mode 100644
index 0000000000..059cb43f87
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/python/tests/_test_utils.py
@@ -0,0 +1,124 @@
+from __future__ import print_function
+import filecmp
+import glob
+import itertools
+import os
+import sys
+import sysconfig
+import tempfile
+import unittest
+
+
+project_dir = os.path.abspath(os.path.join(__file__, '..', '..', '..'))
+test_dir = os.getenv("BROTLI_TESTS_PATH")
+BRO_ARGS = [os.getenv("BROTLI_WRAPPER")]
+
+# Fallbacks
+if test_dir is None:
+  test_dir = os.path.join(project_dir, 'tests')
+if BRO_ARGS[0] is None:
+  python_exe = sys.executable or 'python'
+  bro_path = os.path.join(project_dir, 'python', 'bro.py')
+  BRO_ARGS = [python_exe, bro_path]
+
+# Get the platform/version-specific build folder.
+# By default, the distutils build base is in the same location as setup.py.
+platform_lib_name = 'lib.{platform}-{version[0]}.{version[1]}'.format(
+    platform=sysconfig.get_platform(), version=sys.version_info)
+build_dir = os.path.join(project_dir, 'bin', platform_lib_name)
+
+# Prepend the build folder to sys.path and the PYTHONPATH environment variable.
+if build_dir not in sys.path:
+    sys.path.insert(0, build_dir)
+TEST_ENV = os.environ.copy()
+if 'PYTHONPATH' not in TEST_ENV:
+    TEST_ENV['PYTHONPATH'] = build_dir
+else:
+    TEST_ENV['PYTHONPATH'] = build_dir + os.pathsep + TEST_ENV['PYTHONPATH']
+
+TESTDATA_DIR = os.path.join(test_dir, 'testdata')
+
+TESTDATA_FILES = [
+    'empty',  # Empty file
+    '10x10y',  # Small text
+    'alice29.txt',  # Large text
+    'random_org_10k.bin',  # Small data
+    'mapsdatazrh',  # Large data
+    'ukkonooa',  # Poem
+]
+
+# Some files might be missing in a lightweight sources pack.
+TESTDATA_PATH_CANDIDATES = [
+    os.path.join(TESTDATA_DIR, f) for f in TESTDATA_FILES
+]
+
+TESTDATA_PATHS = [
+    path for path in TESTDATA_PATH_CANDIDATES if os.path.isfile(path)
+]
+
+TESTDATA_PATHS_FOR_DECOMPRESSION = glob.glob(
+    os.path.join(TESTDATA_DIR, '*.compressed'))
+
+TEMP_DIR = tempfile.mkdtemp()
+
+
+def get_temp_compressed_name(filename):
+    return os.path.join(TEMP_DIR, os.path.basename(filename + '.bro'))
+
+
+def get_temp_uncompressed_name(filename):
+    return os.path.join(TEMP_DIR, os.path.basename(filename + '.unbro'))
+
+
+def bind_method_args(method, *args, **kwargs):
+    return lambda self: method(self, *args, **kwargs)
+
+
+def generate_test_methods(test_case_class,
+                          for_decompression=False,
+                          variants=None):
+    """Add a test_* method for each (_test* method, data file, option set).
+
+    Separate methods make it easy to see which compression scenario failed.
+    """
+    paths = (TESTDATA_PATHS_FOR_DECOMPRESSION if for_decompression
+             else TESTDATA_PATHS)
+    if variants:
+        # One [(key, value), ...] axis per option; the product is every combo.
+        axes = [[(key, value) for value in values]
+                for key, values in variants.items()]
+        opts = [('_'.join(str(part) for part in itertools.chain(*combo)),
+                 dict(combo))
+                for combo in itertools.product(*axes)]
+    else:
+        opts = [('', {})]
+    templates = [attr for attr in dir(test_case_class)
+                 if attr.startswith('_test')]
+    for template in templates:
+        for testdata in paths:
+            stem = os.path.splitext(os.path.basename(testdata))[0]
+            for label, settings in opts:
+                bound = bind_method_args(
+                    getattr(test_case_class, template), testdata, **settings)
+                public_name = 'test_{method}_{options}_{file}'.format(
+                    method=template, options=label, file=stem)
+                setattr(test_case_class, public_name, bound)
+
+
+class TestCase(unittest.TestCase):
+    """Base class: removes per-file temp outputs and compares files."""
+
+    def tearDown(self):
+        # Decompression tests leave temp files as well, so cover both lists.
+        for f in TESTDATA_PATHS + TESTDATA_PATHS_FOR_DECOMPRESSION:
+            for temp_name in (get_temp_compressed_name(f),
+                              get_temp_uncompressed_name(f)):
+                try:
+                    os.unlink(temp_name)
+                except OSError:
+                    pass  # best effort: the file may never have been written
+
+    def assertFilesMatch(self, first, second):
+        self.assertTrue(
+            filecmp.cmp(first, second, shallow=False),
+            'File {} differs from {}'.format(first, second))
diff --git a/third-party/libjxl/libjxl/third_party/brotli/python/tests/bro_test.py b/third-party/libjxl/libjxl/third_party/brotli/python/tests/bro_test.py
new file mode 100644
index 0000000000..454bd62271
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/python/tests/bro_test.py
@@ -0,0 +1,101 @@
+# Copyright 2016 The Brotli Authors. All rights reserved.
+#
+# Distributed under MIT license.
+# See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+
+import subprocess
+import unittest
+
+from . import _test_utils
+
+BRO_ARGS = _test_utils.BRO_ARGS
+TEST_ENV = _test_utils.TEST_ENV
+
+
+def _get_original_name(test_data):
+    return test_data.partition('.compressed')[0]  # text before the marker
+
+
+class TestBroDecompress(_test_utils.TestCase):
+
+    def _check_decompression(self, test_data):
+        # Verify decompression matches the original.
+        temp_uncompressed = _test_utils.get_temp_uncompressed_name(test_data)
+        original = _get_original_name(test_data)
+        self.assertFilesMatch(temp_uncompressed, original)
+
+    def _decompress_file(self, test_data):
+        temp_uncompressed = _test_utils.get_temp_uncompressed_name(test_data)
+        args = BRO_ARGS + ['-f', '-d', '-i', test_data, '-o', temp_uncompressed]
+        subprocess.check_call(args, env=TEST_ENV)
+
+    def _decompress_pipe(self, test_data):
+        temp_uncompressed = _test_utils.get_temp_uncompressed_name(test_data)
+        args = BRO_ARGS + ['-d']
+        with open(temp_uncompressed, 'wb') as out_file:
+            with open(test_data, 'rb') as in_file:
+                subprocess.check_call(
+                    args, stdin=in_file, stdout=out_file, env=TEST_ENV)
+
+    def _test_decompress_file(self, test_data):
+        self._decompress_file(test_data)
+        self._check_decompression(test_data)
+
+    def _test_decompress_pipe(self, test_data):
+        self._decompress_pipe(test_data)
+        self._check_decompression(test_data)
+
+
+_test_utils.generate_test_methods(TestBroDecompress, for_decompression=True)
+
+
+class TestBroCompress(_test_utils.TestCase):
+    """Compresses via the bro command line, then checks the round trip."""
+    VARIANTS = {'quality': (1, 6, 9, 11), 'lgwin': (10, 15, 20, 24)}
+
+    def _check_decompression(self, test_data, **kwargs):
+        # Write decompression to temp file and verify it matches the original.
+        temp_uncompressed = _test_utils.get_temp_uncompressed_name(test_data)
+        temp_compressed = _test_utils.get_temp_compressed_name(test_data)
+        original = test_data
+        args = BRO_ARGS + ['-f', '-d']
+        args.extend(['-i', temp_compressed, '-o', temp_uncompressed])
+        subprocess.check_call(args, env=TEST_ENV)
+        self.assertFilesMatch(temp_uncompressed, original)
+
+    def _compress_file(self, test_data, **kwargs):
+        temp_compressed = _test_utils.get_temp_compressed_name(test_data)
+        args = BRO_ARGS + ['-f']
+        if 'quality' in kwargs:
+            args.extend(['-q', str(kwargs['quality'])])
+        if 'lgwin' in kwargs:
+            args.extend(['--lgwin', str(kwargs['lgwin'])])
+        args.extend(['-i', test_data, '-o', temp_compressed])
+        subprocess.check_call(args, env=TEST_ENV)
+
+    def _compress_pipe(self, test_data, **kwargs):
+        temp_compressed = _test_utils.get_temp_compressed_name(test_data)
+        args = list(BRO_ARGS)  # copy, so extend() leaves BRO_ARGS alone
+        if 'quality' in kwargs:
+            args.extend(['-q', str(kwargs['quality'])])
+        if 'lgwin' in kwargs:
+            args.extend(['--lgwin', str(kwargs['lgwin'])])
+        with open(temp_compressed, 'wb') as out_file:
+            with open(test_data, 'rb') as in_file:
+                subprocess.check_call(
+                    args, stdin=in_file, stdout=out_file, env=TEST_ENV)
+
+    def _test_compress_file(self, test_data, **kwargs):
+        self._compress_file(test_data, **kwargs)
+        self._check_decompression(test_data)
+
+    def _test_compress_pipe(self, test_data, **kwargs):
+        self._compress_pipe(test_data, **kwargs)
+        self._check_decompression(test_data)
+
+
+_test_utils.generate_test_methods(
+    TestBroCompress, variants=TestBroCompress.VARIANTS)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/third-party/libjxl/libjxl/third_party/brotli/python/tests/compress_test.py b/third-party/libjxl/libjxl/third_party/brotli/python/tests/compress_test.py
new file mode 100644
index 0000000000..46ff68f50b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/python/tests/compress_test.py
@@ -0,0 +1,41 @@
+# Copyright 2016 The Brotli Authors. All rights reserved.
+#
+# Distributed under MIT license.
+# See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+
+import unittest
+
+from . import _test_utils
+import brotli
+
+
+class TestCompress(_test_utils.TestCase):
+    """Round-trips test files through brotli.compress/brotli.decompress."""
+    VARIANTS = {'quality': (1, 6, 9, 11), 'lgwin': (10, 15, 20, 24)}
+
+    def _check_decompression(self, test_data, **kwargs):
+        # Encoder options in kwargs are accepted but not given to decompress().
+        restored_path = _test_utils.get_temp_uncompressed_name(test_data)
+        packed_path = _test_utils.get_temp_compressed_name(test_data)
+        with open(restored_path, 'wb') as out_file:
+            with open(packed_path, 'rb') as in_file:
+                packed = in_file.read()
+                out_file.write(brotli.decompress(packed))
+        # The decompressed temp file must equal the original input file.
+        self.assertFilesMatch(restored_path, test_data)
+
+    def _compress(self, test_data, **kwargs):
+        packed_path = _test_utils.get_temp_compressed_name(test_data)
+        with open(packed_path, 'wb') as out_file:
+            with open(test_data, 'rb') as in_file:
+                out_file.write(brotli.compress(in_file.read(), **kwargs))
+
+    def _test_compress(self, test_data, **kwargs):
+        self._compress(test_data, **kwargs)
+        self._check_decompression(test_data, **kwargs)
+
+
+_test_utils.generate_test_methods(TestCompress, variants=TestCompress.VARIANTS)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/third-party/libjxl/libjxl/third_party/brotli/python/tests/compressor_test.py b/third-party/libjxl/libjxl/third_party/brotli/python/tests/compressor_test.py
new file mode 100644
index 0000000000..2d47919968
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/python/tests/compressor_test.py
@@ -0,0 +1,94 @@
+# Copyright 2016 The Brotli Authors. All rights reserved.
+#
+# Distributed under MIT license.
+# See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+
+import functools
+import unittest
+
+from . import _test_utils
+import brotli
+
+
+# Do not inherit from TestCase here to ensure that test methods
+# are not run automatically and instead are run as part of a specific
+# configuration below.
+class _TestCompressor(object):
+
+    CHUNK_SIZE = 2048
+
+    def tearDown(self):
+        self.compressor = None
+
+    def _check_decompression(self, test_data):
+        # Write decompression to temp file and verify it matches the original.
+        temp_uncompressed = _test_utils.get_temp_uncompressed_name(test_data)
+        temp_compressed = _test_utils.get_temp_compressed_name(test_data)
+        original = test_data
+        with open(temp_uncompressed, 'wb') as out_file:
+            with open(temp_compressed, 'rb') as in_file:
+                out_file.write(brotli.decompress(in_file.read()))
+        self.assertFilesMatch(temp_uncompressed, original)
+
+    def _test_single_process(self, test_data):
+        # Write single-shot compression to temp file.
+        temp_compressed = _test_utils.get_temp_compressed_name(test_data)
+        with open(temp_compressed, 'wb') as out_file:
+            with open(test_data, 'rb') as in_file:
+                out_file.write(self.compressor.process(in_file.read()))
+            out_file.write(self.compressor.finish())
+        self._check_decompression(test_data)
+
+    def _test_multiple_process(self, test_data):
+        # Write chunked compression to temp file.
+        temp_compressed = _test_utils.get_temp_compressed_name(test_data)
+        with open(temp_compressed, 'wb') as out_file:
+            with open(test_data, 'rb') as in_file:
+                read_chunk = functools.partial(in_file.read, self.CHUNK_SIZE)
+                for data in iter(read_chunk, b''):
+                    out_file.write(self.compressor.process(data))
+            out_file.write(self.compressor.finish())
+        self._check_decompression(test_data)
+
+    def _test_multiple_process_and_flush(self, test_data):
+        # Write chunked and flushed compression to temp file.
+        temp_compressed = _test_utils.get_temp_compressed_name(test_data)
+        with open(temp_compressed, 'wb') as out_file:
+            with open(test_data, 'rb') as in_file:
+                read_chunk = functools.partial(in_file.read, self.CHUNK_SIZE)
+                for data in iter(read_chunk, b''):
+                    out_file.write(self.compressor.process(data))
+                    out_file.write(self.compressor.flush())
+            out_file.write(self.compressor.finish())
+        self._check_decompression(test_data)
+
+
+_test_utils.generate_test_methods(_TestCompressor)
+
+
+class TestCompressorQuality1(_TestCompressor, _test_utils.TestCase):
+
+    def setUp(self):
+        self.compressor = brotli.Compressor(quality=1)
+
+
+class TestCompressorQuality6(_TestCompressor, _test_utils.TestCase):
+
+    def setUp(self):
+        self.compressor = brotli.Compressor(quality=6)
+
+
+class TestCompressorQuality9(_TestCompressor, _test_utils.TestCase):
+
+    def setUp(self):
+        self.compressor = brotli.Compressor(quality=9)
+
+
+class TestCompressorQuality11(_TestCompressor, _test_utils.TestCase):
+
+    def setUp(self):
+        self.compressor = brotli.Compressor(quality=11)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/third-party/libjxl/libjxl/third_party/brotli/python/tests/decompress_test.py b/third-party/libjxl/libjxl/third_party/brotli/python/tests/decompress_test.py
new file mode 100644
index 0000000000..814e56332c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/python/tests/decompress_test.py
@@ -0,0 +1,42 @@
+# Copyright 2016 The Brotli Authors. All rights reserved.
+#
+# Distributed under MIT license.
+# See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+
+import unittest
+
+from . import _test_utils
+import brotli
+
+
+def _get_original_name(test_data):
+    return test_data.partition('.compressed')[0]  # text before the marker
+
+
+class TestDecompress(_test_utils.TestCase):
+
+    def _check_decompression(self, test_data):
+        # Verify decompression matches the original.
+        temp_uncompressed = _test_utils.get_temp_uncompressed_name(test_data)
+        original = _get_original_name(test_data)
+        self.assertFilesMatch(temp_uncompressed, original)
+
+    def _decompress(self, test_data):
+        temp_uncompressed = _test_utils.get_temp_uncompressed_name(test_data)
+        with open(temp_uncompressed, 'wb') as out_file:
+            with open(test_data, 'rb') as in_file:
+                out_file.write(brotli.decompress(in_file.read()))
+
+    def _test_decompress(self, test_data):
+        self._decompress(test_data)
+        self._check_decompression(test_data)
+
+    def test_garbage_appended(self):
+        with self.assertRaises(brotli.error):
+            brotli.decompress(brotli.compress(b'a') + b'a')
+
+
+_test_utils.generate_test_methods(TestDecompress, for_decompression=True)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/third-party/libjxl/libjxl/third_party/brotli/python/tests/decompressor_test.py b/third-party/libjxl/libjxl/third_party/brotli/python/tests/decompressor_test.py
new file mode 100644
index 0000000000..05918ada84
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/python/tests/decompressor_test.py
@@ -0,0 +1,59 @@
+# Copyright 2016 The Brotli Authors. All rights reserved.
+#
+# Distributed under MIT license.
+# See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+
+import functools
+import unittest
+
+from . import _test_utils
+import brotli
+
+
+def _get_original_name(test_data):
+    return test_data.split('.compressed')[0]
+
+
+class TestDecompressor(_test_utils.TestCase):
+
+    CHUNK_SIZE = 1
+
+    def setUp(self):
+        self.decompressor = brotli.Decompressor()
+
+    def tearDown(self):
+        self.decompressor = None
+
+    def _check_decompression(self, test_data):
+        # Verify decompression matches the original.
+        temp_uncompressed = _test_utils.get_temp_uncompressed_name(test_data)
+        original = _get_original_name(test_data)
+        self.assertFilesMatch(temp_uncompressed, original)
+
+    def _decompress(self, test_data):
+        temp_uncompressed = _test_utils.get_temp_uncompressed_name(test_data)
+        with open(temp_uncompressed, 'wb') as out_file:
+            with open(test_data, 'rb') as in_file:
+                read_chunk = functools.partial(in_file.read, self.CHUNK_SIZE)
+                for data in iter(read_chunk, b''):
+                    out_file.write(self.decompressor.process(data))
+        self.assertTrue(self.decompressor.is_finished())
+
+    def _test_decompress(self, test_data):
+        self._decompress(test_data)
+        self._check_decompression(test_data)
+
+    def test_garbage_appended(self):
+        with self.assertRaises(brotli.error):
+            self.decompressor.process(brotli.compress(b'a') + b'a')
+
+    def test_already_finished(self):
+        self.decompressor.process(brotli.compress(b'a'))
+        with self.assertRaises(brotli.error):
+            self.decompressor.process(b'a')
+
+
+# Presumably attaches generated per-file test methods to TestDecompressor
+# (decompression mode) -- see _test_utils.generate_test_methods to confirm.
+_test_utils.generate_test_methods(TestDecompressor, for_decompression=True)
+
+# Run the tests when this module is executed as a script.
+if __name__ == '__main__':
+    unittest.main()
diff --git a/third-party/libjxl/libjxl/third_party/brotli/scripts/dictionary/README.md b/third-party/libjxl/libjxl/third_party/brotli/scripts/dictionary/README.md
new file mode 100644
index 0000000000..366a82c3f0
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/scripts/dictionary/README.md
@@ -0,0 +1,3 @@
+A set of tools that can be used to download the Brotli RFC, extract and
+validate the binary dictionary, and generate dictionary derivatives
+(e.g. Java `DictionaryData` class constants).
diff --git a/third-party/libjxl/libjxl/third_party/brotli/scripts/dictionary/step-01-download-rfc.py b/third-party/libjxl/libjxl/third_party/brotli/scripts/dictionary/step-01-download-rfc.py
new file mode 100644
index 0000000000..04f9cccc7d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/scripts/dictionary/step-01-download-rfc.py
@@ -0,0 +1,16 @@
+# Step 01 - download RFC7932.
+#
+# RFC is the ultimate source for brotli format and constants, including
+# static dictionary.
+
+import urllib2
+
+response = urllib2.urlopen("https://tools.ietf.org/rfc/rfc7932.txt")
+
+text = response.read()
+path = "rfc7932.txt"
+
+with open(path, "w") as rfc:
+  rfc.write(text)
+
+print("Downloaded and saved " + str(len(text)) + " bytes to " + path)
diff --git a/third-party/libjxl/libjxl/third_party/brotli/scripts/dictionary/step-02-rfc-to-bin.py b/third-party/libjxl/libjxl/third_party/brotli/scripts/dictionary/step-02-rfc-to-bin.py
new file mode 100644
index 0000000000..ddf255a328
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/scripts/dictionary/step-02-rfc-to-bin.py
@@ -0,0 +1,34 @@
+# Step 02 - parse RFC.
+#
+# Static dictionary is described in "Appendix A" section in a hexadecimal form.
+# This tool locates dictionary data in RFC and converts it to raw binary format.
+
+import re
+
+rfc_path = "rfc7932.txt"
+
+with open(rfc_path, "r") as rfc:
+  lines = rfc.readlines()
+
+re_data_line = re.compile("^      [0-9a-f]{64}$")
+
+appendix_a_found = False
+dictionary = []
+for line in lines:
+  if appendix_a_found:
+    if re_data_line.match(line) is not None:
+      data = line.strip()
+      for i in range(32):
+        dictionary.append(int(data[2 * i:2 * i + 2], 16))
+      if len(dictionary) == 122784:
+        break
+  else:
+    if line.startswith("Appendix A."):
+      appendix_a_found = True
+
+bin_path = "dictionary.bin"
+
+with open(bin_path, "wb") as output:
+  output.write(bytearray(dictionary))
+
+print("Parsed and saved " + str(len(dictionary)) + " bytes to " + bin_path)
diff --git a/third-party/libjxl/libjxl/third_party/brotli/scripts/dictionary/step-03-validate-bin.py b/third-party/libjxl/libjxl/third_party/brotli/scripts/dictionary/step-03-validate-bin.py
new file mode 100644
index 0000000000..b52e6141b8
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/scripts/dictionary/step-03-validate-bin.py
@@ -0,0 +1,35 @@
+# Step 03 - validate raw dictionary file.
+#
+# CRC32, MD5, SHA1 and SHA256 checksums for raw binary dictionary are checked.
+
+import hashlib
+import zlib
+
+bin_path = "dictionary.bin"
+
+with open(bin_path, "rb") as raw:
+  data = raw.read()
+
+
+def check_digest(name, expected, actual):
+  if expected == actual:
+    print("[OK] " + name)
+  else:
+    print("[ERROR] " + name + " | " + expected + " != " + actual)
+
+
+check_digest(
+    "CRC32",  # This is the only checksum provided in RFC.
+    "0x5136cb04",
+    hex(zlib.crc32(data)))
+
+check_digest("MD5", "96cecd2ee7a666d5aa3627d74735b32a",
+             hashlib.md5(data).hexdigest())
+
+check_digest("SHA1", "72b41051cb61a9281ba3c4414c289da50d9a7640",
+             hashlib.sha1(data).hexdigest())
+
+check_digest(
+    "SHA256",
+    "20e42eb1b511c21806d4d227d07e5dd06877d8ce7b3a817f378f313653f35c70",
+    hashlib.sha256(data).hexdigest())
diff --git a/third-party/libjxl/libjxl/third_party/brotli/scripts/dictionary/step-04-generate-java-literals.py b/third-party/libjxl/libjxl/third_party/brotli/scripts/dictionary/step-04-generate-java-literals.py
new file mode 100644
index 0000000000..d8645427b0
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/scripts/dictionary/step-04-generate-java-literals.py
@@ -0,0 +1,85 @@
+# Step 04 - generate Java literals.
+#
+# Java byte-code has severe restrictions. There is no such thing as
+# "array literal" - those are implemented as series of data[x] = y;
+# as a consequence N-byte array will use 7N bytes in class, plus N bytes
+# in instantiated variable. Also no literal could be longer than 64KiB.
+#
+# To keep dictionary data compact both in source code and in compiled format
+# we use the following tricks:
+#  * use String as a data container
+#  * store only lowest 7 bits; i.e. all characters fit ASCII table; this allows
+#    efficient conversion to byte array; also ASCII characters use only 1 byte
+#    of memory (UTF-8 encoding)
+#  * RLE-compress sequence of 8-th bits
+#
+# This script generates literals used in Java code.
+
+try:
+  unichr  # Python 2
+except NameError:
+  unichr = chr  # Python 3
+
+bin_path = "dictionary.bin"
+
+with open(bin_path, "rb") as raw:
+  data = raw.read()
+
+low = []
+hi = []
+is_skip = True
+skip_flip_offset = 36
+cntr = skip_flip_offset
+for b in data:
+  value = ord(b)
+  low.append(chr(value & 0x7F))
+  if is_skip:
+    if value < 0x80:
+      cntr += 1
+    else:
+      is_skip = False
+      hi.append(unichr(cntr))
+      cntr = skip_flip_offset + 1
+  else:
+    if value >= 0x80:
+      cntr += 1
+    else:
+      is_skip = True
+      hi.append(unichr(cntr))
+      cntr = skip_flip_offset + 1
+hi.append(unichr(cntr))
+
+low0 = low[0:len(low) // 2]
+low1 = low[len(low) // 2:len(low)]
+
+
+def escape(chars):
+  result = []
+  for c in chars:
+    if "\r" == c:
+      result.append("\\r")
+    elif "\n" == c:
+      result.append("\\n")
+    elif "\t" == c:
+      result.append("\\t")
+    elif "\"" == c:
+      result.append("\\\"")
+    elif "\\" == c:
+      result.append("\\\\")
+    elif ord(c) < 32 or ord(c) >= 127:
+      result.append("\\u%04X" % ord(c))
+    else:
+      result.append(c)
+  return result
+
+
+source_code = [
+    "  private static final String DATA0 = \"", "".join(escape(low0)), "\";\n",
+    "  private static final String DATA1 = \"", "".join(escape(low1)), "\";\n",
+    "  private static final String SKIP_FLIP = \"", "".join(escape(hi)), "\";\n"
+]
+
+src_path = "DictionaryData.inc.java"
+
+with open(src_path, "w") as source:
+  source.write("".join(source_code))
diff --git a/third-party/libjxl/libjxl/third_party/brotli/scripts/libbrotlicommon.pc.in b/third-party/libjxl/libjxl/third_party/brotli/scripts/libbrotlicommon.pc.in
new file mode 100644
index 0000000000..2a8cf7a35e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/scripts/libbrotlicommon.pc.in
@@ -0,0 +1,11 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: libbrotlicommon
+URL: https://github.com/google/brotli
+Description: Brotli common dictionary library
+Version: @PACKAGE_VERSION@
+Libs: -L${libdir} -lbrotlicommon
+Cflags: -I${includedir}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/scripts/libbrotlidec.pc.in b/third-party/libjxl/libjxl/third_party/brotli/scripts/libbrotlidec.pc.in
new file mode 100644
index 0000000000..6f8ef2e41d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/scripts/libbrotlidec.pc.in
@@ -0,0 +1,12 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: libbrotlidec
+URL: https://github.com/google/brotli
+Description: Brotli decoder library
+Version: @PACKAGE_VERSION@
+Libs: -L${libdir} -lbrotlidec
+Requires.private: libbrotlicommon >= 1.0.2
+Cflags: -I${includedir}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/scripts/libbrotlienc.pc.in b/third-party/libjxl/libjxl/third_party/brotli/scripts/libbrotlienc.pc.in
new file mode 100644
index 0000000000..2098afe2c1
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/scripts/libbrotlienc.pc.in
@@ -0,0 +1,12 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: libbrotlienc
+URL: https://github.com/google/brotli
+Description: Brotli encoder library
+Version: @PACKAGE_VERSION@
+Libs: -L${libdir} -lbrotlienc
+Requires.private: libbrotlicommon >= 1.0.2
+Cflags: -I${includedir}
diff --git a/third-party/libjxl/libjxl/third_party/brotli/scripts/sources.lst b/third-party/libjxl/libjxl/third_party/brotli/scripts/sources.lst
new file mode 100644
index 0000000000..2848cc54df
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/scripts/sources.lst
@@ -0,0 +1,111 @@
+# IT WOULD BE FOOLISH TO USE COMPUTERS TO AUTOMATE REPETITIVE TASKS:
+# neither CMake nor Automake support "glob" expressions,
+# so every header and source file have to be listed manually.
+
+BROTLI_CLI_C = \
+  c/tools/brotli.c
+
+BROTLI_COMMON_C = \
+  c/common/constants.c \
+  c/common/context.c \
+  c/common/dictionary.c \
+  c/common/platform.c \
+  c/common/shared_dictionary.c \
+  c/common/transform.c
+
+BROTLI_COMMON_H = \
+  c/common/constants.h \
+  c/common/context.h \
+  c/common/dictionary.h \
+  c/common/platform.h \
+  c/common/shared_dictionary_internal.h \
+  c/common/transform.h \
+  c/common/version.h
+
+BROTLI_DEC_C = \
+  c/dec/bit_reader.c \
+  c/dec/decode.c \
+  c/dec/huffman.c \
+  c/dec/state.c
+
+BROTLI_DEC_H = \
+  c/dec/bit_reader.h \
+  c/dec/huffman.h \
+  c/dec/prefix.h \
+  c/dec/state.h
+
+BROTLI_ENC_C = \
+  c/enc/backward_references.c \
+  c/enc/backward_references_hq.c \
+  c/enc/bit_cost.c \
+  c/enc/block_splitter.c \
+  c/enc/brotli_bit_stream.c \
+  c/enc/cluster.c \
+  c/enc/command.c \
+  c/enc/compound_dictionary.c \
+  c/enc/compress_fragment.c \
+  c/enc/compress_fragment_two_pass.c \
+  c/enc/dictionary_hash.c \
+  c/enc/encode.c \
+  c/enc/encoder_dict.c \
+  c/enc/entropy_encode.c \
+  c/enc/fast_log.c \
+  c/enc/histogram.c \
+  c/enc/literal_cost.c \
+  c/enc/memory.c \
+  c/enc/metablock.c \
+  c/enc/static_dict.c \
+  c/enc/utf8_util.c
+
+BROTLI_ENC_H = \
+  c/enc/backward_references.h \
+  c/enc/backward_references_hq.h \
+  c/enc/backward_references_inc.h \
+  c/enc/bit_cost.h \
+  c/enc/bit_cost_inc.h \
+  c/enc/block_encoder_inc.h \
+  c/enc/block_splitter.h \
+  c/enc/block_splitter_inc.h \
+  c/enc/brotli_bit_stream.h \
+  c/enc/cluster.h \
+  c/enc/cluster_inc.h \
+  c/enc/command.h \
+  c/enc/compound_dictionary.h \
+  c/enc/compress_fragment.h \
+  c/enc/compress_fragment_two_pass.h \
+  c/enc/dictionary_hash.h \
+  c/enc/encoder_dict.h \
+  c/enc/entropy_encode.h \
+  c/enc/entropy_encode_static.h \
+  c/enc/fast_log.h \
+  c/enc/find_match_length.h \
+  c/enc/hash.h \
+  c/enc/hash_composite_inc.h \
+  c/enc/hash_forgetful_chain_inc.h \
+  c/enc/hash_longest_match64_inc.h \
+  c/enc/hash_longest_match_inc.h \
+  c/enc/hash_longest_match_quickly_inc.h \
+  c/enc/hash_rolling_inc.h \
+  c/enc/hash_to_binary_tree_inc.h \
+  c/enc/histogram.h \
+  c/enc/histogram_inc.h \
+  c/enc/literal_cost.h \
+  c/enc/memory.h \
+  c/enc/metablock.h \
+  c/enc/metablock_inc.h \
+  c/enc/params.h \
+  c/enc/prefix.h \
+  c/enc/quality.h \
+  c/enc/ringbuffer.h \
+  c/enc/state.h \
+  c/enc/static_dict.h \
+  c/enc/static_dict_lut.h \
+  c/enc/utf8_util.h \
+  c/enc/write_bits.h
+
+BROTLI_INCLUDE = \
+  c/include/brotli/decode.h \
+  c/include/brotli/encode.h \
+  c/include/brotli/port.h \
+  c/include/brotli/shared_dictionary.h \
+  c/include/brotli/types.h
diff --git a/third-party/libjxl/libjxl/third_party/brotli/setup.cfg b/third-party/libjxl/libjxl/third_party/brotli/setup.cfg
new file mode 100644
index 0000000000..16da438a72
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/setup.cfg
@@ -0,0 +1,5 @@
+[build]
+build_base=bin
+
+[yapf]
+based_on_style=google
diff --git a/third-party/libjxl/libjxl/third_party/brotli/setup.py b/third-party/libjxl/libjxl/third_party/brotli/setup.py
new file mode 100644
index 0000000000..5d934831c6
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/brotli/setup.py
@@ -0,0 +1,289 @@
+# Copyright 2015 The Brotli Authors. All rights reserved.
+#
+# Distributed under MIT license.
+# See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+
+import os
+import platform
+import re
+import unittest
+
+try:
+    from setuptools import Extension
+    from setuptools import setup
+except:
+    from distutils.core import Extension
+    from distutils.core import setup
+from distutils.command.build_ext import build_ext
+from distutils import errors
+from distutils import dep_util
+from distutils import log
+
+
+CURR_DIR = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
+
+
+def get_version():
+    """ Return BROTLI_VERSION string as defined in 'common/version.h' file. """
+    version_file_path = os.path.join(CURR_DIR, 'c', 'common', 'version.h')
+    version = 0
+    with open(version_file_path, 'r') as f:
+        for line in f:
+            m = re.match(r'#define\sBROTLI_VERSION\s+0x([0-9a-fA-F]+)', line)
+            if m:
+                version = int(m.group(1), 16)
+    if version == 0:
+        return ''
+    # Semantic version is calculated as (MAJOR << 24) | (MINOR << 12) | PATCH.
+    major = version >> 24
+    minor = (version >> 12) & 0xFFF
+    patch = version & 0xFFF
+    return '{0}.{1}.{2}'.format(major, minor, patch)
+
+
+def get_test_suite():
+    test_loader = unittest.TestLoader()
+    test_suite = test_loader.discover('python', pattern='*_test.py')
+    return test_suite
+
+
+class BuildExt(build_ext):
+
+    def get_source_files(self):
+        filenames = build_ext.get_source_files(self)
+        for ext in self.extensions:
+            filenames.extend(ext.depends)
+        return filenames
+
+    def build_extension(self, ext):
+        if ext.sources is None or not isinstance(ext.sources, (list, tuple)):
+            raise errors.DistutilsSetupError(
+                "in 'ext_modules' option (extension '%s'), "
+                "'sources' must be present and must be "
+                "a list of source filenames" % ext.name)
+
+        ext_path = self.get_ext_fullpath(ext.name)
+        depends = ext.sources + ext.depends
+        if not (self.force or dep_util.newer_group(depends, ext_path, 'newer')):
+            log.debug("skipping '%s' extension (up-to-date)", ext.name)
+            return
+        else:
+            log.info("building '%s' extension", ext.name)
+
+        c_sources = []
+        for source in ext.sources:
+            if source.endswith('.c'):
+                c_sources.append(source)
+        extra_args = ext.extra_compile_args or []
+
+        objects = []
+
+        macros = ext.define_macros[:]
+        if platform.system() == 'Darwin':
+            macros.append(('OS_MACOSX', '1'))
+        elif self.compiler.compiler_type == 'mingw32':
+            # On Windows Python 2.7, pyconfig.h defines "hypot" as "_hypot",
+            # This clashes with GCC's cmath, and causes compilation errors when
+            # building under MinGW: http://bugs.python.org/issue11566
+            macros.append(('_hypot', 'hypot'))
+        for undef in ext.undef_macros:
+            macros.append((undef,))
+
+        objs = self.compiler.compile(
+            c_sources,
+            output_dir=self.build_temp,
+            macros=macros,
+            include_dirs=ext.include_dirs,
+            debug=self.debug,
+            extra_postargs=extra_args,
+            depends=ext.depends)
+        objects.extend(objs)
+
+        self._built_objects = objects[:]
+        if ext.extra_objects:
+            objects.extend(ext.extra_objects)
+        extra_args = ext.extra_link_args or []
+        # when using GCC on Windows, we statically link libgcc and libstdc++,
+        # so that we don't need to package extra DLLs
+        if self.compiler.compiler_type == 'mingw32':
+            extra_args.extend(['-static-libgcc', '-static-libstdc++'])
+
+        ext_path = self.get_ext_fullpath(ext.name)
+        # Detect target language, if not provided
+        language = ext.language or self.compiler.detect_language(c_sources)
+
+        self.compiler.link_shared_object(
+            objects,
+            ext_path,
+            libraries=self.get_libraries(ext),
+            library_dirs=ext.library_dirs,
+            runtime_library_dirs=ext.runtime_library_dirs,
+            extra_postargs=extra_args,
+            export_symbols=self.get_export_symbols(ext),
+            debug=self.debug,
+            build_temp=self.build_temp,
+            target_lang=language)
+
+
+NAME = 'Brotli'
+
+VERSION = get_version()
+
+URL = 'https://github.com/google/brotli'
+
+DESCRIPTION = 'Python bindings for the Brotli compression library'
+
+AUTHOR = 'The Brotli Authors'
+
+LICENSE = 'MIT'
+
+PLATFORMS = ['Posix', 'MacOS X', 'Windows']
+
+CLASSIFIERS = [
+    'Development Status :: 4 - Beta',
+    'Environment :: Console',
+    'Intended Audience :: Developers',
+    'License :: OSI Approved :: MIT License',
+    'Operating System :: MacOS :: MacOS X',
+    'Operating System :: Microsoft :: Windows',
+    'Operating System :: POSIX :: Linux',
+    'Programming Language :: C',
+    'Programming Language :: C++',
+    'Programming Language :: Python',
+    'Programming Language :: Python :: 2',
+    'Programming Language :: Python :: 2.7',
+    'Programming Language :: Python :: 3',
+    'Programming Language :: Python :: 3.3',
+    'Programming Language :: Python :: 3.4',
+    'Programming Language :: Python :: 3.5',
+    'Programming Language :: Unix Shell',
+    'Topic :: Software Development :: Libraries',
+    'Topic :: Software Development :: Libraries :: Python Modules',
+    'Topic :: System :: Archiving',
+    'Topic :: System :: Archiving :: Compression',
+    'Topic :: Text Processing :: Fonts',
+    'Topic :: Utilities',
+]
+
+PACKAGE_DIR = {'': 'python'}
+
+PY_MODULES = ['brotli']
+
+EXT_MODULES = [
+    Extension(
+        '_brotli',
+        sources=[
+            'python/_brotli.c',
+            'c/common/constants.c',
+            'c/common/context.c',
+            'c/common/dictionary.c',
+            'c/common/platform.c',
+            'c/common/shared_dictionary.c',
+            'c/common/transform.c',
+            'c/dec/bit_reader.c',
+            'c/dec/decode.c',
+            'c/dec/huffman.c',
+            'c/dec/state.c',
+            'c/enc/backward_references.c',
+            'c/enc/backward_references_hq.c',
+            'c/enc/bit_cost.c',
+            'c/enc/block_splitter.c',
+            'c/enc/brotli_bit_stream.c',
+            'c/enc/cluster.c',
+            'c/enc/command.c',
+            'c/enc/compound_dictionary.c',
+            'c/enc/compress_fragment.c',
+            'c/enc/compress_fragment_two_pass.c',
+            'c/enc/dictionary_hash.c',
+            'c/enc/encode.c',
+            'c/enc/encoder_dict.c',
+            'c/enc/entropy_encode.c',
+            'c/enc/fast_log.c',
+            'c/enc/histogram.c',
+            'c/enc/literal_cost.c',
+            'c/enc/memory.c',
+            'c/enc/metablock.c',
+            'c/enc/static_dict.c',
+            'c/enc/utf8_util.c',
+        ],
+        depends=[
+            'c/common/constants.h',
+            'c/common/context.h',
+            'c/common/dictionary.h',
+            'c/common/platform.h',
+            'c/common/shared_dictionary_internal.h',
+            'c/common/transform.h',
+            'c/common/version.h',
+            'c/dec/bit_reader.h',
+            'c/dec/huffman.h',
+            'c/dec/prefix.h',
+            'c/dec/state.h',
+            'c/enc/backward_references.h',
+            'c/enc/backward_references_hq.h',
+            'c/enc/backward_references_inc.h',
+            'c/enc/bit_cost.h',
+            'c/enc/bit_cost_inc.h',
+            'c/enc/block_encoder_inc.h',
+            'c/enc/block_splitter.h',
+            'c/enc/block_splitter_inc.h',
+            'c/enc/brotli_bit_stream.h',
+            'c/enc/cluster.h',
+            'c/enc/cluster_inc.h',
+            'c/enc/command.h',
+            'c/enc/compound_dictionary.h',
+            'c/enc/compress_fragment.h',
+            'c/enc/compress_fragment_two_pass.h',
+            'c/enc/dictionary_hash.h',
+            'c/enc/encoder_dict.h',
+            'c/enc/entropy_encode.h',
+            'c/enc/entropy_encode_static.h',
+            'c/enc/fast_log.h',
+            'c/enc/find_match_length.h',
+            'c/enc/hash.h',
+            'c/enc/hash_composite_inc.h',
+            'c/enc/hash_forgetful_chain_inc.h',
+            'c/enc/hash_longest_match64_inc.h',
+            'c/enc/hash_longest_match_inc.h',
+            'c/enc/hash_longest_match_quickly_inc.h',
+            'c/enc/hash_rolling_inc.h',
+            'c/enc/hash_to_binary_tree_inc.h',
+            'c/enc/histogram.h',
+            'c/enc/histogram_inc.h',
+            'c/enc/literal_cost.h',
+            'c/enc/memory.h',
+            'c/enc/metablock.h',
+            'c/enc/metablock_inc.h',
+            'c/enc/params.h',
+            'c/enc/prefix.h',
+            'c/enc/quality.h',
+            'c/enc/ringbuffer.h',
+            'c/enc/static_dict.h',
+            'c/enc/static_dict_lut.h',
+            'c/enc/utf8_util.h',
+            'c/enc/write_bits.h',
+        ],
+        include_dirs=[
+            'c/include',
+        ]),
+]
+
+TEST_SUITE = 'setup.get_test_suite'
+
+CMD_CLASS = {
+    'build_ext': BuildExt,
+}
+
+setup(
+    name=NAME,
+    description=DESCRIPTION,
+    version=VERSION,
+    url=URL,
+    author=AUTHOR,
+    license=LICENSE,
+    platforms=PLATFORMS,
+    classifiers=CLASSIFIERS,
+    package_dir=PACKAGE_DIR,
+    py_modules=PY_MODULES,
+    ext_modules=EXT_MODULES,
+    test_suite=TEST_SUITE,
+    cmdclass=CMD_CLASS)
diff --git a/third-party/libjxl/libjxl/third_party/dirent.cc b/third-party/libjxl/libjxl/third_party/dirent.cc
new file mode 100644
index 0000000000..81015ed0fb
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/dirent.cc
@@ -0,0 +1,142 @@
+// Copyright (c) the JPEG XL Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#if defined(_WIN32) || defined(_WIN64)
+#include "third_party/dirent.h"
+
+#include "lib/jxl/base/status.h"
+
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif  // NOMINMAX
+#include <windows.h>
+
+#include <memory>
+#include <string>
+
+// POSIX-style mkdir for Windows: creates `path` with default security
+// attributes. The mode argument is ignored. Returns 0 on success, -1 on
+// failure.
+int mkdir(const char* path, mode_t /*mode*/) {
+  if (CreateDirectory(path, /*lpSecurityAttributes=*/nullptr)) {
+    return 0;
+  }
+  JXL_NOTIFY_ERROR("Failed to create directory %s", path);
+  return -1;
+}
+
+// Modified from code bearing the following notice:
+// https://trac.wildfiregames.com/browser/ps/trunk/source/lib/sysdep/os/
+/* Copyright (C) 2010 Wildfire Games.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// Directory stream state shared by opendir / readdir / closedir.
+struct DIR {
+  // Search handle obtained from FindFirstFile in opendir;
+  // INVALID_HANDLE_VALUE marks the "empty directory" case.
+  HANDLE hFind;
+
+  WIN32_FIND_DATA findData;  // indeterminate if hFind == INVALID_HANDLE_VALUE
+
+  // readdir will return the address of this member.
+  // (must be stored in DIR to allow multiple independent
+  // opendir/readdir sequences).
+  dirent ent;
+
+  // used by readdir to skip the first FindNextFile.
+  size_t numCalls = 0;
+};
+
+// Returns true iff `path` exists and has the directory attribute set.
+static bool IsValidDirectory(const char* path) {
+  const DWORD attrs = GetFileAttributes(path);
+  // INVALID_FILE_ATTRIBUTES means the path could not be queried at all.
+  return attrs != INVALID_FILE_ATTRIBUTES &&
+         (attrs & FILE_ATTRIBUTE_DIRECTORY) != 0;
+}
+
+// Opens `path` for enumeration with readdir.
+// Returns a heap-allocated DIR that the caller must pass to closedir, or
+// nullptr on failure. errno is set to ENOENT only when `path` is not an
+// existing directory; the FindFirstFile failure path does not set errno.
+DIR* opendir(const char* path) {
+  if (!IsValidDirectory(path)) {
+    errno = ENOENT;
+    return nullptr;
+  }
+
+  // Owned by unique_ptr until handed to the caller, so the failure path
+  // below frees it automatically.
+  std::unique_ptr<DIR> d(new DIR);
+
+  // NB: "c:\\path" only returns information about that directory;
+  // trailing slashes aren't allowed. append "\\*" to retrieve its entries.
+  std::string searchPath(path);
+  if (searchPath.back() != '/' && searchPath.back() != '\\') {
+    searchPath += '\\';
+  }
+  searchPath += '*';
+
+  // (we don't defer FindFirstFile until readdir because callers
+  // expect us to return 0 if directory reading will/did fail.)
+  d->hFind = FindFirstFile(searchPath.c_str(), &d->findData);
+  if (d->hFind != INVALID_HANDLE_VALUE) return d.release();
+  // GetLastError must be read right after the failed call, before any other
+  // API call can overwrite it.
+  if (GetLastError() == ERROR_NO_MORE_FILES) return d.release();  // empty
+
+  JXL_NOTIFY_ERROR("Failed to open directory %s", searchPath.c_str());
+  return nullptr;
+}
+
+// Closes a directory stream returned by opendir and frees it.
+// Accepts nullptr (no-op). Always returns 0.
+int closedir(DIR* dir) {
+  // Release the search handle opened by FindFirstFile in opendir; deleting
+  // the DIR alone would leak it. The "empty directory" case stores
+  // INVALID_HANDLE_VALUE, which must not be passed to FindClose.
+  if (dir != nullptr && dir->hFind != INVALID_HANDLE_VALUE) {
+    FindClose(dir->hFind);
+  }
+  delete dir;
+  return 0;
+}
+
+// Returns the next entry of `d` that is neither hidden nor a system file, or
+// nullptr once the directory is exhausted. The returned pointer refers to
+// storage inside `d`, so its contents are overwritten by the next call.
+dirent* readdir(DIR* d) {
+  // "empty" case from opendir
+  if (d->hFind == INVALID_HANDLE_VALUE) return nullptr;
+
+  // until end of directory or a valid entry was found:
+  for (;;) {
+    if (d->numCalls++ != 0)  // (skip first call to FindNextFile - see opendir)
+    {
+      if (!FindNextFile(d->hFind, &d->findData)) {
+        // Only "no more files" is expected here; any other error code fails
+        // the assertion.
+        JXL_ASSERT(GetLastError() == ERROR_NO_MORE_FILES);
+        SetLastError(0);
+        return nullptr;  // end of directory or error
+      }
+    }
+
+    // only return non-hidden and non-system entries
+    if ((d->findData.dwFileAttributes &
+         (FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM)) == 0) {
+      // d_name aliases the buffer inside findData; no copy is made.
+      d->ent.d_name = d->findData.cFileName;
+      return &d->ent;
+    }
+  }
+}
+
+#endif  // #if defined(_WIN32) || defined(_WIN64)
diff --git a/third-party/libjxl/libjxl/third_party/dirent.h b/third-party/libjxl/libjxl/third_party/dirent.h
new file mode 100644
index 0000000000..37a08f425b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/dirent.h
@@ -0,0 +1,49 @@
+// Copyright (c) the JPEG XL Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIB_JXL_THIRD_PARTY_DIRENT_H_
+#define LIB_JXL_THIRD_PARTY_DIRENT_H_
+
+// Emulates POSIX readdir for Windows
+
+#if defined(_WIN32) || defined(_WIN64)
+
+#include <sys/stat.h>  // S_IFREG
+
+#ifndef _MODE_T_
+typedef unsigned int mode_t;
+#endif  // _MODE_T_
+int mkdir(const char* path, mode_t mode);
+
+struct dirent {
+  char* d_name;  // no path
+};
+
+#define stat _stat64
+
+// File-type predicates for platforms whose <sys/stat.h> lacks them.
+// The mode is masked with S_IFMT and compared against the type constant, so
+// the result is a proper boolean, and the argument is parenthesised so that
+// expressions such as S_ISDIR(a | b) expand correctly.
+#ifndef S_ISDIR
+#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
+#endif  // S_ISDIR
+
+#ifndef S_ISREG
+#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+#endif  // S_ISREG
+
+struct DIR;
+DIR* opendir(const char* path);
+int closedir(DIR* dir);
+dirent* readdir(DIR* d);
+
+#endif  // #if defined(_WIN32) || defined(_WIN64)
+#endif  // LIB_JXL_THIRD_PARTY_DIRENT_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/BUILD b/third-party/libjxl/libjxl/third_party/highway/BUILD
new file mode 100644
index 0000000000..d54bd46b20
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/BUILD
@@ -0,0 +1,477 @@
+load("@rules_license//rules:license.bzl", "license")
+load("@bazel_skylib//lib:selects.bzl", "selects")
+
+load("@rules_cc//cc:defs.bzl", "cc_test")
+package(
+    default_applicable_licenses = ["//:license"],
+    default_visibility = ["//visibility:public"],
+)
+
+license(
+    name = "license",
+    package_name = "highway",
+    license_kinds = ["@rules_license//licenses/generic:notice"],
+)
+
+# Dual-licensed Apache 2 and 3-clause BSD.
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+# Detect compiler:
+config_setting(
+    name = "compiler_clang",
+    flag_values = {"@bazel_tools//tools/cpp:compiler": "clang"},
+)
+
+config_setting(
+    name = "compiler_clangcl",
+    flag_values = {"@bazel_tools//tools/cpp:compiler": "lexan"},
+)
+
+config_setting(
+    name = "compiler_msvc_actual",
+    flag_values = {"@bazel_tools//tools/cpp:compiler": "msvc"},
+)
+
+# The above is insufficient for Bazel on Windows, which does not seem to
+# detect/set a compiler flag. This workaround prevents compile errors due to
+# passing clang-only warning flags to MSVC.
+config_setting(
+    name = "compiler_msvc_cpu",
+    values = {
+        "cpu": "x64_windows",
+    },
+)
+
+selects.config_setting_group(
+    name = "compiler_msvc",
+    match_any = [
+        ":compiler_msvc_actual",
+        ":compiler_msvc_cpu",
+    ],
+)
+
+config_setting(
+    name = "compiler_emscripten",
+    values = {"cpu": "wasm32"},
+)
+
+# See https://github.com/bazelbuild/bazel/issues/12707
+config_setting(
+    name = "compiler_gcc_bug",
+    flag_values = {
+        "@bazel_tools//tools/cpp:compiler": "compiler",
+    },
+)
+
+config_setting(
+    name = "compiler_gcc_actual",
+    flag_values = {
+        "@bazel_tools//tools/cpp:compiler": "gcc",
+    },
+)
+
+selects.config_setting_group(
+    name = "compiler_gcc",
+    match_any = [
+        ":compiler_gcc_bug",
+        ":compiler_gcc_actual",
+    ],
+)
+
+# Additional warnings for Clang OR GCC (skip for MSVC)
+CLANG_GCC_COPTS = [
+    "-Wunused-parameter",
+    "-Wunused-variable",
+    "-Wextra-semi",
+    "-Wunreachable-code",
+]
+
+# Warnings supported by Clang and Clang-cl
+CLANG_OR_CLANGCL_OPTS = CLANG_GCC_COPTS + [
+    "-Wfloat-overflow-conversion",
+    "-Wfloat-zero-conversion",
+    "-Wfor-loop-analysis",
+    "-Wgnu-redeclared-enum",
+    "-Winfinite-recursion",
+    "-Wliteral-conversion",
+    "-Wno-c++98-compat",
+    "-Wno-unused-command-line-argument",
+    "-Wprivate-header",
+    "-Wself-assign",
+    "-Wstring-conversion",
+    "-Wtautological-overlap-compare",
+    "-Wthread-safety-analysis",
+    "-Wundefined-func-template",
+    "-Wunused-comparison",
+]
+
+# Warnings only supported by Clang, but not Clang-cl
+CLANG_ONLY_COPTS = CLANG_OR_CLANGCL_OPTS + [
+    # Do not treat the third_party headers as system headers when building
+    # highway - the errors are pertinent.
+    "--no-system-header-prefix=third_party/highway",
+]
+
+COPTS = select({
+    ":compiler_msvc": [],
+    ":compiler_gcc": CLANG_GCC_COPTS,
+    ":compiler_clangcl": CLANG_OR_CLANGCL_OPTS,
+    # Default to clang because compiler detection only works in Bazel
+    "//conditions:default": CLANG_ONLY_COPTS,
+}) + select({
+    "@platforms//cpu:riscv64": [
+        "-march=rv64gcv1p0",
+        "-menable-experimental-extensions",
+    ],
+    "//conditions:default": [
+    ],
+})
+
+DEFINES = select({
+    ":compiler_msvc": ["HWY_SHARED_DEFINE"],
+    ":compiler_clangcl": ["HWY_SHARED_DEFINE"],
+    "//conditions:default": [],
+})
+
+# Unused on Bazel builds, where this is not defined/known; Copybara replaces
+# usages with an empty list.
+COMPAT = [
+    "//buildenv/target:non_prod",  # includes mobile/vendor.
+]
+
+# WARNING: changing flags such as HWY_DISABLED_TARGETS may break users without
+# failing integration tests, if the machine running tests does not support the
+# newly enabled instruction set, or the failure is only caught by sanitizers
+# which do not run in CI.
+
+# NOTE: when adding a new dependency on the Highway library, please add your
+# test to the highway.users list in highway.blueprint.
+cc_library(
+    name = "hwy",
+    srcs = [
+        "hwy/aligned_allocator.cc",
+        "hwy/per_target.cc",
+        "hwy/print.cc",
+        "hwy/targets.cc",
+    ],
+    # Normal headers with include guards
+    hdrs = [
+        "hwy/aligned_allocator.h",
+        "hwy/base.h",
+        "hwy/cache_control.h",
+        "hwy/detect_compiler_arch.h",  # private
+        "hwy/print.h",
+    ],
+    compatible_with = [],
+    copts = COPTS,
+    defines = DEFINES,
+    local_defines = ["hwy_EXPORTS"],
+    textual_hdrs = [
+        # These are textual because config macros influence them:
+        "hwy/detect_targets.h",  # private
+        "hwy/targets.h",
+        # This .cc file #includes itself through foreach_target.h
+        "hwy/per_target.cc",
+        # End of list
+        "hwy/highway.h",  # public
+        "hwy/foreach_target.h",  # public
+        "hwy/per_target.h",  # public
+        "hwy/print-inl.h",  # public
+        "hwy/highway_export.h",  # public
+        "hwy/ops/arm_neon-inl.h",
+        "hwy/ops/arm_sve-inl.h",
+        "hwy/ops/emu128-inl.h",
+        "hwy/ops/generic_ops-inl.h",
+        "hwy/ops/scalar-inl.h",
+        "hwy/ops/set_macros-inl.h",
+        "hwy/ops/shared-inl.h",
+        "hwy/ops/tuple-inl.h",
+        "hwy/ops/x86_128-inl.h",
+        "hwy/ops/x86_256-inl.h",
+        "hwy/ops/x86_512-inl.h",
+        # Select avoids recompiling native arch if only non-native changed
+    ] + select({
+        ":compiler_emscripten": [
+            "hwy/ops/wasm_128-inl.h",
+            "hwy/ops/wasm_256-inl.h",
+        ],
+        "//conditions:default": [],
+    }) + select({
+        "@platforms//cpu:riscv64": ["hwy/ops/rvv-inl.h"],
+        "//conditions:default": [],
+    }),
+)
+
+cc_library(
+    name = "algo",
+    compatible_with = [],
+    copts = COPTS,
+    textual_hdrs = [
+        "hwy/contrib/algo/copy-inl.h",
+        "hwy/contrib/algo/find-inl.h",
+        "hwy/contrib/algo/transform-inl.h",
+    ],
+    deps = [
+        ":hwy",
+    ],
+)
+
+cc_library(
+    name = "bit_pack",
+    compatible_with = [],
+    copts = COPTS,
+    textual_hdrs = [
+        "hwy/contrib/bit_pack/bit_pack-inl.h",
+    ],
+    deps = [
+        ":hwy",
+    ],
+)
+
+cc_library(
+    name = "dot",
+    compatible_with = [],
+    copts = COPTS,
+    textual_hdrs = [
+        "hwy/contrib/dot/dot-inl.h",
+    ],
+    deps = [
+        ":hwy",
+    ],
+)
+
+cc_library(
+    name = "image",
+    srcs = [
+        "hwy/contrib/image/image.cc",
+    ],
+    hdrs = [
+        "hwy/contrib/image/image.h",
+    ],
+    compatible_with = [],
+    copts = COPTS,
+    local_defines = ["hwy_contrib_EXPORTS"],
+    deps = [
+        ":hwy",
+    ],
+)
+
+cc_library(
+    name = "math",
+    compatible_with = [],
+    copts = COPTS,
+    textual_hdrs = [
+        "hwy/contrib/math/math-inl.h",
+    ],
+    deps = [
+        ":hwy",
+    ],
+)
+
+cc_library(
+    name = "unroller",
+    compatible_with = [],
+    copts = COPTS,
+    textual_hdrs = [
+        "hwy/contrib/unroller/unroller-inl.h",
+    ],
+    deps = [
+        ":hwy",
+    ],
+)
+
+# Everything required for tests that use Highway.
+cc_library(
+    name = "hwy_test_util",
+    srcs = ["hwy/tests/test_util.cc"],
+    hdrs = ["hwy/tests/test_util.h"],
+    compatible_with = [],
+    copts = COPTS,
+    local_defines = ["hwy_test_EXPORTS"],
+    textual_hdrs = [
+        "hwy/tests/test_util-inl.h",
+        "hwy/tests/hwy_gtest.h",
+    ],
+    # Must not depend on a gtest variant, which can conflict with the
+    # GUNIT_INTERNAL_BUILD_MODE defined by the test.
+    deps = [
+        ":hwy",
+    ],
+)
+
+cc_library(
+    name = "nanobenchmark",
+    srcs = [
+        "hwy/nanobenchmark.cc",
+        "hwy/timer.cc",
+    ],
+    hdrs = [
+        "hwy/nanobenchmark.h",
+        "hwy/robust_statistics.h",
+        "hwy/timer.h",
+    ],
+    compatible_with = [],
+    copts = COPTS,
+    local_defines = ["hwy_EXPORTS"],
+    textual_hdrs = [
+        "hwy/timer-inl.h",
+    ],
+    deps = [":hwy"],
+)
+
+cc_binary(
+    name = "benchmark",
+    srcs = ["hwy/examples/benchmark.cc"],
+    copts = COPTS,
+    deps = [
+        ":hwy",
+        ":nanobenchmark",
+    ],
+)
+
+cc_library(
+    name = "skeleton",
+    srcs = ["hwy/examples/skeleton.cc"],
+    hdrs = ["hwy/examples/skeleton.h"],
+    copts = COPTS,
+    local_defines = ["hwy_EXPORTS"],
+    textual_hdrs = ["hwy/examples/skeleton-inl.h"],
+    deps = [
+        ":hwy",
+    ],
+)
+
+cc_test(
+    name = "list_targets",
+    size = "small",
+    srcs = ["hwy/tests/list_targets.cc"],
+    deps = [":hwy"],
+)
+
+# path, name
+HWY_TESTS = [
+    ("hwy/contrib/algo/", "copy_test"),
+    ("hwy/contrib/algo/", "find_test"),
+    ("hwy/contrib/algo/", "transform_test"),
+    ("hwy/contrib/bit_pack/", "bit_pack_test"),
+    ("hwy/contrib/dot/", "dot_test"),
+    ("hwy/contrib/image/", "image_test"),
+    ("hwy/contrib/math/", "math_test"),
+    ("hwy/contrib/unroller/", "unroller_test"),
+    # contrib/sort has its own BUILD, we also add sort_test to GUITAR_TESTS.
+    # To run bench_sort, specify --test=hwy/contrib/sort:bench_sort.
+    ("hwy/examples/", "skeleton_test"),
+    ("hwy/", "nanobenchmark_test"),
+    ("hwy/", "aligned_allocator_test"),
+    ("hwy/", "base_test"),
+    ("hwy/", "highway_test"),
+    ("hwy/", "targets_test"),
+    ("hwy/tests/", "arithmetic_test"),
+    ("hwy/tests/", "blockwise_shift_test"),
+    ("hwy/tests/", "blockwise_test"),
+    ("hwy/tests/", "cast_test"),
+    ("hwy/tests/", "combine_test"),
+    ("hwy/tests/", "compare_test"),
+    ("hwy/tests/", "compress_test"),
+    ("hwy/tests/", "convert_test"),
+    ("hwy/tests/", "count_test"),
+    ("hwy/tests/", "crypto_test"),
+    ("hwy/tests/", "demote_test"),
+    ("hwy/tests/", "expand_test"),
+    ("hwy/tests/", "float_test"),
+    ("hwy/tests/", "if_test"),
+    ("hwy/tests/", "interleaved_test"),
+    ("hwy/tests/", "logical_test"),
+    ("hwy/tests/", "mask_mem_test"),
+    ("hwy/tests/", "mask_test"),
+    ("hwy/tests/", "memory_test"),
+    ("hwy/tests/", "mul_test"),
+    ("hwy/tests/", "reduction_test"),
+    ("hwy/tests/", "resize_test"),
+    ("hwy/tests/", "reverse_test"),
+    ("hwy/tests/", "shift_test"),
+    ("hwy/tests/", "shuffle4_test"),
+    ("hwy/tests/", "slide_up_down_test"),
+    ("hwy/tests/", "swizzle_block_test"),
+    ("hwy/tests/", "swizzle_test"),
+    ("hwy/tests/", "table_test"),
+    ("hwy/tests/", "test_util_test"),
+    ("hwy/tests/", "tuple_test"),
+]
+
+HWY_TEST_COPTS = select({
+    ":compiler_msvc": [],
+    "//conditions:default": [
+        # gTest triggers this warning (which is enabled by the
+        # extra-semi in COPTS), so we need to disable it here,
+        # but it's still enabled for :hwy.
+        "-Wno-c++98-compat-extra-semi",
+    ],
+})
+
+HWY_TEST_DEPS = [
+    ":algo",
+    ":bit_pack",
+    ":dot",
+    ":hwy",
+    ":hwy_test_util",
+    ":image",
+    ":math",
+    ":nanobenchmark",
+    ":skeleton",
+    ":unroller",
+    "//hwy/contrib/sort:vqsort",
+    "@com_google_googletest//:gtest_main",
+]
+
+[
+    [
+        cc_test(
+            name = test,
+            size = "medium",
+            timeout = "long",  # default moderate is not enough for math_test
+            srcs = [
+                subdir + test + ".cc",
+            ],
+            copts = COPTS + HWY_TEST_COPTS,
+            features = select({
+                "@platforms//cpu:riscv64": ["fully_static_link"],
+                "//conditions:default": [],
+            }),
+            linkopts = select({
+                ":compiler_emscripten": [
+                    "-s ASSERTIONS=2",
+                    "-s ENVIRONMENT=node,shell,web",
+                    "-s ERROR_ON_UNDEFINED_SYMBOLS=1",
+                    "-s DEMANGLE_SUPPORT=1",
+                    "-s EXIT_RUNTIME=1",
+                    "-s ALLOW_MEMORY_GROWTH=1",
+                    "--pre-js $(location :preamble.js.lds)",
+                ],
+                "//conditions:default": [],
+            }),
+            linkstatic = select({
+                "@platforms//cpu:riscv64": True,
+                "//conditions:default": False,
+            }),
+            local_defines = ["HWY_IS_TEST"],
+            # for test_suite.
+            tags = ["hwy_ops_test"],
+            deps = HWY_TEST_DEPS + select({
+                ":compiler_emscripten": [":preamble.js.lds"],
+                "//conditions:default": [],
+            }),
+        ),
+    ]
+    for subdir, test in HWY_TESTS
+]
+
+# For manually building the tests we define here (:all does not work in --config=msvc)
+test_suite(
+    name = "hwy_ops_tests",
+    tags = ["hwy_ops_test"],
+)
+
+# Placeholder for integration test, do not remove
diff --git a/third-party/libjxl/libjxl/third_party/highway/CMakeLists.txt b/third-party/libjxl/libjxl/third_party/highway/CMakeLists.txt
new file mode 100644
index 0000000000..f968522e36
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/CMakeLists.txt
@@ -0,0 +1,626 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cmake_minimum_required(VERSION 3.10)
+
+# Set PIE flags for POSITION_INDEPENDENT_CODE targets, added in 3.14.
+if(POLICY CMP0083)
+  cmake_policy(SET CMP0083 NEW)
+endif()
+
+# Workaround for 3.19 raising error 'IMPORTED_LOCATION not set for imported
+# target "GTest::gtest_main"'.
+if(POLICY CMP0111)
+  cmake_policy(SET CMP0111 OLD)
+endif()
+
+project(hwy VERSION 1.0.6)  # Keep in sync with highway.h version
+# `hwy` is lowercase to handle find_package() in Config mode:
+set(namespace "${PROJECT_NAME}::")
+
+# Directly define the ABI version from the cmake project() version values:
+set(LIBRARY_VERSION "${hwy_VERSION}")
+set(LIBRARY_SOVERSION ${hwy_VERSION_MAJOR})
+
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
+# Search for Atomics implementation:
+find_package(Atomics REQUIRED)
+
+# Enable PIE binaries by default if supported.
+include(CheckPIESupported OPTIONAL RESULT_VARIABLE CHECK_PIE_SUPPORTED)
+if(CHECK_PIE_SUPPORTED)
+  check_pie_supported(LANGUAGES CXX)
+  if(CMAKE_CXX_LINK_PIE_SUPPORTED)
+    set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
+  endif()
+endif()
+
+include(GNUInstallDirs)
+
+if (NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE RelWithDebInfo)
+endif()
+
+# The following is only required with GCC < 6.1.0 or CLANG < 16.0
+set(HWY_CMAKE_ARM7 OFF CACHE BOOL "Set copts for Armv7 with NEON (requires vfpv4)?")
+
+# This must be set on 32-bit x86 with GCC < 13.1, otherwise math_test will be
+# skipped. For GCC 13.1+, you can also build with -fexcess-precision=standard.
+set(HWY_CMAKE_SSE2 OFF CACHE BOOL "Set SSE2 as baseline for 32-bit x86?")
+
+# Unconditionally adding -Werror risks breaking the build when new warnings
+# arise due to compiler/platform changes. Enable this in CI/tests.
+set(HWY_WARNINGS_ARE_ERRORS OFF CACHE BOOL "Add -Werror flag?")
+
+set(HWY_ENABLE_CONTRIB ON CACHE BOOL "Include contrib/")
+set(HWY_ENABLE_EXAMPLES FALSE CACHE BOOL "Build examples")
+set(HWY_ENABLE_INSTALL ON CACHE BOOL "Install library")
+set(HWY_ENABLE_TESTS FALSE CACHE BOOL "Enable HWY tests")
+
+include(CheckCXXSourceCompiles)
+check_cxx_source_compiles(
+   "int main() {
+      #if !defined(__EMSCRIPTEN__)
+      static_assert(false, \"__EMSCRIPTEN__ is not defined\");
+      #endif
+      return 0;
+    }"
+  HWY_EMSCRIPTEN
+)
+
+check_cxx_source_compiles(
+   "int main() {
+      #if !defined(__riscv)
+      static_assert(false, \"__riscv is not defined\");
+      #endif
+      return 0;
+    }"
+  HWY_RISCV
+)
+
+if (HWY_ENABLE_CONTRIB)
+# Glob all the traits so we don't need to modify this file when adding
+# additional special cases.
+file(GLOB HWY_CONTRIB_SOURCES "hwy/contrib/sort/vqsort_*.cc")
+list(APPEND HWY_CONTRIB_SOURCES
+    hwy/contrib/dot/dot-inl.h
+    hwy/contrib/image/image.cc
+    hwy/contrib/image/image.h
+    hwy/contrib/math/math-inl.h
+    hwy/contrib/sort/order.h
+    hwy/contrib/sort/shared-inl.h
+    hwy/contrib/sort/sorting_networks-inl.h
+    hwy/contrib/sort/traits-inl.h
+    hwy/contrib/sort/traits128-inl.h
+    hwy/contrib/sort/vqsort-inl.h
+    hwy/contrib/sort/vqsort.cc
+    hwy/contrib/sort/vqsort.h
+    hwy/contrib/algo/copy-inl.h
+    hwy/contrib/algo/find-inl.h
+    hwy/contrib/algo/transform-inl.h
+    hwy/contrib/unroller/unroller-inl.h
+)
+endif()  # HWY_ENABLE_CONTRIB
+
+set(HWY_SOURCES
+    hwy/aligned_allocator.cc
+    hwy/aligned_allocator.h
+    hwy/base.h
+    hwy/cache_control.h
+    hwy/detect_compiler_arch.h  # private
+    hwy/detect_targets.h  # private
+    hwy/foreach_target.h
+    hwy/highway.h
+    hwy/highway_export.h
+    hwy/nanobenchmark.cc
+    hwy/nanobenchmark.h
+    hwy/ops/arm_neon-inl.h
+    hwy/ops/arm_sve-inl.h
+    hwy/ops/emu128-inl.h
+    hwy/ops/generic_ops-inl.h
+    hwy/ops/ppc_vsx-inl.h
+    hwy/ops/rvv-inl.h
+    hwy/ops/scalar-inl.h
+    hwy/ops/set_macros-inl.h
+    hwy/ops/shared-inl.h
+    hwy/ops/wasm_128-inl.h
+    hwy/ops/tuple-inl.h
+    hwy/ops/x86_128-inl.h
+    hwy/ops/x86_256-inl.h
+    hwy/ops/x86_512-inl.h
+    hwy/per_target.cc
+    hwy/per_target.h
+    hwy/print-inl.h
+    hwy/print.cc
+    hwy/print.h
+    hwy/robust_statistics.h
+    hwy/targets.cc
+    hwy/targets.h
+    hwy/timer.cc
+    hwy/timer.h
+    hwy/timer-inl.h
+)
+
+set(HWY_TEST_SOURCES
+    hwy/tests/hwy_gtest.h
+    hwy/tests/test_util-inl.h
+    hwy/tests/test_util.cc
+    hwy/tests/test_util.h
+)
+
+if (MSVC)
+  set(HWY_FLAGS
+    # fix build error C1128 in blockwise*_test & arithmetic_test
+    /bigobj
+  )
+else()
+  set(HWY_FLAGS
+    # Avoid changing binaries based on the current time and date.
+    -Wno-builtin-macro-redefined
+    -D__DATE__="redacted"
+    -D__TIMESTAMP__="redacted"
+    -D__TIME__="redacted"
+
+    # Optimizations
+    -fmerge-all-constants
+
+    # Warnings
+    -Wall
+    -Wextra
+    # These are not included in Wall nor Wextra:
+    -Wconversion
+    -Wsign-conversion
+    -Wvla
+    -Wnon-virtual-dtor
+  )
+
+  if(${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
+    list(APPEND HWY_FLAGS
+      -Wfloat-overflow-conversion
+      -Wfloat-zero-conversion
+      -Wfor-loop-analysis
+      -Wgnu-redeclared-enum
+      -Winfinite-recursion
+      -Wself-assign
+      -Wstring-conversion
+      -Wtautological-overlap-compare
+      -Wthread-safety-analysis
+      -Wundefined-func-template
+
+      -fno-cxx-exceptions
+      -fno-slp-vectorize
+      -fno-vectorize
+
+      # Use color in messages
+      -fdiagnostics-show-option -fcolor-diagnostics
+    )
+    if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 6.0)
+      list(APPEND HWY_FLAGS -Wc++2a-extensions)
+    endif()
+  endif()
+
+  if (WIN32)
+    if(${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
+      list(APPEND HWY_FLAGS
+        -Wno-global-constructors
+        -Wno-language-extension-token
+        -Wno-used-but-marked-unused
+        -Wno-shadow-field-in-constructor
+        -Wno-unused-member-function
+        -Wno-unused-template
+        -Wno-c++98-compat-pedantic
+        -Wno-used-but-marked-unused
+        -Wno-zero-as-null-pointer-constant
+      )
+    endif()
+
+    list(APPEND HWY_FLAGS
+      -Wno-cast-align
+      -Wno-double-promotion
+      -Wno-float-equal
+      -Wno-format-nonliteral
+      -Wno-shadow
+      -Wno-sign-conversion
+    )
+  else()
+    list(APPEND HWY_FLAGS
+      -fmath-errno
+      -fno-exceptions
+    )
+  endif()  # WIN32
+
+  # Workaround for excess precision, see #1488.
+  if (HWY_CMAKE_SSE2)
+    list(APPEND HWY_FLAGS -msse2 -mfpmath=sse)
+  endif()
+
+  if (HWY_CMAKE_ARM7)
+    list(APPEND HWY_FLAGS
+      -march=armv7-a
+      -mfpu=neon-vfpv4
+      -mfloat-abi=hard  # must match the toolchain specified as CXX=
+      -mfp16-format=ieee  # required for vcvt_f32_f16
+    )
+  endif()  # HWY_CMAKE_ARM7
+
+  if(HWY_RISCV)
+    # gcc(13) and recent clang both support V, but not yet runtime dispatch, so
+    # we add the gcv compiler flag, which then requires the CPU (now when using
+    # either compiler) to support V.
+    list(APPEND HWY_FLAGS -march=rv64gcv1p0)
+    if(${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
+      list(APPEND HWY_FLAGS -menable-experimental-extensions)
+    endif()
+  endif()
+
+  if (HWY_WARNINGS_ARE_ERRORS)
+    list(APPEND HWY_FLAGS -Werror)
+  endif()
+
+  # Prevent "wasm-ld: error: --shared-memory is disallowed by targets.cc.o
+  # because it was not compiled with 'atomics' or 'bulk-memory' features."
+  if (HWY_EMSCRIPTEN)
+    list(APPEND HWY_FLAGS -matomics)
+  endif()
+
+endif()  # !MSVC
+
+include(CheckIncludeFile)
+check_include_file(sys/auxv.h  HAVE_SYS_AUXV_H)
+check_include_file(asm/hwcap.h HAVE_ASM_HWCAP_H)
+
+# By default prefer STATIC build (legacy behavior)
+option(BUILD_SHARED_LIBS "Build shared libraries" OFF)
+option(HWY_FORCE_STATIC_LIBS "Ignore BUILD_SHARED_LIBS" OFF)
+# only expose shared/static options to advanced users:
+mark_as_advanced(BUILD_SHARED_LIBS)
+mark_as_advanced(HWY_FORCE_STATIC_LIBS)
+# Define visibility settings globally:
+set(CMAKE_CXX_VISIBILITY_PRESET hidden)
+set(CMAKE_VISIBILITY_INLINES_HIDDEN 1)
+
+# Copy-cat "add_library" logic + add override.
+set(HWY_LIBRARY_TYPE "SHARED")
+if (NOT BUILD_SHARED_LIBS OR HWY_FORCE_STATIC_LIBS)
+  set(HWY_LIBRARY_TYPE "STATIC")
+endif()
+
+# This preprocessor define will drive the build, also used in the *.pc files:
+if("${HWY_LIBRARY_TYPE}" STREQUAL "SHARED")
+  set(DLLEXPORT_TO_DEFINE "HWY_SHARED_DEFINE")
+else()
+  set(DLLEXPORT_TO_DEFINE "HWY_STATIC_DEFINE")
+endif()
+
+add_library(hwy ${HWY_LIBRARY_TYPE} ${HWY_SOURCES})
+if(NOT HAVE_SYS_AUXV_H)
+  target_compile_definitions(hwy PUBLIC TOOLCHAIN_MISS_SYS_AUXV_H)
+endif()
+if(NOT HAVE_ASM_HWCAP_H)
+  target_compile_definitions(hwy PUBLIC TOOLCHAIN_MISS_ASM_HWCAP_H)
+endif()
+target_compile_definitions(hwy PUBLIC "${DLLEXPORT_TO_DEFINE}")
+target_compile_options(hwy PRIVATE ${HWY_FLAGS})
+set_property(TARGET hwy PROPERTY POSITION_INDEPENDENT_CODE ON)
+set_target_properties(hwy PROPERTIES VERSION ${LIBRARY_VERSION} SOVERSION ${LIBRARY_SOVERSION})
+target_include_directories(hwy PUBLIC
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
+target_compile_features(hwy PUBLIC cxx_std_11)
+set_target_properties(hwy PROPERTIES
+  LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/hwy/hwy.version)
+# For GCC __atomic_store_8, see #887
+target_link_libraries(hwy PRIVATE ${ATOMICS_LIBRARIES})
+# not supported by MSVC/Clang, safe to skip (we use DLLEXPORT annotations)
+if(UNIX AND NOT APPLE)
+  set_property(TARGET hwy APPEND_STRING PROPERTY
+    LINK_FLAGS " -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/hwy/hwy.version")
+endif()
+
+if (HWY_ENABLE_CONTRIB)
+add_library(hwy_contrib ${HWY_LIBRARY_TYPE} ${HWY_CONTRIB_SOURCES})
+target_link_libraries(hwy_contrib PUBLIC hwy)
+target_compile_options(hwy_contrib PRIVATE ${HWY_FLAGS})
+set_property(TARGET hwy_contrib PROPERTY POSITION_INDEPENDENT_CODE ON)
+set_target_properties(hwy_contrib PROPERTIES VERSION ${LIBRARY_VERSION} SOVERSION ${LIBRARY_SOVERSION})
+target_include_directories(hwy_contrib PUBLIC
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
+target_compile_features(hwy_contrib PUBLIC cxx_std_11)
+set_target_properties(hwy_contrib PROPERTIES
+  LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/hwy/hwy.version)
+# For GCC __atomic_store_8, see #887
+target_link_libraries(hwy_contrib PRIVATE ${ATOMICS_LIBRARIES})
+# not supported by MSVC/Clang, safe to skip (we use DLLEXPORT annotations)
+if(UNIX AND NOT APPLE)
+  set_property(TARGET hwy_contrib APPEND_STRING PROPERTY
+    LINK_FLAGS " -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/hwy/hwy.version")
+endif()
+endif()  # HWY_ENABLE_CONTRIB
+
+if (HWY_ENABLE_TESTS)
+add_library(hwy_test ${HWY_LIBRARY_TYPE} ${HWY_TEST_SOURCES})
+target_link_libraries(hwy_test PUBLIC hwy)
+target_compile_options(hwy_test PRIVATE ${HWY_FLAGS})
+set_property(TARGET hwy_test PROPERTY POSITION_INDEPENDENT_CODE ON)
+set_target_properties(hwy_test PROPERTIES VERSION ${LIBRARY_VERSION} SOVERSION ${LIBRARY_SOVERSION})
+target_include_directories(hwy_test PUBLIC
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
+target_compile_features(hwy_test PUBLIC cxx_std_11)
+set_target_properties(hwy_test PROPERTIES
+  LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/hwy/hwy.version)
+# not supported by MSVC/Clang, safe to skip (we use DLLEXPORT annotations)
+if(UNIX AND NOT APPLE)
+  set_property(TARGET hwy_test APPEND_STRING PROPERTY
+    LINK_FLAGS " -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/hwy/hwy.version")
+endif()
+
+# -------------------------------------------------------- hwy_list_targets
+# Generate a tool to print the compiled-in targets as defined by the current
+# flags. This tool will print to stderr at build time, after building hwy.
+add_executable(hwy_list_targets hwy/tests/list_targets.cc)
+target_compile_options(hwy_list_targets PRIVATE ${HWY_FLAGS})
+target_link_libraries(hwy_list_targets PRIVATE hwy)
+target_include_directories(hwy_list_targets PRIVATE
+  $<TARGET_PROPERTY:hwy,INCLUDE_DIRECTORIES>)
+# $<TARGET_FILE:...> always expands to the full path of the executable.
+# A bare target name cannot always be run (e.g. it lacks the '.\' prefix),
+# so the effective command to run uses the full path, preceded by the
+# emulator prefix (if any).
+if (NOT CMAKE_CROSSCOMPILING OR CMAKE_CROSSCOMPILING_EMULATOR)
+add_custom_command(TARGET hwy_list_targets POST_BUILD
+    COMMAND ${CMAKE_CROSSCOMPILING_EMULATOR} $<TARGET_FILE:hwy_list_targets> || (exit 0))
+endif()
+endif()  # HWY_ENABLE_TESTS
+
+# --------------------------------------------------------
+# Allow skipping the following sections for projects that do not need them:
+# tests, examples, benchmarks and installation.
+
+# -------------------------------------------------------- install library
+if (HWY_ENABLE_INSTALL)
+
+install(TARGETS hwy EXPORT hwy_targets
+  LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+  ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+  RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")
+# Install only the headers (names ending in a literal ".h"), keeping each
+# one's path relative to the current directory.
+foreach (source ${HWY_SOURCES})
+  if ("${source}" MATCHES "\\.h$")
+    get_filename_component(dirname "${source}" DIRECTORY)
+    install(FILES "${source}"
+        DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/${dirname}")
+  endif()
+endforeach()
+
+if (HWY_ENABLE_CONTRIB)
+install(TARGETS hwy_contrib EXPORT hwy_targets
+  LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+  ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+  RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")
+# Install only the headers (names ending in a literal ".h"), keeping each
+# one's path relative to the current directory.
+foreach (source ${HWY_CONTRIB_SOURCES})
+  if ("${source}" MATCHES "\\.h$")
+    get_filename_component(dirname "${source}" DIRECTORY)
+    install(FILES "${source}"
+        DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/${dirname}")
+  endif()
+endforeach()
+endif()  # HWY_ENABLE_CONTRIB
+
+if (HWY_ENABLE_TESTS)
+install(TARGETS hwy_test EXPORT hwy_targets
+  LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+  ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+  RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")
+# Install only the headers (names ending in a literal ".h"), keeping each
+# one's path relative to the current directory.
+foreach (source ${HWY_TEST_SOURCES})
+  if ("${source}" MATCHES "\\.h$")
+    get_filename_component(dirname "${source}" DIRECTORY)
+    install(FILES "${source}"
+        DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/${dirname}")
+  endif()
+endforeach()
+endif()  # HWY_ENABLE_TESTS
+
+# Add a pkg-config file for libhwy and the contrib/test libraries.
+set(HWY_LIBRARY_VERSION "${CMAKE_PROJECT_VERSION}")
+set(HWY_PC_FILES libhwy.pc)
+if (HWY_ENABLE_CONTRIB)
+list(APPEND HWY_PC_FILES libhwy-contrib.pc)
+endif()  # HWY_ENABLE_CONTRIB
+if (HWY_ENABLE_TESTS)
+list(APPEND HWY_PC_FILES libhwy-test.pc)
+endif()  # HWY_ENABLE_TESTS
+foreach (pc ${HWY_PC_FILES})
+  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${pc}.in" "${pc}" @ONLY)
+  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${pc}"
+      DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
+endforeach()
+
+endif()  # HWY_ENABLE_INSTALL
+# -------------------------------------------------------- Examples
+if (HWY_ENABLE_EXAMPLES)
+
+# Avoids mismatch between GTest's static CRT and our dynamic.
+set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+
+# Programming exercise with integrated benchmark
+add_executable(hwy_benchmark hwy/examples/benchmark.cc)
+target_sources(hwy_benchmark PRIVATE
+    hwy/nanobenchmark.h)
+# Try adding one of -DHWY_COMPILE_ONLY_SCALAR, -DHWY_COMPILE_ONLY_EMU128 or
+# -DHWY_COMPILE_ONLY_STATIC to observe the difference in targets printed.
+target_compile_options(hwy_benchmark PRIVATE ${HWY_FLAGS})
+target_link_libraries(hwy_benchmark PRIVATE hwy)
+target_link_libraries(hwy_benchmark PRIVATE ${ATOMICS_LIBRARIES})
+set_target_properties(hwy_benchmark
+    PROPERTIES RUNTIME_OUTPUT_DIRECTORY "examples/")
+
+endif()  # HWY_ENABLE_EXAMPLES
+# -------------------------------------------------------- Tests
+
+include(CTest)
+
+if(BUILD_TESTING AND HWY_ENABLE_TESTS)
+enable_testing()
+include(GoogleTest)
+
+set(HWY_SYSTEM_GTEST OFF CACHE BOOL "Use pre-installed googletest?")
+if(HWY_SYSTEM_GTEST)
+find_package(GTest REQUIRED)
+else()
+# Download and unpack googletest at configure time
+configure_file(CMakeLists.txt.in googletest-download/CMakeLists.txt)
+execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
+  RESULT_VARIABLE result
+  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download )
+if(result)
+  message(FATAL_ERROR "CMake step for googletest failed: ${result}")
+endif()
+execute_process(COMMAND ${CMAKE_COMMAND} --build .
+  RESULT_VARIABLE result
+  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download )
+if(result)
+  message(FATAL_ERROR "Build step for googletest failed: ${result}")
+endif()
+
+# Prevent overriding the parent project's compiler/linker
+# settings on Windows
+set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+
+# Add googletest directly to our build. This defines
+# the gtest and gtest_main targets.
+add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/googletest-src
+                 ${CMAKE_CURRENT_BINARY_DIR}/googletest-build
+                 EXCLUDE_FROM_ALL)
+endif()  # HWY_SYSTEM_GTEST
+
+set(HWY_TEST_FILES
+  hwy/contrib/algo/copy_test.cc
+  hwy/contrib/algo/find_test.cc
+  hwy/contrib/algo/transform_test.cc
+  hwy/aligned_allocator_test.cc
+  hwy/base_test.cc
+  hwy/highway_test.cc
+  hwy/nanobenchmark_test.cc
+  hwy/targets_test.cc
+  hwy/examples/skeleton_test.cc
+  hwy/tests/arithmetic_test.cc
+  hwy/tests/blockwise_shift_test.cc
+  hwy/tests/blockwise_test.cc
+  hwy/tests/cast_test.cc
+  hwy/tests/combine_test.cc
+  hwy/tests/compare_test.cc
+  hwy/tests/compress_test.cc
+  hwy/tests/convert_test.cc
+  hwy/tests/count_test.cc
+  hwy/tests/crypto_test.cc
+  hwy/tests/demote_test.cc
+  hwy/tests/expand_test.cc
+  hwy/tests/float_test.cc
+  hwy/tests/if_test.cc
+  hwy/tests/interleaved_test.cc
+  hwy/tests/logical_test.cc
+  hwy/tests/mask_mem_test.cc
+  hwy/tests/mask_test.cc
+  hwy/tests/memory_test.cc
+  hwy/tests/mul_test.cc
+  hwy/tests/reduction_test.cc
+  hwy/tests/resize_test.cc
+  hwy/tests/reverse_test.cc
+  hwy/tests/shift_test.cc
+  hwy/tests/shuffle4_test.cc
+  hwy/tests/slide_up_down_test.cc
+  hwy/tests/swizzle_block_test.cc
+  hwy/tests/swizzle_test.cc
+  hwy/tests/table_test.cc
+  hwy/tests/test_util_test.cc
+  hwy/tests/tuple_test.cc
+)
+
+set(HWY_TEST_LIBS hwy hwy_test)
+
+if (HWY_ENABLE_CONTRIB)
+list(APPEND HWY_TEST_LIBS hwy_contrib)
+
+list(APPEND HWY_TEST_FILES
+  hwy/contrib/dot/dot_test.cc
+  hwy/contrib/image/image_test.cc
+  # Was disabled due to SIGILL in clang7 debug build during gtest discovery
+  # phase (not reproducible locally); enabled here. Also tested via bazel build.
+  hwy/contrib/math/math_test.cc
+  hwy/contrib/sort/sort_test.cc
+  hwy/contrib/sort/bench_sort.cc
+  hwy/contrib/unroller/unroller_test.cc
+)
+endif()  # HWY_ENABLE_CONTRIB
+
+if(HWY_SYSTEM_GTEST)
+  if (CMAKE_VERSION VERSION_LESS 3.20)
+    set(HWY_GTEST_LIBS GTest::GTest GTest::Main)
+  else()
+    set(HWY_GTEST_LIBS GTest::gtest GTest::gtest_main)
+  endif()
+else()
+  set(HWY_GTEST_LIBS gtest gtest_main)
+endif()
+
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tests)
+foreach (TESTFILE IN LISTS HWY_TEST_FILES)
+  # The TESTNAME is the name without the extension or directory.
+  get_filename_component(TESTNAME ${TESTFILE} NAME_WE)
+  add_executable(${TESTNAME} ${TESTFILE})
+  target_compile_options(${TESTNAME} PRIVATE ${HWY_FLAGS})
+  # Test all targets, not just the best/baseline. This changes the default
+  # policy to all-attainable; note that setting -DHWY_COMPILE_* directly can
+  # cause compile errors because only one may be set, and other CMakeLists.txt
+  # that include us may set them.
+  target_compile_options(${TESTNAME} PRIVATE -DHWY_IS_TEST=1)
+
+  target_link_libraries(${TESTNAME} PRIVATE ${HWY_TEST_LIBS} ${HWY_GTEST_LIBS})
+  # For GCC __atomic_store_8, see #887
+  target_link_libraries(${TESTNAME} PRIVATE ${ATOMICS_LIBRARIES})
+  # Output test targets in the test directory.
+  set_target_properties(${TESTNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "tests")
+
+  if (HWY_EMSCRIPTEN)
+    set_target_properties(${TESTNAME} PROPERTIES LINK_FLAGS "-s SINGLE_FILE=1")
+  endif()
+
+  if(${CMAKE_VERSION} VERSION_LESS "3.10.3")
+    gtest_discover_tests(${TESTNAME} TIMEOUT 60)
+  else ()
+    gtest_discover_tests(${TESTNAME} DISCOVERY_TIMEOUT 60)
+  endif ()
+endforeach ()
+
+# The skeleton test uses the skeleton library code.
+target_sources(skeleton_test PRIVATE hwy/examples/skeleton.cc)
+
+endif()  # BUILD_TESTING AND HWY_ENABLE_TESTS
+
+if (HWY_ENABLE_INSTALL)
+  # write hwy-config file to handle `Config` mode
+  include(CMakePackageConfigHelpers)
+  write_basic_package_version_file("${CMAKE_CURRENT_BINARY_DIR}/hwy-config-version.cmake" COMPATIBILITY SameMajorVersion)
+  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/hwy-config-version.cmake" DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/hwy")
+  install(EXPORT hwy_targets NAMESPACE "${namespace}" FILE hwy-config.cmake DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/hwy")
+endif()
diff --git a/third-party/libjxl/libjxl/third_party/highway/CMakeLists.txt.in b/third-party/libjxl/libjxl/third_party/highway/CMakeLists.txt.in
new file mode 100644
index 0000000000..a0260b82f7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/CMakeLists.txt.in
@@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 2.8.12)
+# Helper project used to fetch googletest; NONE means no languages are enabled for it.
+project(googletest-download NONE)
+# googletest is pinned to a fixed commit; the empty *_COMMAND values disable the configure, build, install and test steps.
+include(ExternalProject)
+ExternalProject_Add(googletest
+  GIT_REPOSITORY    https://github.com/google/googletest.git
+  GIT_TAG           43efa0a4efd40c78b9210d15373112081899a97c
+  SOURCE_DIR        "${CMAKE_CURRENT_BINARY_DIR}/googletest-src"
+  BINARY_DIR        "${CMAKE_CURRENT_BINARY_DIR}/googletest-build"
+  CONFIGURE_COMMAND ""
+  BUILD_COMMAND     ""
+  INSTALL_COMMAND   ""
+  TEST_COMMAND      ""
+)
diff --git a/third-party/libjxl/libjxl/third_party/highway/CONTRIBUTING b/third-party/libjxl/libjxl/third_party/highway/CONTRIBUTING
new file mode 100644
index 0000000000..24e83ee578
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/CONTRIBUTING
@@ -0,0 +1,39 @@
+# How to Contribute
+
+We'd love to accept your patches and contributions to this project.
+Pull requests are very welcome. Bug reports (via issue) are also appreciated.
+
+Looking for a starter project? See the [wishlist](g3doc/op_wishlist.md) for
+some ideas for extensions and codegen improvements.
+
+There are just a few small guidelines you need to follow.
+
+## Contributor License Agreement
+
+Contributions to this project must be accompanied by a Contributor License
+Agreement. You (or your employer) retain the copyright to your contribution;
+this simply gives us permission to use and redistribute your contributions as
+part of the project. Head over to <https://cla.developers.google.com/> to see
+your current agreements on file or to sign a new one.
+
+You generally only need to submit a CLA once, so if you've already submitted one
+(even if it was for a different project), you probably don't need to do it
+again.
+
+## Code reviews
+
+All submissions, including submissions by project members, require review. We
+use GitHub pull requests for this purpose. Consult
+[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
+information on using pull requests.
+
+## API changes
+
+Highway promises to be backwards-compatible to the current documented API. If
+you would like to propose a change, please raise an issue to discuss how we can
+retain compatibility.
+
+## Community Guidelines
+
+This project follows
+[Google's Open Source Community Guidelines](https://opensource.google.com/conduct/).
diff --git a/third-party/libjxl/libjxl/third_party/highway/LICENSE b/third-party/libjxl/libjxl/third_party/highway/LICENSE
new file mode 100644
index 0000000000..f49a4e16e6
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/highway/LICENSE-BSD3 b/third-party/libjxl/libjxl/third_party/highway/LICENSE-BSD3
new file mode 100644
index 0000000000..7e13e8b566
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/LICENSE-BSD3
@@ -0,0 +1,33 @@
+Copyright (c) the Highway Project Authors.
+All rights reserved.
+
+Redistribution and use in source and binary forms,
+with or without
+modification, are permitted provided that
+the following conditions are met:
+
+1. Redistributions of source code
+must retain the above copyright notice, this
+list of conditions and
+the following disclaimer.
+
+2. Redistributions in binary form
+must reproduce the above copyright notice,
+this list of conditions
+and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/highway/MODULE.bazel b/third-party/libjxl/libjxl/third_party/highway/MODULE.bazel
new file mode 100644
index 0000000000..8cdf658a53
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/MODULE.bazel
@@ -0,0 +1,6 @@
+module(name = "highway", version = "1.0.4")
+# Bazel module dependencies, each declared with the version this module requests.
+bazel_dep(name = "bazel_skylib", version = "1.3.0")
+bazel_dep(name = "googletest", version = "1.12.1")
+bazel_dep(name = "rules_cc", version = "0.0.4")
+bazel_dep(name = "rules_license", version = "0.0.4")
diff --git a/third-party/libjxl/libjxl/third_party/highway/README.md b/third-party/libjxl/libjxl/third_party/highway/README.md
new file mode 100644
index 0000000000..8370caf115
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/README.md
@@ -0,0 +1,427 @@
+# Efficient and performance-portable vector software
+
+[//]: # (placeholder, do not remove)
+
+Highway is a C++ library that provides portable SIMD/vector intrinsics.
+
+[Documentation](https://google.github.io/highway/en/master/)
+
+Previously licensed under Apache 2, now dual-licensed as Apache 2 / BSD-3.
+
+## Why
+
+We are passionate about high-performance software. We see major untapped
+potential in CPUs (servers, mobile, desktops). Highway is for engineers who want
+to reliably and economically push the boundaries of what is possible in
+software.
+
+## How
+
+CPUs provide SIMD/vector instructions that apply the same operation to multiple
+data items. This can reduce energy usage e.g. *fivefold* because fewer
+instructions are executed. We also often see *5-10x* speedups.
+
+Highway makes SIMD/vector programming practical and workable according to these
+guiding principles:
+
+**Does what you expect**: Highway is a C++ library with carefully-chosen
+functions that map well to CPU instructions without extensive compiler
+transformations. The resulting code is more predictable and robust to code
+changes/compiler updates than autovectorization.
+
+**Works on widely-used platforms**: Highway supports five architectures; the
+same application code can target various instruction sets, including those with
+'scalable' vectors (size unknown at compile time). Highway only requires C++11
+and supports four families of compilers. If you would like to use Highway on
+other platforms, please raise an issue.
+
+**Flexible to deploy**: Applications using Highway can run on heterogeneous
+clouds or client devices, choosing the best available instruction set at
+runtime. Alternatively, developers may choose to target a single instruction set
+without any runtime overhead. In both cases, the application code is the same
+except for swapping `HWY_STATIC_DISPATCH` with `HWY_DYNAMIC_DISPATCH` plus one
+line of code.
+
+**Suitable for a variety of domains**: Highway provides an extensive set of
+operations, used for image processing (floating-point), compression, video
+analysis, linear algebra, cryptography, sorting and random generation. We
+recognise that new use-cases may require additional ops and are happy to add
+them where it makes sense (e.g. no performance cliffs on some architectures). If
+you would like to discuss, please file an issue.
+
+**Rewards data-parallel design**: Highway provides tools such as Gather,
+MaskedLoad, and FixedTag to enable speedups for legacy data structures. However,
+the biggest gains are unlocked by designing algorithms and data structures for
+scalable vectors. Helpful techniques include batching, structure-of-array
+layouts, and aligned/padded allocations.
+
+## Examples
+
+Online demos using Compiler Explorer:
+
+-   [multiple targets with dynamic dispatch](https://gcc.godbolt.org/z/KM3ben7ET)
+    (more complicated, but flexible and uses best available SIMD)
+-   [single target using -m flags](https://gcc.godbolt.org/z/rGnjMevKG)
+    (simpler, but requires/only uses the instruction set enabled by compiler
+    flags)
+
+We observe that Highway is referenced in the following open source projects,
+found via sourcegraph.com. Most are GitHub repositories. If you would like to
+add your project or link to it directly, feel free to raise an issue or contact
+us via the below email.
+
+*   Browsers: Chromium (+Vivaldi), Firefox (+floorp / foxhound / librewolf / Waterfox)
+*   Cryptography: google/distributed_point_functions
+*   Image codecs: eustas/2im, [Grok JPEG 2000](https://github.com/GrokImageCompression/grok), [JPEG XL](https://github.com/libjxl/libjxl), OpenHTJ2K, [JPEGenc](https://github.com/osamu620/JPEGenc)
+*   Image processing: cloudinary/ssimulacra2, m-ab-s/media-autobuild_suite
+*   Image viewers: AlienCowEatCake/ImageViewer, mirillis/jpegxl-wic,
+    [Lux panorama/image viewer](https://bitbucket.org/kfj/pv/)
+*   Information retrieval: [iresearch database index](https://github.com/iresearch-toolkit/iresearch/blob/e7638e7a4b99136ca41f82be6edccf01351a7223/core/utils/simd_utils.hpp), michaeljclark/zvec
+
+Other
+
+*   [Evaluation of C++ SIMD Libraries](https://www.mnm-team.org/pub/Fopras/rock23/):
+    "Highway excelled with a strong performance across multiple SIMD extensions
+    [..]. Thus, Highway may currently be the most suitable SIMD library for many
+    software projects."
+*   [zimt](https://github.com/kfjahnke/zimt): C++11 template library to process n-dimensional arrays with multi-threaded SIMD code
+*   [vectorized Quicksort](https://github.com/google/highway/tree/master/hwy/contrib/sort) ([paper](https://arxiv.org/abs/2205.05982))
+
+If you'd like to get Highway, in addition to cloning from this GitHub repository
+or using it as a Git submodule, you can also find it in the following package
+managers or repositories: alpinelinux, conan-io, conda-forge, DragonFlyBSD,
+freebsd, ghostbsd, microsoft/vcpkg, MidnightBSD, MSYS2, NetBSD, openSUSE,
+opnsense, Xilinx/Vitis_Libraries. See also the list at
+https://repology.org/project/highway-simd-library/versions .
+
+## Current status
+
+### Targets
+
+Highway supports 20 targets, listed in alphabetical order of platform:
+
+-   Any: `EMU128`, `SCALAR`;
+-   Arm: `NEON` (Armv7+), `SVE`, `SVE2`, `SVE_256`, `SVE2_128`;
+-   POWER: `PPC8` (v2.07), `PPC9` (v3.0), `PPC10` (v3.1B, not yet supported
+    due to compiler bugs, see #1207; also requires QEMU 7.2);
+-   RISC-V: `RVV` (1.0);
+-   WebAssembly: `WASM`, `WASM_EMU256` (a 2x unrolled version of wasm128,
+    enabled if `HWY_WANT_WASM2` is defined. This will remain supported until it
+    is potentially superseded by a future version of WASM.);
+-   x86:
+    -   `SSE2`
+    -   `SSSE3` (~Intel Core)
+    -   `SSE4` (~Nehalem, also includes AES + CLMUL).
+    -   `AVX2` (~Haswell, also includes BMI2 + F16 + FMA)
+    -   `AVX3` (~Skylake, AVX-512F/BW/CD/DQ/VL)
+    -   `AVX3_DL` (~Icelake, includes BitAlg + CLMUL + GFNI + VAES + VBMI +
+        VBMI2 + VNNI + VPOPCNT; requires opt-in by defining `HWY_WANT_AVX3_DL`
+        unless compiling for static dispatch),
+    -   `AVX3_ZEN4` (like AVX3_DL but optimized for AMD Zen4; requires opt-in by
+        defining `HWY_WANT_AVX3_ZEN4` if compiling for static dispatch)
+    -   `AVX3_SPR` (~Sapphire Rapids, includes AVX-512FP16)
+
+Our policy is that unless otherwise specified, targets will remain supported as
+long as they can be (cross-)compiled with currently supported Clang or GCC, and
+tested using QEMU. If the target can be compiled with LLVM trunk and tested
+using our version of QEMU without extra flags, then it is eligible for inclusion
+in our continuous testing infrastructure. Otherwise, the target will be manually
+tested before releases with selected versions/configurations of Clang and GCC.
+
+SVE was initially tested using farm_sve (see acknowledgments).
+
+### Versioning
+
+Highway releases aim to follow the semver.org system (MAJOR.MINOR.PATCH),
+incrementing MINOR after backward-compatible additions and PATCH after
+backward-compatible fixes. We recommend using releases (rather than the Git tip)
+because they are tested more extensively, see below.
+
+The current version 1.0 signals an increased focus on backwards compatibility.
+Applications using documented functionality will remain compatible with future
+updates that have the same major version number.
+
+### Testing
+
+Continuous integration tests build with a recent version of Clang (running on
+native x86, or QEMU for RISC-V and Arm) and MSVC 2019 (v19.28, running on native
+x86).
+
+Before releases, we also test on x86 with Clang and GCC, and Armv7/8 via GCC
+cross-compile. See the [testing process](g3doc/release_testing_process.md) for
+details.
+
+### Related modules
+
+The `contrib` directory contains SIMD-related utilities: an image class with
+aligned rows, a math library (16 functions already implemented, mostly
+trigonometry), and functions for computing dot products and sorting.
+
+### Other libraries
+
+If you only require x86 support, you may also use Agner Fog's
+[VCL vector class library](https://github.com/vectorclass). It includes many
+functions including a complete math library.
+
+If you have existing code using x86/NEON intrinsics, you may be interested in
+[SIMDe](https://github.com/simd-everywhere/simde), which emulates those
+intrinsics using other platforms' intrinsics or autovectorization.
+
+## Installation
+
+This project uses CMake to generate and build. In a Debian-based system you can
+install it via:
+
+```bash
+sudo apt install cmake
+```
+
+Highway's unit tests use [googletest](https://github.com/google/googletest).
+By default, Highway's CMake downloads this dependency at configuration time.
+You can disable this by setting the `HWY_SYSTEM_GTEST` CMake variable to ON and
+installing gtest separately:
+
+```bash
+sudo apt install libgtest-dev
+```
+
+Running cross-compiled tests requires support from the OS, which on Debian is
+provided by the `qemu-user-binfmt` package.
+
+To build Highway as a shared or static library (depending on BUILD_SHARED_LIBS),
+the standard CMake workflow can be used:
+
+```bash
+mkdir -p build && cd build
+cmake ..
+make -j && make test
+```
+
+Or you can run `run_tests.sh` (`run_tests.bat` on Windows).
+
+Bazel is also supported for building, but it is not as widely used/tested.
+
+When building for Armv7, a limitation of current compilers requires you to add
+`-DHWY_CMAKE_ARM7:BOOL=ON` to the CMake command line; see #834 and #1032. We
+understand that work is underway to remove this limitation.
+
+Building on 32-bit x86 is not officially supported, and AVX2/3 are disabled by
+default there. Note that johnplatts has successfully built and run the Highway
+tests on 32-bit x86, including AVX2/3, on GCC 7/8 and Clang 8/11/12. On Ubuntu
+22.04, Clang 11 and 12, but not later versions, require extra compiler flags
+`-m32 -isystem /usr/i686-linux-gnu/include`. Clang 10 and earlier require the
+above plus `-isystem /usr/i686-linux-gnu/include/c++/12/i686-linux-gnu`. See
+#1279.
+
+## Building highway - Using vcpkg
+
+highway is now available in [vcpkg](https://github.com/Microsoft/vcpkg)
+
+```bash
+vcpkg install highway
+```
+
+The highway port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
+
+## Quick start
+
+You can use the `benchmark` inside examples/ as a starting point.
+
+A [quick-reference page](g3doc/quick_reference.md) briefly lists all operations
+and their parameters, and the [instruction_matrix](g3doc/instruction_matrix.pdf)
+indicates the number of instructions per operation.
+
+The [FAQ](g3doc/faq.md) answers questions about portability, API design and
+where to find more information.
+
+We recommend using full SIMD vectors whenever possible for maximum performance
+portability. To obtain them, pass a `ScalableTag<float>` (or equivalently
+`HWY_FULL(float)`) tag to functions such as `Zero/Set/Load`. There are two
+alternatives for use-cases requiring an upper bound on the lanes:
+
+-   For up to `N` lanes, specify `CappedTag<T, N>` or the equivalent
+    `HWY_CAPPED(T, N)`. The actual number of lanes will be `N` rounded down to
+    the nearest power of two, such as 4 if `N` is 5, or 8 if `N` is 8. This is
+    useful for data structures such as a narrow matrix. A loop is still required
+    because vectors may actually have fewer than `N` lanes.
+
+-   For exactly a power of two `N` lanes, specify `FixedTag<T, N>`. The largest
+    supported `N` depends on the target, but is guaranteed to be at least
+    `16/sizeof(T)`.
+
+Due to ADL restrictions, user code calling Highway ops must either:
+*   Reside inside `namespace hwy { namespace HWY_NAMESPACE {`; or
+*   prefix each op with an alias such as `namespace hn = hwy::HWY_NAMESPACE;
+    hn::Add()`; or
+*   add using-declarations for each op used: `using hwy::HWY_NAMESPACE::Add;`.
+
+Additionally, each function that calls Highway ops (such as `Load`) must either
+be prefixed with `HWY_ATTR`, OR reside between `HWY_BEFORE_NAMESPACE()` and
+`HWY_AFTER_NAMESPACE()`. Lambda functions currently require `HWY_ATTR` before
+their opening brace.
+
+The entry points into code using Highway differ slightly depending on whether
+they use static or dynamic dispatch.
+
+*   For static dispatch, `HWY_TARGET` will be the best available target among
+    `HWY_BASELINE_TARGETS`, i.e. those allowed for use by the compiler (see
+    [quick-reference](g3doc/quick_reference.md)). Functions inside
+    `HWY_NAMESPACE` can be called using `HWY_STATIC_DISPATCH(func)(args)` within
+    the same module they are defined in. You can call the function from other
+    modules by wrapping it in a regular function and declaring the regular
+    function in a header.
+
+*   For dynamic dispatch, a table of function pointers is generated via the
+    `HWY_EXPORT` macro that is used by `HWY_DYNAMIC_DISPATCH(func)(args)` to
+    call the best function pointer for the current CPU's supported targets. A
+    module is automatically compiled for each target in `HWY_TARGETS` (see
+    [quick-reference](g3doc/quick_reference.md)) if `HWY_TARGET_INCLUDE` is
+    defined and `foreach_target.h` is included.
+
+When using dynamic dispatch, `foreach_target.h` is included from translation
+units (.cc files), not headers. Headers containing vector code shared between
+several translation units require a special include guard, for example the
+following taken from `examples/skeleton-inl.h`:
+
+```
+#if defined(HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
+#undef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
+#else
+#define HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
+#endif
+
+#include "hwy/highway.h"
+// Your vector code
+#endif
+```
+
+By convention, we name such headers `-inl.h` because their contents (often
+function templates) are usually inlined.
+
+## Compiler flags
+
+Applications should be compiled with optimizations enabled - without inlining,
+SIMD code may slow down by factors of 10 to 100. For clang and GCC, `-O2` is
+generally sufficient.
+
+For MSVC, we recommend compiling with `/Gv` to allow non-inlined functions to
+pass vector arguments in registers. If intending to use the AVX2 target together
+with half-width vectors (e.g. for `PromoteTo`), it is also important to compile
+with `/arch:AVX2`. This seems to be the only way to generate VEX-encoded SSE4
+instructions on MSVC. Otherwise, mixing VEX-encoded AVX2 instructions and
+non-VEX SSE4 may cause severe performance degradation. Unfortunately, the
+resulting binary will then require AVX2. Note that no such flag is needed for
+clang and GCC because they support target-specific attributes, which we use to
+ensure proper VEX code generation for AVX2 targets.
+
+## Strip-mining loops
+
+When vectorizing a loop, an important question is whether and how to deal with
+a number of iterations ('trip count', denoted `count`) that is not a multiple
+of the vector size `N = Lanes(d)`. For example, it may be necessary to avoid
+writing past the end of an array.
+
+In this section, let `T` denote the element type and `d = ScalableTag<T>`.
+Assume the loop body is given as a function `template<bool partial, class D>
+void LoopBody(D d, size_t index, size_t max_n)`.
+
+"Strip-mining" is a technique for vectorizing a loop by transforming it into an
+outer loop and inner loop, such that the number of iterations in the inner loop
+matches the vector width. Then, the inner loop is replaced with vector
+operations.
+
+Highway offers several strategies for loop vectorization:
+
+*   Ensure all inputs/outputs are padded. Then the (outer) loop is simply
+
+    ```
+    for (size_t i = 0; i < count; i += N) LoopBody<false>(d, i, 0);
+    ```
+    Here, the template parameter and third function argument are not needed.
+
+    This is the preferred option, unless `N` is in the thousands and vector
+    operations are pipelined with long latencies. This was the case for
+    supercomputers in the 90s, but nowadays ALUs are cheap and we see most
+    implementations split vectors into 1, 2 or 4 parts, so there is little cost
+    to processing entire vectors even if we do not need all their lanes. Indeed
+    this avoids the (potentially large) cost of predication or partial
+    loads/stores on older targets, and does not duplicate code.
+
+*   Process whole vectors and include previously processed elements
+    in the last vector:
+    ```
+    for (size_t i = 0; i < count; i += N) LoopBody<false>(d, HWY_MIN(i, count - N), 0);
+    ```
+
+    This is the second preferred option provided that `count >= N`
+    and `LoopBody` is idempotent. Some elements might be processed twice, but
+    a single code path and full vectorization is usually worth it. Even if
+    `count < N`, it usually makes sense to pad inputs/outputs up to `N`.
+
+*   Use the `Transform*` functions in hwy/contrib/algo/transform-inl.h. This
+    takes care of the loop and remainder handling and you simply define a
+    generic lambda function (C++14) or functor which receives the current vector
+    from the input/output array, plus optionally vectors from up to two extra
+    input arrays, and returns the value to write to the input/output array.
+
+    Here is an example implementing the BLAS function SAXPY (`alpha * x + y`):
+
+    ```
+    Transform1(d, x, n, y, [](auto d, const auto v, const auto v1) HWY_ATTR {
+      return MulAdd(Set(d, alpha), v, v1);
+    });
+    ```
+
+*   Process whole vectors as above, followed by a scalar loop:
+
+    ```
+    size_t i = 0;
+    for (; i + N <= count; i += N) LoopBody<false>(d, i, 0);
+    for (; i < count; ++i) LoopBody<false>(CappedTag<T, 1>(), i, 0);
+    ```
+    The template parameter and third function argument are again not needed.
+
+    This avoids duplicating code, and is reasonable if `count` is large.
+    If `count` is small, the second loop may be slower than the next option.
+
+*   Process whole vectors as above, followed by a single call to a modified
+    `LoopBody` with masking:
+
+    ```
+    size_t i = 0;
+    for (; i + N <= count; i += N) {
+      LoopBody<false>(d, i, 0);
+    }
+    if (i < count) {
+      LoopBody<true>(d, i, count - i);
+    }
+    ```
+    Now the template parameter and third function argument can be used inside
+    `LoopBody` to non-atomically 'blend' the first `num_remaining` lanes of `v`
+    with the previous contents of memory at subsequent locations:
+    `BlendedStore(v, FirstN(d, num_remaining), d, pointer);`. Similarly,
+    `MaskedLoad(FirstN(d, num_remaining), d, pointer)` loads the first
+    `num_remaining` elements and returns zero in other lanes.
+
+    This is a good default when it is infeasible to ensure vectors are padded,
+    but is only safe `#if !HWY_MEM_OPS_MIGHT_FAULT`!
+    In contrast to the scalar loop, only a single final iteration is needed.
+    The increased code size from two loop bodies is expected to be worthwhile
+    because it avoids the cost of masking in all but the final iteration.
+
+## Additional resources
+
+*   [Highway introduction (slides)](g3doc/highway_intro.pdf)
+*   [Overview of instructions per operation on different architectures](g3doc/instruction_matrix.pdf)
+*   [Design philosophy and comparison](g3doc/design_philosophy.md)
+*   [Implementation details](g3doc/impl_details.md)
+
+## Acknowledgments
+
+We have used [farm-sve](https://gitlab.inria.fr/bramas/farm-sve) by Berenger
+Bramas; it has proved useful for checking the SVE port on an x86 development
+machine.
+
+This is not an officially supported Google product.
+Contact: janwas@google.com
diff --git a/third-party/libjxl/libjxl/third_party/highway/WORKSPACE b/third-party/libjxl/libjxl/third_party/highway/WORKSPACE
new file mode 100644
index 0000000000..71e0e69942
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/WORKSPACE
@@ -0,0 +1,38 @@
+workspace(name = "highway")
+
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe")
+
+maybe(
+    http_archive,
+    name = "com_google_googletest",
+    urls = ["https://github.com/google/googletest/archive/e2239ee6043f73722e7aa812a459f54a28552929.zip"],
+    sha256 = "8daa1a71395892f7c1ec5f7cb5b099a02e606be720d62f1a6a98f8f8898ec826",
+    strip_prefix = "googletest-e2239ee6043f73722e7aa812a459f54a28552929",
+)
+
+# See https://google.github.io/googletest/quickstart-bazel.html
+maybe(
+    http_archive,
+    name = "rules_cc",
+    urls = ["https://github.com/bazelbuild/rules_cc/archive/40548a2974f1aea06215272d9c2b47a14a24e556.zip"],
+    sha256 = "56ac9633c13d74cb71e0546f103ce1c58810e4a76aa8325da593ca4277908d72",
+    strip_prefix = "rules_cc-40548a2974f1aea06215272d9c2b47a14a24e556",
+)
+
+# Need recent version for config_setting_group. NOTE(review): unlike the other archives here, no sha256 is pinned for this download.
+maybe(
+    http_archive,
+    name = "bazel_skylib",
+    urls = ["https://github.com/bazelbuild/bazel-skylib/releases/download/0.9.0/bazel_skylib-0.9.0.tar.gz"],
+)
+
+maybe(
+    http_archive,
+    name = "rules_license",
+    urls = [
+        "https://github.com/bazelbuild/rules_license/releases/download/0.0.4/rules_license-0.0.4.tar.gz",
+        "https://mirror.bazel.build/github.com/bazelbuild/rules_license/releases/download/0.0.4/rules_license-0.0.4.tar.gz",
+    ],
+    sha256 = "6157e1e68378532d0241ecd15d3c45f6e5cfd98fc10846045509fb2a7cc9e381",
+)
diff --git a/third-party/libjxl/libjxl/third_party/highway/cmake/FindAtomics.cmake b/third-party/libjxl/libjxl/third_party/highway/cmake/FindAtomics.cmake
new file mode 100644
index 0000000000..e866b73fac
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/cmake/FindAtomics.cmake
@@ -0,0 +1,73 @@
+# Original issue:
+# * https://gitlab.kitware.com/cmake/cmake/-/issues/23021#note_1098733
+#
+# For reference:
+# * https://gcc.gnu.org/wiki/Atomic/GCCMM
+#
+# riscv64 specific:
+# * https://lists.debian.org/debian-riscv/2022/01/msg00009.html
+#
+# ATOMICS_FOUND        - system has c++ atomics
+# ATOMICS_LIBRARIES    - libraries needed to use c++ atomics
+
+include(CheckCXXSourceCompiles)
+
+# RISC-V only has 32-bit and 64-bit atomic instructions. GCC is supposed
+# to convert smaller atomics to those larger ones via masking and
+# shifting like LLVM, but it’s a known bug that it does not. This means
+# anything that wants to use atomics on 1-byte or 2-byte types needs
+# -latomic, but not 4-byte or 8-byte (though it does no harm).
+set(atomic_code
+    "
+     #include <atomic>
+     #include <cstdint>
+     std::atomic<uint8_t> n8 (0); // riscv64
+     std::atomic<uint64_t> n64 (0); // armel, mipsel, powerpc
+     int main() {
+       ++n8;
+       ++n64;
+       return 0;
+     }")
+
+# Find modules are loaded into the caller's variable scope, so save the
+# caller's values of the variables changed below and restore them afterwards.
+if(DEFINED CMAKE_CXX_STANDARD)
+  set(_atomics_saved_cxx_standard "${CMAKE_CXX_STANDARD}")
+endif()
+set(_atomics_saved_required_libraries "${CMAKE_REQUIRED_LIBRARIES}")
+
+# https://gitlab.kitware.com/cmake/cmake/-/issues/24063
+set(CMAKE_CXX_STANDARD 11)
+check_cxx_source_compiles("${atomic_code}" ATOMICS_LOCK_FREE_INSTRUCTIONS)
+
+if(ATOMICS_LOCK_FREE_INSTRUCTIONS)
+  # The test program builds without any extra library.
+  set(ATOMICS_FOUND TRUE)
+  set(ATOMICS_LIBRARIES)
+else()
+  # Retry the same test program with -latomic as the required library.
+  set(CMAKE_REQUIRED_LIBRARIES "-latomic")
+  check_cxx_source_compiles("${atomic_code}" ATOMICS_IN_LIBRARY)
+  set(CMAKE_REQUIRED_LIBRARIES "${_atomics_saved_required_libraries}")
+  if(ATOMICS_IN_LIBRARY)
+    set(ATOMICS_LIBRARY atomic)
+    include(FindPackageHandleStandardArgs)
+    find_package_handle_standard_args(Atomics DEFAULT_MSG ATOMICS_LIBRARY)
+    set(ATOMICS_LIBRARIES ${ATOMICS_LIBRARY})
+    unset(ATOMICS_LIBRARY)
+  else()
+    if(Atomics_FIND_REQUIRED)
+      message(FATAL_ERROR "Neither lock free instructions nor -latomic found.")
+    endif()
+  endif()
+endif()
+unset(atomic_code)
+unset(_atomics_saved_required_libraries)
+
+# Put back the caller's CMAKE_CXX_STANDARD instead of always unsetting it.
+if(DEFINED _atomics_saved_cxx_standard)
+  set(CMAKE_CXX_STANDARD "${_atomics_saved_cxx_standard}")
+  unset(_atomics_saved_cxx_standard)
+else()
+  unset(CMAKE_CXX_STANDARD)
+endif()
diff --git a/third-party/libjxl/libjxl/third_party/highway/debian/changelog b/third-party/libjxl/libjxl/third_party/highway/debian/changelog
new file mode 100644
index 0000000000..e2702fcace
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/debian/changelog
@@ -0,0 +1,229 @@
+highway (1.0.6-1) UNRELEASED; urgency=medium
+
+  * Add MaskedGatherIndex, MaskedScatterIndex, LoadN, StoreN
+  * Add SatWidenMulPairwiseAdd, SumOfMulQuadAccumulate, PromoteUpperLowerTo
+  * Add F64 for Wasm, F64 AbsDiff
+  * Add F16 support to AVX3_SPR, RVV tuple (both not yet enabled)
+  * Validate all D args in x86 function signatures
+  * License: now dual Apache2/BSD3
+  * Doc: new users, vcpkg install instructions, AVX10 plans
+  * Doc: advice on dynamic dispatch plus -march flags
+  * Build: avoid installing hwy_test if !HWY_ENABLE_TESTS
+  * Codegen: improved PPC9 Find*True, variable-length CopyBytes
+  * Fix: GCC 8.2, MSVC, ICC, PPC9, SVE, arm64 MSVC issues
+  * Fix: IfNegativeThenElse, MulFixedPoint15, Debian changelog format
+  * Tests: faster builds (split up), use release builds
+
+ -- Jan Wassenberg <janwas@google.com>  Fri, 11 Aug 2023 14:00:00 +0200
+
+highway (1.0.5-1) UNRELEASED; urgency=medium
+
+  * Add Insert/ExtractBlock, BroadcastBlock/Lane, NumBlocks
+  * Add integer Le/Ge and [Neg]MulAdd, extend DemoteTo/PromoteTo
+  * Add Leading/TrailingZeroCount, HighestSetBitIndex, ReverseBits
+  * Add MaskedLoadOr, tuple Get/Set/Create, ReduceSum, WidenMulPairwiseAdd
+  * Add [ZeroExtend]ResizeBitCast, BitwiseIfThenElse, Find[Known]LastTrue
+  * Add AESRoundInv, AESKeyGenAssist
+  * Add contrib/math Atan2/SinCos, contrib/unroller
+  * Add fp16/bf16 support (Armv8, SVE, RVV), HWY_DYNAMIC_POINTER
+  * Add OrderedTruncate2To, Per4LaneBlockShuffle, TwoTablesLookupLanes
+  * Add SlideUp/Down[Blocks/Lanes], Slide1Up/Down, ReverseLaneBytes
+  * Add SetBeforeFirst, SetAtOrBefore/AfterFirst, SetOnlyFirst 
+  * Add 8-bit Reverse2/4/8, Shl/Shr, RotateRight, Reverse, Mul
+  * Add 8/16-bit DupEven/Odd, TableLookupLanes
+  * Add F64 ApproximateReciprocal[Sqrt], 32/64-bit SaturatedAdd/Sub
+  * Build: Support Bazel modules
+  * Codegen improvements
+  * Compiler: support Clang 15/16
+  * Doc: add Github pages, support policy, evaluation
+  * Doc: publish AVX-512 throttling/startup findings
+  * Release: add signing
+  * Test: add GCC to Github Actions
+  * VQSort: small N speedups: fix seeding, func ptr, 8-wide network.
+  * VQSort: add BenchAllColdSort, VQSortStatic
+  * VQSort: fix subnormal/inf/NaN, support fp16, fix KV types
+  * Workarounds: RVV VXRM, x87 excess precision, missing intrinsics
+
+ -- Jan Wassenberg <janwas@google.com>  Wed, 19 Jul 2023 15:00:00 +0200
+
+highway (1.0.4-1) UNRELEASED; urgency=medium
+
+  * Add PPC8..10, SSE2, AVX3_ZEN4, NEON_WITHOUT_AES targets
+  * Add Expand, LoadExpand, integer AbsDiff, SumsOf8AbsDiff
+  * Improved Half/Twice support, codegen for Shift*Same
+  * Support Wasm in Godbolt
+  * Faster KV128 sorting
+  * Fix armv7 build config, CMake config mode
+  * Update RVV intrinsics for 1.0-draft
+
+ -- Jan Wassenberg <janwas@google.com>  Fri, 17 Mar 2023 15:00:00 +0200
+
+highway (1.0.3-1) UNRELEASED; urgency=medium
+
+  * Add RearrangeToOddPlusEven, Xor3, 8-bit CompressStore, HWY_ASSUME
+  * Add contrib/bit_pack for 8/16-bit lanes
+  * Add WASM_EMU256 target
+  * Documentation improvements
+  * Allow opting out of C++ stdlib usage for Compiler Explorer
+  * Update for new RVV intrinsics; faster WASM min/max and extmul/q15mul
+  * Fix UB, GCC atomic
+
+ -- Jan Wassenberg <janwas@google.com>  Thu, 19 Jan 2023 13:00:00 +0200
+
+highway (1.0.2-1) UNRELEASED; urgency=medium
+
+  * Add ExclusiveNeither, FindKnownFirstTrue, Ne128
+  * Add 16-bit SumOfLanes/ReorderWidenMulAccumulate/ReorderDemote2To
+  * Faster sort for low-entropy input, improved pivot selection
+  * Add GN build system, Highway FAQ, k32v32 type to vqsort
+  * CMake: Support find_package(GTest), add rvv-inl.h, add HWY_ENABLE_TESTS
+  * Fix MIPS and C++20 build, Apple LLVM 10.3 detection, EMU128 AllTrue on RVV
+  * Fix missing exec_prefix, RVV build, warnings, libatomic linking
+  * Work around GCC 10.4 issue, disabled RDCYCLE, arm7 with vfpv3
+  * Documentation/example improvements
+  * Support static dispatch to SVE2_128 and SVE_256
+
+ -- Jan Wassenberg <janwas@google.com>  Thu, 27 Oct 2022 17:00:00 +0200
+
+highway (1.0.1-1) UNRELEASED; urgency=medium
+
+  * Add Eq128, i64 Mul, unsigned->float ConvertTo
+  * Faster sort for few unique keys, more robust pivot selection
+  * Fix: floating-point generator for sort tests, Min/MaxOfLanes for i16
+  * Fix: avoid always_inline in debug, link atomic
+  * GCC warnings: string.h, maybe-uninitialized, ignored-attributes
+  * GCC warnings: preprocessor int overflow, spurious use-after-free/overflow
+  * Doc: <=HWY_AVX3, Full32/64/128, how to use generic-inl
+
+ -- Jan Wassenberg <janwas@google.com>  Tue, 23 Aug 2022 10:00:00 +0200
+
+highway (1.0.0-1) UNRELEASED; urgency=medium
+
+  * ABI change: 64-bit target values, more room for expansion
+  * Add CompressBlocksNot, CompressNot, Lt128Upper, Min/Max128Upper, TruncateTo
+  * Add HWY_SVE2_128 target
+  * Sort speedups especially for 128-bit
+  * Documentation clarifications
+  * Faster NEON CountTrue/FindFirstTrue/AllFalse/AllTrue
+  * Improved SVE codegen
+  * Fix u16x8 ConcatEven/Odd, SSSE3 i64 Lt
+  * MSVC 2017 workarounds
+  * Support for runtime dispatch on Arm/GCC/Linux
+
+ -- Jan Wassenberg <janwas@google.com>  Wed, 27 Jul 2022 10:00:00 +0200
+
+highway (0.17.0-1) UNRELEASED; urgency=medium
+
+  * Add ExtractLane, InsertLane, IsInf, IsFinite, IsNaN
+  * Add StoreInterleaved2, LoadInterleaved2/3/4, BlendedStore, SafeFillN
+  * Add MulFixedPoint15, Or3
+  * Add Copy[If], Find[If], Generate, Replace[If] algos
+  * Add HWY_EMU128 target (replaces HWY_SCALAR)
+  * HWY_RVV is feature-complete
+  * Add HWY_ENABLE_CONTRIB build flag, HWY_NATIVE_FMA, HWY_WANT_SSSE3/SSE4 macros
+  * Extend ConcatOdd/Even and StoreInterleaved* to all types
+  * Allow CappedTag<T, nonPowerOfTwo>
+  * Sort speedups: 2x for AVX2, 1.09x for AVX3; avoid x86 malloc
+  * Expand documentation
+  * Fix RDTSCP crash in nanobenchmark
+  * Fix XCR0 check (was ignoring AVX3 on ICL)
+  * Support Arm/RISC-V timers
+
+ -- Jan Wassenberg <janwas@google.com>  Fri, 20 May 2022 10:00:00 +0200
+
+highway (0.16.0-1) UNRELEASED; urgency=medium
+
+  * Add contrib/sort (vectorized quicksort)
+  * Add IfNegativeThenElse, IfVecThenElse
+  * Add Reverse2,4,8, ReverseBlocks, DupEven/Odd, AESLastRound
+  * Add OrAnd, Min128, Max128, Lt128, SumsOf8
+  * Support capped/partial vectors on RVV/SVE, int64 in WASM
+  * Support SVE2, shared library build
+  * Remove deprecated overloads without the required d arg (UpperHalf etc.)
+
+ -- Jan Wassenberg <janwas@google.com>  Thu, 03 Feb 2022 11:00:00 +0100
+
+highway (0.15.0-1) UNRELEASED; urgency=medium
+
+  * New ops: CompressBlendedStore, ConcatOdd/Even, IndicesFromVec
+  * New ops: OddEvenBlocks, SwapAdjacentBlocks, Reverse, RotateRight
+  * Add bf16, unsigned comparisons, more lane types for Reverse/TableLookupLanes
+  * Contrib: add sort(ing network) and dot(product)
+  * Targets: update RVV for LLVM, add experimental WASM2
+  * Separate library hwy_test for test utils
+  * Add non-macro Simd<> aliases
+  * Fixes: const V& for GCC, AVX3 BZHI, POPCNT with AVX on MSVC, avoid %zu
+
+ -- Jan Wassenberg <janwas@google.com>  Wed, 10 Nov 2021 10:00:00 +0100
+
+highway (0.14.2-1) UNRELEASED; urgency=medium
+
+  * Add MaskedLoad
+  * Fix non-glibc PPC, Windows GCC, MSVC 19.14
+  * Opt-in for -Werror; separate design_philosophy.md
+
+ -- Jan Wassenberg <janwas@google.com>  Tue, 24 Aug 2021 15:00:00 +0200
+
+highway (0.14.1-1) UNRELEASED; urgency=medium
+
+  * Add LoadMaskBits, CompressBits[Store]
+  * Fix CPU feature check (AES/F16C) and warnings
+  * Improved DASSERT - disabled in optimized builds
+
+ -- Jan Wassenberg <janwas@google.com>  Tue, 17 Aug 2021 14:00:00 +0200
+
+highway (0.14.0-1) UNRELEASED; urgency=medium
+
+  * Add SVE, S-SSE3, AVX3_DL targets
+  * Support partial vectors in all ops
+  * Add PopulationCount, FindFirstTrue, Ne, TableLookupBytesOr0
+  * Add AESRound, CLMul, MulOdd, HWY_CAP_FLOAT16
+
+ -- Jan Wassenberg <janwas@google.com>  Thu, 29 Jul 2021 15:00:00 +0200
+
+highway (0.12.2-1) UNRELEASED; urgency=medium
+
+  * fix scalar-only test and Windows macro conflict with Load/StoreFence
+  * replace deprecated wasm intrinsics
+
+ -- Jan Wassenberg <janwas@google.com>  Mon, 31 May 2021 16:00:00 +0200
+
+highway (0.12.1-1) UNRELEASED; urgency=medium
+
+  * doc updates, ARM GCC support, fix s390/ppc, complete partial vectors
+  * fix warnings, faster ARM div/sqrt, separate hwy_contrib library
+  * add Abs(i64)/FirstN/Pause, enable AVX2 on MSVC
+
+ -- Jan Wassenberg <janwas@google.com>  Wed, 19 May 2021 15:00:00 +0200
+
+highway (0.12.0-1) UNRELEASED; urgency=medium
+
+  * Add Shift*8, Compress16, emulated Scatter/Gather, StoreInterleaved3/4
+  * Remove deprecated HWY_*_LANES, deprecate HWY_GATHER_LANES
+  * Proper IEEE rounding, reduce libstdc++ usage, inlined math
+
+ -- Jan Wassenberg <janwas@google.com>  Thu, 15 Apr 2021 20:00:00 +0200
+
+highway (0.11.1-1) UNRELEASED; urgency=medium
+
+  * Fix clang7 asan error, finish f16 conversions and add test
+
+ -- Jan Wassenberg <janwas@google.com>  Thu, 25 Feb 2021 16:00:00 +0200
+
+highway (0.11.0-1) UNRELEASED; urgency=medium
+
+  * Add RVV+mask logical ops, allow Shl/ShiftLeftSame on all targets, more math
+
+ -- Jan Wassenberg <janwas@google.com>  Thu, 18 Feb 2021 20:00:00 +0200
+
+highway (0.7.0-1) UNRELEASED; urgency=medium
+
+  * Added API stability notice, Compress[Store], contrib/, SignBit, CopySign
+
+ -- Jan Wassenberg <janwas@google.com>  Tue, 5 Jan 2021 17:00:00 +0200
+
+highway (0.1-1) UNRELEASED; urgency=medium
+
+  * Initial debian package.
+
+ -- Alex Deymo <deymo@google.com>  Mon, 19 Oct 2020 16:48:07 +0200
diff --git a/third-party/libjxl/libjxl/third_party/highway/debian/compat b/third-party/libjxl/libjxl/third_party/highway/debian/compat
new file mode 100644
index 0000000000..f599e28b8a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/debian/compat
@@ -0,0 +1 @@
+10
diff --git a/third-party/libjxl/libjxl/third_party/highway/debian/control b/third-party/libjxl/libjxl/third_party/highway/debian/control
new file mode 100644
index 0000000000..7c60ebc7f4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/debian/control
@@ -0,0 +1,23 @@
+Source: highway
+Maintainer: JPEG XL Maintainers <jpegxl@google.com>
+Section: misc
+Priority: optional
+Standards-Version: 3.9.8
+Build-Depends: cmake,
+               debhelper (>= 9),
+               libgtest-dev
+Homepage: https://github.com/google/highway
+
+Package: libhwy-dev
+Architecture: any
+Section: libdevel
+Depends: ${misc:Depends}
+Description: Efficient and performance-portable SIMD wrapper (developer files)
+ This library provides type-safe and source-code portable wrappers over
+ existing platform-specific intrinsics. Its design aims for simplicity,
+ reliable efficiency across platforms, and immediate usability with current
+ compilers.
+ .
+ This package installs the development files. There's no runtime library
+ since most of Highway is implemented in headers and only a very small
+ static library is needed.
diff --git a/third-party/libjxl/libjxl/third_party/highway/debian/copyright b/third-party/libjxl/libjxl/third_party/highway/debian/copyright
new file mode 100644
index 0000000000..53ea57aa97
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/debian/copyright
@@ -0,0 +1,20 @@
+Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: highway
+
+Files: *
+Copyright: 2020 Google LLC
+License: Apache-2.0
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ .
+      http://www.apache.org/licenses/LICENSE-2.0
+ .
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ .
+ On Debian systems, the complete text of the Apache License, Version 2
+ can be found in "/usr/share/common-licenses/Apache-2.0".
diff --git a/third-party/libjxl/libjxl/third_party/highway/debian/rules b/third-party/libjxl/libjxl/third_party/highway/debian/rules
new file mode 100755
index 0000000000..30d7bcd2e7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/debian/rules
@@ -0,0 +1,11 @@
+#!/usr/bin/make -f
+
+# This variable could be set via --set-envvar=HWY_EXTRA_CONFIG=
+# Safeguard: turn undefined to empty
+HWY_EXTRA_CONFIG ?=
+
+%:
+	dh $@ --buildsystem=cmake
+
+override_dh_auto_configure:
+	dh_auto_configure -- -DHWY_SYSTEM_GTEST=ON $(HWY_EXTRA_CONFIG)
diff --git a/third-party/libjxl/libjxl/third_party/highway/debian/source/format b/third-party/libjxl/libjxl/third_party/highway/debian/source/format
new file mode 100644
index 0000000000..163aaf8d82
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/debian/source/format
@@ -0,0 +1 @@
+3.0 (quilt)
diff --git a/third-party/libjxl/libjxl/third_party/highway/docs/.gitignore b/third-party/libjxl/libjxl/third_party/highway/docs/.gitignore
new file mode 100644
index 0000000000..e87b171c2d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/docs/.gitignore
@@ -0,0 +1,3 @@
+*.swp
+/_build
+/doctrees
diff --git a/third-party/libjxl/libjxl/third_party/highway/docs/Makefile b/third-party/libjxl/libjxl/third_party/highway/docs/Makefile
new file mode 100644
index 0000000000..d4bb2cbb9e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/third-party/libjxl/libjxl/third_party/highway/docs/_static/css/dark.css b/third-party/libjxl/libjxl/third_party/highway/docs/_static/css/dark.css
new file mode 100644
index 0000000000..da2235b5b0
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/docs/_static/css/dark.css
@@ -0,0 +1,622 @@
+/* links */
+
+a,
+a:visited {
+    color: #aaddff;
+}
+
+
+/* code directives */
+
+.method dt,
+.class dt,
+.data dt,
+.attribute dt,
+.function dt,
+.classmethod dt,
+.exception dt,
+.descclassname,
+.descname {
+    background-color: #2d2d2d !important;
+}
+
+.rst-content dl:not(.docutils) dt {
+    color: #aaddff;
+    background-color: #2d2d2d;
+    border-top: solid 3px #525252;
+    border-left: solid 3px #525252;
+}
+
+em.property {
+    color: #888888;
+}
+
+
+/* tables */
+
+.rst-content table.docutils thead {
+    color: #ddd;
+}
+
+.rst-content table.docutils td {
+    border: 0px;
+}
+
+.rst-content table.docutils:not(.field-list) tr:nth-child(2n-1) td {
+    background-color: #5a5a5a;
+}
+
+
+/* inlined code highlights */
+
+.xref,
+.py-meth,
+.rst-content a code {
+    color: #aaddff !important;
+    font-weight: normal !important;
+}
+
+.rst-content code {
+    color: #eee !important;
+    font-weight: normal !important;
+}
+
+code.literal {
+    background-color: #2d2d2d !important;
+    border: 1px solid #6d6d6d !important;
+}
+
+code.docutils.literal.notranslate {
+    color: #ddd;
+}
+
+
+/* notes, warnings, hints */
+
+.hint .admonition-title {
+    background: #2aa87c !important;
+}
+
+.warning .admonition-title {
+    background: #cc4444 !important;
+}
+
+.admonition-title {
+    background: #3a7ca8 !important;
+}
+
+.admonition,
+.note {
+    background-color: #2d2d2d !important;
+}
+
+
+/* table of contents */
+
+.wy-nav-content-wrap {
+    background-color: rgba(0, 0, 0, 0.6) !important;
+}
+
+.sidebar {
+    background-color: #191919 !important;
+}
+
+.sidebar-title {
+    background-color: #2b2b2b !important;
+}
+
+.wy-menu-vertical a {
+    color: #ddd;
+}
+
+.wy-menu-vertical code.docutils.literal.notranslate {
+    color: #404040;
+    background: none !important;
+    border: none !important;
+}
+
+.wy-nav-content {
+    background: #3c3c3c;
+    color: #dddddd;
+}
+
+.wy-menu-vertical li.on a,
+.wy-menu-vertical li.current>a {
+    background: #a3a3a3;
+    border-bottom: 0px !important;
+    border-top: 0px !important;
+}
+
+.wy-menu-vertical li.current {
+    background: #b3b3b3;
+}
+
+.toc-backref {
+    color: grey !important;
+}
+
+.highlight .hll {
+    background-color: #49483e
+}
+
+.highlight {
+    background: #222;
+    color: #f8f8f2
+}
+
+.highlight .c {
+    color: #888
+}
+
+
+/* Comment */
+
+.highlight .err {
+    color: #960050;
+    background-color: #1e0010
+}
+
+
+/* Error */
+
+.highlight .k {
+    color: #66d9ef
+}
+
+
+/* Keyword */
+
+.highlight .l {
+    color: #ae81ff
+}
+
+
+/* Literal */
+
+.highlight .n {
+    color: #f8f8f2
+}
+
+
+/* Name */
+
+.highlight .o {
+    color: #f92672
+}
+
+
+/* Operator */
+
+.highlight .p {
+    color: #f8f8f2
+}
+
+
+/* Punctuation */
+
+.highlight .ch {
+    color: #888
+}
+
+
+/* Comment.Hashbang */
+
+.highlight .cm {
+    color: #888
+}
+
+
+/* Comment.Multiline */
+
+.highlight .cp {
+    color: #888
+}
+
+
+/* Comment.Preproc */
+
+.highlight .cpf {
+    color: #888
+}
+
+
+/* Comment.PreprocFile */
+
+.highlight .c1 {
+    color: #888
+}
+
+
+/* Comment.Single */
+
+.highlight .cs {
+    color: #888
+}
+
+
+/* Comment.Special */
+
+.highlight .gd {
+    color: #f92672
+}
+
+
+/* Generic.Deleted */
+
+.highlight .ge {
+    font-style: italic
+}
+
+
+/* Generic.Emph */
+
+.highlight .gi {
+    color: #a6e22e
+}
+
+
+/* Generic.Inserted */
+
+.highlight .gs {
+    font-weight: bold
+}
+
+
+/* Generic.Strong */
+
+.highlight .gu {
+    color: #888
+}
+
+
+/* Generic.Subheading */
+
+.highlight .kc {
+    color: #66d9ef
+}
+
+
+/* Keyword.Constant */
+
+.highlight .kd {
+    color: #66d9ef
+}
+
+
+/* Keyword.Declaration */
+
+.highlight .kn {
+    color: #f92672
+}
+
+
+/* Keyword.Namespace */
+
+.highlight .kp {
+    color: #66d9ef
+}
+
+
+/* Keyword.Pseudo */
+
+.highlight .kr {
+    color: #66d9ef
+}
+
+
+/* Keyword.Reserved */
+
+.highlight .kt {
+    color: #66d9ef
+}
+
+
+/* Keyword.Type */
+
+.highlight .ld {
+    color: #e6db74
+}
+
+
+/* Literal.Date */
+
+.highlight .m {
+    color: #ae81ff
+}
+
+
+/* Literal.Number */
+
+.highlight .s {
+    color: #e6db74
+}
+
+
+/* Literal.String */
+
+.highlight .na {
+    color: #a6e22e
+}
+
+
+/* Name.Attribute */
+
+.highlight .nb {
+    color: #f8f8f2
+}
+
+
+/* Name.Builtin */
+
+.highlight .nc {
+    color: #a6e22e
+}
+
+
+/* Name.Class */
+
+.highlight .no {
+    color: #66d9ef
+}
+
+
+/* Name.Constant */
+
+.highlight .nd {
+    color: #a6e22e
+}
+
+
+/* Name.Decorator */
+
+.highlight .ni {
+    color: #f8f8f2
+}
+
+
+/* Name.Entity */
+
+.highlight .ne {
+    color: #a6e22e
+}
+
+
+/* Name.Exception */
+
+.highlight .nf {
+    color: #a6e22e
+}
+
+
+/* Name.Function */
+
+.highlight .nl {
+    color: #f8f8f2
+}
+
+
+/* Name.Label */
+
+.highlight .nn {
+    color: #f8f8f2
+}
+
+
+/* Name.Namespace */
+
+.highlight .nx {
+    color: #a6e22e
+}
+
+
+/* Name.Other */
+
+.highlight .py {
+    color: #f8f8f2
+}
+
+
+/* Name.Property */
+
+.highlight .nt {
+    color: #f92672
+}
+
+
+/* Name.Tag */
+
+.highlight .nv {
+    color: #f8f8f2
+}
+
+
+/* Name.Variable */
+
+.highlight .ow {
+    color: #f92672
+}
+
+
+/* Operator.Word */
+
+.highlight .w {
+    color: #f8f8f2
+}
+
+
+/* Text.Whitespace */
+
+.highlight .mb {
+    color: #ae81ff
+}
+
+
+/* Literal.Number.Bin */
+
+.highlight .mf {
+    color: #ae81ff
+}
+
+
+/* Literal.Number.Float */
+
+.highlight .mh {
+    color: #ae81ff
+}
+
+
+/* Literal.Number.Hex */
+
+.highlight .mi {
+    color: #ae81ff
+}
+
+
+/* Literal.Number.Integer */
+
+.highlight .mo {
+    color: #ae81ff
+}
+
+
+/* Literal.Number.Oct */
+
+.highlight .sa {
+    color: #e6db74
+}
+
+
+/* Literal.String.Affix */
+
+.highlight .sb {
+    color: #e6db74
+}
+
+
+/* Literal.String.Backtick */
+
+.highlight .sc {
+    color: #e6db74
+}
+
+
+/* Literal.String.Char */
+
+.highlight .dl {
+    color: #e6db74
+}
+
+
+/* Literal.String.Delimiter */
+
+.highlight .sd {
+    color: #e6db74
+}
+
+
+/* Literal.String.Doc */
+
+.highlight .s2 {
+    color: #e6db74
+}
+
+
+/* Literal.String.Double */
+
+.highlight .se {
+    color: #ae81ff
+}
+
+
+/* Literal.String.Escape */
+
+.highlight .sh {
+    color: #e6db74
+}
+
+
+/* Literal.String.Heredoc */
+
+.highlight .si {
+    color: #e6db74
+}
+
+
+/* Literal.String.Interpol */
+
+.highlight .sx {
+    color: #e6db74
+}
+
+
+/* Literal.String.Other */
+
+.highlight .sr {
+    color: #e6db74
+}
+
+
+/* Literal.String.Regex */
+
+.highlight .s1 {
+    color: #e6db74
+}
+
+
+/* Literal.String.Single */
+
+.highlight .ss {
+    color: #e6db74
+}
+
+
+/* Literal.String.Symbol */
+
+.highlight .bp {
+    color: #f8f8f2
+}
+
+
+/* Name.Builtin.Pseudo */
+
+.highlight .fm {
+    color: #a6e22e
+}
+
+
+/* Name.Function.Magic */
+
+.highlight .vc {
+    color: #f8f8f2
+}
+
+
+/* Name.Variable.Class */
+
+.highlight .vg {
+    color: #f8f8f2
+}
+
+
+/* Name.Variable.Global */
+
+.highlight .vi {
+    color: #f8f8f2
+}
+
+
+/* Name.Variable.Instance */
+
+.highlight .vm {
+    color: #f8f8f2
+}
+
+
+/* Name.Variable.Magic */
+
+.highlight .il {
+    color: #ae81ff
+}
+
+
+/* Literal.Number.Integer.Long */
diff --git a/third-party/libjxl/libjxl/third_party/highway/docs/_static/css/toggle.css b/third-party/libjxl/libjxl/third_party/highway/docs/_static/css/toggle.css
new file mode 100644
index 0000000000..f79376af7d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/docs/_static/css/toggle.css
@@ -0,0 +1,77 @@
+input[type=checkbox] {
+    visibility: hidden;
+    height: 0;
+    width: 0;
+    margin: 0;
+}
+
+.rst-versions .rst-current-version {
+    padding: 10px;
+    display: flex;
+    justify-content: space-between;
+}
+
+.rst-versions .rst-current-version .fa-book,
+.rst-versions .rst-current-version .fa-v,
+.rst-versions .rst-current-version .fa-caret-down {
+    height: 24px;
+    line-height: 24px;
+    vertical-align: middle;
+}
+
+.rst-versions .rst-current-version .fa-element {
+    width: 80px;
+    text-align: center;
+}
+
+.rst-versions .rst-current-version .fa-book {
+    text-align: left;
+}
+
+.rst-versions .rst-current-version .fa-v {
+    color: #27AE60;
+    text-align: right;
+}
+
+label {
+    margin: 0 auto;
+    display: inline-block;
+    justify-content: center;
+    align-items: right;
+    border-radius: 100px;
+    position: relative;
+    cursor: pointer;
+    text-indent: -9999px;
+    width: 50px;
+    height: 21px;
+    background: #000;
+}
+
+label:after {
+    border-radius: 50%;
+    position: absolute;
+    content: '';
+    background: #fff;
+    width: 15px;
+    height: 15px;
+    top: 3px;
+    left: 3px;
+    transition: ease-in-out 200ms;
+}
+
+input:checked+label {
+    background: #3a7ca8;
+}
+
+input:checked+label:after {
+    left: calc(100% - 5px);
+    transform: translateX(-100%);
+}
+
+html.transition,
+html.transition *,
+html.transition *:before,
+html.transition *:after {
+    transition: ease-in-out 200ms !important;
+    transition-delay: 0s !important; /* <time> values need a unit; a bare 0 is invalid */
+}
diff --git a/third-party/libjxl/libjxl/third_party/highway/docs/_static/js/toggle.js b/third-party/libjxl/libjxl/third_party/highway/docs/_static/js/toggle.js
new file mode 100644
index 0000000000..7cfe613f60
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/docs/_static/js/toggle.js
@@ -0,0 +1,38 @@
+document.addEventListener('DOMContentLoaded', function() {
+
+    function toggleCssMode(isDay) {
+        var mode = (isDay ? "Day" : "Night");
+        localStorage.setItem("css-mode", mode);
+
+        var daysheet = $('link[href="_static/pygments.css"]')[0].sheet;
+        daysheet.disabled = !isDay;
+
+        var nightsheet = $('link[href="_static/css/dark.css"]')[0];
+        if (!isDay && nightsheet === undefined) {
+            var element = document.createElement("link");
+            element.setAttribute("rel", "stylesheet");
+            element.setAttribute("type", "text/css");
+            element.setAttribute("href", "_static/css/dark.css");
+            document.getElementsByTagName("head")[0].appendChild(element);
+            return;
+        }
+        if (nightsheet !== undefined) {
+            nightsheet.sheet.disabled = isDay;
+        }
+    }
+
+    var initial = localStorage.getItem("css-mode") != "Night";
+    var checkbox = document.querySelector('input[name=mode]');
+
+    toggleCssMode(initial);
+    checkbox.checked = initial;
+
+    checkbox.addEventListener('change', function() {
+        document.documentElement.classList.add('transition');
+        window.setTimeout(() => {
+            document.documentElement.classList.remove('transition');
+        }, 1000)
+        toggleCssMode(this.checked);
+    })
+
+}); 
diff --git a/third-party/libjxl/libjxl/third_party/highway/docs/_templates/layout.html b/third-party/libjxl/libjxl/third_party/highway/docs/_templates/layout.html
new file mode 100644
index 0000000000..e8f2b7f759
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/docs/_templates/layout.html
@@ -0,0 +1,9 @@
+{% extends "!layout.html" %}
+  {% block footer %} {{ super() }}
+
+  <style>
+         .wy-nav-content { max-width: none; }
+  </style>
+
+{% endblock %}
+
diff --git a/third-party/libjxl/libjxl/third_party/highway/docs/_templates/versions.html b/third-party/libjxl/libjxl/third_party/highway/docs/_templates/versions.html
new file mode 100644
index 0000000000..e9abbcdafe
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/docs/_templates/versions.html
@@ -0,0 +1,62 @@
+{% if READTHEDOCS or display_lower_left %}
+{# Add rst-badge after rst-versions for small badge style. #}
+  <div class="rst-versions" data-toggle="rst-versions" role="note" aria-label="versions">
+    <span class="rst-current-version" data-toggle="rst-current-version">
+      <span class="fa fa-book"> Read the Docs</span>
+      
+      
+     <span class="fa fa-element">
+     <input class="container_toggle" type="checkbox" id="switch" name="mode">
+     <label for="switch"></label>
+     </span>
+      
+      v: {{ current_version }}
+      <span class="fa fa-caret-down"></span>
+    </span>
+    <div class="rst-other-versions">
+      {% if languages|length >= 1 %}
+      <dl>
+        <dt>{{ _('Languages') }}</dt>
+        {% for slug, url in languages %}
+          {% if slug == current_language %} <strong> {% endif %}
+          <dd><a href="{{ url }}">{{ slug }}</a></dd>
+          {% if slug == current_language %} </strong> {% endif %}
+        {% endfor %}
+      </dl>
+      {% endif %}
+      {% if versions|length >= 1 %}
+      <dl>
+        <dt>{{ _('Versions') }}</dt>
+        {% for slug, url in versions %}
+          {% if slug == current_version %} <strong> {% endif %}
+          <dd><a href="{{ url }}">{{ slug }}</a></dd>
+          {% if slug == current_version %} </strong> {% endif %}
+        {% endfor %}
+      </dl>
+      {% endif %}
+      {% if downloads|length >= 1 %}
+      <dl>
+        <dt>{{ _('Downloads') }}</dt>
+        {% for type, url in downloads %}
+          <dd><a href="{{ url }}">{{ type }}</a></dd>
+        {% endfor %}
+      </dl>
+      {% endif %}
+      {% if READTHEDOCS %}
+      <dl>
+        <dt>{{ _('On Read the Docs') }}</dt>
+          <dd>
+            <a href="//{{ PRODUCTION_DOMAIN }}/projects/{{ slug }}/?fromdocs={{ slug }}">{{ _('Project Home') }}</a>
+          </dd>
+          <dd>
+            <a href="//{{ PRODUCTION_DOMAIN }}/builds/{{ slug }}/?fromdocs={{ slug }}">{{ _('Builds') }}</a>
+          </dd>
+      </dl>
+      {% endif %}
+      <hr/>
+      {% trans %}Free document hosting provided by <a href="https://pages.github.com/">GitHub Pages</a>.{% endtrans %}
+ 
+    </div>
+  </div>
+{% endif %}
+ 
diff --git a/third-party/libjxl/libjxl/third_party/highway/docs/buildDocs.sh b/third-party/libjxl/libjxl/third_party/highway/docs/buildDocs.sh
new file mode 100755
index 0000000000..0cd587794a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/docs/buildDocs.sh
@@ -0,0 +1,156 @@
+#!/bin/bash
+set -x
+################################################################################
+# File:    buildDocs.sh
+# Purpose: Script that builds our documentation using sphinx and updates GitHub
+#          Pages. This script is executed by:
+#            .github/workflows/docs_pages_workflow.yml
+#
+# Authors: Michael Altfield <michael@michaelaltfield.net>
+# Created: 2020-07-17
+# Updated: 2020-07-23
+# Version: 0.2
+################################################################################
+  
+###################
+# INSTALL DEPENDS #
+###################
+  
+apt-get update
+apt-get -y install git rsync pandoc python3-sphinx python3-sphinx-rtd-theme python3-stemmer python3-git python3-pip python3-virtualenv python3-setuptools
+  
+python3 -m pip install --upgrade rinohtype pygments sphinx-rtd-theme sphinx-tabs docutils==0.16 pandoc
+python3 -m pip list
+
+# get rid of all these safe dir warnings
+git config --global --add safe.directory '*'
+
+#####################
+# DECLARE VARIABLES #
+#####################
+  
+pwd
+ls -lah
+export SOURCE_DATE_EPOCH=$(git log -1 --pretty=%ct)
+  
+# make a new temp dir which will be our GitHub Pages docroot
+docroot=`mktemp -d`
+ 
+export REPO_NAME="${GITHUB_REPOSITORY##*/}"
+  
+##############
+# BUILD DOCS #
+##############
+  
+# first, cleanup any old builds' static assets
+make -C docs clean
+
+# get a list of branches, excluding 'HEAD' and 'gh-pages'
+versions="`git for-each-ref '--format=%(refname:lstrip=-1)' refs/remotes/origin/ | grep -viE '^(HEAD|gh-pages)$'`"
+ls
+for current_version in ${versions}; do
+  
+   # make the current version available to conf.py
+   export current_version
+   git checkout "${current_version}"
+  
+   echo "INFO: Building sites for ${current_version}"
+
+   # skip this branch if it doesn't have our docs dir & sphinx config
+   if [ ! -e 'docs/conf.py' ]; then
+      echo -e "\tINFO: Couldn't find 'docs/conf.py' (skipped)"
+      continue
+   fi
+  
+   # convert the g3doc markdown to rst; the subshell keeps a failed `cd` from moving this script out of the repo root
+   ( cd docs && python3 mm-converter.py )
+  
+   languages="en"
+   for current_language in ${languages}; do
+  
+      # make the current language available to conf.py
+      export current_language
+  
+      ##########
+      # BUILDS #
+      ##########
+      echo "INFO: Building for ${current_language}"
+  
+      # HTML #
+      sphinx-build -b html docs/ docs/_build/html/${current_language}/${current_version} -D language="${current_language}"
+  
+      # EPUB #
+      sphinx-build -b epub docs/ docs/_build/epub -D language="${current_language}"
+      mkdir -p "${docroot}/${current_language}/${current_version}"
+      cp "docs/_build/epub/target.epub" "${docroot}/${current_language}/${current_version}/${REPO_NAME}_${current_language}_${current_version}.epub"
+  
+      # copy the static assets produced by the above build into our docroot
+      rsync -av "docs/_build/html/" "${docroot}/"
+  
+   done
+  
+done
+  
+# return to master branch
+git checkout master
+  
+#######################
+# Update GitHub Pages #
+#######################
+  
+git config --global user.name "${GITHUB_ACTOR}"
+git config --global user.email "${GITHUB_ACTOR}@users.noreply.github.com"
+  
+pushd "${docroot}"
+  
+# don't bother maintaining history; just generate fresh
+git init
+set +x; git remote add deploy "https://token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git"; set -x  # pause xtrace so the token is not echoed into the build log
+git checkout -b gh-pages
+  
+# add .nojekyll to the root so that github won't 404 on content added to dirs
+# that start with an underscore (_), such as our "_content" dir..
+touch .nojekyll
+  
+# add redirect from the docroot to our default docs language/version
+cat > index.html <<EOF
+<!DOCTYPE html>
+<html>
+   <head>
+      <title>${REPO_NAME} Docs</title>
+      <meta http-equiv = "refresh" content="0; url='/${REPO_NAME}/en/master/'" />
+   </head>
+   <body>
+      <p>Please wait while you're redirected to our <a href="/${REPO_NAME}/en/master/">documentation</a>.</p>
+   </body>
+</html>
+EOF
+  
+# Add README
+cat > README.md <<EOF
+# GitHub Pages Cache
+  
+Nothing to see here. The contents of this branch are essentially a cache that's not intended to be viewed on github.com.
+  
+  
+If you're looking to update documentation, check the relevant development branch's 'docs/' dir.
+  
+For more information on how this documentation is built using Sphinx, Read the Docs, and GitHub Actions/Pages, see:
+  
+ * https://tech.michaelaltfield.net/2020/07/18/sphinx-rtd-github-pages-1
+EOF
+  
+# copy the resulting html pages built from sphinx above to our new git repo
+git add .
+  
+# commit all the new files
+msg="Updating Docs for commit ${GITHUB_SHA} made on `date -d"@${SOURCE_DATE_EPOCH}" --iso-8601=seconds` from ${GITHUB_REF} by ${GITHUB_ACTOR}"
+git commit -am "${msg}"
+  
+# overwrite the contents of the gh-pages branch on our github.com repo
+git push deploy gh-pages --force
+  
+popd # return to main repo sandbox root
+  
+# exit cleanly
+exit 0
diff --git a/third-party/libjxl/libjxl/third_party/highway/docs/conf.py b/third-party/libjxl/libjxl/third_party/highway/docs/conf.py
new file mode 100644
index 0000000000..9bd21c3ef5
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/docs/conf.py
@@ -0,0 +1,159 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+# sys.path.insert(0, os.path.abspath('.'))
+
+
+# -- Project information -----------------------------------------------------
+
+project = 'HIGHWAY'
+
+# The full version, including alpha/beta/rc tags
+release = 'nightly'
+copyright = 'Apache 2'
+
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+		'sphinx_rtd_theme',
+	    	'sphinx.ext.githubpages',
+	    	'sphinx.ext.autosectionlabel',
+	        'sphinx_tabs.tabs',
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'sphinx_rtd_theme'
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+html_logo = 'images/logo.png'
+html_favicon = 'images/logo-32x32.ico'
+html_theme_options = {
+    'logo_only': False,
+    'display_version': True,
+}
+# The master toctree document.
+master_doc = 'index'
+
+ # Add any extra paths that contain custom files (such as robots.txt or
+# .htaccess) here, relative to this directory. These files are copied
+# directly to the root of the documentation.
+html_extra_path = ["_static/css"]
+
+html_css_files = ["css/toggle.css"]
+html_js_files = ["js/toggle.js"]
+ 
+############################
+# SETUP THE RTD LOWER-LEFT #
+############################
+try:
+   html_context
+except NameError:
+   html_context = dict()
+html_context['display_lower_left'] = True
+ 
+if 'REPO_NAME' in os.environ:
+   REPO_NAME = os.environ['REPO_NAME']
+else:
+   REPO_NAME = ''
+ 
+# SET CURRENT_LANGUAGE
+if 'current_language' in os.environ:
+   # get the current_language env var set by buildDocs.sh
+   current_language = os.environ['current_language']
+else:
+   # the user is probably doing `make html`
+   # set this build's current language to english
+   current_language = 'en'
+ 
+# tell the theme which language to we're currently building
+html_context['current_language'] = current_language
+ 
+# SET CURRENT_VERSION
+from git import Repo
+repo = Repo( search_parent_directories=True )
+ 
+if 'current_version' in os.environ:
+   # get the current_version env var set by buildDocs.sh
+   current_version = os.environ['current_version']
+else:
+   # the user is probably doing `make html`
+   # set this build's current version by looking at the branch
+   current_version = repo.active_branch.name
+ 
+# tell the theme which version we're currently on ('current_version' affects
+# the lower-left rtd menu and 'version' affects the logo-area version)
+html_context['current_version'] = current_version
+html_context['version'] = current_version
+ 
+# POPULATE LINKS TO OTHER LANGUAGES
+html_context['languages'] = [ ('en', '/' +REPO_NAME+ '/en/' +current_version+ '/') ]
+ 
+languages = [lang.name for lang in os.scandir('locales') if lang.is_dir()]
+for lang in languages:
+   html_context['languages'].append( (lang, '/' +REPO_NAME+ '/' +lang+ '/' +current_version+ '/') )
+ 
+# POPULATE LINKS TO OTHER VERSIONS
+html_context['versions'] = list()
+ 
+versions = [branch.name for branch in repo.branches]
+for version in versions:
+   html_context['versions'].append( (version, '/' +REPO_NAME+ '/'  +current_language+ '/' +version+ '/') )
+ 
+# POPULATE LINKS TO OTHER FORMATS/DOWNLOADS
+ 
+# settings for creating PDF with rinoh
+rinoh_documents = [(
+ master_doc,
+ 'target',
+ project+ ' Documentation',
+ '© ' +copyright,
+)]
+today_fmt = "%B %d, %Y"
+ 
+# settings for EPUB
+epub_basename = 'target'
+ 
+html_context['downloads'] = list()
+ 
+html_context['downloads'].append( ('epub', '/' +REPO_NAME+ '/' +current_language+ '/' +current_version+ '/' +REPO_NAME+ '_' +current_language+ '_' +current_version+ '.epub') )
+ 
+##########################
+# "EDIT ON GITHUB" LINKS #
+##########################
+ 
+html_context['display_github'] = True
+html_context['github_user'] = 'google'
+html_context['github_repo'] = REPO_NAME
+html_context['github_version'] = 'master/docs/'
+ 
diff --git a/third-party/libjxl/libjxl/third_party/highway/docs/images/logo-32x32.ico b/third-party/libjxl/libjxl/third_party/highway/docs/images/logo-32x32.ico
new file mode 100644
index 0000000000..d834e41063
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/highway/docs/images/logo-32x32.ico differ
diff --git a/third-party/libjxl/libjxl/third_party/highway/docs/images/logo.png b/third-party/libjxl/libjxl/third_party/highway/docs/images/logo.png
new file mode 100644
index 0000000000..76c166cb9d
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/highway/docs/images/logo.png differ
diff --git a/third-party/libjxl/libjxl/third_party/highway/docs/index.rst b/third-party/libjxl/libjxl/third_party/highway/docs/index.rst
new file mode 100644
index 0000000000..61036ba06e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/docs/index.rst
@@ -0,0 +1,13 @@
+Highway, a C++ library that provides portable SIMD/vector intrinsics
+====================================================================
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+   README
+   quick_reference
+   design_philosophy
+   faq
+   impl_details
+   release_testing_process
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/highway/docs/locales/zh/index.rst b/third-party/libjxl/libjxl/third_party/highway/docs/locales/zh/index.rst
new file mode 100644
index 0000000000..c30fb48bbd
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/docs/locales/zh/index.rst
@@ -0,0 +1,6 @@
+Highway, 为C++提供可移植的SIMD原语
+====================================================================
+
+.. toctree::
+   :maxdepth: 2
+   :caption: 我们正在准备中文支持：）
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/highway/docs/make.bat b/third-party/libjxl/libjxl/third_party/highway/docs/make.bat
new file mode 100644
index 0000000000..2119f51099
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/third-party/libjxl/libjxl/third_party/highway/docs/mm-converter.py b/third-party/libjxl/libjxl/third_party/highway/docs/mm-converter.py
new file mode 100755
index 0000000000..3b2f1ddf6a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/docs/mm-converter.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+
+# set correct links (pandoc did not deal with github links properly)
+
+import os
+import re
+import subprocess
+
+regex_pdf_links1 = re.compile(r'`(.*)\<g3doc\/(.*)\.pdf\>`__', 
+                            re.M | re.X) # Multiline and Verbose
+regex_md_links = re.compile(r'`(.*)\<g3doc\/(.*)\.md\>`__', 
+                            re.M | re.X) # Multiline and Verbose
+regex_md_links2 = re.compile(r'`(.*)\n(.*)\<g3doc\/(.*)\.md\>`__', 
+                            re.M | re.X) # Multiline and Verbose
+regex_pdf_links2 = re.compile(r'`(.*)\n\s+(.*)\<g3doc\/(.*)\.pdf\>`__', 
+                            re.M | re.X) # Multiline and Verbose
+
+def remove_links_to_index2(data):
+    # remove liks to the index, they are useless in py4web docs
+    data = data
+    print(re.search(regex_pdf_links2, data))
+    return re.sub(regex_pdf_links2, 
+                  r':download:`\1 \2<g3doc/\3.pdf>`',
+                  data)
+
+def remove_links_to_index(data):
+    # remove liks to the index, they are useless in py4web docs
+    data = data
+    print(re.search(regex_pdf_links1, data))
+    return re.sub(regex_pdf_links1, 
+                  r':download:`\1<g3doc/\2.pdf>`',
+                  data)
+
+def rewrite_md_links(data):
+    # remove liks to the index, they are useless in py4web docs
+    data = data
+    print(re.search(regex_md_links, data))
+    data = re.sub(regex_md_links, 
+                  r'`\1<\2.html>`__',
+                  data)
+    data = re.sub(regex_md_links2, 
+                  r'`\1 \2<\3.html>`__',
+                  data)
+    return data
+
+
+docs_on_pages = [
+    'README.md',
+    'quick_reference.md',
+    'design_philosophy.md',
+    'impl_details.md',
+    'faq.md',
+    'release_testing_process.md'
+]
+
+def convert2md(file):
+    print(f"    Working on file {file}")
+    file = os.path.join('g3doc', file)
+    data = open(file, 'r').read()
+    write_files(file, data)
+    
+def write_files(file, data):
+    for extension in ['rst']:
+        ext_dir = os.getcwd()
+        md_dir = os.path.join(os.getcwd(), 'g3doc')
+        if not os.path.isdir(ext_dir):
+            os.mkdir(ext_dir)
+        ext_file = os.path.join(ext_dir , os.path.splitext(os.path.basename(file))[0] + "." + extension)
+        md_file = os.path.join(md_dir , os.path.splitext(os.path.basename(file))[0] + ".md")
+        print(f'writing {ext_file}')
+        if os.path.exists(ext_file):
+            os.unlink(ext_file)
+        with open(ext_file, 'w') as handler:
+            write_format(extension, ext_file, handler, md_file, data)
+
+
+def write_format(extension, ext_file, handler, md_file, data):
+    if extension =='md':
+            handler.write(data)
+    elif extension =='rst':
+        try:
+            subprocess.call(['pandoc', '-s', md_file, '-f', 'markdown', '-t', 'rst', '-o', ext_file])
+            data = open(ext_file, 'r').read() 
+            data = remove_links_to_index(data)
+            data = remove_links_to_index2(data)
+            data = rewrite_md_links(data)
+            handler.write(data)
+            # Open a file for writing
+            # with open('tmp.txt', 'w') as f:
+                # Call the subprocess and redirect the output to the file
+                # subprocess.call(['awk', '{ gsub(/<g3doc\//, "<"); print }', ext_file], stdout=f)
+                # os.system('mv tmp.txt ' + ext_file)
+ 
+        except FileNotFoundError:
+            print("\n **** ERROR ****: you need the Pandoc module installed!")
+            exit(0)
+    elif extension =='html':
+        try:
+            subprocess.call(['pandoc', '-s', md_file, '-f', 'markdown', '-t', 'html', '-o', ext_file,  '--highlight-style=kate'])
+        except FileNotFoundError:
+            print("\n **** ERROR ****: you need the Pandoc module installed!")
+            exit(0)
+
+
+if __name__ == "__main__":  # paths are relative, so this expects to run from the directory that contains g3doc/
+    for doc in docs_on_pages:
+        print(doc)  # progress output: name of the page about to be converted
+        convert2md(doc)
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy.gni b/third-party/libjxl/libjxl/third_party/highway/hwy.gni
new file mode 100644
index 0000000000..dec6905423
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy.gni
@@ -0,0 +1,54 @@
+_hwy = get_path_info("hwy", "abspath")
+
+hwy_public = [
+  # Public
+  "$_hwy/aligned_allocator.h",
+  "$_hwy/base.h",
+  "$_hwy/cache_control.h",
+  "$_hwy/per_target.h",
+  "$_hwy/print.h",
+
+  # Public, textual
+  "$_hwy/foreach_target.h",
+  "$_hwy/highway_export.h",
+  "$_hwy/highway.h",
+  "$_hwy/print-inl.h",
+  "$_hwy/timer-inl.h",
+
+  # Private
+  "$_hwy/detect_compiler_arch.h",
+  "$_hwy/detect_targets.h",
+  "$_hwy/targets.h",
+
+  # Private, textual:
+  "$_hwy/ops/arm_neon-inl.h",
+  "$_hwy/ops/arm_sve-inl.h",
+  "$_hwy/ops/emu128-inl.h",
+  "$_hwy/ops/generic_ops-inl.h",
+  "$_hwy/ops/scalar-inl.h",
+  "$_hwy/ops/set_macros-inl.h",
+  "$_hwy/ops/shared-inl.h",
+  "$_hwy/ops/x86_128-inl.h",
+  "$_hwy/ops/x86_256-inl.h",
+  "$_hwy/ops/x86_512-inl.h",
+]
+
+hwy_sources = [
+  "$_hwy/aligned_allocator.cc",
+  "$_hwy/per_target.cc",
+  "$_hwy/print.cc",
+  "$_hwy/targets.cc",
+]
+
+hwy_contrib_public = [
+  "$_hwy/contrib/algo/copy-inl.h",
+  "$_hwy/contrib/algo/find-inl.h",
+  "$_hwy/contrib/algo/transform-inl.h",
+  "$_hwy/contrib/dot/dot-inl.h",
+  "$_hwy/contrib/image/image.h",
+  "$_hwy/contrib/math/math-inl.h",
+]
+
+hwy_contrib_sources = [
+  "$_hwy/contrib/image/image.cc",
+]
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/aligned_allocator.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/aligned_allocator.cc
new file mode 100644
index 0000000000..e240a49e7b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/aligned_allocator.cc
@@ -0,0 +1,151 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/aligned_allocator.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>  // malloc
+
+#include <atomic>
+#include <limits>
+
+#include "hwy/base.h"
+
+namespace hwy {
+namespace {
+
+#if HWY_ARCH_RVV && defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 11000
+// Not actually an upper bound on the size, but this value prevents crossing a
+// 4K boundary (relevant on Andes).
+constexpr size_t kAlignment = HWY_MAX(HWY_ALIGNMENT, 4096);
+#else
+constexpr size_t kAlignment = HWY_ALIGNMENT;
+#endif
+
+#if HWY_ARCH_X86
+// On x86, aliasing can only occur at multiples of 2K, but that's too wasteful
+// if this is used for single-vector allocations. 256 is more reasonable.
+constexpr size_t kAlias = kAlignment * 4;
+#else
+constexpr size_t kAlias = kAlignment;
+#endif
+
+#pragma pack(push, 1)
+struct AllocationHeader {  // written directly below the payload by AllocateAlignedBytes
+  void* allocated;  // pointer returned by malloc / alloc_ptr; this is what gets freed
+  size_t payload_size;  // the caller's requested size; handed to the ArrayDeleter
+};
+#pragma pack(pop)
+
+// Cycles through the kAlias / kAlignment possible offsets for AllocateAlignedBytes.
+size_t NextAlignedOffset() {
+  constexpr uint32_t kGroups = kAlias / kAlignment;
+  static std::atomic<uint32_t> counter{0};
+  const uint32_t ticket = counter.fetch_add(1, std::memory_order_relaxed);
+  const size_t offset = kAlignment * (ticket % kGroups);
+  HWY_DASSERT((offset % kAlignment == 0) && offset <= kAlias);
+  return offset;
+}
+
+}  // namespace
+
+HWY_DLLEXPORT void* AllocateAlignedBytes(const size_t payload_size,
+                                         AllocPtr alloc_ptr, void* opaque_ptr) {
+  HWY_ASSERT(payload_size != 0);  // likely a bug in caller
+  if (payload_size >= std::numeric_limits<size_t>::max() / 2) {  // presumably guards the size sum below against overflow
+    HWY_DASSERT(false && "payload_size too large");
+    return nullptr;
+  }
+
+  size_t offset = NextAlignedOffset();  // multiple of kAlignment, varies from call to call
+
+  // What: | misalign | unused | AllocationHeader |payload
+  // Size: |<= kAlias | offset                    |payload_size
+  //       ^allocated.^aligned.^header............^payload
+  // The header must immediately precede payload, which must remain aligned.
+  // To avoid wasting space, the header resides at the end of `unused`,
+  // which therefore cannot be empty (offset == 0).
+  if (offset == 0) {
+    offset = kAlignment;  // = RoundUpTo(sizeof(AllocationHeader), kAlignment)
+    static_assert(sizeof(AllocationHeader) <= kAlignment, "Else: round up");
+  }
+
+  const size_t allocated_size = kAlias + offset + payload_size;
+  void* allocated;
+  if (alloc_ptr == nullptr) {
+    allocated = malloc(allocated_size);
+  } else {
+    allocated = (*alloc_ptr)(opaque_ptr, allocated_size);
+  }
+  if (allocated == nullptr) return nullptr;  // underlying allocator failed
+  // Always round up even if already aligned - we already asked for kAlias
+  // extra bytes and there's no way to give them back.
+  uintptr_t aligned = reinterpret_cast<uintptr_t>(allocated) + kAlias;
+  static_assert((kAlias & (kAlias - 1)) == 0, "kAlias must be a power of 2");
+  static_assert(kAlias >= kAlignment, "Cannot align to more than kAlias");
+  aligned &= ~(kAlias - 1);
+
+  const uintptr_t payload = aligned + offset;  // still aligned: offset is a multiple of kAlignment
+
+  // Stash `allocated` and payload_size inside header for FreeAlignedBytes().
+  // The allocated_size can be reconstructed from the payload_size.
+  AllocationHeader* header = reinterpret_cast<AllocationHeader*>(payload) - 1;
+  header->allocated = allocated;
+  header->payload_size = payload_size;
+
+  return HWY_ASSUME_ALIGNED(reinterpret_cast<void*>(payload), kAlignment);
+}
+
+HWY_DLLEXPORT void FreeAlignedBytes(const void* aligned_pointer,
+                                    FreePtr free_ptr, void* opaque_ptr) {
+  if (aligned_pointer == nullptr) return;  // freeing null is a no-op
+  const uintptr_t payload_bits = reinterpret_cast<uintptr_t>(aligned_pointer);
+  HWY_DASSERT(payload_bits % kAlignment == 0);
+  // AllocateAlignedBytes stored the header directly below the payload.
+  const AllocationHeader& header =
+      reinterpret_cast<const AllocationHeader*>(payload_bits)[-1];
+  void* const original = header.allocated;
+  if (free_ptr != nullptr) {
+    (*free_ptr)(opaque_ptr, original);
+    return;
+  }
+  free(original);
+}
+
+// static
+HWY_DLLEXPORT void AlignedDeleter::DeleteAlignedArray(void* aligned_pointer,
+                                                      FreePtr free_ptr,
+                                                      void* opaque_ptr,
+                                                      ArrayDeleter deleter) {
+  if (aligned_pointer == nullptr) return;  // nothing to delete or free
+  const uintptr_t payload_bits = reinterpret_cast<uintptr_t>(aligned_pointer);
+  HWY_DASSERT(payload_bits % kAlignment == 0);
+  // AllocateAlignedBytes stored this header directly below the payload.
+  const AllocationHeader& header =
+      reinterpret_cast<const AllocationHeader*>(payload_bits)[-1];
+
+  // Run the caller's deleter, passing the stored payload size, before the
+  // memory is released.
+  if (deleter) (*deleter)(aligned_pointer, header.payload_size);
+  // Then hand the original allocation back to free_ptr, or free() if null.
+  if (free_ptr != nullptr) {
+    (*free_ptr)(opaque_ptr, header.allocated);
+    return;
+  }
+  free(header.allocated);
+}
+
+}  // namespace hwy
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/aligned_allocator.h b/third-party/libjxl/libjxl/third_party/highway/hwy/aligned_allocator.h
new file mode 100644
index 0000000000..d0671a5774
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/aligned_allocator.h
@@ -0,0 +1,211 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_ALIGNED_ALLOCATOR_H_
+#define HIGHWAY_HWY_ALIGNED_ALLOCATOR_H_
+
+// Memory allocator with support for alignment and offsets.
+
+#include <memory>
+#include <utility>
+
+#include "hwy/base.h"
+
+namespace hwy {
+
+// Minimum alignment of allocated memory for use in HWY_ASSUME_ALIGNED, which
+// requires a literal. This matches typical L1 cache line sizes, which prevents
+// false sharing.
+#define HWY_ALIGNMENT 64
+
+// Pointers to functions equivalent to malloc/free with an opaque void* passed
+// to them.
+using AllocPtr = void* (*)(void* opaque, size_t bytes);
+using FreePtr = void (*)(void* opaque, void* memory);
+
+// Returns null or a pointer to at least `payload_size` (which can be zero)
+// bytes of newly allocated memory, aligned to the larger of HWY_ALIGNMENT and
+// the vector size. Calls `alloc` with the passed `opaque` pointer to obtain
+// memory or malloc() if it is null.
+HWY_DLLEXPORT void* AllocateAlignedBytes(size_t payload_size,
+                                         AllocPtr alloc_ptr, void* opaque_ptr);
+
+// Frees all memory. No effect if `aligned_pointer` == nullptr, otherwise it
+// must have been returned from a previous call to `AllocateAlignedBytes`.
+// Calls `free_ptr` with the passed `opaque_ptr` pointer to free the memory; if
+// `free_ptr` function is null, uses the default free().
+HWY_DLLEXPORT void FreeAlignedBytes(const void* aligned_pointer,
+                                    FreePtr free_ptr, void* opaque_ptr);
+
+// Class that deletes the aligned pointer passed to operator() calling the
+// destructor before freeing the pointer. This is equivalent to the
+// std::default_delete but for aligned objects. For a similar deleter equivalent
+// to free() for aligned memory see AlignedFreer().
+class AlignedDeleter {
+ public:
+  AlignedDeleter() : free_(nullptr), opaque_ptr_(nullptr) {}  // Uses free().
+  AlignedDeleter(FreePtr free_ptr, void* opaque_ptr)
+      : free_(free_ptr), opaque_ptr_(opaque_ptr) {}
+
+  template <typename T>
+  void operator()(T* aligned_pointer) const {
+    return DeleteAlignedArray(aligned_pointer, free_, opaque_ptr_,
+                              TypedArrayDeleter<T>);
+  }
+
+ private:
+  template <typename T>
+  static void TypedArrayDeleter(void* ptr, size_t size_in_bytes) {
+    size_t elems = size_in_bytes / sizeof(T);  // Whole elements only.
+    for (size_t i = 0; i < elems; i++) {
+      // Explicitly call the destructor on each element.
+      (static_cast<T*>(ptr) + i)->~T();
+    }
+  }
+
+  // Function prototype that calls the destructor for each element in a typed
+  // array. TypedArrayDeleter<T> matches this prototype.
+  using ArrayDeleter = void (*)(void* t_ptr, size_t t_size);
+
+  HWY_DLLEXPORT static void DeleteAlignedArray(void* aligned_pointer,
+                                               FreePtr free_ptr,
+                                               void* opaque_ptr,
+                                               ArrayDeleter deleter);
+
+  FreePtr free_;  // Null selects the default free().
+  void* opaque_ptr_;  // Forwarded to free_ as its first argument.
+};
+
+// Unique pointer to T with custom aligned deleter. This can be a single
+// element U or an array of element if T is a U[]. The custom aligned deleter
+// will call the destructor on U or each element of a U[] in the array case.
+template <typename T>
+using AlignedUniquePtr = std::unique_ptr<T, AlignedDeleter>;
+
+// Aligned memory equivalent of make_unique<T> using the custom allocators
+// alloc/free with the passed `opaque` pointer. Constructs T from Args... and
+// destroys it when the AlignedUniquePtr is destroyed. Returns an empty pointer
+// (without constructing T) if the allocation fails.
+template <typename T, typename... Args>
+AlignedUniquePtr<T> MakeUniqueAlignedWithAlloc(AllocPtr alloc, FreePtr free,
+                                               void* opaque, Args&&... args) {
+  T* ptr = static_cast<T*>(AllocateAlignedBytes(sizeof(T), alloc, opaque));
+  if (ptr != nullptr) ptr = new (ptr) T(std::forward<Args>(args)...);
+  return AlignedUniquePtr<T>(ptr, AlignedDeleter(free, opaque));
+}
+
+// Similar to MakeUniqueAlignedWithAlloc but using the default alloc/free
+// functions. Returns an empty pointer if the allocation fails.
+template <typename T, typename... Args>
+AlignedUniquePtr<T> MakeUniqueAligned(Args&&... args) {
+  T* ptr = static_cast<T*>(AllocateAlignedBytes(
+      sizeof(T), /*alloc_ptr=*/nullptr, /*opaque_ptr=*/nullptr));
+  if (ptr != nullptr) ptr = new (ptr) T(std::forward<Args>(args)...);
+  return AlignedUniquePtr<T>(ptr, AlignedDeleter());
+}
+
+// Helpers for array allocators: items * sizeof(T) with overflow detection.
+namespace detail {
+
+// Returns x = floor(log2(n)), or 0 if n <= 1; 1u << x == n for powers of two.
+static inline constexpr size_t ShiftCount(size_t n) {
+  return (n <= 1) ? 0 : 1 + ShiftCount(n / 2);
+}
+
+template <typename T>
+T* AllocateAlignedItems(size_t items, AllocPtr alloc_ptr, void* opaque_ptr) {
+  constexpr size_t size = sizeof(T);
+
+  constexpr bool is_pow2 = (size & (size - 1)) == 0;
+  constexpr size_t bits = ShiftCount(size);
+  static_assert(!is_pow2 || (1ull << bits) == size, "ShiftCount is incorrect");
+
+  const size_t bytes = is_pow2 ? items << bits : items * size;  // May wrap.
+  const size_t check = is_pow2 ? bytes >> bits : bytes / size;  // Inverse op.
+  if (check != items) {
+    return nullptr;  // overflowed
+  }
+  return static_cast<T*>(AllocateAlignedBytes(bytes, alloc_ptr, opaque_ptr));
+}
+
+}  // namespace detail
+
+// Aligned memory equivalent of make_unique<T[]> for array types using the
+// custom allocators alloc/free. Constructs every item from the same Args...
+// (forwarded once per item, so rvalue args may be moved-from after the first).
+// Each element's destructor runs when the AlignedUniquePtr is destroyed.
+template <typename T, typename... Args>
+AlignedUniquePtr<T[]> MakeUniqueAlignedArrayWithAlloc(
+    size_t items, AllocPtr alloc, FreePtr free, void* opaque, Args&&... args) {
+  T* ptr = detail::AllocateAlignedItems<T>(items, alloc, opaque);
+  if (ptr != nullptr) {  // Null on overflow or failed allocation.
+    for (size_t i = 0; i < items; i++) {
+      new (ptr + i) T(std::forward<Args>(args)...);
+    }
+  }
+  return AlignedUniquePtr<T[]>(ptr, AlignedDeleter(free, opaque));
+}
+
+template <typename T, typename... Args>
+AlignedUniquePtr<T[]> MakeUniqueAlignedArray(size_t items, Args&&... args) {
+  return MakeUniqueAlignedArrayWithAlloc<T, Args...>(
+      items, nullptr, nullptr, nullptr, std::forward<Args>(args)...);
+}
+
+// Custom deleter for std::unique_ptr equivalent to using free() as a deleter
+// but for aligned memory. Unlike AlignedDeleter, no destructors are run.
+class AlignedFreer {
+ public:
+  // Pass address of this to ctor to skip deleting externally-owned memory.
+  static void DoNothing(void* /*opaque*/, void* /*aligned_pointer*/) {}
+
+  AlignedFreer() : free_(nullptr), opaque_ptr_(nullptr) {}  // Uses free().
+  AlignedFreer(FreePtr free_ptr, void* opaque_ptr)
+      : free_(free_ptr), opaque_ptr_(opaque_ptr) {}
+
+  template <typename T>
+  void operator()(T* aligned_pointer) const {
+    // TODO(deymo): assert that we are using a POD type T.
+    FreeAlignedBytes(aligned_pointer, free_, opaque_ptr_);
+  }
+
+ private:
+  FreePtr free_;  // Null selects the default free().
+  void* opaque_ptr_;  // Forwarded to free_ as its first argument.
+};
+
+// Unique pointer to single POD, or (if T is U[]) an array of POD. For non POD
+// data use AlignedUniquePtr.
+template <typename T>
+using AlignedFreeUniquePtr = std::unique_ptr<T, AlignedFreer>;
+
+// Allocate an aligned and uninitialized array of POD values as a unique_ptr.
+// Upon destruction of the unique_ptr the aligned array will be freed.
+template <typename T>
+AlignedFreeUniquePtr<T[]> AllocateAligned(const size_t items, AllocPtr alloc,
+                                          FreePtr free, void* opaque) {
+  return AlignedFreeUniquePtr<T[]>(
+      detail::AllocateAlignedItems<T>(items, alloc, opaque),
+      AlignedFreer(free, opaque));
+}
+
+// Same as previous AllocateAligned(), using default allocate/free functions.
+template <typename T>
+AlignedFreeUniquePtr<T[]> AllocateAligned(const size_t items) {
+  return AllocateAligned<T>(items, nullptr, nullptr, nullptr);
+}
+
+}  // namespace hwy
+#endif  // HIGHWAY_HWY_ALIGNED_ALLOCATOR_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/aligned_allocator_test.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/aligned_allocator_test.cc
new file mode 100644
index 0000000000..5d678a0de6
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/aligned_allocator_test.cc
@@ -0,0 +1,280 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/aligned_allocator.h"
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>  // malloc
+
+#include <array>
+#include <random>
+#include <set>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+namespace {
+
+// Sample object that tracks, via an external counter, how many instances built
+// with the explicit constructor have not yet been destroyed.
+template <size_t N>
+class SampleObject {
+ public:
+  SampleObject() { data_[0] = 'a'; }  // Leaves counter_ null.
+  explicit SampleObject(int* counter) : counter_(counter) {
+    if (counter) (*counter)++;
+    data_[0] = 'b';
+  }
+
+  ~SampleObject() {
+    if (counter_) (*counter_)--;
+  }
+
+  static_assert(N > sizeof(int*), "SampleObject size too small.");
+  int* counter_ = nullptr;
+  char data_[N - sizeof(int*)];  // Sized so the members total N bytes.
+};
+
+class FakeAllocator {
+ public:
+  // Static AllocPtr and FreePtr members to be used with the aligned
+  // allocator. These functions call the private non-static members.
+  static void* StaticAlloc(void* opaque, size_t bytes) {
+    return reinterpret_cast<FakeAllocator*>(opaque)->Alloc(bytes);
+  }
+  static void StaticFree(void* opaque, void* memory) {
+    return reinterpret_cast<FakeAllocator*>(opaque)->Free(memory);
+  }
+
+  // Returns the number of pending allocations to be freed.
+  size_t PendingAllocs() { return allocs_.size(); }
+
+ private:
+  void* Alloc(size_t bytes) {
+    void* ret = malloc(bytes);
+    allocs_.insert(ret);  // Track until Free() is called.
+    return ret;
+  }
+  void Free(void* memory) {
+    if (!memory) return;
+    EXPECT_NE(allocs_.end(), allocs_.find(memory));  // Must be ours.
+    allocs_.erase(memory);
+    free(memory);
+  }
+
+  std::set<void*> allocs_;  // Live (not yet freed) allocations.
+};
+
+}  // namespace
+
+namespace hwy {
+
+class AlignedAllocatorTest : public testing::Test {};
+
+TEST(AlignedAllocatorTest, FreeNullptr) {
+  // Calling free with a nullptr is always ok.
+  FreeAlignedBytes(/*aligned_pointer=*/nullptr, /*free_ptr=*/nullptr,
+                   /*opaque_ptr=*/nullptr);
+}
+
+TEST(AlignedAllocatorTest, Log2) {
+  EXPECT_EQ(0u, detail::ShiftCount(1));
+  EXPECT_EQ(1u, detail::ShiftCount(2));
+  EXPECT_EQ(3u, detail::ShiftCount(8));
+}
+
+// Allocator returns null when it detects overflow of items * sizeof(T).
+TEST(AlignedAllocatorTest, Overflow) {
+  constexpr size_t max = ~size_t(0);
+  constexpr size_t msb = (max >> 1) + 1;
+  using Size5 = std::array<uint8_t, 5>;
+  using Size10 = std::array<uint8_t, 10>;
+  EXPECT_EQ(nullptr,
+            detail::AllocateAlignedItems<uint32_t>(max / 2, nullptr, nullptr));
+  EXPECT_EQ(nullptr,
+            detail::AllocateAlignedItems<uint32_t>(max / 3, nullptr, nullptr));
+  EXPECT_EQ(nullptr,
+            detail::AllocateAlignedItems<Size5>(max / 4, nullptr, nullptr));
+  EXPECT_EQ(nullptr,
+            detail::AllocateAlignedItems<uint16_t>(msb, nullptr, nullptr));
+  EXPECT_EQ(nullptr,
+            detail::AllocateAlignedItems<double>(msb + 1, nullptr, nullptr));
+  EXPECT_EQ(nullptr,
+            detail::AllocateAlignedItems<Size10>(msb / 4, nullptr, nullptr));
+}
+
+TEST(AlignedAllocatorTest, AllocDefaultPointers) {
+  const size_t kSize = 7777;
+  void* ptr = AllocateAlignedBytes(kSize, /*alloc_ptr=*/nullptr,
+                                   /*opaque_ptr=*/nullptr);
+  ASSERT_NE(nullptr, ptr);
+  // Make sure the pointer is actually aligned.
+  EXPECT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % HWY_ALIGNMENT);
+  char* p = static_cast<char*>(ptr);
+  size_t ret = 0;
+  for (size_t i = 0; i < kSize; i++) {
+    // Performs a computation using p[] to prevent it being optimized away.
+    p[i] = static_cast<char>(i & 0x7F);
+    if (i) ret += static_cast<size_t>(p[i] * p[i - 1]);
+  }
+  EXPECT_NE(0U, ret);
+  FreeAlignedBytes(ptr, /*free_ptr=*/nullptr, /*opaque_ptr=*/nullptr);
+}
+
+TEST(AlignedAllocatorTest, EmptyAlignedUniquePtr) {
+  AlignedUniquePtr<SampleObject<32>> ptr(nullptr, AlignedDeleter());
+  AlignedUniquePtr<SampleObject<32>[]> arr(nullptr, AlignedDeleter());
+}
+
+TEST(AlignedAllocatorTest, EmptyAlignedFreeUniquePtr) {
+  AlignedFreeUniquePtr<SampleObject<32>> ptr(nullptr, AlignedFreer());
+  AlignedFreeUniquePtr<SampleObject<32>[]> arr(nullptr, AlignedFreer());
+}
+
+TEST(AlignedAllocatorTest, CustomAlloc) {
+  FakeAllocator fake_alloc;
+
+  const size_t kSize = 7777;
+  void* ptr =
+      AllocateAlignedBytes(kSize, &FakeAllocator::StaticAlloc, &fake_alloc);
+  ASSERT_NE(nullptr, ptr);
+  // We should have only requested one alloc from the allocator.
+  EXPECT_EQ(1U, fake_alloc.PendingAllocs());
+  // Make sure the pointer is actually aligned.
+  EXPECT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % HWY_ALIGNMENT);
+  FreeAlignedBytes(ptr, &FakeAllocator::StaticFree, &fake_alloc);
+  EXPECT_EQ(0U, fake_alloc.PendingAllocs());
+}
+
+TEST(AlignedAllocatorTest, MakeUniqueAlignedDefaultConstructor) {
+  {
+    auto ptr = MakeUniqueAligned<SampleObject<24>>();
+    // Default constructor sets the data_[0] to 'a'.
+    EXPECT_EQ('a', ptr->data_[0]);
+    EXPECT_EQ(nullptr, ptr->counter_);
+  }
+}
+
+TEST(AlignedAllocatorTest, MakeUniqueAligned) {
+  int counter = 0;
+  {
+    // Creates the object, initializes it with the explicit constructor and
+    // returns a unique_ptr to it.
+    auto ptr = MakeUniqueAligned<SampleObject<24>>(&counter);
+    EXPECT_EQ(1, counter);
+    // Custom constructor sets the data_[0] to 'b'.
+    EXPECT_EQ('b', ptr->data_[0]);
+  }
+  EXPECT_EQ(0, counter);  // Destructor ran when ptr went out of scope.
+}
+
+TEST(AlignedAllocatorTest, MakeUniqueAlignedArray) {
+  int counter = 0;
+  {
+    // Creates the array of objects and initializes them with the explicit
+    // constructor.
+    auto arr = MakeUniqueAlignedArray<SampleObject<24>>(7, &counter);
+    EXPECT_EQ(7, counter);
+    for (size_t i = 0; i < 7; i++) {
+      // Custom constructor sets the data_[0] to 'b'.
+      EXPECT_EQ('b', arr[i].data_[0]) << "Where i = " << i;
+    }
+  }
+  EXPECT_EQ(0, counter);
+}
+
+TEST(AlignedAllocatorTest, AllocSingleInt) {
+  auto ptr = AllocateAligned<uint32_t>(1);
+  ASSERT_NE(nullptr, ptr.get());
+  EXPECT_EQ(0U, reinterpret_cast<uintptr_t>(ptr.get()) % HWY_ALIGNMENT);
+  // Force delete of the unique_ptr now to check that it doesn't crash.
+  ptr.reset(nullptr);
+  EXPECT_EQ(nullptr, ptr.get());
+}
+
+TEST(AlignedAllocatorTest, AllocMultipleInt) {
+  const size_t kSize = 7777;
+  auto ptr = AllocateAligned<uint32_t>(kSize);
+  ASSERT_NE(nullptr, ptr.get());
+  EXPECT_EQ(0U, reinterpret_cast<uintptr_t>(ptr.get()) % HWY_ALIGNMENT);
+  // ptr[i] is actually ptr.get()[i], provided by the operator[] of the
+  // std::unique_ptr<T[]> specialization that AllocateAligned() returns.
+  EXPECT_EQ(&(ptr[0]) + 1, &(ptr[1]));
+
+  size_t ret = 0;
+  for (size_t i = 0; i < kSize; i++) {
+    // Performs a computation using ptr[] to prevent it being optimized away.
+    ptr[i] = static_cast<uint32_t>(i);
+    if (i) ret += ptr[i] * ptr[i - 1];
+  }
+  EXPECT_NE(0U, ret);
+}
+
+TEST(AlignedAllocatorTest, AllocateAlignedObjectWithoutDestructor) {
+  int counter = 0;
+  {
+    // This doesn't call the constructor.
+    auto obj = AllocateAligned<SampleObject<24>>(1);
+    obj[0].counter_ = &counter;
+  }
+  // Destroying the unique_ptr shouldn't have called the destructor of the
+  // SampleObject<24>.
+  EXPECT_EQ(0, counter);
+}
+
+TEST(AlignedAllocatorTest, MakeUniqueAlignedArrayWithCustomAlloc) {
+  FakeAllocator fake_alloc;
+  int counter = 0;
+  {
+    // Creates the array of objects and initializes them with the explicit
+    // constructor.
+    auto arr = MakeUniqueAlignedArrayWithAlloc<SampleObject<24>>(
+        7, FakeAllocator::StaticAlloc, FakeAllocator::StaticFree, &fake_alloc,
+        &counter);
+    ASSERT_NE(nullptr, arr.get());
+    // An array should still only call a single allocation.
+    EXPECT_EQ(1u, fake_alloc.PendingAllocs());
+    EXPECT_EQ(7, counter);
+    for (size_t i = 0; i < 7; i++) {
+      // Custom constructor sets the data_[0] to 'b'.
+      EXPECT_EQ('b', arr[i].data_[0]) << "Where i = " << i;
+    }
+  }
+  EXPECT_EQ(0, counter);
+  EXPECT_EQ(0u, fake_alloc.PendingAllocs());
+}
+
+TEST(AlignedAllocatorTest, DefaultInit) {
+  // The test is whether this compiles. Default-init is useful for output params
+  // and per-thread storage.
+  std::vector<AlignedUniquePtr<int[]>> ptrs;
+  std::vector<AlignedFreeUniquePtr<double[]>> free_ptrs;
+  ptrs.resize(128);
+  free_ptrs.resize(128);
+  // The following is to prevent elision of the pointers.
+  std::mt19937 rng(129);  // Emscripten lacks random_device.
+  std::uniform_int_distribution<size_t> dist(0, 127);
+  ptrs[dist(rng)] = MakeUniqueAlignedArray<int>(123);
+  free_ptrs[dist(rng)] = AllocateAligned<double>(456);
+  // "Use" pointer without resorting to printf. 0 == 0. Can't shift by 64.
+  const auto addr1 = reinterpret_cast<uintptr_t>(ptrs[dist(rng)].get());
+  const auto addr2 = reinterpret_cast<uintptr_t>(free_ptrs[dist(rng)].get());
+  constexpr size_t kBits = sizeof(uintptr_t) * 8;
+  EXPECT_EQ((addr1 >> (kBits - 1)) >> (kBits - 1),
+            (addr2 >> (kBits - 1)) >> (kBits - 1));
+}
+
+}  // namespace hwy
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/base.h b/third-party/libjxl/libjxl/third_party/highway/hwy/base.h
new file mode 100644
index 0000000000..8f1e161b16
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/base.h
@@ -0,0 +1,1323 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_BASE_H_
+#define HIGHWAY_HWY_BASE_H_
+
+// For SIMD module implementations and their callers, target-independent.
+
+// IWYU pragma: begin_exports
+#include <stddef.h>
+#include <stdint.h>
+
+#include "hwy/detect_compiler_arch.h"
+#include "hwy/highway_export.h"
+
+// "IWYU pragma: keep" does not work for these includes, so hide from the IDE.
+#if !HWY_IDE
+
+#if !defined(HWY_NO_LIBCXX)
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS  // before inttypes.h
+#endif
+#include <inttypes.h>
+#endif
+
+#if (HWY_ARCH_X86 && !defined(HWY_NO_LIBCXX)) || HWY_COMPILER_MSVC
+#include <atomic>
+#endif
+
+#endif  // !HWY_IDE
+
+// IWYU pragma: end_exports
+
+#if HWY_COMPILER_MSVC
+#include <string.h>  // memcpy
+#endif
+
+//------------------------------------------------------------------------------
+// Compiler-specific definitions
+
+#define HWY_STR_IMPL(macro) #macro
+#define HWY_STR(macro) HWY_STR_IMPL(macro)
+
+#if HWY_COMPILER_MSVC
+
+#include <intrin.h>
+
+#define HWY_RESTRICT __restrict
+#define HWY_INLINE __forceinline
+#define HWY_NOINLINE __declspec(noinline)
+#define HWY_FLATTEN
+#define HWY_NORETURN __declspec(noreturn)
+#define HWY_LIKELY(expr) (expr)
+#define HWY_UNLIKELY(expr) (expr)
+#define HWY_PRAGMA(tokens) __pragma(tokens)
+#define HWY_DIAGNOSTICS(tokens) HWY_PRAGMA(warning(tokens))
+#define HWY_DIAGNOSTICS_OFF(msc, gcc) HWY_DIAGNOSTICS(msc)
+#define HWY_MAYBE_UNUSED
+#define HWY_HAS_ASSUME_ALIGNED 0
+#if (_MSC_VER >= 1700)
+#define HWY_MUST_USE_RESULT _Check_return_
+#else
+#define HWY_MUST_USE_RESULT
+#endif
+
+#else
+
+#define HWY_RESTRICT __restrict__
+// force inlining without optimization enabled creates very inefficient code
+// that can cause compiler timeout
+#ifdef __OPTIMIZE__
+#define HWY_INLINE inline __attribute__((always_inline))
+#else
+#define HWY_INLINE inline
+#endif
+#define HWY_NOINLINE __attribute__((noinline))
+#define HWY_FLATTEN __attribute__((flatten))
+#define HWY_NORETURN __attribute__((noreturn))
+#define HWY_LIKELY(expr) __builtin_expect(!!(expr), 1)
+#define HWY_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
+#define HWY_PRAGMA(tokens) _Pragma(#tokens)
+#define HWY_DIAGNOSTICS(tokens) HWY_PRAGMA(GCC diagnostic tokens)
+#define HWY_DIAGNOSTICS_OFF(msc, gcc) HWY_DIAGNOSTICS(gcc)
+// Encountered "attribute list cannot appear here" when using the C++17
+// [[maybe_unused]], so only use the old style attribute for now.
+#define HWY_MAYBE_UNUSED __attribute__((unused))
+#define HWY_MUST_USE_RESULT __attribute__((warn_unused_result))
+
+#endif  // !HWY_COMPILER_MSVC
+
+//------------------------------------------------------------------------------
+// Builtin/attributes (no more #include after this point due to namespace!)
+
+namespace hwy {
+
+// Enables error-checking of format strings.
+#if HWY_HAS_ATTRIBUTE(__format__)
+#define HWY_FORMAT(idx_fmt, idx_arg) \
+  __attribute__((__format__(__printf__, idx_fmt, idx_arg)))
+#else
+#define HWY_FORMAT(idx_fmt, idx_arg)
+#endif
+
+// Returns a void* pointer which the compiler then assumes is N-byte aligned.
+// Example: float* HWY_RESTRICT aligned = (float*)HWY_ASSUME_ALIGNED(in, 32);
+//
+// The assignment semantics are required by GCC/Clang. ICC provides an in-place
+// __assume_aligned, whereas MSVC's __assume appears unsuitable.
+#if HWY_HAS_BUILTIN(__builtin_assume_aligned)
+#define HWY_ASSUME_ALIGNED(ptr, align) __builtin_assume_aligned((ptr), (align))
+#else
+#define HWY_ASSUME_ALIGNED(ptr, align) (ptr) /* not supported */
+#endif
+
+// Clang and GCC require attributes on each function into which SIMD intrinsics
+// are inlined. Support both per-function annotation (HWY_ATTR) for lambdas and
+// automatic annotation via pragmas.
+#if HWY_COMPILER_ICC
+// As of ICC 2021.{1-9} the pragma is neither implemented nor required.
+#define HWY_PUSH_ATTRIBUTES(targets_str)
+#define HWY_POP_ATTRIBUTES
+#elif HWY_COMPILER_CLANG
+#define HWY_PUSH_ATTRIBUTES(targets_str)                                \
+  HWY_PRAGMA(clang attribute push(__attribute__((target(targets_str))), \
+                                  apply_to = function))
+#define HWY_POP_ATTRIBUTES HWY_PRAGMA(clang attribute pop)
+#elif HWY_COMPILER_GCC_ACTUAL
+#define HWY_PUSH_ATTRIBUTES(targets_str) \
+  HWY_PRAGMA(GCC push_options) HWY_PRAGMA(GCC target targets_str)
+#define HWY_POP_ATTRIBUTES HWY_PRAGMA(GCC pop_options)
+#else
+#define HWY_PUSH_ATTRIBUTES(targets_str)
+#define HWY_POP_ATTRIBUTES
+#endif
+
+//------------------------------------------------------------------------------
+// Macros
+
+#define HWY_API static HWY_INLINE HWY_FLATTEN HWY_MAYBE_UNUSED
+
+#define HWY_CONCAT_IMPL(a, b) a##b
+#define HWY_CONCAT(a, b) HWY_CONCAT_IMPL(a, b)
+
+#define HWY_MIN(a, b) ((a) < (b) ? (a) : (b))
+#define HWY_MAX(a, b) ((a) > (b) ? (a) : (b))
+
+#if HWY_COMPILER_GCC_ACTUAL
+// nielskm: GCC does not support '#pragma GCC unroll' without the factor.
+#define HWY_UNROLL(factor) HWY_PRAGMA(GCC unroll factor)
+#define HWY_DEFAULT_UNROLL HWY_UNROLL(4)
+#elif HWY_COMPILER_CLANG || HWY_COMPILER_ICC || HWY_COMPILER_ICX
+#define HWY_UNROLL(factor) HWY_PRAGMA(unroll factor)
+#define HWY_DEFAULT_UNROLL HWY_UNROLL()
+#else
+#define HWY_UNROLL(factor)
+#define HWY_DEFAULT_UNROLL
+#endif
+
+// Tell a compiler that the expression always evaluates to true.
+// The expression should be free from any side effects.
+// Some older compilers may have trouble with complex expressions, therefore
+// it is advisable to split multiple conditions into separate assume statements,
+// and manually check the generated code.
+// OK but could fail:
+//   HWY_ASSUME(x == 2 && y == 3);
+// Better:
+//   HWY_ASSUME(x == 2);
+//   HWY_ASSUME(y == 3);
+#if HWY_HAS_CPP_ATTRIBUTE(assume)
+#define HWY_ASSUME(expr) [[assume(expr)]]
+#elif HWY_COMPILER_MSVC || HWY_COMPILER_ICC
+#define HWY_ASSUME(expr) __assume(expr)
+// __builtin_assume() was added in clang 3.6.
+#elif HWY_COMPILER_CLANG && HWY_HAS_BUILTIN(__builtin_assume)
+#define HWY_ASSUME(expr) __builtin_assume(expr)
+// __builtin_unreachable() was added in GCC 4.5, but __has_builtin() was added
+// later, so check for the compiler version directly.
+#elif HWY_COMPILER_GCC_ACTUAL >= 405
+#define HWY_ASSUME(expr) \
+  ((expr) ? static_cast<void>(0) : __builtin_unreachable())
+#else
+#define HWY_ASSUME(expr) static_cast<void>(0)
+#endif
+
+// Compile-time fence to prevent undesirable code reordering. On Clang x86, the
+// typical asm volatile("" : : : "memory") has no effect, whereas atomic fence
+// does, without generating code.
+#if HWY_ARCH_X86 && !defined(HWY_NO_LIBCXX)
+#define HWY_FENCE std::atomic_thread_fence(std::memory_order_acq_rel)
+#else
+// TODO(janwas): investigate alternatives. On Arm, the above generates barriers.
+#define HWY_FENCE
+#endif
+
+// 4 instances of a given literal value, useful as input to LoadDup128.
+#define HWY_REP4(literal) literal, literal, literal, literal
+
+HWY_DLLEXPORT HWY_NORETURN void HWY_FORMAT(3, 4)
+    Abort(const char* file, int line, const char* format, ...);
+
+#define HWY_ABORT(format, ...) \
+  ::hwy::Abort(__FILE__, __LINE__, format, ##__VA_ARGS__)
+
+// Always enabled.
+#define HWY_ASSERT(condition)             \
+  do {                                    \
+    if (!(condition)) {                   \
+      HWY_ABORT("Assert %s", #condition); \
+    }                                     \
+  } while (0)
+
+#if HWY_HAS_FEATURE(memory_sanitizer) || defined(MEMORY_SANITIZER)
+#define HWY_IS_MSAN 1
+#else
+#define HWY_IS_MSAN 0
+#endif
+
+#if HWY_HAS_FEATURE(address_sanitizer) || defined(ADDRESS_SANITIZER)
+#define HWY_IS_ASAN 1
+#else
+#define HWY_IS_ASAN 0
+#endif
+
+#if HWY_HAS_FEATURE(thread_sanitizer) || defined(THREAD_SANITIZER)
+#define HWY_IS_TSAN 1
+#else
+#define HWY_IS_TSAN 0
+#endif
+
+// MSAN may cause lengthy build times or false positives e.g. in AVX3 DemoteTo.
+// You can disable MSAN by adding this attribute to the function that fails.
+#if HWY_IS_MSAN
+#define HWY_ATTR_NO_MSAN __attribute__((no_sanitize_memory))
+#else
+#define HWY_ATTR_NO_MSAN
+#endif
+
+// For enabling HWY_DASSERT and shortening tests in slower debug builds
+#if !defined(HWY_IS_DEBUG_BUILD)
+// Clang does not define NDEBUG, but it and GCC define __OPTIMIZE__, and recent
+// MSVC defines NDEBUG (if not, could instead check _DEBUG).
+#if (!defined(__OPTIMIZE__) && !defined(NDEBUG)) || HWY_IS_ASAN || \
+    HWY_IS_MSAN || HWY_IS_TSAN || defined(__clang_analyzer__)
+#define HWY_IS_DEBUG_BUILD 1
+#else
+#define HWY_IS_DEBUG_BUILD 0
+#endif
+#endif  // HWY_IS_DEBUG_BUILD
+
+#if HWY_IS_DEBUG_BUILD
+#define HWY_DASSERT(condition) HWY_ASSERT(condition)
+#else
+#define HWY_DASSERT(condition) \
+  do {                         \
+  } while (0)
+#endif
+
+//------------------------------------------------------------------------------
+// CopyBytes / ZeroBytes
+
+#if HWY_COMPILER_MSVC
+#pragma intrinsic(memcpy)
+#pragma intrinsic(memset)
+#endif
+
+// The source/destination must not overlap/alias.
+template <size_t kBytes, typename From, typename To>
+HWY_API void CopyBytes(const From* from, To* to) {
+#if HWY_COMPILER_MSVC
+  memcpy(to, from, kBytes);
+#else
+  __builtin_memcpy(static_cast<void*>(to), static_cast<const void*>(from),
+                   kBytes);
+#endif
+}
+
+HWY_API void CopyBytes(const void* HWY_RESTRICT from, void* HWY_RESTRICT to,
+                       size_t num_of_bytes_to_copy) {
+#if HWY_COMPILER_MSVC
+  memcpy(to, from, num_of_bytes_to_copy);
+#else
+  __builtin_memcpy(to, from, num_of_bytes_to_copy);
+#endif
+}
+
+// Same as CopyBytes, but for same-sized objects; avoids a size argument.
+template <typename From, typename To>
+HWY_API void CopySameSize(const From* HWY_RESTRICT from, To* HWY_RESTRICT to) {
+  static_assert(sizeof(From) == sizeof(To), "");
+  CopyBytes<sizeof(From)>(from, to);
+}
+
+template <size_t kBytes, typename To>
+HWY_API void ZeroBytes(To* to) {
+#if HWY_COMPILER_MSVC
+  memset(to, 0, kBytes);
+#else
+  __builtin_memset(to, 0, kBytes);
+#endif
+}
+
+HWY_API void ZeroBytes(void* to, size_t num_bytes) {
+#if HWY_COMPILER_MSVC
+  memset(to, 0, num_bytes);
+#else
+  __builtin_memset(to, 0, num_bytes);
+#endif
+}
+
+//------------------------------------------------------------------------------
+// kMaxVectorSize (undocumented, pending removal)
+
+#if HWY_ARCH_X86
+static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 64;  // AVX-512
+#elif HWY_ARCH_RVV && defined(__riscv_v_intrinsic) && \
+    __riscv_v_intrinsic >= 11000
+// Not actually an upper bound on the size.
+static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 4096;
+#else
+static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 16;
+#endif
+
+//------------------------------------------------------------------------------
+// Alignment
+
+// Potentially useful for LoadDup128 and capped vectors. In other cases, arrays
+// should be allocated dynamically via aligned_allocator.h because Lanes() may
+// exceed the stack size.
+#if HWY_ARCH_X86
+#define HWY_ALIGN_MAX alignas(64)
+#elif HWY_ARCH_RVV && defined(__riscv_v_intrinsic) && \
+    __riscv_v_intrinsic >= 11000
+#define HWY_ALIGN_MAX alignas(8)  // only elements need be aligned
+#else
+#define HWY_ALIGN_MAX alignas(16)
+#endif
+
+//------------------------------------------------------------------------------
+// Lane types
+
+// float16_t load/store/conversion intrinsics are always supported on Armv8 and
+// VFPv4 (except with MSVC). On Armv7 Clang requires __ARM_FP & 2; GCC requires
+// -mfp16-format=ieee.
+#if (HWY_ARCH_ARM_A64 && !HWY_COMPILER_MSVC) ||                    \
+    (HWY_COMPILER_CLANG && defined(__ARM_FP) && (__ARM_FP & 2)) || \
+    (HWY_COMPILER_GCC_ACTUAL && defined(__ARM_FP16_FORMAT_IEEE))
+#define HWY_NEON_HAVE_FLOAT16C 1
+#else
+#define HWY_NEON_HAVE_FLOAT16C 0
+#endif
+
+// If 1, both __bf16 and a limited set of *_bf16 SVE intrinsics are available:
+// create/get/set/dup, ld/st, sel, rev, trn, uzp, zip.
+#if HWY_ARCH_ARM_A64 && defined(__ARM_FEATURE_SVE_BF16)
+#define HWY_SVE_HAVE_BFLOAT16 1
+#else
+#define HWY_SVE_HAVE_BFLOAT16 0
+#endif
+
+// float16_t is chosen from three options, in priority order: the ACLE __fp16
+// type, the _Float16 extension type, or an emulation struct.
+//
+// Match [u]int##_t naming scheme so rvv-inl.h macros can obtain the type name
+// by concatenating base type and bits.
+
+// 1) ACLE's __fp16
+#if HWY_NEON_HAVE_FLOAT16C
+using float16_t = __fp16;
+// 2) C11 extension ISO/IEC TS 18661-3:2015 but not supported on all targets.
+//    Required if HWY_HAVE_FLOAT16, i.e. RVV with zvfh or AVX3_SPR (with
+//    sufficiently new compiler supporting avx512fp16). Do not use on clang-cl,
+//    which is missing __extendhfsf2.
+#elif (                                                                        \
+    (HWY_ARCH_RVV && defined(__riscv_zvfh) && HWY_COMPILER_CLANG) ||           \
+    (HWY_ARCH_X86 && ((HWY_COMPILER_CLANG >= 1600 && !HWY_COMPILER_CLANGCL) || \
+                      HWY_COMPILER_GCC_ACTUAL >= 1200)))
+using float16_t = _Float16;
+// 3) Otherwise emulate
+//    with a struct that only stores the raw 16 bits. HWY_EMULATE_FLOAT16 makes
+//    F32FromF16/F16FromF32 take their software conversion paths and enables
+//    the comparison operators defined for this struct.
+#else
+#define HWY_EMULATE_FLOAT16
+#pragma pack(push, 1)
+struct float16_t {
+  uint16_t bits;
+};
+#pragma pack(pop)
+#endif  // float16_t
+
+// bfloat16_t is the compiler's __bf16 when HWY_SVE_HAVE_BFLOAT16 is 1.
+// Otherwise it is a wrapper struct that only stores the raw 16 bits; values
+// are converted via F32FromBF16/BF16FromF32.
+#if HWY_SVE_HAVE_BFLOAT16
+using bfloat16_t = __bf16;
+#else
+#pragma pack(push, 1)
+struct bfloat16_t {
+  uint16_t bits;
+};
+#pragma pack(pop)
+#endif  // bfloat16_t
+
+// Converts a binary16 value to float. When HWY_EMULATE_FLOAT16 is defined the
+// sign/exponent/mantissa fields are decoded manually; otherwise the compiler's
+// native conversion is used.
+HWY_API float F32FromF16(float16_t f16) {
+#ifdef HWY_EMULATE_FLOAT16
+  uint16_t bits16;
+  CopySameSize(&f16, &bits16);
+  const uint32_t sign = static_cast<uint32_t>(bits16 >> 15);
+  const uint32_t biased_exp = (bits16 >> 10) & 0x1F;
+  const uint32_t mantissa = bits16 & 0x3FF;
+
+  // Subnormal or zero: no implicit leading one, value = mantissa * 2^-24.
+  if (biased_exp == 0) {
+    const float subnormal =
+        (1.0f / 16384) * (static_cast<float>(mantissa) * (1.0f / 1024));
+    return sign ? -subnormal : subnormal;
+  }
+
+  // Normalized, infinity or NaN: convert the representation directly (faster
+  // than ldexp/tables). An all-ones binary16 exponent (31) encodes Inf/NaN and
+  // must become the all-ones binary32 exponent (0xFF); merely rebiasing it
+  // would produce a finite value (+Inf would decode as 65536.0f). The widened
+  // mantissa remains zero for Inf and nonzero for NaN.
+  const uint32_t biased_exp32 =
+      (biased_exp == 31) ? 0xFFu : biased_exp + (127 - 15);
+  const uint32_t mantissa32 = mantissa << (23 - 10);
+  const uint32_t bits32 = (sign << 31) | (biased_exp32 << 23) | mantissa32;
+
+  float result;
+  CopySameSize(&bits32, &result);
+  return result;
+#else
+  return static_cast<float>(f16);
+#endif
+}
+
+// Converts a float to binary16. When HWY_EMULATE_FLOAT16 is defined the result
+// is assembled from the binary32 bit fields; excess mantissa bits are dropped
+// by right-shifting (no rounding). Otherwise the compiler's native conversion
+// is used.
+// NOTE: in the emulated path the unbiased exponent is clamped to 15, so inputs
+// with a larger exponent (including Inf/NaN, whose biased exponent is 255)
+// yield an encoding with biased exponent 30, i.e. a finite binary16 value.
+HWY_API float16_t F16FromF32(float f32) {
+#ifdef HWY_EMULATE_FLOAT16
+  uint32_t bits32;
+  CopySameSize(&f32, &bits32);
+  const uint32_t sign = bits32 >> 31;
+  const uint32_t biased_exp32 = (bits32 >> 23) & 0xFF;
+  const uint32_t mantissa32 = bits32 & 0x7FFFFF;
+
+  // Unbiased exponent, limited to the largest exponent of a finite binary16.
+  const int32_t exp = HWY_MIN(static_cast<int32_t>(biased_exp32) - 127, 15);
+
+  // Tiny or zero => zero.
+  float16_t out;
+  if (exp < -24) {
+    // restore original sign
+    const uint16_t bits = static_cast<uint16_t>(sign << 15);
+    CopySameSize(&bits, &out);
+    return out;
+  }
+
+  uint32_t biased_exp16, mantissa16;
+
+  // exp = [-24, -15] => subnormal
+  if (exp < -14) {
+    biased_exp16 = 0;
+    // Number of positions the significand must be shifted right so that it
+    // can be expressed with the fixed subnormal exponent.
+    const uint32_t sub_exp = static_cast<uint32_t>(-14 - exp);
+    HWY_DASSERT(1 <= sub_exp && sub_exp < 11);
+    // The first term is the binary32 implicit leading one, made explicit at
+    // its shifted position; the second is the shifted-down stored mantissa.
+    mantissa16 = static_cast<uint32_t>((1u << (10 - sub_exp)) +
+                                       (mantissa32 >> (13 + sub_exp)));
+  } else {
+    // exp = [-14, 15]
+    biased_exp16 = static_cast<uint32_t>(exp + 15);
+    HWY_DASSERT(1 <= biased_exp16 && biased_exp16 < 31);
+    // Keep the upper 10 of the 23 mantissa bits.
+    mantissa16 = mantissa32 >> 13;
+  }
+
+  HWY_DASSERT(mantissa16 < 1024);
+  const uint32_t bits16 = (sign << 15) | (biased_exp16 << 10) | mantissa16;
+  HWY_DASSERT(bits16 < 0x10000);
+  const uint16_t narrowed = static_cast<uint16_t>(bits16);  // big-endian safe
+  CopySameSize(&narrowed, &out);
+  return out;
+#else
+  return static_cast<float16_t>(f32);
+#endif
+}
+
+// Widens a bfloat16 to float: its 16 bits become the upper half of the
+// binary32 representation and the lower 16 bits are zero.
+HWY_API float F32FromBF16(bfloat16_t bf) {
+  uint16_t upper_half;
+  CopyBytes<2>(&bf, &upper_half);
+  const uint32_t widened = static_cast<uint32_t>(upper_half) << 16;
+  float result;
+  CopySameSize(&widened, &result);
+  return result;
+}
+
+// Reads two bytes from `ptr` as a float16_t (via a byte copy) and returns the
+// value converted to float.
+HWY_API float F32FromF16Mem(const void* ptr) {
+  float16_t loaded;
+  CopyBytes<2>(ptr, &loaded);
+  const float widened = F32FromF16(loaded);
+  return widened;
+}
+
+// Reads two bytes from `ptr` as a bfloat16_t (via a byte copy) and returns the
+// value converted to float.
+HWY_API float F32FromBF16Mem(const void* ptr) {
+  bfloat16_t loaded;
+  CopyBytes<2>(ptr, &loaded);
+  const float widened = F32FromBF16(loaded);
+  return widened;
+}
+
+// Narrows a float to bfloat16 by keeping only the upper 16 bits of its
+// binary32 representation; the lower 16 bits are discarded without rounding.
+HWY_API bfloat16_t BF16FromF32(float f) {
+  uint32_t f32_bits;
+  CopySameSize(&f, &f32_bits);
+  const uint16_t upper_half = static_cast<uint16_t>(f32_bits >> 16);
+  bfloat16_t result;
+  CopySameSize(&upper_half, &result);
+  return result;
+}
+
+// Sized names for float/double, following the float16_t/bfloat16_t naming.
+using float32_t = float;
+using float64_t = double;
+
+// The three structs below are declared with 1-byte packing in effect and each
+// carries an explicit alignas() equal to its total size.
+#pragma pack(push, 1)
+
+// Aligned 128-bit type. Cannot use __int128 because clang doesn't yet align it:
+// https://reviews.llvm.org/D86310
+struct alignas(16) uint128_t {
+  uint64_t lo;  // little-endian layout
+  uint64_t hi;
+};
+
+// 64 bit key plus 64 bit value. Faster than using uint128_t when only the key
+// field is to be compared (Lt128Upper instead of Lt128).
+// The key is the second member, i.e. it occupies the same position as
+// uint128_t::hi.
+struct alignas(16) K64V64 {
+  uint64_t value;  // little-endian layout
+  uint64_t key;
+};
+
+// 32 bit key plus 32 bit value. Allows vqsort recursions to terminate earlier
+// than when considering both to be a 64-bit key.
+struct alignas(8) K32V32 {
+  uint32_t value;  // little-endian layout
+  uint32_t key;
+};
+
+#pragma pack(pop)
+
+#ifdef HWY_EMULATE_FLOAT16
+
+// The emulated float16_t is a plain struct without built-in comparisons, so
+// these operators compare the values after widening both operands to float.
+static inline HWY_MAYBE_UNUSED bool operator<(const float16_t& a,
+                                              const float16_t& b) {
+  const float lhs = F32FromF16(a);
+  const float rhs = F32FromF16(b);
+  return lhs < rhs;
+}
+// Required for std::greater.
+static inline HWY_MAYBE_UNUSED bool operator>(const float16_t& a,
+                                              const float16_t& b) {
+  const float lhs = F32FromF16(a);
+  const float rhs = F32FromF16(b);
+  return lhs > rhs;
+}
+static inline HWY_MAYBE_UNUSED bool operator==(const float16_t& a,
+                                               const float16_t& b) {
+  const float lhs = F32FromF16(a);
+  const float rhs = F32FromF16(b);
+  return lhs == rhs;
+}
+
+#endif  // HWY_EMULATE_FLOAT16
+
+// Unsigned 128-bit ordering: the upper halves decide unless they are equal, in
+// which case the lower halves are compared.
+static inline HWY_MAYBE_UNUSED bool operator<(const uint128_t& a,
+                                              const uint128_t& b) {
+  if (a.hi != b.hi) return a.hi < b.hi;
+  return a.lo < b.lo;
+}
+// Required for std::greater.
+static inline HWY_MAYBE_UNUSED bool operator>(const uint128_t& a,
+                                              const uint128_t& b) {
+  if (a.hi != b.hi) return a.hi > b.hi;
+  return a.lo > b.lo;
+}
+// Equal only if both halves match.
+static inline HWY_MAYBE_UNUSED bool operator==(const uint128_t& a,
+                                               const uint128_t& b) {
+  return !(a.lo != b.lo || a.hi != b.hi);
+}
+
+// K64V64 comparisons look at the key only; the value field never takes part.
+static inline HWY_MAYBE_UNUSED bool operator<(const K64V64& a,
+                                              const K64V64& b) {
+  return b.key > a.key;
+}
+// Required for std::greater.
+static inline HWY_MAYBE_UNUSED bool operator>(const K64V64& a,
+                                              const K64V64& b) {
+  return a.key > b.key;
+}
+static inline HWY_MAYBE_UNUSED bool operator==(const K64V64& a,
+                                               const K64V64& b) {
+  return !(a.key != b.key);
+}
+
+// K32V32 comparisons look at the key only; the value field never takes part.
+static inline HWY_MAYBE_UNUSED bool operator<(const K32V32& a,
+                                              const K32V32& b) {
+  return b.key > a.key;
+}
+// Required for std::greater.
+static inline HWY_MAYBE_UNUSED bool operator>(const K32V32& a,
+                                              const K32V32& b) {
+  return a.key > b.key;
+}
+static inline HWY_MAYBE_UNUSED bool operator==(const K32V32& a,
+                                               const K32V32& b) {
+  return !(a.key != b.key);
+}
+
+//------------------------------------------------------------------------------
+// Controlling overload resolution (SFINAE)
+
+// Minimal enable_if: only the `true` specialization has a nested `type`
+// (void), so naming EnableIf<false> is a substitution failure.
+template <bool Condition>
+struct EnableIfT {};
+template <>
+struct EnableIfT<true> {
+  typedef void type;
+};
+
+template <bool Condition>
+using EnableIf = typename EnableIfT<Condition>::type;
+
+// Type-equality trait. The primary template reports 0 for two different types.
+template <typename T, typename U>
+struct IsSameT {
+  enum { value = 0 };
+};
+
+// Partial specialization chosen when both arguments are the same type.
+template <typename T>
+struct IsSameT<T, T> {
+  enum { value = 1 };
+};
+
+// Function form of IsSameT, usable in constant expressions: true iff T and U
+// are exactly the same type (cv-qualifiers and references are not stripped).
+template <typename T, typename U>
+HWY_API constexpr bool IsSame() {
+  return IsSameT<T, U>::value;
+}
+
+// Compile-time type selection: If<Condition, Then, Else> names `Then` when
+// Condition is true and `Else` when it is false.
+template <bool Condition, typename Then, typename Else>
+struct IfT {
+  typedef Then type;
+};
+
+template <class Then, class Else>
+struct IfT<false, Then, Else> {
+  typedef Else type;
+};
+
+template <bool Condition, typename Then, typename Else>
+using If = typename IfT<Condition, Then, Else>::type;
+
+// SFINAE helper macros. Each expands to an unnamed, defaulted template (or
+// function) parameter of type `hwy::EnableIf<cond>*`, which only exists when
+// `cond` is true. Every helper referenced inside the expansions is qualified
+// with hwy:: so that the macros also work when expanded outside namespace hwy.
+//
+// Insert into template/function arguments to enable this overload only for
+// vectors of exactly, at most (LE), or more than (GT) this many bytes.
+//
+// As an example, checking for a total size of 16 bytes will match both
+// Simd<uint8_t, 16, 0> and Simd<uint8_t, 8, 1>.
+#define HWY_IF_V_SIZE(T, kN, bytes) \
+  hwy::EnableIf<kN * sizeof(T) == bytes>* = nullptr
+#define HWY_IF_V_SIZE_LE(T, kN, bytes) \
+  hwy::EnableIf<kN * sizeof(T) <= bytes>* = nullptr
+#define HWY_IF_V_SIZE_GT(T, kN, bytes) \
+  hwy::EnableIf<(kN * sizeof(T) > bytes)>* = nullptr
+
+// Enable only for exactly, at most (LE), or more than (GT) this many lanes.
+#define HWY_IF_LANES(kN, lanes) hwy::EnableIf<(kN == lanes)>* = nullptr
+#define HWY_IF_LANES_LE(kN, lanes) hwy::EnableIf<(kN <= lanes)>* = nullptr
+#define HWY_IF_LANES_GT(kN, lanes) hwy::EnableIf<(kN > lanes)>* = nullptr
+
+// Enable depending on the category of the lane type T. HWY_IF_SIGNED matches
+// signed integers only: float and special-float types are excluded.
+#define HWY_IF_UNSIGNED(T) hwy::EnableIf<!hwy::IsSigned<T>()>* = nullptr
+#define HWY_IF_SIGNED(T)                                    \
+  hwy::EnableIf<hwy::IsSigned<T>() && !hwy::IsFloat<T>() && \
+                !hwy::IsSpecialFloat<T>()>* = nullptr
+#define HWY_IF_FLOAT(T) hwy::EnableIf<hwy::IsFloat<T>()>* = nullptr
+#define HWY_IF_NOT_FLOAT(T) hwy::EnableIf<!hwy::IsFloat<T>()>* = nullptr
+#define HWY_IF_FLOAT3264(T) hwy::EnableIf<hwy::IsFloat3264<T>()>* = nullptr
+#define HWY_IF_NOT_FLOAT3264(T) hwy::EnableIf<!hwy::IsFloat3264<T>()>* = nullptr
+#define HWY_IF_SPECIAL_FLOAT(T) \
+  hwy::EnableIf<hwy::IsSpecialFloat<T>()>* = nullptr
+#define HWY_IF_NOT_SPECIAL_FLOAT(T) \
+  hwy::EnableIf<!hwy::IsSpecialFloat<T>()>* = nullptr
+#define HWY_IF_FLOAT_OR_SPECIAL(T) \
+  hwy::EnableIf<hwy::IsFloat<T>() || hwy::IsSpecialFloat<T>()>* = nullptr
+#define HWY_IF_NOT_FLOAT_NOR_SPECIAL(T) \
+  hwy::EnableIf<!hwy::IsFloat<T>() && !hwy::IsSpecialFloat<T>()>* = nullptr
+
+// Enable depending on sizeof(T) in bytes.
+#define HWY_IF_T_SIZE(T, bytes) hwy::EnableIf<sizeof(T) == (bytes)>* = nullptr
+#define HWY_IF_NOT_T_SIZE(T, bytes) \
+  hwy::EnableIf<sizeof(T) != (bytes)>* = nullptr
+// bit_array = 0x102 means 1 or 8 bytes. There is no NONE_OF because it sounds
+// too similar. If you want the opposite of this (2 or 4 bytes), ask for those
+// bits explicitly (0x14) instead of attempting to 'negate' 0x102.
+#define HWY_IF_T_SIZE_ONE_OF(T, bit_array) \
+  hwy::EnableIf<((size_t{1} << sizeof(T)) & (bit_array)) != 0>* = nullptr
+
+// Use instead of HWY_IF_T_SIZE to avoid ambiguity with float16_t/float/double
+// overloads.
+#define HWY_IF_UI16(T)                                                    \
+  hwy::EnableIf<hwy::IsSame<T, uint16_t>() || hwy::IsSame<T, int16_t>()>* = \
+      nullptr
+#define HWY_IF_UI32(T)                                                    \
+  hwy::EnableIf<hwy::IsSame<T, uint32_t>() || hwy::IsSame<T, int32_t>()>* = \
+      nullptr
+#define HWY_IF_UI64(T)                                                    \
+  hwy::EnableIf<hwy::IsSame<T, uint64_t>() || hwy::IsSame<T, int64_t>()>* = \
+      nullptr
+
+// Enable only if a 16-byte block (or the whole vector, if smaller) holds
+// exactly LANES lanes of type T.
+#define HWY_IF_LANES_PER_BLOCK(T, N, LANES) \
+  hwy::EnableIf<HWY_MIN(sizeof(T) * N, 16) / sizeof(T) == (LANES)>* = nullptr
+
+// Empty struct used as a size tag type.
+// Each value of N yields a distinct type, which allows overload selection on a
+// compile-time constant (see UnsignedTag/SignedTag/FloatTag further down).
+template <size_t N>
+struct SizeTag {};
+
+// Strips a top-level `const` from T; any other type is returned unchanged.
+template <class T>
+struct RemoveConstT {
+  typedef T type;
+};
+template <class T>
+struct RemoveConstT<const T> {
+  typedef T type;
+};
+
+template <class T>
+using RemoveConst = typename RemoveConstT<T>::type;
+
+// Strips an lvalue (&) or rvalue (&&) reference from T; non-reference types
+// are returned unchanged.
+template <class T>
+struct RemoveRefT {
+  typedef T type;
+};
+template <class T>
+struct RemoveRefT<T&> {
+  typedef T type;
+};
+template <class T>
+struct RemoveRefT<T&&> {
+  typedef T type;
+};
+
+template <class T>
+using RemoveRef = typename RemoveRefT<T>::type;
+
+//------------------------------------------------------------------------------
+// Type relations
+
+namespace detail {
+
+// Relations<T> is a per-type lookup table, defined only via the explicit
+// specializations below. Members (each present only where it makes sense):
+//   Unsigned / Signed / Float: type of the same size in the other category.
+//   Wide / Narrow: type of the same category with twice / half the size.
+//   is_signed, is_float, is_bf16: 0/1 flags consumed by TypeTag/IsFloatTag.
+// Requesting a member that a specialization lacks is a compile error.
+template <typename T>
+struct Relations;
+// 8-bit integers: no Float and no Narrow.
+template <>
+struct Relations<uint8_t> {
+  using Unsigned = uint8_t;
+  using Signed = int8_t;
+  using Wide = uint16_t;
+  enum { is_signed = 0, is_float = 0, is_bf16 = 0 };
+};
+template <>
+struct Relations<int8_t> {
+  using Unsigned = uint8_t;
+  using Signed = int8_t;
+  using Wide = int16_t;
+  enum { is_signed = 1, is_float = 0, is_bf16 = 0 };
+};
+template <>
+struct Relations<uint16_t> {
+  using Unsigned = uint16_t;
+  using Signed = int16_t;
+  using Float = float16_t;
+  using Wide = uint32_t;
+  using Narrow = uint8_t;
+  enum { is_signed = 0, is_float = 0, is_bf16 = 0 };
+};
+template <>
+struct Relations<int16_t> {
+  using Unsigned = uint16_t;
+  using Signed = int16_t;
+  using Float = float16_t;
+  using Wide = int32_t;
+  using Narrow = int8_t;
+  enum { is_signed = 1, is_float = 0, is_bf16 = 0 };
+};
+template <>
+struct Relations<uint32_t> {
+  using Unsigned = uint32_t;
+  using Signed = int32_t;
+  using Float = float;
+  using Wide = uint64_t;
+  using Narrow = uint16_t;
+  enum { is_signed = 0, is_float = 0, is_bf16 = 0 };
+};
+template <>
+struct Relations<int32_t> {
+  using Unsigned = uint32_t;
+  using Signed = int32_t;
+  using Float = float;
+  using Wide = int64_t;
+  using Narrow = int16_t;
+  enum { is_signed = 1, is_float = 0, is_bf16 = 0 };
+};
+// uint64_t widens to the uint128_t struct.
+template <>
+struct Relations<uint64_t> {
+  using Unsigned = uint64_t;
+  using Signed = int64_t;
+  using Float = double;
+  using Wide = uint128_t;
+  using Narrow = uint32_t;
+  enum { is_signed = 0, is_float = 0, is_bf16 = 0 };
+};
+// int64_t has no Wide.
+template <>
+struct Relations<int64_t> {
+  using Unsigned = uint64_t;
+  using Signed = int64_t;
+  using Float = double;
+  using Narrow = int32_t;
+  enum { is_signed = 1, is_float = 0, is_bf16 = 0 };
+};
+// uint128_t only provides Unsigned and Narrow.
+template <>
+struct Relations<uint128_t> {
+  using Unsigned = uint128_t;
+  using Narrow = uint64_t;
+  enum { is_signed = 0, is_float = 0, is_bf16 = 0 };
+};
+// float16_t has no Narrow.
+template <>
+struct Relations<float16_t> {
+  using Unsigned = uint16_t;
+  using Signed = int16_t;
+  using Float = float16_t;
+  using Wide = float;
+  enum { is_signed = 1, is_float = 1, is_bf16 = 0 };
+};
+// bfloat16_t is the only type with is_bf16 = 1; it has no Float and no Narrow.
+template <>
+struct Relations<bfloat16_t> {
+  using Unsigned = uint16_t;
+  using Signed = int16_t;
+  using Wide = float;
+  enum { is_signed = 1, is_float = 1, is_bf16 = 1 };
+};
+// float narrows to float16_t (not bfloat16_t).
+template <>
+struct Relations<float> {
+  using Unsigned = uint32_t;
+  using Signed = int32_t;
+  using Float = float;
+  using Wide = double;
+  using Narrow = float16_t;
+  enum { is_signed = 1, is_float = 1, is_bf16 = 0 };
+};
+// double has no Wide.
+template <>
+struct Relations<double> {
+  using Unsigned = uint64_t;
+  using Signed = int64_t;
+  using Float = double;
+  using Narrow = float;
+  enum { is_signed = 1, is_float = 1, is_bf16 = 0 };
+};
+
+// TypeFromSize<N> maps a size in bytes to the unsigned/signed/float type of
+// that size. Only sizes 1, 2, 4, 8 and 16 are specialized; size 1 has no
+// Float, and size 16 only has Unsigned.
+template <size_t N>
+struct TypeFromSize;
+template <>
+struct TypeFromSize<1> {
+  using Unsigned = uint8_t;
+  using Signed = int8_t;
+};
+template <>
+struct TypeFromSize<2> {
+  using Unsigned = uint16_t;
+  using Signed = int16_t;
+  using Float = float16_t;
+};
+template <>
+struct TypeFromSize<4> {
+  using Unsigned = uint32_t;
+  using Signed = int32_t;
+  using Float = float;
+};
+template <>
+struct TypeFromSize<8> {
+  using Unsigned = uint64_t;
+  using Signed = int64_t;
+  using Float = double;
+};
+template <>
+struct TypeFromSize<16> {
+  using Unsigned = uint128_t;
+};
+
+}  // namespace detail
+
+// Public aliases over detail::Relations / detail::TypeFromSize. Using an alias
+// for a type whose table entry lacks the requested member (for example
+// MakeWide<double> or FloatFromSize<1>) is a compile error.
+
+// Aliases for types of a different category, but the same size.
+template <typename T>
+using MakeUnsigned = typename detail::Relations<T>::Unsigned;
+template <typename T>
+using MakeSigned = typename detail::Relations<T>::Signed;
+template <typename T>
+using MakeFloat = typename detail::Relations<T>::Float;
+
+// Aliases for types of the same category, but different size.
+template <typename T>
+using MakeWide = typename detail::Relations<T>::Wide;
+template <typename T>
+using MakeNarrow = typename detail::Relations<T>::Narrow;
+
+// Obtain type from its size [bytes].
+template <size_t N>
+using UnsignedFromSize = typename detail::TypeFromSize<N>::Unsigned;
+template <size_t N>
+using SignedFromSize = typename detail::TypeFromSize<N>::Signed;
+template <size_t N>
+using FloatFromSize = typename detail::TypeFromSize<N>::Float;
+
+// Category tags for overload dispatch on the kind of lane type. The values are
+// multiples of 0x100, matching what TypeTag() computes below.
+// Avoid confusion with SizeTag where the parameter is a lane size.
+using UnsignedTag = SizeTag<0>;
+using SignedTag = SizeTag<0x100>;  // integer
+using FloatTag = SizeTag<0x200>;
+using SpecialTag = SizeTag<0x300>;
+
+// Returns the category tag of T: the sum of the is_signed, is_float and
+// is_bf16 flags from Relations<T>, shifted left by 8. Per the Relations table
+// this gives UnsignedTag for unsigned integers (sum 0), SignedTag for signed
+// integers (1), FloatTag for float16_t/float/double (2) and SpecialTag for
+// bfloat16_t (3).
+template <typename T, class R = detail::Relations<T>>
+constexpr auto TypeTag()
+    -> hwy::SizeTag<((R::is_signed + R::is_float + R::is_bf16) << 8)> {
+  return hwy::SizeTag<((R::is_signed + R::is_float + R::is_bf16) << 8)>();
+}
+
+// For when we only want to distinguish FloatTag from everything else.
+using NonFloatTag = SizeTag<0x400>;
+
+// Returns FloatTag (SizeTag<0x200>) if Relations<T>::is_float is set, which
+// per the Relations table includes bfloat16_t; otherwise NonFloatTag.
+template <typename T, class R = detail::Relations<T>>
+constexpr auto IsFloatTag() -> hwy::SizeTag<(R::is_float ? 0x200 : 0x400)> {
+  return hwy::SizeTag<(R::is_float ? 0x200 : 0x400)>();
+}
+
+//------------------------------------------------------------------------------
+// Type traits
+
+// True iff T is exactly float or double (16-bit float types are excluded).
+template <typename T>
+HWY_API constexpr bool IsFloat3264() {
+  return IsSame<T, float>() || IsSame<T, double>();
+}
+
+// True iff T is float16_t, float or double. bfloat16_t is not included here;
+// it is only reported by IsSpecialFloat.
+template <typename T>
+HWY_API constexpr bool IsFloat() {
+  // Cannot use T(1.25) != T(1) for float16_t, which can only be converted to or
+  // from a float, not compared. Include float16_t in case HWY_HAVE_FLOAT16=1.
+  return IsSame<T, float16_t>() || IsFloat3264<T>();
+}
+
+// These types are often special-cased and not supported in all ops.
+// True iff T is float16_t or bfloat16_t. Note that float16_t satisfies both
+// IsFloat and IsSpecialFloat.
+template <typename T>
+HWY_API constexpr bool IsSpecialFloat() {
+  return IsSame<T, float16_t>() || IsSame<T, bfloat16_t>();
+}
+
+// True iff T can represent negative values. The generic version tests whether
+// converting -1 to T yields a value below zero (for unsigned T, T(-1) wraps to
+// a large positive value). float16_t and bfloat16_t are specialized because
+// the generic expression requires T to be constructible from an integer and
+// comparable, which the emulation structs are not.
+template <typename T>
+HWY_API constexpr bool IsSigned() {
+  return T(0) > T(-1);
+}
+template <>
+constexpr bool IsSigned<float16_t>() {
+  return true;
+}
+template <>
+constexpr bool IsSigned<bfloat16_t>() {
+  return true;
+}
+
+// Largest/smallest representable integer values.
+// LimitsMax returns the largest value of integer type T: all bits set for
+// unsigned T, and all bits except the top one for signed T (computed in the
+// unsigned counterpart TU and then cast back).
+template <typename T>
+HWY_API constexpr T LimitsMax() {
+  static_assert(!IsFloat<T>(), "Only for integer types");
+  using TU = MakeUnsigned<T>;
+  return static_cast<T>(IsSigned<T>() ? (static_cast<TU>(~0ull) >> 1)
+                                      : static_cast<TU>(~0ull));
+}
+// Returns the smallest value of integer type T: 0 for unsigned T, and
+// -1 - LimitsMax<T>() for signed T, which avoids having to write the minimum
+// as a literal.
+template <typename T>
+HWY_API constexpr T LimitsMin() {
+  static_assert(!IsFloat<T>(), "Only for integer types");
+  return IsSigned<T>() ? T(-1) - LimitsMax<T>() : T(0);
+}
+
+// Smallest representable value of T (integer or float); HighestValue is the
+// counterpart for the largest. These names avoid confusion with
+// numeric_limits<float>::min(), which is the smallest positive value.
+// Not constexpr because the [b]float16_t versions are built via CopySameSize.
+// The generic version handles integers by forwarding to LimitsMin.
+template <typename T>
+HWY_API T LowestValue() {
+  return LimitsMin<T>();
+}
+template <>
+HWY_INLINE bfloat16_t LowestValue<bfloat16_t>() {
+  bfloat16_t lowest;
+  const uint16_t kEncoded = 0xFF7F;  // -1.1111111 x 2^127
+  CopySameSize(&kEncoded, &lowest);
+  return lowest;
+}
+template <>
+HWY_INLINE float16_t LowestValue<float16_t>() {
+  float16_t lowest;
+  const uint16_t kEncoded = 0xFBFF;  // -1.1111111111 x 2^15
+  CopySameSize(&kEncoded, &lowest);
+  return lowest;
+}
+template <>
+HWY_INLINE float LowestValue<float>() {
+  return -3.402823466e+38F;
+}
+template <>
+HWY_INLINE double LowestValue<double>() {
+  return -1.7976931348623158e+308;
+}
+
+// Largest representable value of T (integer or float). The generic version
+// handles integers by forwarding to LimitsMax; the 16-bit float versions are
+// built from their bit patterns via CopySameSize.
+template <typename T>
+HWY_API T HighestValue() {
+  return LimitsMax<T>();
+}
+template <>
+HWY_INLINE bfloat16_t HighestValue<bfloat16_t>() {
+  bfloat16_t highest;
+  const uint16_t kEncoded = 0x7F7F;  // 1.1111111 x 2^127
+  CopySameSize(&kEncoded, &highest);
+  return highest;
+}
+template <>
+HWY_INLINE float16_t HighestValue<float16_t>() {
+  float16_t highest;
+  const uint16_t kEncoded = 0x7BFF;  // 1.1111111111 x 2^15
+  CopySameSize(&kEncoded, &highest);
+  return highest;
+}
+template <>
+HWY_INLINE float HighestValue<float>() {
+  return 3.402823466e+38F;
+}
+template <>
+HWY_INLINE double HighestValue<double>() {
+  return 1.7976931348623158e+308;
+}
+
+// Gap between 1.0 and the next larger representable value, i.e.
+// 1 / (1ULL << MantissaBits<T>()); the constants are hard-coded to ensure
+// precision. The generic version (integer types) returns 1.
+template <typename T>
+HWY_API T Epsilon() {
+  return 1;
+}
+template <>
+HWY_INLINE bfloat16_t Epsilon<bfloat16_t>() {
+  bfloat16_t eps;
+  const uint16_t kEncoded = 0x3C00;  // 0.0078125
+  CopySameSize(&kEncoded, &eps);
+  return eps;
+}
+template <>
+HWY_INLINE float16_t Epsilon<float16_t>() {
+  float16_t eps;
+  const uint16_t kEncoded = 0x1400;  // 0.0009765625
+  CopySameSize(&kEncoded, &eps);
+  return eps;
+}
+template <>
+HWY_INLINE float Epsilon<float>() {
+  return 1.192092896e-7f;
+}
+template <>
+HWY_INLINE double Epsilon<double>() {
+  return 2.2204460492503131e-16;
+}
+
+// Returns width in bits of the mantissa field in IEEE binary16/32/64.
+// Also specialized for bfloat16_t (7 bits). The primary template must never be
+// instantiated: its static_assert depends on T and fails for any other type.
+template <typename T>
+constexpr int MantissaBits() {
+  static_assert(sizeof(T) == 0, "Only instantiate the specializations");
+  return 0;
+}
+template <>
+constexpr int MantissaBits<bfloat16_t>() {
+  return 7;
+}
+template <>
+constexpr int MantissaBits<float16_t>() {
+  return 10;
+}
+template <>
+constexpr int MantissaBits<float>() {
+  return 23;
+}
+template <>
+constexpr int MantissaBits<double>() {
+  return 52;
+}
+
+// Returns the (left-shifted by one bit) IEEE binary16/32/64 representation with
+// the largest possible (biased) exponent field. Used by IsInf.
+// Negating 1 << (MantissaBits + 1) sets every bit from that position upwards,
+// i.e. the exponent field after the sign bit has been shifted out, and leaves
+// the (shifted) mantissa bits zero. Example for float: 0xFF000000.
+template <typename T>
+constexpr MakeSigned<T> MaxExponentTimes2() {
+  return -(MakeSigned<T>{1} << (MantissaBits<T>() + 1));
+}
+
+// Returns bitmask of the sign bit in IEEE binary16/32/64.
+// This is the most significant bit of the same-sized unsigned integer, e.g.
+// 0x80000000 for float.
+template <typename T>
+constexpr MakeUnsigned<T> SignMask() {
+  return MakeUnsigned<T>{1} << (sizeof(T) * 8 - 1);
+}
+
+// Returns bitmask of the exponent field in IEEE binary16/32/64.
+// ~(1 << MantissaBits) + 1 is the two's-complement negation of
+// 1 << MantissaBits, which sets all bits at or above the lowest exponent bit;
+// the sign bit is then cleared. Example for float: 0x7F800000.
+template <typename T>
+constexpr MakeUnsigned<T> ExponentMask() {
+  return (~(MakeUnsigned<T>{1} << MantissaBits<T>()) + 1) & ~SignMask<T>();
+}
+
+// Returns bitmask of the mantissa field in IEEE binary16/32/64.
+// These are the lowest MantissaBits<T>() bits, e.g. 0x007FFFFF for float.
+template <typename T>
+constexpr MakeUnsigned<T> MantissaMask() {
+  return (MakeUnsigned<T>{1} << MantissaBits<T>()) - 1;
+}
+
+// Returns 2^MantissaBits<T>() as a floating-point value of type T. Every
+// integer whose absolute value is below this can be represented exactly.
+// Only the specializations may be used; the primary template always fails its
+// static_assert when instantiated.
+template <typename T>
+HWY_INLINE T MantissaEnd() {
+  static_assert(sizeof(T) == 0, "Only instantiate the specializations");
+  return 0;
+}
+template <>
+HWY_INLINE bfloat16_t MantissaEnd<bfloat16_t>() {
+  bfloat16_t end;
+  const uint16_t kEncoded = 0x4300;  // 1.0 x 2^7
+  CopySameSize(&kEncoded, &end);
+  return end;
+}
+template <>
+HWY_INLINE float16_t MantissaEnd<float16_t>() {
+  float16_t end;
+  const uint16_t kEncoded = 0x6400;  // 1.0 x 2^10
+  CopySameSize(&kEncoded, &end);
+  return end;
+}
+template <>
+HWY_INLINE float MantissaEnd<float>() {
+  return 8388608.0f;  // 1 << 23
+}
+template <>
+HWY_INLINE double MantissaEnd<double>() {
+  // floating point literal with p52 requires C++17.
+  return 4503599627370496.0;  // 1 << 52
+}
+
+// Returns width in bits of the exponent field in IEEE binary16/32/64.
+template <typename T>
+constexpr int ExponentBits() {
+  // Exponent := remaining bits after deducting sign and mantissa. The total
+  // bit count is converted to int up front so that the subtraction is done in
+  // signed arithmetic, rather than converting MantissaBits to size_t and then
+  // implicitly narrowing the size_t result back to int.
+  return static_cast<int>(8 * sizeof(T)) - 1 - MantissaBits<T>();
+}
+
+// Returns largest value of the biased exponent field in IEEE binary16/32/64,
+// right-shifted so that the LSB is bit zero. Example: 0xFF for float.
+// This is expressed as a signed integer for more efficient comparison.
+// Computed as (1 << ExponentBits) - 1, i.e. all exponent bits set.
+template <typename T>
+constexpr MakeSigned<T> MaxExponentField() {
+  return (MakeSigned<T>{1} << ExponentBits<T>()) - 1;
+}
+
+//------------------------------------------------------------------------------
+// Helper functions
+
+// Returns a / b rounded up, for non-negative a and positive b. The result has
+// the type of the first argument.
+template <typename T1, typename T2>
+constexpr inline T1 DivCeil(T1 a, T2 b) {
+  return (a + b - 1) / b;
+}
+
+// Rounds `what` up to the next multiple of `align`.
+// Works for any `align`; if a power of two, compiler emits ADD+AND.
+constexpr inline size_t RoundUpTo(size_t what, size_t align) {
+  return ((what + align - 1) / align) * align;
+}
+
+// Returns the number of zero bits below the least-significant set bit of x
+// (count of trailing zeros). Uses _BitScanForward on MSVC and __builtin_ctz
+// elsewhere.
+// Undefined results for x == 0.
+HWY_API size_t Num0BitsBelowLS1Bit_Nonzero32(const uint32_t x) {
+#if HWY_COMPILER_MSVC
+  unsigned long index;  // NOLINT
+  _BitScanForward(&index, x);
+  return index;
+#else   // HWY_COMPILER_MSVC
+  return static_cast<size_t>(__builtin_ctz(x));
+#endif  // HWY_COMPILER_MSVC
+}
+
+// 64-bit counterpart of Num0BitsBelowLS1Bit_Nonzero32: returns the number of
+// zero bits below the least-significant set bit of x. As the name indicates,
+// x must be nonzero.
+HWY_API size_t Num0BitsBelowLS1Bit_Nonzero64(const uint64_t x) {
+#if HWY_COMPILER_MSVC
+#if HWY_ARCH_X86_64
+  unsigned long index;  // NOLINT
+  _BitScanForward64(&index, x);
+  return index;
+#else   // HWY_ARCH_X86_64
+  // _BitScanForward64 not available
+  // Scan the lower 32 bits first; only if they are all zero is the answer
+  // found in the upper half, offset by 32.
+  uint32_t lsb = static_cast<uint32_t>(x & 0xFFFFFFFF);
+  unsigned long index;  // NOLINT
+  if (lsb == 0) {
+    uint32_t msb = static_cast<uint32_t>(x >> 32u);
+    _BitScanForward(&index, msb);
+    return 32 + index;
+  } else {
+    _BitScanForward(&index, lsb);
+    return index;
+  }
+#endif  // HWY_ARCH_X86_64
+#else   // HWY_COMPILER_MSVC
+  return static_cast<size_t>(__builtin_ctzll(x));
+#endif  // HWY_COMPILER_MSVC
+}
+
+// Returns the number of zero bits above the most-significant set bit of x
+// (count of leading zeros). On MSVC, _BitScanReverse yields the index of that
+// bit, hence the subtraction from 31.
+// Undefined results for x == 0.
+HWY_API size_t Num0BitsAboveMS1Bit_Nonzero32(const uint32_t x) {
+#if HWY_COMPILER_MSVC
+  unsigned long index;  // NOLINT
+  _BitScanReverse(&index, x);
+  return 31 - index;
+#else   // HWY_COMPILER_MSVC
+  return static_cast<size_t>(__builtin_clz(x));
+#endif  // HWY_COMPILER_MSVC
+}
+
+// 64-bit counterpart of Num0BitsAboveMS1Bit_Nonzero32: returns the number of
+// zero bits above the most-significant set bit of x. As the name indicates,
+// x must be nonzero.
+HWY_API size_t Num0BitsAboveMS1Bit_Nonzero64(const uint64_t x) {
+#if HWY_COMPILER_MSVC
+#if HWY_ARCH_X86_64
+  unsigned long index;  // NOLINT
+  _BitScanReverse64(&index, x);
+  return 63 - index;
+#else   // HWY_ARCH_X86_64
+  // _BitScanReverse64 not available
+  // Scan the upper 32 bits first. If they are all zero, the highest set bit
+  // is at `index` within the lower half and 63 - index zero bits lie above
+  // it; otherwise it is at `index` within the upper half, with 31 - index
+  // zero bits above it.
+  const uint32_t msb = static_cast<uint32_t>(x >> 32u);
+  unsigned long index;  // NOLINT
+  if (msb == 0) {
+    const uint32_t lsb = static_cast<uint32_t>(x & 0xFFFFFFFF);
+    _BitScanReverse(&index, lsb);
+    return 63 - index;
+  } else {
+    _BitScanReverse(&index, msb);
+    return 31 - index;
+  }
+#endif  // HWY_ARCH_X86_64
+#else   // HWY_COMPILER_MSVC
+  return static_cast<size_t>(__builtin_clzll(x));
+#endif  // HWY_COMPILER_MSVC
+}
+
+// Returns the number of set bits in x. Uses a compiler builtin or intrinsic
+// where one is available, else a portable bit-twiddling fallback.
+HWY_API size_t PopCount(uint64_t x) {
+#if HWY_COMPILER_GCC  // includes clang
+  return static_cast<size_t>(__builtin_popcountll(x));
+  // This instruction has a separate feature flag, but is often called from
+  // non-SIMD code, so we don't want to require dynamic dispatch. It was first
+  // supported by Intel in Nehalem (SSE4.2), but MSVC only predefines a macro
+  // for AVX, so check for that.
+#elif HWY_COMPILER_MSVC && HWY_ARCH_X86_64 && defined(__AVX__)
+  return _mm_popcnt_u64(x);
+#elif HWY_COMPILER_MSVC && HWY_ARCH_X86_32 && defined(__AVX__)
+  return _mm_popcnt_u32(static_cast<uint32_t>(x & 0xFFFFFFFFu)) +
+         _mm_popcnt_u32(static_cast<uint32_t>(x >> 32));
+#else
+  // Fallback: sum adjacent bits into 2-bit fields, then 4-bit fields, then
+  // bytes; afterwards fold the byte counts together. The total is at most 64,
+  // so it fits in the low 7 bits that are kept.
+  x -= ((x >> 1) & 0x5555555555555555ULL);
+  x = (((x >> 2) & 0x3333333333333333ULL) + (x & 0x3333333333333333ULL));
+  x = (((x >> 4) + x) & 0x0F0F0F0F0F0F0F0FULL);
+  x += (x >> 8);
+  x += (x >> 16);
+  x += (x >> 32);
+  return static_cast<size_t>(x & 0x7Fu);
+#endif
+}
+
+// HWY_API is intentionally omitted on both functions: GCC reported "function
+// not considered for inlining". Such errors used to stem from underlying type
+// mismatches, but it is unclear what still mismatches despite all the casts.
+
+// Returns floor(log2(x)) by recursively halving x until it equals 1.
+// x must be at least 1.
+template <typename TI>
+/*HWY_API*/ constexpr size_t FloorLog2(TI x) {
+  return x != TI{1}
+             ? static_cast<size_t>(FloorLog2(static_cast<TI>(x >> 1)) + 1)
+             : 0;
+}
+
+// Returns ceil(log2(x)): 0 for x == 1, otherwise FloorLog2(x - 1) + 1.
+// x must be at least 1.
+template <typename TI>
+/*HWY_API*/ constexpr size_t CeilLog2(TI x) {
+  return x != TI{1}
+             ? static_cast<size_t>(FloorLog2(static_cast<TI>(x - 1)) + 1)
+             : 0;
+}
+
+// Returns t + n, selected by tag (see IsFloatTag). For FloatTag, n is
+// converted to T and added directly.
+template <typename T>
+HWY_INLINE constexpr T AddWithWraparound(hwy::FloatTag /*tag*/, T t, size_t n) {
+  return t + static_cast<T>(n);
+}
+
+// For NonFloatTag, both operands are converted to the unsigned counterpart TU
+// before adding, so the sum wraps around modulo 2^bits rather than overflowing
+// a signed type; the result is then cast back to T.
+template <typename T>
+HWY_INLINE constexpr T AddWithWraparound(hwy::NonFloatTag /*tag*/, T t,
+                                         size_t n) {
+  using TU = MakeUnsigned<T>;
+  return static_cast<T>(
+      static_cast<TU>(static_cast<TU>(t) + static_cast<TU>(n)) &
+      hwy::LimitsMax<TU>());
+}
+
+#if HWY_COMPILER_MSVC && HWY_ARCH_X86_64
+#pragma intrinsic(_umul128)
+#endif
+
+// 64 x 64 = 128 bit multiplication
+// Returns the lower 64 bits of a * b and stores the upper 64 bits in *upper.
+// Uses __uint128_t or _umul128 where available, else a portable version built
+// from four 32 x 32 = 64 bit partial products.
+HWY_API uint64_t Mul128(uint64_t a, uint64_t b, uint64_t* HWY_RESTRICT upper) {
+#if defined(__SIZEOF_INT128__)
+  __uint128_t product = (__uint128_t)a * (__uint128_t)b;
+  *upper = (uint64_t)(product >> 64);
+  return (uint64_t)(product & 0xFFFFFFFFFFFFFFFFULL);
+#elif HWY_COMPILER_MSVC && HWY_ARCH_X86_64
+  return _umul128(a, b, upper);
+#else
+  constexpr uint64_t kLo32 = 0xFFFFFFFFU;
+  const uint64_t lo_lo = (a & kLo32) * (b & kLo32);
+  const uint64_t hi_lo = (a >> 32) * (b & kLo32);
+  const uint64_t lo_hi = (a & kLo32) * (b >> 32);
+  const uint64_t hi_hi = (a >> 32) * (b >> 32);
+  // t accumulates everything that contributes to bits [32, 64) of the product
+  // plus the carries out of that range. Only the low half of hi_lo is added
+  // here (its high half goes directly into *upper), which keeps the sum
+  // within 64 bits.
+  const uint64_t t = (lo_lo >> 32) + (hi_lo & kLo32) + lo_hi;
+  *upper = (hi_lo >> 32) + (t >> 32) + hi_hi;
+  return (t << 32) | (lo_lo & kLo32);
+#endif
+}
+
+// Prevents the compiler from eliding the computations that led to "output".
+// On MSVC the value is stored into a function-local static std::atomic; on
+// other compilers an empty asm statement lists "output" as a read-write
+// register operand and declares a memory clobber.
+template <class T>
+HWY_API void PreventElision(T&& output) {
+#if HWY_COMPILER_MSVC
+  // MSVC does not support inline assembly anymore (and never supported GCC's
+  // RTL constraints). Self-assignment with #pragma optimize("off") might be
+  // expected to prevent elision, but it does not with MSVC 2015. Type-punning
+  // with volatile pointers generates inefficient code on MSVC 2017.
+  static std::atomic<RemoveRef<T>> dummy;
+  dummy.store(output, std::memory_order_relaxed);
+#else
+  // Works by indicating to the compiler that "output" is being read and
+  // modified. The +r constraint avoids unnecessary writes to memory, but only
+  // works for built-in types (typically FuncOutput).
+  asm volatile("" : "+r"(output) : : "memory");
+#endif
+}
+
+}  // namespace hwy
+
+#endif  // HIGHWAY_HWY_BASE_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/base_test.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/base_test.cc
new file mode 100644
index 0000000000..c23bf09b59
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/base_test.cc
@@ -0,0 +1,316 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/base.h"
+
+#include <limits>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "base_test.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+#include "hwy/highway.h"
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+HWY_NOINLINE void TestAllLimits() {
+  HWY_ASSERT_EQ(uint8_t{0}, LimitsMin<uint8_t>());
+  HWY_ASSERT_EQ(uint16_t{0}, LimitsMin<uint16_t>());
+  HWY_ASSERT_EQ(uint32_t{0}, LimitsMin<uint32_t>());
+  HWY_ASSERT_EQ(uint64_t{0}, LimitsMin<uint64_t>());
+  // Above: unsigned minima are zero. Next, signed minima: only sign bit set.
+  HWY_ASSERT_EQ(int8_t{-128}, LimitsMin<int8_t>());
+  HWY_ASSERT_EQ(int16_t{-32768}, LimitsMin<int16_t>());
+  HWY_ASSERT_EQ(static_cast<int32_t>(0x80000000u), LimitsMin<int32_t>());
+  HWY_ASSERT_EQ(static_cast<int64_t>(0x8000000000000000ull),
+                LimitsMin<int64_t>());
+  // Unsigned maxima: all bits set.
+  HWY_ASSERT_EQ(uint8_t{0xFF}, LimitsMax<uint8_t>());
+  HWY_ASSERT_EQ(uint16_t{0xFFFF}, LimitsMax<uint16_t>());
+  HWY_ASSERT_EQ(uint32_t{0xFFFFFFFFu}, LimitsMax<uint32_t>());
+  HWY_ASSERT_EQ(uint64_t{0xFFFFFFFFFFFFFFFFull}, LimitsMax<uint64_t>());
+  // Signed maxima: all bits set except the sign bit.
+  HWY_ASSERT_EQ(int8_t{0x7F}, LimitsMax<int8_t>());
+  HWY_ASSERT_EQ(int16_t{0x7FFF}, LimitsMax<int16_t>());
+  HWY_ASSERT_EQ(int32_t{0x7FFFFFFFu}, LimitsMax<int32_t>());
+  HWY_ASSERT_EQ(int64_t{0x7FFFFFFFFFFFFFFFull}, LimitsMax<int64_t>());
+}
+
+struct TestLowestHighest {
+  template <typename Lane>
+  HWY_NOINLINE void operator()(Lane /*unused*/) const {
+    // Special float types are skipped: numeric_limits<T>::lowest is only
+    // guaranteed to be what we expect (-max) for built-in floating-point types.
+    if (IsSpecialFloat<Lane>()) return;
+    using Limits = std::numeric_limits<Lane>;
+    HWY_ASSERT_EQ(Limits::lowest(), LowestValue<Lane>());
+    HWY_ASSERT_EQ(Limits::max(), HighestValue<Lane>());
+  }
+};
+
+HWY_NOINLINE void TestAllLowestHighest() { ForAllTypes(TestLowestHighest{}); }
+struct TestIsUnsigned {  // Compile-time check for unsigned lane types.
+  template <typename Lane>
+  HWY_NOINLINE void operator()(Lane /*unused*/) const {
+    static_assert(!IsSigned<Lane>(), "Expected !IsSigned");
+    static_assert(!IsFloat<Lane>(), "Expected !IsFloat");
+  }
+};
+
+struct TestIsSigned {  // Compile-time check for signed integer lane types.
+  template <typename Lane>
+  HWY_NOINLINE void operator()(Lane /*unused*/) const {
+    static_assert(IsSigned<Lane>(), "Expected IsSigned");
+    static_assert(!IsFloat<Lane>(), "Expected !IsFloat");
+  }
+};
+
+struct TestIsFloat {  // Compile-time check for floating-point lane types.
+  template <typename Lane>
+  HWY_NOINLINE void operator()(Lane /*unused*/) const {
+    static_assert(IsSigned<Lane>(), "Floats are also considered signed");
+    static_assert(IsFloat<Lane>(), "Expected IsFloat");
+  }
+};
+
+HWY_NOINLINE void TestAllType() {
+  // Size checks are compile-time only and independent of the calls below.
+  static_assert(sizeof(MakeNarrow<hwy::uint128_t>) == 8, "Expected uint64_t");
+  static_assert(sizeof(MakeWide<uint64_t>) == 16, "Expected uint128_t");
+  static_assert(sizeof(MakeUnsigned<hwy::uint128_t>) == 16, "");
+  ForUnsignedTypes(TestIsUnsigned{});
+  ForSignedTypes(TestIsSigned{});
+  ForFloatTypes(TestIsFloat{});
+}
+
+struct TestIsSame {  // Compile-time checks of IsSame for one lane type T.
+  template <typename T>
+  HWY_NOINLINE void operator()(T /*unused*/) const {
+    static_assert(!IsSame<MakeUnsigned<T>, MakeSigned<T>>(), "U != S");
+    static_assert(!IsSame<MakeSigned<T>, MakeUnsigned<T>>(), "S != U");
+    static_assert(IsSame<T, T>(), "T == T");
+  }
+};
+
+HWY_NOINLINE void TestAllIsSame() { ForAllTypes(TestIsSame{}); }
+
+HWY_NOINLINE void TestAllBitScan() {
+  HWY_ASSERT_EQ(size_t{0}, Num0BitsAboveMS1Bit_Nonzero32(0x80000000u));
+  HWY_ASSERT_EQ(size_t{0}, Num0BitsAboveMS1Bit_Nonzero32(0xFFFFFFFFu));
+  HWY_ASSERT_EQ(size_t{1}, Num0BitsAboveMS1Bit_Nonzero32(0x40000000u));
+  HWY_ASSERT_EQ(size_t{1}, Num0BitsAboveMS1Bit_Nonzero32(0x40108210u));
+  HWY_ASSERT_EQ(size_t{30}, Num0BitsAboveMS1Bit_Nonzero32(2u));
+  HWY_ASSERT_EQ(size_t{30}, Num0BitsAboveMS1Bit_Nonzero32(3u));
+  HWY_ASSERT_EQ(size_t{31}, Num0BitsAboveMS1Bit_Nonzero32(1u));
+  // Above: zero bits above the most-significant 1 bit, 32-bit. Next: 64-bit.
+  HWY_ASSERT_EQ(size_t{0},
+                Num0BitsAboveMS1Bit_Nonzero64(0x8000000000000000ull));
+  HWY_ASSERT_EQ(size_t{0},
+                Num0BitsAboveMS1Bit_Nonzero64(0xFFFFFFFFFFFFFFFFull));
+  HWY_ASSERT_EQ(size_t{1},
+                Num0BitsAboveMS1Bit_Nonzero64(0x4000000000000000ull));
+  HWY_ASSERT_EQ(size_t{1},
+                Num0BitsAboveMS1Bit_Nonzero64(0x4010821004200011ull));
+  HWY_ASSERT_EQ(size_t{62}, Num0BitsAboveMS1Bit_Nonzero64(2ull));
+  HWY_ASSERT_EQ(size_t{62}, Num0BitsAboveMS1Bit_Nonzero64(3ull));
+  HWY_ASSERT_EQ(size_t{63}, Num0BitsAboveMS1Bit_Nonzero64(1ull));
+  // Zero bits below the least-significant 1 bit, 32-bit inputs.
+  HWY_ASSERT_EQ(size_t{0}, Num0BitsBelowLS1Bit_Nonzero32(1u));
+  HWY_ASSERT_EQ(size_t{1}, Num0BitsBelowLS1Bit_Nonzero32(2u));
+  HWY_ASSERT_EQ(size_t{30}, Num0BitsBelowLS1Bit_Nonzero32(0xC0000000u));
+  HWY_ASSERT_EQ(size_t{31}, Num0BitsBelowLS1Bit_Nonzero32(0x80000000u));
+  // Zero bits below the least-significant 1 bit, 64-bit inputs.
+  HWY_ASSERT_EQ(size_t{0}, Num0BitsBelowLS1Bit_Nonzero64(1ull));
+  HWY_ASSERT_EQ(size_t{1}, Num0BitsBelowLS1Bit_Nonzero64(2ull));
+  HWY_ASSERT_EQ(size_t{62},
+                Num0BitsBelowLS1Bit_Nonzero64(0xC000000000000000ull));
+  HWY_ASSERT_EQ(size_t{63},
+                Num0BitsBelowLS1Bit_Nonzero64(0x8000000000000000ull));
+}
+
+HWY_NOINLINE void TestAllPopCount() {
+  HWY_ASSERT_EQ(size_t{0}, PopCount(0u));
+  HWY_ASSERT_EQ(size_t{1}, PopCount(1u));
+  HWY_ASSERT_EQ(size_t{1}, PopCount(2u));
+  HWY_ASSERT_EQ(size_t{2}, PopCount(3u));
+  HWY_ASSERT_EQ(size_t{1}, PopCount(0x80000000u));
+  HWY_ASSERT_EQ(size_t{31}, PopCount(0x7FFFFFFFu));
+  HWY_ASSERT_EQ(size_t{32}, PopCount(0xFFFFFFFFu));
+  // Above: `u` literals. Next: `ull` literals, up to all 64 bits set.
+  HWY_ASSERT_EQ(size_t{1}, PopCount(0x80000000ull));
+  HWY_ASSERT_EQ(size_t{31}, PopCount(0x7FFFFFFFull));
+  HWY_ASSERT_EQ(size_t{32}, PopCount(0xFFFFFFFFull));
+  HWY_ASSERT_EQ(size_t{33}, PopCount(0x10FFFFFFFFull));
+  HWY_ASSERT_EQ(size_t{63}, PopCount(0xFFFEFFFFFFFFFFFFull));
+  HWY_ASSERT_EQ(size_t{64}, PopCount(0xFFFFFFFFFFFFFFFFull));
+}
+
+template <class T>
+static HWY_INLINE T TestEndianGetIntegerVal(T val) {
+  // Writes the bytes of `val` into a buffer in this target's memory order,
+  // then copies the buffer back into a T.
+  using TU = MakeUnsigned<T>;
+  static_assert(!IsFloat<T>() && !IsSpecialFloat<T>(),
+                "T must not be a floating-point type");
+  static_assert(sizeof(T) == sizeof(TU),
+                "sizeof(T) == sizeof(TU) must be true");
+  constexpr size_t kBytes = sizeof(T);
+  const TU bits = static_cast<TU>(val);
+  uint8_t buf[kBytes];
+  for (size_t idx = 0; idx < kBytes; ++idx) {
+#if HWY_IS_BIG_ENDIAN
+    const size_t shift = (kBytes - 1 - idx) * 8;  // MSB at lowest address
+#else
+    const size_t shift = idx * 8;  // LSB at lowest address
+#endif
+    buf[idx] = static_cast<uint8_t>((bits >> shift) & 0xFF);
+  }
+  T out;
+  CopyBytes<kBytes>(buf, &out);
+  return out;
+}
+
+template <class T, class... Bytes>
+static HWY_INLINE T TestEndianCreateValueFromBytes(Bytes&&... bytes) {
+  // One argument per byte of T, listed in increasing address order.
+  static_assert(sizeof(T) > 0, "sizeof(T) > 0 must be true");
+  static_assert(sizeof...(Bytes) == sizeof(T),
+                "sizeof...(Bytes) == sizeof(T) must be true");
+  constexpr size_t kBytes = sizeof(T);
+  const uint8_t raw[kBytes] = {static_cast<uint8_t>(bytes)...};
+  T value;
+  CopyBytes<kBytes>(raw, &value);
+  return value;
+}
+
+#define HWY_TEST_ENDIAN_CHECK_INTEGER_VAL(val) \
+  HWY_ASSERT_EQ(val, TestEndianGetIntegerVal(val))
+// The macro above asserts that TestEndianGetIntegerVal(val) returns val.
+HWY_NOINLINE void TestAllEndian() {
+  HWY_TEST_ENDIAN_CHECK_INTEGER_VAL(int8_t{0x01});
+  HWY_TEST_ENDIAN_CHECK_INTEGER_VAL(uint8_t{0x01});
+  HWY_TEST_ENDIAN_CHECK_INTEGER_VAL(int16_t{0x0102});
+  HWY_TEST_ENDIAN_CHECK_INTEGER_VAL(uint16_t{0x0102});
+  HWY_TEST_ENDIAN_CHECK_INTEGER_VAL(int32_t{0x01020304});
+  HWY_TEST_ENDIAN_CHECK_INTEGER_VAL(uint32_t{0x01020304});
+  HWY_TEST_ENDIAN_CHECK_INTEGER_VAL(int64_t{0x0102030405060708});
+  HWY_TEST_ENDIAN_CHECK_INTEGER_VAL(uint64_t{0x0102030405060708});
+  // Same check with the byte values in the opposite order.
+  HWY_TEST_ENDIAN_CHECK_INTEGER_VAL(int16_t{0x0201});
+  HWY_TEST_ENDIAN_CHECK_INTEGER_VAL(uint16_t{0x0201});
+  HWY_TEST_ENDIAN_CHECK_INTEGER_VAL(int32_t{0x04030201});
+  HWY_TEST_ENDIAN_CHECK_INTEGER_VAL(uint32_t{0x04030201});
+  HWY_TEST_ENDIAN_CHECK_INTEGER_VAL(int64_t{0x0807060504030201});
+  HWY_TEST_ENDIAN_CHECK_INTEGER_VAL(uint64_t{0x0807060504030201});
+  // Bytes given in memory order must form the value for this endianness.
+  HWY_ASSERT_EQ(HWY_IS_BIG_ENDIAN ? int16_t{0x0102} : int16_t{0x0201},
+                TestEndianCreateValueFromBytes<int16_t>(0x01, 0x02));
+  HWY_ASSERT_EQ(HWY_IS_BIG_ENDIAN ? uint16_t{0x0102} : uint16_t{0x0201},
+                TestEndianCreateValueFromBytes<uint16_t>(0x01, 0x02));
+  HWY_ASSERT_EQ(
+      HWY_IS_BIG_ENDIAN ? int32_t{0x01020304} : int32_t{0x04030201},
+      TestEndianCreateValueFromBytes<int32_t>(0x01, 0x02, 0x03, 0x04));
+  HWY_ASSERT_EQ(
+      HWY_IS_BIG_ENDIAN ? uint32_t{0x01020304} : uint32_t{0x04030201},
+      TestEndianCreateValueFromBytes<uint32_t>(0x01, 0x02, 0x03, 0x04));
+  HWY_ASSERT_EQ(HWY_IS_BIG_ENDIAN ? int64_t{0x0102030405060708}
+                                  : int64_t{0x0807060504030201},
+                TestEndianCreateValueFromBytes<int64_t>(
+                    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08));
+  HWY_ASSERT_EQ(HWY_IS_BIG_ENDIAN ? uint64_t{0x0102030405060708}
+                                  : uint64_t{0x0807060504030201},
+                TestEndianCreateValueFromBytes<uint64_t>(
+                    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08));
+  // First byte >= 0x80: the value is negative only when big-endian.
+  HWY_ASSERT_EQ(HWY_IS_BIG_ENDIAN ? int16_t{-0x5EFE} : int16_t{0x02A1},
+                TestEndianCreateValueFromBytes<int16_t>(0xA1, 0x02));
+  HWY_ASSERT_EQ(
+      HWY_IS_BIG_ENDIAN ? int32_t{-0x5E4D3CFC} : int32_t{0x04C3B2A1},
+      TestEndianCreateValueFromBytes<int32_t>(0xA1, 0xB2, 0xC3, 0x04));
+  HWY_ASSERT_EQ(HWY_IS_BIG_ENDIAN ? int64_t{-0x6E5D4C3B2A1908F8}
+                                  : int64_t{0x08F7E6D5C4B3A291},
+                TestEndianCreateValueFromBytes<int64_t>(
+                    0x91, 0xA2, 0xB3, 0xC4, 0xD5, 0xE6, 0xF7, 0x08));
+  // Last byte >= 0x80: the value is negative only when little-endian.
+  HWY_ASSERT_EQ(HWY_IS_LITTLE_ENDIAN ? int16_t{-0x5DFF} : int16_t{0x01A2},
+                TestEndianCreateValueFromBytes<int16_t>(0x01, 0xA2));
+  HWY_ASSERT_EQ(
+      HWY_IS_LITTLE_ENDIAN ? int32_t{-0x3B4C5DFF} : int32_t{0x01A2B3C4},
+      TestEndianCreateValueFromBytes<int32_t>(0x01, 0xA2, 0xB3, 0xC4));
+  HWY_ASSERT_EQ(HWY_IS_LITTLE_ENDIAN ? int64_t{-0x0718293A4B5C6DFF}
+                                     : int64_t{0x0192A3B4C5D6E7F8},
+                TestEndianCreateValueFromBytes<int64_t>(
+                    0x01, 0x92, 0xA3, 0xB4, 0xC5, 0xD6, 0xE7, 0xF8));
+  // float: the same three values, with byte lists reversed between branches.
+#if HWY_IS_BIG_ENDIAN
+  HWY_ASSERT_EQ(1.0f,
+                TestEndianCreateValueFromBytes<float>(0x3F, 0x80, 0x00, 0x00));
+  HWY_ASSERT_EQ(15922433.0f,
+                TestEndianCreateValueFromBytes<float>(0x4B, 0x72, 0xF5, 0x01));
+  HWY_ASSERT_EQ(-12357485.0f,
+                TestEndianCreateValueFromBytes<float>(0xCB, 0x3C, 0x8F, 0x6D));
+#else
+  HWY_ASSERT_EQ(1.0f,
+                TestEndianCreateValueFromBytes<float>(0x00, 0x00, 0x80, 0x3F));
+  HWY_ASSERT_EQ(15922433.0f,
+                TestEndianCreateValueFromBytes<float>(0x01, 0xF5, 0x72, 0x4B));
+  HWY_ASSERT_EQ(-12357485.0f,
+                TestEndianCreateValueFromBytes<float>(0x6D, 0x8F, 0x3C, 0xCB));
+#endif
+  // double: same scheme with 8-byte patterns, only if HWY_HAVE_FLOAT64.
+#if HWY_HAVE_FLOAT64
+#if HWY_IS_BIG_ENDIAN
+  HWY_ASSERT_EQ(1.0, TestEndianCreateValueFromBytes<double>(
+                         0x3F, 0xF0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00));
+  HWY_ASSERT_EQ(8707235690688195.0,
+                TestEndianCreateValueFromBytes<double>(0x43, 0x3E, 0xEF, 0x2F,
+                                                       0x4A, 0x51, 0xAE, 0xC3));
+  HWY_ASSERT_EQ(-6815854340348452.0,
+                TestEndianCreateValueFromBytes<double>(0xC3, 0x38, 0x36, 0xFB,
+                                                       0xC0, 0xCC, 0x1A, 0x24));
+#else
+  HWY_ASSERT_EQ(1.0, TestEndianCreateValueFromBytes<double>(
+                         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x3F));
+  HWY_ASSERT_EQ(8707235690688195.0,
+                TestEndianCreateValueFromBytes<double>(0xC3, 0xAE, 0x51, 0x4A,
+                                                       0x2F, 0xEF, 0x3E, 0x43));
+  HWY_ASSERT_EQ(-6815854340348452.0,
+                TestEndianCreateValueFromBytes<double>(0x24, 0x1A, 0xCC, 0xC0,
+                                                       0xFB, 0x36, 0x38, 0xC3));
+#endif  // HWY_IS_BIG_ENDIAN
+#endif  // HWY_HAVE_FLOAT64
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+// Lists every TestAll* function defined above under the BaseTest suite name.
+namespace hwy {
+HWY_BEFORE_TEST(BaseTest);
+HWY_EXPORT_AND_TEST_P(BaseTest, TestAllLimits);
+HWY_EXPORT_AND_TEST_P(BaseTest, TestAllLowestHighest);
+HWY_EXPORT_AND_TEST_P(BaseTest, TestAllType);
+HWY_EXPORT_AND_TEST_P(BaseTest, TestAllIsSame);
+HWY_EXPORT_AND_TEST_P(BaseTest, TestAllBitScan);
+HWY_EXPORT_AND_TEST_P(BaseTest, TestAllPopCount);
+HWY_EXPORT_AND_TEST_P(BaseTest, TestAllEndian);
+}  // namespace hwy
+
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/cache_control.h b/third-party/libjxl/libjxl/third_party/highway/hwy/cache_control.h
new file mode 100644
index 0000000000..6e7665dd29
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/cache_control.h
@@ -0,0 +1,108 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_CACHE_CONTROL_H_
+#define HIGHWAY_HWY_CACHE_CONTROL_H_
+
+#include "hwy/base.h"
+
+// Requires SSE2; fails to compile on 32-bit Clang 7 (see
+// https://github.com/gperftools/gperftools/issues/946).
+#if !defined(__SSE2__) || (HWY_COMPILER_CLANG && HWY_ARCH_X86_32)
+#undef HWY_DISABLE_CACHE_CONTROL
+#define HWY_DISABLE_CACHE_CONTROL
+#endif
+
+// intrin.h is sufficient on MSVC and already included by base.h.
+#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL) && !HWY_COMPILER_MSVC
+#include <emmintrin.h>  // SSE2
+#include <xmmintrin.h>  // _mm_prefetch
+#endif
+
+namespace hwy {
+
+// Even if N*sizeof(T) is smaller, Stream may write a multiple of this size.
+#define HWY_STREAM_MULTIPLE 16
+
+// The following functions may also require an attribute.
+#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL) && !HWY_COMPILER_MSVC
+#define HWY_ATTR_CACHE __attribute__((target("sse2")))
+#else
+#define HWY_ATTR_CACHE
+#endif
+
+// Windows.h #defines this, which causes infinite recursion. Temporarily
+// undefine to avoid conflict with our function.
+// TODO(janwas): remove when this function is removed.
+#pragma push_macro("LoadFence")
+#undef LoadFence
+
+// Delays subsequent loads until prior loads are visible. Behavior may differ
+// across architectures and vendors: on Intel but not AMD CPUs, this is also a
+// full fence (waits for all prior instructions to complete). Compiles to
+// _mm_lfence() on x86 unless HWY_DISABLE_CACHE_CONTROL is defined; else no-op.
+HWY_INLINE HWY_ATTR_CACHE void LoadFence() {
+#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
+  _mm_lfence();
+#endif
+}
+
+// TODO(janwas): remove when this function is removed. (See above.)
+#pragma pop_macro("LoadFence")
+
+// Ensures values written by previous `Stream` calls are visible on the current
+// core (_mm_sfence on x86 with cache control enabled; otherwise a no-op). NOT
+// sufficient for synchronizing across cores: the producer must also publish
+// availability (e.g. via mutex or atomic_flag) after `FlushStream`.
+HWY_INLINE HWY_ATTR_CACHE void FlushStream() {
+#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
+  _mm_sfence();
+#endif
+}
+
+// Optional hint to start loading the cache line containing "p": _mm_prefetch
+// (T0) with x86 cache control, else __builtin_prefetch on GCC/Clang, else none.
+template <typename T>
+HWY_INLINE HWY_ATTR_CACHE void Prefetch(const T* p) {
+#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
+  _mm_prefetch(reinterpret_cast<const char*>(p), _MM_HINT_T0);
+#elif HWY_COMPILER_GCC  // includes clang
+  // Hint=0 (NTA) behavior differs, but skipping outer caches is probably not
+  // desirable, so use the default 3 (keep in caches).
+  __builtin_prefetch(p, /*write=*/0, /*hint=*/3);
+#else
+  (void)p;
+#endif
+}
+
+// Invalidates and flushes the cache line containing "p" via _mm_clflush when x86 cache control is enabled; otherwise a no-op.
+HWY_INLINE HWY_ATTR_CACHE void FlushCacheline(const void* p) {
+#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
+  _mm_clflush(p);
+#else
+  (void)p;
+#endif
+}
+
+// When called inside a spin-loop, may reduce power consumption (_mm_pause on x86 with cache control enabled; otherwise a no-op).
+HWY_INLINE HWY_ATTR_CACHE void Pause() {
+#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
+  _mm_pause();
+#endif
+}
+
+}  // namespace hwy
+
+#endif  // HIGHWAY_HWY_CACHE_CONTROL_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/algo/copy-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/algo/copy-inl.h
new file mode 100644
index 0000000000..22f4252c1a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/algo/copy-inl.h
@@ -0,0 +1,135 @@
+// Copyright 2022 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Per-target include guard
+#if defined(HIGHWAY_HWY_CONTRIB_ALGO_COPY_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_CONTRIB_ALGO_COPY_INL_H_
+#undef HIGHWAY_HWY_CONTRIB_ALGO_COPY_INL_H_
+#else
+#define HIGHWAY_HWY_CONTRIB_ALGO_COPY_INL_H_
+#endif
+
+#include "hwy/highway.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// These functions avoid having to write a loop plus remainder handling in the
+// (unfortunately still common) case where arrays are not aligned/padded. If the
+// inputs are known to be aligned/padded, it is more efficient to write a single
+// loop using Load(). We do not provide a CopyAlignedPadded because it
+// would be more verbose than such a loop.
+
+// Sets every element of `to`[0, `count`) to `value`.
+template <class D, typename T = TFromD<D>>
+void Fill(D d, T value, size_t count, T* HWY_RESTRICT to) {
+  const size_t N = Lanes(d);
+  const Vec<D> splat = Set(d, value);
+  // Whole vectors first, written with unaligned stores.
+  size_t pos = 0;
+  while (pos + N <= count) {
+    StoreU(splat, d, to + pos);
+    pos += N;
+  }
+
+  // No partial vector is left when `count` is a multiple of `N`.
+  if (HWY_UNLIKELY(pos == count)) return;
+  const size_t remaining = count - pos;
+  HWY_DASSERT(0 != remaining && remaining < N);
+  SafeFillN(remaining, value, d, to + pos);
+}
+
+// Copies `count` elements from `from` to `to`; the ranges must not overlap.
+template <class D, typename T = TFromD<D>>
+void Copy(D d, const T* HWY_RESTRICT from, size_t count, T* HWY_RESTRICT to) {
+  const size_t N = Lanes(d);
+  // Whole vectors first, using unaligned loads and stores.
+  size_t pos = 0;
+  while (pos + N <= count) {
+    StoreU(LoadU(d, from + pos), d, to + pos);
+    pos += N;
+  }
+
+  // No partial vector is left when `count` is a multiple of `N`.
+  if (HWY_UNLIKELY(pos == count)) return;
+
+  const size_t remaining = count - pos;
+  HWY_DASSERT(0 != remaining && remaining < N);
+  SafeCopyN(remaining, d, from + pos, to + pos);
+}
+
+// For idx in [0, count) in ascending order, appends `from[idx]` to `to` if the
+// corresponding mask element of `func(d, v)` is true. Returns the STL-style end
+// of the newly written elements in `to`.
+//
+// `func` is either a functor with a templated operator()(d, v) returning a
+// mask, or a generic lambda if using C++14. Due to apparent limitations of
+// Clang on Windows, it is currently necessary to add HWY_ATTR before the
+// opening { of the lambda to avoid errors about "function .. requires target".
+//
+// NOTE: this is only supported for 16-, 32- or 64-bit types.
+// NOTE: Func may be called a second time for elements it has already seen, but
+// these elements will not be written to `to` again.
+template <class D, class Func, typename T = TFromD<D>>
+T* CopyIf(D d, const T* HWY_RESTRICT from, size_t count, T* HWY_RESTRICT to,
+          const Func& func) {
+  const size_t N = Lanes(d);
+  // Whole vectors: each CompressBlendedStore result advances `to`.
+  size_t idx = 0;
+  for (; idx + N <= count; idx += N) {
+    const Vec<D> v = LoadU(d, from + idx);
+    to += CompressBlendedStore(v, func(d, v), d, to);
+  }
+
+  // `count` was a multiple of the vector length `N`: already done.
+  if (HWY_UNLIKELY(idx == count)) return to;
+  // Remainder: fewer than `N` elements are left; one of two strategies follows.
+#if HWY_MEM_OPS_MIGHT_FAULT
+  // Proceed one by one.
+  const CappedTag<T, 1> d1;
+  for (; idx < count; ++idx) {
+    using V1 = Vec<decltype(d1)>;
+    // Workaround for -Waggressive-loop-optimizations on GCC 8
+    // (iteration 2305843009213693951 invokes undefined behavior for T=i64)
+    const uintptr_t addr = reinterpret_cast<uintptr_t>(from);
+    const T* HWY_RESTRICT from_idx =
+        reinterpret_cast<const T * HWY_RESTRICT>(addr + (idx * sizeof(T)));
+    const V1 v = LoadU(d1, from_idx);
+    // Avoid storing to `to` unless we know it should be kept - otherwise, we
+    // might overrun the end if it was allocated for the exact count.
+    if (CountTrue(d1, func(d1, v)) == 0) continue;
+    StoreU(v, d1, to);
+    to += 1;
+  }
+#else
+  // Start index of the last unaligned whole vector, ending at the array end.
+  const size_t last = count - N;
+  // Number of elements before `from` or already written.
+  const size_t invalid = idx - last;
+  HWY_DASSERT(0 != invalid && invalid < N);
+  const Mask<D> mask = Not(FirstN(d, invalid));
+  const Vec<D> v = MaskedLoad(mask, d, from + last);
+  to += CompressBlendedStore(v, And(mask, func(d, v)), d, to);
+#endif
+  return to;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#endif  // HIGHWAY_HWY_CONTRIB_ALGO_COPY_INL_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/algo/copy_test.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/algo/copy_test.cc
new file mode 100644
index 0000000000..c74f6e9bd7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/algo/copy_test.cc
@@ -0,0 +1,204 @@
+// Copyright 2022 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/aligned_allocator.h"
+
+// clang-format off
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/algo/copy_test.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+#include "hwy/highway.h"
+#include "hwy/contrib/algo/copy-inl.h"
+#include "hwy/tests/test_util-inl.h"
+// clang-format on
+
+// If your project requires C++14 or later, you can ignore this and pass lambdas
+// directly to Transform, without requiring an lvalue as we do here for C++11.
+#if __cplusplus < 201402L
+#define HWY_GENERIC_LAMBDA 0
+#else
+#define HWY_GENERIC_LAMBDA 1
+#endif
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Returns the low 7 bits of Random32 as T; [0, 128) fits in any lane type.
+template <class T>
+T Random7Bit(RandomState& rng) {
+  return static_cast<T>(Random32(&rng) & 0x7F);
+}
+
+// In C++14, we can instead define these as generic lambdas next to where they
+// are invoked (see HWY_GENERIC_LAMBDA).
+#if !HWY_GENERIC_LAMBDA
+// Predicate functor: returns TestBit(v, 1), i.e. lanes whose lowest bit is set.
+struct IsOdd {
+  template <class D, class V>
+  Mask<D> operator()(D d, V v) const {
+    return TestBit(v, Set(d, TFromD<D>{1}));
+  }
+};
+
+#endif  // !HWY_GENERIC_LAMBDA
+
+// Runs Test (e.g. TestCopyIf) for every count in [0, 2N) combined with each
+// pair of misalignments taken from {0, N/4, 3N/5}.
+template <class Test>
+struct ForeachCountAndMisalign {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) const {
+    const size_t lanes = Lanes(d);
+    const size_t offsets[3] = {0, lanes / 4, 3 * lanes / 5};
+    const size_t max_count = 2 * lanes;
+    RandomState rng;  // one generator shared by all invocations below
+    for (size_t count = 0; count < max_count; ++count) {
+      for (const size_t misalign_a : offsets) {
+        for (const size_t misalign_b : offsets) {
+          Test()(d, count, misalign_a, misalign_b, rng);
+        }
+      }
+    }
+  }
+};
+
+struct TestFill {
+  template <class D>
+  void operator()(D d, size_t count, size_t misalign_a, size_t misalign_b,
+                  RandomState& rng) {
+    using T = TFromD<D>;
+    // HWY_MAX keeps the allocation non-empty when misalign == count == 0; `pb`
+    // has one extra element for the sentinel.
+    auto pa = AllocateAligned<T>(HWY_MAX(1, misalign_a + count));
+    auto pb = AllocateAligned<T>(misalign_b + count + 1);
+    HWY_ASSERT(pa && pb);
+    T* const expected = pa.get() + misalign_a;
+    T* const actual = pb.get() + misalign_b;
+    const T value = Random7Bit<T>(rng);
+    for (size_t k = 0; k != count; ++k) {
+      expected[k] = value;
+    }
+
+    actual[count] = T{0};  // sentinel
+    Fill(d, value, count, actual);
+    HWY_ASSERT_EQ(T{0}, actual[count]);  // did not write past end
+
+    hwy::detail::AssertArrayEqual(hwy::detail::MakeTypeInfo<T>(), expected,
+                                  actual, count, hwy::TargetName(HWY_TARGET),
+                                  __FILE__, __LINE__);
+  }
+};
+
+void TestAllFill() {
+  // Every lane type, including partial vectors.
+  ForAllTypes(ForPartialVectors<ForeachCountAndMisalign<TestFill>>());
+}
+
+struct TestCopy {
+  template <class D>
+  void operator()(D d, size_t count, size_t misalign_a, size_t misalign_b,
+                  RandomState& rng) {
+    using T = TFromD<D>;
+    // HWY_MAX keeps both allocations non-empty when misalign == count == 0.
+    auto pa = AllocateAligned<T>(HWY_MAX(1, misalign_a + count));
+    auto pb = AllocateAligned<T>(HWY_MAX(1, misalign_b + count));
+    HWY_ASSERT(pa && pb);
+    T* const src = pa.get() + misalign_a;
+    T* const dst = pb.get() + misalign_b;
+
+    // Source data: `count` random 7-bit values.
+    for (size_t k = 0; k != count; ++k) {
+      src[k] = Random7Bit<T>(rng);
+    }
+
+    Copy(d, src, count, dst);
+
+    // The destination must now match the source element for element.
+    hwy::detail::AssertArrayEqual(hwy::detail::MakeTypeInfo<T>(), src, dst,
+                                  count, hwy::TargetName(HWY_TARGET),
+                                  __FILE__, __LINE__);
+  }
+};
+
+void TestAllCopy() {
+  ForAllTypes(ForPartialVectors<ForeachCountAndMisalign<TestCopy>>());
+}
+
+struct TestCopyIf {
+  template <class D>
+  void operator()(D d, size_t count, size_t misalign_a, size_t misalign_b,
+                  RandomState& rng) {
+    using T = TFromD<D>;
+    const size_t padding = Lanes(ScalableTag<T>());
+
+    // HWY_MAX prevents a zero-sized allocation; `pb` also gets `padding` extra.
+    AlignedFreeUniquePtr<T[]> pa =
+        AllocateAligned<T>(HWY_MAX(1, misalign_a + count));
+    AlignedFreeUniquePtr<T[]> pb =
+        AllocateAligned<T>(HWY_MAX(1, misalign_b + count + padding));
+    AlignedFreeUniquePtr<T[]> expected = AllocateAligned<T>(HWY_MAX(1, count));
+    HWY_ASSERT(pa && pb && expected);
+
+    T* a = pa.get() + misalign_a;
+    for (size_t i = 0; i < count; ++i) {
+      a[i] = Random7Bit<T>(rng);
+    }
+    T* b = pb.get() + misalign_b;
+    // Reference result: the odd elements of `a`, in their original order.
+    size_t num_odd = 0;
+    for (size_t i = 0; i < count; ++i) {
+      if (a[i] & 1) {
+        expected[num_odd++] = a[i];
+      }
+    }
+    // Predicate: generic lambda if available, otherwise the IsOdd functor.
+#if HWY_GENERIC_LAMBDA
+    const auto is_odd = [](const auto d, const auto v) HWY_ATTR {
+      return TestBit(v, Set(d, TFromD<decltype(d)>{1}));
+    };
+#else
+    const IsOdd is_odd;
+#endif
+    T* end = CopyIf(d, a, count, b, is_odd);
+    const size_t num_written = static_cast<size_t>(end - b);
+    HWY_ASSERT_EQ(num_odd, num_written);
+    // The first num_odd outputs must equal the reference.
+    const auto info = hwy::detail::MakeTypeInfo<T>();
+    const char* target_name = hwy::TargetName(HWY_TARGET);
+    hwy::detail::AssertArrayEqual(info, expected.get(), b, num_odd, target_name,
+                                  __FILE__, __LINE__);
+  }
+};
+
+void TestAllCopyIf() {
+  ForUI163264(ForPartialVectors<ForeachCountAndMisalign<TestCopyIf>>());
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+// Lists every TestAll* function defined above under the CopyTest suite name.
+namespace hwy {
+HWY_BEFORE_TEST(CopyTest);
+HWY_EXPORT_AND_TEST_P(CopyTest, TestAllFill);
+HWY_EXPORT_AND_TEST_P(CopyTest, TestAllCopy);
+HWY_EXPORT_AND_TEST_P(CopyTest, TestAllCopyIf);
+}  // namespace hwy
+
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/algo/find-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/algo/find-inl.h
new file mode 100644
index 0000000000..c1e5a84361
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/algo/find-inl.h
@@ -0,0 +1,108 @@
+// Copyright 2022 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Per-target include guard
+#if defined(HIGHWAY_HWY_CONTRIB_ALGO_FIND_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_CONTRIB_ALGO_FIND_INL_H_
+#undef HIGHWAY_HWY_CONTRIB_ALGO_FIND_INL_H_
+#else
+#define HIGHWAY_HWY_CONTRIB_ALGO_FIND_INL_H_
+#endif
+
+#include "hwy/highway.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Returns index of the first element equal to `value` in `in[0, count)`, or
+// `count` if not found. Whole vectors are scanned first, then any remainder.
+template <class D, typename T = TFromD<D>>
+size_t Find(D d, T value, const T* HWY_RESTRICT in, size_t count) {
+  const size_t N = Lanes(d);
+  const Vec<D> broadcasted = Set(d, value);
+
+  size_t i = 0;
+  for (; i + N <= count; i += N) {  // full vectors of N lanes
+    const intptr_t pos = FindFirstTrue(d, Eq(broadcasted, LoadU(d, in + i)));
+    if (pos >= 0) return i + static_cast<size_t>(pos);  // else keep scanning
+  }
+
+  if (i != count) {  // between 1 and N-1 elements remain
+#if HWY_MEM_OPS_MIGHT_FAULT
+    // Scan single elements.
+    const CappedTag<T, 1> d1;
+    using V1 = Vec<decltype(d1)>;
+    const V1 broadcasted1 = Set(d1, GetLane(broadcasted));
+    for (; i < count; ++i) {
+      if (AllTrue(d1, Eq(broadcasted1, LoadU(d1, in + i)))) {
+        return i;
+      }
+    }
+#else
+    const size_t remaining = count - i;
+    HWY_DASSERT(0 != remaining && remaining < N);
+    const Mask<D> mask = FirstN(d, remaining);
+    const Vec<D> v = MaskedLoad(mask, d, in + i);
+    // Apply mask so that we don't 'find' the zero-padding from MaskedLoad.
+    const intptr_t pos = FindFirstTrue(d, And(Eq(broadcasted, v), mask));
+    if (pos >= 0) return i + static_cast<size_t>(pos);
+#endif  // HWY_MEM_OPS_MIGHT_FAULT
+  }
+
+  return count;  // not found
+}
+
+// Returns index of the first element in `in[0, count)` for which the mask from
+// `func(d, vec)` is true, otherwise `count`.
+template <class D, class Func, typename T = TFromD<D>>
+size_t FindIf(D d, const T* HWY_RESTRICT in, size_t count, const Func& func) {
+  const size_t N = Lanes(d);
+
+  size_t i = 0;
+  for (; i + N <= count; i += N) {  // full vectors of N lanes
+    const intptr_t pos = FindFirstTrue(d, func(d, LoadU(d, in + i)));
+    if (pos >= 0) return i + static_cast<size_t>(pos);  // else keep scanning
+  }
+
+  if (i != count) {  // between 1 and N-1 elements remain
+#if HWY_MEM_OPS_MIGHT_FAULT
+    // Scan single elements.
+    const CappedTag<T, 1> d1;
+    for (; i < count; ++i) {
+      if (AllTrue(d1, func(d1, LoadU(d1, in + i)))) {  // func must accept d1 too
+        return i;
+      }
+    }
+#else
+    const size_t remaining = count - i;
+    HWY_DASSERT(0 != remaining && remaining < N);
+    const Mask<D> mask = FirstN(d, remaining);
+    const Vec<D> v = MaskedLoad(mask, d, in + i);
+    // Apply mask so that we don't 'find' the zero-padding from MaskedLoad.
+    const intptr_t pos = FindFirstTrue(d, And(func(d, v), mask));
+    if (pos >= 0) return i + static_cast<size_t>(pos);
+#endif  // HWY_MEM_OPS_MIGHT_FAULT
+  }
+
+  return count;  // not found
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#endif  // HIGHWAY_HWY_CONTRIB_ALGO_FIND_INL_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/algo/find_test.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/algo/find_test.cc
new file mode 100644
index 0000000000..4be5acba9c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/algo/find_test.cc
@@ -0,0 +1,226 @@
+// Copyright 2022 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+
+#include <algorithm>  // std::find_if
+#include <vector>
+
+#include "hwy/aligned_allocator.h"
+#include "hwy/base.h"
+#include "hwy/print.h"
+
+// clang-format off
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/algo/find_test.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+#include "hwy/highway.h"
+#include "hwy/contrib/algo/find-inl.h"
+#include "hwy/tests/test_util-inl.h"
+// clang-format on
+
+// If your project requires C++14 or later, you can ignore this and pass lambdas
+// directly to FindIf, without requiring an lvalue as we do here for C++11.
+#if __cplusplus < 201402L
+#define HWY_GENERIC_LAMBDA 0
+#else
+#define HWY_GENERIC_LAMBDA 1
+#endif
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Returns random number in [-8, 8] - we use knowledge of the range to Find()
+// values we know are not present.
+template <typename T>
+T Random(RandomState& rng) {
+  const int32_t bits = static_cast<int32_t>(Random32(&rng)) & 1023;  // 0..1023
+  double val = (bits - 512) / 64.0;  // multiples of 1/64 in [-8, 8)
+  // Negate (rather than clamp) negative values for unsigned types: [0, 8].
+  if (!hwy::IsSigned<T>() && val < 0.0) {
+    val = -val;
+  }
+  return static_cast<T>(val);
+}
+
+// In C++14, we can instead define these as generic lambdas next to where they
+// are invoked.
+#if !HWY_GENERIC_LAMBDA
+
+class GreaterThan {  // Predicate functor: mask of lanes with v > val.
+ public:
+  GreaterThan(int val) : val_(val) {}
+  template <class D, class V>
+  Mask<D> operator()(D d, V v) const {
+    return Gt(v, Set(d, static_cast<TFromD<D>>(val_)));  // val_ cast to lane type
+  }
+
+ private:
+  int val_;  // threshold compared against every lane
+};
+
+#endif  // !HWY_GENERIC_LAMBDA
+
+// Invokes Test (e.g. TestFind) with all arg combinations.
+template <class Test>
+struct ForeachCountAndMisalign {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) const {
+    RandomState rng;
+    const size_t N = Lanes(d);
+    const size_t misalignments[3] = {0, N / 4, 3 * N / 5};  // in elements
+
+    // Sample random counts in [0, 16 * N] so that we cover a fairly large
+    // range without oversampling (checking every possible count).
+    std::vector<size_t> counts(AdjustedReps(512));
+    for (size_t& count : counts) {
+      count = static_cast<size_t>(rng()) % (16 * N + 1);
+    }
+    counts[0] = 0;  // ensure we test count=0.
+
+    for (size_t count : counts) {
+      for (size_t m : misalignments) {
+        Test()(d, count, m, rng);  // fresh Test object per combination
+      }
+    }
+  }
+};
+
+struct TestFind {  // Checks Find() for present and absent values.
+  template <class D>
+  void operator()(D d, size_t count, size_t misalign, RandomState& rng) {
+    using T = TFromD<D>;
+    // Must allocate at least one even if count is zero.
+    AlignedFreeUniquePtr<T[]> storage =
+        AllocateAligned<T>(HWY_MAX(1, misalign + count));
+    HWY_ASSERT(storage);
+    T* in = storage.get() + misalign;  // input starts `misalign` elements in
+    for (size_t i = 0; i < count; ++i) {
+      in[i] = Random<T>(rng);
+    }
+
+    // For each position, search for that element (which we know is there)
+    for (size_t pos = 0; pos < count; ++pos) {
+      const size_t actual = Find(d, in[pos], in, count);
+
+      // We may have found an earlier occurrence of the same value; ensure the
+      // value is the same, and that it is the first.
+      if (!IsEqual(in[pos], in[actual])) {
+        fprintf(stderr, "%s count %d, found %.15f at %d but wanted %.15f\n",
+                hwy::TypeName(T(), Lanes(d)).c_str(), static_cast<int>(count),
+                static_cast<double>(in[actual]), static_cast<int>(actual),
+                static_cast<double>(in[pos]));
+        HWY_ASSERT(false);
+      }
+      for (size_t i = 0; i < actual; ++i) {  // nothing before `actual` may match
+        if (IsEqual(in[i], in[pos])) {
+          fprintf(stderr, "%s count %d, found %f at %d but Find returned %d\n",
+                  hwy::TypeName(T(), Lanes(d)).c_str(), static_cast<int>(count),
+                  static_cast<double>(in[i]), static_cast<int>(i),
+                  static_cast<int>(actual));
+          HWY_ASSERT(false);
+        }
+      }
+    }
+
+    // Also search for values we know not to be present (out of range)
+    HWY_ASSERT_EQ(count, Find(d, T{9}, in, count));
+    HWY_ASSERT_EQ(count, Find(d, static_cast<T>(-9), in, count));
+  }
+};
+
+void TestAllFind() {
+  ForAllTypes(ForPartialVectors<ForeachCountAndMisalign<TestFind>>());
+}
+
+struct TestFindIf {  // Compares FindIf(greater-than) against std::find_if.
+  template <class D>
+  void operator()(D d, size_t count, size_t misalign, RandomState& rng) {
+    using T = TFromD<D>;
+    using TI = MakeSigned<T>;
+    // Must allocate at least one even if count is zero.
+    AlignedFreeUniquePtr<T[]> storage =
+        AllocateAligned<T>(HWY_MAX(1, misalign + count));
+    HWY_ASSERT(storage);
+    T* in = storage.get() + misalign;
+    for (size_t i = 0; i < count; ++i) {
+      in[i] = Random<T>(rng);
+      HWY_ASSERT(in[i] <= 8);  // the loop over `val` below relies on this range
+      HWY_ASSERT(!hwy::IsSigned<T>() || static_cast<TI>(in[i]) >= -8);
+    }
+
+    bool found_any = false;
+    bool not_found_any = false;
+
+    // unsigned T would be promoted to signed and compare greater than any
+    // negative val, whereas Set() would just cast to an unsigned value and the
+    // comparison remains unsigned, so avoid negative numbers there.
+    const int min_val = IsSigned<T>() ? -9 : 0;
+    // Includes out-of-range value 9 to test the not-found path.
+    for (int val = min_val; val <= 9; ++val) {
+#if HWY_GENERIC_LAMBDA
+      const auto greater = [val](const auto d, const auto v) HWY_ATTR {
+        return Gt(v, Set(d, static_cast<T>(val)));
+      };
+#else
+      const GreaterThan greater(val);
+#endif
+      const size_t actual = FindIf(d, in, count, greater);
+      found_any |= actual < count;
+      not_found_any |= actual == count;
+
+      const auto pos = std::find_if(
+          in, in + count, [val](T x) { return x > static_cast<T>(val); });
+      // Convert returned iterator to index.
+      const size_t expected = static_cast<size_t>(pos - in);
+      if (expected != actual) {
+        fprintf(stderr, "%s count %d val %d, expected %d actual %d\n",
+                hwy::TypeName(T(), Lanes(d)).c_str(), static_cast<int>(count),
+                val, static_cast<int>(expected), static_cast<int>(actual));
+        hwy::detail::PrintArray(hwy::detail::MakeTypeInfo<T>(), "in", in, count,
+                                0, count);
+        HWY_ASSERT(false);
+      }
+    }
+
+    // We will always not-find something due to val=9.
+    HWY_ASSERT(not_found_any);
+    // A match is only guaranteed when the first element is nonzero: 0 > val is
+    // false for every val in [0, 9]. Skip the check if empty or in[0] == 0.
+    if (count != 0 && in[0] != 0) {
+      HWY_ASSERT(found_any);
+    }
+  }
+};
+
+void TestAllFindIf() {
+  ForAllTypes(ForPartialVectors<ForeachCountAndMisalign<TestFindIf>>());
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace hwy {
+HWY_BEFORE_TEST(FindTest);
+HWY_EXPORT_AND_TEST_P(FindTest, TestAllFind);
+HWY_EXPORT_AND_TEST_P(FindTest, TestAllFindIf);
+}  // namespace hwy
+
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/algo/transform-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/algo/transform-inl.h
new file mode 100644
index 0000000000..3e830acb47
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/algo/transform-inl.h
@@ -0,0 +1,262 @@
+// Copyright 2022 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Per-target include guard
+#if defined(HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_) == \
+    defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_
+#undef HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_
+#else
+#define HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_
+#endif
+
+#include "hwy/highway.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// These functions avoid having to write a loop plus remainder handling in the
+// (unfortunately still common) case where arrays are not aligned/padded. If the
+// inputs are known to be aligned/padded, it is more efficient to write a single
+// loop using Load(). We do not provide a TransformAlignedPadded because it
+// would be more verbose than such a loop.
+//
+// Func is either a functor with a templated operator()(d, v[, v1[, v2]]), or a
+// generic lambda if using C++14. Due to apparent limitations of Clang on
+// Windows, it is currently necessary to add HWY_ATTR before the opening { of
+// the lambda to avoid errors about "always_inline function .. requires target".
+//
+// If HWY_MEM_OPS_MIGHT_FAULT, we use scalar code instead of masking. Otherwise,
+// we use `MaskedLoad` and `BlendedStore` to read/write the final partial
+// vector.
+
+// Fills `out[0, count)` with the vectors returned by `func(d, index_vec)`,
+// where `index_vec` is `Vec<RebindToUnsigned<D>>`. On the first call to `func`,
+// the value of its lane i is i, and increases by `Lanes(d)` after every call.
+// Note that some of these indices may be `>= count`, but the elements that
+// `func` returns in those lanes will not be written to `out`.
+template <class D, class Func, typename T = TFromD<D>>
+void Generate(D d, T* HWY_RESTRICT out, size_t count, const Func& func) {
+  const RebindToUnsigned<D> du;
+  using TU = TFromD<decltype(du)>;  // NOTE(review): indices are cast to TU; presumably they wrap for narrow lanes - confirm
+  const size_t N = Lanes(d);
+
+  size_t idx = 0;
+  Vec<decltype(du)> vidx = Iota(du, 0);
+  for (; idx + N <= count; idx += N) {  // full vectors of N lanes
+    StoreU(func(d, vidx), d, out + idx);
+    vidx = Add(vidx, Set(du, static_cast<TU>(N)));  // advance every index by N
+  }
+
+  // `count` was a multiple of the vector length `N`: already done.
+  if (HWY_UNLIKELY(idx == count)) return;
+
+#if HWY_MEM_OPS_MIGHT_FAULT
+  // Proceed one by one.
+  const CappedTag<T, 1> d1;
+  const RebindToUnsigned<decltype(d1)> du1;
+  for (; idx < count; ++idx) {
+    StoreU(func(d1, Set(du1, static_cast<TU>(idx))), d1, out + idx);  // func must accept d1 too
+  }
+#else
+  const size_t remaining = count - idx;
+  HWY_DASSERT(0 != remaining && remaining < N);
+  const Mask<D> mask = FirstN(d, remaining);
+  BlendedStore(func(d, vidx), mask, d, out + idx);  // store limited by `mask`
+#endif
+}
+
+// Replaces `inout[idx]` with `func(d, inout[idx])`. Example usage: multiplying
+// array elements by a constant.
+template <class D, class Func, typename T = TFromD<D>>
+void Transform(D d, T* HWY_RESTRICT inout, size_t count, const Func& func) {
+  const size_t N = Lanes(d);
+
+  size_t idx = 0;
+  for (; idx + N <= count; idx += N) {  // full vectors of N lanes
+    const Vec<D> v = LoadU(d, inout + idx);
+    StoreU(func(d, v), d, inout + idx);
+  }
+
+  // `count` was a multiple of the vector length `N`: already done.
+  if (HWY_UNLIKELY(idx == count)) return;
+
+#if HWY_MEM_OPS_MIGHT_FAULT
+  // Proceed one by one.
+  const CappedTag<T, 1> d1;
+  for (; idx < count; ++idx) {
+    using V1 = Vec<decltype(d1)>;
+    const V1 v = LoadU(d1, inout + idx);
+    StoreU(func(d1, v), d1, inout + idx);  // func must accept d1 too
+  }
+#else
+  const size_t remaining = count - idx;
+  HWY_DASSERT(0 != remaining && remaining < N);
+  const Mask<D> mask = FirstN(d, remaining);
+  const Vec<D> v = MaskedLoad(mask, d, inout + idx);
+  BlendedStore(func(d, v), mask, d, inout + idx);  // func is given the whole v
+#endif
+}
+
+// Replaces `inout[idx]` with `func(d, inout[idx], in1[idx])`. Example usage:
+// multiplying array elements by those of another array.
+template <class D, class Func, typename T = TFromD<D>>
+void Transform1(D d, T* HWY_RESTRICT inout, size_t count,
+                const T* HWY_RESTRICT in1, const Func& func) {
+  const size_t N = Lanes(d);
+
+  size_t idx = 0;
+  for (; idx + N <= count; idx += N) {  // full vectors of N lanes
+    const Vec<D> v = LoadU(d, inout + idx);
+    const Vec<D> v1 = LoadU(d, in1 + idx);
+    StoreU(func(d, v, v1), d, inout + idx);
+  }
+
+  // `count` was a multiple of the vector length `N`: already done.
+  if (HWY_UNLIKELY(idx == count)) return;
+
+#if HWY_MEM_OPS_MIGHT_FAULT
+  // Proceed one by one.
+  const CappedTag<T, 1> d1;
+  for (; idx < count; ++idx) {
+    using V1 = Vec<decltype(d1)>;
+    const V1 v = LoadU(d1, inout + idx);
+    const V1 v1 = LoadU(d1, in1 + idx);
+    StoreU(func(d1, v, v1), d1, inout + idx);  // func must accept d1 too
+  }
+#else
+  const size_t remaining = count - idx;
+  HWY_DASSERT(0 != remaining && remaining < N);
+  const Mask<D> mask = FirstN(d, remaining);
+  const Vec<D> v = MaskedLoad(mask, d, inout + idx);
+  const Vec<D> v1 = MaskedLoad(mask, d, in1 + idx);  // same mask for both inputs
+  BlendedStore(func(d, v, v1), mask, d, inout + idx);
+#endif
+}
+
+// Replaces `inout[idx]` with `func(d, inout[idx], in1[idx], in2[idx])`. Example
+// usage: FMA of elements from three arrays, stored into the first array.
+template <class D, class Func, typename T = TFromD<D>>
+void Transform2(D d, T* HWY_RESTRICT inout, size_t count,
+                const T* HWY_RESTRICT in1, const T* HWY_RESTRICT in2,
+                const Func& func) {
+  const size_t N = Lanes(d);
+
+  size_t idx = 0;
+  for (; idx + N <= count; idx += N) {  // full vectors of N lanes
+    const Vec<D> v = LoadU(d, inout + idx);
+    const Vec<D> v1 = LoadU(d, in1 + idx);
+    const Vec<D> v2 = LoadU(d, in2 + idx);
+    StoreU(func(d, v, v1, v2), d, inout + idx);
+  }
+
+  // `count` was a multiple of the vector length `N`: already done.
+  if (HWY_UNLIKELY(idx == count)) return;
+
+#if HWY_MEM_OPS_MIGHT_FAULT
+  // Proceed one by one.
+  const CappedTag<T, 1> d1;
+  for (; idx < count; ++idx) {
+    using V1 = Vec<decltype(d1)>;
+    const V1 v = LoadU(d1, inout + idx);
+    const V1 v1 = LoadU(d1, in1 + idx);
+    const V1 v2 = LoadU(d1, in2 + idx);
+    StoreU(func(d1, v, v1, v2), d1, inout + idx);  // func must accept d1 too
+  }
+#else
+  const size_t remaining = count - idx;
+  HWY_DASSERT(0 != remaining && remaining < N);
+  const Mask<D> mask = FirstN(d, remaining);
+  const Vec<D> v = MaskedLoad(mask, d, inout + idx);
+  const Vec<D> v1 = MaskedLoad(mask, d, in1 + idx);  // same mask for all inputs
+  const Vec<D> v2 = MaskedLoad(mask, d, in2 + idx);
+  BlendedStore(func(d, v, v1, v2), mask, d, inout + idx);
+#endif
+}
+
+template <class D, typename T = TFromD<D>>  // Overwrites every inout[idx] equal to old_t with new_t.
+void Replace(D d, T* HWY_RESTRICT inout, size_t count, T new_t, T old_t) {  // NOTE: new_t precedes old_t
+  const size_t N = Lanes(d);
+  const Vec<D> old_v = Set(d, old_t);
+  const Vec<D> new_v = Set(d, new_t);
+
+  size_t idx = 0;
+  for (; idx + N <= count; idx += N) {  // full vectors of N lanes
+    Vec<D> v = LoadU(d, inout + idx);
+    StoreU(IfThenElse(Eq(v, old_v), new_v, v), d, inout + idx);  // others keep v
+  }
+
+  // `count` was a multiple of the vector length `N`: already done.
+  if (HWY_UNLIKELY(idx == count)) return;
+
+#if HWY_MEM_OPS_MIGHT_FAULT
+  // Proceed one by one.
+  const CappedTag<T, 1> d1;
+  const Vec<decltype(d1)> old_v1 = Set(d1, old_t);
+  const Vec<decltype(d1)> new_v1 = Set(d1, new_t);
+  for (; idx < count; ++idx) {
+    using V1 = Vec<decltype(d1)>;
+    const V1 v1 = LoadU(d1, inout + idx);
+    StoreU(IfThenElse(Eq(v1, old_v1), new_v1, v1), d1, inout + idx);
+  }
+#else
+  const size_t remaining = count - idx;
+  HWY_DASSERT(0 != remaining && remaining < N);
+  const Mask<D> mask = FirstN(d, remaining);
+  const Vec<D> v = MaskedLoad(mask, d, inout + idx);
+  BlendedStore(IfThenElse(Eq(v, old_v), new_v, v), mask, d, inout + idx);
+#endif
+}
+
+template <class D, class Func, typename T = TFromD<D>>  // Overwrites with new_t every element selected by func's mask.
+void ReplaceIf(D d, T* HWY_RESTRICT inout, size_t count, T new_t,
+               const Func& func) {
+  const size_t N = Lanes(d);
+  const Vec<D> new_v = Set(d, new_t);
+
+  size_t idx = 0;
+  for (; idx + N <= count; idx += N) {  // full vectors of N lanes
+    Vec<D> v = LoadU(d, inout + idx);
+    StoreU(IfThenElse(func(d, v), new_v, v), d, inout + idx);  // others keep v
+  }
+
+  // `count` was a multiple of the vector length `N`: already done.
+  if (HWY_UNLIKELY(idx == count)) return;
+
+#if HWY_MEM_OPS_MIGHT_FAULT
+  // Proceed one by one.
+  const CappedTag<T, 1> d1;
+  const Vec<decltype(d1)> new_v1 = Set(d1, new_t);
+  for (; idx < count; ++idx) {
+    using V1 = Vec<decltype(d1)>;
+    const V1 v = LoadU(d1, inout + idx);
+    StoreU(IfThenElse(func(d1, v), new_v1, v), d1, inout + idx);  // func must accept d1 too
+  }
+#else
+  const size_t remaining = count - idx;
+  HWY_DASSERT(0 != remaining && remaining < N);
+  const Mask<D> mask = FirstN(d, remaining);
+  const Vec<D> v = MaskedLoad(mask, d, inout + idx);
+  BlendedStore(IfThenElse(func(d, v), new_v, v), mask, d, inout + idx);
+#endif
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#endif  // HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/algo/transform_test.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/algo/transform_test.cc
new file mode 100644
index 0000000000..4726e82419
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/algo/transform_test.cc
@@ -0,0 +1,381 @@
+// Copyright 2022 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>  // memcpy
+
+#include <vector>
+
+#include "hwy/aligned_allocator.h"
+
+// clang-format off
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/algo/transform_test.cc"  //NOLINT
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+#include "hwy/highway.h"
+#include "hwy/contrib/algo/transform-inl.h"
+#include "hwy/tests/test_util-inl.h"
+// clang-format on
+
+// If your project requires C++14 or later, you can ignore this and pass lambdas
+// directly to Transform, without requiring an lvalue as we do here for C++11.
+#if __cplusplus < 201402L
+#define HWY_GENERIC_LAMBDA 0
+#else
+#define HWY_GENERIC_LAMBDA 1
+#endif
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+template <typename T>  // Multiplier shared by the scalar references and the SCAL/AXPY functors.
+T Alpha() {
+  return static_cast<T>(1.5);  // arbitrary scalar
+}
+
+// Returns random floating-point number in [-8, 8) to ensure computations do
+// not exceed float32 precision.
+template <typename T>
+T Random(RandomState& rng) {
+  const int32_t bits = static_cast<int32_t>(Random32(&rng)) & 1023;  // 0..1023
+  const double val = (bits - 512) / 64.0;  // multiples of 1/64 in [-8, 8)
+  // Clamp negative to zero for unsigned types.
+  return static_cast<T>(HWY_MAX(hwy::LowestValue<T>(), val));
+}
+
+// SCAL, AXPY names are from BLAS. Scalar reference: out[i] = Alpha * x[i].
+template <typename T>
+HWY_NOINLINE void SimpleSCAL(const T* x, T* out, size_t count) {
+  for (size_t i = 0; i < count; ++i) {
+    out[i] = Alpha<T>() * x[i];
+  }
+}
+
+template <typename T>  // Scalar reference: out[i] = Alpha * x[i] + y[i].
+HWY_NOINLINE void SimpleAXPY(const T* x, const T* y, T* out, size_t count) {
+  for (size_t i = 0; i < count; ++i) {
+    out[i] = Alpha<T>() * x[i] + y[i];
+  }
+}
+
+template <typename T>  // Scalar reference: out[i] = x[i] * y[i] + z[i].
+HWY_NOINLINE void SimpleFMA4(const T* x, const T* y, const T* z, T* out,
+                             size_t count) {
+  for (size_t i = 0; i < count; ++i) {
+    out[i] = x[i] * y[i] + z[i];
+  }
+}
+
+// In C++14, we can instead define these as generic lambdas next to where they
+// are invoked.
+#if !HWY_GENERIC_LAMBDA
+
+// Generator that returns even numbers by doubling the output indices.
+struct Gen2 {
+  template <class D, class VU>
+  Vec<D> operator()(D d, VU vidx) const {
+    return BitCast(d, Add(vidx, vidx));  // vidx + vidx, reinterpreted as Vec<D>
+  }
+};
+
+struct SCAL {  // Vector counterpart of SimpleSCAL: Alpha * v.
+  template <class D, class V>
+  Vec<D> operator()(D d, V v) const {
+    using T = TFromD<D>;
+    return Mul(Set(d, Alpha<T>()), v);
+  }
+};
+
+struct AXPY {  // Vector counterpart of SimpleAXPY: Alpha * v + v1.
+  template <class D, class V>
+  Vec<D> operator()(D d, V v, V v1) const {
+    using T = TFromD<D>;
+    return MulAdd(Set(d, Alpha<T>()), v, v1);
+  }
+};
+
+struct FMA4 {  // Vector counterpart of SimpleFMA4: v * v1 + v2.
+  template <class D, class V>
+  Vec<D> operator()(D /*d*/, V v, V v1, V v2) const {
+    return MulAdd(v, v1, v2);
+  }
+};
+
+#endif  // !HWY_GENERIC_LAMBDA
+
+// Invokes Test (e.g. TestTransform1) with all arg combinations. T comes from
+// ForFloatTypes, or from ForIntegerTypes in the case of TestGenerate.
+template <class Test>
+struct ForeachCountAndMisalign {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) const {
+    RandomState rng;
+    const size_t N = Lanes(d);
+    const size_t misalignments[3] = {0, N / 4, 3 * N / 5};  // in elements
+
+    for (size_t count = 0; count < 2 * N; ++count) {  // every count below 2 * N
+      for (size_t ma : misalignments) {
+        for (size_t mb : misalignments) {
+          Test()(d, count, ma, mb, rng);  // fresh Test object per combination
+        }
+      }
+    }
+  }
+};
+
+// Output-only, no loads
+struct TestGenerate {
+  template <class D>
+  void operator()(D d, size_t count, size_t misalign_a, size_t /*misalign_b*/,
+                  RandomState& /*rng*/) {
+    using T = TFromD<D>;
+    AlignedFreeUniquePtr<T[]> pa = AllocateAligned<T>(misalign_a + count + 1);  // +1 for the sentinel
+    AlignedFreeUniquePtr<T[]> expected = AllocateAligned<T>(HWY_MAX(1, count));
+    HWY_ASSERT(pa && expected);
+
+    T* actual = pa.get() + misalign_a;
+
+    for (size_t i = 0; i < count; ++i) {
+      expected[i] = static_cast<T>(2 * i);  // what gen2 yields for index i
+    }
+
+    // TODO(janwas): can we update the apply_to in HWY_PUSH_ATTRIBUTES so that
+    // the attribute also applies to lambdas? If so, remove HWY_ATTR.
+#if HWY_GENERIC_LAMBDA
+    const auto gen2 = [](const auto d, const auto vidx)
+                          HWY_ATTR { return BitCast(d, Add(vidx, vidx)); };
+#else
+    const Gen2 gen2;
+#endif
+    actual[count] = T{0};  // sentinel
+    Generate(d, actual, count, gen2);
+    HWY_ASSERT_EQ(T{0}, actual[count]);  // did not write past end
+
+    const auto info = hwy::detail::MakeTypeInfo<T>();
+    const char* target_name = hwy::TargetName(HWY_TARGET);
+    hwy::detail::AssertArrayEqual(info, expected.get(), actual, count,
+                                  target_name, __FILE__, __LINE__);
+  }
+};
+
+// Zero extra input arrays
+struct TestTransform {
+  template <class D>
+  void operator()(D d, size_t count, size_t misalign_a, size_t misalign_b,
+                  RandomState& rng) {
+    if (misalign_b != 0) return;  // misalign_b is unused here: run once per misalign_a
+    using T = TFromD<D>;
+    // Prevents error if size to allocate is zero.
+    AlignedFreeUniquePtr<T[]> pa =
+        AllocateAligned<T>(HWY_MAX(1, misalign_a + count));
+    AlignedFreeUniquePtr<T[]> expected = AllocateAligned<T>(HWY_MAX(1, count));
+    HWY_ASSERT(pa && expected);
+
+    T* a = pa.get() + misalign_a;
+    for (size_t i = 0; i < count; ++i) {
+      a[i] = Random<T>(rng);
+    }
+
+    SimpleSCAL(a, expected.get(), count);  // reference, computed before `a` is overwritten
+
+    // TODO(janwas): can we update the apply_to in HWY_PUSH_ATTRIBUTES so that
+    // the attribute also applies to lambdas? If so, remove HWY_ATTR.
+#if HWY_GENERIC_LAMBDA
+    const auto scal = [](const auto d, const auto v)
+                          HWY_ATTR { return Mul(Set(d, Alpha<T>()), v); };
+#else
+    const SCAL scal;
+#endif
+    Transform(d, a, count, scal);
+
+    const auto info = hwy::detail::MakeTypeInfo<T>();
+    const char* target_name = hwy::TargetName(HWY_TARGET);
+    hwy::detail::AssertArrayEqual(info, expected.get(), a, count, target_name,
+                                  __FILE__, __LINE__);
+  }
+};
+
+// One extra input array
+struct TestTransform1 {
+  template <class D>
+  void operator()(D d, size_t count, size_t misalign_a, size_t misalign_b,
+                  RandomState& rng) {
+    using T = TFromD<D>;
+    // Prevents error if size to allocate is zero.
+    AlignedFreeUniquePtr<T[]> pa =
+        AllocateAligned<T>(HWY_MAX(1, misalign_a + count));
+    AlignedFreeUniquePtr<T[]> pb =
+        AllocateAligned<T>(HWY_MAX(1, misalign_b + count));
+    AlignedFreeUniquePtr<T[]> expected = AllocateAligned<T>(HWY_MAX(1, count));
+    HWY_ASSERT(pa && pb && expected);
+    T* a = pa.get() + misalign_a;  // in/out array
+    T* b = pb.get() + misalign_b;  // read-only second input
+    for (size_t i = 0; i < count; ++i) {
+      a[i] = Random<T>(rng);
+      b[i] = Random<T>(rng);
+    }
+
+    SimpleAXPY(a, b, expected.get(), count);  // reference, computed before `a` is overwritten
+
+#if HWY_GENERIC_LAMBDA
+    const auto axpy = [](const auto d, const auto v, const auto v1) HWY_ATTR {
+      return MulAdd(Set(d, Alpha<T>()), v, v1);
+    };
+#else
+    const AXPY axpy;
+#endif
+    Transform1(d, a, count, b, axpy);
+
+    const auto info = hwy::detail::MakeTypeInfo<T>();
+    const char* target_name = hwy::TargetName(HWY_TARGET);
+    hwy::detail::AssertArrayEqual(info, expected.get(), a, count, target_name,
+                                  __FILE__, __LINE__);
+  }
+};
+
+// Two extra input arrays
+struct TestTransform2 {
+  template <class D>
+  void operator()(D d, size_t count, size_t misalign_a, size_t misalign_b,
+                  RandomState& rng) {
+    using T = TFromD<D>;
+    // Prevents error if size to allocate is zero.
+    AlignedFreeUniquePtr<T[]> pa =
+        AllocateAligned<T>(HWY_MAX(1, misalign_a + count));
+    AlignedFreeUniquePtr<T[]> pb =
+        AllocateAligned<T>(HWY_MAX(1, misalign_b + count));
+    AlignedFreeUniquePtr<T[]> pc =
+        AllocateAligned<T>(HWY_MAX(1, misalign_a + count));
+    AlignedFreeUniquePtr<T[]> expected = AllocateAligned<T>(HWY_MAX(1, count));
+    HWY_ASSERT(pa && pb && pc && expected);
+    T* a = pa.get() + misalign_a;  // in/out array
+    T* b = pb.get() + misalign_b;
+    T* c = pc.get() + misalign_a;  // c reuses a's misalignment
+    for (size_t i = 0; i < count; ++i) {
+      a[i] = Random<T>(rng);
+      b[i] = Random<T>(rng);
+      c[i] = Random<T>(rng);
+    }
+
+    SimpleFMA4(a, b, c, expected.get(), count);  // reference, computed before `a` is overwritten
+
+#if HWY_GENERIC_LAMBDA
+    const auto fma4 = [](auto /*d*/, auto v, auto v1, auto v2)
+                          HWY_ATTR { return MulAdd(v, v1, v2); };
+#else
+    const FMA4 fma4;
+#endif
+    Transform2(d, a, count, b, c, fma4);
+
+    const auto info = hwy::detail::MakeTypeInfo<T>();
+    const char* target_name = hwy::TargetName(HWY_TARGET);
+    hwy::detail::AssertArrayEqual(info, expected.get(), a, count, target_name,
+                                  __FILE__, __LINE__);
+  }
+};
+
+template <typename T>  // Predicate functor for ReplaceIf: mask of lanes equal to val.
+class IfEq {
+ public:
+  IfEq(T val) : val_(val) {}
+
+  template <class D, class V>
+  Mask<D> operator()(D d, V v) const {
+    return Eq(v, Set(d, val_));
+  }
+
+ private:
+  T val_;  // value every lane is compared against
+};
+
+struct TestReplace {  // Checks Replace and ReplaceIf against a scalar loop.
+  template <class D>
+  void operator()(D d, size_t count, size_t misalign_a, size_t misalign_b,
+                  RandomState& rng) {
+    if (misalign_b != 0) return;  // misalign_b is unused here: run once per misalign_a
+    if (count == 0) return;  // `% count` below requires count != 0
+    using T = TFromD<D>;
+    AlignedFreeUniquePtr<T[]> pa = AllocateAligned<T>(misalign_a + count);
+    AlignedFreeUniquePtr<T[]> pb = AllocateAligned<T>(count);
+    AlignedFreeUniquePtr<T[]> expected = AllocateAligned<T>(count);
+    HWY_ASSERT(pa && pb && expected);
+
+    T* a = pa.get() + misalign_a;
+    for (size_t i = 0; i < count; ++i) {
+      a[i] = Random<T>(rng);
+    }
+
+    std::vector<size_t> positions(AdjustedReps(count));
+    for (size_t& pos : positions) {
+      pos = static_cast<size_t>(rng()) % count;
+    }  // NOTE(review): `positions` is never read; the loop below visits every pos.
+
+    for (size_t pos = 0; pos < count; ++pos) {
+      const T old_t = a[pos];  // value known to be present in `a`
+      const T new_t = Random<T>(rng);
+      for (size_t i = 0; i < count; ++i) {
+        expected[i] = IsEqual(a[i], old_t) ? new_t : a[i];
+      }
+
+      // Copy so ReplaceIf gets the same input (and thus also outputs expected)
+      memcpy(pb.get(), a, count * sizeof(T));
+
+      Replace(d, a, count, new_t, old_t);  // `a` stays modified for the next pos
+      HWY_ASSERT_ARRAY_EQ(expected.get(), a, count);
+
+      ReplaceIf(d, pb.get(), count, new_t, IfEq<T>(old_t));
+      HWY_ASSERT_ARRAY_EQ(expected.get(), pb.get(), count);
+    }
+  }
+};
+
+void TestAllGenerate() {
+  // The test BitCast-s the indices, which does not work for floats.
+  ForIntegerTypes(ForPartialVectors<ForeachCountAndMisalign<TestGenerate>>());
+}
+
+void TestAllTransform() {
+  ForFloatTypes(ForPartialVectors<ForeachCountAndMisalign<TestTransform>>());
+}
+
+void TestAllTransform1() {
+  ForFloatTypes(ForPartialVectors<ForeachCountAndMisalign<TestTransform1>>());
+}
+
+void TestAllTransform2() {
+  ForFloatTypes(ForPartialVectors<ForeachCountAndMisalign<TestTransform2>>());
+}
+
+void TestAllReplace() {
+  ForFloatTypes(ForPartialVectors<ForeachCountAndMisalign<TestReplace>>());
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace hwy {
+HWY_BEFORE_TEST(TransformTest);
+HWY_EXPORT_AND_TEST_P(TransformTest, TestAllGenerate);
+HWY_EXPORT_AND_TEST_P(TransformTest, TestAllTransform);
+HWY_EXPORT_AND_TEST_P(TransformTest, TestAllTransform1);
+HWY_EXPORT_AND_TEST_P(TransformTest, TestAllTransform2);
+HWY_EXPORT_AND_TEST_P(TransformTest, TestAllReplace);
+}  // namespace hwy
+
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/bit_pack/bit_pack-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/bit_pack/bit_pack-inl.h
new file mode 100644
index 0000000000..a39a9aeeda
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/bit_pack/bit_pack-inl.h
@@ -0,0 +1,2598 @@
+// Copyright 2022 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Per-target include guard
+#if defined(HIGHWAY_HWY_CONTRIB_BIT_PACK_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_CONTRIB_BIT_PACK_INL_H_
+#undef HIGHWAY_HWY_CONTRIB_BIT_PACK_INL_H_
+#else
+#define HIGHWAY_HWY_CONTRIB_BIT_PACK_INL_H_
+#endif
+
+#include "hwy/highway.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// The entry points are class templates specialized below for each number of
+// bits (kBits). Each provides Pack and Unpack member functions: Pack loads B
+// raw vectors and stores kBits packed vectors; Unpack loads kBits packed
+// vectors and stores B raw vectors. B denotes the bits per lane (8 for Pack8,
+// 16 for Pack16) and is also the upper bound for kBits.
+template <size_t kBits>  // <= 8
+struct Pack8 {};
+template <size_t kBits>  // <= 16
+struct Pack16 {};
+
+template <>
+struct Pack8<1> {  // 8 raw vectors <-> 1 packed vector
+  template <class D8>
+  HWY_INLINE void Pack(D8 d8, const uint8_t* HWY_RESTRICT raw,
+                       uint8_t* HWY_RESTRICT packed_out) const {
+    const RepartitionToWide<decltype(d8)> d16;
+    using VU16 = Vec<decltype(d16)>;
+    const size_t N8 = Lanes(d8);
+    // 16-bit shifts avoid masking (bits will not cross 8-bit lanes).
+    const VU16 raw0 = BitCast(d16, LoadU(d8, raw + 0 * N8));
+    const VU16 raw1 = BitCast(d16, LoadU(d8, raw + 1 * N8));
+    const VU16 raw2 = BitCast(d16, LoadU(d8, raw + 2 * N8));
+    const VU16 raw3 = BitCast(d16, LoadU(d8, raw + 3 * N8));
+    const VU16 raw4 = BitCast(d16, LoadU(d8, raw + 4 * N8));
+    const VU16 raw5 = BitCast(d16, LoadU(d8, raw + 5 * N8));
+    const VU16 raw6 = BitCast(d16, LoadU(d8, raw + 6 * N8));
+    const VU16 raw7 = BitCast(d16, LoadU(d8, raw + 7 * N8));
+
+    const VU16 packed =  // raw{i} is shifted left by i before combining
+        Xor3(Or(ShiftLeft<7>(raw7), ShiftLeft<6>(raw6)),
+             Xor3(ShiftLeft<5>(raw5), ShiftLeft<4>(raw4), ShiftLeft<3>(raw3)),
+             Xor3(ShiftLeft<2>(raw2), ShiftLeft<1>(raw1), raw0));
+    StoreU(BitCast(d8, packed), d8, packed_out);
+  }
+
+  template <class D8>
+  HWY_INLINE void Unpack(D8 d8, const uint8_t* HWY_RESTRICT packed_in,
+                         uint8_t* HWY_RESTRICT raw) const {
+    const RepartitionToWide<decltype(d8)> d16;
+    using VU16 = Vec<decltype(d16)>;
+    const size_t N8 = Lanes(d8);
+    const VU16 mask = Set(d16, 0x0101u);  // LSB in each byte
+
+    const VU16 packed = BitCast(d16, LoadU(d8, packed_in));
+
+    const VU16 raw0 = And(packed, mask);
+    StoreU(BitCast(d8, raw0), d8, raw + 0 * N8);
+
+    const VU16 raw1 = And(ShiftRight<1>(packed), mask);
+    StoreU(BitCast(d8, raw1), d8, raw + 1 * N8);
+
+    const VU16 raw2 = And(ShiftRight<2>(packed), mask);
+    StoreU(BitCast(d8, raw2), d8, raw + 2 * N8);
+
+    const VU16 raw3 = And(ShiftRight<3>(packed), mask);
+    StoreU(BitCast(d8, raw3), d8, raw + 3 * N8);
+
+    const VU16 raw4 = And(ShiftRight<4>(packed), mask);
+    StoreU(BitCast(d8, raw4), d8, raw + 4 * N8);
+
+    const VU16 raw5 = And(ShiftRight<5>(packed), mask);
+    StoreU(BitCast(d8, raw5), d8, raw + 5 * N8);
+
+    const VU16 raw6 = And(ShiftRight<6>(packed), mask);
+    StoreU(BitCast(d8, raw6), d8, raw + 6 * N8);
+
+    const VU16 raw7 = And(ShiftRight<7>(packed), mask);
+    StoreU(BitCast(d8, raw7), d8, raw + 7 * N8);
+  }
+};  // Pack8<1>
+
+template <>
+struct Pack8<2> {  // 8 raw vectors <-> 2 packed vectors
+  template <class D8>
+  HWY_INLINE void Pack(D8 d8, const uint8_t* HWY_RESTRICT raw,
+                       uint8_t* HWY_RESTRICT packed_out) const {
+    const RepartitionToWide<decltype(d8)> d16;
+    using VU16 = Vec<decltype(d16)>;
+    const size_t N8 = Lanes(d8);
+    // 16-bit shifts avoid masking (bits will not cross 8-bit lanes).
+    const VU16 raw0 = BitCast(d16, LoadU(d8, raw + 0 * N8));
+    const VU16 raw1 = BitCast(d16, LoadU(d8, raw + 1 * N8));
+    const VU16 raw2 = BitCast(d16, LoadU(d8, raw + 2 * N8));
+    const VU16 raw3 = BitCast(d16, LoadU(d8, raw + 3 * N8));
+    const VU16 raw4 = BitCast(d16, LoadU(d8, raw + 4 * N8));
+    const VU16 raw5 = BitCast(d16, LoadU(d8, raw + 5 * N8));
+    const VU16 raw6 = BitCast(d16, LoadU(d8, raw + 6 * N8));
+    const VU16 raw7 = BitCast(d16, LoadU(d8, raw + 7 * N8));
+
+    const VU16 packed0 = Xor3(ShiftLeft<6>(raw6), ShiftLeft<4>(raw4),
+                              Or(ShiftLeft<2>(raw2), raw0));  // even raws
+    const VU16 packed1 = Xor3(ShiftLeft<6>(raw7), ShiftLeft<4>(raw5),
+                              Or(ShiftLeft<2>(raw3), raw1));  // odd raws
+    StoreU(BitCast(d8, packed0), d8, packed_out + 0 * N8);
+    StoreU(BitCast(d8, packed1), d8, packed_out + 1 * N8);
+  }
+
+  template <class D8>
+  HWY_INLINE void Unpack(D8 d8, const uint8_t* HWY_RESTRICT packed_in,
+                         uint8_t* HWY_RESTRICT raw) const {
+    const RepartitionToWide<decltype(d8)> d16;
+    using VU16 = Vec<decltype(d16)>;
+    const size_t N8 = Lanes(d8);
+    const VU16 mask = Set(d16, 0x0303u);  // Lowest 2 bits per byte
+
+    const VU16 packed0 = BitCast(d16, LoadU(d8, packed_in + 0 * N8));
+    const VU16 packed1 = BitCast(d16, LoadU(d8, packed_in + 1 * N8));
+
+    const VU16 raw0 = And(packed0, mask);
+    StoreU(BitCast(d8, raw0), d8, raw + 0 * N8);
+
+    const VU16 raw1 = And(packed1, mask);
+    StoreU(BitCast(d8, raw1), d8, raw + 1 * N8);
+
+    const VU16 raw2 = And(ShiftRight<2>(packed0), mask);
+    StoreU(BitCast(d8, raw2), d8, raw + 2 * N8);
+
+    const VU16 raw3 = And(ShiftRight<2>(packed1), mask);
+    StoreU(BitCast(d8, raw3), d8, raw + 3 * N8);
+
+    const VU16 raw4 = And(ShiftRight<4>(packed0), mask);
+    StoreU(BitCast(d8, raw4), d8, raw + 4 * N8);
+
+    const VU16 raw5 = And(ShiftRight<4>(packed1), mask);
+    StoreU(BitCast(d8, raw5), d8, raw + 5 * N8);
+
+    const VU16 raw6 = And(ShiftRight<6>(packed0), mask);
+    StoreU(BitCast(d8, raw6), d8, raw + 6 * N8);
+
+    const VU16 raw7 = And(ShiftRight<6>(packed1), mask);
+    StoreU(BitCast(d8, raw7), d8, raw + 7 * N8);
+  }
+};  // Pack8<2>
+
+template <>
+struct Pack8<3> {  // 8 raw vectors <-> 3 packed vectors
+  template <class D8>
+  HWY_INLINE void Pack(D8 d8, const uint8_t* HWY_RESTRICT raw,
+                       uint8_t* HWY_RESTRICT packed_out) const {
+    const RepartitionToWide<decltype(d8)> d16;
+    using VU16 = Vec<decltype(d16)>;
+    const size_t N8 = Lanes(d8);
+    const VU16 raw0 = BitCast(d16, LoadU(d8, raw + 0 * N8));
+    const VU16 raw1 = BitCast(d16, LoadU(d8, raw + 1 * N8));
+    const VU16 raw2 = BitCast(d16, LoadU(d8, raw + 2 * N8));
+    const VU16 raw3 = BitCast(d16, LoadU(d8, raw + 3 * N8));
+    const VU16 raw4 = BitCast(d16, LoadU(d8, raw + 4 * N8));
+    const VU16 raw5 = BitCast(d16, LoadU(d8, raw + 5 * N8));
+    const VU16 raw6 = BitCast(d16, LoadU(d8, raw + 6 * N8));
+    const VU16 raw7 = BitCast(d16, LoadU(d8, raw + 7 * N8));
+
+    // The upper two bits of these three will be filled with packed3 (6 bits).
+    VU16 packed0 = Or(ShiftLeft<3>(raw4), raw0);
+    VU16 packed1 = Or(ShiftLeft<3>(raw5), raw1);
+    VU16 packed2 = Or(ShiftLeft<3>(raw6), raw2);
+    const VU16 packed3 = Or(ShiftLeft<3>(raw7), raw3);
+
+    const VU16 hi2 = Set(d16, 0xC0C0u);  // Upper 2 bits per byte
+    packed0 = OrAnd(packed0, ShiftLeft<2>(packed3), hi2);
+    packed1 = OrAnd(packed1, ShiftLeft<4>(packed3), hi2);
+    packed2 = OrAnd(packed2, ShiftLeft<6>(packed3), hi2);
+    StoreU(BitCast(d8, packed0), d8, packed_out + 0 * N8);
+    StoreU(BitCast(d8, packed1), d8, packed_out + 1 * N8);
+    StoreU(BitCast(d8, packed2), d8, packed_out + 2 * N8);
+  }
+
+  template <class D8>
+  HWY_INLINE void Unpack(D8 d8, const uint8_t* HWY_RESTRICT packed_in,
+                         uint8_t* HWY_RESTRICT raw) const {
+    const RepartitionToWide<decltype(d8)> d16;
+    using VU16 = Vec<decltype(d16)>;
+    const size_t N8 = Lanes(d8);
+    const VU16 mask = Set(d16, 0x0707u);  // Lowest 3 bits per byte
+
+    const VU16 packed0 = BitCast(d16, LoadU(d8, packed_in + 0 * N8));
+    const VU16 packed1 = BitCast(d16, LoadU(d8, packed_in + 1 * N8));
+    const VU16 packed2 = BitCast(d16, LoadU(d8, packed_in + 2 * N8));
+
+    const VU16 raw0 = And(packed0, mask);
+    StoreU(BitCast(d8, raw0), d8, raw + 0 * N8);
+
+    const VU16 raw1 = And(packed1, mask);
+    StoreU(BitCast(d8, raw1), d8, raw + 1 * N8);
+
+    const VU16 raw2 = And(packed2, mask);
+    StoreU(BitCast(d8, raw2), d8, raw + 2 * N8);
+
+    const VU16 raw4 = And(ShiftRight<3>(packed0), mask);
+    StoreU(BitCast(d8, raw4), d8, raw + 4 * N8);
+
+    const VU16 raw5 = And(ShiftRight<3>(packed1), mask);
+    StoreU(BitCast(d8, raw5), d8, raw + 5 * N8);
+
+    const VU16 raw6 = And(ShiftRight<3>(packed2), mask);
+    StoreU(BitCast(d8, raw6), d8, raw + 6 * N8);
+
+    // raw73 is the concatenation of the upper two bits in packed0..2.
+    const VU16 hi2 = Set(d16, 0xC0C0u);  // Upper 2 bits per byte
+    const VU16 raw73 = Xor3(ShiftRight<6>(And(packed2, hi2)),  //
+                            ShiftRight<4>(And(packed1, hi2)),
+                            ShiftRight<2>(And(packed0, hi2)));
+
+    const VU16 raw3 = And(mask, raw73);
+    StoreU(BitCast(d8, raw3), d8, raw + 3 * N8);
+
+    const VU16 raw7 = And(mask, ShiftRight<3>(raw73));
+    StoreU(BitCast(d8, raw7), d8, raw + 7 * N8);
+  }
+};  // Pack8<3>
+
+template <>
+struct Pack8<4> {  // 8 raw vectors <-> 4 packed vectors
+  template <class D8>
+  HWY_INLINE void Pack(D8 d8, const uint8_t* HWY_RESTRICT raw,
+                       uint8_t* HWY_RESTRICT packed_out) const {
+    const RepartitionToWide<decltype(d8)> d16;
+    using VU16 = Vec<decltype(d16)>;
+    const size_t N8 = Lanes(d8);
+    // 16-bit shifts avoid masking (bits will not cross 8-bit lanes).
+    const VU16 raw0 = BitCast(d16, LoadU(d8, raw + 0 * N8));
+    const VU16 raw1 = BitCast(d16, LoadU(d8, raw + 1 * N8));
+    const VU16 raw2 = BitCast(d16, LoadU(d8, raw + 2 * N8));
+    const VU16 raw3 = BitCast(d16, LoadU(d8, raw + 3 * N8));
+    const VU16 raw4 = BitCast(d16, LoadU(d8, raw + 4 * N8));
+    const VU16 raw5 = BitCast(d16, LoadU(d8, raw + 5 * N8));
+    const VU16 raw6 = BitCast(d16, LoadU(d8, raw + 6 * N8));
+    const VU16 raw7 = BitCast(d16, LoadU(d8, raw + 7 * N8));
+
+    const VU16 packed0 = Or(ShiftLeft<4>(raw2), raw0);  // raw2 shifted by 4
+    const VU16 packed1 = Or(ShiftLeft<4>(raw3), raw1);  // raw3 shifted by 4
+    const VU16 packed2 = Or(ShiftLeft<4>(raw6), raw4);  // raw6 shifted by 4
+    const VU16 packed3 = Or(ShiftLeft<4>(raw7), raw5);  // raw7 shifted by 4
+
+    StoreU(BitCast(d8, packed0), d8, packed_out + 0 * N8);
+    StoreU(BitCast(d8, packed1), d8, packed_out + 1 * N8);
+    StoreU(BitCast(d8, packed2), d8, packed_out + 2 * N8);
+    StoreU(BitCast(d8, packed3), d8, packed_out + 3 * N8);
+  }
+
+  template <class D8>
+  HWY_INLINE void Unpack(D8 d8, const uint8_t* HWY_RESTRICT packed_in,
+                         uint8_t* HWY_RESTRICT raw) const {
+    const RepartitionToWide<decltype(d8)> d16;
+    using VU16 = Vec<decltype(d16)>;
+    const size_t N8 = Lanes(d8);
+    const VU16 mask = Set(d16, 0x0F0Fu);  // Lowest 4 bits per byte
+
+    const VU16 packed0 = BitCast(d16, LoadU(d8, packed_in + 0 * N8));
+    const VU16 packed1 = BitCast(d16, LoadU(d8, packed_in + 1 * N8));
+    const VU16 packed2 = BitCast(d16, LoadU(d8, packed_in + 2 * N8));
+    const VU16 packed3 = BitCast(d16, LoadU(d8, packed_in + 3 * N8));
+
+    const VU16 raw0 = And(packed0, mask);
+    StoreU(BitCast(d8, raw0), d8, raw + 0 * N8);
+
+    const VU16 raw1 = And(packed1, mask);
+    StoreU(BitCast(d8, raw1), d8, raw + 1 * N8);
+
+    const VU16 raw2 = And(ShiftRight<4>(packed0), mask);
+    StoreU(BitCast(d8, raw2), d8, raw + 2 * N8);
+
+    const VU16 raw3 = And(ShiftRight<4>(packed1), mask);
+    StoreU(BitCast(d8, raw3), d8, raw + 3 * N8);
+
+    const VU16 raw4 = And(packed2, mask);
+    StoreU(BitCast(d8, raw4), d8, raw + 4 * N8);
+
+    const VU16 raw5 = And(packed3, mask);
+    StoreU(BitCast(d8, raw5), d8, raw + 5 * N8);
+
+    const VU16 raw6 = And(ShiftRight<4>(packed2), mask);
+    StoreU(BitCast(d8, raw6), d8, raw + 6 * N8);
+
+    const VU16 raw7 = And(ShiftRight<4>(packed3), mask);
+    StoreU(BitCast(d8, raw7), d8, raw + 7 * N8);
+  }
+};  // Pack8<4>
+
+template <>
+struct Pack8<5> {  // 8 raw vectors <-> 5 packed vectors
+  template <class D8>
+  HWY_INLINE void Pack(D8 d8, const uint8_t* HWY_RESTRICT raw,
+                       uint8_t* HWY_RESTRICT packed_out) const {
+    const RepartitionToWide<decltype(d8)> d16;
+    using VU16 = Vec<decltype(d16)>;
+    const size_t N8 = Lanes(d8);
+    const VU16 raw0 = BitCast(d16, LoadU(d8, raw + 0 * N8));
+    const VU16 raw1 = BitCast(d16, LoadU(d8, raw + 1 * N8));
+    const VU16 raw2 = BitCast(d16, LoadU(d8, raw + 2 * N8));
+    const VU16 raw3 = BitCast(d16, LoadU(d8, raw + 3 * N8));
+    const VU16 raw4 = BitCast(d16, LoadU(d8, raw + 4 * N8));
+    const VU16 raw5 = BitCast(d16, LoadU(d8, raw + 5 * N8));
+    const VU16 raw6 = BitCast(d16, LoadU(d8, raw + 6 * N8));
+    const VU16 raw7 = BitCast(d16, LoadU(d8, raw + 7 * N8));
+
+    // Fill upper three bits with upper bits from raw4..7.
+    const VU16 hi3 = Set(d16, 0xE0E0u);  // Upper 3 bits per byte
+    const VU16 packed0 = OrAnd(raw0, ShiftLeft<3>(raw4), hi3);
+    const VU16 packed1 = OrAnd(raw1, ShiftLeft<3>(raw5), hi3);
+    const VU16 packed2 = OrAnd(raw2, ShiftLeft<3>(raw6), hi3);
+    const VU16 packed3 = OrAnd(raw3, ShiftLeft<3>(raw7), hi3);
+
+    StoreU(BitCast(d8, packed0), d8, packed_out + 0 * N8);
+    StoreU(BitCast(d8, packed1), d8, packed_out + 1 * N8);
+    StoreU(BitCast(d8, packed2), d8, packed_out + 2 * N8);
+    StoreU(BitCast(d8, packed3), d8, packed_out + 3 * N8);
+
+    // Combine lower two bits of raw4..7 into packed4.
+    const VU16 lo2 = Set(d16, 0x0303u);  // Lowest 2 bits per byte
+    const VU16 packed4 = Or(And(raw4, lo2), Xor3(ShiftLeft<2>(And(raw5, lo2)),
+                                                 ShiftLeft<4>(And(raw6, lo2)),
+                                                 ShiftLeft<6>(And(raw7, lo2))));
+    StoreU(BitCast(d8, packed4), d8, packed_out + 4 * N8);
+  }
+
+  template <class D8>
+  HWY_INLINE void Unpack(D8 d8, const uint8_t* HWY_RESTRICT packed_in,
+                         uint8_t* HWY_RESTRICT raw) const {
+    const RepartitionToWide<decltype(d8)> d16;
+    using VU16 = Vec<decltype(d16)>;
+    const size_t N8 = Lanes(d8);
+
+    const VU16 packed0 = BitCast(d16, LoadU(d8, packed_in + 0 * N8));
+    const VU16 packed1 = BitCast(d16, LoadU(d8, packed_in + 1 * N8));
+    const VU16 packed2 = BitCast(d16, LoadU(d8, packed_in + 2 * N8));
+    const VU16 packed3 = BitCast(d16, LoadU(d8, packed_in + 3 * N8));
+    const VU16 packed4 = BitCast(d16, LoadU(d8, packed_in + 4 * N8));
+
+    const VU16 mask = Set(d16, 0x1F1Fu);  // Lowest 5 bits per byte
+
+    const VU16 raw0 = And(packed0, mask);
+    StoreU(BitCast(d8, raw0), d8, raw + 0 * N8);
+
+    const VU16 raw1 = And(packed1, mask);
+    StoreU(BitCast(d8, raw1), d8, raw + 1 * N8);
+
+    const VU16 raw2 = And(packed2, mask);
+    StoreU(BitCast(d8, raw2), d8, raw + 2 * N8);
+
+    const VU16 raw3 = And(packed3, mask);
+    StoreU(BitCast(d8, raw3), d8, raw + 3 * N8);
+
+    // Upper 3 bits of raw4..7: top 3 bits of packed0..3 shifted right by 3.
+    const VU16 top4 = ShiftRight<3>(AndNot(mask, packed0));
+    const VU16 top5 = ShiftRight<3>(AndNot(mask, packed1));
+    const VU16 top6 = ShiftRight<3>(AndNot(mask, packed2));
+    const VU16 top7 = ShiftRight<3>(AndNot(mask, packed3));
+
+    // Insert the lower 2 bits, which were concatenated into a byte.
+    const VU16 lo2 = Set(d16, 0x0303u);  // Lowest 2 bits per byte
+    const VU16 raw4 = OrAnd(top4, lo2, packed4);
+    const VU16 raw5 = OrAnd(top5, lo2, ShiftRight<2>(packed4));
+    const VU16 raw6 = OrAnd(top6, lo2, ShiftRight<4>(packed4));
+    const VU16 raw7 = OrAnd(top7, lo2, ShiftRight<6>(packed4));
+
+    StoreU(BitCast(d8, raw4), d8, raw + 4 * N8);
+    StoreU(BitCast(d8, raw5), d8, raw + 5 * N8);
+    StoreU(BitCast(d8, raw6), d8, raw + 6 * N8);
+    StoreU(BitCast(d8, raw7), d8, raw + 7 * N8);
+  }
+};  // Pack8<5>
+
+template <>
+struct Pack8<6> {  // 8 raw vectors <-> 6 packed vectors
+  template <class D8>
+  HWY_INLINE void Pack(D8 d8, const uint8_t* HWY_RESTRICT raw,
+                       uint8_t* HWY_RESTRICT packed_out) const {
+    const RepartitionToWide<decltype(d8)> d16;
+    using VU16 = Vec<decltype(d16)>;
+    const size_t N8 = Lanes(d8);
+    const VU16 raw0 = BitCast(d16, LoadU(d8, raw + 0 * N8));
+    const VU16 raw1 = BitCast(d16, LoadU(d8, raw + 1 * N8));
+    const VU16 raw2 = BitCast(d16, LoadU(d8, raw + 2 * N8));
+    const VU16 raw3 = BitCast(d16, LoadU(d8, raw + 3 * N8));
+    const VU16 raw4 = BitCast(d16, LoadU(d8, raw + 4 * N8));
+    const VU16 raw5 = BitCast(d16, LoadU(d8, raw + 5 * N8));
+    const VU16 raw6 = BitCast(d16, LoadU(d8, raw + 6 * N8));
+    const VU16 raw7 = BitCast(d16, LoadU(d8, raw + 7 * N8));
+
+    const VU16 hi2 = Set(d16, 0xC0C0u);  // Upper 2 bits per byte
+    // Each triplet of these stores raw3/raw7 (6 bits) in the upper 2 bits.
+    const VU16 packed0 = OrAnd(raw0, ShiftLeft<2>(raw3), hi2);
+    const VU16 packed1 = OrAnd(raw1, ShiftLeft<4>(raw3), hi2);
+    const VU16 packed2 = OrAnd(raw2, ShiftLeft<6>(raw3), hi2);
+    const VU16 packed3 = OrAnd(raw4, ShiftLeft<2>(raw7), hi2);
+    const VU16 packed4 = OrAnd(raw5, ShiftLeft<4>(raw7), hi2);
+    const VU16 packed5 = OrAnd(raw6, ShiftLeft<6>(raw7), hi2);
+
+    StoreU(BitCast(d8, packed0), d8, packed_out + 0 * N8);
+    StoreU(BitCast(d8, packed1), d8, packed_out + 1 * N8);
+    StoreU(BitCast(d8, packed2), d8, packed_out + 2 * N8);
+    StoreU(BitCast(d8, packed3), d8, packed_out + 3 * N8);
+    StoreU(BitCast(d8, packed4), d8, packed_out + 4 * N8);
+    StoreU(BitCast(d8, packed5), d8, packed_out + 5 * N8);
+  }
+
+  template <class D8>
+  HWY_INLINE void Unpack(D8 d8, const uint8_t* HWY_RESTRICT packed_in,
+                         uint8_t* HWY_RESTRICT raw) const {
+    const RepartitionToWide<decltype(d8)> d16;
+    using VU16 = Vec<decltype(d16)>;
+    const size_t N8 = Lanes(d8);
+    const VU16 mask = Set(d16, 0x3F3Fu);  // Lowest 6 bits per byte
+
+    const VU16 packed0 = BitCast(d16, LoadU(d8, packed_in + 0 * N8));
+    const VU16 packed1 = BitCast(d16, LoadU(d8, packed_in + 1 * N8));
+    const VU16 packed2 = BitCast(d16, LoadU(d8, packed_in + 2 * N8));
+    const VU16 packed3 = BitCast(d16, LoadU(d8, packed_in + 3 * N8));
+    const VU16 packed4 = BitCast(d16, LoadU(d8, packed_in + 4 * N8));
+    const VU16 packed5 = BitCast(d16, LoadU(d8, packed_in + 5 * N8));
+
+    const VU16 raw0 = And(packed0, mask);
+    StoreU(BitCast(d8, raw0), d8, raw + 0 * N8);
+
+    const VU16 raw1 = And(packed1, mask);
+    StoreU(BitCast(d8, raw1), d8, raw + 1 * N8);
+
+    const VU16 raw2 = And(packed2, mask);
+    StoreU(BitCast(d8, raw2), d8, raw + 2 * N8);
+
+    const VU16 raw4 = And(packed3, mask);
+    StoreU(BitCast(d8, raw4), d8, raw + 4 * N8);
+
+    const VU16 raw5 = And(packed4, mask);
+    StoreU(BitCast(d8, raw5), d8, raw + 5 * N8);
+
+    const VU16 raw6 = And(packed5, mask);
+    StoreU(BitCast(d8, raw6), d8, raw + 6 * N8);
+
+    // raw3/raw7 concatenate the upper two bits of packed0..2/packed3..5.
+    const VU16 raw3 = Xor3(ShiftRight<6>(AndNot(mask, packed2)),
+                           ShiftRight<4>(AndNot(mask, packed1)),
+                           ShiftRight<2>(AndNot(mask, packed0)));
+    const VU16 raw7 = Xor3(ShiftRight<6>(AndNot(mask, packed5)),
+                           ShiftRight<4>(AndNot(mask, packed4)),
+                           ShiftRight<2>(AndNot(mask, packed3)));
+    StoreU(BitCast(d8, raw3), d8, raw + 3 * N8);
+    StoreU(BitCast(d8, raw7), d8, raw + 7 * N8);
+  }
+};  // Pack8<6>
+
+template <>
+struct Pack8<7> {  // 8 raw vectors <-> 7 packed vectors
+  template <class D8>
+  HWY_INLINE void Pack(D8 d8, const uint8_t* HWY_RESTRICT raw,
+                       uint8_t* HWY_RESTRICT packed_out) const {
+    const RepartitionToWide<decltype(d8)> d16;
+    using VU16 = Vec<decltype(d16)>;
+    const size_t N8 = Lanes(d8);
+    const VU16 raw0 = BitCast(d16, LoadU(d8, raw + 0 * N8));
+    const VU16 raw1 = BitCast(d16, LoadU(d8, raw + 1 * N8));
+    const VU16 raw2 = BitCast(d16, LoadU(d8, raw + 2 * N8));
+    const VU16 raw3 = BitCast(d16, LoadU(d8, raw + 3 * N8));
+    const VU16 raw4 = BitCast(d16, LoadU(d8, raw + 4 * N8));
+    const VU16 raw5 = BitCast(d16, LoadU(d8, raw + 5 * N8));
+    const VU16 raw6 = BitCast(d16, LoadU(d8, raw + 6 * N8));
+    // Inserted into top bit of packed0..6.
+    const VU16 raw7 = BitCast(d16, LoadU(d8, raw + 7 * N8));
+
+    const VU16 hi1 = Set(d16, 0x8080u);  // Top bit per byte
+    const VU16 packed0 = OrAnd(raw0, Add(raw7, raw7), hi1);  // raw7 << 1
+    const VU16 packed1 = OrAnd(raw1, ShiftLeft<2>(raw7), hi1);
+    const VU16 packed2 = OrAnd(raw2, ShiftLeft<3>(raw7), hi1);
+    const VU16 packed3 = OrAnd(raw3, ShiftLeft<4>(raw7), hi1);
+    const VU16 packed4 = OrAnd(raw4, ShiftLeft<5>(raw7), hi1);
+    const VU16 packed5 = OrAnd(raw5, ShiftLeft<6>(raw7), hi1);
+    const VU16 packed6 = OrAnd(raw6, ShiftLeft<7>(raw7), hi1);
+
+    StoreU(BitCast(d8, packed0), d8, packed_out + 0 * N8);
+    StoreU(BitCast(d8, packed1), d8, packed_out + 1 * N8);
+    StoreU(BitCast(d8, packed2), d8, packed_out + 2 * N8);
+    StoreU(BitCast(d8, packed3), d8, packed_out + 3 * N8);
+    StoreU(BitCast(d8, packed4), d8, packed_out + 4 * N8);
+    StoreU(BitCast(d8, packed5), d8, packed_out + 5 * N8);
+    StoreU(BitCast(d8, packed6), d8, packed_out + 6 * N8);
+  }
+
+  template <class D8>
+  HWY_INLINE void Unpack(D8 d8, const uint8_t* HWY_RESTRICT packed_in,
+                         uint8_t* HWY_RESTRICT raw) const {
+    const RepartitionToWide<decltype(d8)> d16;
+    using VU16 = Vec<decltype(d16)>;
+    const size_t N8 = Lanes(d8);
+
+    const VU16 packed0 = BitCast(d16, LoadU(d8, packed_in + 0 * N8));
+    const VU16 packed1 = BitCast(d16, LoadU(d8, packed_in + 1 * N8));
+    const VU16 packed2 = BitCast(d16, LoadU(d8, packed_in + 2 * N8));
+    const VU16 packed3 = BitCast(d16, LoadU(d8, packed_in + 3 * N8));
+    const VU16 packed4 = BitCast(d16, LoadU(d8, packed_in + 4 * N8));
+    const VU16 packed5 = BitCast(d16, LoadU(d8, packed_in + 5 * N8));
+    const VU16 packed6 = BitCast(d16, LoadU(d8, packed_in + 6 * N8));
+
+    const VU16 mask = Set(d16, 0x7F7Fu);  // Lowest 7 bits per byte
+
+    const VU16 raw0 = And(packed0, mask);
+    StoreU(BitCast(d8, raw0), d8, raw + 0 * N8);
+
+    const VU16 raw1 = And(packed1, mask);
+    StoreU(BitCast(d8, raw1), d8, raw + 1 * N8);
+
+    const VU16 raw2 = And(packed2, mask);
+    StoreU(BitCast(d8, raw2), d8, raw + 2 * N8);
+
+    const VU16 raw3 = And(packed3, mask);
+    StoreU(BitCast(d8, raw3), d8, raw + 3 * N8);
+
+    const VU16 raw4 = And(packed4, mask);
+    StoreU(BitCast(d8, raw4), d8, raw + 4 * N8);
+
+    const VU16 raw5 = And(packed5, mask);
+    StoreU(BitCast(d8, raw5), d8, raw + 5 * N8);
+
+    const VU16 raw6 = And(packed6, mask);
+    StoreU(BitCast(d8, raw6), d8, raw + 6 * N8);
+
+    const VU16 p0 = Xor3(ShiftRight<7>(AndNot(mask, packed6)),  // bits 0..2
+                         ShiftRight<6>(AndNot(mask, packed5)),
+                         ShiftRight<5>(AndNot(mask, packed4)));
+    const VU16 p1 = Xor3(ShiftRight<4>(AndNot(mask, packed3)),  // bits 3..5
+                         ShiftRight<3>(AndNot(mask, packed2)),
+                         ShiftRight<2>(AndNot(mask, packed1)));
+    const VU16 raw7 = Xor3(ShiftRight<1>(AndNot(mask, packed0)), p0, p1);
+    StoreU(BitCast(d8, raw7), d8, raw + 7 * N8);
+  }
+};  // Pack8<7>
+
+template <>
+struct Pack8<8> {  // 8 raw vectors <-> 8 packed vectors, copied unchanged
+  template <class D8>
+  HWY_INLINE void Pack(D8 d8, const uint8_t* HWY_RESTRICT raw,
+                       uint8_t* HWY_RESTRICT packed_out) const {
+    using VU8 = Vec<decltype(d8)>;
+    const size_t N8 = Lanes(d8);
+    const VU8 raw0 = LoadU(d8, raw + 0 * N8);
+    const VU8 raw1 = LoadU(d8, raw + 1 * N8);
+    const VU8 raw2 = LoadU(d8, raw + 2 * N8);
+    const VU8 raw3 = LoadU(d8, raw + 3 * N8);
+    const VU8 raw4 = LoadU(d8, raw + 4 * N8);
+    const VU8 raw5 = LoadU(d8, raw + 5 * N8);
+    const VU8 raw6 = LoadU(d8, raw + 6 * N8);
+    const VU8 raw7 = LoadU(d8, raw + 7 * N8);
+
+    StoreU(raw0, d8, packed_out + 0 * N8);  // All loads precede the stores.
+    StoreU(raw1, d8, packed_out + 1 * N8);
+    StoreU(raw2, d8, packed_out + 2 * N8);
+    StoreU(raw3, d8, packed_out + 3 * N8);
+    StoreU(raw4, d8, packed_out + 4 * N8);
+    StoreU(raw5, d8, packed_out + 5 * N8);
+    StoreU(raw6, d8, packed_out + 6 * N8);
+    StoreU(raw7, d8, packed_out + 7 * N8);
+  }
+
+  template <class D8>
+  HWY_INLINE void Unpack(D8 d8, const uint8_t* HWY_RESTRICT packed_in,
+                         uint8_t* HWY_RESTRICT raw) const {
+    using VU8 = Vec<decltype(d8)>;
+    const size_t N8 = Lanes(d8);
+    const VU8 raw0 = LoadU(d8, packed_in + 0 * N8);
+    const VU8 raw1 = LoadU(d8, packed_in + 1 * N8);
+    const VU8 raw2 = LoadU(d8, packed_in + 2 * N8);
+    const VU8 raw3 = LoadU(d8, packed_in + 3 * N8);
+    const VU8 raw4 = LoadU(d8, packed_in + 4 * N8);
+    const VU8 raw5 = LoadU(d8, packed_in + 5 * N8);
+    const VU8 raw6 = LoadU(d8, packed_in + 6 * N8);
+    const VU8 raw7 = LoadU(d8, packed_in + 7 * N8);
+
+    StoreU(raw0, d8, raw + 0 * N8);  // All loads precede the stores.
+    StoreU(raw1, d8, raw + 1 * N8);
+    StoreU(raw2, d8, raw + 2 * N8);
+    StoreU(raw3, d8, raw + 3 * N8);
+    StoreU(raw4, d8, raw + 4 * N8);
+    StoreU(raw5, d8, raw + 5 * N8);
+    StoreU(raw6, d8, raw + 6 * N8);
+    StoreU(raw7, d8, raw + 7 * N8);
+  }
+};  // Pack8<8>
+
+template <>
+struct Pack16<1> {  // 16 raw vectors <-> 1 packed vector
+  template <class D>
+  HWY_INLINE void Pack(D d, const uint16_t* HWY_RESTRICT raw,
+                       uint16_t* HWY_RESTRICT packed_out) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+    const VU16 raw0 = LoadU(d, raw + 0 * N);
+    const VU16 raw1 = LoadU(d, raw + 1 * N);
+    const VU16 raw2 = LoadU(d, raw + 2 * N);
+    const VU16 raw3 = LoadU(d, raw + 3 * N);
+    const VU16 raw4 = LoadU(d, raw + 4 * N);
+    const VU16 raw5 = LoadU(d, raw + 5 * N);
+    const VU16 raw6 = LoadU(d, raw + 6 * N);
+    const VU16 raw7 = LoadU(d, raw + 7 * N);
+    const VU16 raw8 = LoadU(d, raw + 8 * N);
+    const VU16 raw9 = LoadU(d, raw + 9 * N);
+    const VU16 rawA = LoadU(d, raw + 0xA * N);
+    const VU16 rawB = LoadU(d, raw + 0xB * N);
+    const VU16 rawC = LoadU(d, raw + 0xC * N);
+    const VU16 rawD = LoadU(d, raw + 0xD * N);
+    const VU16 rawE = LoadU(d, raw + 0xE * N);
+    const VU16 rawF = LoadU(d, raw + 0xF * N);
+
+    const VU16 p0 = Xor3(ShiftLeft<2>(raw2), Add(raw1, raw1), raw0);  // raw1<<1
+    const VU16 p1 =
+        Xor3(ShiftLeft<5>(raw5), ShiftLeft<4>(raw4), ShiftLeft<3>(raw3));
+    const VU16 p2 =
+        Xor3(ShiftLeft<8>(raw8), ShiftLeft<7>(raw7), ShiftLeft<6>(raw6));
+    const VU16 p3 =
+        Xor3(ShiftLeft<0xB>(rawB), ShiftLeft<0xA>(rawA), ShiftLeft<9>(raw9));
+    const VU16 p4 =
+        Xor3(ShiftLeft<0xE>(rawE), ShiftLeft<0xD>(rawD), ShiftLeft<0xC>(rawC));
+    const VU16 packed =
+        Or(Xor3(ShiftLeft<0xF>(rawF), p0, p1), Xor3(p2, p3, p4));
+    StoreU(packed, d, packed_out);
+  }
+
+  template <class D>
+  HWY_INLINE void Unpack(D d, const uint16_t* HWY_RESTRICT packed_in,
+                         uint16_t* HWY_RESTRICT raw) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+    const VU16 mask = Set(d, 1u);  // Lowest bit
+
+    const VU16 packed = LoadU(d, packed_in);
+
+    const VU16 raw0 = And(packed, mask);
+    StoreU(raw0, d, raw + 0 * N);
+
+    const VU16 raw1 = And(ShiftRight<1>(packed), mask);
+    StoreU(raw1, d, raw + 1 * N);
+
+    const VU16 raw2 = And(ShiftRight<2>(packed), mask);
+    StoreU(raw2, d, raw + 2 * N);
+
+    const VU16 raw3 = And(ShiftRight<3>(packed), mask);
+    StoreU(raw3, d, raw + 3 * N);
+
+    const VU16 raw4 = And(ShiftRight<4>(packed), mask);
+    StoreU(raw4, d, raw + 4 * N);
+
+    const VU16 raw5 = And(ShiftRight<5>(packed), mask);
+    StoreU(raw5, d, raw + 5 * N);
+
+    const VU16 raw6 = And(ShiftRight<6>(packed), mask);
+    StoreU(raw6, d, raw + 6 * N);
+
+    const VU16 raw7 = And(ShiftRight<7>(packed), mask);
+    StoreU(raw7, d, raw + 7 * N);
+
+    const VU16 raw8 = And(ShiftRight<8>(packed), mask);
+    StoreU(raw8, d, raw + 8 * N);
+
+    const VU16 raw9 = And(ShiftRight<9>(packed), mask);
+    StoreU(raw9, d, raw + 9 * N);
+
+    const VU16 rawA = And(ShiftRight<0xA>(packed), mask);
+    StoreU(rawA, d, raw + 0xA * N);
+
+    const VU16 rawB = And(ShiftRight<0xB>(packed), mask);
+    StoreU(rawB, d, raw + 0xB * N);
+
+    const VU16 rawC = And(ShiftRight<0xC>(packed), mask);
+    StoreU(rawC, d, raw + 0xC * N);
+
+    const VU16 rawD = And(ShiftRight<0xD>(packed), mask);
+    StoreU(rawD, d, raw + 0xD * N);
+
+    const VU16 rawE = And(ShiftRight<0xE>(packed), mask);
+    StoreU(rawE, d, raw + 0xE * N);
+
+    const VU16 rawF = ShiftRight<0xF>(packed);  // Top bit: no mask applied
+    StoreU(rawF, d, raw + 0xF * N);
+  }
+};  // Pack16<1>
+
+template <>
+struct Pack16<2> {  // 16 raw vectors <-> 2 packed vectors
+  template <class D>
+  HWY_INLINE void Pack(D d, const uint16_t* HWY_RESTRICT raw,
+                       uint16_t* HWY_RESTRICT packed_out) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+    const VU16 raw0 = LoadU(d, raw + 0 * N);
+    const VU16 raw1 = LoadU(d, raw + 1 * N);
+    const VU16 raw2 = LoadU(d, raw + 2 * N);
+    const VU16 raw3 = LoadU(d, raw + 3 * N);
+    const VU16 raw4 = LoadU(d, raw + 4 * N);
+    const VU16 raw5 = LoadU(d, raw + 5 * N);
+    const VU16 raw6 = LoadU(d, raw + 6 * N);
+    const VU16 raw7 = LoadU(d, raw + 7 * N);
+    const VU16 raw8 = LoadU(d, raw + 8 * N);
+    const VU16 raw9 = LoadU(d, raw + 9 * N);
+    const VU16 rawA = LoadU(d, raw + 0xA * N);
+    const VU16 rawB = LoadU(d, raw + 0xB * N);
+    const VU16 rawC = LoadU(d, raw + 0xC * N);
+    const VU16 rawD = LoadU(d, raw + 0xD * N);
+    const VU16 rawE = LoadU(d, raw + 0xE * N);
+    const VU16 rawF = LoadU(d, raw + 0xF * N);
+
+    VU16 packed0 = Xor3(ShiftLeft<4>(raw4), ShiftLeft<2>(raw2), raw0);  // even
+    VU16 packed1 = Xor3(ShiftLeft<4>(raw5), ShiftLeft<2>(raw3), raw1);  // odd
+    packed0 = Xor3(packed0, ShiftLeft<8>(raw8), ShiftLeft<6>(raw6));
+    packed1 = Xor3(packed1, ShiftLeft<8>(raw9), ShiftLeft<6>(raw7));
+
+    packed0 = Xor3(packed0, ShiftLeft<12>(rawC), ShiftLeft<10>(rawA));
+    packed1 = Xor3(packed1, ShiftLeft<12>(rawD), ShiftLeft<10>(rawB));
+
+    packed0 = Or(packed0, ShiftLeft<14>(rawE));
+    packed1 = Or(packed1, ShiftLeft<14>(rawF));
+    StoreU(packed0, d, packed_out + 0 * N);
+    StoreU(packed1, d, packed_out + 1 * N);
+  }
+
+  template <class D>
+  HWY_INLINE void Unpack(D d, const uint16_t* HWY_RESTRICT packed_in,
+                         uint16_t* HWY_RESTRICT raw) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+    const VU16 mask = Set(d, 0x3u);  // Lowest 2 bits
+
+    const VU16 packed0 = LoadU(d, packed_in + 0 * N);
+    const VU16 packed1 = LoadU(d, packed_in + 1 * N);
+
+    const VU16 raw0 = And(packed0, mask);
+    StoreU(raw0, d, raw + 0 * N);
+
+    const VU16 raw1 = And(packed1, mask);
+    StoreU(raw1, d, raw + 1 * N);
+
+    const VU16 raw2 = And(ShiftRight<2>(packed0), mask);
+    StoreU(raw2, d, raw + 2 * N);
+
+    const VU16 raw3 = And(ShiftRight<2>(packed1), mask);
+    StoreU(raw3, d, raw + 3 * N);
+
+    const VU16 raw4 = And(ShiftRight<4>(packed0), mask);
+    StoreU(raw4, d, raw + 4 * N);
+
+    const VU16 raw5 = And(ShiftRight<4>(packed1), mask);
+    StoreU(raw5, d, raw + 5 * N);
+
+    const VU16 raw6 = And(ShiftRight<6>(packed0), mask);
+    StoreU(raw6, d, raw + 6 * N);
+
+    const VU16 raw7 = And(ShiftRight<6>(packed1), mask);
+    StoreU(raw7, d, raw + 7 * N);
+
+    const VU16 raw8 = And(ShiftRight<8>(packed0), mask);
+    StoreU(raw8, d, raw + 8 * N);
+
+    const VU16 raw9 = And(ShiftRight<8>(packed1), mask);
+    StoreU(raw9, d, raw + 9 * N);
+
+    const VU16 rawA = And(ShiftRight<0xA>(packed0), mask);
+    StoreU(rawA, d, raw + 0xA * N);
+
+    const VU16 rawB = And(ShiftRight<0xA>(packed1), mask);
+    StoreU(rawB, d, raw + 0xB * N);
+
+    const VU16 rawC = And(ShiftRight<0xC>(packed0), mask);
+    StoreU(rawC, d, raw + 0xC * N);
+
+    const VU16 rawD = And(ShiftRight<0xC>(packed1), mask);
+    StoreU(rawD, d, raw + 0xD * N);
+
+    const VU16 rawE = ShiftRight<0xE>(packed0);  // Top 2 bits: no mask applied
+    StoreU(rawE, d, raw + 0xE * N);
+
+    const VU16 rawF = ShiftRight<0xE>(packed1);  // Top 2 bits: no mask applied
+    StoreU(rawF, d, raw + 0xF * N);
+  }
+};  // Pack16<2>
+
+template <>
+struct Pack16<3> {  // 3 bits per lane: 16 raw vectors <-> 3 packed vectors.
+  template <class D>
+  HWY_INLINE void Pack(D d, const uint16_t* HWY_RESTRICT raw,
+                       uint16_t* HWY_RESTRICT packed_out) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);  // reads 16 * N lanes, writes 3 * N lanes
+    const VU16 raw0 = LoadU(d, raw + 0 * N);
+    const VU16 raw1 = LoadU(d, raw + 1 * N);
+    const VU16 raw2 = LoadU(d, raw + 2 * N);
+    const VU16 raw3 = LoadU(d, raw + 3 * N);
+    const VU16 raw4 = LoadU(d, raw + 4 * N);
+    const VU16 raw5 = LoadU(d, raw + 5 * N);
+    const VU16 raw6 = LoadU(d, raw + 6 * N);
+    const VU16 raw7 = LoadU(d, raw + 7 * N);
+    const VU16 raw8 = LoadU(d, raw + 8 * N);
+    const VU16 raw9 = LoadU(d, raw + 9 * N);
+    const VU16 rawA = LoadU(d, raw + 0xA * N);
+    const VU16 rawB = LoadU(d, raw + 0xB * N);
+    const VU16 rawC = LoadU(d, raw + 0xC * N);
+    const VU16 rawD = LoadU(d, raw + 0xD * N);
+    const VU16 rawE = LoadU(d, raw + 0xE * N);
+    const VU16 rawF = LoadU(d, raw + 0xF * N);
+    // NOTE(review): raw lanes are not masked; presumably must fit in 3 bits.
+    // We can fit 15 raw vectors in three packed vectors (five each).
+    VU16 packed0 = Xor3(ShiftLeft<6>(raw6), ShiftLeft<3>(raw3), raw0);
+    VU16 packed1 = Xor3(ShiftLeft<6>(raw7), ShiftLeft<3>(raw4), raw1);
+    VU16 packed2 = Xor3(ShiftLeft<6>(raw8), ShiftLeft<3>(raw5), raw2);
+
+    // rawF will be scattered into the upper bit of these three.
+    packed0 = Xor3(packed0, ShiftLeft<12>(rawC), ShiftLeft<9>(raw9));
+    packed1 = Xor3(packed1, ShiftLeft<12>(rawD), ShiftLeft<9>(rawA));
+    packed2 = Xor3(packed2, ShiftLeft<12>(rawE), ShiftLeft<9>(rawB));
+    // Bit k of rawF (k = 0..2) becomes bit 15 of packed<k>.
+    const VU16 hi1 = Set(d, 0x8000u);
+    packed0 = Or(packed0, ShiftLeft<15>(rawF));  // MSB only, no mask
+    packed1 = OrAnd(packed1, ShiftLeft<14>(rawF), hi1);
+    packed2 = OrAnd(packed2, ShiftLeft<13>(rawF), hi1);
+    StoreU(packed0, d, packed_out + 0 * N);
+    StoreU(packed1, d, packed_out + 1 * N);
+    StoreU(packed2, d, packed_out + 2 * N);
+  }
+  // Inverse of Pack: reads 3 packed vectors and writes 16 raw vectors.
+  template <class D>
+  HWY_INLINE void Unpack(D d, const uint16_t* HWY_RESTRICT packed_in,
+                         uint16_t* HWY_RESTRICT raw) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+    const VU16 mask = Set(d, 0x7u);  // Lowest 3 bits
+
+    const VU16 packed0 = LoadU(d, packed_in + 0 * N);
+    const VU16 packed1 = LoadU(d, packed_in + 1 * N);
+    const VU16 packed2 = LoadU(d, packed_in + 2 * N);
+    // raw0..rawE: 3-bit fields at bit offsets 0, 3, 6, 9, 12 of packed0..2.
+    const VU16 raw0 = And(mask, packed0);
+    StoreU(raw0, d, raw + 0 * N);
+
+    const VU16 raw1 = And(mask, packed1);
+    StoreU(raw1, d, raw + 1 * N);
+
+    const VU16 raw2 = And(mask, packed2);
+    StoreU(raw2, d, raw + 2 * N);
+
+    const VU16 raw3 = And(mask, ShiftRight<3>(packed0));
+    StoreU(raw3, d, raw + 3 * N);
+
+    const VU16 raw4 = And(mask, ShiftRight<3>(packed1));
+    StoreU(raw4, d, raw + 4 * N);
+
+    const VU16 raw5 = And(mask, ShiftRight<3>(packed2));
+    StoreU(raw5, d, raw + 5 * N);
+
+    const VU16 raw6 = And(mask, ShiftRight<6>(packed0));
+    StoreU(raw6, d, raw + 6 * N);
+
+    const VU16 raw7 = And(mask, ShiftRight<6>(packed1));
+    StoreU(raw7, d, raw + 7 * N);
+
+    const VU16 raw8 = And(mask, ShiftRight<6>(packed2));
+    StoreU(raw8, d, raw + 8 * N);
+
+    const VU16 raw9 = And(mask, ShiftRight<9>(packed0));
+    StoreU(raw9, d, raw + 9 * N);
+
+    const VU16 rawA = And(mask, ShiftRight<9>(packed1));
+    StoreU(rawA, d, raw + 0xA * N);
+
+    const VU16 rawB = And(mask, ShiftRight<9>(packed2));
+    StoreU(rawB, d, raw + 0xB * N);
+
+    const VU16 rawC = And(mask, ShiftRight<12>(packed0));
+    StoreU(rawC, d, raw + 0xC * N);
+
+    const VU16 rawD = And(mask, ShiftRight<12>(packed1));
+    StoreU(rawD, d, raw + 0xD * N);
+
+    const VU16 rawE = And(mask, ShiftRight<12>(packed2));
+    StoreU(rawE, d, raw + 0xE * N);
+
+    // rawF is the concatenation of the upper bit (bit 15) of packed0..2.
+    const VU16 down0 = ShiftRight<15>(packed0);
+    const VU16 down1 = ShiftRight<15>(packed1);
+    const VU16 down2 = ShiftRight<15>(packed2);
+    const VU16 rawF = Xor3(ShiftLeft<2>(down2), Add(down1, down1), down0);
+    StoreU(rawF, d, raw + 0xF * N);
+  }
+};  // Pack16<3>
+
+template <>
+struct Pack16<4> {  // 4 bits per lane: 16 raw vectors <-> 4 packed vectors.
+  template <class D>
+  HWY_INLINE void Pack(D d, const uint16_t* HWY_RESTRICT raw,
+                       uint16_t* HWY_RESTRICT packed_out) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);  // reads 16 * N lanes, writes 4 * N lanes
+    const VU16 raw0 = LoadU(d, raw + 0 * N);
+    const VU16 raw1 = LoadU(d, raw + 1 * N);
+    const VU16 raw2 = LoadU(d, raw + 2 * N);
+    const VU16 raw3 = LoadU(d, raw + 3 * N);
+    const VU16 raw4 = LoadU(d, raw + 4 * N);
+    const VU16 raw5 = LoadU(d, raw + 5 * N);
+    const VU16 raw6 = LoadU(d, raw + 6 * N);
+    const VU16 raw7 = LoadU(d, raw + 7 * N);
+    const VU16 raw8 = LoadU(d, raw + 8 * N);
+    const VU16 raw9 = LoadU(d, raw + 9 * N);
+    const VU16 rawA = LoadU(d, raw + 0xA * N);
+    const VU16 rawB = LoadU(d, raw + 0xB * N);
+    const VU16 rawC = LoadU(d, raw + 0xC * N);
+    const VU16 rawD = LoadU(d, raw + 0xD * N);
+    const VU16 rawE = LoadU(d, raw + 0xE * N);
+    const VU16 rawF = LoadU(d, raw + 0xF * N);
+    // packed0/1 interleave raw0..7, packed2/3 raw8..F (four fields each).
+    VU16 packed0 = Xor3(ShiftLeft<8>(raw4), ShiftLeft<4>(raw2), raw0);
+    VU16 packed1 = Xor3(ShiftLeft<8>(raw5), ShiftLeft<4>(raw3), raw1);
+    packed0 = Or(packed0, ShiftLeft<12>(raw6));
+    packed1 = Or(packed1, ShiftLeft<12>(raw7));
+    VU16 packed2 = Xor3(ShiftLeft<8>(rawC), ShiftLeft<4>(rawA), raw8);
+    VU16 packed3 = Xor3(ShiftLeft<8>(rawD), ShiftLeft<4>(rawB), raw9);
+    packed2 = Or(packed2, ShiftLeft<12>(rawE));
+    packed3 = Or(packed3, ShiftLeft<12>(rawF));
+    // NOTE(review): no masking above; presumably raw lanes must fit in 4 bits.
+    StoreU(packed0, d, packed_out + 0 * N);
+    StoreU(packed1, d, packed_out + 1 * N);
+    StoreU(packed2, d, packed_out + 2 * N);
+    StoreU(packed3, d, packed_out + 3 * N);
+  }
+  // Inverse of Pack: reads 4 packed vectors and writes 16 raw vectors.
+  template <class D>
+  HWY_INLINE void Unpack(D d, const uint16_t* HWY_RESTRICT packed_in,
+                         uint16_t* HWY_RESTRICT raw) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+    const VU16 mask = Set(d, 0xFu);  // Lowest 4 bits
+
+    const VU16 packed0 = LoadU(d, packed_in + 0 * N);
+    const VU16 packed1 = LoadU(d, packed_in + 1 * N);
+    const VU16 packed2 = LoadU(d, packed_in + 2 * N);
+    const VU16 packed3 = LoadU(d, packed_in + 3 * N);
+    // Fields sit at bit offsets 0, 4, 8, 12 of each packed vector.
+    const VU16 raw0 = And(packed0, mask);
+    StoreU(raw0, d, raw + 0 * N);
+
+    const VU16 raw1 = And(packed1, mask);
+    StoreU(raw1, d, raw + 1 * N);
+
+    const VU16 raw2 = And(ShiftRight<4>(packed0), mask);
+    StoreU(raw2, d, raw + 2 * N);
+
+    const VU16 raw3 = And(ShiftRight<4>(packed1), mask);
+    StoreU(raw3, d, raw + 3 * N);
+
+    const VU16 raw4 = And(ShiftRight<8>(packed0), mask);
+    StoreU(raw4, d, raw + 4 * N);
+
+    const VU16 raw5 = And(ShiftRight<8>(packed1), mask);
+    StoreU(raw5, d, raw + 5 * N);
+
+    const VU16 raw6 = ShiftRight<12>(packed0);  // no mask required
+    StoreU(raw6, d, raw + 6 * N);
+
+    const VU16 raw7 = ShiftRight<12>(packed1);  // no mask required
+    StoreU(raw7, d, raw + 7 * N);
+
+    const VU16 raw8 = And(packed2, mask);
+    StoreU(raw8, d, raw + 8 * N);
+
+    const VU16 raw9 = And(packed3, mask);
+    StoreU(raw9, d, raw + 9 * N);
+
+    const VU16 rawA = And(ShiftRight<4>(packed2), mask);
+    StoreU(rawA, d, raw + 0xA * N);
+
+    const VU16 rawB = And(ShiftRight<4>(packed3), mask);
+    StoreU(rawB, d, raw + 0xB * N);
+
+    const VU16 rawC = And(ShiftRight<8>(packed2), mask);
+    StoreU(rawC, d, raw + 0xC * N);
+
+    const VU16 rawD = And(ShiftRight<8>(packed3), mask);
+    StoreU(rawD, d, raw + 0xD * N);
+
+    const VU16 rawE = ShiftRight<12>(packed2);  // no mask required
+    StoreU(rawE, d, raw + 0xE * N);
+
+    const VU16 rawF = ShiftRight<12>(packed3);  // no mask required
+    StoreU(rawF, d, raw + 0xF * N);
+  }
+};  // Pack16<4>
+
+template <>
+struct Pack16<5> {  // 5 bits per lane: 16 raw vectors <-> 5 packed vectors.
+  template <class D>
+  HWY_INLINE void Pack(D d, const uint16_t* HWY_RESTRICT raw,
+                       uint16_t* HWY_RESTRICT packed_out) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);  // reads 16 * N lanes, writes 5 * N lanes
+    const VU16 raw0 = LoadU(d, raw + 0 * N);
+    const VU16 raw1 = LoadU(d, raw + 1 * N);
+    const VU16 raw2 = LoadU(d, raw + 2 * N);
+    const VU16 raw3 = LoadU(d, raw + 3 * N);
+    const VU16 raw4 = LoadU(d, raw + 4 * N);
+    const VU16 raw5 = LoadU(d, raw + 5 * N);
+    const VU16 raw6 = LoadU(d, raw + 6 * N);
+    const VU16 raw7 = LoadU(d, raw + 7 * N);
+    const VU16 raw8 = LoadU(d, raw + 8 * N);
+    const VU16 raw9 = LoadU(d, raw + 9 * N);
+    const VU16 rawA = LoadU(d, raw + 0xA * N);
+    const VU16 rawB = LoadU(d, raw + 0xB * N);
+    const VU16 rawC = LoadU(d, raw + 0xC * N);
+    const VU16 rawD = LoadU(d, raw + 0xD * N);
+    const VU16 rawE = LoadU(d, raw + 0xE * N);
+    const VU16 rawF = LoadU(d, raw + 0xF * N);
+    // NOTE(review): raw lanes are not masked; presumably must fit in 5 bits.
+    // We can fit 15 raw vectors in five packed vectors (three each).
+    VU16 packed0 = Xor3(ShiftLeft<10>(rawA), ShiftLeft<5>(raw5), raw0);
+    VU16 packed1 = Xor3(ShiftLeft<10>(rawB), ShiftLeft<5>(raw6), raw1);
+    VU16 packed2 = Xor3(ShiftLeft<10>(rawC), ShiftLeft<5>(raw7), raw2);
+    VU16 packed3 = Xor3(ShiftLeft<10>(rawD), ShiftLeft<5>(raw8), raw3);
+    VU16 packed4 = Xor3(ShiftLeft<10>(rawE), ShiftLeft<5>(raw9), raw4);
+
+    // Bit k of rawF (k = 0..4) is scattered into bit 15 of packed<k>.
+    const VU16 hi1 = Set(d, 0x8000u);
+    packed0 = Or(packed0, ShiftLeft<15>(rawF));  // MSB only, no mask
+    packed1 = OrAnd(packed1, ShiftLeft<14>(rawF), hi1);
+    packed2 = OrAnd(packed2, ShiftLeft<13>(rawF), hi1);
+    packed3 = OrAnd(packed3, ShiftLeft<12>(rawF), hi1);
+    packed4 = OrAnd(packed4, ShiftLeft<11>(rawF), hi1);
+
+    StoreU(packed0, d, packed_out + 0 * N);
+    StoreU(packed1, d, packed_out + 1 * N);
+    StoreU(packed2, d, packed_out + 2 * N);
+    StoreU(packed3, d, packed_out + 3 * N);
+    StoreU(packed4, d, packed_out + 4 * N);
+  }
+  // Inverse of Pack: reads 5 packed vectors and writes 16 raw vectors.
+  template <class D>
+  HWY_INLINE void Unpack(D d, const uint16_t* HWY_RESTRICT packed_in,
+                         uint16_t* HWY_RESTRICT raw) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+
+    const VU16 packed0 = LoadU(d, packed_in + 0 * N);
+    const VU16 packed1 = LoadU(d, packed_in + 1 * N);
+    const VU16 packed2 = LoadU(d, packed_in + 2 * N);
+    const VU16 packed3 = LoadU(d, packed_in + 3 * N);
+    const VU16 packed4 = LoadU(d, packed_in + 4 * N);
+    // raw0..rawE: 5-bit fields at bit offsets 0, 5, 10 of packed0..4.
+    const VU16 mask = Set(d, 0x1Fu);  // Lowest 5 bits
+
+    const VU16 raw0 = And(packed0, mask);
+    StoreU(raw0, d, raw + 0 * N);
+
+    const VU16 raw1 = And(packed1, mask);
+    StoreU(raw1, d, raw + 1 * N);
+
+    const VU16 raw2 = And(packed2, mask);
+    StoreU(raw2, d, raw + 2 * N);
+
+    const VU16 raw3 = And(packed3, mask);
+    StoreU(raw3, d, raw + 3 * N);
+
+    const VU16 raw4 = And(packed4, mask);
+    StoreU(raw4, d, raw + 4 * N);
+
+    const VU16 raw5 = And(ShiftRight<5>(packed0), mask);
+    StoreU(raw5, d, raw + 5 * N);
+
+    const VU16 raw6 = And(ShiftRight<5>(packed1), mask);
+    StoreU(raw6, d, raw + 6 * N);
+
+    const VU16 raw7 = And(ShiftRight<5>(packed2), mask);
+    StoreU(raw7, d, raw + 7 * N);
+
+    const VU16 raw8 = And(ShiftRight<5>(packed3), mask);
+    StoreU(raw8, d, raw + 8 * N);
+
+    const VU16 raw9 = And(ShiftRight<5>(packed4), mask);
+    StoreU(raw9, d, raw + 9 * N);
+
+    const VU16 rawA = And(ShiftRight<10>(packed0), mask);
+    StoreU(rawA, d, raw + 0xA * N);
+
+    const VU16 rawB = And(ShiftRight<10>(packed1), mask);
+    StoreU(rawB, d, raw + 0xB * N);
+
+    const VU16 rawC = And(ShiftRight<10>(packed2), mask);
+    StoreU(rawC, d, raw + 0xC * N);
+
+    const VU16 rawD = And(ShiftRight<10>(packed3), mask);
+    StoreU(rawD, d, raw + 0xD * N);
+
+    const VU16 rawE = And(ShiftRight<10>(packed4), mask);
+    StoreU(rawE, d, raw + 0xE * N);
+
+    // rawF is the concatenation of the upper bit (bit 15) of packed0..4.
+    const VU16 down0 = ShiftRight<15>(packed0);
+    const VU16 down1 = ShiftRight<15>(packed1);
+    const VU16 hi1 = Set(d, 0x8000u);
+    const VU16 p0 =
+        Xor3(ShiftRight<13>(And(packed2, hi1)), Add(down1, down1), down0);
+    const VU16 rawF = Xor3(ShiftRight<11>(And(packed4, hi1)),
+                           ShiftRight<12>(And(packed3, hi1)), p0);
+    StoreU(rawF, d, raw + 0xF * N);
+  }
+};  // Pack16<5>
+
+template <>
+struct Pack16<6> {  // 6 bits per lane: 16 raw vectors <-> 6 packed vectors.
+  template <class D>
+  HWY_INLINE void Pack(D d, const uint16_t* HWY_RESTRICT raw,
+                       uint16_t* HWY_RESTRICT packed_out) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);  // reads 16 * N lanes, writes 6 * N lanes
+    const VU16 raw0 = LoadU(d, raw + 0 * N);
+    const VU16 raw1 = LoadU(d, raw + 1 * N);
+    const VU16 raw2 = LoadU(d, raw + 2 * N);
+    const VU16 raw3 = LoadU(d, raw + 3 * N);
+    const VU16 raw4 = LoadU(d, raw + 4 * N);
+    const VU16 raw5 = LoadU(d, raw + 5 * N);
+    const VU16 raw6 = LoadU(d, raw + 6 * N);
+    const VU16 raw7 = LoadU(d, raw + 7 * N);
+    const VU16 raw8 = LoadU(d, raw + 8 * N);
+    const VU16 raw9 = LoadU(d, raw + 9 * N);
+    const VU16 rawA = LoadU(d, raw + 0xA * N);
+    const VU16 rawB = LoadU(d, raw + 0xB * N);
+    const VU16 rawC = LoadU(d, raw + 0xC * N);
+    const VU16 rawD = LoadU(d, raw + 0xD * N);
+    const VU16 rawE = LoadU(d, raw + 0xE * N);
+    const VU16 rawF = LoadU(d, raw + 0xF * N);
+    // 12-bit pairs (raw7:raw3 and rawF:rawB) that are split into nibbles.
+    const VU16 packed3 = Or(ShiftLeft<6>(raw7), raw3);
+    const VU16 packed7 = Or(ShiftLeft<6>(rawF), rawB);
+    // Three vectors, two 6-bit raw each; packed3 (12 bits) is spread over the
+    // four remainder bits at the top of each vector.
+    const VU16 packed0 = Xor3(ShiftLeft<12>(packed3), ShiftLeft<6>(raw4), raw0);
+    VU16 packed1 = Or(ShiftLeft<6>(raw5), raw1);
+    VU16 packed2 = Or(ShiftLeft<6>(raw6), raw2);
+    const VU16 packed4 = Xor3(ShiftLeft<12>(packed7), ShiftLeft<6>(rawC), raw8);
+    VU16 packed5 = Or(ShiftLeft<6>(rawD), raw9);
+    VU16 packed6 = Or(ShiftLeft<6>(rawE), rawA);
+    // Nibble 0 went into packed0/4 above; nibbles 1 and 2 go into the rest.
+    const VU16 hi4 = Set(d, 0xF000u);
+    packed1 = OrAnd(packed1, ShiftLeft<8>(packed3), hi4);
+    packed2 = OrAnd(packed2, ShiftLeft<4>(packed3), hi4);
+    packed5 = OrAnd(packed5, ShiftLeft<8>(packed7), hi4);
+    packed6 = OrAnd(packed6, ShiftLeft<4>(packed7), hi4);
+    // packed3/packed7 are not stored; output slots 3..5 hold packed4..6.
+    StoreU(packed0, d, packed_out + 0 * N);
+    StoreU(packed1, d, packed_out + 1 * N);
+    StoreU(packed2, d, packed_out + 2 * N);
+    StoreU(packed4, d, packed_out + 3 * N);
+    StoreU(packed5, d, packed_out + 4 * N);
+    StoreU(packed6, d, packed_out + 5 * N);
+  }
+  // Inverse of Pack: reads 6 packed vectors and writes 16 raw vectors.
+  template <class D>
+  HWY_INLINE void Unpack(D d, const uint16_t* HWY_RESTRICT packed_in,
+                         uint16_t* HWY_RESTRICT raw) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+    const VU16 mask = Set(d, 0x3Fu);  // Lowest 6 bits
+    // Names match Pack: input slots 3..5 are packed4..6 (no stored packed3).
+    const VU16 packed0 = LoadU(d, packed_in + 0 * N);
+    const VU16 packed1 = LoadU(d, packed_in + 1 * N);
+    const VU16 packed2 = LoadU(d, packed_in + 2 * N);
+    const VU16 packed4 = LoadU(d, packed_in + 3 * N);
+    const VU16 packed5 = LoadU(d, packed_in + 4 * N);
+    const VU16 packed6 = LoadU(d, packed_in + 5 * N);
+
+    const VU16 raw0 = And(packed0, mask);
+    StoreU(raw0, d, raw + 0 * N);
+
+    const VU16 raw1 = And(packed1, mask);
+    StoreU(raw1, d, raw + 1 * N);
+
+    const VU16 raw2 = And(packed2, mask);
+    StoreU(raw2, d, raw + 2 * N);
+
+    const VU16 raw4 = And(ShiftRight<6>(packed0), mask);
+    StoreU(raw4, d, raw + 4 * N);
+
+    const VU16 raw5 = And(ShiftRight<6>(packed1), mask);
+    StoreU(raw5, d, raw + 5 * N);
+
+    const VU16 raw6 = And(ShiftRight<6>(packed2), mask);
+    StoreU(raw6, d, raw + 6 * N);
+
+    const VU16 raw8 = And(packed4, mask);
+    StoreU(raw8, d, raw + 8 * N);
+
+    const VU16 raw9 = And(packed5, mask);
+    StoreU(raw9, d, raw + 9 * N);
+
+    const VU16 rawA = And(packed6, mask);
+    StoreU(rawA, d, raw + 0xA * N);
+
+    const VU16 rawC = And(ShiftRight<6>(packed4), mask);
+    StoreU(rawC, d, raw + 0xC * N);
+
+    const VU16 rawD = And(ShiftRight<6>(packed5), mask);
+    StoreU(rawD, d, raw + 0xD * N);
+
+    const VU16 rawE = And(ShiftRight<6>(packed6), mask);
+    StoreU(rawE, d, raw + 0xE * N);
+
+    // packed3/packed7: the four upper bits of packed0..2/packed4..6, joined.
+    const VU16 down0 = ShiftRight<12>(packed0);
+    const VU16 down4 = ShiftRight<12>(packed4);
+    const VU16 hi4 = Set(d, 0xF000u);
+    const VU16 packed3 = Xor3(ShiftRight<4>(And(packed2, hi4)),
+                              ShiftRight<8>(And(packed1, hi4)), down0);
+    const VU16 packed7 = Xor3(ShiftRight<4>(And(packed6, hi4)),
+                              ShiftRight<8>(And(packed5, hi4)), down4);
+    const VU16 raw3 = And(packed3, mask);
+    StoreU(raw3, d, raw + 3 * N);
+
+    const VU16 rawB = And(packed7, mask);
+    StoreU(rawB, d, raw + 0xB * N);
+
+    const VU16 raw7 = ShiftRight<6>(packed3);  // upper bits already zero
+    StoreU(raw7, d, raw + 7 * N);
+
+    const VU16 rawF = ShiftRight<6>(packed7);  // upper bits already zero
+    StoreU(rawF, d, raw + 0xF * N);
+  }
+};  // Pack16<6>
+
+template <>
+struct Pack16<7> {  // 7 bits per lane: 16 raw vectors <-> 7 packed vectors.
+  template <class D>
+  HWY_INLINE void Pack(D d, const uint16_t* HWY_RESTRICT raw,
+                       uint16_t* HWY_RESTRICT packed_out) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);  // reads 16 * N lanes, writes 7 * N lanes
+    const VU16 raw0 = LoadU(d, raw + 0 * N);
+    const VU16 raw1 = LoadU(d, raw + 1 * N);
+    const VU16 raw2 = LoadU(d, raw + 2 * N);
+    const VU16 raw3 = LoadU(d, raw + 3 * N);
+    const VU16 raw4 = LoadU(d, raw + 4 * N);
+    const VU16 raw5 = LoadU(d, raw + 5 * N);
+    const VU16 raw6 = LoadU(d, raw + 6 * N);
+    const VU16 raw7 = LoadU(d, raw + 7 * N);
+    const VU16 raw8 = LoadU(d, raw + 8 * N);
+    const VU16 raw9 = LoadU(d, raw + 9 * N);
+    const VU16 rawA = LoadU(d, raw + 0xA * N);
+    const VU16 rawB = LoadU(d, raw + 0xB * N);
+    const VU16 rawC = LoadU(d, raw + 0xC * N);
+    const VU16 rawD = LoadU(d, raw + 0xD * N);
+    const VU16 rawE = LoadU(d, raw + 0xE * N);
+    const VU16 rawF = LoadU(d, raw + 0xF * N);
+    // 14-bit pair rawF:raw7; never stored, split into seven 2-bit pieces.
+    const VU16 packed7 = Or(ShiftLeft<7>(rawF), raw7);
+    // Seven vectors, two 7-bit raw each; packed7 (14 bits) is spread over the
+    // two remainder bits at the top of each vector.
+    const VU16 packed0 = Xor3(ShiftLeft<14>(packed7), ShiftLeft<7>(raw8), raw0);
+    VU16 packed1 = Or(ShiftLeft<7>(raw9), raw1);
+    VU16 packed2 = Or(ShiftLeft<7>(rawA), raw2);
+    VU16 packed3 = Or(ShiftLeft<7>(rawB), raw3);
+    VU16 packed4 = Or(ShiftLeft<7>(rawC), raw4);
+    VU16 packed5 = Or(ShiftLeft<7>(rawD), raw5);
+    VU16 packed6 = Or(ShiftLeft<7>(rawE), raw6);
+    // Bits 2k and 2k+1 of packed7 land in bits 14..15 of packed<k>.
+    const VU16 hi2 = Set(d, 0xC000u);
+    packed1 = OrAnd(packed1, ShiftLeft<12>(packed7), hi2);
+    packed2 = OrAnd(packed2, ShiftLeft<10>(packed7), hi2);
+    packed3 = OrAnd(packed3, ShiftLeft<8>(packed7), hi2);
+    packed4 = OrAnd(packed4, ShiftLeft<6>(packed7), hi2);
+    packed5 = OrAnd(packed5, ShiftLeft<4>(packed7), hi2);
+    packed6 = OrAnd(packed6, ShiftLeft<2>(packed7), hi2);
+
+    StoreU(packed0, d, packed_out + 0 * N);
+    StoreU(packed1, d, packed_out + 1 * N);
+    StoreU(packed2, d, packed_out + 2 * N);
+    StoreU(packed3, d, packed_out + 3 * N);
+    StoreU(packed4, d, packed_out + 4 * N);
+    StoreU(packed5, d, packed_out + 5 * N);
+    StoreU(packed6, d, packed_out + 6 * N);
+  }
+  // Inverse of Pack: reads 7 packed vectors and writes 16 raw vectors.
+  template <class D>
+  HWY_INLINE void Unpack(D d, const uint16_t* HWY_RESTRICT packed_in,
+                         uint16_t* HWY_RESTRICT raw) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+    // NOTE(review): BitCast to the same d looks redundant; Pack omits it.
+    const VU16 packed0 = BitCast(d, LoadU(d, packed_in + 0 * N));
+    const VU16 packed1 = BitCast(d, LoadU(d, packed_in + 1 * N));
+    const VU16 packed2 = BitCast(d, LoadU(d, packed_in + 2 * N));
+    const VU16 packed3 = BitCast(d, LoadU(d, packed_in + 3 * N));
+    const VU16 packed4 = BitCast(d, LoadU(d, packed_in + 4 * N));
+    const VU16 packed5 = BitCast(d, LoadU(d, packed_in + 5 * N));
+    const VU16 packed6 = BitCast(d, LoadU(d, packed_in + 6 * N));
+
+    const VU16 mask = Set(d, 0x7Fu);  // Lowest 7 bits
+
+    const VU16 raw0 = And(packed0, mask);
+    StoreU(raw0, d, raw + 0 * N);
+
+    const VU16 raw1 = And(packed1, mask);
+    StoreU(raw1, d, raw + 1 * N);
+
+    const VU16 raw2 = And(packed2, mask);
+    StoreU(raw2, d, raw + 2 * N);
+
+    const VU16 raw3 = And(packed3, mask);
+    StoreU(raw3, d, raw + 3 * N);
+
+    const VU16 raw4 = And(packed4, mask);
+    StoreU(raw4, d, raw + 4 * N);
+
+    const VU16 raw5 = And(packed5, mask);
+    StoreU(raw5, d, raw + 5 * N);
+
+    const VU16 raw6 = And(packed6, mask);
+    StoreU(raw6, d, raw + 6 * N);
+
+    const VU16 raw8 = And(ShiftRight<7>(packed0), mask);
+    StoreU(raw8, d, raw + 8 * N);
+
+    const VU16 raw9 = And(ShiftRight<7>(packed1), mask);
+    StoreU(raw9, d, raw + 9 * N);
+
+    const VU16 rawA = And(ShiftRight<7>(packed2), mask);
+    StoreU(rawA, d, raw + 0xA * N);
+
+    const VU16 rawB = And(ShiftRight<7>(packed3), mask);
+    StoreU(rawB, d, raw + 0xB * N);
+
+    const VU16 rawC = And(ShiftRight<7>(packed4), mask);
+    StoreU(rawC, d, raw + 0xC * N);
+
+    const VU16 rawD = And(ShiftRight<7>(packed5), mask);
+    StoreU(rawD, d, raw + 0xD * N);
+
+    const VU16 rawE = And(ShiftRight<7>(packed6), mask);
+    StoreU(rawE, d, raw + 0xE * N);
+
+    // packed7 is the concatenation of the two upper bits in packed0..6.
+    const VU16 down0 = ShiftRight<14>(packed0);
+    const VU16 hi2 = Set(d, 0xC000u);
+    const VU16 p0 = Xor3(ShiftRight<12>(And(packed1, hi2)),
+                         ShiftRight<10>(And(packed2, hi2)), down0);
+    const VU16 p1 = Xor3(ShiftRight<8>(And(packed3, hi2)),  //
+                         ShiftRight<6>(And(packed4, hi2)),
+                         ShiftRight<4>(And(packed5, hi2)));
+    const VU16 packed7 = Xor3(ShiftRight<2>(And(packed6, hi2)), p1, p0);
+
+    const VU16 raw7 = And(packed7, mask);
+    StoreU(raw7, d, raw + 7 * N);
+
+    const VU16 rawF = ShiftRight<7>(packed7);  // upper bits already zero
+    StoreU(rawF, d, raw + 0xF * N);
+  }
+};  // Pack16<7>
+
+template <>
+struct Pack16<8> {  // 8 bits per lane: 16 raw vectors <-> 8 packed vectors.
+  template <class D>
+  HWY_INLINE void Pack(D d, const uint16_t* HWY_RESTRICT raw,
+                       uint16_t* HWY_RESTRICT packed_out) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);  // reads 16 * N lanes, writes 8 * N lanes
+    const VU16 raw0 = LoadU(d, raw + 0 * N);
+    const VU16 raw1 = LoadU(d, raw + 1 * N);
+    const VU16 raw2 = LoadU(d, raw + 2 * N);
+    const VU16 raw3 = LoadU(d, raw + 3 * N);
+    const VU16 raw4 = LoadU(d, raw + 4 * N);
+    const VU16 raw5 = LoadU(d, raw + 5 * N);
+    const VU16 raw6 = LoadU(d, raw + 6 * N);
+    const VU16 raw7 = LoadU(d, raw + 7 * N);
+    const VU16 raw8 = LoadU(d, raw + 8 * N);
+    const VU16 raw9 = LoadU(d, raw + 9 * N);
+    const VU16 rawA = LoadU(d, raw + 0xA * N);
+    const VU16 rawB = LoadU(d, raw + 0xB * N);
+    const VU16 rawC = LoadU(d, raw + 0xC * N);
+    const VU16 rawD = LoadU(d, raw + 0xD * N);
+    const VU16 rawE = LoadU(d, raw + 0xE * N);
+    const VU16 rawF = LoadU(d, raw + 0xF * N);
+    // Each packed vector pairs two raw vectors: one low byte, one high byte.
+    // This is equivalent to ConcatEven with 8-bit lanes, but much more
+    // efficient on RVV and slightly less efficient on SVE2.
+    const VU16 packed0 = Or(ShiftLeft<8>(raw2), raw0);
+    const VU16 packed1 = Or(ShiftLeft<8>(raw3), raw1);
+    const VU16 packed2 = Or(ShiftLeft<8>(raw6), raw4);
+    const VU16 packed3 = Or(ShiftLeft<8>(raw7), raw5);
+    const VU16 packed4 = Or(ShiftLeft<8>(rawA), raw8);
+    const VU16 packed5 = Or(ShiftLeft<8>(rawB), raw9);
+    const VU16 packed6 = Or(ShiftLeft<8>(rawE), rawC);
+    const VU16 packed7 = Or(ShiftLeft<8>(rawF), rawD);
+    // NOTE(review): no masking above; presumably raw lanes must fit in 8 bits.
+    StoreU(packed0, d, packed_out + 0 * N);
+    StoreU(packed1, d, packed_out + 1 * N);
+    StoreU(packed2, d, packed_out + 2 * N);
+    StoreU(packed3, d, packed_out + 3 * N);
+    StoreU(packed4, d, packed_out + 4 * N);
+    StoreU(packed5, d, packed_out + 5 * N);
+    StoreU(packed6, d, packed_out + 6 * N);
+    StoreU(packed7, d, packed_out + 7 * N);
+  }
+  // Inverse of Pack: reads 8 packed vectors and writes 16 raw vectors.
+  template <class D>
+  HWY_INLINE void Unpack(D d, const uint16_t* HWY_RESTRICT packed_in,
+                         uint16_t* HWY_RESTRICT raw) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+    // NOTE(review): BitCast to the same d looks redundant; Pack omits it.
+    const VU16 packed0 = BitCast(d, LoadU(d, packed_in + 0 * N));
+    const VU16 packed1 = BitCast(d, LoadU(d, packed_in + 1 * N));
+    const VU16 packed2 = BitCast(d, LoadU(d, packed_in + 2 * N));
+    const VU16 packed3 = BitCast(d, LoadU(d, packed_in + 3 * N));
+    const VU16 packed4 = BitCast(d, LoadU(d, packed_in + 4 * N));
+    const VU16 packed5 = BitCast(d, LoadU(d, packed_in + 5 * N));
+    const VU16 packed6 = BitCast(d, LoadU(d, packed_in + 6 * N));
+    const VU16 packed7 = BitCast(d, LoadU(d, packed_in + 7 * N));
+    const VU16 mask = Set(d, 0xFFu);  // Lowest 8 bits
+
+    const VU16 raw0 = And(packed0, mask);
+    StoreU(raw0, d, raw + 0 * N);
+
+    const VU16 raw1 = And(packed1, mask);
+    StoreU(raw1, d, raw + 1 * N);
+
+    const VU16 raw2 = ShiftRight<8>(packed0);  // upper bits already zero
+    StoreU(raw2, d, raw + 2 * N);
+
+    const VU16 raw3 = ShiftRight<8>(packed1);  // upper bits already zero
+    StoreU(raw3, d, raw + 3 * N);
+
+    const VU16 raw4 = And(packed2, mask);
+    StoreU(raw4, d, raw + 4 * N);
+
+    const VU16 raw5 = And(packed3, mask);
+    StoreU(raw5, d, raw + 5 * N);
+
+    const VU16 raw6 = ShiftRight<8>(packed2);  // upper bits already zero
+    StoreU(raw6, d, raw + 6 * N);
+
+    const VU16 raw7 = ShiftRight<8>(packed3);  // upper bits already zero
+    StoreU(raw7, d, raw + 7 * N);
+
+    const VU16 raw8 = And(packed4, mask);
+    StoreU(raw8, d, raw + 8 * N);
+
+    const VU16 raw9 = And(packed5, mask);
+    StoreU(raw9, d, raw + 9 * N);
+
+    const VU16 rawA = ShiftRight<8>(packed4);  // upper bits already zero
+    StoreU(rawA, d, raw + 0xA * N);
+
+    const VU16 rawB = ShiftRight<8>(packed5);  // upper bits already zero
+    StoreU(rawB, d, raw + 0xB * N);
+
+    const VU16 rawC = And(packed6, mask);
+    StoreU(rawC, d, raw + 0xC * N);
+
+    const VU16 rawD = And(packed7, mask);
+    StoreU(rawD, d, raw + 0xD * N);
+
+    const VU16 rawE = ShiftRight<8>(packed6);  // upper bits already zero
+    StoreU(rawE, d, raw + 0xE * N);
+
+    const VU16 rawF = ShiftRight<8>(packed7);  // upper bits already zero
+    StoreU(rawF, d, raw + 0xF * N);
+  }
+};  // Pack16<8>
+
+template <>
+struct Pack16<9> {  // 9 bits per lane: 16 raw vectors <-> 9 packed vectors.
+  template <class D>
+  HWY_INLINE void Pack(D d, const uint16_t* HWY_RESTRICT raw,
+                       uint16_t* HWY_RESTRICT packed_out) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);  // reads 16 * N lanes, writes 9 * N lanes
+    const VU16 raw0 = LoadU(d, raw + 0 * N);
+    const VU16 raw1 = LoadU(d, raw + 1 * N);
+    const VU16 raw2 = LoadU(d, raw + 2 * N);
+    const VU16 raw3 = LoadU(d, raw + 3 * N);
+    const VU16 raw4 = LoadU(d, raw + 4 * N);
+    const VU16 raw5 = LoadU(d, raw + 5 * N);
+    const VU16 raw6 = LoadU(d, raw + 6 * N);
+    const VU16 raw7 = LoadU(d, raw + 7 * N);
+    const VU16 raw8 = LoadU(d, raw + 8 * N);
+    const VU16 raw9 = LoadU(d, raw + 9 * N);
+    const VU16 rawA = LoadU(d, raw + 0xA * N);
+    const VU16 rawB = LoadU(d, raw + 0xB * N);
+    const VU16 rawC = LoadU(d, raw + 0xC * N);
+    const VU16 rawD = LoadU(d, raw + 0xD * N);
+    const VU16 rawE = LoadU(d, raw + 0xE * N);
+    const VU16 rawF = LoadU(d, raw + 0xF * N);
+    // 8 vectors, each with 9+7 bits; top 2 bits of raw8..F go into packed8.
+    const VU16 packed0 = Or(ShiftLeft<9>(raw8), raw0);
+    const VU16 packed1 = Or(ShiftLeft<9>(raw9), raw1);
+    const VU16 packed2 = Or(ShiftLeft<9>(rawA), raw2);
+    const VU16 packed3 = Or(ShiftLeft<9>(rawB), raw3);
+    const VU16 packed4 = Or(ShiftLeft<9>(rawC), raw4);
+    const VU16 packed5 = Or(ShiftLeft<9>(rawD), raw5);
+    const VU16 packed6 = Or(ShiftLeft<9>(rawE), raw6);
+    const VU16 packed7 = Or(ShiftLeft<9>(rawF), raw7);
+
+    // We could shift down, OR and shift up, but two shifts are typically more
+    // expensive than AND, shift into position, and OR (which can be further
+    // reduced via Xor3).
+    const VU16 mid2 = Set(d, 0x180u);  // top 2 in lower 9
+    const VU16 part8 = ShiftRight<7>(And(raw8, mid2));
+    const VU16 part9 = ShiftRight<5>(And(raw9, mid2));
+    const VU16 partA = ShiftRight<3>(And(rawA, mid2));
+    const VU16 partB = ShiftRight<1>(And(rawB, mid2));
+    const VU16 partC = ShiftLeft<1>(And(rawC, mid2));
+    const VU16 partD = ShiftLeft<3>(And(rawD, mid2));
+    const VU16 partE = ShiftLeft<5>(And(rawE, mid2));
+    const VU16 partF = ShiftLeft<7>(And(rawF, mid2));
+    const VU16 packed8 = Xor3(Xor3(part8, part9, partA),
+                              Xor3(partB, partC, partD), Or(partE, partF));
+    // packed8 bits 2k and 2k+1 hold bits 7..8 of raw(8+k), for k = 0..7.
+    StoreU(packed0, d, packed_out + 0 * N);
+    StoreU(packed1, d, packed_out + 1 * N);
+    StoreU(packed2, d, packed_out + 2 * N);
+    StoreU(packed3, d, packed_out + 3 * N);
+    StoreU(packed4, d, packed_out + 4 * N);
+    StoreU(packed5, d, packed_out + 5 * N);
+    StoreU(packed6, d, packed_out + 6 * N);
+    StoreU(packed7, d, packed_out + 7 * N);
+    StoreU(packed8, d, packed_out + 8 * N);
+  }
+  // Inverse of Pack: reads 9 packed vectors and writes 16 raw vectors.
+  template <class D>
+  HWY_INLINE void Unpack(D d, const uint16_t* HWY_RESTRICT packed_in,
+                         uint16_t* HWY_RESTRICT raw) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+    // NOTE(review): BitCast to the same d looks redundant; Pack omits it.
+    const VU16 packed0 = BitCast(d, LoadU(d, packed_in + 0 * N));
+    const VU16 packed1 = BitCast(d, LoadU(d, packed_in + 1 * N));
+    const VU16 packed2 = BitCast(d, LoadU(d, packed_in + 2 * N));
+    const VU16 packed3 = BitCast(d, LoadU(d, packed_in + 3 * N));
+    const VU16 packed4 = BitCast(d, LoadU(d, packed_in + 4 * N));
+    const VU16 packed5 = BitCast(d, LoadU(d, packed_in + 5 * N));
+    const VU16 packed6 = BitCast(d, LoadU(d, packed_in + 6 * N));
+    const VU16 packed7 = BitCast(d, LoadU(d, packed_in + 7 * N));
+    const VU16 packed8 = BitCast(d, LoadU(d, packed_in + 8 * N));
+
+    const VU16 mask = Set(d, 0x1FFu);  // Lowest 9 bits
+
+    const VU16 raw0 = And(packed0, mask);
+    StoreU(raw0, d, raw + 0 * N);
+
+    const VU16 raw1 = And(packed1, mask);
+    StoreU(raw1, d, raw + 1 * N);
+
+    const VU16 raw2 = And(packed2, mask);
+    StoreU(raw2, d, raw + 2 * N);
+
+    const VU16 raw3 = And(packed3, mask);
+    StoreU(raw3, d, raw + 3 * N);
+
+    const VU16 raw4 = And(packed4, mask);
+    StoreU(raw4, d, raw + 4 * N);
+
+    const VU16 raw5 = And(packed5, mask);
+    StoreU(raw5, d, raw + 5 * N);
+
+    const VU16 raw6 = And(packed6, mask);
+    StoreU(raw6, d, raw + 6 * N);
+
+    const VU16 raw7 = And(packed7, mask);
+    StoreU(raw7, d, raw + 7 * N);
+    // raw8..F: low 7 bits from packed0..7 >> 9, bits 7..8 from packed8.
+    const VU16 mid2 = Set(d, 0x180u);  // top 2 in lower 9
+    const VU16 raw8 =
+        OrAnd(ShiftRight<9>(packed0), ShiftLeft<7>(packed8), mid2);
+    const VU16 raw9 =
+        OrAnd(ShiftRight<9>(packed1), ShiftLeft<5>(packed8), mid2);
+    const VU16 rawA =
+        OrAnd(ShiftRight<9>(packed2), ShiftLeft<3>(packed8), mid2);
+    const VU16 rawB =
+        OrAnd(ShiftRight<9>(packed3), ShiftLeft<1>(packed8), mid2);
+    const VU16 rawC =
+        OrAnd(ShiftRight<9>(packed4), ShiftRight<1>(packed8), mid2);
+    const VU16 rawD =
+        OrAnd(ShiftRight<9>(packed5), ShiftRight<3>(packed8), mid2);
+    const VU16 rawE =
+        OrAnd(ShiftRight<9>(packed6), ShiftRight<5>(packed8), mid2);
+    const VU16 rawF =
+        OrAnd(ShiftRight<9>(packed7), ShiftRight<7>(packed8), mid2);
+
+    StoreU(raw8, d, raw + 8 * N);
+    StoreU(raw9, d, raw + 9 * N);
+    StoreU(rawA, d, raw + 0xA * N);
+    StoreU(rawB, d, raw + 0xB * N);
+    StoreU(rawC, d, raw + 0xC * N);
+    StoreU(rawD, d, raw + 0xD * N);
+    StoreU(rawE, d, raw + 0xE * N);
+    StoreU(rawF, d, raw + 0xF * N);
+  }
+};  // Pack16<9>
+
+template <>
+struct Pack16<10> {
+  // Packs 16 vectors of u16 lanes read from `raw` into 10 vectors written to
+  // `packed_out`, keeping 10 bits per lane. Inputs 0..7 are OR-ed in without
+  // masking, so their upper six bits must already be zero.
+  template <class D>
+  HWY_INLINE void Pack(D d, const uint16_t* HWY_RESTRICT raw,
+                       uint16_t* HWY_RESTRICT packed_out) const {
+    using V = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+
+    // Inputs 0..7 stay whole in the low 10 bits of outputs 0..7.
+    const V lo0 = LoadU(d, raw + 0 * N);
+    const V lo1 = LoadU(d, raw + 1 * N);
+    const V lo2 = LoadU(d, raw + 2 * N);
+    const V lo3 = LoadU(d, raw + 3 * N);
+    const V lo4 = LoadU(d, raw + 4 * N);
+    const V lo5 = LoadU(d, raw + 5 * N);
+    const V lo6 = LoadU(d, raw + 6 * N);
+    const V lo7 = LoadU(d, raw + 7 * N);
+
+    // Inputs 8..F are split 6+4: their six low bits fill the spare top bits of
+    // outputs 0..7, their four high bits are collected in outputs 8 and 9.
+    const V hi0 = LoadU(d, raw + 8 * N);
+    const V hi1 = LoadU(d, raw + 9 * N);
+    const V hi2 = LoadU(d, raw + 0xA * N);
+    const V hi3 = LoadU(d, raw + 0xB * N);
+    const V hi4 = LoadU(d, raw + 0xC * N);
+    const V hi5 = LoadU(d, raw + 0xD * N);
+    const V hi6 = LoadU(d, raw + 0xE * N);
+    const V hi7 = LoadU(d, raw + 0xF * N);
+
+    // The left shift by 10 discards the four high bits of the split inputs.
+    const V out0 = Or(ShiftLeft<10>(hi0), lo0);
+    const V out1 = Or(ShiftLeft<10>(hi1), lo1);
+    const V out2 = Or(ShiftLeft<10>(hi2), lo2);
+    const V out3 = Or(ShiftLeft<10>(hi3), lo3);
+    const V out4 = Or(ShiftLeft<10>(hi4), lo4);
+    const V out5 = Or(ShiftLeft<10>(hi5), lo5);
+    const V out6 = Or(ShiftLeft<10>(hi6), lo6);
+    const V out7 = Or(ShiftLeft<10>(hi7), lo7);
+
+    // Mask first, then a single shift moves each nibble into its slot; this
+    // is cheaper than shifting down and back up, and the disjoint nibbles can
+    // be merged with Xor3.
+    const V top4 = Set(d, 0x3C0u);  // bits [6, 10)
+    const V nib0 = ShiftRight<6>(And(hi0, top4));  // -> bits [0, 4)
+    const V nib1 = ShiftRight<2>(And(hi1, top4));  // -> bits [4, 8)
+    const V nib2 = ShiftLeft<2>(And(hi2, top4));   // -> bits [8, 12)
+    const V nib3 = ShiftLeft<6>(And(hi3, top4));   // -> bits [12, 16)
+    const V nib4 = ShiftRight<6>(And(hi4, top4));
+    const V nib5 = ShiftRight<2>(And(hi5, top4));
+    const V nib6 = ShiftLeft<2>(And(hi6, top4));
+    const V nib7 = ShiftLeft<6>(And(hi7, top4));
+    const V out8 = Or(Xor3(nib0, nib1, nib2), nib3);
+    const V out9 = Or(Xor3(nib4, nib5, nib6), nib7);
+
+    StoreU(out0, d, packed_out + 0 * N);
+    StoreU(out1, d, packed_out + 1 * N);
+    StoreU(out2, d, packed_out + 2 * N);
+    StoreU(out3, d, packed_out + 3 * N);
+    StoreU(out4, d, packed_out + 4 * N);
+    StoreU(out5, d, packed_out + 5 * N);
+    StoreU(out6, d, packed_out + 6 * N);
+    StoreU(out7, d, packed_out + 7 * N);
+    StoreU(out8, d, packed_out + 8 * N);
+    StoreU(out9, d, packed_out + 9 * N);
+  }
+
+  // Inverse of Pack: reads 10 vectors from `packed_in` and writes 16 vectors
+  // of 10-bit values to `raw`.
+  template <class D>
+  HWY_INLINE void Unpack(D d, const uint16_t* HWY_RESTRICT packed_in,
+                         uint16_t* HWY_RESTRICT raw) const {
+    using V = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+
+    const V in0 = BitCast(d, LoadU(d, packed_in + 0 * N));
+    const V in1 = BitCast(d, LoadU(d, packed_in + 1 * N));
+    const V in2 = BitCast(d, LoadU(d, packed_in + 2 * N));
+    const V in3 = BitCast(d, LoadU(d, packed_in + 3 * N));
+    const V in4 = BitCast(d, LoadU(d, packed_in + 4 * N));
+    const V in5 = BitCast(d, LoadU(d, packed_in + 5 * N));
+    const V in6 = BitCast(d, LoadU(d, packed_in + 6 * N));
+    const V in7 = BitCast(d, LoadU(d, packed_in + 7 * N));
+    // Nibbles of outputs 8..B and C..F respectively.
+    const V nibs8B = BitCast(d, LoadU(d, packed_in + 8 * N));
+    const V nibsCF = BitCast(d, LoadU(d, packed_in + 9 * N));
+
+    // Outputs 0..7 are simply the low 10 bits of the first eight inputs.
+    const V low10 = Set(d, 0x3FFu);
+    StoreU(And(in0, low10), d, raw + 0 * N);
+    StoreU(And(in1, low10), d, raw + 1 * N);
+    StoreU(And(in2, low10), d, raw + 2 * N);
+    StoreU(And(in3, low10), d, raw + 3 * N);
+    StoreU(And(in4, low10), d, raw + 4 * N);
+    StoreU(And(in5, low10), d, raw + 5 * N);
+    StoreU(And(in6, low10), d, raw + 6 * N);
+    StoreU(And(in7, low10), d, raw + 7 * N);
+
+    // Outputs 8..F: six low bits from the top of in0..7, four high bits from
+    // the matching nibble, shifted so that it lands on bits [6, 10).
+    const V top4 = Set(d, 0x3C0u);
+    const V out8 = OrAnd(ShiftRight<10>(in0), ShiftLeft<6>(nibs8B), top4);
+    const V out9 = OrAnd(ShiftRight<10>(in1), ShiftLeft<2>(nibs8B), top4);
+    const V outA = OrAnd(ShiftRight<10>(in2), ShiftRight<2>(nibs8B), top4);
+    const V outB = OrAnd(ShiftRight<10>(in3), ShiftRight<6>(nibs8B), top4);
+    const V outC = OrAnd(ShiftRight<10>(in4), ShiftLeft<6>(nibsCF), top4);
+    const V outD = OrAnd(ShiftRight<10>(in5), ShiftLeft<2>(nibsCF), top4);
+    const V outE = OrAnd(ShiftRight<10>(in6), ShiftRight<2>(nibsCF), top4);
+    const V outF = OrAnd(ShiftRight<10>(in7), ShiftRight<6>(nibsCF), top4);
+
+    StoreU(out8, d, raw + 8 * N);
+    StoreU(out9, d, raw + 9 * N);
+    StoreU(outA, d, raw + 0xA * N);
+    StoreU(outB, d, raw + 0xB * N);
+    StoreU(outC, d, raw + 0xC * N);
+    StoreU(outD, d, raw + 0xD * N);
+    StoreU(outE, d, raw + 0xE * N);
+    StoreU(outF, d, raw + 0xF * N);
+  }
+};  // Pack16<10>
+
+template <>
+struct Pack16<11> {
+  // Packs 16 vectors of u16 lanes read from `raw` into 11 vectors written to
+  // `packed_out`, keeping 11 bits per lane. The top-bit fields of raw0..2 are
+  // inserted without masking, so input lanes must be below 2^11.
+  //
+  // Layout: packed0..7 hold the low bytes of raw0..F (two per lane). Each of
+  // packed8..A holds five 3-bit fields (the bits [8, 11) of five inputs) in
+  // bits [0, 15), plus one bit of rawF in its MSB.
+  template <class D>
+  HWY_INLINE void Pack(D d, const uint16_t* HWY_RESTRICT raw,
+                       uint16_t* HWY_RESTRICT packed_out) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+    const VU16 raw0 = LoadU(d, raw + 0 * N);
+    const VU16 raw1 = LoadU(d, raw + 1 * N);
+    const VU16 raw2 = LoadU(d, raw + 2 * N);
+    const VU16 raw3 = LoadU(d, raw + 3 * N);
+    const VU16 raw4 = LoadU(d, raw + 4 * N);
+    const VU16 raw5 = LoadU(d, raw + 5 * N);
+    const VU16 raw6 = LoadU(d, raw + 6 * N);
+    const VU16 raw7 = LoadU(d, raw + 7 * N);
+    const VU16 raw8 = LoadU(d, raw + 8 * N);
+    const VU16 raw9 = LoadU(d, raw + 9 * N);
+    const VU16 rawA = LoadU(d, raw + 0xA * N);
+    const VU16 rawB = LoadU(d, raw + 0xB * N);
+    const VU16 rawC = LoadU(d, raw + 0xC * N);
+    const VU16 rawD = LoadU(d, raw + 0xD * N);
+    const VU16 rawE = LoadU(d, raw + 0xE * N);
+    const VU16 rawF = LoadU(d, raw + 0xF * N);
+
+    // It is not obvious what the optimal partitioning looks like. To reduce the
+    // number of constants, we want to minimize the number of distinct bit
+    // lengths. 11+5 also requires 6-bit remnants with 4-bit leftovers.
+    // 8+3 seems better: it is easier to scatter 3 bits into the MSBs.
+    const VU16 lo8 = Set(d, 0xFFu);
+
+    // Lower 8 bits of all raw: odd inputs in the upper byte, even inputs in
+    // the lower byte.
+    const VU16 packed0 = OrAnd(ShiftLeft<8>(raw1), raw0, lo8);
+    const VU16 packed1 = OrAnd(ShiftLeft<8>(raw3), raw2, lo8);
+    const VU16 packed2 = OrAnd(ShiftLeft<8>(raw5), raw4, lo8);
+    const VU16 packed3 = OrAnd(ShiftLeft<8>(raw7), raw6, lo8);
+    const VU16 packed4 = OrAnd(ShiftLeft<8>(raw9), raw8, lo8);
+    const VU16 packed5 = OrAnd(ShiftLeft<8>(rawB), rawA, lo8);
+    const VU16 packed6 = OrAnd(ShiftLeft<8>(rawD), rawC, lo8);
+    const VU16 packed7 = OrAnd(ShiftLeft<8>(rawF), rawE, lo8);
+
+    StoreU(packed0, d, packed_out + 0 * N);
+    StoreU(packed1, d, packed_out + 1 * N);
+    StoreU(packed2, d, packed_out + 2 * N);
+    StoreU(packed3, d, packed_out + 3 * N);
+    StoreU(packed4, d, packed_out + 4 * N);
+    StoreU(packed5, d, packed_out + 5 * N);
+    StoreU(packed6, d, packed_out + 6 * N);
+    StoreU(packed7, d, packed_out + 7 * N);
+
+    // Three vectors, five 3bit remnants each, plus one 3bit in their MSB.
+    const VU16 top0 = ShiftRight<8>(raw0);
+    const VU16 top1 = ShiftRight<8>(raw1);
+    const VU16 top2 = ShiftRight<8>(raw2);
+    // Insert top raw bits into 3-bit groups within packed8..A. Moving the
+    // mask along avoids masking each of raw0..E and enables OrAnd.
+    VU16 next = Set(d, 0x38u);  // 0x7 << 3
+    VU16 packed8 = OrAnd(top0, ShiftRight<5>(raw3), next);
+    VU16 packed9 = OrAnd(top1, ShiftRight<5>(raw4), next);
+    VU16 packedA = OrAnd(top2, ShiftRight<5>(raw5), next);
+    next = ShiftLeft<3>(next);  // = 0x1C0u
+    packed8 = OrAnd(packed8, ShiftRight<2>(raw6), next);
+    packed9 = OrAnd(packed9, ShiftRight<2>(raw7), next);
+    packedA = OrAnd(packedA, ShiftRight<2>(raw8), next);
+    next = ShiftLeft<3>(next);  // = 0xE00u
+    // Add(x, x) is a left shift by one.
+    packed8 = OrAnd(packed8, Add(raw9, raw9), next);
+    packed9 = OrAnd(packed9, Add(rawA, rawA), next);
+    packedA = OrAnd(packedA, Add(rawB, rawB), next);
+    next = ShiftLeft<3>(next);  // = 0x7000u
+    packed8 = OrAnd(packed8, ShiftLeft<4>(rawC), next);
+    packed9 = OrAnd(packed9, ShiftLeft<4>(rawD), next);
+    packedA = OrAnd(packedA, ShiftLeft<4>(rawE), next);
+
+    // Scatter upper 3 bits of rawF into the upper bits: bit 8 goes to
+    // packed8, bit 9 to packed9, bit 10 to packedA.
+    next = ShiftLeft<3>(next);  // = 0x8000u
+    packed8 = OrAnd(packed8, ShiftLeft<7>(rawF), next);
+    packed9 = OrAnd(packed9, ShiftLeft<6>(rawF), next);
+    packedA = OrAnd(packedA, ShiftLeft<5>(rawF), next);
+
+    StoreU(packed8, d, packed_out + 8 * N);
+    StoreU(packed9, d, packed_out + 9 * N);
+    StoreU(packedA, d, packed_out + 0xA * N);
+  }
+
+  // Inverse of Pack: reads 11 vectors from `packed_in` and writes 16 vectors
+  // of 11-bit values to `raw`.
+  template <class D>
+  HWY_INLINE void Unpack(D d, const uint16_t* HWY_RESTRICT packed_in,
+                         uint16_t* HWY_RESTRICT raw) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+
+    const VU16 packed0 = BitCast(d, LoadU(d, packed_in + 0 * N));
+    const VU16 packed1 = BitCast(d, LoadU(d, packed_in + 1 * N));
+    const VU16 packed2 = BitCast(d, LoadU(d, packed_in + 2 * N));
+    const VU16 packed3 = BitCast(d, LoadU(d, packed_in + 3 * N));
+    const VU16 packed4 = BitCast(d, LoadU(d, packed_in + 4 * N));
+    const VU16 packed5 = BitCast(d, LoadU(d, packed_in + 5 * N));
+    const VU16 packed6 = BitCast(d, LoadU(d, packed_in + 6 * N));
+    const VU16 packed7 = BitCast(d, LoadU(d, packed_in + 7 * N));
+    const VU16 packed8 = BitCast(d, LoadU(d, packed_in + 8 * N));
+    const VU16 packed9 = BitCast(d, LoadU(d, packed_in + 9 * N));
+    const VU16 packedA = BitCast(d, LoadU(d, packed_in + 0xA * N));
+
+    const VU16 mask = Set(d, 0xFFu);  // Lowest 8 bits
+
+    // Low byte of every output: even outputs from the lower byte of
+    // packed0..7, odd outputs from the upper byte.
+    const VU16 down0 = And(packed0, mask);
+    const VU16 down1 = ShiftRight<8>(packed0);
+    const VU16 down2 = And(packed1, mask);
+    const VU16 down3 = ShiftRight<8>(packed1);
+    const VU16 down4 = And(packed2, mask);
+    const VU16 down5 = ShiftRight<8>(packed2);
+    const VU16 down6 = And(packed3, mask);
+    const VU16 down7 = ShiftRight<8>(packed3);
+    const VU16 down8 = And(packed4, mask);
+    const VU16 down9 = ShiftRight<8>(packed4);
+    const VU16 downA = And(packed5, mask);
+    const VU16 downB = ShiftRight<8>(packed5);
+    const VU16 downC = And(packed6, mask);
+    const VU16 downD = ShiftRight<8>(packed6);
+    const VU16 downE = And(packed7, mask);
+    const VU16 downF = ShiftRight<8>(packed7);
+
+    // Three bits from packed8..A, eight bits from down0..F. Each shift moves
+    // one 3-bit field onto bits [8, 11).
+    const VU16 hi3 = Set(d, 0x700u);
+    const VU16 raw0 = OrAnd(down0, ShiftLeft<8>(packed8), hi3);
+    const VU16 raw1 = OrAnd(down1, ShiftLeft<8>(packed9), hi3);
+    const VU16 raw2 = OrAnd(down2, ShiftLeft<8>(packedA), hi3);
+
+    const VU16 raw3 = OrAnd(down3, ShiftLeft<5>(packed8), hi3);
+    const VU16 raw4 = OrAnd(down4, ShiftLeft<5>(packed9), hi3);
+    const VU16 raw5 = OrAnd(down5, ShiftLeft<5>(packedA), hi3);
+
+    const VU16 raw6 = OrAnd(down6, ShiftLeft<2>(packed8), hi3);
+    const VU16 raw7 = OrAnd(down7, ShiftLeft<2>(packed9), hi3);
+    const VU16 raw8 = OrAnd(down8, ShiftLeft<2>(packedA), hi3);
+
+    const VU16 raw9 = OrAnd(down9, ShiftRight<1>(packed8), hi3);
+    const VU16 rawA = OrAnd(downA, ShiftRight<1>(packed9), hi3);
+    const VU16 rawB = OrAnd(downB, ShiftRight<1>(packedA), hi3);
+
+    const VU16 rawC = OrAnd(downC, ShiftRight<4>(packed8), hi3);
+    const VU16 rawD = OrAnd(downD, ShiftRight<4>(packed9), hi3);
+    const VU16 rawE = OrAnd(downE, ShiftRight<4>(packedA), hi3);
+
+    // Each of packed8..A contributes exactly one bit (its MSB) to rawF.
+    // Isolate that bit before shifting it into place: masking the shifted
+    // value with hi3 instead would also admit bits 13..14, which hold the top
+    // bits of rawC..E, and corrupt rawF.
+    const VU16 msb = Set(d, 0x8000u);
+    const VU16 rawF = Or(downF, Xor3(ShiftRight<7>(And(packed8, msb)),
+                                     ShiftRight<6>(And(packed9, msb)),
+                                     ShiftRight<5>(And(packedA, msb))));
+
+    StoreU(raw0, d, raw + 0 * N);
+    StoreU(raw1, d, raw + 1 * N);
+    StoreU(raw2, d, raw + 2 * N);
+    StoreU(raw3, d, raw + 3 * N);
+    StoreU(raw4, d, raw + 4 * N);
+    StoreU(raw5, d, raw + 5 * N);
+    StoreU(raw6, d, raw + 6 * N);
+    StoreU(raw7, d, raw + 7 * N);
+    StoreU(raw8, d, raw + 8 * N);
+    StoreU(raw9, d, raw + 9 * N);
+    StoreU(rawA, d, raw + 0xA * N);
+    StoreU(rawB, d, raw + 0xB * N);
+    StoreU(rawC, d, raw + 0xC * N);
+    StoreU(rawD, d, raw + 0xD * N);
+    StoreU(rawE, d, raw + 0xE * N);
+    StoreU(rawF, d, raw + 0xF * N);
+  }
+};  // Pack16<11>
+
+template <>
+struct Pack16<12> {
+  // Packs 16 vectors of u16 lanes read from `raw` into 12 vectors written to
+  // `packed_out`, keeping 12 bits per lane. Several inputs are combined
+  // without masking, so input lanes must be below 2^12.
+  template <class D>
+  HWY_INLINE void Pack(D d, const uint16_t* HWY_RESTRICT raw,
+                       uint16_t* HWY_RESTRICT packed_out) const {
+    using V = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+
+    // Inputs 0..7 stay whole in the low 12 bits of outputs 0..7.
+    const V lo0 = LoadU(d, raw + 0 * N);
+    const V lo1 = LoadU(d, raw + 1 * N);
+    const V lo2 = LoadU(d, raw + 2 * N);
+    const V lo3 = LoadU(d, raw + 3 * N);
+    const V lo4 = LoadU(d, raw + 4 * N);
+    const V lo5 = LoadU(d, raw + 5 * N);
+    const V lo6 = LoadU(d, raw + 6 * N);
+    const V lo7 = LoadU(d, raw + 7 * N);
+
+    // Inputs 8..F are split 4+8: the low nibble fills the spare top bits of
+    // outputs 0..7, the upper byte goes into outputs 8..B (two per lane).
+    const V hi0 = LoadU(d, raw + 8 * N);
+    const V hi1 = LoadU(d, raw + 9 * N);
+    const V hi2 = LoadU(d, raw + 0xA * N);
+    const V hi3 = LoadU(d, raw + 0xB * N);
+    const V hi4 = LoadU(d, raw + 0xC * N);
+    const V hi5 = LoadU(d, raw + 0xD * N);
+    const V hi6 = LoadU(d, raw + 0xE * N);
+    const V hi7 = LoadU(d, raw + 0xF * N);
+
+    const V out0 = Or(ShiftLeft<12>(hi0), lo0);
+    const V out1 = Or(ShiftLeft<12>(hi1), lo1);
+    const V out2 = Or(ShiftLeft<12>(hi2), lo2);
+    const V out3 = Or(ShiftLeft<12>(hi3), lo3);
+    const V out4 = Or(ShiftLeft<12>(hi4), lo4);
+    const V out5 = Or(ShiftLeft<12>(hi5), lo5);
+    const V out6 = Or(ShiftLeft<12>(hi6), lo6);
+    const V out7 = Or(ShiftLeft<12>(hi7), lo7);
+
+    // Even split input: bits [4, 12) -> lower byte. Odd split input: bits
+    // [4, 12) -> upper byte; masking after the left shift enables OrAnd.
+    const V upperByte = Set(d, 0xFF00u);
+    const V out8 = OrAnd(ShiftRight<4>(hi0), ShiftLeft<4>(hi1), upperByte);
+    const V out9 = OrAnd(ShiftRight<4>(hi2), ShiftLeft<4>(hi3), upperByte);
+    const V outA = OrAnd(ShiftRight<4>(hi4), ShiftLeft<4>(hi5), upperByte);
+    const V outB = OrAnd(ShiftRight<4>(hi6), ShiftLeft<4>(hi7), upperByte);
+
+    StoreU(out0, d, packed_out + 0 * N);
+    StoreU(out1, d, packed_out + 1 * N);
+    StoreU(out2, d, packed_out + 2 * N);
+    StoreU(out3, d, packed_out + 3 * N);
+    StoreU(out4, d, packed_out + 4 * N);
+    StoreU(out5, d, packed_out + 5 * N);
+    StoreU(out6, d, packed_out + 6 * N);
+    StoreU(out7, d, packed_out + 7 * N);
+    StoreU(out8, d, packed_out + 8 * N);
+    StoreU(out9, d, packed_out + 9 * N);
+    StoreU(outA, d, packed_out + 0xA * N);
+    StoreU(outB, d, packed_out + 0xB * N);
+  }
+
+  // Inverse of Pack: reads 12 vectors from `packed_in` and writes 16 vectors
+  // of 12-bit values to `raw`.
+  template <class D>
+  HWY_INLINE void Unpack(D d, const uint16_t* HWY_RESTRICT packed_in,
+                         uint16_t* HWY_RESTRICT raw) const {
+    using V = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+
+    const V in0 = BitCast(d, LoadU(d, packed_in + 0 * N));
+    const V in1 = BitCast(d, LoadU(d, packed_in + 1 * N));
+    const V in2 = BitCast(d, LoadU(d, packed_in + 2 * N));
+    const V in3 = BitCast(d, LoadU(d, packed_in + 3 * N));
+    const V in4 = BitCast(d, LoadU(d, packed_in + 4 * N));
+    const V in5 = BitCast(d, LoadU(d, packed_in + 5 * N));
+    const V in6 = BitCast(d, LoadU(d, packed_in + 6 * N));
+    const V in7 = BitCast(d, LoadU(d, packed_in + 7 * N));
+    // Upper bytes of outputs 8/9, A/B, C/D and E/F respectively.
+    const V bytes89 = BitCast(d, LoadU(d, packed_in + 8 * N));
+    const V bytesAB = BitCast(d, LoadU(d, packed_in + 9 * N));
+    const V bytesCD = BitCast(d, LoadU(d, packed_in + 0xA * N));
+    const V bytesEF = BitCast(d, LoadU(d, packed_in + 0xB * N));
+
+    // Outputs 0..7 are simply the low 12 bits of the first eight inputs.
+    const V low12 = Set(d, 0xFFFu);
+    StoreU(And(in0, low12), d, raw + 0 * N);
+    StoreU(And(in1, low12), d, raw + 1 * N);
+    StoreU(And(in2, low12), d, raw + 2 * N);
+    StoreU(And(in3, low12), d, raw + 3 * N);
+    StoreU(And(in4, low12), d, raw + 4 * N);
+    StoreU(And(in5, low12), d, raw + 5 * N);
+    StoreU(And(in6, low12), d, raw + 6 * N);
+    StoreU(And(in7, low12), d, raw + 7 * N);
+
+    // Outputs 8..F: low nibble from the top of in0..7, bits [4, 12) from the
+    // lower (even outputs) or upper (odd outputs) byte of the byte vectors.
+    const V top8 = Set(d, 0xFF0u);
+    const V out8 = OrAnd(ShiftRight<12>(in0), ShiftLeft<4>(bytes89), top8);
+    const V out9 = OrAnd(ShiftRight<12>(in1), ShiftRight<4>(bytes89), top8);
+    const V outA = OrAnd(ShiftRight<12>(in2), ShiftLeft<4>(bytesAB), top8);
+    const V outB = OrAnd(ShiftRight<12>(in3), ShiftRight<4>(bytesAB), top8);
+    const V outC = OrAnd(ShiftRight<12>(in4), ShiftLeft<4>(bytesCD), top8);
+    const V outD = OrAnd(ShiftRight<12>(in5), ShiftRight<4>(bytesCD), top8);
+    const V outE = OrAnd(ShiftRight<12>(in6), ShiftLeft<4>(bytesEF), top8);
+    const V outF = OrAnd(ShiftRight<12>(in7), ShiftRight<4>(bytesEF), top8);
+
+    StoreU(out8, d, raw + 8 * N);
+    StoreU(out9, d, raw + 9 * N);
+    StoreU(outA, d, raw + 0xA * N);
+    StoreU(outB, d, raw + 0xB * N);
+    StoreU(outC, d, raw + 0xC * N);
+    StoreU(outD, d, raw + 0xD * N);
+    StoreU(outE, d, raw + 0xE * N);
+    StoreU(outF, d, raw + 0xF * N);
+  }
+};  // Pack16<12>
+
+template <>
+struct Pack16<13> {
+  // Packs 16 vectors of u16 lanes read from `raw` into 13 vectors written to
+  // `packed_out`, keeping 13 bits per lane. The top-bit fields of raw0..4 are
+  // inserted without masking, so input lanes must be below 2^13.
+  //
+  // Layout: packed0..7 hold the low bytes of raw0..F (two per lane). Each of
+  // packed8..C holds three 5-bit fields (the bits [8, 13) of three inputs) in
+  // bits [0, 15), plus one bit of rawF in its MSB.
+  template <class D>
+  HWY_INLINE void Pack(D d, const uint16_t* HWY_RESTRICT raw,
+                       uint16_t* HWY_RESTRICT packed_out) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+    const VU16 raw0 = LoadU(d, raw + 0 * N);
+    const VU16 raw1 = LoadU(d, raw + 1 * N);
+    const VU16 raw2 = LoadU(d, raw + 2 * N);
+    const VU16 raw3 = LoadU(d, raw + 3 * N);
+    const VU16 raw4 = LoadU(d, raw + 4 * N);
+    const VU16 raw5 = LoadU(d, raw + 5 * N);
+    const VU16 raw6 = LoadU(d, raw + 6 * N);
+    const VU16 raw7 = LoadU(d, raw + 7 * N);
+    const VU16 raw8 = LoadU(d, raw + 8 * N);
+    const VU16 raw9 = LoadU(d, raw + 9 * N);
+    const VU16 rawA = LoadU(d, raw + 0xA * N);
+    const VU16 rawB = LoadU(d, raw + 0xB * N);
+    const VU16 rawC = LoadU(d, raw + 0xC * N);
+    const VU16 rawD = LoadU(d, raw + 0xD * N);
+    const VU16 rawE = LoadU(d, raw + 0xE * N);
+    const VU16 rawF = LoadU(d, raw + 0xF * N);
+
+    // As with 11 bits, it is not obvious what the optimal partitioning looks
+    // like. We similarly go with an 8+5 split.
+    const VU16 lo8 = Set(d, 0xFFu);
+
+    // Lower 8 bits of all raw: odd inputs in the upper byte, even inputs in
+    // the lower byte.
+    const VU16 packed0 = OrAnd(ShiftLeft<8>(raw1), raw0, lo8);
+    const VU16 packed1 = OrAnd(ShiftLeft<8>(raw3), raw2, lo8);
+    const VU16 packed2 = OrAnd(ShiftLeft<8>(raw5), raw4, lo8);
+    const VU16 packed3 = OrAnd(ShiftLeft<8>(raw7), raw6, lo8);
+    const VU16 packed4 = OrAnd(ShiftLeft<8>(raw9), raw8, lo8);
+    const VU16 packed5 = OrAnd(ShiftLeft<8>(rawB), rawA, lo8);
+    const VU16 packed6 = OrAnd(ShiftLeft<8>(rawD), rawC, lo8);
+    const VU16 packed7 = OrAnd(ShiftLeft<8>(rawF), rawE, lo8);
+
+    StoreU(packed0, d, packed_out + 0 * N);
+    StoreU(packed1, d, packed_out + 1 * N);
+    StoreU(packed2, d, packed_out + 2 * N);
+    StoreU(packed3, d, packed_out + 3 * N);
+    StoreU(packed4, d, packed_out + 4 * N);
+    StoreU(packed5, d, packed_out + 5 * N);
+    StoreU(packed6, d, packed_out + 6 * N);
+    StoreU(packed7, d, packed_out + 7 * N);
+
+    // Five vectors, three 5bit remnants each, plus one 5bit in their MSB.
+    const VU16 top0 = ShiftRight<8>(raw0);
+    const VU16 top1 = ShiftRight<8>(raw1);
+    const VU16 top2 = ShiftRight<8>(raw2);
+    const VU16 top3 = ShiftRight<8>(raw3);
+    const VU16 top4 = ShiftRight<8>(raw4);
+
+    // Insert top raw bits into 5-bit groups within packed8..C. Moving the
+    // mask along avoids masking each of raw0..E and enables OrAnd.
+    VU16 next = Set(d, 0x3E0u);  // 0x1F << 5
+    VU16 packed8 = OrAnd(top0, ShiftRight<3>(raw5), next);
+    VU16 packed9 = OrAnd(top1, ShiftRight<3>(raw6), next);
+    VU16 packedA = OrAnd(top2, ShiftRight<3>(raw7), next);
+    VU16 packedB = OrAnd(top3, ShiftRight<3>(raw8), next);
+    VU16 packedC = OrAnd(top4, ShiftRight<3>(raw9), next);
+    next = ShiftLeft<5>(next);  // = 0x7C00u
+    packed8 = OrAnd(packed8, ShiftLeft<2>(rawA), next);
+    packed9 = OrAnd(packed9, ShiftLeft<2>(rawB), next);
+    packedA = OrAnd(packedA, ShiftLeft<2>(rawC), next);
+    packedB = OrAnd(packedB, ShiftLeft<2>(rawD), next);
+    packedC = OrAnd(packedC, ShiftLeft<2>(rawE), next);
+
+    // Scatter upper 5 bits of rawF into the upper bits: bit 8 goes to
+    // packed8, bit 9 to packed9, ... bit 12 to packedC. The mask must advance
+    // by a whole 5-bit field so that only the MSB remains; advancing by 3
+    // would yield 0xE000 and OR low bits of rawF into the rawA..E fields.
+    next = ShiftLeft<5>(next);  // = 0x8000u
+    packed8 = OrAnd(packed8, ShiftLeft<7>(rawF), next);
+    packed9 = OrAnd(packed9, ShiftLeft<6>(rawF), next);
+    packedA = OrAnd(packedA, ShiftLeft<5>(rawF), next);
+    packedB = OrAnd(packedB, ShiftLeft<4>(rawF), next);
+    packedC = OrAnd(packedC, ShiftLeft<3>(rawF), next);
+
+    StoreU(packed8, d, packed_out + 8 * N);
+    StoreU(packed9, d, packed_out + 9 * N);
+    StoreU(packedA, d, packed_out + 0xA * N);
+    StoreU(packedB, d, packed_out + 0xB * N);
+    StoreU(packedC, d, packed_out + 0xC * N);
+  }
+
+  // Inverse of Pack: reads 13 vectors from `packed_in` and writes 16 vectors
+  // of 13-bit values to `raw`.
+  template <class D>
+  HWY_INLINE void Unpack(D d, const uint16_t* HWY_RESTRICT packed_in,
+                         uint16_t* HWY_RESTRICT raw) const {
+    using VU16 = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+
+    const VU16 packed0 = BitCast(d, LoadU(d, packed_in + 0 * N));
+    const VU16 packed1 = BitCast(d, LoadU(d, packed_in + 1 * N));
+    const VU16 packed2 = BitCast(d, LoadU(d, packed_in + 2 * N));
+    const VU16 packed3 = BitCast(d, LoadU(d, packed_in + 3 * N));
+    const VU16 packed4 = BitCast(d, LoadU(d, packed_in + 4 * N));
+    const VU16 packed5 = BitCast(d, LoadU(d, packed_in + 5 * N));
+    const VU16 packed6 = BitCast(d, LoadU(d, packed_in + 6 * N));
+    const VU16 packed7 = BitCast(d, LoadU(d, packed_in + 7 * N));
+    const VU16 packed8 = BitCast(d, LoadU(d, packed_in + 8 * N));
+    const VU16 packed9 = BitCast(d, LoadU(d, packed_in + 9 * N));
+    const VU16 packedA = BitCast(d, LoadU(d, packed_in + 0xA * N));
+    const VU16 packedB = BitCast(d, LoadU(d, packed_in + 0xB * N));
+    const VU16 packedC = BitCast(d, LoadU(d, packed_in + 0xC * N));
+
+    const VU16 mask = Set(d, 0xFFu);  // Lowest 8 bits
+
+    // Low byte of every output: even outputs from the lower byte of
+    // packed0..7, odd outputs from the upper byte.
+    const VU16 down0 = And(packed0, mask);
+    const VU16 down1 = ShiftRight<8>(packed0);
+    const VU16 down2 = And(packed1, mask);
+    const VU16 down3 = ShiftRight<8>(packed1);
+    const VU16 down4 = And(packed2, mask);
+    const VU16 down5 = ShiftRight<8>(packed2);
+    const VU16 down6 = And(packed3, mask);
+    const VU16 down7 = ShiftRight<8>(packed3);
+    const VU16 down8 = And(packed4, mask);
+    const VU16 down9 = ShiftRight<8>(packed4);
+    const VU16 downA = And(packed5, mask);
+    const VU16 downB = ShiftRight<8>(packed5);
+    const VU16 downC = And(packed6, mask);
+    const VU16 downD = ShiftRight<8>(packed6);
+    const VU16 downE = And(packed7, mask);
+    const VU16 downF = ShiftRight<8>(packed7);
+
+    // Upper five bits from packed8..C, eight bits from down0..F. Each shift
+    // moves one 5-bit field onto bits [8, 13). The source vector of each
+    // output mirrors Pack: raw(5+i) and raw(A+i) live in packed(8+i).
+    const VU16 hi5 = Set(d, 0x1F00u);
+    const VU16 raw0 = OrAnd(down0, ShiftLeft<8>(packed8), hi5);
+    const VU16 raw1 = OrAnd(down1, ShiftLeft<8>(packed9), hi5);
+    const VU16 raw2 = OrAnd(down2, ShiftLeft<8>(packedA), hi5);
+    const VU16 raw3 = OrAnd(down3, ShiftLeft<8>(packedB), hi5);
+    const VU16 raw4 = OrAnd(down4, ShiftLeft<8>(packedC), hi5);
+
+    const VU16 raw5 = OrAnd(down5, ShiftLeft<3>(packed8), hi5);
+    const VU16 raw6 = OrAnd(down6, ShiftLeft<3>(packed9), hi5);
+    const VU16 raw7 = OrAnd(down7, ShiftLeft<3>(packedA), hi5);
+    const VU16 raw8 = OrAnd(down8, ShiftLeft<3>(packedB), hi5);
+    const VU16 raw9 = OrAnd(down9, ShiftLeft<3>(packedC), hi5);
+
+    const VU16 rawA = OrAnd(downA, ShiftRight<2>(packed8), hi5);
+    const VU16 rawB = OrAnd(downB, ShiftRight<2>(packed9), hi5);
+    const VU16 rawC = OrAnd(downC, ShiftRight<2>(packedA), hi5);
+    const VU16 rawD = OrAnd(downD, ShiftRight<2>(packedB), hi5);
+    const VU16 rawE = OrAnd(downE, ShiftRight<2>(packedC), hi5);
+
+    // Each of packed8..C contributes exactly one bit (its MSB) to the top
+    // 5-of-13 bits of rawF. Isolate that bit before shifting it into place:
+    // masking the shifted value with hi5 instead would also admit bits
+    // 11..14, which belong to the rawA..E fields, and corrupt rawF.
+    const VU16 msb = Set(d, 0x8000u);
+    const VU16 p0 = Xor3(ShiftRight<7>(And(packed8, msb)),  //
+                         ShiftRight<6>(And(packed9, msb)),
+                         ShiftRight<5>(And(packedA, msb)));
+    const VU16 p1 = Xor3(ShiftRight<4>(And(packedB, msb)),
+                         ShiftRight<3>(And(packedC, msb)), downF);
+    const VU16 rawF = Or(p0, p1);
+
+    StoreU(raw0, d, raw + 0 * N);
+    StoreU(raw1, d, raw + 1 * N);
+    StoreU(raw2, d, raw + 2 * N);
+    StoreU(raw3, d, raw + 3 * N);
+    StoreU(raw4, d, raw + 4 * N);
+    StoreU(raw5, d, raw + 5 * N);
+    StoreU(raw6, d, raw + 6 * N);
+    StoreU(raw7, d, raw + 7 * N);
+    StoreU(raw8, d, raw + 8 * N);
+    StoreU(raw9, d, raw + 9 * N);
+    StoreU(rawA, d, raw + 0xA * N);
+    StoreU(rawB, d, raw + 0xB * N);
+    StoreU(rawC, d, raw + 0xC * N);
+    StoreU(rawD, d, raw + 0xD * N);
+    StoreU(rawE, d, raw + 0xE * N);
+    StoreU(rawF, d, raw + 0xF * N);
+  }
+};  // Pack16<13>
+
+template <>
+struct Pack16<14> {
+  // Packs 16 vectors of u16 lanes read from `raw` into 14 vectors written to
+  // `packed_out`, keeping 14 bits per lane. Inputs 0..D are OR-ed in without
+  // masking, so their upper two bits must already be zero.
+  template <class D>
+  HWY_INLINE void Pack(D d, const uint16_t* HWY_RESTRICT raw,
+                       uint16_t* HWY_RESTRICT packed_out) const {
+    using V = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+
+    // Inputs 0..D stay whole in the low 14 bits of outputs 0..D.
+    const V body0 = LoadU(d, raw + 0 * N);
+    const V body1 = LoadU(d, raw + 1 * N);
+    const V body2 = LoadU(d, raw + 2 * N);
+    const V body3 = LoadU(d, raw + 3 * N);
+    const V body4 = LoadU(d, raw + 4 * N);
+    const V body5 = LoadU(d, raw + 5 * N);
+    const V body6 = LoadU(d, raw + 6 * N);
+    const V body7 = LoadU(d, raw + 7 * N);
+    const V body8 = LoadU(d, raw + 8 * N);
+    const V body9 = LoadU(d, raw + 9 * N);
+    const V bodyA = LoadU(d, raw + 0xA * N);
+    const V bodyB = LoadU(d, raw + 0xB * N);
+    const V bodyC = LoadU(d, raw + 0xC * N);
+    const V bodyD = LoadU(d, raw + 0xD * N);
+    // Inputs E and F are cut into seven 2-bit pieces each and spread over
+    // the two spare top bits of outputs 0..6 and 7..D respectively.
+    const V spreadE = LoadU(d, raw + 0xE * N);
+    const V spreadF = LoadU(d, raw + 0xF * N);
+
+    const V top2 = Set(d, 0xC000u);
+
+    // Piece k of a spread input (bits [2k, 2k+2)) is shifted left by 14 - 2k.
+    // For k = 0 the shift alone leaves only the wanted two bits, so no mask
+    // is needed there.
+    const V out0 = Or(body0, ShiftLeft<14>(spreadE));
+    const V out1 = OrAnd(body1, ShiftLeft<12>(spreadE), top2);
+    const V out2 = OrAnd(body2, ShiftLeft<10>(spreadE), top2);
+    const V out3 = OrAnd(body3, ShiftLeft<8>(spreadE), top2);
+    const V out4 = OrAnd(body4, ShiftLeft<6>(spreadE), top2);
+    const V out5 = OrAnd(body5, ShiftLeft<4>(spreadE), top2);
+    const V out6 = OrAnd(body6, ShiftLeft<2>(spreadE), top2);
+
+    const V out7 = Or(body7, ShiftLeft<14>(spreadF));
+    const V out8 = OrAnd(body8, ShiftLeft<12>(spreadF), top2);
+    const V out9 = OrAnd(body9, ShiftLeft<10>(spreadF), top2);
+    const V outA = OrAnd(bodyA, ShiftLeft<8>(spreadF), top2);
+    const V outB = OrAnd(bodyB, ShiftLeft<6>(spreadF), top2);
+    const V outC = OrAnd(bodyC, ShiftLeft<4>(spreadF), top2);
+    const V outD = OrAnd(bodyD, ShiftLeft<2>(spreadF), top2);
+
+    StoreU(out0, d, packed_out + 0 * N);
+    StoreU(out1, d, packed_out + 1 * N);
+    StoreU(out2, d, packed_out + 2 * N);
+    StoreU(out3, d, packed_out + 3 * N);
+    StoreU(out4, d, packed_out + 4 * N);
+    StoreU(out5, d, packed_out + 5 * N);
+    StoreU(out6, d, packed_out + 6 * N);
+    StoreU(out7, d, packed_out + 7 * N);
+    StoreU(out8, d, packed_out + 8 * N);
+    StoreU(out9, d, packed_out + 9 * N);
+    StoreU(outA, d, packed_out + 0xA * N);
+    StoreU(outB, d, packed_out + 0xB * N);
+    StoreU(outC, d, packed_out + 0xC * N);
+    StoreU(outD, d, packed_out + 0xD * N);
+  }
+
+  // Inverse of Pack: reads 14 vectors from `packed_in` and writes 16 vectors
+  // of 14-bit values to `raw`.
+  template <class D>
+  HWY_INLINE void Unpack(D d, const uint16_t* HWY_RESTRICT packed_in,
+                         uint16_t* HWY_RESTRICT raw) const {
+    using V = Vec<decltype(d)>;
+    const size_t N = Lanes(d);
+
+    const V in0 = BitCast(d, LoadU(d, packed_in + 0 * N));
+    const V in1 = BitCast(d, LoadU(d, packed_in + 1 * N));
+    const V in2 = BitCast(d, LoadU(d, packed_in + 2 * N));
+    const V in3 = BitCast(d, LoadU(d, packed_in + 3 * N));
+    const V in4 = BitCast(d, LoadU(d, packed_in + 4 * N));
+    const V in5 = BitCast(d, LoadU(d, packed_in + 5 * N));
+    const V in6 = BitCast(d, LoadU(d, packed_in + 6 * N));
+    const V in7 = BitCast(d, LoadU(d, packed_in + 7 * N));
+    const V in8 = BitCast(d, LoadU(d, packed_in + 8 * N));
+    const V in9 = BitCast(d, LoadU(d, packed_in + 9 * N));
+    const V inA = BitCast(d, LoadU(d, packed_in + 0xA * N));
+    const V inB = BitCast(d, LoadU(d, packed_in + 0xB * N));
+    const V inC = BitCast(d, LoadU(d, packed_in + 0xC * N));
+    const V inD = BitCast(d, LoadU(d, packed_in + 0xD * N));
+
+    // Outputs 0..D are simply the low 14 bits of the inputs.
+    const V low14 = Set(d, 0x3FFFu);
+    StoreU(And(in0, low14), d, raw + 0 * N);
+    StoreU(And(in1, low14), d, raw + 1 * N);
+    StoreU(And(in2, low14), d, raw + 2 * N);
+    StoreU(And(in3, low14), d, raw + 3 * N);
+    StoreU(And(in4, low14), d, raw + 4 * N);
+    StoreU(And(in5, low14), d, raw + 5 * N);
+    StoreU(And(in6, low14), d, raw + 6 * N);
+    StoreU(And(in7, low14), d, raw + 7 * N);
+    StoreU(And(in8, low14), d, raw + 8 * N);
+    StoreU(And(in9, low14), d, raw + 9 * N);
+    StoreU(And(inA, low14), d, raw + 0xA * N);
+    StoreU(And(inB, low14), d, raw + 0xB * N);
+    StoreU(And(inC, low14), d, raw + 0xC * N);
+    StoreU(And(inD, low14), d, raw + 0xD * N);
+
+    // Output E is reassembled from the top two bits of in0..6. AndNot with
+    // low14 keeps only those two bits; each pair is then shifted down to its
+    // original position. The pieces are disjoint, so Xor3 merges them.
+    const V pieceE0 = ShiftRight<14>(in0);
+    const V pieceE1 = ShiftRight<12>(AndNot(low14, in1));
+    const V pieceE2 = ShiftRight<10>(AndNot(low14, in2));
+    const V pieceE3 = ShiftRight<8>(AndNot(low14, in3));
+    const V pieceE4 = ShiftRight<6>(AndNot(low14, in4));
+    const V pieceE5 = ShiftRight<4>(AndNot(low14, in5));
+    const V pieceE6 = ShiftRight<2>(AndNot(low14, in6));
+    const V partE0 = Xor3(pieceE0, pieceE1, pieceE2);
+    const V partE1 = Xor3(pieceE3, pieceE4, pieceE5);
+    const V outE = Xor3(pieceE6, partE0, partE1);
+
+    // Output F likewise, from the top two bits of in7..D.
+    const V pieceF0 = ShiftRight<14>(AndNot(low14, in7));
+    const V pieceF1 = ShiftRight<12>(AndNot(low14, in8));
+    const V pieceF2 = ShiftRight<10>(AndNot(low14, in9));
+    const V pieceF3 = ShiftRight<8>(AndNot(low14, inA));
+    const V pieceF4 = ShiftRight<6>(AndNot(low14, inB));
+    const V pieceF5 = ShiftRight<4>(AndNot(low14, inC));
+    const V pieceF6 = ShiftRight<2>(AndNot(low14, inD));
+    const V partF0 = Xor3(pieceF0, pieceF1, pieceF2);
+    const V partF1 = Xor3(pieceF3, pieceF4, pieceF5);
+    const V outF = Xor3(pieceF6, partF0, partF1);
+
+    StoreU(outE, d, raw + 0xE * N);
+    StoreU(outF, d, raw + 0xF * N);
+  }
+};  // Pack16<14>
+
+// Packing for 15-bit values: 16 raw vectors are stored in 15 packed vectors.
+// Packed vector i (i = 0..14) keeps raw vector i in its low 15 bits, and its
+// top bit carries bit i of the 16th raw vector, so no packed bit is unused.
+// N below denotes Lanes(d), the number of uint16_t lanes per vector.
+template <>
+struct Pack16<15> {
+  // Loads 16 * N values from `raw` and stores 15 * N values to `packed_out`.
+  // Raw values are ORed in without masking, so they are expected to fit in
+  // 15 bits: a set bit 15 would clash with the bit borrowed from the last raw
+  // vector.
+  template <class D>
+  HWY_INLINE void Pack(D d, const uint16_t* HWY_RESTRICT raw,
+                       uint16_t* HWY_RESTRICT packed_out) const {
+    using V = Vec<D>;
+    const size_t N = Lanes(d);
+
+    const V in0 = LoadU(d, raw + 0 * N);
+    const V in1 = LoadU(d, raw + 1 * N);
+    const V in2 = LoadU(d, raw + 2 * N);
+    const V in3 = LoadU(d, raw + 3 * N);
+    const V in4 = LoadU(d, raw + 4 * N);
+    const V in5 = LoadU(d, raw + 5 * N);
+    const V in6 = LoadU(d, raw + 6 * N);
+    const V in7 = LoadU(d, raw + 7 * N);
+    const V in8 = LoadU(d, raw + 8 * N);
+    const V in9 = LoadU(d, raw + 9 * N);
+    const V inA = LoadU(d, raw + 0xA * N);
+    const V inB = LoadU(d, raw + 0xB * N);
+    const V inC = LoadU(d, raw + 0xC * N);
+    const V inD = LoadU(d, raw + 0xD * N);
+    const V inE = LoadU(d, raw + 0xE * N);
+    const V inF = LoadU(d, raw + 0xF * N);
+
+    // Bit i of the last raw vector is shifted up to bit 15 and merged into
+    // output i. Output 0 needs no mask: shifting left by 15 keeps only bit 0.
+    const V top_bit = Set(d, 0x8000u);
+    StoreU(Or(in0, ShiftLeft<15>(inF)), d, packed_out + 0 * N);
+    StoreU(OrAnd(in1, ShiftLeft<14>(inF), top_bit), d, packed_out + 1 * N);
+    StoreU(OrAnd(in2, ShiftLeft<13>(inF), top_bit), d, packed_out + 2 * N);
+    StoreU(OrAnd(in3, ShiftLeft<12>(inF), top_bit), d, packed_out + 3 * N);
+    StoreU(OrAnd(in4, ShiftLeft<11>(inF), top_bit), d, packed_out + 4 * N);
+    StoreU(OrAnd(in5, ShiftLeft<10>(inF), top_bit), d, packed_out + 5 * N);
+    StoreU(OrAnd(in6, ShiftLeft<9>(inF), top_bit), d, packed_out + 6 * N);
+    StoreU(OrAnd(in7, ShiftLeft<8>(inF), top_bit), d, packed_out + 7 * N);
+    StoreU(OrAnd(in8, ShiftLeft<7>(inF), top_bit), d, packed_out + 8 * N);
+    StoreU(OrAnd(in9, ShiftLeft<6>(inF), top_bit), d, packed_out + 9 * N);
+    StoreU(OrAnd(inA, ShiftLeft<5>(inF), top_bit), d, packed_out + 0xA * N);
+    StoreU(OrAnd(inB, ShiftLeft<4>(inF), top_bit), d, packed_out + 0xB * N);
+    StoreU(OrAnd(inC, ShiftLeft<3>(inF), top_bit), d, packed_out + 0xC * N);
+    StoreU(OrAnd(inD, ShiftLeft<2>(inF), top_bit), d, packed_out + 0xD * N);
+    StoreU(OrAnd(inE, ShiftLeft<1>(inF), top_bit), d, packed_out + 0xE * N);
+  }
+
+  // Inverse of Pack(): loads 15 * N values from `packed_in` and stores 16 * N
+  // values to `raw`. Each stored value has bit 15 clear because the borrowed
+  // top bits are either masked off or shifted down.
+  template <class D>
+  HWY_INLINE void Unpack(D d, const uint16_t* HWY_RESTRICT packed_in,
+                         uint16_t* HWY_RESTRICT raw) const {
+    using V = Vec<D>;
+    const size_t N = Lanes(d);
+
+    const V in0 = LoadU(d, packed_in + 0 * N);
+    const V in1 = LoadU(d, packed_in + 1 * N);
+    const V in2 = LoadU(d, packed_in + 2 * N);
+    const V in3 = LoadU(d, packed_in + 3 * N);
+    const V in4 = LoadU(d, packed_in + 4 * N);
+    const V in5 = LoadU(d, packed_in + 5 * N);
+    const V in6 = LoadU(d, packed_in + 6 * N);
+    const V in7 = LoadU(d, packed_in + 7 * N);
+    const V in8 = LoadU(d, packed_in + 8 * N);
+    const V in9 = LoadU(d, packed_in + 9 * N);
+    const V inA = LoadU(d, packed_in + 0xA * N);
+    const V inB = LoadU(d, packed_in + 0xB * N);
+    const V inC = LoadU(d, packed_in + 0xC * N);
+    const V inD = LoadU(d, packed_in + 0xD * N);
+    const V inE = LoadU(d, packed_in + 0xE * N);
+
+    const V low15 = Set(d, 0x7FFFu);  // selects the 15 payload bits
+
+    // Raw vectors 0..E are the payload bits of the matching packed vector.
+    const V out0 = And(in0, low15);
+    StoreU(out0, d, raw + 0 * N);
+
+    const V out1 = And(in1, low15);
+    StoreU(out1, d, raw + 1 * N);
+
+    const V out2 = And(in2, low15);
+    StoreU(out2, d, raw + 2 * N);
+
+    const V out3 = And(in3, low15);
+    StoreU(out3, d, raw + 3 * N);
+
+    const V out4 = And(in4, low15);
+    StoreU(out4, d, raw + 4 * N);
+
+    const V out5 = And(in5, low15);
+    StoreU(out5, d, raw + 5 * N);
+
+    const V out6 = And(in6, low15);
+    StoreU(out6, d, raw + 6 * N);
+
+    const V out7 = And(in7, low15);
+    StoreU(out7, d, raw + 7 * N);
+
+    const V out8 = And(in8, low15);
+    StoreU(out8, d, raw + 8 * N);
+
+    const V out9 = And(in9, low15);
+    StoreU(out9, d, raw + 9 * N);
+
+    const V outA = And(inA, low15);
+    StoreU(outA, d, raw + 0xA * N);
+
+    const V outB = And(inB, low15);
+    StoreU(outB, d, raw + 0xB * N);
+
+    const V outC = And(inC, low15);
+    StoreU(outC, d, raw + 0xC * N);
+
+    const V outD = And(inD, low15);
+    StoreU(outD, d, raw + 0xD * N);
+
+    const V outE = And(inE, low15);
+    StoreU(outE, d, raw + 0xE * N);
+
+    // The last raw vector is rebuilt from the top bits: the top bit of packed
+    // vector i moves down to bit i. Every term supplies a different bit, so
+    // XOR combines them exactly like OR would.
+    const V bits0_2 = Xor3(ShiftRight<15>(in0),  //
+                           ShiftRight<14>(AndNot(low15, in1)),
+                           ShiftRight<13>(AndNot(low15, in2)));
+    const V bits3_5 = Xor3(ShiftRight<12>(AndNot(low15, in3)),
+                           ShiftRight<11>(AndNot(low15, in4)),
+                           ShiftRight<10>(AndNot(low15, in5)));
+    const V bits6_8 = Xor3(ShiftRight<9>(AndNot(low15, in6)),
+                           ShiftRight<8>(AndNot(low15, in7)),
+                           ShiftRight<7>(AndNot(low15, in8)));
+    const V bits9_B = Xor3(ShiftRight<6>(AndNot(low15, in9)),
+                           ShiftRight<5>(AndNot(low15, inA)),
+                           ShiftRight<4>(AndNot(low15, inB)));
+    const V bitsC_E = Xor3(ShiftRight<3>(AndNot(low15, inC)),
+                           ShiftRight<2>(AndNot(low15, inD)),
+                           ShiftRight<1>(AndNot(low15, inE)));
+
+    const V outF = Xor3(bits0_2, bits3_5, Xor3(bits6_8, bits9_B, bitsC_E));
+    StoreU(outF, d, raw + 0xF * N);
+  }
+};  // Pack16<15>
+
+// 16-bit "packing" is the identity: the values already fill every uint16_t.
+template <>
+struct Pack16<16> {
+  // Copies 16 * Lanes(d) values from `raw` to `packed_out` unchanged.
+  template <class D>
+  HWY_INLINE void Pack(D d, const uint16_t* HWY_RESTRICT raw,
+                       uint16_t* HWY_RESTRICT packed_out) const {
+    const size_t N = Lanes(d);
+    const auto v0 = LoadU(d, raw + 0 * N);
+    const auto v1 = LoadU(d, raw + 1 * N);
+    const auto v2 = LoadU(d, raw + 2 * N);
+    const auto v3 = LoadU(d, raw + 3 * N);
+    const auto v4 = LoadU(d, raw + 4 * N);
+    const auto v5 = LoadU(d, raw + 5 * N);
+    const auto v6 = LoadU(d, raw + 6 * N);
+    const auto v7 = LoadU(d, raw + 7 * N);
+    const auto v8 = LoadU(d, raw + 8 * N);
+    const auto v9 = LoadU(d, raw + 9 * N);
+    const auto vA = LoadU(d, raw + 0xA * N);
+    const auto vB = LoadU(d, raw + 0xB * N);
+    const auto vC = LoadU(d, raw + 0xC * N);
+    const auto vD = LoadU(d, raw + 0xD * N);
+    const auto vE = LoadU(d, raw + 0xE * N);
+    const auto vF = LoadU(d, raw + 0xF * N);
+
+    StoreU(v0, d, packed_out + 0 * N);
+    StoreU(v1, d, packed_out + 1 * N);
+    StoreU(v2, d, packed_out + 2 * N);
+    StoreU(v3, d, packed_out + 3 * N);
+    StoreU(v4, d, packed_out + 4 * N);
+    StoreU(v5, d, packed_out + 5 * N);
+    StoreU(v6, d, packed_out + 6 * N);
+    StoreU(v7, d, packed_out + 7 * N);
+    StoreU(v8, d, packed_out + 8 * N);
+    StoreU(v9, d, packed_out + 9 * N);
+    StoreU(vA, d, packed_out + 0xA * N);
+    StoreU(vB, d, packed_out + 0xB * N);
+    StoreU(vC, d, packed_out + 0xC * N);
+    StoreU(vD, d, packed_out + 0xD * N);
+    StoreU(vE, d, packed_out + 0xE * N);
+    StoreU(vF, d, packed_out + 0xF * N);
+  }
+
+  // Copies 16 * Lanes(d) values from `packed_in` to `raw` unchanged.
+  template <class D>
+  HWY_INLINE void Unpack(D d, const uint16_t* HWY_RESTRICT packed_in,
+                         uint16_t* HWY_RESTRICT raw) const {
+    const size_t N = Lanes(d);
+    const auto v0 = LoadU(d, packed_in + 0 * N);
+    const auto v1 = LoadU(d, packed_in + 1 * N);
+    const auto v2 = LoadU(d, packed_in + 2 * N);
+    const auto v3 = LoadU(d, packed_in + 3 * N);
+    const auto v4 = LoadU(d, packed_in + 4 * N);
+    const auto v5 = LoadU(d, packed_in + 5 * N);
+    const auto v6 = LoadU(d, packed_in + 6 * N);
+    const auto v7 = LoadU(d, packed_in + 7 * N);
+    const auto v8 = LoadU(d, packed_in + 8 * N);
+    const auto v9 = LoadU(d, packed_in + 9 * N);
+    const auto vA = LoadU(d, packed_in + 0xA * N);
+    const auto vB = LoadU(d, packed_in + 0xB * N);
+    const auto vC = LoadU(d, packed_in + 0xC * N);
+    const auto vD = LoadU(d, packed_in + 0xD * N);
+    const auto vE = LoadU(d, packed_in + 0xE * N);
+    const auto vF = LoadU(d, packed_in + 0xF * N);
+
+    StoreU(v0, d, raw + 0 * N);
+    StoreU(v1, d, raw + 1 * N);
+    StoreU(v2, d, raw + 2 * N);
+    StoreU(v3, d, raw + 3 * N);
+    StoreU(v4, d, raw + 4 * N);
+    StoreU(v5, d, raw + 5 * N);
+    StoreU(v6, d, raw + 6 * N);
+    StoreU(v7, d, raw + 7 * N);
+    StoreU(v8, d, raw + 8 * N);
+    StoreU(v9, d, raw + 9 * N);
+    StoreU(vA, d, raw + 0xA * N);
+    StoreU(vB, d, raw + 0xB * N);
+    StoreU(vC, d, raw + 0xC * N);
+    StoreU(vD, d, raw + 0xD * N);
+    StoreU(vE, d, raw + 0xE * N);
+    StoreU(vF, d, raw + 0xF * N);
+  }
+};  // Pack16<16>
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#endif  // HIGHWAY_HWY_CONTRIB_BIT_PACK_INL_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/bit_pack/bit_pack_test.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/bit_pack/bit_pack_test.cc
new file mode 100644
index 0000000000..4994e1489a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/bit_pack/bit_pack_test.cc
@@ -0,0 +1,206 @@
+// Copyright 2022 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+
+#include <vector>
+
+#include "hwy/aligned_allocator.h"
+#include "hwy/base.h"
+#include "hwy/nanobenchmark.h"
+
+// clang-format off
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/bit_pack/bit_pack_test.cc"  // NOLINT
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+#include "hwy/highway.h"
+#include "hwy/contrib/bit_pack/bit_pack-inl.h"
+#include "hwy/tests/test_util-inl.h"
+// clang-format on
+
+#ifndef HWY_BIT_PACK_BENCHMARK
+#define HWY_BIT_PACK_BENCHMARK 0
+#endif
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+// Used to prevent running benchmark (slow) for partial vectors and targets
+// except the best available. Global, not per-target, hence must be outside
+// HWY_NAMESPACE. Declare first because HWY_ONCE is only true after some code
+// has been re-included.
+extern size_t last_bits;
+extern uint64_t best_target;
+#if HWY_ONCE
+size_t last_bits = 0;
+uint64_t best_target = ~0ull;
+#endif
+namespace HWY_NAMESPACE {
+
+template <size_t kBits, typename T>
+T Random(RandomState& rng) {  // Any kBits-wide value; requires kBits < 32.
+  return static_cast<T>(Random32(&rng) & ((uint32_t{1} << kBits) - 1u));
+}
+
+template <typename T>
+class Checker {  // Remembers raw inputs and verifies outputs against them.
+ public:
+  explicit Checker(size_t num) { raw_.reserve(num); }
+  void NotifyRaw(T raw) { raw_.push_back(raw); }  // records one input
+
+  void NotifyRawOutput(size_t bits, T raw) {  // aborts on the first mismatch
+    const T expected = raw_[num_verified_];
+    if (expected != raw) {
+      HWY_ABORT("%zu bits: pos %zu of %zu, expected %.0f actual %.0f\n", bits,
+                num_verified_, raw_.size(), static_cast<double>(expected),
+                static_cast<double>(raw));
+    }
+    ++num_verified_;
+  }
+
+ private:
+  std::vector<T> raw_;       // inputs in the order they were recorded
+  size_t num_verified_ = 0;  // index of the next recorded input to compare
+};
+
+template <template <size_t> class PackT, size_t kVectors, size_t kBits>
+struct TestPack {  // Round-trips random data through PackT<kBits>.
+  template <typename T, class D>
+  void operator()(T /* t */, D d) {
+    constexpr size_t kLoops = 16;  // working set slightly larger than L1
+    const size_t N = Lanes(d);
+    RandomState rng(N * 129);
+    static_assert(kBits <= kVectors, "");
+    const size_t num_per_loop = N * kVectors;  // raw elements per Pack() call
+    const size_t num = num_per_loop * kLoops;  // total raw elements
+    const size_t num_packed_per_loop = N * kBits;  // packed per Pack() call
+    const size_t num_packed = num_packed_per_loop * kLoops;
+    Checker<T> checker(num);
+    AlignedFreeUniquePtr<T[]> raw = hwy::AllocateAligned<T>(num);
+    AlignedFreeUniquePtr<T[]> raw2 = hwy::AllocateAligned<T>(num);
+    AlignedFreeUniquePtr<T[]> packed = hwy::AllocateAligned<T>(num_packed);
+    HWY_ASSERT(raw && raw2 && packed);
+
+    for (size_t i = 0; i < num; ++i) {  // create and record the inputs
+      raw[i] = Random<kBits, T>(rng);
+      checker.NotifyRaw(raw[i]);
+    }
+
+    best_target = HWY_MIN(best_target, HWY_TARGET);  // smallest seen so far
+    const bool run_bench = HWY_BIT_PACK_BENCHMARK && (kBits != last_bits) &&
+                           (HWY_TARGET == best_target);
+    last_bits = kBits;  // later calls with the same kBits skip the benchmark
+
+    const PackT<kBits> func;
+
+    if (run_bench) {
+      const size_t kNumInputs = 1;
+      const size_t num_items = num * size_t(Unpredictable1());
+      const FuncInput inputs[kNumInputs] = {num_items};
+      Result results[kNumInputs];
+
+      Params p;
+      p.verbose = false;
+      p.max_evals = 7;
+      p.target_rel_mad = 0.002;
+      const size_t num_results = MeasureClosure(
+          [&](FuncInput) HWY_ATTR {
+            for (size_t i = 0, pi = 0; i < num;
+                 i += num_per_loop, pi += num_packed_per_loop) {
+              func.Pack(d, raw.get() + i, packed.get() + pi);
+            }
+            packed.get()[Random32(&rng) % num_packed] += Unpredictable1() - 1;
+            for (size_t i = 0, pi = 0; i < num;
+                 i += num_per_loop, pi += num_packed_per_loop) {
+              func.Unpack(d, packed.get() + pi, raw2.get() + i);
+            }
+            return raw2[Random32(&rng) % num];
+          },
+          inputs, kNumInputs, results, p);
+      if (num_results != kNumInputs) {
+        fprintf(stderr, "MeasureClosure failed.\n");
+        return;
+      }
+      // Print the throughput of the Pack+Unpack round trip, one line per input.
+      for (size_t i = 0; i < num_results; ++i) {
+        const size_t bytes_per_element = (kBits + 7) / 8;  // rounded up
+        const double bytes = results[i].input * bytes_per_element;
+        const double seconds =
+            results[i].ticks / platform::InvariantTicksPerSecond();
+        printf("Bits:%2d elements:%3d GB/s:%4.1f (+/-%3.1f%%)\n",
+               static_cast<int>(kBits), static_cast<int>(results[i].input),
+               1E-9 * bytes / seconds, results[i].variability * 100.0);
+      }
+    } else {  // no benchmark: one Pack+Unpack round trip without timing
+      for (size_t i = 0, pi = 0; i < num;
+           i += num_per_loop, pi += num_packed_per_loop) {
+        func.Pack(d, raw.get() + i, packed.get() + pi);
+      }
+      packed.get()[Random32(&rng) % num_packed] += Unpredictable1() - 1;
+      for (size_t i = 0, pi = 0; i < num;
+           i += num_per_loop, pi += num_packed_per_loop) {
+        func.Unpack(d, packed.get() + pi, raw2.get() + i);
+      }
+    }
+
+    for (size_t i = 0; i < num; ++i) {  // each output must equal its input
+      checker.NotifyRawOutput(kBits, raw2[i]);
+    }
+  }
+};
+
+void TestAllPack8() {  // Runs TestPack for Pack8 with bit widths 1..8.
+  ForShrinkableVectors<TestPack<Pack8, 8, 1>>()(uint8_t());
+  ForShrinkableVectors<TestPack<Pack8, 8, 2>>()(uint8_t());
+  ForShrinkableVectors<TestPack<Pack8, 8, 3>>()(uint8_t());
+  ForShrinkableVectors<TestPack<Pack8, 8, 4>>()(uint8_t());
+  ForShrinkableVectors<TestPack<Pack8, 8, 5>>()(uint8_t());
+  ForShrinkableVectors<TestPack<Pack8, 8, 6>>()(uint8_t());
+  ForShrinkableVectors<TestPack<Pack8, 8, 7>>()(uint8_t());
+  ForShrinkableVectors<TestPack<Pack8, 8, 8>>()(uint8_t());
+}
+
+void TestAllPack16() {  // Runs TestPack for Pack16 with bit widths 1..16.
+  ForShrinkableVectors<TestPack<Pack16, 16, 1>>()(uint16_t());
+  ForShrinkableVectors<TestPack<Pack16, 16, 2>>()(uint16_t());
+  ForShrinkableVectors<TestPack<Pack16, 16, 3>>()(uint16_t());
+  ForShrinkableVectors<TestPack<Pack16, 16, 4>>()(uint16_t());
+  ForShrinkableVectors<TestPack<Pack16, 16, 5>>()(uint16_t());
+  ForShrinkableVectors<TestPack<Pack16, 16, 6>>()(uint16_t());
+  ForShrinkableVectors<TestPack<Pack16, 16, 7>>()(uint16_t());
+  ForShrinkableVectors<TestPack<Pack16, 16, 8>>()(uint16_t());
+  ForShrinkableVectors<TestPack<Pack16, 16, 9>>()(uint16_t());
+  ForShrinkableVectors<TestPack<Pack16, 16, 10>>()(uint16_t());
+  ForShrinkableVectors<TestPack<Pack16, 16, 11>>()(uint16_t());
+  ForShrinkableVectors<TestPack<Pack16, 16, 12>>()(uint16_t());
+  ForShrinkableVectors<TestPack<Pack16, 16, 13>>()(uint16_t());
+  ForShrinkableVectors<TestPack<Pack16, 16, 14>>()(uint16_t());
+  ForShrinkableVectors<TestPack<Pack16, 16, 15>>()(uint16_t());
+  ForShrinkableVectors<TestPack<Pack16, 16, 16>>()(uint16_t());
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace hwy {
+HWY_BEFORE_TEST(BitPackTest);
+HWY_EXPORT_AND_TEST_P(BitPackTest, TestAllPack8);
+HWY_EXPORT_AND_TEST_P(BitPackTest, TestAllPack16);
+}  // namespace hwy
+
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/dot/dot-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/dot/dot-inl.h
new file mode 100644
index 0000000000..edaf3781ad
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/dot/dot-inl.h
@@ -0,0 +1,251 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Include guard (still compiled once per target)
+#include <cmath>
+
+#if defined(HIGHWAY_HWY_CONTRIB_DOT_DOT_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_CONTRIB_DOT_DOT_INL_H_
+#undef HIGHWAY_HWY_CONTRIB_DOT_DOT_INL_H_
+#else
+#define HIGHWAY_HWY_CONTRIB_DOT_DOT_INL_H_
+#endif
+
+#include "hwy/highway.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+struct Dot {  // Groups the dot-product kernels below; has no data members.
+  // Specify zero or more of these, ORed together, as the kAssumptions template
+  // argument to Compute. Each one may improve performance or reduce code size,
+  // at the cost of additional requirements on the arguments.
+  enum Assumptions {
+    // num_elements is at least N, which may be up to HWY_MAX_BYTES / sizeof(T).
+    kAtLeastOneVector = 1,
+    // num_elements is divisible by N (a power of two, so this can be used if
+    // the problem size is known to be a power of two >= HWY_MAX_BYTES /
+    // sizeof(T)).
+    kMultipleOfVector = 2,
+    // RoundUpTo(num_elements, N) elements are accessible; their value does not
+    // matter (will be treated as if they were zero).
+    kPaddedToVector = 4,
+  };
+
+  // Returns sum{pa[i] * pb[i]} for i in [0, num_elements); T is floating-point,
+  // including float16_t and double if HWY_HAVE_FLOAT16/64. kAssumptions is an
+  // OR of Assumptions. Aligned pointers are helpful but not required.
+  template <int kAssumptions, class D, typename T = TFromD<D>>
+  static HWY_INLINE T Compute(const D d, const T* const HWY_RESTRICT pa,
+                              const T* const HWY_RESTRICT pb,
+                              const size_t num_elements) {
+    static_assert(IsFloat<T>(), "MulAdd requires float type");
+    using V = decltype(Zero(d));
+
+    const size_t N = Lanes(d);
+    size_t i = 0;  // index of the next unprocessed element
+
+    constexpr bool kIsAtLeastOneVector =
+        (kAssumptions & kAtLeastOneVector) != 0;
+    constexpr bool kIsMultipleOfVector =
+        (kAssumptions & kMultipleOfVector) != 0;
+    constexpr bool kIsPaddedToVector = (kAssumptions & kPaddedToVector) != 0;
+
+    // Won't be able to do a full vector load without padding => scalar loop.
+    if (!kIsAtLeastOneVector && !kIsMultipleOfVector && !kIsPaddedToVector &&
+        HWY_UNLIKELY(num_elements < N)) {
+      // Only 2x unroll to avoid excessive code size.
+      T sum0 = T(0);
+      T sum1 = T(0);
+      for (; i + 2 <= num_elements; i += 2) {
+        sum0 += pa[i + 0] * pb[i + 0];
+        sum1 += pa[i + 1] * pb[i + 1];
+      }
+      if (i < num_elements) {  // odd count: one element is left over
+        sum1 += pa[i] * pb[i];
+      }
+      return sum0 + sum1;
+    }
+
+    // Compiler doesn't make independent sum* accumulators, so unroll manually.
+    // 2 FMA ports * 4 cycle latency = up to 8 in-flight, but that is excessive
+    // for unaligned inputs (each unaligned pointer halves the throughput
+    // because it occupies both L1 load ports for a cycle). We cannot have
+    // arrays of vectors on RVV/SVE, so always unroll 4x.
+    V sum0 = Zero(d);
+    V sum1 = Zero(d);
+    V sum2 = Zero(d);
+    V sum3 = Zero(d);
+
+    // Main loop: unrolled 4x, each vector pair feeds its own accumulator.
+    for (; i + 4 * N <= num_elements; /* i += 4 * N */) {  // incr in loop
+      const auto a0 = LoadU(d, pa + i);
+      const auto b0 = LoadU(d, pb + i);
+      i += N;
+      sum0 = MulAdd(a0, b0, sum0);
+      const auto a1 = LoadU(d, pa + i);
+      const auto b1 = LoadU(d, pb + i);
+      i += N;
+      sum1 = MulAdd(a1, b1, sum1);
+      const auto a2 = LoadU(d, pa + i);
+      const auto b2 = LoadU(d, pb + i);
+      i += N;
+      sum2 = MulAdd(a2, b2, sum2);
+      const auto a3 = LoadU(d, pa + i);
+      const auto b3 = LoadU(d, pb + i);
+      i += N;
+      sum3 = MulAdd(a3, b3, sum3);
+    }
+
+    // Up to 3 iterations of whole vectors
+    for (; i + N <= num_elements; i += N) {
+      const auto a = LoadU(d, pa + i);
+      const auto b = LoadU(d, pb + i);
+      sum0 = MulAdd(a, b, sum0);
+    }
+
+    if (!kIsMultipleOfVector) {  // fewer than N elements may still remain
+      const size_t remaining = num_elements - i;
+      if (remaining != 0) {
+        if (kIsPaddedToVector) {
+          const auto mask = FirstN(d, remaining);  // the valid lanes
+          const auto a = LoadU(d, pa + i);  // reads into the padding
+          const auto b = LoadU(d, pb + i);
+          sum1 = MulAdd(IfThenElseZero(mask, a), IfThenElseZero(mask, b), sum1);
+        } else {
+          // Unaligned load such that the last element is in the highest lane -
+          // ensures we do not touch any elements outside the valid range.
+          // If we get here, then num_elements >= N.
+          HWY_DASSERT(i >= N);
+          i += remaining - N;  // rewind: load ends at last element
+          const auto skip = FirstN(d, N - remaining);  // lanes already summed
+          const auto a = LoadU(d, pa + i);  // always unaligned
+          const auto b = LoadU(d, pb + i);
+          sum1 = MulAdd(IfThenZeroElse(skip, a), IfThenZeroElse(skip, b), sum1);
+        }
+      }
+    }  // !kIsMultipleOfVector
+
+    // Reduction tree: sum of all accumulators by pairs, then across lanes.
+    sum0 = Add(sum0, sum1);
+    sum2 = Add(sum2, sum3);
+    sum0 = Add(sum0, sum2);
+    return ReduceSum(d, sum0);
+  }
+
+  // Returns sum{pa[i] * pb[i]} for bfloat16 inputs, accumulated as float.
+  // Aligning the pointers to a multiple of N elements is helpful, not required.
+  template <int kAssumptions, class D>
+  static HWY_INLINE float Compute(const D d,
+                                  const bfloat16_t* const HWY_RESTRICT pa,
+                                  const bfloat16_t* const HWY_RESTRICT pb,
+                                  const size_t num_elements) {
+    const RebindToUnsigned<D> du16;
+    const Repartition<float, D> df32;
+
+    using V = decltype(Zero(df32));
+    const size_t N = Lanes(d);  // lanes of the input tag d, not of df32
+    size_t i = 0;  // index of the next unprocessed element
+
+    constexpr bool kIsAtLeastOneVector =
+        (kAssumptions & kAtLeastOneVector) != 0;
+    constexpr bool kIsMultipleOfVector =
+        (kAssumptions & kMultipleOfVector) != 0;
+    constexpr bool kIsPaddedToVector = (kAssumptions & kPaddedToVector) != 0;
+
+    // Won't be able to do a full vector load without padding => scalar loop.
+    if (!kIsAtLeastOneVector && !kIsMultipleOfVector && !kIsPaddedToVector &&
+        HWY_UNLIKELY(num_elements < N)) {
+      float sum0 = 0.0f;  // Only 2x unroll to avoid excessive code size for..
+      float sum1 = 0.0f;  // this unlikely(?) case.
+      for (; i + 2 <= num_elements; i += 2) {
+        sum0 += F32FromBF16(pa[i + 0]) * F32FromBF16(pb[i + 0]);
+        sum1 += F32FromBF16(pa[i + 1]) * F32FromBF16(pb[i + 1]);
+      }
+      if (i < num_elements) {  // odd count: one element is left over
+        sum1 += F32FromBF16(pa[i]) * F32FromBF16(pb[i]);
+      }
+      return sum0 + sum1;
+    }
+
+    // See comment in the other Compute() overload. Unroll 2x, but we need
+    // twice as many sums for ReorderWidenMulAccumulate.
+    V sum0 = Zero(df32);
+    V sum1 = Zero(df32);
+    V sum2 = Zero(df32);
+    V sum3 = Zero(df32);
+
+    // Main loop: unrolled 2x, using the pairs (sum0, sum1) and (sum2, sum3).
+    for (; i + 2 * N <= num_elements; /* i += 2 * N */) {  // incr in loop
+      const auto a0 = LoadU(d, pa + i);
+      const auto b0 = LoadU(d, pb + i);
+      i += N;
+      sum0 = ReorderWidenMulAccumulate(df32, a0, b0, sum0, sum1);
+      const auto a1 = LoadU(d, pa + i);
+      const auto b1 = LoadU(d, pb + i);
+      i += N;
+      sum2 = ReorderWidenMulAccumulate(df32, a1, b1, sum2, sum3);
+    }
+
+    // Possibly one more iteration of whole vectors
+    if (i + N <= num_elements) {
+      const auto a0 = LoadU(d, pa + i);
+      const auto b0 = LoadU(d, pb + i);
+      i += N;
+      sum0 = ReorderWidenMulAccumulate(df32, a0, b0, sum0, sum1);
+    }
+
+    if (!kIsMultipleOfVector) {  // fewer than N elements may still remain
+      const size_t remaining = num_elements - i;
+      if (remaining != 0) {
+        if (kIsPaddedToVector) {
+          const auto mask = FirstN(du16, remaining);  // the valid lanes
+          const auto va = LoadU(d, pa + i);  // reads into the padding
+          const auto vb = LoadU(d, pb + i);
+          const auto a16 = BitCast(d, IfThenElseZero(mask, BitCast(du16, va)));
+          const auto b16 = BitCast(d, IfThenElseZero(mask, BitCast(du16, vb)));
+          sum2 = ReorderWidenMulAccumulate(df32, a16, b16, sum2, sum3);
+
+        } else {
+          // Unaligned load such that the last element is in the highest lane -
+          // ensures we do not touch any elements outside the valid range.
+          // If we get here, then num_elements >= N.
+          HWY_DASSERT(i >= N);
+          i += remaining - N;  // rewind: load ends at last element
+          const auto skip = FirstN(du16, N - remaining);  // already summed
+          const auto va = LoadU(d, pa + i);  // always unaligned
+          const auto vb = LoadU(d, pb + i);
+          const auto a16 = BitCast(d, IfThenZeroElse(skip, BitCast(du16, va)));
+          const auto b16 = BitCast(d, IfThenZeroElse(skip, BitCast(du16, vb)));
+          sum2 = ReorderWidenMulAccumulate(df32, a16, b16, sum2, sum3);
+        }
+      }
+    }  // !kIsMultipleOfVector
+
+    // Reduction tree: sum of all accumulators by pairs, then across lanes.
+    sum0 = Add(sum0, sum1);
+    sum2 = Add(sum2, sum3);
+    sum0 = Add(sum0, sum2);
+    return ReduceSum(df32, sum0);
+  }
+};  // Dot
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#endif  // HIGHWAY_HWY_CONTRIB_DOT_DOT_INL_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/dot/dot_test.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/dot/dot_test.cc
new file mode 100644
index 0000000000..4799c267c7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/dot/dot_test.cc
@@ -0,0 +1,183 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "hwy/aligned_allocator.h"
+#include "hwy/base.h"
+
+// clang-format off
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/dot/dot_test.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+#include "hwy/highway.h"
+#include "hwy/contrib/dot/dot-inl.h"
+#include "hwy/tests/test_util-inl.h"
+// clang-format on
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Scalar reference dot product: accumulates the element-wise products of
+// pa[0..num) and pb[0..num) in a double, then narrows the total back to T.
+template <typename T>
+HWY_NOINLINE T SimpleDot(const T* pa, const T* pb, size_t num) {
+  double total = 0.0;
+  size_t idx = 0;
+  while (idx < num) {
+    total += pa[idx] * pb[idx];
+    ++idx;
+  }
+  return static_cast<T>(total);
+}
+
+// Scalar reference dot product for bfloat16_t inputs: every element is widened
+// to float via F32FromBF16 and the products are accumulated in a float.
+HWY_NOINLINE float SimpleDot(const bfloat16_t* pa, const bfloat16_t* pb,
+                             size_t num) {
+  float total = 0.0f;
+  size_t idx = 0;
+  while (idx < num) {
+    const float lhs = F32FromBF16(pa[idx]);
+    const float rhs = F32FromBF16(pb[idx]);
+    total += lhs * rhs;
+    ++idx;
+  }
+  return total;
+}
+
+// Stores `value`, converted to T with static_cast, into `*ptr`.
+template <typename T>
+HWY_INLINE void SetValue(const float value, T* HWY_RESTRICT ptr) {
+  *ptr = static_cast<T>(value);
+}
+// Overload for bfloat16_t: converts through BF16FromF32 instead of a
+// static_cast before storing into `*ptr`.
+HWY_INLINE void SetValue(const float value, bfloat16_t* HWY_RESTRICT ptr) {
+  *ptr = BF16FromF32(value);
+}
+
+// Returns `f` widened to double so results of any lane type can be compared.
+template <typename T>
+HWY_INLINE double GetValue(T f) {
+  return static_cast<double>(f);
+}
+// Specialization for bfloat16_t: widens to float via F32FromBF16 first, then
+// to double.
+template <>
+HWY_INLINE double GetValue<bfloat16_t>(bfloat16_t f) {
+  return static_cast<double>(F32FromBF16(f));
+}
+
+// Test functor: compares Dot::Compute against the scalar SimpleDot reference
+// for several lengths, input misalignments and assumption-flag combinations.
+class TestDot {
+  // Computes/verifies one dot product.
+  // `num` is the number of valid elements; `misalign_a`/`misalign_b` are
+  // element offsets applied to the aligned allocations of the two inputs.
+  template <int kAssumptions, class D>
+  void Test(D d, size_t num, size_t misalign_a, size_t misalign_b,
+            RandomState& rng) {
+    using T = TFromD<D>;
+    const size_t N = Lanes(d);
+    // Inputs are multiples of 1/64 in [-8, 8): the low 10 random bits minus
+    // 512, scaled by 1/64.
+    const auto random_t = [&rng]() {
+      const int32_t bits = static_cast<int32_t>(Random32(&rng)) & 1023;
+      return static_cast<float>(bits - 512) * (1.0f / 64);
+    };
+
+    // With kPaddedToVector the arrays are allocated up to a whole number of
+    // vectors; otherwise exactly `num` elements (plus the misalignment).
+    const size_t padded =
+        (kAssumptions & Dot::kPaddedToVector) ? RoundUpTo(num, N) : num;
+    AlignedFreeUniquePtr<T[]> pa = AllocateAligned<T>(misalign_a + padded);
+    AlignedFreeUniquePtr<T[]> pb = AllocateAligned<T>(misalign_b + padded);
+    HWY_ASSERT(pa && pb);
+    T* a = pa.get() + misalign_a;
+    T* b = pb.get() + misalign_b;
+    size_t i = 0;
+    // Note: a[i] and b[i] are drawn alternately from the same RNG stream.
+    for (; i < num; ++i) {
+      SetValue(random_t(), a + i);
+      SetValue(random_t(), b + i);
+    }
+    // Fill padding with NaN - the values are not used, but avoids MSAN errors.
+    for (; i < padded; ++i) {
+      ScalableTag<float> df1;
+      SetValue(GetLane(NaN(df1)), a + i);
+      SetValue(GetLane(NaN(df1)), b + i);
+    }
+
+    const double expected = SimpleDot(a, b, num);
+    const double magnitude = expected > 0.0 ? expected : -expected;
+    const double actual = GetValue(Dot::Compute<kAssumptions>(d, a, b, num));
+    // Coarse sanity bound: inputs lie in [-8, 8), so each product has
+    // magnitude at most 64 and the sum at most 64 * num.
+    const double max = static_cast<double>(8 * 8 * num);
+    HWY_ASSERT(-max <= actual && actual <= max);
+    // Tolerance relative to the reference result (but at least 64 epsilon).
+    const double tolerance =
+        64.0 * GetValue(Epsilon<T>()) * HWY_MAX(magnitude, 1.0);
+    HWY_ASSERT(expected - tolerance <= actual &&
+               actual <= expected + tolerance);
+  }
+
+  // Runs tests with various alignments.
+  // All nine (a, b) pairs of the element offsets {0, N/4, 3N/5} are exercised.
+  template <int kAssumptions, class D>
+  void ForeachMisalign(D d, size_t num, RandomState& rng) {
+    const size_t N = Lanes(d);
+    const size_t misalignments[3] = {0, N / 4, 3 * N / 5};
+    for (size_t ma : misalignments) {
+      for (size_t mb : misalignments) {
+        Test<kAssumptions>(d, num, ma, mb, rng);
+      }
+    }
+  }
+
+  // Runs tests with various lengths compatible with the given assumptions.
+  template <int kAssumptions, class D>
+  void ForeachCount(D d, RandomState& rng) {
+    const size_t N = Lanes(d);
+    const size_t counts[] = {1,
+                             3,
+                             7,
+                             16,
+                             HWY_MAX(N / 2, 1),
+                             HWY_MAX(2 * N / 3, 1),
+                             N,
+                             N + 1,
+                             4 * N / 3,
+                             3 * N,
+                             8 * N,
+                             8 * N + 2};
+    for (size_t num : counts) {
+      // Skip lengths that would violate the assumptions being tested.
+      if ((kAssumptions & Dot::kAtLeastOneVector) && num < N) continue;
+      if ((kAssumptions & Dot::kMultipleOfVector) && (num % N) != 0) continue;
+      ForeachMisalign<kAssumptions>(d, num, rng);
+    }
+  }
+
+ public:
+  // Must be inlined on aarch64 for bf16, else clang crashes.
+  // Entry point invoked by the test framework; the lane type is taken from
+  // `d`, so the first argument is unused.
+  template <class T, class D>
+  HWY_INLINE void operator()(T /*unused*/, D d) {
+    RandomState rng;
+
+    // All 8 combinations of the three length-related flags:
+    ForeachCount<0>(d, rng);
+    ForeachCount<Dot::kAtLeastOneVector>(d, rng);
+    ForeachCount<Dot::kMultipleOfVector>(d, rng);
+    ForeachCount<Dot::kMultipleOfVector | Dot::kAtLeastOneVector>(d, rng);
+    ForeachCount<Dot::kPaddedToVector>(d, rng);
+    ForeachCount<Dot::kPaddedToVector | Dot::kAtLeastOneVector>(d, rng);
+    ForeachCount<Dot::kPaddedToVector | Dot::kMultipleOfVector>(d, rng);
+    ForeachCount<Dot::kPaddedToVector | Dot::kMultipleOfVector |
+                 Dot::kAtLeastOneVector>(d, rng);
+  }
+};
+
+// Runs TestDot through ForFloatTypes(ForPartialVectors<...>); presumably this
+// covers every floating-point lane type and partial vector size - see the
+// test utilities for the exact set.
+void TestAllDot() { ForFloatTypes(ForPartialVectors<TestDot>()); }
+// Runs TestDot for bfloat16_t lanes via ForShrinkableVectors.
+void TestAllDotBF16() { ForShrinkableVectors<TestDot>()(bfloat16_t()); }
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+// Test-suite registration; this region is guarded by HWY_ONCE.
+// NOTE(review): presumably HWY_EXPORT_AND_TEST_P exports the per-target
+// function and registers it as a parameterized test - confirm against the
+// Highway test utilities.
+namespace hwy {
+HWY_BEFORE_TEST(DotTest);
+HWY_EXPORT_AND_TEST_P(DotTest, TestAllDot);
+HWY_EXPORT_AND_TEST_P(DotTest, TestAllDotBF16);
+}  // namespace hwy
+
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/image/image.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/image/image.cc
new file mode 100644
index 0000000000..67b37d2711
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/image/image.cc
@@ -0,0 +1,145 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/image/image.h"
+
+#include <algorithm>  // std::swap
+#include <cstddef>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/image/image.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+#include "hwy/highway.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+// Per-target helper: number of uint8_t lanes in a ScalableTag vector, i.e. the
+// vector size in bytes for the target this translation unit is compiled for.
+size_t GetVectorSize() { return Lanes(ScalableTag<uint8_t>()); }
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(GetVectorSize);  // Local function.
+}  // namespace
+
+size_t ImageBase::VectorSize() {
+  // Deliberately queried on every call instead of being cached: the value may
+  // be greater than on the first call if that call was subject to
+  // DisableTargets, and callers must see the current one.
+  const size_t current_vector_bytes = HWY_DYNAMIC_DISPATCH(GetVectorSize)();
+  return current_vector_bytes;
+}
+
+// Returns the distance in bytes between the starts of two consecutive rows for
+// an image with `xsize` samples of `sizeof_t` bytes each. The result is a
+// multiple of max(VectorSize(), HWY_ALIGNMENT) and is never a multiple of
+// 2 KiB (see below).
+size_t ImageBase::BytesPerRow(const size_t xsize, const size_t sizeof_t) {
+  // Row strides that are a multiple of this many bytes would alias in the
+  // low 11 address bits.
+  constexpr size_t kAlias = 2048;
+
+  const size_t vec_size = VectorSize();
+  size_t valid_bytes = xsize * sizeof_t;
+
+  // Allow unaligned accesses starting at the last valid value - this may raise
+  // msan errors unless the user calls InitializePaddingForUnalignedAccesses.
+  // Skip for the scalar case because no extra lanes will be loaded.
+  if (vec_size != 1) {
+    HWY_DASSERT(vec_size >= sizeof_t);
+    valid_bytes += vec_size - sizeof_t;
+  }
+
+  // Round up to vector and cache line size.
+  const size_t align = HWY_MAX(vec_size, HWY_ALIGNMENT);
+  size_t bytes_per_row = RoundUpTo(valid_bytes, align);
+
+  // During the lengthy window before writes are committed to memory, CPUs
+  // guard against read after write hazards by checking the address, but
+  // only the lower 11 bits. We avoid a false dependency between writes to
+  // consecutive rows by ensuring their sizes are not multiples of 2 KiB.
+  // Avoid2K prevents the same problem for the planes of an Image3.
+  //
+  // The check must be against kAlias: after the rounding above, the stride is
+  // always a multiple of HWY_ALIGNMENT, so testing against HWY_ALIGNMENT
+  // would unconditionally add `align` and could itself produce a 2 KiB
+  // multiple (e.g. 1984 + 64).
+  if (bytes_per_row % kAlias == 0) {
+    bytes_per_row += align;
+  }
+
+  HWY_DASSERT(bytes_per_row % align == 0);
+  return bytes_per_row;
+}
+
+// Allocating constructor: owns a buffer of `ysize` rows, each BytesPerRow()
+// bytes apart. `sizeof_t` is the size of one sample and must be 1, 2, 4 or 8.
+ImageBase::ImageBase(const size_t xsize, const size_t ysize,
+                     const size_t sizeof_t)
+    : xsize_(static_cast<uint32_t>(xsize)),
+      ysize_(static_cast<uint32_t>(ysize)),
+      bytes_per_row_(0),
+      bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {
+  HWY_ASSERT(sizeof_t == 1 || sizeof_t == 2 || sizeof_t == 4 || sizeof_t == 8);
+
+  // Dimensions can be zero, e.g. for lazily-allocated images. Such images stay
+  // unallocated because "zero" bytes still have padding/bookkeeping overhead.
+  const bool is_empty = (xsize == 0) || (ysize == 0);
+  if (is_empty) return;
+
+  bytes_per_row_ = BytesPerRow(xsize, sizeof_t);
+  const size_t total_bytes = bytes_per_row_ * ysize;
+  bytes_ = AllocateAligned<uint8_t>(total_bytes);
+  HWY_ASSERT(bytes_.get() != nullptr);
+  InitializePadding(sizeof_t, Padding::kRoundUp);
+}
+
+// Wraps externally owned memory without taking ownership: the deleter is
+// AlignedFreer::DoNothing, so `aligned` is never freed by this object.
+// Aborts unless both `bytes_per_row` and the address of `aligned` are
+// multiples of VectorSize().
+// NOTE(review): bytes() and VoidRow() in image.h annotate their pointers as
+// 64-byte aligned, whereas only VectorSize() alignment is verified here -
+// confirm callers provide memory that satisfies the stronger assumption.
+ImageBase::ImageBase(const size_t xsize, const size_t ysize,
+                     const size_t bytes_per_row, void* const aligned)
+    : xsize_(static_cast<uint32_t>(xsize)),
+      ysize_(static_cast<uint32_t>(ysize)),
+      bytes_per_row_(bytes_per_row),
+      bytes_(static_cast<uint8_t*>(aligned),
+             AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {
+  const size_t vec_size = VectorSize();
+  HWY_ASSERT(bytes_per_row % vec_size == 0);
+  HWY_ASSERT(reinterpret_cast<uintptr_t>(aligned) % vec_size == 0);
+}
+
+// Zero-fills the bytes to the right of the valid samples in every row so that
+// vector accesses touching them do not trigger msan reports. Only compiled in
+// for msan (or IDE) builds; otherwise this is a no-op.
+// `sizeof_t` is the size of one sample; `padding` selects how far past the
+// valid samples to initialize.
+void ImageBase::InitializePadding(const size_t sizeof_t, Padding padding) {
+#if HWY_IS_MSAN || HWY_IDE
+  if (xsize_ == 0 || ysize_ == 0) return;
+
+  const size_t vec_size = VectorSize();  // Bytes, independent of sizeof_t!
+  if (vec_size == 1) return;             // Scalar mode: no padding needed
+
+  const size_t valid_size = xsize_ * sizeof_t;
+  // kRoundUp: up to the next whole vector. Otherwise (kUnaligned): one full
+  // vector starting at the last valid sample.
+  const size_t initialize_size = padding == Padding::kRoundUp
+                                     ? RoundUpTo(valid_size, vec_size)
+                                     : valid_size + vec_size - sizeof_t;
+  if (valid_size == initialize_size) return;
+
+  for (size_t y = 0; y < ysize_; ++y) {
+    uint8_t* HWY_RESTRICT row = static_cast<uint8_t*>(VoidRow(y));
+#if defined(__clang__) && (__clang_major__ <= 6)
+    // There's a bug in msan in clang-6 when handling AVX2 operations. This
+    // workaround allows tests to pass on msan, although it is slower and
+    // prevents msan warnings from uninitialized images.
+    memset(row, 0, initialize_size);
+#else
+    // Only the padding bytes are cleared; valid samples are left untouched.
+    memset(row + valid_size, 0, initialize_size - valid_size);
+#endif  // clang6
+  }
+#else
+  // Silence unused-parameter warnings in builds without msan.
+  (void)sizeof_t;
+  (void)padding;
+#endif  // HWY_IS_MSAN
+}
+
+// Exchanges dimensions, stride and storage with `other`. The four member
+// swaps are independent of each other.
+void ImageBase::Swap(ImageBase& other) {
+  std::swap(bytes_, other.bytes_);
+  std::swap(bytes_per_row_, other.bytes_per_row_);
+  std::swap(ysize_, other.ysize_);
+  std::swap(xsize_, other.xsize_);
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/image/image.h b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/image/image.h
new file mode 100644
index 0000000000..4f578aaec4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/image/image.h
@@ -0,0 +1,467 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
+#define HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
+
+// SIMD/multicore-friendly planar image representation with row accessors.
+
+#include <string.h>
+
+#include <utility>  // std::move
+
+#include "hwy/aligned_allocator.h"
+#include "hwy/base.h"
+
+namespace hwy {
+
+// Type-independent parts of Image<> - reduces code duplication and facilitates
+// moving member function implementations to cc file.
+struct HWY_CONTRIB_DLLEXPORT ImageBase {
+  // Returns required alignment in bytes for externally allocated memory.
+  static size_t VectorSize();
+
+  // Returns distance [bytes] between the start of two consecutive rows, a
+  // multiple of VectorSize but NOT kAlias (see implementation).
+  static size_t BytesPerRow(size_t xsize, size_t sizeof_t);
+
+  // No allocation (for output params or unused images)
+  ImageBase()
+      : xsize_(0),
+        ysize_(0),
+        bytes_per_row_(0),
+        bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {}
+
+  // Allocates memory (this is the common case)
+  ImageBase(size_t xsize, size_t ysize, size_t sizeof_t);
+
+  // References but does not take ownership of external memory. Useful for
+  // interoperability with other libraries. `aligned` must be aligned to a
+  // multiple of VectorSize() and `bytes_per_row` must also be a multiple of
+  // VectorSize() or preferably equal to BytesPerRow().
+  ImageBase(size_t xsize, size_t ysize, size_t bytes_per_row, void* aligned);
+
+  // Copy construction/assignment is forbidden to avoid inadvertent copies,
+  // which can be very expensive. Use CopyImageTo() instead.
+  ImageBase(const ImageBase& other) = delete;
+  ImageBase& operator=(const ImageBase& other) = delete;
+
+  // Move constructor (required for returning Image from function)
+  ImageBase(ImageBase&& other) noexcept = default;
+
+  // Move assignment (required for std::vector)
+  ImageBase& operator=(ImageBase&& other) noexcept = default;
+
+  // Exchanges dimensions, stride and storage with `other`.
+  void Swap(ImageBase& other);
+
+  // Useful for pre-allocating image with some padding for alignment purposes
+  // and later reporting the actual valid dimensions. Caller is responsible
+  // for ensuring xsize/ysize are <= the original dimensions.
+  void ShrinkTo(const size_t xsize, const size_t ysize) {
+    xsize_ = static_cast<uint32_t>(xsize);
+    ysize_ = static_cast<uint32_t>(ysize);
+    // NOTE: we can't recompute bytes_per_row for more compact storage and
+    // better locality because that would invalidate the image contents.
+  }
+
+  // How many pixels.
+  HWY_INLINE size_t xsize() const { return xsize_; }
+  HWY_INLINE size_t ysize() const { return ysize_; }
+
+  // NOTE: do not use this for copying rows - the valid xsize may be much less.
+  HWY_INLINE size_t bytes_per_row() const { return bytes_per_row_; }
+
+  // Raw access to byte contents, for interfacing with other libraries.
+  // Unsigned char instead of char to avoid surprises (sign extension).
+  // NOTE(review): the pointer is annotated as 64-byte aligned, while the
+  // external-memory constructor is documented to require only VectorSize()
+  // alignment - confirm these agree for wrapped buffers.
+  HWY_INLINE uint8_t* bytes() {
+    void* p = bytes_.get();
+    return static_cast<uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(p, 64));
+  }
+  HWY_INLINE const uint8_t* bytes() const {
+    const void* p = bytes_.get();
+    return static_cast<const uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(p, 64));
+  }
+
+ protected:
+  // Returns pointer to the start of a row.
+  // The bounds check on `y` is only compiled in for ASAN/MSAN/TSAN builds.
+  HWY_INLINE void* VoidRow(const size_t y) const {
+#if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
+    if (y >= ysize_) {
+      HWY_ABORT("Row(%d) >= %u\n", static_cast<int>(y), ysize_);
+    }
+#endif
+
+    void* row = bytes_.get() + y * bytes_per_row_;
+    return HWY_ASSUME_ALIGNED(row, 64);
+  }
+
+  enum class Padding {
+    // Allow Load(d, row + x) for x = 0; x < xsize(); x += Lanes(d). Default.
+    kRoundUp,
+    // Allow LoadU(d, row + x) for x <= xsize() - 1. This requires an extra
+    // vector to be initialized. If done by default, this would suppress
+    // legitimate msan warnings. We therefore require users to explicitly call
+    // InitializePadding before using unaligned loads (e.g. convolution).
+    kUnaligned
+  };
+
+  // Initializes the minimum bytes required to suppress msan warnings from
+  // legitimate (according to Padding mode) vector loads/stores on the right
+  // border, where some lanes are uninitialized and assumed to be unused.
+  void InitializePadding(size_t sizeof_t, Padding padding);
+
+  // (Members are non-const to enable assignment during move-assignment.)
+  // Dimensions are stored as uint32_t; the constructors and ShrinkTo narrow
+  // their size_t arguments with static_cast.
+  uint32_t xsize_;  // In valid pixels, not including any padding.
+  uint32_t ysize_;
+  size_t bytes_per_row_;  // Includes padding.
+  AlignedFreeUniquePtr<uint8_t[]> bytes_;
+};
+
+// Single channel, aligned rows separated by padding. T must be POD.
+//
+// 'Single channel' (one 2D array per channel) simplifies vectorization
+// (repeating the same operation on multiple adjacent components) without the
+// complexity of a hybrid layout (8 R, 8 G, 8 B, ...). In particular, clients
+// can easily iterate over all components in a row and Image requires no
+// knowledge of the pixel format beyond the component type "T".
+//
+// 'Aligned' means each row is aligned to the L1 cache line size. This prevents
+// false sharing between two threads operating on adjacent rows.
+//
+// 'Padding' is still relevant because vectors could potentially be larger than
+// a cache line. By rounding up row sizes to the vector size, we allow
+// reading/writing ALIGNED vectors whose first lane is a valid sample. This
+// avoids needing a separate loop to handle remaining unaligned lanes.
+//
+// This image layout could also be achieved with a vector and a row accessor
+// function, but a class wrapper with support for "deleter" allows wrapping
+// existing memory allocated by clients without copying the pixels. It also
+// provides convenient accessors for xsize/ysize, which shortens function
+// argument lists. Supports move-construction so it can be stored in containers.
+template <typename ComponentType>
+class Image : public ImageBase {
+ public:
+  using T = ComponentType;
+
+  // Empty image without allocation.
+  Image() = default;
+  // Allocates an xsize x ysize image of T.
+  Image(const size_t xsize, const size_t ysize)
+      : ImageBase(xsize, ysize, sizeof(T)) {}
+  // Wraps external memory without taking ownership (see ImageBase).
+  Image(const size_t xsize, const size_t ysize, size_t bytes_per_row,
+        void* aligned)
+      : ImageBase(xsize, ysize, bytes_per_row, aligned) {}
+
+  // Initializes padding in Padding::kUnaligned mode; see
+  // ImageBase::InitializePadding.
+  void InitializePaddingForUnalignedAccesses() {
+    InitializePadding(sizeof(T), Padding::kUnaligned);
+  }
+
+  // Returns a read-only pointer to the first sample of row `y`. Both
+  // overloads return the same pointer type.
+  HWY_INLINE const T* ConstRow(const size_t y) const {
+    return static_cast<const T*>(VoidRow(y));
+  }
+  HWY_INLINE const T* ConstRow(const size_t y) {
+    return static_cast<const T*>(VoidRow(y));
+  }
+
+  // Returns pointer to non-const. This allows passing const Image* parameters
+  // when the callee is only supposed to fill the pixels, as opposed to
+  // allocating or resizing the image.
+  HWY_INLINE T* MutableRow(const size_t y) const {
+    return static_cast<T*>(VoidRow(y));
+  }
+  HWY_INLINE T* MutableRow(const size_t y) {
+    return static_cast<T*>(VoidRow(y));
+  }
+
+  // Returns number of pixels (some of which are padding) per row. Useful for
+  // computing other rows via pointer arithmetic. WARNING: this must
+  // NOT be used to determine xsize.
+  // Computed as the row stride in bytes divided by sizeof(T).
+  HWY_INLINE intptr_t PixelsPerRow() const {
+    return static_cast<intptr_t>(bytes_per_row_ / sizeof(T));
+  }
+};
+
+using ImageF = Image<float>;
+
+// A bundle of 3 same-sized images. To fill an existing Image3 using
+// single-channel producers, we also need access to each const Image*. Const
+// prevents breaking the same-size invariant, while still allowing pixels to be
+// changed via MutableRow.
+template <typename ComponentType>
+class Image3 {
+ public:
+  using T = ComponentType;
+  using ImageT = Image<T>;
+  static constexpr size_t kNumPlanes = 3;
+
+  // Three empty planes without allocation.
+  Image3() : planes_{ImageT(), ImageT(), ImageT()} {}
+
+  // Allocates three planes of xsize x ysize samples each.
+  Image3(const size_t xsize, const size_t ysize)
+      : planes_{ImageT(xsize, ysize), ImageT(xsize, ysize),
+                ImageT(xsize, ysize)} {}
+
+  // Takes over the planes of `other`.
+  Image3(Image3&& other) noexcept {
+    for (size_t i = 0; i < kNumPlanes; i++) {
+      planes_[i] = std::move(other.planes_[i]);
+    }
+  }
+
+  // Assembles an Image3 from three existing planes; aborts unless all three
+  // have the same dimensions.
+  Image3(ImageT&& plane0, ImageT&& plane1, ImageT&& plane2) {
+    if (!SameSize(plane0, plane1) || !SameSize(plane0, plane2)) {
+      HWY_ABORT(
+          "Not same size: %d x %d, %d x %d, %d x %d\n",
+          static_cast<int>(plane0.xsize()), static_cast<int>(plane0.ysize()),
+          static_cast<int>(plane1.xsize()), static_cast<int>(plane1.ysize()),
+          static_cast<int>(plane2.xsize()), static_cast<int>(plane2.ysize()));
+    }
+    planes_[0] = std::move(plane0);
+    planes_[1] = std::move(plane1);
+    planes_[2] = std::move(plane2);
+  }
+
+  // Copy construction/assignment is forbidden to avoid inadvertent copies,
+  // which can be very expensive. Use CopyImageTo instead.
+  Image3(const Image3& other) = delete;
+  Image3& operator=(const Image3& other) = delete;
+
+  Image3& operator=(Image3&& other) noexcept {
+    for (size_t i = 0; i < kNumPlanes; i++) {
+      planes_[i] = std::move(other.planes_[i]);
+    }
+    return *this;
+  }
+
+  // Returns a read-only pointer to the first sample of row `y` in plane `c`.
+  HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) const {
+    return static_cast<const T*>(VoidPlaneRow(c, y));
+  }
+  HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) {
+    return static_cast<const T*>(VoidPlaneRow(c, y));
+  }
+
+  // Returns a writable pointer to the first sample of row `y` in plane `c`.
+  // Also available on const Image3 so that callees can fill pixels without
+  // being able to change the image dimensions.
+  HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) const {
+    return static_cast<T*>(VoidPlaneRow(c, y));
+  }
+  HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) {
+    return static_cast<T*>(VoidPlaneRow(c, y));
+  }
+
+  HWY_INLINE const ImageT& Plane(size_t idx) const { return planes_[idx]; }
+
+  // Exchanges all planes with those of `other`.
+  void Swap(Image3& other) {
+    for (size_t c = 0; c < kNumPlanes; ++c) {
+      other.planes_[c].Swap(planes_[c]);
+    }
+  }
+
+  // Reduces the reported dimensions of every plane; see ImageBase::ShrinkTo.
+  void ShrinkTo(const size_t xsize, const size_t ysize) {
+    for (ImageT& plane : planes_) {
+      plane.ShrinkTo(xsize, ysize);
+    }
+  }
+
+  // Sizes of all three images are guaranteed to be equal.
+  HWY_INLINE size_t xsize() const { return planes_[0].xsize(); }
+  HWY_INLINE size_t ysize() const { return planes_[0].ysize(); }
+  // Returns offset [bytes] from one row to the next row of the same plane.
+  // WARNING: this must NOT be used to determine xsize, nor for copying rows -
+  // the valid xsize may be much less.
+  HWY_INLINE size_t bytes_per_row() const { return planes_[0].bytes_per_row(); }
+  // Returns number of pixels (some of which are padding) per row. Useful for
+  // computing other rows via pointer arithmetic. WARNING: this must NOT be used
+  // to determine xsize.
+  HWY_INLINE intptr_t PixelsPerRow() const { return planes_[0].PixelsPerRow(); }
+
+ private:
+  // Returns pointer to the start of a row.
+  // The bounds check is only compiled in for ASAN/MSAN/TSAN builds.
+  HWY_INLINE void* VoidPlaneRow(const size_t c, const size_t y) const {
+#if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
+    if (c >= kNumPlanes || y >= ysize()) {
+      HWY_ABORT("PlaneRow(%d, %d) >= %d\n", static_cast<int>(c),
+                static_cast<int>(y), static_cast<int>(ysize()));
+    }
+#endif
+    // Use the first plane's stride because the compiler might not realize they
+    // are all equal. Thus we only need a single multiplication for all planes.
+    const size_t row_offset = y * planes_[0].bytes_per_row();
+    const void* row = planes_[c].bytes() + row_offset;
+    const void* aligned_row = HWY_ASSUME_ALIGNED(row, HWY_ALIGNMENT);
+    // In this const member, bytes() yields a pointer-to-const. A
+    // pointer-to-const cannot be returned as void*, so constness is removed
+    // explicitly; MutablePlaneRow() const relies on getting a writable row.
+    return const_cast<void*>(aligned_row);
+  }
+
+  ImageT planes_[kNumPlanes];
+};
+
+using Image3F = Image3<float>;
+
+// Rectangular region in image(s). Factoring this out of Image instead of
+// shifting the pointer by x0/y0 allows this to apply to multiple images with
+// different resolutions. Can compare size via SameSize(rect1, rect2).
+class Rect {
+ public:
+  // Most windows are xsize_max * ysize_max, except those on the borders where
+  // begin + size_max > end.
+  constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize_max,
+                 size_t ysize_max, size_t xend, size_t yend)
+      : x0_(xbegin),
+        y0_(ybegin),
+        xsize_(ClampedSize(xbegin, xsize_max, xend)),
+        ysize_(ClampedSize(ybegin, ysize_max, yend)) {}
+
+  // Construct with origin and known size (typically from another Rect).
+  constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize, size_t ysize)
+      : x0_(xbegin), y0_(ybegin), xsize_(xsize), ysize_(ysize) {}
+
+  // Construct a rect that covers a whole image.
+  template <typename Image>
+  explicit Rect(const Image& image)
+      : Rect(0, 0, image.xsize(), image.ysize()) {}
+
+  // Empty rect at the origin.
+  Rect() : Rect(0, 0, 0, 0) {}
+
+  Rect(const Rect&) = default;
+  Rect& operator=(const Rect&) = default;
+
+  // Returns a rect whose origin is offset by (xbegin, ybegin) relative to
+  // this rect and whose size is at most xsize_max x ysize_max, clamped so it
+  // does not extend past this rect's right/bottom edge. Does not modify
+  // *this, hence const.
+  Rect Subrect(size_t xbegin, size_t ybegin, size_t xsize_max,
+               size_t ysize_max) const {
+    return Rect(x0_ + xbegin, y0_ + ybegin, xsize_max, ysize_max, x0_ + xsize_,
+                y0_ + ysize_);
+  }
+
+  // Row accessors: return a pointer to column x0() of row (y + y0()) of the
+  // given image, i.e. `y` is relative to the top of this rect.
+  template <typename T>
+  const T* ConstRow(const Image<T>* image, size_t y) const {
+    return image->ConstRow(y + y0_) + x0_;
+  }
+
+  template <typename T>
+  T* MutableRow(const Image<T>* image, size_t y) const {
+    return image->MutableRow(y + y0_) + x0_;
+  }
+
+  template <typename T>
+  const T* ConstPlaneRow(const Image3<T>& image, size_t c, size_t y) const {
+    return image.ConstPlaneRow(c, y + y0_) + x0_;
+  }
+
+  template <typename T>
+  T* MutablePlaneRow(Image3<T>* image, const size_t c, size_t y) const {
+    return image->MutablePlaneRow(c, y + y0_) + x0_;
+  }
+
+  // Returns true if this Rect fully resides in the given image. ImageT could be
+  // Image<T> or Image3<T>; however if ImageT is Rect, results are nonsensical.
+  template <class ImageT>
+  bool IsInside(const ImageT& image) const {
+    return (x0_ + xsize_ <= image.xsize()) && (y0_ + ysize_ <= image.ysize());
+  }
+
+  size_t x0() const { return x0_; }
+  size_t y0() const { return y0_; }
+  size_t xsize() const { return xsize_; }
+  size_t ysize() const { return ysize_; }
+
+ private:
+  // Returns size_max, or whatever is left in [begin, end).
+  // Written without computing begin + size_max, which could wrap around for
+  // a very large size_max and then wrongly report the unclamped size.
+  static constexpr size_t ClampedSize(size_t begin, size_t size_max,
+                                      size_t end) {
+    return (end <= begin)
+               ? 0
+               : ((size_max <= end - begin) ? size_max : end - begin);
+  }
+
+  size_t x0_;
+  size_t y0_;
+
+  size_t xsize_;
+  size_t ysize_;
+};
+
+// Returns whether both arguments report identical xsize() and ysize(). Works
+// for any image-like input type(s) providing those two accessors.
+template <class Image1, class Image2>
+HWY_MAYBE_UNUSED bool SameSize(const Image1& image1, const Image2& image2) {
+  if (image1.xsize() != image2.xsize()) return false;
+  return image1.ysize() == image2.ysize();
+}
+
+// Reflects an out-of-range coordinate back into [0, xsize); coordinates that
+// are already valid are returned unchanged. The mirror axis lies outside the
+// image, so the border pixel is replicated: -1 maps to 0 and xsize maps to
+// xsize - 1. The reflection is repeated until the coordinate is valid, so the
+// distance outside the image is assumed to be small compared to the image
+// size; otherwise this might not terminate.
+static HWY_INLINE HWY_MAYBE_UNUSED size_t Mirror(int64_t x,
+                                                 const int64_t xsize) {
+  HWY_DASSERT(xsize != 0);
+
+  // TODO(janwas): replace with branchless version
+  for (;;) {
+    if (x < 0) {
+      x = -x - 1;  // reflect about the left border
+    } else if (x >= xsize) {
+      x = 2 * xsize - 1 - x;  // reflect about the right border
+    } else {
+      break;  // inside the valid range
+    }
+  }
+  return static_cast<size_t>(x);
+}
+
+// Wrap modes for ensuring X/Y coordinates are in the valid range [0, size):
+
+// Wrap mode that mirrors out-of-range coordinates via Mirror(), repeating the
+// edge pixel once. Useful for convolutions.
+struct WrapMirror {
+  HWY_INLINE size_t operator()(const int64_t coord, const size_t size) const {
+    const int64_t signed_size = static_cast<int64_t>(size);
+    return Mirror(coord, signed_size);
+  }
+};
+
+// Returns the same coordinate, for when we know "coord" is already valid (e.g.
+// interior of an image).
+struct WrapUnchanged {
+  // Performs no range check: `coord` is converted to size_t as-is and the
+  // size argument is ignored.
+  HWY_INLINE size_t operator()(const int64_t coord, size_t /*size*/) const {
+    return static_cast<size_t>(coord);
+  }
+};
+
+// Similar to Wrap* but for row pointers (reduces Row() multiplications).
+
+// Mirrors row pointers that fall above the first or below the last row of an
+// image. Operates on float rows only.
+class WrapRowMirror {
+ public:
+  // Remembers the pointers to row 0 and row (ysize - 1) of `image`.
+  template <class View>
+  WrapRowMirror(const View& image, size_t ysize)
+      : first_row_(image.ConstRow(0)), last_row_(image.ConstRow(ysize - 1)) {}
+
+  // Returns `row` if it lies within [first row, last row], else its mirror
+  // image. `stride` is the distance between consecutive rows, in floats.
+  const float* operator()(const float* const HWY_RESTRICT row,
+                          const int64_t stride) const {
+    const float* mirrored = row;
+    if (row < first_row_) {
+      // Mirrored; one row before => row 0, two before = row 1, ...
+      const int64_t floats_before = first_row_ - row;
+      mirrored = first_row_ + floats_before - stride;
+    } else if (row > last_row_) {
+      // Mirrored; one row after => last row, two after = last - 1, ...
+      const int64_t floats_after = row - last_row_;
+      mirrored = last_row_ - floats_after + stride;
+    }
+    return mirrored;
+  }
+
+ private:
+  const float* const HWY_RESTRICT first_row_;
+  const float* const HWY_RESTRICT last_row_;
+};
+
+// Row-pointer counterpart of WrapUnchanged: returns the row pointer as-is and
+// ignores the stride.
+struct WrapRowUnchanged {
+  HWY_INLINE const float* operator()(const float* const HWY_RESTRICT row,
+                                     int64_t /*stride*/) const {
+    return row;
+  }
+};
+
+}  // namespace hwy
+
+#endif  // HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/image/image_test.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/image/image_test.cc
new file mode 100644
index 0000000000..978c991a5a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/image/image_test.cc
@@ -0,0 +1,149 @@
+// Copyright (c) the JPEG XL Project
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/image/image.h"
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <random>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/image/image_test.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+#include "hwy/highway.h"
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Ensure we can always write full aligned vectors.
+struct TestAlignedT {
+  template <typename T>
+  void operator()(T /*unused*/) const {
+    std::mt19937 rng(129);
+    std::uniform_int_distribution<int> dist(0, 16);
+    const ScalableTag<T> d;
+
+    for (size_t ysize = 1; ysize < 4; ++ysize) {
+      for (size_t xsize = 1; xsize < 64; ++xsize) {
+        Image<T> img(xsize, ysize);
+
+        for (size_t y = 0; y < ysize; ++y) {
+          T* HWY_RESTRICT row = img.MutableRow(y);
+          for (size_t x = 0; x < xsize; x += Lanes(d)) {
+            const auto values = Iota(d, static_cast<T>(dist(rng)));
+            Store(values, d, row + x);
+          }
+        }
+
+        // Sanity check to prevent optimizing out the writes
+        const auto x = std::uniform_int_distribution<size_t>(0, xsize - 1)(rng);
+        const auto y = std::uniform_int_distribution<size_t>(0, ysize - 1)(rng);
+        HWY_ASSERT(img.ConstRow(y)[x] < 16 + Lanes(d));
+      }
+    }
+  }
+};
+
+void TestAligned() { ForUnsignedTypes(TestAlignedT()); }
+
+// Ensure we can write an unaligned vector starting at the last valid value.
+struct TestUnalignedT {
+  template <typename T>
+  void operator()(T /*unused*/) const {
+    std::mt19937 rng(129);
+    std::uniform_int_distribution<int> dist(0, 3);
+    const ScalableTag<T> d;
+
+    for (size_t ysize = 1; ysize < 4; ++ysize) {
+      for (size_t xsize = 1; xsize < 128; ++xsize) {
+        Image<T> img(xsize, ysize);
+        img.InitializePaddingForUnalignedAccesses();
+
+// Reading padding only works if it was initialized, which only happens in
+// MSAN builds; other builds instead verify that writing padding is safe.
+#if HWY_IS_MSAN || HWY_IDE
+        // Initialize only the valid samples, each to one bit: 1, 2, 4 or 8
+        for (size_t y = 0; y < ysize; ++y) {
+          T* HWY_RESTRICT row = img.MutableRow(y);
+          for (size_t x = 0; x < xsize; ++x) {
+            row[x] = static_cast<T>(1u << dist(rng));
+          }
+        }
+
+        // OR together unaligned loads, which may extend into the padding
+        auto accum = Zero(d);
+        for (size_t y = 0; y < ysize; ++y) {
+          T* HWY_RESTRICT row = img.MutableRow(y);
+          for (size_t x = 0; x < xsize; ++x) {
+            accum = Or(accum, LoadU(d, row + x));
+          }
+        }
+
+        // Valid samples OR to <= 15; larger lanes imply nonzero padding
+        const size_t N = Lanes(d);
+        auto lanes = AllocateAligned<T>(N);
+        HWY_ASSERT(lanes);
+        Store(accum, d, lanes.get());
+        for (size_t i = 0; i < N; ++i) {
+          HWY_ASSERT(lanes[i] < 16);
+        }
+#else  // Check that writing padding does not overwrite valid samples
+        // Initialize only the valid samples, each to its own x coordinate
+        for (size_t y = 0; y < ysize; ++y) {
+          T* HWY_RESTRICT row = img.MutableRow(y);
+          for (size_t x = 0; x < xsize; ++x) {
+            row[x] = static_cast<T>(x);
+          }
+        }
+
+        // Unaligned store of zeros starting at the rightmost valid sample
+        for (size_t y = 0; y < ysize; ++y) {
+          T* HWY_RESTRICT row = img.MutableRow(y);
+          StoreU(Zero(d), d, row + xsize - 1);
+        }
+
+        // Ensure no samples except the rightmost were overwritten
+        for (size_t y = 0; y < ysize; ++y) {
+          T* HWY_RESTRICT row = img.MutableRow(y);
+          for (size_t x = 0; x < xsize - 1; ++x) {
+            HWY_ASSERT_EQ(static_cast<T>(x), row[x]);
+          }
+        }
+#endif
+      }
+    }
+  }
+};
+
+void TestUnaligned() { ForUnsignedTypes(TestUnalignedT()); }
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace hwy {
+HWY_BEFORE_TEST(ImageTest);
+HWY_EXPORT_AND_TEST_P(ImageTest, TestAligned);
+HWY_EXPORT_AND_TEST_P(ImageTest, TestUnaligned);
+}  // namespace hwy
+
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/math/math-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/math/math-inl.h
new file mode 100644
index 0000000000..d701c5e9f4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/math/math-inl.h
@@ -0,0 +1,1549 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Include guard (still compiled once per target)
+#if defined(HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_) == \
+    defined(HWY_TARGET_TOGGLE)  // NOLINT
+#ifdef HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_
+#undef HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_
+#else
+#define HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_
+#endif
+
+#include "hwy/highway.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+/**
+ * Highway SIMD version of std::acos(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 2
+ *      Valid Range: [-1, +1]
+ * @return arc cosine of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Acos(const D d, V x);
+template <class D, class V>
+HWY_NOINLINE V CallAcos(const D d, VecArg<V> x) {
+  return Acos(d, x);
+}
+
+/**
+ * Highway SIMD version of std::acosh(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 3
+ *      Valid Range: float32[1, +FLT_MAX], float64[1, +DBL_MAX]
+ * @return hyperbolic arc cosine of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Acosh(const D d, V x);
+template <class D, class V>
+HWY_NOINLINE V CallAcosh(const D d, VecArg<V> x) {
+  return Acosh(d, x);
+}
+
+/**
+ * Highway SIMD version of std::asin(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 2
+ *      Valid Range: [-1, +1]
+ * @return arc sine of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Asin(const D d, V x);
+template <class D, class V>
+HWY_NOINLINE V CallAsin(const D d, VecArg<V> x) {
+  return Asin(d, x);
+}
+
+/**
+ * Highway SIMD version of std::asinh(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 3
+ *      Valid Range: float32[-FLT_MAX, +FLT_MAX], float64[-DBL_MAX, +DBL_MAX]
+ * @return hyperbolic arc sine of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Asinh(const D d, V x);
+template <class D, class V>
+HWY_NOINLINE V CallAsinh(const D d, VecArg<V> x) {
+  return Asinh(d, x);
+}
+
+/**
+ * Highway SIMD version of std::atan(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 3
+ *      Valid Range: float32[-FLT_MAX, +FLT_MAX], float64[-DBL_MAX, +DBL_MAX]
+ * @return arc tangent of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Atan(const D d, V x);
+template <class D, class V>
+HWY_NOINLINE V CallAtan(const D d, VecArg<V> x) {
+  return Atan(d, x);
+}
+
+/**
+ * Highway SIMD version of std::atanh(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 3
+ *      Valid Range: (-1, +1)
+ * @return hyperbolic arc tangent of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Atanh(const D d, V x);
+template <class D, class V>
+HWY_NOINLINE V CallAtanh(const D d, VecArg<V> x) {
+  return Atanh(d, x);
+}
+
+// Atan2 was added later and some users may be implementing it themselves, so
+// notify them that this version of Highway defines it already.
+#ifndef HWY_HAVE_ATAN2
+#define HWY_HAVE_ATAN2 1
+#endif
+
+/**
+ * Highway SIMD version of std::atan2(y, x).
+ *
+ * Valid Lane Types: float32, float64
+ * Correctly handles negative zero, infinities, and NaN.
+ * @return atan2 of 'y', 'x'
+ */
+template <class D, class V = VFromD<D>, class M = MFromD<D>,
+          typename T = TFromD<D>>
+HWY_INLINE V Atan2(const D d, V y, V x) {
+  const V kHalf = Set(d, static_cast<T>(+0.5));
+  const V kPi = Set(d, static_cast<T>(+3.14159265358979323846264));
+  const V kPi2 = Mul(kPi, kHalf);
+
+  const V k0 = Zero(d);
+  const M y_0 = Eq(y, k0);
+  const M x_0 = Eq(x, k0);
+  const M x_neg = Lt(x, k0);
+  const M y_inf = IsInf(y);
+  const M x_inf = IsInf(x);
+  const M nan = Or(IsNaN(y), IsNaN(x));
+
+  const V if_xneg_pi = IfThenElseZero(x_neg, kPi);
+  // Magnitude for infinite y. x = +inf: pi/4; x = -inf: 3*pi/4; else: pi/2
+  const V if_yinf = Mul(kHalf, IfThenElse(x_inf, Add(kPi2, if_xneg_pi), kPi));
+
+  V t = Atan(d, Div(y, x));
+  // Disambiguate between quadrants 1/3 and 2/4 by adding (Q2: Pi; Q3: -Pi).
+  t = Add(t, CopySignToAbs(if_xneg_pi, y));
+  // Special cases for 0 and infinity:
+  t = IfThenElse(x_inf, if_xneg_pi, t);
+  t = IfThenElse(x_0, kPi2, t);
+  t = IfThenElse(y_inf, if_yinf, t);
+  t = IfThenElse(y_0, if_xneg_pi, t);
+  // Any input NaN => NaN, otherwise fix sign.
+  return IfThenElse(nan, NaN(d), CopySign(t, y));
+}
+template <class D, class V>
+HWY_NOINLINE V CallAtan2(const D d, VecArg<V> y, VecArg<V> x) {
+  return Atan2(d, y, x);
+}
+
+/**
+ * Highway SIMD version of std::cos(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 3
+ *      Valid Range: [-39000, +39000]
+ * @return cosine of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Cos(const D d, V x);
+template <class D, class V>
+HWY_NOINLINE V CallCos(const D d, VecArg<V> x) {
+  return Cos(d, x);
+}
+
+/**
+ * Highway SIMD version of std::exp(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 1
+ *      Valid Range: float32[-FLT_MAX, +104], float64[-DBL_MAX, +706]
+ * @return e^x
+ */
+template <class D, class V>
+HWY_INLINE V Exp(const D d, V x);
+template <class D, class V>
+HWY_NOINLINE V CallExp(const D d, VecArg<V> x) {
+  return Exp(d, x);
+}
+
+/**
+ * Highway SIMD version of std::expm1(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 4
+ *      Valid Range: float32[-FLT_MAX, +104], float64[-DBL_MAX, +706]
+ * @return e^x - 1
+ */
+template <class D, class V>
+HWY_INLINE V Expm1(const D d, V x);
+template <class D, class V>
+HWY_NOINLINE V CallExpm1(const D d, VecArg<V> x) {
+  return Expm1(d, x);
+}
+
+/**
+ * Highway SIMD version of std::log(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 4
+ *      Valid Range: float32(0, +FLT_MAX], float64(0, +DBL_MAX]
+ * @return natural logarithm of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Log(const D d, V x);
+template <class D, class V>
+HWY_NOINLINE V CallLog(const D d, VecArg<V> x) {
+  return Log(d, x);
+}
+
+/**
+ * Highway SIMD version of std::log10(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 2
+ *      Valid Range: float32(0, +FLT_MAX], float64(0, +DBL_MAX]
+ * @return base 10 logarithm of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Log10(const D d, V x);
+template <class D, class V>
+HWY_NOINLINE V CallLog10(const D d, VecArg<V> x) {
+  return Log10(d, x);
+}
+
+/**
+ * Highway SIMD version of std::log1p(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 2
+ *      Valid Range: float32[0, +FLT_MAX], float64[0, +DBL_MAX]
+ * @return log(1 + x)
+ */
+template <class D, class V>
+HWY_INLINE V Log1p(const D d, V x);
+template <class D, class V>
+HWY_NOINLINE V CallLog1p(const D d, VecArg<V> x) {
+  return Log1p(d, x);
+}
+
+/**
+ * Highway SIMD version of std::log2(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 2
+ *      Valid Range: float32(0, +FLT_MAX], float64(0, +DBL_MAX]
+ * @return base 2 logarithm of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Log2(const D d, V x);
+template <class D, class V>
+HWY_NOINLINE V CallLog2(const D d, VecArg<V> x) {
+  return Log2(d, x);
+}
+
+/**
+ * Highway SIMD version of std::sin(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 3
+ *      Valid Range: [-39000, +39000]
+ * @return sine of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Sin(const D d, V x);
+template <class D, class V>
+HWY_NOINLINE V CallSin(const D d, VecArg<V> x) {
+  return Sin(d, x);
+}
+
+/**
+ * Highway SIMD version of std::sinh(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 4
+ *      Valid Range: float32[-88.7228, +88.7228], float64[-709, +709]
+ * @return hyperbolic sine of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Sinh(const D d, V x);
+template <class D, class V>
+HWY_NOINLINE V CallSinh(const D d, VecArg<V> x) {
+  return Sinh(d, x);
+}
+
+/**
+ * Highway SIMD version of std::tanh(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 4
+ *      Valid Range: float32[-FLT_MAX, +FLT_MAX], float64[-DBL_MAX, +DBL_MAX]
+ * @return hyperbolic tangent of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Tanh(const D d, V x);
+template <class D, class V>
+HWY_NOINLINE V CallTanh(const D d, VecArg<V> x) {
+  return Tanh(d, x);
+}
+
+/**
+ * Highway SIMD version of SinCos.
+ * Computes the sine and cosine of 'x' at the same time.
+ * The performance should be around the same as calling Sin.
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 1
+ *      Valid Range: [-39000, +39000]
+ * Outputs (no return value): 's' = sine of 'x', 'c' = cosine of 'x'
+ */
+template <class D, class V>
+HWY_INLINE void SinCos(const D d, V x, V& s, V& c);
+template <class D, class V>
+HWY_NOINLINE void CallSinCos(const D d, VecArg<V> x, V& s, V& c) {
+  SinCos(d, x, s, c);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Implementation
+////////////////////////////////////////////////////////////////////////////////
+namespace impl {
+
+// Estrin's scheme is a faster method for evaluating large polynomials on
+// superscalar architectures. It works by factoring the Horner's method
+// polynomial into power-of-two subtrees that can be evaluated in parallel.
+// Wikipedia link: https://en.wikipedia.org/wiki/Estrin%27s_scheme
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1) {
+  return MulAdd(c1, x, c0);
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2) {
+  T x2 = Mul(x, x);
+  return MulAdd(x2, c2, MulAdd(c1, x, c0));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3) {
+  T x2 = Mul(x, x);
+  return MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4) {
+  T x2 = Mul(x, x);
+  T x4 = Mul(x2, x2);
+  return MulAdd(x4, c4, MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0)));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5) {
+  T x2 = Mul(x, x);
+  T x4 = Mul(x2, x2);
+  return MulAdd(x4, MulAdd(c5, x, c4),
+                MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0)));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6) {
+  T x2 = Mul(x, x);
+  T x4 = Mul(x2, x2);
+  return MulAdd(x4, MulAdd(x2, c6, MulAdd(c5, x, c4)),
+                MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0)));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7) {
+  T x2 = Mul(x, x);
+  T x4 = Mul(x2, x2);
+  return MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0)));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8) {
+  T x2 = Mul(x, x);
+  T x4 = Mul(x2, x2);
+  T x8 = Mul(x4, x4);
+  return MulAdd(x8, c8,
+                MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                       MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0))));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8, T c9) {
+  T x2 = Mul(x, x);
+  T x4 = Mul(x2, x2);
+  T x8 = Mul(x4, x4);
+  return MulAdd(x8, MulAdd(c9, x, c8),
+                MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                       MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0))));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8, T c9, T c10) {
+  T x2 = Mul(x, x);
+  T x4 = Mul(x2, x2);
+  T x8 = Mul(x4, x4);
+  return MulAdd(x8, MulAdd(x2, c10, MulAdd(c9, x, c8)),
+                MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                       MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0))));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8, T c9, T c10, T c11) {
+  T x2 = Mul(x, x);
+  T x4 = Mul(x2, x2);
+  T x8 = Mul(x4, x4);
+  return MulAdd(x8, MulAdd(x2, MulAdd(c11, x, c10), MulAdd(c9, x, c8)),
+                MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                       MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0))));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8, T c9, T c10, T c11,
+                                     T c12) {
+  T x2 = Mul(x, x);
+  T x4 = Mul(x2, x2);
+  T x8 = Mul(x4, x4);
+  return MulAdd(
+      x8, MulAdd(x4, c12, MulAdd(x2, MulAdd(c11, x, c10), MulAdd(c9, x, c8))),
+      MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+             MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0))));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8, T c9, T c10, T c11,
+                                     T c12, T c13) {
+  T x2 = Mul(x, x);
+  T x4 = Mul(x2, x2);
+  T x8 = Mul(x4, x4);
+  return MulAdd(x8,
+                MulAdd(x4, MulAdd(c13, x, c12),
+                       MulAdd(x2, MulAdd(c11, x, c10), MulAdd(c9, x, c8))),
+                MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                       MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0))));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8, T c9, T c10, T c11,
+                                     T c12, T c13, T c14) {
+  T x2 = Mul(x, x);
+  T x4 = Mul(x2, x2);
+  T x8 = Mul(x4, x4);
+  return MulAdd(x8,
+                MulAdd(x4, MulAdd(x2, c14, MulAdd(c13, x, c12)),
+                       MulAdd(x2, MulAdd(c11, x, c10), MulAdd(c9, x, c8))),
+                MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                       MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0))));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8, T c9, T c10, T c11,
+                                     T c12, T c13, T c14, T c15) {
+  T x2 = Mul(x, x);
+  T x4 = Mul(x2, x2);
+  T x8 = Mul(x4, x4);
+  return MulAdd(x8,
+                MulAdd(x4, MulAdd(x2, MulAdd(c15, x, c14), MulAdd(c13, x, c12)),
+                       MulAdd(x2, MulAdd(c11, x, c10), MulAdd(c9, x, c8))),
+                MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                       MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0))));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8, T c9, T c10, T c11,
+                                     T c12, T c13, T c14, T c15, T c16) {
+  T x2 = Mul(x, x);
+  T x4 = Mul(x2, x2);
+  T x8 = Mul(x4, x4);
+  T x16 = Mul(x8, x8);
+  return MulAdd(
+      x16, c16,
+      MulAdd(x8,
+             MulAdd(x4, MulAdd(x2, MulAdd(c15, x, c14), MulAdd(c13, x, c12)),
+                    MulAdd(x2, MulAdd(c11, x, c10), MulAdd(c9, x, c8))),
+             MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                    MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0)))));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8, T c9, T c10, T c11,
+                                     T c12, T c13, T c14, T c15, T c16, T c17) {
+  T x2 = Mul(x, x);
+  T x4 = Mul(x2, x2);
+  T x8 = Mul(x4, x4);
+  T x16 = Mul(x8, x8);
+  return MulAdd(
+      x16, MulAdd(c17, x, c16),
+      MulAdd(x8,
+             MulAdd(x4, MulAdd(x2, MulAdd(c15, x, c14), MulAdd(c13, x, c12)),
+                    MulAdd(x2, MulAdd(c11, x, c10), MulAdd(c9, x, c8))),
+             MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                    MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0)))));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8, T c9, T c10, T c11,
+                                     T c12, T c13, T c14, T c15, T c16, T c17,
+                                     T c18) {
+  T x2 = Mul(x, x);
+  T x4 = Mul(x2, x2);
+  T x8 = Mul(x4, x4);
+  T x16 = Mul(x8, x8);
+  return MulAdd(
+      x16, MulAdd(x2, c18, MulAdd(c17, x, c16)),
+      MulAdd(x8,
+             MulAdd(x4, MulAdd(x2, MulAdd(c15, x, c14), MulAdd(c13, x, c12)),
+                    MulAdd(x2, MulAdd(c11, x, c10), MulAdd(c9, x, c8))),
+             MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                    MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0)))));
+}
+
+template <class FloatOrDouble>
+struct AsinImpl {};
+template <class FloatOrDouble>
+struct AtanImpl {};
+template <class FloatOrDouble>
+struct CosSinImpl {};
+template <class FloatOrDouble>
+struct ExpImpl {};
+template <class FloatOrDouble>
+struct LogImpl {};
+template <class FloatOrDouble>
+struct SinCosImpl {};
+
+template <>
+struct AsinImpl<float> {
+  // Polynomial approximation for asin(x) over the range [0, 0.5).
+  template <class D, class V>
+  HWY_INLINE V AsinPoly(D d, V x2, V /*x*/) {
+    const auto k0 = Set(d, +0.1666677296f);
+    const auto k1 = Set(d, +0.07495029271f);
+    const auto k2 = Set(d, +0.04547423869f);
+    const auto k3 = Set(d, +0.02424046025f);
+    const auto k4 = Set(d, +0.04197454825f);
+
+    return Estrin(x2, k0, k1, k2, k3, k4);
+  }
+};
+
+#if HWY_HAVE_FLOAT64 && HWY_HAVE_INTEGER64
+
+template <>
+struct AsinImpl<double> {
+  // Polynomial approximation for asin(x) over the range [0, 0.5).
+  template <class D, class V>
+  HWY_INLINE V AsinPoly(D d, V x2, V /*x*/) {
+    const auto k0 = Set(d, +0.1666666666666497543);
+    const auto k1 = Set(d, +0.07500000000378581611);
+    const auto k2 = Set(d, +0.04464285681377102438);
+    const auto k3 = Set(d, +0.03038195928038132237);
+    const auto k4 = Set(d, +0.02237176181932048341);
+    const auto k5 = Set(d, +0.01735956991223614604);
+    const auto k6 = Set(d, +0.01388715184501609218);
+    const auto k7 = Set(d, +0.01215360525577377331);
+    const auto k8 = Set(d, +0.006606077476277170610);
+    const auto k9 = Set(d, +0.01929045477267910674);
+    const auto k10 = Set(d, -0.01581918243329996643);
+    const auto k11 = Set(d, +0.03161587650653934628);
+
+    return Estrin(x2, k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11);
+  }
+};
+
+#endif
+
+template <>
+struct AtanImpl<float> {
+  // Polynomial approximation for atan(x) over the range [0, 1.0).
+  template <class D, class V>
+  HWY_INLINE V AtanPoly(D d, V x) {
+    const auto k0 = Set(d, -0.333331018686294555664062f);
+    const auto k1 = Set(d, +0.199926957488059997558594f);
+    const auto k2 = Set(d, -0.142027363181114196777344f);
+    const auto k3 = Set(d, +0.106347933411598205566406f);
+    const auto k4 = Set(d, -0.0748900920152664184570312f);
+    const auto k5 = Set(d, +0.0425049886107444763183594f);
+    const auto k6 = Set(d, -0.0159569028764963150024414f);
+    const auto k7 = Set(d, +0.00282363896258175373077393f);
+
+    const auto y = Mul(x, x);
+    return MulAdd(Estrin(y, k0, k1, k2, k3, k4, k5, k6, k7), Mul(y, x), x);
+  }
+};
+
+#if HWY_HAVE_FLOAT64 && HWY_HAVE_INTEGER64
+
+template <>
+struct AtanImpl<double> {
+  // Polynomial approximation for atan(x) over the range [0, 1.0).
+  template <class D, class V>
+  HWY_INLINE V AtanPoly(D d, V x) {
+    const auto k0 = Set(d, -0.333333333333311110369124);
+    const auto k1 = Set(d, +0.199999999996591265594148);
+    const auto k2 = Set(d, -0.14285714266771329383765);
+    const auto k3 = Set(d, +0.111111105648261418443745);
+    const auto k4 = Set(d, -0.090908995008245008229153);
+    const auto k5 = Set(d, +0.0769219538311769618355029);
+    const auto k6 = Set(d, -0.0666573579361080525984562);
+    const auto k7 = Set(d, +0.0587666392926673580854313);
+    const auto k8 = Set(d, -0.0523674852303482457616113);
+    const auto k9 = Set(d, +0.0466667150077840625632675);
+    const auto k10 = Set(d, -0.0407629191276836500001934);
+    const auto k11 = Set(d, +0.0337852580001353069993897);
+    const auto k12 = Set(d, -0.0254517624932312641616861);
+    const auto k13 = Set(d, +0.016599329773529201970117);
+    const auto k14 = Set(d, -0.00889896195887655491740809);
+    const auto k15 = Set(d, +0.00370026744188713119232403);
+    const auto k16 = Set(d, -0.00110611831486672482563471);
+    const auto k17 = Set(d, +0.000209850076645816976906797);
+    const auto k18 = Set(d, -1.88796008463073496563746e-5);
+
+    const auto y = Mul(x, x);
+    return MulAdd(Estrin(y, k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11,
+                         k12, k13, k14, k15, k16, k17, k18),
+                  Mul(y, x), x);
+  }
+};
+
+#endif
+
+template <>
+struct CosSinImpl<float> {
+  // Rounds float toward zero and returns as int32_t.
+  template <class D, class V>
+  HWY_INLINE Vec<Rebind<int32_t, D>> ToInt32(D /*unused*/, V x) {
+    return ConvertTo(Rebind<int32_t, D>(), x);
+  }
+
+  template <class D, class V>
+  HWY_INLINE V Poly(D d, V x) {
+    const auto k0 = Set(d, -1.66666597127914428710938e-1f);
+    const auto k1 = Set(d, +8.33307858556509017944336e-3f);
+    const auto k2 = Set(d, -1.981069071916863322258e-4f);
+    const auto k3 = Set(d, +2.6083159809786593541503e-6f);
+
+    const auto y = Mul(x, x);
+    return MulAdd(Estrin(y, k0, k1, k2, k3), Mul(y, x), x);
+  }
+
+  template <class D, class V, class VI32>
+  HWY_INLINE V CosReduce(D d, V x, VI32 q) {
+    // kHalfPiPart0f + kHalfPiPart1f + kHalfPiPart2f + kHalfPiPart3f ~= -pi/2
+    const V kHalfPiPart0f = Set(d, -0.5f * 3.140625f);
+    const V kHalfPiPart1f = Set(d, -0.5f * 0.0009670257568359375f);
+    const V kHalfPiPart2f = Set(d, -0.5f * 6.2771141529083251953e-7f);
+    const V kHalfPiPart3f = Set(d, -0.5f * 1.2154201256553420762e-10f);
+
+    // Extended precision modular arithmetic.
+    const V qf = ConvertTo(d, q);
+    x = MulAdd(qf, kHalfPiPart0f, x);
+    x = MulAdd(qf, kHalfPiPart1f, x);
+    x = MulAdd(qf, kHalfPiPart2f, x);
+    x = MulAdd(qf, kHalfPiPart3f, x);
+    return x;
+  }
+
+  template <class D, class V, class VI32>
+  HWY_INLINE V SinReduce(D d, V x, VI32 q) {
+    // kPiPart0f + kPiPart1f + kPiPart2f + kPiPart3f ~= -pi
+    const V kPiPart0f = Set(d, -3.140625f);
+    const V kPiPart1f = Set(d, -0.0009670257568359375f);
+    const V kPiPart2f = Set(d, -6.2771141529083251953e-7f);
+    const V kPiPart3f = Set(d, -1.2154201256553420762e-10f);
+
+    // Extended precision modular arithmetic.
+    const V qf = ConvertTo(d, q);
+    x = MulAdd(qf, kPiPart0f, x);
+    x = MulAdd(qf, kPiPart1f, x);
+    x = MulAdd(qf, kPiPart2f, x);
+    x = MulAdd(qf, kPiPart3f, x);
+    return x;
+  }
+
+  // (q & 2) == 0 ? -0.0 : +0.0
+  template <class D, class VI32>
+  HWY_INLINE Vec<Rebind<float, D>> CosSignFromQuadrant(D d, VI32 q) {
+    const VI32 kTwo = Set(Rebind<int32_t, D>(), 2);
+    return BitCast(d, ShiftLeft<30>(AndNot(q, kTwo)));
+  }
+
+  // ((q & 1) ? -0.0 : +0.0)
+  template <class D, class VI32>
+  HWY_INLINE Vec<Rebind<float, D>> SinSignFromQuadrant(D d, VI32 q) {
+    const VI32 kOne = Set(Rebind<int32_t, D>(), 1);
+    return BitCast(d, ShiftLeft<31>(And(q, kOne)));
+  }
+};
+
+#if HWY_HAVE_FLOAT64 && HWY_HAVE_INTEGER64
+
+template <>
+struct CosSinImpl<double> {
+  // Rounds double toward zero and returns as int32_t.
+  template <class D, class V>
+  HWY_INLINE Vec<Rebind<int32_t, D>> ToInt32(D /*unused*/, V x) {
+    return DemoteTo(Rebind<int32_t, D>(), x);
+  }
+
+  template <class D, class V>
+  HWY_INLINE V Poly(D d, V x) {
+    const auto k0 = Set(d, -0.166666666666666657414808);
+    const auto k1 = Set(d, +0.00833333333333332974823815);
+    const auto k2 = Set(d, -0.000198412698412696162806809);
+    const auto k3 = Set(d, +2.75573192239198747630416e-6);
+    const auto k4 = Set(d, -2.50521083763502045810755e-8);
+    const auto k5 = Set(d, +1.60590430605664501629054e-10);
+    const auto k6 = Set(d, -7.64712219118158833288484e-13);
+    const auto k7 = Set(d, +2.81009972710863200091251e-15);
+    const auto k8 = Set(d, -7.97255955009037868891952e-18);
+
+    const auto y = Mul(x, x);
+    return MulAdd(Estrin(y, k0, k1, k2, k3, k4, k5, k6, k7, k8), Mul(y, x), x);
+  }
+
+  template <class D, class V, class VI32>
+  HWY_INLINE V CosReduce(D d, V x, VI32 q) {
+    // kHalfPiPart0d + kHalfPiPart1d + kHalfPiPart2d + kHalfPiPart3d ~= -pi/2
+    const V kHalfPiPart0d = Set(d, -0.5 * 3.1415926218032836914);
+    const V kHalfPiPart1d = Set(d, -0.5 * 3.1786509424591713469e-8);
+    const V kHalfPiPart2d = Set(d, -0.5 * 1.2246467864107188502e-16);
+    const V kHalfPiPart3d = Set(d, -0.5 * 1.2736634327021899816e-24);
+
+    // Extended precision modular arithmetic.
+    const V qf = PromoteTo(d, q);
+    x = MulAdd(qf, kHalfPiPart0d, x);
+    x = MulAdd(qf, kHalfPiPart1d, x);
+    x = MulAdd(qf, kHalfPiPart2d, x);
+    x = MulAdd(qf, kHalfPiPart3d, x);
+    return x;
+  }
+
+  template <class D, class V, class VI32>
+  HWY_INLINE V SinReduce(D d, V x, VI32 q) {
+    // kPiPart0d + kPiPart1d + kPiPart2d + kPiPart3d ~= -pi
+    const V kPiPart0d = Set(d, -3.1415926218032836914);
+    const V kPiPart1d = Set(d, -3.1786509424591713469e-8);
+    const V kPiPart2d = Set(d, -1.2246467864107188502e-16);
+    const V kPiPart3d = Set(d, -1.2736634327021899816e-24);
+
+    // Extended precision modular arithmetic.
+    const V qf = PromoteTo(d, q);
+    x = MulAdd(qf, kPiPart0d, x);
+    x = MulAdd(qf, kPiPart1d, x);
+    x = MulAdd(qf, kPiPart2d, x);
+    x = MulAdd(qf, kPiPart3d, x);
+    return x;
+  }
+
+  // Returns (q & 2) == 0 ? -0.0 : +0.0, i.e. at most the sign bit is set.
+  template <class D, class VI32>
+  HWY_INLINE Vec<Rebind<double, D>> CosSignFromQuadrant(D d, VI32 q) {
+    const VI32 kTwo = Set(Rebind<int32_t, D>(), 2);  // selects bit 1 of q
+    return BitCast(
+        d, ShiftLeft<62>(PromoteTo(Rebind<int64_t, D>(), AndNot(q, kTwo))));
+  }
+
+  // Returns (q & 1) ? -0.0 : +0.0, i.e. at most the sign bit is set.
+  template <class D, class VI32>
+  HWY_INLINE Vec<Rebind<double, D>> SinSignFromQuadrant(D d, VI32 q) {
+    const VI32 kOne = Set(Rebind<int32_t, D>(), 1);  // selects bit 0 of q
+    return BitCast(
+        d, ShiftLeft<63>(PromoteTo(Rebind<int64_t, D>(), And(q, kOne))));
+  }
+};
+
+#endif
+
+template <>
+struct ExpImpl<float> {
+  // Rounds float toward zero and returns as int32_t (via ConvertTo).
+  template <class D, class V>
+  HWY_INLINE Vec<Rebind<int32_t, D>> ToInt32(D /*unused*/, V x) {
+    return ConvertTo(Rebind<int32_t, D>(), x);
+  }
+
+  template <class D, class V>
+  HWY_INLINE V ExpPoly(D d, V x) {  // Returns x + x^2 * Estrin(x, k0..k5).
+    const auto k0 = Set(d, +0.5f);
+    const auto k1 = Set(d, +0.166666671633720397949219f);
+    const auto k2 = Set(d, +0.0416664853692054748535156f);
+    const auto k3 = Set(d, +0.00833336077630519866943359f);
+    const auto k4 = Set(d, +0.00139304355252534151077271f);
+    const auto k5 = Set(d, +0.000198527617612853646278381f);
+
+    return MulAdd(Estrin(x, k0, k1, k2, k3, k4, k5), Mul(x, x), x);
+  }
+
+  // Computes 2^x for integer x: (x + 0x7F) is shifted left by 23 and bit-cast.
+  template <class D, class VI32>
+  HWY_INLINE Vec<D> Pow2I(D d, VI32 x) {
+    const Rebind<int32_t, D> di32;
+    const VI32 kOffset = Set(di32, 0x7F);
+    return BitCast(d, ShiftLeft<23>(Add(x, kOffset)));
+  }
+
+  // Returns x * 2^e using two factors: 2^(e >> 1) and 2^(e - (e >> 1)).
+  template <class D, class V, class VI32>
+  HWY_INLINE V LoadExpShortRange(D d, V x, VI32 e) {
+    const VI32 y = ShiftRight<1>(e);  // first half of the exponent
+    return Mul(Mul(x, Pow2I(d, y)), Pow2I(d, Sub(e, y)));
+  }
+
+  template <class D, class V, class VI32>
+  HWY_INLINE V ExpReduce(D d, V x, VI32 q) {
+    // Returns x + q * (-ln(2)); kLn2Part0f + kLn2Part1f ~= -ln(2)
+    const V kLn2Part0f = Set(d, -0.693145751953125f);
+    const V kLn2Part1f = Set(d, -1.428606765330187045e-6f);
+
+    // Extended precision modular arithmetic: x += q * part, larger part first.
+    const V qf = ConvertTo(d, q);
+    x = MulAdd(qf, kLn2Part0f, x);
+    x = MulAdd(qf, kLn2Part1f, x);
+    return x;
+  }
+};
+
+template <>
+struct LogImpl<float> {
+  template <class D, class V>
+  HWY_INLINE Vec<Rebind<int32_t, D>> Log2p1NoSubnormal(D /*d*/, V x) {
+    const Rebind<int32_t, D> di32;
+    const Rebind<uint32_t, D> du32;  // bits of x are shifted as unsigned
+    const auto kBias = Set(di32, 0x7F);  // subtracted from (bits of x) >> 23
+    return Sub(BitCast(di32, ShiftRight<23>(BitCast(du32, x))), kBias);
+  }
+
+  // Approximates Log(x) over the range [sqrt(2) / 2, sqrt(2)].
+  template <class D, class V>
+  HWY_INLINE V LogPoly(D d, V x) {
+    const V k0 = Set(d, 0.66666662693f);
+    const V k1 = Set(d, 0.40000972152f);
+    const V k2 = Set(d, 0.28498786688f);
+    const V k3 = Set(d, 0.24279078841f);
+
+    const V x2 = Mul(x, x);  // x^2
+    const V x4 = Mul(x2, x2);  // x^4
+    return MulAdd(MulAdd(k2, x4, k0), x2, Mul(MulAdd(k3, x4, k1), x4));
+  }
+};
+
+#if HWY_HAVE_FLOAT64 && HWY_HAVE_INTEGER64
+template <>
+struct ExpImpl<double> {
+  // Rounds double toward zero and returns as int32_t (via DemoteTo).
+  template <class D, class V>
+  HWY_INLINE Vec<Rebind<int32_t, D>> ToInt32(D /*unused*/, V x) {
+    return DemoteTo(Rebind<int32_t, D>(), x);
+  }
+
+  template <class D, class V>
+  HWY_INLINE V ExpPoly(D d, V x) {  // Returns x + x^2 * Estrin(x, k0..k10).
+    const auto k0 = Set(d, +0.5);
+    const auto k1 = Set(d, +0.166666666666666851703837);
+    const auto k2 = Set(d, +0.0416666666666665047591422);
+    const auto k3 = Set(d, +0.00833333333331652721664984);
+    const auto k4 = Set(d, +0.00138888888889774492207962);
+    const auto k5 = Set(d, +0.000198412698960509205564975);
+    const auto k6 = Set(d, +2.4801587159235472998791e-5);
+    const auto k7 = Set(d, +2.75572362911928827629423e-6);
+    const auto k8 = Set(d, +2.75573911234900471893338e-7);
+    const auto k9 = Set(d, +2.51112930892876518610661e-8);
+    const auto k10 = Set(d, +2.08860621107283687536341e-9);
+
+    return MulAdd(Estrin(x, k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10),
+                  Mul(x, x), x);
+  }
+
+  // Computes 2^x for integer x: (x + 0x3FF) is widened, shifted left by 52.
+  template <class D, class VI32>
+  HWY_INLINE Vec<D> Pow2I(D d, VI32 x) {
+    const Rebind<int32_t, D> di32;
+    const Rebind<int64_t, D> di64;
+    const VI32 kOffset = Set(di32, 0x3FF);
+    return BitCast(d, ShiftLeft<52>(PromoteTo(di64, Add(x, kOffset))));
+  }
+
+  // Returns x * 2^e using two factors: 2^(e >> 1) and 2^(e - (e >> 1)).
+  template <class D, class V, class VI32>
+  HWY_INLINE V LoadExpShortRange(D d, V x, VI32 e) {
+    const VI32 y = ShiftRight<1>(e);  // first half of the exponent
+    return Mul(Mul(x, Pow2I(d, y)), Pow2I(d, Sub(e, y)));
+  }
+
+  template <class D, class V, class VI32>
+  HWY_INLINE V ExpReduce(D d, V x, VI32 q) {
+    // Returns x + q * (-ln(2)); kLn2Part0d + kLn2Part1d ~= -ln(2)
+    const V kLn2Part0d = Set(d, -0.6931471805596629565116018);
+    const V kLn2Part1d = Set(d, -0.28235290563031577122588448175e-12);
+
+    // Extended precision modular arithmetic: x += q * part, larger part first.
+    const V qf = PromoteTo(d, q);
+    x = MulAdd(qf, kLn2Part0d, x);
+    x = MulAdd(qf, kLn2Part1d, x);
+    return x;
+  }
+};
+
+template <>
+struct LogImpl<double> {
+  template <class D, class V>
+  HWY_INLINE Vec<Rebind<int64_t, D>> Log2p1NoSubnormal(D /*d*/, V x) {
+    const Rebind<int64_t, D> di64;
+    const Rebind<uint64_t, D> du64;  // bits of x are shifted as unsigned
+    return Sub(BitCast(di64, ShiftRight<52>(BitCast(du64, x))),
+               Set(di64, 0x3FF));  // subtracted from (bits of x) >> 52
+  }
+
+  // Approximates Log(x) over the range [sqrt(2) / 2, sqrt(2)].
+  template <class D, class V>
+  HWY_INLINE V LogPoly(D d, V x) {
+    const V k0 = Set(d, 0.6666666666666735130);
+    const V k1 = Set(d, 0.3999999999940941908);
+    const V k2 = Set(d, 0.2857142874366239149);
+    const V k3 = Set(d, 0.2222219843214978396);
+    const V k4 = Set(d, 0.1818357216161805012);
+    const V k5 = Set(d, 0.1531383769920937332);
+    const V k6 = Set(d, 0.1479819860511658591);
+
+    const V x2 = Mul(x, x);  // x^2
+    const V x4 = Mul(x2, x2);  // x^4
+    return MulAdd(MulAdd(MulAdd(MulAdd(k6, x4, k4), x4, k2), x4, k0), x2,
+                  (Mul(MulAdd(MulAdd(k5, x4, k3), x4, k1), x4)));
+  }
+};
+
+#endif
+
+template <class D, class V, bool kAllowSubnormals = true>
+HWY_INLINE V Log(const D d, V x) {
+  // See http://git.musl-libc.org/cgit/musl/tree/src/math/log.c for more info.
+  using T = TFromD<D>;
+  impl::LogImpl<T> impl;
+
+  constexpr bool kIsF32 = (sizeof(T) == 4);  // picks float or double constants
+
+  // Float Constants
+  const V kLn2Hi = Set(d, kIsF32 ? static_cast<T>(0.69313812256f)
+                                 : static_cast<T>(0.693147180369123816490));
+  const V kLn2Lo = Set(d, kIsF32 ? static_cast<T>(9.0580006145e-6f)
+                                 : static_cast<T>(1.90821492927058770002e-10));
+  const V kOne = Set(d, static_cast<T>(+1.0));
+  const V kMinNormal = Set(d, kIsF32 ? static_cast<T>(1.175494351e-38f)
+                                     : static_cast<T>(2.2250738585072014e-308));
+  const V kScale = Set(d, kIsF32 ? static_cast<T>(3.355443200e+7f)
+                                 : static_cast<T>(1.8014398509481984e+16));
+
+  // Integer Constants
+  using TI = MakeSigned<T>;
+  const Rebind<TI, D> di;
+  using VI = decltype(Zero(di));
+  const VI kLowerBits = Set(di, kIsF32 ? static_cast<TI>(0x00000000L)
+                                       : static_cast<TI>(0xFFFFFFFFLL));
+  const VI kMagic = Set(di, kIsF32 ? static_cast<TI>(0x3F3504F3L)
+                                   : static_cast<TI>(0x3FE6A09E00000000LL));
+  const VI kExpMask = Set(di, kIsF32 ? static_cast<TI>(0x3F800000L)
+                                     : static_cast<TI>(0x3FF0000000000000LL));
+  const VI kExpScale =
+      Set(di, kIsF32 ? static_cast<TI>(-25) : static_cast<TI>(-54));
+  const VI kManMask = Set(di, kIsF32 ? static_cast<TI>(0x7FFFFFL)
+                                     : static_cast<TI>(0xFFFFF00000000LL));
+
+  // Scale up 'x' by kScale (2^25 or 2^54) so that it is no longer denormalized.
+  VI exp_bits;
+  V exp;
+  if (kAllowSubnormals == true) {
+    const auto is_denormal = Lt(x, kMinNormal);
+    x = IfThenElse(is_denormal, Mul(x, kScale), x);
+
+    // Compute the new exponent; exp_scale (kExpScale or 0) offsets the scaling.
+    exp_bits = Add(BitCast(di, x), Sub(kExpMask, kMagic));
+    const VI exp_scale =
+        BitCast(di, IfThenElseZero(is_denormal, BitCast(d, kExpScale)));
+    exp = ConvertTo(
+        d, Add(exp_scale, impl.Log2p1NoSubnormal(d, BitCast(d, exp_bits))));
+  } else {
+    // Compute the new exponent (no rescaling of x on this path).
+    exp_bits = Add(BitCast(di, x), Sub(kExpMask, kMagic));
+    exp = ConvertTo(d, impl.Log2p1NoSubnormal(d, BitCast(d, exp_bits)));
+  }
+
+  // Renormalize: y = (x & kLowerBits) | ((exp_bits & kManMask) + kMagic).
+  const V y = Or(And(x, BitCast(d, kLowerBits)),
+                 BitCast(d, Add(And(exp_bits, kManMask), kMagic)));
+
+  // Approximate and reconstruct from z = (y - 1) / (y + 1) and exp.
+  const V ym1 = Sub(y, kOne);
+  const V z = Div(ym1, Add(y, kOne));
+
+  return MulSub(
+      exp, kLn2Hi,
+      Sub(MulSub(z, Sub(ym1, impl.LogPoly(d, z)), Mul(exp, kLn2Lo)), ym1));
+}
+
+// SinCos
+// Based on "sse_mathfun.h", by Julien Pommier
+// http://gruntthepeon.free.fr/ssemath/
+
+// Third degree poly: writes the sine of x to `s` and the cosine of x to `c`.
+template <class D, class V>
+HWY_INLINE void SinCos3(D d, TFromD<D> dp1, TFromD<D> dp2, TFromD<D> dp3, V x,
+                        V& s, V& c) {
+  using T = TFromD<D>;
+  using TI = MakeSigned<T>;
+  using DI = Rebind<TI, D>;
+  const DI di;
+  using VI = decltype(Zero(di));
+  using M = Mask<D>;
+
+  static constexpr size_t bits = sizeof(TI) * 8;  // lane width in bits
+  const VI sign_mask = SignBit(di);
+  const VI ci_0 = Zero(di);
+  const VI ci_1 = Set(di, 1);
+  const VI ci_2 = Set(di, 2);
+  const VI ci_4 = Set(di, 4);
+  const V cos_p0 = Set(d, T(2.443315711809948E-005));
+  const V cos_p1 = Set(d, T(-1.388731625493765E-003));
+  const V cos_p2 = Set(d, T(4.166664568298827E-002));
+  const V sin_p0 = Set(d, T(-1.9515295891E-4));
+  const V sin_p1 = Set(d, T(8.3321608736E-3));
+  const V sin_p2 = Set(d, T(-1.6666654611E-1));
+  const V FOPI = Set(d, T(1.27323954473516));  // 4 / M_PI
+  const V DP1 = Set(d, dp1);
+  const V DP2 = Set(d, dp2);
+  const V DP3 = Set(d, dp3);
+
+  V xmm1, xmm2, sign_bit_sin, y;
+  VI imm0, imm2, imm4;
+
+  sign_bit_sin = x;
+  x = Abs(x);
+
+  /* extract the sign bit (upper one) of the original x */
+  sign_bit_sin = And(sign_bit_sin, BitCast(d, sign_mask));
+
+  /* scale by 4/Pi */
+  y = Mul(x, FOPI);
+
+  /* store the integer part of y in imm2 */
+  imm2 = ConvertTo(di, y);
+
+  /* j=(j+1) & (~1) (see the cephes sources) */
+  imm2 = Add(imm2, ci_1);
+  imm2 = AndNot(ci_1, imm2);
+
+  y = ConvertTo(d, imm2);
+  imm4 = imm2;
+
+  /* get the swap sign flag for the sine: bit 2 of j moved to the sign bit */
+  imm0 = And(imm2, ci_4);
+  imm0 = ShiftLeft<bits - 3>(imm0);
+
+  V swap_sign_bit_sin = BitCast(d, imm0);
+
+  /* get the polynomial selection mask for the sine: true where (j & 2) == 0 */
+  imm2 = And(imm2, ci_2);
+  M poly_mask = RebindMask(d, Eq(imm2, ci_0));
+
+  /* The magic pass: "Extended precision modular arithmetic"
+  x = ((x - y * DP1) - y * DP2) - y * DP3; callers pass DP1..DP3 negated */
+  x = MulAdd(y, DP1, x);
+  x = MulAdd(y, DP2, x);
+  x = MulAdd(y, DP3, x);
+
+  imm4 = Sub(imm4, ci_2);
+  imm4 = AndNot(imm4, ci_4);  // ~(j - 2) & 4
+  imm4 = ShiftLeft<bits - 3>(imm4);  // bit 2 moved to the sign bit
+
+  V sign_bit_cos = BitCast(d, imm4);
+
+  sign_bit_sin = Xor(sign_bit_sin, swap_sign_bit_sin);
+
+  /* Evaluate the first polynomial (cosine): 1 - z / 2 + z^2 * P(z), z = x^2 */
+  V z = Mul(x, x);
+
+  y = MulAdd(cos_p0, z, cos_p1);
+  y = MulAdd(y, z, cos_p2);
+  y = Mul(y, z);
+  y = Mul(y, z);
+  y = NegMulAdd(z, Set(d, 0.5f), y);
+  y = Add(y, Set(d, 1));
+
+  /* Evaluate the second polynomial (sine): x + x * z * Q(z), z = x^2 */
+  V y2 = MulAdd(sin_p0, z, sin_p1);
+  y2 = MulAdd(y2, z, sin_p2);
+  y2 = Mul(y2, z);
+  y2 = MulAdd(y2, x, x);
+
+  /* select the correct result from the two polynomials */
+  xmm1 = IfThenElse(poly_mask, y2, y);
+  xmm2 = IfThenElse(poly_mask, y, y2);
+
+  /* update the sign */
+  s = Xor(xmm1, sign_bit_sin);
+  c = Xor(xmm2, sign_bit_cos);
+}
+
+// Sixth degree poly: writes the sine of x to `s` and the cosine of x to `c`.
+template <class D, class V>
+HWY_INLINE void SinCos6(D d, TFromD<D> dp1, TFromD<D> dp2, TFromD<D> dp3, V x,
+                        V& s, V& c) {
+  using T = TFromD<D>;
+  using TI = MakeSigned<T>;
+  using DI = Rebind<TI, D>;
+  const DI di;
+  using VI = decltype(Zero(di));
+  using M = Mask<D>;
+
+  static constexpr size_t bits = sizeof(TI) * 8;  // lane width in bits
+  const VI sign_mask = SignBit(di);
+  const VI ci_0 = Zero(di);
+  const VI ci_1 = Set(di, 1);
+  const VI ci_2 = Set(di, 2);
+  const VI ci_4 = Set(di, 4);
+  const V cos_p0 = Set(d, T(-1.13585365213876817300E-11));
+  const V cos_p1 = Set(d, T(2.08757008419747316778E-9));
+  const V cos_p2 = Set(d, T(-2.75573141792967388112E-7));
+  const V cos_p3 = Set(d, T(2.48015872888517045348E-5));
+  const V cos_p4 = Set(d, T(-1.38888888888730564116E-3));
+  const V cos_p5 = Set(d, T(4.16666666666665929218E-2));
+  const V sin_p0 = Set(d, T(1.58962301576546568060E-10));
+  const V sin_p1 = Set(d, T(-2.50507477628578072866E-8));
+  const V sin_p2 = Set(d, T(2.75573136213857245213E-6));
+  const V sin_p3 = Set(d, T(-1.98412698295895385996E-4));
+  const V sin_p4 = Set(d, T(8.33333333332211858878E-3));
+  const V sin_p5 = Set(d, T(-1.66666666666666307295E-1));
+  const V FOPI = Set(d, T(1.2732395447351626861510701069801148));  // 4 / M_PI
+  const V DP1 = Set(d, dp1);
+  const V DP2 = Set(d, dp2);
+  const V DP3 = Set(d, dp3);
+
+  V xmm1, xmm2, sign_bit_sin, y;
+  VI imm0, imm2, imm4;
+
+  sign_bit_sin = x;
+  x = Abs(x);
+
+  /* extract the sign bit (upper one) of the original x */
+  sign_bit_sin = And(sign_bit_sin, BitCast(d, sign_mask));
+
+  /* scale by 4/Pi */
+  y = Mul(x, FOPI);
+
+  /* store the integer part of y in imm2 */
+  imm2 = ConvertTo(di, y);
+
+  /* j=(j+1) & (~1) (see the cephes sources) */
+  imm2 = Add(imm2, ci_1);
+  imm2 = AndNot(ci_1, imm2);
+
+  y = ConvertTo(d, imm2);
+  imm4 = imm2;
+
+  /* get the swap sign flag for the sine: bit 2 of j moved to the sign bit */
+  imm0 = And(imm2, ci_4);
+  imm0 = ShiftLeft<bits - 3>(imm0);
+
+  V swap_sign_bit_sin = BitCast(d, imm0);
+
+  /* get the polynomial selection mask for the sine: true where (j & 2) == 0 */
+  imm2 = And(imm2, ci_2);
+  M poly_mask = RebindMask(d, Eq(imm2, ci_0));
+
+  /* The magic pass: "Extended precision modular arithmetic"
+    x = ((x - y * DP1) - y * DP2) - y * DP3; callers pass DP1..DP3 negated */
+  x = MulAdd(y, DP1, x);
+  x = MulAdd(y, DP2, x);
+  x = MulAdd(y, DP3, x);
+
+  imm4 = Sub(imm4, ci_2);
+  imm4 = AndNot(imm4, ci_4);  // ~(j - 2) & 4
+  imm4 = ShiftLeft<bits - 3>(imm4);  // bit 2 moved to the sign bit
+
+  V sign_bit_cos = BitCast(d, imm4);
+  sign_bit_sin = Xor(sign_bit_sin, swap_sign_bit_sin);
+
+  /* Evaluate the first polynomial (cosine): 1 - z / 2 + z^2 * P(z), z = x^2 */
+  V z = Mul(x, x);
+
+  y = MulAdd(cos_p0, z, cos_p1);
+  y = MulAdd(y, z, cos_p2);
+  y = MulAdd(y, z, cos_p3);
+  y = MulAdd(y, z, cos_p4);
+  y = MulAdd(y, z, cos_p5);
+  y = Mul(y, z);
+  y = Mul(y, z);
+  y = NegMulAdd(z, Set(d, 0.5f), y);
+  y = Add(y, Set(d, 1.0f));
+
+  /* Evaluate the second polynomial (sine): x + x * z * Q(z), z = x^2 */
+  V y2 = MulAdd(sin_p0, z, sin_p1);
+  y2 = MulAdd(y2, z, sin_p2);
+  y2 = MulAdd(y2, z, sin_p3);
+  y2 = MulAdd(y2, z, sin_p4);
+  y2 = MulAdd(y2, z, sin_p5);
+  y2 = Mul(y2, z);
+  y2 = MulAdd(y2, x, x);
+
+  /* select the correct result from the two polynomials */
+  xmm1 = IfThenElse(poly_mask, y2, y);
+  xmm2 = IfThenElse(poly_mask, y, y2);
+
+  /* update the sign */
+  s = Xor(xmm1, sign_bit_sin);
+  c = Xor(xmm2, sign_bit_cos);
+}
+
+template <>
+struct SinCosImpl<float> {
+  template <class D, class V>
+  HWY_INLINE void SinCos(D d, V x, V& s, V& c) {  // float: forwards to SinCos3
+    SinCos3(d, -0.78515625f, -2.4187564849853515625e-4f,
+            -3.77489497744594108e-8f, x, s, c);  // the three parts ~= -pi/4
+  }
+};
+
+#if HWY_HAVE_FLOAT64 && HWY_HAVE_INTEGER64
+template <>
+struct SinCosImpl<double> {
+  template <class D, class V>
+  HWY_INLINE void SinCos(D d, V x, V& s, V& c) {  // double: forwards to SinCos6
+    SinCos6(d, -7.85398125648498535156E-1, -3.77489470793079817668E-8,
+            -2.69515142907905952645E-15, x, s, c);  // the three parts ~= -pi/4
+  }
+};
+#endif
+
+}  // namespace impl
+
+template <class D, class V>
+HWY_INLINE V Acos(const D d, V x) {  // Per-lane arc cosine of x.
+  using T = TFromD<D>;
+
+  const V kZero = Zero(d);
+  const V kHalf = Set(d, static_cast<T>(+0.5));
+  const V kPi = Set(d, static_cast<T>(+3.14159265358979323846264));
+  const V kPiOverTwo = Set(d, static_cast<T>(+1.57079632679489661923132169));
+
+  const V sign_x = And(SignBit(d), x);  // only the sign bit of x
+  const V abs_x = Xor(x, sign_x);  // |x|
+  const auto mask = Lt(abs_x, kHalf);  // lanes with |x| < 0.5
+  const V yy =
+      IfThenElse(mask, Mul(abs_x, abs_x), NegMulAdd(abs_x, kHalf, kHalf));
+  const V y = IfThenElse(mask, abs_x, Sqrt(yy));  // else sqrt((1 - |x|) / 2)
+
+  impl::AsinImpl<T> impl;
+  const V t = Mul(impl.AsinPoly(d, yy, y), Mul(y, yy));
+
+  const V t_plus_y = Add(t, y);
+  const V z =
+      IfThenElse(mask, Sub(kPiOverTwo, Add(Xor(y, sign_x), Xor(t, sign_x))),
+                 Add(t_plus_y, t_plus_y));
+  return IfThenElse(Or(mask, Ge(x, kZero)), z, Sub(kPi, z));  // pi - z: x <= -0.5
+}
+
+template <class D, class V>
+HWY_INLINE V Acosh(const D d, V x) {  // Per-lane inverse hyperbolic cosine.
+  using T = TFromD<D>;
+
+  const V kLarge = Set(d, static_cast<T>(268435456.0));  // 2^28
+  const V kLog2 = Set(d, static_cast<T>(0.693147180559945286227));  // ln(2)
+  const V kOne = Set(d, static_cast<T>(+1.0));
+  const V kTwo = Set(d, static_cast<T>(+2.0));
+
+  const auto is_x_large = Gt(x, kLarge);
+  const auto is_x_gt_2 = Gt(x, kTwo);
+
+  const V x_minus_1 = Sub(x, kOne);
+  const V y0 = MulSub(kTwo, x, Div(kOne, Add(Sqrt(MulSub(x, x, kOne)), x)));
+  const V y1 =
+      Add(Sqrt(MulAdd(x_minus_1, kTwo, Mul(x_minus_1, x_minus_1))), x_minus_1);
+  const V y2 =
+      IfThenElse(is_x_gt_2, IfThenElse(is_x_large, x, y0), Add(y1, kOne));
+  const V z = impl::Log<D, V, /*kAllowSubnormals=*/false>(d, y2);
+
+  const auto is_pole = Eq(y2, kOne);  // y2 - 1 would be a zero divisor
+  const auto divisor = Sub(IfThenZeroElse(is_pole, y2), kOne);  // -1 if is_pole
+  return Add(IfThenElse(is_x_gt_2, z,
+                        IfThenElse(is_pole, y1, Div(Mul(z, y1), divisor))),
+             IfThenElseZero(is_x_large, kLog2));  // large x: Log(x) + ln(2)
+}
+
+template <class D, class V>
+HWY_INLINE V Asin(const D d, V x) {  // Per-lane arc sine of x.
+  using T = TFromD<D>;
+
+  const V kHalf = Set(d, static_cast<T>(+0.5));
+  const V kTwo = Set(d, static_cast<T>(+2.0));
+  const V kPiOverTwo = Set(d, static_cast<T>(+1.57079632679489661923132169));
+
+  const V sign_x = And(SignBit(d), x);  // only the sign bit of x
+  const V abs_x = Xor(x, sign_x);  // |x|
+  const auto mask = Lt(abs_x, kHalf);  // lanes with |x| < 0.5
+  const V yy =
+      IfThenElse(mask, Mul(abs_x, abs_x), NegMulAdd(abs_x, kHalf, kHalf));
+  const V y = IfThenElse(mask, abs_x, Sqrt(yy));  // else sqrt((1 - |x|) / 2)
+
+  impl::AsinImpl<T> impl;
+  const V z0 = MulAdd(impl.AsinPoly(d, yy, y), Mul(yy, y), y);
+  const V z1 = NegMulAdd(z0, kTwo, kPiOverTwo);  // pi/2 - 2 * z0
+  return Or(IfThenElse(mask, z0, z1), sign_x);  // reapply the sign of x
+}
+
+template <class D, class V>
+HWY_INLINE V Asinh(const D d, V x) {  // Per-lane inverse hyperbolic sine.
+  using T = TFromD<D>;
+
+  const V kSmall = Set(d, static_cast<T>(1.0 / 268435456.0));  // 2^-28
+  const V kLarge = Set(d, static_cast<T>(268435456.0));  // 2^28
+  const V kLog2 = Set(d, static_cast<T>(0.693147180559945286227));  // ln(2)
+  const V kOne = Set(d, static_cast<T>(+1.0));
+  const V kTwo = Set(d, static_cast<T>(+2.0));
+
+  const V sign_x = And(SignBit(d), x);  // Extract the sign bit
+  const V abs_x = Xor(x, sign_x);
+
+  const auto is_x_large = Gt(abs_x, kLarge);
+  const auto is_x_lt_2 = Lt(abs_x, kTwo);
+
+  const V x2 = Mul(x, x);
+  const V sqrt_x2_plus_1 = Sqrt(Add(x2, kOne));
+
+  const V y0 = MulAdd(abs_x, kTwo, Div(kOne, Add(sqrt_x2_plus_1, abs_x)));
+  const V y1 = Add(Div(x2, Add(sqrt_x2_plus_1, kOne)), abs_x);
+  const V y2 =
+      IfThenElse(is_x_lt_2, Add(y1, kOne), IfThenElse(is_x_large, abs_x, y0));
+  const V z = impl::Log<D, V, /*kAllowSubnormals=*/false>(d, y2);
+
+  const auto is_pole = Eq(y2, kOne);  // y2 - 1 would be a zero divisor
+  const auto divisor = Sub(IfThenZeroElse(is_pole, y2), kOne);  // -1 if is_pole
+  const auto large = IfThenElse(is_pole, y1, Div(Mul(z, y1), divisor));
+  const V y = IfThenElse(Lt(abs_x, kSmall), x, large);  // used when |x| < 2
+  return Or(Add(IfThenElse(is_x_lt_2, y, z), IfThenElseZero(is_x_large, kLog2)),
+            sign_x);  // reapply the sign of x
+}
+
+template <class D, class V>
+HWY_INLINE V Atan(const D d, V x) {  // Per-lane arc tangent of x.
+  using T = TFromD<D>;
+
+  const V kOne = Set(d, static_cast<T>(+1.0));
+  const V kPiOverTwo = Set(d, static_cast<T>(+1.57079632679489661923132169));
+
+  const V sign = And(SignBit(d), x);  // only the sign bit of x
+  const V abs_x = Xor(x, sign);  // |x|
+  const auto mask = Gt(abs_x, kOne);  // lanes with |x| > 1 use 1 / |x|
+
+  impl::AtanImpl<T> impl;
+  const auto divisor = IfThenElse(mask, abs_x, kOne);  // 1 where !mask
+  const V y = impl.AtanPoly(d, IfThenElse(mask, Div(kOne, divisor), abs_x));
+  return Or(IfThenElse(mask, Sub(kPiOverTwo, y), y), sign);
+}
+
+template <class D, class V>
+HWY_INLINE V Atanh(const D d, V x) {  // Per-lane inverse hyperbolic tangent.
+  using T = TFromD<D>;
+
+  const V kHalf = Set(d, static_cast<T>(+0.5));
+  const V kOne = Set(d, static_cast<T>(+1.0));
+
+  const V sign = And(SignBit(d), x);  // Extract the sign bit
+  const V abs_x = Xor(x, sign);
+  return Mul(Log1p(d, Div(Add(abs_x, abs_x), Sub(kOne, abs_x))),
+             Xor(kHalf, sign));  // 0.5 carrying the sign of x
+}
+
+template <class D, class V>
+HWY_INLINE V Cos(const D d, V x) {  // Per-lane cosine of x.
+  using T = TFromD<D>;
+  impl::CosSinImpl<T> impl;
+
+  // Float Constants
+  const V kOneOverPi = Set(d, static_cast<T>(0.31830988618379067153));
+
+  // Integer Constants
+  const Rebind<int32_t, D> di32;
+  using VI32 = decltype(Zero(di32));
+  const VI32 kOne = Set(di32, 1);
+
+  const V y = Abs(x);  // cos(x) == cos(|x|)
+
+  // Compute the quadrant, q = int(|x| / pi) * 2 + 1 (so q is always odd)
+  const VI32 q = Add(ShiftLeft<1>(impl.ToInt32(d, Mul(y, kOneOverPi))), kOne);
+
+  // Reduce range, apply sign (Xor with the quadrant sign), and approximate.
+  return impl.Poly(
+      d, Xor(impl.CosReduce(d, y, q), impl.CosSignFromQuadrant(d, q)));
+}
+
+template <class D, class V>
+HWY_INLINE V Exp(const D d, V x) {  // Per-lane e^x.
+  using T = TFromD<D>;
+
+  const V kHalf = Set(d, static_cast<T>(+0.5));
+  const V kLowerBound =
+      Set(d, static_cast<T>((sizeof(T) == 4 ? -104.0 : -1000.0)));
+  const V kNegZero = Set(d, static_cast<T>(-0.0));  // sign-bit mask
+  const V kOne = Set(d, static_cast<T>(+1.0));
+  const V kOneOverLog2 = Set(d, static_cast<T>(+1.442695040888963407359924681));
+
+  impl::ExpImpl<T> impl;
+
+  // q = static_cast<int32>((x / log(2)) + ((x < 0) ? -0.5 : +0.5))
+  const auto q =
+      impl.ToInt32(d, MulAdd(x, kOneOverLog2, Or(kHalf, And(x, kNegZero))));
+
+  // Reduce, approximate, and then reconstruct: (ExpPoly(reduced) + 1) * 2^q.
+  const V y = impl.LoadExpShortRange(
+      d, Add(impl.ExpPoly(d, impl.ExpReduce(d, x, q)), kOne), q);
+  return IfThenElseZero(Ge(x, kLowerBound), y);  // 0 unless x >= kLowerBound
+}
+
+template <class D, class V>
+HWY_INLINE V Expm1(const D d, V x) {  // Per-lane e^x - 1.
+  using T = TFromD<D>;
+
+  const V kHalf = Set(d, static_cast<T>(+0.5));
+  const V kLowerBound =
+      Set(d, static_cast<T>((sizeof(T) == 4 ? -104.0 : -1000.0)));
+  const V kLn2Over2 = Set(d, static_cast<T>(+0.346573590279972654708616));
+  const V kNegOne = Set(d, static_cast<T>(-1.0));
+  const V kNegZero = Set(d, static_cast<T>(-0.0));  // sign-bit mask
+  const V kOne = Set(d, static_cast<T>(+1.0));
+  const V kOneOverLog2 = Set(d, static_cast<T>(+1.442695040888963407359924681));
+
+  impl::ExpImpl<T> impl;
+
+  // q = static_cast<int32>((x / log(2)) + ((x < 0) ? -0.5 : +0.5))
+  const auto q =
+      impl.ToInt32(d, MulAdd(x, kOneOverLog2, Or(kHalf, And(x, kNegZero))));
+
+  // Reduce, approximate, and then reconstruct; y is used as-is for small |x|.
+  const V y = impl.ExpPoly(d, impl.ExpReduce(d, x, q));
+  const V z = IfThenElse(Lt(Abs(x), kLn2Over2), y,
+                         Sub(impl.LoadExpShortRange(d, Add(y, kOne), q), kOne));
+  return IfThenElse(Lt(x, kLowerBound), kNegOne, z);  // -1 below kLowerBound
+}
+
+template <class D, class V>
+HWY_INLINE V Log(const D d, V x) {  // Forwards to impl::Log, subnormals enabled.
+  return impl::Log<D, V, /*kAllowSubnormals=*/true>(d, x);
+}
+
+template <class D, class V>
+HWY_INLINE V Log10(const D d, V x) {  // Log(x) scaled by log10(e).
+  using T = TFromD<D>;
+  return Mul(Log(d, x), Set(d, static_cast<T>(0.4342944819032518276511)));
+}
+
+template <class D, class V>
+HWY_INLINE V Log1p(const D d, V x) {  // Per-lane log(1 + x).
+  using T = TFromD<D>;
+  const V kOne = Set(d, static_cast<T>(+1.0));
+
+  const V y = Add(x, kOne);
+  const auto is_pole = Eq(y, kOne);  // 1 + x rounded to 1: return x itself
+  const auto divisor = Sub(IfThenZeroElse(is_pole, y), kOne);  // -1 if is_pole
+  const auto non_pole =
+      Mul(impl::Log<D, V, /*kAllowSubnormals=*/false>(d, y), Div(x, divisor));
+  return IfThenElse(is_pole, x, non_pole);
+}
+
+template <class D, class V>
+HWY_INLINE V Log2(const D d, V x) {  // Log(x) scaled by log2(e).
+  using T = TFromD<D>;
+  return Mul(Log(d, x), Set(d, static_cast<T>(1.44269504088896340735992)));
+}
+
+template <class D, class V>
+HWY_INLINE V Sin(const D d, V x) {  // Per-lane sine of x.
+  using T = TFromD<D>;
+  impl::CosSinImpl<T> impl;
+
+  // Float Constants
+  const V kOneOverPi = Set(d, static_cast<T>(0.31830988618379067153));
+  const V kHalf = Set(d, static_cast<T>(0.5));
+
+  // Integer Constants
+  const Rebind<int32_t, D> di32;
+  using VI32 = decltype(Zero(di32));
+
+  const V abs_x = Abs(x);
+  const V sign_x = Xor(abs_x, x);  // only the sign bit of x
+
+  // Compute the quadrant, q = int((|x| / pi) + 0.5)
+  const VI32 q = impl.ToInt32(d, MulAdd(abs_x, kOneOverPi, kHalf));
+
+  // Reduce range, apply sign (quadrant sign Xor sign of x), and approximate.
+  return impl.Poly(d, Xor(impl.SinReduce(d, abs_x, q),
+                          Xor(impl.SinSignFromQuadrant(d, q), sign_x)));
+}
+
+template <class D, class V>
+HWY_INLINE V Sinh(const D d, V x) {  // Per-lane hyperbolic sine of x.
+  using T = TFromD<D>;
+  const V kHalf = Set(d, static_cast<T>(+0.5));
+  const V kOne = Set(d, static_cast<T>(+1.0));
+  const V kTwo = Set(d, static_cast<T>(+2.0));
+
+  const V sign = And(SignBit(d), x);  // Extract the sign bit
+  const V abs_x = Xor(x, sign);
+  const V y = Expm1(d, abs_x);  // y = e^|x| - 1
+  const V z = Mul(Div(Add(y, kTwo), Add(y, kOne)), Mul(y, kHalf));
+  return Xor(z, sign);  // Reapply the sign bit
+}
+
+template <class D, class V>
+HWY_INLINE V Tanh(const D d, V x) {  // Per-lane hyperbolic tangent of x.
+  using T = TFromD<D>;
+  const V kLimit = Set(d, static_cast<T>(18.714973875));  // |x| above this -> 1
+  const V kOne = Set(d, static_cast<T>(+1.0));
+  const V kTwo = Set(d, static_cast<T>(+2.0));
+
+  const V sign = And(SignBit(d), x);  // Extract the sign bit
+  const V abs_x = Xor(x, sign);
+  const V y = Expm1(d, Mul(abs_x, kTwo));  // y = e^(2|x|) - 1
+  const V z = IfThenElse(Gt(abs_x, kLimit), kOne, Div(y, Add(y, kTwo)));
+  return Xor(z, sign);  // Reapply the sign bit
+}
+
+template <class D, class V>
+HWY_INLINE void SinCos(const D d, V x, V& s, V& c) {  // Outputs go to s and c.
+  using T = TFromD<D>;
+  impl::SinCosImpl<T> impl;  // specialization chosen by the lane type T
+  impl.SinCos(d, x, s, c);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#endif  // HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/math/math_test.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/math/math_test.cc
new file mode 100644
index 0000000000..6de8374309
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/math/math_test.cc
@@ -0,0 +1,441 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+
+#include <cfloat>  // FLT_MAX
+#include <cmath>   // std::abs
+
+// clang-format off
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/math/math_test.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+#include "hwy/highway.h"
+#include "hwy/contrib/math/math-inl.h"
+#include "hwy/tests/test_util-inl.h"
+// clang-format on
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// We have had test failures caused by excess precision due to keeping
+// intermediate results in 80-bit x87 registers. One such failure mode is that
+// Log1p computes a 1.0 which is not exactly equal to 1.0f, causing is_pole to
+// incorrectly evaluate to false.
+#undef HWY_MATH_TEST_EXCESS_PRECISION
+#if HWY_ARCH_X86_32 && HWY_COMPILER_GCC_ACTUAL && \
+    (HWY_TARGET == HWY_SCALAR || HWY_TARGET == HWY_EMU128)
+
+// On 32-bit x86 with GCC 13+, build with `-fexcess-precision=standard` - see
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323.
+#if HWY_COMPILER_GCC_ACTUAL >= 1300
+
+#if FLT_EVAL_METHOD == 0  // correct flag given, no problem
+#define HWY_MATH_TEST_EXCESS_PRECISION 0
+#else
+#define HWY_MATH_TEST_EXCESS_PRECISION 1
+#pragma message( \
+    "Skipping scalar math_test on 32-bit x86 GCC 13+ without -fexcess-precision=standard")
+#endif  // FLT_EVAL_METHOD
+
+#else                  // HWY_COMPILER_GCC_ACTUAL < 1300
+
+// On 32-bit x86 with GCC <13, set HWY_CMAKE_SSE2 - see
+// https://stackoverflow.com/questions/20869904/c-handling-of-excess-precision .
+#if defined(__SSE2__)  // correct flag given, no problem
+#define HWY_MATH_TEST_EXCESS_PRECISION 0
+#else
+#define HWY_MATH_TEST_EXCESS_PRECISION 1
+#pragma message( \
+    "Skipping scalar math_test on 32-bit x86 GCC <13 without HWY_CMAKE_SSE2")
+#endif  // defined(__SSE2__)
+
+#endif  // HWY_COMPILER_GCC_ACTUAL
+#else   // not (x86-32, GCC, scalar target): running math_test normally
+#define HWY_MATH_TEST_EXCESS_PRECISION 0
+#endif  // HWY_ARCH_X86_32 etc
+
+template <class Out, class In>
+inline Out BitCast(const In& in) {
+  static_assert(sizeof(In) == sizeof(Out), "");  // byte-for-byte copy below
+  Out reinterpreted;
+  CopyBytes<sizeof(Out)>(&in, &reinterpreted);
+  return reinterpreted;
+}
+
+// Checks fxN (vector) against fx1 (scalar reference) for inputs sampled from
+// [min, max]; fails if the largest difference exceeds max_error_ulp ULPs.
+template <class T, class D>
+HWY_NOINLINE void TestMath(const char* name, T (*fx1)(T),
+                           Vec<D> (*fxN)(D, VecArg<Vec<D>>), D d, T min, T max,
+                           uint64_t max_error_ulp) {
+  if (HWY_MATH_TEST_EXCESS_PRECISION) {
+    static bool once = true;
+    if (once) {
+      once = false;
+      fprintf(stderr,
+              "Skipping math_test due to GCC issue with excess precision.\n");
+    }
+    return;  // Actually skip, as the message says (matches TestAllAtan2).
+  }
+
+  using UintT = MakeUnsigned<T>;
+  const UintT min_bits = BitCast<UintT>(min);
+  const UintT max_bits = BitCast<UintT>(max);
+
+  // If min is negative and max is positive, the range needs to be broken into
+  // two pieces, [+0, max] and [-0, min], otherwise [min, max].
+  int range_count = 1;
+  UintT ranges[2][2] = {{min_bits, max_bits}, {0, 0}};
+  if ((min < 0.0) && (max > 0.0)) {
+    ranges[0][0] = BitCast<UintT>(static_cast<T>(+0.0));
+    ranges[0][1] = max_bits;
+    ranges[1][0] = BitCast<UintT>(static_cast<T>(-0.0));
+    ranges[1][1] = min_bits;
+    range_count = 2;
+  }
+
+  uint64_t max_ulp = 0;
+  // Emulation is slower, so cannot afford as many.
+  constexpr UintT kSamplesPerRange = static_cast<UintT>(AdjustedReps(4000));
+  for (int range_index = 0; range_index < range_count; ++range_index) {
+    const UintT start = ranges[range_index][0];
+    const UintT stop = ranges[range_index][1];
+    const UintT step = HWY_MAX(1, ((stop - start) / kSamplesPerRange));
+    for (UintT value_bits = start; value_bits <= stop; value_bits += step) {
+      // For reasons unknown, the HWY_MAX is necessary on RVV, otherwise
+      // value_bits can be less than start, and thus possibly NaN.
+      const T value = BitCast<T>(HWY_MIN(HWY_MAX(start, value_bits), stop));
+      const T actual = GetLane(fxN(d, Set(d, value)));
+      const T expected = fx1(value);
+      // Skip small inputs and outputs on armv7, it flushes subnormals to zero.
+#if HWY_TARGET <= HWY_NEON_WITHOUT_AES && HWY_ARCH_ARM_V7
+      if ((std::abs(value) < 1e-37f) || (std::abs(expected) < 1e-37f)) {
+        continue;
+      }
+#endif
+      const auto ulp = hwy::detail::ComputeUlpDelta(actual, expected);
+      max_ulp = HWY_MAX(max_ulp, ulp);
+      if (ulp > max_error_ulp) {
+        fprintf(stderr, "%s: %s(%f) expected %f actual %f ulp %g max ulp %u\n",
+                hwy::TypeName(T(), Lanes(d)).c_str(), name, value, expected,
+                actual, static_cast<double>(ulp),
+                static_cast<uint32_t>(max_error_ulp));
+      }
+    }
+  }
+  fprintf(stderr, "%s: %s max_ulp %g\n", hwy::TypeName(T(), Lanes(d)).c_str(),
+          name, static_cast<double>(max_ulp));
+  HWY_ASSERT(max_ulp <= max_error_ulp);
+}
+
+#define DEFINE_MATH_TEST_FUNC(NAME)                     \
+  HWY_NOINLINE void TestAll##NAME() {                   \
+    ForFloat3264Types(ForPartialVectors<Test##NAME>()); \
+  }
+// Declares Test##NAME, which forwards the F32 or F64 arguments to TestMath.
+#undef DEFINE_MATH_TEST
+#define DEFINE_MATH_TEST(NAME, F32x1, F32xN, F32_MIN, F32_MAX, F32_ERROR, \
+                         F64x1, F64xN, F64_MIN, F64_MAX, F64_ERROR)       \
+  struct Test##NAME {                                                     \
+    template <class T, class D>                                           \
+    HWY_NOINLINE void operator()(T, D d) {                                \
+      if (sizeof(T) == 4) {                                               \
+        TestMath<T, D>(HWY_STR(NAME), F32x1, F32xN, d, F32_MIN, F32_MAX,  \
+                       F32_ERROR);                                        \
+      } else {                                                            \
+        TestMath<T, D>(HWY_STR(NAME), F64x1, F64xN, d,                    \
+                       static_cast<T>(F64_MIN), static_cast<T>(F64_MAX),  \
+                       F64_ERROR);                                        \
+      }                                                                   \
+    }                                                                     \
+  };                                                                      \
+  DEFINE_MATH_TEST_FUNC(NAME)
+
+// Floating point values closest to but less than 1.0
+const float kNearOneF = BitCast<float>(0x3F7FFFFF);
+const double kNearOneD = BitCast<double>(0x3FEFFFFFFFFFFFFFULL);
+
+// MinGW/MSYS2 builds show a much larger double-precision Cos discrepancy
+// (possibly a less accurate libm), so only that toolchain gets a looser bound.
+constexpr uint64_t Cos64ULP() {
+#ifndef __MINGW32__
+  return 3;
+#else
+  return 23;
+#endif
+}
+
+constexpr uint64_t ACosh32ULP() {
+#ifndef __MINGW32__
+  return 3;
+#else
+  return 8;  // looser float Acosh bound when building with MinGW
+#endif
+}
+
+template <class D>
+static Vec<D> SinCosSin(const D d, VecArg<Vec<D>> x) {
+  Vec<D> sine, cosine;  // SinCos takes both outputs; only sine is returned
+  SinCos(d, x, sine, cosine);
+  return sine;
+}
+
+template <class D>
+static Vec<D> SinCosCos(const D d, VecArg<Vec<D>> x) {
+  Vec<D> sine, cosine;  // SinCos takes both outputs; only cosine is returned
+  SinCos(d, x, sine, cosine);
+  return cosine;
+}
+
+// Targets without native FMA give less accurate results, so allow more ULPs.
+constexpr uint64_t SinCosSin32ULP() {
+#if HWY_NATIVE_FMA
+  return 3;
+#else
+  return 256;
+#endif
+}
+
+constexpr uint64_t SinCosCos32ULP() {
+#if HWY_NATIVE_FMA
+  return 3;
+#else
+  return 64;  // looser float bound when HWY_NATIVE_FMA is 0
+#endif
+}
+
+// clang-format off
+DEFINE_MATH_TEST(Acos,
+  std::acos,  CallAcos,  -1.0f,      +1.0f,       3,  // NEON is 3 instead of 2
+  std::acos,  CallAcos,  -1.0,       +1.0,        2)
+DEFINE_MATH_TEST(Acosh,
+  std::acosh, CallAcosh, +1.0f,      +FLT_MAX,    ACosh32ULP(),
+  std::acosh, CallAcosh, +1.0,       +DBL_MAX,    3)
+DEFINE_MATH_TEST(Asin,
+  std::asin,  CallAsin,  -1.0f,      +1.0f,       4,  // 4 ulp on Armv7, not 2
+  std::asin,  CallAsin,  -1.0,       +1.0,        2)
+DEFINE_MATH_TEST(Asinh,
+  std::asinh, CallAsinh, -FLT_MAX,   +FLT_MAX,    3,
+  std::asinh, CallAsinh, -DBL_MAX,   +DBL_MAX,    3)
+DEFINE_MATH_TEST(Atan,
+  std::atan,  CallAtan,  -FLT_MAX,   +FLT_MAX,    3,
+  std::atan,  CallAtan,  -DBL_MAX,   +DBL_MAX,    3)
+DEFINE_MATH_TEST(Atanh,
+  std::atanh, CallAtanh, -kNearOneF, +kNearOneF,  4,  // NEON is 4 instead of 3
+  std::atanh, CallAtanh, -kNearOneD, +kNearOneD,  3)
+DEFINE_MATH_TEST(Cos,
+  std::cos,   CallCos,   -39000.0f,  +39000.0f,   3,
+  std::cos,   CallCos,   -39000.0,   +39000.0,    Cos64ULP())
+DEFINE_MATH_TEST(Exp,
+  std::exp,   CallExp,   -FLT_MAX,   +104.0f,     1,
+  std::exp,   CallExp,   -DBL_MAX,   +104.0,      1)
+DEFINE_MATH_TEST(Expm1,
+  std::expm1, CallExpm1, -FLT_MAX,   +104.0f,     4,
+  std::expm1, CallExpm1, -DBL_MAX,   +104.0,      4)
+DEFINE_MATH_TEST(Log,
+  std::log,   CallLog,   +FLT_MIN,   +FLT_MAX,    1,
+  std::log,   CallLog,   +DBL_MIN,   +DBL_MAX,    1)
+DEFINE_MATH_TEST(Log10,
+  std::log10, CallLog10, +FLT_MIN,   +FLT_MAX,    2,
+  std::log10, CallLog10, +DBL_MIN,   +DBL_MAX,    2)
+DEFINE_MATH_TEST(Log1p,
+  std::log1p, CallLog1p, +0.0f,      +1e37f,      3,  // NEON is 3 instead of 2
+  std::log1p, CallLog1p, +0.0,       +DBL_MAX,    2)
+DEFINE_MATH_TEST(Log2,
+  std::log2,  CallLog2,  +FLT_MIN,   +FLT_MAX,    2,
+  std::log2,  CallLog2,  +DBL_MIN,   +DBL_MAX,    2)
+DEFINE_MATH_TEST(Sin,
+  std::sin,   CallSin,   -39000.0f,  +39000.0f,   3,
+  std::sin,   CallSin,   -39000.0,   +39000.0,    4)  // MSYS is 4 instead of 3
+DEFINE_MATH_TEST(Sinh,
+  std::sinh,  CallSinh,  -80.0f,     +80.0f,      4,
+  std::sinh,  CallSinh,  -709.0,     +709.0,      4)
+DEFINE_MATH_TEST(Tanh,
+  std::tanh,  CallTanh,  -FLT_MAX,   +FLT_MAX,    4,
+  std::tanh,  CallTanh,  -DBL_MAX,   +DBL_MAX,    4)
+DEFINE_MATH_TEST(SinCosSin,
+  std::sin,   SinCosSin,   -39000.0f,  +39000.0f,   SinCosSin32ULP(),
+  std::sin,   SinCosSin,   -39000.0,   +39000.0,    1)
+DEFINE_MATH_TEST(SinCosCos,
+  std::cos,   SinCosCos,   -39000.0f,  +39000.0f,   SinCosCos32ULP(),
+  std::cos,   SinCosCos,   -39000.0,   +39000.0,    1)
+// clang-format on
+
+// Fills out_y/out_x/out_expected with atan2 special-case inputs and results,
+// zero-padded so that `padded` is a multiple of Lanes(d).
+template <typename T, class D>
+void Atan2TestCases(T /*unused*/, D d, size_t& padded,
+                    AlignedFreeUniquePtr<T[]>& out_y,
+                    AlignedFreeUniquePtr<T[]>& out_x,
+                    AlignedFreeUniquePtr<T[]>& out_expected) {
+  struct YX {
+    T y;
+    T x;
+    T expected;
+  };
+  const T pos = static_cast<T>(1E5);
+  const T neg = static_cast<T>(-1E7);
+  // T{-0} is not enough to get an actual negative zero.
+  const T n0 = static_cast<T>(-0.0);
+  const T inf = GetLane(Inf(d));
+  const T nan = GetLane(NaN(d));
+  const T pi = static_cast<T>(3.141592653589793238);
+  const YX test_cases[] = {                                  // 45 degree steps:
+                           {T{0.0}, T{1.0}, T{0}},           // E
+                           {T{-1.0}, T{1.0}, -pi / 4},       // SE
+                           {T{-1.0}, T{0.0}, -pi / 2},       // S
+                           {T{-1.0}, T{-1.0}, -3 * pi / 4},  // SW
+                           {T{0.0}, T{-1.0}, pi},            // W
+                           {T{1.0}, T{-1.0}, 3 * pi / 4},    // NW
+                           {T{1.0}, T{0.0}, pi / 2},         // N
+                           {T{1.0}, T{1.0}, pi / 4},         // NE
+                           // y = ±0, x < 0 or -0
+                           {T{0}, T{-1}, pi},
+                           {n0, T{-2}, -pi},
+                           // y = ±0, x > 0 or +0
+                           {T{0}, T{2}, T{0}},
+                           {n0, T{2}, n0},
+                           // y = ±∞, x finite
+                           {inf, T{3}, pi / 2},
+                           {-inf, T{3}, -pi / 2},
+                           // y = ±∞, x = -∞
+                           {inf, -inf, 3 * pi / 4},
+                           {-inf, -inf, -3 * pi / 4},
+                           // y = ±∞, x = +∞
+                           {inf, inf, pi / 4},
+                           {-inf, inf, -pi / 4},
+                           // y < 0, x = ±0
+                           {T{-2}, T{0}, -pi / 2},
+                           {T{-1}, n0, -pi / 2},
+                           // y > 0, x = ±0
+                           {pos, T{0}, pi / 2},
+                           {T{4}, n0, pi / 2},
+                           // finite y > 0, x = -∞
+                           {pos, -inf, pi},
+                           // finite y < 0, x = -∞
+                           {neg, -inf, -pi},
+                           // finite y > 0, x = +∞
+                           {pos, inf, T{0}},
+                           // finite y < 0, x = +∞
+                           {neg, inf, n0},
+                           // y NaN xor x NaN
+                           {nan, T{0}, nan},
+                           {pos, nan, nan}};
+  const size_t kNumTestCases = sizeof(test_cases) / sizeof(test_cases[0]);
+  const size_t N = Lanes(d);
+  padded = RoundUpTo(kNumTestCases, N);  // allow loading whole vectors
+  out_y = AllocateAligned<T>(padded);
+  out_x = AllocateAligned<T>(padded);
+  out_expected = AllocateAligned<T>(padded);
+  HWY_ASSERT(out_y && out_x && out_expected);  // all three are written below
+  size_t i = 0;
+  for (; i < kNumTestCases; ++i) {
+    out_y[i] = test_cases[i].y;
+    out_x[i] = test_cases[i].x;
+    out_expected[i] = test_cases[i].expected;
+  }
+  for (; i < padded; ++i) {
+    out_y[i] = T{0};
+    out_x[i] = T{0};
+    out_expected[i] = T{0};
+  }
+}
+
+struct TestAtan2 {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T t, D d) {
+    const size_t N = Lanes(d);
+
+    size_t padded;
+    AlignedFreeUniquePtr<T[]> in_y, in_x, expected;
+    Atan2TestCases(t, d, padded, in_y, in_x, expected);
+
+    const Vec<D> tolerance = Set(d, T(1E-5));
+    // Sanity check: the expected table must agree with the scalar atan2.
+    for (size_t i = 0; i < padded; ++i) {
+      const T actual = static_cast<T>(atan2(in_y[i], in_x[i]));
+      // fprintf(stderr, "%zu: table %f atan2 %f\n", i, expected[i], actual);
+      HWY_ASSERT_EQ(expected[i], actual);
+    }
+    for (size_t i = 0; i < padded; i += N) {  // one whole vector per step
+      const Vec<D> y = Load(d, &in_y[i]);
+      const Vec<D> x = Load(d, &in_x[i]);
+#if HWY_ARCH_ARM_A64
+      // TODO(b/287462770): inline to work around incorrect SVE codegen
+      const Vec<D> actual = Atan2(d, y, x);
+#else
+      const Vec<D> actual = CallAtan2(d, y, x);
+#endif
+      const Vec<D> vexpected = Load(d, &expected[i]);
+      // The NaN lanes of the expected and actual vectors must coincide.
+      const Mask<D> exp_nan = IsNaN(vexpected);
+      const Mask<D> act_nan = IsNaN(actual);
+      HWY_ASSERT_MASK_EQ(d, exp_nan, act_nan);
+
+      // If not NaN, then compare with tolerance
+      const Mask<D> ge = Ge(actual, Sub(vexpected, tolerance));
+      const Mask<D> le = Le(actual, Add(vexpected, tolerance));
+      const Mask<D> ok = Or(act_nan, And(le, ge));
+      if (!AllTrue(d, ok)) {
+        const size_t mismatch =
+            static_cast<size_t>(FindKnownFirstTrue(d, Not(ok)));
+        fprintf(stderr, "Mismatch for i=%d expected %f actual %f\n",
+                static_cast<int>(i + mismatch), expected[i + mismatch],
+                ExtractLane(actual, mismatch));
+        HWY_ASSERT(0);  // report only the first failing lane, then abort
+      }
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllAtan2() {
+  if (!HWY_MATH_TEST_EXCESS_PRECISION) {  // otherwise the test is skipped
+    ForFloat3264Types(ForPartialVectors<TestAtan2>());
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace hwy {
+HWY_BEFORE_TEST(HwyMathTest);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAcos);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAcosh);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAsin);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAsinh);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAtan);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAtanh);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllCos);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllExp);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllExpm1);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllLog);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllLog10);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllLog1p);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllLog2);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllSin);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllSinh);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllTanh);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAtan2);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllSinCosSin);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllSinCosCos);
+}  // namespace hwy
+
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/BUILD b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/BUILD
new file mode 100644
index 0000000000..7debef63a3
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/BUILD
@@ -0,0 +1,215 @@
+package(
+    default_applicable_licenses = ["//:license"],
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])
+
+# Unused on Bazel builds, where this is not defined/known; Copybara replaces
+# usages with an empty list.
+COMPAT = [
+    "//buildenv/target:non_prod",  # includes mobile/vendor.
+]
+
+cc_library(
+    name = "intel",  # currently an empty target: its hdrs are commented out
+    # hdrs = select({
+    #     "//third_party/bazel_platforms/cpu:x86_64": [
+    #        "avx512-16bit-common.h",
+    #        "avx512-16bit-qsort.hpp",
+    #        "avx512-32bit-qsort.hpp",
+    #        "avx512-64bit-common.h",
+    #        "avx512-64bit-qsort.hpp",
+    #        "avx512-common-qsort.h",
+    #     ],
+    #     "//conditions:default": [],
+    # }),
+    compatible_with = [],
+)
+
+cc_library(
+    name = "vxsort",  # currently an empty target: every file is commented out
+    srcs = [
+        # "vxsort/isa_detection.cpp",
+        # "vxsort/isa_detection_msvc.cpp",
+        # "vxsort/isa_detection_sane.cpp",
+        # "vxsort/machine_traits.avx2.cpp",
+        # "vxsort/smallsort/avx2_load_mask_tables.cpp",
+        # "vxsort/smallsort/bitonic_sort.AVX2.double.generated.cpp",
+        # "vxsort/smallsort/bitonic_sort.AVX2.float.generated.cpp",
+        # "vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.cpp",
+        # "vxsort/smallsort/bitonic_sort.AVX2.int64_t.generated.cpp",
+        # "vxsort/smallsort/bitonic_sort.AVX2.uint32_t.generated.cpp",
+        # "vxsort/smallsort/bitonic_sort.AVX2.uint64_t.generated.cpp",
+        # "vxsort/smallsort/bitonic_sort.AVX512.double.generated.cpp",
+        # "vxsort/smallsort/bitonic_sort.AVX512.float.generated.cpp",
+        # "vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.cpp",
+        # "vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.cpp",
+        # "vxsort/smallsort/bitonic_sort.AVX512.uint32_t.generated.cpp",
+        # "vxsort/smallsort/bitonic_sort.AVX512.uint64_t.generated.cpp",
+        # "vxsort/vxsort_stats.cpp",
+    ],
+    hdrs = [
+        # "vxsort/alignment.h",
+        # "vxsort/defs.h",
+        # "vxsort/isa_detection.h",
+        # "vxsort/machine_traits.avx2.h",
+        # "vxsort/machine_traits.avx512.h",
+        # "vxsort/machine_traits.h",
+        # "vxsort/packer.h",
+        # "vxsort/smallsort/bitonic_sort.AVX2.double.generated.h",
+        # "vxsort/smallsort/bitonic_sort.AVX2.float.generated.h",
+        # "vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.h",
+        # "vxsort/smallsort/bitonic_sort.AVX2.int64_t.generated.h",
+        # "vxsort/smallsort/bitonic_sort.AVX2.uint32_t.generated.h",
+        # "vxsort/smallsort/bitonic_sort.AVX2.uint64_t.generated.h",
+        # "vxsort/smallsort/bitonic_sort.AVX512.double.generated.h",
+        # "vxsort/smallsort/bitonic_sort.AVX512.float.generated.h",
+        # "vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.h",
+        # "vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.h",
+        # "vxsort/smallsort/bitonic_sort.AVX512.uint32_t.generated.h",
+        # "vxsort/smallsort/bitonic_sort.AVX512.uint64_t.generated.h",
+        # "vxsort/smallsort/bitonic_sort.h",
+        # "vxsort/vxsort.h",
+        # "vxsort/vxsort_stats.h",
+    ],
+    compatible_with = [],
+    textual_hdrs = [
+        # "vxsort/vxsort_targets_disable.h",
+        # "vxsort/vxsort_targets_enable_avx2.h",
+        # "vxsort/vxsort_targets_enable_avx512.h",
+    ],
+)
+
+cc_library(
+    name = "vqsort",
+    srcs = [
+        # Split into separate files to reduce MSVC build time.
+        "vqsort.cc",
+        "vqsort_128a.cc",
+        "vqsort_128d.cc",
+        "vqsort_f16a.cc",
+        "vqsort_f16d.cc",
+        "vqsort_f32a.cc",
+        "vqsort_f32d.cc",
+        "vqsort_f64a.cc",
+        "vqsort_f64d.cc",
+        "vqsort_i16a.cc",
+        "vqsort_i16d.cc",
+        "vqsort_i32a.cc",
+        "vqsort_i32d.cc",
+        "vqsort_i64a.cc",
+        "vqsort_i64d.cc",
+        "vqsort_kv64a.cc",
+        "vqsort_kv64d.cc",
+        "vqsort_kv128a.cc",
+        "vqsort_kv128d.cc",
+        "vqsort_u16a.cc",
+        "vqsort_u16d.cc",
+        "vqsort_u32a.cc",
+        "vqsort_u32d.cc",
+        "vqsort_u64a.cc",
+        "vqsort_u64d.cc",
+    ],
+    hdrs = [
+        "order.h",  # part of public interface, included by vqsort.h
+        "vqsort.h",  # public interface
+    ],
+    compatible_with = [],
+    local_defines = ["hwy_contrib_EXPORTS"],
+    textual_hdrs = [
+        "shared-inl.h",
+        "sorting_networks-inl.h",
+        "traits-inl.h",
+        "traits128-inl.h",
+        "vqsort-inl.h",
+        # Placeholder for internal instrumentation. Do not remove.
+    ],
+    deps = [
+        ":intel",  # required if HAVE_INTEL
+        ":vxsort",  # required if HAVE_VXSORT
+        "//:algo",
+        "//:hwy",
+    ],
+)
+
+# -----------------------------------------------------------------------------
+# Internal-only targets
+
+cc_library(
+    name = "helpers",
+    testonly = 1,
+    textual_hdrs = [
+        "algo-inl.h",
+        "result-inl.h",
+    ],
+    deps = [
+        ":vqsort",
+        "//:nanobenchmark",
+        # Required for HAVE_PDQSORT, but that is unused and this is
+        # unavailable to Bazel builds, hence commented out.
+        # "//third_party/boost/allowed",
+        # Avoid ips4o and thus TBB to work around hwloc build failure.
+    ],
+)
+
+cc_binary(
+    name = "print_network",
+    testonly = 1,
+    srcs = ["print_network.cc"],
+    deps = [
+        ":helpers",
+        ":vqsort",
+        "//:hwy",
+    ],
+)
+
+cc_test(
+    name = "sort_test",
+    size = "medium",
+    srcs = ["sort_test.cc"],
+    # Do not enable fully_static_link (pthread crash on bazel)
+    local_defines = ["HWY_IS_TEST"],
+    # for test_suite.
+    tags = ["hwy_ops_test"],
+    deps = [
+        ":helpers",
+        ":vqsort",
+        "@com_google_googletest//:gtest_main",
+        "//:hwy",
+        "//:hwy_test_util",
+    ],
+)
+
+cc_test(
+    name = "bench_sort",
+    size = "medium",
+    srcs = ["bench_sort.cc"],
+    # Do not enable fully_static_link (pthread crash on bazel)
+    local_defines = ["HWY_IS_TEST"],
+    # for test_suite.
+    tags = ["hwy_ops_test"],
+    deps = [
+        ":helpers",
+        ":vqsort",
+        "@com_google_googletest//:gtest_main",
+        "//:hwy",
+        "//:hwy_test_util",
+        "//:nanobenchmark",
+    ],
+)
+
+cc_binary(
+    name = "bench_parallel",
+    testonly = 1,
+    srcs = ["bench_parallel.cc"],
+    # Do not enable fully_static_link (pthread crash on bazel)
+    local_defines = ["HWY_IS_TEST"],
+    deps = [
+        ":helpers",
+        ":vqsort",
+        "@com_google_googletest//:gtest_main",
+        "//:hwy",
+        "//:hwy_test_util",
+    ],
+)
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/README.md b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/README.md
new file mode 100644
index 0000000000..800d2cfcda
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/README.md
@@ -0,0 +1,361 @@
+# Vectorized and performance-portable Quicksort
+
+## Introduction
+
+As of 2022-06-07 this sorts large arrays of built-in types about ten times as
+fast as LLVM's `std::sort`. Note that other algorithms such as pdqsort can be
+about twice as fast as LLVM's `std::sort` as of 2023-06.
+
+See also our
+[blog post](https://opensource.googleblog.com/2022/06/Vectorized%20and%20performance%20portable%20Quicksort.html)
+and [paper](https://arxiv.org/abs/2205.05982).
+
+## Instructions
+
+Here are instructions for reproducing our results with cross-platform CMake,
+Linux, or AWS (SVE, NEON).
+
+### CMake, any platform
+
+Please first ensure that Clang (tested with 13.0.1 and 15.0.6) is installed, and
+if it is not the default compiler, point the CC and CXX environment variables to
+it, e.g.
+
+```
+export CC=clang-15
+export CXX=clang++-15
+```
+
+Then run the usual CMake workflow, also documented in the Highway README, e.g.:
+
+```
+mkdir -p build && cd build && cmake .. && make -j
+taskset -c 2 tests/bench_sort
+```
+
+The optional `taskset -c 2` part reduces the variability of measurements by
+preventing the OS from migrating the benchmark between cores.
+
+### Linux
+
+Please first ensure that golang and Clang (tested with 13.0.1) are installed
+via your system's package manager.
+
+```
+go install github.com/bazelbuild/bazelisk@latest
+git clone https://github.com/google/highway
+cd highway
+CC=clang CXX=clang++ ~/go/bin/bazelisk build -c opt hwy/contrib/sort:all
+bazel-bin/hwy/contrib/sort/sort_test
+bazel-bin/hwy/contrib/sort/bench_sort
+```
+
+### AWS Graviton3
+
+Instance config: amazon linux 5.10 arm64, c7g.8xlarge (largest allowed config is
+32 vCPU). Initial launch will fail. Wait a few minutes for an email saying the
+config is verified, then re-launch. See IPv4 hostname in list of instances.
+
+`ssh -i /path/key.pem ec2-user@hostname`
+
+The AWS CMake package is too old to build LLVM, so we first build CMake 3.23:
+```
+wget https://cmake.org/files/v3.23/cmake-3.23.2.tar.gz
+tar -xvzf cmake-3.23.2.tar.gz && cd cmake-3.23.2/
+./bootstrap -- -DCMAKE_USE_OPENSSL=OFF
+make -j8 && sudo make install
+cd ..
+```
+
+AWS clang is at version 11.1, which generates unnecessary `AND` instructions
+that slow down the sort by 1.15x. We tested with clang trunk as of June 13
+(which reports Git hash 8f6512fea000c3a0d394864bb94e524bee375069). To build:
+
+```
+git clone --depth 1 https://github.com/llvm/llvm-project.git
+cd llvm-project
+mkdir -p build && cd build
+/usr/local/bin/cmake ../llvm -DLLVM_ENABLE_PROJECTS="clang" -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi" -DCMAKE_BUILD_TYPE=Release
+make -j32 && sudo make install
+```
+
+```
+sudo yum install go
+go install github.com/bazelbuild/bazelisk@latest
+git clone https://github.com/google/highway
+cd highway
+CC=/usr/local/bin/clang CXX=/usr/local/bin/clang++ ~/go/bin/bazelisk build -c opt --copt=-march=armv8.2-a+sve hwy/contrib/sort:all
+bazel-bin/hwy/contrib/sort/sort_test
+bazel-bin/hwy/contrib/sort/bench_sort
+```
+
+The above command line enables SVE, which is currently only available on
+Graviton 3. You can also test NEON on the same processor, or other Arm CPUs, by
+changing the `-march=` option to `--copt=-march=armv8.2-a+crypto`. Note that
+such flags will be unnecessary once Clang supports `#pragma target` for NEON and
+SVE intrinsics, as it does for x86.
+
+## Results
+
+`bench_sort` outputs the instruction set (AVX3 refers to AVX-512), the sort
+algorithm (std for `std::sort`, vq for our vqsort), the type of keys being
+sorted (f32 is float), the distribution of keys (uniform32 for uniform random
+with range 0-2^32), the number of keys, then the throughput of sorted keys (i.e.
+number of key bytes output per second).
+
+Example excerpt from Xeon 6154 (Skylake-X) CPU clocked at 3 GHz:
+
+```
+[ RUN      ] BenchSortGroup/BenchSort.BenchAllSort/AVX3
+      AVX3:          std:     f32: uniform32: 1.00E+06   54 MB/s ( 1 threads)
+      AVX3:           vq:     f32: uniform32: 1.00E+06 1143 MB/s ( 1 threads)
+```
+
+## Additional results
+
+Thanks to Lukas Bergdoll, who did a thorough [performance analysis](https://github.com/Voultapher/sort-research-rs/blob/main/writeup/intel_avx512/text.md)
+on various sort implementations. This helped us identify a performance bug,
+caused by obtaining entropy from the OS on each call. This was fixed in #1334
+and we look forward to the updated results.
+
+### Optimizations for small arrays
+
+Our initial focus was on large arrays. Since the VQSort paper was published,
+we have improved its performance for small arrays:
+
+-   Previously, each call to VQSort obtained entropy from the OS. Unpredictable
+    seeding does help avoid worst-cases, and the cost is negligible when the
+    input size is at least 100K elements. However, the overhead is very costly
+    for arrays of just 100 or 1000, so we now obtain entropy only once per
+    thread and cache the seeds in TLS. This significantly improves the
+    performance on subsequent calls. Users can also explicitly initialize the
+    random generator.
+
+-   We also improved the efficiency of our sorting network for inputs shorter
+    than half its size. Our approach avoids costly transposes by interpreting
+    inputs as a 2D matrix. Previously, we always used 16 rows, which means only
+    a single vector lane is active for up to 16 elements. We have added 8x2 and
+    8x4 networks which use more lanes when available, and also 4x1 and 8x1
+    networks for very small inputs.
+
+-   Previously we also loaded (overlapping) full vectors, with the offsets
+    determined by the number of columns. Now we use the minimum vector size
+    sufficient for the number of columns, which enables higher IPC on Skylake
+    and reduces the cost of unaligned loads.
+
+    Unfortunately this decreases code reuse; VQSort now consists of about 1500
+    instructions (https://gcc.godbolt.org/z/ojYKfjPe6). The size of sorting
+    networks has nearly doubled to 10.8 KiB, 70% of the total. Although large,
+    this still fits comfortably within 32 KiB instruction caches, and possibly
+    even in micro-op caches (DSB, 1500-2300 micro-ops), especially given that
+    not all instructions are guaranteed to execute.
+
+### Study of AVX-512 downclocking
+We study whether AVX-512 downclocking affects performance. Using the GHz
+reported by perf, we find an upper bound on the effects of downclocking, and
+observe that its effect is negligible when compared to scalar code.
+
+This issue has somehow attracted far more attention than seems warranted. An
+attempt by Daniel Lemire to measure the
+[worst-case](https://lemire.me/blog/2018/08/15/the-dangers-of-avx-512-throttling-a-3-impact/)
+only saw a **3% decrease**, and Intel CPUs since Icelake, as well as AMD Zen4,
+are much less impacted by throttling, if at all. By contrast, "Silver" and
+"Bronze" Intel Xeons have more severe throttling and would require a large(r)
+speedup from AVX-512 to outweigh the downclocking. However, these CPUs are
+marketed towards "entry compute, network and storage" and "small business and
+storage server solutions", and are thus less suitable for the high-performance
+workloads we consider.
+
+Our test workstation runs Linux (6.1.20-2rodete1-amd64) and has the same Xeon
+Gold 6154 CPU used in our paper because its Skylake microarchitecture is the
+most (potentially) affected. The compiler is a Clang similar to the LLVM trunk.
+
+We added a new 'cold' benchmark that initializes random seeds, fills an array
+with a constant except at one random index, calls VQSort, and then prints a
+random element to ensure the computations are not elided. To run it, we build
+bench_sort with `-DSORT_ONLY_COLD=1` and then invoke
+`taskset -c 6 setarch -R x86_64 perf stat -r 15 -d bench_sort`. The taskset and
+setarch serve to reduce variability by avoiding thread migration, and disabling
+address space randomization. `-r 15` requests 15 runs so that perf can display
+the variability of the measurements: < 1% for cycles, instructions, L1 dcache
+loads; LLC miss variability is much higher (> 10%) presumably due to the
+remaining background activity on this machine.
+
+For our measurements, we use the GHz value reported by `perf`. This does not
+include time spent in the kernel, and is thus noisy for short runtimes. Note
+that running `perf` under `sudo` is not an option because it results in
+"Workload failed: Cannot allocate memory". We see results between 2.6 - 2.9 GHz
+when running AVX-512 code. This is relative to 3.0 GHz nominal; we disabled
+Turbo Boost via MSR and ran `sudo cpupower frequency-set --governor performance`
+to prevent unnecessary frequency reductions. To the best of our knowledge, the
+remaining gap is explained by time spent in the kernel (in particular handling
+page faults) and downclocking. Thus an *upper-bound* for the latter is
+(3 - 2.9)/3 to (3 - 2.6)/3, or **1.03 - 1.13x**. Such a frequency reduction
+would already be negligible compared to the 2-4x increase in work per cycle from
+512-bit SIMD relative to 256 or 128-bit SIMD, which is typically less or not at
+all affected by downclocking.
+
+To further tighten this bound, we compare AVX-512 code vs. non-AVX-512 code, in
+the form of `std::sort`. Ensuring the remainder of the binary does not use
+AVX-512 is nontrivial. Library functions such as `memset` are known to use
+AVX-512, and they would not show up in a disassembly of our binary. Neither
+would they raise exceptions if run on a CPU lacking AVX-512 support, because
+software typically verifies CPU support before running AVX-512. As a first step,
+we take care to avoid calls to such library functions in our test, which is more
+feasible with a self-contained small binary. In particular, array
+zero-initialization typically compiles to `memset` (verified with clang-16), so
+we manually initialize the array to the return value of an `Unpredictable1`
+function whose implementation is not visible to the compiler. This indeed
+compiles to a scalar loop. To further increase confidence that the binary lacks
+AVX-512 instructions before VQSort, we replace the initialization loop with
+AVX-512 stores. This indeed raises the measured throughput from a fairly
+consistent 9 GB/s to 9-15 GB/s, likely because some of the AVX-512 startup now
+occurs outside of our timings. We examine this effect in the next section, but
+for now we can conclude that because adding AVX-512 makes a difference, the
+binary was otherwise not using it. Now we can revert to scalar initialization
+and compare the GHz reported for VQSort vs. `std::sort`. Across three runs, the
+ranges are 2.8-2.9 and 2.8-2.8 GHz. Thus we conclude: if there is any
+downclocking for a single core running AVX-512 on this Skylake-X CPU, the effect
+is **under the noise floor of our measurement**, and certainly far below any
+speedup one can reasonably predict from 512-bit SIMD. We expect this result to
+generalize to AMD Zen4 and any Gold/Platinum Intel Xeon.
+
+### Study of AVX-512 startup overhead
+
+In the previous section, we saw that downclocking is negligible on our system,
+but there is a noticeable benefit to warming up AVX-512 before the sort. To
+understand why, we refer to Travis Downs' excellent
+[measurements](https://travisdowns.github.io/blog/2020/01/17/avxfreq1.html#summary)
+of how Skylake reacts to an AVX-512 instruction: 8-20 us of reduced instruction
+throughput, an additional potential halt of 10 us, and then downclocking.
+Note that downclocking is negligible on a single core per the previous section.
+
+We choose the array length of 10K unsigned 64-bit keys such that VQSort
+completes in 7-10 us. Thus in this benchmark, VQSort (almost) finishes before
+AVX-512 is fully warmed up, and the speedup is reduced because the startup costs
+are amortized over relatively little data. Across five series of 15 runs, the
+average of average throughputs is 9.3 GB/s, implying a runtime of 8.6 us
+including startup costs.
+
+Note that the two-valued, almost all-equal input distribution is quite skewed.
+The above throughput does not reflect the performance attainable on other
+distributions, especially uniform random. However, this choice is deliberate
+because Quicksort can terminate early if all values in a partition are equal.
+When measuring such a 'best-case' input, we are more likely to observe the cost
+of startup overhead in surrounding code. Otherwise, this overhead might be
+hidden by the increase in sorting time.
+
+Now let us compare this throughput to the previously mentioned measurement with
+AVX-512 warmed up (via slow scatter instructions so that initialization takes
+about 100 us, well in excess of the warmup period): 15.2 GB/s, or 5.3 us without
+startup cost. It appears the 10 us halt is not happening, possibly because we do
+not use SIMD floating-point nor multiplication instructions. Thus we only
+experience reduced instruction throughput and/or increased latency. The ratio
+between cold and warmed-up time is only 1.6, which is plausible if the Skylake
+throttling is actually rounding latencies up to a multiple of four cycles, as
+Downs speculates. Indeed a large fraction of the SIMD instructions especially in
+the VQSort base case are cross-lane or 64-bit min/max operations with latencies
+of 3 cycles on Skylake, so their slowdown might only be 1.3x. The measured 1.6x
+could plausibly derive from 7/8 of 1.3x and 1/8 of 4x for single-cycle latency
+instructions.
+
+Assuming this understanding of AVX-512 startup cost is valid, how long does it
+remain active before the CPU reverts to the previous settings? The CPU cannot
+know what future instructions are coming, and to prevent unnecessary
+transitions, it has a hysteresis (delay after the last AVX-512 instruction
+before shutting down) which Downs measures as 680 us. Thus our benchmark
+subsequently sleeps for 100 ms to ensure the next run of the binary sees the
+original CPU state. Indeed we find for the five series that the slopes of the
+lines of best fit are negative in one case, positive in two, and flat in two,
+indicating there is no consistent pattern of benefit for earlier or later runs.
+
+What are the implications for users of VQSort? If the surrounding code executes
+an AVX-512 instruction at least every 500 us, then AVX-512 remains active and
+**any call to VQSort will benefit from it, no matter how small the input**.
+This is a reasonable expectation for modern systems whose designers were aware
+of data-oriented programming principles, because many (though not all) domains
+and operations can benefit from SIMD. By contrast, consider the case of dropping
+VQSort into an existing legacy system that does not yet use SIMD. In the case of
+10K input sizes, we still observe a 2.3x speedup vs. `std::sort`. However, the
+following code may have to deal with throttling for the remainder of the 20 us
+startup period. With VQSort we have 8.6 us runtime plus up to 11.4 us throttled
+code (potentially running at quarter speed) plus the remaining 3/4 of 11.4 for a
+total of 28.6 us. With `std::sort` we have 19.5 us runtime plus 20 us of normal
+subsequent code, or 39.5 us. Thus the overall speedup for the 20 us region plus
+VQSort **shrinks to 1.4x**, and it is possible to imagine an actual slowdown for
+sufficiently small inputs, when factoring in the throttling of subsequent code.
+This unfortunate 'beggar thy neighbor' effect cannot be solved at the level of
+individual building blocks such as a sort, and must instead be addressed at the
+system level. For example:
+
+-   vectorizing more and more parts of the code to amortize startup cost;
+-   relying on newer CPUs than Skylake (launched 2015!) which have little or no
+    AVX-512 startup overhead, such as Intel Icelake (2021) or AMD Zen4 (2022);
+-   ensuring sorts (or anything else using AVX-512) process at least 100 KiB
+    of data, such that the expected speedup outweighs any startup cost.
+
+Any one of these solutions is sufficient to render AVX-512 startup overhead a
+non-issue.
+
+### Comparison with Intel's x86-simd-sort and vxsort
+
+Our May 2022 paper compared performance with `ips4o` and `std::sort`. We now add
+results for Intel's [x86-simd-sort](https://github.com/intel/x86-simd-sort),
+released as open source around October 2022, and
+[vxsort](https://github.com/damageboy/vxsort-cpp/tree/master). We find that
+VQSort is generally about 1.4 times as fast as either, and in a few cases equal
+or up to 2% slower.
+
+Note that vxsort was open-sourced around May 2020; we were unaware of it at the
+time of writing because it had been published in the form of a blog series. We
+imported both from GitHub on 2023-06-06 at about 10:15 UTC. Both are integrated
+into our bench_sort, running on the same Linux OS and Xeon 6154 CPU mentioned
+above. We use uniform random inputs, because vxsort and x86-simd-sort appear to
+have much less robust handling of skewed input distributions. They choose the
+pivot as the median of three keys, or of 64 bytes, respectively. By contrast,
+VQSort draws a 384-byte sample and analyzes its distribution, which improves
+load balance and prevents recursing into all-equal partitions. Lacking this, the
+other algorithms are more vulnerable to worst-cases. Choosing uniform random
+thus prevents disadvantaging the other algorithms.
+
+We sample performance across a range of input sizes and types:
+
+-   To isolate the performance of the sorting networks used by all three
+    algorithms, we start with powers of two up to 128. VQSort is generally the
+    fastest for 64-bit keys with the following exceptions: tie with vxsort at
+    N=2 (537 MB/s), slower than vxsort at N=16 (2114 vs. 2147), tie with
+    x86-simd-sort at N=32 (2643 MB/s). Note that VQSort is about 1.6 times as
+    fast as both others for N=128; possibly because its 2D structure enables
+    larger networks.
+
+-   The `kPow10` mode in bench_sort measures power of ten input sizes between
+    10 and 100K. Note that this covers non-power of two sizes, as well as the
+    crossover point between sorting networks and Quicksort recursion. The
+    speedups of VQSort relative to x86-simd-sort range from 1.33 to 1.81
+    (32-bit keys), and 1.25 to 1.68 (64-bit keys), with geomeans of 1.48 and
+    1.44. The speedups of VQSort relative to vxsort range from 1.08 to 2.10
+    (32-bit keys), and 1.00 to 1.47 (64-bit keys), with geomeans of 1.41 and
+    1.20. Note that vxsort matches VQSort at 10 64-bit elements; in all other
+    cases, VQSort is strictly faster.
+
+-   Finally, we study the effect of key type at a fixed input size of 10K
+    elements. x86-simd-sort requires AVX512-VBMI2 for int16, which our CPU does
+    not support. Also, neither of the other algorithms supports 128-bit keys, so
+    we only consider 32/64-bit integer and float types. The results in MB/s are:
+
+    |Type|VQSort|x86-simd-sort|vxsort|
+    |---|---|---|---|
+    |f32|**1551**| 798| 823|
+    |f64|**1773**|1147| 745|
+    |i32|**1509**|1042| 968|
+    |i64|**1365**|1043|1145|
+
+    VQSort is the fastest for each type, in some cases even about twice as fast.
+    Interestingly, vxsort performs at its best on i64, whereas the others are at
+    their best for f64. A potential explanation is that this CPU can execute two
+    f64 min/max per cycle, but only one i64.
+
+In conclusion, VQSort is generally more efficient than vxsort and x86-simd-sort
+across a range of input sizes and types. Occasionally, it is up to 2% slower,
+but the geomean of its speedup (32-bit keys and power-of-ten sizes) vs. vxsort
+is **1.41**, and **1.48** vs. x86-simd-sort.
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/algo-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/algo-inl.h
new file mode 100644
index 0000000000..c4cd208584
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/algo-inl.h
@@ -0,0 +1,551 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Normal include guard for target-independent parts
+#ifndef HIGHWAY_HWY_CONTRIB_SORT_ALGO_INL_H_
+#define HIGHWAY_HWY_CONTRIB_SORT_ALGO_INL_H_
+
+#include <stdint.h>
+
+#include <algorithm>   // std::sort, std::min, std::max
+#include <functional>  // std::less, std::greater
+#include <vector>
+
+#include "hwy/base.h"
+#include "hwy/contrib/sort/vqsort.h"
+#include "hwy/print.h"
+
+// Third-party algorithms. Each HAVE_* macro is a compile-time switch: when
+// nonzero, the matching header is included further down and the algorithm
+// gets an entry in `Algo`, `AlgoName` and `Run`. All are disabled (0) here.
+#define HAVE_AVX2SORT 0
+#define HAVE_IPS4O 0
+// When enabling, consider changing max_threads (required for Table 1a)
+#define HAVE_PARALLEL_IPS4O (HAVE_IPS4O && 1)
+#define HAVE_PDQSORT 0
+#define HAVE_SORT512 0
+#define HAVE_VXSORT 0
+// Both branches currently define 0. Presumably the x86 branch is the one to
+// flip when enabling Intel's sort, whose headers are only included for
+// targets <= HWY_AVX3.
+#if HWY_ARCH_X86
+#define HAVE_INTEL 0
+#else
+#define HAVE_INTEL 0
+#endif
+
+#if HAVE_PARALLEL_IPS4O
+#include <thread>  // NOLINT
+#endif
+
+#if HAVE_AVX2SORT
+HWY_PUSH_ATTRIBUTES("avx2,avx")
+#include "avx2sort.h"  //NOLINT
+HWY_POP_ATTRIBUTES
+#endif
+#if HAVE_IPS4O || HAVE_PARALLEL_IPS4O
+#include "third_party/ips4o/include/ips4o.hpp"
+#include "third_party/ips4o/include/ips4o/thread_pool.hpp"
+#endif
+#if HAVE_PDQSORT
+#include "third_party/boost/allowed/sort/sort.hpp"
+#endif
+#if HAVE_SORT512
+#include "sort512.h"  //NOLINT
+#endif
+
+// vxsort is difficult to compile for multiple targets because it also uses
+// .cpp files, and we'd also have to #undef its include guards. Instead, compile
+// only for AVX2 or AVX3 depending on this macro.
+#define VXSORT_AVX3 1
+#if HAVE_VXSORT
+// inlined from vxsort_targets_enable_avx512 (must close before end of header)
+// Clang also defines __GNUC__, hence the nested check: Clang pushes a target
+// attribute for all functions, other GNU-compatible compilers save their
+// options and switch the target.
+#ifdef __GNUC__
+#ifdef __clang__
+#if VXSORT_AVX3
+#pragma clang attribute push(__attribute__((target("avx512f,avx512dq"))), \
+                             apply_to = any(function))
+#else
+#pragma clang attribute push(__attribute__((target("avx2"))), \
+                             apply_to = any(function))
+#endif  // VXSORT_AVX3
+
+#else
+#pragma GCC push_options
+#if VXSORT_AVX3
+#pragma GCC target("avx512f,avx512dq")
+#else
+#pragma GCC target("avx2")
+#endif  // VXSORT_AVX3
+#endif  // __clang__
+#endif  // __GNUC__
+
+// The vxsort headers are compiled with the target selected above.
+#if VXSORT_AVX3
+#include "vxsort/machine_traits.avx512.h"
+#else
+#include "vxsort/machine_traits.avx2.h"
+#endif  // VXSORT_AVX3
+#include "vxsort/vxsort.h"
+// Undo the attribute/options push from above.
+#ifdef __GNUC__
+#ifdef __clang__
+#pragma clang attribute pop
+#else
+#pragma GCC pop_options
+#endif  // __clang__
+#endif  // __GNUC__
+#endif  // HAVE_VXSORT
+
+namespace hwy {
+
+// Input distributions understood by MaskForDist/GenerateInput. Judging by the
+// masks in MaskForDist, the numeric suffix is the number of random low bits
+// kept per key, capped by the key width.
+enum class Dist { kUniform8, kUniform16, kUniform32 };
+
+// Returns the list of distributions to run; currently only kUniform32.
+static inline std::vector<Dist> AllDist() {
+  std::vector<Dist> dists;
+  // Disabled: Dist::kUniform8, Dist::kUniform16.
+  dists.push_back(Dist::kUniform32);
+  return dists;
+}
+
+// Returns a short lowercase name for `dist`, or "unreachable" if `dist` is not
+// one of the enumerators.
+static inline const char* DistName(Dist dist) {
+  const char* name = "unreachable";
+  switch (dist) {
+    case Dist::kUniform8:
+      name = "uniform8";
+      break;
+    case Dist::kUniform16:
+      name = "uniform16";
+      break;
+    case Dist::kUniform32:
+      name = "uniform32";
+      break;
+  }
+  return name;
+}
+
+// Running statistics (minimum, maximum, count and a bit-pattern checksum) over
+// the keys passed to Notify. Two instances can be compared via operator==,
+// presumably to check that sorting neither lost nor altered any key.
+template <typename T>
+class InputStats {
+ public:
+  // Folds `value` into the statistics.
+  void Notify(T value) {
+    if (value < min_) min_ = value;
+    if (max_ < value) max_ = value;
+    // The checksum is the sum of the keys' bit representations: converting to
+    // integer would truncate floats, and multiplying to save digits risks
+    // overflow, especially when casting.
+    static_assert(sizeof(T) <= 8, "Expected a built-in type");
+    uint64_t value_bits = 0;
+    CopyBytes<sizeof(T)>(&value, &value_bits);  // not same size
+    sum_ += value_bits;
+    ++count_;
+  }
+
+  // Aborts with a diagnostic if `other` differs in count, min/max or checksum
+  // (checked in that order); consequently, only ever returns true.
+  bool operator==(const InputStats& other) const {
+    char name[100];
+    detail::TypeName(hwy::detail::MakeTypeInfo<T>(), 1, name);
+
+    const bool same_count = (count_ == other.count_);
+    if (!same_count) {
+      HWY_ABORT("Sort %s: count %d vs %d\n", name, static_cast<int>(count_),
+                static_cast<int>(other.count_));
+    }
+
+    const bool same_minmax = (min_ == other.min_) && (max_ == other.max_);
+    if (!same_minmax) {
+      HWY_ABORT("Sort %s: minmax %f/%f vs %f/%f\n", name,
+                static_cast<double>(min_), static_cast<double>(max_),
+                static_cast<double>(other.min_),
+                static_cast<double>(other.max_));
+    }
+
+    // A differing sum indicates duplicated or lost values.
+    const bool same_sum = (sum_ == other.sum_);
+    if (!same_sum) {
+      HWY_ABORT("Sort %s: Sum mismatch %g %g; min %g max %g\n", name,
+                static_cast<double>(sum_), static_cast<double>(other.sum_),
+                static_cast<double>(min_), static_cast<double>(max_));
+    }
+
+    return true;
+  }
+
+ private:
+  // min_/max_ start at the opposite extremes so the first Notify sets both.
+  T min_ = hwy::HighestValue<T>();
+  T max_ = hwy::LowestValue<T>();
+  uint64_t sum_ = 0;
+  size_t count_ = 0;
+};
+
+// Sorting algorithms selectable in Run. Third-party entries only exist when
+// the corresponding HAVE_* macro is nonzero; kStd, kVQSort and kHeap are
+// always available.
+enum class Algo {
+#if HAVE_INTEL
+  kIntel,
+#endif
+#if HAVE_AVX2SORT
+  kSEA,
+#endif
+#if HAVE_IPS4O
+  kIPS4O,
+#endif
+#if HAVE_PARALLEL_IPS4O
+  kParallelIPS4O,
+#endif
+#if HAVE_PDQSORT
+  kPDQ,
+#endif
+#if HAVE_SORT512
+  kSort512,
+#endif
+#if HAVE_VXSORT
+  kVXSort,
+#endif
+  kStd,
+  kVQSort,
+  kHeap,
+};
+
+// Returns a short lowercase name for `algo`, or "unreachable" if `algo` is not
+// one of the enumerators available in this configuration.
+static inline const char* AlgoName(Algo algo) {
+  const char* name = "unreachable";
+  switch (algo) {
+#if HAVE_INTEL
+    case Algo::kIntel:
+      name = "intel";
+      break;
+#endif
+#if HAVE_AVX2SORT
+    case Algo::kSEA:
+      name = "sea";
+      break;
+#endif
+#if HAVE_IPS4O
+    case Algo::kIPS4O:
+      name = "ips4o";
+      break;
+#endif
+#if HAVE_PARALLEL_IPS4O
+    case Algo::kParallelIPS4O:
+      name = "par_ips4o";
+      break;
+#endif
+#if HAVE_PDQSORT
+    case Algo::kPDQ:
+      name = "pdq";
+      break;
+#endif
+#if HAVE_SORT512
+    case Algo::kSort512:
+      name = "sort512";
+      break;
+#endif
+#if HAVE_VXSORT
+    case Algo::kVXSort:
+      name = "vxsort";
+      break;
+#endif
+    case Algo::kStd:
+      name = "std";
+      break;
+    case Algo::kVQSort:
+      name = "vq";
+      break;
+    case Algo::kHeap:
+      name = "heap";
+      break;
+  }
+  return name;
+}
+
+}  // namespace hwy
+#endif  // HIGHWAY_HWY_CONTRIB_SORT_ALGO_INL_H_
+
+// Per-target section. It is entered only while the defined-ness of our toggle
+// macro equals that of HWY_TARGET_TOGGLE; entering flips our toggle, so
+// re-including this header is a no-op until HWY_TARGET_TOGGLE changes
+// (presumably once per target when driven by foreach_target.h).
+#if defined(HIGHWAY_HWY_CONTRIB_SORT_ALGO_TOGGLE) == defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_CONTRIB_SORT_ALGO_TOGGLE
+#undef HIGHWAY_HWY_CONTRIB_SORT_ALGO_TOGGLE
+#else
+#define HIGHWAY_HWY_CONTRIB_SORT_ALGO_TOGGLE
+#endif
+
+#include "hwy/contrib/sort/traits-inl.h"
+#include "hwy/contrib/sort/traits128-inl.h"
+#include "hwy/contrib/sort/vqsort-inl.h"  // HeapSort
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+
+// Requires target pragma set by HWY_BEFORE_NAMESPACE
+// Intel's x86-simd-sort headers; only included for targets <= HWY_AVX3.
+#if HAVE_INTEL && HWY_TARGET <= HWY_AVX3
+// #include "avx512-16bit-qsort.hpp"  // requires vbmi2
+#include "avx512-32bit-qsort.hpp"
+#include "avx512-64bit-qsort.hpp"
+#endif
+
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// OtherOrder<T> is descending, unless Intel's sort or vxsort is enabled: those
+// only support ascending order, so the alias then resolves to ascending.
+#if HAVE_INTEL || HAVE_VXSORT  // only supports ascending order
+template <typename T>
+using OtherOrder = detail::OrderAscending<T>;
+#else
+template <typename T>
+using OtherOrder = detail::OrderDescending<T>;
+#endif
+
+// Vector xorshift128+ style generator. All members are static: the caller owns
+// the two state vectors and passes them in by reference.
+class Xorshift128Plus {
+  // Scrambles `z` with two multiply/xor-shift rounds and a final xor-shift.
+  static HWY_INLINE uint64_t SplitMix64(uint64_t z) {
+    z ^= z >> 30;
+    z *= 0xBF58476D1CE4E5B9ull;
+    z ^= z >> 27;
+    z *= 0x94D049BB133111EBull;
+    z ^= z >> 31;
+    return z;
+  }
+
+ public:
+  // Generates two vectors of 64-bit seeds via SplitMix64 and stores into
+  // `seeds`, which must have room for 2 * Lanes(du64) elements. Each seed is
+  // derived from the previous one. Generating these afresh in each ChoosePivot
+  // is too expensive.
+  template <class DU64>
+  static void GenerateSeeds(DU64 du64, TFromD<DU64>* HWY_RESTRICT seeds) {
+    seeds[0] = SplitMix64(0x9E3779B97F4A7C15ull);
+    size_t idx = 1;
+    while (idx < 2 * Lanes(du64)) {
+      seeds[idx] = SplitMix64(seeds[idx - 1]);
+      ++idx;
+    }
+  }
+
+  // Returns a vector of random bits and advances both state vectors. The state
+  // is passed in because vectors cannot be class members.
+  template <class VU64>
+  static VU64 RandomBits(VU64& state0, VU64& state1) {
+    VU64 prev0 = state0;
+    const VU64 prev1 = state1;
+    const VU64 result = Add(prev0, prev1);
+    state0 = prev1;
+    prev0 = Xor(prev0, ShiftLeft<23>(prev0));
+    const VU64 shifted = Xor(ShiftRight<18>(prev0), ShiftRight<5>(prev1));
+    state1 = Xor(prev0, Xor(prev1, shifted));
+    return result;
+  }
+};
+
+// Non-float overload: returns random bits ANDed with `mask`, reinterpreted as
+// a vector of `d`'s lane type. Advances the generator state `s0`/`s1`.
+template <class D, class VU64, HWY_IF_NOT_FLOAT_D(D)>
+Vec<D> RandomValues(D d, VU64& s0, VU64& s1, const VU64 mask) {
+  return BitCast(d, And(Xorshift128Plus::RandomBits(s0, s1), mask));
+}
+
+// Float overload. It is important to avoid denormals, which are flushed to
+// zero by SIMD but not scalar sorts, and NaN, which may be ordered differently
+// in scalar vs. SIMD. Hence only values in [1, 2) are generated: random
+// mantissas with the exponent taken from the representation of 1.0.
+template <class DF, class VU64, HWY_IF_FLOAT_D(DF)>
+Vec<DF> RandomValues(DF df, VU64& s0, VU64& s1, const VU64 mask) {
+  using TF = TFromD<DF>;
+  const RebindToUnsigned<decltype(df)> du;
+  using VU = Vec<decltype(du)>;
+
+  const VU64 random64 = And(Xorshift128Plus::RandomBits(s0, s1), mask);
+
+#if HWY_TARGET == HWY_SCALAR  // Cannot repartition u64 to smaller types
+  using TU = MakeUnsigned<TF>;
+  const TU lane = static_cast<TU>(GetLane(random64) & LimitsMax<TU>());
+  const VU random = Set(du, lane);
+#else
+  const VU random = BitCast(du, random64);
+#endif
+  const VU bits_of_one = BitCast(du, Set(df, TF{1.0}));
+  const VU random_mantissa = And(random, Set(du, MantissaMask<TF>()));
+  return BitCast(df, Or(bits_of_one, random_mantissa));
+}
+
+// Returns the per-64-bit mask applied to random bits so that keys of
+// `sizeof_t` bytes follow `dist`. Only key sizes 2, 4 and 8 are supported;
+// anything else aborts.
+template <class DU64>
+Vec<DU64> MaskForDist(DU64 du64, const Dist dist, size_t sizeof_t) {
+  const bool is_uniform8 = (dist == Dist::kUniform8);
+  const bool is_uniform16 = (dist == Dist::kUniform16);
+
+  uint64_t lane_mask = 0;
+  switch (sizeof_t) {
+    case 2:
+      lane_mask = is_uniform8 ? 0x00FF00FF00FF00FFull : 0xFFFFFFFFFFFFFFFFull;
+      break;
+    case 4:
+      if (is_uniform8) {
+        lane_mask = 0x000000FF000000FFull;
+      } else if (is_uniform16) {
+        lane_mask = 0x0000FFFF0000FFFFull;
+      } else {
+        lane_mask = 0xFFFFFFFFFFFFFFFFull;
+      }
+      break;
+    case 8:
+      if (is_uniform8) {
+        lane_mask = 0x00000000000000FFull;
+      } else if (is_uniform16) {
+        lane_mask = 0x000000000000FFFFull;
+      } else {
+        lane_mask = 0x00000000FFFFFFFFull;
+      }
+      break;
+    default:
+      HWY_ABORT("Logic error");
+  }
+  return Set(du64, lane_mask);
+}
+
+// Fills `v[0, num)` with random keys drawn according to `dist` and returns the
+// statistics (min/max/count/checksum) of the generated keys.
+template <typename T>
+InputStats<T> GenerateInput(const Dist dist, T* v, size_t num) {
+  SortTag<uint64_t> du64;
+  using VU64 = Vec<decltype(du64)>;
+  const size_t N64 = Lanes(du64);
+
+  // Seed the two generator state vectors.
+  auto seed_buf = hwy::AllocateAligned<uint64_t>(2 * N64);
+  Xorshift128Plus::GenerateSeeds(du64, seed_buf.get());
+  VU64 state0 = Load(du64, seed_buf.get());
+  VU64 state1 = Load(du64, seed_buf.get() + N64);
+
+#if HWY_TARGET == HWY_SCALAR
+  const Sisd<T> d;
+#else
+  const Repartition<T, decltype(du64)> d;
+#endif
+  using V = Vec<decltype(d)>;
+  const size_t N = Lanes(d);
+  const VU64 mask = MaskForDist(du64, dist, sizeof(T));
+  auto tail = hwy::AllocateAligned<T>(N);
+
+  // Whole vectors are stored directly into the output.
+  size_t pos = 0;
+  while (num - pos >= N) {
+    StoreU(RandomValues(d, state0, state1, mask), d, v + pos);
+    pos += N;
+  }
+  // Remainder: generate one more vector into scratch space and copy only the
+  // keys that still fit.
+  if (pos != num) {
+    StoreU(RandomValues(d, state0, state1, mask), d, tail.get());
+    CopyBytes(tail.get(), v + pos, (num - pos) * sizeof(T));
+  }
+
+  InputStats<T> stats;
+  for (const T* key = v; key != v + num; ++key) {
+    stats.Notify(*key);
+  }
+  return stats;
+}
+
+// State passed to every call of Run. Empty unless parallel IPS4O is enabled,
+// in which case it holds that algorithm's thread pool.
+struct SharedState {
+#if HAVE_PARALLEL_IPS4O
+  // Upper bound on the pool size; the default imposes no practical limit.
+  const unsigned max_threads = hwy::LimitsMax<unsigned>();  // 16 for Table 1a
+  // Pool size is min(max_threads, hardware_concurrency() / 2).
+  ips4o::StdThreadPool pool{static_cast<int>(
+      HWY_MIN(max_threads, std::thread::hardware_concurrency() / 2))};
+#endif
+};
+
+// Bridge from keys (passed to Run) to lanes as expected by HeapSort. For
+// non-128-bit keys they are the same, so `keys` is forwarded unchanged with
+// traits chosen according to Order().IsAscending().
+template <class Order, typename KeyType, HWY_IF_NOT_T_SIZE(KeyType, 16)>
+void CallHeapSort(KeyType* HWY_RESTRICT keys, const size_t num_keys) {
+  using AscendingTraits =
+      detail::SharedTraits<detail::TraitsLane<detail::OrderAscending<KeyType>>>;
+  using DescendingTraits = detail::SharedTraits<
+      detail::TraitsLane<detail::OrderDescending<KeyType>>>;
+
+  if (!Order().IsAscending()) {
+    const DescendingTraits st;
+    detail::HeapSort(st, keys, num_keys);
+    return;
+  }
+  const AscendingTraits st;
+  detail::HeapSort(st, keys, num_keys);
+}
+
+#if VQSORT_ENABLED
+// Overload for 128-bit keys: each key occupies two 64-bit lanes, so HeapSort
+// receives the same memory viewed as 2 * num_keys uint64_t lanes.
+template <class Order>
+void CallHeapSort(hwy::uint128_t* HWY_RESTRICT keys, const size_t num_keys) {
+  using AscendingTraits =
+      detail::SharedTraits<detail::Traits128<detail::OrderAscending128>>;
+  using DescendingTraits =
+      detail::SharedTraits<detail::Traits128<detail::OrderDescending128>>;
+
+  uint64_t* const lanes = reinterpret_cast<uint64_t*>(keys);
+  const size_t num_lanes = num_keys * 2;
+  if (!Order().IsAscending()) {
+    const DescendingTraits st;
+    detail::HeapSort(st, lanes, num_lanes);
+    return;
+  }
+  const AscendingTraits st;
+  detail::HeapSort(st, lanes, num_lanes);
+}
+
+// Overload for K64V64 items: each item occupies two 64-bit lanes, so HeapSort
+// receives the same memory viewed as 2 * num_keys uint64_t lanes, using the
+// key-value (KV128) orders.
+template <class Order>
+void CallHeapSort(K64V64* HWY_RESTRICT keys, const size_t num_keys) {
+  using AscendingTraits =
+      detail::SharedTraits<detail::Traits128<detail::OrderAscendingKV128>>;
+  using DescendingTraits =
+      detail::SharedTraits<detail::Traits128<detail::OrderDescendingKV128>>;
+
+  uint64_t* const lanes = reinterpret_cast<uint64_t*>(keys);
+  const size_t num_lanes = num_keys * 2;
+  if (!Order().IsAscending()) {
+    const DescendingTraits st;
+    detail::HeapSort(st, lanes, num_lanes);
+    return;
+  }
+  const AscendingTraits st;
+  detail::HeapSort(st, lanes, num_lanes);
+}
+#endif  // VQSORT_ENABLED
+
+// Sorts `inout[0, num)` in place using the algorithm selected by `algo`, in
+// the order given by `Order` (queried via Order().IsAscending()) where the
+// algorithm supports it. `shared` is only used by parallel IPS4O; the last
+// (thread index) argument is unused. Any `algo` without a case in the current
+// configuration reaches `default` and aborts.
+template <class Order, typename KeyType>
+void Run(Algo algo, KeyType* HWY_RESTRICT inout, size_t num,
+         SharedState& shared, size_t /*thread*/) {
+  // Comparators for the third-party and std:: sorts below.
+  const std::less<KeyType> less;
+  const std::greater<KeyType> greater;
+
+#if !HAVE_PARALLEL_IPS4O
+  (void)shared;
+#endif
+
+  switch (algo) {
+#if HAVE_INTEL && HWY_TARGET <= HWY_AVX3
+    // Order is not consulted here; see OtherOrder (ascending only).
+    case Algo::kIntel:
+      return avx512_qsort<KeyType>(inout, static_cast<int64_t>(num));
+#endif
+
+#if HAVE_AVX2SORT
+    // Order is not consulted here either.
+    case Algo::kSEA:
+      return avx2::quicksort(inout, static_cast<int>(num));
+#endif
+
+#if HAVE_IPS4O
+    case Algo::kIPS4O:
+      if (Order().IsAscending()) {
+        return ips4o::sort(inout, inout + num, less);
+      } else {
+        return ips4o::sort(inout, inout + num, greater);
+      }
+#endif
+
+#if HAVE_PARALLEL_IPS4O
+    case Algo::kParallelIPS4O:
+      if (Order().IsAscending()) {
+        return ips4o::parallel::sort(inout, inout + num, less, shared.pool);
+      } else {
+        return ips4o::parallel::sort(inout, inout + num, greater, shared.pool);
+      }
+#endif
+
+#if HAVE_SORT512
+    case Algo::kSort512:
+      HWY_ABORT("not supported");
+      //    return Sort512::Sort(inout, num);
+#endif
+
+#if HAVE_PDQSORT
+    case Algo::kPDQ:
+      if (Order().IsAscending()) {
+        return boost::sort::pdqsort_branchless(inout, inout + num, less);
+      } else {
+        return boost::sort::pdqsort_branchless(inout, inout + num, greater);
+      }
+#endif
+
+#if HAVE_VXSORT
+    case Algo::kVXSort: {
+      // vxsort was compiled for exactly one target (see VXSORT_AVX3); for any
+      // other target, print a message and return without sorting.
+#if (VXSORT_AVX3 && HWY_TARGET != HWY_AVX3) || \
+    (!VXSORT_AVX3 && HWY_TARGET != HWY_AVX2)
+      fprintf(stderr, "Do not call for target %s\n",
+              hwy::TargetName(HWY_TARGET));
+      return;
+#else
+#if VXSORT_AVX3
+      vxsort::vxsort<KeyType, vxsort::AVX512> vx;
+#else
+      vxsort::vxsort<KeyType, vxsort::AVX2> vx;
+#endif
+      if (Order().IsAscending()) {
+        // NOTE(review): the second argument points at the last element rather
+        // than one past the end, presumably because vxsort expects an
+        // inclusive range - confirm against its API. If so, num == 0 would
+        // form a pointer before `inout`.
+        return vx.sort(inout, inout + num - 1);
+      } else {
+        fprintf(stderr, "Skipping VX - does not support descending order\n");
+        return;
+      }
+#endif  // enabled for this target
+    }
+#endif  // HAVE_VXSORT
+
+    case Algo::kStd:
+      if (Order().IsAscending()) {
+        return std::sort(inout, inout + num, less);
+      } else {
+        return std::sort(inout, inout + num, greater);
+      }
+
+    case Algo::kVQSort:
+      return VQSort(inout, num, Order());
+
+    // Dispatches to the CallHeapSort overload matching KeyType.
+    case Algo::kHeap:
+      return CallHeapSort<Order>(inout, num);
+
+    default:
+      HWY_ABORT("Not implemented");
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#endif  // HIGHWAY_HWY_CONTRIB_SORT_ALGO_TOGGLE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/bench_parallel.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/bench_parallel.cc
new file mode 100644
index 0000000000..c879c3fb10
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/bench_parallel.cc
@@ -0,0 +1,239 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Concurrent, independent sorts for generating more memory traffic and testing
+// scalability.
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <condition_variable>  //NOLINT
+#include <functional>
+#include <memory>
+#include <mutex>   //NOLINT
+#include <thread>  //NOLINT
+#include <utility>
+#include <vector>
+
+// clang-format off
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/bench_parallel.cc"  //NOLINT
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/algo-inl.h"
+#include "hwy/contrib/sort/result-inl.h"
+#include "hwy/aligned_allocator.h"
+// Last
+#include "hwy/tests/test_util-inl.h"
+// clang-format on
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+namespace {
+
+class ThreadPool {
+ public:
+  // Starts the given number of worker threads and blocks until they are ready.
+  explicit ThreadPool(
+      const size_t num_threads = std::thread::hardware_concurrency())
+      : num_threads_(num_threads) {
+    HWY_ASSERT(num_threads_ > 0);
+    threads_.reserve(num_threads_);
+    for (size_t i = 0; i < num_threads_; ++i) {
+      threads_.emplace_back(ThreadFunc, this, i);
+    }
+
+    WorkersReadyBarrier();
+  }
+  // Not copyable or assignable: every worker holds a pointer to this object.
+  ThreadPool(const ThreadPool&) = delete;
+  ThreadPool& operator=(const ThreadPool&) = delete;
+
+  // Waits for all threads to exit.
+  ~ThreadPool() {
+    StartWorkers(kWorkerExit);
+
+    for (std::thread& thread : threads_) {
+      thread.join();
+    }
+  }
+
+  size_t NumThreads() const { return threads_.size(); }
+  // Runs func(i) on every worker i < max_threads and waits for all workers.
+  template <class Func>
+  void RunOnThreads(size_t max_threads, const Func& func) {
+    task_ = &CallClosure<Func>;
+    data_ = &func;  // only the address is kept; func must outlive this call
+    StartWorkers(max_threads);  // requires max_threads <= NumThreads()
+    WorkersReadyBarrier();
+  }
+
+ private:
+  // After construction and between calls to RunOnThreads, workers are "ready",
+  // i.e. waiting on worker_start_cv_. They are "started" by sending a "command"
+  // and notifying all worker_start_cv_ waiters. (That is why all workers
+  // must be ready/waiting - otherwise, the notification will not reach all of
+  // them and the main thread waits in vain for them to report readiness.)
+  using WorkerCommand = uint64_t;
+  // Reserved commands; any other value is the number of threads to run on.
+  static constexpr WorkerCommand kWorkerWait = ~1ULL;
+  static constexpr WorkerCommand kWorkerExit = ~2ULL;
+
+  // Calls a closure (lambda with captures).
+  template <class Closure>
+  static void CallClosure(const void* f, size_t thread) {
+    (*reinterpret_cast<const Closure*>(f))(thread);
+  }
+
+  void WorkersReadyBarrier() {
+    std::unique_lock<std::mutex> lock(mutex_);
+    // Typically only a single iteration.
+    while (workers_ready_ != threads_.size()) {
+      workers_ready_cv_.wait(lock);
+    }
+    workers_ready_ = 0;
+
+    // Safely handle spurious worker wakeups.
+    worker_start_command_ = kWorkerWait;
+  }
+
+  // Precondition: all workers are ready.
+  void StartWorkers(const WorkerCommand worker_command) {
+    std::unique_lock<std::mutex> lock(mutex_);
+    worker_start_command_ = worker_command;
+    // Workers will need this lock, so release it before they wake up.
+    lock.unlock();
+    worker_start_cv_.notify_all();
+  }
+
+  static void ThreadFunc(ThreadPool* self, size_t thread) {
+    // Until kWorkerExit command received:
+    for (;;) {
+      std::unique_lock<std::mutex> lock(self->mutex_);
+      // Notify main thread that this thread is ready.
+      if (++self->workers_ready_ == self->num_threads_) {
+        self->workers_ready_cv_.notify_one();
+      }
+    RESUME_WAIT:
+      // Wait for a command.
+      self->worker_start_cv_.wait(lock);
+      const WorkerCommand command = self->worker_start_command_;
+      switch (command) {
+        case kWorkerWait:    // spurious wakeup:
+          goto RESUME_WAIT;  // lock still held, avoid incrementing ready.
+        case kWorkerExit:
+          return;  // exits thread
+        default:
+          break;
+      }
+
+      lock.unlock();
+      // Command is the maximum number of threads that should run the task.
+      HWY_ASSERT(command <= self->NumThreads());  // running on all is valid
+      if (thread < command) {
+        self->task_(self->data_, thread);
+      }
+    }
+  }
+
+  const size_t num_threads_;
+
+  // Unmodified after ctor, but cannot be const because we call thread::join().
+  std::vector<std::thread> threads_;
+
+  std::mutex mutex_;  // guards both cv and their variables.
+  std::condition_variable workers_ready_cv_;
+  size_t workers_ready_ = 0;
+  std::condition_variable worker_start_cv_;
+  WorkerCommand worker_start_command_ = kWorkerWait;  // set before any wait
+
+  // Written by main thread, read by workers (after mutex lock/unlock).
+  std::function<void(const void*, size_t)> task_;  // points to CallClosure
+  const void* data_ = nullptr;                     // points to caller's Func
+};
+
+template <class Traits>
+void RunWithoutVerify(Traits st, const Dist dist, const size_t num_keys,
+                      const Algo algo, SharedState& shared, size_t thread) {
+  using LaneType = typename Traits::LaneType;
+  using KeyType = typename Traits::KeyType;
+  using Order = typename Traits::Order;
+  const size_t num_lanes = num_keys * st.LanesPerKey();
+  auto aligned = hwy::AllocateAligned<LaneType>(num_lanes);
+  // Fill the freshly allocated buffer; GenerateInput's return value is unused.
+  (void)GenerateInput(dist, aligned.get(), num_lanes);
+  // NOTE(review): t0 is never read in this function.
+  const Timestamp t0;
+  Run<Order>(algo, reinterpret_cast<KeyType*>(aligned.get()), num_keys, shared,
+             thread);
+  HWY_ASSERT(aligned[0] < aligned[num_lanes - 1]);  // spot check: first < last
+}
+
+void BenchParallel() {
+  // Not interested in benchmark results for other targets on x86
+  if (HWY_ARCH_X86 &&
+      (HWY_TARGET != HWY_AVX2 && HWY_TARGET != HWY_AVX3 &&
+       HWY_TARGET != HWY_AVX3_ZEN4 && HWY_TARGET != HWY_AVX3_SPR)) {
+    return;
+  }
+  // Default-constructed pool: one worker per hardware_concurrency().
+  ThreadPool pool;
+  const size_t NT = pool.NumThreads();
+
+  detail::SharedTraits<detail::TraitsLane<detail::OrderAscending<int64_t>>> st;
+  using KeyType = typename decltype(st)::KeyType;
+  const size_t num_keys = size_t{100} * 1000 * 1000;
+  // Prefer IPS4O when it is compiled in, otherwise benchmark VQSort.
+#if HAVE_IPS4O
+  const Algo algo = Algo::kIPS4O;
+#else
+  const Algo algo = Algo::kVQSort;
+#endif
+  const Dist dist = Dist::kUniform32;
+
+  SharedState shared;
+  // Sweep nt = 1, 1 + step, ... while nt < NT, with step = max(1, NT / 16).
+  std::vector<Result> results;
+  for (size_t nt = 1; nt < NT; nt += HWY_MAX(1, NT / 16)) {
+    Timestamp t0;
+    // Default capture because MSVC wants algo/dist but clang does not.
+    pool.RunOnThreads(nt, [=, &shared](size_t thread) {
+      RunWithoutVerify(st, dist, num_keys, algo, shared, thread);
+    });
+    const double sec = SecondsSince(t0);
+    results.emplace_back(algo, dist, num_keys, nt, sec, sizeof(KeyType),
+                         st.KeyString());
+    results.back().Print();
+  }
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace hwy {
+namespace {
+HWY_BEFORE_TEST(BenchParallel);
+HWY_EXPORT_AND_TEST_P(BenchParallel, BenchParallel);
+}  // namespace
+}  // namespace hwy
+
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/bench_sort.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/bench_sort.cc
new file mode 100644
index 0000000000..e6e5fb6765
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/bench_sort.cc
@@ -0,0 +1,477 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <vector>
+
+// clang-format off
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/bench_sort.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/algo-inl.h"
+#include "hwy/contrib/sort/vqsort.h"
+#include "hwy/contrib/sort/result-inl.h"
+#include "hwy/contrib/sort/sorting_networks-inl.h"  // SharedTraits
+#include "hwy/contrib/sort/traits-inl.h"
+#include "hwy/contrib/sort/traits128-inl.h"
+#include "hwy/tests/test_util-inl.h"
+#include "hwy/timer-inl.h"
+#include "hwy/timer.h"
+#include "hwy/per_target.h"
+// clang-format on
+
+#if HWY_OS_LINUX
+#include <unistd.h>  // usleep
+#endif
+
+// Mode for larger sorts because M1 is able to access more than the per-core
+// share of L2, so 1M elements might still be in cache.
+#define SORT_100M 0
+
+#ifndef SORT_ONLY_COLD
+#define SORT_ONLY_COLD 0
+#endif
+#ifndef SORT_BENCH_BASE_AND_PARTITION
+#define SORT_BENCH_BASE_AND_PARTITION (!SORT_ONLY_COLD && 0)
+#endif
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+// Defined within HWY_ONCE, used by BenchAllSort.
+extern int64_t first_sort_target;
+extern int64_t first_cold_target;  // for BenchAllColdSort
+
+namespace HWY_NAMESPACE {
+namespace {
+using detail::OrderAscending;
+using detail::OrderDescending;
+using detail::SharedTraits;
+using detail::TraitsLane;
+
+#if VQSORT_ENABLED
+using detail::OrderAscending128;
+using detail::OrderAscendingKV128;
+using detail::Traits128;
+#endif  // VQSORT_ENABLED
+
+HWY_NOINLINE void BenchAllColdSort() {
+  // Only run the best (first) enabled target
+  if (first_cold_target == 0) first_cold_target = HWY_TARGET;
+  if (HWY_TARGET != first_cold_target) {
+    return;
+  }
+  // Buffer passed to HaveTimerStop; it is printed if that check fails.
+  char cpu100[100];
+  if (!platform::HaveTimerStop(cpu100)) {
+    fprintf(stderr, "CPU '%s' does not support RDTSCP, skipping benchmark.\n",
+            cpu100);
+    return;
+  }
+
+  // Initialize random seeds
+#if VQSORT_ENABLED
+  HWY_ASSERT(GetGeneratorState() != nullptr);  // vqsort
+#endif
+  RandomState rng(static_cast<uint64_t>(Unpredictable1() * 129));  // this test
+  // 10,000 uint64_t keys (80,000 bytes) in an aligned local array.
+  using T = uint64_t;
+  constexpr size_t kSize = 10 * 1000;
+  HWY_ALIGN T items[kSize];
+
+  // Initialize array
+#if 0  // optional: deliberate AVX-512 to verify VQSort performance improves
+  const ScalableTag<T> d;
+  const RebindToSigned<decltype(d)> di;
+  const size_t N = Lanes(d);
+  size_t i = 0;
+  for (; i + N <= kSize; i += N) {
+    // Super-slow scatter so that we spend enough time to warm up SKX.
+    const Vec<decltype(d)> val = Set(d, static_cast<T>(Unpredictable1()));
+    const Vec<decltype(di)> idx =
+        Iota(di, static_cast<T>(Unpredictable1() - 1));
+    ScatterIndex(val, d, items + i, idx);
+  }
+  for (; i < kSize; ++i) {
+    items[i] = static_cast<T>(Unpredictable1());
+  }
+#else  // scalar-only, verified with clang-16
+  for (size_t i = 0; i < kSize; ++i) {
+    items[i] = static_cast<T>(Unpredictable1());
+  }
+#endif
+  items[Random32(&rng) % kSize] = static_cast<T>(Unpredictable1() + 1);
+  // Time exactly one sort of the array, measured in timer ticks.
+  const timer::Ticks t0 = timer::Start();
+#if VQSORT_ENABLED && 1  // change to && 0 to switch to std::sort.
+  VQSort(items, kSize, SortAscending());
+#else
+  SharedState shared;
+  Run<SortAscending>(Algo::kStd, items, kSize, shared, /*thread=*/0);
+#endif
+  const timer::Ticks t1 = timer::Stop();
+  // Convert ticks to seconds, then to throughput in GB/s.
+  const double ticks = static_cast<double>(t1 - t0);
+  const double elapsed = ticks / platform::InvariantTicksPerSecond();
+  const double GBps = kSize * sizeof(T) * 1E-9 / elapsed;
+
+  fprintf(stderr, "N=%zu GB/s=%.2f ns=%.1f random output: %g\n", kSize, GBps,
+          elapsed * 1E9, static_cast<double>(items[Random32(&rng) % kSize]));
+
+#if SORT_ONLY_COLD
+#if HWY_OS_LINUX
+  // Long enough for the CPU to switch off AVX-512 mode before the next run.
+  usleep(100 * 1000);  // NOLINT
+#endif
+#endif
+}
+
+#if (VQSORT_ENABLED && SORT_BENCH_BASE_AND_PARTITION) || HWY_IDE
+
+template <class Traits>
+HWY_NOINLINE void BenchPartition() {
+  using LaneType = typename Traits::LaneType;
+  using KeyType = typename Traits::KeyType;
+  const SortTag<LaneType> d;
+  detail::SharedTraits<Traits> st;
+  const Dist dist = Dist::kUniform8;
+  double sum = 0.0;
+  // Scratch buffer for the detail:: calls, sized via SortConstants::BufBytes.
+  constexpr size_t kLPK = st.LanesPerKey();
+  HWY_ALIGN LaneType
+      buf[SortConstants::BufBytes<LaneType, kLPK>(HWY_MAX_BYTES) /
+          sizeof(LaneType)];
+  uint64_t* HWY_RESTRICT state = GetGeneratorState();
+  // Effectively a single iteration: log2 only takes the value max_log2.
+  const size_t max_log2 = AdjustedLog2Reps(20);
+  for (size_t log2 = max_log2; log2 < max_log2 + 1; ++log2) {
+    const size_t num_lanes = 1ull << log2;
+    const size_t num_keys = num_lanes / kLPK;
+    auto aligned = hwy::AllocateAligned<LaneType>(num_lanes);
+    // Per-repetition timings; summarized after the repetition loop.
+    std::vector<double> seconds;
+    const size_t num_reps = (1ull << (14 - log2 / 2)) * 30;
+    for (size_t rep = 0; rep < num_reps; ++rep) {
+      (void)GenerateInput(dist, aligned.get(), num_lanes);
+
+      // The pivot value can influence performance. Do exactly what vqsort will
+      // do so that the performance (influenced by prefetching and branch
+      // prediction) is likely to predict the actual performance inside vqsort.
+      detail::DrawSamples(d, st, aligned.get(), num_lanes, buf, state);
+      detail::SortSamples(d, st, buf);
+      auto pivot = detail::ChoosePivotByRank(d, st, buf);
+      // Only the Partition call itself is timed.
+      const Timestamp t0;
+      detail::Partition(d, st, aligned.get(), num_lanes - 1, pivot, buf);
+      seconds.push_back(SecondsSince(t0));
+      // 'Use' the result to prevent optimizing out the partition.
+      sum += static_cast<double>(aligned.get()[num_lanes / 2]);
+    }
+    // Report the summarized time as a single-thread Algo::kVQSort result.
+    Result(Algo::kVQSort, dist, num_keys, 1, SummarizeMeasurements(seconds),
+           sizeof(KeyType), st.KeyString())
+        .Print();
+  }
+  HWY_ASSERT(sum != 999999);  // Prevent optimizing out
+}
+
+HWY_NOINLINE void BenchAllPartition() {
+  // Not interested in benchmark results for these targets
+  if (HWY_TARGET == HWY_SSSE3) {
+    return;
+  }
+  // One BenchPartition run per key type/order; the Descending128 run is off.
+  BenchPartition<TraitsLane<OrderDescending<float>>>();
+  BenchPartition<TraitsLane<OrderDescending<int32_t>>>();
+  BenchPartition<TraitsLane<OrderDescending<int64_t>>>();
+  BenchPartition<Traits128<OrderAscending128>>();
+  // BenchPartition<Traits128<OrderDescending128>>();
+  BenchPartition<Traits128<OrderAscendingKV128>>();
+}
+
+template <class Traits>
+HWY_NOINLINE void BenchBase(std::vector<Result>& results) {
+  // Not interested in benchmark results for these targets
+  if (HWY_TARGET == HWY_SSSE3 || HWY_TARGET == HWY_SSE4) {
+    return;
+  }
+  // Otherwise appends exactly one Result to `results` (see the end).
+  using LaneType = typename Traits::LaneType;
+  using KeyType = typename Traits::KeyType;
+  const SortTag<LaneType> d;
+  detail::SharedTraits<Traits> st;
+  const Dist dist = Dist::kUniform32;
+  // Input size is whatever SortConstants::BaseCaseNumLanes reports for N.
+  const size_t N = Lanes(d);
+  constexpr size_t kLPK = st.LanesPerKey();
+  const size_t num_lanes = SortConstants::BaseCaseNumLanes<kLPK>(N);
+  const size_t num_keys = num_lanes / kLPK;
+  auto keys = hwy::AllocateAligned<LaneType>(num_lanes);
+  auto buf = hwy::AllocateAligned<LaneType>(num_lanes + N);
+
+  std::vector<double> seconds;
+  double sum = 0;                             // prevents elision
+  constexpr size_t kMul = AdjustedReps(600);  // ensures long enough to measure
+  // 30 timed repetitions; each one times kMul back-to-back BaseCase calls.
+  for (size_t rep = 0; rep < 30; ++rep) {
+    InputStats<LaneType> input_stats =
+        GenerateInput(dist, keys.get(), num_lanes);
+    // NOTE(review): keys are generated once per rep; later calls reuse them.
+    const Timestamp t0;
+    for (size_t i = 0; i < kMul; ++i) {
+      detail::BaseCase(d, st, keys.get(), num_lanes, buf.get());
+      sum += static_cast<double>(keys[0]);
+    }
+    seconds.push_back(SecondsSince(t0));
+    // printf("%f\n", seconds.back());
+
+    HWY_ASSERT(VerifySort(st, input_stats, keys.get(), num_lanes, "BenchBase"));
+  }
+  HWY_ASSERT(sum < 1E99);
+  results.emplace_back(Algo::kVQSort, dist, num_keys * kMul, 1,
+                       SummarizeMeasurements(seconds), sizeof(KeyType),
+                       st.KeyString());
+}
+
+HWY_NOINLINE void BenchAllBase() {
+  // SSSE3 results are not of interest, so that target is skipped entirely.
+  if (HWY_TARGET == HWY_SSSE3) return;
+
+  // Gather the results of all BenchBase runs, then print them together.
+  std::vector<Result> collected;
+  BenchBase<TraitsLane<OrderAscending<float>>>(collected);
+  BenchBase<TraitsLane<OrderDescending<int64_t>>>(collected);
+  BenchBase<Traits128<OrderAscending128>>(collected);
+
+  for (size_t idx = 0; idx < collected.size(); ++idx) {
+    collected[idx].Print();
+  }
+}
+
+#endif  // VQSORT_ENABLED && SORT_BENCH_BASE_AND_PARTITION
+
+std::vector<Algo> AlgoForBench() {
+  return {  // algorithms to benchmark; membership is decided by HAVE_* macros
+#if HAVE_AVX2SORT
+    Algo::kSEA,
+#endif
+#if HAVE_PARALLEL_IPS4O
+        Algo::kParallelIPS4O,
+#elif HAVE_IPS4O
+    Algo::kIPS4O,
+#endif
+#if HAVE_PDQSORT
+        Algo::kPDQ,
+#endif
+#if HAVE_SORT512
+        Algo::kSort512,
+#endif
+// Only include if we're compiling for the target it supports.
+#if HAVE_VXSORT && ((VXSORT_AVX3 && HWY_TARGET == HWY_AVX3) || \
+                    (!VXSORT_AVX3 && HWY_TARGET == HWY_AVX2))
+        Algo::kVXSort,
+#endif
+// Only include if we're compiling for the target it supports.
+#if HAVE_INTEL && HWY_TARGET <= HWY_AVX3
+        Algo::kIntel,
+#endif
+// The remaining entries are omitted when parallel IPS4O is compiled in.
+#if !HAVE_PARALLEL_IPS4O
+#if !SORT_100M
+    // 10-20x slower, but that's OK for the default size when we are not
+    // testing the parallel nor 100M modes.
+    // Algo::kStd,
+#endif
+
+#if VQSORT_ENABLED
+        Algo::kVQSort,
+#endif
+#endif  // !HAVE_PARALLEL_IPS4O
+  };
+}
+
+template <class Traits>
+HWY_NOINLINE void BenchSort(size_t num_keys) {
+  if (first_sort_target == 0) first_sort_target = HWY_TARGET;
+  // From here on, first_sort_target names the first target that got here.
+  SharedState shared;
+  detail::SharedTraits<Traits> st;
+  using Order = typename Traits::Order;
+  using LaneType = typename Traits::LaneType;
+  using KeyType = typename Traits::KeyType;
+  const size_t num_lanes = num_keys * st.LanesPerKey();
+  auto aligned = hwy::AllocateAligned<LaneType>(num_lanes);
+  // Fewer repetitions for inputs above one million keys.
+  const size_t reps = num_keys > 1000 * 1000 ? 10 : 30;
+
+  for (Algo algo : AlgoForBench()) {
+    // Other algorithms don't depend on the vector instructions, so only run
+    // them for the first target.
+#if !HAVE_VXSORT
+    if (algo != Algo::kVQSort && HWY_TARGET != first_sort_target) {
+      continue;
+    }
+#endif
+    // Per distribution: time `reps` sorts, verify each, print one summary.
+    for (Dist dist : AllDist()) {
+      std::vector<double> seconds;
+      for (size_t rep = 0; rep < reps; ++rep) {
+        InputStats<LaneType> input_stats =
+            GenerateInput(dist, aligned.get(), num_lanes);
+        // Only the Run call is timed; generation and verification are not.
+        const Timestamp t0;
+        Run<Order>(algo, reinterpret_cast<KeyType*>(aligned.get()), num_keys,
+                   shared, /*thread=*/0);
+        seconds.push_back(SecondsSince(t0));
+        // printf("%f\n", seconds.back());
+
+        HWY_ASSERT(
+            VerifySort(st, input_stats, aligned.get(), num_lanes, "BenchSort"));
+      }
+      Result(algo, dist, num_keys, 1, SummarizeMeasurements(seconds),
+             sizeof(KeyType), st.KeyString())
+          .Print();
+    }  // dist
+  }    // algo
+}
+
+enum class BenchmarkModes {  // selects the key counts from SizesToBenchmark
+  kDefault,           // 100 and 100K; 100M if HAVE_PARALLEL_IPS4O || SORT_100M
+  k1M,                // 1,000,000
+  k10K,               // 10,000
+  kAllSmall,          // every size from 1 to 128
+  kSmallPow2,         // 2, 4, ..., 128
+  kSmallPow2Between,  // 3, 6, ..., 192; includes padding
+  kPow4,              // 4, 16, ..., 256 * 1024
+  kPow10              // 10, 100, ..., 100,000
+};
+
+std::vector<size_t> SizesToBenchmark(BenchmarkModes mode) {
+  std::vector<size_t> sizes;  // key counts to benchmark, in ascending order
+  switch (mode) {
+    default:  // any unlisted mode falls through to kDefault
+    case BenchmarkModes::kDefault:
+#if HAVE_PARALLEL_IPS4O || SORT_100M
+      sizes.push_back(100 * 1000 * size_t{1000});
+#else
+      sizes.push_back(100);
+      sizes.push_back(100 * 1000);
+#endif
+      break;
+    case BenchmarkModes::k1M:
+      sizes.push_back(1000 * 1000);
+      break;
+    case BenchmarkModes::k10K:
+      sizes.push_back(10 * 1000);
+      break;
+    // Small inputs: at most 128 keys, or 192 for kSmallPow2Between.
+    case BenchmarkModes::kAllSmall:
+      sizes.reserve(128);
+      for (size_t i = 1; i <= 128; ++i) {
+        sizes.push_back(i);
+      }
+      break;
+    case BenchmarkModes::kSmallPow2:
+      for (size_t size = 2; size <= 128; size *= 2) {
+        sizes.push_back(size);
+      }
+      break;
+    case BenchmarkModes::kSmallPow2Between:
+      for (size_t size = 2; size <= 128; size *= 2) {
+        sizes.push_back(3 * size / 2);
+      }
+      break;
+    // Geometric progressions with ratio 4 and 10.
+    case BenchmarkModes::kPow4:
+      for (size_t size = 4; size <= 256 * 1024; size *= 4) {
+        sizes.push_back(size);
+      }
+      break;
+    case BenchmarkModes::kPow10:
+      for (size_t size = 10; size <= 100 * 1000; size *= 10) {
+        sizes.push_back(size);
+      }
+      break;
+  }
+  return sizes;
+}
+
+HWY_NOINLINE void BenchAllSort() {
+  // Not interested in benchmark results for these targets. Note that SSE4 is
+  // numerically less than SSE2, hence it is the lower bound.
+  if (HWY_SSE4 <= HWY_TARGET && HWY_TARGET <= HWY_SSE2) {
+    return;
+  }
+#if HAVE_INTEL
+  if (HWY_TARGET > HWY_AVX3) return;
+#endif
+  // Only the kSmallPow2 sizes (2, 4, ..., 128 keys) are benchmarked here.
+  for (size_t num_keys : SizesToBenchmark(BenchmarkModes::kSmallPow2)) {
+#if !HAVE_INTEL
+#if HWY_HAVE_FLOAT16
+    if (hwy::HaveFloat16()) {
+      BenchSort<TraitsLane<OtherOrder<float16_t>>>(num_keys);
+    }
+#endif
+    BenchSort<TraitsLane<OrderAscending<float>>>(num_keys);
+#if HWY_HAVE_FLOAT64
+    if (hwy::HaveFloat64()) {
+      // BenchSort<TraitsLane<OtherOrder<double>>>(num_keys);
+    }
+#endif
+#endif  // !HAVE_INTEL
+    // BenchSort<TraitsLane<OrderAscending<int16_t>>>(num_keys);
+    BenchSort<TraitsLane<OtherOrder<int32_t>>>(num_keys);
+    BenchSort<TraitsLane<OrderAscending<int64_t>>>(num_keys);
+    // BenchSort<TraitsLane<OtherOrder<uint16_t>>>(num_keys);
+    // BenchSort<TraitsLane<OtherOrder<uint32_t>>>(num_keys);
+    // BenchSort<TraitsLane<OrderAscending<uint64_t>>>(num_keys);
+    // 128-bit keys: only without VXSORT/INTEL and when VQSort is enabled.
+#if !HAVE_VXSORT && !HAVE_INTEL && VQSORT_ENABLED
+    BenchSort<Traits128<OrderAscending128>>(num_keys);
+    BenchSort<Traits128<OrderAscendingKV128>>(num_keys);
+#endif
+  }
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace hwy {
+int64_t first_sort_target = 0;  // 0 = none run yet; set by BenchSort
+int64_t first_cold_target = 0;  // 0 = none run yet; set by BenchAllColdSort
+namespace {
+HWY_BEFORE_TEST(BenchSort);
+HWY_EXPORT_AND_TEST_P(BenchSort, BenchAllColdSort);
+#if SORT_BENCH_BASE_AND_PARTITION
+HWY_EXPORT_AND_TEST_P(BenchSort, BenchAllPartition);
+HWY_EXPORT_AND_TEST_P(BenchSort, BenchAllBase);
+#endif
+
+#if !SORT_ONLY_COLD  // skip (warms up vector unit for next run)
+HWY_EXPORT_AND_TEST_P(BenchSort, BenchAllSort);
+#endif
+}  // namespace
+}  // namespace hwy
+
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/order.h b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/order.h
new file mode 100644
index 0000000000..6afb33fd5d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/order.h
@@ -0,0 +1,34 @@
+// Copyright 2023 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Tag arguments that determine the sort order. Used by both vqsort.h and the
+// VQSortStatic in vqsort-inl.h. Moved to a separate header so that the latter
+// can be used without pulling in the dllimport statements in vqsort.h.
+
+#ifndef HIGHWAY_HWY_CONTRIB_SORT_ORDER_H_
+#define HIGHWAY_HWY_CONTRIB_SORT_ORDER_H_
+
+namespace hwy {
+
+struct SortAscending {  // order tag whose IsAscending() is true
+  constexpr bool IsAscending() const { return true; }
+};
+struct SortDescending {  // order tag whose IsAscending() is false
+  constexpr bool IsAscending() const { return false; }
+};
+
+}  // namespace hwy
+
+#endif  // HIGHWAY_HWY_CONTRIB_SORT_ORDER_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/print_network.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/print_network.cc
new file mode 100644
index 0000000000..0760696e79
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/print_network.cc
@@ -0,0 +1,90 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+
+#include <vector>
+
+#include "hwy/base.h"
+
+// Based on A.7 in "Entwurf und Implementierung vektorisierter
+// Sortieralgorithmen" and code by Mark Blacher.
+void PrintMergeNetwork(int rows, int cols) {
+  printf("\n%d x %d:\n", rows, cols);
+  // Powers of two
+  HWY_ASSERT(rows != 0 && (rows & (rows - 1)) == 0);
+  HWY_ASSERT(cols != 0 && (cols & (cols - 1)) == 0);
+  HWY_ASSERT(rows >= 4);
+  HWY_ASSERT(cols >= 2);   // otherwise no cross-column merging required
+  HWY_ASSERT(cols <= 16);  // SortTraits lacks Reverse32
+  // Output is source text: calls on `st` with vectors named v<hex index>.
+  // Log(rows) times: sort half of the vectors with reversed groups of the
+  // other half. Group size halves until we are sorting adjacent vectors.
+  int group_size = rows;
+  int num_groups = 1;
+  for (; group_size >= 2; group_size /= 2, num_groups *= 2) {
+    // All vectors except those being reversed. Allows us to group the
+    // ReverseKeys and Sort2 operations, which is easier to read and may help
+    // in-order machines with high-latency ReverseKeys.
+    std::vector<int> all_vi;
+    for (int group = 0; group < num_groups; ++group) {
+      for (int i = 0; i < group_size / 2; ++i) {
+        all_vi.push_back(group * group_size + i);
+      }
+    }
+    for (int vi : all_vi) {
+      const int vr = vi ^ (group_size - 1);  // mirrored index within the group
+      printf("v%x = st.ReverseKeys%d(d, v%x);\n", vr, cols, vr);
+    }
+    for (int vi : all_vi) {
+      const int vr = vi ^ (group_size - 1);  // same partner as reversed above
+      printf("st.Sort2(d, v%x, v%x);\n", vi, vr);
+    }
+    printf("\n");
+  }
+
+  // Now merge across columns in all vectors.
+  if (cols > 2) {
+    for (int i = 0; i < rows; ++i) {
+      printf("v%x = st.SortPairsReverse%d(d, v%x);\n", i, cols, i);
+    }
+    printf("\n");
+  }
+  if (cols >= 16) {
+    for (int i = 0; i < rows; ++i) {
+      printf("v%x = st.SortPairsDistance4(d, v%x);\n", i, i);
+    }
+    printf("\n");
+  }
+  if (cols >= 8) {
+    for (int i = 0; i < rows; ++i) {
+      printf("v%x = st.SortPairsDistance2(d, v%x);\n", i, i);
+    }
+    printf("\n");
+  }
+  for (int i = 0; i < rows; ++i) {  // emitted for every column count
+    printf("v%x = st.SortPairsDistance1(d, v%x);\n", i, i);
+  }
+  printf("\n");
+}
+
+int main(int argc, char** argv) {
+  // Network shapes to print, as {rows, cols}, in output order.
+  const int shapes[5][2] = {{8, 2}, {8, 4}, {16, 4}, {16, 8}, {16, 16}};
+  for (const auto& shape : shapes) {
+    PrintMergeNetwork(shape[0], shape[1]);
+  }
+  return 0;
+}
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/result-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/result-inl.h
new file mode 100644
index 0000000000..34365a1669
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/result-inl.h
@@ -0,0 +1,140 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/algo-inl.h"
+
+// Normal include guard for non-SIMD parts
+#ifndef HIGHWAY_HWY_CONTRIB_SORT_RESULT_INL_H_
+#define HIGHWAY_HWY_CONTRIB_SORT_RESULT_INL_H_
+
+#include <time.h>
+
+#include <algorithm>  // std::sort
+#include <string>
+
+#include "hwy/base.h"
+#include "hwy/nanobenchmark.h"
+
+namespace hwy {
+
+struct Timestamp {
+  double t;  // value of platform::Now() at construction
+  Timestamp() : t(platform::Now()) {}
+};
+
+static inline double SecondsSince(const Timestamp& t0) {
+  const double now = Timestamp().t;  // fresh timestamp taken at call time
+  return now - t0.t;
+}
+
+// Returns trimmed mean: sorts `seconds` in place and averages the samples at
+// indices [num/4, num/2) (we don't want to run an out-of-L3-cache sort often
+// enough for the mode to be reliable). One sample => itself; none => 0.
+static inline double SummarizeMeasurements(std::vector<double>& seconds) {
+  std::sort(seconds.begin(), seconds.end());
+  const size_t num = seconds.size();
+  if (num == 0) return 0.0;  // previously 0/0 (NaN)
+  const size_t begin = num / 4;
+  const size_t end = std::max(num / 2, begin + 1);  // at least one sample
+  double sum = 0;
+  for (size_t i = begin; i < end; ++i) sum += seconds[i];
+  return sum / static_cast<double>(end - begin);
+}
+
+}  // namespace hwy
+#endif  // HIGHWAY_HWY_CONTRIB_SORT_RESULT_INL_H_
+
+// Per-target
+#if defined(HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE) == \
+    defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE
+#undef HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE
+#else
+#define HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE
+#endif
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+struct Result {
+  Result() {}
+  Result(const Algo algo, Dist dist, size_t num_keys, size_t num_threads,
+         double sec, size_t sizeof_key, const char* key_name)
+      : target(HWY_TARGET),
+        algo(algo),
+        dist(dist),
+        num_keys(num_keys),
+        num_threads(num_threads),
+        sec(sec),
+        sizeof_key(sizeof_key),
+        key_name(key_name) {}
+  // Prints one line: target, algorithm, key name, distribution, keys, MB/s.
+  void Print() const {
+    const double bytes = static_cast<double>(num_keys) *
+                         static_cast<double>(num_threads) *
+                         static_cast<double>(sizeof_key);
+    printf("%10s: %12s: %7s: %9s: %05g %4.0f MB/s (%2zu threads)\n",
+           hwy::TargetName(target), AlgoName(algo), key_name.c_str(),
+           DistName(dist), static_cast<double>(num_keys), bytes * 1E-6 / sec,
+           num_threads);
+  }
+  // NOTE(review): target/algo/dist have no initializer, so Result() skips them.
+  int64_t target;
+  Algo algo;
+  Dist dist;
+  size_t num_keys = 0;
+  size_t num_threads = 0;
+  double sec = 0.0;
+  size_t sizeof_key = 0;
+  std::string key_name;
+};
+
+template <class Traits, typename LaneType>
+bool VerifySort(Traits st, const InputStats<LaneType>& input_stats,
+                const LaneType* out, size_t num_lanes, const char* caller) {
+  constexpr size_t N1 = st.LanesPerKey();
+  HWY_ASSERT(num_lanes >= N1);
+  // Aborts on a misordered key pair; else returns whether the stats match.
+  InputStats<LaneType> output_stats;
+  // Ensure it matches the sort order
+  for (size_t i = 0; i < num_lanes - N1; i += N1) {
+    output_stats.Notify(out[i]);
+    if (N1 == 2) output_stats.Notify(out[i + 1]);
+    // Reverse order instead of checking !Compare1 so we accept equal keys.
+    if (st.Compare1(out + i + N1, out + i)) {
+      fprintf(stderr, "%s: i=%d of %d lanes: N1=%d", caller,
+              static_cast<int>(i), static_cast<int>(num_lanes),
+              static_cast<int>(N1));
+      fprintf(stderr, "%5.0f %5.0f vs. %5.0f %5.0f\n\n",  // last/first lane
+              static_cast<double>(out[i + N1 - 1]), static_cast<double>(out[i]),
+              static_cast<double>(out[i + 2 * N1 - 1]),  // in bounds for N1==1
+              static_cast<double>(out[i + N1]));
+      HWY_ABORT("%d-bit sort is incorrect\n",
+                static_cast<int>(sizeof(LaneType) * 8 * N1));
+    }
+  }
+  output_stats.Notify(out[num_lanes - N1]);
+  if (N1 == 2) output_stats.Notify(out[num_lanes - N1 + 1]);
+
+  return input_stats == output_stats;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#endif  // HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/shared-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/shared-inl.h
new file mode 100644
index 0000000000..18cb58d78b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/shared-inl.h
@@ -0,0 +1,154 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Definitions shared between vqsort-inl and sorting_networks-inl.
+
+// Normal include guard for target-independent parts
+#ifndef HIGHWAY_HWY_CONTRIB_SORT_SHARED_INL_H_
+#define HIGHWAY_HWY_CONTRIB_SORT_SHARED_INL_H_
+
+#include "hwy/base.h"
+
+namespace hwy {
+
+// Internal constants - these are to avoid magic numbers/literals and cannot be
+// changed without also changing the associated code.
+struct SortConstants {
+  // SortingNetwork reshapes its input into a matrix. This is the maximum number
+  // of *lanes* per vector. Must be at least 8 because SortSamples assumes the
+  // sorting network can handle 128 bytes with 8 rows, so 16 bytes per vector,
+  // which means 8 lanes for 16-bit types.
+#if HWY_COMPILER_MSVC || HWY_IS_DEBUG_BUILD
+  static constexpr size_t kMaxCols = 8;  // avoid build timeout/stack overflow
+#else
+  static constexpr size_t kMaxCols = 16;  // enough for u32 in 512-bit vector
+#endif
+
+  // 16 rows is a compromise between using the 32 AVX-512/SVE/RVV registers,
+  // fitting within 16 AVX2 registers with only a few spills, keeping BaseCase
+  // code size reasonable, and minimizing the extra logN factor for larger
+  // networks (for which only loose upper bounds on size are known).
+  static constexpr size_t kMaxRows = 16;
+
+  // kLPK = lanes per key; a template argument to avoid an actual division.
+  template <size_t kLPK>
+  static constexpr HWY_INLINE size_t BaseCaseNumLanes(size_t N) {
+    // We use 8, 8x2, 8x4, and 16x{4..} networks, in units of keys. For N/kLPK
+    // < 4, we cannot use the 16-row networks.
+    return (((N / kLPK) >= 4) ? kMaxRows : 8) * HWY_MIN(N, kMaxCols);
+  }
+
+  // Unrolling is important (pipelining and amortizing branch mispredictions);
+  // 2x is sufficient to reach full memory bandwidth on SKX in Partition, but
+  // somewhat slower for sorting than 4x.
+  //
+  // To change, must also update left + 3 * N etc. in the loop.
+  static constexpr size_t kPartitionUnroll = 4;
+
+  // Chunk := group of keys loaded for sampling a pivot. Matches the typical
+  // cache line size of 64 bytes to get maximum benefit per L2 miss. Sort()
+  // ensures vectors are no larger than that, so this can be independent of the
+  // vector size and thus constexpr.
+  static constexpr HWY_INLINE size_t LanesPerChunk(size_t sizeof_t) {
+    return 64 / sizeof_t;  // chunk bytes / lane bytes
+  }
+
+  template <typename T>
+  static constexpr HWY_INLINE size_t SampleLanes() {
+    return 2 * LanesPerChunk(sizeof(T));  // Stored samples: two chunks
+  }
+
+  static constexpr HWY_INLINE size_t PartitionBufNum(size_t N) {
+    // The main loop reads kPartitionUnroll vectors, and first loads from
+    // both left and right beforehand, so it requires min = 2 *
+    // kPartitionUnroll vectors. To handle smaller amounts (only guaranteed
+    // >= BaseCaseNumLanes), we partition the right side into a buffer. We need
+    // another vector at the end so CompressStore does not overwrite anything.
+    return (2 * kPartitionUnroll + 1) * N;  // in lanes (N per vector)
+  }
+
+  // Returns buffer size in lanes: the max across the three buffer usages.
+  template <typename T, size_t kLPK>
+  static constexpr HWY_INLINE size_t BufNum(size_t N) {
+    // BaseCase may write one padding vector, and SortSamples uses the space
+    // after samples as the buffer.
+    return HWY_MAX(SampleLanes<T>() + BaseCaseNumLanes<kLPK>(N) + N,
+                   PartitionBufNum(N));
+  }
+
+  // Translates vector_size (bytes) to lanes; returns buffer size in bytes.
+  template <typename T, size_t kLPK>
+  static constexpr HWY_INLINE size_t BufBytes(size_t vector_size) {
+    return BufNum<T, kLPK>(vector_size / sizeof(T)) * sizeof(T);
+  }
+
+  // Returns the max BufBytes over all lane sizes usable with kLPK.
+  template <size_t kLPK>
+  static constexpr HWY_INLINE size_t MaxBufBytes(size_t vector_size) {
+    // If 2 lanes per key, it's a 128-bit key with u64 lanes.
+    return kLPK == 2 ? BufBytes<uint64_t, 2>(vector_size)
+                     : HWY_MAX((BufBytes<uint16_t, 1>(vector_size)),
+                               HWY_MAX((BufBytes<uint32_t, 1>(vector_size)),
+                                       (BufBytes<uint64_t, 1>(vector_size))));
+  }
+};
+
+static_assert(SortConstants::MaxBufBytes<1>(64) <= 1280, "Unexpectedly high");
+static_assert(SortConstants::MaxBufBytes<2>(64) <= 1280, "Unexpectedly high");
+
+}  // namespace hwy
+
+#endif  // HIGHWAY_HWY_CONTRIB_SORT_SHARED_INL_H_
+
+// Per-target
+#if defined(HIGHWAY_HWY_CONTRIB_SORT_SHARED_TOGGLE) == \
+    defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_CONTRIB_SORT_SHARED_TOGGLE
+#undef HIGHWAY_HWY_CONTRIB_SORT_SHARED_TOGGLE
+#else
+#define HIGHWAY_HWY_CONTRIB_SORT_SHARED_TOGGLE
+#endif
+
+#include "hwy/highway.h"
+
+// vqsort isn't available on HWY_SCALAR, and builds time out on MSVC opt and
+// Armv7 debug.
+#undef VQSORT_ENABLED
+#if (HWY_TARGET == HWY_SCALAR) ||                 \
+    (HWY_COMPILER_MSVC && !HWY_IS_DEBUG_BUILD) || \
+    (HWY_ARCH_ARM_V7 && HWY_IS_DEBUG_BUILD)
+#define VQSORT_ENABLED 0
+#else
+#define VQSORT_ENABLED 1
+#endif
+
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Default tag / vector width selector.
+#if HWY_TARGET == HWY_RVV
+// Use LMUL = 1/2; for SEW=64 this ends up emulated via vsetvl.
+template <typename T>
+using SortTag = ScalableTag<T, -1>;
+#else
+template <typename T>
+using SortTag = ScalableTag<T>;
+#endif
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+
+#endif  // HIGHWAY_HWY_CONTRIB_SORT_SHARED_TOGGLE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/sort_test.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/sort_test.cc
new file mode 100644
index 0000000000..6a6672db6b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/sort_test.cc
@@ -0,0 +1,737 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <string.h>  // memcpy
+
+#include <unordered_map>
+#include <vector>
+
+#include "hwy/base.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/sort_test.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+// After foreach_target
+#include "hwy/contrib/sort/algo-inl.h"
+#include "hwy/contrib/sort/result-inl.h"
+#include "hwy/contrib/sort/traits128-inl.h"
+#include "hwy/contrib/sort/vqsort-inl.h"  // BaseCase
+#include "hwy/contrib/sort/vqsort.h"
+#include "hwy/highway.h"
+#include "hwy/per_target.h"
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+namespace {
+
+using detail::OrderAscending;
+using detail::SharedTraits;
+using detail::TraitsLane;
+
+#if VQSORT_ENABLED || HWY_IDE
+#if !HAVE_INTEL
+using detail::OrderAscending128;
+using detail::OrderAscendingKV128;
+using detail::OrderAscendingKV64;
+using detail::OrderDescending128;
+using detail::OrderDescendingKV128;
+using detail::OrderDescendingKV64;
+using detail::Traits128;
+#endif
+
+// Verify the corner cases of LargerSortValue/SmallerSortValue, used to
+// implement PrevValue/NextValue.
+struct TestFloatLargerSmaller {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T, D d) {
+    const Vec<D> p0 = Zero(d);  // p* = positive, n* = negated counterpart
+    const Vec<D> p1 = Set(d, static_cast<T>(1));
+    const Vec<D> pinf = Inf(d);
+    const Vec<D> peps = Set(d, hwy::Epsilon<T>());
+    const Vec<D> pmax = Set(d, hwy::HighestValue<T>());
+
+    const Vec<D> n0 = Neg(p0);
+    const Vec<D> n1 = Neg(p1);
+    const Vec<D> ninf = Neg(pinf);
+    const Vec<D> neps = Neg(peps);
+    const Vec<D> nmax = Neg(pmax);
+
+    // Larger(0) is the smallest subnormal, typically eps * FLT_MIN.
+    const RebindToUnsigned<D> du;
+    const Vec<D> psub = BitCast(d, Set(du, 1));  // integer bit pattern 1
+    const Vec<D> nsub = Neg(psub);
+    HWY_ASSERT(AllTrue(d, Lt(psub, peps)));
+    HWY_ASSERT(AllTrue(d, Gt(nsub, neps)));
+
+    // +/-0 moves to +/- smallest subnormal.
+    HWY_ASSERT_VEC_EQ(d, psub, detail::LargerSortValue(d, p0));
+    HWY_ASSERT_VEC_EQ(d, nsub, detail::SmallerSortValue(d, p0));
+    HWY_ASSERT_VEC_EQ(d, psub, detail::LargerSortValue(d, n0));
+    HWY_ASSERT_VEC_EQ(d, nsub, detail::SmallerSortValue(d, n0));
+
+    // The next magnitude larger than 1 is (1 + eps) by definition.
+    HWY_ASSERT_VEC_EQ(d, Add(p1, peps), detail::LargerSortValue(d, p1));
+    HWY_ASSERT_VEC_EQ(d, Add(n1, neps), detail::SmallerSortValue(d, n1));
+    // 1-eps and -1+eps are slightly different, but we can still ensure the
+    // next values are less than 1 / greater than -1.
+    HWY_ASSERT(AllTrue(d, Gt(p1, detail::SmallerSortValue(d, p1))));
+    HWY_ASSERT(AllTrue(d, Lt(n1, detail::LargerSortValue(d, n1))));
+
+    // Even for large (finite) values, we can move toward/away from infinity.
+    HWY_ASSERT_VEC_EQ(d, pinf, detail::LargerSortValue(d, pmax));
+    HWY_ASSERT_VEC_EQ(d, ninf, detail::SmallerSortValue(d, nmax));
+    HWY_ASSERT(AllTrue(d, Gt(pmax, detail::SmallerSortValue(d, pmax))));
+    HWY_ASSERT(AllTrue(d, Lt(nmax, detail::LargerSortValue(d, nmax))));
+
+    // For infinities, results are unchanged or the extremal finite value.
+    HWY_ASSERT_VEC_EQ(d, pinf, detail::LargerSortValue(d, pinf));
+    HWY_ASSERT_VEC_EQ(d, pmax, detail::SmallerSortValue(d, pinf));
+    HWY_ASSERT_VEC_EQ(d, nmax, detail::LargerSortValue(d, ninf));
+    HWY_ASSERT_VEC_EQ(d, ninf, detail::SmallerSortValue(d, ninf));
+  }
+};
+HWY_NOINLINE void TestAllFloatLargerSmaller() {  // runs the functor above
+  ForFloatTypesDynamic(ForPartialVectors<TestFloatLargerSmaller>());
+}
+
+// Regression test: LastValue used to be the largest normal float, which was
+// then injected into arrays containing only infinities. Must not happen.
+struct TestFloatInf {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T, D d) {
+    const size_t N = Lanes(d);
+    auto keys = hwy::AllocateAligned<T>(3 * N);  // three vectors of +inf
+    T* const ptr = keys.get();
+    Fill(d, GetLane(Inf(d)), 3 * N, ptr);
+    VQSort(ptr, 3 * N, SortAscending());
+    for (size_t ofs = 0; ofs < 3 * N; ofs += N) {  // one vector at a time
+      HWY_ASSERT(AllTrue(d, IsInf(LoadU(d, ptr + ofs))));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllFloatInf() {
+  // TODO(janwas): bfloat16_t not yet supported.
+  ForFloatTypesDynamic(ForPartialVectors<TestFloatInf>());
+}
+
+template <class Traits>
+static HWY_NOINLINE void TestMedian3() {
+  using LaneType = typename Traits::LaneType;
+  using D = CappedTag<LaneType, 1>;
+  using V = Vec<D>;
+  const D d;
+  SharedTraits<Traits> st;
+  for (uint32_t mask = 0; mask < 8; ++mask) {  // all 2^3 inputs of 0/1
+    const V in0 = Set(d, LaneType{(mask & 1u) ? 1u : 0u});
+    const V in1 = Set(d, LaneType{(mask & 2u) ? 1u : 0u});
+    const V in2 = Set(d, LaneType{(mask & 4u) ? 1u : 0u});
+    const LaneType median = GetLane(detail::MedianOf3(st, in0, in1, in2));
+    // The median is 1 iff the majority (at least two) of the inputs are 1.
+    const LaneType expected = (PopCount(mask) >= 2) ? 1 : 0;
+    HWY_ASSERT_EQ(expected, median);
+  }
+}
+
+HWY_NOINLINE void TestAllMedian() {
+  TestMedian3<TraitsLane<OrderAscending<uint64_t> > >();
+}
+
+template <class Traits>
+static HWY_NOINLINE void TestBaseCaseAscDesc() {  // sorted/reversed inputs
+  using LaneType = typename Traits::LaneType;
+  SharedTraits<Traits> st;
+  const SortTag<LaneType> d;
+  const size_t N = Lanes(d);
+  constexpr size_t N1 = st.LanesPerKey();
+  const size_t base_case_num = SortConstants::BaseCaseNumLanes<N1>(N);
+
+  constexpr int kDebug = 0;  // 1: print test config; >= 2: also print lanes
+  auto aligned_lanes = hwy::AllocateAligned<LaneType>(N + base_case_num + N);
+  auto buf = hwy::AllocateAligned<LaneType>(base_case_num + 2 * N);
+  HWY_ASSERT(aligned_lanes && buf);
+
+  std::vector<size_t> lengths;  // numbers of lanes to sort
+  lengths.push_back(HWY_MAX(1, N1));
+  lengths.push_back(3 * N1);
+  lengths.push_back(base_case_num / 2);
+  lengths.push_back(base_case_num / 2 + N1);
+  lengths.push_back(base_case_num - N1);
+  lengths.push_back(base_case_num);
+
+  std::vector<size_t> misalignments;  // offsets in lanes from aligned_lanes
+  misalignments.push_back(0);
+  misalignments.push_back(1);
+  if (N >= 6) misalignments.push_back(N / 2 - 1);
+  misalignments.push_back(N / 2);
+  misalignments.push_back(N / 2 + 1);
+  misalignments.push_back(HWY_MIN(2 * N / 3 + 3, size_t{N - 1}));
+
+  for (bool asc : {false, true}) {  // asc: input is ascending, else descending
+    for (size_t len : lengths) {
+      for (size_t misalign : misalignments) {
+        LaneType* HWY_RESTRICT lanes = aligned_lanes.get() + misalign;
+        if (kDebug) {
+          printf("============%s asc %d N1 %d len %d misalign %d\n",
+                 st.KeyString(), asc, static_cast<int>(N1),
+                 static_cast<int>(len), static_cast<int>(misalign));
+        }
+
+        for (size_t i = 0; i < misalign; ++i) {  // sentinels before the input
+          aligned_lanes[i] = hwy::LowestValue<LaneType>();
+        }
+        InputStats<LaneType> input_stats;
+        for (size_t i = 0; i < len; ++i) {
+          lanes[i] = asc ? static_cast<LaneType>(LaneType(i) + 1)
+                         : static_cast<LaneType>(LaneType(len) - LaneType(i));
+          input_stats.Notify(lanes[i]);
+          if (kDebug >= 2) {
+            printf("%3zu: %f\n", i, static_cast<double>(lanes[i]));
+          }
+        }
+        for (size_t i = len; i < base_case_num + N; ++i) {  // right sentinels
+          lanes[i] = hwy::LowestValue<LaneType>();
+        }
+
+        detail::BaseCase(d, st, lanes, len, buf.get());
+
+        if (kDebug >= 2) {
+          printf("out>>>>>>\n");
+          for (size_t i = 0; i < len; ++i) {
+            printf("%3zu: %f\n", i, static_cast<double>(lanes[i]));
+          }
+        }
+
+        HWY_ASSERT(VerifySort(st, input_stats, lanes, len, "BaseAscDesc"));
+        for (size_t i = 0; i < misalign; ++i) {  // sentinels must be intact
+          if (aligned_lanes[i] != hwy::LowestValue<LaneType>())
+            HWY_ABORT("Overrun misalign at %d\n", static_cast<int>(i));
+        }
+        for (size_t i = len; i < base_case_num + N; ++i) {
+          if (lanes[i] != hwy::LowestValue<LaneType>())
+            HWY_ABORT("Overrun right at %d\n", static_cast<int>(i));
+        }
+      }  // misalign
+    }    // len
+  }      // asc
+}
+
+template <class Traits>
+static HWY_NOINLINE void TestBaseCase01() {  // inputs consisting of 0 and 1
+  using LaneType = typename Traits::LaneType;
+  SharedTraits<Traits> st;
+  const SortTag<LaneType> d;
+  const size_t N = Lanes(d);
+  constexpr size_t N1 = st.LanesPerKey();
+  const size_t base_case_num = SortConstants::BaseCaseNumLanes<N1>(N);
+
+  constexpr int kDebug = 0;  // 1: print test config; >= 2: also print lanes
+  auto lanes = hwy::AllocateAligned<LaneType>(base_case_num + N);
+  auto buf = hwy::AllocateAligned<LaneType>(base_case_num + 2 * N);
+  HWY_ASSERT(lanes && buf);
+
+  std::vector<size_t> lengths;  // numbers of lanes to sort
+  lengths.push_back(HWY_MAX(1, N1));
+  lengths.push_back(3 * N1);
+  lengths.push_back(base_case_num / 2);
+  lengths.push_back(base_case_num / 2 + N1);
+  lengths.push_back(base_case_num - N1);
+  lengths.push_back(base_case_num);
+
+  for (size_t len : lengths) {
+    if (kDebug) {
+      printf("============%s 01 N1 %d len %d\n", st.KeyString(),
+             static_cast<int>(N1), static_cast<int>(len));
+    }
+    const uint64_t kMaxBits = AdjustedLog2Reps(HWY_MIN(len, size_t{14}));
+    for (uint64_t bits = 0; bits < ((1ull << kMaxBits) - 1); ++bits) {
+      InputStats<LaneType> input_stats;
+      for (size_t i = 0; i < len; ++i) {
+        lanes[i] = (i < 64 && (bits & (1ull << i))) ? 1 : 0;  // bit i of bits
+        input_stats.Notify(lanes[i]);
+        if (kDebug >= 2) {
+          printf("%3zu: %f\n", i, static_cast<double>(lanes[i]));
+        }
+      }
+      for (size_t i = len; i < base_case_num + N; ++i) {  // right sentinels
+        lanes[i] = hwy::LowestValue<LaneType>();
+      }
+
+      detail::BaseCase(d, st, lanes.get(), len, buf.get());
+
+      if (kDebug >= 2) {
+        printf("out>>>>>>\n");
+        for (size_t i = 0; i < len; ++i) {
+          printf("%3zu: %f\n", i, static_cast<double>(lanes[i]));
+        }
+      }
+
+      HWY_ASSERT(VerifySort(st, input_stats, lanes.get(), len, "Base01"));
+      for (size_t i = len; i < base_case_num + N; ++i) {  // must be intact
+        if (lanes[i] != hwy::LowestValue<LaneType>())
+          HWY_ABORT("Overrun right at %d\n", static_cast<int>(i));
+      }
+    }  // bits (NOTE(review): all-ones pattern is excluded - intended?)
+  }    // len
+}
+
+template <class Traits>
+static HWY_NOINLINE void TestBaseCase() {  // runs both BaseCase tests
+  TestBaseCaseAscDesc<Traits>();
+  TestBaseCase01<Traits>();
+}
+
+HWY_NOINLINE void TestAllBaseCase() {
+  // Workaround for stack overflow on MSVC debug.
+#if defined(_MSC_VER)
+  return;  // skips every case below
+#endif
+  TestBaseCase<TraitsLane<OrderAscending<int32_t> > >();
+  TestBaseCase<TraitsLane<OtherOrder<int64_t> > >();
+#if !HAVE_INTEL
+  TestBaseCase<Traits128<OrderAscending128> >();
+  TestBaseCase<Traits128<OrderDescending128> >();
+#endif  // !HAVE_INTEL
+}
+
+template <class Traits>
+static HWY_NOINLINE void VerifyPartition(
+    Traits st, typename Traits::LaneType* HWY_RESTRICT lanes, size_t left,
+    size_t border, size_t right, const size_t N1,
+    const typename Traits::LaneType* pivot) {
+  /* for (size_t i = left; i < right; ++i) {
+     if (i == border) printf("--\n");
+     printf("%4zu: %3d\n", i, lanes[i]);
+   }*/
+
+  HWY_ASSERT(left % N1 == 0);  // all indices must be at key boundaries
+  HWY_ASSERT(border % N1 == 0);
+  HWY_ASSERT(right % N1 == 0);
+  const bool asc = typename Traits::Order().IsAscending();  // for messages
+  for (size_t i = left; i < border; i += N1) {  // pivot must not be before key
+    if (st.Compare1(pivot, lanes + i)) {
+      HWY_ABORT(
+          "%s: asc %d left[%d] piv %.0f %.0f compares before %.0f %.0f "
+          "border %d",
+          st.KeyString(), asc, static_cast<int>(i),
+          static_cast<double>(pivot[1]), static_cast<double>(pivot[0]),
+          static_cast<double>(lanes[i + 1]), static_cast<double>(lanes[i + 0]),
+          static_cast<int>(border));
+    }
+  }
+  for (size_t i = border; i < right; i += N1) {  // pivot must be before key
+    if (!st.Compare1(pivot, lanes + i)) {
+      HWY_ABORT(
+          "%s: asc %d right[%d] piv %.0f %.0f compares after %.0f %.0f "
+          "border %d",
+          st.KeyString(), asc, static_cast<int>(i),
+          static_cast<double>(pivot[1]), static_cast<double>(pivot[0]),
+          static_cast<double>(lanes[i + 1]), static_cast<double>(lanes[i]),
+          static_cast<int>(border));
+    }
+  }
+}
+
+template <class Traits>
+static HWY_NOINLINE void TestPartition() {  // checks detail::Partition
+  using LaneType = typename Traits::LaneType;
+  const SortTag<LaneType> d;
+  SharedTraits<Traits> st;
+  const bool asc = typename Traits::Order().IsAscending();
+  const size_t N = Lanes(d);
+  constexpr int kDebug = 0;  // 1: print test config; >= 2: also print lanes
+  constexpr size_t N1 = st.LanesPerKey();
+  const size_t base_case_num = SortConstants::BaseCaseNumLanes<N1>(N);
+  // left + len + align
+  const size_t total = 32 + (base_case_num + 4 * HWY_MAX(N, 4)) + 2 * N;
+  auto aligned_lanes = hwy::AllocateAligned<LaneType>(total);
+  HWY_ALIGN LaneType buf[SortConstants::BufBytes<LaneType, N1>(HWY_MAX_BYTES) /
+                         sizeof(LaneType)];
+
+  for (bool in_asc : {false, true}) {  // in_asc: ascending input values
+    for (int left_i : {0, 1, 7, 8, 30, 31}) {
+      const size_t left = static_cast<size_t>(left_i) & ~(N1 - 1);
+      for (size_t ofs :
+           {N, N + 3, 2 * N, 2 * N + 2, 2 * N + 3, 3 * N - 1, 4 * N - 2}) {
+        const size_t len = (base_case_num + ofs) & ~(N1 - 1);
+        for (LaneType pivot1 : {LaneType(0), LaneType(len / 3),
+                                LaneType(2 * len / 3), LaneType(len)}) {
+          const LaneType pivot2[2] = {pivot1, 0};
+          const auto pivot = st.SetKey(d, pivot2);
+          for (size_t misalign = 0; misalign < N;
+               misalign += st.LanesPerKey()) {
+            LaneType* HWY_RESTRICT lanes = aligned_lanes.get() + misalign;
+            const size_t right = left + len;
+            if (kDebug) {
+              printf(
+                  "=========%s asc %d left %d len %d right %d piv %.0f %.0f\n",
+                  st.KeyString(), asc, static_cast<int>(left),
+                  static_cast<int>(len), static_cast<int>(right),
+                  static_cast<double>(pivot2[1]),
+                  static_cast<double>(pivot2[0]));
+            }
+
+            for (size_t i = 0; i < misalign; ++i) {  // sentinels before lanes
+              aligned_lanes[i] = hwy::LowestValue<LaneType>();
+            }
+            for (size_t i = 0; i < left; ++i) {  // sentinels before the input
+              lanes[i] = hwy::LowestValue<LaneType>();
+            }
+            std::unordered_map<LaneType, int> counts;  // occurrences per value
+            for (size_t i = left; i < right; ++i) {
+              lanes[i] = static_cast<LaneType>(
+                  in_asc ? LaneType(i + 1) - static_cast<LaneType>(left)
+                         : static_cast<LaneType>(right) - LaneType(i));
+              ++counts[lanes[i]];
+              if (kDebug >= 2) {
+                printf("%3zu: %f\n", i, static_cast<double>(lanes[i]));
+              }
+            }
+            for (size_t i = right; i < total - misalign; ++i) {  // sentinels
+              lanes[i] = hwy::LowestValue<LaneType>();
+            }
+
+            size_t border = left + detail::Partition(d, st, lanes + left,
+                                                     right - left, pivot, buf);
+
+            if (kDebug >= 2) {
+              printf("out>>>>>>\n");
+              for (size_t i = left; i < right; ++i) {
+                printf("%3zu: %f\n", i, static_cast<double>(lanes[i]));
+              }
+              for (size_t i = right; i < total - misalign; ++i) {
+                printf("%3zu: sentinel %f\n", i, static_cast<double>(lanes[i]));
+              }
+            }
+            for (size_t i = left; i < right; ++i) {
+              --counts[lanes[i]];  // must cancel the input counts
+            }
+            for (auto kv : counts) {
+              if (kv.second != 0) {
+                PrintValue(kv.first);
+                HWY_ABORT("Incorrect count %d\n", kv.second);
+              }
+            }
+            VerifyPartition(st, lanes, left, border, right, N1, pivot2);
+            for (size_t i = 0; i < misalign; ++i) {  // sentinels must be intact
+              if (aligned_lanes[i] != hwy::LowestValue<LaneType>())
+                HWY_ABORT("Overrun misalign at %d\n", static_cast<int>(i));
+            }
+            for (size_t i = 0; i < left; ++i) {
+              if (lanes[i] != hwy::LowestValue<LaneType>())
+                HWY_ABORT("Overrun left at %d\n", static_cast<int>(i));
+            }
+            for (size_t i = right; i < total - misalign; ++i) {
+              if (lanes[i] != hwy::LowestValue<LaneType>())
+                HWY_ABORT("Overrun right at %d\n", static_cast<int>(i));
+            }
+          }  // misalign
+        }    // pivot
+      }      // len
+    }        // left
+  }          // asc
+}
+
+HWY_NOINLINE void TestAllPartition() {
+  TestPartition<TraitsLane<OtherOrder<int32_t> > >();
+#if !HAVE_INTEL
+  TestPartition<Traits128<OrderAscending128> >();
+#endif  // !HAVE_INTEL
+
+#if !HWY_IS_DEBUG_BUILD  // remaining types only in non-debug builds
+  TestPartition<TraitsLane<OrderAscending<int16_t> > >();
+  TestPartition<TraitsLane<OrderAscending<int64_t> > >();
+  TestPartition<TraitsLane<OtherOrder<float> > >();
+  // OK to check current target, not using dynamic dispatch here.
+#if HWY_HAVE_FLOAT64
+  TestPartition<TraitsLane<OtherOrder<double> > >();
+#endif  // HWY_HAVE_FLOAT64
+#if !HAVE_INTEL
+  TestPartition<Traits128<OrderDescending128> >();
+#endif  // !HAVE_INTEL
+#endif  // !HWY_IS_DEBUG_BUILD
+}
+
+// (used for sample selection for choosing a pivot)
+template <typename TU>
+static HWY_NOINLINE void TestRandomGenerator() {
+  static_assert(!hwy::IsSigned<TU>(), "");
+  SortTag<TU> du;
+  const size_t N = Lanes(du);
+
+  uint64_t* state = GetGeneratorState();
+
+  // Ensure lower and upper 32 bits are uniformly distributed.
+  uint64_t sum_lo = 0, sum_hi = 0;
+  for (size_t i = 0; i < 1000; ++i) {
+    const uint64_t bits = detail::RandomBits(state);
+    sum_lo += bits & 0xFFFFFFFF;
+    sum_hi += bits >> 32;
+  }
+  const double expected = 1000 * (1ULL << 31);  // 1000 draws * midpoint 2^31
+  HWY_ASSERT(0.9 * expected <= static_cast<double>(sum_lo) &&
+             static_cast<double>(sum_lo) <= 1.1 * expected);  // within 10%
+  HWY_ASSERT(0.9 * expected <= static_cast<double>(sum_hi) &&
+             static_cast<double>(sum_hi) <= 1.1 * expected);  // within 10%
+
+  const size_t lanes_per_block = HWY_MAX(64 / sizeof(TU), N);  // power of two
+
+  for (uint32_t num_blocks = 2; num_blocks < 100000;
+       num_blocks = 3 * num_blocks / 2) {
+    // Generate some numbers and ensure all are in range
+    uint64_t sum = 0;  // sum of the returned indices, for the mean check
+    constexpr size_t kReps = 10000;
+    for (size_t rep = 0; rep < kReps; ++rep) {
+      const uint32_t bits = detail::RandomBits(state) & 0xFFFFFFFF;
+      const size_t index = detail::RandomChunkIndex(num_blocks, bits);
+      HWY_ASSERT(((index + 1) * lanes_per_block) <=
+                 num_blocks * lanes_per_block);
+
+      sum += index;
+    }
+
+    // Also ensure the mean is near the middle of the range
+    const double expected = (num_blocks - 1) / 2.0;
+    const double actual = static_cast<double>(sum) / kReps;
+    HWY_ASSERT(0.9 * expected <= actual && actual <= 1.1 * expected);
+  }
+}
+
+HWY_NOINLINE void TestAllGenerator() {  // 32- and 64-bit unsigned lanes
+  TestRandomGenerator<uint32_t>();
+  TestRandomGenerator<uint64_t>();
+}
+
+#else
+static void TestAllFloatLargerSmaller() {}
+static void TestAllFloatInf() {}
+static void TestAllMedian() {}
+static void TestAllBaseCase() {}
+static void TestAllPartition() {}
+static void TestAllGenerator() {}
+#endif  // VQSORT_ENABLED
+
+// Remembers input, and compares results to that of a reference algorithm.
+template <class Traits>
+class CompareResults {
+  using LaneType = typename Traits::LaneType;
+  using KeyType = typename Traits::KeyType;
+
+ public:
+  CompareResults(const LaneType* in, size_t num_lanes) {
+    // Range assign is well-defined even for num_lanes == 0 (no memcpy to null).
+    copy_.assign(in, in + num_lanes);
+  }
+
+  bool Verify(const LaneType* output) {  // true if output == reference sort
+#if HAVE_PDQSORT
+    const Algo reference = Algo::kPDQ;
+#else
+    const Algo reference = Algo::kStd;
+#endif
+    SharedState shared;
+    using Order = typename Traits::Order;
+    const Traits st;
+    const size_t num_keys = copy_.size() / st.LanesPerKey();
+    Run<Order>(reference, reinterpret_cast<KeyType*>(copy_.data()), num_keys,
+               shared, /*thread=*/0);  // sorts the saved copy in place
+#if VQSORT_PRINT >= 3
+    fprintf(stderr, "\nExpected:\n");
+    for (size_t i = 0; i < copy_.size(); ++i) {
+      PrintValue(copy_[i]);
+    }
+    fprintf(stderr, "\n");
+#endif
+    for (size_t i = 0; i < copy_.size(); ++i) {  // report the first mismatch
+      if (copy_[i] != output[i]) {
+        if (sizeof(KeyType) == 16) {
+          fprintf(stderr, "%s Asc %d mismatch at %d of %d: %g %g\n",
+                  st.KeyString(), Order().IsAscending(), static_cast<int>(i),
+                  static_cast<int>(copy_.size()), static_cast<double>(copy_[i]),
+                  static_cast<double>(output[i]));
+        } else {
+          fprintf(stderr,
+                  "Type %s Asc %d mismatch at %d of %d: ", st.KeyString(),
+                  Order().IsAscending(), static_cast<int>(i),
+                  static_cast<int>(copy_.size()));
+          PrintValue(copy_[i]);
+          PrintValue(output[i]);
+          fprintf(stderr, "\n");
+        }
+        return false;
+      }
+    }
+    return true;
+  }
+
+ private:
+  std::vector<LaneType> copy_;  // input lanes; reference-sorted by Verify()
+};
+
+std::vector<Algo> AlgoForTest() {  // algorithms enabled at compile time
+  return {
+#if HAVE_AVX2SORT
+    Algo::kSEA,
+#endif
+#if HAVE_IPS4O
+        Algo::kIPS4O,
+#endif
+#if HAVE_PDQSORT
+        Algo::kPDQ,
+#endif
+#if HAVE_SORT512
+        Algo::kSort512,
+#endif
+#if VQSORT_ENABLED
+        Algo::kVQSort,
+#endif
+        Algo::kHeap,  // always included
+  };
+}
+
+template <class Traits>
+void TestSort(size_t num_lanes) {  // all algos x dists x misalignments
+// Workaround for stack overflow on clang-cl (/F 8388608 does not help).
+#if defined(_MSC_VER)
+  return;
+#endif
+  using Order = typename Traits::Order;
+  using LaneType = typename Traits::LaneType;
+  using KeyType = typename Traits::KeyType;
+  SharedState shared;
+  SharedTraits<Traits> st;
+
+  // Round up to a whole number of keys.
+  num_lanes += (st.Is128() && (num_lanes & 1));
+  const size_t num_keys = num_lanes / st.LanesPerKey();
+
+  constexpr size_t kMaxMisalign = 16;  // in lanes; also the red zone length
+  auto aligned =
+      hwy::AllocateAligned<LaneType>(kMaxMisalign + num_lanes + kMaxMisalign);
+  HWY_ASSERT(aligned);
+  for (Algo algo : AlgoForTest()) {
+    for (Dist dist : AllDist()) {
+      for (size_t misalign : {size_t{0}, size_t{st.LanesPerKey()},
+                              size_t{3 * st.LanesPerKey()}, kMaxMisalign / 2}) {
+        LaneType* lanes = aligned.get() + misalign;
+
+        // Set up red zones before/after the keys to sort
+        for (size_t i = 0; i < misalign; ++i) {
+          aligned[i] = hwy::LowestValue<LaneType>();
+        }
+        for (size_t i = 0; i < kMaxMisalign; ++i) {
+          lanes[num_lanes + i] = hwy::HighestValue<LaneType>();
+        }
+#if HWY_IS_MSAN
+        __msan_poison(aligned.get(), misalign * sizeof(LaneType));
+        __msan_poison(lanes + num_lanes, kMaxMisalign * sizeof(LaneType));
+#endif
+        InputStats<LaneType> input_stats =
+            GenerateInput(dist, lanes, num_lanes);
+
+        CompareResults<Traits> compare(lanes, num_lanes);  // saves the input
+        Run<Order>(algo, reinterpret_cast<KeyType*>(lanes), num_keys, shared,
+                   /*thread=*/0);
+        HWY_ASSERT(compare.Verify(lanes));  // equals the reference sort
+        HWY_ASSERT(VerifySort(st, input_stats, lanes, num_lanes, "TestSort"));
+
+        // Check red zones
+#if HWY_IS_MSAN
+        __msan_unpoison(aligned.get(), misalign * sizeof(LaneType));
+        __msan_unpoison(lanes + num_lanes, kMaxMisalign * sizeof(LaneType));
+#endif
+        for (size_t i = 0; i < misalign; ++i) {
+          if (aligned[i] != hwy::LowestValue<LaneType>())
+            HWY_ABORT("Overrun left at %d\n", static_cast<int>(i));
+        }
+        for (size_t i = num_lanes; i < num_lanes + kMaxMisalign; ++i) {
+          if (lanes[i] != hwy::HighestValue<LaneType>())
+            HWY_ABORT("Overrun right at %d\n", static_cast<int>(i));
+        }
+      }  // misalign
+    }    // dist
+  }      // algo
+}
+
+void TestAllSort() {
+  for (int num : {129, 504, 3 * 1000, 34567}) {  // requested lane counts
+    const size_t num_lanes = AdjustedReps(static_cast<size_t>(num));
+#if !HAVE_INTEL
+    TestSort<TraitsLane<OrderAscending<int16_t> > >(num_lanes);
+    TestSort<TraitsLane<OtherOrder<uint16_t> > >(num_lanes);
+#endif  // !HAVE_INTEL
+
+    TestSort<TraitsLane<OtherOrder<int32_t> > >(num_lanes);
+    TestSort<TraitsLane<OtherOrder<uint32_t> > >(num_lanes);
+
+    TestSort<TraitsLane<OrderAscending<int64_t> > >(num_lanes);
+    TestSort<TraitsLane<OrderAscending<uint64_t> > >(num_lanes);
+
+    // WARNING: for float types, SIMD comparisons will flush denormals to
+    // zero, causing mismatches with scalar sorts. In this test, we avoid
+    // generating denormal inputs.
+#if HWY_HAVE_FLOAT16  // #if protects algo-inl's GenerateRandom
+    // Must also check whether the dynamic-dispatch target supports float16_t!
+    if (hwy::HaveFloat16()) {
+      TestSort<TraitsLane<OrderAscending<float16_t> > >(num_lanes);
+    }
+#endif  // HWY_HAVE_FLOAT16
+    TestSort<TraitsLane<OrderAscending<float> > >(num_lanes);
+#if HWY_HAVE_FLOAT64  // #if protects algo-inl's GenerateRandom
+    // Must also check whether the dynamic-dispatch target supports float64!
+    if (hwy::HaveFloat64()) {
+      TestSort<TraitsLane<OtherOrder<double> > >(num_lanes);
+    }
+#endif  // HWY_HAVE_FLOAT64
+
+// Other algorithms do not support 128-bit keys.
+#if !HAVE_VXSORT && !HAVE_INTEL && VQSORT_ENABLED
+    TestSort<Traits128<OrderAscending128> >(num_lanes);
+    TestSort<Traits128<OrderDescending128> >(num_lanes);
+
+    TestSort<TraitsLane<OrderAscendingKV64> >(num_lanes);
+    TestSort<TraitsLane<OrderDescendingKV64> >(num_lanes);
+
+    TestSort<Traits128<OrderAscendingKV128> >(num_lanes);
+    TestSort<Traits128<OrderDescendingKV128> >(num_lanes);
+#endif  // !HAVE_VXSORT && !HAVE_INTEL && VQSORT_ENABLED
+  }
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace hwy {
+namespace {
+HWY_BEFORE_TEST(SortTest);
+HWY_EXPORT_AND_TEST_P(SortTest, TestAllFloatLargerSmaller);
+HWY_EXPORT_AND_TEST_P(SortTest, TestAllFloatInf);
+HWY_EXPORT_AND_TEST_P(SortTest, TestAllMedian);
+HWY_EXPORT_AND_TEST_P(SortTest, TestAllBaseCase);
+HWY_EXPORT_AND_TEST_P(SortTest, TestAllPartition);
+HWY_EXPORT_AND_TEST_P(SortTest, TestAllGenerator);
+HWY_EXPORT_AND_TEST_P(SortTest, TestAllSort);
+}  // namespace
+}  // namespace hwy
+
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/sorting_networks-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/sorting_networks-inl.h
new file mode 100644
index 0000000000..131d9277c3
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/sorting_networks-inl.h
@@ -0,0 +1,902 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Per-target
+#if defined(HIGHWAY_HWY_CONTRIB_SORT_SORTING_NETWORKS_TOGGLE) == \
+    defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_CONTRIB_SORT_SORTING_NETWORKS_TOGGLE
+#undef HIGHWAY_HWY_CONTRIB_SORT_SORTING_NETWORKS_TOGGLE
+#else
+#define HIGHWAY_HWY_CONTRIB_SORT_SORTING_NETWORKS_TOGGLE
+#endif
+
+#include "hwy/contrib/sort/shared-inl.h"  // SortConstants
+#include "hwy/highway.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+namespace detail {
+
+#if VQSORT_ENABLED
+
+using Constants = hwy::SortConstants;
+
+// ------------------------------ SharedTraits
+
+// Code shared between all traits. It's unclear whether these can profitably be
+// specialized for Lane vs Block, or optimized like SortPairsDistance1 using
+// Compare/DupOdd.
+template <class Base>
+struct SharedTraits : public Base {
+  using SharedTraitsForSortingNetwork =
+      SharedTraits<typename Base::TraitsForSortingNetwork>;
+
+  // Conditionally swaps lane 0 with 2, 1 with 3 etc.
+  template <class D>
+  HWY_INLINE Vec<D> SortPairsDistance2(D d, Vec<D> v) const {
+    const Base* base = static_cast<const Base*>(this);
+    Vec<D> swapped = base->SwapAdjacentPairs(d, v);
+    base->Sort2(d, v, swapped);
+    return base->OddEvenPairs(d, swapped, v);
+  }
+
+  // Swaps with the vector formed by reversing contiguous groups of 8 keys.
+  template <class D>
+  HWY_INLINE Vec<D> SortPairsReverse8(D d, Vec<D> v) const {
+    const Base* base = static_cast<const Base*>(this);
+    Vec<D> swapped = base->ReverseKeys8(d, v);
+    base->Sort2(d, v, swapped);
+    return base->OddEvenQuads(d, swapped, v);
+  }
+
+  // Swaps with the vector formed by reversing contiguous groups of 16 keys.
+  template <class D>
+  HWY_INLINE Vec<D> SortPairsReverse16(D d, Vec<D> v) const {
+    const Base* base = static_cast<const Base*>(this);
+    static_assert(Constants::kMaxCols <= 16, "Need actual Reverse16");
+    Vec<D> swapped = base->ReverseKeys(d, v);  // full reversal; see assert
+    base->Sort2(d, v, swapped);
+    return ConcatUpperLower(d, swapped, v);  // 8 = half of the vector
+  }
+};
+
+// ------------------------------ Sorting network
+
+// Sorting networks for independent columns in 2, 4 and 8 vectors from
+// https://bertdobbelaere.github.io/sorting_networks.html.
+
+template <class D, class Traits, class V = Vec<D>>
+HWY_INLINE void Sort2(D d, Traits st, V& v0, V& v1) {
+  st.Sort2(d, v0, v1);  // delegates to Traits::Sort2; v0/v1 updated in place
+}
+
+template <class D, class Traits, class V = Vec<D>>
+HWY_INLINE void Sort4(D d, Traits st, V& v0, V& v1, V& v2, V& v3) {
+  st.Sort2(d, v0, v2);  // first the pairs (0,2) and (1,3)
+  st.Sort2(d, v1, v3);
+  st.Sort2(d, v0, v1);  // then (0,1) and (2,3)
+  st.Sort2(d, v2, v3);
+  st.Sort2(d, v1, v2);  // finally the middle pair (1,2)
+}
+
+template <class D, class Traits, class V = Vec<D>>
+HWY_INLINE void Sort8(D d, Traits st, V& v0, V& v1, V& v2, V& v3, V& v4, V& v5,
+                      V& v6, V& v7) {
+  st.Sort2(d, v0, v2);  // group 1 of 6; pairs within a group are disjoint
+  st.Sort2(d, v1, v3);
+  st.Sort2(d, v4, v6);
+  st.Sort2(d, v5, v7);
+
+  st.Sort2(d, v0, v4);  // group 2
+  st.Sort2(d, v1, v5);
+  st.Sort2(d, v2, v6);
+  st.Sort2(d, v3, v7);
+
+  st.Sort2(d, v0, v1);  // group 3
+  st.Sort2(d, v2, v3);
+  st.Sort2(d, v4, v5);
+  st.Sort2(d, v6, v7);
+
+  st.Sort2(d, v2, v4);  // group 4
+  st.Sort2(d, v3, v5);
+
+  st.Sort2(d, v1, v4);  // group 5
+  st.Sort2(d, v3, v6);
+
+  st.Sort2(d, v1, v2);  // group 6
+  st.Sort2(d, v3, v4);
+  st.Sort2(d, v5, v6);
+}
+
+// (Green's irregular) sorting network for independent columns in 16 vectors.
+template <class D, class Traits, class V = Vec<D>>
+HWY_INLINE void Sort16(D d, Traits st, V& v0, V& v1, V& v2, V& v3, V& v4, V& v5,
+                       V& v6, V& v7, V& v8, V& v9, V& va, V& vb, V& vc, V& vd,
+                       V& ve, V& vf) {
+  st.Sort2(d, v0, v1);  // 60 Sort2 calls in total; their order is the network
+  st.Sort2(d, v2, v3);
+  st.Sort2(d, v4, v5);
+  st.Sort2(d, v6, v7);
+  st.Sort2(d, v8, v9);
+  st.Sort2(d, va, vb);
+  st.Sort2(d, vc, vd);
+  st.Sort2(d, ve, vf);
+  st.Sort2(d, v0, v2);
+  st.Sort2(d, v1, v3);
+  st.Sort2(d, v4, v6);
+  st.Sort2(d, v5, v7);
+  st.Sort2(d, v8, va);
+  st.Sort2(d, v9, vb);
+  st.Sort2(d, vc, ve);
+  st.Sort2(d, vd, vf);
+  st.Sort2(d, v0, v4);
+  st.Sort2(d, v1, v5);
+  st.Sort2(d, v2, v6);
+  st.Sort2(d, v3, v7);
+  st.Sort2(d, v8, vc);
+  st.Sort2(d, v9, vd);
+  st.Sort2(d, va, ve);
+  st.Sort2(d, vb, vf);
+  st.Sort2(d, v0, v8);
+  st.Sort2(d, v1, v9);
+  st.Sort2(d, v2, va);
+  st.Sort2(d, v3, vb);
+  st.Sort2(d, v4, vc);
+  st.Sort2(d, v5, vd);
+  st.Sort2(d, v6, ve);
+  st.Sort2(d, v7, vf);
+  st.Sort2(d, v5, va);
+  st.Sort2(d, v6, v9);
+  st.Sort2(d, v3, vc);
+  st.Sort2(d, v7, vb);
+  st.Sort2(d, vd, ve);
+  st.Sort2(d, v4, v8);
+  st.Sort2(d, v1, v2);
+  st.Sort2(d, v1, v4);
+  st.Sort2(d, v7, vd);
+  st.Sort2(d, v2, v8);
+  st.Sort2(d, vb, ve);
+  st.Sort2(d, v2, v4);
+  st.Sort2(d, v5, v6);
+  st.Sort2(d, v9, va);
+  st.Sort2(d, vb, vd);
+  st.Sort2(d, v3, v8);
+  st.Sort2(d, v7, vc);
+  st.Sort2(d, v3, v5);
+  st.Sort2(d, v6, v8);
+  st.Sort2(d, v7, v9);
+  st.Sort2(d, va, vc);
+  st.Sort2(d, v3, v4);
+  st.Sort2(d, v5, v6);
+  st.Sort2(d, v7, v8);
+  st.Sort2(d, v9, va);
+  st.Sort2(d, vb, vc);
+  st.Sort2(d, v6, v7);
+  st.Sort2(d, v8, v9);
+}
+
+// ------------------------------ Merging networks
+
+// Blacher's hybrid bitonic/odd-even networks, generated by print_network.cc.
+// For acceptable performance, these must be inlined, otherwise vectors are
+// loaded from the stack. The kKeysPerVector allows calling from generic code
+// but skipping the functions when vectors have too few lanes for
+// st.SortPairsDistance1 to compile. `if constexpr` in the caller would also
+// work, but is not available in C++11. We write out the (unused) argument types
+// rather than `...` because GCC 9 (but not 10) fails to compile with `...`.
+
+template <size_t kKeysPerVector, class D, class Traits, class V,
+          HWY_IF_LANES_LE(kKeysPerVector, 1)>
+HWY_INLINE void Merge8x2(D, Traits, V, V, V, V, V, V, V, V) {}  // no-op stub
+template <size_t kKeysPerVector, class D, class Traits, class V,
+          HWY_IF_LANES_LE(kKeysPerVector, 2)>
+HWY_INLINE void Merge8x4(D, Traits, V, V, V, V, V, V, V, V) {}  // no-op stub
+
+template <size_t kKeysPerVector, class D, class Traits, class V,
+          HWY_IF_LANES_LE(kKeysPerVector, 1)>
+HWY_INLINE void Merge16x2(D, Traits, V, V, V, V, V, V, V, V, V, V, V, V, V, V,
+                          V, V) {}  // no-op stub
+template <size_t kKeysPerVector, class D, class Traits, class V,
+          HWY_IF_LANES_LE(kKeysPerVector, 2)>
+HWY_INLINE void Merge16x4(D, Traits, V, V, V, V, V, V, V, V, V, V, V, V, V, V,
+                          V, V) {}  // no-op stub
+template <size_t kKeysPerVector, class D, class Traits, class V,
+          HWY_IF_LANES_LE(kKeysPerVector, 4)>
+HWY_INLINE void Merge16x8(D, Traits, V, V, V, V, V, V, V, V, V, V, V, V, V, V,
+                          V, V) {}  // no-op stub
+template <size_t kKeysPerVector, class D, class Traits, class V,
+          HWY_IF_LANES_LE(kKeysPerVector, 8)>
+HWY_INLINE void Merge16x16(D, Traits, V, V, V, V, V, V, V, V, V, V, V, V, V, V,
+                           V, V) {}  // no-op stub
+
+template <size_t kKeysPerVector, class D, class Traits, class V = Vec<D>,
+          HWY_IF_LANES_GT(kKeysPerVector, 1)>
+HWY_INLINE void Merge8x2(D d, Traits st, V& v0, V& v1, V& v2, V& v3, V& v4,
+                         V& v5, V& v6, V& v7) {
+  v7 = st.ReverseKeys2(d, v7);  // stage 1: v[i] vs reversed v[7-i]
+  v6 = st.ReverseKeys2(d, v6);
+  v5 = st.ReverseKeys2(d, v5);
+  v4 = st.ReverseKeys2(d, v4);
+  st.Sort2(d, v0, v7);
+  st.Sort2(d, v1, v6);
+  st.Sort2(d, v2, v5);
+  st.Sort2(d, v3, v4);
+
+  v3 = st.ReverseKeys2(d, v3);  // stage 2: same within groups of 4
+  v2 = st.ReverseKeys2(d, v2);
+  v7 = st.ReverseKeys2(d, v7);
+  v6 = st.ReverseKeys2(d, v6);
+  st.Sort2(d, v0, v3);
+  st.Sort2(d, v1, v2);
+  st.Sort2(d, v4, v7);
+  st.Sort2(d, v5, v6);
+
+  v1 = st.ReverseKeys2(d, v1);  // stage 3: adjacent vector pairs
+  v3 = st.ReverseKeys2(d, v3);
+  v5 = st.ReverseKeys2(d, v5);
+  v7 = st.ReverseKeys2(d, v7);
+  st.Sort2(d, v0, v1);
+  st.Sort2(d, v2, v3);
+  st.Sort2(d, v4, v5);
+  st.Sort2(d, v6, v7);
+
+  v0 = st.SortPairsDistance1(d, v0);  // last step acts on one vector at a time
+  v1 = st.SortPairsDistance1(d, v1);
+  v2 = st.SortPairsDistance1(d, v2);
+  v3 = st.SortPairsDistance1(d, v3);
+  v4 = st.SortPairsDistance1(d, v4);
+  v5 = st.SortPairsDistance1(d, v5);
+  v6 = st.SortPairsDistance1(d, v6);
+  v7 = st.SortPairsDistance1(d, v7);
+}
+
+template <size_t kKeysPerVector, class D, class Traits, class V = Vec<D>,
+          HWY_IF_LANES_GT(kKeysPerVector, 2)>
+HWY_INLINE void Merge8x4(D d, Traits st, V& v0, V& v1, V& v2, V& v3, V& v4,
+                         V& v5, V& v6, V& v7) {
+  v7 = st.ReverseKeys4(d, v7);  // stage 1: v[i] vs reversed v[7-i]
+  v6 = st.ReverseKeys4(d, v6);
+  v5 = st.ReverseKeys4(d, v5);
+  v4 = st.ReverseKeys4(d, v4);
+  st.Sort2(d, v0, v7);
+  st.Sort2(d, v1, v6);
+  st.Sort2(d, v2, v5);
+  st.Sort2(d, v3, v4);
+
+  v3 = st.ReverseKeys4(d, v3);  // stage 2: same within groups of 4
+  v2 = st.ReverseKeys4(d, v2);
+  v7 = st.ReverseKeys4(d, v7);
+  v6 = st.ReverseKeys4(d, v6);
+  st.Sort2(d, v0, v3);
+  st.Sort2(d, v1, v2);
+  st.Sort2(d, v4, v7);
+  st.Sort2(d, v5, v6);
+
+  v1 = st.ReverseKeys4(d, v1);  // stage 3: adjacent vector pairs
+  v3 = st.ReverseKeys4(d, v3);
+  v5 = st.ReverseKeys4(d, v5);
+  v7 = st.ReverseKeys4(d, v7);
+  st.Sort2(d, v0, v1);
+  st.Sort2(d, v2, v3);
+  st.Sort2(d, v4, v5);
+  st.Sort2(d, v6, v7);
+
+  v0 = st.SortPairsReverse4(d, v0);  // remaining steps act on one vector each
+  v1 = st.SortPairsReverse4(d, v1);
+  v2 = st.SortPairsReverse4(d, v2);
+  v3 = st.SortPairsReverse4(d, v3);
+  v4 = st.SortPairsReverse4(d, v4);
+  v5 = st.SortPairsReverse4(d, v5);
+  v6 = st.SortPairsReverse4(d, v6);
+  v7 = st.SortPairsReverse4(d, v7);
+
+  v0 = st.SortPairsDistance1(d, v0);
+  v1 = st.SortPairsDistance1(d, v1);
+  v2 = st.SortPairsDistance1(d, v2);
+  v3 = st.SortPairsDistance1(d, v3);
+  v4 = st.SortPairsDistance1(d, v4);
+  v5 = st.SortPairsDistance1(d, v5);
+  v6 = st.SortPairsDistance1(d, v6);
+  v7 = st.SortPairsDistance1(d, v7);
+}
+
+// Only used by the now-deprecated SortingNetwork().
+template <size_t kKeysPerVector, class D, class Traits, class V = Vec<D>,
+          HWY_IF_LANES_GT(kKeysPerVector, 1)>
+HWY_INLINE void Merge16x2(D d, Traits st, V& v0, V& v1, V& v2, V& v3, V& v4,
+                          V& v5, V& v6, V& v7, V& v8, V& v9, V& va, V& vb,
+                          V& vc, V& vd, V& ve, V& vf) {
+  vf = st.ReverseKeys2(d, vf);  // stage 1: v[i] vs reversed v[15-i]
+  ve = st.ReverseKeys2(d, ve);
+  vd = st.ReverseKeys2(d, vd);
+  vc = st.ReverseKeys2(d, vc);
+  vb = st.ReverseKeys2(d, vb);
+  va = st.ReverseKeys2(d, va);
+  v9 = st.ReverseKeys2(d, v9);
+  v8 = st.ReverseKeys2(d, v8);
+  st.Sort2(d, v0, vf);
+  st.Sort2(d, v1, ve);
+  st.Sort2(d, v2, vd);
+  st.Sort2(d, v3, vc);
+  st.Sort2(d, v4, vb);
+  st.Sort2(d, v5, va);
+  st.Sort2(d, v6, v9);
+  st.Sort2(d, v7, v8);
+
+  v7 = st.ReverseKeys2(d, v7);  // stage 2: same within groups of 8
+  v6 = st.ReverseKeys2(d, v6);
+  v5 = st.ReverseKeys2(d, v5);
+  v4 = st.ReverseKeys2(d, v4);
+  vf = st.ReverseKeys2(d, vf);
+  ve = st.ReverseKeys2(d, ve);
+  vd = st.ReverseKeys2(d, vd);
+  vc = st.ReverseKeys2(d, vc);
+  st.Sort2(d, v0, v7);
+  st.Sort2(d, v1, v6);
+  st.Sort2(d, v2, v5);
+  st.Sort2(d, v3, v4);
+  st.Sort2(d, v8, vf);
+  st.Sort2(d, v9, ve);
+  st.Sort2(d, va, vd);
+  st.Sort2(d, vb, vc);
+
+  v3 = st.ReverseKeys2(d, v3);  // stage 3: same within groups of 4
+  v2 = st.ReverseKeys2(d, v2);
+  v7 = st.ReverseKeys2(d, v7);
+  v6 = st.ReverseKeys2(d, v6);
+  vb = st.ReverseKeys2(d, vb);
+  va = st.ReverseKeys2(d, va);
+  vf = st.ReverseKeys2(d, vf);
+  ve = st.ReverseKeys2(d, ve);
+  st.Sort2(d, v0, v3);
+  st.Sort2(d, v1, v2);
+  st.Sort2(d, v4, v7);
+  st.Sort2(d, v5, v6);
+  st.Sort2(d, v8, vb);
+  st.Sort2(d, v9, va);
+  st.Sort2(d, vc, vf);
+  st.Sort2(d, vd, ve);
+
+  v1 = st.ReverseKeys2(d, v1);  // stage 4: adjacent vector pairs
+  v3 = st.ReverseKeys2(d, v3);
+  v5 = st.ReverseKeys2(d, v5);
+  v7 = st.ReverseKeys2(d, v7);
+  v9 = st.ReverseKeys2(d, v9);
+  vb = st.ReverseKeys2(d, vb);
+  vd = st.ReverseKeys2(d, vd);
+  vf = st.ReverseKeys2(d, vf);
+  st.Sort2(d, v0, v1);
+  st.Sort2(d, v2, v3);
+  st.Sort2(d, v4, v5);
+  st.Sort2(d, v6, v7);
+  st.Sort2(d, v8, v9);
+  st.Sort2(d, va, vb);
+  st.Sort2(d, vc, vd);
+  st.Sort2(d, ve, vf);
+
+  v0 = st.SortPairsDistance1(d, v0);  // last step acts on one vector at a time
+  v1 = st.SortPairsDistance1(d, v1);
+  v2 = st.SortPairsDistance1(d, v2);
+  v3 = st.SortPairsDistance1(d, v3);
+  v4 = st.SortPairsDistance1(d, v4);
+  v5 = st.SortPairsDistance1(d, v5);
+  v6 = st.SortPairsDistance1(d, v6);
+  v7 = st.SortPairsDistance1(d, v7);
+  v8 = st.SortPairsDistance1(d, v8);
+  v9 = st.SortPairsDistance1(d, v9);
+  va = st.SortPairsDistance1(d, va);
+  vb = st.SortPairsDistance1(d, vb);
+  vc = st.SortPairsDistance1(d, vc);
+  vd = st.SortPairsDistance1(d, vd);
+  ve = st.SortPairsDistance1(d, ve);
+  vf = st.SortPairsDistance1(d, vf);
+}
+
+template <size_t kKeysPerVector, class D, class Traits, class V = Vec<D>,
+          HWY_IF_LANES_GT(kKeysPerVector, 2)>
+HWY_INLINE void Merge16x4(D d, Traits st, V& v0, V& v1, V& v2, V& v3, V& v4,
+                          V& v5, V& v6, V& v7, V& v8, V& v9, V& va, V& vb,
+                          V& vc, V& vd, V& ve, V& vf) {
+  vf = st.ReverseKeys4(d, vf);  // stage 1: v[i] vs reversed v[15-i]
+  ve = st.ReverseKeys4(d, ve);
+  vd = st.ReverseKeys4(d, vd);
+  vc = st.ReverseKeys4(d, vc);
+  vb = st.ReverseKeys4(d, vb);
+  va = st.ReverseKeys4(d, va);
+  v9 = st.ReverseKeys4(d, v9);
+  v8 = st.ReverseKeys4(d, v8);
+  st.Sort2(d, v0, vf);
+  st.Sort2(d, v1, ve);
+  st.Sort2(d, v2, vd);
+  st.Sort2(d, v3, vc);
+  st.Sort2(d, v4, vb);
+  st.Sort2(d, v5, va);
+  st.Sort2(d, v6, v9);
+  st.Sort2(d, v7, v8);
+
+  v7 = st.ReverseKeys4(d, v7);  // stage 2: same within groups of 8
+  v6 = st.ReverseKeys4(d, v6);
+  v5 = st.ReverseKeys4(d, v5);
+  v4 = st.ReverseKeys4(d, v4);
+  vf = st.ReverseKeys4(d, vf);
+  ve = st.ReverseKeys4(d, ve);
+  vd = st.ReverseKeys4(d, vd);
+  vc = st.ReverseKeys4(d, vc);
+  st.Sort2(d, v0, v7);
+  st.Sort2(d, v1, v6);
+  st.Sort2(d, v2, v5);
+  st.Sort2(d, v3, v4);
+  st.Sort2(d, v8, vf);
+  st.Sort2(d, v9, ve);
+  st.Sort2(d, va, vd);
+  st.Sort2(d, vb, vc);
+
+  v3 = st.ReverseKeys4(d, v3);  // stage 3: same within groups of 4
+  v2 = st.ReverseKeys4(d, v2);
+  v7 = st.ReverseKeys4(d, v7);
+  v6 = st.ReverseKeys4(d, v6);
+  vb = st.ReverseKeys4(d, vb);
+  va = st.ReverseKeys4(d, va);
+  vf = st.ReverseKeys4(d, vf);
+  ve = st.ReverseKeys4(d, ve);
+  st.Sort2(d, v0, v3);
+  st.Sort2(d, v1, v2);
+  st.Sort2(d, v4, v7);
+  st.Sort2(d, v5, v6);
+  st.Sort2(d, v8, vb);
+  st.Sort2(d, v9, va);
+  st.Sort2(d, vc, vf);
+  st.Sort2(d, vd, ve);
+
+  v1 = st.ReverseKeys4(d, v1);  // stage 4: adjacent vector pairs
+  v3 = st.ReverseKeys4(d, v3);
+  v5 = st.ReverseKeys4(d, v5);
+  v7 = st.ReverseKeys4(d, v7);
+  v9 = st.ReverseKeys4(d, v9);
+  vb = st.ReverseKeys4(d, vb);
+  vd = st.ReverseKeys4(d, vd);
+  vf = st.ReverseKeys4(d, vf);
+  st.Sort2(d, v0, v1);
+  st.Sort2(d, v2, v3);
+  st.Sort2(d, v4, v5);
+  st.Sort2(d, v6, v7);
+  st.Sort2(d, v8, v9);
+  st.Sort2(d, va, vb);
+  st.Sort2(d, vc, vd);
+  st.Sort2(d, ve, vf);
+
+  v0 = st.SortPairsReverse4(d, v0);  // remaining steps act on one vector each
+  v1 = st.SortPairsReverse4(d, v1);
+  v2 = st.SortPairsReverse4(d, v2);
+  v3 = st.SortPairsReverse4(d, v3);
+  v4 = st.SortPairsReverse4(d, v4);
+  v5 = st.SortPairsReverse4(d, v5);
+  v6 = st.SortPairsReverse4(d, v6);
+  v7 = st.SortPairsReverse4(d, v7);
+  v8 = st.SortPairsReverse4(d, v8);
+  v9 = st.SortPairsReverse4(d, v9);
+  va = st.SortPairsReverse4(d, va);
+  vb = st.SortPairsReverse4(d, vb);
+  vc = st.SortPairsReverse4(d, vc);
+  vd = st.SortPairsReverse4(d, vd);
+  ve = st.SortPairsReverse4(d, ve);
+  vf = st.SortPairsReverse4(d, vf);
+
+  v0 = st.SortPairsDistance1(d, v0);
+  v1 = st.SortPairsDistance1(d, v1);
+  v2 = st.SortPairsDistance1(d, v2);
+  v3 = st.SortPairsDistance1(d, v3);
+  v4 = st.SortPairsDistance1(d, v4);
+  v5 = st.SortPairsDistance1(d, v5);
+  v6 = st.SortPairsDistance1(d, v6);
+  v7 = st.SortPairsDistance1(d, v7);
+  v8 = st.SortPairsDistance1(d, v8);
+  v9 = st.SortPairsDistance1(d, v9);
+  va = st.SortPairsDistance1(d, va);
+  vb = st.SortPairsDistance1(d, vb);
+  vc = st.SortPairsDistance1(d, vc);
+  vd = st.SortPairsDistance1(d, vd);
+  ve = st.SortPairsDistance1(d, ve);
+  vf = st.SortPairsDistance1(d, vf);
+}
+
+template <size_t kKeysPerVector, class D, class Traits, class V = Vec<D>,
+          HWY_IF_LANES_GT(kKeysPerVector, 4)>
+HWY_INLINE void Merge16x8(D d, Traits st, V& v0, V& v1, V& v2, V& v3, V& v4,
+                          V& v5, V& v6, V& v7, V& v8, V& v9, V& va, V& vb,
+                          V& vc, V& vd, V& ve, V& vf) {
+  vf = st.ReverseKeys8(d, vf);  // stage 1: v[i] vs reversed v[15-i]
+  ve = st.ReverseKeys8(d, ve);
+  vd = st.ReverseKeys8(d, vd);
+  vc = st.ReverseKeys8(d, vc);
+  vb = st.ReverseKeys8(d, vb);
+  va = st.ReverseKeys8(d, va);
+  v9 = st.ReverseKeys8(d, v9);
+  v8 = st.ReverseKeys8(d, v8);
+  st.Sort2(d, v0, vf);
+  st.Sort2(d, v1, ve);
+  st.Sort2(d, v2, vd);
+  st.Sort2(d, v3, vc);
+  st.Sort2(d, v4, vb);
+  st.Sort2(d, v5, va);
+  st.Sort2(d, v6, v9);
+  st.Sort2(d, v7, v8);
+
+  v7 = st.ReverseKeys8(d, v7);  // stage 2: same within groups of 8
+  v6 = st.ReverseKeys8(d, v6);
+  v5 = st.ReverseKeys8(d, v5);
+  v4 = st.ReverseKeys8(d, v4);
+  vf = st.ReverseKeys8(d, vf);
+  ve = st.ReverseKeys8(d, ve);
+  vd = st.ReverseKeys8(d, vd);
+  vc = st.ReverseKeys8(d, vc);
+  st.Sort2(d, v0, v7);
+  st.Sort2(d, v1, v6);
+  st.Sort2(d, v2, v5);
+  st.Sort2(d, v3, v4);
+  st.Sort2(d, v8, vf);
+  st.Sort2(d, v9, ve);
+  st.Sort2(d, va, vd);
+  st.Sort2(d, vb, vc);
+
+  v3 = st.ReverseKeys8(d, v3);  // stage 3: same within groups of 4
+  v2 = st.ReverseKeys8(d, v2);
+  v7 = st.ReverseKeys8(d, v7);
+  v6 = st.ReverseKeys8(d, v6);
+  vb = st.ReverseKeys8(d, vb);
+  va = st.ReverseKeys8(d, va);
+  vf = st.ReverseKeys8(d, vf);
+  ve = st.ReverseKeys8(d, ve);
+  st.Sort2(d, v0, v3);
+  st.Sort2(d, v1, v2);
+  st.Sort2(d, v4, v7);
+  st.Sort2(d, v5, v6);
+  st.Sort2(d, v8, vb);
+  st.Sort2(d, v9, va);
+  st.Sort2(d, vc, vf);
+  st.Sort2(d, vd, ve);
+
+  v1 = st.ReverseKeys8(d, v1);  // stage 4: adjacent vector pairs
+  v3 = st.ReverseKeys8(d, v3);
+  v5 = st.ReverseKeys8(d, v5);
+  v7 = st.ReverseKeys8(d, v7);
+  v9 = st.ReverseKeys8(d, v9);
+  vb = st.ReverseKeys8(d, vb);
+  vd = st.ReverseKeys8(d, vd);
+  vf = st.ReverseKeys8(d, vf);
+  st.Sort2(d, v0, v1);
+  st.Sort2(d, v2, v3);
+  st.Sort2(d, v4, v5);
+  st.Sort2(d, v6, v7);
+  st.Sort2(d, v8, v9);
+  st.Sort2(d, va, vb);
+  st.Sort2(d, vc, vd);
+  st.Sort2(d, ve, vf);
+
+  v0 = st.SortPairsReverse8(d, v0);  // remaining steps act on one vector each
+  v1 = st.SortPairsReverse8(d, v1);
+  v2 = st.SortPairsReverse8(d, v2);
+  v3 = st.SortPairsReverse8(d, v3);
+  v4 = st.SortPairsReverse8(d, v4);
+  v5 = st.SortPairsReverse8(d, v5);
+  v6 = st.SortPairsReverse8(d, v6);
+  v7 = st.SortPairsReverse8(d, v7);
+  v8 = st.SortPairsReverse8(d, v8);
+  v9 = st.SortPairsReverse8(d, v9);
+  va = st.SortPairsReverse8(d, va);
+  vb = st.SortPairsReverse8(d, vb);
+  vc = st.SortPairsReverse8(d, vc);
+  vd = st.SortPairsReverse8(d, vd);
+  ve = st.SortPairsReverse8(d, ve);
+  vf = st.SortPairsReverse8(d, vf);
+
+  v0 = st.SortPairsDistance2(d, v0);
+  v1 = st.SortPairsDistance2(d, v1);
+  v2 = st.SortPairsDistance2(d, v2);
+  v3 = st.SortPairsDistance2(d, v3);
+  v4 = st.SortPairsDistance2(d, v4);
+  v5 = st.SortPairsDistance2(d, v5);
+  v6 = st.SortPairsDistance2(d, v6);
+  v7 = st.SortPairsDistance2(d, v7);
+  v8 = st.SortPairsDistance2(d, v8);
+  v9 = st.SortPairsDistance2(d, v9);
+  va = st.SortPairsDistance2(d, va);
+  vb = st.SortPairsDistance2(d, vb);
+  vc = st.SortPairsDistance2(d, vc);
+  vd = st.SortPairsDistance2(d, vd);
+  ve = st.SortPairsDistance2(d, ve);
+  vf = st.SortPairsDistance2(d, vf);
+
+  v0 = st.SortPairsDistance1(d, v0);
+  v1 = st.SortPairsDistance1(d, v1);
+  v2 = st.SortPairsDistance1(d, v2);
+  v3 = st.SortPairsDistance1(d, v3);
+  v4 = st.SortPairsDistance1(d, v4);
+  v5 = st.SortPairsDistance1(d, v5);
+  v6 = st.SortPairsDistance1(d, v6);
+  v7 = st.SortPairsDistance1(d, v7);
+  v8 = st.SortPairsDistance1(d, v8);
+  v9 = st.SortPairsDistance1(d, v9);
+  va = st.SortPairsDistance1(d, va);
+  vb = st.SortPairsDistance1(d, vb);
+  vc = st.SortPairsDistance1(d, vc);
+  vd = st.SortPairsDistance1(d, vd);
+  ve = st.SortPairsDistance1(d, ve);
+  vf = st.SortPairsDistance1(d, vf);
+}
+
+// Unused on MSVC, see below
+#if !HWY_COMPILER_MSVC && !HWY_IS_DEBUG_BUILD
+
+template <size_t kKeysPerVector, class D, class Traits, class V = Vec<D>,
+          HWY_IF_LANES_GT(kKeysPerVector, 8)>
+HWY_INLINE void Merge16x16(D d, Traits st, V& v0, V& v1, V& v2, V& v3, V& v4,
+                           V& v5, V& v6, V& v7, V& v8, V& v9, V& va, V& vb,
+                           V& vc, V& vd, V& ve, V& vf) {
+  vf = st.ReverseKeys16(d, vf);  // stage 1: v[i] vs reversed v[15-i]
+  ve = st.ReverseKeys16(d, ve);
+  vd = st.ReverseKeys16(d, vd);
+  vc = st.ReverseKeys16(d, vc);
+  vb = st.ReverseKeys16(d, vb);
+  va = st.ReverseKeys16(d, va);
+  v9 = st.ReverseKeys16(d, v9);
+  v8 = st.ReverseKeys16(d, v8);
+  st.Sort2(d, v0, vf);
+  st.Sort2(d, v1, ve);
+  st.Sort2(d, v2, vd);
+  st.Sort2(d, v3, vc);
+  st.Sort2(d, v4, vb);
+  st.Sort2(d, v5, va);
+  st.Sort2(d, v6, v9);
+  st.Sort2(d, v7, v8);
+
+  v7 = st.ReverseKeys16(d, v7);  // stage 2: same within groups of 8
+  v6 = st.ReverseKeys16(d, v6);
+  v5 = st.ReverseKeys16(d, v5);
+  v4 = st.ReverseKeys16(d, v4);
+  vf = st.ReverseKeys16(d, vf);
+  ve = st.ReverseKeys16(d, ve);
+  vd = st.ReverseKeys16(d, vd);
+  vc = st.ReverseKeys16(d, vc);
+  st.Sort2(d, v0, v7);
+  st.Sort2(d, v1, v6);
+  st.Sort2(d, v2, v5);
+  st.Sort2(d, v3, v4);
+  st.Sort2(d, v8, vf);
+  st.Sort2(d, v9, ve);
+  st.Sort2(d, va, vd);
+  st.Sort2(d, vb, vc);
+
+  v3 = st.ReverseKeys16(d, v3);  // stage 3: same within groups of 4
+  v2 = st.ReverseKeys16(d, v2);
+  v7 = st.ReverseKeys16(d, v7);
+  v6 = st.ReverseKeys16(d, v6);
+  vb = st.ReverseKeys16(d, vb);
+  va = st.ReverseKeys16(d, va);
+  vf = st.ReverseKeys16(d, vf);
+  ve = st.ReverseKeys16(d, ve);
+  st.Sort2(d, v0, v3);
+  st.Sort2(d, v1, v2);
+  st.Sort2(d, v4, v7);
+  st.Sort2(d, v5, v6);
+  st.Sort2(d, v8, vb);
+  st.Sort2(d, v9, va);
+  st.Sort2(d, vc, vf);
+  st.Sort2(d, vd, ve);
+
+  v1 = st.ReverseKeys16(d, v1);  // stage 4: adjacent vector pairs
+  v3 = st.ReverseKeys16(d, v3);
+  v5 = st.ReverseKeys16(d, v5);
+  v7 = st.ReverseKeys16(d, v7);
+  v9 = st.ReverseKeys16(d, v9);
+  vb = st.ReverseKeys16(d, vb);
+  vd = st.ReverseKeys16(d, vd);
+  vf = st.ReverseKeys16(d, vf);
+  st.Sort2(d, v0, v1);
+  st.Sort2(d, v2, v3);
+  st.Sort2(d, v4, v5);
+  st.Sort2(d, v6, v7);
+  st.Sort2(d, v8, v9);
+  st.Sort2(d, va, vb);
+  st.Sort2(d, vc, vd);
+  st.Sort2(d, ve, vf);
+
+  v0 = st.SortPairsReverse16(d, v0);  // remaining steps act on one vector each
+  v1 = st.SortPairsReverse16(d, v1);
+  v2 = st.SortPairsReverse16(d, v2);
+  v3 = st.SortPairsReverse16(d, v3);
+  v4 = st.SortPairsReverse16(d, v4);
+  v5 = st.SortPairsReverse16(d, v5);
+  v6 = st.SortPairsReverse16(d, v6);
+  v7 = st.SortPairsReverse16(d, v7);
+  v8 = st.SortPairsReverse16(d, v8);
+  v9 = st.SortPairsReverse16(d, v9);
+  va = st.SortPairsReverse16(d, va);
+  vb = st.SortPairsReverse16(d, vb);
+  vc = st.SortPairsReverse16(d, vc);
+  vd = st.SortPairsReverse16(d, vd);
+  ve = st.SortPairsReverse16(d, ve);
+  vf = st.SortPairsReverse16(d, vf);
+
+  v0 = st.SortPairsDistance4(d, v0);
+  v1 = st.SortPairsDistance4(d, v1);
+  v2 = st.SortPairsDistance4(d, v2);
+  v3 = st.SortPairsDistance4(d, v3);
+  v4 = st.SortPairsDistance4(d, v4);
+  v5 = st.SortPairsDistance4(d, v5);
+  v6 = st.SortPairsDistance4(d, v6);
+  v7 = st.SortPairsDistance4(d, v7);
+  v8 = st.SortPairsDistance4(d, v8);
+  v9 = st.SortPairsDistance4(d, v9);
+  va = st.SortPairsDistance4(d, va);
+  vb = st.SortPairsDistance4(d, vb);
+  vc = st.SortPairsDistance4(d, vc);
+  vd = st.SortPairsDistance4(d, vd);
+  ve = st.SortPairsDistance4(d, ve);
+  vf = st.SortPairsDistance4(d, vf);
+
+  v0 = st.SortPairsDistance2(d, v0);
+  v1 = st.SortPairsDistance2(d, v1);
+  v2 = st.SortPairsDistance2(d, v2);
+  v3 = st.SortPairsDistance2(d, v3);
+  v4 = st.SortPairsDistance2(d, v4);
+  v5 = st.SortPairsDistance2(d, v5);
+  v6 = st.SortPairsDistance2(d, v6);
+  v7 = st.SortPairsDistance2(d, v7);
+  v8 = st.SortPairsDistance2(d, v8);
+  v9 = st.SortPairsDistance2(d, v9);
+  va = st.SortPairsDistance2(d, va);
+  vb = st.SortPairsDistance2(d, vb);
+  vc = st.SortPairsDistance2(d, vc);
+  vd = st.SortPairsDistance2(d, vd);
+  ve = st.SortPairsDistance2(d, ve);
+  vf = st.SortPairsDistance2(d, vf);
+
+  v0 = st.SortPairsDistance1(d, v0);
+  v1 = st.SortPairsDistance1(d, v1);
+  v2 = st.SortPairsDistance1(d, v2);
+  v3 = st.SortPairsDistance1(d, v3);
+  v4 = st.SortPairsDistance1(d, v4);
+  v5 = st.SortPairsDistance1(d, v5);
+  v6 = st.SortPairsDistance1(d, v6);
+  v7 = st.SortPairsDistance1(d, v7);
+  v8 = st.SortPairsDistance1(d, v8);
+  v9 = st.SortPairsDistance1(d, v9);
+  va = st.SortPairsDistance1(d, va);
+  vb = st.SortPairsDistance1(d, vb);
+  vc = st.SortPairsDistance1(d, vc);
+  vd = st.SortPairsDistance1(d, vd);
+  ve = st.SortPairsDistance1(d, ve);
+  vf = st.SortPairsDistance1(d, vf);
+}
+
+#endif  // !HWY_COMPILER_MSVC && !HWY_IS_DEBUG_BUILD
+
+// Treats v0..vf as the 16 rows of a matrix, sorts columns independently, and
+// then merges into a sorted 1D array without transposing.
+//
+// DEPRECATED, use BaseCase() instead.
+template <class Traits, class V>
+HWY_INLINE void SortingNetwork(Traits st, size_t cols, V& v0, V& v1, V& v2,
+                               V& v3, V& v4, V& v5, V& v6, V& v7, V& v8, V& v9,
+                               V& va, V& vb, V& vc, V& vd, V& ve, V& vf) {
+  // traits*-inl assume 'full' vectors (but still capped to kMaxCols).
+  const CappedTag<typename Traits::LaneType, Constants::kMaxCols> d;
+
+  HWY_DASSERT(cols <= Constants::kMaxCols);
+
+  // The network width depends on the number of keys, not lanes.
+  constexpr size_t kLanesPerKey = st.LanesPerKey();
+  const size_t keys = cols / kLanesPerKey;  // runtime number of keys per row
+  constexpr size_t kMaxKeys = MaxLanes(d) / kLanesPerKey;  // compile-time cap
+
+  Sort16(d, st, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb, vc, vd, ve, vf);
+
+  // Checking MaxLanes avoids generating HWY_ASSERT code for the unreachable
+  // code paths: if MaxLanes < 2, then keys <= cols < 2.
+  if (HWY_LIKELY(keys >= 2 && kMaxKeys >= 2)) {
+    Merge16x2<kMaxKeys>(d, st, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb,
+                        vc, vd, ve, vf);
+
+    if (HWY_LIKELY(keys >= 4 && kMaxKeys >= 4)) {
+      Merge16x4<kMaxKeys>(d, st, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb,
+                          vc, vd, ve, vf);
+
+      if (HWY_LIKELY(keys >= 8 && kMaxKeys >= 8)) {
+        Merge16x8<kMaxKeys>(d, st, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va,
+                            vb, vc, vd, ve, vf);
+
+        // Avoids build timeout. Must match #if condition in kMaxCols.
+#if !HWY_COMPILER_MSVC && !HWY_IS_DEBUG_BUILD
+        if (HWY_LIKELY(keys >= 16 && kMaxKeys >= 16)) {
+          Merge16x16<kMaxKeys>(d, st, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9,
+                               va, vb, vc, vd, ve, vf);
+
+          static_assert(Constants::kMaxCols <= 16, "Add more branches");
+        }
+#endif
+      }
+    }
+  }
+}
+
+// As above, but loads from/stores to `buf`. This ensures full vectors are
+// aligned, and enables loads/stores without bounds checks.
+//
+// DEPRECATED, use BaseCase() instead.
+template <class Traits, typename T>
+HWY_NOINLINE void SortingNetwork(Traits st, T* HWY_RESTRICT buf, size_t cols) {
+  // traits*-inl assume 'full' vectors (but still capped to kMaxCols).
+  // However, for smaller arrays and sub-maximal `cols` we have overlapping
+  // loads where only the lowest `cols` are valid, and we skip Merge16 etc.
+  const CappedTag<T, Constants::kMaxCols> d;
+  using V = decltype(Zero(d));
+
+  HWY_DASSERT(cols <= Constants::kMaxCols);
+
+  // These are aligned iff cols == Lanes(d). We prefer unaligned/non-constexpr
+  // offsets to duplicating this code for every value of cols.
+  static_assert(Constants::kMaxRows == 16, "Update loads/stores/args");
+  V v0 = LoadU(d, buf + 0x0 * cols);  // row i is loaded from buf + i * cols
+  V v1 = LoadU(d, buf + 0x1 * cols);
+  V v2 = LoadU(d, buf + 0x2 * cols);
+  V v3 = LoadU(d, buf + 0x3 * cols);
+  V v4 = LoadU(d, buf + 0x4 * cols);
+  V v5 = LoadU(d, buf + 0x5 * cols);
+  V v6 = LoadU(d, buf + 0x6 * cols);
+  V v7 = LoadU(d, buf + 0x7 * cols);
+  V v8 = LoadU(d, buf + 0x8 * cols);
+  V v9 = LoadU(d, buf + 0x9 * cols);
+  V va = LoadU(d, buf + 0xa * cols);
+  V vb = LoadU(d, buf + 0xb * cols);
+  V vc = LoadU(d, buf + 0xc * cols);
+  V vd = LoadU(d, buf + 0xd * cols);
+  V ve = LoadU(d, buf + 0xe * cols);
+  V vf = LoadU(d, buf + 0xf * cols);
+
+  SortingNetwork(st, cols, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb, vc,
+                 vd, ve, vf);
+
+  StoreU(v0, d, buf + 0x0 * cols);  // rows go back to the same offsets
+  StoreU(v1, d, buf + 0x1 * cols);
+  StoreU(v2, d, buf + 0x2 * cols);
+  StoreU(v3, d, buf + 0x3 * cols);
+  StoreU(v4, d, buf + 0x4 * cols);
+  StoreU(v5, d, buf + 0x5 * cols);
+  StoreU(v6, d, buf + 0x6 * cols);
+  StoreU(v7, d, buf + 0x7 * cols);
+  StoreU(v8, d, buf + 0x8 * cols);
+  StoreU(v9, d, buf + 0x9 * cols);
+  StoreU(va, d, buf + 0xa * cols);
+  StoreU(vb, d, buf + 0xb * cols);
+  StoreU(vc, d, buf + 0xc * cols);
+  StoreU(vd, d, buf + 0xd * cols);
+  StoreU(ve, d, buf + 0xe * cols);
+  StoreU(vf, d, buf + 0xf * cols);
+}
+
+#else
+template <class Base>
+struct SharedTraits : public Base {};  // stub used when !VQSORT_ENABLED
+#endif  // VQSORT_ENABLED
+
+}  // namespace detail
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#endif  // HIGHWAY_HWY_CONTRIB_SORT_SORTING_NETWORKS_TOGGLE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/traits-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/traits-inl.h
new file mode 100644
index 0000000000..186d644aa9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/traits-inl.h
@@ -0,0 +1,665 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Per-target
+#if defined(HIGHWAY_HWY_CONTRIB_SORT_TRAITS_TOGGLE) == \
+    defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_CONTRIB_SORT_TRAITS_TOGGLE
+#undef HIGHWAY_HWY_CONTRIB_SORT_TRAITS_TOGGLE
+#else
+#define HIGHWAY_HWY_CONTRIB_SORT_TRAITS_TOGGLE
+#endif
+
+#include "hwy/contrib/sort/order.h"       // SortDescending
+#include "hwy/contrib/sort/shared-inl.h"  // SortConstants
+#include "hwy/highway.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+namespace detail {
+
+// Shared base of KeyLane and of the Order* fallbacks used if !VQSORT_ENABLED.
+template <typename T>
+struct KeyLaneBase {
+  static constexpr bool Is128() { return false; }  // keys are single lanes
+  constexpr size_t LanesPerKey() const { return 1; }
+
+  // What type bench_sort should allocate for generating inputs.
+  using LaneType = T;
+  // What type to pass to VQSort.
+  using KeyType = T;
+  // Short label for the lane type T; "?" if T is none of the types below.
+  const char* KeyString() const {
+    return IsSame<T, float16_t>()  ? "f16"
+           : IsSame<T, float>()    ? "f32"
+           : IsSame<T, double>()   ? "f64"
+           : IsSame<T, int16_t>()  ? "i16"
+           : IsSame<T, int32_t>()  ? "i32"
+           : IsSame<T, int64_t>()  ? "i64"
+           : IsSame<T, uint16_t>() ? "u16"
+           : IsSame<T, uint32_t>() ? "u32"
+           : IsSame<T, uint64_t>() ? "u64"
+                                   : "?";
+  }
+};
+
+// Wrapper functions so we can specialize for floats - infinity trumps
+// HighestValue (the normal value with the largest magnitude). Must be outside
+// Order* classes to enable SFINAE. LargestSortValue is used even if
+// !VQSORT_ENABLED.
+
+template <class D, HWY_IF_FLOAT_OR_SPECIAL_D(D)>
+Vec<D> LargestSortValue(D d) {
+  return Inf(d);  // float keys: +inf, which trumps HighestValue
+}
+template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+Vec<D> LargestSortValue(D d) {
+  return Set(d, hwy::HighestValue<TFromD<D>>());  // non-float keys
+}
+
+template <class D, HWY_IF_FLOAT_OR_SPECIAL_D(D)>
+Vec<D> SmallestSortValue(D d) {
+  return Neg(Inf(d));  // float keys: -inf
+}
+template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+Vec<D> SmallestSortValue(D d) {
+  return Set(d, hwy::LowestValue<TFromD<D>>());  // non-float keys
+}
+
+// Returns the next distinct larger value unless already +inf.
+template <class D, HWY_IF_FLOAT_OR_SPECIAL_D(D)>
+Vec<D> LargerSortValue(D d, Vec<D> v) {
+  HWY_DASSERT(AllFalse(d, IsNaN(v)));  // we replaced all NaN with LastValue.
+  using T = TFromD<decltype(d)>;
+  const RebindToUnsigned<D> du;
+  using VU = Vec<decltype(du)>;
+  using TU = TFromD<decltype(du)>;
+
+  const VU vu = BitCast(du, Abs(v));
+
+  // The direction depends on the original sign. Integer comparison is cheaper
+  // than float comparison and treats -0 as 0 (so we return +epsilon).
+  const Mask<decltype(du)> was_pos = Le(BitCast(du, v), SignBit(du));
+  // If positive, add 1, else -1.
+  const VU add = IfThenElse(was_pos, Set(du, 1), Set(du, ~TU{0}));
+  // Prev/next integer is the prev/next value, even if mantissa under/overflows.
+  v = BitCast(d, Add(vu, add));
+  // But we may have overflowed into inf or NaN; replace with inf if positive,
+  // but the largest (later negated!) value if the input was -inf.
+  const Mask<D> was_pos_f = RebindMask(d, was_pos);
+  v = IfThenElse(IsFinite(v), v,
+                 IfThenElse(was_pos_f, Inf(d), Set(d, HighestValue<T>())));
+  // Restore the original sign - not via CopySignToAbs because we used a mask.
+  return IfThenElse(was_pos_f, v, Neg(v));
+}
+
+// Returns the next distinct smaller value unless already -inf.
+template <class D, HWY_IF_FLOAT_OR_SPECIAL_D(D)>
+Vec<D> SmallerSortValue(D d, Vec<D> v) {
+  HWY_DASSERT(AllFalse(d, IsNaN(v)));  // we replaced all NaN with LastValue.
+  using T = TFromD<decltype(d)>;
+  const RebindToUnsigned<D> du;
+  using VU = Vec<decltype(du)>;
+  using TU = TFromD<decltype(du)>;
+
+  const VU vu = BitCast(du, Abs(v));
+
+  // The direction depends on the original sign. Float comparison because we
+  // want to treat 0 as -0 so we return -epsilon.
+  const Mask<D> was_pos = Gt(v, Zero(d));
+  // If positive, add -1, else 1.
+  const VU add =
+      IfThenElse(RebindMask(du, was_pos), Set(du, ~TU{0}), Set(du, 1));
+  // Prev/next integer is the prev/next value, even if mantissa under/overflows.
+  v = BitCast(d, Add(vu, add));
+  // But we may have overflowed into inf or NaN; replace with +inf (which will
+  // later be negated) if negative, but the largest value if the input was +inf.
+  v = IfThenElse(IsFinite(v), v,
+                 IfThenElse(was_pos, Set(d, HighestValue<T>()), Inf(d)));
+  // Restore the original sign - not via CopySignToAbs because we used a mask.
+  return IfThenElse(was_pos, v, Neg(v));
+}
+
+template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+Vec<D> LargerSortValue(D d, Vec<D> v) {
+  return Add(v, Set(d, TFromD<D>{1}));  // non-float keys: v + 1 per lane
+}
+
+template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+Vec<D> SmallerSortValue(D d, Vec<D> v) {
+  return Sub(v, Set(d, TFromD<D>{1}));  // non-float keys: v - 1 per lane
+}
+
+#if VQSORT_ENABLED || HWY_IDE
+
+// Highway does not provide a lane type for 128-bit keys, so we use uint64_t
+// along with an abstraction layer for single-lane vs. lane-pair, which is
+// independent of the order.
+template <typename T>
+struct KeyLane : public KeyLaneBase<T> {
+  // False indicates the entire key (i.e. lane) should be compared. KV stands
+  // for key-value.
+  static constexpr bool IsKV() { return false; }
+
+  // For HeapSort
+  HWY_INLINE void Swap(T* a, T* b) const {
+    const T temp = *a;
+    *a = *b;
+    *b = temp;
+  }
+
+  template <class V, class M>
+  HWY_INLINE V CompressKeys(V keys, M mask) const {
+    return CompressNot(keys, mask);
+  }
+
+  // Broadcasts one key into a vector
+  template <class D>
+  HWY_INLINE Vec<D> SetKey(D d, const T* key) const {
+    return Set(d, *key);
+  }
+
+  template <class D>
+  HWY_INLINE Mask<D> EqualKeys(D /*tag*/, Vec<D> a, Vec<D> b) const {
+    return Eq(a, b);
+  }
+
+  template <class D>
+  HWY_INLINE Mask<D> NotEqualKeys(D /*tag*/, Vec<D> a, Vec<D> b) const {
+    return Ne(a, b);
+  }
+
+  // For keys=lanes, any difference counts.
+  template <class D>
+  HWY_INLINE bool NoKeyDifference(D /*tag*/, Vec<D> diff) const {
+    // Must avoid floating-point comparisons (for -0)
+    const RebindToUnsigned<D> du;
+    return AllTrue(du, Eq(BitCast(du, diff), Zero(du)));
+  }
+
+  HWY_INLINE bool Equal1(const T* a, const T* b) const { return *a == *b; }
+
+  template <class D>
+  HWY_INLINE Vec<D> ReverseKeys(D d, Vec<D> v) const {
+    return Reverse(d, v);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> ReverseKeys2(D d, Vec<D> v) const {
+    return Reverse2(d, v);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> ReverseKeys4(D d, Vec<D> v) const {
+    return Reverse4(d, v);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> ReverseKeys8(D d, Vec<D> v) const {
+    return Reverse8(d, v);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> ReverseKeys16(D d, Vec<D> v) const {
+    static_assert(SortConstants::kMaxCols <= 16, "Assumes u32x16 = 512 bit");
+    return ReverseKeys(d, v);
+  }
+
+  template <class V>
+  HWY_INLINE V OddEvenKeys(const V odd, const V even) const {
+    return OddEven(odd, even);
+  }
+
+  template <class D, HWY_IF_T_SIZE_D(D, 2)>
+  HWY_INLINE Vec<D> SwapAdjacentPairs(D d, const Vec<D> v) const {
+    const Repartition<uint32_t, D> du32;
+    return BitCast(d, Shuffle2301(BitCast(du32, v)));
+  }
+  template <class D, HWY_IF_T_SIZE_D(D, 4)>
+  HWY_INLINE Vec<D> SwapAdjacentPairs(D /* tag */, const Vec<D> v) const {
+    return Shuffle1032(v);
+  }
+  template <class D, HWY_IF_T_SIZE_D(D, 8)>
+  HWY_INLINE Vec<D> SwapAdjacentPairs(D /* tag */, const Vec<D> v) const {
+    return SwapAdjacentBlocks(v);
+  }
+
+  template <class D, HWY_IF_NOT_T_SIZE_D(D, 8)>
+  HWY_INLINE Vec<D> SwapAdjacentQuads(D d, const Vec<D> v) const {
+#if HWY_HAVE_FLOAT64  // in case D is float32
+    const RepartitionToWide<D> dw;
+#else
+    const RepartitionToWide<RebindToUnsigned<D>> dw;
+#endif
+    return BitCast(d, SwapAdjacentPairs(dw, BitCast(dw, v)));
+  }
+  template <class D, HWY_IF_T_SIZE_D(D, 8)>
+  HWY_INLINE Vec<D> SwapAdjacentQuads(D d, const Vec<D> v) const {
+    // Assumes max vector size = 512
+    return ConcatLowerUpper(d, v, v);
+  }
+
+  template <class D, HWY_IF_NOT_T_SIZE_D(D, 8)>
+  HWY_INLINE Vec<D> OddEvenPairs(D d, const Vec<D> odd,
+                                 const Vec<D> even) const {
+#if HWY_HAVE_FLOAT64  // in case D is float32
+    const RepartitionToWide<D> dw;
+#else
+    const RepartitionToWide<RebindToUnsigned<D>> dw;
+#endif
+    return BitCast(d, OddEven(BitCast(dw, odd), BitCast(dw, even)));
+  }
+  template <class D, HWY_IF_T_SIZE_D(D, 8)>
+  HWY_INLINE Vec<D> OddEvenPairs(D /* tag */, Vec<D> odd, Vec<D> even) const {
+    return OddEvenBlocks(odd, even);
+  }
+
+  template <class D, HWY_IF_NOT_T_SIZE_D(D, 8)>
+  HWY_INLINE Vec<D> OddEvenQuads(D d, Vec<D> odd, Vec<D> even) const {
+#if HWY_HAVE_FLOAT64  // in case D is float32
+    const RepartitionToWide<D> dw;
+#else
+    const RepartitionToWide<RebindToUnsigned<D>> dw;
+#endif
+    return BitCast(d, OddEvenPairs(dw, BitCast(dw, odd), BitCast(dw, even)));
+  }
+  template <class D, HWY_IF_T_SIZE_D(D, 8)>
+  HWY_INLINE Vec<D> OddEvenQuads(D d, Vec<D> odd, Vec<D> even) const {
+    return ConcatUpperLower(d, odd, even);
+  }
+};
+
+// Anything order-related depends on the key traits *and* the order (see
+// FirstOfLanes). We cannot implement just one Compare function because Lt128
+// only compiles if the lane type is u64. Thus we need either overloaded
+// functions with a tag type, class specializations, or separate classes.
+// We avoid overloaded functions because we want all functions to be callable
+// from a SortTraits without per-function wrappers. Specializing would work, but
+// we are anyway going to specialize at a higher level.
+template <typename T>
+struct OrderAscending : public KeyLane<T> {
+  using Order = SortAscending;
+  using OrderForSortingNetwork = OrderAscending<T>;
+
+  HWY_INLINE bool Compare1(const T* a, const T* b) { return *a < *b; }
+
+  template <class D>
+  HWY_INLINE Mask<D> Compare(D /* tag */, Vec<D> a, Vec<D> b) const {
+    return Lt(a, b);
+  }
+
+  // Two halves of Sort2, used in ScanMinMax.
+  template <class D>
+  HWY_INLINE Vec<D> First(D /* tag */, const Vec<D> a, const Vec<D> b) const {
+    return Min(a, b);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> Last(D /* tag */, const Vec<D> a, const Vec<D> b) const {
+    return Max(a, b);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> FirstOfLanes(D d, Vec<D> v,
+                                 T* HWY_RESTRICT /* buf */) const {
+    return MinOfLanes(d, v);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> LastOfLanes(D d, Vec<D> v,
+                                T* HWY_RESTRICT /* buf */) const {
+    return MaxOfLanes(d, v);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> FirstValue(D d) const {
+    return SmallestSortValue(d);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> LastValue(D d) const {
+    return LargestSortValue(d);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
+    return SmallerSortValue(d, v);
+  }
+};
+
+template <typename T>
+struct OrderDescending : public KeyLane<T> {
+  using Order = SortDescending;
+  using OrderForSortingNetwork = OrderDescending<T>;
+
+  HWY_INLINE bool Compare1(const T* a, const T* b) { return *b < *a; }
+
+  template <class D>
+  HWY_INLINE Mask<D> Compare(D /* tag */, Vec<D> a, Vec<D> b) const {
+    return Lt(b, a);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> First(D /* tag */, const Vec<D> a, const Vec<D> b) const {
+    return Max(a, b);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> Last(D /* tag */, const Vec<D> a, const Vec<D> b) const {
+    return Min(a, b);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> FirstOfLanes(D d, Vec<D> v,
+                                 T* HWY_RESTRICT /* buf */) const {
+    return MaxOfLanes(d, v);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> LastOfLanes(D d, Vec<D> v,
+                                T* HWY_RESTRICT /* buf */) const {
+    return MinOfLanes(d, v);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> FirstValue(D d) const {
+    return LargestSortValue(d);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> LastValue(D d) const {
+    return SmallestSortValue(d);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
+    return LargerSortValue(d, v);
+  }
+};
+
+struct KeyValue64 : public KeyLane<uint64_t> {
+  // True indicates only part of the key (i.e. lane) should be compared. KV
+  // stands for key-value.
+  static constexpr bool IsKV() { return true; }
+
+  template <class D>
+  HWY_INLINE Mask<D> EqualKeys(D /*tag*/, Vec<D> a, Vec<D> b) const {
+    return Eq(ShiftRight<32>(a), ShiftRight<32>(b));
+  }
+
+  template <class D>
+  HWY_INLINE Mask<D> NotEqualKeys(D /*tag*/, Vec<D> a, Vec<D> b) const {
+    return Ne(ShiftRight<32>(a), ShiftRight<32>(b));
+  }
+
+  HWY_INLINE bool Equal1(const uint64_t* a, const uint64_t* b) const {
+    return (*a >> 32) == (*b >> 32);
+  }
+
+  // Only count differences in the actual key, not the value.
+  template <class D>
+  HWY_INLINE bool NoKeyDifference(D /*tag*/, Vec<D> diff) const {
+    // Must avoid floating-point comparisons (for -0)
+    const RebindToUnsigned<D> du;
+    const Vec<decltype(du)> zero = Zero(du);
+    const Vec<decltype(du)> keys = ShiftRight<32>(diff);  // clear values
+    return AllTrue(du, Eq(BitCast(du, keys), zero));
+  }
+};
+
+struct OrderAscendingKV64 : public KeyValue64 {
+  using Order = SortAscending;
+  using OrderForSortingNetwork = OrderAscending<LaneType>;
+
+  HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) {
+    return (*a >> 32) < (*b >> 32);
+  }
+
+  template <class D>
+  HWY_INLINE Mask<D> Compare(D /* tag */, Vec<D> a, Vec<D> b) const {
+    return Lt(ShiftRight<32>(a), ShiftRight<32>(b));
+  }
+
+  // Not required to be stable (preserving the order of equivalent keys), so
+  // we can include the value in the comparison.
+  template <class D>
+  HWY_INLINE Vec<D> First(D /* tag */, const Vec<D> a, const Vec<D> b) const {
+    return Min(a, b);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> Last(D /* tag */, const Vec<D> a, const Vec<D> b) const {
+    return Max(a, b);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> FirstOfLanes(D d, Vec<D> v,
+                                 uint64_t* HWY_RESTRICT /* buf */) const {
+    return MinOfLanes(d, v);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> LastOfLanes(D d, Vec<D> v,
+                                uint64_t* HWY_RESTRICT /* buf */) const {
+    return MaxOfLanes(d, v);
+  }
+
+  // Same as for regular lanes.
+  template <class D>
+  HWY_INLINE Vec<D> FirstValue(D d) const {
+    return Set(d, hwy::LowestValue<TFromD<D>>());
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> LastValue(D d) const {
+    return Set(d, hwy::HighestValue<TFromD<D>>());
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
+    return Sub(v, Set(d, uint64_t{1} << 32));
+  }
+};
+
+struct OrderDescendingKV64 : public KeyValue64 {
+  using Order = SortDescending;
+  using OrderForSortingNetwork = OrderDescending<LaneType>;
+
+  HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) {
+    return (*b >> 32) < (*a >> 32);
+  }
+
+  template <class D>
+  HWY_INLINE Mask<D> Compare(D /* tag */, Vec<D> a, Vec<D> b) const {
+    return Lt(ShiftRight<32>(b), ShiftRight<32>(a));
+  }
+
+  // Not required to be stable (preserving the order of equivalent keys), so
+  // we can include the value in the comparison.
+  template <class D>
+  HWY_INLINE Vec<D> First(D /* tag */, const Vec<D> a, const Vec<D> b) const {
+    return Max(a, b);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> Last(D /* tag */, const Vec<D> a, const Vec<D> b) const {
+    return Min(a, b);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> FirstOfLanes(D d, Vec<D> v,
+                                 uint64_t* HWY_RESTRICT /* buf */) const {
+    return MaxOfLanes(d, v);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> LastOfLanes(D d, Vec<D> v,
+                                uint64_t* HWY_RESTRICT /* buf */) const {
+    return MinOfLanes(d, v);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> FirstValue(D d) const {
+    return Set(d, hwy::HighestValue<TFromD<D>>());
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> LastValue(D d) const {
+    return Set(d, hwy::LowestValue<TFromD<D>>());
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
+    return Add(v, Set(d, uint64_t{1} << 32));
+  }
+};
+
+// Shared code that depends on Order.
+template <class Base>
+struct TraitsLane : public Base {
+  using TraitsForSortingNetwork =
+      TraitsLane<typename Base::OrderForSortingNetwork>;
+
+  // For each lane i: replaces a[i] with the first and b[i] with the second
+  // according to Base.
+  // Corresponds to a conditional swap, which is one "node" of a sorting
+  // network. Min/Max are cheaper than compare + blend at least for integers.
+  template <class D>
+  HWY_INLINE void Sort2(D d, Vec<D>& a, Vec<D>& b) const {
+    const Base* base = static_cast<const Base*>(this);
+
+    const Vec<D> a_copy = a;
+    // Prior to AVX3, there is no native 64-bit Min/Max, so they compile to 4
+    // instructions. We can reduce it to a compare + 2 IfThenElse.
+#if HWY_AVX3 < HWY_TARGET && HWY_TARGET <= HWY_SSSE3
+    if (sizeof(TFromD<D>) == 8) {
+      const Mask<D> cmp = base->Compare(d, a, b);
+      a = IfThenElse(cmp, a, b);
+      b = IfThenElse(cmp, b, a_copy);
+      return;
+    }
+#endif
+    a = base->First(d, a, b);
+    b = base->Last(d, a_copy, b);
+  }
+
+  // Conditionally swaps even-numbered lanes with their odd-numbered neighbor.
+  template <class D, HWY_IF_T_SIZE_D(D, 8)>
+  HWY_INLINE Vec<D> SortPairsDistance1(D d, Vec<D> v) const {
+    const Base* base = static_cast<const Base*>(this);
+    Vec<D> swapped = base->ReverseKeys2(d, v);
+    // Further to the above optimization, Sort2+OddEvenKeys compile to four
+    // instructions; we can save one by combining two blends.
+#if HWY_AVX3 < HWY_TARGET && HWY_TARGET <= HWY_SSSE3
+    const Vec<D> cmp = VecFromMask(d, base->Compare(d, v, swapped));
+    return IfVecThenElse(DupOdd(cmp), swapped, v);
+#else
+    Sort2(d, v, swapped);
+    return base->OddEvenKeys(swapped, v);
+#endif
+  }
+
+  // (See above - we use Sort2 for non-64-bit types.)
+  template <class D, HWY_IF_NOT_T_SIZE_D(D, 8)>
+  HWY_INLINE Vec<D> SortPairsDistance1(D d, Vec<D> v) const {
+    const Base* base = static_cast<const Base*>(this);
+    Vec<D> swapped = base->ReverseKeys2(d, v);
+    Sort2(d, v, swapped);
+    return base->OddEvenKeys(swapped, v);
+  }
+
+  // Swaps with the vector formed by reversing contiguous groups of 4 keys.
+  template <class D>
+  HWY_INLINE Vec<D> SortPairsReverse4(D d, Vec<D> v) const {
+    const Base* base = static_cast<const Base*>(this);
+    Vec<D> swapped = base->ReverseKeys4(d, v);
+    Sort2(d, v, swapped);
+    return base->OddEvenPairs(d, swapped, v);
+  }
+
+  // Conditionally swaps lane 0 with 4, 1 with 5 etc.
+  template <class D>
+  HWY_INLINE Vec<D> SortPairsDistance4(D d, Vec<D> v) const {
+    const Base* base = static_cast<const Base*>(this);
+    Vec<D> swapped = base->SwapAdjacentQuads(d, v);
+    // Only used in Merge16, so this will not be used on AVX2 (which only has 4
+    // u64 lanes), so skip the above optimization for 64-bit AVX2.
+    Sort2(d, v, swapped);
+    return base->OddEvenQuads(d, swapped, v);
+  }
+};
+
+#else
+
+template <typename T>
+struct OrderAscending : public KeyLaneBase<T> {  // used if !VQSORT_ENABLED
+  using Order = SortAscending;
+  // Scalar comparison: true if *a < *b. Stateless, hence const.
+  HWY_INLINE bool Compare1(const T* a, const T* b) const { return *a < *b; }
+  // Vector comparison: mask of lanes where a < b. Stateless, hence const.
+  template <class D>
+  HWY_INLINE Mask<D> Compare(D /* tag */, Vec<D> a, Vec<D> b) const {
+    return Lt(a, b);
+  }
+  // Value that sorts last in ascending order.
+  template <class D>
+  HWY_INLINE Vec<D> LastValue(D d) const {
+    return LargestSortValue(d);
+  }
+};
+
+template <typename T>
+struct OrderDescending : public KeyLaneBase<T> {  // used if !VQSORT_ENABLED
+  using Order = SortDescending;
+  // Scalar comparison: true if *b < *a. Stateless, hence const.
+  HWY_INLINE bool Compare1(const T* a, const T* b) const { return *b < *a; }
+  // Vector comparison: mask of lanes where b < a. Stateless, hence const.
+  template <class D>
+  HWY_INLINE Mask<D> Compare(D /* tag */, Vec<D> a, Vec<D> b) const {
+    return Lt(b, a);
+  }
+  // Value that sorts last in descending order.
+  template <class D>
+  HWY_INLINE Vec<D> LastValue(D d) const {
+    return SmallestSortValue(d);
+  }
+};
+
+template <class Order>
+struct TraitsLane : public Order {
+  // Exchanges the two lanes; used by HeapSort.
+  template <typename T>  // MSVC doesn't find typename Order::LaneType.
+  HWY_INLINE void Swap(T* a, T* b) const {
+    const T from_b = *b;
+    *b = *a;
+    *a = from_b;
+  }
+  // Broadcasts the lane at `key` into all lanes of the result.
+  template <class D>
+  HWY_INLINE Vec<D> SetKey(D d, const TFromD<D>* key) const {
+    return Set(d, *key);
+  }
+};
+
+#endif  // VQSORT_ENABLED
+
+}  // namespace detail
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#endif  // HIGHWAY_HWY_CONTRIB_SORT_TRAITS_TOGGLE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/traits128-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/traits128-inl.h
new file mode 100644
index 0000000000..0229d5c4e1
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/traits128-inl.h
@@ -0,0 +1,533 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Per-target
+#if defined(HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE) == \
+    defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
+#undef HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
+#else
+#define HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
+#endif
+
+#include "hwy/contrib/sort/order.h"  // SortDescending
+#include "hwy/contrib/sort/shared-inl.h"
+#include "hwy/highway.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+namespace detail {
+
+#if VQSORT_ENABLED || HWY_IDE
+
+// Highway does not provide a lane type for 128-bit keys, so we use uint64_t
+// along with an abstraction layer for single-lane vs. lane-pair, which is
+// independent of the order.
+struct KeyAny128 {
+  static constexpr bool Is128() { return true; }
+  constexpr size_t LanesPerKey() const { return 2; }
+
+  // What type bench_sort should allocate for generating inputs.
+  using LaneType = uint64_t;
+  // KeyType and KeyString are defined by derived classes.
+
+  HWY_INLINE void Swap(LaneType* a, LaneType* b) const {
+    const FixedTag<LaneType, 2> d;
+    const auto temp = LoadU(d, a);
+    StoreU(LoadU(d, b), d, a);
+    StoreU(temp, d, b);
+  }
+
+  template <class V, class M>
+  HWY_INLINE V CompressKeys(V keys, M mask) const {
+    return CompressBlocksNot(keys, mask);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> SetKey(D d, const TFromD<D>* key) const {
+    return LoadDup128(d, key);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> ReverseKeys(D d, Vec<D> v) const {
+    return ReverseBlocks(d, v);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> ReverseKeys2(D /* tag */, const Vec<D> v) const {
+    return SwapAdjacentBlocks(v);
+  }
+
+  // Only called for 4 keys because we do not support >512-bit vectors.
+  template <class D>
+  HWY_INLINE Vec<D> ReverseKeys4(D d, const Vec<D> v) const {
+    HWY_DASSERT(Lanes(d) <= 64 / sizeof(TFromD<D>));
+    return ReverseKeys(d, v);
+  }
+
+  // Only called for 4 keys because we do not support >512-bit vectors.
+  template <class D>
+  HWY_INLINE Vec<D> OddEvenPairs(D d, const Vec<D> odd,
+                                 const Vec<D> even) const {
+    HWY_DASSERT(Lanes(d) <= 64 / sizeof(TFromD<D>));
+    return ConcatUpperLower(d, odd, even);
+  }
+
+  template <class V>
+  HWY_INLINE V OddEvenKeys(const V odd, const V even) const {
+    return OddEvenBlocks(odd, even);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> ReverseKeys8(D, Vec<D>) const {
+    HWY_ASSERT(0);  // not supported: would require 1024-bit vectors
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> ReverseKeys16(D, Vec<D>) const {
+    HWY_ASSERT(0);  // not supported: would require 2048-bit vectors
+  }
+
+  // This is only called for 8/16 col networks (not supported).
+  template <class D>
+  HWY_INLINE Vec<D> SwapAdjacentPairs(D, Vec<D>) const {
+    HWY_ASSERT(0);
+  }
+
+  // This is only called for 16 col networks (not supported).
+  template <class D>
+  HWY_INLINE Vec<D> SwapAdjacentQuads(D, Vec<D>) const {
+    HWY_ASSERT(0);
+  }
+
+  // This is only called for 8 col networks (not supported).
+  template <class D>
+  HWY_INLINE Vec<D> OddEvenQuads(D, Vec<D>, Vec<D>) const {
+    HWY_ASSERT(0);
+  }
+};
+
+// Base class shared between OrderAscending128, OrderDescending128.
+struct Key128 : public KeyAny128 {
+  // False indicates the entire key should be compared. KV means key-value.
+  static constexpr bool IsKV() { return false; }
+
+  // What type to pass to VQSort.
+  using KeyType = hwy::uint128_t;
+
+  const char* KeyString() const { return "U128"; }
+
+  template <class D>
+  HWY_INLINE Mask<D> EqualKeys(D d, Vec<D> a, Vec<D> b) const {
+    return Eq128(d, a, b);
+  }
+
+  template <class D>
+  HWY_INLINE Mask<D> NotEqualKeys(D d, Vec<D> a, Vec<D> b) const {
+    return Ne128(d, a, b);
+  }
+
+  // For keys=entire 128 bits, any difference counts.
+  template <class D>
+  HWY_INLINE bool NoKeyDifference(D /*tag*/, Vec<D> diff) const {
+    // Must avoid floating-point comparisons (for -0)
+    const RebindToUnsigned<D> du;
+    return AllTrue(du, Eq(BitCast(du, diff), Zero(du)));
+  }
+
+  HWY_INLINE bool Equal1(const LaneType* a, const LaneType* b) const {
+    return a[0] == b[0] && a[1] == b[1];
+  }
+
+  // Returns vector with only the top half of each block valid. This allows
+  // fusing the "replicate upper to lower half" step with a subsequent permute.
+  template <class Order, class D>
+  HWY_INLINE HWY_MAYBE_UNUSED Vec<D> CompareTop(D d, Vec<D> a, Vec<D> b) const {
+    const Mask<D> eqHL = Eq(a, b);
+    const Vec<D> ltHL = VecFromMask(d, Order().CompareLanes(a, b));
+#if HWY_TARGET <= HWY_AVX2  // slightly faster
+    const Vec<D> ltLX = ShiftLeftLanes<1>(ltHL);
+    return OrAnd(ltHL, VecFromMask(d, eqHL), ltLX);
+#else
+    return IfThenElse(eqHL, DupEven(ltHL), ltHL);
+#endif
+  }
+};
+
+// Anything order-related depends on the key traits *and* the order (see
+// FirstOfLanes). We cannot implement just one Compare function because Lt128
+// only compiles if the lane type is u64. Thus we need either overloaded
+// functions with a tag type, class specializations, or separate classes.
+// We avoid overloaded functions because we want all functions to be callable
+// from a SortTraits without per-function wrappers. Specializing would work, but
+// we are anyway going to specialize at a higher level.
+struct OrderAscending128 : public Key128 {
+  using Order = SortAscending;
+  using OrderForSortingNetwork = OrderAscending128;
+
+  HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) {
+    return (a[1] == b[1]) ? a[0] < b[0] : a[1] < b[1];
+  }
+
+  template <class D>
+  HWY_INLINE Mask<D> Compare(D d, Vec<D> a, Vec<D> b) const {
+    return Lt128(d, a, b);
+  }
+
+  // Used by CompareTop
+  template <class V>
+  HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b) const {
+    return Lt(a, b);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> First(D d, const Vec<D> a, const Vec<D> b) const {
+    return Min128(d, a, b);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> Last(D d, const Vec<D> a, const Vec<D> b) const {
+    return Max128(d, a, b);
+  }
+
+  // Same as for regular lanes because 128-bit keys are u64.
+  template <class D>
+  HWY_INLINE Vec<D> FirstValue(D d) const {
+    return Set(d, hwy::LowestValue<TFromD<D> >());
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> LastValue(D d) const {
+    return Set(d, hwy::HighestValue<TFromD<D> >());
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
+    const Vec<D> k0 = Zero(d);
+    const Vec<D> k1 = OddEven(k0, Set(d, uint64_t{1}));
+    const Mask<D> borrow = Eq(v, k0);  // don't-care, lo == 0
+    // lo == 0? 1 : 0, 0
+    const Vec<D> adjust = ShiftLeftLanes<1>(IfThenElseZero(borrow, k1));
+    return Sub(Sub(v, k1), adjust);
+  }
+};
+
+struct OrderDescending128 : public Key128 {
+  using Order = SortDescending;
+  using OrderForSortingNetwork = OrderDescending128;
+
+  HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) {
+    return (a[1] == b[1]) ? b[0] < a[0] : b[1] < a[1];
+  }
+
+  template <class D>
+  HWY_INLINE Mask<D> Compare(D d, Vec<D> a, Vec<D> b) const {
+    return Lt128(d, b, a);
+  }
+
+  // Used by CompareTop
+  template <class V>
+  HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b) const {
+    return Lt(b, a);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> First(D d, const Vec<D> a, const Vec<D> b) const {
+    return Max128(d, a, b);
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> Last(D d, const Vec<D> a, const Vec<D> b) const {
+    return Min128(d, a, b);
+  }
+
+  // Same as for regular lanes because 128-bit keys are u64.
+  template <class D>
+  HWY_INLINE Vec<D> FirstValue(D d) const {
+    return Set(d, hwy::HighestValue<TFromD<D> >());
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> LastValue(D d) const {
+    return Set(d, hwy::LowestValue<TFromD<D> >());
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
+    const Vec<D> k1 = OddEven(Zero(d), Set(d, uint64_t{1}));
+    const Vec<D> added = Add(v, k1);
+    const Mask<D> overflowed = Lt(added, v);  // false, overflowed
+    // overflowed? 1 : 0, 0
+    const Vec<D> adjust = ShiftLeftLanes<1>(IfThenElseZero(overflowed, k1));
+    return Add(added, adjust);
+  }
+};
+
+// Base class shared between OrderAscendingKV128, OrderDescendingKV128.
+struct KeyValue128 : public KeyAny128 {
+  // True indicates only part of the key (the more significant lane) should be
+  // compared. KV stands for key-value.
+  static constexpr bool IsKV() { return true; }
+
+  // What type to pass to VQSort.
+  using KeyType = K64V64;
+
+  const char* KeyString() const { return "KV128"; }  // short label for this key type
+
+  template <class D>
+  HWY_INLINE Mask<D> EqualKeys(D d, Vec<D> a, Vec<D> b) const {
+    return Eq128Upper(d, a, b);  // "Upper": only the key lane decides equality
+  }
+
+  template <class D>
+  HWY_INLINE Mask<D> NotEqualKeys(D d, Vec<D> a, Vec<D> b) const {
+    return Ne128Upper(d, a, b);  // negated counterpart of EqualKeys
+  }
+
+  // Only count differences in the actual key, not the value.
+  template <class D>
+  HWY_INLINE bool NoKeyDifference(D /*tag*/, Vec<D> diff) const {
+    // Must avoid floating-point comparisons (for -0), hence the unsigned tag.
+    const RebindToUnsigned<D> du;
+    const Vec<decltype(du)> zero = Zero(du);
+    const Vec<decltype(du)> keys = OddEven(diff, zero);  // keep odd lanes, clear values
+    return AllTrue(du, Eq(BitCast(du, keys), zero));  // true if all kept lanes are 0
+  }
+
+  HWY_INLINE bool Equal1(const LaneType* a, const LaneType* b) const {
+    return a[1] == b[1];  // scalar variant: only lane 1 (the key) is compared
+  }
+
+  // Returns vector with only the top half of each block valid. This allows
+  // fusing the "replicate upper to lower half" step with a subsequent permute.
+  template <class Order, class D>
+  HWY_INLINE HWY_MAYBE_UNUSED Vec<D> CompareTop(D d, Vec<D> a, Vec<D> b) const {
+    // Only the upper lane of each block is a key, and only that lane is
+    // required to be valid, so comparing all lanes is sufficient.
+    return VecFromMask(d, Order().CompareLanes(a, b));  // mask as 0 / all-ones lanes
+  }
+};
+
+struct OrderAscendingKV128 : public KeyValue128 {  // ascending by the key lane
+  using Order = SortAscending;
+  using OrderForSortingNetwork = OrderAscending128;
+
+  HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) const {
+    return a[1] < b[1];  // scalar variant: lane 1 is the key, lane 0 is ignored
+  }
+
+  template <class D>
+  HWY_INLINE Mask<D> Compare(D d, Vec<D> a, Vec<D> b) const {
+    return Lt128Upper(d, a, b);  // vector counterpart of Compare1
+  }
+
+  // Used by CompareTop
+  template <class V>
+  HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b) const {
+    return Lt(a, b);  // per-lane; CompareTop only relies on the upper lane
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> First(D d, const Vec<D> a, const Vec<D> b) const {
+    return Min128Upper(d, a, b);  // ascending: the smaller key comes first
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> Last(D d, const Vec<D> a, const Vec<D> b) const {
+    return Max128Upper(d, a, b);  // ascending: the larger key comes last
+  }
+
+  // Same as for regular lanes because 128-bit keys are u64.
+  template <class D>
+  HWY_INLINE Vec<D> FirstValue(D d) const {
+    return Set(d, hwy::LowestValue<TFromD<D> >());  // every lane = lowest value
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> LastValue(D d) const {
+    return Set(d, hwy::HighestValue<TFromD<D> >());  // every lane = highest value
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
+    const Vec<D> k1 = OddEven(Set(d, uint64_t{1}), Zero(d));  // odd lanes 1, even 0
+    return Sub(v, k1);  // decrements only the odd (key) lane of each pair
+  }
+};
+
+struct OrderDescendingKV128 : public KeyValue128 {  // descending by the key lane
+  using Order = SortDescending;
+  using OrderForSortingNetwork = OrderDescending128;
+
+  HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) const {
+    return b[1] < a[1];  // scalar variant: lane 1 is the key, lane 0 is ignored
+  }
+
+  template <class D>
+  HWY_INLINE Mask<D> Compare(D d, Vec<D> a, Vec<D> b) const {
+    return Lt128Upper(d, b, a);  // operands swapped relative to ascending order
+  }
+
+  // Used by CompareTop
+  template <class V>
+  HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b) const {
+    return Lt(b, a);  // per-lane; CompareTop only relies on the upper lane
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> First(D d, const Vec<D> a, const Vec<D> b) const {
+    return Max128Upper(d, a, b);  // descending: the larger key comes first
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> Last(D d, const Vec<D> a, const Vec<D> b) const {
+    return Min128Upper(d, a, b);  // descending: the smaller key comes last
+  }
+
+  // Same as for regular lanes because 128-bit keys are u64.
+  template <class D>
+  HWY_INLINE Vec<D> FirstValue(D d) const {
+    return Set(d, hwy::HighestValue<TFromD<D> >());  // every lane = highest value
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> LastValue(D d) const {
+    return Set(d, hwy::LowestValue<TFromD<D> >());  // every lane = lowest value
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
+    const Vec<D> k1 = OddEven(Set(d, uint64_t{1}), Zero(d));  // odd lanes 1, even 0
+    return Add(v, k1);  // increments only the odd (key) lane of each pair
+  }
+};
+
+// We want to swap 2 u128, i.e. 4 u64 lanes, based on the 0 or FF..FF mask in
+// the most-significant of those lanes (the result of CompareTop), so
+// replicate it 4x. Only called for >= 256-bit vectors.
+
+#if HWY_TARGET <= HWY_AVX3
+template <class V, HWY_IF_V_SIZE_V(V, 64)>
+HWY_INLINE V ReplicateTop4x(V v) {
+  return V{_mm512_permutex_epi64(v.raw, _MM_SHUFFLE(3, 3, 3, 3))};  // 64-byte V
+}
+#endif  // HWY_TARGET <= HWY_AVX3
+
+#if HWY_TARGET <= HWY_AVX2
+
+template <class V, HWY_IF_V_SIZE_V(V, 32)>
+HWY_INLINE V ReplicateTop4x(V v) {
+  return V{_mm256_permute4x64_epi64(v.raw, _MM_SHUFFLE(3, 3, 3, 3))};  // 32-byte V
+}
+
+#else  // HWY_TARGET > HWY_AVX2
+
+template <class V>
+HWY_INLINE V ReplicateTop4x(V v) {
+#if HWY_TARGET == HWY_SVE_256
+  return svdup_lane_u64(v, 3);  // four u64 lanes per vector: broadcast lane 3
+#else
+  HWY_ALIGN static constexpr uint64_t kIndices[8] = {3, 3, 3, 3, 7, 7, 7, 7};
+  const ScalableTag<uint64_t> d;
+  return TableLookupLanes(v, SetTableIndices(d, kIndices));  // lane 3 / lane 7 x4
+#endif
+}
+
+#endif  // HWY_TARGET <= HWY_AVX2
+
+// Shared code that depends on Order.
+template <class Base>
+struct Traits128 : public Base {
+  using TraitsForSortingNetwork =
+      Traits128<typename Base::OrderForSortingNetwork>;
+
+  template <class D>
+  HWY_INLINE Vec<D> FirstOfLanes(D d, Vec<D> v,
+                                 TFromD<D>* HWY_RESTRICT buf) const {
+    const Base* base = static_cast<const Base*>(this);
+    const size_t N = Lanes(d);
+    Store(v, d, buf);  // spill so that each key can be reloaded via SetKey
+    v = base->SetKey(d, buf + 0);  // result must be broadcasted
+    for (size_t i = base->LanesPerKey(); i < N; i += base->LanesPerKey()) {
+      v = base->First(d, v, base->SetKey(d, buf + i));  // fold in the next key
+    }
+    return v;
+  }
+
+  template <class D>
+  HWY_INLINE Vec<D> LastOfLanes(D d, Vec<D> v,
+                                TFromD<D>* HWY_RESTRICT buf) const {
+    const Base* base = static_cast<const Base*>(this);
+    const size_t N = Lanes(d);
+    Store(v, d, buf);  // spill so that each key can be reloaded via SetKey
+    v = base->SetKey(d, buf + 0);  // result must be broadcasted
+    for (size_t i = base->LanesPerKey(); i < N; i += base->LanesPerKey()) {
+      v = base->Last(d, v, base->SetKey(d, buf + i));  // fold in the next key
+    }
+    return v;
+  }
+
+  template <class D>
+  HWY_INLINE void Sort2(D d, Vec<D>& a, Vec<D>& b) const {
+    const Base* base = static_cast<const Base*>(this);
+
+    const Vec<D> a_copy = a;  // `a` is overwritten before `b` is computed
+    const auto lt = base->Compare(d, a, b);
+    a = IfThenElse(lt, a, b);  // where Compare(a, b) holds, keep a; else take b
+    b = IfThenElse(lt, b, a_copy);  // and the other one of the pair goes to b
+  }
+
+  // Conditionally swaps even-numbered keys with their odd-numbered neighbor.
+  template <class D>
+  HWY_INLINE Vec<D> SortPairsDistance1(D d, Vec<D> v) const {
+    const Base* base = static_cast<const Base*>(this);
+    Vec<D> swapped = base->ReverseKeys2(d, v);
+    const Vec<D> cmpHx = base->template CompareTop<Base>(d, v, swapped);
+    return IfVecThenElse(ReplicateTop4x(cmpHx), swapped, v);  // bitwise select
+  }
+
+  // Swaps with the vector formed by reversing contiguous groups of four 128-bit
+  // keys, which implies 512-bit vectors (we do not support more than that).
+  template <class D>
+  HWY_INLINE Vec<D> SortPairsReverse4(D d, Vec<D> v) const {
+    const Base* base = static_cast<const Base*>(this);
+    Vec<D> swapped = base->ReverseKeys4(d, v);
+
+    const Vec<D> cmpHx = base->template CompareTop<Base>(d, v, swapped);
+    // Similar to ReplicateTop4x, we want to gang together 2 comparison results
+    // (4 lanes). They are not contiguous, so use permute to replicate 4x.
+    HWY_ALIGN uint64_t kIndices[8] = {7, 7, 5, 5, 5, 5, 7, 7};
+    const Vec<D> select = TableLookupLanes(cmpHx, SetTableIndices(d, kIndices));
+    return IfVecThenElse(select, swapped, v);  // bitwise select
+  }
+
+  // Conditionally swaps lane 0 with 4, 1 with 5 etc.
+  template <class D>
+  HWY_INLINE Vec<D> SortPairsDistance4(D, Vec<D>) const {
+    // Only used by Merge16, which would require 2048 bit vectors (unsupported).
+    HWY_ASSERT(0);  // not expected to be reached, hence no return statement
+  }
+};
+
+#endif  // VQSORT_ENABLED
+
+}  // namespace detail
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#endif  // HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort-inl.h
new file mode 100644
index 0000000000..3f73fa1221
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort-inl.h
@@ -0,0 +1,1933 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Normal include guard for target-independent parts
+#ifndef HIGHWAY_HWY_CONTRIB_SORT_VQSORT_INL_H_
+#define HIGHWAY_HWY_CONTRIB_SORT_VQSORT_INL_H_
+
+#include <stdio.h>  // unconditional #include so we can use if(VQSORT_PRINT).
+#include <time.h>   // clock
+
+// IWYU pragma: begin_exports
+#include "hwy/base.h"
+#include "hwy/contrib/sort/order.h"  // SortAscending
+// IWYU pragma: end_exports
+
+#include "hwy/cache_control.h"  // Prefetch
+
+// If 1, VQSortStatic can be called without including vqsort.h, and we avoid
+// any DLLEXPORT. This simplifies integration into other build systems, but
+// decreases the security of random seeds.
+#ifndef VQSORT_ONLY_STATIC
+#define VQSORT_ONLY_STATIC 0
+#endif
+
+// Verbosity: 0 for none, 1 for brief per-sort, 2+ for more details.
+#ifndef VQSORT_PRINT
+#define VQSORT_PRINT 0
+#endif
+
+#if !VQSORT_ONLY_STATIC
+#include "hwy/contrib/sort/vqsort.h"  // Fill16BytesSecure
+#endif
+
+#endif  // HIGHWAY_HWY_CONTRIB_SORT_VQSORT_INL_H_
+
+// Per-target
+#if defined(HIGHWAY_HWY_CONTRIB_SORT_VQSORT_TOGGLE) == \
+    defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_CONTRIB_SORT_VQSORT_TOGGLE
+#undef HIGHWAY_HWY_CONTRIB_SORT_VQSORT_TOGGLE
+#else
+#define HIGHWAY_HWY_CONTRIB_SORT_VQSORT_TOGGLE
+#endif
+
+#if VQSORT_PRINT
+#include "hwy/print-inl.h"
+#endif
+
+#include "hwy/contrib/algo/copy-inl.h"
+#include "hwy/contrib/sort/shared-inl.h"
+#include "hwy/contrib/sort/sorting_networks-inl.h"
+#include "hwy/contrib/sort/traits-inl.h"
+#include "hwy/contrib/sort/traits128-inl.h"
+// Placeholder for internal instrumentation. Do not remove.
+#include "hwy/highway.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+namespace detail {
+
+using Constants = hwy::SortConstants;
+
+// Wrapper avoids #if in user code (interferes with code folding)
+template <class D>
+HWY_INLINE void MaybePrintVector(D d, const char* label, Vec<D> v,
+                                 size_t start = 0, size_t max_lanes = 16) {
+#if VQSORT_PRINT >= 2  // Print: hwy/print-inl.h is only included #if VQSORT_PRINT
+  Print(d, label, v, start, max_lanes);
+#else
+  (void)d;  // printing disabled: the casts only silence unused-parameter warnings
+  (void)label;
+  (void)v;
+  (void)start;
+  (void)max_lanes;
+#endif
+}
+
+// ------------------------------ HeapSort
+
+template <class Traits, typename T>
+void SiftDown(Traits st, T* HWY_RESTRICT lanes, const size_t num_lanes,
+              size_t start) {  // sifts the key at lane offset `start` down the heap
+  constexpr size_t N1 = st.LanesPerKey();
+  const FixedTag<T, N1> d;  // vector with exactly one key's worth of lanes
+
+  while (start < num_lanes) {
+    const size_t left = 2 * start + N1;  // lane offset of the first child
+    const size_t right = 2 * start + 2 * N1;  // lane offset of the second child
+    if (left >= num_lanes) break;  // no children
+    size_t idx_larger = start;
+    const auto key_j = st.SetKey(d, lanes + start);
+    if (AllTrue(d, st.Compare(d, key_j, st.SetKey(d, lanes + left)))) {
+      idx_larger = left;
+    }
+    if (right < num_lanes &&
+        AllTrue(d, st.Compare(d, st.SetKey(d, lanes + idx_larger),
+                              st.SetKey(d, lanes + right)))) {
+      idx_larger = right;
+    }
+    if (idx_larger == start) break;  // Compare held for neither child: done
+    st.Swap(lanes + start, lanes + idx_larger);
+    start = idx_larger;  // continue from the child we swapped with
+  }
+}
+
+// Heapsort: O(1) space, O(N*logN) worst-case comparisons.
+// Based on LLVM sanitizer_common.h, licensed under Apache-2.0.
+template <class Traits, typename T>
+void HeapSort(Traits st, T* HWY_RESTRICT lanes, const size_t num_lanes) {
+  constexpr size_t N1 = st.LanesPerKey();
+
+  if (num_lanes < 2 * N1) return;  // fewer than two keys: nothing to sort
+
+  // Build heap. (~N1 + 1) == size_t(0) - N1, the value i wraps to after i == 0.
+  for (size_t i = ((num_lanes - N1) / N1 / 2) * N1; i != (~N1 + 1); i -= N1) {
+    SiftDown(st, lanes, num_lanes, i);
+  }
+
+  for (size_t i = num_lanes - N1; i != 0; i -= N1) {
+    // Swap root with last key of the current heap, which begins at lane i.
+    st.Swap(lanes + 0, lanes + i);
+
+    // Sift down the new root within the heap shrunk to [0, i).
+    SiftDown(st, lanes, i, 0);
+  }
+}
+
+#if VQSORT_ENABLED || HWY_IDE
+
+// ------------------------------ BaseCase
+
+// Special cases where `num_lanes` is in the specified range (inclusive).
+template <class Traits, typename T>
+HWY_INLINE void Sort2To2(Traits st, T* HWY_RESTRICT keys, size_t num_lanes,
+                         T* HWY_RESTRICT /* buf */) {
+  constexpr size_t kLPK = st.LanesPerKey();
+  const size_t num_keys = num_lanes / kLPK;
+  HWY_DASSERT(num_keys == 2);
+  HWY_ASSUME(num_keys == 2);  // optimizer hint mirroring the debug assertion
+
+  // One key per vector, required to avoid reading past the end of `keys`.
+  const CappedTag<T, kLPK> d;
+  using V = Vec<decltype(d)>;
+
+  V v0 = LoadU(d, keys + 0x0 * kLPK);
+  V v1 = LoadU(d, keys + 0x1 * kLPK);
+
+  Sort2(d, st, v0, v1);  // two-input sorting network
+
+  StoreU(v0, d, keys + 0x0 * kLPK);
+  StoreU(v1, d, keys + 0x1 * kLPK);
+}
+
+template <class Traits, typename T>
+HWY_INLINE void Sort3To4(Traits st, T* HWY_RESTRICT keys, size_t num_lanes,
+                         T* HWY_RESTRICT buf) {
+  constexpr size_t kLPK = st.LanesPerKey();
+  const size_t num_keys = num_lanes / kLPK;
+  HWY_DASSERT(3 <= num_keys && num_keys <= 4);
+  HWY_ASSUME(num_keys >= 3);
+  HWY_ASSUME(num_keys <= 4);  // reduces branches
+
+  // One key per vector, required to avoid reading past the end of `keys`.
+  const CappedTag<T, kLPK> d;
+  using V = Vec<decltype(d)>;
+
+  // If num_keys == 3, initialize padding for the last sorting network element
+  // so that it does not influence the other elements.
+  Store(st.LastValue(d), d, buf);
+
+  // Points to a valid key, or padding. This avoids special-casing
+  // HWY_MEM_OPS_MIGHT_FAULT because there is only a single key per vector.
+  T* in_out3 = num_keys == 3 ? buf : keys + 0x3 * kLPK;
+
+  V v0 = LoadU(d, keys + 0x0 * kLPK);
+  V v1 = LoadU(d, keys + 0x1 * kLPK);
+  V v2 = LoadU(d, keys + 0x2 * kLPK);
+  V v3 = LoadU(d, in_out3);  // fourth key, or the padding stored in buf above
+
+  Sort4(d, st, v0, v1, v2, v3);  // four-input sorting network
+
+  StoreU(v0, d, keys + 0x0 * kLPK);
+  StoreU(v1, d, keys + 0x1 * kLPK);
+  StoreU(v2, d, keys + 0x2 * kLPK);
+  StoreU(v3, d, in_out3);  // goes to buf (not keys) when num_keys == 3
+}
+
+#if HWY_MEM_OPS_MIGHT_FAULT
+
+template <size_t kRows, size_t kLanesPerRow, class D, class Traits,
+          typename T = TFromD<D>>
+HWY_INLINE void CopyHalfToPaddedBuf(D d, Traits st, T* HWY_RESTRICT keys,
+                                    size_t num_lanes, T* HWY_RESTRICT buf) {
+  constexpr size_t kMinLanes = kRows / 2 * kLanesPerRow;  // lanes in half the rows
+  // Must cap for correctness: we will load up to the last valid lane, so
+  // Lanes(dmax) must not exceed `num_lanes` (known to be at least kMinLanes).
+  const CappedTag<T, kMinLanes> dmax;
+  const size_t Nmax = Lanes(dmax);
+  HWY_DASSERT(Nmax < num_lanes);
+  HWY_ASSUME(Nmax <= kMinLanes);
+
+  // Fill with padding - last in sort order, not copied to keys.
+  const Vec<decltype(dmax)> kPadding = st.LastValue(dmax);
+
+  // Rounding down allows aligned stores, which are typically faster.
+  size_t i = num_lanes & ~(Nmax - 1);
+  HWY_ASSUME(i != 0);  // because Nmax <= num_lanes; avoids branch
+  do {
+    Store(kPadding, dmax, buf + i);
+    i += Nmax;
+    // Initialize enough for the last vector even if Nmax > kLanesPerRow.
+  } while (i < (kRows - 1) * kLanesPerRow + Lanes(d));
+
+  // Ensure buf contains all we will read, and perhaps more before.
+  ptrdiff_t end = static_cast<ptrdiff_t>(num_lanes);
+  do {  // copy whole vectors backwards, starting with the one ending at num_lanes
+    end -= static_cast<ptrdiff_t>(Nmax);
+    StoreU(LoadU(dmax, keys + end), dmax, buf + end);
+  } while (end > static_cast<ptrdiff_t>(kRows / 2 * kLanesPerRow));
+}
+
+#endif  // HWY_MEM_OPS_MIGHT_FAULT
+
+template <size_t kKeysPerRow, class Traits, typename T>
+HWY_NOINLINE void Sort8Rows(Traits st, T* HWY_RESTRICT keys, size_t num_lanes,
+                            T* HWY_RESTRICT buf) {
+  // kKeysPerRow <= 4 because 8 64-bit keys implies 512-bit vectors, which
+  // are likely slower than 16x4, so 8x4 is the largest we handle here.
+  static_assert(kKeysPerRow <= 4, "");
+
+  constexpr size_t kLPK = st.LanesPerKey();
+
+  // We reshape the 1D keys into kRows x kKeysPerRow.
+  constexpr size_t kRows = 8;
+  constexpr size_t kLanesPerRow = kKeysPerRow * kLPK;
+  constexpr size_t kMinLanes = kRows / 2 * kLanesPerRow;
+  HWY_DASSERT(kMinLanes < num_lanes && num_lanes <= kRows * kLanesPerRow);
+
+  const CappedTag<T, kLanesPerRow> d;
+  using V = Vec<decltype(d)>;
+  V v4, v5, v6, v7;  // assigned in exactly one of the two #if branches below
+
+  // At least half the kRows are valid, otherwise a different function would
+  // have been called to handle this num_lanes.
+  V v0 = LoadU(d, keys + 0x0 * kLanesPerRow);
+  V v1 = LoadU(d, keys + 0x1 * kLanesPerRow);
+  V v2 = LoadU(d, keys + 0x2 * kLanesPerRow);
+  V v3 = LoadU(d, keys + 0x3 * kLanesPerRow);
+#if HWY_MEM_OPS_MIGHT_FAULT
+  CopyHalfToPaddedBuf<kRows, kLanesPerRow>(d, st, keys, num_lanes, buf);
+  v4 = LoadU(d, buf + 0x4 * kLanesPerRow);
+  v5 = LoadU(d, buf + 0x5 * kLanesPerRow);
+  v6 = LoadU(d, buf + 0x6 * kLanesPerRow);
+  v7 = LoadU(d, buf + 0x7 * kLanesPerRow);
+#endif  // HWY_MEM_OPS_MIGHT_FAULT
+#if !HWY_MEM_OPS_MIGHT_FAULT || HWY_IDE
+  (void)buf;
+  const V vnum_lanes = Set(d, static_cast<T>(num_lanes));
+  // First offset where not all vectors are guaranteed valid.
+  const V kIota = Iota(d, static_cast<T>(kMinLanes));
+  const V k1 = Set(d, static_cast<T>(kLanesPerRow));
+  const V k2 = Add(k1, k1);
+
+  using M = Mask<decltype(d)>;  // mN: lanes of row N whose offset is < num_lanes
+  const M m4 = Gt(vnum_lanes, kIota);
+  const M m5 = Gt(vnum_lanes, Add(kIota, k1));
+  const M m6 = Gt(vnum_lanes, Add(kIota, k2));
+  const M m7 = Gt(vnum_lanes, Add(kIota, Add(k2, k1)));
+
+  const V kPadding = st.LastValue(d);  // Not copied to keys.
+  v4 = MaskedLoadOr(kPadding, m4, d, keys + 0x4 * kLanesPerRow);
+  v5 = MaskedLoadOr(kPadding, m5, d, keys + 0x5 * kLanesPerRow);
+  v6 = MaskedLoadOr(kPadding, m6, d, keys + 0x6 * kLanesPerRow);
+  v7 = MaskedLoadOr(kPadding, m7, d, keys + 0x7 * kLanesPerRow);
+#endif  // !HWY_MEM_OPS_MIGHT_FAULT
+
+  Sort8(d, st, v0, v1, v2, v3, v4, v5, v6, v7);
+
+  // Merge8x2 is a no-op if kKeysPerRow < 2 etc.
+  Merge8x2<kKeysPerRow>(d, st, v0, v1, v2, v3, v4, v5, v6, v7);
+  Merge8x4<kKeysPerRow>(d, st, v0, v1, v2, v3, v4, v5, v6, v7);
+
+  StoreU(v0, d, keys + 0x0 * kLanesPerRow);
+  StoreU(v1, d, keys + 0x1 * kLanesPerRow);
+  StoreU(v2, d, keys + 0x2 * kLanesPerRow);
+  StoreU(v3, d, keys + 0x3 * kLanesPerRow);
+
+#if HWY_MEM_OPS_MIGHT_FAULT
+  // Store remaining vectors into buf and safely copy them into keys.
+  StoreU(v4, d, buf + 0x4 * kLanesPerRow);
+  StoreU(v5, d, buf + 0x5 * kLanesPerRow);
+  StoreU(v6, d, buf + 0x6 * kLanesPerRow);
+  StoreU(v7, d, buf + 0x7 * kLanesPerRow);
+
+  const ScalableTag<T> dmax;
+  const size_t Nmax = Lanes(dmax);
+
+  // The first half of vectors have already been stored unconditionally into
+  // `keys`, so we do not copy them.
+  size_t i = kMinLanes;
+  HWY_UNROLL(1)
+  for (; i + Nmax <= num_lanes; i += Nmax) {
+    StoreU(LoadU(dmax, buf + i), dmax, keys + i);
+  }
+
+  // Last iteration: copy partial vector
+  const size_t remaining = num_lanes - i;
+  HWY_ASSUME(remaining < 256);  // helps FirstN
+  SafeCopyN(remaining, dmax, buf + i, keys + i);
+#endif  // HWY_MEM_OPS_MIGHT_FAULT
+#if !HWY_MEM_OPS_MIGHT_FAULT || HWY_IDE
+  BlendedStore(v4, m4, d, keys + 0x4 * kLanesPerRow);  // same masks as the loads
+  BlendedStore(v5, m5, d, keys + 0x5 * kLanesPerRow);
+  BlendedStore(v6, m6, d, keys + 0x6 * kLanesPerRow);
+  BlendedStore(v7, m7, d, keys + 0x7 * kLanesPerRow);
+#endif  // !HWY_MEM_OPS_MIGHT_FAULT
+}
+
+template <size_t kKeysPerRow, class Traits, typename T>
+HWY_NOINLINE void Sort16Rows(Traits st, T* HWY_RESTRICT keys, size_t num_lanes,
+                             T* HWY_RESTRICT buf) {
+  static_assert(kKeysPerRow <= SortConstants::kMaxCols, "");
+
+  constexpr size_t kLPK = st.LanesPerKey();
+
+  // We reshape the 1D keys into kRows x kKeysPerRow.
+  constexpr size_t kRows = 16;
+  constexpr size_t kLanesPerRow = kKeysPerRow * kLPK;
+  constexpr size_t kMinLanes = kRows / 2 * kLanesPerRow;
+  HWY_DASSERT(kMinLanes < num_lanes && num_lanes <= kRows * kLanesPerRow);
+
+  const CappedTag<T, kLanesPerRow> d;
+  using V = Vec<decltype(d)>;
+  V v8, v9, va, vb, vc, vd, ve, vf;  // assigned in one of the #if branches below
+
+  // At least half the kRows are valid, otherwise a different function would
+  // have been called to handle this num_lanes.
+  V v0 = LoadU(d, keys + 0x0 * kLanesPerRow);
+  V v1 = LoadU(d, keys + 0x1 * kLanesPerRow);
+  V v2 = LoadU(d, keys + 0x2 * kLanesPerRow);
+  V v3 = LoadU(d, keys + 0x3 * kLanesPerRow);
+  V v4 = LoadU(d, keys + 0x4 * kLanesPerRow);
+  V v5 = LoadU(d, keys + 0x5 * kLanesPerRow);
+  V v6 = LoadU(d, keys + 0x6 * kLanesPerRow);
+  V v7 = LoadU(d, keys + 0x7 * kLanesPerRow);
+#if HWY_MEM_OPS_MIGHT_FAULT
+  CopyHalfToPaddedBuf<kRows, kLanesPerRow>(d, st, keys, num_lanes, buf);
+  v8 = LoadU(d, buf + 0x8 * kLanesPerRow);
+  v9 = LoadU(d, buf + 0x9 * kLanesPerRow);
+  va = LoadU(d, buf + 0xa * kLanesPerRow);
+  vb = LoadU(d, buf + 0xb * kLanesPerRow);
+  vc = LoadU(d, buf + 0xc * kLanesPerRow);
+  vd = LoadU(d, buf + 0xd * kLanesPerRow);
+  ve = LoadU(d, buf + 0xe * kLanesPerRow);
+  vf = LoadU(d, buf + 0xf * kLanesPerRow);
+#endif  // HWY_MEM_OPS_MIGHT_FAULT
+#if !HWY_MEM_OPS_MIGHT_FAULT || HWY_IDE
+  (void)buf;
+  const V vnum_lanes = Set(d, static_cast<T>(num_lanes));
+  // First offset where not all vectors are guaranteed valid.
+  const V kIota = Iota(d, static_cast<T>(kMinLanes));
+  const V k1 = Set(d, static_cast<T>(kLanesPerRow));
+  const V k2 = Add(k1, k1);
+  const V k4 = Add(k2, k2);
+  const V k8 = Add(k4, k4);
+
+  using M = Mask<decltype(d)>;  // mN: lanes of row N whose offset is < num_lanes
+  const M m8 = Gt(vnum_lanes, kIota);
+  const M m9 = Gt(vnum_lanes, Add(kIota, k1));
+  const M ma = Gt(vnum_lanes, Add(kIota, k2));
+  const M mb = Gt(vnum_lanes, Add(kIota, Sub(k4, k1)));
+  const M mc = Gt(vnum_lanes, Add(kIota, k4));
+  const M md = Gt(vnum_lanes, Add(kIota, Add(k4, k1)));
+  const M me = Gt(vnum_lanes, Add(kIota, Add(k4, k2)));
+  const M mf = Gt(vnum_lanes, Add(kIota, Sub(k8, k1)));
+
+  const V kPadding = st.LastValue(d);  // Not copied to keys.
+  v8 = MaskedLoadOr(kPadding, m8, d, keys + 0x8 * kLanesPerRow);
+  v9 = MaskedLoadOr(kPadding, m9, d, keys + 0x9 * kLanesPerRow);
+  va = MaskedLoadOr(kPadding, ma, d, keys + 0xa * kLanesPerRow);
+  vb = MaskedLoadOr(kPadding, mb, d, keys + 0xb * kLanesPerRow);
+  vc = MaskedLoadOr(kPadding, mc, d, keys + 0xc * kLanesPerRow);
+  vd = MaskedLoadOr(kPadding, md, d, keys + 0xd * kLanesPerRow);
+  ve = MaskedLoadOr(kPadding, me, d, keys + 0xe * kLanesPerRow);
+  vf = MaskedLoadOr(kPadding, mf, d, keys + 0xf * kLanesPerRow);
+#endif  // !HWY_MEM_OPS_MIGHT_FAULT
+
+  Sort16(d, st, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb, vc, vd, ve, vf);
+
+  // Merge16x4 is a no-op if kKeysPerRow < 4 etc.
+  Merge16x2<kKeysPerRow>(d, st, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb,
+                         vc, vd, ve, vf);
+  Merge16x4<kKeysPerRow>(d, st, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb,
+                         vc, vd, ve, vf);
+  Merge16x8<kKeysPerRow>(d, st, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb,
+                         vc, vd, ve, vf);
+#if !HWY_COMPILER_MSVC && !HWY_IS_DEBUG_BUILD
+  Merge16x16<kKeysPerRow>(d, st, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb,
+                          vc, vd, ve, vf);
+#endif
+
+  StoreU(v0, d, keys + 0x0 * kLanesPerRow);
+  StoreU(v1, d, keys + 0x1 * kLanesPerRow);
+  StoreU(v2, d, keys + 0x2 * kLanesPerRow);
+  StoreU(v3, d, keys + 0x3 * kLanesPerRow);
+  StoreU(v4, d, keys + 0x4 * kLanesPerRow);
+  StoreU(v5, d, keys + 0x5 * kLanesPerRow);
+  StoreU(v6, d, keys + 0x6 * kLanesPerRow);
+  StoreU(v7, d, keys + 0x7 * kLanesPerRow);
+
+#if HWY_MEM_OPS_MIGHT_FAULT
+  // Store remaining vectors into buf and safely copy them into keys.
+  StoreU(v8, d, buf + 0x8 * kLanesPerRow);
+  StoreU(v9, d, buf + 0x9 * kLanesPerRow);
+  StoreU(va, d, buf + 0xa * kLanesPerRow);
+  StoreU(vb, d, buf + 0xb * kLanesPerRow);
+  StoreU(vc, d, buf + 0xc * kLanesPerRow);
+  StoreU(vd, d, buf + 0xd * kLanesPerRow);
+  StoreU(ve, d, buf + 0xe * kLanesPerRow);
+  StoreU(vf, d, buf + 0xf * kLanesPerRow);
+
+  const ScalableTag<T> dmax;
+  const size_t Nmax = Lanes(dmax);
+
+  // The first half of vectors have already been stored unconditionally into
+  // `keys`, so we do not copy them.
+  size_t i = kMinLanes;
+  HWY_UNROLL(1)
+  for (; i + Nmax <= num_lanes; i += Nmax) {
+    StoreU(LoadU(dmax, buf + i), dmax, keys + i);
+  }
+
+  // Last iteration: copy partial vector
+  const size_t remaining = num_lanes - i;
+  HWY_ASSUME(remaining < 256);  // helps FirstN
+  SafeCopyN(remaining, dmax, buf + i, keys + i);
+#endif  // HWY_MEM_OPS_MIGHT_FAULT
+#if !HWY_MEM_OPS_MIGHT_FAULT || HWY_IDE
+  BlendedStore(v8, m8, d, keys + 0x8 * kLanesPerRow);  // same masks as the loads
+  BlendedStore(v9, m9, d, keys + 0x9 * kLanesPerRow);
+  BlendedStore(va, ma, d, keys + 0xa * kLanesPerRow);
+  BlendedStore(vb, mb, d, keys + 0xb * kLanesPerRow);
+  BlendedStore(vc, mc, d, keys + 0xc * kLanesPerRow);
+  BlendedStore(vd, md, d, keys + 0xd * kLanesPerRow);
+  BlendedStore(ve, me, d, keys + 0xe * kLanesPerRow);
+  BlendedStore(vf, mf, d, keys + 0xf * kLanesPerRow);
+#endif  // !HWY_MEM_OPS_MIGHT_FAULT
+}
+
+// Sorts `keys` within the range [0, num_lanes) via sorting network.
+// Reshapes into a matrix, sorts columns independently, and then merges
+// into a sorted 1D array without transposing.
+//
+// `TraitsKV` is SharedTraits<Traits*<Order*>>. This abstraction layer bridges
+//   differences in sort order and single-lane vs 128-bit keys. For key-value
+//   types, items with the same key are not equivalent. Our sorting network
+//   does not preserve order, thus we prevent mixing padding into the items by
+//   comparing all the item bits, including the value (see *ForSortingNetwork).
+//
+// See M. Blacher's thesis: https://github.com/mark-blacher/masterthesis
+template <class D, class TraitsKV, typename T>
+HWY_NOINLINE void BaseCase(D d, TraitsKV, T* HWY_RESTRICT keys,
+                           size_t num_lanes, T* buf) {
+  using Traits = typename TraitsKV::SharedTraitsForSortingNetwork;
+  Traits st;
+  constexpr size_t kLPK = st.LanesPerKey();
+  HWY_DASSERT(num_lanes <= Constants::BaseCaseNumLanes<kLPK>(Lanes(d)));
+  const size_t num_keys = num_lanes / kLPK;
+
+  // Can be zero when called through HandleSpecialCases, but also 1 (in which
+  // case the array is already sorted). Also ensures num_lanes - 1 != 0.
+  if (HWY_UNLIKELY(num_keys <= 1)) return;
+
+  const size_t ceil_log2 =  // index into `funcs` below
+      32 - Num0BitsAboveMS1Bit_Nonzero32(static_cast<uint32_t>(num_keys - 1));
+
+  // Checking kMaxKeysPerVector avoids generating unreachable codepaths.
+  constexpr size_t kMaxKeysPerVector = MaxLanes(d) / kLPK;
+
+  using FuncPtr = decltype(&Sort2To2<Traits, T>);
+  const FuncPtr funcs[9] = {
+    /* <= 1 */ nullptr,  // We ensured num_keys > 1.
+    /* <= 2 */ &Sort2To2<Traits, T>,
+    /* <= 4 */ &Sort3To4<Traits, T>,
+    /* <= 8 */ &Sort8Rows<1, Traits, T>,  // 1 key per row
+    /* <= 16 */ kMaxKeysPerVector >= 2 ? &Sort8Rows<2, Traits, T> : nullptr,
+    /* <= 32 */ kMaxKeysPerVector >= 4 ? &Sort8Rows<4, Traits, T> : nullptr,
+    /* <= 64 */ kMaxKeysPerVector >= 4 ? &Sort16Rows<4, Traits, T> : nullptr,
+    /* <= 128 */ kMaxKeysPerVector >= 8 ? &Sort16Rows<8, Traits, T> : nullptr,
+#if !HWY_COMPILER_MSVC && !HWY_IS_DEBUG_BUILD
+    /* <= 256 */ kMaxKeysPerVector >= 16 ? &Sort16Rows<16, Traits, T> : nullptr,
+#endif
+  };
+  funcs[ceil_log2](st, keys, num_lanes, buf);  // NOTE(review): presumably never a nullptr entry given the asserted bound - confirm
+}
+
+// ------------------------------ Partition
+
+// Consumes from `keys` until a multiple of kUnroll*N remains.
+// Temporarily stores the right side into `buf`, then moves behind `num`.
+// Returns the number of keys consumed from the left side.
+template <class D, class Traits, class T>
+HWY_INLINE size_t PartitionToMultipleOfUnroll(D d, Traits st,
+                                              T* HWY_RESTRICT keys, size_t& num,
+                                              const Vec<D> pivot,
+                                              T* HWY_RESTRICT buf) {
+  constexpr size_t kUnroll = Constants::kPartitionUnroll;
+  const size_t N = Lanes(d);
+  size_t readL = 0;
+  T* HWY_RESTRICT posL = keys;  // write position for the left partition
+  size_t bufR = 0;  // number of right-partition lanes collected in `buf`
+  // Partition requires both a multiple of kUnroll*N and at least
+  // 2*kUnroll*N for the initial loads. If less, consume all here.
+  const size_t num_rem =
+      (num < 2 * kUnroll * N) ? num : (num & (kUnroll * N - 1));
+  size_t i = 0;
+  for (; i + N <= num_rem; i += N) {
+    const Vec<D> vL = LoadU(d, keys + readL);
+    readL += N;
+
+    const auto comp = st.Compare(d, pivot, vL);  // true where Compare(pivot, lane)
+    posL += CompressBlendedStore(vL, Not(comp), d, posL);
+    bufR += CompressStore(vL, comp, d, buf + bufR);
+  }
+  // Last iteration: only use valid lanes.
+  if (HWY_LIKELY(i != num_rem)) {
+    const auto mask = FirstN(d, num_rem - i);
+    const Vec<D> vL = LoadU(d, keys + readL);
+
+    const auto comp = st.Compare(d, pivot, vL);
+    posL += CompressBlendedStore(vL, AndNot(comp, mask), d, posL);
+    bufR += CompressStore(vL, And(comp, mask), d, buf + bufR);
+  }
+
+  // MSAN seems not to understand CompressStore. buf[0, bufR) are valid.
+  detail::MaybeUnpoison(buf, bufR);
+
+  // Everything we loaded was put into buf, or behind the current `posL`, after
+  // which there is space for bufR items. First move items from `keys + num` to
+  // `posL` to free up space, then copy `buf` into the vacated `keys + num`.
+  // A loop with masked loads from `buf` is insufficient - we would also need to
+  // mask from `keys + num`. Combining a loop with memcpy for the remainders is
+  // slower than just memcpy, so we use that for simplicity.
+  num -= bufR;  // `num` now excludes the right-side items appended at the end
+  CopyBytes(keys + num, posL, bufR * sizeof(T));
+  CopyBytes(buf, keys + num, bufR * sizeof(T));
+  return static_cast<size_t>(posL - keys);  // caller will shrink num by this.
+}
+
+template <class V>
+V OrXor(const V o, const V x1, const V x2) {
+  return Or(o, Xor(x1, x2));  // o | (x1 ^ x2); ternlog on AVX3
+}
+
+// Note: we could track the OrXor of v and pivot to see if the entire left
+// partition is equal, but that happens rarely and thus is a net loss.
+template <class D, class Traits, typename T>
+HWY_INLINE void StoreLeftRight(D d, Traits st, const Vec<D> v,
+                               const Vec<D> pivot, T* HWY_RESTRICT keys,
+                               size_t& writeL, size_t& remaining) {
+  const size_t N = Lanes(d);
+
+  const auto comp = st.Compare(d, pivot, v);  // true for lanes going to the right
+
+  remaining -= N;  // this vector is consumed either way
+  if (hwy::HWY_NAMESPACE::CompressIsPartition<T>::value ||
+      (HWY_MAX_BYTES == 16 && st.Is128())) {
+    // Non-native Compress (e.g. AVX2): we are able to partition a vector using
+    // a single Compress+two StoreU instead of two Compress[Blended]Store. The
+    // latter are more expensive. Because we store entire vectors, the contents
+    // between the updated writeL and writeR are ignored and will be overwritten
+    // by subsequent calls. This works because writeL and writeR are at least
+    // two vectors apart.
+    const auto lr = st.CompressKeys(v, comp);
+    const size_t num_left = N - CountTrue(d, comp);
+    StoreU(lr, d, keys + writeL);
+    // Now write the right-side elements (if any), such that the previous writeR
+    // is one past the end of the newly written right elements, then advance.
+    StoreU(lr, d, keys + remaining + writeL);
+    writeL += num_left;
+  } else {
+    // Native Compress[Store] (e.g. AVX3), which only keep the left or right
+    // side, not both, hence we require two calls.
+    const size_t num_left = CompressStore(v, Not(comp), d, keys + writeL);
+    writeL += num_left;
+
+    (void)CompressBlendedStore(v, comp, d, keys + remaining + writeL);
+  }
+}
+
+template <class D, class Traits, typename T>
+HWY_INLINE void StoreLeftRight4(D d, Traits st, const Vec<D> v0,
+                                const Vec<D> v1, const Vec<D> v2,
+                                const Vec<D> v3, const Vec<D> pivot,
+                                T* HWY_RESTRICT keys, size_t& writeL,
+                                size_t& remaining) {
+  StoreLeftRight(d, st, v0, pivot, keys, writeL, remaining);  // v0..v3 in order;
+  StoreLeftRight(d, st, v1, pivot, keys, writeL, remaining);  // each call updates
+  StoreLeftRight(d, st, v2, pivot, keys, writeL, remaining);  // writeL and
+  StoreLeftRight(d, st, v3, pivot, keys, writeL, remaining);  // remaining.
+}
+
+// Moves "<= pivot" keys to the front, and others to the back. `pivot` is
+// broadcast. Returns the boundary index relative to `keys`. Time-critical!
+//
+// Aligned loads do not seem to be worthwhile (not bottlenecked by load ports).
+template <class D, class Traits, typename T>
+HWY_INLINE size_t Partition(D d, Traits st, T* HWY_RESTRICT keys, size_t num,
+                            const Vec<D> pivot, T* HWY_RESTRICT buf) {
+  using V = decltype(Zero(d));
+  const size_t N = Lanes(d);
+
+  // StoreLeftRight will CompressBlendedStore ending at `writeR`. Unless all
+  // lanes happen to be in the right-side partition, this will overrun `keys`,
+  // which triggers asan errors. Avoid by special-casing the last vector.
+  HWY_DASSERT(num > 2 * N);  // ensured by HandleSpecialCases
+  num -= N;  // the last vector is saved in vlast and re-inserted at the end
+  size_t last = num;  // index of that last vector
+  const V vlast = LoadU(d, keys + last);
+
+  const size_t consumedL =
+      PartitionToMultipleOfUnroll(d, st, keys, num, pivot, buf);
+  keys += consumedL;
+  last -= consumedL;  // keep `last` relative to the advanced `keys`
+  num -= consumedL;
+  constexpr size_t kUnroll = Constants::kPartitionUnroll;
+
+  // Partition splits the array into 3 sections, left to right: elements
+  // smaller or equal to the pivot, unpartitioned elements and elements larger
+  // than the pivot. To write elements unconditionally in the loop body without
+  // overwriting existing data, we maintain two regions of the array whose
+  // elements have already been copied elsewhere (e.g. vector registers). These
+  // are called bufferL and bufferR, for left and right respectively.
+  //
+  // These regions are tracked by the indices (writeL, writeR, left, right) as
+  // shown in the diagram below; `left`/`right` are readL/readR in the code.
+  //
+  //              writeL                                  writeR
+  //               \/                                       \/
+  //  |  <= pivot   | bufferL |   unpartitioned   | bufferR |   > pivot   |
+  //                          \/                  \/
+  //                         left                 right
+  //
+  // In the main loop body below we choose a side, load some elements from the
+  // array and move either `left` or `right`. Next we call into StoreLeftRight
+  // to partition the data; the partitioned elements are written at writeL
+  // and/or writeR, and the corresponding index is moved accordingly.
+  //
+  // Note that writeR is not explicitly tracked, as an optimization for
+  // platforms with conditional operations. Instead we track writeL and the
+  // number of elements left to process (`remaining`). From the diagram above:
+  //
+  //    writeR - writeL = remaining => writeR = remaining + writeL
+  //
+  // Tracking `remaining` is advantageous because each iteration reduces the
+  // number of unpartitioned elements by a fixed amount, so we can compute
+  // `remaining` without data dependencies.
+  //
+  size_t writeL = 0;
+  size_t remaining = num;
+
+  const T* HWY_RESTRICT readL = keys;
+  const T* HWY_RESTRICT readR = keys + num;
+  // Cannot load if there were fewer than 2 * kUnroll * N keys; num is then 0.
+  if (HWY_LIKELY(num != 0)) {
+    HWY_DASSERT(num >= 2 * kUnroll * N);
+    HWY_DASSERT((num & (kUnroll * N - 1)) == 0);
+
+    // Make space for writing in-place by reading from readL/readR.
+    const V vL0 = LoadU(d, readL + 0 * N);
+    const V vL1 = LoadU(d, readL + 1 * N);
+    const V vL2 = LoadU(d, readL + 2 * N);
+    const V vL3 = LoadU(d, readL + 3 * N);
+    readL += kUnroll * N;
+    readR -= kUnroll * N;
+    const V vR0 = LoadU(d, readR + 0 * N);
+    const V vR1 = LoadU(d, readR + 1 * N);
+    const V vR2 = LoadU(d, readR + 2 * N);
+    const V vR3 = LoadU(d, readR + 3 * N);
+
+    // readL/readR changed above, so check again before the loop.
+    while (readL != readR) {
+      V v0, v1, v2, v3;
+
+      // Data-dependent but branching is faster than forcing branch-free.
+      const size_t capacityL =
+          static_cast<size_t>((readL - keys) - static_cast<ptrdiff_t>(writeL));
+      HWY_DASSERT(capacityL <= num);  // >= 0
+      // Load data from the side (front or back) that has less free capacity.
+      // The next paragraphs explain how this works.
+      //
+      // let block_size = (kUnroll * N)
+      // On the loop prelude we load block_size elements from the front of the
+      // array and an additional block_size elements from the back. On each
+      // iteration k elements are written to the front of the array and
+      // (block_size - k) to the back.
+      //
+      // This creates a loop invariant where the capacity on the front
+      // (capacityL) and on the back (capacityR) always add to 2 * block_size.
+      // In other words:
+      //    capacityL + capacityR = 2 * block_size
+      //    capacityR = 2 * block_size - capacityL
+      //
+      // This means that:
+      //    capacityL < capacityR <=>
+      //    capacityL < 2 * block_size - capacityL <=>
+      //    2 * capacityL < 2 * block_size <=>
+      //    capacityL < block_size
+      //
+      // By the same argument, the check on the next line (block_size <
+      // capacityL) is equivalent to capacityL > capacityR: read from the back.
+      if (kUnroll * N < capacityL) {
+        readR -= kUnroll * N;
+        v0 = LoadU(d, readR + 0 * N);
+        v1 = LoadU(d, readR + 1 * N);
+        v2 = LoadU(d, readR + 2 * N);
+        v3 = LoadU(d, readR + 3 * N);
+        hwy::Prefetch(readR - 3 * kUnroll * N);
+      } else {
+        v0 = LoadU(d, readL + 0 * N);
+        v1 = LoadU(d, readL + 1 * N);
+        v2 = LoadU(d, readL + 2 * N);
+        v3 = LoadU(d, readL + 3 * N);
+        readL += kUnroll * N;
+        hwy::Prefetch(readL + 3 * kUnroll * N);
+      }
+
+      StoreLeftRight4(d, st, v0, v1, v2, v3, pivot, keys, writeL, remaining);
+    }
+
+    // Now finish writing the vectors saved before the loop into the middle.
+    StoreLeftRight4(d, st, vL0, vL1, vL2, vL3, pivot, keys, writeL, remaining);
+    StoreLeftRight4(d, st, vR0, vR1, vR2, vR3, pivot, keys, writeL, remaining);
+  }
+
+  // We have partitioned [left, right) such that writeL is the boundary.
+  HWY_DASSERT(remaining == 0);
+  // Make space for inserting vlast: move up to N of the first right-side keys
+  // into the unused space starting at last. If we have fewer, ensure they are
+  // the last items in that vector by subtracting from the *load* address,
+  // which is safe because we have at least two vectors (checked above).
+  const size_t totalR = last - writeL;
+  const size_t startR = totalR < N ? writeL + totalR - N : writeL;
+  StoreU(LoadU(d, keys + startR), d, keys + last);
+
+  // Partition vlast: write L, then R, into the single-vector gap at writeL.
+  const auto comp = st.Compare(d, pivot, vlast);
+  writeL += CompressBlendedStore(vlast, Not(comp), d, keys + writeL);
+  (void)CompressBlendedStore(vlast, comp, d, keys + writeL);
+
+  return consumedL + writeL;  // keys was advanced by consumedL above
+}
+
+// Returns true and partitions if [keys, keys + num) contains only {valueL,
+// valueR}. Otherwise, returns false and sets `third` to the first differing
+// value; keys may have been reordered and a regular Partition is still needed.
+// Called from two locations, hence NOINLINE.
+template <class D, class Traits, typename T>
+HWY_NOINLINE bool MaybePartitionTwoValue(D d, Traits st, T* HWY_RESTRICT keys,
+                                         size_t num, const Vec<D> valueL,
+                                         const Vec<D> valueR, Vec<D>& third,
+                                         T* HWY_RESTRICT buf) {
+  const size_t N = Lanes(d);
+
+  size_t i = 0;       // read position
+  size_t writeL = 0;  // number of valueL found so far; writeL <= i
+
+  // As long as all lanes are equal to L or R, we can overwrite with valueL.
+  // This is faster than first counting, then backtracking to fill L and R.
+  for (; i + N <= num; i += N) {
+    const Vec<D> v = LoadU(d, keys + i);
+    // It is not clear how to apply OrXor here - that can check if *both*
+    // comparisons are true, but here we want *either*. Comparing the unsigned
+    // min of differences to zero works, but is expensive for u64 prior to AVX3.
+    const Mask<D> eqL = st.EqualKeys(d, v, valueL);
+    const Mask<D> eqR = st.EqualKeys(d, v, valueR);
+    // At least one other value present; will require a regular partition.
+    // On AVX-512, Or + AllTrue are folded into a single kortest if we are
+    // careful with the FindKnownFirstTrue argument, see below.
+    if (HWY_UNLIKELY(!AllTrue(d, Or(eqL, eqR)))) {
+      // If we repeat Or(eqL, eqR) here, the compiler will hoist it into the
+      // loop, which is a pessimization because this if-true branch is cold.
+      // We can defeat this via Not(Xor), which is equivalent because eqL and
+      // eqR cannot be true at the same time. Can we elide the additional Not?
+      // FindFirstFalse instructions are generally unavailable, but we can
+      // fuse Not and Xor/Or into one ExclusiveNeither.
+      const size_t lane = FindKnownFirstTrue(d, ExclusiveNeither(eqL, eqR));
+      third = st.SetKey(d, keys + i + lane);
+      if (VQSORT_PRINT >= 2) {
+        fprintf(stderr, "found 3rd value at vec %zu; writeL %zu\n", i, writeL);
+      }
+      // 'Undo' the overwrites with valueL by filling [writeL, i) with R.
+      for (; writeL + N <= i; writeL += N) {
+        StoreU(valueR, d, keys + writeL);
+      }
+      BlendedStore(valueR, FirstN(d, i - writeL), d, keys + writeL);
+      return false;
+    }
+    StoreU(valueL, d, keys + writeL);
+    writeL += CountTrue(d, eqL);
+  }
+
+  // Final vector, masked comparison (no effect if i == num)
+  const size_t remaining = num - i;
+  SafeCopyN(remaining, d, keys + i, buf);
+  const Vec<D> v = Load(d, buf);
+  const Mask<D> valid = FirstN(d, remaining);
+  const Mask<D> eqL = And(st.EqualKeys(d, v, valueL), valid);  // counted below
+  const Mask<D> eqR = st.EqualKeys(d, v, valueR);
+  // Invalid lanes are considered equal.
+  const Mask<D> eq = Or(Or(eqL, eqR), Not(valid));
+  // At least one other value present; will require a regular partition.
+  if (HWY_UNLIKELY(!AllTrue(d, eq))) {
+    const size_t lane = FindKnownFirstTrue(d, Not(eq));
+    third = st.SetKey(d, keys + i + lane);
+    if (VQSORT_PRINT >= 2) {
+      fprintf(stderr, "found 3rd value at partial vec %zu; writeL %zu\n", i,
+              writeL);
+    }
+    // 'Undo' the overwrites with valueL by filling [writeL, i) with R.
+    for (; writeL + N <= i; writeL += N) {
+      StoreU(valueR, d, keys + writeL);
+    }
+    BlendedStore(valueR, FirstN(d, i - writeL), d, keys + writeL);
+    return false;
+  }
+  BlendedStore(valueL, valid, d, keys + writeL);
+  writeL += CountTrue(d, eqL);
+
+  // Fill right side: [writeL, num) becomes valueR.
+  i = writeL;
+  for (; i + N <= num; i += N) {
+    StoreU(valueR, d, keys + i);
+  }
+  BlendedStore(valueR, FirstN(d, num - i), d, keys + i);
+
+  if (VQSORT_PRINT >= 2) {
+    fprintf(stderr, "Successful MaybePartitionTwoValue\n");
+  }
+  return true;
+}
+
+// Like MaybePartitionTwoValue, but the pivot equals valueR: scan right to left.
+template <class D, class Traits, typename T>
+HWY_INLINE bool MaybePartitionTwoValueR(D d, Traits st, T* HWY_RESTRICT keys,
+                                        size_t num, const Vec<D> valueL,
+                                        const Vec<D> valueR, Vec<D>& third,
+                                        T* HWY_RESTRICT buf) {
+  const size_t N = Lanes(d);
+
+  HWY_DASSERT(num >= N);
+  size_t pos = num - N;  // current read/write position
+  size_t countR = 0;     // number of valueR found
+
+  // For whole vectors, in descending address order: as long as all lanes are
+  // equal to L or R, overwrite with valueR. This is faster than counting, then
+  // filling both L and R. `pos < num` fails once `pos -= N` wraps around.
+  for (; pos < num; pos -= N) {
+    const Vec<D> v = LoadU(d, keys + pos);
+    // It is not clear how to apply OrXor here - that can check if *both*
+    // comparisons are true, but here we want *either*. Comparing the unsigned
+    // min of differences to zero works, but is expensive for u64 prior to AVX3.
+    const Mask<D> eqL = st.EqualKeys(d, v, valueL);
+    const Mask<D> eqR = st.EqualKeys(d, v, valueR);
+    // If there is a third value, stop and undo what we've done. On AVX-512,
+    // Or + AllTrue are folded into a single kortest, but only if we are
+    // careful with the FindKnownFirstTrue argument - see prior comment on that.
+    if (HWY_UNLIKELY(!AllTrue(d, Or(eqL, eqR)))) {
+      const size_t lane = FindKnownFirstTrue(d, ExclusiveNeither(eqL, eqR));
+      third = st.SetKey(d, keys + pos + lane);
+      if (VQSORT_PRINT >= 2) {
+        fprintf(stderr, "found 3rd value at vec %zu; countR %zu\n", pos,
+                countR);
+        MaybePrintVector(d, "third", third, 0, st.LanesPerKey());
+      }
+      pos += N;  // rewind: we haven't yet committed changes in this iteration.
+      // We have filled [pos, num) with R, but only countR of them should have
+      // been written. Rewrite [pos, num - countR) to L.
+      HWY_DASSERT(countR <= num - pos);
+      const size_t endL = num - countR;
+      for (; pos + N <= endL; pos += N) {
+        StoreU(valueL, d, keys + pos);
+      }
+      BlendedStore(valueL, FirstN(d, endL - pos), d, keys + pos);
+      return false;
+    }
+    StoreU(valueR, d, keys + pos);
+    countR += CountTrue(d, eqR);
+  }
+
+  // Final partial (or empty) vector, masked comparison.
+  const size_t remaining = pos + N;  // wraps back: lanes not yet examined
+  HWY_DASSERT(remaining <= N);
+  const Vec<D> v = LoadU(d, keys);  // Safe because num >= N.
+  const Mask<D> valid = FirstN(d, remaining);
+  const Mask<D> eqL = st.EqualKeys(d, v, valueL);
+  const Mask<D> eqR = And(st.EqualKeys(d, v, valueR), valid);  // counted below
+  // Invalid lanes are considered equal.
+  const Mask<D> eq = Or(Or(eqL, eqR), Not(valid));
+  // At least one other value present; will require a regular partition.
+  if (HWY_UNLIKELY(!AllTrue(d, eq))) {
+    const size_t lane = FindKnownFirstTrue(d, Not(eq));
+    third = st.SetKey(d, keys + lane);
+    if (VQSORT_PRINT >= 2) {
+      fprintf(stderr, "found 3rd value at partial vec %zu; writeR %zu\n", pos,
+              countR);  // the label says writeR, but the value is countR
+      MaybePrintVector(d, "third", third, 0, st.LanesPerKey());
+    }
+    pos += N;  // rewind: we haven't yet committed changes in this iteration.
+    // We have filled [pos, num) with R, but only countR of them should have
+    // been written. Rewrite [pos, num - countR) to L.
+    HWY_DASSERT(countR <= num - pos);
+    const size_t endL = num - countR;
+    for (; pos + N <= endL; pos += N) {
+      StoreU(valueL, d, keys + pos);
+    }
+    BlendedStore(valueL, FirstN(d, endL - pos), d, keys + pos);
+    return false;
+  }
+  const size_t lastR = CountTrue(d, eqR);
+  countR += lastR;
+
+  // First finish writing valueR - [0, N) lanes were not yet written.
+  StoreU(valueR, d, keys);  // Safe because num >= N.
+
+  // Fill left side, [0, num - countR), in ascending order for clarity.
+  const size_t endL = num - countR;
+  size_t i = 0;
+  for (; i + N <= endL; i += N) {
+    StoreU(valueL, d, keys + i);
+  }
+  Store(valueL, d, buf);
+  SafeCopyN(endL - i, d, buf, keys + i);  // avoids asan overrun
+
+  if (VQSORT_PRINT >= 2) {
+    fprintf(stderr,
+            "MaybePartitionTwoValueR countR %zu pos %zu i %zu endL %zu\n",
+            countR, pos, i, endL);
+  }
+
+  return true;
+}
+
+// First path into `MaybePartitionTwoValue`, taken when all samples are equal.
+// `idx_second` is `first_mismatch` from `AllEqual`, i.e. the index of the
+// second key. Returns false and sets `third` if there is at least a third
+// value; otherwise partitions the array and returns true.
+template <class D, class Traits, typename T>
+HWY_INLINE bool PartitionIfTwoKeys(D d, Traits st, const Vec<D> pivot,
+                                   T* HWY_RESTRICT keys, size_t num,
+                                   const size_t idx_second, const Vec<D> second,
+                                   Vec<D>& third, T* HWY_RESTRICT buf) {
+  // True if second comes before pivot, i.e. pivot is the right-side value.
+  const bool pivot_is_right = AllFalse(d, st.Compare(d, pivot, second));
+  if (VQSORT_PRINT >= 1) {
+    fprintf(stderr, "Samples all equal, diff at %zu, isPivotR %d\n", idx_second,
+            pivot_is_right);
+  }
+  HWY_DASSERT(AllFalse(d, st.EqualKeys(d, second, pivot)));
+
+  if (pivot_is_right) {
+    // Pivot is R: scan backwards over the entire array.
+    return MaybePartitionTwoValueR(d, st, keys, num, second, pivot, third, buf);
+  }
+  // Otherwise we already scanned up to idx_second and can leave those in place.
+  return MaybePartitionTwoValue(d, st, keys + idx_second, num - idx_second,
+                                pivot, second, third, buf);
+}
+
+// Second path into `MaybePartitionTwoValue`, taken when not all samples are
+// equal. `samples` must already be sorted.
+template <class D, class Traits, typename T>
+HWY_INLINE bool PartitionIfTwoSamples(D d, Traits st, T* HWY_RESTRICT keys,
+                                      size_t num, T* HWY_RESTRICT samples) {
+  constexpr size_t kSampleLanes = Constants::SampleLanes<T>();
+  constexpr size_t N1 = st.LanesPerKey();
+  const Vec<D> lowest = st.SetKey(d, samples);
+  const Vec<D> highest = st.SetKey(d, samples + kSampleLanes - N1);
+  HWY_DASSERT(AllTrue(d, st.Compare(d, lowest, highest)));
+  HWY_DASSERT(AllFalse(d, st.EqualKeys(d, lowest, highest)));
+
+  // If the sample has more than two values, then the keys have at least that
+  // many, and thus this special case is inapplicable.
+  const Vec<D> before_highest = st.PrevValue(d, highest);
+  const bool two_values = AllTrue(d, st.EqualKeys(d, lowest, before_highest));
+  if (HWY_UNLIKELY(!two_values)) return false;
+
+  // The buffer starts after the samples: they must not be overwritten because
+  // if this returns false, the caller wants to read the original samples again.
+  Vec<D> ignored;  // receives `third`, which is unused here
+  return MaybePartitionTwoValue(d, st, keys, num, lowest, highest, ignored,
+                                samples + kSampleLanes);
+}
+
+// ------------------------------ Pivot sampling
+
+template <class Traits, class V>
+HWY_INLINE V MedianOf3(Traits st, V v0, V v1, V v2) {
+  const DFromV<V> d;
+  if (!st.Is128()) {
+    // General case: once Sort2 has ordered v0 and v2, the median is v1
+    // limited to the range they span.
+    st.Sort2(d, v0, v2);
+    return st.First(d, st.Last(d, v0, v1), v2);
+  }
+  // 128-bit keys: slightly faster, apparently because not serially dependent.
+  // Median = XOR-sum of all three 'minus' (XOR) the first and the last.
+  // Calling First twice is slightly faster than Compare + 2 IfThenElse or even
+  // IfThenElse + XOR.
+  const auto smallest = st.First(d, st.First(d, v0, v1), v2);
+  const auto largest = st.Last(d, st.Last(d, v0, v1), v2);
+  return Xor(Xor(Xor(Xor(v0, v1), v2), smallest), largest);
+}
+
+// Based on https://github.com/numpy/numpy/issues/16313#issuecomment-641897028
+HWY_INLINE uint64_t RandomBits(uint64_t* HWY_RESTRICT state) {
+  const uint64_t s0 = state[0];
+  const uint64_t s1 = state[1];
+  const uint64_t counter = state[2] + 1;  // state[2] is incremented per call
+  const uint64_t result = s0 ^ counter;
+  const uint64_t rotated = (s1 << 24) | (s1 >> 40);  // s1 rotated left by 24
+  state[0] = (s1 + (s1 << 3)) ^ (s1 >> 11);
+  state[1] = rotated + result;
+  state[2] = counter;
+  return result;
+}
+
+// Maps the 32 random `bits` to a slightly biased chunk index in
+// [0, num_chunks). See https://www.pcg-random.org/posts/bounded-rands.html.
+HWY_INLINE size_t RandomChunkIndex(const uint32_t num_chunks, uint32_t bits) {
+  const uint64_t product = static_cast<uint64_t>(bits) * num_chunks;
+  HWY_DASSERT((product >> 32) < num_chunks);
+  return static_cast<size_t>(product >> 32);  // upper half of the product
+}
+
+// Writes 2 * kLanesPerChunk medians-of-three of `keys[0, num)` into `buf`.
+template <class D, class Traits, typename T>
+HWY_INLINE void DrawSamples(D d, Traits st, T* HWY_RESTRICT keys, size_t num,
+                            T* HWY_RESTRICT buf, uint64_t* HWY_RESTRICT state) {
+  using V = decltype(Zero(d));
+  const size_t N = Lanes(d);
+
+  // Power of two
+  constexpr size_t kLanesPerChunk = Constants::LanesPerChunk(sizeof(T));
+
+  // Align start of keys to chunks. We have at least 2 chunks (x 64 bytes)
+  // because the base case handles anything up to 8 vectors (x 16 bytes).
+  HWY_DASSERT(num >= Constants::SampleLanes<T>());
+  const size_t misalign =
+      (reinterpret_cast<uintptr_t>(keys) / sizeof(T)) & (kLanesPerChunk - 1);
+  if (misalign != 0) {
+    const size_t consume = kLanesPerChunk - misalign;
+    keys += consume;  // skip ahead to the next chunk boundary
+    num -= consume;
+  }
+
+  // Three 64-bit draws provide the six uint32 used for the offsets below.
+  uint32_t bits[6];
+  for (size_t i = 0; i < 6; i += 2) {
+    const uint64_t bits64 = RandomBits(state);
+    CopyBytes<8>(&bits64, bits + i);
+  }
+
+  const size_t num_chunks64 = num / kLanesPerChunk;
+  // Clamp to uint32 for RandomChunkIndex
+  const uint32_t num_chunks =
+      static_cast<uint32_t>(HWY_MIN(num_chunks64, 0xFFFFFFFFull));
+
+  const size_t offset0 = RandomChunkIndex(num_chunks, bits[0]) * kLanesPerChunk;
+  const size_t offset1 = RandomChunkIndex(num_chunks, bits[1]) * kLanesPerChunk;
+  const size_t offset2 = RandomChunkIndex(num_chunks, bits[2]) * kLanesPerChunk;
+  const size_t offset3 = RandomChunkIndex(num_chunks, bits[3]) * kLanesPerChunk;
+  const size_t offset4 = RandomChunkIndex(num_chunks, bits[4]) * kLanesPerChunk;
+  const size_t offset5 = RandomChunkIndex(num_chunks, bits[5]) * kLanesPerChunk;
+  for (size_t i = 0; i < kLanesPerChunk; i += N) {
+    const V v0 = Load(d, keys + offset0 + i);
+    const V v1 = Load(d, keys + offset1 + i);
+    const V v2 = Load(d, keys + offset2 + i);
+    const V medians0 = MedianOf3(st, v0, v1, v2);
+    Store(medians0, d, buf + i);  // first chunk of samples
+
+    const V v3 = Load(d, keys + offset3 + i);
+    const V v4 = Load(d, keys + offset4 + i);
+    const V v5 = Load(d, keys + offset5 + i);
+    const V medians1 = MedianOf3(st, v3, v4, v5);
+    Store(medians1, d, buf + i + kLanesPerChunk);  // second chunk of samples
+  }
+}
+
+// Returns true if every sample key equals the first one. Used for detecting
+// inputs where (almost) all keys are equal.
+template <class D, class Traits>
+HWY_INLINE bool UnsortedSampleEqual(D d, Traits st,
+                                    const TFromD<D>* HWY_RESTRICT samples) {
+  using T = TFromD<D>;
+  using V = Vec<D>;
+  constexpr size_t kSampleLanes = Constants::SampleLanes<T>();
+  const size_t N = Lanes(d);
+  // Both are powers of two, so there will be no remainders.
+  HWY_DASSERT(N < kSampleLanes);
+
+  const V reference = st.SetKey(d, samples);
+
+  if (hwy::IsFloat<T>()) {
+    // Floats use actual comparisons because OrXor will not treat subnormals
+    // the same way, leading to logic errors for 2-value cases.
+    for (size_t pos = 0; pos < kSampleLanes; pos += N) {
+      const V v = Load(d, samples + pos);
+      const auto same = st.EqualKeys(d, v, reference);
+      if (!AllTrue(d, same)) return false;
+    }
+    return true;
+  } else {
+    // Other types: OR of XOR-difference may be faster than comparison.
+    V any_diff = Zero(d);
+    for (size_t pos = 0; pos < kSampleLanes; pos += N) {
+      const V v = Load(d, samples + pos);
+      any_diff = OrXor(any_diff, reference, v);
+    }
+    return st.NoKeyDifference(d, any_diff);
+  }
+}
+
+template <class D, class Traits, typename T>
+HWY_INLINE void SortSamples(D d, Traits st, T* HWY_RESTRICT buf) {
+  constexpr size_t kSampleLanes = Constants::SampleLanes<T>();
+  const size_t N = Lanes(d);
+  // Network must be large enough to sort two chunks.
+  HWY_DASSERT(Constants::BaseCaseNumLanes<st.LanesPerKey()>(N) >= kSampleLanes);
+  // The samples occupy buf[0, kSampleLanes); BaseCase gets the lanes after.
+  BaseCase(d, st, buf, kSampleLanes, buf + kSampleLanes);
+
+  if (VQSORT_PRINT >= 2) {
+    fprintf(stderr, "Samples:\n");
+    for (size_t lane = 0; lane < kSampleLanes; lane += N) {
+      const Vec<D> v = Load(d, buf + lane);
+      MaybePrintVector(d, "", v, 0, N);
+    }
+  }
+}
+
+// ------------------------------ Pivot selection
+
+enum class PivotResult {
+  kDone,     // stop without partitioning (all equal, or two-value partition)
+  kNormal,   // partition, then recurse into both the left and right parts
+  kIsFirst,  // partition, but skip the left recursion
+  kWasLast,  // partition, but skip the right recursion
+};
+
+// Maps each PivotResult enumerator to a short lowercase name:
+//   kDone -> "done", kNormal -> "normal", kIsFirst -> "first",
+//   kWasLast -> "last".
+// Any other value yields "unknown".
+HWY_INLINE const char* PivotResultString(PivotResult result) {
+  if (result == PivotResult::kDone) return "done";
+  if (result == PivotResult::kNormal) return "normal";
+  if (result == PivotResult::kIsFirst) return "first";
+  if (result == PivotResult::kWasLast) return "last";
+
+  // Only reachable if `result` holds a value that is not an enumerator.
+  return "unknown";
+}
+
+// Returns the rank (lane index into `samples`, assumed sorted) of the pivot.
+// (Could vectorize, but only 0.2% of total time)
+template <class Traits, typename T>
+HWY_INLINE size_t PivotRank(Traits st, const T* HWY_RESTRICT samples) {
+  constexpr size_t kSampleLanes = Constants::SampleLanes<T>();
+  constexpr size_t N1 = st.LanesPerKey();
+  constexpr size_t kRankMid = kSampleLanes / 2;
+  static_assert(kRankMid % N1 == 0, "Mid is not an aligned key");
+  const T* median = samples + kRankMid;
+
+  // Walk down to the closest sample that differs from the median.
+  size_t below = kRankMid - N1;
+  while (st.Equal1(samples + below, median)) {
+    // All previous samples are equal to the median.
+    if (below == 0) return 0;
+    below -= N1;
+  }
+
+  // Walk up past the run of samples equal to the median.
+  size_t above = below + N1;
+  while (st.Equal1(samples + above, median)) {
+    // The median is also the largest sample. If it is also the largest key,
+    // we'd end up with an empty right partition, so choose the previous key.
+    if (above == kSampleLanes - N1) return below;
+    above += N1;
+  }
+
+  // Equal-to-pivot keys also land in the left partition (an in-place
+  // vectorized 3-way partition is infeasible), so with the median as pivot
+  // about above/kSampleLanes of keys go left. Use `below` if more balanced.
+  return (above - kRankMid) < (kRankMid - below) ? kRankMid : below;
+}
+
+// Returns the pivot chosen from `samples` via PivotRank. It will never be the
+// largest key (thus the right partition will never be empty).
+template <class D, class Traits, typename T>
+HWY_INLINE Vec<D> ChoosePivotByRank(D d, Traits st,
+                                    const T* HWY_RESTRICT samples) {
+  constexpr size_t kSampleLanes = Constants::SampleLanes<T>();
+  constexpr size_t N1 = st.LanesPerKey();
+  const size_t rank = PivotRank(st, samples);
+  const Vec<D> chosen = st.SetKey(d, samples + rank);
+  if (VQSORT_PRINT >= 2) {
+    fprintf(stderr, "  Pivot rank %zu = %f\n", rank,
+            static_cast<double>(GetLane(chosen)));
+  }
+  // Verify the pivot is not equal to the last sample.
+  const Vec<D> final_sample = st.SetKey(d, samples + kSampleLanes - N1);
+  const bool differs = AllTrue(d, st.NotEqualKeys(d, chosen, final_sample));
+  (void)differs;  // otherwise unused if HWY_DASSERT expands to nothing
+  HWY_DASSERT(differs);
+  return chosen;
+}
+
+// Returns true if all keys equal `pivot`, otherwise returns false and sets
+// `*first_mismatch` to the index of the first differing key.
+template <class D, class Traits, typename T>
+HWY_INLINE bool AllEqual(D d, Traits st, const Vec<D> pivot,
+                         const T* HWY_RESTRICT keys, size_t num,
+                         size_t* HWY_RESTRICT first_mismatch) {
+  const size_t N = Lanes(d);
+  // Ensures we can use overlapping loads for the tail; see HandleSpecialCases.
+  HWY_DASSERT(num >= N);
+  const Vec<D> zero = Zero(d);
+
+  // Vector-align keys + i: check the first `consume` lanes via an unaligned load.
+  const size_t misalign =
+      (reinterpret_cast<uintptr_t>(keys) / sizeof(T)) & (N - 1);
+  HWY_DASSERT(misalign % st.LanesPerKey() == 0);
+  const size_t consume = N - misalign;  // lanes up to the next aligned vector
+  {
+    const Vec<D> v = LoadU(d, keys);
+    // Only check masked lanes; consider others to be equal.
+    const Mask<D> diff = And(FirstN(d, consume), st.NotEqualKeys(d, v, pivot));
+    if (HWY_UNLIKELY(!AllFalse(d, diff))) {
+      const size_t lane = FindKnownFirstTrue(d, diff);
+      *first_mismatch = lane;
+      return false;
+    }
+  }
+  size_t i = consume;
+  HWY_DASSERT(((reinterpret_cast<uintptr_t>(keys + i) / sizeof(T)) & (N - 1)) ==
+              0);
+
+  // Disable the OrXor optimization for floats because OrXor will not treat
+  // subnormals the same as actual comparisons, leading to logic errors for
+  // 2-value cases.
+  if (!hwy::IsFloat<T>()) {
+    // Sticky bits registering any difference between `keys` and `pivot`.
+    // We use vector XOR because it may be cheaper than comparisons, especially
+    // for 128-bit. 2x unrolled for more ILP.
+    Vec<D> diff0 = zero;
+    Vec<D> diff1 = zero;
+
+    // We want to stop once a difference has been found, but without slowing
+    // down the loop by comparing during each iteration. The compromise is to
+    // compare after a 'group', which consists of kLoops times two vectors.
+    constexpr size_t kLoops = 8;
+    const size_t lanes_per_group = kLoops * 2 * N;
+
+    for (; i + lanes_per_group <= num; i += lanes_per_group) {
+      HWY_DEFAULT_UNROLL
+      for (size_t loop = 0; loop < kLoops; ++loop) {
+        const Vec<D> v0 = Load(d, keys + i + loop * 2 * N);
+        const Vec<D> v1 = Load(d, keys + i + loop * 2 * N + N);
+        diff0 = OrXor(diff0, v0, pivot);
+        diff1 = OrXor(diff1, v1, pivot);
+      }
+
+      // If there was a difference in the entire group:
+      if (HWY_UNLIKELY(!st.NoKeyDifference(d, Or(diff0, diff1)))) {
+        // .. then rescan the group from its start until the first difference,
+        // which is guaranteed to exist and thus terminates the loop.
+        for (;; i += N) {
+          const Vec<D> v = Load(d, keys + i);
+          const Mask<D> diff = st.NotEqualKeys(d, v, pivot);
+          if (HWY_UNLIKELY(!AllFalse(d, diff))) {
+            const size_t lane = FindKnownFirstTrue(d, diff);
+            *first_mismatch = i + lane;
+            return false;
+          }
+        }
+      }
+    }
+  }  // !hwy::IsFloat<T>()
+
+  // Whole vectors, no unrolling, compare directly
+  for (; i + N <= num; i += N) {
+    const Vec<D> v = Load(d, keys + i);
+    const Mask<D> diff = st.NotEqualKeys(d, v, pivot);
+    if (HWY_UNLIKELY(!AllFalse(d, diff))) {
+      const size_t lane = FindKnownFirstTrue(d, diff);
+      *first_mismatch = i + lane;
+      return false;
+    }
+  }
+  // Always re-check the last (unaligned) vector to reduce branching.
+  i = num - N;
+  const Vec<D> v = LoadU(d, keys + i);
+  const Mask<D> diff = st.NotEqualKeys(d, v, pivot);
+  if (HWY_UNLIKELY(!AllFalse(d, diff))) {
+    const size_t lane = FindKnownFirstTrue(d, diff);
+    *first_mismatch = i + lane;
+    return false;
+  }
+
+  if (VQSORT_PRINT >= 1) {
+    fprintf(stderr, "All keys equal\n");
+  }
+  return true;  // all equal
+}
+
+// Any key before pivot? Two call sites, only one active (IsKV is constexpr).
+template <class D, class Traits, typename T>
+HWY_INLINE bool ExistsAnyBefore(D d, Traits st, const T* HWY_RESTRICT keys,
+                                size_t num, const Vec<D> pivot) {
+  const size_t N = Lanes(d);
+  HWY_DASSERT(num >= N);  // See HandleSpecialCases
+
+  if (VQSORT_PRINT >= 2) {
+    fprintf(stderr, "Scanning for before\n");
+  }
+
+  size_t i = 0;
+
+  constexpr size_t kLoops = 16;
+  const size_t lanes_per_group = kLoops * N;
+
+  Vec<D> first = pivot;  // accumulates st.First over pivot and the keys read
+
+  // Whole groups, unrolled: accumulate, then compare once per group.
+  for (; i + lanes_per_group <= num; i += lanes_per_group) {
+    HWY_DEFAULT_UNROLL
+    for (size_t loop = 0; loop < kLoops; ++loop) {
+      const Vec<D> curr = LoadU(d, keys + i + loop * N);
+      first = st.First(d, first, curr);
+    }
+
+    if (HWY_UNLIKELY(!AllFalse(d, st.Compare(d, first, pivot)))) {
+      if (VQSORT_PRINT >= 2) {
+        fprintf(stderr, "Stopped scanning at end of group %zu\n",
+                i + lanes_per_group);
+      }
+      return true;
+    }
+  }
+  // Whole vectors, no unrolling: compare each vector directly with pivot.
+  for (; i + N <= num; i += N) {
+    const Vec<D> curr = LoadU(d, keys + i);
+    if (HWY_UNLIKELY(!AllFalse(d, st.Compare(d, curr, pivot)))) {
+      if (VQSORT_PRINT >= 2) {
+        fprintf(stderr, "Stopped scanning at %zu\n", i);
+      }
+      return true;
+    }
+  }
+  // If there are remainders, re-check the last whole vector (overlapping load).
+  if (HWY_LIKELY(i != num)) {
+    const Vec<D> curr = LoadU(d, keys + num - N);
+    if (HWY_UNLIKELY(!AllFalse(d, st.Compare(d, curr, pivot)))) {
+      if (VQSORT_PRINT >= 2) {
+        fprintf(stderr, "Stopped scanning at last %zu\n", num - N);
+      }
+      return true;
+    }
+  }
+
+  return false;  // pivot is the first
+}
+
+// Called from 'two locations', but only one is active (IsKV is constexpr).
+template <class D, class Traits, typename T>
+HWY_INLINE bool ExistsAnyAfter(D d, Traits st, const T* HWY_RESTRICT keys,
+                               size_t num, const Vec<D> pivot) {
+  const size_t N = Lanes(d);
+  HWY_DASSERT(num >= N);  // See HandleSpecialCases
+
+  if (VQSORT_PRINT >= 2) {
+    fprintf(stderr, "Scanning for after\n");
+  }
+
+  size_t i = 0;
+
+  constexpr size_t kLoops = 16;
+  const size_t lanes_per_group = kLoops * N;
+
+  Vec<D> last = pivot;
+
+  // Whole group, unrolled
+  for (; i + lanes_per_group <= num; i += lanes_per_group) {
+    HWY_DEFAULT_UNROLL
+    for (size_t loop = 0; loop < kLoops; ++loop) {
+      const Vec<D> curr = LoadU(d, keys + i + loop * N);
+      last = st.Last(d, last, curr);
+    }
+
+    if (HWY_UNLIKELY(!AllFalse(d, st.Compare(d, pivot, last)))) {
+      if (VQSORT_PRINT >= 2) {
+        fprintf(stderr, "Stopped scanning at end of group %zu\n",
+                i + lanes_per_group);
+      }
+      return true;
+    }
+  }
+  // Whole vectors, no unrolling
+  for (; i + N <= num; i += N) {
+    const Vec<D> curr = LoadU(d, keys + i);
+    if (HWY_UNLIKELY(!AllFalse(d, st.Compare(d, pivot, curr)))) {
+      if (VQSORT_PRINT >= 2) {
+        fprintf(stderr, "Stopped scanning at %zu\n", i);
+      }
+      return true;
+    }
+  }
+  // If there are remainders, re-check the last whole vector.
+  if (HWY_LIKELY(i != num)) {
+    const Vec<D> curr = LoadU(d, keys + num - N);
+    if (HWY_UNLIKELY(!AllFalse(d, st.Compare(d, pivot, curr)))) {
+      if (VQSORT_PRINT >= 2) {
+        fprintf(stderr, "Stopped scanning at last %zu\n", num - N);
+      }
+      return true;
+    }
+  }
+
+  return false;  // pivot is the last
+}
+
+// Returns pivot for `keys[0, num)` (maybe not an actual key); sets `result`.
+// Never the largest key (thus the right partition will never be empty).
+template <class D, class Traits, typename T>
+HWY_INLINE Vec<D> ChoosePivotForEqualSamples(D d, Traits st,
+                                             T* HWY_RESTRICT keys, size_t num,
+                                             T* HWY_RESTRICT samples,
+                                             Vec<D> second, Vec<D> third,
+                                             PivotResult& result) {
+  const Vec<D> pivot = st.SetKey(d, samples);  // the single unique sample
+
+  // Early out for mostly-0 arrays, where pivot is often FirstValue.
+  if (HWY_UNLIKELY(AllTrue(d, st.EqualKeys(d, pivot, st.FirstValue(d))))) {
+    result = PivotResult::kIsFirst;
+    if (VQSORT_PRINT >= 2) {
+      fprintf(stderr, "Pivot equals first possible value\n");
+    }
+    return pivot;
+  }
+  if (HWY_UNLIKELY(AllTrue(d, st.EqualKeys(d, pivot, st.LastValue(d))))) {
+    if (VQSORT_PRINT >= 2) {
+      fprintf(stderr, "Pivot equals last possible value\n");
+    }
+    result = PivotResult::kWasLast;
+    return st.PrevValue(d, pivot);
+  }
+
+  // If key-value, we didn't run PartitionIfTwo* and thus `third` is unknown and
+  // cannot be used.
+  if (st.IsKV()) {
+    // If true, pivot is either middle or last.
+    const bool before = !AllFalse(d, st.Compare(d, second, pivot));
+    if (HWY_UNLIKELY(before)) {
+      // Not last, so middle.
+      if (HWY_UNLIKELY(ExistsAnyAfter(d, st, keys, num, pivot))) {
+        result = PivotResult::kNormal;
+        return pivot;
+      }
+
+      // We didn't find anything after pivot, so it is the last. Because keys
+      // equal to the pivot go to the left partition, the right partition would
+      // be empty and Partition will not have changed anything. Instead use the
+      // previous value in sort order, which is not necessarily an actual key.
+      result = PivotResult::kWasLast;
+      return st.PrevValue(d, pivot);
+    }
+
+    // Otherwise, pivot is first or middle. Rule out it being first:
+    if (HWY_UNLIKELY(ExistsAnyBefore(d, st, keys, num, pivot))) {
+      result = PivotResult::kNormal;
+      return pivot;
+    }
+    // It is first: fall through to shared code below.
+  } else {
+    // Check if pivot is between two known values. If so, it is not the first
+    // nor the last and we can avoid scanning.
+    st.Sort2(d, second, third);
+    HWY_DASSERT(AllTrue(d, st.Compare(d, second, third)));
+    const bool before = !AllFalse(d, st.Compare(d, second, pivot));
+    const bool after = !AllFalse(d, st.Compare(d, pivot, third));
+    // Only reached if there are three keys, which means pivot is either first,
+    // last, or in between. Thus there is another key that comes before or
+    // after.
+    HWY_DASSERT(before || after);
+    if (HWY_UNLIKELY(before)) {
+      // Neither first nor last.
+      if (HWY_UNLIKELY(after || ExistsAnyAfter(d, st, keys, num, pivot))) {
+        result = PivotResult::kNormal;
+        return pivot;
+      }
+
+      // We didn't find anything after pivot, so it is the last. Because keys
+      // equal to the pivot go to the left partition, the right partition would
+      // be empty and Partition will not have changed anything. Instead use the
+      // previous value in sort order, which is not necessarily an actual key.
+      result = PivotResult::kWasLast;
+      return st.PrevValue(d, pivot);
+    }
+
+    // Has after, and we found one before: in the middle.
+    if (HWY_UNLIKELY(ExistsAnyBefore(d, st, keys, num, pivot))) {
+      result = PivotResult::kNormal;
+      return pivot;
+    }
+  }
+
+  // Pivot is first. We could consider a special partition mode that only
+  // reads from and writes to the right side, and later fills in the left
+  // side, which we know is equal to the pivot. However, that leads to more
+  // cache misses if the array is large, and doesn't save much, hence is a
+  // net loss.
+  result = PivotResult::kIsFirst;
+  return pivot;
+}
+
+// ------------------------------ Quicksort recursion
+
+template <class D, class Traits, typename T>
+HWY_NOINLINE void PrintMinMax(D d, Traits st, const T* HWY_RESTRICT keys,
+                              size_t num, T* HWY_RESTRICT buf) {
+  if (VQSORT_PRINT >= 2) {
+    const size_t N = Lanes(d);
+    if (num < N) return;
+
+    Vec<D> first = st.LastValue(d);
+    Vec<D> last = st.FirstValue(d);
+
+    size_t i = 0;
+    for (; i + N <= num; i += N) {
+      const Vec<D> v = LoadU(d, keys + i);
+      first = st.First(d, v, first);
+      last = st.Last(d, v, last);
+    }
+    if (HWY_LIKELY(i != num)) {
+      HWY_DASSERT(num >= N);  // See HandleSpecialCases
+      const Vec<D> v = LoadU(d, keys + num - N);
+      first = st.First(d, v, first);
+      last = st.Last(d, v, last);
+    }
+
+    first = st.FirstOfLanes(d, first, buf);
+    last = st.LastOfLanes(d, last, buf);
+    MaybePrintVector(d, "first", first, 0, st.LanesPerKey());
+    MaybePrintVector(d, "last", last, 0, st.LanesPerKey());
+  }
+}
+
+template <class D, class Traits, typename T>
+HWY_NOINLINE void Recurse(D d, Traits st, T* HWY_RESTRICT keys,
+                          const size_t num, T* HWY_RESTRICT buf,
+                          uint64_t* HWY_RESTRICT state,
+                          const size_t remaining_levels) {
+  HWY_DASSERT(num != 0);
+
+  const size_t N = Lanes(d);
+  constexpr size_t kLPK = st.LanesPerKey();
+  if (HWY_UNLIKELY(num <= Constants::BaseCaseNumLanes<kLPK>(N))) {
+    BaseCase(d, st, keys, num, buf);
+    return;
+  }
+
+  // Move after BaseCase so we skip printing for small subarrays.
+  if (VQSORT_PRINT >= 1) {
+    fprintf(stderr, "\n\n=== Recurse depth=%zu len=%zu\n", remaining_levels,
+            num);
+    PrintMinMax(d, st, keys, num, buf);
+  }
+
+  DrawSamples(d, st, keys, num, buf, state);
+
+  Vec<D> pivot;
+  PivotResult result = PivotResult::kNormal;
+  if (HWY_UNLIKELY(UnsortedSampleEqual(d, st, buf))) {
+    pivot = st.SetKey(d, buf);
+    size_t idx_second = 0;
+    if (HWY_UNLIKELY(AllEqual(d, st, pivot, keys, num, &idx_second))) {
+      return;
+    }
+    HWY_DASSERT(idx_second % st.LanesPerKey() == 0);
+    // Must capture the value before PartitionIfTwoKeys may overwrite it.
+    const Vec<D> second = st.SetKey(d, keys + idx_second);
+    MaybePrintVector(d, "pivot", pivot, 0, st.LanesPerKey());
+    MaybePrintVector(d, "second", second, 0, st.LanesPerKey());
+
+    Vec<D> third;
+    // Not supported for key-value types because two 'keys' may be equivalent
+    // but not interchangeable (their values may differ).
+    if (HWY_UNLIKELY(!st.IsKV() &&
+                     PartitionIfTwoKeys(d, st, pivot, keys, num, idx_second,
+                                        second, third, buf))) {
+      return;  // Done, skip recursion because each side has all-equal keys.
+    }
+
+    // We can no longer start scanning from idx_second because
+    // PartitionIfTwoKeys may have reordered keys.
+    pivot = ChoosePivotForEqualSamples(d, st, keys, num, buf, second, third,
+                                       result);
+    // If kNormal, `pivot` is very common but not the first/last. It is
+    // tempting to do a 3-way partition (to avoid moving the =pivot keys a
+    // second time), but that is a net loss due to the extra comparisons.
+  } else {
+    SortSamples(d, st, buf);
+
+    // Not supported for key-value types because two 'keys' may be equivalent
+    // but not interchangeable (their values may differ).
+    if (HWY_UNLIKELY(!st.IsKV() &&
+                     PartitionIfTwoSamples(d, st, keys, num, buf))) {
+      return;
+    }
+
+    pivot = ChoosePivotByRank(d, st, buf);
+  }
+
+  // Too many recursions. This is unlikely to happen because we select pivots
+  // from large (though still O(1)) samples.
+  if (HWY_UNLIKELY(remaining_levels == 0)) {
+    if (VQSORT_PRINT >= 1) {
+      fprintf(stderr, "HeapSort reached, size=%zu\n", num);
+    }
+    HeapSort(st, keys, num);  // Slow but N*logN.
+    return;
+  }
+
+  const size_t bound = Partition(d, st, keys, num, pivot, buf);
+  if (VQSORT_PRINT >= 2) {
+    fprintf(stderr, "bound %zu num %zu result %s\n", bound, num,
+            PivotResultString(result));
+  }
+  // The left partition is not empty because the pivot is usually one of the
+  // keys. Exception: if kWasLast, we set pivot to PrevValue(pivot), but we
+  // still have at least one value <= pivot because AllEqual ruled out the case
+  // of only one unique value. Note that for floating-point, PrevValue can
+  // return the same value (for -inf inputs), but that would just mean the
+  // pivot is again one of the keys.
+  HWY_DASSERT(bound != 0);
+  // ChoosePivot* ensure pivot != last, so the right partition is never empty
+  // except in the rare case of the pivot matching the last-in-sort-order value,
+  // which implies we anyway skip the right partition due to kWasLast.
+  HWY_DASSERT(bound != num || result == PivotResult::kWasLast);
+
+  if (HWY_LIKELY(result != PivotResult::kIsFirst)) {
+    Recurse(d, st, keys, bound, buf, state, remaining_levels - 1);
+  }
+  if (HWY_LIKELY(result != PivotResult::kWasLast)) {
+    Recurse(d, st, keys + bound, num - bound, buf, state, remaining_levels - 1);
+  }
+}
+
+// Returns true if sorting is finished.
+template <class D, class Traits, typename T>
+HWY_INLINE bool HandleSpecialCases(D d, Traits st, T* HWY_RESTRICT keys,
+                                   size_t num, T* HWY_RESTRICT buf) {
+  const size_t N = Lanes(d);
+  constexpr size_t kLPK = st.LanesPerKey();
+  const size_t base_case_num = Constants::BaseCaseNumLanes<kLPK>(N);
+
+  // Recurse will also check this, but doing so here first avoids setting up
+  // the random generator state.
+  if (HWY_UNLIKELY(num <= base_case_num)) {
+    if (VQSORT_PRINT >= 1) {
+      fprintf(stderr, "Special-casing small, %d lanes\n",
+              static_cast<int>(num));
+    }
+    BaseCase(d, st, keys, num, buf);
+    return true;
+  }
+
+  // 128-bit keys require vectors with at least two u64 lanes, which is always
+  // the case unless `d` requests partial vectors (e.g. fraction = 1/2) AND the
+  // hardware vector width is less than 128bit / fraction.
+  const bool partial_128 = !IsFull(d) && N < 2 && st.Is128();
+  // Partition assumes its input is at least two vectors. If vectors are huge,
+  // base_case_num may actually be smaller. If so, which is only possible on
+  // RVV, pass a capped or partial d (LMUL < 1). Use HWY_MAX_BYTES instead of
+  // HWY_LANES to account for the largest possible LMUL.
+  constexpr bool kPotentiallyHuge =
+      HWY_MAX_BYTES / sizeof(T) > Constants::kMaxRows * Constants::kMaxCols;
+  const bool huge_vec = kPotentiallyHuge && (2 * N > base_case_num);
+  if (partial_128 || huge_vec) {
+    if (VQSORT_PRINT >= 1) {
+      fprintf(stderr, "WARNING: using slow HeapSort: partial %d huge %d\n",
+              partial_128, huge_vec);
+    }
+    HeapSort(st, keys, num);
+    return true;
+  }
+
+  // We could also check for already sorted/reverse/equal, but that's probably
+  // counterproductive if vqsort is used as a base case.
+
+  return false;  // not finished sorting
+}
+
+#endif  // VQSORT_ENABLED
+
+template <class D, class Traits, typename T, HWY_IF_FLOAT(T)>
+HWY_INLINE size_t CountAndReplaceNaN(D d, Traits st, T* HWY_RESTRICT keys,
+                                     size_t num) {
+  const size_t N = Lanes(d);
+  // Will be sorted to the back of the array.
+  const Vec<D> sentinel = st.LastValue(d);
+  size_t num_nan = 0;
+  size_t i = 0;
+  for (; i + N <= num; i += N) {
+    const Mask<D> is_nan = IsNaN(LoadU(d, keys + i));
+    BlendedStore(sentinel, is_nan, d, keys + i);
+    num_nan += CountTrue(d, is_nan);
+  }
+
+#if HWY_MEM_OPS_MIGHT_FAULT
+  // Proceed one by one.
+  const CappedTag<T, 1> d1;
+  const Vec<decltype(d1)> sentinel1 = Set(d1, GetLane(sentinel));
+  for (; i < num; ++i) {
+    const Mask<decltype(d1)> is_nan = IsNaN(LoadU(d1, keys + i));
+    BlendedStore(sentinel1, is_nan, d1, keys + i);
+    num_nan += CountTrue(d1, is_nan);
+  }
+#else
+  const Mask<D> remaining = FirstN(d, num - i);
+  const Mask<D> is_nan = IsNaN(MaskedLoad(remaining, d, keys + i));
+  BlendedStore(sentinel, is_nan, d, keys + i);
+  num_nan += CountTrue(d, is_nan);
+#endif
+
+  return num_nan;
+}
+
+// IsNaN is not implemented for non-float, so skip it.
+template <class D, class Traits, typename T, HWY_IF_NOT_FLOAT(T)>
+HWY_INLINE size_t CountAndReplaceNaN(D, Traits, T* HWY_RESTRICT, size_t) {
+  return 0;
+}
+
+HWY_INLINE void Fill16BytesStatic(void* bytes) {
+#if !VQSORT_ONLY_STATIC
+  if (Fill16BytesSecure(bytes)) return;
+#endif
+
+  uint64_t* words = reinterpret_cast<uint64_t*>(bytes);
+
+  // Static-only, or Fill16BytesSecure failed. Get some entropy from the
+  // stack/code location, and the clock() timer.
+  uint64_t** seed_stack = &words;
+  void (*seed_code)(void*) = &Fill16BytesStatic;
+  const uintptr_t bits_stack = reinterpret_cast<uintptr_t>(seed_stack);
+  const uintptr_t bits_code = reinterpret_cast<uintptr_t>(seed_code);
+  const uint64_t bits_time = static_cast<uint64_t>(clock());
+  words[0] = bits_stack ^ bits_time ^ 0xFEDCBA98;  // "Nothing up my sleeve"
+  words[1] = bits_code ^ bits_time ^ 0x01234567;   // constants.
+}
+
+HWY_INLINE uint64_t* GetGeneratorStateStatic() {
+  thread_local uint64_t state[3] = {0};
+  // This is a counter; zero indicates not yet initialized.
+  if (HWY_UNLIKELY(state[2] == 0)) {
+    Fill16BytesStatic(state);
+    state[2] = 1;
+  }
+  return state;
+}
+
+}  // namespace detail
+
+// Old interface with user-specified buffer, retained for compatibility. Called
+// by the newer overload below. `buf` must be vector-aligned and hold at least
+// SortConstants::BufBytes<T, st.LanesPerKey()>(HWY_MAX_BYTES) bytes.
+template <class D, class Traits, typename T>
+void Sort(D d, Traits st, T* HWY_RESTRICT keys, size_t num,
+          T* HWY_RESTRICT buf) {
+  if (VQSORT_PRINT >= 1) {
+    fprintf(stderr, "=============== Sort num %zu vec bytes %d\n", num,
+            static_cast<int>(sizeof(T) * Lanes(d)));
+  }
+
+#if HWY_MAX_BYTES > 64
+  // sorting_networks-inl and traits assume no more than 512 bit vectors.
+  if (HWY_UNLIKELY(Lanes(d) > 64 / sizeof(T))) {
+    return Sort(CappedTag<T, 64 / sizeof(T)>(), st, keys, num, buf);
+  }
+#endif  // HWY_MAX_BYTES > 64
+
+  const size_t num_nan = detail::CountAndReplaceNaN(d, st, keys, num);
+
+#if VQSORT_ENABLED || HWY_IDE
+  if (!detail::HandleSpecialCases(d, st, keys, num, buf)) {
+    uint64_t* HWY_RESTRICT state = detail::GetGeneratorStateStatic();
+    // Introspection: switch to worst-case N*logN heapsort after this many
+    // levels. Should never be reached, so computing log2 exactly does not help.
+    const size_t max_levels = 50;
+    detail::Recurse(d, st, keys, num, buf, state, max_levels);
+  }
+#else   // !VQSORT_ENABLED
+  (void)d;
+  (void)buf;
+  if (VQSORT_PRINT >= 1) {
+    fprintf(stderr, "WARNING: using slow HeapSort because vqsort disabled\n");
+  }
+  detail::HeapSort(st, keys, num);
+#endif  // VQSORT_ENABLED
+
+  if (num_nan != 0) {
+    Fill(d, GetLane(NaN(d)), num_nan, keys + num - num_nan);
+  }
+}
+
+// Sorts `keys[0..num-1]` according to the order defined by `st.Compare`.
+// In-place i.e. O(1) additional storage. Worst-case N*logN comparisons.
+// Non-stable (order of equal keys may change), except for the common case where
+// the upper bits of T are the key, and the lower bits are a sequential or at
+// least unique ID. Any NaN will be moved to the back of the array and replaced
+// with the canonical NaN(d).
+// There is no upper limit on `num`, but note that pivots may be chosen by
+// sampling only from the first 256 GiB.
+//
+// `d` is typically SortTag<T> (chooses between full and partial vectors).
+// `st` is SharedTraits<Traits*<Order*>>. This abstraction layer bridges
+//   differences in sort order and single-lane vs 128-bit keys.
+template <class D, class Traits, typename T>
+HWY_API void Sort(D d, Traits st, T* HWY_RESTRICT keys, size_t num) {
+  constexpr size_t kLPK = st.LanesPerKey();
+  HWY_ALIGN T buf[SortConstants::BufBytes<T, kLPK>(HWY_MAX_BYTES) / sizeof(T)];
+  return Sort(d, st, keys, num, buf);
+}
+
+#if VQSORT_ENABLED
+// Adapter from VQSort[Static] to SortTag and Traits*/Order*.
+namespace detail {
+
+// Primary template for built-in key types
+template <typename T>
+struct KeyAdapter {
+  using Ascending = OrderAscending<T>;
+  using Descending = OrderDescending<T>;
+
+  template <class Order>
+  using Traits = TraitsLane<Order>;
+};
+
+template <>
+struct KeyAdapter<hwy::uint128_t> {
+  using Ascending = OrderAscending128;
+  using Descending = OrderDescending128;
+
+  template <class Order>
+  using Traits = Traits128<Order>;
+};
+
+template <>
+struct KeyAdapter<hwy::K64V64> {
+  using Ascending = OrderAscendingKV128;
+  using Descending = OrderDescendingKV128;
+
+  template <class Order>
+  using Traits = Traits128<Order>;
+};
+
+template <>
+struct KeyAdapter<hwy::K32V32> {
+  using Ascending = OrderAscendingKV64;
+  using Descending = OrderDescendingKV64;
+
+  template <class Order>
+  using Traits = TraitsLane<Order>;
+};
+
+}  // namespace detail
+#endif  // VQSORT_ENABLED
+
+// Simpler interface matching VQSort(), but without dynamic dispatch. Uses the
+// instructions available in the current target (HWY_NAMESPACE). Supported key
+// types: 16-64 bit unsigned/signed/floating-point (but float64 only #if
+// HWY_HAVE_FLOAT64), uint128_t, K64V64, K32V32.
+template <typename T>
+void VQSortStatic(T* HWY_RESTRICT keys, size_t num, SortAscending) {
+#if VQSORT_ENABLED
+  using Adapter = detail::KeyAdapter<T>;
+  using Order = typename Adapter::Ascending;
+  const detail::SharedTraits<typename Adapter::template Traits<Order>> st;
+  using LaneType = typename decltype(st)::LaneType;
+  const SortTag<LaneType> d;
+  Sort(d, st, reinterpret_cast<LaneType*>(keys), num * st.LanesPerKey());
+#else
+  (void)keys;
+  (void)num;
+  HWY_ASSERT(0);
+#endif  // VQSORT_ENABLED
+}
+
+template <typename T>
+void VQSortStatic(T* HWY_RESTRICT keys, size_t num, SortDescending) {
+#if VQSORT_ENABLED
+  using Adapter = detail::KeyAdapter<T>;
+  using Order = typename Adapter::Descending;
+  const detail::SharedTraits<typename Adapter::template Traits<Order>> st;
+  using LaneType = typename decltype(st)::LaneType;
+  const SortTag<LaneType> d;
+  Sort(d, st, reinterpret_cast<LaneType*>(keys), num * st.LanesPerKey());
+#else
+  (void)keys;
+  (void)num;
+  HWY_ASSERT(0);
+#endif  // VQSORT_ENABLED
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#endif  // HIGHWAY_HWY_CONTRIB_SORT_VQSORT_TOGGLE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort.cc
new file mode 100644
index 0000000000..9d938360fb
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort.cc
@@ -0,0 +1,218 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"
+
+#include "hwy/base.h"
+#include "hwy/contrib/sort/vqsort-inl.h"
+#include "hwy/per_target.h"
+
+// Check if we have getrandom from <sys/random.h>. Because <features.h> is
+// unavailable on Android and non-Linux RVV, we assume that those systems lack
+// getrandom. Note that the only supported sources of entropy are getrandom or
+// Windows, thus VQSORT_SECURE_SEED=0 when this is 0 and we are not on Windows.
+#if defined(ANDROID) || defined(__ANDROID__) || (HWY_ARCH_RVV && !HWY_OS_LINUX)
+#define VQSORT_GETRANDOM 0
+#endif
+
+#if !defined(VQSORT_GETRANDOM) && HWY_OS_LINUX
+#include <features.h>
+
+// ---- which libc
+#if defined(__UCLIBC__)
+#define VQSORT_GETRANDOM 1  // added Mar 2015, before uclibc-ng 1.0
+
+#elif defined(__GLIBC__) && defined(__GLIBC_PREREQ)
+#if __GLIBC_PREREQ(2, 25)
+#define VQSORT_GETRANDOM 1
+#else
+#define VQSORT_GETRANDOM 0
+#endif
+
+#else
+// Assume MUSL, which has getrandom since 2018. There is no macro to test, see
+// https://www.openwall.com/lists/musl/2013/03/29/13.
+#define VQSORT_GETRANDOM 1
+
+#endif  // ---- which libc
+#endif  // linux
+
+#if !defined(VQSORT_GETRANDOM)
+#define VQSORT_GETRANDOM 0
+#endif
+
+// Choose a seed source for SFC generator: 1=getrandom, 2=CryptGenRandom.
+// Allow user override - not all Android support the getrandom wrapper.
+#ifndef VQSORT_SECURE_SEED
+
+#if VQSORT_GETRANDOM
+#define VQSORT_SECURE_SEED 1
+#elif defined(_WIN32) || defined(_WIN64)
+#define VQSORT_SECURE_SEED 2
+#else
+#define VQSORT_SECURE_SEED 0
+#endif
+
+#endif  // VQSORT_SECURE_SEED
+
+// Pull in dependencies of the chosen seed source.
+#if VQSORT_SECURE_SEED == 1
+#include <sys/random.h>
+#elif VQSORT_SECURE_SEED == 2
+#include <windows.h>
+#if HWY_COMPILER_MSVC || HWY_COMPILER_CLANGCL
+#pragma comment(lib, "advapi32.lib")
+#endif  // HWY_COMPILER_MSVC || HWY_COMPILER_CLANGCL
+// Must come after windows.h.
+#include <wincrypt.h>
+#endif  // VQSORT_SECURE_SEED
+
+namespace hwy {
+
+// Returns false or performs the equivalent of `memcpy(bytes, r, 16)`, where r
+// is high-quality (unpredictable, uniformly distributed) random bits.
+bool Fill16BytesSecure(void* bytes) {
+#if VQSORT_SECURE_SEED == 1
+  // May block if urandom is not yet initialized.
+  const ssize_t ret = getrandom(bytes, 16, /*flags=*/0);
+  if (ret == 16) return true;
+#elif VQSORT_SECURE_SEED == 2
+  HCRYPTPROV hProvider{};
+  if (CryptAcquireContextA(&hProvider, nullptr, nullptr, PROV_RSA_FULL,
+                           CRYPT_VERIFYCONTEXT)) {
+    const BOOL ok =
+        CryptGenRandom(hProvider, 16, reinterpret_cast<BYTE*>(bytes));
+    CryptReleaseContext(hProvider, 0);
+    if (ok) return true;
+  }
+#else
+  (void)bytes;
+#endif
+
+  return false;
+}
+
+void Sorter::operator()(uint16_t* HWY_RESTRICT keys, size_t n,
+                        SortAscending tag) const {
+  VQSort(keys, n, tag);
+}
+void Sorter::operator()(uint16_t* HWY_RESTRICT keys, size_t n,
+                        SortDescending tag) const {
+  VQSort(keys, n, tag);
+}
+void Sorter::operator()(uint32_t* HWY_RESTRICT keys, size_t n,
+                        SortAscending tag) const {
+  VQSort(keys, n, tag);
+}
+void Sorter::operator()(uint32_t* HWY_RESTRICT keys, size_t n,
+                        SortDescending tag) const {
+  VQSort(keys, n, tag);
+}
+void Sorter::operator()(uint64_t* HWY_RESTRICT keys, size_t n,
+                        SortAscending tag) const {
+  VQSort(keys, n, tag);
+}
+void Sorter::operator()(uint64_t* HWY_RESTRICT keys, size_t n,
+                        SortDescending tag) const {
+  VQSort(keys, n, tag);
+}
+
+void Sorter::operator()(int16_t* HWY_RESTRICT keys, size_t n,
+                        SortAscending tag) const {
+  VQSort(keys, n, tag);
+}
+void Sorter::operator()(int16_t* HWY_RESTRICT keys, size_t n,
+                        SortDescending tag) const {
+  VQSort(keys, n, tag);
+}
+void Sorter::operator()(int32_t* HWY_RESTRICT keys, size_t n,
+                        SortAscending tag) const {
+  VQSort(keys, n, tag);
+}
+void Sorter::operator()(int32_t* HWY_RESTRICT keys, size_t n,
+                        SortDescending tag) const {
+  VQSort(keys, n, tag);
+}
+void Sorter::operator()(int64_t* HWY_RESTRICT keys, size_t n,
+                        SortAscending tag) const {
+  VQSort(keys, n, tag);
+}
+void Sorter::operator()(int64_t* HWY_RESTRICT keys, size_t n,
+                        SortDescending tag) const {
+  VQSort(keys, n, tag);
+}
+
+void Sorter::operator()(float16_t* HWY_RESTRICT keys, size_t n,
+                        SortAscending tag) const {
+  VQSort(keys, n, tag);
+}
+void Sorter::operator()(float16_t* HWY_RESTRICT keys, size_t n,
+                        SortDescending tag) const {
+  VQSort(keys, n, tag);
+}
+void Sorter::operator()(float* HWY_RESTRICT keys, size_t n,
+                        SortAscending tag) const {
+  VQSort(keys, n, tag);
+}
+void Sorter::operator()(float* HWY_RESTRICT keys, size_t n,
+                        SortDescending tag) const {
+  VQSort(keys, n, tag);
+}
+void Sorter::operator()(double* HWY_RESTRICT keys, size_t n,
+                        SortAscending tag) const {
+  VQSort(keys, n, tag);
+}
+void Sorter::operator()(double* HWY_RESTRICT keys, size_t n,
+                        SortDescending tag) const {
+  VQSort(keys, n, tag);
+}
+
+void Sorter::operator()(uint128_t* HWY_RESTRICT keys, size_t n,
+                        SortAscending tag) const {
+  VQSort(keys, n, tag);
+}
+void Sorter::operator()(uint128_t* HWY_RESTRICT keys, size_t n,
+                        SortDescending tag) const {
+  VQSort(keys, n, tag);
+}
+
+void Sorter::operator()(K64V64* HWY_RESTRICT keys, size_t n,
+                        SortAscending tag) const {
+  VQSort(keys, n, tag);
+}
+void Sorter::operator()(K64V64* HWY_RESTRICT keys, size_t n,
+                        SortDescending tag) const {
+  VQSort(keys, n, tag);
+}
+
+void Sorter::operator()(K32V32* HWY_RESTRICT keys, size_t n,
+                        SortAscending tag) const {
+  VQSort(keys, n, tag);
+}
+void Sorter::operator()(K32V32* HWY_RESTRICT keys, size_t n,
+                        SortDescending tag) const {
+  VQSort(keys, n, tag);
+}
+
+// Unused, only for ABI compatibility
+void Sorter::Fill24Bytes(const void*, size_t, void*) {}
+bool Sorter::HaveFloat64() { return hwy::HaveFloat64(); }
+Sorter::Sorter() {}
+void Sorter::Delete() {}
+uint64_t* GetGeneratorState() {
+  return HWY_STATIC_DISPATCH(detail::GetGeneratorStateStatic());
+}
+
+}  // namespace hwy
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort.h b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort.h
new file mode 100644
index 0000000000..857e4b8260
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort.h
@@ -0,0 +1,172 @@
+// Copyright 2022 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Interface to vectorized quicksort with dynamic dispatch. For static dispatch
+// without any DLLEXPORT, avoid including this header and instead define
+// VQSORT_ONLY_STATIC, then call VQSortStatic* in vqsort-inl.h.
+//
+// Blog post: https://tinyurl.com/vqsort-blog
+// Paper with measurements: https://arxiv.org/abs/2205.05982
+//
+// To ensure the overhead of using wide vectors (e.g. AVX2 or AVX-512) is
+// worthwhile, we recommend using this code for sorting arrays whose size is at
+// least 100 KiB. See the README for details.
+
+#ifndef HIGHWAY_HWY_CONTRIB_SORT_VQSORT_H_
+#define HIGHWAY_HWY_CONTRIB_SORT_VQSORT_H_
+
+#include "hwy/base.h"
+#include "hwy/contrib/sort/order.h"  // SortAscending
+
+namespace hwy {
+
+// Vectorized Quicksort: sorts keys[0, n). Does not preserve the ordering of
+// equivalent keys (defined as: neither greater nor less than another).
+// Dispatches to the best available instruction set. Does not allocate memory.
+// Uses about 1.2 KiB stack plus an internal 3-word TLS cache for random state.
+HWY_CONTRIB_DLLEXPORT void VQSort(uint16_t* HWY_RESTRICT keys, size_t n,
+                                  SortAscending);
+HWY_CONTRIB_DLLEXPORT void VQSort(uint16_t* HWY_RESTRICT keys, size_t n,
+                                  SortDescending);
+HWY_CONTRIB_DLLEXPORT void VQSort(uint32_t* HWY_RESTRICT keys, size_t n,
+                                  SortAscending);
+HWY_CONTRIB_DLLEXPORT void VQSort(uint32_t* HWY_RESTRICT keys, size_t n,
+                                  SortDescending);
+HWY_CONTRIB_DLLEXPORT void VQSort(uint64_t* HWY_RESTRICT keys, size_t n,
+                                  SortAscending);
+HWY_CONTRIB_DLLEXPORT void VQSort(uint64_t* HWY_RESTRICT keys, size_t n,
+                                  SortDescending);
+HWY_CONTRIB_DLLEXPORT void VQSort(int16_t* HWY_RESTRICT keys, size_t n,
+                                  SortAscending);
+HWY_CONTRIB_DLLEXPORT void VQSort(int16_t* HWY_RESTRICT keys, size_t n,
+                                  SortDescending);
+HWY_CONTRIB_DLLEXPORT void VQSort(int32_t* HWY_RESTRICT keys, size_t n,
+                                  SortAscending);
+HWY_CONTRIB_DLLEXPORT void VQSort(int32_t* HWY_RESTRICT keys, size_t n,
+                                  SortDescending);
+HWY_CONTRIB_DLLEXPORT void VQSort(int64_t* HWY_RESTRICT keys, size_t n,
+                                  SortAscending);
+HWY_CONTRIB_DLLEXPORT void VQSort(int64_t* HWY_RESTRICT keys, size_t n,
+                                  SortDescending);
+
+// These two must only be called if hwy::HaveFloat16() is true.
+HWY_CONTRIB_DLLEXPORT void VQSort(float16_t* HWY_RESTRICT keys, size_t n,
+                                  SortAscending);
+HWY_CONTRIB_DLLEXPORT void VQSort(float16_t* HWY_RESTRICT keys, size_t n,
+                                  SortDescending);
+
+HWY_CONTRIB_DLLEXPORT void VQSort(float* HWY_RESTRICT keys, size_t n,
+                                  SortAscending);
+HWY_CONTRIB_DLLEXPORT void VQSort(float* HWY_RESTRICT keys, size_t n,
+                                  SortDescending);
+
+// These two must only be called if hwy::HaveFloat64() is true.
+HWY_CONTRIB_DLLEXPORT void VQSort(double* HWY_RESTRICT keys, size_t n,
+                                  SortAscending);
+HWY_CONTRIB_DLLEXPORT void VQSort(double* HWY_RESTRICT keys, size_t n,
+                                  SortDescending);
+// Overloads for uint128_t and the K64V64 / K32V32 element types.
+HWY_CONTRIB_DLLEXPORT void VQSort(uint128_t* HWY_RESTRICT keys, size_t n,
+                                  SortAscending);
+HWY_CONTRIB_DLLEXPORT void VQSort(uint128_t* HWY_RESTRICT keys, size_t n,
+                                  SortDescending);
+HWY_CONTRIB_DLLEXPORT void VQSort(K64V64* HWY_RESTRICT keys, size_t n,
+                                  SortAscending);
+HWY_CONTRIB_DLLEXPORT void VQSort(K64V64* HWY_RESTRICT keys, size_t n,
+                                  SortDescending);
+HWY_CONTRIB_DLLEXPORT void VQSort(K32V32* HWY_RESTRICT keys, size_t n,
+                                  SortAscending);
+HWY_CONTRIB_DLLEXPORT void VQSort(K32V32* HWY_RESTRICT keys, size_t n,
+                                  SortDescending);
+
+// User-level caching is no longer required, so this class is no longer
+// beneficial. We recommend using the simpler VQSort() interface instead, and
+// retain this class only for compatibility. It now just calls VQSort.
+class HWY_CONTRIB_DLLEXPORT Sorter {
+ public:
+  Sorter();
+  ~Sorter() { Delete(); }
+
+  // Move-only: copies are deleted; moves do nothing (no state to transfer).
+  Sorter(const Sorter&) = delete;
+  Sorter& operator=(const Sorter&) = delete;
+  Sorter(Sorter&& /*other*/) {}
+  Sorter& operator=(Sorter&& /*other*/) { return *this; }
+
+  void operator()(uint16_t* HWY_RESTRICT keys, size_t n, SortAscending) const;
+  void operator()(uint16_t* HWY_RESTRICT keys, size_t n, SortDescending) const;
+  void operator()(uint32_t* HWY_RESTRICT keys, size_t n, SortAscending) const;
+  void operator()(uint32_t* HWY_RESTRICT keys, size_t n, SortDescending) const;
+  void operator()(uint64_t* HWY_RESTRICT keys, size_t n, SortAscending) const;
+  void operator()(uint64_t* HWY_RESTRICT keys, size_t n, SortDescending) const;
+
+  void operator()(int16_t* HWY_RESTRICT keys, size_t n, SortAscending) const;
+  void operator()(int16_t* HWY_RESTRICT keys, size_t n, SortDescending) const;
+  void operator()(int32_t* HWY_RESTRICT keys, size_t n, SortAscending) const;
+  void operator()(int32_t* HWY_RESTRICT keys, size_t n, SortDescending) const;
+  void operator()(int64_t* HWY_RESTRICT keys, size_t n, SortAscending) const;
+  void operator()(int64_t* HWY_RESTRICT keys, size_t n, SortDescending) const;
+
+  // These two must only be called if hwy::HaveFloat16() is true.
+  void operator()(float16_t* HWY_RESTRICT keys, size_t n, SortAscending) const;
+  void operator()(float16_t* HWY_RESTRICT keys, size_t n, SortDescending) const;
+
+  void operator()(float* HWY_RESTRICT keys, size_t n, SortAscending) const;
+  void operator()(float* HWY_RESTRICT keys, size_t n, SortDescending) const;
+
+  // These two must only be called if hwy::HaveFloat64() is true.
+  void operator()(double* HWY_RESTRICT keys, size_t n, SortAscending) const;
+  void operator()(double* HWY_RESTRICT keys, size_t n, SortDescending) const;
+
+  void operator()(uint128_t* HWY_RESTRICT keys, size_t n, SortAscending) const;
+  void operator()(uint128_t* HWY_RESTRICT keys, size_t n, SortDescending) const;
+
+  void operator()(K64V64* HWY_RESTRICT keys, size_t n, SortAscending) const;
+  void operator()(K64V64* HWY_RESTRICT keys, size_t n, SortDescending) const;
+
+  void operator()(K32V32* HWY_RESTRICT keys, size_t n, SortAscending) const;
+  void operator()(K32V32* HWY_RESTRICT keys, size_t n, SortDescending) const;
+
+  // Unused
+  static void Fill24Bytes(const void*, size_t, void*);
+  static bool HaveFloat64();  // Can also use hwy::HaveFloat64 directly.
+
+ private:
+  void Delete();
+
+  template <typename T>
+  T* Get() const {
+    return unused_;  // NOTE(review): void* -> T* needs a cast unless T is void.
+  }
+
+#if HWY_COMPILER_CLANG
+  HWY_DIAGNOSTICS(push)
+  HWY_DIAGNOSTICS_OFF(disable : 4700, ignored "-Wunused-private-field")
+#endif
+  void* unused_ = nullptr;  // Placeholder; never assigned in the code shown.
+#if HWY_COMPILER_CLANG
+  HWY_DIAGNOSTICS(pop)
+#endif
+};
+
+// Used by vqsort-inl unless VQSORT_ONLY_STATIC.
+HWY_CONTRIB_DLLEXPORT bool Fill16BytesSecure(void* bytes);
+
+// Unused, only provided for binary compatibility.
+HWY_CONTRIB_DLLEXPORT uint64_t* GetGeneratorState();
+
+}  // namespace hwy
+
+#endif  // HIGHWAY_HWY_CONTRIB_SORT_VQSORT_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_128a.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_128a.cc
new file mode 100644
index 0000000000..fbee0cb2a3
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_128a.cc
@@ -0,0 +1,49 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_128a.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+// Ascending sort of uint128_t keys; forwards to VQSortStatic.
+void Sort128Asc(uint128_t* HWY_RESTRICT keys, size_t num) {
+  return VQSortStatic(keys, num, SortAscending());
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(Sort128Asc);
+}  // namespace
+// Ascending uint128_t overload: calls Sort128Asc via HWY_DYNAMIC_DISPATCH.
+void VQSort(uint128_t* HWY_RESTRICT keys, size_t n, SortAscending) {
+  HWY_DYNAMIC_DISPATCH(Sort128Asc)(keys, n);
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_128d.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_128d.cc
new file mode 100644
index 0000000000..ec61bdf146
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_128d.cc
@@ -0,0 +1,49 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_128d.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+// Descending sort of uint128_t keys; forwards to VQSortStatic.
+void Sort128Desc(uint128_t* HWY_RESTRICT keys, size_t num) {
+  return VQSortStatic(keys, num, SortDescending());
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(Sort128Desc);
+}  // namespace
+// Descending uint128_t overload: calls Sort128Desc via HWY_DYNAMIC_DISPATCH.
+void VQSort(uint128_t* HWY_RESTRICT keys, size_t n, SortDescending) {
+  HWY_DYNAMIC_DISPATCH(Sort128Desc)(keys, n);
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_f16a.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_f16a.cc
new file mode 100644
index 0000000000..a93e547d51
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_f16a.cc
@@ -0,0 +1,55 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_f16a.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+// Ascending float16_t sort; hits HWY_ASSERT(0) unless HWY_HAVE_FLOAT16.
+void SortF16Asc(float16_t* HWY_RESTRICT keys, size_t num) {
+#if HWY_HAVE_FLOAT16
+  return VQSortStatic(keys, num, SortAscending());
+#else
+  (void)keys;
+  (void)num;
+  HWY_ASSERT(0);
+#endif
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortF16Asc);
+}  // namespace
+// Ascending float16_t overload: calls SortF16Asc via HWY_DYNAMIC_DISPATCH.
+void VQSort(float16_t* HWY_RESTRICT keys, size_t n, SortAscending) {
+  HWY_DYNAMIC_DISPATCH(SortF16Asc)(keys, n);
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_f16d.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_f16d.cc
new file mode 100644
index 0000000000..4a29c66c6f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_f16d.cc
@@ -0,0 +1,55 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_f16d.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+// Descending float16_t sort; hits HWY_ASSERT(0) unless HWY_HAVE_FLOAT16.
+void SortF16Desc(float16_t* HWY_RESTRICT keys, size_t num) {
+#if HWY_HAVE_FLOAT16
+  return VQSortStatic(keys, num, SortDescending());
+#else
+  (void)keys;
+  (void)num;
+  HWY_ASSERT(0);
+#endif
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortF16Desc);
+}  // namespace
+// Descending float16_t overload: calls SortF16Desc via HWY_DYNAMIC_DISPATCH.
+void VQSort(float16_t* HWY_RESTRICT keys, size_t n, SortDescending) {
+  HWY_DYNAMIC_DISPATCH(SortF16Desc)(keys, n);
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_f32a.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_f32a.cc
new file mode 100644
index 0000000000..b25b0b307c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_f32a.cc
@@ -0,0 +1,49 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_f32a.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+// Ascending sort of float keys; forwards to VQSortStatic.
+void SortF32Asc(float* HWY_RESTRICT keys, size_t num) {
+  return VQSortStatic(keys, num, SortAscending());
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortF32Asc);
+}  // namespace
+// Ascending float overload: calls SortF32Asc via HWY_DYNAMIC_DISPATCH.
+void VQSort(float* HWY_RESTRICT keys, size_t n, SortAscending) {
+  HWY_DYNAMIC_DISPATCH(SortF32Asc)(keys, n);
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_f32d.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_f32d.cc
new file mode 100644
index 0000000000..3d87f3d1e7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_f32d.cc
@@ -0,0 +1,50 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_f32d.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/traits-inl.h"
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+// Descending sort of float keys; forwards to VQSortStatic.
+void SortF32Desc(float* HWY_RESTRICT keys, size_t num) {
+  return VQSortStatic(keys, num, SortDescending());
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortF32Desc);
+}  // namespace
+// Descending float overload: calls SortF32Desc via HWY_DYNAMIC_DISPATCH.
+void VQSort(float* HWY_RESTRICT keys, size_t n, SortDescending) {
+  HWY_DYNAMIC_DISPATCH(SortF32Desc)(keys, n);
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_f64a.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_f64a.cc
new file mode 100644
index 0000000000..af29794853
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_f64a.cc
@@ -0,0 +1,55 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_f64a.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+// Ascending double sort; hits HWY_ASSERT(0) unless HWY_HAVE_FLOAT64.
+void SortF64Asc(double* HWY_RESTRICT keys, size_t num) {
+#if HWY_HAVE_FLOAT64
+  return VQSortStatic(keys, num, SortAscending());
+#else
+  (void)keys;
+  (void)num;
+  HWY_ASSERT(0);
+#endif
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortF64Asc);
+}  // namespace
+// Ascending double overload: calls SortF64Asc via HWY_DYNAMIC_DISPATCH.
+void VQSort(double* HWY_RESTRICT keys, size_t n, SortAscending) {
+  HWY_DYNAMIC_DISPATCH(SortF64Asc)(keys, n);
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_f64d.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_f64d.cc
new file mode 100644
index 0000000000..a389348eea
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_f64d.cc
@@ -0,0 +1,55 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_f64d.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+// Descending double sort; hits HWY_ASSERT(0) unless HWY_HAVE_FLOAT64.
+void SortF64Desc(double* HWY_RESTRICT keys, size_t num) {
+#if HWY_HAVE_FLOAT64
+  return VQSortStatic(keys, num, SortDescending());
+#else
+  (void)keys;
+  (void)num;
+  HWY_ASSERT(0);
+#endif
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortF64Desc);
+}  // namespace
+// Descending double overload: calls SortF64Desc via HWY_DYNAMIC_DISPATCH.
+void VQSort(double* HWY_RESTRICT keys, size_t n, SortDescending) {
+  HWY_DYNAMIC_DISPATCH(SortF64Desc)(keys, n);
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_i16a.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_i16a.cc
new file mode 100644
index 0000000000..aff3ca7854
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_i16a.cc
@@ -0,0 +1,49 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_i16a.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+// Ascending sort of int16_t keys; forwards to VQSortStatic.
+void SortI16Asc(int16_t* HWY_RESTRICT keys, size_t num) {
+  return VQSortStatic(keys, num, SortAscending());
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortI16Asc);
+}  // namespace
+// Ascending int16_t overload: calls SortI16Asc via HWY_DYNAMIC_DISPATCH.
+void VQSort(int16_t* HWY_RESTRICT keys, size_t n, SortAscending) {
+  HWY_DYNAMIC_DISPATCH(SortI16Asc)(keys, n);
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_i16d.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_i16d.cc
new file mode 100644
index 0000000000..f857e020b2
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_i16d.cc
@@ -0,0 +1,49 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_i16d.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+// Descending sort of int16_t keys; forwards to VQSortStatic.
+void SortI16Desc(int16_t* HWY_RESTRICT keys, size_t num) {
+  return VQSortStatic(keys, num, SortDescending());
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortI16Desc);
+}  // namespace
+// Descending int16_t overload: calls SortI16Desc via HWY_DYNAMIC_DISPATCH.
+void VQSort(int16_t* HWY_RESTRICT keys, size_t n, SortDescending) {
+  HWY_DYNAMIC_DISPATCH(SortI16Desc)(keys, n);
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_i32a.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_i32a.cc
new file mode 100644
index 0000000000..2943ed17f7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_i32a.cc
@@ -0,0 +1,49 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_i32a.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+// Ascending sort of int32_t keys; forwards to VQSortStatic.
+void SortI32Asc(int32_t* HWY_RESTRICT keys, size_t num) {
+  return VQSortStatic(keys, num, SortAscending());
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortI32Asc);
+}  // namespace
+// Ascending int32_t overload: calls SortI32Asc via HWY_DYNAMIC_DISPATCH.
+void VQSort(int32_t* HWY_RESTRICT keys, size_t n, SortAscending) {
+  HWY_DYNAMIC_DISPATCH(SortI32Asc)(keys, n);
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_i32d.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_i32d.cc
new file mode 100644
index 0000000000..e9e8be8626
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_i32d.cc
@@ -0,0 +1,49 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_i32d.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+// Descending sort of int32_t keys; forwards to VQSortStatic.
+void SortI32Desc(int32_t* HWY_RESTRICT keys, size_t num) {
+  return VQSortStatic(keys, num, SortDescending());
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortI32Desc);
+}  // namespace
+// Descending int32_t overload: calls SortI32Desc via HWY_DYNAMIC_DISPATCH.
+void VQSort(int32_t* HWY_RESTRICT keys, size_t n, SortDescending) {
+  HWY_DYNAMIC_DISPATCH(SortI32Desc)(keys, n);
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_i64a.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_i64a.cc
new file mode 100644
index 0000000000..890d4b89d8
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_i64a.cc
@@ -0,0 +1,49 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_i64a.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+// Ascending sort of int64_t keys; forwards to VQSortStatic.
+void SortI64Asc(int64_t* HWY_RESTRICT keys, size_t num) {
+  return VQSortStatic(keys, num, SortAscending());
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortI64Asc);
+}  // namespace
+// Ascending int64_t overload: calls SortI64Asc via HWY_DYNAMIC_DISPATCH.
+void VQSort(int64_t* HWY_RESTRICT keys, size_t n, SortAscending) {
+  HWY_DYNAMIC_DISPATCH(SortI64Asc)(keys, n);
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_i64d.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_i64d.cc
new file mode 100644
index 0000000000..7fe7c16091
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_i64d.cc
@@ -0,0 +1,49 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_i64d.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+void SortI64Desc(int64_t* HWY_RESTRICT keys, size_t num) {
+  VQSortStatic(keys, num, SortDescending());  // sorts keys[0, num) descending
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortI64Desc);
+}  // namespace
+
+void VQSort(int64_t* HWY_RESTRICT keys, size_t n, SortDescending) {
+  HWY_DYNAMIC_DISPATCH(SortI64Desc)(keys, n);  // calls per-target SortI64Desc
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_kv128a.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_kv128a.cc
new file mode 100644
index 0000000000..d9dc293dd1
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_kv128a.cc
@@ -0,0 +1,52 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+// clang-format off
+// (avoid line break, which would prevent Copybara rules from matching)
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_kv128a.cc"  //NOLINT
+// clang-format on
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+void SortKV128Asc(K64V64* HWY_RESTRICT keys, size_t num) {
+  VQSortStatic(keys, num, SortAscending());  // sorts keys[0, num) ascending
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortKV128Asc);
+}  // namespace
+
+void VQSort(K64V64* HWY_RESTRICT keys, size_t n, SortAscending) {
+  HWY_DYNAMIC_DISPATCH(SortKV128Asc)(keys, n);  // calls per-target SortKV128Asc
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_kv128d.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_kv128d.cc
new file mode 100644
index 0000000000..36da35ab35
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_kv128d.cc
@@ -0,0 +1,52 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+// clang-format off
+// (avoid line break, which would prevent Copybara rules from matching)
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_kv128d.cc"  //NOLINT
+// clang-format on
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+void SortKV128Desc(K64V64* HWY_RESTRICT keys, size_t num) {
+  VQSortStatic(keys, num, SortDescending());  // sorts keys[0, num) descending
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortKV128Desc);
+}  // namespace
+
+void VQSort(K64V64* HWY_RESTRICT keys, size_t n, SortDescending) {
+  HWY_DYNAMIC_DISPATCH(SortKV128Desc)(keys, n);  // per-target SortKV128Desc
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_kv64a.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_kv64a.cc
new file mode 100644
index 0000000000..7c79af9c18
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_kv64a.cc
@@ -0,0 +1,52 @@
+// Copyright 2022 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+// clang-format off
+// (avoid line break, which would prevent Copybara rules from matching)
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_kv64a.cc"  //NOLINT
+// clang-format on
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+void SortKV64Asc(K32V32* HWY_RESTRICT keys, size_t num) {
+  VQSortStatic(keys, num, SortAscending());  // sorts keys[0, num) ascending
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortKV64Asc);
+}  // namespace
+
+void VQSort(K32V32* HWY_RESTRICT keys, size_t n, SortAscending) {
+  HWY_DYNAMIC_DISPATCH(SortKV64Asc)(keys, n);  // calls per-target SortKV64Asc
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_kv64d.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_kv64d.cc
new file mode 100644
index 0000000000..21a64c01ca
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_kv64d.cc
@@ -0,0 +1,52 @@
+// Copyright 2022 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+// clang-format off
+// (avoid line break, which would prevent Copybara rules from matching)
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_kv64d.cc"  //NOLINT
+// clang-format on
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+void SortKV64Desc(K32V32* HWY_RESTRICT keys, size_t num) {
+  VQSortStatic(keys, num, SortDescending());  // sorts keys[0, num) descending
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortKV64Desc);
+}  // namespace
+
+void VQSort(K32V32* HWY_RESTRICT keys, size_t n, SortDescending) {
+  HWY_DYNAMIC_DISPATCH(SortKV64Desc)(keys, n);  // calls per-target SortKV64Desc
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_u16a.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_u16a.cc
new file mode 100644
index 0000000000..194a9f89f7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_u16a.cc
@@ -0,0 +1,49 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_u16a.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+void SortU16Asc(uint16_t* HWY_RESTRICT keys, size_t num) {
+  VQSortStatic(keys, num, SortAscending());  // sorts keys[0, num) ascending
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortU16Asc);
+}  // namespace
+
+void VQSort(uint16_t* HWY_RESTRICT keys, size_t n, SortAscending) {
+  HWY_DYNAMIC_DISPATCH(SortU16Asc)(keys, n);  // calls per-target SortU16Asc
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_u16d.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_u16d.cc
new file mode 100644
index 0000000000..06ebca3db4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_u16d.cc
@@ -0,0 +1,49 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_u16d.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+void SortU16Desc(uint16_t* HWY_RESTRICT keys, size_t num) {
+  VQSortStatic(keys, num, SortDescending());  // sorts keys[0, num) descending
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortU16Desc);
+}  // namespace
+
+void VQSort(uint16_t* HWY_RESTRICT keys, size_t n, SortDescending) {
+  HWY_DYNAMIC_DISPATCH(SortU16Desc)(keys, n);  // calls per-target SortU16Desc
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_u32a.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_u32a.cc
new file mode 100644
index 0000000000..2fa7471b8f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_u32a.cc
@@ -0,0 +1,49 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_u32a.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+void SortU32Asc(uint32_t* HWY_RESTRICT keys, size_t num) {
+  VQSortStatic(keys, num, SortAscending());  // sorts keys[0, num) ascending
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortU32Asc);
+}  // namespace
+
+void VQSort(uint32_t* HWY_RESTRICT keys, size_t n, SortAscending) {
+  HWY_DYNAMIC_DISPATCH(SortU32Asc)(keys, n);  // calls per-target SortU32Asc
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_u32d.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_u32d.cc
new file mode 100644
index 0000000000..697073b456
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_u32d.cc
@@ -0,0 +1,49 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_u32d.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+void SortU32Desc(uint32_t* HWY_RESTRICT keys, size_t num) {
+  VQSortStatic(keys, num, SortDescending());  // sorts keys[0, num) descending
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortU32Desc);
+}  // namespace
+
+void VQSort(uint32_t* HWY_RESTRICT keys, size_t n, SortDescending) {
+  HWY_DYNAMIC_DISPATCH(SortU32Desc)(keys, n);  // calls per-target SortU32Desc
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_u64a.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_u64a.cc
new file mode 100644
index 0000000000..fef5f1a2ae
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_u64a.cc
@@ -0,0 +1,49 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_u64a.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+void SortU64Asc(uint64_t* HWY_RESTRICT keys, size_t num) {
+  VQSortStatic(keys, num, SortAscending());  // sorts keys[0, num) ascending
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortU64Asc);
+}  // namespace
+
+void VQSort(uint64_t* HWY_RESTRICT keys, size_t n, SortAscending) {
+  HWY_DYNAMIC_DISPATCH(SortU64Asc)(keys, n);  // calls per-target SortU64Asc
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_u64d.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_u64d.cc
new file mode 100644
index 0000000000..ba23a13597
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/sort/vqsort_u64d.cc
@@ -0,0 +1,49 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/sort/vqsort.h"  // VQSort
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_u64d.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// After foreach_target
+#include "hwy/contrib/sort/vqsort-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+void SortU64Desc(uint64_t* HWY_RESTRICT keys, size_t num) {
+  VQSortStatic(keys, num, SortDescending());  // sorts keys[0, num) descending
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(SortU64Desc);
+}  // namespace
+
+void VQSort(uint64_t* HWY_RESTRICT keys, size_t n, SortDescending) {
+  HWY_DYNAMIC_DISPATCH(SortU64Desc)(keys, n);  // calls per-target SortU64Desc
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/unroller/README.md b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/unroller/README.md
new file mode 100644
index 0000000000..396b122db2
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/unroller/README.md
@@ -0,0 +1,31 @@
+# Unroller
+
+All contents of the `unroller` folder are experimental and subject to changes.
+
+`Unroller` is a templated function that automatically implements common optimizations that are usually handled by compilers when writing scalar code. Modern CPUs operate much more efficiently when non-dependent calculations are packed into an instruction pipeline. For scalar code, this often means a compiler will take a one-line loop, and compile it down to hundreds of lines of machine code in order to fully capture these efficiencies. 
+
+As of today (2023-07-06), compilers are not nearly as good at implementing these optimizations for code written in SIMD intrinsics. `Unroller` is a templated function that takes in an `UnrollerUnit` of SIMD instructions, and then implements unrolling, reordering, hoisting and tail-handling (URHT optimizations) of arrays of data being processed with SIMD intrinsics. 
+
+### `UnrollerUnit`
+
+`UnrollerUnit` and `UnrollerUnit2D` are base classes declaring the functions that `Unroller` needs implemented in order to properly handle URHT. `UnrollerUnit` has default implementations for all but the `Func` method, which defines the SIMD operation to be applied. Many examples of how to implement these functions are in the tests.
+
+### Doubling values of an array example
+
+```
+struct DoubleUnit : UnrollerUnit<DoubleUnit, int, int> {
+  using TT = ScalableTag<int>;
+  inline Vec<TT> Func(ptrdiff_t idx, Vec<TT> x, Vec<TT> y) {
+    TT d;
+    return Mul(x, Set(d, 2));
+  }
+};
+```
+
+Leaving all other methods in their default state, the following code will double all the values in array `a` and place them in `r`:
+
+```
+DoubleUnit dblunit;
+int r[N];
+Unroller(dblunit, a, r, N);
+```
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/unroller/unroller-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/unroller/unroller-inl.h
new file mode 100644
index 0000000000..4ed8c25c07
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/unroller/unroller-inl.h
@@ -0,0 +1,470 @@
+// Copyright 2023 Matthew Kolbe
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#if defined(HIGHWAY_HWY_CONTRIB_UNROLLER_UNROLLER_INL_H_) == \
+    defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_CONTRIB_UNROLLER_UNROLLER_INL_H_
+#undef HIGHWAY_HWY_CONTRIB_UNROLLER_UNROLLER_INL_H_
+#else
+#define HIGHWAY_HWY_CONTRIB_UNROLLER_UNROLLER_INL_H_
+#endif
+
+#include <cstdlib>  // std::abs
+
+#include "hwy/highway.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+namespace hn = hwy::HWY_NAMESPACE;
+
+template <class DERIVED, typename IN_T, typename OUT_T>
+struct UnrollerUnit {  // CRTP base for one-input units; DERIVED supplies Func.
+  static constexpr size_t kMaxTSize = HWY_MAX(sizeof(IN_T), sizeof(OUT_T));
+  using LargerT = SignedFromSize<kMaxTSize>;  // only the size matters.
+
+  DERIVED* me() { return static_cast<DERIVED*>(this); }  // downcast to DERIVED
+
+  static constexpr size_t MaxUnitLanes() {
+    return HWY_MAX_LANES_D(hn::ScalableTag<LargerT>);
+  }
+  static size_t ActualLanes() { return Lanes(hn::ScalableTag<LargerT>()); }
+
+  using LargerD = hn::CappedTag<LargerT, MaxUnitLanes()>;
+  using IT = hn::Rebind<IN_T, LargerD>;
+  using OT = hn::Rebind<OUT_T, LargerD>;
+  IT d_in;    // tag for input vectors (IN_T lanes)
+  OT d_out;   // tag for output vectors (OUT_T lanes)
+  using Y_VEC = hn::Vec<OT>;
+  using X_VEC = hn::Vec<IT>;
+  // The SIMD operation. DERIVED must define Func; this only forwards to it.
+  Y_VEC Func(const ptrdiff_t idx, const X_VEC x, const Y_VEC y) {
+    return me()->Func(idx, x, y);
+  }
+
+  X_VEC X0Init() { return me()->X0InitImpl(); }
+
+  X_VEC X0InitImpl() { return hn::Zero(d_in); }  // default: all-zero input
+
+  Y_VEC YInit() { return me()->YInitImpl(); }
+
+  Y_VEC YInitImpl() { return hn::Zero(d_out); }  // default: all-zero output
+
+  X_VEC Load(const ptrdiff_t idx, IN_T* from) {
+    return me()->LoadImpl(idx, from);
+  }
+
+  X_VEC LoadImpl(const ptrdiff_t idx, IN_T* from) {
+    return hn::LoadU(d_in, from + idx);  // full vector starting at from[idx]
+  }
+
+  // MaskLoad accepts a positive or negative `places`. If positive, only the
+  // first `places` lanes are loaded; if negative, only the last |places|
+  // lanes are loaded. `o` marks a loaded lane. Example: places = 3
+  //      | o | o | o | x | x | x | x | x |
+  // example places = -3
+  //      | x | x | x | x | x | o | o | o |
+  X_VEC MaskLoad(const ptrdiff_t idx, IN_T* from, const ptrdiff_t places) {
+    return me()->MaskLoadImpl(idx, from, places);
+  }
+
+  X_VEC MaskLoadImpl(const ptrdiff_t idx, IN_T* from, const ptrdiff_t places) {
+    auto mask = hn::FirstN(d_in, static_cast<size_t>(places));
+    auto maskneg = hn::Not(hn::FirstN(
+        d_in,
+        static_cast<size_t>(places + static_cast<ptrdiff_t>(ActualLanes()))));
+    if (places < 0) mask = maskneg;  // negative: select the trailing lanes
+
+    return hn::MaskedLoad(mask, d_in, from + idx);
+  }
+  // Stores one vector at to[idx]; Unroller returns early if this yields false.
+  bool StoreAndShortCircuit(const ptrdiff_t idx, OUT_T* to, const Y_VEC x) {
+    return me()->StoreAndShortCircuitImpl(idx, to, x);
+  }
+
+  bool StoreAndShortCircuitImpl(const ptrdiff_t idx, OUT_T* to, const Y_VEC x) {
+    hn::StoreU(x, d_out, to + idx);
+    return true;  // default never short-circuits
+  }
+  // Masked counterpart of the store; `places` is interpreted as in MaskLoad.
+  ptrdiff_t MaskStore(const ptrdiff_t idx, OUT_T* to, const Y_VEC x,
+                      ptrdiff_t const places) {
+    return me()->MaskStoreImpl(idx, to, x, places);
+  }
+
+  ptrdiff_t MaskStoreImpl(const ptrdiff_t idx, OUT_T* to, const Y_VEC x,
+                          const ptrdiff_t places) {
+    auto mask = hn::FirstN(d_out, static_cast<size_t>(places));
+    auto maskneg = hn::Not(hn::FirstN(
+        d_out,
+        static_cast<size_t>(places + static_cast<ptrdiff_t>(ActualLanes()))));
+    if (places < 0) mask = maskneg;  // negative: select the trailing lanes
+
+    hn::BlendedStore(x, mask, d_out, to + idx);
+    return std::abs(places);  // |places|; Unroller adds it to its index
+  }
+
+  ptrdiff_t Reduce(const Y_VEC x, OUT_T* to) { return me()->ReduceImpl(x, to); }
+
+  ptrdiff_t ReduceImpl(const Y_VEC x, OUT_T* to) {
+    // default does nothing; returns 0 (Unroller adds this to its index)
+    (void)x;
+    (void)to;
+    return 0;
+  }
+
+  void Reduce(const Y_VEC x0, const Y_VEC x1, const Y_VEC x2, Y_VEC* y) {
+    me()->ReduceImpl(x0, x1, x2, y);
+  }
+
+  void ReduceImpl(const Y_VEC x0, const Y_VEC x1, const Y_VEC x2, Y_VEC* y) {
+    // default does nothing; `*y` is left unchanged
+    (void)x0;
+    (void)x1;
+    (void)x2;
+    (void)y;
+  }
+};
+
+template <class DERIVED, typename IN0_T, typename IN1_T, typename OUT_T>
+struct UnrollerUnit2D {  // CRTP base for two-input units.
+  DERIVED* me() { return static_cast<DERIVED*>(this); }  // downcast to DERIVED
+
+  static constexpr size_t kMaxTSize =
+      HWY_MAX(sizeof(IN0_T), HWY_MAX(sizeof(IN1_T), sizeof(OUT_T)));
+  using LargerT = SignedFromSize<kMaxTSize>;  // only the size matters.
+
+  static constexpr size_t MaxUnitLanes() {
+    return HWY_MAX_LANES_D(hn::ScalableTag<LargerT>);
+  }
+  static size_t ActualLanes() { return Lanes(hn::ScalableTag<LargerT>()); }
+
+  using LargerD = hn::CappedTag<LargerT, MaxUnitLanes()>;
+
+  using I0T = hn::Rebind<IN0_T, LargerD>;
+  using I1T = hn::Rebind<IN1_T, LargerD>;
+  using OT = hn::Rebind<OUT_T, LargerD>;
+  I0T d_in0;  // tag for first-input vectors (IN0_T lanes)
+  I1T d_in1;  // tag for second-input vectors (IN1_T lanes)
+  OT d_out;   // tag for output vectors (OUT_T lanes)
+  using Y_VEC = hn::Vec<OT>;
+  using X0_VEC = hn::Vec<I0T>;
+  using X1_VEC = hn::Vec<I1T>;
+  // The SIMD operation. DERIVED must define Func; this only forwards to it.
+  hn::Vec<OT> Func(const ptrdiff_t idx, const hn::Vec<I0T> x0,
+                   const hn::Vec<I1T> x1, const Y_VEC y) {
+    return me()->Func(idx, x0, x1, y);
+  }
+
+  X0_VEC X0Init() { return me()->X0InitImpl(); }
+
+  X0_VEC X0InitImpl() { return hn::Zero(d_in0); }  // default: all-zero input
+
+  X1_VEC X1Init() { return me()->X1InitImpl(); }
+
+  X1_VEC X1InitImpl() { return hn::Zero(d_in1); }  // default: all-zero input
+
+  Y_VEC YInit() { return me()->YInitImpl(); }
+
+  Y_VEC YInitImpl() { return hn::Zero(d_out); }  // default: all-zero output
+
+  X0_VEC Load0(const ptrdiff_t idx, IN0_T* from) {
+    return me()->Load0Impl(idx, from);
+  }
+
+  X0_VEC Load0Impl(const ptrdiff_t idx, IN0_T* from) {
+    return hn::LoadU(d_in0, from + idx);  // full vector starting at from[idx]
+  }
+
+  X1_VEC Load1(const ptrdiff_t idx, IN1_T* from) {
+    return me()->Load1Impl(idx, from);
+  }
+
+  X1_VEC Load1Impl(const ptrdiff_t idx, IN1_T* from) {
+    return hn::LoadU(d_in1, from + idx);  // full vector starting at from[idx]
+  }
+
+  // MaskLoad0/1 accept a positive or negative `places`. If positive, only the
+  // first `places` lanes are loaded; if negative, only the last |places|
+  // lanes are loaded. `o` marks a loaded lane. Example: places = 3
+  //      | o | o | o | x | x | x | x | x |
+  // example places = -3
+  //      | x | x | x | x | x | o | o | o |
+  X0_VEC MaskLoad0(const ptrdiff_t idx, IN0_T* from, const ptrdiff_t places) {
+    return me()->MaskLoad0Impl(idx, from, places);
+  }
+
+  X0_VEC MaskLoad0Impl(const ptrdiff_t idx, IN0_T* from,
+                       const ptrdiff_t places) {
+    auto mask = hn::FirstN(d_in0, static_cast<size_t>(places));
+    auto maskneg = hn::Not(hn::FirstN(
+        d_in0,
+        static_cast<size_t>(places + static_cast<ptrdiff_t>(ActualLanes()))));
+    if (places < 0) mask = maskneg;  // negative: select the trailing lanes
+
+    return hn::MaskedLoad(mask, d_in0, from + idx);
+  }
+
+  hn::Vec<I1T> MaskLoad1(const ptrdiff_t idx, IN1_T* from,
+                         const ptrdiff_t places) {
+    return me()->MaskLoad1Impl(idx, from, places);
+  }
+
+  hn::Vec<I1T> MaskLoad1Impl(const ptrdiff_t idx, IN1_T* from,
+                             const ptrdiff_t places) {
+    auto mask = hn::FirstN(d_in1, static_cast<size_t>(places));
+    auto maskneg = hn::Not(hn::FirstN(
+        d_in1,
+        static_cast<size_t>(places + static_cast<ptrdiff_t>(ActualLanes()))));
+    if (places < 0) mask = maskneg;  // negative: select the trailing lanes
+
+    return hn::MaskedLoad(mask, d_in1, from + idx);
+  }
+
+  // Stores one vector at to[idx]; Unroller returns early if this yields false.
+  bool StoreAndShortCircuit(const ptrdiff_t idx, OUT_T* to, const Y_VEC x) {
+    return me()->StoreAndShortCircuitImpl(idx, to, x);
+  }
+
+  bool StoreAndShortCircuitImpl(const ptrdiff_t idx, OUT_T* to, const Y_VEC x) {
+    hn::StoreU(x, d_out, to + idx);
+    return true;  // default never short-circuits
+  }
+  // Masked counterpart of the store; `places` is interpreted as in MaskLoad0.
+  ptrdiff_t MaskStore(const ptrdiff_t idx, OUT_T* to, const Y_VEC x,
+                      const ptrdiff_t places) {
+    return me()->MaskStoreImpl(idx, to, x, places);
+  }
+
+  ptrdiff_t MaskStoreImpl(const ptrdiff_t idx, OUT_T* to, const Y_VEC x,
+                          const ptrdiff_t places) {
+    auto mask = hn::FirstN(d_out, static_cast<size_t>(places));
+    auto maskneg = hn::Not(hn::FirstN(
+        d_out,
+        static_cast<size_t>(places + static_cast<ptrdiff_t>(ActualLanes()))));
+    if (places < 0) mask = maskneg;  // negative: select the trailing lanes
+
+    hn::BlendedStore(x, mask, d_out, to + idx);
+    return std::abs(places);  // |places| is reported back to the caller
+  }
+
+  ptrdiff_t Reduce(const Y_VEC x, OUT_T* to) { return me()->ReduceImpl(x, to); }
+
+  ptrdiff_t ReduceImpl(const Y_VEC x, OUT_T* to) {
+    // default does nothing and returns 0
+    (void)x;
+    (void)to;
+    return 0;
+  }
+
+  void Reduce(const Y_VEC x0, const Y_VEC x1, const Y_VEC x2, Y_VEC* y) {
+    me()->ReduceImpl(x0, x1, x2, y);
+  }
+
+  void ReduceImpl(const Y_VEC x0, const Y_VEC x1, const Y_VEC x2, Y_VEC* y) {
+    // default does nothing; `*y` is left unchanged
+    (void)x0;
+    (void)x1;
+    (void)x2;
+    (void)y;
+  }
+};
+
+template <class FUNC, typename IN_T, typename OUT_T>  // One-input driver: runs unit `f` over x[0, n); f's store/reduce hooks write to y.
+inline void Unroller(FUNC& f, IN_T* HWY_RESTRICT x, OUT_T* HWY_RESTRICT y,
+                     const ptrdiff_t n) {
+  auto xx = f.X0Init();
+  auto yy = f.YInit();
+  ptrdiff_t i = 0;  // index of the next unprocessed element
+  // When memory ops might fault, inputs shorter than lane_sz are processed via full-size stack buffers instead of x/y.
+#if HWY_MEM_OPS_MIGHT_FAULT
+  constexpr auto lane_sz =
+      static_cast<ptrdiff_t>(RemoveRef<FUNC>::MaxUnitLanes());
+  if (n < lane_sz) {
+    const DFromV<decltype(yy)> d;
+    // this may not fit on the stack for HWY_RVV, but we do not reach this code
+    // there
+    HWY_ALIGN IN_T xtmp[static_cast<size_t>(lane_sz)];
+    HWY_ALIGN OUT_T ytmp[static_cast<size_t>(lane_sz)];
+    // Stage the n valid inputs in xtmp, then run a single masked load / Func / masked store on the buffers.
+    CopyBytes(x, xtmp, static_cast<size_t>(n) * sizeof(IN_T));
+    xx = f.MaskLoad(0, xtmp, n);
+    yy = f.Func(0, xx, yy);
+    Store(Zero(d), d, ytmp);  // zero-fill ytmp before the partial store
+    i += f.MaskStore(0, ytmp, yy, n);  // in this branch i accumulates the number of output elements
+    i += f.Reduce(yy, ytmp);
+    CopyBytes(ytmp, y, static_cast<size_t>(i) * sizeof(OUT_T));  // copy only the i produced elements to y
+    return;
+  }
+#endif
+
+  const ptrdiff_t actual_lanes =
+      static_cast<ptrdiff_t>(RemoveRef<FUNC>::ActualLanes());
+  if (n > 4 * actual_lanes) {  // 4x-unrolled path, taken only when more than four vectors of input exist
+    auto xx1 = f.X0Init();
+    auto yy1 = f.YInit();
+    auto xx2 = f.X0Init();
+    auto yy2 = f.YInit();
+    auto xx3 = f.X0Init();
+    auto yy3 = f.YInit();
+    // Each iteration: four loads, four Func calls (each with its own accumulator), four stores.
+    while (i + 4 * actual_lanes - 1 < n) {
+      xx = f.Load(i, x);
+      i += actual_lanes;
+      xx1 = f.Load(i, x);
+      i += actual_lanes;
+      xx2 = f.Load(i, x);
+      i += actual_lanes;
+      xx3 = f.Load(i, x);
+      i -= 3 * actual_lanes;  // rewind i to the first of the four vectors
+
+      yy = f.Func(i, xx, yy);
+      yy1 = f.Func(i + actual_lanes, xx1, yy1);
+      yy2 = f.Func(i + 2 * actual_lanes, xx2, yy2);
+      yy3 = f.Func(i + 3 * actual_lanes, xx3, yy3);
+      // A `false` result from any store ends the whole call immediately.
+      if (!f.StoreAndShortCircuit(i, y, yy)) return;
+      i += actual_lanes;
+      if (!f.StoreAndShortCircuit(i, y, yy1)) return;
+      i += actual_lanes;
+      if (!f.StoreAndShortCircuit(i, y, yy2)) return;
+      i += actual_lanes;
+      if (!f.StoreAndShortCircuit(i, y, yy3)) return;
+      i += actual_lanes;
+    }
+    // Lets the unit merge the three extra accumulators into yy.
+    f.Reduce(yy3, yy2, yy1, &yy);
+  }
+  // Remaining whole vectors, one per iteration.
+  while (i + actual_lanes - 1 < n) {
+    xx = f.Load(i, x);
+    yy = f.Func(i, xx, yy);
+    if (!f.StoreAndShortCircuit(i, y, yy)) return;
+    i += actual_lanes;
+  }
+  // Tail: revisit the window starting at n - actual_lanes; the negative `places` (i - n) asks the masked ops for its last n - i lanes.
+  if (i != n) {
+    xx = f.MaskLoad(n - actual_lanes, x, i - n);
+    yy = f.Func(n - actual_lanes, xx, yy);
+    f.MaskStore(n - actual_lanes, y, yy, i - n);
+  }
+  // Final reduction hook; its return value is not used on this path.
+  f.Reduce(yy, y);
+}
+
+template <class FUNC, typename IN0_T, typename IN1_T, typename OUT_T>  // Two-input driver: runs unit `f` over x0[0, n) and x1[0, n); f's store/reduce hooks write to y.
+inline void Unroller(FUNC& HWY_RESTRICT f, IN0_T* HWY_RESTRICT x0,
+                     IN1_T* HWY_RESTRICT x1, OUT_T* HWY_RESTRICT y,
+                     const ptrdiff_t n) {
+  const ptrdiff_t lane_sz =
+      static_cast<ptrdiff_t>(RemoveRef<FUNC>::ActualLanes());
+
+  auto xx00 = f.X0Init();
+  auto xx10 = f.X1Init();
+  auto yy = f.YInit();
+
+  ptrdiff_t i = 0;  // index of the next unprocessed element
+  // When memory ops might fault, inputs shorter than lane_sz are processed via full-size stack buffers instead of x0/x1/y.
+#if HWY_MEM_OPS_MIGHT_FAULT
+  if (n < lane_sz) {
+    const DFromV<decltype(yy)> d;
+    // this may not fit on the stack for HWY_RVV, but we do not reach this code
+    // there
+    constexpr auto max_lane_sz =
+        static_cast<ptrdiff_t>(RemoveRef<FUNC>::MaxUnitLanes());
+    HWY_ALIGN IN0_T xtmp0[static_cast<size_t>(max_lane_sz)];
+    HWY_ALIGN IN1_T xtmp1[static_cast<size_t>(max_lane_sz)];
+    HWY_ALIGN OUT_T ytmp[static_cast<size_t>(max_lane_sz)];
+    // Stage the n valid inputs, then run a single masked load / Func / masked store on the buffers.
+    CopyBytes(x0, xtmp0, static_cast<size_t>(n) * sizeof(IN0_T));
+    CopyBytes(x1, xtmp1, static_cast<size_t>(n) * sizeof(IN1_T));
+    xx00 = f.MaskLoad0(0, xtmp0, n);
+    xx10 = f.MaskLoad1(0, xtmp1, n);
+    yy = f.Func(0, xx00, xx10, yy);
+    Store(Zero(d), d, ytmp);  // zero-fill ytmp before the partial store
+    i += f.MaskStore(0, ytmp, yy, n);  // in this branch i accumulates the number of output elements
+    i += f.Reduce(yy, ytmp);
+    CopyBytes(ytmp, y, static_cast<size_t>(i) * sizeof(OUT_T));  // copy only the i produced elements to y
+    return;
+  }
+#endif
+
+  if (n > 4 * lane_sz) {  // 4x-unrolled path, taken only when more than four vectors of input exist
+    auto xx01 = f.X0Init();
+    auto xx11 = f.X1Init();
+    auto yy1 = f.YInit();
+    auto xx02 = f.X0Init();
+    auto xx12 = f.X1Init();
+    auto yy2 = f.YInit();
+    auto xx03 = f.X0Init();
+    auto xx13 = f.X1Init();
+    auto yy3 = f.YInit();
+    // Each iteration: four pairs of loads, four Func calls (each with its own accumulator), four stores.
+    while (i + 4 * lane_sz - 1 < n) {
+      xx00 = f.Load0(i, x0);
+      xx10 = f.Load1(i, x1);
+      i += lane_sz;
+      xx01 = f.Load0(i, x0);
+      xx11 = f.Load1(i, x1);
+      i += lane_sz;
+      xx02 = f.Load0(i, x0);
+      xx12 = f.Load1(i, x1);
+      i += lane_sz;
+      xx03 = f.Load0(i, x0);
+      xx13 = f.Load1(i, x1);
+      i -= 3 * lane_sz;  // rewind i to the first of the four vectors
+
+      yy = f.Func(i, xx00, xx10, yy);
+      yy1 = f.Func(i + lane_sz, xx01, xx11, yy1);
+      yy2 = f.Func(i + 2 * lane_sz, xx02, xx12, yy2);
+      yy3 = f.Func(i + 3 * lane_sz, xx03, xx13, yy3);
+      // A `false` result from any store ends the whole call immediately.
+      if (!f.StoreAndShortCircuit(i, y, yy)) return;
+      i += lane_sz;
+      if (!f.StoreAndShortCircuit(i, y, yy1)) return;
+      i += lane_sz;
+      if (!f.StoreAndShortCircuit(i, y, yy2)) return;
+      i += lane_sz;
+      if (!f.StoreAndShortCircuit(i, y, yy3)) return;
+      i += lane_sz;
+    }
+    // Lets the unit merge the three extra accumulators into yy.
+    f.Reduce(yy3, yy2, yy1, &yy);
+  }
+  // Remaining whole vectors, one per iteration.
+  while (i + lane_sz - 1 < n) {
+    xx00 = f.Load0(i, x0);
+    xx10 = f.Load1(i, x1);
+    yy = f.Func(i, xx00, xx10, yy);
+    if (!f.StoreAndShortCircuit(i, y, yy)) return;
+    i += lane_sz;
+  }
+  // Tail: revisit the window starting at n - lane_sz; the negative `places` (i - n) asks the masked ops for its last n - i lanes.
+  if (i != n) {
+    xx00 = f.MaskLoad0(n - lane_sz, x0, i - n);
+    xx10 = f.MaskLoad1(n - lane_sz, x1, i - n);
+    yy = f.Func(n - lane_sz, xx00, xx10, yy);
+    f.MaskStore(n - lane_sz, y, yy, i - n);
+  }
+  // Final reduction hook; its return value is not used on this path.
+  f.Reduce(yy, y);
+}
+
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#endif  // HIGHWAY_HWY_CONTRIB_UNROLLER_UNROLLER_INL_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/unroller/unroller_test.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/unroller/unroller_test.cc
new file mode 100644
index 0000000000..c3a4d8061b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/contrib/unroller/unroller_test.cc
@@ -0,0 +1,486 @@
+// Copyright Google LLC 2021
+//           Matthew Kolbe 2023
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "hwy/base.h"
+
+// clang-format off
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/unroller/unroller_test.cc"  //NOLINT
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+#include "hwy/highway.h"
+#include "hwy/contrib/unroller/unroller-inl.h"
+#include "hwy/tests/test_util-inl.h"
+// clang-format on
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Scalar reference for the dot-product tests: accumulates pa[k] * pb[k] in
+// index order over the first `num` elements and returns the sum.
+template <typename T>
+T SimpleDot(const T* pa, const T* pb, size_t num) {
+  T acc = 0;
+  for (const T* const end = pa + num; pa != end; ++pa, ++pb) acc += *pa * *pb;
+  return acc;
+}
+
+// Scalar reference: adds up the first `num` elements of `pa` in index order
+// and returns the sum (0 when `num` is zero).
+template <typename T>
+T SimpleAcc(const T* pa, size_t num) {
+  T acc = 0;
+  for (const T* const end = pa + num; pa != end; ++pa) acc += *pa;
+  return acc;
+}
+
+template <typename T>  // Scalar reference: smallest of the first `num` elements, or HighestValue<T>() if num == 0.
+T SimpleMin(const T* pa, size_t num) {
+  T lowest = HighestValue<T>();
+  for (const T* const end = pa + num; pa != end; ++pa) {
+    // Replace the running minimum only when it is strictly greater.
+    if (lowest > *pa) lowest = *pa;
+  }
+  return lowest;
+}
+
+template <typename T>  // Two-input unit whose result is the lane-wise product x0 * x1; all other hooks are inherited.
+struct MultiplyUnit : UnrollerUnit2D<MultiplyUnit<T>, T, T, T> {
+  using TT = hn::ScalableTag<T>;
+  HWY_INLINE hn::Vec<TT> Func(ptrdiff_t idx, const hn::Vec<TT> x0,
+                              const hn::Vec<TT> x1, const hn::Vec<TT> y) {
+    (void)idx;
+    (void)y;  // the previous output is not used
+    return hn::Mul(x0, x1);
+  }
+};
+
+template <typename FROM_T, typename TO_T>  // One-input unit that converts each FROM_T lane to TO_T.
+struct ConvertUnit : UnrollerUnit<ConvertUnit<FROM_T, TO_T>, FROM_T, TO_T> {
+  using Base = UnrollerUnit<ConvertUnit<FROM_T, TO_T>, FROM_T, TO_T>;
+  using Base::MaxUnitLanes;
+  using typename Base::LargerD;
+  // Input and output tags, both derived from the base class's LargerD.
+  using TT_FROM = hn::Rebind<FROM_T, LargerD>;
+  using TT_TO = hn::Rebind<TO_T, LargerD>;
+  // Three overloads, selected by comparing the source and destination lane sizes. Source wider: demote.
+  template <
+      class ToD, class FromV,
+      hwy::EnableIf<(sizeof(TFromV<FromV>) > sizeof(TFromD<ToD>))>* = nullptr>
+  static HWY_INLINE hn::Vec<ToD> DoConvertVector(ToD d, FromV v) {
+    return hn::DemoteTo(d, v);
+  }
+  template <  // Same lane size: convert.
+      class ToD, class FromV,
+      hwy::EnableIf<(sizeof(TFromV<FromV>) == sizeof(TFromD<ToD>))>* = nullptr>
+  static HWY_INLINE hn::Vec<ToD> DoConvertVector(ToD d, FromV v) {
+    return hn::ConvertTo(d, v);
+  }
+  template <  // Source narrower: promote.
+      class ToD, class FromV,
+      hwy::EnableIf<(sizeof(TFromV<FromV>) < sizeof(TFromD<ToD>))>* = nullptr>
+  static HWY_INLINE hn::Vec<ToD> DoConvertVector(ToD d, FromV v) {
+    return hn::PromoteTo(d, v);
+  }
+  // Per-vector operation: returns `x` converted to TO_T lanes; idx and the previous output are unused.
+  hn::Vec<TT_TO> Func(ptrdiff_t idx, const hn::Vec<TT_FROM> x,
+                      const hn::Vec<TT_TO> y) {
+    (void)idx;
+    (void)y;
+    TT_TO d;
+    return DoConvertVector(d, x);
+  }
+};
+
+// Unit that writes to to[0] the index of the first element equal to `to_find`, or -1 if there is none.
+// Caveat: stores lane indices as MakeSigned<T>, which may overflow for 8-bit T on HWY_RVV.
+template <typename T>
+struct FindUnit : UnrollerUnit<FindUnit<T>, T, MakeSigned<T>> {
+  using TI = MakeSigned<T>;
+  using Base = UnrollerUnit<FindUnit<T>, T, TI>;
+  using Base::ActualLanes;
+  using Base::MaxUnitLanes;
+
+  using D = hn::CappedTag<T, MaxUnitLanes()>;
+  T to_find;  // value being searched for
+  D d;
+  using DI = RebindToSigned<D>;
+  DI di;
+  // Declared with the injected class name: `FindUnit<T>(...)` as a constructor declarator is ill-formed since C++20.
+  FindUnit(T find) : to_find(find) {}
+  // Returns a vector filled with idx + (first matching lane) if `x` contains to_find, otherwise the previous result `y`.
+  hn::Vec<DI> Func(ptrdiff_t idx, const hn::Vec<D> x, const hn::Vec<DI> y) {
+    const Mask<D> msk = hn::Eq(x, hn::Set(d, to_find));
+    const TI first_idx = static_cast<TI>(hn::FindFirstTrue(d, msk));
+    if (first_idx > -1)  // a lane matched
+      return hn::Set(di, static_cast<TI>(static_cast<TI>(idx) + first_idx));
+    else
+      return y;
+  }
+  // Initial input vector: every lane holds to_find + 1, i.e. a value different from to_find.
+  hn::Vec<D> X0InitImpl() {
+    // TODO(janwas): use NaN for float
+    return hn::Set(d, hwy::AddWithWraparound(hwy::IsFloatTag<T>(), to_find, 1));
+  }
+  // Initial result: -1 in every lane, meaning "not found".
+  hn::Vec<DI> YInitImpl() { return hn::Set(di, TI{-1}); }
+  // Partial load in which lanes outside the mask take X0InitImpl()'s value instead of loaded data.
+  hn::Vec<D> MaskLoadImpl(const ptrdiff_t idx, T* from,
+                          const ptrdiff_t places) {
+    auto mask = hn::FirstN(d, static_cast<size_t>(places));  // used when places >= 0: the first `places` lanes
+    auto maskneg = hn::Not(hn::FirstN(  // used when places < 0: every lane except the first (places + ActualLanes())
+        d,
+        static_cast<size_t>(places + static_cast<ptrdiff_t>(ActualLanes()))));
+    if (places < 0) mask = maskneg;
+    return hn::IfThenElse(mask, hn::MaskedLoad(mask, d, from + idx),
+                          X0InitImpl());
+  }
+  // Writes lane 0 of `x` to to[0]; returns false (stop) once that value is no longer -1.
+  bool StoreAndShortCircuitImpl(const ptrdiff_t idx, TI* to,
+                                const hn::Vec<DI> x) {
+    (void)idx;
+
+    TI a = hn::GetLane(x);
+    to[0] = a;
+
+    if (a == -1) return true;  // nothing found yet: keep going
+
+    return false;
+  }
+  // Tail store: writes lane 0 of `x` to to[0] and always returns 1, regardless of `places`.
+  ptrdiff_t MaskStoreImpl(const ptrdiff_t idx, TI* to, const hn::Vec<DI> x,
+                          const ptrdiff_t places) {
+    (void)idx;
+    (void)places;
+    TI a = hn::GetLane(x);
+    to[0] = a;
+    return 1;
+  }
+};
+
+template <typename T>  // Reducer unit: sums all inputs and writes the total to *to in ReduceImpl.
+struct AccumulateUnit : UnrollerUnit<AccumulateUnit<T>, T, T> {
+  using TT = hn::ScalableTag<T>;
+  hn::Vec<TT> Func(ptrdiff_t idx, const hn::Vec<TT> x, const hn::Vec<TT> y) {  // adds `x` to the running accumulator `y`
+    (void)idx;
+    return hn::Add(x, y);
+  }
+  // Store hook: writes nothing and always returns true (continue).
+  bool StoreAndShortCircuitImpl(const ptrdiff_t idx, T* to,
+                                const hn::Vec<TT> x) {
+    // no stores in a reducer
+    (void)idx;
+    (void)to;
+    (void)x;
+    return true;
+  }
+  // Tail store hook: writes nothing and returns 0.
+  ptrdiff_t MaskStoreImpl(const ptrdiff_t idx, T* to, const hn::Vec<TT> x,
+                          const ptrdiff_t places) {
+    // no stores in a reducer
+    (void)idx;
+    (void)to;
+    (void)x;
+    (void)places;
+    return 0;
+  }
+  // Final reduction: writes the sum of all lanes of `x` to *to and returns 1.
+  ptrdiff_t ReduceImpl(const hn::Vec<TT> x, T* to) {
+    const hn::ScalableTag<T> d;
+    (*to) = hn::ReduceSum(d, x);
+    return 1;
+  }
+  // Accumulator merge: adds x0, x1 and x2 into *y lane-wise.
+  void ReduceImpl(const hn::Vec<TT> x0, const hn::Vec<TT> x1,
+                  const hn::Vec<TT> x2, hn::Vec<TT>* y) {
+    (*y) = hn::Add(hn::Add(*y, x0), hn::Add(x1, x2));
+  }
+};
+
+template <typename T>  // Reducer unit: tracks the lane-wise minimum of all inputs and writes the overall minimum to *to.
+struct MinUnit : UnrollerUnit<MinUnit<T>, T, T> {
+  using Base = UnrollerUnit<MinUnit<T>, T, T>;
+  using Base::ActualLanes;
+
+  using TT = hn::ScalableTag<T>;
+  TT d;
+  // Per-vector operation: lane-wise minimum of the running result `y` and the new input `x`.
+  hn::Vec<TT> Func(const ptrdiff_t idx, const hn::Vec<TT> x,
+                   const hn::Vec<TT> y) {
+    (void)idx;
+    return hn::Min(y, x);
+  }
+  // Initial result: HighestValue<T>() in every lane.
+  hn::Vec<TT> YInitImpl() { return hn::Set(d, HighestValue<T>()); }
+  // Partial load; `def` (HighestValue<T>() in every lane) is passed to MaskedLoadOr as the fallback vector.
+  hn::Vec<TT> MaskLoadImpl(const ptrdiff_t idx, T* from,
+                           const ptrdiff_t places) {
+    auto mask = hn::FirstN(d, static_cast<size_t>(places));  // used when places >= 0: the first `places` lanes
+    auto maskneg = hn::Not(hn::FirstN(  // used when places < 0: every lane except the first (places + ActualLanes())
+        d,
+        static_cast<size_t>(places + static_cast<ptrdiff_t>(ActualLanes()))));
+    if (places < 0) mask = maskneg;
+
+    auto def = YInitImpl();
+    return hn::MaskedLoadOr(def, mask, d, from + idx);
+  }
+  // Store hook: writes nothing and always returns true (continue).
+  bool StoreAndShortCircuitImpl(const ptrdiff_t idx, T* to,
+                                const hn::Vec<TT> x) {
+    // no stores in a reducer
+    (void)idx;
+    (void)to;
+    (void)x;
+    return true;
+  }
+  // Tail store hook: writes nothing and returns 0.
+  ptrdiff_t MaskStoreImpl(const ptrdiff_t idx, T* to, const hn::Vec<TT> x,
+                          const ptrdiff_t places) {
+    // no stores in a reducer
+    (void)idx;
+    (void)to;
+    (void)x;
+    (void)places;
+    return 0;
+  }
+  // Final reduction: writes lane 0 of MinOfLanes(x) to *to and returns 1.
+  ptrdiff_t ReduceImpl(const hn::Vec<TT> x, T* to) {
+    const hn::ScalableTag<T> d;  // local tag; shadows the member `d`
+    auto minvect = hn::MinOfLanes(d, x);
+    (*to) = hn::ExtractLane(minvect, 0);
+    return 1;
+  }
+  // Accumulator merge: *y becomes the lane-wise minimum of *y, x0, x1 and x2.
+  void ReduceImpl(const hn::Vec<TT> x0, const hn::Vec<TT> x1,
+                  const hn::Vec<TT> x2, hn::Vec<TT>* y) {
+    auto a = hn::Min(x1, x0);
+    auto b = hn::Min(*y, x2);
+    (*y) = hn::Min(a, b);
+  }
+};
+
+template <typename T>  // Two-input reducer unit: accumulates x0 * x1 and writes the total to *to in ReduceImpl.
+struct DotUnit : UnrollerUnit2D<DotUnit<T>, T, T, T> {
+  using TT = hn::ScalableTag<T>;
+  // Per-vector operation: x0 * x1 + y, with `y` the running accumulator.
+  hn::Vec<TT> Func(const ptrdiff_t idx, const hn::Vec<TT> x0,
+                   const hn::Vec<TT> x1, const hn::Vec<TT> y) {
+    (void)idx;
+    return hn::MulAdd(x0, x1, y);
+  }
+  // Store hook: writes nothing and always returns true (continue).
+  bool StoreAndShortCircuitImpl(const ptrdiff_t idx, T* to,
+                                const hn::Vec<TT> x) {
+    // no stores in a reducer
+    (void)idx;
+    (void)to;
+    (void)x;
+    return true;
+  }
+  // Tail store hook: writes nothing and returns 0.
+  ptrdiff_t MaskStoreImpl(const ptrdiff_t idx, T* to, const hn::Vec<TT> x,
+                          const ptrdiff_t places) {
+    // no stores in a reducer
+    (void)idx;
+    (void)to;
+    (void)x;
+    (void)places;
+    return 0;
+  }
+  // Final reduction: writes the sum of all lanes of `x` to *to and returns 1.
+  ptrdiff_t ReduceImpl(const hn::Vec<TT> x, T* to) {
+    const hn::ScalableTag<T> d;
+    (*to) = hn::ReduceSum(d, x);
+    return 1;
+  }
+  // Accumulator merge: adds x0, x1 and x2 into *y lane-wise.
+  void ReduceImpl(const hn::Vec<TT> x0, const hn::Vec<TT> x1,
+                  const hn::Vec<TT> x2, hn::Vec<TT>* y) {
+    (*y) = hn::Add(hn::Add(*y, x0), hn::Add(x1, x2));
+  }
+};
+
+template <typename T>  // Writes `value`, converted to T, to the element `ptr` points at.
+void SetValue(const float value, T* HWY_RESTRICT ptr) {
+  ptr[0] = static_cast<T>(value);
+}
+
+// Element counts that every test iterates over, expressed relative to
+// N = Lanes(d): four fixed sizes, two fractions of N, then sizes at and above N.
+template <class D>
+std::vector<size_t> Counts(D d) {
+  const size_t lanes = Lanes(d);
+  // Both fractions are clamped to at least 1 (lanes / 2 is 0 when lanes == 1).
+  const size_t half = HWY_MAX(lanes / 2, 1);
+  const size_t two_thirds = HWY_MAX(2 * lanes / 3, 1);
+  std::vector<size_t> counts{1, 3, 7, 16};  // sizes independent of the vector length
+  counts.push_back(half);
+  counts.push_back(two_thirds);
+  for (const size_t c : {lanes, lanes + 1, 4 * lanes / 3, 3 * lanes, 8 * lanes,
+                         8 * lanes + 2, 256 * lanes - 1, 256 * lanes}) {
+    counts.push_back(c);
+  }
+  // 14 entries in total, in the order listed above.
+  return counts;
+}
+
+struct TestDot {  // Checks MultiplyUnit + AccumulateUnit, DotUnit and MinUnit against the scalar Simple* references.
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    // TODO(janwas): avoid internal compiler error
+#if HWY_TARGET == HWY_SVE || HWY_TARGET == HWY_SVE2 || HWY_COMPILER_MSVC
+    (void)d;
+#else
+    RandomState rng;
+    const auto random_t = [&rng]() {  // random multiples of 1/64 in [-8, 8)
+      const int32_t bits = static_cast<int32_t>(Random32(&rng)) & 1023;
+      return static_cast<float>(bits - 512) * (1.0f / 64);
+    };
+
+    for (size_t num : Counts(d)) {
+      AlignedFreeUniquePtr<T[]> pa = AllocateAligned<T>(num);
+      AlignedFreeUniquePtr<T[]> pb = AllocateAligned<T>(num);
+      AlignedFreeUniquePtr<T[]> py = AllocateAligned<T>(num);
+
+      HWY_ASSERT(pa && pb && py);
+      T* a = pa.get();
+      T* b = pb.get();
+      T* y = py.get();
+      // Fill both inputs with random values.
+      size_t i = 0;
+      for (; i < num; ++i) {
+        SetValue(random_t(), a + i);
+        SetValue(random_t(), b + i);
+      }
+      // Path 1: element-wise product into y, then sum y.
+      auto expected_dot = SimpleDot(a, b, num);
+      MultiplyUnit<T> multfn;
+      Unroller(multfn, a, b, y, static_cast<ptrdiff_t>(num));
+      AccumulateUnit<T> accfn;
+      T dot_via_mul_acc;
+      Unroller(accfn, y, &dot_via_mul_acc, static_cast<ptrdiff_t>(num));
+      // Cast because std::abs does not support _Float16.
+      const T tolerance =  // relative tolerance: 32 * epsilon * |expected_dot|
+          T{32} * hwy::Epsilon<T>() *
+          static_cast<T>(std::abs(static_cast<double>(expected_dot)));
+      HWY_ASSERT(static_cast<T>(std::abs(static_cast<double>(
+                     expected_dot - dot_via_mul_acc))) < tolerance);
+      // Path 2: fused multiply-add reducer in a single pass.
+      DotUnit<T> dotfn;
+      T dotr;
+      Unroller(dotfn, a, b, &dotr, static_cast<ptrdiff_t>(num));
+      HWY_ASSERT(static_cast<T>(std::abs(
+                     static_cast<double>(expected_dot - dotr))) < tolerance);
+      // Minimum of `a` via MinUnit, compared with the scalar reference.
+      auto expected_min = SimpleMin(a, num);
+      MinUnit<T> minfn;
+      T minr;
+      Unroller(minfn, a, &minr, static_cast<ptrdiff_t>(num));
+
+      HWY_ASSERT(std::abs(static_cast<double>(expected_min - minr)) < 1e-7);
+    }
+#endif
+  }
+};
+
+struct TestConvert {  // Round-trips values T -> int -> T through ConvertUnit and checks each element.
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    // TODO(janwas): avoid internal compiler error
+#if HWY_TARGET == HWY_SVE || HWY_TARGET == HWY_SVE2 || HWY_COMPILER_MSVC
+    (void)d;
+#else
+    for (size_t num : Counts(d)) {
+      AlignedFreeUniquePtr<T[]> pa = AllocateAligned<T>(num);
+      AlignedFreeUniquePtr<int[]> pto = AllocateAligned<int>(num);
+      HWY_ASSERT(pa && pto);
+      T* HWY_RESTRICT a = pa.get();
+      int* HWY_RESTRICT to = pto.get();
+      // Inputs are i * 0.25, so most values have a fractional part.
+      for (size_t i = 0; i < num; ++i) {
+        a[i] = static_cast<T>(static_cast<double>(i) * 0.25);
+      }
+      // T -> int: expect the same result as static_cast<int>.
+      ConvertUnit<T, int> cvtfn;
+      Unroller(cvtfn, a, to, static_cast<ptrdiff_t>(num));
+      for (size_t i = 0; i < num; ++i) {
+        fprintf(stderr, "%zu of %zu size %zu %f -> %d\n", i, num, sizeof(T),
+                a[i], to[i]);
+        // TODO(janwas): RVV QEMU fcvt_rtz appears to 'truncate' 4.75 to 5.
+        HWY_ASSERT(
+            static_cast<int>(a[i]) == to[i] ||
+            (HWY_TARGET == HWY_RVV && static_cast<int>(a[i]) == to[i] - 1));
+      }
+      // int -> T: overwrites `a`; expect exactly static_cast<T>(to[i]).
+      ConvertUnit<int, T> cvtbackfn;
+      Unroller(cvtbackfn, to, a, static_cast<ptrdiff_t>(num));
+      for (size_t i = 0; i < num; ++i) {
+        fprintf(stderr, "%zu of %zu size %zu %d -> %f\n", i, num, sizeof(T),
+                to[i], a[i]);
+        HWY_ASSERT_EQ(static_cast<T>(to[i]), a[i]);
+      }
+    }
+#endif
+  }
+};
+
+struct TestFind {  // Searches a[i] = i for the last value, the first value and an absent value using FindUnit.
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    for (size_t num : Counts(d)) {
+      AlignedFreeUniquePtr<T[]> pa = AllocateAligned<T>(num);
+      HWY_ASSERT(pa);
+      T* a = pa.get();
+      // Each element is set to its own index converted to T.
+      for (size_t i = 0; i < num; ++i) a[i] = (T)i;
+      // Case 1: the value stored in the last element.
+      FindUnit<T> cvtfn((T)(num - 1));
+      MakeSigned<T> idx = 0;
+      Unroller(cvtfn, a, &idx, static_cast<ptrdiff_t>(num));
+      HWY_ASSERT(static_cast<MakeUnsigned<T>>(idx) < num);
+      HWY_ASSERT(a[idx] == (T)(num - 1));
+      // Case 2: the value stored in the first element.
+      FindUnit<T> cvtfnzero((T)(0));
+      Unroller(cvtfnzero, a, &idx, static_cast<ptrdiff_t>(num));
+      HWY_ASSERT(static_cast<MakeUnsigned<T>>(idx) < num);
+      HWY_ASSERT(a[idx] == (T)(0));
+      // Case 3: `num` itself is never stored, so the expected result is -1.
+      FindUnit<T> cvtfnnotin((T)(num));
+      Unroller(cvtfnnotin, a, &idx, static_cast<ptrdiff_t>(num));
+      HWY_ASSERT(idx == -1);
+    }
+  }
+};
+
+void TestAllDot() { ForFloatTypes(ForPartialVectors<TestDot>()); }  // entry point registered below; drives TestDot
+void TestAllConvert() { ForFloat3264Types(ForPartialVectors<TestConvert>()); }  // entry point registered below; drives TestConvert
+void TestAllFind() { ForFloatTypes(ForPartialVectors<TestFind>()); }  // entry point registered below; drives TestFind
+
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace hwy {
+HWY_BEFORE_TEST(UnrollerTest);
+HWY_EXPORT_AND_TEST_P(UnrollerTest, TestAllDot);
+HWY_EXPORT_AND_TEST_P(UnrollerTest, TestAllConvert);
+HWY_EXPORT_AND_TEST_P(UnrollerTest, TestAllFind);
+}  // namespace hwy
+
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/detect_compiler_arch.h b/third-party/libjxl/libjxl/third_party/highway/hwy/detect_compiler_arch.h
new file mode 100644
index 0000000000..081b6fffb3
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/detect_compiler_arch.h
@@ -0,0 +1,281 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_DETECT_COMPILER_ARCH_H_
+#define HIGHWAY_HWY_DETECT_COMPILER_ARCH_H_
+
+// Detects compiler and arch from predefined macros. Zero dependencies for
+// inclusion by foreach_target.h.
+
+// Add to #if conditions to prevent IDE from graying out code.
+#if (defined __CDT_PARSER__) || (defined __INTELLISENSE__) || \
+    (defined Q_CREATOR_RUN) || (defined __CLANGD__) ||        \
+    (defined GROK_ELLIPSIS_BUILD)
+#define HWY_IDE 1
+#else
+#define HWY_IDE 0
+#endif
+
+//------------------------------------------------------------------------------
+// Compiler
+
+// Actual MSVC, not clang-cl, which defines _MSC_VER but doesn't behave like
+// MSVC in other aspects (e.g. HWY_DIAGNOSTICS).
+#if defined(_MSC_VER) && !defined(__clang__)
+#define HWY_COMPILER_MSVC _MSC_VER
+#else
+#define HWY_COMPILER_MSVC 0
+#endif
+
+#if defined(_MSC_VER) && defined(__clang__)
+#define HWY_COMPILER_CLANGCL _MSC_VER
+#else
+#define HWY_COMPILER_CLANGCL 0
+#endif
+
+#ifdef __INTEL_COMPILER
+#define HWY_COMPILER_ICC __INTEL_COMPILER
+#else
+#define HWY_COMPILER_ICC 0
+#endif
+
+#ifdef __INTEL_LLVM_COMPILER
+#define HWY_COMPILER_ICX __INTEL_LLVM_COMPILER
+#else
+#define HWY_COMPILER_ICX 0
+#endif
+
+// HWY_COMPILER_GCC is a generic macro for all compilers implementing the GNU
+// compiler extensions (e.g. Clang, Intel...)
+#ifdef __GNUC__
+#define HWY_COMPILER_GCC (__GNUC__ * 100 + __GNUC_MINOR__)
+#else
+#define HWY_COMPILER_GCC 0
+#endif
+
+// Clang or clang-cl, not GCC.
+#ifdef __clang__
+// In case of Apple LLVM (whose version number is unrelated to that of LLVM) or
+// an invalid version number, deduce it from the presence of warnings.
+// Originally based on
+// https://github.com/simd-everywhere/simde/blob/47d6e603de9d04ee05cdfbc57cf282a02be1bf2a/simde/simde-detect-clang.h#L59.
+// Please send updates below to them as well, thanks!
+#if defined(__apple_build_version__) || __clang_major__ >= 999
+#if __has_attribute(nouwtable)  // no new warnings in 16.0
+#define HWY_COMPILER_CLANG 1600
+#elif __has_warning("-Warray-parameter")
+#define HWY_COMPILER_CLANG 1500
+#elif __has_warning("-Wbitwise-instead-of-logical")
+#define HWY_COMPILER_CLANG 1400
+#elif __has_warning("-Wreserved-identifier")
+#define HWY_COMPILER_CLANG 1300
+#elif __has_warning("-Wformat-insufficient-args")
+#define HWY_COMPILER_CLANG 1200
+#elif __has_warning("-Wimplicit-const-int-float-conversion")
+#define HWY_COMPILER_CLANG 1100
+#elif __has_warning("-Wmisleading-indentation")
+#define HWY_COMPILER_CLANG 1000
+#elif defined(__FILE_NAME__)
+#define HWY_COMPILER_CLANG 900
+#elif __has_warning("-Wextra-semi-stmt") || \
+    __has_builtin(__builtin_rotateleft32)
+#define HWY_COMPILER_CLANG 800
+// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently
+// based on Clang 7, but does not support the warning we test.
+// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and
+// https://trac.macports.org/wiki/XcodeVersionInfo.
+#elif __has_warning("-Wc++98-compat-extra-semi") || \
+    (defined(__apple_build_version__) && __apple_build_version__ >= 10010000)
+#define HWY_COMPILER_CLANG 700
+#else  // Anything older than 7.0 is not recommended for Highway.
+#define HWY_COMPILER_CLANG 600
+#endif  // __has_warning chain
+#define HWY_COMPILER3_CLANG (HWY_COMPILER_CLANG * 100)
+#else  // use normal version
+#define HWY_COMPILER_CLANG (__clang_major__ * 100 + __clang_minor__)
+#define HWY_COMPILER3_CLANG \
+  (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
+#endif
+#else  // Not clang
+#define HWY_COMPILER_CLANG 0
+#define HWY_COMPILER3_CLANG 0
+#endif
+
+#if HWY_COMPILER_GCC && !HWY_COMPILER_CLANG && !HWY_COMPILER_ICC
+#define HWY_COMPILER_GCC_ACTUAL HWY_COMPILER_GCC
+#else
+#define HWY_COMPILER_GCC_ACTUAL 0
+#endif
+
+// More than one may be nonzero, but we want at least one.
+#if 0 == (HWY_COMPILER_MSVC + HWY_COMPILER_CLANGCL + HWY_COMPILER_ICC + \
+          HWY_COMPILER_GCC + HWY_COMPILER_CLANG)
+#error "Unsupported compiler"
+#endif
+
+// We should only detect one of these (only clang/clangcl overlap)
+#if 1 <                                                                     \
+    (!!HWY_COMPILER_MSVC + !!HWY_COMPILER_ICC + !!HWY_COMPILER_GCC_ACTUAL + \
+     !!(HWY_COMPILER_CLANGCL | HWY_COMPILER_CLANG))
+#error "Detected multiple compilers"
+#endif
+
+#ifdef __has_builtin
+#define HWY_HAS_BUILTIN(name) __has_builtin(name)
+#else
+#define HWY_HAS_BUILTIN(name) 0
+#endif
+
+#ifdef __has_attribute
+#define HWY_HAS_ATTRIBUTE(name) __has_attribute(name)
+#else
+#define HWY_HAS_ATTRIBUTE(name) 0
+#endif
+
+#ifdef __has_cpp_attribute
+#define HWY_HAS_CPP_ATTRIBUTE(name) __has_cpp_attribute(name)
+#else
+#define HWY_HAS_CPP_ATTRIBUTE(name) 0
+#endif
+
+#ifdef __has_feature
+#define HWY_HAS_FEATURE(name) __has_feature(name)
+#else
+#define HWY_HAS_FEATURE(name) 0
+#endif
+
+//------------------------------------------------------------------------------
+// Architecture
+
+#if defined(__i386__) || defined(_M_IX86)
+#define HWY_ARCH_X86_32 1
+#else
+#define HWY_ARCH_X86_32 0
+#endif
+
+#if defined(__x86_64__) || defined(_M_X64)
+#define HWY_ARCH_X86_64 1
+#else
+#define HWY_ARCH_X86_64 0
+#endif
+
+#if HWY_ARCH_X86_32 && HWY_ARCH_X86_64
+#error "Cannot have both x86-32 and x86-64"
+#endif
+
+#if HWY_ARCH_X86_32 || HWY_ARCH_X86_64
+#define HWY_ARCH_X86 1
+#else
+#define HWY_ARCH_X86 0
+#endif
+
+#if defined(__powerpc64__) || defined(_M_PPC) || defined(__powerpc__)
+#define HWY_ARCH_PPC 1
+#else
+#define HWY_ARCH_PPC 0
+#endif
+
+// aarch32 is currently not supported; please raise an issue if you want it.
+#if defined(__ARM_ARCH_ISA_A64) || defined(__aarch64__) || defined(_M_ARM64)
+#define HWY_ARCH_ARM_A64 1
+#else
+#define HWY_ARCH_ARM_A64 0
+#endif
+
+#if (defined(__ARM_ARCH) && __ARM_ARCH == 7) || (defined(_M_ARM) && _M_ARM == 7)
+#define HWY_ARCH_ARM_V7 1
+#else
+#define HWY_ARCH_ARM_V7 0
+#endif
+
+#if HWY_ARCH_ARM_A64 && HWY_ARCH_ARM_V7
+#error "Cannot have both A64 and V7"
+#endif
+
+// Any *supported* version of Arm, i.e. 7 or later
+#if HWY_ARCH_ARM_A64 || HWY_ARCH_ARM_V7
+#define HWY_ARCH_ARM 1
+#else
+#define HWY_ARCH_ARM 0
+#endif
+
+// Older than Armv7 (e.g. armel aka Armv5) => we do not support SIMD.
+#if (defined(__arm__) || defined(_M_ARM)) && !HWY_ARCH_ARM
+#define HWY_ARCH_ARM_OLD 1
+#else
+#define HWY_ARCH_ARM_OLD 0
+#endif
+
+#if defined(__EMSCRIPTEN__) || defined(__wasm__) || defined(__WASM__)
+#define HWY_ARCH_WASM 1
+#else
+#define HWY_ARCH_WASM 0
+#endif
+
+#ifdef __riscv
+#define HWY_ARCH_RVV 1
+#else
+#define HWY_ARCH_RVV 0
+#endif
+
+// It is an error to detect multiple architectures at the same time, but OK to
+// detect none of the above.
+#if (HWY_ARCH_X86 + HWY_ARCH_PPC + HWY_ARCH_ARM + HWY_ARCH_ARM_OLD + \
+     HWY_ARCH_WASM + HWY_ARCH_RVV) > 1
+#error "Must not detect more than one architecture"
+#endif
+
+#if defined(_WIN32) || defined(_WIN64)
+#define HWY_OS_WIN 1
+#else
+#define HWY_OS_WIN 0
+#endif
+
+#if defined(linux) || defined(__linux__)
+#define HWY_OS_LINUX 1
+#else
+#define HWY_OS_LINUX 0
+#endif
+
+//------------------------------------------------------------------------------
+// Endianness
+
+#if HWY_COMPILER_MSVC
+#if HWY_ARCH_PPC && defined(_XBOX_VER) && _XBOX_VER >= 200
+// XBox 360 is big-endian
+#define HWY_IS_LITTLE_ENDIAN 0
+#define HWY_IS_BIG_ENDIAN 1
+#else
+// All other targets supported by MSVC are little-endian
+#define HWY_IS_LITTLE_ENDIAN 1
+#define HWY_IS_BIG_ENDIAN 0
+#endif  // HWY_ARCH_PPC && defined(_XBOX_VER) && _XBOX_VER >= 200
+#elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
+    __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define HWY_IS_LITTLE_ENDIAN 1
+#define HWY_IS_BIG_ENDIAN 0
+#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
+    __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define HWY_IS_LITTLE_ENDIAN 0
+#define HWY_IS_BIG_ENDIAN 1
+#else
+#error "Unable to detect endianness or unsupported byte order"
+#endif
+
+#if (HWY_IS_LITTLE_ENDIAN + HWY_IS_BIG_ENDIAN) != 1
+#error "Must only detect one byte order"
+#endif
+
+#endif  // HIGHWAY_HWY_DETECT_COMPILER_ARCH_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/detect_targets.h b/third-party/libjxl/libjxl/third_party/highway/hwy/detect_targets.h
new file mode 100644
index 0000000000..c99fc27754
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/detect_targets.h
@@ -0,0 +1,644 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_DETECT_TARGETS_H_
+#define HIGHWAY_HWY_DETECT_TARGETS_H_
+
+// Defines targets and chooses which to enable.
+
+#include "hwy/detect_compiler_arch.h"
+
+//------------------------------------------------------------------------------
+// Optional configuration
+
+// See g3doc/quick_reference.md for documentation of these macros.
+
+// Uncomment to override the default baseline determined from predefined macros:
+// #define HWY_BASELINE_TARGETS (HWY_SSE4 | HWY_SCALAR)
+
+// Uncomment to override the default blocklist:
+// #define HWY_BROKEN_TARGETS HWY_AVX3
+
+// Uncomment to definitely avoid generating those target(s):
+// #define HWY_DISABLED_TARGETS HWY_SSE4
+
+// Uncomment to avoid emitting BMI/BMI2/FMA instructions (allows generating
+// AVX2 target for VMs which support AVX2 but not the other instruction sets)
+// #define HWY_DISABLE_BMI2_FMA
+
+// Uncomment to enable these on MSVC even if the predefined macros are not set.
+// #define HWY_WANT_SSE2 1
+// #define HWY_WANT_SSSE3 1
+// #define HWY_WANT_SSE4 1
+
+//------------------------------------------------------------------------------
+// Targets
+
+// Unique bit value for each target. A lower value is "better" (e.g. more lanes)
+// than a higher value within the same group/platform - see HWY_STATIC_TARGET.
+//
+// All values are unconditionally defined so we can test HWY_TARGETS without
+// first checking the HWY_ARCH_*.
+//
+// The C99 preprocessor evaluates #if expressions using intmax_t types. This
+// holds at least 64 bits in practice (verified 2022-07-18 via Godbolt on
+// 32-bit clang/GCC/MSVC compilers for x86/Arm7/AArch32/RISC-V/WASM). We now
+// avoid overflow when computing HWY_TARGETS (subtracting one instead of
+// left-shifting 2^62), but still do not use bit 63 because it is the sign bit.
+
+// --------------------------- x86: 15 targets (+ one fallback)
+// Bits 0..3 reserved (4 targets)
+#define HWY_AVX3_SPR (1LL << 4)
+// Bit 5 reserved (likely AVX10.2 with 256-bit vectors)
+// Currently HWY_AVX3_DL plus a special case for CompressStore (10x as fast).
+// We may later also use VPCONFLICT.
+#define HWY_AVX3_ZEN4 (1LL << 6)  // see HWY_WANT_AVX3_ZEN4 below
+
+// Currently satisfiable by Ice Lake (VNNI, VPCLMULQDQ, VPOPCNTDQ, VBMI, VBMI2,
+// VAES, BITALG, GFNI). Later to be added: BF16 (Cooper Lake). VP2INTERSECT is
+// only in Tiger Lake?
+#define HWY_AVX3_DL (1LL << 7)  // see HWY_WANT_AVX3_DL below
+#define HWY_AVX3 (1LL << 8)     // HWY_AVX2 plus AVX-512F/BW/CD/DQ/VL
+#define HWY_AVX2 (1LL << 9)     // HWY_SSE4 plus BMI2 + F16 + FMA
+// Bit 10: reserved
+#define HWY_SSE4 (1LL << 11)   // SSE4.2 plus AES + CLMUL
+#define HWY_SSSE3 (1LL << 12)  // S-SSE3
+// Bit 13: reserved for SSE3
+#define HWY_SSE2 (1LL << 14)
+// The highest bit in the HWY_TARGETS mask that an x86 target can have. Used
+// for dynamic dispatch. All x86 target bits must be less than or equal to
+// (1 << HWY_HIGHEST_TARGET_BIT_X86) and they can only use
+// HWY_MAX_DYNAMIC_TARGETS in total.
+#define HWY_HIGHEST_TARGET_BIT_X86 14
+
+// --------------------------- Arm: 15 targets (+ one fallback)
+// Bits 15..23 reserved (9 targets)
+#define HWY_SVE2_128 (1LL << 24)  // specialized target (e.g. Arm N2)
+#define HWY_SVE_256 (1LL << 25)   // specialized target (e.g. Arm V1)
+#define HWY_SVE2 (1LL << 26)
+#define HWY_SVE (1LL << 27)
+#define HWY_NEON (1LL << 28)  // Implies support for AES
+#define HWY_NEON_WITHOUT_AES (1LL << 29)
+#define HWY_HIGHEST_TARGET_BIT_ARM 29
+
+// --------------------------- RISC-V: 9 targets (+ one fallback)
+// Bits 30..36 reserved (7 targets)
+#define HWY_RVV (1LL << 37)
+// Bit 38 reserved
+#define HWY_HIGHEST_TARGET_BIT_RVV 38
+
+// --------------------------- Future expansion: 4 targets
+// Bits 39..42 reserved
+
+// --------------------------- IBM Power: 9 targets (+ one fallback)
+// Bits 43..46 reserved (4 targets)
+#define HWY_PPC10 (1LL << 47)  // v3.1
+#define HWY_PPC9 (1LL << 48)   // v3.0
+#define HWY_PPC8 (1LL << 49)   // v2.07
+// Bits 50..51 reserved for prior VSX/AltiVec (2 targets)
+#define HWY_HIGHEST_TARGET_BIT_PPC 51
+
+// --------------------------- WebAssembly: 9 targets (+ one fallback)
+// Bits 52..57 reserved (6 targets)
+#define HWY_WASM_EMU256 (1LL << 58)  // Experimental
+#define HWY_WASM (1LL << 59)
+// Bit 60 reserved
+#define HWY_HIGHEST_TARGET_BIT_WASM 60
+
+// --------------------------- Emulation: 2 targets
+
+#define HWY_EMU128 (1LL << 61)
+// We do not add/left-shift, so this will not overflow to a negative number.
+#define HWY_SCALAR (1LL << 62)
+#define HWY_HIGHEST_TARGET_BIT_SCALAR 62
+
+// Do not use bit 63 - would be confusing to have negative numbers.
+
+//------------------------------------------------------------------------------
+// Set default blocklists
+
+// Disabled means excluded from enabled at user's request. A separate config
+// macro allows disabling without deactivating the blocklist below.
+#ifndef HWY_DISABLED_TARGETS
+#define HWY_DISABLED_TARGETS 0
+#endif
+
+// Broken means excluded from enabled due to known compiler issues. We define
+// separate HWY_BROKEN_* and then OR them together (more than one might apply).
+
+// x86 clang-6: we saw multiple AVX2/3 compile errors and in one case invalid
+// SSE4 codegen (possibly only for msan), so disable all those targets.
+#if HWY_ARCH_X86 && (HWY_COMPILER_CLANG != 0 && HWY_COMPILER_CLANG < 700)
+
+#define HWY_BROKEN_CLANG6 (HWY_SSE4 | (HWY_SSE4 - 1))
+// This entails a major speed reduction, so warn unless the user explicitly
+// opts in to scalar-only.
+#if !defined(HWY_COMPILE_ONLY_SCALAR)
+#pragma message("x86 Clang <= 6: define HWY_COMPILE_ONLY_SCALAR or upgrade.")
+#endif
+
+#else
+#define HWY_BROKEN_CLANG6 0
+#endif
+
+// 32-bit may fail to compile AVX2/3.
+#if HWY_ARCH_X86_32
+#define HWY_BROKEN_32BIT (HWY_AVX2 | (HWY_AVX2 - 1))
+#else
+#define HWY_BROKEN_32BIT 0
+#endif
+
+// MSVC AVX3 support is buggy: https://github.com/Mysticial/Flops/issues/16
+#if HWY_COMPILER_MSVC != 0
+#define HWY_BROKEN_MSVC (HWY_AVX3 | (HWY_AVX3 - 1))
+#else
+#define HWY_BROKEN_MSVC 0
+#endif
+
+// AVX3_DL and AVX3_ZEN4 require clang >= 7 (ensured above), gcc >= 8.1 or ICC
+// 2021.
+#if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 801) || \
+    (HWY_COMPILER_ICC && HWY_COMPILER_ICC < 2021)
+#define HWY_BROKEN_AVX3_DL_ZEN4 (HWY_AVX3_DL | HWY_AVX3_ZEN4)
+#else
+#define HWY_BROKEN_AVX3_DL_ZEN4 0
+#endif
+
+// AVX3_SPR requires clang >= 14, gcc >= 12, or ICC 2021.
+#if (HWY_COMPILER_CLANG != 0 && HWY_COMPILER_CLANG < 1400) ||      \
+    (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1200) || \
+    (HWY_COMPILER_ICC && HWY_COMPILER_ICC < 2021)
+#define HWY_BROKEN_AVX3_SPR (HWY_AVX3_SPR)
+#else
+#define HWY_BROKEN_AVX3_SPR 0
+#endif
+
+// armv7be has not been tested and is not yet supported.
+#if HWY_ARCH_ARM_V7 && HWY_IS_BIG_ENDIAN
+#define HWY_BROKEN_ARM7_BIG_ENDIAN (HWY_NEON | HWY_NEON_WITHOUT_AES)
+#else
+#define HWY_BROKEN_ARM7_BIG_ENDIAN 0
+#endif
+
+// armv7-a without a detected vfpv4 is not supported
+// (for example Cortex-A8, Cortex-A9)
+// vfpv4 always has NEON half-float _and_ FMA.
+#if HWY_ARCH_ARM_V7 && (__ARM_ARCH_PROFILE == 'A') && \
+    !defined(__ARM_VFPV4__) &&                        \
+    !((__ARM_NEON_FP & 0x2 /* half-float */) && (__ARM_FEATURE_FMA == 1))
+#define HWY_BROKEN_ARM7_WITHOUT_VFP4 (HWY_NEON | HWY_NEON_WITHOUT_AES)
+#else
+#define HWY_BROKEN_ARM7_WITHOUT_VFP4 0
+#endif
+
+// SVE[2] require recent clang or gcc versions.
+#if (HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1100) || \
+    (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1000)
+#define HWY_BROKEN_SVE (HWY_SVE | HWY_SVE2 | HWY_SVE_256 | HWY_SVE2_128)
+#else
+#define HWY_BROKEN_SVE 0
+#endif
+
+#if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1100)
+// GCC 10 supports the -mcpu=power10 option but does not support the PPC10
+// vector intrinsics
+#define HWY_BROKEN_PPC10 (HWY_PPC10)
+#elif HWY_ARCH_PPC && HWY_IS_BIG_ENDIAN &&                                   \
+    ((HWY_COMPILER3_CLANG && HWY_COMPILER3_CLANG < 160001) ||                \
+     (HWY_COMPILER_GCC_ACTUAL >= 1200 && HWY_COMPILER_GCC_ACTUAL <= 1203) || \
+     (HWY_COMPILER_GCC_ACTUAL >= 1300 && HWY_COMPILER_GCC_ACTUAL <= 1301))
+// GCC 12.0 through 12.3 and GCC 13.0 through 13.1 have a compiler bug where the
+// vsldoi instruction is sometimes incorrectly optimized out (and this causes
+// some of the Highway unit tests to fail on big-endian PPC10). Details about
+// this compiler bug can be found at
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109069, and this bug will be
+// fixed in the upcoming GCC 12.4 and 13.2 releases.
+
+// Clang 16.0.0 and earlier (but not Clang 16.0.1 and later) have a compiler
+// bug in the LLVM DAGCombiner that causes a zero-extend followed by an
+// element insert into a vector, followed by a vector shuffle to be incorrectly
+// optimized on big-endian PPC (and which caused some of the Highway unit tests
+// to fail on big-endian PPC10).
+
+// Details about this bug, which has already been fixed in Clang 16.0.1 and
+// later, can be found at https://github.com/llvm/llvm-project/issues/61315.
+#define HWY_BROKEN_PPC10 (HWY_PPC10)
+#else
+#define HWY_BROKEN_PPC10 0
+#endif
+
+// Allow the user to override this without any guarantee of success.
+#ifndef HWY_BROKEN_TARGETS
+
+#define HWY_BROKEN_TARGETS                                     \
+  (HWY_BROKEN_CLANG6 | HWY_BROKEN_32BIT | HWY_BROKEN_MSVC |    \
+   HWY_BROKEN_AVX3_DL_ZEN4 | HWY_BROKEN_AVX3_SPR |             \
+   HWY_BROKEN_ARM7_BIG_ENDIAN | HWY_BROKEN_ARM7_WITHOUT_VFP4 | \
+   HWY_BROKEN_SVE | HWY_BROKEN_PPC10)
+
+#endif  // HWY_BROKEN_TARGETS
+
+// Enabled means not disabled nor blocklisted.
+#define HWY_ENABLED(targets) \
+  ((targets) & ~((HWY_DISABLED_TARGETS) | (HWY_BROKEN_TARGETS)))
+
+// Opt-out for EMU128 (affected by a GCC bug on multiple arches, fixed in 12.3:
+// see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106322). This is separate
+// from HWY_BROKEN_TARGETS because it affects the fallback target, which must
+// always be enabled. If 1, we instead choose HWY_SCALAR even without
+// HWY_COMPILE_ONLY_SCALAR being set.
+#if !defined(HWY_BROKEN_EMU128)  // allow overriding
+#if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1203) || \
+    defined(HWY_NO_LIBCXX)
+#define HWY_BROKEN_EMU128 1
+#else
+#define HWY_BROKEN_EMU128 0
+#endif
+#endif  // HWY_BROKEN_EMU128
+
+//------------------------------------------------------------------------------
+// Detect baseline targets using predefined macros
+
+// Baseline means the targets for which the compiler is allowed to generate
+// instructions, implying the target CPU would have to support them. This does
+// not take the blocklist into account.
+
+#if defined(HWY_COMPILE_ONLY_SCALAR) || HWY_BROKEN_EMU128
+#define HWY_BASELINE_SCALAR HWY_SCALAR
+#else
+#define HWY_BASELINE_SCALAR HWY_EMU128
+#endif
+
+// Also check HWY_ARCH to ensure that simulating unknown platforms ends up with
+// HWY_TARGET == HWY_BASELINE_SCALAR.
+
+#if HWY_ARCH_WASM && defined(__wasm_simd128__)
+#if defined(HWY_WANT_WASM2)
+#define HWY_BASELINE_WASM HWY_WASM_EMU256
+#else
+#define HWY_BASELINE_WASM HWY_WASM
+#endif  // HWY_WANT_WASM2
+#else
+#define HWY_BASELINE_WASM 0
+#endif
+
+// GCC or Clang.
+#if HWY_ARCH_PPC && HWY_COMPILER_GCC && defined(__ALTIVEC__) && \
+    defined(__VSX__) && defined(__POWER8_VECTOR__) &&           \
+    (defined(__CRYPTO__) || defined(HWY_DISABLE_PPC8_CRYPTO))
+#define HWY_BASELINE_PPC8 HWY_PPC8
+#else
+#define HWY_BASELINE_PPC8 0
+#endif
+
+#if HWY_BASELINE_PPC8 != 0 && defined(__POWER9_VECTOR__)
+#define HWY_BASELINE_PPC9 HWY_PPC9
+#else
+#define HWY_BASELINE_PPC9 0
+#endif
+
+#if HWY_BASELINE_PPC9 != 0 && \
+    (defined(_ARCH_PWR10) || defined(__POWER10_VECTOR__))
+#define HWY_BASELINE_PPC10 HWY_PPC10
+#else
+#define HWY_BASELINE_PPC10 0
+#endif
+
+#define HWY_BASELINE_SVE2 0
+#define HWY_BASELINE_SVE 0
+#define HWY_BASELINE_NEON 0
+
+#if HWY_ARCH_ARM
+
+#if defined(__ARM_FEATURE_SVE2)
+#undef HWY_BASELINE_SVE2  // was 0, will be re-defined
+// If user specified -msve-vector-bits=128, they assert the vector length is
+// 128 bits and we should use the HWY_SVE2_128 (more efficient for some ops).
+#if defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS == 128
+#define HWY_BASELINE_SVE2 HWY_SVE2_128
+// Otherwise we're not sure what the vector length will be. The baseline must be
+// unconditionally valid, so we can only assume HWY_SVE2. However, when running
+// on a CPU with 128-bit vectors, user code that supports dynamic dispatch will
+// still benefit from HWY_SVE2_128 because we add it to HWY_ATTAINABLE_TARGETS.
+#else
+#define HWY_BASELINE_SVE2 HWY_SVE2
+#endif  // __ARM_FEATURE_SVE_BITS
+#endif  // __ARM_FEATURE_SVE2
+
+#if defined(__ARM_FEATURE_SVE)
+#undef HWY_BASELINE_SVE  // was 0, will be re-defined
+// See above. If user-specified vector length matches our optimization, use it.
+#if defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS == 256
+#define HWY_BASELINE_SVE HWY_SVE_256
+#else
+#define HWY_BASELINE_SVE HWY_SVE
+#endif  // __ARM_FEATURE_SVE_BITS
+#endif  // __ARM_FEATURE_SVE
+
+// GCC 4.5.4 only defines __ARM_NEON__; 5.4 defines both.
+#if defined(__ARM_NEON__) || defined(__ARM_NEON)
+#undef HWY_BASELINE_NEON
+#if defined(__ARM_FEATURE_AES)
+#define HWY_BASELINE_NEON (HWY_NEON | HWY_NEON_WITHOUT_AES)
+#else
+#define HWY_BASELINE_NEON (HWY_NEON_WITHOUT_AES)
+#endif
+#endif
+
+#endif  // HWY_ARCH_ARM
+
+// Special handling for MSVC because it has fewer predefined macros:
+#if HWY_COMPILER_MSVC
+
+#if HWY_ARCH_X86_32
+#if _M_IX86_FP >= 2
+#define HWY_CHECK_SSE2 1
+#else
+#define HWY_CHECK_SSE2 0
+#endif
+#elif HWY_ARCH_X86_64
+#define HWY_CHECK_SSE2 1
+#else
+#define HWY_CHECK_SSE2 0
+#endif
+
+// 1) We can only be sure SSSE3/SSE4 are enabled if AVX is:
+//    https://stackoverflow.com/questions/18563978/.
+#if defined(__AVX__)
+#define HWY_CHECK_SSSE3 1
+#define HWY_CHECK_SSE4 1
+#else
+#define HWY_CHECK_SSSE3 0
+#define HWY_CHECK_SSE4 0
+#endif
+
+// 2) Cannot check for PCLMUL/AES and BMI2/FMA/F16C individually; we assume
+//    PCLMUL/AES are available if SSE4 is, and BMI2/FMA/F16C if AVX2 is.
+#define HWY_CHECK_PCLMUL_AES 1
+#define HWY_CHECK_BMI2_FMA 1
+#define HWY_CHECK_F16C 1
+
+#else  // non-MSVC
+
+#if defined(__SSE2__)
+#define HWY_CHECK_SSE2 1
+#else
+#define HWY_CHECK_SSE2 0
+#endif
+
+#if defined(__SSSE3__)
+#define HWY_CHECK_SSSE3 1
+#else
+#define HWY_CHECK_SSSE3 0
+#endif
+
+#if defined(__SSE4_1__) && defined(__SSE4_2__)
+#define HWY_CHECK_SSE4 1
+#else
+#define HWY_CHECK_SSE4 0
+#endif
+
+// If these are disabled, they should not gate the availability of SSE4/AVX2.
+#if defined(HWY_DISABLE_PCLMUL_AES) || (defined(__PCLMUL__) && defined(__AES__))
+#define HWY_CHECK_PCLMUL_AES 1
+#else
+#define HWY_CHECK_PCLMUL_AES 0
+#endif
+
+#if defined(HWY_DISABLE_BMI2_FMA) || (defined(__BMI2__) && defined(__FMA__))
+#define HWY_CHECK_BMI2_FMA 1
+#else
+#define HWY_CHECK_BMI2_FMA 0
+#endif
+
+#if defined(HWY_DISABLE_F16C) || defined(__F16C__)
+#define HWY_CHECK_F16C 1
+#else
+#define HWY_CHECK_F16C 0
+#endif
+
+#endif  // non-MSVC
+
+#if HWY_ARCH_X86 && (HWY_WANT_SSE2 || HWY_CHECK_SSE2)
+#define HWY_BASELINE_SSE2 HWY_SSE2
+#else
+#define HWY_BASELINE_SSE2 0
+#endif
+
+#if HWY_ARCH_X86 && (HWY_WANT_SSSE3 || HWY_CHECK_SSSE3)
+#define HWY_BASELINE_SSSE3 HWY_SSSE3
+#else
+#define HWY_BASELINE_SSSE3 0
+#endif
+
+#if HWY_ARCH_X86 && (HWY_WANT_SSE4 || (HWY_CHECK_SSE4 && HWY_CHECK_PCLMUL_AES))
+#define HWY_BASELINE_SSE4 HWY_SSE4
+#else
+#define HWY_BASELINE_SSE4 0
+#endif
+
+#if HWY_BASELINE_SSE4 != 0 && HWY_CHECK_BMI2_FMA && HWY_CHECK_F16C && \
+    defined(__AVX2__)
+#define HWY_BASELINE_AVX2 HWY_AVX2
+#else
+#define HWY_BASELINE_AVX2 0
+#endif
+
+// Require everything in AVX2 plus AVX-512 flags (also set by MSVC)
+#if HWY_BASELINE_AVX2 != 0 && defined(__AVX512F__) && defined(__AVX512BW__) && \
+    defined(__AVX512DQ__) && defined(__AVX512VL__)
+#define HWY_BASELINE_AVX3 HWY_AVX3
+#else
+#define HWY_BASELINE_AVX3 0
+#endif
+
+// TODO(janwas): not yet known whether these will be set by MSVC
+#if HWY_BASELINE_AVX3 != 0 && defined(__AVX512VNNI__) && defined(__VAES__) && \
+    defined(__VPCLMULQDQ__) && defined(__AVX512VBMI__) &&                     \
+    defined(__AVX512VBMI2__) && defined(__AVX512VPOPCNTDQ__) &&               \
+    defined(__AVX512BITALG__)
+#define HWY_BASELINE_AVX3_DL HWY_AVX3_DL
+#else
+#define HWY_BASELINE_AVX3_DL 0
+#endif
+
+// The ZEN4-optimized AVX3 target is numerically lower than AVX3_DL and is thus
+// considered better. Do not enable it unless the user explicitly requests it -
+// we do not want to choose the ZEN4 path on Intel because it could be slower.
+#if defined(HWY_WANT_AVX3_ZEN4) && HWY_BASELINE_AVX3_DL != 0
+#define HWY_BASELINE_AVX3_ZEN4 HWY_AVX3_ZEN4
+#else
+#define HWY_BASELINE_AVX3_ZEN4 0
+#endif
+
+#if HWY_BASELINE_AVX3_DL != 0 && defined(__AVX512FP16__)
+#define HWY_BASELINE_AVX3_SPR HWY_AVX3_SPR
+#else
+#define HWY_BASELINE_AVX3_SPR 0
+#endif
+
+// RVV requires intrinsics 0.11 or later, see #1156.
+#if HWY_ARCH_RVV && defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 11000
+#define HWY_BASELINE_RVV HWY_RVV
+#else
+#define HWY_BASELINE_RVV 0
+#endif
+
+// Allow the user to override this without any guarantee of success.
+#ifndef HWY_BASELINE_TARGETS
+#define HWY_BASELINE_TARGETS                                           \
+  (HWY_BASELINE_SCALAR | HWY_BASELINE_WASM | HWY_BASELINE_PPC8 |       \
+   HWY_BASELINE_PPC9 | HWY_BASELINE_PPC10 | HWY_BASELINE_SVE2 |        \
+   HWY_BASELINE_SVE | HWY_BASELINE_NEON | HWY_BASELINE_SSE2 |          \
+   HWY_BASELINE_SSSE3 | HWY_BASELINE_SSE4 | HWY_BASELINE_AVX2 |        \
+   HWY_BASELINE_AVX3 | HWY_BASELINE_AVX3_DL | HWY_BASELINE_AVX3_ZEN4 | \
+   HWY_BASELINE_AVX3_SPR | HWY_BASELINE_RVV)
+#endif  // HWY_BASELINE_TARGETS
+
+//------------------------------------------------------------------------------
+// Choose target for static dispatch
+
+#define HWY_ENABLED_BASELINE HWY_ENABLED(HWY_BASELINE_TARGETS)
+#if HWY_ENABLED_BASELINE == 0
+#error "At least one baseline target must be defined and enabled"
+#endif
+
+// Best baseline, used for static dispatch. This is the least-significant 1-bit
+// within HWY_ENABLED_BASELINE and lower bit values imply "better".
+#define HWY_STATIC_TARGET (HWY_ENABLED_BASELINE & -HWY_ENABLED_BASELINE)
+
+// Start by assuming static dispatch. If we later use dynamic dispatch, this
+// will be defined to other targets during the multiple-inclusion, and finally
+// return to the initial value. Defining this outside begin/end_target ensures
+// inl headers successfully compile by themselves (required by Bazel).
+#define HWY_TARGET HWY_STATIC_TARGET
+
+//------------------------------------------------------------------------------
+// Choose targets for dynamic dispatch according to one of four policies
+
+#if 1 < (defined(HWY_COMPILE_ONLY_SCALAR) + defined(HWY_COMPILE_ONLY_EMU128) + \
+         defined(HWY_COMPILE_ONLY_STATIC))
+#error "Can only define one of HWY_COMPILE_ONLY_{SCALAR|EMU128|STATIC} - bug?"
+#endif
+// Defining one of HWY_COMPILE_ONLY_* will trump HWY_COMPILE_ALL_ATTAINABLE.
+
+// Clang, GCC and MSVC allow runtime dispatch on x86.
+#if HWY_ARCH_X86
+#define HWY_HAVE_RUNTIME_DISPATCH 1
+// On Arm/PPC, currently only GCC does, and we require Linux to detect CPU
+// capabilities.
+#elif (HWY_ARCH_ARM || HWY_ARCH_PPC) && HWY_COMPILER_GCC_ACTUAL && \
+    HWY_OS_LINUX && !defined(TOOLCHAIN_MISS_SYS_AUXV_H)
+#define HWY_HAVE_RUNTIME_DISPATCH 1
+#else
+#define HWY_HAVE_RUNTIME_DISPATCH 0
+#endif
+
+// AVX3_DL is not widely available yet. To reduce code size and compile time,
+// only include it in the set of attainable targets (for dynamic dispatch) if
+// the user opts in, OR it is in the baseline (we check whether enabled below).
+#if defined(HWY_WANT_AVX3_DL) || (HWY_BASELINE_TARGETS & HWY_AVX3_DL)
+#define HWY_ATTAINABLE_AVX3_DL (HWY_AVX3_DL)
+#else
+#define HWY_ATTAINABLE_AVX3_DL 0
+#endif
+
+#if HWY_ARCH_ARM_A64 && HWY_HAVE_RUNTIME_DISPATCH
+#define HWY_ATTAINABLE_NEON (HWY_NEON | HWY_NEON_WITHOUT_AES)
+#elif HWY_ARCH_ARM  // static dispatch, or HWY_ARCH_ARM_V7
+#define HWY_ATTAINABLE_NEON (HWY_BASELINE_NEON)
+#else
+#define HWY_ATTAINABLE_NEON 0
+#endif
+
+#if HWY_ARCH_ARM_A64 && (HWY_HAVE_RUNTIME_DISPATCH || \
+                         (HWY_ENABLED_BASELINE & (HWY_SVE | HWY_SVE_256)))
+#define HWY_ATTAINABLE_SVE (HWY_SVE | HWY_SVE_256)
+#else
+#define HWY_ATTAINABLE_SVE 0
+#endif
+
+#if HWY_ARCH_ARM_A64 && (HWY_HAVE_RUNTIME_DISPATCH || \
+                         (HWY_ENABLED_BASELINE & (HWY_SVE2 | HWY_SVE2_128)))
+#define HWY_ATTAINABLE_SVE2 (HWY_SVE2 | HWY_SVE2_128)
+#else
+#define HWY_ATTAINABLE_SVE2 0
+#endif
+
+#if HWY_ARCH_PPC && defined(__ALTIVEC__) && \
+    (!HWY_COMPILER_CLANG || HWY_BASELINE_PPC8 != 0)
+#define HWY_ATTAINABLE_PPC (HWY_PPC8 | HWY_PPC9 | HWY_PPC10)
+#else
+#define HWY_ATTAINABLE_PPC 0
+#endif
+
+// Attainable means enabled and the compiler allows intrinsics (even when not
+// allowed to autovectorize). Used in 3 and 4.
+#if HWY_ARCH_X86
+#define HWY_ATTAINABLE_TARGETS                                               \
+  HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_SSE2 | HWY_SSSE3 | HWY_SSE4 |        \
+              HWY_AVX2 | HWY_AVX3 | HWY_ATTAINABLE_AVX3_DL | HWY_AVX3_ZEN4 | \
+              HWY_AVX3_SPR)
+#elif HWY_ARCH_ARM
+#define HWY_ATTAINABLE_TARGETS                                                 \
+  HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_NEON | HWY_ATTAINABLE_SVE | \
+              HWY_ATTAINABLE_SVE2)
+#elif HWY_ARCH_PPC
+#define HWY_ATTAINABLE_TARGETS \
+  HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_PPC)
+#else
+#define HWY_ATTAINABLE_TARGETS (HWY_ENABLED_BASELINE)
+#endif  // HWY_ARCH_*
+
+// 1) For older compilers: avoid SIMD intrinsics, but still support all ops.
+#if defined(HWY_COMPILE_ONLY_EMU128) && !HWY_BROKEN_EMU128
+#undef HWY_STATIC_TARGET
+#define HWY_STATIC_TARGET HWY_EMU128  // override baseline
+#define HWY_TARGETS HWY_EMU128
+
+// 1b) HWY_SCALAR is less capable than HWY_EMU128 (which supports all ops), but
+// we currently still support it for backwards compatibility.
+#elif defined(HWY_COMPILE_ONLY_SCALAR) || \
+    (defined(HWY_COMPILE_ONLY_EMU128) && HWY_BROKEN_EMU128)
+#undef HWY_STATIC_TARGET
+#define HWY_STATIC_TARGET HWY_SCALAR  // override baseline
+#define HWY_TARGETS HWY_SCALAR
+
+// 2) For forcing static dispatch without code changes (removing HWY_EXPORT)
+#elif defined(HWY_COMPILE_ONLY_STATIC)
+#define HWY_TARGETS HWY_STATIC_TARGET
+
+// 3) For tests: include all attainable targets (in particular: scalar)
+#elif defined(HWY_COMPILE_ALL_ATTAINABLE) || defined(HWY_IS_TEST)
+#define HWY_TARGETS HWY_ATTAINABLE_TARGETS
+
+// 4) Default: attainable WITHOUT non-best baseline. This reduces code size by
+// excluding superseded targets, in particular scalar. Note: HWY_STATIC_TARGET
+// may be 2^62 (HWY_SCALAR), so we must not left-shift/add it. Subtracting one
+// sets all lower bits (better targets), then we also include the static target.
+#else
+#define HWY_TARGETS \
+  (HWY_ATTAINABLE_TARGETS & ((HWY_STATIC_TARGET - 1LL) | HWY_STATIC_TARGET))
+
+#endif  // target policy
+
+// HWY_ONCE and the multiple-inclusion mechanism rely on HWY_STATIC_TARGET being
+// one of the dynamic targets. This also implies HWY_TARGETS != 0 and
+// (HWY_TARGETS & HWY_ENABLED_BASELINE) != 0.
+#if (HWY_TARGETS & HWY_STATIC_TARGET) == 0
+#error "Logic error: best baseline should be included in dynamic targets"
+#endif
+
+#endif  // HIGHWAY_HWY_DETECT_TARGETS_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/examples/benchmark.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/examples/benchmark.cc
new file mode 100644
index 0000000000..98aae6819b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/examples/benchmark.cc
@@ -0,0 +1,253 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <stdlib.h>  // abort
+
+#include <cmath>  // std::abs
+#include <memory>
+#include <numeric>  // std::iota, std::inner_product
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/examples/benchmark.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// Must come after foreach_target.h to avoid redefinition errors.
+#include "hwy/aligned_allocator.h"
+#include "hwy/highway.h"
+#include "hwy/nanobenchmark.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+#if HWY_TARGET != HWY_SCALAR
+using hwy::HWY_NAMESPACE::CombineShiftRightLanes;
+#endif
+
+// Two float arrays of NumItems() each, carved from one aligned allocation.
+class TwoArray {
+ public:
+  // Must be a multiple of the vector lane count * 8.
+  static size_t NumItems() { return 3456; }
+  // Fills both arrays with the same ascending sequence (std::iota).
+  TwoArray()
+      : a_(AllocateAligned<float>(NumItems() * 2)), b_(a_.get() + NumItems()) {
+    // Evaluates to 1 at runtime, but the compiler cannot know that.
+    const float first = static_cast<float>(Unpredictable1());
+    std::iota(b_, b_ + NumItems(), first);
+    std::iota(a_.get(), b_, first);  // b_ is also the end of the first array
+  }
+ protected:
+  AlignedFreeUniquePtr<float[]> a_;  // owns the allocation; first array
+  float* b_;                         // second array: a_.get() + NumItems()
+};
+
+// Times `Benchmark` with MeasureClosure, checks its output, prints timings.
+template <class Benchmark>
+void RunBenchmark(const char* caption) {
+  printf("%10s: ", caption);
+  const size_t kNumInputs = 1;
+  const size_t num_items = Benchmark::NumItems() * size_t(Unpredictable1());
+  const FuncInput inputs[kNumInputs] = {num_items};
+  Result results[kNumInputs];
+
+  Benchmark benchmark;
+  const auto run = [&benchmark](const FuncInput in) { return benchmark(in); };
+
+  Params params;
+  params.verbose = false;
+  params.max_evals = 7;
+  params.target_rel_mad = 0.002;
+  const size_t num_results =
+      MeasureClosure(run, inputs, kNumInputs, results, params);
+  // A short count is only reported; verification and printing still proceed.
+  if (num_results != kNumInputs) fprintf(stderr, "MeasureClosure failed.\n");
+
+  benchmark.Verify(num_items);
+
+  // One line per measured input: ticks per item and the scaled variability.
+  for (size_t i = 0; i < num_results; ++i) {
+    const Result& r = results[i];
+    const double cycles_per_item = r.ticks / static_cast<double>(r.input);
+    const double mad = r.variability * cycles_per_item;
+    printf("%6d: %6.3f (+/- %5.3f)\n", static_cast<int>(r.input),
+           cycles_per_item, mad);
+  }
+}
+
+void Intro() {
+  const float in[16] = {1, 2, 3, 4, 5, 6};
+  float out[16];
+  // Demo: out = 2 * in^2. Capped at 16 lanes so no access leaves the arrays.
+  const HWY_CAPPED(float, 16) d;
+  for (size_t i = 0; i < 16; i += Lanes(d)) {
+    const auto vec = LoadU(d, in + i);  // no alignment requirement
+    const auto squared = Mul(vec, vec);
+    StoreU(Add(squared, squared), d, out + i);  // 2 * x^2
+  }
+  printf("\nF(x)->2*x^2, F(%.0f) = %.1f\n", in[2], out[2]);
+}
+
+// BEGINNER: dot product
+// 0.4 cyc/float = bronze, 0.25 = silver, 0.15 = gold!
+class BenchmarkDot : public TwoArray {
+ public:
+  BenchmarkDot() : dot_{-1.0f} {}  // -1 = operator() has not run yet
+  // Returns the dot product of the first num_items elements of both arrays.
+  FuncOutput operator()(const size_t num_items) {
+    const ScalableTag<float> d;
+    const size_t N = Lanes(d);
+    using V = decltype(Zero(d));
+    // Compiler doesn't make independent sum* accumulators, so unroll manually.
+    // We cannot use an array because V might be a sizeless type. For reasonable
+    // code, we unroll 4x, but 8x might help (2 FMA ports * 4 cycle latency).
+    V sum0 = Zero(d);
+    V sum1 = Zero(d);
+    V sum2 = Zero(d);
+    V sum3 = Zero(d);
+    const float* const HWY_RESTRICT pa = &a_[0];
+    const float* const HWY_RESTRICT pb = b_;
+    for (size_t i = 0; i < num_items; i += 4 * N) {
+      const auto a0 = Load(d, pa + i + 0 * N);
+      const auto b0 = Load(d, pb + i + 0 * N);
+      sum0 = MulAdd(a0, b0, sum0);
+      const auto a1 = Load(d, pa + i + 1 * N);
+      const auto b1 = Load(d, pb + i + 1 * N);
+      sum1 = MulAdd(a1, b1, sum1);
+      const auto a2 = Load(d, pa + i + 2 * N);
+      const auto b2 = Load(d, pb + i + 2 * N);
+      sum2 = MulAdd(a2, b2, sum2);
+      const auto a3 = Load(d, pa + i + 3 * N);
+      const auto b3 = Load(d, pb + i + 3 * N);
+      sum3 = MulAdd(a3, b3, sum3);
+    }
+    // Reduction tree: sum of all accumulators by pairs into sum0.
+    sum0 = Add(sum0, sum1);
+    sum2 = Add(sum2, sum3);
+    sum0 = Add(sum0, sum2);
+    // Remember to store the result in `dot_` for verification; see `Verify`.
+    dot_ = ReduceSum(d, sum0);
+    // Return the result so that the benchmarking framework can ensure that the
+    // computation is not elided by the compiler.
+    return static_cast<FuncOutput>(dot_);
+  }
+  // Aborts unless dot_ is within 1.1E-6 relative error of std::inner_product.
+  void Verify(size_t num_items) {
+    if (dot_ == -1.0f) {
+      fprintf(stderr, "Dot: must call Verify after benchmark\n");
+      abort();
+    }
+    const float expected =
+        std::inner_product(a_.get(), a_.get() + num_items, b_, 0.0f);
+    const float rel_err = std::abs(expected - dot_) / expected;
+    if (rel_err > 1.1E-6f) {
+      fprintf(stderr, "Dot: expected %e actual %e (%e)\n", expected, dot_,
+              rel_err);
+      abort();
+    }
+  }
+
+ private:
+  float dot_;  // for Verify
+};
+
+// INTERMEDIATE: delta coding
+// 1.0 cycles/float = bronze, 0.7 = silver, 0.4 = gold!
+struct BenchmarkDelta : public TwoArray {
+  FuncOutput operator()(const size_t num_items) const {
+#if HWY_TARGET == HWY_SCALAR
+    b_[0] = a_[0];
+    for (size_t i = 1; i < num_items; ++i) {
+      b_[i] = a_[i] - a_[i - 1];
+    }
+#elif HWY_CAP_GE256
+    // Larger vectors are split into 128-bit blocks, easiest to use the
+    // unaligned load support to shift between them.
+    const ScalableTag<float> df;
+    const size_t N = Lanes(df);
+    size_t i;
+    b_[0] = a_[0];
+    for (i = 1; i < N; ++i) {
+      b_[i] = a_[i] - a_[i - 1];
+    }
+    for (; i < num_items; i += N) {
+      const auto a = Load(df, &a_[i]);
+      const auto shifted = LoadU(df, &a_[i - 1]);
+      Store(Sub(a, shifted), df, &b_[i]);  // named op, as in the 128-bit path
+    }
+#else  // 128-bit
+    // Slightly better than unaligned loads
+    const HWY_CAPPED(float, 4) df;
+    const size_t N = Lanes(df);
+    size_t i;
+    b_[0] = a_[0];
+    for (i = 1; i < N; ++i) {
+      b_[i] = a_[i] - a_[i - 1];
+    }
+    auto prev = Load(df, &a_[0]);
+    for (; i < num_items; i += Lanes(df)) {
+      const auto a = Load(df, &a_[i]);
+      const auto shifted = CombineShiftRightLanes<3>(df, a, prev);
+      prev = a;
+      Store(Sub(a, shifted), df, &b_[i]);
+    }
+#endif
+    return static_cast<FuncOutput>(b_[num_items - 1]);
+  }
+  // Prints every b_[i] that is off by more than 1E-6; does not abort.
+  void Verify(size_t num_items) {
+    for (size_t i = 0; i < num_items; ++i) {
+      const float expected = (i == 0) ? a_[0] : a_[i] - a_[i - 1];
+      const float err = std::abs(expected - b_[i]);
+      if (err > 1E-6f) {
+        fprintf(stderr, "Delta: expected %e, actual %e\n", expected, b_[i]);
+      }
+    }
+  }
+};
+
+void RunBenchmarks() {  // per-target entry point; exported via HWY_EXPORT
+  Intro();
+  printf("------------------------ %s\n", TargetName(HWY_TARGET));
+  RunBenchmark<BenchmarkDot>("dot");
+  RunBenchmark<BenchmarkDelta>("delta");
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+HWY_EXPORT(RunBenchmarks);
+
+void Run() {  // dispatches RunBenchmarks once per listed target
+  for (int64_t target : SupportedAndGeneratedTargets()) {
+    SetSupportedTargetsForTest(target);  // presumably pins dispatch to `target`
+    HWY_DYNAMIC_DISPATCH(RunBenchmarks)();
+  }
+  SetSupportedTargetsForTest(0);  // Reset the mask afterwards.
+}
+
+}  // namespace hwy
+
+int main(int /*argc*/, char** /*argv*/) {  // command-line arguments are unused
+  hwy::Run();
+  return 0;
+}
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/examples/skeleton-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/examples/skeleton-inl.h
new file mode 100644
index 0000000000..8c7c6569e0
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/examples/skeleton-inl.h
@@ -0,0 +1,64 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Demo of functions that might be called from multiple SIMD modules (either
+// other -inl.h files, or a .cc file between begin/end_target-inl). This is
+// optional - all SIMD code can reside in .cc files. However, this allows
+// splitting code into different files while still inlining instead of requiring
+// calling through function pointers.
+
+// Per-target include guard. This is only required when using dynamic dispatch,
+// i.e. including foreach_target.h. For static dispatch, a normal include
+// guard would be fine because the header is only compiled once.
+#if defined(HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
+#undef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
+#else
+#define HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
+#endif
+
+// It is fine to #include normal or *-inl headers.
+#include "hwy/highway.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace skeleton {
+namespace HWY_NAMESPACE {
+
+// Highway ops reside here; ADL does not find templates nor builtins.
+namespace hn = hwy::HWY_NAMESPACE;
+
+// Header-defined example that is type-agnostic (the caller picks lane type T)
+// and width-agnostic (the vector width follows from the tag `d`).
+//
+// In-place: x_array[i] = mul_array[i] * x_array[i] + add_array[i], i < size.
+template <class D, typename T>
+HWY_MAYBE_UNUSED void MulAddLoop(const D d, const T* HWY_RESTRICT mul_array,
+                                 const T* HWY_RESTRICT add_array,
+                                 const size_t size, T* HWY_RESTRICT x_array) {
+  const size_t N = hn::Lanes(d);  // the loop always steps by whole vectors
+  for (size_t pos = 0; pos < size; pos += N) {
+    const auto v_mul = hn::Load(d, mul_array + pos);
+    const auto v_add = hn::Load(d, add_array + pos);
+    const auto v_x = hn::Load(d, x_array + pos);
+    hn::Store(hn::MulAdd(v_mul, v_x, v_add), d, x_array + pos);
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace skeleton
+HWY_AFTER_NAMESPACE();
+
+#endif  // include guard
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/examples/skeleton.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/examples/skeleton.cc
new file mode 100644
index 0000000000..8ac6f77986
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/examples/skeleton.cc
@@ -0,0 +1,129 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/examples/skeleton.h"
+
+#include <stdio.h>
+
+// >>>> for dynamic dispatch only, skip if you want static dispatch
+
+// First undef to prevent error when re-included.
+#undef HWY_TARGET_INCLUDE
+// For dynamic dispatch, specify the name of the current file (unfortunately
+// __FILE__ is not reliable) so that foreach_target.h can re-include it.
+#define HWY_TARGET_INCLUDE "hwy/examples/skeleton.cc"
+// Generates code for each enabled target by re-including this source file.
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// <<<< end of dynamic dispatch
+
+// Must come after foreach_target.h to avoid redefinition errors.
+#include "hwy/highway.h"
+
+// Optional, can instead add HWY_ATTR to all functions.
+HWY_BEFORE_NAMESPACE();
+
+namespace skeleton {
+// This namespace name is unique per target, which allows code for multiple
+// targets to co-exist in the same translation unit. Required when using dynamic
+// dispatch, otherwise optional.
+namespace HWY_NAMESPACE {
+
+// Highway ops reside here; ADL does not find templates nor builtins.
+namespace hn = hwy::HWY_NAMESPACE;
+
+// For Lanes(df) bytes: log2[i] = (float exponent bits of values[i]) - 127,
+// obtained by converting each byte to float. Compiled once per target.
+template <class DF>
+HWY_ATTR_NO_MSAN void OneFloorLog2(const DF df,
+                                   const uint8_t* HWY_RESTRICT values,
+                                   uint8_t* HWY_RESTRICT log2) {
+  // Tags with the same lane count as df but other element types.
+  const hn::Rebind<uint8_t, DF> du8;
+  const hn::RebindToSigned<DF> di32;
+  using VI32 = hn::Vec<decltype(di32)>;
+  const VI32 widened = hn::PromoteTo(di32, hn::Load(du8, values));
+  const VI32 float_bits = hn::BitCast(di32, hn::ConvertTo(df, widened));
+  const VI32 biased = hn::ShiftRight<23>(float_bits);
+  hn::Store(hn::DemoteTo(du8, hn::Sub(biased, hn::Set(di32, 127))), du8, log2);
+}
+
+// Prints the current target's name and whether HWY_HAVE_INTEGER64 is set.
+void CodepathDemo() {
+  // Highway defaults to portability; per-target codepaths can be selected via
+  // #if HWY_TARGET == HWY_SSE4 or, as here, by testing capability macros.
+#if HWY_HAVE_INTEGER64
+  printf("Target %s: %s\n", hwy::TargetName(HWY_TARGET), "Has int64");
+#else
+  printf("Target %s: %s\n", hwy::TargetName(HWY_TARGET), "No int64");
+#endif
+}
+
+// Fills log2[0, count): whole vectors of N values first, then the remaining
+// values one at a time using a tag capped to a single lane.
+void FloorLog2(const uint8_t* HWY_RESTRICT values, size_t count,
+               uint8_t* HWY_RESTRICT log2) {
+  CodepathDemo();
+  const hn::ScalableTag<float> df;
+  const hn::CappedTag<float, 1> d1;
+  const size_t N = hn::Lanes(df);
+  size_t pos = 0;
+  while (pos + N <= count) {
+    OneFloorLog2(df, values + pos, log2 + pos);
+    pos += N;
+  }
+  for (; pos < count; ++pos) OneFloorLog2(d1, values + pos, log2 + pos);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace skeleton
+HWY_AFTER_NAMESPACE();
+
+// The table of pointers to the various implementations in HWY_NAMESPACE must
+// be compiled only once (foreach_target #includes this file multiple times).
+// HWY_ONCE is true for only one of these 'compilation passes'.
+#if HWY_ONCE
+
+namespace skeleton {
+
+// This macro declares a static array used for dynamic dispatch; it resides in
+// the same outer namespace that contains FloorLog2.
+HWY_EXPORT(FloorLog2);
+
+// Forwards (in, count, out) to the FloorLog2 selected by dynamic dispatch.
+// This wrapper is only needed to expose the function in the header file;
+// inside this module, HWY_DYNAMIC_DISPATCH(FloorLog2) alone is equivalent.
+HWY_DLLEXPORT void CallFloorLog2(const uint8_t* HWY_RESTRICT in,
+                                 const size_t count,
+                                 uint8_t* HWY_RESTRICT out) {
+  // This must reside outside of HWY_NAMESPACE because it references (calls the
+  // appropriate one from) the per-target implementations there.
+  // For static dispatch, use HWY_STATIC_DISPATCH instead.
+  return HWY_DYNAMIC_DISPATCH(FloorLog2)(in, count, out);
+}
+
+HWY_DLLEXPORT void SavedCallFloorLog2(const uint8_t* HWY_RESTRICT in,
+                                      const size_t count,
+                                      uint8_t* HWY_RESTRICT out) {
+  const auto ptr = HWY_DYNAMIC_POINTER(FloorLog2);  // save the pointer first
+  return ptr(in, count, out);  // then call FloorLog2 through it
+}
+
+// Optional: anything to compile only once, e.g. non-SIMD implementations of
+// public functions provided by this module, can go inside #if HWY_ONCE.
+
+}  // namespace skeleton
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/examples/skeleton.h b/third-party/libjxl/libjxl/third_party/highway/hwy/examples/skeleton.h
new file mode 100644
index 0000000000..134193404a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/examples/skeleton.h
@@ -0,0 +1,38 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Demo interface to target-specific code in skeleton.cc
+
+// Normal header with include guard and namespace.
+#ifndef HIGHWAY_HWY_EXAMPLES_SKELETON_H_
+#define HIGHWAY_HWY_EXAMPLES_SKELETON_H_
+
+// Platform-specific definitions used for declaring an interface, independent of
+// the SIMD instruction set.
+#include "hwy/base.h"  // HWY_RESTRICT
+
+namespace skeleton {
+
+// out[i] = base-2 log of in[i] for i < count (via float). Dynamic dispatch.
+HWY_DLLEXPORT void CallFloorLog2(const uint8_t* HWY_RESTRICT in, size_t count,
+                                 uint8_t* HWY_RESTRICT out);
+
+// Same, but saves the HWY_DYNAMIC_POINTER function pointer, then calls it.
+HWY_DLLEXPORT void SavedCallFloorLog2(const uint8_t* HWY_RESTRICT in,
+                                      size_t count, uint8_t* HWY_RESTRICT out);
+
+}  // namespace skeleton
+
+#endif  // HIGHWAY_HWY_EXAMPLES_SKELETON_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/examples/skeleton_test.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/examples/skeleton_test.cc
new file mode 100644
index 0000000000..58911da4e2
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/examples/skeleton_test.cc
@@ -0,0 +1,123 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Example of unit test for the "skeleton" library.
+
+#include "hwy/examples/skeleton.h"
+
+#include <stdio.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "examples/skeleton_test.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+
+// Must come after foreach_target.h to avoid redefinition errors.
+#include "hwy/highway.h"
+#include "hwy/nanobenchmark.h"  // Unpredictable1
+#include "hwy/tests/test_util-inl.h"
+
+// Optional: factor out parts of the implementation into *-inl.h
+// (must also come after foreach_target.h to avoid redefinition errors)
+#include "hwy/examples/skeleton-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace skeleton {
+namespace HWY_NAMESPACE {
+
+namespace hn = hwy::HWY_NAMESPACE;
+
+// Checks CallFloorLog2 and SavedCallFloorLog2 (skeleton.cc) on powers of two.
+struct TestFloorLog2 {
+  template <class T, class DF>
+  HWY_NOINLINE void operator()(T /*unused*/, DF df) {
+    const size_t count = 5 * hn::Lanes(df);
+    auto in = hwy::AllocateAligned<uint8_t>(count);
+    auto expected = hwy::AllocateAligned<uint8_t>(count);
+    auto out = hwy::AllocateAligned<uint8_t>(count);
+    HWY_ASSERT(in && expected && out);
+
+    hwy::RandomState rng;
+    for (size_t i = 0; i < count; ++i) {
+      expected[i] = Random32(&rng) & 7;  // exponent in [0, 7]
+      in[i] = static_cast<uint8_t>(1u << expected[i]);  // 2^expected[i]
+    }
+    CallFloorLog2(in.get(), count, out.get());
+    int sum = 0;
+    for (size_t i = 0; i < count; ++i) {
+      HWY_ASSERT_EQ(expected[i], out[i]);
+      sum += out[i];
+    }
+
+    for (size_t i = 0; i < count; ++i) {  // overwrite the first call's output
+      out[i] = static_cast<uint8_t>(hwy::Unpredictable1());
+    }
+
+    SavedCallFloorLog2(in.get(), count, out.get());
+    for (size_t i = 0; i < count; ++i) {
+      HWY_ASSERT_EQ(expected[i], out[i]);
+      sum += out[i];
+    }
+
+    hwy::PreventElision(sum);  // presumably keeps the reads of out alive
+  }
+};
+
+HWY_NOINLINE void TestAllFloorLog2() {  // Instantiated for float lanes only.
+  hn::ForPartialVectors<TestFloorLog2>()(float());
+}
+
+// Runs MulAddLoop (skeleton-inl.h) and compares its output to a scalar sum.
+struct TestSumMulAdd {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    hwy::RandomState rng;
+    const size_t count = 4096;
+    HWY_ASSERT_EQ(size_t{0}, count % hn::Lanes(d));  // loop has no remainder
+    auto mul = hwy::AllocateAligned<T>(count);
+    auto x = hwy::AllocateAligned<T>(count);
+    auto add = hwy::AllocateAligned<T>(count);
+    double expected_sum = 0.0;  // scalar reference, taken before x changes
+    for (size_t i = 0; i < count; ++i) {
+      mul[i] = static_cast<T>(Random32(&rng) & 0xF);
+      x[i] = static_cast<T>(Random32(&rng) & 0xFF);
+      add[i] = static_cast<T>(Random32(&rng) & 0xFF);
+      expected_sum += mul[i] * x[i] + add[i];
+    }
+    HWY_ASSERT_EQ(4344240.0, expected_sum);
+    MulAddLoop(d, mul.get(), add.get(), count, x.get());  // updates x in place
+    double vector_sum = 0.0;  // every term is an integer <= 4080, hence exact
+    for (size_t i = 0; i < count; ++i) vector_sum += x[i];
+    HWY_ASSERT_EQ(expected_sum, vector_sum);  // verifies MulAddLoop's output
+  }
+};
+
+HWY_NOINLINE void TestAllSumMulAdd() {  // Instantiated via ForFloatTypes.
+  hn::ForFloatTypes(hn::ForPartialVectors<TestSumMulAdd>());
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace skeleton
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace skeleton {
+HWY_BEFORE_TEST(SkeletonTest);
+HWY_EXPORT_AND_TEST_P(SkeletonTest, TestAllFloorLog2);
+HWY_EXPORT_AND_TEST_P(SkeletonTest, TestAllSumMulAdd);
+}  // namespace skeleton
+
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/foreach_target.h b/third-party/libjxl/libjxl/third_party/highway/hwy/foreach_target.h
new file mode 100644
index 0000000000..ca3e5a246f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/foreach_target.h
@@ -0,0 +1,340 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_FOREACH_TARGET_H_
+#define HIGHWAY_HWY_FOREACH_TARGET_H_
+
+// Re-includes the translation unit zero or more times to compile for any
+// targets except HWY_STATIC_TARGET. Defines unique HWY_TARGET each time so that
+// highway.h defines the corresponding macro/namespace.
+
+#include "hwy/detect_targets.h"
+
+// *-inl.h may include other headers, which requires include guards to prevent
+// repeated inclusion. The guards must be reset after compiling each target, so
+// the header is again visible. This is done by flipping HWY_TARGET_TOGGLE,
+// defining it if undefined and vice versa. This macro is initially undefined
+// so that IDEs don't gray out the contents of each header.
+#ifdef HWY_TARGET_TOGGLE
+#error "This macro must not be defined outside foreach_target.h"
+#endif
+
+#ifdef HWY_HIGHWAY_INCLUDED  // highway.h include guard
+// Trigger fixup at the bottom of this header.
+#define HWY_ALREADY_INCLUDED
+
+// The next highway.h must re-include set_macros-inl.h because the first
+// highway.h chose the static target instead of what we will set below.
+#undef HWY_SET_MACROS_PER_TARGET
+#endif
+
+// Disable HWY_EXPORT in user code until we have generated all targets. Note
+// that a subsequent highway.h will not override this definition.
+#undef HWY_ONCE
+#define HWY_ONCE (0 || HWY_IDE)
+
+// Avoid warnings on #include HWY_TARGET_INCLUDE by hiding them from the IDE;
+// also skip if only 1 target defined (no re-inclusion will be necessary).
+#if !HWY_IDE && (HWY_TARGETS != HWY_STATIC_TARGET)
+
+#if !defined(HWY_TARGET_INCLUDE)
+#error ">1 target enabled => define HWY_TARGET_INCLUDE before foreach_target.h"
+#endif
+
+// ------------------------------ HWY_ARCH_X86
+
+#if (HWY_TARGETS & HWY_SSE2) && (HWY_STATIC_TARGET != HWY_SSE2)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_SSE2
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_SSSE3) && (HWY_STATIC_TARGET != HWY_SSSE3)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_SSSE3
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_SSE4) && (HWY_STATIC_TARGET != HWY_SSE4)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_SSE4
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_AVX2) && (HWY_STATIC_TARGET != HWY_AVX2)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_AVX2
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_AVX3) && (HWY_STATIC_TARGET != HWY_AVX3)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_AVX3
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_AVX3_DL) && (HWY_STATIC_TARGET != HWY_AVX3_DL)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_AVX3_DL
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_AVX3_ZEN4) && (HWY_STATIC_TARGET != HWY_AVX3_ZEN4)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_AVX3_ZEN4
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_AVX3_SPR) && (HWY_STATIC_TARGET != HWY_AVX3_SPR)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_AVX3_SPR
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+// ------------------------------ HWY_ARCH_ARM
+
+#if (HWY_TARGETS & HWY_NEON_WITHOUT_AES) && \
+    (HWY_STATIC_TARGET != HWY_NEON_WITHOUT_AES)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_NEON_WITHOUT_AES
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_NEON) && (HWY_STATIC_TARGET != HWY_NEON)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_NEON
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_SVE) && (HWY_STATIC_TARGET != HWY_SVE)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_SVE
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_SVE2) && (HWY_STATIC_TARGET != HWY_SVE2)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_SVE2
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_SVE_256) && (HWY_STATIC_TARGET != HWY_SVE_256)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_SVE_256
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_SVE2_128) && (HWY_STATIC_TARGET != HWY_SVE2_128)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_SVE2_128
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+// ------------------------------ HWY_ARCH_WASM
+
+#if (HWY_TARGETS & HWY_WASM_EMU256) && (HWY_STATIC_TARGET != HWY_WASM_EMU256)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_WASM_EMU256
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_WASM) && (HWY_STATIC_TARGET != HWY_WASM)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_WASM
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+// ------------------------------ HWY_ARCH_PPC
+
+#if (HWY_TARGETS & HWY_PPC8) && (HWY_STATIC_TARGET != HWY_PPC8)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_PPC8
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_PPC9) && (HWY_STATIC_TARGET != HWY_PPC9)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_PPC9
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_PPC10) && (HWY_STATIC_TARGET != HWY_PPC10)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_PPC10
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+// ------------------------------ HWY_ARCH_RVV
+
+#if (HWY_TARGETS & HWY_RVV) && (HWY_STATIC_TARGET != HWY_RVV)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_RVV
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+// ------------------------------ Scalar
+
+#if (HWY_TARGETS & HWY_EMU128) && (HWY_STATIC_TARGET != HWY_EMU128)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_EMU128
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_SCALAR) && (HWY_STATIC_TARGET != HWY_SCALAR)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_SCALAR
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#endif  // !HWY_IDE && (HWY_TARGETS != HWY_STATIC_TARGET)
+
+// Now that all but the static target have been generated, re-enable HWY_EXPORT.
+#undef HWY_ONCE
+#define HWY_ONCE 1
+
+// If we re-include once per enabled target, the translation unit's
+// implementation would have to be skipped via #if to avoid redefining symbols.
+// We instead skip the re-include for HWY_STATIC_TARGET, and generate its
+// implementation when resuming compilation of the translation unit.
+#undef HWY_TARGET
+#define HWY_TARGET HWY_STATIC_TARGET
+
+#ifdef HWY_ALREADY_INCLUDED
+// Revert the previous toggle to prevent redefinitions for the static target.
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+
+// Force re-inclusion of set_macros-inl.h now that HWY_TARGET is restored.
+#ifdef HWY_SET_MACROS_PER_TARGET
+#undef HWY_SET_MACROS_PER_TARGET
+#else
+#define HWY_SET_MACROS_PER_TARGET
+#endif
+#endif
+
+#endif  // HIGHWAY_HWY_FOREACH_TARGET_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/highway.h b/third-party/libjxl/libjxl/third_party/highway/hwy/highway.h
new file mode 100644
index 0000000000..99d74619db
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/highway.h
@@ -0,0 +1,435 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Main header required before using vector types.
+
+// IWYU pragma: begin_exports
+#include "hwy/base.h"
+#include "hwy/detect_compiler_arch.h"
+#include "hwy/highway_export.h"
+#include "hwy/targets.h"
+// IWYU pragma: end_exports
+
+// This include guard is checked by foreach_target, so avoid the usual _H_
+// suffix to prevent copybara from renaming it. NOTE: ops/*-inl.h are included
+// after/outside this include guard.
+#ifndef HWY_HIGHWAY_INCLUDED
+#define HWY_HIGHWAY_INCLUDED
+
+namespace hwy {
+
+// API version (https://semver.org/); keep in sync with CMakeLists.txt.
+#define HWY_MAJOR 1
+#define HWY_MINOR 0
+#define HWY_PATCH 6
+
+//------------------------------------------------------------------------------
+// Shorthand for tags (defined in shared-inl.h) used to select overloads.
+// Note that ScalableTag<T> is preferred over HWY_FULL, and CappedTag<T, N> over
+// HWY_CAPPED(T, N).
+
+// HWY_FULL(T[,LMUL=1]) is a native vector/group. LMUL is the number of
+// registers in the group, and is ignored on targets that do not support groups.
+#define HWY_FULL1(T) hwy::HWY_NAMESPACE::ScalableTag<T>
+#define HWY_FULL2(T, LMUL) \
+  hwy::HWY_NAMESPACE::ScalableTag<T, hwy::CeilLog2(HWY_MAX(0, LMUL))>
+#define HWY_3TH_ARG(arg1, arg2, arg3, ...) arg3
+// Workaround for MSVC grouping __VA_ARGS__ into a single argument
+#define HWY_FULL_RECOMPOSER(args_with_paren) HWY_3TH_ARG args_with_paren
+// Trailing comma avoids -pedantic false alarm
+#define HWY_CHOOSE_FULL(...) \
+  HWY_FULL_RECOMPOSER((__VA_ARGS__, HWY_FULL2, HWY_FULL1, ))
+#define HWY_FULL(...) HWY_CHOOSE_FULL(__VA_ARGS__())(__VA_ARGS__)
+
+// Vector of up to MAX_N lanes. It's better to use full vectors where possible.
+#define HWY_CAPPED(T, MAX_N) hwy::HWY_NAMESPACE::CappedTag<T, MAX_N>
+
+//------------------------------------------------------------------------------
+// Export user functions for static/dynamic dispatch
+
+// Evaluates to 0 inside a translation unit if it is generating anything but the
+// static target (the last one if multiple targets are enabled). Used to prevent
+// redefinitions of HWY_EXPORT. Unless foreach_target.h is included, we only
+// compile once anyway, so this is 1 unless it is or has been included.
+#ifndef HWY_ONCE
+#define HWY_ONCE 1
+#endif
+
+// HWY_STATIC_DISPATCH(FUNC_NAME) is the namespace-qualified FUNC_NAME for
+// HWY_STATIC_TARGET (the only defined namespace unless HWY_TARGET_INCLUDE is
+// defined), and can be used to deduce the return type of Choose*.
+#if HWY_STATIC_TARGET == HWY_SCALAR
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SCALAR::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_EMU128
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_EMU128::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_RVV
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_RVV::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_WASM_EMU256
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_WASM_EMU256::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_WASM
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_WASM::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_NEON_WITHOUT_AES
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON_WITHOUT_AES::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_NEON
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_SVE
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_SVE2
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE2::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_SVE_256
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE_256::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_SVE2_128
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE2_128::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_PPC8
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC8::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_PPC9
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC9::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_PPC10
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC10::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_SSE2
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSE2::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_SSSE3
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSSE3::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_SSE4
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSE4::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_AVX2
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX2::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_AVX3
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_AVX3_DL
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3_DL::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_AVX3_ZEN4
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3_ZEN4::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_AVX3_SPR
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3_SPR::FUNC_NAME
+#endif
+
+// HWY_CHOOSE_*(FUNC_NAME) expands to the function pointer for that target or
+// nullptr if that target was not compiled.
+#if HWY_TARGETS & HWY_EMU128
+#define HWY_CHOOSE_FALLBACK(FUNC_NAME) &N_EMU128::FUNC_NAME
+#elif HWY_TARGETS & HWY_SCALAR
+#define HWY_CHOOSE_FALLBACK(FUNC_NAME) &N_SCALAR::FUNC_NAME
+#else
+// When HWY_SCALAR/HWY_EMU128 are not present and other targets were disabled at
+// runtime, fall back to the baseline with HWY_STATIC_DISPATCH().
+#define HWY_CHOOSE_FALLBACK(FUNC_NAME) &HWY_STATIC_DISPATCH(FUNC_NAME)
+#endif
+
+#if HWY_TARGETS & HWY_WASM_EMU256
+#define HWY_CHOOSE_WASM_EMU256(FUNC_NAME) &N_WASM_EMU256::FUNC_NAME
+#else
+#define HWY_CHOOSE_WASM_EMU256(FUNC_NAME) nullptr
+#endif
+
+#if HWY_TARGETS & HWY_WASM
+#define HWY_CHOOSE_WASM(FUNC_NAME) &N_WASM::FUNC_NAME
+#else
+#define HWY_CHOOSE_WASM(FUNC_NAME) nullptr
+#endif
+
+#if HWY_TARGETS & HWY_RVV
+#define HWY_CHOOSE_RVV(FUNC_NAME) &N_RVV::FUNC_NAME
+#else
+#define HWY_CHOOSE_RVV(FUNC_NAME) nullptr
+#endif
+
+#if HWY_TARGETS & HWY_NEON_WITHOUT_AES  // enabled => address, else nullptr
+#define HWY_CHOOSE_NEON_WITHOUT_AES(FUNC_NAME) &N_NEON_WITHOUT_AES::FUNC_NAME
+#else
+#define HWY_CHOOSE_NEON_WITHOUT_AES(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_NEON_WITHOUT_AES
+
+#if HWY_TARGETS & HWY_NEON
+#define HWY_CHOOSE_NEON(FUNC_NAME) &N_NEON::FUNC_NAME
+#else
+#define HWY_CHOOSE_NEON(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_NEON
+
+#if HWY_TARGETS & HWY_SVE
+#define HWY_CHOOSE_SVE(FUNC_NAME) &N_SVE::FUNC_NAME
+#else
+#define HWY_CHOOSE_SVE(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_SVE
+
+#if HWY_TARGETS & HWY_SVE2
+#define HWY_CHOOSE_SVE2(FUNC_NAME) &N_SVE2::FUNC_NAME
+#else
+#define HWY_CHOOSE_SVE2(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_SVE2
+
+#if HWY_TARGETS & HWY_SVE_256
+#define HWY_CHOOSE_SVE_256(FUNC_NAME) &N_SVE_256::FUNC_NAME
+#else
+#define HWY_CHOOSE_SVE_256(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_SVE_256
+
+#if HWY_TARGETS & HWY_SVE2_128
+#define HWY_CHOOSE_SVE2_128(FUNC_NAME) &N_SVE2_128::FUNC_NAME
+#else
+#define HWY_CHOOSE_SVE2_128(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_SVE2_128
+
+#if HWY_TARGETS & HWY_PPC8
+#define HWY_CHOOSE_PPC8(FUNC_NAME) &N_PPC8::FUNC_NAME
+#else
+#define HWY_CHOOSE_PPC8(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_PPC8
+
+#if HWY_TARGETS & HWY_PPC9
+#define HWY_CHOOSE_PPC9(FUNC_NAME) &N_PPC9::FUNC_NAME
+#else
+#define HWY_CHOOSE_PPC9(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_PPC9
+
+#if HWY_TARGETS & HWY_PPC10
+#define HWY_CHOOSE_PPC10(FUNC_NAME) &N_PPC10::FUNC_NAME
+#else
+#define HWY_CHOOSE_PPC10(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_PPC10
+
+#if HWY_TARGETS & HWY_SSE2
+#define HWY_CHOOSE_SSE2(FUNC_NAME) &N_SSE2::FUNC_NAME
+#else
+#define HWY_CHOOSE_SSE2(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_SSE2
+
+#if HWY_TARGETS & HWY_SSSE3
+#define HWY_CHOOSE_SSSE3(FUNC_NAME) &N_SSSE3::FUNC_NAME
+#else
+#define HWY_CHOOSE_SSSE3(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_SSSE3
+
+#if HWY_TARGETS & HWY_SSE4
+#define HWY_CHOOSE_SSE4(FUNC_NAME) &N_SSE4::FUNC_NAME
+#else
+#define HWY_CHOOSE_SSE4(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_SSE4
+
+#if HWY_TARGETS & HWY_AVX2
+#define HWY_CHOOSE_AVX2(FUNC_NAME) &N_AVX2::FUNC_NAME
+#else
+#define HWY_CHOOSE_AVX2(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_AVX2
+
+#if HWY_TARGETS & HWY_AVX3
+#define HWY_CHOOSE_AVX3(FUNC_NAME) &N_AVX3::FUNC_NAME
+#else
+#define HWY_CHOOSE_AVX3(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_AVX3
+
+#if HWY_TARGETS & HWY_AVX3_DL
+#define HWY_CHOOSE_AVX3_DL(FUNC_NAME) &N_AVX3_DL::FUNC_NAME
+#else
+#define HWY_CHOOSE_AVX3_DL(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_AVX3_DL
+
+#if HWY_TARGETS & HWY_AVX3_ZEN4
+#define HWY_CHOOSE_AVX3_ZEN4(FUNC_NAME) &N_AVX3_ZEN4::FUNC_NAME
+#else
+#define HWY_CHOOSE_AVX3_ZEN4(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_AVX3_ZEN4
+
+#if HWY_TARGETS & HWY_AVX3_SPR
+#define HWY_CHOOSE_AVX3_SPR(FUNC_NAME) &N_AVX3_SPR::FUNC_NAME
+#else
+#define HWY_CHOOSE_AVX3_SPR(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_AVX3_SPR
+
+// MSVC 2017 workaround: the non-type template parameter to ChooseAndCall
+// apparently cannot be an array there. A function pointer is used instead,
+// with the drawback that the first call to any HWY_DYNAMIC_DISPATCH runs the
+// static (not the best) target.
+#if HWY_COMPILER_MSVC && HWY_COMPILER_MSVC < 1915
+#define HWY_DISPATCH_WORKAROUND 1
+#else
+#define HWY_DISPATCH_WORKAROUND 0
+#endif  // HWY_COMPILER_MSVC && HWY_COMPILER_MSVC < 1915
+
+// Supplies the static member function that HWY_EXPORT places in slot zero of
+// each dispatch table. It is what the first HWY_DYNAMIC_DISPATCH reaches,
+// while GetIndex is still zero.
+template <typename RetType, typename... Args>
+struct FunctionCache {
+ public:
+  using FunctionType = RetType(Args...);
+
+#if HWY_DISPATCH_WORKAROUND
+  template <FunctionType* const func>
+  static RetType ChooseAndCall(Args... args) {
+    // Refresh the chosen target, then run the function passed as `func`.
+    GetChosenTarget().Update(SupportedTargets());
+    return func(args...);
+  }
+#else
+  // When instantiated, this template has the same signature as the exported
+  // function. It updates the chosen target from SupportedTargets() and then
+  // calls whichever entry of the HWY_EXPORT table the updated index selects.
+  // Later calls via HWY_DYNAMIC_DISPATCH to any exported function, even one
+  // defined in a different translation unit, then dispatch directly to the
+  // best available target.
+  template <FunctionType* const table[]>
+  static RetType ChooseAndCall(Args... args) {
+    ChosenTarget& target = GetChosenTarget();
+    target.Update(SupportedTargets());
+    return table[target.GetIndex()](args...);
+  }
+#endif  // HWY_DISPATCH_WORKAROUND
+};
+
+// Maps a function pointer to the FunctionCache with matching RetType/Args.
+template <typename RetType, typename... Args>
+FunctionCache<RetType, Args...> DeduceFunctionCache(RetType (*)(Args...)) {
+  return {};
+}
+
+#define HWY_DISPATCH_TABLE(FUNC_NAME) \
+  HWY_CONCAT(FUNC_NAME, HighwayDispatchTable)  // table name; assumes pasting
+
+// HWY_EXPORT(FUNC_NAME); expands to a static array that is used by
+// HWY_DYNAMIC_DISPATCH() to call the appropriate function at runtime. This
+// static array must be defined at the same namespace level as the function
+// it is exporting.
+// After being exported, it can be called from other parts of the same source
+// file using HWY_DYNAMIC_DISPATCH(), in particular from a function wrapper
+// like in the following example:
+//
+//   #include "hwy/highway.h"
+//   HWY_BEFORE_NAMESPACE();
+//   namespace skeleton {
+//   namespace HWY_NAMESPACE {
+//
+//   void MyFunction(int a, char b, const char* c) { ... }
+//
+//   // NOLINTNEXTLINE(google-readability-namespace-comments)
+//   }  // namespace HWY_NAMESPACE
+//   }  // namespace skeleton
+//   HWY_AFTER_NAMESPACE();
+//
+//   namespace skeleton {
+//   HWY_EXPORT(MyFunction);  // Defines the dispatch table in this scope.
+//
+//   void MyFunction(int a, char b, const char* c) {
+//     return HWY_DYNAMIC_DISPATCH(MyFunction)(a, b, c);
+//   }
+//   }  // namespace skeleton
+//
+
+#if HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
+
+// Simplified version for an IDE, or when HWY_TARGETS has at most one bit set.
+// A table is still defined, although with a single element, so that the same
+// code fails to compile here as it would with the multi-target dynamic
+// dispatch below.
+#define HWY_EXPORT(FUNC_NAME)                                             \
+  HWY_MAYBE_UNUSED static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const \
+  HWY_DISPATCH_TABLE(FUNC_NAME)[1] = {&HWY_STATIC_DISPATCH(FUNC_NAME)}
+#define HWY_DYNAMIC_DISPATCH(FUNC_NAME) HWY_STATIC_DISPATCH(FUNC_NAME)
+#define HWY_DYNAMIC_POINTER(FUNC_NAME) &HWY_STATIC_DISPATCH(FUNC_NAME)
+
+#else  // not an IDE and more than one bit set in HWY_TARGETS
+
+// Variant for MSVC 2017: slot zero is instantiated with a function pointer
+// (the static target) instead of with the table itself.
+#if HWY_DISPATCH_WORKAROUND
+
+#define HWY_EXPORT(FUNC_NAME)                                                \
+  static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \
+      FUNC_NAME)[HWY_MAX_DYNAMIC_TARGETS + 2] = {                            \
+      /* The first entry in the table initializes the global cache and       \
+       * calls the function from HWY_STATIC_TARGET. */                       \
+      &decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH(               \
+          FUNC_NAME)))::ChooseAndCall<&HWY_STATIC_DISPATCH(FUNC_NAME)>,      \
+      HWY_CHOOSE_TARGET_LIST(FUNC_NAME),                                     \
+      HWY_CHOOSE_FALLBACK(FUNC_NAME),                                        \
+  }
+
+#else  // !HWY_DISPATCH_WORKAROUND
+
+// Dynamic dispatch case: the table holds the initialization wrapper, then one
+// entry per dynamic target, then the fallback target.
+#define HWY_EXPORT(FUNC_NAME)                                                \
+  static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \
+      FUNC_NAME)[HWY_MAX_DYNAMIC_TARGETS + 2] = {                            \
+      /* The first entry in the table initializes the global cache and       \
+       * calls the appropriate function. */                                  \
+      &decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH(               \
+          FUNC_NAME)))::ChooseAndCall<HWY_DISPATCH_TABLE(FUNC_NAME)>,        \
+      HWY_CHOOSE_TARGET_LIST(FUNC_NAME),                                     \
+      HWY_CHOOSE_FALLBACK(FUNC_NAME),                                        \
+  }
+
+#endif  // HWY_DISPATCH_WORKAROUND
+
+#define HWY_DYNAMIC_DISPATCH(FUNC_NAME) \
+  (*(HWY_DISPATCH_TABLE(FUNC_NAME)[hwy::GetChosenTarget().GetIndex()]))
+#define HWY_DYNAMIC_POINTER(FUNC_NAME) \
+  (HWY_DISPATCH_TABLE(FUNC_NAME)[hwy::GetChosenTarget().GetIndex()])
+
+#endif  // HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
+
+// DEPRECATED aliases kept for existing code; please use HWY_HAVE_* instead.
+#define HWY_CAP_INTEGER64 HWY_HAVE_INTEGER64
+#define HWY_CAP_FLOAT16 HWY_HAVE_FLOAT16
+#define HWY_CAP_FLOAT64 HWY_HAVE_FLOAT64
+
+}  // namespace hwy
+
+#endif  // HWY_HIGHWAY_INCLUDED
+
+//------------------------------------------------------------------------------
+
+// NOTE: the following definitions and ops/*.h depend on HWY_TARGET, so we want
+// to include them once per target, which is ensured by the toggle check.
+// Because ops/*.h are included under it, they do not need their own guard.
+#if defined(HWY_HIGHWAY_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
+#ifdef HWY_HIGHWAY_PER_TARGET
+#undef HWY_HIGHWAY_PER_TARGET
+#else
+#define HWY_HIGHWAY_PER_TARGET
+#endif  // flip HWY_HIGHWAY_PER_TARGET so the check above fails next time
+
+// Each header below defines the ops inside namespace hwy::HWY_NAMESPACE.
+#if HWY_TARGET == HWY_SSE2 || HWY_TARGET == HWY_SSSE3 || HWY_TARGET == HWY_SSE4
+#include "hwy/ops/x86_128-inl.h"
+#elif HWY_TARGET == HWY_AVX2
+#include "hwy/ops/x86_256-inl.h"
+#elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL || \
+    HWY_TARGET == HWY_AVX3_ZEN4 || HWY_TARGET == HWY_AVX3_SPR
+#include "hwy/ops/x86_512-inl.h"
+#elif HWY_TARGET == HWY_PPC8 || HWY_TARGET == HWY_PPC9 || \
+    HWY_TARGET == HWY_PPC10
+#include "hwy/ops/ppc_vsx-inl.h"
+#elif HWY_TARGET == HWY_NEON || HWY_TARGET == HWY_NEON_WITHOUT_AES
+#include "hwy/ops/arm_neon-inl.h"
+#elif HWY_TARGET == HWY_SVE || HWY_TARGET == HWY_SVE2 || \
+    HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128
+#include "hwy/ops/arm_sve-inl.h"
+#elif HWY_TARGET == HWY_WASM_EMU256
+#include "hwy/ops/wasm_256-inl.h"
+#elif HWY_TARGET == HWY_WASM
+#include "hwy/ops/wasm_128-inl.h"
+#elif HWY_TARGET == HWY_RVV
+#include "hwy/ops/rvv-inl.h"
+#elif HWY_TARGET == HWY_EMU128
+#include "hwy/ops/emu128-inl.h"
+#elif HWY_TARGET == HWY_SCALAR
+#include "hwy/ops/scalar-inl.h"
+#else  // no branch matched: emit a compile-time message, include nothing
+#pragma message("HWY_TARGET does not match any known target")
+#endif  // HWY_TARGET
+
+#include "hwy/ops/generic_ops-inl.h"
+
+#endif  // HWY_HIGHWAY_PER_TARGET
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/highway_export.h b/third-party/libjxl/libjxl/third_party/highway/hwy/highway_export.h
new file mode 100644
index 0000000000..30edc17d01
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/highway_export.h
@@ -0,0 +1,74 @@
+// Pseudo-generated file to handle both cmake & bazel build system.
+
+// Initial generation done using cmake code:
+// include(GenerateExportHeader)
+// generate_export_header(hwy EXPORT_MACRO_NAME HWY_DLLEXPORT EXPORT_FILE_NAME
+// hwy/highway_export.h)
+// code reformatted using clang-format --style=Google
+
+#ifndef HWY_DLLEXPORT_H
+#define HWY_DLLEXPORT_H
+
+#if !defined(HWY_SHARED_DEFINE)  // all three export macros expand to nothing
+#define HWY_DLLEXPORT
+#define HWY_CONTRIB_DLLEXPORT
+#define HWY_TEST_DLLEXPORT
+#else  // HWY_SHARED_DEFINE is defined: export macros carry attributes
+
+#ifndef HWY_DLLEXPORT
+#if defined(hwy_EXPORTS)
+/* We are building this library */
+#ifdef _WIN32
+#define HWY_DLLEXPORT __declspec(dllexport)
+#else
+#define HWY_DLLEXPORT __attribute__((visibility("default")))
+#endif  // _WIN32
+#else  // defined(hwy_EXPORTS)
+/* We are using this library */
+#ifdef _WIN32
+#define HWY_DLLEXPORT __declspec(dllimport)
+#else
+#define HWY_DLLEXPORT __attribute__((visibility("default")))
+#endif  // _WIN32
+#endif  // defined(hwy_EXPORTS)
+#endif  // HWY_DLLEXPORT
+
+#ifndef HWY_CONTRIB_DLLEXPORT
+#if defined(hwy_contrib_EXPORTS)
+/* We are building this library */
+#ifdef _WIN32
+#define HWY_CONTRIB_DLLEXPORT __declspec(dllexport)
+#else
+#define HWY_CONTRIB_DLLEXPORT __attribute__((visibility("default")))
+#endif  // _WIN32
+#else  // defined(hwy_contrib_EXPORTS)
+/* We are using this library */
+#ifdef _WIN32
+#define HWY_CONTRIB_DLLEXPORT __declspec(dllimport)
+#else
+#define HWY_CONTRIB_DLLEXPORT __attribute__((visibility("default")))
+#endif  // _WIN32
+#endif  // defined(hwy_contrib_EXPORTS)
+#endif  // HWY_CONTRIB_DLLEXPORT
+
+#ifndef HWY_TEST_DLLEXPORT
+#if defined(hwy_test_EXPORTS)
+/* We are building this library */
+#ifdef _WIN32
+#define HWY_TEST_DLLEXPORT __declspec(dllexport)
+#else
+#define HWY_TEST_DLLEXPORT __attribute__((visibility("default")))
+#endif  // _WIN32
+#else  // defined(hwy_test_EXPORTS)
+/* We are using this library */
+#ifdef _WIN32
+#define HWY_TEST_DLLEXPORT __declspec(dllimport)
+#else
+#define HWY_TEST_DLLEXPORT __attribute__((visibility("default")))
+#endif  // _WIN32
+#endif  // defined(hwy_test_EXPORTS)
+#endif  // HWY_TEST_DLLEXPORT
+
+#endif  // !HWY_SHARED_DEFINE
+
+#endif /* HWY_DLLEXPORT_H */
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/highway_test.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/highway_test.cc
new file mode 100644
index 0000000000..50d36ea739
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/highway_test.cc
@@ -0,0 +1,591 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+
+#include <algorithm>  // std::fill
+#include <bitset>
+#include <string>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "highway_test.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+#include "hwy/highway.h"
+#include "hwy/nanobenchmark.h"  // Unpredictable1
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+template <size_t kLimit, typename T>
+HWY_NOINLINE void TestCappedLimit(T /* tag */) {
+  CappedTag<T, kLimit> d;
+  // Smoke test: Zero and Set must both compile and agree.
+  HWY_ASSERT_VEC_EQ(d, Zero(d), Set(d, T{0}));
+
+  // A Store must leave every lane at index kLimit or above untouched.
+  const size_t num_lanes = Lanes(d);
+  if (kLimit >= num_lanes) return;  // no lanes beyond the cap to inspect
+  auto buffer = AllocateAligned<T>(num_lanes);
+  HWY_ASSERT(buffer);
+  T* const begin = buffer.get();
+  std::fill(begin, begin + num_lanes, T{0});
+  Store(Set(d, T{1}), d, begin);
+  for (size_t lane = kLimit; lane < num_lanes; ++lane) {
+    HWY_ASSERT_EQ(buffer[lane], T{0});
+  }
+}
+
+// Functor passed to ForAllTypes. The lane limits are applied through
+// TestCappedLimit, so ForPartialVectors etc. are not used here.
+struct TestCapped {
+  template <typename T>
+  void operator()(T tag) const {
+    TestCappedLimit<1>(tag);
+    TestCappedLimit<3>(tag);
+    TestCappedLimit<5>(tag);
+    TestCappedLimit<1ull << 15>(tag);
+  }
+};
+
+HWY_NOINLINE void TestAllCapped() { ForAllTypes(TestCapped()); }
+
+// Bit i is set once a vector with i lanes has been visited.
+using NumLanesSet = std::bitset<HWY_MAX_BYTES + 1>;
+
+// Function-local statics hold the shared state (monostate pattern), because
+// ForPartialVectors takes the test as a template argument, not by reference.
+static NumLanesSet* NumLanesForSize(size_t sizeof_t) {
+  HWY_ASSERT(sizeof_t <= sizeof(uint64_t));
+  static NumLanesSet visited[sizeof(uint64_t) + 1];
+  return &visited[sizeof_t];
+}
+static size_t* MaxLanesForSize(size_t sizeof_t) {
+  HWY_ASSERT(sizeof_t <= sizeof(uint64_t));
+  static size_t largest[sizeof(uint64_t) + 1] = {0};
+  return &largest[sizeof_t];
+}
+
+struct TestMaxLanes {  // checks Lanes <= MaxLanes and records what was seen
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) const {
+    const size_t actual = Lanes(d);
+    const size_t upper_bound = MaxLanes(d);  // for RVV, includes LMUL
+    HWY_ASSERT(actual <= upper_bound);
+    HWY_ASSERT(upper_bound <= (HWY_MAX_BYTES / sizeof(T)));
+    NumLanesForSize(sizeof(T))->set(actual);  // mark this lane count visited
+    size_t* const max_seen = MaxLanesForSize(sizeof(T));
+    if (*max_seen < actual) *max_seen = actual;  // keep the running maximum
+  }
+};
+
+class TestFracNLanes {  // runs TestMaxLanes on tags with a changed kPow2
+ private:
+  template <int kNewPow2, class D>
+  using DWithPow2 =
+      Simd<TFromD<D>, D::template NewN<kNewPow2, HWY_MAX_LANES_D(D)>(),
+           kNewPow2>;  // same lane type as D, N from D::NewN, new kPow2
+
+  template <typename T1, size_t N1, int kPow2, typename T2, size_t N2>
+  static HWY_INLINE void DoTestFracNLanes(Simd<T1, N1, 0> /*d1*/,
+                                          Simd<T2, N2, kPow2> d2) {
+    using D2 = Simd<T2, N2, kPow2>;
+    static_assert(IsSame<T1, T2>(), "T1 and T2 should be the same type");
+    static_assert(N2 > HWY_MAX_BYTES, "N2 > HWY_MAX_BYTES should be true");
+    static_assert(HWY_MAX_LANES_D(D2) == N1,
+                  "HWY_MAX_LANES_D(D2) should be equal to N1");
+    static_assert(N1 <= HWY_LANES(T2), "N1 <= HWY_LANES(T2) should be true");
+
+    TestMaxLanes()(T2(), d2);  // reuse the runtime lane-count checks for d2
+  }
+
+#if HWY_TARGET != HWY_SCALAR
+  template <class T, HWY_IF_LANES_LE(4, HWY_LANES(T))>
+  static HWY_INLINE void DoTest4LanesWithPow3(T /*unused*/) {
+    // Selected when HWY_LANES(T) >= 4: run DoTestFracNLanes for the
+    // MaxLanes(d) == 4, kPow2 == 3 case.
+    const Simd<T, 4, 0> d;
+    DoTestFracNLanes(d, DWithPow2<3, decltype(d)>());
+  }
+  template <class T, HWY_IF_LANES_GT(4, HWY_LANES(T))>
+  static HWY_INLINE void DoTest4LanesWithPow3(T /*unused*/) {
+    // Selected when HWY_LANES(T) < 4: intentionally does nothing.
+  }
+#endif
+
+ public:
+  template <class T>
+  HWY_NOINLINE void operator()(T /*unused*/) const {
+    const Simd<T, 1, 0> d1;  // one lane, with kPow2 of 1, 2 and 3
+    DoTestFracNLanes(d1, DWithPow2<1, decltype(d1)>());
+    DoTestFracNLanes(d1, DWithPow2<2, decltype(d1)>());
+    DoTestFracNLanes(d1, DWithPow2<3, decltype(d1)>());
+
+#if HWY_TARGET != HWY_SCALAR
+    const Simd<T, 2, 0> d2;  // two lanes, with kPow2 of 2 and 3
+    DoTestFracNLanes(d2, DWithPow2<2, decltype(d2)>());
+    DoTestFracNLanes(d2, DWithPow2<3, decltype(d2)>());
+
+    DoTest4LanesWithPow3(T());
+#endif
+  }
+};
+
+HWY_NOINLINE void TestAllMaxLanes() {
+  ForAllTypes(ForPartialVectors<TestMaxLanes>());
+
+  // Each power of two up to the recorded maximum must have been visited.
+  const size_t kLaneSizes[] = {sizeof(uint8_t), sizeof(uint16_t),
+                               sizeof(uint32_t), sizeof(uint64_t)};
+  for (const size_t sizeof_t : kLaneSizes) {
+    const size_t max_lanes = *MaxLanesForSize(sizeof_t);
+    for (size_t lanes = 1; lanes <= max_lanes; lanes *= 2) {
+      if (NumLanesForSize(sizeof_t)->test(lanes)) continue;
+      fprintf(stderr, "T=%d: did not visit for N=%d, max=%d\n",
+              static_cast<int>(sizeof_t), static_cast<int>(lanes),
+              static_cast<int>(max_lanes));
+      HWY_ASSERT(false);
+    }
+  }
+
+  ForAllTypes(TestFracNLanes());
+}
+
+struct TestSet {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    // Zero: all lanes must read back as 0.
+    const Vec<D> zeros = Zero(d);
+    const size_t num_lanes = Lanes(d);
+    auto expected = AllocateAligned<T>(num_lanes);
+    HWY_ASSERT(expected);
+    T* const lanes = expected.get();
+    std::fill(lanes, lanes + num_lanes, T{0});
+    HWY_ASSERT_VEC_EQ(d, lanes, zeros);
+
+    // Set: all lanes must hold the broadcast value.
+    const Vec<D> twos = Set(d, T{2});
+    std::fill(lanes, lanes + num_lanes, T{2});
+    HWY_ASSERT_VEC_EQ(d, lanes, twos);
+
+    // Iota: lane i must hold 5 + i.
+    const Vec<D> sequence = Iota(d, T(5));
+    for (size_t lane = 0; lane < num_lanes; ++lane) {
+      lanes[lane] = T(5 + lane);
+    }
+    HWY_ASSERT_VEC_EQ(d, lanes, sequence);
+
+    // Undefined: only stored, never compared. Compilers may still report
+    // 'using uninitialized memory' here even though Undefined already
+    // suppresses such warnings, so they are disabled around the store.
+    HWY_DIAGNOSTICS(push)
+    HWY_DIAGNOSTICS_OFF(disable : 4700, ignored "-Wuninitialized")
+#if HWY_COMPILER_GCC_ACTUAL
+    HWY_DIAGNOSTICS_OFF(disable : 4701, ignored "-Wmaybe-uninitialized")
+#endif
+    const Vec<D> undefined = Undefined(d);
+    Store(undefined, d, lanes);
+    HWY_DIAGNOSTICS(pop)
+  }
+};
+
+HWY_NOINLINE void TestAllSet() { ForAllTypes(ForPartialVectors<TestSet>()); }
+
+// Checks that integer Add/Sub wrap around (mod 2^bits).
+struct TestOverflow {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const Vec<D> lowest = Set(d, LimitsMin<T>());
+    const Vec<D> highest = Set(d, LimitsMax<T>());
+    const Vec<D> one = Set(d, T{1});
+    // min - 1 wraps to max (unsigned underflow / negative -> positive).
+    HWY_ASSERT_VEC_EQ(d, highest, Sub(lowest, one));
+    // max + 1 wraps to min (unsigned overflow / positive -> negative).
+    HWY_ASSERT_VEC_EQ(d, lowest, Add(highest, one));
+  }
+};
+
+HWY_NOINLINE void TestAllOverflow() {
+  ForIntegerTypes(ForPartialVectors<TestOverflow>());
+}
+
+struct TestClamp {  // Clamp(v, lo, hi) with v above and below [lo, hi]
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const Vec<D> zero = Zero(d);
+    const Vec<D> one = Set(d, T{1});
+    const Vec<D> two = Set(d, T{2});
+
+    HWY_ASSERT_VEC_EQ(d, one, Clamp(two, zero, one));  // 2 in [0, 1] -> 1
+    HWY_ASSERT_VEC_EQ(d, one, Clamp(zero, one, two));  // 0 in [1, 2] -> 1
+  }
+};
+
+HWY_NOINLINE void TestAllClamp() {
+  ForAllTypes(ForPartialVectors<TestClamp>());
+}
+
+struct TestSignBitInteger {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const Vec<D> zero = Zero(d);
+    const Vec<D> all_true = VecFromMask(d, Eq(zero, zero));
+    const Vec<D> sign = SignBit(d);
+    const Vec<D> sign_minus_one = Sub(sign, Set(d, T{1}));
+
+    // Adding the sign bit to itself (a left shift by one) overflows to zero.
+    HWY_ASSERT_VEC_EQ(d, zero, Add(sign, sign));
+    // Checks that the lower bits of the sign bit are zero, using only +/-
+    // and logical ops because those are available for all types.
+    HWY_ASSERT_VEC_EQ(d, all_true, Add(sign, sign_minus_one));
+  }
+};
+
+struct TestSignBitFloat {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const Vec<D> zero = Zero(d);
+    const Vec<D> sign = SignBit(d);
+    const Vec<D> positive = Set(d, static_cast<T>(2.25));
+    const Vec<D> negative = Set(d, static_cast<T>(-2.25));
+    HWY_ASSERT_VEC_EQ(d, Or(positive, sign), negative);      // set => negated
+    HWY_ASSERT_VEC_EQ(d, AndNot(sign, negative), positive);  // cleared
+    HWY_ASSERT_VEC_EQ(d, zero, sign);  // presumably -0.0 == +0.0 -- confirm
+  }
+};
+
+HWY_NOINLINE void TestAllSignBit() {
+  ForIntegerTypes(ForPartialVectors<TestSignBitInteger>());
+  ForFloatTypes(ForPartialVectors<TestSignBitFloat>());
+}
+
+// Aborts unless every lane of v is NaN; file/line identify the call site.
+// TODO(b/287462770): inline to work around incorrect SVE codegen
+template <class D, class V>
+HWY_INLINE void AssertNaN(D d, VecArg<V> v, const char* file, int line) {
+  using T = TFromD<D>;
+  const size_t N = Lanes(d);
+  if (!AllTrue(d, IsNaN(v))) {
+    Print(d, "not all NaN", v, 0, N);
+    Print(d, "mask", VecFromMask(d, IsNaN(v)), 0, N);
+    const std::string type_name = TypeName(T(), N);
+    // RVV lacks PRIu64 and MSYS still has problems with %zu, so the raw bytes
+    // of the first lane are printed to avoid truncating doubles.
+    uint8_t bytes[HWY_MAX(sizeof(T), 8)] = {0};  // zero-padded to >= 8 bytes
+    const T lane = GetLane(v);
+    CopyBytes<sizeof(T)>(&lane, bytes);
+    Abort(file, line,
+          "Expected %s NaN, got %E (bytes %02x %02x %02x %02x %02x %02x %02x "
+          "%02x)",
+          type_name.c_str(), static_cast<double>(lane), bytes[0], bytes[1],
+          bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7]);
+  }
+}
+#define HWY_ASSERT_NAN(d, v) AssertNaN(d, v, __FILE__, __LINE__)
+
+struct TestNaN {  // checks how ops treat a NaN operand
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const Vec<D> v1 = Set(d, static_cast<T>(Unpredictable1()));
+    const Vec<D> nan = IfThenElse(Eq(v1, Set(d, T{1})), NaN(d), v1);
+    HWY_ASSERT_NAN(d, nan);
+
+    // Arithmetic: NaN in either operand position yields NaN.
+    HWY_ASSERT_NAN(d, Add(nan, v1));
+    HWY_ASSERT_NAN(d, Add(v1, nan));
+    HWY_ASSERT_NAN(d, Sub(nan, v1));
+    HWY_ASSERT_NAN(d, Sub(v1, nan));
+    HWY_ASSERT_NAN(d, Mul(nan, v1));
+    HWY_ASSERT_NAN(d, Mul(v1, nan));
+    HWY_ASSERT_NAN(d, Div(nan, v1));
+    HWY_ASSERT_NAN(d, Div(v1, nan));
+
+    // FMA: NaN in any of the three operand positions yields NaN.
+    HWY_ASSERT_NAN(d, MulAdd(nan, v1, v1));
+    HWY_ASSERT_NAN(d, MulAdd(v1, nan, v1));
+    HWY_ASSERT_NAN(d, MulAdd(v1, v1, nan));
+    HWY_ASSERT_NAN(d, MulSub(nan, v1, v1));
+    HWY_ASSERT_NAN(d, MulSub(v1, nan, v1));
+    HWY_ASSERT_NAN(d, MulSub(v1, v1, nan));
+    HWY_ASSERT_NAN(d, NegMulAdd(nan, v1, v1));
+    HWY_ASSERT_NAN(d, NegMulAdd(v1, nan, v1));
+    HWY_ASSERT_NAN(d, NegMulAdd(v1, v1, nan));
+    HWY_ASSERT_NAN(d, NegMulSub(nan, v1, v1));
+    HWY_ASSERT_NAN(d, NegMulSub(v1, nan, v1));
+    HWY_ASSERT_NAN(d, NegMulSub(v1, v1, nan));
+
+    // Sqrt (the approximate reciprocal ops are checked at the end)
+    HWY_ASSERT_NAN(d, Sqrt(nan));
+
+    // Sign manipulation
+    HWY_ASSERT_NAN(d, Abs(nan));
+    HWY_ASSERT_NAN(d, Neg(nan));
+    HWY_ASSERT_NAN(d, CopySign(nan, v1));
+    HWY_ASSERT_NAN(d, CopySignToAbs(nan, v1));
+
+    // Rounding
+    HWY_ASSERT_NAN(d, Ceil(nan));
+    HWY_ASSERT_NAN(d, Floor(nan));
+    HWY_ASSERT_NAN(d, Round(nan));
+    HWY_ASSERT_NAN(d, Trunc(nan));
+
+    // Logical (And/AndNot/Xor will clear NaN!)
+    HWY_ASSERT_NAN(d, Or(nan, v1));
+
+    // Comparison: every comparison against NaN must be false.
+    HWY_ASSERT(AllFalse(d, Eq(nan, v1)));
+    HWY_ASSERT(AllFalse(d, Gt(nan, v1)));
+    HWY_ASSERT(AllFalse(d, Lt(nan, v1)));
+    HWY_ASSERT(AllFalse(d, Ge(nan, v1)));
+    HWY_ASSERT(AllFalse(d, Le(nan, v1)));
+
+    // Reduction
+    HWY_ASSERT_NAN(d, SumOfLanes(d, nan));
+    HWY_ASSERT_NAN(d, Set(d, ReduceSum(d, nan)));
+// TODO(janwas): re-enable after QEMU/Spike are fixed
+#if HWY_TARGET != HWY_RVV
+    HWY_ASSERT_NAN(d, MinOfLanes(d, nan));
+    HWY_ASSERT_NAN(d, MaxOfLanes(d, nan));
+#endif
+
+    // Min/Max: the expected result depends on the platform, see each branch.
+#if (HWY_ARCH_X86 || HWY_ARCH_WASM) && (HWY_TARGET < HWY_EMU128)
+    // Native WASM or x86 SIMD return the second operand if any input is NaN.
+    HWY_ASSERT_VEC_EQ(d, v1, Min(nan, v1));
+    HWY_ASSERT_VEC_EQ(d, v1, Max(nan, v1));
+    HWY_ASSERT_NAN(d, Min(v1, nan));
+    HWY_ASSERT_NAN(d, Max(v1, nan));
+#elif HWY_TARGET <= HWY_NEON_WITHOUT_AES && HWY_ARCH_ARM_V7
+    // Armv7 NEON returns NaN if any input is NaN.
+    HWY_ASSERT_NAN(d, Min(v1, nan));
+    HWY_ASSERT_NAN(d, Max(v1, nan));
+    HWY_ASSERT_NAN(d, Min(nan, v1));
+    HWY_ASSERT_NAN(d, Max(nan, v1));
+#else
+    // IEEE 754-2019 minimumNumber is defined as the other argument if exactly
+    // one is NaN, and qNaN if both are.
+    HWY_ASSERT_VEC_EQ(d, v1, Min(nan, v1));
+    HWY_ASSERT_VEC_EQ(d, v1, Max(nan, v1));
+    HWY_ASSERT_VEC_EQ(d, v1, Min(v1, nan));
+    HWY_ASSERT_VEC_EQ(d, v1, Max(v1, nan));
+#endif
+    HWY_ASSERT_NAN(d, Min(nan, nan));  // both NaN: checked on every platform
+    HWY_ASSERT_NAN(d, Max(nan, nan));
+
+    // AbsDiff
+    HWY_ASSERT_NAN(d, AbsDiff(nan, v1));
+    HWY_ASSERT_NAN(d, AbsDiff(v1, nan));
+
+    // Approximate*
+    HWY_ASSERT_NAN(d, ApproximateReciprocal(nan));
+    HWY_ASSERT_NAN(d, ApproximateReciprocalSqrt(nan));
+  }
+};
+
+HWY_NOINLINE void TestAllNaN() {
+  ForFloatTypes(ForPartialVectors<TestNaN>());
+}
+
+struct TestIsNaN {  // IsNaN: true for +/-NaN, false for inf and finite values
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const Vec<D> one = Set(d, static_cast<T>(Unpredictable1()));
+    const Vec<D> minus_one = Set(d, T{-1});
+    const Vec<D> infs = IfThenElse(Eq(one, Set(d, T{1})), Inf(d), one);
+    const Vec<D> nans = IfThenElse(Eq(one, Set(d, T{1})), NaN(d), one);
+    HWY_ASSERT_NAN(d, nans);
+    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(infs));
+    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(CopySign(infs, minus_one)));
+    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsNaN(nans));
+    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsNaN(CopySign(nans, minus_one)));
+    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(one));
+    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(Zero(d)));
+    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(Set(d, hwy::LowestValue<T>())));
+    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(Set(d, hwy::HighestValue<T>())));
+  }
+};
+
+HWY_NOINLINE void TestAllIsNaN() {
+  ForFloatTypes(ForPartialVectors<TestIsNaN>());
+}
+
+struct TestIsInf {  // IsInf: true for +/-inf, false for NaN and finite values
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const Vec<D> one = Set(d, static_cast<T>(Unpredictable1()));
+    const Vec<D> minus_one = Set(d, T{-1});
+    const Vec<D> infs = IfThenElse(Eq(one, Set(d, T{1})), Inf(d), one);
+    const Vec<D> nans = IfThenElse(Eq(one, Set(d, T{1})), NaN(d), one);
+    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsInf(infs));
+    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsInf(CopySign(infs, minus_one)));
+    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(nans));
+    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(CopySign(nans, minus_one)));
+    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(one));
+    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(Zero(d)));
+    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(Set(d, hwy::LowestValue<T>())));
+    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(Set(d, hwy::HighestValue<T>())));
+  }
+};
+
+HWY_NOINLINE void TestAllIsInf() {
+  ForFloatTypes(ForPartialVectors<TestIsInf>());
+}
+
+struct TestIsFinite {  // IsFinite: false for +/-inf and +/-NaN, else true
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const Vec<D> one = Set(d, static_cast<T>(Unpredictable1()));
+    const Vec<D> minus_one = Set(d, T{-1});
+    const Vec<D> infs = IfThenElse(Eq(one, Set(d, T{1})), Inf(d), one);
+    const Vec<D> nans = IfThenElse(Eq(one, Set(d, T{1})), NaN(d), one);
+    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsFinite(infs));
+    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsFinite(CopySign(infs, minus_one)));
+    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsFinite(nans));
+    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsFinite(CopySign(nans, minus_one)));
+    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsFinite(one));
+    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsFinite(Zero(d)));
+    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsFinite(Set(d, hwy::LowestValue<T>())));
+    HWY_ASSERT_MASK_EQ(d, MaskTrue(d),
+                       IsFinite(Set(d, hwy::HighestValue<T>())));
+  }
+};
+
+HWY_NOINLINE void TestAllIsFinite() {
+  ForFloatTypes(ForPartialVectors<TestIsFinite>());
+}
+
+struct TestCopyAndAssign {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    // Copy construction must preserve the lanes.
+    const Vec<D> source = Iota(d, 3);
+    auto copied(source);
+    HWY_ASSERT_VEC_EQ(d, source, copied);
+
+    // Assignment over an existing vector must preserve the lanes.
+    auto assigned = Undefined(d);
+    assigned = source;
+    HWY_ASSERT_VEC_EQ(d, source, assigned);
+  }
+};
+
+HWY_NOINLINE void TestAllCopyAndAssign() {
+  ForAllTypes(ForPartialVectors<TestCopyAndAssign>());
+}
+
+struct TestGetLane {  // GetLane must return the value given to Zero/Set
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    HWY_ASSERT_EQ(T{0}, GetLane(Zero(d)));
+    HWY_ASSERT_EQ(T{1}, GetLane(Set(d, T{1})));
+  }
+};
+
+HWY_NOINLINE void TestAllGetLane() {
+  ForAllTypes(ForPartialVectors<TestGetLane>());
+}
+
+struct TestDFromV {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const Vec<D> zero = Zero(d);
+    using DeducedTag = DFromV<decltype(zero)>;  // may be a different type than D
+    const Vec<D> masked = And(zero, Set(DeducedTag(), T{1}));  // must interoperate
+    HWY_ASSERT_VEC_EQ(d, zero, masked);
+  }
+};
+
+HWY_NOINLINE void TestAllDFromV() {
+  ForAllTypes(ForPartialVectors<TestDFromV>());
+}
+
+struct TestBlocks {  // Blocks(d) must match the lane count in 16-byte units
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    static constexpr size_t kLanesPerBlock = 16 / sizeof(T);
+    const size_t num_lanes = Lanes(d);
+    const size_t actual_blocks = Blocks(d);
+    const size_t expected_blocks =  // under one block of lanes => 1 block
+        (num_lanes < kLanesPerBlock) ? 1 : (num_lanes / kLanesPerBlock);
+    HWY_ASSERT(actual_blocks >= 1);
+    HWY_ASSERT(actual_blocks <= d.MaxBlocks());
+    HWY_ASSERT(actual_blocks == expected_blocks);
+  }
+};
+
+HWY_NOINLINE void TestAllBlocks() {  // runs TestBlocks for all types/sizes
+  ForAllTypes(ForPartialVectors<TestBlocks>());
+}
+
+struct TestBlockDFromD {  // BlockDFromD<D> must describe min(16, size) bytes
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const BlockDFromD<decltype(d)> d_block;
+    static_assert(d_block.MaxBytes() <= 16,
+                  "d_block.MaxBytes() <= 16 must be true");
+    static_assert(d_block.MaxBytes() <= d.MaxBytes(),
+                  "d_block.MaxBytes() <= d.MaxBytes() must be true");
+    static_assert(d.MaxBytes() > 16 || d_block.MaxBytes() == d.MaxBytes(),
+                  "d_block.MaxBytes() == d.MaxBytes() must be true if "
+                  "d.MaxBytes() is less than or equal to 16");
+    static_assert(d.MaxBytes() < 16 || d_block.MaxBytes() == 16,
+                  "d_block.MaxBytes() == 16 must be true if d.MaxBytes() is "
+                  "greater than or equal to 16");
+    static_assert(
+        IsSame<Vec<decltype(d_block)>, decltype(ExtractBlock<0>(Zero(d)))>(),
+        "Vec<decltype(d_block)> should be the same vector type as "
+        "decltype(ExtractBlock<0>(Zero(d)))");
+    const size_t d_bytes = Lanes(d) * sizeof(T);  // actual size at runtime
+    const size_t d_block_bytes = Lanes(d_block) * sizeof(T);
+    HWY_ASSERT(d_block_bytes >= 1);  // runtime versions of the checks above
+    HWY_ASSERT(d_block_bytes <= d_bytes);
+    HWY_ASSERT(d_block_bytes <= 16);
+    HWY_ASSERT(d_bytes > 16 || d_block_bytes == d_bytes);
+    HWY_ASSERT(d_bytes < 16 || d_block_bytes == 16);
+  }
+};
+
+HWY_NOINLINE void TestAllBlockDFromD() {
+  ForAllTypes(ForPartialVectors<TestBlockDFromD>());
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace hwy {
+HWY_BEFORE_TEST(HighwayTest);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllCapped);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllMaxLanes);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllSet);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllOverflow);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllClamp);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllSignBit);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllNaN);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllIsNaN);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllIsInf);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllIsFinite);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllCopyAndAssign);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllGetLane);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllDFromV);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllBlocks);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllBlockDFromD);
+}  // namespace hwy
+
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/hwy.version b/third-party/libjxl/libjxl/third_party/highway/hwy/hwy.version
new file mode 100644
index 0000000000..9ff6be6a2d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/hwy.version
@@ -0,0 +1,19 @@
+HWY_0 {
+  global:
+    extern "C++" {
+      *hwy::*;
+    };
+
+  local:
+    # Hide all the std namespace symbols. std namespace is explicitly marked
+    # as visibility(default) and header-only functions or methods (such as those
+    # from templates) should be exposed in shared libraries as weak symbols but
+    # this is only needed when we expose those types in the shared library API
+    # in any way. We don't use C++ std types in the API and we also don't
+    # support exceptions in the library.
+    # See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=36022 for a discussion
+    # about this.
+    extern "C++" {
+      *std::*;
+    };
+};
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/nanobenchmark.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/nanobenchmark.cc
new file mode 100644
index 0000000000..ea5549f3d1
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/nanobenchmark.cc
@@ -0,0 +1,300 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/nanobenchmark.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>  // clock_gettime
+
+#include <algorithm>  // std::sort, std::find_if
+#include <numeric>    // std::iota
+#include <random>
+#include <vector>
+
+#include "hwy/robust_statistics.h"
+#include "hwy/timer-inl.h"
+#include "hwy/timer.h"
+
+namespace hwy {
+namespace {
+namespace timer = hwy::HWY_NAMESPACE::timer;
+
+static const timer::Ticks timer_resolution = platform::TimerResolution();
+
+// Times "lambda" in batches that double per eval (up to p.max_evals) until its
+// relative or absolute MAD is small; returns the mode or median tick estimate.
+template <class Lambda>
+timer::Ticks SampleUntilStable(const double max_rel_mad, double* rel_mad,
+                               const Params& p, const Lambda& lambda) {
+  // Choose initial samples_per_eval based on a single estimated duration.
+  timer::Ticks t0 = timer::Start();
+  lambda();
+  timer::Ticks t1 = timer::Stop();  // Caller checks HaveTimerStop
+  timer::Ticks est = t1 - t0;
+  static const double ticks_per_second = platform::InvariantTicksPerSecond();
+  const size_t ticks_per_eval =
+      static_cast<size_t>(ticks_per_second * p.seconds_per_eval);
+  size_t samples_per_eval = est == 0
+                                ? p.min_samples_per_eval
+                                : static_cast<size_t>(ticks_per_eval / est);
+  samples_per_eval = HWY_MAX(samples_per_eval, p.min_samples_per_eval);
+
+  std::vector<timer::Ticks> samples;
+  samples.reserve(1 + samples_per_eval);
+  samples.push_back(est);  // the initial timing also counts as a sample
+
+  // Percentage is too strict for tiny differences, so also allow a small
+  // absolute "median absolute deviation": 1% of timer resolution, rounded up.
+  const timer::Ticks max_abs_mad = (timer_resolution + 99) / 100;
+  *rel_mad = 0.0;  // out-param; ensure initialized
+
+  for (size_t eval = 0; eval < p.max_evals; ++eval, samples_per_eval *= 2) {
+    samples.reserve(samples.size() + samples_per_eval);  // samples accumulate
+    for (size_t i = 0; i < samples_per_eval; ++i) {
+      t0 = timer::Start();
+      lambda();
+      t1 = timer::Stop();  // Caller checks HaveTimerStop
+      samples.push_back(t1 - t0);
+    }
+
+    if (samples.size() >= p.min_mode_samples) {
+      est = robust_statistics::Mode(samples.data(), samples.size());
+    } else {
+      // For "few" (depends also on the variance) samples, Median is safer.
+      est = robust_statistics::Median(samples.data(), samples.size());
+    }
+    NANOBENCHMARK_CHECK(est != 0);  // no-op unless NANOBENCHMARK_ENABLE_CHECKS
+
+    // Median absolute deviation (mad) is a robust measure of 'variability'.
+    const timer::Ticks abs_mad = robust_statistics::MedianAbsoluteDeviation(
+        samples.data(), samples.size(), est);
+    *rel_mad = static_cast<double>(abs_mad) / static_cast<double>(est);
+
+    if (*rel_mad <= max_rel_mad || abs_mad <= max_abs_mad) {
+      if (p.verbose) {
+        printf("%6d samples => %5d (abs_mad=%4d, rel_mad=%4.2f%%)\n",
+               static_cast<int>(samples.size()), static_cast<int>(est),
+               static_cast<int>(abs_mad), *rel_mad * 100.0);
+      }
+      return est;  // stable enough
+    }
+  }
+
+  if (p.verbose) {
+    printf("WARNING: rel_mad=%4.2f%% still exceeds %4.2f%% after %6d samples\n",
+           *rel_mad * 100.0, max_rel_mad * 100.0,
+           static_cast<int>(samples.size()));
+  }
+  return est;  // max_evals exhausted; return the last estimate regardless
+}
+
+using InputVec = std::vector<FuncInput>;
+
+// Returns the distinct values of inputs[0, num_inputs) in ascending order.
+InputVec UniqueInputs(const FuncInput* inputs, const size_t num_inputs) {
+  InputVec sorted(inputs, inputs + num_inputs);
+  std::sort(sorted.begin(), sorted.end());
+  sorted.erase(std::unique(sorted.begin(), sorted.end()), sorted.end());
+  return sorted;
+}
+
+// Returns how often to call func for sufficient precision, or 0 on failure.
+size_t NumSkip(const Func func, const uint8_t* arg, const InputVec& unique,
+               const Params& p) {
+  // Min elapsed ticks for any input, after deducting the timer resolution.
+  timer::Ticks min_duration = ~timer::Ticks(0);
+
+  for (const FuncInput input : unique) {
+    double rel_mad;
+    const timer::Ticks total = SampleUntilStable(
+        p.target_rel_mad, &rel_mad, p,
+        [func, arg, input]() { PreventElision(func(arg, input)); });
+    // Saturate at zero: a plain subtraction wraps if total < timer_resolution.
+    min_duration = HWY_MIN(
+        min_duration, total > timer_resolution ? total - timer_resolution : 0);
+  }
+
+  // Number of repetitions required to reach the target resolution.
+  const size_t max_skip = p.precision_divisor;
+  // Number of repetitions given the estimated duration (0: too short to time).
+  const size_t num_skip = static_cast<size_t>(
+      min_duration == 0 ? 0 : (max_skip + min_duration - 1) / min_duration);
+  if (p.verbose) {
+    printf("res=%d max_skip=%d min_dur=%d num_skip=%d\n",
+           static_cast<int>(timer_resolution), static_cast<int>(max_skip),
+           static_cast<int>(min_duration), static_cast<int>(num_skip));
+  }
+  return num_skip;
+}
+
+// Returns p.subset_ratio * num_skip copies of the inputs in shuffled order; if
+// there is only one unique input, that many copies of inputs[0] (unshuffled).
+InputVec ReplicateInputs(const FuncInput* inputs, const size_t num_inputs,
+                         const size_t num_unique, const size_t num_skip,
+                         const Params& p) {
+  const size_t num_copies = p.subset_ratio * num_skip;
+  if (num_unique == 1) return InputVec(num_copies, inputs[0]);
+
+  InputVec replicated;
+  replicated.reserve(num_copies * num_inputs);
+  for (size_t copy = 0; copy < num_copies; ++copy) {
+    replicated.insert(replicated.end(), inputs, inputs + num_inputs);
+  }
+  // Default-constructed generator: the same seed, hence order, on every call.
+  std::mt19937 shuffle_rng;
+  std::shuffle(replicated.begin(), replicated.end(), shuffle_rng);
+  return replicated;
+}
+
+// Copies "full" into "*subset" in the same order, but leaves out "num_skip"
+// pseudo-randomly chosen occurrences of "input_to_skip". "*subset" must already
+// have its final size; elements beyond that size are not written.
+void FillSubset(const InputVec& full, const FuncInput input_to_skip,
+                const size_t num_skip, InputVec* subset) {
+  const size_t count =
+      static_cast<size_t>(std::count(full.begin(), full.end(), input_to_skip));
+  // Select which occurrences to drop: shuffle the numbers 0..count-1, resize
+  // to num_skip entries, and sort those for the single pass below.
+  std::vector<uint32_t> to_drop(count);
+  std::iota(to_drop.begin(), to_drop.end(), 0);
+  // The generator is default-seeded, so to_drop[] is identical on every call.
+  // That is OK: they identify the Nth occurrence, not a position in full[].
+  std::mt19937 shuffle_rng;
+  std::shuffle(to_drop.begin(), to_drop.end(), shuffle_rng);
+  to_drop.resize(num_skip);
+  std::sort(to_drop.begin(), to_drop.end());
+
+  uint32_t nth = ~0u;    // occurrence number; wraps to 0 on first increment
+  size_t next_drop = 0;  // cursor within to_drop[]
+  size_t next_out = 0;   // cursor within *subset
+  for (const FuncInput value : full) {
+    if (value == input_to_skip) {
+      ++nth;
+      // Drop it if removals remain and this occurrence is the next one chosen.
+      if (next_drop < num_skip && nth == to_drop[next_drop]) {
+        ++next_drop;
+        continue;
+      }
+    }
+    // Kept element: store it unless *subset is already full.
+    if (next_out < subset->size()) {
+      (*subset)[next_out++] = value;
+    }
+  }
+  NANOBENCHMARK_CHECK(next_out == subset->size());
+  NANOBENCHMARK_CHECK(next_drop == to_drop.size());
+  NANOBENCHMARK_CHECK(nth == count - 1);
+}
+
+// Estimates the ticks needed to call func once for every element of "inputs".
+// Also raises *max_rel_mad to this estimate's variability if that is larger.
+timer::Ticks TotalDuration(const Func func, const uint8_t* arg,
+                           const InputVec* inputs, const Params& p,
+                           double* max_rel_mad) {
+  const auto call_all = [func, arg, inputs]() {
+    for (const FuncInput input : *inputs) PreventElision(func(arg, input));
+  };
+  double rel_mad;
+  const timer::Ticks duration =
+      SampleUntilStable(p.target_rel_mad, &rel_mad, p, call_all);
+  *max_rel_mad = HWY_MAX(*max_rel_mad, rel_mad);
+  return duration;
+}
+
+// (Nearly) empty Func for measuring overhead: returns "input", ignores "arg".
+HWY_NOINLINE FuncOutput EmptyFunc(const void* /*arg*/, const FuncInput input) {
+  return input;
+}
+
+// Estimates the ticks spent iterating over "inputs" and calling a (nearly)
+// empty function; callers deduct this from TotalDuration return values.
+timer::Ticks Overhead(const uint8_t* arg, const InputVec* inputs,
+                      const Params& p) {
+  const auto call_empty = [arg, inputs]() {
+    for (const FuncInput input : *inputs) PreventElision(EmptyFunc(arg, input));
+  };
+  // Tolerance (max_rel_mad) is zero because repeatability is crucial here and
+  // EmptyFunc is fast.
+  double rel_mad_unused;
+  return SampleUntilStable(0.0, &rel_mad_unused, p, call_empty);
+}
+
+}  // namespace
+
+HWY_DLLEXPORT int Unpredictable1() { return timer::Start() != ~0ULL; }
+
+// See nanobenchmark.h. Each failure path prints to stderr and returns 0.
+HWY_DLLEXPORT size_t Measure(const Func func, const uint8_t* arg,
+                             const FuncInput* inputs, const size_t num_inputs,
+                             Result* results, const Params& p) {
+  NANOBENCHMARK_CHECK(num_inputs != 0);  // no-op unless checks are enabled
+  char cpu100[100];
+  if (!platform::HaveTimerStop(cpu100)) {
+    fprintf(stderr, "CPU '%s' does not support RDTSCP, skipping benchmark.\n",
+            cpu100);
+    return 0;
+  }
+
+  const InputVec& unique = UniqueInputs(inputs, num_inputs);
+  const size_t num_skip = NumSkip(func, arg, unique, p);
+  if (num_skip == 0) {  // NumSkip prints nothing itself, so report it here.
+    fprintf(stderr, "Measurement failed: duration too short, num_skip=0\n");
+    return 0;
+  }
+  // (slightly less work on x86 to cast from signed integer)
+  const float mul = 1.0f / static_cast<float>(static_cast<int>(num_skip));
+
+  const InputVec& full =
+      ReplicateInputs(inputs, num_inputs, unique.size(), num_skip, p);
+  InputVec subset(full.size() - num_skip);  // contents set by FillSubset below
+
+  const timer::Ticks overhead = Overhead(arg, &full, p);
+  const timer::Ticks overhead_skip = Overhead(arg, &subset, p);
+  if (overhead < overhead_skip) {
+    fprintf(stderr, "Measurement failed: overhead %d < %d\n",
+            static_cast<int>(overhead), static_cast<int>(overhead_skip));
+    return 0;
+  }
+
+  if (p.verbose) {
+    printf("#inputs=%5d,%5d overhead=%5d,%5d\n", static_cast<int>(full.size()),
+           static_cast<int>(subset.size()), static_cast<int>(overhead),
+           static_cast<int>(overhead_skip));
+  }
+
+  double max_rel_mad = 0.0;
+  const timer::Ticks total = TotalDuration(func, arg, &full, p, &max_rel_mad);
+
+  for (size_t i = 0; i < unique.size(); ++i) {
+    FillSubset(full, unique[i], num_skip, &subset);
+    const timer::Ticks total_skip =
+        TotalDuration(func, arg, &subset, p, &max_rel_mad);
+    if (total < total_skip) {
+      fprintf(stderr, "Measurement failed: total %f < %f\n",
+              static_cast<double>(total), static_cast<double>(total_skip));
+      return 0;
+    }
+    // Ticks for the num_skip omitted calls of unique[i], net of loop overhead.
+    const timer::Ticks duration =
+        (total - overhead) - (total_skip - overhead_skip);
+    results[i].input = unique[i];
+    results[i].ticks = static_cast<float>(duration) * mul;
+    results[i].variability = static_cast<float>(max_rel_mad);
+  }
+  return unique.size();
+}
+
+}  // namespace hwy
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/nanobenchmark.h b/third-party/libjxl/libjxl/third_party/highway/hwy/nanobenchmark.h
new file mode 100644
index 0000000000..46bfc4b0a8
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/nanobenchmark.h
@@ -0,0 +1,171 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_NANOBENCHMARK_H_
+#define HIGHWAY_HWY_NANOBENCHMARK_H_
+
+// Benchmarks functions of a single integer argument with realistic branch
+// prediction hit rates. Uses a robust estimator to summarize the measurements.
+// The precision is about 0.2%.
+//
+// Examples: see nanobenchmark_test.cc.
+//
+// Background: Microbenchmarks such as http://github.com/google/benchmark
+// can measure elapsed times on the order of a microsecond. Shorter functions
+// are typically measured by repeating them thousands of times and dividing
+// the total elapsed time by this count. Unfortunately, repetition (especially
+// with the same input parameter!) influences the runtime. In time-critical
+// code, it is reasonable to expect warm instruction/data caches and TLBs,
+// but a perfect record of which branches will be taken is unrealistic.
+// Unless the application also repeatedly invokes the measured function with
+// the same parameter, the benchmark is measuring something very different -
+// a best-case result, almost as if the parameter were made a compile-time
+// constant. This may lead to erroneous conclusions about branch-heavy
+// algorithms outperforming branch-free alternatives.
+//
+// Our approach differs in three ways. Adding fences to the timer functions
+// reduces variability due to instruction reordering, improving the timer
+// resolution to about 40 CPU cycles. However, shorter functions must still
+// be invoked repeatedly. For more realistic branch prediction performance,
+// we vary the input parameter according to a user-specified distribution.
+// Thus, instead of VaryInputs(Measure(Repeat(func))), we change the
+// loop nesting to Measure(Repeat(VaryInputs(func))). We also estimate the
+// central tendency of the measurement samples with the "half sample mode",
+// which is more robust to outliers and skewed data than the mean or median.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "hwy/highway_export.h"
+#include "hwy/timer.h"
+
+// Enables sanity checks that verify correct operation at the cost of
+// longer benchmark runs.
+#ifndef NANOBENCHMARK_ENABLE_CHECKS
+#define NANOBENCHMARK_ENABLE_CHECKS 0
+#endif
+
+#define NANOBENCHMARK_CHECK_ALWAYS(condition)                             \
+  while (!(condition)) {                                                  \
+    fprintf(stderr, "Nanobenchmark check failed at line %d\n", __LINE__); \
+    abort();                                                              \
+  }
+
+#if NANOBENCHMARK_ENABLE_CHECKS
+#define NANOBENCHMARK_CHECK(condition) NANOBENCHMARK_CHECK_ALWAYS(condition)
+#else
+#define NANOBENCHMARK_CHECK(condition)
+#endif
+
+namespace hwy {
+
+// Returns 1, but without the compiler knowing what the value is. This prevents
+// optimizing out code.
+HWY_DLLEXPORT int Unpredictable1();
+
+// Input influencing the function being measured (e.g. number of bytes to copy).
+using FuncInput = size_t;
+
+// "Proof of work" returned by Func to ensure the compiler does not elide it.
+using FuncOutput = uint64_t;
+
+// Function to measure: either 1) a captureless lambda or function with two
+// arguments or 2) a lambda with capture, in which case the first argument
+// is reserved for use by MeasureClosure.
+using Func = FuncOutput (*)(const void*, FuncInput);
+
+// Internal parameters that determine precision/resolution/measuring time.
+struct Params {
+  // Best-case precision, expressed as a divisor of the timer resolution.
+  // Larger => more calls to Func and higher precision.
+  size_t precision_divisor = 1024;
+
+  // Ratio between full and subset input distribution sizes. Cannot be less
+  // than 2; larger values increase measurement time but more faithfully
+  // model the given input distribution.
+  size_t subset_ratio = 2;
+
+  // Together with the estimated Func duration, determines how many times to
+  // call Func before checking the sample variability. Larger values increase
+  // measurement time, memory/cache use and precision.
+  double seconds_per_eval = 4E-3;
+
+  // The minimum number of samples before estimating the central tendency.
+  size_t min_samples_per_eval = 7;
+
+  // Minimum sample count for using the mode; below it, the median is used. The
+  // mode better estimates the central tendency of skewed/fat-tailed data, but
+  // requires sufficient samples relative to the width of half-ranges.
+  size_t min_mode_samples = 64;
+
+  // Maximum permissible variability (= median absolute deviation / center).
+  double target_rel_mad = 0.002;
+
+  // Stop sampling after this many evals even if target_rel_mad has not been
+  // reached (the last estimate is then returned). Prevents infinite loops.
+  size_t max_evals = 9;
+
+  // Whether to print additional statistics to stdout.
+  bool verbose = true;
+};
+
+// Measurement result for each unique input.
+struct Result {
+  FuncInput input;
+
+  // Robust estimate (from mode or median) of the ticks for one Func call.
+  float ticks;
+
+  // Largest relative median absolute deviation seen while measuring so far.
+  float variability;
+};
+
+// Precisely measures the number of ticks elapsed when calling "func" with the
+// given inputs, shuffled to ensure realistic branch prediction hit rates.
+//
+// "func" returns a 'proof of work' to ensure its computations are not elided.
+// "arg" is passed to Func, or reserved for internal use by MeasureClosure.
+// "inputs" is an array of "num_inputs" (not necessarily unique) arguments to
+//   "func". The values should be chosen to maximize coverage of "func". This
+//   represents a distribution, so a value's frequency should reflect its
+//   probability in the real application. Order does not matter; for example, a
+//   uniform distribution over [0, 4) could be represented as {3,0,2,1}.
+// Returns how many Result were written to "results": one per unique input, or
+//   zero if the measurement failed (an error message goes to stderr).
+HWY_DLLEXPORT size_t Measure(Func func, const uint8_t* arg,
+                             const FuncInput* inputs, size_t num_inputs,
+                             Result* results, const Params& p = Params());
+
+// Invokes the closure "*f" (e.g. a lambda) with "input"; returns its result.
+template <class Closure>
+static FuncOutput CallClosure(const Closure* f, const FuncInput input) {
+  return (*f)(input);
+}
+
+// Same as Measure, except "closure" is typically a capturing lambda mapping
+// FuncInput -> FuncOutput; its address becomes the "arg" of CallClosure.
+template <class Closure>
+static inline size_t MeasureClosure(const Closure& closure,
+                                    const FuncInput* inputs,
+                                    const size_t num_inputs, Result* results,
+                                    const Params& p = Params()) {
+  return Measure(reinterpret_cast<Func>(&CallClosure<Closure>),
+                 reinterpret_cast<const uint8_t*>(&closure), inputs, num_inputs,
+                 results, p);
+}
+
+}  // namespace hwy
+
+#endif  // HIGHWAY_HWY_NANOBENCHMARK_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/nanobenchmark_test.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/nanobenchmark_test.cc
new file mode 100644
index 0000000000..2a10c68a19
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/nanobenchmark_test.cc
@@ -0,0 +1,90 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/nanobenchmark.h"
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <random>
+
+#include "hwy/tests/test_util-inl.h"
+
+namespace hwy {
+namespace {
+
+// Governs duration of test; avoid timeout in debug builds.
+#if HWY_IS_DEBUG_BUILD
+constexpr size_t kMaxEvals = 3;
+#else
+constexpr size_t kMaxEvals = 4;
+#endif
+
+FuncOutput Div(const void*, FuncInput in) {  // "in" is the divisor
+  // Here we're measuring the throughput because benchmark invocations are
+  // independent. Any dividend will do; callers must pass a nonzero divisor.
+  return 0xFFFFF / in;
+}
+
+// Prints ticks and variability (as a percentage) for each Result of Div.
+template <size_t N>
+void MeasureDiv(const FuncInput (&inputs)[N]) {
+  printf("Measuring integer division (output on final two lines)\n");
+  Params div_params;
+  div_params.max_evals = kMaxEvals;
+  Result measured[N];
+  const size_t count = Measure(&Div, nullptr, inputs, N, measured, div_params);
+  for (const Result* r = measured; r != measured + count; ++r) {
+    printf("%5d: %6.2f ticks; MAD=%4.2f%%\n", static_cast<int>(r->input),
+           r->ticks, r->variability * 100.0);
+  }
+}
+
+std::mt19937 rng;
+
+// Divides "in" by 2 or 3 a pseudo-random number of times (0 to 15), so its
+// runtime varies between calls. All randomness comes from the global rng.
+FuncOutput Random(const void* /*arg*/, FuncInput in) {
+  FuncOutput quotient = static_cast<FuncOutput>(in);
+  for (size_t remaining = rng() & 0xF; remaining != 0; --remaining) {
+    quotient /= ((rng() & 1) + 2);
+  }
+  return quotient;
+}
+
+// Expects high variability (verified only if NANOBENCHMARK_ENABLE_CHECKS).
+template <size_t N>
+void MeasureRandom(const FuncInput (&inputs)[N]) {
+  Result results[N];
+  Params p;
+  p.max_evals = kMaxEvals;
+  p.verbose = false;  // suppress the statistics printed to stdout
+  const size_t num_results = Measure(&Random, nullptr, inputs, N, results, p);
+  for (size_t i = 0; i < num_results; ++i) {
+    NANOBENCHMARK_CHECK(results[i].variability > 1E-3);
+  }
+}
+
+TEST(NanobenchmarkTest, RunAll) {
+  const int unpredictable = Unpredictable1();  // == 1, unknown to compiler.
+  static const FuncInput inputs[] = {static_cast<FuncInput>(unpredictable) + 2,
+                                     static_cast<FuncInput>(unpredictable + 9)};
+
+  MeasureDiv(inputs);
+  MeasureRandom(inputs);
+}
+
+}  // namespace
+}  // namespace hwy
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/ops/arm_neon-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/arm_neon-inl.h
new file mode 100644
index 0000000000..4dbdf64cf8
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/arm_neon-inl.h
@@ -0,0 +1,8625 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// 128-bit Arm NEON vectors and operations.
+// External include guard in highway.h - see comment there.
+
+// Arm NEON intrinsics are documented at:
+// https://developer.arm.com/architectures/instruction-sets/intrinsics/#f:@navigationhierarchiessimdisa=[Neon]
+
+#include "hwy/ops/shared-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+
+// Must come after HWY_BEFORE_NAMESPACE so that the intrinsics are compiled with
+// the same target attribute as our code, see #834.
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4701, ignored "-Wuninitialized")
+#include <arm_neon.h>  // NOLINT(build/include_order)
+HWY_DIAGNOSTICS(pop)
+
+// Must come after arm_neon.h.
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+namespace detail {  // for code folding and Raw128
+
+// Macros used to define single and double function calls for multiple types
+// for full and half vectors. These macros are undefined at the end of the file.
+
+// HWY_NEON_BUILD_TPL_* is the template<...> prefix to the function.
+#define HWY_NEON_BUILD_TPL_1
+#define HWY_NEON_BUILD_TPL_2
+#define HWY_NEON_BUILD_TPL_3
+
+// HWY_NEON_BUILD_RET_* is return type; type arg is without _t suffix so we can
+// extend it to int32x4x2_t packs.
+#define HWY_NEON_BUILD_RET_1(type, size) Vec128<type##_t, size>
+#define HWY_NEON_BUILD_RET_2(type, size) Vec128<type##_t, size>
+#define HWY_NEON_BUILD_RET_3(type, size) Vec128<type##_t, size>
+
+// HWY_NEON_BUILD_PARAM_* is the list of parameters the function receives.
+#define HWY_NEON_BUILD_PARAM_1(type, size) const Vec128<type##_t, size> a
+#define HWY_NEON_BUILD_PARAM_2(type, size) \
+  const Vec128<type##_t, size> a, const Vec128<type##_t, size> b
+#define HWY_NEON_BUILD_PARAM_3(type, size)                        \
+  const Vec128<type##_t, size> a, const Vec128<type##_t, size> b, \
+      const Vec128<type##_t, size> c
+
+// HWY_NEON_BUILD_ARG_* is the list of arguments passed to the underlying
+// function.
+#define HWY_NEON_BUILD_ARG_1 a.raw
+#define HWY_NEON_BUILD_ARG_2 a.raw, b.raw
+#define HWY_NEON_BUILD_ARG_3 a.raw, b.raw, c.raw
+
+// We use HWY_NEON_EVAL(func, ...) to delay the evaluation of func until after
+// the __VA_ARGS__ have been expanded. This allows "func" to itself be a macro,
+// as is the case for some of the library "functions" such as vshlq_u8. For
+// example, HWY_NEON_EVAL(vshlq_u8, MY_PARAMS) where MY_PARAMS is defined as
+// "a, b" (without the quotes) will end up expanding to "vshlq_u8(a, b)" if
+// needed. Directly writing vshlq_u8(MY_PARAMS) would fail since the vshlq_u8()
+// macro expects two arguments.
+#define HWY_NEON_EVAL(func, ...) func(__VA_ARGS__)
+
+// Main macro definition that defines a single function for the given type and
+// size of vector, using the underlying (prefix##infix##suffix) function and
+// the template, return type, parameters and arguments defined by the "args"
+// parameters passed here (see HWY_NEON_BUILD_* macros defined before).
+#define HWY_NEON_DEF_FUNCTION(type, size, name, prefix, infix, suffix, args) \
+  HWY_CONCAT(HWY_NEON_BUILD_TPL_, args)                                      \
+  HWY_API HWY_CONCAT(HWY_NEON_BUILD_RET_, args)(type, size)                  \
+      name(HWY_CONCAT(HWY_NEON_BUILD_PARAM_, args)(type, size)) {            \
+    return HWY_CONCAT(HWY_NEON_BUILD_RET_, args)(type, size)(                \
+        HWY_NEON_EVAL(prefix##infix##suffix, HWY_NEON_BUILD_ARG_##args));    \
+  }
+
+// The HWY_NEON_DEF_FUNCTION_* macros define all the variants of a function
+// called "name" using the set of neon functions starting with the given
+// "prefix" for all the variants of certain types, as specified next to each
+// macro. For example, the prefix "vsub" can be used to define the operator-
+// using args=2.
+
+// uint8_t
+#define HWY_NEON_DEF_FUNCTION_UINT_8(name, prefix, infix, args)      \
+  HWY_NEON_DEF_FUNCTION(uint8, 16, name, prefix##q, infix, u8, args) \
+  HWY_NEON_DEF_FUNCTION(uint8, 8, name, prefix, infix, u8, args)     \
+  HWY_NEON_DEF_FUNCTION(uint8, 4, name, prefix, infix, u8, args)     \
+  HWY_NEON_DEF_FUNCTION(uint8, 2, name, prefix, infix, u8, args)     \
+  HWY_NEON_DEF_FUNCTION(uint8, 1, name, prefix, infix, u8, args)
+
+// int8_t
+#define HWY_NEON_DEF_FUNCTION_INT_8(name, prefix, infix, args)      \
+  HWY_NEON_DEF_FUNCTION(int8, 16, name, prefix##q, infix, s8, args) \
+  HWY_NEON_DEF_FUNCTION(int8, 8, name, prefix, infix, s8, args)     \
+  HWY_NEON_DEF_FUNCTION(int8, 4, name, prefix, infix, s8, args)     \
+  HWY_NEON_DEF_FUNCTION(int8, 2, name, prefix, infix, s8, args)     \
+  HWY_NEON_DEF_FUNCTION(int8, 1, name, prefix, infix, s8, args)
+
+// uint16_t
+#define HWY_NEON_DEF_FUNCTION_UINT_16(name, prefix, infix, args)      \
+  HWY_NEON_DEF_FUNCTION(uint16, 8, name, prefix##q, infix, u16, args) \
+  HWY_NEON_DEF_FUNCTION(uint16, 4, name, prefix, infix, u16, args)    \
+  HWY_NEON_DEF_FUNCTION(uint16, 2, name, prefix, infix, u16, args)    \
+  HWY_NEON_DEF_FUNCTION(uint16, 1, name, prefix, infix, u16, args)
+
+// int16_t
+#define HWY_NEON_DEF_FUNCTION_INT_16(name, prefix, infix, args)      \
+  HWY_NEON_DEF_FUNCTION(int16, 8, name, prefix##q, infix, s16, args) \
+  HWY_NEON_DEF_FUNCTION(int16, 4, name, prefix, infix, s16, args)    \
+  HWY_NEON_DEF_FUNCTION(int16, 2, name, prefix, infix, s16, args)    \
+  HWY_NEON_DEF_FUNCTION(int16, 1, name, prefix, infix, s16, args)
+
+// uint32_t
+#define HWY_NEON_DEF_FUNCTION_UINT_32(name, prefix, infix, args)      \
+  HWY_NEON_DEF_FUNCTION(uint32, 4, name, prefix##q, infix, u32, args) \
+  HWY_NEON_DEF_FUNCTION(uint32, 2, name, prefix, infix, u32, args)    \
+  HWY_NEON_DEF_FUNCTION(uint32, 1, name, prefix, infix, u32, args)
+
+// int32_t
+#define HWY_NEON_DEF_FUNCTION_INT_32(name, prefix, infix, args)      \
+  HWY_NEON_DEF_FUNCTION(int32, 4, name, prefix##q, infix, s32, args) \
+  HWY_NEON_DEF_FUNCTION(int32, 2, name, prefix, infix, s32, args)    \
+  HWY_NEON_DEF_FUNCTION(int32, 1, name, prefix, infix, s32, args)
+
+// uint64_t
+#define HWY_NEON_DEF_FUNCTION_UINT_64(name, prefix, infix, args)      \
+  HWY_NEON_DEF_FUNCTION(uint64, 2, name, prefix##q, infix, u64, args) \
+  HWY_NEON_DEF_FUNCTION(uint64, 1, name, prefix, infix, u64, args)
+
+// int64_t
+#define HWY_NEON_DEF_FUNCTION_INT_64(name, prefix, infix, args)      \
+  HWY_NEON_DEF_FUNCTION(int64, 2, name, prefix##q, infix, s64, args) \
+  HWY_NEON_DEF_FUNCTION(int64, 1, name, prefix, infix, s64, args)
+
+#ifdef __ARM_FEATURE_BF16_VECTOR_ARITHMETIC
+#define HWY_NEON_HAVE_BFLOAT16 1
+#else
+#define HWY_NEON_HAVE_BFLOAT16 0
+#endif
+
+// bfloat16_t
+#if HWY_NEON_HAVE_BFLOAT16
+#define HWY_NEON_DEF_FUNCTION_BFLOAT_16(name, prefix, infix, args)       \
+  HWY_NEON_DEF_FUNCTION(bfloat16, 8, name, prefix##q, infix, bf16, args) \
+  HWY_NEON_DEF_FUNCTION(bfloat16, 4, name, prefix, infix, bf16, args)    \
+  HWY_NEON_DEF_FUNCTION(bfloat16, 2, name, prefix, infix, bf16, args)    \
+  HWY_NEON_DEF_FUNCTION(bfloat16, 1, name, prefix, infix, bf16, args)
+#else
+#define HWY_NEON_DEF_FUNCTION_BFLOAT_16(name, prefix, infix, args)
+#endif
+
+// Used for conversion instructions if HWY_NEON_HAVE_FLOAT16C.
+#define HWY_NEON_DEF_FUNCTION_FLOAT_16_UNCONDITIONAL(name, prefix, infix, \
+                                                     args)                \
+  HWY_NEON_DEF_FUNCTION(float16, 8, name, prefix##q, infix, f16, args)    \
+  HWY_NEON_DEF_FUNCTION(float16, 4, name, prefix, infix, f16, args)       \
+  HWY_NEON_DEF_FUNCTION(float16, 2, name, prefix, infix, f16, args)       \
+  HWY_NEON_DEF_FUNCTION(float16, 1, name, prefix, infix, f16, args)
+
+// float16_t
+#if HWY_HAVE_FLOAT16
+#define HWY_NEON_DEF_FUNCTION_FLOAT_16(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_FLOAT_16_UNCONDITIONAL(name, prefix, infix, args)
+#else
+#define HWY_NEON_DEF_FUNCTION_FLOAT_16(name, prefix, infix, args)
+#endif
+
+// float: 4 lanes use the q-suffixed prefix; 2 and 1 lanes the plain prefix.
+#define HWY_NEON_DEF_FUNCTION_FLOAT_32(name, prefix, infix, args)      \
+  HWY_NEON_DEF_FUNCTION(float32, 4, name, prefix##q, infix, f32, args) \
+  HWY_NEON_DEF_FUNCTION(float32, 2, name, prefix, infix, f32, args)    \
+  HWY_NEON_DEF_FUNCTION(float32, 1, name, prefix, infix, f32, args)
+
+// double: 2 lanes (q-suffixed) and 1 lane, only if HWY_HAVE_FLOAT64.
+#if HWY_HAVE_FLOAT64
+#define HWY_NEON_DEF_FUNCTION_FLOAT_64(name, prefix, infix, args)      \
+  HWY_NEON_DEF_FUNCTION(float64, 2, name, prefix##q, infix, f64, args) \
+  HWY_NEON_DEF_FUNCTION(float64, 1, name, prefix, infix, f64, args)
+#else  // !HWY_HAVE_FLOAT64: the macro expands to nothing
+#define HWY_NEON_DEF_FUNCTION_FLOAT_64(name, prefix, infix, args)
+#endif  // HWY_HAVE_FLOAT64
+
+// Helper macros to define for more than one type. Each forwards its arguments
+// unchanged to the per-type macros listed in its body. uint8/16/32_t:
+#define HWY_NEON_DEF_FUNCTION_UINT_8_16_32(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_UINT_8(name, prefix, infix, args)             \
+  HWY_NEON_DEF_FUNCTION_UINT_16(name, prefix, infix, args)            \
+  HWY_NEON_DEF_FUNCTION_UINT_32(name, prefix, infix, args)
+
+// int8_t, int16_t and int32_t
+#define HWY_NEON_DEF_FUNCTION_INT_8_16_32(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_INT_8(name, prefix, infix, args)             \
+  HWY_NEON_DEF_FUNCTION_INT_16(name, prefix, infix, args)            \
+  HWY_NEON_DEF_FUNCTION_INT_32(name, prefix, infix, args)
+
+// uint8_t, uint16_t, uint32_t and uint64_t
+#define HWY_NEON_DEF_FUNCTION_UINTS(name, prefix, infix, args)  \
+  HWY_NEON_DEF_FUNCTION_UINT_8_16_32(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_UINT_64(name, prefix, infix, args)
+
+// int8_t, int16_t, int32_t and int64_t
+#define HWY_NEON_DEF_FUNCTION_INTS(name, prefix, infix, args)  \
+  HWY_NEON_DEF_FUNCTION_INT_8_16_32(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_INT_64(name, prefix, infix, args)
+
+// All int*_t and uint*_t up to 64
+#define HWY_NEON_DEF_FUNCTION_INTS_UINTS(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_INTS(name, prefix, infix, args)             \
+  HWY_NEON_DEF_FUNCTION_UINTS(name, prefix, infix, args)
+
+#define HWY_NEON_DEF_FUNCTION_FLOAT_16_32(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_FLOAT_16(name, prefix, infix, args)          \
+  HWY_NEON_DEF_FUNCTION_FLOAT_32(name, prefix, infix, args)  // f16 may be empty
+
+#define HWY_NEON_DEF_FUNCTION_ALL_FLOATS(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_FLOAT_16_32(name, prefix, infix, args)      \
+  HWY_NEON_DEF_FUNCTION_FLOAT_64(name, prefix, infix, args)  // f64 may be empty
+
+// All previous types. Note: bfloat16_t is not included.
+#define HWY_NEON_DEF_FUNCTION_ALL_TYPES(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_INTS_UINTS(name, prefix, infix, args)      \
+  HWY_NEON_DEF_FUNCTION_ALL_FLOATS(name, prefix, infix, args)
+
+#define HWY_NEON_DEF_FUNCTION_UI_8_16_32(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_UINT_8_16_32(name, prefix, infix, args)     \
+  HWY_NEON_DEF_FUNCTION_INT_8_16_32(name, prefix, infix, args)  // u/i 8..32
+
+#define HWY_NEON_DEF_FUNCTION_UIF_8_16_32(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_UI_8_16_32(name, prefix, infix, args)        \
+  HWY_NEON_DEF_FUNCTION_FLOAT_16_32(name, prefix, infix, args)  // plus f16/f32
+
+#define HWY_NEON_DEF_FUNCTION_UIF_64(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_UINT_64(name, prefix, infix, args)      \
+  HWY_NEON_DEF_FUNCTION_INT_64(name, prefix, infix, args)       \
+  HWY_NEON_DEF_FUNCTION_FLOAT_64(name, prefix, infix, args)  // u64, i64, f64
+
+// For vzip1/2: 64-bit lanes, full (2-lane, q-suffixed) vectors only.
+#define HWY_NEON_DEF_FUNCTION_FULL_UI_64(name, prefix, infix, args)   \
+  HWY_NEON_DEF_FUNCTION(uint64, 2, name, prefix##q, infix, u64, args) \
+  HWY_NEON_DEF_FUNCTION(int64, 2, name, prefix##q, infix, s64, args)
+#define HWY_NEON_DEF_FUNCTION_FULL_UIF_64(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_FULL_UI_64(name, prefix, infix, args)        \
+  HWY_NEON_DEF_FUNCTION(float64, 2, name, prefix##q, infix, f64, args)
+
+// For eor3q, which is only defined for full vectors. All u/i lane widths.
+#define HWY_NEON_DEF_FUNCTION_FULL_UI(name, prefix, infix, args)      \
+  HWY_NEON_DEF_FUNCTION(uint8, 16, name, prefix##q, infix, u8, args)  \
+  HWY_NEON_DEF_FUNCTION(uint16, 8, name, prefix##q, infix, u16, args) \
+  HWY_NEON_DEF_FUNCTION(uint32, 4, name, prefix##q, infix, u32, args) \
+  HWY_NEON_DEF_FUNCTION(int8, 16, name, prefix##q, infix, s8, args)   \
+  HWY_NEON_DEF_FUNCTION(int16, 8, name, prefix##q, infix, s16, args)  \
+  HWY_NEON_DEF_FUNCTION(int32, 4, name, prefix##q, infix, s32, args)  \
+  HWY_NEON_DEF_FUNCTION_FULL_UI_64(name, prefix, infix, args)
+// armv7 emulation: vuzp1/2 and vzip1/2 take .val[0]/.val[1] of vuzp/vzip.
+#if HWY_ARCH_ARM_V7
+#define vuzp1_s8(x, y) vuzp_s8(x, y).val[0]
+#define vuzp1_u8(x, y) vuzp_u8(x, y).val[0]
+#define vuzp1_s16(x, y) vuzp_s16(x, y).val[0]
+#define vuzp1_u16(x, y) vuzp_u16(x, y).val[0]
+#define vuzp1_s32(x, y) vuzp_s32(x, y).val[0]
+#define vuzp1_u32(x, y) vuzp_u32(x, y).val[0]
+#define vuzp1_f32(x, y) vuzp_f32(x, y).val[0]
+#define vuzp1q_s8(x, y) vuzpq_s8(x, y).val[0]
+#define vuzp1q_u8(x, y) vuzpq_u8(x, y).val[0]
+#define vuzp1q_s16(x, y) vuzpq_s16(x, y).val[0]
+#define vuzp1q_u16(x, y) vuzpq_u16(x, y).val[0]
+#define vuzp1q_s32(x, y) vuzpq_s32(x, y).val[0]
+#define vuzp1q_u32(x, y) vuzpq_u32(x, y).val[0]
+#define vuzp1q_f32(x, y) vuzpq_f32(x, y).val[0]
+#define vuzp2_s8(x, y) vuzp_s8(x, y).val[1]
+#define vuzp2_u8(x, y) vuzp_u8(x, y).val[1]
+#define vuzp2_s16(x, y) vuzp_s16(x, y).val[1]
+#define vuzp2_u16(x, y) vuzp_u16(x, y).val[1]
+#define vuzp2_s32(x, y) vuzp_s32(x, y).val[1]
+#define vuzp2_u32(x, y) vuzp_u32(x, y).val[1]
+#define vuzp2_f32(x, y) vuzp_f32(x, y).val[1]
+#define vuzp2q_s8(x, y) vuzpq_s8(x, y).val[1]
+#define vuzp2q_u8(x, y) vuzpq_u8(x, y).val[1]
+#define vuzp2q_s16(x, y) vuzpq_s16(x, y).val[1]
+#define vuzp2q_u16(x, y) vuzpq_u16(x, y).val[1]
+#define vuzp2q_s32(x, y) vuzpq_s32(x, y).val[1]
+#define vuzp2q_u32(x, y) vuzpq_u32(x, y).val[1]
+#define vuzp2q_f32(x, y) vuzpq_f32(x, y).val[1]
+#define vzip1_s8(x, y) vzip_s8(x, y).val[0]
+#define vzip1_u8(x, y) vzip_u8(x, y).val[0]
+#define vzip1_s16(x, y) vzip_s16(x, y).val[0]
+#define vzip1_u16(x, y) vzip_u16(x, y).val[0]
+#define vzip1_f32(x, y) vzip_f32(x, y).val[0]
+#define vzip1_u32(x, y) vzip_u32(x, y).val[0]
+#define vzip1_s32(x, y) vzip_s32(x, y).val[0]
+#define vzip1q_s8(x, y) vzipq_s8(x, y).val[0]
+#define vzip1q_u8(x, y) vzipq_u8(x, y).val[0]
+#define vzip1q_s16(x, y) vzipq_s16(x, y).val[0]
+#define vzip1q_u16(x, y) vzipq_u16(x, y).val[0]
+#define vzip1q_s32(x, y) vzipq_s32(x, y).val[0]
+#define vzip1q_u32(x, y) vzipq_u32(x, y).val[0]
+#define vzip1q_f32(x, y) vzipq_f32(x, y).val[0]
+#define vzip2_s8(x, y) vzip_s8(x, y).val[1]
+#define vzip2_u8(x, y) vzip_u8(x, y).val[1]
+#define vzip2_s16(x, y) vzip_s16(x, y).val[1]
+#define vzip2_u16(x, y) vzip_u16(x, y).val[1]
+#define vzip2_s32(x, y) vzip_s32(x, y).val[1]
+#define vzip2_u32(x, y) vzip_u32(x, y).val[1]
+#define vzip2_f32(x, y) vzip_f32(x, y).val[1]
+#define vzip2q_s8(x, y) vzipq_s8(x, y).val[1]
+#define vzip2q_u8(x, y) vzipq_u8(x, y).val[1]
+#define vzip2q_s16(x, y) vzipq_s16(x, y).val[1]
+#define vzip2q_u16(x, y) vzipq_u16(x, y).val[1]
+#define vzip2q_s32(x, y) vzipq_s32(x, y).val[1]
+#define vzip2q_u32(x, y) vzipq_u32(x, y).val[1]
+#define vzip2q_f32(x, y) vzipq_f32(x, y).val[1]
+#endif  // HWY_ARCH_ARM_V7
+
+// Wrappers over uint8x16x2_t etc. so we can define StoreInterleaved2
+// overloads for all vector types, even those (bfloat16_t) where the
+// underlying vector is the same as others (uint16_t).
+template <typename T, size_t N>
+struct Tuple2;  // primary template is declared only, never defined here
+template <typename T, size_t N>
+struct Tuple3;  // primary template is declared only, never defined here
+template <typename T, size_t N>
+struct Tuple4;  // primary template is declared only, never defined here
+
+template <>
+struct Tuple2<uint8_t, 16> {  // full lane count: pair of 128-bit vectors
+  uint8x16x2_t raw;
+};
+template <size_t N>
+struct Tuple2<uint8_t, N> {  // any other N: pair of 64-bit vectors
+  uint8x8x2_t raw;
+};
+template <>
+struct Tuple2<int8_t, 16> {
+  int8x16x2_t raw;
+};
+template <size_t N>
+struct Tuple2<int8_t, N> {
+  int8x8x2_t raw;
+};
+template <>
+struct Tuple2<uint16_t, 8> {
+  uint16x8x2_t raw;
+};
+template <size_t N>
+struct Tuple2<uint16_t, N> {
+  uint16x4x2_t raw;
+};
+template <>
+struct Tuple2<int16_t, 8> {
+  int16x8x2_t raw;
+};
+template <size_t N>
+struct Tuple2<int16_t, N> {
+  int16x4x2_t raw;
+};
+template <>
+struct Tuple2<uint32_t, 4> {
+  uint32x4x2_t raw;
+};
+template <size_t N>
+struct Tuple2<uint32_t, N> {
+  uint32x2x2_t raw;
+};
+template <>
+struct Tuple2<int32_t, 4> {
+  int32x4x2_t raw;
+};
+template <size_t N>
+struct Tuple2<int32_t, N> {
+  int32x2x2_t raw;
+};
+template <>
+struct Tuple2<uint64_t, 2> {
+  uint64x2x2_t raw;
+};
+template <size_t N>
+struct Tuple2<uint64_t, N> {
+  uint64x1x2_t raw;
+};
+template <>
+struct Tuple2<int64_t, 2> {
+  int64x2x2_t raw;
+};
+template <size_t N>
+struct Tuple2<int64_t, N> {
+  int64x1x2_t raw;
+};
+
+template <>
+struct Tuple2<float16_t, 8> {
+#if HWY_NEON_HAVE_FLOAT16C
+  float16x8x2_t raw;
+#else  // no f16 vector type: store the lanes in u16 vectors
+  uint16x8x2_t raw;
+#endif
+};
+template <size_t N>
+struct Tuple2<float16_t, N> {
+#if HWY_NEON_HAVE_FLOAT16C
+  float16x4x2_t raw;
+#else  // no f16 vector type: store the lanes in u16 vectors
+  uint16x4x2_t raw;
+#endif
+};
+template <>
+struct Tuple2<bfloat16_t, 8> {
+#if HWY_NEON_HAVE_BFLOAT16
+  bfloat16x8x2_t raw;
+#else  // no bf16 vector type: store the lanes in u16 vectors
+  uint16x8x2_t raw;
+#endif
+};
+template <size_t N>
+struct Tuple2<bfloat16_t, N> {
+#if HWY_NEON_HAVE_BFLOAT16
+  bfloat16x4x2_t raw;
+#else  // no bf16 vector type: store the lanes in u16 vectors
+  uint16x4x2_t raw;
+#endif
+};
+
+template <>
+struct Tuple2<float32_t, 4> {
+  float32x4x2_t raw;
+};
+template <size_t N>
+struct Tuple2<float32_t, N> {
+  float32x2x2_t raw;
+};
+#if HWY_HAVE_FLOAT64
+template <>
+struct Tuple2<float64_t, 2> {
+  float64x2x2_t raw;
+};
+template <size_t N>
+struct Tuple2<float64_t, N> {
+  float64x1x2_t raw;
+};
+#endif  // HWY_HAVE_FLOAT64
+
+template <>
+struct Tuple3<uint8_t, 16> {  // full lane count: triple of 128-bit vectors
+  uint8x16x3_t raw;
+};
+template <size_t N>
+struct Tuple3<uint8_t, N> {  // any other N: triple of 64-bit vectors
+  uint8x8x3_t raw;
+};
+template <>
+struct Tuple3<int8_t, 16> {
+  int8x16x3_t raw;
+};
+template <size_t N>
+struct Tuple3<int8_t, N> {
+  int8x8x3_t raw;
+};
+template <>
+struct Tuple3<uint16_t, 8> {
+  uint16x8x3_t raw;
+};
+template <size_t N>
+struct Tuple3<uint16_t, N> {
+  uint16x4x3_t raw;
+};
+template <>
+struct Tuple3<int16_t, 8> {
+  int16x8x3_t raw;
+};
+template <size_t N>
+struct Tuple3<int16_t, N> {
+  int16x4x3_t raw;
+};
+template <>
+struct Tuple3<uint32_t, 4> {
+  uint32x4x3_t raw;
+};
+template <size_t N>
+struct Tuple3<uint32_t, N> {
+  uint32x2x3_t raw;
+};
+template <>
+struct Tuple3<int32_t, 4> {
+  int32x4x3_t raw;
+};
+template <size_t N>
+struct Tuple3<int32_t, N> {
+  int32x2x3_t raw;
+};
+template <>
+struct Tuple3<uint64_t, 2> {
+  uint64x2x3_t raw;
+};
+template <size_t N>
+struct Tuple3<uint64_t, N> {
+  uint64x1x3_t raw;
+};
+template <>
+struct Tuple3<int64_t, 2> {
+  int64x2x3_t raw;
+};
+template <size_t N>
+struct Tuple3<int64_t, N> {
+  int64x1x3_t raw;
+};
+
+template <>
+struct Tuple3<float16_t, 8> {
+#if HWY_NEON_HAVE_FLOAT16C
+  float16x8x3_t raw;
+#else  // no f16 vector type: store the lanes in u16 vectors
+  uint16x8x3_t raw;
+#endif
+};
+template <size_t N>
+struct Tuple3<float16_t, N> {
+#if HWY_NEON_HAVE_FLOAT16C
+  float16x4x3_t raw;
+#else  // no f16 vector type: store the lanes in u16 vectors
+  uint16x4x3_t raw;
+#endif
+};
+template <>
+struct Tuple3<bfloat16_t, 8> {
+#if HWY_NEON_HAVE_BFLOAT16
+  bfloat16x8x3_t raw;
+#else  // no bf16 vector type: store the lanes in u16 vectors
+  uint16x8x3_t raw;
+#endif
+};
+template <size_t N>
+struct Tuple3<bfloat16_t, N> {
+#if HWY_NEON_HAVE_BFLOAT16
+  bfloat16x4x3_t raw;
+#else  // no bf16 vector type: store the lanes in u16 vectors
+  uint16x4x3_t raw;
+#endif
+};
+
+template <>
+struct Tuple3<float32_t, 4> {
+  float32x4x3_t raw;
+};
+template <size_t N>
+struct Tuple3<float32_t, N> {
+  float32x2x3_t raw;
+};
+#if HWY_HAVE_FLOAT64
+template <>
+struct Tuple3<float64_t, 2> {
+  float64x2x3_t raw;
+};
+template <size_t N>
+struct Tuple3<float64_t, N> {
+  float64x1x3_t raw;
+};
+#endif  // HWY_HAVE_FLOAT64
+
+template <>
+struct Tuple4<uint8_t, 16> {  // full lane count: four 128-bit vectors
+  uint8x16x4_t raw;
+};
+template <size_t N>
+struct Tuple4<uint8_t, N> {  // any other N: four 64-bit vectors
+  uint8x8x4_t raw;
+};
+template <>
+struct Tuple4<int8_t, 16> {
+  int8x16x4_t raw;
+};
+template <size_t N>
+struct Tuple4<int8_t, N> {
+  int8x8x4_t raw;
+};
+template <>
+struct Tuple4<uint16_t, 8> {
+  uint16x8x4_t raw;
+};
+template <size_t N>
+struct Tuple4<uint16_t, N> {
+  uint16x4x4_t raw;
+};
+template <>
+struct Tuple4<int16_t, 8> {
+  int16x8x4_t raw;
+};
+template <size_t N>
+struct Tuple4<int16_t, N> {
+  int16x4x4_t raw;
+};
+template <>
+struct Tuple4<uint32_t, 4> {
+  uint32x4x4_t raw;
+};
+template <size_t N>
+struct Tuple4<uint32_t, N> {
+  uint32x2x4_t raw;
+};
+template <>
+struct Tuple4<int32_t, 4> {
+  int32x4x4_t raw;
+};
+template <size_t N>
+struct Tuple4<int32_t, N> {
+  int32x2x4_t raw;
+};
+template <>
+struct Tuple4<uint64_t, 2> {
+  uint64x2x4_t raw;
+};
+template <size_t N>
+struct Tuple4<uint64_t, N> {
+  uint64x1x4_t raw;
+};
+template <>
+struct Tuple4<int64_t, 2> {
+  int64x2x4_t raw;
+};
+template <size_t N>
+struct Tuple4<int64_t, N> {
+  int64x1x4_t raw;
+};
+
+template <>
+struct Tuple4<float16_t, 8> {
+#if HWY_NEON_HAVE_FLOAT16C
+  float16x8x4_t raw;
+#else  // no f16 vector type: store the lanes in u16 vectors
+  uint16x8x4_t raw;
+#endif
+};
+template <size_t N>
+struct Tuple4<float16_t, N> {
+#if HWY_NEON_HAVE_FLOAT16C
+  float16x4x4_t raw;
+#else  // no f16 vector type: store the lanes in u16 vectors
+  uint16x4x4_t raw;
+#endif
+};
+template <>
+struct Tuple4<bfloat16_t, 8> {
+#if HWY_NEON_HAVE_BFLOAT16
+  bfloat16x8x4_t raw;
+#else  // no bf16 vector type: store the lanes in u16 vectors
+  uint16x8x4_t raw;
+#endif
+};
+template <size_t N>
+struct Tuple4<bfloat16_t, N> {
+#if HWY_NEON_HAVE_BFLOAT16
+  bfloat16x4x4_t raw;
+#else  // no bf16 vector type: store the lanes in u16 vectors
+  uint16x4x4_t raw;
+#endif
+};
+
+template <>
+struct Tuple4<float32_t, 4> {
+  float32x4x4_t raw;
+};
+template <size_t N>
+struct Tuple4<float32_t, N> {
+  float32x2x4_t raw;
+};
+#if HWY_HAVE_FLOAT64
+template <>
+struct Tuple4<float64_t, 2> {
+  float64x2x4_t raw;
+};
+template <size_t N>
+struct Tuple4<float64_t, N> {
+  float64x1x4_t raw;
+};
+#endif  // HWY_HAVE_FLOAT64
+
+template <typename T, size_t N>
+struct Raw128;  // maps (lane type, lane count) to the NEON vector type
+
+// 128-bit vectors
+template <>
+struct Raw128<uint8_t, 16> {
+  using type = uint8x16_t;
+};
+
+template <>
+struct Raw128<uint16_t, 8> {
+  using type = uint16x8_t;
+};
+
+template <>
+struct Raw128<uint32_t, 4> {
+  using type = uint32x4_t;
+};
+
+template <>
+struct Raw128<uint64_t, 2> {
+  using type = uint64x2_t;
+};
+
+template <>
+struct Raw128<int8_t, 16> {
+  using type = int8x16_t;
+};
+
+template <>
+struct Raw128<int16_t, 8> {
+  using type = int16x8_t;
+};
+
+template <>
+struct Raw128<int32_t, 4> {
+  using type = int32x4_t;
+};
+
+template <>
+struct Raw128<int64_t, 2> {
+  using type = int64x2_t;
+};
+
+template <>
+struct Raw128<float16_t, 8> {
+#if HWY_NEON_HAVE_FLOAT16C
+  using type = float16x8_t;
+#else  // no f16 vector type: use the u16 vector of the same size
+  using type = uint16x8_t;
+#endif
+};
+
+template <>
+struct Raw128<bfloat16_t, 8> {
+#if HWY_NEON_HAVE_BFLOAT16
+  using type = bfloat16x8_t;
+#else  // no bf16 vector type: use the u16 vector of the same size
+  using type = uint16x8_t;
+#endif
+};
+
+template <>
+struct Raw128<float, 4> {
+  using type = float32x4_t;
+};
+
+#if HWY_HAVE_FLOAT64
+template <>
+struct Raw128<double, 2> {
+  using type = float64x2_t;
+};
+#endif  // HWY_HAVE_FLOAT64
+
+// 64-bit vectors
+template <>
+struct Raw128<uint8_t, 8> {
+  using type = uint8x8_t;
+};
+
+template <>
+struct Raw128<uint16_t, 4> {
+  using type = uint16x4_t;
+};
+
+template <>
+struct Raw128<uint32_t, 2> {
+  using type = uint32x2_t;
+};
+
+template <>
+struct Raw128<uint64_t, 1> {
+  using type = uint64x1_t;
+};
+
+template <>
+struct Raw128<int8_t, 8> {
+  using type = int8x8_t;
+};
+
+template <>
+struct Raw128<int16_t, 4> {
+  using type = int16x4_t;
+};
+
+template <>
+struct Raw128<int32_t, 2> {
+  using type = int32x2_t;
+};
+
+template <>
+struct Raw128<int64_t, 1> {
+  using type = int64x1_t;
+};
+
+template <>
+struct Raw128<float16_t, 4> {
+#if HWY_NEON_HAVE_FLOAT16C
+  using type = float16x4_t;
+#else  // no f16 vector type: use the u16 vector of the same size
+  using type = uint16x4_t;
+#endif
+};
+
+template <>
+struct Raw128<bfloat16_t, 4> {
+#if HWY_NEON_HAVE_BFLOAT16
+  using type = bfloat16x4_t;
+#else  // no bf16 vector type: use the u16 vector of the same size
+  using type = uint16x4_t;
+#endif
+};
+
+template <>
+struct Raw128<float, 2> {
+  using type = float32x2_t;
+};
+
+#if HWY_HAVE_FLOAT64
+template <>
+struct Raw128<double, 1> {
+  using type = float64x1_t;
+};
+#endif  // HWY_HAVE_FLOAT64
+
+// 32-bit vectors: inherit `type` from the 64-bit specialization.
+template <>
+struct Raw128<uint8_t, 4> : public Raw128<uint8_t, 8> {};
+
+template <>
+struct Raw128<uint16_t, 2> : public Raw128<uint16_t, 4> {};
+
+template <>
+struct Raw128<uint32_t, 1> : public Raw128<uint32_t, 2> {};
+
+template <>
+struct Raw128<int8_t, 4> : public Raw128<int8_t, 8> {};
+
+template <>
+struct Raw128<int16_t, 2> : public Raw128<int16_t, 4> {};
+
+template <>
+struct Raw128<int32_t, 1> : public Raw128<int32_t, 2> {};
+
+template <>
+struct Raw128<float16_t, 2> : public Raw128<float16_t, 4> {};
+
+template <>
+struct Raw128<bfloat16_t, 2> : public Raw128<bfloat16_t, 4> {};
+
+template <>
+struct Raw128<float, 1> : public Raw128<float, 2> {};
+
+// 16-bit vectors: inherit `type` from the 64-bit specialization.
+template <>
+struct Raw128<uint8_t, 2> : public Raw128<uint8_t, 8> {};
+
+template <>
+struct Raw128<uint16_t, 1> : public Raw128<uint16_t, 4> {};
+
+template <>
+struct Raw128<int8_t, 2> : public Raw128<int8_t, 8> {};
+
+template <>
+struct Raw128<int16_t, 1> : public Raw128<int16_t, 4> {};
+
+template <>
+struct Raw128<float16_t, 1> : public Raw128<float16_t, 4> {};
+
+template <>
+struct Raw128<bfloat16_t, 1> : public Raw128<bfloat16_t, 4> {};
+
+// 8-bit vectors: inherit `type` from the 64-bit specialization.
+template <>
+struct Raw128<uint8_t, 1> : public Raw128<uint8_t, 8> {};
+
+template <>
+struct Raw128<int8_t, 1> : public Raw128<int8_t, 8> {};
+
+}  // namespace detail
+
+template <typename T, size_t N = 16 / sizeof(T)>  // default N: 16 bytes of T
+class Vec128 {
+ public:
+  using Raw = typename detail::Raw128<T, N>::type;
+  using PrivateT = T;                     // only for DFromV
+  static constexpr size_t kPrivateN = N;  // only for DFromV
+
+  HWY_INLINE Vec128() {}  // does not initialize `raw`
+  Vec128(const Vec128&) = default;
+  Vec128& operator=(const Vec128&) = default;
+  HWY_INLINE explicit Vec128(const Raw raw) : raw(raw) {}
+
+  // Compound assignment. Only usable if there is a corresponding non-member
+  // binary operator overload. For example, only f32 and f64 support division.
+  HWY_INLINE Vec128& operator*=(const Vec128 other) {
+    return *this = (*this * other);
+  }
+  HWY_INLINE Vec128& operator/=(const Vec128 other) {
+    return *this = (*this / other);
+  }
+  HWY_INLINE Vec128& operator+=(const Vec128 other) {
+    return *this = (*this + other);
+  }
+  HWY_INLINE Vec128& operator-=(const Vec128 other) {
+    return *this = (*this - other);
+  }
+  HWY_INLINE Vec128& operator&=(const Vec128 other) {
+    return *this = (*this & other);
+  }
+  HWY_INLINE Vec128& operator|=(const Vec128 other) {
+    return *this = (*this | other);
+  }
+  HWY_INLINE Vec128& operator^=(const Vec128 other) {
+    return *this = (*this ^ other);
+  }
+
+  Raw raw;  // the wrapped NEON vector; sole data member
+};
+
+template <typename T>
+using Vec64 = Vec128<T, 8 / sizeof(T)>;  // as many lanes as fit in 8 bytes
+
+template <typename T>
+using Vec32 = Vec128<T, 4 / sizeof(T)>;  // as many lanes as fit in 4 bytes
+
+template <typename T>
+using Vec16 = Vec128<T, 2 / sizeof(T)>;  // as many lanes as fit in 2 bytes
+
+// Mask for Vec128<T, N>: each lane is FF..FF or 0.
+template <typename T, size_t N = 16 / sizeof(T)>
+class Mask128 {
+  // Arm C Language Extensions return and expect unsigned type.
+  using Raw = typename detail::Raw128<MakeUnsigned<T>, N>::type;
+
+ public:
+  using PrivateT = T;                     // only for DFromM
+  static constexpr size_t kPrivateN = N;  // only for DFromM
+
+  HWY_INLINE Mask128() {}  // does not initialize `raw`
+  Mask128(const Mask128&) = default;
+  Mask128& operator=(const Mask128&) = default;
+  HWY_INLINE explicit Mask128(const Raw raw) : raw(raw) {}
+
+  Raw raw;  // unsigned vector with the same lane count and lane size as T
+};
+
+template <typename T>
+using Mask64 = Mask128<T, 8 / sizeof(T)>;  // mask with 8 bytes worth of lanes
+
+template <class V>
+using DFromV = Simd<typename V::PrivateT, V::kPrivateN, 0>;  // tag of vector V
+
+template <class M>
+using DFromM = Simd<typename M::PrivateT, M::kPrivateN, 0>;  // tag of mask M
+
+template <class V>
+using TFromV = typename V::PrivateT;  // lane type of vector V
+
+// ------------------------------ Set
+
+namespace detail {
+// We want to route any combination of N/kPow2 to the intrinsics depending on
+// whether the requested size is <= 64 bits or 128. HWY_NEON_BUILD_TPL is
+// unconditional and currently does not accept inputs (such as whether the
+// vector is 64 or 128-bit). Thus we are not able to use HWY_IF_V_SIZE_D for
+// SFINAE. We instead define a private NativeSet which receives a Simd<> whose
+// kPow2 has already been folded into its N.
+#define HWY_NEON_BUILD_TPL_HWY_SET
+#define HWY_NEON_BUILD_RET_HWY_SET(type, size) Vec128<type##_t, size>
+#define HWY_NEON_BUILD_PARAM_HWY_SET(type, size) \
+  Simd<type##_t, size, 0> /* tag */, type##_t t
+#define HWY_NEON_BUILD_ARG_HWY_SET t
+
+HWY_NEON_DEF_FUNCTION_ALL_TYPES(NativeSet, vdup, _n_, HWY_SET)
+HWY_NEON_DEF_FUNCTION_BFLOAT_16(NativeSet, vdup, _n_, HWY_SET)
+#if !HWY_HAVE_FLOAT16  // ALL_TYPES skipped f16 in this case, so add it here
+HWY_NEON_DEF_FUNCTION_FLOAT_16_UNCONDITIONAL(NativeSet, vdup, _n_, HWY_SET)
+#endif
+
+#undef HWY_NEON_BUILD_TPL_HWY_SET
+#undef HWY_NEON_BUILD_RET_HWY_SET
+#undef HWY_NEON_BUILD_PARAM_HWY_SET
+#undef HWY_NEON_BUILD_ARG_HWY_SET
+
+#if !HWY_NEON_HAVE_BFLOAT16
+// BF16: return u16.
+template <class D, HWY_IF_BF16_D(D)>
+HWY_API Vec128<bfloat16_t, MaxLanes(D())> NativeSet(D d, bfloat16_t t) {
+  uint16_t tu;
+  CopyBytes<sizeof(tu)>(&t, &tu);  // copy the bf16 bytes into a u16
+  return Vec128<bfloat16_t, d.MaxLanes()>(Set(RebindToUnsigned<D>(), tu).raw);
+}
+#endif  // !HWY_NEON_HAVE_BFLOAT16
+
+}  // namespace detail
+
+// Full vector. VFromD is defined in terms of Set, so it cannot be used here.
+// T is its own template parameter rather than TFromD<D> because it is deduced
+// from the actual argument, whose type can differ from the lane type.
+template <class D, HWY_IF_V_SIZE_D(D, 16), typename T>
+HWY_INLINE Vec128<TFromD<D>> Set(D /* tag */, T t) {
+  return detail::NativeSet(Simd<TFromD<D>, 16 / sizeof(TFromD<D>), 0>(), static_cast<TFromD<D>>(t));
+}
+
+// Partial vector: broadcast into a 64-bit vector, then rewrap its raw value.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), typename T>
+HWY_API Vec128<TFromD<D>, MaxLanes(D())> Set(D /* tag */, T t) {
+  using TD = TFromD<D>;
+  const auto v64 = detail::NativeSet(Full64<TD>(), static_cast<TD>(t));
+  return Vec128<TD, MaxLanes(D())>(v64.raw);
+}
+
+template <class D>
+using VFromD = decltype(Set(D(), TFromD<D>()));  // vector type Set returns for D
+
+template <class D>
+HWY_API VFromD<D> Zero(D d) {
+  const TFromD<D> zero{};  // default ctor also works for bfloat16_t/float16_t
+  return Set(d, zero);
+}
+
+HWY_DIAGNOSTICS(push)  // silence uninitialized-use warnings for Undefined only
+HWY_DIAGNOSTICS_OFF(disable : 4700, ignored "-Wuninitialized")
+#if HWY_COMPILER_GCC_ACTUAL
+HWY_DIAGNOSTICS_OFF(disable : 4701, ignored "-Wmaybe-uninitialized")
+#endif
+
+template <class D>
+HWY_API VFromD<D> Undefined(D /*tag*/) {
+  VFromD<D> v;  // default-constructed; lanes are deliberately not set
+  return v;
+}
+
+HWY_DIAGNOSTICS(pop)  // restore the previous warning state
+
+namespace detail {
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE VFromD<D> Iota0(D d) {  // 1-byte lanes, vectors of up to 8 bytes
+  const RebindToUnsigned<decltype(d)> du;
+#if HWY_COMPILER_GCC || HWY_COMPILER_CLANGCL
+  typedef uint8_t GccU8RawVectType __attribute__((__vector_size__(8)));
+  constexpr GccU8RawVectType kU8Iota0 = {0, 1, 2, 3, 4, 5, 6, 7};
+  const VFromD<decltype(du)> vu8_iota0(reinterpret_cast<uint8x8_t>(kU8Iota0));
+#else  // other compilers: load the constants from an aligned table
+  alignas(8) static constexpr uint8_t kU8Iota0[8] = {0, 1, 2, 3, 4, 5, 6, 7};
+  const VFromD<decltype(du)> vu8_iota0(
+      Load(Full64<TFromD<decltype(du)>>(), kU8Iota0).raw);
+#endif
+  return BitCast(d, vu8_iota0);  // from the unsigned tag back to D's lane type
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE VFromD<D> Iota0(D d) {  // 1-byte lanes, 16-byte vectors: 0..15
+  const RebindToUnsigned<decltype(d)> du;
+#if HWY_COMPILER_GCC || HWY_COMPILER_CLANGCL
+  typedef uint8_t GccU8RawVectType __attribute__((__vector_size__(16)));
+  constexpr GccU8RawVectType kU8Iota0 = {0, 1, 2,  3,  4,  5,  6,  7,
+                                         8, 9, 10, 11, 12, 13, 14, 15};
+  const VFromD<decltype(du)> vu8_iota0(reinterpret_cast<uint8x16_t>(kU8Iota0));
+#else  // other compilers: load the constants from an aligned table
+  alignas(16) static constexpr uint8_t kU8Iota0[16] = {
+      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+  const auto vu8_iota0 = Load(du, kU8Iota0);
+#endif
+  return BitCast(d, vu8_iota0);  // from the unsigned tag back to D's lane type
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_T_SIZE_D(D, 2)>
+HWY_INLINE VFromD<D> Iota0(D d) {  // 2-byte lanes, vectors of up to 8 bytes
+  using T = TFromD<decltype(d)>;
+#if HWY_COMPILER_GCC || HWY_COMPILER_CLANGCL
+  typedef T GccRawVectType __attribute__((__vector_size__(8)));
+  constexpr GccRawVectType kIota0 = {T{0}, T{1}, T{2}, static_cast<T>(3)};
+  return VFromD<D>(reinterpret_cast<typename VFromD<D>::Raw>(kIota0));
+#else  // other compilers: load the constants from an aligned table
+  alignas(8) static constexpr T kIota0[4] = {T{0}, T{1}, T{2},
+                                             static_cast<T>(3)};
+  return Load(d, kIota0);
+#endif
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_T_SIZE_D(D, 2)>
+HWY_INLINE VFromD<D> Iota0(D d) {  // 2-byte lanes, 16-byte vectors: 0..7
+  using T = TFromD<decltype(d)>;
+#if HWY_COMPILER_GCC || HWY_COMPILER_CLANGCL
+  typedef T GccRawVectType __attribute__((__vector_size__(16)));
+  constexpr GccRawVectType kIota0 = {T{0}, T{1}, T{2}, static_cast<T>(3),
+                                     T{4}, T{5}, T{6}, static_cast<T>(7)};
+  return VFromD<D>(reinterpret_cast<typename VFromD<D>::Raw>(kIota0));
+#else  // other compilers: load the constants from an aligned table
+  alignas(16) static constexpr T kIota0[8] = {
+      T{0}, T{1}, T{2}, static_cast<T>(3), T{4}, T{5}, T{6}, static_cast<T>(7)};
+  return Load(d, kIota0);
+#endif
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_UI32_D(D)>
+HWY_INLINE VFromD<D> Iota0(D d) {  // u32/i32 lanes, vectors of up to 8 bytes
+  const RebindToUnsigned<decltype(d)> du;
+#if HWY_COMPILER_GCC || HWY_COMPILER_CLANGCL
+  typedef uint32_t GccU32RawVectType __attribute__((__vector_size__(8)));
+  constexpr GccU32RawVectType kU32Iota0 = {0, 1};
+  const VFromD<decltype(du)> vu32_iota0(
+      reinterpret_cast<uint32x2_t>(kU32Iota0));
+#else  // other compilers: load the constants from an aligned table
+  alignas(8) static constexpr uint32_t kU32Iota0[2] = {0, 1};
+  const VFromD<decltype(du)> vu32_iota0{
+      Load(Full64<TFromD<decltype(du)>>(), kU32Iota0).raw};
+#endif
+  return BitCast(d, vu32_iota0);  // from the unsigned tag back to D's lane type
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_UI32_D(D)>
+HWY_INLINE VFromD<D> Iota0(D d) {  // u32/i32 lanes, 16-byte vectors: 0..3
+  const RebindToUnsigned<decltype(d)> du;
+#if HWY_COMPILER_GCC || HWY_COMPILER_CLANGCL
+  typedef uint32_t GccU32RawVectType __attribute__((__vector_size__(16)));
+  constexpr GccU32RawVectType kU32Iota0 = {0, 1, 2, 3};
+  const VFromD<decltype(du)> vu32_iota0(
+      reinterpret_cast<uint32x4_t>(kU32Iota0));
+#else  // other compilers: load the constants from an aligned table
+  alignas(16) static constexpr uint32_t kU32Iota0[4] = {0, 1, 2, 3};
+  const auto vu32_iota0 = Load(du, kU32Iota0);
+#endif
+  return BitCast(d, vu32_iota0);  // from the unsigned tag back to D's lane type
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_F32_D(D)>
+HWY_INLINE VFromD<D> Iota0(D d) {  // f32 lanes, vectors of up to 8 bytes
+#if HWY_COMPILER_GCC || HWY_COMPILER_CLANGCL
+  typedef float GccF32RawVectType __attribute__((__vector_size__(8)));
+  constexpr GccF32RawVectType kF32Iota0 = {0.0f, 1.0f};
+  return VFromD<decltype(d)>(reinterpret_cast<float32x2_t>(kF32Iota0));
+#else  // other compilers: load the constants from an aligned table
+  alignas(8) static constexpr float kF32Iota0[2] = {0.0f, 1.0f};
+  return VFromD<decltype(d)>{
+      Load(Full64<TFromD<decltype(d)>>(), kF32Iota0).raw};
+#endif
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_INLINE VFromD<D> Iota0(D d) {  // f32 lanes, 16-byte vectors: 0..3
+#if HWY_COMPILER_GCC || HWY_COMPILER_CLANGCL
+  typedef float GccF32RawVectType __attribute__((__vector_size__(16)));
+  constexpr GccF32RawVectType kF32Iota0 = {0.0f, 1.0f, 2.0f, 3.0f};
+  return VFromD<decltype(d)>(reinterpret_cast<float32x4_t>(kF32Iota0));
+#else  // other compilers: load the constants from an aligned table
+  alignas(16) static constexpr float kF32Iota0[4] = {0.0f, 1.0f, 2.0f, 3.0f};
+  return Load(d, kF32Iota0);
+#endif
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE VFromD<D> Iota0(D d) {  // one 8-byte lane, whose index is 0
+  return Zero(d);
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_UI64_D(D)>
+HWY_INLINE VFromD<D> Iota0(D d) {  // u64/i64 lanes, 16-byte vectors: {0, 1}
+  const RebindToUnsigned<decltype(d)> du;
+#if HWY_COMPILER_GCC || HWY_COMPILER_CLANGCL
+  typedef uint64_t GccU64RawVectType __attribute__((__vector_size__(16)));
+  constexpr GccU64RawVectType kU64Iota0 = {0, 1};
+  const VFromD<decltype(du)> vu64_iota0(
+      reinterpret_cast<uint64x2_t>(kU64Iota0));
+#else  // other compilers: load from an aligned table of exactly 2 lanes
+  alignas(16) static constexpr uint64_t kU64Iota0[2] = {0, 1};
+  const auto vu64_iota0 = Load(du, kU64Iota0);
+#endif
+  return BitCast(d, vu64_iota0);  // from the unsigned tag back to D's lane type
+}
+
+#if HWY_HAVE_FLOAT64
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_INLINE VFromD<D> Iota0(D d) {  // f64 lanes, 16-byte vectors: {0.0, 1.0}
+#if HWY_COMPILER_GCC || HWY_COMPILER_CLANGCL
+  typedef double GccF64RawVectType __attribute__((__vector_size__(16)));
+  constexpr GccF64RawVectType kF64Iota0 = {0.0, 1.0};
+  return VFromD<decltype(d)>(reinterpret_cast<float64x2_t>(kF64Iota0));
+#else  // other compilers: load from an aligned table of exactly 2 lanes
+  alignas(16) static constexpr double kF64Iota0[2] = {0.0, 1.0};
+  return Load(d, kF64Iota0);
+#endif
+}
+#endif  // HWY_HAVE_FLOAT64
+
+#if HWY_COMPILER_MSVC
+template <class V, HWY_IF_V_SIZE_LE_V(V, 4)>
+static HWY_INLINE V MaskOutIota(V v) {  // vectors of at most 4 bytes
+  constexpr size_t kVecSizeInBytes = HWY_MAX_LANES_V(V) * sizeof(TFromV<V>);
+  constexpr uint64_t kU64MaskOutMask =  // max of the unsigned type of that size
+      hwy::LimitsMax<hwy::UnsignedFromSize<kVecSizeInBytes>>();
+
+  const DFromV<decltype(v)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  using VU8 = VFromD<decltype(du8)>;
+  const auto mask_out_mask =
+      BitCast(d, VU8(vreinterpret_u8_u64(vdup_n_u64(kU64MaskOutMask))));
+  return v & mask_out_mask;  // AND with the mask built above
+}
+template <class V, HWY_IF_V_SIZE_GT_V(V, 4)>
+static HWY_INLINE V MaskOutIota(V v) {  // larger vectors: returned unchanged
+  return v;
+}
+#endif  // HWY_COMPILER_MSVC
+
+}  // namespace detail
+
+template <class D, typename T2>
+HWY_API VFromD<D> Iota(D d, const T2 first) {  // lane i = first + i
+  const auto start = Set(d, static_cast<TFromD<D>>(first));
+  const auto sum = detail::Iota0(d) + start;
+#if HWY_COMPILER_MSVC
+  return detail::MaskOutIota(sum);
+#else
+  return sum;
+#endif
+}
+
+// ------------------------------ Tuple (VFromD)
+#include "hwy/ops/tuple-inl.h"
+
+// ------------------------------ Combine
+
+// Full result: `lo` is passed as the first vcombine operand, `hi` as the second.
+template <class D, HWY_IF_U8_D(D)>
+HWY_API Vec128<uint8_t> Combine(D /* tag */, Vec64<uint8_t> hi,
+                                Vec64<uint8_t> lo) {
+  return Vec128<uint8_t>(vcombine_u8(lo.raw, hi.raw));
+}
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec128<uint16_t> Combine(D /* tag */, Vec64<uint16_t> hi,
+                                 Vec64<uint16_t> lo) {
+  return Vec128<uint16_t>(vcombine_u16(lo.raw, hi.raw));
+}
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec128<uint32_t> Combine(D /* tag */, Vec64<uint32_t> hi,
+                                 Vec64<uint32_t> lo) {
+  return Vec128<uint32_t>(vcombine_u32(lo.raw, hi.raw));
+}
+template <class D, HWY_IF_U64_D(D)>
+HWY_API Vec128<uint64_t> Combine(D /* tag */, Vec64<uint64_t> hi,
+                                 Vec64<uint64_t> lo) {
+  return Vec128<uint64_t>(vcombine_u64(lo.raw, hi.raw));
+}
+
+template <class D, HWY_IF_I8_D(D)>
+HWY_API Vec128<int8_t> Combine(D /* tag */, Vec64<int8_t> hi,
+                               Vec64<int8_t> lo) {
+  return Vec128<int8_t>(vcombine_s8(lo.raw, hi.raw));
+}
+template <class D, HWY_IF_I16_D(D)>
+HWY_API Vec128<int16_t> Combine(D /* tag */, Vec64<int16_t> hi,
+                                Vec64<int16_t> lo) {
+  return Vec128<int16_t>(vcombine_s16(lo.raw, hi.raw));
+}
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec128<int32_t> Combine(D /* tag */, Vec64<int32_t> hi,
+                                Vec64<int32_t> lo) {
+  return Vec128<int32_t>(vcombine_s32(lo.raw, hi.raw));
+}
+template <class D, HWY_IF_I64_D(D)>
+HWY_API Vec128<int64_t> Combine(D /* tag */, Vec64<int64_t> hi,
+                                Vec64<int64_t> lo) {
+  return Vec128<int64_t>(vcombine_s64(lo.raw, hi.raw));
+}
+
+template <class D, HWY_IF_F16_D(D)>
+HWY_API Vec128<float16_t> Combine(D d, Vec64<float16_t> hi,
+                                  Vec64<float16_t> lo) {
+#if HWY_HAVE_FLOAT16
+  (void)d;  // the tag is only needed by the fallback branch
+  return Vec128<float16_t>(vcombine_f16(lo.raw, hi.raw));
+#else  // cast both halves to the unsigned tag, combine there, cast back
+  const RebindToUnsigned<D> du;
+  const Half<decltype(du)> duh;
+  return BitCast(d, Combine(du, BitCast(duh, hi), BitCast(duh, lo)));
+#endif
+}
+
+template <class D, HWY_IF_BF16_D(D)>
+HWY_API Vec128<bfloat16_t> Combine(D d, Vec64<bfloat16_t> hi,
+                                   Vec64<bfloat16_t> lo) {
+#if HWY_NEON_HAVE_BFLOAT16
+  (void)d;  // the tag is only needed by the fallback branch
+  return Vec128<bfloat16_t>(vcombine_bf16(lo.raw, hi.raw));
+#else  // cast both halves to the unsigned tag, combine there, cast back
+  const RebindToUnsigned<D> du;
+  const Half<decltype(du)> duh;
+  return BitCast(d, Combine(du, BitCast(duh, hi), BitCast(duh, lo)));
+#endif
+}
+
+template <class D, HWY_IF_F32_D(D)>
+HWY_API Vec128<float> Combine(D /* tag */, Vec64<float> hi, Vec64<float> lo) {
+  return Vec128<float>(vcombine_f32(lo.raw, hi.raw));  // lo first, then hi
+}
+#if HWY_HAVE_FLOAT64
+template <class D, HWY_IF_F64_D(D)>
+HWY_API Vec128<double> Combine(D /* tag */, Vec64<double> hi,
+                               Vec64<double> lo) {
+  return Vec128<double>(vcombine_f64(lo.raw, hi.raw));  // lo first, then hi
+}
+#endif  // HWY_HAVE_FLOAT64
+
+// ------------------------------ BitCast
+
+// Implementation helpers for BitCast. BitCastToByte reinterprets a vector as
+// uint8_t lanes; BitCastFromByte reinterprets uint8_t lanes as the lane type
+// selected by the tag D. The public BitCast chains the two.
+namespace detail {
+
+// Converts from Vec128<T, N> to Vec128<uint8_t, N * sizeof(T)> using the
+// vreinterpret*_u8_*() set of functions.
+#define HWY_NEON_BUILD_TPL_HWY_CAST_TO_U8
+#define HWY_NEON_BUILD_RET_HWY_CAST_TO_U8(type, size) \
+  Vec128<uint8_t, size * sizeof(type##_t)>
+#define HWY_NEON_BUILD_PARAM_HWY_CAST_TO_U8(type, size) Vec128<type##_t, size> v
+#define HWY_NEON_BUILD_ARG_HWY_CAST_TO_U8 v.raw
+
+// Special case of u8 to u8 since vreinterpret*_u8_u8 is obviously not defined.
+template <size_t N>
+HWY_INLINE Vec128<uint8_t, N> BitCastToByte(Vec128<uint8_t, N> v) {
+  return v;
+}
+
+// Macro-generated BitCastToByte overloads for the remaining lane types. The
+// HWY_NEON_DEF_FUNCTION_* macros are defined outside this section; they consume
+// the HWY_NEON_BUILD_*_HWY_CAST_TO_U8 pieces defined above.
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(BitCastToByte, vreinterpret, _u8_,
+                                 HWY_CAST_TO_U8)
+HWY_NEON_DEF_FUNCTION_BFLOAT_16(BitCastToByte, vreinterpret, _u8_,
+                                HWY_CAST_TO_U8)
+
+HWY_NEON_DEF_FUNCTION_INTS(BitCastToByte, vreinterpret, _u8_, HWY_CAST_TO_U8)
+HWY_NEON_DEF_FUNCTION_UINT_16(BitCastToByte, vreinterpret, _u8_, HWY_CAST_TO_U8)
+HWY_NEON_DEF_FUNCTION_UINT_32(BitCastToByte, vreinterpret, _u8_, HWY_CAST_TO_U8)
+HWY_NEON_DEF_FUNCTION_UINT_64(BitCastToByte, vreinterpret, _u8_, HWY_CAST_TO_U8)
+
+// float16_t needs an explicit overload when HWY_HAVE_FLOAT16 is off.
+// NOTE(review): presumably ALL_FLOATS above omits f16 in that configuration -
+// confirm against the macro definition.
+#if !HWY_HAVE_FLOAT16
+#if HWY_NEON_HAVE_FLOAT16C
+HWY_NEON_DEF_FUNCTION_FLOAT_16_UNCONDITIONAL(BitCastToByte, vreinterpret, _u8_,
+                                             HWY_CAST_TO_U8)
+#else
+// No f16 intrinsics: wrap v.raw in a uint16_t vector and reuse that overload.
+template <size_t N>
+HWY_INLINE Vec128<uint8_t, N * 2> BitCastToByte(Vec128<float16_t, N> v) {
+  return BitCastToByte(Vec128<uint16_t, N>(v.raw));
+}
+#endif  // HWY_NEON_HAVE_FLOAT16C
+#endif  // !HWY_HAVE_FLOAT16
+
+#if !HWY_NEON_HAVE_BFLOAT16
+// No bf16 intrinsics: wrap v.raw in a uint16_t vector and reuse that overload.
+template <size_t N>
+HWY_INLINE Vec128<uint8_t, N * 2> BitCastToByte(Vec128<bfloat16_t, N> v) {
+  return BitCastToByte(Vec128<uint16_t, N>(v.raw));
+}
+#endif  // !HWY_NEON_HAVE_BFLOAT16
+
+#undef HWY_NEON_BUILD_TPL_HWY_CAST_TO_U8
+#undef HWY_NEON_BUILD_RET_HWY_CAST_TO_U8
+#undef HWY_NEON_BUILD_PARAM_HWY_CAST_TO_U8
+#undef HWY_NEON_BUILD_ARG_HWY_CAST_TO_U8
+
+// u8 to u8: nothing to reinterpret, returns the input unchanged.
+template <class D, HWY_IF_U8_D(D)>
+HWY_INLINE VFromD<D> BitCastFromByte(D /* tag */, VFromD<D> v) {
+  return v;
+}
+
+// 64-bit or less:
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I8_D(D)>
+HWY_INLINE VFromD<D> BitCastFromByte(D /* tag */,
+                                     VFromD<RebindToUnsigned<D>> v) {
+  return VFromD<D>(vreinterpret_s8_u8(v.raw));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U16_D(D)>
+HWY_INLINE VFromD<D> BitCastFromByte(D /* tag */,
+                                     VFromD<Repartition<uint8_t, D>> v) {
+  return VFromD<D>(vreinterpret_u16_u8(v.raw));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I16_D(D)>
+HWY_INLINE VFromD<D> BitCastFromByte(D /* tag */,
+                                     VFromD<Repartition<uint8_t, D>> v) {
+  return VFromD<D>(vreinterpret_s16_u8(v.raw));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U32_D(D)>
+HWY_INLINE VFromD<D> BitCastFromByte(D /* tag */,
+                                     VFromD<Repartition<uint8_t, D>> v) {
+  return VFromD<D>(vreinterpret_u32_u8(v.raw));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I32_D(D)>
+HWY_INLINE VFromD<D> BitCastFromByte(D /* tag */,
+                                     VFromD<Repartition<uint8_t, D>> v) {
+  return VFromD<D>(vreinterpret_s32_u8(v.raw));
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U64_D(D)>
+HWY_INLINE Vec64<uint64_t> BitCastFromByte(D /* tag */, Vec64<uint8_t> v) {
+  return Vec64<uint64_t>(vreinterpret_u64_u8(v.raw));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I64_D(D)>
+HWY_INLINE Vec64<int64_t> BitCastFromByte(D /* tag */, Vec64<uint8_t> v) {
+  return Vec64<int64_t>(vreinterpret_s64_u8(v.raw));
+}
+
+// float16_t: uses the f16 reinterpret intrinsic when available, otherwise
+// builds the result from the raw value of the uint16_t cast.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_F16_D(D)>
+HWY_INLINE VFromD<D> BitCastFromByte(D d, VFromD<Repartition<uint8_t, D>> v) {
+#if HWY_HAVE_FLOAT16 || HWY_NEON_HAVE_FLOAT16C
+  (void)d;
+  return VFromD<D>(vreinterpret_f16_u8(v.raw));
+#else
+  const RebindToUnsigned<D> du;
+  return VFromD<decltype(d)>(BitCastFromByte(du, v).raw);
+#endif
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_F32_D(D)>
+HWY_INLINE VFromD<D> BitCastFromByte(D /* tag */,
+                                     VFromD<Repartition<uint8_t, D>> v) {
+  return VFromD<D>(vreinterpret_f32_u8(v.raw));
+}
+
+#if HWY_HAVE_FLOAT64
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_F64_D(D)>
+HWY_INLINE Vec64<double> BitCastFromByte(D /* tag */, Vec64<uint8_t> v) {
+  return Vec64<double>(vreinterpret_f64_u8(v.raw));
+}
+#endif  // HWY_HAVE_FLOAT64
+
+// 128-bit full:
+
+template <class D, HWY_IF_I8_D(D)>
+HWY_INLINE Vec128<int8_t> BitCastFromByte(D /* tag */, Vec128<uint8_t> v) {
+  return Vec128<int8_t>(vreinterpretq_s8_u8(v.raw));
+}
+template <class D, HWY_IF_U16_D(D)>
+HWY_INLINE Vec128<uint16_t> BitCastFromByte(D /* tag */, Vec128<uint8_t> v) {
+  return Vec128<uint16_t>(vreinterpretq_u16_u8(v.raw));
+}
+template <class D, HWY_IF_I16_D(D)>
+HWY_INLINE Vec128<int16_t> BitCastFromByte(D /* tag */, Vec128<uint8_t> v) {
+  return Vec128<int16_t>(vreinterpretq_s16_u8(v.raw));
+}
+template <class D, HWY_IF_U32_D(D)>
+HWY_INLINE Vec128<uint32_t> BitCastFromByte(D /* tag */, Vec128<uint8_t> v) {
+  return Vec128<uint32_t>(vreinterpretq_u32_u8(v.raw));
+}
+template <class D, HWY_IF_I32_D(D)>
+HWY_INLINE Vec128<int32_t> BitCastFromByte(D /* tag */, Vec128<uint8_t> v) {
+  return Vec128<int32_t>(vreinterpretq_s32_u8(v.raw));
+}
+template <class D, HWY_IF_U64_D(D)>
+HWY_INLINE Vec128<uint64_t> BitCastFromByte(D /* tag */, Vec128<uint8_t> v) {
+  return Vec128<uint64_t>(vreinterpretq_u64_u8(v.raw));
+}
+template <class D, HWY_IF_I64_D(D)>
+HWY_INLINE Vec128<int64_t> BitCastFromByte(D /* tag */, Vec128<uint8_t> v) {
+  return Vec128<int64_t>(vreinterpretq_s64_u8(v.raw));
+}
+
+// float16_t, full vector: same strategy as the <= 64-bit overload above.
+template <class D, HWY_IF_F16_D(D)>
+HWY_INLINE Vec128<float16_t> BitCastFromByte(D /* tag */, Vec128<uint8_t> v) {
+#if HWY_HAVE_FLOAT16 || HWY_NEON_HAVE_FLOAT16C
+  return Vec128<float16_t>(vreinterpretq_f16_u8(v.raw));
+#else
+  return Vec128<float16_t>(BitCastFromByte(RebindToUnsigned<D>(), v).raw);
+#endif
+}
+
+template <class D, HWY_IF_F32_D(D)>
+HWY_INLINE Vec128<float> BitCastFromByte(D /* tag */, Vec128<uint8_t> v) {
+  return Vec128<float>(vreinterpretq_f32_u8(v.raw));
+}
+
+#if HWY_HAVE_FLOAT64
+template <class D, HWY_IF_F64_D(D)>
+HWY_INLINE Vec128<double> BitCastFromByte(D /* tag */, Vec128<uint8_t> v) {
+  return Vec128<double>(vreinterpretq_f64_u8(v.raw));
+}
+#endif  // HWY_HAVE_FLOAT64
+
+// Special case for bfloat16_t, which may have the same Raw as uint16_t.
+// Not restricted by vector size: casts to the unsigned type first, then
+// constructs the bf16 vector from that result's raw value.
+template <class D, HWY_IF_BF16_D(D)>
+HWY_INLINE VFromD<D> BitCastFromByte(D /* tag */,
+                                     VFromD<Repartition<uint8_t, D>> v) {
+  return VFromD<D>(BitCastFromByte(RebindToUnsigned<D>(), v).raw);
+}
+
+}  // namespace detail
+
+// Reinterprets the bits of `v` as a vector of D's lane type. The source vector
+// must occupy the same number of bytes as D; the cast goes through uint8_t.
+template <class D, class FromT>
+HWY_API VFromD<D> BitCast(D d,
+                          Vec128<FromT, Repartition<FromT, D>().MaxLanes()> v) {
+  const auto as_bytes = detail::BitCastToByte(v);
+  return detail::BitCastFromByte(d, as_bytes);
+}
+
+// ------------------------------ ResizeBitCast
+
+// <= 8 byte vector to <= 8 byte vector
+template <class D, class FromV, HWY_IF_V_SIZE_LE_V(FromV, 8),
+          HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ResizeBitCast(D d, FromV v) {
+  // Both sides fit in 64 bits: re-wrap the source's raw bytes in the u8 vector
+  // type matching D, then BitCast as usual.
+  const Repartition<uint8_t, decltype(d)> du8;
+  using VU8 = VFromD<decltype(du8)>;
+  const auto src_bytes = detail::BitCastToByte(v);
+  return BitCast(d, VU8{src_bytes.raw});
+}
+
+// 16-byte vector to 16-byte vector: same as BitCast
+template <class D, class FromV, HWY_IF_V_SIZE_V(FromV, 16),
+          HWY_IF_V_SIZE_D(D, 16)>
+HWY_API VFromD<D> ResizeBitCast(D d, FromV v) {
+  // Sizes already match, so no resizing is needed.
+  const VFromD<D> result = BitCast(d, v);
+  return result;
+}
+
+// 16-byte vector to <= 8-byte vector
+template <class D, class FromV, HWY_IF_V_SIZE_V(FromV, 16),
+          HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ResizeBitCast(D d, FromV v) {
+  // Keep only the lower half of the source, then defer to the overload that
+  // handles the smaller source size.
+  const DFromV<decltype(v)> d_from;
+  const Half<decltype(d_from)> dh_from;
+  const auto lower = LowerHalf(dh_from, v);
+  return ResizeBitCast(d, lower);
+}
+
+// <= 8-byte vector to 16-byte vector
+template <class D, class FromV, HWY_IF_V_SIZE_LE_V(FromV, 8),
+          HWY_IF_V_SIZE_D(D, 16)>
+HWY_API VFromD<D> ResizeBitCast(D d, FromV v) {
+  // Widen the source to a full 64-bit vector, place it in the lower half of a
+  // 128-bit vector whose upper half is zero, then BitCast to D.
+  const Full64<TFromV<FromV>> d_full64_from;
+  const Full128<TFromV<FromV>> d_full128_from;
+  const auto lower = ResizeBitCast(d_full64_from, v);
+  const auto upper = Zero(d_full64_from);
+  return BitCast(d, Combine(d_full128_from, upper, lower));
+}
+
+// ------------------------------ GetLane
+
+// Generates detail::GetLane<kLane>(v) for every lane type. Each overload
+// returns a scalar (type##_t) and passes (v.raw, kLane) to the vget*_lane_*
+// intrinsic family; kLane is a template argument.
+// NOTE(review): the exact intrinsic name is assembled by
+// HWY_NEON_DEF_FUNCTION_ALL_TYPES, which is defined outside this section.
+namespace detail {
+#define HWY_NEON_BUILD_TPL_HWY_GET template <size_t kLane>
+#define HWY_NEON_BUILD_RET_HWY_GET(type, size) type##_t
+#define HWY_NEON_BUILD_PARAM_HWY_GET(type, size) Vec128<type##_t, size> v
+#define HWY_NEON_BUILD_ARG_HWY_GET v.raw, kLane
+
+HWY_NEON_DEF_FUNCTION_ALL_TYPES(GetLane, vget, _lane_, HWY_GET)
+
+// The builder pieces are only needed for the instantiation above.
+#undef HWY_NEON_BUILD_TPL_HWY_GET
+#undef HWY_NEON_BUILD_RET_HWY_GET
+#undef HWY_NEON_BUILD_PARAM_HWY_GET
+#undef HWY_NEON_BUILD_ARG_HWY_GET
+
+}  // namespace detail
+
+// Returns the first lane (index 0) of `v` as a scalar.
+template <class V>
+HWY_API TFromV<V> GetLane(const V v) {
+  constexpr size_t kFirstLane = 0;
+  return detail::GetLane<kFirstLane>(v);
+}
+
+// ------------------------------ ExtractLane
+
+// Requires one overload per vector length because GetLane<3> is a compile error
+// if v is a uint32x2_t.
+// Single-lane vector: the only valid index is 0. This is asserted via
+// HWY_DASSERT; `i` is otherwise unused.
+template <typename T>
+HWY_API T ExtractLane(const Vec128<T, 1> v, size_t i) {
+  HWY_DASSERT(i == 0);
+  (void)i;  // avoids an unused-parameter warning if the assert compiles away
+  return detail::GetLane<0>(v);
+}
+
+// Returns lane `i` of a 2-lane vector.
+// In non-debug builds with GCC-compatible compilers, an `i` the compiler knows
+// to be constant is dispatched to the lane-indexed detail::GetLane<>.
+// Otherwise the vector is stored to a stack array and indexed there.
+// NOTE(review): no bounds check on `i` is visible here; callers presumably
+// guarantee i < 2.
+template <typename T>
+HWY_API T ExtractLane(const Vec128<T, 2> v, size_t i) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::GetLane<0>(v);
+      case 1:
+        return detail::GetLane<1>(v);
+    }
+  }
+#endif
+  // Generic path: spill to memory and read the requested lane.
+  alignas(16) T lanes[2];
+  Store(v, DFromV<decltype(v)>(), lanes);
+  return lanes[i];
+}
+
+// Returns lane `i` of a 4-lane vector.
+// In non-debug builds with GCC-compatible compilers, an `i` the compiler knows
+// to be constant is dispatched to the lane-indexed detail::GetLane<>.
+// Otherwise the vector is stored to a stack array and indexed there.
+// NOTE(review): no bounds check on `i` is visible here; callers presumably
+// guarantee i < 4.
+template <typename T>
+HWY_API T ExtractLane(const Vec128<T, 4> v, size_t i) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::GetLane<0>(v);
+      case 1:
+        return detail::GetLane<1>(v);
+      case 2:
+        return detail::GetLane<2>(v);
+      case 3:
+        return detail::GetLane<3>(v);
+    }
+  }
+#endif
+  // Generic path: spill to memory and read the requested lane.
+  alignas(16) T lanes[4];
+  Store(v, DFromV<decltype(v)>(), lanes);
+  return lanes[i];
+}
+
+// Returns lane `i` of an 8-lane vector.
+// In non-debug builds with GCC-compatible compilers, an `i` the compiler knows
+// to be constant is dispatched to the lane-indexed detail::GetLane<>.
+// Otherwise the vector is stored to a stack array and indexed there.
+// NOTE(review): no bounds check on `i` is visible here; callers presumably
+// guarantee i < 8.
+template <typename T>
+HWY_API T ExtractLane(const Vec128<T, 8> v, size_t i) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::GetLane<0>(v);
+      case 1:
+        return detail::GetLane<1>(v);
+      case 2:
+        return detail::GetLane<2>(v);
+      case 3:
+        return detail::GetLane<3>(v);
+      case 4:
+        return detail::GetLane<4>(v);
+      case 5:
+        return detail::GetLane<5>(v);
+      case 6:
+        return detail::GetLane<6>(v);
+      case 7:
+        return detail::GetLane<7>(v);
+    }
+  }
+#endif
+  // Generic path: spill to memory and read the requested lane.
+  alignas(16) T lanes[8];
+  Store(v, DFromV<decltype(v)>(), lanes);
+  return lanes[i];
+}
+
+// Returns lane `i` of a 16-lane vector.
+// In non-debug builds with GCC-compatible compilers, an `i` the compiler knows
+// to be constant is dispatched to the lane-indexed detail::GetLane<>.
+// Otherwise the vector is stored to a stack array and indexed there.
+// NOTE(review): no bounds check on `i` is visible here; callers presumably
+// guarantee i < 16.
+template <typename T>
+HWY_API T ExtractLane(const Vec128<T, 16> v, size_t i) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::GetLane<0>(v);
+      case 1:
+        return detail::GetLane<1>(v);
+      case 2:
+        return detail::GetLane<2>(v);
+      case 3:
+        return detail::GetLane<3>(v);
+      case 4:
+        return detail::GetLane<4>(v);
+      case 5:
+        return detail::GetLane<5>(v);
+      case 6:
+        return detail::GetLane<6>(v);
+      case 7:
+        return detail::GetLane<7>(v);
+      case 8:
+        return detail::GetLane<8>(v);
+      case 9:
+        return detail::GetLane<9>(v);
+      case 10:
+        return detail::GetLane<10>(v);
+      case 11:
+        return detail::GetLane<11>(v);
+      case 12:
+        return detail::GetLane<12>(v);
+      case 13:
+        return detail::GetLane<13>(v);
+      case 14:
+        return detail::GetLane<14>(v);
+      case 15:
+        return detail::GetLane<15>(v);
+    }
+  }
+#endif
+  // Generic path: spill to memory and read the requested lane.
+  alignas(16) T lanes[16];
+  Store(v, DFromV<decltype(v)>(), lanes);
+  return lanes[i];
+}
+
+// ------------------------------ InsertLane
+
+// Generates detail::InsertLane<kLane>(v, t) for every lane type. Each overload
+// returns a vector of the same type as `v` and passes (t, v.raw, kLane) to the
+// vset*_lane_* intrinsic family; kLane is a template argument.
+// NOTE(review): the exact intrinsic name is assembled by
+// HWY_NEON_DEF_FUNCTION_ALL_TYPES, which is defined outside this section.
+namespace detail {
+#define HWY_NEON_BUILD_TPL_HWY_INSERT template <size_t kLane>
+#define HWY_NEON_BUILD_RET_HWY_INSERT(type, size) Vec128<type##_t, size>
+#define HWY_NEON_BUILD_PARAM_HWY_INSERT(type, size) \
+  Vec128<type##_t, size> v, type##_t t
+#define HWY_NEON_BUILD_ARG_HWY_INSERT t, v.raw, kLane
+
+HWY_NEON_DEF_FUNCTION_ALL_TYPES(InsertLane, vset, _lane_, HWY_INSERT)
+
+// The builder pieces are only needed for the instantiation above.
+#undef HWY_NEON_BUILD_TPL_HWY_INSERT
+#undef HWY_NEON_BUILD_RET_HWY_INSERT
+#undef HWY_NEON_BUILD_PARAM_HWY_INSERT
+#undef HWY_NEON_BUILD_ARG_HWY_INSERT
+
+}  // namespace detail
+
+// Requires one overload per vector length because InsertLane<3> may be a
+// compile error.
+
+// Single-lane vector: the only valid index is 0 (asserted via HWY_DASSERT).
+// Replacing the sole lane is done by building a new vector with Set; the
+// contents of `v` are not read, only its type.
+template <typename T>
+HWY_API Vec128<T, 1> InsertLane(const Vec128<T, 1> v, size_t i, T t) {
+  HWY_DASSERT(i == 0);
+  (void)i;  // avoids an unused-parameter warning if the assert compiles away
+  return Set(DFromV<decltype(v)>(), t);
+}
+
+// Returns a copy of the 2-lane vector `v` with lane `i` replaced by `t`.
+// In non-debug builds with GCC-compatible compilers, an `i` the compiler knows
+// to be constant is dispatched to the lane-indexed detail::InsertLane<>.
+// Otherwise the vector is stored to a stack array, patched and reloaded.
+// NOTE(review): no bounds check on `i` is visible here; callers presumably
+// guarantee i < 2.
+template <typename T>
+HWY_API Vec128<T, 2> InsertLane(const Vec128<T, 2> v, size_t i, T t) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::InsertLane<0>(v, t);
+      case 1:
+        return detail::InsertLane<1>(v, t);
+    }
+  }
+#endif
+  // Generic path: round-trip through memory.
+  const DFromV<decltype(v)> d;
+  alignas(16) T lanes[2];
+  Store(v, d, lanes);
+  lanes[i] = t;
+  return Load(d, lanes);
+}
+
+// Returns a copy of the 4-lane vector `v` with lane `i` replaced by `t`.
+// In non-debug builds with GCC-compatible compilers, an `i` the compiler knows
+// to be constant is dispatched to the lane-indexed detail::InsertLane<>.
+// Otherwise the vector is stored to a stack array, patched and reloaded.
+// NOTE(review): no bounds check on `i` is visible here; callers presumably
+// guarantee i < 4.
+template <typename T>
+HWY_API Vec128<T, 4> InsertLane(const Vec128<T, 4> v, size_t i, T t) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::InsertLane<0>(v, t);
+      case 1:
+        return detail::InsertLane<1>(v, t);
+      case 2:
+        return detail::InsertLane<2>(v, t);
+      case 3:
+        return detail::InsertLane<3>(v, t);
+    }
+  }
+#endif
+  // Generic path: round-trip through memory.
+  const DFromV<decltype(v)> d;
+  alignas(16) T lanes[4];
+  Store(v, d, lanes);
+  lanes[i] = t;
+  return Load(d, lanes);
+}
+
+// Returns a copy of the 8-lane vector `v` with lane `i` replaced by `t`.
+// In non-debug builds with GCC-compatible compilers, an `i` the compiler knows
+// to be constant is dispatched to the lane-indexed detail::InsertLane<>.
+// Otherwise the vector is stored to a stack array, patched and reloaded.
+// NOTE(review): no bounds check on `i` is visible here; callers presumably
+// guarantee i < 8.
+template <typename T>
+HWY_API Vec128<T, 8> InsertLane(const Vec128<T, 8> v, size_t i, T t) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::InsertLane<0>(v, t);
+      case 1:
+        return detail::InsertLane<1>(v, t);
+      case 2:
+        return detail::InsertLane<2>(v, t);
+      case 3:
+        return detail::InsertLane<3>(v, t);
+      case 4:
+        return detail::InsertLane<4>(v, t);
+      case 5:
+        return detail::InsertLane<5>(v, t);
+      case 6:
+        return detail::InsertLane<6>(v, t);
+      case 7:
+        return detail::InsertLane<7>(v, t);
+    }
+  }
+#endif
+  // Generic path: round-trip through memory.
+  const DFromV<decltype(v)> d;
+  alignas(16) T lanes[8];
+  Store(v, d, lanes);
+  lanes[i] = t;
+  return Load(d, lanes);
+}
+
+// Returns a copy of the 16-lane vector `v` with lane `i` replaced by `t`.
+// In non-debug builds with GCC-compatible compilers, an `i` the compiler knows
+// to be constant is dispatched to the lane-indexed detail::InsertLane<>.
+// Otherwise the vector is stored to a stack array, patched and reloaded.
+// NOTE(review): no bounds check on `i` is visible here; callers presumably
+// guarantee i < 16.
+template <typename T>
+HWY_API Vec128<T, 16> InsertLane(const Vec128<T, 16> v, size_t i, T t) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::InsertLane<0>(v, t);
+      case 1:
+        return detail::InsertLane<1>(v, t);
+      case 2:
+        return detail::InsertLane<2>(v, t);
+      case 3:
+        return detail::InsertLane<3>(v, t);
+      case 4:
+        return detail::InsertLane<4>(v, t);
+      case 5:
+        return detail::InsertLane<5>(v, t);
+      case 6:
+        return detail::InsertLane<6>(v, t);
+      case 7:
+        return detail::InsertLane<7>(v, t);
+      case 8:
+        return detail::InsertLane<8>(v, t);
+      case 9:
+        return detail::InsertLane<9>(v, t);
+      case 10:
+        return detail::InsertLane<10>(v, t);
+      case 11:
+        return detail::InsertLane<11>(v, t);
+      case 12:
+        return detail::InsertLane<12>(v, t);
+      case 13:
+        return detail::InsertLane<13>(v, t);
+      case 14:
+        return detail::InsertLane<14>(v, t);
+      case 15:
+        return detail::InsertLane<15>(v, t);
+    }
+  }
+#endif
+  // Generic path: round-trip through memory.
+  const DFromV<decltype(v)> d;
+  alignas(16) T lanes[16];
+  Store(v, d, lanes);
+  lanes[i] = t;
+  return Load(d, lanes);
+}
+
+// ================================================== ARITHMETIC
+
+// ------------------------------ Addition
+// Lane-wise a + b for all lane types, generated from the vadd intrinsics.
+HWY_NEON_DEF_FUNCTION_ALL_TYPES(operator+, vadd, _, 2)
+
+// ------------------------------ Subtraction
+// Lane-wise a - b for all lane types, generated from the vsub intrinsics.
+HWY_NEON_DEF_FUNCTION_ALL_TYPES(operator-, vsub, _, 2)
+
+// ------------------------------ SumsOf8
+
+// Sums each group of 8 consecutive uint8_t lanes into one uint64_t lane, using
+// three rounds of pairwise widening addition (u8 -> u16 -> u32 -> u64).
+HWY_API Vec128<uint64_t> SumsOf8(const Vec128<uint8_t> v) {
+  const uint16x8_t sums_of_2 = vpaddlq_u8(v.raw);
+  const uint32x4_t sums_of_4 = vpaddlq_u16(sums_of_2);
+  const uint64x2_t sums_of_8 = vpaddlq_u32(sums_of_4);
+  return Vec128<uint64_t>(sums_of_8);
+}
+// 64-bit variant: eight uint8_t lanes produce a single uint64_t lane.
+HWY_API Vec64<uint64_t> SumsOf8(const Vec64<uint8_t> v) {
+  const uint16x4_t sums_of_2 = vpaddl_u8(v.raw);
+  const uint32x2_t sums_of_4 = vpaddl_u16(sums_of_2);
+  const uint64x1_t sums_of_8 = vpaddl_u32(sums_of_4);
+  return Vec64<uint64_t>(sums_of_8);
+}
+
+// ------------------------------ SaturatedAdd
+
+// Toggle the per-target HWY_NATIVE_*_SATURATED_ADDSUB flags: each is undefined
+// if currently defined and defined otherwise.
+// NOTE(review): presumably these tell generic code that this target supplies
+// 32/64-bit saturated add/sub itself - confirm against generic_ops-inl.h.
+#ifdef HWY_NATIVE_I32_SATURATED_ADDSUB
+#undef HWY_NATIVE_I32_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_I32_SATURATED_ADDSUB
+#endif
+
+#ifdef HWY_NATIVE_U32_SATURATED_ADDSUB
+#undef HWY_NATIVE_U32_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_U32_SATURATED_ADDSUB
+#endif
+
+#ifdef HWY_NATIVE_I64_SATURATED_ADDSUB
+#undef HWY_NATIVE_I64_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_I64_SATURATED_ADDSUB
+#endif
+
+#ifdef HWY_NATIVE_U64_SATURATED_ADDSUB
+#undef HWY_NATIVE_U64_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_U64_SATURATED_ADDSUB
+#endif
+
+// Returns a + b clamped to the destination range.
+// Generated for signed and unsigned integer lanes from the vqadd intrinsics.
+HWY_NEON_DEF_FUNCTION_INTS_UINTS(SaturatedAdd, vqadd, _, 2)
+
+// ------------------------------ SaturatedSub
+
+// Returns a - b clamped to the destination range.
+// Generated for signed and unsigned integer lanes from the vqsub intrinsics.
+HWY_NEON_DEF_FUNCTION_INTS_UINTS(SaturatedSub, vqsub, _, 2)
+
+// ------------------------------ Average
+
+// Returns (a + b + 1) / 2
+// Instantiated only for uint8_t and uint16_t lanes, from the vrhadd intrinsics.
+HWY_NEON_DEF_FUNCTION_UINT_8(AverageRound, vrhadd, _, 2)
+HWY_NEON_DEF_FUNCTION_UINT_16(AverageRound, vrhadd, _, 2)
+
+// ------------------------------ Neg
+
+// Lane-wise negation, generated from the vneg intrinsics for float lanes and
+// for 8/16/32-bit signed integer lanes.
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(Neg, vneg, _, 1)
+HWY_NEON_DEF_FUNCTION_INT_8_16_32(Neg, vneg, _, 1)  // i64 implemented below
+
+#if !HWY_HAVE_FLOAT16
+// Emulated float16_t negation: flips the sign bit of each lane by XOR-ing with
+// the sign mask of the same-sized unsigned type.
+template <size_t N>
+HWY_API Vec128<float16_t, N> Neg(const Vec128<float16_t, N> v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  const auto sign_bit = Set(du, SignMask<TU>());
+  const auto flipped = Xor(BitCast(du, v), sign_bit);
+  return BitCast(d, flipped);
+}
+#endif  // !HWY_HAVE_FLOAT16
+
+// There is no vneg for bf16, but we can cast to f16 (emulated or native).
+// bfloat16_t negation: reinterprets the lanes as float16_t, negates those, and
+// reinterprets the result back.
+template <size_t N>
+HWY_API Vec128<bfloat16_t, N> Neg(const Vec128<bfloat16_t, N> v) {
+  const DFromV<decltype(v)> d;
+  const Rebind<float16_t, decltype(d)> df16;
+  const auto as_f16 = BitCast(df16, v);
+  const auto negated = Neg(as_f16);
+  return BitCast(d, negated);
+}
+
+// int64_t negation, 64-bit vector. Uses vneg_s64 on A64; elsewhere computes
+// 0 - v.
+HWY_API Vec64<int64_t> Neg(const Vec64<int64_t> v) {
+#if HWY_ARCH_ARM_A64
+  const int64x1_t negated = vneg_s64(v.raw);
+  return Vec64<int64_t>(negated);
+#else
+  const DFromV<decltype(v)> d;
+  return Zero(d) - v;
+#endif
+}
+
+// int64_t negation, 128-bit vector. Uses vnegq_s64 on A64; elsewhere computes
+// 0 - v.
+HWY_API Vec128<int64_t> Neg(const Vec128<int64_t> v) {
+#if HWY_ARCH_ARM_A64
+  const int64x2_t negated = vnegq_s64(v.raw);
+  return Vec128<int64_t>(negated);
+#else
+  const DFromV<decltype(v)> d;
+  return Zero(d) - v;
+#endif
+}
+
+// ------------------------------ ShiftLeft
+
+// Customize HWY_NEON_DEF_FUNCTION to special-case count=0 (not supported).
+// The previous definition is saved with push_macro and restored by pop_macro
+// below. The replacement generates `template <int kBits> name(v)`: kBits == 0
+// returns `v` unchanged, otherwise the immediate-count intrinsic is called.
+// HWY_MAX(1, kBits) keeps the count argument non-zero even in the branch that
+// is never taken when kBits == 0. The `args` macro parameter is unused here,
+// which is why the invocations below pass `ignored`.
+#pragma push_macro("HWY_NEON_DEF_FUNCTION")
+#undef HWY_NEON_DEF_FUNCTION
+#define HWY_NEON_DEF_FUNCTION(type, size, name, prefix, infix, suffix, args)   \
+  template <int kBits>                                                         \
+  HWY_API Vec128<type##_t, size> name(const Vec128<type##_t, size> v) {        \
+    return kBits == 0 ? v                                                      \
+                      : Vec128<type##_t, size>(HWY_NEON_EVAL(                  \
+                            prefix##infix##suffix, v.raw, HWY_MAX(1, kBits))); \
+  }
+
+// ShiftLeft<kBits> for signed and unsigned integer lanes (vshl*_n_*).
+HWY_NEON_DEF_FUNCTION_INTS_UINTS(ShiftLeft, vshl, _n_, ignored)
+
+// ShiftRight<kBits> for unsigned and signed integer lanes (vshr*_n_*).
+HWY_NEON_DEF_FUNCTION_UINTS(ShiftRight, vshr, _n_, ignored)
+HWY_NEON_DEF_FUNCTION_INTS(ShiftRight, vshr, _n_, ignored)
+
+#pragma pop_macro("HWY_NEON_DEF_FUNCTION")
+
+// ------------------------------ RotateRight (ShiftRight, Or)
+// Rotates each lane right by the compile-time count kBits, which must be in
+// [0, lane size in bits). Built from ShiftRight and ShiftLeft combined with Or.
+template <int kBits, typename T, size_t N>
+HWY_API Vec128<T, N> RotateRight(const Vec128<T, N> v) {
+  constexpr size_t kLaneBits = sizeof(T) * 8;
+  static_assert(0 <= kBits && kBits < kLaneBits, "Invalid shift count");
+  // Clamped so the ShiftLeft instantiation stays in range when kBits == 0,
+  // even though that case returns early below.
+  constexpr size_t kLeftBits = HWY_MIN(kLaneBits - 1, kLaneBits - kBits);
+  if (kBits == 0) {
+    return v;
+  }
+  const Vec128<T, N> shifted_down = ShiftRight<kBits>(v);
+  const Vec128<T, N> wrapped_around = ShiftLeft<kLeftBits>(v);
+  return Or(shifted_down, wrapped_around);
+}
+
+// NOTE: vxarq_u64 can be applied to uint64_t, but we do not yet have a
+// mechanism for checking for extensions to Armv8.
+
+// ------------------------------ Shl
+
+// Per-lane variable left shift for unsigned lanes. The vshl intrinsics take
+// the shift counts as signed lanes, so `bits` is reinterpreted first.
+HWY_API Vec128<uint8_t> operator<<(Vec128<uint8_t> v, Vec128<uint8_t> bits) {
+  const int8x16_t counts = vreinterpretq_s8_u8(bits.raw);
+  return Vec128<uint8_t>(vshlq_u8(v.raw, counts));
+}
+template <size_t N, HWY_IF_V_SIZE_LE(uint8_t, N, 8)>
+HWY_API Vec128<uint8_t, N> operator<<(Vec128<uint8_t, N> v,
+                                      Vec128<uint8_t, N> bits) {
+  const int8x8_t counts = vreinterpret_s8_u8(bits.raw);
+  return Vec128<uint8_t, N>(vshl_u8(v.raw, counts));
+}
+
+HWY_API Vec128<uint16_t> operator<<(Vec128<uint16_t> v, Vec128<uint16_t> bits) {
+  const int16x8_t counts = vreinterpretq_s16_u16(bits.raw);
+  return Vec128<uint16_t>(vshlq_u16(v.raw, counts));
+}
+template <size_t N, HWY_IF_V_SIZE_LE(uint16_t, N, 8)>
+HWY_API Vec128<uint16_t, N> operator<<(Vec128<uint16_t, N> v,
+                                       Vec128<uint16_t, N> bits) {
+  const int16x4_t counts = vreinterpret_s16_u16(bits.raw);
+  return Vec128<uint16_t, N>(vshl_u16(v.raw, counts));
+}
+
+HWY_API Vec128<uint32_t> operator<<(Vec128<uint32_t> v, Vec128<uint32_t> bits) {
+  const int32x4_t counts = vreinterpretq_s32_u32(bits.raw);
+  return Vec128<uint32_t>(vshlq_u32(v.raw, counts));
+}
+template <size_t N, HWY_IF_V_SIZE_LE(uint32_t, N, 8)>
+HWY_API Vec128<uint32_t, N> operator<<(Vec128<uint32_t, N> v,
+                                       Vec128<uint32_t, N> bits) {
+  const int32x2_t counts = vreinterpret_s32_u32(bits.raw);
+  return Vec128<uint32_t, N>(vshl_u32(v.raw, counts));
+}
+
+HWY_API Vec128<uint64_t> operator<<(Vec128<uint64_t> v, Vec128<uint64_t> bits) {
+  const int64x2_t counts = vreinterpretq_s64_u64(bits.raw);
+  return Vec128<uint64_t>(vshlq_u64(v.raw, counts));
+}
+HWY_API Vec64<uint64_t> operator<<(Vec64<uint64_t> v, Vec64<uint64_t> bits) {
+  const int64x1_t counts = vreinterpret_s64_u64(bits.raw);
+  return Vec64<uint64_t>(vshl_u64(v.raw, counts));
+}
+
+// Per-lane variable left shift for signed lanes; `bits` is already signed and
+// is passed to the vshl intrinsics as-is.
+HWY_API Vec128<int8_t> operator<<(Vec128<int8_t> v, Vec128<int8_t> bits) {
+  const int8x16_t shifted = vshlq_s8(v.raw, bits.raw);
+  return Vec128<int8_t>(shifted);
+}
+template <size_t N, HWY_IF_V_SIZE_LE(int8_t, N, 8)>
+HWY_API Vec128<int8_t, N> operator<<(Vec128<int8_t, N> v,
+                                     Vec128<int8_t, N> bits) {
+  const int8x8_t shifted = vshl_s8(v.raw, bits.raw);
+  return Vec128<int8_t, N>(shifted);
+}
+
+HWY_API Vec128<int16_t> operator<<(Vec128<int16_t> v, Vec128<int16_t> bits) {
+  const int16x8_t shifted = vshlq_s16(v.raw, bits.raw);
+  return Vec128<int16_t>(shifted);
+}
+template <size_t N, HWY_IF_V_SIZE_LE(int16_t, N, 8)>
+HWY_API Vec128<int16_t, N> operator<<(Vec128<int16_t, N> v,
+                                      Vec128<int16_t, N> bits) {
+  const int16x4_t shifted = vshl_s16(v.raw, bits.raw);
+  return Vec128<int16_t, N>(shifted);
+}
+
+HWY_API Vec128<int32_t> operator<<(Vec128<int32_t> v, Vec128<int32_t> bits) {
+  const int32x4_t shifted = vshlq_s32(v.raw, bits.raw);
+  return Vec128<int32_t>(shifted);
+}
+template <size_t N, HWY_IF_V_SIZE_LE(int32_t, N, 8)>
+HWY_API Vec128<int32_t, N> operator<<(Vec128<int32_t, N> v,
+                                      Vec128<int32_t, N> bits) {
+  const int32x2_t shifted = vshl_s32(v.raw, bits.raw);
+  return Vec128<int32_t, N>(shifted);
+}
+
+HWY_API Vec128<int64_t> operator<<(Vec128<int64_t> v, Vec128<int64_t> bits) {
+  const int64x2_t shifted = vshlq_s64(v.raw, bits.raw);
+  return Vec128<int64_t>(shifted);
+}
+HWY_API Vec64<int64_t> operator<<(Vec64<int64_t> v, Vec64<int64_t> bits) {
+  const int64x1_t shifted = vshl_s64(v.raw, bits.raw);
+  return Vec64<int64_t>(shifted);
+}
+
+// ------------------------------ Shr (Neg)
+
+// Per-lane variable right shift for unsigned lanes, implemented as a vshl by
+// the negated shift counts. `bits` is cast to signed lanes before negation.
+HWY_API Vec128<uint8_t> operator>>(Vec128<uint8_t> v, Vec128<uint8_t> bits) {
+  const RebindToSigned<DFromV<decltype(v)>> d_signed;
+  const auto negated = Neg(BitCast(d_signed, bits));
+  return Vec128<uint8_t>(vshlq_u8(v.raw, negated.raw));
+}
+template <size_t N, HWY_IF_V_SIZE_LE(uint8_t, N, 8)>
+HWY_API Vec128<uint8_t, N> operator>>(Vec128<uint8_t, N> v,
+                                      Vec128<uint8_t, N> bits) {
+  const RebindToSigned<DFromV<decltype(v)>> d_signed;
+  const auto negated = Neg(BitCast(d_signed, bits));
+  return Vec128<uint8_t, N>(vshl_u8(v.raw, negated.raw));
+}
+
+HWY_API Vec128<uint16_t> operator>>(Vec128<uint16_t> v, Vec128<uint16_t> bits) {
+  const RebindToSigned<DFromV<decltype(v)>> d_signed;
+  const auto negated = Neg(BitCast(d_signed, bits));
+  return Vec128<uint16_t>(vshlq_u16(v.raw, negated.raw));
+}
+template <size_t N, HWY_IF_V_SIZE_LE(uint16_t, N, 8)>
+HWY_API Vec128<uint16_t, N> operator>>(Vec128<uint16_t, N> v,
+                                       Vec128<uint16_t, N> bits) {
+  const RebindToSigned<DFromV<decltype(v)>> d_signed;
+  const auto negated = Neg(BitCast(d_signed, bits));
+  return Vec128<uint16_t, N>(vshl_u16(v.raw, negated.raw));
+}
+
+HWY_API Vec128<uint32_t> operator>>(Vec128<uint32_t> v, Vec128<uint32_t> bits) {
+  const RebindToSigned<DFromV<decltype(v)>> d_signed;
+  const auto negated = Neg(BitCast(d_signed, bits));
+  return Vec128<uint32_t>(vshlq_u32(v.raw, negated.raw));
+}
+template <size_t N, HWY_IF_V_SIZE_LE(uint32_t, N, 8)>
+HWY_API Vec128<uint32_t, N> operator>>(Vec128<uint32_t, N> v,
+                                       Vec128<uint32_t, N> bits) {
+  const RebindToSigned<DFromV<decltype(v)>> d_signed;
+  const auto negated = Neg(BitCast(d_signed, bits));
+  return Vec128<uint32_t, N>(vshl_u32(v.raw, negated.raw));
+}
+
+HWY_API Vec128<uint64_t> operator>>(Vec128<uint64_t> v, Vec128<uint64_t> bits) {
+  const RebindToSigned<DFromV<decltype(v)>> d_signed;
+  const auto negated = Neg(BitCast(d_signed, bits));
+  return Vec128<uint64_t>(vshlq_u64(v.raw, negated.raw));
+}
+HWY_API Vec64<uint64_t> operator>>(Vec64<uint64_t> v, Vec64<uint64_t> bits) {
+  const RebindToSigned<DFromV<decltype(v)>> d_signed;
+  const auto negated = Neg(BitCast(d_signed, bits));
+  return Vec64<uint64_t>(vshl_u64(v.raw, negated.raw));
+}
+
+// Per-lane variable right shift for signed lanes, implemented as a vshl by the
+// negated shift counts.
+HWY_API Vec128<int8_t> operator>>(Vec128<int8_t> v, Vec128<int8_t> bits) {
+  const auto negated = Neg(bits);
+  return Vec128<int8_t>(vshlq_s8(v.raw, negated.raw));
+}
+template <size_t N, HWY_IF_V_SIZE_LE(int8_t, N, 8)>
+HWY_API Vec128<int8_t, N> operator>>(Vec128<int8_t, N> v,
+                                     Vec128<int8_t, N> bits) {
+  const auto negated = Neg(bits);
+  return Vec128<int8_t, N>(vshl_s8(v.raw, negated.raw));
+}
+
+HWY_API Vec128<int16_t> operator>>(Vec128<int16_t> v, Vec128<int16_t> bits) {
+  const auto negated = Neg(bits);
+  return Vec128<int16_t>(vshlq_s16(v.raw, negated.raw));
+}
+template <size_t N, HWY_IF_V_SIZE_LE(int16_t, N, 8)>
+HWY_API Vec128<int16_t, N> operator>>(Vec128<int16_t, N> v,
+                                      Vec128<int16_t, N> bits) {
+  const auto negated = Neg(bits);
+  return Vec128<int16_t, N>(vshl_s16(v.raw, negated.raw));
+}
+
+HWY_API Vec128<int32_t> operator>>(Vec128<int32_t> v, Vec128<int32_t> bits) {
+  const auto negated = Neg(bits);
+  return Vec128<int32_t>(vshlq_s32(v.raw, negated.raw));
+}
+template <size_t N, HWY_IF_V_SIZE_LE(int32_t, N, 8)>
+HWY_API Vec128<int32_t, N> operator>>(Vec128<int32_t, N> v,
+                                      Vec128<int32_t, N> bits) {
+  const auto negated = Neg(bits);
+  return Vec128<int32_t, N>(vshl_s32(v.raw, negated.raw));
+}
+
+HWY_API Vec128<int64_t> operator>>(Vec128<int64_t> v, Vec128<int64_t> bits) {
+  const auto negated = Neg(bits);
+  return Vec128<int64_t>(vshlq_s64(v.raw, negated.raw));
+}
+HWY_API Vec64<int64_t> operator>>(Vec64<int64_t> v, Vec64<int64_t> bits) {
+  const auto negated = Neg(bits);
+  return Vec64<int64_t>(vshl_s64(v.raw, negated.raw));
+}
+
+// ------------------------------ ShiftLeftSame (Shl)
+
+// Shifts every lane left by the same runtime count: broadcasts `bits` (cast to
+// the lane type) and applies the per-lane operator<<.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> ShiftLeftSame(const Vec128<T, N> v, int bits) {
+  const DFromV<decltype(v)> d;
+  const auto counts = Set(d, static_cast<T>(bits));
+  return v << counts;
+}
+// Shifts every lane right by the same runtime count: broadcasts `bits` (cast
+// to the lane type) and applies the per-lane operator>>.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> ShiftRightSame(const Vec128<T, N> v, int bits) {
+  const DFromV<decltype(v)> d;
+  const auto counts = Set(d, static_cast<T>(bits));
+  return v >> counts;
+}
+
+// ------------------------------ Int/float multiplication
+
+// Per-target flag to prevent generic_ops-inl.h from defining 8-bit operator*.
+// Toggle: undefined if currently defined, defined otherwise.
+#ifdef HWY_NATIVE_MUL_8
+#undef HWY_NATIVE_MUL_8
+#else
+#define HWY_NATIVE_MUL_8
+#endif
+
+// All except ui64
+// Lane-wise multiplication generated from the vmul intrinsics for 8/16/32-bit
+// unsigned and signed integer lanes and for float lanes.
+HWY_NEON_DEF_FUNCTION_UINT_8_16_32(operator*, vmul, _, 2)
+HWY_NEON_DEF_FUNCTION_INT_8_16_32(operator*, vmul, _, 2)
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(operator*, vmul, _, 2)
+
+// ------------------------------ Integer multiplication
+
+// Returns the upper 16 bits of a * b in each lane.
+// Returns the upper 16 bits of a * b in each lane (signed, full vector).
+// Both halves are widened to 32-bit products; vuzp2q then gathers the 16-bit
+// halves that form the result.
+HWY_API Vec128<int16_t> MulHigh(Vec128<int16_t> a, Vec128<int16_t> b) {
+  const int32x4_t prod_lo = vmull_s16(vget_low_s16(a.raw), vget_low_s16(b.raw));
+#if HWY_ARCH_ARM_A64
+  const int32x4_t prod_hi = vmull_high_s16(a.raw, b.raw);
+#else
+  const int32x4_t prod_hi =
+      vmull_s16(vget_high_s16(a.raw), vget_high_s16(b.raw));
+#endif
+  const int16x8_t halves_lo = vreinterpretq_s16_s32(prod_lo);
+  const int16x8_t halves_hi = vreinterpretq_s16_s32(prod_hi);
+  return Vec128<int16_t>(vuzp2q_s16(halves_lo, halves_hi));
+}
+// Unsigned counterpart of the overload above.
+HWY_API Vec128<uint16_t> MulHigh(Vec128<uint16_t> a, Vec128<uint16_t> b) {
+  const uint32x4_t prod_lo =
+      vmull_u16(vget_low_u16(a.raw), vget_low_u16(b.raw));
+#if HWY_ARCH_ARM_A64
+  const uint32x4_t prod_hi = vmull_high_u16(a.raw, b.raw);
+#else
+  const uint32x4_t prod_hi =
+      vmull_u16(vget_high_u16(a.raw), vget_high_u16(b.raw));
+#endif
+  const uint16x8_t halves_lo = vreinterpretq_u16_u32(prod_lo);
+  const uint16x8_t halves_hi = vreinterpretq_u16_u32(prod_hi);
+  return Vec128<uint16_t>(vuzp2q_u16(halves_lo, halves_hi));
+}
+
+// MulHigh for vectors of at most 64 bits (signed). A single widening multiply
+// covers all lanes; the result is the lower half of the vuzp2q output.
+template <size_t N, HWY_IF_V_SIZE_LE(int16_t, N, 8)>
+HWY_API Vec128<int16_t, N> MulHigh(Vec128<int16_t, N> a, Vec128<int16_t, N> b) {
+  const int32x4_t products = vmull_s16(a.raw, b.raw);
+  const int16x8_t halves = vreinterpretq_s16_s32(products);
+  const int16x8_t upper_halves = vuzp2q_s16(halves, halves);
+  return Vec128<int16_t, N>(vget_low_s16(upper_halves));
+}
+// Unsigned counterpart of the overload above.
+template <size_t N, HWY_IF_V_SIZE_LE(uint16_t, N, 8)>
+HWY_API Vec128<uint16_t, N> MulHigh(Vec128<uint16_t, N> a,
+                                    Vec128<uint16_t, N> b) {
+  const uint32x4_t products = vmull_u16(a.raw, b.raw);
+  const uint16x8_t halves = vreinterpretq_u16_u32(products);
+  const uint16x8_t upper_halves = vuzp2q_u16(halves, halves);
+  return Vec128<uint16_t, N>(vget_low_u16(upper_halves));
+}
+
+// Fixed-point multiply of int16_t lanes via the vqrdmulh intrinsic (full
+// vector).
+HWY_API Vec128<int16_t> MulFixedPoint15(Vec128<int16_t> a, Vec128<int16_t> b) {
+  const int16x8_t product = vqrdmulhq_s16(a.raw, b.raw);
+  return Vec128<int16_t>(product);
+}
+// Same operation for vectors of at most 64 bits.
+template <size_t N, HWY_IF_V_SIZE_LE(int16_t, N, 8)>
+HWY_API Vec128<int16_t, N> MulFixedPoint15(Vec128<int16_t, N> a,
+                                           Vec128<int16_t, N> b) {
+  const int16x4_t product = vqrdmulh_s16(a.raw, b.raw);
+  return Vec128<int16_t, N>(product);
+}
+
+// ------------------------------ Floating-point division
+
+// Emulate missing intrinsic
+// Also requires HWY_HAVE_FLOAT64: the emulation relies on the double overload
+// of Combine, which is only defined when that flag is set.
+#if HWY_HAVE_FLOAT64 && HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 700
+// Stand-in for the vrecpe_f64 intrinsic on GCC older than 7: duplicates the
+// single lane into a 128-bit vector, applies vrecpeq_f64 and returns the raw
+// lower half of the result.
+HWY_INLINE float64x1_t vrecpe_f64(float64x1_t raw) {
+  const CappedTag<double, 1> d;
+  const Twice<decltype(d)> dt;
+  using VT = VFromD<decltype(dt)>;
+  // Wrap the raw argument so it can be passed to Combine. The previous body
+  // referred to an undeclared `v` here and could not compile.
+  const VFromD<decltype(d)> v(raw);
+  return LowerHalf(d, VT(vrecpeq_f64(Combine(dt, v, v).raw))).raw;
+}
+#endif
+
+// Approximate reciprocal
+// Generated for all float lane types from the vrecpe intrinsics.
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(ApproximateReciprocal, vrecpe, _, 1)
+
+#if HWY_HAVE_FLOAT64
+// Toggle: undefined if currently defined, defined otherwise.
+#ifdef HWY_NATIVE_F64_APPROX_RECIP
+#undef HWY_NATIVE_F64_APPROX_RECIP
+#else
+#define HWY_NATIVE_F64_APPROX_RECIP
+#endif
+
+// Division is generated directly from the vdiv intrinsics.
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(operator/, vdiv, _, 2)
+#else   // !HWY_HAVE_FLOAT64
+namespace detail {
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(ReciprocalNewtonRaphsonStep, vrecps, _, 2)
+}  // namespace detail
+
+// Division emulated as a * (1 / b): starts from the approximate reciprocal of
+// b and refines it with three Newton-Raphson steps.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> operator/(Vec128<T, N> a, Vec128<T, N> b) {
+  auto recip = ApproximateReciprocal(b);
+  for (int step = 0; step < 3; ++step) {
+    recip *= detail::ReciprocalNewtonRaphsonStep(recip, b);
+  }
+  return a * recip;
+}
+#endif  // HWY_HAVE_FLOAT64
+
+// ------------------------------ Absolute value of difference.
+
+// AbsDiff for the floating-point vector types, generated from the vabd
+// intrinsic prefix.
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(AbsDiff, vabd, _, 2)
+// Same for 8/16/32-bit signed and unsigned integers; the 64-bit integer
+// AbsDiff overloads are written out separately further down in this file.
+HWY_NEON_DEF_FUNCTION_UI_8_16_32(AbsDiff, vabd, _, 2)  // no UI64
+
+// Toggle HWY_NATIVE_INTEGER_ABS_DIFF (undefine if defined, else define).
+#ifdef HWY_NATIVE_INTEGER_ABS_DIFF
+#undef HWY_NATIVE_INTEGER_ABS_DIFF
+#else
+#define HWY_NATIVE_INTEGER_ABS_DIFF
+#endif
+
+// ------------------------------ Integer multiply-add
+
+// Per-target flag to prevent generic_ops-inl.h from defining int MulAdd.
+// Toggled: undefined if already defined, otherwise defined.
+#ifdef HWY_NATIVE_INT_FMA
+#undef HWY_NATIVE_INT_FMA
+#else
+#define HWY_NATIVE_INT_FMA
+#endif
+
+// Wrappers for changing argument order to what intrinsics expect.
+// The public MulAdd/NegMulAdd call these as (add, mul, x).
+namespace detail {
+// All except ui64
+// MulAdd is generated from the vmla prefix, NegMulAdd from the vmls prefix,
+// each for 8/16/32-bit unsigned and signed lanes.
+HWY_NEON_DEF_FUNCTION_UINT_8_16_32(MulAdd, vmla, _, 3)
+HWY_NEON_DEF_FUNCTION_INT_8_16_32(MulAdd, vmla, _, 3)
+HWY_NEON_DEF_FUNCTION_UINT_8_16_32(NegMulAdd, vmls, _, 3)
+HWY_NEON_DEF_FUNCTION_INT_8_16_32(NegMulAdd, vmls, _, 3)
+}  // namespace detail
+
+// Integer MulAdd for lanes that are not 64-bit: forwards to detail::MulAdd
+// with the addend moved to the first position.
+template <typename T, size_t N, HWY_IF_NOT_FLOAT(T), HWY_IF_NOT_T_SIZE(T, 8)>
+HWY_API Vec128<T, N> MulAdd(Vec128<T, N> mul, Vec128<T, N> x,
+                            Vec128<T, N> add) {
+  const Vec128<T, N> result = detail::MulAdd(add, mul, x);
+  return result;
+}
+
+// Integer NegMulAdd for lanes that are not 64-bit: forwards to
+// detail::NegMulAdd with the addend moved to the first position.
+template <typename T, size_t N, HWY_IF_NOT_FLOAT(T), HWY_IF_NOT_T_SIZE(T, 8)>
+HWY_API Vec128<T, N> NegMulAdd(Vec128<T, N> mul, Vec128<T, N> x,
+                               Vec128<T, N> add) {
+  const Vec128<T, N> result = detail::NegMulAdd(add, mul, x);
+  return result;
+}
+
+// 64-bit integer
+// 64-bit integer lanes: computed as Mul followed by Add.
+template <typename T, size_t N, HWY_IF_NOT_FLOAT(T), HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T, N> MulAdd(Vec128<T, N> mul, Vec128<T, N> x,
+                            Vec128<T, N> add) {
+  const auto product = Mul(mul, x);
+  return Add(product, add);
+}
+
+// 64-bit integer lanes: computed as add minus Mul(mul, x).
+template <typename T, size_t N, HWY_IF_NOT_FLOAT(T), HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T, N> NegMulAdd(Vec128<T, N> mul, Vec128<T, N> x,
+                               Vec128<T, N> add) {
+  const auto product = Mul(mul, x);
+  return Sub(add, product);
+}
+
+// ------------------------------ Floating-point multiply-add variants
+
+// Internal float MulAdd/NegMulAdd taking (add, mul, x): generated from the
+// vfma/vfms intrinsic prefixes when available, otherwise emulated with a
+// separate multiply and add/subtract.
+namespace detail {
+
+#if defined(__ARM_VFPV4__) || HWY_ARCH_ARM_A64
+// Wrappers for changing argument order to what intrinsics expect.
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(MulAdd, vfma, _, 3)
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(NegMulAdd, vfms, _, 3)
+#else
+// Emulate. Matches intrinsics arg order.
+template <size_t N>
+HWY_API Vec128<float, N> MulAdd(Vec128<float, N> add, Vec128<float, N> mul,
+                                Vec128<float, N> x) {
+  const auto product = mul * x;
+  return product + add;
+}
+
+template <size_t N>
+HWY_API Vec128<float, N> NegMulAdd(Vec128<float, N> add, Vec128<float, N> mul,
+                                   Vec128<float, N> x) {
+  const auto product = mul * x;
+  return add - product;
+}
+
+#endif  // defined(__ARM_VFPV4__) || HWY_ARCH_ARM_A64
+}  // namespace detail
+
+// Floating-point MulAdd: forwards to detail::MulAdd, which takes the addend
+// first.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> MulAdd(Vec128<T, N> mul, Vec128<T, N> x,
+                            Vec128<T, N> add) {
+  const Vec128<T, N> result = detail::MulAdd(add, mul, x);
+  return result;
+}
+
+// Floating-point NegMulAdd: forwards to detail::NegMulAdd, which takes the
+// addend first.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> NegMulAdd(Vec128<T, N> mul, Vec128<T, N> x,
+                               Vec128<T, N> add) {
+  const Vec128<T, N> result = detail::NegMulAdd(add, mul, x);
+  return result;
+}
+
+// MulSub expressed as MulAdd with the negated subtrahend.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> MulSub(Vec128<T, N> mul, Vec128<T, N> x,
+                            Vec128<T, N> sub) {
+  const auto negated_sub = Neg(sub);
+  return MulAdd(mul, x, negated_sub);
+}
+
+// NegMulSub expressed as the negation of MulAdd(mul, x, sub).
+template <typename T, size_t N>
+HWY_API Vec128<T, N> NegMulSub(Vec128<T, N> mul, Vec128<T, N> x,
+                               Vec128<T, N> sub) {
+  const auto sum = MulAdd(mul, x, sub);
+  return Neg(sum);
+}
+
+// ------------------------------ Floating-point square root (IfThenZeroElse)
+
+// Emulate missing intrinsic
+#if HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 490
+// Stand-in for the 64-bit vrsqrte_f64 intrinsic on compilers selected by the
+// enclosing #if: duplicates the input into a 128-bit vector, applies
+// vrsqrteq_f64 and returns the raw lower half.
+HWY_INLINE float64x1_t vrsqrte_f64(float64x1_t raw) {
+  const CappedTag<double, 1> d;
+  const Twice<decltype(d)> dt;
+  using VT = VFromD<decltype(dt)>;
+  const VFromD<decltype(d)> v(raw);
+  const VT doubled = Combine(dt, v, v);
+  const VT estimate(vrsqrteq_f64(doubled.raw));
+  return LowerHalf(d, estimate).raw;
+}
+#endif
+
+// Approximate reciprocal square root: generated from the vrsqrte intrinsic
+// prefix for the floating-point vector types.
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(ApproximateReciprocalSqrt, vrsqrte, _, 1)
+
+#if HWY_HAVE_FLOAT64
+// Toggle HWY_NATIVE_F64_APPROX_RSQRT (undefine if defined, else define).
+#ifdef HWY_NATIVE_F64_APPROX_RSQRT
+#undef HWY_NATIVE_F64_APPROX_RSQRT
+#else
+#define HWY_NATIVE_F64_APPROX_RSQRT
+#endif
+
+// Full precision square root
+// Generated from the vsqrt intrinsic prefix.
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(Sqrt, vsqrt, _, 1)
+#else   // !HWY_HAVE_FLOAT64
+// Internal helper generated from the vrsqrts intrinsic prefix; the emulated
+// Sqrt that follows multiplies its estimate by this value.
+namespace detail {
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(ReciprocalSqrtStep, vrsqrts, _, 2)
+}  // namespace detail
+
+// Emulated square root: refines ApproximateReciprocalSqrt(v) with three
+// ReciprocalSqrtStep corrections, multiplies by v, and forces lanes where
+// v == 0 to zero.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> Sqrt(const Vec128<T, N> v) {
+  auto rsqrt = ApproximateReciprocalSqrt(v);
+
+  for (int step = 0; step < 3; ++step) {
+    rsqrt *= detail::ReciprocalSqrtStep(v * rsqrt, rsqrt);
+  }
+
+  const auto product = v * rsqrt;
+  const auto is_zero = v == Zero(Simd<T, N, 0>());
+  return IfThenZeroElse(is_zero, product);
+}
+#endif  // HWY_HAVE_FLOAT64
+
+// ================================================== LOGICAL
+
+// ------------------------------ Not
+
+// There is no 64-bit vmvn, so cast instead of using HWY_NEON_DEF_FUNCTION.
+// Full-vector bitwise NOT: reinterprets as bytes, applies vmvnq_u8 and casts
+// back to the original lane type.
+template <typename T>
+HWY_API Vec128<T> Not(const Vec128<T> v) {
+  const DFromV<decltype(v)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  const auto bytes = BitCast(du8, v);
+  const Vec128<uint8_t> inverted(vmvnq_u8(bytes.raw));
+  return BitCast(d, inverted);
+}
+// Bitwise NOT for vectors of at most 64 bits: same byte-wise approach using
+// vmvn_u8.
+template <typename T, size_t N, HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_API Vec128<T, N> Not(const Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  using ByteVec = decltype(Zero(du8));
+  const auto bytes = BitCast(du8, v);
+  const ByteVec inverted(vmvn_u8(bytes.raw));
+  return BitCast(d, inverted);
+}
+
+// ------------------------------ And
+// Integer overloads are generated from the vand intrinsic prefix.
+HWY_NEON_DEF_FUNCTION_INTS_UINTS(And, vand, _, 2)
+
+// Floating-point overload: ANDs the same-sized unsigned representations and
+// casts the result back.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> And(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto a_bits = BitCast(du, a);
+  const auto b_bits = BitCast(du, b);
+  return BitCast(d, a_bits & b_bits);
+}
+
+// ------------------------------ AndNot
+
+namespace detail {
+// reversed_andnot returns a & ~b.
+// Generated from the vbic intrinsic prefix for integer lanes.
+HWY_NEON_DEF_FUNCTION_INTS_UINTS(reversed_andnot, vbic, _, 2)
+}  // namespace detail
+
+// Returns ~not_mask & mask.
+// Integer overload: swaps the operands for detail::reversed_andnot.
+template <typename T, size_t N, HWY_IF_NOT_FLOAT(T)>
+HWY_API Vec128<T, N> AndNot(const Vec128<T, N> not_mask,
+                            const Vec128<T, N> mask) {
+  const Vec128<T, N> result = detail::reversed_andnot(mask, not_mask);
+  return result;
+}
+
+// Floating-point overload: operates on the same-sized unsigned
+// representations and casts the result back.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> AndNot(const Vec128<T, N> not_mask,
+                            const Vec128<T, N> mask) {
+  const DFromV<decltype(mask)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto mask_bits = BitCast(du, mask);
+  const auto not_mask_bits = BitCast(du, not_mask);
+  VFromD<decltype(du)> cleared =
+      detail::reversed_andnot(mask_bits, not_mask_bits);
+  return BitCast(d, cleared);
+}
+
+// ------------------------------ Or
+
+// Integer overloads are generated from the vorr intrinsic prefix.
+HWY_NEON_DEF_FUNCTION_INTS_UINTS(Or, vorr, _, 2)
+
+// Floating-point overload: ORs the same-sized unsigned representations and
+// casts the result back.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> Or(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto a_bits = BitCast(du, a);
+  const auto b_bits = BitCast(du, b);
+  return BitCast(d, a_bits | b_bits);
+}
+
+// ------------------------------ Xor
+
+// Integer overloads are generated from the veor intrinsic prefix.
+HWY_NEON_DEF_FUNCTION_INTS_UINTS(Xor, veor, _, 2)
+
+// Floating-point overload: XORs the same-sized unsigned representations and
+// casts the result back.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> Xor(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto a_bits = BitCast(du, a);
+  const auto b_bits = BitCast(du, b);
+  return BitCast(d, a_bits ^ b_bits);
+}
+
+// ------------------------------ Xor3
+#if HWY_ARCH_ARM_A64 && defined(__ARM_FEATURE_SHA3)
+// Full-width integer overloads are generated from the veor3 intrinsic prefix.
+HWY_NEON_DEF_FUNCTION_FULL_UI(Xor3, veor3, _, 3)
+
+// Half vectors are not natively supported. Two Xor are likely more efficient
+// than Combine to 128-bit.
+template <typename T, size_t N, HWY_IF_V_SIZE_LE(T, N, 8), HWY_IF_NOT_FLOAT(T)>
+HWY_API Vec128<T, N> Xor3(Vec128<T, N> x1, Vec128<T, N> x2, Vec128<T, N> x3) {
+  const auto x2_xor_x3 = Xor(x2, x3);
+  return Xor(x1, x2_xor_x3);
+}
+
+// Floating-point overload: applies Xor3 to the same-sized unsigned
+// representations and casts the result back.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> Xor3(const Vec128<T, N> x1, const Vec128<T, N> x2,
+                          const Vec128<T, N> x3) {
+  const DFromV<decltype(x1)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto bits1 = BitCast(du, x1);
+  const auto bits2 = BitCast(du, x2);
+  const auto bits3 = BitCast(du, x3);
+  return BitCast(d, Xor3(bits1, bits2, bits3));
+}
+
+#else
+// Fallback used when the enclosing #if condition does not hold: two
+// chained Xor.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Xor3(Vec128<T, N> x1, Vec128<T, N> x2, Vec128<T, N> x3) {
+  const auto x2_xor_x3 = Xor(x2, x3);
+  return Xor(x1, x2_xor_x3);
+}
+#endif
+
+// ------------------------------ Or3
+// Bitwise OR of three vectors, as two chained Or.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Or3(Vec128<T, N> o1, Vec128<T, N> o2, Vec128<T, N> o3) {
+  const auto o2_or_o3 = Or(o2, o3);
+  return Or(o1, o2_or_o3);
+}
+
+// ------------------------------ OrAnd
+// Returns o | (a1 & a2).
+template <typename T, size_t N>
+HWY_API Vec128<T, N> OrAnd(Vec128<T, N> o, Vec128<T, N> a1, Vec128<T, N> a2) {
+  const auto conjunction = And(a1, a2);
+  return Or(o, conjunction);
+}
+
+// ------------------------------ IfVecThenElse
+// Converts the vector `mask` to a mask via MaskFromVec and selects between
+// `yes` and `no` with IfThenElse.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfVecThenElse(Vec128<T, N> mask, Vec128<T, N> yes,
+                                   Vec128<T, N> no) {
+  const auto selector = MaskFromVec(mask);
+  return IfThenElse(selector, yes, no);
+}
+
+// ------------------------------ BitwiseIfThenElse
+
+// Toggle HWY_NATIVE_BITWISE_IF_THEN_ELSE (undefine if defined, else define).
+#ifdef HWY_NATIVE_BITWISE_IF_THEN_ELSE
+#undef HWY_NATIVE_BITWISE_IF_THEN_ELSE
+#else
+#define HWY_NATIVE_BITWISE_IF_THEN_ELSE
+#endif
+
+// Forwards to IfVecThenElse with the same three operands.
+template <class V>
+HWY_API V BitwiseIfThenElse(V mask, V yes, V no) {
+  const V selected = IfVecThenElse(mask, yes, no);
+  return selected;
+}
+
+// ------------------------------ Operator overloads (internal-only if float)
+
+// Operator form of And.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator&(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const Vec128<T, N> result = And(a, b);
+  return result;
+}
+
+// Operator form of Or.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator|(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const Vec128<T, N> result = Or(a, b);
+  return result;
+}
+
+// Operator form of Xor.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator^(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const Vec128<T, N> result = Xor(a, b);
+  return result;
+}
+
+// ------------------------------ I64/U64 AbsDiff
+
+// Signed 64-bit AbsDiff: larger operand minus smaller operand.
+template <size_t N>
+HWY_API Vec128<int64_t, N> AbsDiff(const Vec128<int64_t, N> a,
+                                   const Vec128<int64_t, N> b) {
+  const auto larger = Max(a, b);
+  const auto smaller = Min(a, b);
+  return larger - smaller;
+}
+
+// Unsigned 64-bit AbsDiff: OR of the two saturating differences.
+template <size_t N>
+HWY_API Vec128<uint64_t, N> AbsDiff(const Vec128<uint64_t, N> a,
+                                    const Vec128<uint64_t, N> b) {
+  const auto a_minus_b = SaturatedSub(a, b);
+  const auto b_minus_a = SaturatedSub(b, a);
+  return Or(a_minus_b, b_minus_a);
+}
+
+// ------------------------------ PopulationCount
+
+// Toggle HWY_NATIVE_POPCNT (undefine if defined, else define).
+#ifdef HWY_NATIVE_POPCNT
+#undef HWY_NATIVE_POPCNT
+#else
+#define HWY_NATIVE_POPCNT
+#endif
+
+// Overloads selected by lane size (hwy::SizeTag) and vector width. Each one
+// counts bits per byte with vcnt(q)_u8; wider lanes then widen the counts
+// with pairwise additions.
+namespace detail {
+
+// 1-byte lanes, full vector.
+template <typename T>
+HWY_INLINE Vec128<T> PopulationCount(hwy::SizeTag<1> /* tag */, Vec128<T> v) {
+  const Full128<uint8_t> du8;
+  const auto as_bytes = BitCast(du8, v);
+  return Vec128<T>(vcntq_u8(as_bytes.raw));
+}
+// 1-byte lanes, at most 64 bits.
+template <typename T, size_t N, HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_INLINE Vec128<T, N> PopulationCount(hwy::SizeTag<1> /* tag */,
+                                        Vec128<T, N> v) {
+  const Simd<uint8_t, N, 0> du8;
+  const auto as_bytes = BitCast(du8, v);
+  return Vec128<T, N>(vcnt_u8(as_bytes.raw));
+}
+
+// NEON lacks popcount for lane sizes > 1, so take pairwise sums of the bytes.
+// 2-byte lanes, full vector.
+template <typename T>
+HWY_INLINE Vec128<T> PopulationCount(hwy::SizeTag<2> /* tag */, Vec128<T> v) {
+  const Full128<uint8_t> du8;
+  const uint8x16_t per_byte = vcntq_u8(BitCast(du8, v).raw);
+  const auto per_u16 = vpaddlq_u8(per_byte);
+  return Vec128<T>(per_u16);
+}
+// 2-byte lanes, at most 64 bits.
+template <typename T, size_t N, HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_INLINE Vec128<T, N> PopulationCount(hwy::SizeTag<2> /* tag */,
+                                        Vec128<T, N> v) {
+  const Repartition<uint8_t, DFromV<decltype(v)>> du8;
+  const uint8x8_t per_byte = vcnt_u8(BitCast(du8, v).raw);
+  const auto per_u16 = vpaddl_u8(per_byte);
+  return Vec128<T, N>(per_u16);
+}
+
+// 4-byte lanes, full vector.
+template <typename T>
+HWY_INLINE Vec128<T> PopulationCount(hwy::SizeTag<4> /* tag */, Vec128<T> v) {
+  const Full128<uint8_t> du8;
+  const uint8x16_t per_byte = vcntq_u8(BitCast(du8, v).raw);
+  const auto per_u16 = vpaddlq_u8(per_byte);
+  const auto per_u32 = vpaddlq_u16(per_u16);
+  return Vec128<T>(per_u32);
+}
+// 4-byte lanes, at most 64 bits.
+template <typename T, size_t N, HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_INLINE Vec128<T, N> PopulationCount(hwy::SizeTag<4> /* tag */,
+                                        Vec128<T, N> v) {
+  const Repartition<uint8_t, DFromV<decltype(v)>> du8;
+  const uint8x8_t per_byte = vcnt_u8(BitCast(du8, v).raw);
+  const auto per_u16 = vpaddl_u8(per_byte);
+  const auto per_u32 = vpaddl_u16(per_u16);
+  return Vec128<T, N>(per_u32);
+}
+
+// 8-byte lanes, full vector.
+template <typename T>
+HWY_INLINE Vec128<T> PopulationCount(hwy::SizeTag<8> /* tag */, Vec128<T> v) {
+  const Full128<uint8_t> du8;
+  const uint8x16_t per_byte = vcntq_u8(BitCast(du8, v).raw);
+  const auto per_u16 = vpaddlq_u8(per_byte);
+  const auto per_u32 = vpaddlq_u16(per_u16);
+  const auto per_u64 = vpaddlq_u32(per_u32);
+  return Vec128<T>(per_u64);
+}
+// 8-byte lanes, at most 64 bits.
+template <typename T, size_t N, HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_INLINE Vec128<T, N> PopulationCount(hwy::SizeTag<8> /* tag */,
+                                        Vec128<T, N> v) {
+  const Repartition<uint8_t, DFromV<decltype(v)>> du8;
+  const uint8x8_t per_byte = vcnt_u8(BitCast(du8, v).raw);
+  const auto per_u16 = vpaddl_u8(per_byte);
+  const auto per_u32 = vpaddl_u16(per_u16);
+  const auto per_u64 = vpaddl_u32(per_u32);
+  return Vec128<T, N>(per_u64);
+}
+
+}  // namespace detail
+
+// Public entry point for non-float lanes: dispatches to the detail overload
+// matching sizeof(T).
+template <typename T, size_t N, HWY_IF_NOT_FLOAT(T)>
+HWY_API Vec128<T, N> PopulationCount(Vec128<T, N> v) {
+  const hwy::SizeTag<sizeof(T)> lane_size_tag;
+  return detail::PopulationCount(lane_size_tag, v);
+}
+
+// ================================================== SIGN
+
+// ------------------------------ Abs
+// i64 is implemented after BroadcastSignBit.
+// Abs for 8/16/32-bit signed integers and for the floating-point vector
+// types, generated from the vabs intrinsic prefix.
+HWY_NEON_DEF_FUNCTION_INT_8_16_32(Abs, vabs, _, 1)
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(Abs, vabs, _, 1)
+
+// ------------------------------ CopySign
+// Takes the bits selected by SignBit(d) from `sign` and all other bits from
+// `magn`. Floating-point only (enforced by static_assert).
+template <typename T, size_t N>
+HWY_API Vec128<T, N> CopySign(Vec128<T, N> magn, Vec128<T, N> sign) {
+  static_assert(IsFloat<T>(), "Only makes sense for floating-point");
+  const DFromV<decltype(magn)> d;
+  const auto sign_mask = SignBit(d);
+  return BitwiseIfThenElse(sign_mask, sign, magn);
+}
+
+// ------------------------------ CopySignToAbs
+// Returns abs | (SignBit(d) & sign). Floating-point only (enforced by
+// static_assert).
+template <typename T, size_t N>
+HWY_API Vec128<T, N> CopySignToAbs(Vec128<T, N> abs, Vec128<T, N> sign) {
+  static_assert(IsFloat<T>(), "Only makes sense for floating-point");
+  const DFromV<decltype(abs)> d;
+  const auto sign_mask = SignBit(d);
+  return OrAnd(abs, sign_mask, sign);
+}
+
+// ------------------------------ BroadcastSignBit
+
+// Shifts each signed lane right by (lane bits - 1).
+template <typename T, size_t N, HWY_IF_SIGNED(T)>
+HWY_API Vec128<T, N> BroadcastSignBit(const Vec128<T, N> v) {
+  constexpr int kShift = sizeof(T) * 8 - 1;
+  return ShiftRight<kShift>(v);
+}
+
+// ================================================== MASK
+
+// ------------------------------ To/from vector
+
+// Mask and Vec have the same representation (true = FF..FF).
+// Builds a mask from the raw bits of `v`, reinterpreted as unsigned lanes.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> MaskFromVec(const Vec128<T, N> v) {
+  const Simd<MakeUnsigned<T>, N, 0> du;
+  const auto bits = BitCast(du, v);
+  return Mask128<T, N>(bits.raw);
+}
+
+// Mask type for descriptor D: the type MaskFromVec returns for VFromD<D>.
+template <class D>
+using MFromD = decltype(MaskFromVec(VFromD<D>()));
+
+// Wraps the mask's raw bits in an unsigned vector and casts it to D's
+// vector type.
+template <class D>
+HWY_API VFromD<D> VecFromMask(D d, const MFromD<D> m) {
+  // Raw type of masks is unsigned.
+  const RebindToUnsigned<D> du;
+  const VFromD<decltype(du)> bits(m.raw);
+  return BitCast(d, bits);
+}
+
+// ------------------------------ RebindMask (MaskFromVec)
+
+// Reinterprets a mask for another lane type of the same size by reusing its
+// raw bits.
+template <typename TFrom, size_t NFrom, class DTo>
+HWY_API MFromD<DTo> RebindMask(DTo /* tag */, Mask128<TFrom, NFrom> m) {
+  static_assert(sizeof(TFrom) == sizeof(TFromD<DTo>), "Must have same size");
+  using MaskTo = MFromD<DTo>;
+  return MaskTo(m.raw);
+}
+
+// ------------------------------ IfThenElse
+
+// Builder macros for the HWY_IF argument kind used by the invocation below:
+// empty template prefix, Vec128 return type, (mask, yes, no) parameters, and
+// their raw members as the intrinsic arguments.
+#define HWY_NEON_BUILD_TPL_HWY_IF
+#define HWY_NEON_BUILD_RET_HWY_IF(type, size) Vec128<type##_t, size>
+#define HWY_NEON_BUILD_PARAM_HWY_IF(type, size)                         \
+  const Mask128<type##_t, size> mask, const Vec128<type##_t, size> yes, \
+      const Vec128<type##_t, size> no
+#define HWY_NEON_BUILD_ARG_HWY_IF mask.raw, yes.raw, no.raw
+
+// IfThenElse for all lane types, generated from the vbsl intrinsic prefix.
+HWY_NEON_DEF_FUNCTION_ALL_TYPES(IfThenElse, vbsl, _, HWY_IF)
+
+// The builder macros are only needed for the invocation above.
+#undef HWY_NEON_BUILD_TPL_HWY_IF
+#undef HWY_NEON_BUILD_RET_HWY_IF
+#undef HWY_NEON_BUILD_PARAM_HWY_IF
+#undef HWY_NEON_BUILD_ARG_HWY_IF
+
+// mask ? yes : 0
+// ANDs `yes` with the vector form of `mask`.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfThenElseZero(Mask128<T, N> mask, Vec128<T, N> yes) {
+  const DFromV<decltype(yes)> d;
+  const auto mask_bits = VecFromMask(d, mask);
+  return yes & mask_bits;
+}
+
+// mask ? 0 : no
+// Clears the bits of `no` that are set in the vector form of `mask`.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfThenZeroElse(Mask128<T, N> mask, Vec128<T, N> no) {
+  const DFromV<decltype(no)> d;
+  const auto mask_bits = VecFromMask(d, mask);
+  return AndNot(mask_bits, no);
+}
+
+// Selects `yes` where the sign bit of `v` is set, otherwise `no`. The sign
+// bit is broadcast across the lane on the signed-integer view of `v`.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfNegativeThenElse(Vec128<T, N> v, Vec128<T, N> yes,
+                                        Vec128<T, N> no) {
+  static_assert(IsSigned<T>(), "Only works for signed/float");
+  const DFromV<decltype(no)> d;
+  const RebindToSigned<decltype(d)> di;
+
+  const auto sign_fill = BroadcastSignBit(BitCast(di, v));
+  Mask128<T, N> is_negative = MaskFromVec(BitCast(d, sign_fill));
+  return IfThenElse(is_negative, yes, no);
+}
+
+// Returns Max(0, v) per lane.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> ZeroIfNegative(Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  const auto zeros = Zero(d);
+  return Max(zeros, v);
+}
+
+// ------------------------------ Mask logical
+
+// Mask NOT, computed on the vector form of the mask.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Not(const Mask128<T, N> m) {
+  const DFromM<decltype(m)> d;
+  const auto bits = VecFromMask(d, m);
+  return MaskFromVec(Not(bits));
+}
+
+// Mask AND, computed on the vector forms of both masks.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> And(const Mask128<T, N> a, Mask128<T, N> b) {
+  const DFromM<decltype(a)> d;
+  const auto a_bits = VecFromMask(d, a);
+  const auto b_bits = VecFromMask(d, b);
+  return MaskFromVec(And(a_bits, b_bits));
+}
+
+// Mask AndNot, computed on the vector forms of both masks.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> AndNot(const Mask128<T, N> a, Mask128<T, N> b) {
+  const DFromM<decltype(a)> d;
+  const auto a_bits = VecFromMask(d, a);
+  const auto b_bits = VecFromMask(d, b);
+  return MaskFromVec(AndNot(a_bits, b_bits));
+}
+
+// Mask OR, computed on the vector forms of both masks.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Or(const Mask128<T, N> a, Mask128<T, N> b) {
+  const DFromM<decltype(a)> d;
+  const auto a_bits = VecFromMask(d, a);
+  const auto b_bits = VecFromMask(d, b);
+  return MaskFromVec(Or(a_bits, b_bits));
+}
+
+// Mask XOR, computed on the vector forms of both masks.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Xor(const Mask128<T, N> a, Mask128<T, N> b) {
+  const DFromM<decltype(a)> d;
+  const auto a_bits = VecFromMask(d, a);
+  const auto b_bits = VecFromMask(d, b);
+  return MaskFromVec(Xor(a_bits, b_bits));
+}
+
+// Returns AndNot(a, Not(b)), computed on the vector forms of both masks.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> ExclusiveNeither(const Mask128<T, N> a, Mask128<T, N> b) {
+  const DFromM<decltype(a)> d;
+  const auto a_bits = VecFromMask(d, a);
+  const auto not_b_bits = Not(VecFromMask(d, b));
+  return MaskFromVec(AndNot(a_bits, not_b_bits));
+}
+
+// ================================================== COMPARE
+
+// Comparisons fill a lane with 1-bits if the condition is true, else 0.
+
+// ------------------------------ Shuffle2301 (for i64 compares)
+
+// Swap 32-bit halves in 64-bits
+// 64-bit vectors: vrev64_* on u32 / i32 / f32 lanes.
+HWY_API Vec64<uint32_t> Shuffle2301(const Vec64<uint32_t> v) {
+  const uint32x2_t swapped = vrev64_u32(v.raw);
+  return Vec64<uint32_t>(swapped);
+}
+HWY_API Vec64<int32_t> Shuffle2301(const Vec64<int32_t> v) {
+  const int32x2_t swapped = vrev64_s32(v.raw);
+  return Vec64<int32_t>(swapped);
+}
+HWY_API Vec64<float> Shuffle2301(const Vec64<float> v) {
+  const float32x2_t swapped = vrev64_f32(v.raw);
+  return Vec64<float>(swapped);
+}
+// 128-bit vectors: vrev64q_* on u32 / i32 / f32 lanes.
+HWY_API Vec128<uint32_t> Shuffle2301(const Vec128<uint32_t> v) {
+  const uint32x4_t swapped = vrev64q_u32(v.raw);
+  return Vec128<uint32_t>(swapped);
+}
+HWY_API Vec128<int32_t> Shuffle2301(const Vec128<int32_t> v) {
+  const int32x4_t swapped = vrev64q_s32(v.raw);
+  return Vec128<int32_t>(swapped);
+}
+HWY_API Vec128<float> Shuffle2301(const Vec128<float> v) {
+  const float32x4_t swapped = vrev64q_f32(v.raw);
+  return Vec128<float>(swapped);
+}
+
+// Builder macros for the HWY_COMPARE argument kind used by the invocations
+// below: empty template prefix, Mask128 return type, (a, b) parameters, and
+// their raw members as the intrinsic arguments.
+#define HWY_NEON_BUILD_TPL_HWY_COMPARE
+#define HWY_NEON_BUILD_RET_HWY_COMPARE(type, size) Mask128<type##_t, size>
+#define HWY_NEON_BUILD_PARAM_HWY_COMPARE(type, size) \
+  const Vec128<type##_t, size> a, const Vec128<type##_t, size> b
+#define HWY_NEON_BUILD_ARG_HWY_COMPARE a.raw, b.raw
+
+// ------------------------------ Equality
+// Generated from the vceq intrinsic prefix.
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(operator==, vceq, _, HWY_COMPARE)
+#if HWY_ARCH_ARM_A64
+HWY_NEON_DEF_FUNCTION_INTS_UINTS(operator==, vceq, _, HWY_COMPARE)
+#else
+// No 64-bit comparisons on armv7: emulate them below, after Shuffle2301.
+HWY_NEON_DEF_FUNCTION_INT_8_16_32(operator==, vceq, _, HWY_COMPARE)
+HWY_NEON_DEF_FUNCTION_UINT_8_16_32(operator==, vceq, _, HWY_COMPARE)
+#endif
+
+// ------------------------------ Strict inequality (integer, float)
+// Generated from the vclt intrinsic prefix; without A64 only the 8/16/32-bit
+// integer lanes are generated here.
+#if HWY_ARCH_ARM_A64
+HWY_NEON_DEF_FUNCTION_INTS_UINTS(operator<, vclt, _, HWY_COMPARE)
+#else
+HWY_NEON_DEF_FUNCTION_UINT_8_16_32(operator<, vclt, _, HWY_COMPARE)
+HWY_NEON_DEF_FUNCTION_INT_8_16_32(operator<, vclt, _, HWY_COMPARE)
+#endif
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(operator<, vclt, _, HWY_COMPARE)
+
+// ------------------------------ Weak inequality (integer, float)
+// Generated from the vcle intrinsic prefix; without A64 only the 8/16/32-bit
+// integer lanes are generated here.
+#if HWY_ARCH_ARM_A64
+HWY_NEON_DEF_FUNCTION_INTS_UINTS(operator<=, vcle, _, HWY_COMPARE)
+#else
+HWY_NEON_DEF_FUNCTION_UINT_8_16_32(operator<=, vcle, _, HWY_COMPARE)
+HWY_NEON_DEF_FUNCTION_INT_8_16_32(operator<=, vcle, _, HWY_COMPARE)
+#endif
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(operator<=, vcle, _, HWY_COMPARE)
+
+// The builder macros are only needed for the invocations above.
+#undef HWY_NEON_BUILD_TPL_HWY_COMPARE
+#undef HWY_NEON_BUILD_RET_HWY_COMPARE
+#undef HWY_NEON_BUILD_PARAM_HWY_COMPARE
+#undef HWY_NEON_BUILD_ARG_HWY_COMPARE
+
+// ------------------------------ Armv7 i64 compare (Shuffle2301, Eq)
+
+#if HWY_ARCH_ARM_V7
+
+// Armv7 int64 equality: compares as 32-bit lanes, then ANDs each 32-bit
+// result with its Shuffle2301 partner so a 64-bit lane is set only if both
+// halves matched.
+template <size_t N>
+HWY_API Mask128<int64_t, N> operator==(const Vec128<int64_t, N> a,
+                                       const Vec128<int64_t, N> b) {
+  const Simd<int32_t, N * 2, 0> d32;
+  const Simd<int64_t, N, 0> d64;
+  const auto halves_equal = Eq(BitCast(d32, a), BitCast(d32, b));
+  const auto eq32 = VecFromMask(d32, halves_equal);
+  const auto both_equal = eq32 & Shuffle2301(eq32);
+  return MaskFromVec(BitCast(d64, both_equal));
+}
+
+// Armv7 uint64 equality: compares as 32-bit lanes, then ANDs each 32-bit
+// result with its Shuffle2301 partner so a 64-bit lane is set only if both
+// halves matched.
+template <size_t N>
+HWY_API Mask128<uint64_t, N> operator==(const Vec128<uint64_t, N> a,
+                                        const Vec128<uint64_t, N> b) {
+  const Simd<uint32_t, N * 2, 0> d32;
+  const Simd<uint64_t, N, 0> d64;
+  const auto halves_equal = Eq(BitCast(d32, a), BitCast(d32, b));
+  const auto eq32 = VecFromMask(d32, halves_equal);
+  const auto both_equal = eq32 & Shuffle2301(eq32);
+  return MaskFromVec(BitCast(d64, both_equal));
+}
+
+// Armv7 int64 less-than (full vector): the mask is the broadcast sign bit of
+// the saturating difference a - b.
+HWY_API Mask128<int64_t> operator<(const Vec128<int64_t> a,
+                                   const Vec128<int64_t> b) {
+  const int64x2_t diff = vqsubq_s64(a.raw, b.raw);
+  const Vec128<int64_t> diff_vec(diff);
+  return MaskFromVec(BroadcastSignBit(diff_vec));
+}
+// Armv7 int64 less-than (64-bit vector): the mask is the broadcast sign bit
+// of the saturating difference a - b.
+HWY_API Mask128<int64_t, 1> operator<(const Vec64<int64_t> a,
+                                      const Vec64<int64_t> b) {
+  const int64x1_t diff = vqsub_s64(a.raw, b.raw);
+  const Vec64<int64_t> diff_vec(diff);
+  return MaskFromVec(BroadcastSignBit(diff_vec));
+}
+
+// Armv7 uint64 less-than: builds a value whose most significant bit holds
+// the comparison result, then broadcasts that bit on the signed view.
+template <size_t N>
+HWY_API Mask128<uint64_t, N> operator<(const Vec128<uint64_t, N> a,
+                                       const Vec128<uint64_t, N> b) {
+  const DFromV<decltype(a)> du;
+  const RebindToSigned<decltype(du)> di;
+  const Vec128<uint64_t, N> msb = AndNot(a, b) | AndNot(a ^ b, a - b);
+  const auto sign_fill = BroadcastSignBit(BitCast(di, msb));
+  return MaskFromVec(BitCast(du, sign_fill));
+}
+
+// Armv7 int64 a <= b, expressed as Not(b < a).
+template <size_t N>
+HWY_API Mask128<int64_t, N> operator<=(const Vec128<int64_t, N> a,
+                                       const Vec128<int64_t, N> b) {
+  const auto b_less_than_a = b < a;
+  return Not(b_less_than_a);
+}
+
+// Armv7 uint64 a <= b, expressed as Not(b < a).
+template <size_t N>
+HWY_API Mask128<uint64_t, N> operator<=(const Vec128<uint64_t, N> a,
+                                        const Vec128<uint64_t, N> b) {
+  const auto b_less_than_a = b < a;
+  return Not(b_less_than_a);
+}
+
+#endif
+
+// ------------------------------ operator!= (operator==)
+
+// Customize HWY_NEON_DEF_FUNCTION to call 2 functions.
+// Save the current HWY_NEON_DEF_FUNCTION definition; it is restored by the
+// pop_macro at the end of this section.
+#pragma push_macro("HWY_NEON_DEF_FUNCTION")
+#undef HWY_NEON_DEF_FUNCTION
+// This cannot have _any_ template argument (in x86_128 we can at least have N
+// as an argument), otherwise it is not more specialized than rewritten
+// operator== in C++20, leading to compile errors.
+// The temporary definition ignores prefix/infix/suffix/args and emits a
+// non-template function returning Not(a == b) for each (type, size) pair.
+#define HWY_NEON_DEF_FUNCTION(type, size, name, prefix, infix, suffix, args) \
+  HWY_API Mask128<type##_t, size> name(Vec128<type##_t, size> a,             \
+                                       Vec128<type##_t, size> b) {           \
+    return Not(a == b);                                                      \
+  }
+
+// Instantiate operator!= for all lane types; the intrinsic-related arguments
+// are placeholders because the temporary macro does not use them.
+HWY_NEON_DEF_FUNCTION_ALL_TYPES(operator!=, ignored, ignored, ignored)
+
+#pragma pop_macro("HWY_NEON_DEF_FUNCTION")
+
+// ------------------------------ Reversed comparisons
+
+// a > b, expressed as operator< with the operands swapped.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> operator>(Vec128<T, N> a, Vec128<T, N> b) {
+  const Mask128<T, N> swapped_less = operator<(b, a);
+  return swapped_less;
+}
+// a >= b, expressed as operator<= with the operands swapped.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> operator>=(Vec128<T, N> a, Vec128<T, N> b) {
+  const Mask128<T, N> swapped_less_equal = operator<=(b, a);
+  return swapped_less_equal;
+}
+
+// ------------------------------ FirstN (Iota, Lt)
+
+// Mask that is true for lanes whose index (from detail::Iota0) is less than
+// `num`. `num` is converted to the signed lane type before the comparison.
+template <class D>
+HWY_API MFromD<D> FirstN(D d, size_t num) {
+  const RebindToSigned<decltype(d)> di;  // Signed comparisons are cheaper.
+  using TI = TFromD<decltype(di)>;
+  const auto lane_indices = detail::Iota0(di);
+  const auto limit = Set(di, static_cast<TI>(num));
+  return RebindMask(d, lane_indices < limit);
+}
+
+// ------------------------------ TestBit (Eq)
+
+// Builder macros for the HWY_TESTBIT argument kind used by the invocations
+// below: empty template prefix, Mask128 return type, (v, bit) parameters, and
+// their raw members as the intrinsic arguments.
+#define HWY_NEON_BUILD_TPL_HWY_TESTBIT
+#define HWY_NEON_BUILD_RET_HWY_TESTBIT(type, size) Mask128<type##_t, size>
+#define HWY_NEON_BUILD_PARAM_HWY_TESTBIT(type, size) \
+  Vec128<type##_t, size> v, Vec128<type##_t, size> bit
+#define HWY_NEON_BUILD_ARG_HWY_TESTBIT v.raw, bit.raw
+
+#if HWY_ARCH_ARM_A64
+HWY_NEON_DEF_FUNCTION_INTS_UINTS(TestBit, vtst, _, HWY_TESTBIT)
+#else
+// No 64-bit versions on armv7
+HWY_NEON_DEF_FUNCTION_UINT_8_16_32(TestBit, vtst, _, HWY_TESTBIT)
+HWY_NEON_DEF_FUNCTION_INT_8_16_32(TestBit, vtst, _, HWY_TESTBIT)
+
+// 64-bit lanes are handled by masking with `bit` and comparing to `bit`.
+template <size_t N>
+HWY_API Mask128<uint64_t, N> TestBit(Vec128<uint64_t, N> v,
+                                     Vec128<uint64_t, N> bit) {
+  const auto selected = v & bit;
+  return selected == bit;
+}
+template <size_t N>
+HWY_API Mask128<int64_t, N> TestBit(Vec128<int64_t, N> v,
+                                    Vec128<int64_t, N> bit) {
+  const auto selected = v & bit;
+  return selected == bit;
+}
+
+#endif
+#undef HWY_NEON_BUILD_TPL_HWY_TESTBIT
+#undef HWY_NEON_BUILD_RET_HWY_TESTBIT
+#undef HWY_NEON_BUILD_PARAM_HWY_TESTBIT
+#undef HWY_NEON_BUILD_ARG_HWY_TESTBIT
+
+// ------------------------------ Abs i64 (IfThenElse, BroadcastSignBit)
+// int64 Abs (full vector): vabsq_s64 on A64; otherwise selects 0 - v for
+// lanes whose sign bit is set.
+HWY_API Vec128<int64_t> Abs(const Vec128<int64_t> v) {
+#if HWY_ARCH_ARM_A64
+  const int64x2_t magnitude = vabsq_s64(v.raw);
+  return Vec128<int64_t>(magnitude);
+#else
+  const DFromV<decltype(v)> d;
+  const auto zero = Zero(d);
+  const auto is_negative = MaskFromVec(BroadcastSignBit(v));
+  return IfThenElse(is_negative, zero - v, v);
+#endif
+}
+// int64 Abs (64-bit vector): vabs_s64 on A64; otherwise selects 0 - v for
+// lanes whose sign bit is set.
+HWY_API Vec64<int64_t> Abs(const Vec64<int64_t> v) {
+#if HWY_ARCH_ARM_A64
+  const int64x1_t magnitude = vabs_s64(v.raw);
+  return Vec64<int64_t>(magnitude);
+#else
+  const DFromV<decltype(v)> d;
+  const auto zero = Zero(d);
+  const auto is_negative = MaskFromVec(BroadcastSignBit(v));
+  return IfThenElse(is_negative, zero - v, v);
+#endif
+}
+
+// ------------------------------ Min (IfThenElse, BroadcastSignBit)
+
+// Unsigned
+// 8/16/32-bit unsigned Min is generated from the vmin intrinsic prefix.
+HWY_NEON_DEF_FUNCTION_UINT_8_16_32(Min, vmin, _, 2)
+
+// uint64 Min: compare-and-select on A64; otherwise a minus the saturating
+// difference a - b.
+template <size_t N>
+HWY_API Vec128<uint64_t, N> Min(Vec128<uint64_t, N> a, Vec128<uint64_t, N> b) {
+#if HWY_ARCH_ARM_A64
+  const auto b_is_smaller = b < a;
+  return IfThenElse(b_is_smaller, b, a);
+#else
+  const DFromV<decltype(a)> du;
+  const RebindToSigned<decltype(du)> di;
+  const auto excess = SaturatedSub(a, b);
+  return BitCast(du, BitCast(di, a) - BitCast(di, excess));
+#endif
+}
+
+// Signed
+// 8/16/32-bit signed Min is generated from the vmin intrinsic prefix.
+HWY_NEON_DEF_FUNCTION_INT_8_16_32(Min, vmin, _, 2)
+
+// int64 Min: compare-and-select on A64; otherwise selects a where the
+// saturating difference a - b has its sign bit set, else b.
+template <size_t N>
+HWY_API Vec128<int64_t, N> Min(Vec128<int64_t, N> a, Vec128<int64_t, N> b) {
+#if HWY_ARCH_ARM_A64
+  const auto b_is_smaller = b < a;
+  return IfThenElse(b_is_smaller, b, a);
+#else
+  const Vec128<int64_t, N> diff = SaturatedSub(a, b);
+  const auto a_is_smaller = MaskFromVec(BroadcastSignBit(diff));
+  return IfThenElse(a_is_smaller, a, b);
+#endif
+}
+
+// Float: IEEE minimumNumber on v8
+#if HWY_ARCH_ARM_A64
+
+// 16- and 32-bit float Min, generated from the vminnm intrinsic prefix.
+HWY_NEON_DEF_FUNCTION_FLOAT_16_32(Min, vminnm, _, 2)
+
+// GCC 6.5 and earlier are missing the 64-bit (non-q) intrinsic, so define
+// in terms of the 128-bit intrinsic.
+#if HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 700
+namespace detail {
+
+// Min of two 64-bit double vectors via the 128-bit Min: zero-extends both
+// operands to twice the width and returns the lower half of the result.
+template <class V, HWY_IF_V_SIZE_V(V, 8), HWY_IF_T_SIZE_V(V, 8)>
+HWY_INLINE V F64Vec64Min(V a, V b) {
+  const DFromV<decltype(a)> d;
+  const Twice<decltype(d)> dt;
+  const auto wide_a = ZeroExtendVector(dt, a);
+  const auto wide_b = ZeroExtendVector(dt, b);
+  return LowerHalf(d, Min(wide_a, wide_b));
+}
+
+}  // namespace detail
+#endif  // HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 700
+
+// Min for 64-bit double vectors: vminnm_f64, or the F64Vec64Min helper on
+// compilers matched by the #if.
+HWY_API Vec64<double> Min(Vec64<double> a, Vec64<double> b) {
+#if HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 700
+  return detail::F64Vec64Min(a, b);
+#else
+  const float64x1_t smaller = vminnm_f64(a.raw, b.raw);
+  return Vec64<double>(smaller);
+#endif
+}
+
+// Min for 128-bit double vectors: forwards to vminnmq_f64.
+HWY_API Vec128<double> Min(Vec128<double> a, Vec128<double> b) {
+  const float64x2_t smaller = vminnmq_f64(a.raw, b.raw);
+  return Vec128<double>(smaller);
+}
+
+#else
+// Armv7: NaN if any is NaN.
+// Float Min is generated from the vmin intrinsic prefix here.
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(Min, vmin, _, 2)
+#endif  // HWY_ARCH_ARM_A64
+
+// ------------------------------ Max (IfThenElse, BroadcastSignBit)
+
+// Unsigned (no u64)
+// 8/16/32-bit unsigned Max is generated from the vmax intrinsic prefix.
+HWY_NEON_DEF_FUNCTION_UINT_8_16_32(Max, vmax, _, 2)
+
+// uint64 Max: compare-and-select on A64; otherwise b plus the saturating
+// difference a - b.
+template <size_t N>
+HWY_API Vec128<uint64_t, N> Max(Vec128<uint64_t, N> a, Vec128<uint64_t, N> b) {
+#if HWY_ARCH_ARM_A64
+  const auto a_is_larger = b < a;
+  return IfThenElse(a_is_larger, a, b);
+#else
+  const DFromV<decltype(a)> du;
+  const RebindToSigned<decltype(du)> di;
+  const auto excess = SaturatedSub(a, b);
+  return BitCast(du, BitCast(di, b) + BitCast(di, excess));
+#endif
+}
+
+// Signed (no i64)
+// 8/16/32-bit signed Max is generated from the vmax intrinsic prefix.
+HWY_NEON_DEF_FUNCTION_INT_8_16_32(Max, vmax, _, 2)
+
+// int64 Max: compare-and-select on A64; otherwise selects b where the
+// saturating difference a - b has its sign bit set, else a.
+template <size_t N>
+HWY_API Vec128<int64_t, N> Max(Vec128<int64_t, N> a, Vec128<int64_t, N> b) {
+#if HWY_ARCH_ARM_A64
+  const auto a_is_larger = b < a;
+  return IfThenElse(a_is_larger, a, b);
+#else
+  const Vec128<int64_t, N> diff = SaturatedSub(a, b);
+  const auto a_is_smaller = MaskFromVec(BroadcastSignBit(diff));
+  return IfThenElse(a_is_smaller, b, a);
+#endif
+}
+
+// Float: IEEE maximumNumber on v8
+#if HWY_ARCH_ARM_A64
+
+// 16- and 32-bit float Max, generated from the vmaxnm intrinsic prefix.
+HWY_NEON_DEF_FUNCTION_FLOAT_16_32(Max, vmaxnm, _, 2)
+
+// GCC 6.5 and earlier are missing the 64-bit (non-q) intrinsic, so define
+// in terms of the 128-bit intrinsic.
+#if HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 700
+namespace detail {
+
+// Max of two 64-bit double vectors via the 128-bit Max: zero-extends both
+// operands to twice the width and returns the lower half of the result.
+template <class V, HWY_IF_V_SIZE_V(V, 8), HWY_IF_T_SIZE_V(V, 8)>
+HWY_INLINE V F64Vec64Max(V a, V b) {
+  const DFromV<decltype(a)> d;
+  const Twice<decltype(d)> dt;
+  const auto wide_a = ZeroExtendVector(dt, a);
+  const auto wide_b = ZeroExtendVector(dt, b);
+  return LowerHalf(d, Max(wide_a, wide_b));
+}
+
+}  // namespace detail
+#endif  // HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 700
+
+// Max for 64-bit double vectors: vmaxnm_f64, or the F64Vec64Max helper on
+// compilers matched by the #if.
+HWY_API Vec64<double> Max(Vec64<double> a, Vec64<double> b) {
+#if HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 700
+  return detail::F64Vec64Max(a, b);
+#else
+  const float64x1_t larger = vmaxnm_f64(a.raw, b.raw);
+  return Vec64<double>(larger);
+#endif
+}
+
+// Max for 128-bit double vectors: forwards to vmaxnmq_f64.
+HWY_API Vec128<double> Max(Vec128<double> a, Vec128<double> b) {
+  const float64x2_t larger = vmaxnmq_f64(a.raw, b.raw);
+  return Vec128<double>(larger);
+}
+
+#else
+// Armv7: NaN if any is NaN.
+// Float Max is generated from the vmax intrinsic prefix here.
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(Max, vmax, _, 2)
+#endif  // HWY_ARCH_ARM_A64
+
+// ================================================== MEMORY
+
+// ------------------------------ Load 128
+
+// 128-bit LoadU overloads, one per lane type; each forwards the pointer to
+// the matching vld1q_* intrinsic and wraps the result.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U8_D(D)>
+HWY_API Vec128<uint8_t> LoadU(D /* tag */,
+                              const uint8_t* HWY_RESTRICT unaligned) {
+  const uint8x16_t loaded = vld1q_u8(unaligned);
+  return Vec128<uint8_t>(loaded);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U16_D(D)>
+HWY_API Vec128<uint16_t> LoadU(D /* tag */,
+                               const uint16_t* HWY_RESTRICT unaligned) {
+  const uint16x8_t loaded = vld1q_u16(unaligned);
+  return Vec128<uint16_t>(loaded);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U32_D(D)>
+HWY_API Vec128<uint32_t> LoadU(D /* tag */,
+                               const uint32_t* HWY_RESTRICT unaligned) {
+  const uint32x4_t loaded = vld1q_u32(unaligned);
+  return Vec128<uint32_t>(loaded);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U64_D(D)>
+HWY_API Vec128<uint64_t> LoadU(D /* tag */,
+                               const uint64_t* HWY_RESTRICT unaligned) {
+  const uint64x2_t loaded = vld1q_u64(unaligned);
+  return Vec128<uint64_t>(loaded);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I8_D(D)>
+HWY_API Vec128<int8_t> LoadU(D /* tag */,
+                             const int8_t* HWY_RESTRICT unaligned) {
+  const int8x16_t loaded = vld1q_s8(unaligned);
+  return Vec128<int8_t>(loaded);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I16_D(D)>
+HWY_API Vec128<int16_t> LoadU(D /* tag */,
+                              const int16_t* HWY_RESTRICT unaligned) {
+  const int16x8_t loaded = vld1q_s16(unaligned);
+  return Vec128<int16_t>(loaded);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I32_D(D)>
+HWY_API Vec128<int32_t> LoadU(D /* tag */,
+                              const int32_t* HWY_RESTRICT unaligned) {
+  const int32x4_t loaded = vld1q_s32(unaligned);
+  return Vec128<int32_t>(loaded);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I64_D(D)>
+HWY_API Vec128<int64_t> LoadU(D /* tag */,
+                              const int64_t* HWY_RESTRICT unaligned) {
+  const int64x2_t loaded = vld1q_s64(unaligned);
+  return Vec128<int64_t>(loaded);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API Vec128<float> LoadU(D /* tag */, const float* HWY_RESTRICT unaligned) {
+  const float32x4_t loaded = vld1q_f32(unaligned);
+  return Vec128<float>(loaded);
+}
+#if HWY_HAVE_FLOAT64
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API Vec128<double> LoadU(D /* tag */,
+                             const double* HWY_RESTRICT unaligned) {
+  const float64x2_t loaded = vld1q_f64(unaligned);
+  return Vec128<double>(loaded);
+}
+#endif  // HWY_HAVE_FLOAT64
+
+// ------------------------------ Load 64
+
+// 64-bit LoadU overloads: one per lane type, each forwarding to the matching
+// vld1_* intrinsic and wrapping the raw result in Vec64.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_U8_D(D)>
+HWY_API Vec64<uint8_t> LoadU(D /* tag */, const uint8_t* HWY_RESTRICT src) {
+  const auto raw = vld1_u8(src);
+  return Vec64<uint8_t>(raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_U16_D(D)>
+HWY_API Vec64<uint16_t> LoadU(D /* tag */, const uint16_t* HWY_RESTRICT src) {
+  const auto raw = vld1_u16(src);
+  return Vec64<uint16_t>(raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_U32_D(D)>
+HWY_API Vec64<uint32_t> LoadU(D /* tag */, const uint32_t* HWY_RESTRICT src) {
+  const auto raw = vld1_u32(src);
+  return Vec64<uint32_t>(raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_U64_D(D)>
+HWY_API Vec64<uint64_t> LoadU(D /* tag */, const uint64_t* HWY_RESTRICT src) {
+  const auto raw = vld1_u64(src);
+  return Vec64<uint64_t>(raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_I8_D(D)>
+HWY_API Vec64<int8_t> LoadU(D /* tag */, const int8_t* HWY_RESTRICT src) {
+  const auto raw = vld1_s8(src);
+  return Vec64<int8_t>(raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_I16_D(D)>
+HWY_API Vec64<int16_t> LoadU(D /* tag */, const int16_t* HWY_RESTRICT src) {
+  const auto raw = vld1_s16(src);
+  return Vec64<int16_t>(raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_I32_D(D)>
+HWY_API Vec64<int32_t> LoadU(D /* tag */, const int32_t* HWY_RESTRICT src) {
+  const auto raw = vld1_s32(src);
+  return Vec64<int32_t>(raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_I64_D(D)>
+HWY_API Vec64<int64_t> LoadU(D /* tag */, const int64_t* HWY_RESTRICT src) {
+  const auto raw = vld1_s64(src);
+  return Vec64<int64_t>(raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_F32_D(D)>
+HWY_API Vec64<float> LoadU(D /* tag */, const float* HWY_RESTRICT src) {
+  const auto raw = vld1_f32(src);
+  return Vec64<float>(raw);
+}
+#if HWY_HAVE_FLOAT64
+// Only available when the target provides double-precision vectors.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_F64_D(D)>
+HWY_API Vec64<double> LoadU(D /* tag */, const double* HWY_RESTRICT src) {
+  const auto raw = vld1_f64(src);
+  return Vec64<double>(raw);
+}
+#endif  // HWY_HAVE_FLOAT64
+
+// ------------------------------ Load 32
+
+// True 32-bit loads via the broadcasting vld1_dup_* intrinsics. The narrower
+// lane types further below are implemented on top of the uint32_t overload
+// because reinterpret_cast of the pointer leads to incorrect codegen on GCC.
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_U32_D(D)>
+HWY_API Vec32<uint32_t> LoadU(D /*tag*/, const uint32_t* HWY_RESTRICT src) {
+  const auto raw = vld1_dup_u32(src);
+  return Vec32<uint32_t>(raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_I32_D(D)>
+HWY_API Vec32<int32_t> LoadU(D /*tag*/, const int32_t* HWY_RESTRICT src) {
+  const auto raw = vld1_dup_s32(src);
+  return Vec32<int32_t>(raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_F32_D(D)>
+HWY_API Vec32<float> LoadU(D /*tag*/, const float* HWY_RESTRICT src) {
+  const auto raw = vld1_dup_f32(src);
+  return Vec32<float>(raw);
+}
+
+// 1- and 2-byte lanes (excluding the special float types): copy the four
+// bytes into a local uint32_t, load that, then reinterpret as the requested
+// vector type.
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_NOT_SPECIAL_FLOAT_D(D),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2))>
+HWY_API VFromD<D> LoadU(D d, const TFromD<D>* HWY_RESTRICT p) {
+  const Repartition<uint32_t, decltype(d)> du32;
+  uint32_t bits;
+  CopyBytes<4>(p, &bits);
+  const auto as_u32 = LoadU(du32, &bits);
+  return BitCast(d, as_u32);
+}
+
+// ------------------------------ Load 16
+
+// True 16-bit loads via the broadcasting vld1_dup_* intrinsics. The 8-bit x2
+// overload below builds on the uint16_t one because reinterpret_cast of the
+// pointer leads to incorrect codegen on GCC.
+template <class D, HWY_IF_LANES_D(D, 1), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> LoadU(D /* tag */, const uint16_t* HWY_RESTRICT src) {
+  const auto raw = vld1_dup_u16(src);
+  return VFromD<D>(raw);
+}
+template <class D, HWY_IF_LANES_D(D, 1), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> LoadU(D /* tag */, const int16_t* HWY_RESTRICT src) {
+  const auto raw = vld1_dup_s16(src);
+  return VFromD<D>(raw);
+}
+
+// Two 8-bit lanes: copy both bytes into a local uint16_t, load that, then
+// reinterpret as the requested vector type.
+template <class D, HWY_IF_LANES_D(D, 2), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> LoadU(D d, const TFromD<D>* HWY_RESTRICT p) {
+  const Repartition<uint16_t, decltype(d)> du16;
+  uint16_t bits;
+  CopyBytes<2>(p, &bits);
+  const auto as_u16 = LoadU(du16, &bits);
+  return BitCast(d, as_u16);
+}
+
+// ------------------------------ Load 8
+// Single 8-bit lane: uses the broadcasting vld1_dup_* intrinsics.
+template <class D, HWY_IF_LANES_D(D, 1), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> LoadU(D /* tag */, const uint8_t* HWY_RESTRICT src) {
+  const auto raw = vld1_dup_u8(src);
+  return VFromD<D>(raw);
+}
+template <class D, HWY_IF_LANES_D(D, 1), HWY_IF_I8_D(D)>
+HWY_API VFromD<D> LoadU(D /* tag */, const int8_t* HWY_RESTRICT src) {
+  const auto raw = vld1_dup_s8(src);
+  return VFromD<D>(raw);
+}
+
+// ------------------------------ Load misc
+
+// [b]float16_t may share its Raw type with uint16_t, so load through the
+// unsigned 16-bit overload and reinterpret the result.
+template <class D, HWY_IF_SPECIAL_FLOAT_D(D)>
+HWY_API VFromD<D> LoadU(D d, const TFromD<D>* HWY_RESTRICT p) {
+  const RebindToUnsigned<decltype(d)> du;
+  const uint16_t* HWY_RESTRICT as_u16 = reinterpret_cast<const uint16_t*>(p);
+  const auto loaded = LoadU(du, as_u16);
+  return BitCast(d, loaded);
+}
+
+// On Arm, Load is the same as LoadU, so simply forward to it.
+template <class D>
+HWY_API VFromD<D> Load(D d, const TFromD<D>* HWY_RESTRICT aligned) {
+  return LoadU(d, aligned);
+}
+
+// Performs a full Load from `aligned`, then zeroes every lane whose mask bit
+// is not set (via IfThenElseZero).
+template <class D>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D d,
+                             const TFromD<D>* HWY_RESTRICT aligned) {
+  const VFromD<D> loaded = Load(d, aligned);
+  return IfThenElseZero(m, loaded);
+}
+
+// Performs a full Load from `aligned`; lanes selected by `m` come from memory,
+// all other lanes come from the fallback vector `v`.
+template <class D>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, MFromD<D> m, D d,
+                               const TFromD<D>* HWY_RESTRICT aligned) {
+  const VFromD<D> loaded = Load(d, aligned);
+  return IfThenElse(m, loaded, v);
+}
+
+// Vectors are at most 128 bits here, so there is nothing to duplicate:
+// LoadDup128 is just an unaligned load.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> LoadDup128(D d, const TFromD<D>* HWY_RESTRICT src) {
+  return LoadU(d, src);
+}
+
+// ------------------------------ Store 128
+
+// 128-bit StoreU overloads: one per lane type, each writing v.raw to `dst`
+// with the matching vst1q_* intrinsic.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U8_D(D)>
+HWY_API void StoreU(Vec128<uint8_t> v, D /* tag */, uint8_t* HWY_RESTRICT dst) {
+  vst1q_u8(dst, v.raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U16_D(D)>
+HWY_API void StoreU(Vec128<uint16_t> v, D /* tag */,
+                    uint16_t* HWY_RESTRICT dst) {
+  vst1q_u16(dst, v.raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U32_D(D)>
+HWY_API void StoreU(Vec128<uint32_t> v, D /* tag */,
+                    uint32_t* HWY_RESTRICT dst) {
+  vst1q_u32(dst, v.raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U64_D(D)>
+HWY_API void StoreU(Vec128<uint64_t> v, D /* tag */,
+                    uint64_t* HWY_RESTRICT dst) {
+  vst1q_u64(dst, v.raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I8_D(D)>
+HWY_API void StoreU(Vec128<int8_t> v, D /* tag */, int8_t* HWY_RESTRICT dst) {
+  vst1q_s8(dst, v.raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I16_D(D)>
+HWY_API void StoreU(Vec128<int16_t> v, D /* tag */, int16_t* HWY_RESTRICT dst) {
+  vst1q_s16(dst, v.raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I32_D(D)>
+HWY_API void StoreU(Vec128<int32_t> v, D /* tag */, int32_t* HWY_RESTRICT dst) {
+  vst1q_s32(dst, v.raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I64_D(D)>
+HWY_API void StoreU(Vec128<int64_t> v, D /* tag */, int64_t* HWY_RESTRICT dst) {
+  vst1q_s64(dst, v.raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API void StoreU(Vec128<float> v, D /* tag */, float* HWY_RESTRICT dst) {
+  vst1q_f32(dst, v.raw);
+}
+#if HWY_HAVE_FLOAT64
+// Only available when the target provides double-precision vectors.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API void StoreU(Vec128<double> v, D /* tag */, double* HWY_RESTRICT dst) {
+  vst1q_f64(dst, v.raw);
+}
+#endif  // HWY_HAVE_FLOAT64
+
+// ------------------------------ Store 64
+
+// 64-bit StoreU overloads: one per lane type, each writing v.raw to `dst`
+// with the matching vst1_* intrinsic.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_U8_D(D)>
+HWY_API void StoreU(Vec64<uint8_t> v, D /* tag */, uint8_t* HWY_RESTRICT dst) {
+  vst1_u8(dst, v.raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_U16_D(D)>
+HWY_API void StoreU(Vec64<uint16_t> v, D /* tag */,
+                    uint16_t* HWY_RESTRICT dst) {
+  vst1_u16(dst, v.raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_U32_D(D)>
+HWY_API void StoreU(Vec64<uint32_t> v, D /* tag */,
+                    uint32_t* HWY_RESTRICT dst) {
+  vst1_u32(dst, v.raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_U64_D(D)>
+HWY_API void StoreU(Vec64<uint64_t> v, D /* tag */,
+                    uint64_t* HWY_RESTRICT dst) {
+  vst1_u64(dst, v.raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_I8_D(D)>
+HWY_API void StoreU(Vec64<int8_t> v, D /* tag */, int8_t* HWY_RESTRICT dst) {
+  vst1_s8(dst, v.raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_I16_D(D)>
+HWY_API void StoreU(Vec64<int16_t> v, D /* tag */, int16_t* HWY_RESTRICT dst) {
+  vst1_s16(dst, v.raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_I32_D(D)>
+HWY_API void StoreU(Vec64<int32_t> v, D /* tag */, int32_t* HWY_RESTRICT dst) {
+  vst1_s32(dst, v.raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_I64_D(D)>
+HWY_API void StoreU(Vec64<int64_t> v, D /* tag */, int64_t* HWY_RESTRICT dst) {
+  vst1_s64(dst, v.raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_F32_D(D)>
+HWY_API void StoreU(Vec64<float> v, D /* tag */, float* HWY_RESTRICT dst) {
+  vst1_f32(dst, v.raw);
+}
+#if HWY_HAVE_FLOAT64
+// Only available when the target provides double-precision vectors.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_F64_D(D)>
+HWY_API void StoreU(Vec64<double> v, D /* tag */, double* HWY_RESTRICT dst) {
+  vst1_f64(dst, v.raw);
+}
+#endif  // HWY_HAVE_FLOAT64
+
+// ------------------------------ Store 32
+
+// 32-bit vectors with a single 32-bit lane: store lane 0 of the raw register
+// with the matching vst1_lane_* intrinsic.
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_U32_D(D)>
+HWY_API void StoreU(Vec32<uint32_t> v, D, uint32_t* HWY_RESTRICT dst) {
+  vst1_lane_u32(dst, v.raw, 0);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_I32_D(D)>
+HWY_API void StoreU(Vec32<int32_t> v, D, int32_t* HWY_RESTRICT dst) {
+  vst1_lane_s32(dst, v.raw, 0);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_F32_D(D)>
+HWY_API void StoreU(Vec32<float> v, D, float* HWY_RESTRICT dst) {
+  vst1_lane_f32(dst, v.raw, 0);
+}
+
+// 32-bit vectors with 8-bit lanes: reinterpret as one uint32_t lane, extract
+// it, and copy its four bytes to `p`.
+template <class D, HWY_IF_V_SIZE_D(D, 4), typename T = TFromD<D>,
+          HWY_IF_T_SIZE(T, 1)>
+HWY_API void StoreU(Vec32<T> v, D d, T* HWY_RESTRICT p) {
+  const Repartition<uint32_t, decltype(d)> du32;
+  const uint32_t bits = GetLane(BitCast(du32, v));
+  CopyBytes<4>(&bits, p);
+}
+
+// 16-bit lanes are overloaded per type (uint16_t / int16_t) directly to avoid
+// ambiguity with [b]float16_t. Same extract-and-copy approach as above.
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_U16_D(D)>
+HWY_API void StoreU(Vec32<uint16_t> v, D d, uint16_t* HWY_RESTRICT p) {
+  const Repartition<uint32_t, decltype(d)> du32;
+  const uint32_t bits = GetLane(BitCast(du32, v));
+  CopyBytes<4>(&bits, p);
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_I16_D(D)>
+HWY_API void StoreU(Vec32<int16_t> v, D d, int16_t* HWY_RESTRICT p) {
+  const Repartition<uint32_t, decltype(d)> du32;
+  const uint32_t bits = GetLane(BitCast(du32, v));
+  CopyBytes<4>(&bits, p);
+}
+
+// ------------------------------ Store 16
+
+// 16-bit vectors with a single 16-bit lane: store lane 0 directly.
+template <class D, HWY_IF_V_SIZE_D(D, 2), HWY_IF_U16_D(D)>
+HWY_API void StoreU(Vec16<uint16_t> v, D, uint16_t* HWY_RESTRICT dst) {
+  vst1_lane_u16(dst, v.raw, 0);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 2), HWY_IF_I16_D(D)>
+HWY_API void StoreU(Vec16<int16_t> v, D, int16_t* HWY_RESTRICT dst) {
+  vst1_lane_s16(dst, v.raw, 0);
+}
+
+// 16-bit vectors with two 8-bit lanes: reinterpret as one uint16_t lane,
+// extract it, and copy its two bytes to `p`.
+template <class D, HWY_IF_V_SIZE_D(D, 2), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API void StoreU(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT p) {
+  const Repartition<uint16_t, decltype(d)> du16;
+  const auto as_u16 = BitCast(du16, v);
+  const uint16_t bits = GetLane(as_u16);
+  CopyBytes<2>(&bits, p);
+}
+
+// ------------------------------ Store 8
+
+// Single 8-bit lane: store lane 0 of the raw register.
+template <class D, HWY_IF_V_SIZE_D(D, 1), HWY_IF_U8_D(D)>
+HWY_API void StoreU(Vec128<uint8_t, 1> v, D, uint8_t* HWY_RESTRICT dst) {
+  vst1_lane_u8(dst, v.raw, 0);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 1), HWY_IF_I8_D(D)>
+HWY_API void StoreU(Vec128<int8_t, 1> v, D, int8_t* HWY_RESTRICT dst) {
+  vst1_lane_s8(dst, v.raw, 0);
+}
+
+// [b]float16_t may share its Raw type with uint16_t, so reinterpret the vector
+// and the pointer as unsigned 16-bit and store through that overload.
+template <class D, HWY_IF_SPECIAL_FLOAT_D(D)>
+HWY_API void StoreU(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT p) {
+  const RebindToUnsigned<decltype(d)> du;
+  uint16_t* HWY_RESTRICT as_u16 = reinterpret_cast<uint16_t*>(p);
+  StoreU(BitCast(du, v), du, as_u16);
+}
+
+// The push/pop pair scopes a diagnostic override to the Store wrapper below:
+// when compiling with actual GCC, -Wmaybe-uninitialized is turned off for it.
+// NOTE(review): the "disable : 4701" half looks like the MSVC spelling of the
+// same warning - confirm against the HWY_DIAGNOSTICS_OFF definition.
+HWY_DIAGNOSTICS(push)
+#if HWY_COMPILER_GCC_ACTUAL
+HWY_DIAGNOSTICS_OFF(disable : 4701, ignored "-Wmaybe-uninitialized")
+#endif
+
+// On Arm, Store is the same as StoreU.
+// `aligned` is forwarded unchanged as the destination pointer.
+template <class D>
+HWY_API void Store(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT aligned) {
+  StoreU(v, d, aligned);
+}
+
+HWY_DIAGNOSTICS(pop)
+
+// Read-modify-write store: loads the current contents of `p`, replaces the
+// lanes selected by `m` with the corresponding lanes of `v`, and writes the
+// whole vector back.
+template <class D>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D d,
+                          TFromD<D>* HWY_RESTRICT p) {
+  // Treat as unsigned so that we correctly support float16.
+  const RebindToUnsigned<decltype(d)> du;
+  const auto mask_u = RebindMask(du, m);
+  const auto new_bits = BitCast(du, v);
+  const auto old_bits = BitCast(du, LoadU(d, p));
+  StoreU(BitCast(d, IfThenElse(mask_u, new_bits, old_bits)), d, p);
+}
+
+// ------------------------------ Non-temporal stores
+
+// Same as aligned stores on non-x86.
+
+// Stream: on A64 builds, first issues a prefetch hint for `aligned`, then
+// performs an ordinary Store. On other Arm builds it is just Store.
+template <class D>
+HWY_API void Stream(const VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT aligned) {
+#if HWY_ARCH_ARM_A64
+#if HWY_COMPILER_GCC
+  // GCC builtin: second argument 1 = prefetch for write, third argument 0 =
+  // no temporal locality.
+  __builtin_prefetch(aligned, 1, 0);
+#elif HWY_COMPILER_MSVC
+  // NOTE(review): 0x11 presumably selects a streaming store prefetch - confirm
+  // against the MSVC ARM64 intrinsic documentation.
+  __prefetch2(aligned, 0x11);
+#endif
+#endif
+  Store(v, d, aligned);
+}
+
+// ================================================== CONVERT
+
+// ------------------------------ ConvertTo
+
+#if HWY_ARCH_ARM_A64 && HWY_HAVE_FLOAT16
+
+// 16-bit integer -> float16_t conversions (A64 with native float16 only).
+// Full vectors use the vcvtq_* intrinsics, vectors of <= 8 bytes use vcvt_*.
+// TODO(janwas): use macro generator instead of handwritten
+template <class D, HWY_IF_F16_D(D)>
+HWY_API Vec128<float16_t> ConvertTo(D /* tag */, Vec128<int16_t> v) {
+  const auto raw = vcvtq_f16_s16(v.raw);
+  return Vec128<float16_t>(raw);
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_F16_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, VFromD<Rebind<int16_t, D>> v) {
+  const auto raw = vcvt_f16_s16(v.raw);
+  return VFromD<D>(raw);
+}
+
+template <class D, HWY_IF_F16_D(D)>
+HWY_API Vec128<float16_t> ConvertTo(D /* tag */, Vec128<uint16_t> v) {
+  const auto raw = vcvtq_f16_u16(v.raw);
+  return Vec128<float16_t>(raw);
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_F16_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, VFromD<Rebind<uint16_t, D>> v) {
+  const auto raw = vcvt_f16_u16(v.raw);
+  return VFromD<D>(raw);
+}
+
+#endif  // HWY_ARCH_ARM_A64 && HWY_HAVE_FLOAT16
+
+// 32-bit integer -> float conversions. Full vectors use vcvtq_f32_*, vectors
+// of <= 8 bytes use vcvt_f32_*.
+template <class D, HWY_IF_F32_D(D)>
+HWY_API Vec128<float> ConvertTo(D /* tag */, Vec128<int32_t> v) {
+  const auto raw = vcvtq_f32_s32(v.raw);
+  return Vec128<float>(raw);
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, VFromD<RebindToSigned<D>> v) {
+  const auto raw = vcvt_f32_s32(v.raw);
+  return VFromD<D>(raw);
+}
+
+template <class D, HWY_IF_F32_D(D)>
+HWY_API Vec128<float> ConvertTo(D /* tag */, Vec128<uint32_t> v) {
+  const auto raw = vcvtq_f32_u32(v.raw);
+  return Vec128<float>(raw);
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, VFromD<RebindToUnsigned<D>> v) {
+  const auto raw = vcvt_f32_u32(v.raw);
+  return VFromD<D>(raw);
+}
+
+// float -> int32_t. Truncates (rounds toward zero).
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec128<int32_t> ConvertTo(D /* tag */, Vec128<float> v) {
+  const auto raw = vcvtq_s32_f32(v.raw);
+  return Vec128<int32_t>(raw);
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, VFromD<RebindToFloat<D>> v) {
+  const auto raw = vcvt_s32_f32(v.raw);
+  return VFromD<D>(raw);
+}
+
+#if HWY_HAVE_FLOAT64
+
+// 64-bit integer <-> double conversions, only compiled when the target has
+// double-precision vectors. Full vectors use the vcvtq_* intrinsics and
+// single-lane (Vec64) vectors use vcvt_*, except for the old-GCC workarounds
+// noted below.
+
+// int64_t -> double.
+template <class D, HWY_IF_F64_D(D)>
+HWY_API Vec128<double> ConvertTo(D /* tag */, Vec128<int64_t> v) {
+  return Vec128<double>(vcvtq_f64_s64(v.raw));
+}
+template <class D, HWY_IF_F64_D(D)>
+HWY_API Vec64<double> ConvertTo(D /* tag */, Vec64<int64_t> v) {
+// GCC 6.5 and earlier are missing the 64-bit (non-q) intrinsic.
+#if HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 700
+  // Fallback: convert the single lane with a scalar cast and broadcast it.
+  return Set(Full64<double>(), static_cast<double>(GetLane(v)));
+#else
+  return Vec64<double>(vcvt_f64_s64(v.raw));
+#endif  // HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 700
+}
+
+// uint64_t -> double.
+template <class D, HWY_IF_F64_D(D)>
+HWY_API Vec128<double> ConvertTo(D /* tag */, Vec128<uint64_t> v) {
+  return Vec128<double>(vcvtq_f64_u64(v.raw));
+}
+template <class D, HWY_IF_F64_D(D)>
+HWY_API Vec64<double> ConvertTo(D /* tag */, Vec64<uint64_t> v) {
+  return Vec64<double>(vcvt_f64_u64(v.raw));
+}
+
+// double -> int64_t.
+// Truncates (rounds toward zero).
+template <class D, HWY_IF_I64_D(D)>
+HWY_API Vec128<int64_t> ConvertTo(D /* tag */, Vec128<double> v) {
+  return Vec128<int64_t>(vcvtq_s64_f64(v.raw));
+}
+template <class D, HWY_IF_I64_D(D)>
+HWY_API Vec64<int64_t> ConvertTo(D di, Vec64<double> v) {
+  // GCC 6.5 and earlier are missing the 64-bit (non-q) intrinsic. Use the
+  // 128-bit version to avoid UB from casting double -> int64_t.
+#if HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 700
+  // Duplicate v into both halves, convert as a full vector, keep the lower
+  // half.
+  const Full128<double> ddt;
+  const Twice<decltype(di)> dit;
+  return LowerHalf(di, ConvertTo(dit, Combine(ddt, v, v)));
+#else
+  // `di` is only needed by the fallback branch; silence unused warnings.
+  (void)di;
+  return Vec64<int64_t>(vcvt_s64_f64(v.raw));
+#endif
+}
+
+#endif  // HWY_HAVE_FLOAT64
+
+#if HWY_ARCH_ARM_A64 && HWY_HAVE_FLOAT16
+
+// float16_t -> 16-bit integer conversions (A64 with native float16 only).
+// Truncates (rounds toward zero).
+template <class D, HWY_IF_I16_D(D)>
+HWY_API Vec128<int16_t> ConvertTo(D /* tag */, Vec128<float16_t> v) {
+  const auto raw = vcvtq_s16_f16(v.raw);
+  return Vec128<int16_t>(raw);
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, VFromD<Rebind<float16_t, D>> v) {
+  const auto raw = vcvt_s16_f16(v.raw);
+  return VFromD<D>(raw);
+}
+
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec128<uint16_t> ConvertTo(D /* tag */, Vec128<float16_t> v) {
+  const auto raw = vcvtq_u16_f16(v.raw);
+  return Vec128<uint16_t>(raw);
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, VFromD<Rebind<float16_t, D>> v) {
+  const auto raw = vcvt_u16_f16(v.raw);
+  return VFromD<D>(raw);
+}
+
+#endif  // HWY_ARCH_ARM_A64 && HWY_HAVE_FLOAT16
+
+// ------------------------------ PromoteTo (ConvertTo)
+
+// Unsigned inputs, full-vector result: zero-extend with vmovl_u*. The
+// overloads with a signed result type compute the same unsigned widening and
+// then BitCast it to the signed vector type.
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec128<uint16_t> PromoteTo(D /* tag */, Vec64<uint8_t> v) {
+  const uint16x8_t wide = vmovl_u8(v.raw);
+  return Vec128<uint16_t>(wide);
+}
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec128<uint32_t> PromoteTo(D /* tag */, Vec32<uint8_t> v) {
+  // Two widening steps: u8 -> u16, then the lower four u16 -> u32.
+  const uint16x8_t as_u16 = vmovl_u8(v.raw);
+  const uint16x4_t lower = vget_low_u16(as_u16);
+  return Vec128<uint32_t>(vmovl_u16(lower));
+}
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec128<uint32_t> PromoteTo(D /* tag */, Vec64<uint16_t> v) {
+  const uint32x4_t wide = vmovl_u16(v.raw);
+  return Vec128<uint32_t>(wide);
+}
+template <class D, HWY_IF_U64_D(D)>
+HWY_API Vec128<uint64_t> PromoteTo(D /* tag */, Vec64<uint32_t> v) {
+  const uint64x2_t wide = vmovl_u32(v.raw);
+  return Vec128<uint64_t>(wide);
+}
+template <class D, HWY_IF_I16_D(D)>
+HWY_API Vec128<int16_t> PromoteTo(D d, Vec64<uint8_t> v) {
+  const Vec128<uint16_t> wide(vmovl_u8(v.raw));
+  return BitCast(d, wide);
+}
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec128<int32_t> PromoteTo(D d, Vec32<uint8_t> v) {
+  const uint16x8_t as_u16 = vmovl_u8(v.raw);
+  const Vec128<uint32_t> wide(vmovl_u16(vget_low_u16(as_u16)));
+  return BitCast(d, wide);
+}
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec128<int32_t> PromoteTo(D d, Vec64<uint16_t> v) {
+  const Vec128<uint32_t> wide(vmovl_u16(v.raw));
+  return BitCast(d, wide);
+}
+template <class D, HWY_IF_I64_D(D)>
+HWY_API Vec128<int64_t> PromoteTo(D d, Vec64<uint32_t> v) {
+  const Vec128<uint64_t> wide(vmovl_u32(v.raw));
+  return BitCast(d, wide);
+}
+
+// Unsigned inputs, result of <= 8 bytes: zero-extend with vmovl_u* and keep
+// only the lower half of the widened register.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<uint8_t, D>> v) {
+  const uint16x8_t wide = vmovl_u8(v.raw);
+  return VFromD<D>(vget_low_u16(wide));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<uint8_t, D>> v) {
+  // u8 -> u16 -> u32, taking the lower half after each widening.
+  const uint16x8_t as_u16 = vmovl_u8(v.raw);
+  const uint32x4_t as_u32 = vmovl_u16(vget_low_u16(as_u16));
+  return VFromD<D>(vget_low_u32(as_u32));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<uint16_t, D>> v) {
+  const uint32x4_t wide = vmovl_u16(v.raw);
+  return VFromD<D>(vget_low_u32(wide));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<uint32_t, D>> v) {
+  const uint64x2_t wide = vmovl_u32(v.raw);
+  return VFromD<D>(vget_low_u64(wide));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> PromoteTo(D d, VFromD<Rebind<uint8_t, D>> v) {
+  // Widen as unsigned, then reinterpret as the signed result type.
+  const VFromD<RebindToUnsigned<D>> wide(vget_low_u16(vmovl_u8(v.raw)));
+  return BitCast(d, wide);
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<uint8_t, D>> v) {
+  const uint16x8_t as_u16 = vmovl_u8(v.raw);
+  const uint32x4_t as_u32 = vmovl_u16(vget_low_u16(as_u16));
+  const int32x4_t as_i32 = vreinterpretq_s32_u32(as_u32);
+  return VFromD<D>(vget_low_s32(as_i32));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<uint16_t, D>> v) {
+  const int32x4_t as_i32 = vreinterpretq_s32_u32(vmovl_u16(v.raw));
+  return VFromD<D>(vget_low_s32(as_i32));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I64_D(D)>
+HWY_API VFromD<D> PromoteTo(D d, VFromD<Rebind<uint32_t, D>> v) {
+  const VFromD<RebindToUnsigned<D>> wide(vget_low_u64(vmovl_u32(v.raw)));
+  return BitCast(d, wide);
+}
+
+// U8/U16 to U64/I64 in two hops: zero-extend to U32 first, then promote that
+// intermediate to TFromD<D>.
+template <class D, class V, HWY_IF_UI64_D(D),
+          HWY_IF_LANES_D(D, HWY_MAX_LANES_V(V)), HWY_IF_UNSIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2))>
+HWY_API VFromD<D> PromoteTo(D d, V v) {
+  const Rebind<uint32_t, decltype(d)> d_mid;
+  const auto as_u32 = PromoteTo(d_mid, v);
+  return PromoteTo(d, as_u32);
+}
+
+// Signed inputs, full-vector result: sign-extend with vmovl_s*.
+template <class D, HWY_IF_I16_D(D)>
+HWY_API Vec128<int16_t> PromoteTo(D /* tag */, Vec64<int8_t> v) {
+  const int16x8_t wide = vmovl_s8(v.raw);
+  return Vec128<int16_t>(wide);
+}
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec128<int32_t> PromoteTo(D /* tag */, Vec32<int8_t> v) {
+  // Two widening steps: i8 -> i16, then the lower four i16 -> i32.
+  const int16x8_t as_i16 = vmovl_s8(v.raw);
+  const int16x4_t lower = vget_low_s16(as_i16);
+  return Vec128<int32_t>(vmovl_s16(lower));
+}
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec128<int32_t> PromoteTo(D /* tag */, Vec64<int16_t> v) {
+  const int32x4_t wide = vmovl_s16(v.raw);
+  return Vec128<int32_t>(wide);
+}
+template <class D, HWY_IF_I64_D(D)>
+HWY_API Vec128<int64_t> PromoteTo(D /* tag */, Vec64<int32_t> v) {
+  const int64x2_t wide = vmovl_s32(v.raw);
+  return Vec128<int64_t>(wide);
+}
+
+// Signed inputs, result of <= 8 bytes: sign-extend with vmovl_s* and keep
+// only the lower half of the widened register.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<int8_t, D>> v) {
+  const int16x8_t wide = vmovl_s8(v.raw);
+  return VFromD<D>(vget_low_s16(wide));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<int8_t, D>> v) {
+  // i8 -> i16 -> i32, taking the lower half after each widening.
+  const int16x8_t as_i16 = vmovl_s8(v.raw);
+  const int32x4_t as_i32 = vmovl_s16(vget_low_s16(as_i16));
+  return VFromD<D>(vget_low_s32(as_i32));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<int16_t, D>> v) {
+  const int32x4_t wide = vmovl_s16(v.raw);
+  return VFromD<D>(vget_low_s32(wide));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<int32_t, D>> v) {
+  const int64x2_t wide = vmovl_s32(v.raw);
+  return VFromD<D>(vget_low_s64(wide));
+}
+
+// I8/I16 to I64 in two hops: promote to I32 first, then promote that
+// intermediate to I64.
+template <class D, class V, HWY_IF_I64_D(D),
+          HWY_IF_LANES_D(D, HWY_MAX_LANES_V(V)), HWY_IF_SIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2))>
+HWY_API VFromD<D> PromoteTo(D d, V v) {
+  const Rebind<int32_t, decltype(d)> d_mid;
+  const auto as_i32 = PromoteTo(d_mid, v);
+  return PromoteTo(d, as_i32);
+}
+
+#if HWY_NEON_HAVE_FLOAT16C
+
+// Per-target flag to prevent generic_ops-inl.h from defining f16 conversions.
+// The flag is toggled (defined if absent, undefined if present) rather than
+// simply defined.
+#ifdef HWY_NATIVE_F16C
+#undef HWY_NATIVE_F16C
+#else
+#define HWY_NATIVE_F16C
+#endif
+
+// float16_t -> float for a full 128-bit result: vcvt_f32_f16 widens the four
+// half-precision lanes of a 64-bit input.
+template <class D, HWY_IF_F32_D(D)>
+HWY_API Vec128<float> PromoteTo(D /* tag */, Vec64<float16_t> v) {
+  return Vec128<float>(vcvt_f32_f16(v.raw));
+}
+// Result of <= 8 bytes: same conversion, keeping only the lower half.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<float16_t, D>> v) {
+  return VFromD<D>(vget_low_f32(vcvt_f32_f16(v.raw)));
+}
+
+#endif  // HWY_NEON_HAVE_FLOAT16C
+
+#if HWY_HAVE_FLOAT64
+
+// float -> double, full-vector result.
+template <class D, HWY_IF_F64_D(D)>
+HWY_API Vec128<double> PromoteTo(D /* tag */, Vec64<float> v) {
+  const float64x2_t wide = vcvt_f64_f32(v.raw);
+  return Vec128<double>(wide);
+}
+
+// float -> double, single-lane result: convert, then keep the lower half.
+template <class D, HWY_IF_F64_D(D)>
+HWY_API Vec64<double> PromoteTo(D /* tag */, Vec32<float> v) {
+  const float64x2_t wide = vcvt_f64_f32(v.raw);
+  return Vec64<double>(vget_low_f64(wide));
+}
+
+// int32_t -> double: sign-extend to int64_t first, then convert.
+template <class D, HWY_IF_F64_D(D)>
+HWY_API Vec128<double> PromoteTo(D /* tag */, Vec64<int32_t> v) {
+  const int64x2_t wide = vmovl_s32(v.raw);
+  const float64x2_t converted = vcvtq_f64_s64(wide);
+  return Vec128<double>(converted);
+}
+
+// Single-lane variant: sign-extend, keep the lower int64_t lane, and reuse
+// the Vec64<int64_t> ConvertTo overload.
+template <class D, HWY_IF_F64_D(D)>
+HWY_API Vec64<double> PromoteTo(D d, Vec32<int32_t> v) {
+  const int64x2_t wide = vmovl_s32(v.raw);
+  const Vec64<int64_t> lower(vget_low_s64(wide));
+  return ConvertTo(d, lower);
+}
+
+#endif  // HWY_HAVE_FLOAT64
+
+// ------------------------------ PromoteUpperTo
+
+#if HWY_ARCH_ARM_A64
+
+// Per-target flag to prevent generic_ops-inl.h from defining PromoteUpperTo.
+#ifdef HWY_NATIVE_PROMOTE_UPPER_TO
+#undef HWY_NATIVE_PROMOTE_UPPER_TO
+#else
+#define HWY_NATIVE_PROMOTE_UPPER_TO
+#endif
+
+// Unsigned inputs: zero-extend the upper half of a full vector with
+// vmovl_high_u*. Signed result types reuse the unsigned widening and BitCast.
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec128<uint16_t> PromoteUpperTo(D /* tag */, Vec128<uint8_t> v) {
+  const uint16x8_t wide = vmovl_high_u8(v.raw);
+  return Vec128<uint16_t>(wide);
+}
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec128<uint32_t> PromoteUpperTo(D /* tag */, Vec128<uint16_t> v) {
+  const uint32x4_t wide = vmovl_high_u16(v.raw);
+  return Vec128<uint32_t>(wide);
+}
+template <class D, HWY_IF_U64_D(D)>
+HWY_API Vec128<uint64_t> PromoteUpperTo(D /* tag */, Vec128<uint32_t> v) {
+  const uint64x2_t wide = vmovl_high_u32(v.raw);
+  return Vec128<uint64_t>(wide);
+}
+template <class D, HWY_IF_I16_D(D)>
+HWY_API Vec128<int16_t> PromoteUpperTo(D d, Vec128<uint8_t> v) {
+  const Vec128<uint16_t> wide(vmovl_high_u8(v.raw));
+  return BitCast(d, wide);
+}
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec128<int32_t> PromoteUpperTo(D d, Vec128<uint16_t> v) {
+  const Vec128<uint32_t> wide(vmovl_high_u16(v.raw));
+  return BitCast(d, wide);
+}
+template <class D, HWY_IF_I64_D(D)>
+HWY_API Vec128<int64_t> PromoteUpperTo(D d, Vec128<uint32_t> v) {
+  const Vec128<uint64_t> wide(vmovl_high_u32(v.raw));
+  return BitCast(d, wide);
+}
+
+// Signed inputs: sign-extend the upper half of a full vector with
+// vmovl_high_s*.
+template <class D, HWY_IF_I16_D(D)>
+HWY_API Vec128<int16_t> PromoteUpperTo(D /* tag */, Vec128<int8_t> v) {
+  const int16x8_t wide = vmovl_high_s8(v.raw);
+  return Vec128<int16_t>(wide);
+}
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec128<int32_t> PromoteUpperTo(D /* tag */, Vec128<int16_t> v) {
+  const int32x4_t wide = vmovl_high_s16(v.raw);
+  return Vec128<int32_t>(wide);
+}
+template <class D, HWY_IF_I64_D(D)>
+HWY_API Vec128<int64_t> PromoteUpperTo(D /* tag */, Vec128<int32_t> v) {
+  const int64x2_t wide = vmovl_high_s32(v.raw);
+  return Vec128<int64_t>(wide);
+}
+
+#if HWY_NEON_HAVE_FLOAT16C
+
+// float16_t -> float for the upper half of a full vector.
+template <class D, HWY_IF_F32_D(D)>
+HWY_API Vec128<float> PromoteUpperTo(D /* tag */, Vec128<float16_t> v) {
+  const auto wide = vcvt_high_f32_f16(v.raw);
+  return Vec128<float>(wide);
+}
+
+#endif  // HWY_NEON_HAVE_FLOAT16C
+
+// bfloat16_t -> float for the upper half of a full vector: view the input as
+// uint16_t lanes, widen the upper half to 32 bits, shift each lane left by 16
+// and reinterpret the result as float.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> PromoteUpperTo(D df32, VFromD<Repartition<bfloat16_t, D>> v) {
+  const Repartition<uint16_t, decltype(df32)> du16;
+  const RebindToSigned<decltype(df32)> di32;
+  const auto bits16 = BitCast(du16, v);
+  const auto widened = PromoteUpperTo(di32, bits16);
+  return BitCast(df32, ShiftLeft<16>(widened));
+}
+
+#if HWY_HAVE_FLOAT64
+
+// float -> double for the upper half of a full vector.
+template <class D, HWY_IF_F64_D(D)>
+HWY_API Vec128<double> PromoteUpperTo(D /* tag */, Vec128<float> v) {
+  const float64x2_t wide = vcvt_high_f64_f32(v.raw);
+  return Vec128<double>(wide);
+}
+
+// int32_t -> double for the upper half: sign-extend to int64_t, then convert.
+template <class D, HWY_IF_F64_D(D)>
+HWY_API Vec128<double> PromoteUpperTo(D /* tag */, Vec128<int32_t> v) {
+  const int64x2_t wide = vmovl_high_s32(v.raw);
+  const float64x2_t converted = vcvtq_f64_s64(wide);
+  return Vec128<double>(converted);
+}
+
+#endif  // HWY_HAVE_FLOAT64
+
+// Generic version for <=64 bit input/output (_high is only for full vectors):
+// extract the upper half of `v` and promote it with PromoteTo.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), class V>
+HWY_API VFromD<D> PromoteUpperTo(D d, V v) {
+  const Rebind<TFromV<V>, decltype(d)> d_half;
+  const auto upper = UpperHalf(d_half, v);
+  return PromoteTo(d, upper);
+}
+
+#endif  // HWY_ARCH_ARM_A64
+
+// ------------------------------ DemoteTo (ConvertTo)
+
+// From full vector to half or quarter, using the saturating narrowing
+// intrinsics (vqmovn_* keeps signedness, vqmovun_* narrows signed to
+// unsigned). Quarter-size results narrow twice; the intermediate is
+// duplicated with vcombine_* to form the full-width input the second
+// narrowing requires.
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec64<uint16_t> DemoteTo(D /* tag */, Vec128<int32_t> v) {
+  const uint16x4_t narrow = vqmovun_s32(v.raw);
+  return Vec64<uint16_t>(narrow);
+}
+template <class D, HWY_IF_I16_D(D)>
+HWY_API Vec64<int16_t> DemoteTo(D /* tag */, Vec128<int32_t> v) {
+  const int16x4_t narrow = vqmovn_s32(v.raw);
+  return Vec64<int16_t>(narrow);
+}
+template <class D, HWY_IF_U8_D(D)>
+HWY_API Vec32<uint8_t> DemoteTo(D /* tag */, Vec128<int32_t> v) {
+  const uint16x4_t as_u16 = vqmovun_s32(v.raw);
+  const uint16x8_t doubled = vcombine_u16(as_u16, as_u16);
+  return Vec32<uint8_t>(vqmovn_u16(doubled));
+}
+template <class D, HWY_IF_U8_D(D)>
+HWY_API Vec64<uint8_t> DemoteTo(D /* tag */, Vec128<int16_t> v) {
+  const uint8x8_t narrow = vqmovun_s16(v.raw);
+  return Vec64<uint8_t>(narrow);
+}
+template <class D, HWY_IF_I8_D(D)>
+HWY_API Vec32<int8_t> DemoteTo(D /* tag */, Vec128<int32_t> v) {
+  const int16x4_t as_i16 = vqmovn_s32(v.raw);
+  const int16x8_t doubled = vcombine_s16(as_i16, as_i16);
+  return Vec32<int8_t>(vqmovn_s16(doubled));
+}
+template <class D, HWY_IF_I8_D(D)>
+HWY_API Vec64<int8_t> DemoteTo(D /* tag */, Vec128<int16_t> v) {
+  const int8x8_t narrow = vqmovn_s16(v.raw);
+  return Vec64<int8_t>(narrow);
+}
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec64<uint16_t> DemoteTo(D /* tag */, Vec128<uint32_t> v) {
+  const uint16x4_t narrow = vqmovn_u32(v.raw);
+  return Vec64<uint16_t>(narrow);
+}
+template <class D, HWY_IF_U8_D(D)>
+HWY_API Vec32<uint8_t> DemoteTo(D /* tag */, Vec128<uint32_t> v) {
+  const uint16x4_t as_u16 = vqmovn_u32(v.raw);
+  const uint16x8_t doubled = vcombine_u16(as_u16, as_u16);
+  return Vec32<uint8_t>(vqmovn_u16(doubled));
+}
+template <class D, HWY_IF_U8_D(D)>
+HWY_API Vec64<uint8_t> DemoteTo(D /* tag */, Vec128<uint16_t> v) {
+  const uint8x8_t narrow = vqmovn_u16(v.raw);
+  return Vec64<uint8_t>(narrow);
+}
+
+// From half vector to partial half. The narrowing intrinsics take a 128-bit
+// input, so the 64-bit raw value is first duplicated with vcombine_*.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int32_t, D>> v) {
+  const int32x4_t doubled = vcombine_s32(v.raw, v.raw);
+  return VFromD<D>(vqmovun_s32(doubled));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int32_t, D>> v) {
+  const int32x4_t doubled = vcombine_s32(v.raw, v.raw);
+  return VFromD<D>(vqmovn_s32(doubled));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 2), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int32_t, D>> v) {
+  const int32x4_t doubled32 = vcombine_s32(v.raw, v.raw);
+  const uint16x4_t as_u16 = vqmovun_s32(doubled32);
+  const uint16x8_t doubled16 = vcombine_u16(as_u16, as_u16);
+  return VFromD<D>(vqmovn_u16(doubled16));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int16_t, D>> v) {
+  const int16x8_t doubled = vcombine_s16(v.raw, v.raw);
+  return VFromD<D>(vqmovun_s16(doubled));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 2), HWY_IF_I8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int32_t, D>> v) {
+  const int32x4_t doubled32 = vcombine_s32(v.raw, v.raw);
+  const int16x4_t as_i16 = vqmovn_s32(doubled32);
+  const int16x8_t doubled16 = vcombine_s16(as_i16, as_i16);
+  return VFromD<D>(vqmovn_s16(doubled16));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_I8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int16_t, D>> v) {
+  const int16x8_t doubled = vcombine_s16(v.raw, v.raw);
+  return VFromD<D>(vqmovn_s16(doubled));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<uint32_t, D>> v) {
+  const uint32x4_t doubled = vcombine_u32(v.raw, v.raw);
+  return VFromD<D>(vqmovn_u32(doubled));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 2), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<uint32_t, D>> v) {
+  const uint32x4_t doubled32 = vcombine_u32(v.raw, v.raw);
+  const uint16x4_t as_u16 = vqmovn_u32(doubled32);
+  const uint16x8_t doubled16 = vcombine_u16(as_u16, as_u16);
+  return VFromD<D>(vqmovn_u16(doubled16));
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<uint16_t, D>> v) {
+  const uint16x8_t doubled = vcombine_u16(v.raw, v.raw);
+  return VFromD<D>(vqmovn_u16(doubled));
+}
+
+// 64-bit lanes of a full vector narrowed to 32-bit lanes with the saturating
+// narrowing intrinsics.
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec64<int32_t> DemoteTo(D /* tag */, Vec128<int64_t> v) {
+  const int32x2_t narrow = vqmovn_s64(v.raw);
+  return Vec64<int32_t>(narrow);
+}
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec64<uint32_t> DemoteTo(D /* tag */, Vec128<int64_t> v) {
+  const uint32x2_t narrow = vqmovun_s64(v.raw);
+  return Vec64<uint32_t>(narrow);
+}
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec64<uint32_t> DemoteTo(D /* tag */, Vec128<uint64_t> v) {
+  const uint32x2_t narrow = vqmovn_u64(v.raw);
+  return Vec64<uint32_t>(narrow);
+}
+// int64_t -> int16_t/int8_t for a full 128-bit input: narrow to int32_t
+// first, then demote that intermediate to the requested signed lane type.
+// The input is Vec128<int64_t>: a signed destination goes through an int32_t
+// intermediate, which is only defined for signed 64-bit input (matching the
+// Vec64<int64_t> overload of this same demotion).
+template <class D, HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2)),
+          HWY_IF_SIGNED_D(D)>
+HWY_API VFromD<D> DemoteTo(D d, Vec128<int64_t> v) {
+  const Rebind<int32_t, D> di32;
+  return DemoteTo(d, DemoteTo(di32, v));
+}
+// i64 -> u8/u16: demote to u32 first, then narrow again to the target type.
+template <class D, HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2)),
+          HWY_IF_UNSIGNED_D(D)>
+HWY_API VFromD<D> DemoteTo(D d, Vec128<int64_t> v) {
+  const Rebind<uint32_t, D> d_mid;
+  const auto as_u32 = DemoteTo(d_mid, v);
+  return DemoteTo(d, as_u32);
+}
+// u64 -> u8/u16: demote to u32 first, then narrow again to the target type.
+template <class D, HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2)),
+          HWY_IF_UNSIGNED_D(D)>
+HWY_API VFromD<D> DemoteTo(D d, Vec128<uint64_t> v) {
+  const Rebind<uint32_t, D> d_mid;
+  const auto as_u32 = DemoteTo(d_mid, v);
+  return DemoteTo(d, as_u32);
+}
+
+// Single-lane i64 -> i32. The 64-bit input is duplicated via vcombine to form
+// the 128-bit source taken by the narrowing intrinsic.
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec32<int32_t> DemoteTo(D /* tag */, Vec64<int64_t> v) {
+  const int64x2_t wide = vcombine_s64(v.raw, v.raw);
+  return Vec32<int32_t>(vqmovn_s64(wide));
+}
+// Single-lane i64 -> u32 (unsigned saturation of the signed input).
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec32<uint32_t> DemoteTo(D /* tag */, Vec64<int64_t> v) {
+  const int64x2_t wide = vcombine_s64(v.raw, v.raw);
+  return Vec32<uint32_t>(vqmovun_s64(wide));
+}
+// Single-lane u64 -> u32.
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec32<uint32_t> DemoteTo(D /* tag */, Vec64<uint64_t> v) {
+  const uint64x2_t wide = vcombine_u64(v.raw, v.raw);
+  return Vec32<uint32_t>(vqmovn_u64(wide));
+}
+// Single-lane i64 -> i8/i16: demote to i32 first, then to the target type.
+template <class D, HWY_IF_SIGNED_D(D),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2))>
+HWY_API VFromD<D> DemoteTo(D d, Vec64<int64_t> v) {
+  const Rebind<int32_t, D> d_mid;
+  const auto as_i32 = DemoteTo(d_mid, v);
+  return DemoteTo(d, as_i32);
+}
+// Single-lane i64 -> u8/u16: demote to u32 first, then to the target type.
+template <class D, HWY_IF_UNSIGNED_D(D),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2))>
+HWY_API VFromD<D> DemoteTo(D d, Vec64<int64_t> v) {
+  const Rebind<uint32_t, D> d_mid;
+  const auto as_u32 = DemoteTo(d_mid, v);
+  return DemoteTo(d, as_u32);
+}
+// Single-lane u64 -> u8/u16: demote to u32 first, then to the target type.
+template <class D, HWY_IF_LANES_D(D, 1), HWY_IF_UNSIGNED_D(D),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2))>
+HWY_API VFromD<D> DemoteTo(D d, Vec64<uint64_t> v) {
+  const Rebind<uint32_t, D> d_mid;
+  const auto as_u32 = DemoteTo(d_mid, v);
+  return DemoteTo(d, as_u32);
+}
+
+#if HWY_NEON_HAVE_FLOAT16C
+
+// We already toggled HWY_NATIVE_F16C above.
+
+// Full vector: f32 -> f16 via the native conversion intrinsic.
+template <class D, HWY_IF_F16_D(D)>
+HWY_API Vec64<float16_t> DemoteTo(D /* tag */, Vec128<float> v) {
+  const auto halves = vcvt_f16_f32(v.raw);
+  return Vec64<float16_t>{halves};
+}
+// Partial vector: duplicate the 64-bit input to form the 128-bit source.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_F16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<float, D>> v) {
+  const float32x4_t wide = vcombine_f32(v.raw, v.raw);
+  return VFromD<D>(vcvt_f16_f32(wide));
+}
+
+#endif  // HWY_NEON_HAVE_FLOAT16C
+
+// f32 -> bf16: keep the upper 16 bits of each binary32 value.
+template <class D, HWY_IF_BF16_D(D)>
+HWY_API VFromD<D> DemoteTo(D dbf16, VFromD<Rebind<float, D>> v) {
+  const Rebind<uint32_t, decltype(dbf16)> du32;  // for logical shift right
+  const Rebind<int32_t, decltype(dbf16)> di32;
+  const Rebind<uint16_t, decltype(dbf16)> du16;
+  const auto f32_bits = BitCast(du32, v);
+  const auto upper16 = ShiftRight<16>(f32_bits);
+  const auto as_i32 = BitCast(di32, upper16);
+  const auto as_u16 = DemoteTo(du16, as_i32);
+  return BitCast(dbf16, as_u16);
+}
+
+#if HWY_HAVE_FLOAT64
+
+// Full vector: f64 -> f32.
+template <class D, HWY_IF_F32_D(D)>
+HWY_API Vec64<float> DemoteTo(D /* tag */, Vec128<double> v) {
+  const float32x2_t narrowed = vcvt_f32_f64(v.raw);
+  return Vec64<float>(narrowed);
+}
+// Single lane: duplicate the input to form the 128-bit source.
+template <class D, HWY_IF_F32_D(D)>
+HWY_API Vec32<float> DemoteTo(D /* tag */, Vec64<double> v) {
+  const float64x2_t wide = vcombine_f64(v.raw, v.raw);
+  return Vec32<float>(vcvt_f32_f64(wide));
+}
+
+// Full vector: f64 -> i32 by converting to i64, then saturating narrow.
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec64<int32_t> DemoteTo(D /* tag */, Vec128<double> v) {
+  const int64x2_t as_i64 = vcvtq_s64_f64(v.raw);
+  const int32x2_t narrowed = vqmovn_s64(as_i64);
+  return Vec64<int32_t>(narrowed);
+}
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec32<int32_t> DemoteTo(D /* tag */, Vec64<double> v) {
+  // There is no i64x1 -> i32x1 narrow, so Combine to 128-bit. Do so with the
+  // f64 input already to also avoid the missing vcvt_s64_f64 in GCC 6.4.
+  const Full128<double> d_f64;
+  const Full128<int64_t> d_i64;
+  const Vec128<double> doubled = Combine(d_f64, v, v);
+  const Vec128<int64_t> as_i64 = ConvertTo(d_i64, doubled);
+  return Vec32<int32_t>(vqmovn_s64(as_i64.raw));
+}
+
+#endif  // HWY_HAVE_FLOAT64
+
+// Extracts the lowest byte of each u32 lane (no saturation) by de-interleaving
+// even-indexed bytes twice.
+HWY_API Vec32<uint8_t> U8FromU32(Vec128<uint32_t> v) {
+  const uint8x16_t bytes = detail::BitCastToByte(v).raw;
+  const uint8x16_t even_bytes = vuzp1q_u8(bytes, bytes);
+  const uint8x16_t every_fourth = vuzp1q_u8(even_bytes, even_bytes);
+  return Vec32<uint8_t>(vget_low_u8(every_fourth));
+}
+// As above, for vectors of at most 64 bits.
+template <size_t N, HWY_IF_V_SIZE_LE(uint32_t, N, 8)>
+HWY_API Vec128<uint8_t, N> U8FromU32(Vec128<uint32_t, N> v) {
+  const uint8x8_t bytes = detail::BitCastToByte(v).raw;
+  const uint8x8_t even_bytes = vuzp1_u8(bytes, bytes);
+  const uint8x8_t every_fourth = vuzp1_u8(even_bytes, even_bytes);
+  return Vec128<uint8_t, N>(every_fourth);
+}
+
+// ------------------------------ Round (IfThenElse, mask, logical)
+
+#if HWY_ARCH_ARM_A64
+// Round: toward nearest integer (instantiated from the vrndn* intrinsics)
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(Round, vrndn, _, 1)
+
+// Trunc: toward zero, aka truncate (instantiated from the vrnd* intrinsics)
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(Trunc, vrnd, _, 1)
+
+// Ceil: toward +infinity, aka ceiling (instantiated from the vrndp* intrinsics)
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(Ceil, vrndp, _, 1)
+
+// Floor: toward -infinity, aka floor (instantiated from the vrndm* intrinsics)
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(Floor, vrndm, _, 1)
+#else
+
+// ------------------------------ Trunc
+
+// Armv7 only supports truncation to integer. We can either convert back to
+// float (3 floating-point and 2 logic operations) or manipulate the binary32
+// representation, clearing the lowest 23-exp mantissa bits. This requires 9
+// integer operations and 3 constants, which is likely more expensive.
+
+namespace detail {
+
+// Mask of lanes whose magnitude is below MantissaEnd, i.e. those for which the
+// integer round trip should be used. For NaN or large magnitudes (already an
+// integer), the comparison is false and callers keep the original value.
+template <size_t N>
+HWY_INLINE Mask128<float, N> UseInt(const Vec128<float, N> v) {
+  const Simd<float, N, 0> df;
+  const auto limit = Set(df, MantissaEnd<float>());
+  return Abs(v) < limit;
+}
+
+}  // namespace detail
+
+// Rounds toward zero via a float -> int32 -> float round trip, keeping the
+// input wherever detail::UseInt says the round trip does not apply.
+template <size_t N>
+HWY_API Vec128<float, N> Trunc(const Vec128<float, N> v) {
+  const DFromV<decltype(v)> df;
+  const RebindToSigned<decltype(df)> di;
+
+  // The float -> int conversion rounds toward 0.
+  const auto truncated = ConvertTo(df, ConvertTo(di, v));
+  const auto use_truncated = detail::UseInt(v);
+
+  return IfThenElse(use_truncated, truncated, v);
+}
+
+template <size_t N>
+HWY_API Vec128<float, N> Round(const Vec128<float, N> v) {
+  const DFromV<decltype(v)> df;
+
+  // Armv7 also lacks a native NearestInt, but we can instead rely on rounding
+  // (we assume the current mode is nearest-even) after addition with a large
+  // value such that no mantissa bits remain. We may need a compiler flag for
+  // precise floating-point to prevent this from being "optimized" out.
+  const auto magic = Set(df, MantissaEnd<float>());
+  const auto signed_magic = CopySignToAbs(magic, v);
+  const auto biased = signed_magic + v;
+  const auto unbiased = biased - signed_magic;
+
+  // Keep original if NaN or the magnitude is large (already an int).
+  const auto is_small = Abs(v) < magic;
+  return IfThenElse(is_small, unbiased, v);
+}
+
+// Rounds toward +infinity: truncate, then add 1 where truncation went down.
+template <size_t N>
+HWY_API Vec128<float, N> Ceil(const Vec128<float, N> v) {
+  const DFromV<decltype(v)> df;
+  const RebindToSigned<decltype(df)> di;
+
+  // The float -> int conversion rounds toward 0.
+  const auto truncated = ConvertTo(df, ConvertTo(di, v));
+
+  // Truncating a positive non-integer ends up smaller; if so, add 1. The mask
+  // converts to -1.0f in affected lanes, hence the subtraction below.
+  const auto went_down = RebindMask(di, truncated < v);
+  const auto minus_one = ConvertTo(df, VecFromMask(di, went_down));
+  const auto adjusted = truncated - minus_one;
+
+  return IfThenElse(detail::UseInt(v), adjusted, v);
+}
+
+// Rounds toward -infinity: truncate, then subtract 1 where truncation went up.
+template <size_t N>
+HWY_API Vec128<float, N> Floor(const Vec128<float, N> v) {
+  const DFromV<decltype(v)> df;
+  const RebindToSigned<decltype(df)> di;
+
+  // The float -> int conversion rounds toward 0.
+  const auto truncated = ConvertTo(df, ConvertTo(di, v));
+
+  // Truncating a negative non-integer ends up larger; if so, subtract 1. The
+  // mask converts to -1.0f in affected lanes, hence the addition below.
+  const auto went_up = RebindMask(di, truncated > v);
+  const auto minus_one = ConvertTo(df, VecFromMask(di, went_up));
+  const auto adjusted = truncated + minus_one;
+
+  return IfThenElse(detail::UseInt(v), adjusted, v);
+}
+
+#endif
+
+// ------------------------------ NearestInt (Round)
+
+#if HWY_ARCH_ARM_A64
+
+// Full vector: f32 -> i32 using the native round-to-nearest conversion.
+HWY_API Vec128<int32_t> NearestInt(const Vec128<float> v) {
+  const int32x4_t nearest = vcvtnq_s32_f32(v.raw);
+  return Vec128<int32_t>(nearest);
+}
+// Vectors of at most 64 bits.
+template <size_t N, HWY_IF_V_SIZE_LE(float, N, 8)>
+HWY_API Vec128<int32_t, N> NearestInt(const Vec128<float, N> v) {
+  const int32x2_t nearest = vcvtn_s32_f32(v.raw);
+  return Vec128<int32_t, N>(nearest);
+}
+
+#else
+
+// Without a native instruction: Round first, then convert the result to i32.
+template <size_t N>
+HWY_API Vec128<int32_t, N> NearestInt(const Vec128<float, N> v) {
+  const RebindToSigned<DFromV<decltype(v)>> di;
+  const Vec128<float, N> rounded = Round(v);
+  return ConvertTo(di, rounded);
+}
+
+#endif
+
+// ------------------------------ Floating-point classification
+// A lane is NaN exactly when it compares unequal to itself.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> IsNaN(const Vec128<T, N> v) {
+  const Mask128<T, N> differs_from_self = (v != v);
+  return differs_from_self;
+}
+
+// Returns whether each lane is +/- infinity.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Mask128<T, N> IsInf(const Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;
+  const VFromD<decltype(di)> bits = BitCast(di, v);
+  // 'Shift left' to clear the sign bit, check for exponent=max and mantissa=0.
+  const auto sign_cleared = Add(bits, bits);
+  const auto inf_pattern = Set(di, hwy::MaxExponentTimes2<T>());
+  return RebindMask(d, Eq(sign_cleared, inf_pattern));
+}
+
+// Returns whether normal/subnormal/zero.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Mask128<T, N> IsFinite(const Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;  // cheaper than unsigned comparison
+  const RebindToUnsigned<decltype(d)> du;
+  const VFromD<decltype(du)> bits = BitCast(du, v);
+  // 'Shift left' to clear the sign bit, then right so we can compare with the
+  // max exponent (cannot compare with MaxExponentTimes2 directly because it is
+  // negative and non-negative floats would be greater).
+  const auto sign_cleared = Add(bits, bits);
+  const VFromD<decltype(di)> exp_field =
+      BitCast(di, ShiftRight<hwy::MantissaBits<T>() + 1>(sign_cleared));
+  const auto max_exp = Set(di, hwy::MaxExponentField<T>());
+  return RebindMask(d, Lt(exp_field, max_exp));
+}
+
+// ================================================== SWIZZLE
+
+// ------------------------------ LowerHalf
+
+// <= 64 bit: the raw register is reused unchanged; only the lane count in the
+// returned type is halved.
+template <typename T, size_t N, HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_API Vec128<T, N / 2> LowerHalf(Vec128<T, N> v) {
+  using VHalf = Vec128<T, N / 2>;
+  return VHalf(v.raw);
+}
+
+// Full 128-bit vectors: extract the lower 64 bits via vget_low_*.
+HWY_API Vec64<uint8_t> LowerHalf(Vec128<uint8_t> v) {
+  const uint8x8_t lower = vget_low_u8(v.raw);
+  return Vec64<uint8_t>(lower);
+}
+HWY_API Vec64<uint16_t> LowerHalf(Vec128<uint16_t> v) {
+  const uint16x4_t lower = vget_low_u16(v.raw);
+  return Vec64<uint16_t>(lower);
+}
+HWY_API Vec64<uint32_t> LowerHalf(Vec128<uint32_t> v) {
+  const uint32x2_t lower = vget_low_u32(v.raw);
+  return Vec64<uint32_t>(lower);
+}
+HWY_API Vec64<uint64_t> LowerHalf(Vec128<uint64_t> v) {
+  const uint64x1_t lower = vget_low_u64(v.raw);
+  return Vec64<uint64_t>(lower);
+}
+HWY_API Vec64<int8_t> LowerHalf(Vec128<int8_t> v) {
+  const int8x8_t lower = vget_low_s8(v.raw);
+  return Vec64<int8_t>(lower);
+}
+HWY_API Vec64<int16_t> LowerHalf(Vec128<int16_t> v) {
+  const int16x4_t lower = vget_low_s16(v.raw);
+  return Vec64<int16_t>(lower);
+}
+HWY_API Vec64<int32_t> LowerHalf(Vec128<int32_t> v) {
+  const int32x2_t lower = vget_low_s32(v.raw);
+  return Vec64<int32_t>(lower);
+}
+HWY_API Vec64<int64_t> LowerHalf(Vec128<int64_t> v) {
+  const int64x1_t lower = vget_low_s64(v.raw);
+  return Vec64<int64_t>(lower);
+}
+HWY_API Vec64<float> LowerHalf(Vec128<float> v) {
+  const float32x2_t lower = vget_low_f32(v.raw);
+  return Vec64<float>(lower);
+}
+#if HWY_HAVE_FLOAT16
+HWY_API Vec64<float16_t> LowerHalf(Vec128<float16_t> v) {
+  const auto lower = vget_low_f16(v.raw);
+  return Vec64<float16_t>(lower);
+}
+#endif  // HWY_HAVE_FLOAT16
+#if HWY_HAVE_FLOAT64
+HWY_API Vec64<double> LowerHalf(Vec128<double> v) {
+  const float64x1_t lower = vget_low_f64(v.raw);
+  return Vec64<double>(lower);
+}
+#endif  // HWY_HAVE_FLOAT64
+
+// Special float lanes in a 16-byte vector: reinterpret as u16, take the lower
+// half, then cast back to the half-width special-float type.
+template <class V, HWY_IF_SPECIAL_FLOAT_V(V), HWY_IF_V_SIZE_V(V, 16)>
+HWY_API VFromD<Half<DFromV<V>>> LowerHalf(V v) {
+  const Half<DFromV<V>> d_half;
+  const Full128<uint16_t> d_u16;
+  const auto as_u16 = BitCast(d_u16, v);
+  return BitCast(d_half, LowerHalf(as_u16));
+}
+
+// Tag-taking form; the tag only selects the overload and is otherwise unused.
+template <class DH>
+HWY_API VFromD<DH> LowerHalf(DH /* tag */, VFromD<Twice<DH>> v) {
+  const VFromD<DH> lower = LowerHalf(v);
+  return lower;
+}
+
+// ------------------------------ CombineShiftRightBytes
+
+// 128-bit: extracts 16 bytes starting kBytes into the concatenation of lo
+// (lower bytes) and hi (upper bytes).
+template <int kBytes, class D, typename T = TFromD<D>>
+HWY_API Vec128<T> CombineShiftRightBytes(D d, Vec128<T> hi, Vec128<T> lo) {
+  static_assert(0 < kBytes && kBytes < 16, "kBytes must be in [1, 15]");
+  const Repartition<uint8_t, decltype(d)> d8;
+  const uint8x16_t lo_bytes = BitCast(d8, lo).raw;
+  const uint8x16_t hi_bytes = BitCast(d8, hi).raw;
+  const Vec128<uint8_t> extracted(vextq_u8(lo_bytes, hi_bytes, kBytes));
+  return BitCast(d, extracted);
+}
+
+// 64-bit: extracts 8 bytes starting kBytes into the concatenation of lo
+// (lower bytes) and hi (upper bytes).
+template <int kBytes, class D, typename T = TFromD<D>>
+HWY_API Vec64<T> CombineShiftRightBytes(D d, Vec64<T> hi, Vec64<T> lo) {
+  static_assert(0 < kBytes && kBytes < 8, "kBytes must be in [1, 7]");
+  const Repartition<uint8_t, decltype(d)> d8;
+  const uint8x8_t lo_bytes = BitCast(d8, lo).raw;
+  const uint8x8_t hi_bytes = BitCast(d8, hi).raw;
+  const VFromD<decltype(d8)> extracted(vext_u8(lo_bytes, hi_bytes, kBytes));
+  return BitCast(d, extracted);
+}
+
+// <= 32-bit defined after ShiftLeftBytes.
+
+// ------------------------------ Shift vector by constant #bytes
+
+namespace detail {
+
+// Partially specialize because kBytes = 0 and >= size are compile errors;
+// callers replace the latter with 0xFF for easier specialization.
+template <int kBytes>
+struct ShiftLeftBytesT {
+  // Full: shift in zero bytes from below via a byte extract of {v, 0}.
+  template <class T>
+  HWY_INLINE Vec128<T> operator()(const Vec128<T> v) {
+    const Full128<T> d128;
+    const Vec128<T> zeros = Zero(d128);
+    return CombineShiftRightBytes<16 - kBytes>(d128, v, zeros);
+  }
+
+  // Partial
+  template <class T, size_t N, HWY_IF_V_SIZE_LE(T, N, 8)>
+  HWY_INLINE Vec128<T, N> operator()(const Vec128<T, N> v) {
+    // Expand to 64-bit so we only use the native EXT instruction.
+    const Full64<T> d64;
+    const Vec64<T> zeros = Zero(d64);
+    const Vec64<T> expanded(v.raw);
+    const Vec64<T> shifted =
+        CombineShiftRightBytes<8 - kBytes>(d64, expanded, zeros);
+    return Vec128<T, N>(shifted.raw);
+  }
+};
+// Shift by zero bytes: returns the input unchanged.
+template <>
+struct ShiftLeftBytesT<0> {
+  template <class T, size_t N>
+  HWY_INLINE Vec128<T, N> operator()(const Vec128<T, N> v) {
+    return v;
+  }
+};
+// Sentinel 0xFF (shift by at least the vector size): all bits are shifted
+// out, so the result is zero, computed as v ^ v.
+template <>
+struct ShiftLeftBytesT<0xFF> {
+  template <class T, size_t N>
+  HWY_INLINE Vec128<T, N> operator()(const Vec128<T, N> v) {
+    const Vec128<T, N> zeros = Xor(v, v);
+    return zeros;
+  }
+};
+
+// Shifts right by kBytes, shifting in zeros from above.
+template <int kBytes>
+struct ShiftRightBytesT {
+  template <class T, size_t N>
+  HWY_INLINE Vec128<T, N> operator()(Vec128<T, N> v) {
+    const DFromV<decltype(v)> d;
+    // For < 64-bit vectors, zero undefined lanes so we shift in zeros.
+    if (d.MaxBytes() < 8) {
+      constexpr size_t kReg = d.MaxBytes() == 16 ? 16 : 8;
+      const Simd<T, kReg / sizeof(T), 0> d_reg;
+      const VFromD<decltype(d_reg)> whole_reg(v.raw);
+      const auto valid_lanes = FirstN(d_reg, N);
+      v = Vec128<T, N>(IfThenElseZero(valid_lanes, whole_reg).raw);
+    }
+    const auto zeros = Zero(d);
+    return CombineShiftRightBytes<kBytes>(d, zeros, v);
+  }
+};
+// Shift by zero bytes: returns the input unchanged.
+template <>
+struct ShiftRightBytesT<0> {
+  template <class T, size_t N>
+  HWY_INLINE Vec128<T, N> operator()(const Vec128<T, N> v) {
+    return v;
+  }
+};
+// Sentinel 0xFF (shift by at least the vector size): all bits are shifted
+// out, so the result is zero, computed as v ^ v.
+template <>
+struct ShiftRightBytesT<0xFF> {
+  template <class T, size_t N>
+  HWY_INLINE Vec128<T, N> operator()(const Vec128<T, N> v) {
+    const Vec128<T, N> zeros = Xor(v, v);
+    return zeros;
+  }
+};
+
+}  // namespace detail
+
+// Shifts v left by kBytes bytes. Shift counts of at least the vector size are
+// mapped to the 0xFF sentinel specialization, which returns zero.
+template <int kBytes, class D>
+HWY_API VFromD<D> ShiftLeftBytes(D d, VFromD<D> v) {
+  detail::ShiftLeftBytesT<(kBytes >= d.MaxBytes() ? 0xFF : kBytes)> shifter;
+  return shifter(v);
+}
+
+// Tag-less form: derives the tag from the vector type.
+template <int kBytes, typename T, size_t N>
+HWY_API Vec128<T, N> ShiftLeftBytes(Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  return ShiftLeftBytes<kBytes>(d, v);
+}
+
+// Shifts v left by kLanes lanes, implemented as a byte shift of
+// kLanes * sizeof(lane) on the u8 view of the vector.
+template <int kLanes, class D>
+HWY_API VFromD<D> ShiftLeftLanes(D d, VFromD<D> v) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  const auto as_bytes = BitCast(d8, v);
+  const auto shifted = ShiftLeftBytes<kLanes * sizeof(TFromD<D>)>(as_bytes);
+  return BitCast(d, shifted);
+}
+
+// Tag-less form: derives the tag from the vector type.
+template <int kLanes, typename T, size_t N>
+HWY_API Vec128<T, N> ShiftLeftLanes(Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  return ShiftLeftLanes<kLanes>(d, v);
+}
+
+// 0x01..0F, kBytes = 1 => 0x0001..0E
+// Shift counts of at least the vector size are mapped to the 0xFF sentinel
+// specialization, which returns zero.
+template <int kBytes, class D>
+HWY_API VFromD<D> ShiftRightBytes(D d, VFromD<D> v) {
+  detail::ShiftRightBytesT<(kBytes >= d.MaxBytes() ? 0xFF : kBytes)> shifter;
+  return shifter(v);
+}
+
+// Shifts v right by kLanes lanes, implemented as a byte shift of
+// kLanes * sizeof(lane) on the u8 view of the vector.
+template <int kLanes, class D>
+HWY_API VFromD<D> ShiftRightLanes(D d, VFromD<D> v) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  const auto as_bytes = BitCast(d8, v);
+  const auto shifted =
+      ShiftRightBytes<kLanes * sizeof(TFromD<D>)>(d8, as_bytes);
+  return BitCast(d, shifted);
+}
+
+// Calls ShiftLeftBytes
+// <= 32-bit vectors: emulated with 64-bit operations. `lo` is first moved to
+// the top of a 64-bit register so that the 64-bit byte extract sees lo's bytes
+// directly below hi's.
+template <int kBytes, class D, HWY_IF_V_SIZE_LE_D(D, 4)>
+HWY_API VFromD<D> CombineShiftRightBytes(D d, VFromD<D> hi, VFromD<D> lo) {
+  constexpr size_t kSize = d.MaxBytes();
+  static_assert(0 < kBytes && kBytes < kSize, "kBytes invalid");
+  const Repartition<uint8_t, decltype(d)> d8;
+  const Full64<uint8_t> d64_u8;
+  const Repartition<TFromD<D>, decltype(d64_u8)> d64_t;
+  using V64 = VFromD<decltype(d64_u8)>;
+  const V64 hi_reg(BitCast(d8, hi).raw);
+  const V64 lo_reg(BitCast(d8, lo).raw);
+  // Move into most-significant bytes
+  const V64 lo_top = ShiftLeftBytes<8 - kSize>(lo_reg);
+  const V64 extracted =
+      CombineShiftRightBytes<8 - kSize + kBytes>(d64_u8, hi_reg, lo_top);
+  // After casting to full 64-bit vector of correct type, shrink to 32-bit
+  return VFromD<D>(BitCast(d64_t, extracted).raw);
+}
+
+// ------------------------------ UpperHalf (ShiftRightBytes)
+
+// Full input: extract the upper 64 bits of a 128-bit vector via vget_high_*.
+template <class D, HWY_IF_U8_D(D)>
+HWY_API Vec64<uint8_t> UpperHalf(D /* tag */, Vec128<uint8_t> v) {
+  const uint8x8_t upper = vget_high_u8(v.raw);
+  return Vec64<uint8_t>(upper);
+}
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec64<uint16_t> UpperHalf(D /* tag */, Vec128<uint16_t> v) {
+  const uint16x4_t upper = vget_high_u16(v.raw);
+  return Vec64<uint16_t>(upper);
+}
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec64<uint32_t> UpperHalf(D /* tag */, Vec128<uint32_t> v) {
+  const uint32x2_t upper = vget_high_u32(v.raw);
+  return Vec64<uint32_t>(upper);
+}
+template <class D, HWY_IF_U64_D(D)>
+HWY_API Vec64<uint64_t> UpperHalf(D /* tag */, Vec128<uint64_t> v) {
+  const uint64x1_t upper = vget_high_u64(v.raw);
+  return Vec64<uint64_t>(upper);
+}
+template <class D, HWY_IF_I8_D(D)>
+HWY_API Vec64<int8_t> UpperHalf(D /* tag */, Vec128<int8_t> v) {
+  const int8x8_t upper = vget_high_s8(v.raw);
+  return Vec64<int8_t>(upper);
+}
+template <class D, HWY_IF_I16_D(D)>
+HWY_API Vec64<int16_t> UpperHalf(D /* tag */, Vec128<int16_t> v) {
+  const int16x4_t upper = vget_high_s16(v.raw);
+  return Vec64<int16_t>(upper);
+}
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec64<int32_t> UpperHalf(D /* tag */, Vec128<int32_t> v) {
+  const int32x2_t upper = vget_high_s32(v.raw);
+  return Vec64<int32_t>(upper);
+}
+template <class D, HWY_IF_I64_D(D)>
+HWY_API Vec64<int64_t> UpperHalf(D /* tag */, Vec128<int64_t> v) {
+  const int64x1_t upper = vget_high_s64(v.raw);
+  return Vec64<int64_t>(upper);
+}
+#if HWY_HAVE_FLOAT16
+template <class D, HWY_IF_F16_D(D)>
+HWY_API Vec64<float16_t> UpperHalf(D /* tag */, Vec128<float16_t> v) {
+  const auto upper = vget_high_f16(v.raw);
+  return Vec64<float16_t>(upper);
+}
+#endif
+template <class D, HWY_IF_F32_D(D)>
+HWY_API Vec64<float> UpperHalf(D /* tag */, Vec128<float> v) {
+  const float32x2_t upper = vget_high_f32(v.raw);
+  return Vec64<float>(upper);
+}
+#if HWY_HAVE_FLOAT64
+template <class D, HWY_IF_F64_D(D)>
+HWY_API Vec64<double> UpperHalf(D /* tag */, Vec128<double> v) {
+  const float64x1_t upper = vget_high_f64(v.raw);
+  return Vec64<double>(upper);
+}
+#endif  // HWY_HAVE_FLOAT64
+
+// Special float lanes: reinterpret as unsigned, take the upper half there,
+// then cast back to the requested half-width type.
+template <class D, HWY_IF_SPECIAL_FLOAT_D(D), HWY_IF_V_SIZE_GT_D(D, 4)>
+HWY_API VFromD<D> UpperHalf(D dh, VFromD<Twice<D>> v) {
+  const RebindToUnsigned<Twice<decltype(dh)>> d_unsigned;
+  const Half<decltype(d_unsigned)> d_unsigned_half;
+  const auto as_unsigned = BitCast(d_unsigned, v);
+  return BitCast(dh, UpperHalf(d_unsigned_half, as_unsigned));
+}
+
+// Partial: shift the upper half down by the half-vector size in bytes, then
+// reinterpret the register as the half-width vector type.
+template <class DH, HWY_IF_V_SIZE_LE_D(DH, 4)>
+HWY_API VFromD<DH> UpperHalf(DH dh, VFromD<Twice<DH>> v) {
+  const Twice<DH> d_whole;
+  const RebindToUnsigned<decltype(d_whole)> d_unsigned;
+  const VFromD<decltype(d_unsigned)> as_unsigned = BitCast(d_unsigned, v);
+  const VFromD<decltype(d_unsigned)> moved_down =
+      ShiftRightBytes<dh.MaxBytes()>(d_unsigned, as_unsigned);
+  return VFromD<DH>(BitCast(d_whole, moved_down).raw);
+}
+
+// ------------------------------ Broadcast/splat any lane
+
+#if HWY_ARCH_ARM_A64
+// Unsigned: replicate lane kLane into every lane of the result.
+template <int kLane>
+HWY_API Vec128<uint8_t> Broadcast(Vec128<uint8_t> v) {
+  static_assert(kLane >= 0 && kLane < 16, "Invalid lane");
+  const uint8x16_t splat = vdupq_laneq_u8(v.raw, kLane);
+  return Vec128<uint8_t>(splat);
+}
+template <int kLane, size_t N, HWY_IF_V_SIZE_LE(uint8_t, N, 8)>
+HWY_API Vec128<uint8_t, N> Broadcast(Vec128<uint8_t, N> v) {
+  static_assert(kLane >= 0 && kLane < N, "Invalid lane");
+  const uint8x8_t splat = vdup_lane_u8(v.raw, kLane);
+  return Vec128<uint8_t, N>(splat);
+}
+template <int kLane>
+HWY_API Vec128<uint16_t> Broadcast(Vec128<uint16_t> v) {
+  static_assert(kLane >= 0 && kLane < 8, "Invalid lane");
+  const uint16x8_t splat = vdupq_laneq_u16(v.raw, kLane);
+  return Vec128<uint16_t>(splat);
+}
+template <int kLane, size_t N, HWY_IF_V_SIZE_LE(uint16_t, N, 8)>
+HWY_API Vec128<uint16_t, N> Broadcast(Vec128<uint16_t, N> v) {
+  static_assert(kLane >= 0 && kLane < N, "Invalid lane");
+  const uint16x4_t splat = vdup_lane_u16(v.raw, kLane);
+  return Vec128<uint16_t, N>(splat);
+}
+template <int kLane>
+HWY_API Vec128<uint32_t> Broadcast(Vec128<uint32_t> v) {
+  static_assert(kLane >= 0 && kLane < 4, "Invalid lane");
+  const uint32x4_t splat = vdupq_laneq_u32(v.raw, kLane);
+  return Vec128<uint32_t>(splat);
+}
+template <int kLane, size_t N, HWY_IF_V_SIZE_LE(uint32_t, N, 8)>
+HWY_API Vec128<uint32_t, N> Broadcast(Vec128<uint32_t, N> v) {
+  static_assert(kLane >= 0 && kLane < N, "Invalid lane");
+  const uint32x2_t splat = vdup_lane_u32(v.raw, kLane);
+  return Vec128<uint32_t, N>(splat);
+}
+template <int kLane>
+HWY_API Vec128<uint64_t> Broadcast(Vec128<uint64_t> v) {
+  static_assert(kLane >= 0 && kLane < 2, "Invalid lane");
+  const uint64x2_t splat = vdupq_laneq_u64(v.raw, kLane);
+  return Vec128<uint64_t>(splat);
+}
+// Vec64<uint64_t> is defined below.
+
+// Signed: replicate lane kLane into every lane of the result.
+template <int kLane>
+HWY_API Vec128<int8_t> Broadcast(Vec128<int8_t> v) {
+  static_assert(kLane >= 0 && kLane < 16, "Invalid lane");
+  const int8x16_t splat = vdupq_laneq_s8(v.raw, kLane);
+  return Vec128<int8_t>(splat);
+}
+template <int kLane, size_t N, HWY_IF_V_SIZE_LE(int8_t, N, 8)>
+HWY_API Vec128<int8_t, N> Broadcast(Vec128<int8_t, N> v) {
+  static_assert(kLane >= 0 && kLane < N, "Invalid lane");
+  const int8x8_t splat = vdup_lane_s8(v.raw, kLane);
+  return Vec128<int8_t, N>(splat);
+}
+template <int kLane>
+HWY_API Vec128<int16_t> Broadcast(Vec128<int16_t> v) {
+  static_assert(kLane >= 0 && kLane < 8, "Invalid lane");
+  const int16x8_t splat = vdupq_laneq_s16(v.raw, kLane);
+  return Vec128<int16_t>(splat);
+}
+template <int kLane, size_t N, HWY_IF_V_SIZE_LE(int16_t, N, 8)>
+HWY_API Vec128<int16_t, N> Broadcast(Vec128<int16_t, N> v) {
+  static_assert(kLane >= 0 && kLane < N, "Invalid lane");
+  const int16x4_t splat = vdup_lane_s16(v.raw, kLane);
+  return Vec128<int16_t, N>(splat);
+}
+template <int kLane>
+HWY_API Vec128<int32_t> Broadcast(Vec128<int32_t> v) {
+  static_assert(kLane >= 0 && kLane < 4, "Invalid lane");
+  const int32x4_t splat = vdupq_laneq_s32(v.raw, kLane);
+  return Vec128<int32_t>(splat);
+}
+template <int kLane, size_t N, HWY_IF_V_SIZE_LE(int32_t, N, 8)>
+HWY_API Vec128<int32_t, N> Broadcast(Vec128<int32_t, N> v) {
+  static_assert(kLane >= 0 && kLane < N, "Invalid lane");
+  const int32x2_t splat = vdup_lane_s32(v.raw, kLane);
+  return Vec128<int32_t, N>(splat);
+}
+template <int kLane>
+HWY_API Vec128<int64_t> Broadcast(Vec128<int64_t> v) {
+  static_assert(kLane >= 0 && kLane < 2, "Invalid lane");
+  const int64x2_t splat = vdupq_laneq_s64(v.raw, kLane);
+  return Vec128<int64_t>(splat);
+}
+// Vec64<int64_t> is defined below.
+
+// Float: replicate lane kLane into every lane of the result.
+#if HWY_HAVE_FLOAT16
+template <int kLane>
+HWY_API Vec128<float16_t> Broadcast(Vec128<float16_t> v) {
+  static_assert(kLane >= 0 && kLane < 8, "Invalid lane");
+  const auto splat = vdupq_laneq_f16(v.raw, kLane);
+  return Vec128<float16_t>(splat);
+}
+template <int kLane, size_t N, HWY_IF_V_SIZE_LE(float16_t, N, 8)>
+HWY_API Vec128<float16_t, N> Broadcast(Vec128<float16_t, N> v) {
+  static_assert(kLane >= 0 && kLane < N, "Invalid lane");
+  const auto splat = vdup_lane_f16(v.raw, kLane);
+  return Vec128<float16_t, N>(splat);
+}
+#endif  // HWY_HAVE_FLOAT16
+
+template <int kLane>
+HWY_API Vec128<float> Broadcast(Vec128<float> v) {
+  static_assert(kLane >= 0 && kLane < 4, "Invalid lane");
+  const float32x4_t splat = vdupq_laneq_f32(v.raw, kLane);
+  return Vec128<float>(splat);
+}
+template <int kLane, size_t N, HWY_IF_V_SIZE_LE(float, N, 8)>
+HWY_API Vec128<float, N> Broadcast(Vec128<float, N> v) {
+  static_assert(kLane >= 0 && kLane < N, "Invalid lane");
+  const float32x2_t splat = vdup_lane_f32(v.raw, kLane);
+  return Vec128<float, N>(splat);
+}
+template <int kLane>
+HWY_API Vec128<double> Broadcast(Vec128<double> v) {
+  static_assert(kLane >= 0 && kLane < 2, "Invalid lane");
+  const float64x2_t splat = vdupq_laneq_f64(v.raw, kLane);
+  return Vec128<double>(splat);
+}
+// Single-lane vector: broadcasting lane 0 is the identity.
+template <int kLane>
+HWY_API Vec64<double> Broadcast(Vec64<double> v) {
+  static_assert(kLane >= 0 && kLane < 1, "Invalid lane");
+  return v;
+}
+
+#else  // !HWY_ARCH_ARM_A64
+// No vdupq_laneq_* on armv7: use vgetq_lane_* + vdupq_n_*.
+
+// Unsigned: 128-bit vectors extract the lane to a scalar and splat it;
+// vectors of at most 64 bits use vdup_lane_* directly.
+template <int kLane>
+HWY_API Vec128<uint8_t> Broadcast(Vec128<uint8_t> v) {
+  static_assert(kLane >= 0 && kLane < 16, "Invalid lane");
+  const uint8_t lane = vgetq_lane_u8(v.raw, kLane);
+  return Vec128<uint8_t>(vdupq_n_u8(lane));
+}
+template <int kLane, size_t N, HWY_IF_V_SIZE_LE(uint8_t, N, 8)>
+HWY_API Vec128<uint8_t, N> Broadcast(Vec128<uint8_t, N> v) {
+  static_assert(kLane >= 0 && kLane < N, "Invalid lane");
+  const uint8x8_t splat = vdup_lane_u8(v.raw, kLane);
+  return Vec128<uint8_t, N>(splat);
+}
+template <int kLane>
+HWY_API Vec128<uint16_t> Broadcast(Vec128<uint16_t> v) {
+  static_assert(kLane >= 0 && kLane < 8, "Invalid lane");
+  const uint16_t lane = vgetq_lane_u16(v.raw, kLane);
+  return Vec128<uint16_t>(vdupq_n_u16(lane));
+}
+template <int kLane, size_t N, HWY_IF_V_SIZE_LE(uint16_t, N, 8)>
+HWY_API Vec128<uint16_t, N> Broadcast(Vec128<uint16_t, N> v) {
+  static_assert(kLane >= 0 && kLane < N, "Invalid lane");
+  const uint16x4_t splat = vdup_lane_u16(v.raw, kLane);
+  return Vec128<uint16_t, N>(splat);
+}
+template <int kLane>
+HWY_API Vec128<uint32_t> Broadcast(Vec128<uint32_t> v) {
+  static_assert(kLane >= 0 && kLane < 4, "Invalid lane");
+  const uint32_t lane = vgetq_lane_u32(v.raw, kLane);
+  return Vec128<uint32_t>(vdupq_n_u32(lane));
+}
+template <int kLane, size_t N, HWY_IF_V_SIZE_LE(uint32_t, N, 8)>
+HWY_API Vec128<uint32_t, N> Broadcast(Vec128<uint32_t, N> v) {
+  static_assert(kLane >= 0 && kLane < N, "Invalid lane");
+  const uint32x2_t splat = vdup_lane_u32(v.raw, kLane);
+  return Vec128<uint32_t, N>(splat);
+}
+template <int kLane>
+HWY_API Vec128<uint64_t> Broadcast(Vec128<uint64_t> v) {
+  static_assert(kLane >= 0 && kLane < 2, "Invalid lane");
+  const uint64_t lane = vgetq_lane_u64(v.raw, kLane);
+  return Vec128<uint64_t>(vdupq_n_u64(lane));
+}
+// Vec64<uint64_t> is defined below.
+
+// Signed: 128-bit vectors extract the lane to a scalar and splat it;
+// vectors of at most 64 bits use vdup_lane_* directly.
+template <int kLane>
+HWY_API Vec128<int8_t> Broadcast(Vec128<int8_t> v) {
+  static_assert(kLane >= 0 && kLane < 16, "Invalid lane");
+  const int8_t lane = vgetq_lane_s8(v.raw, kLane);
+  return Vec128<int8_t>(vdupq_n_s8(lane));
+}
+template <int kLane, size_t N, HWY_IF_V_SIZE_LE(int8_t, N, 8)>
+HWY_API Vec128<int8_t, N> Broadcast(Vec128<int8_t, N> v) {
+  static_assert(kLane >= 0 && kLane < N, "Invalid lane");
+  const int8x8_t splat = vdup_lane_s8(v.raw, kLane);
+  return Vec128<int8_t, N>(splat);
+}
+template <int kLane>
+HWY_API Vec128<int16_t> Broadcast(Vec128<int16_t> v) {
+  static_assert(kLane >= 0 && kLane < 8, "Invalid lane");
+  const int16_t lane = vgetq_lane_s16(v.raw, kLane);
+  return Vec128<int16_t>(vdupq_n_s16(lane));
+}
+template <int kLane, size_t N, HWY_IF_V_SIZE_LE(int16_t, N, 8)>
+HWY_API Vec128<int16_t, N> Broadcast(Vec128<int16_t, N> v) {
+  static_assert(kLane >= 0 && kLane < N, "Invalid lane");
+  const int16x4_t splat = vdup_lane_s16(v.raw, kLane);
+  return Vec128<int16_t, N>(splat);
+}
+template <int kLane>
+HWY_API Vec128<int32_t> Broadcast(Vec128<int32_t> v) {
+  static_assert(kLane >= 0 && kLane < 4, "Invalid lane");
+  const int32_t lane = vgetq_lane_s32(v.raw, kLane);
+  return Vec128<int32_t>(vdupq_n_s32(lane));
+}
+template <int kLane, size_t N, HWY_IF_V_SIZE_LE(int32_t, N, 8)>
+HWY_API Vec128<int32_t, N> Broadcast(Vec128<int32_t, N> v) {
+  static_assert(kLane >= 0 && kLane < N, "Invalid lane");
+  const int32x2_t splat = vdup_lane_s32(v.raw, kLane);
+  return Vec128<int32_t, N>(splat);
+}
+template <int kLane>
+HWY_API Vec128<int64_t> Broadcast(Vec128<int64_t> v) {
+  static_assert(kLane >= 0 && kLane < 2, "Invalid lane");
+  const int64_t lane = vgetq_lane_s64(v.raw, kLane);
+  return Vec128<int64_t>(vdupq_n_s64(lane));
+}
+// Vec64<int64_t> is defined below.
+
+// Float: 128-bit vectors extract the lane to a scalar and splat it;
+// vectors of at most 64 bits use vdup_lane_* directly.
+#if HWY_HAVE_FLOAT16
+template <int kLane>
+HWY_API Vec128<float16_t> Broadcast(Vec128<float16_t> v) {
+  static_assert(kLane >= 0 && kLane < 8, "Invalid lane");
+  const auto lane = vgetq_lane_f16(v.raw, kLane);
+  return Vec128<float16_t>(vdupq_n_f16(lane));
+}
+#endif  // HWY_HAVE_FLOAT16
+template <int kLane>
+HWY_API Vec128<float> Broadcast(Vec128<float> v) {
+  static_assert(kLane >= 0 && kLane < 4, "Invalid lane");
+  const float lane = vgetq_lane_f32(v.raw, kLane);
+  return Vec128<float>(vdupq_n_f32(lane));
+}
+template <int kLane, size_t N, HWY_IF_V_SIZE_LE(float, N, 8)>
+HWY_API Vec128<float, N> Broadcast(Vec128<float, N> v) {
+  static_assert(kLane >= 0 && kLane < N, "Invalid lane");
+  const float32x2_t splat = vdup_lane_f32(v.raw, kLane);
+  return Vec128<float, N>(splat);
+}
+
+#endif  // HWY_ARCH_ARM_A64
+
+// Single-lane 64-bit vectors: the only valid lane is 0, so broadcasting is the
+// identity.
+template <int kLane>
+HWY_API Vec64<uint64_t> Broadcast(Vec64<uint64_t> v) {
+  static_assert(kLane >= 0 && kLane < 1, "Invalid lane");
+  return v;
+}
+template <int kLane>
+HWY_API Vec64<int64_t> Broadcast(Vec64<int64_t> v) {
+  static_assert(kLane >= 0 && kLane < 1, "Invalid lane");
+  return v;
+}
+
+// ------------------------------ TableLookupLanes
+
+// Returned by SetTableIndices and IndicesFromVec for use by TableLookupLanes and TwoTablesLookupLanes.
+template <typename T, size_t N>
+struct Indices128 {
+  typename detail::Raw128<T, N>::type raw;  // register holding the indices, as built by IndicesFromVec
+};
+
+namespace detail {
+
+// Byte-shuffle pattern that copies the first byte of each lane into all bytes
+// of that lane. For 1-byte lanes this is the identity pattern 0, 1, 2, ...
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecBroadcastLaneBytes(
+    D d) {
+  const Repartition<uint8_t, decltype(d)> d_bytes;
+  return Iota(d_bytes, 0);
+}
+
+// 2-byte lanes.
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecBroadcastLaneBytes(
+    D d) {
+  alignas(16) static constexpr uint8_t kFirstByteOfLane[16] = {
+      0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14};
+  const Repartition<uint8_t, decltype(d)> d_bytes;
+  return Load(d_bytes, kFirstByteOfLane);
+}
+
+// 4-byte lanes.
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecBroadcastLaneBytes(
+    D d) {
+  alignas(16) static constexpr uint8_t kFirstByteOfLane[16] = {
+      0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12};
+  const Repartition<uint8_t, decltype(d)> d_bytes;
+  return Load(d_bytes, kFirstByteOfLane);
+}
+
+// 8-byte lanes.
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecBroadcastLaneBytes(
+    D d) {
+  alignas(16) static constexpr uint8_t kFirstByteOfLane[16] = {
+      0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8};
+  const Repartition<uint8_t, decltype(d)> d_bytes;
+  return Load(d_bytes, kFirstByteOfLane);
+}
+
+// Per-byte offset within each lane (0 .. sizeof(T) - 1), added to the
+// lane-start byte index. For 1-byte lanes every offset is zero.
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecByteOffsets(D d) {
+  const Repartition<uint8_t, decltype(d)> d_bytes;
+  return Zero(d_bytes);
+}
+
+// 2-byte lanes.
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecByteOffsets(D d) {
+  alignas(16) static constexpr uint8_t kOffsetInLane[16] = {
+      0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
+  const Repartition<uint8_t, decltype(d)> d_bytes;
+  return Load(d_bytes, kOffsetInLane);
+}
+
+// 4-byte lanes.
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecByteOffsets(D d) {
+  alignas(16) static constexpr uint8_t kOffsetInLane[16] = {
+      0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
+  const Repartition<uint8_t, decltype(d)> d_bytes;
+  return Load(d_bytes, kOffsetInLane);
+}
+
+// 8-byte lanes.
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecByteOffsets(D d) {
+  alignas(16) static constexpr uint8_t kOffsetInLane[16] = {
+      0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7};
+  const Repartition<uint8_t, decltype(d)> d_bytes;
+  return Load(d_bytes, kOffsetInLane);
+}
+
+}  // namespace detail
+
+// 1-byte lanes: lane indices already are byte indices, so the index vector is
+// only reinterpreted. Debug builds check that every index is < 2 * MaxLanes.
+template <class D, typename TI, HWY_IF_T_SIZE_D(D, 1)>
+HWY_API Indices128<TFromD<D>, MaxLanes(D())> IndicesFromVec(
+    D d, Vec128<TI, MaxLanes(D())> vec) {
+  using T = TFromD<D>;
+  static_assert(sizeof(T) == sizeof(TI), "Index size must match lane");
+#if HWY_IS_DEBUG_BUILD
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  const auto index_limit = Set(du, static_cast<TU>(MaxLanes(d) * 2));
+  HWY_DASSERT(AllTrue(du, Lt(BitCast(du, vec), index_limit)));
+#endif
+
+  (void)d;
+  const auto reinterpreted = BitCast(d, vec);
+  return Indices128<T, MaxLanes(D())>{reinterpreted.raw};
+}
+
+// 2/4/8-byte lanes: converts lane indices into per-byte shuffle indices.
+// Debug builds check that every index is < 2 * MaxLanes.
+template <class D, typename TI,
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 2) | (1 << 4) | (1 << 8))>
+HWY_API Indices128<TFromD<D>, MaxLanes(D())> IndicesFromVec(
+    D d, Vec128<TI, MaxLanes(D())> vec) {
+  using T = TFromD<D>;
+  static_assert(sizeof(T) == sizeof(TI), "Index size must match lane");
+#if HWY_IS_DEBUG_BUILD
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  const auto index_limit = Set(du, static_cast<TU>(MaxLanes(d) * 2));
+  HWY_DASSERT(AllTrue(du, Lt(BitCast(du, vec), index_limit)));
+#endif
+
+  const Repartition<uint8_t, decltype(d)> d8;
+  using V8 = VFromD<decltype(d8)>;
+
+  // Broadcast each lane index to all bytes of T and shift to bytes
+  const V8 broadcast_pattern = detail::IndicesFromVecBroadcastLaneBytes(d);
+  const V8 replicated = TableLookupBytes(BitCast(d8, vec), broadcast_pattern);
+  constexpr int kLog2LaneBytes = static_cast<int>(FloorLog2(sizeof(T)));
+  const V8 lane_start = ShiftLeft<kLog2LaneBytes>(replicated);
+  // Add each byte's offset within its lane.
+  const V8 in_lane_offsets = detail::IndicesFromVecByteOffsets(d);
+  const V8 byte_idx = Add(lane_start, in_lane_offsets);
+  return Indices128<T, MaxLanes(D())>{BitCast(d, byte_idx).raw};
+}
+
+// Loads lane indices from the (possibly unaligned) array `idx` and converts
+// them via IndicesFromVec.
+template <class D, typename TI>
+HWY_API Indices128<TFromD<D>, MaxLanes(D())> SetTableIndices(D d,
+                                                             const TI* idx) {
+  const Rebind<TI, decltype(d)> d_idx;
+  const auto loaded = LoadU(d_idx, idx);
+  return IndicesFromVec(d, loaded);
+}
+
+// Lane permutation implemented as a byte-level table lookup; both the table
+// and the indices are viewed through the signed tag of the same lane size.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> TableLookupLanes(Vec128<T, N> v, Indices128<T, N> idx) {
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;
+  const auto table = BitCast(di, v);
+  const auto byte_idx = BitCast(di, Vec128<T, N>{idx.raw});
+  return BitCast(d, TableLookupBytes(table, byte_idx));
+}
+
+// Vectors of at most 4 bytes: `b` (upper) and `a` (lower) are combined into
+// one double-length table, and the lower half of a single-table lookup is
+// returned.
+template <typename T, size_t N, HWY_IF_V_SIZE_LE(T, N, 4)>
+HWY_API Vec128<T, N> TwoTablesLookupLanes(Vec128<T, N> a, Vec128<T, N> b,
+                                          Indices128<T, N> idx) {
+  const DFromV<decltype(a)> d;
+  const Twice<decltype(d)> d_twice;
+  const auto table = Combine(d_twice, b, a);
+// TableLookupLanes currently requires table and index vectors to be the same
+// size, though a half-length index vector would be sufficient here.
+#if HWY_IS_MSAN
+  // Under MSAN the indices are duplicated into both halves (presumably so
+  // that no uninitialized upper half is read).
+  const Vec128<T, N> idx_half{idx.raw};
+  const Indices128<T, N * 2> idx_twice{
+      Combine(d_twice, idx_half, idx_half).raw};
+#else
+  // We only keep LowerHalf of the result, which is valid in idx.
+  const Indices128<T, N * 2> idx_twice{idx.raw};
+#endif
+  return LowerHalf(d, TableLookupLanes(table, idx_twice));
+}
+
+// 64-bit vectors: byte-level lookup into the 16 bytes formed by `a` (first)
+// and `b` (second).
+template <typename T>
+HWY_API Vec64<T> TwoTablesLookupLanes(Vec64<T> a, Vec64<T> b,
+                                      Indices128<T, 8 / sizeof(T)> idx) {
+  const DFromV<decltype(a)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  const auto bytes_a = BitCast(du8, a);
+  const auto bytes_b = BitCast(du8, b);
+  const auto bytes_idx = BitCast(du8, Vec64<T>{idx.raw});
+
+#if HWY_ARCH_ARM_A64
+  // A64: combine into one 128-bit table and use the single-register lookup.
+  const Twice<decltype(du8)> du8_twice;
+  const auto table = Combine(du8_twice, bytes_b, bytes_a);
+  const Vec64<uint8_t> looked_up{vqtbl1_u8(table.raw, bytes_idx.raw)};
+  return BitCast(d, looked_up);
+#else
+  // Armv7: two-register 64-bit table lookup.
+  detail::Tuple2<uint8_t, du8.MaxLanes()> tables = {
+      {{bytes_a.raw, bytes_b.raw}}};
+  const Vec64<uint8_t> looked_up{vtbl2_u8(tables.raw, bytes_idx.raw)};
+  return BitCast(d, looked_up);
+#endif
+}
+
+// 128-bit vectors: byte-level lookup into the 32 bytes formed by `a` (first)
+// and `b` (second).
+template <typename T>
+HWY_API Vec128<T> TwoTablesLookupLanes(Vec128<T> a, Vec128<T> b,
+                                       Indices128<T, 16 / sizeof(T)> idx) {
+  const DFromV<decltype(a)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  const auto bytes_a = BitCast(du8, a);
+  const auto bytes_b = BitCast(du8, b);
+  const auto bytes_idx = BitCast(du8, Vec128<T>{idx.raw});
+
+#if HWY_ARCH_ARM_A64
+  // A64: a single two-register 128-bit table lookup.
+  detail::Tuple2<uint8_t, du8.MaxLanes()> tables = {
+      {{bytes_a.raw, bytes_b.raw}}};
+  const Vec128<uint8_t> looked_up{vqtbl2q_u8(tables.raw, bytes_idx.raw)};
+  return BitCast(d, looked_up);
+#else
+  // Armv7: split everything into 64-bit halves; the four table halves form
+  // one vtbl4 table, which is looked up once per index half.
+  const Half<decltype(d)> dh;
+  const Repartition<uint8_t, decltype(dh)> dh_u8;
+  const auto a_lower = LowerHalf(dh_u8, bytes_a);
+  const auto a_upper = UpperHalf(dh_u8, bytes_a);
+  const auto b_lower = LowerHalf(dh_u8, bytes_b);
+  const auto b_upper = UpperHalf(dh_u8, bytes_b);
+  const auto idx_lower = LowerHalf(dh_u8, bytes_idx);
+  const auto idx_upper = UpperHalf(dh_u8, bytes_idx);
+
+  detail::Tuple4<uint8_t, dh_u8.MaxLanes()> tables = {
+      {{a_lower.raw, a_upper.raw, b_lower.raw, b_upper.raw}}};
+  const Vec64<uint8_t> lower_bytes{vtbl4_u8(tables.raw, idx_lower.raw)};
+  const Vec64<uint8_t> upper_bytes{vtbl4_u8(tables.raw, idx_upper.raw)};
+  return Combine(d, BitCast(dh, upper_bytes), BitCast(dh, lower_bytes));
+#endif
+}
+
+// ------------------------------ Reverse2 (CombineShiftRightBytes)
+
+// Per-target flag to prevent generic_ops-inl.h defining 8-bit Reverse2/4/8.
+#ifdef HWY_NATIVE_REVERSE2_8  // flag is toggled, not merely defined
+#undef HWY_NATIVE_REVERSE2_8
+#else
+#define HWY_NATIVE_REVERSE2_8
+#endif  // HWY_NATIVE_REVERSE2_8 toggle
+
+// Swaps adjacent 8-bit lanes in vectors of at most 8 bytes (vrev16_u8).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Reverse2(D d, VFromD<D> v) {
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const VU swapped(vrev16_u8(BitCast(du, v).raw));
+  return BitCast(d, swapped);
+}
+// Swaps adjacent 8-bit lanes of a full 128-bit vector (vrev16q_u8).
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T> Reverse2(D d, Vec128<T> v) {
+  const RebindToUnsigned<decltype(d)> du;
+  const Vec128<uint8_t> swapped(vrev16q_u8(BitCast(du, v).raw));
+  return BitCast(d, swapped);
+}
+
+// Swaps adjacent 16-bit lanes in vectors of at most 8 bytes (vrev32_u16).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Reverse2(D d, VFromD<D> v) {
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const VU swapped(vrev32_u16(BitCast(du, v).raw));
+  return BitCast(d, swapped);
+}
+// Swaps adjacent 16-bit lanes of a full 128-bit vector (vrev32q_u16).
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec128<T> Reverse2(D d, Vec128<T> v) {
+  const RebindToUnsigned<decltype(d)> du;
+  const Vec128<uint16_t> swapped(vrev32q_u16(BitCast(du, v).raw));
+  return BitCast(d, swapped);
+}
+
+// Swaps adjacent 32-bit lanes in vectors of at most 8 bytes (vrev64_u32).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> Reverse2(D d, VFromD<D> v) {
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const VU swapped(vrev64_u32(BitCast(du, v).raw));
+  return BitCast(d, swapped);
+}
+// Swaps adjacent 32-bit lanes of a full 128-bit vector (vrev64q_u32).
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T> Reverse2(D d, Vec128<T> v) {
+  const RebindToUnsigned<decltype(d)> du;
+  const Vec128<uint32_t> swapped(vrev64q_u32(BitCast(du, v).raw));
+  return BitCast(d, swapped);
+}
+
+// 64-bit lanes: `v` is passed as both halves of the byte-shifted pair, with
+// a shift of one 64-bit lane.
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> Reverse2(D d, VFromD<D> v) {
+  constexpr int kBytesPerLane = 8;
+  return CombineShiftRightBytes<kBytesPerLane>(d, v, v);
+}
+
+// ------------------------------ Reverse4 (Reverse2)
+
+// Reverses each group of four 8-bit lanes in vectors of at most 8 bytes
+// (vrev32_u8).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Reverse4(D d, VFromD<D> v) {
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const VU reversed(vrev32_u8(BitCast(du, v).raw));
+  return BitCast(d, reversed);
+}
+// Reverses each group of four 8-bit lanes of a full 128-bit vector
+// (vrev32q_u8).
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T> Reverse4(D d, Vec128<T> v) {
+  const RebindToUnsigned<decltype(d)> du;
+  const Vec128<uint8_t> reversed(vrev32q_u8(BitCast(du, v).raw));
+  return BitCast(d, reversed);
+}
+
+// Reverses each group of four 16-bit lanes in vectors of at most 8 bytes
+// (vrev64_u16).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Reverse4(D d, VFromD<D> v) {
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const VU reversed(vrev64_u16(BitCast(du, v).raw));
+  return BitCast(d, reversed);
+}
+// Reverses each group of four 16-bit lanes of a full 128-bit vector
+// (vrev64q_u16).
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec128<T> Reverse4(D d, Vec128<T> v) {
+  const RebindToUnsigned<decltype(d)> du;
+  const Vec128<uint16_t> reversed(vrev64q_u16(BitCast(du, v).raw));
+  return BitCast(d, reversed);
+}
+
+// 32-bit lanes: first swap neighbours within each pair, then swap the pairs
+// themselves by treating each pair as one double-width lane.
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> Reverse4(D d, VFromD<D> v) {
+  using DUW = RepartitionToWide<RebindToUnsigned<decltype(d)>>;
+  const DUW d_wide;
+  const auto within_pairs = BitCast(d_wide, Reverse2(d, v));
+  return BitCast(d, Reverse2(d_wide, within_pairs));
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> Reverse4(D /* tag */, VFromD<D>) {
+  HWY_ASSERT(0);  // don't have 4 u64 lanes; this overload must never be called
+}
+
+// ------------------------------ Reverse8 (Reverse2, Reverse4)
+
+// Reverses the eight 8-bit lanes of a vector of at most 8 bytes (vrev64_u8).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Reverse8(D d, VFromD<D> v) {
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const VU reversed(vrev64_u8(BitCast(du, v).raw));
+  return BitCast(d, reversed);
+}
+// Reverses each group of eight 8-bit lanes of a full 128-bit vector
+// (vrev64q_u8).
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T> Reverse8(D d, Vec128<T> v) {
+  const RebindToUnsigned<decltype(d)> du;
+  const Vec128<uint8_t> reversed(vrev64q_u8(BitCast(du, v).raw));
+  return BitCast(d, reversed);
+}
+
+// 16-bit lanes: reverse within each group of four lanes, then swap the two
+// groups by treating each as one 64-bit lane.
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Reverse8(D d, VFromD<D> v) {
+  const Repartition<uint64_t, decltype(d)> du64;
+  const auto within_groups = BitCast(du64, Reverse4(d, v));
+  return BitCast(d, Reverse2(du64, within_groups));
+}
+
+template <class D, HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 4) | (1 << 8))>
+HWY_API VFromD<D> Reverse8(D, VFromD<D>) {
+  HWY_ASSERT(0);  // don't have 8 lanes if larger than 16-bit; must never be called
+}
+
+// ------------------------------ Reverse (Reverse2, Reverse4, Reverse8)
+
+template <class D, typename T = TFromD<D>, HWY_IF_LANES_D(D, 1)>
+HWY_API Vec128<T, 1> Reverse(D /* tag */, Vec128<T, 1> v) {
+  return v;  // a single lane is its own reversal
+}
+
+template <class D, typename T = TFromD<D>, HWY_IF_LANES_D(D, 2)>
+HWY_API Vec128<T, 2> Reverse(D d, Vec128<T, 2> v) {
+  return Reverse2(d, v);  // two lanes: full reversal == swapping the pair
+}
+
+template <class D, typename T = TFromD<D>, HWY_IF_LANES_D(D, 4)>
+HWY_API Vec128<T, 4> Reverse(D d, Vec128<T, 4> v) {
+  return Reverse4(d, v);  // four lanes: the only group of four is the vector
+}
+
+template <class D, typename T = TFromD<D>, HWY_IF_LANES_D(D, 8)>
+HWY_API Vec128<T, 8> Reverse(D d, Vec128<T, 8> v) {
+  return Reverse8(d, v);  // eight lanes: the only group of eight is the vector
+}
+
+// Sixteen lanes: reverse within each group of eight lanes, then swap the two
+// groups by treating each as one 64-bit lane.
+template <class D, typename T = TFromD<D>, HWY_IF_LANES_D(D, 16)>
+HWY_API Vec128<T> Reverse(D d, Vec128<T> v) {
+  const Repartition<uint64_t, decltype(d)> du64;
+  const auto within_groups = BitCast(du64, Reverse8(d, v));
+  return BitCast(d, Reverse2(du64, within_groups));
+}
+
+// ------------------------------ ReverseBits
+
+#if HWY_ARCH_ARM_A64  // the definitions below are only emitted for A64
+
+#ifdef HWY_NATIVE_REVERSE_BITS_UI8  // flag is toggled, not merely defined
+#undef HWY_NATIVE_REVERSE_BITS_UI8
+#else
+#define HWY_NATIVE_REVERSE_BITS_UI8
+#endif
+
+HWY_NEON_DEF_FUNCTION_INT_8(ReverseBits, vrbit, _, 1)  // presumably int8_t lanes via vrbit - macro is defined elsewhere
+HWY_NEON_DEF_FUNCTION_UINT_8(ReverseBits, vrbit, _, 1)  // presumably uint8_t lanes via vrbit - macro is defined elsewhere
+
+#endif  // HWY_ARCH_ARM_A64
+
+// ------------------------------ Other shuffles (TableLookupBytes)
+
+// Notation: let Vec128<int32_t> have lanes 3,2,1,0 (0 is least-significant).
+// Shuffle0321 rotates one lane to the right (the previous least-significant
+// lane is now most-significant). These could also be implemented via
+// CombineShiftRightBytes but the shuffle_abcd notation is more convenient.
+
+// Swap 64-bit halves
+// Swaps the 64-bit halves: `v` supplies both halves of the pair, which is
+// then shifted right by 8 bytes.
+template <typename T>
+HWY_API Vec128<T> Shuffle1032(Vec128<T> v) {
+  const DFromV<decltype(v)> d;
+  return CombineShiftRightBytes<8>(d, v, v);
+}
+// Same operation as Shuffle1032 (8-byte rotation), named for two-lane use.
+template <typename T>
+HWY_API Vec128<T> Shuffle01(Vec128<T> v) {
+  const DFromV<decltype(v)> d;
+  return CombineShiftRightBytes<8>(d, v, v);
+}
+
+// Rotate right 32 bits
+// Rotates right by 32 bits: `v` supplies both halves of the pair, which is
+// then shifted right by 4 bytes.
+template <typename T>
+HWY_API Vec128<T> Shuffle0321(Vec128<T> v) {
+  const DFromV<decltype(v)> d;
+  return CombineShiftRightBytes<4>(d, v, v);
+}
+
+// Rotate left 32 bits
+// Rotates left by 32 bits, expressed as a 12-byte right shift of the pair
+// formed by `v` and itself.
+template <typename T>
+HWY_API Vec128<T> Shuffle2103(Vec128<T> v) {
+  const DFromV<decltype(v)> d;
+  return CombineShiftRightBytes<12>(d, v, v);
+}
+
+// Reverse
+// Reverses the lanes by forwarding to Reverse4.
+template <typename T>
+HWY_API Vec128<T> Shuffle0123(Vec128<T> v) {
+  const DFromV<decltype(v)> d;
+  return Reverse4(d, v);
+}
+
+// ------------------------------ InterleaveLower
+
+// Interleaves lanes from halves of the 128-bit blocks of "a" (which provides
+// the least-significant lane) and "b". To concatenate two half-width integers
+// into one, use ZipLower/Upper instead (also works with scalar).
+HWY_NEON_DEF_FUNCTION_UIF_8_16_32(InterleaveLower, vzip1, _, 2)
+#if HWY_ARCH_ARM_A64
+// N=1 makes no sense (in that case, there would be no upper/lower).
+HWY_NEON_DEF_FUNCTION_FULL_UIF_64(InterleaveLower, vzip1, _, 2)
+#else
+// Armv7 emulation for 64-bit lanes: swap the halves of `a`, then take the
+// 16 bytes starting at byte 8 of the pair (b, swapped a).
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T> InterleaveLower(Vec128<T> a, Vec128<T> b) {
+  const DFromV<decltype(a)> d;
+  const Vec128<T> a_swapped = Shuffle01(a);
+  return CombineShiftRightBytes<8>(d, b, a_swapped);
+}
+#endif
+
+// < 64 bit parts
+// Vectors of at most 4 bytes: reuse the 64-bit overload on the same raw
+// registers and re-wrap the result with the original lane count.
+template <typename T, size_t N, HWY_IF_V_SIZE_LE(T, N, 4)>
+HWY_API Vec128<T, N> InterleaveLower(Vec128<T, N> a, Vec128<T, N> b) {
+  const Vec64<T> a64(a.raw);
+  const Vec64<T> b64(b.raw);
+  return Vec128<T, N>(InterleaveLower(a64, b64).raw);
+}
+
+// Additional overload for the optional Simd<> tag.
+template <class D>
+HWY_API VFromD<D> InterleaveLower(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  return InterleaveLower(a, b);  // the tag is ignored; forwards to the tag-less overload
+}
+
+// ------------------------------ InterleaveUpper (UpperHalf)
+
+// All functions inside detail lack the required D parameter.
+namespace detail {
+HWY_NEON_DEF_FUNCTION_UIF_8_16_32(InterleaveUpper, vzip2, _, 2)
+
+#if HWY_ARCH_ARM_A64
+// N=1 makes no sense (in that case, there would be no upper/lower).
+HWY_NEON_DEF_FUNCTION_FULL_UIF_64(InterleaveUpper, vzip2, _, 2)
+#else
+// Armv7 emulation for 64-bit lanes: swap the halves of `b`, then take the
+// 16 bytes starting at byte 8 of the pair (swapped b, a).
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T> InterleaveUpper(Vec128<T> a, Vec128<T> b) {
+  const DFromV<decltype(a)> d;
+  const Vec128<T> b_swapped = Shuffle01(b);
+  return CombineShiftRightBytes<8>(d, b_swapped, a);
+}
+#endif
+}  // namespace detail
+
+// Full register
+template <class D, HWY_IF_V_SIZE_GT_D(D, 4)>
+HWY_API VFromD<D> InterleaveUpper(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  return detail::InterleaveUpper(a, b);  // vectors larger than 4 bytes: forward to the tag-less detail overloads
+}
+
+// Partial
+// Vectors of at most 4 bytes: move each input's upper half into the lower
+// position, then interleave those lower halves.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4)>
+HWY_API VFromD<D> InterleaveUpper(D d, VFromD<D> a, VFromD<D> b) {
+  const Half<decltype(d)> dh;
+  const VFromD<D> a_upper(UpperHalf(dh, a).raw);
+  const VFromD<D> b_upper(UpperHalf(dh, b).raw);
+  return InterleaveLower(d, a_upper, b_upper);
+}
+
+// ------------------------------ ZipLower/ZipUpper (InterleaveLower)
+
+// Same as Interleave*, except that the return lanes are double-width integers;
+// this is necessary because the single-lane scalar cannot return two values.
+// Interleaves the lower lanes of `a` and `b` and reinterprets the result as
+// double-width lanes.
+template <class V, class DW = RepartitionToWide<DFromV<V>>>
+HWY_API VFromD<DW> ZipLower(V a, V b) {
+  const DW dw;
+  return BitCast(dw, InterleaveLower(a, b));
+}
+// As above, but the caller passes the wide tag explicitly.
+template <class V, class D = DFromV<V>, class DW = RepartitionToWide<D>>
+HWY_API VFromD<DW> ZipLower(DW dw, V a, V b) {
+  const D d;
+  return BitCast(dw, InterleaveLower(d, a, b));
+}
+
+// Interleaves the upper lanes of `a` and `b` and reinterprets the result as
+// double-width lanes.
+template <class V, class D = DFromV<V>, class DW = RepartitionToWide<D>>
+HWY_API VFromD<DW> ZipUpper(DW dw, V a, V b) {
+  const D d;
+  return BitCast(dw, InterleaveUpper(d, a, b));
+}
+
+// ------------------------------ Per4LaneBlockShuffle
+namespace detail {
+
+#if HWY_COMPILER_GCC || HWY_COMPILER_CLANG
+
+// Toggled flag; presumably tells other headers that
+// Per4LaneBlkShufDupSet4xU32 is provided here.
+#ifdef HWY_NATIVE_PER4LANEBLKSHUF_DUP32
+#undef HWY_NATIVE_PER4LANEBLKSHUF_DUP32
+#else
+#define HWY_NATIVE_PER4LANEBLKSHUF_DUP32
+#endif
+
+// Vectors of at most 8 bytes: builds {x0, x1} as two 32-bit values using the
+// GCC/Clang vector extension; x3 and x2 are ignored.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_INLINE VFromD<D> Per4LaneBlkShufDupSet4xU32(D d, const uint32_t /*x3*/,
+                                                const uint32_t /*x2*/,
+                                                const uint32_t x1,
+                                                const uint32_t x0) {
+  typedef uint32_t RawU32x2 __attribute__((__vector_size__(8)));
+  const RawU32x2 lanes = {x0, x1};
+  const Vec64<uint32_t> v64(reinterpret_cast<uint32x2_t>(lanes));
+  return ResizeBitCast(d, v64);
+}
+
+// 16-byte vectors: builds {x0, x1, x2, x3} as four 32-bit values using the
+// GCC/Clang vector extension.
+template <class D, HWY_IF_V_SIZE_D(D, 16)>
+HWY_INLINE VFromD<D> Per4LaneBlkShufDupSet4xU32(D d, const uint32_t x3,
+                                                const uint32_t x2,
+                                                const uint32_t x1,
+                                                const uint32_t x0) {
+  typedef uint32_t RawU32x4 __attribute__((__vector_size__(16)));
+  const RawU32x4 lanes = {x0, x1, x2, x3};
+  const Vec128<uint32_t> v128(reinterpret_cast<uint32x4_t>(lanes));
+  return ResizeBitCast(d, v128);
+}
+#endif  // HWY_COMPILER_GCC || HWY_COMPILER_CLANG
+
+// Index pattern 0x88, more than four lanes: gather the even lanes, then
+// interleave them with themselves as double-width lanes.
+template <size_t kLaneSize, size_t kVectSize, class V,
+          HWY_IF_LANES_GT_D(DFromV<V>, 4)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0x88> /*idx_3210_tag*/,
+                                  hwy::SizeTag<kLaneSize> /*lane_size_tag*/,
+                                  hwy::SizeTag<kVectSize> /*vect_size_tag*/,
+                                  V v) {
+  const DFromV<decltype(v)> d;
+  using DW = RepartitionToWide<RebindToUnsigned<decltype(d)>>;
+  const DW dw;
+
+  const auto even_lanes = BitCast(dw, ConcatEven(d, v, v));
+  const auto duplicated = InterleaveLower(dw, even_lanes, even_lanes);
+  return BitCast(d, duplicated);
+}
+
+// Index pattern 0xDD, more than four lanes: same as above but starting from
+// the odd lanes.
+template <size_t kLaneSize, size_t kVectSize, class V,
+          HWY_IF_LANES_GT_D(DFromV<V>, 4)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0xDD> /*idx_3210_tag*/,
+                                  hwy::SizeTag<kLaneSize> /*lane_size_tag*/,
+                                  hwy::SizeTag<kVectSize> /*vect_size_tag*/,
+                                  V v) {
+  const DFromV<decltype(v)> d;
+  using DW = RepartitionToWide<RebindToUnsigned<decltype(d)>>;
+  const DW dw;
+
+  const auto odd_lanes = BitCast(dw, ConcatOdd(d, v, v));
+  const auto duplicated = InterleaveLower(dw, odd_lanes, odd_lanes);
+  return BitCast(d, duplicated);
+}
+
+// Index pattern 0xFA with 2-byte lanes in an 8-byte vector: interleave the
+// upper half of `v` with itself.
+template <class V>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0xFA> /*idx_3210_tag*/,
+                                  hwy::SizeTag<2> /*lane_size_tag*/,
+                                  hwy::SizeTag<8> /*vect_size_tag*/, V v) {
+  return InterleaveUpper(DFromV<decltype(v)>(), v, v);
+}
+
+}  // namespace detail
+
+// ------------------------------ SlideUpLanes
+
+namespace detail {
+
+// Vectors of at most 8 bytes: treat the whole vector as one unsigned integer
+// and shift it left by `amt` lanes' worth of bits.
+template <class V, HWY_IF_V_SIZE_LE_V(V, 8)>
+HWY_INLINE V SlideUpLanes(V v, size_t amt) {
+  const DFromV<decltype(v)> d;
+  using TU = UnsignedFromSize<d.MaxBytes()>;
+  const Repartition<TU, decltype(d)> du;
+  const size_t shift_bits = amt * sizeof(TFromV<V>) * 8;
+  const auto shift_count = Set(du, static_cast<TU>(shift_bits));
+  return BitCast(d, BitCast(du, v) << shift_count);
+}
+
+// 16-byte vectors: byte table lookup whose indices start at
+// -(amt * sizeof(T)) modulo 256, using the "Or0" lookup variant.
+template <class V, HWY_IF_V_SIZE_V(V, 16)>
+HWY_INLINE V SlideUpLanes(V v, size_t amt) {
+  const DFromV<decltype(v)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  const size_t slide_bytes = amt * sizeof(TFromV<V>);
+  const auto byte_idx =
+      Iota(du8, static_cast<uint8_t>(size_t{0} - slide_bytes));
+  return BitCast(d, TableLookupBytesOr0(BitCast(du8, v), byte_idx));
+}
+
+}  // namespace detail
+
+template <class D, HWY_IF_LANES_D(D, 1)>
+HWY_API VFromD<D> SlideUpLanes(D /*d*/, VFromD<D> v, size_t /*amt*/) {
+  return v;  // single-lane vectors are returned unchanged; amt is ignored
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 2)>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(amt)) {  // amt known at compile time: use fixed-amount shifts
+    switch (amt) {
+      case 0:
+        return v;  // nothing to slide
+      case 1:
+        return ShiftLeftLanes<1>(d, v);
+    }
+  }
+#else
+  (void)d;  // d is only used by the constant-amount path above
+#endif
+
+  return detail::SlideUpLanes(v, amt);  // runtime amt, or a constant not handled above
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 4)>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(amt)) {  // amt known at compile time: use fixed-amount shifts
+    switch (amt) {
+      case 0:
+        return v;  // nothing to slide
+      case 1:
+        return ShiftLeftLanes<1>(d, v);
+      case 2:
+        return ShiftLeftLanes<2>(d, v);
+      case 3:
+        return ShiftLeftLanes<3>(d, v);
+    }
+  }
+#else
+  (void)d;  // d is only used by the constant-amount path above
+#endif
+
+  return detail::SlideUpLanes(v, amt);  // runtime amt, or a constant not handled above
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 8)>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(amt)) {  // amt known at compile time: use fixed-amount shifts
+    switch (amt) {
+      case 0:
+        return v;  // nothing to slide
+      case 1:
+        return ShiftLeftLanes<1>(d, v);
+      case 2:
+        return ShiftLeftLanes<2>(d, v);
+      case 3:
+        return ShiftLeftLanes<3>(d, v);
+      case 4:
+        return ShiftLeftLanes<4>(d, v);
+      case 5:
+        return ShiftLeftLanes<5>(d, v);
+      case 6:
+        return ShiftLeftLanes<6>(d, v);
+      case 7:
+        return ShiftLeftLanes<7>(d, v);
+    }
+  }
+#else
+  (void)d;  // d is only used by the constant-amount path above
+#endif
+
+  return detail::SlideUpLanes(v, amt);  // runtime amt, or a constant not handled above
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_LANES_D(D, 16)>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(amt)) {  // amt known at compile time: use fixed-amount shifts
+    switch (amt) {
+      case 0:
+        return v;  // nothing to slide
+      case 1:
+        return ShiftLeftLanes<1>(d, v);
+      case 2:
+        return ShiftLeftLanes<2>(d, v);
+      case 3:
+        return ShiftLeftLanes<3>(d, v);
+      case 4:
+        return ShiftLeftLanes<4>(d, v);
+      case 5:
+        return ShiftLeftLanes<5>(d, v);
+      case 6:
+        return ShiftLeftLanes<6>(d, v);
+      case 7:
+        return ShiftLeftLanes<7>(d, v);
+      case 8:
+        return ShiftLeftLanes<8>(d, v);
+      case 9:
+        return ShiftLeftLanes<9>(d, v);
+      case 10:
+        return ShiftLeftLanes<10>(d, v);
+      case 11:
+        return ShiftLeftLanes<11>(d, v);
+      case 12:
+        return ShiftLeftLanes<12>(d, v);
+      case 13:
+        return ShiftLeftLanes<13>(d, v);
+      case 14:
+        return ShiftLeftLanes<14>(d, v);
+      case 15:
+        return ShiftLeftLanes<15>(d, v);
+    }
+  }
+#else
+  (void)d;  // d is only used by the constant-amount path above
+#endif
+
+  return detail::SlideUpLanes(v, amt);  // runtime amt, or a constant not handled above
+}
+
+// ------------------------------ SlideDownLanes
+
+namespace detail {
+
+// Vectors of at most 8 bytes: treat the whole vector as one unsigned integer
+// and apply the variable shift with a negated bit count.
+template <class V, HWY_IF_V_SIZE_LE_V(V, 8)>
+HWY_INLINE V SlideDownLanes(V v, size_t amt) {
+  const DFromV<decltype(v)> d;
+  using TU = UnsignedFromSize<d.MaxBytes()>;
+  const Repartition<TU, decltype(d)> du;
+  const size_t shift_bits = amt * sizeof(TFromV<V>) * 8;
+  const auto negated_count = Set(du, static_cast<TU>(TU{0} - shift_bits));
+  return BitCast(d, BitCast(du, v) << negated_count);
+}
+
+// 16-byte vectors: byte table lookup starting at byte amt * sizeof(T).
+// Indices above 15 are OR-ed with an all-ones mask before the "Or0" lookup.
+template <class V, HWY_IF_V_SIZE_V(V, 16)>
+HWY_INLINE V SlideDownLanes(V v, size_t amt) {
+  const DFromV<decltype(v)> d;
+  const Repartition<int8_t, decltype(d)> di8;
+  const auto first_byte = static_cast<int8_t>(amt * sizeof(TFromV<V>));
+  const auto raw_idx = Iota(di8, first_byte);
+  const auto past_end = VecFromMask(di8, raw_idx > Set(di8, int8_t{15}));
+  const auto byte_idx = Or(raw_idx, past_end);
+  return BitCast(d, TableLookupBytesOr0(BitCast(di8, v), byte_idx));
+}
+
+}  // namespace detail
+
+template <class D, HWY_IF_LANES_D(D, 1)>
+HWY_API VFromD<D> SlideDownLanes(D /*d*/, VFromD<D> v, size_t /*amt*/) {
+  return v;  // single-lane vectors are returned unchanged; amt is ignored
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 2)>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(amt)) {  // amt known at compile time: use fixed-amount shifts
+    switch (amt) {
+      case 0:
+        return v;  // nothing to slide
+      case 1:
+        return ShiftRightLanes<1>(d, v);
+    }
+  }
+#else
+  (void)d;  // d is only used by the constant-amount path above
+#endif
+
+  return detail::SlideDownLanes(v, amt);  // runtime amt, or a constant not handled above
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 4)>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(amt)) {  // amt known at compile time: use fixed-amount shifts
+    switch (amt) {
+      case 0:
+        return v;  // nothing to slide
+      case 1:
+        return ShiftRightLanes<1>(d, v);
+      case 2:
+        return ShiftRightLanes<2>(d, v);
+      case 3:
+        return ShiftRightLanes<3>(d, v);
+    }
+  }
+#else
+  (void)d;  // d is only used by the constant-amount path above
+#endif
+
+  return detail::SlideDownLanes(v, amt);  // runtime amt, or a constant not handled above
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 8)>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(amt)) {  // amt known at compile time: use fixed-amount shifts
+    switch (amt) {
+      case 0:
+        return v;  // nothing to slide
+      case 1:
+        return ShiftRightLanes<1>(d, v);
+      case 2:
+        return ShiftRightLanes<2>(d, v);
+      case 3:
+        return ShiftRightLanes<3>(d, v);
+      case 4:
+        return ShiftRightLanes<4>(d, v);
+      case 5:
+        return ShiftRightLanes<5>(d, v);
+      case 6:
+        return ShiftRightLanes<6>(d, v);
+      case 7:
+        return ShiftRightLanes<7>(d, v);
+    }
+  }
+#else
+  (void)d;  // d is only used by the constant-amount path above
+#endif
+
+  return detail::SlideDownLanes(v, amt);  // runtime amt, or a constant not handled above
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_LANES_D(D, 16)>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(amt)) {  // amt known at compile time: use fixed-amount shifts
+    switch (amt) {
+      case 0:
+        return v;  // nothing to slide
+      case 1:
+        return ShiftRightLanes<1>(d, v);
+      case 2:
+        return ShiftRightLanes<2>(d, v);
+      case 3:
+        return ShiftRightLanes<3>(d, v);
+      case 4:
+        return ShiftRightLanes<4>(d, v);
+      case 5:
+        return ShiftRightLanes<5>(d, v);
+      case 6:
+        return ShiftRightLanes<6>(d, v);
+      case 7:
+        return ShiftRightLanes<7>(d, v);
+      case 8:
+        return ShiftRightLanes<8>(d, v);
+      case 9:
+        return ShiftRightLanes<9>(d, v);
+      case 10:
+        return ShiftRightLanes<10>(d, v);
+      case 11:
+        return ShiftRightLanes<11>(d, v);
+      case 12:
+        return ShiftRightLanes<12>(d, v);
+      case 13:
+        return ShiftRightLanes<13>(d, v);
+      case 14:
+        return ShiftRightLanes<14>(d, v);
+      case 15:
+        return ShiftRightLanes<15>(d, v);
+    }
+  }
+#else
+  (void)d;  // d is only used by the constant-amount path above
+#endif
+
+  return detail::SlideDownLanes(v, amt);  // runtime amt, or a constant not handled above
+}
+
+// ------------------------------ ReorderWidenMulAccumulate (MulAdd, ZipLower)
+
+#if HWY_NEON_HAVE_BFLOAT16
+
+// Native bf16, 128-bit: a single vbfdotq_f32 accumulates into sum0; sum1 is
+// left untouched.
+template <class D, HWY_IF_V_SIZE_D(D, 16)>
+HWY_API Vec128<float> ReorderWidenMulAccumulate(D /*d32*/, Vec128<bfloat16_t> a,
+                                                Vec128<bfloat16_t> b,
+                                                const Vec128<float> sum0,
+                                                Vec128<float>& /*sum1*/) {
+  const float32x4_t accumulated = vbfdotq_f32(sum0.raw, a.raw, b.raw);
+  return Vec128<float>(accumulated);
+}
+
+// Native bf16, at most 64 bits: a single vbfdot_f32 accumulates into sum0;
+// sum1 is left untouched.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ReorderWidenMulAccumulate(
+    D /*d32*/, VFromD<Repartition<bfloat16_t, D>> a,
+    VFromD<Repartition<bfloat16_t, D>> b, const VFromD<D> sum0,
+    VFromD<D>& /*sum1*/) {
+  const float32x2_t accumulated = vbfdot_f32(sum0.raw, a.raw, b.raw);
+  return VFromD<D>(accumulated);
+}
+
+#else
+
+// Emulated bf16: each 32-bit lane holds two bf16 values. The lower one is
+// moved into the upper 16 bits by a shift, the upper one is isolated by a
+// mask; both are then reinterpreted as f32. Products of the upper values
+// accumulate into sum1, products of the lower values into the return value.
+template <class D32, HWY_IF_F32_D(D32),
+          class V16 = VFromD<Repartition<bfloat16_t, D32>>>
+HWY_API VFromD<D32> ReorderWidenMulAccumulate(D32 df32, V16 a, V16 b,
+                                              const VFromD<D32> sum0,
+                                              VFromD<D32>& sum1) {
+  const RebindToUnsigned<decltype(df32)> du32;
+  using VU32 = VFromD<decltype(du32)>;
+  const VU32 upper16_mask = Set(du32, 0xFFFF0000u);
+  const VU32 a_bits = BitCast(du32, a);
+  const VU32 b_bits = BitCast(du32, b);
+  const VU32 a_even = ShiftLeft<16>(a_bits);
+  const VU32 a_odd = And(a_bits, upper16_mask);
+  const VU32 b_even = ShiftLeft<16>(b_bits);
+  const VU32 b_odd = And(b_bits, upper16_mask);
+  sum1 = MulAdd(BitCast(df32, a_odd), BitCast(df32, b_odd), sum1);
+  return MulAdd(BitCast(df32, a_even), BitCast(df32, b_even), sum0);
+}
+
+#endif  // HWY_NEON_HAVE_BFLOAT16
+
+// i16 -> i32, 128-bit: widening multiply-accumulate of the lower halves into
+// the return value (starting from sum0) and of the upper halves into sum1.
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec128<int32_t> ReorderWidenMulAccumulate(D /*d32*/, Vec128<int16_t> a,
+                                                  Vec128<int16_t> b,
+                                                  const Vec128<int32_t> sum0,
+                                                  Vec128<int32_t>& sum1) {
+  const int32x4_t lower_acc =
+      vmlal_s16(sum0.raw, LowerHalf(a).raw, LowerHalf(b).raw);
+#if HWY_ARCH_ARM_A64
+  sum1 = Vec128<int32_t>(vmlal_high_s16(sum1.raw, a.raw, b.raw));
+#else
+  // Armv7 lacks the "high" variant, so extract the upper halves explicitly.
+  const Full64<int16_t> dh;
+  const Vec64<int16_t> a_upper = UpperHalf(dh, a);
+  const Vec64<int16_t> b_upper = UpperHalf(dh, b);
+  sum1 = Vec128<int32_t>(vmlal_s16(sum1.raw, a_upper.raw, b_upper.raw));
+#endif
+  return Vec128<int32_t>(lower_acc);
+}
+
+// i16 -> i32, 64-bit: the four widened products fill a 128-bit register; its
+// lower half is added to sum0 (returned) and its upper half to sum1.
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec64<int32_t> ReorderWidenMulAccumulate(D d32, Vec64<int16_t> a,
+                                                 Vec64<int16_t> b,
+                                                 const Vec64<int32_t> sum0,
+                                                 Vec64<int32_t>& sum1) {
+  // vmlal writes into the upper half, which the caller cannot use, so
+  // split into two halves.
+  const Vec128<int32_t> products(vmull_s16(a.raw, b.raw));
+  const Vec64<int32_t> upper_products = UpperHalf(d32, products);
+  const Vec64<int32_t> lower_products = LowerHalf(products);
+  sum1 += upper_products;
+  return sum0 + lower_products;
+}
+
+// i16 -> i32, 32-bit: only the two lowest widened products are used;
+// product 0 is added to sum0 (returned) and product 1 to sum1.
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec32<int32_t> ReorderWidenMulAccumulate(D d32, Vec32<int16_t> a,
+                                                 Vec32<int16_t> b,
+                                                 const Vec32<int32_t> sum0,
+                                                 Vec32<int32_t>& sum1) {
+  const Vec128<int32_t> products(vmull_s16(a.raw, b.raw));
+  const Vec64<int32_t> low_pair(LowerHalf(products));
+  const Vec32<int32_t> product0 = LowerHalf(d32, low_pair);
+  const Vec32<int32_t> product1 = UpperHalf(d32, low_pair);
+  sum1 += product1;
+  return sum0 + product0;
+}
+
+// u16 -> u32, 128-bit: widening multiply-accumulate of the lower halves into
+// the return value (starting from sum0) and of the upper halves into sum1.
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec128<uint32_t> ReorderWidenMulAccumulate(D /*d32*/,
+                                                   Vec128<uint16_t> a,
+                                                   Vec128<uint16_t> b,
+                                                   const Vec128<uint32_t> sum0,
+                                                   Vec128<uint32_t>& sum1) {
+  const uint32x4_t lower_acc =
+      vmlal_u16(sum0.raw, LowerHalf(a).raw, LowerHalf(b).raw);
+#if HWY_ARCH_ARM_A64
+  sum1 = Vec128<uint32_t>(vmlal_high_u16(sum1.raw, a.raw, b.raw));
+#else
+  // Armv7 lacks the "high" variant, so extract the upper halves explicitly.
+  const Full64<uint16_t> dh;
+  const Vec64<uint16_t> a_upper = UpperHalf(dh, a);
+  const Vec64<uint16_t> b_upper = UpperHalf(dh, b);
+  sum1 = Vec128<uint32_t>(vmlal_u16(sum1.raw, a_upper.raw, b_upper.raw));
+#endif
+  return Vec128<uint32_t>(lower_acc);
+}
+
+// u16 -> u32, 64-bit: the four widened products fill a 128-bit register; its
+// lower half is added to sum0 (returned) and its upper half to sum1.
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec64<uint32_t> ReorderWidenMulAccumulate(D d32, Vec64<uint16_t> a,
+                                                  Vec64<uint16_t> b,
+                                                  const Vec64<uint32_t> sum0,
+                                                  Vec64<uint32_t>& sum1) {
+  // vmlal writes into the upper half, which the caller cannot use, so
+  // split into two halves.
+  const Vec128<uint32_t> products(vmull_u16(a.raw, b.raw));
+  const Vec64<uint32_t> upper_products = UpperHalf(d32, products);
+  const Vec64<uint32_t> lower_products = LowerHalf(products);
+  sum1 += upper_products;
+  return sum0 + lower_products;
+}
+
+// u16 -> u32, 32-bit: only the two lowest widened products are used;
+// product 0 is added to sum0 (returned) and product 1 to sum1.
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec32<uint32_t> ReorderWidenMulAccumulate(D du32, Vec32<uint16_t> a,
+                                                  Vec32<uint16_t> b,
+                                                  const Vec32<uint32_t> sum0,
+                                                  Vec32<uint32_t>& sum1) {
+  const Vec128<uint32_t> products(vmull_u16(a.raw, b.raw));
+  const Vec64<uint32_t> low_pair(LowerHalf(products));
+  const Vec32<uint32_t> product0 = LowerHalf(du32, low_pair);
+  const Vec32<uint32_t> product1 = UpperHalf(du32, low_pair);
+  sum1 += product1;
+  return sum0 + product0;
+}
+
+// ------------------------------ Combine partial (InterleaveLower)
+// < 64bit input, <= 64 bit result
+// Joins two half-vectors into a result of at most 8 bytes: each half is
+// viewed as a single unsigned lane and the two lanes are interleaved, `lo`
+// first.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> Combine(D d, VFromD<Half<D>> hi, VFromD<Half<D>> lo) {
+  // One unsigned lane type as large as each valid input half.
+  using THalf = UnsignedFromSize<d.MaxBytes() / 2>;
+  const Simd<THalf, 2, 0> du;
+  // Re-wrap the inputs at the full width; only their lower halves are used.
+  const VFromD<D> lo_wide(lo.raw);
+  const VFromD<D> hi_wide(hi.raw);
+  const auto joined = InterleaveLower(BitCast(du, lo_wide), BitCast(du, hi_wide));
+  return BitCast(d, joined);
+}
+
+// ------------------------------ RearrangeToOddPlusEven (Combine)
+
+template <size_t N>
+HWY_API Vec128<float, N> RearrangeToOddPlusEven(Vec128<float, N> sum0,
+                                                Vec128<float, N> sum1) {
+#if HWY_NEON_HAVE_BFLOAT16
+  (void)sum1;  // unused by bf16 ReorderWidenMulAccumulate
+  return sum0;  // the native bf16 path accumulates everything into sum0
+#else
+  return Add(sum0, sum1);  // emulated path keeps two partial sums (sum0, sum1)
+#endif
+}
+
+// i32, 128-bit: adds adjacent pairs of lanes; pair sums from sum0 fill the
+// lower half of the result and those from sum1 the upper half.
+HWY_API Vec128<int32_t> RearrangeToOddPlusEven(Vec128<int32_t> sum0,
+                                               Vec128<int32_t> sum1) {
+// vmlal_s16 multiplied the lower half into sum0 and upper into sum1.
+#if HWY_ARCH_ARM_A64  // pairwise sum is available and what we want
+  const int32x4_t pair_sums = vpaddq_s32(sum0.raw, sum1.raw);
+  return Vec128<int32_t>(pair_sums);
+#else
+  // Armv7: use the 64-bit pairwise add on each input's halves, then combine.
+  const Full128<int32_t> d;
+  const Half<decltype(d)> dh;
+  const Vec64<int32_t> pairs_of_sum0(
+      vpadd_s32(LowerHalf(dh, sum0).raw, UpperHalf(dh, sum0).raw));
+  const Vec64<int32_t> pairs_of_sum1(
+      vpadd_s32(LowerHalf(dh, sum1).raw, UpperHalf(dh, sum1).raw));
+  return Combine(d, pairs_of_sum1, pairs_of_sum0);
+#endif
+}
+
+// i32, 64-bit: one pairwise add produces {sum0[0]+sum0[1], sum1[0]+sum1[1]}.
+HWY_API Vec64<int32_t> RearrangeToOddPlusEven(Vec64<int32_t> sum0,
+                                              Vec64<int32_t> sum1) {
+  // vmlal_s16 multiplied the lower half into sum0 and upper into sum1.
+  const int32x2_t pair_sums = vpadd_s32(sum0.raw, sum1.raw);
+  return Vec64<int32_t>(pair_sums);
+}
+
+// i32, 32-bit: each input holds a single widened sum, so a plain add gives
+// the sum of odd and even.
+HWY_API Vec32<int32_t> RearrangeToOddPlusEven(Vec32<int32_t> sum0,
+                                              Vec32<int32_t> sum1) {
+  const Vec32<int32_t> total = sum0 + sum1;
+  return total;
+}
+
+// u32, 128-bit: adds adjacent pairs of lanes; pair sums from sum0 fill the
+// lower half of the result and those from sum1 the upper half.
+HWY_API Vec128<uint32_t> RearrangeToOddPlusEven(Vec128<uint32_t> sum0,
+                                                Vec128<uint32_t> sum1) {
+// vmlal_u16 multiplied the lower half into sum0 and upper into sum1.
+#if HWY_ARCH_ARM_A64  // pairwise sum is available and what we want
+  const uint32x4_t pair_sums = vpaddq_u32(sum0.raw, sum1.raw);
+  return Vec128<uint32_t>(pair_sums);
+#else
+  // Armv7: use the 64-bit pairwise add on each input's halves, then combine.
+  const Full128<uint32_t> d;
+  const Half<decltype(d)> dh;
+  const Vec64<uint32_t> pairs_of_sum0(
+      vpadd_u32(LowerHalf(dh, sum0).raw, UpperHalf(dh, sum0).raw));
+  const Vec64<uint32_t> pairs_of_sum1(
+      vpadd_u32(LowerHalf(dh, sum1).raw, UpperHalf(dh, sum1).raw));
+  return Combine(d, pairs_of_sum1, pairs_of_sum0);
+#endif
+}
+
+// u32, 64-bit: one pairwise add produces {sum0[0]+sum0[1], sum1[0]+sum1[1]}.
+HWY_API Vec64<uint32_t> RearrangeToOddPlusEven(Vec64<uint32_t> sum0,
+                                               Vec64<uint32_t> sum1) {
+  // vmlal_u16 multiplied the lower half into sum0 and upper into sum1.
+  const uint32x2_t pair_sums = vpadd_u32(sum0.raw, sum1.raw);
+  return Vec64<uint32_t>(pair_sums);
+}
+
+HWY_API Vec32<uint32_t> RearrangeToOddPlusEven(Vec32<uint32_t> sum0,
+                                               Vec32<uint32_t> sum1) {
+  // Only one widened sum per register, so add them for sum of odd and even.
+  return sum0 + sum1;  // plain lane-wise add, no pairwise step needed
+}
+
+// ------------------------------ WidenMulPairwiseAdd
+
+#if HWY_NEON_HAVE_BFLOAT16
+
+template <class D, HWY_IF_V_SIZE_D(D, 16)>  // 16-byte vectors only
+HWY_API Vec128<float> WidenMulPairwiseAdd(D d32, Vec128<bfloat16_t> a,
+                                          Vec128<bfloat16_t> b) {
+  return Vec128<float>(vbfdotq_f32(Zero(d32).raw, a.raw, b.raw));  // zero acc
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>  // vectors of at most 8 bytes
+HWY_API VFromD<D> WidenMulPairwiseAdd(D d32,
+                                      VFromD<Repartition<bfloat16_t, D>> a,
+                                      VFromD<Repartition<bfloat16_t, D>> b) {
+  return VFromD<D>(vbfdot_f32(Zero(d32).raw, a.raw, b.raw));  // zero acc
+}
+
+#else
+template <class D32, HWY_IF_F32_D(D32)>
+HWY_API VFromD<D32> WidenMulPairwiseAdd(
+    D32 df32, VFromD<Repartition<bfloat16_t, D32>> a,
+    VFromD<Repartition<bfloat16_t, D32>> b) {
+  const RebindToUnsigned<decltype(df32)> du;
+  using VU = VFromD<decltype(du)>;
+  const VU hi16_mask = Set(du, 0xFFFF0000u);  // keeps the upper 16 bits
+  const VU a_even = ShiftLeft<16>(BitCast(du, a));  // lower 16 bits moved up
+  const VU b_even = ShiftLeft<16>(BitCast(du, b));
+  const VU a_odd = And(BitCast(du, a), hi16_mask);  // upper 16 bits in place
+  const VU b_odd = And(BitCast(du, b), hi16_mask);
+  return MulAdd(BitCast(df32, a_even), BitCast(df32, b_even),
+                Mul(BitCast(df32, a_odd), BitCast(df32, b_odd)));
+}
+#endif  // HWY_NEON_HAVE_BFLOAT16
+
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec128<int32_t> WidenMulPairwiseAdd(D /*d32*/, Vec128<int16_t> a,
+                                            Vec128<int16_t> b) {
+  const Vec128<int32_t> lo(
+      vmull_s16(LowerHalf(a).raw, LowerHalf(b).raw));  // lower-half products
+  Vec128<int32_t> hi;  // upper-half products, computed per architecture
+#if HWY_ARCH_ARM_A64
+  hi = Vec128<int32_t>(vmull_high_s16(a.raw, b.raw));
+#else
+  const Full64<int16_t> d64;
+  hi = Vec128<int32_t>(vmull_s16(UpperHalf(d64, a).raw, UpperHalf(d64, b).raw));
+#endif
+  return RearrangeToOddPlusEven(lo, hi);
+}
+
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec64<int32_t> WidenMulPairwiseAdd(D d32, Vec64<int16_t> a,
+                                           Vec64<int16_t> b) {
+  // vmull_s16 returns all four products in one 128-bit vector, so split it
+  // into two 64-bit halves that RearrangeToOddPlusEven can combine.
+  const Vec128<int32_t> products(vmull_s16(a.raw, b.raw));
+  const Vec64<int32_t> upper = UpperHalf(d32, products);
+  const Vec64<int32_t> lower = LowerHalf(products);
+  return RearrangeToOddPlusEven(lower, upper);
+}
+
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec32<int32_t> WidenMulPairwiseAdd(D d32, Vec32<int16_t> a,
+                                           Vec32<int16_t> b) {
+  const Vec128<int32_t> wide(vmull_s16(a.raw, b.raw));  // upper lanes unused
+  const Vec64<int32_t> pair(LowerHalf(wide));           // products 1 and 0
+  const Vec32<int32_t> prod1 = UpperHalf(d32, pair);
+  const Vec32<int32_t> prod0 = LowerHalf(d32, pair);
+  return RearrangeToOddPlusEven(prod0, prod1);
+}
+
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec128<uint32_t> WidenMulPairwiseAdd(D /*d32*/, Vec128<uint16_t> a,
+                                             Vec128<uint16_t> b) {
+  const Vec128<uint32_t> lo(
+      vmull_u16(LowerHalf(a).raw, LowerHalf(b).raw));  // lower-half products
+  Vec128<uint32_t> hi;  // upper-half products, computed per architecture
+#if HWY_ARCH_ARM_A64
+  hi = Vec128<uint32_t>(vmull_high_u16(a.raw, b.raw));
+#else
+  const Full64<uint16_t> d64;
+  hi =
+      Vec128<uint32_t>(vmull_u16(UpperHalf(d64, a).raw, UpperHalf(d64, b).raw));
+#endif
+  return RearrangeToOddPlusEven(lo, hi);
+}
+
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec64<uint32_t> WidenMulPairwiseAdd(D d32, Vec64<uint16_t> a,
+                                            Vec64<uint16_t> b) {
+  // vmull_u16 returns all four products in one 128-bit vector, so split it
+  // into two 64-bit halves that RearrangeToOddPlusEven can combine.
+  const Vec128<uint32_t> products(vmull_u16(a.raw, b.raw));
+  const Vec64<uint32_t> upper = UpperHalf(d32, products);
+  const Vec64<uint32_t> lower = LowerHalf(products);
+  return RearrangeToOddPlusEven(lower, upper);
+}
+
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec32<uint32_t> WidenMulPairwiseAdd(D d32, Vec32<uint16_t> a,
+                                            Vec32<uint16_t> b) {
+  const Vec128<uint32_t> wide(vmull_u16(a.raw, b.raw));  // upper lanes unused
+  const Vec64<uint32_t> pair(LowerHalf(wide));           // products 1 and 0
+  const Vec32<uint32_t> prod1 = UpperHalf(d32, pair);
+  const Vec32<uint32_t> prod0 = LowerHalf(d32, pair);
+  return RearrangeToOddPlusEven(prod0, prod1);
+}
+
+// ------------------------------ ZeroExtendVector (Combine)
+
+template <class D>  // D is the tag of the returned (double-width) vector
+HWY_API VFromD<D> ZeroExtendVector(D d, VFromD<Half<D>> lo) {
+  return Combine(d, Zero(Half<decltype(d)>()), lo);  // zero half, then lo
+}
+
+// ------------------------------ ConcatLowerLower
+
+// 64 or 128-bit input: just interleave
+template <class D, HWY_IF_V_SIZE_GT_D(D, 4)>
+HWY_API VFromD<D> ConcatLowerLower(D d, VFromD<D> hi, VFromD<D> lo) {
+  // Each vector half becomes one wide unsigned lane; interleave the lowest.
+  const Repartition<UnsignedFromSize<d.MaxBytes() / 2>, decltype(d)> d_wide;
+  return BitCast(d, InterleaveLower(BitCast(d_wide, lo), BitCast(d_wide, hi)));
+}
+
+namespace detail {
+#if HWY_ARCH_ARM_A64  // vtrn1/vtrn2 are wrapped as separate functions
+HWY_NEON_DEF_FUNCTION_UIF_8_16_32(InterleaveEven, vtrn1, _, 2)
+HWY_NEON_DEF_FUNCTION_UIF_8_16_32(InterleaveOdd, vtrn2, _, 2)
+#else  // otherwise wrap vtrn/vtrnq as a single InterleaveEvenOdd
+
+// vtrn returns a struct holding the even (val[0]) and odd (val[1]) results.
+#define HWY_NEON_BUILD_TPL_HWY_TRN
+#define HWY_NEON_BUILD_RET_HWY_TRN(type, size) type##x##size##x2_t
+// Pass raw args so we can accept uint16x2 args, for which there is no
+// corresponding uint16x2x2 return type.
+#define HWY_NEON_BUILD_PARAM_HWY_TRN(TYPE, size) \
+  Raw128<TYPE##_t, size>::type a, Raw128<TYPE##_t, size>::type b
+#define HWY_NEON_BUILD_ARG_HWY_TRN a, b
+
+// Cannot use UINT8 etc. type macros because the x2_t tuples are only defined
+// for full and half vectors, hence one explicit line per type and size below.
+HWY_NEON_DEF_FUNCTION(uint8, 16, InterleaveEvenOdd, vtrnq, _, u8, HWY_TRN)
+HWY_NEON_DEF_FUNCTION(uint8, 8, InterleaveEvenOdd, vtrn, _, u8, HWY_TRN)
+HWY_NEON_DEF_FUNCTION(uint16, 8, InterleaveEvenOdd, vtrnq, _, u16, HWY_TRN)
+HWY_NEON_DEF_FUNCTION(uint16, 4, InterleaveEvenOdd, vtrn, _, u16, HWY_TRN)
+HWY_NEON_DEF_FUNCTION(uint32, 4, InterleaveEvenOdd, vtrnq, _, u32, HWY_TRN)
+HWY_NEON_DEF_FUNCTION(uint32, 2, InterleaveEvenOdd, vtrn, _, u32, HWY_TRN)
+HWY_NEON_DEF_FUNCTION(int8, 16, InterleaveEvenOdd, vtrnq, _, s8, HWY_TRN)
+HWY_NEON_DEF_FUNCTION(int8, 8, InterleaveEvenOdd, vtrn, _, s8, HWY_TRN)
+HWY_NEON_DEF_FUNCTION(int16, 8, InterleaveEvenOdd, vtrnq, _, s16, HWY_TRN)
+HWY_NEON_DEF_FUNCTION(int16, 4, InterleaveEvenOdd, vtrn, _, s16, HWY_TRN)
+HWY_NEON_DEF_FUNCTION(int32, 4, InterleaveEvenOdd, vtrnq, _, s32, HWY_TRN)
+HWY_NEON_DEF_FUNCTION(int32, 2, InterleaveEvenOdd, vtrn, _, s32, HWY_TRN)
+HWY_NEON_DEF_FUNCTION(float32, 4, InterleaveEvenOdd, vtrnq, _, f32, HWY_TRN)
+HWY_NEON_DEF_FUNCTION(float32, 2, InterleaveEvenOdd, vtrn, _, f32, HWY_TRN)
+
+#undef HWY_NEON_BUILD_TPL_HWY_TRN
+#undef HWY_NEON_BUILD_RET_HWY_TRN
+#undef HWY_NEON_BUILD_PARAM_HWY_TRN
+#undef HWY_NEON_BUILD_ARG_HWY_TRN
+
+#endif  // HWY_ARCH_ARM_A64
+}  // namespace detail
+
+// <= 32-bit input/output
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4)>
+HWY_API VFromD<D> ConcatLowerLower(D d, VFromD<D> hi, VFromD<D> lo) {
+  // View each input as two half-width lanes and keep the even one of each.
+  const Repartition<UnsignedFromSize<d.MaxBytes() / 2>, decltype(d)> d_wide;
+#if HWY_ARCH_ARM_A64
+  return BitCast(
+      d, detail::InterleaveEven(BitCast(d_wide, lo), BitCast(d_wide, hi)));
+#else
+  using VWide = VFromD<decltype(d_wide)>;
+  return BitCast(d, VWide(detail::InterleaveEvenOdd(BitCast(d_wide, lo).raw,
+                                                    BitCast(d_wide, hi).raw)
+                              .val[0]));
+#endif
+}
+
+// ------------------------------ ConcatUpperUpper
+
+// 64 or 128-bit input: just interleave
+template <class D, HWY_IF_V_SIZE_GT_D(D, 4)>
+HWY_API VFromD<D> ConcatUpperUpper(D d, VFromD<D> hi, VFromD<D> lo) {
+  // Each vector half becomes one wide unsigned lane; interleave the highest.
+  const Repartition<UnsignedFromSize<d.MaxBytes() / 2>, decltype(d)> d_wide;
+  return BitCast(
+      d, InterleaveUpper(d_wide, BitCast(d_wide, lo), BitCast(d_wide, hi)));
+}
+
+// <= 32-bit input/output
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4)>
+HWY_API VFromD<D> ConcatUpperUpper(D d, VFromD<D> hi, VFromD<D> lo) {
+  // View each input as two half-width lanes and keep the odd one of each.
+  const Repartition<UnsignedFromSize<d.MaxBytes() / 2>, decltype(d)> d_wide;
+#if HWY_ARCH_ARM_A64
+  return BitCast(
+      d, detail::InterleaveOdd(BitCast(d_wide, lo), BitCast(d_wide, hi)));
+#else
+  using VWide = VFromD<decltype(d_wide)>;
+  return BitCast(d, VWide(detail::InterleaveEvenOdd(BitCast(d_wide, lo).raw,
+                                                    BitCast(d_wide, hi).raw)
+                              .val[1]));
+#endif
+}
+
+// ------------------------------ ConcatLowerUpper (ShiftLeftBytes)
+
+// 64 or 128-bit input: extract from concatenated
+template <class D, HWY_IF_V_SIZE_GT_D(D, 4)>  // vectors larger than 4 bytes
+HWY_API VFromD<D> ConcatLowerUpper(D d, VFromD<D> hi, VFromD<D> lo) {
+  return CombineShiftRightBytes<d.MaxBytes() / 2>(d, hi, lo);  // half a vector
+}
+
+// <= 32-bit input/output
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4)>
+HWY_API VFromD<D> ConcatLowerUpper(D d, VFromD<D> hi, VFromD<D> lo) {
+  constexpr size_t kBytes = d.MaxBytes();
+  const Full64<TFromD<D>> d_full;
+  const Full64<uint8_t> du8_full;
+  const Repartition<uint8_t, decltype(d)> du8;
+  using VBytes = VFromD<decltype(du8_full)>;
+  // Shift lo up so that it ends at the most-significant byte of the 64 bits.
+  const VBytes lo_bytes = ShiftLeftBytes<8 - kBytes>(VBytes(BitCast(du8, lo).raw));
+  const VBytes hi_bytes(BitCast(du8, hi).raw);
+  const VBytes merged =
+      CombineShiftRightBytes<8 - kBytes / 2>(du8_full, hi_bytes, lo_bytes);
+  return VFromD<D>(BitCast(d_full, merged).raw);  // original lane type and N
+}
+
+// ------------------------------ ConcatUpperLower
+
+// Works for all N.
+template <class D>
+HWY_API VFromD<D> ConcatUpperLower(D d, VFromD<D> hi, VFromD<D> lo) {
+  return IfThenElse(FirstN(d, Lanes(d) / 2), lo, hi);  // first half from lo
+}
+
+// ------------------------------ ConcatOdd (InterleaveUpper)
+
+namespace detail {
+// There is no vuzpq_u64, so only 8/16/32-bit lane types are wrapped here.
+HWY_NEON_DEF_FUNCTION_UIF_8_16_32(ConcatEven, vuzp1, _, 2)
+HWY_NEON_DEF_FUNCTION_UIF_8_16_32(ConcatOdd, vuzp2, _, 2)
+}  // namespace detail
+
+// Full/half vector
+template <class D, HWY_IF_V_SIZE_GT_D(D, 4)>
+HWY_API VFromD<D> ConcatOdd(D /* tag */, VFromD<D> hi, VFromD<D> lo) {
+  return detail::ConcatOdd(lo, hi);  // note the argument order: lo first
+}
+
+// 8-bit x4
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec32<T> ConcatOdd(D d, Vec32<T> hi, Vec32<T> lo) {
+  const Twice<decltype(d)> d_twice;
+  const Repartition<uint16_t, decltype(d_twice)> du16;
+  const VFromD<decltype(d_twice)> lo_wide(lo.raw);
+  const VFromD<decltype(d_twice)> hi_wide(hi.raw);
+  const VFromD<decltype(du16)> odd_pairs =
+      BitCast(du16, ConcatOdd(d_twice, hi_wide, lo_wide));
+  // Drop the invalid 16-bit lanes; vcopy_lane_u16 would work but is A64-only.
+  return Vec32<T>(BitCast(d_twice, ConcatEven(du16, odd_pairs, odd_pairs)).raw);
+}
+
+// Any type x2
+template <class D, HWY_IF_LANES_D(D, 2), typename T = TFromD<D>>
+HWY_API Vec128<T, 2> ConcatOdd(D d, Vec128<T, 2> hi, Vec128<T, 2> lo) {
+  return InterleaveUpper(d, lo, hi);  // with two lanes this is an interleave
+}
+
+// ------------------------------ ConcatEven (InterleaveLower)
+
+// Full/half vector
+template <class D, HWY_IF_V_SIZE_GT_D(D, 4)>
+HWY_API VFromD<D> ConcatEven(D /* tag */, VFromD<D> hi, VFromD<D> lo) {
+  return detail::ConcatEven(lo, hi);  // note the argument order: lo first
+}
+
+// 8-bit x4
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec32<T> ConcatEven(D d, Vec32<T> hi, Vec32<T> lo) {
+  const Twice<decltype(d)> d_twice;
+  const Repartition<uint16_t, decltype(d_twice)> du16;
+  const VFromD<decltype(d_twice)> lo_wide(lo.raw);
+  const VFromD<decltype(d_twice)> hi_wide(hi.raw);
+  const VFromD<decltype(du16)> even_pairs =
+      BitCast(du16, ConcatEven(d_twice, hi_wide, lo_wide));
+  // Drop the invalid 16-bit lanes; vcopy_lane_u16 would work but is A64-only.
+  return Vec32<T>(BitCast(d_twice, ConcatEven(du16, even_pairs, even_pairs)).raw);
+}
+
+// Any type x2
+template <class D, HWY_IF_LANES_D(D, 2), typename T = TFromD<D>>
+HWY_API Vec128<T, 2> ConcatEven(D d, Vec128<T, 2> hi, Vec128<T, 2> lo) {
+  return InterleaveLower(d, lo, hi);  // with two lanes this is an interleave
+}
+
+// ------------------------------ DupEven (InterleaveLower)
+
+template <typename T, size_t N,
+          HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2) | (1 << 4))>  // 1/2/4 B
+HWY_API Vec128<T, N> DupEven(Vec128<T, N> v) {
+#if HWY_ARCH_ARM_A64
+  return detail::InterleaveEven(v, v);
+#else  // InterleaveEvenOdd returns both results; val[0] is the even one
+  return Vec128<T, N>(detail::InterleaveEvenOdd(v.raw, v.raw).val[0]);
+#endif
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 8)>  // 8-byte lanes
+HWY_API Vec128<T, N> DupEven(Vec128<T, N> v) {
+  return InterleaveLower(DFromV<decltype(v)>(), v, v);  // v interleaved with v
+}
+
+// ------------------------------ DupOdd (InterleaveUpper)
+
+template <typename T, size_t N,
+          HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2) | (1 << 4))>  // 1/2/4 B
+HWY_API Vec128<T, N> DupOdd(Vec128<T, N> v) {
+#if HWY_ARCH_ARM_A64
+  return detail::InterleaveOdd(v, v);
+#else  // InterleaveEvenOdd returns both results; val[1] is the odd one
+  return Vec128<T, N>(detail::InterleaveEvenOdd(v.raw, v.raw).val[1]);
+#endif
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 8)>  // 8-byte lanes
+HWY_API Vec128<T, N> DupOdd(Vec128<T, N> v) {
+  return InterleaveUpper(DFromV<decltype(v)>(), v, v);  // v interleaved with v
+}
+
+// ------------------------------ OddEven (IfThenElse)
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> OddEven(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const DFromV<decltype(a)> dt;
+  const Repartition<uint8_t, decltype(dt)> du8;
+  alignas(16) static constexpr uint8_t kEvenLaneBytes[16] = {
+      ((0 / sizeof(T)) & 1) ? 0 : 0xFF,  ((1 / sizeof(T)) & 1) ? 0 : 0xFF,
+      ((2 / sizeof(T)) & 1) ? 0 : 0xFF,  ((3 / sizeof(T)) & 1) ? 0 : 0xFF,
+      ((4 / sizeof(T)) & 1) ? 0 : 0xFF,  ((5 / sizeof(T)) & 1) ? 0 : 0xFF,
+      ((6 / sizeof(T)) & 1) ? 0 : 0xFF,  ((7 / sizeof(T)) & 1) ? 0 : 0xFF,
+      ((8 / sizeof(T)) & 1) ? 0 : 0xFF,  ((9 / sizeof(T)) & 1) ? 0 : 0xFF,
+      ((10 / sizeof(T)) & 1) ? 0 : 0xFF, ((11 / sizeof(T)) & 1) ? 0 : 0xFF,
+      ((12 / sizeof(T)) & 1) ? 0 : 0xFF, ((13 / sizeof(T)) & 1) ? 0 : 0xFF,
+      ((14 / sizeof(T)) & 1) ? 0 : 0xFF, ((15 / sizeof(T)) & 1) ? 0 : 0xFF,
+  };  // 0xFF for every byte that belongs to an even-indexed lane of T
+  const auto even_bits = BitCast(dt, Load(du8, kEvenLaneBytes));
+  return IfThenElse(MaskFromVec(even_bits), b, a);  // even from b, odd from a
+}
+
+// ------------------------------ OddEvenBlocks
+template <typename T, size_t N>
+HWY_API Vec128<T, N> OddEvenBlocks(Vec128<T, N> /* odd */, Vec128<T, N> even) {
+  return even;  // the odd argument is ignored; `even` is returned unchanged
+}
+
+// ------------------------------ SwapAdjacentBlocks
+template <typename T, size_t N>
+HWY_API Vec128<T, N> SwapAdjacentBlocks(Vec128<T, N> v) {
+  return v;  // identity for these vector types: the input is returned as-is
+}
+
+// ------------------------------ ReverseBlocks
+// Single block: no change
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>  // vectors of at most 16 bytes
+HWY_API VFromD<D> ReverseBlocks(D /* tag */, VFromD<D> v) {
+  return v;  // single block, so reversing blocks leaves v unchanged
+}
+
+// ------------------------------ ReorderDemote2To (OddEven)
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_BF16_D(D),
+          class V32 = VFromD<Repartition<float, D>>>
+HWY_API VFromD<D> ReorderDemote2To(D dbf16, V32 a, V32 b) {
+  const RebindToUnsigned<decltype(dbf16)> du;  // 16-bit unsigned view
+  return BitCast(dbf16, ConcatOdd(du, BitCast(du, b), BitCast(du, a)));
+}
+
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec128<int32_t> ReorderDemote2To(D d32, Vec128<int64_t> a,
+                                         Vec128<int64_t> b) {
+  const Vec64<int32_t> a_narrow(vqmovn_s64(a.raw));  // becomes the lower half
+#if HWY_ARCH_ARM_A64
+  (void)d32;  // the tag is only needed by the Combine fallback
+  return Vec128<int32_t>(vqmovn_high_s64(a_narrow.raw, b.raw));
+#else
+  const Vec64<int32_t> b_narrow(vqmovn_s64(b.raw));  // becomes the upper half
+  return Combine(d32, b_narrow, a_narrow);
+#endif
+}
+
+template <class D, HWY_IF_I32_D(D), HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ReorderDemote2To(D d32, VFromD<Repartition<int64_t, D>> a,
+                                   VFromD<Repartition<int64_t, D>> b) {
+  const Rebind<int64_t, decltype(d32)> d64;  // tag of the combined input
+  return DemoteTo(d32, Combine(d64, b, a));  // a is the lower half, b the upper
+}
+
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec128<uint32_t> ReorderDemote2To(D d32, Vec128<int64_t> a,
+                                          Vec128<int64_t> b) {
+  const Vec64<uint32_t> a_narrow(vqmovun_s64(a.raw));  // becomes the lower half
+#if HWY_ARCH_ARM_A64
+  (void)d32;  // the tag is only needed by the Combine fallback
+  return Vec128<uint32_t>(vqmovun_high_s64(a_narrow.raw, b.raw));
+#else
+  const Vec64<uint32_t> b_narrow(vqmovun_s64(b.raw));  // becomes the upper half
+  return Combine(d32, b_narrow, a_narrow);
+#endif
+}
+
+template <class D, HWY_IF_U32_D(D), HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ReorderDemote2To(D d32, VFromD<Repartition<int64_t, D>> a,
+                                   VFromD<Repartition<int64_t, D>> b) {
+  const Rebind<int64_t, decltype(d32)> d64;  // tag of the combined input
+  return DemoteTo(d32, Combine(d64, b, a));  // a is the lower half, b the upper
+}
+
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec128<uint32_t> ReorderDemote2To(D d32, Vec128<uint64_t> a,
+                                          Vec128<uint64_t> b) {
+  const Vec64<uint32_t> a_narrow(vqmovn_u64(a.raw));  // becomes the lower half
+#if HWY_ARCH_ARM_A64
+  (void)d32;  // the tag is only needed by the Combine fallback
+  return Vec128<uint32_t>(vqmovn_high_u64(a_narrow.raw, b.raw));
+#else
+  const Vec64<uint32_t> b_narrow(vqmovn_u64(b.raw));  // becomes the upper half
+  return Combine(d32, b_narrow, a_narrow);
+#endif
+}
+
+template <class D, HWY_IF_U32_D(D), HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ReorderDemote2To(D d32, VFromD<Repartition<uint64_t, D>> a,
+                                   VFromD<Repartition<uint64_t, D>> b) {
+  const Rebind<uint64_t, decltype(d32)> d64;  // tag of the combined input
+  return DemoteTo(d32, Combine(d64, b, a));  // a is the lower half, b the upper
+}
+
+template <class D, HWY_IF_I16_D(D)>
+HWY_API Vec128<int16_t> ReorderDemote2To(D d16, Vec128<int32_t> a,
+                                         Vec128<int32_t> b) {
+  const Vec64<int16_t> a_narrow(vqmovn_s32(a.raw));  // becomes the lower half
+#if HWY_ARCH_ARM_A64
+  (void)d16;  // the tag is only needed by the Combine fallback
+  return Vec128<int16_t>(vqmovn_high_s32(a_narrow.raw, b.raw));
+#else
+  const Vec64<int16_t> b_narrow(vqmovn_s32(b.raw));  // becomes the upper half
+  return Combine(d16, b_narrow, a_narrow);
+#endif
+}
+
+template <class D, HWY_IF_I16_D(D)>
+HWY_API Vec64<int16_t> ReorderDemote2To(D /*d16*/, Vec64<int32_t> a,
+                                        Vec64<int32_t> b) {
+  const Full128<int32_t> d_wide;
+  const Vec128<int32_t> joined = Combine(d_wide, b, a);  // a low, b high
+  return Vec64<int16_t>(vqmovn_s32(joined.raw));
+}
+
+template <class D, HWY_IF_I16_D(D)>
+HWY_API Vec32<int16_t> ReorderDemote2To(D /*d16*/, Vec32<int32_t> a,
+                                        Vec32<int32_t> b) {
+  const Full128<int32_t> d_wide;
+  const Vec64<int32_t> zipped(vzip1_s32(a.raw, b.raw));  // a's lane, then b's
+  return Vec32<int16_t>(vqmovn_s32(Combine(d_wide, zipped, zipped).raw));
+}
+
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec128<uint16_t> ReorderDemote2To(D d16, Vec128<int32_t> a,
+                                          Vec128<int32_t> b) {
+  const Vec64<uint16_t> a_narrow(vqmovun_s32(a.raw));  // becomes the lower half
+#if HWY_ARCH_ARM_A64
+  (void)d16;  // the tag is only needed by the Combine fallback
+  return Vec128<uint16_t>(vqmovun_high_s32(a_narrow.raw, b.raw));
+#else
+  const Vec64<uint16_t> b_narrow(vqmovun_s32(b.raw));  // becomes the upper half
+  return Combine(d16, b_narrow, a_narrow);
+#endif
+}
+
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec64<uint16_t> ReorderDemote2To(D /*d16*/, Vec64<int32_t> a,
+                                         Vec64<int32_t> b) {
+  const Full128<int32_t> d_wide;
+  const Vec128<int32_t> joined = Combine(d_wide, b, a);  // a low, b high
+  return Vec64<uint16_t>(vqmovun_s32(joined.raw));
+}
+
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec32<uint16_t> ReorderDemote2To(D /*d16*/, Vec32<int32_t> a,
+                                         Vec32<int32_t> b) {
+  const Full128<int32_t> d_wide;
+  const Vec64<int32_t> zipped(vzip1_s32(a.raw, b.raw));  // a's lane, then b's
+  return Vec32<uint16_t>(vqmovun_s32(Combine(d_wide, zipped, zipped).raw));
+}
+
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec128<uint16_t> ReorderDemote2To(D d16, Vec128<uint32_t> a,
+                                          Vec128<uint32_t> b) {
+  const Vec64<uint16_t> a_narrow(vqmovn_u32(a.raw));  // becomes the lower half
+#if HWY_ARCH_ARM_A64
+  (void)d16;  // the tag is only needed by the Combine fallback
+  return Vec128<uint16_t>(vqmovn_high_u32(a_narrow.raw, b.raw));
+#else
+  const Vec64<uint16_t> b_narrow(vqmovn_u32(b.raw));  // becomes the upper half
+  return Combine(d16, b_narrow, a_narrow);
+#endif
+}
+
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec64<uint16_t> ReorderDemote2To(D /*d16*/, Vec64<uint32_t> a,
+                                         Vec64<uint32_t> b) {
+  const Full128<uint32_t> d_wide;
+  const Vec128<uint32_t> joined = Combine(d_wide, b, a);  // a low, b high
+  return Vec64<uint16_t>(vqmovn_u32(joined.raw));
+}
+
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec32<uint16_t> ReorderDemote2To(D /*d16*/, Vec32<uint32_t> a,
+                                         Vec32<uint32_t> b) {
+  const Full128<uint32_t> d_wide;
+  const Vec64<uint32_t> zipped(vzip1_u32(a.raw, b.raw));  // a's lane, then b's
+  return Vec32<uint16_t>(vqmovn_u32(Combine(d_wide, zipped, zipped).raw));
+}
+
+template <class D, HWY_IF_I8_D(D)>
+HWY_API Vec128<int8_t> ReorderDemote2To(D d8, Vec128<int16_t> a,
+                                        Vec128<int16_t> b) {
+  const Vec64<int8_t> a_narrow(vqmovn_s16(a.raw));  // becomes the lower half
+#if HWY_ARCH_ARM_A64
+  (void)d8;  // the tag is only needed by the Combine fallback
+  return Vec128<int8_t>(vqmovn_high_s16(a_narrow.raw, b.raw));
+#else
+  const Vec64<int8_t> b_narrow(vqmovn_s16(b.raw));  // becomes the upper half
+  return Combine(d8, b_narrow, a_narrow);
+#endif
+}
+
+template <class D, HWY_IF_I8_D(D), HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ReorderDemote2To(D d8, VFromD<Repartition<int16_t, D>> a,
+                                   VFromD<Repartition<int16_t, D>> b) {
+  const Rebind<int16_t, decltype(d8)> d16;  // tag of the combined input
+  return DemoteTo(d8, Combine(d16, b, a));  // a is the lower half, b the upper
+}
+
+template <class D, HWY_IF_U8_D(D)>
+HWY_API Vec128<uint8_t> ReorderDemote2To(D d8, Vec128<int16_t> a,
+                                         Vec128<int16_t> b) {
+  const Vec64<uint8_t> a_narrow(vqmovun_s16(a.raw));  // becomes the lower half
+#if HWY_ARCH_ARM_A64
+  (void)d8;  // the tag is only needed by the Combine fallback
+  return Vec128<uint8_t>(vqmovun_high_s16(a_narrow.raw, b.raw));
+#else
+  const Vec64<uint8_t> b_narrow(vqmovun_s16(b.raw));  // becomes the upper half
+  return Combine(d8, b_narrow, a_narrow);
+#endif
+}
+
+template <class D, HWY_IF_U8_D(D), HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ReorderDemote2To(D d8, VFromD<Repartition<int16_t, D>> a,
+                                   VFromD<Repartition<int16_t, D>> b) {
+  const Rebind<int16_t, decltype(d8)> d16;  // tag of the combined input
+  return DemoteTo(d8, Combine(d16, b, a));  // a is the lower half, b the upper
+}
+
+template <class D, HWY_IF_U8_D(D)>
+HWY_API Vec128<uint8_t> ReorderDemote2To(D d8, Vec128<uint16_t> a,
+                                         Vec128<uint16_t> b) {
+  const Vec64<uint8_t> a_narrow(vqmovn_u16(a.raw));  // becomes the lower half
+#if HWY_ARCH_ARM_A64
+  (void)d8;  // the tag is only needed by the Combine fallback
+  return Vec128<uint8_t>(vqmovn_high_u16(a_narrow.raw, b.raw));
+#else
+  const Vec64<uint8_t> b_narrow(vqmovn_u16(b.raw));  // becomes the upper half
+  return Combine(d8, b_narrow, a_narrow);
+#endif
+}
+
+template <class D, HWY_IF_U8_D(D), HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ReorderDemote2To(D d8, VFromD<Repartition<uint16_t, D>> a,
+                                   VFromD<Repartition<uint16_t, D>> b) {
+  const Rebind<uint16_t, decltype(d8)> d16;  // tag of the combined input
+  return DemoteTo(d8, Combine(d16, b, a));  // a is the lower half, b the upper
+}
+
+template <class D, class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V),
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<D>),
+          HWY_IF_LANES_D(D, HWY_MAX_LANES_D(DFromV<V>) * 2)>  // 2x the lanes
+HWY_API VFromD<D> OrderedDemote2To(D d, V a, V b) {
+  return ReorderDemote2To(d, a, b);  // forwards unchanged to ReorderDemote2To
+}
+
+template <class D, HWY_IF_BF16_D(D), class V32 = VFromD<Repartition<float, D>>>
+HWY_API VFromD<D> OrderedDemote2To(D dbf16, V32 a, V32 b) {
+  return ReorderDemote2To(dbf16, a, b);  // bf16 case: forwards unchanged
+}
+
+// ================================================== CRYPTO
+
+// (aarch64 or Arm7) and (__ARM_FEATURE_AES or HWY_HAVE_RUNTIME_DISPATCH).
+// Otherwise, rely on generic_ops-inl.h to emulate AESRound / CLMul*.
+#if HWY_TARGET == HWY_NEON
+
+#ifdef HWY_NATIVE_AES
+#undef HWY_NATIVE_AES
+#else
+#define HWY_NATIVE_AES
+#endif
+
+HWY_API Vec128<uint8_t> AESRound(Vec128<uint8_t> state,
+                                 Vec128<uint8_t> round_key) {
+  // NOTE: it is important that AESE and AESMC be consecutive instructions so
+  // they can be fused. AESE includes AddRoundKey, which is a different ordering
+  // than the AES-NI semantics we adopted, so XOR by 0 and later with the actual
+  // round key (the compiler will hopefully optimize this for multiple rounds).
+  return Vec128<uint8_t>(vaesmcq_u8(vaeseq_u8(state.raw, vdupq_n_u8(0)))) ^
+         round_key;  // the real round key is applied last
+}
+
+HWY_API Vec128<uint8_t> AESLastRound(Vec128<uint8_t> state,
+                                     Vec128<uint8_t> round_key) {
+  return Vec128<uint8_t>(vaeseq_u8(state.raw, vdupq_n_u8(0))) ^ round_key;  // no vaesmcq_u8 here
+}
+
+HWY_API Vec128<uint8_t> AESInvMixColumns(Vec128<uint8_t> state) {
+  return Vec128<uint8_t>{vaesimcq_u8(state.raw)};  // direct vaesimcq_u8 wrapper
+}
+
+HWY_API Vec128<uint8_t> AESRoundInv(Vec128<uint8_t> state,
+                                    Vec128<uint8_t> round_key) {
+  // NOTE: it is important that AESD and AESIMC be consecutive instructions so
+  // they can be fused. AESD includes AddRoundKey, which is a different ordering
+  // than the AES-NI semantics we adopted, so XOR by 0 and later with the actual
+  // round key (the compiler will hopefully optimize this for multiple rounds).
+  return Vec128<uint8_t>(vaesimcq_u8(vaesdq_u8(state.raw, vdupq_n_u8(0)))) ^
+         round_key;  // the real round key is applied last
+}
+
+HWY_API Vec128<uint8_t> AESLastRoundInv(Vec128<uint8_t> state,
+                                        Vec128<uint8_t> round_key) {
+  return Vec128<uint8_t>(vaesdq_u8(state.raw, vdupq_n_u8(0))) ^ round_key;  // no vaesimcq_u8 here
+}
+
+HWY_API Vec128<uint64_t> CLMulLower(Vec128<uint64_t> a, Vec128<uint64_t> b) {
+  return Vec128<uint64_t>((uint64x2_t)vmull_p64(GetLane(a), GetLane(b)));  // GetLane operands
+}
+
+HWY_API Vec128<uint64_t> CLMulUpper(Vec128<uint64_t> a, Vec128<uint64_t> b) {
+  return Vec128<uint64_t>(  // casts reinterpret between u64 and poly64 vectors
+      (uint64x2_t)vmull_high_p64((poly64x2_t)a.raw, (poly64x2_t)b.raw));
+}
+
+#endif  // HWY_TARGET == HWY_NEON
+
+// ================================================== MISC
+
+template <class D, HWY_IF_F32_D(D)>
+HWY_API VFromD<D> PromoteTo(D df32, VFromD<Rebind<bfloat16_t, D>> v) {
+  const RebindToSigned<decltype(df32)> d_bits32;
+  const Rebind<uint16_t, decltype(df32)> d_bits16;
+  // Widen the 16 raw bits to 32, then move them into the upper half.
+  return BitCast(df32, ShiftLeft<16>(PromoteTo(d_bits32, BitCast(d_bits16, v))));
+}
+
+// ------------------------------ Truncations
+
+template <class DTo, typename TTo = TFromD<DTo>, typename TFrom,
+          HWY_IF_UNSIGNED(TFrom), HWY_IF_UNSIGNED(TTo),
+          hwy::EnableIf<(sizeof(TTo) < sizeof(TFrom))>* = nullptr>
+HWY_API Vec128<TTo, 1> TruncateTo(DTo /* tag */, Vec128<TFrom, 1> v) {
+  const Repartition<TTo, DFromV<decltype(v)>> d_narrow;  // same bytes as TTo
+  return Vec128<TTo, 1>{BitCast(d_narrow, v).raw};
+}
+
+template <class D, HWY_IF_U8_D(D)>
+HWY_API Vec16<uint8_t> TruncateTo(D /* tag */, Vec128<uint64_t> v) {
+  const Repartition<uint8_t, DFromV<decltype(v)>> du8;
+  const auto bytes = BitCast(du8, v);
+  const auto every2nd = detail::ConcatEven(bytes, bytes);
+  const auto every4th = detail::ConcatEven(every2nd, every2nd);
+  const auto every8th = detail::ConcatEven(every4th, every4th);
+  return LowerHalf(LowerHalf(LowerHalf(every8th)));  // shrink to two lanes
+}
+
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec32<uint16_t> TruncateTo(D /* tag */, Vec128<uint64_t> v) {
+  const Repartition<uint16_t, DFromV<decltype(v)>> du16;
+  const auto words = BitCast(du16, v);
+  const auto every2nd = detail::ConcatEven(words, words);
+  const auto every4th = detail::ConcatEven(every2nd, every2nd);
+  return LowerHalf(LowerHalf(every4th));  // shrink to two lanes
+}
+
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec64<uint32_t> TruncateTo(D /* tag */, Vec128<uint64_t> v) {
+  const Repartition<uint32_t, DFromV<decltype(v)>> du32;
+  const auto dwords = BitCast(du32, v);
+  const auto every2nd = detail::ConcatEven(dwords, dwords);
+  return LowerHalf(every2nd);  // shrink to two lanes
+}
+
+template <class D, HWY_IF_U8_D(D), HWY_IF_LANES_GT_D(D, 1)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, VFromD<Rebind<uint32_t, D>> v) {
+  const Repartition<uint8_t, DFromV<decltype(v)>> du8;
+  const auto bytes = BitCast(du8, v);
+  const auto every2nd = detail::ConcatEven(bytes, bytes);
+  const auto every4th = detail::ConcatEven(every2nd, every2nd);
+  return LowerHalf(LowerHalf(every4th));  // shrink to the output lane count
+}
+
+template <class D, HWY_IF_U16_D(D), HWY_IF_LANES_GT_D(D, 1)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, VFromD<Rebind<uint32_t, D>> v) {
+  const Repartition<uint16_t, DFromV<decltype(v)>> du16;
+  const auto words = BitCast(du16, v);
+  const auto every2nd = detail::ConcatEven(words, words);
+  return LowerHalf(every2nd);  // shrink to the output lane count
+}
+
+template <class D, HWY_IF_U8_D(D), HWY_IF_LANES_GT_D(D, 1)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, VFromD<Rebind<uint16_t, D>> v) {
+  const Repartition<uint8_t, DFromV<decltype(v)>> du8;
+  const auto bytes = BitCast(du8, v);
+  const auto every2nd = detail::ConcatEven(bytes, bytes);
+  return LowerHalf(every2nd);  // shrink to the output lane count
+}
+
+// ------------------------------ MulEven (ConcatEven)
+
+// Multiplies even lanes (0, 2 ..) and places the double-wide result into
+// even and the upper half into its odd neighbor lane.
+HWY_API Vec128<int16_t> MulEven(Vec128<int8_t> a, Vec128<int8_t> b) {
+  const DFromV<decltype(a)> d8;
+  int8x16_t b_even = ConcatEven(d8, b, b).raw;  // even lanes now in lower half
+  int8x16_t a_even = ConcatEven(d8, a, a).raw;
+  return Vec128<int16_t>(
+      vmull_s8(vget_low_s8(a_even), vget_low_s8(b_even)));
+}
+HWY_API Vec128<uint16_t> MulEven(Vec128<uint8_t> a, Vec128<uint8_t> b) {
+  const DFromV<decltype(a)> d8;
+  uint8x16_t b_even = ConcatEven(d8, b, b).raw;  // even lanes now in lower half
+  uint8x16_t a_even = ConcatEven(d8, a, a).raw;
+  return Vec128<uint16_t>(
+      vmull_u8(vget_low_u8(a_even), vget_low_u8(b_even)));
+}
+HWY_API Vec128<int32_t> MulEven(Vec128<int16_t> a, Vec128<int16_t> b) {
+  const DFromV<decltype(a)> d16;
+  int16x8_t b_even = ConcatEven(d16, b, b).raw;  // even lanes now in lower half
+  int16x8_t a_even = ConcatEven(d16, a, a).raw;
+  return Vec128<int32_t>(
+      vmull_s16(vget_low_s16(a_even), vget_low_s16(b_even)));
+}
+HWY_API Vec128<uint32_t> MulEven(Vec128<uint16_t> a, Vec128<uint16_t> b) {
+  const DFromV<decltype(a)> d16;
+  uint16x8_t b_even = ConcatEven(d16, b, b).raw;  // even lanes now in lower half
+  uint16x8_t a_even = ConcatEven(d16, a, a).raw;
+  return Vec128<uint32_t>(
+      vmull_u16(vget_low_u16(a_even), vget_low_u16(b_even)));
+}
+HWY_API Vec128<int64_t> MulEven(Vec128<int32_t> a, Vec128<int32_t> b) {
+  const DFromV<decltype(a)> d32;
+  int32x4_t b_even = ConcatEven(d32, b, b).raw;  // even lanes now in lower half
+  int32x4_t a_even = ConcatEven(d32, a, a).raw;
+  return Vec128<int64_t>(
+      vmull_s32(vget_low_s32(a_even), vget_low_s32(b_even)));
+}
+HWY_API Vec128<uint64_t> MulEven(Vec128<uint32_t> a, Vec128<uint32_t> b) {
+  const DFromV<decltype(a)> d32;
+  uint32x4_t b_even = ConcatEven(d32, b, b).raw;  // even lanes now in lower half
+  uint32x4_t a_even = ConcatEven(d32, a, a).raw;
+  return Vec128<uint64_t>(
+      vmull_u32(vget_low_u32(a_even), vget_low_u32(b_even)));
+}
+
+template <size_t N>
+HWY_API Vec128<int16_t, (N + 1) / 2> MulEven(Vec128<int8_t, N> a,
+                                             Vec128<int8_t, N> b) {
+  const DFromV<decltype(a)> d;
+  int8x8_t a_packed = ConcatEven(d, a, a).raw;
+  int8x8_t b_packed = ConcatEven(d, b, b).raw;
+  return Vec128<int16_t, (N + 1) / 2>(
+      vget_low_s16(vmull_s8(a_packed, b_packed)));
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, (N + 1) / 2> MulEven(Vec128<uint8_t, N> a,
+                                              Vec128<uint8_t, N> b) {
+  const DFromV<decltype(a)> d;
+  uint8x8_t a_packed = ConcatEven(d, a, a).raw;
+  uint8x8_t b_packed = ConcatEven(d, b, b).raw;
+  return Vec128<uint16_t, (N + 1) / 2>(
+      vget_low_u16(vmull_u8(a_packed, b_packed)));
+}
+template <size_t N>
+HWY_API Vec128<int32_t, (N + 1) / 2> MulEven(Vec128<int16_t, N> a,
+                                             Vec128<int16_t, N> b) {
+  const DFromV<decltype(a)> d;
+  int16x4_t a_packed = ConcatEven(d, a, a).raw;
+  int16x4_t b_packed = ConcatEven(d, b, b).raw;
+  return Vec128<int32_t, (N + 1) / 2>(
+      vget_low_s32(vmull_s16(a_packed, b_packed)));
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, (N + 1) / 2> MulEven(Vec128<uint16_t, N> a,
+                                              Vec128<uint16_t, N> b) {
+  const DFromV<decltype(a)> d;
+  uint16x4_t a_packed = ConcatEven(d, a, a).raw;
+  uint16x4_t b_packed = ConcatEven(d, b, b).raw;
+  return Vec128<uint32_t, (N + 1) / 2>(
+      vget_low_u32(vmull_u16(a_packed, b_packed)));
+}
+template <size_t N>
+HWY_API Vec128<int64_t, (N + 1) / 2> MulEven(Vec128<int32_t, N> a,
+                                             Vec128<int32_t, N> b) {
+  const DFromV<decltype(a)> di32;
+  // Widening multiply of the ConcatEven results; keep the low product half.
+  const int32x2_t lhs = ConcatEven(di32, a, a).raw;
+  const int32x2_t rhs = ConcatEven(di32, b, b).raw;
+  return Vec128<int64_t, (N + 1) / 2>(vget_low_s64(vmull_s32(lhs, rhs)));
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, (N + 1) / 2> MulEven(Vec128<uint32_t, N> a,
+                                              Vec128<uint32_t, N> b) {
+  const DFromV<decltype(a)> du32;
+  // Widening multiply of the ConcatEven results; keep the low product half.
+  const uint32x2_t lhs = ConcatEven(du32, a, a).raw;
+  const uint32x2_t rhs = ConcatEven(du32, b, b).raw;
+  return Vec128<uint64_t, (N + 1) / 2>(vget_low_u64(vmull_u32(lhs, rhs)));
+}
+
+HWY_INLINE Vec128<uint64_t> MulEven(Vec128<uint64_t> a, Vec128<uint64_t> b) {
+  uint64_t hi;  // out-param of Mul128 (presumably the upper 64 product bits)
+  uint64_t lo = Mul128(vgetq_lane_u64(a.raw, 0), vgetq_lane_u64(b.raw, 0), &hi);
+  return Vec128<uint64_t>(vsetq_lane_u64(hi, vdupq_n_u64(lo), 1));  // {lo, hi}
+}
+
+// Multiplies odd lanes (1, 3 ..) and places the double-wide result into
+// even and the upper half into its odd neighbor lane.
+HWY_API Vec128<int16_t> MulOdd(Vec128<int8_t> a, Vec128<int8_t> b) {
+  const DFromV<decltype(a)> di8;
+  // Only the low halves of the ConcatOdd results feed the widening multiply.
+  const int8x8_t lhs = vget_low_s8(ConcatOdd(di8, a, a).raw);
+  const int8x8_t rhs = vget_low_s8(ConcatOdd(di8, b, b).raw);
+  return Vec128<int16_t>(vmull_s8(lhs, rhs));
+}
+HWY_API Vec128<uint16_t> MulOdd(Vec128<uint8_t> a, Vec128<uint8_t> b) {
+  const DFromV<decltype(a)> du8;
+  // Only the low halves of the ConcatOdd results feed the widening multiply.
+  const uint8x8_t lhs = vget_low_u8(ConcatOdd(du8, a, a).raw);
+  const uint8x8_t rhs = vget_low_u8(ConcatOdd(du8, b, b).raw);
+  return Vec128<uint16_t>(vmull_u8(lhs, rhs));
+}
+HWY_API Vec128<int32_t> MulOdd(Vec128<int16_t> a, Vec128<int16_t> b) {
+  const DFromV<decltype(a)> di16;
+  // Only the low halves of the ConcatOdd results feed the widening multiply.
+  const int16x4_t lhs = vget_low_s16(ConcatOdd(di16, a, a).raw);
+  const int16x4_t rhs = vget_low_s16(ConcatOdd(di16, b, b).raw);
+  return Vec128<int32_t>(vmull_s16(lhs, rhs));
+}
+HWY_API Vec128<uint32_t> MulOdd(Vec128<uint16_t> a, Vec128<uint16_t> b) {
+  const DFromV<decltype(a)> du16;
+  // Only the low halves of the ConcatOdd results feed the widening multiply.
+  const uint16x4_t lhs = vget_low_u16(ConcatOdd(du16, a, a).raw);
+  const uint16x4_t rhs = vget_low_u16(ConcatOdd(du16, b, b).raw);
+  return Vec128<uint32_t>(vmull_u16(lhs, rhs));
+}
+HWY_API Vec128<int64_t> MulOdd(Vec128<int32_t> a, Vec128<int32_t> b) {
+  const DFromV<decltype(a)> di32;
+  // Only the low halves of the ConcatOdd results feed the widening multiply.
+  const int32x2_t lhs = vget_low_s32(ConcatOdd(di32, a, a).raw);
+  const int32x2_t rhs = vget_low_s32(ConcatOdd(di32, b, b).raw);
+  return Vec128<int64_t>(vmull_s32(lhs, rhs));
+}
+HWY_API Vec128<uint64_t> MulOdd(Vec128<uint32_t> a, Vec128<uint32_t> b) {
+  const DFromV<decltype(a)> du32;
+  // Only the low halves of the ConcatOdd results feed the widening multiply.
+  const uint32x2_t lhs = vget_low_u32(ConcatOdd(du32, a, a).raw);
+  const uint32x2_t rhs = vget_low_u32(ConcatOdd(du32, b, b).raw);
+  return Vec128<uint64_t>(vmull_u32(lhs, rhs));
+}
+
+template <size_t N>
+HWY_API Vec128<int16_t, (N + 1) / 2> MulOdd(Vec128<int8_t, N> a,
+                                            Vec128<int8_t, N> b) {
+  const DFromV<decltype(a)> di8;
+  // Widening multiply of the ConcatOdd results; keep the low product half.
+  const int8x8_t lhs = ConcatOdd(di8, a, a).raw;
+  const int8x8_t rhs = ConcatOdd(di8, b, b).raw;
+  return Vec128<int16_t, (N + 1) / 2>(vget_low_s16(vmull_s8(lhs, rhs)));
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, (N + 1) / 2> MulOdd(Vec128<uint8_t, N> a,
+                                             Vec128<uint8_t, N> b) {
+  const DFromV<decltype(a)> du8;
+  // Widening multiply of the ConcatOdd results; keep the low product half.
+  const uint8x8_t lhs = ConcatOdd(du8, a, a).raw;
+  const uint8x8_t rhs = ConcatOdd(du8, b, b).raw;
+  return Vec128<uint16_t, (N + 1) / 2>(vget_low_u16(vmull_u8(lhs, rhs)));
+}
+template <size_t N>
+HWY_API Vec128<int32_t, (N + 1) / 2> MulOdd(Vec128<int16_t, N> a,
+                                            Vec128<int16_t, N> b) {
+  const DFromV<decltype(a)> di16;
+  // Widening multiply of the ConcatOdd results; keep the low product half.
+  const int16x4_t lhs = ConcatOdd(di16, a, a).raw;
+  const int16x4_t rhs = ConcatOdd(di16, b, b).raw;
+  return Vec128<int32_t, (N + 1) / 2>(vget_low_s32(vmull_s16(lhs, rhs)));
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, (N + 1) / 2> MulOdd(Vec128<uint16_t, N> a,
+                                             Vec128<uint16_t, N> b) {
+  const DFromV<decltype(a)> du16;
+  // Widening multiply of the ConcatOdd results; keep the low product half.
+  const uint16x4_t lhs = ConcatOdd(du16, a, a).raw;
+  const uint16x4_t rhs = ConcatOdd(du16, b, b).raw;
+  return Vec128<uint32_t, (N + 1) / 2>(vget_low_u32(vmull_u16(lhs, rhs)));
+}
+template <size_t N>
+HWY_API Vec128<int64_t, (N + 1) / 2> MulOdd(Vec128<int32_t, N> a,
+                                            Vec128<int32_t, N> b) {
+  const DFromV<decltype(a)> di32;
+  // Widening multiply of the ConcatOdd results; keep the low product half.
+  const int32x2_t lhs = ConcatOdd(di32, a, a).raw;
+  const int32x2_t rhs = ConcatOdd(di32, b, b).raw;
+  return Vec128<int64_t, (N + 1) / 2>(vget_low_s64(vmull_s32(lhs, rhs)));
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, (N + 1) / 2> MulOdd(Vec128<uint32_t, N> a,
+                                             Vec128<uint32_t, N> b) {
+  const DFromV<decltype(a)> du32;
+  // Widening multiply of the ConcatOdd results; keep the low product half.
+  const uint32x2_t lhs = ConcatOdd(du32, a, a).raw;
+  const uint32x2_t rhs = ConcatOdd(du32, b, b).raw;
+  return Vec128<uint64_t, (N + 1) / 2>(vget_low_u64(vmull_u32(lhs, rhs)));
+}
+
+HWY_INLINE Vec128<uint64_t> MulOdd(Vec128<uint64_t> a, Vec128<uint64_t> b) {
+  uint64_t hi;  // out-param of Mul128 (presumably the upper 64 product bits)
+  uint64_t lo = Mul128(vgetq_lane_u64(a.raw, 1), vgetq_lane_u64(b.raw, 1), &hi);
+  return Vec128<uint64_t>(vsetq_lane_u64(hi, vdupq_n_u64(lo), 1));  // {lo, hi}
+}
+
+// ------------------------------ TableLookupBytes (Combine, LowerHalf)
+
+// Both full
+template <typename T, typename TI>
+HWY_API Vec128<TI> TableLookupBytes(Vec128<T> bytes, Vec128<TI> from) {
+  const DFromV<decltype(from)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  const uint8x16_t tbl = BitCast(du8, bytes).raw;
+  const uint8x16_t idx = BitCast(du8, from).raw;
+#if HWY_ARCH_ARM_A64
+  return BitCast(d, Vec128<uint8_t>(vqtbl1q_u8(tbl, idx)));
+#else
+  // Without vqtbl1q_u8: split the table into two halves for vtbl2_u8.
+  uint8x8x2_t halves;
+  halves.val[0] = vget_low_u8(tbl);
+  halves.val[1] = vget_high_u8(tbl);
+  const uint8x8_t out_lo = vtbl2_u8(halves, vget_low_u8(idx));
+  const uint8x8_t out_hi = vtbl2_u8(halves, vget_high_u8(idx));
+  return BitCast(d, Vec128<uint8_t>(vcombine_u8(out_lo, out_hi)));
+#endif
+}
+
+// Partial index vector
+template <typename T, typename TI, size_t NI, HWY_IF_V_SIZE_LE(TI, NI, 8)>
+HWY_API Vec128<TI, NI> TableLookupBytes(Vec128<T> bytes, Vec128<TI, NI> from) {
+  // Duplicate the indices to full width, then reuse the full-vector overload.
+  const Full128<TI> d128;
+  const Vec64<TI> idx64(from.raw);
+  const auto looked_up = TableLookupBytes(bytes, Combine(d128, idx64, idx64));
+  return Vec128<TI, NI>(LowerHalf(Half<decltype(d128)>(), looked_up).raw);
+}
+
+// Partial table vector
+template <typename T, size_t N, typename TI, HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_API Vec128<TI> TableLookupBytes(Vec128<T, N> bytes, Vec128<TI> from) {
+  const auto table128 = Combine(Full128<T>(), bytes, bytes);  // table, twice
+  return TableLookupBytes(table128, from);
+}
+
+// Partial both
+template <typename T, size_t N, typename TI, size_t NI,
+          HWY_IF_V_SIZE_LE(T, N, 8), HWY_IF_V_SIZE_LE(TI, NI, 8)>
+HWY_API Vec128<TI, NI> TableLookupBytes(Vec128<T, N> bytes,
+                                        Vec128<TI, NI> from) {
+  const Simd<TI, NI, 0> d_from;
+  const Repartition<uint8_t, decltype(d_from)> d_from_u8;
+  const Repartition<uint8_t, DFromV<decltype(bytes)>> d_table_u8;
+  // Both operands fit in 64 bits, so one vtbl1_u8 on their byte views suffices.
+  const uint8x8_t table = BitCast(d_table_u8, bytes).raw;
+  const uint8x8_t index = BitCast(d_from_u8, from).raw;
+  const VFromD<decltype(d_from_u8)> result(vtbl1_u8(table, index));
+  return BitCast(d_from, result);
+}
+
+// For all vector widths; Arm already yields zero for indices >= 0x10.
+template <class V, class VI>
+HWY_API VI TableLookupBytesOr0(V bytes, VI from) {
+  return TableLookupBytes(bytes, from);  // plain lookup, no extra masking
+}
+
+// ---------------------------- AESKeyGenAssist (AESLastRound, TableLookupBytes)
+
+#if HWY_TARGET == HWY_NEON
+template <uint8_t kRcon>
+HWY_API Vec128<uint8_t> AESKeyGenAssist(Vec128<uint8_t> v) {
+  alignas(16) static constexpr uint8_t kRotWordShuffle[16] = {
+      0, 13, 10, 7, 1, 14, 11, 4, 8, 5, 2, 15, 9, 6, 3, 12};
+  alignas(16) static constexpr uint8_t kRconXorMask[16] = {
+      0, kRcon, 0, 0, 0, 0, 0, 0, 0, kRcon, 0, 0, 0, 0, 0, 0};
+  const DFromV<decltype(v)> du8;
+  const Repartition<uint32_t, decltype(du8)> du32;
+  const auto odd_words = BitCast(du8, DupOdd(BitCast(du32, v)));
+  const auto substituted = AESLastRound(odd_words, Load(du8, kRconXorMask));
+  return TableLookupBytes(substituted, Load(du8, kRotWordShuffle));
+}
+#endif  // HWY_TARGET == HWY_NEON
+
+// ------------------------------ Scatter in generic_ops-inl.h
+// ------------------------------ Gather in generic_ops-inl.h
+
+// ------------------------------ Reductions
+
+namespace detail {
+
+// N=1 for any T: no-op
+template <typename T>
+HWY_INLINE T ReduceMin(hwy::SizeTag<sizeof(T)> /* tag */, Vec128<T, 1> v) {
+  return GetLane(v);  // the only lane is its own minimum
+}
+template <typename T>
+HWY_INLINE T ReduceMax(hwy::SizeTag<sizeof(T)> /* tag */, Vec128<T, 1> v) {
+  return GetLane(v);  // the only lane is its own maximum
+}
+template <typename T>
+HWY_INLINE T ReduceSum(hwy::SizeTag<sizeof(T)> /* tag */, Vec128<T, 1> v) {
+  return GetLane(v);  // a single lane is already the sum
+}
+template <typename T>
+HWY_INLINE Vec128<T, 1> SumOfLanes(hwy::SizeTag<sizeof(T)> /* tag */,
+                                   Vec128<T, 1> v) {
+  return v;  // one lane: the vector already holds its own sum
+}
+template <typename T>
+HWY_INLINE Vec128<T, 1> MinOfLanes(hwy::SizeTag<sizeof(T)> /* tag */,
+                                   Vec128<T, 1> v) {
+  return v;  // one lane: the vector already holds its own minimum
+}
+template <typename T>
+HWY_INLINE Vec128<T, 1> MaxOfLanes(hwy::SizeTag<sizeof(T)> /* tag */,
+                                   Vec128<T, 1> v) {
+  return v;  // one lane: the vector already holds its own maximum
+}
+
+// full vectors
+#if HWY_ARCH_ARM_A64
+
+// TODO(janwas): use normal HWY_NEON_DEF, then FULL type list.
+#define HWY_NEON_DEF_REDUCTION(type, size, name, prefix, infix, suffix) \
+  HWY_API type##_t name(hwy::SizeTag<sizeof(type##_t)>,                 \
+                        Vec128<type##_t, size> v) {                     \
+    return HWY_NEON_EVAL(prefix##infix##suffix, v.raw);                 \
+  }  // one scalar-returning overload per (type, size); intrinsic is pasted
+
+// Excludes u64/s64 (missing minv/maxv) and f16 (missing addv).
+#define HWY_NEON_DEF_REDUCTION_CORE_TYPES(name, prefix)       \
+  HWY_NEON_DEF_REDUCTION(uint8, 8, name, prefix, _, u8)       \
+  HWY_NEON_DEF_REDUCTION(uint8, 16, name, prefix##q, _, u8)   \
+  HWY_NEON_DEF_REDUCTION(uint16, 4, name, prefix, _, u16)     \
+  HWY_NEON_DEF_REDUCTION(uint16, 8, name, prefix##q, _, u16)  \
+  HWY_NEON_DEF_REDUCTION(uint32, 2, name, prefix, _, u32)     \
+  HWY_NEON_DEF_REDUCTION(uint32, 4, name, prefix##q, _, u32)  \
+  HWY_NEON_DEF_REDUCTION(int8, 8, name, prefix, _, s8)        \
+  HWY_NEON_DEF_REDUCTION(int8, 16, name, prefix##q, _, s8)    \
+  HWY_NEON_DEF_REDUCTION(int16, 4, name, prefix, _, s16)      \
+  HWY_NEON_DEF_REDUCTION(int16, 8, name, prefix##q, _, s16)   \
+  HWY_NEON_DEF_REDUCTION(int32, 2, name, prefix, _, s32)      \
+  HWY_NEON_DEF_REDUCTION(int32, 4, name, prefix##q, _, s32)   \
+  HWY_NEON_DEF_REDUCTION(float32, 2, name, prefix, _, f32)    \
+  HWY_NEON_DEF_REDUCTION(float32, 4, name, prefix##q, _, f32) \
+  HWY_NEON_DEF_REDUCTION(float64, 2, name, prefix##q, _, f64)  // f64: x2 only
+
+// Different interface than HWY_NEON_DEF_FUNCTION_FULL_UI_64.
+#define HWY_NEON_DEF_REDUCTION_UI64(name, prefix)            \
+  HWY_NEON_DEF_REDUCTION(uint64, 2, name, prefix##q, _, u64) \
+  HWY_NEON_DEF_REDUCTION(int64, 2, name, prefix##q, _, s64)  // 2 lanes only
+
+#if HWY_HAVE_FLOAT16
+#define HWY_NEON_DEF_REDUCTION_F16(name, prefix)           \
+  HWY_NEON_DEF_REDUCTION(float16, 4, name, prefix, _, f16) \
+  HWY_NEON_DEF_REDUCTION(float16, 8, name, prefix##q, _, f16)
+#else  // !HWY_HAVE_FLOAT16: the macro expands to nothing
+#define HWY_NEON_DEF_REDUCTION_F16(name, prefix)
+#endif  // HWY_HAVE_FLOAT16
+
+HWY_NEON_DEF_REDUCTION_CORE_TYPES(ReduceMin, vminv)  // vminv_* / vminvq_*
+HWY_NEON_DEF_REDUCTION_CORE_TYPES(ReduceMax, vmaxv)  // vmaxv_* / vmaxvq_*
+HWY_NEON_DEF_REDUCTION_F16(ReduceMin, vminv)  // only if HWY_HAVE_FLOAT16
+HWY_NEON_DEF_REDUCTION_F16(ReduceMax, vmaxv)  // only if HWY_HAVE_FLOAT16
+
+HWY_NEON_DEF_REDUCTION_CORE_TYPES(ReduceSum, vaddv)  // vaddv_* / vaddvq_*
+HWY_NEON_DEF_REDUCTION_UI64(ReduceSum, vaddv)  // vaddvq_u64 / vaddvq_s64
+
+#if HWY_HAVE_FLOAT16
+HWY_API float16_t ReduceSum(hwy::SizeTag<2>, Vec64<float16_t> v) {
+  const float16x4_t pair_sums = vpadd_f16(v.raw, v.raw);  // first pairwise add
+  return GetLane(Vec64<float16_t>(vpadd_f16(pair_sums, pair_sums)));
+}
+HWY_API float16_t ReduceSum(hwy::SizeTag<2> tag, Vec128<float16_t> v) {
+  return ReduceSum(tag, LowerHalf(Vec128<float16_t>(vpaddq_f16(v.raw, v.raw))));
+}  // one vpaddq_f16 step, then the Vec64 overload finishes the lower half
+#endif
+
+#undef HWY_NEON_DEF_REDUCTION_CORE_TYPES
+#undef HWY_NEON_DEF_REDUCTION_F16
+#undef HWY_NEON_DEF_REDUCTION_UI64
+#undef HWY_NEON_DEF_REDUCTION
+
+// N=2 fallbacks: sum for [ui]16x2; min/max for [ui]16x2 and [ui]64x2.
+#define HWY_IF_SUM_REDUCTION(T) HWY_IF_T_SIZE_ONE_OF(T, 1 << 2)
+#define HWY_IF_MINMAX_REDUCTION(T) HWY_IF_T_SIZE_ONE_OF(T, (1 << 8) | (1 << 2))
+
+// Implement Min/Max/SumOfLanes in terms of the corresponding reduction.
+template <size_t N, typename V>
+HWY_API V MinOfLanes(hwy::SizeTag<N> tag, V v) {
+  return Set(DFromV<V>(), ReduceMin(tag, v));  // broadcast the scalar minimum
+}
+template <size_t N, typename V>
+HWY_API V MaxOfLanes(hwy::SizeTag<N> tag, V v) {
+  return Set(DFromV<V>(), ReduceMax(tag, v));  // broadcast the scalar maximum
+}
+template <size_t N, typename V>
+HWY_API V SumOfLanes(hwy::SizeTag<N> tag, V v) {
+  return Set(DFromV<V>(), ReduceSum(tag, v));  // broadcast the scalar sum
+}
+
+#else
+
+// For arm7, we implement reductions using a series of pairwise operations. This
+// produces the full vector result, so we express Reduce* in terms of *OfLanes.
+#define HWY_NEON_BUILD_TYPE_T(type, size) type##x##size##_t  // e.g. uint8x8_t
+#define HWY_NEON_BUILD_RET_PAIRWISE_REDUCTION(type, size) Vec128<type##_t, size>
+#define HWY_NEON_DEF_PAIRWISE_REDUCTION(type, size, name, prefix, suffix)    \
+  HWY_API HWY_NEON_BUILD_RET_PAIRWISE_REDUCTION(type, size) name##OfLanes(   \
+      hwy::SizeTag<sizeof(type##_t)>, Vec128<type##_t, size> v) {            \
+    HWY_NEON_BUILD_TYPE_T(type, size) tmp = prefix##_##suffix(v.raw, v.raw); \
+    if ((size / 2) > 1) tmp = prefix##_##suffix(tmp, tmp);                   \
+    if ((size / 4) > 1) tmp = prefix##_##suffix(tmp, tmp);                   \
+    return HWY_NEON_BUILD_RET_PAIRWISE_REDUCTION(type, size)(tmp);           \
+  }                                                                          \
+  HWY_API type##_t Reduce##name(hwy::SizeTag<sizeof(type##_t)> tag,          \
+                                Vec128<type##_t, size> v) {                  \
+    return GetLane(name##OfLanes(tag, v));                                   \
+  }  // Reduce##name is GetLane of the name##OfLanes result
+
+// For the wide versions, the pairwise operations produce a half-length vector.
+// We produce that value with a Reduce*Vector helper method, and express Reduce*
+// and *OfLanes in terms of the helper.
+#define HWY_NEON_DEF_WIDE_PAIRWISE_REDUCTION(type, size, half, name, prefix, \
+                                             suffix)                         \
+  HWY_API HWY_NEON_BUILD_TYPE_T(type, half)                                  \
+      Reduce##name##Vector(Vec128<type##_t, size> v) {                       \
+    HWY_NEON_BUILD_TYPE_T(type, half) tmp;                                   \
+    tmp = prefix##_##suffix(vget_high_##suffix(v.raw),                       \
+                            vget_low_##suffix(v.raw));                       \
+    if ((size / 2) > 1) tmp = prefix##_##suffix(tmp, tmp);                   \
+    if ((size / 4) > 1) tmp = prefix##_##suffix(tmp, tmp);                   \
+    if ((size / 8) > 1) tmp = prefix##_##suffix(tmp, tmp);                   \
+    return tmp;                                                              \
+  }                                                                          \
+  HWY_API type##_t Reduce##name(hwy::SizeTag<sizeof(type##_t)>,              \
+                                Vec128<type##_t, size> v) {                  \
+    const HWY_NEON_BUILD_TYPE_T(type, half) tmp = Reduce##name##Vector(v);   \
+    return HWY_NEON_EVAL(vget_lane_##suffix, tmp, 0);                        \
+  }                                                                          \
+  HWY_API HWY_NEON_BUILD_RET_PAIRWISE_REDUCTION(type, size) name##OfLanes(   \
+      hwy::SizeTag<sizeof(type##_t)>, Vec128<type##_t, size> v) {            \
+    const HWY_NEON_BUILD_TYPE_T(type, half) tmp = Reduce##name##Vector(v);   \
+    return HWY_NEON_BUILD_RET_PAIRWISE_REDUCTION(                            \
+        type, size)(vcombine_##suffix(tmp, tmp));                            \
+  }  // name##OfLanes: the reduced half-vector placed in both halves
+
+#define HWY_NEON_DEF_PAIRWISE_REDUCTIONS(name, prefix)                  \
+  HWY_NEON_DEF_PAIRWISE_REDUCTION(uint32, 2, name, prefix, u32)         \
+  HWY_NEON_DEF_PAIRWISE_REDUCTION(uint16, 4, name, prefix, u16)         \
+  HWY_NEON_DEF_PAIRWISE_REDUCTION(uint8, 8, name, prefix, u8)           \
+  HWY_NEON_DEF_PAIRWISE_REDUCTION(int32, 2, name, prefix, s32)          \
+  HWY_NEON_DEF_PAIRWISE_REDUCTION(int16, 4, name, prefix, s16)          \
+  HWY_NEON_DEF_PAIRWISE_REDUCTION(int8, 8, name, prefix, s8)            \
+  HWY_NEON_DEF_PAIRWISE_REDUCTION(float32, 2, name, prefix, f32)        \
+  HWY_NEON_DEF_WIDE_PAIRWISE_REDUCTION(uint32, 4, 2, name, prefix, u32) \
+  HWY_NEON_DEF_WIDE_PAIRWISE_REDUCTION(uint16, 8, 4, name, prefix, u16) \
+  HWY_NEON_DEF_WIDE_PAIRWISE_REDUCTION(uint8, 16, 8, name, prefix, u8)  \
+  HWY_NEON_DEF_WIDE_PAIRWISE_REDUCTION(int32, 4, 2, name, prefix, s32)  \
+  HWY_NEON_DEF_WIDE_PAIRWISE_REDUCTION(int16, 8, 4, name, prefix, s16)  \
+  HWY_NEON_DEF_WIDE_PAIRWISE_REDUCTION(int8, 16, 8, name, prefix, s8)   \
+  HWY_NEON_DEF_WIDE_PAIRWISE_REDUCTION(float32, 4, 2, name, prefix, f32)
+
+HWY_NEON_DEF_PAIRWISE_REDUCTIONS(Sum, vpadd)  // SumOfLanes and ReduceSum
+HWY_NEON_DEF_PAIRWISE_REDUCTIONS(Min, vpmin)  // MinOfLanes and ReduceMin
+HWY_NEON_DEF_PAIRWISE_REDUCTIONS(Max, vpmax)  // MaxOfLanes and ReduceMax
+
+#undef HWY_NEON_DEF_PAIRWISE_REDUCTIONS
+#undef HWY_NEON_DEF_WIDE_PAIRWISE_REDUCTION
+#undef HWY_NEON_DEF_PAIRWISE_REDUCTION
+#undef HWY_NEON_BUILD_RET_PAIRWISE_REDUCTION
+#undef HWY_NEON_BUILD_TYPE_T
+
+// Need fallback sum/min/max implementations for [ui]64x2 and [ui]16x2.
+#define HWY_IF_SUM_REDUCTION(T) HWY_IF_T_SIZE_ONE_OF(T, 1 << 2 | 1 << 8)
+#define HWY_IF_MINMAX_REDUCTION(T) HWY_IF_T_SIZE_ONE_OF(T, 1 << 2 | 1 << 8)
+
+#endif
+
+}  // namespace detail
+
+// [ui]16/[ui]64: N=2 -- special case for pairs of very small or large lanes
+template <class D, typename T, HWY_IF_SUM_REDUCTION(T)>  // 2-lane fallback
+HWY_API Vec128<T, 2> SumOfLanes(D /* tag */, Vec128<T, 2> v10) {
+  return v10 + Reverse2(Simd<T, 2, 0>(), v10);  // presumably lane0 + lane1
+}
+
+template <class D, typename T, HWY_IF_SUM_REDUCTION(T)>  // 2-lane fallback
+HWY_API T ReduceSum(D d, Vec128<T, 2> v10) {
+  return GetLane(SumOfLanes(d, v10));  // scalar taken from the vector result
+}
+
+template <class D, typename T, HWY_IF_MINMAX_REDUCTION(T)>  // 2-lane fallback
+HWY_API Vec128<T, 2> MinOfLanes(D /* tag */, Vec128<T, 2> v10) {
+  return Min(v10, Reverse2(Simd<T, 2, 0>(), v10));  // v vs. Reverse2(v)
+}
+template <class D, typename T, HWY_IF_MINMAX_REDUCTION(T)>  // 2-lane fallback
+HWY_API Vec128<T, 2> MaxOfLanes(D /* tag */, Vec128<T, 2> v10) {
+  return Max(v10, Reverse2(Simd<T, 2, 0>(), v10));  // v vs. Reverse2(v)
+}
+
+#undef HWY_IF_SUM_REDUCTION
+#undef HWY_IF_MINMAX_REDUCTION
+
+template <class D>  // generic entry point: forwards with a lane-size tag
+HWY_API VFromD<D> SumOfLanes(D /* tag */, VFromD<D> v) {
+  return detail::SumOfLanes(hwy::SizeTag<sizeof(TFromD<D>)>(), v);
+}
+template <class D>  // generic entry point: forwards with a lane-size tag
+HWY_API TFromD<D> ReduceSum(D /* tag */, VFromD<D> v) {
+  return detail::ReduceSum(hwy::SizeTag<sizeof(TFromD<D>)>(), v);
+}
+template <class D>  // generic entry point: forwards with a lane-size tag
+HWY_API VFromD<D> MinOfLanes(D /* tag */, VFromD<D> v) {
+  return detail::MinOfLanes(hwy::SizeTag<sizeof(TFromD<D>)>(), v);
+}
+template <class D>  // generic entry point: forwards with a lane-size tag
+HWY_API VFromD<D> MaxOfLanes(D /* tag */, VFromD<D> v) {
+  return detail::MaxOfLanes(hwy::SizeTag<sizeof(TFromD<D>)>(), v);
+}
+
+// ------------------------------ LoadMaskBits (TestBit)
+
+namespace detail {
+
+// Helper function to set 64 bits and potentially return a smaller vector. The
+// overload is required to call the q vs non-q intrinsics. Note that 8-bit
+// LoadMaskBits only requires 16 bits, but 64 avoids casting.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_INLINE VFromD<D> Set64(D /* tag */, uint64_t mask_bits) {
+  const Vec64<uint64_t> bits64(vdup_n_u64(mask_bits));
+  return VFromD<D>(BitCast(Full64<TFromD<D>>(), bits64).raw);  // rewrap raw
+}
+template <typename T>  // full-vector overload: uses the q-form vdupq_n_u64
+HWY_INLINE Vec128<T> Set64(Full128<T> d, uint64_t mask_bits) {
+  return BitCast(d, Vec128<uint64_t>(vdupq_n_u64(mask_bits)));
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE MFromD<D> LoadMaskBits(D d, uint64_t mask_bits) {
+  // Replicate bytes 8x such that each byte contains the bit that governs it.
+  alignas(16) static constexpr uint8_t kRep8[16] = {0, 0, 0, 0, 0, 0, 0, 0,
+                                                    1, 1, 1, 1, 1, 1, 1, 1};
+  // Lane i tests bit (i % 8) of its replicated byte.
+  alignas(16) static constexpr uint8_t kBit[16] = {1, 2, 4, 8, 16, 32, 64, 128,
+                                                   1, 2, 4, 8, 16, 32, 64, 128};
+
+  const RebindToUnsigned<decltype(d)> du;
+  // Set64 rather than Set(): the latter would require an >8-bit type, which
+  // would not compile for T=uint8_t, N=1.
+  const auto bits_vec = Set64(du, mask_bits);
+  const auto spread = TableLookupBytes(bits_vec, Load(du, kRep8));
+  return RebindMask(d, TestBit(spread, LoadDup128(du, kBit)));
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_INLINE MFromD<D> LoadMaskBits(D d, uint64_t mask_bits) {
+  alignas(16) static constexpr uint16_t kBit[8] = {1, 2, 4, 8, 16, 32, 64, 128};
+  const RebindToUnsigned<decltype(d)> du;
+  const auto broadcast = Set(du, static_cast<uint16_t>(mask_bits));
+  return RebindMask(d, TestBit(broadcast, Load(du, kBit)));  // lane i: bit i
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_INLINE MFromD<D> LoadMaskBits(D d, uint64_t mask_bits) {
+  const RebindToUnsigned<decltype(d)> du;
+  alignas(16) static constexpr uint32_t kBit[4] = {1, 2, 4, 8};  // <= 4 lanes
+  const auto vmask_bits = Set(du, static_cast<uint32_t>(mask_bits));
+  return RebindMask(d, TestBit(vmask_bits, Load(du, kBit)));  // lane i: bit i
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE MFromD<D> LoadMaskBits(D d, uint64_t mask_bits) {
+  const RebindToUnsigned<decltype(d)> du;
+  alignas(16) static constexpr uint64_t kBit[2] = {1, 2};  // at most 2 lanes
+  return RebindMask(d, TestBit(Set(du, mask_bits), Load(du, kBit)));
+}
+
+}  // namespace detail
+
+// `bits` points to at least 8 readable bytes, not all of which need be valid.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API MFromD<D> LoadMaskBits(D d, const uint8_t* HWY_RESTRICT bits) {
+  uint64_t packed = 0;  // bytes beyond those copied below remain zero
+  CopyBytes<(d.MaxLanes() + 7) / 8>(bits, &packed);
+  return detail::LoadMaskBits(d, packed);
+}
+
+// ------------------------------ Mask
+
+namespace detail {
+
+// Returns mask[i]? 0xF : 0 in each nibble. This is more efficient than
+// BitsFromMask for use in (partial) CountTrue, FindFirstTrue and AllFalse.
+template <class D, HWY_IF_V_SIZE_D(D, 16)>
+HWY_INLINE uint64_t NibblesFromMask(D d, MFromD<D> mask) {
+  const Full128<uint16_t> d_u16;
+  const uint16x8_t lanes16 = BitCast(d_u16, VecFromMask(d, mask)).raw;
+  const Vec64<uint8_t> nibbles(vshrn_n_u16(lanes16, 4));  // 4 bits per byte
+  return GetLane(BitCast(Full64<uint64_t>(), nibbles));
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 8)>
+HWY_INLINE uint64_t NibblesFromMask(D d, MFromD<D> mask) {
+  // vshrn_n_u16 has no uint16x4 form, so widen the mask vector with zeros.
+  const Twice<D> d_wide;
+  const auto widened = ZeroExtendVector(d_wide, VecFromMask(d, mask));
+  // The zeroed upper half contributes no nibbles, hence no extra masking.
+  return NibblesFromMask(d_wide, MaskFromVec(widened));
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4)>
+HWY_INLINE uint64_t NibblesFromMask(D d, MFromD<D> mask) {
+  using T = TFromD<D>;
+  const uint64_t all = NibblesFromMask(Full64<T>(), Mask64<T>(mask.raw));
+  // Keep only the 4 * MaxBytes low bits that belong to this partial vector.
+  return all & ((1ull << (d.MaxBytes() * 4)) - 1);
+}
+
+template <typename T>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<1> /*tag*/, Mask128<T> mask) {
+  // Lane i keeps only bit (i % 8), so each 8-lane half sums to one mask byte.
+  alignas(16) static constexpr uint8_t kSliceLanes[16] = {
+      1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80, 1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80,
+  };
+  const Full128<uint8_t> du;
+  const auto mask_bytes = BitCast(du, VecFromMask(Full128<T>(), mask));
+  const uint8x16_t weighted = (mask_bytes & Load(du, kSliceLanes)).raw;
+#if HWY_ARCH_ARM_A64
+  // Can't vaddv - we need two separate bytes (16 bits).
+  const uint8x8_t sum2 = vget_low_u8(vpaddq_u8(weighted, weighted));
+  const uint8x8_t sum4 = vpadd_u8(sum2, sum2);
+  const uint8x8_t sum8 = vpadd_u8(sum4, sum4);
+  return vget_lane_u64(vreinterpret_u64_u8(sum8), 0) & 0xFFFF;
+#else
+  // Don't have vpaddq, so keep doubling lane size.
+  const uint16x8_t sum2 = vpaddlq_u8(weighted);
+  const uint32x4_t sum4 = vpaddlq_u16(sum2);
+  const uint64x2_t sum8 = vpaddlq_u32(sum4);
+  return (vgetq_lane_u64(sum8, 1) << 8) | vgetq_lane_u64(sum8, 0);
+#endif
+}
+
+template <typename T, size_t N, HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<1> /*tag*/, Mask128<T, N> mask) {
+  // Lanes past N are undefined for partial vectors. A full 64-bit table of
+  // weights confines their effect to bits >= N, which OnlyActive clears.
+  alignas(8) static constexpr uint8_t kSliceLanes[8] = {1,    2,    4,    8,
+                                                        0x10, 0x20, 0x40, 0x80};
+  const DFromM<decltype(mask)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const Vec128<uint8_t, N> weights(Load(Full64<uint8_t>(), kSliceLanes).raw);
+  const uint8x8_t bits = (BitCast(du, VecFromMask(d, mask)) & weights).raw;
+
+#if HWY_ARCH_ARM_A64
+  return vaddv_u8(bits);
+#else
+  const uint16x4_t sum2 = vpaddl_u8(bits);
+  const uint32x2_t sum4 = vpaddl_u16(sum2);
+  const uint64x1_t sum8 = vpaddl_u32(sum4);
+  return vget_lane_u64(sum8, 0);
+#endif
+}
+
+template <typename T>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<2> /*tag*/, Mask128<T> mask) {
+  // Lane i contributes 2^i when its mask is set.
+  alignas(16) static constexpr uint16_t kSliceLanes[8] = {
+      1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80};
+  const Full128<uint16_t> du;
+  const auto mask_u16 = BitCast(du, VecFromMask(Full128<T>(), mask));
+  const uint16x8_t weighted = (mask_u16 & Load(du, kSliceLanes)).raw;
+#if HWY_ARCH_ARM_A64
+  return vaddvq_u16(weighted);
+#else
+  const uint32x4_t sum2 = vpaddlq_u16(weighted);
+  const uint64x2_t sum4 = vpaddlq_u32(sum2);
+  return vgetq_lane_u64(sum4, 0) + vgetq_lane_u64(sum4, 1);
+#endif
+}
+
+template <typename T, size_t N, HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<2> /*tag*/, Mask128<T, N> mask) {
+  // Lanes past N are undefined for partial vectors. A full 64-bit table of
+  // weights confines their effect to bits >= N, which OnlyActive clears.
+  alignas(8) static constexpr uint16_t kSliceLanes[4] = {1, 2, 4, 8};
+  const DFromM<decltype(mask)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const Vec128<uint16_t, N> weights(Load(Full64<uint16_t>(), kSliceLanes).raw);
+  const uint16x4_t bits = (BitCast(du, VecFromMask(d, mask)) & weights).raw;
+#if HWY_ARCH_ARM_A64
+  return vaddv_u16(bits);
+#else
+  const uint32x2_t sum2 = vpaddl_u16(bits);
+  const uint64x1_t sum4 = vpaddl_u32(sum2);
+  return vget_lane_u64(sum4, 0);
+#endif
+}
+
+template <typename T>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<4> /*tag*/, Mask128<T> mask) {
+  alignas(16) static constexpr uint32_t kSliceLanes[4] = {1, 2, 4, 8};
+  const Full128<uint32_t> du;
+  const auto mask_u32 = BitCast(du, VecFromMask(Full128<T>(), mask));
+  const uint32x4_t weighted = (mask_u32 & Load(du, kSliceLanes)).raw;
+  // Lane i now holds 2^i if set, so the horizontal sum is the bit pattern.
+#if HWY_ARCH_ARM_A64
+  return vaddvq_u32(weighted);
+#else
+  const uint64x2_t sum2 = vpaddlq_u32(weighted);
+  return vgetq_lane_u64(sum2, 0) + vgetq_lane_u64(sum2, 1);
+#endif
+}
+
+template <typename T, size_t N, HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<4> /*tag*/, Mask128<T, N> mask) {
+  // Lanes past N are undefined for partial vectors. A full 64-bit table of
+  // weights confines their effect to bits >= N, which OnlyActive clears.
+  alignas(8) static constexpr uint32_t kSliceLanes[2] = {1, 2};
+  const DFromM<decltype(mask)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const Vec128<uint32_t, N> weights(Load(Full64<uint32_t>(), kSliceLanes).raw);
+  const uint32x2_t bits = (BitCast(du, VecFromMask(d, mask)) & weights).raw;
+#if HWY_ARCH_ARM_A64
+  return vaddv_u32(bits);
+#else
+  const uint64x1_t sum2 = vpaddl_u32(bits);
+  return vget_lane_u64(sum2, 0);
+#endif
+}
+
+template <typename T>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<8> /*tag*/, Mask128<T> m) {
+  alignas(16) static constexpr uint64_t kSliceLanes[2] = {1, 2};
+  const Full128<uint64_t> du;
+  const auto mask_u64 = BitCast(du, VecFromMask(Full128<T>(), m));
+  const uint64x2_t weighted = (mask_u64 & Load(du, kSliceLanes)).raw;
+  // Lane 0 contributes bit 0 and lane 1 contributes bit 1.
+#if HWY_ARCH_ARM_A64
+  return vaddvq_u64(weighted);
+#else
+  return vgetq_lane_u64(weighted, 0) + vgetq_lane_u64(weighted, 1);
+#endif
+}
+
+template <typename T>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<8> /*tag*/, Mask128<T, 1> m) {
+  const Full64<uint64_t> du;
+  const auto mask_u64 = BitCast(du, VecFromMask(Full64<T>(), m));
+  const Vec64<uint64_t> bit0 = mask_u64 & Set(du, 1);  // all-ones becomes 1
+  return vget_lane_u64(bit0.raw, 0);
+}
+
+// Returns the lowest N bits of the BitsFromMask result (all if >= 8 bytes).
+template <typename T, size_t N>
+constexpr uint64_t OnlyActive(uint64_t bits) {
+  return ((N * sizeof(T)) < 8) ? (bits & ((1ull << N) - 1)) : bits;
+}
+
+template <typename T, size_t N>  // entry point: lane-size dispatch + OnlyActive
+HWY_INLINE uint64_t BitsFromMask(Mask128<T, N> mask) {
+  return OnlyActive<T, N>(BitsFromMask(hwy::SizeTag<sizeof(T)>(), mask));
+}
+
+// Returns number of lanes whose mask is set.
+//
+// Masks are either FF..FF or 0. Unfortunately there is no reduce-sub op
+// ("vsubv"). ANDing with 1 would work but requires a constant. Negating also
+// changes each lane to 1 (if mask set) or 0.
+// NOTE: PopCount also operates on vectors, so we still have to do horizontal
+// sums separately. We specialize CountTrue for full vectors (negating instead
+// of PopCount because it avoids an extra shift), and use PopCount of
+// NibblesFromMask for partial vectors.
+
+template <typename T>
+HWY_INLINE size_t CountTrue(hwy::SizeTag<1> /*tag*/, Mask128<T> mask) {
+  const Full128<int8_t> di8;
+  const auto as_i8 = BitCast(di8, VecFromMask(Full128<T>(), mask));
+  const int8x16_t per_lane = vnegq_s8(as_i8.raw);  // true lanes become 1
+
+#if HWY_ARCH_ARM_A64
+  return static_cast<size_t>(vaddvq_s8(per_lane));
+#else
+  const int16x8_t w16 = vpaddlq_s8(per_lane);
+  const int32x4_t w32 = vpaddlq_s16(w16);
+  const int64x2_t w64 = vpaddlq_s32(w32);
+  return static_cast<size_t>(vgetq_lane_s64(w64, 0) + vgetq_lane_s64(w64, 1));
+#endif
+}
+template <typename T>
+HWY_INLINE size_t CountTrue(hwy::SizeTag<2> /*tag*/, Mask128<T> mask) {
+  const Full128<int16_t> di16;
+  const auto as_i16 = BitCast(di16, VecFromMask(Full128<T>(), mask));
+  const int16x8_t per_lane = vnegq_s16(as_i16.raw);  // true lanes become 1
+
+#if HWY_ARCH_ARM_A64
+  return static_cast<size_t>(vaddvq_s16(per_lane));
+#else
+  const int32x4_t w32 = vpaddlq_s16(per_lane);
+  const int64x2_t w64 = vpaddlq_s32(w32);
+  return static_cast<size_t>(vgetq_lane_s64(w64, 0) + vgetq_lane_s64(w64, 1));
+#endif
+}
+
+template <typename T>
+HWY_INLINE size_t CountTrue(hwy::SizeTag<4> /*tag*/, Mask128<T> mask) {
+  const Full128<int32_t> di32;
+  const auto as_i32 = BitCast(di32, VecFromMask(Full128<T>(), mask));
+  const int32x4_t per_lane = vnegq_s32(as_i32.raw);  // true lanes become 1
+
+#if HWY_ARCH_ARM_A64
+  return static_cast<size_t>(vaddvq_s32(per_lane));
+#else
+  const int64x2_t w64 = vpaddlq_s32(per_lane);
+  return static_cast<size_t>(vgetq_lane_s64(w64, 0) + vgetq_lane_s64(w64, 1));
+#endif
+}
+
+template <typename T>
+HWY_INLINE size_t CountTrue(hwy::SizeTag<8> /*tag*/, Mask128<T> mask) {
+#if HWY_ARCH_ARM_A64
+  const Full128<int64_t> di64;
+  const auto as_i64 = BitCast(di64, VecFromMask(Full128<T>(), mask));
+  const int64x2_t per_lane = vnegq_s64(as_i64.raw);  // true lanes become 1
+  return static_cast<size_t>(vaddvq_s64(per_lane));
+#else
+  const Full128<uint64_t> du64;
+  const auto as_u64 = VecFromMask(du64, RebindMask(du64, mask));
+  const uint64x2_t msb = vshrq_n_u64(as_u64.raw, 63);  // true lanes become 1
+  return static_cast<size_t>(vgetq_lane_u64(msb, 0) + vgetq_lane_u64(msb, 1));
+#endif
+}
+
+}  // namespace detail
+
+// Full
+template <class D, typename T = TFromD<D>>  // full vectors: lane-size dispatch
+HWY_API size_t CountTrue(D /* tag */, Mask128<T> mask) {
+  return detail::CountTrue(hwy::SizeTag<sizeof(T)>(), mask);
+}
+
+// Partial
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API size_t CountTrue(D d, MFromD<D> mask) {
+  constexpr size_t kDiv = 4 * sizeof(TFromD<D>);  // nibble bits per true lane
+  return PopCount(detail::NibblesFromMask(d, mask)) / kDiv;
+}
+
+// Returns the lane index derived from the lowest set bit of the nibble mask.
+// NOTE(review): uses the `_Nonzero64` bit-scan helper, so callers presumably
+// must guarantee that at least one lane is true - confirm.
+template <class D>
+HWY_API size_t FindKnownFirstTrue(D d, MFromD<D> mask) {
+  // Each lane occupies 4 bits per byte in the nibble mask.
+  constexpr size_t kBitsPerLane = 4 * sizeof(TFromD<D>);
+  const uint64_t nibbles = detail::NibblesFromMask(d, mask);
+  const auto lowest_set_bit = Num0BitsBelowLS1Bit_Nonzero64(nibbles);
+  return lowest_set_bit / kBitsPerLane;
+}
+
+// Returns the lane index derived from the lowest set bit of the nibble mask,
+// or -1 if the nibble mask is zero (no lane is true).
+template <class D>
+HWY_API intptr_t FindFirstTrue(D d, MFromD<D> mask) {
+  // Each lane occupies 4 bits per byte in the nibble mask.
+  constexpr size_t kBitsPerLane = 4 * sizeof(TFromD<D>);
+  const uint64_t nibbles = detail::NibblesFromMask(d, mask);
+  if (nibbles != 0) {
+    const auto lowest_set_bit = Num0BitsBelowLS1Bit_Nonzero64(nibbles);
+    return static_cast<intptr_t>(lowest_set_bit / kBitsPerLane);
+  }
+  return -1;
+}
+
+// Returns the lane index derived from the highest set bit of the nibble mask.
+// NOTE(review): uses the `_Nonzero64` bit-scan helper, so callers presumably
+// must guarantee that at least one lane is true - confirm.
+template <class D>
+HWY_API size_t FindKnownLastTrue(D d, MFromD<D> mask) {
+  // Each lane occupies 4 bits per byte in the nibble mask.
+  constexpr size_t kBitsPerLane = 4 * sizeof(TFromD<D>);
+  const uint64_t nibbles = detail::NibblesFromMask(d, mask);
+  // 63 minus the leading-zero count is the position of the highest set bit.
+  const auto highest_set_bit = 63 - Num0BitsAboveMS1Bit_Nonzero64(nibbles);
+  return highest_set_bit / kBitsPerLane;
+}
+
+// Returns the lane index derived from the highest set bit of the nibble mask,
+// or -1 if the nibble mask is zero (no lane is true).
+template <class D>
+HWY_API intptr_t FindLastTrue(D d, MFromD<D> mask) {
+  // Each lane occupies 4 bits per byte in the nibble mask.
+  constexpr size_t kBitsPerLane = 4 * sizeof(TFromD<D>);
+  const uint64_t nibbles = detail::NibblesFromMask(d, mask);
+  if (nibbles != 0) {
+    // 63 minus the leading-zero count is the position of the highest set bit.
+    const auto highest_set_bit = 63 - Num0BitsAboveMS1Bit_Nonzero64(nibbles);
+    return static_cast<intptr_t>(highest_set_bit / kBitsPerLane);
+  }
+  return -1;
+}
+
+// Writes the mask's packed bits (from detail::BitsFromMask) to `bits` and
+// returns the number of bytes written: (lanes + 7) / 8.
+// `bits` points to at least 8 writable bytes.
+template <class D>
+HWY_API size_t StoreMaskBits(D d, MFromD<D> mask, uint8_t* bits) {
+  const size_t kNumBytes = (d.MaxLanes() + 7) / 8;
+  const uint64_t packed = detail::BitsFromMask(mask);
+  // Only the low kNumBytes bytes of the 64-bit value are copied out.
+  CopyBytes<kNumBytes>(&packed, bits);
+  return kNumBytes;
+}
+
+// Returns true if the nibble mask is zero, i.e. no lane of `m` is set.
+template <class D>
+HWY_API bool AllFalse(D d, MFromD<D> m) {
+  const auto nibbles = detail::NibblesFromMask(d, m);
+  return nibbles == 0;
+}
+
+// Full vectors: true if every bit of the 64-bit nibble mask is set.
+template <class D, typename T = TFromD<D>>
+HWY_API bool AllTrue(D d, Mask128<T> m) {
+  const auto nibbles = detail::NibblesFromMask(d, m);
+  return nibbles == ~0ull;
+}
+// Partial vectors (at most 8 bytes): true if the nibble mask equals the value
+// with the low 4 * MaxBytes() bits set (4 bits per vector byte).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API bool AllTrue(D d, MFromD<D> m) {
+  const auto all_lanes_set = (1ull << (d.MaxBytes() * 4)) - 1;
+  const auto nibbles = detail::NibblesFromMask(d, m);
+  return nibbles == all_lanes_set;
+}
+
+// ------------------------------ Compress
+
+// Compile-time trait: `value` is nonzero for every lane type except 1-byte
+// lanes (sizeof(T) == 1).
+// NOTE(review): presumably indicates whether Compress also places the
+// unselected lanes after the selected ones (a partition) - confirm against
+// the library documentation.
+template <typename T>
+struct CompressIsPartition {
+  enum { value = (sizeof(T) != 1) };
+};
+
+namespace detail {
+
+// Full vectors: loads 8 bytes from `bytes` as one 64-bit value and duplicates
+// it into both halves of the register, so ZipLower can use the lower half.
+template <class D, HWY_IF_V_SIZE_D(D, 16)>
+HWY_INLINE Vec128<uint8_t> Load8Bytes(D /*tag*/, const uint8_t* bytes) {
+  const uint64_t* as_u64 = reinterpret_cast<const uint64_t*>(bytes);
+  const uint64x2_t duplicated = vld1q_dup_u64(as_u64);
+  return Vec128<uint8_t>(vreinterpretq_u8_u64(duplicated));
+}
+
+// Partial vectors (at most 8 bytes): forwards to Load(d, bytes) and returns a
+// half-register (or smaller) vector with N <= 8 bytes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_INLINE VFromD<D> Load8Bytes(D d, const uint8_t* bytes) {
+  return Load(d, bytes);
+}
+
+// Returns the byte-index vector (for a byte table lookup) used by Compress on
+// 2-byte lanes. `mask_bits` must be < 256 (one bit per lane, up to 8 lanes);
+// it selects an 8-byte row of `table`, which holds pre-doubled lane indices.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> IdxFromBits(hwy::SizeTag<2> /*tag*/,
+                                    uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 256);
+  const Simd<T, N, 0> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  const Simd<uint16_t, N, 0> du;
+
+  // NEON does not provide an equivalent of AVX2 permutevar, so we need byte
+  // indices for VTBL (one vector's worth for each of 256 combinations of
+  // 8 mask bits). Loading them directly would require 4 KiB. We can instead
+  // store lane indices and convert to byte indices (2*lane + 0..1), with the
+  // doubling baked into the table. AVX2 Compress32 stores eight 4-bit lane
+  // indices (total 1 KiB), broadcasts them into each 32-bit lane and shifts.
+  // Here, 16-bit lanes are too narrow to hold all bits, and unpacking nibbles
+  // is likely more costly than the higher cache footprint from storing bytes.
+  alignas(16) static constexpr uint8_t table[256 * 8] = {
+      // PrintCompress16x8Tables
+      0,  2,  4,  6,  8,  10, 12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      2,  0,  4,  6,  8,  10, 12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      4,  0,  2,  6,  8,  10, 12, 14, /**/ 0, 4,  2,  6,  8,  10, 12, 14,  //
+      2,  4,  0,  6,  8,  10, 12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      6,  0,  2,  4,  8,  10, 12, 14, /**/ 0, 6,  2,  4,  8,  10, 12, 14,  //
+      2,  6,  0,  4,  8,  10, 12, 14, /**/ 0, 2,  6,  4,  8,  10, 12, 14,  //
+      4,  6,  0,  2,  8,  10, 12, 14, /**/ 0, 4,  6,  2,  8,  10, 12, 14,  //
+      2,  4,  6,  0,  8,  10, 12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      8,  0,  2,  4,  6,  10, 12, 14, /**/ 0, 8,  2,  4,  6,  10, 12, 14,  //
+      2,  8,  0,  4,  6,  10, 12, 14, /**/ 0, 2,  8,  4,  6,  10, 12, 14,  //
+      4,  8,  0,  2,  6,  10, 12, 14, /**/ 0, 4,  8,  2,  6,  10, 12, 14,  //
+      2,  4,  8,  0,  6,  10, 12, 14, /**/ 0, 2,  4,  8,  6,  10, 12, 14,  //
+      6,  8,  0,  2,  4,  10, 12, 14, /**/ 0, 6,  8,  2,  4,  10, 12, 14,  //
+      2,  6,  8,  0,  4,  10, 12, 14, /**/ 0, 2,  6,  8,  4,  10, 12, 14,  //
+      4,  6,  8,  0,  2,  10, 12, 14, /**/ 0, 4,  6,  8,  2,  10, 12, 14,  //
+      2,  4,  6,  8,  0,  10, 12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      10, 0,  2,  4,  6,  8,  12, 14, /**/ 0, 10, 2,  4,  6,  8,  12, 14,  //
+      2,  10, 0,  4,  6,  8,  12, 14, /**/ 0, 2,  10, 4,  6,  8,  12, 14,  //
+      4,  10, 0,  2,  6,  8,  12, 14, /**/ 0, 4,  10, 2,  6,  8,  12, 14,  //
+      2,  4,  10, 0,  6,  8,  12, 14, /**/ 0, 2,  4,  10, 6,  8,  12, 14,  //
+      6,  10, 0,  2,  4,  8,  12, 14, /**/ 0, 6,  10, 2,  4,  8,  12, 14,  //
+      2,  6,  10, 0,  4,  8,  12, 14, /**/ 0, 2,  6,  10, 4,  8,  12, 14,  //
+      4,  6,  10, 0,  2,  8,  12, 14, /**/ 0, 4,  6,  10, 2,  8,  12, 14,  //
+      2,  4,  6,  10, 0,  8,  12, 14, /**/ 0, 2,  4,  6,  10, 8,  12, 14,  //
+      8,  10, 0,  2,  4,  6,  12, 14, /**/ 0, 8,  10, 2,  4,  6,  12, 14,  //
+      2,  8,  10, 0,  4,  6,  12, 14, /**/ 0, 2,  8,  10, 4,  6,  12, 14,  //
+      4,  8,  10, 0,  2,  6,  12, 14, /**/ 0, 4,  8,  10, 2,  6,  12, 14,  //
+      2,  4,  8,  10, 0,  6,  12, 14, /**/ 0, 2,  4,  8,  10, 6,  12, 14,  //
+      6,  8,  10, 0,  2,  4,  12, 14, /**/ 0, 6,  8,  10, 2,  4,  12, 14,  //
+      2,  6,  8,  10, 0,  4,  12, 14, /**/ 0, 2,  6,  8,  10, 4,  12, 14,  //
+      4,  6,  8,  10, 0,  2,  12, 14, /**/ 0, 4,  6,  8,  10, 2,  12, 14,  //
+      2,  4,  6,  8,  10, 0,  12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      12, 0,  2,  4,  6,  8,  10, 14, /**/ 0, 12, 2,  4,  6,  8,  10, 14,  //
+      2,  12, 0,  4,  6,  8,  10, 14, /**/ 0, 2,  12, 4,  6,  8,  10, 14,  //
+      4,  12, 0,  2,  6,  8,  10, 14, /**/ 0, 4,  12, 2,  6,  8,  10, 14,  //
+      2,  4,  12, 0,  6,  8,  10, 14, /**/ 0, 2,  4,  12, 6,  8,  10, 14,  //
+      6,  12, 0,  2,  4,  8,  10, 14, /**/ 0, 6,  12, 2,  4,  8,  10, 14,  //
+      2,  6,  12, 0,  4,  8,  10, 14, /**/ 0, 2,  6,  12, 4,  8,  10, 14,  //
+      4,  6,  12, 0,  2,  8,  10, 14, /**/ 0, 4,  6,  12, 2,  8,  10, 14,  //
+      2,  4,  6,  12, 0,  8,  10, 14, /**/ 0, 2,  4,  6,  12, 8,  10, 14,  //
+      8,  12, 0,  2,  4,  6,  10, 14, /**/ 0, 8,  12, 2,  4,  6,  10, 14,  //
+      2,  8,  12, 0,  4,  6,  10, 14, /**/ 0, 2,  8,  12, 4,  6,  10, 14,  //
+      4,  8,  12, 0,  2,  6,  10, 14, /**/ 0, 4,  8,  12, 2,  6,  10, 14,  //
+      2,  4,  8,  12, 0,  6,  10, 14, /**/ 0, 2,  4,  8,  12, 6,  10, 14,  //
+      6,  8,  12, 0,  2,  4,  10, 14, /**/ 0, 6,  8,  12, 2,  4,  10, 14,  //
+      2,  6,  8,  12, 0,  4,  10, 14, /**/ 0, 2,  6,  8,  12, 4,  10, 14,  //
+      4,  6,  8,  12, 0,  2,  10, 14, /**/ 0, 4,  6,  8,  12, 2,  10, 14,  //
+      2,  4,  6,  8,  12, 0,  10, 14, /**/ 0, 2,  4,  6,  8,  12, 10, 14,  //
+      10, 12, 0,  2,  4,  6,  8,  14, /**/ 0, 10, 12, 2,  4,  6,  8,  14,  //
+      2,  10, 12, 0,  4,  6,  8,  14, /**/ 0, 2,  10, 12, 4,  6,  8,  14,  //
+      4,  10, 12, 0,  2,  6,  8,  14, /**/ 0, 4,  10, 12, 2,  6,  8,  14,  //
+      2,  4,  10, 12, 0,  6,  8,  14, /**/ 0, 2,  4,  10, 12, 6,  8,  14,  //
+      6,  10, 12, 0,  2,  4,  8,  14, /**/ 0, 6,  10, 12, 2,  4,  8,  14,  //
+      2,  6,  10, 12, 0,  4,  8,  14, /**/ 0, 2,  6,  10, 12, 4,  8,  14,  //
+      4,  6,  10, 12, 0,  2,  8,  14, /**/ 0, 4,  6,  10, 12, 2,  8,  14,  //
+      2,  4,  6,  10, 12, 0,  8,  14, /**/ 0, 2,  4,  6,  10, 12, 8,  14,  //
+      8,  10, 12, 0,  2,  4,  6,  14, /**/ 0, 8,  10, 12, 2,  4,  6,  14,  //
+      2,  8,  10, 12, 0,  4,  6,  14, /**/ 0, 2,  8,  10, 12, 4,  6,  14,  //
+      4,  8,  10, 12, 0,  2,  6,  14, /**/ 0, 4,  8,  10, 12, 2,  6,  14,  //
+      2,  4,  8,  10, 12, 0,  6,  14, /**/ 0, 2,  4,  8,  10, 12, 6,  14,  //
+      6,  8,  10, 12, 0,  2,  4,  14, /**/ 0, 6,  8,  10, 12, 2,  4,  14,  //
+      2,  6,  8,  10, 12, 0,  4,  14, /**/ 0, 2,  6,  8,  10, 12, 4,  14,  //
+      4,  6,  8,  10, 12, 0,  2,  14, /**/ 0, 4,  6,  8,  10, 12, 2,  14,  //
+      2,  4,  6,  8,  10, 12, 0,  14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      14, 0,  2,  4,  6,  8,  10, 12, /**/ 0, 14, 2,  4,  6,  8,  10, 12,  //
+      2,  14, 0,  4,  6,  8,  10, 12, /**/ 0, 2,  14, 4,  6,  8,  10, 12,  //
+      4,  14, 0,  2,  6,  8,  10, 12, /**/ 0, 4,  14, 2,  6,  8,  10, 12,  //
+      2,  4,  14, 0,  6,  8,  10, 12, /**/ 0, 2,  4,  14, 6,  8,  10, 12,  //
+      6,  14, 0,  2,  4,  8,  10, 12, /**/ 0, 6,  14, 2,  4,  8,  10, 12,  //
+      2,  6,  14, 0,  4,  8,  10, 12, /**/ 0, 2,  6,  14, 4,  8,  10, 12,  //
+      4,  6,  14, 0,  2,  8,  10, 12, /**/ 0, 4,  6,  14, 2,  8,  10, 12,  //
+      2,  4,  6,  14, 0,  8,  10, 12, /**/ 0, 2,  4,  6,  14, 8,  10, 12,  //
+      8,  14, 0,  2,  4,  6,  10, 12, /**/ 0, 8,  14, 2,  4,  6,  10, 12,  //
+      2,  8,  14, 0,  4,  6,  10, 12, /**/ 0, 2,  8,  14, 4,  6,  10, 12,  //
+      4,  8,  14, 0,  2,  6,  10, 12, /**/ 0, 4,  8,  14, 2,  6,  10, 12,  //
+      2,  4,  8,  14, 0,  6,  10, 12, /**/ 0, 2,  4,  8,  14, 6,  10, 12,  //
+      6,  8,  14, 0,  2,  4,  10, 12, /**/ 0, 6,  8,  14, 2,  4,  10, 12,  //
+      2,  6,  8,  14, 0,  4,  10, 12, /**/ 0, 2,  6,  8,  14, 4,  10, 12,  //
+      4,  6,  8,  14, 0,  2,  10, 12, /**/ 0, 4,  6,  8,  14, 2,  10, 12,  //
+      2,  4,  6,  8,  14, 0,  10, 12, /**/ 0, 2,  4,  6,  8,  14, 10, 12,  //
+      10, 14, 0,  2,  4,  6,  8,  12, /**/ 0, 10, 14, 2,  4,  6,  8,  12,  //
+      2,  10, 14, 0,  4,  6,  8,  12, /**/ 0, 2,  10, 14, 4,  6,  8,  12,  //
+      4,  10, 14, 0,  2,  6,  8,  12, /**/ 0, 4,  10, 14, 2,  6,  8,  12,  //
+      2,  4,  10, 14, 0,  6,  8,  12, /**/ 0, 2,  4,  10, 14, 6,  8,  12,  //
+      6,  10, 14, 0,  2,  4,  8,  12, /**/ 0, 6,  10, 14, 2,  4,  8,  12,  //
+      2,  6,  10, 14, 0,  4,  8,  12, /**/ 0, 2,  6,  10, 14, 4,  8,  12,  //
+      4,  6,  10, 14, 0,  2,  8,  12, /**/ 0, 4,  6,  10, 14, 2,  8,  12,  //
+      2,  4,  6,  10, 14, 0,  8,  12, /**/ 0, 2,  4,  6,  10, 14, 8,  12,  //
+      8,  10, 14, 0,  2,  4,  6,  12, /**/ 0, 8,  10, 14, 2,  4,  6,  12,  //
+      2,  8,  10, 14, 0,  4,  6,  12, /**/ 0, 2,  8,  10, 14, 4,  6,  12,  //
+      4,  8,  10, 14, 0,  2,  6,  12, /**/ 0, 4,  8,  10, 14, 2,  6,  12,  //
+      2,  4,  8,  10, 14, 0,  6,  12, /**/ 0, 2,  4,  8,  10, 14, 6,  12,  //
+      6,  8,  10, 14, 0,  2,  4,  12, /**/ 0, 6,  8,  10, 14, 2,  4,  12,  //
+      2,  6,  8,  10, 14, 0,  4,  12, /**/ 0, 2,  6,  8,  10, 14, 4,  12,  //
+      4,  6,  8,  10, 14, 0,  2,  12, /**/ 0, 4,  6,  8,  10, 14, 2,  12,  //
+      2,  4,  6,  8,  10, 14, 0,  12, /**/ 0, 2,  4,  6,  8,  10, 14, 12,  //
+      12, 14, 0,  2,  4,  6,  8,  10, /**/ 0, 12, 14, 2,  4,  6,  8,  10,  //
+      2,  12, 14, 0,  4,  6,  8,  10, /**/ 0, 2,  12, 14, 4,  6,  8,  10,  //
+      4,  12, 14, 0,  2,  6,  8,  10, /**/ 0, 4,  12, 14, 2,  6,  8,  10,  //
+      2,  4,  12, 14, 0,  6,  8,  10, /**/ 0, 2,  4,  12, 14, 6,  8,  10,  //
+      6,  12, 14, 0,  2,  4,  8,  10, /**/ 0, 6,  12, 14, 2,  4,  8,  10,  //
+      2,  6,  12, 14, 0,  4,  8,  10, /**/ 0, 2,  6,  12, 14, 4,  8,  10,  //
+      4,  6,  12, 14, 0,  2,  8,  10, /**/ 0, 4,  6,  12, 14, 2,  8,  10,  //
+      2,  4,  6,  12, 14, 0,  8,  10, /**/ 0, 2,  4,  6,  12, 14, 8,  10,  //
+      8,  12, 14, 0,  2,  4,  6,  10, /**/ 0, 8,  12, 14, 2,  4,  6,  10,  //
+      2,  8,  12, 14, 0,  4,  6,  10, /**/ 0, 2,  8,  12, 14, 4,  6,  10,  //
+      4,  8,  12, 14, 0,  2,  6,  10, /**/ 0, 4,  8,  12, 14, 2,  6,  10,  //
+      2,  4,  8,  12, 14, 0,  6,  10, /**/ 0, 2,  4,  8,  12, 14, 6,  10,  //
+      6,  8,  12, 14, 0,  2,  4,  10, /**/ 0, 6,  8,  12, 14, 2,  4,  10,  //
+      2,  6,  8,  12, 14, 0,  4,  10, /**/ 0, 2,  6,  8,  12, 14, 4,  10,  //
+      4,  6,  8,  12, 14, 0,  2,  10, /**/ 0, 4,  6,  8,  12, 14, 2,  10,  //
+      2,  4,  6,  8,  12, 14, 0,  10, /**/ 0, 2,  4,  6,  8,  12, 14, 10,  //
+      10, 12, 14, 0,  2,  4,  6,  8,  /**/ 0, 10, 12, 14, 2,  4,  6,  8,   //
+      2,  10, 12, 14, 0,  4,  6,  8,  /**/ 0, 2,  10, 12, 14, 4,  6,  8,   //
+      4,  10, 12, 14, 0,  2,  6,  8,  /**/ 0, 4,  10, 12, 14, 2,  6,  8,   //
+      2,  4,  10, 12, 14, 0,  6,  8,  /**/ 0, 2,  4,  10, 12, 14, 6,  8,   //
+      6,  10, 12, 14, 0,  2,  4,  8,  /**/ 0, 6,  10, 12, 14, 2,  4,  8,   //
+      2,  6,  10, 12, 14, 0,  4,  8,  /**/ 0, 2,  6,  10, 12, 14, 4,  8,   //
+      4,  6,  10, 12, 14, 0,  2,  8,  /**/ 0, 4,  6,  10, 12, 14, 2,  8,   //
+      2,  4,  6,  10, 12, 14, 0,  8,  /**/ 0, 2,  4,  6,  10, 12, 14, 8,   //
+      8,  10, 12, 14, 0,  2,  4,  6,  /**/ 0, 8,  10, 12, 14, 2,  4,  6,   //
+      2,  8,  10, 12, 14, 0,  4,  6,  /**/ 0, 2,  8,  10, 12, 14, 4,  6,   //
+      4,  8,  10, 12, 14, 0,  2,  6,  /**/ 0, 4,  8,  10, 12, 14, 2,  6,   //
+      2,  4,  8,  10, 12, 14, 0,  6,  /**/ 0, 2,  4,  8,  10, 12, 14, 6,   //
+      6,  8,  10, 12, 14, 0,  2,  4,  /**/ 0, 6,  8,  10, 12, 14, 2,  4,   //
+      2,  6,  8,  10, 12, 14, 0,  4,  /**/ 0, 2,  6,  8,  10, 12, 14, 4,   //
+      4,  6,  8,  10, 12, 14, 0,  2,  /**/ 0, 4,  6,  8,  10, 12, 14, 2,   //
+      2,  4,  6,  8,  10, 12, 14, 0,  /**/ 0, 2,  4,  6,  8,  10, 12, 14};
+
+  // Each table row is 8 bytes, hence the `mask_bits * 8` offset.
+  const Vec128<uint8_t, 2 * N> byte_idx = Load8Bytes(d8, table + mask_bits * 8);
+  // Pair each index with itself, then add 0x0100 per 16-bit lane to obtain
+  // the (2*lane + 0..1) byte indices described above.
+  const Vec128<uint16_t, N> pairs = ZipLower(byte_idx, byte_idx);
+  return BitCast(d, pairs + Set(du, 0x0100));
+}
+
+// Returns the byte-index vector (for a byte table lookup) used by CompressNot
+// on 2-byte lanes. `mask_bits` must be < 256 (one bit per lane, up to 8
+// lanes); it selects an 8-byte row of `table`, which holds pre-doubled lane
+// indices.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> IdxFromNotBits(hwy::SizeTag<2> /*tag*/,
+                                       uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 256);
+  const Simd<T, N, 0> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  const Simd<uint16_t, N, 0> du;
+
+  // NEON does not provide an equivalent of AVX2 permutevar, so we need byte
+  // indices for VTBL (one vector's worth for each of 256 combinations of
+  // 8 mask bits). Loading them directly would require 4 KiB. We can instead
+  // store lane indices and convert to byte indices (2*lane + 0..1), with the
+  // doubling baked into the table. AVX2 Compress32 stores eight 4-bit lane
+  // indices (total 1 KiB), broadcasts them into each 32-bit lane and shifts.
+  // Here, 16-bit lanes are too narrow to hold all bits, and unpacking nibbles
+  // is likely more costly than the higher cache footprint from storing bytes.
+  alignas(16) static constexpr uint8_t table[256 * 8] = {
+      // PrintCompressNot16x8Tables
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  6,  8,  10, 12, 14, 0,   //
+      0, 4,  6,  8,  10, 12, 14, 2,  /**/ 4,  6,  8,  10, 12, 14, 0,  2,   //
+      0, 2,  6,  8,  10, 12, 14, 4,  /**/ 2,  6,  8,  10, 12, 14, 0,  4,   //
+      0, 6,  8,  10, 12, 14, 2,  4,  /**/ 6,  8,  10, 12, 14, 0,  2,  4,   //
+      0, 2,  4,  8,  10, 12, 14, 6,  /**/ 2,  4,  8,  10, 12, 14, 0,  6,   //
+      0, 4,  8,  10, 12, 14, 2,  6,  /**/ 4,  8,  10, 12, 14, 0,  2,  6,   //
+      0, 2,  8,  10, 12, 14, 4,  6,  /**/ 2,  8,  10, 12, 14, 0,  4,  6,   //
+      0, 8,  10, 12, 14, 2,  4,  6,  /**/ 8,  10, 12, 14, 0,  2,  4,  6,   //
+      0, 2,  4,  6,  10, 12, 14, 8,  /**/ 2,  4,  6,  10, 12, 14, 0,  8,   //
+      0, 4,  6,  10, 12, 14, 2,  8,  /**/ 4,  6,  10, 12, 14, 0,  2,  8,   //
+      0, 2,  6,  10, 12, 14, 4,  8,  /**/ 2,  6,  10, 12, 14, 0,  4,  8,   //
+      0, 6,  10, 12, 14, 2,  4,  8,  /**/ 6,  10, 12, 14, 0,  2,  4,  8,   //
+      0, 2,  4,  10, 12, 14, 6,  8,  /**/ 2,  4,  10, 12, 14, 0,  6,  8,   //
+      0, 4,  10, 12, 14, 2,  6,  8,  /**/ 4,  10, 12, 14, 0,  2,  6,  8,   //
+      0, 2,  10, 12, 14, 4,  6,  8,  /**/ 2,  10, 12, 14, 0,  4,  6,  8,   //
+      0, 10, 12, 14, 2,  4,  6,  8,  /**/ 10, 12, 14, 0,  2,  4,  6,  8,   //
+      0, 2,  4,  6,  8,  12, 14, 10, /**/ 2,  4,  6,  8,  12, 14, 0,  10,  //
+      0, 4,  6,  8,  12, 14, 2,  10, /**/ 4,  6,  8,  12, 14, 0,  2,  10,  //
+      0, 2,  6,  8,  12, 14, 4,  10, /**/ 2,  6,  8,  12, 14, 0,  4,  10,  //
+      0, 6,  8,  12, 14, 2,  4,  10, /**/ 6,  8,  12, 14, 0,  2,  4,  10,  //
+      0, 2,  4,  8,  12, 14, 6,  10, /**/ 2,  4,  8,  12, 14, 0,  6,  10,  //
+      0, 4,  8,  12, 14, 2,  6,  10, /**/ 4,  8,  12, 14, 0,  2,  6,  10,  //
+      0, 2,  8,  12, 14, 4,  6,  10, /**/ 2,  8,  12, 14, 0,  4,  6,  10,  //
+      0, 8,  12, 14, 2,  4,  6,  10, /**/ 8,  12, 14, 0,  2,  4,  6,  10,  //
+      0, 2,  4,  6,  12, 14, 8,  10, /**/ 2,  4,  6,  12, 14, 0,  8,  10,  //
+      0, 4,  6,  12, 14, 2,  8,  10, /**/ 4,  6,  12, 14, 0,  2,  8,  10,  //
+      0, 2,  6,  12, 14, 4,  8,  10, /**/ 2,  6,  12, 14, 0,  4,  8,  10,  //
+      0, 6,  12, 14, 2,  4,  8,  10, /**/ 6,  12, 14, 0,  2,  4,  8,  10,  //
+      0, 2,  4,  12, 14, 6,  8,  10, /**/ 2,  4,  12, 14, 0,  6,  8,  10,  //
+      0, 4,  12, 14, 2,  6,  8,  10, /**/ 4,  12, 14, 0,  2,  6,  8,  10,  //
+      0, 2,  12, 14, 4,  6,  8,  10, /**/ 2,  12, 14, 0,  4,  6,  8,  10,  //
+      0, 12, 14, 2,  4,  6,  8,  10, /**/ 12, 14, 0,  2,  4,  6,  8,  10,  //
+      0, 2,  4,  6,  8,  10, 14, 12, /**/ 2,  4,  6,  8,  10, 14, 0,  12,  //
+      0, 4,  6,  8,  10, 14, 2,  12, /**/ 4,  6,  8,  10, 14, 0,  2,  12,  //
+      0, 2,  6,  8,  10, 14, 4,  12, /**/ 2,  6,  8,  10, 14, 0,  4,  12,  //
+      0, 6,  8,  10, 14, 2,  4,  12, /**/ 6,  8,  10, 14, 0,  2,  4,  12,  //
+      0, 2,  4,  8,  10, 14, 6,  12, /**/ 2,  4,  8,  10, 14, 0,  6,  12,  //
+      0, 4,  8,  10, 14, 2,  6,  12, /**/ 4,  8,  10, 14, 0,  2,  6,  12,  //
+      0, 2,  8,  10, 14, 4,  6,  12, /**/ 2,  8,  10, 14, 0,  4,  6,  12,  //
+      0, 8,  10, 14, 2,  4,  6,  12, /**/ 8,  10, 14, 0,  2,  4,  6,  12,  //
+      0, 2,  4,  6,  10, 14, 8,  12, /**/ 2,  4,  6,  10, 14, 0,  8,  12,  //
+      0, 4,  6,  10, 14, 2,  8,  12, /**/ 4,  6,  10, 14, 0,  2,  8,  12,  //
+      0, 2,  6,  10, 14, 4,  8,  12, /**/ 2,  6,  10, 14, 0,  4,  8,  12,  //
+      0, 6,  10, 14, 2,  4,  8,  12, /**/ 6,  10, 14, 0,  2,  4,  8,  12,  //
+      0, 2,  4,  10, 14, 6,  8,  12, /**/ 2,  4,  10, 14, 0,  6,  8,  12,  //
+      0, 4,  10, 14, 2,  6,  8,  12, /**/ 4,  10, 14, 0,  2,  6,  8,  12,  //
+      0, 2,  10, 14, 4,  6,  8,  12, /**/ 2,  10, 14, 0,  4,  6,  8,  12,  //
+      0, 10, 14, 2,  4,  6,  8,  12, /**/ 10, 14, 0,  2,  4,  6,  8,  12,  //
+      0, 2,  4,  6,  8,  14, 10, 12, /**/ 2,  4,  6,  8,  14, 0,  10, 12,  //
+      0, 4,  6,  8,  14, 2,  10, 12, /**/ 4,  6,  8,  14, 0,  2,  10, 12,  //
+      0, 2,  6,  8,  14, 4,  10, 12, /**/ 2,  6,  8,  14, 0,  4,  10, 12,  //
+      0, 6,  8,  14, 2,  4,  10, 12, /**/ 6,  8,  14, 0,  2,  4,  10, 12,  //
+      0, 2,  4,  8,  14, 6,  10, 12, /**/ 2,  4,  8,  14, 0,  6,  10, 12,  //
+      0, 4,  8,  14, 2,  6,  10, 12, /**/ 4,  8,  14, 0,  2,  6,  10, 12,  //
+      0, 2,  8,  14, 4,  6,  10, 12, /**/ 2,  8,  14, 0,  4,  6,  10, 12,  //
+      0, 8,  14, 2,  4,  6,  10, 12, /**/ 8,  14, 0,  2,  4,  6,  10, 12,  //
+      0, 2,  4,  6,  14, 8,  10, 12, /**/ 2,  4,  6,  14, 0,  8,  10, 12,  //
+      0, 4,  6,  14, 2,  8,  10, 12, /**/ 4,  6,  14, 0,  2,  8,  10, 12,  //
+      0, 2,  6,  14, 4,  8,  10, 12, /**/ 2,  6,  14, 0,  4,  8,  10, 12,  //
+      0, 6,  14, 2,  4,  8,  10, 12, /**/ 6,  14, 0,  2,  4,  8,  10, 12,  //
+      0, 2,  4,  14, 6,  8,  10, 12, /**/ 2,  4,  14, 0,  6,  8,  10, 12,  //
+      0, 4,  14, 2,  6,  8,  10, 12, /**/ 4,  14, 0,  2,  6,  8,  10, 12,  //
+      0, 2,  14, 4,  6,  8,  10, 12, /**/ 2,  14, 0,  4,  6,  8,  10, 12,  //
+      0, 14, 2,  4,  6,  8,  10, 12, /**/ 14, 0,  2,  4,  6,  8,  10, 12,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  6,  8,  10, 12, 0,  14,  //
+      0, 4,  6,  8,  10, 12, 2,  14, /**/ 4,  6,  8,  10, 12, 0,  2,  14,  //
+      0, 2,  6,  8,  10, 12, 4,  14, /**/ 2,  6,  8,  10, 12, 0,  4,  14,  //
+      0, 6,  8,  10, 12, 2,  4,  14, /**/ 6,  8,  10, 12, 0,  2,  4,  14,  //
+      0, 2,  4,  8,  10, 12, 6,  14, /**/ 2,  4,  8,  10, 12, 0,  6,  14,  //
+      0, 4,  8,  10, 12, 2,  6,  14, /**/ 4,  8,  10, 12, 0,  2,  6,  14,  //
+      0, 2,  8,  10, 12, 4,  6,  14, /**/ 2,  8,  10, 12, 0,  4,  6,  14,  //
+      0, 8,  10, 12, 2,  4,  6,  14, /**/ 8,  10, 12, 0,  2,  4,  6,  14,  //
+      0, 2,  4,  6,  10, 12, 8,  14, /**/ 2,  4,  6,  10, 12, 0,  8,  14,  //
+      0, 4,  6,  10, 12, 2,  8,  14, /**/ 4,  6,  10, 12, 0,  2,  8,  14,  //
+      0, 2,  6,  10, 12, 4,  8,  14, /**/ 2,  6,  10, 12, 0,  4,  8,  14,  //
+      0, 6,  10, 12, 2,  4,  8,  14, /**/ 6,  10, 12, 0,  2,  4,  8,  14,  //
+      0, 2,  4,  10, 12, 6,  8,  14, /**/ 2,  4,  10, 12, 0,  6,  8,  14,  //
+      0, 4,  10, 12, 2,  6,  8,  14, /**/ 4,  10, 12, 0,  2,  6,  8,  14,  //
+      0, 2,  10, 12, 4,  6,  8,  14, /**/ 2,  10, 12, 0,  4,  6,  8,  14,  //
+      0, 10, 12, 2,  4,  6,  8,  14, /**/ 10, 12, 0,  2,  4,  6,  8,  14,  //
+      0, 2,  4,  6,  8,  12, 10, 14, /**/ 2,  4,  6,  8,  12, 0,  10, 14,  //
+      0, 4,  6,  8,  12, 2,  10, 14, /**/ 4,  6,  8,  12, 0,  2,  10, 14,  //
+      0, 2,  6,  8,  12, 4,  10, 14, /**/ 2,  6,  8,  12, 0,  4,  10, 14,  //
+      0, 6,  8,  12, 2,  4,  10, 14, /**/ 6,  8,  12, 0,  2,  4,  10, 14,  //
+      0, 2,  4,  8,  12, 6,  10, 14, /**/ 2,  4,  8,  12, 0,  6,  10, 14,  //
+      0, 4,  8,  12, 2,  6,  10, 14, /**/ 4,  8,  12, 0,  2,  6,  10, 14,  //
+      0, 2,  8,  12, 4,  6,  10, 14, /**/ 2,  8,  12, 0,  4,  6,  10, 14,  //
+      0, 8,  12, 2,  4,  6,  10, 14, /**/ 8,  12, 0,  2,  4,  6,  10, 14,  //
+      0, 2,  4,  6,  12, 8,  10, 14, /**/ 2,  4,  6,  12, 0,  8,  10, 14,  //
+      0, 4,  6,  12, 2,  8,  10, 14, /**/ 4,  6,  12, 0,  2,  8,  10, 14,  //
+      0, 2,  6,  12, 4,  8,  10, 14, /**/ 2,  6,  12, 0,  4,  8,  10, 14,  //
+      0, 6,  12, 2,  4,  8,  10, 14, /**/ 6,  12, 0,  2,  4,  8,  10, 14,  //
+      0, 2,  4,  12, 6,  8,  10, 14, /**/ 2,  4,  12, 0,  6,  8,  10, 14,  //
+      0, 4,  12, 2,  6,  8,  10, 14, /**/ 4,  12, 0,  2,  6,  8,  10, 14,  //
+      0, 2,  12, 4,  6,  8,  10, 14, /**/ 2,  12, 0,  4,  6,  8,  10, 14,  //
+      0, 12, 2,  4,  6,  8,  10, 14, /**/ 12, 0,  2,  4,  6,  8,  10, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  6,  8,  10, 0,  12, 14,  //
+      0, 4,  6,  8,  10, 2,  12, 14, /**/ 4,  6,  8,  10, 0,  2,  12, 14,  //
+      0, 2,  6,  8,  10, 4,  12, 14, /**/ 2,  6,  8,  10, 0,  4,  12, 14,  //
+      0, 6,  8,  10, 2,  4,  12, 14, /**/ 6,  8,  10, 0,  2,  4,  12, 14,  //
+      0, 2,  4,  8,  10, 6,  12, 14, /**/ 2,  4,  8,  10, 0,  6,  12, 14,  //
+      0, 4,  8,  10, 2,  6,  12, 14, /**/ 4,  8,  10, 0,  2,  6,  12, 14,  //
+      0, 2,  8,  10, 4,  6,  12, 14, /**/ 2,  8,  10, 0,  4,  6,  12, 14,  //
+      0, 8,  10, 2,  4,  6,  12, 14, /**/ 8,  10, 0,  2,  4,  6,  12, 14,  //
+      0, 2,  4,  6,  10, 8,  12, 14, /**/ 2,  4,  6,  10, 0,  8,  12, 14,  //
+      0, 4,  6,  10, 2,  8,  12, 14, /**/ 4,  6,  10, 0,  2,  8,  12, 14,  //
+      0, 2,  6,  10, 4,  8,  12, 14, /**/ 2,  6,  10, 0,  4,  8,  12, 14,  //
+      0, 6,  10, 2,  4,  8,  12, 14, /**/ 6,  10, 0,  2,  4,  8,  12, 14,  //
+      0, 2,  4,  10, 6,  8,  12, 14, /**/ 2,  4,  10, 0,  6,  8,  12, 14,  //
+      0, 4,  10, 2,  6,  8,  12, 14, /**/ 4,  10, 0,  2,  6,  8,  12, 14,  //
+      0, 2,  10, 4,  6,  8,  12, 14, /**/ 2,  10, 0,  4,  6,  8,  12, 14,  //
+      0, 10, 2,  4,  6,  8,  12, 14, /**/ 10, 0,  2,  4,  6,  8,  12, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  6,  8,  0,  10, 12, 14,  //
+      0, 4,  6,  8,  2,  10, 12, 14, /**/ 4,  6,  8,  0,  2,  10, 12, 14,  //
+      0, 2,  6,  8,  4,  10, 12, 14, /**/ 2,  6,  8,  0,  4,  10, 12, 14,  //
+      0, 6,  8,  2,  4,  10, 12, 14, /**/ 6,  8,  0,  2,  4,  10, 12, 14,  //
+      0, 2,  4,  8,  6,  10, 12, 14, /**/ 2,  4,  8,  0,  6,  10, 12, 14,  //
+      0, 4,  8,  2,  6,  10, 12, 14, /**/ 4,  8,  0,  2,  6,  10, 12, 14,  //
+      0, 2,  8,  4,  6,  10, 12, 14, /**/ 2,  8,  0,  4,  6,  10, 12, 14,  //
+      0, 8,  2,  4,  6,  10, 12, 14, /**/ 8,  0,  2,  4,  6,  10, 12, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  6,  0,  8,  10, 12, 14,  //
+      0, 4,  6,  2,  8,  10, 12, 14, /**/ 4,  6,  0,  2,  8,  10, 12, 14,  //
+      0, 2,  6,  4,  8,  10, 12, 14, /**/ 2,  6,  0,  4,  8,  10, 12, 14,  //
+      0, 6,  2,  4,  8,  10, 12, 14, /**/ 6,  0,  2,  4,  8,  10, 12, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  0,  6,  8,  10, 12, 14,  //
+      0, 4,  2,  6,  8,  10, 12, 14, /**/ 4,  0,  2,  6,  8,  10, 12, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  0,  4,  6,  8,  10, 12, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 0,  2,  4,  6,  8,  10, 12, 14};
+
+  // Each table row is 8 bytes, hence the `mask_bits * 8` offset.
+  const Vec128<uint8_t, 2 * N> byte_idx = Load8Bytes(d8, table + mask_bits * 8);
+  // Pair each index with itself, then add 0x0100 per 16-bit lane to obtain
+  // the (2*lane + 0..1) byte indices described above.
+  const Vec128<uint16_t, N> pairs = ZipLower(byte_idx, byte_idx);
+  return BitCast(d, pairs + Set(du, 0x0100));
+}
+
+// Returns the byte-index vector used by Compress on 4-byte lanes.
+// `mask_bits` must be < 16 (one bit per lane, up to 4 lanes); it selects one
+// 16-byte row of the index table.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> IdxFromBits(hwy::SizeTag<4> /*tag*/,
+                                    uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 16);
+
+  // There are only 4 lanes, so we can afford to load the index vector directly.
+  alignas(16) static constexpr uint8_t u8_indices[16 * 16] = {
+      // PrintCompress32x4Tables
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+      4,  5,  6,  7,  0,  1,  2,  3,  8,  9,  10, 11, 12, 13, 14, 15,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+      8,  9,  10, 11, 0,  1,  2,  3,  4,  5,  6,  7,  12, 13, 14, 15,  //
+      0,  1,  2,  3,  8,  9,  10, 11, 4,  5,  6,  7,  12, 13, 14, 15,  //
+      4,  5,  6,  7,  8,  9,  10, 11, 0,  1,  2,  3,  12, 13, 14, 15,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+      12, 13, 14, 15, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,  //
+      0,  1,  2,  3,  12, 13, 14, 15, 4,  5,  6,  7,  8,  9,  10, 11,  //
+      4,  5,  6,  7,  12, 13, 14, 15, 0,  1,  2,  3,  8,  9,  10, 11,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  12, 13, 14, 15, 8,  9,  10, 11,  //
+      8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  4,  5,  6,  7,   //
+      0,  1,  2,  3,  8,  9,  10, 11, 12, 13, 14, 15, 4,  5,  6,  7,   //
+      4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,   //
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15};
+  const Simd<T, N, 0> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  // Rows are 16 bytes each, so every row start keeps the table's alignment.
+  const uint8_t* row = u8_indices + 16 * mask_bits;
+  const auto byte_indices = Load(d8, row);
+  return BitCast(d, byte_indices);
+}
+
+// Returns the byte-index vector used by CompressNot on 4-byte lanes.
+// `mask_bits` must be < 16 (one bit per lane, up to 4 lanes); it selects one
+// 16-byte row of the index table.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> IdxFromNotBits(hwy::SizeTag<4> /*tag*/,
+                                       uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 16);
+
+  // There are only 4 lanes, so we can afford to load the index vector directly.
+  // Laid out as one 16-byte row per mask_bits value.
+  alignas(16) static constexpr uint8_t u8_indices[16 * 16] = {
+      // PrintCompressNot32x4Tables
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+      4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,   //
+      0,  1,  2,  3,  8,  9,  10, 11, 12, 13, 14, 15, 4,  5,  6,  7,   //
+      8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  4,  5,  6,  7,   //
+      0,  1,  2,  3,  4,  5,  6,  7,  12, 13, 14, 15, 8,  9,  10, 11,  //
+      4,  5,  6,  7,  12, 13, 14, 15, 0,  1,  2,  3,  8,  9,  10, 11,  //
+      0,  1,  2,  3,  12, 13, 14, 15, 4,  5,  6,  7,  8,  9,  10, 11,  //
+      12, 13, 14, 15, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+      4,  5,  6,  7,  8,  9,  10, 11, 0,  1,  2,  3,  12, 13, 14, 15,  //
+      0,  1,  2,  3,  8,  9,  10, 11, 4,  5,  6,  7,  12, 13, 14, 15,  //
+      8,  9,  10, 11, 0,  1,  2,  3,  4,  5,  6,  7,  12, 13, 14, 15,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+      4,  5,  6,  7,  0,  1,  2,  3,  8,  9,  10, 11, 12, 13, 14, 15,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15};
+  const Simd<T, N, 0> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  // Rows are 16 bytes each, so every row start keeps the table's alignment.
+  const uint8_t* row = u8_indices + 16 * mask_bits;
+  const auto byte_indices = Load(d8, row);
+  return BitCast(d, byte_indices);
+}
+
+#if HWY_HAVE_INTEGER64 || HWY_HAVE_FLOAT64
+
+// Returns the byte-index vector used by Compress on 8-byte lanes.
+// `mask_bits` must be < 4 (one bit per lane, up to 2 lanes); it selects one
+// 16-byte row of the index table.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> IdxFromBits(hwy::SizeTag<8> /*tag*/,
+                                    uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 4);
+
+  // There are only 2 lanes, so we can afford to load the index vector directly.
+  alignas(16) static constexpr uint8_t u8_indices[64] = {
+      // PrintCompress64x2Tables
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15,
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15,
+      8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2,  3,  4,  5,  6,  7,
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15};
+
+  const Simd<T, N, 0> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  // Rows are 16 bytes each, so every row start keeps the table's alignment.
+  const uint8_t* row = u8_indices + 16 * mask_bits;
+  const auto byte_indices = Load(d8, row);
+  return BitCast(d, byte_indices);
+}
+
+// Returns the byte-index vector used by CompressNot on 8-byte lanes.
+// `mask_bits` must be < 4 (one bit per lane, up to 2 lanes); it selects one
+// 16-byte row of the index table.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> IdxFromNotBits(hwy::SizeTag<8> /*tag*/,
+                                       uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 4);
+
+  // There are only 2 lanes, so we can afford to load the index vector directly.
+  alignas(16) static constexpr uint8_t u8_indices[4 * 16] = {
+      // PrintCompressNot64x2Tables
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15,
+      8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2,  3,  4,  5,  6,  7,
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15,
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15};
+
+  const Simd<T, N, 0> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  // Rows are 16 bytes each, so every row start keeps the table's alignment.
+  const uint8_t* row = u8_indices + 16 * mask_bits;
+  const auto byte_indices = Load(d8, row);
+  return BitCast(d, byte_indices);
+}
+
+#endif
+
+// Shared by Compress and CompressStore so the latter need not call
+// BitsFromMask a second time. Looks up the byte indices for `mask_bits` and
+// applies them to `v` via a byte table lookup on the signed-integer view.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> Compress(Vec128<T, N> v, uint64_t mask_bits) {
+  using D = DFromV<decltype(v)>;
+  const RebindToSigned<D> di;
+  const auto lane_size_tag = hwy::SizeTag<sizeof(T)>();
+  const auto indices = detail::IdxFromBits<T, N>(lane_size_tag, mask_bits);
+  const auto permuted =
+      TableLookupBytes(BitCast(di, v), BitCast(di, indices));
+  return BitCast(D(), permuted);
+}
+
+// Counterpart of detail::Compress that takes its byte indices from
+// IdxFromNotBits, then applies them to `v` via a byte table lookup on the
+// signed-integer view.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> CompressNot(Vec128<T, N> v, uint64_t mask_bits) {
+  using D = DFromV<decltype(v)>;
+  const RebindToSigned<D> di;
+  const auto lane_size_tag = hwy::SizeTag<sizeof(T)>();
+  const auto indices = detail::IdxFromNotBits<T, N>(lane_size_tag, mask_bits);
+  const auto permuted =
+      TableLookupBytes(BitCast(di, v), BitCast(di, indices));
+  return BitCast(D(), permuted);
+}
+
+}  // namespace detail
+
+// Single lane: no-op. The mask is ignored and `v` is returned unchanged.
+template <typename T>
+HWY_API Vec128<T, 1> Compress(Vec128<T, 1> v, Mask128<T, 1> /*m*/) {
+  return v;
+}
+
+// Two lanes (8-byte lane type): either returns `v` as is or with its two
+// lanes exchanged.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T, N> Compress(Vec128<T, N> v, Mask128<T, N> mask) {
+  const DFromV<decltype(v)> d;
+  const Vec128<T, N> mask_vec = VecFromMask(d, mask);
+  const Vec128<T, N> lane0_bcast = DupEven(mask_vec);
+  const Vec128<T, N> lane1_bcast = DupOdd(mask_vec);
+  // Swap both halves only if mask[1] = 1 and mask[0] = 0; otherwise keep.
+  const Vec128<T, N> need_swap = AndNot(lane0_bcast, lane1_bcast);
+  const Vec128<T, N> swapped = Shuffle01(v);
+  return IfVecThenElse(need_swap, swapped, v);
+}
+
+// General case (2- or 4-byte lanes): converts the mask to packed bits and
+// defers to the table-driven detail::Compress.
+template <typename T, size_t N, HWY_IF_T_SIZE_ONE_OF(T, (1 << 2) | (1 << 4))>
+HWY_API Vec128<T, N> Compress(Vec128<T, N> v, Mask128<T, N> mask) {
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  return detail::Compress(v, mask_bits);
+}
+
+// Single lane: no-op
+template <typename T>
+HWY_API Vec128<T, 1> CompressNot(Vec128<T, 1> v, Mask128<T, 1> /*m*/) {
+  return v;
+}
+
+// Two lanes: conditional swap
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T> CompressNot(Vec128<T> v, Mask128<T> mask) {
+  // If mask[1] = 0 and mask[0] = 1, then swap both halves, else keep.
+  const DFromV<decltype(v)> d;
+  const Vec128<T> m = VecFromMask(d, mask);
+  const Vec128<T> maskL = DupEven(m);
+  const Vec128<T> maskH = DupOdd(m);
+  const Vec128<T> swap = AndNot(maskH, maskL);
+  return IfVecThenElse(swap, Shuffle01(v), v);
+}
+
+// General case, 2 or 4 byte lanes
+template <typename T, size_t N, HWY_IF_T_SIZE_ONE_OF(T, (1 << 2) | (1 << 4))>
+HWY_API Vec128<T, N> CompressNot(Vec128<T, N> v, Mask128<T, N> mask) {
+  // For partial vectors, we cannot pull the Not() into the table because
+  // BitsFromMask clears the upper bits.
+  if (N < 16 / sizeof(T)) {
+    return detail::Compress(v, detail::BitsFromMask(Not(mask)));
+  }
+  return detail::CompressNot(v, detail::BitsFromMask(mask));
+}
+
+// ------------------------------ CompressBlocksNot
+HWY_API Vec128<uint64_t> CompressBlocksNot(Vec128<uint64_t> v,
+                                           Mask128<uint64_t> /* m */) {
+  return v;
+}
+
+// ------------------------------ CompressBits
+
+template <typename T, size_t N, HWY_IF_NOT_T_SIZE(T, 1)>
+HWY_INLINE Vec128<T, N> CompressBits(Vec128<T, N> v,
+                                     const uint8_t* HWY_RESTRICT bits) {
+  uint64_t mask_bits = 0;
+  constexpr size_t kNumBytes = (N + 7) / 8;
+  CopyBytes<kNumBytes>(bits, &mask_bits);
+  if (N < 8) {
+    mask_bits &= (1ull << N) - 1;
+  }
+
+  return detail::Compress(v, mask_bits);
+}
+
+// ------------------------------ CompressStore
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API size_t CompressStore(VFromD<D> v, MFromD<D> mask, D d,
+                             TFromD<D>* HWY_RESTRICT unaligned) {
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  StoreU(detail::Compress(v, mask_bits), d, unaligned);
+  return PopCount(mask_bits);
+}
+
+// ------------------------------ CompressBlendedStore
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API size_t CompressBlendedStore(VFromD<D> v, MFromD<D> m, D d,
+                                    TFromD<D>* HWY_RESTRICT unaligned) {
+  const RebindToUnsigned<decltype(d)> du;  // so we can support fp16/bf16
+  const uint64_t mask_bits = detail::BitsFromMask(m);
+  const size_t count = PopCount(mask_bits);
+  const MFromD<D> store_mask = RebindMask(d, FirstN(du, count));
+  const VFromD<decltype(du)> compressed =
+      detail::Compress(BitCast(du, v), mask_bits);
+  BlendedStore(BitCast(d, compressed), store_mask, d, unaligned);
+  return count;
+}
+
+// ------------------------------ CompressBitsStore
+
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API size_t CompressBitsStore(VFromD<D> v, const uint8_t* HWY_RESTRICT bits,
+                                 D d, TFromD<D>* HWY_RESTRICT unaligned) {
+  uint64_t mask_bits = 0;
+  constexpr size_t kNumBytes = (d.MaxLanes() + 7) / 8;
+  CopyBytes<kNumBytes>(bits, &mask_bits);
+  if (d.MaxLanes() < 8) {
+    mask_bits &= (1ull << d.MaxLanes()) - 1;
+  }
+
+  StoreU(detail::Compress(v, mask_bits), d, unaligned);
+  return PopCount(mask_bits);
+}
+
+// ------------------------------ LoadInterleaved2
+
+// Per-target flag to prevent generic_ops-inl.h from defining LoadInterleaved2.
+#ifdef HWY_NATIVE_LOAD_STORE_INTERLEAVED
+#undef HWY_NATIVE_LOAD_STORE_INTERLEAVED
+#else
+#define HWY_NATIVE_LOAD_STORE_INTERLEAVED
+#endif
+
+namespace detail {
+#define HWY_NEON_BUILD_TPL_HWY_LOAD_INT
+#define HWY_NEON_BUILD_ARG_HWY_LOAD_INT from
+
+#if HWY_ARCH_ARM_A64
+#define HWY_IF_LOAD_INT(D) HWY_IF_V_SIZE_GT_D(D, 4)
+#define HWY_NEON_DEF_FUNCTION_LOAD_INT HWY_NEON_DEF_FUNCTION_ALL_TYPES
+#else
+// Exclude 64x2 and f64x1, which are only supported on aarch64
+#define HWY_IF_LOAD_INT(D)                                                 \
+  HWY_IF_V_SIZE_GT_D(D, 4),                                                \
+      hwy::EnableIf<(HWY_MAX_LANES_D(D) == 1 || sizeof(TFromD<D>) < 8)>* = \
+          nullptr
+#define HWY_NEON_DEF_FUNCTION_LOAD_INT(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_INT_8_16_32(name, prefix, infix, args)    \
+  HWY_NEON_DEF_FUNCTION_UINT_8_16_32(name, prefix, infix, args)   \
+  HWY_NEON_DEF_FUNCTION_FLOAT_16_32(name, prefix, infix, args)    \
+  HWY_NEON_DEF_FUNCTION(int64, 1, name, prefix, infix, s64, args) \
+  HWY_NEON_DEF_FUNCTION(uint64, 1, name, prefix, infix, u64, args)
+#endif  // HWY_ARCH_ARM_A64
+
+// Must return raw tuple because Tuple2 lacks a ctor, and we cannot use
+// brace-initialization in HWY_NEON_DEF_FUNCTION because some functions return
+// void.
+#define HWY_NEON_BUILD_RET_HWY_LOAD_INT(type, size) \
+  decltype(Tuple2<type##_t, size>().raw)
+// Tuple tag arg allows overloading (cannot just overload on return type)
+#define HWY_NEON_BUILD_PARAM_HWY_LOAD_INT(type, size) \
+  const type##_t *from, Tuple2<type##_t, size>
+HWY_NEON_DEF_FUNCTION_LOAD_INT(LoadInterleaved2, vld2, _, HWY_LOAD_INT)
+#undef HWY_NEON_BUILD_RET_HWY_LOAD_INT
+#undef HWY_NEON_BUILD_PARAM_HWY_LOAD_INT
+
+#define HWY_NEON_BUILD_RET_HWY_LOAD_INT(type, size) \
+  decltype(Tuple3<type##_t, size>().raw)
+#define HWY_NEON_BUILD_PARAM_HWY_LOAD_INT(type, size) \
+  const type##_t *from, Tuple3<type##_t, size>
+HWY_NEON_DEF_FUNCTION_LOAD_INT(LoadInterleaved3, vld3, _, HWY_LOAD_INT)
+#undef HWY_NEON_BUILD_PARAM_HWY_LOAD_INT
+#undef HWY_NEON_BUILD_RET_HWY_LOAD_INT
+
+#define HWY_NEON_BUILD_RET_HWY_LOAD_INT(type, size) \
+  decltype(Tuple4<type##_t, size>().raw)
+#define HWY_NEON_BUILD_PARAM_HWY_LOAD_INT(type, size) \
+  const type##_t *from, Tuple4<type##_t, size>
+HWY_NEON_DEF_FUNCTION_LOAD_INT(LoadInterleaved4, vld4, _, HWY_LOAD_INT)
+#undef HWY_NEON_BUILD_PARAM_HWY_LOAD_INT
+#undef HWY_NEON_BUILD_RET_HWY_LOAD_INT
+
+#undef HWY_NEON_DEF_FUNCTION_LOAD_INT
+#undef HWY_NEON_BUILD_TPL_HWY_LOAD_INT
+#undef HWY_NEON_BUILD_ARG_HWY_LOAD_INT
+}  // namespace detail
+
+template <class D, HWY_IF_LOAD_INT(D), typename T = TFromD<D>>
+HWY_API void LoadInterleaved2(D d, const T* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1) {
+  auto raw =
+      detail::LoadInterleaved2(unaligned, detail::Tuple2<T, d.MaxLanes()>());
+  v0 = VFromD<D>(raw.val[0]);
+  v1 = VFromD<D>(raw.val[1]);
+}
+
+// <= 32 bits: avoid loading more than N bytes by copying to buffer
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), typename T = TFromD<D>>
+HWY_API void LoadInterleaved2(D d, const T* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1) {
+  // The smallest vector registers are 64-bits and we want space for two.
+  alignas(16) T buf[2 * 8 / sizeof(T)] = {};
+  CopyBytes<d.MaxBytes() * 2>(unaligned, buf);
+  auto raw = detail::LoadInterleaved2(buf, detail::Tuple2<T, d.MaxLanes()>());
+  v0 = VFromD<D>(raw.val[0]);
+  v1 = VFromD<D>(raw.val[1]);
+}
+
+#if HWY_ARCH_ARM_V7
+// 64x2: not natively supported on Armv7, so split into two 64x1 loads.
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 8)>
+HWY_API void LoadInterleaved2(D d, const T* HWY_RESTRICT unaligned,
+                              Vec128<T>& v0, Vec128<T>& v1) {
+  const Half<decltype(d)> dh;
+  VFromD<decltype(dh)> v00, v10, v01, v11;
+  LoadInterleaved2(dh, unaligned, v00, v10);      // elements [0, 2)
+  LoadInterleaved2(dh, unaligned + 2, v01, v11);  // elements [2, 4)
+  v0 = Combine(d, v01, v00);
+  v1 = Combine(d, v11, v10);
+}
+#endif  // HWY_ARCH_ARM_V7
+
+// ------------------------------ LoadInterleaved3
+
+template <class D, HWY_IF_LOAD_INT(D), typename T = TFromD<D>>
+HWY_API void LoadInterleaved3(D d, const T* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1, VFromD<D>& v2) {
+  auto raw =
+      detail::LoadInterleaved3(unaligned, detail::Tuple3<T, d.MaxLanes()>());
+  v0 = VFromD<D>(raw.val[0]);
+  v1 = VFromD<D>(raw.val[1]);
+  v2 = VFromD<D>(raw.val[2]);
+}
+
+// <= 32 bits: avoid loading more than N bytes by copying to buffer
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), typename T = TFromD<D>>
+HWY_API void LoadInterleaved3(D d, const T* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1, VFromD<D>& v2) {
+  // The smallest vector registers are 64-bits and we want space for three.
+  alignas(16) T buf[3 * 8 / sizeof(T)] = {};
+  CopyBytes<d.MaxBytes() * 3>(unaligned, buf);
+  auto raw = detail::LoadInterleaved3(buf, detail::Tuple3<T, d.MaxLanes()>());
+  v0 = VFromD<D>(raw.val[0]);
+  v1 = VFromD<D>(raw.val[1]);
+  v2 = VFromD<D>(raw.val[2]);
+}
+
+#if HWY_ARCH_ARM_V7
+// 64x2: split into two 64x1
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 8)>
+HWY_API void LoadInterleaved3(D d, const TFromD<D>* HWY_RESTRICT unaligned,
+                              Vec128<T>& v0, Vec128<T>& v1, Vec128<T>& v2) {
+  const Half<decltype(d)> dh;
+  VFromD<decltype(dh)> v00, v10, v20, v01, v11, v21;
+  LoadInterleaved3(dh, unaligned, v00, v10, v20);
+  LoadInterleaved3(dh, unaligned + 3, v01, v11, v21);
+  v0 = Combine(d, v01, v00);
+  v1 = Combine(d, v11, v10);
+  v2 = Combine(d, v21, v20);
+}
+#endif  // HWY_ARCH_ARM_V7
+
+// ------------------------------ LoadInterleaved4
+
+template <class D, HWY_IF_LOAD_INT(D), typename T = TFromD<D>>
+HWY_API void LoadInterleaved4(D d, const T* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1, VFromD<D>& v2,
+                              VFromD<D>& v3) {
+  auto raw =
+      detail::LoadInterleaved4(unaligned, detail::Tuple4<T, d.MaxLanes()>());
+  v0 = VFromD<D>(raw.val[0]);
+  v1 = VFromD<D>(raw.val[1]);
+  v2 = VFromD<D>(raw.val[2]);
+  v3 = VFromD<D>(raw.val[3]);
+}
+
+// <= 32 bits: avoid loading more than N bytes by copying to buffer
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), typename T = TFromD<D>>
+HWY_API void LoadInterleaved4(D d, const T* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1, VFromD<D>& v2,
+                              VFromD<D>& v3) {
+  alignas(16) T buf[4 * 8 / sizeof(T)] = {};
+  CopyBytes<d.MaxBytes() * 4>(unaligned, buf);
+  auto raw = detail::LoadInterleaved4(buf, detail::Tuple4<T, d.MaxLanes()>());
+  v0 = VFromD<D>(raw.val[0]);
+  v1 = VFromD<D>(raw.val[1]);
+  v2 = VFromD<D>(raw.val[2]);
+  v3 = VFromD<D>(raw.val[3]);
+}
+
+#if HWY_ARCH_ARM_V7
+// 64x2: split into two 64x1
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 8)>
+HWY_API void LoadInterleaved4(D d, const T* HWY_RESTRICT unaligned,
+                              Vec128<T>& v0, Vec128<T>& v1, Vec128<T>& v2,
+                              Vec128<T>& v3) {
+  const Half<decltype(d)> dh;
+  VFromD<decltype(dh)> v00, v10, v20, v30, v01, v11, v21, v31;
+  LoadInterleaved4(dh, unaligned, v00, v10, v20, v30);
+  LoadInterleaved4(dh, unaligned + 4, v01, v11, v21, v31);
+  v0 = Combine(d, v01, v00);
+  v1 = Combine(d, v11, v10);
+  v2 = Combine(d, v21, v20);
+  v3 = Combine(d, v31, v30);
+}
+#endif  // HWY_ARCH_ARM_V7
+
+#undef HWY_IF_LOAD_INT
+
+// ------------------------------ StoreInterleaved2
+
+namespace detail {
+#define HWY_NEON_BUILD_TPL_HWY_STORE_INT
+#define HWY_NEON_BUILD_RET_HWY_STORE_INT(type, size) void
+#define HWY_NEON_BUILD_ARG_HWY_STORE_INT to, tup.raw
+
+#if HWY_ARCH_ARM_A64
+#define HWY_IF_STORE_INT(D) HWY_IF_V_SIZE_GT_D(D, 4)
+#define HWY_NEON_DEF_FUNCTION_STORE_INT HWY_NEON_DEF_FUNCTION_ALL_TYPES
+#else
+// Exclude 64x2 and f64x1, which are only supported on aarch64
+#define HWY_IF_STORE_INT(D)                                                \
+  HWY_IF_V_SIZE_GT_D(D, 4),                                                \
+      hwy::EnableIf<(HWY_MAX_LANES_D(D) == 1 || sizeof(TFromD<D>) < 8)>* = \
+          nullptr
+#define HWY_NEON_DEF_FUNCTION_STORE_INT(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_INT_8_16_32(name, prefix, infix, args)     \
+  HWY_NEON_DEF_FUNCTION_UINT_8_16_32(name, prefix, infix, args)    \
+  HWY_NEON_DEF_FUNCTION_FLOAT_16_32(name, prefix, infix, args)     \
+  HWY_NEON_DEF_FUNCTION(int64, 1, name, prefix, infix, s64, args)  \
+  HWY_NEON_DEF_FUNCTION(uint64, 1, name, prefix, infix, u64, args)
+#endif  // HWY_ARCH_ARM_A64
+
+#define HWY_NEON_BUILD_PARAM_HWY_STORE_INT(type, size) \
+  Tuple2<type##_t, size> tup, type##_t *to
+HWY_NEON_DEF_FUNCTION_STORE_INT(StoreInterleaved2, vst2, _, HWY_STORE_INT)
+#undef HWY_NEON_BUILD_PARAM_HWY_STORE_INT
+
+#define HWY_NEON_BUILD_PARAM_HWY_STORE_INT(type, size) \
+  Tuple3<type##_t, size> tup, type##_t *to
+HWY_NEON_DEF_FUNCTION_STORE_INT(StoreInterleaved3, vst3, _, HWY_STORE_INT)
+#undef HWY_NEON_BUILD_PARAM_HWY_STORE_INT
+
+#define HWY_NEON_BUILD_PARAM_HWY_STORE_INT(type, size) \
+  Tuple4<type##_t, size> tup, type##_t *to
+HWY_NEON_DEF_FUNCTION_STORE_INT(StoreInterleaved4, vst4, _, HWY_STORE_INT)
+#undef HWY_NEON_BUILD_PARAM_HWY_STORE_INT
+
+#undef HWY_NEON_DEF_FUNCTION_STORE_INT
+#undef HWY_NEON_BUILD_TPL_HWY_STORE_INT
+#undef HWY_NEON_BUILD_RET_HWY_STORE_INT
+#undef HWY_NEON_BUILD_ARG_HWY_STORE_INT
+}  // namespace detail
+
+template <class D, HWY_IF_STORE_INT(D), typename T = TFromD<D>>
+HWY_API void StoreInterleaved2(VFromD<D> v0, VFromD<D> v1, D d,
+                               T* HWY_RESTRICT unaligned) {
+  detail::Tuple2<T, d.MaxLanes()> tup = {{{v0.raw, v1.raw}}};
+  detail::StoreInterleaved2(tup, unaligned);
+}
+
+// <= 32 bits: avoid writing more than N bytes by copying to buffer
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), typename T = TFromD<D>>
+HWY_API void StoreInterleaved2(VFromD<D> v0, VFromD<D> v1, D d,
+                               T* HWY_RESTRICT unaligned) {
+  alignas(16) T buf[2 * 8 / sizeof(T)];
+  detail::Tuple2<T, d.MaxLanes()> tup = {{{v0.raw, v1.raw}}};
+  detail::StoreInterleaved2(tup, buf);
+  CopyBytes<d.MaxBytes() * 2>(buf, unaligned);
+}
+
+#if HWY_ARCH_ARM_V7
+// 64x2: split into two 64x1
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 8)>
+HWY_API void StoreInterleaved2(Vec128<T> v0, Vec128<T> v1, D d,
+                               T* HWY_RESTRICT unaligned) {
+  const Half<decltype(d)> dh;
+  StoreInterleaved2(LowerHalf(dh, v0), LowerHalf(dh, v1), dh, unaligned);
+  StoreInterleaved2(UpperHalf(dh, v0), UpperHalf(dh, v1), dh, unaligned + 2);
+}
+#endif  // HWY_ARCH_ARM_V7
+
+// ------------------------------ StoreInterleaved3
+
+template <class D, HWY_IF_STORE_INT(D), typename T = TFromD<D>>
+HWY_API void StoreInterleaved3(VFromD<D> v0, VFromD<D> v1, VFromD<D> v2, D d,
+                               T* HWY_RESTRICT unaligned) {
+  detail::Tuple3<T, d.MaxLanes()> tup = {{{v0.raw, v1.raw, v2.raw}}};
+  detail::StoreInterleaved3(tup, unaligned);
+}
+
+// <= 32 bits: avoid writing more than N bytes by copying to buffer
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), typename T = TFromD<D>>
+HWY_API void StoreInterleaved3(VFromD<D> v0, VFromD<D> v1, VFromD<D> v2, D d,
+                               T* HWY_RESTRICT unaligned) {
+  alignas(16) T buf[3 * 8 / sizeof(T)];
+  detail::Tuple3<T, d.MaxLanes()> tup = {{{v0.raw, v1.raw, v2.raw}}};
+  detail::StoreInterleaved3(tup, buf);
+  CopyBytes<d.MaxBytes() * 3>(buf, unaligned);
+}
+
+#if HWY_ARCH_ARM_V7
+// 64x2: split into two 64x1
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 8)>
+HWY_API void StoreInterleaved3(Vec128<T> v0, Vec128<T> v1, Vec128<T> v2, D d,
+                               T* HWY_RESTRICT unaligned) {
+  const Half<decltype(d)> dh;
+  StoreInterleaved3(LowerHalf(dh, v0), LowerHalf(dh, v1), LowerHalf(dh, v2), dh,
+                    unaligned);
+  StoreInterleaved3(UpperHalf(dh, v0), UpperHalf(dh, v1), UpperHalf(dh, v2), dh,
+                    unaligned + 3);
+}
+#endif  // HWY_ARCH_ARM_V7
+
+// ------------------------------ StoreInterleaved4
+
+template <class D, HWY_IF_STORE_INT(D), typename T = TFromD<D>>
+HWY_API void StoreInterleaved4(VFromD<D> v0, VFromD<D> v1, VFromD<D> v2,
+                               VFromD<D> v3, D d, T* HWY_RESTRICT unaligned) {
+  detail::Tuple4<T, d.MaxLanes()> tup = {{{v0.raw, v1.raw, v2.raw, v3.raw}}};
+  detail::StoreInterleaved4(tup, unaligned);
+}
+
+// <= 32 bits: avoid writing more than N bytes by copying to buffer
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), typename T = TFromD<D>>
+HWY_API void StoreInterleaved4(VFromD<D> v0, VFromD<D> v1, VFromD<D> v2,
+                               VFromD<D> v3, D d, T* HWY_RESTRICT unaligned) {
+  alignas(16) T buf[4 * 8 / sizeof(T)];
+  detail::Tuple4<T, d.MaxLanes()> tup = {{{v0.raw, v1.raw, v2.raw, v3.raw}}};
+  detail::StoreInterleaved4(tup, buf);
+  CopyBytes<d.MaxBytes() * 4>(buf, unaligned);
+}
+
+#if HWY_ARCH_ARM_V7
+// 64x2: split into two 64x1
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 8)>
+HWY_API void StoreInterleaved4(Vec128<T> v0, Vec128<T> v1, Vec128<T> v2,
+                               Vec128<T> v3, D d, T* HWY_RESTRICT unaligned) {
+  const Half<decltype(d)> dh;
+  StoreInterleaved4(LowerHalf(dh, v0), LowerHalf(dh, v1), LowerHalf(dh, v2),
+                    LowerHalf(dh, v3), dh, unaligned);
+  StoreInterleaved4(UpperHalf(dh, v0), UpperHalf(dh, v1), UpperHalf(dh, v2),
+                    UpperHalf(dh, v3), dh, unaligned + 4);
+}
+#endif  // HWY_ARCH_ARM_V7
+
+#undef HWY_IF_STORE_INT
+
+// ------------------------------ Additional mask logical operations
+template <class T>
+HWY_API Mask128<T, 1> SetAtOrAfterFirst(Mask128<T, 1> mask) {
+  return mask;
+}
+template <class T>
+HWY_API Mask128<T, 2> SetAtOrAfterFirst(Mask128<T, 2> mask) {
+  const FixedTag<T, 2> d;
+  const auto vmask = VecFromMask(d, mask);
+  return MaskFromVec(Or(vmask, InterleaveLower(vmask, vmask)));
+}
+template <class T, size_t N, HWY_IF_LANES_GT(N, 2), HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_API Mask128<T, N> SetAtOrAfterFirst(Mask128<T, N> mask) {
+  const Simd<T, N, 0> d;
+  const auto vmask = VecFromMask(d, mask);
+  const auto neg_vmask =
+      ResizeBitCast(d, Neg(ResizeBitCast(Full64<int64_t>(), vmask)));
+  return MaskFromVec(Or(vmask, neg_vmask));
+}
+template <class T, HWY_IF_NOT_T_SIZE(T, 8)>
+HWY_API Mask128<T> SetAtOrAfterFirst(Mask128<T> mask) {
+  const Full128<T> d;
+  const Repartition<int64_t, decltype(d)> di64;
+
+  auto vmask = BitCast(di64, VecFromMask(d, mask));
+  vmask = Or(vmask, Neg(vmask));
+
+  // Copy the sign bit of the first int64_t lane to the second int64_t lane
+  const auto vmask2 = BroadcastSignBit(InterleaveLower(Zero(di64), vmask));
+  return MaskFromVec(BitCast(d, Or(vmask, vmask2)));
+}
+
+template <class T, size_t N>
+HWY_API Mask128<T, N> SetBeforeFirst(Mask128<T, N> mask) {
+  return Not(SetAtOrAfterFirst(mask));
+}
+
+template <class T>
+HWY_API Mask128<T, 1> SetOnlyFirst(Mask128<T, 1> mask) {
+  return mask;
+}
+template <class T>
+HWY_API Mask128<T, 2> SetOnlyFirst(Mask128<T, 2> mask) {
+  const FixedTag<T, 2> d;
+  const RebindToSigned<decltype(d)> di;
+
+  const auto vmask = BitCast(di, VecFromMask(d, mask));
+  const auto zero = Zero(di);
+  const auto vmask2 = VecFromMask(di, InterleaveLower(zero, vmask) == zero);
+  return MaskFromVec(BitCast(d, And(vmask, vmask2)));
+}
+template <class T, size_t N, HWY_IF_LANES_GT(N, 2), HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_API Mask128<T, N> SetOnlyFirst(Mask128<T, N> mask) {
+  const Simd<T, N, 0> d;
+  const RebindToSigned<decltype(d)> di;
+
+  const auto vmask = ResizeBitCast(Full64<int64_t>(), VecFromMask(d, mask));
+  const auto only_first_vmask =
+      BitCast(d, Neg(ResizeBitCast(di, And(vmask, Neg(vmask)))));
+  return MaskFromVec(only_first_vmask);
+}
+template <class T, HWY_IF_NOT_T_SIZE(T, 8)>
+HWY_API Mask128<T> SetOnlyFirst(Mask128<T> mask) {
+  const Full128<T> d;
+  const RebindToSigned<decltype(d)> di;
+  const Repartition<int64_t, decltype(d)> di64;
+
+  const auto zero = Zero(di64);
+  const auto vmask = BitCast(di64, VecFromMask(d, mask));
+  const auto vmask2 = VecFromMask(di64, InterleaveLower(zero, vmask) == zero);
+  const auto only_first_vmask = Neg(BitCast(di, And(vmask, Neg(vmask))));
+  return MaskFromVec(BitCast(d, And(only_first_vmask, BitCast(di, vmask2))));
+}
+
+template <class T>
+HWY_API Mask128<T, 1> SetAtOrBeforeFirst(Mask128<T, 1> /*mask*/) {
+  const FixedTag<T, 1> d;
+  const RebindToSigned<decltype(d)> di;
+  using TI = MakeSigned<T>;
+
+  return RebindMask(d, MaskFromVec(Set(di, TI(-1))));
+}
+template <class T, size_t N, HWY_IF_LANES_GT(N, 1)>
+HWY_API Mask128<T, N> SetAtOrBeforeFirst(Mask128<T, N> mask) {
+  const Simd<T, N, 0> d;
+  return SetBeforeFirst(MaskFromVec(ShiftLeftLanes<1>(VecFromMask(d, mask))));
+}
+
+// ------------------------------ Lt128
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_INLINE MFromD<D> Lt128(D d, VFromD<D> a, VFromD<D> b) {
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "T must be u64");
+  // Truth table of Eq and Lt for Hi and Lo u64.
+  // (removed lines with (=H && cH) or (=L && cL) - cannot both be true)
+  // =H =L cH cL  | out = cH | (=H & cL)
+  //  0  0  0  0  |  0
+  //  0  0  0  1  |  0
+  //  0  0  1  0  |  1
+  //  0  0  1  1  |  1
+  //  0  1  0  0  |  0
+  //  0  1  0  1  |  0
+  //  0  1  1  0  |  1
+  //  1  0  0  0  |  0
+  //  1  0  0  1  |  1
+  //  1  1  0  0  |  0
+  const MFromD<D> eqHL = Eq(a, b);
+  const VFromD<D> ltHL = VecFromMask(d, Lt(a, b));
+  // We need to bring cL to the upper lane/bit corresponding to cH. Comparing
+  // the result of InterleaveUpper/Lower requires 9 ops, whereas shifting the
+  // comparison result leftwards requires only 4. IfThenElse compiles to the
+  // same code as OrAnd().
+  const VFromD<D> ltLx = DupEven(ltHL);
+  const VFromD<D> outHx = IfThenElse(eqHL, ltLx, ltHL);
+  return MaskFromVec(DupOdd(outHx));
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_INLINE MFromD<D> Lt128Upper(D d, VFromD<D> a, VFromD<D> b) {
+  const VFromD<D> ltHL = VecFromMask(d, Lt(a, b));
+  return MaskFromVec(InterleaveUpper(d, ltHL, ltHL));
+}
+
+// ------------------------------ Eq128
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_INLINE MFromD<D> Eq128(D d, VFromD<D> a, VFromD<D> b) {
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "T must be u64");
+  const VFromD<D> eqHL = VecFromMask(d, Eq(a, b));
+  return MaskFromVec(And(Reverse2(d, eqHL), eqHL));
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_INLINE MFromD<D> Eq128Upper(D d, VFromD<D> a, VFromD<D> b) {
+  const VFromD<D> eqHL = VecFromMask(d, Eq(a, b));
+  return MaskFromVec(InterleaveUpper(d, eqHL, eqHL));
+}
+
+// ------------------------------ Ne128
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_INLINE MFromD<D> Ne128(D d, VFromD<D> a, VFromD<D> b) {
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "T must be u64");
+  const VFromD<D> neHL = VecFromMask(d, Ne(a, b));
+  return MaskFromVec(Or(Reverse2(d, neHL), neHL));
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_INLINE MFromD<D> Ne128Upper(D d, VFromD<D> a, VFromD<D> b) {
+  const VFromD<D> neHL = VecFromMask(d, Ne(a, b));
+  return MaskFromVec(InterleaveUpper(d, neHL, neHL));
+}
+
+// ------------------------------ Min128, Max128 (Lt128)
+
+// Without a native OddEven, it seems infeasible to go faster than Lt128.
+template <class D>
+HWY_INLINE VFromD<D> Min128(D d, VFromD<D> a, VFromD<D> b) {
+  return IfThenElse(Lt128(d, a, b), a, b);
+}
+
+template <class D>
+HWY_INLINE VFromD<D> Max128(D d, VFromD<D> a, VFromD<D> b) {
+  return IfThenElse(Lt128(d, b, a), a, b);
+}
+
+template <class D>
+HWY_INLINE VFromD<D> Min128Upper(D d, VFromD<D> a, VFromD<D> b) {
+  return IfThenElse(Lt128Upper(d, a, b), a, b);
+}
+
+template <class D>
+HWY_INLINE VFromD<D> Max128Upper(D d, VFromD<D> a, VFromD<D> b) {
+  return IfThenElse(Lt128Upper(d, b, a), a, b);
+}
+
+// -------------------- LeadingZeroCount, TrailingZeroCount, HighestSetBitIndex
+
+#ifdef HWY_NATIVE_LEADING_ZERO_COUNT
+#undef HWY_NATIVE_LEADING_ZERO_COUNT
+#else
+#define HWY_NATIVE_LEADING_ZERO_COUNT
+#endif
+
+HWY_NEON_DEF_FUNCTION_INT_8_16_32(LeadingZeroCount, vclz, _, 1)
+HWY_NEON_DEF_FUNCTION_UINT_8_16_32(LeadingZeroCount, vclz, _, 1)
+
+template <class V, HWY_IF_UI64_D(DFromV<V>)>
+HWY_API V LeadingZeroCount(V v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const Repartition<uint32_t, decltype(d)> du32;
+
+  const auto v_k32 = BitCast(du32, Set(du, 32));
+  const auto v_u32_lzcnt = LeadingZeroCount(BitCast(du32, v)) + v_k32;
+  const auto v_u32_lo_lzcnt =
+      And(v_u32_lzcnt, BitCast(du32, Set(du, 0xFFFFFFFFu)));
+  const auto v_u32_hi_lzcnt =
+      BitCast(du32, ShiftRight<32>(BitCast(du, v_u32_lzcnt)));
+
+  return BitCast(
+      d, IfThenElse(v_u32_hi_lzcnt == v_k32, v_u32_lo_lzcnt, v_u32_hi_lzcnt));
+}
+
+template <class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V)>
+HWY_API V HighestSetBitIndex(V v) {
+  const DFromV<decltype(v)> d;
+  using T = TFromD<decltype(d)>;
+  return BitCast(d, Set(d, T{sizeof(T) * 8 - 1}) - LeadingZeroCount(v));
+}
+
+template <class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V), HWY_IF_T_SIZE_V(V, 1)>
+HWY_API V TrailingZeroCount(V v) {
+  return LeadingZeroCount(ReverseBits(v));
+}
+
+template <class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V),
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 2) | (1 << 4) | (1 << 8))>
+HWY_API V TrailingZeroCount(V v) {
+  const DFromV<decltype(v)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  return LeadingZeroCount(
+      ReverseLaneBytes(BitCast(d, ReverseBits(BitCast(du8, v)))));
+}
+
+namespace detail {  // for code folding
+#if HWY_ARCH_ARM_V7
+#undef vuzp1_s8
+#undef vuzp1_u8
+#undef vuzp1_s16
+#undef vuzp1_u16
+#undef vuzp1_s32
+#undef vuzp1_u32
+#undef vuzp1_f32
+#undef vuzp1q_s8
+#undef vuzp1q_u8
+#undef vuzp1q_s16
+#undef vuzp1q_u16
+#undef vuzp1q_s32
+#undef vuzp1q_u32
+#undef vuzp1q_f32
+#undef vuzp2_s8
+#undef vuzp2_u8
+#undef vuzp2_s16
+#undef vuzp2_u16
+#undef vuzp2_s32
+#undef vuzp2_u32
+#undef vuzp2_f32
+#undef vuzp2q_s8
+#undef vuzp2q_u8
+#undef vuzp2q_s16
+#undef vuzp2q_u16
+#undef vuzp2q_s32
+#undef vuzp2q_u32
+#undef vuzp2q_f32
+#undef vzip1_s8
+#undef vzip1_u8
+#undef vzip1_s16
+#undef vzip1_u16
+#undef vzip1_s32
+#undef vzip1_u32
+#undef vzip1_f32
+#undef vzip1q_s8
+#undef vzip1q_u8
+#undef vzip1q_s16
+#undef vzip1q_u16
+#undef vzip1q_s32
+#undef vzip1q_u32
+#undef vzip1q_f32
+#undef vzip2_s8
+#undef vzip2_u8
+#undef vzip2_s16
+#undef vzip2_u16
+#undef vzip2_s32
+#undef vzip2_u32
+#undef vzip2_f32
+#undef vzip2q_s8
+#undef vzip2q_u8
+#undef vzip2q_s16
+#undef vzip2q_u16
+#undef vzip2q_s32
+#undef vzip2q_u32
+#undef vzip2q_f32
+#endif
+
+#undef HWY_NEON_BUILD_ARG_1
+#undef HWY_NEON_BUILD_ARG_2
+#undef HWY_NEON_BUILD_ARG_3
+#undef HWY_NEON_BUILD_PARAM_1
+#undef HWY_NEON_BUILD_PARAM_2
+#undef HWY_NEON_BUILD_PARAM_3
+#undef HWY_NEON_BUILD_RET_1
+#undef HWY_NEON_BUILD_RET_2
+#undef HWY_NEON_BUILD_RET_3
+#undef HWY_NEON_BUILD_TPL_1
+#undef HWY_NEON_BUILD_TPL_2
+#undef HWY_NEON_BUILD_TPL_3
+#undef HWY_NEON_DEF_FUNCTION
+#undef HWY_NEON_DEF_FUNCTION_ALL_FLOATS
+#undef HWY_NEON_DEF_FUNCTION_ALL_TYPES
+#undef HWY_NEON_DEF_FUNCTION_BFLOAT_16
+#undef HWY_NEON_DEF_FUNCTION_FLOAT_16
+#undef HWY_NEON_DEF_FUNCTION_FLOAT_16_32
+#undef HWY_NEON_DEF_FUNCTION_FLOAT_32
+#undef HWY_NEON_DEF_FUNCTION_FLOAT_64
+#undef HWY_NEON_DEF_FUNCTION_FULL_UI
+#undef HWY_NEON_DEF_FUNCTION_FULL_UI_64
+#undef HWY_NEON_DEF_FUNCTION_FULL_UIF_64
+#undef HWY_NEON_DEF_FUNCTION_INT_16
+#undef HWY_NEON_DEF_FUNCTION_INT_32
+#undef HWY_NEON_DEF_FUNCTION_INT_64
+#undef HWY_NEON_DEF_FUNCTION_INT_8
+#undef HWY_NEON_DEF_FUNCTION_INT_8_16_32
+#undef HWY_NEON_DEF_FUNCTION_INTS
+#undef HWY_NEON_DEF_FUNCTION_INTS_UINTS
+#undef HWY_NEON_DEF_FUNCTION_UI_8_16_32
+#undef HWY_NEON_DEF_FUNCTION_UIF_64
+#undef HWY_NEON_DEF_FUNCTION_UIF_8_16_32
+#undef HWY_NEON_DEF_FUNCTION_UINT_16
+#undef HWY_NEON_DEF_FUNCTION_UINT_32
+#undef HWY_NEON_DEF_FUNCTION_UINT_64
+#undef HWY_NEON_DEF_FUNCTION_UINT_8
+#undef HWY_NEON_DEF_FUNCTION_UINT_8_16_32
+#undef HWY_NEON_DEF_FUNCTION_UINTS
+#undef HWY_NEON_EVAL
+
+}  // namespace detail
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/ops/arm_sve-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/arm_sve-inl.h
new file mode 100644
index 0000000000..944c5df7b0
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/arm_sve-inl.h
@@ -0,0 +1,5050 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Arm SVE[2] vectors (length not known at compile time).
+// External include guard in highway.h - see comment there.
+
+#include <arm_sve.h>
+
+#include "hwy/ops/shared-inl.h"
+
+// Arm C215 declares that SVE vector lengths will always be a power of two.
+// We rely on this by default because it makes some operations more efficient.
+// Defining HWY_SVE_IS_POW2 to 0 beforehand enables fixups, but is unsupported.
+#ifndef HWY_SVE_IS_POW2
+#define HWY_SVE_IS_POW2 1
+#endif
+
+#if HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE2_128
+#define HWY_SVE_HAVE_2 1
+#else
+#define HWY_SVE_HAVE_2 0
+#endif
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+template <class V>
+struct DFromV_t {};  // primary is empty; HWY_SPECIALIZE adds one per vector
+template <class V>
+using DFromV = typename DFromV_t<RemoveConst<V>>::type;  // tag type for V
+
+template <class V>
+using TFromV = TFromD<DFromV<V>>;  // lane type of vector type V
+
+// ================================================== MACROS
+
+// Generate specializations and function definitions using X macros. Although
+// harder to read and debug, writing everything manually is too bulky.
+
+namespace detail {  // for code folding
+
+// Each invokes X_MACRO(BASE, CHAR, BITS, HALF, NAME, OP) for one lane type.
+
+// Unsigned (BASE = uint, CHAR = u):
+#define HWY_SVE_FOREACH_U08(X_MACRO, NAME, OP) X_MACRO(uint, u, 8, 8, NAME, OP)
+#define HWY_SVE_FOREACH_U16(X_MACRO, NAME, OP) X_MACRO(uint, u, 16, 8, NAME, OP)
+#define HWY_SVE_FOREACH_U32(X_MACRO, NAME, OP) \
+  X_MACRO(uint, u, 32, 16, NAME, OP)
+#define HWY_SVE_FOREACH_U64(X_MACRO, NAME, OP) \
+  X_MACRO(uint, u, 64, 32, NAME, OP)
+
+// Signed (BASE = int, CHAR = s):
+#define HWY_SVE_FOREACH_I08(X_MACRO, NAME, OP) X_MACRO(int, s, 8, 8, NAME, OP)
+#define HWY_SVE_FOREACH_I16(X_MACRO, NAME, OP) X_MACRO(int, s, 16, 8, NAME, OP)
+#define HWY_SVE_FOREACH_I32(X_MACRO, NAME, OP) X_MACRO(int, s, 32, 16, NAME, OP)
+#define HWY_SVE_FOREACH_I64(X_MACRO, NAME, OP) X_MACRO(int, s, 64, 32, NAME, OP)
+
+// Float (BASE = float, CHAR = f):
+#define HWY_SVE_FOREACH_F16(X_MACRO, NAME, OP) \
+  X_MACRO(float, f, 16, 16, NAME, OP)
+#define HWY_SVE_FOREACH_F32(X_MACRO, NAME, OP) \
+  X_MACRO(float, f, 32, 16, NAME, OP)
+#define HWY_SVE_FOREACH_F64(X_MACRO, NAME, OP) \
+  X_MACRO(float, f, 64, 32, NAME, OP)
+
+#if HWY_SVE_HAVE_BFLOAT16
+#define HWY_SVE_FOREACH_BF16(X_MACRO, NAME, OP) \
+  X_MACRO(bfloat, bf, 16, 16, NAME, OP)
+#else  // no native bf16: the list expands to nothing
+#define HWY_SVE_FOREACH_BF16(X_MACRO, NAME, OP)
+#endif
+
+// All lane widths of one type category:
+#define HWY_SVE_FOREACH_U(X_MACRO, NAME, OP) \
+  HWY_SVE_FOREACH_U08(X_MACRO, NAME, OP)     \
+  HWY_SVE_FOREACH_U16(X_MACRO, NAME, OP)     \
+  HWY_SVE_FOREACH_U32(X_MACRO, NAME, OP)     \
+  HWY_SVE_FOREACH_U64(X_MACRO, NAME, OP)
+
+#define HWY_SVE_FOREACH_I(X_MACRO, NAME, OP) \
+  HWY_SVE_FOREACH_I08(X_MACRO, NAME, OP)     \
+  HWY_SVE_FOREACH_I16(X_MACRO, NAME, OP)     \
+  HWY_SVE_FOREACH_I32(X_MACRO, NAME, OP)     \
+  HWY_SVE_FOREACH_I64(X_MACRO, NAME, OP)
+
+// HWY_SVE_FOREACH_F deliberately omits HWY_SVE_FOREACH_BF16 because SVE lacks
+// bf16 overloads for some intrinsics (especially less-common arithmetic).
+#define HWY_SVE_FOREACH_F(X_MACRO, NAME, OP) \
+  HWY_SVE_FOREACH_F16(X_MACRO, NAME, OP)     \
+  HWY_SVE_FOREACH_F32(X_MACRO, NAME, OP)     \
+  HWY_SVE_FOREACH_F64(X_MACRO, NAME, OP)
+
+// Commonly used combinations restricted to particular lane widths:
+#define HWY_SVE_FOREACH_UI08(X_MACRO, NAME, OP) \
+  HWY_SVE_FOREACH_U08(X_MACRO, NAME, OP)        \
+  HWY_SVE_FOREACH_I08(X_MACRO, NAME, OP)
+
+#define HWY_SVE_FOREACH_UI16(X_MACRO, NAME, OP) \
+  HWY_SVE_FOREACH_U16(X_MACRO, NAME, OP)        \
+  HWY_SVE_FOREACH_I16(X_MACRO, NAME, OP)
+
+#define HWY_SVE_FOREACH_UI32(X_MACRO, NAME, OP) \
+  HWY_SVE_FOREACH_U32(X_MACRO, NAME, OP)        \
+  HWY_SVE_FOREACH_I32(X_MACRO, NAME, OP)
+
+#define HWY_SVE_FOREACH_UI64(X_MACRO, NAME, OP) \
+  HWY_SVE_FOREACH_U64(X_MACRO, NAME, OP)        \
+  HWY_SVE_FOREACH_I64(X_MACRO, NAME, OP)
+
+#define HWY_SVE_FOREACH_UIF3264(X_MACRO, NAME, OP) \
+  HWY_SVE_FOREACH_UI32(X_MACRO, NAME, OP)          \
+  HWY_SVE_FOREACH_UI64(X_MACRO, NAME, OP)          \
+  HWY_SVE_FOREACH_F32(X_MACRO, NAME, OP)           \
+  HWY_SVE_FOREACH_F64(X_MACRO, NAME, OP)
+
+// Commonly used combinations spanning all lane widths:
+#define HWY_SVE_FOREACH_UI(X_MACRO, NAME, OP) \
+  HWY_SVE_FOREACH_U(X_MACRO, NAME, OP)        \
+  HWY_SVE_FOREACH_I(X_MACRO, NAME, OP)
+
+#define HWY_SVE_FOREACH_IF(X_MACRO, NAME, OP) \
+  HWY_SVE_FOREACH_I(X_MACRO, NAME, OP)        \
+  HWY_SVE_FOREACH_F(X_MACRO, NAME, OP)
+
+#define HWY_SVE_FOREACH(X_MACRO, NAME, OP) \
+  HWY_SVE_FOREACH_U(X_MACRO, NAME, OP)     \
+  HWY_SVE_FOREACH_I(X_MACRO, NAME, OP)     \
+  HWY_SVE_FOREACH_F(X_MACRO, NAME, OP)
+
+// Assemble scalar (T), tag (D), vector (V) and tuple type names in x-macros.
+#define HWY_SVE_T(BASE, BITS) BASE##BITS##_t
+#define HWY_SVE_D(BASE, BITS, N, POW2) Simd<HWY_SVE_T(BASE, BITS), N, POW2>
+#define HWY_SVE_V(BASE, BITS) sv##BASE##BITS##_t
+#define HWY_SVE_TUPLE(BASE, BITS, MUL) sv##BASE##BITS##x##MUL##_t
+
+}  // namespace detail
+
+// Maps each SVE vector type to its ScalableTag via DFromV_t<V>::type.
+#define HWY_SPECIALIZE(BASE, CHAR, BITS, HALF, NAME, OP) \
+  template <>                                            \
+  struct DFromV_t<HWY_SVE_V(BASE, BITS)> {               \
+    typedef ScalableTag<HWY_SVE_T(BASE, BITS)> type;     \
+  };
+HWY_SVE_FOREACH(HWY_SPECIALIZE, _, _)
+HWY_SVE_FOREACH_BF16(HWY_SPECIALIZE, _, _)
+#undef HWY_SPECIALIZE
+
+// Note: the _x forms (don't-care value for inactive lanes) avoid additional
+// MOVPRFX instructions, and we anyway only use them with a ptrue predicate.
+
+// vector = f(vector), e.g. Not. ARGPV adds a ptrue predicate, ARGV has none.
+#define HWY_SVE_RETV_ARGPV(BASE, CHAR, BITS, HALF, NAME, OP)    \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME(HWY_SVE_V(BASE, BITS) v) { \
+    return sv##OP##_##CHAR##BITS##_x(HWY_SVE_PTRUE(BITS), v);   \
+  }
+#define HWY_SVE_RETV_ARGV(BASE, CHAR, BITS, HALF, NAME, OP)     \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME(HWY_SVE_V(BASE, BITS) v) { \
+    return sv##OP##_##CHAR##BITS(v);                            \
+  }
+
+// vector = f(vector, scalar), e.g. detail::AddN (predicated / unpredicated)
+#define HWY_SVE_RETV_ARGPVN(BASE, CHAR, BITS, HALF, NAME, OP)    \
+  HWY_API HWY_SVE_V(BASE, BITS)                                  \
+      NAME(HWY_SVE_V(BASE, BITS) a, HWY_SVE_T(BASE, BITS) b) {   \
+    return sv##OP##_##CHAR##BITS##_x(HWY_SVE_PTRUE(BITS), a, b); \
+  }
+#define HWY_SVE_RETV_ARGVN(BASE, CHAR, BITS, HALF, NAME, OP)   \
+  HWY_API HWY_SVE_V(BASE, BITS)                                \
+      NAME(HWY_SVE_V(BASE, BITS) a, HWY_SVE_T(BASE, BITS) b) { \
+    return sv##OP##_##CHAR##BITS(a, b);                        \
+  }
+
+// vector = f(vector, vector), e.g. Add (predicated / unpredicated)
+#define HWY_SVE_RETV_ARGPVV(BASE, CHAR, BITS, HALF, NAME, OP)    \
+  HWY_API HWY_SVE_V(BASE, BITS)                                  \
+      NAME(HWY_SVE_V(BASE, BITS) a, HWY_SVE_V(BASE, BITS) b) {   \
+    return sv##OP##_##CHAR##BITS##_x(HWY_SVE_PTRUE(BITS), a, b); \
+  }
+#define HWY_SVE_RETV_ARGVV(BASE, CHAR, BITS, HALF, NAME, OP)   \
+  HWY_API HWY_SVE_V(BASE, BITS)                                \
+      NAME(HWY_SVE_V(BASE, BITS) a, HWY_SVE_V(BASE, BITS) b) { \
+    return sv##OP##_##CHAR##BITS(a, b);                        \
+  }
+
+#define HWY_SVE_RETV_ARGVVV(BASE, CHAR, BITS, HALF, NAME, OP) \
+  HWY_API HWY_SVE_V(BASE, BITS)                               \
+      NAME(HWY_SVE_V(BASE, BITS) a, HWY_SVE_V(BASE, BITS) b,  \
+           HWY_SVE_V(BASE, BITS) c) {                         \
+    return sv##OP##_##CHAR##BITS(a, b, c);                    \
+  }
+
+// ------------------------------ Lanes
+
+namespace detail {
+
+// Returns hardware lane count for sizeof(T), not rounded to a power of two.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_INLINE size_t AllHardwareLanes() {
+  return svcntb_pat(SV_ALL);  // number of 8-bit lanes
+}
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE size_t AllHardwareLanes() {
+  return svcnth_pat(SV_ALL);  // number of 16-bit lanes
+}
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE size_t AllHardwareLanes() {
+  return svcntw_pat(SV_ALL);  // number of 32-bit lanes
+}
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_INLINE size_t AllHardwareLanes() {
+  return svcntd_pat(SV_ALL);  // number of 64-bit lanes
+}
+
+// All-true masks from macros; the two only differ when !HWY_SVE_IS_POW2.
+
+#if HWY_SVE_IS_POW2
+#define HWY_SVE_ALL_PTRUE(BITS) svptrue_b##BITS()
+#define HWY_SVE_PTRUE(BITS) svptrue_b##BITS()
+#else  // ALL uses pattern SV_ALL, the default uses SV_POW2
+#define HWY_SVE_ALL_PTRUE(BITS) svptrue_pat_b##BITS(SV_ALL)
+#define HWY_SVE_PTRUE(BITS) svptrue_pat_b##BITS(SV_POW2)
+#endif  // HWY_SVE_IS_POW2
+
+}  // namespace detail
+
+#if HWY_HAVE_SCALABLE
+
+// Number of lanes usable with d: hardware lanes after shifting and capping by
+// N. Can be 0, e.g. "1/8th" of a u32x4 (whereas 1/8th of a u32x8 would be 1).
+template <typename T, size_t N, int kPow2>
+HWY_API size_t Lanes(Simd<T, N, kPow2> d) {
+  const size_t hw_lanes = detail::AllHardwareLanes<T>();
+  // Full vectors are the common case: return before doing any extra work.
+  if (detail::IsFull(d)) return hw_lanes;
+  constexpr int kShift = HWY_MIN(kPow2, 0);  // exponent clamped to <= 0
+  constexpr size_t kCap = MaxLanes(d);
+  return HWY_MIN(detail::ScaleByPower(hw_lanes, kShift), kCap);
+}
+
+#endif  // HWY_HAVE_SCALABLE
+
+// ================================================== MASK INIT
+
+// One predicate bit per byte; only the bit of a lane's lowest byte is valid.
+
+// ------------------------------ FirstN
+#define HWY_SVE_FIRSTN(BASE, CHAR, BITS, HALF, NAME, OP)                       \
+  template <size_t N, int kPow2>                                               \
+  HWY_API svbool_t NAME(HWY_SVE_D(BASE, BITS, N, kPow2) d, size_t count) {     \
+    const size_t limit = detail::IsFull(d) ? count : HWY_MIN(Lanes(d), count); \
+    return sv##OP##_b##BITS##_u32(uint32_t{0}, static_cast<uint32_t>(limit));  \
+  }
+HWY_SVE_FOREACH(HWY_SVE_FIRSTN, FirstN, whilelt)  // svwhilelt_bN_u32(0, limit)
+HWY_SVE_FOREACH_BF16(HWY_SVE_FIRSTN, FirstN, whilelt)
+
+#undef HWY_SVE_FIRSTN
+
+template <class D>
+using MFromD = decltype(FirstN(D(), 0));  // mask type for tag D
+
+#if !HWY_HAVE_FLOAT16
+template <class D, HWY_IF_F16_D(D)>
+MFromD<RebindToUnsigned<D>> FirstN(D /* tag */, size_t count) {
+  return FirstN(RebindToUnsigned<D>(), count);  // same-size unsigned lanes
+}
+#endif  // !HWY_HAVE_FLOAT16
+
+#if !HWY_SVE_HAVE_BFLOAT16
+template <class D, HWY_IF_BF16_D(D)>
+MFromD<RebindToUnsigned<D>> FirstN(D /* tag */, size_t count) {
+  return FirstN(RebindToUnsigned<D>(), count);  // same-size unsigned lanes
+}
+#endif  // !HWY_SVE_HAVE_BFLOAT16
+
+namespace detail {
+
+#define HWY_SVE_WRAP_PTRUE(BASE, CHAR, BITS, HALF, NAME, OP)            \
+  template <size_t N, int kPow2>                                        \
+  HWY_API svbool_t NAME(HWY_SVE_D(BASE, BITS, N, kPow2) /* d */) {      \
+    return HWY_SVE_PTRUE(BITS);                                         \
+  }                                                                     \
+  template <size_t N, int kPow2>                                        \
+  HWY_API svbool_t All##NAME(HWY_SVE_D(BASE, BITS, N, kPow2) /* d */) { \
+    return HWY_SVE_ALL_PTRUE(BITS);                                     \
+  }
+
+HWY_SVE_FOREACH(HWY_SVE_WRAP_PTRUE, PTrue, ptrue)  // all-true: PTrue, AllPTrue
+HWY_SVE_FOREACH_BF16(HWY_SVE_WRAP_PTRUE, PTrue, ptrue)
+#undef HWY_SVE_WRAP_PTRUE
+
+HWY_API svbool_t PFalse() { return svpfalse_b(); }  // all-false predicate
+
+// Predicate for the lanes of d: all-true when d is a full vector (HWY_FULL),
+// otherwise FirstN(d, Lanes(d)), i.e. after capping by N. Intended for
+// functions that load/store memory; other functions (e.g. arithmetic) can
+// ignore d and use PTrue instead.
+template <class D>
+svbool_t MakeMask(D d) {
+  if (IsFull(d)) return PTrue(d);
+  return FirstN(d, Lanes(d));
+}
+
+}  // namespace detail
+
+// ================================================== INIT
+
+// ------------------------------ Set
+// vector = f(d, scalar), e.g. Set: broadcasts arg to all lanes (svdup_n).
+#define HWY_SVE_SET(BASE, CHAR, BITS, HALF, NAME, OP)                         \
+  template <size_t N, int kPow2>                                              \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME(HWY_SVE_D(BASE, BITS, N, kPow2) /* d */, \
+                                     HWY_SVE_T(BASE, BITS) arg) {             \
+    return sv##OP##_##CHAR##BITS(arg);                                        \
+  }
+
+HWY_SVE_FOREACH(HWY_SVE_SET, Set, dup_n)
+HWY_SVE_FOREACH_BF16(HWY_SVE_SET, Set, dup_n)
+#if !HWY_SVE_HAVE_BFLOAT16
+// Required for Zero and VFromD: here bf16 is represented as u16 lanes.
+template <size_t N, int kPow2>
+svuint16_t Set(Simd<bfloat16_t, N, kPow2> d, bfloat16_t arg) {
+  return Set(RebindToUnsigned<decltype(d)>(), arg.bits);
+}
+#endif  // !HWY_SVE_HAVE_BFLOAT16
+#undef HWY_SVE_SET
+
+template <class D>
+using VFromD = decltype(Set(D(), TFromD<D>()));  // vector type for tag D
+
+using VBF16 = VFromD<ScalableTag<bfloat16_t>>;  // bfloat16 vector type
+
+// ------------------------------ Zero
+
+template <class D>
+VFromD<D> Zero(D d) {
+  // Splat 0 via the unsigned tag and cast back, which also supports
+  // bfloat16_t.
+  return BitCast(d, Set(RebindToUnsigned<D>(), 0));
+}
+
+// ------------------------------ Undefined
+
+#define HWY_SVE_UNDEFINED(BASE, CHAR, BITS, HALF, NAME, OP) \
+  template <size_t N, int kPow2>                            \
+  HWY_API HWY_SVE_V(BASE, BITS)                             \
+      NAME(HWY_SVE_D(BASE, BITS, N, kPow2) /* d */) {       \
+    return sv##OP##_##CHAR##BITS();                         \
+  }
+
+HWY_SVE_FOREACH(HWY_SVE_UNDEFINED, Undefined, undef)  // wraps svundef_*
+
+// ------------------------------ BitCast
+
+namespace detail {
+
+// u8 is already the byte representation, so both directions are no-ops.
+#define HWY_SVE_CAST_NOP(BASE, CHAR, BITS, HALF, NAME, OP)                \
+  HWY_API HWY_SVE_V(BASE, BITS) BitCastToByte(HWY_SVE_V(BASE, BITS) v) {  \
+    return v;                                                             \
+  }                                                                       \
+  template <size_t N, int kPow2>                                          \
+  HWY_API HWY_SVE_V(BASE, BITS) BitCastFromByte(                          \
+      HWY_SVE_D(BASE, BITS, N, kPow2) /* d */, HWY_SVE_V(BASE, BITS) v) { \
+    return v;                                                             \
+  }
+
+// All other types: svreinterpret to/from svuint8_t.
+#define HWY_SVE_CAST(BASE, CHAR, BITS, HALF, NAME, OP)                        \
+  HWY_INLINE svuint8_t BitCastToByte(HWY_SVE_V(BASE, BITS) v) {               \
+    return sv##OP##_u8_##CHAR##BITS(v);                                       \
+  }                                                                           \
+  template <size_t N, int kPow2>                                              \
+  HWY_INLINE HWY_SVE_V(BASE, BITS)                                            \
+      BitCastFromByte(HWY_SVE_D(BASE, BITS, N, kPow2) /* d */, svuint8_t v) { \
+    return sv##OP##_##CHAR##BITS##_u8(v);                                     \
+  }
+
+HWY_SVE_FOREACH_U08(HWY_SVE_CAST_NOP, _, _)
+HWY_SVE_FOREACH_I08(HWY_SVE_CAST, _, reinterpret)
+HWY_SVE_FOREACH_UI16(HWY_SVE_CAST, _, reinterpret)
+HWY_SVE_FOREACH_UI32(HWY_SVE_CAST, _, reinterpret)
+HWY_SVE_FOREACH_UI64(HWY_SVE_CAST, _, reinterpret)
+HWY_SVE_FOREACH_F(HWY_SVE_CAST, _, reinterpret)
+HWY_SVE_FOREACH_BF16(HWY_SVE_CAST, _, reinterpret)
+
+#undef HWY_SVE_CAST_NOP
+#undef HWY_SVE_CAST
+
+#if !HWY_SVE_HAVE_BFLOAT16
+// Without native bf16 vectors, VBF16 holds u16 lanes: reuse the u16 cast.
+template <size_t N, int kPow2>
+HWY_INLINE VBF16 BitCastFromByte(Simd<bfloat16_t, N, kPow2>, svuint8_t v) {
+  return BitCastFromByte(Simd<uint16_t, N, kPow2>(), v);
+}
+#endif  // !HWY_SVE_HAVE_BFLOAT16
+
+}  // namespace detail
+
+template <class D, class FromV>
+HWY_API VFromD<D> BitCast(D d, FromV v) {
+  return detail::BitCastFromByte(d, detail::BitCastToByte(v));  // via u8
+}
+
+// ------------------------------ Tuple
+
+// tuple = f(d, v0, v1, ..), i.e. Create2/Create3/Create4 via svcreateN.
+#define HWY_SVE_CREATE(BASE, CHAR, BITS, HALF, NAME, OP)                 \
+  template <size_t N, int kPow2>                                         \
+  HWY_API HWY_SVE_TUPLE(BASE, BITS, 2)                                   \
+      NAME##2(HWY_SVE_D(BASE, BITS, N, kPow2) /* d */,                   \
+              HWY_SVE_V(BASE, BITS) v0, HWY_SVE_V(BASE, BITS) v1) {      \
+    return sv##OP##2_##CHAR##BITS(v0, v1);                               \
+  }                                                                      \
+  template <size_t N, int kPow2>                                         \
+  HWY_API HWY_SVE_TUPLE(BASE, BITS, 3) NAME##3(                          \
+      HWY_SVE_D(BASE, BITS, N, kPow2) /* d */, HWY_SVE_V(BASE, BITS) v0, \
+      HWY_SVE_V(BASE, BITS) v1, HWY_SVE_V(BASE, BITS) v2) {              \
+    return sv##OP##3_##CHAR##BITS(v0, v1, v2);                           \
+  }                                                                      \
+  template <size_t N, int kPow2>                                         \
+  HWY_API HWY_SVE_TUPLE(BASE, BITS, 4)                                   \
+      NAME##4(HWY_SVE_D(BASE, BITS, N, kPow2) /* d */,                   \
+              HWY_SVE_V(BASE, BITS) v0, HWY_SVE_V(BASE, BITS) v1,        \
+              HWY_SVE_V(BASE, BITS) v2, HWY_SVE_V(BASE, BITS) v3) {      \
+    return sv##OP##4_##CHAR##BITS(v0, v1, v2, v3);                       \
+  }
+
+HWY_SVE_FOREACH(HWY_SVE_CREATE, Create, create)
+HWY_SVE_FOREACH_BF16(HWY_SVE_CREATE, Create, create)
+#undef HWY_SVE_CREATE
+
+template <class D>  // Vec2/3/4: tuple types of 2/3/4 vectors for tag D
+using Vec2 = decltype(Create2(D(), Zero(D()), Zero(D())));
+template <class D>
+using Vec3 = decltype(Create3(D(), Zero(D()), Zero(D()), Zero(D())));
+template <class D>
+using Vec4 = decltype(Create4(D(), Zero(D()), Zero(D()), Zero(D()), Zero(D())));
+
+#define HWY_SVE_GET(BASE, CHAR, BITS, HALF, NAME, OP)                         \
+  template <size_t kIndex>                                                    \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME##2(HWY_SVE_TUPLE(BASE, BITS, 2) tuple) { \
+    return sv##OP##2_##CHAR##BITS(tuple, kIndex);                             \
+  }                                                                           \
+  template <size_t kIndex>                                                    \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME##3(HWY_SVE_TUPLE(BASE, BITS, 3) tuple) { \
+    return sv##OP##3_##CHAR##BITS(tuple, kIndex);                             \
+  }                                                                           \
+  template <size_t kIndex>                                                    \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME##4(HWY_SVE_TUPLE(BASE, BITS, 4) tuple) { \
+    return sv##OP##4_##CHAR##BITS(tuple, kIndex);                             \
+  }
+
+HWY_SVE_FOREACH(HWY_SVE_GET, Get, get)  // Get2/3/4<kIndex>(tuple) via svgetN
+HWY_SVE_FOREACH_BF16(HWY_SVE_GET, Get, get)
+#undef HWY_SVE_GET
+
+#define HWY_SVE_SET(BASE, CHAR, BITS, HALF, NAME, OP)                          \
+  template <size_t kIndex>                                                     \
+  HWY_API HWY_SVE_TUPLE(BASE, BITS, 2)                                         \
+      NAME##2(HWY_SVE_TUPLE(BASE, BITS, 2) tuple, HWY_SVE_V(BASE, BITS) vec) { \
+    return sv##OP##2_##CHAR##BITS(tuple, kIndex, vec);                         \
+  }                                                                            \
+  template <size_t kIndex>                                                     \
+  HWY_API HWY_SVE_TUPLE(BASE, BITS, 3)                                         \
+      NAME##3(HWY_SVE_TUPLE(BASE, BITS, 3) tuple, HWY_SVE_V(BASE, BITS) vec) { \
+    return sv##OP##3_##CHAR##BITS(tuple, kIndex, vec);                         \
+  }                                                                            \
+  template <size_t kIndex>                                                     \
+  HWY_API HWY_SVE_TUPLE(BASE, BITS, 4)                                         \
+      NAME##4(HWY_SVE_TUPLE(BASE, BITS, 4) tuple, HWY_SVE_V(BASE, BITS) vec) { \
+    return sv##OP##4_##CHAR##BITS(tuple, kIndex, vec);                         \
+  }
+
+HWY_SVE_FOREACH(HWY_SVE_SET, Set, set)  // Set2/3/4<kIndex>(tuple, vec)
+HWY_SVE_FOREACH_BF16(HWY_SVE_SET, Set, set)
+#undef HWY_SVE_SET
+
+// ------------------------------ ResizeBitCast
+
+// On SVE, ResizeBitCast simply forwards to BitCast.
+template <class D, class FromV>
+HWY_API VFromD<D> ResizeBitCast(D d, FromV v) {
+  return BitCast(d, v);
+}
+
+// ================================================== LOGICAL
+
+// detail::*N() overloads take a scalar operand, avoiding an extra Set().
+
+// ------------------------------ Not
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGPV, Not, not )  // NOLINT
+
+// ------------------------------ And
+
+namespace detail {
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGPVN, AndN, and_n)
+}  // namespace detail
+
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGPVV, And, and)  // integer lanes
+
+template <class V, HWY_IF_FLOAT_V(V)>
+HWY_API V And(const V a, const V b) {
+  // Integer-only intrinsic: reinterpret as unsigned lanes, AND, cast back.
+  const RebindToUnsigned<DFromV<V>> du;
+  return BitCast(DFromV<V>(), And(BitCast(du, a), BitCast(du, b)));
+}
+
+// ------------------------------ Or
+
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGPVV, Or, orr)  // integer lanes
+
+template <class V, HWY_IF_FLOAT_V(V)>
+HWY_API V Or(const V a, const V b) {
+  // Integer-only intrinsic: reinterpret as unsigned lanes, OR, cast back.
+  const RebindToUnsigned<DFromV<V>> du;
+  return BitCast(DFromV<V>(), Or(BitCast(du, a), BitCast(du, b)));
+}
+
+// ------------------------------ Xor
+
+namespace detail {
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGPVN, XorN, eor_n)  // scalar second operand
+}  // namespace detail
+
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGPVV, Xor, eor)  // integer lanes
+
+template <class V, HWY_IF_FLOAT_V(V)>
+HWY_API V Xor(const V a, const V b) {
+  // Integer-only intrinsic: reinterpret as unsigned lanes, XOR, cast back.
+  const RebindToUnsigned<DFromV<V>> du;
+  return BitCast(DFromV<V>(), Xor(BitCast(du, a), BitCast(du, b)));
+}
+
+// ------------------------------ AndNot
+
+namespace detail {
+#define HWY_SVE_RETV_ARGPVN_SWAP(BASE, CHAR, BITS, HALF, NAME, OP) \
+  HWY_API HWY_SVE_V(BASE, BITS)                                    \
+      NAME(HWY_SVE_T(BASE, BITS) a, HWY_SVE_V(BASE, BITS) b) {     \
+    return sv##OP##_##CHAR##BITS##_x(HWY_SVE_PTRUE(BITS), b, a);   \
+  }
+
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGPVN_SWAP, AndNotN, bic_n)  // ~a & b, scalar a
+#undef HWY_SVE_RETV_ARGPVN_SWAP
+}  // namespace detail
+
+#define HWY_SVE_RETV_ARGPVV_SWAP(BASE, CHAR, BITS, HALF, NAME, OP) \
+  HWY_API HWY_SVE_V(BASE, BITS)                                    \
+      NAME(HWY_SVE_V(BASE, BITS) a, HWY_SVE_V(BASE, BITS) b) {     \
+    return sv##OP##_##CHAR##BITS##_x(HWY_SVE_PTRUE(BITS), b, a);   \
+  }
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGPVV_SWAP, AndNot, bic)  // ~a & b via bic(b, a)
+#undef HWY_SVE_RETV_ARGPVV_SWAP
+
+template <class V, HWY_IF_FLOAT_V(V)>
+HWY_API V AndNot(const V a, const V b) {
+  // Integer-only intrinsic: reinterpret as unsigned lanes, AndNot, cast back.
+  const RebindToUnsigned<DFromV<V>> du;
+  return BitCast(DFromV<V>(), AndNot(BitCast(du, a), BitCast(du, b)));
+}
+
+// ------------------------------ Xor3
+
+#if HWY_SVE_HAVE_2
+// SVE2 has a three-input XOR (eor3) for integer lanes.
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGVVV, Xor3, eor3)
+
+// Floats: apply the integer overload to the same-size unsigned lanes.
+template <class V, HWY_IF_FLOAT_V(V)>
+HWY_API V Xor3(const V x1, const V x2, const V x3) {
+  const RebindToUnsigned<DFromV<V>> du;
+  const auto bits = Xor3(BitCast(du, x1), BitCast(du, x2), BitCast(du, x3));
+  return BitCast(DFromV<V>(), bits);
+}
+#else  // !HWY_SVE_HAVE_2: compose two Xor.
+template <class V>
+HWY_API V Xor3(V x1, V x2, V x3) {
+  return Xor(x1, Xor(x2, x3));
+}
+#endif  // HWY_SVE_HAVE_2
+
+// ------------------------------ Or3
+template <class V>
+HWY_API V Or3(V o1, V o2, V o3) {
+  return Or(o1, Or(o2, o3));  // o1 | o2 | o3
+}
+
+// ------------------------------ OrAnd
+template <class V>
+HWY_API V OrAnd(const V o, const V a1, const V a2) {
+  return Or(o, And(a1, a2));  // o | (a1 & a2)
+}
+
+// ------------------------------ PopulationCount
+
+#ifdef HWY_NATIVE_POPCNT  // toggle the per-target flag
+#undef HWY_NATIVE_POPCNT
+#else
+#define HWY_NATIVE_POPCNT
+#endif
+
+// BitCast the intrinsic's result so the original (not unsigned) type returns.
+#define HWY_SVE_POPCNT(BASE, CHAR, BITS, HALF, NAME, OP)               \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME(HWY_SVE_V(BASE, BITS) v) {        \
+    return BitCast(DFromV<decltype(v)>(),                              \
+                   sv##OP##_##CHAR##BITS##_x(HWY_SVE_PTRUE(BITS), v)); \
+  }
+HWY_SVE_FOREACH_UI(HWY_SVE_POPCNT, PopulationCount, cnt)
+#undef HWY_SVE_POPCNT
+
+// ================================================== SIGN
+
+// ------------------------------ Neg
+HWY_SVE_FOREACH_IF(HWY_SVE_RETV_ARGPV, Neg, neg)  // signed int and float
+
+HWY_API VBF16 Neg(VBF16 v) {
+  // bf16 is not covered by the macro above: XOR the raw lanes with SignMask.
+  const RebindToUnsigned<DFromV<VBF16>> du;
+  const auto sign_bits = Set(du, SignMask<TFromD<decltype(du)>>());
+  return BitCast(DFromV<VBF16>(), Xor(BitCast(du, v), sign_bits));
+}
+
+// ------------------------------ Abs
+HWY_SVE_FOREACH_IF(HWY_SVE_RETV_ARGPV, Abs, abs)  // signed int and float
+
+// ================================================== ARITHMETIC
+
+// Toggled per target so that generic_ops-inl.h does not also define Add etc.
+#ifdef HWY_NATIVE_OPERATOR_REPLACEMENTS
+#undef HWY_NATIVE_OPERATOR_REPLACEMENTS
+#else
+#define HWY_NATIVE_OPERATOR_REPLACEMENTS
+#endif
+
+// ------------------------------ Add
+
+namespace detail {
+HWY_SVE_FOREACH(HWY_SVE_RETV_ARGPVN, AddN, add_n)  // scalar second operand
+}  // namespace detail
+
+HWY_SVE_FOREACH(HWY_SVE_RETV_ARGPVV, Add, add)  // all integer and float lanes
+
+// ------------------------------ Sub
+
+namespace detail {
+// HWY_SVE_RETV_ARGPVN always passes ptrue, but SubN callers supply pg (_z form).
+#define HWY_SVE_RETV_ARGPVN_MASK(BASE, CHAR, BITS, HALF, NAME, OP)          \
+  HWY_API HWY_SVE_V(BASE, BITS)                                             \
+      NAME(svbool_t pg, HWY_SVE_V(BASE, BITS) a, HWY_SVE_T(BASE, BITS) b) { \
+    return sv##OP##_##CHAR##BITS##_z(pg, a, b);                             \
+  }
+
+HWY_SVE_FOREACH(HWY_SVE_RETV_ARGPVN_MASK, SubN, sub_n)
+#undef HWY_SVE_RETV_ARGPVN_MASK
+}  // namespace detail
+
+HWY_SVE_FOREACH(HWY_SVE_RETV_ARGPVV, Sub, sub)  // all integer and float lanes
+
+// ------------------------------ SumsOf8
+HWY_API svuint64_t SumsOf8(const svuint8_t v) {
+  const ScalableTag<uint64_t> d64;
+  const svbool_t all = detail::PTrue(d64);
+  // Dot product with 1 sums each group of four u8 into one u32 lane.
+  const svuint32_t quads = svdot_n_u32(Zero(ScalableTag<uint32_t>()), v, 1);
+  // View pairs of u32 sums as u64 lanes, then add their upper and lower halves.
+  // TODO(janwas): on SVE2, we can instead use svaddp.
+  const svuint64_t pairs = BitCast(d64, quads);
+  const svuint64_t upper = svlsr_n_u64_x(all, pairs, 32);
+  // Isolate the lower 32 bits of each u64 lane (zero-extended).
+  const svuint64_t lower = svextw_u64_x(all, pairs);
+  return Add(upper, lower);
+}
+
+// ------------------------------ SaturatedAdd
+
+#ifdef HWY_NATIVE_I32_SATURATED_ADDSUB  // toggle per-target flag (i32)
+#undef HWY_NATIVE_I32_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_I32_SATURATED_ADDSUB
+#endif
+
+#ifdef HWY_NATIVE_U32_SATURATED_ADDSUB  // toggle per-target flag (u32)
+#undef HWY_NATIVE_U32_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_U32_SATURATED_ADDSUB
+#endif
+
+#ifdef HWY_NATIVE_I64_SATURATED_ADDSUB  // toggle per-target flag (i64)
+#undef HWY_NATIVE_I64_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_I64_SATURATED_ADDSUB
+#endif
+
+#ifdef HWY_NATIVE_U64_SATURATED_ADDSUB  // toggle per-target flag (u64)
+#undef HWY_NATIVE_U64_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_U64_SATURATED_ADDSUB
+#endif
+
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGVV, SaturatedAdd, qadd)  // unpredicated
+
+// ------------------------------ SaturatedSub
+
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGVV, SaturatedSub, qsub)  // unpredicated
+
+// ------------------------------ AbsDiff
+#ifdef HWY_NATIVE_INTEGER_ABS_DIFF  // toggle the per-target flag
+#undef HWY_NATIVE_INTEGER_ABS_DIFF
+#else
+#define HWY_NATIVE_INTEGER_ABS_DIFF
+#endif
+
+HWY_SVE_FOREACH(HWY_SVE_RETV_ARGPVV, AbsDiff, abd)  // integer and float lanes
+
+// ------------------------------ ShiftLeft[Same]
+
+#define HWY_SVE_SHIFT_N(BASE, CHAR, BITS, HALF, NAME, OP)               \
+  template <int kBits>                                                  \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME(HWY_SVE_V(BASE, BITS) v) {         \
+    return sv##OP##_##CHAR##BITS##_x(HWY_SVE_PTRUE(BITS), v, kBits);    \
+  }                                                                     \
+  HWY_API HWY_SVE_V(BASE, BITS)                                         \
+      NAME##Same(HWY_SVE_V(BASE, BITS) v, HWY_SVE_T(uint, BITS) bits) { \
+    return sv##OP##_##CHAR##BITS##_x(HWY_SVE_PTRUE(BITS), v, bits);     \
+  }
+
+HWY_SVE_FOREACH_UI(HWY_SVE_SHIFT_N, ShiftLeft, lsl_n)  // also ShiftLeftSame
+
+// ------------------------------ ShiftRight[Same]
+
+HWY_SVE_FOREACH_U(HWY_SVE_SHIFT_N, ShiftRight, lsr_n)  // unsigned: lsr
+HWY_SVE_FOREACH_I(HWY_SVE_SHIFT_N, ShiftRight, asr_n)  // signed: asr
+
+#undef HWY_SVE_SHIFT_N
+
+// ------------------------------ RotateRight
+// TODO(janwas): svxar on SVE2
+template <int kBits, class V>
+HWY_API V RotateRight(const V v) {
+  constexpr size_t kLaneBits = 8 * sizeof(TFromV<V>);
+  static_assert(0 <= kBits && kBits < kLaneBits, "Invalid shift count");
+  if (kBits == 0) return v;
+  // The clamp keeps the left-shift count valid even when kBits == 0.
+  constexpr size_t kLeft = HWY_MIN(kLaneBits - 1, kLaneBits - kBits);
+  return Or(ShiftRight<kBits>(v), ShiftLeft<kLeft>(v));
+}
+
+// ------------------------------ Shl/r
+
+#define HWY_SVE_SHIFT(BASE, CHAR, BITS, HALF, NAME, OP)           \
+  HWY_API HWY_SVE_V(BASE, BITS)                                   \
+      NAME(HWY_SVE_V(BASE, BITS) v, HWY_SVE_V(BASE, BITS) bits) { \
+    const RebindToUnsigned<DFromV<decltype(v)>> du;               \
+    return sv##OP##_##CHAR##BITS##_x(HWY_SVE_PTRUE(BITS), v,      \
+                                     BitCast(du, bits));          \
+  }
+
+HWY_SVE_FOREACH_UI(HWY_SVE_SHIFT, Shl, lsl)  // per-lane shift counts
+
+HWY_SVE_FOREACH_U(HWY_SVE_SHIFT, Shr, lsr)  // unsigned: lsr
+HWY_SVE_FOREACH_I(HWY_SVE_SHIFT, Shr, asr)  // signed: asr
+
+#undef HWY_SVE_SHIFT
+
+// ------------------------------ Min/Max
+
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGPVV, Min, min)
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGPVV, Max, max)
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGPVV, Min, minnm)  // floats use minnm
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGPVV, Max, maxnm)  // floats use maxnm
+
+namespace detail {
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGPVN, MinN, min_n)  // scalar second operand
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGPVN, MaxN, max_n)  // scalar second operand
+}  // namespace detail
+
+// ------------------------------ Mul
+
+// Toggled per target so generic_ops-inl.h skips its 8/64-bit operator*.
+#ifdef HWY_NATIVE_MUL_8
+#undef HWY_NATIVE_MUL_8
+#else
+#define HWY_NATIVE_MUL_8
+#endif
+#ifdef HWY_NATIVE_MUL_64
+#undef HWY_NATIVE_MUL_64
+#else
+#define HWY_NATIVE_MUL_64
+#endif
+
+HWY_SVE_FOREACH(HWY_SVE_RETV_ARGPVV, Mul, mul)  // all integer and float lanes
+
+// ------------------------------ MulHigh
+HWY_SVE_FOREACH_UI16(HWY_SVE_RETV_ARGPVV, MulHigh, mulh)
+// The remaining overloads are not part of the API; they are used internally:
+HWY_SVE_FOREACH_UI08(HWY_SVE_RETV_ARGPVV, MulHigh, mulh)
+HWY_SVE_FOREACH_UI32(HWY_SVE_RETV_ARGPVV, MulHigh, mulh)
+HWY_SVE_FOREACH_U64(HWY_SVE_RETV_ARGPVV, MulHigh, mulh)
+
+// ------------------------------ MulFixedPoint15
+HWY_API svint16_t MulFixedPoint15(svint16_t a, svint16_t b) {
+#if HWY_SVE_HAVE_2
+  return svqrdmulh_s16(a, b);
+#else
+  const RebindToUnsigned<DFromV<svint16_t>> du16;
+  const svint16_t upper = MulHigh(a, b);  // upper half of each product
+
+  const svuint16_t lower = BitCast(du16, Mul(a, b));  // lower half
+  // We want (lower + 0x4000) >> 15 added to twice the upper half, but that
+  // sum can overflow and the carry must reach the result. Only the top two
+  // bits of the lower half can influence it, so isolate them.
+  const svuint16_t top2 = ShiftRight<14>(lower);
+  // top2 == 0b11: add 2; 0b10 or 0b01: add 1; 0b00: add 0.
+  const svuint16_t carry = ShiftRight<1>(detail::AddN(top2, 1));
+  const svint16_t doubled = Add(upper, upper);
+  return Add(doubled, BitCast(DFromV<svint16_t>(), carry));
+#endif
+}
+
+// ------------------------------ Div
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGPVV, Div, div)  // float lanes only
+
+// ------------------------------ ApproximateReciprocal
+#ifdef HWY_NATIVE_F64_APPROX_RECIP  // toggle the per-target flag
+#undef HWY_NATIVE_F64_APPROX_RECIP
+#else
+#define HWY_NATIVE_F64_APPROX_RECIP
+#endif
+
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGV, ApproximateReciprocal, recpe)
+
+// ------------------------------ Sqrt
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGPV, Sqrt, sqrt)  // float lanes only
+
+// ------------------------------ ApproximateReciprocalSqrt
+#ifdef HWY_NATIVE_F64_APPROX_RSQRT  // toggle the per-target flag
+#undef HWY_NATIVE_F64_APPROX_RSQRT
+#else
+#define HWY_NATIVE_F64_APPROX_RSQRT
+#endif
+
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGV, ApproximateReciprocalSqrt, rsqrte)
+
+// ------------------------------ MulAdd
+
+// Toggled per target so generic_ops-inl.h does not define integer MulAdd.
+#ifdef HWY_NATIVE_INT_FMA
+#undef HWY_NATIVE_INT_FMA
+#else
+#define HWY_NATIVE_INT_FMA
+#endif
+
+#define HWY_SVE_FMA(BASE, CHAR, BITS, HALF, NAME, OP)                   \
+  HWY_API HWY_SVE_V(BASE, BITS)                                         \
+      NAME(HWY_SVE_V(BASE, BITS) mul, HWY_SVE_V(BASE, BITS) x,          \
+           HWY_SVE_V(BASE, BITS) add) {                                 \
+    return sv##OP##_##CHAR##BITS##_x(HWY_SVE_PTRUE(BITS), x, mul, add); \
+  }
+
+HWY_SVE_FOREACH(HWY_SVE_FMA, MulAdd, mad)  // mul * x + add
+
+// ------------------------------ NegMulAdd
+HWY_SVE_FOREACH(HWY_SVE_FMA, NegMulAdd, msb)  // add - mul * x
+
+// ------------------------------ MulSub
+HWY_SVE_FOREACH_F(HWY_SVE_FMA, MulSub, nmsb)  // mul * x - add (float only)
+
+// ------------------------------ NegMulSub
+HWY_SVE_FOREACH_F(HWY_SVE_FMA, NegMulSub, nmad)  // -mul * x - add (float only)
+
+#undef HWY_SVE_FMA
+
+// ------------------------------ Round etc.
+
+// Each op maps to one SVE round-to-integral instruction; per ACLE the suffix
+// selects the rounding mode:
+//   rintn: to nearest, ties to even   rintm: toward minus infinity
+//   rintp: toward plus infinity       rintz: toward zero
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGPV, Round, rintn)
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGPV, Floor, rintm)
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGPV, Ceil, rintp)
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGPV, Trunc, rintz)
+
+// ================================================== MASK
+
+// ------------------------------ RebindMask
+// Returns `mask` unchanged. The tag only names the target lane type; every
+// mask in this file is the same svbool_t regardless of lane width, so no
+// conversion is performed.
+template <class D, typename MFrom>
+HWY_API svbool_t RebindMask(const D /*d*/, const MFrom mask) {
+  return mask;
+}
+
+// ------------------------------ Mask logical
+
+// Returns the logical complement of mask `m`.
+HWY_API svbool_t Not(svbool_t m) {
+  // The lane type is unknown here, so govern with an 8-bit predicate. With
+  // wider lanes the result is de-canonicalized: bits that do not belong to the
+  // lowest byte of a lane are also set to 1. Arm says such bits are ignored.
+  const svbool_t every_byte = HWY_SVE_PTRUE(8);
+  return svnot_b_z(every_byte, m);
+}
+// Returns a & b. `b` serves as both governing predicate and first operand, so
+// the zeroing form yields b & (b & a).
+HWY_API svbool_t And(svbool_t a, svbool_t b) {
+  return svand_b_z(b, b, a);  // same order as AndNot for consistency
+}
+// Returns !a & b. svbic clears from its first operand the bits set in its
+// second, so the result is b & !a, restricted to lanes where `b` is set.
+HWY_API svbool_t AndNot(svbool_t a, svbool_t b) {
+  return svbic_b_z(b, b, a);  // reversed order like NEON
+}
+// Returns a | b, implemented as a select: where `a` is set take `a` (true),
+// otherwise take `b`.
+HWY_API svbool_t Or(svbool_t a, svbool_t b) {
+  return svsel_b(a, a, b);  // a ? true : b
+}
+// Returns a ^ b, built from a select because only one of the two branches
+// needs a negation.
+HWY_API svbool_t Xor(svbool_t a, svbool_t b) {
+  // Governed by `a`: within lanes where a is set this is !(a & b), i.e. !b.
+  const svbool_t not_both = svnand_b_z(a, a, b);
+  // a ? !(a & b) : b.
+  return svsel_b(a, not_both, b);
+}
+
+HWY_API svbool_t ExclusiveNeither(svbool_t a, svbool_t b) {
+  return svnor_b_z(HWY_SVE_PTRUE(8), a, b);  // !a && !b, undefined if a && b.
+}
+
+// ------------------------------ CountTrue
+
+// Generates CountTrue(d, m) for each lane type: the number of set lanes in `m`
+// counted with the `cntp` op for BITS-wide lanes. The governing predicate is
+// detail::MakeMask(d), so only lanes selected by that mask contribute.
+#define HWY_SVE_COUNT_TRUE(BASE, CHAR, BITS, HALF, NAME, OP)           \
+  template <size_t N, int kPow2>                                       \
+  HWY_API size_t NAME(HWY_SVE_D(BASE, BITS, N, kPow2) d, svbool_t m) { \
+    return sv##OP##_b##BITS(detail::MakeMask(d), m);                   \
+  }
+
+HWY_SVE_FOREACH(HWY_SVE_COUNT_TRUE, CountTrue, cntp)
+#undef HWY_SVE_COUNT_TRUE
+
+// For 16-bit Compress: full vector, not limited to SV_POW2.
+namespace detail {
+
+// Like CountTrue, but the tag is ignored and the governing predicate is
+// svptrue_b##BITS(), so every lane of the full hardware vector is counted.
+#define HWY_SVE_COUNT_TRUE_FULL(BASE, CHAR, BITS, HALF, NAME, OP)            \
+  template <size_t N, int kPow2>                                             \
+  HWY_API size_t NAME(HWY_SVE_D(BASE, BITS, N, kPow2) /* d */, svbool_t m) { \
+    return sv##OP##_b##BITS(svptrue_b##BITS(), m);                           \
+  }
+
+HWY_SVE_FOREACH(HWY_SVE_COUNT_TRUE_FULL, CountTrueFull, cntp)
+#undef HWY_SVE_COUNT_TRUE_FULL
+
+}  // namespace detail
+
+// ------------------------------ AllFalse
+// Returns true if no lane selected by detail::MakeMask(d) is set in `m`.
+template <class D>
+HWY_API bool AllFalse(D d, svbool_t m) {
+  const svbool_t selected = detail::MakeMask(d);
+  const bool any_set = svptest_any(selected, m);
+  return !any_set;
+}
+
+// ------------------------------ AllTrue
+// Returns true if the number of set lanes in `m` equals Lanes(d).
+template <class D>
+HWY_API bool AllTrue(D d, svbool_t m) {
+  const size_t num_set = CountTrue(d, m);
+  return num_set == Lanes(d);
+}
+
+// ------------------------------ FindFirstTrue
+// Returns the index of the first set lane in `m`, or -1 if none is set.
+template <class D>
+HWY_API intptr_t FindFirstTrue(D d, svbool_t m) {
+  if (AllFalse(d, m)) return intptr_t{-1};
+  // svbrkb ("break before") keeps only the lanes preceding the first set lane,
+  // so counting them gives that lane's index.
+  const svbool_t before_first = svbrkb_b_z(detail::MakeMask(d), m);
+  return static_cast<intptr_t>(CountTrue(d, before_first));
+}
+
+// ------------------------------ FindKnownFirstTrue
+// Returns the index of the first set lane in `m`. Unlike FindFirstTrue there
+// is no check for an all-false mask.
+template <class D>
+HWY_API size_t FindKnownFirstTrue(D d, svbool_t m) {
+  // Lanes strictly before the first set lane; their count is the index.
+  const svbool_t before_first = svbrkb_b_z(detail::MakeMask(d), m);
+  return CountTrue(d, before_first);
+}
+
+// ------------------------------ IfThenElse
+// Generates IfThenElse(m, yes, no) for each lane type by forwarding to the
+// `sel` op: lanes where `m` is set come from `yes`, the rest from `no`.
+#define HWY_SVE_IF_THEN_ELSE(BASE, CHAR, BITS, HALF, NAME, OP)                \
+  HWY_API HWY_SVE_V(BASE, BITS)                                               \
+      NAME(svbool_t m, HWY_SVE_V(BASE, BITS) yes, HWY_SVE_V(BASE, BITS) no) { \
+    return sv##OP##_##CHAR##BITS(m, yes, no);                                 \
+  }
+
+HWY_SVE_FOREACH(HWY_SVE_IF_THEN_ELSE, IfThenElse, sel)
+#undef HWY_SVE_IF_THEN_ELSE
+
+// ------------------------------ IfThenElseZero
+// Returns `yes` where `mask` is set and zero elsewhere.
+template <class V>
+HWY_API V IfThenElseZero(const svbool_t mask, const V yes) {
+  const DFromV<V> d;
+  return IfThenElse(mask, yes, Zero(d));
+}
+
+// ------------------------------ IfThenZeroElse
+// Returns zero where `mask` is set and `no` elsewhere.
+template <class V>
+HWY_API V IfThenZeroElse(const svbool_t mask, const V no) {
+  const DFromV<V> d;
+  return IfThenElse(mask, Zero(d), no);
+}
+
+// ------------------------------ Additional mask logical operations
+// Returns a mask that is set for the lanes preceding the first set lane of `m`.
+HWY_API svbool_t SetBeforeFirst(svbool_t m) {
+  // The lane type is unknown here, so govern with an 8-bit predicate. With
+  // wider lanes the result is de-canonicalized: bits that do not belong to the
+  // lowest byte of a lane are also set to 1. Arm says such bits are ignored.
+  const svbool_t every_byte = HWY_SVE_PTRUE(8);
+  return svbrkb_b_z(every_byte, m);
+}
+
+// Returns a mask that is set up to and including the first set lane of `m`.
+HWY_API svbool_t SetAtOrBeforeFirst(svbool_t m) {
+  // The lane type is unknown here, so govern with an 8-bit predicate. With
+  // wider lanes the result is de-canonicalized: bits that do not belong to the
+  // lowest byte of a lane are also set to 1. Arm says such bits are ignored.
+  const svbool_t every_byte = HWY_SVE_PTRUE(8);
+  return svbrka_b_z(every_byte, m);
+}
+
+// Returns a mask with only the first set lane of `m` remaining. "Break after"
+// keeps lanes up to and including the first set one; because `m` is also the
+// zeroing governing predicate, all lanes not set in `m` are cleared.
+HWY_API svbool_t SetOnlyFirst(svbool_t m) { return svbrka_b_z(m, m); }
+
+// Returns a mask that is set from the first set lane of `m` onwards, computed
+// as the complement of the lanes before it.
+HWY_API svbool_t SetAtOrAfterFirst(svbool_t m) {
+  const svbool_t before_first = SetBeforeFirst(m);
+  return Not(before_first);
+}
+
+// ================================================== COMPARE
+
+// mask = f(vector, vector)
+// Generates NAME(a, b): a lane-wise comparison of two vectors via intrinsic
+// OP, governed by HWY_SVE_PTRUE(BITS), returning the result as a mask.
+#define HWY_SVE_COMPARE(BASE, CHAR, BITS, HALF, NAME, OP)                   \
+  HWY_API svbool_t NAME(HWY_SVE_V(BASE, BITS) a, HWY_SVE_V(BASE, BITS) b) { \
+    return sv##OP##_##CHAR##BITS(HWY_SVE_PTRUE(BITS), a, b);                \
+  }
+// Same, but the second operand is a scalar of the lane type (HWY_SVE_T); used
+// with the `_n` intrinsic forms.
+#define HWY_SVE_COMPARE_N(BASE, CHAR, BITS, HALF, NAME, OP)                 \
+  HWY_API svbool_t NAME(HWY_SVE_V(BASE, BITS) a, HWY_SVE_T(BASE, BITS) b) { \
+    return sv##OP##_##CHAR##BITS(HWY_SVE_PTRUE(BITS), a, b);                \
+  }
+
+// ------------------------------ Eq
+// Each comparison has a public vector/vector overload and an internal
+// detail::*N overload that compares against a scalar.
+HWY_SVE_FOREACH(HWY_SVE_COMPARE, Eq, cmpeq)
+namespace detail {
+HWY_SVE_FOREACH(HWY_SVE_COMPARE_N, EqN, cmpeq_n)
+}  // namespace detail
+
+// ------------------------------ Ne
+HWY_SVE_FOREACH(HWY_SVE_COMPARE, Ne, cmpne)
+namespace detail {
+HWY_SVE_FOREACH(HWY_SVE_COMPARE_N, NeN, cmpne_n)
+}  // namespace detail
+
+// ------------------------------ Lt
+HWY_SVE_FOREACH(HWY_SVE_COMPARE, Lt, cmplt)
+namespace detail {
+HWY_SVE_FOREACH(HWY_SVE_COMPARE_N, LtN, cmplt_n)
+}  // namespace detail
+
+// ------------------------------ Le
+HWY_SVE_FOREACH(HWY_SVE_COMPARE, Le, cmple)
+namespace detail {
+HWY_SVE_FOREACH(HWY_SVE_COMPARE_N, LeN, cmple_n)
+}  // namespace detail
+
+// ------------------------------ Gt/Ge (swapped order)
+// a > b is evaluated as b < a, reusing Lt with the operands exchanged.
+template <class V>
+HWY_API svbool_t Gt(const V a, const V b) {
+  return Lt(b, a);
+}
+// a >= b is evaluated as b <= a, reusing Le with the operands exchanged.
+template <class V>
+HWY_API svbool_t Ge(const V a, const V b) {
+  return Le(b, a);
+}
+// The scalar right-hand side cannot be swapped into the vector position, so
+// the vector/scalar forms use the cmpge/cmpgt intrinsics directly.
+namespace detail {
+HWY_SVE_FOREACH(HWY_SVE_COMPARE_N, GeN, cmpge_n)
+HWY_SVE_FOREACH(HWY_SVE_COMPARE_N, GtN, cmpgt_n)
+}  // namespace detail
+
+#undef HWY_SVE_COMPARE
+#undef HWY_SVE_COMPARE_N
+
+// ------------------------------ TestBit
+// Returns a mask of lanes where (a & bit) is non-zero.
+template <class V>
+HWY_API svbool_t TestBit(const V a, const V bit) {
+  const V isolated = And(a, bit);
+  return detail::NeN(isolated, 0);
+}
+
+// ------------------------------ MaskFromVec (Ne)
+// Returns a mask that is set for every lane of `v` that is not equal to zero.
+template <class V>
+HWY_API svbool_t MaskFromVec(const V v) {
+  using T = TFromV<V>;
+  return detail::NeN(v, static_cast<T>(0));
+}
+
+// ------------------------------ VecFromMask
+// Returns a vector with all bits set in lanes where `mask` is set and all
+// bits clear elsewhere.
+template <class D>
+HWY_API VFromD<D> VecFromMask(const D d, svbool_t mask) {
+  const RebindToSigned<D> di;
+  // Blending a constant generates MOV imm, whereas svdup_n_s8_z generates MOV
+  // scalar, which requires an extra instruction plus M0 pipeline.
+  const VFromD<decltype(di)> all_ones = Set(di, -1);
+  return BitCast(d, IfThenElseZero(mask, all_ones));
+}
+
+// ------------------------------ IfVecThenElse (MaskFromVec, IfThenElse)
+
+// IfVecThenElse(mask, yes, no) selects bit by bit: result bits come from
+// `yes` where `mask` has a 1 bit and from `no` elsewhere.
+#if HWY_SVE_HAVE_2
+
+// With SVE2, use the `bsl` (bitwise select) op. The select vector is its
+// last operand, hence the (yes, no, mask) argument order.
+#define HWY_SVE_IF_VEC(BASE, CHAR, BITS, HALF, NAME, OP)          \
+  HWY_API HWY_SVE_V(BASE, BITS)                                   \
+      NAME(HWY_SVE_V(BASE, BITS) mask, HWY_SVE_V(BASE, BITS) yes, \
+           HWY_SVE_V(BASE, BITS) no) {                            \
+    return sv##OP##_##CHAR##BITS(yes, no, mask);                  \
+  }
+
+HWY_SVE_FOREACH_UI(HWY_SVE_IF_VEC, IfVecThenElse, bsl)
+#undef HWY_SVE_IF_VEC
+
+// The macro above is only instantiated for integer lanes, so floating-point
+// vectors are routed through their unsigned counterpart and cast back.
+template <class V, HWY_IF_FLOAT_V(V)>
+HWY_API V IfVecThenElse(const V mask, const V yes, const V no) {
+  const DFromV<V> d;
+  const RebindToUnsigned<decltype(d)> du;
+  return BitCast(
+      d, IfVecThenElse(BitCast(du, mask), BitCast(du, yes), BitCast(du, no)));
+}
+
+#else
+
+// Without SVE2: (mask & yes) | (~mask & no).
+template <class V>
+HWY_API V IfVecThenElse(const V mask, const V yes, const V no) {
+  return Or(And(mask, yes), AndNot(mask, no));
+}
+
+#endif  // HWY_SVE_HAVE_2
+
+// ------------------------------ BitwiseIfThenElse
+
+// Toggles the flag: it becomes defined if it was undefined, and vice versa.
+// Presumably this stops generic code from supplying its own
+// BitwiseIfThenElse -- confirm against generic_ops-inl.h.
+#ifdef HWY_NATIVE_BITWISE_IF_THEN_ELSE
+#undef HWY_NATIVE_BITWISE_IF_THEN_ELSE
+#else
+#define HWY_NATIVE_BITWISE_IF_THEN_ELSE
+#endif
+
+// Alias of IfVecThenElse: per bit, `yes` where `mask` is 1, else `no`.
+template <class V>
+HWY_API V BitwiseIfThenElse(V mask, V yes, V no) {
+  return IfVecThenElse(mask, yes, no);
+}
+
+// ------------------------------ CopySign (BitwiseIfThenElse)
+// Returns a value with the sign bit of `sign` and all other bits of `magn`.
+template <class V>
+HWY_API V CopySign(const V magn, const V sign) {
+  const DFromV<decltype(magn)> d;
+  // Only the sign-bit position is selected from `sign`.
+  const V sign_bit_only = SignBit(d);
+  return BitwiseIfThenElse(sign_bit_only, sign, magn);
+}
+
+// ------------------------------ CopySignToAbs
+// Like CopySign, but `abs` is named as an absolute value, which permits the
+// OR-based path below.
+template <class V>
+HWY_API V CopySignToAbs(const V abs, const V sign) {
+#if !HWY_SVE_HAVE_2
+  // abs | (SignBit & sign).
+  const DFromV<V> d;
+  return OrAnd(abs, SignBit(d), sign);
+#else
+  // With SVE2, CopySign is more efficient than OrAnd.
+  return CopySign(abs, sign);
+#endif
+}
+
+// ------------------------------ Floating-point classification (Ne)
+
+// Returns a mask of lanes holding NaN, detected as lanes that compare unequal
+// to themselves.
+template <class V>
+HWY_API svbool_t IsNaN(const V v) {
+  return Ne(v, v);  // could also use cmpuo
+}
+
+// Returns a mask of lanes holding positive or negative infinity.
+template <class V>
+HWY_API svbool_t IsInf(const V v) {
+  using T = TFromV<V>;
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;
+  const VFromD<decltype(di)> bits = BitCast(di, v);
+  // Adding the bits to themselves acts as a left shift by one, which discards
+  // the sign bit.
+  const VFromD<decltype(di)> without_sign = Add(bits, bits);
+  // Infinity is exponent=max with mantissa=0.
+  const svbool_t is_inf =
+      detail::EqN(without_sign, hwy::MaxExponentTimes2<T>());
+  return RebindMask(d, is_inf);
+}
+
+// Returns whether normal/subnormal/zero.
+// Returns a mask of lanes that are normal, subnormal or zero, i.e. neither
+// infinity nor NaN.
+template <class V>
+HWY_API svbool_t IsFinite(const V v) {
+  using T = TFromV<V>;
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const RebindToSigned<decltype(d)> di;  // cheaper than unsigned comparison
+  const VFromD<decltype(du)> bits = BitCast(du, v);
+  // Doubling acts as a left shift by one and discards the sign bit.
+  const VFromD<decltype(du)> without_sign = Add(bits, bits);
+  // Shift right so that only the exponent field remains. Comparing against
+  // MaxExponentTimes2 directly is not possible because it is negative and
+  // non-negative floats would be greater.
+  const VFromD<decltype(di)> exponent =
+      BitCast(di, ShiftRight<hwy::MantissaBits<T>() + 1>(without_sign));
+  const svbool_t below_max =
+      detail::LtN(exponent, hwy::MaxExponentField<T>());
+  return RebindMask(d, below_max);
+}
+
+// ================================================== MEMORY
+
+// ------------------------------ Load/MaskedLoad/LoadDup128/Store/Stream
+
+// NAME(d, p): loads from `p`, governed by detail::MakeMask(d).
+#define HWY_SVE_LOAD(BASE, CHAR, BITS, HALF, NAME, OP)     \
+  template <size_t N, int kPow2>                           \
+  HWY_API HWY_SVE_V(BASE, BITS)                            \
+      NAME(HWY_SVE_D(BASE, BITS, N, kPow2) d,              \
+           const HWY_SVE_T(BASE, BITS) * HWY_RESTRICT p) { \
+    return sv##OP##_##CHAR##BITS(detail::MakeMask(d), p);  \
+  }
+
+// NAME(m, d, p): loads from `p` governed by the caller's mask `m`; the tag is
+// unused.
+#define HWY_SVE_MASKED_LOAD(BASE, CHAR, BITS, HALF, NAME, OP)   \
+  template <size_t N, int kPow2>                                \
+  HWY_API HWY_SVE_V(BASE, BITS)                                 \
+      NAME(svbool_t m, HWY_SVE_D(BASE, BITS, N, kPow2) /* d */, \
+           const HWY_SVE_T(BASE, BITS) * HWY_RESTRICT p) {      \
+    return sv##OP##_##CHAR##BITS(m, p);                         \
+  }
+
+// NAME(d, p): load for the 128-bit replicating op (instantiated with ld1rq
+// below); the tag is unused and the predicate is HWY_SVE_PTRUE(8).
+#define HWY_SVE_LOAD_DUP128(BASE, CHAR, BITS, HALF, NAME, OP) \
+  template <size_t N, int kPow2>                              \
+  HWY_API HWY_SVE_V(BASE, BITS)                               \
+      NAME(HWY_SVE_D(BASE, BITS, N, kPow2) /* d */,           \
+           const HWY_SVE_T(BASE, BITS) * HWY_RESTRICT p) {    \
+    /* All-true predicate to load all 128 bits. */            \
+    return sv##OP##_##CHAR##BITS(HWY_SVE_PTRUE(8), p);        \
+  }
+
+// NAME(v, d, p): stores `v` to `p`, governed by detail::MakeMask(d).
+#define HWY_SVE_STORE(BASE, CHAR, BITS, HALF, NAME, OP)       \
+  template <size_t N, int kPow2>                              \
+  HWY_API void NAME(HWY_SVE_V(BASE, BITS) v,                  \
+                    HWY_SVE_D(BASE, BITS, N, kPow2) d,        \
+                    HWY_SVE_T(BASE, BITS) * HWY_RESTRICT p) { \
+    sv##OP##_##CHAR##BITS(detail::MakeMask(d), p, v);         \
+  }
+
+// NAME(v, m, d, p): stores `v` to `p` governed by the caller's mask `m`; the
+// tag is unused.
+#define HWY_SVE_BLENDED_STORE(BASE, CHAR, BITS, HALF, NAME, OP) \
+  template <size_t N, int kPow2>                                \
+  HWY_API void NAME(HWY_SVE_V(BASE, BITS) v, svbool_t m,        \
+                    HWY_SVE_D(BASE, BITS, N, kPow2) /* d */,    \
+                    HWY_SVE_T(BASE, BITS) * HWY_RESTRICT p) {   \
+    sv##OP##_##CHAR##BITS(m, p, v);                             \
+  }
+
+// Stream uses the stnt1 intrinsic (non-temporal store per ACLE); everything
+// else maps to the ld1/st1 intrinsics.
+HWY_SVE_FOREACH(HWY_SVE_LOAD, Load, ld1)
+HWY_SVE_FOREACH(HWY_SVE_MASKED_LOAD, MaskedLoad, ld1)
+HWY_SVE_FOREACH(HWY_SVE_STORE, Store, st1)
+HWY_SVE_FOREACH(HWY_SVE_STORE, Stream, stnt1)
+HWY_SVE_FOREACH(HWY_SVE_BLENDED_STORE, BlendedStore, st1)
+
+// Same set of functions again through the bfloat16 iteration macro.
+HWY_SVE_FOREACH_BF16(HWY_SVE_LOAD, Load, ld1)
+HWY_SVE_FOREACH_BF16(HWY_SVE_MASKED_LOAD, MaskedLoad, ld1)
+HWY_SVE_FOREACH_BF16(HWY_SVE_STORE, Store, st1)
+HWY_SVE_FOREACH_BF16(HWY_SVE_STORE, Stream, stnt1)
+HWY_SVE_FOREACH_BF16(HWY_SVE_BLENDED_STORE, BlendedStore, st1)
+
+// LoadDupFull128 is only generated when the target is not HWY_SVE2_128.
+#if HWY_TARGET != HWY_SVE2_128
+namespace detail {
+HWY_SVE_FOREACH(HWY_SVE_LOAD_DUP128, LoadDupFull128, ld1rq)
+}  // namespace detail
+#endif  // HWY_TARGET != HWY_SVE2_128
+
+#undef HWY_SVE_LOAD
+#undef HWY_SVE_MASKED_LOAD
+#undef HWY_SVE_LOAD_DUP128
+#undef HWY_SVE_STORE
+#undef HWY_SVE_BLENDED_STORE
+
+#if !HWY_SVE_HAVE_BFLOAT16
+
+// bfloat16 load for builds where HWY_SVE_HAVE_BFLOAT16 is 0: reinterprets the
+// pointer and performs the load through the uint16_t overload.
+template <size_t N, int kPow2>
+HWY_API VBF16 Load(Simd<bfloat16_t, N, kPow2> d,
+                   const bfloat16_t* HWY_RESTRICT p) {
+  const RebindToUnsigned<decltype(d)> du;
+  const uint16_t* HWY_RESTRICT bits = reinterpret_cast<const uint16_t*>(p);
+  return Load(du, bits);
+}
+
+#endif  // !HWY_SVE_HAVE_BFLOAT16
+
+// LoadDup128(d, p) picks one of several implementations depending on the
+// target and on the byte size selected by the HWY_IF_V_SIZE_* constraints.
+#if HWY_TARGET == HWY_SVE2_128
+// On the HWY_SVE2_128 target, LoadDup128 is the same as Load since vectors
+// cannot exceed 16 bytes on the HWY_SVE2_128 target.
+template <class D>
+HWY_API VFromD<D> LoadDup128(D d, const TFromD<D>* HWY_RESTRICT p) {
+  return Load(d, p);
+}
+#else
+// If D().MaxBytes() <= 16 is true, simply do a Load operation.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> LoadDup128(D d, const TFromD<D>* HWY_RESTRICT p) {
+  return Load(d, p);
+}
+
+// If D().MaxBytes() > 16 is true, need to load the vector using ld1rq
+// (bfloat16 is excluded here and handled by the overload below).
+template <class D, HWY_IF_V_SIZE_GT_D(D, 16),
+          hwy::EnableIf<!IsSame<TFromD<D>, bfloat16_t>()>* = nullptr>
+HWY_API VFromD<D> LoadDup128(D d, const TFromD<D>* HWY_RESTRICT p) {
+  return detail::LoadDupFull128(d, p);
+}
+
+#if !HWY_SVE_HAVE_BFLOAT16
+
+// bfloat16 variant for builds where HWY_SVE_HAVE_BFLOAT16 is 0: goes through
+// the uint16_t LoadDupFull128 via a pointer reinterpretation.
+template <class D, HWY_IF_V_SIZE_GT_D(D, 16), HWY_IF_BF16_D(D)>
+HWY_API VBF16 LoadDup128(D d, const bfloat16_t* HWY_RESTRICT p) {
+  return detail::LoadDupFull128(
+      RebindToUnsigned<decltype(d)>(),
+      reinterpret_cast<const uint16_t * HWY_RESTRICT>(p));
+}
+#endif  // !HWY_SVE_HAVE_BFLOAT16
+
+#endif  // HWY_TARGET != HWY_SVE2_128
+
+#if !HWY_SVE_HAVE_BFLOAT16
+
+// bfloat16 store for builds where HWY_SVE_HAVE_BFLOAT16 is 0: reinterprets the
+// pointer and performs the store through the uint16_t overload.
+template <size_t N, int kPow2>
+HWY_API void Store(VBF16 v, Simd<bfloat16_t, N, kPow2> d,
+                   bfloat16_t* HWY_RESTRICT p) {
+  const RebindToUnsigned<decltype(d)> du;
+  uint16_t* HWY_RESTRICT bits = reinterpret_cast<uint16_t*>(p);
+  Store(v, du, bits);
+}
+
+#endif  // !HWY_SVE_HAVE_BFLOAT16
+
+// ------------------------------ Load/StoreU
+
+// SVE only requires lane alignment, not natural alignment of the entire
+// vector.
+// Unaligned load. Forwards to Load, which is already sufficient on SVE.
+template <class D>
+HWY_API VFromD<D> LoadU(D d, const TFromD<D>* HWY_RESTRICT p) {
+  return Load(d, p);
+}
+
+// Unaligned store. Forwards to Store; no separate unaligned path is needed.
+template <class V, class D>
+HWY_API void StoreU(const V v, D d, TFromD<D>* HWY_RESTRICT p) {
+  Store(v, d, p);
+}
+
+// ------------------------------ MaskedLoadOr
+
+// SVE MaskedLoad hard-codes zero, so this requires an extra blend.
+// Loads lanes where `m` is set and takes the remaining lanes from `v`.
+template <class D>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, MFromD<D> m, D d,
+                               const TFromD<D>* HWY_RESTRICT p) {
+  // MaskedLoad yields zero in unselected lanes, so blend in `v` afterwards.
+  const VFromD<D> loaded = MaskedLoad(m, d, p);
+  return IfThenElse(m, loaded, v);
+}
+
+// ------------------------------ ScatterOffset/Index
+
+// Toggles the flag: it becomes defined if it was undefined, and vice versa.
+// Presumably this stops generic code from supplying its own scatter ops --
+// confirm against generic_ops-inl.h.
+#ifdef HWY_NATIVE_SCATTER
+#undef HWY_NATIVE_SCATTER
+#else
+#define HWY_NATIVE_SCATTER
+#endif
+
+// NAME(v, d, base, offset): scatter store using the `offset` intrinsic form
+// with a signed offset vector of the same lane width, governed by
+// detail::MakeMask(d).
+#define HWY_SVE_SCATTER_OFFSET(BASE, CHAR, BITS, HALF, NAME, OP)             \
+  template <size_t N, int kPow2>                                             \
+  HWY_API void NAME(HWY_SVE_V(BASE, BITS) v,                                 \
+                    HWY_SVE_D(BASE, BITS, N, kPow2) d,                       \
+                    HWY_SVE_T(BASE, BITS) * HWY_RESTRICT base,               \
+                    HWY_SVE_V(int, BITS) offset) {                           \
+    sv##OP##_s##BITS##offset_##CHAR##BITS(detail::MakeMask(d), base, offset, \
+                                          v);                                \
+  }
+
+// NAME(v, m, d, base, index): scatter store using the `index` intrinsic form,
+// governed by the caller's mask `m`; the tag is unused.
+#define HWY_SVE_MASKED_SCATTER_INDEX(BASE, CHAR, BITS, HALF, NAME, OP) \
+  template <size_t N, int kPow2>                                       \
+  HWY_API void NAME(HWY_SVE_V(BASE, BITS) v, svbool_t m,               \
+                    HWY_SVE_D(BASE, BITS, N, kPow2) /*d*/,             \
+                    HWY_SVE_T(BASE, BITS) * HWY_RESTRICT base,         \
+                    HWY_SVE_V(int, BITS) index) {                      \
+    sv##OP##_s##BITS##index_##CHAR##BITS(m, base, index, v);           \
+  }
+
+// Instantiated via HWY_SVE_FOREACH_UIF3264 (by its name, the 32- and 64-bit
+// unsigned/signed/float lane types).
+HWY_SVE_FOREACH_UIF3264(HWY_SVE_SCATTER_OFFSET, ScatterOffset, st1_scatter)
+HWY_SVE_FOREACH_UIF3264(HWY_SVE_MASKED_SCATTER_INDEX, MaskedScatterIndex,
+                        st1_scatter)
+#undef HWY_SVE_SCATTER_OFFSET
+#undef HWY_SVE_MASKED_SCATTER_INDEX
+
+// Scatters the lanes of `v` to p[indices[i]]. Implemented as
+// MaskedScatterIndex with the mask returned by detail::MakeMask(d).
+template <class D>
+HWY_API void ScatterIndex(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT p,
+                          VFromD<RebindToSigned<D>> indices) {
+  const svbool_t lanes_of_d = detail::MakeMask(d);
+  MaskedScatterIndex(v, lanes_of_d, d, p, indices);
+}
+
+// ------------------------------ GatherOffset/Index
+
+// Toggles the flag: it becomes defined if it was undefined, and vice versa.
+// Presumably this stops generic code from supplying its own gather ops --
+// confirm against generic_ops-inl.h.
+#ifdef HWY_NATIVE_GATHER
+#undef HWY_NATIVE_GATHER
+#else
+#define HWY_NATIVE_GATHER
+#endif
+
+// NAME(d, base, offset): gather load using the `offset` intrinsic form with a
+// signed offset vector of the same lane width, governed by
+// detail::MakeMask(d).
+#define HWY_SVE_GATHER_OFFSET(BASE, CHAR, BITS, HALF, NAME, OP)             \
+  template <size_t N, int kPow2>                                            \
+  HWY_API HWY_SVE_V(BASE, BITS)                                             \
+      NAME(HWY_SVE_D(BASE, BITS, N, kPow2) d,                               \
+           const HWY_SVE_T(BASE, BITS) * HWY_RESTRICT base,                 \
+           HWY_SVE_V(int, BITS) offset) {                                   \
+    return sv##OP##_s##BITS##offset_##CHAR##BITS(detail::MakeMask(d), base, \
+                                                 offset);                   \
+  }
+// NAME(m, d, base, index): gather load using the `index` intrinsic form,
+// governed by the caller's mask `m`; the tag is unused.
+#define HWY_SVE_MASKED_GATHER_INDEX(BASE, CHAR, BITS, HALF, NAME, OP) \
+  template <size_t N, int kPow2>                                      \
+  HWY_API HWY_SVE_V(BASE, BITS)                                       \
+      NAME(svbool_t m, HWY_SVE_D(BASE, BITS, N, kPow2) /*d*/,         \
+           const HWY_SVE_T(BASE, BITS) * HWY_RESTRICT base,           \
+           HWY_SVE_V(int, BITS) index) {                              \
+    return sv##OP##_s##BITS##index_##CHAR##BITS(m, base, index);      \
+  }
+
+// Instantiated via HWY_SVE_FOREACH_UIF3264 (by its name, the 32- and 64-bit
+// unsigned/signed/float lane types).
+HWY_SVE_FOREACH_UIF3264(HWY_SVE_GATHER_OFFSET, GatherOffset, ld1_gather)
+HWY_SVE_FOREACH_UIF3264(HWY_SVE_MASKED_GATHER_INDEX, MaskedGatherIndex,
+                        ld1_gather)
+#undef HWY_SVE_GATHER_OFFSET
+#undef HWY_SVE_MASKED_GATHER_INDEX
+
+// Gathers p[indices[i]] into lane i. Implemented as MaskedGatherIndex with
+// the mask returned by detail::MakeMask(d).
+template <class D>
+HWY_API VFromD<D> GatherIndex(D d, const TFromD<D>* HWY_RESTRICT p,
+                              VFromD<RebindToSigned<D>> indices) {
+  const svbool_t lanes_of_d = detail::MakeMask(d);
+  return MaskedGatherIndex(lanes_of_d, d, p, indices);
+}
+
+// ------------------------------ LoadInterleaved2
+
+// Per-target flag to prevent generic_ops-inl.h from defining LoadInterleaved2.
+#ifdef HWY_NATIVE_LOAD_STORE_INTERLEAVED
+#undef HWY_NATIVE_LOAD_STORE_INTERLEAVED
+#else
+#define HWY_NATIVE_LOAD_STORE_INTERLEAVED
+#endif
+
+// NAME(d, unaligned, v0, v1): performs one `ld2` structure load governed by
+// detail::MakeMask(d) and writes the two vectors of the returned tuple to
+// v0 and v1.
+#define HWY_SVE_LOAD2(BASE, CHAR, BITS, HALF, NAME, OP)                       \
+  template <size_t N, int kPow2>                                              \
+  HWY_API void NAME(HWY_SVE_D(BASE, BITS, N, kPow2) d,                        \
+                    const HWY_SVE_T(BASE, BITS) * HWY_RESTRICT unaligned,     \
+                    HWY_SVE_V(BASE, BITS) & v0, HWY_SVE_V(BASE, BITS) & v1) { \
+    const HWY_SVE_TUPLE(BASE, BITS, 2) tuple =                                \
+        sv##OP##_##CHAR##BITS(detail::MakeMask(d), unaligned);                \
+    v0 = svget2(tuple, 0);                                                    \
+    v1 = svget2(tuple, 1);                                                    \
+  }
+HWY_SVE_FOREACH(HWY_SVE_LOAD2, LoadInterleaved2, ld2)
+
+#undef HWY_SVE_LOAD2
+
+// ------------------------------ LoadInterleaved3
+
+// NAME(d, unaligned, v0, v1, v2): performs one `ld3` structure load governed
+// by detail::MakeMask(d) and writes the three vectors of the returned tuple
+// to v0..v2.
+#define HWY_SVE_LOAD3(BASE, CHAR, BITS, HALF, NAME, OP)                     \
+  template <size_t N, int kPow2>                                            \
+  HWY_API void NAME(HWY_SVE_D(BASE, BITS, N, kPow2) d,                      \
+                    const HWY_SVE_T(BASE, BITS) * HWY_RESTRICT unaligned,   \
+                    HWY_SVE_V(BASE, BITS) & v0, HWY_SVE_V(BASE, BITS) & v1, \
+                    HWY_SVE_V(BASE, BITS) & v2) {                           \
+    const HWY_SVE_TUPLE(BASE, BITS, 3) tuple =                              \
+        sv##OP##_##CHAR##BITS(detail::MakeMask(d), unaligned);              \
+    v0 = svget3(tuple, 0);                                                  \
+    v1 = svget3(tuple, 1);                                                  \
+    v2 = svget3(tuple, 2);                                                  \
+  }
+HWY_SVE_FOREACH(HWY_SVE_LOAD3, LoadInterleaved3, ld3)
+
+#undef HWY_SVE_LOAD3
+
+// ------------------------------ LoadInterleaved4
+
+// NAME(d, unaligned, v0, v1, v2, v3): performs one `ld4` structure load
+// governed by detail::MakeMask(d) and writes the four vectors of the returned
+// tuple to v0..v3.
+#define HWY_SVE_LOAD4(BASE, CHAR, BITS, HALF, NAME, OP)                       \
+  template <size_t N, int kPow2>                                              \
+  HWY_API void NAME(HWY_SVE_D(BASE, BITS, N, kPow2) d,                        \
+                    const HWY_SVE_T(BASE, BITS) * HWY_RESTRICT unaligned,     \
+                    HWY_SVE_V(BASE, BITS) & v0, HWY_SVE_V(BASE, BITS) & v1,   \
+                    HWY_SVE_V(BASE, BITS) & v2, HWY_SVE_V(BASE, BITS) & v3) { \
+    const HWY_SVE_TUPLE(BASE, BITS, 4) tuple =                                \
+        sv##OP##_##CHAR##BITS(detail::MakeMask(d), unaligned);                \
+    v0 = svget4(tuple, 0);                                                    \
+    v1 = svget4(tuple, 1);                                                    \
+    v2 = svget4(tuple, 2);                                                    \
+    v3 = svget4(tuple, 3);                                                    \
+  }
+HWY_SVE_FOREACH(HWY_SVE_LOAD4, LoadInterleaved4, ld4)
+
+#undef HWY_SVE_LOAD4
+
+// ------------------------------ StoreInterleaved2
+
+// NAME(v0, v1, d, unaligned): packs v0 and v1 into a tuple via Create2 and
+// writes it with one `st2` structure store governed by detail::MakeMask(d).
+#define HWY_SVE_STORE2(BASE, CHAR, BITS, HALF, NAME, OP)                       \
+  template <size_t N, int kPow2>                                               \
+  HWY_API void NAME(HWY_SVE_V(BASE, BITS) v0, HWY_SVE_V(BASE, BITS) v1,        \
+                    HWY_SVE_D(BASE, BITS, N, kPow2) d,                         \
+                    HWY_SVE_T(BASE, BITS) * HWY_RESTRICT unaligned) {          \
+    sv##OP##_##CHAR##BITS(detail::MakeMask(d), unaligned, Create2(d, v0, v1)); \
+  }
+HWY_SVE_FOREACH(HWY_SVE_STORE2, StoreInterleaved2, st2)
+
+#undef HWY_SVE_STORE2
+
+// ------------------------------ StoreInterleaved3
+
+// NAME(v0, v1, v2, d, unaligned): packs the three vectors into a tuple via
+// Create3 and writes it with one `st3` structure store governed by
+// detail::MakeMask(d).
+#define HWY_SVE_STORE3(BASE, CHAR, BITS, HALF, NAME, OP)                \
+  template <size_t N, int kPow2>                                        \
+  HWY_API void NAME(HWY_SVE_V(BASE, BITS) v0, HWY_SVE_V(BASE, BITS) v1, \
+                    HWY_SVE_V(BASE, BITS) v2,                           \
+                    HWY_SVE_D(BASE, BITS, N, kPow2) d,                  \
+                    HWY_SVE_T(BASE, BITS) * HWY_RESTRICT unaligned) {   \
+    sv##OP##_##CHAR##BITS(detail::MakeMask(d), unaligned,               \
+                          Create3(d, v0, v1, v2));                      \
+  }
+HWY_SVE_FOREACH(HWY_SVE_STORE3, StoreInterleaved3, st3)
+
+#undef HWY_SVE_STORE3
+
+// ------------------------------ StoreInterleaved4
+
+// NAME(v0, v1, v2, v3, d, unaligned): packs the four vectors into a tuple via
+// Create4 and writes it with one `st4` structure store governed by
+// detail::MakeMask(d).
+#define HWY_SVE_STORE4(BASE, CHAR, BITS, HALF, NAME, OP)                \
+  template <size_t N, int kPow2>                                        \
+  HWY_API void NAME(HWY_SVE_V(BASE, BITS) v0, HWY_SVE_V(BASE, BITS) v1, \
+                    HWY_SVE_V(BASE, BITS) v2, HWY_SVE_V(BASE, BITS) v3, \
+                    HWY_SVE_D(BASE, BITS, N, kPow2) d,                  \
+                    HWY_SVE_T(BASE, BITS) * HWY_RESTRICT unaligned) {   \
+    sv##OP##_##CHAR##BITS(detail::MakeMask(d), unaligned,               \
+                          Create4(d, v0, v1, v2, v3));                  \
+  }
+HWY_SVE_FOREACH(HWY_SVE_STORE4, StoreInterleaved4, st4)
+
+#undef HWY_SVE_STORE4
+
+// ================================================== CONVERT
+
+// ------------------------------ PromoteTo
+
+// Same sign
+// NAME(tag, v): takes a vector with HALF-width lanes and returns one with
+// BITS-width lanes via intrinsic OP; the tag only selects the overload. With
+// `unpklo`, the lower half of the input lanes is widened. This macro stays
+// defined past this section; it is #undef'd after its last use further below.
+#define HWY_SVE_PROMOTE_TO(BASE, CHAR, BITS, HALF, NAME, OP)                \
+  template <size_t N, int kPow2>                                            \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME(                                       \
+      HWY_SVE_D(BASE, BITS, N, kPow2) /* tag */, HWY_SVE_V(BASE, HALF) v) { \
+    return sv##OP##_##CHAR##BITS(v);                                        \
+  }
+
+HWY_SVE_FOREACH_UI16(HWY_SVE_PROMOTE_TO, PromoteTo, unpklo)
+HWY_SVE_FOREACH_UI32(HWY_SVE_PROMOTE_TO, PromoteTo, unpklo)
+HWY_SVE_FOREACH_UI64(HWY_SVE_PROMOTE_TO, PromoteTo, unpklo)
+
+// 2x
+// Each overload below widens by a factor of four as two doubling steps,
+// reusing the single-step PromoteTo overloads.
+
+// u8 -> u16 -> u32.
+template <size_t N, int kPow2>
+HWY_API svuint32_t PromoteTo(Simd<uint32_t, N, kPow2> dto, svuint8_t vfrom) {
+  const RepartitionToWide<DFromV<decltype(vfrom)>> d16;
+  const svuint16_t v16 = PromoteTo(d16, vfrom);
+  return PromoteTo(dto, v16);
+}
+// i8 -> i16 -> i32.
+template <size_t N, int kPow2>
+HWY_API svint32_t PromoteTo(Simd<int32_t, N, kPow2> dto, svint8_t vfrom) {
+  const RepartitionToWide<DFromV<decltype(vfrom)>> d16;
+  const svint16_t v16 = PromoteTo(d16, vfrom);
+  return PromoteTo(dto, v16);
+}
+// u16 -> u32 -> u64.
+template <size_t N, int kPow2>
+HWY_API svuint64_t PromoteTo(Simd<uint64_t, N, kPow2> dto, svuint16_t vfrom) {
+  const RepartitionToWide<DFromV<decltype(vfrom)>> d32;
+  const svuint32_t v32 = PromoteTo(d32, vfrom);
+  return PromoteTo(dto, v32);
+}
+// i16 -> i32 -> i64.
+template <size_t N, int kPow2>
+HWY_API svint64_t PromoteTo(Simd<int64_t, N, kPow2> dto, svint16_t vfrom) {
+  const RepartitionToWide<DFromV<decltype(vfrom)>> d32;
+  const svint32_t v32 = PromoteTo(d32, vfrom);
+  return PromoteTo(dto, v32);
+}
+
+// 3x
+// Widening by a factor of eight is done as three doubling steps; the
+// intermediate tags are derived by repeatedly narrowing the destination tag.
+
+// u8 -> u16 -> u32 -> u64.
+template <size_t N, int kPow2>
+HWY_API svuint64_t PromoteTo(Simd<uint64_t, N, kPow2> dto, svuint8_t vfrom) {
+  const RepartitionToNarrow<decltype(dto)> d32;
+  const RepartitionToNarrow<decltype(d32)> d16;
+  const svuint16_t v16 = PromoteTo(d16, vfrom);
+  const svuint32_t v32 = PromoteTo(d32, v16);
+  return PromoteTo(dto, v32);
+}
+// i8 -> i16 -> i32 -> i64.
+template <size_t N, int kPow2>
+HWY_API svint64_t PromoteTo(Simd<int64_t, N, kPow2> dto, svint8_t vfrom) {
+  const RepartitionToNarrow<decltype(dto)> d32;
+  const RepartitionToNarrow<decltype(d32)> d16;
+  const svint16_t v16 = PromoteTo(d16, vfrom);
+  const svint32_t v32 = PromoteTo(d32, v16);
+  return PromoteTo(dto, v32);
+}
+
+// Sign change
+// Unsigned source to wider signed destination: promote within the unsigned
+// domain, then reinterpret the bits as signed.
+template <class D, class V, HWY_IF_SIGNED_D(D), HWY_IF_UNSIGNED_V(V),
+          HWY_IF_LANES_GT(sizeof(TFromD<D>), sizeof(TFromV<V>))>
+HWY_API VFromD<D> PromoteTo(D di, V v) {
+  const RebindToUnsigned<decltype(di)> du;
+  const VFromD<decltype(du)> widened = PromoteTo(du, v);
+  return BitCast(di, widened);
+}
+
+// ------------------------------ PromoteTo F
+
+// Per-target flag to prevent generic_ops-inl.h from defining f16 conversions.
+#ifdef HWY_NATIVE_F16C
+#undef HWY_NATIVE_F16C
+#else
+#define HWY_NATIVE_F16C
+#endif
+
+// Unlike Highway's ZipLower, this returns the same type.
+// Maps to the `zip1` op, which interleaves the lower halves of its two inputs.
+namespace detail {
+HWY_SVE_FOREACH(HWY_SVE_RETV_ARGVV, ZipLowerSame, zip1)
+}  // namespace detail
+
+// Converts the lower f16 lanes of `v` to f32.
+template <size_t N, int kPow2>
+HWY_API svfloat32_t PromoteTo(Simd<float32_t, N, kPow2> /* d */,
+                              const svfloat16_t v) {
+  // svcvt* reads its inputs from the even lanes, whereas Highway wants the
+  // lower lanes. Zipping `v` with itself duplicates every lower lane so that
+  // each one also occupies an even position.
+  const svfloat16_t duplicated = detail::ZipLowerSame(v, v);
+  const svbool_t pg = detail::PTrue(Simd<float16_t, N, kPow2>());
+  return svcvt_f32_f16_x(pg, duplicated);
+}
+
+// Converts the lower f32 lanes of `v` to f64.
+template <size_t N, int kPow2>
+HWY_API svfloat64_t PromoteTo(Simd<float64_t, N, kPow2> /* d */,
+                              const svfloat32_t v) {
+  // Duplicate each lower lane so it also occupies an even lane before
+  // converting.
+  const svfloat32_t duplicated = detail::ZipLowerSame(v, v);
+  const svbool_t pg = detail::PTrue(Simd<float32_t, N, kPow2>());
+  return svcvt_f64_f32_x(pg, duplicated);
+}
+
+// Converts the lower i32 lanes of `v` to f64.
+template <size_t N, int kPow2>
+HWY_API svfloat64_t PromoteTo(Simd<float64_t, N, kPow2> /* d */,
+                              const svint32_t v) {
+  // Duplicate each lower lane so it also occupies an even lane before
+  // converting.
+  const svint32_t duplicated = detail::ZipLowerSame(v, v);
+  const svbool_t pg = detail::PTrue(Simd<int32_t, N, kPow2>());
+  return svcvt_f64_s32_x(pg, duplicated);
+}
+
+// For 16-bit Compress
+namespace detail {
+// Counterpart of the same-sign PromoteTo that widens the upper half of the
+// input lanes (`unpkhi`), for 32-bit integer destinations.
+HWY_SVE_FOREACH_UI32(HWY_SVE_PROMOTE_TO, PromoteUpperTo, unpkhi)
+#undef HWY_SVE_PROMOTE_TO
+
+// Overload for float destination and f16 source. Note that this does not
+// perform a float conversion: the f16 bits are reinterpreted as u16,
+// zero-extended to u32 by the integer overload, and the resulting bits are
+// returned as f32.
+template <size_t N, int kPow2>
+HWY_API svfloat32_t PromoteUpperTo(Simd<float, N, kPow2> df, svfloat16_t v) {
+  const RebindToUnsigned<decltype(df)> du;
+  const RepartitionToNarrow<decltype(du)> dn;
+  return BitCast(df, PromoteUpperTo(du, BitCast(dn, v)));
+}
+
+}  // namespace detail
+
+// ------------------------------ DemoteTo U
+
+namespace detail {
+
+// Saturates unsigned vectors to half/quarter-width TN: every lane is clamped
+// to at most LimitsMax<TN>(). The lane width itself is not changed.
+template <typename TN, class VU>
+VU SaturateU(VU v) {
+  return detail::MinN(v, static_cast<TFromV<VU>>(LimitsMax<TN>()));
+}
+
+// Saturates signed vectors to half/quarter-width TN: every lane is clamped to
+// the range [LimitsMin<TN>(), LimitsMax<TN>()]. The lane width itself is not
+// changed.
+template <typename TN, class VI>
+VI SaturateI(VI v) {
+  return detail::MinN(detail::MaxN(v, LimitsMin<TN>()), LimitsMax<TN>());
+}
+
+}  // namespace detail
+
+// Demotes i16 lanes to u8 with saturation to [0, 255].
+template <size_t N, int kPow2>
+HWY_API svuint8_t DemoteTo(Simd<uint8_t, N, kPow2> dn, const svint16_t v) {
+#if HWY_SVE_HAVE_2
+  // Saturating signed-to-unsigned narrowing in a single op.
+  const svuint8_t narrowed = BitCast(dn, svqxtunb_s16(v));
+#else
+  using TN = TFromD<decltype(dn)>;
+  const DFromV<decltype(v)> d16;
+  const RebindToUnsigned<decltype(d16)> du16;
+  // Negative inputs become zero, after which the lanes can be viewed as u16.
+  const svuint16_t non_negative = BitCast(du16, detail::MaxN(v, 0));
+  // Clamp to the u8 maximum and reinterpret as bytes.
+  const svuint8_t narrowed = BitCast(dn, detail::SaturateU<TN>(non_negative));
+#endif
+  // Keep the even-indexed bytes, which hold the narrowed values.
+  return svuzp1_u8(narrowed, narrowed);
+}
+
+// Demotes i32 lanes to u16 with saturation to [0, 65535].
+template <size_t N, int kPow2>
+HWY_API svuint16_t DemoteTo(Simd<uint16_t, N, kPow2> dn, const svint32_t v) {
+#if HWY_SVE_HAVE_2
+  // Saturating signed-to-unsigned narrowing in a single op.
+  const svuint16_t narrowed = BitCast(dn, svqxtunb_s32(v));
+#else
+  using TN = TFromD<decltype(dn)>;
+  const DFromV<decltype(v)> d32;
+  const RebindToUnsigned<decltype(d32)> du32;
+  // Negative inputs become zero, after which the lanes can be viewed as u32.
+  const svuint32_t non_negative = BitCast(du32, detail::MaxN(v, 0));
+  // Clamp to the u16 maximum and reinterpret as 16-bit lanes.
+  const svuint16_t narrowed = BitCast(dn, detail::SaturateU<TN>(non_negative));
+#endif
+  // Keep the even-indexed 16-bit lanes, which hold the narrowed values.
+  return svuzp1_u16(narrowed, narrowed);
+}
+
+// Demotes i32 lanes to u8 with saturation to [0, 255].
+template <size_t N, int kPow2>
+HWY_API svuint8_t DemoteTo(Simd<uint8_t, N, kPow2> dn, const svint32_t v) {
+  const DFromV<decltype(v)> d32;
+  const RebindToUnsigned<decltype(d32)> du32;
+  const RepartitionToNarrow<decltype(du32)> du16;
+#if HWY_SVE_HAVE_2
+  // Two saturating narrowing steps: i32 -> u16, then u16 -> u8.
+  const svuint16_t as_u16 = BitCast(du16, svqxtnb_u16(svqxtunb_s32(v)));
+#else
+  using TN = TFromD<decltype(dn)>;
+  // Negative inputs become zero, after which the lanes can be viewed as u32.
+  const svuint32_t non_negative = BitCast(du32, detail::MaxN(v, 0));
+  // Clamp to the u8 maximum; the lanes are still 32 bits wide at this point.
+  const svuint16_t as_u16 = BitCast(du16, detail::SaturateU<TN>(non_negative));
+#endif
+  // Compact twice: even 16-bit lanes first, then even bytes.
+  const svuint8_t halved = BitCast(dn, svuzp1_u16(as_u16, as_u16));
+  return svuzp1_u8(halved, halved);
+}
+
+// Compacts one byte from each u32 lane into the lowest lanes of a u8 vector by
+// twice keeping only the even sub-lanes. No saturation is applied here.
+HWY_API svuint8_t U8FromU32(const svuint32_t v) {
+  const DFromV<svuint32_t> d32;
+  const RepartitionToNarrow<decltype(d32)> d16;
+  const RepartitionToNarrow<decltype(d16)> d8;
+
+  // First pass: even u16 lanes.
+  const svuint16_t as_u16 = BitCast(d16, v);
+  const svuint8_t even_u16 = BitCast(d8, svuzp1_u16(as_u16, as_u16));
+  // Second pass: even u8 lanes.
+  return svuzp1_u8(even_u16, even_u16);
+}
+
+// Demotes u16 lanes to u8 with saturation; results are in the lower half.
+template <size_t N, int kPow2>
+HWY_API svuint8_t DemoteTo(Simd<uint8_t, N, kPow2> dn, const svuint16_t v) {
+#if HWY_SVE_HAVE_2
+  const svuint8_t narrowed = BitCast(dn, svqxtnb_u16(v));
+#else
+  using TN = TFromD<decltype(dn)>;
+  // Limit to the u8 maximum so that discarding the upper byte is lossless.
+  const svuint16_t limited = detail::SaturateU<TN>(v);
+  const svuint8_t narrowed = BitCast(dn, limited);
+#endif
+  // Keep only the even bytes, which hold the results.
+  return svuzp1_u8(narrowed, narrowed);
+}
+
+// Demotes u32 lanes to u16 with saturation; results are in the lower half.
+template <size_t N, int kPow2>
+HWY_API svuint16_t DemoteTo(Simd<uint16_t, N, kPow2> dn, const svuint32_t v) {
+#if HWY_SVE_HAVE_2
+  const svuint16_t narrowed = BitCast(dn, svqxtnb_u32(v));
+#else
+  using TN = TFromD<decltype(dn)>;
+  // Limit to the u16 maximum so that discarding the upper half is lossless.
+  const svuint32_t limited = detail::SaturateU<TN>(v);
+  const svuint16_t narrowed = BitCast(dn, limited);
+#endif
+  // Keep only the even u16 lanes, which hold the results.
+  return svuzp1_u16(narrowed, narrowed);
+}
+
+// Demotes u32 lanes to u8: saturate to the u8 range, then compact the bytes
+// via U8FromU32.
+template <size_t N, int kPow2>
+HWY_API svuint8_t DemoteTo(Simd<uint8_t, N, kPow2> dn, const svuint32_t v) {
+  using TN = TFromD<decltype(dn)>;
+  const svuint32_t limited = detail::SaturateU<TN>(v);
+  return U8FromU32(limited);
+}
+
+// ------------------------------ Truncations
+
+// Truncates u64 lanes to u8 (no saturation). Each uzp1 keeps the even bytes,
+// so three passes reduce the byte stride 8 -> 4 -> 2 -> 1.
+template <size_t N, int kPow2>
+HWY_API svuint8_t TruncateTo(Simd<uint8_t, N, kPow2> /* tag */,
+                             const svuint64_t v) {
+  const DFromV<svuint8_t> du8;
+  const svuint8_t stride8 = BitCast(du8, v);
+  const svuint8_t stride4 = svuzp1_u8(stride8, stride8);
+  const svuint8_t stride2 = svuzp1_u8(stride4, stride4);
+  return svuzp1_u8(stride2, stride2);
+}
+
+// Truncates u64 lanes to u16 (no saturation). Each uzp1 keeps the even u16
+// lanes, so two passes reduce the lane stride 4 -> 2 -> 1.
+template <size_t N, int kPow2>
+HWY_API svuint16_t TruncateTo(Simd<uint16_t, N, kPow2> /* tag */,
+                              const svuint64_t v) {
+  const DFromV<svuint16_t> du16;
+  const svuint16_t stride4 = BitCast(du16, v);
+  const svuint16_t stride2 = svuzp1_u16(stride4, stride4);
+  return svuzp1_u16(stride2, stride2);
+}
+
+// Truncates u64 lanes to u32 (no saturation) by keeping the even u32 lanes.
+template <size_t N, int kPow2>
+HWY_API svuint32_t TruncateTo(Simd<uint32_t, N, kPow2> /* tag */,
+                              const svuint64_t v) {
+  const DFromV<svuint32_t> du32;
+  const svuint32_t as_u32 = BitCast(du32, v);
+  return svuzp1_u32(as_u32, as_u32);
+}
+
+// Truncates u32 lanes to u8 (no saturation). Each uzp1 keeps the even bytes,
+// so two passes reduce the byte stride 4 -> 2 -> 1.
+template <size_t N, int kPow2>
+HWY_API svuint8_t TruncateTo(Simd<uint8_t, N, kPow2> /* tag */,
+                             const svuint32_t v) {
+  const DFromV<svuint8_t> du8;
+  const svuint8_t stride4 = BitCast(du8, v);
+  const svuint8_t stride2 = svuzp1_u8(stride4, stride4);
+  return svuzp1_u8(stride2, stride2);
+}
+
+// Truncates u32 lanes to u16 (no saturation) by keeping the even u16 lanes.
+template <size_t N, int kPow2>
+HWY_API svuint16_t TruncateTo(Simd<uint16_t, N, kPow2> /* tag */,
+                              const svuint32_t v) {
+  const DFromV<svuint16_t> du16;
+  const svuint16_t as_u16 = BitCast(du16, v);
+  return svuzp1_u16(as_u16, as_u16);
+}
+
+// Truncates u16 lanes to u8 (no saturation) by keeping the even bytes.
+template <size_t N, int kPow2>
+HWY_API svuint8_t TruncateTo(Simd<uint8_t, N, kPow2> /* tag */,
+                             const svuint16_t v) {
+  const DFromV<svuint8_t> du8;
+  const svuint8_t as_u8 = BitCast(du8, v);
+  return svuzp1_u8(as_u8, as_u8);
+}
+
+// ------------------------------ DemoteTo I
+
+// Demotes i16 lanes to i8 with signed saturation; results are in the lower
+// half of the returned vector.
+template <size_t N, int kPow2>
+HWY_API svint8_t DemoteTo(Simd<int8_t, N, kPow2> dn, const svint16_t v) {
+#if HWY_SVE_HAVE_2
+  const svint8_t narrowed = BitCast(dn, svqxtnb_s16(v));
+#else
+  using TN = TFromD<decltype(dn)>;
+  // Limit to the i8 range so that discarding the upper byte is lossless.
+  const svint16_t limited = detail::SaturateI<TN>(v);
+  const svint8_t narrowed = BitCast(dn, limited);
+#endif
+  // Keep only the even bytes, which hold the results.
+  return svuzp1_s8(narrowed, narrowed);
+}
+
+// Demotes i32 lanes to i16 with signed saturation; results are in the lower
+// half of the returned vector.
+template <size_t N, int kPow2>
+HWY_API svint16_t DemoteTo(Simd<int16_t, N, kPow2> dn, const svint32_t v) {
+#if HWY_SVE_HAVE_2
+  const svint16_t narrowed = BitCast(dn, svqxtnb_s32(v));
+#else
+  using TN = TFromD<decltype(dn)>;
+  // Limit to the i16 range so that discarding the upper half is lossless.
+  const svint32_t limited = detail::SaturateI<TN>(v);
+  const svint16_t narrowed = BitCast(dn, limited);
+#endif
+  // Keep only the even i16 lanes, which hold the results.
+  return svuzp1_s16(narrowed, narrowed);
+}
+
+// Demotes i32 lanes to i8 (quarter width) with signed saturation. Two
+// successive "keep even lanes" steps compact the results into the lowest lanes.
+template <size_t N, int kPow2>
+HWY_API svint8_t DemoteTo(Simd<int8_t, N, kPow2> dn, const svint32_t v) {
+  const RepartitionToWide<decltype(dn)> d16;
+#if HWY_SVE_HAVE_2
+  // Saturate i32->i16, then i16->i8, using the SVE2 narrowing instructions.
+  const svint16_t as_i16 = BitCast(d16, svqxtnb_s16(svqxtnb_s32(v)));
+#else
+  using TN = TFromD<decltype(dn)>;
+  // Limit to the i8 range so that discarding the upper bytes is lossless.
+  const svint16_t as_i16 = BitCast(d16, detail::SaturateI<TN>(v));
+#endif
+  const svint8_t even_i16 = BitCast(dn, svuzp1_s16(as_i16, as_i16));
+  return BitCast(dn, svuzp1_s8(even_i16, even_i16));
+}
+
+// ------------------------------ I64/U64 DemoteTo
+
+// Demotes i64 lanes to i32 with signed saturation. After saturating, the bits
+// are compacted via the unsigned TruncateTo and reinterpreted as signed.
+template <size_t N, int kPow2>
+HWY_API svint32_t DemoteTo(Simd<int32_t, N, kPow2> dn, const svint64_t v) {
+  const RebindToUnsigned<decltype(dn)> dn_unsigned;
+  const Rebind<uint64_t, decltype(dn)> du64;
+#if HWY_SVE_HAVE_2
+  const svuint64_t saturated = BitCast(du64, svqxtnb_s64(v));
+#else
+  using TN = TFromD<decltype(dn)>;
+  const svuint64_t saturated = BitCast(du64, detail::SaturateI<TN>(v));
+#endif
+  return BitCast(dn, TruncateTo(dn_unsigned, saturated));
+}
+
+// Demotes i64 lanes to i16 with signed saturation. After saturating, the bits
+// are compacted via the unsigned TruncateTo and reinterpreted as signed.
+template <size_t N, int kPow2>
+HWY_API svint16_t DemoteTo(Simd<int16_t, N, kPow2> dn, const svint64_t v) {
+  const RebindToUnsigned<decltype(dn)> dn_unsigned;
+  const Rebind<uint64_t, decltype(dn)> du64;
+#if HWY_SVE_HAVE_2
+  // Saturate i64->i32, then i32->i16.
+  const svuint64_t saturated = BitCast(du64, svqxtnb_s32(svqxtnb_s64(v)));
+#else
+  using TN = TFromD<decltype(dn)>;
+  const svuint64_t saturated = BitCast(du64, detail::SaturateI<TN>(v));
+#endif
+  return BitCast(dn, TruncateTo(dn_unsigned, saturated));
+}
+
+// Demotes i64 lanes to i8 with signed saturation: clamp to the i8 range, then
+// compact the bits via the unsigned TruncateTo and reinterpret as signed.
+template <size_t N, int kPow2>
+HWY_API svint8_t DemoteTo(Simd<int8_t, N, kPow2> dn, const svint64_t v) {
+  using TN = TFromD<decltype(dn)>;
+  const RebindToUnsigned<decltype(dn)> dn_unsigned;
+  const Rebind<uint64_t, decltype(dn)> du64;
+  const svuint64_t saturated = BitCast(du64, detail::SaturateI<TN>(v));
+  return BitCast(dn, TruncateTo(dn_unsigned, saturated));
+}
+
+// Demotes i64 lanes to u32 with unsigned saturation, then truncates.
+template <size_t N, int kPow2>
+HWY_API svuint32_t DemoteTo(Simd<uint32_t, N, kPow2> dn, const svint64_t v) {
+  const Rebind<uint64_t, decltype(dn)> du64;
+#if HWY_SVE_HAVE_2
+  const svuint64_t saturated = BitCast(du64, svqxtunb_s64(v));
+#else
+  using TN = TFromD<decltype(dn)>;
+  // Negative inputs become zero, after which lanes can be viewed as unsigned.
+  const svuint64_t non_negative = BitCast(du64, detail::MaxN(v, 0));
+  // Limit to the u32 maximum.
+  const svuint64_t saturated = detail::SaturateU<TN>(non_negative);
+#endif
+  return TruncateTo(dn, saturated);
+}
+
+// Demotes i64 lanes to u16 with unsigned saturation, then truncates.
+template <size_t N, int kPow2>
+HWY_API svuint16_t DemoteTo(Simd<uint16_t, N, kPow2> dn, const svint64_t v) {
+  const Rebind<uint64_t, decltype(dn)> du64;
+#if HWY_SVE_HAVE_2
+  // Saturate i64->u32, then u32->u16.
+  const svuint64_t saturated = BitCast(du64, svqxtnb_u32(svqxtunb_s64(v)));
+#else
+  using TN = TFromD<decltype(dn)>;
+  // Negative inputs become zero, after which lanes can be viewed as unsigned.
+  const svuint64_t non_negative = BitCast(du64, detail::MaxN(v, 0));
+  // Limit to the u16 maximum.
+  const svuint64_t saturated = detail::SaturateU<TN>(non_negative);
+#endif
+  return TruncateTo(dn, saturated);
+}
+
+// Demotes i64 lanes to u8 with unsigned saturation, then truncates.
+template <size_t N, int kPow2>
+HWY_API svuint8_t DemoteTo(Simd<uint8_t, N, kPow2> dn, const svint64_t v) {
+  using TN = TFromD<decltype(dn)>;
+  const Rebind<uint64_t, decltype(dn)> du64;
+  // Negative inputs become zero, after which lanes can be viewed as unsigned.
+  const svuint64_t non_negative = BitCast(du64, detail::MaxN(v, 0));
+  // Limit to the u8 maximum.
+  const svuint64_t saturated = detail::SaturateU<TN>(non_negative);
+  return TruncateTo(dn, saturated);
+}
+
+// Demotes u64 lanes to u32 with saturation, then truncates.
+template <size_t N, int kPow2>
+HWY_API svuint32_t DemoteTo(Simd<uint32_t, N, kPow2> dn, const svuint64_t v) {
+  const Rebind<uint64_t, decltype(dn)> du64;
+#if HWY_SVE_HAVE_2
+  const svuint64_t saturated = BitCast(du64, svqxtnb_u64(v));
+#else
+  using TN = TFromD<decltype(dn)>;
+  // Limit to the u32 maximum.
+  const svuint64_t saturated = BitCast(du64, detail::SaturateU<TN>(v));
+#endif
+  return TruncateTo(dn, saturated);
+}
+
+// Demotes u64 lanes to u16 with saturation, then truncates.
+template <size_t N, int kPow2>
+HWY_API svuint16_t DemoteTo(Simd<uint16_t, N, kPow2> dn, const svuint64_t v) {
+  const Rebind<uint64_t, decltype(dn)> du64;
+#if HWY_SVE_HAVE_2
+  // Saturate u64->u32, then u32->u16.
+  const svuint64_t saturated = BitCast(du64, svqxtnb_u32(svqxtnb_u64(v)));
+#else
+  using TN = TFromD<decltype(dn)>;
+  // Limit to the u16 maximum.
+  const svuint64_t saturated = BitCast(du64, detail::SaturateU<TN>(v));
+#endif
+  return TruncateTo(dn, saturated);
+}
+
+// Demotes u64 lanes to u8 with saturation, then truncates.
+template <size_t N, int kPow2>
+HWY_API svuint8_t DemoteTo(Simd<uint8_t, N, kPow2> dn, const svuint64_t v) {
+  using TN = TFromD<decltype(dn)>;
+  const Rebind<uint64_t, decltype(dn)> du64;
+  // Limit to the u8 maximum.
+  const svuint64_t saturated = BitCast(du64, detail::SaturateU<TN>(v));
+  return TruncateTo(dn, saturated);
+}
+
+// ------------------------------ ConcatEven/ConcatOdd
+
+// WARNING: the upper half of these needs fixing up (uzp1/uzp2 use the
+// full vector length, not rounded down to a power of two as we require).
+namespace detail {
+
+// Defines NAME(hi, lo), which forwards to the sv<OP> intrinsic. Note the
+// argument swap: `lo` is passed as the intrinsic's first operand and `hi` as
+// its second.
+#define HWY_SVE_CONCAT_EVERY_SECOND(BASE, CHAR, BITS, HALF, NAME, OP) \
+  HWY_INLINE HWY_SVE_V(BASE, BITS)                                    \
+      NAME(HWY_SVE_V(BASE, BITS) hi, HWY_SVE_V(BASE, BITS) lo) {      \
+    return sv##OP##_##CHAR##BITS(lo, hi);                             \
+  }
+// uzp1/uzp2 operate on the full hardware vector, hence the "Full" suffix.
+HWY_SVE_FOREACH(HWY_SVE_CONCAT_EVERY_SECOND, ConcatEvenFull, uzp1)
+HWY_SVE_FOREACH(HWY_SVE_CONCAT_EVERY_SECOND, ConcatOddFull, uzp2)
+// The block-granularity (uzp1q/uzp2q) variants are only defined when the
+// compiler advertises __ARM_FEATURE_SVE_MATMUL_FP64.
+#if defined(__ARM_FEATURE_SVE_MATMUL_FP64)
+HWY_SVE_FOREACH(HWY_SVE_CONCAT_EVERY_SECOND, ConcatEvenBlocks, uzp1q)
+HWY_SVE_FOREACH(HWY_SVE_CONCAT_EVERY_SECOND, ConcatOddBlocks, uzp2q)
+#endif
+#undef HWY_SVE_CONCAT_EVERY_SECOND
+
+// Used to slide up / shift whole register left; mask indicates which range
+// to take from lo, and the rest is filled from hi starting at its lowest.
+// As above, `lo` precedes `hi` in the call to the intrinsic.
+#define HWY_SVE_SPLICE(BASE, CHAR, BITS, HALF, NAME, OP)                   \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME(                                      \
+      HWY_SVE_V(BASE, BITS) hi, HWY_SVE_V(BASE, BITS) lo, svbool_t mask) { \
+    return sv##OP##_##CHAR##BITS(mask, lo, hi);                            \
+  }
+HWY_SVE_FOREACH(HWY_SVE_SPLICE, Splice, splice)
+#undef HWY_SVE_SPLICE
+
+}  // namespace detail
+
+// Returns the odd lanes of `lo` in the lower half, followed by the odd lanes
+// of `hi` in the upper half.
+template <class D>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+#if HWY_SVE_IS_POW2
+  // Full power-of-two vectors: a single uzp2 already yields the result.
+  if (detail::IsFull(d)) {
+    return detail::ConcatOddFull(hi, lo);
+  }
+#endif
+  // Otherwise compact each input separately, then join the two halves at
+  // Lanes(d) / 2.
+  const VFromD<D> odd_of_hi = detail::ConcatOddFull(hi, hi);
+  const VFromD<D> odd_of_lo = detail::ConcatOddFull(lo, lo);
+  return detail::Splice(odd_of_hi, odd_of_lo, FirstN(d, Lanes(d) / 2));
+}
+
+// Returns the even lanes of `lo` in the lower half, followed by the even lanes
+// of `hi` in the upper half.
+template <class D>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+#if HWY_SVE_IS_POW2
+  // Full power-of-two vectors: a single uzp1 already yields the result.
+  if (detail::IsFull(d)) {
+    return detail::ConcatEvenFull(hi, lo);
+  }
+#endif
+  // Otherwise compact each input separately, then join the two halves at
+  // Lanes(d) / 2.
+  const VFromD<D> even_of_hi = detail::ConcatEvenFull(hi, hi);
+  const VFromD<D> even_of_lo = detail::ConcatEvenFull(lo, lo);
+  return detail::Splice(even_of_hi, even_of_lo, FirstN(d, Lanes(d) / 2));
+}
+
+// ------------------------------ DemoteTo F
+
+// We already toggled HWY_NATIVE_F16C above.
+
+// Demotes f32 lanes to f16; results are in the lower half of the returned
+// vector.
+template <size_t N, int kPow2>
+HWY_API svfloat16_t DemoteTo(Simd<float16_t, N, kPow2> d, const svfloat32_t v) {
+  // The conversion leaves each result in the even f16 lane of its f32 slot.
+  const svfloat16_t converted = svcvt_f16_f32_x(detail::PTrue(d), v);
+  // Compact the even lanes.
+  return detail::ConcatEvenFull(converted, converted);
+}
+
+// Demotes f32 lanes to bfloat16 by keeping the odd u16 half of each f32 lane;
+// results are in the lower half of the returned vector.
+template <size_t N, int kPow2>
+HWY_API VBF16 DemoteTo(Simd<bfloat16_t, N, kPow2> dbf16, svfloat32_t v) {
+  const ScalableTag<uint16_t> du16;
+  const svuint16_t halves = BitCast(du16, v);
+  const svuint16_t odd_halves = detail::ConcatOddFull(halves, halves);
+  return BitCast(dbf16, odd_halves);
+}
+
+// Demotes f64 lanes to f32; results are in the lower half of the returned
+// vector.
+template <size_t N, int kPow2>
+HWY_API svfloat32_t DemoteTo(Simd<float32_t, N, kPow2> d, const svfloat64_t v) {
+  // The conversion leaves each result in the even f32 lane of its f64 slot.
+  const svfloat32_t converted = svcvt_f32_f64_x(detail::PTrue(d), v);
+  // Compact the even lanes.
+  return detail::ConcatEvenFull(converted, converted);
+}
+
+// Converts f64 lanes to i32; results are in the lower half of the returned
+// vector.
+template <size_t N, int kPow2>
+HWY_API svint32_t DemoteTo(Simd<int32_t, N, kPow2> d, const svfloat64_t v) {
+  // The conversion leaves each result in the even i32 lane of its f64 slot.
+  const svint32_t converted = svcvt_s32_f64_x(detail::PTrue(d), v);
+  // Compact the even lanes.
+  return detail::ConcatEvenFull(converted, converted);
+}
+
+// ------------------------------ ConvertTo F
+
+// Defines three same-width NAME overloads per float type: signed int -> float,
+// unsigned int -> float, and float -> signed int. Each forwards to the
+// corresponding sv<OP> intrinsic with an all-true predicate for that lane
+// width. The descriptor argument only selects the overload.
+#define HWY_SVE_CONVERT(BASE, CHAR, BITS, HALF, NAME, OP)                      \
+  /* signed integers */                                                        \
+  template <size_t N, int kPow2>                                               \
+  HWY_API HWY_SVE_V(BASE, BITS)                                                \
+      NAME(HWY_SVE_D(BASE, BITS, N, kPow2) /* d */, HWY_SVE_V(int, BITS) v) {  \
+    return sv##OP##_##CHAR##BITS##_s##BITS##_x(HWY_SVE_PTRUE(BITS), v);        \
+  }                                                                            \
+  /* unsigned integers */                                                      \
+  template <size_t N, int kPow2>                                               \
+  HWY_API HWY_SVE_V(BASE, BITS)                                                \
+      NAME(HWY_SVE_D(BASE, BITS, N, kPow2) /* d */, HWY_SVE_V(uint, BITS) v) { \
+    return sv##OP##_##CHAR##BITS##_u##BITS##_x(HWY_SVE_PTRUE(BITS), v);        \
+  }                                                                            \
+  /* Truncates (rounds toward zero). */                                        \
+  template <size_t N, int kPow2>                                               \
+  HWY_API HWY_SVE_V(int, BITS)                                                 \
+      NAME(HWY_SVE_D(int, BITS, N, kPow2) /* d */, HWY_SVE_V(BASE, BITS) v) {  \
+    return sv##OP##_s##BITS##_##CHAR##BITS##_x(HWY_SVE_PTRUE(BITS), v);        \
+  }
+
+// API only requires f32 but we provide f64 for use by Iota.
+HWY_SVE_FOREACH_F(HWY_SVE_CONVERT, ConvertTo, cvt)
+#undef HWY_SVE_CONVERT
+
+// ------------------------------ NearestInt (Round, ConvertTo)
+// Converts float lanes to same-width signed integers after rounding. There is
+// no single instruction for this, so Round is followed by the truncating
+// ConvertTo.
+template <class VF, class DI = RebindToSigned<DFromV<VF>>>
+HWY_API VFromD<DI> NearestInt(VF v) {
+  const DI di;
+  const VF rounded = Round(v);
+  return ConvertTo(di, rounded);
+}
+
+// ------------------------------ Iota (Add, ConvertTo)
+
+// Defines integer NAME(d, first), which forwards to the sv<OP> intrinsic with
+// base `first` and a step of 1. The descriptor only selects the overload.
+#define HWY_SVE_IOTA(BASE, CHAR, BITS, HALF, NAME, OP)                        \
+  template <size_t N, int kPow2>                                              \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME(HWY_SVE_D(BASE, BITS, N, kPow2) /* d */, \
+                                     HWY_SVE_T(BASE, BITS) first) {           \
+    return sv##OP##_##CHAR##BITS(first, 1);                                   \
+  }
+
+HWY_SVE_FOREACH_UI(HWY_SVE_IOTA, Iota, index)
+#undef HWY_SVE_IOTA
+
+// Floating-point Iota: builds the integer sequence starting at zero, converts
+// it to float, then adds `first` to every lane.
+template <class D, HWY_IF_FLOAT_D(D)>
+HWY_API VFromD<D> Iota(const D d, TFromD<D> first) {
+  const RebindToSigned<D> di;
+  const VFromD<D> from_zero = ConvertTo(d, Iota(di, 0));
+  return detail::AddN(from_zero, first);
+}
+
+// ------------------------------ InterleaveLower
+
+// Interleaves the lower halves of each 128-bit block of `a` and `b`.
+template <class D, class V>
+HWY_API V InterleaveLower(D d, const V a, const V b) {
+  static_assert(IsSame<TFromD<D>, TFromV<V>>(), "D/V mismatch");
+#if HWY_TARGET == HWY_SVE2_128
+  // A single block: zip1 is sufficient.
+  (void)d;
+  return detail::ZipLowerSame(a, b);
+#else
+  // Move lower halves of blocks to lower half of vector: viewed as u64, these
+  // are the even lanes.
+  const Repartition<uint64_t, decltype(d)> du64;
+  const auto a_u64 = BitCast(du64, a);
+  const auto b_u64 = BitCast(du64, b);
+  const auto a_lower = BitCast(d, detail::ConcatEvenFull(a_u64, a_u64));
+  const auto b_lower = BitCast(d, detail::ConcatEvenFull(b_u64, b_u64));
+  return detail::ZipLowerSame(a_lower, b_lower);
+#endif
+}
+
+// Convenience overload that derives the descriptor from the vector type.
+template <class V>
+HWY_API V InterleaveLower(const V a, const V b) {
+  const DFromV<V> d;
+  return InterleaveLower(d, a, b);
+}
+
+// ------------------------------ InterleaveUpper
+
+// Only use zip2 if the vector size is a power of two, otherwise getting the
+// actual "upper half" requires MaskUpperHalf.
+#if HWY_TARGET == HWY_SVE2_128
+namespace detail {
+// Unlike Highway's ZipUpper, this returns the same type.
+HWY_SVE_FOREACH(HWY_SVE_RETV_ARGVV, ZipUpperSame, zip2)
+}  // namespace detail
+#endif
+
+// Interleaves the upper halves of each 128-bit block of `a` and `b`.
+// Full vector: guaranteed to have at least one block
+template <class D, class V = VFromD<D>,
+          hwy::EnableIf<detail::IsFull(D())>* = nullptr>
+HWY_API V InterleaveUpper(D d, const V a, const V b) {
+#if HWY_TARGET == HWY_SVE2_128
+  // A single block: zip2 is sufficient.
+  (void)d;
+  return detail::ZipUpperSame(a, b);
+#else
+  // Move upper halves of blocks to lower half of vector: viewed as u64, these
+  // are the odd lanes.
+  const Repartition<uint64_t, decltype(d)> du64;
+  const auto a_u64 = BitCast(du64, a);
+  const auto b_u64 = BitCast(du64, b);
+  const auto a_upper = BitCast(d, detail::ConcatOddFull(a_u64, a_u64));
+  const auto b_upper = BitCast(d, detail::ConcatOddFull(b_u64, b_u64));
+  return detail::ZipLowerSame(a_upper, b_upper);
+#endif
+}
+
+// Capped/fraction: need runtime check
+template <class D, class V = VFromD<D>,
+          hwy::EnableIf<!detail::IsFull(D())>* = nullptr>
+HWY_API V InterleaveUpper(D d, const V a, const V b) {
+  const size_t vector_bytes = Lanes(d) * sizeof(TFromD<D>);
+  if (vector_bytes >= 16) {
+    // At least one whole block: dispatch again using the descriptor derived
+    // from the vector type.
+    return InterleaveUpper(DFromV<V>(), a, b);
+  }
+  // Less than one block: treat as capped, i.e. interleave the upper halves of
+  // the (partial) vectors.
+  const Half<decltype(d)> dh;
+  return InterleaveLower(d, UpperHalf(dh, a), UpperHalf(dh, b));
+}
+
+// ------------------------------ Per4LaneBlockShuffle
+
+namespace detail {
+
+// Specialization for shuffle tag 0x88 and lanes that are not 8 bytes wide.
+// Gathers the even lanes of `v`, views them as double-width lanes and zips
+// that vector with itself, so each pair of even lanes appears twice in a row.
+// NOTE(review): 0x88 presumably encodes per-block lane indices 2,0,2,0 -
+// confirm against the generic Per4LaneBlockShuffle dispatcher.
+template <size_t kLaneSize, size_t kVectSize, class V,
+          HWY_IF_NOT_T_SIZE_V(V, 8)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0x88> /*idx_3210_tag*/,
+                                  hwy::SizeTag<kLaneSize> /*lane_size_tag*/,
+                                  hwy::SizeTag<kVectSize> /*vect_size_tag*/,
+                                  V v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const RepartitionToWide<decltype(du)> dw;
+
+  const auto evens = BitCast(dw, ConcatEvenFull(v, v));
+  return BitCast(d, ZipLowerSame(evens, evens));
+}
+
+// Specialization for shuffle tag 0xDD and lanes that are not 8 bytes wide.
+// Same approach as above, but gathering the odd lanes of `v`.
+// NOTE(review): 0xDD presumably encodes per-block lane indices 3,1,3,1 -
+// confirm against the generic Per4LaneBlockShuffle dispatcher.
+template <size_t kLaneSize, size_t kVectSize, class V,
+          HWY_IF_NOT_T_SIZE_V(V, 8)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0xDD> /*idx_3210_tag*/,
+                                  hwy::SizeTag<kLaneSize> /*lane_size_tag*/,
+                                  hwy::SizeTag<kVectSize> /*vect_size_tag*/,
+                                  V v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const RepartitionToWide<decltype(du)> dw;
+
+  const auto odds = BitCast(dw, ConcatOddFull(v, v));
+  return BitCast(d, ZipLowerSame(odds, odds));
+}
+
+}  // namespace detail
+
+// ================================================== COMBINE
+
+namespace detail {
+
+// MaskLowerHalf(d) returns a predicate that is true for the first
+// Lanes(d) / 2 lanes (at least one lane). For the fixed-size targets the lane
+// count is mapped to a constant svptrue_pat pattern; there is one overload
+// per lane size in bytes.
+#if HWY_TARGET == HWY_SVE_256 || HWY_IDE
+// 1-byte lanes, 256-bit vectors.
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+svbool_t MaskLowerHalf(D d) {
+  switch (Lanes(d)) {
+    case 32:
+      return svptrue_pat_b8(SV_VL16);
+    case 16:
+      return svptrue_pat_b8(SV_VL8);
+    case 8:
+      return svptrue_pat_b8(SV_VL4);
+    case 4:
+      return svptrue_pat_b8(SV_VL2);
+    default:
+      return svptrue_pat_b8(SV_VL1);
+  }
+}
+// 2-byte lanes, 256-bit vectors.
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+svbool_t MaskLowerHalf(D d) {
+  switch (Lanes(d)) {
+    case 16:
+      return svptrue_pat_b16(SV_VL8);
+    case 8:
+      return svptrue_pat_b16(SV_VL4);
+    case 4:
+      return svptrue_pat_b16(SV_VL2);
+    default:
+      return svptrue_pat_b16(SV_VL1);
+  }
+}
+// 4-byte lanes, 256-bit vectors.
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+svbool_t MaskLowerHalf(D d) {
+  switch (Lanes(d)) {
+    case 8:
+      return svptrue_pat_b32(SV_VL4);
+    case 4:
+      return svptrue_pat_b32(SV_VL2);
+    default:
+      return svptrue_pat_b32(SV_VL1);
+  }
+}
+// 8-byte lanes, 256-bit vectors.
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+svbool_t MaskLowerHalf(D d) {
+  switch (Lanes(d)) {
+    case 4:
+      return svptrue_pat_b64(SV_VL2);
+    default:
+      return svptrue_pat_b64(SV_VL1);
+  }
+}
+#endif
+#if HWY_TARGET == HWY_SVE2_128 || HWY_IDE
+// 1-byte lanes, 128-bit vectors.
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+svbool_t MaskLowerHalf(D d) {
+  switch (Lanes(d)) {
+    case 16:
+      return svptrue_pat_b8(SV_VL8);
+    case 8:
+      return svptrue_pat_b8(SV_VL4);
+    case 4:
+      return svptrue_pat_b8(SV_VL2);
+    case 2:
+    case 1:
+    default:
+      return svptrue_pat_b8(SV_VL1);
+  }
+}
+// 2-byte lanes, 128-bit vectors.
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+svbool_t MaskLowerHalf(D d) {
+  switch (Lanes(d)) {
+    case 8:
+      return svptrue_pat_b16(SV_VL4);
+    case 4:
+      return svptrue_pat_b16(SV_VL2);
+    case 2:
+    case 1:
+    default:
+      return svptrue_pat_b16(SV_VL1);
+  }
+}
+// 4-byte lanes, 128-bit vectors.
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+svbool_t MaskLowerHalf(D d) {
+  return svptrue_pat_b32(Lanes(d) == 4 ? SV_VL2 : SV_VL1);
+}
+// 8-byte lanes, 128-bit vectors: always the first lane only.
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+svbool_t MaskLowerHalf(D /*d*/) {
+  return svptrue_pat_b64(SV_VL1);
+}
+#endif  // HWY_TARGET == HWY_SVE2_128
+#if HWY_TARGET != HWY_SVE_256 && HWY_TARGET != HWY_SVE2_128
+// Other targets: build the mask from the lane count via FirstN.
+template <class D>
+svbool_t MaskLowerHalf(D d) {
+  return FirstN(d, Lanes(d) / 2);
+}
+#endif
+
+// Returns a predicate that is true for the lanes of `d` that are not in the
+// lower half.
+template <class D>
+svbool_t MaskUpperHalf(D d) {
+  // TODO(janwas): WHILEGE on SVE2
+  // Full power-of-two vectors: simply invert the lower-half mask.
+  if (HWY_SVE_IS_POW2 && IsFull(d)) {
+    return Not(MaskLowerHalf(d));
+  }
+
+  // For Splice to work as intended, make sure bits above Lanes(d) are zero.
+  return AndNot(MaskLowerHalf(d), detail::MakeMask(d));
+}
+
+// Right-shift vector pair by constexpr; can be used to slide down (=N) or up
+// (=Lanes()-N). Note that `lo` is passed as the intrinsic's first operand and
+// `hi` as its second.
+#define HWY_SVE_EXT(BASE, CHAR, BITS, HALF, NAME, OP)            \
+  template <size_t kIndex>                                       \
+  HWY_API HWY_SVE_V(BASE, BITS)                                  \
+      NAME(HWY_SVE_V(BASE, BITS) hi, HWY_SVE_V(BASE, BITS) lo) { \
+    return sv##OP##_##CHAR##BITS(lo, hi, kIndex);                \
+  }
+HWY_SVE_FOREACH(HWY_SVE_EXT, Ext, ext)
+#undef HWY_SVE_EXT
+
+}  // namespace detail
+
+// ------------------------------ ConcatUpperLower
+// Returns a vector whose lower half comes from `lo` and whose remaining lanes
+// come from `hi`, each at its original position.
+template <class D, class V>
+HWY_API V ConcatUpperLower(const D d, const V hi, const V lo) {
+  const svbool_t is_lower = detail::MaskLowerHalf(d);
+  return IfThenElse(is_lower, lo, hi);
+}
+
+// ------------------------------ ConcatLowerLower
+// Returns the lower half of `lo` followed by the lower half of `hi`.
+template <class D, class V>
+HWY_API V ConcatLowerLower(const D d, const V hi, const V lo) {
+  // Fast paths for full vectors on the fixed-size targets; whichever #if
+  // matches returns immediately.
+  if (detail::IsFull(d)) {
+#if defined(__ARM_FEATURE_SVE_MATMUL_FP64) && HWY_TARGET == HWY_SVE_256
+    return detail::ConcatEvenBlocks(hi, lo);
+#endif
+#if HWY_TARGET == HWY_SVE2_128
+    // Viewed as u64, the lower halves are lane 0 of each input.
+    const Repartition<uint64_t, D> du64;
+    const auto lo64 = BitCast(du64, lo);
+    return BitCast(d, InterleaveLower(du64, lo64, BitCast(du64, hi)));
+#endif
+  }
+  // General case: take the lower half of `lo`, then fill from the start of
+  // `hi`.
+  return detail::Splice(hi, lo, detail::MaskLowerHalf(d));
+}
+
+// ------------------------------ ConcatLowerUpper
+// Returns the upper half of `lo` followed by the lower half of `hi`.
+template <class D, class V>
+HWY_API V ConcatLowerUpper(const D d, const V hi, const V lo) {
+#if HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128  // constexpr Lanes
+  // Full fixed-size vector: shift the hi:lo pair down by half a vector.
+  if (detail::IsFull(d)) {
+    return detail::Ext<Lanes(d) / 2>(hi, lo);
+  }
+#endif
+  // General case: take the upper half of `lo`, then fill from the start of
+  // `hi`.
+  const svbool_t is_upper = detail::MaskUpperHalf(d);
+  return detail::Splice(hi, lo, is_upper);
+}
+
+// ------------------------------ ConcatUpperUpper
+// Returns the upper half of `lo` followed by the upper half of `hi`.
+template <class D, class V>
+HWY_API V ConcatUpperUpper(const D d, const V hi, const V lo) {
+  // Fast paths for full vectors on the fixed-size targets; whichever #if
+  // matches returns immediately.
+  if (detail::IsFull(d)) {
+#if defined(__ARM_FEATURE_SVE_MATMUL_FP64) && HWY_TARGET == HWY_SVE_256
+    return detail::ConcatOddBlocks(hi, lo);
+#endif
+#if HWY_TARGET == HWY_SVE2_128
+    // Viewed as u64, the upper halves are lane 1 of each input.
+    const Repartition<uint64_t, D> du64;
+    const auto lo64 = BitCast(du64, lo);
+    return BitCast(d, InterleaveUpper(du64, lo64, BitCast(du64, hi)));
+#endif
+  }
+  // General case: move the upper half of `lo` to the bottom, then overwrite
+  // the upper-half lanes with those of `hi`.
+  const svbool_t mask_upper = detail::MaskUpperHalf(d);
+  const V lo_upper = detail::Splice(lo, lo, mask_upper);
+  return IfThenElse(mask_upper, hi, lo_upper);
+}
+
+// ------------------------------ Combine
+// Joins two half-vectors into one: `lo` supplies the lower half and `hi` the
+// upper half. Simply forwards to ConcatLowerLower.
+template <class D, class V2>
+HWY_API VFromD<D> Combine(const D d, const V2 hi, const V2 lo) {
+  return ConcatLowerLower(d, hi, lo);
+}
+
+// ------------------------------ ZeroExtendVector
+// Returns `lo` in the lower half and zeros in the upper half.
+template <class D, class V>
+HWY_API V ZeroExtendVector(const D d, const V lo) {
+  const Half<D> dh;
+  return Combine(d, Zero(dh), lo);
+}
+
+// ------------------------------ Lower/UpperHalf
+
+// Returns `v` unchanged: the half-vector has the same type as the input here,
+// so the lower half is already in place. The tag only selects the overload.
+template <class D2, class V>
+HWY_API V LowerHalf(D2 /* tag */, const V v) {
+  return v;
+}
+
+// Tag-less overload; likewise returns `v` unchanged.
+template <class V>
+HWY_API V LowerHalf(const V v) {
+  return v;
+}
+
+// Moves the upper half of `v` (with respect to Twice<DH>) down to the lowest
+// lanes.
+template <class DH, class V>
+HWY_API V UpperHalf(const DH dh, const V v) {
+  const Twice<decltype(dh)> d_full;
+  // Cast so that we support bfloat16_t.
+  const RebindToUnsigned<decltype(d_full)> du_full;
+  const VFromD<decltype(du_full)> bits = BitCast(du_full, v);
+#if HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128  // constexpr Lanes
+  // Shift down by the (compile-time) number of lanes in a half-vector.
+  return BitCast(d_full, detail::Ext<Lanes(dh)>(bits, bits));
+#else
+  const MFromD<decltype(du_full)> is_upper = detail::MaskUpperHalf(du_full);
+  return BitCast(d_full, detail::Splice(bits, bits, is_upper));
+#endif
+}
+
+// ================================================== REDUCE
+
+// These return T, whereas the Highway op returns a broadcasted vector.
+namespace detail {
+// Integer sum reduction over the lanes selected by `pg`. The result is masked
+// to the width of T, so the sum wraps around like ordinary lane arithmetic.
+#define HWY_SVE_REDUCE_ADD(BASE, CHAR, BITS, HALF, NAME, OP)                   \
+  HWY_API HWY_SVE_T(BASE, BITS) NAME(svbool_t pg, HWY_SVE_V(BASE, BITS) v) {   \
+    /* The intrinsic returns [u]int64_t; truncate to T so we can broadcast. */ \
+    using T = HWY_SVE_T(BASE, BITS);                                           \
+    using TU = MakeUnsigned<T>;                                                \
+    constexpr uint64_t kMask = LimitsMax<TU>();                                \
+    return static_cast<T>(static_cast<TU>(                                     \
+        static_cast<uint64_t>(sv##OP##_##CHAR##BITS(pg, v)) & kMask));         \
+  }
+
+// Generic reduction over the lanes selected by `pg`; forwards directly to the
+// sv<OP> intrinsic.
+#define HWY_SVE_REDUCE(BASE, CHAR, BITS, HALF, NAME, OP)                     \
+  HWY_API HWY_SVE_T(BASE, BITS) NAME(svbool_t pg, HWY_SVE_V(BASE, BITS) v) { \
+    return sv##OP##_##CHAR##BITS(pg, v);                                     \
+  }
+
+HWY_SVE_FOREACH_UI(HWY_SVE_REDUCE_ADD, SumOfLanesM, addv)
+HWY_SVE_FOREACH_F(HWY_SVE_REDUCE, SumOfLanesM, addv)
+
+HWY_SVE_FOREACH_UI(HWY_SVE_REDUCE, MinOfLanesM, minv)
+HWY_SVE_FOREACH_UI(HWY_SVE_REDUCE, MaxOfLanesM, maxv)
+// NaN if all are
+HWY_SVE_FOREACH_F(HWY_SVE_REDUCE, MinOfLanesM, minnmv)
+HWY_SVE_FOREACH_F(HWY_SVE_REDUCE, MaxOfLanesM, maxnmv)
+
+#undef HWY_SVE_REDUCE
+#undef HWY_SVE_REDUCE_ADD
+}  // namespace detail
+
+// Returns the sum of the lanes of `d`, broadcast to all lanes.
+template <class D, class V>
+V SumOfLanes(D d, V v) {
+  const TFromV<V> sum = detail::SumOfLanesM(detail::MakeMask(d), v);
+  return Set(d, sum);
+}
+
+// Returns the sum of the lanes of `d` as a scalar.
+template <class D, class V>
+TFromV<V> ReduceSum(D d, V v) {
+  const svbool_t valid_lanes = detail::MakeMask(d);
+  return detail::SumOfLanesM(valid_lanes, v);
+}
+
+// Returns the minimum of the lanes of `d`, broadcast to all lanes.
+template <class D, class V>
+V MinOfLanes(D d, V v) {
+  const TFromV<V> smallest = detail::MinOfLanesM(detail::MakeMask(d), v);
+  return Set(d, smallest);
+}
+
+// Returns the maximum of the lanes of `d`, broadcast to all lanes.
+template <class D, class V>
+V MaxOfLanes(D d, V v) {
+  const TFromV<V> largest = detail::MaxOfLanesM(detail::MakeMask(d), v);
+  return Set(d, largest);
+}
+
+// ================================================== SWIZZLE
+
+// ------------------------------ GetLane
+
+namespace detail {
+// Defines NAME(v, mask), which forwards to the sv<OP> intrinsic; note that the
+// intrinsic takes the predicate first.
+#define HWY_SVE_GET_LANE(BASE, CHAR, BITS, HALF, NAME, OP) \
+  HWY_INLINE HWY_SVE_T(BASE, BITS)                         \
+      NAME(HWY_SVE_V(BASE, BITS) v, svbool_t mask) {       \
+    return sv##OP##_##CHAR##BITS(mask, v);                 \
+  }
+
+// lasta: extracts the lane after the last active one.
+HWY_SVE_FOREACH(HWY_SVE_GET_LANE, GetLaneM, lasta)
+// lastb: extracts the last active lane.
+HWY_SVE_FOREACH(HWY_SVE_GET_LANE, ExtractLastMatchingLaneM, lastb)
+#undef HWY_SVE_GET_LANE
+}  // namespace detail
+
+// Returns the first lane of `v`: with no active lanes, the "lane after the
+// last active one" is lane 0.
+template <class V>
+HWY_API TFromV<V> GetLane(V v) {
+  const svbool_t none_active = detail::PFalse();
+  return detail::GetLaneM(v, none_active);
+}
+
+// ------------------------------ ExtractLane
+// Returns lane `i` of `v`: with the first `i` lanes active, the "lane after
+// the last active one" is lane `i`.
+template <class V>
+HWY_API TFromV<V> ExtractLane(V v, size_t i) {
+  const DFromV<V> d;
+  const svbool_t lanes_before_i = FirstN(d, i);
+  return detail::GetLaneM(v, lanes_before_i);
+}
+
+// ------------------------------ InsertLane (IfThenElse)
+// Returns `v` with lane `i` replaced by `t`. The target lane is found by
+// comparing a lane-index vector against `i`.
+template <class V>
+HWY_API V InsertLane(const V v, size_t i, TFromV<V> t) {
+  const DFromV<V> d;
+  const auto lane_indices = Iota(d, 0);
+  const auto is_target =
+      detail::EqN(lane_indices, static_cast<TFromV<V>>(i));
+  const V replacement = Set(d, t);
+  return IfThenElse(RebindMask(d, is_target), replacement, v);
+}
+
+// ------------------------------ DupEven
+
+namespace detail {
+// InterleaveEven(a, b) forwards to the trn1 intrinsic, which interleaves the
+// even-indexed lanes of its two operands.
+HWY_SVE_FOREACH(HWY_SVE_RETV_ARGVV, InterleaveEven, trn1)
+}  // namespace detail
+
+// Copies each even-indexed lane into the odd lane above it, by applying
+// InterleaveEven to `v` and itself.
+template <class V>
+HWY_API V DupEven(const V v) {
+  return detail::InterleaveEven(v, v);
+}
+
+// ------------------------------ DupOdd
+
+namespace detail {
+// InterleaveOdd(a, b) forwards to the trn2 intrinsic, which interleaves the
+// odd-indexed lanes of its two operands.
+HWY_SVE_FOREACH(HWY_SVE_RETV_ARGVV, InterleaveOdd, trn2)
+}  // namespace detail
+
+// Copies each odd-indexed lane into the even lane below it, by applying
+// InterleaveOdd to `v` and itself.
+template <class V>
+HWY_API V DupOdd(const V v) {
+  return detail::InterleaveOdd(v, v);
+}
+
+// ------------------------------ OddEven
+
+#if HWY_SVE_HAVE_2
+
+// SVE2: defines integer OddEven(odd, even) via the eortb_n intrinsic, called
+// with (even, odd) and an XOR constant of zero.
+#define HWY_SVE_ODD_EVEN(BASE, CHAR, BITS, HALF, NAME, OP)          \
+  HWY_API HWY_SVE_V(BASE, BITS)                                     \
+      NAME(HWY_SVE_V(BASE, BITS) odd, HWY_SVE_V(BASE, BITS) even) { \
+    return sv##OP##_##CHAR##BITS(even, odd, /*xor=*/0);             \
+  }
+
+HWY_SVE_FOREACH_UI(HWY_SVE_ODD_EVEN, OddEven, eortb_n)
+#undef HWY_SVE_ODD_EVEN
+
+// Floating-point overload: reuses the integer version on the raw bits.
+template <class V, HWY_IF_FLOAT_V(V)>
+HWY_API V OddEven(const V odd, const V even) {
+  const DFromV<V> d;
+  const RebindToUnsigned<decltype(d)> du;
+  return BitCast(d, OddEven(BitCast(du, odd), BitCast(du, even)));
+}
+
+#else
+
+// Without SVE2: shift `odd` down by one lane so that its odd lanes land on
+// even positions, then interleave the even lanes of both vectors.
+template <class V>
+HWY_API V OddEven(const V odd, const V even) {
+  const auto odd_in_even = detail::Ext<1>(odd, odd);
+  return detail::InterleaveEven(even, odd_in_even);
+}
+
+#endif  // HWY_SVE_HAVE_2
+
+// ------------------------------ OddEvenBlocks
+// Returns a vector whose even-indexed 128-bit blocks come from `even` and
+// whose odd-indexed blocks come from `odd`.
+template <class V>
+HWY_API V OddEvenBlocks(const V odd, const V even) {
+  const DFromV<V> d;
+#if HWY_TARGET == HWY_SVE_256
+  // Two blocks: block 0 is the lower half, block 1 the upper half.
+  return ConcatUpperLower(d, odd, even);
+#elif HWY_TARGET == HWY_SVE2_128
+  // A single block, which has an even index.
+  (void)odd;
+  (void)d;
+  return even;
+#else
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  // Block index = lane index / lanes per 16-byte block.
+  constexpr size_t kLog2LanesPerBlock = CeilLog2(16 / sizeof(TU));
+  const auto block_index = ShiftRight<kLog2LanesPerBlock>(Iota(du, 0));
+  const auto block_parity = detail::AndN(block_index, static_cast<TU>(1));
+  const svbool_t in_even_block = detail::EqN(block_parity, static_cast<TU>(0));
+  return IfThenElse(in_even_block, even, odd);
+#endif
+}
+
+// ------------------------------ TableLookupLanes
+
+// Reinterprets a vector of lane indices as unsigned, as expected by the table
+// lookup functions. Debug builds additionally check that every index is
+// unchanged by masking with (2 * Lanes(d) - 1).
+template <class D, class VI>
+HWY_API VFromD<RebindToUnsigned<D>> IndicesFromVec(D d, VI vec) {
+  using TI = TFromV<VI>;
+  static_assert(sizeof(TFromD<D>) == sizeof(TI), "Index/lane size mismatch");
+  const RebindToUnsigned<D> du;
+  const auto unsigned_idx = BitCast(du, vec);
+#if HWY_IS_DEBUG_BUILD
+  using TU = MakeUnsigned<TI>;
+  const size_t index_limit = Lanes(d) * 2;
+  const auto wrapped_idx =
+      detail::AndN(unsigned_idx, static_cast<TU>(index_limit - 1));
+  HWY_DASSERT(AllTrue(du, Eq(unsigned_idx, wrapped_idx)));
+#else
+  (void)d;
+#endif
+  return unsigned_idx;
+}
+
+// Loads lane indices from `idx` and converts them via IndicesFromVec.
+template <class D, typename TI>
+HWY_API VFromD<RebindToUnsigned<D>> SetTableIndices(D d, const TI* idx) {
+  static_assert(sizeof(TFromD<D>) == sizeof(TI), "Index size must match lane");
+  const Rebind<TI, D> d_idx;
+  const auto loaded = LoadU(d_idx, idx);
+  return IndicesFromVec(d, loaded);
+}
+
+// Defines NAME(v, idx), which forwards to the sv<OP> table-lookup intrinsic.
+// `idx` holds unsigned lane indices of the same width as the lanes of `v`.
+#define HWY_SVE_TABLE(BASE, CHAR, BITS, HALF, NAME, OP)          \
+  HWY_API HWY_SVE_V(BASE, BITS)                                  \
+      NAME(HWY_SVE_V(BASE, BITS) v, HWY_SVE_V(uint, BITS) idx) { \
+    return sv##OP##_##CHAR##BITS(v, idx);                        \
+  }
+
+HWY_SVE_FOREACH(HWY_SVE_TABLE, TableLookupLanes, tbl)
+#undef HWY_SVE_TABLE
+
+#if HWY_SVE_HAVE_2
+namespace detail {
+// Defines NAME(tuple, idx), which forwards to the SVE2 sv<OP> intrinsic. The
+// lookup table is a tuple of two vectors; `idx` holds unsigned lane indices of
+// the same width as the table lanes.
+#define HWY_SVE_TABLE2(BASE, CHAR, BITS, HALF, NAME, OP)                    \
+  HWY_API HWY_SVE_V(BASE, BITS)                                             \
+      NAME(HWY_SVE_TUPLE(BASE, BITS, 2) tuple, HWY_SVE_V(uint, BITS) idx) { \
+    return sv##OP##_##CHAR##BITS(tuple, idx);                               \
+  }
+
+HWY_SVE_FOREACH(HWY_SVE_TABLE2, NativeTwoTableLookupLanes, tbl2)
+// Undefine the helper macro defined above so that it does not leak into the
+// rest of the translation unit.
+#undef HWY_SVE_TABLE2
+}  // namespace detail
+#endif  // HWY_SVE_HAVE_2
+
+// Looks up lanes from the concatenation of tables `a` and `b`: indices below
+// Lanes(d) select from `a`, the remaining ones select from `b`.
+template <class D>
+HWY_API VFromD<D> TwoTablesLookupLanes(D d, VFromD<D> a, VFromD<D> b,
+                                       VFromD<RebindToUnsigned<D>> idx) {
+  // SVE2 has an instruction for this, but it only works for full 2^n vectors.
+#if HWY_SVE_HAVE_2 && HWY_SVE_IS_POW2
+  if (detail::IsFull(d)) {
+    return detail::NativeTwoTableLookupLanes(Create2(d, a, b), idx);
+  }
+#endif
+  using TU = TFromD<RebindToUnsigned<decltype(d)>>;
+
+  // Reduce every index to its position within a single table.
+  const size_t lanes_per_table = Lanes(d);
+  const auto idx_in_table =
+      detail::AndN(idx, static_cast<TU>(lanes_per_table - 1));
+
+  // Look up in both tables, then pick per lane: an index left unchanged by the
+  // reduction refers to `a`.
+  const auto from_a = TableLookupLanes(a, idx_in_table);
+  const auto from_b = TableLookupLanes(b, idx_in_table);
+  const auto is_from_a = Eq(idx, idx_in_table);
+  return IfThenElse(is_from_a, from_a, from_b);
+}
+
+// Convenience overload without a tag: derives it from the vector type and
+// forwards to the tag-taking TwoTablesLookupLanes.
+template <class V>
+HWY_API V TwoTablesLookupLanes(V a, V b,
+                               VFromD<RebindToUnsigned<DFromV<V>>> idx) {
+  return TwoTablesLookupLanes(DFromV<V>(), a, b, idx);
+}
+
+// ------------------------------ SwapAdjacentBlocks (TableLookupLanes)
+
+namespace detail {
+
+// Returns the number of T lanes in one 16-byte block, or MaxLanes(d) if that
+// is smaller (a capped vector may be shorter than a block).
+template <typename T, size_t N, int kPow2>
+constexpr size_t LanesPerBlock(Simd<T, N, kPow2> d) {
+  return (MaxLanes(d) < 16 / sizeof(T)) ? MaxLanes(d) : (16 / sizeof(T));
+}
+
+}  // namespace detail
+
+// Exchanges neighbouring blocks. With a single block per vector
+// (HWY_SVE2_128) this returns `v` unchanged; with exactly two blocks
+// (HWY_SVE_256) it uses ConcatLowerUpper; otherwise it permutes lanes using
+// indices whose "lanes per block" bit is toggled.
+template <class V>
+HWY_API V SwapAdjacentBlocks(const V v) {
+  const DFromV<V> d;
+#if HWY_TARGET == HWY_SVE2_128
+  (void)d;
+  return v;
+#elif HWY_TARGET == HWY_SVE_256
+  return ConcatLowerUpper(d, v, v);
+#else
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  constexpr TU kLanesPerBlock = static_cast<TU>(detail::LanesPerBlock(d));
+  const VFromD<decltype(du)> swapped_idx =
+      detail::XorN(Iota(du, 0), kLanesPerBlock);
+  return TableLookupLanes(v, swapped_idx);
+#endif
+}
+
+// ------------------------------ Reverse
+
+namespace detail {
+
+// Generates one detail::ReverseFull(v) overload per lane type visited by
+// HWY_SVE_FOREACH; each forwards to the type-suffixed sv<OP> intrinsic
+// (OP = rev below). The generator macro is undefined again right after use.
+#define HWY_SVE_REVERSE(BASE, CHAR, BITS, HALF, NAME, OP)       \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME(HWY_SVE_V(BASE, BITS) v) { \
+    return sv##OP##_##CHAR##BITS(v);                            \
+  }
+
+HWY_SVE_FOREACH(HWY_SVE_REVERSE, ReverseFull, rev)
+#undef HWY_SVE_REVERSE
+
+}  // namespace detail
+
+// Reverses the order of the Lanes(d) lanes of `v`. The full hardware vector is
+// reversed first; if `d` does not cover it, the result is realigned with
+// Splice using a mask that excludes the first (all_lanes - want_lanes) lanes.
+template <class D, class V>
+HWY_API V Reverse(D d, V v) {
+  using T = TFromD<D>;
+  const auto reversed = detail::ReverseFull(v);
+  if (HWY_SVE_IS_POW2 && detail::IsFull(d)) return reversed;
+
+  // Shift right to remove extra (non-pow2 and remainder) lanes.
+  // TODO(janwas): on SVE2, use WHILEGE.
+  // A full-vector tag avoids FirstN truncating to the return vector size. Not
+  // must also be avoided because it is limited to SV_POW2.
+  const ScalableTag<T> dfull;
+  const size_t all_lanes = detail::AllHardwareLanes<T>();
+  const size_t want_lanes = Lanes(d);
+  HWY_DASSERT(want_lanes <= all_lanes);
+  const size_t num_extra = all_lanes - want_lanes;
+  const svbool_t all_true = detail::AllPTrue(dfull);
+  const svbool_t keep = svnot_b_z(all_true, FirstN(dfull, num_extra));
+  return detail::Splice(reversed, reversed, keep);
+}
+
+// ------------------------------ Reverse2
+
+// Per-target flag to prevent generic_ops-inl.h defining 8-bit Reverse2/4/8.
+#ifdef HWY_NATIVE_REVERSE2_8
+#undef HWY_NATIVE_REVERSE2_8
+#else
+#define HWY_NATIVE_REVERSE2_8
+#endif
+
+// 8-bit lanes: reinterprets the vector as 16-bit lanes and byte-reverses each
+// of them (svrevb), which swaps every pair of adjacent bytes.
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Reverse2(D d, const VFromD<D> v) {
+  const RebindToUnsigned<decltype(d)> du;
+  const RepartitionToWide<decltype(du)> dw;
+  const auto wide = BitCast(dw, v);
+  const auto swapped = svrevb_u16_x(detail::PTrue(d), wide);
+  return BitCast(d, swapped);
+}
+
+// 16-bit lanes: reinterprets the vector as 32-bit lanes and reverses the
+// halfwords within each of them (svrevh).
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Reverse2(D d, const VFromD<D> v) {
+  const RebindToUnsigned<decltype(d)> du;
+  const RepartitionToWide<decltype(du)> dw;
+  const auto wide = BitCast(dw, v);
+  const auto swapped = svrevh_u32_x(detail::PTrue(d), wide);
+  return BitCast(d, swapped);
+}
+
+// 32-bit lanes: reinterprets the vector as 64-bit lanes and reverses the words
+// within each of them (svrevw).
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> Reverse2(D d, const VFromD<D> v) {
+  const RebindToUnsigned<decltype(d)> du;
+  const RepartitionToWide<decltype(du)> dw;
+  const auto wide = BitCast(dw, v);
+  const auto swapped = svrevw_u64_x(detail::PTrue(d), wide);
+  return BitCast(d, swapped);
+}
+
+// 64-bit lanes: swaps each pair of adjacent lanes. Lane diagrams below list
+// the input as 3210.
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> Reverse2(D d, const VFromD<D> v) {  // 3210
+  (void)d;
+  const auto rotated = detail::Ext<1>(v, v);  // x321
+#if HWY_TARGET == HWY_SVE2_128
+  // A full 128-bit vector has only two lanes, so the rotation is the answer.
+  if (detail::IsFull(d)) {
+    return rotated;
+  }
+#endif
+  return detail::InterleaveEven(rotated, v);  // 2301
+}
+
+// ------------------------------ Reverse4 (TableLookupLanes)
+
+// 8-bit lanes: reinterprets the vector as 32-bit lanes and byte-reverses each
+// of them (svrevb), reversing every group of four bytes.
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Reverse4(D d, const VFromD<D> v) {
+  const RebindToUnsigned<decltype(d)> du;
+  using DU16 = RepartitionToWide<decltype(du)>;
+  const RepartitionToWide<DU16> du32;
+  const auto quads = BitCast(du32, v);
+  return BitCast(d, svrevb_u32_x(detail::PTrue(d), quads));
+}
+
+// 16-bit lanes: reinterprets the vector as 64-bit lanes and reverses the
+// halfwords within each of them (svrevh).
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Reverse4(D d, const VFromD<D> v) {
+  const RebindToUnsigned<decltype(d)> du;
+  using DU32 = RepartitionToWide<decltype(du)>;
+  const RepartitionToWide<DU32> du64;
+  const auto quads = BitCast(du64, v);
+  return BitCast(d, svrevh_u64_x(detail::PTrue(d), quads));
+}
+
+// 32-bit lanes: reverses each group of four lanes. When a full vector holds
+// exactly one group (HWY_SVE2_128), a whole-vector reverse suffices; otherwise
+// lanes are permuted with indices i ^ 3.
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> Reverse4(D d, const VFromD<D> v) {
+  const bool is_single_group =
+      (HWY_TARGET == HWY_SVE2_128) && detail::IsFull(d);
+  if (is_single_group) return detail::ReverseFull(v);
+
+  // TODO(janwas): is this approach faster than Shuffle0123?
+  const RebindToUnsigned<decltype(d)> du;
+  return TableLookupLanes(v, detail::XorN(Iota(du, 0), 3));
+}
+
+// 64-bit lanes: reverses each group of four lanes. When a full vector holds
+// exactly one group (HWY_SVE_256), a whole-vector reverse suffices; otherwise
+// lanes are permuted with indices i ^ 3.
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> Reverse4(D d, const VFromD<D> v) {
+  const bool is_single_group =
+      (HWY_TARGET == HWY_SVE_256) && detail::IsFull(d);
+  if (is_single_group) return detail::ReverseFull(v);
+
+  // TODO(janwas): is this approach faster than Shuffle0123?
+  const RebindToUnsigned<decltype(d)> du;
+  return TableLookupLanes(v, detail::XorN(Iota(du, 0), 3));
+}
+
+// ------------------------------ Reverse8 (TableLookupLanes)
+
+// 8-bit lanes: reinterprets the vector as 64-bit lanes and byte-reverses each
+// of them (svrevb), reversing every group of eight bytes.
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Reverse8(D d, const VFromD<D> v) {
+  const Repartition<uint64_t, decltype(d)> du64;
+  const auto octets = BitCast(du64, v);
+  const auto swapped = svrevb_u64_x(detail::PTrue(d), octets);
+  return BitCast(d, swapped);
+}
+
+// Lanes wider than 8 bits: reverses each group of eight lanes by permuting
+// with indices i ^ 7.
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Reverse8(D d, const VFromD<D> v) {
+  const RebindToUnsigned<decltype(d)> du;
+  return TableLookupLanes(v, detail::XorN(Iota(du, 0), 7));
+}
+
+// ------------------------------- ReverseBits
+
+#ifdef HWY_NATIVE_REVERSE_BITS_UI8
+#undef HWY_NATIVE_REVERSE_BITS_UI8
+#else
+#define HWY_NATIVE_REVERSE_BITS_UI8
+#endif
+
+#ifdef HWY_NATIVE_REVERSE_BITS_UI16_32_64
+#undef HWY_NATIVE_REVERSE_BITS_UI16_32_64
+#else
+#define HWY_NATIVE_REVERSE_BITS_UI16_32_64
+#endif
+
+// Generates one ReverseBits(v) overload per integer lane type visited by
+// HWY_SVE_FOREACH_UI; each forwards to the type-suffixed sv<OP>_x intrinsic
+// (OP = rbit below) under the predicate detail::PTrue(d). The generator macro
+// is undefined again right after use.
+#define HWY_SVE_REVERSE_BITS(BASE, CHAR, BITS, HALF, NAME, OP)  \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME(HWY_SVE_V(BASE, BITS) v) { \
+    const DFromV<decltype(v)> d;                                \
+    return sv##OP##_##CHAR##BITS##_x(detail::PTrue(d), v);      \
+  }
+
+HWY_SVE_FOREACH_UI(HWY_SVE_REVERSE_BITS, ReverseBits, rbit)
+#undef HWY_SVE_REVERSE_BITS
+
+// ------------------------------ SlideUpLanes
+
+// Slides `v` up by `amt` lanes, implemented as a Splice of `v` and a zero
+// vector under the mask FirstN(d, amt).
+template <class D>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+  const svbool_t first_amt = FirstN(d, amt);
+  const auto zeros = Zero(d);
+  return detail::Splice(v, zeros, first_amt);
+}
+
+// ------------------------------ Slide1Up
+
+#ifdef HWY_NATIVE_SLIDE1_UP_DOWN
+#undef HWY_NATIVE_SLIDE1_UP_DOWN
+#else
+#define HWY_NATIVE_SLIDE1_UP_DOWN
+#endif
+
+// Slides `v` up by exactly one lane; forwards to SlideUpLanes.
+template <class D>
+HWY_API VFromD<D> Slide1Up(D d, VFromD<D> v) {
+  constexpr size_t kAmount = 1;
+  return SlideUpLanes(d, v, kAmount);
+}
+
+// ------------------------------ SlideDownLanes (TableLookupLanes)
+
+// Slides `v` down by `amt` lanes: lane i of the result is looked up at index
+// i + amt, and all lanes from Lanes(d) - amt upwards are zeroed.
+template <class D>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  const size_t num_valid = Lanes(d) - amt;
+  const auto src_idx = Iota(du, static_cast<TU>(amt));
+  const auto gathered = TableLookupLanes(v, src_idx);
+  return IfThenElseZero(FirstN(d, num_valid), gathered);
+}
+
+// ------------------------------ Slide1Down
+
+// Slides `v` down by exactly one lane; forwards to SlideDownLanes.
+template <class D>
+HWY_API VFromD<D> Slide1Down(D d, VFromD<D> v) {
+  constexpr size_t kAmount = 1;
+  return SlideDownLanes(d, v, kAmount);
+}
+
+// ------------------------------ Block insert/extract/broadcast ops
+#if HWY_TARGET != HWY_SVE2_128
+
+#ifdef HWY_NATIVE_BLK_INSERT_EXTRACT
+#undef HWY_NATIVE_BLK_INSERT_EXTRACT
+#else
+#define HWY_NATIVE_BLK_INSERT_EXTRACT
+#endif
+
+// Returns `v` with block number kBlockIdx replaced by the first block of
+// `blk_to_insert`. kBlockIdx is validated at compile time.
+template <int kBlockIdx, class V>
+HWY_API V InsertBlock(V v, V blk_to_insert) {
+  const DFromV<decltype(v)> d;
+  static_assert(0 <= kBlockIdx && kBlockIdx < d.MaxBlocks(),
+                "Invalid block index");
+
+#if HWY_TARGET == HWY_SVE_256
+  // Only two blocks exist, so a concatenation of halves is sufficient.
+  if (kBlockIdx == 0) return ConcatUpperLower(d, v, blk_to_insert);
+  return ConcatLowerLower(d, blk_to_insert, v);
+#else
+  constexpr size_t kLanesPerBlock = detail::LanesPerBlock(d);
+  constexpr size_t kBegin = static_cast<size_t>(kBlockIdx) * kLanesPerBlock;
+  constexpr size_t kEnd = kBegin + kLanesPerBlock;
+
+  // Splice under a mask of the first kBegin lanes, then keep the spliced
+  // result only for lanes below kEnd; the remaining lanes come from `v`.
+  const auto below_block = FirstN(d, kBegin);
+  const auto through_block = FirstN(d, kEnd);
+  const auto spliced = detail::Splice(blk_to_insert, v, below_block);
+  return IfThenElse(through_block, spliced, v);
+#endif
+}
+
+// Returns a vector whose first block is block number kBlockIdx of `v`.
+// Block 0 needs no work; kBlockIdx is validated at compile time.
+template <int kBlockIdx, class V>
+HWY_API V ExtractBlock(V v) {
+  const DFromV<decltype(v)> d;
+  static_assert(0 <= kBlockIdx && kBlockIdx < d.MaxBlocks(),
+                "Invalid block index");
+
+  if (kBlockIdx == 0) return v;
+
+#if HWY_TARGET == HWY_SVE_256
+  return UpperHalf(Half<decltype(d)>(), v);
+#else
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  constexpr size_t kLanesPerBlock = detail::LanesPerBlock(d);
+  constexpr size_t kBlockOffset =
+      static_cast<size_t>(kBlockIdx) * kLanesPerBlock;
+  // Iota starting at -kBlockOffset (mod 2^bits) is below kLanesPerBlock
+  // exactly for the lanes of the requested block.
+  const auto rel_idx = Iota(du, static_cast<TU>(0u - kBlockOffset));
+  const auto in_block = detail::LtN(rel_idx, static_cast<TU>(kLanesPerBlock));
+  return detail::Splice(v, v, RebindMask(d, in_block));
+#endif
+}
+
+// Returns a vector in which every block is a copy of block number kBlockIdx
+// of `v`. kBlockIdx is validated at compile time.
+template <int kBlockIdx, class V>
+HWY_API V BroadcastBlock(V v) {
+  const DFromV<decltype(v)> d;
+  static_assert(0 <= kBlockIdx && kBlockIdx < d.MaxBlocks(),
+                "Invalid block index");
+
+#if HWY_TARGET == HWY_SVE_256
+  if (kBlockIdx == 0) return ConcatLowerLower(d, v, v);
+  return ConcatUpperUpper(d, v, v);
+#else
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  constexpr size_t kLanesPerBlock = detail::LanesPerBlock(d);
+  constexpr size_t kBlockOffset =
+      static_cast<size_t>(kBlockIdx) * kLanesPerBlock;
+
+  // Index = (lane position within its block) + start of the source block.
+  const auto lane_in_block =
+      detail::AndN(Iota(du, TU{0}), static_cast<TU>(kLanesPerBlock - 1));
+  const auto src_idx =
+      detail::AddN(lane_in_block, static_cast<TU>(kBlockOffset));
+  return TableLookupLanes(v, src_idx);
+#endif
+}
+
+#endif  // HWY_TARGET != HWY_SVE2_128
+
+// ------------------------------ Compress (PromoteTo)
+
+// Compile-time flag: `value` is nonzero only for 8-byte T on the fixed-size
+// targets HWY_SVE_256 and HWY_SVE2_128, and zero everywhere else.
+template <typename T>
+struct CompressIsPartition {
+#if HWY_TARGET != HWY_SVE_256 && HWY_TARGET != HWY_SVE2_128
+  enum { value = 0 };
+#else
+  // Optimization for 64-bit lanes (could also be applied to 32-bit, but that
+  // requires a larger table).
+  enum { value = (sizeof(T) == 8) };
+#endif  // HWY_TARGET != HWY_SVE_256 && HWY_TARGET != HWY_SVE2_128
+};
+
+// Generates Compress(v, mask) overloads that forward to the type-suffixed
+// sv<OP> intrinsic (OP = compact below). On HWY_SVE_256 and HWY_SVE2_128 only
+// the 32-bit integer and float lane types are generated here; on other
+// targets both 32- and 64-bit lane types are generated. The generator macro
+// is undefined again right after use.
+#define HWY_SVE_COMPRESS(BASE, CHAR, BITS, HALF, NAME, OP)                     \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME(HWY_SVE_V(BASE, BITS) v, svbool_t mask) { \
+    return sv##OP##_##CHAR##BITS(mask, v);                                     \
+  }
+
+#if HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128
+HWY_SVE_FOREACH_UI32(HWY_SVE_COMPRESS, Compress, compact)
+HWY_SVE_FOREACH_F32(HWY_SVE_COMPRESS, Compress, compact)
+#else
+HWY_SVE_FOREACH_UIF3264(HWY_SVE_COMPRESS, Compress, compact)
+#endif
+#undef HWY_SVE_COMPRESS
+
+#if HWY_TARGET == HWY_SVE_256 || HWY_IDE
+// Compress for 64-bit lanes via table lookup: turns `mask` into an offset into
+// a precomputed table of lane indices and permutes `v` with the selected row.
+template <class V, HWY_IF_T_SIZE_V(V, 8)>
+HWY_API V Compress(V v, svbool_t mask) {
+  const DFromV<V> d;
+  const RebindToUnsigned<decltype(d)> du64;
+
+  // Convert mask into bitfield via horizontal sum (faster than ORV) of masked
+  // bits 1, 2, 4, 8. Pre-multiply by N so we can use it as an offset for
+  // SetTableIndices.
+  // Lane i of `bits` holds 1 << (i + 2), i.e. the mask bit already scaled by 4.
+  const svuint64_t bits = Shl(Set(du64, 1), Iota(du64, 2));
+  const size_t offset = detail::SumOfLanesM(mask, bits);
+
+  // See CompressIsPartition.
+  // 16 rows (one per possible 4-bit mask) of 4 lane indices each.
+  alignas(16) static constexpr uint64_t table[4 * 16] = {
+      // PrintCompress64x4Tables
+      0, 1, 2, 3, 0, 1, 2, 3, 1, 0, 2, 3, 0, 1, 2, 3, 2, 0, 1, 3, 0, 2,
+      1, 3, 1, 2, 0, 3, 0, 1, 2, 3, 3, 0, 1, 2, 0, 3, 1, 2, 1, 3, 0, 2,
+      0, 1, 3, 2, 2, 3, 0, 1, 0, 2, 3, 1, 1, 2, 3, 0, 0, 1, 2, 3};
+  return TableLookupLanes(v, SetTableIndices(d, table + offset));
+}
+
+#endif  // HWY_TARGET == HWY_SVE_256
+#if HWY_TARGET == HWY_SVE2_128 || HWY_IDE
+// Compress for 64-bit lanes when the vector holds two of them.
+template <class V, HWY_IF_T_SIZE_V(V, 8)>
+HWY_API V Compress(V v, svbool_t mask) {
+  // Only mask == 10 requires a change: swap the lanes via splice. Masks 00 and
+  // 11 leave v unchanged. With 10, splice sets the lower half to the upper
+  // half and fills the remaining upper half from the lower half of the second
+  // v. 01 is invalid as a splice mask because it would ConcatLowerLower.
+  // Broadcasting the lower mask lane (zip1) and applying AndNot keeps 10
+  // unchanged and maps everything else to 00.
+  const svbool_t lower_bcast = svzip1_b64(mask, mask);
+  const svbool_t swap_mask = AndNot(lower_bcast, mask);
+  return detail::Splice(v, v, swap_mask);
+}
+
+#endif  // HWY_TARGET == HWY_SVE2_128
+
+// Compress for 16-bit lanes: widens both halves of the vector and mask to
+// 32 bits, compresses each half, narrows again and joins the two results.
+// svfloat16_t is rejected here and handled by a dedicated overload.
+template <class V, HWY_IF_T_SIZE_V(V, 2)>
+HWY_API V Compress(V v, svbool_t mask16) {
+  static_assert(!IsSame<V, svfloat16_t>(), "Must use overload");
+  const DFromV<V> d16;
+  const RepartitionToWide<decltype(d16)> dw;
+
+  // Lower half: promote vector and mask to 32-bit, then compress.
+  const svbool_t mask32L = svunpklo_b(mask16);
+  const auto compressedL = Compress(PromoteTo(dw, v), mask32L);
+
+  // Upper half: same steps.
+  const svbool_t mask32H = svunpkhi_b(mask16);
+  const auto compressedH = Compress(detail::PromoteUpperTo(dw, v), mask32H);
+
+  // Demote to 16-bit (already in range) - separately so we can splice.
+  const V narrowL = BitCast(d16, compressedL);
+  const V narrowH = BitCast(d16, compressedH);
+  const V packedL = detail::ConcatEvenFull(narrowL, narrowL);  // lower half
+  const V packedH = detail::ConcatEvenFull(narrowH, narrowH);
+
+  // The two parts have non-constexpr length, so the only way to combine them
+  // is Splice with a synthesized mask. NOTE: this function uses full vectors
+  // (SV_ALL instead of SV_POW2), hence the unmasked count.
+  const size_t num_lower = detail::CountTrueFull(dw, mask32L);
+  return detail::Splice(packedH, packedL, FirstN(d16, num_lower));
+}
+
+// float16_t vectors are compressed as same-sized signed integers so that the
+// integer path (which relies on ConcatEven) can be reused.
+HWY_API svfloat16_t Compress(svfloat16_t v, svbool_t mask16) {
+  const DFromV<decltype(v)> df;
+  const RebindToSigned<decltype(df)> di;
+  const auto as_int = BitCast(di, v);
+  return BitCast(df, Compress(as_int, mask16));
+}
+
+// ------------------------------ CompressNot
+
+// 2- or 4-byte lanes: CompressNot is Compress with the inverted mask.
+template <class V, HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 2) | (1 << 4))>
+HWY_API V CompressNot(V v, const svbool_t mask) {
+  const svbool_t inverted = Not(mask);
+  return Compress(v, inverted);
+}
+
+// CompressNot for 64-bit lanes. Which path is compiled depends on the target:
+// a splice-based swap (HWY_SVE2_128), a table lookup (HWY_SVE_256), or, if
+// neither returns first, Compress with the inverted mask.
+template <class V, HWY_IF_T_SIZE_V(V, 8)>
+HWY_API V CompressNot(V v, svbool_t mask) {
+#if HWY_TARGET == HWY_SVE2_128 || HWY_IDE
+  // If mask == 01: swap via splice. A mask of 00 or 11 leaves v unchanged, 10
+  // swaps upper/lower (the lower half is set to the upper half, and the
+  // remaining upper half is filled from the lower half of the second v), and
+  // 01 is invalid because it would ConcatLowerLower. zip1 and AndNot map
+  // 01 to 10, and everything else to 00.
+  const svbool_t maskLL = svzip1_b64(mask, mask);  // broadcast lower lane
+  return detail::Splice(v, v, AndNot(mask, maskLL));
+#endif
+#if HWY_TARGET == HWY_SVE_256 || HWY_IDE
+  const DFromV<V> d;
+  const RebindToUnsigned<decltype(d)> du64;
+
+  // Convert mask into bitfield via horizontal sum (faster than ORV) of masked
+  // bits 1, 2, 4, 8. Pre-multiply by N so we can use it as an offset for
+  // SetTableIndices.
+  // Lane i of `bits` holds 1 << (i + 2), i.e. the mask bit already scaled by 4.
+  const svuint64_t bits = Shl(Set(du64, 1), Iota(du64, 2));
+  const size_t offset = detail::SumOfLanesM(mask, bits);
+
+  // See CompressIsPartition.
+  // 16 rows (one per possible 4-bit mask) of 4 lane indices each.
+  alignas(16) static constexpr uint64_t table[4 * 16] = {
+      // PrintCompressNot64x4Tables
+      0, 1, 2, 3, 1, 2, 3, 0, 0, 2, 3, 1, 2, 3, 0, 1, 0, 1, 3, 2, 1, 3,
+      0, 2, 0, 3, 1, 2, 3, 0, 1, 2, 0, 1, 2, 3, 1, 2, 0, 3, 0, 2, 1, 3,
+      2, 0, 1, 3, 0, 1, 2, 3, 1, 0, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
+  return TableLookupLanes(v, SetTableIndices(d, table + offset));
+#endif  // HWY_TARGET == HWY_SVE_256
+
+  // Generic fallback; not reached when one of the target-specific sections
+  // above has already returned.
+  return Compress(v, Not(mask));
+}
+
+// ------------------------------ CompressBlocksNot
+// CompressBlocksNot for u64 vectors. On HWY_SVE2_128 the input is returned
+// unchanged. On HWY_SVE_256 the two halves of `v` are exchanged or kept
+// according to a 4-row index table selected by predicate bits 0 and 16.
+// Otherwise this forwards to CompressNot.
+HWY_API svuint64_t CompressBlocksNot(svuint64_t v, svbool_t mask) {
+#if HWY_TARGET == HWY_SVE2_128
+  (void)mask;
+  return v;
+#endif
+#if HWY_TARGET == HWY_SVE_256 || HWY_IDE
+  uint64_t bits = 0;           // predicate reg is 32-bit
+  CopyBytes<4>(&mask, &bits);  // not same size - 64-bit more efficient
+  // Concatenate LSB for upper and lower blocks, pre-scale by 4 for table idx.
+  const size_t offset = ((bits & 1) ? 4u : 0u) + ((bits & 0x10000) ? 8u : 0u);
+  // See CompressIsPartition. Manually generated; flip halves if mask = [0, 1].
+  // Only the row at offset 4 (bit 0 set, bit 16 clear) exchanges the halves;
+  // the other three rows are the identity permutation.
+  alignas(16) static constexpr uint64_t table[4 * 4] = {0, 1, 2, 3, 2, 3, 0, 1,
+                                                        0, 1, 2, 3, 0, 1, 2, 3};
+  const ScalableTag<uint64_t> d;
+  return TableLookupLanes(v, SetTableIndices(d, table + offset));
+#endif
+
+  // Generic fallback; not reached when one of the target-specific sections
+  // above has already returned.
+  return CompressNot(v, mask);
+}
+
+// ------------------------------ CompressStore
+// Stores Compress(v, mask) as a whole vector to `unaligned` and returns the
+// number of true mask lanes.
+template <class V, class D, HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API size_t CompressStore(const V v, const svbool_t mask, const D d,
+                             TFromD<D>* HWY_RESTRICT unaligned) {
+  const auto packed = Compress(v, mask);
+  StoreU(packed, d, unaligned);
+  return CountTrue(d, mask);
+}
+
+// ------------------------------ CompressBlendedStore
+// Like CompressStore, but uses BlendedStore with a FirstN(count) mask so only
+// the first `count` lanes are written. Returns `count`, the number of true
+// mask lanes.
+template <class V, class D, HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API size_t CompressBlendedStore(const V v, const svbool_t mask, const D d,
+                                    TFromD<D>* HWY_RESTRICT unaligned) {
+  const size_t num_selected = CountTrue(d, mask);
+  const auto packed = Compress(v, mask);
+  BlendedStore(packed, FirstN(d, num_selected), d, unaligned);
+  return num_selected;
+}
+
+// ================================================== MASK (2)
+
+// ------------------------------ FindKnownLastTrue
+// Returns the index of the last true lane of `m`, restricted to the lanes of
+// `d`. The name indicates the caller knows that at least one lane is true.
+template <class D>
+HWY_API size_t FindKnownLastTrue(D d, svbool_t m) {
+  const RebindToUnsigned<decltype(d)> du;
+  const svbool_t in_range = And(m, detail::MakeMask(d));
+  const auto lane_ids = Iota(du, 0);
+  return static_cast<size_t>(
+      detail::ExtractLastMatchingLaneM(lane_ids, in_range));
+}
+
+// ------------------------------ FindLastTrue
+// Returns the index of the last true lane of `m`, or -1 if no lane is true.
+template <class D>
+HWY_API intptr_t FindLastTrue(D d, svbool_t m) {
+  if (AllFalse(d, m)) return intptr_t{-1};
+  return static_cast<intptr_t>(FindKnownLastTrue(d, m));
+}
+
+// ================================================== BLOCKWISE
+
+// ------------------------------ CombineShiftRightBytes
+
+// Prevent accidentally using these for 128-bit vectors - should not be
+// necessary.
+#if HWY_TARGET != HWY_SVE2_128
+namespace detail {
+
+// The Highway API mandates x86-compatible behaviour: TableLookupBytes offsets
+// are implicitly relative to the start of their 128-bit block. This clears the
+// within-block bits of `iota0`, leaving for each lane the index of the first
+// lane of its block.
+template <class D, class V>
+HWY_INLINE V OffsetsOf128BitBlocks(const D d, const V iota0) {
+  using T = MakeUnsigned<TFromD<D>>;
+  const T within_block_bits = static_cast<T>(LanesPerBlock(d) - 1);
+  return detail::AndNotN(within_block_bits, iota0);
+}
+
+// 8-bit lanes: returns a mask that is true for lanes whose position within
+// their block is below kLanes.
+template <size_t kLanes, class D, HWY_IF_T_SIZE_D(D, 1)>
+svbool_t FirstNPerBlock(D d) {
+  const RebindToUnsigned<decltype(d)> du;
+  constexpr size_t kPerBlock = detail::LanesPerBlock(du);
+  // Per-block pattern of lane positions, reduced modulo the block length.
+  const svuint8_t pos_in_block = svdupq_n_u8(
+      0 % kPerBlock, 1 % kPerBlock, 2 % kPerBlock, 3 % kPerBlock,
+      4 % kPerBlock, 5 % kPerBlock, 6 % kPerBlock, 7 % kPerBlock,
+      8 % kPerBlock, 9 % kPerBlock, 10 % kPerBlock, 11 % kPerBlock,
+      12 % kPerBlock, 13 % kPerBlock, 14 % kPerBlock, 15 % kPerBlock);
+  return detail::LtN(BitCast(du, pos_in_block), kLanes);
+}
+// 16-bit lanes: returns a mask that is true for lanes whose position within
+// their block is below kLanes.
+template <size_t kLanes, class D, HWY_IF_T_SIZE_D(D, 2)>
+svbool_t FirstNPerBlock(D d) {
+  const RebindToUnsigned<decltype(d)> du;
+  constexpr size_t kPerBlock = detail::LanesPerBlock(du);
+  // Per-block pattern of lane positions, reduced modulo the block length.
+  const svuint16_t pos_in_block = svdupq_n_u16(
+      0 % kPerBlock, 1 % kPerBlock, 2 % kPerBlock, 3 % kPerBlock,
+      4 % kPerBlock, 5 % kPerBlock, 6 % kPerBlock, 7 % kPerBlock);
+  return detail::LtN(BitCast(du, pos_in_block), kLanes);
+}
+// 32-bit lanes: returns a mask that is true for lanes whose position within
+// their block is below kLanes.
+template <size_t kLanes, class D, HWY_IF_T_SIZE_D(D, 4)>
+svbool_t FirstNPerBlock(D d) {
+  const RebindToUnsigned<decltype(d)> du;
+  constexpr size_t kPerBlock = detail::LanesPerBlock(du);
+  // Per-block pattern of lane positions, reduced modulo the block length.
+  const svuint32_t pos_in_block = svdupq_n_u32(
+      0 % kPerBlock, 1 % kPerBlock, 2 % kPerBlock, 3 % kPerBlock);
+  return detail::LtN(BitCast(du, pos_in_block), kLanes);
+}
+// 64-bit lanes: returns a mask that is true for lanes whose position within
+// their block is below kLanes.
+template <size_t kLanes, class D, HWY_IF_T_SIZE_D(D, 8)>
+svbool_t FirstNPerBlock(D d) {
+  const RebindToUnsigned<decltype(d)> du;
+  constexpr size_t kPerBlock = detail::LanesPerBlock(du);
+  // Per-block pattern of lane positions, reduced modulo the block length.
+  const svuint64_t pos_in_block = svdupq_n_u64(0 % kPerBlock, 1 % kPerBlock);
+  return detail::LtN(BitCast(du, pos_in_block), kLanes);
+}
+
+}  // namespace detail
+#endif  // HWY_TARGET != HWY_SVE2_128
+
+// Per 128-bit block: combines `hi` and `lo` shifted right by kBytes bytes.
+// With a single block (HWY_SVE2_128) one Ext suffices; otherwise `lo` is moved
+// down, `hi` is moved up, and the two are blended per block.
+template <size_t kBytes, class D, class V = VFromD<D>>
+HWY_API V CombineShiftRightBytes(const D d, const V hi, const V lo) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  const auto hi_bytes = BitCast(d8, hi);
+  const auto lo_bytes = BitCast(d8, lo);
+#if HWY_TARGET == HWY_SVE2_128
+  return BitCast(d, detail::Ext<kBytes>(hi_bytes, lo_bytes));
+#else
+  // The first (16 - kBytes) bytes of every block come from `lo`.
+  const svbool_t from_lo = detail::FirstNPerBlock<16 - kBytes>(d8);
+  const auto lo_moved_down = detail::Ext<kBytes>(lo_bytes, lo_bytes);
+  const auto hi_moved_up =
+      detail::Splice(hi_bytes, hi_bytes, FirstN(d8, 16 - kBytes));
+  return BitCast(d, IfThenElse(from_lo, lo_moved_down, hi_moved_up));
+#endif
+}
+
+// ------------------------------ Shuffle2301
+// 32-bit lanes only: implemented as Reverse2, i.e. swaps adjacent lane pairs.
+template <class V>
+HWY_API V Shuffle2301(const V v) {
+  static_assert(sizeof(TFromD<DFromV<V>>) == 4, "Defined for 32-bit types");
+  return Reverse2(DFromV<V>(), v);
+}
+
+// ------------------------------ Shuffle2103
+// 32-bit lanes only: implemented as CombineShiftRightBytes<12> of the byte
+// view of `v` with itself.
+template <class V>
+HWY_API V Shuffle2103(const V v) {
+  const DFromV<V> d;
+  static_assert(sizeof(TFromD<decltype(d)>) == 4, "Defined for 32-bit types");
+  const Repartition<uint8_t, decltype(d)> d8;
+  const svuint8_t bytes = BitCast(d8, v);
+  const svuint8_t rotated = CombineShiftRightBytes<12>(d8, bytes, bytes);
+  return BitCast(d, rotated);
+}
+
+// ------------------------------ Shuffle0321
+// 32-bit lanes only: implemented as CombineShiftRightBytes<4> of the byte
+// view of `v` with itself.
+template <class V>
+HWY_API V Shuffle0321(const V v) {
+  const DFromV<V> d;
+  static_assert(sizeof(TFromD<decltype(d)>) == 4, "Defined for 32-bit types");
+  const Repartition<uint8_t, decltype(d)> d8;
+  const svuint8_t bytes = BitCast(d8, v);
+  const svuint8_t rotated = CombineShiftRightBytes<4>(d8, bytes, bytes);
+  return BitCast(d, rotated);
+}
+
+// ------------------------------ Shuffle1032
+// 32-bit lanes only: implemented as CombineShiftRightBytes<8> of the byte
+// view of `v` with itself.
+template <class V>
+HWY_API V Shuffle1032(const V v) {
+  const DFromV<V> d;
+  static_assert(sizeof(TFromD<decltype(d)>) == 4, "Defined for 32-bit types");
+  const Repartition<uint8_t, decltype(d)> d8;
+  const svuint8_t bytes = BitCast(d8, v);
+  const svuint8_t rotated = CombineShiftRightBytes<8>(d8, bytes, bytes);
+  return BitCast(d, rotated);
+}
+
+// ------------------------------ Shuffle01
+// 64-bit lanes only: implemented as CombineShiftRightBytes<8> of the byte
+// view of `v` with itself.
+template <class V>
+HWY_API V Shuffle01(const V v) {
+  const DFromV<V> d;
+  static_assert(sizeof(TFromD<decltype(d)>) == 8, "Defined for 64-bit types");
+  const Repartition<uint8_t, decltype(d)> d8;
+  const svuint8_t bytes = BitCast(d8, v);
+  const svuint8_t rotated = CombineShiftRightBytes<8>(d8, bytes, bytes);
+  return BitCast(d, rotated);
+}
+
+// ------------------------------ Shuffle0123
+// Composition of Shuffle1032 followed by Shuffle2301.
+template <class V>
+HWY_API V Shuffle0123(const V v) {
+  const V after_1032 = Shuffle1032(v);
+  return Shuffle2301(after_1032);
+}
+
+// ------------------------------ ReverseBlocks (Reverse, Shuffle01)
+// Reverses the order of blocks in `v`. Fixed-size targets use shortcuts where
+// the vector has one or two blocks; otherwise all 64-bit lanes are reversed
+// and then the two lanes inside each block are swapped back with Shuffle01.
+template <class D, class V = VFromD<D>>
+HWY_API V ReverseBlocks(D d, V v) {
+#if HWY_TARGET == HWY_SVE_256
+  // Full vector: two blocks to swap. Half vector: a single block.
+  if (detail::IsFull(d)) return SwapAdjacentBlocks(v);
+  if (detail::IsFull(Twice<D>())) return v;
+#elif HWY_TARGET == HWY_SVE2_128
+  (void)d;
+  return v;
+#endif
+  const Repartition<uint64_t, D> du64;
+  const auto lanes64 = BitCast(du64, v);
+  const auto lanes_reversed = Reverse(du64, lanes64);
+  return BitCast(d, Shuffle01(lanes_reversed));
+}
+
+// ------------------------------ TableLookupBytes
+
+// Byte-granular table lookup with indices relative to each block: unless the
+// vector is a single block (HWY_SVE2_128), the per-block base offset is added
+// to the indices before the lane lookup.
+template <class V, class VI>
+HWY_API VI TableLookupBytes(const V v, const VI idx) {
+  const DFromV<VI> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  const auto table_bytes = BitCast(du8, v);
+  const auto idx_bytes = BitCast(du8, idx);
+#if HWY_TARGET == HWY_SVE2_128
+  return BitCast(d, TableLookupLanes(table_bytes, idx_bytes));
+#else
+  const auto block_base = detail::OffsetsOf128BitBlocks(du8, Iota(du8, 0));
+  const auto absolute_idx = Add(idx_bytes, block_base);
+  return BitCast(d, TableLookupLanes(table_bytes, absolute_idx));
+#endif
+}
+
+// Like TableLookupBytes, but result bytes whose index is negative (as int8_t)
+// are zero.
+template <class V, class VI>
+HWY_API VI TableLookupBytesOr0(const V v, const VI idx) {
+  const DFromV<VI> d;
+  // Mask size must match vector type, so cast everything to this type.
+  const Repartition<int8_t, decltype(d)> di8;
+
+  const auto idx_i8 = BitCast(di8, idx);
+  const auto is_negative = detail::LtN(idx_i8, 0);
+  const auto found = TableLookupBytes(BitCast(di8, v), idx_i8);
+  return BitCast(d, IfThenZeroElse(is_negative, found));
+}
+
+// ------------------------------ Broadcast
+
+#ifdef HWY_NATIVE_BROADCASTLANE
+#undef HWY_NATIVE_BROADCASTLANE
+#else
+#define HWY_NATIVE_BROADCASTLANE
+#endif
+
+namespace detail {
+// Generates one detail::BroadcastLane<kLane>(v) overload per lane type visited
+// by HWY_SVE_FOREACH; each forwards to the type-suffixed sv<OP> intrinsic
+// (OP = dup_lane below) with the compile-time lane index kLane. The generator
+// macro is undefined again right after use.
+#define HWY_SVE_BROADCAST(BASE, CHAR, BITS, HALF, NAME, OP)        \
+  template <int kLane>                                             \
+  HWY_INLINE HWY_SVE_V(BASE, BITS) NAME(HWY_SVE_V(BASE, BITS) v) { \
+    return sv##OP##_##CHAR##BITS(v, kLane);                        \
+  }
+
+HWY_SVE_FOREACH(HWY_SVE_BROADCAST, BroadcastLane, dup_lane)
+#undef HWY_SVE_BROADCAST
+}  // namespace detail
+
+// Broadcasts lane kLane of each block to all lanes of that block. kLane must
+// be a valid lane position within a block (checked at compile time).
+template <int kLane, class V>
+HWY_API V Broadcast(const V v) {
+  const DFromV<V> d;
+  const RebindToUnsigned<decltype(d)> du;
+  constexpr size_t kLanesPerBlock = detail::LanesPerBlock(du);
+  static_assert(0 <= kLane && kLane < kLanesPerBlock, "Invalid lane");
+#if HWY_TARGET == HWY_SVE2_128
+  return detail::BroadcastLane<kLane>(v);
+#else
+  // Index of the first lane of each block, plus kLane when it is nonzero.
+  const auto block_base = detail::OffsetsOf128BitBlocks(du, Iota(du, 0));
+  if (kLane == 0) return TableLookupLanes(v, block_base);
+  return TableLookupLanes(v, detail::AddN(block_base, kLane));
+#endif
+}
+
+// Broadcasts lane kLane of the whole vector via detail::BroadcastLane. kLane
+// is checked at compile time against HWY_MAX_LANES_V(V).
+template <int kLane, class V>
+HWY_API V BroadcastLane(const V v) {
+  static_assert(0 <= kLane && kLane < HWY_MAX_LANES_V(V), "Invalid lane");
+  const V broadcasted = detail::BroadcastLane<kLane>(v);
+  return broadcasted;
+}
+
+// ------------------------------ ShiftLeftLanes
+
+// Shifts lanes up by kLanes with zeros shifted in. On multi-block targets the
+// first kLanes lanes of every block are zeroed to match x86 semantics.
+template <size_t kLanes, class D, class V = VFromD<D>>
+HWY_API V ShiftLeftLanes(D d, const V v) {
+  const auto zeros = Zero(d);
+  const auto moved_up = detail::Splice(v, zeros, FirstN(d, kLanes));
+#if HWY_TARGET != HWY_SVE2_128
+  // Match x86 semantics by zeroing lower lanes in 128-bit blocks
+  const svbool_t vacated = detail::FirstNPerBlock<kLanes>(d);
+  return IfThenElse(vacated, zeros, moved_up);
+#else
+  return moved_up;
+#endif
+}
+
+// Convenience overload without a tag: derives it from the vector type and
+// forwards to the tag-taking ShiftLeftLanes.
+template <size_t kLanes, class V>
+HWY_API V ShiftLeftLanes(const V v) {
+  const DFromV<V> d;
+  return ShiftLeftLanes<kLanes>(d, v);
+}
+
+// ------------------------------ ShiftRightLanes
+// Shifts lanes down by kLanes with zeros shifted in. On multi-block targets
+// the last kLanes lanes of every block are zeroed to match x86 semantics.
+template <size_t kLanes, class D, class V = VFromD<D>>
+HWY_API V ShiftRightLanes(D d, V v) {
+  // For capped/fractional vectors, clear upper lanes so we shift in zeros.
+  if (!detail::IsFull(d)) {
+    v = IfThenElseZero(detail::MakeMask(d), v);
+  }
+
+#if HWY_TARGET == HWY_SVE2_128
+  return detail::Ext<kLanes>(Zero(d), v);
+#else
+  constexpr size_t kLanesPerBlock = detail::LanesPerBlock(d);
+  constexpr size_t kNumKept = kLanesPerBlock - kLanes;
+  const auto moved_down = detail::Ext<kLanes>(v, v);
+  // Match x86 semantics by zeroing upper lanes in 128-bit blocks
+  const svbool_t kept = detail::FirstNPerBlock<kNumKept>(d);
+  return IfThenElseZero(kept, moved_down);
+#endif
+}
+
+// ------------------------------ ShiftLeftBytes
+
+// Shifts left by kBytes bytes: reinterprets `v` as bytes, applies the tag-less
+// ShiftLeftLanes<kBytes>, and casts back.
+template <int kBytes, class D, class V = VFromD<D>>
+HWY_API V ShiftLeftBytes(const D d, const V v) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  const auto bytes = BitCast(d8, v);
+  return BitCast(d, ShiftLeftLanes<kBytes>(bytes));
+}
+
+// Convenience overload without a tag: derives it from the vector type and
+// forwards to the tag-taking ShiftLeftBytes.
+template <int kBytes, class V>
+HWY_API V ShiftLeftBytes(const V v) {
+  const DFromV<V> d;
+  return ShiftLeftBytes<kBytes>(d, v);
+}
+
+// ------------------------------ ShiftRightBytes
+// Shifts right by kBytes bytes: reinterprets `v` as bytes, applies
+// ShiftRightLanes<kBytes> with the byte tag, and casts back.
+template <int kBytes, class D, class V = VFromD<D>>
+HWY_API V ShiftRightBytes(const D d, const V v) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  const auto bytes = BitCast(d8, v);
+  return BitCast(d, ShiftRightLanes<kBytes>(d8, bytes));
+}
+
+// ------------------------------ ZipLower
+
+// Interleaves the lower lanes of `a` and `b` and reinterprets the result as
+// lanes of twice the width (tag `dw`). The lane type of V must match the
+// narrow counterpart of DW.
+template <class V, class DW = RepartitionToWide<DFromV<V>>>
+HWY_API VFromD<DW> ZipLower(DW dw, V a, V b) {
+  const RepartitionToNarrow<DW> dn;
+  static_assert(IsSame<TFromD<decltype(dn)>, TFromV<V>>(), "D/V mismatch");
+  const auto interleaved = InterleaveLower(dn, a, b);
+  return BitCast(dw, interleaved);
+}
+// Tag-less ZipLower: interleaves the lower lanes of `a` and `b` and
+// reinterprets the result using the widened tag derived from V.
+template <class V, class D = DFromV<V>, class DW = RepartitionToWide<D>>
+HWY_API VFromD<DW> ZipLower(const V a, const V b) {
+  const auto interleaved = InterleaveLower(D(), a, b);
+  return BitCast(DW(), interleaved);
+}
+
+// ------------------------------ ZipUpper
+// Interleaves the upper lanes of `a` and `b` and reinterprets the result as
+// lanes of twice the width (tag `dw`). The lane type of V must match the
+// narrow counterpart of DW.
+template <class V, class DW = RepartitionToWide<DFromV<V>>>
+HWY_API VFromD<DW> ZipUpper(DW dw, V a, V b) {
+  const RepartitionToNarrow<DW> dn;
+  static_assert(IsSame<TFromD<decltype(dn)>, TFromV<V>>(), "D/V mismatch");
+  const auto interleaved = InterleaveUpper(dn, a, b);
+  return BitCast(dw, interleaved);
+}
+
+// ================================================== Ops with dependencies
+
+// ------------------------------ PromoteTo bfloat16 (ZipLower)
+// bfloat16 -> float32: zips a zero vector with the 16-bit patterns of `v` and
+// reinterprets the result as float32.
+template <size_t N, int kPow2>
+HWY_API svfloat32_t PromoteTo(Simd<float32_t, N, kPow2> df32, VBF16 v) {
+  const ScalableTag<uint16_t> du16;
+  const auto zeros = svdup_n_u16(0);
+  const auto bf_bits = BitCast(du16, v);
+  return BitCast(df32, detail::ZipLowerSame(zeros, bf_bits));
+}
+
+// ------------------------------ ReorderDemote2To (OddEven)
+
+// Two float32 vectors -> one bfloat16 vector (lane order unspecified): the
+// upper 16 bits of each `b` lane are shifted down, then OddEven combines the
+// odd 16-bit lanes of `a` with the even 16-bit lanes of the shifted `b`.
+template <size_t N, int kPow2>
+HWY_API VBF16 ReorderDemote2To(Simd<bfloat16_t, N, kPow2> dbf16, svfloat32_t a,
+                               svfloat32_t b) {
+  const RebindToUnsigned<decltype(dbf16)> du16;
+  const Repartition<uint32_t, decltype(dbf16)> du32;
+  const svuint32_t b_shifted = ShiftRight<16>(BitCast(du32, b));
+  const auto odd_src = BitCast(du16, a);
+  const auto even_src = BitCast(du16, b_shifted);
+  return BitCast(dbf16, OddEven(odd_src, even_src));
+}
+
+// int32 x2 -> int16 with saturation (lane order unspecified). SVE2 uses the
+// narrowing bottom/top instructions; otherwise both inputs are saturated and
+// their even 16-bit lanes interleaved.
+template <size_t N, int kPow2>
+HWY_API svint16_t ReorderDemote2To(Simd<int16_t, N, kPow2> d16, svint32_t a,
+                                   svint32_t b) {
+#if HWY_SVE_HAVE_2
+  (void)d16;
+  return svqxtnt_s32(svqxtnb_s32(a), b);
+#else
+  const auto sat_a = detail::SaturateI<int16_t>(a);
+  const auto sat_b = detail::SaturateI<int16_t>(b);
+  const svint16_t narrow_a = BitCast(d16, sat_a);
+  const svint16_t narrow_b = BitCast(d16, sat_b);
+  return detail::InterleaveEven(narrow_a, narrow_b);
+#endif
+}
+
+// int32 x2 -> uint16 with saturation (lane order unspecified). SVE2 uses the
+// signed-to-unsigned narrowing bottom/top instructions; otherwise negative
+// inputs are clamped to zero before unsigned saturation and interleaving.
+template <size_t N, int kPow2>
+HWY_API svuint16_t ReorderDemote2To(Simd<uint16_t, N, kPow2> d16, svint32_t a,
+                                    svint32_t b) {
+#if HWY_SVE_HAVE_2
+  (void)d16;
+  return svqxtunt_s32(svqxtunb_s32(a), b);
+#else
+  const Repartition<uint32_t, decltype(d16)> du32;
+  const svuint32_t nonneg_a = BitCast(du32, detail::MaxN(a, 0));
+  const svuint32_t nonneg_b = BitCast(du32, detail::MaxN(b, 0));
+  const svuint16_t narrow_a =
+      BitCast(d16, detail::SaturateU<uint16_t>(nonneg_a));
+  const svuint16_t narrow_b =
+      BitCast(d16, detail::SaturateU<uint16_t>(nonneg_b));
+  return detail::InterleaveEven(narrow_a, narrow_b);
+#endif
+}
+
+// uint32 x2 -> uint16 with saturation (lane order unspecified). SVE2 uses the
+// narrowing bottom/top instructions; otherwise both inputs are saturated and
+// their even 16-bit lanes interleaved.
+template <size_t N, int kPow2>
+HWY_API svuint16_t ReorderDemote2To(Simd<uint16_t, N, kPow2> d16, svuint32_t a,
+                                    svuint32_t b) {
+#if HWY_SVE_HAVE_2
+  (void)d16;
+  return svqxtnt_u32(svqxtnb_u32(a), b);
+#else
+  const auto sat_a = detail::SaturateU<uint16_t>(a);
+  const auto sat_b = detail::SaturateU<uint16_t>(b);
+  const svuint16_t narrow_a = BitCast(d16, sat_a);
+  const svuint16_t narrow_b = BitCast(d16, sat_b);
+  return detail::InterleaveEven(narrow_a, narrow_b);
+#endif
+}
+
+// int16 x2 -> int8 with saturation (lane order unspecified). SVE2 uses the
+// narrowing bottom/top instructions; otherwise both inputs are saturated and
+// their even 8-bit lanes interleaved.
+template <size_t N, int kPow2>
+HWY_API svint8_t ReorderDemote2To(Simd<int8_t, N, kPow2> d8, svint16_t a,
+                                  svint16_t b) {
+#if HWY_SVE_HAVE_2
+  (void)d8;
+  return svqxtnt_s16(svqxtnb_s16(a), b);
+#else
+  const auto sat_a = detail::SaturateI<int8_t>(a);
+  const auto sat_b = detail::SaturateI<int8_t>(b);
+  const svint8_t narrow_a = BitCast(d8, sat_a);
+  const svint8_t narrow_b = BitCast(d8, sat_b);
+  return detail::InterleaveEven(narrow_a, narrow_b);
+#endif
+}
+
+// int16 x2 -> uint8 with saturation (lane order unspecified). SVE2 uses the
+// signed-to-unsigned narrowing bottom/top instructions; otherwise negative
+// inputs are clamped to zero before unsigned saturation and interleaving.
+template <size_t N, int kPow2>
+HWY_API svuint8_t ReorderDemote2To(Simd<uint8_t, N, kPow2> d8, svint16_t a,
+                                   svint16_t b) {
+#if HWY_SVE_HAVE_2
+  (void)d8;
+  return svqxtunt_s16(svqxtunb_s16(a), b);
+#else
+  const Repartition<uint16_t, decltype(d8)> du16;
+  const svuint16_t nonneg_a = BitCast(du16, detail::MaxN(a, 0));
+  const svuint16_t nonneg_b = BitCast(du16, detail::MaxN(b, 0));
+  const svuint8_t narrow_a = BitCast(d8, detail::SaturateU<uint8_t>(nonneg_a));
+  const svuint8_t narrow_b = BitCast(d8, detail::SaturateU<uint8_t>(nonneg_b));
+  return detail::InterleaveEven(narrow_a, narrow_b);
+#endif
+}
+
+template <size_t N, int kPow2>
+HWY_API svuint8_t ReorderDemote2To(Simd<uint8_t, N, kPow2> d8, svuint16_t a,
+                                   svuint16_t b) {
+#if HWY_SVE_HAVE_2
+  // Saturating narrow: `a` fills the even u8 lanes, then `b` the odd ones.
+  (void)d8;
+  return svqxtnt_u16(svqxtnb_u16(a), b);
+#else
+  const svuint8_t sat_a = BitCast(d8, detail::SaturateU<uint8_t>(a));
+  const svuint8_t sat_b = BitCast(d8, detail::SaturateU<uint8_t>(b));
+  return detail::InterleaveEven(sat_a, sat_b);
+#endif
+}
+
+template <size_t N, int kPow2>
+HWY_API svint32_t ReorderDemote2To(Simd<int32_t, N, kPow2> d32, svint64_t a,
+                                   svint64_t b) {
+#if HWY_SVE_HAVE_2
+  // Saturating narrow: `a` goes to the even i32 lanes, `b` to the odd ones.
+  (void)d32;
+  return svqxtnt_s64(svqxtnb_s64(a), b);
+#else
+  const svint32_t sat_a = BitCast(d32, detail::SaturateI<int32_t>(a));
+  const svint32_t sat_b = BitCast(d32, detail::SaturateI<int32_t>(b));
+  return detail::InterleaveEven(sat_a, sat_b);
+#endif
+}
+
+template <size_t N, int kPow2>
+HWY_API svuint32_t ReorderDemote2To(Simd<uint32_t, N, kPow2> d32, svint64_t a,
+                                    svint64_t b) {
+#if HWY_SVE_HAVE_2
+  // Signed-to-unsigned saturating narrow: `a` to even u32 lanes, `b` to odd.
+  (void)d32;
+  return svqxtunt_s64(svqxtunb_s64(a), b);
+#else
+  const Repartition<uint64_t, decltype(d32)> du64;
+  const svuint64_t nonneg_a = BitCast(du64, detail::MaxN(a, 0));
+  const svuint64_t nonneg_b = BitCast(du64, detail::MaxN(b, 0));
+  const svuint32_t sat_a = BitCast(d32, detail::SaturateU<uint32_t>(nonneg_a));
+  const svuint32_t sat_b = BitCast(d32, detail::SaturateU<uint32_t>(nonneg_b));
+  return detail::InterleaveEven(sat_a, sat_b);
+#endif
+}
+
+template <size_t N, int kPow2>
+HWY_API svuint32_t ReorderDemote2To(Simd<uint32_t, N, kPow2> d32, svuint64_t a,
+                                    svuint64_t b) {
+#if HWY_SVE_HAVE_2
+  // Saturating narrow: `a` fills the even u32 lanes, then `b` the odd ones.
+  (void)d32;
+  return svqxtnt_u64(svqxtnb_u64(a), b);
+#else
+  const svuint32_t sat_a = BitCast(d32, detail::SaturateU<uint32_t>(a));
+  const svuint32_t sat_b = BitCast(d32, detail::SaturateU<uint32_t>(b));
+  return detail::InterleaveEven(sat_a, sat_b);
+#endif
+}
+
+template <class D, class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<D>),
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<D>) * 2)>
+HWY_API VFromD<D> OrderedDemote2To(D dn, V a, V b) {
+  // Demote each input to a half vector; `a` is passed as the lower half.
+  const Half<D> dnh;
+  const auto lower = DemoteTo(dnh, a);
+  return Combine(dn, DemoteTo(dnh, b), lower);
+}
+
+template <class D, HWY_IF_BF16_D(D)>
+HWY_API VBF16 OrderedDemote2To(D dn, svfloat32_t a, svfloat32_t b) {
+  // Combine the two demoted halves as unsigned lanes, then cast back to bf16.
+  const Half<D> dnh;
+  const RebindToUnsigned<D> du;
+  const RebindToUnsigned<decltype(dnh)> duh;
+  const auto lo_bits = BitCast(duh, DemoteTo(dnh, a));
+  const auto hi_bits = BitCast(duh, DemoteTo(dnh, b));
+  return BitCast(dn, Combine(du, hi_bits, lo_bits));
+}
+
+// ------------------------------ ZeroIfNegative (Lt, IfThenElse)
+template <class V>
+HWY_API V ZeroIfNegative(const V v) {
+  return IfThenZeroElse(detail::LtN(v, 0), v);  // lanes < 0 become zero
+}
+
+// ------------------------------ BroadcastSignBit (ShiftRight)
+template <class V>
+HWY_API V BroadcastSignBit(const V v) {
+  return ShiftRight<sizeof(TFromV<V>) * 8 - 1>(v);  // shift by lane bits - 1
+}
+
+// ------------------------------ IfNegativeThenElse (BroadcastSignBit)
+template <class V>
+HWY_API V IfNegativeThenElse(V v, V yes, V no) {
+  static_assert(IsSigned<TFromV<V>>(), "Only works for signed/float");
+  // Reinterpret lanes as signed integers so the test inspects the sign bit.
+  const RebindToSigned<DFromV<V>> di;
+  const auto as_signed = BitCast(di, v);
+  const svbool_t is_negative = detail::LtN(as_signed, 0);
+  return IfThenElse(is_negative, yes, no);
+}
+
+// ------------------------------ AverageRound (ShiftRight)
+// Rounded-up average of unsigned lanes, i.e. (a + b + 1) >> 1 computed exactly.
+#if HWY_SVE_HAVE_2
+HWY_SVE_FOREACH_U08(HWY_SVE_RETV_ARGPVV, AverageRound, rhadd)
+HWY_SVE_FOREACH_U16(HWY_SVE_RETV_ARGPVV, AverageRound, rhadd)
+#else
+template <class V>
+HWY_API V AverageRound(const V a, const V b) {
+  return Sub(Or(a, b), ShiftRight<1>(Xor(a, b)));  // cannot wrap, unlike a+b+1
+}
+#endif  // HWY_SVE_HAVE_2
+
+// ------------------------------ LoadMaskBits (TestBit)
+
+// `bits` points to at least 8 readable bytes, not all of which need be valid.
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE svbool_t LoadMaskBits(D d, const uint8_t* HWY_RESTRICT bits) {
+  // TODO(janwas): with SVE2.1, load to vector, then PMOV
+  const RebindToUnsigned<D> du;
+  // Per-lane test pattern 1, 2, 4, .., 128, repeating every 8 lanes.
+  const svuint8_t lane_bit =
+      svdupq_n_u8(1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128);
+
+  // Load one mask byte per u64 lane, i.e. each followed by 7 zero bytes.
+  const svuint8_t packed = BitCast(du, svld1ub_u64(detail::PTrue(d), bits));
+  // Broadcast each loaded byte to the 8 lanes whose mask bits it holds.
+  const svuint8_t spread = svtbl_u8(packed, detail::AndNotN(7, Iota(du, 0)));
+
+  return TestBit(spread, lane_bit);
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_INLINE svbool_t LoadMaskBits(D /* tag */,
+                                 const uint8_t* HWY_RESTRICT bits) {
+  const RebindToUnsigned<D> du;
+  const Repartition<uint8_t, D> du8;
+  const svuint16_t lane_bit = svdupq_n_u16(1, 2, 4, 8, 16, 32, 64, 128);
+
+  // One mask bit per lane: only load ceil(Lanes / 8) bytes, never more.
+  const size_t num_bytes = (Lanes(du) + 7) / 8;
+  const svuint8_t packed = svld1(FirstN(du8, num_bytes), bits);
+
+  // Byte index (i >> 4) copies each mask byte into 16 bytes, i.e. 8 u16 lanes.
+  const svuint8_t spread = svtbl_u8(packed, ShiftRight<4>(Iota(du8, 0)));
+  return TestBit(BitCast(du, spread), lane_bit);
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_INLINE svbool_t LoadMaskBits(D /* tag */,
+                                 const uint8_t* HWY_RESTRICT bits) {
+  const RebindToUnsigned<D> du;
+  const Repartition<uint8_t, D> du8;
+
+  // Lane i tests bit (i & 7): 1, 2, 4, .., 128, then the pattern repeats.
+  const svuint32_t lane_index = Iota(du, 0);
+  const svuint32_t lane_bit = Shl(Set(du, 1), detail::AndN(lane_index, 7));
+
+  // A 2048-bit vector has at most 64 u32 lanes, hence 64 mask bits. At least
+  // 8 bytes are readable, so always load 8 instead of (Lanes(du)+7)/8.
+  const svuint8_t packed = svld1(FirstN(du8, 8), bits);
+
+  // Byte index (i >> 5) copies each mask byte into 32 bytes, i.e. 8 u32 lanes.
+  const svuint8_t spread = svtbl_u8(packed, ShiftRight<5>(Iota(du8, 0)));
+  return TestBit(BitCast(du, spread), lane_bit);
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE svbool_t LoadMaskBits(D /* tag */,
+                                 const uint8_t* HWY_RESTRICT bits) {
+  const RebindToUnsigned<D> du;
+
+  // Lane i tests bit i; 2048-bit vectors have at most 32 u64 lanes, so the
+  // shift count never exceeds 31.
+  const svuint64_t lane_bit = Shl(Set(du, 1), Iota(du, 0));
+
+  // All mask bits fit in 32 bits. Reading 4 bytes is covered by the "at least
+  // 8 byte" guarantee in quick_reference; broadcast them to every lane.
+  uint32_t packed;
+  CopyBytes<4>(bits, &packed);  // copy from bytes
+  const auto all_bits = Set(du, packed);
+  return TestBit(all_bits, lane_bit);
+}
+
+// ------------------------------ StoreMaskBits
+
+namespace detail {
+
+// Turns a mask that governs lanes of type T into u8 lanes holding 1 (true) or
+// 0 (false). Wider T are narrowed so the results occupy consecutive bytes.
+template <class T, HWY_IF_T_SIZE(T, 1)>
+HWY_INLINE svuint8_t BoolFromMask(svbool_t m) {
+  return svdup_n_u8_z(m, 1);
+}
+template <class T, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE svuint8_t BoolFromMask(svbool_t m) {
+  const svuint8_t ones = BitCast(ScalableTag<uint8_t>(), svdup_n_u16_z(m, 1));
+  return detail::ConcatEvenFull(ones, ones);  // lower half
+}
+template <class T, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE svuint8_t BoolFromMask(svbool_t m) {
+  return U8FromU32(svdup_n_u32_z(m, 1));
+}
+template <class T, HWY_IF_T_SIZE(T, 8)>
+HWY_INLINE svuint8_t BoolFromMask(svbool_t m) {
+  const svuint32_t ones = BitCast(ScalableTag<uint32_t>(), svdup_n_u64_z(m, 1));
+  return U8FromU32(detail::ConcatEvenFull(ones, ones));  // lower half
+}
+
+// Packs each group of 8 u8 lanes (holding 0 or 1) into 8 contiguous bits of
+// the u64 lane that contains them.
+HWY_INLINE svuint64_t BitsFromBool(svuint8_t x) {
+  const ScalableTag<uint8_t> d8;
+  const ScalableTag<uint64_t> d64;
+  // TODO(janwas): could use SVE2 BDEP, but it's optional.
+  // Fold neighbors into the low byte: first 1 -> 2 bits, then 2 -> 4 bits,
+  // finally 4 -> 8 bits.
+  x = Or(x, BitCast(d8, ShiftRight<7>(BitCast(ScalableTag<uint16_t>(), x))));
+  x = Or(x, BitCast(d8, ShiftRight<14>(BitCast(ScalableTag<uint32_t>(), x))));
+  x = Or(x, BitCast(d8, ShiftRight<28>(BitCast(d64, x))));
+  return BitCast(d64, x);
+}
+
+}  // namespace detail
+
+// `bits` points to at least 8 writable bytes.
+// TODO(janwas): specialize for HWY_SVE_256
+// TODO(janwas): with SVE2.1, use PMOV to store to vector, then StoreU
+template <class D>
+HWY_API size_t StoreMaskBits(D d, svbool_t m, uint8_t* bits) {
+  const size_t num_lanes = Lanes(d);  // one mask bit per lane
+  const size_t bytes_written = (num_lanes + 7) / 8;  // round up, see below
+
+  // Every u64 lane carries 8 packed mask bits in its low byte.
+  const svuint8_t bools = detail::BoolFromMask<TFromD<D>>(m);
+  const svuint64_t packed = detail::BitsFromBool(bools);
+
+  // Truncating store: each of the first bytes_written u64 becomes one u8.
+  const svbool_t store_mask = FirstN(ScalableTag<uint64_t>(), bytes_written);
+  svst1b_u64(store_mask, bits, packed);
+  // Fewer than 8 lanes (capped/fractional vectors, or large T with small
+  // hardware vectors): the upper bits of the only byte are undefined, so
+  // clear them. Otherwise num_lanes is a power of two >= 8: full bytes.
+  if (num_lanes < 8) {
+    const int keep = static_cast<int>((1ull << num_lanes) - 1);
+    bits[0] = static_cast<uint8_t>(bits[0] & keep);
+  }
+  return bytes_written;
+}
+
+// ------------------------------ CompressBits (LoadMaskBits)
+template <class V, HWY_IF_NOT_T_SIZE_V(V, 1)>
+HWY_INLINE V CompressBits(V v, const uint8_t* HWY_RESTRICT bits) {
+  return Compress(v, LoadMaskBits(DFromV<V>(), bits));  // packed bits -> mask
+}
+
+// ------------------------------ CompressBitsStore (LoadMaskBits)
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API size_t CompressBitsStore(VFromD<D> v, const uint8_t* HWY_RESTRICT bits,
+                                 D d, TFromD<D>* HWY_RESTRICT unaligned) {
+  return CompressStore(v, LoadMaskBits(d, bits), d, unaligned);  // bits->mask
+}
+
+// ------------------------------ Expand (StoreMaskBits)
+
+#ifdef HWY_NATIVE_EXPAND
+#undef HWY_NATIVE_EXPAND
+#else
+#define HWY_NATIVE_EXPAND
+#endif
+
+namespace detail {
+// Loads the 8-entry row of u8 indices selected by mask_bits (must be < 256).
+HWY_INLINE svuint8_t IndicesForExpandFromBits(uint64_t mask_bits) {
+  const CappedTag<uint8_t, 8> du8;
+  alignas(16) static constexpr uint8_t table[8 * 256] = {
+      // PrintExpand8x8Tables: [m][i] = #set bits of m below i, or 128 if bit i = 0
+      128, 128, 128, 128, 128, 128, 128, 128,  //
+      0,   128, 128, 128, 128, 128, 128, 128,  //
+      128, 0,   128, 128, 128, 128, 128, 128,  //
+      0,   1,   128, 128, 128, 128, 128, 128,  //
+      128, 128, 0,   128, 128, 128, 128, 128,  //
+      0,   128, 1,   128, 128, 128, 128, 128,  //
+      128, 0,   1,   128, 128, 128, 128, 128,  //
+      0,   1,   2,   128, 128, 128, 128, 128,  //
+      128, 128, 128, 0,   128, 128, 128, 128,  //
+      0,   128, 128, 1,   128, 128, 128, 128,  //
+      128, 0,   128, 1,   128, 128, 128, 128,  //
+      0,   1,   128, 2,   128, 128, 128, 128,  //
+      128, 128, 0,   1,   128, 128, 128, 128,  //
+      0,   128, 1,   2,   128, 128, 128, 128,  //
+      128, 0,   1,   2,   128, 128, 128, 128,  //
+      0,   1,   2,   3,   128, 128, 128, 128,  //
+      128, 128, 128, 128, 0,   128, 128, 128,  //
+      0,   128, 128, 128, 1,   128, 128, 128,  //
+      128, 0,   128, 128, 1,   128, 128, 128,  //
+      0,   1,   128, 128, 2,   128, 128, 128,  //
+      128, 128, 0,   128, 1,   128, 128, 128,  //
+      0,   128, 1,   128, 2,   128, 128, 128,  //
+      128, 0,   1,   128, 2,   128, 128, 128,  //
+      0,   1,   2,   128, 3,   128, 128, 128,  //
+      128, 128, 128, 0,   1,   128, 128, 128,  //
+      0,   128, 128, 1,   2,   128, 128, 128,  //
+      128, 0,   128, 1,   2,   128, 128, 128,  //
+      0,   1,   128, 2,   3,   128, 128, 128,  //
+      128, 128, 0,   1,   2,   128, 128, 128,  //
+      0,   128, 1,   2,   3,   128, 128, 128,  //
+      128, 0,   1,   2,   3,   128, 128, 128,  //
+      0,   1,   2,   3,   4,   128, 128, 128,  //
+      128, 128, 128, 128, 128, 0,   128, 128,  //
+      0,   128, 128, 128, 128, 1,   128, 128,  //
+      128, 0,   128, 128, 128, 1,   128, 128,  //
+      0,   1,   128, 128, 128, 2,   128, 128,  //
+      128, 128, 0,   128, 128, 1,   128, 128,  //
+      0,   128, 1,   128, 128, 2,   128, 128,  //
+      128, 0,   1,   128, 128, 2,   128, 128,  //
+      0,   1,   2,   128, 128, 3,   128, 128,  //
+      128, 128, 128, 0,   128, 1,   128, 128,  //
+      0,   128, 128, 1,   128, 2,   128, 128,  //
+      128, 0,   128, 1,   128, 2,   128, 128,  //
+      0,   1,   128, 2,   128, 3,   128, 128,  //
+      128, 128, 0,   1,   128, 2,   128, 128,  //
+      0,   128, 1,   2,   128, 3,   128, 128,  //
+      128, 0,   1,   2,   128, 3,   128, 128,  //
+      0,   1,   2,   3,   128, 4,   128, 128,  //
+      128, 128, 128, 128, 0,   1,   128, 128,  //
+      0,   128, 128, 128, 1,   2,   128, 128,  //
+      128, 0,   128, 128, 1,   2,   128, 128,  //
+      0,   1,   128, 128, 2,   3,   128, 128,  //
+      128, 128, 0,   128, 1,   2,   128, 128,  //
+      0,   128, 1,   128, 2,   3,   128, 128,  //
+      128, 0,   1,   128, 2,   3,   128, 128,  //
+      0,   1,   2,   128, 3,   4,   128, 128,  //
+      128, 128, 128, 0,   1,   2,   128, 128,  //
+      0,   128, 128, 1,   2,   3,   128, 128,  //
+      128, 0,   128, 1,   2,   3,   128, 128,  //
+      0,   1,   128, 2,   3,   4,   128, 128,  //
+      128, 128, 0,   1,   2,   3,   128, 128,  //
+      0,   128, 1,   2,   3,   4,   128, 128,  //
+      128, 0,   1,   2,   3,   4,   128, 128,  //
+      0,   1,   2,   3,   4,   5,   128, 128,  //
+      128, 128, 128, 128, 128, 128, 0,   128,  //
+      0,   128, 128, 128, 128, 128, 1,   128,  //
+      128, 0,   128, 128, 128, 128, 1,   128,  //
+      0,   1,   128, 128, 128, 128, 2,   128,  //
+      128, 128, 0,   128, 128, 128, 1,   128,  //
+      0,   128, 1,   128, 128, 128, 2,   128,  //
+      128, 0,   1,   128, 128, 128, 2,   128,  //
+      0,   1,   2,   128, 128, 128, 3,   128,  //
+      128, 128, 128, 0,   128, 128, 1,   128,  //
+      0,   128, 128, 1,   128, 128, 2,   128,  //
+      128, 0,   128, 1,   128, 128, 2,   128,  //
+      0,   1,   128, 2,   128, 128, 3,   128,  //
+      128, 128, 0,   1,   128, 128, 2,   128,  //
+      0,   128, 1,   2,   128, 128, 3,   128,  //
+      128, 0,   1,   2,   128, 128, 3,   128,  //
+      0,   1,   2,   3,   128, 128, 4,   128,  //
+      128, 128, 128, 128, 0,   128, 1,   128,  //
+      0,   128, 128, 128, 1,   128, 2,   128,  //
+      128, 0,   128, 128, 1,   128, 2,   128,  //
+      0,   1,   128, 128, 2,   128, 3,   128,  //
+      128, 128, 0,   128, 1,   128, 2,   128,  //
+      0,   128, 1,   128, 2,   128, 3,   128,  //
+      128, 0,   1,   128, 2,   128, 3,   128,  //
+      0,   1,   2,   128, 3,   128, 4,   128,  //
+      128, 128, 128, 0,   1,   128, 2,   128,  //
+      0,   128, 128, 1,   2,   128, 3,   128,  //
+      128, 0,   128, 1,   2,   128, 3,   128,  //
+      0,   1,   128, 2,   3,   128, 4,   128,  //
+      128, 128, 0,   1,   2,   128, 3,   128,  //
+      0,   128, 1,   2,   3,   128, 4,   128,  //
+      128, 0,   1,   2,   3,   128, 4,   128,  //
+      0,   1,   2,   3,   4,   128, 5,   128,  //
+      128, 128, 128, 128, 128, 0,   1,   128,  //
+      0,   128, 128, 128, 128, 1,   2,   128,  //
+      128, 0,   128, 128, 128, 1,   2,   128,  //
+      0,   1,   128, 128, 128, 2,   3,   128,  //
+      128, 128, 0,   128, 128, 1,   2,   128,  //
+      0,   128, 1,   128, 128, 2,   3,   128,  //
+      128, 0,   1,   128, 128, 2,   3,   128,  //
+      0,   1,   2,   128, 128, 3,   4,   128,  //
+      128, 128, 128, 0,   128, 1,   2,   128,  //
+      0,   128, 128, 1,   128, 2,   3,   128,  //
+      128, 0,   128, 1,   128, 2,   3,   128,  //
+      0,   1,   128, 2,   128, 3,   4,   128,  //
+      128, 128, 0,   1,   128, 2,   3,   128,  //
+      0,   128, 1,   2,   128, 3,   4,   128,  //
+      128, 0,   1,   2,   128, 3,   4,   128,  //
+      0,   1,   2,   3,   128, 4,   5,   128,  //
+      128, 128, 128, 128, 0,   1,   2,   128,  //
+      0,   128, 128, 128, 1,   2,   3,   128,  //
+      128, 0,   128, 128, 1,   2,   3,   128,  //
+      0,   1,   128, 128, 2,   3,   4,   128,  //
+      128, 128, 0,   128, 1,   2,   3,   128,  //
+      0,   128, 1,   128, 2,   3,   4,   128,  //
+      128, 0,   1,   128, 2,   3,   4,   128,  //
+      0,   1,   2,   128, 3,   4,   5,   128,  //
+      128, 128, 128, 0,   1,   2,   3,   128,  //
+      0,   128, 128, 1,   2,   3,   4,   128,  //
+      128, 0,   128, 1,   2,   3,   4,   128,  //
+      0,   1,   128, 2,   3,   4,   5,   128,  //
+      128, 128, 0,   1,   2,   3,   4,   128,  //
+      0,   128, 1,   2,   3,   4,   5,   128,  //
+      128, 0,   1,   2,   3,   4,   5,   128,  //
+      0,   1,   2,   3,   4,   5,   6,   128,  //
+      128, 128, 128, 128, 128, 128, 128, 0,    //
+      0,   128, 128, 128, 128, 128, 128, 1,    //
+      128, 0,   128, 128, 128, 128, 128, 1,    //
+      0,   1,   128, 128, 128, 128, 128, 2,    //
+      128, 128, 0,   128, 128, 128, 128, 1,    //
+      0,   128, 1,   128, 128, 128, 128, 2,    //
+      128, 0,   1,   128, 128, 128, 128, 2,    //
+      0,   1,   2,   128, 128, 128, 128, 3,    //
+      128, 128, 128, 0,   128, 128, 128, 1,    //
+      0,   128, 128, 1,   128, 128, 128, 2,    //
+      128, 0,   128, 1,   128, 128, 128, 2,    //
+      0,   1,   128, 2,   128, 128, 128, 3,    //
+      128, 128, 0,   1,   128, 128, 128, 2,    //
+      0,   128, 1,   2,   128, 128, 128, 3,    //
+      128, 0,   1,   2,   128, 128, 128, 3,    //
+      0,   1,   2,   3,   128, 128, 128, 4,    //
+      128, 128, 128, 128, 0,   128, 128, 1,    //
+      0,   128, 128, 128, 1,   128, 128, 2,    //
+      128, 0,   128, 128, 1,   128, 128, 2,    //
+      0,   1,   128, 128, 2,   128, 128, 3,    //
+      128, 128, 0,   128, 1,   128, 128, 2,    //
+      0,   128, 1,   128, 2,   128, 128, 3,    //
+      128, 0,   1,   128, 2,   128, 128, 3,    //
+      0,   1,   2,   128, 3,   128, 128, 4,    //
+      128, 128, 128, 0,   1,   128, 128, 2,    //
+      0,   128, 128, 1,   2,   128, 128, 3,    //
+      128, 0,   128, 1,   2,   128, 128, 3,    //
+      0,   1,   128, 2,   3,   128, 128, 4,    //
+      128, 128, 0,   1,   2,   128, 128, 3,    //
+      0,   128, 1,   2,   3,   128, 128, 4,    //
+      128, 0,   1,   2,   3,   128, 128, 4,    //
+      0,   1,   2,   3,   4,   128, 128, 5,    //
+      128, 128, 128, 128, 128, 0,   128, 1,    //
+      0,   128, 128, 128, 128, 1,   128, 2,    //
+      128, 0,   128, 128, 128, 1,   128, 2,    //
+      0,   1,   128, 128, 128, 2,   128, 3,    //
+      128, 128, 0,   128, 128, 1,   128, 2,    //
+      0,   128, 1,   128, 128, 2,   128, 3,    //
+      128, 0,   1,   128, 128, 2,   128, 3,    //
+      0,   1,   2,   128, 128, 3,   128, 4,    //
+      128, 128, 128, 0,   128, 1,   128, 2,    //
+      0,   128, 128, 1,   128, 2,   128, 3,    //
+      128, 0,   128, 1,   128, 2,   128, 3,    //
+      0,   1,   128, 2,   128, 3,   128, 4,    //
+      128, 128, 0,   1,   128, 2,   128, 3,    //
+      0,   128, 1,   2,   128, 3,   128, 4,    //
+      128, 0,   1,   2,   128, 3,   128, 4,    //
+      0,   1,   2,   3,   128, 4,   128, 5,    //
+      128, 128, 128, 128, 0,   1,   128, 2,    //
+      0,   128, 128, 128, 1,   2,   128, 3,    //
+      128, 0,   128, 128, 1,   2,   128, 3,    //
+      0,   1,   128, 128, 2,   3,   128, 4,    //
+      128, 128, 0,   128, 1,   2,   128, 3,    //
+      0,   128, 1,   128, 2,   3,   128, 4,    //
+      128, 0,   1,   128, 2,   3,   128, 4,    //
+      0,   1,   2,   128, 3,   4,   128, 5,    //
+      128, 128, 128, 0,   1,   2,   128, 3,    //
+      0,   128, 128, 1,   2,   3,   128, 4,    //
+      128, 0,   128, 1,   2,   3,   128, 4,    //
+      0,   1,   128, 2,   3,   4,   128, 5,    //
+      128, 128, 0,   1,   2,   3,   128, 4,    //
+      0,   128, 1,   2,   3,   4,   128, 5,    //
+      128, 0,   1,   2,   3,   4,   128, 5,    //
+      0,   1,   2,   3,   4,   5,   128, 6,    //
+      128, 128, 128, 128, 128, 128, 0,   1,    //
+      0,   128, 128, 128, 128, 128, 1,   2,    //
+      128, 0,   128, 128, 128, 128, 1,   2,    //
+      0,   1,   128, 128, 128, 128, 2,   3,    //
+      128, 128, 0,   128, 128, 128, 1,   2,    //
+      0,   128, 1,   128, 128, 128, 2,   3,    //
+      128, 0,   1,   128, 128, 128, 2,   3,    //
+      0,   1,   2,   128, 128, 128, 3,   4,    //
+      128, 128, 128, 0,   128, 128, 1,   2,    //
+      0,   128, 128, 1,   128, 128, 2,   3,    //
+      128, 0,   128, 1,   128, 128, 2,   3,    //
+      0,   1,   128, 2,   128, 128, 3,   4,    //
+      128, 128, 0,   1,   128, 128, 2,   3,    //
+      0,   128, 1,   2,   128, 128, 3,   4,    //
+      128, 0,   1,   2,   128, 128, 3,   4,    //
+      0,   1,   2,   3,   128, 128, 4,   5,    //
+      128, 128, 128, 128, 0,   128, 1,   2,    //
+      0,   128, 128, 128, 1,   128, 2,   3,    //
+      128, 0,   128, 128, 1,   128, 2,   3,    //
+      0,   1,   128, 128, 2,   128, 3,   4,    //
+      128, 128, 0,   128, 1,   128, 2,   3,    //
+      0,   128, 1,   128, 2,   128, 3,   4,    //
+      128, 0,   1,   128, 2,   128, 3,   4,    //
+      0,   1,   2,   128, 3,   128, 4,   5,    //
+      128, 128, 128, 0,   1,   128, 2,   3,    //
+      0,   128, 128, 1,   2,   128, 3,   4,    //
+      128, 0,   128, 1,   2,   128, 3,   4,    //
+      0,   1,   128, 2,   3,   128, 4,   5,    //
+      128, 128, 0,   1,   2,   128, 3,   4,    //
+      0,   128, 1,   2,   3,   128, 4,   5,    //
+      128, 0,   1,   2,   3,   128, 4,   5,    //
+      0,   1,   2,   3,   4,   128, 5,   6,    //
+      128, 128, 128, 128, 128, 0,   1,   2,    //
+      0,   128, 128, 128, 128, 1,   2,   3,    //
+      128, 0,   128, 128, 128, 1,   2,   3,    //
+      0,   1,   128, 128, 128, 2,   3,   4,    //
+      128, 128, 0,   128, 128, 1,   2,   3,    //
+      0,   128, 1,   128, 128, 2,   3,   4,    //
+      128, 0,   1,   128, 128, 2,   3,   4,    //
+      0,   1,   2,   128, 128, 3,   4,   5,    //
+      128, 128, 128, 0,   128, 1,   2,   3,    //
+      0,   128, 128, 1,   128, 2,   3,   4,    //
+      128, 0,   128, 1,   128, 2,   3,   4,    //
+      0,   1,   128, 2,   128, 3,   4,   5,    //
+      128, 128, 0,   1,   128, 2,   3,   4,    //
+      0,   128, 1,   2,   128, 3,   4,   5,    //
+      128, 0,   1,   2,   128, 3,   4,   5,    //
+      0,   1,   2,   3,   128, 4,   5,   6,    //
+      128, 128, 128, 128, 0,   1,   2,   3,    //
+      0,   128, 128, 128, 1,   2,   3,   4,    //
+      128, 0,   128, 128, 1,   2,   3,   4,    //
+      0,   1,   128, 128, 2,   3,   4,   5,    //
+      128, 128, 0,   128, 1,   2,   3,   4,    //
+      0,   128, 1,   128, 2,   3,   4,   5,    //
+      128, 0,   1,   128, 2,   3,   4,   5,    //
+      0,   1,   2,   128, 3,   4,   5,   6,    //
+      128, 128, 128, 0,   1,   2,   3,   4,    //
+      0,   128, 128, 1,   2,   3,   4,   5,    //
+      128, 0,   128, 1,   2,   3,   4,   5,    //
+      0,   1,   128, 2,   3,   4,   5,   6,    //
+      128, 128, 0,   1,   2,   3,   4,   5,    //
+      0,   128, 1,   2,   3,   4,   5,   6,    //
+      128, 0,   1,   2,   3,   4,   5,   6,    //
+      0,   1,   2,   3,   4,   5,   6,   7};
+  return Load(du8, table + mask_bits * 8);  // 8 indices per mask value
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE svuint8_t LaneIndicesFromByteIndices(D, svuint8_t idx) {
+  return idx;  // 1-byte lanes: the u8 indices are returned unchanged
+}
+template <class D, class DU = RebindToUnsigned<D>, HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_INLINE VFromD<DU> LaneIndicesFromByteIndices(D, svuint8_t idx) {
+  return PromoteTo(DU(), idx);  // widen u8 indices to D's unsigned lane type
+}
+
+// General case when we don't know the vector size, 8 elements at a time.
+template <class V>
+HWY_INLINE V ExpandLoop(V v, svbool_t mask) {
+  using T = TFromV<V>;
+  const DFromV<V> d;
+  uint8_t packed_mask[256 / 8];
+  StoreMaskBits(d, mask, packed_mask);
+
+  // Each group of 8 result lanes is written to memory and the whole vector is
+  // reloaded at the end, which is likely cheaper than ShiftLeftLanes.
+  alignas(16) T out[2 * MaxLanes(d)];
+
+  const V lane_ids = Iota(d, 0);
+  svbool_t unconsumed = svpfalse_b();
+  size_t num_consumed = 0;
+  for (size_t group = 0; group < Lanes(d); group += 8) {
+    const uint64_t group_bits = packed_mask[group / 8];
+
+    // Splice lets us skip the lanes of v consumed by earlier groups; there is
+    // no instruction that shifts a register by a variable number of lanes.
+    const V remaining = detail::Splice(v, v, unconsumed);
+    num_consumed += PopCount(group_bits);
+    unconsumed = detail::GeN(lane_ids, static_cast<T>(num_consumed));
+
+    const svuint8_t byte_idx = detail::IndicesForExpandFromBits(group_bits);
+    const auto lane_idx = detail::LaneIndicesFromByteIndices(d, byte_idx);
+    StoreU(TableLookupLanes(remaining, lane_idx), d, out + group);
+  }
+  return LoadU(d, out);
+}
+
+}  // namespace detail
+
+template <class V, HWY_IF_T_SIZE_V(V, 1)>
+HWY_API V Expand(V v, svbool_t mask) {
+#if HWY_TARGET == HWY_SVE2_128 || HWY_IDE
+  using T = TFromV<V>;
+  const DFromV<V> d;
+  uint8_t packed_mask[256 / 8];
+  StoreMaskBits(d, mask, packed_mask);
+  const uint64_t bits_lo = packed_mask[0];  // mask bits of lanes 0..7
+  const uint64_t bits_hi = packed_mask[1];  // mask bits of lanes 8..15
+
+  // The upper half continues after the inputs taken by the lower half. Splice
+  // stands in for a variable byte shift; Not(FirstN()) could replace GeN.
+  const T taken_lo = static_cast<T>(PopCount(bits_lo));
+  const V v_hi = detail::Splice(v, v, detail::GeN(Iota(d, 0), taken_lo));
+
+  const svuint8_t idx_lo = detail::IndicesForExpandFromBits(bits_lo);
+  const svuint8_t idx_hi = detail::IndicesForExpandFromBits(bits_hi);
+  const V expanded_lo = TableLookupLanes(v, idx_lo);
+  return Combine(d, TableLookupLanes(v_hi, idx_hi), expanded_lo);
+#else
+  return detail::ExpandLoop(v, mask);
+#endif
+}
+
+template <class V, HWY_IF_T_SIZE_V(V, 2)>
+HWY_API V Expand(V v, svbool_t mask) {
+#if HWY_TARGET == HWY_SVE2_128 || HWY_IDE  // 16x8
+  const DFromV<V> d;
+  const RebindToUnsigned<decltype(d)> du16;
+  const Rebind<uint8_t, decltype(d)> du8;
+  // Convert mask into bitfield via horizontal sum (faster than ORV) of 8 bits.
+  // Pre-multiply by N so we can use it as an offset for Load.
+  const svuint16_t bits = Shl(Set(du16, 1), Iota(du16, 3));
+  const size_t offset = detail::SumOfLanesM(mask, bits);
+
+  // Storing as 8-bit reduces table size from 4 KiB to 2 KiB. We cannot apply
+  // the nibble trick used below because not all indices fit within one lane.
+  alignas(16) static constexpr uint8_t table[8 * 256] = {
+      // PrintExpand16x8LaneTables
+      255, 255, 255, 255, 255, 255, 255, 255,  //
+      0,   255, 255, 255, 255, 255, 255, 255,  //
+      255, 0,   255, 255, 255, 255, 255, 255,  //
+      0,   1,   255, 255, 255, 255, 255, 255,  //
+      255, 255, 0,   255, 255, 255, 255, 255,  //
+      0,   255, 1,   255, 255, 255, 255, 255,  //
+      255, 0,   1,   255, 255, 255, 255, 255,  //
+      0,   1,   2,   255, 255, 255, 255, 255,  //
+      255, 255, 255, 0,   255, 255, 255, 255,  //
+      0,   255, 255, 1,   255, 255, 255, 255,  //
+      255, 0,   255, 1,   255, 255, 255, 255,  //
+      0,   1,   255, 2,   255, 255, 255, 255,  //
+      255, 255, 0,   1,   255, 255, 255, 255,  //
+      0,   255, 1,   2,   255, 255, 255, 255,  //
+      255, 0,   1,   2,   255, 255, 255, 255,  //
+      0,   1,   2,   3,   255, 255, 255, 255,  //
+      255, 255, 255, 255, 0,   255, 255, 255,  //
+      0,   255, 255, 255, 1,   255, 255, 255,  //
+      255, 0,   255, 255, 1,   255, 255, 255,  //
+      0,   1,   255, 255, 2,   255, 255, 255,  //
+      255, 255, 0,   255, 1,   255, 255, 255,  //
+      0,   255, 1,   255, 2,   255, 255, 255,  //
+      255, 0,   1,   255, 2,   255, 255, 255,  //
+      0,   1,   2,   255, 3,   255, 255, 255,  //
+      255, 255, 255, 0,   1,   255, 255, 255,  //
+      0,   255, 255, 1,   2,   255, 255, 255,  //
+      255, 0,   255, 1,   2,   255, 255, 255,  //
+      0,   1,   255, 2,   3,   255, 255, 255,  //
+      255, 255, 0,   1,   2,   255, 255, 255,  //
+      0,   255, 1,   2,   3,   255, 255, 255,  //
+      255, 0,   1,   2,   3,   255, 255, 255,  //
+      0,   1,   2,   3,   4,   255, 255, 255,  //
+      255, 255, 255, 255, 255, 0,   255, 255,  //
+      0,   255, 255, 255, 255, 1,   255, 255,  //
+      255, 0,   255, 255, 255, 1,   255, 255,  //
+      0,   1,   255, 255, 255, 2,   255, 255,  //
+      255, 255, 0,   255, 255, 1,   255, 255,  //
+      0,   255, 1,   255, 255, 2,   255, 255,  //
+      255, 0,   1,   255, 255, 2,   255, 255,  //
+      0,   1,   2,   255, 255, 3,   255, 255,  //
+      255, 255, 255, 0,   255, 1,   255, 255,  //
+      0,   255, 255, 1,   255, 2,   255, 255,  //
+      255, 0,   255, 1,   255, 2,   255, 255,  //
+      0,   1,   255, 2,   255, 3,   255, 255,  //
+      255, 255, 0,   1,   255, 2,   255, 255,  //
+      0,   255, 1,   2,   255, 3,   255, 255,  //
+      255, 0,   1,   2,   255, 3,   255, 255,  //
+      0,   1,   2,   3,   255, 4,   255, 255,  //
+      255, 255, 255, 255, 0,   1,   255, 255,  //
+      0,   255, 255, 255, 1,   2,   255, 255,  //
+      255, 0,   255, 255, 1,   2,   255, 255,  //
+      0,   1,   255, 255, 2,   3,   255, 255,  //
+      255, 255, 0,   255, 1,   2,   255, 255,  //
+      0,   255, 1,   255, 2,   3,   255, 255,  //
+      255, 0,   1,   255, 2,   3,   255, 255,  //
+      0,   1,   2,   255, 3,   4,   255, 255,  //
+      255, 255, 255, 0,   1,   2,   255, 255,  //
+      0,   255, 255, 1,   2,   3,   255, 255,  //
+      255, 0,   255, 1,   2,   3,   255, 255,  //
+      0,   1,   255, 2,   3,   4,   255, 255,  //
+      255, 255, 0,   1,   2,   3,   255, 255,  //
+      0,   255, 1,   2,   3,   4,   255, 255,  //
+      255, 0,   1,   2,   3,   4,   255, 255,  //
+      0,   1,   2,   3,   4,   5,   255, 255,  //
+      255, 255, 255, 255, 255, 255, 0,   255,  //
+      0,   255, 255, 255, 255, 255, 1,   255,  //
+      255, 0,   255, 255, 255, 255, 1,   255,  //
+      0,   1,   255, 255, 255, 255, 2,   255,  //
+      255, 255, 0,   255, 255, 255, 1,   255,  //
+      0,   255, 1,   255, 255, 255, 2,   255,  //
+      255, 0,   1,   255, 255, 255, 2,   255,  //
+      0,   1,   2,   255, 255, 255, 3,   255,  //
+      255, 255, 255, 0,   255, 255, 1,   255,  //
+      0,   255, 255, 1,   255, 255, 2,   255,  //
+      255, 0,   255, 1,   255, 255, 2,   255,  //
+      0,   1,   255, 2,   255, 255, 3,   255,  //
+      255, 255, 0,   1,   255, 255, 2,   255,  //
+      0,   255, 1,   2,   255, 255, 3,   255,  //
+      255, 0,   1,   2,   255, 255, 3,   255,  //
+      0,   1,   2,   3,   255, 255, 4,   255,  //
+      255, 255, 255, 255, 0,   255, 1,   255,  //
+      0,   255, 255, 255, 1,   255, 2,   255,  //
+      255, 0,   255, 255, 1,   255, 2,   255,  //
+      0,   1,   255, 255, 2,   255, 3,   255,  //
+      255, 255, 0,   255, 1,   255, 2,   255,  //
+      0,   255, 1,   255, 2,   255, 3,   255,  //
+      255, 0,   1,   255, 2,   255, 3,   255,  //
+      0,   1,   2,   255, 3,   255, 4,   255,  //
+      255, 255, 255, 0,   1,   255, 2,   255,  //
+      0,   255, 255, 1,   2,   255, 3,   255,  //
+      255, 0,   255, 1,   2,   255, 3,   255,  //
+      0,   1,   255, 2,   3,   255, 4,   255,  //
+      255, 255, 0,   1,   2,   255, 3,   255,  //
+      0,   255, 1,   2,   3,   255, 4,   255,  //
+      255, 0,   1,   2,   3,   255, 4,   255,  //
+      0,   1,   2,   3,   4,   255, 5,   255,  //
+      255, 255, 255, 255, 255, 0,   1,   255,  //
+      0,   255, 255, 255, 255, 1,   2,   255,  //
+      255, 0,   255, 255, 255, 1,   2,   255,  //
+      0,   1,   255, 255, 255, 2,   3,   255,  //
+      255, 255, 0,   255, 255, 1,   2,   255,  //
+      0,   255, 1,   255, 255, 2,   3,   255,  //
+      255, 0,   1,   255, 255, 2,   3,   255,  //
+      0,   1,   2,   255, 255, 3,   4,   255,  //
+      255, 255, 255, 0,   255, 1,   2,   255,  //
+      0,   255, 255, 1,   255, 2,   3,   255,  //
+      255, 0,   255, 1,   255, 2,   3,   255,  //
+      0,   1,   255, 2,   255, 3,   4,   255,  //
+      255, 255, 0,   1,   255, 2,   3,   255,  //
+      0,   255, 1,   2,   255, 3,   4,   255,  //
+      255, 0,   1,   2,   255, 3,   4,   255,  //
+      0,   1,   2,   3,   255, 4,   5,   255,  //
+      255, 255, 255, 255, 0,   1,   2,   255,  //
+      0,   255, 255, 255, 1,   2,   3,   255,  //
+      255, 0,   255, 255, 1,   2,   3,   255,  //
+      0,   1,   255, 255, 2,   3,   4,   255,  //
+      255, 255, 0,   255, 1,   2,   3,   255,  //
+      0,   255, 1,   255, 2,   3,   4,   255,  //
+      255, 0,   1,   255, 2,   3,   4,   255,  //
+      0,   1,   2,   255, 3,   4,   5,   255,  //
+      255, 255, 255, 0,   1,   2,   3,   255,  //
+      0,   255, 255, 1,   2,   3,   4,   255,  //
+      255, 0,   255, 1,   2,   3,   4,   255,  //
+      0,   1,   255, 2,   3,   4,   5,   255,  //
+      255, 255, 0,   1,   2,   3,   4,   255,  //
+      0,   255, 1,   2,   3,   4,   5,   255,  //
+      255, 0,   1,   2,   3,   4,   5,   255,  //
+      0,   1,   2,   3,   4,   5,   6,   255,  //
+      255, 255, 255, 255, 255, 255, 255, 0,    //
+      0,   255, 255, 255, 255, 255, 255, 1,    //
+      255, 0,   255, 255, 255, 255, 255, 1,    //
+      0,   1,   255, 255, 255, 255, 255, 2,    //
+      255, 255, 0,   255, 255, 255, 255, 1,    //
+      0,   255, 1,   255, 255, 255, 255, 2,    //
+      255, 0,   1,   255, 255, 255, 255, 2,    //
+      0,   1,   2,   255, 255, 255, 255, 3,    //
+      255, 255, 255, 0,   255, 255, 255, 1,    //
+      0,   255, 255, 1,   255, 255, 255, 2,    //
+      255, 0,   255, 1,   255, 255, 255, 2,    //
+      0,   1,   255, 2,   255, 255, 255, 3,    //
+      255, 255, 0,   1,   255, 255, 255, 2,    //
+      0,   255, 1,   2,   255, 255, 255, 3,    //
+      255, 0,   1,   2,   255, 255, 255, 3,    //
+      0,   1,   2,   3,   255, 255, 255, 4,    //
+      255, 255, 255, 255, 0,   255, 255, 1,    //
+      0,   255, 255, 255, 1,   255, 255, 2,    //
+      255, 0,   255, 255, 1,   255, 255, 2,    //
+      0,   1,   255, 255, 2,   255, 255, 3,    //
+      255, 255, 0,   255, 1,   255, 255, 2,    //
+      0,   255, 1,   255, 2,   255, 255, 3,    //
+      255, 0,   1,   255, 2,   255, 255, 3,    //
+      0,   1,   2,   255, 3,   255, 255, 4,    //
+      255, 255, 255, 0,   1,   255, 255, 2,    //
+      0,   255, 255, 1,   2,   255, 255, 3,    //
+      255, 0,   255, 1,   2,   255, 255, 3,    //
+      0,   1,   255, 2,   3,   255, 255, 4,    //
+      255, 255, 0,   1,   2,   255, 255, 3,    //
+      0,   255, 1,   2,   3,   255, 255, 4,    //
+      255, 0,   1,   2,   3,   255, 255, 4,    //
+      0,   1,   2,   3,   4,   255, 255, 5,    //
+      255, 255, 255, 255, 255, 0,   255, 1,    //
+      0,   255, 255, 255, 255, 1,   255, 2,    //
+      255, 0,   255, 255, 255, 1,   255, 2,    //
+      0,   1,   255, 255, 255, 2,   255, 3,    //
+      255, 255, 0,   255, 255, 1,   255, 2,    //
+      0,   255, 1,   255, 255, 2,   255, 3,    //
+      255, 0,   1,   255, 255, 2,   255, 3,    //
+      0,   1,   2,   255, 255, 3,   255, 4,    //
+      255, 255, 255, 0,   255, 1,   255, 2,    //
+      0,   255, 255, 1,   255, 2,   255, 3,    //
+      255, 0,   255, 1,   255, 2,   255, 3,    //
+      0,   1,   255, 2,   255, 3,   255, 4,    //
+      255, 255, 0,   1,   255, 2,   255, 3,    //
+      0,   255, 1,   2,   255, 3,   255, 4,    //
+      255, 0,   1,   2,   255, 3,   255, 4,    //
+      0,   1,   2,   3,   255, 4,   255, 5,    //
+      255, 255, 255, 255, 0,   1,   255, 2,    //
+      0,   255, 255, 255, 1,   2,   255, 3,    //
+      255, 0,   255, 255, 1,   2,   255, 3,    //
+      0,   1,   255, 255, 2,   3,   255, 4,    //
+      255, 255, 0,   255, 1,   2,   255, 3,    //
+      0,   255, 1,   255, 2,   3,   255, 4,    //
+      255, 0,   1,   255, 2,   3,   255, 4,    //
+      0,   1,   2,   255, 3,   4,   255, 5,    //
+      255, 255, 255, 0,   1,   2,   255, 3,    //
+      0,   255, 255, 1,   2,   3,   255, 4,    //
+      255, 0,   255, 1,   2,   3,   255, 4,    //
+      0,   1,   255, 2,   3,   4,   255, 5,    //
+      255, 255, 0,   1,   2,   3,   255, 4,    //
+      0,   255, 1,   2,   3,   4,   255, 5,    //
+      255, 0,   1,   2,   3,   4,   255, 5,    //
+      0,   1,   2,   3,   4,   5,   255, 6,    //
+      255, 255, 255, 255, 255, 255, 0,   1,    //
+      0,   255, 255, 255, 255, 255, 1,   2,    //
+      255, 0,   255, 255, 255, 255, 1,   2,    //
+      0,   1,   255, 255, 255, 255, 2,   3,    //
+      255, 255, 0,   255, 255, 255, 1,   2,    //
+      0,   255, 1,   255, 255, 255, 2,   3,    //
+      255, 0,   1,   255, 255, 255, 2,   3,    //
+      0,   1,   2,   255, 255, 255, 3,   4,    //
+      255, 255, 255, 0,   255, 255, 1,   2,    //
+      0,   255, 255, 1,   255, 255, 2,   3,    //
+      255, 0,   255, 1,   255, 255, 2,   3,    //
+      0,   1,   255, 2,   255, 255, 3,   4,    //
+      255, 255, 0,   1,   255, 255, 2,   3,    //
+      0,   255, 1,   2,   255, 255, 3,   4,    //
+      255, 0,   1,   2,   255, 255, 3,   4,    //
+      0,   1,   2,   3,   255, 255, 4,   5,    //
+      255, 255, 255, 255, 0,   255, 1,   2,    //
+      0,   255, 255, 255, 1,   255, 2,   3,    //
+      255, 0,   255, 255, 1,   255, 2,   3,    //
+      0,   1,   255, 255, 2,   255, 3,   4,    //
+      255, 255, 0,   255, 1,   255, 2,   3,    //
+      0,   255, 1,   255, 2,   255, 3,   4,    //
+      255, 0,   1,   255, 2,   255, 3,   4,    //
+      0,   1,   2,   255, 3,   255, 4,   5,    //
+      255, 255, 255, 0,   1,   255, 2,   3,    //
+      0,   255, 255, 1,   2,   255, 3,   4,    //
+      255, 0,   255, 1,   2,   255, 3,   4,    //
+      0,   1,   255, 2,   3,   255, 4,   5,    //
+      255, 255, 0,   1,   2,   255, 3,   4,    //
+      0,   255, 1,   2,   3,   255, 4,   5,    //
+      255, 0,   1,   2,   3,   255, 4,   5,    //
+      0,   1,   2,   3,   4,   255, 5,   6,    //
+      255, 255, 255, 255, 255, 0,   1,   2,    //
+      0,   255, 255, 255, 255, 1,   2,   3,    //
+      255, 0,   255, 255, 255, 1,   2,   3,    //
+      0,   1,   255, 255, 255, 2,   3,   4,    //
+      255, 255, 0,   255, 255, 1,   2,   3,    //
+      0,   255, 1,   255, 255, 2,   3,   4,    //
+      255, 0,   1,   255, 255, 2,   3,   4,    //
+      0,   1,   2,   255, 255, 3,   4,   5,    //
+      255, 255, 255, 0,   255, 1,   2,   3,    //
+      0,   255, 255, 1,   255, 2,   3,   4,    //
+      255, 0,   255, 1,   255, 2,   3,   4,    //
+      0,   1,   255, 2,   255, 3,   4,   5,    //
+      255, 255, 0,   1,   255, 2,   3,   4,    //
+      0,   255, 1,   2,   255, 3,   4,   5,    //
+      255, 0,   1,   2,   255, 3,   4,   5,    //
+      0,   1,   2,   3,   255, 4,   5,   6,    //
+      255, 255, 255, 255, 0,   1,   2,   3,    //
+      0,   255, 255, 255, 1,   2,   3,   4,    //
+      255, 0,   255, 255, 1,   2,   3,   4,    //
+      0,   1,   255, 255, 2,   3,   4,   5,    //
+      255, 255, 0,   255, 1,   2,   3,   4,    //
+      0,   255, 1,   255, 2,   3,   4,   5,    //
+      255, 0,   1,   255, 2,   3,   4,   5,    //
+      0,   1,   2,   255, 3,   4,   5,   6,    //
+      255, 255, 255, 0,   1,   2,   3,   4,    //
+      0,   255, 255, 1,   2,   3,   4,   5,    //
+      255, 0,   255, 1,   2,   3,   4,   5,    //
+      0,   1,   255, 2,   3,   4,   5,   6,    //
+      255, 255, 0,   1,   2,   3,   4,   5,    //
+      0,   255, 1,   2,   3,   4,   5,   6,    //
+      255, 0,   1,   2,   3,   4,   5,   6,    //
+      0,   1,   2,   3,   4,   5,   6,   7};
+  const svuint16_t indices = PromoteTo(du16, Load(du8, table + offset));
+  return TableLookupLanes(v, indices);  // already zeros mask=false lanes
+#else
+  return detail::ExpandLoop(v, mask);
+#endif
+}
+
+// Expand for 4-byte lanes. In the table-driven paths, the i-th mask=true lane
+// receives lane i of `v` and mask=false lanes are zeroed.
+template <class V, HWY_IF_T_SIZE_V(V, 4)>
+HWY_API V Expand(V v, svbool_t mask) {
+#if HWY_TARGET == HWY_SVE_256 || HWY_IDE  // 32x8
+  const RebindToUnsigned<DFromV<V>> du32;
+  // Convert mask into bitfield via horizontal sum (faster than ORV).
+  const svuint32_t bits = Shl(Set(du32, 1), Iota(du32, 0));
+  const size_t code = detail::SumOfLanesM(mask, bits);
+
+  alignas(16) static constexpr uint32_t packed_array[256] = {
+      // PrintExpand32x8. static: one shared copy, not re-created per call.
+      0xffffffff, 0xfffffff0, 0xffffff0f, 0xffffff10, 0xfffff0ff, 0xfffff1f0,
+      0xfffff10f, 0xfffff210, 0xffff0fff, 0xffff1ff0, 0xffff1f0f, 0xffff2f10,
+      0xffff10ff, 0xffff21f0, 0xffff210f, 0xffff3210, 0xfff0ffff, 0xfff1fff0,
+      0xfff1ff0f, 0xfff2ff10, 0xfff1f0ff, 0xfff2f1f0, 0xfff2f10f, 0xfff3f210,
+      0xfff10fff, 0xfff21ff0, 0xfff21f0f, 0xfff32f10, 0xfff210ff, 0xfff321f0,
+      0xfff3210f, 0xfff43210, 0xff0fffff, 0xff1ffff0, 0xff1fff0f, 0xff2fff10,
+      0xff1ff0ff, 0xff2ff1f0, 0xff2ff10f, 0xff3ff210, 0xff1f0fff, 0xff2f1ff0,
+      0xff2f1f0f, 0xff3f2f10, 0xff2f10ff, 0xff3f21f0, 0xff3f210f, 0xff4f3210,
+      0xff10ffff, 0xff21fff0, 0xff21ff0f, 0xff32ff10, 0xff21f0ff, 0xff32f1f0,
+      0xff32f10f, 0xff43f210, 0xff210fff, 0xff321ff0, 0xff321f0f, 0xff432f10,
+      0xff3210ff, 0xff4321f0, 0xff43210f, 0xff543210, 0xf0ffffff, 0xf1fffff0,
+      0xf1ffff0f, 0xf2ffff10, 0xf1fff0ff, 0xf2fff1f0, 0xf2fff10f, 0xf3fff210,
+      0xf1ff0fff, 0xf2ff1ff0, 0xf2ff1f0f, 0xf3ff2f10, 0xf2ff10ff, 0xf3ff21f0,
+      0xf3ff210f, 0xf4ff3210, 0xf1f0ffff, 0xf2f1fff0, 0xf2f1ff0f, 0xf3f2ff10,
+      0xf2f1f0ff, 0xf3f2f1f0, 0xf3f2f10f, 0xf4f3f210, 0xf2f10fff, 0xf3f21ff0,
+      0xf3f21f0f, 0xf4f32f10, 0xf3f210ff, 0xf4f321f0, 0xf4f3210f, 0xf5f43210,
+      0xf10fffff, 0xf21ffff0, 0xf21fff0f, 0xf32fff10, 0xf21ff0ff, 0xf32ff1f0,
+      0xf32ff10f, 0xf43ff210, 0xf21f0fff, 0xf32f1ff0, 0xf32f1f0f, 0xf43f2f10,
+      0xf32f10ff, 0xf43f21f0, 0xf43f210f, 0xf54f3210, 0xf210ffff, 0xf321fff0,
+      0xf321ff0f, 0xf432ff10, 0xf321f0ff, 0xf432f1f0, 0xf432f10f, 0xf543f210,
+      0xf3210fff, 0xf4321ff0, 0xf4321f0f, 0xf5432f10, 0xf43210ff, 0xf54321f0,
+      0xf543210f, 0xf6543210, 0x0fffffff, 0x1ffffff0, 0x1fffff0f, 0x2fffff10,
+      0x1ffff0ff, 0x2ffff1f0, 0x2ffff10f, 0x3ffff210, 0x1fff0fff, 0x2fff1ff0,
+      0x2fff1f0f, 0x3fff2f10, 0x2fff10ff, 0x3fff21f0, 0x3fff210f, 0x4fff3210,
+      0x1ff0ffff, 0x2ff1fff0, 0x2ff1ff0f, 0x3ff2ff10, 0x2ff1f0ff, 0x3ff2f1f0,
+      0x3ff2f10f, 0x4ff3f210, 0x2ff10fff, 0x3ff21ff0, 0x3ff21f0f, 0x4ff32f10,
+      0x3ff210ff, 0x4ff321f0, 0x4ff3210f, 0x5ff43210, 0x1f0fffff, 0x2f1ffff0,
+      0x2f1fff0f, 0x3f2fff10, 0x2f1ff0ff, 0x3f2ff1f0, 0x3f2ff10f, 0x4f3ff210,
+      0x2f1f0fff, 0x3f2f1ff0, 0x3f2f1f0f, 0x4f3f2f10, 0x3f2f10ff, 0x4f3f21f0,
+      0x4f3f210f, 0x5f4f3210, 0x2f10ffff, 0x3f21fff0, 0x3f21ff0f, 0x4f32ff10,
+      0x3f21f0ff, 0x4f32f1f0, 0x4f32f10f, 0x5f43f210, 0x3f210fff, 0x4f321ff0,
+      0x4f321f0f, 0x5f432f10, 0x4f3210ff, 0x5f4321f0, 0x5f43210f, 0x6f543210,
+      0x10ffffff, 0x21fffff0, 0x21ffff0f, 0x32ffff10, 0x21fff0ff, 0x32fff1f0,
+      0x32fff10f, 0x43fff210, 0x21ff0fff, 0x32ff1ff0, 0x32ff1f0f, 0x43ff2f10,
+      0x32ff10ff, 0x43ff21f0, 0x43ff210f, 0x54ff3210, 0x21f0ffff, 0x32f1fff0,
+      0x32f1ff0f, 0x43f2ff10, 0x32f1f0ff, 0x43f2f1f0, 0x43f2f10f, 0x54f3f210,
+      0x32f10fff, 0x43f21ff0, 0x43f21f0f, 0x54f32f10, 0x43f210ff, 0x54f321f0,
+      0x54f3210f, 0x65f43210, 0x210fffff, 0x321ffff0, 0x321fff0f, 0x432fff10,
+      0x321ff0ff, 0x432ff1f0, 0x432ff10f, 0x543ff210, 0x321f0fff, 0x432f1ff0,
+      0x432f1f0f, 0x543f2f10, 0x432f10ff, 0x543f21f0, 0x543f210f, 0x654f3210,
+      0x3210ffff, 0x4321fff0, 0x4321ff0f, 0x5432ff10, 0x4321f0ff, 0x5432f1f0,
+      0x5432f10f, 0x6543f210, 0x43210fff, 0x54321ff0, 0x54321f0f, 0x65432f10,
+      0x543210ff, 0x654321f0, 0x6543210f, 0x76543210};
+
+  // For lane i, shift the i-th 4-bit index down and mask with 0xF because
+  // svtbl zeros outputs if the index is out of bounds.
+  const svuint32_t packed = Set(du32, packed_array[code]);
+  const svuint32_t indices = detail::AndN(Shr(packed, svindex_u32(0, 4)), 0xF);
+  return TableLookupLanes(v, indices);  // already zeros mask=false lanes
+#elif HWY_TARGET == HWY_SVE2_128        // 32x4
+  const RebindToUnsigned<DFromV<V>> du32;
+  // Convert mask into bitfield via horizontal sum (faster than ORV).
+  const svuint32_t bits = Shl(Set(du32, 1), Iota(du32, 0));
+  const size_t code = detail::SumOfLanesM(mask, bits);
+
+  alignas(16) static constexpr uint32_t packed_array[16] = {
+      // PrintExpand64x4Nibble - same for 32x4.
+      0x0000ffff, 0x0000fff0, 0x0000ff0f, 0x0000ff10, 0x0000f0ff, 0x0000f1f0,
+      0x0000f10f, 0x0000f210, 0x00000fff, 0x00001ff0, 0x00001f0f, 0x00002f10,
+      0x000010ff, 0x000021f0, 0x0000210f, 0x00003210};
+
+  // For lane i, shift the i-th 4-bit index down and mask with 0xF because
+  // svtbl zeros outputs if the index is out of bounds.
+  const svuint32_t packed = Set(du32, packed_array[code]);
+  const svuint32_t indices = detail::AndN(Shr(packed, svindex_u32(0, 4)), 0xF);
+  return TableLookupLanes(v, indices);  // already zeros mask=false lanes
+#else
+  return detail::ExpandLoop(v, mask);
+#endif
+}
+
+template <class V, HWY_IF_T_SIZE_V(V, 8)>  // Expand for 8-byte lanes.
+HWY_API V Expand(V v, svbool_t mask) {
+#if HWY_TARGET == HWY_SVE_256 || HWY_IDE  // 64x4
+  const DFromV<V> d;
+  const RebindToUnsigned<decltype(d)> du64;
+
+  // Convert mask into bitfield via horizontal sum (faster than ORV) of masked
+  // bits 1, 2, 4, 8, pre-multiplied by N=4 (Iota starts at 2, i.e. each bit is
+  // shifted left by two more) so the sum is a row offset for SetTableIndices.
+  const svuint64_t bits = Shl(Set(du64, 1), Iota(du64, 2));
+  const size_t offset = detail::SumOfLanesM(mask, bits);
+
+  alignas(16) static constexpr uint64_t table[4 * 16] = {
+      // PrintExpand64x4Tables - small enough to store uncompressed.
+      255, 255, 255, 255, 0, 255, 255, 255, 255, 0, 255, 255, 0, 1, 255, 255,
+      255, 255, 0,   255, 0, 255, 1,   255, 255, 0, 1,   255, 0, 1, 2,   255,
+      255, 255, 255, 0,   0, 255, 255, 1,   255, 0, 255, 1,   0, 1, 255, 2,
+      255, 255, 0,   1,   0, 255, 1,   2,   255, 0, 1,   2,   0, 1, 2,   3};
+  // This already zeros mask=false lanes.
+  return TableLookupLanes(v, SetTableIndices(d, table + offset));
+#elif HWY_TARGET == HWY_SVE2_128  // 64x2
+  // Same as Compress, just zero out the mask=false lanes.
+  return IfThenElseZero(mask, Compress(v, mask));
+#else
+  return detail::ExpandLoop(v, mask);
+#endif
+}
+
+// ------------------------------ LoadExpand
+
+template <class D>  // Unaligned load via LoadU, then Expand with `mask`.
+HWY_API VFromD<D> LoadExpand(MFromD<D> mask, D d,
+                             const TFromD<D>* HWY_RESTRICT unaligned) {
+  return Expand(LoadU(d, unaligned), mask);
+}
+
+// ------------------------------ MulEven (InterleaveEven)
+
+#if HWY_SVE_HAVE_2
+namespace detail {  // NAME(a, b): HALF-typed inputs, BITS-typed result.
+#define HWY_SVE_MUL_EVEN(BASE, CHAR, BITS, HALF, NAME, OP)     \
+  HWY_API HWY_SVE_V(BASE, BITS)                                \
+      NAME(HWY_SVE_V(BASE, HALF) a, HWY_SVE_V(BASE, HALF) b) { \
+    return sv##OP##_##CHAR##BITS(a, b);                        \
+  }
+
+HWY_SVE_FOREACH_UI16(HWY_SVE_MUL_EVEN, MulEvenNative, mullb)  // for MulEven
+HWY_SVE_FOREACH_UI32(HWY_SVE_MUL_EVEN, MulEvenNative, mullb)
+HWY_SVE_FOREACH_UI64(HWY_SVE_MUL_EVEN, MulEvenNative, mullb)
+HWY_SVE_FOREACH_UI16(HWY_SVE_MUL_EVEN, MulOddNative, mullt)  // for MulOdd
+HWY_SVE_FOREACH_UI32(HWY_SVE_MUL_EVEN, MulOddNative, mullt)
+HWY_SVE_FOREACH_UI64(HWY_SVE_MUL_EVEN, MulOddNative, mullt)
+#undef HWY_SVE_MUL_EVEN
+}  // namespace detail
+#endif  // HWY_SVE_HAVE_2
+
+// Full-width products of the even lanes of `a` and `b`, as lanes of DW.
+template <class V, class DW = RepartitionToWide<DFromV<V>>,
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2) | (1 << 4))>
+HWY_API VFromD<DW> MulEven(const V a, const V b) {
+  const DW dw;
+#if HWY_SVE_HAVE_2
+  return BitCast(dw, detail::MulEvenNative(a, b));
+#else  // Emulate: combine the low and high product halves.
+  return BitCast(dw, detail::InterleaveEven(Mul(a, b), MulHigh(a, b)));
+#endif
+}
+
+// Full-width products of the odd lanes of `a` and `b`, as lanes of DW.
+template <class V, class DW = RepartitionToWide<DFromV<V>>,
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2) | (1 << 4))>
+HWY_API VFromD<DW> MulOdd(const V a, const V b) {
+  const DW dw;
+#if HWY_SVE_HAVE_2
+  return BitCast(dw, detail::MulOddNative(a, b));
+#else  // Emulate: combine the low and high product halves.
+  return BitCast(dw, detail::InterleaveOdd(Mul(a, b), MulHigh(a, b)));
+#endif
+}
+
+// u64 overload; the result is also u64: the low and high product halves are
+// combined via detail::InterleaveEven.
+HWY_API svuint64_t MulEven(const svuint64_t a, const svuint64_t b) {
+  return detail::InterleaveEven(Mul(a, b), MulHigh(a, b));
+}
+
+// u64 overload; the result is also u64: the low and high product halves are
+// combined via detail::InterleaveOdd.
+HWY_API svuint64_t MulOdd(const svuint64_t a, const svuint64_t b) {
+  return detail::InterleaveOdd(Mul(a, b), MulHigh(a, b));
+}
+
+// ------------------------------ WidenMulPairwiseAdd
+
+// Per f32 lane: product of the even bf16 pair plus product of the odd pair.
+template <size_t N, int kPow2>
+HWY_API svfloat32_t WidenMulPairwiseAdd(Simd<float, N, kPow2> df32, VBF16 a,
+                                        VBF16 b) {
+#if HWY_SVE_HAVE_BFLOAT16
+  return svbfmlalt_f32(svbfmlalb_f32(Zero(df32), a, b), a, b);
+#else
+  // A bf16 is the upper half of an f32, so shift/mask (rather than Zip) widens
+  // it and yields the odd/even order that RearrangeToOddPlusEven prefers.
+  const RebindToUnsigned<decltype(df32)> du32;
+  const auto bits_a = BitCast(du32, a);
+  const auto bits_b = BitCast(du32, b);
+  const auto upper16 = Set(du32, 0xFFFF0000u);
+  const auto even_a = BitCast(df32, ShiftLeft<16>(bits_a));
+  const auto even_b = BitCast(df32, ShiftLeft<16>(bits_b));
+  const auto odd_a = BitCast(df32, And(bits_a, upper16));
+  const auto odd_b = BitCast(df32, And(bits_b, upper16));
+  return MulAdd(even_a, even_b, Mul(odd_a, odd_b));
+#endif  // HWY_SVE_HAVE_BFLOAT16
+}
+
+template <size_t N, int kPow2>
+HWY_API svint32_t WidenMulPairwiseAdd(Simd<int32_t, N, kPow2> d32, svint16_t a,
+                                      svint16_t b) {
+#if HWY_SVE_HAVE_2
+  (void)d32;
+  return svmlalt_s32(svmullb_s32(a, b), a, b);  // even products, then += odd
+#else
+  const svbool_t all = detail::PTrue(d32);
+  const svint32_t wide_a = BitCast(d32, a), wide_b = BitCast(d32, b);
+  // Odd lanes: shifting brings the upper halves down (the order that
+  // RearrangeToOddPlusEven prefers). Even lanes: svexth sign-extends them.
+  const svint32_t odd_products =
+      svmul_s32_x(all, ShiftRight<16>(wide_a), ShiftRight<16>(wide_b));
+  return svmla_s32_x(all, odd_products, svexth_s32_x(all, wide_a),
+                     svexth_s32_x(all, wide_b));
+#endif
+}
+
+template <size_t N, int kPow2>
+HWY_API svuint32_t WidenMulPairwiseAdd(Simd<uint32_t, N, kPow2> d32,
+                                       svuint16_t a, svuint16_t b) {
+#if HWY_SVE_HAVE_2
+  (void)d32;
+  return svmlalt_u32(svmullb_u32(a, b), a, b);  // even products, then += odd
+#else
+  const svbool_t all = detail::PTrue(d32);
+  const svuint32_t wide_a = BitCast(d32, a), wide_b = BitCast(d32, b);
+  // Odd lanes: shifting brings the upper halves down (the order that
+  // RearrangeToOddPlusEven prefers). Even lanes: svexth zero-extends them.
+  const svuint32_t odd_products =
+      svmul_u32_x(all, ShiftRight<16>(wide_a), ShiftRight<16>(wide_b));
+  return svmla_u32_x(all, odd_products, svexth_u32_x(all, wide_a),
+                     svexth_u32_x(all, wide_b));
+#endif
+}
+
+// ------------------------------ ReorderWidenMulAccumulate (MulAdd, ZipLower)
+
+// Adds odd-lane bf16 products to `sum1`; returns `sum0` + even-lane products.
+template <size_t N, int kPow2>
+HWY_API svfloat32_t ReorderWidenMulAccumulate(Simd<float, N, kPow2> df32,
+                                              VBF16 a, VBF16 b,
+                                              const svfloat32_t sum0,
+                                              svfloat32_t& sum1) {
+#if HWY_SVE_HAVE_BFLOAT16
+  (void)df32;
+  sum1 = svbfmlalt_f32(sum1, a, b);
+  return svbfmlalb_f32(sum0, a, b);
+#else
+  // Shift/mask instead of Zip gives the order RearrangeToOddPlusEven prefers.
+  const RebindToUnsigned<decltype(df32)> du32;
+  const auto bits_a = BitCast(du32, a);
+  const auto bits_b = BitCast(du32, b);
+  const auto upper16 = Set(du32, 0xFFFF0000u);
+  const auto odd_a = BitCast(df32, And(bits_a, upper16));
+  const auto odd_b = BitCast(df32, And(bits_b, upper16));
+  sum1 = MulAdd(odd_a, odd_b, sum1);
+  return MulAdd(BitCast(df32, ShiftLeft<16>(bits_a)),
+                BitCast(df32, ShiftLeft<16>(bits_b)), sum0);
+#endif  // HWY_SVE_HAVE_BFLOAT16
+}
+
+// Adds odd-lane i16 products to `sum1`; returns `sum0` + even-lane products.
+template <size_t N, int kPow2>
+HWY_API svint32_t ReorderWidenMulAccumulate(Simd<int32_t, N, kPow2> d32,
+                                            svint16_t a, svint16_t b,
+                                            const svint32_t sum0,
+                                            svint32_t& sum1) {
+#if HWY_SVE_HAVE_2
+  (void)d32;
+  sum1 = svmlalt_s32(sum1, a, b);
+  return svmlalb_s32(sum0, a, b);
+#else
+  const svbool_t all = detail::PTrue(d32);
+  const svint32_t wide_a = BitCast(d32, a), wide_b = BitCast(d32, b);
+  // Odd lanes: shift the upper halves down (RearrangeToOddPlusEven's order).
+  sum1 = svmla_s32_x(all, sum1, ShiftRight<16>(wide_a), ShiftRight<16>(wide_b));
+  // Even lanes: svexth sign-extends the lower halves.
+  const svint32_t even_a = svexth_s32_x(all, wide_a);
+  const svint32_t even_b = svexth_s32_x(all, wide_b);
+  return svmla_s32_x(all, sum0, even_a, even_b);
+#endif
+}
+
+// Adds odd-lane u16 products to `sum1`; returns `sum0` + even-lane products.
+template <size_t N, int kPow2>
+HWY_API svuint32_t ReorderWidenMulAccumulate(Simd<uint32_t, N, kPow2> d32,
+                                             svuint16_t a, svuint16_t b,
+                                             const svuint32_t sum0,
+                                             svuint32_t& sum1) {
+#if HWY_SVE_HAVE_2
+  (void)d32;
+  sum1 = svmlalt_u32(sum1, a, b);
+  return svmlalb_u32(sum0, a, b);
+#else
+  const svbool_t all = detail::PTrue(d32);
+  const svuint32_t wide_a = BitCast(d32, a), wide_b = BitCast(d32, b);
+  // Odd lanes: shift the upper halves down (RearrangeToOddPlusEven's order).
+  sum1 = svmla_u32_x(all, sum1, ShiftRight<16>(wide_a), ShiftRight<16>(wide_b));
+  // Even lanes: svexth zero-extends the lower halves.
+  const svuint32_t even_a = svexth_u32_x(all, wide_a);
+  const svuint32_t even_b = svexth_u32_x(all, wide_b);
+  return svmla_u32_x(all, sum0, even_a, even_b);
+#endif
+}
+
+// ------------------------------ RearrangeToOddPlusEven
+template <class VW>
+HWY_API VW RearrangeToOddPlusEven(const VW sum0, const VW sum1) {
+  // sum0 = bottom/even-lane sums, sum1 = top/odd-lane sums: just add lanes.
+  return Add(sum0, sum1);
+}
+
+// ------------------------------ SumOfMulQuadAccumulate
+
+#ifdef HWY_NATIVE_I8_I8_SUMOFMULQUADACCUMULATE
+#undef HWY_NATIVE_I8_I8_SUMOFMULQUADACCUMULATE
+#else
+#define HWY_NATIVE_I8_I8_SUMOFMULQUADACCUMULATE
+#endif
+
+template <class DI32, HWY_IF_I32_D(DI32)>
+HWY_API VFromD<DI32> SumOfMulQuadAccumulate(DI32 /*di32*/, svint8_t a,
+                                            svint8_t b, svint32_t sum) {
+  return svdot_s32(sum, a, b);  // sum += dot product of each i8 quad
+}
+
+#ifdef HWY_NATIVE_U8_U8_SUMOFMULQUADACCUMULATE
+#undef HWY_NATIVE_U8_U8_SUMOFMULQUADACCUMULATE
+#else
+#define HWY_NATIVE_U8_U8_SUMOFMULQUADACCUMULATE
+#endif
+
+template <class DU32, HWY_IF_U32_D(DU32)>
+HWY_API VFromD<DU32> SumOfMulQuadAccumulate(DU32 /*du32*/, svuint8_t a,
+                                            svuint8_t b, svuint32_t sum) {
+  return svdot_u32(sum, a, b);  // sum += dot product of each u8 quad
+}
+
+#ifdef HWY_NATIVE_U8_I8_SUMOFMULQUADACCUMULATE
+#undef HWY_NATIVE_U8_I8_SUMOFMULQUADACCUMULATE
+#else
+#define HWY_NATIVE_U8_I8_SUMOFMULQUADACCUMULATE
+#endif
+
+template <class DI32, HWY_IF_I32_D(DI32)>
+HWY_API VFromD<DI32> SumOfMulQuadAccumulate(DI32 di32, svuint8_t a_u,
+                                            svint8_t b_i, svint32_t sum) {
+  // TODO: use svusdot_u32 on SVE targets that require support for both SVE2
+  // and SVE I8MM.
+  // Until then: b_i == b_u - 256 * (b_u >> 7), so compute the unsigned dot
+  // product and subtract 256 * dot(a_u, sign bits of b), all modulo 2^32.
+  const RebindToUnsigned<decltype(di32)> du32;
+  const Repartition<uint8_t, decltype(di32)> du8;
+  const svuint8_t b_bits = BitCast(du8, b_i);
+
+  const svuint32_t unsigned_dot = svdot_u32(BitCast(du32, sum), a_u, b_bits);
+  const svuint32_t sign_dot = svdot_u32(Zero(du32), a_u, ShiftRight<7>(b_bits));
+  const svuint32_t correction = ShiftLeft<8>(sign_dot);
+  return BitCast(di32, Sub(unsigned_dot, correction));
+}
+
+#ifdef HWY_NATIVE_I16_I16_SUMOFMULQUADACCUMULATE
+#undef HWY_NATIVE_I16_I16_SUMOFMULQUADACCUMULATE
+#else
+#define HWY_NATIVE_I16_I16_SUMOFMULQUADACCUMULATE
+#endif
+
+template <class DI64, HWY_IF_I64_D(DI64)>
+HWY_API VFromD<DI64> SumOfMulQuadAccumulate(DI64 /*di64*/, svint16_t a,
+                                            svint16_t b, svint64_t sum) {
+  return svdot_s64(sum, a, b);  // sum += dot product of each i16 quad
+}
+
+#ifdef HWY_NATIVE_U16_U16_SUMOFMULQUADACCUMULATE
+#undef HWY_NATIVE_U16_U16_SUMOFMULQUADACCUMULATE
+#else
+#define HWY_NATIVE_U16_U16_SUMOFMULQUADACCUMULATE
+#endif
+
+template <class DU64, HWY_IF_U64_D(DU64)>
+HWY_API VFromD<DU64> SumOfMulQuadAccumulate(DU64 /*du64*/, svuint16_t a,
+                                            svuint16_t b, svuint64_t sum) {
+  return svdot_u64(sum, a, b);  // sum += dot product of each u16 quad
+}
+
+// ------------------------------ AESRound / CLMul
+
+#if defined(__ARM_FEATURE_SVE2_AES) || \
+    (HWY_SVE_HAVE_2 && HWY_HAVE_RUNTIME_DISPATCH)
+
+// Per-target flag to prevent generic_ops-inl.h from defining AESRound.
+#ifdef HWY_NATIVE_AES
+#undef HWY_NATIVE_AES
+#else
+#define HWY_NATIVE_AES
+#endif
+
+HWY_API svuint8_t AESRound(svuint8_t state, svuint8_t round_key) {
+  // Zero key for E, real key XORed after MC. Unclear if E+MC fuse as on NEON.
+  return Xor(svaesmc_u8(svaese_u8(state, svdup_n_u8(0))), round_key);
+}
+
+HWY_API svuint8_t AESLastRound(svuint8_t state, svuint8_t round_key) {
+  return Xor(svaese_u8(state, svdup_n_u8(0)), round_key);  // no MixColumns
+}
+
+HWY_API svuint8_t AESInvMixColumns(svuint8_t state) {
+  return svaesimc_u8(state);  // IMC only, no key
+}
+
+HWY_API svuint8_t AESRoundInv(svuint8_t state, svuint8_t round_key) {
+  return Xor(svaesimc_u8(svaesd_u8(state, svdup_n_u8(0))), round_key);
+}
+
+HWY_API svuint8_t AESLastRoundInv(svuint8_t state, svuint8_t round_key) {
+  return Xor(svaesd_u8(state, svdup_n_u8(0)), round_key);  // no InvMixColumns
+}
+
+template <uint8_t kRcon>
+HWY_API svuint8_t AESKeyGenAssist(svuint8_t v) {
+  alignas(16) static constexpr uint8_t kRconXorMask[16] = {
+      0, kRcon, 0, 0, 0, 0, 0, 0, 0, kRcon, 0, 0, 0, 0, 0, 0};  // bytes 1, 9
+  alignas(16) static constexpr uint8_t kRotWordShuffle[16] = {
+      0, 13, 10, 7, 1, 14, 11, 4, 8, 5, 2, 15, 9, 6, 3, 12};
+  const DFromV<decltype(v)> d;
+  const Repartition<uint32_t, decltype(d)> du32;
+  const auto w13 = BitCast(d, DupOdd(BitCast(du32, v)));  // odd u32 words
+  const auto sub_word_result = AESLastRound(w13, LoadDup128(d, kRconXorMask));
+  return TableLookupBytes(sub_word_result, LoadDup128(d, kRotWordShuffle));
+}
+
+HWY_API svuint64_t CLMulLower(const svuint64_t a, const svuint64_t b) {
+  return svpmullb_pair(a, b);  // PMULLB: "bottom" polynomial multiply
+}
+
+HWY_API svuint64_t CLMulUpper(const svuint64_t a, const svuint64_t b) {
+  return svpmullt_pair(a, b);  // PMULLT: "top" polynomial multiply
+}
+
+#endif  // __ARM_FEATURE_SVE2_AES
+
+// ------------------------------ Lt128
+
+namespace detail {
+#define HWY_SVE_DUP(BASE, CHAR, BITS, HALF, NAME, OP)                        \
+  template <size_t N, int kPow2>                                             \
+  HWY_API svbool_t NAME(HWY_SVE_D(BASE, BITS, N, kPow2) /*d*/, svbool_t m) { \
+    return sv##OP##_b##BITS(m, m);                                           \
+  }
+
+HWY_SVE_FOREACH_U(HWY_SVE_DUP, DupEvenB, trn1)  // takes svbool_t, not a vector
+HWY_SVE_FOREACH_U(HWY_SVE_DUP, DupOddB, trn2)   // takes svbool_t, not a vector
+#undef HWY_SVE_DUP
+
+#if HWY_TARGET == HWY_SVE_256 || HWY_IDE
+template <class D>
+HWY_INLINE svuint64_t Lt128Vec(D d, const svuint64_t a, const svuint64_t b) {
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "D must be u64");
+  // Per-lane a < b as a vector: more pipelines can execute vector TRN*
+  // instructions than the predicate version.
+  const svuint64_t lane_lt = VecFromMask(d, Lt(a, b));
+  const svbool_t lane_eq = Eq(a, b);  // only odd (upper) lanes are consulted
+  // Upper lane decides: if the upper halves are equal take the lower-half
+  // result, else the upper-half one. INSR/EXT/TRN2 are unpredicated, hence
+  // the extra IfThenElse.
+  const svuint64_t upper_lt = IfThenElse(lane_eq, DupEven(lane_lt), lane_lt);
+  return DupOdd(upper_lt);  // Duplicate upper lane into lower.
+}
+#endif
+}  // namespace detail
+
+template <class D>  // 128-bit unsigned a < b; result replicated to both lanes.
+HWY_INLINE svbool_t Lt128(D d, const svuint64_t a, const svuint64_t b) {
+#if HWY_TARGET == HWY_SVE_256
+  return MaskFromVec(detail::Lt128Vec(d, a, b));
+#else
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "D must be u64");
+  const svbool_t is_lt = Lt(a, b);
+  const svbool_t is_eq = Eq(a, b);  // only odd (upper) lanes are consulted
+  // Upper lane: lower-half result if the upper halves are equal, otherwise
+  // the upper-half result.
+  const svbool_t hi_lt = svsel_b(is_eq, detail::DupEvenB(d, is_lt), is_lt);
+  return detail::DupOddB(d, hi_lt);  // Duplicate upper lane into lower.
+#endif  // HWY_TARGET != HWY_SVE_256
+}
+
+// ------------------------------ Lt128Upper
+
+// Compares only the upper (odd) u64 of each pair; result fills both lanes.
+template <class D>
+HWY_INLINE svbool_t Lt128Upper(D d, svuint64_t a, svuint64_t b) {
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "D must be u64");
+  return detail::DupOddB(d, Lt(a, b));
+}
+
+// ------------------------------ Eq128, Ne128
+
+#if HWY_TARGET == HWY_SVE_256 || HWY_IDE
+namespace detail {
+
+// Per u64 pair: set (in both lanes) iff both halves of a and b are equal.
+template <class D>
+HWY_INLINE svuint64_t Eq128Vec(D d, const svuint64_t a, const svuint64_t b) {
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "D must be u64");
+  // Work on a vector rather than a predicate: more pipelines can execute
+  // vector TRN* instructions than the predicate version.
+  const svuint64_t lane_eq = VecFromMask(d, Eq(a, b));
+  // Broadcast the lower and upper results across each pair; both must hold.
+  const svuint64_t lower_eq = DupEven(lane_eq);
+  return And(lower_eq, DupOdd(lane_eq));
+}
+
+// Per u64 pair: set (in both lanes) iff either half of a and b differs.
+template <class D>
+HWY_INLINE svuint64_t Ne128Vec(D d, const svuint64_t a, const svuint64_t b) {
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "D must be u64");
+  // Work on a vector rather than a predicate: more pipelines can execute
+  // vector TRN* instructions than the predicate version.
+  const svuint64_t lane_ne = VecFromMask(d, Ne(a, b));
+  // Broadcast the lower and upper results across each pair; either suffices.
+  const svuint64_t lower_ne = DupEven(lane_ne);
+  return Or(lower_ne, DupOdd(lane_ne));
+}
+
+}  // namespace detail
+#endif
+
+template <class D>  // True for a u64 pair iff both halves of a and b match.
+HWY_INLINE svbool_t Eq128(D d, const svuint64_t a, const svuint64_t b) {
+#if HWY_TARGET == HWY_SVE_256
+  return MaskFromVec(detail::Eq128Vec(d, a, b));
+#else
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "D must be u64");
+  const svbool_t lane_eq = Eq(a, b);
+  // Broadcast the lower and upper results across each pair; both must hold.
+  const svbool_t lower_eq = detail::DupEvenB(d, lane_eq);
+  return And(lower_eq, detail::DupOddB(d, lane_eq));
+#endif  // HWY_TARGET != HWY_SVE_256
+}
+
+template <class D>  // True for a u64 pair iff either half of a and b differs.
+HWY_INLINE svbool_t Ne128(D d, const svuint64_t a, const svuint64_t b) {
+#if HWY_TARGET == HWY_SVE_256
+  return MaskFromVec(detail::Ne128Vec(d, a, b));
+#else
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "D must be u64");
+  const svbool_t lane_ne = Ne(a, b);
+  // Broadcast the lower and upper results across each pair; either suffices.
+  const svbool_t lower_ne = detail::DupEvenB(d, lane_ne);
+  return Or(lower_ne, detail::DupOddB(d, lane_ne));
+#endif  // HWY_TARGET != HWY_SVE_256
+}
+
+// ------------------------------ Eq128Upper, Ne128Upper
+
+// Compares only the upper (odd) u64 of each pair; result fills both lanes.
+template <class D>
+HWY_INLINE svbool_t Eq128Upper(D d, svuint64_t a, svuint64_t b) {
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "D must be u64");
+  return detail::DupOddB(d, Eq(a, b));
+}
+
+// Compares only the upper (odd) u64 of each pair; result fills both lanes.
+template <class D>
+HWY_INLINE svbool_t Ne128Upper(D d, svuint64_t a, svuint64_t b) {
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "D must be u64");
+  return detail::DupOddB(d, Ne(a, b));
+}
+
+// ------------------------------ Min128, Max128 (Lt128)
+
+template <class D>  // Per u64 pair: `a` where Lt128(a, b) holds, else `b`.
+HWY_INLINE svuint64_t Min128(D d, const svuint64_t a, const svuint64_t b) {
+#if HWY_TARGET == HWY_SVE_256
+  return IfVecThenElse(detail::Lt128Vec(d, a, b), a, b);
+#else
+  return IfThenElse(Lt128(d, a, b), a, b);
+#endif
+}
+
+template <class D>  // Per u64 pair: `a` where Lt128(b, a) holds, else `b`.
+HWY_INLINE svuint64_t Max128(D d, const svuint64_t a, const svuint64_t b) {
+#if HWY_TARGET == HWY_SVE_256
+  return IfVecThenElse(detail::Lt128Vec(d, b, a), a, b);
+#else
+  return IfThenElse(Lt128(d, b, a), a, b);
+#endif
+}
+
+template <class D>  // `a` where Lt128Upper(a, b) holds, else `b`.
+HWY_INLINE svuint64_t Min128Upper(D d, const svuint64_t a, const svuint64_t b) {
+  return IfThenElse(Lt128Upper(d, a, b), a, b);
+}
+
+template <class D>  // `a` where Lt128Upper(b, a) holds, else `b`.
+HWY_INLINE svuint64_t Max128Upper(D d, const svuint64_t a, const svuint64_t b) {
+  return IfThenElse(Lt128Upper(d, b, a), a, b);
+}
+
+// -------------------- LeadingZeroCount, TrailingZeroCount, HighestSetBitIndex
+
+#ifdef HWY_NATIVE_LEADING_ZERO_COUNT
+#undef HWY_NATIVE_LEADING_ZERO_COUNT
+#else
+#define HWY_NATIVE_LEADING_ZERO_COUNT
+#endif
+
+#define HWY_SVE_LEADING_ZERO_COUNT(BASE, CHAR, BITS, HALF, NAME, OP)   \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME(HWY_SVE_V(BASE, BITS) v) {        \
+    const DFromV<decltype(v)> d;                                       \
+    return BitCast(d, sv##OP##_##CHAR##BITS##_x(detail::PTrue(d), v)); \
+  }
+
+HWY_SVE_FOREACH_UI(HWY_SVE_LEADING_ZERO_COUNT, LeadingZeroCount, clz)
+#undef HWY_SVE_LEADING_ZERO_COUNT  // (BitCast: result has the type of v)
+
+template <class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V)>
+HWY_API V TrailingZeroCount(V v) {
+  return LeadingZeroCount(ReverseBits(v));  // tzcnt(v) == lzcnt(reversed v)
+}
+
+template <class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V)>
+HWY_API V HighestSetBitIndex(V v) {
+  using T = TFromD<DFromV<V>>;  // index = (bits in T - 1) - leading zeros
+  const DFromV<V> d;
+  return BitCast(d, Sub(Set(d, T{sizeof(T) * 8 - 1}), LeadingZeroCount(v)));
+}
+
+// ================================================== END MACROS
+namespace detail {  // for code folding
+#undef HWY_SVE_ALL_PTRUE
+#undef HWY_SVE_D
+#undef HWY_SVE_FOREACH
+#undef HWY_SVE_FOREACH_BF16
+#undef HWY_SVE_FOREACH_F
+#undef HWY_SVE_FOREACH_F16
+#undef HWY_SVE_FOREACH_F32
+#undef HWY_SVE_FOREACH_F64
+#undef HWY_SVE_FOREACH_I
+#undef HWY_SVE_FOREACH_I08
+#undef HWY_SVE_FOREACH_I16
+#undef HWY_SVE_FOREACH_I32
+#undef HWY_SVE_FOREACH_I64
+#undef HWY_SVE_FOREACH_IF
+#undef HWY_SVE_FOREACH_U
+#undef HWY_SVE_FOREACH_U08
+#undef HWY_SVE_FOREACH_U16
+#undef HWY_SVE_FOREACH_U32
+#undef HWY_SVE_FOREACH_U64
+#undef HWY_SVE_FOREACH_UI
+#undef HWY_SVE_FOREACH_UI08
+#undef HWY_SVE_FOREACH_UI16
+#undef HWY_SVE_FOREACH_UI32
+#undef HWY_SVE_FOREACH_UI64
+#undef HWY_SVE_FOREACH_UIF3264
+#undef HWY_SVE_HAVE_2
+#undef HWY_SVE_PTRUE
+#undef HWY_SVE_RETV_ARGPV
+#undef HWY_SVE_RETV_ARGPVN
+#undef HWY_SVE_RETV_ARGPVV
+#undef HWY_SVE_RETV_ARGV
+#undef HWY_SVE_RETV_ARGVN
+#undef HWY_SVE_RETV_ARGVV
+#undef HWY_SVE_RETV_ARGVVV
+#undef HWY_SVE_T
+#undef HWY_SVE_UNDEFINED
+#undef HWY_SVE_V
+
+}  // namespace detail
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/ops/emu128-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/emu128-inl.h
new file mode 100644
index 0000000000..1aba5ec40b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/emu128-inl.h
@@ -0,0 +1,2728 @@
+// Copyright 2022 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Emulated 128-bit vectors and operations, implemented with scalar loops.
+// External include guard in highway.h - see comment there.
+
+#include <cmath>  // std::abs, std::isnan
+
+#include "hwy/ops/shared-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+template <typename T>
+using Full128 = Simd<T, 16 / sizeof(T), 0>;  // tag for 16 / sizeof(T) lanes
+
+// (Wrapper class required for overloading comparison operators.)
+template <typename T, size_t N = 16 / sizeof(T)>
+struct Vec128 {
+  using PrivateT = T;                     // lane type, consumed by DFromV
+  static constexpr size_t kPrivateN = N;  // lane count, consumed by DFromV
+
+  HWY_INLINE Vec128() = default;
+  Vec128(const Vec128&) = default;
+  Vec128& operator=(const Vec128&) = default;
+
+  HWY_INLINE Vec128& operator*=(const Vec128 rhs) {
+    return *this = *this * rhs;
+  }
+  HWY_INLINE Vec128& operator/=(const Vec128 rhs) {
+    return *this = *this / rhs;
+  }
+  HWY_INLINE Vec128& operator+=(const Vec128 rhs) {
+    return *this = *this + rhs;
+  }
+  HWY_INLINE Vec128& operator-=(const Vec128 rhs) {
+    return *this = *this - rhs;
+  }
+  HWY_INLINE Vec128& operator&=(const Vec128 rhs) {
+    return *this = *this & rhs;
+  }
+  HWY_INLINE Vec128& operator|=(const Vec128 rhs) {
+    return *this = *this | rhs;
+  }
+  HWY_INLINE Vec128& operator^=(const Vec128 rhs) {
+    return *this = *this ^ rhs;
+  }
+
+  // Storage always spans 128 bits, like wasm128, because generic_ops-inl.h
+  // depends on that for LoadInterleaved*. CAVEAT: lanes at index >= N are
+  // padding, so range-based for over `raw` is unsafe (e.g. in SumOfLanes it
+  // would give wrong results). A separate padding field would need special
+  // handling when N = 16 / sizeof(T) (no padding at all), which is awkward too.
+  T raw[16 / sizeof(T)] = {};
+};
+
+// 0 or FF..FF, same size as Vec128.
+template <typename T, size_t N = 16 / sizeof(T)>
+struct Mask128 {
+  using Raw = hwy::MakeUnsigned<T>;  // unsigned integer as wide as one lane
+  // Maps true to an all-ones lane and false to an all-zero lane.
+  static HWY_INLINE Raw FromBool(bool b) {
+    return static_cast<Raw>(b ? ~Raw{0} : Raw{0});
+  }
+  // One entry per possible lane; same byte size as Vec128::raw.
+  Raw bits[16 / sizeof(T)] = {};
+};
+
+template <class V>
+using DFromV = Simd<typename V::PrivateT, V::kPrivateN, 0>;  // tag type of V
+
+template <class V>
+using TFromV = typename V::PrivateT;  // lane type of V
+
+// ------------------------------ Zero
+
+// Use HWY_MAX_LANES_D here because VFromD is defined in terms of Zero.
+// Returns a value-initialized vector; Vec128's `raw = {}` makes it all-zero.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API Vec128<TFromD<D>, HWY_MAX_LANES_D(D)> Zero(D /* tag */) {
+  return Vec128<TFromD<D>, HWY_MAX_LANES_D(D)>();
+}
+
+template <class D>
+using VFromD = decltype(Zero(D()));  // vector type returned by Zero for tag D
+
+// ------------------------------ Tuple (VFromD)
+#include "hwy/ops/tuple-inl.h"
+
+// ------------------------------ BitCast
+
+template <class D, class VFrom>
+HWY_API VFromD<D> BitCast(D /* tag */, VFrom v) {
+  VFromD<D> out;           // destination vector selected by tag D
+  CopySameSize(&v, &out);  // contents of `v` are copied, not converted
+  return out;
+}
+
+// ------------------------------ ResizeBitCast
+
+// Copies min(source bytes, destination bytes) from `v` into a vector that
+// starts out as Zero(d). Any destination bytes beyond the copied range
+// therefore remain zero.
+template <class D, class VFrom>
+HWY_API VFromD<D> ResizeBitCast(D d, VFrom v) {
+  using DSrc = DFromV<VFrom>;
+  constexpr size_t kSrcBytes = sizeof(TFromD<DSrc>) * HWY_MAX_LANES_D(DSrc);
+  constexpr size_t kDstBytes = sizeof(TFromD<D>) * HWY_MAX_LANES_D(D);
+  constexpr size_t kNumBytes = kSrcBytes < kDstBytes ? kSrcBytes : kDstBytes;
+
+  VFromD<D> resized = Zero(d);
+  CopyBytes<kNumBytes>(&v, &resized);
+  return resized;
+}
+
+namespace detail {
+
+// ResizeBitCast on the HWY_EMU128 target has zero-extending semantics if
+// VFromD<DTo> is a larger vector than FromV
+template <class FromSizeTag, class ToSizeTag, class DTo, class DFrom>
+HWY_INLINE VFromD<DTo> ZeroExtendResizeBitCast(FromSizeTag /* from_size_tag */,
+                                               ToSizeTag /* to_size_tag */,
+                                               DTo d_to, DFrom /* d_from */,
+                                               VFromD<DFrom> v) {
+  return ResizeBitCast(d_to, v);  // starts from Zero(d_to), so extra bytes = 0
+}
+
+}  // namespace detail
+
+// ------------------------------ Set
+// Broadcasts `t`, converted to the lane type of D, into MaxLanes(d) lanes.
+template <class D, typename T2>
+HWY_API VFromD<D> Set(D d, const T2 t) {
+  const TFromD<D> lane = static_cast<TFromD<D>>(t);
+  VFromD<D> v;
+  for (size_t i = 0; i != MaxLanes(d); ++i) v.raw[i] = lane;
+  return v;
+}
+
+// ------------------------------ Undefined
+template <class D>
+HWY_API VFromD<D> Undefined(D d) {
+  return Zero(d);
+}
+
+// ------------------------------ Iota
+
+template <class D, typename T = TFromD<D>, typename T2>
+HWY_API VFromD<D> Iota(D d, T2 first) {  // lane i = AddWithWraparound(first, i)
+  const T start = static_cast<T>(first);
+  VFromD<D> v;
+  for (size_t i = 0; i != MaxLanes(d); ++i) {
+    v.raw[i] = AddWithWraparound(hwy::IsFloatTag<T>(), start, i);
+  }
+  return v;
+}
+
+// ================================================== LOGICAL
+
+// ------------------------------ Not
+// Flips every bit of each lane. The inversion runs on the unsigned view of
+// the lanes, so T itself does not need to support operator~.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Not(Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  auto bits = BitCast(du, v);
+  for (size_t i = 0; i != N; ++i) bits.raw[i] = static_cast<TU>(~bits.raw[i]);
+  return BitCast(d, bits);
+}
+
+// ------------------------------ And
+// Lane-wise bitwise AND, evaluated on the unsigned view of each lane.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> And(Vec128<T, N> a, Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  auto lhs = BitCast(du, a);
+  const auto rhs = BitCast(du, b);
+  for (size_t i = 0; i != N; ++i) lhs.raw[i] &= rhs.raw[i];
+  return BitCast(d, lhs);
+}
+// Operator spelling of And().
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator&(Vec128<T, N> a, Vec128<T, N> b) {
+  return And(a, b);
+}
+
+// ------------------------------ AndNot
+template <typename T, size_t N>
+HWY_API Vec128<T, N> AndNot(Vec128<T, N> a, Vec128<T, N> b) {
+  return And(Not(a), b);  // ~a & b: the FIRST argument is the inverted one
+}
+
+// ------------------------------ Or
+// Lane-wise bitwise OR, evaluated on the unsigned view of each lane.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Or(Vec128<T, N> a, Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  auto lhs = BitCast(du, a);
+  const auto rhs = BitCast(du, b);
+  for (size_t i = 0; i != N; ++i) lhs.raw[i] |= rhs.raw[i];
+  return BitCast(d, lhs);
+}
+// Operator spelling of Or().
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator|(Vec128<T, N> a, Vec128<T, N> b) {
+  return Or(a, b);
+}
+
+// ------------------------------ Xor
+// Lane-wise bitwise XOR, evaluated on the unsigned view of each lane.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Xor(Vec128<T, N> a, Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  auto lhs = BitCast(du, a);
+  const auto rhs = BitCast(du, b);
+  for (size_t i = 0; i != N; ++i) lhs.raw[i] ^= rhs.raw[i];
+  return BitCast(d, lhs);
+}
+// Operator spelling of Xor().
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator^(Vec128<T, N> a, Vec128<T, N> b) {
+  return Xor(a, b);
+}
+
+// ------------------------------ Xor3
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Xor3(Vec128<T, N> x1, Vec128<T, N> x2, Vec128<T, N> x3) {
+  return Xor(x1, Xor(x2, x3));  // x1 ^ x2 ^ x3
+}
+
+// ------------------------------ Or3
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Or3(Vec128<T, N> o1, Vec128<T, N> o2, Vec128<T, N> o3) {
+  return Or(o1, Or(o2, o3));  // o1 | o2 | o3
+}
+
+// ------------------------------ OrAnd
+template <typename T, size_t N>
+HWY_API Vec128<T, N> OrAnd(Vec128<T, N> o, Vec128<T, N> a1, Vec128<T, N> a2) {
+  return Or(o, And(a1, a2));  // o | (a1 & a2)
+}
+
+// ------------------------------ IfVecThenElse
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfVecThenElse(Vec128<T, N> mask, Vec128<T, N> yes,
+                                   Vec128<T, N> no) {
+  return OrAnd(AndNot(mask, no), mask, yes);  // (~mask & no) | (mask & yes)
+}
+
+// ------------------------------ CopySign
+template <typename T, size_t N>
+HWY_API Vec128<T, N> CopySign(Vec128<T, N> magn, Vec128<T, N> sign) {
+  static_assert(IsFloat<T>(), "Only makes sense for floating-point");
+  const auto sign_mask = SignBit(DFromV<decltype(magn)>());
+  return BitwiseIfThenElse(sign_mask, sign, magn);  // sign bit <- `sign`
+}
+
+// ------------------------------ CopySignToAbs
+template <typename T, size_t N>
+HWY_API Vec128<T, N> CopySignToAbs(Vec128<T, N> abs, Vec128<T, N> sign) {
+  static_assert(IsFloat<T>(), "Only makes sense for floating-point");
+  const auto sign_mask = SignBit(DFromV<decltype(abs)>());
+  return OrAnd(abs, sign_mask, sign);  // abs | (sign bit of `sign`)
+}
+
+// ------------------------------ BroadcastSignBit
+// Replaces each of the first N lanes by T(-1) if it compares below zero and
+// by T(0) otherwise.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> BroadcastSignBit(Vec128<T, N> v) {
+  // This is used inside ShiftRight, so it cannot be implemented via a shift.
+  for (size_t i = 0; i != N; ++i) v.raw[i] = (v.raw[i] < 0) ? T(-1) : T(0);
+  return v;
+}
+
+// ------------------------------ Mask
+
+// v must be 0 or FF..FF.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> MaskFromVec(Vec128<T, N> v) {
+  Mask128<T, N> m;       // `bits` is declared with the same size as v.raw
+  CopySameSize(&v, &m);  // lane contents are taken over as mask bits
+  return m;
+}
+
+template <class D>
+using MFromD = decltype(MaskFromVec(VFromD<D>()));  // mask type for tag D
+
+template <class DTo, class MFrom>
+HWY_API MFromD<DTo> RebindMask(DTo /* tag */, MFrom mask) {
+  MFromD<DTo> rebound;            // mask type belonging to the target tag
+  CopySameSize(&mask, &rebound);  // contents are copied, not recomputed
+  return rebound;
+}
+
+template <typename T, size_t N>
+Vec128<T, N> VecFromMask(Mask128<T, N> mask) {
+  Vec128<T, N> lanes;           // receives the mask contents unchanged
+  CopySameSize(&mask, &lanes);  // mask bits are taken over as lane values
+  return lanes;
+}
+
+template <class D>
+VFromD<D> VecFromMask(D /* tag */, MFromD<D> mask) {
+  return VecFromMask(mask);  // the tag argument itself is unused
+}
+
+// Mask in which lane i is true exactly when i < n, for i below MaxLanes(d).
+template <class D>
+HWY_API MFromD<D> FirstN(D d, size_t n) {
+  using M = MFromD<D>;
+  M m;
+  for (size_t i = 0; i != MaxLanes(d); ++i) m.bits[i] = M::FromBool(i < n);
+  return m;
+}
+
+// Per lane: returns `yes` where `mask` is set and `no` elsewhere.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfThenElse(Mask128<T, N> mask, Vec128<T, N> yes,
+                                Vec128<T, N> no) {
+  return IfVecThenElse(VecFromMask(mask), yes, no);
+}
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfThenElseZero(Mask128<T, N> mask, Vec128<T, N> yes) {
+  const Vec128<T, N> zero = Zero(DFromV<decltype(yes)>());
+  return IfVecThenElse(VecFromMask(mask), yes, zero);  // mask ? yes : 0
+}
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfThenZeroElse(Mask128<T, N> mask, Vec128<T, N> no) {
+  const Vec128<T, N> zero = Zero(DFromV<decltype(no)>());
+  return IfVecThenElse(VecFromMask(mask), zero, no);  // mask ? 0 : no
+}
+
+// Per lane: `yes` if `v` is negative, else `no`. The test is made on a
+// signed-integer view of `v`, i.e. it looks at the sign bit only.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfNegativeThenElse(Vec128<T, N> v, Vec128<T, N> yes,
+                                        Vec128<T, N> no) {
+  const RebindToSigned<DFromV<decltype(v)>> di;
+  const auto as_int = BitCast(di, v);
+  for (size_t i = 0; i != N; ++i) {
+    v.raw[i] = (as_int.raw[i] >= 0) ? no.raw[i] : yes.raw[i];
+  }
+  return v;
+}
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> ZeroIfNegative(Vec128<T, N> v) {
+  const Vec128<T, N> zero = Zero(DFromV<decltype(v)>());
+  return IfNegativeThenElse(v, zero, v);  // negative lanes -> 0, rest kept
+}
+
+// ------------------------------ Mask logical
+
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Not(Mask128<T, N> m) {  // lane-wise !m
+  return MaskFromVec(Not(VecFromMask(Simd<T, N, 0>(), m)));
+}
+
+template <typename T, size_t N>
+HWY_API Mask128<T, N> And(Mask128<T, N> a, Mask128<T, N> b) {  // a && b
+  const Simd<T, N, 0> d;
+  return MaskFromVec(And(VecFromMask(d, a), VecFromMask(d, b)));
+}
+
+template <typename T, size_t N>
+HWY_API Mask128<T, N> AndNot(Mask128<T, N> a, Mask128<T, N> b) {  // !a && b
+  const Simd<T, N, 0> d;
+  return MaskFromVec(AndNot(VecFromMask(d, a), VecFromMask(d, b)));
+}
+
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Or(Mask128<T, N> a, Mask128<T, N> b) {  // a || b
+  const Simd<T, N, 0> d;
+  return MaskFromVec(Or(VecFromMask(d, a), VecFromMask(d, b)));
+}
+
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Xor(Mask128<T, N> a, Mask128<T, N> b) {  // a != b
+  const Simd<T, N, 0> d;
+  return MaskFromVec(Xor(VecFromMask(d, a), VecFromMask(d, b)));
+}
+
+template <typename T, size_t N>
+HWY_API Mask128<T, N> ExclusiveNeither(Mask128<T, N> a, Mask128<T, N> b) {
+  const Simd<T, N, 0> d;  // result is !a && !b: true where neither is set
+  return MaskFromVec(AndNot(VecFromMask(d, a), Not(VecFromMask(d, b))));
+}
+
+// ================================================== SHIFTS
+
+// ------------------------------ ShiftLeft/ShiftRight (BroadcastSignBit)
+
+// Shifts each lane left by the compile-time count kBits. The lane is first
+// converted to its unsigned counterpart and the result converted back to T.
+template <int kBits, typename T, size_t N>
+HWY_API Vec128<T, N> ShiftLeft(Vec128<T, N> v) {
+  static_assert(0 <= kBits && kBits < sizeof(T) * 8, "Invalid shift");
+  for (T* p = v.raw; p != v.raw + N; ++p) {
+    *p = static_cast<T>(static_cast<hwy::MakeUnsigned<T>>(*p) << kBits);
+  }
+  return v;
+}
+
+template <int kBits, typename T, size_t N>
+HWY_API Vec128<T, N> ShiftRight(Vec128<T, N> v) {  // lane >> constant kBits
+  static_assert(0 <= kBits && kBits < sizeof(T) * 8, "Invalid shift");
+#if __cplusplus >= 202002L
+  // Signed right shift is now guaranteed to be arithmetic (rounding toward
+  // negative infinity, i.e. shifting in the sign bit).
+  for (size_t i = 0; i < N; ++i) {
+    v.raw[i] = static_cast<T>(v.raw[i] >> kBits);
+  }
+#else
+  if (IsSigned<T>()) {
+    // Emulate arithmetic shift using only logical (unsigned) shifts, because
+    // signed shifts are still implementation-defined before C++20.
+    using TU = hwy::MakeUnsigned<T>;
+    for (size_t i = 0; i < N; ++i) {
+      const TU shifted = static_cast<TU>(static_cast<TU>(v.raw[i]) >> kBits);
+      const TU sign = v.raw[i] < 0 ? static_cast<TU>(~TU{0}) : 0;
+      const size_t sign_shift =  // bit index where the old sign bit lands
+          static_cast<size_t>(static_cast<int>(sizeof(TU)) * 8 - 1 - kBits);
+      const TU upper = static_cast<TU>(sign << sign_shift);  // sign fill
+      v.raw[i] = static_cast<T>(shifted | upper);
+    }
+  } else {  // T is unsigned
+    for (size_t i = 0; i < N; ++i) {
+      v.raw[i] = static_cast<T>(v.raw[i] >> kBits);
+    }
+  }
+#endif
+  return v;
+}
+
+// ------------------------------ RotateRight (ShiftRight)
+template <int kBits, typename T, size_t N>
+HWY_API Vec128<T, N> RotateRight(const Vec128<T, N> v) {
+  constexpr size_t kSizeInBits = sizeof(T) * 8;
+  static_assert(0 <= kBits && kBits < kSizeInBits, "Invalid shift count");
+  constexpr int kLeft = HWY_MIN(kSizeInBits - 1, kSizeInBits - kBits);
+  if (kBits == 0) return v;  // rotating by zero bits changes nothing
+  return Or(ShiftRight<kBits>(v), ShiftLeft<kLeft>(v));
+}
+
+// ------------------------------ ShiftLeftSame
+
+// Shifts every lane left by the same runtime count `bits`, via unsigned math.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> ShiftLeftSame(Vec128<T, N> v, int bits) {
+  for (T* p = v.raw; p != v.raw + N; ++p) {
+    *p = static_cast<T>(static_cast<hwy::MakeUnsigned<T>>(*p) << bits);
+  }
+  return v;
+}
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> ShiftRightSame(Vec128<T, N> v, int bits) {
+#if __cplusplus >= 202002L
+  // Every lane is shifted right by the same runtime count `bits`. Since C++20
+  // signed right shift is guaranteed to be arithmetic (shifts in the sign bit).
+  for (size_t i = 0; i < N; ++i) {
+    v.raw[i] = static_cast<T>(v.raw[i] >> bits);
+  }
+#else
+  if (IsSigned<T>()) {
+    // Emulate arithmetic shift using only logical (unsigned) shifts, because
+    // signed shifts are still implementation-defined before C++20.
+    using TU = hwy::MakeUnsigned<T>;
+    for (size_t i = 0; i < N; ++i) {
+      const TU shifted = static_cast<TU>(static_cast<TU>(v.raw[i]) >> bits);
+      const TU sign = v.raw[i] < 0 ? static_cast<TU>(~TU{0}) : 0;
+      const size_t sign_shift =  // bit index where the old sign bit lands
+          static_cast<size_t>(static_cast<int>(sizeof(TU)) * 8 - 1 - bits);
+      const TU upper = static_cast<TU>(sign << sign_shift);  // sign fill
+      v.raw[i] = static_cast<T>(shifted | upper);
+    }
+  } else {
+    for (size_t i = 0; i < N; ++i) {
+      v.raw[i] = static_cast<T>(v.raw[i] >> bits);  // unsigned, logical shift
+    }
+  }
+#endif
+  return v;
+}
+
+// ------------------------------ Shl
+
+// Per-lane variable shift: lane i of `v` is shifted left by lane i of `bits`.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator<<(Vec128<T, N> v, Vec128<T, N> bits) {
+  using TU = hwy::MakeUnsigned<T>;
+  for (size_t i = 0; i != N; ++i) {
+    v.raw[i] = static_cast<T>(static_cast<TU>(v.raw[i]) << bits.raw[i]);
+  }
+  return v;
+}
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator>>(Vec128<T, N> v, Vec128<T, N> bits) {
+#if __cplusplus >= 202002L
+  // Lane i of `v` is shifted right by lane i of `bits`. Since C++20, signed
+  // right shift is guaranteed to be arithmetic (it shifts in the sign bit).
+  for (size_t i = 0; i < N; ++i) {
+    v.raw[i] = static_cast<T>(v.raw[i] >> bits.raw[i]);
+  }
+#else
+  if (IsSigned<T>()) {
+    // Emulate arithmetic shift using only logical (unsigned) shifts, because
+    // signed shifts are still implementation-defined before C++20.
+    using TU = hwy::MakeUnsigned<T>;
+    for (size_t i = 0; i < N; ++i) {
+      const TU shifted =
+          static_cast<TU>(static_cast<TU>(v.raw[i]) >> bits.raw[i]);
+      const TU sign = v.raw[i] < 0 ? static_cast<TU>(~TU{0}) : 0;
+      const size_t sign_shift = static_cast<size_t>(
+          static_cast<int>(sizeof(TU)) * 8 - 1 - bits.raw[i]);
+      const TU upper = static_cast<TU>(sign << sign_shift);  // sign fill
+      v.raw[i] = static_cast<T>(shifted | upper);
+    }
+  } else {  // T is unsigned
+    for (size_t i = 0; i < N; ++i) {
+      v.raw[i] = static_cast<T>(v.raw[i] >> bits.raw[i]);
+    }
+  }
+#endif
+  return v;
+}
+
+// ================================================== ARITHMETIC
+
+// Tag dispatch instead of SFINAE for MSVC 2017 compatibility
+namespace detail {
+
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> Add(hwy::NonFloatTag /*tag*/, Vec128<T, N> a,
+                            Vec128<T, N> b) {
+  for (size_t i = 0; i != N; ++i) {
+    uint64_t sum = static_cast<uint64_t>(a.raw[i]);  // unsigned: wraps, no UB
+    sum += static_cast<uint64_t>(b.raw[i]);
+    a.raw[i] = static_cast<T>(sum & static_cast<uint64_t>(~T(0)));
+  }
+  return a;
+}
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> Sub(hwy::NonFloatTag /*tag*/, Vec128<T, N> a,
+                            Vec128<T, N> b) {
+  for (size_t i = 0; i != N; ++i) {
+    uint64_t diff = static_cast<uint64_t>(a.raw[i]);  // unsigned: wraps, no UB
+    diff -= static_cast<uint64_t>(b.raw[i]);
+    a.raw[i] = static_cast<T>(diff & static_cast<uint64_t>(~T(0)));
+  }
+  return a;
+}
+
+// Floating-point overload: plain lane-wise a + b over the first N lanes,
+// without the uint64_t detour used for integers.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> Add(hwy::FloatTag /*tag*/, Vec128<T, N> a,
+                            Vec128<T, N> b) {
+  for (size_t i = 0; i != N; ++i) a.raw[i] += b.raw[i];
+  return a;
+}
+
+// Floating-point overload: plain lane-wise a - b over the first N lanes,
+// without the uint64_t detour used for integers.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> Sub(hwy::FloatTag /*tag*/, Vec128<T, N> a,
+                            Vec128<T, N> b) {
+  for (size_t i = 0; i != N; ++i) a.raw[i] -= b.raw[i];
+  return a;
+}
+
+}  // namespace detail
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator-(Vec128<T, N> a, Vec128<T, N> b) {
+  return detail::Sub(hwy::IsFloatTag<T>(), a, b);  // tag: float or integer
+}
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator+(Vec128<T, N> a, Vec128<T, N> b) {
+  return detail::Add(hwy::IsFloatTag<T>(), a, b);  // tag: float or integer
+}
+
+// ------------------------------ SumsOf8
+
+// Adds every group of 8 consecutive u8 lanes into one u64 lane: input lane i
+// contributes to output lane i / 8. The accumulators start at zero.
+template <size_t N>
+HWY_API Vec128<uint64_t, (N + 7) / 8> SumsOf8(Vec128<uint8_t, N> v) {
+  Vec128<uint64_t, (N + 7) / 8> sums;
+  for (size_t i = 0; i != N; ++i) sums.raw[i >> 3] += v.raw[i];
+  return sums;
+}
+
+// ------------------------------ SaturatedAdd
+template <typename T, size_t N, HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2)),
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>
+HWY_API Vec128<T, N> SaturatedAdd(Vec128<T, N> a, Vec128<T, N> b) {
+  using TW = MakeSigned<MakeWide<T>>;
+  for (size_t i = 0; i != N; ++i) {
+    const auto sum = static_cast<TW>(a.raw[i]) + b.raw[i];  // wide: exact
+    const auto floored = HWY_MAX(hwy::LowestValue<T>(), sum);
+    a.raw[i] = static_cast<T>(HWY_MIN(floored, hwy::HighestValue<T>()));
+  }
+  return a;
+}
+
+// ------------------------------ SaturatedSub
+template <typename T, size_t N, HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2)),
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>
+HWY_API Vec128<T, N> SaturatedSub(Vec128<T, N> a, Vec128<T, N> b) {
+  using TW = MakeSigned<MakeWide<T>>;
+  for (size_t i = 0; i != N; ++i) {
+    const auto diff = static_cast<TW>(a.raw[i]) - b.raw[i];  // wide: exact
+    const auto floored = HWY_MAX(hwy::LowestValue<T>(), diff);
+    a.raw[i] = static_cast<T>(HWY_MIN(floored, hwy::HighestValue<T>()));
+  }
+  return a;
+}
+
+// ------------------------------ AverageRound
+// Returns (a + b + 1) / 2 for unsigned lanes, i.e. the average rounded up.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> AverageRound(Vec128<T, N> a, Vec128<T, N> b) {
+  static_assert(!IsSigned<T>(), "Only for unsigned");
+  // (a | b) - ((a ^ b) >> 1) is the same value but cannot wrap for u32/u64.
+  const Vec128<T, N> half_diff = ShiftRight<1>(Xor(a, b));
+  return Or(a, b) - half_diff;
+}
+
+// ------------------------------ Abs
+
+// Tag dispatch instead of SFINAE for MSVC 2017 compatibility
+namespace detail {
+
+// Integer |a|. LimitsMin<T>() is deliberately left as is (it is not negated).
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> Abs(SignedTag /*tag*/, Vec128<T, N> a) {
+  for (size_t i = 0; i != N; ++i) {
+    const T s = a.raw[i];
+    if (s < 0 && s != hwy::LimitsMin<T>()) a.raw[i] = static_cast<T>(-s);
+  }
+  return a;
+}
+
+// Floating-point overload: applies std::abs to each of the first N lanes and
+// leaves the remaining storage untouched.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> Abs(hwy::FloatTag /*tag*/, Vec128<T, N> v) {
+  for (T* p = v.raw; p != v.raw + N; ++p) *p = std::abs(*p);
+  return v;
+}
+
+}  // namespace detail
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Abs(Vec128<T, N> a) {
+  return detail::Abs(hwy::TypeTag<T>(), a);  // overload chosen by T's tag
+}
+
+// ------------------------------ Min/Max
+
+// Tag dispatch instead of SFINAE for MSVC 2017 compatibility
+namespace detail {
+
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> Min(hwy::NonFloatTag /*tag*/, Vec128<T, N> a,
+                            Vec128<T, N> b) {
+  for (size_t i = 0; i != N; ++i) {  // integer lanes: keep the smaller value
+    if (!(a.raw[i] < b.raw[i])) a.raw[i] = b.raw[i];
+  }
+  return a;
+}
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> Max(hwy::NonFloatTag /*tag*/, Vec128<T, N> a,
+                            Vec128<T, N> b) {
+  for (size_t i = 0; i != N; ++i) {  // integer lanes: keep the larger value
+    if (!(a.raw[i] > b.raw[i])) a.raw[i] = b.raw[i];
+  }
+  return a;
+}
+
+// Floating-point minimum that prefers numbers over NaN: if exactly one input
+// lane is NaN, the other lane is returned; if both are NaN, b's lane is used.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> Min(hwy::FloatTag /*tag*/, Vec128<T, N> a,
+                            Vec128<T, N> b) {
+  for (size_t i = 0; i != N; ++i) {
+    const T x = a.raw[i];
+    const T y = b.raw[i];
+    // Take y when x is NaN, or when both are numbers and x is not smaller.
+    // A NaN y next to a numeric x leaves the lane untouched.
+    if (std::isnan(x) || (!std::isnan(y) && !(x < y))) a.raw[i] = y;
+  }
+  return a;
+}
+// Floating-point maximum that prefers numbers over NaN: if exactly one input
+// lane is NaN, the other lane is returned; if both are NaN, b's lane is used.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> Max(hwy::FloatTag /*tag*/, Vec128<T, N> a,
+                            Vec128<T, N> b) {
+  for (size_t i = 0; i != N; ++i) {
+    const T x = a.raw[i];
+    const T y = b.raw[i];
+    // Take y when x is NaN, or when both are numbers and x is not larger.
+    // A NaN y next to a numeric x leaves the lane untouched.
+    if (std::isnan(x) || (!std::isnan(y) && !(x > y))) a.raw[i] = y;
+  }
+  return a;
+}
+
+}  // namespace detail
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Min(Vec128<T, N> a, Vec128<T, N> b) {
+  return detail::Min(hwy::IsFloatTag<T>(), a, b);  // tag: float or integer
+}
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Max(Vec128<T, N> a, Vec128<T, N> b) {
+  return detail::Max(hwy::IsFloatTag<T>(), a, b);  // tag: float or integer
+}
+
+// ------------------------------ Neg
+
+// Tag dispatch instead of SFINAE for MSVC 2017 compatibility
+namespace detail {
+
+// Integer negation, expressed as 0 - v through the vector operator-.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Neg(hwy::NonFloatTag /*tag*/, Vec128<T, N> v) {
+  return Zero(DFromV<decltype(v)>()) - v;
+}
+
+// Floating-point negation: XOR with SignBit(d) toggles only the sign bit.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Neg(hwy::FloatTag /*tag*/, Vec128<T, N> v) {
+  return Xor(v, SignBit(DFromV<decltype(v)>()));
+}
+
+// SpecialTag overload: same sign-bit XOR as the FloatTag overload.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Neg(hwy::SpecialTag /*tag*/, Vec128<T, N> v) {
+  return Xor(v, SignBit(DFromV<decltype(v)>()));
+}
+
+}  // namespace detail
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Neg(Vec128<T, N> v) {
+  return detail::Neg(hwy::IsFloatTag<T>(), v);  // tag: float or integer
+}
+
+// ------------------------------ Mul/Div
+
+// Tag dispatch instead of SFINAE for MSVC 2017 compatibility
+namespace detail {
+
+// Floating-point overload: plain lane-wise a * b over the first N lanes,
+// without the uint64_t detour used for integers.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> Mul(hwy::FloatTag /*tag*/, Vec128<T, N> a,
+                            Vec128<T, N> b) {
+  for (size_t i = 0; i != N; ++i) a.raw[i] *= b.raw[i];
+  return a;
+}
+
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> Mul(SignedTag /*tag*/, Vec128<T, N> a, Vec128<T, N> b) {
+  for (size_t i = 0; i != N; ++i) {  // uint64_t product wraps instead of UB
+    const uint64_t wide = static_cast<uint64_t>(a.raw[i]);
+    a.raw[i] = static_cast<T>(wide * static_cast<uint64_t>(b.raw[i]));
+  }
+  return a;
+}
+
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> Mul(UnsignedTag /*tag*/, Vec128<T, N> a,
+                            Vec128<T, N> b) {
+  for (size_t i = 0; i != N; ++i) {  // multiply in uint64_t, then narrow to T
+    const uint64_t wide = static_cast<uint64_t>(a.raw[i]);
+    a.raw[i] = static_cast<T>(wide * static_cast<uint64_t>(b.raw[i]));
+  }
+  return a;
+}
+
+}  // namespace detail
+
+// Per-target flags to prevent generic_ops-inl.h defining 8/64-bit operator*.
+#ifdef HWY_NATIVE_MUL_8
+#undef HWY_NATIVE_MUL_8
+#else
+#define HWY_NATIVE_MUL_8
+#endif
+#ifdef HWY_NATIVE_MUL_64
+#undef HWY_NATIVE_MUL_64
+#else
+#define HWY_NATIVE_MUL_64
+#endif
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator*(Vec128<T, N> a, Vec128<T, N> b) {
+  return detail::Mul(hwy::TypeTag<T>(), a, b);  // overload chosen by T's tag
+}
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator/(Vec128<T, N> a, Vec128<T, N> b) {
+  for (size_t i = 0; i != N; ++i) {  // a zero divisor yields 0 for that lane
+    a.raw[i] = (b.raw[i] != T{0}) ? a.raw[i] / b.raw[i] : 0;
+  }
+  return a;
+}
+
+// Returns the upper 16 bits of a * b in each lane.
+template <size_t N>
+HWY_API Vec128<int16_t, N> MulHigh(Vec128<int16_t, N> a, Vec128<int16_t, N> b) {
+  for (size_t i = 0; i < N; ++i) {  // 32-bit product, then keep bits 16..31
+    a.raw[i] = static_cast<int16_t>((int32_t{a.raw[i]} * b.raw[i]) >> 16);
+  }
+  return a;
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> MulHigh(Vec128<uint16_t, N> a,
+                                    Vec128<uint16_t, N> b) {
+  for (size_t i = 0; i != N; ++i) {
+    // Multiply as uint32_t: uint16_t operands would otherwise be promoted to
+    // "int", where the product can overflow. The result is the same in
+    // practice, but this way it is also well-defined.
+    const uint32_t x = a.raw[i];
+    const uint32_t y = b.raw[i];
+    a.raw[i] = static_cast<uint16_t>((x * y) >> 16);
+  }
+  return a;
+}
+
+template <size_t N>
+HWY_API Vec128<int16_t, N> MulFixedPoint15(Vec128<int16_t, N> a,
+                                           Vec128<int16_t, N> b) {
+  for (size_t i = 0; i < N; ++i) {  // (a * b + 2^14) >> 15 for each lane
+    a.raw[i] = static_cast<int16_t>((a.raw[i] * b.raw[i] + 16384) >> 15);
+  }
+  return a;
+}
+
+// Multiplies even lanes (0, 2, ..) and returns the double-wide result.
+template <class T, size_t N,
+          HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>
+HWY_API Vec128<MakeWide<T>, (N + 1) / 2> MulEven(Vec128<T, N> a,
+                                                 Vec128<T, N> b) {
+  using TW = MakeWide<T>;
+  Vec128<TW, (N + 1) / 2> products;
+  for (size_t out = 0; 2 * out < N; ++out) {  // result `out` <- lane 2 * out
+    const TW lhs = a.raw[2 * out];
+    products.raw[out] = static_cast<TW>(lhs * b.raw[2 * out]);
+  }
+  return products;
+}
+
+// Multiplies odd lanes (1, 3, ..) and returns the double-wide result.
+template <class T, size_t N,
+          HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>
+HWY_API Vec128<MakeWide<T>, (N + 1) / 2> MulOdd(Vec128<T, N> a,
+                                                Vec128<T, N> b) {
+  using TW = MakeWide<T>;
+  Vec128<TW, (N + 1) / 2> products;
+  for (size_t out = 0; 2 * out < N; ++out) {  // result `out` <- lane 2*out+1
+    const TW lhs = a.raw[2 * out + 1];
+    products.raw[out] = static_cast<TW>(lhs * b.raw[2 * out + 1]);
+  }
+  return products;
+}
+
+template <size_t N>
+HWY_API Vec128<float, N> ApproximateReciprocal(Vec128<float, N> v) {
+  for (float* p = v.raw; p != v.raw + N; ++p) {
+    // A zero lane must not be divided (ubsan would flag it). It is mapped to
+    // an arbitrary value, 0; callers that may pass zero are expected to
+    // substitute their own result afterwards, typically via IfThenElse.
+    *p = (std::abs(*p) != 0.0f) ? 1.0f / *p : 0.0f;
+  }
+  return v;
+}
+
+// generic_ops takes care of integer T.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> AbsDiff(Vec128<T, N> a, Vec128<T, N> b) {
+  return Abs(a - b);  // |a - b| for each lane
+}
+
+// ------------------------------ Floating-point multiply-add variants
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> MulAdd(Vec128<T, N> mul, Vec128<T, N> x,
+                            Vec128<T, N> add) {
+  return mul * x + add;  // two separate vector ops: multiply, then add
+}
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> NegMulAdd(Vec128<T, N> mul, Vec128<T, N> x,
+                               Vec128<T, N> add) {
+  return add - mul * x;  // add - (mul * x)
+}
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> MulSub(Vec128<T, N> mul, Vec128<T, N> x,
+                            Vec128<T, N> sub) {
+  return mul * x - sub;  // (mul * x) - sub
+}
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> NegMulSub(Vec128<T, N> mul, Vec128<T, N> x,
+                               Vec128<T, N> sub) {
+  return Neg(mul) * x - sub;  // (-mul * x) - sub
+}
+
+// ------------------------------ Floating-point square root
+
+template <size_t N>
+HWY_API Vec128<float, N> ApproximateReciprocalSqrt(Vec128<float, N> v) {
+  for (size_t i = 0; i != N; ++i) {
+    const float x = v.raw[i];
+    uint32_t bits;
+    CopySameSize(&x, &bits);
+    bits = 0x5F3759DF - (bits >> 1);  // initial guess based on log2(x)
+    float guess;
+    CopySameSize(&bits, &guess);
+    // Refine the guess with a single Newton-Raphson step.
+    v.raw[i] = guess * (1.5f - ((x * 0.5f) * guess * guess));
+  }
+  return v;
+}
+
+// Lane-wise square root: applies std::sqrt to each of the first N lanes and
+// leaves the remaining storage untouched.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Sqrt(Vec128<T, N> v) {
+  for (T* p = v.raw; p != v.raw + N; ++p) *p = std::sqrt(*p);
+  return v;
+}
+
+// ------------------------------ Floating-point rounding
+
+// Rounds each lane to the nearest integer, with ties going to the even
+// neighbour. Lanes that are NaN or whose magnitude is MantissaEnd<T>() or
+// more are returned unchanged. Results that are zero keep the sign of the
+// input lane (e.g. -0.25 and -0.5 both give -0.0).
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Round(Vec128<T, N> v) {
+  using TI = MakeSigned<T>;
+  const Vec128<T, N> a = Abs(v);
+  for (size_t i = 0; i < N; ++i) {
+    if (!(a.raw[i] < MantissaEnd<T>())) continue;  // Huge or NaN
+    const T x = v.raw[i];
+    // Round half away from zero by adding +/-0.5 and truncating.
+    TI rounded = static_cast<TI>(x + (x < T(0.0) ? T(-0.5) : T(0.5)));
+    // On an exact tie that landed on an odd integer, step back to the even
+    // neighbour (the one closer to zero).
+    if ((rounded & 1) && std::abs(static_cast<T>(rounded) - x) == T(0.5)) {
+      rounded -= (x < T(0) ? -1 : 1);
+    }
+    // static_cast<T>(0) is always +0, so restore the sign for zero results.
+    const T zero = std::signbit(x) ? -T{0} : T{0};
+    v.raw[i] = (rounded == 0) ? zero : static_cast<T>(rounded);
+  }
+  return v;
+}
+
+// Round-to-nearest even, converted to int32_t. NaN and values whose magnitude
+// is 2^31 or more saturate to LimitsMin/LimitsMax according to the sign bit.
+template <size_t N>
+HWY_API Vec128<int32_t, N> NearestInt(Vec128<float, N> v) {
+  using T = float;
+  using TI = int32_t;
+  const Vec128<float, N> abs = Abs(v);
+  Vec128<int32_t, N> ret;
+  for (size_t i = 0; i < N; ++i) {
+    const bool signbit = std::signbit(v.raw[i]);
+    if (!(abs.raw[i] < MantissaEnd<T>())) {  // Huge or NaN
+      // Saturate if NaN or too large to cast. static_cast<T>(LimitsMax<TI>())
+      // rounds up to 2^31, which does not fit in TI, hence `<` and not `<=`.
+      if (!(abs.raw[i] < static_cast<T>(LimitsMax<TI>()))) {
+        ret.raw[i] = signbit ? LimitsMin<TI>() : LimitsMax<TI>();
+        continue;
+      }
+      ret.raw[i] = static_cast<TI>(v.raw[i]);
+      continue;
+    }
+    const T bias = v.raw[i] < T(0.0) ? T(-0.5) : T(0.5);
+    const TI rounded = static_cast<TI>(v.raw[i] + bias);
+    if (rounded == 0) {
+      ret.raw[i] = 0;
+      continue;
+    }
+    const T rounded_f = static_cast<T>(rounded);
+    // Round to even
+    if ((rounded & 1) && std::abs(rounded_f - v.raw[i]) == T(0.5)) {
+      ret.raw[i] = rounded - (signbit ? -1 : 1);
+      continue;
+    }
+    ret.raw[i] = rounded;
+  }
+  return ret;
+}
+
+// Rounds each lane toward zero. Huge values and NaN are returned unchanged;
+// a zero result carries the sign of the input, so Trunc(-0.0) is -0.0.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Trunc(Vec128<T, N> v) {
+  using TI = MakeSigned<T>;
+  const Vec128<T, N> abs = Abs(v);
+  for (size_t i = 0; i < N; ++i) {
+    if (!(abs.raw[i] <= MantissaEnd<T>())) continue;  // Huge or NaN
+    const TI truncated = static_cast<TI>(v.raw[i]);
+    if (truncated == 0) {
+      v.raw[i] = std::signbit(v.raw[i]) ? -T{0} : T{0};  // also handles -0.0
+      continue;
+    }
+    v.raw[i] = static_cast<T>(truncated);
+  }
+  return v;
+}
+
+// Toward +infinity, aka ceiling. Works on each lane's bit pattern, not libm.
+template <typename Float, size_t N>
+Vec128<Float, N> Ceil(Vec128<Float, N> v) {
+  constexpr int kMantissaBits = MantissaBits<Float>();
+  using Bits = MakeUnsigned<Float>;
+  const Bits kExponentMask = MaxExponentField<Float>();
+  const Bits kMantissaMask = MantissaMask<Float>();
+  const Bits kBias = kExponentMask / 2;
+
+  for (size_t i = 0; i < N; ++i) {
+    const bool positive = v.raw[i] > Float(0.0);
+
+    Bits bits;
+    CopySameSize(&v.raw[i], &bits);
+
+    const int exponent =  // exponent field minus kBias
+        static_cast<int>(((bits >> kMantissaBits) & kExponentMask) - kBias);
+    // No fractional mantissa bits remain at this exponent: already an integer.
+    if (exponent >= kMantissaBits) continue;
+    // |v| < 1 => 1 for lanes > 0, otherwise -0.0 (NOTE: also for a +0.0 lane).
+    if (exponent < 0) {
+      v.raw[i] = positive ? Float{1} : Float{-0.0};
+      continue;
+    }
+
+    const Bits mantissa_mask = kMantissaMask >> exponent;  // fractional bits
+    // Already an integer: all fractional bits are zero.
+    if ((bits & mantissa_mask) == 0) continue;
+
+    // Clear fractional bits and round up (positive lanes only; see `positive`).
+    if (positive) bits += (kMantissaMask + 1) >> exponent;
+    bits &= ~mantissa_mask;
+
+    CopySameSize(&bits, &v.raw[i]);
+  }
+  return v;
+}
+
+// Toward -infinity, aka floor. Works on each lane's bit pattern, not libm.
+template <typename Float, size_t N>
+Vec128<Float, N> Floor(Vec128<Float, N> v) {
+  constexpr int kMantissaBits = MantissaBits<Float>();
+  using Bits = MakeUnsigned<Float>;
+  const Bits kExponentMask = MaxExponentField<Float>();
+  const Bits kMantissaMask = MantissaMask<Float>();
+  const Bits kBias = kExponentMask / 2;
+
+  for (size_t i = 0; i < N; ++i) {
+    const bool negative = v.raw[i] < Float(0.0);
+
+    Bits bits;
+    CopySameSize(&v.raw[i], &bits);
+
+    const int exponent =  // exponent field minus kBias
+        static_cast<int>(((bits >> kMantissaBits) & kExponentMask) - kBias);
+    // No fractional mantissa bits remain at this exponent: already an integer.
+    if (exponent >= kMantissaBits) continue;
+    // |v| < 1 => -1 for lanes < 0, otherwise +0.0 (NOTE: also for a -0.0 lane).
+    if (exponent < 0) {
+      v.raw[i] = negative ? Float(-1.0) : Float(0.0);
+      continue;
+    }
+
+    const Bits mantissa_mask = kMantissaMask >> exponent;  // fractional bits
+    // Already an integer: all fractional bits are zero.
+    if ((bits & mantissa_mask) == 0) continue;
+
+    // Clear fractional bits and round down (negative lanes grow in magnitude).
+    if (negative) bits += (kMantissaMask + 1) >> exponent;
+    bits &= ~mantissa_mask;
+
+    CopySameSize(&bits, &v.raw[i]);
+  }
+  return v;
+}
+
+// ------------------------------ Floating-point classification
+
+template <typename T, size_t N>  // Per-lane mask: set where the lane is NaN.
+HWY_API Mask128<T, N> IsNaN(Vec128<T, N> v) {
+  Mask128<T, N> ret;
+  for (size_t i = 0; i < N; ++i) {
+    // std::isnan returns false for 0x7F..FF in clang AVX3 builds, so DIY.
+    MakeUnsigned<T> bits;
+    CopySameSize(&v.raw[i], &bits);
+    bits += bits;  // doubling shifts the top (sign) bit out
+    bits >>= 1;  // clear sign bit
+    // NaN if all exponent bits are set and the mantissa is not zero.
+    ret.bits[i] = Mask128<T, N>::FromBool(bits > ExponentMask<T>());
+  }
+  return ret;
+}
+
+template <typename T, size_t N>  // Per-lane mask: set for +/- infinity.
+HWY_API Mask128<T, N> IsInf(Vec128<T, N> v) {
+  static_assert(IsFloat<T>(), "Only for float");
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;
+  const VFromD<decltype(di)> vi = BitCast(di, v);
+  // 'Shift left' to clear the sign bit, check for exponent=max and mantissa=0.
+  return RebindMask(d, Eq(Add(vi, vi), Set(di, hwy::MaxExponentTimes2<T>())));
+}
+
+// Returns whether normal/subnormal/zero (exponent field below its maximum).
+template <typename T, size_t N>
+HWY_API Mask128<T, N> IsFinite(Vec128<T, N> v) {
+  static_assert(IsFloat<T>(), "Only for float");
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const RebindToSigned<decltype(d)> di;  // cheaper than unsigned comparison
+  using VI = VFromD<decltype(di)>;
+  using VU = VFromD<decltype(du)>;
+  const VU vu = BitCast(du, v);
+  // 'Shift left' to clear the sign bit, then right so we can compare with the
+  // max exponent (cannot compare with MaxExponentTimes2 directly because it is
+  // negative and non-negative floats would be greater).
+  const VI exp =  // exponent field of each lane, as a signed integer
+      BitCast(di, ShiftRight<hwy::MantissaBits<T>() + 1>(Add(vu, vu)));
+  return RebindMask(d, Lt(exp, Set(di, hwy::MaxExponentField<T>())));
+}
+
+// ================================================== COMPARE
+
+template <typename T, size_t N>
+HWY_API Mask128<T, N> operator==(Vec128<T, N> a, Vec128<T, N> b) {
+  using M = Mask128<T, N>;
+  M eq;  // lane k holds FromBool(a[k] == b[k])
+  for (size_t k = 0; k != N; ++k)
+    eq.bits[k] = M::FromBool(a.raw[k] == b.raw[k]);
+  return eq;
+}
+
+template <typename T, size_t N>
+HWY_API Mask128<T, N> operator!=(Vec128<T, N> a, Vec128<T, N> b) {
+  using M = Mask128<T, N>;
+  M ne;  // lane k holds FromBool(a[k] != b[k])
+  for (size_t k = 0; k != N; ++k)
+    ne.bits[k] = M::FromBool(a.raw[k] != b.raw[k]);
+  return ne;
+}
+
+template <typename T, size_t N>  // Mask of lanes where (v & bit) == bit.
+HWY_API Mask128<T, N> TestBit(Vec128<T, N> v, Vec128<T, N> bit) {
+  static_assert(!hwy::IsFloat<T>(), "Only integer vectors supported");
+  return (v & bit) == bit;  // every bit set in `bit` must also be set in `v`
+}
+
+template <typename T, size_t N>
+HWY_API Mask128<T, N> operator<(Vec128<T, N> a, Vec128<T, N> b) {
+  using M = Mask128<T, N>;
+  M lt;  // lane k holds FromBool(a[k] < b[k])
+  for (size_t k = 0; k != N; ++k)
+    lt.bits[k] = M::FromBool(a.raw[k] < b.raw[k]);
+  return lt;
+}
+template <typename T, size_t N>
+HWY_API Mask128<T, N> operator>(Vec128<T, N> a, Vec128<T, N> b) {
+  using M = Mask128<T, N>;
+  M gt;  // lane k holds FromBool(a[k] > b[k])
+  for (size_t k = 0; k != N; ++k)
+    gt.bits[k] = M::FromBool(a.raw[k] > b.raw[k]);
+  return gt;
+}
+
+template <typename T, size_t N>
+HWY_API Mask128<T, N> operator<=(Vec128<T, N> a, Vec128<T, N> b) {
+  using M = Mask128<T, N>;
+  M le;  // lane k holds FromBool(a[k] <= b[k])
+  for (size_t k = 0; k != N; ++k)
+    le.bits[k] = M::FromBool(a.raw[k] <= b.raw[k]);
+  return le;
+}
+template <typename T, size_t N>
+HWY_API Mask128<T, N> operator>=(Vec128<T, N> a, Vec128<T, N> b) {
+  using M = Mask128<T, N>;
+  M ge;  // lane k holds FromBool(a[k] >= b[k])
+  for (size_t k = 0; k != N; ++k)
+    ge.bits[k] = M::FromBool(a.raw[k] >= b.raw[k]);
+  return ge;
+}
+
+// ------------------------------ Lt128
+
+// a < b with lane 1 as the more significant half; full u64 vectors only.
+template <class D>
+HWY_API MFromD<D> Lt128(D /* tag */, Vec128<uint64_t> a, Vec128<uint64_t> b) {
+  const bool upper_equal = a.raw[1] == b.raw[1];
+  const bool less = upper_equal ? (a.raw[0] < b.raw[0]) : (a.raw[1] < b.raw[1]);
+  Mask128<uint64_t> both;
+  both.bits[0] = both.bits[1] = Mask128<uint64_t>::FromBool(less);
+  return both;
+}
+
+template <class D>
+HWY_API MFromD<D> Lt128Upper(D /* tag */, Vec128<uint64_t> a,
+                             Vec128<uint64_t> b) {
+  using M = Mask128<uint64_t>;
+  M mask;  // both lanes get the verdict of comparing lane 1 only
+  mask.bits[0] = mask.bits[1] = M::FromBool(a.raw[1] < b.raw[1]);
+  return mask;
+}
+
+// ------------------------------ Eq128
+
+// Set in both lanes iff both u64 lanes of a and b match; full vectors only.
+template <class D>
+HWY_API MFromD<D> Eq128(D /* tag */, Vec128<uint64_t> a, Vec128<uint64_t> b) {
+  const bool differ = a.raw[1] != b.raw[1] || a.raw[0] != b.raw[0];
+  Mask128<uint64_t> mask;
+  mask.bits[0] = mask.bits[1] = Mask128<uint64_t>::FromBool(!differ);
+  return mask;
+}
+
+template <class D>
+HWY_API Mask128<uint64_t> Ne128(D /* tag */, Vec128<uint64_t> a,
+                                Vec128<uint64_t> b) {
+  const bool same = a.raw[1] == b.raw[1] && a.raw[0] == b.raw[0];
+  Mask128<uint64_t> mask;  // both lanes: set iff any u64 lane differs
+  mask.bits[0] = mask.bits[1] = Mask128<uint64_t>::FromBool(!same);
+  return mask;
+}
+
+template <class D>
+HWY_API MFromD<D> Eq128Upper(D /* tag */, Vec128<uint64_t> a,
+                             Vec128<uint64_t> b) {
+  using M = Mask128<uint64_t>;
+  M mask;  // both lanes get the verdict of comparing lane 1 only
+  mask.bits[0] = mask.bits[1] = M::FromBool(a.raw[1] == b.raw[1]);
+  return mask;
+}
+
+template <class D>
+HWY_API MFromD<D> Ne128Upper(D /* tag */, Vec128<uint64_t> a,
+                             Vec128<uint64_t> b) {
+  using M = Mask128<uint64_t>;
+  M mask;  // both lanes get the verdict of comparing lane 1 only
+  mask.bits[0] = mask.bits[1] = M::FromBool(a.raw[1] != b.raw[1]);
+  return mask;
+}
+
+// ------------------------------ Min128, Max128 (Lt128)
+
+template <class D>  // Returns a if Lt128(d, a, b), otherwise b.
+HWY_API VFromD<D> Min128(D d, VFromD<D> a, VFromD<D> b) {
+  return IfThenElse(Lt128(d, a, b), a, b);
+}
+
+template <class D>  // Returns a if Lt128(d, b, a), otherwise b.
+HWY_API VFromD<D> Max128(D d, VFromD<D> a, VFromD<D> b) {
+  return IfThenElse(Lt128(d, b, a), a, b);
+}
+
+template <class D>  // Returns a if Lt128Upper(d, a, b), otherwise b.
+HWY_API VFromD<D> Min128Upper(D d, VFromD<D> a, VFromD<D> b) {
+  return IfThenElse(Lt128Upper(d, a, b), a, b);
+}
+
+template <class D>  // Returns a if Lt128Upper(d, b, a), otherwise b.
+HWY_API VFromD<D> Max128Upper(D d, VFromD<D> a, VFromD<D> b) {
+  return IfThenElse(Lt128Upper(d, b, a), a, b);
+}
+
+// ================================================== MEMORY
+
+// ------------------------------ Load
+
+template <class D>  // Copies d.MaxBytes() bytes from `aligned` into a vector.
+HWY_API VFromD<D> Load(D d, const TFromD<D>* HWY_RESTRICT aligned) {
+  VFromD<D> v;
+  CopyBytes<d.MaxBytes()>(aligned, v.raw);  // copy from array
+  return v;
+}
+
+template <class D>  // Loads from p, then zeroes the lanes not selected by m.
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D d,
+                             const TFromD<D>* HWY_RESTRICT p) {
+  return IfThenElseZero(m, LoadU(d, p));  // NOTE: LoadU runs regardless of m
+}
+
+template <class D>  // Lanes selected by m come from p, the others from v.
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, MFromD<D> m, D d,
+                               const TFromD<D>* HWY_RESTRICT p) {
+  return IfThenElse(m, LoadU(d, p), v);  // NOTE: LoadU runs regardless of m
+}
+
+template <class D>  // Unaligned load; here it simply forwards to Load.
+HWY_API VFromD<D> LoadU(D d, const TFromD<D>* HWY_RESTRICT p) {
+  return Load(d, p);
+}
+
+// In some use cases, "load single lane" is sufficient; otherwise avoid this.
+template <class D>  // In this file it is identical to Load(d, aligned).
+HWY_API VFromD<D> LoadDup128(D d, const TFromD<D>* HWY_RESTRICT aligned) {
+  return Load(d, aligned);
+}
+
+#ifdef HWY_NATIVE_LOAD_N  // toggles the flag: defined <-> undefined
+#undef HWY_NATIVE_LOAD_N
+#else
+#define HWY_NATIVE_LOAD_N
+#endif
+
+template <class D>  // Loads at most max_lanes_to_load lanes; the rest are zero.
+HWY_API VFromD<D> LoadN(D d, const TFromD<D>* HWY_RESTRICT p,
+                        size_t max_lanes_to_load) {
+  VFromD<D> v = Zero(d);  // lanes that are not loaded stay zero
+  const size_t N = Lanes(d);
+  const size_t num_of_lanes_to_load = HWY_MIN(max_lanes_to_load, N);  // clamp
+  CopyBytes(p, v.raw, num_of_lanes_to_load * sizeof(TFromD<D>));
+  return v;
+}
+
+// ------------------------------ Store
+
+template <class D>  // Copies d.MaxBytes() bytes of v to `aligned`.
+HWY_API void Store(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT aligned) {
+  CopyBytes<d.MaxBytes()>(v.raw, aligned);  // copy to array
+}
+
+template <class D>  // Unaligned store; here it simply forwards to Store.
+HWY_API void StoreU(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT p) {
+  Store(v, d, p);
+}
+
+template <class D>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D d,
+                          TFromD<D>* HWY_RESTRICT p) {
+  // Only lanes whose mask entry is set are written; other p[] stay untouched.
+  for (size_t lane = 0; lane != MaxLanes(d); ++lane)
+    if (m.bits[lane]) p[lane] = v.raw[lane];
+}
+
+#ifdef HWY_NATIVE_STORE_N  // toggles the flag: defined <-> undefined
+#undef HWY_NATIVE_STORE_N
+#else
+#define HWY_NATIVE_STORE_N
+#endif
+
+template <class D>  // Stores at most max_lanes_to_store leading lanes of v.
+HWY_API void StoreN(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT p,
+                    size_t max_lanes_to_store) {
+  const size_t N = Lanes(d);
+  const size_t num_of_lanes_to_store = HWY_MIN(max_lanes_to_store, N);  // clamp
+  CopyBytes(v.raw, p, num_of_lanes_to_store * sizeof(TFromD<D>));
+}
+
+// ------------------------------ LoadInterleaved2/3/4
+
+// Per-target flag to prevent generic_ops-inl.h from defining LoadInterleaved2.
+// We implement those here because scalar code is likely faster than emulation
+// via shuffles. The block below toggles the flag (defined <-> undefined).
+#ifdef HWY_NATIVE_LOAD_STORE_INTERLEAVED
+#undef HWY_NATIVE_LOAD_STORE_INTERLEAVED
+#else
+#define HWY_NATIVE_LOAD_STORE_INTERLEAVED
+#endif
+
+template <class D, typename T = TFromD<D>>
+HWY_API void LoadInterleaved2(D d, const T* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1) {
+  alignas(16) T lanes0[MaxLanes(d)];  // staging for v0: elements 0, 2, 4, ...
+  alignas(16) T lanes1[MaxLanes(d)];  // staging for v1: elements 1, 3, 5, ...
+  for (size_t k = 0; k != MaxLanes(d); ++k) {
+    lanes0[k] = unaligned[2 * k];
+    lanes1[k] = unaligned[2 * k + 1];
+  }
+  v0 = Load(d, lanes0);
+  v1 = Load(d, lanes1);
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API void LoadInterleaved3(D d, const T* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1, VFromD<D>& v2) {
+  alignas(16) T lanes0[MaxLanes(d)];  // staging for v0
+  alignas(16) T lanes1[MaxLanes(d)];  // staging for v1
+  alignas(16) T lanes2[MaxLanes(d)];  // staging for v2
+  for (size_t k = 0; k != MaxLanes(d); ++k) {
+    lanes0[k] = unaligned[3 * k];
+    lanes1[k] = unaligned[3 * k + 1];
+    lanes2[k] = unaligned[3 * k + 2];
+  }
+  v0 = Load(d, lanes0);
+  v1 = Load(d, lanes1);
+  v2 = Load(d, lanes2);
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API void LoadInterleaved4(D d, const T* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1, VFromD<D>& v2,
+                              VFromD<D>& v3) {
+  alignas(16) T lanes0[MaxLanes(d)];  // staging for v0
+  alignas(16) T lanes1[MaxLanes(d)];  // staging for v1
+  alignas(16) T lanes2[MaxLanes(d)];  // staging for v2
+  alignas(16) T lanes3[MaxLanes(d)];  // staging for v3
+  for (size_t k = 0; k != MaxLanes(d); ++k) {
+    lanes0[k] = unaligned[4 * k];
+    lanes1[k] = unaligned[4 * k + 1];
+    lanes2[k] = unaligned[4 * k + 2];
+    lanes3[k] = unaligned[4 * k + 3];
+  }
+  v0 = Load(d, lanes0);
+  v1 = Load(d, lanes1);
+  v2 = Load(d, lanes2);
+  v3 = Load(d, lanes3);
+}
+
+// ------------------------------ StoreInterleaved2/3/4
+
+template <class D>
+HWY_API void StoreInterleaved2(VFromD<D> v0, VFromD<D> v1, D d,
+                               TFromD<D>* HWY_RESTRICT unaligned) {
+  for (size_t k = 0; k != MaxLanes(d); ++k) {
+    unaligned[2 * k] = v0.raw[k];  // output order per lane: v0[k], v1[k]
+    unaligned[2 * k + 1] = v1.raw[k];
+  }
+}
+
+template <class D>
+HWY_API void StoreInterleaved3(VFromD<D> v0, VFromD<D> v1, VFromD<D> v2, D d,
+                               TFromD<D>* HWY_RESTRICT unaligned) {
+  for (size_t k = 0; k != MaxLanes(d); ++k) {
+    unaligned[3 * k] = v0.raw[k];  // output order per lane: v0, v1, v2
+    unaligned[3 * k + 1] = v1.raw[k];
+    unaligned[3 * k + 2] = v2.raw[k];
+  }
+}
+
+template <class D>
+HWY_API void StoreInterleaved4(VFromD<D> v0, VFromD<D> v1, VFromD<D> v2,
+                               VFromD<D> v3, D d,
+                               TFromD<D>* HWY_RESTRICT unaligned) {
+  for (size_t k = 0; k != MaxLanes(d); ++k) {
+    unaligned[4 * k] = v0.raw[k];  // output order per lane: v0, v1, v2, v3
+    unaligned[4 * k + 1] = v1.raw[k];
+    unaligned[4 * k + 2] = v2.raw[k];
+    unaligned[4 * k + 3] = v3.raw[k];
+  }
+}
+
+// ------------------------------ Stream (here: an ordinary Store)
+template <class D>
+HWY_API void Stream(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT aligned) {
+  Store(v, d, aligned);
+}
+
+// ------------------------------ Scatter in generic_ops-inl.h
+// ------------------------------ Gather in generic_ops-inl.h
+
+// ================================================== CONVERT
+
+// ConvertTo and DemoteTo with floating-point input and integer output truncate
+// (rounding toward zero).
+
+template <class DTo, typename TFrom, HWY_IF_NOT_SPECIAL_FLOAT(TFrom)>
+HWY_API VFromD<DTo> PromoteTo(DTo d, Vec128<TFrom, HWY_MAX_LANES_D(DTo)> from) {
+  static_assert(sizeof(TFromD<DTo>) > sizeof(TFrom), "Not promoting");
+  VFromD<DTo> ret;
+  for (size_t i = 0; i < MaxLanes(d); ++i) {  // lane-wise widening cast
+    // For bits Y > X, floatX->floatY and intX->intY are always representable.
+    ret.raw[i] = static_cast<TFromD<DTo>>(from.raw[i]);
+  }
+  return ret;
+}
+
+// MSVC 19.10 cannot deduce the argument type if HWY_IF_FLOAT(TFrom) is here,
+// so we overload for TFrom=double and ToT={float,int32_t}.
+template <class D, HWY_IF_F32_D(D)>
+HWY_API VFromD<D> DemoteTo(D d, VFromD<Rebind<double, D>> from) {
+  VFromD<D> ret;
+  for (size_t i = 0; i < MaxLanes(d); ++i) {
+    // Prevent ubsan errors: saturate doubles outside the float range (and Inf).
+    if (std::isinf(from.raw[i]) ||
+        std::fabs(from.raw[i]) > static_cast<double>(HighestValue<float>())) {
+      ret.raw[i] = std::signbit(from.raw[i]) ? LowestValue<float>()
+                                             : HighestValue<float>();
+      continue;
+    }
+    ret.raw[i] = static_cast<float>(from.raw[i]);  // in range: plain narrowing
+  }
+  return ret;
+}
+template <class D, HWY_IF_I32_D(D)>  // double -> int32_t with saturation
+HWY_API VFromD<D> DemoteTo(D d, VFromD<Rebind<double, D>> from) {
+  VFromD<D> ret;
+  for (size_t i = 0; i < MaxLanes(d); ++i) {
+    // Prevent ubsan errors: saturate doubles outside the int32_t range (and Inf).
+    if (std::isinf(from.raw[i]) ||
+        std::fabs(from.raw[i]) > static_cast<double>(HighestValue<int32_t>())) {
+      ret.raw[i] = std::signbit(from.raw[i]) ? LowestValue<int32_t>()
+                                             : HighestValue<int32_t>();
+      continue;
+    }
+    ret.raw[i] = static_cast<int32_t>(from.raw[i]);  // NOTE: NaN reaches here
+  }
+  return ret;
+}
+
+template <class DTo, typename TFrom, size_t N, HWY_IF_SIGNED(TFrom),
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<DTo>)>
+HWY_API VFromD<DTo> DemoteTo(DTo /* tag */, Vec128<TFrom, N> from) {
+  using TTo = TFromD<DTo>;
+  static_assert(sizeof(TTo) < sizeof(TFrom), "Not demoting");
+
+  VFromD<DTo> ret;
+  for (size_t i = 0; i < N; ++i) {
+    // Int to int: choose closest value in ToT to `from` (avoids UB)
+    from.raw[i] =  // clamp to [LimitsMin<TTo>(), LimitsMax<TTo>()]
+        HWY_MIN(HWY_MAX(LimitsMin<TTo>(), from.raw[i]), LimitsMax<TTo>());
+    ret.raw[i] = static_cast<TTo>(from.raw[i]);
+  }
+  return ret;
+}
+
+template <class DTo, typename TFrom, size_t N, HWY_IF_UNSIGNED(TFrom),
+          HWY_IF_UNSIGNED_D(DTo)>
+HWY_API VFromD<DTo> DemoteTo(DTo /* tag */, Vec128<TFrom, N> from) {
+  using TTo = TFromD<DTo>;
+  static_assert(sizeof(TTo) < sizeof(TFrom), "Not demoting");
+
+  VFromD<DTo> ret;
+  for (size_t i = 0; i < N; ++i) {
+    // Int to int: choose closest value in ToT to `from` (avoids UB)
+    from.raw[i] = HWY_MIN(from.raw[i], LimitsMax<TTo>());  // only upper clamp
+    ret.raw[i] = static_cast<TTo>(from.raw[i]);
+  }
+  return ret;
+}
+
+template <class DBF16, HWY_IF_BF16_D(DBF16), class VF32>
+HWY_API VFromD<DBF16> ReorderDemote2To(DBF16 dbf16, VF32 a, VF32 b) {
+  const Repartition<uint32_t, decltype(dbf16)> du32;
+  const VFromD<decltype(du32)> b_in_lower = ShiftRight<16>(BitCast(du32, b));
+  // Avoid OddEven - we want the upper half of `a` even on big-endian systems.
+  const VFromD<decltype(du32)> a_mask = Set(du32, 0xFFFF0000);  // upper 16 bits
+  return BitCast(dbf16, IfVecThenElse(a_mask, BitCast(du32, a), b_in_lower));
+}
+
+template <class DN, HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<DN>), class V,
+          HWY_IF_SIGNED_V(V), HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2),
+          HWY_IF_LANES_D(DN, HWY_MAX_LANES_D(DFromV<V>) * 2)>
+HWY_API VFromD<DN> ReorderDemote2To(DN dn, V a, V b) {
+  const RepartitionToWide<decltype(dn)> dw;
+  const size_t NW = Lanes(dw);  // number of lanes in each wide input
+  using TN = TFromD<DN>;
+  const TN min = LimitsMin<TN>();
+  const TN max = LimitsMax<TN>();
+  VFromD<DN> ret;
+  for (size_t i = 0; i < NW; ++i) {  // lower half: clamped lanes of a
+    ret.raw[i] = static_cast<TN>(HWY_MIN(HWY_MAX(min, a.raw[i]), max));
+  }
+  for (size_t i = 0; i < NW; ++i) {  // upper half: clamped lanes of b
+    ret.raw[NW + i] = static_cast<TN>(HWY_MIN(HWY_MAX(min, b.raw[i]), max));
+  }
+  return ret;
+}
+
+template <class DN, HWY_IF_UNSIGNED_D(DN), class V, HWY_IF_UNSIGNED_V(V),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2),
+          HWY_IF_LANES_D(DN, HWY_MAX_LANES_D(DFromV<V>) * 2)>
+HWY_API VFromD<DN> ReorderDemote2To(DN dn, V a, V b) {
+  const RepartitionToWide<decltype(dn)> dw;
+  const size_t NW = Lanes(dw);  // number of lanes in each wide input
+  using TN = TFromD<DN>;
+  const TN max = LimitsMax<TN>();
+  VFromD<DN> ret;
+  for (size_t i = 0; i < NW; ++i) {  // lower half: lanes of a, clamped to max
+    ret.raw[i] = static_cast<TN>(HWY_MIN(a.raw[i], max));
+  }
+  for (size_t i = 0; i < NW; ++i) {  // upper half: lanes of b, clamped to max
+    ret.raw[NW + i] = static_cast<TN>(HWY_MIN(b.raw[i], max));
+  }
+  return ret;
+}
+
+template <class DN, HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<DN>), class V,
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2),
+          HWY_IF_LANES_D(DN, HWY_MAX_LANES_D(DFromV<V>) * 2)>
+HWY_API VFromD<DN> OrderedDemote2To(DN dn, V a, V b) {
+  return ReorderDemote2To(dn, a, b);  // forwards to the integer ReorderDemote2To
+}
+
+template <class DN, HWY_IF_BF16_D(DN), class V, HWY_IF_F32_D(DFromV<V>),
+          HWY_IF_LANES_D(DN, HWY_MAX_LANES_D(DFromV<V>) * 2)>
+HWY_API VFromD<DN> OrderedDemote2To(DN dn, V a, V b) {
+  const RebindToUnsigned<DFromV<decltype(a)>> du32;
+  const size_t NW = Lanes(du32);  // number of lanes in each f32 input
+  VFromD<Repartition<uint16_t, DN>> ret;
+
+  const auto a_bits = BitCast(du32, a);
+  const auto b_bits = BitCast(du32, b);
+
+  for (size_t i = 0; i < NW; ++i) {  // lower half: upper 16 bits of a's lanes
+    ret.raw[i] = static_cast<uint16_t>(a_bits.raw[i] >> 16);
+  }
+  for (size_t i = 0; i < NW; ++i) {  // upper half: upper 16 bits of b's lanes
+    ret.raw[NW + i] = static_cast<uint16_t>(b_bits.raw[i] >> 16);
+  }
+  return BitCast(dn, ret);
+}
+
+namespace detail {
+
+HWY_INLINE void StoreU16ToF16(const uint16_t val,  // writes val's bits to *to
+                              hwy::float16_t* HWY_RESTRICT to) {
+  CopySameSize(&val, to);  // presumably a raw same-size byte copy - confirm
+}
+
+HWY_INLINE uint16_t U16FromF16(const hwy::float16_t* HWY_RESTRICT from) {
+  uint16_t bits16;
+  CopySameSize(from, &bits16);  // reads *from into a uint16_t of equal size
+  return bits16;
+}
+
+}  // namespace detail
+
+// Widens each of the N bfloat16 lanes to f32 via F32FromBF16.
+template <class D, HWY_IF_F32_D(D), size_t N>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec128<bfloat16_t, N> v) {
+  VFromD<D> widened;
+  for (size_t lane = 0; lane != N; ++lane)
+    widened.raw[lane] = F32FromBF16(v.raw[lane]);
+  return widened;
+}
+
+// Narrows each of the N f32 lanes to bfloat16 via BF16FromF32.
+template <class D, HWY_IF_BF16_D(D), size_t N>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec128<float, N> v) {
+  VFromD<D> narrowed;
+  for (size_t lane = 0; lane != N; ++lane)
+    narrowed.raw[lane] = BF16FromF32(v.raw[lane]);
+  return narrowed;
+}
+
+// Tag dispatch instead of SFINAE for MSVC 2017 compatibility
+namespace detail {
+
+template <typename TFrom, typename DTo>
+HWY_API VFromD<DTo> ConvertTo(hwy::FloatTag /*tag*/, DTo /*tag*/,
+                              Vec128<TFrom, HWY_MAX_LANES_D(DTo)> from) {
+  using ToT = TFromD<DTo>;
+  static_assert(sizeof(ToT) == sizeof(TFrom), "Should have same size");
+  VFromD<DTo> ret;
+  constexpr size_t N = HWY_MAX_LANES_D(DTo);
+  for (size_t i = 0; i < N; ++i) {
+    // float## -> int##: saturate to the closest representable value. Compare in
+    // double; `>=` because LimitsMax<int64_t> rounds up to 2^63 as a double.
+    const double f = static_cast<double>(from.raw[i]);
+    if (std::isinf(from.raw[i]) ||
+        std::fabs(f) >= static_cast<double>(LimitsMax<ToT>())) {
+      ret.raw[i] =
+          std::signbit(from.raw[i]) ? LimitsMin<ToT>() : LimitsMax<ToT>();
+      continue;
+    }
+    ret.raw[i] = static_cast<ToT>(from.raw[i]);  // in range: truncating cast
+  }
+  return ret;
+}
+
+template <typename TFrom, typename DTo>
+HWY_API VFromD<DTo> ConvertTo(hwy::NonFloatTag /*tag*/, DTo /* tag */,
+                              Vec128<TFrom, HWY_MAX_LANES_D(DTo)> from) {
+  using ToT = TFromD<DTo>;
+  static_assert(sizeof(ToT) == sizeof(TFrom), "Should have same size");
+  VFromD<DTo> ret;
+  constexpr size_t N = HWY_MAX_LANES_D(DTo);
+  for (size_t i = 0; i < N; ++i) {
+    // int## -> float##: no check needed
+    ret.raw[i] = static_cast<ToT>(from.raw[i]);
+  }
+  return ret;
+}
+
+}  // namespace detail
+
+template <class DTo, typename TFrom>  // Dispatches on whether TFrom is float.
+HWY_API VFromD<DTo> ConvertTo(DTo d, Vec128<TFrom, HWY_MAX_LANES_D(DTo)> from) {
+  return detail::ConvertTo(hwy::IsFloatTag<TFrom>(), d, from);
+}
+
+template <size_t N>  // Shorthand for DemoteTo with a u8 tag of N lanes.
+HWY_API Vec128<uint8_t, N> U8FromU32(Vec128<uint32_t, N> v) {
+  return DemoteTo(Simd<uint8_t, N, 0>(), v);
+}
+
+// ------------------------------ Truncations
+
+// u64 -> u8: keeps only the low 8 bits of each lane.
+template <class D, HWY_IF_U8_D(D), size_t N>
+HWY_API VFromD<D> TruncateTo(D /* tag */, Vec128<uint64_t, N> v) {
+  VFromD<D> narrow;
+  for (size_t lane = 0; lane != N; ++lane)
+    narrow.raw[lane] = static_cast<uint8_t>(v.raw[lane] & 0xFF);
+  return narrow;
+}
+
+// u64 -> u16: keeps only the low 16 bits of each lane.
+template <class D, HWY_IF_U16_D(D), size_t N>
+HWY_API VFromD<D> TruncateTo(D /* tag */, Vec128<uint64_t, N> v) {
+  VFromD<D> narrow;
+  for (size_t lane = 0; lane != N; ++lane)
+    narrow.raw[lane] = static_cast<uint16_t>(v.raw[lane] & 0xFFFF);
+  return narrow;
+}
+
+// u64 -> u32: keeps only the low 32 bits of each lane.
+template <class D, HWY_IF_U32_D(D), size_t N>
+HWY_API VFromD<D> TruncateTo(D /* tag */, Vec128<uint64_t, N> v) {
+  VFromD<D> narrow;
+  for (size_t lane = 0; lane != N; ++lane)
+    narrow.raw[lane] = static_cast<uint32_t>(v.raw[lane] & 0xFFFFFFFFu);
+  return narrow;
+}
+
+// u32 -> u8: keeps only the low 8 bits of each lane.
+template <class D, HWY_IF_U8_D(D), size_t N>
+HWY_API VFromD<D> TruncateTo(D /* tag */, Vec128<uint32_t, N> v) {
+  VFromD<D> narrow;
+  for (size_t lane = 0; lane != N; ++lane)
+    narrow.raw[lane] = static_cast<uint8_t>(v.raw[lane] & 0xFF);
+  return narrow;
+}
+
+// u32 -> u16: keeps only the low 16 bits of each lane.
+template <class D, HWY_IF_U16_D(D), size_t N>
+HWY_API VFromD<D> TruncateTo(D /* tag */, Vec128<uint32_t, N> v) {
+  VFromD<D> narrow;
+  for (size_t lane = 0; lane != N; ++lane)
+    narrow.raw[lane] = static_cast<uint16_t>(v.raw[lane] & 0xFFFF);
+  return narrow;
+}
+
+// u16 -> u8: keeps only the low 8 bits of each lane.
+template <class D, HWY_IF_U8_D(D), size_t N>
+HWY_API VFromD<D> TruncateTo(D /* tag */, Vec128<uint16_t, N> v) {
+  VFromD<D> narrow;
+  for (size_t lane = 0; lane != N; ++lane)
+    narrow.raw[lane] = static_cast<uint8_t>(v.raw[lane] & 0xFF);
+  return narrow;
+}
+
+#ifdef HWY_NATIVE_ORDERED_TRUNCATE_2_TO  // toggles: defined <-> undefined
+#undef HWY_NATIVE_ORDERED_TRUNCATE_2_TO
+#else
+#define HWY_NATIVE_ORDERED_TRUNCATE_2_TO
+#endif
+
+template <class DN, HWY_IF_UNSIGNED_D(DN), class V, HWY_IF_UNSIGNED_V(V),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2),
+          HWY_IF_LANES_D(DN, HWY_MAX_LANES_D(DFromV<V>) * 2)>
+HWY_API VFromD<DN> OrderedTruncate2To(DN dn, V a, V b) {
+  const RepartitionToWide<decltype(dn)> dw;
+  const size_t NW = Lanes(dw);  // number of lanes in each wide input
+  using TW = TFromD<decltype(dw)>;
+  using TN = TFromD<decltype(dn)>;
+  VFromD<DN> ret;
+  constexpr TW max_val{LimitsMax<TN>()};  // used as a bit mask below
+
+  for (size_t i = 0; i < NW; ++i) {  // lower half: masked lanes of a
+    ret.raw[i] = static_cast<TN>(a.raw[i] & max_val);
+  }
+  for (size_t i = 0; i < NW; ++i) {  // upper half: masked lanes of b
+    ret.raw[NW + i] = static_cast<TN>(b.raw[i] & max_val);
+  }
+  return ret;
+}
+
+// ================================================== COMBINE
+
+template <typename T, size_t N>  // Returns the first N/2 lanes of v.
+HWY_API Vec128<T, N / 2> LowerHalf(Vec128<T, N> v) {
+  Vec128<T, N / 2> ret;
+  CopyBytes<N / 2 * sizeof(T)>(v.raw, ret.raw);
+  return ret;
+}
+
+template <class D>  // Tag overload; forwards to LowerHalf(v).
+HWY_API VFromD<D> LowerHalf(D /* tag */, VFromD<Twice<D>> v) {
+  return LowerHalf(v);
+}
+
+template <class D>  // Returns the lanes of v starting at index MaxLanes(d).
+HWY_API VFromD<D> UpperHalf(D d, VFromD<Twice<D>> v) {
+  VFromD<D> ret;
+  CopyBytes<d.MaxBytes()>(&v.raw[MaxLanes(d)], ret.raw);
+  return ret;
+}
+
+template <class D>  // Copies the half-vector v into the lower half of ret.
+HWY_API VFromD<D> ZeroExtendVector(D d, VFromD<Half<D>> v) {
+  const Half<decltype(d)> dh;
+  VFromD<D> ret;  // zero-initialized
+  CopyBytes<dh.MaxBytes()>(v.raw, ret.raw);
+  return ret;
+}
+
+template <class D, class VH = VFromD<Half<D>>>  // ret = [lo_half, hi_half]
+HWY_API VFromD<D> Combine(D d, VH hi_half, VH lo_half) {
+  const Half<decltype(d)> dh;
+  VFromD<D> ret;
+  CopyBytes<dh.MaxBytes()>(lo_half.raw, &ret.raw[0]);  // lower lanes
+  CopyBytes<dh.MaxBytes()>(hi_half.raw, &ret.raw[MaxLanes(dh)]);  // upper lanes
+  return ret;
+}
+
+template <class D>  // ret = [lower half of lo, lower half of hi]
+HWY_API VFromD<D> ConcatLowerLower(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Half<decltype(d)> dh;
+  VFromD<D> ret;
+  CopyBytes<dh.MaxBytes()>(lo.raw, &ret.raw[0]);
+  CopyBytes<dh.MaxBytes()>(hi.raw, &ret.raw[MaxLanes(dh)]);
+  return ret;
+}
+
+template <class D>  // ret = [upper half of lo, upper half of hi]
+HWY_API VFromD<D> ConcatUpperUpper(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Half<decltype(d)> dh;
+  VFromD<D> ret;
+  CopyBytes<dh.MaxBytes()>(&lo.raw[MaxLanes(dh)], &ret.raw[0]);
+  CopyBytes<dh.MaxBytes()>(&hi.raw[MaxLanes(dh)], &ret.raw[MaxLanes(dh)]);
+  return ret;
+}
+
+template <class D>  // ret = [upper half of lo, lower half of hi]
+HWY_API VFromD<D> ConcatLowerUpper(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Half<decltype(d)> dh;
+  VFromD<D> ret;
+  CopyBytes<dh.MaxBytes()>(&lo.raw[MaxLanes(dh)], &ret.raw[0]);
+  CopyBytes<dh.MaxBytes()>(hi.raw, &ret.raw[MaxLanes(dh)]);
+  return ret;
+}
+
+template <class D>  // ret = [lower half of lo, upper half of hi]
+HWY_API VFromD<D> ConcatUpperLower(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Half<decltype(d)> dh;
+  VFromD<D> ret;
+  CopyBytes<dh.MaxBytes()>(lo.raw, &ret.raw[0]);
+  CopyBytes<dh.MaxBytes()>(&hi.raw[MaxLanes(dh)], &ret.raw[MaxLanes(dh)]);
+  return ret;
+}
+
+// Lower half of the result: even lanes of lo; upper half: even lanes of hi.
+template <class D>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Half<decltype(d)> dh;
+  const size_t half = MaxLanes(dh);
+  VFromD<D> packed;
+  for (size_t k = 0; k != half; ++k) {
+    packed.raw[k] = lo.raw[2 * k];
+    packed.raw[half + k] = hi.raw[2 * k];
+  }
+  return packed;
+}
+
+// Lower half of the result: odd lanes of lo; upper half: odd lanes of hi.
+template <class D>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Half<decltype(d)> dh;
+  const size_t half = MaxLanes(dh);
+  VFromD<D> packed;
+  for (size_t k = 0; k != half; ++k) {
+    packed.raw[k] = lo.raw[2 * k + 1];
+    packed.raw[half + k] = hi.raw[2 * k + 1];
+  }
+  return packed;
+}
+
+// ------------------------------ CombineShiftRightBytes
+template <int kBytes, class D>  // ret = lo's bytes from kBytes on, then hi's
+HWY_API VFromD<D> CombineShiftRightBytes(D d, VFromD<D> hi, VFromD<D> lo) {
+  VFromD<D> ret;
+  const uint8_t* HWY_RESTRICT lo8 =
+      reinterpret_cast<const uint8_t * HWY_RESTRICT>(lo.raw);
+  uint8_t* HWY_RESTRICT ret8 =
+      reinterpret_cast<uint8_t * HWY_RESTRICT>(ret.raw);
+  CopyBytes<d.MaxBytes() - kBytes>(lo8 + kBytes, ret8);  // tail of lo first
+  CopyBytes<kBytes>(hi.raw, ret8 + d.MaxBytes() - kBytes);  // then head of hi
+  return ret;
+}
+
+// ------------------------------ ShiftLeftBytes
+
+template <int kBytes, class D>  // Moves v up by kBytes bytes, zero-filling.
+HWY_API VFromD<D> ShiftLeftBytes(D d, VFromD<D> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  VFromD<D> ret;
+  uint8_t* HWY_RESTRICT ret8 =
+      reinterpret_cast<uint8_t * HWY_RESTRICT>(ret.raw);
+  ZeroBytes<kBytes>(ret8);  // the lowest kBytes bytes become zero
+  CopyBytes<d.MaxBytes() - kBytes>(v.raw, ret8 + kBytes);
+  return ret;
+}
+
+template <int kBytes, typename T, size_t N>  // Tag-less convenience overload.
+HWY_API Vec128<T, N> ShiftLeftBytes(Vec128<T, N> v) {
+  return ShiftLeftBytes<kBytes>(DFromV<decltype(v)>(), v);
+}
+
+// ------------------------------ ShiftLeftLanes
+
+template <int kLanes, class D, typename T = TFromD<D>>  // shift by whole lanes
+HWY_API VFromD<D> ShiftLeftLanes(D d, VFromD<D> v) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  return BitCast(d, ShiftLeftBytes<kLanes * sizeof(T)>(BitCast(d8, v)));
+}
+
+template <int kLanes, typename T, size_t N>  // Tag-less convenience overload.
+HWY_API Vec128<T, N> ShiftLeftLanes(Vec128<T, N> v) {
+  return ShiftLeftLanes<kLanes>(DFromV<decltype(v)>(), v);
+}
+
+// ------------------------------ ShiftRightBytes
+template <int kBytes, class D>  // Moves v down by kBytes bytes, zero-filling.
+HWY_API VFromD<D> ShiftRightBytes(D d, VFromD<D> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  VFromD<D> ret;
+  const uint8_t* HWY_RESTRICT v8 =
+      reinterpret_cast<const uint8_t * HWY_RESTRICT>(v.raw);
+  uint8_t* HWY_RESTRICT ret8 =
+      reinterpret_cast<uint8_t * HWY_RESTRICT>(ret.raw);
+  CopyBytes<d.MaxBytes() - kBytes>(v8 + kBytes, ret8);
+  ZeroBytes<kBytes>(ret8 + d.MaxBytes() - kBytes);  // top kBytes bytes are zero
+  return ret;
+}
+
+// ------------------------------ ShiftRightLanes
+template <int kLanes, class D>  // Shifts by whole lanes via ShiftRightBytes.
+HWY_API VFromD<D> ShiftRightLanes(D d, VFromD<D> v) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  constexpr size_t kBytes = kLanes * sizeof(TFromD<D>);  // lanes -> bytes
+  return BitCast(d, ShiftRightBytes<kBytes>(d8, BitCast(d8, v)));
+}
+
+// ================================================== SWIZZLE
+
+template <typename T, size_t N>  // Returns lane 0.
+HWY_API T GetLane(Vec128<T, N> v) {
+  return v.raw[0];
+}
+
+template <typename T, size_t N>  // Returns a copy of v with lane i set to t.
+HWY_API Vec128<T, N> InsertLane(Vec128<T, N> v, size_t i, T t) {
+  v.raw[i] = t;  // i is not range-checked here
+  return v;
+}
+
+template <typename T, size_t N>  // Returns lane i; i is not range-checked here.
+HWY_API T ExtractLane(Vec128<T, N> v, size_t i) {
+  return v.raw[i];
+}
+
+// Copies every even-indexed lane into the odd lane directly above it.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> DupEven(Vec128<T, N> v) {
+  for (size_t even = 0; even < N; even += 2)
+    v.raw[even + 1] = v.raw[even];
+  return v;
+}
+
+// Copies every odd-indexed lane into the even lane directly below it.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> DupOdd(Vec128<T, N> v) {
+  for (size_t even = 0; even < N; even += 2)
+    v.raw[even] = v.raw[even + 1];
+  return v;
+}
+
+// Result takes its even-indexed lanes from `even`, the remaining from `odd`.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> OddEven(Vec128<T, N> odd, Vec128<T, N> even) {
+  for (size_t lane = 0; lane < N; lane += 2)
+    odd.raw[lane] = even.raw[lane];
+  return odd;
+}
+
+template <typename T, size_t N>  // Returns `even` unchanged; `odd` is unused.
+HWY_API Vec128<T, N> OddEvenBlocks(Vec128<T, N> /* odd */, Vec128<T, N> even) {
+  return even;
+}
+
+// ------------------------------ SwapAdjacentBlocks
+template <typename T, size_t N>
+HWY_API Vec128<T, N> SwapAdjacentBlocks(Vec128<T, N> v) {
+  return v;  // returned unchanged (cf. ReverseBlocks: single block)
+}
+
+// ------------------------------ TableLookupLanes
+
+// Returned by SetTableIndices for use by TableLookupLanes.
+template <typename T, size_t N>
+struct Indices128 {
+  MakeSigned<T> raw[N];  // N indices, stored as the signed counterpart of T
+};
+
+template <class D, typename TI, size_t N>
+HWY_API Indices128<TFromD<D>, N> IndicesFromVec(D d, Vec128<TI, N> vec) {
+  static_assert(sizeof(TI) == sizeof(TFromD<D>), "Index/lane size must match");
+  Indices128<TFromD<D>, N> indices;
+  CopyBytes<d.MaxBytes()>(vec.raw, indices.raw);  // byte-wise copy of lanes
+  return indices;
+}
+
+template <class D, typename TI>
+HWY_API Indices128<TFromD<D>, HWY_MAX_LANES_D(D)> SetTableIndices(
+    D d, const TI* idx) {
+  return IndicesFromVec(d, LoadU(Rebind<TI, D>(), idx));  // load, then convert
+}
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> TableLookupLanes(Vec128<T, N> v, Indices128<T, N> idx) {
+  // Output lane k is the input lane named by the k-th index.
+  Vec128<T, N> ret;
+  size_t lane = 0;
+  for (const auto src : idx.raw) ret.raw[lane++] = v.raw[src];
+  return ret;
+}
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> TwoTablesLookupLanes(Vec128<T, N> a, Vec128<T, N> b,
+                                          Indices128<T, N> idx) {
+  using TI = MakeSigned<T>;
+  constexpr TI kLaneMask = static_cast<TI>(N - 1);
+  Vec128<T, N> out;
+  for (size_t lane = 0; lane < N; ++lane) {
+    const TI sel = idx.raw[lane];
+    // Indices below N read from `a`, all others from `b`; low bits pick lane.
+    const Vec128<T, N>& table = (sel < static_cast<TI>(N)) ? a : b;
+    out.raw[lane] = table.raw[sel & kLaneMask];
+  }
+  return out;
+}
+
+// ------------------------------ ReverseBlocks
+template <class D>
+HWY_API VFromD<D> ReverseBlocks(D /* tag */, VFromD<D> v) {
+  return v;  // Single block: there is nothing to reorder, tag is unused
+}
+
+// ------------------------------ Reverse
+
+template <class D>
+HWY_API VFromD<D> Reverse(D d, VFromD<D> v) {
+  // Mirror the lanes: output lane i reads input lane (last - i).
+  constexpr size_t kLast = MaxLanes(d) - 1;
+  VFromD<D> reversed;
+  for (size_t i = 0; i <= kLast; ++i) reversed.raw[i] = v.raw[kLast - i];
+  return reversed;
+}
+
+// Per-target flag to prevent generic_ops-inl.h defining 8-bit Reverse2/4/8.
+#ifdef HWY_NATIVE_REVERSE2_8
+#undef HWY_NATIVE_REVERSE2_8
+#else
+#define HWY_NATIVE_REVERSE2_8
+#endif
+
+template <class D>
+HWY_API VFromD<D> Reverse2(D d, VFromD<D> v) {
+  VFromD<D> swapped;
+  for (size_t lo = 0; lo < MaxLanes(d); lo += 2) {
+    swapped.raw[lo] = v.raw[lo + 1];  // exchange the two lanes of each pair
+    swapped.raw[lo + 1] = v.raw[lo];
+  }
+  return swapped;
+}
+
+template <class D>
+HWY_API VFromD<D> Reverse4(D d, VFromD<D> v) {
+  VFromD<D> ret;
+  for (size_t base = 0; base < MaxLanes(d); base += 4) {
+    // Mirror the four lanes of this group: 0<->3 and 1<->2.
+    for (size_t k = 0; k < 4; ++k) {
+      ret.raw[base + k] = v.raw[base + 3 - k];
+    }
+  }
+  return ret;
+}
+
+// Reverses the order of the lanes inside every group of eight consecutive
+// lanes; the groups themselves stay where they are. The outer loop advances
+// in steps of eight lanes up to MaxLanes(d).
+template <class D>
+HWY_API VFromD<D> Reverse8(D d, VFromD<D> v) {
+  VFromD<D> ret;
+  for (size_t base = 0; base < MaxLanes(d); base += 8) {
+    // Lane k of the group trades places with lane 7 - k, so 0<->7, 1<->6,
+    // 2<->5 and 3<->4.
+    for (size_t k = 0; k < 8; ++k) {
+      ret.raw[base + k] = v.raw[base + 7 - k];
+    }
+  }
+  return ret;
+}
+
+// ------------------------------ SlideUpLanes
+
+template <class D>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+  constexpr size_t kLanes = HWY_MAX_LANES_D(D);
+  const size_t shift = (amt < kLanes) ? amt : kLanes;  // clamp to lane count
+  const size_t kept_bytes = (kLanes - shift) * sizeof(TFromD<D>);
+  VFromD<D> slid = Zero(d);  // lanes below `shift` keep this zero
+  CopyBytes(v.raw, slid.raw + shift, kept_bytes);
+  return slid;
+}
+
+// ------------------------------ SlideDownLanes
+
+template <class D>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+  constexpr size_t kLanes = HWY_MAX_LANES_D(D);
+  const size_t shift = (amt < kLanes) ? amt : kLanes;  // clamp to lane count
+  const size_t kept_bytes = (kLanes - shift) * sizeof(TFromD<D>);
+  VFromD<D> slid = Zero(d);  // lanes not overwritten below keep this zero
+  CopyBytes(v.raw + shift, slid.raw, kept_bytes);
+  return slid;
+}
+
+// ================================================== BLOCKWISE
+
+// ------------------------------ Shuffle*
+
+// Swap 32-bit halves in 64-bit halves.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Shuffle2301(Vec128<T, N> v) {
+  static_assert(sizeof(T) == 4, "Only for 32-bit");
+  static_assert(N == 2 || N == 4, "Does not make sense for N=1");
+  return Reverse2(DFromV<decltype(v)>(), v);  // swap each adjacent lane pair
+}
+
+// Swap 64-bit halves
+template <typename T>
+HWY_API Vec128<T> Shuffle1032(Vec128<T> v) {
+  static_assert(sizeof(T) == 4, "Only for 32-bit");
+  Vec128<T> ret;
+  // Lane i receives lane (i + 2) mod 4, exchanging the two 64-bit halves.
+  for (size_t i = 0; i < 4; ++i) {
+    ret.raw[i] = v.raw[i ^ 2];
+  }
+  return ret;
+}
+template <typename T>
+HWY_API Vec128<T> Shuffle01(Vec128<T> v) {
+  static_assert(sizeof(T) == 8, "Only for 64-bit");
+  return Reverse2(DFromV<decltype(v)>(), v);  // swap the two 64-bit lanes
+}
+
+// Rotate right 32 bits: lane i takes lane (i + 1) mod 4.
+template <typename T>
+HWY_API Vec128<T> Shuffle0321(Vec128<T> v) {
+  static_assert(sizeof(T) == 4, "Only for 32-bit");  // raw[3] must exist
+  Vec128<T> ret;
+  for (size_t i = 0; i < 4; ++i) {
+    ret.raw[i] = v.raw[(i + 1) & 3];
+  }
+  return ret;
+}
+
+// Rotate left 32 bits: lane i takes lane (i + 3) mod 4.
+template <typename T>
+HWY_API Vec128<T> Shuffle2103(Vec128<T> v) {
+  static_assert(sizeof(T) == 4, "Only for 32-bit");  // raw[3] must exist
+  Vec128<T> ret;
+  for (size_t i = 0; i < 4; ++i) {
+    ret.raw[i] = v.raw[(i + 3) & 3];
+  }
+  return ret;
+}
+
+template <typename T>
+HWY_API Vec128<T> Shuffle0123(Vec128<T> v) {
+  return Reverse4(DFromV<decltype(v)>(), v);  // reverse each group of 4 lanes
+}
+
+// ------------------------------ Broadcast
+template <int kLane, typename T, size_t N>
+HWY_API Vec128<T, N> Broadcast(Vec128<T, N> v) {
+  // Read the source lane once; overwriting it with itself changes nothing.
+  const T value = v.raw[kLane];
+  for (size_t i = 0; i < N; ++i) v.raw[i] = value;
+  return v;
+}
+
+// ------------------------------ TableLookupBytes, TableLookupBytesOr0
+
+template <typename T, size_t N, typename TI, size_t NI>
+HWY_API Vec128<TI, NI> TableLookupBytes(Vec128<T, N> v,
+                                        Vec128<TI, NI> indices) {
+  constexpr size_t kTableBytes = sizeof(T) * N;
+  constexpr size_t kIndexBytes = sizeof(TI) * NI;
+  Vec128<TI, NI> ret;
+  const uint8_t* HWY_RESTRICT table = reinterpret_cast<const uint8_t*>(v.raw);
+  const uint8_t* HWY_RESTRICT selectors =
+      reinterpret_cast<const uint8_t*>(indices.raw);
+  uint8_t* HWY_RESTRICT out = reinterpret_cast<uint8_t*>(ret.raw);
+  for (size_t i = 0; i < kIndexBytes; ++i) {
+    const size_t sel = selectors[i];
+    // Selectors that point past the table yield zero instead of reading OOB.
+    out[i] = (sel < kTableBytes) ? table[sel] : 0;
+  }
+  return ret;
+}
+
+template <typename T, size_t N, typename TI, size_t NI>
+HWY_API Vec128<TI, NI> TableLookupBytesOr0(Vec128<T, N> v,
+                                           Vec128<TI, NI> indices) {
+  // Plain forwarding: TableLookupBytes already returns 0 if out of bounds.
+  return TableLookupBytes(v, indices);
+}
+
+// ------------------------------ InterleaveLower/InterleaveUpper
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> InterleaveLower(Vec128<T, N> a, Vec128<T, N> b) {
+  Vec128<T, N> zipped;
+  for (size_t out = 0; out + 1 < N; out += 2) {
+    zipped.raw[out] = a.raw[out / 2];      // even output lanes come from a
+    zipped.raw[out + 1] = b.raw[out / 2];  // odd output lanes come from b
+  }
+  return zipped;
+}
+
+// Additional overload for the optional tag.
+template <class V>
+HWY_API V InterleaveLower(DFromV<V> /* tag */, V a, V b) {
+  return InterleaveLower(a, b);  // tag unused; forwards to the 2-arg form
+}
+
+template <class D>
+HWY_API VFromD<D> InterleaveUpper(D d, VFromD<D> a, VFromD<D> b) {
+  constexpr size_t kHalf = MaxLanes(Half<decltype(d)>());
+  VFromD<D> zipped;
+  for (size_t i = 0; i < kHalf; ++i) {
+    zipped.raw[2 * i] = a.raw[kHalf + i];      // even lanes: upper half of a
+    zipped.raw[2 * i + 1] = b.raw[kHalf + i];  // odd lanes: upper half of b
+  }
+  return zipped;
+}
+
+// ------------------------------ ZipLower/ZipUpper (InterleaveLower)
+
+// Same as Interleave*, except that the return lanes are double-width integers;
+// this is necessary because the single-lane scalar cannot return two values.
+template <class V, class DW = RepartitionToWide<DFromV<V>>>
+HWY_API VFromD<DW> ZipLower(V a, V b) {
+  return BitCast(DW(), InterleaveLower(a, b));  // view pairs as wide lanes
+}
+template <class V, class D = DFromV<V>, class DW = RepartitionToWide<D>>
+HWY_API VFromD<DW> ZipLower(DW dw, V a, V b) {
+  return BitCast(dw, InterleaveLower(D(), a, b));  // view pairs as wide lanes
+}
+
+template <class V, class D = DFromV<V>, class DW = RepartitionToWide<D>>
+HWY_API VFromD<DW> ZipUpper(DW dw, V a, V b) {
+  return BitCast(dw, InterleaveUpper(D(), a, b));  // view pairs as wide lanes
+}
+
+// ================================================== MASK
+
+template <class D>
+HWY_API bool AllFalse(D d, MFromD<D> mask) {
+  // A single lane with any bit set is enough to answer "no".
+  for (size_t i = 0; i < MaxLanes(d); ++i) {
+    if (mask.bits[i] != 0) return false;
+  }
+  return true;
+}
+
+template <class D>
+HWY_API bool AllTrue(D d, MFromD<D> mask) {
+  using Raw = typename MFromD<D>::Raw;
+  constexpr Raw kAll = LimitsMax<Raw>();  // every lane must equal this value
+  for (size_t i = 0; i < MaxLanes(d); ++i) {
+    if (mask.bits[i] != kAll) return false;
+  }
+  return true;
+}
+
+// `bits` points to at least 8 readable bytes, not all of which need be valid.
+template <class D>
+HWY_API MFromD<D> LoadMaskBits(D d, const uint8_t* HWY_RESTRICT bits) {
+  MFromD<D> m;
+  for (size_t lane = 0; lane < MaxLanes(d); ++lane) {
+    // Lane `lane` is described by bit (lane % 8) of byte (lane / 8).
+    const bool is_set = ((bits[lane / 8] >> (lane % 8)) & 1) != 0;
+    m.bits[lane] = MFromD<D>::FromBool(is_set);
+  }
+  return m;
+}
+
+// `bits` points to at least 8 writable bytes.
+template <class D>
+HWY_API size_t StoreMaskBits(D d, MFromD<D> mask, uint8_t* bits) {
+  // MaxLanes(d) <= 16, so the packed mask occupies one or two bytes.
+  constexpr size_t kNumBytes = (MaxLanes(d) > 8) ? 2 : 1;
+  for (size_t byte = 0; byte < kNumBytes; ++byte) bits[byte] = 0;
+  for (size_t lane = 0; lane < MaxLanes(d); ++lane) {
+    if (!mask.bits[lane]) continue;
+    // Lane `lane` maps to bit (lane & 7) of byte (lane >> 3).
+    uint8_t& packed = bits[lane >> 3];
+    packed = static_cast<uint8_t>(packed | (size_t{1} << (lane & 7)));
+  }
+  return kNumBytes;
+}
+
+template <class D>
+HWY_API size_t CountTrue(D d, MFromD<D> mask) {
+  size_t num_true = 0;
+  for (size_t lane = 0; lane < MaxLanes(d); ++lane) {
+    if (mask.bits[lane] != 0) ++num_true;  // any nonzero lane counts as true
+  }
+  return num_true;
+}
+
+template <class D>
+HWY_API size_t FindKnownFirstTrue(D d, MFromD<D> mask) {
+  size_t i = 0;
+  while (i < MaxLanes(d) && mask.bits[i] == 0) ++i;  // skip false lanes
+  if (i < MaxLanes(d)) return i;
+  HWY_DASSERT(false);  // reached only if no lane is true
+  return 0;
+}
+
+template <class D>
+HWY_API intptr_t FindFirstTrue(D d, MFromD<D> mask) {
+  // Advance past the false lanes; stopping at MaxLanes(d) means none is true.
+  size_t i = 0;
+  while (i < MaxLanes(d) && mask.bits[i] == 0) ++i;
+  return (i < MaxLanes(d)) ? static_cast<intptr_t>(i) : intptr_t{-1};
+}
+
+template <class D>
+HWY_API size_t FindKnownLastTrue(D d, MFromD<D> mask) {
+  for (size_t i = MaxLanes(d); i-- > 0;) {  // scan from the top lane down
+    if (mask.bits[i] != 0) return i;
+  }
+  HWY_DASSERT(false);  // reached only if no lane is true
+  return 0;
+}
+
+template <class D>
+HWY_API intptr_t FindLastTrue(D d, MFromD<D> mask) {
+  for (size_t i = MaxLanes(d); i-- > 0;) {  // scan from the top lane down
+    if (mask.bits[i] != 0) return static_cast<intptr_t>(i);
+  }
+  return intptr_t{-1};  // no lane is true
+}
+
+// ------------------------------ Compress
+
+template <typename T>
+struct CompressIsPartition {
+  enum { value = (sizeof(T) != 1) };  // 1 for every lane size except 1 byte
+};
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Compress(Vec128<T, N> v, Mask128<T, N> mask) {
+  Vec128<T, N> ret;
+  size_t count = 0;
+  // Two sweeps over the input: the first gathers lanes whose mask is true,
+  // the second appends the remaining lanes. Relative order is kept within
+  // each sweep, so all N lanes end up in the result.
+  for (int sweep = 0; sweep < 2; ++sweep) {
+    const bool want_true = (sweep == 0);
+    for (size_t i = 0; i < N; ++i) {
+      const bool is_true = mask.bits[i] != 0;
+      if (is_true == want_true) ret.raw[count++] = v.raw[i];
+    }
+  }
+  HWY_DASSERT(count == N);
+  return ret;
+}
+
+// ------------------------------ Expand
+
+// Could also just allow generic_ops-inl.h to implement these, but use our
+// simple implementation below to ensure the test is correct.
+#ifdef HWY_NATIVE_EXPAND
+#undef HWY_NATIVE_EXPAND
+#else
+#define HWY_NATIVE_EXPAND
+#endif
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Expand(Vec128<T, N> v, const Mask128<T, N> mask) {
+  // Input lanes are consumed from lane 0 upward and handed, in order, to the
+  // output lanes whose mask is true. Input lanes beyond the number of true
+  // mask lanes are never read.
+  Vec128<T, N> ret;
+  size_t next = 0;  // index of the next input lane that has not been used
+  for (size_t i = 0; i < N; ++i) {
+    // False lanes are zero-filled; T() also works for float16_t.
+    ret.raw[i] = mask.bits[i] ? v.raw[next++] : T();
+  }
+  return ret;
+}
+
+// ------------------------------ LoadExpand
+
+template <class D>
+HWY_API VFromD<D> LoadExpand(MFromD<D> mask, D d,
+                             const TFromD<D>* HWY_RESTRICT unaligned) {
+  VFromD<D> ret;
+  const TFromD<D>* next = unaligned;  // next element not yet consumed
+  for (size_t i = 0; i < Lanes(d); ++i) {
+    if (!mask.bits[i]) {
+      ret.raw[i] = TFromD<D>();  // zero, also works for float16_t
+      continue;
+    }
+    ret.raw[i] = *next++;  // true lanes read consecutive elements
+  }
+  return ret;
+}
+
+// ------------------------------ CompressNot
+template <typename T, size_t N>
+HWY_API Vec128<T, N> CompressNot(Vec128<T, N> v, Mask128<T, N> mask) {
+  Vec128<T, N> ret;
+  size_t count = 0;
+  // Two sweeps over the input: the first gathers lanes whose mask is false,
+  // the second appends the lanes whose mask is true. Relative order is kept
+  // within each sweep, so all N lanes end up in the result.
+  for (int sweep = 0; sweep < 2; ++sweep) {
+    const bool want_true = (sweep == 1);
+    for (size_t i = 0; i < N; ++i) {
+      const bool is_true = mask.bits[i] != 0;
+      if (is_true == want_true) ret.raw[count++] = v.raw[i];
+    }
+  }
+  HWY_DASSERT(count == N);
+  return ret;
+}
+
+// ------------------------------ CompressBlocksNot
+HWY_API Vec128<uint64_t> CompressBlocksNot(Vec128<uint64_t> v,
+                                           Mask128<uint64_t> /* m */) {
+  return v;  // the mask is ignored; `v` is returned unchanged
+}
+
+// ------------------------------ CompressBits
+template <typename T, size_t N>
+HWY_API Vec128<T, N> CompressBits(Vec128<T, N> v,
+                                  const uint8_t* HWY_RESTRICT bits) {
+  return Compress(v, LoadMaskBits(Simd<T, N, 0>(), bits));  // unpack, compress
+}
+
+// ------------------------------ CompressStore
+
+// generic_ops-inl defines the 8-bit versions.
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API size_t CompressStore(VFromD<D> v, MFromD<D> mask, D d,
+                             TFromD<D>* HWY_RESTRICT unaligned) {
+  TFromD<D>* out = unaligned;
+  // Only lanes whose mask is true are written, packed from the start.
+  for (size_t i = 0; i < MaxLanes(d); ++i) {
+    if (!mask.bits[i]) continue;
+    *out++ = v.raw[i];
+  }
+  return static_cast<size_t>(out - unaligned);  // number of lanes written
+}
+
+// ------------------------------ CompressBlendedStore
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API size_t CompressBlendedStore(VFromD<D> v, MFromD<D> mask, D d,
+                                    TFromD<D>* HWY_RESTRICT unaligned) {
+  return CompressStore(v, mask, d, unaligned);  // plain forwarding
+}
+
+// ------------------------------ CompressBitsStore
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API size_t CompressBitsStore(VFromD<D> v, const uint8_t* HWY_RESTRICT bits,
+                                 D d, TFromD<D>* HWY_RESTRICT unaligned) {
+  const MFromD<D> mask = LoadMaskBits(d, bits);  // unpack one bit per lane
+  StoreU(Compress(v, mask), d, unaligned);  // stores the compressed vector
+  return CountTrue(d, mask);  // number of lanes whose mask is true
+}
+
+// ------------------------------ Additional mask logical operations
+template <class T>
+HWY_API Mask128<T, 1> SetAtOrAfterFirst(Mask128<T, 1> mask) {
+  return mask;  // single-lane overload: the input is returned unchanged
+}
+
+template <class T, size_t N, HWY_IF_LANES_GT(N, 1)>
+HWY_API Mask128<T, N> SetAtOrAfterFirst(Mask128<T, N> mask) {
+  using TU = hwy::MakeUnsigned<T>;
+  // Running OR: once a true lane has been seen, every later lane is true.
+  Mask128<T, N> result;
+  TU seen{0};
+  for (size_t lane = 0; lane < N; ++lane) {
+    seen = static_cast<TU>(seen | mask.bits[lane]);
+    result.bits[lane] = seen;
+  }
+  return result;
+}
+
+template <class T, size_t N>
+HWY_API Mask128<T, N> SetBeforeFirst(Mask128<T, N> mask) {
+  return Not(SetAtOrAfterFirst(mask));  // complement of "at or after first"
+}
+
+template <class T, size_t N>
+HWY_API Mask128<T, N> SetOnlyFirst(Mask128<T, N> mask) {
+  using TU = hwy::MakeUnsigned<T>;
+
+  Mask128<T, N> result;
+  // `gate` is all-ones until the first true lane has been processed and zero
+  // from then on, so ANDing with it keeps only that first true lane. Lanes
+  // before the first true lane are zero in the input and stay zero.
+  TU gate = static_cast<TU>(~TU{0});
+  for (size_t lane = 0; lane < N; ++lane) {
+    const auto lane_bits = mask.bits[lane];
+    result.bits[lane] = static_cast<TU>(lane_bits & gate);
+    if (lane_bits != 0) gate = TU{0};
+  }
+  return result;
+}
+
+template <class T, size_t N>
+HWY_API Mask128<T, N> SetAtOrBeforeFirst(Mask128<T, N> mask) {
+  using TU = hwy::MakeUnsigned<T>;
+
+  Mask128<T, N> result;
+  // `gate` is written to each lane before it is updated, so the first true
+  // input lane still receives all-ones; every lane after it receives zero.
+  // If no input lane is true, all output lanes are all-ones.
+  TU gate = static_cast<TU>(~TU{0});
+  for (size_t lane = 0; lane < N; ++lane) {
+    result.bits[lane] = gate;
+    if (mask.bits[lane] != 0) gate = TU{0};
+  }
+  return result;
+}
+
+// ------------------------------ WidenMulPairwiseAdd
+
+template <class D, HWY_IF_F32_D(D), class VBF16>
+HWY_API VFromD<D> WidenMulPairwiseAdd(D df32, VBF16 a, VBF16 b) {
+  const Rebind<uint32_t, decltype(df32)> du32;
+  const auto a_bits = BitCast(du32, a);
+  const auto b_bits = BitCast(du32, b);
+  const auto hi16 = Set(du32, 0xFFFF0000u);  // bf16 = upper half of f32
+  // Avoid ZipLower/Upper so this also works on big-endian systems.
+  const auto even = Mul(BitCast(df32, ShiftLeft<16>(a_bits)),
+                        BitCast(df32, ShiftLeft<16>(b_bits)));
+  const auto odd = Mul(BitCast(df32, And(a_bits, hi16)),
+                       BitCast(df32, And(b_bits, hi16)));
+  return even + odd;
+}
+
+template <class D, HWY_IF_I32_D(D), class VI16>
+HWY_API VFromD<D> WidenMulPairwiseAdd(D d32, VI16 a, VI16 b) {
+  const auto a32 = BitCast(d32, a);
+  const auto b32 = BitCast(d32, b);
+  // Manual sign extension requires two shifts for even lanes.
+  const auto even_a = ShiftRight<16>(ShiftLeft<16>(a32));
+  const auto even_b = ShiftRight<16>(ShiftLeft<16>(b32));
+  const auto odd_prod = Mul(ShiftRight<16>(a32), ShiftRight<16>(b32));
+  return Add(Mul(even_a, even_b), odd_prod);
+}
+
+template <class D, HWY_IF_U32_D(D), class VU16>
+HWY_API VFromD<D> WidenMulPairwiseAdd(D du32, VU16 a, VU16 b) {
+  const auto a32 = BitCast(du32, a);
+  const auto b32 = BitCast(du32, b);
+  const auto lo16_mask = Set(du32, 0x0000FFFFu);
+
+  // Multiply the low 16-bit halves and the high 16-bit halves separately.
+  const auto lo_prod = Mul(And(a32, lo16_mask), And(b32, lo16_mask));
+  const auto hi_prod = Mul(ShiftRight<16>(a32), ShiftRight<16>(b32));
+
+  return Add(lo_prod, hi_prod);
+}
+
+// ------------------------------ ReorderWidenMulAccumulate (MulAdd, ZipLower)
+
+template <class D, HWY_IF_F32_D(D), size_t N, class VBF16>
+HWY_API VFromD<D> ReorderWidenMulAccumulate(D df32, VBF16 a, VBF16 b,
+                                            const Vec128<float, N> sum0,
+                                            Vec128<float, N>& sum1) {
+  const Rebind<uint32_t, decltype(df32)> du32;
+  const auto a_bits = BitCast(du32, a);
+  const auto b_bits = BitCast(du32, b);
+  const auto hi16 = Set(du32, 0xFFFF0000u);  // bf16 = upper half of f32
+  // Avoid ZipLower/Upper so this also works on big-endian systems.
+  const auto a_even = BitCast(df32, ShiftLeft<16>(a_bits));
+  const auto b_even = BitCast(df32, ShiftLeft<16>(b_bits));
+  sum1 = MulAdd(BitCast(df32, And(a_bits, hi16)),
+                BitCast(df32, And(b_bits, hi16)), sum1);
+  return MulAdd(a_even, b_even, sum0);
+}
+
+template <class D, HWY_IF_I32_D(D), size_t N, class VI16>
+HWY_API VFromD<D> ReorderWidenMulAccumulate(D d32, VI16 a, VI16 b,
+                                            const Vec128<int32_t, N> sum0,
+                                            Vec128<int32_t, N>& sum1) {
+  const auto a32 = BitCast(d32, a);
+  const auto b32 = BitCast(d32, b);
+  // Manual sign extension requires two shifts for even lanes.
+  const auto even_a = ShiftRight<16>(ShiftLeft<16>(a32));
+  const auto even_b = ShiftRight<16>(ShiftLeft<16>(b32));
+  // Odd products accumulate into sum1, even products into the return value.
+  sum1 = Add(Mul(ShiftRight<16>(a32), ShiftRight<16>(b32)), sum1);
+  return Add(Mul(even_a, even_b), sum0);
+}
+
+template <class D, HWY_IF_U32_D(D), size_t N, class VU16>
+HWY_API VFromD<D> ReorderWidenMulAccumulate(D du32, VU16 a, VU16 b,
+                                            const Vec128<uint32_t, N> sum0,
+                                            Vec128<uint32_t, N>& sum1) {
+  const auto a32 = BitCast(du32, a);
+  const auto b32 = BitCast(du32, b);
+  const auto lo16_mask = Set(du32, uint32_t{0x0000FFFFu});
+  // Products of the high 16-bit halves accumulate into sum1.
+  sum1 = Add(Mul(ShiftRight<16>(a32), ShiftRight<16>(b32)), sum1);
+  // Products of the low 16-bit halves are added to sum0 and returned.
+  const auto lo_prod = Mul(And(a32, lo16_mask), And(b32, lo16_mask));
+  return Add(lo_prod, sum0);
+}
+
+// ------------------------------ RearrangeToOddPlusEven
+template <class VW>
+HWY_API VW RearrangeToOddPlusEven(VW sum0, VW sum1) {
+  return Add(sum0, sum1);  // lane-wise sum of the two accumulators
+}
+
+// ================================================== REDUCTIONS
+
+template <class D, typename T = TFromD<D>>
+HWY_API VFromD<D> SumOfLanes(D d, VFromD<D> v) {
+  T total = T{0};
+  for (size_t lane = 0; lane < MaxLanes(d); ++lane) {
+    total += v.raw[lane];  // accumulate in ascending lane order
+  }
+  return Set(d, total);  // broadcast the sum to every lane
+}
+template <class D, typename T = TFromD<D>>
+HWY_API T ReduceSum(D d, VFromD<D> v) {
+  T total = T{0};
+  for (size_t lane = 0; lane < MaxLanes(d); ++lane) {
+    total += v.raw[lane];  // accumulate in ascending lane order
+  }
+  return total;  // scalar result, unlike SumOfLanes
+}
+template <class D, typename T = TFromD<D>>
+HWY_API VFromD<D> MinOfLanes(D d, VFromD<D> v) {
+  T smallest = HighestValue<T>();
+  for (size_t i = 0; i < MaxLanes(d); ++i) {
+    smallest = (smallest < v.raw[i]) ? smallest : v.raw[i];  // as HWY_MIN
+  }
+  return Set(d, smallest);  // broadcast the minimum to every lane
+}
+template <class D, typename T = TFromD<D>>
+HWY_API VFromD<D> MaxOfLanes(D d, VFromD<D> v) {
+  T largest = LowestValue<T>();
+  for (size_t i = 0; i < MaxLanes(d); ++i) {
+    largest = (largest > v.raw[i]) ? largest : v.raw[i];  // as HWY_MAX
+  }
+  return Set(d, largest);  // broadcast the maximum to every lane
+}
+
+// ================================================== OPS WITH DEPENDENCIES
+
+// ------------------------------ MulEven/Odd 64x64 (UpperHalf)
+
+HWY_INLINE Vec128<uint64_t> MulEven(Vec128<uint64_t> a, Vec128<uint64_t> b) {
+  alignas(16) uint64_t product[2];  // Mul128 returns [0] and fills in [1]
+  product[0] = Mul128(a.raw[0], b.raw[0], &product[1]);
+  return Load(Full128<uint64_t>(), product);
+}
+
+HWY_INLINE Vec128<uint64_t> MulOdd(Vec128<uint64_t> a, Vec128<uint64_t> b) {
+  alignas(16) uint64_t mul[2];  // Mul128 returns mul[0] and fills in mul[1]
+  const Half<Full128<uint64_t>> d2;
+  mul[0] =
+      Mul128(GetLane(UpperHalf(d2, a)), GetLane(UpperHalf(d2, b)), &mul[1]);
+  return Load(Full128<uint64_t>(), mul);  // both result words as one vector
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/ops/generic_ops-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/generic_ops-inl.h
new file mode 100644
index 0000000000..c0e8caa9f6
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/generic_ops-inl.h
@@ -0,0 +1,4596 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Target-independent types/functions defined after target-specific ops.
+
+#include "hwy/base.h"
+
+// Define detail::Shuffle1230 etc, but only when viewing the current header;
+// normally this is included via highway.h, which includes ops/*.h.
+#if HWY_IDE && !defined(HWY_HIGHWAY_INCLUDED)
+#include "hwy/detect_targets.h"
+#include "hwy/ops/emu128-inl.h"
+#endif  // HWY_IDE
+
+// Relies on the external include guard in highway.h.
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Lane type of vector V, i.e. GetLane's return type (float for Vec128<float>).
+template <class V>
+using LaneType = decltype(GetLane(V()));
+
+// Vector type for tag D (the type of Zero(D())), e.g. Vec128<float> for
+// CappedTag<float, 4>. Use it as the return type of functions without a vector
+// argument, as an argument type when only D is a template parameter, or to
+// spell out a type instead of auto. This may be a built-in type.
+template <class D>
+using Vec = decltype(Zero(D()));
+
+// Mask type for tag D (the type of MaskFromVec(Zero(D()))). Use it as the
+// return type of functions without a mask argument, as an argument type when
+// only D is a template parameter, or to spell out a type instead of auto.
+template <class D>
+using Mask = decltype(MaskFromVec(Zero(D())));
+
+// Returns the closest value to v within [lo, hi].
+template <class V>
+HWY_API V Clamp(const V v, const V lo, const V hi) {
+  return Min(Max(lo, v), hi);  // apply the lower bound, then the upper bound
+}
+
+// CombineShiftRightBytes (and -Lanes) are not available for the scalar target,
+// and RVV has its own implementation of -Lanes.
+#if HWY_TARGET != HWY_SCALAR && HWY_TARGET != HWY_RVV
+
+template <size_t kLanes, class D>
+HWY_API VFromD<D> CombineShiftRightLanes(D d, VFromD<D> hi, VFromD<D> lo) {
+  constexpr size_t kBytes = kLanes * sizeof(TFromD<D>);  // lanes -> bytes
+  static_assert(kBytes < 16, "Shift count is per-block");
+  return CombineShiftRightBytes<kBytes>(d, hi, lo);  // defer to byte variant
+}
+
+#endif
+
+// Returns lanes with the most significant bit set and all other bits zero.
+template <class D>
+HWY_API Vec<D> SignBit(D d) {
+  const RebindToUnsigned<decltype(d)> du;  // set the bits as unsigned lanes
+  return BitCast(d, Set(du, SignMask<TFromD<D>>()));
+}
+
+// Returns quiet NaN.
+template <class D>
+HWY_API Vec<D> NaN(D d) {
+  const RebindToSigned<D> di;
+  using TI = TFromD<decltype(di)>;
+  // All exponent+mantissa bits set; exponent plus mantissa MSB would suffice.
+  return BitCast(d, Set(di, LimitsMax<TI>()));
+}
+
+// Returns positive infinity.
+template <class D>
+HWY_API Vec<D> Inf(D d) {
+  const RebindToUnsigned<D> du;  // the bit pattern is built as unsigned lanes
+  using T = TFromD<D>;
+  using TU = TFromD<decltype(du)>;
+  const TU max_x2 = static_cast<TU>(MaxExponentTimes2<T>());
+  return BitCast(d, Set(du, max_x2 >> 1));  // halve to undo the "times 2"
+}
+
+// ------------------------------ ZeroExtendResizeBitCast
+
+// The implementation of detail::ZeroExtendResizeBitCast for the HWY_EMU128
+// target is in emu128-inl.h, and the implementation of
+// detail::ZeroExtendResizeBitCast for the HWY_SCALAR target is in scalar-inl.h
+#if HWY_TARGET != HWY_EMU128 && HWY_TARGET != HWY_SCALAR
+namespace detail {
+
+#if HWY_HAVE_SCALABLE
+template <size_t kFromVectSize, size_t kToVectSize, class DTo, class DFrom>
+HWY_INLINE VFromD<DTo> ZeroExtendResizeBitCast(
+    hwy::SizeTag<kFromVectSize> /* from_size_tag */,
+    hwy::SizeTag<kToVectSize> /* to_size_tag */, DTo d_to, DFrom d_from,
+    VFromD<DFrom> v) {
+  const Repartition<uint8_t, DTo> d_to_u8;
+  const Repartition<uint8_t, decltype(d_from)> d_from_u8;
+  // Keep only the bytes that d_from actually provides; zero everything above.
+  const auto valid = FirstN(d_to_u8, Lanes(d_from_u8));
+  return BitCast(d_to, IfThenElseZero(valid, ResizeBitCast(d_to_u8, v)));
+}
+#else   // target that uses fixed-size vectors
+// Truncating or same-size resizing cast: same as ResizeBitCast
+template <size_t kFromVectSize, size_t kToVectSize, class DTo, class DFrom,
+          HWY_IF_LANES_LE(kToVectSize, kFromVectSize)>
+HWY_INLINE VFromD<DTo> ZeroExtendResizeBitCast(
+    hwy::SizeTag<kFromVectSize> /* from_size_tag */,
+    hwy::SizeTag<kToVectSize> /* to_size_tag */, DTo d_to, DFrom /*d_from*/,
+    VFromD<DFrom> v) {
+  return ResizeBitCast(d_to, v);  // target is not larger: nothing to zero
+}
+
+// Resizing cast to vector that has twice the number of lanes of the source
+// vector
+template <size_t kFromVectSize, size_t kToVectSize, class DTo, class DFrom,
+          HWY_IF_LANES(kToVectSize, kFromVectSize * 2)>
+HWY_INLINE VFromD<DTo> ZeroExtendResizeBitCast(
+    hwy::SizeTag<kFromVectSize> /* from_size_tag */,
+    hwy::SizeTag<kToVectSize> /* to_size_tag */, DTo d_to, DFrom d_from,
+    VFromD<DFrom> v) {
+  const Twice<decltype(d_from)> dt_from;  // source tag with doubled lanes
+  return BitCast(d_to, ZeroExtendVector(dt_from, v));  // extend, then cast
+}
+
+// Resizing cast to vector that has more than twice the number of lanes of the
+// source vector
+template <size_t kFromVectSize, size_t kToVectSize, class DTo, class DFrom,
+          HWY_IF_LANES_GT(kToVectSize, kFromVectSize * 2)>
+HWY_INLINE VFromD<DTo> ZeroExtendResizeBitCast(
+    hwy::SizeTag<kFromVectSize> /* from_size_tag */,
+    hwy::SizeTag<kToVectSize> /* to_size_tag */, DTo d_to, DFrom /*d_from*/,
+    VFromD<DFrom> v) {
+  using TFrom = TFromD<DFrom>;
+  const Repartition<TFrom, decltype(d_to)> d_wide;
+  // Only the lanes that existed in the source survive; the rest become zero.
+  const auto from_lanes = FirstN(d_wide, kFromVectSize / sizeof(TFrom));
+  return BitCast(d_to, IfThenElseZero(from_lanes, ResizeBitCast(d_wide, v)));
+}
+#endif  // HWY_HAVE_SCALABLE
+
+}  // namespace detail
+#endif  // HWY_TARGET != HWY_EMU128 && HWY_TARGET != HWY_SCALAR
+
+template <class DTo, class DFrom>
+HWY_API VFromD<DTo> ZeroExtendResizeBitCast(DTo d_to, DFrom d_from,
+                                            VFromD<DFrom> v) {
+  return detail::ZeroExtendResizeBitCast(hwy::SizeTag<d_from.MaxBytes()>(),
+                                         hwy::SizeTag<d_to.MaxBytes()>(), d_to,
+                                         d_from, v);  // dispatch on byte sizes
+}
+
+// ------------------------------ SafeFillN
+
+template <class D, typename T = TFromD<D>>
+HWY_API void SafeFillN(const size_t num, const T value, D d,
+                       T* HWY_RESTRICT to) {
+#if HWY_MEM_OPS_MIGHT_FAULT
+  (void)d;  // unused here: plain scalar stores write exactly to[0, num)
+  for (size_t idx = 0; idx != num; ++idx) {
+    to[idx] = value;
+  }
+#else
+  BlendedStore(Set(d, value), FirstN(d, num), d, to);  // masked vector store
+#endif
+}
+
+// ------------------------------ SafeCopyN
+
+template <class D, typename T = TFromD<D>>
+HWY_API void SafeCopyN(const size_t num, D d, const T* HWY_RESTRICT from,
+                       T* HWY_RESTRICT to) {
+#if HWY_MEM_OPS_MIGHT_FAULT
+  (void)d;  // unused here: plain scalar copies touch exactly num elements
+  for (size_t idx = 0; idx != num; ++idx) {
+    to[idx] = from[idx];
+  }
+#else
+  const auto first_num = FirstN(d, num);  // same mask for load and store
+  BlendedStore(MaskedLoad(first_num, d, from), first_num, d, to);
+#endif
+}
+
+// ------------------------------ BitwiseIfThenElse
+#if (defined(HWY_NATIVE_BITWISE_IF_THEN_ELSE) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_BITWISE_IF_THEN_ELSE
+#undef HWY_NATIVE_BITWISE_IF_THEN_ELSE
+#else
+#define HWY_NATIVE_BITWISE_IF_THEN_ELSE
+#endif
+
+template <class V>
+HWY_API V BitwiseIfThenElse(V mask, V yes, V no) {
+  return Or(And(mask, yes), AndNot(mask, no));  // per bit: mask ? yes : no
+}
+
+#endif  // HWY_NATIVE_BITWISE_IF_THEN_ELSE
+
+// "Include guard": skip if native instructions are available. The generic
+// implementation is currently shared between x86_* and wasm_*, and is too large
+// to duplicate.
+
+#if HWY_IDE || \
+    (defined(HWY_NATIVE_LOAD_STORE_INTERLEAVED) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_LOAD_STORE_INTERLEAVED
+#undef HWY_NATIVE_LOAD_STORE_INTERLEAVED
+#else
+#define HWY_NATIVE_LOAD_STORE_INTERLEAVED
+#endif
+
+// ------------------------------ LoadInterleaved2
+
+template <class D, HWY_IF_LANES_GT_D(D, 1)>
+HWY_API void LoadInterleaved2(D d, const TFromD<D>* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1) {
+  const auto lower = LoadU(d, unaligned);  // v1[1] v0[1] v1[0] v0[0]
+  const auto upper = LoadU(d, unaligned + Lanes(d));
+  v0 = ConcatEven(d, upper, lower);  // even-indexed elements belong to v0
+  v1 = ConcatOdd(d, upper, lower);   // odd-indexed elements belong to v1
+}
+
+template <class D, HWY_IF_LANES_D(D, 1)>
+HWY_API void LoadInterleaved2(D d, const TFromD<D>* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1) {
+  v0 = LoadU(d, unaligned + 0);  // single-lane case: element 0 goes to v0
+  v1 = LoadU(d, unaligned + 1);  // and element 1 goes to v1
+}
+
+// ------------------------------ LoadInterleaved3 (CombineShiftRightBytes)
+
+namespace detail {
+
+#if HWY_IDE
+template <class V>
+HWY_INLINE V ShuffleTwo1230(V a, V /* b */) {
+  return a;  // stub compiled only when HWY_IDE is set; returns `a` as is
+}
+template <class V>
+HWY_INLINE V ShuffleTwo2301(V a, V /* b */) {
+  return a;  // stub compiled only when HWY_IDE is set; returns `a` as is
+}
+template <class V>
+HWY_INLINE V ShuffleTwo3012(V a, V /* b */) {
+  return a;  // stub compiled only when HWY_IDE is set; returns `a` as is
+}
+#endif  // HWY_IDE
+
+// Default for <= 128-bit vectors; x86_256 and x86_512 have their own overload.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_INLINE void LoadTransposedBlocks3(D d,
+                                      const TFromD<D>* HWY_RESTRICT unaligned,
+                                      VFromD<D>& A, VFromD<D>& B,
+                                      VFromD<D>& C) {
+  constexpr size_t kN = MaxLanes(d);  // lanes per vector
+  A = LoadU(d, unaligned + 0 * kN);  // three consecutive vectors, in order
+  B = LoadU(d, unaligned + 1 * kN);
+  C = LoadU(d, unaligned + 2 * kN);
+}
+
+}  // namespace detail
+
+template <class D, HWY_IF_LANES_PER_BLOCK_D(D, 16)>  // 16 lanes per block; de-interleaves 3-tuples into v0, v1, v2
+HWY_API void LoadInterleaved3(D d, const TFromD<D>* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1, VFromD<D>& v2) {
+  const RebindToUnsigned<decltype(d)> du;
+  using V = VFromD<D>;
+  // Compact notation so these fit on one line: 12 := v1[2]; lane index in hex, lane 0 rightmost.
+  V A;  // 05 24 14 04 23 13 03 22 12 02 21 11 01 20 10 00
+  V B;  // 1a 0a 29 19 09 28 18 08 27 17 07 26 16 06 25 15
+  V C;  // 2f 1f 0f 2e 1e 0e 2d 1d 0d 2c 1c 0c 2b 1b 0b 2a
+  detail::LoadTransposedBlocks3(d, unaligned, A, B, C);
+  // Compress all lanes belonging to v0 into consecutive lanes. kIdx_vXY picks vX's lanes out of block Y.
+  constexpr uint8_t Z = 0x80;  // index with MSB set: TableLookupBytesOr0 outputs 0 for that lane
+  alignas(16) static constexpr uint8_t kIdx_v0A[16] = {
+      0, 3, 6, 9, 12, 15, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z};
+  alignas(16) static constexpr uint8_t kIdx_v0B[16] = {
+      Z, Z, Z, Z, Z, Z, 2, 5, 8, 11, 14, Z, Z, Z, Z, Z};
+  alignas(16) static constexpr uint8_t kIdx_v0C[16] = {
+      Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 1, 4, 7, 10, 13};
+  alignas(16) static constexpr uint8_t kIdx_v1A[16] = {
+      1, 4, 7, 10, 13, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z};
+  alignas(16) static constexpr uint8_t kIdx_v1B[16] = {
+      Z, Z, Z, Z, Z, 0, 3, 6, 9, 12, 15, Z, Z, Z, Z, Z};
+  alignas(16) static constexpr uint8_t kIdx_v1C[16] = {
+      Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 2, 5, 8, 11, 14};
+  alignas(16) static constexpr uint8_t kIdx_v2A[16] = {
+      2, 5, 8, 11, 14, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z};
+  alignas(16) static constexpr uint8_t kIdx_v2B[16] = {
+      Z, Z, Z, Z, Z, 1, 4, 7, 10, 13, Z, Z, Z, Z, Z, Z};
+  alignas(16) static constexpr uint8_t kIdx_v2C[16] = {
+      Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 0, 3, 6, 9, 12, 15};
+  const V v0L = BitCast(d, TableLookupBytesOr0(A, LoadDup128(du, kIdx_v0A)));  // v0 lanes taken from A
+  const V v0M = BitCast(d, TableLookupBytesOr0(B, LoadDup128(du, kIdx_v0B)));  // v0 lanes taken from B
+  const V v0U = BitCast(d, TableLookupBytesOr0(C, LoadDup128(du, kIdx_v0C)));  // v0 lanes taken from C
+  const V v1L = BitCast(d, TableLookupBytesOr0(A, LoadDup128(du, kIdx_v1A)));
+  const V v1M = BitCast(d, TableLookupBytesOr0(B, LoadDup128(du, kIdx_v1B)));
+  const V v1U = BitCast(d, TableLookupBytesOr0(C, LoadDup128(du, kIdx_v1C)));
+  const V v2L = BitCast(d, TableLookupBytesOr0(A, LoadDup128(du, kIdx_v2A)));
+  const V v2M = BitCast(d, TableLookupBytesOr0(B, LoadDup128(du, kIdx_v2B)));
+  const V v2U = BitCast(d, TableLookupBytesOr0(C, LoadDup128(du, kIdx_v2C)));
+  v0 = Xor3(v0L, v0M, v0U);  // the three parts fill disjoint lanes (zero elsewhere), so XOR merges them
+  v1 = Xor3(v1L, v1M, v1U);
+  v2 = Xor3(v2L, v2M, v2U);
+}
+
+// 8-bit lanes x8: de-interleaves three loaded vectors of 3-tuples into v0, v1, v2 via byte shuffles.
+template <class D, HWY_IF_LANES_PER_BLOCK_D(D, 8), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API void LoadInterleaved3(D d, const TFromD<D>* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1, VFromD<D>& v2) {
+  const RebindToUnsigned<decltype(d)> du;
+  using V = VFromD<D>;
+  V A;  // v1[2] v0[2] v2[1] v1[1] v0[1] v2[0] v1[0] v0[0]
+  V B;  // v0[5] v2[4] v1[4] v0[4] v2[3] v1[3] v0[3] v2[2]
+  V C;  // v2[7] v1[7] v0[7] v2[6] v1[6] v0[6] v2[5] v1[5]
+  detail::LoadTransposedBlocks3(d, unaligned, A, B, C);
+  // Compress all lanes belonging to v0 into consecutive lanes. Only 8 of the 16 table entries are listed; the rest are zero-initialized.
+  constexpr uint8_t Z = 0x80;  // index with MSB set: TableLookupBytesOr0 outputs 0 for that lane
+  alignas(16) static constexpr uint8_t kIdx_v0A[16] = {0, 3, 6, Z, Z, Z, Z, Z};
+  alignas(16) static constexpr uint8_t kIdx_v0B[16] = {Z, Z, Z, 1, 4, 7, Z, Z};
+  alignas(16) static constexpr uint8_t kIdx_v0C[16] = {Z, Z, Z, Z, Z, Z, 2, 5};
+  alignas(16) static constexpr uint8_t kIdx_v1A[16] = {1, 4, 7, Z, Z, Z, Z, Z};
+  alignas(16) static constexpr uint8_t kIdx_v1B[16] = {Z, Z, Z, 2, 5, Z, Z, Z};
+  alignas(16) static constexpr uint8_t kIdx_v1C[16] = {Z, Z, Z, Z, Z, 0, 3, 6};
+  alignas(16) static constexpr uint8_t kIdx_v2A[16] = {2, 5, Z, Z, Z, Z, Z, Z};
+  alignas(16) static constexpr uint8_t kIdx_v2B[16] = {Z, Z, 0, 3, 6, Z, Z, Z};
+  alignas(16) static constexpr uint8_t kIdx_v2C[16] = {Z, Z, Z, Z, Z, 1, 4, 7};
+  const V v0L = BitCast(d, TableLookupBytesOr0(A, LoadDup128(du, kIdx_v0A)));  // v0 lanes taken from A
+  const V v0M = BitCast(d, TableLookupBytesOr0(B, LoadDup128(du, kIdx_v0B)));  // v0 lanes taken from B
+  const V v0U = BitCast(d, TableLookupBytesOr0(C, LoadDup128(du, kIdx_v0C)));  // v0 lanes taken from C
+  const V v1L = BitCast(d, TableLookupBytesOr0(A, LoadDup128(du, kIdx_v1A)));
+  const V v1M = BitCast(d, TableLookupBytesOr0(B, LoadDup128(du, kIdx_v1B)));
+  const V v1U = BitCast(d, TableLookupBytesOr0(C, LoadDup128(du, kIdx_v1C)));
+  const V v2L = BitCast(d, TableLookupBytesOr0(A, LoadDup128(du, kIdx_v2A)));
+  const V v2M = BitCast(d, TableLookupBytesOr0(B, LoadDup128(du, kIdx_v2B)));
+  const V v2U = BitCast(d, TableLookupBytesOr0(C, LoadDup128(du, kIdx_v2C)));
+  v0 = Xor3(v0L, v0M, v0U);  // within the first 8 lanes the parts are disjoint, so XOR merges them
+  v1 = Xor3(v1L, v1M, v1U);
+  v2 = Xor3(v2L, v2M, v2U);
+}
+
+// 16-bit lanes x8: de-interleaves three loaded vectors of 3-tuples into v0, v1, v2 via byte shuffles.
+template <class D, HWY_IF_LANES_PER_BLOCK_D(D, 8), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API void LoadInterleaved3(D d, const TFromD<D>* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1, VFromD<D>& v2) {
+  const RebindToUnsigned<decltype(d)> du;
+  const Repartition<uint8_t, decltype(du)> du8;  // byte view used to load the index tables
+  using V = VFromD<D>;
+  V A;  // v1[2] v0[2] v2[1] v1[1] v0[1] v2[0] v1[0] v0[0]
+  V B;  // v0[5] v2[4] v1[4] v0[4] v2[3] v1[3] v0[3] v2[2]
+  V C;  // v2[7] v1[7] v0[7] v2[6] v1[6] v0[6] v2[5] v1[5]
+  detail::LoadTransposedBlocks3(d, unaligned, A, B, C);
+  // Compress all lanes belonging to v0 into consecutive lanes. Each 16-bit lane
+  // needs two table entries: the byte indices 2*lane and 2*lane + 1.
+  constexpr uint8_t Z = 0x80;  // index with MSB set: TableLookupBytesOr0 outputs 0 for that byte
+  alignas(16) static constexpr uint8_t kIdx_v0A[16] = {
+      0x00, 0x01, 0x06, 0x07, 0x0C, 0x0D, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z};
+  alignas(16) static constexpr uint8_t kIdx_v0B[16] = {
+      Z, Z, Z, Z, Z, Z, 0x02, 0x03, 0x08, 0x09, 0x0E, 0x0F, Z, Z, Z, Z};
+  alignas(16) static constexpr uint8_t kIdx_v0C[16] = {
+      Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 0x04, 0x05, 0x0A, 0x0B};
+  alignas(16) static constexpr uint8_t kIdx_v1A[16] = {
+      0x02, 0x03, 0x08, 0x09, 0x0E, 0x0F, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z};
+  alignas(16) static constexpr uint8_t kIdx_v1B[16] = {
+      Z, Z, Z, Z, Z, Z, 0x04, 0x05, 0x0A, 0x0B, Z, Z, Z, Z, Z, Z};
+  alignas(16) static constexpr uint8_t kIdx_v1C[16] = {
+      Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 0x00, 0x01, 0x06, 0x07, 0x0C, 0x0D};
+  alignas(16) static constexpr uint8_t kIdx_v2A[16] = {
+      0x04, 0x05, 0x0A, 0x0B, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z};
+  alignas(16) static constexpr uint8_t kIdx_v2B[16] = {
+      Z, Z, Z, Z, 0x00, 0x01, 0x06, 0x07, 0x0C, 0x0D, Z, Z, Z, Z, Z, Z};
+  alignas(16) static constexpr uint8_t kIdx_v2C[16] = {
+      Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, 0x02, 0x03, 0x08, 0x09, 0x0E, 0x0F};
+  const V v0L = TableLookupBytesOr0(A, BitCast(d, LoadDup128(du8, kIdx_v0A)));  // v0 lanes taken from A
+  const V v0M = TableLookupBytesOr0(B, BitCast(d, LoadDup128(du8, kIdx_v0B)));  // v0 lanes taken from B
+  const V v0U = TableLookupBytesOr0(C, BitCast(d, LoadDup128(du8, kIdx_v0C)));  // v0 lanes taken from C
+  const V v1L = TableLookupBytesOr0(A, BitCast(d, LoadDup128(du8, kIdx_v1A)));
+  const V v1M = TableLookupBytesOr0(B, BitCast(d, LoadDup128(du8, kIdx_v1B)));
+  const V v1U = TableLookupBytesOr0(C, BitCast(d, LoadDup128(du8, kIdx_v1C)));
+  const V v2L = TableLookupBytesOr0(A, BitCast(d, LoadDup128(du8, kIdx_v2A)));
+  const V v2M = TableLookupBytesOr0(B, BitCast(d, LoadDup128(du8, kIdx_v2B)));
+  const V v2U = TableLookupBytesOr0(C, BitCast(d, LoadDup128(du8, kIdx_v2C)));
+  v0 = Xor3(v0L, v0M, v0U);  // the three parts fill disjoint bytes (zero elsewhere), so XOR merges them
+  v1 = Xor3(v1L, v1M, v1U);
+  v2 = Xor3(v2L, v2M, v2U);
+}
+
+template <class D, HWY_IF_LANES_PER_BLOCK_D(D, 4)>  // 4 lanes per block; de-interleaves 3-tuples into v0, v1, v2
+HWY_API void LoadInterleaved3(D d, const TFromD<D>* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1, VFromD<D>& v2) {
+  using V = VFromD<D>;
+  V A;  // v0[1] v2[0] v1[0] v0[0]
+  V B;  // v1[2] v0[2] v2[1] v1[1]
+  V C;  // v2[3] v1[3] v0[3] v2[2]
+  detail::LoadTransposedBlocks3(d, unaligned, A, B, C);
+
+  const V vxx_02_03_xx = OddEven(C, B);  // name lists lanes, highest first: 02 := v0[2], xx := unused
+  v0 = detail::ShuffleTwo1230(A, vxx_02_03_xx);  // NOTE(review): ShuffleTwo* are defined outside this section
+
+  // Shuffle2301 takes the upper/lower halves of the output from one input, so
+  // we cannot just combine 13 and 10 with 12 and 11 (similar to v0/v2). Use
+  // OddEven because it may have higher throughput than Shuffle.
+  const V vxx_xx_10_11 = OddEven(A, B);  // odd lanes from A, even lanes from B
+  const V v12_13_xx_xx = OddEven(B, C);  // odd lanes from B, even lanes from C
+  v1 = detail::ShuffleTwo2301(vxx_xx_10_11, v12_13_xx_xx);
+
+  const V vxx_20_21_xx = OddEven(B, A);  // odd lanes from B, even lanes from A
+  v2 = detail::ShuffleTwo3012(vxx_20_21_xx, C);
+}
+
+template <class D, HWY_IF_LANES_PER_BLOCK_D(D, 2)>  // two lanes per block
+HWY_API void LoadInterleaved3(D d, const TFromD<D>* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1, VFromD<D>& v2) {
+  VFromD<D> blk0;  // v1[0] v0[0]
+  VFromD<D> blk1;  // v0[1] v2[0]
+  VFromD<D> blk2;  // v2[1] v1[1]
+  detail::LoadTransposedBlocks3(d, unaligned, blk0, blk1, blk2);
+  v0 = OddEven(blk1, blk0);  // v0[1] out of blk1, v0[0] out of blk0
+  v1 = CombineShiftRightBytes<sizeof(TFromD<D>)>(d, blk2, blk0);  // v1[1] v1[0]
+  v2 = OddEven(blk2, blk1);  // v2[1] out of blk2, v2[0] out of blk1
+}
+
+template <class D, typename T = TFromD<D>, HWY_IF_LANES_D(D, 1)>  // single-lane vectors: nothing to shuffle
+HWY_API void LoadInterleaved3(D d, const T* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1, VFromD<D>& v2) {
+  v0 = LoadU(d, unaligned + 0);  // first element of the triple
+  v1 = LoadU(d, unaligned + 1);  // second element of the triple
+  v2 = LoadU(d, unaligned + 2);  // third element of the triple
+}
+
+// ------------------------------ LoadInterleaved4
+
+namespace detail {
+
+// Default for <= 128-bit vectors; x86_256 and x86_512 have their own overload.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>  // Loads four back-to-back vectors into vA..vD.
+HWY_INLINE void LoadTransposedBlocks4(D d,
+                                      const TFromD<D>* HWY_RESTRICT unaligned,
+                                      VFromD<D>& vA, VFromD<D>& vB,
+                                      VFromD<D>& vC, VFromD<D>& vD) {
+  constexpr size_t kN = MaxLanes(d);  // element stride between consecutive loads
+  vA = LoadU(d, unaligned + 0 * kN);
+  vB = LoadU(d, unaligned + 1 * kN);
+  vC = LoadU(d, unaligned + 2 * kN);
+  vD = LoadU(d, unaligned + 3 * kN);
+}
+
+}  // namespace detail
+
+template <class D, HWY_IF_LANES_PER_BLOCK_D(D, 16)>  // 16 lanes per block; de-interleaves 4-tuples into v0..v3
+HWY_API void LoadInterleaved4(D d, const TFromD<D>* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1, VFromD<D>& v2,
+                              VFromD<D>& v3) {
+  const Repartition<uint64_t, decltype(d)> d64;  // same vector viewed as 64-bit lanes, for the final interleave
+  using V64 = VFromD<decltype(d64)>;
+  using V = VFromD<D>;
+  // 16 lanes per block; the lowest four blocks are at the bottom of vA..vD.
+  // Here int[i] means the four interleaved values of the i-th 4-tuple and
+  // int[3..0] indicates four consecutive 4-tuples (0 = least-significant).
+  V vA;  // int[13..10] int[3..0]
+  V vB;  // int[17..14] int[7..4]
+  V vC;  // int[1b..18] int[b..8]
+  V vD;  // int[1f..1c] int[f..c]
+  detail::LoadTransposedBlocks4(d, unaligned, vA, vB, vC, vD);
+
+  // For brevity, the comments only list the lower block (upper = lower + 0x10)
+  const V v5140 = InterleaveLower(d, vA, vB);  // int[5,1,4,0]
+  const V vd9c8 = InterleaveLower(d, vC, vD);  // int[d,9,c,8]
+  const V v7362 = InterleaveUpper(d, vA, vB);  // int[7,3,6,2]
+  const V vfbea = InterleaveUpper(d, vC, vD);  // int[f,b,e,a]
+
+  const V v6420 = InterleaveLower(d, v5140, v7362);  // int[6,4,2,0]
+  const V veca8 = InterleaveLower(d, vd9c8, vfbea);  // int[e,c,a,8]
+  const V v7531 = InterleaveUpper(d, v5140, v7362);  // int[7,5,3,1]
+  const V vfdb9 = InterleaveUpper(d, vd9c8, vfbea);  // int[f,d,b,9]
+
+  const V64 v10L = BitCast(d64, InterleaveLower(d, v6420, v7531));  // v10[7..0]
+  const V64 v10U = BitCast(d64, InterleaveLower(d, veca8, vfdb9));  // v10[f..8]
+  const V64 v32L = BitCast(d64, InterleaveUpper(d, v6420, v7531));  // v32[7..0]
+  const V64 v32U = BitCast(d64, InterleaveUpper(d, veca8, vfdb9));  // v32[f..8]
+
+  v0 = BitCast(d, InterleaveLower(d64, v10L, v10U));  // lower 64-bit halves: v0[f..0]
+  v1 = BitCast(d, InterleaveUpper(d64, v10L, v10U));  // upper 64-bit halves: v1[f..0]
+  v2 = BitCast(d, InterleaveLower(d64, v32L, v32U));  // lower 64-bit halves: v2[f..0]
+  v3 = BitCast(d, InterleaveUpper(d64, v32L, v32U));  // upper 64-bit halves: v3[f..0]
+}
+
+template <class D, HWY_IF_LANES_PER_BLOCK_D(D, 8)>  // 8 lanes per block; de-interleaves 4-tuples into v0..v3
+HWY_API void LoadInterleaved4(D d, const TFromD<D>* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1, VFromD<D>& v2,
+                              VFromD<D>& v3) {
+  // In the last step, we interleave by half of the block size, which is usually
+  // 8 bytes but half that for 8-bit x8 vectors.
+  using TW = hwy::UnsignedFromSize<d.MaxBytes() == 8 ? 4 : 8>;  // 4-byte lanes if the vector is 8 bytes, else 8-byte
+  const Repartition<TW, decltype(d)> dw;
+  using VW = VFromD<decltype(dw)>;
+
+  // (Comments are for 256-bit vectors.)
+  // 8 lanes per block; the lowest four blocks are at the bottom of vA..vD.
+  VFromD<D> vA;  // v3210[9]v3210[8] v3210[1]v3210[0]
+  VFromD<D> vB;  // v3210[b]v3210[a] v3210[3]v3210[2]
+  VFromD<D> vC;  // v3210[d]v3210[c] v3210[5]v3210[4]
+  VFromD<D> vD;  // v3210[f]v3210[e] v3210[7]v3210[6]
+  detail::LoadTransposedBlocks4(d, unaligned, vA, vB, vC, vD);
+
+  const VFromD<D> va820 = InterleaveLower(d, vA, vB);  // v3210[a,8] v3210[2,0]
+  const VFromD<D> vec64 = InterleaveLower(d, vC, vD);  // v3210[e,c] v3210[6,4]
+  const VFromD<D> vb931 = InterleaveUpper(d, vA, vB);  // v3210[b,9] v3210[3,1]
+  const VFromD<D> vfd75 = InterleaveUpper(d, vC, vD);  // v3210[f,d] v3210[7,5]
+
+  const VW v10_b830 =  // v10[b..8] v10[3..0]
+      BitCast(dw, InterleaveLower(d, va820, vb931));
+  const VW v10_fc74 =  // v10[f..c] v10[7..4]
+      BitCast(dw, InterleaveLower(d, vec64, vfd75));
+  const VW v32_b830 =  // v32[b..8] v32[3..0]
+      BitCast(dw, InterleaveUpper(d, va820, vb931));
+  const VW v32_fc74 =  // v32[f..c] v32[7..4]
+      BitCast(dw, InterleaveUpper(d, vec64, vfd75));
+
+  v0 = BitCast(d, InterleaveLower(dw, v10_b830, v10_fc74));  // lower wide lanes hold v0
+  v1 = BitCast(d, InterleaveUpper(dw, v10_b830, v10_fc74));  // upper wide lanes hold v1
+  v2 = BitCast(d, InterleaveLower(dw, v32_b830, v32_fc74));  // lower wide lanes hold v2
+  v3 = BitCast(d, InterleaveUpper(dw, v32_b830, v32_fc74));  // upper wide lanes hold v3
+}
+
+template <class D, HWY_IF_LANES_PER_BLOCK_D(D, 4)>  // four lanes per block
+HWY_API void LoadInterleaved4(D d, const TFromD<D>* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1, VFromD<D>& v2,
+                              VFromD<D>& v3) {
+  using Vec = VFromD<D>;
+  Vec tup0;  // v3210[4] v3210[0]
+  Vec tup1;  // v3210[5] v3210[1]
+  Vec tup2;  // v3210[6] v3210[2]
+  Vec tup3;  // v3210[7] v3210[3]
+  detail::LoadTransposedBlocks4(d, unaligned, tup0, tup1, tup2, tup3);
+  const Vec even10 = InterleaveLower(d, tup0, tup2);  // v1[6,4] v0[6,4] v1[2,0] v0[2,0]
+  const Vec odd10 = InterleaveLower(d, tup1, tup3);   // v1[7,5] v0[7,5] v1[3,1] v0[3,1]
+  const Vec even32 = InterleaveUpper(d, tup0, tup2);  // v3[6,4] v2[6,4] v3[2,0] v2[2,0]
+  const Vec odd32 = InterleaveUpper(d, tup1, tup3);   // v3[7,5] v2[7,5] v3[3,1] v2[3,1]
+
+  v0 = InterleaveLower(d, even10, odd10);  // even and odd lanes of v0 zipped back in order
+  v1 = InterleaveUpper(d, even10, odd10);
+  v2 = InterleaveLower(d, even32, odd32);
+  v3 = InterleaveUpper(d, even32, odd32);
+}
+
+template <class D, HWY_IF_LANES_PER_BLOCK_D(D, 2)>  // two lanes per block
+HWY_API void LoadInterleaved4(D d, const TFromD<D>* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1, VFromD<D>& v2,
+                              VFromD<D>& v3) {
+  VFromD<D> blk0, blk1, blk2, blk3;  // filled in memory order by the helper below
+  detail::LoadTransposedBlocks4(d, unaligned, blk0, blk1, blk2, blk3);
+  v0 = InterleaveLower(d, blk0, blk2);  // lower lanes of blocks 0 and 2
+  v1 = InterleaveUpper(d, blk0, blk2);  // upper lanes of blocks 0 and 2
+  v2 = InterleaveLower(d, blk1, blk3);  // lower lanes of blocks 1 and 3
+  v3 = InterleaveUpper(d, blk1, blk3);  // upper lanes of blocks 1 and 3
+}
+
+// Any T x1: with a single lane there is nothing to shuffle; each output is one element of the 4-tuple.
+template <class D, typename T = TFromD<D>, HWY_IF_LANES_D(D, 1)>
+HWY_API void LoadInterleaved4(D d, const T* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1, VFromD<D>& v2,
+                              VFromD<D>& v3) {
+  v0 = LoadU(d, unaligned + 0);
+  v1 = LoadU(d, unaligned + 1);
+  v2 = LoadU(d, unaligned + 2);
+  v3 = LoadU(d, unaligned + 3);
+}
+
+// ------------------------------ StoreInterleaved2
+
+namespace detail {
+
+// Default for <= 128-bit vectors; x86_256 and x86_512 have their own overload.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>  // Stores A then B back-to-back at `unaligned`.
+HWY_INLINE void StoreTransposedBlocks2(VFromD<D> A, VFromD<D> B, D d,
+                                       TFromD<D>* HWY_RESTRICT unaligned) {
+  constexpr size_t kN = MaxLanes(d);  // element stride between consecutive stores
+  StoreU(A, d, unaligned + 0 * kN);
+  StoreU(B, d, unaligned + 1 * kN);
+}
+
+}  // namespace detail
+
+// Vectors of 128 bits or more: writes the lanes of v0 and v1 as alternating elements.
+template <class D, HWY_IF_V_SIZE_GT_D(D, 8)>
+HWY_API void StoreInterleaved2(VFromD<D> v0, VFromD<D> v1, D d,
+                               TFromD<D>* HWY_RESTRICT unaligned) {
+  const auto pairs_lo = InterleaveLower(d, v0, v1);  // .. v1[0] v0[0]
+  const auto pairs_hi = InterleaveUpper(d, v0, v1);  // .. v1[kN/2] v0[kN/2]
+  detail::StoreTransposedBlocks2(pairs_lo, pairs_hi, d, unaligned);
+}
+
+// Vectors of 64 bits or fewer: both inputs fit into one vector of twice the size.
+template <class V, class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API void StoreInterleaved2(V part0, V part1, D d,
+                               TFromD<D>* HWY_RESTRICT unaligned) {
+  const Twice<D> d_twice;  // descriptor with double the lanes of d
+  const auto wide0 = ZeroExtendVector(d_twice, part0);
+  const auto wide1 = ZeroExtendVector(d_twice, part1);
+  const auto interleaved = InterleaveLower(d_twice, wide0, wide1);
+  StoreU(interleaved, d_twice, unaligned);  // single store covers all pairs
+}
+
+// ------------------------------ StoreInterleaved3 (CombineShiftRightBytes,
+// TableLookupBytes)
+
+namespace detail {
+
+// Default for <= 128-bit vectors; x86_256 and x86_512 have their own overload.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>  // Stores A, B, C back-to-back at `unaligned`.
+HWY_INLINE void StoreTransposedBlocks3(VFromD<D> A, VFromD<D> B, VFromD<D> C,
+                                       D d, TFromD<D>* HWY_RESTRICT unaligned) {
+  constexpr size_t kN = MaxLanes(d);  // element stride between consecutive stores
+  StoreU(A, d, unaligned + 0 * kN);
+  StoreU(B, d, unaligned + 1 * kN);
+  StoreU(C, d, unaligned + 2 * kN);
+}
+
+}  // namespace detail
+
+// >= 128-bit vector, 8-bit lanes: interleaves v0, v1, v2 into three output vectors A, B, C and stores them.
+template <class D, HWY_IF_T_SIZE_D(D, 1), HWY_IF_V_SIZE_GT_D(D, 8)>
+HWY_API void StoreInterleaved3(VFromD<D> v0, VFromD<D> v1, VFromD<D> v2, D d,
+                               TFromD<D>* HWY_RESTRICT unaligned) {
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  const auto k5 = Set(du, TU{5});  // added to a shuffle table to advance its lane indices by 5
+  const auto k6 = Set(du, TU{6});  // added to a shuffle table to advance its lane indices by 6
+
+  // Interleave (v0,v1,v2) to (MSB on left, lane 0 on right):
+  // v0[5], v2[4],v1[4],v0[4] .. v2[0],v1[0],v0[0]. We're expanding v0 lanes
+  // to their place, with 0x80 so lanes to be filled from other vectors are 0
+  // to enable blending by ORing together.
+  alignas(16) static constexpr uint8_t tbl_v0[16] = {
+      0, 0x80, 0x80, 1, 0x80, 0x80, 2, 0x80, 0x80,  //
+      3, 0x80, 0x80, 4, 0x80, 0x80, 5};
+  alignas(16) static constexpr uint8_t tbl_v1[16] = {
+      0x80, 0, 0x80, 0x80, 1, 0x80,  //
+      0x80, 2, 0x80, 0x80, 3, 0x80, 0x80, 4, 0x80, 0x80};
+  // The interleaved vectors will be named A, B, C; temporaries with suffix
+  // 0..2 indicate which input vector's lanes they hold.
+  const auto shuf_A0 = LoadDup128(du, tbl_v0);
+  const auto shuf_A1 = LoadDup128(du, tbl_v1);  // cannot reuse shuf_A0 (has 5)
+  const auto shuf_A2 = CombineShiftRightBytes<15>(du, shuf_A1, shuf_A1);  // shuf_A1 moved up by one byte
+  const auto A0 = TableLookupBytesOr0(v0, shuf_A0);  // 5..4..3..2..1..0
+  const auto A1 = TableLookupBytesOr0(v1, shuf_A1);  // ..4..3..2..1..0.
+  const auto A2 = TableLookupBytesOr0(v2, shuf_A2);  // .4..3..2..1..0..
+  const VFromD<D> A = BitCast(d, A0 | A1 | A2);
+
+  // B: v1[10],v0[10], v2[9],v1[9],v0[9] .. , v2[6],v1[6],v0[6], v2[5],v1[5]
+  const auto shuf_B0 = shuf_A2 + k6;  // .A..9..8..7..6..
+  const auto shuf_B1 = shuf_A0 + k5;  // A..9..8..7..6..5
+  const auto shuf_B2 = shuf_A1 + k5;  // ..9..8..7..6..5.
+  const auto B0 = TableLookupBytesOr0(v0, shuf_B0);  // 0x80 entries became 0x85/0x86: MSB still set, still zero
+  const auto B1 = TableLookupBytesOr0(v1, shuf_B1);
+  const auto B2 = TableLookupBytesOr0(v2, shuf_B2);
+  const VFromD<D> B = BitCast(d, B0 | B1 | B2);
+
+  // C: v2[15],v1[15],v0[15] .. v2[11],v1[11],v0[11], v2[10]
+  const auto shuf_C0 = shuf_B2 + k6;  // ..F..E..D..C..B.
+  const auto shuf_C1 = shuf_B0 + k5;  // .F..E..D..C..B..
+  const auto shuf_C2 = shuf_B1 + k5;  // F..E..D..C..B..A
+  const auto C0 = TableLookupBytesOr0(v0, shuf_C0);
+  const auto C1 = TableLookupBytesOr0(v1, shuf_C1);
+  const auto C2 = TableLookupBytesOr0(v2, shuf_C2);
+  const VFromD<D> C = BitCast(d, C0 | C1 | C2);
+
+  detail::StoreTransposedBlocks3(A, B, C, d, unaligned);
+}
+
+// >= 128-bit vector, 16-bit lanes: interleaves v0, v1, v2 into three output vectors A, B, C and stores them.
+template <class D, HWY_IF_T_SIZE_D(D, 2), HWY_IF_V_SIZE_GT_D(D, 8)>
+HWY_API void StoreInterleaved3(VFromD<D> v0, VFromD<D> v1, VFromD<D> v2, D d,
+                               TFromD<D>* HWY_RESTRICT unaligned) {
+  const Repartition<uint8_t, decltype(d)> du8;  // byte view: shuffle indices address bytes
+  const auto k2 = Set(du8, uint8_t{2 * sizeof(TFromD<D>)});  // byte offset of two lanes
+  const auto k3 = Set(du8, uint8_t{3 * sizeof(TFromD<D>)});  // byte offset of three lanes
+
+  // Interleave (v0,v1,v2) to (MSB on left, lane 0 on right):
+  // v1[2],v0[2], v2[1],v1[1],v0[1], v2[0],v1[0],v0[0]. 0x80 so lanes to be
+  // filled from other vectors are 0 for blending. Note that these are byte
+  // indices for 16-bit lanes.
+  alignas(16) static constexpr uint8_t tbl_v1[16] = {
+      0x80, 0x80, 0,    1,    0x80, 0x80, 0x80, 0x80,
+      2,    3,    0x80, 0x80, 0x80, 0x80, 4,    5};
+  alignas(16) static constexpr uint8_t tbl_v2[16] = {
+      0x80, 0x80, 0x80, 0x80, 0,    1,    0x80, 0x80,
+      0x80, 0x80, 2,    3,    0x80, 0x80, 0x80, 0x80};
+
+  // The interleaved vectors will be named A, B, C; temporaries with suffix
+  // 0..2 indicate which input vector's lanes they hold.
+  const auto shuf_A1 = LoadDup128(du8, tbl_v1);  // 2..1..0.
+                                                 // .2..1..0
+  const auto shuf_A0 = CombineShiftRightBytes<2>(du8, shuf_A1, shuf_A1);
+  const auto shuf_A2 = LoadDup128(du8, tbl_v2);  // ..1..0..
+
+  const auto A0 = TableLookupBytesOr0(v0, shuf_A0);
+  const auto A1 = TableLookupBytesOr0(v1, shuf_A1);
+  const auto A2 = TableLookupBytesOr0(v2, shuf_A2);
+  const VFromD<D> A = BitCast(d, A0 | A1 | A2);
+
+  // B: v0[5] v2[4],v1[4],v0[4], v2[3],v1[3],v0[3], v2[2]
+  const auto shuf_B0 = shuf_A1 + k3;  // 5..4..3.
+  const auto shuf_B1 = shuf_A2 + k3;  // ..4..3..
+  const auto shuf_B2 = shuf_A0 + k2;  // .4..3..2
+  const auto B0 = TableLookupBytesOr0(v0, shuf_B0);  // 0x80 entries became 0x84/0x86: MSB still set, still zero
+  const auto B1 = TableLookupBytesOr0(v1, shuf_B1);
+  const auto B2 = TableLookupBytesOr0(v2, shuf_B2);
+  const VFromD<D> B = BitCast(d, B0 | B1 | B2);
+
+  // C: v2[7],v1[7],v0[7], v2[6],v1[6],v0[6], v2[5],v1[5]
+  const auto shuf_C0 = shuf_B1 + k3;  // ..7..6..
+  const auto shuf_C1 = shuf_B2 + k3;  // .7..6..5
+  const auto shuf_C2 = shuf_B0 + k2;  // 7..6..5.
+  const auto C0 = TableLookupBytesOr0(v0, shuf_C0);
+  const auto C1 = TableLookupBytesOr0(v1, shuf_C1);
+  const auto C2 = TableLookupBytesOr0(v2, shuf_C2);
+  const VFromD<D> C = BitCast(d, C0 | C1 | C2);
+
+  detail::StoreTransposedBlocks3(A, B, C, d, unaligned);
+}
+
+// >= 128-bit vector, 32-bit lanes: interleaves v0, v1, v2 into three output vectors A, B, C and stores them.
+template <class D, HWY_IF_T_SIZE_D(D, 4), HWY_IF_V_SIZE_GT_D(D, 8)>
+HWY_API void StoreInterleaved3(VFromD<D> v0, VFromD<D> v1, VFromD<D> v2, D d,
+                               TFromD<D>* HWY_RESTRICT unaligned) {
+  const RepartitionToWide<decltype(d)> dw;  // pairs of lanes viewed as one lane of twice the width
+
+  const VFromD<D> v10_v00 = InterleaveLower(d, v0, v1);  // names: vXY := vX[Y]; here lanes 1,0 = v1[0], v0[0]
+  const VFromD<D> v01_v20 = OddEven(v0, v2);  // odd lanes from v0, even lanes from v2
+  // A: v0[1], v2[0],v1[0],v0[0] (<- lane 0)
+  const VFromD<D> A = BitCast(
+      d, InterleaveLower(dw, BitCast(dw, v10_v00), BitCast(dw, v01_v20)));
+
+  const VFromD<D> v1_321 = ShiftRightLanes<1>(d, v1);  // v1 moved down by one lane
+  const VFromD<D> v0_32 = ShiftRightLanes<2>(d, v0);  // v0 moved down by two lanes
+  const VFromD<D> v21_v11 = OddEven(v2, v1_321);
+  const VFromD<D> v12_v02 = OddEven(v1_321, v0_32);
+  // B: v1[2],v0[2], v2[1],v1[1]
+  const VFromD<D> B = BitCast(
+      d, InterleaveLower(dw, BitCast(dw, v21_v11), BitCast(dw, v12_v02)));
+
+  // Notation refers to the upper 2 lanes of the vector for InterleaveUpper.
+  const VFromD<D> v23_v13 = OddEven(v2, v1_321);  // same expression as v21_v11; named for its upper two lanes
+  const VFromD<D> v03_v22 = OddEven(v0, v2);  // same expression as v01_v20; named for its upper two lanes
+  // C: v2[3],v1[3],v0[3], v2[2]
+  const VFromD<D> C = BitCast(
+      d, InterleaveUpper(dw, BitCast(dw, v03_v22), BitCast(dw, v23_v13)));
+
+  detail::StoreTransposedBlocks3(A, B, C, d, unaligned);
+}
+
+// Vectors of 128 bits or more with 64-bit lanes: each output block pairs two adjacent stream elements.
+template <class D, HWY_IF_T_SIZE_D(D, 8), HWY_IF_V_SIZE_GT_D(D, 8)>
+HWY_API void StoreInterleaved3(VFromD<D> v0, VFromD<D> v1, VFromD<D> v2, D d,
+                               TFromD<D>* HWY_RESTRICT unaligned) {
+  const VFromD<D> out0 = InterleaveLower(d, v0, v1);  // v1[0] v0[0]
+  const VFromD<D> out1 = OddEven(v0, v2);             // v0[1] v2[0]
+  const VFromD<D> out2 = InterleaveUpper(d, v1, v2);  // v2[1] v1[1]
+  detail::StoreTransposedBlocks3(out0, out1, out2, d, unaligned);
+}
+
+// 64-bit vector, 8-bit lanes: emits 24 interleaved bytes as one full 128-bit store plus one 64-bit store.
+template <class D, HWY_IF_T_SIZE_D(D, 1), HWY_IF_V_SIZE_D(D, 8)>
+HWY_API void StoreInterleaved3(VFromD<D> part0, VFromD<D> part1,
+                               VFromD<D> part2, D d,
+                               TFromD<D>* HWY_RESTRICT unaligned) {
+  // Use full vectors for the shuffles and first result.
+  constexpr size_t kFullN = 16 / sizeof(TFromD<D>);  // lanes in a full 128-bit vector
+  const Full128<uint8_t> du;
+  const Full128<TFromD<D>> d_full;
+  const auto k5 = Set(du, uint8_t{5});  // added to a shuffle table to advance its lane indices by 5
+  const auto k6 = Set(du, uint8_t{6});  // added to a shuffle table to advance its lane indices by 6
+
+  const VFromD<decltype(d_full)> v0{part0.raw};  // inputs re-wrapped as full vectors
+  const VFromD<decltype(d_full)> v1{part1.raw};
+  const VFromD<decltype(d_full)> v2{part2.raw};
+
+  // Interleave (v0,v1,v2) to (MSB on left, lane 0 on right):
+  // v0[5], v2[4],v1[4],v0[4] .. v2[0],v1[0],v0[0]. 0x80 so lanes to be
+  // filled from other vectors are 0 for blending.
+  alignas(16) static constexpr uint8_t tbl_v0[16] = {
+      0, 0x80, 0x80, 1, 0x80, 0x80, 2, 0x80, 0x80,  //
+      3, 0x80, 0x80, 4, 0x80, 0x80, 5};
+  alignas(16) static constexpr uint8_t tbl_v1[16] = {
+      0x80, 0, 0x80, 0x80, 1, 0x80,  //
+      0x80, 2, 0x80, 0x80, 3, 0x80, 0x80, 4, 0x80, 0x80};
+  // The interleaved vectors will be named A, B, C; temporaries with suffix
+  // 0..2 indicate which input vector's lanes they hold.
+  const auto shuf_A0 = Load(du, tbl_v0);
+  const auto shuf_A1 = Load(du, tbl_v1);  // cannot reuse shuf_A0 (5 in MSB)
+  const auto shuf_A2 = CombineShiftRightBytes<15>(du, shuf_A1, shuf_A1);  // shuf_A1 moved up by one byte
+  const auto A0 = TableLookupBytesOr0(v0, shuf_A0);  // 5..4..3..2..1..0
+  const auto A1 = TableLookupBytesOr0(v1, shuf_A1);  // ..4..3..2..1..0.
+  const auto A2 = TableLookupBytesOr0(v2, shuf_A2);  // .4..3..2..1..0..
+  const auto A = BitCast(d_full, A0 | A1 | A2);
+  StoreU(A, d_full, unaligned + 0 * kFullN);  // first 16 bytes of output
+
+  // Second (HALF) vector: v2[7],v1[7],v0[7], v2[6],v1[6],v0[6], v2[5],v1[5]
+  const auto shuf_B0 = shuf_A2 + k6;  // ..7..6..
+  const auto shuf_B1 = shuf_A0 + k5;  // .7..6..5
+  const auto shuf_B2 = shuf_A1 + k5;  // 7..6..5.
+  const auto B0 = TableLookupBytesOr0(v0, shuf_B0);
+  const auto B1 = TableLookupBytesOr0(v1, shuf_B1);
+  const auto B2 = TableLookupBytesOr0(v2, shuf_B2);
+  const VFromD<D> B{BitCast(d_full, B0 | B1 | B2).raw};  // re-wrapped with the 64-bit vector type of d
+  StoreU(B, d, unaligned + 1 * kFullN);  // stored with d, i.e. the remaining 8 bytes
+}
+
+// 64-bit vector, 16-bit lanes: emits 12 interleaved lanes as one full 128-bit store plus one 64-bit store.
+template <class D, HWY_IF_T_SIZE_D(D, 2), HWY_IF_LANES_D(D, 4)>
+HWY_API void StoreInterleaved3(VFromD<D> part0, VFromD<D> part1,
+                               VFromD<D> part2, D dh,
+                               TFromD<D>* HWY_RESTRICT unaligned) {
+  const Twice<D> d_full;  // descriptor with twice the lanes of the input
+  const Full128<uint8_t> du8;
+  const auto k2 = Set(du8, uint8_t{2 * sizeof(TFromD<D>)});  // byte offset of two lanes
+  const auto k3 = Set(du8, uint8_t{3 * sizeof(TFromD<D>)});  // byte offset of three lanes
+
+  const VFromD<decltype(d_full)> v0{part0.raw};  // inputs re-wrapped as full vectors
+  const VFromD<decltype(d_full)> v1{part1.raw};
+  const VFromD<decltype(d_full)> v2{part2.raw};
+
+  // Interleave part (v0,v1,v2) to full (MSB on left, lane 0 on right):
+  // v1[2],v0[2], v2[1],v1[1],v0[1], v2[0],v1[0],v0[0]. We're expanding v0 lanes
+  // to their place, with 0x80 so lanes to be filled from other vectors are 0
+  // to enable blending by ORing together.
+  alignas(16) static constexpr uint8_t tbl_v1[16] = {
+      0x80, 0x80, 0,    1,    0x80, 0x80, 0x80, 0x80,
+      2,    3,    0x80, 0x80, 0x80, 0x80, 4,    5};
+  alignas(16) static constexpr uint8_t tbl_v2[16] = {
+      0x80, 0x80, 0x80, 0x80, 0,    1,    0x80, 0x80,
+      0x80, 0x80, 2,    3,    0x80, 0x80, 0x80, 0x80};
+
+  // The interleaved vectors will be named A, B; temporaries with suffix
+  // 0..2 indicate which input vector's lanes they hold.
+  const auto shuf_A1 = Load(du8, tbl_v1);  // 2..1..0.
+                                           // .2..1..0
+  const auto shuf_A0 = CombineShiftRightBytes<2>(du8, shuf_A1, shuf_A1);
+  const auto shuf_A2 = Load(du8, tbl_v2);  // ..1..0..
+
+  const auto A0 = TableLookupBytesOr0(v0, shuf_A0);
+  const auto A1 = TableLookupBytesOr0(v1, shuf_A1);
+  const auto A2 = TableLookupBytesOr0(v2, shuf_A2);
+  const VFromD<decltype(d_full)> A = BitCast(d_full, A0 | A1 | A2);
+  StoreU(A, d_full, unaligned);  // first full vector of output
+
+  // Second (HALF) vector: v2[3],v1[3],v0[3], v2[2]
+  const auto shuf_B0 = shuf_A1 + k3;  // ..3.
+  const auto shuf_B1 = shuf_A2 + k3;  // .3..
+  const auto shuf_B2 = shuf_A0 + k2;  // 3..2
+  const auto B0 = TableLookupBytesOr0(v0, shuf_B0);
+  const auto B1 = TableLookupBytesOr0(v1, shuf_B1);
+  const auto B2 = TableLookupBytesOr0(v2, shuf_B2);
+  const VFromD<decltype(d_full)> B = BitCast(d_full, B0 | B1 | B2);
+  StoreU(VFromD<D>{B.raw}, dh, unaligned + MaxLanes(d_full));  // stored with dh, i.e. the input vector size
+}
+
+// 64-bit vector, 32-bit lanes: two lanes per vector, so each output pairs two adjacent stream elements.
+template <class D, HWY_IF_T_SIZE_D(D, 4), HWY_IF_LANES_D(D, 2)>
+HWY_API void StoreInterleaved3(VFromD<D> v0, VFromD<D> v1, VFromD<D> v2, D d,
+                               TFromD<D>* HWY_RESTRICT unaligned) {
+  constexpr size_t kLanes = MaxLanes(d);  // element stride between the three stores
+  // Build the three output vectors first, then write them in memory order.
+  const VFromD<D> out0 = InterleaveLower(d, v0, v1);  // v1[0] v0[0]
+  const VFromD<D> out1 = OddEven(v0, v2);             // v0[1] v2[0]
+  const VFromD<D> out2 = InterleaveUpper(d, v1, v2);  // v2[1] v1[1]
+  StoreU(out0, d, unaligned);
+  StoreU(out1, d, unaligned + kLanes);
+  StoreU(out2, d, unaligned + 2 * kLanes);
+}
+
+// 64-bit lanes are handled by the N=1 case below.
+
+// <= 32-bit vector, 8-bit lanes: interleaves within one full vector, then copies out only the used bytes.
+template <class D, HWY_IF_T_SIZE_D(D, 1), HWY_IF_V_SIZE_LE_D(D, 4),
+          HWY_IF_LANES_GT_D(D, 1)>
+HWY_API void StoreInterleaved3(VFromD<D> part0, VFromD<D> part1,
+                               VFromD<D> part2, D d,
+                               TFromD<D>* HWY_RESTRICT unaligned) {
+  // Use full vectors for the shuffles and result.
+  const Full128<uint8_t> du;
+  const Full128<TFromD<D>> d_full;
+
+  const VFromD<decltype(d_full)> v0{part0.raw};  // inputs re-wrapped as full vectors
+  const VFromD<decltype(d_full)> v1{part1.raw};
+  const VFromD<decltype(d_full)> v2{part2.raw};
+
+  // Interleave (v0,v1,v2). We're expanding v0 lanes to their place, with 0x80
+  // so lanes to be filled from other vectors are 0 to enable blending by ORing
+  // together.
+  alignas(16) static constexpr uint8_t tbl_v0[16] = {
+      0,    0x80, 0x80, 1,    0x80, 0x80, 2,    0x80,
+      0x80, 3,    0x80, 0x80, 0x80, 0x80, 0x80, 0x80};
+  // The interleaved vector will be named A; temporaries with suffix
+  // 0..2 indicate which input vector's lanes they hold.
+  const auto shuf_A0 = Load(du, tbl_v0);
+  const auto shuf_A1 = CombineShiftRightBytes<15>(du, shuf_A0, shuf_A0);  // shuf_A0 moved up by one byte
+  const auto shuf_A2 = CombineShiftRightBytes<14>(du, shuf_A0, shuf_A0);  // shuf_A0 moved up by two bytes
+  const auto A0 = TableLookupBytesOr0(v0, shuf_A0);  // ......3..2..1..0
+  const auto A1 = TableLookupBytesOr0(v1, shuf_A1);  // .....3..2..1..0.
+  const auto A2 = TableLookupBytesOr0(v2, shuf_A2);  // ....3..2..1..0..
+  const VFromD<decltype(d_full)> A = BitCast(d_full, A0 | A1 | A2);
+  alignas(16) TFromD<D> buf[MaxLanes(d_full)];  // staging buffer for the full-vector store
+  StoreU(A, d_full, buf);
+  CopyBytes<d.MaxBytes() * 3>(buf, unaligned);  // writes only 3 * d.MaxBytes() bytes to the destination
+}
+
+// 32-bit vector, 16-bit lanes: interleaves within one full vector, then copies out only the used bytes.
+template <class D, HWY_IF_T_SIZE_D(D, 2), HWY_IF_LANES_D(D, 2)>
+HWY_API void StoreInterleaved3(VFromD<D> part0, VFromD<D> part1,
+                               VFromD<D> part2, D d,
+                               TFromD<D>* HWY_RESTRICT unaligned) {
+  // Use full vectors for the shuffles and result.
+  const Full128<uint8_t> du8;
+  const Full128<TFromD<D>> d_full;
+
+  const VFromD<decltype(d_full)> v0{part0.raw};  // inputs re-wrapped as full vectors
+  const VFromD<decltype(d_full)> v1{part1.raw};
+  const VFromD<decltype(d_full)> v2{part2.raw};
+
+  // Interleave (v0,v1,v2). The table below places v2's lanes (as byte-index
+  // pairs); 0x80 entries yield 0 so the three lookups can be blended by ORing
+  // together. The v1 and v0 tables are derived from it by byte rotation.
+  alignas(16) static constexpr uint8_t tbl_v2[16] = {
+      0x80, 0x80, 0x80, 0x80, 0,    1,    0x80, 0x80,
+      0x80, 0x80, 2,    3,    0x80, 0x80, 0x80, 0x80};
+  // The interleaved vector will be named A; temporaries with suffix
+  // 0..2 indicate which input vector's lanes they hold.
+  const auto shuf_A2 =  // ..1..0..
+      Load(du8, tbl_v2);
+  const auto shuf_A1 =  // ...1..0.
+      CombineShiftRightBytes<2>(du8, shuf_A2, shuf_A2);
+  const auto shuf_A0 =  // ....1..0
+      CombineShiftRightBytes<4>(du8, shuf_A2, shuf_A2);
+  const auto A0 = TableLookupBytesOr0(v0, shuf_A0);  // ..1..0
+  const auto A1 = TableLookupBytesOr0(v1, shuf_A1);  // .1..0.
+  const auto A2 = TableLookupBytesOr0(v2, shuf_A2);  // 1..0..
+  const auto A = BitCast(d_full, A0 | A1 | A2);
+  alignas(16) TFromD<D> buf[MaxLanes(d_full)];  // staging buffer for the full-vector store
+  StoreU(A, d_full, buf);
+  CopyBytes<d.MaxBytes() * 3>(buf, unaligned);  // writes only 3 * d.MaxBytes() bytes to the destination
+}
+
+// Single-element vector, any lane size: store the three lanes one by one.
+template <class D, HWY_IF_LANES_D(D, 1)>
+HWY_API void StoreInterleaved3(VFromD<D> v0, VFromD<D> v1, VFromD<D> v2, D d,
+                               TFromD<D>* HWY_RESTRICT unaligned) {
+  StoreU(v0, d, unaligned + 0);  // output element 0
+  StoreU(v1, d, unaligned + 1);  // output element 1
+  StoreU(v2, d, unaligned + 2);  // output element 2
+}
+
+// ------------------------------ StoreInterleaved4
+
+namespace detail {
+
+// Default for <= 128-bit vectors; x86_256 and x86_512 have their own overload.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_INLINE void StoreTransposedBlocks4(VFromD<D> vA, VFromD<D> vB, VFromD<D> vC,
+                                       VFromD<D> vD, D d,
+                                       TFromD<D>* HWY_RESTRICT unaligned) {
+  // Blocks go out back to back, each MaxLanes(d) lanes after the previous.
+  StoreU(vA, d, unaligned);
+  StoreU(vB, d, unaligned + MaxLanes(d));
+  StoreU(vC, d, unaligned + MaxLanes(d) * 2);
+  StoreU(vD, d, unaligned + MaxLanes(d) * 3);
+}
+
+}  // namespace detail
+
+// >= 128-bit vector, 8..32-bit lanes
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 8), HWY_IF_V_SIZE_GT_D(D, 8)>
+HWY_API void StoreInterleaved4(VFromD<D> v0, VFromD<D> v1, VFromD<D> v2,
+                               VFromD<D> v3, D d,
+                               TFromD<D>* HWY_RESTRICT unaligned) {
+  const RepartitionToWide<decltype(d)> dw;  // tag used for the zipped pairs
+  const auto v10L = ZipLower(dw, v0, v1);  // .. v1[0] v0[0]
+  const auto v32L = ZipLower(dw, v2, v3);  // .. v3[0] v2[0]
+  const auto v10U = ZipUpper(dw, v0, v1);  // same pairing via ZipUpper
+  const auto v32U = ZipUpper(dw, v2, v3);
+  // The interleaved vectors are vA, vB, vC, vD; they are stored in that order.
+  const VFromD<D> vA = BitCast(d, InterleaveLower(dw, v10L, v32L));  // 3210
+  const VFromD<D> vB = BitCast(d, InterleaveUpper(dw, v10L, v32L));
+  const VFromD<D> vC = BitCast(d, InterleaveLower(dw, v10U, v32U));
+  const VFromD<D> vD = BitCast(d, InterleaveUpper(dw, v10U, v32U));
+  detail::StoreTransposedBlocks4(vA, vB, vC, vD, d, unaligned);
+}
+
+// >= 128-bit vector, 64-bit lanes
+template <class D, HWY_IF_T_SIZE_D(D, 8), HWY_IF_V_SIZE_GT_D(D, 8)>
+HWY_API void StoreInterleaved4(VFromD<D> v0, VFromD<D> v1, VFromD<D> v2,
+                               VFromD<D> v3, D d,
+                               TFromD<D>* HWY_RESTRICT unaligned) {
+  // The interleaved vectors are vA, vB, vC, vD; they are stored in that order.
+  const VFromD<D> vA = InterleaveLower(d, v0, v1);  // v1[0] v0[0]
+  const VFromD<D> vB = InterleaveLower(d, v2, v3);  // v3[0] v2[0]
+  const VFromD<D> vC = InterleaveUpper(d, v0, v1);  // v1[1] v0[1]
+  const VFromD<D> vD = InterleaveUpper(d, v2, v3);  // v3[1] v2[1]
+  detail::StoreTransposedBlocks4(vA, vB, vC, vD, d, unaligned);
+}
+
+// 64-bit vector, 8..32-bit lanes
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 8), HWY_IF_V_SIZE_D(D, 8)>
+HWY_API void StoreInterleaved4(VFromD<D> part0, VFromD<D> part1,
+                               VFromD<D> part2, VFromD<D> part3, D /* tag */,
+                               TFromD<D>* HWY_RESTRICT unaligned) {
+  // Use full vectors so the four 64-bit parts need only two 128-bit stores.
+  const Full128<TFromD<D>> d_full;
+  const RepartitionToWide<decltype(d_full)> dw;
+  const VFromD<decltype(d_full)> v0{part0.raw};
+  const VFromD<decltype(d_full)> v1{part1.raw};
+  const VFromD<decltype(d_full)> v2{part2.raw};
+  const VFromD<decltype(d_full)> v3{part3.raw};
+  const auto v10 = ZipLower(dw, v0, v1);  // v1[0] v0[0]
+  const auto v32 = ZipLower(dw, v2, v3);  // v3[0] v2[0]
+  const auto A = BitCast(d_full, InterleaveLower(dw, v10, v32));
+  const auto B = BitCast(d_full, InterleaveUpper(dw, v10, v32));
+  StoreU(A, d_full, unaligned);  // first full vector of output
+  StoreU(B, d_full, unaligned + MaxLanes(d_full));  // second full vector
+}
+
+// 64-bit vector, 64-bit lane
+template <class D, HWY_IF_T_SIZE_D(D, 8), HWY_IF_LANES_D(D, 1)>
+HWY_API void StoreInterleaved4(VFromD<D> part0, VFromD<D> part1,
+                               VFromD<D> part2, VFromD<D> part3, D /* tag */,
+                               TFromD<D>* HWY_RESTRICT unaligned) {
+  // Use full vectors so the four single-lane parts need only two stores.
+  const Full128<TFromD<D>> d_full;
+  const VFromD<decltype(d_full)> v0{part0.raw};
+  const VFromD<decltype(d_full)> v1{part1.raw};
+  const VFromD<decltype(d_full)> v2{part2.raw};
+  const VFromD<decltype(d_full)> v3{part3.raw};
+  const auto A = InterleaveLower(d_full, v0, v1);  // v1[0] v0[0]
+  const auto B = InterleaveLower(d_full, v2, v3);  // v3[0] v2[0]
+  StoreU(A, d_full, unaligned);  // output elements 0 and 1
+  StoreU(B, d_full, unaligned + MaxLanes(d_full));  // output elements 2 and 3
+}
+
+// <= 32-bit vectors
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4)>
+HWY_API void StoreInterleaved4(VFromD<D> part0, VFromD<D> part1,
+                               VFromD<D> part2, VFromD<D> part3, D d,
+                               TFromD<D>* HWY_RESTRICT unaligned) {
+  // Interleave in one full vector, then copy out only the bytes that are used.
+  const Full128<TFromD<D>> d_full;
+  const RepartitionToWide<decltype(d_full)> dw;
+  const VFromD<decltype(d_full)> v0{part0.raw};
+  const VFromD<decltype(d_full)> v1{part1.raw};
+  const VFromD<decltype(d_full)> v2{part2.raw};
+  const VFromD<decltype(d_full)> v3{part3.raw};
+  const auto v10 = ZipLower(dw, v0, v1);  // .. v1[0] v0[0]
+  const auto v32 = ZipLower(dw, v2, v3);  // .. v3[0] v2[0]
+  const auto v3210 = BitCast(d_full, InterleaveLower(dw, v10, v32));
+  alignas(16) TFromD<D> buf[MaxLanes(d_full)];
+  StoreU(v3210, d_full, buf);  // spill the full vector to a local buffer
+  CopyBytes<d.MaxBytes() * 4>(buf, unaligned);  // copy only 4 * d.MaxBytes()
+}
+
+#endif  // HWY_NATIVE_LOAD_STORE_INTERLEAVED
+
+// ------------------------------ LoadN
+#if (defined(HWY_NATIVE_LOAD_N) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_LOAD_N
+#undef HWY_NATIVE_LOAD_N
+#else
+#define HWY_NATIVE_LOAD_N
+#endif
+
+#if HWY_MEM_OPS_MIGHT_FAULT && !HWY_HAVE_SCALABLE
+namespace detail {
+
+template <class DTo, class DFrom>
+HWY_INLINE VFromD<DTo> LoadNResizeBitCast(DTo d_to, DFrom d_from,
+                                          VFromD<DFrom> v) {
+#if HWY_TARGET <= HWY_SSE2
+  // On SSE2/SSSE3/SSE4, LoadU already zeroes every lane of v.raw beyond the
+  // first (lowest-index) Lanes(d_from) lanes whenever
+  // sizeof(decltype(v.raw)) > d_from.MaxBytes(), so a plain resize is enough.
+  (void)d_from;  // unused on this path
+  return ResizeBitCast(d_to, v);
+#else
+  // On other targets such as PPC/NEON, lanes of v.raw beyond the first
+  // (lowest-index) Lanes(d_from) lanes may hold non-zero data when
+  // sizeof(decltype(v.raw)) > d_from.MaxBytes(), hence the zero-extending cast.
+  return ZeroExtendResizeBitCast(d_to, d_from, v);
+#endif
+}
+
+}  // namespace detail
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 1),
+          typename T = TFromD<D>>
+HWY_API VFromD<D> LoadN(D d, const T* HWY_RESTRICT p,
+                        size_t max_lanes_to_load) {
+  return (max_lanes_to_load == 0) ? Zero(d) : LoadU(d, p);  // all or nothing
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 2),
+          typename T = TFromD<D>>
+HWY_API VFromD<D> LoadN(D d, const T* HWY_RESTRICT p,
+                        size_t max_lanes_to_load) {
+  // Loads up to two lanes from p; lanes that are not loaded are zero.
+  if (max_lanes_to_load == 0) return Zero(d);
+  if (max_lanes_to_load >= 2) return LoadU(d, p);
+
+  // Exactly one lane requested: read a single-lane vector and widen it to D
+  // through LoadNResizeBitCast.
+  const FixedTag<TFromD<D>, 1> single;
+  const auto lane0 = LoadU(single, p);
+  return detail::LoadNResizeBitCast(d, single, lane0);
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 4),
+          typename T = TFromD<D>>
+HWY_API VFromD<D> LoadN(D d, const T* HWY_RESTRICT p,
+                        size_t max_lanes_to_load) {
+  const FixedTag<TFromD<D>, 2> d_pair;
+  const Half<decltype(d_pair)> d_one;
+
+  // Up to four lanes are read from p; lanes that are not read are zero.
+  if (max_lanes_to_load >= 4) return LoadU(d, p);
+  if (max_lanes_to_load == 0) return Zero(d);
+  if (max_lanes_to_load == 1) {
+    return detail::LoadNResizeBitCast(d, d_one, LoadU(d_one, p));
+  }
+  // Two or three lanes: lanes 0..1 are always read as a pair.
+  const auto pair01 = LoadU(d_pair, p);
+  if (max_lanes_to_load == 2) {
+    return detail::LoadNResizeBitCast(d, d_pair, pair01);
+  }
+  const auto lane2 =
+      detail::LoadNResizeBitCast(d_pair, d_one, LoadU(d_one, p + 2));
+  return Combine(d, lane2, pair01);
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 8),
+          typename T = TFromD<D>>
+HWY_API VFromD<D> LoadN(D d, const T* HWY_RESTRICT p,
+                        size_t max_lanes_to_load) {
+  const FixedTag<TFromD<D>, 4> d4;
+  const Half<decltype(d4)> d2;
+  const Half<decltype(d2)> d1;
+  // Loads min(max_lanes_to_load, 8) lanes from p; the remaining lanes are zero.
+  if (max_lanes_to_load <= 1)
+    return (max_lanes_to_load == 1)
+               ? detail::LoadNResizeBitCast(d, d1, LoadU(d1, p))
+               : Zero(d);
+  else if (max_lanes_to_load >= 8)
+    return LoadU(d, p);
+  // 2..7 lanes: bit 2 of the count selects a full leading group of 4 lanes.
+  const size_t leading_len = max_lanes_to_load & 4;  // 0 or 4
+  VFromD<decltype(d4)> v_trailing = Zero(d4);  // lanes after the leading group
+
+  if ((max_lanes_to_load & 2) != 0) {
+    const auto v_trailing_lo2 = LoadU(d2, p + leading_len);
+    if ((max_lanes_to_load & 1) != 0) {
+      v_trailing = Combine(  // three trailing lanes: a pair plus one more
+          d4,
+          detail::LoadNResizeBitCast(d2, d1, LoadU(d1, p + leading_len + 2)),
+          v_trailing_lo2);
+    } else {
+      v_trailing = detail::LoadNResizeBitCast(d4, d2, v_trailing_lo2);
+    }
+  } else if ((max_lanes_to_load & 1) != 0) {
+    v_trailing = detail::LoadNResizeBitCast(d4, d1, LoadU(d1, p + leading_len));
+  }
+
+  if (leading_len != 0) {
+    return Combine(d, v_trailing, LoadU(d4, p));  // leading 4 in lower half
+  } else {
+    return detail::LoadNResizeBitCast(d, d4, v_trailing);
+  }
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 16),
+          typename T = TFromD<D>>
+HWY_API VFromD<D> LoadN(D d, const T* HWY_RESTRICT p,
+                        size_t max_lanes_to_load) {
+  const FixedTag<TFromD<D>, 8> d8;
+  const Half<decltype(d8)> d4;
+  const Half<decltype(d4)> d2;
+  const Half<decltype(d2)> d1;
+  // Loads min(max_lanes_to_load, 16) lanes from p; remaining lanes are zero.
+  if (max_lanes_to_load <= 1)
+    return (max_lanes_to_load == 1)
+               ? detail::LoadNResizeBitCast(d, d1, LoadU(d1, p))
+               : Zero(d);
+  else if (max_lanes_to_load >= 16)
+    return LoadU(d, p);
+  // 2..15 lanes: bits 2..3 of the count give the lanes read in groups of 4.
+  const size_t leading_len = max_lanes_to_load & 12;  // 0, 4, 8 or 12
+  VFromD<decltype(d4)> v_trailing = Zero(d4);  // lanes after the leading part
+
+  if ((max_lanes_to_load & 2) != 0) {
+    const auto v_trailing_lo2 = LoadU(d2, p + leading_len);
+    if ((max_lanes_to_load & 1) != 0) {
+      v_trailing = Combine(  // three trailing lanes: a pair plus one more
+          d4,
+          detail::LoadNResizeBitCast(d2, d1, LoadU(d1, p + leading_len + 2)),
+          v_trailing_lo2);
+    } else {
+      v_trailing = detail::LoadNResizeBitCast(d4, d2, v_trailing_lo2);
+    }
+  } else if ((max_lanes_to_load & 1) != 0) {
+    v_trailing = detail::LoadNResizeBitCast(d4, d1, LoadU(d1, p + leading_len));
+  }
+
+  if (leading_len != 0) {
+    if (leading_len >= 8) {  // lanes 0..7 are read whole
+      const auto v_hi7 = ((leading_len & 4) != 0)  // upper half, <= 7 lanes
+                             ? Combine(d8, v_trailing, LoadU(d4, p + 8))
+                             : detail::LoadNResizeBitCast(d8, d4, v_trailing);
+      return Combine(d, v_hi7, LoadU(d8, p));
+    } else {  // leading_len == 4: lanes 0..3 whole, trailing lanes follow
+      return detail::LoadNResizeBitCast(d, d8,
+                                        Combine(d8, v_trailing, LoadU(d4, p)));
+    }
+  } else {
+    return detail::LoadNResizeBitCast(d, d4, v_trailing);
+  }
+}
+
+#if HWY_MAX_BYTES >= 32
+template <class D, HWY_IF_V_SIZE_GT_D(D, 16), typename T = TFromD<D>>
+HWY_API VFromD<D> LoadN(D d, const T* HWY_RESTRICT p,
+                        size_t max_lanes_to_load) {
+  // Vectors wider than 128 bits: split into halves and recurse on the half
+  // that the request covers only partially.
+  if (max_lanes_to_load >= Lanes(d)) return LoadU(d, p);
+
+  const Half<decltype(d)> d_half;
+  const size_t lanes_per_half = Lanes(d_half);
+  if (max_lanes_to_load > lanes_per_half) {
+    // Lower half is complete; the remainder lands in the upper half.
+    const size_t remaining = max_lanes_to_load - lanes_per_half;
+    const auto lower = LoadU(d_half, p);
+    const auto upper = LoadN(d_half, p + lanes_per_half, remaining);
+    return Combine(d, upper, lower);
+  }
+  return ZeroExtendVector(d, LoadN(d_half, p, max_lanes_to_load));
+}
+#endif  // HWY_MAX_BYTES >= 32
+#else   // !HWY_MEM_OPS_MIGHT_FAULT || HWY_HAVE_SCALABLE
+template <class D, typename T = TFromD<D>>
+HWY_API VFromD<D> LoadN(D d, const T* HWY_RESTRICT p,
+                        size_t max_lanes_to_load) {
+#if HWY_MEM_OPS_MIGHT_FAULT
+  if (max_lanes_to_load == 0) return Zero(d);  // do not touch memory at all
+#endif
+  // Clamp the request to the vector length and load only those lanes.
+  const size_t num_lanes = HWY_MIN(max_lanes_to_load, Lanes(d));
+  return MaskedLoad(FirstN(d, num_lanes), d, p);
+}
+#endif  // HWY_MEM_OPS_MIGHT_FAULT && !HWY_HAVE_SCALABLE
+
+#endif
+
+// ------------------------------ StoreN
+#if (defined(HWY_NATIVE_STORE_N) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_STORE_N
+#undef HWY_NATIVE_STORE_N
+#else
+#define HWY_NATIVE_STORE_N
+#endif
+
+#if HWY_MEM_OPS_MIGHT_FAULT && !HWY_HAVE_SCALABLE
+namespace detail {
+
+template <class DH, HWY_IF_V_SIZE_LE_D(DH, 4)>
+HWY_INLINE VFromD<DH> StoreNGetUpperHalf(DH dh, VFromD<Twice<DH>> v) {
+  constexpr size_t kMinShrVectBytes =  // size of the vector that gets shifted
+      (HWY_TARGET == HWY_NEON || HWY_TARGET == HWY_NEON_WITHOUT_AES) ? 8 : 16;
+  const FixedTag<uint8_t, kMinShrVectBytes> d_shift;
+  return ResizeBitCast(  // shift v right by dh.MaxBytes() bytes, then narrow
+      dh, ShiftRightBytes<dh.MaxBytes()>(d_shift, ResizeBitCast(d_shift, v)));
+}
+
+template <class DH, HWY_IF_V_SIZE_GT_D(DH, 4)>
+HWY_INLINE VFromD<DH> StoreNGetUpperHalf(DH dh, VFromD<Twice<DH>> v) {
+  return UpperHalf(dh, v);  // halves wider than 4 bytes use UpperHalf directly
+}
+
+}  // namespace detail
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 1),
+          typename T = TFromD<D>>
+HWY_API void StoreN(VFromD<D> v, D d, T* HWY_RESTRICT p,
+                    size_t max_lanes_to_store) {
+  // A one-lane vector is either written whole or not at all.
+  if (max_lanes_to_store == 0) return;
+  StoreU(v, d, p);
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 2),
+          typename T = TFromD<D>>
+HWY_API void StoreN(VFromD<D> v, D d, T* HWY_RESTRICT p,
+                    size_t max_lanes_to_store) {
+  const FixedTag<TFromD<D>, 1> d_one;
+  if (max_lanes_to_store >= 2) {
+    StoreU(v, d, p);  // the request covers the whole vector
+  } else if (max_lanes_to_store != 0) {
+    StoreU(LowerHalf(d_one, v), d_one, p);  // exactly one lane: lane 0 only
+  }
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 4),
+          typename T = TFromD<D>>
+HWY_API void StoreN(VFromD<D> v, D d, T* HWY_RESTRICT p,
+                    size_t max_lanes_to_store) {
+  const FixedTag<TFromD<D>, 2> d_pair;
+  const Half<decltype(d_pair)> d_one;
+  // Writes min(max_lanes_to_store, 4) lanes of v to p, lowest lanes first.
+  if (max_lanes_to_store == 0) return;
+  if (max_lanes_to_store == 1) {
+    StoreU(ResizeBitCast(d_one, v), d_one, p);
+  } else if (max_lanes_to_store >= 4) {
+    StoreU(v, d, p);
+  } else {
+    // Two or three lanes: lanes 0..1 as a pair, then lane 2 if requested.
+    StoreU(ResizeBitCast(d_pair, v), d_pair, p);
+    if (max_lanes_to_store == 2) return;
+    const auto upper = detail::StoreNGetUpperHalf(d_pair, v);
+    StoreU(ResizeBitCast(d_one, upper), d_one, p + 2);
+  }
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 8),
+          typename T = TFromD<D>>
+HWY_API void StoreN(VFromD<D> v, D d, T* HWY_RESTRICT p,
+                    size_t max_lanes_to_store) {
+  const FixedTag<TFromD<D>, 4> d_half;
+  const Half<decltype(d_half)> d_pair;
+  const Half<decltype(d_pair)> d_one;
+  // Writes min(max_lanes_to_store, 8) lanes of v to p, lowest lanes first.
+  if (max_lanes_to_store == 0) return;
+  if (max_lanes_to_store == 1) {
+    StoreU(ResizeBitCast(d_one, v), d_one, p);
+  } else if (max_lanes_to_store >= 8) {
+    StoreU(v, d, p);
+  } else if (max_lanes_to_store < 4) {
+    // Two or three lanes: everything comes from the lower half.
+    StoreN(LowerHalf(d_half, v), d_half, p, max_lanes_to_store);
+  } else {  // 4..7 lanes: whole lower half, then the rest from the upper half
+    StoreU(LowerHalf(d_half, v), d_half, p);
+    StoreN(detail::StoreNGetUpperHalf(d_half, v), d_half, p + 4,
+           max_lanes_to_store - 4);
+  }
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 16),
+          typename T = TFromD<D>>
+HWY_API void StoreN(VFromD<D> v, D d, T* HWY_RESTRICT p,
+                    size_t max_lanes_to_store) {
+  const FixedTag<TFromD<D>, 8> d_half;
+  const Half<decltype(d_half)> d_quarter;
+  const Half<decltype(d_quarter)> d_pair;
+  const Half<decltype(d_pair)> d_one;
+  // Writes min(max_lanes_to_store, 16) lanes of v to p, lowest lanes first.
+  if (max_lanes_to_store == 0) return;
+  if (max_lanes_to_store == 1) {
+    StoreU(ResizeBitCast(d_one, v), d_one, p);
+  } else if (max_lanes_to_store >= 16) {
+    StoreU(v, d, p);
+  } else if (max_lanes_to_store < 8) {
+    // Two to seven lanes: everything comes from the lower half.
+    StoreN(LowerHalf(d_half, v), d_half, p, max_lanes_to_store);
+  } else {  // 8..15 lanes: whole lower half, then the rest from the upper half
+    StoreU(LowerHalf(d_half, v), d_half, p);
+    StoreN(detail::StoreNGetUpperHalf(d_half, v), d_half, p + 8,
+           max_lanes_to_store - 8);
+  }
+}
+
+#if HWY_MAX_BYTES >= 32
+template <class D, HWY_IF_V_SIZE_GT_D(D, 16), typename T = TFromD<D>>
+HWY_API void StoreN(VFromD<D> v, D d, T* HWY_RESTRICT p,
+                    size_t max_lanes_to_store) {
+  const Half<decltype(d)> d_half;
+  const size_t lanes_per_half = Lanes(d_half);
+
+  if (max_lanes_to_store >= Lanes(d)) {
+    // The request covers the whole vector.
+    StoreU(v, d, p);
+  } else if (max_lanes_to_store > lanes_per_half) {
+    // Lower half is written whole; the upper half takes the remainder.
+    const size_t remaining = max_lanes_to_store - lanes_per_half;
+    StoreU(LowerHalf(d_half, v), d_half, p);
+    StoreN(UpperHalf(d_half, v), d_half, p + lanes_per_half, remaining);
+  } else {
+    StoreN(LowerHalf(d_half, v), d_half, p, max_lanes_to_store);
+  }
+}
+#endif  // HWY_MAX_BYTES >= 32
+
+#else  // !HWY_MEM_OPS_MIGHT_FAULT || HWY_HAVE_SCALABLE
+template <class D, typename T = TFromD<D>>
+HWY_API void StoreN(VFromD<D> v, D d, T* HWY_RESTRICT p,
+                    size_t max_lanes_to_store) {
+  const size_t N = Lanes(d);
+  const size_t clamped_max_lanes_to_store = HWY_MIN(max_lanes_to_store, N);
+#if HWY_MEM_OPS_MIGHT_FAULT
+  if (clamped_max_lanes_to_store == 0) return;  // nothing to write
+#endif
+  // Write only the first clamped_max_lanes_to_store lanes of v.
+  BlendedStore(v, FirstN(d, clamped_max_lanes_to_store), d, p);
+
+#if HWY_MEM_OPS_MIGHT_FAULT
+  detail::MaybeUnpoison(p, clamped_max_lanes_to_store);  // NOTE(review): presumably a sanitizer hint - confirm
+#endif
+}
+#endif  // HWY_MEM_OPS_MIGHT_FAULT && !HWY_HAVE_SCALABLE
+
+#endif  // (defined(HWY_NATIVE_STORE_N) == defined(HWY_TARGET_TOGGLE))
+
+// ------------------------------ Scatter
+
+#if (defined(HWY_NATIVE_SCATTER) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_SCATTER
+#undef HWY_NATIVE_SCATTER
+#else
+#define HWY_NATIVE_SCATTER
+#endif
+
+template <class D, typename T = TFromD<D>>
+HWY_API void ScatterOffset(VFromD<D> v, D d, T* HWY_RESTRICT base,
+                           VFromD<RebindToSigned<D>> offset) {
+  const RebindToSigned<decltype(d)> di;
+  using TI = TFromD<decltype(di)>;
+  static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
+
+  // Spill byte offsets and values to the stack; lanes are written one by one.
+  HWY_ALIGN TI byte_offsets[MaxLanes(d)];
+  Store(offset, di, byte_offsets);
+  HWY_ALIGN T values[MaxLanes(d)];
+  Store(v, d, values);
+
+  uint8_t* const dst = reinterpret_cast<uint8_t*>(base);
+  for (size_t lane = 0; lane < MaxLanes(d); ++lane) {
+    CopyBytes<sizeof(T)>(values + lane, dst + byte_offsets[lane]);
+  }
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API void ScatterIndex(VFromD<D> v, D d, T* HWY_RESTRICT base,
+                          VFromD<RebindToSigned<D>> index) {
+  const RebindToSigned<decltype(d)> di;
+  using TI = TFromD<decltype(di)>;
+  static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
+
+  HWY_ALIGN TI positions[MaxLanes(d)];
+  Store(index, di, positions);
+  HWY_ALIGN T values[MaxLanes(d)];
+  Store(v, d, values);
+
+  // Element indices (not byte offsets): lane i goes to base[positions[i]].
+  for (size_t lane = 0; lane < MaxLanes(d); ++lane) {
+    base[positions[lane]] = values[lane];
+  }
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API void MaskedScatterIndex(VFromD<D> v, MFromD<D> m, D d,
+                                T* HWY_RESTRICT base,
+                                VFromD<RebindToSigned<D>> index) {
+  const RebindToSigned<decltype(d)> di;
+  using TI = TFromD<decltype(di)>;
+  static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
+
+  // Spill mask, indices and values to the stack for per-lane handling.
+  HWY_ALIGN TI active[MaxLanes(di)];
+  Store(BitCast(di, VecFromMask(d, m)), di, active);
+  HWY_ALIGN TI positions[MaxLanes(d)];
+  Store(index, di, positions);
+  HWY_ALIGN T values[MaxLanes(d)];
+  Store(v, d, values);
+
+  for (size_t lane = 0; lane < MaxLanes(d); ++lane) {
+    if (active[lane] == 0) continue;  // masked-off lane: memory is not touched
+    base[positions[lane]] = values[lane];
+  }
+}
+
+#endif  // (defined(HWY_NATIVE_SCATTER) == defined(HWY_TARGET_TOGGLE))
+
+// ------------------------------ Gather
+
+#if (defined(HWY_NATIVE_GATHER) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_GATHER
+#undef HWY_NATIVE_GATHER
+#else
+#define HWY_NATIVE_GATHER
+#endif
+
+template <class D, typename T = TFromD<D>>
+HWY_API VFromD<D> GatherOffset(D d, const T* HWY_RESTRICT base,
+                               VFromD<RebindToSigned<D>> offset) {
+  const RebindToSigned<D> di;
+  using TI = TFromD<decltype(di)>;
+  static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
+
+  // Spill the byte offsets, fetch lane by lane, then reload as one vector.
+  HWY_ALIGN TI byte_offsets[MaxLanes(d)];
+  Store(offset, di, byte_offsets);
+  const uint8_t* const src = reinterpret_cast<const uint8_t*>(base);
+  HWY_ALIGN T gathered[MaxLanes(d)];
+  for (size_t lane = 0; lane < MaxLanes(d); ++lane) {
+    CopyBytes<sizeof(T)>(src + byte_offsets[lane], gathered + lane);
+  }
+  return Load(d, gathered);
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API VFromD<D> GatherIndex(D d, const T* HWY_RESTRICT base,
+                              VFromD<RebindToSigned<D>> index) {
+  const RebindToSigned<D> di;
+  using TI = TFromD<decltype(di)>;
+  static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
+
+  // Element indices (not byte offsets): lane i is read from base[index i].
+  HWY_ALIGN TI positions[MaxLanes(d)];
+  Store(index, di, positions);
+  HWY_ALIGN T gathered[MaxLanes(d)];
+  for (size_t lane = 0; lane < MaxLanes(d); ++lane) {
+    gathered[lane] = base[positions[lane]];
+  }
+  return Load(d, gathered);
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API VFromD<D> MaskedGatherIndex(MFromD<D> m, D d,
+                                    const T* HWY_RESTRICT base,
+                                    VFromD<RebindToSigned<D>> index) {
+  const RebindToSigned<D> di;
+  using TI = TFromD<decltype(di)>;
+  static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
+
+  // Masked-off lanes yield zero and never dereference base.
+  HWY_ALIGN TI active[MaxLanes(di)];
+  Store(BitCast(di, VecFromMask(d, m)), di, active);
+  HWY_ALIGN TI positions[MaxLanes(di)];
+  Store(index, di, positions);
+
+  HWY_ALIGN T gathered[MaxLanes(d)];
+  for (size_t lane = 0; lane < MaxLanes(d); ++lane) {
+    gathered[lane] = (active[lane] != 0) ? base[positions[lane]] : T{0};
+  }
+  return Load(d, gathered);
+}
+
+#endif  // (defined(HWY_NATIVE_GATHER) == defined(HWY_TARGET_TOGGLE))
+
+// ------------------------------ Integer AbsDiff and SumsOf8AbsDiff
+
+#if (defined(HWY_NATIVE_INTEGER_ABS_DIFF) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_INTEGER_ABS_DIFF
+#undef HWY_NATIVE_INTEGER_ABS_DIFF
+#else
+#define HWY_NATIVE_INTEGER_ABS_DIFF
+#endif
+
+template <class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V)>
+HWY_API V AbsDiff(V a, V b) {
+  return Sub(Max(a, b), Min(a, b));  // larger operand minus the smaller one
+}
+
+#endif  // HWY_NATIVE_INTEGER_ABS_DIFF
+
+#if (defined(HWY_NATIVE_SUMS_OF_8_ABS_DIFF) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_SUMS_OF_8_ABS_DIFF
+#undef HWY_NATIVE_SUMS_OF_8_ABS_DIFF
+#else
+#define HWY_NATIVE_SUMS_OF_8_ABS_DIFF
+#endif
+
+template <class V, HWY_IF_U8_D(DFromV<V>),
+          HWY_IF_V_SIZE_GT_D(DFromV<V>, (HWY_TARGET == HWY_SCALAR ? 0 : 4))>
+HWY_API Vec<Repartition<uint64_t, DFromV<V>>> SumsOf8AbsDiff(V a, V b) {
+  return SumsOf8(AbsDiff(a, b));  // per-lane |a - b| for u8, then SumsOf8
+}
+
+#endif  // HWY_NATIVE_SUMS_OF_8_ABS_DIFF
+
+// ------------------------------ SaturatedAdd/SaturatedSub for UI32/UI64
+
+#if (defined(HWY_NATIVE_I32_SATURATED_ADDSUB) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_I32_SATURATED_ADDSUB
+#undef HWY_NATIVE_I32_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_I32_SATURATED_ADDSUB
+#endif
+
+template <class V, HWY_IF_I32_D(DFromV<V>)>
+HWY_API V SaturatedAdd(V a, V b) {
+  // Overflow iff a and b agree in sign but the wrapped sum does not.
+  const DFromV<decltype(a)> d;
+  const auto wrapped = Add(a, b);
+  const auto overflowed = AndNot(Xor(a, b), Xor(a, wrapped));
+  const auto limit = Xor(BroadcastSignBit(a), Set(d, LimitsMax<int32_t>()));
+  return IfNegativeThenElse(overflowed, limit, wrapped);
+}
+
+template <class V, HWY_IF_I32_D(DFromV<V>)>
+HWY_API V SaturatedSub(V a, V b) {
+  // Overflow iff a, b differ in sign and the wrapped difference flips a's sign.
+  const DFromV<decltype(a)> d;
+  const auto wrapped = Sub(a, b);
+  const auto overflowed = And(Xor(a, b), Xor(a, wrapped));
+  const auto limit = Xor(BroadcastSignBit(a), Set(d, LimitsMax<int32_t>()));
+  return IfNegativeThenElse(overflowed, limit, wrapped);
+}
+
+#endif  // HWY_NATIVE_I32_SATURATED_ADDSUB
+
+#if (defined(HWY_NATIVE_I64_SATURATED_ADDSUB) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_I64_SATURATED_ADDSUB
+#undef HWY_NATIVE_I64_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_I64_SATURATED_ADDSUB
+#endif
+
+template <class V, HWY_IF_I64_D(DFromV<V>)>
+HWY_API V SaturatedAdd(V a, V b) {
+  // Overflow iff a and b agree in sign but the wrapped sum does not.
+  const DFromV<decltype(a)> d;
+  const auto wrapped = Add(a, b);
+  const auto overflowed = AndNot(Xor(a, b), Xor(a, wrapped));
+  const auto limit = Xor(BroadcastSignBit(a), Set(d, LimitsMax<int64_t>()));
+  return IfNegativeThenElse(overflowed, limit, wrapped);
+}
+
+template <class V, HWY_IF_I64_D(DFromV<V>)>
+HWY_API V SaturatedSub(V a, V b) {
+  // Overflow iff a, b differ in sign and the wrapped difference flips a's sign.
+  const DFromV<decltype(a)> d;
+  const auto wrapped = Sub(a, b);
+  const auto overflowed = And(Xor(a, b), Xor(a, wrapped));
+  const auto limit = Xor(BroadcastSignBit(a), Set(d, LimitsMax<int64_t>()));
+  return IfNegativeThenElse(overflowed, limit, wrapped);
+}
+
+#endif  // HWY_NATIVE_I64_SATURATED_ADDSUB
+
+#if (defined(HWY_NATIVE_U32_SATURATED_ADDSUB) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_U32_SATURATED_ADDSUB
+#undef HWY_NATIVE_U32_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_U32_SATURATED_ADDSUB
+#endif
+
+template <class V, HWY_IF_U32_D(DFromV<V>)>
+HWY_API V SaturatedAdd(V a, V b) {
+  return Add(a, Min(b, Not(a)));  // Not(a) is the headroom left above a
+}
+
+template <class V, HWY_IF_U32_D(DFromV<V>)>
+HWY_API V SaturatedSub(V a, V b) {
+  return Sub(a, Min(a, b));  // subtract at most a, so the result stops at 0
+}
+
+#endif  // HWY_NATIVE_U32_SATURATED_ADDSUB
+
+#if (defined(HWY_NATIVE_U64_SATURATED_ADDSUB) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_U64_SATURATED_ADDSUB
+#undef HWY_NATIVE_U64_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_U64_SATURATED_ADDSUB
+#endif
+
+template <class V, HWY_IF_U64_D(DFromV<V>)>
+HWY_API V SaturatedAdd(V a, V b) {
+  return Add(a, Min(b, Not(a)));  // Not(a) is the headroom left above a
+}
+
+template <class V, HWY_IF_U64_D(DFromV<V>)>
+HWY_API V SaturatedSub(V a, V b) {
+  return Sub(a, Min(a, b));  // subtract at most a, so the result stops at 0
+}
+
+#endif  // HWY_NATIVE_U64_SATURATED_ADDSUB
+
+// ------------------------------ Unsigned to signed demotions
+
+template <class DN, HWY_IF_SIGNED_D(DN), class V, HWY_IF_UNSIGNED_V(V),
+          class V2 = VFromD<Rebind<TFromV<V>, DN>>,
+          hwy::EnableIf<(sizeof(TFromD<DN>) < sizeof(TFromV<V>))>* = nullptr,
+          HWY_IF_LANES_D(DFromV<V>, HWY_MAX_LANES_D(DFromV<V2>))>
+HWY_API VFromD<DN> DemoteTo(DN dn, V v) {
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;  // v reinterpreted as signed
+  const RebindToUnsigned<decltype(dn)> dn_u;  // result compared as unsigned
+
+  // First, do a signed to signed demotion. Any input greater than
+  // hwy::HighestValue<MakeSigned<TFromV<V>>>() is seen as negative here and
+  // therefore comes out of this step as a negative value.
+  const auto i2i_demote_result = DemoteTo(dn, BitCast(di, v));
+
+  // Second, replace those negative values with hwy::HighestValue<TFromD<DN>>()
+  // by taking an unsigned Min against that limit.
+  const auto max_signed_val = Set(dn, hwy::HighestValue<TFromD<DN>>());
+
+  return BitCast(
+      dn, Min(BitCast(dn_u, i2i_demote_result), BitCast(dn_u, max_signed_val)));
+}
+
+#if HWY_TARGET != HWY_SCALAR || HWY_IDE
+template <class DN, HWY_IF_SIGNED_D(DN), class V, HWY_IF_UNSIGNED_V(V),
+          class V2 = VFromD<Repartition<TFromV<V>, DN>>,
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2),
+          HWY_IF_LANES_D(DFromV<V>, HWY_MAX_LANES_D(DFromV<V2>))>
+HWY_API VFromD<DN> ReorderDemote2To(DN dn, V a, V b) {
+  const DFromV<decltype(a)> d;
+  const RebindToSigned<decltype(d)> di;  // a and b reinterpreted as signed
+  const RebindToUnsigned<decltype(dn)> dn_u;  // result compared as unsigned
+
+  // First, do a signed to signed demotion. Any input greater than
+  // hwy::HighestValue<MakeSigned<TFromV<V>>>() is seen as negative here and
+  // therefore comes out of this step as a negative value.
+  const auto i2i_demote_result =
+      ReorderDemote2To(dn, BitCast(di, a), BitCast(di, b));
+
+  // Second, replace those negative values with hwy::HighestValue<TFromD<DN>>()
+  // by taking an unsigned Min against that limit.
+  const auto max_signed_val = Set(dn, hwy::HighestValue<TFromD<DN>>());
+
+  return BitCast(
+      dn, Min(BitCast(dn_u, i2i_demote_result), BitCast(dn_u, max_signed_val)));
+}
+#endif
+
+// ------------------------------ PromoteLowerTo
+
+// There is no codegen advantage for a native version of this. It is provided
+// only for convenience.
+template <class D, class V>
+HWY_API VFromD<D> PromoteLowerTo(D d, V v) {
+  // Lanes(d) may differ from Lanes(DFromV<V>()). Take the lane type from V
+  // because it cannot be deduced from D (it could be either bf16 or f16).
+  const Rebind<TFromV<V>, decltype(d)> dh;  // V's lane type rebound onto d
+  return PromoteTo(d, LowerHalf(dh, v));  // promote only the lower half of v
+}
+
+// ------------------------------ PromoteUpperTo
+
+#if (defined(HWY_NATIVE_PROMOTE_UPPER_TO) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_PROMOTE_UPPER_TO
+#undef HWY_NATIVE_PROMOTE_UPPER_TO
+#else
+#define HWY_NATIVE_PROMOTE_UPPER_TO
+#endif
+
+// This requires UpperHalf.
+#if HWY_TARGET != HWY_SCALAR || HWY_IDE
+
+template <class D, class V>
+HWY_API VFromD<D> PromoteUpperTo(D d, V v) {
+  // Lanes(d) may differ from Lanes(DFromV<V>()). Take the lane type from V
+  // because it cannot be deduced from D (it could be either bf16 or f16).
+  const Rebind<TFromV<V>, decltype(d)> dh;  // V's lane type rebound onto d
+  return PromoteTo(d, UpperHalf(dh, v));  // promote only the upper half of v
+}
+
+#endif  // HWY_TARGET != HWY_SCALAR
+#endif  // HWY_NATIVE_PROMOTE_UPPER_TO
+
+// ------------------------------ float16_t <-> float
+
+#if (defined(HWY_NATIVE_F16C) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_F16C
+#undef HWY_NATIVE_F16C
+#else
+#define HWY_NATIVE_F16C
+#endif
+
+template <class D, HWY_IF_F32_D(D)>
+HWY_API VFromD<D> PromoteTo(D df32, VFromD<Rebind<float16_t, D>> v) {
+  const RebindToSigned<decltype(df32)> di32;
+  const RebindToUnsigned<decltype(df32)> du32;
+  const Rebind<uint16_t, decltype(df32)> du16;
+  using VU32 = VFromD<decltype(du32)>;
+  // Split the 16-bit pattern into sign (bit 15), exponent and mantissa fields.
+  const VU32 bits16 = PromoteTo(du32, BitCast(du16, v));
+  const VU32 sign = ShiftRight<15>(bits16);
+  const VU32 biased_exp = And(ShiftRight<10>(bits16), Set(du32, 0x1F));
+  const VU32 mantissa = And(bits16, Set(du32, 0x3FF));  // low 10 bits
+  const VU32 subnormal =  // mantissa * 2^-24; selected when biased_exp == 0
+      BitCast(du32, Mul(ConvertTo(df32, BitCast(di32, mantissa)),
+                        Set(df32, 1.0f / 16384 / 1024)));
+  // NOTE(review): biased_exp == 0x1F gets no dedicated branch here - confirm.
+  const VU32 biased_exp32 = Add(biased_exp, Set(du32, 127 - 15));  // rebias
+  const VU32 mantissa32 = ShiftLeft<23 - 10>(mantissa);  // align to 23 bits
+  const VU32 normal = Or(ShiftLeft<23>(biased_exp32), mantissa32);
+  const VU32 bits32 = IfThenElse(Eq(biased_exp, Zero(du32)), subnormal, normal);
+  return BitCast(df32, Or(ShiftLeft<31>(sign), bits32));  // sign into bit 31
+}
+
+template <class D, HWY_IF_F16_D(D)>
+HWY_API VFromD<D> DemoteTo(D df16, VFromD<Rebind<float, D>> v) {
+  const RebindToUnsigned<decltype(df16)> du16;
+  const Rebind<uint32_t, decltype(df16)> du32;
+  const RebindToSigned<decltype(du32)> di32;
+  using VU32 = VFromD<decltype(du32)>;
+  using VI32 = VFromD<decltype(di32)>;
+  // Split the 32-bit pattern into sign (bit 31), exponent and mantissa fields.
+  const VU32 bits32 = BitCast(du32, v);
+  const VU32 sign = ShiftRight<31>(bits32);
+  const VU32 biased_exp32 = And(ShiftRight<23>(bits32), Set(du32, 0xFF));
+  const VU32 mantissa32 = And(bits32, Set(du32, 0x7FFFFF));  // low 23 bits
+  // exp is the unbiased exponent (bias 127 removed), clamped to at most 15.
+  const VI32 k15 = Set(di32, 15);
+  const VI32 exp = Min(Sub(BitCast(di32, biased_exp32), Set(di32, 127)), k15);
+  const MFromD<decltype(di32)> is_tiny = Lt(exp, Set(di32, -24));
+  // is_tiny lanes become zero at the end; is_subnormal lanes get exponent 0.
+  const MFromD<decltype(di32)> is_subnormal = Lt(exp, Set(di32, -14));
+  const VU32 biased_exp16 =
+      BitCast(du32, IfThenZeroElse(is_subnormal, Add(exp, k15)));
+  const VU32 sub_exp = BitCast(du32, Sub(Set(di32, -14), exp));  // [1, 11)
+  // Clamp shift counts to prevent warnings in emu_128 Shr.
+  const VU32 k31 = Set(du32, 31);
+  const VU32 shift_m = Min(Add(Set(du32, 13), sub_exp), k31);
+  const VU32 shift_1 = Min(Sub(Set(du32, 10), sub_exp), k31);
+  const VU32 sub_m = Add(Shl(Set(du32, 1), shift_1), Shr(mantissa32, shift_m));
+  const VU32 mantissa16 = IfThenElse(RebindMask(du32, is_subnormal), sub_m,
+                                     ShiftRight<13>(mantissa32));  // <1024
+  // Reassemble: sign in bit 15, exponent from bit 10 up, mantissa below.
+  const VU32 sign16 = ShiftLeft<15>(sign);
+  const VU32 normal16 = Or3(sign16, ShiftLeft<10>(biased_exp16), mantissa16);
+  const VI32 bits16 = IfThenZeroElse(is_tiny, BitCast(di32, normal16));
+  return BitCast(df16, DemoteTo(du16, bits16));  // narrow to 16 bits per lane
+}
+
+#endif  // HWY_NATIVE_F16C
+
+// ------------------------------ OrderedTruncate2To
+
+#if HWY_IDE || \
+    (defined(HWY_NATIVE_ORDERED_TRUNCATE_2_TO) == defined(HWY_TARGET_TOGGLE))
+
+#ifdef HWY_NATIVE_ORDERED_TRUNCATE_2_TO
+#undef HWY_NATIVE_ORDERED_TRUNCATE_2_TO
+#else
+#define HWY_NATIVE_ORDERED_TRUNCATE_2_TO
+#endif
+
+// (Must come after HWY_TARGET_TOGGLE, else we don't reset it for scalar)
+#if HWY_TARGET != HWY_SCALAR || HWY_IDE
+// Generic implementation: reinterprets both wide inputs as vectors of the
+// narrow lane type and combines them with ConcatEven(dn, b, a).
+template <class DN, HWY_IF_UNSIGNED_D(DN), class V, HWY_IF_UNSIGNED_V(V),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2),
+          HWY_IF_LANES_D(DFromV<VFromD<DN>>, HWY_MAX_LANES_D(DFromV<V>) * 2)>
+HWY_API VFromD<DN> OrderedTruncate2To(DN dn, V a, V b) {
+  const auto narrow_a = BitCast(dn, a);
+  const auto narrow_b = BitCast(dn, b);
+  return ConcatEven(dn, narrow_b, narrow_a);
+}
+#endif  // HWY_TARGET != HWY_SCALAR
+#endif  // HWY_NATIVE_ORDERED_TRUNCATE_2_TO
+
+// -------------------- LeadingZeroCount, TrailingZeroCount, HighestSetBitIndex
+
+#if (defined(HWY_NATIVE_LEADING_ZERO_COUNT) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_LEADING_ZERO_COUNT
+#undef HWY_NATIVE_LEADING_ZERO_COUNT
+#else
+#define HWY_NATIVE_LEADING_ZERO_COUNT
+#endif
+
+namespace detail {
+
+// u32 lanes: converts each lane to float and returns the float's bit pattern
+// shifted right by 23, i.e. the bits above the 23-bit mantissa field.
+template <class D, HWY_IF_U32_D(D)>
+HWY_INLINE VFromD<D> UIntToF32BiasedExp(D d, VFromD<D> v) {
+  const RebindToFloat<decltype(d)> df;
+#if HWY_TARGET > HWY_AVX3 && HWY_TARGET <= HWY_SSE2
+  const RebindToSigned<decltype(d)> di;
+  const Repartition<int16_t, decltype(d)> di16;
+
+  // On SSE2/SSSE3/SSE4/AVX2, do an int32_t to float conversion, followed by
+  // an unsigned right shift of the uint32_t bit representation of the
+  // floating point values by 23, followed by an int16_t Min operation, as we
+  // are only interested in the biased exponent that would result from a
+  // uint32_t to float conversion.
+
+  // The int32_t to float vector conversion is preferred on these targets
+  // because it is a single instruction, whereas a uint32_t to float vector
+  // conversion requires multiple instructions there.
+
+  const auto as_float_bits = BitCast(d, ConvertTo(df, BitCast(di, v)));
+  const auto shifted = ShiftRight<23>(as_float_bits);
+  const auto upper_bound = Set(d, 158);
+  return BitCast(d, Min(BitCast(di16, shifted), BitCast(di16, upper_bound)));
+#else
+  const auto as_float_bits = BitCast(d, ConvertTo(df, v));
+  return BitCast(d, ShiftRight<23>(as_float_bits));
+#endif
+}
+
+// Variant of UIntToF32BiasedExp for u32 lanes that assumes every v[i] is
+// between 0 and 2147483647, so the conversion may treat the input as signed
+// on the targets selected below.
+template <class V, HWY_IF_U32_D(DFromV<V>)>
+HWY_INLINE V I32RangeU32ToF32BiasedExp(V v) {
+  const DFromV<decltype(v)> d;
+  const RebindToFloat<decltype(d)> df;
+  // Tag of the integer type fed to ConvertTo.
+#if HWY_TARGET > HWY_AVX3 && HWY_TARGET <= HWY_SSE2
+  const RebindToSigned<decltype(d)> d_conv;
+#else
+  const RebindToUnsigned<decltype(d)> d_conv;
+#endif
+  const auto as_float = ConvertTo(df, BitCast(d_conv, v));
+  const auto as_float_bits = BitCast(d, as_float);
+  return ShiftRight<23>(as_float_bits);
+}
+
+// u16 lanes, at most HWY_MAX_BYTES / 4 of them: promote to u32, compute the
+// biased exponent there, then truncate the result back to u16.
+template <class D, HWY_IF_U16_D(D), HWY_IF_LANES_LE_D(D, HWY_MAX_BYTES / 4)>
+HWY_INLINE VFromD<D> UIntToF32BiasedExp(D d, VFromD<D> v) {
+  const Rebind<uint32_t, decltype(d)> du32;
+  const auto widened = PromoteTo(du32, v);
+  const auto exp_u32 = I32RangeU32ToF32BiasedExp(widened);
+  return TruncateTo(d, exp_u32);
+}
+
+#if HWY_TARGET != HWY_SCALAR
+// u16 lanes, more than HWY_MAX_BYTES / 4 of them: process the lower and upper
+// halves separately as u32, then recombine the two results into one u16
+// vector.
+template <class D, HWY_IF_U16_D(D), HWY_IF_LANES_GT_D(D, HWY_MAX_BYTES / 4)>
+HWY_INLINE VFromD<D> UIntToF32BiasedExp(D d, VFromD<D> v) {
+  const Half<decltype(d)> dh;
+  const Rebind<uint32_t, decltype(dh)> du32;
+
+  const auto lower_u32 = PromoteTo(du32, LowerHalf(dh, v));
+  const auto upper_u32 = PromoteTo(du32, UpperHalf(dh, v));
+
+  const auto lower_exp = I32RangeU32ToF32BiasedExp(lower_u32);
+  const auto upper_exp = I32RangeU32ToF32BiasedExp(upper_u32);
+#if HWY_TARGET <= HWY_SSE2
+  // These targets narrow via the signed OrderedDemote2To.
+  const RebindToSigned<decltype(du32)> di32;
+  const RebindToSigned<decltype(d)> di;
+  const auto narrowed = OrderedDemote2To(di, BitCast(di32, lower_exp),
+                                         BitCast(di32, upper_exp));
+  return BitCast(d, narrowed);
+#else
+  return OrderedTruncate2To(d, lower_exp, upper_exp);
+#endif
+}
+#endif  // HWY_TARGET != HWY_SCALAR
+
+// u8 lanes, at most HWY_MAX_BYTES / 4 of them: promote to u32, compute the
+// biased exponent there, then narrow the result with U8FromU32.
+template <class D, HWY_IF_U8_D(D), HWY_IF_LANES_LE_D(D, HWY_MAX_BYTES / 4)>
+HWY_INLINE VFromD<D> UIntToF32BiasedExp(D d, VFromD<D> v) {
+  const Rebind<uint32_t, decltype(d)> du32;
+  const auto widened = PromoteTo(du32, v);
+  const auto exp_u32 = I32RangeU32ToF32BiasedExp(widened);
+  return U8FromU32(exp_u32);
+}
+
+#if HWY_TARGET != HWY_SCALAR
+// u8 lanes, more than HWY_MAX_BYTES / 4 and at most HWY_MAX_BYTES / 2 of
+// them: process both halves as u32, merge them into one 16-bit vector, then
+// narrow that vector to u8.
+template <class D, HWY_IF_U8_D(D), HWY_IF_LANES_GT_D(D, HWY_MAX_BYTES / 4),
+          HWY_IF_LANES_LE_D(D, HWY_MAX_BYTES / 2)>
+HWY_INLINE VFromD<D> UIntToF32BiasedExp(D d, VFromD<D> v) {
+  const Half<decltype(d)> dh;
+  const Rebind<uint32_t, decltype(dh)> du32;
+  const Repartition<uint16_t, decltype(du32)> du16;
+
+  const auto lower_u32 = PromoteTo(du32, LowerHalf(dh, v));
+  const auto upper_u32 = PromoteTo(du32, UpperHalf(dh, v));
+
+  const auto lower_exp = I32RangeU32ToF32BiasedExp(lower_u32);
+  const auto upper_exp = I32RangeU32ToF32BiasedExp(upper_u32);
+
+#if HWY_TARGET <= HWY_SSE2
+  // These targets narrow via the signed demote operations.
+  const RebindToSigned<decltype(du32)> di32;
+  const RebindToSigned<decltype(du16)> di16;
+  const auto exp_i16 = OrderedDemote2To(di16, BitCast(di32, lower_exp),
+                                        BitCast(di32, upper_exp));
+  return DemoteTo(d, exp_i16);
+#else
+  const auto exp_u16 = OrderedTruncate2To(du16, lower_exp, upper_exp);
+  return TruncateTo(d, exp_u16);
+#endif
+}
+
+// u8 lanes, more than HWY_MAX_BYTES / 2 of them: split the input into four
+// quarters, process each as u32, then narrow in two steps (32 -> 16 -> 8 bit)
+// while recombining the quarters in their original order.
+template <class D, HWY_IF_U8_D(D), HWY_IF_LANES_GT_D(D, HWY_MAX_BYTES / 2)>
+HWY_INLINE VFromD<D> UIntToF32BiasedExp(D d, VFromD<D> v) {
+  const Half<decltype(d)> dh;
+  const Half<decltype(dh)> dq;
+  const Rebind<uint32_t, decltype(dq)> du32;
+  const Repartition<uint16_t, decltype(du32)> du16;
+
+  const auto lower_half = LowerHalf(dh, v);
+  const auto upper_half = UpperHalf(dh, v);
+
+  // Quarters 0..3, from lowest to highest lanes, widened to u32.
+  const auto quarter0 = PromoteTo(du32, LowerHalf(dq, lower_half));
+  const auto quarter1 = PromoteTo(du32, UpperHalf(dq, lower_half));
+  const auto quarter2 = PromoteTo(du32, LowerHalf(dq, upper_half));
+  const auto quarter3 = PromoteTo(du32, UpperHalf(dq, upper_half));
+
+  const auto exp0 = I32RangeU32ToF32BiasedExp(quarter0);
+  const auto exp1 = I32RangeU32ToF32BiasedExp(quarter1);
+  const auto exp2 = I32RangeU32ToF32BiasedExp(quarter2);
+  const auto exp3 = I32RangeU32ToF32BiasedExp(quarter3);
+
+#if HWY_TARGET <= HWY_SSE2
+  // These targets narrow via the signed OrderedDemote2To.
+  const RebindToSigned<decltype(du32)> di32;
+  const RebindToSigned<decltype(du16)> di16;
+
+  const auto lower_i16 =
+      OrderedDemote2To(di16, BitCast(di32, exp0), BitCast(di32, exp1));
+  const auto upper_i16 =
+      OrderedDemote2To(di16, BitCast(di32, exp2), BitCast(di32, exp3));
+  return OrderedDemote2To(d, lower_i16, upper_i16);
+#else
+  const auto lower_u16 = OrderedTruncate2To(du16, exp0, exp1);
+  const auto upper_u16 = OrderedTruncate2To(du16, exp2, exp3);
+  return OrderedTruncate2To(d, lower_u16, upper_u16);
+#endif
+}
+#endif  // HWY_TARGET != HWY_SCALAR
+
+// Tag type in which the Min/Max steps of the biased-exponent helpers and the
+// zero-count functions are carried out. The choice depends on the target:
+// - HWY_SCALAR: the unsigned tag of the same lane size;
+// - targets in [HWY_SSSE3, HWY_SSE2]: a uint8_t repartition;
+// - otherwise: an unsigned repartition with lanes of at most 4 bytes.
+#if HWY_TARGET == HWY_SCALAR
+template <class DSrc>
+using F32ExpLzcntMinMaxRepartition = RebindToUnsigned<DSrc>;
+#elif HWY_TARGET >= HWY_SSSE3 && HWY_TARGET <= HWY_SSE2
+template <class DSrc>
+using F32ExpLzcntMinMaxRepartition = Repartition<uint8_t, DSrc>;
+#else
+template <class DSrc>
+using F32ExpLzcntMinMaxRepartition =
+    Repartition<UnsignedFromSize<HWY_MIN(sizeof(TFromD<DSrc>), 4)>, DSrc>;
+#endif
+
+// Vector type corresponding to F32ExpLzcntMinMaxRepartition for the tag of
+// vector type VSrc.
+template <class VSrc>
+using F32ExpLzcntMinMaxCmpV =
+    VFromD<F32ExpLzcntMinMaxRepartition<DFromV<VSrc>>>;
+
+// Reinterprets v as the vector type used for the Min/Max comparisons
+// (F32ExpLzcntMinMaxCmpV<V>); a pure BitCast.
+template <class V>
+HWY_INLINE F32ExpLzcntMinMaxCmpV<V> F32ExpLzcntMinMaxBitCast(V v) {
+  const F32ExpLzcntMinMaxRepartition<DFromV<decltype(v)>> d_cmp;
+  return BitCast(d_cmp, v);
+}
+
+// u64 lanes. On HWY_SCALAR the single lane is converted with a C++ cast.
+// Otherwise each 64-bit lane is handled as two u32 halves: the exponent of a
+// nonzero upper half is increased by 32, and the larger of the two per-half
+// results is returned in the 64-bit lane.
+template <class D, HWY_IF_U64_D(D)>
+HWY_INLINE VFromD<D> UIntToF32BiasedExp(D d, VFromD<D> v) {
+#if HWY_TARGET == HWY_SCALAR
+  const uint64_t lane = GetLane(v);
+  const float as_float = static_cast<float>(lane);
+  uint32_t as_float_bits;
+  CopySameSize(&as_float, &as_float_bits);
+  return Set(d, static_cast<uint64_t>(as_float_bits >> 23));
+#else
+  const Repartition<uint32_t, decltype(d)> du32;
+  const auto half_exp = UIntToF32BiasedExp(du32, BitCast(du32, v));
+  // The constant has 32 in its upper u32 half and 0 in its lower half; it is
+  // zeroed wherever the computed exponent is zero.
+  const auto upper_offset =
+      IfThenZeroElse(Eq(half_exp, Zero(du32)),
+                     BitCast(du32, Set(d, 0x0000002000000000u)));
+  const auto adjusted = Add(half_exp, upper_offset);
+
+  // Max of each u32 with its Reverse2 neighbor, then move the upper half of
+  // each 64-bit lane down into the low bits.
+  const auto swapped = Reverse2(du32, adjusted);
+  return ShiftRight<32>(BitCast(d, Max(F32ExpLzcntMinMaxBitCast(adjusted),
+                                       F32ExpLzcntMinMaxBitCast(swapped))));
+#endif
+}
+
+// Convenience overload: derives the tag from the vector type and forwards to
+// the (tag, vector) overloads.
+template <class V, HWY_IF_UNSIGNED_V(V)>
+HWY_INLINE V UIntToF32BiasedExp(V v) {
+  return UIntToF32BiasedExp(DFromV<decltype(v)>(), v);
+}
+
+// 1- and 2-byte unsigned lanes: the input is returned unchanged.
+template <class V, HWY_IF_UNSIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2))>
+HWY_INLINE V NormalizeForUIntTruncConvToF32(V v) {
+  const V unchanged = v;
+  return unchanged;
+}
+
+// 4- and 8-byte unsigned lanes.
+template <class V, HWY_IF_UNSIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 4) | (1 << 8))>
+HWY_INLINE V NormalizeForUIntTruncConvToF32(V v) {
+  // When v[i] >= 16777216, the bit at HighestSetBitIndex(v[i]) - 24 must be
+  // cleared so that an inexact conversion to single-precision floating point
+  // is rounded down. Clearing every bit of v that is also set in (v >> 24)
+  // achieves this.
+  const V bits_to_clear = ShiftRight<24>(v);
+  return AndNot(bits_to_clear, v);
+}
+
+}  // namespace detail
+
+// Per lane: biased float exponent of the (normalized) unsigned lane value,
+// minus the float exponent bias of 127.
+template <class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V)>
+HWY_API V HighestSetBitIndex(V v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+
+  const auto normalized =
+      detail::NormalizeForUIntTruncConvToF32(BitCast(du, v));
+  const auto biased_exp = detail::UIntToF32BiasedExp(normalized);
+  const auto bias = Set(du, TU{127});
+  return BitCast(d, Sub(biased_exp, bias));
+}
+
+// Per lane: (bit width + 126) minus the biased float exponent of the
+// (normalized) unsigned lane value, limited from above to the bit width via
+// Min.
+template <class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V)>
+HWY_API V LeadingZeroCount(V v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+
+  constexpr TU kLaneBits{sizeof(TU) * 8};
+  const auto normalized =
+      detail::NormalizeForUIntTruncConvToF32(BitCast(du, v));
+  const auto biased_exp = detail::UIntToF32BiasedExp(normalized);
+  const auto raw_count = Sub(Set(du, TU{kLaneBits + 126}), biased_exp);
+
+  const auto limit = Set(du, kLaneBits);
+  return BitCast(d, Min(detail::F32ExpLzcntMinMaxBitCast(raw_count),
+                        detail::F32ExpLzcntMinMaxBitCast(limit)));
+}
+
+// Per lane: isolates the lowest set bit (v & -v), takes the biased float
+// exponent of that value minus 127, and limits the result from above to the
+// lane bit width via Min.
+template <class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V)>
+HWY_API V TrailingZeroCount(V v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const RebindToSigned<decltype(d)> di;
+  using TU = TFromD<decltype(du)>;
+
+  const auto v_signed = BitCast(di, v);
+  const auto isolated = BitCast(du, And(v_signed, Neg(v_signed)));
+
+  constexpr TU kLaneBits{sizeof(TU) * 8};
+  const auto biased_exp = detail::UIntToF32BiasedExp(isolated);
+  const auto raw_count = Sub(biased_exp, Set(du, TU{127}));
+
+  const auto limit = Set(du, kLaneBits);
+  return BitCast(d, Min(detail::F32ExpLzcntMinMaxBitCast(raw_count),
+                        detail::F32ExpLzcntMinMaxBitCast(limit)));
+}
+#endif  // HWY_NATIVE_LEADING_ZERO_COUNT
+
+// ------------------------------ AESRound
+
+// Cannot implement on scalar: need at least 16 bytes for TableLookupBytes.
+#if HWY_TARGET != HWY_SCALAR || HWY_IDE
+
+// Define for white-box testing, even if native instructions are available.
+namespace detail {
+
+// Constant-time: computes inverse in GF(2^4) based on "Accelerating AES with
+// Vector Permute Instructions" and the accompanying assembly language
+// implementation: https://crypto.stanford.edu/vpaes/vpaes.tgz. See also Botan:
+// https://botan.randombit.net/doxygen/aes__vperm_8cpp_source.html .
+//
+// A brute-force 256 byte table lookup can also be made constant-time, and
+// possibly competitive on NEON, but this is more performance-portable
+// especially for x86 and large vectors.
+
+// Shared core of SubBytes and InvSubBytes: changes basis to GF(2^4), computes
+// the inverse there using 16-entry table lookups, then maps the two result
+// nibbles through the caller-supplied tables and XORs them together.
+template <class V>  // u8
+HWY_INLINE V SubBytesMulInverseAndAffineLookup(V state, V affine_tblL,
+                                               V affine_tblU) {
+  const DFromV<V> du;
+  const auto nibble_mask = Set(du, uint8_t{0xF});
+
+  // Change polynomial basis to GF(2^4)
+  {
+    alignas(16) static constexpr uint8_t kBasisLo[16] = {
+        0x00, 0x70, 0x2A, 0x5A, 0x98, 0xE8, 0xB2, 0xC2,
+        0x08, 0x78, 0x22, 0x52, 0x90, 0xE0, 0xBA, 0xCA};
+    alignas(16) static constexpr uint8_t kBasisHi[16] = {
+        0x00, 0x4D, 0x7C, 0x31, 0x7D, 0x30, 0x01, 0x4C,
+        0x81, 0xCC, 0xFD, 0xB0, 0xFC, 0xB1, 0x80, 0xCD};
+    const auto lo = And(state, nibble_mask);
+    const auto hi = ShiftRight<4>(state);  // byte shift => upper bits are zero
+    const auto mapped_lo = TableLookupBytes(LoadDup128(du, kBasisLo), lo);
+    const auto mapped_hi = TableLookupBytes(LoadDup128(du, kBasisHi), hi);
+    state = Xor(mapped_lo, mapped_hi);
+  }
+
+  // Inversion in GF(2^4). Elements 0 represent "infinity" (division by 0) and
+  // cause TableLookupBytesOr0 to return 0.
+  alignas(16) static constexpr uint8_t kZetaInverse[16] = {
+      0x80, 7, 11, 15, 6, 10, 4, 1, 9, 8, 5, 2, 12, 14, 13, 3};
+  alignas(16) static constexpr uint8_t kInverse[16] = {
+      0x80, 1, 8, 13, 15, 6, 5, 14, 2, 12, 11, 10, 9, 3, 7, 4};
+  const auto inv_tbl = LoadDup128(du, kInverse);
+  const auto lo = And(state, nibble_mask);  // low nibble
+  const auto hi = ShiftRight<4>(state);  // byte shift => upper bits are zero
+  const auto mix = Xor(hi, lo);
+  const auto inv_lo = TableLookupBytes(LoadDup128(du, kZetaInverse), lo);
+  const auto inv_hi = TableLookupBytes(inv_tbl, hi);
+  const auto inv_mix = TableLookupBytes(inv_tbl, mix);
+  const auto out_lo = Xor(mix, TableLookupBytesOr0(inv_tbl, Xor(inv_lo, inv_hi)));
+  const auto out_hi = Xor(hi, TableLookupBytesOr0(inv_tbl, Xor(inv_lo, inv_mix)));
+
+  // Final per-nibble lookups in the tables passed by the caller.
+  const auto looked_up_lo = TableLookupBytesOr0(affine_tblL, out_lo);
+  const auto looked_up_hi = TableLookupBytesOr0(affine_tblU, out_hi);
+  return Xor(looked_up_lo, looked_up_hi);
+}
+
+// Runs the shared inverse/lookup core with the kSkewLo/kSkewHi tables, then
+// XORs the constant 0x63 into every byte.
+template <class V>  // u8
+HWY_INLINE V SubBytes(V state) {
+  const DFromV<V> du;
+  // Linear skew (cannot bake 0x63 bias into the table because out* indices
+  // may have the infinity flag set).
+  alignas(16) static constexpr uint8_t kSkewLo[16] = {
+      0x00, 0xC7, 0xBD, 0x6F, 0x17, 0x6D, 0xD2, 0xD0,
+      0x78, 0xA8, 0x02, 0xC5, 0x7A, 0xBF, 0xAA, 0x15};
+  alignas(16) static constexpr uint8_t kSkewHi[16] = {
+      0x00, 0x6A, 0xBB, 0x5F, 0xA5, 0x74, 0xE4, 0xCF,
+      0xFA, 0x35, 0x2B, 0x41, 0xD1, 0x90, 0x1E, 0x8E};
+  const V skewed = SubBytesMulInverseAndAffineLookup(
+      state, LoadDup128(du, kSkewLo), LoadDup128(du, kSkewHi));
+  return Xor(skewed, Set(du, uint8_t{0x63}));
+}
+
+// Applies the inverse affine transformation to every byte, then runs the
+// shared inverse/lookup core with the GF(2^4)-inverse to GF(2^8)-inverse
+// tables.
+template <class V>  // u8
+HWY_INLINE V InvSubBytes(V state) {
+  const DFromV<V> du;
+  alignas(16) static constexpr uint8_t kToGF2P8InvLo[16]{
+      0x00, 0x40, 0xF9, 0x7E, 0x53, 0xEA, 0x87, 0x13,
+      0x2D, 0x3E, 0x94, 0xD4, 0xB9, 0x6D, 0xAA, 0xC7};
+  alignas(16) static constexpr uint8_t kToGF2P8InvHi[16]{
+      0x00, 0x1D, 0x44, 0x93, 0x0F, 0x56, 0xD7, 0x12,
+      0x9C, 0x8E, 0xC5, 0xD8, 0x59, 0x81, 0x4B, 0xCA};
+
+  // Inverse affine transformation: XOR of the byte rotated left by 1, 3 and 6
+  // bits, then XOR with 0x05.
+  const auto rotl1 = Or(ShiftLeft<1>(state), ShiftRight<7>(state));
+  const auto rotl3 = Or(ShiftLeft<3>(state), ShiftRight<5>(state));
+  const auto rotl6 = Or(ShiftLeft<6>(state), ShiftRight<2>(state));
+  const auto unskewed =
+      Xor(Xor3(rotl1, rotl3, rotl6), Set(du, uint8_t{0x05}));
+
+  // The GF(2^8) multiplicative inverse is then obtained by:
+  // - changing the polynomial basis to GF(2^4),
+  // - computing the GF(2^4) multiplicative inverse,
+  // - converting that to the GF(2^8) multiplicative inverse through lookups
+  //   in the kToGF2P8InvLo and kToGF2P8InvHi tables.
+  return SubBytesMulInverseAndAffineLookup(
+      unskewed, LoadDup128(du, kToGF2P8InvLo),
+      LoadDup128(du, kToGF2P8InvHi));
+}
+
+}  // namespace detail
+
+#endif  // HWY_TARGET != HWY_SCALAR
+
+// "Include guard": skip if native AES instructions are available.
+#if (defined(HWY_NATIVE_AES) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_AES
+#undef HWY_NATIVE_AES
+#else
+#define HWY_NATIVE_AES
+#endif
+
+// (Must come after HWY_TARGET_TOGGLE, else we don't reset it for scalar)
+#if HWY_TARGET != HWY_SCALAR
+
+namespace detail {
+
+// Permutes the bytes of each 128-bit block of `state` using the index table
+// below.
+template <class V>  // u8
+HWY_API V ShiftRows(const V state) {
+  const DFromV<V> du;
+  alignas(16) static constexpr uint8_t kByteOrder[16] = {
+      0,  5,  10, 15,  // transposed: state is column major
+      4,  9,  14, 3,   //
+      8,  13, 2,  7,   //
+      12, 1,  6,  11};
+  return TableLookupBytes(state, LoadDup128(du, kByteOrder));
+}
+
+// Permutes the bytes of each 128-bit block of `state` using the index table
+// below (the counterpart of the table in ShiftRows).
+template <class V>  // u8
+HWY_API V InvShiftRows(const V state) {
+  const DFromV<V> du;
+  alignas(16) static constexpr uint8_t kByteOrder[16] = {
+      0,  13, 10, 7,   // transposed: state is column major
+      4,  1,  14, 11,  //
+      8,  5,  2,  15,  //
+      12, 9,  6,  3};
+  return TableLookupBytes(state, LoadDup128(du, kByteOrder));
+}
+
+// Doubles every byte in GF(2^8): v + v, XORed with 0x1B in lanes whose top
+// bit was set.
+template <class V>  // u8
+HWY_API V GF2P8Mod11BMulBy2(V v) {
+  const DFromV<V> du;
+  const RebindToSigned<decltype(du)> di;  // can only do signed comparisons
+  // Top bit set <=> negative when viewed as int8_t.
+  const auto top_bit_set = Lt(BitCast(di, v), Zero(di));
+  const auto reduction =
+      BitCast(du, IfThenElseZero(top_bit_set, Set(di, int8_t{0x1B})));
+  const auto doubled = Add(v, v);
+  return Xor(doubled, reduction);  // = v*2 in GF(2^8).
+}
+
+// Forward column mix, built from one GF(2^8) doubling, two byte permutations
+// and XORs.
+template <class V>  // u8
+HWY_API V MixColumns(const V state) {
+  const DFromV<V> du;
+  // For each column, the rows are the sum of GF(2^8) matrix multiplication by:
+  // 2 3 1 1  // Let s := state*1, d := state*2, t := state*3.
+  // 1 2 3 1  // d are on diagonal, no permutation needed.
+  // 1 1 2 3  // t1230 indicates column indices of threes for the 4 rows.
+  // 3 1 1 2  // We also need to compute s2301 and s3012 (=1230 o 2301).
+  alignas(16) static constexpr uint8_t kRot2[16] = {
+      2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13};
+  alignas(16) static constexpr uint8_t kRot1[16] = {
+      1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12};
+  const auto doubled = GF2P8Mod11BMulBy2(state);  // = state*2 in GF(2^8).
+  const auto s_rot2 = TableLookupBytes(state, LoadDup128(du, kRot2));
+  const auto doubled_plus_rot2 = Xor(doubled, s_rot2);
+  // state*3 = XOR-sum {state, state*2}, so this is t + s2301.
+  const auto tripled_plus_rot2 = Xor(state, doubled_plus_rot2);
+  const auto rotated_terms =
+      TableLookupBytes(tripled_plus_rot2, LoadDup128(du, kRot1));
+  return Xor(doubled_plus_rot2, rotated_terms);  // XOR-sum of 4 terms
+}
+
+// Inverse column mix: forms state*{9, 11, 13, 14} in GF(2^8) from repeated
+// doublings and XORs, then combines them using two byte permutations.
+template <class V>  // u8
+HWY_API V InvMixColumns(const V state) {
+  const DFromV<V> du;
+  // For each column, the rows are the sum of GF(2^8) matrix multiplication by:
+  // 14 11 13  9
+  //  9 14 11 13
+  // 13  9 14 11
+  // 11 13  9 14
+  alignas(16) static constexpr uint8_t kRot2[16] = {
+      2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13};
+  alignas(16) static constexpr uint8_t kRot1[16] = {
+      1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12};
+  const auto rot1 = LoadDup128(du, kRot1);
+
+  const auto x2 = GF2P8Mod11BMulBy2(state); /* = state*2 in GF(2^8) */
+  const auto x4 = GF2P8Mod11BMulBy2(x2);    /* = state*4 in GF(2^8) */
+  const auto x8 = GF2P8Mod11BMulBy2(x4);    /* = state*8 in GF(2^8) */
+  const auto x9 = Xor(x8, state);           /* = state*9 in GF(2^8) */
+  const auto x11 = Xor(x9, x2);             /* = state*11 in GF(2^8) */
+  const auto x13 = Xor(x9, x4);             /* = state*13 in GF(2^8) */
+  const auto x14 = Xor3(x8, x4, x2);        /* = state*14 in GF(2^8) */
+
+  // x13 and x14 unpermuted, each XORed with a kRot1-permuted partner.
+  const auto x13_plus_x9_rot1 = Xor(x13, TableLookupBytes(x9, rot1));
+  const auto x14_plus_x11_rot1 = Xor(x14, TableLookupBytes(x11, rot1));
+  // Applying kRot2 to the first sum yields the remaining two terms.
+  const auto remaining_terms =
+      TableLookupBytes(x13_plus_x9_rot1, LoadDup128(du, kRot2));
+  return Xor(x14_plus_x11_rot1, remaining_terms);
+}
+
+}  // namespace detail
+
+// One AES encryption round: SubBytes, ShiftRows, MixColumns, then XOR with
+// round_key.
+template <class V>  // u8
+HWY_API V AESRound(V state, const V round_key) {
+  // Intel docs swap the first two steps, but it does not matter because
+  // ShiftRows is a permutation and SubBytes is independent of lane index.
+  const V substituted = detail::SubBytes(state);
+  const V shifted = detail::ShiftRows(substituted);
+  const V mixed = detail::MixColumns(shifted);
+  return Xor(mixed, round_key);  // AddRoundKey
+}
+
+// Final AES encryption round: like AESRound, but without MixColumns.
+template <class V>  // u8
+HWY_API V AESLastRound(V state, const V round_key) {
+  const V substituted = detail::SubBytes(state);
+  const V shifted = detail::ShiftRows(substituted);
+  return Xor(shifted, round_key);  // AddRoundKey
+}
+
+// Public wrapper that forwards to detail::InvMixColumns.
+template <class V>
+HWY_API V AESInvMixColumns(V state) {
+  const V mixed = detail::InvMixColumns(state);
+  return mixed;
+}
+
+// One inverse AES round: InvSubBytes, InvShiftRows, InvMixColumns, then XOR
+// with round_key.
+template <class V>  // u8
+HWY_API V AESRoundInv(V state, const V round_key) {
+  const V substituted = detail::InvSubBytes(state);
+  const V shifted = detail::InvShiftRows(substituted);
+  const V mixed = detail::InvMixColumns(shifted);
+  return Xor(mixed, round_key);  // AddRoundKey
+}
+
+// Final inverse AES round: like AESRoundInv, but without InvMixColumns.
+template <class V>  // u8
+HWY_API V AESLastRoundInv(V state, const V round_key) {
+  const V substituted = detail::InvSubBytes(state);
+  const V shifted = detail::InvShiftRows(substituted);
+  return Xor(shifted, round_key);  // AddRoundKey
+}
+
+// Applies SubBytes to v, permutes bytes within each 128-bit block using the
+// shuffle table below, then XORs kRcon into bytes 4 and 12 of each block.
+template <uint8_t kRcon, class V, HWY_IF_U8_D(DFromV<V>)>
+HWY_API V AESKeyGenAssist(V v) {
+  alignas(16) static constexpr uint8_t kRconMask[16] = {
+      0, 0, 0, 0, kRcon, 0, 0, 0, 0, 0, 0, 0, kRcon, 0, 0, 0};
+  alignas(16) static constexpr uint8_t kWordShuffle[16] = {
+      4, 5, 6, 7, 5, 6, 7, 4, 12, 13, 14, 15, 13, 14, 15, 12};
+  const DFromV<decltype(v)> d;
+  const auto substituted = detail::SubBytes(v);
+  const auto shuffled =
+      TableLookupBytes(substituted, LoadDup128(d, kWordShuffle));
+  return Xor(shuffled, LoadDup128(d, kRconMask));
+}
+
+// Constant-time implementation inspired by
+// https://www.bearssl.org/constanttime.html, but about half the cost because we
+// use 64x64 multiplies and 128-bit XORs.
+// Each operand is split into four parts holding every fourth bit; the parts
+// are multiplied with MulEven and each XOR-sum is masked to its bit class
+// again before the four classes are merged.
+template <class V>
+HWY_API V CLMulLower(V a, V b) {
+  const DFromV<V> d;
+  static_assert(IsSame<TFromD<decltype(d)>, uint64_t>(), "V must be u64");
+  // Masks selecting bits 0, 1, 2 and 3 of every nibble.
+  const auto sel0 = Set(d, 0x1111111111111111ULL);
+  const auto sel1 = Set(d, 0x2222222222222222ULL);
+  const auto sel2 = Set(d, 0x4444444444444444ULL);
+  const auto sel3 = Set(d, 0x8888888888888888ULL);
+  const auto ax0 = And(a, sel0);
+  const auto ax1 = And(a, sel1);
+  const auto ax2 = And(a, sel2);
+  const auto ax3 = And(a, sel3);
+  const auto bx0 = And(b, sel0);
+  const auto bx1 = And(b, sel1);
+  const auto bx2 = And(b, sel2);
+  const auto bx3 = And(b, sel3);
+
+  // Four products per result class, combined with XOR.
+  const auto r0 = Xor(Xor(MulEven(ax0, bx0), MulEven(ax1, bx3)),
+                      Xor(MulEven(ax2, bx2), MulEven(ax3, bx1)));
+  const auto r1 = Xor(Xor(MulEven(ax0, bx1), MulEven(ax1, bx0)),
+                      Xor(MulEven(ax2, bx3), MulEven(ax3, bx2)));
+  const auto r2 = Xor(Xor(MulEven(ax0, bx2), MulEven(ax1, bx1)),
+                      Xor(MulEven(ax2, bx0), MulEven(ax3, bx3)));
+  const auto r3 = Xor(Xor(MulEven(ax0, bx3), MulEven(ax1, bx2)),
+                      Xor(MulEven(ax2, bx1), MulEven(ax3, bx0)));
+  return Or(Or(And(r0, sel0), And(r1, sel1)),
+            Or(And(r2, sel2), And(r3, sel3)));
+}
+
+// Same scheme as CLMulLower, but every partial product uses MulOdd instead of
+// MulEven.
+template <class V>
+HWY_API V CLMulUpper(V a, V b) {
+  const DFromV<V> d;
+  static_assert(IsSame<TFromD<decltype(d)>, uint64_t>(), "V must be u64");
+  // Masks selecting bits 0, 1, 2 and 3 of every nibble.
+  const auto sel0 = Set(d, 0x1111111111111111ULL);
+  const auto sel1 = Set(d, 0x2222222222222222ULL);
+  const auto sel2 = Set(d, 0x4444444444444444ULL);
+  const auto sel3 = Set(d, 0x8888888888888888ULL);
+  const auto ax0 = And(a, sel0);
+  const auto ax1 = And(a, sel1);
+  const auto ax2 = And(a, sel2);
+  const auto ax3 = And(a, sel3);
+  const auto bx0 = And(b, sel0);
+  const auto bx1 = And(b, sel1);
+  const auto bx2 = And(b, sel2);
+  const auto bx3 = And(b, sel3);
+
+  // Four products per result class, combined with XOR.
+  const auto r0 = Xor(Xor(MulOdd(ax0, bx0), MulOdd(ax1, bx3)),
+                      Xor(MulOdd(ax2, bx2), MulOdd(ax3, bx1)));
+  const auto r1 = Xor(Xor(MulOdd(ax0, bx1), MulOdd(ax1, bx0)),
+                      Xor(MulOdd(ax2, bx3), MulOdd(ax3, bx2)));
+  const auto r2 = Xor(Xor(MulOdd(ax0, bx2), MulOdd(ax1, bx1)),
+                      Xor(MulOdd(ax2, bx0), MulOdd(ax3, bx3)));
+  const auto r3 = Xor(Xor(MulOdd(ax0, bx3), MulOdd(ax1, bx2)),
+                      Xor(MulOdd(ax2, bx1), MulOdd(ax3, bx0)));
+  return Or(Or(And(r0, sel0), And(r1, sel1)),
+            Or(And(r2, sel2), And(r3, sel3)));
+}
+
+#endif  // HWY_TARGET != HWY_SCALAR
+#endif  // HWY_NATIVE_AES
+
+// ------------------------------ PopulationCount
+
+// "Include guard": skip if native POPCNT-related instructions are available.
+#if (defined(HWY_NATIVE_POPCNT) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_POPCNT
+#undef HWY_NATIVE_POPCNT
+#else
+#define HWY_NATIVE_POPCNT
+#endif
+
+// This overload requires vectors to be at least 16 bytes, which is the case
+// for LMUL >= 2.
+#undef HWY_IF_POPCNT
+#if HWY_TARGET == HWY_RVV
+#define HWY_IF_POPCNT(D) \
+  hwy::EnableIf<D().Pow2() >= 1 && D().MaxLanes() >= 16>* = nullptr
+#else
+// Other targets only have these two overloads which are mutually exclusive, so
+// no further conditions are required.
+#define HWY_IF_POPCNT(D) void* = nullptr
+#endif  // HWY_TARGET == HWY_RVV
+
+// u8 lanes, vectors larger than 8 bytes: looks up the bit count of each
+// nibble in a 16-entry table and adds the two counts per byte.
+template <class V, class D = DFromV<V>, HWY_IF_U8_D(D),
+          HWY_IF_V_SIZE_GT_D(D, 8), HWY_IF_POPCNT(D)>
+HWY_API V PopulationCount(V v) {
+  const D d;
+  // kNibbleBits[i] = number of set bits in i.
+  HWY_ALIGN constexpr uint8_t kNibbleBits[16] = {
+      0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+  };
+  const auto low_nibble = And(v, Set(d, uint8_t{0xF}));
+  const auto high_nibble = ShiftRight<4>(v);
+  const auto table = LoadDup128(d, kNibbleBits);
+  return Add(TableLookupBytes(table, high_nibble),
+             TableLookupBytes(table, low_nibble));
+}
+
+// RVV has a specialization that avoids the Set().
+#if HWY_TARGET != HWY_RVV
+// Slower fallback for capped vectors (u8 lanes, at most 8 bytes): sums
+// adjacent bit fields of growing width using shifts and masks.
+template <class V, class D = DFromV<V>, HWY_IF_U8_D(D),
+          HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API V PopulationCount(V v) {
+  const D d;
+  // See https://arxiv.org/pdf/1611.07612.pdf, Figure 3
+  const V mask33 = Set(d, uint8_t{0x33});
+  // Counts per 2-bit field.
+  const V pairs = Sub(v, And(ShiftRight<1>(v), Set(d, uint8_t{0x55})));
+  // Counts per 4-bit field.
+  const V nibbles = Add(And(ShiftRight<2>(pairs), mask33), And(pairs, mask33));
+  // Sum of both nibble counts, kept in the low 4 bits.
+  return And(Add(nibbles, ShiftRight<4>(nibbles)), Set(d, uint8_t{0x0F}));
+}
+#endif  // HWY_TARGET != HWY_RVV
+
+// u16 lanes: counts bits per byte, then adds the two byte counts of each lane.
+template <class V, class D = DFromV<V>, HWY_IF_U16_D(D)>
+HWY_API V PopulationCount(V v) {
+  const D d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  const auto per_byte = BitCast(d, PopulationCount(BitCast(d8, v)));
+  const auto upper = ShiftRight<8>(per_byte);
+  const auto lower = And(per_byte, Set(d, uint16_t{0xFF}));
+  return Add(upper, lower);
+}
+
+// u32 lanes: counts bits per 16-bit half, then adds the two counts of each
+// lane.
+template <class V, class D = DFromV<V>, HWY_IF_U32_D(D)>
+HWY_API V PopulationCount(V v) {
+  const D d;
+  const Repartition<uint16_t, decltype(d)> d16;
+  const auto per_half = BitCast(d, PopulationCount(BitCast(d16, v)));
+  const auto upper = ShiftRight<16>(per_half);
+  const auto lower = And(per_half, Set(d, uint32_t{0xFF}));
+  return Add(upper, lower);
+}
+
+#if HWY_HAVE_INTEGER64
+// u64 lanes: counts bits per 32-bit half, then adds the two counts of each
+// lane.
+template <class V, class D = DFromV<V>, HWY_IF_U64_D(D)>
+HWY_API V PopulationCount(V v) {
+  const D d;
+  const Repartition<uint32_t, decltype(d)> d32;
+  const auto per_half = BitCast(d, PopulationCount(BitCast(d32, v)));
+  const auto upper = ShiftRight<32>(per_half);
+  const auto lower = And(per_half, Set(d, 0xFFULL));
+  return Add(upper, lower);
+}
+#endif
+
+#endif  // HWY_NATIVE_POPCNT
+
+// ------------------------------ 8-bit multiplication
+
+// "Include guard": skip if native 8-bit mul instructions are available.
+#if (defined(HWY_NATIVE_MUL_8) == defined(HWY_TARGET_TOGGLE)) || HWY_IDE
+#ifdef HWY_NATIVE_MUL_8
+#undef HWY_NATIVE_MUL_8
+#else
+#define HWY_NATIVE_MUL_8
+#endif
+
+// 8-bit lanes in a vector of at most HWY_MAX_BYTES / 2 bytes: promote both
+// operands to the wider lane type, multiply there, and truncate the products
+// back to 8 bits.
+template <class V, HWY_IF_T_SIZE_V(V, 1),
+          HWY_IF_V_SIZE_LE_V(V, HWY_MAX_BYTES / 2)>
+HWY_API V operator*(const V a, const V b) {
+  const DFromV<decltype(a)> d;
+  const Rebind<MakeWide<TFromV<V>>, decltype(d)> d_wide;
+  const RebindToUnsigned<decltype(d)> d_unsigned;            // TruncateTo result
+  const RebindToUnsigned<decltype(d_wide)> d_wide_unsigned;  // TruncateTo input
+  const VFromD<decltype(d_wide)> a_wide = PromoteTo(d_wide, a);
+  const VFromD<decltype(d_wide)> b_wide = PromoteTo(d_wide, b);
+  const VFromD<decltype(d_wide)> product = a_wide * b_wide;
+  // TruncateTo is cheaper than ConcatEven.
+  return BitCast(d, TruncateTo(d_unsigned, BitCast(d_wide_unsigned, product)));
+}
+
+// 8-bit lanes in a vector larger than HWY_MAX_BYTES / 2 bytes: promote the
+// lower and upper halves separately, multiply each pair of wide vectors, and
+// merge the two products with ConcatEven on their byte views.
+template <class V, HWY_IF_T_SIZE_V(V, 1),
+          HWY_IF_V_SIZE_GT_V(V, HWY_MAX_BYTES / 2)>
+HWY_API V operator*(const V a, const V b) {
+  const DFromV<decltype(a)> d;
+  const Half<decltype(d)> d_half;
+  const Twice<RepartitionToWide<decltype(d_half)>> d_wide;
+  using VW = VFromD<decltype(d_wide)>;
+  const VW a_lower = PromoteTo(d_wide, LowerHalf(d_half, a));
+  const VW a_upper = PromoteTo(d_wide, UpperHalf(d_half, a));
+  const VW b_lower = PromoteTo(d_wide, LowerHalf(d_half, b));
+  const VW b_upper = PromoteTo(d_wide, UpperHalf(d_half, b));
+  const VW product_lower = a_lower * b_lower;
+  const VW product_upper = a_upper * b_upper;
+  return ConcatEven(d, BitCast(d, product_upper), BitCast(d, product_lower));
+}
+
+#endif  // HWY_NATIVE_MUL_8
+
+// ------------------------------ 64-bit multiplication
+
+// "Include guard": skip if native 64-bit mul instructions are available.
+#if (defined(HWY_NATIVE_MUL_64) == defined(HWY_TARGET_TOGGLE)) || HWY_IDE
+#ifdef HWY_NATIVE_MUL_64
+#undef HWY_NATIVE_MUL_64
+#else
+#define HWY_NATIVE_MUL_64
+#endif
+
+// Single-lane i64 or u64: extract the lane, multiply as unsigned in scalar
+// code, and broadcast the product converted back to the lane type.
+template <class V, HWY_IF_T_SIZE_V(V, 8), HWY_IF_V_SIZE_V(V, 8),
+          HWY_IF_NOT_FLOAT_V(V)>
+HWY_API V operator*(V x, V y) {
+  const DFromV<V> d;
+  using T = TFromD<decltype(d)>;
+  using TU = MakeUnsigned<T>;
+  const TU lhs = static_cast<TU>(GetLane(x));
+  const TU rhs = static_cast<TU>(GetLane(y));
+  const TU product = lhs * rhs;
+  return Set(d, static_cast<T>(product));
+}
+
+// u64 lanes, vectors larger than 8 bytes: builds the low 64 bits of each
+// product from three 32x32 MulEven products:
+// lo*lo + ((lo*hi + hi*lo) << 32).
+template <class V, class D64 = DFromV<V>, HWY_IF_U64_D(D64),
+          HWY_IF_V_SIZE_GT_D(D64, 8)>
+HWY_API V operator*(V x, V y) {
+  const RepartitionToNarrow<D64> d32;
+  const auto x_as32 = BitCast(d32, x);
+  const auto y_as32 = BitCast(d32, y);
+  // Upper 32 bits of each operand, moved into the position MulEven reads.
+  const auto x_upper = BitCast(d32, ShiftRight<32>(x));
+  const auto y_upper = BitCast(d32, ShiftRight<32>(y));
+  const auto low_low = BitCast(d32, MulEven(x_as32, y_as32));
+  const auto low_high = BitCast(d32, MulEven(x_as32, y_upper));
+  const auto high_low = BitCast(d32, MulEven(x_upper, y_as32));
+  const auto cross =
+      BitCast(d32, ShiftLeft<32>(BitCast(D64{}, low_high + high_low)));
+  return BitCast(D64{}, low_low + cross);
+}
+// i64 lanes, vectors larger than 8 bytes: multiply the operands as u64 and
+// reinterpret the product as i64.
+template <class V, class DI64 = DFromV<V>, HWY_IF_I64_D(DI64),
+          HWY_IF_V_SIZE_GT_D(DI64, 8)>
+HWY_API V operator*(V x, V y) {
+  const RebindToUnsigned<DI64> du64;
+  const auto x_unsigned = BitCast(du64, x);
+  const auto y_unsigned = BitCast(du64, y);
+  return BitCast(DI64{}, x_unsigned * y_unsigned);
+}
+
+#endif  // HWY_NATIVE_MUL_64
+
+// ------------------------------ MulAdd / NegMulAdd
+
+// "Include guard": skip if native int MulAdd instructions are available.
+#if (defined(HWY_NATIVE_INT_FMA) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_INT_FMA
+#undef HWY_NATIVE_INT_FMA
+#else
+#define HWY_NATIVE_INT_FMA
+#endif
+
+template <class V, HWY_IF_NOT_FLOAT_V(V)>
+HWY_API V MulAdd(V mul, V x, V add) {  // Non-float fallback: mul * x + add
+  return Add(Mul(mul, x), add);  // separate multiply and add, not a fused operation
+}
+
+template <class V, HWY_IF_NOT_FLOAT_V(V)>
+HWY_API V NegMulAdd(V mul, V x, V add) {  // Non-float fallback: add - mul * x
+  return Sub(add, Mul(mul, x));  // separate multiply and subtract, not a fused operation
+}
+
+#endif  // HWY_NATIVE_INT_FMA
+
+// ------------------------------ SatWidenMulPairwiseAdd
+
+#if (defined(HWY_NATIVE_U8_I8_SATWIDENMULPAIRWISEADD) == \
+     defined(HWY_TARGET_TOGGLE))
+
+#ifdef HWY_NATIVE_U8_I8_SATWIDENMULPAIRWISEADD
+#undef HWY_NATIVE_U8_I8_SATWIDENMULPAIRWISEADD
+#else
+#define HWY_NATIVE_U8_I8_SATWIDENMULPAIRWISEADD
+#endif
+
+template <class DI16, class VU8, class VI8,
+          class VU8_2 = Vec<Repartition<uint8_t, DI16>>, HWY_IF_I16_D(DI16),
+          HWY_IF_U8_D(DFromV<VU8>), HWY_IF_I8_D(DFromV<VI8>),
+          HWY_IF_LANES_D(DFromV<VU8>, HWY_MAX_LANES_V(VI8)),
+          HWY_IF_LANES_D(DFromV<VU8>, HWY_MAX_LANES_V(VU8_2))>
+HWY_API Vec<DI16> SatWidenMulPairwiseAdd(DI16 di16, VU8 a, VI8 b) {
+  const RebindToUnsigned<decltype(di16)> du16;
+  // Low byte of each 16-bit lane: `a` masked with 0x00FF, `b` via a shift pair.
+  const auto a0 = And(BitCast(di16, a), Set(di16, int16_t{0x00FF}));
+  const auto b0 = ShiftRight<8>(ShiftLeft<8>(BitCast(di16, b)));  // presumably sign-extends
+  // High byte of each 16-bit lane: `a` shifted as u16, `b` shifted as i16.
+  const auto a1 = BitCast(di16, ShiftRight<8>(BitCast(du16, a)));
+  const auto b1 = ShiftRight<8>(BitCast(di16, b));
+  // Saturating add of the two 16-bit products in each lane.
+  return SaturatedAdd(Mul(a0, b0), Mul(a1, b1));
+}
+
+#endif
+
+// ------------------------------ SumOfMulQuadAccumulate
+
+#if (defined(HWY_NATIVE_I8_I8_SUMOFMULQUADACCUMULATE) == \
+     defined(HWY_TARGET_TOGGLE))
+
+#ifdef HWY_NATIVE_I8_I8_SUMOFMULQUADACCUMULATE
+#undef HWY_NATIVE_I8_I8_SUMOFMULQUADACCUMULATE
+#else
+#define HWY_NATIVE_I8_I8_SUMOFMULQUADACCUMULATE
+#endif
+
+template <class DI32, HWY_IF_I32_D(DI32)>  // i8 x i8 inputs, i32 accumulator
+HWY_API VFromD<DI32> SumOfMulQuadAccumulate(DI32 di32,
+                                            VFromD<Repartition<int8_t, DI32>> a,
+                                            VFromD<Repartition<int8_t, DI32>> b,
+                                            VFromD<DI32> sum) {
+  const Repartition<int16_t, decltype(di32)> di16;
+  // Low byte of each 16-bit lane; the shift pair presumably sign-extends it.
+  const auto a0 = ShiftRight<8>(ShiftLeft<8>(BitCast(di16, a)));
+  const auto b0 = ShiftRight<8>(ShiftLeft<8>(BitCast(di16, b)));
+  // High byte of each 16-bit lane, shifted down within the i16 lane.
+  const auto a1 = ShiftRight<8>(BitCast(di16, a));
+  const auto b1 = ShiftRight<8>(BitCast(di16, b));
+  // Both widened pairwise product sums are added to the accumulator `sum`.
+  return Add(sum, Add(WidenMulPairwiseAdd(di32, a0, b0),
+                      WidenMulPairwiseAdd(di32, a1, b1)));
+}
+
+#endif
+
+#if (defined(HWY_NATIVE_U8_U8_SUMOFMULQUADACCUMULATE) == \
+     defined(HWY_TARGET_TOGGLE))
+
+#ifdef HWY_NATIVE_U8_U8_SUMOFMULQUADACCUMULATE
+#undef HWY_NATIVE_U8_U8_SUMOFMULQUADACCUMULATE
+#else
+#define HWY_NATIVE_U8_U8_SUMOFMULQUADACCUMULATE
+#endif
+
+template <class DU32, HWY_IF_U32_D(DU32)>  // u8 x u8 inputs, u32 accumulator
+HWY_API VFromD<DU32> SumOfMulQuadAccumulate(
+    DU32 du32, VFromD<Repartition<uint8_t, DU32>> a,
+    VFromD<Repartition<uint8_t, DU32>> b, VFromD<DU32> sum) {
+  const Repartition<uint16_t, decltype(du32)> du16;
+  const RebindToSigned<decltype(du16)> di16;
+  const RebindToSigned<decltype(du32)> di32;
+  // Low byte of each 16-bit lane, isolated by masking with 0x00FF.
+  const auto lo8_mask = Set(di16, int16_t{0x00FF});
+  const auto a0 = And(BitCast(di16, a), lo8_mask);
+  const auto b0 = And(BitCast(di16, b), lo8_mask);
+  // High byte of each 16-bit lane, via a shift on the unsigned (u16) view.
+  const auto a1 = BitCast(di16, ShiftRight<8>(BitCast(du16, a)));
+  const auto b1 = BitCast(di16, ShiftRight<8>(BitCast(du16, b)));
+  // Signed pairwise multiply-add results are reinterpreted as u32 before adding.
+  return Add(sum, Add(BitCast(du32, WidenMulPairwiseAdd(di32, a0, b0)),
+                      BitCast(du32, WidenMulPairwiseAdd(di32, a1, b1))));
+}
+
+#endif
+
+#if (defined(HWY_NATIVE_U8_I8_SUMOFMULQUADACCUMULATE) == \
+     defined(HWY_TARGET_TOGGLE))
+
+#ifdef HWY_NATIVE_U8_I8_SUMOFMULQUADACCUMULATE
+#undef HWY_NATIVE_U8_I8_SUMOFMULQUADACCUMULATE
+#else
+#define HWY_NATIVE_U8_I8_SUMOFMULQUADACCUMULATE
+#endif
+
+template <class DI32, HWY_IF_I32_D(DI32)>  // u8 x i8 inputs, i32 accumulator
+HWY_API VFromD<DI32> SumOfMulQuadAccumulate(
+    DI32 di32, VFromD<Repartition<uint8_t, DI32>> a_u,
+    VFromD<Repartition<int8_t, DI32>> b_i, VFromD<DI32> sum) {
+  const Repartition<int16_t, decltype(di32)> di16;
+  const RebindToUnsigned<decltype(di16)> du16;
+  // Low bytes: `a_u` masked with 0x00FF; `b_i` via a shift pair on the i16 view.
+  const auto a0 = And(BitCast(di16, a_u), Set(di16, int16_t{0x00FF}));
+  const auto b0 = ShiftRight<8>(ShiftLeft<8>(BitCast(di16, b_i)));
+  // High bytes: `a_u` shifted as u16, `b_i` shifted as i16.
+  const auto a1 = BitCast(di16, ShiftRight<8>(BitCast(du16, a_u)));
+  const auto b1 = ShiftRight<8>(BitCast(di16, b_i));
+
+  // NOTE: SatWidenMulPairwiseAdd(di16, a_u, b_i) cannot be used in
+  // SumOfMulQuadAccumulate as it is possible for
+  // a_u[0]*b_i[0]+a_u[1]*b_i[1] to overflow an int16_t if a_u[0], b_i[0],
+  // a_u[1], and b_i[1] are all non-zero and b_i[0] and b_i[1] have the same
+  // sign.
+  // Hence the pairwise sums below are formed with the i32 tag `di32`.
+  return Add(sum, Add(WidenMulPairwiseAdd(di32, a0, b0),
+                      WidenMulPairwiseAdd(di32, a1, b1)));
+}
+
+#endif
+
+#if (defined(HWY_NATIVE_I16_I16_SUMOFMULQUADACCUMULATE) == \
+     defined(HWY_TARGET_TOGGLE))
+
+#ifdef HWY_NATIVE_I16_I16_SUMOFMULQUADACCUMULATE
+#undef HWY_NATIVE_I16_I16_SUMOFMULQUADACCUMULATE
+#else
+#define HWY_NATIVE_I16_I16_SUMOFMULQUADACCUMULATE
+#endif
+
+#if HWY_HAVE_INTEGER64
+template <class DI64, HWY_IF_I64_D(DI64)>  // i16 x i16 inputs, i64 accumulator
+HWY_API VFromD<DI64> SumOfMulQuadAccumulate(
+    DI64 di64, VFromD<Repartition<int16_t, DI64>> a,
+    VFromD<Repartition<int16_t, DI64>> b, VFromD<DI64> sum) {
+  const Repartition<int32_t, decltype(di64)> di32;
+
+  // WidenMulPairwiseAdd(di32, a, b) is okay here as
+  // a[0]*b[0]+a[1]*b[1] is between -2147418112 and 2147483648 and as
+  // a[0]*b[0]+a[1]*b[1] can only overflow an int32_t if
+  // a[0], b[0], a[1], and b[1] are all equal to -32768.
+  // Lanes equal to LimitsMin<int32_t>() are flagged as that overflow case.
+  const auto i32_pairwise_sum = WidenMulPairwiseAdd(di32, a, b);
+  const auto i32_pairwise_sum_overflow =
+      VecFromMask(di32, Eq(i32_pairwise_sum, Set(di32, LimitsMin<int32_t>())));
+
+  // The upper 32 bits of p0 and p1 need to be zeroed out in the case of
+  // overflow.
+  const auto hi32_mask = Set(di64, static_cast<int64_t>(~int64_t{0xFFFFFFFF}));
+  const auto p0_zero_out_mask =
+      ShiftLeft<32>(BitCast(di64, i32_pairwise_sum_overflow));
+  const auto p1_zero_out_mask =
+      And(BitCast(di64, i32_pairwise_sum_overflow), hi32_mask);
+  // p0/p1: the low/high i32 of each i64 lane, moved to the low bits by shifts.
+  const auto p0 =
+      AndNot(p0_zero_out_mask,
+             ShiftRight<32>(ShiftLeft<32>(BitCast(di64, i32_pairwise_sum))));
+  const auto p1 =
+      AndNot(p1_zero_out_mask, ShiftRight<32>(BitCast(di64, i32_pairwise_sum)));
+
+  return Add(sum, Add(p0, p1));
+}
+#endif  // HWY_HAVE_INTEGER64
+#endif  // HWY_NATIVE_I16_I16_SUMOFMULQUADACCUMULATE
+
+#if (defined(HWY_NATIVE_U16_U16_SUMOFMULQUADACCUMULATE) == \
+     defined(HWY_TARGET_TOGGLE))
+
+#ifdef HWY_NATIVE_U16_U16_SUMOFMULQUADACCUMULATE
+#undef HWY_NATIVE_U16_U16_SUMOFMULQUADACCUMULATE
+#else
+#define HWY_NATIVE_U16_U16_SUMOFMULQUADACCUMULATE
+#endif
+
+#if HWY_HAVE_INTEGER64
+template <class DU64, HWY_IF_U64_D(DU64)>  // u16 x u16 inputs, u64 accumulator
+HWY_API VFromD<DU64> SumOfMulQuadAccumulate(
+    DU64 du64, VFromD<Repartition<uint16_t, DU64>> a,
+    VFromD<Repartition<uint16_t, DU64>> b, VFromD<DU64> sum) {
+  const auto u32_even_prod = MulEven(a, b);
+  const auto u32_odd_prod = MulOdd(a, b);
+  // Mask that keeps only the low 32 bits of each 64-bit lane.
+  const auto lo32_mask = Set(du64, uint64_t{0xFFFFFFFFu});
+  // p0 sums the low 32-bit halves of both products; p1 sums the high halves.
+  const auto p0 = Add(And(BitCast(du64, u32_even_prod), lo32_mask),
+                      And(BitCast(du64, u32_odd_prod), lo32_mask));
+  const auto p1 = Add(ShiftRight<32>(BitCast(du64, u32_even_prod)),
+                      ShiftRight<32>(BitCast(du64, u32_odd_prod)));
+
+  return Add(sum, Add(p0, p1));
+}
+#endif  // HWY_HAVE_INTEGER64
+#endif  // HWY_NATIVE_U16_U16_SUMOFMULQUADACCUMULATE
+
+// ------------------------------ F64 ApproximateReciprocal
+
+#if (defined(HWY_NATIVE_F64_APPROX_RECIP) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_F64_APPROX_RECIP
+#undef HWY_NATIVE_F64_APPROX_RECIP
+#else
+#define HWY_NATIVE_F64_APPROX_RECIP
+#endif
+
+#if HWY_HAVE_FLOAT64
+template <class V, HWY_IF_F64_D(DFromV<V>)>
+HWY_API V ApproximateReciprocal(V v) {  // f64 fallback: computes 1.0 / v with Div
+  const DFromV<decltype(v)> d;
+  return Div(Set(d, 1.0), v);  // a real division, not an estimate instruction
+}
+#endif  // HWY_HAVE_FLOAT64
+
+#endif  // HWY_NATIVE_F64_APPROX_RECIP
+
+// ------------------------------ F64 ApproximateReciprocalSqrt
+
+#if (defined(HWY_NATIVE_F64_APPROX_RSQRT) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_F64_APPROX_RSQRT
+#undef HWY_NATIVE_F64_APPROX_RSQRT
+#else
+#define HWY_NATIVE_F64_APPROX_RSQRT
+#endif
+
+#if HWY_HAVE_FLOAT64
+template <class V, HWY_IF_F64_D(DFromV<V>)>
+HWY_API V ApproximateReciprocalSqrt(V v) {  // f64 fallback: bit-level guess, then one refinement
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto half = Mul(v, Set(d, 0.5));
+  // Initial guess based on log2(f): magic constant minus (bits of v >> 1).
+  const auto guess = BitCast(d, Sub(Set(du, uint64_t{0x5FE6EB50C7B537A9u}),
+                                    ShiftRight<1>(BitCast(du, v))));
+  // One Newton-Raphson iteration; presumably guess * (1.5 - half * guess^2).
+  return Mul(guess, NegMulAdd(Mul(half, guess), guess, Set(d, 1.5)));
+}
+#endif  // HWY_HAVE_FLOAT64
+
+#endif  // HWY_NATIVE_F64_APPROX_RSQRT
+
+// ------------------------------ Compress*
+
+// "Include guard": skip if native 8-bit compress instructions are available.
+#if (defined(HWY_NATIVE_COMPRESS8) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_COMPRESS8
+#undef HWY_NATIVE_COMPRESS8
+#else
+#define HWY_NATIVE_COMPRESS8
+#endif
+
+template <class V, class D, typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API size_t CompressBitsStore(V v, const uint8_t* HWY_RESTRICT bits, D d,
+                                 T* unaligned) {  // returns pos - unaligned (lanes advanced)
+  HWY_ALIGN T lanes[MaxLanes(d)];
+  Store(v, d, lanes);
+  // Work proceeds in groups of up to 8 lanes; `pos` is the next write position.
+  const Simd<T, HWY_MIN(MaxLanes(d), 8), 0> d8;
+  T* HWY_RESTRICT pos = unaligned;
+  // 256 entries of 8 byte indices each; the entry is chosen by one byte of `bits`.
+  HWY_ALIGN constexpr T table[2048] = {
+      0, 1, 2, 3, 4, 5, 6, 7, /**/ 0, 1, 2, 3, 4, 5, 6, 7,  //
+      1, 0, 2, 3, 4, 5, 6, 7, /**/ 0, 1, 2, 3, 4, 5, 6, 7,  //
+      2, 0, 1, 3, 4, 5, 6, 7, /**/ 0, 2, 1, 3, 4, 5, 6, 7,  //
+      1, 2, 0, 3, 4, 5, 6, 7, /**/ 0, 1, 2, 3, 4, 5, 6, 7,  //
+      3, 0, 1, 2, 4, 5, 6, 7, /**/ 0, 3, 1, 2, 4, 5, 6, 7,  //
+      1, 3, 0, 2, 4, 5, 6, 7, /**/ 0, 1, 3, 2, 4, 5, 6, 7,  //
+      2, 3, 0, 1, 4, 5, 6, 7, /**/ 0, 2, 3, 1, 4, 5, 6, 7,  //
+      1, 2, 3, 0, 4, 5, 6, 7, /**/ 0, 1, 2, 3, 4, 5, 6, 7,  //
+      4, 0, 1, 2, 3, 5, 6, 7, /**/ 0, 4, 1, 2, 3, 5, 6, 7,  //
+      1, 4, 0, 2, 3, 5, 6, 7, /**/ 0, 1, 4, 2, 3, 5, 6, 7,  //
+      2, 4, 0, 1, 3, 5, 6, 7, /**/ 0, 2, 4, 1, 3, 5, 6, 7,  //
+      1, 2, 4, 0, 3, 5, 6, 7, /**/ 0, 1, 2, 4, 3, 5, 6, 7,  //
+      3, 4, 0, 1, 2, 5, 6, 7, /**/ 0, 3, 4, 1, 2, 5, 6, 7,  //
+      1, 3, 4, 0, 2, 5, 6, 7, /**/ 0, 1, 3, 4, 2, 5, 6, 7,  //
+      2, 3, 4, 0, 1, 5, 6, 7, /**/ 0, 2, 3, 4, 1, 5, 6, 7,  //
+      1, 2, 3, 4, 0, 5, 6, 7, /**/ 0, 1, 2, 3, 4, 5, 6, 7,  //
+      5, 0, 1, 2, 3, 4, 6, 7, /**/ 0, 5, 1, 2, 3, 4, 6, 7,  //
+      1, 5, 0, 2, 3, 4, 6, 7, /**/ 0, 1, 5, 2, 3, 4, 6, 7,  //
+      2, 5, 0, 1, 3, 4, 6, 7, /**/ 0, 2, 5, 1, 3, 4, 6, 7,  //
+      1, 2, 5, 0, 3, 4, 6, 7, /**/ 0, 1, 2, 5, 3, 4, 6, 7,  //
+      3, 5, 0, 1, 2, 4, 6, 7, /**/ 0, 3, 5, 1, 2, 4, 6, 7,  //
+      1, 3, 5, 0, 2, 4, 6, 7, /**/ 0, 1, 3, 5, 2, 4, 6, 7,  //
+      2, 3, 5, 0, 1, 4, 6, 7, /**/ 0, 2, 3, 5, 1, 4, 6, 7,  //
+      1, 2, 3, 5, 0, 4, 6, 7, /**/ 0, 1, 2, 3, 5, 4, 6, 7,  //
+      4, 5, 0, 1, 2, 3, 6, 7, /**/ 0, 4, 5, 1, 2, 3, 6, 7,  //
+      1, 4, 5, 0, 2, 3, 6, 7, /**/ 0, 1, 4, 5, 2, 3, 6, 7,  //
+      2, 4, 5, 0, 1, 3, 6, 7, /**/ 0, 2, 4, 5, 1, 3, 6, 7,  //
+      1, 2, 4, 5, 0, 3, 6, 7, /**/ 0, 1, 2, 4, 5, 3, 6, 7,  //
+      3, 4, 5, 0, 1, 2, 6, 7, /**/ 0, 3, 4, 5, 1, 2, 6, 7,  //
+      1, 3, 4, 5, 0, 2, 6, 7, /**/ 0, 1, 3, 4, 5, 2, 6, 7,  //
+      2, 3, 4, 5, 0, 1, 6, 7, /**/ 0, 2, 3, 4, 5, 1, 6, 7,  //
+      1, 2, 3, 4, 5, 0, 6, 7, /**/ 0, 1, 2, 3, 4, 5, 6, 7,  //
+      6, 0, 1, 2, 3, 4, 5, 7, /**/ 0, 6, 1, 2, 3, 4, 5, 7,  //
+      1, 6, 0, 2, 3, 4, 5, 7, /**/ 0, 1, 6, 2, 3, 4, 5, 7,  //
+      2, 6, 0, 1, 3, 4, 5, 7, /**/ 0, 2, 6, 1, 3, 4, 5, 7,  //
+      1, 2, 6, 0, 3, 4, 5, 7, /**/ 0, 1, 2, 6, 3, 4, 5, 7,  //
+      3, 6, 0, 1, 2, 4, 5, 7, /**/ 0, 3, 6, 1, 2, 4, 5, 7,  //
+      1, 3, 6, 0, 2, 4, 5, 7, /**/ 0, 1, 3, 6, 2, 4, 5, 7,  //
+      2, 3, 6, 0, 1, 4, 5, 7, /**/ 0, 2, 3, 6, 1, 4, 5, 7,  //
+      1, 2, 3, 6, 0, 4, 5, 7, /**/ 0, 1, 2, 3, 6, 4, 5, 7,  //
+      4, 6, 0, 1, 2, 3, 5, 7, /**/ 0, 4, 6, 1, 2, 3, 5, 7,  //
+      1, 4, 6, 0, 2, 3, 5, 7, /**/ 0, 1, 4, 6, 2, 3, 5, 7,  //
+      2, 4, 6, 0, 1, 3, 5, 7, /**/ 0, 2, 4, 6, 1, 3, 5, 7,  //
+      1, 2, 4, 6, 0, 3, 5, 7, /**/ 0, 1, 2, 4, 6, 3, 5, 7,  //
+      3, 4, 6, 0, 1, 2, 5, 7, /**/ 0, 3, 4, 6, 1, 2, 5, 7,  //
+      1, 3, 4, 6, 0, 2, 5, 7, /**/ 0, 1, 3, 4, 6, 2, 5, 7,  //
+      2, 3, 4, 6, 0, 1, 5, 7, /**/ 0, 2, 3, 4, 6, 1, 5, 7,  //
+      1, 2, 3, 4, 6, 0, 5, 7, /**/ 0, 1, 2, 3, 4, 6, 5, 7,  //
+      5, 6, 0, 1, 2, 3, 4, 7, /**/ 0, 5, 6, 1, 2, 3, 4, 7,  //
+      1, 5, 6, 0, 2, 3, 4, 7, /**/ 0, 1, 5, 6, 2, 3, 4, 7,  //
+      2, 5, 6, 0, 1, 3, 4, 7, /**/ 0, 2, 5, 6, 1, 3, 4, 7,  //
+      1, 2, 5, 6, 0, 3, 4, 7, /**/ 0, 1, 2, 5, 6, 3, 4, 7,  //
+      3, 5, 6, 0, 1, 2, 4, 7, /**/ 0, 3, 5, 6, 1, 2, 4, 7,  //
+      1, 3, 5, 6, 0, 2, 4, 7, /**/ 0, 1, 3, 5, 6, 2, 4, 7,  //
+      2, 3, 5, 6, 0, 1, 4, 7, /**/ 0, 2, 3, 5, 6, 1, 4, 7,  //
+      1, 2, 3, 5, 6, 0, 4, 7, /**/ 0, 1, 2, 3, 5, 6, 4, 7,  //
+      4, 5, 6, 0, 1, 2, 3, 7, /**/ 0, 4, 5, 6, 1, 2, 3, 7,  //
+      1, 4, 5, 6, 0, 2, 3, 7, /**/ 0, 1, 4, 5, 6, 2, 3, 7,  //
+      2, 4, 5, 6, 0, 1, 3, 7, /**/ 0, 2, 4, 5, 6, 1, 3, 7,  //
+      1, 2, 4, 5, 6, 0, 3, 7, /**/ 0, 1, 2, 4, 5, 6, 3, 7,  //
+      3, 4, 5, 6, 0, 1, 2, 7, /**/ 0, 3, 4, 5, 6, 1, 2, 7,  //
+      1, 3, 4, 5, 6, 0, 2, 7, /**/ 0, 1, 3, 4, 5, 6, 2, 7,  //
+      2, 3, 4, 5, 6, 0, 1, 7, /**/ 0, 2, 3, 4, 5, 6, 1, 7,  //
+      1, 2, 3, 4, 5, 6, 0, 7, /**/ 0, 1, 2, 3, 4, 5, 6, 7,  //
+      7, 0, 1, 2, 3, 4, 5, 6, /**/ 0, 7, 1, 2, 3, 4, 5, 6,  //
+      1, 7, 0, 2, 3, 4, 5, 6, /**/ 0, 1, 7, 2, 3, 4, 5, 6,  //
+      2, 7, 0, 1, 3, 4, 5, 6, /**/ 0, 2, 7, 1, 3, 4, 5, 6,  //
+      1, 2, 7, 0, 3, 4, 5, 6, /**/ 0, 1, 2, 7, 3, 4, 5, 6,  //
+      3, 7, 0, 1, 2, 4, 5, 6, /**/ 0, 3, 7, 1, 2, 4, 5, 6,  //
+      1, 3, 7, 0, 2, 4, 5, 6, /**/ 0, 1, 3, 7, 2, 4, 5, 6,  //
+      2, 3, 7, 0, 1, 4, 5, 6, /**/ 0, 2, 3, 7, 1, 4, 5, 6,  //
+      1, 2, 3, 7, 0, 4, 5, 6, /**/ 0, 1, 2, 3, 7, 4, 5, 6,  //
+      4, 7, 0, 1, 2, 3, 5, 6, /**/ 0, 4, 7, 1, 2, 3, 5, 6,  //
+      1, 4, 7, 0, 2, 3, 5, 6, /**/ 0, 1, 4, 7, 2, 3, 5, 6,  //
+      2, 4, 7, 0, 1, 3, 5, 6, /**/ 0, 2, 4, 7, 1, 3, 5, 6,  //
+      1, 2, 4, 7, 0, 3, 5, 6, /**/ 0, 1, 2, 4, 7, 3, 5, 6,  //
+      3, 4, 7, 0, 1, 2, 5, 6, /**/ 0, 3, 4, 7, 1, 2, 5, 6,  //
+      1, 3, 4, 7, 0, 2, 5, 6, /**/ 0, 1, 3, 4, 7, 2, 5, 6,  //
+      2, 3, 4, 7, 0, 1, 5, 6, /**/ 0, 2, 3, 4, 7, 1, 5, 6,  //
+      1, 2, 3, 4, 7, 0, 5, 6, /**/ 0, 1, 2, 3, 4, 7, 5, 6,  //
+      5, 7, 0, 1, 2, 3, 4, 6, /**/ 0, 5, 7, 1, 2, 3, 4, 6,  //
+      1, 5, 7, 0, 2, 3, 4, 6, /**/ 0, 1, 5, 7, 2, 3, 4, 6,  //
+      2, 5, 7, 0, 1, 3, 4, 6, /**/ 0, 2, 5, 7, 1, 3, 4, 6,  //
+      1, 2, 5, 7, 0, 3, 4, 6, /**/ 0, 1, 2, 5, 7, 3, 4, 6,  //
+      3, 5, 7, 0, 1, 2, 4, 6, /**/ 0, 3, 5, 7, 1, 2, 4, 6,  //
+      1, 3, 5, 7, 0, 2, 4, 6, /**/ 0, 1, 3, 5, 7, 2, 4, 6,  //
+      2, 3, 5, 7, 0, 1, 4, 6, /**/ 0, 2, 3, 5, 7, 1, 4, 6,  //
+      1, 2, 3, 5, 7, 0, 4, 6, /**/ 0, 1, 2, 3, 5, 7, 4, 6,  //
+      4, 5, 7, 0, 1, 2, 3, 6, /**/ 0, 4, 5, 7, 1, 2, 3, 6,  //
+      1, 4, 5, 7, 0, 2, 3, 6, /**/ 0, 1, 4, 5, 7, 2, 3, 6,  //
+      2, 4, 5, 7, 0, 1, 3, 6, /**/ 0, 2, 4, 5, 7, 1, 3, 6,  //
+      1, 2, 4, 5, 7, 0, 3, 6, /**/ 0, 1, 2, 4, 5, 7, 3, 6,  //
+      3, 4, 5, 7, 0, 1, 2, 6, /**/ 0, 3, 4, 5, 7, 1, 2, 6,  //
+      1, 3, 4, 5, 7, 0, 2, 6, /**/ 0, 1, 3, 4, 5, 7, 2, 6,  //
+      2, 3, 4, 5, 7, 0, 1, 6, /**/ 0, 2, 3, 4, 5, 7, 1, 6,  //
+      1, 2, 3, 4, 5, 7, 0, 6, /**/ 0, 1, 2, 3, 4, 5, 7, 6,  //
+      6, 7, 0, 1, 2, 3, 4, 5, /**/ 0, 6, 7, 1, 2, 3, 4, 5,  //
+      1, 6, 7, 0, 2, 3, 4, 5, /**/ 0, 1, 6, 7, 2, 3, 4, 5,  //
+      2, 6, 7, 0, 1, 3, 4, 5, /**/ 0, 2, 6, 7, 1, 3, 4, 5,  //
+      1, 2, 6, 7, 0, 3, 4, 5, /**/ 0, 1, 2, 6, 7, 3, 4, 5,  //
+      3, 6, 7, 0, 1, 2, 4, 5, /**/ 0, 3, 6, 7, 1, 2, 4, 5,  //
+      1, 3, 6, 7, 0, 2, 4, 5, /**/ 0, 1, 3, 6, 7, 2, 4, 5,  //
+      2, 3, 6, 7, 0, 1, 4, 5, /**/ 0, 2, 3, 6, 7, 1, 4, 5,  //
+      1, 2, 3, 6, 7, 0, 4, 5, /**/ 0, 1, 2, 3, 6, 7, 4, 5,  //
+      4, 6, 7, 0, 1, 2, 3, 5, /**/ 0, 4, 6, 7, 1, 2, 3, 5,  //
+      1, 4, 6, 7, 0, 2, 3, 5, /**/ 0, 1, 4, 6, 7, 2, 3, 5,  //
+      2, 4, 6, 7, 0, 1, 3, 5, /**/ 0, 2, 4, 6, 7, 1, 3, 5,  //
+      1, 2, 4, 6, 7, 0, 3, 5, /**/ 0, 1, 2, 4, 6, 7, 3, 5,  //
+      3, 4, 6, 7, 0, 1, 2, 5, /**/ 0, 3, 4, 6, 7, 1, 2, 5,  //
+      1, 3, 4, 6, 7, 0, 2, 5, /**/ 0, 1, 3, 4, 6, 7, 2, 5,  //
+      2, 3, 4, 6, 7, 0, 1, 5, /**/ 0, 2, 3, 4, 6, 7, 1, 5,  //
+      1, 2, 3, 4, 6, 7, 0, 5, /**/ 0, 1, 2, 3, 4, 6, 7, 5,  //
+      5, 6, 7, 0, 1, 2, 3, 4, /**/ 0, 5, 6, 7, 1, 2, 3, 4,  //
+      1, 5, 6, 7, 0, 2, 3, 4, /**/ 0, 1, 5, 6, 7, 2, 3, 4,  //
+      2, 5, 6, 7, 0, 1, 3, 4, /**/ 0, 2, 5, 6, 7, 1, 3, 4,  //
+      1, 2, 5, 6, 7, 0, 3, 4, /**/ 0, 1, 2, 5, 6, 7, 3, 4,  //
+      3, 5, 6, 7, 0, 1, 2, 4, /**/ 0, 3, 5, 6, 7, 1, 2, 4,  //
+      1, 3, 5, 6, 7, 0, 2, 4, /**/ 0, 1, 3, 5, 6, 7, 2, 4,  //
+      2, 3, 5, 6, 7, 0, 1, 4, /**/ 0, 2, 3, 5, 6, 7, 1, 4,  //
+      1, 2, 3, 5, 6, 7, 0, 4, /**/ 0, 1, 2, 3, 5, 6, 7, 4,  //
+      4, 5, 6, 7, 0, 1, 2, 3, /**/ 0, 4, 5, 6, 7, 1, 2, 3,  //
+      1, 4, 5, 6, 7, 0, 2, 3, /**/ 0, 1, 4, 5, 6, 7, 2, 3,  //
+      2, 4, 5, 6, 7, 0, 1, 3, /**/ 0, 2, 4, 5, 6, 7, 1, 3,  //
+      1, 2, 4, 5, 6, 7, 0, 3, /**/ 0, 1, 2, 4, 5, 6, 7, 3,  //
+      3, 4, 5, 6, 7, 0, 1, 2, /**/ 0, 3, 4, 5, 6, 7, 1, 2,  //
+      1, 3, 4, 5, 6, 7, 0, 2, /**/ 0, 1, 3, 4, 5, 6, 7, 2,  //
+      2, 3, 4, 5, 6, 7, 0, 1, /**/ 0, 2, 3, 4, 5, 6, 7, 1,  //
+      1, 2, 3, 4, 5, 6, 7, 0, /**/ 0, 1, 2, 3, 4, 5, 6, 7};
+
+  for (size_t i = 0; i < Lanes(d); i += 8) {
+    // Each byte worth of bits is the index of one of 256 8-byte ranges, and its
+    // population count determines how far to advance the write position.
+    const size_t bits8 = bits[i / 8];
+    const auto indices = Load(d8, table + bits8 * 8);
+    const auto compressed = TableLookupBytes(LoadU(d8, lanes + i), indices);
+    StoreU(compressed, d8, pos);  // stores a whole d8 vector even though pos advances by less
+    pos += PopCount(bits8);
+  }
+  return static_cast<size_t>(pos - unaligned);
+}
+
+template <class V, class M, class D, typename T, HWY_IF_T_SIZE(T, 1)>  // mask wrapper over CompressBitsStore
+HWY_API size_t CompressStore(V v, M mask, D d, T* HWY_RESTRICT unaligned) {
+  uint8_t bits[HWY_MAX(size_t{8}, MaxLanes(d) / 8)];  // never smaller than 8 bytes
+  (void)StoreMaskBits(d, mask, bits);  // return value deliberately discarded
+  return CompressBitsStore(v, bits, d, unaligned);
+}
+
+template <class V, class M, class D, typename T, HWY_IF_T_SIZE(T, 1)>  // compress, then blended store
+HWY_API size_t CompressBlendedStore(V v, M mask, D d,
+                                    T* HWY_RESTRICT unaligned) {
+  HWY_ALIGN T buf[MaxLanes(d)];  // staging buffer that receives the compressed lanes
+  const size_t bytes = CompressStore(v, mask, d, buf);
+  BlendedStore(Load(d, buf), FirstN(d, bytes), d, unaligned);  // NOTE(review): presumably writes only the first `bytes` lanes
+  return bytes;
+}
+
+// For reasons unknown, HWY_IF_T_SIZE_V is a compile error in SVE.
+template <class V, class M, typename T = TFromV<V>, HWY_IF_T_SIZE(T, 1)>
+HWY_API V Compress(V v, const M mask) {  // CompressStore into a local buffer, then reload
+  const DFromV<V> d;
+  HWY_ALIGN T lanes[MaxLanes(d)];
+  (void)CompressStore(v, mask, d, lanes);  // the returned count is not needed here
+  return Load(d, lanes);
+}
+
+template <class V, typename T = TFromV<V>, HWY_IF_T_SIZE(T, 1)>
+HWY_API V CompressBits(V v, const uint8_t* HWY_RESTRICT bits) {  // as Compress, but from packed bits
+  const DFromV<V> d;
+  HWY_ALIGN T lanes[MaxLanes(d)];
+  (void)CompressBitsStore(v, bits, d, lanes);  // the returned count is not needed here
+  return Load(d, lanes);
+}
+
+template <class V, class M, typename T = TFromV<V>, HWY_IF_T_SIZE(T, 1)>
+HWY_API V CompressNot(V v, M mask) {  // Compress using the negated mask
+  return Compress(v, Not(mask));
+}
+
+#endif  // HWY_NATIVE_COMPRESS8
+
+// ------------------------------ Expand
+
+// "Include guard": skip if native 8/16-bit Expand/LoadExpand are available.
+// Note that this generic implementation assumes <= 128 bit fixed vectors;
+// the SVE and RVV targets provide their own native implementations.
+#if (defined(HWY_NATIVE_EXPAND) == defined(HWY_TARGET_TOGGLE)) || HWY_IDE
+#ifdef HWY_NATIVE_EXPAND
+#undef HWY_NATIVE_EXPAND
+#else
+#define HWY_NATIVE_EXPAND
+#endif
+
+namespace detail {
+
+#if HWY_IDE
+template <class M>
+HWY_INLINE uint64_t BitsFromMask(M /* mask */) {  // stub compiled only under #if HWY_IDE
+  return 0;  // the argument is ignored
+}
+#endif  // HWY_IDE
+
+template <size_t N>
+HWY_INLINE Vec128<uint8_t, N> IndicesForExpandFromBits(uint64_t mask_bits) {
+  static_assert(N <= 8, "Should only be called for half-vectors");
+  const Simd<uint8_t, N, 0> du8;
+  HWY_DASSERT(mask_bits < 0x100);
+  alignas(16) static constexpr uint8_t table[2048] = {
+      // PrintExpand8x8Tables
+      128, 128, 128, 128, 128, 128, 128, 128,  //
+      0,   128, 128, 128, 128, 128, 128, 128,  //
+      128, 0,   128, 128, 128, 128, 128, 128,  //
+      0,   1,   128, 128, 128, 128, 128, 128,  //
+      128, 128, 0,   128, 128, 128, 128, 128,  //
+      0,   128, 1,   128, 128, 128, 128, 128,  //
+      128, 0,   1,   128, 128, 128, 128, 128,  //
+      0,   1,   2,   128, 128, 128, 128, 128,  //
+      128, 128, 128, 0,   128, 128, 128, 128,  //
+      0,   128, 128, 1,   128, 128, 128, 128,  //
+      128, 0,   128, 1,   128, 128, 128, 128,  //
+      0,   1,   128, 2,   128, 128, 128, 128,  //
+      128, 128, 0,   1,   128, 128, 128, 128,  //
+      0,   128, 1,   2,   128, 128, 128, 128,  //
+      128, 0,   1,   2,   128, 128, 128, 128,  //
+      0,   1,   2,   3,   128, 128, 128, 128,  //
+      128, 128, 128, 128, 0,   128, 128, 128,  //
+      0,   128, 128, 128, 1,   128, 128, 128,  //
+      128, 0,   128, 128, 1,   128, 128, 128,  //
+      0,   1,   128, 128, 2,   128, 128, 128,  //
+      128, 128, 0,   128, 1,   128, 128, 128,  //
+      0,   128, 1,   128, 2,   128, 128, 128,  //
+      128, 0,   1,   128, 2,   128, 128, 128,  //
+      0,   1,   2,   128, 3,   128, 128, 128,  //
+      128, 128, 128, 0,   1,   128, 128, 128,  //
+      0,   128, 128, 1,   2,   128, 128, 128,  //
+      128, 0,   128, 1,   2,   128, 128, 128,  //
+      0,   1,   128, 2,   3,   128, 128, 128,  //
+      128, 128, 0,   1,   2,   128, 128, 128,  //
+      0,   128, 1,   2,   3,   128, 128, 128,  //
+      128, 0,   1,   2,   3,   128, 128, 128,  //
+      0,   1,   2,   3,   4,   128, 128, 128,  //
+      128, 128, 128, 128, 128, 0,   128, 128,  //
+      0,   128, 128, 128, 128, 1,   128, 128,  //
+      128, 0,   128, 128, 128, 1,   128, 128,  //
+      0,   1,   128, 128, 128, 2,   128, 128,  //
+      128, 128, 0,   128, 128, 1,   128, 128,  //
+      0,   128, 1,   128, 128, 2,   128, 128,  //
+      128, 0,   1,   128, 128, 2,   128, 128,  //
+      0,   1,   2,   128, 128, 3,   128, 128,  //
+      128, 128, 128, 0,   128, 1,   128, 128,  //
+      0,   128, 128, 1,   128, 2,   128, 128,  //
+      128, 0,   128, 1,   128, 2,   128, 128,  //
+      0,   1,   128, 2,   128, 3,   128, 128,  //
+      128, 128, 0,   1,   128, 2,   128, 128,  //
+      0,   128, 1,   2,   128, 3,   128, 128,  //
+      128, 0,   1,   2,   128, 3,   128, 128,  //
+      0,   1,   2,   3,   128, 4,   128, 128,  //
+      128, 128, 128, 128, 0,   1,   128, 128,  //
+      0,   128, 128, 128, 1,   2,   128, 128,  //
+      128, 0,   128, 128, 1,   2,   128, 128,  //
+      0,   1,   128, 128, 2,   3,   128, 128,  //
+      128, 128, 0,   128, 1,   2,   128, 128,  //
+      0,   128, 1,   128, 2,   3,   128, 128,  //
+      128, 0,   1,   128, 2,   3,   128, 128,  //
+      0,   1,   2,   128, 3,   4,   128, 128,  //
+      128, 128, 128, 0,   1,   2,   128, 128,  //
+      0,   128, 128, 1,   2,   3,   128, 128,  //
+      128, 0,   128, 1,   2,   3,   128, 128,  //
+      0,   1,   128, 2,   3,   4,   128, 128,  //
+      128, 128, 0,   1,   2,   3,   128, 128,  //
+      0,   128, 1,   2,   3,   4,   128, 128,  //
+      128, 0,   1,   2,   3,   4,   128, 128,  //
+      0,   1,   2,   3,   4,   5,   128, 128,  //
+      128, 128, 128, 128, 128, 128, 0,   128,  //
+      0,   128, 128, 128, 128, 128, 1,   128,  //
+      128, 0,   128, 128, 128, 128, 1,   128,  //
+      0,   1,   128, 128, 128, 128, 2,   128,  //
+      128, 128, 0,   128, 128, 128, 1,   128,  //
+      0,   128, 1,   128, 128, 128, 2,   128,  //
+      128, 0,   1,   128, 128, 128, 2,   128,  //
+      0,   1,   2,   128, 128, 128, 3,   128,  //
+      128, 128, 128, 0,   128, 128, 1,   128,  //
+      0,   128, 128, 1,   128, 128, 2,   128,  //
+      128, 0,   128, 1,   128, 128, 2,   128,  //
+      0,   1,   128, 2,   128, 128, 3,   128,  //
+      128, 128, 0,   1,   128, 128, 2,   128,  //
+      0,   128, 1,   2,   128, 128, 3,   128,  //
+      128, 0,   1,   2,   128, 128, 3,   128,  //
+      0,   1,   2,   3,   128, 128, 4,   128,  //
+      128, 128, 128, 128, 0,   128, 1,   128,  //
+      0,   128, 128, 128, 1,   128, 2,   128,  //
+      128, 0,   128, 128, 1,   128, 2,   128,  //
+      0,   1,   128, 128, 2,   128, 3,   128,  //
+      128, 128, 0,   128, 1,   128, 2,   128,  //
+      0,   128, 1,   128, 2,   128, 3,   128,  //
+      128, 0,   1,   128, 2,   128, 3,   128,  //
+      0,   1,   2,   128, 3,   128, 4,   128,  //
+      128, 128, 128, 0,   1,   128, 2,   128,  //
+      0,   128, 128, 1,   2,   128, 3,   128,  //
+      128, 0,   128, 1,   2,   128, 3,   128,  //
+      0,   1,   128, 2,   3,   128, 4,   128,  //
+      128, 128, 0,   1,   2,   128, 3,   128,  //
+      0,   128, 1,   2,   3,   128, 4,   128,  //
+      128, 0,   1,   2,   3,   128, 4,   128,  //
+      0,   1,   2,   3,   4,   128, 5,   128,  //
+      128, 128, 128, 128, 128, 0,   1,   128,  //
+      0,   128, 128, 128, 128, 1,   2,   128,  //
+      128, 0,   128, 128, 128, 1,   2,   128,  //
+      0,   1,   128, 128, 128, 2,   3,   128,  //
+      128, 128, 0,   128, 128, 1,   2,   128,  //
+      0,   128, 1,   128, 128, 2,   3,   128,  //
+      128, 0,   1,   128, 128, 2,   3,   128,  //
+      0,   1,   2,   128, 128, 3,   4,   128,  //
+      128, 128, 128, 0,   128, 1,   2,   128,  //
+      0,   128, 128, 1,   128, 2,   3,   128,  //
+      128, 0,   128, 1,   128, 2,   3,   128,  //
+      0,   1,   128, 2,   128, 3,   4,   128,  //
+      128, 128, 0,   1,   128, 2,   3,   128,  //
+      0,   128, 1,   2,   128, 3,   4,   128,  //
+      128, 0,   1,   2,   128, 3,   4,   128,  //
+      0,   1,   2,   3,   128, 4,   5,   128,  //
+      128, 128, 128, 128, 0,   1,   2,   128,  //
+      0,   128, 128, 128, 1,   2,   3,   128,  //
+      128, 0,   128, 128, 1,   2,   3,   128,  //
+      0,   1,   128, 128, 2,   3,   4,   128,  //
+      128, 128, 0,   128, 1,   2,   3,   128,  //
+      0,   128, 1,   128, 2,   3,   4,   128,  //
+      128, 0,   1,   128, 2,   3,   4,   128,  //
+      0,   1,   2,   128, 3,   4,   5,   128,  //
+      128, 128, 128, 0,   1,   2,   3,   128,  //
+      0,   128, 128, 1,   2,   3,   4,   128,  //
+      128, 0,   128, 1,   2,   3,   4,   128,  //
+      0,   1,   128, 2,   3,   4,   5,   128,  //
+      128, 128, 0,   1,   2,   3,   4,   128,  //
+      0,   128, 1,   2,   3,   4,   5,   128,  //
+      128, 0,   1,   2,   3,   4,   5,   128,  //
+      0,   1,   2,   3,   4,   5,   6,   128,  //
+      128, 128, 128, 128, 128, 128, 128, 0,    //
+      0,   128, 128, 128, 128, 128, 128, 1,    //
+      128, 0,   128, 128, 128, 128, 128, 1,    //
+      0,   1,   128, 128, 128, 128, 128, 2,    //
+      128, 128, 0,   128, 128, 128, 128, 1,    //
+      0,   128, 1,   128, 128, 128, 128, 2,    //
+      128, 0,   1,   128, 128, 128, 128, 2,    //
+      0,   1,   2,   128, 128, 128, 128, 3,    //
+      128, 128, 128, 0,   128, 128, 128, 1,    //
+      0,   128, 128, 1,   128, 128, 128, 2,    //
+      128, 0,   128, 1,   128, 128, 128, 2,    //
+      0,   1,   128, 2,   128, 128, 128, 3,    //
+      128, 128, 0,   1,   128, 128, 128, 2,    //
+      0,   128, 1,   2,   128, 128, 128, 3,    //
+      128, 0,   1,   2,   128, 128, 128, 3,    //
+      0,   1,   2,   3,   128, 128, 128, 4,    //
+      128, 128, 128, 128, 0,   128, 128, 1,    //
+      0,   128, 128, 128, 1,   128, 128, 2,    //
+      128, 0,   128, 128, 1,   128, 128, 2,    //
+      0,   1,   128, 128, 2,   128, 128, 3,    //
+      128, 128, 0,   128, 1,   128, 128, 2,    //
+      0,   128, 1,   128, 2,   128, 128, 3,    //
+      128, 0,   1,   128, 2,   128, 128, 3,    //
+      0,   1,   2,   128, 3,   128, 128, 4,    //
+      128, 128, 128, 0,   1,   128, 128, 2,    //
+      0,   128, 128, 1,   2,   128, 128, 3,    //
+      128, 0,   128, 1,   2,   128, 128, 3,    //
+      0,   1,   128, 2,   3,   128, 128, 4,    //
+      128, 128, 0,   1,   2,   128, 128, 3,    //
+      0,   128, 1,   2,   3,   128, 128, 4,    //
+      128, 0,   1,   2,   3,   128, 128, 4,    //
+      0,   1,   2,   3,   4,   128, 128, 5,    //
+      128, 128, 128, 128, 128, 0,   128, 1,    //
+      0,   128, 128, 128, 128, 1,   128, 2,    //
+      128, 0,   128, 128, 128, 1,   128, 2,    //
+      0,   1,   128, 128, 128, 2,   128, 3,    //
+      128, 128, 0,   128, 128, 1,   128, 2,    //
+      0,   128, 1,   128, 128, 2,   128, 3,    //
+      128, 0,   1,   128, 128, 2,   128, 3,    //
+      0,   1,   2,   128, 128, 3,   128, 4,    //
+      128, 128, 128, 0,   128, 1,   128, 2,    //
+      0,   128, 128, 1,   128, 2,   128, 3,    //
+      128, 0,   128, 1,   128, 2,   128, 3,    //
+      0,   1,   128, 2,   128, 3,   128, 4,    //
+      128, 128, 0,   1,   128, 2,   128, 3,    //
+      0,   128, 1,   2,   128, 3,   128, 4,    //
+      128, 0,   1,   2,   128, 3,   128, 4,    //
+      0,   1,   2,   3,   128, 4,   128, 5,    //
+      128, 128, 128, 128, 0,   1,   128, 2,    //
+      0,   128, 128, 128, 1,   2,   128, 3,    //
+      128, 0,   128, 128, 1,   2,   128, 3,    //
+      0,   1,   128, 128, 2,   3,   128, 4,    //
+      128, 128, 0,   128, 1,   2,   128, 3,    //
+      0,   128, 1,   128, 2,   3,   128, 4,    //
+      128, 0,   1,   128, 2,   3,   128, 4,    //
+      0,   1,   2,   128, 3,   4,   128, 5,    //
+      128, 128, 128, 0,   1,   2,   128, 3,    //
+      0,   128, 128, 1,   2,   3,   128, 4,    //
+      128, 0,   128, 1,   2,   3,   128, 4,    //
+      0,   1,   128, 2,   3,   4,   128, 5,    //
+      128, 128, 0,   1,   2,   3,   128, 4,    //
+      0,   128, 1,   2,   3,   4,   128, 5,    //
+      128, 0,   1,   2,   3,   4,   128, 5,    //
+      0,   1,   2,   3,   4,   5,   128, 6,    //
+      128, 128, 128, 128, 128, 128, 0,   1,    //
+      0,   128, 128, 128, 128, 128, 1,   2,    //
+      128, 0,   128, 128, 128, 128, 1,   2,    //
+      0,   1,   128, 128, 128, 128, 2,   3,    //
+      128, 128, 0,   128, 128, 128, 1,   2,    //
+      0,   128, 1,   128, 128, 128, 2,   3,    //
+      128, 0,   1,   128, 128, 128, 2,   3,    //
+      0,   1,   2,   128, 128, 128, 3,   4,    //
+      128, 128, 128, 0,   128, 128, 1,   2,    //
+      0,   128, 128, 1,   128, 128, 2,   3,    //
+      128, 0,   128, 1,   128, 128, 2,   3,    //
+      0,   1,   128, 2,   128, 128, 3,   4,    //
+      128, 128, 0,   1,   128, 128, 2,   3,    //
+      0,   128, 1,   2,   128, 128, 3,   4,    //
+      128, 0,   1,   2,   128, 128, 3,   4,    //
+      0,   1,   2,   3,   128, 128, 4,   5,    //
+      128, 128, 128, 128, 0,   128, 1,   2,    //
+      0,   128, 128, 128, 1,   128, 2,   3,    //
+      128, 0,   128, 128, 1,   128, 2,   3,    //
+      0,   1,   128, 128, 2,   128, 3,   4,    //
+      128, 128, 0,   128, 1,   128, 2,   3,    //
+      0,   128, 1,   128, 2,   128, 3,   4,    //
+      128, 0,   1,   128, 2,   128, 3,   4,    //
+      0,   1,   2,   128, 3,   128, 4,   5,    //
+      128, 128, 128, 0,   1,   128, 2,   3,    //
+      0,   128, 128, 1,   2,   128, 3,   4,    //
+      128, 0,   128, 1,   2,   128, 3,   4,    //
+      0,   1,   128, 2,   3,   128, 4,   5,    //
+      128, 128, 0,   1,   2,   128, 3,   4,    //
+      0,   128, 1,   2,   3,   128, 4,   5,    //
+      128, 0,   1,   2,   3,   128, 4,   5,    //
+      0,   1,   2,   3,   4,   128, 5,   6,    //
+      128, 128, 128, 128, 128, 0,   1,   2,    //
+      0,   128, 128, 128, 128, 1,   2,   3,    //
+      128, 0,   128, 128, 128, 1,   2,   3,    //
+      0,   1,   128, 128, 128, 2,   3,   4,    //
+      128, 128, 0,   128, 128, 1,   2,   3,    //
+      0,   128, 1,   128, 128, 2,   3,   4,    //
+      128, 0,   1,   128, 128, 2,   3,   4,    //
+      0,   1,   2,   128, 128, 3,   4,   5,    //
+      128, 128, 128, 0,   128, 1,   2,   3,    //
+      0,   128, 128, 1,   128, 2,   3,   4,    //
+      128, 0,   128, 1,   128, 2,   3,   4,    //
+      0,   1,   128, 2,   128, 3,   4,   5,    //
+      128, 128, 0,   1,   128, 2,   3,   4,    //
+      0,   128, 1,   2,   128, 3,   4,   5,    //
+      128, 0,   1,   2,   128, 3,   4,   5,    //
+      0,   1,   2,   3,   128, 4,   5,   6,    //
+      128, 128, 128, 128, 0,   1,   2,   3,    //
+      0,   128, 128, 128, 1,   2,   3,   4,    //
+      128, 0,   128, 128, 1,   2,   3,   4,    //
+      0,   1,   128, 128, 2,   3,   4,   5,    //
+      128, 128, 0,   128, 1,   2,   3,   4,    //
+      0,   128, 1,   128, 2,   3,   4,   5,    //
+      128, 0,   1,   128, 2,   3,   4,   5,    //
+      0,   1,   2,   128, 3,   4,   5,   6,    //
+      128, 128, 128, 0,   1,   2,   3,   4,    //
+      0,   128, 128, 1,   2,   3,   4,   5,    //
+      128, 0,   128, 1,   2,   3,   4,   5,    //
+      0,   1,   128, 2,   3,   4,   5,   6,    //
+      128, 128, 0,   1,   2,   3,   4,   5,    //
+      0,   128, 1,   2,   3,   4,   5,   6,    //
+      128, 0,   1,   2,   3,   4,   5,   6,    //
+      0,   1,   2,   3,   4,   5,   6,   7};
+  return LoadU(du8, table + mask_bits * 8);
+}
+
+}  // namespace detail
+
+// 8-bit lanes, up to 8 bytes: a single table-driven byte shuffle.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 1), HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_API Vec128<T, N> Expand(Vec128<T, N> v, Mask128<T, N> mask) {
+  // Fetch the byte-shuffle control for this mask, apply it to `v`, then cast
+  // the shuffled bytes back to the lane type of `v`.
+  const uint64_t bits = detail::BitsFromMask(mask);
+  const Vec128<uint8_t, N> shuffle_idx =
+      detail::IndicesForExpandFromBits<N>(bits);
+  return BitCast(DFromV<decltype(v)>(), TableLookupBytesOr0(v, shuffle_idx));
+}
+
+// Full vector of bytes: two table lookups, one per 8-byte half
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T> Expand(Vec128<T> v, Mask128<T> mask) {
+  const Full128<T> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const Half<decltype(du)> dh;
+  using VH = VFromD<decltype(dh)>;  // 8-byte half vector
+
+  // The low/high 8 mask bits pick the shuffle indices of the low/high half.
+  const uint64_t bits = detail::BitsFromMask(mask);
+  const uint64_t bits_lo = bits & 0xFF;
+  const uint64_t bits_hi = bits >> 8;
+  const VH idx_lo = detail::IndicesForExpandFromBits<8>(bits_lo);
+  const VH idx_hi = detail::IndicesForExpandFromBits<8>(bits_hi);
+
+  // The upper half must read v past the PopCount(bits_lo) bytes consumed by
+  // idx_lo. No instruction shifts a register by a variable byte count, and
+  // storing v to reload it at an offset would cause a store-load forwarding
+  // stall, so shuffle with indices loaded at that offset instead.
+  alignas(16) static constexpr uint8_t kSkip[32] = {
+      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,
+      11,  12,  13,  14,  15,  128, 128, 128, 128, 128, 128,
+      128, 128, 128, 128, 128, 128, 128, 128, 128, 128};
+  const Vec128<uint8_t> bytes = BitCast(du, v);
+  const VFromD<decltype(du)> skip = LoadU(du, kSkip + PopCount(bits_lo));
+  const VH src_lo = LowerHalf(dh, bytes);
+  const VH src_hi = LowerHalf(dh, TableLookupBytesOr0(bytes, skip));
+
+  // Expand each half with its own indices; Combine takes the upper half first.
+  const VH out_lo = TableLookupBytesOr0(src_lo, idx_lo);
+  const VH out_hi = TableLookupBytesOr0(src_hi, idx_hi);
+  return BitCast(d, Combine(du, out_hi, out_lo));
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec128<T, N> Expand(Vec128<T, N> v, Mask128<T, N> mask) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  // 16-bit lanes: fetch per-mask byte-shuffle indices, then shuffle v's bytes.
+  const Rebind<uint8_t, decltype(d)> du8;
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  // Row mask_bits holds, per output lane, the source byte offset or 128 (zero).
+  // Storing as 8-bit reduces table size from 4 KiB to 2 KiB. We cannot apply
+  // the nibble trick used below because not all indices fit within one lane.
+  alignas(16) static constexpr uint8_t table[2048] = {
+      // PrintExpand16x8ByteTables
+      128, 128, 128, 128, 128, 128, 128, 128,  //
+      0,   128, 128, 128, 128, 128, 128, 128,  //
+      128, 0,   128, 128, 128, 128, 128, 128,  //
+      0,   2,   128, 128, 128, 128, 128, 128,  //
+      128, 128, 0,   128, 128, 128, 128, 128,  //
+      0,   128, 2,   128, 128, 128, 128, 128,  //
+      128, 0,   2,   128, 128, 128, 128, 128,  //
+      0,   2,   4,   128, 128, 128, 128, 128,  //
+      128, 128, 128, 0,   128, 128, 128, 128,  //
+      0,   128, 128, 2,   128, 128, 128, 128,  //
+      128, 0,   128, 2,   128, 128, 128, 128,  //
+      0,   2,   128, 4,   128, 128, 128, 128,  //
+      128, 128, 0,   2,   128, 128, 128, 128,  //
+      0,   128, 2,   4,   128, 128, 128, 128,  //
+      128, 0,   2,   4,   128, 128, 128, 128,  //
+      0,   2,   4,   6,   128, 128, 128, 128,  //
+      128, 128, 128, 128, 0,   128, 128, 128,  //
+      0,   128, 128, 128, 2,   128, 128, 128,  //
+      128, 0,   128, 128, 2,   128, 128, 128,  //
+      0,   2,   128, 128, 4,   128, 128, 128,  //
+      128, 128, 0,   128, 2,   128, 128, 128,  //
+      0,   128, 2,   128, 4,   128, 128, 128,  //
+      128, 0,   2,   128, 4,   128, 128, 128,  //
+      0,   2,   4,   128, 6,   128, 128, 128,  //
+      128, 128, 128, 0,   2,   128, 128, 128,  //
+      0,   128, 128, 2,   4,   128, 128, 128,  //
+      128, 0,   128, 2,   4,   128, 128, 128,  //
+      0,   2,   128, 4,   6,   128, 128, 128,  //
+      128, 128, 0,   2,   4,   128, 128, 128,  //
+      0,   128, 2,   4,   6,   128, 128, 128,  //
+      128, 0,   2,   4,   6,   128, 128, 128,  //
+      0,   2,   4,   6,   8,   128, 128, 128,  //
+      128, 128, 128, 128, 128, 0,   128, 128,  //
+      0,   128, 128, 128, 128, 2,   128, 128,  //
+      128, 0,   128, 128, 128, 2,   128, 128,  //
+      0,   2,   128, 128, 128, 4,   128, 128,  //
+      128, 128, 0,   128, 128, 2,   128, 128,  //
+      0,   128, 2,   128, 128, 4,   128, 128,  //
+      128, 0,   2,   128, 128, 4,   128, 128,  //
+      0,   2,   4,   128, 128, 6,   128, 128,  //
+      128, 128, 128, 0,   128, 2,   128, 128,  //
+      0,   128, 128, 2,   128, 4,   128, 128,  //
+      128, 0,   128, 2,   128, 4,   128, 128,  //
+      0,   2,   128, 4,   128, 6,   128, 128,  //
+      128, 128, 0,   2,   128, 4,   128, 128,  //
+      0,   128, 2,   4,   128, 6,   128, 128,  //
+      128, 0,   2,   4,   128, 6,   128, 128,  //
+      0,   2,   4,   6,   128, 8,   128, 128,  //
+      128, 128, 128, 128, 0,   2,   128, 128,  //
+      0,   128, 128, 128, 2,   4,   128, 128,  //
+      128, 0,   128, 128, 2,   4,   128, 128,  //
+      0,   2,   128, 128, 4,   6,   128, 128,  //
+      128, 128, 0,   128, 2,   4,   128, 128,  //
+      0,   128, 2,   128, 4,   6,   128, 128,  //
+      128, 0,   2,   128, 4,   6,   128, 128,  //
+      0,   2,   4,   128, 6,   8,   128, 128,  //
+      128, 128, 128, 0,   2,   4,   128, 128,  //
+      0,   128, 128, 2,   4,   6,   128, 128,  //
+      128, 0,   128, 2,   4,   6,   128, 128,  //
+      0,   2,   128, 4,   6,   8,   128, 128,  //
+      128, 128, 0,   2,   4,   6,   128, 128,  //
+      0,   128, 2,   4,   6,   8,   128, 128,  //
+      128, 0,   2,   4,   6,   8,   128, 128,  //
+      0,   2,   4,   6,   8,   10,  128, 128,  //
+      128, 128, 128, 128, 128, 128, 0,   128,  //
+      0,   128, 128, 128, 128, 128, 2,   128,  //
+      128, 0,   128, 128, 128, 128, 2,   128,  //
+      0,   2,   128, 128, 128, 128, 4,   128,  //
+      128, 128, 0,   128, 128, 128, 2,   128,  //
+      0,   128, 2,   128, 128, 128, 4,   128,  //
+      128, 0,   2,   128, 128, 128, 4,   128,  //
+      0,   2,   4,   128, 128, 128, 6,   128,  //
+      128, 128, 128, 0,   128, 128, 2,   128,  //
+      0,   128, 128, 2,   128, 128, 4,   128,  //
+      128, 0,   128, 2,   128, 128, 4,   128,  //
+      0,   2,   128, 4,   128, 128, 6,   128,  //
+      128, 128, 0,   2,   128, 128, 4,   128,  //
+      0,   128, 2,   4,   128, 128, 6,   128,  //
+      128, 0,   2,   4,   128, 128, 6,   128,  //
+      0,   2,   4,   6,   128, 128, 8,   128,  //
+      128, 128, 128, 128, 0,   128, 2,   128,  //
+      0,   128, 128, 128, 2,   128, 4,   128,  //
+      128, 0,   128, 128, 2,   128, 4,   128,  //
+      0,   2,   128, 128, 4,   128, 6,   128,  //
+      128, 128, 0,   128, 2,   128, 4,   128,  //
+      0,   128, 2,   128, 4,   128, 6,   128,  //
+      128, 0,   2,   128, 4,   128, 6,   128,  //
+      0,   2,   4,   128, 6,   128, 8,   128,  //
+      128, 128, 128, 0,   2,   128, 4,   128,  //
+      0,   128, 128, 2,   4,   128, 6,   128,  //
+      128, 0,   128, 2,   4,   128, 6,   128,  //
+      0,   2,   128, 4,   6,   128, 8,   128,  //
+      128, 128, 0,   2,   4,   128, 6,   128,  //
+      0,   128, 2,   4,   6,   128, 8,   128,  //
+      128, 0,   2,   4,   6,   128, 8,   128,  //
+      0,   2,   4,   6,   8,   128, 10,  128,  //
+      128, 128, 128, 128, 128, 0,   2,   128,  //
+      0,   128, 128, 128, 128, 2,   4,   128,  //
+      128, 0,   128, 128, 128, 2,   4,   128,  //
+      0,   2,   128, 128, 128, 4,   6,   128,  //
+      128, 128, 0,   128, 128, 2,   4,   128,  //
+      0,   128, 2,   128, 128, 4,   6,   128,  //
+      128, 0,   2,   128, 128, 4,   6,   128,  //
+      0,   2,   4,   128, 128, 6,   8,   128,  //
+      128, 128, 128, 0,   128, 2,   4,   128,  //
+      0,   128, 128, 2,   128, 4,   6,   128,  //
+      128, 0,   128, 2,   128, 4,   6,   128,  //
+      0,   2,   128, 4,   128, 6,   8,   128,  //
+      128, 128, 0,   2,   128, 4,   6,   128,  //
+      0,   128, 2,   4,   128, 6,   8,   128,  //
+      128, 0,   2,   4,   128, 6,   8,   128,  //
+      0,   2,   4,   6,   128, 8,   10,  128,  //
+      128, 128, 128, 128, 0,   2,   4,   128,  //
+      0,   128, 128, 128, 2,   4,   6,   128,  //
+      128, 0,   128, 128, 2,   4,   6,   128,  //
+      0,   2,   128, 128, 4,   6,   8,   128,  //
+      128, 128, 0,   128, 2,   4,   6,   128,  //
+      0,   128, 2,   128, 4,   6,   8,   128,  //
+      128, 0,   2,   128, 4,   6,   8,   128,  //
+      0,   2,   4,   128, 6,   8,   10,  128,  //
+      128, 128, 128, 0,   2,   4,   6,   128,  //
+      0,   128, 128, 2,   4,   6,   8,   128,  //
+      128, 0,   128, 2,   4,   6,   8,   128,  //
+      0,   2,   128, 4,   6,   8,   10,  128,  //
+      128, 128, 0,   2,   4,   6,   8,   128,  //
+      0,   128, 2,   4,   6,   8,   10,  128,  //
+      128, 0,   2,   4,   6,   8,   10,  128,  //
+      0,   2,   4,   6,   8,   10,  12,  128,  //
+      128, 128, 128, 128, 128, 128, 128, 0,    //
+      0,   128, 128, 128, 128, 128, 128, 2,    //
+      128, 0,   128, 128, 128, 128, 128, 2,    //
+      0,   2,   128, 128, 128, 128, 128, 4,    //
+      128, 128, 0,   128, 128, 128, 128, 2,    //
+      0,   128, 2,   128, 128, 128, 128, 4,    //
+      128, 0,   2,   128, 128, 128, 128, 4,    //
+      0,   2,   4,   128, 128, 128, 128, 6,    //
+      128, 128, 128, 0,   128, 128, 128, 2,    //
+      0,   128, 128, 2,   128, 128, 128, 4,    //
+      128, 0,   128, 2,   128, 128, 128, 4,    //
+      0,   2,   128, 4,   128, 128, 128, 6,    //
+      128, 128, 0,   2,   128, 128, 128, 4,    //
+      0,   128, 2,   4,   128, 128, 128, 6,    //
+      128, 0,   2,   4,   128, 128, 128, 6,    //
+      0,   2,   4,   6,   128, 128, 128, 8,    //
+      128, 128, 128, 128, 0,   128, 128, 2,    //
+      0,   128, 128, 128, 2,   128, 128, 4,    //
+      128, 0,   128, 128, 2,   128, 128, 4,    //
+      0,   2,   128, 128, 4,   128, 128, 6,    //
+      128, 128, 0,   128, 2,   128, 128, 4,    //
+      0,   128, 2,   128, 4,   128, 128, 6,    //
+      128, 0,   2,   128, 4,   128, 128, 6,    //
+      0,   2,   4,   128, 6,   128, 128, 8,    //
+      128, 128, 128, 0,   2,   128, 128, 4,    //
+      0,   128, 128, 2,   4,   128, 128, 6,    //
+      128, 0,   128, 2,   4,   128, 128, 6,    //
+      0,   2,   128, 4,   6,   128, 128, 8,    //
+      128, 128, 0,   2,   4,   128, 128, 6,    //
+      0,   128, 2,   4,   6,   128, 128, 8,    //
+      128, 0,   2,   4,   6,   128, 128, 8,    //
+      0,   2,   4,   6,   8,   128, 128, 10,   //
+      128, 128, 128, 128, 128, 0,   128, 2,    //
+      0,   128, 128, 128, 128, 2,   128, 4,    //
+      128, 0,   128, 128, 128, 2,   128, 4,    //
+      0,   2,   128, 128, 128, 4,   128, 6,    //
+      128, 128, 0,   128, 128, 2,   128, 4,    //
+      0,   128, 2,   128, 128, 4,   128, 6,    //
+      128, 0,   2,   128, 128, 4,   128, 6,    //
+      0,   2,   4,   128, 128, 6,   128, 8,    //
+      128, 128, 128, 0,   128, 2,   128, 4,    //
+      0,   128, 128, 2,   128, 4,   128, 6,    //
+      128, 0,   128, 2,   128, 4,   128, 6,    //
+      0,   2,   128, 4,   128, 6,   128, 8,    //
+      128, 128, 0,   2,   128, 4,   128, 6,    //
+      0,   128, 2,   4,   128, 6,   128, 8,    //
+      128, 0,   2,   4,   128, 6,   128, 8,    //
+      0,   2,   4,   6,   128, 8,   128, 10,   //
+      128, 128, 128, 128, 0,   2,   128, 4,    //
+      0,   128, 128, 128, 2,   4,   128, 6,    //
+      128, 0,   128, 128, 2,   4,   128, 6,    //
+      0,   2,   128, 128, 4,   6,   128, 8,    //
+      128, 128, 0,   128, 2,   4,   128, 6,    //
+      0,   128, 2,   128, 4,   6,   128, 8,    //
+      128, 0,   2,   128, 4,   6,   128, 8,    //
+      0,   2,   4,   128, 6,   8,   128, 10,   //
+      128, 128, 128, 0,   2,   4,   128, 6,    //
+      0,   128, 128, 2,   4,   6,   128, 8,    //
+      128, 0,   128, 2,   4,   6,   128, 8,    //
+      0,   2,   128, 4,   6,   8,   128, 10,   //
+      128, 128, 0,   2,   4,   6,   128, 8,    //
+      0,   128, 2,   4,   6,   8,   128, 10,   //
+      128, 0,   2,   4,   6,   8,   128, 10,   //
+      0,   2,   4,   6,   8,   10,  128, 12,   //
+      128, 128, 128, 128, 128, 128, 0,   2,    //
+      0,   128, 128, 128, 128, 128, 2,   4,    //
+      128, 0,   128, 128, 128, 128, 2,   4,    //
+      0,   2,   128, 128, 128, 128, 4,   6,    //
+      128, 128, 0,   128, 128, 128, 2,   4,    //
+      0,   128, 2,   128, 128, 128, 4,   6,    //
+      128, 0,   2,   128, 128, 128, 4,   6,    //
+      0,   2,   4,   128, 128, 128, 6,   8,    //
+      128, 128, 128, 0,   128, 128, 2,   4,    //
+      0,   128, 128, 2,   128, 128, 4,   6,    //
+      128, 0,   128, 2,   128, 128, 4,   6,    //
+      0,   2,   128, 4,   128, 128, 6,   8,    //
+      128, 128, 0,   2,   128, 128, 4,   6,    //
+      0,   128, 2,   4,   128, 128, 6,   8,    //
+      128, 0,   2,   4,   128, 128, 6,   8,    //
+      0,   2,   4,   6,   128, 128, 8,   10,   //
+      128, 128, 128, 128, 0,   128, 2,   4,    //
+      0,   128, 128, 128, 2,   128, 4,   6,    //
+      128, 0,   128, 128, 2,   128, 4,   6,    //
+      0,   2,   128, 128, 4,   128, 6,   8,    //
+      128, 128, 0,   128, 2,   128, 4,   6,    //
+      0,   128, 2,   128, 4,   128, 6,   8,    //
+      128, 0,   2,   128, 4,   128, 6,   8,    //
+      0,   2,   4,   128, 6,   128, 8,   10,   //
+      128, 128, 128, 0,   2,   128, 4,   6,    //
+      0,   128, 128, 2,   4,   128, 6,   8,    //
+      128, 0,   128, 2,   4,   128, 6,   8,    //
+      0,   2,   128, 4,   6,   128, 8,   10,   //
+      128, 128, 0,   2,   4,   128, 6,   8,    //
+      0,   128, 2,   4,   6,   128, 8,   10,   //
+      128, 0,   2,   4,   6,   128, 8,   10,   //
+      0,   2,   4,   6,   8,   128, 10,  12,   //
+      128, 128, 128, 128, 128, 0,   2,   4,    //
+      0,   128, 128, 128, 128, 2,   4,   6,    //
+      128, 0,   128, 128, 128, 2,   4,   6,    //
+      0,   2,   128, 128, 128, 4,   6,   8,    //
+      128, 128, 0,   128, 128, 2,   4,   6,    //
+      0,   128, 2,   128, 128, 4,   6,   8,    //
+      128, 0,   2,   128, 128, 4,   6,   8,    //
+      0,   2,   4,   128, 128, 6,   8,   10,   //
+      128, 128, 128, 0,   128, 2,   4,   6,    //
+      0,   128, 128, 2,   128, 4,   6,   8,    //
+      128, 0,   128, 2,   128, 4,   6,   8,    //
+      0,   2,   128, 4,   128, 6,   8,   10,   //
+      128, 128, 0,   2,   128, 4,   6,   8,    //
+      0,   128, 2,   4,   128, 6,   8,   10,   //
+      128, 0,   2,   4,   128, 6,   8,   10,   //
+      0,   2,   4,   6,   128, 8,   10,  12,   //
+      128, 128, 128, 128, 0,   2,   4,   6,    //
+      0,   128, 128, 128, 2,   4,   6,   8,    //
+      128, 0,   128, 128, 2,   4,   6,   8,    //
+      0,   2,   128, 128, 4,   6,   8,   10,   //
+      128, 128, 0,   128, 2,   4,   6,   8,    //
+      0,   128, 2,   128, 4,   6,   8,   10,   //
+      128, 0,   2,   128, 4,   6,   8,   10,   //
+      0,   2,   4,   128, 6,   8,   10,  12,   //
+      128, 128, 128, 0,   2,   4,   6,   8,    //
+      0,   128, 128, 2,   4,   6,   8,   10,   //
+      128, 0,   128, 2,   4,   6,   8,   10,   //
+      0,   2,   128, 4,   6,   8,   10,  12,   //
+      128, 128, 0,   2,   4,   6,   8,   10,   //
+      0,   128, 2,   4,   6,   8,   10,  12,   //
+      128, 0,   2,   4,   6,   8,   10,  12,   //
+      0,   2,   4,   6,   8,   10,  12,  14};
+  // Extend to double length because InterleaveLower will only use the (valid)
+  // lower half, and we want N u16.
+  const Twice<decltype(du8)> du8x2;
+  const Vec128<uint8_t, 2 * N> indices8 =
+      ZeroExtendVector(du8x2, Load(du8, table + mask_bits * 8));
+  const Vec128<uint16_t, N> indices16 =
+      BitCast(du, InterleaveLower(du8x2, indices8, indices8));
+  // TableLookupBytesOr0 operates on bytes. Each u16 holds the source offset in
+  // both bytes; add 0 to the even and 1 to the odd byte to address both bytes.
+  const Vec128<uint16_t, N> byte_indices = Add(indices16, Set(du, 0x0100));
+  return BitCast(d, TableLookupBytesOr0(v, byte_indices));
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T, N> Expand(Vec128<T, N> v, Mask128<T, N> mask) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+
+  // One 32-bit entry per mask value, holding a 4-bit source index per lane.
+  alignas(16) static constexpr uint32_t kPackedIdx[16] = {
+      // PrintExpand64x4Nibble - same for 32x4.
+      0x0000ffff, 0x0000fff0, 0x0000ff0f, 0x0000ff10, 0x0000f0ff, 0x0000f1f0,
+      0x0000f10f, 0x0000f210, 0x00000fff, 0x00001ff0, 0x00001f0f, 0x00002f10,
+      0x000010ff, 0x000021f0, 0x0000210f, 0x00003210};
+  alignas(16) static constexpr uint32_t kNibbleShifts[4] = {0, 4, 8, 12};
+
+  // Broadcast the entry for this mask, then move lane i's nibble to bit 0.
+  const uint64_t bits = detail::BitsFromMask(mask);
+  const Vec128<uint32_t, N> all_idx = Set(du, kPackedIdx[bits]);
+  // IndicesFromVec checks bounds, so keep only the bits that address N lanes
+  // (this also discards the 0xF nibbles of mask=false lanes).
+  const Vec128<uint32_t, N> lane_idx =
+      And(all_idx >> Load(du, kNibbleShifts), Set(du, N - 1));
+  const Vec128<uint32_t, N> gathered =
+      TableLookupLanes(BitCast(du, v), IndicesFromVec(du, lane_idx));
+  // TableLookupLanes cannot zero lanes itself; clear the mask=false lanes.
+  return IfThenElseZero(mask, BitCast(d, gathered));
+}
+
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T> Expand(Vec128<T> v, Mask128<T> mask) {
+  const Vec128<T> packed = Compress(v, mask);  // same lane moves as Compress
+  return IfThenElseZero(mask, packed);         // then clear mask=false lanes
+}
+
+// Single-element vectors: only zeroing is needed; at least as fast as native.
+template <typename T>
+HWY_API Vec128<T, 1> Expand(Vec128<T, 1> v, Mask128<T, 1> mask) {
+  return IfThenElseZero(mask, v);
+}
+
+// ------------------------------ LoadExpand
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> LoadExpand(MFromD<D> mask, D d,
+                             const TFromD<D>* HWY_RESTRICT unaligned) {
+  return Expand(LoadU(d, unaligned), mask);  // full LoadU, then Expand by mask
+}
+
+#endif  // HWY_NATIVE_EXPAND
+
+// ------------------------------ TwoTablesLookupLanes
+
+template <class D>  // Index-vector type that IndicesFromVec yields for tag D.
+using IndicesFromD = decltype(IndicesFromVec(D(), Zero(RebindToUnsigned<D>())));
+
+// Overload taking a tag: forwards to TwoTablesLookupLanes(a, b, idx). RVV/SVE
+// define their own TwoTablesLookupLanes(d, a, b, idx), hence the exclusion.
+#if HWY_TARGET != HWY_RVV && HWY_TARGET != HWY_SVE &&      \
+    HWY_TARGET != HWY_SVE2 && HWY_TARGET != HWY_SVE_256 && \
+    HWY_TARGET != HWY_SVE2_128
+template <class D>
+HWY_API VFromD<D> TwoTablesLookupLanes(D /*d*/, VFromD<D> a, VFromD<D> b,
+                                       IndicesFromD<D> idx) {
+  return TwoTablesLookupLanes(a, b, idx);
+}
+#endif
+
+// ------------------------------ Reverse2, Reverse4, Reverse8 (8-bit)
+
+#if (defined(HWY_NATIVE_REVERSE2_8) == defined(HWY_TARGET_TOGGLE)) || HWY_IDE
+#ifdef HWY_NATIVE_REVERSE2_8
+#undef HWY_NATIVE_REVERSE2_8
+#else
+#define HWY_NATIVE_REVERSE2_8
+#endif
+
+#undef HWY_PREFER_ROTATE
+// 1 on platforms where RotateRight is likely faster than TableLookupBytes, else
+// 0. RVV and SVE anyway have their own implementation of this.
+#if HWY_TARGET == HWY_SSE2 || HWY_TARGET <= HWY_AVX3 || \
+    HWY_TARGET == HWY_WASM || HWY_TARGET == HWY_PPC8
+#define HWY_PREFER_ROTATE 1
+#else
+#define HWY_PREFER_ROTATE 0
+#endif
+
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Reverse2(D d, VFromD<D> v) {
+  // AVX3 is kept off the rotate path: its 16-bit RotateRight is 3 instructions.
+#if HWY_PREFER_ROTATE && HWY_TARGET > HWY_AVX3
+  const Repartition<uint16_t, decltype(d)> d_pairs;  // one u16 per byte pair
+  return BitCast(d, RotateRight<8>(BitCast(d_pairs, v)));
+#else
+  alignas(16) static constexpr TFromD<D> kSwapPairs[16] = {
+      1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
+  return TableLookupBytes(v, LoadDup128(d, kSwapPairs));
+#endif
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Reverse4(D d, VFromD<D> v) {
+#if HWY_PREFER_ROTATE
+  const Repartition<uint16_t, decltype(d)> d_pairs;  // swap bytes, then pairs
+  return BitCast(d, Reverse2(d_pairs, BitCast(d_pairs, Reverse2(d, v))));
+#else
+  alignas(16) static constexpr uint8_t kReverseQuads[16] = {
+      3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12};
+  const Repartition<uint8_t, decltype(d)> d_bytes;
+  return TableLookupBytes(v, BitCast(d, LoadDup128(d_bytes, kReverseQuads)));
+#endif
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Reverse8(D d, VFromD<D> v) {
+#if HWY_PREFER_ROTATE
+  const Repartition<uint32_t, D> d_quads;  // reverse quads, then swap them
+  return BitCast(d, Reverse2(d_quads, BitCast(d_quads, Reverse4(d, v))));
+#else
+  alignas(16) static constexpr uint8_t kReverseOctets[16] = {
+      7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8};
+  const Repartition<uint8_t, decltype(d)> d_bytes;
+  return TableLookupBytes(v, BitCast(d, LoadDup128(d_bytes, kReverseOctets)));
+#endif
+}
+
+#endif  // HWY_NATIVE_REVERSE2_8
+
+// ------------------------------ ReverseLaneBytes
+
+#if (defined(HWY_NATIVE_REVERSE_LANE_BYTES) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_REVERSE_LANE_BYTES
+#undef HWY_NATIVE_REVERSE_LANE_BYTES
+#else
+#define HWY_NATIVE_REVERSE_LANE_BYTES
+#endif
+
+template <class V, HWY_IF_T_SIZE_V(V, 2)>
+HWY_API V ReverseLaneBytes(V v) {
+  // 2-byte lanes: view as bytes and reverse each group of two.
+  const Repartition<uint8_t, DFromV<V>> d_bytes;
+  return BitCast(DFromV<V>(), Reverse2(d_bytes, BitCast(d_bytes, v)));
+}
+
+template <class V, HWY_IF_T_SIZE_V(V, 4)>
+HWY_API V ReverseLaneBytes(V v) {
+  // 4-byte lanes: view as bytes and reverse each group of four.
+  const Repartition<uint8_t, DFromV<V>> d_bytes;
+  return BitCast(DFromV<V>(), Reverse4(d_bytes, BitCast(d_bytes, v)));
+}
+
+template <class V, HWY_IF_T_SIZE_V(V, 8)>
+HWY_API V ReverseLaneBytes(V v) {
+  // 8-byte lanes: view as bytes and reverse each group of eight.
+  const Repartition<uint8_t, DFromV<V>> d_bytes;
+  return BitCast(DFromV<V>(), Reverse8(d_bytes, BitCast(d_bytes, v)));
+}
+
+#endif  // HWY_NATIVE_REVERSE_LANE_BYTES
+
+// ------------------------------ ReverseBits
+
+// On these targets, we emulate 8-bit shifts using 16-bit shifts and therefore
+// require at least two lanes to BitCast to 16-bit. We avoid Highway's 8-bit
+// shifts because those would add extra masking already taken care of by
+// UI8ReverseBitsStep. Note that AVX3_DL/AVX3_ZEN4 support GFNI and use it to
+// implement ReverseBits, so this code is not used there.
+#undef HWY_REVERSE_BITS_MIN_BYTES
+#if ((HWY_TARGET >= HWY_AVX3 && HWY_TARGET <= HWY_SSE2) || \
+     HWY_TARGET == HWY_WASM || HWY_TARGET == HWY_WASM_EMU256)
+#define HWY_REVERSE_BITS_MIN_BYTES 2  // shifts run on u16: need two u8 lanes
+#else
+#define HWY_REVERSE_BITS_MIN_BYTES 1  // shifts run directly on u8 lanes
+#endif
+
+#if (defined(HWY_NATIVE_REVERSE_BITS_UI8) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_REVERSE_BITS_UI8
+#undef HWY_NATIVE_REVERSE_BITS_UI8
+#else
+#define HWY_NATIVE_REVERSE_BITS_UI8
+#endif
+
+namespace detail {
+
+template <int kShiftAmt, int kShrResultMask, class V,
+          HWY_IF_V_SIZE_GT_D(DFromV<V>, HWY_REVERSE_BITS_MIN_BYTES - 1)>
+HWY_INLINE V UI8ReverseBitsStep(V v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+#if HWY_REVERSE_BITS_MIN_BYTES == 2
+  const Repartition<uint16_t, decltype(d)> d_wide;  // shift as 16-bit lanes
+#else
+  const RebindToUnsigned<decltype(d)> d_wide;  // shift as unsigned 8-bit lanes
+#endif
+
+  // Per byte: bits set in kShrResultMask come from the right shift, all other
+  // bits from the left shift.
+  const auto src = BitCast(d_wide, v);
+  const auto sel = BitCast(d, Set(du, static_cast<uint8_t>(kShrResultMask)));
+  const auto from_shr = And(BitCast(d, ShiftRight<kShiftAmt>(src)), sel);
+  const auto from_shl = AndNot(sel, BitCast(d, ShiftLeft<kShiftAmt>(src)));
+  return Or(from_shr, from_shl);
+}
+
+#if HWY_REVERSE_BITS_MIN_BYTES == 2
+template <int kShiftAmt, int kShrResultMask, class V,
+          HWY_IF_V_SIZE_D(DFromV<V>, 1)>
+HWY_INLINE V UI8ReverseBitsStep(V v) {
+  const Vec128<uint8_t> full{v.raw};  // rewrap: one byte cannot form a u16 lane
+  return V{UI8ReverseBitsStep<kShiftAmt, kShrResultMask>(full).raw};
+}
+#endif
+
+}  // namespace detail
+
+template <class V, HWY_IF_T_SIZE_V(V, 1)>
+HWY_API V ReverseBits(V v) {
+  // Swap adjacent bits, then adjacent bit pairs, then the two nibbles.
+  const V swapped1 = detail::UI8ReverseBitsStep<1, 0x55>(v);
+  const V swapped2 = detail::UI8ReverseBitsStep<2, 0x33>(swapped1);
+  return detail::UI8ReverseBitsStep<4, 0x0F>(swapped2);
+}
+
+#endif  // HWY_NATIVE_REVERSE_BITS_UI8
+
+#if (defined(HWY_NATIVE_REVERSE_BITS_UI16_32_64) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_REVERSE_BITS_UI16_32_64
+#undef HWY_NATIVE_REVERSE_BITS_UI16_32_64
+#else
+#define HWY_NATIVE_REVERSE_BITS_UI16_32_64
+#endif
+
+template <class V, HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 2) | (1 << 4) | (1 << 8)),
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V)>
+HWY_API V ReverseBits(V v) {
+  // Reverse the bits inside every byte, then the byte order of each lane.
+  const Repartition<uint8_t, DFromV<V>> d8;
+  return ReverseLaneBytes(BitCast(DFromV<V>(), ReverseBits(BitCast(d8, v))));
+}
+#endif  // HWY_NATIVE_REVERSE_BITS_UI16_32_64
+
+// ------------------------------ Per4LaneBlockShuffle
+
+#if (defined(HWY_NATIVE_PER4LANEBLKSHUF_DUP32) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_PER4LANEBLKSHUF_DUP32
+#undef HWY_NATIVE_PER4LANEBLKSHUF_DUP32
+#else
+#define HWY_NATIVE_PER4LANEBLKSHUF_DUP32
+#endif
+
+#if HWY_TARGET != HWY_SCALAR
+namespace detail {
+
+template <class D>
+HWY_INLINE Vec<D> Per4LaneBlkShufDupSet4xU32(D d, const uint32_t x3,
+                                             const uint32_t x2,
+                                             const uint32_t x1,
+                                             const uint32_t x0) {
+  alignas(16) const uint32_t lanes[4] = {x0, x1, x2, x3};
+  // In memory x0 comes first; the arguments are passed highest lane first.
+#if HWY_TARGET == HWY_RVV
+  constexpr int kPow2 = d.Pow2();
+  constexpr int kLoadPow2 = HWY_MAX(kPow2, -1);
+  const ScalableTag<uint32_t, kLoadPow2> d_load;
+#else
+  constexpr size_t kMaxBytes = d.MaxBytes();
+#if HWY_TARGET == HWY_NEON || HWY_TARGET == HWY_NEON_WITHOUT_AES
+  constexpr size_t kMinLanesToLoad = 2;
+#else
+  constexpr size_t kMinLanesToLoad = 4;
+#endif
+  constexpr size_t kNumToLoad =
+      HWY_MAX(kMaxBytes / sizeof(uint32_t), kMinLanesToLoad);
+  const CappedTag<uint32_t, kNumToLoad> d_load;
+#endif
+  // d_load may span more lanes than d; ResizeBitCast converts back to d.
+  return ResizeBitCast(d, LoadDup128(d_load, lanes));
+}
+
+}  // namespace detail
+#endif
+
+#endif  // HWY_NATIVE_PER4LANEBLKSHUF_DUP32
+
+#if HWY_TARGET != HWY_SCALAR
+namespace detail {
+
+template <class V>
+HWY_INLINE V Per2LaneBlockShuffle(hwy::SizeTag<0> /*idx_10_tag*/, V v) {
+  return DupEven(v);  // tag 0 selects DupEven
+}
+
+template <class V>
+HWY_INLINE V Per2LaneBlockShuffle(hwy::SizeTag<1> /*idx_10_tag*/, V v) {
+  const DFromV<decltype(v)> d;
+  return Reverse2(d, v);  // tag 1 selects Reverse2
+}
+
+template <class V>
+HWY_INLINE V Per2LaneBlockShuffle(hwy::SizeTag<2> /*idx_10_tag*/, V v) {
+  return v;  // tag 2 leaves the vector unchanged
+}
+
+template <class V>
+HWY_INLINE V Per2LaneBlockShuffle(hwy::SizeTag<3> /*idx_10_tag*/, V v) {
+  return DupOdd(v);  // tag 3 selects DupOdd
+}
+
+HWY_INLINE uint32_t U8x4Per4LaneBlkIndices(const uint32_t idx3,
+                                           const uint32_t idx2,
+                                           const uint32_t idx1,
+                                           const uint32_t idx0) {
+  // Packs one index per byte so that idx0 lands in the first byte in memory.
+#if HWY_IS_LITTLE_ENDIAN
+  const uint32_t packed = idx0 | (idx1 << 8) | (idx2 << 16) | (idx3 << 24);
+#else
+  const uint32_t packed = (idx0 << 24) | (idx1 << 16) | (idx2 << 8) | idx3;
+#endif
+  return packed;
+}
+
+template <class D>
+HWY_INLINE Vec<D> TblLookupPer4LaneBlkU8IdxInBlk(D d, const uint32_t idx3,
+                                                 const uint32_t idx2,
+                                                 const uint32_t idx1,
+                                                 const uint32_t idx0) {
+#if HWY_TARGET == HWY_RVV
+  const AdjustSimdTagToMinVecPow2<Repartition<uint32_t, D>> du32;
+#else
+  const Repartition<uint32_t, D> du32;
+#endif
+  // Set every u32 lane to the four packed byte indices, then view it as d.
+  const uint32_t packed = U8x4Per4LaneBlkIndices(idx3, idx2, idx1, idx0);
+  return ResizeBitCast(d, Set(du32, packed));
+}
+
+#if HWY_HAVE_SCALABLE || HWY_TARGET == HWY_SVE_256 || \
+    HWY_TARGET == HWY_SVE2_128 || HWY_TARGET == HWY_EMU128
+#define HWY_PER_4_BLK_TBL_LOOKUP_LANES_ENABLE(D) void* = nullptr
+#else
+#define HWY_PER_4_BLK_TBL_LOOKUP_LANES_ENABLE(D) HWY_IF_T_SIZE_D(D, 8)
+
+template <class V, HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2) | (1 << 4))>
+HWY_INLINE V Per4LaneBlkShufDoTblLookup(V v, V idx) {
+  const Repartition<uint8_t, DFromV<V>> d8;  // both operands viewed as bytes
+  const auto picked = TableLookupBytes(BitCast(d8, v), BitCast(d8, idx));
+  return BitCast(DFromV<V>(), picked);
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE Vec<D> TblLookupPer4LaneBlkShufIdx(D d, const uint32_t idx3,
+                                              const uint32_t idx2,
+                                              const uint32_t idx1,
+                                              const uint32_t idx0) {
+  // Pack the four in-block lane (= byte) indices into one u32, then add
+  // 4/8/12 to every byte for the second to fourth group of four lanes.
+  const uint32_t base = U8x4Per4LaneBlkIndices(idx3, idx2, idx1, idx0);
+  const Repartition<uint32_t, decltype(d)> du32;
+  return ResizeBitCast(
+      d, Per4LaneBlkShufDupSet4xU32(du32, base + 0x0C0C0C0Cu,
+                                    base + 0x08080808u, base + 0x04040404u,
+                                    base));
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_INLINE Vec<D> TblLookupPer4LaneBlkShufIdx(D d, const uint32_t idx3,
+                                              const uint32_t idx2,
+                                              const uint32_t idx1,
+                                              const uint32_t idx0) {
+  // Each u32 holds two u16 lanes; within it, the lower-numbered lane index
+  // occupies the u16 that comes first in memory.
+#if HWY_IS_LITTLE_ENDIAN
+  const uint32_t lanes10 = idx0 | (idx1 << 16);
+  const uint32_t lanes32 = idx2 | (idx3 << 16);
+  constexpr uint32_t kByteInLane = 0x01000100u;
+#else
+  const uint32_t lanes10 = (idx0 << 16) | idx1;
+  const uint32_t lanes32 = (idx2 << 16) | idx3;
+  constexpr uint32_t kByteInLane = 0x00010001u;
+#endif
+  // Lane index -> byte index (x2 in both bytes of the u16), plus 0/1 per byte.
+  const uint32_t bytes10 = lanes10 * 0x0202u + kByteInLane;
+  const uint32_t bytes32 = lanes32 * 0x0202u + kByteInLane;
+  const Repartition<uint32_t, decltype(d)> du32;
+  // Lanes 4..7 of each 8-lane group use the same pattern shifted by 8 bytes.
+  return ResizeBitCast(d, Per4LaneBlkShufDupSet4xU32(
+      du32, bytes32 + 0x08080808u, bytes10 + 0x08080808u, bytes32, bytes10));
+}
+
+// 4-byte lanes: every u32 of the result holds the four byte indices selecting
+// one source lane, i.e. 4 * idx in each byte plus the byte-within-lane offsets
+// 0..3 (whose order depends on endianness).
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_INLINE Vec<D> TblLookupPer4LaneBlkShufIdx(D d, const uint32_t idx3,
+                                              const uint32_t idx2,
+                                              const uint32_t idx1,
+                                              const uint32_t idx0) {
+  const Repartition<uint32_t, decltype(d)> du32;
+#if HWY_IS_LITTLE_ENDIAN
+  constexpr uint32_t kByteInLane{0x03020100};
+#else
+  constexpr uint32_t kByteInLane{0x00010203};
+#endif
+  // Multiplying a lane index by this puts 4 * idx into each of the four bytes.
+  constexpr uint32_t kLaneToByteIdx{0x04040404u};
+
+  const uint32_t sel0 = static_cast<uint32_t>(idx0 * kLaneToByteIdx + kByteInLane);
+  const uint32_t sel1 = static_cast<uint32_t>(idx1 * kLaneToByteIdx + kByteInLane);
+  const uint32_t sel2 = static_cast<uint32_t>(idx2 * kLaneToByteIdx + kByteInLane);
+  const uint32_t sel3 = static_cast<uint32_t>(idx3 * kLaneToByteIdx + kByteInLane);
+
+  const auto byte_indices =
+      Per4LaneBlkShufDupSet4xU32(du32, sel3, sel2, sel1, sel0);
+  return ResizeBitCast(d, byte_indices);
+}
+#endif
+
+// 1-byte lanes: the in-block lane indices are already u8, so forward directly
+// to the u8 index builder.
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE VFromD<D> TblLookupPer4LaneBlkIdxInBlk(D d, const uint32_t idx3,
+                                                  const uint32_t idx2,
+                                                  const uint32_t idx1,
+                                                  const uint32_t idx0) {
+  const VFromD<D> idx_in_blk =
+      TblLookupPer4LaneBlkU8IdxInBlk(d, idx3, idx2, idx1, idx0);
+  return idx_in_blk;
+}
+
+#if HWY_TARGET == HWY_RVV
+// RVV, lanes wider than one byte: build the indices as u8 with the same lane
+// count, then promote them to the lane type of `d`.
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_INLINE VFromD<D> TblLookupPer4LaneBlkIdxInBlk(D d, const uint32_t idx3,
+                                                  const uint32_t idx2,
+                                                  const uint32_t idx1,
+                                                  const uint32_t idx0) {
+  const Rebind<uint8_t, decltype(d)> du8;
+  const auto idx_u8 =
+      TblLookupPer4LaneBlkU8IdxInBlk(du8, idx3, idx2, idx1, idx0);
+  return PromoteTo(d, idx_u8);
+}
+#else
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_INLINE VFromD<D> TblLookupPer4LaneBlkIdxInBlk(D d, const uint32_t idx3,
+                                                  const uint32_t idx2,
+                                                  const uint32_t idx1,
+                                                  const uint32_t idx0) {
+  const uint16_t u16_idx0 = static_cast<uint16_t>(idx0);  // narrow to the lane type
+  const uint16_t u16_idx1 = static_cast<uint16_t>(idx1);
+  const uint16_t u16_idx2 = static_cast<uint16_t>(idx2);
+  const uint16_t u16_idx3 = static_cast<uint16_t>(idx3);
+  alignas(16)
+      const uint16_t indices[8] = {u16_idx0, u16_idx1, u16_idx2, u16_idx3,
+                                   u16_idx0, u16_idx1, u16_idx2, u16_idx3};
+  // The 4-index pattern is stored twice: 8 x u16 = 16 bytes, one 128-bit block.
+#if HWY_TARGET == HWY_NEON || HWY_TARGET == HWY_NEON_WITHOUT_AES
+  constexpr size_t kMinLanesToLoad = 4;
+#else
+  constexpr size_t kMinLanesToLoad = 8;
+#endif
+  constexpr size_t kNumToLoad = HWY_MAX(HWY_MAX_LANES_D(D), kMinLanesToLoad);
+  const CappedTag<uint16_t, kNumToLoad> d_load;
+  // Load with at least kMinLanesToLoad lanes, then resize to the requested D.
+  return ResizeBitCast(d, LoadDup128(d_load, indices));
+}
+
+// 4-byte lanes: the u32 indices can be used as lanes directly.
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_INLINE VFromD<D> TblLookupPer4LaneBlkIdxInBlk(D d, const uint32_t idx3,
+                                                  const uint32_t idx2,
+                                                  const uint32_t idx1,
+                                                  const uint32_t idx0) {
+  const VFromD<D> idx_in_blk =
+      Per4LaneBlkShufDupSet4xU32(d, idx3, idx2, idx1, idx0);
+  return idx_in_blk;
+}
+
+// 8-byte lanes: build the indices as u32 with the same lane count, promote
+// them to the unsigned 64-bit lane type and cast to the lane type of `d`.
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE VFromD<D> TblLookupPer4LaneBlkIdxInBlk(D d, const uint32_t idx3,
+                                                  const uint32_t idx2,
+                                                  const uint32_t idx1,
+                                                  const uint32_t idx0) {
+  const RebindToUnsigned<decltype(d)> du;
+  const Rebind<uint32_t, decltype(d)> du32;
+  const auto idx_u32 = Per4LaneBlkShufDupSet4xU32(du32, idx3, idx2, idx1, idx0);
+  const auto idx_unsigned = PromoteTo(du, idx_u32);
+  return BitCast(d, idx_unsigned);
+}
+#endif
+
+template <class D, HWY_PER_4_BLK_TBL_LOOKUP_LANES_ENABLE(D)>
+HWY_INLINE IndicesFromD<D> TblLookupPer4LaneBlkShufIdx(D d, const uint32_t idx3,
+                                                       const uint32_t idx2,
+                                                       const uint32_t idx1,
+                                                       const uint32_t idx0) {
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  auto idx_in_blk = TblLookupPer4LaneBlkIdxInBlk(du, idx3, idx2, idx1, idx0);
+  // With fewer than 4 lanes, mask the in-block indices with kN - 1.
+  constexpr size_t kN = HWY_MAX_LANES_D(D);
+  if (kN < 4) {
+    idx_in_blk = And(idx_in_blk, Set(du, static_cast<TU>(kN - 1)));
+  }
+  // blk_offsets: each lane's own index with the low two bits cleared.
+#if HWY_TARGET == HWY_RVV
+  const auto blk_offsets = AndS(Iota0(du), static_cast<TU>(~TU{3}));
+#else
+  const auto blk_offsets =
+      And(Iota(du, TU{0}), Set(du, static_cast<TU>(~TU{3})));
+#endif
+  return IndicesFromVec(d, Add(idx_in_blk, blk_offsets));  // in-block index + block base
+}
+
+template <class V, HWY_PER_4_BLK_TBL_LOOKUP_LANES_ENABLE(DFromV<V>)>
+HWY_INLINE V Per4LaneBlkShufDoTblLookup(V v, IndicesFromD<DFromV<V>> idx) {
+  return TableLookupLanes(v, idx);  // idx holds lane indices (not byte indices)
+}
+
+#undef HWY_PER_4_BLK_TBL_LOOKUP_LANES_ENABLE
+
+// Table-lookup implementation of a per-4-lane-block shuffle. `idx3210` packs
+// four 2-bit source-lane indices: idx0 in bits 0..1 up to idx3 in bits 6..7.
+template <class V>
+HWY_INLINE V TblLookupPer4LaneBlkShuf(V v, size_t idx3210) {
+  const DFromV<V> d;
+  const uint32_t src0 = static_cast<uint32_t>(idx3210 & 3);
+  const uint32_t src1 = static_cast<uint32_t>((idx3210 >> 2) & 3);
+  const uint32_t src2 = static_cast<uint32_t>((idx3210 >> 4) & 3);
+  const uint32_t src3 = static_cast<uint32_t>((idx3210 >> 6) & 3);
+  // Build the index vector for this lane size, then run the matching lookup.
+  const auto shuffle_idx =
+      TblLookupPer4LaneBlkShufIdx(d, src3, src2, src1, src0);
+  return Per4LaneBlkShufDoTblLookup(v, shuffle_idx);
+}
+
+// Generic fallback among the detail::Per4LaneBlockShuffle overloads: the ones
+// taking the extra lane_size_tag and vect_size_tag parameters are only called
+// for vectors that have at least 4 lanes (or scalable vectors that might
+// possibly have 4 or more lanes). This one defers to the table lookup.
+template <size_t kIdx3210, size_t kLaneSize, size_t kVectSize, class V>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> /*idx_3210_tag*/,
+                                  hwy::SizeTag<kLaneSize> /*lane_size_tag*/,
+                                  hwy::SizeTag<kVectSize> /*vect_size_tag*/,
+                                  V v) {
+  constexpr size_t kPackedIndices = kIdx3210;
+  return TblLookupPer4LaneBlkShuf(v, kPackedIndices);
+}
+
+#if HWY_HAVE_FLOAT64
+// float lanes (4 bytes) when float64 is available: reinterpret as the wide
+// floating-point lane type.
+template <class V>
+HWY_INLINE VFromD<RepartitionToWide<DFromV<V>>> Per4LaneBlockShufCastToWide(
+    hwy::FloatTag /* type_tag */, hwy::SizeTag<4> /* lane_size_tag */, V v) {
+  const RepartitionToWide<DFromV<V>> d_wide;
+  return BitCast(d_wide, v);
+}
+#endif
+
+// Other floating-point lanes: reinterpret as the double-width *unsigned*
+// integer lane type.
+template <size_t kLaneSize, class V>
+HWY_INLINE VFromD<RepartitionToWide<RebindToUnsigned<DFromV<V>>>>
+Per4LaneBlockShufCastToWide(hwy::FloatTag /* type_tag */,
+                            hwy::SizeTag<kLaneSize> /* lane_size_tag */, V v) {
+  const RepartitionToWide<RebindToUnsigned<DFromV<V>>> d_wide;
+  return BitCast(d_wide, v);
+}
+
+// Non-floating-point lanes: reinterpret as the double-width lane type of the
+// same category.
+template <size_t kLaneSize, class V>
+HWY_INLINE VFromD<RepartitionToWide<DFromV<V>>> Per4LaneBlockShufCastToWide(
+    hwy::NonFloatTag /* type_tag */,
+    hwy::SizeTag<kLaneSize> /* lane_size_tag */, V v) {
+  const RepartitionToWide<DFromV<V>> d_wide;
+  return BitCast(d_wide, v);
+}
+
+// 0x1B: outputs 3..0 read lanes {0, 1, 2, 3}, i.e. each block is reversed.
+template <class V>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0x1B> /*idx_3210_tag*/, V v) {
+  return Reverse4(DFromV<V>(), v);
+}
+
+// 0x44: outputs 3..0 read lanes {1, 0, 1, 0}. Viewed as double-width lanes,
+// that duplicates the even wide lane.
+template <class V,
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2) |
+                                        (HWY_HAVE_INTEGER64 ? (1 << 4) : 0))>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0x44> /*idx_3210_tag*/, V v) {
+  using T = TFromV<V>;
+  const auto wide = Per4LaneBlockShufCastToWide(hwy::IsFloatTag<T>(),
+                                                hwy::SizeTag<sizeof(T)>(), v);
+  return BitCast(DFromV<V>(), DupEven(wide));
+}
+
+// 0x4E: outputs 3..0 read lanes {1, 0, 3, 2}. Viewed as double-width lanes,
+// that swaps each adjacent pair of wide lanes.
+template <class V,
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2) |
+                                        (HWY_HAVE_INTEGER64 ? (1 << 4) : 0))>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0x4E> /*idx_3210_tag*/, V v) {
+  using T = TFromV<V>;
+  const auto wide = Per4LaneBlockShufCastToWide(hwy::IsFloatTag<T>(),
+                                                hwy::SizeTag<sizeof(T)>(), v);
+  const DFromV<decltype(wide)> d_wide;
+  return BitCast(DFromV<V>(), Reverse2(d_wide, wide));
+}
+
+#if HWY_MAX_BYTES >= 32
+template <class V, HWY_IF_T_SIZE_V(V, 8)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0x4E> /*idx_3210_tag*/, V v) {
+  return SwapAdjacentBlocks(v);  // 0x4E reads lanes {1, 0, 3, 2}; two 8-byte lanes = 16 bytes
+}
+#endif
+
+// 0x50 with exactly four 1- or 2-byte lanes: outputs 3..0 read lanes
+// {1, 1, 0, 0}, which is the lower half interleaved with itself.
+template <class V, HWY_IF_LANES_D(DFromV<V>, 4),
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2))>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0x50> /*idx_3210_tag*/, V v) {
+  return InterleaveLower(DFromV<V>(), v, v);
+}
+
+// 0x50 with 4-byte lanes: outputs 3..0 read lanes {1, 1, 0, 0}.
+template <class V, HWY_IF_T_SIZE_V(V, 4)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0x50> /*idx_3210_tag*/, V v) {
+  return InterleaveLower(DFromV<V>(), v, v);
+}
+
+// 0x88 with exactly four lanes: outputs 3..0 read lanes {2, 0, 2, 0}, i.e. the
+// even lanes concatenated with themselves.
+template <class V, HWY_IF_LANES_D(DFromV<V>, 4)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0x88> /*idx_3210_tag*/, V v) {
+  return ConcatEven(DFromV<V>(), v, v);
+}
+
+template <class V>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0xA0> /*idx_3210_tag*/, V v) {
+  return DupEven(v);  // 0xA0: outputs 3..0 read lanes {2, 2, 0, 0}
+}
+
+// 0xB1: outputs 3..0 read lanes {2, 3, 0, 1}, i.e. adjacent lanes are swapped.
+template <class V>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0xB1> /*idx_3210_tag*/, V v) {
+  return Reverse2(DFromV<V>(), v);
+}
+
+// 0xDD with exactly four lanes: outputs 3..0 read lanes {3, 1, 3, 1}, i.e. the
+// odd lanes concatenated with themselves.
+template <class V, HWY_IF_LANES_D(DFromV<V>, 4)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0xDD> /*idx_3210_tag*/, V v) {
+  return ConcatOdd(DFromV<V>(), v, v);
+}
+
+template <class V>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0xE4> /*idx_3210_tag*/, V v) {
+  return v;  // 0xE4: outputs 3..0 read lanes {3, 2, 1, 0}, the identity
+}
+
+// 0xEE: outputs 3..0 read lanes {3, 2, 3, 2}. Viewed as double-width lanes,
+// that duplicates the odd wide lane.
+template <class V,
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2) |
+                                        (HWY_HAVE_INTEGER64 ? (1 << 4) : 0))>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0xEE> /*idx_3210_tag*/, V v) {
+  using T = TFromV<V>;
+  const auto wide = Per4LaneBlockShufCastToWide(hwy::IsFloatTag<T>(),
+                                                hwy::SizeTag<sizeof(T)>(), v);
+  return BitCast(DFromV<V>(), DupOdd(wide));
+}
+
+template <class V>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0xF5> /*idx_3210_tag*/, V v) {
+  return DupOdd(v);  // 0xF5: outputs 3..0 read lanes {3, 3, 1, 1}
+}
+
+// 0xFA with 4-byte lanes: outputs 3..0 read lanes {3, 3, 2, 2}, which is the
+// upper half interleaved with itself.
+template <class V, HWY_IF_T_SIZE_V(V, 4)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0xFA> /*idx_3210_tag*/, V v) {
+  return InterleaveUpper(DFromV<V>(), v, v);
+}
+
+template <size_t kIdx3210, class V>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> idx_3210_tag, V v) {
+  const DFromV<decltype(v)> d;  // only needed for d.MaxBytes() below
+  return Per4LaneBlockShuffle(idx_3210_tag, hwy::SizeTag<sizeof(TFromV<V>)>(),
+                              hwy::SizeTag<d.MaxBytes()>(), v);  // add lane/vector size tags
+}
+
+}  // namespace detail
+#endif  // HWY_TARGET != HWY_SCALAR
+
+template <size_t kIdx3, size_t kIdx2, size_t kIdx1, size_t kIdx0, class V,
+          HWY_IF_LANES_D(DFromV<V>, 1)>
+HWY_API V Per4LaneBlockShuffle(V v) {
+  static_assert(kIdx0 <= 3, "kIdx0 <= 3 must be true");
+  static_assert(kIdx1 <= 3, "kIdx1 <= 3 must be true");
+  static_assert(kIdx2 <= 3, "kIdx2 <= 3 must be true");
+  static_assert(kIdx3 <= 3, "kIdx3 <= 3 must be true");
+  // Single-lane vectors: the indices are only validated; v is returned as is.
+  return v;
+}
+
+#if HWY_TARGET != HWY_SCALAR
+template <size_t kIdx3, size_t kIdx2, size_t kIdx1, size_t kIdx0, class V,
+          HWY_IF_LANES_D(DFromV<V>, 2)>
+HWY_API V Per4LaneBlockShuffle(V v) {
+  static_assert(kIdx0 <= 3, "kIdx0 <= 3 must be true");
+  static_assert(kIdx1 <= 3, "kIdx1 <= 3 must be true");
+  static_assert(kIdx2 <= 3, "kIdx2 <= 3 must be true");
+  static_assert(kIdx3 <= 3, "kIdx3 <= 3 must be true");
+  // Two-lane vectors: kIdx2/kIdx3 are only validated; kIdx0/kIdx1 pick lanes.
+  constexpr bool isReverse2 = (kIdx0 == 1 || kIdx1 == 0) && (kIdx0 != kIdx1);
+  constexpr size_t kPer2BlkIdx0 = (kIdx0 <= 1) ? kIdx0 : (isReverse2 ? 1 : 0);  // index > 1: substitute
+  constexpr size_t kPer2BlkIdx1 = (kIdx1 <= 1) ? kIdx1 : (isReverse2 ? 0 : 1);  // index > 1: substitute
+  // Pack the two 1-bit indices for the Per2LaneBlockShuffle tag dispatch.
+  constexpr size_t kIdx10 = (kPer2BlkIdx1 << 1) | kPer2BlkIdx0;
+  static_assert(kIdx10 <= 3, "kIdx10 <= 3 must be true");
+  return detail::Per2LaneBlockShuffle(hwy::SizeTag<kIdx10>(), v);
+}
+
+template <size_t kIdx3, size_t kIdx2, size_t kIdx1, size_t kIdx0, class V,
+          HWY_IF_LANES_GT_D(DFromV<V>, 2)>
+HWY_API V Per4LaneBlockShuffle(V v) {
+  static_assert(kIdx0 <= 3, "kIdx0 <= 3 must be true");
+  static_assert(kIdx1 <= 3, "kIdx1 <= 3 must be true");
+  static_assert(kIdx2 <= 3, "kIdx2 <= 3 must be true");
+  static_assert(kIdx3 <= 3, "kIdx3 <= 3 must be true");
+  // Two bits per output lane: kIdx0 in bits 0..1 up to kIdx3 in bits 6..7.
+  constexpr size_t kIdx3210 =
+      (kIdx3 << 6) | (kIdx2 << 4) | (kIdx1 << 2) | kIdx0;
+  return detail::Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210>(), v);  // tag-dispatch on the pattern
+}
+#endif
+
+// ------------------------------ Blocks
+
+// Returns the number of 16-byte blocks covered by vectors of `d`.
+template <class D>
+HWY_API size_t Blocks(D d) {
+  // Vectors of at most 16 bytes always count as a single block.
+  if (d.MaxBytes() <= 16) return 1;
+  // Otherwise round the actual byte count up to whole 16-byte blocks.
+  const size_t num_bytes = Lanes(d) * sizeof(TFromD<D>);
+  return (num_bytes + 15) / 16;
+}
+
+// ------------------------------ Block insert/extract/broadcast ops
+#if (defined(HWY_NATIVE_BLK_INSERT_EXTRACT) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_BLK_INSERT_EXTRACT
+#undef HWY_NATIVE_BLK_INSERT_EXTRACT
+#else
+#define HWY_NATIVE_BLK_INSERT_EXTRACT
+#endif
+
+template <int kBlockIdx, class V, HWY_IF_V_SIZE_LE_V(V, 16)>
+HWY_API V InsertBlock(V /*v*/, V blk_to_insert) {
+  static_assert(kBlockIdx == 0, "Invalid block index");  // <= 16 bytes: only block 0
+  return blk_to_insert;  // the inserted block replaces the whole vector
+}
+// ExtractBlock for vectors of at most 16 bytes: block 0 is the vector itself.
+template <int kBlockIdx, class V, HWY_IF_V_SIZE_LE_V(V, 16)>
+HWY_API V ExtractBlock(V v) {
+  static_assert(kBlockIdx == 0, "Invalid block index");
+  return v;
+}
+// BroadcastBlock for vectors of at most 16 bytes: nothing to replicate.
+template <int kBlockIdx, class V, HWY_IF_V_SIZE_LE_V(V, 16)>
+HWY_API V BroadcastBlock(V v) {
+  static_assert(kBlockIdx == 0, "Invalid block index");
+  return v;
+}
+
+#endif  // HWY_NATIVE_BLK_INSERT_EXTRACT
+
+// ------------------------------ BroadcastLane
+#if (defined(HWY_NATIVE_BROADCASTLANE) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_BROADCASTLANE
+#undef HWY_NATIVE_BROADCASTLANE
+#else
+#define HWY_NATIVE_BROADCASTLANE
+#endif
+
+template <int kLane, class V, HWY_IF_V_SIZE_LE_V(V, 16)>
+HWY_API V BroadcastLane(V v) {
+  return Broadcast<kLane>(v);  // vectors of at most 16 bytes: defer to Broadcast
+}
+
+#endif  // HWY_NATIVE_BROADCASTLANE
+
+// ------------------------------ Slide1Up and Slide1Down
+#if (defined(HWY_NATIVE_SLIDE1_UP_DOWN) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_SLIDE1_UP_DOWN
+#undef HWY_NATIVE_SLIDE1_UP_DOWN
+#else
+#define HWY_NATIVE_SLIDE1_UP_DOWN
+#endif
+
+template <class D, HWY_IF_LANES_D(D, 1)>
+HWY_API VFromD<D> Slide1Up(D d, VFromD<D> /*v*/) {
+  return Zero(d);  // single-lane vector: the input is ignored, result is zero
+}
+template <class D, HWY_IF_LANES_D(D, 1)>
+HWY_API VFromD<D> Slide1Down(D d, VFromD<D> /*v*/) {
+  return Zero(d);  // single-lane vector: the input is ignored, result is zero
+}
+
+#if HWY_TARGET != HWY_SCALAR
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_GT_D(D, 1)>
+HWY_API VFromD<D> Slide1Up(D d, VFromD<D> v) {
+  return ShiftLeftLanes<1>(d, v);  // <= 16 bytes, > 1 lane: one-lane shift left
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_GT_D(D, 1)>
+HWY_API VFromD<D> Slide1Down(D d, VFromD<D> v) {
+  return ShiftRightLanes<1>(d, v);  // <= 16 bytes, > 1 lane: one-lane shift right
+}
+#endif  // HWY_TARGET != HWY_SCALAR
+
+#endif  // HWY_NATIVE_SLIDE1_UP_DOWN
+
+// ------------------------------ SlideUpBlocks
+
+template <int kBlocks, class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> SlideUpBlocks(D /*d*/, VFromD<D> v) {
+  static_assert(kBlocks == 0, "kBlocks == 0 must be true");  // <= 16 bytes: only 0 allowed
+  return v;  // sliding by zero blocks leaves v unchanged
+}
+
+#if HWY_HAVE_SCALABLE || HWY_TARGET == HWY_SVE_256
+// Vectors larger than 16 bytes: slide up by kBlocks 16-byte blocks.
+template <int kBlocks, class D, HWY_IF_V_SIZE_GT_D(D, 16)>
+HWY_API VFromD<D> SlideUpBlocks(D d, VFromD<D> v) {
+  static_assert(0 <= kBlocks && static_cast<size_t>(kBlocks) < d.MaxBlocks(),
+                "kBlocks must be between 0 and d.MaxBlocks() - 1");
+  // Convert the block count into a lane count (16 bytes per block).
+  constexpr size_t kLanesToSlide =
+      static_cast<size_t>(kBlocks) * (16 / sizeof(TFromD<D>));
+  return SlideUpLanes(d, v, kLanesToSlide);
+}
+#endif
+
+// ------------------------------ SlideDownBlocks
+
+template <int kBlocks, class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> SlideDownBlocks(D /*d*/, VFromD<D> v) {
+  static_assert(kBlocks == 0, "kBlocks == 0 must be true");  // <= 16 bytes: only 0 allowed
+  return v;  // sliding by zero blocks leaves v unchanged
+}
+
+#if HWY_HAVE_SCALABLE || HWY_TARGET == HWY_SVE_256
+// Vectors larger than 16 bytes: slide down by kBlocks 16-byte blocks.
+template <int kBlocks, class D, HWY_IF_V_SIZE_GT_D(D, 16)>
+HWY_API VFromD<D> SlideDownBlocks(D d, VFromD<D> v) {
+  static_assert(0 <= kBlocks && static_cast<size_t>(kBlocks) < d.MaxBlocks(),
+                "kBlocks must be between 0 and d.MaxBlocks() - 1");
+  // Convert the block count into a lane count (16 bytes per block).
+  constexpr size_t kLanesToSlide =
+      static_cast<size_t>(kBlocks) * (16 / sizeof(TFromD<D>));
+  return SlideDownLanes(d, v, kLanesToSlide);
+}
+#endif
+
+// ================================================== Operator wrapper
+
+// SVE* and RVV currently cannot define operators and have already defined
+// (only) the corresponding functions such as Add.
+#if (defined(HWY_NATIVE_OPERATOR_REPLACEMENTS) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_OPERATOR_REPLACEMENTS
+#undef HWY_NATIVE_OPERATOR_REPLACEMENTS
+#else
+#define HWY_NATIVE_OPERATOR_REPLACEMENTS
+#endif
+
+// Named-function spellings of the arithmetic operators; each one simply
+// forwards to the corresponding operator of V.
+template <class V>
+HWY_API V Add(V lhs, V rhs) {
+  return lhs + rhs;
+}
+template <class V>
+HWY_API V Sub(V lhs, V rhs) {
+  return lhs - rhs;
+}
+
+template <class V>
+HWY_API V Mul(V lhs, V rhs) {
+  return lhs * rhs;
+}
+template <class V>
+HWY_API V Div(V lhs, V rhs) {
+  return lhs / rhs;
+}
+
+// Named-function spelling of operator<<: shifts `a` left by `b`. Marked
+// HWY_API like the other operator wrappers in this section so that it gets the
+// same inlining/linkage attributes.
+template <class V>
+HWY_API V Shl(V a, V b) {
+  return a << b;
+}
+// Named-function spelling of operator>>: shifts `a` right by `b`. Marked
+// HWY_API like the other operator wrappers in this section so that it gets the
+// same inlining/linkage attributes.
+template <class V>
+HWY_API V Shr(V a, V b) {
+  return a >> b;
+}
+
+template <class V>
+HWY_API auto Eq(V a, V b) -> decltype(a == b) {  // returns whatever operator== yields
+  return a == b;
+}
+template <class V>
+HWY_API auto Ne(V a, V b) -> decltype(a == b) {  // return type taken from operator==
+  return a != b;
+}
+template <class V>
+HWY_API auto Lt(V a, V b) -> decltype(a == b) {  // return type taken from operator==
+  return a < b;
+}
+// Gt/Ge/Le likewise declare their return type via operator==.
+template <class V>
+HWY_API auto Gt(V a, V b) -> decltype(a == b) {
+  return a > b;
+}
+template <class V>
+HWY_API auto Ge(V a, V b) -> decltype(a == b) {
+  return a >= b;
+}
+
+template <class V>
+HWY_API auto Le(V a, V b) -> decltype(a == b) {
+  return a <= b;
+}
+
+#endif  // HWY_NATIVE_OPERATOR_REPLACEMENTS
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/ops/ppc_vsx-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/ppc_vsx-inl.h
new file mode 100644
index 0000000000..dfa990388e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/ppc_vsx-inl.h
@@ -0,0 +1,5339 @@
+// Copyright 2023 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// 128-bit vectors for VSX
+// External include guard in highway.h - see comment there.
+
+#pragma push_macro("vector")
+#pragma push_macro("pixel")
+#pragma push_macro("bool")
+
+#undef vector
+#undef pixel
+#undef bool
+
+#include <altivec.h>
+
+#pragma pop_macro("vector")
+#pragma pop_macro("pixel")
+#pragma pop_macro("bool")
+
+#include "hwy/ops/shared-inl.h"
+
+// clang's altivec.h gates some intrinsics behind #ifdef __POWER10_VECTOR__, and
+// some GCC versions do the same for _ARCH_PWR10.
+// This means we can only use POWER10-specific intrinsics in static dispatch
+// mode (where the -mpower10-vector compiler flag is passed). Same for PPC9.
+// On other compilers, the usual target check is sufficient.
+#if HWY_TARGET <= HWY_PPC9 && \
+    (defined(_ARCH_PWR9) || defined(__POWER9_VECTOR__))
+#define HWY_PPC_HAVE_9 1
+#else
+#define HWY_PPC_HAVE_9 0
+#endif
+
+#if HWY_TARGET <= HWY_PPC10 && \
+    (defined(_ARCH_PWR10) || defined(__POWER10_VECTOR__))
+#define HWY_PPC_HAVE_10 1
+#else
+#define HWY_PPC_HAVE_10 0
+#endif
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+namespace detail {
+
+template <typename T>
+struct Raw128;
+
+// Each Raw128 specialization defines the following typedefs:
+// - type:
+//   the backing Altivec/VSX raw vector type of the Vec128<T, N> type
+// - RawBoolVec:
+//   the backing Altivec/VSX raw __bool vector type of the Mask128<T, N> type
+// - RawT:
+//   the lane type for intrinsics, in particular vec_splat
+// - AlignedRawVec:
+//   the 128-bit GCC/Clang vector type for aligned loads/stores
+// - UnalignedRawVec:
+//   the 128-bit GCC/Clang vector type for unaligned loads/stores
+#define HWY_VSX_RAW128(LANE_TYPE, RAW_VECT_LANE_TYPE, RAW_BOOL_VECT_LANE_TYPE) \
+  template <>                                                                  \
+  struct Raw128<LANE_TYPE> {                                                   \
+    using type = __vector RAW_VECT_LANE_TYPE;                                  \
+    using RawBoolVec = __vector __bool RAW_BOOL_VECT_LANE_TYPE;                \
+    using RawT = RAW_VECT_LANE_TYPE;                                           \
+    typedef LANE_TYPE AlignedRawVec                                            \
+        __attribute__((__vector_size__(16), __aligned__(16), __may_alias__));  \
+    typedef LANE_TYPE UnalignedRawVec __attribute__((                          \
+        __vector_size__(16), __aligned__(alignof(LANE_TYPE)), __may_alias__)); \
+  };
+
+HWY_VSX_RAW128(int8_t, signed char, char)  // args: lane type, raw lane, __bool lane
+HWY_VSX_RAW128(uint8_t, unsigned char, char)
+HWY_VSX_RAW128(int16_t, signed short, short)     // NOLINT(runtime/int)
+HWY_VSX_RAW128(uint16_t, unsigned short, short)  // NOLINT(runtime/int)
+HWY_VSX_RAW128(int32_t, signed int, int)
+HWY_VSX_RAW128(uint32_t, unsigned int, int)
+HWY_VSX_RAW128(int64_t, signed long long, long long)     // NOLINT(runtime/int)
+HWY_VSX_RAW128(uint64_t, unsigned long long, long long)  // NOLINT(runtime/int)
+HWY_VSX_RAW128(float, float, int)  // float masks use __bool int lanes
+HWY_VSX_RAW128(double, double, long long)  // NOLINT(runtime/int)
+// bfloat16_t and float16_t reuse the raw types of uint16_t.
+template <>
+struct Raw128<bfloat16_t> : public Raw128<uint16_t> {};
+
+template <>
+struct Raw128<float16_t> : public Raw128<uint16_t> {};
+
+#undef HWY_VSX_RAW128
+
+}  // namespace detail
+
+template <typename T, size_t N = 16 / sizeof(T)>  // default N: a full 16-byte vector
+class Vec128 {
+  using Raw = typename detail::Raw128<T>::type;
+
+ public:
+  using PrivateT = T;                     // only for DFromV
+  static constexpr size_t kPrivateN = N;  // only for DFromV
+
+  // Compound assignment. Only usable if there is a corresponding non-member
+  // binary operator overload. For example, only f32 and f64 support division.
+  HWY_INLINE Vec128& operator*=(const Vec128 other) {
+    return *this = (*this * other);
+  }
+  HWY_INLINE Vec128& operator/=(const Vec128 other) {
+    return *this = (*this / other);
+  }
+  HWY_INLINE Vec128& operator+=(const Vec128 other) {
+    return *this = (*this + other);
+  }
+  HWY_INLINE Vec128& operator-=(const Vec128 other) {
+    return *this = (*this - other);
+  }
+  HWY_INLINE Vec128& operator&=(const Vec128 other) {
+    return *this = (*this & other);
+  }
+  HWY_INLINE Vec128& operator|=(const Vec128 other) {
+    return *this = (*this | other);
+  }
+  HWY_INLINE Vec128& operator^=(const Vec128 other) {
+    return *this = (*this ^ other);
+  }
+  // The wrapped raw vector, of type detail::Raw128<T>::type.
+  Raw raw;
+};
+
+template <typename T>
+using Vec64 = Vec128<T, 8 / sizeof(T)>;  // as many lanes of T as fit in 8 bytes
+
+template <typename T>
+using Vec32 = Vec128<T, 4 / sizeof(T)>;  // as many lanes of T as fit in 4 bytes
+
+template <typename T>
+using Vec16 = Vec128<T, 2 / sizeof(T)>;  // as many lanes of T as fit in 2 bytes
+
+// Mask for Vec128<T, N>: each lane is either all-ones (FF..FF) or 0.
+template <typename T, size_t N = 16 / sizeof(T)>
+struct Mask128 {
+  typename detail::Raw128<T>::RawBoolVec raw;  // raw __bool vector for lane type T
+
+  using PrivateT = T;                     // only for DFromM
+  static constexpr size_t kPrivateN = N;  // only for DFromM
+};
+
+template <class V>
+using DFromV = Simd<typename V::PrivateT, V::kPrivateN, 0>;  // tag type for vector type V
+
+template <class M>
+using DFromM = Simd<typename M::PrivateT, M::kPrivateN, 0>;  // tag type for mask type M
+
+template <class V>
+using TFromV = typename V::PrivateT;  // lane type of vector type V
+
+// ------------------------------ Zero
+
+// Returns a vector/part whose lanes are all zero.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec128<T, HWY_MAX_LANES_D(D)> Zero(D /* tag */) {
+  // vec_splats has no 64-bit overload, so casting the 0 argument cannot be
+  // relied upon to select the lane type. Splat a plain 0 and cast the returned
+  // raw vector instead; see also the comment in BitCast.
+  using RawVec = typename detail::Raw128<T>::type;
+  return Vec128<T, HWY_MAX_LANES_D(D)>{
+      reinterpret_cast<RawVec>(vec_splats(0))};
+}
+
+template <class D>
+using VFromD = decltype(Zero(D()));  // vector type for tag D = return type of Zero
+
+// ------------------------------ Tuple (VFromD)
+#include "hwy/ops/tuple-inl.h"
+
+// ------------------------------ BitCast
+
+// Reinterprets the bits of `v` as a vector with the lane type of D.
+template <class D, typename FromT>
+HWY_API VFromD<D> BitCast(D /*d*/,
+                          Vec128<FromT, Repartition<FromT, D>().MaxLanes()> v) {
+  // reinterpret_cast is required: C-style casts are not sufficient when
+  // compiling with -fno-lax-vector-conversions, which will be the future
+  // default in Clang.
+  using RawTo = typename detail::Raw128<TFromD<D>>::type;
+  return VFromD<D>{reinterpret_cast<RawTo>(v.raw)};
+}
+
+// ------------------------------ ResizeBitCast
+
+// Reinterprets the raw vector of `v` as a vector of D; unlike BitCast, the
+// parameter type places no constraint on the lane count of `v`.
+template <class D, typename FromV>
+HWY_API VFromD<D> ResizeBitCast(D /*d*/, FromV v) {
+  // reinterpret_cast is required: C-style casts are not sufficient when
+  // compiling with -fno-lax-vector-conversions, which will be the future
+  // default in Clang.
+  using RawTo = typename detail::Raw128<TFromD<D>>::type;
+  return VFromD<D>{reinterpret_cast<RawTo>(v.raw)};
+}
+
+// ------------------------------ Set
+
+// Returns a vector/part in which every lane equals "t".
+template <class D, HWY_IF_NOT_SPECIAL_FLOAT(TFromD<D>)>
+HWY_API VFromD<D> Set(D /* tag */, TFromD<D> t) {
+  // Convert to the lane type the intrinsic expects before splatting.
+  using RawLane = typename detail::Raw128<TFromD<D>>::RawT;
+  const RawLane lane_value = static_cast<RawLane>(t);
+  return VFromD<D>{vec_splats(lane_value)};
+}
+
+// Returns a vector with unspecified elements (zero-initialized on GCC).
+template <class D>
+HWY_API VFromD<D> Undefined(D d) {
+#if HWY_COMPILER_GCC_ACTUAL
+  // Suppressing maybe-uninitialized both here and at the caller does not work,
+  // so initialize.
+  return Zero(d);
+#else
+  HWY_DIAGNOSTICS(push)
+  HWY_DIAGNOSTICS_OFF(disable : 4700, ignored "-Wuninitialized")
+  typename detail::Raw128<TFromD<D>>::type raw;  // intentionally left uninitialized
+  return VFromD<decltype(d)>{raw};
+  HWY_DIAGNOSTICS(pop)
+#endif
+}
+
+// ------------------------------ GetLane
+
+// Returns lane 0 of a vector/part.
+
+template <typename T, size_t N>
+HWY_API T GetLane(Vec128<T, N> v) {
+  return static_cast<T>(v.raw[0]);  // raw lane type may differ from T, hence the cast
+}
+
+// ================================================== LOGICAL
+
+// ------------------------------ And
+
+// Bitwise AND, computed on the unsigned view so it works for every lane type.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> And(Vec128<T, N> a, Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto a_bits = BitCast(du, a).raw;
+  const auto b_bits = BitCast(du, b).raw;
+  return BitCast(d, VFromD<decltype(du)>{vec_and(a_bits, b_bits)});
+}
+
+// ------------------------------ AndNot
+
+// Returns ~not_mask & mask.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> AndNot(Vec128<T, N> not_mask, Vec128<T, N> mask) {
+  const DFromV<decltype(mask)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  // vec_andc takes the operand to keep first and the one to complement second,
+  // hence the swapped argument order relative to this function.
+  const auto kept_bits = BitCast(du, mask).raw;
+  const auto cleared_bits = BitCast(du, not_mask).raw;
+  return BitCast(d, VFromD<decltype(du)>{vec_andc(kept_bits, cleared_bits)});
+}
+
+// ------------------------------ Or
+
+// Bitwise OR, computed on the unsigned view so it works for every lane type.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Or(Vec128<T, N> a, Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto a_bits = BitCast(du, a).raw;
+  const auto b_bits = BitCast(du, b).raw;
+  return BitCast(d, VFromD<decltype(du)>{vec_or(a_bits, b_bits)});
+}
+
+// ------------------------------ Xor
+
+// Bitwise XOR, computed on the unsigned view so it works for every lane type.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Xor(Vec128<T, N> a, Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto a_bits = BitCast(du, a).raw;
+  const auto b_bits = BitCast(du, b).raw;
+  return BitCast(d, VFromD<decltype(du)>{vec_xor(a_bits, b_bits)});
+}
+
+// ------------------------------ Not
+// Bitwise complement, implemented as NOR of the value with itself.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Not(Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto bits = BitCast(du, v).raw;
+  return BitCast(d, VFromD<decltype(du)>{vec_nor(bits, bits)});
+}
+
+// ------------------------------ IsConstantRawAltivecVect
+namespace detail {
+
+template <class RawV>
+static HWY_INLINE bool IsConstantRawAltivecVect(
+    hwy::SizeTag<1> /* lane_size_tag */, RawV v) {  // 1-byte lanes: checks all 16
+  return __builtin_constant_p(v[0]) && __builtin_constant_p(v[1]) &&
+         __builtin_constant_p(v[2]) && __builtin_constant_p(v[3]) &&
+         __builtin_constant_p(v[4]) && __builtin_constant_p(v[5]) &&
+         __builtin_constant_p(v[6]) && __builtin_constant_p(v[7]) &&
+         __builtin_constant_p(v[8]) && __builtin_constant_p(v[9]) &&
+         __builtin_constant_p(v[10]) && __builtin_constant_p(v[11]) &&
+         __builtin_constant_p(v[12]) && __builtin_constant_p(v[13]) &&
+         __builtin_constant_p(v[14]) && __builtin_constant_p(v[15]);
+}
+// 2-byte lanes: true only if __builtin_constant_p holds for all 8 lanes.
+template <class RawV>
+static HWY_INLINE bool IsConstantRawAltivecVect(
+    hwy::SizeTag<2> /* lane_size_tag */, RawV v) {
+  return __builtin_constant_p(v[0]) && __builtin_constant_p(v[1]) &&
+         __builtin_constant_p(v[2]) && __builtin_constant_p(v[3]) &&
+         __builtin_constant_p(v[4]) && __builtin_constant_p(v[5]) &&
+         __builtin_constant_p(v[6]) && __builtin_constant_p(v[7]);
+}
+// 4-byte lanes: true only if __builtin_constant_p holds for all 4 lanes.
+template <class RawV>
+static HWY_INLINE bool IsConstantRawAltivecVect(
+    hwy::SizeTag<4> /* lane_size_tag */, RawV v) {
+  return __builtin_constant_p(v[0]) && __builtin_constant_p(v[1]) &&
+         __builtin_constant_p(v[2]) && __builtin_constant_p(v[3]);
+}
+// 8-byte lanes: true only if __builtin_constant_p holds for both lanes.
+template <class RawV>
+static HWY_INLINE bool IsConstantRawAltivecVect(
+    hwy::SizeTag<8> /* lane_size_tag */, RawV v) {
+  return __builtin_constant_p(v[0]) && __builtin_constant_p(v[1]);
+}
+// Entry point: dispatches on the size of one lane of the raw vector.
+template <class RawV>
+static HWY_INLINE bool IsConstantRawAltivecVect(RawV v) {
+  return IsConstantRawAltivecVect(hwy::SizeTag<sizeof(decltype(v[0]))>(), v);
+}
+
+}  // namespace detail
+
+// ------------------------------ TernaryLogic
+#if HWY_PPC_HAVE_10
+namespace detail {
+
+// NOTE: the kTernLogOp bits of the PPC10 TernaryLogic operation are in reverse
+// order of the kTernLogOp bits of AVX3
+// _mm_ternarylogic_epi64(a, b, c, kTernLogOp)
+template <uint8_t kTernLogOp, class V>
+HWY_INLINE V TernaryLogic(V a, V b, V c) {
+  const DFromV<decltype(a)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const auto a_raw = BitCast(du, a).raw;  // all three operands use the unsigned view
+  const auto b_raw = BitCast(du, b).raw;
+  const auto c_raw = BitCast(du, c).raw;
+  // GCC: xxeval via inline asm; other compilers: the vec_ternarylogic intrinsic.
+#if HWY_COMPILER_GCC_ACTUAL
+  // Use inline assembly on GCC to work around GCC compiler bug
+  typename detail::Raw128<TFromV<VU>>::type raw_ternlog_result;
+  __asm__("xxeval %x0,%x1,%x2,%x3,%4"
+          : "=wa"(raw_ternlog_result)
+          : "wa"(a_raw), "wa"(b_raw), "wa"(c_raw),
+            "n"(static_cast<unsigned>(kTernLogOp))
+          :);
+#else
+  const auto raw_ternlog_result =
+      vec_ternarylogic(a_raw, b_raw, c_raw, kTernLogOp);
+#endif
+  // Wrap the raw result as an unsigned vector, then cast back to the type of a.
+  return BitCast(d, VU{raw_ternlog_result});
+}
+
+}  // namespace detail
+#endif  // HWY_PPC_HAVE_10
+
+// ------------------------------ Xor3
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Xor3(Vec128<T, N> x1, Vec128<T, N> x2, Vec128<T, N> x3) {
+#if HWY_PPC_HAVE_10
+#if defined(__OPTIMIZE__)
+  if (static_cast<int>(detail::IsConstantRawAltivecVect(x1.raw)) +
+          static_cast<int>(detail::IsConstantRawAltivecVect(x2.raw)) +
+          static_cast<int>(detail::IsConstantRawAltivecVect(x3.raw)) >=
+      2) {  // at least two operands are compile-time constants
+    return Xor(x1, Xor(x2, x3));  // presumably lets the compiler fold the constants
+  } else  // NOLINT
+#endif
+  {
+    return detail::TernaryLogic<0x69>(x1, x2, x3);  // 0x69: x1 ^ x2 ^ x3 in PPC10 bit order
+  }
+#else
+  return Xor(x1, Xor(x2, x3));  // no PPC10 support: two plain Xor operations
+#endif
+}
+
+// ------------------------------ Or3
+// Returns o1 | o2 | o3.
+// With HWY_PPC_HAVE_10 this is a single TernaryLogic operation, except in
+// optimized builds when at least two inputs are known compile-time constants,
+// where two plain Or are used instead (presumably so the constants can be
+// folded by the compiler).
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Or3(Vec128<T, N> o1, Vec128<T, N> o2, Vec128<T, N> o3) {
+#if HWY_PPC_HAVE_10
+#if defined(__OPTIMIZE__)
+  // Count how many of the three inputs are compile-time constants.
+  if (static_cast<int>(detail::IsConstantRawAltivecVect(o1.raw)) +
+          static_cast<int>(detail::IsConstantRawAltivecVect(o2.raw)) +
+          static_cast<int>(detail::IsConstantRawAltivecVect(o3.raw)) >=
+      2) {
+    return Or(o1, Or(o2, o3));
+  } else  // NOLINT
+#endif
+  {
+    // 0x7F is the three-way OR truth table in the PPC10 bit order.
+    return detail::TernaryLogic<0x7F>(o1, o2, o3);
+  }
+#else
+  return Or(o1, Or(o2, o3));
+#endif
+}
+
+// ------------------------------ OrAnd
+// Returns o | (a1 & a2).
+// With HWY_PPC_HAVE_10 this is a single TernaryLogic operation, except in
+// optimized builds when both a1 and a2 are known compile-time constants,
+// where And followed by Or is used instead (presumably so a1 & a2 can be
+// folded by the compiler).
+template <typename T, size_t N>
+HWY_API Vec128<T, N> OrAnd(Vec128<T, N> o, Vec128<T, N> a1, Vec128<T, N> a2) {
+#if HWY_PPC_HAVE_10
+#if defined(__OPTIMIZE__)
+  if (detail::IsConstantRawAltivecVect(a1.raw) &&
+      detail::IsConstantRawAltivecVect(a2.raw)) {
+    return Or(o, And(a1, a2));
+  } else  // NOLINT
+#endif
+  {
+    // 0x1F is the o | (a1 & a2) truth table in the PPC10 bit order.
+    return detail::TernaryLogic<0x1F>(o, a1, a2);
+  }
+#else
+  return Or(o, And(a1, a2));
+#endif
+}
+
+// ------------------------------ IfVecThenElse
+// Bitwise select driven by a vector (not a Mask128): vec_sel picks bits of
+// `yes` where the corresponding bit of `mask` is set, otherwise bits of `no`.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfVecThenElse(Vec128<T, N> mask, Vec128<T, N> yes,
+                                   Vec128<T, N> no) {
+  const DFromV<decltype(yes)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  // Reinterpret all operands as unsigned lanes before selecting.
+  const VU no_bits = BitCast(du, no);
+  const VU yes_bits = BitCast(du, yes);
+  const VU mask_bits = BitCast(du, mask);
+  const VU selected{vec_sel(no_bits.raw, yes_bits.raw, mask_bits.raw)};
+  return BitCast(d, selected);
+}
+
+// ------------------------------ BitwiseIfThenElse
+
+#ifdef HWY_NATIVE_BITWISE_IF_THEN_ELSE
+#undef HWY_NATIVE_BITWISE_IF_THEN_ELSE
+#else
+#define HWY_NATIVE_BITWISE_IF_THEN_ELSE
+#endif
+
+// Forwards to IfVecThenElse with the same argument order.
+template <class V>
+HWY_API V BitwiseIfThenElse(V mask, V yes, V no) {
+  const V blended = IfVecThenElse(mask, yes, no);
+  return blended;
+}
+
+// ------------------------------ Operator overloads (internal-only if float)
+
+// Operator form of And(a, b).
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator&(Vec128<T, N> a, Vec128<T, N> b) {
+  const Vec128<T, N> conjunction = And(a, b);
+  return conjunction;
+}
+
+// Operator form of Or(a, b).
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator|(Vec128<T, N> a, Vec128<T, N> b) {
+  const Vec128<T, N> disjunction = Or(a, b);
+  return disjunction;
+}
+
+// Operator form of Xor(a, b).
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator^(Vec128<T, N> a, Vec128<T, N> b) {
+  const Vec128<T, N> difference = Xor(a, b);
+  return difference;
+}
+
+// ================================================== SIGN
+
+// ------------------------------ Neg
+
+// Lane-wise negation via vec_neg for all lane types except special floats.
+template <typename T, size_t N, HWY_IF_NOT_SPECIAL_FLOAT(T)>
+HWY_INLINE Vec128<T, N> Neg(Vec128<T, N> v) {
+  const auto negated = vec_neg(v.raw);
+  return Vec128<T, N>{negated};
+}
+
+// Negation for special float lane types: toggles the sign bit with Xor.
+template <typename T, size_t N, HWY_IF_SPECIAL_FLOAT(T)>
+HWY_API Vec128<T, N> Neg(const Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  const auto sign_bit = SignBit(d);
+  return Xor(v, sign_bit);
+}
+
+// ------------------------------ Abs
+
+// Lane-wise absolute value via vec_abs. Note that LimitsMin() maps to
+// LimitsMax() + 1.
+template <class T, size_t N, HWY_IF_NOT_SPECIAL_FLOAT(T)>
+HWY_API Vec128<T, N> Abs(Vec128<T, N> v) {
+  const auto magnitude = vec_abs(v.raw);
+  return Vec128<T, N>{magnitude};
+}
+
+// ------------------------------ CopySign
+
+// f32 CopySign: combines the magnitude of `magn` with the sign of `sign`,
+// choosing the builtin according to the compiler and its version.
+template <size_t N>
+HWY_API Vec128<float, N> CopySign(Vec128<float, N> magn,
+                                  Vec128<float, N> sign) {
+  // Work around compiler bugs that are there with vec_cpsgn on older versions
+  // of GCC/Clang
+#if HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1200
+  return Vec128<float, N>{__builtin_vec_copysign(magn.raw, sign.raw)};
+#elif HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1200 && \
+    HWY_HAS_BUILTIN(__builtin_vsx_xvcpsgnsp)
+  return Vec128<float, N>{__builtin_vsx_xvcpsgnsp(magn.raw, sign.raw)};
+#else
+  // Argument order differs from the builtins above: the sign source is
+  // passed first to vec_cpsgn.
+  return Vec128<float, N>{vec_cpsgn(sign.raw, magn.raw)};
+#endif
+}
+
+// f64 CopySign: combines the magnitude of `magn` with the sign of `sign`,
+// choosing the builtin according to the compiler and its version.
+template <size_t N>
+HWY_API Vec128<double, N> CopySign(Vec128<double, N> magn,
+                                   Vec128<double, N> sign) {
+  // Work around compiler bugs that are there with vec_cpsgn on older versions
+  // of GCC/Clang
+#if HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1200
+  return Vec128<double, N>{__builtin_vec_copysign(magn.raw, sign.raw)};
+#elif HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1200 && \
+    HWY_HAS_BUILTIN(__builtin_vsx_xvcpsgndp)
+  return Vec128<double, N>{__builtin_vsx_xvcpsgndp(magn.raw, sign.raw)};
+#else
+  // Argument order differs from the builtins above: the sign source is
+  // passed first to vec_cpsgn.
+  return Vec128<double, N>{vec_cpsgn(sign.raw, magn.raw)};
+#endif
+}
+
+// Floating-point only; forwards directly to CopySign.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> CopySignToAbs(Vec128<T, N> abs, Vec128<T, N> sign) {
+  static_assert(IsFloat<T>(), "Only makes sense for floating-point");
+  // PPC8 can also handle abs < 0, so no extra action needed.
+  const Vec128<T, N> result = CopySign(abs, sign);
+  return result;
+}
+
+// ================================================== MEMORY (1)
+
+// Note: type punning is safe because the types are tagged with may_alias.
+// (https://godbolt.org/z/fqrWjfjsP)
+
+// ------------------------------ Load
+
+// Full 128-bit load through the AlignedRawVec pointer type; the loaded value
+// is reinterpreted as the vector's raw type.
+template <class D, HWY_IF_V_SIZE_D(D, 16), typename T = TFromD<D>>
+HWY_API Vec128<T> Load(D /* tag */, const T* HWY_RESTRICT aligned) {
+  using ResultRaw = typename detail::Raw128<T>::type;
+  using LoadRaw = typename detail::Raw128<T>::AlignedRawVec;
+  const LoadRaw* HWY_RESTRICT src = reinterpret_cast<const LoadRaw*>(aligned);
+  return Vec128<T>{reinterpret_cast<ResultRaw>(*src)};
+}
+
+// Any <= 64 bit: copies d.MaxBytes() bytes from `p` into an unsigned integer
+// of that size, broadcasts it with Set and reinterprets the result as D.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), typename T = TFromD<D>>
+HWY_API VFromD<D> Load(D d, const T* HWY_RESTRICT p) {
+  using BitsT = UnsignedFromSize<d.MaxBytes()>;
+  const Repartition<BitsT, decltype(d)> d_bits;
+
+  BitsT raw_bits;
+  CopyBytes<d.MaxBytes()>(p, &raw_bits);
+  const auto bits_vec = Set(d_bits, raw_bits);
+  return BitCast(d, bits_vec);
+}
+
+// ================================================== MASK
+
+// ------------------------------ Mask
+
+// Mask and Vec are both backed by vector types (true = FF..FF), so the
+// conversion is a reinterpretation of the raw vector as RawBoolVec.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> MaskFromVec(Vec128<T, N> v) {
+  using RawMask = typename detail::Raw128<T>::RawBoolVec;
+  const RawMask raw_mask = reinterpret_cast<RawMask>(v.raw);
+  return Mask128<T, N>{raw_mask};
+}
+
+// Mask type for descriptor D, deduced as the return type of MaskFromVec
+// applied to the corresponding vector type.
+template <class D>
+using MFromD = decltype(MaskFromVec(VFromD<D>()));
+
+// Reinterprets the raw mask vector as the raw vector type for T.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> VecFromMask(Mask128<T, N> v) {
+  using RawVec = typename detail::Raw128<T>::type;
+  const RawVec raw_vec = reinterpret_cast<RawVec>(v.raw);
+  return Vec128<T, N>{raw_vec};
+}
+
+// Descriptor-tagged overload: reinterprets the raw mask vector as the raw
+// vector type for TFromD<D>.
+template <class D>
+HWY_API VFromD<D> VecFromMask(D /* tag */, MFromD<D> v) {
+  using RawVec = typename detail::Raw128<TFromD<D>>::type;
+  const RawVec raw_vec = reinterpret_cast<RawVec>(v.raw);
+  return VFromD<D>{raw_vec};
+}
+
+// mask ? yes : no
+// The select is done on the unsigned reinterpretation of both inputs.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfThenElse(Mask128<T, N> mask, Vec128<T, N> yes,
+                                Vec128<T, N> no) {
+  const DFromV<decltype(yes)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const VU no_bits = BitCast(du, no);
+  const VU yes_bits = BitCast(du, yes);
+  const VU selected{vec_sel(no_bits.raw, yes_bits.raw, mask.raw)};
+  return BitCast(d, selected);
+}
+
+// mask ? yes : 0
+// Implemented as a bitwise AND of `yes` (as unsigned lanes) with the mask.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfThenElseZero(Mask128<T, N> mask, Vec128<T, N> yes) {
+  const DFromV<decltype(yes)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const VU yes_bits = BitCast(du, yes);
+  const VU kept{vec_and(yes_bits.raw, mask.raw)};
+  return BitCast(d, kept);
+}
+
+// mask ? 0 : no
+// Implemented as `no` (as unsigned lanes) AND-with-complement of the mask.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfThenZeroElse(Mask128<T, N> mask, Vec128<T, N> no) {
+  const DFromV<decltype(no)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const VU no_bits = BitCast(du, no);
+  const VU kept{vec_andc(no_bits.raw, mask.raw)};
+  return BitCast(d, kept);
+}
+
+// ------------------------------ Mask logical
+
+// Mask complement, computed as NOR of the mask with itself.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Not(Mask128<T, N> m) {
+  const auto inverted = vec_nor(m.raw, m.raw);
+  return Mask128<T, N>{inverted};
+}
+
+// Mask conjunction via vec_and.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> And(Mask128<T, N> a, Mask128<T, N> b) {
+  const auto both = vec_and(a.raw, b.raw);
+  return Mask128<T, N>{both};
+}
+
+// Mask and-not via vec_andc; note the operands are passed as (b, a).
+template <typename T, size_t N>
+HWY_API Mask128<T, N> AndNot(Mask128<T, N> a, Mask128<T, N> b) {
+  const auto b_without_a = vec_andc(b.raw, a.raw);
+  return Mask128<T, N>{b_without_a};
+}
+
+// Mask disjunction via vec_or.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Or(Mask128<T, N> a, Mask128<T, N> b) {
+  const auto either = vec_or(a.raw, b.raw);
+  return Mask128<T, N>{either};
+}
+
+// Mask exclusive-or via vec_xor.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Xor(Mask128<T, N> a, Mask128<T, N> b) {
+  const auto exactly_one = vec_xor(a.raw, b.raw);
+  return Mask128<T, N>{exactly_one};
+}
+
+// Computed as the NOR of the two masks.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> ExclusiveNeither(Mask128<T, N> a, Mask128<T, N> b) {
+  const auto neither = vec_nor(a.raw, b.raw);
+  return Mask128<T, N>{neither};
+}
+
+// ------------------------------ BroadcastSignBit
+
+// int8_t lanes: arithmetic shift right by 7 replicates the sign bit.
+template <size_t N>
+HWY_API Vec128<int8_t, N> BroadcastSignBit(Vec128<int8_t, N> v) {
+  const auto shift_count = vec_splats(static_cast<unsigned char>(7));
+  return Vec128<int8_t, N>{vec_sra(v.raw, shift_count)};
+}
+
+// int16_t lanes: arithmetic shift right by 15 replicates the sign bit.
+template <size_t N>
+HWY_API Vec128<int16_t, N> BroadcastSignBit(Vec128<int16_t, N> v) {
+  const auto shift_count = vec_splats(static_cast<unsigned short>(15));
+  return Vec128<int16_t, N>{vec_sra(v.raw, shift_count)};
+}
+
+// int32_t lanes: arithmetic shift right by 31 replicates the sign bit.
+template <size_t N>
+HWY_API Vec128<int32_t, N> BroadcastSignBit(Vec128<int32_t, N> v) {
+  const auto shift_count = vec_splats(31u);
+  return Vec128<int32_t, N>{vec_sra(v.raw, shift_count)};
+}
+
+// int64_t lanes: arithmetic shift right by 63 replicates the sign bit.
+template <size_t N>
+HWY_API Vec128<int64_t, N> BroadcastSignBit(Vec128<int64_t, N> v) {
+  const auto shift_count = vec_splats(63ULL);
+  return Vec128<int64_t, N>{vec_sra(v.raw, shift_count)};
+}
+
+// ------------------------------ ShiftLeftSame
+
+// Shifts every lane left by the same runtime count `bits`, which is
+// broadcast as an unsigned lane value.
+template <typename T, size_t N, HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>
+HWY_API Vec128<T, N> ShiftLeftSame(Vec128<T, N> v, const int bits) {
+  using TU = typename detail::Raw128<MakeUnsigned<T>>::RawT;
+  const auto shift_count = vec_splats(static_cast<TU>(bits));
+  return Vec128<T, N>{vec_sl(v.raw, shift_count)};
+}
+
+// ------------------------------ ShiftRightSame
+
+// Unsigned lanes: vec_sr by the same runtime count `bits` in every lane.
+template <typename T, size_t N, HWY_IF_UNSIGNED(T)>
+HWY_API Vec128<T, N> ShiftRightSame(Vec128<T, N> v, const int bits) {
+  using TU = typename detail::Raw128<MakeUnsigned<T>>::RawT;
+  const auto shift_count = vec_splats(static_cast<TU>(bits));
+  return Vec128<T, N>{vec_sr(v.raw, shift_count)};
+}
+
+// Signed lanes: vec_sra by the same runtime count `bits` in every lane.
+template <typename T, size_t N, HWY_IF_SIGNED(T)>
+HWY_API Vec128<T, N> ShiftRightSame(Vec128<T, N> v, const int bits) {
+  using TU = typename detail::Raw128<MakeUnsigned<T>>::RawT;
+  const auto shift_count = vec_splats(static_cast<TU>(bits));
+  return Vec128<T, N>{vec_sra(v.raw, shift_count)};
+}
+
+// ------------------------------ ShiftLeft
+
+// Compile-time shift count; validated, then forwarded to ShiftLeftSame.
+template <int kBits, typename T, size_t N, HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>
+HWY_API Vec128<T, N> ShiftLeft(Vec128<T, N> v) {
+  static_assert(0 <= kBits && kBits < sizeof(T) * 8, "Invalid shift");
+  const Vec128<T, N> shifted = ShiftLeftSame(v, kBits);
+  return shifted;
+}
+
+// ------------------------------ ShiftRight
+
+// Compile-time shift count; validated, then forwarded to ShiftRightSame.
+template <int kBits, typename T, size_t N, HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>
+HWY_API Vec128<T, N> ShiftRight(Vec128<T, N> v) {
+  static_assert(0 <= kBits && kBits < sizeof(T) * 8, "Invalid shift");
+  const Vec128<T, N> shifted = ShiftRightSame(v, kBits);
+  return shifted;
+}
+
+// ================================================== SWIZZLE (1)
+
+// ------------------------------ TableLookupBytes
+// Permutes the bytes of `bytes` using the byte indices held in `from`
+// (vec_perm with `bytes` supplied as both table operands). The result has
+// the type of `from`.
+template <typename T, size_t N, typename TI, size_t NI>
+HWY_API Vec128<TI, NI> TableLookupBytes(Vec128<T, N> bytes,
+                                        Vec128<TI, NI> from) {
+  using RawTI = typename detail::Raw128<TI>::type;
+  const Repartition<uint8_t, DFromV<decltype(from)>> du8_from;
+  const auto indices = BitCast(du8_from, from).raw;
+  const auto permuted = vec_perm(bytes.raw, bytes.raw, indices);
+  return Vec128<TI, NI>{reinterpret_cast<RawTI>(permuted)};
+}
+
+// ------------------------------ TableLookupBytesOr0
+// For all vector widths; Altivec/VSX needs to zero out the lanes explicitly
+// Like TableLookupBytes, but result bytes whose index byte in `from` has its
+// sign bit set are cleared.
+template <class V, class VI>
+HWY_API VI TableLookupBytesOr0(const V bytes, const VI from) {
+  const DFromV<VI> di;
+  const Repartition<int8_t, decltype(di)> di8;
+  // All-ones in every byte whose index is negative.
+  const auto negative_bytes = BroadcastSignBit(BitCast(di8, from));
+  const VI zeroOutMask = BitCast(di, negative_bytes);
+  const VI looked_up = TableLookupBytes(bytes, from);
+  return AndNot(zeroOutMask, looked_up);
+}
+
+// ------------------------------ Reverse
+// Reverses the lane order of a full vector via vec_reve (more than one lane).
+template <class D, typename T = TFromD<D>, HWY_IF_LANES_GT_D(D, 1)>
+HWY_API Vec128<T> Reverse(D /* tag */, Vec128<T> v) {
+  const auto reversed = vec_reve(v.raw);
+  return Vec128<T>{reversed};
+}
+
+// ------------------------------ Shuffles (Reverse)
+
+// Notation: let Vec128<int32_t> have lanes 3,2,1,0 (0 is least-significant).
+// Shuffle0321 rotates one lane to the right (the previous least-significant
+// lane is now most-significant). These could also be implemented via
+// CombineShiftRightBytes but the shuffle_abcd notation is more convenient.
+
+// Swap 32-bit halves in 64-bit halves.
+// Implemented as a byte permutation of `v` with itself.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Shuffle2301(Vec128<T, N> v) {
+  static_assert(sizeof(T) == 4, "Only for 32-bit lanes");
+  static_assert(N == 2 || N == 4, "Does not make sense for N=1");
+  // Byte indices: each group of four bytes is exchanged with its neighbour.
+  const __vector unsigned char kSwapAdjacent32 = {4,  5,  6,  7,  0, 1, 2,  3,
+                                                  12, 13, 14, 15, 8, 9, 10, 11};
+  const auto swapped = vec_perm(v.raw, v.raw, kSwapAdjacent32);
+  return Vec128<T, N>{swapped};
+}
+
+// These are used by generic_ops-inl to implement LoadInterleaved3. As with
+// Intel's shuffle* intrinsics and InterleaveLower, the lower half of the output
+// comes from the first argument.
+namespace detail {
+
+// Two-input shuffles implemented with vec_perm(a, b, table). In each table,
+// byte indices 0..15 refer to bytes of `a` and 16..31 to bytes of `b`; the
+// first half of every table only references `a` and the second half only
+// references `b`. Overloads exist for 1-, 2- and 4-byte lanes (four lanes
+// each); only the first 4, 8 or 16 table entries are spelled out accordingly.
+
+// ShuffleTwo2301, 1-byte lanes.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec32<T> ShuffleTwo2301(Vec32<T> a, Vec32<T> b) {
+  const __vector unsigned char kShuffle16 = {1, 0, 19, 18};
+  return Vec32<T>{vec_perm(a.raw, b.raw, kShuffle16)};
+}
+// ShuffleTwo2301, 2-byte lanes.
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec64<T> ShuffleTwo2301(Vec64<T> a, Vec64<T> b) {
+  const __vector unsigned char kShuffle = {2, 3, 0, 1, 22, 23, 20, 21};
+  return Vec64<T>{vec_perm(a.raw, b.raw, kShuffle)};
+}
+// ShuffleTwo2301, 4-byte lanes.
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T> ShuffleTwo2301(Vec128<T> a, Vec128<T> b) {
+  const __vector unsigned char kShuffle = {4,  5,  6,  7,  0,  1,  2,  3,
+                                           28, 29, 30, 31, 24, 25, 26, 27};
+  return Vec128<T>{vec_perm(a.raw, b.raw, kShuffle)};
+}
+
+// ShuffleTwo1230, 1-byte lanes.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec32<T> ShuffleTwo1230(Vec32<T> a, Vec32<T> b) {
+  const __vector unsigned char kShuffle = {0, 3, 18, 17};
+  return Vec32<T>{vec_perm(a.raw, b.raw, kShuffle)};
+}
+// ShuffleTwo1230, 2-byte lanes.
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec64<T> ShuffleTwo1230(Vec64<T> a, Vec64<T> b) {
+  const __vector unsigned char kShuffle = {0, 1, 6, 7, 20, 21, 18, 19};
+  return Vec64<T>{vec_perm(a.raw, b.raw, kShuffle)};
+}
+// ShuffleTwo1230, 4-byte lanes.
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T> ShuffleTwo1230(Vec128<T> a, Vec128<T> b) {
+  const __vector unsigned char kShuffle = {0,  1,  2,  3,  12, 13, 14, 15,
+                                           24, 25, 26, 27, 20, 21, 22, 23};
+  return Vec128<T>{vec_perm(a.raw, b.raw, kShuffle)};
+}
+
+// ShuffleTwo3012, 1-byte lanes.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec32<T> ShuffleTwo3012(Vec32<T> a, Vec32<T> b) {
+  const __vector unsigned char kShuffle = {2, 1, 16, 19};
+  return Vec32<T>{vec_perm(a.raw, b.raw, kShuffle)};
+}
+// ShuffleTwo3012, 2-byte lanes.
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec64<T> ShuffleTwo3012(Vec64<T> a, Vec64<T> b) {
+  const __vector unsigned char kShuffle = {4, 5, 2, 3, 16, 17, 22, 23};
+  return Vec64<T>{vec_perm(a.raw, b.raw, kShuffle)};
+}
+// ShuffleTwo3012, 4-byte lanes.
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T> ShuffleTwo3012(Vec128<T> a, Vec128<T> b) {
+  const __vector unsigned char kShuffle = {8,  9,  10, 11, 4,  5,  6,  7,
+                                           16, 17, 18, 19, 28, 29, 30, 31};
+  return Vec128<T>{vec_perm(a.raw, b.raw, kShuffle)};
+}
+
+}  // namespace detail
+
+// Swap 64-bit halves
+// Done by viewing the 32-bit lanes as two u64 lanes and reversing those.
+template <class T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T> Shuffle1032(Vec128<T> v) {
+  const Full128<uint64_t> du64;
+  const Full128<T> d;
+  const auto as_u64 = BitCast(du64, v);
+  const auto swapped = Reverse(du64, as_u64);
+  return BitCast(d, swapped);
+}
+// 64-bit lanes: swapping the two lanes is the same as reversing them.
+template <class T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T> Shuffle01(Vec128<T> v) {
+  const Full128<T> d;
+  return Reverse(d, v);
+}
+
+// Rotate right 32 bits
+// Implemented with vec_sld of `v` with itself; the byte count passed to
+// vec_sld depends on endianness (12 on little-endian, 4 on big-endian).
+template <class T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T> Shuffle0321(Vec128<T> v) {
+#if HWY_IS_LITTLE_ENDIAN
+  return Vec128<T>{vec_sld(v.raw, v.raw, 12)};
+#else
+  return Vec128<T>{vec_sld(v.raw, v.raw, 4)};
+#endif
+}
+// Rotate left 32 bits
+// Implemented with vec_sld of `v` with itself; the byte count passed to
+// vec_sld depends on endianness (4 on little-endian, 12 on big-endian).
+template <class T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T> Shuffle2103(Vec128<T> v) {
+#if HWY_IS_LITTLE_ENDIAN
+  return Vec128<T>{vec_sld(v.raw, v.raw, 4)};
+#else
+  return Vec128<T>{vec_sld(v.raw, v.raw, 12)};
+#endif
+}
+
+// 32-bit lanes: full reversal of the lane order.
+template <class T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T> Shuffle0123(Vec128<T> v) {
+  const Full128<T> d;
+  return Reverse(d, v);
+}
+
+// ================================================== COMPARE
+
+// Comparisons fill a lane with 1-bits if the condition is true, else 0.
+
+// Re-types a mask for a descriptor whose lanes have the same size; the raw
+// mask vector is passed through unchanged.
+template <class DTo, typename TFrom, size_t NFrom>
+HWY_API MFromD<DTo> RebindMask(DTo /*dto*/, Mask128<TFrom, NFrom> m) {
+  static_assert(sizeof(TFrom) == sizeof(TFromD<DTo>), "Must have same size");
+  using MTo = MFromD<DTo>;
+  return MTo{m.raw};
+}
+
+// True in lanes where every bit set in `bit` is also set in `v`.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> TestBit(Vec128<T, N> v, Vec128<T, N> bit) {
+  static_assert(!hwy::IsFloat<T>(), "Only integer vectors supported");
+  const Vec128<T, N> common_bits = v & bit;
+  return common_bits == bit;
+}
+
+// ------------------------------ Equality
+
+// Lane-wise equality via vec_cmpeq.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> operator==(Vec128<T, N> a, Vec128<T, N> b) {
+  const auto equal = vec_cmpeq(a.raw, b.raw);
+  return Mask128<T, N>{equal};
+}
+
+// ------------------------------ Inequality
+
+// This cannot have T as a template argument, otherwise it is not more
+// specialized than rewritten operator== in C++20, leading to compile
+// errors: https://gcc.godbolt.org/z/xsrPhPvPT.
+// uint8_t lanes: vec_cmpne when HWY_PPC_HAVE_9 is set, else the negation of
+// operator==.
+template <size_t N>
+HWY_API Mask128<uint8_t, N> operator!=(Vec128<uint8_t, N> a,
+                                       Vec128<uint8_t, N> b) {
+#if HWY_PPC_HAVE_9
+  const auto differs = vec_cmpne(a.raw, b.raw);
+  return Mask128<uint8_t, N>{differs};
+#else
+  const Mask128<uint8_t, N> equal = (a == b);
+  return Not(equal);
+#endif
+}
+// uint16_t lanes: vec_cmpne when HWY_PPC_HAVE_9 is set, else the negation of
+// operator==.
+template <size_t N>
+HWY_API Mask128<uint16_t, N> operator!=(Vec128<uint16_t, N> a,
+                                        Vec128<uint16_t, N> b) {
+#if HWY_PPC_HAVE_9
+  const auto differs = vec_cmpne(a.raw, b.raw);
+  return Mask128<uint16_t, N>{differs};
+#else
+  const Mask128<uint16_t, N> equal = (a == b);
+  return Not(equal);
+#endif
+}
+// uint32_t lanes: vec_cmpne when HWY_PPC_HAVE_9 is set, else the negation of
+// operator==.
+template <size_t N>
+HWY_API Mask128<uint32_t, N> operator!=(Vec128<uint32_t, N> a,
+                                        Vec128<uint32_t, N> b) {
+#if HWY_PPC_HAVE_9
+  const auto differs = vec_cmpne(a.raw, b.raw);
+  return Mask128<uint32_t, N>{differs};
+#else
+  const Mask128<uint32_t, N> equal = (a == b);
+  return Not(equal);
+#endif
+}
+// uint64_t lanes: always the negation of operator==.
+template <size_t N>
+HWY_API Mask128<uint64_t, N> operator!=(Vec128<uint64_t, N> a,
+                                        Vec128<uint64_t, N> b) {
+  const Mask128<uint64_t, N> equal = (a == b);
+  return Not(equal);
+}
+// int8_t lanes: vec_cmpne when HWY_PPC_HAVE_9 is set, else the negation of
+// operator==.
+template <size_t N>
+HWY_API Mask128<int8_t, N> operator!=(Vec128<int8_t, N> a,
+                                      Vec128<int8_t, N> b) {
+#if HWY_PPC_HAVE_9
+  const auto differs = vec_cmpne(a.raw, b.raw);
+  return Mask128<int8_t, N>{differs};
+#else
+  const Mask128<int8_t, N> equal = (a == b);
+  return Not(equal);
+#endif
+}
+// int16_t lanes: vec_cmpne when HWY_PPC_HAVE_9 is set, else the negation of
+// operator==.
+template <size_t N>
+HWY_API Mask128<int16_t, N> operator!=(Vec128<int16_t, N> a,
+                                       Vec128<int16_t, N> b) {
+#if HWY_PPC_HAVE_9
+  const auto differs = vec_cmpne(a.raw, b.raw);
+  return Mask128<int16_t, N>{differs};
+#else
+  const Mask128<int16_t, N> equal = (a == b);
+  return Not(equal);
+#endif
+}
+// int32_t lanes: vec_cmpne when HWY_PPC_HAVE_9 is set, else the negation of
+// operator==.
+template <size_t N>
+HWY_API Mask128<int32_t, N> operator!=(Vec128<int32_t, N> a,
+                                       Vec128<int32_t, N> b) {
+#if HWY_PPC_HAVE_9
+  const auto differs = vec_cmpne(a.raw, b.raw);
+  return Mask128<int32_t, N>{differs};
+#else
+  const Mask128<int32_t, N> equal = (a == b);
+  return Not(equal);
+#endif
+}
+// int64_t lanes: always the negation of operator==.
+template <size_t N>
+HWY_API Mask128<int64_t, N> operator!=(Vec128<int64_t, N> a,
+                                       Vec128<int64_t, N> b) {
+  const Mask128<int64_t, N> equal = (a == b);
+  return Not(equal);
+}
+
+// float lanes: the negation of operator==.
+template <size_t N>
+HWY_API Mask128<float, N> operator!=(Vec128<float, N> a, Vec128<float, N> b) {
+  const Mask128<float, N> equal = (a == b);
+  return Not(equal);
+}
+
+// double lanes: the negation of operator==.
+template <size_t N>
+HWY_API Mask128<double, N> operator!=(Vec128<double, N> a,
+                                      Vec128<double, N> b) {
+  const Mask128<double, N> equal = (a == b);
+  return Not(equal);
+}
+
+// ------------------------------ Strict inequality
+
+// Lane-wise a > b via vec_cmpgt.
+template <typename T, size_t N, HWY_IF_NOT_SPECIAL_FLOAT(T)>
+HWY_INLINE Mask128<T, N> operator>(Vec128<T, N> a, Vec128<T, N> b) {
+  const auto greater = vec_cmpgt(a.raw, b.raw);
+  return Mask128<T, N>{greater};
+}
+
+// ------------------------------ Weak inequality
+
+// Floating-point lanes: a >= b directly via vec_cmpge.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Mask128<T, N> operator>=(Vec128<T, N> a, Vec128<T, N> b) {
+  const auto greater_or_equal = vec_cmpge(a.raw, b.raw);
+  return Mask128<T, N>{greater_or_equal};
+}
+
+// Integer lanes: a >= b expressed as !(b > a).
+template <typename T, size_t N, HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>
+HWY_API Mask128<T, N> operator>=(Vec128<T, N> a, Vec128<T, N> b) {
+  const Mask128<T, N> b_is_greater = (b > a);
+  return Not(b_is_greater);
+}
+
+// ------------------------------ Reversed comparisons
+
+// a < b is evaluated as b > a.
+template <typename T, size_t N, HWY_IF_NOT_SPECIAL_FLOAT(T)>
+HWY_API Mask128<T, N> operator<(Vec128<T, N> a, Vec128<T, N> b) {
+  const Mask128<T, N> swapped_compare = (b > a);
+  return swapped_compare;
+}
+
+// a <= b is evaluated as b >= a.
+template <typename T, size_t N, HWY_IF_NOT_SPECIAL_FLOAT(T)>
+HWY_API Mask128<T, N> operator<=(Vec128<T, N> a, Vec128<T, N> b) {
+  const Mask128<T, N> swapped_compare = (b >= a);
+  return swapped_compare;
+}
+
+// ================================================== MEMORY (2)
+
+// ------------------------------ LoadU
+// Full 128-bit load through the UnalignedRawVec pointer type; the loaded
+// value is reinterpreted as the vector's raw type.
+template <class D, HWY_IF_V_SIZE_D(D, 16), typename T = TFromD<D>>
+HWY_API Vec128<T> LoadU(D /* tag */, const T* HWY_RESTRICT p) {
+  using ResultRaw = typename detail::Raw128<T>::type;
+  using LoadRaw = typename detail::Raw128<T>::UnalignedRawVec;
+  const LoadRaw* HWY_RESTRICT src = reinterpret_cast<const LoadRaw*>(p);
+  return Vec128<T>{reinterpret_cast<ResultRaw>(*src)};
+}
+
+// For < 128 bit, LoadU == Load.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), typename T = TFromD<D>>
+HWY_API VFromD<D> LoadU(D d, const T* HWY_RESTRICT p) {
+  const VFromD<D> loaded = Load(d, p);
+  return loaded;
+}
+
+// 128-bit SIMD => nothing to duplicate, same as an unaligned load.
+template <class D, typename T = TFromD<D>>
+HWY_API VFromD<D> LoadDup128(D d, const T* HWY_RESTRICT p) {
+  const VFromD<D> loaded = LoadU(d, p);
+  return loaded;
+}
+
+#if HWY_PPC_HAVE_9
+#ifdef HWY_NATIVE_LOAD_N
+#undef HWY_NATIVE_LOAD_N
+#else
+#define HWY_NATIVE_LOAD_N
+#endif
+
+// Loads at most `max_lanes_to_load` lanes (capped at HWY_MAX_LANES_D(D)) from
+// `p` using the length-controlled vec_xl_len. Only compiled when
+// HWY_PPC_HAVE_9 is set.
+template <class D, typename T = TFromD<D>>
+HWY_API VFromD<D> LoadN(D d, const T* HWY_RESTRICT p,
+                        size_t max_lanes_to_load) {
+#if HWY_COMPILER_GCC && !HWY_IS_DEBUG_BUILD
+  // Fast paths taken only when the compiler can prove the condition at
+  // compile time: nothing to load, or the whole vector is requested.
+  if (__builtin_constant_p(max_lanes_to_load) && max_lanes_to_load == 0) {
+    return Zero(d);
+  }
+
+  if (__builtin_constant_p(max_lanes_to_load >= HWY_MAX_LANES_D(D)) &&
+      max_lanes_to_load >= HWY_MAX_LANES_D(D)) {
+    return LoadU(d, p);
+  }
+#endif
+
+  // vec_xl_len takes a byte count, so convert the (clamped) lane count.
+  const size_t num_of_bytes_to_load =
+      HWY_MIN(max_lanes_to_load, HWY_MAX_LANES_D(D)) * sizeof(TFromD<D>);
+  const Repartition<uint8_t, decltype(d)> du8;
+  // The const_cast only satisfies the non-const pointer parameter passed to
+  // vec_xl_len here.
+  return BitCast(
+      d,
+      VFromD<decltype(du8)>{vec_xl_len(
+          const_cast<unsigned char*>(reinterpret_cast<const unsigned char*>(p)),
+          num_of_bytes_to_load)});
+}
+#endif
+
+// Iota returns a vector with lane i=[0, N) set to "first" + i. The
+// detail::Iota0 helpers below supply the per-lane indices 0, 1, 2, ...
+namespace detail {
+
+// Iota0 overloads: each returns a vector whose lane i holds the value i,
+// built from a constexpr raw vector constant for the matching lane size/type.
+
+// 1-byte lanes: indices 0..15, bit-cast from unsigned to D's lane type.
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE VFromD<D> Iota0(D d) {
+  constexpr __vector unsigned char kU8Iota0 = {0, 1, 2,  3,  4,  5,  6,  7,
+                                               8, 9, 10, 11, 12, 13, 14, 15};
+  return BitCast(d, VFromD<RebindToUnsigned<D>>{kU8Iota0});
+}
+
+// 2-byte lanes (excluding special floats): indices 0..7.
+template <class D, HWY_IF_T_SIZE_D(D, 2), HWY_IF_NOT_SPECIAL_FLOAT_D(D)>
+HWY_INLINE VFromD<D> Iota0(D d) {
+  constexpr __vector unsigned short kU16Iota0 = {0, 1, 2, 3, 4, 5, 6, 7};
+  return BitCast(d, VFromD<RebindToUnsigned<D>>{kU16Iota0});
+}
+
+// 32-bit integer lanes: indices 0..3.
+template <class D, HWY_IF_UI32_D(D)>
+HWY_INLINE VFromD<D> Iota0(D d) {
+  constexpr __vector unsigned int kU32Iota0 = {0, 1, 2, 3};
+  return BitCast(d, VFromD<RebindToUnsigned<D>>{kU32Iota0});
+}
+
+// 64-bit integer lanes: indices 0..1.
+template <class D, HWY_IF_UI64_D(D)>
+HWY_INLINE VFromD<D> Iota0(D d) {
+  constexpr __vector unsigned long long kU64Iota0 = {0, 1};
+  return BitCast(d, VFromD<RebindToUnsigned<D>>{kU64Iota0});
+}
+
+// float lanes: 0.0f..3.0f, stored as floating-point values (no bit-cast).
+template <class D, HWY_IF_F32_D(D)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {
+  constexpr __vector float kF32Iota0 = {0.0f, 1.0f, 2.0f, 3.0f};
+  return VFromD<D>{kF32Iota0};
+}
+
+// double lanes: 0.0 and 1.0, stored as floating-point values (no bit-cast).
+template <class D, HWY_IF_F64_D(D)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {
+  constexpr __vector double kF64Iota0 = {0.0, 1.0};
+  return VFromD<D>{kF64Iota0};
+}
+
+}  // namespace detail
+
+// Lane i receives Iota0's value for lane i plus `first` converted to the
+// lane type.
+template <class D, typename T2>
+HWY_API VFromD<D> Iota(D d, const T2 first) {
+  const auto lane_indices = detail::Iota0(d);
+  const auto start = Set(d, static_cast<TFromD<D>>(first));
+  return lane_indices + start;
+}
+
+// ------------------------------ FirstN (Iota, Lt)
+
+// Returns a mask in which lanes with index < `num` are true.
+// `num` is clamped to MaxLanes(d) before being narrowed to the unsigned lane
+// type: without the clamp a count that does not fit in TU (e.g. 256 for 8-bit
+// lanes) would wrap around and produce too few true lanes instead of an
+// all-true mask. For counts that already fit, the lanes belonging to `d` are
+// unaffected by the clamp.
+template <class D>
+HWY_API MFromD<D> FirstN(D d, size_t num) {
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  const size_t clamped_num = HWY_MIN(num, MaxLanes(d));
+  // Compare lane indices against the broadcast count using unsigned lanes.
+  return RebindMask(d,
+                    Iota(du, 0) < Set(du, static_cast<TU>(clamped_num)));
+}
+
+// ------------------------------ MaskedLoad
+// Performs a full LoadU from `p`, then zeroes the lanes where `m` is false.
+template <class D, typename T = TFromD<D>>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D d, const T* HWY_RESTRICT p) {
+  const VFromD<D> loaded = LoadU(d, p);
+  return IfThenElseZero(m, loaded);
+}
+
+// ------------------------------ MaskedLoadOr
+// Performs a full LoadU from `p`, then substitutes lanes of `v` where `m` is
+// false.
+template <class D, typename T = TFromD<D>>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, MFromD<D> m, D d,
+                               const T* HWY_RESTRICT p) {
+  const VFromD<D> loaded = LoadU(d, p);
+  return IfThenElse(m, loaded, v);
+}
+
+// ------------------------------ Store
+
+// Full 128-bit store through the AlignedRawVec pointer type.
+template <class D, HWY_IF_V_SIZE_D(D, 16), typename T = TFromD<D>>
+HWY_API void Store(Vec128<T> v, D /* tag */, T* HWY_RESTRICT aligned) {
+  using StoreRaw = typename detail::Raw128<T>::AlignedRawVec;
+  StoreRaw* dst = reinterpret_cast<StoreRaw*>(aligned);
+  *dst = reinterpret_cast<StoreRaw>(v.raw);
+}
+
+// Full 128-bit store through the UnalignedRawVec pointer type.
+template <class D, HWY_IF_V_SIZE_D(D, 16), typename T = TFromD<D>>
+HWY_API void StoreU(Vec128<T> v, D /* tag */, T* HWY_RESTRICT p) {
+  using StoreRaw = typename detail::Raw128<T>::UnalignedRawVec;
+  StoreRaw* dst = reinterpret_cast<StoreRaw*>(p);
+  *dst = reinterpret_cast<StoreRaw>(v.raw);
+}
+
+// Any <= 64 bit: extracts the vector's d.MaxBytes() bytes as one unsigned
+// integer (first lane of the repartitioned vector) and copies them to `p`.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), typename T = TFromD<D>>
+HWY_API void Store(VFromD<D> v, D d, T* HWY_RESTRICT p) {
+  using BitsT = UnsignedFromSize<d.MaxBytes()>;
+  const Repartition<BitsT, decltype(d)> d_bits;
+
+  const auto as_bits = BitCast(d_bits, v);
+  const BitsT payload = GetLane(as_bits);
+  CopyBytes<d.MaxBytes()>(&payload, p);
+}
+
+// For < 128 bit, StoreU == Store.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), typename T = TFromD<D>>
+HWY_API void StoreU(VFromD<D> v, D d, T* HWY_RESTRICT p) {
+  // Partial vectors share a single implementation for both store flavours.
+  Store(v, d, p);
+}
+
+#if HWY_PPC_HAVE_9
+
+#ifdef HWY_NATIVE_STORE_N
+#undef HWY_NATIVE_STORE_N
+#else
+#define HWY_NATIVE_STORE_N
+#endif
+
+// Stores at most `max_lanes_to_store` lanes (capped at HWY_MAX_LANES_D(D)) of
+// `v` to `p` using the length-controlled vec_xst_len. Only compiled when
+// HWY_PPC_HAVE_9 is set.
+template <class D, typename T = TFromD<D>>
+HWY_API void StoreN(VFromD<D> v, D d, T* HWY_RESTRICT p,
+                    size_t max_lanes_to_store) {
+#if HWY_COMPILER_GCC && !HWY_IS_DEBUG_BUILD
+  // Fast paths taken only when the compiler can prove the condition at
+  // compile time: nothing to store, or the whole vector is stored.
+  if (__builtin_constant_p(max_lanes_to_store) && max_lanes_to_store == 0) {
+    return;
+  }
+
+  if (__builtin_constant_p(max_lanes_to_store >= HWY_MAX_LANES_D(D)) &&
+      max_lanes_to_store >= HWY_MAX_LANES_D(D)) {
+    StoreU(v, d, p);
+    return;
+  }
+#endif
+
+  // vec_xst_len takes a byte count, so convert the (clamped) lane count.
+  const size_t num_of_bytes_to_store =
+      HWY_MIN(max_lanes_to_store, HWY_MAX_LANES_D(D)) * sizeof(TFromD<D>);
+  const Repartition<uint8_t, decltype(d)> du8;
+  vec_xst_len(BitCast(du8, v).raw, reinterpret_cast<unsigned char*>(p),
+              num_of_bytes_to_store);
+}
+#endif
+
+// ------------------------------ BlendedStore
+
+// Writes to p[i] only the lanes of `v` whose mask lane is set. Both the
+// values and the mask are first spilled to aligned stack buffers, then the
+// selected lanes are copied one by one.
+template <class D>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D d,
+                          TFromD<D>* HWY_RESTRICT p) {
+  const RebindToSigned<decltype(d)> di;  // for testing mask if T=bfloat16_t.
+  using TI = TFromD<decltype(di)>;
+  alignas(16) TI lane_values[MaxLanes(d)];
+  alignas(16) TI lane_selected[MaxLanes(d)];
+  const auto values_as_int = BitCast(di, v);
+  Store(values_as_int, di, lane_values);
+  const auto mask_as_int = BitCast(di, VecFromMask(d, m));
+  Store(mask_as_int, di, lane_selected);
+  for (size_t lane = 0; lane < MaxLanes(d); ++lane) {
+    if (!lane_selected[lane]) continue;
+    CopySameSize(lane_values + lane, p + lane);
+  }
+}
+
+// ================================================== ARITHMETIC
+
+// ------------------------------ Addition
+
+// Lane-wise addition via vec_add.
+template <typename T, size_t N, HWY_IF_NOT_SPECIAL_FLOAT(T)>
+HWY_API Vec128<T, N> operator+(Vec128<T, N> a, Vec128<T, N> b) {
+  const auto sum = vec_add(a.raw, b.raw);
+  return Vec128<T, N>{sum};
+}
+
+// ------------------------------ Subtraction
+
+// Lane-wise subtraction via vec_sub.
+template <typename T, size_t N, HWY_IF_NOT_SPECIAL_FLOAT(T)>
+HWY_API Vec128<T, N> operator-(Vec128<T, N> a, Vec128<T, N> b) {
+  const auto difference = vec_sub(a.raw, b.raw);
+  return Vec128<T, N>{difference};
+}
+
+// ------------------------------ SumsOf8
+namespace detail {
+
+// Casts nominally int32_t result to D.
+// Wrapper around vec_vsum4sbs(a, b). In optimized builds, when both inputs
+// are compile-time constants, the result is instead computed in scalar code:
+// for each 32-bit lane i, the four signed bytes a[4i..4i+3] plus b[i] are
+// summed in 64-bit arithmetic and saturated to the int32_t range.
+template <class D>
+HWY_INLINE VFromD<D> AltivecVsum4sbs(D d, __vector signed char a,
+                                     __vector signed int b) {
+  const Repartition<int32_t, D> di32;
+#ifdef __OPTIMIZE__
+  if (IsConstantRawAltivecVect(a) && IsConstantRawAltivecVect(b)) {
+    const int64_t sum0 =
+        static_cast<int64_t>(a[0]) + static_cast<int64_t>(a[1]) +
+        static_cast<int64_t>(a[2]) + static_cast<int64_t>(a[3]) +
+        static_cast<int64_t>(b[0]);
+    const int64_t sum1 =
+        static_cast<int64_t>(a[4]) + static_cast<int64_t>(a[5]) +
+        static_cast<int64_t>(a[6]) + static_cast<int64_t>(a[7]) +
+        static_cast<int64_t>(b[1]);
+    const int64_t sum2 =
+        static_cast<int64_t>(a[8]) + static_cast<int64_t>(a[9]) +
+        static_cast<int64_t>(a[10]) + static_cast<int64_t>(a[11]) +
+        static_cast<int64_t>(b[2]);
+    const int64_t sum3 =
+        static_cast<int64_t>(a[12]) + static_cast<int64_t>(a[13]) +
+        static_cast<int64_t>(a[14]) + static_cast<int64_t>(a[15]) +
+        static_cast<int64_t>(b[3]);
+    // signN is 0 for a non-negative sum and -1 for a negative one.
+    const int32_t sign0 = static_cast<int32_t>(sum0 >> 63);
+    const int32_t sign1 = static_cast<int32_t>(sum1 >> 63);
+    const int32_t sign2 = static_cast<int32_t>(sum2 >> 63);
+    const int32_t sign3 = static_cast<int32_t>(sum3 >> 63);
+    using Raw = typename detail::Raw128<int32_t>::type;
+    // The sum fits in int32_t iff (sum >> 31) equals the sign; otherwise
+    // sign ^ 0x7FFFFFFF yields INT32_MAX (positive) or INT32_MIN (negative).
+    return BitCast(
+        d,
+        VFromD<decltype(di32)>{Raw{
+            (sign0 == (sum0 >> 31)) ? static_cast<int32_t>(sum0)
+                                    : static_cast<int32_t>(sign0 ^ 0x7FFFFFFF),
+            (sign1 == (sum1 >> 31)) ? static_cast<int32_t>(sum1)
+                                    : static_cast<int32_t>(sign1 ^ 0x7FFFFFFF),
+            (sign2 == (sum2 >> 31)) ? static_cast<int32_t>(sum2)
+                                    : static_cast<int32_t>(sign2 ^ 0x7FFFFFFF),
+            (sign3 == (sum3 >> 31))
+                ? static_cast<int32_t>(sum3)
+                : static_cast<int32_t>(sign3 ^ 0x7FFFFFFF)}});
+  } else  // NOLINT
+#endif
+  {
+    return BitCast(d, VFromD<decltype(di32)>{vec_vsum4sbs(a, b)});
+  }
+}
+
+// Casts nominally uint32_t result to D.
+// Wrapper around vec_vsum4ubs(a, b). In optimized builds, when both inputs
+// are compile-time constants, the result is instead computed in scalar code:
+// for each 32-bit lane i, the four unsigned bytes a[4i..4i+3] plus b[i] are
+// summed in 64-bit arithmetic and saturated to 0xFFFFFFFF.
+template <class D>
+HWY_INLINE VFromD<D> AltivecVsum4ubs(D d, __vector unsigned char a,
+                                     __vector unsigned int b) {
+  const Repartition<uint32_t, D> du32;
+#ifdef __OPTIMIZE__
+  if (IsConstantRawAltivecVect(a) && IsConstantRawAltivecVect(b)) {
+    const uint64_t sum0 =
+        static_cast<uint64_t>(a[0]) + static_cast<uint64_t>(a[1]) +
+        static_cast<uint64_t>(a[2]) + static_cast<uint64_t>(a[3]) +
+        static_cast<uint64_t>(b[0]);
+    const uint64_t sum1 =
+        static_cast<uint64_t>(a[4]) + static_cast<uint64_t>(a[5]) +
+        static_cast<uint64_t>(a[6]) + static_cast<uint64_t>(a[7]) +
+        static_cast<uint64_t>(b[1]);
+    const uint64_t sum2 =
+        static_cast<uint64_t>(a[8]) + static_cast<uint64_t>(a[9]) +
+        static_cast<uint64_t>(a[10]) + static_cast<uint64_t>(a[11]) +
+        static_cast<uint64_t>(b[2]);
+    const uint64_t sum3 =
+        static_cast<uint64_t>(a[12]) + static_cast<uint64_t>(a[13]) +
+        static_cast<uint64_t>(a[14]) + static_cast<uint64_t>(a[15]) +
+        static_cast<uint64_t>(b[3]);
+    // Clamp each 64-bit sum to the largest uint32_t value.
+    return BitCast(
+        d,
+        VFromD<decltype(du32)>{(__vector unsigned int){
+            static_cast<unsigned int>(sum0 <= 0xFFFFFFFFu ? sum0 : 0xFFFFFFFFu),
+            static_cast<unsigned int>(sum1 <= 0xFFFFFFFFu ? sum1 : 0xFFFFFFFFu),
+            static_cast<unsigned int>(sum2 <= 0xFFFFFFFFu ? sum2 : 0xFFFFFFFFu),
+            static_cast<unsigned int>(sum3 <= 0xFFFFFFFFu ? sum3
+                                                          : 0xFFFFFFFFu)}});
+  } else  // NOLINT
+#endif
+  {
+    return BitCast(d, VFromD<decltype(du32)>{vec_vsum4ubs(a, b)});
+  }
+}
+
+// Wraps vsum2sws: signed-saturating sum of each adjacent int32_t pair of `a` plus one lane of `b`. Casts nominally int32_t result to D.
+template <class D>
+HWY_INLINE VFromD<D> AltivecVsum2sws(D d, __vector signed int a,
+                                     __vector signed int b) {
+  const Repartition<int32_t, D> di32;
+#ifdef __OPTIMIZE__
+  const Repartition<uint64_t, D> du64;
+  constexpr int kDestLaneOffset = HWY_IS_BIG_ENDIAN;  // which lane of each b pair is added: 1 on big-endian, else 0
+  if (IsConstantRawAltivecVect(a) && __builtin_constant_p(b[kDestLaneOffset]) &&
+      __builtin_constant_p(b[kDestLaneOffset + 2])) {
+    const int64_t sum0 = static_cast<int64_t>(a[0]) +
+                         static_cast<int64_t>(a[1]) +
+                         static_cast<int64_t>(b[kDestLaneOffset]);
+    const int64_t sum1 = static_cast<int64_t>(a[2]) +
+                         static_cast<int64_t>(a[3]) +
+                         static_cast<int64_t>(b[kDestLaneOffset + 2]);
+    const int32_t sign0 = static_cast<int32_t>(sum0 >> 63);  // 0 or -1
+    const int32_t sign1 = static_cast<int32_t>(sum1 >> 63);  // 0 or -1
+    return BitCast(d, VFromD<decltype(du64)>{(__vector unsigned long long){  // each result is zero-extended into a 64-bit lane
+                          (sign0 == (sum0 >> 31))  // true if sum0 fits in int32_t
+                              ? static_cast<uint32_t>(sum0)
+                              : static_cast<uint32_t>(sign0 ^ 0x7FFFFFFF),
+                          (sign1 == (sum1 >> 31))  // true if sum1 fits in int32_t
+                              ? static_cast<uint32_t>(sum1)
+                              : static_cast<uint32_t>(sign1 ^ 0x7FFFFFFF)}});
+  } else  // NOLINT
+#endif
+  {
+    __vector signed int sum;
+
+    // Inline assembly is used for vsum2sws to avoid unnecessary shuffling
+    // on little-endian PowerPC targets as the result of the vsum2sws
+    // instruction will already be in the correct lanes on little-endian
+    // PowerPC targets.
+    __asm__("vsum2sws %0,%1,%2" : "=v"(sum) : "v"(a), "v"(b));
+
+    return BitCast(d, VFromD<decltype(di32)>{sum});
+  }
+}
+
+}  // namespace detail
+
+template <size_t N>
+HWY_API Vec128<uint64_t, N / 8> SumsOf8(Vec128<uint8_t, N> v) {
+  // Stage 1 sums groups of four bytes; stage 2 sums adjacent pairs of those.
+  const Repartition<uint64_t, DFromV<decltype(v)>> d_u64;
+  const Repartition<int32_t, decltype(d_u64)> d_i32;
+  const RebindToUnsigned<decltype(d_i32)> d_u32;
+  const auto sums_of_4 =
+      detail::AltivecVsum4ubs(d_i32, v.raw, Zero(d_u32).raw);
+  return detail::AltivecVsum2sws(d_u64, sums_of_4.raw, Zero(d_i32).raw);
+}
+
+// ------------------------------ SaturatedAdd
+
+// Returns a + b clamped to the destination range.
+
+#ifdef HWY_NATIVE_I32_SATURATED_ADDSUB
+#undef HWY_NATIVE_I32_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_I32_SATURATED_ADDSUB
+#endif
+
+#ifdef HWY_NATIVE_U32_SATURATED_ADDSUB
+#undef HWY_NATIVE_U32_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_U32_SATURATED_ADDSUB
+#endif
+
+template <typename T, size_t N, HWY_IF_NOT_FLOAT_NOR_SPECIAL(T),
+          HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2) | (1 << 4))>  // 1-, 2- or 4-byte integer T
+HWY_API Vec128<T, N> SaturatedAdd(Vec128<T, N> a, Vec128<T, N> b) {
+  return Vec128<T, N>{vec_adds(a.raw, b.raw)};  // lane-wise saturating add
+}
+
+#if HWY_PPC_HAVE_10
+
+#ifdef HWY_NATIVE_I64_SATURATED_ADDSUB
+#undef HWY_NATIVE_I64_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_I64_SATURATED_ADDSUB
+#endif
+
+template <class V, HWY_IF_I64_D(DFromV<V>)>
+HWY_API V SaturatedAdd(V a, V b) {
+  const DFromV<V> d64;
+  const auto wrapped = Add(a, b);
+  const auto overflowed =  // sign bit set in lanes where the add overflowed
+      BroadcastSignBit(detail::TernaryLogic<0x42>(a, b, wrapped));
+  // LimitsMax XOR sign(a): int64 max when a >= 0, int64 min when a < 0.
+  const auto clamped = Xor(BroadcastSignBit(a), Set(d64, LimitsMax<int64_t>()));
+  return IfNegativeThenElse(overflowed, clamped, wrapped);
+}
+
+#endif  // HWY_PPC_HAVE_10
+
+// ------------------------------ SaturatedSub
+
+// Returns a - b clamped to the destination range.
+
+template <typename T, size_t N, HWY_IF_NOT_FLOAT_NOR_SPECIAL(T),
+          HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2) | (1 << 4))>  // 1-, 2- or 4-byte integer T
+HWY_API Vec128<T, N> SaturatedSub(Vec128<T, N> a, Vec128<T, N> b) {
+  return Vec128<T, N>{vec_subs(a.raw, b.raw)};  // lane-wise saturating subtract
+}
+
+#if HWY_PPC_HAVE_10
+
+template <class V, HWY_IF_I64_D(DFromV<V>)>
+HWY_API V SaturatedSub(V a, V b) {
+  const DFromV<V> d64;
+  const auto wrapped = Sub(a, b);
+  const auto overflowed =  // sign bit set in lanes where the subtract overflowed
+      BroadcastSignBit(detail::TernaryLogic<0x18>(a, b, wrapped));
+  // LimitsMax XOR sign(a): int64 max when a >= 0, int64 min when a < 0.
+  const auto clamped = Xor(BroadcastSignBit(a), Set(d64, LimitsMax<int64_t>()));
+  return IfNegativeThenElse(overflowed, clamped, wrapped);
+}
+
+#endif  // HWY_PPC_HAVE_10
+
+// ------------------------------ AverageRound
+
+// Returns (a + b + 1) / 2
+
+template <typename T, size_t N, HWY_IF_UNSIGNED(T),
+          HWY_IF_T_SIZE_ONE_OF(T, 0x6)>  // 0x6 == (1 << 1) | (1 << 2): 1- or 2-byte T
+HWY_API Vec128<T, N> AverageRound(Vec128<T, N> a, Vec128<T, N> b) {
+  return Vec128<T, N>{vec_avg(a.raw, b.raw)};  // lane-wise average via vec_avg
+}
+
+// ------------------------------ Multiplication
+
+// Per-target flags to prevent generic_ops-inl.h defining 8/64-bit operator*.
+#ifdef HWY_NATIVE_MUL_8
+#undef HWY_NATIVE_MUL_8
+#else
+#define HWY_NATIVE_MUL_8
+#endif
+#ifdef HWY_NATIVE_MUL_64
+#undef HWY_NATIVE_MUL_64
+#else
+#define HWY_NATIVE_MUL_64
+#endif
+
+template <typename T, size_t N, HWY_IF_NOT_SPECIAL_FLOAT(T)>
+HWY_API Vec128<T, N> operator*(Vec128<T, N> a, Vec128<T, N> b) {
+  return Vec128<T, N>{a.raw * b.raw};  // lane-wise product via the compiler's vector operator*
+}
+
+// Computes the full 32-bit products and keeps the upper 16 bits of each lane.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 2), HWY_IF_NOT_FLOAT(T)>
+HWY_API Vec128<T, N> MulHigh(Vec128<T, N> a, Vec128<T, N> b) {
+  const DFromV<decltype(a)> d16;
+  using VW = VFromD<RepartitionToWide<decltype(d16)>>;
+  const VW p_even{vec_mule(a.raw, b.raw)};
+  const VW p_odd{vec_mulo(a.raw, b.raw)};
+#if HWY_IS_LITTLE_ENDIAN
+  const __vector unsigned char kHighHalves = {2,  3,  18, 19, 6,  7,  22, 23,
+                                              10, 11, 26, 27, 14, 15, 30, 31};
+#else
+  const __vector unsigned char kHighHalves = {0, 1, 16, 17, 4,  5,  20, 21,
+                                              8, 9, 24, 25, 12, 13, 28, 29};
+#endif
+  return BitCast(d16, VW{vec_perm(p_even.raw, p_odd.raw, kHighHalves)});
+}
+
+template <size_t N>
+HWY_API Vec128<int16_t, N> MulFixedPoint15(Vec128<int16_t, N> a,
+                                           Vec128<int16_t, N> b) {
+  const auto no_addend = Zero(Full128<int16_t>()).raw;  // vec_mradds addend: zero
+  return Vec128<int16_t, N>{vec_mradds(a.raw, b.raw, no_addend)};
+}
+
+// Multiplies even lanes (0, 2, ..) via vec_mule. Each double-wide product spans
+// an even lane (lower half) and its odd neighbor lane (upper half).
+template <typename T, size_t N,
+          HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>
+HWY_API Vec128<MakeWide<T>, (N + 1) / 2> MulEven(Vec128<T, N> a,
+                                                 Vec128<T, N> b) {
+  return Vec128<MakeWide<T>, (N + 1) / 2>{vec_mule(a.raw, b.raw)};
+}
+
+// Multiplies odd lanes (1, 3, ..) via vec_mulo. Each double-wide product spans
+// an even lane (lower half) and its odd neighbor lane (upper half).
+template <typename T, size_t N,
+          HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>
+HWY_API Vec128<MakeWide<T>, (N + 1) / 2> MulOdd(Vec128<T, N> a,
+                                                Vec128<T, N> b) {
+  return Vec128<MakeWide<T>, (N + 1) / 2>{vec_mulo(a.raw, b.raw)};
+}
+
+// ------------------------------ RotateRight
+template <int kBits, typename T, size_t N>
+HWY_API Vec128<T, N> RotateRight(const Vec128<T, N> v) {
+  constexpr size_t kLaneBits = sizeof(T) * 8;
+  static_assert(0 <= kBits && kBits < kLaneBits, "Invalid shift count");
+  if (kBits == 0) return v;  // nothing to rotate
+  const auto left_count = Set(DFromV<decltype(v)>(), kLaneBits - kBits);
+  return Vec128<T, N>{vec_rl(v.raw, left_count.raw)};  // left by complement
+}
+
+// ------------------------------ ZeroIfNegative (BroadcastSignBit)
+template <typename T, size_t N>
+HWY_API Vec128<T, N> ZeroIfNegative(Vec128<T, N> v) {
+  static_assert(IsFloat<T>(), "Only works for float");
+  const DFromV<decltype(v)> df;
+  const RebindToSigned<decltype(df)> d_int;
+  // Smear each lane's sign bit across the lane to form the select mask.
+  const auto sign_bits = BroadcastSignBit(BitCast(d_int, v));
+  return IfThenElse(MaskFromVec(BitCast(df, sign_bits)), Zero(df), v);
+}
+
+// ------------------------------ IfNegativeThenElse
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfNegativeThenElse(Vec128<T, N> v, Vec128<T, N> yes,
+                                        Vec128<T, N> no) {
+  static_assert(IsSigned<T>(), "Only works for signed/float");
+  const DFromV<decltype(v)> dv;
+#if HWY_PPC_HAVE_10
+  const RebindToUnsigned<decltype(dv)> d_uns;
+  using VU = VFromD<decltype(d_uns)>;
+  const VU blended{vec_blendv(BitCast(d_uns, no).raw, BitCast(d_uns, yes).raw,
+                              BitCast(d_uns, v).raw)};
+  return BitCast(dv, blended);
+#else
+  const RebindToSigned<decltype(dv)> d_int;
+  const auto sign_bits = BroadcastSignBit(BitCast(d_int, v));
+  return IfThenElse(MaskFromVec(BitCast(dv, sign_bits)), yes, no);
+#endif
+}
+
+// Floating-point |a - b|; generic_ops takes care of integer T.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> AbsDiff(Vec128<T, N> a, Vec128<T, N> b) {
+  return Abs(a - b);
+}
+
+// ------------------------------ Floating-point multiply-add variants
+
+// Returns mul * x + add, computed with vec_madd (floating-point lanes only).
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> MulAdd(Vec128<T, N> mul, Vec128<T, N> x,
+                            Vec128<T, N> add) {
+  return Vec128<T, N>{vec_madd(mul.raw, x.raw, add.raw)};
+}
+
+// Returns add - mul * x, computed with vec_nmsub (floating-point lanes only).
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> NegMulAdd(Vec128<T, N> mul, Vec128<T, N> x,
+                               Vec128<T, N> add) {
+  // NOTE: the vec_nmsub operation below computes -(mul * x - add),
+  // which is equivalent to add - mul * x in the round-to-nearest
+  // and round-towards-zero rounding modes
+  return Vec128<T, N>{vec_nmsub(mul.raw, x.raw, add.raw)};
+}
+
+// Returns mul * x - sub, computed with vec_msub (floating-point lanes only).
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> MulSub(Vec128<T, N> mul, Vec128<T, N> x,
+                            Vec128<T, N> sub) {
+  return Vec128<T, N>{vec_msub(mul.raw, x.raw, sub.raw)};
+}
+
+// Returns -mul * x - sub, computed with vec_nmadd (floating-point lanes only).
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> NegMulSub(Vec128<T, N> mul, Vec128<T, N> x,
+                               Vec128<T, N> sub) {
+  // NOTE: The vec_nmadd operation below computes -(mul * x + sub),
+  // which is equivalent to -mul * x - sub in the round-to-nearest
+  // and round-towards-zero rounding modes
+  return Vec128<T, N>{vec_nmadd(mul.raw, x.raw, sub.raw)};
+}
+
+// ------------------------------ Floating-point div
+// Approximate reciprocal
+
+#ifdef HWY_NATIVE_F64_APPROX_RECIP
+#undef HWY_NATIVE_F64_APPROX_RECIP
+#else
+#define HWY_NATIVE_F64_APPROX_RECIP
+#endif
+
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> ApproximateReciprocal(Vec128<T, N> v) {
+  return Vec128<T, N>{vec_re(v.raw)};  // approximate 1 / v per lane via vec_re
+}
+
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> operator/(Vec128<T, N> a, Vec128<T, N> b) {
+  return Vec128<T, N>{vec_div(a.raw, b.raw)};  // lane-wise a / b via vec_div
+}
+
+// ------------------------------ Floating-point square root
+
+#ifdef HWY_NATIVE_F64_APPROX_RSQRT
+#undef HWY_NATIVE_F64_APPROX_RSQRT
+#else
+#define HWY_NATIVE_F64_APPROX_RSQRT
+#endif
+
+// Approximate reciprocal square root of each floating-point lane (vec_rsqrte).
+template <class T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> ApproximateReciprocalSqrt(Vec128<T, N> v) {
+  return Vec128<T, N>{vec_rsqrte(v.raw)};
+}
+
+// Full precision square root of each floating-point lane (vec_sqrt).
+template <class T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> Sqrt(Vec128<T, N> v) {
+  return Vec128<T, N>{vec_sqrt(v.raw)};
+}
+
+// ------------------------------ Min (Gt, IfThenElse)
+
+template <typename T, size_t N, HWY_IF_NOT_SPECIAL_FLOAT(T)>
+HWY_API Vec128<T, N> Min(Vec128<T, N> a, Vec128<T, N> b) {
+  return Vec128<T, N>{vec_min(a.raw, b.raw)};  // lane-wise minimum via vec_min
+}
+
+// ------------------------------ Max (Gt, IfThenElse)
+
+template <typename T, size_t N, HWY_IF_NOT_SPECIAL_FLOAT(T)>
+HWY_API Vec128<T, N> Max(Vec128<T, N> a, Vec128<T, N> b) {
+  return Vec128<T, N>{vec_max(a.raw, b.raw)};  // lane-wise maximum via vec_max
+}
+
+// ------------------------------- Integer AbsDiff for PPC9/PPC10
+
+#if HWY_PPC_HAVE_9
+#ifdef HWY_NATIVE_INTEGER_ABS_DIFF
+#undef HWY_NATIVE_INTEGER_ABS_DIFF
+#else
+#define HWY_NATIVE_INTEGER_ABS_DIFF
+#endif
+
+template <class V, HWY_IF_UNSIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2) | (1 << 4))>  // 1-, 2- or 4-byte unsigned lanes
+HWY_API V AbsDiff(const V a, const V b) {
+  return V{vec_absd(a.raw, b.raw)};  // absolute difference via vec_absd
+}
+
+template <class V, HWY_IF_U64_D(DFromV<V>)>
+HWY_API V AbsDiff(const V a, const V b) {
+  return Sub(Max(a, b), Min(a, b));  // uint64_t lanes: larger minus smaller
+}
+
+template <class V, HWY_IF_SIGNED_V(V)>
+HWY_API V AbsDiff(const V a, const V b) {
+  return Sub(Max(a, b), Min(a, b));  // signed lanes: larger minus smaller
+}
+
+#endif  // HWY_PPC_HAVE_9
+
+// ================================================== MEMORY (3)
+
+// ------------------------------ Non-temporal stores
+
+template <class D>
+HWY_API void Stream(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT aligned) {
+  __builtin_prefetch(aligned, 1, 0);  // prefetch hint: rw = 1 (write), locality = 0
+  Store(v, d, aligned);  // the data itself is written with an ordinary Store
+}
+
+// ------------------------------ Scatter in generic_ops-inl.h
+// ------------------------------ Gather in generic_ops-inl.h
+
+// ================================================== SWIZZLE (2)
+
+// ------------------------------ LowerHalf
+
+// Returns the lower half of a vector; the raw register is passed through as-is.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> LowerHalf(D /* tag */, VFromD<Twice<D>> v) {
+  return VFromD<D>{v.raw};
+}
+template <typename T, size_t N>
+HWY_API Vec128<T, N / 2> LowerHalf(Vec128<T, N> v) {
+  return Vec128<T, N / 2>{v.raw};  // tag-less overload: same raw register, N / 2 lanes
+}
+
+// ------------------------------ ShiftLeftBytes
+
+// NOTE: The ShiftLeftBytes operation moves the elements of v to the right
+// by kBytes bytes and zeroes out the first kBytes bytes of v on both
+// little-endian and big-endian PPC targets
+// (same behavior as the HWY_EMU128 ShiftLeftBytes operation on both
+// little-endian and big-endian targets)
+
+template <int kBytes, class D>
+HWY_API VFromD<D> ShiftLeftBytes(D d, VFromD<D> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  if (kBytes == 0) return v;
+  const auto zeros = Zero(d);  // source of the shifted-in zero bytes
+#if HWY_IS_LITTLE_ENDIAN  // vec_sld operand order and byte count depend on endianness
+  return VFromD<D>{vec_sld(v.raw, zeros.raw, kBytes)};
+#else
+  return VFromD<D>{vec_sld(zeros.raw, v.raw, (-kBytes) & 15)};
+#endif
+}
+
+template <int kBytes, typename T, size_t N>
+HWY_API Vec128<T, N> ShiftLeftBytes(Vec128<T, N> v) {
+  return ShiftLeftBytes<kBytes>(DFromV<decltype(v)>(), v);  // forwards to the tag overload
+}
+
+// ------------------------------ ShiftLeftLanes
+
+// NOTE: The ShiftLeftLanes operation moves the elements of v to the right
+// by kLanes lanes and zeroes out the first kLanes lanes of v on both
+// little-endian and big-endian PPC targets
+// (same behavior as the HWY_EMU128 ShiftLeftLanes operation on both
+// little-endian and big-endian targets)
+
+template <int kLanes, class D, typename T = TFromD<D>>
+HWY_API VFromD<D> ShiftLeftLanes(D d, VFromD<D> v) {
+  const auto as_bytes = BitCast(Repartition<uint8_t, decltype(d)>(), v);
+  return BitCast(d, ShiftLeftBytes<kLanes * sizeof(T)>(as_bytes));
+}
+
+template <int kLanes, typename T, size_t N>
+HWY_API Vec128<T, N> ShiftLeftLanes(Vec128<T, N> v) {
+  return ShiftLeftLanes<kLanes>(DFromV<decltype(v)>(), v);  // forwards to the tag overload
+}
+
+// ------------------------------ ShiftRightBytes
+
+// NOTE: The ShiftRightBytes operation moves the elements of v to the left
+// by kBytes bytes and zeroes out the last kBytes bytes of v on both
+// little-endian and big-endian PPC targets
+// (same behavior as the HWY_EMU128 ShiftRightBytes operation on both
+// little-endian and big-endian targets)
+
+template <int kBytes, class D>
+HWY_API VFromD<D> ShiftRightBytes(D d, VFromD<D> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  if (kBytes == 0) return v;
+
+  // For a partial D, view v.raw as a full 128-bit vector and zero every lane
+  // past MaxLanes(d) first, so that only zeros are shifted into the result.
+  if (d.MaxBytes() != 16) {
+    const Full128<TFromD<D>> d_full;
+    const auto valid = FirstN(d_full, MaxLanes(d));
+    v = VFromD<D>{IfThenElseZero(valid, VFromD<decltype(d_full)>{v.raw}).raw};
+  }
+  const auto fill = Zero(d);
+#if HWY_IS_LITTLE_ENDIAN
+  return VFromD<D>{vec_sld(fill.raw, v.raw, (-kBytes) & 15)};
+#else
+  return VFromD<D>{vec_sld(v.raw, fill.raw, kBytes)};
+#endif
+}
+
+// ------------------------------ ShiftRightLanes
+
+// NOTE: The ShiftRightLanes operation moves the elements of v to the left
+// by kLanes lanes and zeroes out the last kLanes lanes of v on both
+// little-endian and big-endian PPC targets
+// (same behavior as the HWY_EMU128 ShiftRightLanes operation on both
+// little-endian and big-endian targets)
+
+template <int kLanes, class D>
+HWY_API VFromD<D> ShiftRightLanes(D d, VFromD<D> v) {
+  constexpr size_t kByteCount = kLanes * sizeof(TFromD<D>);
+  const Repartition<uint8_t, decltype(d)> d_bytes;
+  return BitCast(d, ShiftRightBytes<kByteCount>(d_bytes, BitCast(d_bytes, v)));
+}
+
+// ------------------------------ UpperHalf (ShiftRightBytes)
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> UpperHalf(D d, VFromD<Twice<D>> v) {
+  return LowerHalf(d, ShiftRightBytes<d.MaxBytes()>(Twice<D>(), v));  // shift by d.MaxBytes(), then take the lower half
+}
+
+// ------------------------------ ExtractLane
+template <typename T, size_t N>
+HWY_API T ExtractLane(Vec128<T, N> v, size_t i) {
+  return static_cast<T>(v.raw[i]);  // subscripts the raw vector directly
+}
+
+// ------------------------------ InsertLane
+template <typename T, size_t N>
+HWY_API Vec128<T, N> InsertLane(Vec128<T, N> v, size_t i, T t) {
+#if HWY_IS_LITTLE_ENDIAN
+  typename detail::Raw128<T>::type updated = v.raw;
+  updated[i] = t;  // overwrite lane i in a copy of the raw vector
+  return Vec128<T, N>{updated};
+#else
+  // On ppc64be without this, mul_test fails, but swizzle_test passes.
+  DFromV<decltype(v)> dv;
+  alignas(16) T buf[16 / sizeof(T)];
+  Store(v, dv, buf);
+  buf[i] = t;
+  return Load(dv, buf);
+#endif
+}
+
+// ------------------------------ CombineShiftRightBytes
+
+// NOTE: The CombineShiftRightBytes operation below moves the elements of lo to
+// the left by kBytes bytes and moves the elements of hi right by (d.MaxBytes()
+// - kBytes) bytes on both little-endian and big-endian PPC targets.
+
+template <int kBytes, class D, HWY_IF_V_SIZE_D(D, 16), typename T = TFromD<D>>
+HWY_API Vec128<T> CombineShiftRightBytes(D /*d*/, Vec128<T> hi, Vec128<T> lo) {
+  constexpr size_t kSize = 16;  // full 128-bit vectors only
+  static_assert(0 < kBytes && kBytes < kSize, "kBytes invalid");
+#if HWY_IS_LITTLE_ENDIAN  // vec_sld operand order and byte count depend on endianness
+  return Vec128<T>{vec_sld(hi.raw, lo.raw, (-kBytes) & 15)};
+#else
+  return Vec128<T>{vec_sld(lo.raw, hi.raw, kBytes)};
+#endif
+}
+
+template <int kBytes, class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> CombineShiftRightBytes(D d, VFromD<D> hi, VFromD<D> lo) {
+  constexpr size_t kPartBytes = d.MaxBytes();
+  static_assert(0 < kBytes && kBytes < kPartBytes, "kBytes invalid");
+  using VU8 = Vec128<uint8_t>;
+  const Repartition<uint8_t, decltype(d)> du8;
+  const DFromV<VU8> du8_full;
+  const VU8 hi_u8{BitCast(du8, hi).raw};
+  // Place lo in the most-significant bytes of a full vector.
+  const VU8 lo_u8 = ShiftLeftBytes<16 - kPartBytes>(VU8{BitCast(du8, lo).raw});
+  const VU8 out =
+      CombineShiftRightBytes<16 - kPartBytes + kBytes>(du8_full, hi_u8, lo_u8);
+  return VFromD<D>{BitCast(Repartition<TFromD<D>, DFromV<VU8>>(), out).raw};
+}
+
+// ------------------------------ Broadcast/splat any lane
+
+template <int kLane, typename T, size_t N>
+HWY_API Vec128<T, N> Broadcast(Vec128<T, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  return Vec128<T, N>{vec_splat(v.raw, kLane)};  // copies lane kLane into every lane
+}
+
+// ------------------------------ TableLookupLanes (Shuffle01)
+
+// Returned by SetTableIndices/IndicesFromVec for use by TableLookupLanes.
+template <typename T, size_t N = 16 / sizeof(T)>
+struct Indices128 {
+  __vector unsigned char raw;  // byte-granular indices, consumed via TableLookupBytes/vec_perm
+};
+
+namespace detail {
+
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecBroadcastLaneBytes(
+    D d) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  return Iota(d8, 0);  // 1-byte lanes: every byte selects itself
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecBroadcastLaneBytes(
+    D d) {
+  const Repartition<uint8_t, decltype(d)> d8;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__  // LE: first byte of each 2-byte lane; BE: last byte
+  constexpr __vector unsigned char kBroadcastLaneBytes = {
+      0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14};
+#else
+  constexpr __vector unsigned char kBroadcastLaneBytes = {
+      1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15};
+#endif
+  return VFromD<decltype(d8)>{kBroadcastLaneBytes};
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecBroadcastLaneBytes(
+    D d) {
+  const Repartition<uint8_t, decltype(d)> d8;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__  // LE: first byte of each 4-byte lane; BE: last byte
+  constexpr __vector unsigned char kBroadcastLaneBytes = {
+      0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12};
+#else
+  constexpr __vector unsigned char kBroadcastLaneBytes = {
+      3, 3, 3, 3, 7, 7, 7, 7, 11, 11, 11, 11, 15, 15, 15, 15};
+#endif
+  return VFromD<decltype(d8)>{kBroadcastLaneBytes};
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecBroadcastLaneBytes(
+    D d) {
+  const Repartition<uint8_t, decltype(d)> d8;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__  // LE: first byte of each 8-byte lane; BE: last byte
+  constexpr __vector unsigned char kBroadcastLaneBytes = {
+      0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8};
+#else
+  constexpr __vector unsigned char kBroadcastLaneBytes = {
+      7, 7, 7, 7, 7, 7, 7, 7, 15, 15, 15, 15, 15, 15, 15, 15};
+#endif
+  return VFromD<decltype(d8)>{kBroadcastLaneBytes};
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecByteOffsets(D d) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  return Zero(d8);  // 1-byte lanes: no offset within the lane
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecByteOffsets(D d) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  constexpr __vector unsigned char kByteOffsets = {0, 1, 0, 1, 0, 1, 0, 1,  // offsets 0..1 per 2-byte lane
+                                                   0, 1, 0, 1, 0, 1, 0, 1};
+  return VFromD<decltype(d8)>{kByteOffsets};
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecByteOffsets(D d) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  constexpr __vector unsigned char kByteOffsets = {0, 1, 2, 3, 0, 1, 2, 3,  // offsets 0..3 per 4-byte lane
+                                                   0, 1, 2, 3, 0, 1, 2, 3};
+  return VFromD<decltype(d8)>{kByteOffsets};
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecByteOffsets(D d) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  constexpr __vector unsigned char kByteOffsets = {0, 1, 2, 3, 4, 5, 6, 7,  // offsets 0..7 per 8-byte lane
+                                                   0, 1, 2, 3, 4, 5, 6, 7};
+  return VFromD<decltype(d8)>{kByteOffsets};
+}
+
+}  // namespace detail
+
+template <class D, typename TI, HWY_IF_T_SIZE_D(D, 1)>
+HWY_API Indices128<TFromD<D>, MaxLanes(D())> IndicesFromVec(
+    D d, Vec128<TI, MaxLanes(D())> vec) {
+  using T = TFromD<D>;
+  static_assert(sizeof(T) == sizeof(TI), "Index size must match lane");
+#if HWY_IS_DEBUG_BUILD  // debug-only check: every index must be below 2 * MaxLanes(d)
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  HWY_DASSERT(AllTrue(
+      du, Lt(BitCast(du, vec), Set(du, static_cast<TU>(MaxLanes(d) * 2)))));
+#endif
+
+  const Repartition<uint8_t, decltype(d)> d8;
+  return Indices128<TFromD<D>, MaxLanes(D())>{BitCast(d8, vec).raw};  // 1-byte lanes: indices are stored unchanged
+}
+
+template <class D, typename TI,
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 2) | (1 << 4) | (1 << 8))>
+HWY_API Indices128<TFromD<D>, MaxLanes(D())> IndicesFromVec(
+    D d, Vec128<TI, MaxLanes(D())> vec) {
+  using T = TFromD<D>;
+  static_assert(sizeof(T) == sizeof(TI), "Index size must match lane");
+#if HWY_IS_DEBUG_BUILD
+  const RebindToUnsigned<decltype(d)> d_uns;
+  using TU = TFromD<decltype(d_uns)>;
+  const auto limit = Set(d_uns, static_cast<TU>(MaxLanes(d) * 2));
+  HWY_DASSERT(AllTrue(d_uns, Lt(BitCast(d_uns, vec), limit)));
+#endif
+
+  // Expand each lane index to byte indices: copy it into every byte of its
+  // lane, scale by sizeof(T), then add each byte's offset within the lane.
+  const Repartition<uint8_t, decltype(d)> d_bytes;
+  using VBytes = VFromD<decltype(d_bytes)>;
+  constexpr int kLog2LaneBytes = static_cast<int>(FloorLog2(sizeof(T)));
+  const VBytes replicated = TableLookupBytes(
+      BitCast(d_bytes, vec), detail::IndicesFromVecBroadcastLaneBytes(d));
+  const VBytes scaled = ShiftLeft<kLog2LaneBytes>(replicated);
+  const VBytes byte_idx = Add(scaled, detail::IndicesFromVecByteOffsets(d));
+  return Indices128<TFromD<D>, MaxLanes(D())>{byte_idx.raw};
+}
+
+template <class D, typename TI>
+HWY_API Indices128<TFromD<D>, HWY_MAX_LANES_D(D)> SetTableIndices(
+    D d, const TI* idx) {
+  const Rebind<TI, decltype(d)> di;
+  return IndicesFromVec(d, LoadU(di, idx));  // unaligned load of the indices, then convert
+}
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> TableLookupLanes(Vec128<T, N> v, Indices128<T, N> idx) {
+  const DFromV<decltype(v)> dv;
+  const VFromD<Repartition<uint8_t, decltype(dv)>> byte_indices{idx.raw};
+  return BitCast(dv, TableLookupBytes(v, byte_indices));
+}
+
+// Single lane: no change; idx is ignored and v is returned as-is.
+template <typename T>
+HWY_API Vec128<T, 1> TableLookupLanes(Vec128<T, 1> v,
+                                      Indices128<T, 1> /* idx */) {
+  return v;
+}
+
+template <typename T, size_t N, HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_API Vec128<T, N> TwoTablesLookupLanes(Vec128<T, N> a, Vec128<T, N> b,
+                                          Indices128<T, N> idx) {
+  const DFromV<decltype(a)> d;
+  const Twice<decltype(d)> dt;
+  const Repartition<uint8_t, decltype(dt)> dt_u8;
+// TableLookupLanes currently requires table and index vectors to be the same
+// size, though a half-length index vector would be sufficient here.
+#if HWY_IS_MSAN
+  const Vec128<T, N> idx_vec{idx.raw};
+  const Indices128<T, N * 2> idx2{Combine(dt, idx_vec, idx_vec).raw};
+#else
+  // We only keep LowerHalf of the result, which is valid in idx.
+  const Indices128<T, N * 2> idx2{idx.raw};
+#endif
+  return LowerHalf(  // both tables are combined into one double-width vector, then looked up by byte
+      d, TableLookupBytes(Combine(dt, b, a),
+                          BitCast(dt, VFromD<decltype(dt_u8)>{idx2.raw})));
+}
+
+template <typename T>
+HWY_API Vec128<T> TwoTablesLookupLanes(Vec128<T> a, Vec128<T> b,
+                                       Indices128<T> idx) {
+  return Vec128<T>{vec_perm(a.raw, b.raw, idx.raw)};  // full vectors: a single vec_perm over both tables
+}
+
+// ------------------------------ ReverseBlocks
+
+// Single block: no change, v is returned as-is.
+template <class D>
+HWY_API VFromD<D> ReverseBlocks(D /* tag */, VFromD<D> v) {
+  return v;
+}
+
+// ------------------------------ Reverse (Shuffle0123, Shuffle2301)
+
+// Single lane: no change, v is returned as-is.
+template <class D, typename T = TFromD<D>, HWY_IF_LANES_D(D, 1)>
+HWY_API Vec128<T, 1> Reverse(D /* tag */, Vec128<T, 1> v) {
+  return v;
+}
+
+// 32-bit x2: shuffle (Shuffle2301 applied to the raw vector viewed as Vec128).
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec64<T> Reverse(D /* tag */, Vec64<T> v) {
+  return Vec64<T>{Shuffle2301(Vec128<T>{v.raw}).raw};
+}
+
+// 16-bit x4: byte shuffle that reverses the 16-bit lanes of each 64-bit half.
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec64<T> Reverse(D /* tag */, Vec64<T> v) {
+  const __vector unsigned char kReversed4x16 = {6,  7,  4,  5,  2,  3,  0, 1,
+                                                14, 15, 12, 13, 10, 11, 8, 9};
+  return Vec64<T>{vec_perm(v.raw, v.raw, kReversed4x16)};
+}
+
+// 16-bit x2: viewed as one 32-bit lane, rotating by 16 bits swaps the two lanes.
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec32<T> Reverse(D d, Vec32<T> v) {
+  const RepartitionToWide<RebindToUnsigned<decltype(d)>> du32;
+  return BitCast(d, RotateRight<16>(Reverse(du32, BitCast(du32, v))));
+}
+
+// ------------------------------- ReverseLaneBytes
+
+#if HWY_PPC_HAVE_9 && \
+    (HWY_COMPILER_GCC_ACTUAL >= 710 || HWY_COMPILER_CLANG >= 400)
+
+// Per-target flag to prevent generic_ops-inl.h defining ReverseLaneBytes.
+#ifdef HWY_NATIVE_REVERSE_LANE_BYTES
+#undef HWY_NATIVE_REVERSE_LANE_BYTES
+#else
+#define HWY_NATIVE_REVERSE_LANE_BYTES
+#endif
+
+template <class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V),
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 2) | (1 << 4) | (1 << 8))>  // 2-, 4- or 8-byte integer lanes
+HWY_API V ReverseLaneBytes(V v) {
+  return V{vec_revb(v.raw)};  // byte order reversed within each lane via vec_revb
+}
+
+// Per-target flag to prevent generic_ops-inl.h defining 8-bit Reverse2/4/8.
+#ifdef HWY_NATIVE_REVERSE2_8
+#undef HWY_NATIVE_REVERSE2_8
+#else
+#define HWY_NATIVE_REVERSE2_8
+#endif
+
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API VFromD<D> Reverse2(D d, VFromD<D> v) {
+  const Repartition<uint16_t, decltype(d)> du16;  // each pair of bytes is one u16 lane
+  return BitCast(d, ReverseLaneBytes(BitCast(du16, v)));
+}
+
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API VFromD<D> Reverse4(D d, VFromD<D> v) {
+  const Repartition<uint32_t, decltype(d)> du32;  // each group of four bytes is one u32 lane
+  return BitCast(d, ReverseLaneBytes(BitCast(du32, v)));
+}
+
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API VFromD<D> Reverse8(D d, VFromD<D> v) {
+  const Repartition<uint64_t, decltype(d)> du64;  // each group of eight bytes is one u64 lane
+  return BitCast(d, ReverseLaneBytes(BitCast(du64, v)));
+}
+
+#endif  // HWY_PPC_HAVE_9
+
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec16<T> Reverse(D d, Vec16<T> v) {
+  return Reverse2(d, v);  // two 1-byte lanes: same as Reverse2
+}
+
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec32<T> Reverse(D d, Vec32<T> v) {
+  return Reverse4(d, v);  // four 1-byte lanes: same as Reverse4
+}
+
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec64<T> Reverse(D d, Vec64<T> v) {
+  return Reverse8(d, v);  // eight 1-byte lanes: same as Reverse8
+}
+
+// ------------------------------ Reverse2
+
+// Single lane: no change, v is returned as-is.
+template <class D, typename T = TFromD<D>, HWY_IF_LANES_D(D, 1)>
+HWY_API Vec128<T, 1> Reverse2(D /* tag */, Vec128<T, 1> v) {
+  return v;
+}
+
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 2)>
+HWY_API VFromD<D> Reverse2(D d, VFromD<D> v) {
+  const Repartition<uint32_t, decltype(d)> du32;
+  return BitCast(d, RotateRight<16>(BitCast(du32, v)));  // rotating each u32 by 16 swaps its two 16-bit lanes
+}
+
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 4)>
+HWY_API VFromD<D> Reverse2(D d, VFromD<D> v) {
+  const Repartition<uint64_t, decltype(d)> du64;
+  return BitCast(d, RotateRight<32>(BitCast(du64, v)));  // rotating each u64 by 32 swaps its two 32-bit lanes
+}
+
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 8)>
+HWY_API VFromD<D> Reverse2(D /* tag */, VFromD<D> v) {
+  return Shuffle01(v);  // 8-byte lanes: delegates to Shuffle01
+}
+
+// ------------------------------ Reverse4
+
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Reverse4(D /* tag */, VFromD<D> v) {
+  const __vector unsigned char kReversed4x16 = {6,  7,  4,  5,  2,  3,  0, 1,
+                                                14, 15, 12, 13, 10, 11, 8, 9};
+  return VFromD<D>{vec_perm(v.raw, v.raw, kReversed4x16)};  // per group of 4
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> Reverse4(D d, VFromD<D> v) {
+  return Reverse(d, v);  // 4-byte lanes: delegates to Reverse
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> Reverse4(D /* tag */, VFromD<D> /* v */) {
+  HWY_ASSERT(0);  // don't have 4 u64 lanes; always fails if called
+}
+
+// ------------------------------ Reverse8
+
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Reverse8(D d, VFromD<D> v) {
+  return Reverse(d, v);  // 2-byte lanes: delegates to Reverse
+}
+
+template <class D, HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 4) | (1 << 8))>
+HWY_API VFromD<D> Reverse8(D /* tag */, VFromD<D> /* v */) {
+  HWY_ASSERT(0);  // don't have 8 lanes if larger than 16-bit; always fails if called
+}
+
+// ------------------------------ InterleaveLower
+
+// Interleaves lanes from halves of the 128-bit blocks of "a" (which provides
+// the least-significant lane) and "b". To concatenate two half-width integers
+// into one, use ZipLower/Upper instead (also works with scalar).
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> InterleaveLower(Vec128<T, N> a, Vec128<T, N> b) {
+  return Vec128<T, N>{vec_mergeh(a.raw, b.raw)};  // implemented with vec_mergeh
+}
+
+// Additional overload for the optional tag; forwards to the tag-less version.
+template <class D>
+HWY_API VFromD<D> InterleaveLower(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  return InterleaveLower(a, b);
+}
+
+// ------------------------------ InterleaveUpper (UpperHalf)
+
+// Full 128-bit vectors: implemented with vec_mergel.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec128<T> InterleaveUpper(D /* tag */, Vec128<T> a, Vec128<T> b) {
+  return Vec128<T>{vec_mergel(a.raw, b.raw)};
+}
+
+// Partial: move each upper half down, then interleave the lower halves.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> InterleaveUpper(D d, VFromD<D> a, VFromD<D> b) {
+  const VFromD<D> a_upper{UpperHalf(Half<decltype(d)>(), a).raw};
+  const VFromD<D> b_upper{UpperHalf(Half<decltype(d)>(), b).raw};
+  return InterleaveLower(d, a_upper, b_upper);
+}
+
+// ------------------------------ ZipLower/ZipUpper (InterleaveLower)
+
+// Same as Interleave*, except that the return lanes are double-width integers;
+// this is necessary because the single-lane scalar cannot return two values.
+// Tag-less form: interleaves via InterleaveLower(a, b) and reinterprets the
+// result as a vector of DW.
+template <class V, class DW = RepartitionToWide<DFromV<V>>>
+HWY_API VFromD<DW> ZipLower(V a, V b) {
+  const auto interleaved = InterleaveLower(a, b);
+  return BitCast(DW(), interleaved);
+}
+// Tagged form: same as above, but the caller supplies the wide tag "dw".
+template <class V, class D = DFromV<V>, class DW = RepartitionToWide<D>>
+HWY_API VFromD<DW> ZipLower(DW dw, V a, V b) {
+  const D d;
+  const auto interleaved = InterleaveLower(d, a, b);
+  return BitCast(dw, interleaved);
+}
+
+// Interleaves via InterleaveUpper and reinterprets the result using the wide
+// tag "dw".
+template <class V, class D = DFromV<V>, class DW = RepartitionToWide<D>>
+HWY_API VFromD<DW> ZipUpper(DW dw, V a, V b) {
+  const D d;
+  const auto interleaved = InterleaveUpper(d, a, b);
+  return BitCast(dw, interleaved);
+}
+
+// ------------------------------ Per4LaneBlkShufDupSet4xU32
+
+// Used by hwy/ops/generic_ops-inl.h to implement Per4LaneBlockShuffle
+namespace detail {
+
+// Toggle the per-target flag: undefine it if already defined, else define it.
+#ifdef HWY_NATIVE_PER4LANEBLKSHUF_DUP32
+#undef HWY_NATIVE_PER4LANEBLKSHUF_DUP32
+#else
+#define HWY_NATIVE_PER4LANEBLKSHUF_DUP32
+#endif
+
+// Builds a u32x4 vector from the four scalars (x0 is written first in the
+// initializer list, x3 last) and resizes/bitcasts it to the vector type of d.
+template <class D>
+HWY_INLINE VFromD<D> Per4LaneBlkShufDupSet4xU32(D d, const uint32_t x3,
+                                                const uint32_t x2,
+                                                const uint32_t x1,
+                                                const uint32_t x0) {
+  const __vector unsigned int lanes = {x0, x1, x2, x3};
+  const Vec128<uint32_t> v32{lanes};
+  return ResizeBitCast(d, v32);
+}
+
+}  // namespace detail
+
+// ------------------------------ SlideUpLanes
+
+// Slides v up by "amt" lanes using a whole-register octet shift. The shift
+// distance is expressed in bits and broadcast to every u32 lane before being
+// viewed as bytes for vec_slo/vec_sro.
+template <class D>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+  const Repartition<uint8_t, decltype(d)> du8;
+  using VU8 = VFromD<decltype(du8)>;
+
+  const uint32_t shift_bits =
+      static_cast<uint32_t>(amt * sizeof(TFromD<D>) * 8);
+  const auto v_shift =
+      BitCast(Full128<uint8_t>(), Set(Full128<uint32_t>(), shift_bits));
+  const auto bytes = BitCast(du8, v).raw;
+
+  // The intrinsic that corresponds to "up" depends on the byte order.
+#if HWY_IS_LITTLE_ENDIAN
+  const VU8 slid{vec_slo(bytes, v_shift.raw)};
+#else
+  const VU8 slid{vec_sro(bytes, v_shift.raw)};
+#endif
+  return BitCast(d, slid);
+}
+
+// ------------------------------ SlideDownLanes
+
+// Vectors of at most 8 bytes: reinterpret the whole vector as unsigned lanes
+// of type TU (sized from d.MaxBytes()) and shift by amt lanes' worth of bits.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+  using TU = UnsignedFromSize<d.MaxBytes()>;
+  const Repartition<TU, decltype(d)> du;
+
+  const TU shift_bits = static_cast<TU>(amt * sizeof(TFromD<D>) * 8);
+  const auto v_shift = Set(du, shift_bits);
+  const auto as_unsigned = BitCast(du, v);
+
+  // The shift direction that corresponds to "down" depends on the byte order.
+#if HWY_IS_LITTLE_ENDIAN
+  const auto slid = as_unsigned >> v_shift;
+#else
+  const auto slid = as_unsigned << v_shift;
+#endif
+  return BitCast(d, slid);
+}
+
+// Full 16-byte vectors: whole-register octet shift, mirroring SlideUpLanes
+// with the two intrinsics exchanged.
+template <class D, HWY_IF_V_SIZE_D(D, 16)>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+  const Repartition<uint8_t, decltype(d)> du8;
+  using VU8 = VFromD<decltype(du8)>;
+
+  // Shift distance in bits, broadcast to every u32 lane and viewed as bytes.
+  const uint32_t shift_bits =
+      static_cast<uint32_t>(amt * sizeof(TFromD<D>) * 8);
+  const auto v_shift =
+      BitCast(Full128<uint8_t>(), Set(Full128<uint32_t>(), shift_bits));
+  const auto bytes = BitCast(du8, v).raw;
+
+#if HWY_IS_LITTLE_ENDIAN
+  const VU8 slid{vec_sro(bytes, v_shift.raw)};
+#else
+  const VU8 slid{vec_slo(bytes, v_shift.raw)};
+#endif
+  return BitCast(d, slid);
+}
+
+// ================================================== COMBINE
+
+// ------------------------------ Combine (InterleaveLower)
+
+// N = N/2 + N/2: joins two half-width vectors into one vector of type
+// VFromD<D>. Each half is viewed as a single unsigned lane as wide as the
+// half vector, and the two lanes are interleaved with lo_half first.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), class VH = VFromD<Half<D>>>
+HWY_API VFromD<D> Combine(D d, VH hi_half, VH lo_half) {
+  const Half<decltype(d)> dh;
+  using TU = UnsignedFromSize<dh.MaxBytes()>;
+  using VU = Vec128<TU, 2>;
+  using Raw = typename detail::Raw128<TFromV<VU>>::type;
+
+  const VU hi_lane{reinterpret_cast<Raw>(hi_half.raw)};
+  const VU lo_lane{reinterpret_cast<Raw>(lo_half.raw)};
+  const auto joined = InterleaveLower(lo_lane, hi_lane);
+  return BitCast(d, joined);
+}
+
+// ------------------------------ ZeroExtendVector (Combine, IfThenElseZero)
+
+// Re-wraps the half vector's raw register at full width and zeroes every lane
+// outside the first MaxLanes(Half<D>) lanes.
+template <class D>
+HWY_API VFromD<D> ZeroExtendVector(D d, VFromD<Half<D>> lo) {
+  const Half<D> dh;
+  const auto keep_lower = FirstN(d, MaxLanes(dh));
+  const VFromD<D> widened{lo.raw};
+  return IfThenElseZero(keep_lower, widened);
+}
+
+// ------------------------------ Concat full (InterleaveLower)
+
+// hiH,hiL loH,loL |-> hiL,loL (= lower halves)
+// Implemented by viewing both inputs as u64 lanes and interleaving the lower
+// lane of lo with the lower lane of hi.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec128<T> ConcatLowerLower(D d, Vec128<T> hi, Vec128<T> lo) {
+  const Repartition<uint64_t, decltype(d)> d64;
+  const auto lo64 = BitCast(d64, lo);
+  const auto hi64 = BitCast(d64, hi);
+  return BitCast(d, InterleaveLower(lo64, hi64));
+}
+
+// hiH,hiL loH,loL |-> hiH,loH (= upper halves)
+// Implemented by viewing both inputs as u64 lanes and interleaving the upper
+// lane of lo with the upper lane of hi.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec128<T> ConcatUpperUpper(D d, Vec128<T> hi, Vec128<T> lo) {
+  const Repartition<uint64_t, decltype(d)> d64;
+  const auto lo64 = BitCast(d64, lo);
+  const auto hi64 = BitCast(d64, hi);
+  return BitCast(d, InterleaveUpper(d64, lo64, hi64));
+}
+
+// hiH,hiL loH,loL |-> hiL,loH (= inner halves)
+// Delegates to CombineShiftRightBytes with a shift of 8 bytes (half of a
+// 16-byte vector).
+template <class D, typename T = TFromD<D>>
+HWY_API Vec128<T> ConcatLowerUpper(D d, Vec128<T> hi, Vec128<T> lo) {
+  return CombineShiftRightBytes<8>(d, hi, lo);
+}
+
+// hiH,hiL loH,loL |-> hiH,loL (= outer halves)
+template <class D, typename T = TFromD<D>>
+HWY_API Vec128<T> ConcatUpperLower(D /*d*/, Vec128<T> hi, Vec128<T> lo) {
+  // Indices 0..7 pick from the first vec_perm operand (lo); indices 24..31
+  // pick bytes 8..15 of the second operand (hi).
+  const __vector unsigned char kOuterHalves = {0,  1,  2,  3,  4,  5,  6,  7,
+                                               24, 25, 26, 27, 28, 29, 30, 31};
+  const auto blended = vec_perm(lo.raw, hi.raw, kOuterHalves);
+  return Vec128<T>{blended};
+}
+
+// ------------------------------ Concat partial (Combine, LowerHalf)
+
+// Partial vectors: Combine the lower half of hi with the lower half of lo.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ConcatLowerLower(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Half<decltype(d)> dh;
+  const auto from_hi = LowerHalf(dh, hi);
+  const auto from_lo = LowerHalf(dh, lo);
+  return Combine(d, from_hi, from_lo);
+}
+
+// Partial vectors: Combine the upper half of hi with the upper half of lo.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ConcatUpperUpper(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Half<decltype(d)> dh;
+  const auto from_hi = UpperHalf(dh, hi);
+  const auto from_lo = UpperHalf(dh, lo);
+  return Combine(d, from_hi, from_lo);
+}
+
+// Partial vectors: Combine the lower half of hi with the upper half of lo.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ConcatLowerUpper(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Half<decltype(d)> dh;
+  const auto from_hi = LowerHalf(dh, hi);
+  const auto from_lo = UpperHalf(dh, lo);
+  return Combine(d, from_hi, from_lo);
+}
+
+// Partial vectors: Combine the upper half of hi with the lower half of lo.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ConcatUpperLower(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Half<decltype(d)> dh;
+  const auto from_hi = UpperHalf(dh, hi);
+  const auto from_lo = LowerHalf(dh, lo);
+  return Combine(d, from_hi, from_lo);
+}
+
+// ------------------------------ TruncateTo
+
+// Single-lane truncation from an unsigned FromT that is at least twice as
+// wide as the unsigned target lane type.
+template <class D, typename FromT, HWY_IF_UNSIGNED_D(D), HWY_IF_UNSIGNED(FromT),
+          hwy::EnableIf<(sizeof(FromT) >= sizeof(TFromD<D>) * 2)>* = nullptr,
+          HWY_IF_LANES_D(D, 1)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, Vec128<FromT, 1> v) {
+  using Raw = typename detail::Raw128<TFromD<D>>::type;
+#if HWY_IS_LITTLE_ENDIAN
+  // Little-endian: the register is reinterpreted as-is.
+  const Raw narrowed = reinterpret_cast<Raw>(v.raw);
+#else
+  // Big-endian: first rotate the register by the size difference in bytes.
+  const Raw narrowed = reinterpret_cast<Raw>(
+      vec_sld(v.raw, v.raw, sizeof(FromT) - sizeof(TFromD<D>)));
+#endif
+  return VFromD<D>{narrowed};
+}
+
+namespace detail {
+
+// Packs two vectors of unsigned FromT lanes into one vector of lanes half as
+// wide, using vec_pack with lo as first and hi as second operand.
+template <class D, typename FromT, HWY_IF_UNSIGNED_D(D), HWY_IF_UNSIGNED(FromT),
+          HWY_IF_T_SIZE(FromT, sizeof(TFromD<D>) * 2), HWY_IF_LANES_GT_D(D, 1)>
+HWY_API VFromD<D> Truncate2To(
+    D /* tag */, Vec128<FromT, Repartition<FromT, D>().MaxLanes()> lo,
+    Vec128<FromT, Repartition<FromT, D>().MaxLanes()> hi) {
+  const auto packed = vec_pack(lo.raw, hi.raw);
+  return VFromD<D>{packed};
+}
+
+}  // namespace detail
+
+// Multi-lane truncation to lanes exactly half as wide: vec_pack with v passed
+// as both operands.
+template <class D, typename FromT, HWY_IF_UNSIGNED_D(D), HWY_IF_UNSIGNED(FromT),
+          HWY_IF_T_SIZE(FromT, sizeof(TFromD<D>) * 2), HWY_IF_LANES_GT_D(D, 1)>
+HWY_API VFromD<D> TruncateTo(D /* d */,
+                             Vec128<FromT, Rebind<FromT, D>().MaxLanes()> v) {
+  const auto packed = vec_pack(v.raw, v.raw);
+  return VFromD<D>{packed};
+}
+
+// Multi-lane truncation by a factor of four or more: truncate once to
+// MakeNarrow<FromT>, then call TruncateTo again on the intermediate result.
+template <class D, typename FromT, HWY_IF_UNSIGNED_D(D), HWY_IF_UNSIGNED(FromT),
+          hwy::EnableIf<(sizeof(FromT) >= sizeof(TFromD<D>) * 4)>* = nullptr,
+          HWY_IF_LANES_GT_D(D, 1)>
+HWY_API VFromD<D> TruncateTo(D d,
+                             Vec128<FromT, Rebind<FromT, D>().MaxLanes()> v) {
+  const Rebind<MakeNarrow<FromT>, decltype(d)> d_half;
+  const auto halfway = TruncateTo(d_half, v);
+  return TruncateTo(d, halfway);
+}
+
+// ------------------------------ ConcatOdd (TruncateTo)
+
+// 8-bit full: view both inputs as u16 lanes and pack them with Truncate2To.
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T> ConcatOdd(D d, Vec128<T> hi, Vec128<T> lo) {
+  const Repartition<uint16_t, decltype(d)> dw;
+  const RebindToUnsigned<decltype(d)> du;
+  const Vec128<uint16_t> hi16 = BitCast(dw, hi);
+  const Vec128<uint16_t> lo16 = BitCast(dw, lo);
+#if HWY_IS_LITTLE_ENDIAN
+  // Right-shift 8 bits per u16 so we can pack.
+  const Vec128<uint16_t> uH = ShiftRight<8>(hi16);
+  const Vec128<uint16_t> uL = ShiftRight<8>(lo16);
+#else
+  // Big-endian: the u16 lanes are packed without a shift.
+  const Vec128<uint16_t> uH = hi16;
+  const Vec128<uint16_t> uL = lo16;
+#endif
+  return BitCast(d, detail::Truncate2To(du, uL, uH));
+}
+
+// 8-bit x8: one byte permute; indices 1,3,5,7 pick from lo and 17,19,21,23
+// pick bytes 1,3,5,7 of hi.
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec64<T> ConcatOdd(D /*d*/, Vec64<T> hi, Vec64<T> lo) {
+  // Only eight indices are given: don't care about the upper half, so there
+  // is no need to zero it.
+  const __vector unsigned char kOddBytes = {1, 3, 5, 7, 17, 19, 21, 23};
+  const auto compacted = vec_perm(lo.raw, hi.raw, kOddBytes);
+  return Vec64<T>{compacted};
+}
+
+// 8-bit x4: one byte permute; indices 1,3 pick from lo and 17,19 pick bytes
+// 1,3 of hi.
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec32<T> ConcatOdd(D /*d*/, Vec32<T> hi, Vec32<T> lo) {
+  // Only four indices are given: don't care about the remaining bytes, so
+  // there is no need to zero them.
+  const __vector unsigned char kOddBytes = {1, 3, 17, 19};
+  const auto compacted = vec_perm(lo.raw, hi.raw, kOddBytes);
+  return Vec32<T>{compacted};
+}
+
+// 16-bit full: view both inputs as u32 lanes and pack them with Truncate2To.
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec128<T> ConcatOdd(D d, Vec128<T> hi, Vec128<T> lo) {
+  const Repartition<uint32_t, decltype(d)> dw;
+  const RebindToUnsigned<decltype(d)> du;
+  const Vec128<uint32_t> hi32 = BitCast(dw, hi);
+  const Vec128<uint32_t> lo32 = BitCast(dw, lo);
+#if HWY_IS_LITTLE_ENDIAN
+  // Right-shift 16 bits per u32 before packing.
+  const Vec128<uint32_t> uH = ShiftRight<16>(hi32);
+  const Vec128<uint32_t> uL = ShiftRight<16>(lo32);
+#else
+  // Big-endian: the u32 lanes are packed without a shift.
+  const Vec128<uint32_t> uH = hi32;
+  const Vec128<uint32_t> uL = lo32;
+#endif
+  return BitCast(d, detail::Truncate2To(du, uL, uH));
+}
+
+// 16-bit x4: one byte permute; byte pairs 2..3 and 6..7 come from lo,
+// indices 18..19 and 22..23 pick the same byte pairs of hi.
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec64<T> ConcatOdd(D /*d*/, Vec64<T> hi, Vec64<T> lo) {
+  // Only eight indices are given: don't care about the upper half, so there
+  // is no need to zero it.
+  const __vector unsigned char kOddPairs = {2, 3, 6, 7, 18, 19, 22, 23};
+  const auto compacted = vec_perm(lo.raw, hi.raw, kOddPairs);
+  return Vec64<T>{compacted};
+}
+
+// 32-bit full: byte permute on little-endian, u64 -> u32 pack on big-endian.
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T> ConcatOdd(D d, Vec128<T> hi, Vec128<T> lo) {
+#if HWY_IS_LITTLE_ENDIAN
+  (void)d;  // the tag is only used on the big-endian path
+  // Bytes 4..7 and 12..15 of lo, then (indices 16 and up) the same bytes of
+  // hi.
+  const __vector unsigned char kOddU32 = {4,  5,  6,  7,  12, 13, 14, 15,
+                                          20, 21, 22, 23, 28, 29, 30, 31};
+  const auto compacted = vec_perm(lo.raw, hi.raw, kOddU32);
+  return Vec128<T>{compacted};
+#else
+  const Repartition<uint64_t, decltype(d)> dw;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto lo64 = BitCast(dw, lo);
+  const auto hi64 = BitCast(dw, hi);
+  return BitCast(d, detail::Truncate2To(du, lo64, hi64));
+#endif
+}
+
+// Any type x2: with two lanes per vector, lane 1 is the only odd lane of each
+// input, so this delegates to InterleaveUpper with lo first and hi second.
+template <class D, typename T = TFromD<D>, HWY_IF_LANES_D(D, 2)>
+HWY_API Vec128<T, 2> ConcatOdd(D d, Vec128<T, 2> hi, Vec128<T, 2> lo) {
+  return InterleaveUpper(d, lo, hi);
+}
+
+// ------------------------------ ConcatEven (TruncateTo)
+
+// 8-bit full: view both inputs as u16 lanes and pack them with Truncate2To.
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T> ConcatEven(D d, Vec128<T> hi, Vec128<T> lo) {
+  const Repartition<uint16_t, decltype(d)> dw;
+  const RebindToUnsigned<decltype(d)> du;
+  const Vec128<uint16_t> hi16 = BitCast(dw, hi);
+  const Vec128<uint16_t> lo16 = BitCast(dw, lo);
+#if HWY_IS_LITTLE_ENDIAN
+  // Little-endian: the u16 lanes are packed without a shift.
+  const Vec128<uint16_t> uH = hi16;
+  const Vec128<uint16_t> uL = lo16;
+#else
+  // Right-shift 8 bits per u16 so we can pack.
+  const Vec128<uint16_t> uH = ShiftRight<8>(hi16);
+  const Vec128<uint16_t> uL = ShiftRight<8>(lo16);
+#endif
+  return BitCast(d, detail::Truncate2To(du, uL, uH));
+}
+
+// 8-bit x8: one byte permute; indices 0,2,4,6 pick from lo and 16,18,20,22
+// pick bytes 0,2,4,6 of hi.
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec64<T> ConcatEven(D /*d*/, Vec64<T> hi, Vec64<T> lo) {
+  // Only eight indices are given: don't care about the upper half, so there
+  // is no need to zero it.
+  const __vector unsigned char kEvenBytes = {0, 2, 4, 6, 16, 18, 20, 22};
+  const auto compacted = vec_perm(lo.raw, hi.raw, kEvenBytes);
+  return Vec64<T>{compacted};
+}
+
+// 8-bit x4: one byte permute; indices 0,2 pick from lo and 16,18 pick bytes
+// 0,2 of hi.
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec32<T> ConcatEven(D /*d*/, Vec32<T> hi, Vec32<T> lo) {
+  // Only four indices are given: don't care about the remaining bytes, so
+  // there is no need to zero them.
+  const __vector unsigned char kEvenBytes = {0, 2, 16, 18};
+  const auto compacted = vec_perm(lo.raw, hi.raw, kEvenBytes);
+  return Vec32<T>{compacted};
+}
+
+// 16-bit full: view both inputs as u32 lanes and pack them with Truncate2To.
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec128<T> ConcatEven(D d, Vec128<T> hi, Vec128<T> lo) {
+  const Repartition<uint32_t, decltype(d)> dw;
+  const RebindToUnsigned<decltype(d)> du;
+  const Vec128<uint32_t> hi32 = BitCast(dw, hi);
+  const Vec128<uint32_t> lo32 = BitCast(dw, lo);
+#if HWY_IS_LITTLE_ENDIAN
+  // Little-endian: the u32 lanes are packed without a shift.
+  const Vec128<uint32_t> uH = hi32;
+  const Vec128<uint32_t> uL = lo32;
+#else
+  // Right-shift 16 bits per u32 before packing.
+  const Vec128<uint32_t> uH = ShiftRight<16>(hi32);
+  const Vec128<uint32_t> uL = ShiftRight<16>(lo32);
+#endif
+  return BitCast(d, detail::Truncate2To(du, uL, uH));
+}
+
+// 16-bit x4: one byte permute; byte pairs 0..1 and 4..5 come from lo,
+// indices 16..17 and 20..21 pick the same byte pairs of hi.
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec64<T> ConcatEven(D /*d*/, Vec64<T> hi, Vec64<T> lo) {
+  // Only eight indices are given: don't care about the upper half, so there
+  // is no need to zero it.
+  const __vector unsigned char kEvenPairs = {0, 1, 4, 5, 16, 17, 20, 21};
+  const auto compacted = vec_perm(lo.raw, hi.raw, kEvenPairs);
+  return Vec64<T>{compacted};
+}
+
+// 32-bit full: u64 -> u32 pack on little-endian, byte permute on big-endian.
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T> ConcatEven(D d, Vec128<T> hi, Vec128<T> lo) {
+#if HWY_IS_LITTLE_ENDIAN
+  const Repartition<uint64_t, decltype(d)> dw;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto lo64 = BitCast(dw, lo);
+  const auto hi64 = BitCast(dw, hi);
+  return BitCast(d, detail::Truncate2To(du, lo64, hi64));
+#else
+  (void)d;  // the tag is only used on the little-endian path
+  // Bytes 0..3 and 8..11 of lo, then (indices 16 and up) the same bytes of
+  // hi.
+  constexpr __vector unsigned char kEvenU32 = {0,  1,  2,  3,  8,  9,  10, 11,
+                                               16, 17, 18, 19, 24, 25, 26, 27};
+  const auto compacted = vec_perm(lo.raw, hi.raw, kEvenU32);
+  return Vec128<T>{compacted};
+#endif
+}
+
+// Any T x2: with two lanes per vector, lane 0 is the only even lane of each
+// input, so this delegates to InterleaveLower with lo first and hi second.
+template <typename D, typename T = TFromD<D>, HWY_IF_LANES_D(D, 2)>
+HWY_API Vec128<T, 2> ConcatEven(D d, Vec128<T, 2> hi, Vec128<T, 2> lo) {
+  return InterleaveLower(d, lo, hi);
+}
+
+// ------------------------------ OrderedTruncate2To (ConcatEven, ConcatOdd)
+// Toggle the per-target flag: undefine it if already defined, else define it.
+#ifdef HWY_NATIVE_ORDERED_TRUNCATE_2_TO
+#undef HWY_NATIVE_ORDERED_TRUNCATE_2_TO
+#else
+#define HWY_NATIVE_ORDERED_TRUNCATE_2_TO
+#endif
+
+// Truncates the unsigned lanes of a and b to half their width and returns
+// them in one vector. Both inputs are reinterpreted as narrow lanes; a is
+// passed as the "lo" and b as the "hi" argument of ConcatEven/ConcatOdd.
+template <class D, HWY_IF_UNSIGNED_D(D), class V, HWY_IF_UNSIGNED_V(V),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<D>) * 2),
+          HWY_IF_LANES_D(D, HWY_MAX_LANES_D(DFromV<V>) * 2)>
+HWY_API VFromD<D> OrderedTruncate2To(D d, V a, V b) {
+  const auto hi_narrow = BitCast(d, b);
+  const auto lo_narrow = BitCast(d, a);
+#if HWY_IS_LITTLE_ENDIAN
+  return ConcatEven(d, hi_narrow, lo_narrow);
+#else
+  return ConcatOdd(d, hi_narrow, lo_narrow);
+#endif
+}
+
+// ------------------------------ DupEven (InterleaveLower)
+
+// Single lane: returns the input unchanged.
+template <typename T>
+HWY_API Vec128<T, 1> DupEven(Vec128<T, 1> v) {
+  return v;
+}
+
+// Two lanes: interleaving v with itself via InterleaveLower.
+template <typename T>
+HWY_API Vec128<T, 2> DupEven(Vec128<T, 2> v) {
+  const DFromV<decltype(v)> d;
+  return InterleaveLower(d, v, v);
+}
+
+// 1-byte lanes: byte table lookup in which every even index appears twice.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T, N> DupEven(Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  constexpr __vector unsigned char kDupEvenU8 = {0, 0, 2,  2,  4,  4,  6,  6,
+                                                 8, 8, 10, 10, 12, 12, 14, 14};
+  const VFromD<decltype(du8)> indices{kDupEvenU8};
+  return TableLookupBytes(v, BitCast(d, indices));
+}
+
+// 2-byte lanes: byte table lookup in which the byte pairs 0..1, 4..5, 8..9
+// and 12..13 each appear twice.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec128<T, N> DupEven(Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  constexpr __vector unsigned char kDupEvenU16 = {0, 1, 0, 1, 4,  5,  4,  5,
+                                                  8, 9, 8, 9, 12, 13, 12, 13};
+  const VFromD<decltype(du8)> indices{kDupEvenU16};
+  return TableLookupBytes(v, BitCast(d, indices));
+}
+
+// 4-byte lanes, full vector: vec_mergee of v with itself.
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T> DupEven(Vec128<T> v) {
+  const auto duplicated = vec_mergee(v.raw, v.raw);
+  return Vec128<T>{duplicated};
+}
+
+// ------------------------------ DupOdd (InterleaveUpper)
+
+// 1-byte lanes: byte table lookup in which every odd index appears twice.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T, N> DupOdd(Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  constexpr __vector unsigned char kDupOddU8 = {1, 1, 3,  3,  5,  5,  7,  7,
+                                                9, 9, 11, 11, 13, 13, 15, 15};
+  const VFromD<decltype(du8)> indices{kDupOddU8};
+  return TableLookupBytes(v, BitCast(d, indices));
+}
+
+// 2-byte lanes: byte table lookup in which the byte pairs 2..3, 6..7, 10..11
+// and 14..15 each appear twice.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec128<T, N> DupOdd(Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  constexpr __vector unsigned char kDupOddU16 = {2,  3,  2,  3,  6,  7,  6,  7,
+                                                 10, 11, 10, 11, 14, 15, 14, 15};
+  const VFromD<decltype(du8)> indices{kDupOddU16};
+  return TableLookupBytes(v, BitCast(d, indices));
+}
+
+// 4-byte lanes: vec_mergeo of v with itself.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T, N> DupOdd(Vec128<T, N> v) {
+  const auto duplicated = vec_mergeo(v.raw, v.raw);
+  return Vec128<T, N>{duplicated};
+}
+
+// 8-byte lanes: interleaving v with itself via InterleaveUpper.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T, N> DupOdd(Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  return InterleaveUpper(d, v, v);
+}
+
+// ------------------------------ OddEven (IfThenElse)
+
+// 1-byte lanes: blends a and b through a constant byte mask. b is passed as
+// the "then" operand and a as the "else" operand of IfVecThenElse.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_INLINE Vec128<T, N> OddEven(Vec128<T, N> a, Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;
+  // 0xFF at every even byte index.
+  const __vector unsigned char kEvenBytes = {0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0,
+                                             0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0};
+  const Vec128<uint8_t, N> byte_mask{kEvenBytes};
+  return IfVecThenElse(BitCast(d, byte_mask), b, a);
+}
+
+// 2-byte lanes: blends a and b through a constant byte mask. b is passed as
+// the "then" operand and a as the "else" operand of IfVecThenElse.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE Vec128<T, N> OddEven(Vec128<T, N> a, Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;
+  // 0xFF over the byte pairs 0..1, 4..5, 8..9 and 12..13.
+  const __vector unsigned char kEvenLanes = {0xFF, 0xFF, 0, 0, 0xFF, 0xFF, 0, 0,
+                                             0xFF, 0xFF, 0, 0, 0xFF, 0xFF, 0, 0};
+  const Vec128<uint8_t, N * 2> byte_mask{kEvenLanes};
+  return IfVecThenElse(BitCast(d, byte_mask), b, a);
+}
+
+// 4-byte lanes: blends a and b through a constant byte mask. b is passed as
+// the "then" operand and a as the "else" operand of IfVecThenElse.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE Vec128<T, N> OddEven(Vec128<T, N> a, Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;
+  // 0xFF over bytes 0..3 and 8..11.
+  const __vector unsigned char kEvenLanes = {0xFF, 0xFF, 0xFF, 0xFF, 0, 0, 0, 0,
+                                             0xFF, 0xFF, 0xFF, 0xFF, 0, 0, 0, 0};
+  const Vec128<uint8_t, N * 4> byte_mask{kEvenLanes};
+  return IfVecThenElse(BitCast(d, byte_mask), b, a);
+}
+
+// 8-byte lanes: blends a and b through a constant byte mask. b is passed as
+// the "then" operand and a as the "else" operand of IfVecThenElse.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 8)>
+HWY_INLINE Vec128<T, N> OddEven(Vec128<T, N> a, Vec128<T, N> b) {
+  // Same as ConcatUpperLower for full vectors; do not call that because this
+  // is more efficient for 64x1 vectors.
+  const DFromV<decltype(a)> d;
+  // 0xFF over bytes 0..7, zero over bytes 8..15.
+  const __vector unsigned char kEvenLane = {
+      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0};
+  const Vec128<uint8_t, N * 8> byte_mask{kEvenLane};
+  return IfVecThenElse(BitCast(d, byte_mask), b, a);
+}
+
+// ------------------------------ OddEvenBlocks
+// Returns "even" unchanged; the "odd" argument is ignored. Vec128 is at most
+// 128 bits wide, i.e. at most a single block.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> OddEvenBlocks(Vec128<T, N> /* odd */, Vec128<T, N> even) {
+  return even;
+}
+
+// ------------------------------ SwapAdjacentBlocks
+
+// Returns the input unchanged. Vec128 is at most 128 bits wide, i.e. at most
+// a single block, so there is no adjacent block to swap with.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> SwapAdjacentBlocks(Vec128<T, N> v) {
+  return v;
+}
+
+// ------------------------------ Shl
+
+namespace detail {
+
+// Unsigned lanes: per-lane variable left shift via vec_sl.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Shl(hwy::UnsignedTag /*tag*/, Vec128<T, N> v,
+                         Vec128<T, N> bits) {
+  const auto shifted = vec_sl(v.raw, bits.raw);
+  return Vec128<T, N>{shifted};
+}
+
+// Signed left shift is the same as unsigned: reinterpret both operands as
+// unsigned, reuse the overload above, and reinterpret the result back.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Shl(hwy::SignedTag /*tag*/, Vec128<T, N> v,
+                         Vec128<T, N> bits) {
+  const DFromV<decltype(v)> di;
+  const RebindToUnsigned<decltype(di)> du;
+  const auto v_unsigned = BitCast(du, v);
+  const auto bits_unsigned = BitCast(du, bits);
+  return BitCast(di, Shl(hwy::UnsignedTag(), v_unsigned, bits_unsigned));
+}
+
+}  // namespace detail
+
+// Per-lane variable left shift for non-float lane types. Dispatches to the
+// detail::Shl overload selected by hwy::TypeTag<T>.
+template <typename T, size_t N, HWY_IF_NOT_FLOAT(T)>
+HWY_API Vec128<T, N> operator<<(Vec128<T, N> v, Vec128<T, N> bits) {
+  return detail::Shl(hwy::TypeTag<T>(), v, bits);
+}
+
+// ------------------------------ Shr
+
+namespace detail {
+
+// Unsigned lanes: per-lane variable right shift via vec_sr.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Shr(hwy::UnsignedTag /*tag*/, Vec128<T, N> v,
+                         Vec128<T, N> bits) {
+  const auto shifted = vec_sr(v.raw, bits.raw);
+  return Vec128<T, N>{shifted};
+}
+
+// Signed lanes: per-lane variable right shift via vec_sra. The shift counts
+// are reinterpreted as unsigned before being passed to the intrinsic.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Shr(hwy::SignedTag /*tag*/, Vec128<T, N> v,
+                         Vec128<T, N> bits) {
+  const DFromV<decltype(v)> di;
+  const RebindToUnsigned<decltype(di)> du;
+  const auto counts = BitCast(du, bits).raw;
+  return Vec128<T, N>{vec_sra(v.raw, counts)};
+}
+
+}  // namespace detail
+
+// Per-lane variable right shift. Dispatches to the detail::Shr overload
+// selected by hwy::TypeTag<T>.
+// NOTE(review): unlike operator<<, this template has no HWY_IF_NOT_FLOAT
+// constraint - confirm whether that asymmetry is intended.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator>>(Vec128<T, N> v, Vec128<T, N> bits) {
+  return detail::Shr(hwy::TypeTag<T>(), v, bits);
+}
+
+// ------------------------------ MulEven/Odd 64x64 (UpperHalf)
+
+// 64x64 -> 128-bit multiply of the lanes read by GetLane(a) and GetLane(b).
+// Result layout: lane 0 holds the lower 64 bits, lane 1 the upper 64 bits.
+HWY_INLINE Vec128<uint64_t> MulEven(Vec128<uint64_t> a, Vec128<uint64_t> b) {
+#if HWY_PPC_HAVE_10 && defined(__SIZEOF_INT128__)
+  using VU64 = __vector unsigned long long;
+  const VU64 product = reinterpret_cast<VU64>(vec_mule(a.raw, b.raw));
+#if HWY_IS_LITTLE_ENDIAN
+  return Vec128<uint64_t>{product};
+#else
+  // Need to swap the two halves of the product on big-endian targets, as the
+  // upper 64 bits of the product are in lane 0 and the lower 64 bits are in
+  // lane 1.
+  return Vec128<uint64_t>{vec_sld(product, product, 8)};
+#endif  // HWY_IS_LITTLE_ENDIAN
+#else
+  // Scalar fallback: Mul128 returns one half and writes the other through the
+  // pointer argument.
+  uint64_t through_pointer;
+  const uint64_t returned = Mul128(GetLane(a), GetLane(b), &through_pointer);
+  alignas(16) const uint64_t halves[2] = {returned, through_pointer};
+  return Load(Full128<uint64_t>(), halves);
+#endif  // HWY_PPC_HAVE_10 && defined(__SIZEOF_INT128__)
+}
+
+// 64x64 -> 128-bit multiply of the upper lanes of a and b.
+// Result layout: lane 0 holds the lower 64 bits, lane 1 the upper 64 bits.
+HWY_INLINE Vec128<uint64_t> MulOdd(Vec128<uint64_t> a, Vec128<uint64_t> b) {
+#if HWY_PPC_HAVE_10 && defined(__SIZEOF_INT128__)
+  using VU64 = __vector unsigned long long;
+  const VU64 product = reinterpret_cast<VU64>(vec_mulo(a.raw, b.raw));
+#if HWY_IS_LITTLE_ENDIAN
+  return Vec128<uint64_t>{product};
+#else
+  // Need to swap the two halves of the product on big-endian targets, as the
+  // upper 64 bits of the product are in lane 0 and the lower 64 bits are in
+  // lane 1.
+  return Vec128<uint64_t>{vec_sld(product, product, 8)};
+#endif  // HWY_IS_LITTLE_ENDIAN
+#else
+  // Scalar fallback: multiply the upper-half lanes with Mul128, which returns
+  // one half and writes the other through the pointer argument.
+  const Full64<uint64_t> d2;
+  uint64_t through_pointer;
+  const uint64_t returned = Mul128(GetLane(UpperHalf(d2, a)),
+                                   GetLane(UpperHalf(d2, b)), &through_pointer);
+  alignas(16) const uint64_t halves[2] = {returned, through_pointer};
+  return Load(Full128<uint64_t>(), halves);
+#endif  // HWY_PPC_HAVE_10 && defined(__SIZEOF_INT128__)
+}
+
+// ------------------------------ WidenMulPairwiseAdd
+
+// bfloat16 inputs, f32 result. Each u32 lane holds two 16-bit inputs: the low
+// one is moved into the upper 16 bits with a shift, the high one is isolated
+// with a mask. Both are then reinterpreted as f32, multiplied pairwise and
+// the two products added.
+template <class D32, HWY_IF_F32_D(D32),
+          class V16 = VFromD<Repartition<bfloat16_t, D32>>>
+HWY_API VFromD<D32> WidenMulPairwiseAdd(D32 df32, V16 a, V16 b) {
+  const RebindToUnsigned<decltype(df32)> du32;
+  using VU32 = VFromD<decltype(du32)>;
+
+  const VU32 a_bits = BitCast(du32, a);
+  const VU32 b_bits = BitCast(du32, b);
+  const VU32 upper16 = Set(du32, 0xFFFF0000u);
+
+  // Using shift/and instead of Zip avoids a lane-crossing PromoteTo.
+  const auto a_even = BitCast(df32, ShiftLeft<16>(a_bits));
+  const auto b_even = BitCast(df32, ShiftLeft<16>(b_bits));
+  const auto a_odd = BitCast(df32, And(a_bits, upper16));
+  const auto b_odd = BitCast(df32, And(b_bits, upper16));
+
+  const auto odd_products = Mul(a_odd, b_odd);
+  return MulAdd(a_even, b_even, odd_products);
+}
+
+// 16-bit integer inputs, 32-bit integer result: vec_msum with a zero
+// accumulator.
+// Even if N=1, the input is always at least 2 lanes, hence vec_msum is safe.
+template <class D32, HWY_IF_UI32_D(D32),
+          class V16 = VFromD<RepartitionToNarrow<D32>>>
+HWY_API VFromD<D32> WidenMulPairwiseAdd(D32 d32, V16 a, V16 b) {
+  const auto zero_acc = Zero(d32).raw;
+  const auto sums = vec_msum(a.raw, b.raw, zero_acc);
+  return VFromD<D32>{sums};
+}
+
+// ------------------------------ ReorderWidenMulAccumulate (MulAdd, ZipLower)
+
+// bfloat16 inputs, f32 accumulators. Products of the 16-bit inputs held in
+// the upper half of each u32 lane are accumulated into sum1 (updated in
+// place); products of those in the lower half are added to sum0, which is
+// returned.
+template <class D32, HWY_IF_F32_D(D32),
+          class V16 = VFromD<Repartition<bfloat16_t, D32>>>
+HWY_API VFromD<D32> ReorderWidenMulAccumulate(D32 df32, V16 a, V16 b,
+                                              VFromD<D32> sum0,
+                                              VFromD<D32>& sum1) {
+  const RebindToUnsigned<decltype(df32)> du32;
+  using VU32 = VFromD<decltype(du32)>;
+
+  const VU32 a_bits = BitCast(du32, a);
+  const VU32 b_bits = BitCast(du32, b);
+  const VU32 upper16 = Set(du32, 0xFFFF0000u);
+
+  // Lane order within sum0/1 is undefined, hence we can avoid the
+  // longer-latency lane-crossing PromoteTo. Using shift/and instead of Zip
+  // leads to the odd/even order that RearrangeToOddPlusEven prefers.
+  const auto a_even = BitCast(df32, ShiftLeft<16>(a_bits));
+  const auto b_even = BitCast(df32, ShiftLeft<16>(b_bits));
+  const auto a_odd = BitCast(df32, And(a_bits, upper16));
+  const auto b_odd = BitCast(df32, And(b_bits, upper16));
+
+  sum1 = MulAdd(a_odd, b_odd, sum1);
+  return MulAdd(a_even, b_even, sum0);
+}
+
+// 16-bit integer inputs, 32-bit integer accumulator: a single vec_msum with
+// sum0 as the accumulator. sum1 is left untouched.
+// Even if N=1, the input is always at least 2 lanes, hence vec_msum is safe.
+template <class D32, HWY_IF_UI32_D(D32),
+          class V16 = VFromD<RepartitionToNarrow<D32>>>
+HWY_API VFromD<D32> ReorderWidenMulAccumulate(D32 /* tag */, V16 a, V16 b,
+                                              VFromD<D32> sum0,
+                                              VFromD<D32>& /*sum1*/) {
+  const auto accumulated = vec_msum(a.raw, b.raw, sum0.raw);
+  return VFromD<D32>{accumulated};
+}
+
+// ------------------------------ RearrangeToOddPlusEven
+// int32 accumulators: returns sum0 unchanged and ignores sum1. The integer
+// ReorderWidenMulAccumulate in this file never writes to sum1.
+template <size_t N>
+HWY_API Vec128<int32_t, N> RearrangeToOddPlusEven(Vec128<int32_t, N> sum0,
+                                                  Vec128<int32_t, N> /*sum1*/) {
+  return sum0;  // invariant already holds
+}
+
+// uint32 accumulators: returns sum0 unchanged and ignores sum1. The integer
+// ReorderWidenMulAccumulate in this file never writes to sum1.
+template <size_t N>
+HWY_API Vec128<uint32_t, N> RearrangeToOddPlusEven(
+    Vec128<uint32_t, N> sum0, Vec128<uint32_t, N> /*sum1*/) {
+  return sum0;  // invariant already holds
+}
+
+// Generic overload for all other vector types: returns the lane-wise sum of
+// the two accumulators.
+template <class VW>
+HWY_API VW RearrangeToOddPlusEven(const VW sum0, const VW sum1) {
+  return Add(sum0, sum1);
+}
+
+// ------------------------------ SumOfMulQuadAccumulate
+// Toggle the per-target flag: undefine it if already defined, else define it.
+#ifdef HWY_NATIVE_U8_U8_SUMOFMULQUADACCUMULATE
+#undef HWY_NATIVE_U8_U8_SUMOFMULQUADACCUMULATE
+#else
+#define HWY_NATIVE_U8_U8_SUMOFMULQUADACCUMULATE
+#endif
+// u8 x u8 inputs with a u32 accumulator: a single vec_msum call.
+template <class DU32, HWY_IF_U32_D(DU32)>
+HWY_API VFromD<DU32> SumOfMulQuadAccumulate(
+    DU32 /*du32*/, VFromD<Repartition<uint8_t, DU32>> a,
+    VFromD<Repartition<uint8_t, DU32>> b, VFromD<DU32> sum) {
+  const auto accumulated = vec_msum(a.raw, b.raw, sum.raw);
+  return VFromD<DU32>{accumulated};
+}
+
+// Toggle the per-target flag: undefine it if already defined, else define it.
+#ifdef HWY_NATIVE_U8_I8_SUMOFMULQUADACCUMULATE
+#undef HWY_NATIVE_U8_I8_SUMOFMULQUADACCUMULATE
+#else
+#define HWY_NATIVE_U8_I8_SUMOFMULQUADACCUMULATE
+#endif
+
+// u8 x i8 inputs with an i32 accumulator: a single vec_msum call. Note that
+// the signed operand is passed to vec_msum first and the unsigned one second.
+template <class DI32, HWY_IF_I32_D(DI32), HWY_IF_V_SIZE_LE_D(DI32, 16)>
+HWY_API VFromD<DI32> SumOfMulQuadAccumulate(
+    DI32 /*di32*/, VFromD<Repartition<uint8_t, DI32>> a_u,
+    VFromD<Repartition<int8_t, DI32>> b_i, VFromD<DI32> sum) {
+  const auto accumulated = vec_msum(b_i.raw, a_u.raw, sum.raw);
+  return VFromD<DI32>{accumulated};
+}
+
+// Toggle the per-target flag: undefine it if already defined, else define it.
+#ifdef HWY_NATIVE_I8_I8_SUMOFMULQUADACCUMULATE
+#undef HWY_NATIVE_I8_I8_SUMOFMULQUADACCUMULATE
+#else
+#define HWY_NATIVE_I8_I8_SUMOFMULQUADACCUMULATE
+#endif
+// i8 x i8 inputs with an i32 accumulator, built from the u8 x i8 overload
+// plus a correction term:
+//   1. Reinterpret a as u8 and accumulate a_u8 * b into sum.
+//   2. Mask b with BroadcastSignBit(a) (presumably keeping b only where a is
+//      negative - confirm against BroadcastSignBit), sum the result per u32
+//      lane with detail::AltivecVsum4sbs, and shift left by 8 (i.e. * 256).
+//   3. Subtract the correction from step 1's result.
+template <class DI32, HWY_IF_I32_D(DI32)>
+HWY_API VFromD<DI32> SumOfMulQuadAccumulate(DI32 di32,
+                                            VFromD<Repartition<int8_t, DI32>> a,
+                                            VFromD<Repartition<int8_t, DI32>> b,
+                                            VFromD<DI32> sum) {
+  const Repartition<uint8_t, decltype(di32)> du8;
+
+  const auto a_as_u8 = BitCast(du8, a);
+  const auto unsigned_sum = SumOfMulQuadAccumulate(di32, a_as_u8, b, sum);
+
+  const auto masked_b = And(b, BroadcastSignBit(a));
+  const auto masked_b_sums =
+      detail::AltivecVsum4sbs(di32, masked_b.raw, Zero(di32).raw);
+  const auto correction = ShiftLeft<8>(masked_b_sums);
+
+  return unsigned_sum - correction;
+}
+
+// ================================================== CONVERT
+
+// ------------------------------ Promotions (part w/ narrow lanes -> full)
+
+// Unsigned to signed/unsigned of twice the width: zero-extend by interleaving
+// the input lanes with zero lanes via ZipLower.
+template <class D, typename FromT, HWY_IF_T_SIZE_D(D, 2 * sizeof(FromT)),
+          HWY_IF_NOT_FLOAT_D(D), HWY_IF_UNSIGNED(FromT)>
+HWY_API VFromD<D> PromoteTo(D /* d */,
+                            Vec128<FromT, Rebind<FromT, D>().MaxLanes()> v) {
+  // First pretend the input has twice the lanes - the upper half will be
+  // ignored by ZipLower.
+  const Rebind<FromT, Twice<D>> d_twice;
+  const VFromD<decltype(d_twice)> twice{v.raw};
+  // Then cast to narrow as expected by ZipLower, in case the sign of FromT
+  // differs from that of D.
+  const RepartitionToNarrow<D> dn;
+  const auto narrow = BitCast(dn, twice);
+  const auto zeros = Zero(dn);
+
+  // The operand order depends on the byte order.
+#if HWY_IS_LITTLE_ENDIAN
+  return ZipLower(narrow, zeros);
+#else
+  return ZipLower(zeros, narrow);
+#endif
+}
+
+// Signed to twice the width: replicate the sign bit via vec_unpackh, then
+// reinterpret the result as the raw type of the destination lanes.
+template <class D, typename FromT, HWY_IF_T_SIZE_D(D, 2 * sizeof(FromT)),
+          HWY_IF_NOT_FLOAT_D(D), HWY_IF_SIGNED(FromT)>
+HWY_API VFromD<D> PromoteTo(D /* d */,
+                            Vec128<FromT, Rebind<FromT, D>().MaxLanes()> v) {
+  using Raw = typename detail::Raw128<TFromD<D>>::type;
+  const Raw widened = reinterpret_cast<Raw>(vec_unpackh(v.raw));
+  return VFromD<D>{widened};
+}
+
+// 8-bit to 32-bit: two steps, first to 16-bit (MakeWide<FromT>) and from
+// there to 32-bit.
+template <class D, typename FromT, HWY_IF_T_SIZE_D(D, 4), HWY_IF_NOT_FLOAT_D(D),
+          HWY_IF_T_SIZE(FromT, 1)>
+HWY_API VFromD<D> PromoteTo(D d32,
+                            Vec128<FromT, Rebind<FromT, D>().MaxLanes()> v) {
+  const DFromV<decltype(v)> d8;
+  const Rebind<MakeWide<FromT>, decltype(d8)> d16;
+  const auto v16 = PromoteTo(d16, v);
+  return PromoteTo(d32, v16);
+}
+
+// 8-bit or 16-bit to 64-bit: promote once to MakeWide<FromT>, then call
+// PromoteTo again to reach 64-bit.
+template <class D, typename FromT, HWY_IF_T_SIZE_D(D, 8), HWY_IF_NOT_FLOAT_D(D),
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL(FromT),
+          HWY_IF_T_SIZE_ONE_OF(FromT, (1 << 1) | (1 << 2))>
+HWY_API VFromD<D> PromoteTo(D d64,
+                            Vec128<FromT, Rebind<FromT, D>().MaxLanes()> v) {
+  const Rebind<MakeWide<FromT>, decltype(d64)> d_wide;
+  const auto widened_once = PromoteTo(d_wide, v);
+  return PromoteTo(d64, widened_once);
+}
+
+#if HWY_PPC_HAVE_9
+
+// Per-target flag to prevent generic_ops-inl.h from defining f16 conversions.
+// Toggled: undefined if already defined, else defined.
+#ifdef HWY_NATIVE_F16C
+#undef HWY_NATIVE_F16C
+#else
+#define HWY_NATIVE_F16C
+#endif
+
+// float16 to float32 via vec_extract_fp32_from_shorth.
+template <class D, HWY_IF_F32_D(D)>
+HWY_INLINE VFromD<D> PromoteTo(D /*tag*/, VFromD<Rebind<float16_t, D>> v) {
+  const auto widened = vec_extract_fp32_from_shorth(v.raw);
+  return VFromD<D>{widened};
+}
+
+#endif  // HWY_PPC_HAVE_9
+
+// bfloat16 to float32: view the input as u16, promote to i32, move the 16
+// payload bits into the upper half of each lane, and reinterpret as f32.
+template <class D, HWY_IF_F32_D(D)>
+HWY_API VFromD<D> PromoteTo(D df32, VFromD<Rebind<bfloat16_t, D>> v) {
+  const Rebind<uint16_t, decltype(df32)> du16;
+  const RebindToSigned<decltype(df32)> di32;
+  const auto bits16 = BitCast(du16, v);
+  const auto bits32 = PromoteTo(di32, bits16);
+  return BitCast(df32, ShiftLeft<16>(bits32));
+}
+
+// float32 to float64: interleave v with itself, then convert with
+// vec_doubleo (little-endian) or vec_doublee (big-endian).
+template <class D, HWY_IF_F64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<float, D>> v) {
+  const __vector float duplicated = InterleaveLower(v, v).raw;
+#if HWY_IS_LITTLE_ENDIAN
+  const auto widened = vec_doubleo(duplicated);
+#else
+  const auto widened = vec_doublee(duplicated);
+#endif
+  return VFromD<D>{widened};
+}
+
+// int32 to float64: interleave v with itself, then convert with
+// vec_doubleo (little-endian) or vec_doublee (big-endian).
+template <class D, HWY_IF_F64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<int32_t, D>> v) {
+  const __vector signed int duplicated = InterleaveLower(v, v).raw;
+#if HWY_IS_LITTLE_ENDIAN
+  const auto widened = vec_doubleo(duplicated);
+#else
+  const auto widened = vec_doublee(duplicated);
+#endif
+  return VFromD<D>{widened};
+}
+
+// ------------------------------ Demotions (full -> part w/ narrow lanes)
+
+// Signed to unsigned of half the width: vec_packsu with v passed as both
+// operands.
+template <class D, typename FromT, HWY_IF_UNSIGNED_D(D),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_SIGNED(FromT), HWY_IF_T_SIZE(FromT, sizeof(TFromD<D>) * 2)>
+HWY_API VFromD<D> DemoteTo(D /* tag */,
+                           Vec128<FromT, Rebind<FromT, D>().MaxLanes()> v) {
+  const auto packed = vec_packsu(v.raw, v.raw);
+  return VFromD<D>{packed};
+}
+
+// Signed to signed of half the width: vec_packs with v passed as both
+// operands.
+template <class D, typename FromT, HWY_IF_SIGNED_D(D), HWY_IF_SIGNED(FromT),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_T_SIZE(FromT, sizeof(TFromD<D>) * 2)>
+HWY_API VFromD<D> DemoteTo(D /* tag */,
+                           Vec128<FromT, Rebind<FromT, D>().MaxLanes()> v) {
+  const auto packed = vec_packs(v.raw, v.raw);
+  return VFromD<D>{packed};
+}
+
+// Demotes unsigned lanes (FromT) to unsigned lanes of exactly half the
+// width; enabled for 1-, 2- and 4-byte target lanes. vec_packs is called
+// with unsigned operands here, and `v` is passed as both operands because
+// only a single input vector is being narrowed.
+template <class D, typename FromT, HWY_IF_UNSIGNED_D(D), HWY_IF_UNSIGNED(FromT),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_T_SIZE(FromT, sizeof(TFromD<D>) * 2)>
+HWY_API VFromD<D> DemoteTo(D /* tag */,
+                           Vec128<FromT, Rebind<FromT, D>().MaxLanes()> v) {
+  return VFromD<D>{vec_packs(v.raw, v.raw)};
+}
+
+// Signed -> signed demotion when the source lanes are at least four times
+// as wide as the target lanes (1- or 2-byte targets). Narrows in two steps:
+// first to MakeNarrow<FromT>, then recursively to the requested type.
+template <class D, class FromT, HWY_IF_SIGNED_D(D), HWY_IF_SIGNED(FromT),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2)),
+          hwy::EnableIf<(sizeof(FromT) >= sizeof(TFromD<D>) * 4)>* = nullptr>
+HWY_API VFromD<D> DemoteTo(D d,
+                           Vec128<FromT, Rebind<FromT, D>().MaxLanes()> v) {
+  using HalfWidthT = MakeNarrow<FromT>;
+  const Rebind<HalfWidthT, D> d_half;
+  const auto half_width = DemoteTo(d_half, v);
+  return DemoteTo(d, half_width);
+}
+
+// Unsigned -> unsigned demotion when the source lanes are at least four
+// times as wide as the target lanes (1- or 2-byte targets). Narrows in two
+// steps: first to MakeNarrow<FromT>, then recursively to the requested type.
+template <class D, class FromT, HWY_IF_UNSIGNED_D(D), HWY_IF_UNSIGNED(FromT),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2)),
+          hwy::EnableIf<(sizeof(FromT) >= sizeof(TFromD<D>) * 4)>* = nullptr>
+HWY_API VFromD<D> DemoteTo(D d,
+                           Vec128<FromT, Rebind<FromT, D>().MaxLanes()> v) {
+  using HalfWidthT = MakeNarrow<FromT>;
+  const Rebind<HalfWidthT, D> d_half;
+  const auto half_width = DemoteTo(d_half, v);
+  return DemoteTo(d, half_width);
+}
+
+// Signed -> unsigned demotion when the source lanes are at least four times
+// as wide as the target lanes (1- or 2-byte targets). The first step already
+// converts to the unsigned half-width type; the second step narrows that to
+// the requested type.
+template <class D, class FromT, HWY_IF_UNSIGNED_D(D), HWY_IF_SIGNED(FromT),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2)),
+          hwy::EnableIf<(sizeof(FromT) >= sizeof(TFromD<D>) * 4)>* = nullptr>
+HWY_API VFromD<D> DemoteTo(D d,
+                           Vec128<FromT, Rebind<FromT, D>().MaxLanes()> v) {
+  using UnsignedHalfT = MakeUnsigned<MakeNarrow<FromT>>;
+  const Rebind<UnsignedHalfT, D> d_half;
+  const auto half_width = DemoteTo(d_half, v);
+  return DemoteTo(d, half_width);
+}
+
+#if HWY_PPC_HAVE_9 && \
+    (HWY_COMPILER_GCC_ACTUAL || HWY_HAS_BUILTIN(__builtin_vsx_xvcvsphp))
+
+// We already toggled HWY_NATIVE_F16C above.
+
+// Demotes float lanes to float16_t (result vectors of at most 8 bytes).
+// GCC uses vec_pack_to_short_fp32 directly; other compilers that provide
+// __builtin_vsx_xvcvsphp use that builtin and then truncate each 32-bit
+// result lane to 16 bits. Any other configuration is a hard compile error.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_F16_D(D)>
+HWY_API VFromD<D> DemoteTo(D df16, VFromD<Rebind<float, D>> v) {
+// Avoid vec_pack_to_short_fp32 on Clang because its implementation is buggy.
+#if HWY_COMPILER_GCC_ACTUAL
+  (void)df16;
+  return VFromD<D>{vec_pack_to_short_fp32(v.raw, v.raw)};
+#elif HWY_HAS_BUILTIN(__builtin_vsx_xvcvsphp)
+  // Work around bug in the clang implementation of vec_pack_to_short_fp32
+  // by using the __builtin_vsx_xvcvsphp builtin on PPC9/PPC10 targets
+  // if the __builtin_vsx_xvcvsphp intrinsic is available
+  const RebindToUnsigned<decltype(df16)> du16;
+  const Rebind<uint32_t, D> du;
+  // The builtin's result is reinterpreted as u32 lanes before narrowing.
+  const VFromD<decltype(du)> bits16{
+      reinterpret_cast<__vector unsigned int>(__builtin_vsx_xvcvsphp(v.raw))};
+  return BitCast(df16, TruncateTo(du16, bits16));
+#else
+#error "Only define the function if we have a native implementation"
+#endif
+}
+
+#endif  // HWY_PPC_HAVE_9
+
+// Demotes float lanes to bfloat16_t by keeping only the upper 16 bits of
+// each float's bit pattern: shift right by 16, then truncate to 16 bits.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_BF16_D(D)>
+HWY_API VFromD<D> DemoteTo(D dbf16, VFromD<Rebind<float, D>> v) {
+  const Rebind<uint16_t, decltype(dbf16)> du16;
+  // Unsigned lanes so that ShiftRight is a logical (zero-filling) shift.
+  const Rebind<uint32_t, decltype(dbf16)> du32;
+  const auto f32_bits = BitCast(du32, v);
+  const auto upper_halves = ShiftRight<16>(f32_bits);
+  const auto narrowed = TruncateTo(du16, upper_halves);
+  return BitCast(dbf16, narrowed);
+}
+
+// Demotes two float vectors to one bfloat16 vector. Values derived from `a`
+// end up in the odd 16-bit lanes and values derived from `b` in the even
+// ones (see OddEven below). Whichever input's upper 16 bits are not already
+// in the right position is shifted right by 16; which one that is depends
+// on endianness.
+template <class D, HWY_IF_BF16_D(D), class V32 = VFromD<Repartition<float, D>>>
+HWY_API VFromD<D> ReorderDemote2To(D dbf16, V32 a, V32 b) {
+  const RebindToUnsigned<decltype(dbf16)> du16;
+  const Repartition<uint32_t, decltype(dbf16)> du32;
+#if HWY_IS_LITTLE_ENDIAN
+  const auto a_in_odd = a;
+  const auto b_in_even = ShiftRight<16>(BitCast(du32, b));
+#else
+  const auto a_in_odd = ShiftRight<16>(BitCast(du32, a));
+  const auto b_in_even = b;
+#endif
+  return BitCast(dbf16,
+                 OddEven(BitCast(du16, a_in_odd), BitCast(du16, b_in_even)));
+}
+
+// Specializations for partial vectors because vec_packs sets lanes above 2*N.
+// Signed -> signed, result vectors of at most 4 bytes: joins `a` (lower
+// half) and `b` (upper half) into one vector of twice the lanes and reuses
+// the single-vector DemoteTo.
+template <class DN, typename V, HWY_IF_V_SIZE_LE_D(DN, 4), HWY_IF_SIGNED_D(DN),
+          HWY_IF_SIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_D(DN, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2)>
+HWY_API VFromD<DN> ReorderDemote2To(DN dn, V a, V b) {
+  const Twice<DFromV<decltype(a)>> d_joined;
+  const auto joined = Combine(d_joined, b, a);
+  return DemoteTo(dn, joined);
+}
+// Signed -> signed, 8-byte result: packs both inputs with vec_packs into a
+// full 128-bit vector, then gathers the even 32-bit lanes of that result
+// and returns the lower half.
+template <class DN, typename V, HWY_IF_V_SIZE_D(DN, 8), HWY_IF_SIGNED_D(DN),
+          HWY_IF_SIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_D(DN, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2)>
+HWY_API VFromD<DN> ReorderDemote2To(DN dn, V a, V b) {
+  const Twice<decltype(dn)> dn_full;
+  const Repartition<uint32_t, decltype(dn_full)> du32_full;
+
+  const VFromD<decltype(dn_full)> packed{vec_packs(a.raw, b.raw)};
+  const auto packed_u32 = BitCast(du32_full, packed);
+  const auto even_u32 = ConcatEven(du32_full, packed_u32, packed_u32);
+  return LowerHalf(BitCast(dn_full, even_u32));
+}
+// Signed -> signed, full 16-byte result: a single vec_packs of both inputs
+// produces the whole result, so no fix-up of partial lanes is needed.
+template <class DN, typename V, HWY_IF_V_SIZE_D(DN, 16), HWY_IF_SIGNED_D(DN),
+          HWY_IF_SIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_D(DN, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2)>
+HWY_API VFromD<DN> ReorderDemote2To(DN /*dn*/, V a, V b) {
+  return VFromD<DN>{vec_packs(a.raw, b.raw)};
+}
+
+// Signed -> unsigned, result vectors of at most 4 bytes: joins `a` (lower
+// half) and `b` (upper half) into one vector of twice the lanes and reuses
+// the single-vector DemoteTo.
+template <class DN, typename V, HWY_IF_V_SIZE_LE_D(DN, 4),
+          HWY_IF_UNSIGNED_D(DN), HWY_IF_SIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_D(DN, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2)>
+HWY_API VFromD<DN> ReorderDemote2To(DN dn, V a, V b) {
+  const Twice<DFromV<decltype(a)>> d_joined;
+  const auto joined = Combine(d_joined, b, a);
+  return DemoteTo(dn, joined);
+}
+// Signed -> unsigned, 8-byte result: packs both inputs with vec_packsu into
+// a full 128-bit vector, then gathers the even 32-bit lanes of that result
+// and returns the lower half.
+template <class DN, typename V, HWY_IF_V_SIZE_D(DN, 8), HWY_IF_UNSIGNED_D(DN),
+          HWY_IF_SIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_D(DN, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2)>
+HWY_API VFromD<DN> ReorderDemote2To(DN dn, V a, V b) {
+  const Twice<decltype(dn)> dn_full;
+  const Repartition<uint32_t, decltype(dn_full)> du32_full;
+
+  const VFromD<decltype(dn_full)> packed{vec_packsu(a.raw, b.raw)};
+  const auto packed_u32 = BitCast(du32_full, packed);
+  const auto even_u32 = ConcatEven(du32_full, packed_u32, packed_u32);
+  return LowerHalf(BitCast(dn_full, even_u32));
+}
+// Signed -> unsigned, full 16-byte result: a single vec_packsu of both
+// inputs produces the whole result.
+template <class DN, typename V, HWY_IF_V_SIZE_D(DN, 16), HWY_IF_UNSIGNED_D(DN),
+          HWY_IF_SIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_D(DN, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2)>
+HWY_API VFromD<DN> ReorderDemote2To(DN /*dn*/, V a, V b) {
+  return VFromD<DN>{vec_packsu(a.raw, b.raw)};
+}
+
+// Unsigned -> unsigned, result vectors of at most 4 bytes: joins `a` (lower
+// half) and `b` (upper half) into one vector of twice the lanes and reuses
+// the single-vector DemoteTo.
+template <class DN, typename V, HWY_IF_V_SIZE_LE_D(DN, 4),
+          HWY_IF_UNSIGNED_D(DN), HWY_IF_UNSIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_D(DN, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2)>
+HWY_API VFromD<DN> ReorderDemote2To(DN dn, V a, V b) {
+  const Twice<DFromV<decltype(a)>> d_joined;
+  const auto joined = Combine(d_joined, b, a);
+  return DemoteTo(dn, joined);
+}
+// Unsigned -> unsigned, 8-byte result: packs both inputs with vec_packs
+// (unsigned operands) into a full 128-bit vector, then gathers the even
+// 32-bit lanes of that result and returns the lower half.
+template <class DN, typename V, HWY_IF_V_SIZE_D(DN, 8), HWY_IF_UNSIGNED_D(DN),
+          HWY_IF_UNSIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_D(DN, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2)>
+HWY_API VFromD<DN> ReorderDemote2To(DN dn, V a, V b) {
+  const Twice<decltype(dn)> dn_full;
+  const Repartition<uint32_t, decltype(dn_full)> du32_full;
+
+  const VFromD<decltype(dn_full)> packed{vec_packs(a.raw, b.raw)};
+  const auto packed_u32 = BitCast(du32_full, packed);
+  const auto even_u32 = ConcatEven(du32_full, packed_u32, packed_u32);
+  return LowerHalf(BitCast(dn_full, even_u32));
+}
+// Unsigned -> unsigned, full 16-byte result: a single vec_packs (unsigned
+// operands) of both inputs produces the whole result.
+template <class DN, typename V, HWY_IF_V_SIZE_D(DN, 16), HWY_IF_UNSIGNED_D(DN),
+          HWY_IF_UNSIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_D(DN, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2)>
+HWY_API VFromD<DN> ReorderDemote2To(DN /*dn*/, V a, V b) {
+  return VFromD<DN>{vec_packs(a.raw, b.raw)};
+}
+
+// Integer OrderedDemote2To: forwards unchanged to ReorderDemote2To.
+// NOTE(review): this relies on the integer ReorderDemote2To overloads on
+// this target already producing lanes in order (a first, then b) - confirm.
+template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<D>), class V,
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<D>) * 2),
+          HWY_IF_LANES_D(D, HWY_MAX_LANES_D(DFromV<V>) * 2)>
+HWY_API VFromD<D> OrderedDemote2To(D d, V a, V b) {
+  return ReorderDemote2To(d, a, b);
+}
+
+// bfloat16 OrderedDemote2To: views both float inputs as 16-bit lanes and
+// concatenates every second lane of `a` and `b`. Odd lanes are taken on
+// little-endian and even lanes on big-endian, i.e. the 16-bit lane that
+// holds the upper half of each float on the respective byte order.
+template <class D, HWY_IF_BF16_D(D), class V32 = VFromD<Repartition<float, D>>>
+HWY_API VFromD<D> OrderedDemote2To(D dbf16, V32 a, V32 b) {
+  const RebindToUnsigned<decltype(dbf16)> du16;
+#if HWY_IS_LITTLE_ENDIAN
+  return BitCast(dbf16, ConcatOdd(du16, BitCast(du16, b), BitCast(du16, a)));
+#else
+  return BitCast(dbf16, ConcatEven(du16, BitCast(du16, b), BitCast(du16, a)));
+#endif
+}
+
+// Demotes a single double (Vec64<double>) to a single float (Vec32<float>)
+// using vec_floate on the raw vector.
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_F32_D(D)>
+HWY_API Vec32<float> DemoteTo(D /* tag */, Vec64<double> v) {
+  return Vec32<float>{vec_floate(v.raw)};
+}
+
+// Demotes two doubles to two floats. The conversion intrinsic (vec_floate
+// on little-endian, vec_floato on big-endian) yields a 128-bit float
+// vector; that vector is then viewed as 64-bit lanes and truncated to
+// 32 bits per lane so the two converted floats become adjacent.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_F32_D(D)>
+HWY_API Vec64<float> DemoteTo(D d, Vec128<double> v) {
+#if HWY_IS_LITTLE_ENDIAN
+  const Vec128<float> f64_to_f32{vec_floate(v.raw)};
+#else
+  const Vec128<float> f64_to_f32{vec_floato(v.raw)};
+#endif
+
+  const RebindToUnsigned<D> du;
+  const Rebind<uint64_t, D> du64;
+  return Vec64<float>{
+      BitCast(d, TruncateTo(du, BitCast(du64, f64_to_f32))).raw};
+}
+
+// Demotes a single double (Vec64<double>) to a single int32_t
+// (Vec32<int32_t>) using vec_signede on the raw vector.
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_I32_D(D)>
+HWY_API Vec32<int32_t> DemoteTo(D /* tag */, Vec64<double> v) {
+  return Vec32<int32_t>{vec_signede(v.raw)};
+}
+
+// Demotes two doubles to two int32_t. The conversion intrinsic (vec_signede
+// on little-endian, vec_signedo on big-endian) yields a 128-bit int32
+// vector; it is then viewed as 64-bit lanes and narrowed with vec_pack so
+// the two converted values become adjacent.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_I32_D(D)>
+HWY_API Vec64<int32_t> DemoteTo(D /* tag */, Vec128<double> v) {
+#if HWY_IS_LITTLE_ENDIAN
+  const Vec128<int32_t> f64_to_i32{vec_signede(v.raw)};
+#else
+  const Vec128<int32_t> f64_to_i32{vec_signedo(v.raw)};
+#endif
+
+  const Rebind<int64_t, D> di64;
+  const Vec128<int64_t> vi64 = BitCast(di64, f64_to_i32);
+  return Vec64<int32_t>{vec_pack(vi64.raw, vi64.raw)};
+}
+
+// Narrows uint32_t lanes to uint8_t by truncating twice (32 -> 16 -> 8
+// bits). Intended for input that is already limited to [0, 255].
+template <size_t N>
+HWY_API Vec128<uint8_t, N> U8FromU32(Vec128<uint32_t, N> v) {
+  const DFromV<decltype(v)> du32;
+  const Rebind<uint16_t, decltype(du32)> du16;
+  const Rebind<uint8_t, decltype(du16)> du8;
+  const auto as_u16 = TruncateTo(du16, v);
+  return TruncateTo(du8, as_u16);
+}
+// ------------------------------ Integer <=> fp (ShiftRight, OddEven)
+
+// Note: altivec.h vec_ct* currently contain C casts which triggers
+// -Wdeprecate-lax-vec-conv-all warnings, so disable them.
+
+// Converts same-width integer lanes to float via vec_ctf with a second
+// argument of 0. On Clang, the -Wdeprecate-lax-vec-conv-all warning is
+// suppressed around the call. The diagnostics pop is written after the
+// return statement.
+template <class D, typename FromT, HWY_IF_F32_D(D), HWY_IF_NOT_FLOAT(FromT),
+          HWY_IF_T_SIZE_D(D, sizeof(FromT))>
+HWY_API VFromD<D> ConvertTo(D /* tag */,
+                            Vec128<FromT, Rebind<FromT, D>().MaxLanes()> v) {
+  HWY_DIAGNOSTICS(push)
+#if HWY_COMPILER_CLANG
+  HWY_DIAGNOSTICS_OFF(disable : 5219, ignored "-Wdeprecate-lax-vec-conv-all")
+#endif
+  return VFromD<D>{vec_ctf(v.raw, 0)};
+  HWY_DIAGNOSTICS(pop)
+}
+
+// Converts same-width (64-bit) integer lanes to double via vec_double.
+template <class D, typename FromT, HWY_IF_F64_D(D), HWY_IF_NOT_FLOAT(FromT),
+          HWY_IF_T_SIZE_D(D, sizeof(FromT))>
+HWY_API VFromD<D> ConvertTo(D /* tag */,
+                            Vec128<FromT, Rebind<FromT, D>().MaxLanes()> v) {
+  return VFromD<D>{vec_double(v.raw)};
+}
+
+// Converts floating-point lanes to same-width signed integer lanes via
+// vec_cts with a second argument of 0. Truncates (rounds toward zero).
+// On Clang, the -Wdeprecate-lax-vec-conv-all warning is suppressed around
+// the call.
+template <class D, typename FromT, HWY_IF_SIGNED_D(D), HWY_IF_FLOAT(FromT),
+          HWY_IF_T_SIZE_D(D, sizeof(FromT))>
+HWY_API VFromD<D> ConvertTo(D /* tag */,
+                            Vec128<FromT, Rebind<FromT, D>().MaxLanes()> v) {
+  HWY_DIAGNOSTICS(push)
+#if HWY_COMPILER_CLANG
+  HWY_DIAGNOSTICS_OFF(disable : 5219, ignored "-Wdeprecate-lax-vec-conv-all")
+#endif
+  return VFromD<D>{vec_cts(v.raw, 0)};
+  HWY_DIAGNOSTICS(pop)
+}
+
+// Converts floating-point lanes to same-width unsigned integer lanes via
+// vec_ctu with a second argument of 0. On Clang, the
+// -Wdeprecate-lax-vec-conv-all warning is suppressed around the call.
+template <class D, typename FromT, HWY_IF_UNSIGNED_D(D), HWY_IF_FLOAT(FromT),
+          HWY_IF_T_SIZE_D(D, sizeof(FromT))>
+HWY_API VFromD<D> ConvertTo(D /* tag */,
+                            Vec128<FromT, Rebind<FromT, D>().MaxLanes()> v) {
+  HWY_DIAGNOSTICS(push)
+#if HWY_COMPILER_CLANG
+  HWY_DIAGNOSTICS_OFF(disable : 5219, ignored "-Wdeprecate-lax-vec-conv-all")
+#endif
+  return VFromD<D>{vec_ctu(v.raw, 0)};
+  HWY_DIAGNOSTICS(pop)
+}
+
+// Converts float lanes to int32_t after rounding: vec_round is applied
+// first, then vec_cts (second argument 0) converts the rounded value.
+// On Clang, the -Wdeprecate-lax-vec-conv-all warning is suppressed around
+// the call.
+template <size_t N>
+HWY_API Vec128<int32_t, N> NearestInt(Vec128<float, N> v) {
+  HWY_DIAGNOSTICS(push)
+#if HWY_COMPILER_CLANG
+  HWY_DIAGNOSTICS_OFF(disable : 5219, ignored "-Wdeprecate-lax-vec-conv-all")
+#endif
+  return Vec128<int32_t, N>{vec_cts(vec_round(v.raw), 0)};
+  HWY_DIAGNOSTICS(pop)
+}
+
+// ------------------------------ Floating-point rounding (ConvertTo)
+
+// Toward nearest integer, ties to even.
+// float overload: implemented with vec_round.
+template <size_t N>
+HWY_API Vec128<float, N> Round(Vec128<float, N> v) {
+  return Vec128<float, N>{vec_round(v.raw)};
+}
+
+// double overload of Round: implemented with vec_rint (note: a different
+// intrinsic than the float overload, which uses vec_round).
+template <size_t N>
+HWY_API Vec128<double, N> Round(Vec128<double, N> v) {
+  return Vec128<double, N>{vec_rint(v.raw)};
+}
+
+// Toward zero, aka truncate. Implemented with vec_trunc for any
+// floating-point lane type T.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> Trunc(Vec128<T, N> v) {
+  return Vec128<T, N>{vec_trunc(v.raw)};
+}
+
+// Toward +infinity, aka ceiling. Implemented with vec_ceil for any
+// floating-point lane type T.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> Ceil(Vec128<T, N> v) {
+  return Vec128<T, N>{vec_ceil(v.raw)};
+}
+
+// Toward -infinity, aka floor. Implemented with vec_floor for any
+// floating-point lane type T.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> Floor(Vec128<T, N> v) {
+  return Vec128<T, N>{vec_floor(v.raw)};
+}
+
+// ------------------------------ Floating-point classification
+
+// Returns a mask that is true for lanes holding NaN. Uses the property that
+// NaN is the only value that compares unequal to itself. Only valid for
+// floating-point T (enforced by the static_assert).
+template <typename T, size_t N>
+HWY_API Mask128<T, N> IsNaN(Vec128<T, N> v) {
+  static_assert(IsFloat<T>(), "Only for float");
+  return v != v;
+}
+
+// Returns a mask that is true for lanes holding +/- infinity. Works on the
+// raw bit pattern: doubling the unsigned value drops the sign bit, and the
+// result is compared for equality against hwy::MaxExponentTimes2<T>()
+// (exponent all ones, mantissa zero, shifted left by one).
+template <typename T, size_t N>
+HWY_API Mask128<T, N> IsInf(Vec128<T, N> v) {
+  static_assert(IsFloat<T>(), "Only for float");
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = MakeUnsigned<T>;
+  const VFromD<decltype(du)> bits = BitCast(du, v);
+  // Adding the bits to themselves is a left shift by one.
+  const auto without_sign = Add(bits, bits);
+  const auto inf_pattern =
+      Set(du, static_cast<TU>(hwy::MaxExponentTimes2<T>()));
+  return RebindMask(d, Eq(without_sign, inf_pattern));
+}
+
+// Returns a mask that is true for finite lanes (normal, subnormal or zero).
+// Works on the raw bit pattern: doubling the unsigned value drops the sign
+// bit, and the lane is finite when the result is strictly less than
+// hwy::MaxExponentTimes2<T>() (i.e. the exponent is below its maximum).
+template <typename T, size_t N>
+HWY_API Mask128<T, N> IsFinite(Vec128<T, N> v) {
+  static_assert(IsFloat<T>(), "Only for float");
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = MakeUnsigned<T>;
+  const VFromD<decltype(du)> bits = BitCast(du, v);
+  // Adding the bits to themselves is a left shift by one.
+  const auto without_sign = Add(bits, bits);
+  const auto max_exponent =
+      Set(du, static_cast<TU>(hwy::MaxExponentTimes2<T>()));
+  return RebindMask(d, Lt(without_sign, max_exponent));
+}
+
+// ================================================== CRYPTO
+
+#if !defined(HWY_DISABLE_PPC8_CRYPTO)
+
+// Per-target flag to prevent generic_ops-inl.h from defining AESRound.
+#ifdef HWY_NATIVE_AES
+#undef HWY_NATIVE_AES
+#else
+#define HWY_NATIVE_AES
+#endif
+
+namespace detail {
+// Descriptor/vector type passed to the vec_*cipher* / vec_sbox_be
+// intrinsics. Clang older than 16 uses u64 lanes; all other compilers
+// (including newer Clang) use u8 lanes.
+#if HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1600
+using CipherTag = Full128<uint64_t>;
+#else
+using CipherTag = Full128<uint8_t>;
+#endif  // HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1600
+using CipherVec = VFromD<CipherTag>;
+}  // namespace detail
+
+// One AES encryption round, implemented with vec_cipher_be. On
+// little-endian targets both inputs are byte-reversed before the intrinsic
+// and the result is byte-reversed back; on big-endian the inputs are passed
+// through unchanged. BitCast to/from detail::CipherTag adapts to the lane
+// type the intrinsic expects.
+HWY_API Vec128<uint8_t> AESRound(Vec128<uint8_t> state,
+                                 Vec128<uint8_t> round_key) {
+  const detail::CipherTag dc;
+  const Full128<uint8_t> du8;
+#if HWY_IS_LITTLE_ENDIAN
+  return Reverse(du8,
+                 BitCast(du8, detail::CipherVec{vec_cipher_be(
+                                  BitCast(dc, Reverse(du8, state)).raw,
+                                  BitCast(dc, Reverse(du8, round_key)).raw)}));
+#else
+  return BitCast(du8, detail::CipherVec{vec_cipher_be(
+                          BitCast(dc, state).raw, BitCast(dc, round_key).raw)});
+#endif
+}
+
+// Final AES encryption round, implemented with vec_cipherlast_be. Same
+// endianness handling as AESRound: byte-reverse inputs and output on
+// little-endian, pass through on big-endian.
+HWY_API Vec128<uint8_t> AESLastRound(Vec128<uint8_t> state,
+                                     Vec128<uint8_t> round_key) {
+  const detail::CipherTag dc;
+  const Full128<uint8_t> du8;
+#if HWY_IS_LITTLE_ENDIAN
+  return Reverse(du8,
+                 BitCast(du8, detail::CipherVec{vec_cipherlast_be(
+                                  BitCast(dc, Reverse(du8, state)).raw,
+                                  BitCast(dc, Reverse(du8, round_key)).raw)}));
+#else
+  return BitCast(du8, detail::CipherVec{vec_cipherlast_be(
+                          BitCast(dc, state).raw, BitCast(dc, round_key).raw)});
+#endif
+}
+
+// One AES decryption round. Unlike the other rounds, vec_ncipher_be is
+// called with an all-zero key and `round_key` is XORed onto the intrinsic's
+// result afterwards. On little-endian targets the state is byte-reversed
+// before the intrinsic and the result reversed back before the XOR.
+HWY_API Vec128<uint8_t> AESRoundInv(Vec128<uint8_t> state,
+                                    Vec128<uint8_t> round_key) {
+  const detail::CipherTag dc;
+  const Full128<uint8_t> du8;
+#if HWY_IS_LITTLE_ENDIAN
+  return Xor(Reverse(du8, BitCast(du8, detail::CipherVec{vec_ncipher_be(
+                                           BitCast(dc, Reverse(du8, state)).raw,
+                                           Zero(dc).raw)})),
+             round_key);
+#else
+  return Xor(BitCast(du8, detail::CipherVec{vec_ncipher_be(
+                              BitCast(dc, state).raw, Zero(dc).raw)}),
+             round_key);
+#endif
+}
+
+// Final AES decryption round, implemented with vec_ncipherlast_be. Here the
+// round key is passed directly to the intrinsic. Byte-reverses inputs and
+// output on little-endian, passes through on big-endian.
+HWY_API Vec128<uint8_t> AESLastRoundInv(Vec128<uint8_t> state,
+                                        Vec128<uint8_t> round_key) {
+  const detail::CipherTag dc;
+  const Full128<uint8_t> du8;
+#if HWY_IS_LITTLE_ENDIAN
+  return Reverse(du8,
+                 BitCast(du8, detail::CipherVec{vec_ncipherlast_be(
+                                  BitCast(dc, Reverse(du8, state)).raw,
+                                  BitCast(dc, Reverse(du8, round_key)).raw)}));
+#else
+  return BitCast(du8, detail::CipherVec{vec_ncipherlast_be(
+                          BitCast(dc, state).raw, BitCast(dc, round_key).raw)});
+#endif
+}
+
+// AES InvMixColumns on a 128-bit state.
+//
+// PPC8/PPC9/PPC10 have no single instruction for this operation (unlike ARM
+// Crypto, SVE2 Crypto or AES-NI). It is composed from two existing rounds,
+// both with an all-zero round key: AESLastRound followed by AESRoundInv.
+HWY_API Vec128<uint8_t> AESInvMixColumns(Vec128<uint8_t> state) {
+  const Full128<uint8_t> du8;
+  const Vec128<uint8_t> zero_key = Zero(du8);
+  const Vec128<uint8_t> after_last_round = AESLastRound(state, zero_key);
+  return AESRoundInv(after_last_round, zero_key);
+}
+
+// Key-expansion helper. Three steps, as the local names indicate:
+// 1. vec_sbox_be is applied to all bytes of `v` (sub_word_result);
+// 2. bytes are rearranged via kRotWordShuffle (rot_word_result);
+// 3. kRcon is XORed into bytes 4 and 12 of the result (kRconXorMask).
+template <uint8_t kRcon>
+HWY_API Vec128<uint8_t> AESKeyGenAssist(Vec128<uint8_t> v) {
+  // Non-zero only at byte indices 4 and 12.
+  constexpr __vector unsigned char kRconXorMask = {0, 0, 0, 0, kRcon, 0, 0, 0,
+                                                   0, 0, 0, 0, kRcon, 0, 0, 0};
+  // Within each 8-byte half: source bytes 4..7 unchanged, followed by the
+  // same four bytes rotated by one position (5, 6, 7, 4).
+  constexpr __vector unsigned char kRotWordShuffle = {
+      4, 5, 6, 7, 5, 6, 7, 4, 12, 13, 14, 15, 13, 14, 15, 12};
+  const detail::CipherTag dc;
+  const Full128<uint8_t> du8;
+  const auto sub_word_result =
+      BitCast(du8, detail::CipherVec{vec_sbox_be(BitCast(dc, v).raw)});
+  const auto rot_word_result =
+      TableLookupBytes(sub_word_result, Vec128<uint8_t>{kRotWordShuffle});
+  return Xor(rot_word_result, Vec128<uint8_t>{kRconXorMask});
+}
+
+// Carryless multiplication of the lower 64-bit lanes of `a` and `b`,
+// implemented with vec_pmsum_be. The other lane of each operand is replaced
+// by zero first (InterleaveLower with zero) so it does not contribute.
+template <size_t N>
+HWY_API Vec128<uint64_t, N> CLMulLower(Vec128<uint64_t, N> a,
+                                       Vec128<uint64_t, N> b) {
+  // NOTE: Lane 1 of both a and b need to be zeroed out for the
+  // vec_pmsum_be operation below as the vec_pmsum_be operation
+  // does a carryless multiplication of each 64-bit half and then
+  // adds the two halves using a bitwise XOR operation.
+
+  const DFromV<decltype(a)> d;
+  const auto zero = Zero(d);
+
+  using VU64 = __vector unsigned long long;
+  const VU64 pmsum_result = reinterpret_cast<VU64>(
+      vec_pmsum_be(InterleaveLower(a, zero).raw, InterleaveLower(b, zero).raw));
+
+#if HWY_IS_LITTLE_ENDIAN
+  return Vec128<uint64_t, N>{pmsum_result};
+#else
+  // Need to swap the two halves of pmsum_result on big-endian targets as
+  // the upper 64 bits of the carryless multiplication result are in lane 0 of
+  // pmsum_result and the lower 64 bits of the carryless multiplication result
+  // are in lane 1 of pmsum_result
+  return Vec128<uint64_t, N>{vec_sld(pmsum_result, pmsum_result, 8)};
+#endif
+}
+
+// Carryless multiplication of the upper 64-bit lanes of `a` and `b`,
+// implemented with vec_pmsum_be. The other lane of each operand is replaced
+// by zero first (vec_mergel with zero) so it does not contribute.
+template <size_t N>
+HWY_API Vec128<uint64_t, N> CLMulUpper(Vec128<uint64_t, N> a,
+                                       Vec128<uint64_t, N> b) {
+  // NOTE: Lane 0 of both a and b need to be zeroed out for the
+  // vec_pmsum_be operation below as the vec_pmsum_be operation
+  // does a carryless multiplication of each 64-bit half and then
+  // adds the two halves using a bitwise XOR operation.
+
+  const DFromV<decltype(a)> d;
+  const auto zero = Zero(d);
+
+  using VU64 = __vector unsigned long long;
+  const VU64 pmsum_result = reinterpret_cast<VU64>(
+      vec_pmsum_be(vec_mergel(zero.raw, a.raw), vec_mergel(zero.raw, b.raw)));
+
+#if HWY_IS_LITTLE_ENDIAN
+  return Vec128<uint64_t, N>{pmsum_result};
+#else
+  // Need to swap the two halves of pmsum_result on big-endian targets as
+  // the upper 64 bits of the carryless multiplication result are in lane 0 of
+  // pmsum_result and the lower 64 bits of the carryless multiplication result
+  // are in lane 1 of pmsum_result
+  return Vec128<uint64_t, N>{vec_sld(pmsum_result, pmsum_result, 8)};
+#endif
+}
+
+#endif  // !defined(HWY_DISABLE_PPC8_CRYPTO)
+
+// ================================================== MISC
+
+// ------------------------------ LoadMaskBits (TestBit)
+
+namespace detail {
+
+// Builds a mask for 1-byte lanes from `mask_bits` (bit i governs lane i).
+// PPC10: vec_genbm expands the bits directly; on big-endian the resulting
+// vector is reversed. Older targets: the low 16 bits are broadcast, each of
+// the two bytes is replicated across the 8 lanes it governs, and each lane
+// then tests its own bit.
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE MFromD<D> LoadMaskBits128(D /*d*/, uint64_t mask_bits) {
+#if HWY_PPC_HAVE_10
+  const Vec128<uint8_t> mask_vec{vec_genbm(mask_bits)};
+
+#if HWY_IS_LITTLE_ENDIAN
+  return MFromD<D>{MaskFromVec(mask_vec).raw};
+#else
+  return MFromD<D>{MaskFromVec(Reverse(Full128<uint8_t>(), mask_vec)).raw};
+#endif  // HWY_IS_LITTLE_ENDIAN
+
+#else  // PPC9 or earlier
+  const Full128<uint8_t> du8;
+  const Full128<uint16_t> du16;
+  const Vec128<uint8_t> vbits =
+      BitCast(du8, Set(du16, static_cast<uint16_t>(mask_bits)));
+
+  // Replicate bytes 8x such that each byte contains the bit that governs it.
+#if HWY_IS_LITTLE_ENDIAN
+  const __vector unsigned char kRep8 = {0, 0, 0, 0, 0, 0, 0, 0,
+                                        1, 1, 1, 1, 1, 1, 1, 1};
+#else
+  const __vector unsigned char kRep8 = {1, 1, 1, 1, 1, 1, 1, 1,
+                                        0, 0, 0, 0, 0, 0, 0, 0};
+#endif  // HWY_IS_LITTLE_ENDIAN
+
+  const Vec128<uint8_t> rep8{vec_perm(vbits.raw, vbits.raw, kRep8)};
+  // Lane i within each group of 8 tests bit (i mod 8) of its byte.
+  const __vector unsigned char kBit = {1, 2, 4, 8, 16, 32, 64, 128,
+                                       1, 2, 4, 8, 16, 32, 64, 128};
+  return MFromD<D>{TestBit(rep8, Vec128<uint8_t>{kBit}).raw};
+#endif  // HWY_PPC_HAVE_10
+}
+
+// Builds a mask for 2-byte lanes from `mask_bits` (bit i governs lane i).
+// PPC10: vec_genhm expands the bits; on big-endian the vector is reversed.
+// Older targets: the low 16 bits are broadcast to every lane and lane i
+// tests bit i via kBit.
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_INLINE MFromD<D> LoadMaskBits128(D /*d*/, uint64_t mask_bits) {
+#if HWY_PPC_HAVE_10
+  const Vec128<uint16_t> mask_vec{vec_genhm(mask_bits)};
+
+#if HWY_IS_LITTLE_ENDIAN
+  return MFromD<D>{MaskFromVec(mask_vec).raw};
+#else
+  return MFromD<D>{MaskFromVec(Reverse(Full128<uint16_t>(), mask_vec)).raw};
+#endif  // HWY_IS_LITTLE_ENDIAN
+
+#else   // PPC9 or earlier
+  const __vector unsigned short kBit = {1, 2, 4, 8, 16, 32, 64, 128};
+  const auto vmask_bits =
+      Set(Full128<uint16_t>(), static_cast<uint16_t>(mask_bits));
+  return MFromD<D>{TestBit(vmask_bits, Vec128<uint16_t>{kBit}).raw};
+#endif  // HWY_PPC_HAVE_10
+}
+
+// Builds a mask for 4-byte lanes from `mask_bits` (bit i governs lane i).
+// PPC10: vec_genwm expands the bits; on big-endian the vector is reversed.
+// Older targets: the low 32 bits are broadcast to every lane and lane i
+// tests bit i via kBit.
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_INLINE MFromD<D> LoadMaskBits128(D /*d*/, uint64_t mask_bits) {
+#if HWY_PPC_HAVE_10
+  const Vec128<uint32_t> mask_vec{vec_genwm(mask_bits)};
+
+#if HWY_IS_LITTLE_ENDIAN
+  return MFromD<D>{MaskFromVec(mask_vec).raw};
+#else
+  return MFromD<D>{MaskFromVec(Reverse(Full128<uint32_t>(), mask_vec)).raw};
+#endif  // HWY_IS_LITTLE_ENDIAN
+
+#else   // PPC9 or earlier
+  const __vector unsigned int kBit = {1, 2, 4, 8};
+  const auto vmask_bits =
+      Set(Full128<uint32_t>(), static_cast<uint32_t>(mask_bits));
+  return MFromD<D>{TestBit(vmask_bits, Vec128<uint32_t>{kBit}).raw};
+#endif  // HWY_PPC_HAVE_10
+}
+
+// Builds a mask for 8-byte lanes from `mask_bits` (bit i governs lane i).
+// PPC10: vec_gendm expands the bits; on big-endian the vector is reversed.
+// Older targets: mask_bits is broadcast to both lanes and lane i tests
+// bit i via kBit.
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE MFromD<D> LoadMaskBits128(D /*d*/, uint64_t mask_bits) {
+#if HWY_PPC_HAVE_10
+  const Vec128<uint64_t> mask_vec{vec_gendm(mask_bits)};
+
+#if HWY_IS_LITTLE_ENDIAN
+  return MFromD<D>{MaskFromVec(mask_vec).raw};
+#else
+  return MFromD<D>{MaskFromVec(Reverse(Full128<uint64_t>(), mask_vec)).raw};
+#endif  // HWY_IS_LITTLE_ENDIAN
+
+#else   // PPC9 or earlier
+  const __vector unsigned long long kBit = {1, 2};
+  const auto vmask_bits =
+      Set(Full128<uint64_t>(), static_cast<uint64_t>(mask_bits));
+  return MFromD<D>{TestBit(vmask_bits, Vec128<uint64_t>{kBit}).raw};
+#endif  // HWY_PPC_HAVE_10
+}
+
+}  // namespace detail
+
+// `bits` points to at least 8 readable bytes, not all of which need be
+// valid. For vectors with at most 8 lanes only bits[0] is read; when there
+// are fewer than 8 lanes, the bits above the lane count are cleared before
+// the mask is built.
+template <class D, HWY_IF_LANES_LE_D(D, 8)>
+HWY_API MFromD<D> LoadMaskBits(D d, const uint8_t* HWY_RESTRICT bits) {
+  constexpr size_t kLanes = MaxLanes(d);
+  const uint64_t first_byte = bits[0];
+  const uint64_t mask_bits =
+      (kLanes < 8) ? (first_byte & ((1u << kLanes) - 1)) : first_byte;
+  return detail::LoadMaskBits128(d, mask_bits);
+}
+
+// LoadMaskBits for vectors with exactly 16 lanes: reads two bytes from
+// `bits` (stored in little-endian byte order), byte-swapping on big-endian
+// targets, and forwards the 16 mask bits to detail::LoadMaskBits128.
+template <class D, HWY_IF_LANES_D(D, 16)>
+HWY_API MFromD<D> LoadMaskBits(D d, const uint8_t* HWY_RESTRICT bits) {
+  // First, copy the mask bits to a uint16_t as there are at most
+  // 16 lanes in a vector.
+
+  // Copying the mask bits to a uint16_t first will also ensure that the
+  // mask bits are loaded into the lower 16 bits on big-endian PPC targets.
+  uint16_t u16_mask_bits;
+  CopyBytes<sizeof(uint16_t)>(bits, &u16_mask_bits);
+
+#if HWY_IS_LITTLE_ENDIAN
+  return detail::LoadMaskBits128(d, u16_mask_bits);
+#else
+  // On big-endian targets, u16_mask_bits need to be byte swapped as bits
+  // contains the mask bits in little-endian byte order
+
+  // GCC/Clang will optimize the load of u16_mask_bits and byte swap to a
+  // single lhbrx instruction on big-endian PPC targets when optimizations
+  // are enabled.
+#if HWY_HAS_BUILTIN(__builtin_bswap16)
+  return detail::LoadMaskBits128(d, __builtin_bswap16(u16_mask_bits));
+#else
+  // Manual byte swap; the cast discards bits above 16 after integer
+  // promotion of the shifted operands.
+  return detail::LoadMaskBits128(
+      d, static_cast<uint16_t>((u16_mask_bits << 8) | (u16_mask_bits >> 8)));
+#endif
+#endif
+}
+
+// Compile-time trait: `value` is nonzero for every lane type except 1-byte
+// types.
+template <typename T>
+struct CompressIsPartition {
+  // generic_ops-inl does not guarantee IsPartition for 8-bit.
+  enum { value = (sizeof(T) != 1) };
+};
+
+// ------------------------------ StoreMaskBits
+
+namespace detail {
+
+#if !HWY_PPC_HAVE_10 || HWY_IS_BIG_ENDIAN
+// fallback for missing vec_extractm
+// Gathers the bits of `sign_bits` selected by the indices in `bit_shuffle`
+// using vec_vbpermq, and returns the 64-bit element of the result that
+// holds them (element 1 on little-endian, element 0 on big-endian).
+template <size_t N>
+HWY_INLINE uint64_t ExtractSignBits(Vec128<uint8_t, N> sign_bits,
+                                    __vector unsigned char bit_shuffle) {
+  // clang POWER8 and 9 targets appear to differ in their return type of
+  // vec_vbpermq: unsigned or signed, so cast to avoid a warning.
+  using VU64 = detail::Raw128<uint64_t>::type;
+  const Vec128<uint64_t> extracted{
+      reinterpret_cast<VU64>(vec_vbpermq(sign_bits.raw, bit_shuffle))};
+  return extracted.raw[HWY_IS_LITTLE_ENDIAN];
+}
+
+#endif  // !HWY_PPC_HAVE_10 || HWY_IS_BIG_ENDIAN
+
+// Returns the mask as an integer with one bit per 1-byte lane. On
+// little-endian PPC10, vec_extractm does this directly; otherwise
+// ExtractSignBits gathers one bit per lane using the bit indices in
+// kBitShuffle (multiples of 8, in descending order).
+template <typename T, size_t N>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<1> /*tag*/, Mask128<T, N> mask) {
+  const DFromM<decltype(mask)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  const VFromD<decltype(du8)> sign_bits = BitCast(du8, VecFromMask(d, mask));
+#if HWY_PPC_HAVE_10 && HWY_IS_LITTLE_ENDIAN
+  return static_cast<uint64_t>(vec_extractm(sign_bits.raw));
+#else
+  const __vector unsigned char kBitShuffle = {120, 112, 104, 96, 88, 80, 72, 64,
+                                              56,  48,  40,  32, 24, 16, 8,  0};
+  return ExtractSignBits(sign_bits, kBitShuffle);
+#endif  // HWY_PPC_HAVE_10 && HWY_IS_LITTLE_ENDIAN
+}
+
+// Returns the mask as an integer with one bit per 2-byte lane. On
+// little-endian PPC10, vec_extractm does this directly; otherwise
+// ExtractSignBits gathers one bit per lane (bit indices in steps of 16).
+// The eight index entries set to 128 are filler for unused positions; their
+// placement within kBitShuffle depends on endianness.
+template <typename T, size_t N>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<2> /*tag*/, Mask128<T, N> mask) {
+  const DFromM<decltype(mask)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  const VFromD<decltype(du8)> sign_bits = BitCast(du8, VecFromMask(d, mask));
+
+#if HWY_PPC_HAVE_10 && HWY_IS_LITTLE_ENDIAN
+  const RebindToUnsigned<decltype(d)> du;
+  return static_cast<uint64_t>(vec_extractm(BitCast(du, sign_bits).raw));
+#else
+#if HWY_IS_LITTLE_ENDIAN
+  const __vector unsigned char kBitShuffle = {
+      112, 96, 80, 64, 48, 32, 16, 0, 128, 128, 128, 128, 128, 128, 128, 128};
+#else
+  const __vector unsigned char kBitShuffle = {
+      128, 128, 128, 128, 128, 128, 128, 128, 112, 96, 80, 64, 48, 32, 16, 0};
+#endif
+  return ExtractSignBits(sign_bits, kBitShuffle);
+#endif  // HWY_PPC_HAVE_10 && HWY_IS_LITTLE_ENDIAN
+}
+
+// Returns the mask as an integer with one bit per 4-byte lane. On
+// little-endian PPC10, vec_extractm does this directly; otherwise
+// ExtractSignBits gathers one bit per lane (bit indices in steps of 32).
+// The twelve index entries set to 128 are filler for unused positions;
+// their placement within kBitShuffle depends on endianness.
+template <typename T, size_t N>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<4> /*tag*/, Mask128<T, N> mask) {
+  const DFromM<decltype(mask)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  const VFromD<decltype(du8)> sign_bits = BitCast(du8, VecFromMask(d, mask));
+#if HWY_PPC_HAVE_10 && HWY_IS_LITTLE_ENDIAN
+  const RebindToUnsigned<decltype(d)> du;
+  return static_cast<uint64_t>(vec_extractm(BitCast(du, sign_bits).raw));
+#else
+#if HWY_IS_LITTLE_ENDIAN
+  const __vector unsigned char kBitShuffle = {96,  64,  32,  0,   128, 128,
+                                              128, 128, 128, 128, 128, 128,
+                                              128, 128, 128, 128};
+#else
+  const __vector unsigned char kBitShuffle = {128, 128, 128, 128, 128, 128,
+                                              128, 128, 128, 128, 128, 128,
+                                              96,  64,  32,  0};
+#endif
+  return ExtractSignBits(sign_bits, kBitShuffle);
+#endif  // HWY_PPC_HAVE_10 && HWY_IS_LITTLE_ENDIAN
+}
+
+// Returns the mask as an integer with one bit per 8-byte lane. On
+// little-endian PPC10, vec_extractm does this directly; otherwise
+// ExtractSignBits gathers one bit per lane (bit indices 64 and 0).
+// The fourteen index entries set to 128 are filler for unused positions;
+// their placement within kBitShuffle depends on endianness.
+template <typename T, size_t N>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<8> /*tag*/, Mask128<T, N> mask) {
+  const DFromM<decltype(mask)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  const VFromD<decltype(du8)> sign_bits = BitCast(du8, VecFromMask(d, mask));
+#if HWY_PPC_HAVE_10 && HWY_IS_LITTLE_ENDIAN
+  const RebindToUnsigned<decltype(d)> du;
+  return static_cast<uint64_t>(vec_extractm(BitCast(du, sign_bits).raw));
+#else
+#if HWY_IS_LITTLE_ENDIAN
+  const __vector unsigned char kBitShuffle = {64,  0,   128, 128, 128, 128,
+                                              128, 128, 128, 128, 128, 128,
+                                              128, 128, 128, 128};
+#else
+  const __vector unsigned char kBitShuffle = {128, 128, 128, 128, 128, 128,
+                                              128, 128, 128, 128, 128, 128,
+                                              128, 128, 64,  0};
+#endif
+  return ExtractSignBits(sign_bits, kBitShuffle);
+#endif  // HWY_PPC_HAVE_10 && HWY_IS_LITTLE_ENDIAN
+}
+
+// Keeps only the mask bits that belong to the N lanes of a partial vector.
+// For a full 16-byte vector (N * sizeof(T) == 16) the bits are returned
+// unchanged; otherwise everything above the lowest N bits is cleared.
+template <typename T, size_t N>
+constexpr uint64_t OnlyActive(uint64_t mask_bits) {
+  return ((N * sizeof(T)) != 16) ? (mask_bits & ((1ull << N) - 1))
+                                 : mask_bits;
+}
+
+// Entry point: dispatches on sizeof(T) to the matching SizeTag overload and
+// then clears the bits of inactive lanes via OnlyActive.
+template <typename T, size_t N>
+HWY_INLINE uint64_t BitsFromMask(Mask128<T, N> mask) {
+  return OnlyActive<T, N>(BitsFromMask(hwy::SizeTag<sizeof(T)>(), mask));
+}
+
+}  // namespace detail
+
+// `bits` points to at least 8 writable bytes. For vectors with at most
+// 8 lanes, the mask bits fit in one byte: they are written to bits[0] and
+// the number of bytes written (1) is returned.
+template <class D, HWY_IF_LANES_LE_D(D, 8)>
+HWY_API size_t StoreMaskBits(D /*d*/, MFromD<D> mask, uint8_t* bits) {
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  bits[0] = static_cast<uint8_t>(mask_bits);
+  return sizeof(uint8_t);
+}
+
+// StoreMaskBits for vectors with exactly 16 lanes: writes the 16 mask bits
+// to `bits` as two bytes in little-endian byte order (byte-swapping first
+// on big-endian targets) and returns the number of bytes written (2).
+template <class D, HWY_IF_LANES_D(D, 16)>
+HWY_API size_t StoreMaskBits(D /*d*/, MFromD<D> mask, uint8_t* bits) {
+  const auto mask_bits = detail::BitsFromMask(mask);
+
+  // First convert mask_bits to a uint16_t as we only want to store
+  // the lower 16 bits of mask_bits as there are 16 lanes in mask.
+
+  // Converting mask_bits to a uint16_t first will also ensure that
+  // the lower 16 bits of mask_bits are stored instead of the upper 16 bits
+  // of mask_bits on big-endian PPC targets.
+#if HWY_IS_LITTLE_ENDIAN
+  const uint16_t u16_mask_bits = static_cast<uint16_t>(mask_bits);
+#else
+  // On big-endian targets, the bytes of mask_bits need to be swapped
+  // as StoreMaskBits expects the mask bits to be stored in little-endian
+  // byte order.
+
+  // GCC will also optimize the byte swap and CopyBytes operations below
+  // to a single sthbrx instruction when optimizations are enabled on
+  // big-endian PPC targets
+#if HWY_HAS_BUILTIN(__builtin_bswap16)
+  const uint16_t u16_mask_bits =
+      __builtin_bswap16(static_cast<uint16_t>(mask_bits));
+#else
+  // Manual byte swap; the outer cast keeps only the low 16 bits.
+  const uint16_t u16_mask_bits = static_cast<uint16_t>(
+      (mask_bits << 8) | (static_cast<uint16_t>(mask_bits) >> 8));
+#endif
+#endif
+
+  CopyBytes<sizeof(uint16_t)>(&u16_mask_bits, bits);
+  return sizeof(uint16_t);
+}
+
+// ------------------------------ Mask testing
+
+// Full 16-byte vectors: compares the raw mask, viewed as unsigned lanes,
+// against an all-zero vector using vec_all_eq.
+template <class D, HWY_IF_V_SIZE_D(D, 16)>
+HWY_API bool AllFalse(D d, MFromD<D> mask) {
+  const RebindToUnsigned<decltype(d)> du;
+  const auto mask_u = RebindMask(du, mask);
+  const auto zero = Zero(du);
+  return static_cast<bool>(vec_all_eq(mask_u.raw, zero.raw));
+}
+
+// Full 16-byte vectors: compares the raw mask, viewed as unsigned lanes,
+// against a vector whose lanes all hold LimitsMax<TU>() using vec_all_eq.
+template <class D, HWY_IF_V_SIZE_D(D, 16)>
+HWY_API bool AllTrue(D d, MFromD<D> mask) {
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  const auto mask_u = RebindMask(du, mask);
+  const auto all_ones = Set(du, hwy::LimitsMax<TU>());
+  return static_cast<bool>(vec_all_eq(mask_u.raw, all_ones.raw));
+}
+
+// Partial vectors (8 bytes or fewer): ANDs the raw mask with
+// FirstN(d_full, kN) so that only the first kN lanes can contribute, then
+// defers to the full-vector overload.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API bool AllFalse(D d, MFromD<D> mask) {
+  const Full128<TFromD<D>> d_full;
+  using MFull = MFromD<decltype(d_full)>;
+  constexpr size_t kN = MaxLanes(d);
+  const auto first_n = FirstN(d_full, kN);
+  const MFull restricted{vec_and(mask.raw, first_n.raw)};
+  return AllFalse(d_full, restricted);
+}
+
+// Partial vectors (8 bytes or fewer): ORs the raw mask with
+// Not(FirstN(d_full, kN)) so that lanes outside the first kN cannot make the
+// result false, then defers to the full-vector overload.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API bool AllTrue(D d, MFromD<D> mask) {
+  const Full128<TFromD<D>> d_full;
+  using MFull = MFromD<decltype(d_full)>;
+  constexpr size_t kN = MaxLanes(d);
+  const auto beyond_n = Not(FirstN(d_full, kN));
+  const MFull padded{vec_or(mask.raw, beyond_n.raw)};
+  return AllTrue(d_full, padded);
+}
+
+// Returns the population count of the mask's bit representation.
+template <class D>
+HWY_API size_t CountTrue(D /* tag */, MFromD<D> mask) {
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  return PopCount(mask_bits);
+}
+
+#if HWY_PPC_HAVE_9 && (!HWY_PPC_HAVE_10 || HWY_IS_BIG_ENDIAN)
+namespace detail {
+
+// Returns the result of vec_cntlz_lsbb(v.raw) as size_t.
+// NOTE(review): on little-endian builds with GCC < 12 the intrinsic is
+// replaced by inline assembly that issues `vctzlsbb` (the trailing-zero
+// mnemonic). This looks deliberate - presumably the intrinsic counts in lane
+// order, which is reversed relative to register byte order on little-endian -
+// so confirm against the Power ISA before "correcting" the mnemonic.
+template <class V>
+static HWY_INLINE size_t VsxCntlzLsbb(V v) {
+#if HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1200 && \
+    HWY_IS_LITTLE_ENDIAN
+  // Use inline assembly to work around bug in GCC 11 and earlier on
+  // little-endian PPC9
+  int idx;
+  // Result goes to a general-purpose register ("=r"); input is a vector
+  // register ("v").
+  __asm__("vctzlsbb %0,%1" : "=r"(idx) : "v"(v.raw));
+  return static_cast<size_t>(idx);
+#else
+  return static_cast<size_t>(vec_cntlz_lsbb(v.raw));
+#endif
+}
+
+// Returns the result of vec_cnttz_lsbb(v.raw) as size_t.
+// NOTE(review): on little-endian builds with GCC < 12 the intrinsic is
+// replaced by inline assembly that issues `vclzlsbb` (the leading-zero
+// mnemonic). This looks deliberate - presumably the intrinsic counts in lane
+// order, which is reversed relative to register byte order on little-endian -
+// so confirm against the Power ISA before "correcting" the mnemonic.
+template <class V>
+static HWY_INLINE size_t VsxCnttzLsbb(V v) {
+#if HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1200 && \
+    HWY_IS_LITTLE_ENDIAN
+  // Use inline assembly to work around bug in GCC 11 and earlier on
+  // little-endian PPC9
+  int idx;
+  // Result goes to a general-purpose register ("=r"); input is a vector
+  // register ("v").
+  __asm__("vclzlsbb %0,%1" : "=r"(idx) : "v"(v.raw));
+  return static_cast<size_t>(idx);
+#else
+  return static_cast<size_t>(vec_cnttz_lsbb(v.raw));
+#endif
+}
+
+}  // namespace detail
+#endif
+
+// Returns the index of the first true lane of `mask`. The generic path uses
+// Num0BitsBelowLS1Bit_Nonzero64, i.e. it relies on at least one lane being
+// set.
+template <class D, typename T = TFromD<D>>
+HWY_API size_t FindKnownFirstTrue(D d, MFromD<D> mask) {
+// For little-endian PPC10, BitsFromMask is already efficient.
+#if HWY_PPC_HAVE_9 && (!HWY_PPC_HAVE_10 || HWY_IS_BIG_ENDIAN)
+  if (detail::IsFull(d)) {
+    const Repartition<uint8_t, D> d8;
+    const auto mask_bytes = BitCast(d8, VecFromMask(d, mask));
+    // The helper operates on bytes; divide by the lane size to obtain a lane
+    // index.
+    const size_t byte_count = detail::VsxCntlzLsbb(mask_bytes);
+    return byte_count / sizeof(T);
+  }
+#endif  // HWY_PPC_HAVE_9 && (!HWY_PPC_HAVE_10 || HWY_IS_BIG_ENDIAN)
+  (void)d;
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  return Num0BitsBelowLS1Bit_Nonzero64(mask_bits);
+}
+
+// Returns the index of the first true lane of `mask`, or -1 if no lane is
+// true.
+template <class D, typename T = TFromD<D>>
+HWY_API intptr_t FindFirstTrue(D d, MFromD<D> mask) {
+// For little-endian PPC10, BitsFromMask is already efficient.
+#if HWY_PPC_HAVE_9 && (!HWY_PPC_HAVE_10 || HWY_IS_BIG_ENDIAN)
+  constexpr size_t kN = 16 / sizeof(T);
+  if (detail::IsFull(d)) {
+    const Repartition<uint8_t, D> d8;
+    const auto mask_bytes = BitCast(d8, VecFromMask(d, mask));
+    const size_t lane_idx = detail::VsxCntlzLsbb(mask_bytes) / sizeof(T);
+    // An index equal to the lane count is reported as "not found".
+    if (lane_idx == kN) return -1;
+    return static_cast<intptr_t>(lane_idx);
+  }
+#endif  // HWY_PPC_HAVE_9 && (!HWY_PPC_HAVE_10 || HWY_IS_BIG_ENDIAN)
+  (void)d;
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  if (mask_bits == 0) return -1;
+  return intptr_t(Num0BitsBelowLS1Bit_Nonzero64(mask_bits));
+}
+
+// Returns the index of the last true lane of `mask`. The generic path uses
+// Num0BitsAboveMS1Bit_Nonzero64, i.e. it relies on at least one lane being
+// set.
+template <class D, typename T = TFromD<D>>
+HWY_API size_t FindKnownLastTrue(D d, MFromD<D> mask) {
+// For little-endian PPC10, BitsFromMask is already efficient.
+#if HWY_PPC_HAVE_9 && (!HWY_PPC_HAVE_10 || HWY_IS_BIG_ENDIAN)
+  if (detail::IsFull(d)) {
+    const Repartition<uint8_t, D> d8;
+    const auto mask_bytes = BitCast(d8, VecFromMask(d, mask));
+    const size_t lanes_after = detail::VsxCnttzLsbb(mask_bytes) / sizeof(T);
+    // Index of the highest lane in a full 16-byte vector.
+    constexpr size_t kLastLane = 16 / sizeof(T) - 1;
+    return kLastLane - lanes_after;
+  }
+#endif  // HWY_PPC_HAVE_9 && (!HWY_PPC_HAVE_10 || HWY_IS_BIG_ENDIAN)
+  (void)d;
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  return 63 - Num0BitsAboveMS1Bit_Nonzero64(mask_bits);
+}
+
+// Returns the index of the last true lane of `mask`, or -1 if no lane is
+// true.
+template <class D, typename T = TFromD<D>>
+HWY_API intptr_t FindLastTrue(D d, MFromD<D> mask) {
+// For little-endian PPC10, BitsFromMask is already efficient.
+#if HWY_PPC_HAVE_9 && (!HWY_PPC_HAVE_10 || HWY_IS_BIG_ENDIAN)
+  constexpr size_t kN = 16 / sizeof(T);
+  if (detail::IsFull(d)) {
+    const Repartition<uint8_t, D> d8;
+    const auto mask_bytes = BitCast(d8, VecFromMask(d, mask));
+    const size_t lanes_after = detail::VsxCnttzLsbb(mask_bytes) / sizeof(T);
+    // A count equal to the lane count is reported as "not found".
+    if (lanes_after == kN) return -1;
+    return static_cast<intptr_t>(kN - 1 - lanes_after);
+  }
+#endif  // HWY_PPC_HAVE_9 && (!HWY_PPC_HAVE_10 || HWY_IS_BIG_ENDIAN)
+  (void)d;
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  if (mask_bits == 0) return -1;
+  return intptr_t(63 - Num0BitsAboveMS1Bit_Nonzero64(mask_bits));
+}
+
+// ------------------------------ Compress, CompressBits
+
+namespace detail {
+
+// Returns byte indices for compressing 2-byte lanes: looks up the 8-entry
+// table row selected by `mask_bits` (one bit per lane, must be < 256) and
+// expands each entry into a pair of byte indices. CompressBits passes the
+// result to TableLookupBytes.
+// Also works for N < 8 because the first 16 4-tuples only reference bytes 0-6.
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_INLINE VFromD<D> IndicesFromBits128(D d, uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 256);
+  const Rebind<uint8_t, decltype(d)> d8;
+  const Twice<decltype(d8)> d8t;
+  const RebindToUnsigned<decltype(d)> du;
+
+  // To reduce cache footprint, store lane indices and convert to byte indices
+  // (2*lane + 0..1), with the doubling baked into the table. It's not clear
+  // that the additional cost of unpacking nibbles is worthwhile.
+  // Layout: 256 rows (one per mask_bits value) of 8 bytes each; two rows are
+  // printed per source line, separated by /**/.
+  alignas(16) static constexpr uint8_t table[2048] = {
+      // PrintCompress16x8Tables
+      0,  2,  4,  6,  8,  10, 12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      2,  0,  4,  6,  8,  10, 12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      4,  0,  2,  6,  8,  10, 12, 14, /**/ 0, 4,  2,  6,  8,  10, 12, 14,  //
+      2,  4,  0,  6,  8,  10, 12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      6,  0,  2,  4,  8,  10, 12, 14, /**/ 0, 6,  2,  4,  8,  10, 12, 14,  //
+      2,  6,  0,  4,  8,  10, 12, 14, /**/ 0, 2,  6,  4,  8,  10, 12, 14,  //
+      4,  6,  0,  2,  8,  10, 12, 14, /**/ 0, 4,  6,  2,  8,  10, 12, 14,  //
+      2,  4,  6,  0,  8,  10, 12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      8,  0,  2,  4,  6,  10, 12, 14, /**/ 0, 8,  2,  4,  6,  10, 12, 14,  //
+      2,  8,  0,  4,  6,  10, 12, 14, /**/ 0, 2,  8,  4,  6,  10, 12, 14,  //
+      4,  8,  0,  2,  6,  10, 12, 14, /**/ 0, 4,  8,  2,  6,  10, 12, 14,  //
+      2,  4,  8,  0,  6,  10, 12, 14, /**/ 0, 2,  4,  8,  6,  10, 12, 14,  //
+      6,  8,  0,  2,  4,  10, 12, 14, /**/ 0, 6,  8,  2,  4,  10, 12, 14,  //
+      2,  6,  8,  0,  4,  10, 12, 14, /**/ 0, 2,  6,  8,  4,  10, 12, 14,  //
+      4,  6,  8,  0,  2,  10, 12, 14, /**/ 0, 4,  6,  8,  2,  10, 12, 14,  //
+      2,  4,  6,  8,  0,  10, 12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      10, 0,  2,  4,  6,  8,  12, 14, /**/ 0, 10, 2,  4,  6,  8,  12, 14,  //
+      2,  10, 0,  4,  6,  8,  12, 14, /**/ 0, 2,  10, 4,  6,  8,  12, 14,  //
+      4,  10, 0,  2,  6,  8,  12, 14, /**/ 0, 4,  10, 2,  6,  8,  12, 14,  //
+      2,  4,  10, 0,  6,  8,  12, 14, /**/ 0, 2,  4,  10, 6,  8,  12, 14,  //
+      6,  10, 0,  2,  4,  8,  12, 14, /**/ 0, 6,  10, 2,  4,  8,  12, 14,  //
+      2,  6,  10, 0,  4,  8,  12, 14, /**/ 0, 2,  6,  10, 4,  8,  12, 14,  //
+      4,  6,  10, 0,  2,  8,  12, 14, /**/ 0, 4,  6,  10, 2,  8,  12, 14,  //
+      2,  4,  6,  10, 0,  8,  12, 14, /**/ 0, 2,  4,  6,  10, 8,  12, 14,  //
+      8,  10, 0,  2,  4,  6,  12, 14, /**/ 0, 8,  10, 2,  4,  6,  12, 14,  //
+      2,  8,  10, 0,  4,  6,  12, 14, /**/ 0, 2,  8,  10, 4,  6,  12, 14,  //
+      4,  8,  10, 0,  2,  6,  12, 14, /**/ 0, 4,  8,  10, 2,  6,  12, 14,  //
+      2,  4,  8,  10, 0,  6,  12, 14, /**/ 0, 2,  4,  8,  10, 6,  12, 14,  //
+      6,  8,  10, 0,  2,  4,  12, 14, /**/ 0, 6,  8,  10, 2,  4,  12, 14,  //
+      2,  6,  8,  10, 0,  4,  12, 14, /**/ 0, 2,  6,  8,  10, 4,  12, 14,  //
+      4,  6,  8,  10, 0,  2,  12, 14, /**/ 0, 4,  6,  8,  10, 2,  12, 14,  //
+      2,  4,  6,  8,  10, 0,  12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      12, 0,  2,  4,  6,  8,  10, 14, /**/ 0, 12, 2,  4,  6,  8,  10, 14,  //
+      2,  12, 0,  4,  6,  8,  10, 14, /**/ 0, 2,  12, 4,  6,  8,  10, 14,  //
+      4,  12, 0,  2,  6,  8,  10, 14, /**/ 0, 4,  12, 2,  6,  8,  10, 14,  //
+      2,  4,  12, 0,  6,  8,  10, 14, /**/ 0, 2,  4,  12, 6,  8,  10, 14,  //
+      6,  12, 0,  2,  4,  8,  10, 14, /**/ 0, 6,  12, 2,  4,  8,  10, 14,  //
+      2,  6,  12, 0,  4,  8,  10, 14, /**/ 0, 2,  6,  12, 4,  8,  10, 14,  //
+      4,  6,  12, 0,  2,  8,  10, 14, /**/ 0, 4,  6,  12, 2,  8,  10, 14,  //
+      2,  4,  6,  12, 0,  8,  10, 14, /**/ 0, 2,  4,  6,  12, 8,  10, 14,  //
+      8,  12, 0,  2,  4,  6,  10, 14, /**/ 0, 8,  12, 2,  4,  6,  10, 14,  //
+      2,  8,  12, 0,  4,  6,  10, 14, /**/ 0, 2,  8,  12, 4,  6,  10, 14,  //
+      4,  8,  12, 0,  2,  6,  10, 14, /**/ 0, 4,  8,  12, 2,  6,  10, 14,  //
+      2,  4,  8,  12, 0,  6,  10, 14, /**/ 0, 2,  4,  8,  12, 6,  10, 14,  //
+      6,  8,  12, 0,  2,  4,  10, 14, /**/ 0, 6,  8,  12, 2,  4,  10, 14,  //
+      2,  6,  8,  12, 0,  4,  10, 14, /**/ 0, 2,  6,  8,  12, 4,  10, 14,  //
+      4,  6,  8,  12, 0,  2,  10, 14, /**/ 0, 4,  6,  8,  12, 2,  10, 14,  //
+      2,  4,  6,  8,  12, 0,  10, 14, /**/ 0, 2,  4,  6,  8,  12, 10, 14,  //
+      10, 12, 0,  2,  4,  6,  8,  14, /**/ 0, 10, 12, 2,  4,  6,  8,  14,  //
+      2,  10, 12, 0,  4,  6,  8,  14, /**/ 0, 2,  10, 12, 4,  6,  8,  14,  //
+      4,  10, 12, 0,  2,  6,  8,  14, /**/ 0, 4,  10, 12, 2,  6,  8,  14,  //
+      2,  4,  10, 12, 0,  6,  8,  14, /**/ 0, 2,  4,  10, 12, 6,  8,  14,  //
+      6,  10, 12, 0,  2,  4,  8,  14, /**/ 0, 6,  10, 12, 2,  4,  8,  14,  //
+      2,  6,  10, 12, 0,  4,  8,  14, /**/ 0, 2,  6,  10, 12, 4,  8,  14,  //
+      4,  6,  10, 12, 0,  2,  8,  14, /**/ 0, 4,  6,  10, 12, 2,  8,  14,  //
+      2,  4,  6,  10, 12, 0,  8,  14, /**/ 0, 2,  4,  6,  10, 12, 8,  14,  //
+      8,  10, 12, 0,  2,  4,  6,  14, /**/ 0, 8,  10, 12, 2,  4,  6,  14,  //
+      2,  8,  10, 12, 0,  4,  6,  14, /**/ 0, 2,  8,  10, 12, 4,  6,  14,  //
+      4,  8,  10, 12, 0,  2,  6,  14, /**/ 0, 4,  8,  10, 12, 2,  6,  14,  //
+      2,  4,  8,  10, 12, 0,  6,  14, /**/ 0, 2,  4,  8,  10, 12, 6,  14,  //
+      6,  8,  10, 12, 0,  2,  4,  14, /**/ 0, 6,  8,  10, 12, 2,  4,  14,  //
+      2,  6,  8,  10, 12, 0,  4,  14, /**/ 0, 2,  6,  8,  10, 12, 4,  14,  //
+      4,  6,  8,  10, 12, 0,  2,  14, /**/ 0, 4,  6,  8,  10, 12, 2,  14,  //
+      2,  4,  6,  8,  10, 12, 0,  14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      14, 0,  2,  4,  6,  8,  10, 12, /**/ 0, 14, 2,  4,  6,  8,  10, 12,  //
+      2,  14, 0,  4,  6,  8,  10, 12, /**/ 0, 2,  14, 4,  6,  8,  10, 12,  //
+      4,  14, 0,  2,  6,  8,  10, 12, /**/ 0, 4,  14, 2,  6,  8,  10, 12,  //
+      2,  4,  14, 0,  6,  8,  10, 12, /**/ 0, 2,  4,  14, 6,  8,  10, 12,  //
+      6,  14, 0,  2,  4,  8,  10, 12, /**/ 0, 6,  14, 2,  4,  8,  10, 12,  //
+      2,  6,  14, 0,  4,  8,  10, 12, /**/ 0, 2,  6,  14, 4,  8,  10, 12,  //
+      4,  6,  14, 0,  2,  8,  10, 12, /**/ 0, 4,  6,  14, 2,  8,  10, 12,  //
+      2,  4,  6,  14, 0,  8,  10, 12, /**/ 0, 2,  4,  6,  14, 8,  10, 12,  //
+      8,  14, 0,  2,  4,  6,  10, 12, /**/ 0, 8,  14, 2,  4,  6,  10, 12,  //
+      2,  8,  14, 0,  4,  6,  10, 12, /**/ 0, 2,  8,  14, 4,  6,  10, 12,  //
+      4,  8,  14, 0,  2,  6,  10, 12, /**/ 0, 4,  8,  14, 2,  6,  10, 12,  //
+      2,  4,  8,  14, 0,  6,  10, 12, /**/ 0, 2,  4,  8,  14, 6,  10, 12,  //
+      6,  8,  14, 0,  2,  4,  10, 12, /**/ 0, 6,  8,  14, 2,  4,  10, 12,  //
+      2,  6,  8,  14, 0,  4,  10, 12, /**/ 0, 2,  6,  8,  14, 4,  10, 12,  //
+      4,  6,  8,  14, 0,  2,  10, 12, /**/ 0, 4,  6,  8,  14, 2,  10, 12,  //
+      2,  4,  6,  8,  14, 0,  10, 12, /**/ 0, 2,  4,  6,  8,  14, 10, 12,  //
+      10, 14, 0,  2,  4,  6,  8,  12, /**/ 0, 10, 14, 2,  4,  6,  8,  12,  //
+      2,  10, 14, 0,  4,  6,  8,  12, /**/ 0, 2,  10, 14, 4,  6,  8,  12,  //
+      4,  10, 14, 0,  2,  6,  8,  12, /**/ 0, 4,  10, 14, 2,  6,  8,  12,  //
+      2,  4,  10, 14, 0,  6,  8,  12, /**/ 0, 2,  4,  10, 14, 6,  8,  12,  //
+      6,  10, 14, 0,  2,  4,  8,  12, /**/ 0, 6,  10, 14, 2,  4,  8,  12,  //
+      2,  6,  10, 14, 0,  4,  8,  12, /**/ 0, 2,  6,  10, 14, 4,  8,  12,  //
+      4,  6,  10, 14, 0,  2,  8,  12, /**/ 0, 4,  6,  10, 14, 2,  8,  12,  //
+      2,  4,  6,  10, 14, 0,  8,  12, /**/ 0, 2,  4,  6,  10, 14, 8,  12,  //
+      8,  10, 14, 0,  2,  4,  6,  12, /**/ 0, 8,  10, 14, 2,  4,  6,  12,  //
+      2,  8,  10, 14, 0,  4,  6,  12, /**/ 0, 2,  8,  10, 14, 4,  6,  12,  //
+      4,  8,  10, 14, 0,  2,  6,  12, /**/ 0, 4,  8,  10, 14, 2,  6,  12,  //
+      2,  4,  8,  10, 14, 0,  6,  12, /**/ 0, 2,  4,  8,  10, 14, 6,  12,  //
+      6,  8,  10, 14, 0,  2,  4,  12, /**/ 0, 6,  8,  10, 14, 2,  4,  12,  //
+      2,  6,  8,  10, 14, 0,  4,  12, /**/ 0, 2,  6,  8,  10, 14, 4,  12,  //
+      4,  6,  8,  10, 14, 0,  2,  12, /**/ 0, 4,  6,  8,  10, 14, 2,  12,  //
+      2,  4,  6,  8,  10, 14, 0,  12, /**/ 0, 2,  4,  6,  8,  10, 14, 12,  //
+      12, 14, 0,  2,  4,  6,  8,  10, /**/ 0, 12, 14, 2,  4,  6,  8,  10,  //
+      2,  12, 14, 0,  4,  6,  8,  10, /**/ 0, 2,  12, 14, 4,  6,  8,  10,  //
+      4,  12, 14, 0,  2,  6,  8,  10, /**/ 0, 4,  12, 14, 2,  6,  8,  10,  //
+      2,  4,  12, 14, 0,  6,  8,  10, /**/ 0, 2,  4,  12, 14, 6,  8,  10,  //
+      6,  12, 14, 0,  2,  4,  8,  10, /**/ 0, 6,  12, 14, 2,  4,  8,  10,  //
+      2,  6,  12, 14, 0,  4,  8,  10, /**/ 0, 2,  6,  12, 14, 4,  8,  10,  //
+      4,  6,  12, 14, 0,  2,  8,  10, /**/ 0, 4,  6,  12, 14, 2,  8,  10,  //
+      2,  4,  6,  12, 14, 0,  8,  10, /**/ 0, 2,  4,  6,  12, 14, 8,  10,  //
+      8,  12, 14, 0,  2,  4,  6,  10, /**/ 0, 8,  12, 14, 2,  4,  6,  10,  //
+      2,  8,  12, 14, 0,  4,  6,  10, /**/ 0, 2,  8,  12, 14, 4,  6,  10,  //
+      4,  8,  12, 14, 0,  2,  6,  10, /**/ 0, 4,  8,  12, 14, 2,  6,  10,  //
+      2,  4,  8,  12, 14, 0,  6,  10, /**/ 0, 2,  4,  8,  12, 14, 6,  10,  //
+      6,  8,  12, 14, 0,  2,  4,  10, /**/ 0, 6,  8,  12, 14, 2,  4,  10,  //
+      2,  6,  8,  12, 14, 0,  4,  10, /**/ 0, 2,  6,  8,  12, 14, 4,  10,  //
+      4,  6,  8,  12, 14, 0,  2,  10, /**/ 0, 4,  6,  8,  12, 14, 2,  10,  //
+      2,  4,  6,  8,  12, 14, 0,  10, /**/ 0, 2,  4,  6,  8,  12, 14, 10,  //
+      10, 12, 14, 0,  2,  4,  6,  8,  /**/ 0, 10, 12, 14, 2,  4,  6,  8,   //
+      2,  10, 12, 14, 0,  4,  6,  8,  /**/ 0, 2,  10, 12, 14, 4,  6,  8,   //
+      4,  10, 12, 14, 0,  2,  6,  8,  /**/ 0, 4,  10, 12, 14, 2,  6,  8,   //
+      2,  4,  10, 12, 14, 0,  6,  8,  /**/ 0, 2,  4,  10, 12, 14, 6,  8,   //
+      6,  10, 12, 14, 0,  2,  4,  8,  /**/ 0, 6,  10, 12, 14, 2,  4,  8,   //
+      2,  6,  10, 12, 14, 0,  4,  8,  /**/ 0, 2,  6,  10, 12, 14, 4,  8,   //
+      4,  6,  10, 12, 14, 0,  2,  8,  /**/ 0, 4,  6,  10, 12, 14, 2,  8,   //
+      2,  4,  6,  10, 12, 14, 0,  8,  /**/ 0, 2,  4,  6,  10, 12, 14, 8,   //
+      8,  10, 12, 14, 0,  2,  4,  6,  /**/ 0, 8,  10, 12, 14, 2,  4,  6,   //
+      2,  8,  10, 12, 14, 0,  4,  6,  /**/ 0, 2,  8,  10, 12, 14, 4,  6,   //
+      4,  8,  10, 12, 14, 0,  2,  6,  /**/ 0, 4,  8,  10, 12, 14, 2,  6,   //
+      2,  4,  8,  10, 12, 14, 0,  6,  /**/ 0, 2,  4,  8,  10, 12, 14, 6,   //
+      6,  8,  10, 12, 14, 0,  2,  4,  /**/ 0, 6,  8,  10, 12, 14, 2,  4,   //
+      2,  6,  8,  10, 12, 14, 0,  4,  /**/ 0, 2,  6,  8,  10, 12, 14, 4,   //
+      4,  6,  8,  10, 12, 14, 0,  2,  /**/ 0, 4,  6,  8,  10, 12, 14, 2,   //
+      2,  4,  6,  8,  10, 12, 14, 0,  /**/ 0, 2,  4,  6,  8,  10, 12, 14};
+
+  // Load the row for this mask (8 bytes per row) and rewrap it in the
+  // twice-as-wide u8 vector type so ZipLower can interleave it with itself,
+  // yielding each table entry duplicated within one 16-bit lane.
+  const VFromD<decltype(d8t)> byte_idx{Load(d8, table + mask_bits * 8).raw};
+  const VFromD<decltype(du)> pairs = ZipLower(byte_idx, byte_idx);
+  // Adding this constant bumps one byte of every 16-bit pair by one, turning
+  // (2*lane, 2*lane) into (2*lane, 2*lane + 1). Which half of the u16 is
+  // bumped depends on the target's endianness.
+  constexpr uint16_t kPairIndexIncrement =
+      HWY_IS_LITTLE_ENDIAN ? 0x0100 : 0x0001;
+
+  return BitCast(d, pairs + Set(du, kPairIndexIncrement));
+}
+
+// Returns byte indices for CompressNot on 2-byte lanes: looks up the 8-entry
+// table row selected by `mask_bits` (one bit per lane, must be < 256) and
+// expands each entry into a pair of byte indices. CompressNotBits passes the
+// result to TableLookupBytes.
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_INLINE VFromD<D> IndicesFromNotBits128(D d, uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 256);
+  const Rebind<uint8_t, decltype(d)> d8;
+  const Twice<decltype(d8)> d8t;
+  const RebindToUnsigned<decltype(d)> du;
+
+  // To reduce cache footprint, store lane indices and convert to byte indices
+  // (2*lane + 0..1), with the doubling baked into the table. It's not clear
+  // that the additional cost of unpacking nibbles is worthwhile.
+  // Layout: 256 rows (one per mask_bits value) of 8 bytes each; two rows are
+  // printed per source line, separated by /**/.
+  alignas(16) static constexpr uint8_t table[2048] = {
+      // PrintCompressNot16x8Tables
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  6,  8,  10, 12, 14, 0,   //
+      0, 4,  6,  8,  10, 12, 14, 2,  /**/ 4,  6,  8,  10, 12, 14, 0,  2,   //
+      0, 2,  6,  8,  10, 12, 14, 4,  /**/ 2,  6,  8,  10, 12, 14, 0,  4,   //
+      0, 6,  8,  10, 12, 14, 2,  4,  /**/ 6,  8,  10, 12, 14, 0,  2,  4,   //
+      0, 2,  4,  8,  10, 12, 14, 6,  /**/ 2,  4,  8,  10, 12, 14, 0,  6,   //
+      0, 4,  8,  10, 12, 14, 2,  6,  /**/ 4,  8,  10, 12, 14, 0,  2,  6,   //
+      0, 2,  8,  10, 12, 14, 4,  6,  /**/ 2,  8,  10, 12, 14, 0,  4,  6,   //
+      0, 8,  10, 12, 14, 2,  4,  6,  /**/ 8,  10, 12, 14, 0,  2,  4,  6,   //
+      0, 2,  4,  6,  10, 12, 14, 8,  /**/ 2,  4,  6,  10, 12, 14, 0,  8,   //
+      0, 4,  6,  10, 12, 14, 2,  8,  /**/ 4,  6,  10, 12, 14, 0,  2,  8,   //
+      0, 2,  6,  10, 12, 14, 4,  8,  /**/ 2,  6,  10, 12, 14, 0,  4,  8,   //
+      0, 6,  10, 12, 14, 2,  4,  8,  /**/ 6,  10, 12, 14, 0,  2,  4,  8,   //
+      0, 2,  4,  10, 12, 14, 6,  8,  /**/ 2,  4,  10, 12, 14, 0,  6,  8,   //
+      0, 4,  10, 12, 14, 2,  6,  8,  /**/ 4,  10, 12, 14, 0,  2,  6,  8,   //
+      0, 2,  10, 12, 14, 4,  6,  8,  /**/ 2,  10, 12, 14, 0,  4,  6,  8,   //
+      0, 10, 12, 14, 2,  4,  6,  8,  /**/ 10, 12, 14, 0,  2,  4,  6,  8,   //
+      0, 2,  4,  6,  8,  12, 14, 10, /**/ 2,  4,  6,  8,  12, 14, 0,  10,  //
+      0, 4,  6,  8,  12, 14, 2,  10, /**/ 4,  6,  8,  12, 14, 0,  2,  10,  //
+      0, 2,  6,  8,  12, 14, 4,  10, /**/ 2,  6,  8,  12, 14, 0,  4,  10,  //
+      0, 6,  8,  12, 14, 2,  4,  10, /**/ 6,  8,  12, 14, 0,  2,  4,  10,  //
+      0, 2,  4,  8,  12, 14, 6,  10, /**/ 2,  4,  8,  12, 14, 0,  6,  10,  //
+      0, 4,  8,  12, 14, 2,  6,  10, /**/ 4,  8,  12, 14, 0,  2,  6,  10,  //
+      0, 2,  8,  12, 14, 4,  6,  10, /**/ 2,  8,  12, 14, 0,  4,  6,  10,  //
+      0, 8,  12, 14, 2,  4,  6,  10, /**/ 8,  12, 14, 0,  2,  4,  6,  10,  //
+      0, 2,  4,  6,  12, 14, 8,  10, /**/ 2,  4,  6,  12, 14, 0,  8,  10,  //
+      0, 4,  6,  12, 14, 2,  8,  10, /**/ 4,  6,  12, 14, 0,  2,  8,  10,  //
+      0, 2,  6,  12, 14, 4,  8,  10, /**/ 2,  6,  12, 14, 0,  4,  8,  10,  //
+      0, 6,  12, 14, 2,  4,  8,  10, /**/ 6,  12, 14, 0,  2,  4,  8,  10,  //
+      0, 2,  4,  12, 14, 6,  8,  10, /**/ 2,  4,  12, 14, 0,  6,  8,  10,  //
+      0, 4,  12, 14, 2,  6,  8,  10, /**/ 4,  12, 14, 0,  2,  6,  8,  10,  //
+      0, 2,  12, 14, 4,  6,  8,  10, /**/ 2,  12, 14, 0,  4,  6,  8,  10,  //
+      0, 12, 14, 2,  4,  6,  8,  10, /**/ 12, 14, 0,  2,  4,  6,  8,  10,  //
+      0, 2,  4,  6,  8,  10, 14, 12, /**/ 2,  4,  6,  8,  10, 14, 0,  12,  //
+      0, 4,  6,  8,  10, 14, 2,  12, /**/ 4,  6,  8,  10, 14, 0,  2,  12,  //
+      0, 2,  6,  8,  10, 14, 4,  12, /**/ 2,  6,  8,  10, 14, 0,  4,  12,  //
+      0, 6,  8,  10, 14, 2,  4,  12, /**/ 6,  8,  10, 14, 0,  2,  4,  12,  //
+      0, 2,  4,  8,  10, 14, 6,  12, /**/ 2,  4,  8,  10, 14, 0,  6,  12,  //
+      0, 4,  8,  10, 14, 2,  6,  12, /**/ 4,  8,  10, 14, 0,  2,  6,  12,  //
+      0, 2,  8,  10, 14, 4,  6,  12, /**/ 2,  8,  10, 14, 0,  4,  6,  12,  //
+      0, 8,  10, 14, 2,  4,  6,  12, /**/ 8,  10, 14, 0,  2,  4,  6,  12,  //
+      0, 2,  4,  6,  10, 14, 8,  12, /**/ 2,  4,  6,  10, 14, 0,  8,  12,  //
+      0, 4,  6,  10, 14, 2,  8,  12, /**/ 4,  6,  10, 14, 0,  2,  8,  12,  //
+      0, 2,  6,  10, 14, 4,  8,  12, /**/ 2,  6,  10, 14, 0,  4,  8,  12,  //
+      0, 6,  10, 14, 2,  4,  8,  12, /**/ 6,  10, 14, 0,  2,  4,  8,  12,  //
+      0, 2,  4,  10, 14, 6,  8,  12, /**/ 2,  4,  10, 14, 0,  6,  8,  12,  //
+      0, 4,  10, 14, 2,  6,  8,  12, /**/ 4,  10, 14, 0,  2,  6,  8,  12,  //
+      0, 2,  10, 14, 4,  6,  8,  12, /**/ 2,  10, 14, 0,  4,  6,  8,  12,  //
+      0, 10, 14, 2,  4,  6,  8,  12, /**/ 10, 14, 0,  2,  4,  6,  8,  12,  //
+      0, 2,  4,  6,  8,  14, 10, 12, /**/ 2,  4,  6,  8,  14, 0,  10, 12,  //
+      0, 4,  6,  8,  14, 2,  10, 12, /**/ 4,  6,  8,  14, 0,  2,  10, 12,  //
+      0, 2,  6,  8,  14, 4,  10, 12, /**/ 2,  6,  8,  14, 0,  4,  10, 12,  //
+      0, 6,  8,  14, 2,  4,  10, 12, /**/ 6,  8,  14, 0,  2,  4,  10, 12,  //
+      0, 2,  4,  8,  14, 6,  10, 12, /**/ 2,  4,  8,  14, 0,  6,  10, 12,  //
+      0, 4,  8,  14, 2,  6,  10, 12, /**/ 4,  8,  14, 0,  2,  6,  10, 12,  //
+      0, 2,  8,  14, 4,  6,  10, 12, /**/ 2,  8,  14, 0,  4,  6,  10, 12,  //
+      0, 8,  14, 2,  4,  6,  10, 12, /**/ 8,  14, 0,  2,  4,  6,  10, 12,  //
+      0, 2,  4,  6,  14, 8,  10, 12, /**/ 2,  4,  6,  14, 0,  8,  10, 12,  //
+      0, 4,  6,  14, 2,  8,  10, 12, /**/ 4,  6,  14, 0,  2,  8,  10, 12,  //
+      0, 2,  6,  14, 4,  8,  10, 12, /**/ 2,  6,  14, 0,  4,  8,  10, 12,  //
+      0, 6,  14, 2,  4,  8,  10, 12, /**/ 6,  14, 0,  2,  4,  8,  10, 12,  //
+      0, 2,  4,  14, 6,  8,  10, 12, /**/ 2,  4,  14, 0,  6,  8,  10, 12,  //
+      0, 4,  14, 2,  6,  8,  10, 12, /**/ 4,  14, 0,  2,  6,  8,  10, 12,  //
+      0, 2,  14, 4,  6,  8,  10, 12, /**/ 2,  14, 0,  4,  6,  8,  10, 12,  //
+      0, 14, 2,  4,  6,  8,  10, 12, /**/ 14, 0,  2,  4,  6,  8,  10, 12,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  6,  8,  10, 12, 0,  14,  //
+      0, 4,  6,  8,  10, 12, 2,  14, /**/ 4,  6,  8,  10, 12, 0,  2,  14,  //
+      0, 2,  6,  8,  10, 12, 4,  14, /**/ 2,  6,  8,  10, 12, 0,  4,  14,  //
+      0, 6,  8,  10, 12, 2,  4,  14, /**/ 6,  8,  10, 12, 0,  2,  4,  14,  //
+      0, 2,  4,  8,  10, 12, 6,  14, /**/ 2,  4,  8,  10, 12, 0,  6,  14,  //
+      0, 4,  8,  10, 12, 2,  6,  14, /**/ 4,  8,  10, 12, 0,  2,  6,  14,  //
+      0, 2,  8,  10, 12, 4,  6,  14, /**/ 2,  8,  10, 12, 0,  4,  6,  14,  //
+      0, 8,  10, 12, 2,  4,  6,  14, /**/ 8,  10, 12, 0,  2,  4,  6,  14,  //
+      0, 2,  4,  6,  10, 12, 8,  14, /**/ 2,  4,  6,  10, 12, 0,  8,  14,  //
+      0, 4,  6,  10, 12, 2,  8,  14, /**/ 4,  6,  10, 12, 0,  2,  8,  14,  //
+      0, 2,  6,  10, 12, 4,  8,  14, /**/ 2,  6,  10, 12, 0,  4,  8,  14,  //
+      0, 6,  10, 12, 2,  4,  8,  14, /**/ 6,  10, 12, 0,  2,  4,  8,  14,  //
+      0, 2,  4,  10, 12, 6,  8,  14, /**/ 2,  4,  10, 12, 0,  6,  8,  14,  //
+      0, 4,  10, 12, 2,  6,  8,  14, /**/ 4,  10, 12, 0,  2,  6,  8,  14,  //
+      0, 2,  10, 12, 4,  6,  8,  14, /**/ 2,  10, 12, 0,  4,  6,  8,  14,  //
+      0, 10, 12, 2,  4,  6,  8,  14, /**/ 10, 12, 0,  2,  4,  6,  8,  14,  //
+      0, 2,  4,  6,  8,  12, 10, 14, /**/ 2,  4,  6,  8,  12, 0,  10, 14,  //
+      0, 4,  6,  8,  12, 2,  10, 14, /**/ 4,  6,  8,  12, 0,  2,  10, 14,  //
+      0, 2,  6,  8,  12, 4,  10, 14, /**/ 2,  6,  8,  12, 0,  4,  10, 14,  //
+      0, 6,  8,  12, 2,  4,  10, 14, /**/ 6,  8,  12, 0,  2,  4,  10, 14,  //
+      0, 2,  4,  8,  12, 6,  10, 14, /**/ 2,  4,  8,  12, 0,  6,  10, 14,  //
+      0, 4,  8,  12, 2,  6,  10, 14, /**/ 4,  8,  12, 0,  2,  6,  10, 14,  //
+      0, 2,  8,  12, 4,  6,  10, 14, /**/ 2,  8,  12, 0,  4,  6,  10, 14,  //
+      0, 8,  12, 2,  4,  6,  10, 14, /**/ 8,  12, 0,  2,  4,  6,  10, 14,  //
+      0, 2,  4,  6,  12, 8,  10, 14, /**/ 2,  4,  6,  12, 0,  8,  10, 14,  //
+      0, 4,  6,  12, 2,  8,  10, 14, /**/ 4,  6,  12, 0,  2,  8,  10, 14,  //
+      0, 2,  6,  12, 4,  8,  10, 14, /**/ 2,  6,  12, 0,  4,  8,  10, 14,  //
+      0, 6,  12, 2,  4,  8,  10, 14, /**/ 6,  12, 0,  2,  4,  8,  10, 14,  //
+      0, 2,  4,  12, 6,  8,  10, 14, /**/ 2,  4,  12, 0,  6,  8,  10, 14,  //
+      0, 4,  12, 2,  6,  8,  10, 14, /**/ 4,  12, 0,  2,  6,  8,  10, 14,  //
+      0, 2,  12, 4,  6,  8,  10, 14, /**/ 2,  12, 0,  4,  6,  8,  10, 14,  //
+      0, 12, 2,  4,  6,  8,  10, 14, /**/ 12, 0,  2,  4,  6,  8,  10, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  6,  8,  10, 0,  12, 14,  //
+      0, 4,  6,  8,  10, 2,  12, 14, /**/ 4,  6,  8,  10, 0,  2,  12, 14,  //
+      0, 2,  6,  8,  10, 4,  12, 14, /**/ 2,  6,  8,  10, 0,  4,  12, 14,  //
+      0, 6,  8,  10, 2,  4,  12, 14, /**/ 6,  8,  10, 0,  2,  4,  12, 14,  //
+      0, 2,  4,  8,  10, 6,  12, 14, /**/ 2,  4,  8,  10, 0,  6,  12, 14,  //
+      0, 4,  8,  10, 2,  6,  12, 14, /**/ 4,  8,  10, 0,  2,  6,  12, 14,  //
+      0, 2,  8,  10, 4,  6,  12, 14, /**/ 2,  8,  10, 0,  4,  6,  12, 14,  //
+      0, 8,  10, 2,  4,  6,  12, 14, /**/ 8,  10, 0,  2,  4,  6,  12, 14,  //
+      0, 2,  4,  6,  10, 8,  12, 14, /**/ 2,  4,  6,  10, 0,  8,  12, 14,  //
+      0, 4,  6,  10, 2,  8,  12, 14, /**/ 4,  6,  10, 0,  2,  8,  12, 14,  //
+      0, 2,  6,  10, 4,  8,  12, 14, /**/ 2,  6,  10, 0,  4,  8,  12, 14,  //
+      0, 6,  10, 2,  4,  8,  12, 14, /**/ 6,  10, 0,  2,  4,  8,  12, 14,  //
+      0, 2,  4,  10, 6,  8,  12, 14, /**/ 2,  4,  10, 0,  6,  8,  12, 14,  //
+      0, 4,  10, 2,  6,  8,  12, 14, /**/ 4,  10, 0,  2,  6,  8,  12, 14,  //
+      0, 2,  10, 4,  6,  8,  12, 14, /**/ 2,  10, 0,  4,  6,  8,  12, 14,  //
+      0, 10, 2,  4,  6,  8,  12, 14, /**/ 10, 0,  2,  4,  6,  8,  12, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  6,  8,  0,  10, 12, 14,  //
+      0, 4,  6,  8,  2,  10, 12, 14, /**/ 4,  6,  8,  0,  2,  10, 12, 14,  //
+      0, 2,  6,  8,  4,  10, 12, 14, /**/ 2,  6,  8,  0,  4,  10, 12, 14,  //
+      0, 6,  8,  2,  4,  10, 12, 14, /**/ 6,  8,  0,  2,  4,  10, 12, 14,  //
+      0, 2,  4,  8,  6,  10, 12, 14, /**/ 2,  4,  8,  0,  6,  10, 12, 14,  //
+      0, 4,  8,  2,  6,  10, 12, 14, /**/ 4,  8,  0,  2,  6,  10, 12, 14,  //
+      0, 2,  8,  4,  6,  10, 12, 14, /**/ 2,  8,  0,  4,  6,  10, 12, 14,  //
+      0, 8,  2,  4,  6,  10, 12, 14, /**/ 8,  0,  2,  4,  6,  10, 12, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  6,  0,  8,  10, 12, 14,  //
+      0, 4,  6,  2,  8,  10, 12, 14, /**/ 4,  6,  0,  2,  8,  10, 12, 14,  //
+      0, 2,  6,  4,  8,  10, 12, 14, /**/ 2,  6,  0,  4,  8,  10, 12, 14,  //
+      0, 6,  2,  4,  8,  10, 12, 14, /**/ 6,  0,  2,  4,  8,  10, 12, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  0,  6,  8,  10, 12, 14,  //
+      0, 4,  2,  6,  8,  10, 12, 14, /**/ 4,  0,  2,  6,  8,  10, 12, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  0,  4,  6,  8,  10, 12, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 0,  2,  4,  6,  8,  10, 12, 14};
+
+  // Load the row for this mask (8 bytes per row) and rewrap it in the
+  // twice-as-wide u8 vector type so ZipLower can interleave it with itself,
+  // yielding each table entry duplicated within one 16-bit lane.
+  const VFromD<decltype(d8t)> byte_idx{Load(d8, table + mask_bits * 8).raw};
+  const VFromD<decltype(du)> pairs = ZipLower(byte_idx, byte_idx);
+  // Adding this constant bumps one byte of every 16-bit pair by one, turning
+  // (2*lane, 2*lane) into (2*lane, 2*lane + 1). Which half of the u16 is
+  // bumped depends on the target's endianness.
+  constexpr uint16_t kPairIndexIncrement =
+      HWY_IS_LITTLE_ENDIAN ? 0x0100 : 0x0001;
+
+  return BitCast(d, pairs + Set(du, kPairIndexIncrement));
+}
+
+// Returns the byte-index vector for compressing 4-byte lanes. `mask_bits`
+// holds one bit per lane (must be < 16) and selects a 16-byte table row.
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_INLINE VFromD<D> IndicesFromBits128(D d, uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 16);
+
+  // With only 4 lanes, full 16-byte index vectors can be stored directly:
+  // one row per mask value, annotated with the mask in binary.
+  alignas(16) static constexpr uint8_t kByteIndices[256] = {
+      // PrintCompress32x4Tables
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  // 0000
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  // 0001
+      4,  5,  6,  7,  0,  1,  2,  3,  8,  9,  10, 11, 12, 13, 14, 15,  // 0010
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  // 0011
+      8,  9,  10, 11, 0,  1,  2,  3,  4,  5,  6,  7,  12, 13, 14, 15,  // 0100
+      0,  1,  2,  3,  8,  9,  10, 11, 4,  5,  6,  7,  12, 13, 14, 15,  // 0101
+      4,  5,  6,  7,  8,  9,  10, 11, 0,  1,  2,  3,  12, 13, 14, 15,  // 0110
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  // 0111
+      12, 13, 14, 15, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,  // 1000
+      0,  1,  2,  3,  12, 13, 14, 15, 4,  5,  6,  7,  8,  9,  10, 11,  // 1001
+      4,  5,  6,  7,  12, 13, 14, 15, 0,  1,  2,  3,  8,  9,  10, 11,  // 1010
+      0,  1,  2,  3,  4,  5,  6,  7,  12, 13, 14, 15, 8,  9,  10, 11,  // 1011
+      8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  4,  5,  6,  7,   // 1100
+      0,  1,  2,  3,  8,  9,  10, 11, 12, 13, 14, 15, 4,  5,  6,  7,   // 1101
+      4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,   // 1110
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15}; // 1111
+
+  const Repartition<uint8_t, decltype(d)> d8;
+  const uint8_t* const row = kByteIndices + 16 * mask_bits;
+  return BitCast(d, Load(d8, row));
+}
+
+// Returns the byte-index vector for CompressNot on 4-byte lanes. `mask_bits`
+// holds one bit per lane (must be < 16) and selects a 16-byte table row.
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_INLINE VFromD<D> IndicesFromNotBits128(D d, uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 16);
+
+  // With only 4 lanes, full 16-byte index vectors can be stored directly:
+  // one row per mask value, annotated with the mask in binary.
+  alignas(16) static constexpr uint8_t kByteIndices[256] = {
+      // PrintCompressNot32x4Tables
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  // 0000
+      4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,   // 0001
+      0,  1,  2,  3,  8,  9,  10, 11, 12, 13, 14, 15, 4,  5,  6,  7,   // 0010
+      8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  4,  5,  6,  7,   // 0011
+      0,  1,  2,  3,  4,  5,  6,  7,  12, 13, 14, 15, 8,  9,  10, 11,  // 0100
+      4,  5,  6,  7,  12, 13, 14, 15, 0,  1,  2,  3,  8,  9,  10, 11,  // 0101
+      0,  1,  2,  3,  12, 13, 14, 15, 4,  5,  6,  7,  8,  9,  10, 11,  // 0110
+      12, 13, 14, 15, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,  // 0111
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  // 1000
+      4,  5,  6,  7,  8,  9,  10, 11, 0,  1,  2,  3,  12, 13, 14, 15,  // 1001
+      0,  1,  2,  3,  8,  9,  10, 11, 4,  5,  6,  7,  12, 13, 14, 15,  // 1010
+      8,  9,  10, 11, 0,  1,  2,  3,  4,  5,  6,  7,  12, 13, 14, 15,  // 1011
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  // 1100
+      4,  5,  6,  7,  0,  1,  2,  3,  8,  9,  10, 11, 12, 13, 14, 15,  // 1101
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  // 1110
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15}; // 1111
+
+  const Repartition<uint8_t, decltype(d)> d8;
+  const uint8_t* const row = kByteIndices + 16 * mask_bits;
+  return BitCast(d, Load(d8, row));
+}
+
+// Returns the byte-index vector for compressing 8-byte lanes. `mask_bits`
+// holds one bit per lane (must be < 4) and selects a 16-byte table row.
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE VFromD<D> IndicesFromBits128(D d, uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 4);
+
+  // With only 2 lanes, full 16-byte index vectors can be stored directly:
+  // one row per mask value, annotated with the mask in binary.
+  alignas(16) static constexpr uint8_t kByteIndices[64] = {
+      // PrintCompress64x2Tables
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15,  // 00
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15,  // 01
+      8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2,  3,  4,  5,  6,  7,   // 10
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15}; // 11
+
+  const Repartition<uint8_t, decltype(d)> d8;
+  const uint8_t* const row = kByteIndices + 16 * mask_bits;
+  return BitCast(d, Load(d8, row));
+}
+
+// Returns the byte-index vector for CompressNot on 8-byte lanes. `mask_bits`
+// holds one bit per lane (must be < 4) and selects a 16-byte table row.
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE VFromD<D> IndicesFromNotBits128(D d, uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 4);
+
+  // With only 2 lanes, full 16-byte index vectors can be stored directly:
+  // one row per mask value, annotated with the mask in binary.
+  alignas(16) static constexpr uint8_t kByteIndices[64] = {
+      // PrintCompressNot64x2Tables
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15,  // 00
+      8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2,  3,  4,  5,  6,  7,   // 01
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15,  // 10
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15}; // 11
+
+  const Repartition<uint8_t, decltype(d)> d8;
+  const uint8_t* const row = kByteIndices + 16 * mask_bits;
+  return BitCast(d, Load(d8, row));
+}
+
+template <typename T, size_t N, HWY_IF_NOT_T_SIZE(T, 1)>  // lanes wider than 1 byte
+HWY_API Vec128<T, N> CompressBits(Vec128<T, N> v, uint64_t mask_bits) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+
+  HWY_DASSERT(mask_bits < (1ull << N));  // no bits beyond the N lanes
+  const auto indices = BitCast(du, detail::IndicesFromBits128(d, mask_bits));
+  return BitCast(d, TableLookupBytes(BitCast(du, v), indices));  // byte shuffle
+}
+
+template <typename T, size_t N, HWY_IF_NOT_T_SIZE(T, 1)>  // lanes wider than 1 byte
+HWY_API Vec128<T, N> CompressNotBits(Vec128<T, N> v, uint64_t mask_bits) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+
+  HWY_DASSERT(mask_bits < (1ull << N));  // no bits beyond the N lanes
+  const auto indices = BitCast(du, detail::IndicesFromNotBits128(d, mask_bits));
+  return BitCast(d, TableLookupBytes(BitCast(du, v), indices));  // byte shuffle
+}
+
+}  // namespace detail
+
+// Single lane: nothing to reorder, so v is returned unchanged (mask unused).
+template <typename T>
+HWY_API Vec128<T, 1> Compress(Vec128<T, 1> v, Mask128<T, 1> /*m*/) {
+  return v;
+}
+
+// Two 64-bit lanes: conditional swap, done without a table lookup
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T> Compress(Vec128<T> v, Mask128<T> mask) {
+  // If mask[1] = 1 and mask[0] = 0, then swap both halves, else keep.
+  const Full128<T> d;
+  const Vec128<T> m = VecFromMask(d, mask);
+  const Vec128<T> maskL = DupEven(m);  // presumably mask[0] in both lanes
+  const Vec128<T> maskH = DupOdd(m);   // presumably mask[1] in both lanes
+  const Vec128<T> swap = AndNot(maskL, maskH);  // mask[1] && !mask[0], see above
+  return IfVecThenElse(swap, Shuffle01(v), v);  // Shuffle01(v): halves swapped
+}
+
+// General case, 2 or 4 byte lanes: mask -> bits -> table-driven byte shuffle
+template <typename T, size_t N, HWY_IF_T_SIZE_ONE_OF(T, (1 << 2) | (1 << 4))>
+HWY_API Vec128<T, N> Compress(Vec128<T, N> v, Mask128<T, N> mask) {
+  return detail::CompressBits(v, detail::BitsFromMask(mask));
+}
+
+// ------------------------------ CompressNot
+
+// Single lane: nothing to reorder, so v is returned unchanged (mask unused).
+template <typename T>
+HWY_API Vec128<T, 1> CompressNot(Vec128<T, 1> v, Mask128<T, 1> /*m*/) {
+  return v;
+}
+
+// Two 64-bit lanes: conditional swap, done without a table lookup
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T> CompressNot(Vec128<T> v, Mask128<T> mask) {
+  // If mask[1] = 0 and mask[0] = 1, then swap both halves, else keep.
+  const Full128<T> d;
+  const Vec128<T> m = VecFromMask(d, mask);
+  const Vec128<T> maskL = DupEven(m);  // presumably mask[0] in both lanes
+  const Vec128<T> maskH = DupOdd(m);   // presumably mask[1] in both lanes
+  const Vec128<T> swap = AndNot(maskH, maskL);  // mask[0] && !mask[1], see above
+  return IfVecThenElse(swap, Shuffle01(v), v);  // Shuffle01(v): halves swapped
+}
+
+// General case, 2 or 4 byte lanes
+template <typename T, size_t N, HWY_IF_T_SIZE_ONE_OF(T, (1 << 2) | (1 << 4))>
+HWY_API Vec128<T, N> CompressNot(Vec128<T, N> v, Mask128<T, N> mask) {
+  // For partial vectors, we cannot pull the Not() into the table because
+  // BitsFromMask clears the upper bits.
+  if (N < 16 / sizeof(T)) {  // partial vector: negate the mask explicitly
+    return detail::CompressBits(v, detail::BitsFromMask(Not(mask)));
+  }
+  return detail::CompressNotBits(v, detail::BitsFromMask(mask));  // full vector
+}
+
+// ------------------------------ CompressBlocksNot
+HWY_API Vec128<uint64_t> CompressBlocksNot(Vec128<uint64_t> v,
+                                           Mask128<uint64_t> /* m */) {
+  return v;  // mask is ignored; presumably because a Vec128 is a single block
+}
+
+template <typename T, size_t N, HWY_IF_NOT_T_SIZE(T, 1)>  // packed-bits overload
+HWY_API Vec128<T, N> CompressBits(Vec128<T, N> v,
+                                  const uint8_t* HWY_RESTRICT bits) {
+  // As there are at most 8 lanes in v if sizeof(T) > 1, simply
+  // convert bits[0] to a uint64_t
+  uint64_t mask_bits = bits[0];  // only the first byte of `bits` is read
+  if (N < 8) {
+    mask_bits &= (1ull << N) - 1;  // discard bits beyond the N lanes
+  }
+
+  return detail::CompressBits(v, mask_bits);
+}
+
+// ------------------------------ CompressStore, CompressBitsStore
+
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 1)>  // lanes wider than 1 byte
+HWY_API size_t CompressStore(VFromD<D> v, MFromD<D> m, D d,
+                             TFromD<D>* HWY_RESTRICT unaligned) {
+  const RebindToUnsigned<decltype(d)> du;
+
+  const uint64_t mask_bits = detail::BitsFromMask(m);
+  HWY_DASSERT(mask_bits < (1ull << MaxLanes(d)));
+  const size_t count = PopCount(mask_bits);  // number of selected lanes
+
+  const auto indices = BitCast(du, detail::IndicesFromBits128(d, mask_bits));
+  const auto compressed = BitCast(d, TableLookupBytes(BitCast(du, v), indices));
+  StoreU(compressed, d, unaligned);  // stores the whole vector, not just `count`
+  return count;
+}
+
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 1)>  // lanes wider than 1 byte
+HWY_API size_t CompressBlendedStore(VFromD<D> v, MFromD<D> m, D d,
+                                    TFromD<D>* HWY_RESTRICT unaligned) {
+  const RebindToUnsigned<decltype(d)> du;
+
+  const uint64_t mask_bits = detail::BitsFromMask(m);
+  HWY_DASSERT(mask_bits < (1ull << MaxLanes(d)));
+  const size_t count = PopCount(mask_bits);  // number of selected lanes
+
+  const auto indices = BitCast(du, detail::IndicesFromBits128(d, mask_bits));
+  const auto compressed = BitCast(d, TableLookupBytes(BitCast(du, v), indices));
+  BlendedStore(compressed, FirstN(d, count), d, unaligned);  // first `count` only
+  return count;
+}
+
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 1)>  // lanes wider than 1 byte
+HWY_API size_t CompressBitsStore(VFromD<D> v, const uint8_t* HWY_RESTRICT bits,
+                                 D d, TFromD<D>* HWY_RESTRICT unaligned) {
+  const RebindToUnsigned<decltype(d)> du;
+
+  // As there are at most 8 lanes in v if sizeof(TFromD<D>) > 1, simply
+  // convert bits[0] to a uint64_t
+  uint64_t mask_bits = bits[0];  // only the first byte of `bits` is read
+  constexpr size_t kN = MaxLanes(d);
+  if (kN < 8) {
+    mask_bits &= (1ull << kN) - 1;  // discard bits beyond the kN lanes
+  }
+  const size_t count = PopCount(mask_bits);  // number of selected lanes
+
+  const auto indices = BitCast(du, detail::IndicesFromBits128(d, mask_bits));
+  const auto compressed = BitCast(d, TableLookupBytes(BitCast(du, v), indices));
+  StoreU(compressed, d, unaligned);  // stores the whole vector, not just `count`
+
+  return count;
+}
+
+// ------------------------------ StoreInterleaved2/3/4
+
+// HWY_NATIVE_LOAD_STORE_INTERLEAVED not set, hence defined in
+// generic_ops-inl.h.
+
+// ------------------------------ Additional mask logical operations
+namespace detail {
+
+#if HWY_IS_LITTLE_ENDIAN  // little-endian: both helpers are the identity
+template <class V>
+HWY_INLINE V Per64BitBlkRevLanesOnBe(V v) {
+  return v;
+}
+template <class V>
+HWY_INLINE V Per128BitBlkRevLanesOnBe(V v) {
+  return v;
+}
+#else  // big-endian: reverse lane order within each 64-bit / 128-bit block
+template <class V, HWY_IF_T_SIZE_V(V, 1)>
+HWY_INLINE V Per64BitBlkRevLanesOnBe(V v) {
+  const DFromV<decltype(v)> d;
+  return Reverse8(d, v);  // 8 one-byte lanes per 64-bit block
+}
+template <class V, HWY_IF_T_SIZE_V(V, 2)>
+HWY_INLINE V Per64BitBlkRevLanesOnBe(V v) {
+  const DFromV<decltype(v)> d;
+  return Reverse4(d, v);  // 4 two-byte lanes per 64-bit block
+}
+template <class V, HWY_IF_T_SIZE_V(V, 4)>
+HWY_INLINE V Per64BitBlkRevLanesOnBe(V v) {
+  const DFromV<decltype(v)> d;
+  return Reverse2(d, v);  // 2 four-byte lanes per 64-bit block
+}
+template <class V, HWY_IF_T_SIZE_V(V, 8)>
+HWY_INLINE V Per64BitBlkRevLanesOnBe(V v) {
+  return v;  // a single 8-byte lane per block: nothing to reverse
+}
+template <class V>
+HWY_INLINE V Per128BitBlkRevLanesOnBe(V v) {
+  const DFromV<decltype(v)> d;
+  return Reverse(d, v);  // reverses all lanes of the vector
+}
+#endif
+
+template <class V>  // computes a - b, treating each vector as one 128-bit integer
+HWY_INLINE V I128Subtract(V a, V b) {
+#if defined(__SIZEOF_INT128__)  // compiler has __int128: use one vec_sub
+  using VU128 = __vector unsigned __int128;
+  const V diff_i128{reinterpret_cast<typename detail::Raw128<TFromV<V>>::type>(
+      vec_sub(reinterpret_cast<VU128>(a.raw), reinterpret_cast<VU128>(b.raw)))};
+#else  // emulate with two u64 subtractions plus a manual borrow
+  const DFromV<decltype(a)> d;
+  const Repartition<uint64_t, decltype(d)> du64;
+
+  const auto u64_a = BitCast(du64, a);
+  const auto u64_b = BitCast(du64, b);
+
+  const auto diff_u64 = u64_a - u64_b;  // per-lane difference, no borrow yet
+  const auto borrow_u64 = VecFromMask(du64, u64_a < u64_b);  // lanes that wrapped
+
+#if HWY_IS_LITTLE_ENDIAN  // move the borrow 8 bytes over, onto the other u64 lane
+  const auto borrow_u64_shifted = ShiftLeftBytes<8>(du64, borrow_u64);
+#else
+  const auto borrow_u64_shifted = ShiftRightBytes<8>(du64, borrow_u64);
+#endif
+
+  const auto diff_i128 = BitCast(d, diff_u64 + borrow_u64_shifted);  // see NOTE
+#endif  // NOTE(review): assumes VecFromMask yields all-ones (-1), so + is "-1"
+
+  return diff_i128;
+}
+
+}  // namespace detail
+
+template <class T>  // 1 lane: the mask is already its own result
+HWY_API Mask128<T, 1> SetAtOrAfterFirst(Mask128<T, 1> mask) {
+  return mask;
+}
+template <class T>  // 2 lanes: OR lane 0 into lane 1
+HWY_API Mask128<T, 2> SetAtOrAfterFirst(Mask128<T, 2> mask) {
+  const FixedTag<T, 2> d;
+  const auto vmask = VecFromMask(d, mask);
+  return MaskFromVec(Or(vmask, InterleaveLower(vmask, vmask)));  // {m0, m0|m1}
+}
+template <class T, size_t N, HWY_IF_LANES_GT(N, 2), HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_API Mask128<T, N> SetAtOrAfterFirst(Mask128<T, N> mask) {  // <= 64-bit vectors
+  const Simd<T, N, 0> d;
+  const Full64<T> d_full64;
+
+  const auto vmask = VecFromMask(d, mask);
+  const auto vmask_le64 =  // mask reinterpreted as one int64 (lanes reversed on BE)
+      BitCast(Full64<int64_t>(),
+              detail::Per64BitBlkRevLanesOnBe(ResizeBitCast(d_full64, vmask)));
+  const auto neg_vmask_le64 = Neg(vmask_le64);  // two's-complement negation
+  const auto neg_vmask = ResizeBitCast(  // back to the original lane layout
+      d, detail::Per64BitBlkRevLanesOnBe(BitCast(d_full64, neg_vmask_le64)));
+
+  return MaskFromVec(Or(vmask, neg_vmask));  // x | -x: lowest set bit and above
+}
+template <class T, HWY_IF_NOT_T_SIZE(T, 8)>  // full 128-bit vectors, lanes < 8 bytes
+HWY_API Mask128<T> SetAtOrAfterFirst(Mask128<T> mask) {
+  const Full128<T> d;
+  auto vmask = VecFromMask(d, mask);
+
+  const auto vmask_le128 = detail::Per128BitBlkRevLanesOnBe(vmask);
+  const auto neg_vmask_le128 = detail::I128Subtract(Zero(d), vmask_le128);  // -x
+  const auto neg_vmask = detail::Per128BitBlkRevLanesOnBe(neg_vmask_le128);
+
+  return MaskFromVec(BitCast(d, Or(vmask, neg_vmask)));  // x | -x over 128 bits
+}
+
+template <class T, size_t N>  // complement of SetAtOrAfterFirst
+HWY_API Mask128<T, N> SetBeforeFirst(Mask128<T, N> mask) {
+  return Not(SetAtOrAfterFirst(mask));
+}
+
+template <class T>  // 1 lane: the mask is already its own result
+HWY_API Mask128<T, 1> SetOnlyFirst(Mask128<T, 1> mask) {
+  return mask;
+}
+template <class T>  // 2 lanes: keep lane 1 only if lane 0 is clear
+HWY_API Mask128<T, 2> SetOnlyFirst(Mask128<T, 2> mask) {
+  const FixedTag<T, 2> d;
+  const RebindToSigned<decltype(d)> di;
+
+  const auto vmask = BitCast(di, VecFromMask(d, mask));
+  const auto zero = Zero(di);
+  const auto vmask2 = VecFromMask(di, InterleaveLower(zero, vmask) == zero);
+  return MaskFromVec(BitCast(d, And(vmask, vmask2)));  // {m0, m1 && !m0}
+}
+template <class T, size_t N, HWY_IF_LANES_GT(N, 2), HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_API Mask128<T, N> SetOnlyFirst(Mask128<T, N> mask) {  // <= 64-bit vectors
+  const Simd<T, N, 0> d;
+  const Full64<T> d_full64;
+  const RebindToSigned<decltype(d)> di;
+
+  const auto vmask = VecFromMask(d, mask);
+  const auto vmask_le64 =  // mask reinterpreted as one int64 (lanes reversed on BE)
+      BitCast(Full64<int64_t>(),
+              detail::Per64BitBlkRevLanesOnBe(ResizeBitCast(d_full64, vmask)));
+  const auto neg_vmask_le64 = Neg(vmask_le64);  // two's-complement negation
+  const auto neg_vmask = ResizeBitCast(  // back to the original lane layout
+      d, detail::Per64BitBlkRevLanesOnBe(BitCast(d_full64, neg_vmask_le64)));
+
+  const auto first_vmask = BitCast(di, And(vmask, neg_vmask));  // x & -x: one bit
+  return MaskFromVec(BitCast(d, Or(first_vmask, Neg(first_vmask))));  // fill lane
+}
+template <class T, HWY_IF_NOT_T_SIZE(T, 8)>  // full 128-bit vectors, lanes < 8 bytes
+HWY_API Mask128<T> SetOnlyFirst(Mask128<T> mask) {
+  const Full128<T> d;
+  const RebindToSigned<decltype(d)> di;
+
+  const auto vmask = VecFromMask(d, mask);
+  const auto vmask_le128 = detail::Per128BitBlkRevLanesOnBe(vmask);
+  const auto neg_vmask_le128 = detail::I128Subtract(Zero(d), vmask_le128);  // -x
+  const auto neg_vmask = detail::Per128BitBlkRevLanesOnBe(neg_vmask_le128);
+
+  return MaskFromVec(BitCast(d, Neg(BitCast(di, And(vmask, neg_vmask)))));
+}
+
+template <class T>  // 1 lane: result is built from Set(-1), the input is ignored
+HWY_API Mask128<T, 1> SetAtOrBeforeFirst(Mask128<T, 1> /*mask*/) {
+  const FixedTag<T, 1> d;
+  const RebindToSigned<decltype(d)> di;
+  using TI = MakeSigned<T>;
+
+  return RebindMask(d, MaskFromVec(Set(di, TI(-1))));
+}
+template <class T, size_t N, HWY_IF_LANES_GT(N, 1)>
+HWY_API Mask128<T, N> SetAtOrBeforeFirst(Mask128<T, N> mask) {
+  const Simd<T, N, 0> d;  // shift the mask up one lane, then take SetBeforeFirst
+  return SetBeforeFirst(MaskFromVec(ShiftLeftLanes<1>(VecFromMask(d, mask))));
+}
+
+// ------------------------------ Reductions
+
+namespace detail {
+
+// N=1 for any T: the single lane is already the sum/min/max, so return v as-is
+template <typename T>
+HWY_INLINE Vec128<T, 1> SumOfLanes(Vec128<T, 1> v) {
+  return v;
+}
+template <typename T>
+HWY_INLINE Vec128<T, 1> MinOfLanes(Vec128<T, 1> v) {
+  return v;
+}
+template <typename T>
+HWY_INLINE Vec128<T, 1> MaxOfLanes(Vec128<T, 1> v) {
+  return v;
+}
+
+// u32/i32/f32:
+
+// N=2: combine the vector with Shuffle2301 of itself (presumably a lane swap)
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE Vec128<T, 2> SumOfLanes(Vec128<T, 2> v10) {
+  // NOTE: AltivecVsum2sws cannot be used here as AltivecVsum2sws
+  // computes the signed saturated sum of the lanes.
+  return v10 + Shuffle2301(v10);
+}
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE Vec128<T, 2> MinOfLanes(Vec128<T, 2> v10) {
+  return Min(v10, Shuffle2301(v10));
+}
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE Vec128<T, 2> MaxOfLanes(Vec128<T, 2> v10) {
+  return Max(v10, Shuffle2301(v10));
+}
+
+// N=4 (full): two shuffle-and-combine steps leave the result in every lane
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE Vec128<T> SumOfLanes(Vec128<T> v3210) {
+  // NOTE: AltivecVsumsws cannot be used here as AltivecVsumsws
+  // computes the signed saturated sum of the lanes.
+  const Vec128<T> v1032 = Shuffle1032(v3210);
+  const Vec128<T> v31_20_31_20 = v3210 + v1032;  // pairwise sums 3+1 and 2+0
+  const Vec128<T> v20_31_20_31 = Shuffle0321(v31_20_31_20);
+  return v20_31_20_31 + v31_20_31_20;  // (2+0) + (3+1) in each lane
+}
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE Vec128<T> MinOfLanes(Vec128<T> v3210) {
+  const Vec128<T> v1032 = Shuffle1032(v3210);
+  const Vec128<T> v31_20_31_20 = Min(v3210, v1032);  // pairwise minima
+  const Vec128<T> v20_31_20_31 = Shuffle0321(v31_20_31_20);
+  return Min(v20_31_20_31, v31_20_31_20);
+}
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE Vec128<T> MaxOfLanes(Vec128<T> v3210) {
+  const Vec128<T> v1032 = Shuffle1032(v3210);
+  const Vec128<T> v31_20_31_20 = Max(v3210, v1032);  // pairwise maxima
+  const Vec128<T> v20_31_20_31 = Shuffle0321(v31_20_31_20);
+  return Max(v20_31_20_31, v31_20_31_20);
+}
+
+// u64/i64/f64:
+
+// N=2 (full): combine the vector with Shuffle01 of itself
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_INLINE Vec128<T> SumOfLanes(Vec128<T> v10) {
+  const Vec128<T> v01 = Shuffle01(v10);  // presumably the two lanes swapped
+  return v10 + v01;
+}
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_INLINE Vec128<T> MinOfLanes(Vec128<T> v10) {
+  const Vec128<T> v01 = Shuffle01(v10);  // presumably the two lanes swapped
+  return Min(v10, v01);
+}
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_INLINE Vec128<T> MaxOfLanes(Vec128<T> v10) {
+  const Vec128<T> v01 = Shuffle01(v10);  // presumably the two lanes swapped
+  return Max(v10, v01);
+}
+
+// Wraps vec_vsum4shs; casts the nominally int32_t result to D.
+template <class D>
+HWY_INLINE VFromD<D> AltivecVsum4shs(D d, __vector signed short a,
+                                     __vector signed int b) {
+  const Repartition<int32_t, D> di32;
+#ifdef __OPTIMIZE__  // optimized builds: evaluate by hand when inputs are constant
+  if (IsConstantRawAltivecVect(a) && IsConstantRawAltivecVect(b)) {
+    const int64_t sum0 = static_cast<int64_t>(a[0]) +  // a[2i] + a[2i+1] + b[i]
+                         static_cast<int64_t>(a[1]) +
+                         static_cast<int64_t>(b[0]);
+    const int64_t sum1 = static_cast<int64_t>(a[2]) +
+                         static_cast<int64_t>(a[3]) +
+                         static_cast<int64_t>(b[1]);
+    const int64_t sum2 = static_cast<int64_t>(a[4]) +
+                         static_cast<int64_t>(a[5]) +
+                         static_cast<int64_t>(b[2]);
+    const int64_t sum3 = static_cast<int64_t>(a[6]) +
+                         static_cast<int64_t>(a[7]) +
+                         static_cast<int64_t>(b[3]);
+    const int32_t sign0 = static_cast<int32_t>(sum0 >> 63);  // 0 or -1
+    const int32_t sign1 = static_cast<int32_t>(sum1 >> 63);
+    const int32_t sign2 = static_cast<int32_t>(sum2 >> 63);
+    const int32_t sign3 = static_cast<int32_t>(sum3 >> 63);
+    using Raw = typename detail::Raw128<int32_t>::type;
+    return BitCast(  // each lane: the sum if it fits in int32, else saturated
+        d,
+        VFromD<decltype(di32)>{Raw{
+            (sign0 == (sum0 >> 31)) ? static_cast<int32_t>(sum0)
+                                    : static_cast<int32_t>(sign0 ^ 0x7FFFFFFF),
+            (sign1 == (sum1 >> 31)) ? static_cast<int32_t>(sum1)
+                                    : static_cast<int32_t>(sign1 ^ 0x7FFFFFFF),
+            (sign2 == (sum2 >> 31)) ? static_cast<int32_t>(sum2)
+                                    : static_cast<int32_t>(sign2 ^ 0x7FFFFFFF),
+            (sign3 == (sum3 >> 31))
+                ? static_cast<int32_t>(sum3)
+                : static_cast<int32_t>(sign3 ^ 0x7FFFFFFF)}});
+  } else  // NOLINT
+#endif
+  {
+    return BitCast(d, VFromD<decltype(di32)>{vec_vsum4shs(a, b)});  // intrinsic
+  }
+}
+
+// Wraps the vsumsws instruction; casts the nominally int32_t result to D.
+template <class D>
+HWY_INLINE VFromD<D> AltivecVsumsws(D d, __vector signed int a,
+                                    __vector signed int b) {
+  const Repartition<int32_t, D> di32;
+#ifdef __OPTIMIZE__  // optimized builds: evaluate by hand when inputs are constant
+  constexpr int kDestLaneOffset = HWY_IS_LITTLE_ENDIAN ? 0 : 3;
+  if (IsConstantRawAltivecVect(a) && __builtin_constant_p(b[kDestLaneOffset])) {
+    const int64_t sum =  // all four lanes of a plus one lane of b
+        static_cast<int64_t>(a[0]) + static_cast<int64_t>(a[1]) +
+        static_cast<int64_t>(a[2]) + static_cast<int64_t>(a[3]) +
+        static_cast<int64_t>(b[kDestLaneOffset]);
+    const int32_t sign = static_cast<int32_t>(sum >> 63);  // 0 or -1
+#if HWY_IS_LITTLE_ENDIAN  // saturated sum goes in lane 0, other lanes are zero
+    return BitCast(
+        d, VFromD<decltype(di32)>{(__vector signed int){
+               (sign == (sum >> 31)) ? static_cast<int32_t>(sum)
+                                     : static_cast<int32_t>(sign ^ 0x7FFFFFFF),
+               0, 0, 0}});
+#else  // saturated sum goes in lane 3, other lanes are zero
+    return BitCast(d, VFromD<decltype(di32)>{(__vector signed int){
+                          0, 0, 0,
+                          (sign == (sum >> 31))
+                              ? static_cast<int32_t>(sum)
+                              : static_cast<int32_t>(sign ^ 0x7FFFFFFF)}});
+#endif
+  } else  // NOLINT
+#endif
+  {
+    __vector signed int sum;
+
+    // Inline assembly is used for vsumsws to avoid unnecessary shuffling
+    // on little-endian PowerPC targets as the result of the vsumsws
+    // instruction will already be in the correct lanes on little-endian
+    // PowerPC targets.
+    __asm__("vsumsws %0,%1,%2" : "=v"(sum) : "v"(a), "v"(b));
+
+    return BitCast(d, VFromD<decltype(di32)>{sum});
+  }
+}
+
+template <size_t N>  // sums adjacent u16 pairs into i32 via the signed vsum4shs
+HWY_INLINE Vec128<int32_t, N / 2> AltivecU16SumsOf2(Vec128<uint16_t, N> v) {
+  const RebindToSigned<DFromV<decltype(v)>> di16;
+  const RepartitionToWide<decltype(di16)> di32;
+  return AltivecVsum4shs(di32, Xor(BitCast(di16, v), Set(di16, -32768)).raw,
+                         Set(di32, 65536).raw);  // Xor biases by -32768; 65536 undoes 2x
+}
+
+HWY_API Vec32<uint16_t> SumOfLanes(Vec32<uint16_t> v) {  // 2 x u16
+  constexpr int kSumLaneIdx = HWY_IS_BIG_ENDIAN;  // lane passed to Broadcast
+  DFromV<decltype(v)> du16;
+  return Broadcast<kSumLaneIdx>(BitCast(du16, AltivecU16SumsOf2(v)));
+}
+
+HWY_API Vec64<uint16_t> SumOfLanes(Vec64<uint16_t> v) {  // 4 x u16
+  constexpr int kSumLaneIdx = HWY_IS_LITTLE_ENDIAN ? 0 : 3;  // lane to broadcast
+  const Full64<uint16_t> du16;
+  const auto zero = Zero(Full128<int32_t>());
+  return Broadcast<kSumLaneIdx>(  // pair sums first, then AltivecVsum2sws
+      AltivecVsum2sws(du16, AltivecU16SumsOf2(v).raw, zero.raw));
+}
+
+HWY_API Vec128<uint16_t> SumOfLanes(Vec128<uint16_t> v) {  // 8 x u16
+  constexpr int kSumLaneIdx = HWY_IS_LITTLE_ENDIAN ? 0 : 7;  // lane to broadcast
+  const Full128<uint16_t> du16;
+  const auto zero = Zero(Full128<int32_t>());
+  return Broadcast<kSumLaneIdx>(  // pair sums first, then AltivecVsumsws
+      AltivecVsumsws(du16, AltivecU16SumsOf2(v).raw, zero.raw));
+}
+
+HWY_API Vec32<int16_t> SumOfLanes(Vec32<int16_t> v) {  // 2 x i16
+  constexpr int kSumLaneIdx = HWY_IS_BIG_ENDIAN;  // lane passed to Broadcast
+  const Full32<int16_t> di16;
+  const auto zero = Zero(Full128<int32_t>());
+  return Broadcast<kSumLaneIdx>(AltivecVsum4shs(di16, v.raw, zero.raw));
+}
+
+HWY_API Vec64<int16_t> SumOfLanes(Vec64<int16_t> v) {  // 4 x i16
+  constexpr int kSumLaneIdx = HWY_IS_LITTLE_ENDIAN ? 0 : 3;  // lane to broadcast
+  const Full128<int32_t> di32;
+  const Full64<int16_t> di16;
+  const auto zero = Zero(di32);
+  return Broadcast<kSumLaneIdx>(AltivecVsum2sws(  // vsum4shs, then vsum2sws
+      di16, AltivecVsum4shs(di32, v.raw, zero.raw).raw, zero.raw));
+}
+
+HWY_API Vec128<int16_t> SumOfLanes(Vec128<int16_t> v) {  // 8 x i16
+  constexpr int kSumLaneIdx = HWY_IS_LITTLE_ENDIAN ? 0 : 7;  // lane to broadcast
+  const Full128<int16_t> di16;
+  const Full128<int32_t> di32;
+  const auto zero = Zero(di32);
+  return Broadcast<kSumLaneIdx>(AltivecVsumsws(  // vsum4shs, then vsumsws
+      di16, AltivecVsum4shs(di32, v.raw, zero.raw).raw, zero.raw));
+}
+
+// u8, N=2, N=4, N=8, N=16:
+HWY_API Vec16<uint8_t> SumOfLanes(Vec16<uint8_t> v) {  // 2 x u8
+  constexpr int kSumLaneIdx = HWY_IS_LITTLE_ENDIAN ? 0 : 3;  // lane to broadcast
+  const Full16<uint8_t> du8;
+  const Full16<uint16_t> du16;
+  const Twice<decltype(du8)> dt_u8;
+  const Twice<decltype(du16)> dt_u16;
+  const Full128<uint32_t> du32;
+  return LowerHalf(Broadcast<kSumLaneIdx>(AltivecVsum4ubs(  // widen with zeros
+      dt_u8, BitCast(dt_u8, Combine(dt_u16, Zero(du16), BitCast(du16, v))).raw,
+      Zero(du32).raw)));
+}
+
+HWY_API Vec32<uint8_t> SumOfLanes(Vec32<uint8_t> v) {  // 4 x u8
+  constexpr int kSumLaneIdx = HWY_IS_LITTLE_ENDIAN ? 0 : 3;  // lane to broadcast
+  const Full128<uint32_t> du32;
+  const Full32<uint8_t> du8;
+  return Broadcast<kSumLaneIdx>(AltivecVsum4ubs(du8, v.raw, Zero(du32).raw));
+}
+
+HWY_API Vec64<uint8_t> SumOfLanes(Vec64<uint8_t> v) {  // 8 x u8
+  constexpr int kSumLaneIdx = HWY_IS_LITTLE_ENDIAN ? 0 : 7;  // lane to broadcast
+  const Full64<uint8_t> du8;
+  return Broadcast<kSumLaneIdx>(BitCast(du8, SumsOf8(v)));  // reuses SumsOf8
+}
+
+HWY_API Vec128<uint8_t> SumOfLanes(Vec128<uint8_t> v) {  // 16 x u8
+  constexpr int kSumLaneIdx = HWY_IS_LITTLE_ENDIAN ? 0 : 15;  // lane to broadcast
+
+  const Full128<uint32_t> du32;
+  const RebindToSigned<decltype(du32)> di32;
+  const Full128<uint8_t> du8;
+  const Vec128<uint32_t> zero = Zero(du32);
+  return Broadcast<kSumLaneIdx>(  // vsum4ubs, then vsumsws
+      AltivecVsumsws(du8, AltivecVsum4ubs(di32, v.raw, zero.raw).raw,
+                     BitCast(di32, zero).raw));
+}
+
+HWY_API Vec16<int8_t> SumOfLanes(Vec16<int8_t> v) {  // 2 x i8
+  constexpr int kSumLaneIdx = HWY_IS_LITTLE_ENDIAN ? 0 : 3;  // lane to broadcast
+
+  const Full128<uint16_t> du16;
+  const Repartition<int32_t, decltype(du16)> di32;
+  const Repartition<int8_t, decltype(du16)> di8;
+  const Vec128<int8_t> zzvv = BitCast(  // input interleaved with zero u16 lanes
+      di8, InterleaveLower(BitCast(du16, Vec128<int8_t>{v.raw}), Zero(du16)));
+  return Vec16<int8_t>{
+      Broadcast<kSumLaneIdx>(AltivecVsum4sbs(di8, zzvv.raw, Zero(di32).raw))
+          .raw};
+}
+
+HWY_API Vec32<int8_t> SumOfLanes(Vec32<int8_t> v) {  // 4 x i8
+  constexpr int kSumLaneIdx = HWY_IS_LITTLE_ENDIAN ? 0 : 3;  // lane to broadcast
+  const Full32<int8_t> di8;
+  const Vec128<int32_t> zero = Zero(Full128<int32_t>());
+  return Broadcast<kSumLaneIdx>(AltivecVsum4sbs(di8, v.raw, zero.raw));
+}
+
+HWY_API Vec64<int8_t> SumOfLanes(Vec64<int8_t> v) {  // 8 x i8
+  constexpr int kSumLaneIdx = HWY_IS_LITTLE_ENDIAN ? 0 : 7;  // lane to broadcast
+  const Full128<int32_t> di32;
+  const Vec128<int32_t> zero = Zero(di32);
+  const Full64<int8_t> di8;
+  return Broadcast<kSumLaneIdx>(AltivecVsum2sws(  // vsum4sbs, then vsum2sws
+      di8, AltivecVsum4sbs(di32, v.raw, zero.raw).raw, zero.raw));
+}
+
+HWY_API Vec128<int8_t> SumOfLanes(Vec128<int8_t> v) {  // 16 x i8
+  constexpr int kSumLaneIdx = HWY_IS_LITTLE_ENDIAN ? 0 : 15;  // lane to broadcast
+  const Full128<int8_t> di8;
+  const Full128<int32_t> di32;
+  const Vec128<int32_t> zero = Zero(di32);
+  return Broadcast<kSumLaneIdx>(AltivecVsumsws(  // vsum4sbs, then vsumsws
+      di8, AltivecVsum4sbs(di32, v.raw, zero.raw).raw, zero.raw));
+}
+
+template <size_t N, HWY_IF_V_SIZE_GT(uint8_t, N, 4)>  // u8 vectors wider than 4 bytes
+HWY_API Vec128<uint8_t, N> MaxOfLanes(Vec128<uint8_t, N> v) {
+  const DFromV<decltype(v)> d;
+  const RepartitionToWide<decltype(d)> d16;
+  const RepartitionToWide<decltype(d16)> d32;
+  Vec128<uint8_t, N> vm = Max(v, Reverse2(d, v));  // combine adjacent bytes
+  vm = Max(vm, BitCast(d, Reverse2(d16, BitCast(d16, vm))));  // adjacent 16-bit
+  vm = Max(vm, BitCast(d, Reverse2(d32, BitCast(d32, vm))));  // adjacent 32-bit
+  if (N > 8) {  // full 128-bit vector: one more step across the 64-bit halves
+    const RepartitionToWide<decltype(d32)> d64;
+    vm = Max(vm, BitCast(d, Reverse2(d64, BitCast(d64, vm))));
+  }
+  return vm;
+}
+
+template <size_t N, HWY_IF_V_SIZE_GT(uint8_t, N, 4)>  // u8 vectors wider than 4 bytes
+HWY_API Vec128<uint8_t, N> MinOfLanes(Vec128<uint8_t, N> v) {
+  const DFromV<decltype(v)> d;
+  const RepartitionToWide<decltype(d)> d16;
+  const RepartitionToWide<decltype(d16)> d32;
+  Vec128<uint8_t, N> vm = Min(v, Reverse2(d, v));  // combine adjacent bytes
+  vm = Min(vm, BitCast(d, Reverse2(d16, BitCast(d16, vm))));  // adjacent 16-bit
+  vm = Min(vm, BitCast(d, Reverse2(d32, BitCast(d32, vm))));  // adjacent 32-bit
+  if (N > 8) {  // full 128-bit vector: one more step across the 64-bit halves
+    const RepartitionToWide<decltype(d32)> d64;
+    vm = Min(vm, BitCast(d, Reverse2(d64, BitCast(d64, vm))));
+  }
+  return vm;
+}
+
+template <size_t N, HWY_IF_V_SIZE_GT(int8_t, N, 4)>  // i8 vectors wider than 4 bytes
+HWY_API Vec128<int8_t, N> MaxOfLanes(Vec128<int8_t, N> v) {
+  const DFromV<decltype(v)> d;
+  const RepartitionToWide<decltype(d)> d16;
+  const RepartitionToWide<decltype(d16)> d32;
+  Vec128<int8_t, N> vm = Max(v, Reverse2(d, v));  // combine adjacent bytes
+  vm = Max(vm, BitCast(d, Reverse2(d16, BitCast(d16, vm))));  // adjacent 16-bit
+  vm = Max(vm, BitCast(d, Reverse2(d32, BitCast(d32, vm))));  // adjacent 32-bit
+  if (N > 8) {  // full 128-bit vector: one more step across the 64-bit halves
+    const RepartitionToWide<decltype(d32)> d64;
+    vm = Max(vm, BitCast(d, Reverse2(d64, BitCast(d64, vm))));
+  }
+  return vm;
+}
+
+template <size_t N, HWY_IF_V_SIZE_GT(int8_t, N, 4)>  // i8 vectors wider than 4 bytes
+HWY_API Vec128<int8_t, N> MinOfLanes(Vec128<int8_t, N> v) {
+  const DFromV<decltype(v)> d;
+  const RepartitionToWide<decltype(d)> d16;
+  const RepartitionToWide<decltype(d16)> d32;
+  Vec128<int8_t, N> vm = Min(v, Reverse2(d, v));  // combine adjacent bytes
+  vm = Min(vm, BitCast(d, Reverse2(d16, BitCast(d16, vm))));  // adjacent 16-bit
+  vm = Min(vm, BitCast(d, Reverse2(d32, BitCast(d32, vm))));  // adjacent 32-bit
+  if (N > 8) {  // full 128-bit vector: one more step across the 64-bit halves
+    const RepartitionToWide<decltype(d32)> d64;
+    vm = Min(vm, BitCast(d, Reverse2(d64, BitCast(d64, vm))));
+  }
+  return vm;
+}
+
+template <size_t N, HWY_IF_V_SIZE_GT(uint16_t, N, 2)>  // u16 vectors wider than 2 bytes
+HWY_API Vec128<uint16_t, N> MinOfLanes(Vec128<uint16_t, N> v) {
+  const Simd<uint16_t, N, 0> d;
+  const RepartitionToWide<decltype(d)> d32;
+#if HWY_IS_LITTLE_ENDIAN  // split each 32-bit lane into zero-extended u16 halves
+  const auto even = And(BitCast(d32, v), Set(d32, 0xFFFF));
+  const auto odd = ShiftRight<16>(BitCast(d32, v));
+#else
+  const auto even = ShiftRight<16>(BitCast(d32, v));
+  const auto odd = And(BitCast(d32, v), Set(d32, 0xFFFF));
+#endif
+  const auto min = MinOfLanes(Min(even, odd));  // reduce as 32-bit lanes
+  // Also broadcast into odd lanes on little-endian and into even lanes
+  // on big-endian
+  return Vec128<uint16_t, N>{vec_pack(min.raw, min.raw)};
+}
+template <size_t N, HWY_IF_V_SIZE_GT(int16_t, N, 2)>  // i16 vectors wider than 2 bytes
+HWY_API Vec128<int16_t, N> MinOfLanes(Vec128<int16_t, N> v) {
+  const Simd<int16_t, N, 0> d;
+  const RepartitionToWide<decltype(d)> d32;
+  // Sign-extend each 16-bit half of every 32-bit lane
+#if HWY_IS_LITTLE_ENDIAN
+  const auto even = ShiftRight<16>(ShiftLeft<16>(BitCast(d32, v)));
+  const auto odd = ShiftRight<16>(BitCast(d32, v));
+#else
+  const auto even = ShiftRight<16>(BitCast(d32, v));
+  const auto odd = ShiftRight<16>(ShiftLeft<16>(BitCast(d32, v)));
+#endif
+  const auto min = MinOfLanes(Min(even, odd));  // reduce as 32-bit lanes
+  // Also broadcast into odd lanes on little-endian and into even lanes
+  // on big-endian
+  return Vec128<int16_t, N>{vec_pack(min.raw, min.raw)};
+}
+
+template <size_t N, HWY_IF_V_SIZE_GT(uint16_t, N, 2)>  // u16 vectors wider than 2 bytes
+HWY_API Vec128<uint16_t, N> MaxOfLanes(Vec128<uint16_t, N> v) {
+  const Simd<uint16_t, N, 0> d;
+  const RepartitionToWide<decltype(d)> d32;
+#if HWY_IS_LITTLE_ENDIAN  // split each 32-bit lane into zero-extended u16 halves
+  const auto even = And(BitCast(d32, v), Set(d32, 0xFFFF));
+  const auto odd = ShiftRight<16>(BitCast(d32, v));
+#else
+  const auto even = ShiftRight<16>(BitCast(d32, v));
+  const auto odd = And(BitCast(d32, v), Set(d32, 0xFFFF));
+#endif
+  const auto max = MaxOfLanes(Max(even, odd));  // reduce as 32-bit lanes
+  // Also broadcast into odd lanes (LE) / even lanes (BE), as in the other overloads.
+  return Vec128<uint16_t, N>{vec_pack(max.raw, max.raw)};
+}
+template <size_t N, HWY_IF_V_SIZE_GT(int16_t, N, 2)>  // i16 vectors wider than 2 bytes
+HWY_API Vec128<int16_t, N> MaxOfLanes(Vec128<int16_t, N> v) {
+  const Simd<int16_t, N, 0> d;
+  const RepartitionToWide<decltype(d)> d32;
+  // Sign-extend each 16-bit half of every 32-bit lane
+#if HWY_IS_LITTLE_ENDIAN
+  const auto even = ShiftRight<16>(ShiftLeft<16>(BitCast(d32, v)));
+  const auto odd = ShiftRight<16>(BitCast(d32, v));
+#else
+  const auto even = ShiftRight<16>(BitCast(d32, v));
+  const auto odd = ShiftRight<16>(ShiftLeft<16>(BitCast(d32, v)));
+#endif
+  const auto max = MaxOfLanes(Max(even, odd));  // reduce as 32-bit lanes
+  // Also broadcast into odd lanes on little-endian and into even lanes
+  // on big-endian
+  return Vec128<int16_t, N>{vec_pack(max.raw, max.raw)};
+}
+
+}  // namespace detail
+
+// Public entry points: forward to the detail:: overloads (8/16/32/64-bit lanes).
+template <class D>
+HWY_API VFromD<D> SumOfLanes(D /* tag */, VFromD<D> v) {
+  return detail::SumOfLanes(v);
+}
+template <class D>
+HWY_API TFromD<D> ReduceSum(D /* tag */, VFromD<D> v) {
+  return GetLane(detail::SumOfLanes(v));  // scalar taken from the reduced vector
+}
+template <class D>
+HWY_API VFromD<D> MinOfLanes(D /* tag */, VFromD<D> v) {
+  return detail::MinOfLanes(v);
+}
+template <class D>
+HWY_API VFromD<D> MaxOfLanes(D /* tag */, VFromD<D> v) {
+  return detail::MaxOfLanes(v);
+}
+
+// ------------------------------ Lt128
+
+namespace detail {
+
+// Returns vector-mask for Lt128 (a < b, each vector taken as one 128-bit value).
+template <class D, class V = VFromD<D>>
+HWY_INLINE V Lt128Vec(D d, V a, V b) {
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "D must be u64");
+#if HWY_PPC_HAVE_10 && defined(__SIZEOF_INT128__)  // native 128-bit compare
+  (void)d;
+  using VU64 = __vector unsigned long long;
+  using VU128 = __vector unsigned __int128;
+#if HWY_IS_LITTLE_ENDIAN
+  const VU128 a_u128 = reinterpret_cast<VU128>(a.raw);
+  const VU128 b_u128 = reinterpret_cast<VU128>(b.raw);
+#else
+  // NOTE: Need to swap the halves of both a and b on big-endian targets
+  // as the upper 64 bits of a and b are in lane 1 and the lower 64 bits
+  // of a and b are in lane 0 whereas the vec_cmplt operation below expects
+  // the upper 64 bits in lane 0 and the lower 64 bits in lane 1 on
+  // big-endian PPC targets.
+  const VU128 a_u128 = reinterpret_cast<VU128>(vec_sld(a.raw, a.raw, 8));
+  const VU128 b_u128 = reinterpret_cast<VU128>(vec_sld(b.raw, b.raw, 8));
+#endif
+  return V{reinterpret_cast<VU64>(vec_cmplt(a_u128, b_u128))};
+#else  // !HWY_PPC_HAVE_10
+  // Truth table of Eq and Lt for Hi and Lo u64.
+  // (removed lines with (=H && cH) or (=L && cL) - cannot both be true)
+  // =H =L cH cL  | out = cH | (=H & cL)
+  //  0  0  0  0  |  0
+  //  0  0  0  1  |  0
+  //  0  0  1  0  |  1
+  //  0  0  1  1  |  1
+  //  0  1  0  0  |  0
+  //  0  1  0  1  |  0
+  //  0  1  1  0  |  1
+  //  1  0  0  0  |  0
+  //  1  0  0  1  |  1
+  //  1  1  0  0  |  0
+  const auto eqHL = Eq(a, b);
+  const V ltHL = VecFromMask(d, Lt(a, b));
+  const V ltLX = ShiftLeftLanes<1>(ltHL);  // low-lane result moved up one lane
+  const V vecHx = IfThenElse(eqHL, ltLX, ltHL);  // equal lanes use the shifted value
+  return InterleaveUpper(d, vecHx, vecHx);  // presumably upper lane in both lanes
+#endif
+}
+
+// Returns vector-mask for Eq128 (a == b, each vector taken as one 128-bit value).
+template <class D, class V = VFromD<D>>
+HWY_INLINE V Eq128Vec(D d, V a, V b) {
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "D must be u64");
+#if HWY_PPC_HAVE_10 && defined(__SIZEOF_INT128__)  // native 128-bit compare
+  (void)d;
+  using VU64 = __vector unsigned long long;
+  using VU128 = __vector unsigned __int128;
+  return V{reinterpret_cast<VU64>(vec_cmpeq(reinterpret_cast<VU128>(a.raw),
+                                            reinterpret_cast<VU128>(b.raw)))};
+#else  // fallback: AND each lane's result with the Reverse2'd result
+  const auto eqHL = VecFromMask(d, Eq(a, b));
+  const auto eqLH = Reverse2(d, eqHL);
+  return And(eqHL, eqLH);
+#endif
+}
+
+template <class D, class V = VFromD<D>>  // vector-mask for Ne128 (128-bit a != b)
+HWY_INLINE V Ne128Vec(D d, V a, V b) {
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "D must be u64");
+#if HWY_PPC_HAVE_10 && defined(__SIZEOF_INT128__)  // native 128-bit compare
+  (void)d;
+  using VU64 = __vector unsigned long long;
+  using VU128 = __vector unsigned __int128;
+  return V{reinterpret_cast<VU64>(vec_cmpne(reinterpret_cast<VU128>(a.raw),
+                                            reinterpret_cast<VU128>(b.raw)))};
+#else  // fallback: OR each lane's result with the Reverse2'd result
+  const auto neHL = VecFromMask(d, Ne(a, b));
+  const auto neLH = Reverse2(d, neHL);
+  return Or(neHL, neLH);
+#endif
+}
+
+template <class D, class V = VFromD<D>>  // vector-mask for Lt128Upper
+HWY_INLINE V Lt128UpperVec(D d, V a, V b) {
+  const V ltHL = VecFromMask(d, Lt(a, b));
+  return InterleaveUpper(d, ltHL, ltHL);  // presumably upper lane in both lanes
+}
+
+template <class D, class V = VFromD<D>>  // vector-mask for Eq128Upper
+HWY_INLINE V Eq128UpperVec(D d, V a, V b) {
+  const V eqHL = VecFromMask(d, Eq(a, b));
+  return InterleaveUpper(d, eqHL, eqHL);  // presumably upper lane in both lanes
+}
+
+template <class D, class V = VFromD<D>>  // vector-mask for Ne128Upper
+HWY_INLINE V Ne128UpperVec(D d, V a, V b) {
+  const V neHL = VecFromMask(d, Ne(a, b));
+  return InterleaveUpper(d, neHL, neHL);  // presumably upper lane in both lanes
+}
+
+}  // namespace detail
+
+template <class D, class V = VFromD<D>>  // mask form of detail::Lt128Vec
+HWY_API MFromD<D> Lt128(D d, V a, V b) {
+  return MaskFromVec(detail::Lt128Vec(d, a, b));
+}
+
+template <class D, class V = VFromD<D>>  // mask form of detail::Eq128Vec
+HWY_API MFromD<D> Eq128(D d, V a, V b) {
+  return MaskFromVec(detail::Eq128Vec(d, a, b));
+}
+
+template <class D, class V = VFromD<D>>  // mask form of detail::Ne128Vec
+HWY_API MFromD<D> Ne128(D d, V a, V b) {
+  return MaskFromVec(detail::Ne128Vec(d, a, b));
+}
+
+template <class D, class V = VFromD<D>>  // mask form of detail::Lt128UpperVec
+HWY_API MFromD<D> Lt128Upper(D d, V a, V b) {
+  return MaskFromVec(detail::Lt128UpperVec(d, a, b));
+}
+
+template <class D, class V = VFromD<D>>  // mask form of detail::Eq128UpperVec
+HWY_API MFromD<D> Eq128Upper(D d, V a, V b) {
+  return MaskFromVec(detail::Eq128UpperVec(d, a, b));
+}
+
+template <class D, class V = VFromD<D>>  // mask form of detail::Ne128UpperVec
+HWY_API MFromD<D> Ne128Upper(D d, V a, V b) {
+  return MaskFromVec(detail::Ne128UpperVec(d, a, b));
+}
+
+// ------------------------------ Min128, Max128 (Lt128)
+
+// Uses the vector-mask directly, avoiding the extra MaskFromVec in Lt128.
+template <class D, class V = VFromD<D>>
+HWY_API V Min128(D d, const V a, const V b) {
+  return IfVecThenElse(detail::Lt128Vec(d, a, b), a, b);  // a if a < b, else b
+}
+
+template <class D, class V = VFromD<D>>
+HWY_API V Max128(D d, const V a, const V b) {
+  return IfVecThenElse(detail::Lt128Vec(d, b, a), a, b);  // a if b < a, else b
+}
+
+template <class D, class V = VFromD<D>>
+HWY_API V Min128Upper(D d, const V a, const V b) {
+  return IfVecThenElse(detail::Lt128UpperVec(d, a, b), a, b);  // a if a < b (upper)
+}
+
+template <class D, class V = VFromD<D>>
+HWY_API V Max128Upper(D d, const V a, const V b) {
+  return IfVecThenElse(detail::Lt128UpperVec(d, b, a), a, b);  // a if b < a (upper)
+}
+
+// -------------------- LeadingZeroCount, TrailingZeroCount, HighestSetBitIndex
+
+#ifdef HWY_NATIVE_LEADING_ZERO_COUNT  // toggle: undefine if set, define if unset
+#undef HWY_NATIVE_LEADING_ZERO_COUNT
+#else
+#define HWY_NATIVE_LEADING_ZERO_COUNT
+#endif
+
+template <class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V)>
+HWY_API V LeadingZeroCount(V v) {
+  return V{vec_cntlz(v.raw)};  // per-lane count via the vec_cntlz intrinsic
+}
+
+template <class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V)>
+HWY_API V HighestSetBitIndex(V v) {
+  const DFromV<decltype(v)> d;
+  using T = TFromD<decltype(d)>;
+  return BitCast(d, Set(d, T{sizeof(T) * 8 - 1}) - LeadingZeroCount(v));  // bits-1-clz
+}
+
+#if HWY_PPC_HAVE_9  // use the vector count-trailing-zeros intrinsic directly
+template <class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V)>
+HWY_API V TrailingZeroCount(V v) {
+#if HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 700  // older GCC spelling
+  return V{vec_vctz(v.raw)};
+#else
+  return V{vec_cnttz(v.raw)};
+#endif
+}
+#else  // fallback: derive the count from HighestSetBitIndex
+template <class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V)>
+HWY_API V TrailingZeroCount(V v) {
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;
+  using TI = TFromD<decltype(di)>;
+
+  const auto vi = BitCast(di, v);
+  const auto lowest_bit = And(vi, Neg(vi));  // x & -x isolates the lowest set bit
+  constexpr TI kNumOfBitsInT{sizeof(TI) * 8};
+  const auto bit_idx = HighestSetBitIndex(lowest_bit);  // presumably -1 if v == 0
+  return BitCast(d, IfThenElse(MaskFromVec(BroadcastSignBit(bit_idx)),
+                               Set(di, kNumOfBitsInT), bit_idx));  // negative -> width
+}
+#endif
+
+#undef HWY_PPC_HAVE_9
+#undef HWY_PPC_HAVE_10
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/ops/rvv-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/rvv-inl.h
new file mode 100644
index 0000000000..c5b76db875
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/rvv-inl.h
@@ -0,0 +1,4887 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// RISC-V V vectors (length not known at compile time).
+// External include guard in highway.h - see comment there.
+
+#include <riscv_vector.h>
+
+#include "hwy/ops/shared-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Support for vfloat16m*_t and PromoteTo/DemoteTo.
+#ifdef __riscv_zvfhmin
+#define HWY_RVV_HAVE_F16C 1
+#else
+#define HWY_RVV_HAVE_F16C 0
+#endif
+
+template <class V>
+struct DFromV_t {};  // primary template is empty; specialized in macros
+template <class V>
+using DFromV = typename DFromV_t<RemoveConst<V>>::type;  // tag for vector V
+
+template <class V>
+using TFromV = TFromD<DFromV<V>>;  // lane type of vector V
+
+template <typename T, size_t N, int kPow2>
+constexpr size_t MLenFromD(Simd<T, N, kPow2> /* tag */) {
+  // Returns MLEN (as in vbool##MLEN##_t) = type bits / LMUL. Folding *8 into
+  // the ScaleByPower argument enables fractional LMUL < 1. Capped at 64, the
+  // largest value for which vbool##_t are defined.
+  return HWY_MIN(64, sizeof(T) * 8 * 8 / detail::ScaleByPower(8, kPow2));
+}
+
+namespace detail {
+
+template <class D>
+class AdjustSimdTagToMinVecPow2_t {};  // primary: only Simd<> is specialized
+
+template <typename T, size_t N, int kPow2>
+class AdjustSimdTagToMinVecPow2_t<Simd<T, N, kPow2>> {
+ private:
+  using D = Simd<T, N, kPow2>;
+  static constexpr int kMinVecPow2 =  // -3 for 1-byte T up to 0 for 8-byte T
+      -3 + static_cast<int>(FloorLog2(sizeof(T)));
+  static constexpr size_t kNumMaxLanes = HWY_MAX_LANES_D(D);
+  static constexpr int kNewPow2 = HWY_MAX(kPow2, kMinVecPow2);  // clamp upward
+  static constexpr size_t kNewN = D::template NewN<kNewPow2, kNumMaxLanes>();
+
+ public:
+  using type = Simd<T, kNewN, kNewPow2>;  // same T, kPow2 raised to the minimum
+};
+
+template <class D>
+using AdjustSimdTagToMinVecPow2 =
+    typename AdjustSimdTagToMinVecPow2_t<RemoveConst<D>>::type;
+
+}  // namespace detail
+
+// ================================================== MACROS
+
+// Generate specializations and function definitions using X macros. Although
+// harder to read and debug, writing everything manually is too bulky.
+
+namespace detail {  // for code folding
+
+// For all mask sizes MLEN: (1/Nth of a register, one bit per lane)
+// The first three arguments are SEW, SHIFT, MLEN, where SEW and SHIFT are
+// arbitrary values chosen such that SEW >> SHIFT = MLEN.
+#define HWY_RVV_FOREACH_B(X_MACRO, NAME, OP) \
+  X_MACRO(64, 0, 64, NAME, OP)               \
+  X_MACRO(32, 0, 32, NAME, OP)               \
+  X_MACRO(16, 0, 16, NAME, OP)               \
+  X_MACRO(8, 0, 8, NAME, OP)                 \
+  X_MACRO(8, 1, 4, NAME, OP)                 \
+  X_MACRO(8, 2, 2, NAME, OP)                 \
+  X_MACRO(8, 3, 1, NAME, OP)
+
+// For given SEW, iterate over one of LMULS: _TRUNC, _EXT, _ALL. This allows
+// reusing type lists such as HWY_RVV_FOREACH_U for _ALL (the usual case) or
+// _EXT (for Combine). To achieve this, we HWY_CONCAT with the LMULS suffix.
+//
+// Precompute SEW/LMUL => MLEN to allow token-pasting the result. Likewise pass
+// the double-width and half SEW and LMUL (suffixed D and H). "__" means there
+// is no such SEW or LMUL (e.g. SEWD for SEW=64, SEWH for SEW=8, LMULD for m8).
+// Args: BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP
+
+// LMULS = _TRUNC: truncatable, i.e. LMULH exists (not the smallest LMUL)
+#define HWY_RVV_FOREACH_08_TRUNC(X_MACRO, BASE, CHAR, NAME, OP)            \
+  X_MACRO(BASE, CHAR, 8, 16, __, mf4, mf2, mf8, -2, /*MLEN=*/32, NAME, OP) \
+  X_MACRO(BASE, CHAR, 8, 16, __, mf2, m1, mf4, -1, /*MLEN=*/16, NAME, OP)  \
+  X_MACRO(BASE, CHAR, 8, 16, __, m1, m2, mf2, 0, /*MLEN=*/8, NAME, OP)     \
+  X_MACRO(BASE, CHAR, 8, 16, __, m2, m4, m1, 1, /*MLEN=*/4, NAME, OP)      \
+  X_MACRO(BASE, CHAR, 8, 16, __, m4, m8, m2, 2, /*MLEN=*/2, NAME, OP)      \
+  X_MACRO(BASE, CHAR, 8, 16, __, m8, __, m4, 3, /*MLEN=*/1, NAME, OP)
+
+#define HWY_RVV_FOREACH_16_TRUNC(X_MACRO, BASE, CHAR, NAME, OP)           \
+  X_MACRO(BASE, CHAR, 16, 32, 8, mf2, m1, mf4, -1, /*MLEN=*/32, NAME, OP) \
+  X_MACRO(BASE, CHAR, 16, 32, 8, m1, m2, mf2, 0, /*MLEN=*/16, NAME, OP)   \
+  X_MACRO(BASE, CHAR, 16, 32, 8, m2, m4, m1, 1, /*MLEN=*/8, NAME, OP)     \
+  X_MACRO(BASE, CHAR, 16, 32, 8, m4, m8, m2, 2, /*MLEN=*/4, NAME, OP)     \
+  X_MACRO(BASE, CHAR, 16, 32, 8, m8, __, m4, 3, /*MLEN=*/2, NAME, OP)
+
+#define HWY_RVV_FOREACH_32_TRUNC(X_MACRO, BASE, CHAR, NAME, OP)          \
+  X_MACRO(BASE, CHAR, 32, 64, 16, m1, m2, mf2, 0, /*MLEN=*/32, NAME, OP) \
+  X_MACRO(BASE, CHAR, 32, 64, 16, m2, m4, m1, 1, /*MLEN=*/16, NAME, OP)  \
+  X_MACRO(BASE, CHAR, 32, 64, 16, m4, m8, m2, 2, /*MLEN=*/8, NAME, OP)   \
+  X_MACRO(BASE, CHAR, 32, 64, 16, m8, __, m4, 3, /*MLEN=*/4, NAME, OP)
+
+#define HWY_RVV_FOREACH_64_TRUNC(X_MACRO, BASE, CHAR, NAME, OP)         \
+  X_MACRO(BASE, CHAR, 64, __, 32, m2, m4, m1, 1, /*MLEN=*/32, NAME, OP) \
+  X_MACRO(BASE, CHAR, 64, __, 32, m4, m8, m2, 2, /*MLEN=*/16, NAME, OP) \
+  X_MACRO(BASE, CHAR, 64, __, 32, m8, __, m4, 3, /*MLEN=*/8, NAME, OP)
+
+// LMULS = _DEMOTE: can demote from SEW*LMUL to SEWH*LMULH (LMULH is for SEWH).
+#define HWY_RVV_FOREACH_08_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP)           \
+  X_MACRO(BASE, CHAR, 8, 16, __, mf4, mf2, mf8, -2, /*MLEN=*/32, NAME, OP) \
+  X_MACRO(BASE, CHAR, 8, 16, __, mf2, m1, mf4, -1, /*MLEN=*/16, NAME, OP)  \
+  X_MACRO(BASE, CHAR, 8, 16, __, m1, m2, mf2, 0, /*MLEN=*/8, NAME, OP)     \
+  X_MACRO(BASE, CHAR, 8, 16, __, m2, m4, m1, 1, /*MLEN=*/4, NAME, OP)      \
+  X_MACRO(BASE, CHAR, 8, 16, __, m4, m8, m2, 2, /*MLEN=*/2, NAME, OP)      \
+  X_MACRO(BASE, CHAR, 8, 16, __, m8, __, m4, 3, /*MLEN=*/1, NAME, OP)
+
+#define HWY_RVV_FOREACH_16_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP)           \
+  X_MACRO(BASE, CHAR, 16, 32, 8, mf4, mf2, mf8, -2, /*MLEN=*/64, NAME, OP) \
+  X_MACRO(BASE, CHAR, 16, 32, 8, mf2, m1, mf4, -1, /*MLEN=*/32, NAME, OP)  \
+  X_MACRO(BASE, CHAR, 16, 32, 8, m1, m2, mf2, 0, /*MLEN=*/16, NAME, OP)    \
+  X_MACRO(BASE, CHAR, 16, 32, 8, m2, m4, m1, 1, /*MLEN=*/8, NAME, OP)      \
+  X_MACRO(BASE, CHAR, 16, 32, 8, m4, m8, m2, 2, /*MLEN=*/4, NAME, OP)      \
+  X_MACRO(BASE, CHAR, 16, 32, 8, m8, __, m4, 3, /*MLEN=*/2, NAME, OP)
+
+#define HWY_RVV_FOREACH_32_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP)           \
+  X_MACRO(BASE, CHAR, 32, 64, 16, mf2, m1, mf4, -1, /*MLEN=*/64, NAME, OP) \
+  X_MACRO(BASE, CHAR, 32, 64, 16, m1, m2, mf2, 0, /*MLEN=*/32, NAME, OP)   \
+  X_MACRO(BASE, CHAR, 32, 64, 16, m2, m4, m1, 1, /*MLEN=*/16, NAME, OP)    \
+  X_MACRO(BASE, CHAR, 32, 64, 16, m4, m8, m2, 2, /*MLEN=*/8, NAME, OP)     \
+  X_MACRO(BASE, CHAR, 32, 64, 16, m8, __, m4, 3, /*MLEN=*/4, NAME, OP)
+
+#define HWY_RVV_FOREACH_64_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP)         \
+  X_MACRO(BASE, CHAR, 64, __, 32, m1, m2, mf2, 0, /*MLEN=*/64, NAME, OP) \
+  X_MACRO(BASE, CHAR, 64, __, 32, m2, m4, m1, 1, /*MLEN=*/32, NAME, OP)  \
+  X_MACRO(BASE, CHAR, 64, __, 32, m4, m8, m2, 2, /*MLEN=*/16, NAME, OP)  \
+  X_MACRO(BASE, CHAR, 64, __, 32, m8, __, m4, 3, /*MLEN=*/8, NAME, OP)
+
+// LMULS = _LE2: LMUL <= 2 (from the smallest LMUL listed for the SEW up to m2)
+#define HWY_RVV_FOREACH_08_LE2(X_MACRO, BASE, CHAR, NAME, OP)              \
+  X_MACRO(BASE, CHAR, 8, 16, __, mf8, mf4, __, -3, /*MLEN=*/64, NAME, OP)  \
+  X_MACRO(BASE, CHAR, 8, 16, __, mf4, mf2, mf8, -2, /*MLEN=*/32, NAME, OP) \
+  X_MACRO(BASE, CHAR, 8, 16, __, mf2, m1, mf4, -1, /*MLEN=*/16, NAME, OP)  \
+  X_MACRO(BASE, CHAR, 8, 16, __, m1, m2, mf2, 0, /*MLEN=*/8, NAME, OP)     \
+  X_MACRO(BASE, CHAR, 8, 16, __, m2, m4, m1, 1, /*MLEN=*/4, NAME, OP)
+
+#define HWY_RVV_FOREACH_16_LE2(X_MACRO, BASE, CHAR, NAME, OP)              \
+  X_MACRO(BASE, CHAR, 16, 32, 8, mf4, mf2, mf8, -2, /*MLEN=*/64, NAME, OP) \
+  X_MACRO(BASE, CHAR, 16, 32, 8, mf2, m1, mf4, -1, /*MLEN=*/32, NAME, OP)  \
+  X_MACRO(BASE, CHAR, 16, 32, 8, m1, m2, mf2, 0, /*MLEN=*/16, NAME, OP)    \
+  X_MACRO(BASE, CHAR, 16, 32, 8, m2, m4, m1, 1, /*MLEN=*/8, NAME, OP)
+
+#define HWY_RVV_FOREACH_32_LE2(X_MACRO, BASE, CHAR, NAME, OP)              \
+  X_MACRO(BASE, CHAR, 32, 64, 16, mf2, m1, mf4, -1, /*MLEN=*/64, NAME, OP) \
+  X_MACRO(BASE, CHAR, 32, 64, 16, m1, m2, mf2, 0, /*MLEN=*/32, NAME, OP)   \
+  X_MACRO(BASE, CHAR, 32, 64, 16, m2, m4, m1, 1, /*MLEN=*/16, NAME, OP)
+
+#define HWY_RVV_FOREACH_64_LE2(X_MACRO, BASE, CHAR, NAME, OP)            \
+  X_MACRO(BASE, CHAR, 64, __, 32, m1, m2, mf2, 0, /*MLEN=*/64, NAME, OP) \
+  X_MACRO(BASE, CHAR, 64, __, 32, m2, m4, m1, 1, /*MLEN=*/32, NAME, OP)
+
+// LMULS = _EXT: extendable, i.e. LMULD exists (not the largest LMUL)
+#define HWY_RVV_FOREACH_08_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_08_LE2(X_MACRO, BASE, CHAR, NAME, OP)       \
+  X_MACRO(BASE, CHAR, 8, 16, __, m4, m8, m2, 2, /*MLEN=*/2, NAME, OP)
+
+#define HWY_RVV_FOREACH_16_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_16_LE2(X_MACRO, BASE, CHAR, NAME, OP)       \
+  X_MACRO(BASE, CHAR, 16, 32, 8, m4, m8, m2, 2, /*MLEN=*/4, NAME, OP)
+
+#define HWY_RVV_FOREACH_32_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_32_LE2(X_MACRO, BASE, CHAR, NAME, OP)       \
+  X_MACRO(BASE, CHAR, 32, 64, 16, m4, m8, m2, 2, /*MLEN=*/8, NAME, OP)
+
+#define HWY_RVV_FOREACH_64_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_64_LE2(X_MACRO, BASE, CHAR, NAME, OP)       \
+  X_MACRO(BASE, CHAR, 64, __, 32, m4, m8, m2, 2, /*MLEN=*/16, NAME, OP)
+
+// LMULS = _ALL (2^MinPow2() <= LMUL <= 8): _EXT plus the largest LMUL, m8
+#define HWY_RVV_FOREACH_08_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_08_EXT(X_MACRO, BASE, CHAR, NAME, OP)       \
+  X_MACRO(BASE, CHAR, 8, 16, __, m8, __, m4, 3, /*MLEN=*/1, NAME, OP)
+
+#define HWY_RVV_FOREACH_16_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_16_EXT(X_MACRO, BASE, CHAR, NAME, OP)       \
+  X_MACRO(BASE, CHAR, 16, 32, 8, m8, __, m4, 3, /*MLEN=*/2, NAME, OP)
+
+#define HWY_RVV_FOREACH_32_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_32_EXT(X_MACRO, BASE, CHAR, NAME, OP)       \
+  X_MACRO(BASE, CHAR, 32, 64, 16, m8, __, m4, 3, /*MLEN=*/4, NAME, OP)
+
+#define HWY_RVV_FOREACH_64_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_64_EXT(X_MACRO, BASE, CHAR, NAME, OP)       \
+  X_MACRO(BASE, CHAR, 64, __, 32, m8, __, m4, 3, /*MLEN=*/8, NAME, OP)
+
+// 'Virtual' LMUL. This upholds the Highway guarantee that vectors are at least
+// 128 bit and LowerHalf is defined whenever there are at least 2 lanes, even
+// though RISC-V LMUL must be at least SEW/64 (notice that this rules out
+// LMUL=1/2 for SEW=64). To bridge the gap, we add overloads for kPow2 equal to
+// one less than should be supported, with all other parameters (vector type
+// etc.) unchanged. For D with the lowest kPow2 ('virtual LMUL'), Lanes()
+// returns half of what it usually would.
+//
+// Notice that we can only add overloads whenever there is a D argument: those
+// are unique with respect to non-virtual-LMUL overloads because their kPow2
+// template argument differs. Otherwise, there is no actual vuint64mf2_t, and
+// defining another overload with the same LMUL would be an error. Thus we have
+// a separate _VIRT category for HWY_RVV_FOREACH*, and the common case is
+// _ALL_VIRT (meaning the regular LMUL plus the VIRT overloads), used in most
+// functions that take a D.
+
+#define HWY_RVV_FOREACH_08_VIRT(X_MACRO, BASE, CHAR, NAME, OP)  // none for SEW=8
+
+#define HWY_RVV_FOREACH_16_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
+  X_MACRO(BASE, CHAR, 16, 32, 8, mf4, mf2, mf8, -3, /*MLEN=*/64, NAME, OP)
+
+#define HWY_RVV_FOREACH_32_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
+  X_MACRO(BASE, CHAR, 32, 64, 16, mf2, m1, mf4, -2, /*MLEN=*/64, NAME, OP)
+
+#define HWY_RVV_FOREACH_64_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
+  X_MACRO(BASE, CHAR, 64, __, 32, m1, m2, mf2, -1, /*MLEN=*/64, NAME, OP)
+
+// ALL + VIRT: the _ALL entries followed by the _VIRT entry for the same SEW
+#define HWY_RVV_FOREACH_08_ALL_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_08_ALL(X_MACRO, BASE, CHAR, NAME, OP)            \
+  HWY_RVV_FOREACH_08_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
+
+#define HWY_RVV_FOREACH_16_ALL_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_16_ALL(X_MACRO, BASE, CHAR, NAME, OP)            \
+  HWY_RVV_FOREACH_16_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
+
+#define HWY_RVV_FOREACH_32_ALL_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_32_ALL(X_MACRO, BASE, CHAR, NAME, OP)            \
+  HWY_RVV_FOREACH_32_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
+
+#define HWY_RVV_FOREACH_64_ALL_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_64_ALL(X_MACRO, BASE, CHAR, NAME, OP)            \
+  HWY_RVV_FOREACH_64_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
+
+// LE2 + VIRT: the _LE2 entries followed by the _VIRT entry for the same SEW
+#define HWY_RVV_FOREACH_08_LE2_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_08_LE2(X_MACRO, BASE, CHAR, NAME, OP)            \
+  HWY_RVV_FOREACH_08_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
+
+#define HWY_RVV_FOREACH_16_LE2_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_16_LE2(X_MACRO, BASE, CHAR, NAME, OP)            \
+  HWY_RVV_FOREACH_16_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
+
+#define HWY_RVV_FOREACH_32_LE2_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_32_LE2(X_MACRO, BASE, CHAR, NAME, OP)            \
+  HWY_RVV_FOREACH_32_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
+
+#define HWY_RVV_FOREACH_64_LE2_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_64_LE2(X_MACRO, BASE, CHAR, NAME, OP)            \
+  HWY_RVV_FOREACH_64_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
+
+// EXT + VIRT: the _EXT entries followed by the _VIRT entry for the same SEW
+#define HWY_RVV_FOREACH_08_EXT_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_08_EXT(X_MACRO, BASE, CHAR, NAME, OP)            \
+  HWY_RVV_FOREACH_08_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
+
+#define HWY_RVV_FOREACH_16_EXT_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_16_EXT(X_MACRO, BASE, CHAR, NAME, OP)            \
+  HWY_RVV_FOREACH_16_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
+
+#define HWY_RVV_FOREACH_32_EXT_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_32_EXT(X_MACRO, BASE, CHAR, NAME, OP)            \
+  HWY_RVV_FOREACH_32_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
+
+#define HWY_RVV_FOREACH_64_EXT_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_64_EXT(X_MACRO, BASE, CHAR, NAME, OP)            \
+  HWY_RVV_FOREACH_64_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
+
+// DEMOTE + VIRT: the _DEMOTE entries followed by the _VIRT entry for that SEW
+#define HWY_RVV_FOREACH_08_DEMOTE_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_08_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP)            \
+  HWY_RVV_FOREACH_08_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
+
+#define HWY_RVV_FOREACH_16_DEMOTE_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_16_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP)            \
+  HWY_RVV_FOREACH_16_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
+
+#define HWY_RVV_FOREACH_32_DEMOTE_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_32_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP)            \
+  HWY_RVV_FOREACH_32_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
+
+#define HWY_RVV_FOREACH_64_DEMOTE_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
+  HWY_RVV_FOREACH_64_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP)            \
+  HWY_RVV_FOREACH_64_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
+
+// SEW for unsigned (BASE=uint, CHAR=u); LMULS selects the list to expand:
+#define HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP, LMULS) \
+  HWY_CONCAT(HWY_RVV_FOREACH_08, LMULS)(X_MACRO, uint, u, NAME, OP)
+#define HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP, LMULS) \
+  HWY_CONCAT(HWY_RVV_FOREACH_16, LMULS)(X_MACRO, uint, u, NAME, OP)
+#define HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP, LMULS) \
+  HWY_CONCAT(HWY_RVV_FOREACH_32, LMULS)(X_MACRO, uint, u, NAME, OP)
+#define HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP, LMULS) \
+  HWY_CONCAT(HWY_RVV_FOREACH_64, LMULS)(X_MACRO, uint, u, NAME, OP)
+
+// SEW for signed (BASE=int, CHAR=i):
+#define HWY_RVV_FOREACH_I08(X_MACRO, NAME, OP, LMULS) \
+  HWY_CONCAT(HWY_RVV_FOREACH_08, LMULS)(X_MACRO, int, i, NAME, OP)
+#define HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP, LMULS) \
+  HWY_CONCAT(HWY_RVV_FOREACH_16, LMULS)(X_MACRO, int, i, NAME, OP)
+#define HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP, LMULS) \
+  HWY_CONCAT(HWY_RVV_FOREACH_32, LMULS)(X_MACRO, int, i, NAME, OP)
+#define HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP, LMULS) \
+  HWY_CONCAT(HWY_RVV_FOREACH_64, LMULS)(X_MACRO, int, i, NAME, OP)
+
+// SEW for float (BASE=float, CHAR=f):
+
+// Used for conversion instructions if HWY_RVV_HAVE_F16C.
+#define HWY_RVV_FOREACH_F16_UNCONDITIONAL(X_MACRO, NAME, OP, LMULS) \
+  HWY_CONCAT(HWY_RVV_FOREACH_16, LMULS)(X_MACRO, float, f, NAME, OP)
+
+#if HWY_HAVE_FLOAT16
+// Full support for f16 in all ops
+#define HWY_RVV_FOREACH_F16(X_MACRO, NAME, OP, LMULS) \
+  HWY_RVV_FOREACH_F16_UNCONDITIONAL(X_MACRO, NAME, OP, LMULS)
+#else
+#define HWY_RVV_FOREACH_F16(X_MACRO, NAME, OP, LMULS)  // expands to nothing
+#endif
+#define HWY_RVV_FOREACH_F32(X_MACRO, NAME, OP, LMULS) \
+  HWY_CONCAT(HWY_RVV_FOREACH_32, LMULS)(X_MACRO, float, f, NAME, OP)
+#define HWY_RVV_FOREACH_F64(X_MACRO, NAME, OP, LMULS) \
+  HWY_CONCAT(HWY_RVV_FOREACH_64, LMULS)(X_MACRO, float, f, NAME, OP)
+
+// Commonly used type/SEW groups (each expands its members in listed order):
+#define HWY_RVV_FOREACH_UI08(X_MACRO, NAME, OP, LMULS) \
+  HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP, LMULS)        \
+  HWY_RVV_FOREACH_I08(X_MACRO, NAME, OP, LMULS)
+
+#define HWY_RVV_FOREACH_UI16(X_MACRO, NAME, OP, LMULS) \
+  HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP, LMULS)        \
+  HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP, LMULS)
+
+#define HWY_RVV_FOREACH_UI32(X_MACRO, NAME, OP, LMULS) \
+  HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP, LMULS)        \
+  HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP, LMULS)
+
+#define HWY_RVV_FOREACH_UI64(X_MACRO, NAME, OP, LMULS) \
+  HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP, LMULS)        \
+  HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP, LMULS)
+
+#define HWY_RVV_FOREACH_UI3264(X_MACRO, NAME, OP, LMULS) \
+  HWY_RVV_FOREACH_UI32(X_MACRO, NAME, OP, LMULS)         \
+  HWY_RVV_FOREACH_UI64(X_MACRO, NAME, OP, LMULS)
+
+#define HWY_RVV_FOREACH_U163264(X_MACRO, NAME, OP, LMULS) \
+  HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP, LMULS)           \
+  HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP, LMULS)           \
+  HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP, LMULS)
+
+#define HWY_RVV_FOREACH_I163264(X_MACRO, NAME, OP, LMULS) \
+  HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP, LMULS)           \
+  HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP, LMULS)           \
+  HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP, LMULS)
+
+#define HWY_RVV_FOREACH_UI163264(X_MACRO, NAME, OP, LMULS) \
+  HWY_RVV_FOREACH_U163264(X_MACRO, NAME, OP, LMULS)        \
+  HWY_RVV_FOREACH_I163264(X_MACRO, NAME, OP, LMULS)
+
+#define HWY_RVV_FOREACH_F3264(X_MACRO, NAME, OP, LMULS) \
+  HWY_RVV_FOREACH_F32(X_MACRO, NAME, OP, LMULS)         \
+  HWY_RVV_FOREACH_F64(X_MACRO, NAME, OP, LMULS)
+
+// For all SEW of one type category (unsigned, signed or float):
+#define HWY_RVV_FOREACH_U(X_MACRO, NAME, OP, LMULS) \
+  HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP, LMULS)     \
+  HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP, LMULS)     \
+  HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP, LMULS)     \
+  HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP, LMULS)
+
+#define HWY_RVV_FOREACH_I(X_MACRO, NAME, OP, LMULS) \
+  HWY_RVV_FOREACH_I08(X_MACRO, NAME, OP, LMULS)     \
+  HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP, LMULS)     \
+  HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP, LMULS)     \
+  HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP, LMULS)
+
+#define HWY_RVV_FOREACH_F(X_MACRO, NAME, OP, LMULS) \
+  HWY_RVV_FOREACH_F16(X_MACRO, NAME, OP, LMULS)     \
+  HWY_RVV_FOREACH_F3264(X_MACRO, NAME, OP, LMULS)
+
+// Commonly used type categories (UI = integers; plain FOREACH = U, I and F):
+#define HWY_RVV_FOREACH_UI(X_MACRO, NAME, OP, LMULS) \
+  HWY_RVV_FOREACH_U(X_MACRO, NAME, OP, LMULS)        \
+  HWY_RVV_FOREACH_I(X_MACRO, NAME, OP, LMULS)
+
+#define HWY_RVV_FOREACH(X_MACRO, NAME, OP, LMULS) \
+  HWY_RVV_FOREACH_U(X_MACRO, NAME, OP, LMULS)     \
+  HWY_RVV_FOREACH_I(X_MACRO, NAME, OP, LMULS)     \
+  HWY_RVV_FOREACH_F(X_MACRO, NAME, OP, LMULS)
+
+// Assemble types for use in x-macros
+#define HWY_RVV_T(BASE, SEW) BASE##SEW##_t  // lane type, e.g. uint8_t
+#define HWY_RVV_D(BASE, SEW, N, SHIFT) Simd<HWY_RVV_T(BASE, SEW), N, SHIFT>
+#define HWY_RVV_V(BASE, SEW, LMUL) v##BASE##SEW##LMUL##_t  // e.g. vuint8m1_t
+#define HWY_RVV_TUP(BASE, SEW, LMUL, TUP) v##BASE##SEW##LMUL##x##TUP##_t
+#define HWY_RVV_M(MLEN) vbool##MLEN##_t  // mask type, e.g. vbool8_t
+
+}  // namespace detail
+
+// Until we have full intrinsic support for fractional LMUL, mixed-precision
+// code can use LMUL 1..8 (adequate unless they need many registers).
+#define HWY_SPECIALIZE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                       MLEN, NAME, OP)                                         \
+  template <>                                                                  \
+  struct DFromV_t<HWY_RVV_V(BASE, SEW, LMUL)> {                                \
+    using Lane = HWY_RVV_T(BASE, SEW);                                         \
+    using type = ScalableTag<Lane, SHIFT>;                                     \
+  };
+
+HWY_RVV_FOREACH(HWY_SPECIALIZE, _, _, _ALL)  // one DFromV_t per vector type
+#undef HWY_SPECIALIZE
+
+// ------------------------------ Lanes
+
+// WARNING: we want to query VLMAX/sizeof(T), but this may actually change VL!
+#define HWY_RVV_LANES(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                      MLEN, NAME, OP)                                         \
+  template <size_t N>                                                         \
+  HWY_API size_t NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d) {                     \
+    constexpr size_t kFull = HWY_LANES(HWY_RVV_T(BASE, SEW));                 \
+    constexpr size_t kCap = MaxLanes(d);                                      \
+    /* If no cap, avoid generating a constant by using VLMAX. */              \
+    return N == kFull ? __riscv_vsetvlmax_e##SEW##LMUL()                      \
+                      : __riscv_vsetvl_e##SEW##LMUL(kCap);                    \
+  }
+
+#define HWY_RVV_LANES_VIRT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                           SHIFT, MLEN, NAME, OP)                           \
+  template <size_t N>                                                       \
+  HWY_API size_t NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d) {                   \
+    constexpr size_t kCap = MaxLanes(d);                                    \
+    /* In case of virtual LMUL (intrinsics do not provide "uint16mf8_t") */ \
+    /* vsetvl may or may not be correct, so do it ourselves. */             \
+    const size_t actual =                                                   \
+        detail::ScaleByPower(__riscv_vlenb() / (SEW / 8), SHIFT);           \
+    return HWY_MIN(actual, kCap);                                           \
+  }
+
+HWY_RVV_FOREACH(HWY_RVV_LANES, Lanes, setvlmax_e, _ALL)  // vsetvlmax / vsetvl
+HWY_RVV_FOREACH(HWY_RVV_LANES_VIRT, Lanes, lenb, _VIRT)  // computed from vlenb
+// If not already defined via HWY_RVV_FOREACH, define the overloads because
+// they do not require any new instruction.
+#if !HWY_HAVE_FLOAT16
+HWY_RVV_FOREACH_F16_UNCONDITIONAL(HWY_RVV_LANES, Lanes, setvlmax_e, _ALL)
+HWY_RVV_FOREACH_F16_UNCONDITIONAL(HWY_RVV_LANES_VIRT, Lanes, lenb, _VIRT)
+#endif
+#undef HWY_RVV_LANES
+#undef HWY_RVV_LANES_VIRT
+
+template <size_t N, int kPow2>
+HWY_API size_t Lanes(Simd<bfloat16_t, N, kPow2> /* tag */) {
+  return Lanes(Simd<int16_t, N, kPow2>());  // forwards to the int16_t overload
+}
+
+// ------------------------------ Common x-macros
+
+// Last argument to most intrinsics. Use when the op has no d arg of its own,
+// which means there is no user-specified cap; yields Lanes() of a full tag.
+#define HWY_RVV_AVL(SEW, SHIFT) \
+  Lanes(ScalableTag<HWY_RVV_T(uint, SEW), SHIFT>())
+
+// vector = f(vector), e.g. Not
+#define HWY_RVV_RETV_ARGV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH,  \
+                          SHIFT, MLEN, NAME, OP)                            \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) {   \
+    return __riscv_v##OP##_v_##CHAR##SEW##LMUL(v, HWY_RVV_AVL(SEW, SHIFT)); \
+  }
+
+// vector = f(vector, scalar), e.g. detail::AddS
+#define HWY_RVV_RETV_ARGVS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH,  \
+                           SHIFT, MLEN, NAME, OP)                            \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                         \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_T(BASE, SEW) b) {           \
+    return __riscv_v##OP##_##CHAR##SEW##LMUL(a, b, HWY_RVV_AVL(SEW, SHIFT)); \
+  }
+
+// vector = f(vector, vector), e.g. Add
+#define HWY_RVV_RETV_ARGVV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                           SHIFT, MLEN, NAME, OP)                           \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                        \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_V(BASE, SEW, LMUL) b) {    \
+    return __riscv_v##OP##_vv_##CHAR##SEW##LMUL(a, b,                       \
+                                                HWY_RVV_AVL(SEW, SHIFT));   \
+  }
+
+// mask = f(mask); passes ~0ull as the AVL instead of a Lanes() value
+#define HWY_RVV_RETM_ARGM(SEW, SHIFT, MLEN, NAME, OP) \
+  HWY_API HWY_RVV_M(MLEN) NAME(HWY_RVV_M(MLEN) m) {   \
+    return __riscv_vm##OP##_m_b##MLEN(m, ~0ull);      \
+  }
+
+// ================================================== INIT
+
+// ------------------------------ Set
+
+#define HWY_RVV_SET(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                    MLEN, NAME, OP)                                         \
+  template <size_t N>                                                       \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                        \
+      NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, HWY_RVV_T(BASE, SEW) arg) {    \
+    return __riscv_v##OP##_##CHAR##SEW##LMUL(arg, Lanes(d));                \
+  }
+
+HWY_RVV_FOREACH_UI(HWY_RVV_SET, Set, mv_v_x, _ALL_VIRT)  // integer lanes
+HWY_RVV_FOREACH_F(HWY_RVV_SET, Set, fmv_v_f, _ALL_VIRT)  // float lanes
+#undef HWY_RVV_SET
+
+// Treat bfloat16_t as int16_t (using the previously defined Set overloads);
+// required for Zero and VFromD.
+template <size_t N, int kPow2>
+decltype(Set(Simd<int16_t, N, kPow2>(), 0)) Set(Simd<bfloat16_t, N, kPow2> d,
+                                                bfloat16_t arg) {
+  return Set(RebindToSigned<decltype(d)>(), arg.bits);  // broadcasts arg.bits
+}
+#if !HWY_HAVE_FLOAT16  // Otherwise already defined above.
+// WARNING: returns a different type than emulated bfloat16_t so that we can
+// implement PromoteTo overloads for both bfloat16_t and float16_t, and also
+// provide a Neg(float16_t) overload that coexists with Neg(int16_t).
+template <size_t N, int kPow2>
+decltype(Set(Simd<uint16_t, N, kPow2>(), 0)) Set(Simd<float16_t, N, kPow2> d,
+                                                 float16_t arg) {
+  uint16_t bits;
+  CopySameSize(&arg, &bits);  // copy arg's representation into bits
+  return Set(RebindToUnsigned<decltype(d)>(), bits);  // uint16_t vector
+}
+#endif
+
+template <class D>
+using VFromD = decltype(Set(D(), TFromD<D>()));  // type that Set returns for D
+
+// ------------------------------ Zero
+
+template <class D>
+HWY_API VFromD<D> Zero(D d) {
+  // Broadcast an unsigned 0 and reinterpret it as D's vector type; going
+  // through the cast is what supports bfloat16_t.
+  return BitCast(d, Set(RebindToUnsigned<D>(), 0));
+}
+
+// ------------------------------ Undefined
+
+// RVV vundefined is 'poisoned' such that even XORing a _variable_ initialized
+// by it gives unpredictable results. It should only be used for maskoff, so
+// keep it internal. For the Highway op, just use Zero (single instruction).
+namespace detail {
+#define HWY_RVV_UNDEFINED(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                          SHIFT, MLEN, NAME, OP)                           \
+  template <size_t N>                                                      \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                       \
+      NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) /* tag */) {                     \
+    return __riscv_v##OP##_##CHAR##SEW##LMUL(); /* no AVL */               \
+  }
+
+HWY_RVV_FOREACH(HWY_RVV_UNDEFINED, Undefined, undefined, _ALL)  // real LMUL only
+#undef HWY_RVV_UNDEFINED
+}  // namespace detail
+
+template <class D>
+HWY_API VFromD<D> Undefined(D d) {
+  return Zero(d);  // public op returns zeros, not detail::Undefined
+}
+
+// ------------------------------ BitCast
+
+namespace detail {
+
+// Halves LMUL (vlmul_trunc). The LMUL arg is the source so we can use _TRUNC.
+#define HWY_RVV_TRUNC(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                      MLEN, NAME, OP)                                         \
+  HWY_API HWY_RVV_V(BASE, SEW, LMULH) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) {    \
+    return __riscv_v##OP##_v_##CHAR##SEW##LMUL##_##CHAR##SEW##LMULH(          \
+        v); /* no AVL */                                                      \
+  }
+HWY_RVV_FOREACH(HWY_RVV_TRUNC, Trunc, lmul_trunc, _TRUNC)
+#undef HWY_RVV_TRUNC
+
+// Doubles LMUL to `d2` via vlmul_ext (the arg is only necessary for _VIRT).
+#define HWY_RVV_EXT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                    MLEN, NAME, OP)                                         \
+  template <size_t N>                                                       \
+  HWY_API HWY_RVV_V(BASE, SEW, LMULD)                                       \
+      NAME(HWY_RVV_D(BASE, SEW, N, SHIFT + 1) /* d2 */,                     \
+           HWY_RVV_V(BASE, SEW, LMUL) v) {                                  \
+    return __riscv_v##OP##_v_##CHAR##SEW##LMUL##_##CHAR##SEW##LMULD(        \
+        v); /* no AVL */                                                    \
+  }
+HWY_RVV_FOREACH(HWY_RVV_EXT, Ext, lmul_ext, _EXT)
+#undef HWY_RVV_EXT
+
+// For virtual LMUL e.g. 'uint32mf4_t', the return type should be mf2, which is
+// the same as the actual input type, so this overload returns v unchanged.
+#define HWY_RVV_EXT_VIRT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                         SHIFT, MLEN, NAME, OP)                           \
+  template <size_t N>                                                     \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                      \
+      NAME(HWY_RVV_D(BASE, SEW, N, SHIFT + 1) /* d2 */,                   \
+           HWY_RVV_V(BASE, SEW, LMUL) v) {                                \
+    return v;                                                             \
+  }
+HWY_RVV_FOREACH(HWY_RVV_EXT_VIRT, Ext, lmul_ext, _VIRT)
+#undef HWY_RVV_EXT_VIRT
+
+#if !HWY_HAVE_FLOAT16
+template <class D, HWY_IF_F16_D(D)>
+VFromD<D> Ext(D d, VFromD<Half<D>> v) {
+  using DU = RebindToUnsigned<D>;  // reuse the unsigned Ext overloads
+  const auto half_bits = BitCast(Half<DU>(), v);
+  return BitCast(d, Ext(DU(), half_bits));
+}
+#endif
+
+template <class D, HWY_IF_BF16_D(D)>
+VFromD<D> Ext(D d, VFromD<Half<D>> v) {
+  using DU = RebindToUnsigned<D>;  // reuse the unsigned Ext overloads
+  const auto half_bits = BitCast(Half<DU>(), v);
+  return BitCast(d, Ext(DU(), half_bits));
+}
+
+// For BitCastToByte, the D arg is only to prevent duplicate definitions caused
+// by _ALL_VIRT.
+
+// There is no reinterpret from u8 <-> u8, so both directions just return v.
+#define HWY_RVV_CAST_U8(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                        SHIFT, MLEN, NAME, OP)                           \
+  template <typename T, size_t N>                                        \
+  HWY_API vuint8##LMUL##_t BitCastToByte(Simd<T, N, SHIFT> /* d */,      \
+                                         vuint8##LMUL##_t v) {           \
+    return v;                                                            \
+  }                                                                      \
+  template <size_t N>                                                    \
+  HWY_API vuint8##LMUL##_t BitCastFromByte(                              \
+      HWY_RVV_D(BASE, SEW, N, SHIFT) /* d */, vuint8##LMUL##_t v) {      \
+    return v;                                                            \
+  }
+
+// For i8, need a single i8 <-> u8 reinterpret (HWY_RVV_CAST_IF does two).
+#define HWY_RVV_CAST_I8(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                        SHIFT, MLEN, NAME, OP)                           \
+  template <typename T, size_t N>                                        \
+  HWY_API vuint8##LMUL##_t BitCastToByte(Simd<T, N, SHIFT> /* d */,      \
+                                         vint8##LMUL##_t v) {            \
+    return __riscv_vreinterpret_v_i8##LMUL##_u8##LMUL(v);                \
+  }                                                                      \
+  template <size_t N>                                                    \
+  HWY_API vint8##LMUL##_t BitCastFromByte(                               \
+      HWY_RVV_D(BASE, SEW, N, SHIFT) /* d */, vuint8##LMUL##_t v) {      \
+    return __riscv_vreinterpret_v_u8##LMUL##_i8##LMUL(v);                \
+  }
+
+// Separate u/i because clang only provides signed <-> unsigned reinterpret for
+// the same SEW.
+// HWY_RVV_CAST_U: a single reinterpret between the CHAR/SEW vector and the u8
+// vector of the same LMUL, in each direction.
+#define HWY_RVV_CAST_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                       MLEN, NAME, OP)                                         \
+  template <typename T, size_t N>                                              \
+  HWY_API vuint8##LMUL##_t BitCastToByte(Simd<T, N, SHIFT> /* d */,            \
+                                         HWY_RVV_V(BASE, SEW, LMUL) v) {       \
+    return __riscv_v##OP##_v_##CHAR##SEW##LMUL##_u8##LMUL(v);                  \
+  }                                                                            \
+  template <size_t N>                                                          \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte(                          \
+      HWY_RVV_D(BASE, SEW, N, SHIFT) /* d */, vuint8##LMUL##_t v) {            \
+    return __riscv_v##OP##_v_u8##LMUL##_##CHAR##SEW##LMUL(v);                  \
+  }
+
+// Signed/Float: first cast to/from unsigned
+// HWY_RVV_CAST_IF chains two reinterprets per direction:
+//   ToByte:   CHAR##SEW -> u##SEW -> u8
+//   FromByte: u8 -> u##SEW -> CHAR##SEW
+#define HWY_RVV_CAST_IF(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                        SHIFT, MLEN, NAME, OP)                           \
+  template <typename T, size_t N>                                        \
+  HWY_API vuint8##LMUL##_t BitCastToByte(Simd<T, N, SHIFT> /* d */,      \
+                                         HWY_RVV_V(BASE, SEW, LMUL) v) { \
+    return __riscv_v##OP##_v_u##SEW##LMUL##_u8##LMUL(                    \
+        __riscv_v##OP##_v_##CHAR##SEW##LMUL##_u##SEW##LMUL(v));          \
+  }                                                                      \
+  template <size_t N>                                                    \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte(                    \
+      HWY_RVV_D(BASE, SEW, N, SHIFT) /* d */, vuint8##LMUL##_t v) {      \
+    return __riscv_v##OP##_v_u##SEW##LMUL##_##CHAR##SEW##LMUL(           \
+        __riscv_v##OP##_v_u8##LMUL##_u##SEW##LMUL(v));                   \
+  }
+
+// Additional versions for virtual LMUL using LMULH for byte vectors.
+// ToByte reinterprets at LMUL and then narrows the byte vector to LMULH via
+// detail::Trunc. FromByte first widens the LMULH byte vector to LMUL via
+// detail::Ext (using a u8 tag with SHIFT + 1) and then reinterprets.
+#define HWY_RVV_CAST_VIRT_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                            SHIFT, MLEN, NAME, OP)                           \
+  template <typename T, size_t N>                                            \
+  HWY_API vuint8##LMULH##_t BitCastToByte(Simd<T, N, SHIFT> /* d */,         \
+                                          HWY_RVV_V(BASE, SEW, LMUL) v) {    \
+    return detail::Trunc(__riscv_v##OP##_v_##CHAR##SEW##LMUL##_u8##LMUL(v)); \
+  }                                                                          \
+  template <size_t N>                                                        \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte(                        \
+      HWY_RVV_D(BASE, SEW, N, SHIFT) /* d */, vuint8##LMULH##_t v) {         \
+    HWY_RVV_D(uint, 8, N, SHIFT + 1) d2;                                     \
+    const vuint8##LMUL##_t v2 = detail::Ext(d2, v);                          \
+    return __riscv_v##OP##_v_u8##LMUL##_##CHAR##SEW##LMUL(v2);               \
+  }
+
+// Signed/Float: first cast to/from unsigned
+// Virtual-LMUL counterpart of HWY_RVV_CAST_IF: the same two-step reinterpret
+// via u##SEW, combined with detail::Trunc (ToByte) or detail::Ext (FromByte)
+// to convert between the LMUL and LMULH byte vectors.
+#define HWY_RVV_CAST_VIRT_IF(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                             SHIFT, MLEN, NAME, OP)                           \
+  template <typename T, size_t N>                                             \
+  HWY_API vuint8##LMULH##_t BitCastToByte(Simd<T, N, SHIFT> /* d */,          \
+                                          HWY_RVV_V(BASE, SEW, LMUL) v) {     \
+    return detail::Trunc(__riscv_v##OP##_v_u##SEW##LMUL##_u8##LMUL(           \
+        __riscv_v##OP##_v_##CHAR##SEW##LMUL##_u##SEW##LMUL(v)));              \
+  }                                                                           \
+  template <size_t N>                                                         \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte(                         \
+      HWY_RVV_D(BASE, SEW, N, SHIFT) /* d */, vuint8##LMULH##_t v) {          \
+    HWY_RVV_D(uint, 8, N, SHIFT + 1) d2;                                      \
+    const vuint8##LMUL##_t v2 = detail::Ext(d2, v);                           \
+    return __riscv_v##OP##_v_u##SEW##LMUL##_##CHAR##SEW##LMUL(                \
+        __riscv_v##OP##_v_u8##LMUL##_u##SEW##LMUL(v2));                       \
+  }
+
+// Instantiate BitCastToByte/BitCastFromByte: 8-bit lanes use the dedicated
+// U8/I8 macros; 16/32/64-bit integers and floats use the single- or two-step
+// macros, each for the _ALL set and (via the VIRT macros) the _VIRT set.
+HWY_RVV_FOREACH_U08(HWY_RVV_CAST_U8, _, reinterpret, _ALL)
+HWY_RVV_FOREACH_I08(HWY_RVV_CAST_I8, _, reinterpret, _ALL)
+HWY_RVV_FOREACH_U163264(HWY_RVV_CAST_U, _, reinterpret, _ALL)
+HWY_RVV_FOREACH_I163264(HWY_RVV_CAST_IF, _, reinterpret, _ALL)
+HWY_RVV_FOREACH_U163264(HWY_RVV_CAST_VIRT_U, _, reinterpret, _VIRT)
+HWY_RVV_FOREACH_I163264(HWY_RVV_CAST_VIRT_IF, _, reinterpret, _VIRT)
+HWY_RVV_FOREACH_F(HWY_RVV_CAST_IF, _, reinterpret, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_CAST_VIRT_IF, _, reinterpret, _VIRT)
+#if HWY_HAVE_FLOAT16     // HWY_RVV_FOREACH_F already covered float16_
+#elif HWY_RVV_HAVE_F16C  // zvfhmin provides reinterpret* intrinsics:
+HWY_RVV_FOREACH_F16_UNCONDITIONAL(HWY_RVV_CAST_IF, _, reinterpret, _ALL)
+HWY_RVV_FOREACH_F16_UNCONDITIONAL(HWY_RVV_CAST_VIRT_IF, _, reinterpret, _VIRT)
+#else
+// Fallback when neither of the above applies: a float16_t tag forwards to the
+// uint16_t overload, so the returned vector has uint16_t lanes.
+template <size_t N, int kPow2>
+HWY_INLINE VFromD<Simd<uint16_t, N, kPow2>> BitCastFromByte(
+    Simd<float16_t, N, kPow2> /* d */, VFromD<Simd<uint8_t, N, kPow2>> v) {
+  return BitCastFromByte(Simd<uint16_t, N, kPow2>(), v);
+}
+#endif
+
+// The generator macros are no longer needed once instantiated.
+#undef HWY_RVV_CAST_U8
+#undef HWY_RVV_CAST_I8
+#undef HWY_RVV_CAST_U
+#undef HWY_RVV_CAST_IF
+#undef HWY_RVV_CAST_VIRT_U
+#undef HWY_RVV_CAST_VIRT_IF
+
+// bfloat16_t tag: forwards to the int16_t overload, so the returned vector has
+// int16_t lanes.
+template <size_t N, int kPow2>
+HWY_INLINE VFromD<Simd<int16_t, N, kPow2>> BitCastFromByte(
+    Simd<bfloat16_t, N, kPow2> /* d */, VFromD<Simd<uint8_t, N, kPow2>> v) {
+  const Simd<int16_t, N, kPow2> di16;
+  return BitCastFromByte(di16, v);
+}
+
+}  // namespace detail
+
+// Reinterprets the bits of `v` as a vector described by `d`, going through an
+// intermediate byte vector (BitCastToByte followed by BitCastFromByte).
+template <class D, class FromV>
+HWY_API VFromD<D> BitCast(D d, FromV v) {
+  const auto as_bytes = detail::BitCastToByte(d, v);
+  return detail::BitCastFromByte(d, as_bytes);
+}
+
+// ------------------------------ Iota
+
+namespace detail {
+
+// Generates detail::Iota0(d), which invokes the `vid` intrinsic with Lanes(d)
+// as the vector length. NOTE(review): per the RVV `vid.v` instruction this
+// should yield the element indices 0, 1, 2, ... - confirm against the spec.
+#define HWY_RVV_IOTA(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT,  \
+                     MLEN, NAME, OP)                                          \
+  template <size_t N>                                                         \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d) { \
+    return __riscv_v##OP##_##CHAR##SEW##LMUL(Lanes(d));                       \
+  }
+
+// For i8 lanes, this may well wrap around. Unsigned only is less error-prone.
+HWY_RVV_FOREACH_U(HWY_RVV_IOTA, Iota0, id_v, _ALL_VIRT)
+#undef HWY_RVV_IOTA
+
+// Used by Expand.
+// Generates detail::MaskedIota(d, mask), which invokes the `viota` intrinsic
+// on `mask` with Lanes(d) as the vector length. Unsigned lane types only.
+#define HWY_RVV_MASKED_IOTA(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                            SHIFT, MLEN, NAME, OP)                           \
+  template <size_t N>                                                        \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                         \
+      NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, HWY_RVV_M(MLEN) mask) {         \
+    return __riscv_v##OP##_##CHAR##SEW##LMUL(mask, Lanes(d));                \
+  }
+
+HWY_RVV_FOREACH_U(HWY_RVV_MASKED_IOTA, MaskedIota, iota_m, _ALL_VIRT)
+#undef HWY_RVV_MASKED_IOTA
+
+}  // namespace detail
+
+// ================================================== LOGICAL
+
+// ------------------------------ Not
+
+// Integer lanes map directly onto the `not` intrinsic.
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGV, Not, not, _ALL)
+
+// Float lanes: apply the integer Not to the bits viewed as unsigned, then
+// cast the result back to the float type.
+template <class V, HWY_IF_FLOAT_V(V)>
+HWY_API V Not(const V v) {
+  const DFromV<V> df;
+  const RebindToUnsigned<DFromV<V>> du;
+  return BitCast(df, Not(BitCast(du, v)));
+}
+
+// ------------------------------ And
+
+// Vector-scalar form (the scalar is ideally an immediate), for use with Iota0.
+namespace detail {
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGVS, AndS, and_vx, _ALL)
+}  // namespace detail
+
+// Integer lanes map directly onto the `and` intrinsic.
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGVV, And, and, _ALL)
+
+// Float lanes: apply the integer And to the bits viewed as unsigned, then
+// cast the result back to the float type.
+template <class V, HWY_IF_FLOAT_V(V)>
+HWY_API V And(const V a, const V b) {
+  const DFromV<V> df;
+  const RebindToUnsigned<DFromV<V>> du;
+  return BitCast(df, And(BitCast(du, a), BitCast(du, b)));
+}
+
+// ------------------------------ Or
+
+// Integer lanes map directly onto the `or` intrinsic.
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGVV, Or, or, _ALL)
+
+// Float lanes: apply the integer Or to the bits viewed as unsigned, then cast
+// the result back to the float type.
+template <class V, HWY_IF_FLOAT_V(V)>
+HWY_API V Or(const V a, const V b) {
+  const DFromV<V> df;
+  const RebindToUnsigned<DFromV<V>> du;
+  return BitCast(df, Or(BitCast(du, a), BitCast(du, b)));
+}
+
+// ------------------------------ Xor
+
+// Vector-scalar form (the scalar is ideally an immediate), for use with Iota0.
+namespace detail {
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGVS, XorS, xor_vx, _ALL)
+}  // namespace detail
+
+// Integer lanes map directly onto the `xor` intrinsic.
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGVV, Xor, xor, _ALL)
+
+// Float lanes: apply the integer Xor to the bits viewed as unsigned, then
+// cast the result back to the float type.
+template <class V, HWY_IF_FLOAT_V(V)>
+HWY_API V Xor(const V a, const V b) {
+  const DFromV<V> df;
+  const RebindToUnsigned<DFromV<V>> du;
+  return BitCast(df, Xor(BitCast(du, a), BitCast(du, b)));
+}
+
+// ------------------------------ AndNot
+// Returns ~not_a & b, composed from Not and And.
+template <class V>
+HWY_API V AndNot(const V not_a, const V b) {
+  const V inverted = Not(not_a);
+  return And(inverted, b);
+}
+
+// ------------------------------ Xor3
+// Returns x1 ^ x2 ^ x3, composed from two Xor calls.
+template <class V>
+HWY_API V Xor3(V x1, V x2, V x3) {
+  const V x2_xor_x3 = Xor(x2, x3);
+  return Xor(x1, x2_xor_x3);
+}
+
+// ------------------------------ Or3
+// Returns o1 | o2 | o3, composed from two Or calls.
+template <class V>
+HWY_API V Or3(V o1, V o2, V o3) {
+  const V o2_or_o3 = Or(o2, o3);
+  return Or(o1, o2_or_o3);
+}
+
+// ------------------------------ OrAnd
+// Returns o | (a1 & a2), composed from And followed by Or.
+template <class V>
+HWY_API V OrAnd(const V o, const V a1, const V a2) {
+  const V a1_and_a2 = And(a1, a2);
+  return Or(o, a1_and_a2);
+}
+
+// ------------------------------ CopySign
+
+// CopySign for float lanes maps onto the `fsgnj` intrinsic.
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVV, CopySign, fsgnj, _ALL)
+
+// Same contract as CopySign; simply forwards to it (see comment in the body
+// for why no special handling of `abs` is required on this target).
+template <class V>
+HWY_API V CopySignToAbs(const V abs, const V sign) {
+  // RVV can also handle abs < 0, so no extra action needed.
+  return CopySign(abs, sign);
+}
+
+// ================================================== ARITHMETIC
+
+// Per-target flags to prevent generic_ops-inl.h defining Add etc.
+// The flag is toggled (defined if absent, undefined if present) rather than
+// unconditionally defined.
+#ifdef HWY_NATIVE_OPERATOR_REPLACEMENTS
+#undef HWY_NATIVE_OPERATOR_REPLACEMENTS
+#else
+#define HWY_NATIVE_OPERATOR_REPLACEMENTS
+#endif
+
+// ------------------------------ Add
+
+// Internal vector-scalar helpers: AddS (add_vx / fadd_vf) and ReverseSubS
+// (rsub_vx / frsub_vf).
+namespace detail {
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGVS, AddS, add_vx, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVS, AddS, fadd_vf, _ALL)
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGVS, ReverseSubS, rsub_vx, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVS, ReverseSubS, frsub_vf, _ALL)
+}  // namespace detail
+
+// Public vector-vector Add for integer (add) and float (fadd) lanes.
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGVV, Add, add, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVV, Add, fadd, _ALL)
+
+// ------------------------------ Sub
+// Internal vector-scalar SubS for integer lanes (sub_vx).
+namespace detail {
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGVS, SubS, sub_vx, _ALL)
+}  // namespace detail
+
+// Public vector-vector Sub for integer (sub) and float (fsub) lanes.
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGVV, Sub, sub, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVV, Sub, fsub, _ALL)
+
+// ------------------------------ SaturatedAdd
+
+// Toggle the per-target flags for 32/64-bit signed and unsigned saturated
+// add/sub (defined if absent, undefined if present).
+// NOTE(review): presumably these stop generic_ops-inl.h from supplying its own
+// versions, like the other HWY_NATIVE_* flags in this file - confirm.
+#ifdef HWY_NATIVE_I32_SATURATED_ADDSUB
+#undef HWY_NATIVE_I32_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_I32_SATURATED_ADDSUB
+#endif
+
+#ifdef HWY_NATIVE_U32_SATURATED_ADDSUB
+#undef HWY_NATIVE_U32_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_U32_SATURATED_ADDSUB
+#endif
+
+#ifdef HWY_NATIVE_I64_SATURATED_ADDSUB
+#undef HWY_NATIVE_I64_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_I64_SATURATED_ADDSUB
+#endif
+
+#ifdef HWY_NATIVE_U64_SATURATED_ADDSUB
+#undef HWY_NATIVE_U64_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_U64_SATURATED_ADDSUB
+#endif
+
+// Unsigned lanes use saddu, signed lanes use sadd.
+HWY_RVV_FOREACH_U(HWY_RVV_RETV_ARGVV, SaturatedAdd, saddu, _ALL)
+HWY_RVV_FOREACH_I(HWY_RVV_RETV_ARGVV, SaturatedAdd, sadd, _ALL)
+
+// ------------------------------ SaturatedSub
+
+// Unsigned lanes use ssubu, signed lanes use ssub.
+HWY_RVV_FOREACH_U(HWY_RVV_RETV_ARGVV, SaturatedSub, ssubu, _ALL)
+HWY_RVV_FOREACH_I(HWY_RVV_RETV_ARGVV, SaturatedSub, ssub, _ALL)
+
+// ------------------------------ AverageRound
+
+// Define this to opt-out of the default behavior, which is AVOID on certain
+// compiler versions. You can define only this to use VXRM, or define both this
+// and HWY_RVV_AVOID_VXRM to always avoid VXRM.
+#ifndef HWY_RVV_CHOOSE_VXRM
+
+// Assume that GCC-13 defaults to 'avoid VXRM'. Tested with GCC 13.1.0.
+#if HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1400
+#define HWY_RVV_AVOID_VXRM
+// Clang 16 with __riscv_v_intrinsic == 11000 may either require VXRM or avoid.
+// Assume earlier versions avoid.
+#elif HWY_COMPILER_CLANG && \
+    (HWY_COMPILER_CLANG < 1600 || __riscv_v_intrinsic < 11000)
+#define HWY_RVV_AVOID_VXRM
+#endif
+
+#endif  // HWY_RVV_CHOOSE_VXRM
+
+// Adding __RISCV_VXRM_* was a backwards-incompatible change and it is not clear
+// how to detect whether it is supported or required. #ifdef __RISCV_VXRM_RDN
+// does not work because it seems to be a compiler built-in, but neither does
+// __has_builtin(__RISCV_VXRM_RDN). The intrinsics version was also not updated,
+// so we require a macro to opt out of the new intrinsics.
+//
+// HWY_RVV_INSERT_VXRM(vxrm, avl) expands to the trailing intrinsic arguments:
+// either just `avl` (avoid mode) or `vxrm, avl` (default).
+#ifdef HWY_RVV_AVOID_VXRM
+#define HWY_RVV_INSERT_VXRM(vxrm, avl) avl
+// NOTE(review): the rounding-mode names are defined as empty macros here,
+// presumably so call sites that mention them still preprocess when the
+// compiler does not provide them - confirm.
+#define __RISCV_VXRM_RNU
+#define __RISCV_VXRM_RDN
+#else  // default: use new vxrm arguments
+#define HWY_RVV_INSERT_VXRM(vxrm, avl) vxrm, avl
+#endif
+
+// Extra rounding mode = up argument.
+// NAME(a, b) calls the _vv intrinsic with (a, b) followed by whatever
+// HWY_RVV_INSERT_VXRM produces: the AVL, optionally preceded by
+// __RISCV_VXRM_RNU.
+#define HWY_RVV_RETV_AVERAGE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH,  \
+                             SHIFT, MLEN, NAME, OP)                            \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                           \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_V(BASE, SEW, LMUL) b) {       \
+    return __riscv_v##OP##_vv_##CHAR##SEW##LMUL(                               \
+        a, b, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RNU, HWY_RVV_AVL(SEW, SHIFT))); \
+  }
+
+// AverageRound is only instantiated for u8 and u16 lanes (aaddu).
+HWY_RVV_FOREACH_U08(HWY_RVV_RETV_AVERAGE, AverageRound, aaddu, _ALL)
+HWY_RVV_FOREACH_U16(HWY_RVV_RETV_AVERAGE, AverageRound, aaddu, _ALL)
+
+#undef HWY_RVV_RETV_AVERAGE
+
+// ------------------------------ ShiftLeft[Same]
+
+// Intrinsics do not define .vi forms, so use .vx instead.
+// Generates two functions per type:
+//   NAME<kBits>(v)      - shift count is a compile-time template argument.
+//   NAME##Same(v, bits) - shift count is a runtime int, cast to uint8_t
+//                         before being passed to the intrinsic.
+#define HWY_RVV_SHIFT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT,  \
+                      MLEN, NAME, OP)                                          \
+  template <int kBits>                                                         \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) {      \
+    return __riscv_v##OP##_vx_##CHAR##SEW##LMUL(v, kBits,                      \
+                                                HWY_RVV_AVL(SEW, SHIFT));      \
+  }                                                                            \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                           \
+      NAME##Same(HWY_RVV_V(BASE, SEW, LMUL) v, int bits) {                     \
+    return __riscv_v##OP##_vx_##CHAR##SEW##LMUL(v, static_cast<uint8_t>(bits), \
+                                                HWY_RVV_AVL(SEW, SHIFT));      \
+  }
+
+HWY_RVV_FOREACH_UI(HWY_RVV_SHIFT, ShiftLeft, sll, _ALL)
+
+// ------------------------------ ShiftRight[Same]
+
+// Unsigned lanes use srl; signed lanes use sra.
+HWY_RVV_FOREACH_U(HWY_RVV_SHIFT, ShiftRight, srl, _ALL)
+HWY_RVV_FOREACH_I(HWY_RVV_SHIFT, ShiftRight, sra, _ALL)
+
+#undef HWY_RVV_SHIFT
+
+// ------------------------------ SumsOf8 (ShiftRight, Add)
+// Sums each group of eight adjacent u8 lanes into one u64 lane, using three
+// rounds of pairwise additions performed on 16-bit lanes.
+template <class VU8>
+HWY_API VFromD<Repartition<uint64_t, DFromV<VU8>>> SumsOf8(const VU8 v) {
+  const DFromV<VU8> d8;
+  const RepartitionToWide<decltype(d8)> d16;
+  const RepartitionToWide<decltype(d16)> d32;
+  const RepartitionToWide<decltype(d32)> d64;
+  using V16 = VFromD<decltype(d16)>;
+
+  // Round 1: each u16 lane becomes the sum of its upper and lower byte.
+  const V16 byte_pairs = BitCast(d16, v);
+  const V16 upper_bytes = ShiftRight<8>(byte_pairs);
+  const V16 lower_bytes = detail::AndS(byte_pairs, 0xFF);
+  const V16 sums_of_2 = Add(upper_bytes, lower_bytes);
+
+  // Round 2: add the upper u16 of every u32 onto the lower u16. Only the
+  // lower u16 of each u32 is meaningful afterwards.
+  const V16 sums_of_2_upper =
+      BitCast(d16, ShiftRight<16>(BitCast(d32, sums_of_2)));
+  const V16 sums_of_4 = Add(sums_of_2, sums_of_2_upper);
+
+  // Round 3: add the upper u32 of every u64 onto the lower u32. Only the
+  // lowest u16 of each u64 is meaningful afterwards.
+  const V16 sums_of_4_upper =
+      BitCast(d16, ShiftRight<32>(BitCast(d64, sums_of_4)));
+  const V16 sums_of_8 = Add(sums_of_4, sums_of_4_upper);
+
+  // Discard the leftover partial sums above bit 15 of each u64 lane.
+  return detail::AndS(BitCast(d64, sums_of_8), 0xFFFFull);
+}
+
+// ------------------------------ RotateRight
+// Rotates each lane right by the compile-time count kBits, which must lie in
+// [0, lane size in bits).
+template <int kBits, class V>
+HWY_API V RotateRight(const V v) {
+  constexpr size_t kLaneBits = 8 * sizeof(TFromV<V>);
+  static_assert(0 <= kBits && kBits < kLaneBits, "Invalid shift count");
+  if (kBits == 0) {
+    return v;
+  }
+  const V low_part = ShiftRight<kBits>(v);
+  // HWY_MIN clamps the left-shift count to kLaneBits - 1; this only matters
+  // for kBits == 0, where this code is still instantiated but not reached.
+  const V high_part =
+      ShiftLeft<HWY_MIN(kLaneBits - 1, kLaneBits - kBits)>(v);
+  return Or(low_part, high_part);
+}
+
+// ------------------------------ Shl
+// Per-lane variable shift: NAME(v, bits) passes both vectors straight to the
+// _vv intrinsic. `bits` has the same vector type as `v`.
+#define HWY_RVV_SHIFT_VV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH,   \
+                         SHIFT, MLEN, NAME, OP)                             \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                        \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(BASE, SEW, LMUL) bits) { \
+    return __riscv_v##OP##_vv_##CHAR##SEW##LMUL(v, bits,                    \
+                                                HWY_RVV_AVL(SEW, SHIFT));   \
+  }
+
+HWY_RVV_FOREACH_U(HWY_RVV_SHIFT_VV, Shl, sll, _ALL)
+
+// Signed variant of the per-lane variable shift: `bits` arrives as a signed
+// vector and is BitCast to the unsigned type of the same SEW before being
+// passed to the _vv intrinsic as the shift-count operand.
+#define HWY_RVV_SHIFT_II(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH,   \
+                         SHIFT, MLEN, NAME, OP)                             \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                        \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(BASE, SEW, LMUL) bits) { \
+    const HWY_RVV_D(uint, SEW, HWY_LANES(HWY_RVV_T(BASE, SEW)), SHIFT) du;  \
+    return __riscv_v##OP##_vv_##CHAR##SEW##LMUL(v, BitCast(du, bits),       \
+                                                HWY_RVV_AVL(SEW, SHIFT));   \
+  }
+
+HWY_RVV_FOREACH_I(HWY_RVV_SHIFT_II, Shl, sll, _ALL)
+
+// ------------------------------ Shr
+
+// Unsigned lanes use srl; signed lanes use sra (with the shift counts cast to
+// unsigned by HWY_RVV_SHIFT_II).
+HWY_RVV_FOREACH_U(HWY_RVV_SHIFT_VV, Shr, srl, _ALL)
+HWY_RVV_FOREACH_I(HWY_RVV_SHIFT_II, Shr, sra, _ALL)
+
+#undef HWY_RVV_SHIFT_II
+#undef HWY_RVV_SHIFT_VV
+
+// ------------------------------ Min
+
+// Internal vector-scalar MinS for unsigned, signed and float lanes.
+namespace detail {
+
+HWY_RVV_FOREACH_U(HWY_RVV_RETV_ARGVS, MinS, minu_vx, _ALL)
+HWY_RVV_FOREACH_I(HWY_RVV_RETV_ARGVS, MinS, min_vx, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVS, MinS, fmin_vf, _ALL)
+
+}  // namespace detail
+
+// Public vector-vector Min: minu (unsigned), min (signed), fmin (float).
+HWY_RVV_FOREACH_U(HWY_RVV_RETV_ARGVV, Min, minu, _ALL)
+HWY_RVV_FOREACH_I(HWY_RVV_RETV_ARGVV, Min, min, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVV, Min, fmin, _ALL)
+
+// ------------------------------ Max
+
+// Internal vector-scalar MaxS for unsigned, signed and float lanes.
+namespace detail {
+
+HWY_RVV_FOREACH_U(HWY_RVV_RETV_ARGVS, MaxS, maxu_vx, _ALL)
+HWY_RVV_FOREACH_I(HWY_RVV_RETV_ARGVS, MaxS, max_vx, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVS, MaxS, fmax_vf, _ALL)
+
+}  // namespace detail
+
+// Public vector-vector Max: maxu (unsigned), max (signed), fmax (float).
+HWY_RVV_FOREACH_U(HWY_RVV_RETV_ARGVV, Max, maxu, _ALL)
+HWY_RVV_FOREACH_I(HWY_RVV_RETV_ARGVV, Max, max, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVV, Max, fmax, _ALL)
+
+// ------------------------------ Mul
+
+// Per-target flags to prevent generic_ops-inl.h defining 8/64-bit operator*.
+// Each flag is toggled: defined if absent, undefined if present.
+#ifdef HWY_NATIVE_MUL_8
+#undef HWY_NATIVE_MUL_8
+#else
+#define HWY_NATIVE_MUL_8
+#endif
+#ifdef HWY_NATIVE_MUL_64
+#undef HWY_NATIVE_MUL_64
+#else
+#define HWY_NATIVE_MUL_64
+#endif
+
+// Vector-vector Mul for all integer (mul) and float (fmul) lane types.
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGVV, Mul, mul, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVV, Mul, fmul, _ALL)
+
+// ------------------------------ MulHigh
+
+// Only for internal use (Highway only promises MulHigh for 16-bit inputs).
+// Used by MulEven; vwmul does not work for m8.
+// detail::MulHigh is instantiated for every signed/unsigned lane width.
+namespace detail {
+HWY_RVV_FOREACH_I(HWY_RVV_RETV_ARGVV, MulHigh, mulh, _ALL)
+HWY_RVV_FOREACH_U(HWY_RVV_RETV_ARGVV, MulHigh, mulhu, _ALL)
+}  // namespace detail
+
+// Public MulHigh: 16-bit lanes only.
+HWY_RVV_FOREACH_U16(HWY_RVV_RETV_ARGVV, MulHigh, mulhu, _ALL)
+HWY_RVV_FOREACH_I16(HWY_RVV_RETV_ARGVV, MulHigh, mulh, _ALL)
+
+// ------------------------------ MulFixedPoint15
+
+// Extra rounding mode = up argument.
+// NAME(a, b) calls the _vv intrinsic with (a, b) followed by whatever
+// HWY_RVV_INSERT_VXRM produces: the AVL, optionally preceded by
+// __RISCV_VXRM_RNU.
+#define HWY_RVV_MUL15(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT,  \
+                      MLEN, NAME, OP)                                          \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                           \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_V(BASE, SEW, LMUL) b) {       \
+    return __riscv_v##OP##_vv_##CHAR##SEW##LMUL(                               \
+        a, b, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RNU, HWY_RVV_AVL(SEW, SHIFT))); \
+  }
+
+// MulFixedPoint15 is only instantiated for i16 lanes (smul).
+HWY_RVV_FOREACH_I16(HWY_RVV_MUL15, MulFixedPoint15, smul, _ALL)
+
+#undef HWY_RVV_MUL15
+
+// ------------------------------ Div
+// Float-only vector-vector division (fdiv).
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVV, Div, fdiv, _ALL)
+
+// ------------------------------ ApproximateReciprocal
+// Toggle the per-target flag (defined if absent, undefined if present).
+#ifdef HWY_NATIVE_F64_APPROX_RECIP
+#undef HWY_NATIVE_F64_APPROX_RECIP
+#else
+#define HWY_NATIVE_F64_APPROX_RECIP
+#endif
+
+// Unary float op mapped onto the frec7 intrinsic.
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGV, ApproximateReciprocal, frec7, _ALL)
+
+// ------------------------------ Sqrt
+// Unary float op mapped onto the fsqrt intrinsic.
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGV, Sqrt, fsqrt, _ALL)
+
+// ------------------------------ ApproximateReciprocalSqrt
+// Toggle the per-target flag (defined if absent, undefined if present).
+#ifdef HWY_NATIVE_F64_APPROX_RSQRT
+#undef HWY_NATIVE_F64_APPROX_RSQRT
+#else
+#define HWY_NATIVE_F64_APPROX_RSQRT
+#endif
+
+// Unary float op mapped onto the frsqrt7 intrinsic.
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGV, ApproximateReciprocalSqrt, frsqrt7, _ALL)
+
+// ------------------------------ MulAdd
+
+// Per-target flag to prevent generic_ops-inl.h from defining int MulAdd.
+#ifdef HWY_NATIVE_INT_FMA
+#undef HWY_NATIVE_INT_FMA
+#else
+#define HWY_NATIVE_INT_FMA
+#endif
+
+// Note: op is still named vv, not vvv.
+// NAME(mul, x, add) reorders its arguments for the intrinsic, which receives
+// `add` first, followed by `mul` and `x`.
+#define HWY_RVV_FMA(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                    MLEN, NAME, OP)                                         \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                        \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) mul, HWY_RVV_V(BASE, SEW, LMUL) x,    \
+           HWY_RVV_V(BASE, SEW, LMUL) add) {                                \
+    return __riscv_v##OP##_vv_##CHAR##SEW##LMUL(add, mul, x,                \
+                                                HWY_RVV_AVL(SEW, SHIFT));   \
+  }
+
+// MulAdd and NegMulAdd exist for integer and float lanes; MulSub and NegMulSub
+// are float-only.
+HWY_RVV_FOREACH_UI(HWY_RVV_FMA, MulAdd, macc, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_FMA, MulAdd, fmacc, _ALL)
+
+// ------------------------------ NegMulAdd
+HWY_RVV_FOREACH_UI(HWY_RVV_FMA, NegMulAdd, nmsac, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_FMA, NegMulAdd, fnmsac, _ALL)
+
+// ------------------------------ MulSub
+HWY_RVV_FOREACH_F(HWY_RVV_FMA, MulSub, fmsac, _ALL)
+
+// ------------------------------ NegMulSub
+HWY_RVV_FOREACH_F(HWY_RVV_FMA, NegMulSub, fnmacc, _ALL)
+
+#undef HWY_RVV_FMA
+
+// ================================================== COMPARE
+
+// Comparisons set a mask bit to 1 if the condition is true, else 0. The XX in
+// vboolXX_t is a power of two divisor for vector bits. SEW=8 / LMUL=1 = 1/8th
+// of all bits; SEW=8 / LMUL=4 = half of all bits.
+
+// SFINAE for mapping Simd<> to MLEN (up to 64).
+#define HWY_RVV_IF_MLEN_D(D, MLEN) \
+  hwy::EnableIf<MLenFromD(D()) == MLEN>* = nullptr
+
+// Specialized for RVV instead of using the generic test_util-inl.h
+// implementation, because it is more efficient and because it helps implement
+// MFromD.
+
+// One MaskFalse overload per MLEN; the enable-if selects the overload whose
+// MLEN matches the tag. Calls the `vmclr` intrinsic with Lanes(d).
+#define HWY_RVV_MASK_FALSE(SEW, SHIFT, MLEN, NAME, OP) \
+  template <class D, HWY_RVV_IF_MLEN_D(D, MLEN)>       \
+  HWY_API HWY_RVV_M(MLEN) NAME(D d) {                  \
+    return __riscv_vm##OP##_m_b##MLEN(Lanes(d));       \
+  }
+
+HWY_RVV_FOREACH_B(HWY_RVV_MASK_FALSE, MaskFalse, clr)
+#undef HWY_RVV_MASK_FALSE
+#undef HWY_RVV_IF_MLEN_D
+
+// Mask type for tag D, deduced from the return type of MaskFalse.
+template <class D>
+using MFromD = decltype(MaskFalse(D()));
+
+// mask = f(vector, vector)
+// The intrinsic name is built as OP + "_vv_" + type + "_b" + MLEN.
+#define HWY_RVV_RETM_ARGVV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                           SHIFT, MLEN, NAME, OP)                           \
+  HWY_API HWY_RVV_M(MLEN)                                                   \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_V(BASE, SEW, LMUL) b) {    \
+    return __riscv_v##OP##_vv_##CHAR##SEW##LMUL##_b##MLEN(                  \
+        a, b, HWY_RVV_AVL(SEW, SHIFT));                                     \
+  }
+
+// mask = f(vector, scalar)
+// Unlike the vector-vector form, OP itself must carry the operand-kind suffix
+// (e.g. mseq_vx, mfeq_vf); `b` is a scalar of the lane type.
+#define HWY_RVV_RETM_ARGVS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                           SHIFT, MLEN, NAME, OP)                           \
+  HWY_API HWY_RVV_M(MLEN)                                                   \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_T(BASE, SEW) b) {          \
+    return __riscv_v##OP##_##CHAR##SEW##LMUL##_b##MLEN(                     \
+        a, b, HWY_RVV_AVL(SEW, SHIFT));                                     \
+  }
+
+// ------------------------------ Eq
+// Public vector-vector comparison; the detail:: *S variants compare against a
+// scalar.
+HWY_RVV_FOREACH_UI(HWY_RVV_RETM_ARGVV, Eq, mseq, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_RETM_ARGVV, Eq, mfeq, _ALL)
+
+namespace detail {
+HWY_RVV_FOREACH_UI(HWY_RVV_RETM_ARGVS, EqS, mseq_vx, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_RETM_ARGVS, EqS, mfeq_vf, _ALL)
+}  // namespace detail
+
+// ------------------------------ Ne
+HWY_RVV_FOREACH_UI(HWY_RVV_RETM_ARGVV, Ne, msne, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_RETM_ARGVV, Ne, mfne, _ALL)
+
+namespace detail {
+HWY_RVV_FOREACH_UI(HWY_RVV_RETM_ARGVS, NeS, msne_vx, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_RETM_ARGVS, NeS, mfne_vf, _ALL)
+}  // namespace detail
+
+// ------------------------------ Lt
+// Unsigned and signed lanes need different intrinsics (msltu vs. mslt).
+HWY_RVV_FOREACH_U(HWY_RVV_RETM_ARGVV, Lt, msltu, _ALL)
+HWY_RVV_FOREACH_I(HWY_RVV_RETM_ARGVV, Lt, mslt, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_RETM_ARGVV, Lt, mflt, _ALL)
+
+namespace detail {
+HWY_RVV_FOREACH_I(HWY_RVV_RETM_ARGVS, LtS, mslt_vx, _ALL)
+HWY_RVV_FOREACH_U(HWY_RVV_RETM_ARGVS, LtS, msltu_vx, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_RETM_ARGVS, LtS, mflt_vf, _ALL)
+}  // namespace detail
+
+// ------------------------------ Le
+// Unsigned and signed lanes need different intrinsics (msleu vs. msle).
+HWY_RVV_FOREACH_U(HWY_RVV_RETM_ARGVV, Le, msleu, _ALL)
+HWY_RVV_FOREACH_I(HWY_RVV_RETM_ARGVV, Le, msle, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_RETM_ARGVV, Le, mfle, _ALL)
+
+#undef HWY_RVV_RETM_ARGVV
+#undef HWY_RVV_RETM_ARGVS
+
+// ------------------------------ Gt/Ge
+
+// a >= b, implemented as Le with the operands swapped. The return type is
+// whatever mask type Le yields for V.
+template <class V>
+HWY_API auto Ge(const V a, const V b) -> decltype(Le(a, b)) {
+  return Le(b, a);
+}
+
+// a > b, implemented as Lt with the operands swapped. The return type is
+// whatever mask type Lt yields for V.
+template <class V>
+HWY_API auto Gt(const V a, const V b) -> decltype(Lt(a, b)) {
+  return Lt(b, a);
+}
+
+// ------------------------------ TestBit
+// Returns a mask that is true in lanes where (a & bit) is nonzero.
+template <class V>
+HWY_API auto TestBit(const V a, const V bit) -> decltype(Eq(a, bit)) {
+  const V selected_bits = And(a, bit);
+  return detail::NeS(selected_bits, 0);
+}
+
+// ------------------------------ Not
+// Mask overloads of Not, one per MLEN, generated by HWY_RVV_RETM_ARGM (defined
+// outside this section) with OP = not.
+// NOLINTNEXTLINE
+HWY_RVV_FOREACH_B(HWY_RVV_RETM_ARGM, Not, not )
+
+// ------------------------------ And
+
+// mask = f(mask_a, mask_b) (note arg2,arg1 order!)
+// NAME(a, b) passes (b, a) to the intrinsic. The swap is only observable for
+// non-commutative operations such as AndNot (andn).
+#define HWY_RVV_RETM_ARGMM(SEW, SHIFT, MLEN, NAME, OP)                 \
+  HWY_API HWY_RVV_M(MLEN) NAME(HWY_RVV_M(MLEN) a, HWY_RVV_M(MLEN) b) { \
+    return __riscv_vm##OP##_mm_b##MLEN(b, a, HWY_RVV_AVL(SEW, SHIFT)); \
+  }
+
+HWY_RVV_FOREACH_B(HWY_RVV_RETM_ARGMM, And, and)
+
+// ------------------------------ AndNot
+HWY_RVV_FOREACH_B(HWY_RVV_RETM_ARGMM, AndNot, andn)
+
+// ------------------------------ Or
+HWY_RVV_FOREACH_B(HWY_RVV_RETM_ARGMM, Or, or)
+
+// ------------------------------ Xor
+HWY_RVV_FOREACH_B(HWY_RVV_RETM_ARGMM, Xor, xor)
+
+// ------------------------------ ExclusiveNeither
+HWY_RVV_FOREACH_B(HWY_RVV_RETM_ARGMM, ExclusiveNeither, xnor)
+
+#undef HWY_RVV_RETM_ARGMM
+
+// ------------------------------ IfThenElse
+
+// IfThenElse(m, yes, no) via the `merge` intrinsic. Note the intrinsic's
+// argument order differs from the wrapper's: `no` first, then `yes`, then the
+// mask.
+#define HWY_RVV_IF_THEN_ELSE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                             SHIFT, MLEN, NAME, OP)                           \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                          \
+      NAME(HWY_RVV_M(MLEN) m, HWY_RVV_V(BASE, SEW, LMUL) yes,                 \
+           HWY_RVV_V(BASE, SEW, LMUL) no) {                                   \
+    return __riscv_v##OP##_vvm_##CHAR##SEW##LMUL(no, yes, m,                  \
+                                                 HWY_RVV_AVL(SEW, SHIFT));    \
+  }
+
+HWY_RVV_FOREACH(HWY_RVV_IF_THEN_ELSE, IfThenElse, merge, _ALL)
+
+#undef HWY_RVV_IF_THEN_ELSE
+
+// ------------------------------ IfThenElseZero
+// Returns `yes` where `mask` is true and zero elsewhere.
+template <class M, class V>
+HWY_API V IfThenElseZero(const M mask, const V yes) {
+  const DFromV<V> d;
+  return IfThenElse(mask, yes, Zero(d));
+}
+
+// ------------------------------ IfThenZeroElse
+
+// IfThenZeroElse(m, no) via the vector-scalar merge intrinsics: the scalar 0
+// is merged into `no` under mask `m`.
+#define HWY_RVV_IF_THEN_ZERO_ELSE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, \
+                                  LMULH, SHIFT, MLEN, NAME, OP)             \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                        \
+      NAME(HWY_RVV_M(MLEN) m, HWY_RVV_V(BASE, SEW, LMUL) no) {              \
+    return __riscv_v##OP##_##CHAR##SEW##LMUL(no, 0, m,                      \
+                                             HWY_RVV_AVL(SEW, SHIFT));      \
+  }
+
+// Integer lanes use merge_vxm; float lanes use fmerge_vfm.
+HWY_RVV_FOREACH_UI(HWY_RVV_IF_THEN_ZERO_ELSE, IfThenZeroElse, merge_vxm, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_IF_THEN_ZERO_ELSE, IfThenZeroElse, fmerge_vfm, _ALL)
+
+#undef HWY_RVV_IF_THEN_ZERO_ELSE
+
+// ------------------------------ MaskFromVec
+// Converts a vector to a mask by comparing each lane against 0 with
+// detail::NeS, i.e. the mask is true where the lane is not equal to zero.
+template <class V>
+HWY_API MFromD<DFromV<V>> MaskFromVec(const V v) {
+  return detail::NeS(v, 0);
+}
+
+// ------------------------------ RebindMask
+// Returns `mask` unchanged, typed as the mask of D. The tag argument is only
+// used to select the return type.
+template <class D, typename MFrom>
+HWY_API MFromD<D> RebindMask(const D /*d*/, const MFrom mask) {
+  // No need to check lane size/LMUL are the same: if not, casting MFrom to
+  // MFromD<D> would fail.
+  return mask;
+}
+
+// ------------------------------ VecFromMask
+
+// Returns mask ? ~0 : 0. No longer use sub.vx(Zero(), 1, mask) because per the
+// default mask-agnostic policy, the result of inactive lanes may also be ~0.
+// Implementation: merge the scalar -1 into a zero vector of the signed type
+// under mask `m`, then BitCast the result to the requested type.
+#define HWY_RVV_VEC_FROM_MASK(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                              SHIFT, MLEN, NAME, OP)                           \
+  template <size_t N>                                                          \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                           \
+      NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, HWY_RVV_M(MLEN) m) {              \
+    const RebindToSigned<decltype(d)> di;                                      \
+    using TI = TFromD<decltype(di)>;                                           \
+    return BitCast(                                                            \
+        d, __riscv_v##OP##_i##SEW##LMUL(Zero(di), TI{-1}, m, Lanes(d)));       \
+  }
+
+HWY_RVV_FOREACH_UI(HWY_RVV_VEC_FROM_MASK, VecFromMask, merge_vxm, _ALL_VIRT)
+
+#undef HWY_RVV_VEC_FROM_MASK
+
+// Float tags: build the vector for the same-width unsigned type, then cast
+// its bits to the float type.
+template <class D, HWY_IF_FLOAT_D(D)>
+HWY_API VFromD<D> VecFromMask(const D d, MFromD<D> mask) {
+  const RebindToUnsigned<D> du;
+  return BitCast(d, VecFromMask(du, mask));
+}
+
+// ------------------------------ IfVecThenElse (MaskFromVec)
+// Selects `yes` or `no` per lane, using the vector `mask` converted to a mask
+// via MaskFromVec.
+template <class V>
+HWY_API V IfVecThenElse(const V mask, const V yes, const V no) {
+  const MFromD<DFromV<V>> selector = MaskFromVec(mask);
+  return IfThenElse(selector, yes, no);
+}
+
+// ------------------------------ ZeroIfNegative
+// Returns zero in lanes where v < 0 and v elsewhere.
+template <class V>
+HWY_API V ZeroIfNegative(const V v) {
+  const MFromD<DFromV<V>> is_negative = detail::LtS(v, 0);
+  return IfThenZeroElse(is_negative, v);
+}
+
+// ------------------------------ BroadcastSignBit
+// Shifts each lane right by (lane size in bits - 1) using ShiftRight.
+template <class V>
+HWY_API V BroadcastSignBit(const V v) {
+  constexpr int kSignBitIndex = static_cast<int>(sizeof(TFromV<V>) * 8 - 1);
+  return ShiftRight<kSignBitIndex>(v);
+}
+
+// ------------------------------ IfNegativeThenElse (BroadcastSignBit)
+// Returns `yes` in lanes where `v`, viewed as a signed integer, is below zero,
+// and `no` elsewhere. Only instantiable for signed or float lane types.
+template <class V>
+HWY_API V IfNegativeThenElse(V v, V yes, V no) {
+  static_assert(IsSigned<TFromV<V>>(), "Only works for signed/float");
+  using D = DFromV<V>;
+  const RebindToSigned<D> di;
+
+  // Comparing the signed-integer view against 0 tests the sign bit.
+  const MFromD<D> is_negative = detail::LtS(BitCast(di, v), 0);
+  return IfThenElse(is_negative, yes, no);
+}
+
+// ------------------------------ FindFirstTrue
+
+// Generates, per MLEN:
+//   FindFirstTrue(d, m)      - returns the `vfirst` result as intptr_t.
+//   FindKnownFirstTrue(d, m) - same intrinsic, result cast to size_t.
+// Both static_assert that the tag's MLEN matches the mask type.
+// NOTE(review): a negative vfirst result presumably means "no true lane";
+// the size_t cast then relies on the caller knowing one is set - confirm.
+#define HWY_RVV_FIND_FIRST_TRUE(SEW, SHIFT, MLEN, NAME, OP)            \
+  template <class D>                                                   \
+  HWY_API intptr_t FindFirstTrue(D d, HWY_RVV_M(MLEN) m) {             \
+    static_assert(MLenFromD(d) == MLEN, "Type mismatch");              \
+    return __riscv_vfirst_m_b##MLEN(m, Lanes(d));                      \
+  }                                                                    \
+  template <class D>                                                   \
+  HWY_API size_t FindKnownFirstTrue(D d, HWY_RVV_M(MLEN) m) {          \
+    static_assert(MLenFromD(d) == MLEN, "Type mismatch");              \
+    return static_cast<size_t>(__riscv_vfirst_m_b##MLEN(m, Lanes(d))); \
+  }
+
+// NAME and OP are unused by the macro, hence the placeholder arguments.
+HWY_RVV_FOREACH_B(HWY_RVV_FIND_FIRST_TRUE, , _)
+#undef HWY_RVV_FIND_FIRST_TRUE
+
+// ------------------------------ AllFalse
+template <class D>  // True if no lane of m is set within Lanes(d).
+HWY_API bool AllFalse(D d, MFromD<D> m) {
+  return FindFirstTrue(d, m) < 0;  // a negative result is treated as "none set"
+}
+
+// ------------------------------ AllTrue
+
+#define HWY_RVV_ALL_TRUE(SEW, SHIFT, MLEN, NAME, OP)          \
+  template <class D> /* true iff the negated mask is empty */ \
+  HWY_API bool AllTrue(D d, HWY_RVV_M(MLEN) m) {              \
+    static_assert(MLenFromD(d) == MLEN, "Type mismatch");     \
+    return AllFalse(d, __riscv_vmnot_m_b##MLEN(m, Lanes(d))); \
+  }
+
+HWY_RVV_FOREACH_B(HWY_RVV_ALL_TRUE, _, _)  // instantiated per mask type (MLEN)
+#undef HWY_RVV_ALL_TRUE
+
+// ------------------------------ CountTrue
+
+#define HWY_RVV_COUNT_TRUE(SEW, SHIFT, MLEN, NAME, OP)    \
+  template <class D> /* vcpop over the first Lanes(d) */  \
+  HWY_API size_t CountTrue(D d, HWY_RVV_M(MLEN) m) {      \
+    static_assert(MLenFromD(d) == MLEN, "Type mismatch"); \
+    return __riscv_vcpop_m_b##MLEN(m, Lanes(d));          \
+  }
+
+HWY_RVV_FOREACH_B(HWY_RVV_COUNT_TRUE, _, _)  // instantiated per mask type (MLEN)
+#undef HWY_RVV_COUNT_TRUE
+
+// ================================================== MEMORY
+
+// ------------------------------ Load
+
+#define HWY_RVV_LOAD(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                     MLEN, NAME, OP)                                         \
+  template <size_t N> /* NAME(d, p): loads Lanes(d) elements from p */       \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                         \
+      NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d,                                 \
+           const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) {                    \
+    return __riscv_v##OP##SEW##_v_##CHAR##SEW##LMUL(p, Lanes(d));            \
+  }
+HWY_RVV_FOREACH(HWY_RVV_LOAD, Load, le, _ALL_VIRT)  // OP=le pastes to __riscv_vle<SEW>_v_<type>
+#undef HWY_RVV_LOAD
+
+// There is no native BF16, treat as int16_t.
+template <size_t N, int kPow2>
+HWY_API VFromD<Simd<int16_t, N, kPow2>> Load(Simd<bfloat16_t, N, kPow2> d,
+                                             const bfloat16_t* HWY_RESTRICT p) {
+  return Load(RebindToSigned<decltype(d)>(),  // load through the int16_t tag
+              reinterpret_cast<const int16_t * HWY_RESTRICT>(p));
+}
+
+template <size_t N, int kPow2>  // bfloat16_t store: v holds the lanes as int16_t.
+HWY_API void Store(VFromD<Simd<int16_t, N, kPow2>> v,
+                   Simd<bfloat16_t, N, kPow2> d, bfloat16_t* HWY_RESTRICT p) {
+  Store(v, RebindToSigned<decltype(d)>(),  // store through the int16_t tag
+        reinterpret_cast<int16_t * HWY_RESTRICT>(p));
+}
+
+#if !HWY_HAVE_FLOAT16  // Otherwise already defined above.
+
+// NOTE: different type for float16_t than bfloat16_t, see Set().
+template <size_t N, int kPow2>  // float16_t load: lanes are returned as uint16_t.
+HWY_API VFromD<Simd<uint16_t, N, kPow2>> Load(Simd<float16_t, N, kPow2> d,
+                                              const float16_t* HWY_RESTRICT p) {
+  return Load(RebindToUnsigned<decltype(d)>(),  // load through the uint16_t tag
+              reinterpret_cast<const uint16_t * HWY_RESTRICT>(p));
+}
+
+template <size_t N, int kPow2>  // float16_t store: v holds the lanes as uint16_t.
+HWY_API void Store(VFromD<Simd<uint16_t, N, kPow2>> v,
+                   Simd<float16_t, N, kPow2> d, float16_t* HWY_RESTRICT p) {
+  Store(v, RebindToUnsigned<decltype(d)>(),  // store through the uint16_t tag
+        reinterpret_cast<uint16_t * HWY_RESTRICT>(p));
+}
+
+#endif  // !HWY_HAVE_FLOAT16
+
+// ------------------------------ LoadU
+template <class D>  // Unaligned load; forwards to Load unchanged.
+HWY_API VFromD<D> LoadU(D d, const TFromD<D>* HWY_RESTRICT p) {
+  // RVV only requires element alignment, not vector alignment.
+  return Load(d, p);
+}
+
+// ------------------------------ MaskedLoad
+
+#define HWY_RVV_MASKED_LOAD(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                            SHIFT, MLEN, NAME, OP)                           \
+  template <size_t N> /* lanes with m unset are taken from Zero(d) */        \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                         \
+      NAME(HWY_RVV_M(MLEN) m, HWY_RVV_D(BASE, SEW, N, SHIFT) d,              \
+           const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) {                    \
+    return __riscv_v##OP##SEW##_v_##CHAR##SEW##LMUL##_mu(m, Zero(d), p,      \
+                                                         Lanes(d));          \
+  }                                                                          \
+  template <size_t N> /* NAME##Or: lanes with m unset are taken from v */    \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                         \
+      NAME##Or(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_M(MLEN) m,              \
+               HWY_RVV_D(BASE, SEW, N, SHIFT) d,                             \
+               const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) {                \
+    return __riscv_v##OP##SEW##_v_##CHAR##SEW##LMUL##_mu(m, v, p, Lanes(d)); \
+  }
+
+HWY_RVV_FOREACH(HWY_RVV_MASKED_LOAD, MaskedLoad, le, _ALL_VIRT)  // defines MaskedLoad and MaskedLoadOr
+#undef HWY_RVV_MASKED_LOAD
+
+// ------------------------------ Store
+
+#define HWY_RVV_STORE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                      MLEN, NAME, OP)                                         \
+  template <size_t N> /* NAME(v, d, p): stores Lanes(d) elements to p */      \
+  HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v,                             \
+                    HWY_RVV_D(BASE, SEW, N, SHIFT) d,                         \
+                    HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) {                  \
+    return __riscv_v##OP##SEW##_v_##CHAR##SEW##LMUL(p, v, Lanes(d));          \
+  }
+HWY_RVV_FOREACH(HWY_RVV_STORE, Store, se, _ALL_VIRT)  // OP=se pastes to __riscv_vse<SEW>_v_<type>
+#undef HWY_RVV_STORE
+
+// ------------------------------ BlendedStore
+
+#define HWY_RVV_BLENDED_STORE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                              SHIFT, MLEN, NAME, OP)                           \
+  template <size_t N> /* masked store (_m form): m selects lanes to write */   \
+  HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_M(MLEN) m,           \
+                    HWY_RVV_D(BASE, SEW, N, SHIFT) d,                          \
+                    HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) {                   \
+    return __riscv_v##OP##SEW##_v_##CHAR##SEW##LMUL##_m(m, p, v, Lanes(d));    \
+  }
+HWY_RVV_FOREACH(HWY_RVV_BLENDED_STORE, BlendedStore, se, _ALL_VIRT)  // same vse intrinsic, masked variant
+#undef HWY_RVV_BLENDED_STORE
+
+// ------------------------------ StoreN
+
+namespace detail {  // Unclamped helper; the public StoreN overloads clamp `count` first.
+
+#define HWY_RVV_STOREN(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                       MLEN, NAME, OP)                                         \
+  template <size_t N> /* `count` replaces Lanes(d) as the length argument */   \
+  HWY_API void NAME(size_t count, HWY_RVV_V(BASE, SEW, LMUL) v,                \
+                    HWY_RVV_D(BASE, SEW, N, SHIFT) /* d */,                    \
+                    HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) {                   \
+    return __riscv_v##OP##SEW##_v_##CHAR##SEW##LMUL(p, v, count);              \
+  }
+HWY_RVV_FOREACH(HWY_RVV_STOREN, StoreN, se, _ALL_VIRT)  // same vse intrinsic as Store
+#undef HWY_RVV_STOREN
+
+}  // namespace detail
+
+#ifdef HWY_NATIVE_STORE_N  // Toggle the flag: undefine if defined, else define.
+#undef HWY_NATIVE_STORE_N
+#else
+#define HWY_NATIVE_STORE_N
+#endif
+
+template <class D, typename T = TFromD<D>,
+          hwy::EnableIf<hwy::IsSame<T, TFromV<VFromD<D>>>()>* = nullptr>
+HWY_API void StoreN(VFromD<D> v, D d, T* HWY_RESTRICT p,
+                    size_t max_lanes_to_store) {
+  // Stores the first min(max_lanes_to_store, Lanes(d)) lanes of v to p.
+  //
+  // The count must be clamped to the runtime Lanes(d) for two reasons:
+  // 1. Even when MaxLanes(d) >= MaxLanes(DFromV<VFromD<D>>()), an unclamped
+  //    count that lies strictly between Lanes(DFromV<VFromD<D>>()) and twice
+  //    that value may make detail::StoreN store fewer than
+  //    Lanes(DFromV<VFromD<D>>()) lanes.
+  // 2. Lanes(d) may be smaller than Lanes(DFromV<VFromD<D>>()), namely when
+  //    MaxLanes(d) or d.Pow2() is smaller than that of DFromV<VFromD<D>>();
+  //    no more than Lanes(d) lanes may be written in that case.
+  const size_t lanes_in_d = Lanes(d);
+  const size_t num_to_store = HWY_MIN(max_lanes_to_store, lanes_in_d);
+  detail::StoreN(num_to_store, v, d, p);
+}
+
+// StoreN for BF16/F16 vectors whose lane type (TStore) differs from T.
+template <class D, typename T = TFromD<D>,
+          hwy::EnableIf<!hwy::IsSame<T, TFromV<VFromD<D>>>()>* = nullptr,
+          HWY_IF_SPECIAL_FLOAT(T)>
+HWY_API void StoreN(VFromD<D> v, D /*d*/, T* HWY_RESTRICT p,
+                    size_t max_lanes_to_store) {
+  using TStore = TFromV<VFromD<D>>;
+  const Rebind<TStore, D> d_store;
+  TStore* HWY_RESTRICT dst = reinterpret_cast<TStore * HWY_RESTRICT>(p);
+  const size_t lanes = Lanes(d_store);  // Upper bound on lanes written.
+  detail::StoreN(HWY_MIN(max_lanes_to_store, lanes), v, d_store, dst);
+}
+
+// ------------------------------ StoreU
+template <class V, class D>  // Unaligned store; forwards to Store unchanged.
+HWY_API void StoreU(const V v, D d, TFromD<D>* HWY_RESTRICT p) {
+  // RVV only requires element alignment, not vector alignment.
+  Store(v, d, p);
+}
+
+// ------------------------------ Stream
+template <class V, class D, typename T>  // Implemented as a plain Store here.
+HWY_API void Stream(const V v, D d, T* HWY_RESTRICT aligned) {
+  Store(v, d, aligned);  // no separate streaming path in this block
+}
+
+// ------------------------------ ScatterOffset
+
+#ifdef HWY_NATIVE_SCATTER  // Toggle the flag: undefine if defined, else define.
+#undef HWY_NATIVE_SCATTER
+#else
+#define HWY_NATIVE_SCATTER
+#endif
+
+#define HWY_RVV_SCATTER(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                        SHIFT, MLEN, NAME, OP)                           \
+  template <size_t N> /* offset: signed lanes, same SEW/LMUL as v */     \
+  HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v,                        \
+                    HWY_RVV_D(BASE, SEW, N, SHIFT) d,                    \
+                    HWY_RVV_T(BASE, SEW) * HWY_RESTRICT base,            \
+                    HWY_RVV_V(int, SEW, LMUL) offset) {                  \
+    const RebindToUnsigned<decltype(d)> du; /* passed as unsigned */     \
+    return __riscv_v##OP##ei##SEW##_v_##CHAR##SEW##LMUL(                 \
+        base, BitCast(du, offset), v, Lanes(d));                         \
+  }
+HWY_RVV_FOREACH(HWY_RVV_SCATTER, ScatterOffset, sux, _ALL_VIRT)  // OP=sux pastes to __riscv_vsuxei<SEW>_v_<type>
+#undef HWY_RVV_SCATTER
+
+// ------------------------------ ScatterIndex
+template <class D>
+HWY_API void ScatterIndex(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT base,
+                          VFromD<RebindToSigned<D>> indices) {
+  // Convert lane indices to offsets by multiplying with sizeof(T) via a shift.
+  ScatterOffset(v, d, base, ShiftLeft<CeilLog2(sizeof(TFromD<D>))>(indices));
+}
+
+// ------------------------------ MaskedScatterIndex
+
+#define HWY_RVV_MASKED_SCATTER(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, \
+                               LMULH, SHIFT, MLEN, NAME, OP)             \
+  template <size_t N> /* indices are scaled by sizeof(T) via a shift */  \
+  HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_M(MLEN) m,     \
+                    HWY_RVV_D(BASE, SEW, N, SHIFT) d,                    \
+                    HWY_RVV_T(BASE, SEW) * HWY_RESTRICT base,            \
+                    HWY_RVV_V(int, SEW, LMUL) indices) {                 \
+    const RebindToUnsigned<decltype(d)> du;                              \
+    constexpr size_t kBits = CeilLog2(sizeof(TFromD<decltype(d)>));      \
+    return __riscv_v##OP##ei##SEW##_v_##CHAR##SEW##LMUL##_m(             \
+        m, base, ShiftLeft<kBits>(BitCast(du, indices)), v, Lanes(d));   \
+  }
+HWY_RVV_FOREACH(HWY_RVV_MASKED_SCATTER, MaskedScatterIndex, sux, _ALL_VIRT)  // masked (_m) form of vsuxei
+#undef HWY_RVV_MASKED_SCATTER
+
+// ------------------------------ GatherOffset
+
+#ifdef HWY_NATIVE_GATHER  // Toggle the flag: undefine if defined, else define.
+#undef HWY_NATIVE_GATHER
+#else
+#define HWY_NATIVE_GATHER
+#endif
+
+#define HWY_RVV_GATHER(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                       MLEN, NAME, OP)                                         \
+  template <size_t N> /* offset: signed lanes, same SEW/LMUL as the result */  \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                           \
+      NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d,                                   \
+           const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT base,                     \
+           HWY_RVV_V(int, SEW, LMUL) offset) {                                 \
+    const RebindToUnsigned<decltype(d)> du; /* passed as unsigned */           \
+    return __riscv_v##OP##ei##SEW##_v_##CHAR##SEW##LMUL(                       \
+        base, BitCast(du, offset), Lanes(d));                                  \
+  }
+HWY_RVV_FOREACH(HWY_RVV_GATHER, GatherOffset, lux, _ALL_VIRT)  // OP=lux pastes to __riscv_vluxei<SEW>_v_<type>
+#undef HWY_RVV_GATHER
+
+// ------------------------------ GatherIndex
+
+template <class D>
+HWY_API VFromD<D> GatherIndex(D d, const TFromD<D>* HWY_RESTRICT base,
+                              const VFromD<RebindToSigned<D>> index) {
+  // Convert lane indices to offsets by multiplying with sizeof(T) via a shift.
+  return GatherOffset(d, base, ShiftLeft<CeilLog2(sizeof(TFromD<D>))>(index));
+}
+
+// ------------------------------ MaskedGatherIndex
+
+#define HWY_RVV_MASKED_GATHER(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                              SHIFT, MLEN, NAME, OP)                           \
+  template <size_t N> /* lanes with m unset are taken from Zero(d) */          \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                           \
+      NAME(HWY_RVV_M(MLEN) m, HWY_RVV_D(BASE, SEW, N, SHIFT) d,                \
+           const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT base,                     \
+           HWY_RVV_V(int, SEW, LMUL) indices) {                                \
+    const RebindToUnsigned<decltype(d)> du;                                    \
+    constexpr size_t kBits = CeilLog2(SEW / 8); /* log2(bytes per lane) */     \
+    return __riscv_v##OP##ei##SEW##_v_##CHAR##SEW##LMUL##_mu(                  \
+        m, Zero(d), base, ShiftLeft<kBits>(BitCast(du, indices)), Lanes(d));   \
+  }
+HWY_RVV_FOREACH(HWY_RVV_MASKED_GATHER, MaskedGatherIndex, lux, _ALL_VIRT)  // masked (_mu) form of vluxei
+#undef HWY_RVV_MASKED_GATHER
+
+// ================================================== CONVERT
+
+// ------------------------------ PromoteTo
+
+// SEW/LMUL describe the input v; the result uses SEWD/LMULD (d: SHIFT + 1).
+#define HWY_RVV_PROMOTE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH,     \
+                        SHIFT, MLEN, NAME, OP)                               \
+  template <size_t N>                                                        \
+  HWY_API HWY_RVV_V(BASE, SEWD, LMULD) NAME(                                 \
+      HWY_RVV_D(BASE, SEWD, N, SHIFT + 1) d, HWY_RVV_V(BASE, SEW, LMUL) v) { \
+    return __riscv_v##OP##CHAR##SEWD##LMULD(v, Lanes(d));                    \
+  }
+
+HWY_RVV_FOREACH_U08(HWY_RVV_PROMOTE, PromoteTo, zext_vf2_, _EXT_VIRT)
+HWY_RVV_FOREACH_U16(HWY_RVV_PROMOTE, PromoteTo, zext_vf2_, _EXT_VIRT)
+HWY_RVV_FOREACH_U32(HWY_RVV_PROMOTE, PromoteTo, zext_vf2_, _EXT_VIRT)
+HWY_RVV_FOREACH_I08(HWY_RVV_PROMOTE, PromoteTo, sext_vf2_, _EXT_VIRT)
+HWY_RVV_FOREACH_I16(HWY_RVV_PROMOTE, PromoteTo, sext_vf2_, _EXT_VIRT)
+HWY_RVV_FOREACH_I32(HWY_RVV_PROMOTE, PromoteTo, sext_vf2_, _EXT_VIRT)
+HWY_RVV_FOREACH_F32(HWY_RVV_PROMOTE, PromoteTo, fwcvt_f_f_v_, _EXT_VIRT)
+
+#if HWY_HAVE_FLOAT16 || HWY_RVV_HAVE_F16C
+
+HWY_RVV_FOREACH_F16_UNCONDITIONAL(HWY_RVV_PROMOTE, PromoteTo, fwcvt_f_f_v_,
+                                  _EXT_VIRT)
+
+// Per-target flag to prevent generic_ops-inl.h from defining f16 conversions.
+#ifdef HWY_NATIVE_F16C
+#undef HWY_NATIVE_F16C
+#else
+#define HWY_NATIVE_F16C
+#endif
+#endif  // HWY_HAVE_FLOAT16 || HWY_RVV_HAVE_F16C
+
+#undef HWY_RVV_PROMOTE  // The name is redefined below with other parameters.
+
+// The above X-macro cannot handle 4x promotion nor type switching.
+// TODO(janwas): use BASE2 arg to allow the latter.
+#define HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, LMUL, LMUL_IN, \
+                        SHIFT, ADD)                                            \
+  template <size_t N> /* result tag d has pow2 SHIFT + ADD */                  \
+  HWY_API HWY_RVV_V(BASE, BITS, LMUL)                                          \
+      PromoteTo(HWY_RVV_D(BASE, BITS, N, SHIFT + ADD) d,                       \
+                HWY_RVV_V(BASE_IN, BITS_IN, LMUL_IN) v) {                      \
+    return __riscv_v##OP##CHAR##BITS##LMUL(v, Lanes(d));                       \
+  }
+
+#define HWY_RVV_PROMOTE_X2(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN)        \
+  HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m1, mf2, -2, 1) \
+  HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m1, mf2, -1, 1) \
+  HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m2, m1, 0, 1)   \
+  HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m4, m2, 1, 1)   \
+  HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m8, m4, 2, 1)
+
+#define HWY_RVV_PROMOTE_X4(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN)        \
+  HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m1, mf4, -2, 2) \
+  HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m2, mf2, -1, 2) \
+  HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m4, m1, 0, 2)   \
+  HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m8, m2, 1, 2)
+
+#define HWY_RVV_PROMOTE_X4_FROM_U8(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN) \
+  HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, mf2, mf8, -3, 2) \
+  HWY_RVV_PROMOTE_X4(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN)
+
+#define HWY_RVV_PROMOTE_X8(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN)        \
+  HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m1, mf8, -3, 3) \
+  HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m2, mf4, -2, 3) \
+  HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m4, mf2, -1, 3) \
+  HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m8, m1, 0, 3)
+
+HWY_RVV_PROMOTE_X8(zext_vf8_, uint, u, 64, uint, 8)  // u8 -> u64
+HWY_RVV_PROMOTE_X8(sext_vf8_, int, i, 64, int, 8)  // i8 -> i64
+
+HWY_RVV_PROMOTE_X4_FROM_U8(zext_vf4_, uint, u, 32, uint, 8)  // u8 -> u32
+HWY_RVV_PROMOTE_X4_FROM_U8(sext_vf4_, int, i, 32, int, 8)  // i8 -> i32
+HWY_RVV_PROMOTE_X4(zext_vf4_, uint, u, 64, uint, 16)  // u16 -> u64
+HWY_RVV_PROMOTE_X4(sext_vf4_, int, i, 64, int, 16)  // i16 -> i64
+
+// i32 to f64
+HWY_RVV_PROMOTE_X2(fwcvt_f_x_v_, float, f, 64, int, 32)
+
+#undef HWY_RVV_PROMOTE_X8
+#undef HWY_RVV_PROMOTE_X4_FROM_U8
+#undef HWY_RVV_PROMOTE_X4
+#undef HWY_RVV_PROMOTE_X2
+#undef HWY_RVV_PROMOTE
+
+// I16->I64 or U16->U64 PromoteTo with virtual LMUL
+template <size_t N>  // i16 -> i64 for tags with kPow2 = -1.
+HWY_API auto PromoteTo(Simd<int64_t, N, -1> d,
+                       VFromD<Rebind<int16_t, decltype(d)>> v)
+    -> VFromD<decltype(d)> {
+  return PromoteTo(ScalableTag<int64_t>(), v);  // forwards with the ScalableTag
+}
+
+template <size_t N>  // u16 -> u64 for tags with kPow2 = -1.
+HWY_API auto PromoteTo(Simd<uint64_t, N, -1> d,
+                       VFromD<Rebind<uint16_t, decltype(d)>> v)
+    -> VFromD<decltype(d)> {
+  return PromoteTo(ScalableTag<uint64_t>(), v);  // forwards with the ScalableTag
+}
+
+// Unsigned to signed: cast for unsigned promote.
+template <size_t N, int kPow2>  // u8 -> i16
+HWY_API auto PromoteTo(Simd<int16_t, N, kPow2> d,
+                       VFromD<Rebind<uint8_t, decltype(d)>> v)
+    -> VFromD<decltype(d)> {
+  return BitCast(d, PromoteTo(RebindToUnsigned<decltype(d)>(), v));  // promote as unsigned, then cast
+}
+
+template <size_t N, int kPow2>  // u8 -> i32
+HWY_API auto PromoteTo(Simd<int32_t, N, kPow2> d,
+                       VFromD<Rebind<uint8_t, decltype(d)>> v)
+    -> VFromD<decltype(d)> {
+  return BitCast(d, PromoteTo(RebindToUnsigned<decltype(d)>(), v));  // promote as unsigned, then cast
+}
+
+template <size_t N, int kPow2>  // u16 -> i32
+HWY_API auto PromoteTo(Simd<int32_t, N, kPow2> d,
+                       VFromD<Rebind<uint16_t, decltype(d)>> v)
+    -> VFromD<decltype(d)> {
+  return BitCast(d, PromoteTo(RebindToUnsigned<decltype(d)>(), v));  // promote as unsigned, then cast
+}
+
+template <size_t N, int kPow2>  // u32 -> i64
+HWY_API auto PromoteTo(Simd<int64_t, N, kPow2> d,
+                       VFromD<Rebind<uint32_t, decltype(d)>> v)
+    -> VFromD<decltype(d)> {
+  return BitCast(d, PromoteTo(RebindToUnsigned<decltype(d)>(), v));  // promote as unsigned, then cast
+}
+
+template <size_t N, int kPow2>  // u16 -> i64
+HWY_API auto PromoteTo(Simd<int64_t, N, kPow2> d,
+                       VFromD<Rebind<uint16_t, decltype(d)>> v)
+    -> VFromD<decltype(d)> {
+  return BitCast(d, PromoteTo(RebindToUnsigned<decltype(d)>(), v));  // promote as unsigned, then cast
+}
+
+template <size_t N, int kPow2>  // u8 -> i64
+HWY_API auto PromoteTo(Simd<int64_t, N, kPow2> d,
+                       VFromD<Rebind<uint8_t, decltype(d)>> v)
+    -> VFromD<decltype(d)> {
+  return BitCast(d, PromoteTo(RebindToUnsigned<decltype(d)>(), v));  // promote as unsigned, then cast
+}
+
+template <size_t N, int kPow2>
+HWY_API auto PromoteTo(Simd<float32_t, N, kPow2> d,
+                       VFromD<Rebind<bfloat16_t, decltype(d)>> v)
+    -> VFromD<decltype(d)> {
+  const Rebind<uint16_t, decltype(d)> du16;  // view of the raw 16-bit lanes
+  const auto bits = PromoteTo(RebindToSigned<decltype(d)>(), BitCast(du16, v));
+  return BitCast(d, ShiftLeft<16>(bits));  // bf16 bits become the upper half
+}
+
+// ------------------------------ DemoteTo U
+
+// SEW is for the source so we can use _DEMOTE_VIRT.
+#define HWY_RVV_DEMOTE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                       MLEN, NAME, OP)                                         \
+  template <size_t N> /* result uses SEWH/LMULH; tag d is SHIFT - 1 */         \
+  HWY_API HWY_RVV_V(BASE, SEWH, LMULH) NAME(                                   \
+      HWY_RVV_D(BASE, SEWH, N, SHIFT - 1) d, HWY_RVV_V(BASE, SEW, LMUL) v) {   \
+    return __riscv_v##OP##CHAR##SEWH##LMULH(                                   \
+        v, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, Lanes(d)));                \
+  }
+
+// Unsigned -> unsigned
+HWY_RVV_FOREACH_U16(HWY_RVV_DEMOTE, DemoteTo, nclipu_wx_, _DEMOTE_VIRT)  // u16 -> u8
+HWY_RVV_FOREACH_U32(HWY_RVV_DEMOTE, DemoteTo, nclipu_wx_, _DEMOTE_VIRT)  // u32 -> u16
+HWY_RVV_FOREACH_U64(HWY_RVV_DEMOTE, DemoteTo, nclipu_wx_, _DEMOTE_VIRT)  // u64 -> u32
+
+// SEW is for the source so we can use _DEMOTE_VIRT.
+#define HWY_RVV_DEMOTE_I_TO_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                              SHIFT, MLEN, NAME, OP)                           \
+  template <size_t N> /* signed source (SEW), unsigned result (SEWH) */        \
+  HWY_API HWY_RVV_V(uint, SEWH, LMULH) NAME(                                   \
+      HWY_RVV_D(uint, SEWH, N, SHIFT - 1) dn, HWY_RVV_V(int, SEW, LMUL) v) {   \
+    const HWY_RVV_D(uint, SEW, N, SHIFT) du;                                   \
+    /* First clamp negative numbers to zero to match x86 packus. */            \
+    return DemoteTo(dn, BitCast(du, detail::MaxS(v, 0)));                      \
+  }
+HWY_RVV_FOREACH_I64(HWY_RVV_DEMOTE_I_TO_U, DemoteTo, _, _DEMOTE_VIRT)  // i64 -> u32
+HWY_RVV_FOREACH_I32(HWY_RVV_DEMOTE_I_TO_U, DemoteTo, _, _DEMOTE_VIRT)  // i32 -> u16
+HWY_RVV_FOREACH_I16(HWY_RVV_DEMOTE_I_TO_U, DemoteTo, _, _DEMOTE_VIRT)  // i16 -> u8
+#undef HWY_RVV_DEMOTE_I_TO_U
+
+template <size_t N>  // i32 -> u8 in two steps: i32 -> u16, then u16 -> u8.
+HWY_API vuint8mf8_t DemoteTo(Simd<uint8_t, N, -3> d, const vint32mf2_t v) {
+  return __riscv_vnclipu_wx_u8mf8(
+      DemoteTo(Simd<uint16_t, N, -2>(), v), 0,  // inner i32 -> u16 demotion
+      HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, Lanes(d)));
+}
+template <size_t N>  // i32 -> u8 in two steps: i32 -> u16, then u16 -> u8.
+HWY_API vuint8mf4_t DemoteTo(Simd<uint8_t, N, -2> d, const vint32m1_t v) {
+  return __riscv_vnclipu_wx_u8mf4(
+      DemoteTo(Simd<uint16_t, N, -1>(), v), 0,  // inner i32 -> u16 demotion
+      HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, Lanes(d)));
+}
+template <size_t N>  // i32 -> u8 in two steps: i32 -> u16, then u16 -> u8.
+HWY_API vuint8mf2_t DemoteTo(Simd<uint8_t, N, -1> d, const vint32m2_t v) {
+  return __riscv_vnclipu_wx_u8mf2(
+      DemoteTo(Simd<uint16_t, N, 0>(), v), 0,  // inner i32 -> u16 demotion
+      HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, Lanes(d)));
+}
+template <size_t N>  // i32 -> u8 in two steps: i32 -> u16, then u16 -> u8.
+HWY_API vuint8m1_t DemoteTo(Simd<uint8_t, N, 0> d, const vint32m4_t v) {
+  return __riscv_vnclipu_wx_u8m1(
+      DemoteTo(Simd<uint16_t, N, 1>(), v), 0,  // inner i32 -> u16 demotion
+      HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, Lanes(d)));
+}
+template <size_t N>  // i32 -> u8 in two steps: i32 -> u16, then u16 -> u8.
+HWY_API vuint8m2_t DemoteTo(Simd<uint8_t, N, 1> d, const vint32m8_t v) {
+  return __riscv_vnclipu_wx_u8m2(
+      DemoteTo(Simd<uint16_t, N, 2>(), v), 0,  // inner i32 -> u16 demotion
+      HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, Lanes(d)));
+}
+
+template <size_t N>  // u32 -> u8 in two steps: u32 -> u16, then u16 -> u8.
+HWY_API vuint8mf8_t DemoteTo(Simd<uint8_t, N, -3> d, const vuint32mf2_t v) {
+  return __riscv_vnclipu_wx_u8mf8(
+      DemoteTo(Simd<uint16_t, N, -2>(), v), 0,  // inner u32 -> u16 demotion
+      HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, Lanes(d)));
+}
+template <size_t N>  // u32 -> u8 in two steps: u32 -> u16, then u16 -> u8.
+HWY_API vuint8mf4_t DemoteTo(Simd<uint8_t, N, -2> d, const vuint32m1_t v) {
+  return __riscv_vnclipu_wx_u8mf4(
+      DemoteTo(Simd<uint16_t, N, -1>(), v), 0,  // inner u32 -> u16 demotion
+      HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, Lanes(d)));
+}
+template <size_t N>  // u32 -> u8 in two steps: u32 -> u16, then u16 -> u8.
+HWY_API vuint8mf2_t DemoteTo(Simd<uint8_t, N, -1> d, const vuint32m2_t v) {
+  return __riscv_vnclipu_wx_u8mf2(
+      DemoteTo(Simd<uint16_t, N, 0>(), v), 0,  // inner u32 -> u16 demotion
+      HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, Lanes(d)));
+}
+template <size_t N>  // u32 -> u8 in two steps: u32 -> u16, then u16 -> u8.
+HWY_API vuint8m1_t DemoteTo(Simd<uint8_t, N, 0> d, const vuint32m4_t v) {
+  return __riscv_vnclipu_wx_u8m1(
+      DemoteTo(Simd<uint16_t, N, 1>(), v), 0,  // inner u32 -> u16 demotion
+      HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, Lanes(d)));
+}
+template <size_t N>  // u32 -> u8 in two steps: u32 -> u16, then u16 -> u8.
+HWY_API vuint8m2_t DemoteTo(Simd<uint8_t, N, 1> d, const vuint32m8_t v) {
+  return __riscv_vnclipu_wx_u8m2(
+      DemoteTo(Simd<uint16_t, N, 2>(), v), 0,  // inner u32 -> u16 demotion
+      HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, Lanes(d)));
+}
+
+template <size_t N, int kPow2>  // i64 -> u8 via an intermediate u32 vector.
+HWY_API VFromD<Simd<uint8_t, N, kPow2>> DemoteTo(
+    Simd<uint8_t, N, kPow2> d, VFromD<Simd<int64_t, N, kPow2 + 3>> v) {
+  return DemoteTo(d, DemoteTo(Simd<uint32_t, N, kPow2 + 2>(), v));  // i64 -> u32 -> u8
+}
+
+template <size_t N, int kPow2>  // u64 -> u8 via an intermediate u32 vector.
+HWY_API VFromD<Simd<uint8_t, N, kPow2>> DemoteTo(
+    Simd<uint8_t, N, kPow2> d, VFromD<Simd<uint64_t, N, kPow2 + 3>> v) {
+  return DemoteTo(d, DemoteTo(Simd<uint32_t, N, kPow2 + 2>(), v));  // u64 -> u32 -> u8
+}
+
+template <size_t N, int kPow2>  // i64 -> u16 via an intermediate u32 vector.
+HWY_API VFromD<Simd<uint16_t, N, kPow2>> DemoteTo(
+    Simd<uint16_t, N, kPow2> d, VFromD<Simd<int64_t, N, kPow2 + 2>> v) {
+  return DemoteTo(d, DemoteTo(Simd<uint32_t, N, kPow2 + 1>(), v));  // i64 -> u32 -> u16
+}
+
+template <size_t N, int kPow2>  // u64 -> u16 via an intermediate u32 vector.
+HWY_API VFromD<Simd<uint16_t, N, kPow2>> DemoteTo(
+    Simd<uint16_t, N, kPow2> d, VFromD<Simd<uint64_t, N, kPow2 + 2>> v) {
+  return DemoteTo(d, DemoteTo(Simd<uint32_t, N, kPow2 + 1>(), v));  // u64 -> u32 -> u16
+}
+
+HWY_API vuint8mf8_t U8FromU32(const vuint32mf2_t v) {  // u32 -> u16 -> u8
+  const size_t avl = Lanes(ScalableTag<uint8_t, -3>());  // lane count of the u8 result tag
+  return __riscv_vnclipu_wx_u8mf8(
+      __riscv_vnclipu_wx_u16mf4(v, 0,
+                                HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl)),
+      0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));
+}
+HWY_API vuint8mf4_t U8FromU32(const vuint32m1_t v) {  // u32 -> u16 -> u8
+  const size_t avl = Lanes(ScalableTag<uint8_t, -2>());  // lane count of the u8 result tag
+  return __riscv_vnclipu_wx_u8mf4(
+      __riscv_vnclipu_wx_u16mf2(v, 0,
+                                HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl)),
+      0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));
+}
+HWY_API vuint8mf2_t U8FromU32(const vuint32m2_t v) {  // u32 -> u16 -> u8
+  const size_t avl = Lanes(ScalableTag<uint8_t, -1>());  // lane count of the u8 result tag
+  return __riscv_vnclipu_wx_u8mf2(
+      __riscv_vnclipu_wx_u16m1(v, 0,
+                               HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl)),
+      0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));
+}
+HWY_API vuint8m1_t U8FromU32(const vuint32m4_t v) {  // u32 -> u16 -> u8
+  const size_t avl = Lanes(ScalableTag<uint8_t, 0>());  // lane count of the u8 result tag
+  return __riscv_vnclipu_wx_u8m1(
+      __riscv_vnclipu_wx_u16m2(v, 0,
+                               HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl)),
+      0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));
+}
+HWY_API vuint8m2_t U8FromU32(const vuint32m8_t v) {  // u32 -> u16 -> u8
+  const size_t avl = Lanes(ScalableTag<uint8_t, 1>());  // lane count of the u8 result tag
+  return __riscv_vnclipu_wx_u8m2(
+      __riscv_vnclipu_wx_u16m4(v, 0,
+                               HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl)),
+      0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));
+}
+
+// ------------------------------ Truncations
+
+template <size_t N>  // u64 -> u8: keeps the low 8 bits of every lane.
+HWY_API vuint8mf8_t TruncateTo(Simd<uint8_t, N, -3> d,
+                               const VFromD<Simd<uint64_t, N, 0>> v) {
+  const size_t avl = Lanes(d);
+  const vuint64m1_t v1 = __riscv_vand(v, 0xFF, avl);  // value now fits in 8 bits
+  const vuint32mf2_t v2 = __riscv_vnclipu_wx_u32mf2(
+      v1, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));  // 64 -> 32
+  const vuint16mf4_t v3 = __riscv_vnclipu_wx_u16mf4(
+      v2, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));  // 32 -> 16
+  return __riscv_vnclipu_wx_u8mf8(v3, 0,
+                                  HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));
+}
+
+template <size_t N>  // u64 -> u8: keeps the low 8 bits of every lane.
+HWY_API vuint8mf4_t TruncateTo(Simd<uint8_t, N, -2> d,
+                               const VFromD<Simd<uint64_t, N, 1>> v) {
+  const size_t avl = Lanes(d);
+  const vuint64m2_t v1 = __riscv_vand(v, 0xFF, avl);  // value now fits in 8 bits
+  const vuint32m1_t v2 = __riscv_vnclipu_wx_u32m1(
+      v1, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));  // 64 -> 32
+  const vuint16mf2_t v3 = __riscv_vnclipu_wx_u16mf2(
+      v2, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));  // 32 -> 16
+  return __riscv_vnclipu_wx_u8mf4(v3, 0,
+                                  HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));
+}
+
+template <size_t N>  // u64 -> u8: keeps the low 8 bits of every lane.
+HWY_API vuint8mf2_t TruncateTo(Simd<uint8_t, N, -1> d,
+                               const VFromD<Simd<uint64_t, N, 2>> v) {
+  const size_t avl = Lanes(d);
+  const vuint64m4_t v1 = __riscv_vand(v, 0xFF, avl);  // value now fits in 8 bits
+  const vuint32m2_t v2 = __riscv_vnclipu_wx_u32m2(
+      v1, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));  // 64 -> 32
+  const vuint16m1_t v3 = __riscv_vnclipu_wx_u16m1(
+      v2, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));  // 32 -> 16
+  return __riscv_vnclipu_wx_u8mf2(v3, 0,
+                                  HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));
+}
+
+template <size_t N>  // u64 -> u8: keeps the low 8 bits of every lane.
+HWY_API vuint8m1_t TruncateTo(Simd<uint8_t, N, 0> d,
+                              const VFromD<Simd<uint64_t, N, 3>> v) {
+  const size_t avl = Lanes(d);
+  const vuint64m8_t v1 = __riscv_vand(v, 0xFF, avl);  // value now fits in 8 bits
+  const vuint32m4_t v2 = __riscv_vnclipu_wx_u32m4(
+      v1, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));  // 64 -> 32
+  const vuint16m2_t v3 = __riscv_vnclipu_wx_u16m2(
+      v2, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));  // 32 -> 16
+  return __riscv_vnclipu_wx_u8m1(v3, 0,
+                                 HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));
+}
+
+template <size_t N>  // u64 -> u16: keeps the low 16 bits of every lane.
+HWY_API vuint16mf4_t TruncateTo(Simd<uint16_t, N, -2> d,
+                                const VFromD<Simd<uint64_t, N, 0>> v) {
+  const size_t avl = Lanes(d);
+  const vuint64m1_t v1 = __riscv_vand(v, 0xFFFF, avl);  // value now fits in 16 bits
+  const vuint32mf2_t v2 = __riscv_vnclipu_wx_u32mf2(
+      v1, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));  // 64 -> 32
+  return __riscv_vnclipu_wx_u16mf4(v2, 0,
+                                   HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));
+}
+
+template <size_t N>  // u64 -> u16: keeps the low 16 bits of every lane.
+HWY_API vuint16mf2_t TruncateTo(Simd<uint16_t, N, -1> d,
+                                const VFromD<Simd<uint64_t, N, 1>> v) {
+  const size_t avl = Lanes(d);
+  const vuint64m2_t v1 = __riscv_vand(v, 0xFFFF, avl);  // value now fits in 16 bits
+  const vuint32m1_t v2 = __riscv_vnclipu_wx_u32m1(
+      v1, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));  // 64 -> 32
+  return __riscv_vnclipu_wx_u16mf2(v2, 0,
+                                   HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));
+}
+
+template <size_t N>  // u64 -> u16: keeps the low 16 bits of every lane.
+HWY_API vuint16m1_t TruncateTo(Simd<uint16_t, N, 0> d,
+                               const VFromD<Simd<uint64_t, N, 2>> v) {
+  const size_t avl = Lanes(d);
+  const vuint64m4_t v1 = __riscv_vand(v, 0xFFFF, avl);  // value now fits in 16 bits
+  const vuint32m2_t v2 = __riscv_vnclipu_wx_u32m2(
+      v1, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));  // 64 -> 32
+  return __riscv_vnclipu_wx_u16m1(v2, 0,
+                                  HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));
+}
+
+template <size_t N>  // u64 -> u16: keeps the low 16 bits of every lane.
+HWY_API vuint16m2_t TruncateTo(Simd<uint16_t, N, 1> d,
+                               const VFromD<Simd<uint64_t, N, 3>> v) {
+  const size_t avl = Lanes(d);
+  const vuint64m8_t v1 = __riscv_vand(v, 0xFFFF, avl);  // value now fits in 16 bits
+  const vuint32m4_t v2 = __riscv_vnclipu_wx_u32m4(
+      v1, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));  // 64 -> 32
+  return __riscv_vnclipu_wx_u16m2(v2, 0,
+                                  HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));
+}
+
+// Truncates u64 lanes (vuint64m1_t) to u32 (vuint32mf2_t): mask to the low
+// 32 bits, then a single zero-shift narrowing step.
+template <size_t N>
+HWY_API vuint32mf2_t TruncateTo(Simd<uint32_t, N, -1> d,
+                                const VFromD<Simd<uint64_t, N, 0>> v) {
+  const size_t num_lanes = Lanes(d);
+  const vuint64m1_t low32 = __riscv_vand(v, 0xFFFFFFFFu, num_lanes);
+  return __riscv_vnclipu_wx_u32mf2(
+      low32, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+}
+
+// Truncates u64 lanes (vuint64m2_t) to u32 (vuint32m1_t): mask to the low
+// 32 bits, then a single zero-shift narrowing step.
+template <size_t N>
+HWY_API vuint32m1_t TruncateTo(Simd<uint32_t, N, 0> d,
+                               const VFromD<Simd<uint64_t, N, 1>> v) {
+  const size_t num_lanes = Lanes(d);
+  const vuint64m2_t low32 = __riscv_vand(v, 0xFFFFFFFFu, num_lanes);
+  return __riscv_vnclipu_wx_u32m1(
+      low32, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+}
+
+// Truncates u64 lanes (vuint64m4_t) to u32 (vuint32m2_t): mask to the low
+// 32 bits, then a single zero-shift narrowing step.
+template <size_t N>
+HWY_API vuint32m2_t TruncateTo(Simd<uint32_t, N, 1> d,
+                               const VFromD<Simd<uint64_t, N, 2>> v) {
+  const size_t num_lanes = Lanes(d);
+  const vuint64m4_t low32 = __riscv_vand(v, 0xFFFFFFFFu, num_lanes);
+  return __riscv_vnclipu_wx_u32m2(
+      low32, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+}
+
+// Truncates u64 lanes (vuint64m8_t) to u32 (vuint32m4_t): mask to the low
+// 32 bits, then a single zero-shift narrowing step.
+template <size_t N>
+HWY_API vuint32m4_t TruncateTo(Simd<uint32_t, N, 2> d,
+                               const VFromD<Simd<uint64_t, N, 3>> v) {
+  const size_t num_lanes = Lanes(d);
+  const vuint64m8_t low32 = __riscv_vand(v, 0xFFFFFFFFu, num_lanes);
+  return __riscv_vnclipu_wx_u32m4(
+      low32, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+}
+
+// Truncates u32 lanes (vuint32mf2_t) to u8 (vuint8mf8_t): mask to the low
+// 8 bits, then two zero-shift narrowing steps (32 -> 16 -> 8).
+template <size_t N>
+HWY_API vuint8mf8_t TruncateTo(Simd<uint8_t, N, -3> d,
+                               const VFromD<Simd<uint32_t, N, -1>> v) {
+  const size_t num_lanes = Lanes(d);
+  const vuint32mf2_t low8 = __riscv_vand(v, 0xFF, num_lanes);
+  const vuint16mf4_t as_u16 = __riscv_vnclipu_wx_u16mf4(
+      low8, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+  return __riscv_vnclipu_wx_u8mf8(
+      as_u16, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+}
+
+// Truncates u32 lanes (vuint32m1_t) to u8 (vuint8mf4_t): mask to the low
+// 8 bits, then two zero-shift narrowing steps (32 -> 16 -> 8).
+template <size_t N>
+HWY_API vuint8mf4_t TruncateTo(Simd<uint8_t, N, -2> d,
+                               const VFromD<Simd<uint32_t, N, 0>> v) {
+  const size_t num_lanes = Lanes(d);
+  const vuint32m1_t low8 = __riscv_vand(v, 0xFF, num_lanes);
+  const vuint16mf2_t as_u16 = __riscv_vnclipu_wx_u16mf2(
+      low8, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+  return __riscv_vnclipu_wx_u8mf4(
+      as_u16, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+}
+
+// Truncates u32 lanes (vuint32m2_t) to u8 (vuint8mf2_t): mask to the low
+// 8 bits, then two zero-shift narrowing steps (32 -> 16 -> 8).
+template <size_t N>
+HWY_API vuint8mf2_t TruncateTo(Simd<uint8_t, N, -1> d,
+                               const VFromD<Simd<uint32_t, N, 1>> v) {
+  const size_t num_lanes = Lanes(d);
+  const vuint32m2_t low8 = __riscv_vand(v, 0xFF, num_lanes);
+  const vuint16m1_t as_u16 = __riscv_vnclipu_wx_u16m1(
+      low8, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+  return __riscv_vnclipu_wx_u8mf2(
+      as_u16, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+}
+
+// Truncates u32 lanes (vuint32m4_t) to u8 (vuint8m1_t): mask to the low
+// 8 bits, then two zero-shift narrowing steps (32 -> 16 -> 8).
+template <size_t N>
+HWY_API vuint8m1_t TruncateTo(Simd<uint8_t, N, 0> d,
+                              const VFromD<Simd<uint32_t, N, 2>> v) {
+  const size_t num_lanes = Lanes(d);
+  const vuint32m4_t low8 = __riscv_vand(v, 0xFF, num_lanes);
+  const vuint16m2_t as_u16 = __riscv_vnclipu_wx_u16m2(
+      low8, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+  return __riscv_vnclipu_wx_u8m1(
+      as_u16, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+}
+
+// Truncates u32 lanes (vuint32m8_t) to u8 (vuint8m2_t): mask to the low
+// 8 bits, then two zero-shift narrowing steps (32 -> 16 -> 8).
+template <size_t N>
+HWY_API vuint8m2_t TruncateTo(Simd<uint8_t, N, 1> d,
+                              const VFromD<Simd<uint32_t, N, 3>> v) {
+  const size_t num_lanes = Lanes(d);
+  const vuint32m8_t low8 = __riscv_vand(v, 0xFF, num_lanes);
+  const vuint16m4_t as_u16 = __riscv_vnclipu_wx_u16m4(
+      low8, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+  return __riscv_vnclipu_wx_u8m2(
+      as_u16, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+}
+
+// Truncates u32 lanes (vuint32mf2_t) to u16 (vuint16mf4_t): mask to the low
+// 16 bits, then a single zero-shift narrowing step.
+template <size_t N>
+HWY_API vuint16mf4_t TruncateTo(Simd<uint16_t, N, -2> d,
+                                const VFromD<Simd<uint32_t, N, -1>> v) {
+  const size_t num_lanes = Lanes(d);
+  const vuint32mf2_t low16 = __riscv_vand(v, 0xFFFF, num_lanes);
+  return __riscv_vnclipu_wx_u16mf4(
+      low16, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+}
+
+// Truncates u32 lanes (vuint32m1_t) to u16 (vuint16mf2_t): mask to the low
+// 16 bits, then a single zero-shift narrowing step.
+template <size_t N>
+HWY_API vuint16mf2_t TruncateTo(Simd<uint16_t, N, -1> d,
+                                const VFromD<Simd<uint32_t, N, 0>> v) {
+  const size_t num_lanes = Lanes(d);
+  const vuint32m1_t low16 = __riscv_vand(v, 0xFFFF, num_lanes);
+  return __riscv_vnclipu_wx_u16mf2(
+      low16, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+}
+
+// Truncates u32 lanes (vuint32m2_t) to u16 (vuint16m1_t): mask to the low
+// 16 bits, then a single zero-shift narrowing step.
+template <size_t N>
+HWY_API vuint16m1_t TruncateTo(Simd<uint16_t, N, 0> d,
+                               const VFromD<Simd<uint32_t, N, 1>> v) {
+  const size_t num_lanes = Lanes(d);
+  const vuint32m2_t low16 = __riscv_vand(v, 0xFFFF, num_lanes);
+  return __riscv_vnclipu_wx_u16m1(
+      low16, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+}
+
+// Truncates u32 lanes (vuint32m4_t) to u16 (vuint16m2_t): mask to the low
+// 16 bits, then a single zero-shift narrowing step.
+template <size_t N>
+HWY_API vuint16m2_t TruncateTo(Simd<uint16_t, N, 1> d,
+                               const VFromD<Simd<uint32_t, N, 2>> v) {
+  const size_t num_lanes = Lanes(d);
+  const vuint32m4_t low16 = __riscv_vand(v, 0xFFFF, num_lanes);
+  return __riscv_vnclipu_wx_u16m2(
+      low16, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+}
+
+// Truncates u32 lanes (vuint32m8_t) to u16 (vuint16m4_t): mask to the low
+// 16 bits, then a single zero-shift narrowing step.
+template <size_t N>
+HWY_API vuint16m4_t TruncateTo(Simd<uint16_t, N, 2> d,
+                               const VFromD<Simd<uint32_t, N, 3>> v) {
+  const size_t num_lanes = Lanes(d);
+  const vuint32m8_t low16 = __riscv_vand(v, 0xFFFF, num_lanes);
+  return __riscv_vnclipu_wx_u16m4(
+      low16, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+}
+
+// Truncates u16 lanes (vuint16mf4_t) to u8 (vuint8mf8_t): mask to the low
+// 8 bits, then a single zero-shift narrowing step.
+template <size_t N>
+HWY_API vuint8mf8_t TruncateTo(Simd<uint8_t, N, -3> d,
+                               const VFromD<Simd<uint16_t, N, -2>> v) {
+  const size_t num_lanes = Lanes(d);
+  const vuint16mf4_t low8 = __riscv_vand(v, 0xFF, num_lanes);
+  return __riscv_vnclipu_wx_u8mf8(
+      low8, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+}
+
+// Truncates u16 lanes (vuint16mf2_t) to u8 (vuint8mf4_t): mask to the low
+// 8 bits, then a single zero-shift narrowing step.
+template <size_t N>
+HWY_API vuint8mf4_t TruncateTo(Simd<uint8_t, N, -2> d,
+                               const VFromD<Simd<uint16_t, N, -1>> v) {
+  const size_t num_lanes = Lanes(d);
+  const vuint16mf2_t low8 = __riscv_vand(v, 0xFF, num_lanes);
+  return __riscv_vnclipu_wx_u8mf4(
+      low8, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+}
+
+// Truncates u16 lanes (vuint16m1_t) to u8 (vuint8mf2_t): mask to the low
+// 8 bits, then a single zero-shift narrowing step.
+template <size_t N>
+HWY_API vuint8mf2_t TruncateTo(Simd<uint8_t, N, -1> d,
+                               const VFromD<Simd<uint16_t, N, 0>> v) {
+  const size_t num_lanes = Lanes(d);
+  const vuint16m1_t low8 = __riscv_vand(v, 0xFF, num_lanes);
+  return __riscv_vnclipu_wx_u8mf2(
+      low8, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+}
+
+// Truncates u16 lanes (vuint16m2_t) to u8 (vuint8m1_t): mask to the low
+// 8 bits, then a single zero-shift narrowing step.
+template <size_t N>
+HWY_API vuint8m1_t TruncateTo(Simd<uint8_t, N, 0> d,
+                              const VFromD<Simd<uint16_t, N, 1>> v) {
+  const size_t num_lanes = Lanes(d);
+  const vuint16m2_t low8 = __riscv_vand(v, 0xFF, num_lanes);
+  return __riscv_vnclipu_wx_u8m1(
+      low8, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+}
+
+// Truncates u16 lanes (vuint16m4_t) to u8 (vuint8m2_t): mask to the low
+// 8 bits, then a single zero-shift narrowing step.
+template <size_t N>
+HWY_API vuint8m2_t TruncateTo(Simd<uint8_t, N, 1> d,
+                              const VFromD<Simd<uint16_t, N, 2>> v) {
+  const size_t num_lanes = Lanes(d);
+  const vuint16m4_t low8 = __riscv_vand(v, 0xFF, num_lanes);
+  return __riscv_vnclipu_wx_u8m2(
+      low8, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+}
+
+// Truncates u16 lanes (vuint16m8_t) to u8 (vuint8m4_t): mask to the low
+// 8 bits, then a single zero-shift narrowing step.
+template <size_t N>
+HWY_API vuint8m4_t TruncateTo(Simd<uint8_t, N, 2> d,
+                              const VFromD<Simd<uint16_t, N, 3>> v) {
+  const size_t num_lanes = Lanes(d);
+  const vuint16m8_t low8 = __riscv_vand(v, 0xFF, num_lanes);
+  return __riscv_vnclipu_wx_u8m4(
+      low8, 0, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, num_lanes));
+}
+
+// ------------------------------ DemoteTo I
+
+// Instantiates HWY_RVV_DEMOTE as DemoteTo for signed 16-, 32- and 64-bit source
+// lanes, passing nclip_wx_ as the intrinsic-name fragment.
+// NOTE(review): HWY_RVV_DEMOTE is defined outside this view; presumably each
+// instantiation narrows to the next-smaller signed lane type - confirm.
+HWY_RVV_FOREACH_I16(HWY_RVV_DEMOTE, DemoteTo, nclip_wx_, _DEMOTE_VIRT)
+HWY_RVV_FOREACH_I32(HWY_RVV_DEMOTE, DemoteTo, nclip_wx_, _DEMOTE_VIRT)
+HWY_RVV_FOREACH_I64(HWY_RVV_DEMOTE, DemoteTo, nclip_wx_, _DEMOTE_VIRT)
+
+// i32 -> i8 (vint32mf2_t -> vint8mf8_t) as two single-step demotions via i16.
+template <size_t N>
+HWY_API vint8mf8_t DemoteTo(Simd<int8_t, N, -3> d, const vint32mf2_t v) {
+  const Simd<int16_t, N, -2> d16;
+  const auto v16 = DemoteTo(d16, v);
+  return DemoteTo(d, v16);
+}
+// i32 -> i8 (vint32m1_t -> vint8mf4_t) as two single-step demotions via i16.
+template <size_t N>
+HWY_API vint8mf4_t DemoteTo(Simd<int8_t, N, -2> d, const vint32m1_t v) {
+  const Simd<int16_t, N, -1> d16;
+  const auto v16 = DemoteTo(d16, v);
+  return DemoteTo(d, v16);
+}
+// i32 -> i8 (vint32m2_t -> vint8mf2_t) as two single-step demotions via i16.
+template <size_t N>
+HWY_API vint8mf2_t DemoteTo(Simd<int8_t, N, -1> d, const vint32m2_t v) {
+  const Simd<int16_t, N, 0> d16;
+  const auto v16 = DemoteTo(d16, v);
+  return DemoteTo(d, v16);
+}
+// i32 -> i8 (vint32m4_t -> vint8m1_t) as two single-step demotions via i16.
+template <size_t N>
+HWY_API vint8m1_t DemoteTo(Simd<int8_t, N, 0> d, const vint32m4_t v) {
+  const Simd<int16_t, N, 1> d16;
+  const auto v16 = DemoteTo(d16, v);
+  return DemoteTo(d, v16);
+}
+// i32 -> i8 (vint32m8_t -> vint8m2_t) as two single-step demotions via i16.
+template <size_t N>
+HWY_API vint8m2_t DemoteTo(Simd<int8_t, N, 1> d, const vint32m8_t v) {
+  const Simd<int16_t, N, 2> d16;
+  const auto v16 = DemoteTo(d16, v);
+  return DemoteTo(d, v16);
+}
+
+// i64 -> i8: first demotes to i32 (one kPow2 step below the source), then
+// forwards to the i32 -> i8 DemoteTo.
+template <size_t N, int kPow2>
+HWY_API VFromD<Simd<int8_t, N, kPow2>> DemoteTo(
+    Simd<int8_t, N, kPow2> d, VFromD<Simd<int64_t, N, kPow2 + 3>> v) {
+  const Simd<int32_t, N, kPow2 + 2> d32;
+  const auto v32 = DemoteTo(d32, v);
+  return DemoteTo(d, v32);
+}
+
+// i64 -> i16: first demotes to i32 (one kPow2 step below the source), then
+// forwards to the i32 -> i16 DemoteTo.
+template <size_t N, int kPow2>
+HWY_API VFromD<Simd<int16_t, N, kPow2>> DemoteTo(
+    Simd<int16_t, N, kPow2> d, VFromD<Simd<int64_t, N, kPow2 + 2>> v) {
+  const Simd<int32_t, N, kPow2 + 1> d32;
+  const auto v32 = DemoteTo(d32, v);
+  return DemoteTo(d, v32);
+}
+
+#undef HWY_RVV_DEMOTE
+
+// ------------------------------ DemoteTo F
+
+// SEW is for the source so we can use _DEMOTE_VIRT.
+// Each generated overload narrows lanes from SEW to SEWH bits; the destination
+// tag `d` carries SHIFT - 1 and supplies the lane count.
+#define HWY_RVV_DEMOTE_F(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH,    \
+                         SHIFT, MLEN, NAME, OP)                              \
+  template <size_t N>                                                        \
+  HWY_API HWY_RVV_V(BASE, SEWH, LMULH) NAME(                                 \
+      HWY_RVV_D(BASE, SEWH, N, SHIFT - 1) d, HWY_RVV_V(BASE, SEW, LMUL) v) { \
+    const size_t avl = Lanes(d);                                             \
+    return __riscv_v##OP##SEWH##LMULH(v, avl);                               \
+  }
+
+// The f32 source variant is only generated when a 16-bit float is available.
+#if HWY_HAVE_FLOAT16 || HWY_RVV_HAVE_F16C
+HWY_RVV_FOREACH_F32(HWY_RVV_DEMOTE_F, DemoteTo, fncvt_rod_f_f_w_f, _DEMOTE_VIRT)
+#endif
+HWY_RVV_FOREACH_F64(HWY_RVV_DEMOTE_F, DemoteTo, fncvt_rod_f_f_w_f, _DEMOTE_VIRT)
+#undef HWY_RVV_DEMOTE_F
+
+// TODO(janwas): add BASE2 arg to allow generating this via DEMOTE_F.
+// f64 -> i32 using the round-toward-zero narrowing convert; this overload
+// accepts the kPow2 = -2 tag but still produces a vint32mf2_t.
+template <size_t N>
+HWY_API vint32mf2_t DemoteTo(Simd<int32_t, N, -2> d, const vfloat64m1_t v) {
+  const size_t num_lanes = Lanes(d);
+  return __riscv_vfncvt_rtz_x_f_w_i32mf2(v, num_lanes);
+}
+// f64 (vfloat64m1_t) -> i32 (vint32mf2_t), rounding toward zero.
+template <size_t N>
+HWY_API vint32mf2_t DemoteTo(Simd<int32_t, N, -1> d, const vfloat64m1_t v) {
+  const size_t num_lanes = Lanes(d);
+  return __riscv_vfncvt_rtz_x_f_w_i32mf2(v, num_lanes);
+}
+// f64 (vfloat64m2_t) -> i32 (vint32m1_t), rounding toward zero.
+template <size_t N>
+HWY_API vint32m1_t DemoteTo(Simd<int32_t, N, 0> d, const vfloat64m2_t v) {
+  const size_t num_lanes = Lanes(d);
+  return __riscv_vfncvt_rtz_x_f_w_i32m1(v, num_lanes);
+}
+// f64 (vfloat64m4_t) -> i32 (vint32m2_t), rounding toward zero.
+template <size_t N>
+HWY_API vint32m2_t DemoteTo(Simd<int32_t, N, 1> d, const vfloat64m4_t v) {
+  const size_t num_lanes = Lanes(d);
+  return __riscv_vfncvt_rtz_x_f_w_i32m2(v, num_lanes);
+}
+// f64 (vfloat64m8_t) -> i32 (vint32m4_t), rounding toward zero.
+template <size_t N>
+HWY_API vint32m4_t DemoteTo(Simd<int32_t, N, 2> d, const vfloat64m8_t v) {
+  const size_t num_lanes = Lanes(d);
+  return __riscv_vfncvt_rtz_x_f_w_i32m4(v, num_lanes);
+}
+
+// SEW is for the source so we can use _DEMOTE_VIRT.
+// Generates a narrowing helper that shifts each source lane right by a fixed
+// 16 bits while halving the lane width.
+#define HWY_RVV_DEMOTE_TO_SHR_16(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD,   \
+                                 LMULH, SHIFT, MLEN, NAME, OP)               \
+  template <size_t N>                                                        \
+  HWY_API HWY_RVV_V(BASE, SEWH, LMULH) NAME(                                 \
+      HWY_RVV_D(BASE, SEWH, N, SHIFT - 1) d, HWY_RVV_V(BASE, SEW, LMUL) v) { \
+    const size_t avl = Lanes(d);                                             \
+    return __riscv_v##OP##CHAR##SEWH##LMULH(                                 \
+        v, 16, HWY_RVV_INSERT_VXRM(__RISCV_VXRM_RDN, avl));                  \
+  }
+namespace detail {
+HWY_RVV_FOREACH_U32(HWY_RVV_DEMOTE_TO_SHR_16, DemoteToShr16, nclipu_wx_,
+                    _DEMOTE_VIRT)
+}
+#undef HWY_RVV_DEMOTE_TO_SHR_16
+
+// float -> bfloat16: reinterprets the float lanes as u32, narrows them with
+// detail::DemoteToShr16 and reinterprets the u16 result as bfloat16.
+template <size_t N, int kPow2>
+HWY_API VFromD<Simd<bfloat16_t, N, kPow2>> DemoteTo(
+    Simd<bfloat16_t, N, kPow2> d, VFromD<Simd<float, N, kPow2 + 1>> v) {
+  const Rebind<uint32_t, decltype(d)> du32;
+  const RebindToUnsigned<decltype(d)> du16;
+  const auto bits32 = BitCast(du32, v);
+  const auto bits16 = detail::DemoteToShr16(du16, bits32);
+  return BitCast(d, bits16);
+}
+
+// ------------------------------ ConvertTo F
+
+// Generates three ConvertTo overloads per float type with matching SEW/LMUL:
+// signed int -> float, unsigned int -> float, and float -> signed int.
+// The macro definition ends at the final closing brace; the line after it is
+// deliberately not continued with a backslash so that nothing following the
+// macro is spliced into its body.
+#define HWY_RVV_CONVERT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH,       \
+                        SHIFT, MLEN, NAME, OP)                                 \
+  template <size_t N>                                                          \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL) ConvertTo(                                \
+      HWY_RVV_D(BASE, SEW, N, SHIFT) d, HWY_RVV_V(int, SEW, LMUL) v) {         \
+    return __riscv_vfcvt_f_x_v_f##SEW##LMUL(v, Lanes(d));                      \
+  }                                                                            \
+  template <size_t N>                                                          \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL) ConvertTo(                                \
+      HWY_RVV_D(BASE, SEW, N, SHIFT) d, HWY_RVV_V(uint, SEW, LMUL) v) {        \
+    return __riscv_vfcvt_f_xu_v_f##SEW##LMUL(v, Lanes(d));                     \
+  }                                                                            \
+  /* Truncates (rounds toward zero). */                                        \
+  template <size_t N>                                                          \
+  HWY_API HWY_RVV_V(int, SEW, LMUL) ConvertTo(HWY_RVV_D(int, SEW, N, SHIFT) d, \
+                                              HWY_RVV_V(BASE, SEW, LMUL) v) {  \
+    return __riscv_vfcvt_rtz_x_f_v_i##SEW##LMUL(v, Lanes(d));                  \
+  }
+// API only requires f32 but we provide f64 for internal use.
+HWY_RVV_FOREACH_F(HWY_RVV_CONVERT, _, _, _ALL_VIRT)
+#undef HWY_RVV_CONVERT
+
+// Uses default rounding mode. Must be separate because there is no D arg.
+// Without a tag, the lane count comes from HWY_RVV_AVL(SEW, SHIFT).
+#define HWY_RVV_NEAREST(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH,       \
+                        SHIFT, MLEN, NAME, OP)                                 \
+  HWY_API HWY_RVV_V(int, SEW, LMUL) NearestInt(HWY_RVV_V(BASE, SEW, LMUL) v) { \
+    const size_t avl = HWY_RVV_AVL(SEW, SHIFT);                                \
+    return __riscv_vfcvt_x_f_v_i##SEW##LMUL(v, avl);                           \
+  }
+HWY_RVV_FOREACH_F(HWY_RVV_NEAREST, _, _, _ALL)
+#undef HWY_RVV_NEAREST
+
+// ================================================== COMBINE
+
+namespace detail {
+
+// For x86-compatible behaviour mandated by Highway API: TableLookupBytes
+// offsets are implicitly relative to the start of their 128-bit block.
+// Returns the number of T lanes per block: at most 16 / sizeof(T), and never
+// more than MaxLanes(d) or the runtime Lanes(d).
+template <typename T, size_t N, int kPow2>
+HWY_INLINE size_t LanesPerBlock(Simd<T, N, kPow2> d) {
+  // Compile-time upper bound on the lanes in one block.
+  constexpr size_t kMaxLpb = HWY_MIN(16 / sizeof(T), MaxLanes(d));
+  // Minimum size in bytes of VFromD<decltype(d)>, with kPow2 clamped to
+  // [-3, 3].
+  constexpr size_t kMinVecBytes =
+      ScaleByPower(16, HWY_MAX(HWY_MIN(kPow2, 3), -3));
+  // Minimum lane count of VFromD<decltype(d)>, rounded up.
+  constexpr size_t kMinVecLanes = (kMinVecBytes + sizeof(T) - 1) / sizeof(T);
+
+  // kMaxLpb <= kMinVecLanes implies kMaxLpb <= Lanes(d), so the runtime query
+  // can be skipped.
+  if (kMaxLpb <= kMinVecLanes) return kMaxLpb;
+
+  // Fractional LMUL: the vector may hold fewer lanes than kMaxLpb.
+  const size_t actual_lanes = Lanes(d);
+  return HWY_MIN(actual_lanes, kMaxLpb);
+}
+
+// Clears the within-block bits of each index in `iota0` by AND-ing with
+// ~(LanesPerBlock(d) - 1), which assumes LanesPerBlock(d) is a power of two.
+template <class D, class V>
+HWY_INLINE V OffsetsOf128BitBlocks(const D d, const V iota0) {
+  using TU = MakeUnsigned<TFromD<D>>;
+  const size_t block_start_mask = ~(LanesPerBlock(d) - 1);
+  return AndS(iota0, static_cast<TU>(block_start_mask));
+}
+
+// Returns a mask that is true for lanes whose index within their block
+// (lane index AND LanesPerBlock - 1) is less than kLanes.
+template <size_t kLanes, class D>
+HWY_INLINE MFromD<D> FirstNPerBlock(D /* tag */) {
+  const RebindToUnsigned<D> du;
+  const RebindToSigned<D> di;
+  using TU = TFromD<decltype(du)>;
+  using TI = TFromD<decltype(di)>;
+  const TU in_block_mask = static_cast<TU>(LanesPerBlock(du) - 1);
+  const auto idx_in_block = AndS(Iota0(du), in_block_mask);
+  // The comparison is done on signed lanes.
+  return LtS(BitCast(di, idx_in_block), static_cast<TI>(kLanes));
+}
+
+// Wraps the _vx slide-up intrinsic; `dst` is passed through as its first
+// operand and `lanes` is the slide amount.
+#define HWY_RVV_SLIDE_UP(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH,  \
+                         SHIFT, MLEN, NAME, OP)                            \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                       \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) dst, HWY_RVV_V(BASE, SEW, LMUL) src, \
+           size_t lanes) {                                                 \
+    const size_t avl = HWY_RVV_AVL(SEW, SHIFT);                            \
+    return __riscv_v##OP##_vx_##CHAR##SEW##LMUL(dst, src, lanes, avl);     \
+  }
+
+// Wraps the _vx slide-down intrinsic; `lanes` is the slide amount.
+#define HWY_RVV_SLIDE_DOWN(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                           SHIFT, MLEN, NAME, OP)                           \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                        \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) src, size_t lanes) {                  \
+    const size_t avl = HWY_RVV_AVL(SEW, SHIFT);                             \
+    return __riscv_v##OP##_vx_##CHAR##SEW##LMUL(src, lanes, avl);           \
+  }
+
+HWY_RVV_FOREACH(HWY_RVV_SLIDE_UP, SlideUp, slideup, _ALL)
+HWY_RVV_FOREACH(HWY_RVV_SLIDE_DOWN, SlideDown, slidedown, _ALL)
+
+#undef HWY_RVV_SLIDE_UP
+#undef HWY_RVV_SLIDE_DOWN
+
+}  // namespace detail
+
+// ------------------------------ SlideUpLanes
+// Slides `v` up by `amt` lanes, using a zero vector as the SlideUp destination.
+template <class D>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+  const VFromD<D> zero = Zero(d);
+  return detail::SlideUp(zero, v, amt);
+}
+
+// ------------------------------ SlideDownLanes
+// Slides `v` down by `amt` lanes. When `d` describes only part of the
+// underlying vector, lanes at index >= Lanes(d) - amt are zeroed afterwards.
+template <class D>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+  const VFromD<D> slid = detail::SlideDown(v, amt);
+  // Nothing to clear unless d covers fewer lanes than the vector type.
+  if (MaxLanes(d) >= MaxLanes(DFromV<decltype(v)>())) {
+    return slid;
+  }
+  const auto keep = FirstN(d, Lanes(d) - amt);
+  return IfThenElseZero(keep, slid);
+}
+
+// ------------------------------ ConcatUpperLower
+// Takes the first Lanes(d) / 2 lanes from `lo` and the remaining lanes from
+// `hi`.
+template <class D, class V>
+HWY_API V ConcatUpperLower(D d, const V hi, const V lo) {
+  const size_t half_lanes = Lanes(d) / 2;
+  const auto in_lower_half = FirstN(d, half_lanes);
+  return IfThenElse(in_lower_half, lo, hi);
+}
+
+// ------------------------------ ConcatLowerLower
+// Slides `hi` up by half a vector, with `lo` as the SlideUp destination
+// operand.
+template <class D, class V>
+HWY_API V ConcatLowerLower(D d, const V hi, const V lo) {
+  const size_t half_lanes = Lanes(d) / 2;
+  return detail::SlideUp(lo, hi, half_lanes);
+}
+
+// ------------------------------ ConcatUpperUpper
+// Moves the upper half of `lo` into the lower half, then blends it with the
+// upper half of `hi`.
+template <class D, class V>
+HWY_API V ConcatUpperUpper(D d, const V hi, const V lo) {
+  const size_t half_lanes = Lanes(d) / 2;
+  const auto upper_of_lo = detail::SlideDown(lo, half_lanes);
+  return ConcatUpperLower(d, hi, upper_of_lo);
+}
+
+// ------------------------------ ConcatLowerUpper
+// Swaps halves of both inputs (hi slid up, lo slid down by half a vector) and
+// blends the results via ConcatUpperLower.
+template <class D, class V>
+HWY_API V ConcatLowerUpper(D d, const V hi, const V lo) {
+  const size_t half_lanes = Lanes(d) / 2;
+  const auto lower_of_hi_moved_up = detail::SlideUp(hi, hi, half_lanes);
+  const auto upper_of_lo_moved_down = detail::SlideDown(lo, half_lanes);
+  return ConcatUpperLower(d, lower_of_hi_moved_up, upper_of_lo_moved_down);
+}
+
+// ------------------------------ Combine
+// Extends both halves to the full-width type via detail::Ext, then slides
+// `hi` up by Lanes(d2) / 2 with `lo` as the SlideUp destination operand.
+template <class D2, class V>
+HWY_API VFromD<D2> Combine(D2 d2, const V hi, const V lo) {
+  const size_t half_lanes = Lanes(d2) / 2;
+  const auto lo_wide = detail::Ext(d2, lo);
+  const auto hi_wide = detail::Ext(d2, hi);
+  return detail::SlideUp(lo_wide, hi_wide, half_lanes);
+}
+
+// ------------------------------ ZeroExtendVector
+// Combines `lo` with an all-zero upper half; the zero is formed as lo ^ lo.
+template <class D2, class V>
+HWY_API VFromD<D2> ZeroExtendVector(D2 d2, const V lo) {
+  const V zero_half = Xor(lo, lo);
+  return Combine(d2, zero_half, lo);
+}
+
+// ------------------------------ Lower/UpperHalf
+
+namespace detail {
+
+// RVV may only support LMUL >= SEW/64; returns whether that holds for D.
+// With SEW = sizeof(T)*8 and LMUL = 1 << d.Pow2(), this is checked as
+// sizeof(T) <= 1 << (Pow2 + 3); the +3 avoids negative shift counts.
+template <class D>
+constexpr bool IsSupportedLMUL(D d) {
+  return sizeof(TFromD<D>) <= (size_t{1} << (d.Pow2() + 3));
+}
+
+}  // namespace detail
+
+// Selected when DH has a supported LMUL: the lower half is obtained by
+// 'truncating' the vector, i.e. halving LMUL via detail::Trunc.
+template <class DH, hwy::EnableIf<detail::IsSupportedLMUL(DH())>* = nullptr>
+HWY_API VFromD<DH> LowerHalf(const DH /* tag */, const VFromD<Twice<DH>> v) {
+  const VFromD<DH> lower = detail::Trunc(v);
+  return lower;
+}
+
+// Otherwise, there is no corresponding intrinsic type (e.g. vuint64mf2_t), and
+// the hardware may set "vill" if we attempt such an LMUL. However, the V
+// extension on application processors requires Zvl128b, i.e. VLEN >= 128, so it
+// still makes sense to have half of an SEW=64 vector. We instead just return
+// the vector, and rely on the kPow2 in DH to halve the return value of Lanes().
+// Consequently the return type here is V itself, not a narrower vector type,
+// and the input is returned unchanged.
+template <class DH, class V,
+          hwy::EnableIf<!detail::IsSupportedLMUL(DH())>* = nullptr>
+HWY_API V LowerHalf(const DH /* tag */, const V v) {
+  return v;
+}
+
+// Tag-less overload: derives the half-width tag from V and forwards to the
+// tagged LowerHalf.
+template <class V>
+HWY_API VFromD<Half<DFromV<V>>> LowerHalf(const V v) {
+  const Half<DFromV<V>> dh;
+  return LowerHalf(dh, v);
+}
+
+// Slides the upper Lanes(d2) lanes down to the bottom, then takes the lower
+// half of the result.
+template <class DH>
+HWY_API VFromD<DH> UpperHalf(const DH d2, const VFromD<Twice<DH>> v) {
+  const size_t half_lanes = Lanes(d2);
+  const auto upper_moved_down = detail::SlideDown(v, half_lanes);
+  return LowerHalf(d2, upper_moved_down);
+}
+
+// ================================================== SWIZZLE
+
+namespace detail {
+// Special instruction for 1 lane is presumably faster?
+// Each wrapper passes 0 as the scalar operand of the slide1 intrinsic.
+#define HWY_RVV_SLIDE1(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                       MLEN, NAME, OP)                                         \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) {      \
+    const size_t avl = HWY_RVV_AVL(SEW, SHIFT);                                \
+    return __riscv_v##OP##_##CHAR##SEW##LMUL(v, 0, avl);                       \
+  }
+
+// Integer and float types use differently named intrinsics (_vx vs _vf).
+HWY_RVV_FOREACH_UI(HWY_RVV_SLIDE1, Slide1Up, slide1up_vx, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_SLIDE1, Slide1Up, fslide1up_vf, _ALL)
+HWY_RVV_FOREACH_UI(HWY_RVV_SLIDE1, Slide1Down, slide1down_vx, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_SLIDE1, Slide1Down, fslide1down_vf, _ALL)
+#undef HWY_RVV_SLIDE1
+}  // namespace detail
+
+// ------------------------------ Slide1Up and Slide1Down
+// Toggles HWY_NATIVE_SLIDE1_UP_DOWN: undefines it if already defined, defines
+// it otherwise.
+// NOTE(review): presumably this tells generic code that this target provides
+// its own Slide1Up/Slide1Down - confirm against the generic ops header.
+#ifdef HWY_NATIVE_SLIDE1_UP_DOWN
+#undef HWY_NATIVE_SLIDE1_UP_DOWN
+#else
+#define HWY_NATIVE_SLIDE1_UP_DOWN
+#endif
+
+// Public wrapper around detail::Slide1Up; the tag only selects the overload.
+template <class D>
+HWY_API VFromD<D> Slide1Up(D /*d*/, VFromD<D> v) {
+  const VFromD<D> slid = detail::Slide1Up(v);
+  return slid;
+}
+
+// Slides `v` down by one lane. When `d` describes only part of the underlying
+// vector, lanes at index >= Lanes(d) - 1 are zeroed afterwards.
+template <class D>
+HWY_API VFromD<D> Slide1Down(D d, VFromD<D> v) {
+  const VFromD<D> slid = detail::Slide1Down(v);
+  // Nothing to clear unless d covers fewer lanes than the vector type.
+  if (MaxLanes(d) >= MaxLanes(DFromV<decltype(v)>())) {
+    return slid;
+  }
+  const auto keep = FirstN(d, Lanes(d) - 1);
+  return IfThenElseZero(keep, slid);
+}
+
+// ------------------------------ GetLane
+
+// Generates GetLane, which returns a scalar via the vector-to-scalar move
+// intrinsic.
+#define HWY_RVV_GET_LANE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH,     \
+                         SHIFT, MLEN, NAME, OP)                               \
+  HWY_API HWY_RVV_T(BASE, SEW) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) {           \
+    /* The scalar-move intrinsic takes no AVL argument. */                    \
+    const HWY_RVV_T(BASE, SEW) lane0 =                                        \
+        __riscv_v##OP##_s_##CHAR##SEW##LMUL##_##CHAR##SEW(v);                 \
+    return lane0;                                                             \
+  }
+
+HWY_RVV_FOREACH_UI(HWY_RVV_GET_LANE, GetLane, mv_x, _ALL)
+HWY_RVV_FOREACH_F(HWY_RVV_GET_LANE, GetLane, fmv_f, _ALL)
+#undef HWY_RVV_GET_LANE
+
+// ------------------------------ ExtractLane
+// Slides `v` down by `i` lanes and returns GetLane of the result.
+template <class V>
+HWY_API TFromV<V> ExtractLane(const V v, size_t i) {
+  const V lane_i_first = detail::SlideDown(v, i);
+  return GetLane(lane_i_first);
+}
+
+// ------------------------------ Additional mask logical operations
+
+// Mask-in, mask-out wrappers generated for every mask type; the last argument
+// is the intrinsic-name fragment (sof / sbf / sif).
+// NOTE(review): HWY_RVV_RETM_ARGM is defined outside this view; the exact
+// intrinsic each fragment maps to should be confirmed there.
+HWY_RVV_FOREACH_B(HWY_RVV_RETM_ARGM, SetOnlyFirst, sof)
+HWY_RVV_FOREACH_B(HWY_RVV_RETM_ARGM, SetBeforeFirst, sbf)
+HWY_RVV_FOREACH_B(HWY_RVV_RETM_ARGM, SetAtOrBeforeFirst, sif)
+
+// SetAtOrAfterFirst is the complement of SetBeforeFirst, generated for every
+// mask type.
+#define HWY_RVV_SET_AT_OR_AFTER_FIRST(SEW, SHIFT, MLEN, NAME, OP) \
+  HWY_API HWY_RVV_M(MLEN) SetAtOrAfterFirst(HWY_RVV_M(MLEN) m) {  \
+    const HWY_RVV_M(MLEN) before_first = SetBeforeFirst(m);       \
+    return Not(before_first);                                     \
+  }
+
+HWY_RVV_FOREACH_B(HWY_RVV_SET_AT_OR_AFTER_FIRST, _, _)
+#undef HWY_RVV_SET_AT_OR_AFTER_FIRST
+
+// ------------------------------ InsertLane
+
+// Non-8-bit lanes: replaces lane `i` of `v` with `t` by comparing the lane
+// indices against `i` and blending in a broadcast of `t`.
+template <class V, HWY_IF_NOT_T_SIZE_V(V, 1)>
+HWY_API V InsertLane(const V v, size_t i, TFromV<V> t) {
+  const DFromV<V> d;
+  const RebindToUnsigned<decltype(d)> du;  // Iota0 is unsigned only
+  using TU = TFromD<decltype(du)>;
+  const auto lane_indices = detail::Iota0(du);
+  const auto is_target = detail::EqS(lane_indices, static_cast<TU>(i));
+  const auto replacement = Set(d, t);
+  return IfThenElse(RebindMask(d, is_target), replacement, v);
+}
+
+// For 8-bit lanes, Iota0 might overflow. Instead, slide a vector of ones up by
+// `i` over zeros, compare against one, and keep only the first true lane.
+template <class V, HWY_IF_T_SIZE_V(V, 1)>
+HWY_API V InsertLane(const V v, size_t i, TFromV<V> t) {
+  const DFromV<V> d;
+  const auto ones = Set(d, 1);
+  const auto ones_from_i = detail::SlideUp(Zero(d), ones, i);
+  const auto at_or_after_i = Eq(ones_from_i, ones);
+  const auto only_i = SetOnlyFirst(at_or_after_i);
+  return IfThenElse(RebindMask(d, only_i), Set(d, t), v);
+}
+
+// ------------------------------ OddEven
+
+namespace detail {
+
+// Faster version using a wide constant instead of Iota0 + AndS: a double-width
+// lane holding 1, viewed as narrow lanes, is nonzero only in one lane of each
+// pair.
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 8)>
+HWY_INLINE MFromD<D> IsEven(D d) {
+  const RebindToUnsigned<decltype(d)> du;
+  const RepartitionToWide<decltype(du)> duw;
+  const auto wide_ones_as_narrow = BitCast(du, Set(duw, 1));
+  return RebindMask(d, detail::NeS(wide_ones_as_narrow, 0u));
+}
+
+// 64-bit lanes have no wider type, so test bit 0 of the lane index instead.
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE MFromD<D> IsEven(D d) {
+  const RebindToUnsigned<decltype(d)> du;  // Iota0 is unsigned only
+  const auto index_bit0 = detail::AndS(detail::Iota0(du), 1);
+  return detail::EqS(index_bit0, 0);
+}
+
+// Also provide the negated form because there is no native CompressNot.
+// Same wide-constant pattern as IsEven, but selecting the zero lanes.
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 8)>
+HWY_INLINE MFromD<D> IsOdd(D d) {
+  const RebindToUnsigned<decltype(d)> du;
+  const RepartitionToWide<decltype(du)> duw;
+  const auto wide_ones_as_narrow = BitCast(du, Set(duw, 1));
+  return RebindMask(d, detail::EqS(wide_ones_as_narrow, 0u));
+}
+
+// 64-bit lanes: true where bit 0 of the lane index is set.
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE MFromD<D> IsOdd(D d) {
+  const RebindToUnsigned<decltype(d)> du;  // Iota0 is unsigned only
+  const auto index_bit0 = detail::AndS(detail::Iota0(du), 1);
+  return detail::NeS(index_bit0, 0);
+}
+
+}  // namespace detail
+
+// Returns `b` in even-indexed lanes and `a` in odd-indexed lanes.
+template <class V>
+HWY_API V OddEven(const V a, const V b) {
+  const DFromV<V> d;
+  const auto even_lanes = detail::IsEven(d);
+  return IfThenElse(even_lanes, b, a);
+}
+
+// ------------------------------ DupEven (OddEven)
+// Even lanes keep `v`; odd lanes take `v` slid up by one lane, i.e. the value
+// of the even lane below them.
+template <class V>
+HWY_API V DupEven(const V v) {
+  return OddEven(detail::Slide1Up(v), v);
+}
+
+// ------------------------------ DupOdd (OddEven)
+// Odd lanes keep `v`; even lanes take `v` slid down by one lane, i.e. the
+// value of the odd lane above them.
+template <class V>
+HWY_API V DupOdd(const V v) {
+  return OddEven(v, detail::Slide1Down(v));
+}
+
+// ------------------------------ OddEvenBlocks
+// Returns `b` in lanes belonging to even-numbered 16-byte blocks and `a`
+// elsewhere. The block number is the lane index shifted right by
+// log2(lanes per 16 bytes).
+template <class V>
+HWY_API V OddEvenBlocks(const V a, const V b) {
+  const RebindToUnsigned<DFromV<V>> du;  // Iota0 is unsigned only
+  constexpr size_t kLog2LanesPerBlock = CeilLog2(16 / sizeof(TFromV<V>));
+  const auto block_index = ShiftRight<kLog2LanesPerBlock>(detail::Iota0(du));
+  const auto in_even_block = detail::EqS(detail::AndS(block_index, 1), 0);
+  return IfThenElse(in_even_block, b, a);
+}
+
+// ------------------------------ SwapAdjacentBlocks
+// Slides `v` by one block in each direction and lets OddEvenBlocks pick the
+// down-slid copy for even blocks and the up-slid copy for odd blocks.
+template <class V>
+HWY_API V SwapAdjacentBlocks(const V v) {
+  const DFromV<V> d;
+  const size_t block_lanes = detail::LanesPerBlock(d);
+  const V moved_up = detail::SlideUp(v, v, block_lanes);
+  const V moved_down = detail::SlideDown(v, block_lanes);
+  return OddEvenBlocks(moved_up, moved_down);
+}
+
+// ------------------------------ TableLookupLanes
+
+// Reinterprets an index vector as unsigned lanes of the same size. In debug
+// builds, asserts that every index is unchanged by masking with
+// 2 * Lanes(d) - 1.
+template <class D, class VI>
+HWY_API VFromD<RebindToUnsigned<D>> IndicesFromVec(D d, VI vec) {
+  static_assert(sizeof(TFromD<D>) == sizeof(TFromV<VI>), "Index != lane");
+  const RebindToUnsigned<decltype(d)> du;  // instead of <D>: avoids unused d.
+  const auto unsigned_idx = BitCast(du, vec);
+#if HWY_IS_DEBUG_BUILD
+  using TU = TFromD<decltype(du)>;
+  const size_t index_limit = Lanes(d) * 2;
+  const auto masked_idx =
+      detail::AndS(unsigned_idx, static_cast<TU>(index_limit - 1));
+  HWY_DASSERT(AllTrue(du, Eq(unsigned_idx, masked_idx)));
+#endif
+  return unsigned_idx;
+}
+
+// Loads indices from (possibly unaligned) memory and converts them with
+// IndicesFromVec. TI must have the same size as the lane type of D.
+template <class D, typename TI>
+HWY_API VFromD<RebindToUnsigned<D>> SetTableIndices(D d, const TI* idx) {
+  static_assert(sizeof(TFromD<D>) == sizeof(TI), "Index size must match lane");
+  const Rebind<TI, D> d_index;
+  return IndicesFromVec(d, LoadU(d_index, idx));
+}
+
+// TODO(janwas): avoid using this for 8-bit; wrap in detail namespace.
+// For large 8-bit vectors, index overflow will lead to incorrect results.
+// Reverse already uses TableLookupLanes16 to prevent this.
+// Generates TableLookupLanes: a gather from `v` using indices of the same lane
+// width and LMUL as the data.
+#define HWY_RVV_TABLE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                      MLEN, NAME, OP)                                         \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                          \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(uint, SEW, LMUL) idx) {    \
+    const size_t avl = HWY_RVV_AVL(SEW, SHIFT);                               \
+    return __riscv_v##OP##_vv_##CHAR##SEW##LMUL(v, idx, avl);                 \
+  }
+
+HWY_RVV_FOREACH(HWY_RVV_TABLE, TableLookupLanes, rgather, _ALL)
+#undef HWY_RVV_TABLE
+
+namespace detail {
+
+// Used by I8/U8 Reverse. Gathers 8-bit lanes using indices of double the lane
+// width (SEWD) held in a vector of double the LMUL (LMULD).
+#define HWY_RVV_TABLE16(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH,     \
+                        SHIFT, MLEN, NAME, OP)                               \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                         \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(uint, SEWD, LMULD) idx) { \
+    const size_t avl = HWY_RVV_AVL(SEW, SHIFT);                              \
+    return __riscv_v##OP##_vv_##CHAR##SEW##LMUL(v, idx, avl);                \
+  }
+
+HWY_RVV_FOREACH_UI08(HWY_RVV_TABLE16, TableLookupLanes16, rgatherei16, _EXT)
+#undef HWY_RVV_TABLE16
+
+// Used by Expand. Masked gather via the _mu intrinsic variant, which also
+// takes the mask and a `maskedoff` vector.
+#define HWY_RVV_MASKED_TABLE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH,  \
+                             SHIFT, MLEN, NAME, OP)                            \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                           \
+      NAME(HWY_RVV_M(MLEN) mask, HWY_RVV_V(BASE, SEW, LMUL) maskedoff,         \
+           HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(uint, SEW, LMUL) idx) {     \
+    const size_t avl = HWY_RVV_AVL(SEW, SHIFT);                                \
+    return __riscv_v##OP##_vv_##CHAR##SEW##LMUL##_mu(mask, maskedoff, v, idx,  \
+                                                     avl);                     \
+  }
+
+HWY_RVV_FOREACH(HWY_RVV_MASKED_TABLE, MaskedTableLookupLanes, rgather, _ALL)
+#undef HWY_RVV_MASKED_TABLE
+
+// Masked gather for 8-bit lanes using double-width (SEWD/LMULD) indices, via
+// the _mu intrinsic variant.
+#define HWY_RVV_MASKED_TABLE16(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD,       \
+                               LMULH, SHIFT, MLEN, NAME, OP)                   \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                           \
+      NAME(HWY_RVV_M(MLEN) mask, HWY_RVV_V(BASE, SEW, LMUL) maskedoff,         \
+           HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(uint, SEWD, LMULD) idx) {   \
+    const size_t avl = HWY_RVV_AVL(SEW, SHIFT);                                \
+    return __riscv_v##OP##_vv_##CHAR##SEW##LMUL##_mu(mask, maskedoff, v, idx,  \
+                                                     avl);                     \
+  }
+
+HWY_RVV_FOREACH_UI08(HWY_RVV_MASKED_TABLE16, MaskedTableLookupLanes16,
+                     rgatherei16, _EXT)
+#undef HWY_RVV_MASKED_TABLE16
+
+}  // namespace detail
+
+// ------------------------------ Reverse (TableLookupLanes)
+// 8-bit lanes, kPow2 <= 2: builds 16-bit indices from Iota0 and N - 1 via
+// ReverseSubS, then gathers with TableLookupLanes16.
+template <class D, HWY_IF_T_SIZE_D(D, 1), HWY_IF_POW2_LE_D(D, 2)>
+HWY_API VFromD<D> Reverse(D d, VFromD<D> v) {
+  const Rebind<uint16_t, decltype(d)> du16;
+  const uint16_t last_lane = static_cast<uint16_t>(Lanes(d) - 1);
+  const auto reversed_idx = detail::ReverseSubS(detail::Iota0(du16), last_lane);
+  return detail::TableLookupLanes16(v, reversed_idx);
+}
+
+// 8-bit lanes, kPow2 > 2: reverses each half separately with 16-bit indices,
+// then recombines with the halves exchanged.
+template <class D, HWY_IF_T_SIZE_D(D, 1), HWY_IF_POW2_GT_D(D, 2)>
+HWY_API VFromD<D> Reverse(D d, VFromD<D> v) {
+  const Half<decltype(d)> dh;
+  const Rebind<uint16_t, decltype(dh)> du16;
+  const uint16_t last_in_half = static_cast<uint16_t>(Lanes(dh) - 1);
+  const auto reversed_idx =
+      detail::ReverseSubS(detail::Iota0(du16), last_in_half);
+  const auto lower_reversed =
+      detail::TableLookupLanes16(LowerHalf(dh, v), reversed_idx);
+  const auto upper_reversed =
+      detail::TableLookupLanes16(UpperHalf(dh, v), reversed_idx);
+  // The reversed lower half becomes the new upper half and vice versa.
+  return Combine(d, lower_reversed, upper_reversed);
+}
+
+// 16/32/64-bit lanes: builds same-width indices from Iota0 and N - 1 via
+// ReverseSubS, then gathers with TableLookupLanes.
+template <class D, HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 2) | (1 << 4) | (1 << 8))>
+HWY_API VFromD<D> Reverse(D /* tag */, VFromD<D> v) {
+  const RebindToUnsigned<D> du;
+  using TU = TFromD<decltype(du)>;
+  const TU last_lane = static_cast<TU>(Lanes(du) - 1);
+  const auto reversed_idx = detail::ReverseSubS(detail::Iota0(du), last_lane);
+  return TableLookupLanes(v, reversed_idx);
+}
+
+// ------------------------------ Reverse2 (RotateRight, OddEven)
+
+// Per-target flags to prevent generic_ops-inl.h defining 8-bit Reverse2/4/8.
+// The flag is toggled rather than simply defined: it is undefined if already
+// defined, and defined otherwise.
+#ifdef HWY_NATIVE_REVERSE2_8
+#undef HWY_NATIVE_REVERSE2_8
+#else
+#define HWY_NATIVE_REVERSE2_8
+#endif
+
+// Shifting and adding requires fewer instructions than blending, but casting to
+// u32 only works for LMUL in [1/2, 8].
+
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Reverse2(D d, const VFromD<D> v) {
+  using DPair = detail::AdjustSimdTagToMinVecPow2<Repartition<uint16_t, D>>;
+  return ResizeBitCast(d, RotateRight<8>(ResizeBitCast(DPair(), v)));  // swap
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Reverse2(D d, const VFromD<D> v) {
+  using DPair = detail::AdjustSimdTagToMinVecPow2<Repartition<uint32_t, D>>;
+  return ResizeBitCast(d, RotateRight<16>(ResizeBitCast(DPair(), v)));  // swap
+}
+
+// Shifting and adding requires fewer instructions than blending, but casting to
+// u64 does not work for LMUL < 1.
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> Reverse2(D d, const VFromD<D> v) {
+  using DPair = detail::AdjustSimdTagToMinVecPow2<Repartition<uint64_t, D>>;
+  return ResizeBitCast(d, RotateRight<32>(ResizeBitCast(DPair(), v)));  // swap
+}
+
+template <class D, class V = VFromD<D>, HWY_IF_T_SIZE_D(D, 8)>
+HWY_API V Reverse2(D /* tag */, const V v) {
+  // 8-byte lanes: blend neighbours instead of rotating inside a wider lane.
+  // Presumably OddEven takes odd lanes from its first argument (slide-up).
+  return OddEven(detail::Slide1Up(v), detail::Slide1Down(v));
+}
+
+// ------------------------------ Reverse4 (TableLookupLanes)
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Reverse4(D d, const VFromD<D> v) {
+  using DPair = detail::AdjustSimdTagToMinVecPow2<Repartition<uint16_t, D>>;
+  const auto swapped = ResizeBitCast(DPair(), Reverse2(d, v));  // u8 pairs
+  return ResizeBitCast(d, Reverse2(DPair(), swapped));  // then u16 pairs
+}
+
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Reverse4(D d, const VFromD<D> v) {
+  const RebindToUnsigned<D> d_bits;
+  const auto mirror4 = detail::XorS(detail::Iota0(d_bits), 3);  // lane ^ 3
+  return BitCast(d, TableLookupLanes(BitCast(d_bits, v), mirror4));
+}
+
+// ------------------------------ Reverse8 (TableLookupLanes)
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Reverse8(D d, const VFromD<D> v) {
+  using DQuad = detail::AdjustSimdTagToMinVecPow2<Repartition<uint32_t, D>>;
+  const auto quads = ResizeBitCast(DQuad(), Reverse4(d, v));  // u8 in fours
+  return ResizeBitCast(d, Reverse2(DQuad(), quads));  // then swap u32 pairs
+}
+
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Reverse8(D d, const VFromD<D> v) {
+  const RebindToUnsigned<D> d_bits;
+  const auto mirror8 = detail::XorS(detail::Iota0(d_bits), 7);  // lane ^ 7
+  return BitCast(d, TableLookupLanes(BitCast(d_bits, v), mirror8));
+}
+
+// ------------------------------ ReverseBlocks (Reverse, Shuffle01)
+template <class D, class V = VFromD<D>>
+HWY_API V ReverseBlocks(D d, V v) {
+  const detail::AdjustSimdTagToMinVecPow2<Repartition<uint64_t, D>> d64;
+  const uint64_t last = static_cast<uint64_t>(Lanes(d64) - 1);
+  // Reverse all u64 lanes (a 128-bit block is a pair of them) ...
+  const auto reversed = detail::ReverseSubS(detail::Iota0(d64), last);
+  // ... then swap lo/hi u64 within each block to restore in-block order.
+  const auto idx64 = detail::XorS(reversed, 1);
+  return ResizeBitCast(d, TableLookupLanes(ResizeBitCast(d64, v), idx64));
+}
+
+// ------------------------------ Compress
+
+// RVV supports all lane types natively.
+#ifdef HWY_NATIVE_COMPRESS8
+#undef HWY_NATIVE_COMPRESS8
+#else
+#define HWY_NATIVE_COMPRESS8
+#endif
+
+template <typename T>
+struct CompressIsPartition {  // compile-time trait queried via ::value
+  enum { value = 0 };  // primary template: 0 for every lane type T
+};
+
+#define HWY_RVV_COMPRESS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                         SHIFT, MLEN, NAME, OP)                           \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                      \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_M(MLEN) mask) {          \
+    return __riscv_v##OP##_vm_##CHAR##SEW##LMUL(v, mask,                  \
+                                                HWY_RVV_AVL(SEW, SHIFT)); \
+  }
+
+HWY_RVV_FOREACH(HWY_RVV_COMPRESS, Compress, compress, _ALL)  // vcompress_vm
+#undef HWY_RVV_COMPRESS  // helper macro is local to this section
+
+// ------------------------------ Expand
+
+#ifdef HWY_NATIVE_EXPAND
+#undef HWY_NATIVE_EXPAND
+#else
+#define HWY_NATIVE_EXPAND
+#endif
+
+// >= 2-byte lanes: same-width index lanes will not overflow.
+template <class V, class M, HWY_IF_NOT_T_SIZE_V(V, 1)>
+HWY_API V Expand(V v, const M mask) {
+  using D = DFromV<V>;
+  const RebindToUnsigned<D> d_idx;
+  // Zero is the fallback operand, presumably used where mask is false.
+  const auto src_idx = detail::MaskedIota(d_idx, RebindMask(d_idx, mask));
+  return detail::MaskedTableLookupLanes(mask, Zero(D()), v, src_idx);
+}
+
+// 1-byte lanes, LMUL < 8: promote the indices to u16 lanes.
+template <class V, class M, HWY_IF_T_SIZE_V(V, 1), class D = DFromV<V>,
+          HWY_IF_POW2_LE_D(D, 2)>
+HWY_API V Expand(V v, const M mask) {
+  const Rebind<uint16_t, D> d_idx;
+  const auto wide_mask = RebindMask(d_idx, mask);
+  const auto src_idx = detail::MaskedIota(d_idx, wide_mask);
+  // Gather with 16-bit indices; zero is the fallback operand.
+  return detail::MaskedTableLookupLanes16(mask, Zero(D()), v, src_idx);
+}
+
+// 1-byte lanes, max LMUL: split into halves and expand each (2x unroll).
+template <class V, class M, HWY_IF_T_SIZE_V(V, 1), class D = DFromV<V>,
+          HWY_IF_POW2_GT_D(DFromV<V>, 2)>
+HWY_API V Expand(V v, const M mask) {
+  const D d;
+  const Half<D> dh;
+  // TODO(janwas): skip vec<->mask if we can cast masks.
+  const V mask_bits = VecFromMask(d, mask);
+  const auto lo_mask = MaskFromVec(LowerHalf(dh, mask_bits));
+  const auto hi_mask = MaskFromVec(UpperHalf(dh, mask_bits));
+
+  // Cannot just use UpperHalf, must shift by the number of inputs consumed.
+  const size_t consumed = CountTrue(dh, lo_mask);
+  const auto hi_src = detail::Trunc(detail::SlideDown(v, consumed));
+  const auto lo_src = LowerHalf(dh, v);
+  return Combine(d, Expand(hi_src, hi_mask), Expand(lo_src, lo_mask));
+}
+
+// ------------------------------ LoadExpand
+template <class D>
+HWY_API VFromD<D> LoadExpand(MFromD<D> mask, D d,
+                             const TFromD<D>* HWY_RESTRICT unaligned) {
+  return Expand(LoadU(d, unaligned), mask);  // LoadU of d first, then Expand
+}
+
+// ------------------------------ CompressNot
+template <class V, class M>
+HWY_API V CompressNot(V v, const M mask) {
+  return Compress(v, Not(mask));  // Compress driven by the inverted mask
+}
+
+// ------------------------------ CompressBlocksNot
+template <class V, class M>
+HWY_API V CompressBlocksNot(V v, const M mask) {
+  return CompressNot(v, mask);  // no block-specific path: plain forwarding
+}
+
+// ------------------------------ CompressStore
+template <class V, class M, class D>
+HWY_API size_t CompressStore(const V v, const M mask, const D d,
+                             TFromD<D>* HWY_RESTRICT unaligned) {
+  StoreU(Compress(v, mask), d, unaligned);  // StoreU, not a count-limited store
+  return CountTrue(d, mask);  // number of mask-true lanes
+}
+
+// ------------------------------ CompressBlendedStore
+template <class V, class M, class D>
+HWY_API size_t CompressBlendedStore(const V v, const M mask, const D d,
+                                    TFromD<D>* HWY_RESTRICT unaligned) {
+  const size_t num_selected = CountTrue(d, mask);  // lane count given to StoreN
+  detail::StoreN(num_selected, Compress(v, mask), d, unaligned);
+  return num_selected;
+}
+
+// ================================================== COMPARE (2)
+
+// ------------------------------ FindLastTrue
+
+template <class D>
+HWY_API intptr_t FindLastTrue(D d, MFromD<D> m) {
+  const RebindToSigned<D> di;
+  // The first true lane of the reversed mask mirrors the last true lane of m.
+  const auto reversed = MaskFromVec(Reverse(di, VecFromMask(di, m)));
+  const intptr_t from_end = FindFirstTrue(d, reversed);
+  if (from_end < 0) return intptr_t{-1};  // negative result is passed on as -1
+  return static_cast<intptr_t>(Lanes(d) - 1) - from_end;
+}
+
+template <class D>
+HWY_API size_t FindKnownLastTrue(D d, MFromD<D> m) {
+  const RebindToSigned<D> di;
+  const auto reversed = MaskFromVec(Reverse(di, VecFromMask(di, m)));
+  const size_t from_end = FindKnownFirstTrue(d, reversed);  // index in reversed
+  return Lanes(d) - 1 - from_end;  // mirror back to an index into m
+}
+
+// ------------------------------ ConcatOdd (Compress)
+
+namespace detail {
+// Narrow<kShift>(v): SEWD/LMULD input, SEW/LMUL result via __riscv_vnsrl_wx_*.
+#define HWY_RVV_NARROW(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                       MLEN, NAME, OP)                                         \
+  template <size_t kShift>                                                     \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEWD, LMULD) v) {    \
+    return __riscv_v##OP##_wx_##CHAR##SEW##LMUL(v, kShift,                     \
+                                                HWY_RVV_AVL(SEWD, SHIFT + 1)); \
+  }
+
+HWY_RVV_FOREACH_U08(HWY_RVV_NARROW, Narrow, nsrl, _EXT)  // result lanes: u8
+HWY_RVV_FOREACH_U16(HWY_RVV_NARROW, Narrow, nsrl, _EXT)  // result lanes: u16
+HWY_RVV_FOREACH_U32(HWY_RVV_NARROW, Narrow, nsrl, _EXT)  // result lanes: u32
+#undef HWY_RVV_NARROW
+
+}  // namespace detail
+
+// Casting to wider and narrowing is the fastest for < 64-bit lanes.
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 8), HWY_IF_POW2_LE_D(D, 2)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+  using DWide = RepartitionToWide<RebindToUnsigned<Twice<D>>>;
+  const VFromD<DWide> pairs = BitCast(DWide(), Combine(Twice<D>(), hi, lo));
+  // Shifting right by one lane width selects the odd lane of every pair.
+  constexpr size_t kLaneBits = sizeof(TFromD<D>) * 8;
+  return BitCast(d, detail::Narrow<kLaneBits>(pairs));
+}
+
+// 64-bit lanes: Combine both inputs, then Compress the odd lanes.
+template <class D, HWY_IF_T_SIZE_D(D, 8), HWY_IF_POW2_LE_D(D, 2)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Twice<D> d2;
+  const auto packed = Compress(Combine(d2, hi, lo), detail::IsOdd(d2));
+  return LowerHalf(d, packed);  // the result is the lower half after Compress
+}
+
+// Any type, max LMUL: Compress both, then Combine their lower halves.
+template <class D, HWY_IF_POW2_GT_D(D, 2)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Half<D> d_half;
+  const MFromD<D> odd_lanes = detail::IsOdd(d);
+  const auto packed_hi = LowerHalf(d_half, Compress(hi, odd_lanes));
+  const auto packed_lo = LowerHalf(d_half, Compress(lo, odd_lanes));
+  return Combine(d, packed_hi, packed_lo);
+}
+
+// ------------------------------ ConcatEven (Compress)
+
+// Casting to wider and narrowing is the fastest for < 64-bit lanes.
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 8), HWY_IF_POW2_LE_D(D, 2)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+  using DWide = RepartitionToWide<RebindToUnsigned<Twice<D>>>;
+  const VFromD<DWide> pairs = BitCast(DWide(), Combine(Twice<D>(), hi, lo));
+  // A shift of zero selects the even lane of every pair.
+  return BitCast(d, detail::Narrow<0>(pairs));
+}
+
+// 64-bit lanes: Combine both inputs, then Compress the even lanes.
+template <class D, HWY_IF_T_SIZE_D(D, 8), HWY_IF_POW2_LE_D(D, 2)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Twice<D> d2;
+  const auto packed = Compress(Combine(d2, hi, lo), detail::IsEven(d2));
+  return LowerHalf(d, packed);  // the result is the lower half after Compress
+}
+
+// Any type, max LMUL: Compress both, then Combine their lower halves.
+template <class D, HWY_IF_POW2_GT_D(D, 2)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Half<D> d_half;
+  const MFromD<D> even_lanes = detail::IsEven(d);
+  const auto packed_hi = LowerHalf(d_half, Compress(hi, even_lanes));
+  const auto packed_lo = LowerHalf(d_half, Compress(lo, even_lanes));
+  return Combine(d, packed_hi, packed_lo);
+}
+
+// ================================================== BLOCKWISE
+
+// ------------------------------ CombineShiftRightBytes
+template <size_t kBytes, class D, class V = VFromD<D>>
+HWY_API V CombineShiftRightBytes(const D d, const V hi, V lo) {
+  constexpr size_t kFromLo = 16 - kBytes;  // bytes per block taken from lo
+  const Repartition<uint8_t, D> d_bytes;
+  const auto hi_bytes = BitCast(d_bytes, hi);
+  const auto from_lo = detail::SlideDown(BitCast(d_bytes, lo), kBytes);
+  const auto from_hi = detail::SlideUp(hi_bytes, hi_bytes, kFromLo);
+  const auto use_lo = detail::FirstNPerBlock<kFromLo>(d_bytes);
+  return BitCast(d, IfThenElse(use_lo, from_lo, from_hi));
+}
+
+// ------------------------------ CombineShiftRightLanes
+template <size_t kLanes, class D, class V = VFromD<D>>
+HWY_API V CombineShiftRightLanes(const D d, const V hi, V lo) {
+  // Lanes per 16-byte block that still come from lo after the shift.
+  constexpr size_t kFromLo = 16 / sizeof(TFromV<V>) - kLanes;
+  const auto from_lo = detail::SlideDown(lo, kLanes);
+  const auto from_hi = detail::SlideUp(hi, hi, kFromLo);
+  return IfThenElse(detail::FirstNPerBlock<kFromLo>(d), from_lo, from_hi);
+}
+
+// ------------------------------ Shuffle2301 (ShiftLeft)
+template <class V>
+HWY_API V Shuffle2301(const V v) {
+  using D = DFromV<V>;
+  static_assert(sizeof(TFromD<D>) == 4, "Defined for 32-bit types");
+  // Swap the two 32-bit halves of every u64 lane.
+  const auto wide = BitCast(Repartition<uint64_t, D>(), v);
+  return BitCast(D(), Or(ShiftRight<32>(wide), ShiftLeft<32>(wide)));
+}
+
+// ------------------------------ Shuffle2103
+template <class V>
+HWY_API V Shuffle2103(const V v) {
+  using D = DFromV<V>;
+  static_assert(sizeof(TFromD<D>) == 4, "Defined for 32-bit types");
+  return CombineShiftRightLanes<3>(D(), v, v);  // hi == lo: rotate by 3 lanes
+}
+
+// ------------------------------ Shuffle0321
+template <class V>
+HWY_API V Shuffle0321(const V v) {
+  using D = DFromV<V>;
+  static_assert(sizeof(TFromD<D>) == 4, "Defined for 32-bit types");
+  return CombineShiftRightLanes<1>(D(), v, v);  // hi == lo: rotate by 1 lane
+}
+
+// ------------------------------ Shuffle1032
+template <class V>
+HWY_API V Shuffle1032(const V v) {
+  using D = DFromV<V>;
+  static_assert(sizeof(TFromD<D>) == 4, "Defined for 32-bit types");
+  return CombineShiftRightLanes<2>(D(), v, v);  // hi == lo: rotate by 2 lanes
+}
+
+// ------------------------------ Shuffle01
+template <class V>
+HWY_API V Shuffle01(const V v) {
+  using D = DFromV<V>;
+  static_assert(sizeof(TFromD<D>) == 8, "Defined for 64-bit types");
+  return CombineShiftRightLanes<1>(D(), v, v);  // hi == lo: rotate by 1 lane
+}
+
+// ------------------------------ Shuffle0123
+template <class V>
+HWY_API V Shuffle0123(const V v) {
+  return Shuffle2301(Shuffle1032(v));  // rotate by 2 lanes, then swap pairs
+}
+
+// ------------------------------ TableLookupBytes
+
+// Extends or truncates a vector to match the given d.
+namespace detail {
+
+template <class D>
+HWY_INLINE VFromD<D> ChangeLMUL(D /* d */, VFromD<D> v) {
+  return v;  // base case: v already has the vector type of D
+}
+
+// LMUL of VFromD<D> < LMUL of V: truncate v one step per call, recursively.
+template <class D, class V,
+          hwy::EnableIf<IsSame<TFromD<D>, TFromV<V>>()>* = nullptr,
+          HWY_IF_POW2_LE_D(DFromV<VFromD<D>>, DFromV<V>().Pow2() - 1)>
+HWY_INLINE VFromD<D> ChangeLMUL(D d, V v) {
+  const DFromV<decltype(v)> d_from;
+  const Half<decltype(d_from)> dh_from;  // tag after one Trunc step
+  static_assert(
+      DFromV<VFromD<decltype(dh_from)>>().Pow2() < DFromV<V>().Pow2(),
+      "The LMUL of VFromD<decltype(dh_from)> must be less than the LMUL of V");
+  static_assert(
+      DFromV<VFromD<D>>().Pow2() <= DFromV<VFromD<decltype(dh_from)>>().Pow2(),
+      "The LMUL of VFromD<D> must be less than or equal to the LMUL of "
+      "VFromD<decltype(dh_from)>");
+  return ChangeLMUL(d, Trunc(v));  // recurse until an overload matches d
+}
+
+// LMUL of VFromD<D> > LMUL of V: extend v one step per call, recursively.
+template <class D, class V,
+          hwy::EnableIf<IsSame<TFromD<D>, TFromV<V>>()>* = nullptr,
+          HWY_IF_POW2_GT_D(DFromV<VFromD<D>>, DFromV<V>().Pow2())>
+HWY_INLINE VFromD<D> ChangeLMUL(D d, V v) {
+  const DFromV<decltype(v)> d_from;
+  const Twice<decltype(d_from)> dt_from;  // tag after one Ext step
+  static_assert(DFromV<VFromD<decltype(dt_from)>>().Pow2() > DFromV<V>().Pow2(),
+                "The LMUL of VFromD<decltype(dt_from)> must be greater than "
+                "the LMUL of V");
+  static_assert(
+      DFromV<VFromD<D>>().Pow2() >= DFromV<VFromD<decltype(dt_from)>>().Pow2(),
+      "The LMUL of VFromD<D> must be greater than or equal to the LMUL of "
+      "VFromD<decltype(dt_from)>");
+  return ChangeLMUL(d, Ext(dt_from, v));  // recurse until an overload matches d
+}
+
+}  // namespace detail
+
+template <class VT, class VI>
+HWY_API VI TableLookupBytes(const VT vt, const VI vi) {
+  const DFromV<VT> dt;  // T=table, I=index.
+  const DFromV<VI> di;
+  const Repartition<uint8_t, decltype(dt)> dt8;
+  const Repartition<uint8_t, decltype(di)> di8;
+  // Required for producing half-vectors with table lookups from a full vector.
+  // If we instead run at the LMUL of the index vector, lookups into the table
+  // would be truncated. Thus we run at the larger of the two LMULs and truncate
+  // the result vector to the original index LMUL.
+  constexpr int kPow2T = dt8.Pow2();
+  constexpr int kPow2I = di8.Pow2();
+  const Simd<uint8_t, MaxLanes(di8), HWY_MAX(kPow2T, kPow2I)> dm8;  // m=max
+  const auto vmt = detail::ChangeLMUL(dm8, BitCast(dt8, vt));  // table at dm8
+  const auto vmi = detail::ChangeLMUL(dm8, BitCast(di8, vi));  // index at dm8
+  auto offsets = detail::OffsetsOf128BitBlocks(dm8, detail::Iota0(dm8));
+  // If the table is shorter, wrap around offsets so they do not reference
+  // undefined lanes in the newly extended vmt.
+  if (kPow2T < kPow2I) {
+    offsets = detail::AndS(offsets, static_cast<uint8_t>(Lanes(dt8) - 1));
+  }
+  const auto out = TableLookupLanes(vmt, Add(vmi, offsets));  // idx + offsets
+  return BitCast(di, detail::ChangeLMUL(di8, out));  // back to the index LMUL
+}
+
+template <class VT, class VI>
+HWY_API VI TableLookupBytesOr0(const VT vt, const VI idx) {
+  using DI = DFromV<VI>;
+  const auto idx8 = BitCast(Repartition<int8_t, DI>(), idx);
+  const auto found = TableLookupBytes(vt, idx8);
+  const auto zero_out = detail::LtS(idx8, 0);  // negative index => result 0
+  return BitCast(DI(), IfThenZeroElse(zero_out, found));
+}
+
+// ------------------------------ TwoTablesLookupLanes
+
+// TODO(janwas): special-case 8-bit lanes to safely handle VL >= 256
+template <class D, HWY_IF_POW2_LE_D(D, 2)>
+HWY_API VFromD<D> TwoTablesLookupLanes(D d, VFromD<D> a, VFromD<D> b,
+                                       VFromD<RebindToUnsigned<D>> idx) {
+  const Twice<D> d2;
+  const RebindToUnsigned<Twice<D>> d2_idx;
+  // One double-length table: a in the lower half, b in the upper half.
+  const auto table = Combine(d2, b, a);
+  return LowerHalf(d, TableLookupLanes(table, Combine(d2_idx, idx, idx)));
+}
+
+template <class D, HWY_IF_POW2_GT_D(D, 2)>
+HWY_API VFromD<D> TwoTablesLookupLanes(D d, VFromD<D> a, VFromD<D> b,
+                                       VFromD<RebindToUnsigned<D>> idx) {
+  using TU = TFromD<RebindToUnsigned<D>>;
+
+  // Max LMUL: look up a and b separately instead of combining the tables.
+  // NOTE(review): masking with Lanes - 1 assumes a power-of-two lane count.
+  const TU lane_mask = static_cast<TU>(Lanes(d) - 1);
+  const auto in_table = detail::AndS(idx, lane_mask);
+  const auto use_b = Ne(idx, in_table);  // FALSE if a
+
+  const auto a_lanes = TableLookupLanes(a, in_table);
+  return detail::MaskedTableLookupLanes(use_b, a_lanes, b, in_table);
+}
+
+template <class V>
+HWY_API V TwoTablesLookupLanes(V a, V b,
+                               VFromD<RebindToUnsigned<DFromV<V>>> idx) {
+  // Tag-less convenience overload: derive the tag from the vector type.
+  return TwoTablesLookupLanes(DFromV<V>(), a, b, idx);
+}
+
+// ------------------------------ Broadcast
+template <int kLane, class V>
+HWY_API V Broadcast(const V v) {
+  const DFromV<V> d;
+  const RebindToUnsigned<DFromV<V>> d_idx;
+  HWY_DASSERT(0 <= kLane && kLane < detail::LanesPerBlock(d));
+  // Per-lane offset of the enclosing 128-bit block (per the helper's name) ...
+  auto src = detail::OffsetsOf128BitBlocks(d, detail::Iota0(d_idx));
+  // ... advanced to lane kLane of that block (skipped when kLane == 0).
+  if (kLane != 0) src = detail::AddS(src, kLane);
+  return TableLookupLanes(v, src);
+}
+
+// ------------------------------ BroadcastLane
+#ifdef HWY_NATIVE_BROADCASTLANE
+#undef HWY_NATIVE_BROADCASTLANE
+#else
+#define HWY_NATIVE_BROADCASTLANE
+#endif
+
+namespace detail {
+// BroadcastLane(v, idx): forwards v and a scalar idx to __riscv_vrgather_vx_*.
+#define HWY_RVV_BROADCAST_LANE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD,  \
+                               LMULH, SHIFT, MLEN, NAME, OP)              \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                      \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) v, size_t idx) {                    \
+    return __riscv_v##OP##_vx_##CHAR##SEW##LMUL(v, idx,                   \
+                                                HWY_RVV_AVL(SEW, SHIFT)); \
+  }
+
+HWY_RVV_FOREACH(HWY_RVV_BROADCAST_LANE, BroadcastLane, rgather, _ALL)
+#undef HWY_RVV_BROADCAST_LANE  // helper macro is local to this section
+
+}  // namespace detail
+
+template <int kLane, class V>
+HWY_API V BroadcastLane(V v) {
+  static_assert(0 <= kLane && kLane < HWY_MAX_LANES_V(V), "Invalid lane");
+  return detail::BroadcastLane(v, static_cast<size_t>(kLane));  // runtime idx
+}
+
+// ------------------------------ InsertBlock
+#ifdef HWY_NATIVE_BLK_INSERT_EXTRACT
+#undef HWY_NATIVE_BLK_INSERT_EXTRACT
+#else
+#define HWY_NATIVE_BLK_INSERT_EXTRACT
+#endif
+
+template <int kBlockIdx, class V>
+HWY_API V InsertBlock(V v, VFromD<BlockDFromD<DFromV<V>>> blk_to_insert) {
+  const DFromV<decltype(v)> d;
+  using TU = If<(sizeof(TFromV<V>) == 1 && DFromV<V>().Pow2() >= -2), uint16_t,
+                MakeUnsigned<TFromV<V>>>;
+  using TIdx = If<sizeof(TU) == 1, uint16_t, TU>;  // index lanes: >= 16 bits
+
+  const Repartition<TU, decltype(d)> du;
+  const Rebind<TIdx, decltype(du)> d_idx;
+  static_assert(0 <= kBlockIdx && kBlockIdx < d.MaxBlocks(),
+                "Invalid block index");
+  constexpr size_t kMaxLanesPerBlock = 16 / sizeof(TU);  // TU lanes per block
+  // NOTE: despite its name, kBlkByteOffset counts TU lanes, not bytes.
+  constexpr size_t kBlkByteOffset =
+      static_cast<size_t>(kBlockIdx) * kMaxLanesPerBlock;
+  const auto vu = BitCast(du, v);
+  const auto vblk = ResizeBitCast(du, blk_to_insert);
+  const auto vblk_shifted = detail::SlideUp(vblk, vblk, kBlkByteOffset);
+  const auto insert_mask = RebindMask(
+      du, detail::LtS(detail::SubS(detail::Iota0(d_idx),
+                                   static_cast<TIdx>(kBlkByteOffset)),
+                      static_cast<TIdx>(kMaxLanesPerBlock)));
+  // Lanes selected by insert_mask take the shifted block; the rest keep v.
+  return BitCast(d, IfThenElse(insert_mask, vblk_shifted, vu));
+}
+
+// ------------------------------ BroadcastBlock
+template <int kBlockIdx, class V, HWY_IF_POW2_LE_D(DFromV<V>, -3)>
+HWY_API V BroadcastBlock(V v) {
+  const DFromV<decltype(v)> d;
+  static_assert(0 <= kBlockIdx && kBlockIdx < d.MaxBlocks(),
+                "Invalid block index");
+  const Repartition<uint8_t, decltype(d)> d_bytes;
+  const Rebind<uint16_t, decltype(d)> d_idx;
+  // Byte position within a block (i & 15) plus the start of block kBlockIdx.
+  constexpr uint16_t kBlockStart = static_cast<uint16_t>(kBlockIdx * 16);
+  const auto in_block = detail::AndS(detail::Iota0(d_idx), uint16_t{15});
+  const auto idx = detail::AddS(in_block, kBlockStart);
+  return BitCast(d, detail::TableLookupLanes16(BitCast(d_bytes, v), idx));
+}
+
+template <int kBlockIdx, class V, HWY_IF_POW2_GT_D(DFromV<V>, -3)>
+HWY_API V BroadcastBlock(V v) {
+  const DFromV<decltype(v)> d;
+  static_assert(0 <= kBlockIdx && kBlockIdx < d.MaxBlocks(),
+                "Invalid block index");
+  // Lane position inside a block, rebased onto block kBlockIdx.
+  using TU = If<sizeof(TFromV<V>) == 1, uint16_t, MakeUnsigned<TFromV<V>>>;
+  const Repartition<TU, decltype(d)> du;
+  constexpr size_t kPerBlock = 16 / sizeof(TU);  // TU lanes per block
+  constexpr size_t kFirst = static_cast<size_t>(kBlockIdx) * kPerBlock;
+  const auto in_block =
+      detail::AndS(detail::Iota0(du), static_cast<TU>(kPerBlock - 1));
+  const auto idx = detail::AddS(in_block, static_cast<TU>(kFirst));
+  return BitCast(d, TableLookupLanes(BitCast(du, v), idx));
+}
+
+// ------------------------------ ExtractBlock
+template <int kBlockIdx, class V>
+HWY_API VFromD<BlockDFromD<DFromV<V>>> ExtractBlock(V v) {
+  const DFromV<decltype(v)> d;
+  static_assert(0 <= kBlockIdx && kBlockIdx < d.MaxBlocks(),
+                "Invalid block index");
+
+  // Offset of the requested block, measured in lanes of V (not bytes).
+  constexpr size_t kPerBlock = 16 / sizeof(TFromV<V>);
+  constexpr size_t kFirstLane = static_cast<size_t>(kBlockIdx) * kPerBlock;
+  // Slide that block down to lane 0, then view the result as a single block.
+  const BlockDFromD<decltype(d)> d_block;
+  return ResizeBitCast(d_block, detail::SlideDown(v, kFirstLane));
+}
+
+// ------------------------------ ShiftLeftLanes
+
+template <size_t kLanes, class D, class V = VFromD<D>>
+HWY_API V ShiftLeftLanes(const D d, const V v) {
+  using DI = RebindToSigned<decltype(d)>;
+  using TI = TFromD<DI>;
+  const DI di;
+  // Signed position of each lane within its 128-bit block.
+  const TI block_mask = static_cast<TI>(detail::LanesPerBlock(di) - 1);
+  const auto lane_ids = BitCast(di, detail::Iota0(RebindToUnsigned<DI>()));
+  const auto pos_in_block = detail::AndS(lane_ids, block_mask);
+  // Match x86 semantics by zeroing lower lanes in 128-bit blocks
+  const auto vacated = detail::LtS(pos_in_block, static_cast<TI>(kLanes));
+  return IfThenZeroElse(vacated, detail::SlideUp(v, v, kLanes));
+}
+
+template <size_t kLanes, class V>
+HWY_API V ShiftLeftLanes(const V v) {
+  return ShiftLeftLanes<kLanes>(DFromV<V>(), v);  // tag derived from V
+}
+
+// ------------------------------ ShiftLeftBytes
+
+template <int kBytes, class D>
+HWY_API VFromD<D> ShiftLeftBytes(D d, const VFromD<D> v) {
+  using D8 = Repartition<uint8_t, D>;  // a byte shift is a u8 lane shift
+  return BitCast(d, ShiftLeftLanes<kBytes>(BitCast(D8(), v)));
+}
+
+template <int kBytes, class V>
+HWY_API V ShiftLeftBytes(const V v) {
+  return ShiftLeftBytes<kBytes>(DFromV<V>(), v);  // tag derived from V
+}
+
+// ------------------------------ ShiftRightLanes
+template <size_t kLanes, typename T, size_t N, int kPow2,
+          class V = VFromD<Simd<T, N, kPow2>>>
+HWY_API V ShiftRightLanes(const Simd<T, N, kPow2> d, V v) {
+  using DI = RebindToSigned<Simd<T, N, kPow2>>;
+  using TI = TFromD<DI>;
+  const DI di;
+  const RebindToUnsigned<Simd<T, N, kPow2>> du;
+
+  // For partial vectors, clear upper lanes so we shift in zeros.
+  if (N <= 16 / sizeof(T)) v = IfThenElseZero(FirstN(d, N), v);
+  const auto moved = detail::SlideDown(v, kLanes);
+
+  // Match x86 semantics by zeroing upper lanes in 128-bit blocks
+  const size_t per_block = detail::LanesPerBlock(di);
+  const auto lane_ids = BitCast(di, detail::Iota0(du));
+  const auto pos = detail::AndS(lane_ids, static_cast<TI>(per_block - 1));
+  const auto stays = detail::LtS(pos, static_cast<TI>(per_block - kLanes));
+  return IfThenElseZero(stays, moved);
+}
+
+// ------------------------------ ShiftRightBytes
+template <int kBytes, class D, class V = VFromD<D>>
+HWY_API V ShiftRightBytes(const D d, const V v) {
+  using D8 = Repartition<uint8_t, D>;  // a byte shift is a u8 lane shift
+  return BitCast(d, ShiftRightLanes<kBytes>(D8(), BitCast(D8(), v)));
+}
+
+// ------------------------------ InterleaveLower
+
+template <class D, class V>
+HWY_API V InterleaveLower(D d, const V a, const V b) {
+  static_assert(IsSame<TFromD<D>, TFromV<V>>(), "D/V mismatch");
+  const RebindToUnsigned<D> du;
+  using TU = TFromD<RebindToUnsigned<D>>;
+  const auto lane = detail::Iota0(du);
+  // Output lanes 2k and 2k+1 of a block both read source lane k of that block.
+  const TU block_mask = static_cast<TU>(detail::LanesPerBlock(du) - 1);
+  const auto src = Add(ShiftRight<1>(detail::AndS(lane, block_mask)),
+                       detail::OffsetsOf128BitBlocks(d, lane));
+  const auto from_a = detail::EqS(detail::AndS(lane, 1), 0u);  // even lanes
+  return IfThenElse(from_a, TableLookupLanes(a, src), TableLookupLanes(b, src));
+}
+
+template <class V>
+HWY_API V InterleaveLower(const V a, const V b) {
+  return InterleaveLower(DFromV<V>(), a, b);  // tag derived from V
+}
+
+// ------------------------------ InterleaveUpper
+
+template <class D, class V>
+HWY_API V InterleaveUpper(const D d, const V a, const V b) {
+  static_assert(IsSame<TFromD<D>, TFromV<V>>(), "D/V mismatch");
+  const RebindToUnsigned<D> du;
+  using TU = TFromD<RebindToUnsigned<D>>;
+  const size_t per_block = detail::LanesPerBlock(du);
+  const auto lane = detail::Iota0(du);
+  const auto pair = ShiftRight<1>(
+      detail::AndS(lane, static_cast<TU>(per_block - 1)));
+  const auto lower = Add(pair, detail::OffsetsOf128BitBlocks(d, lane));
+  const auto src = detail::AddS(lower, static_cast<TU>(per_block / 2));  // +half
+  const auto from_a = detail::EqS(detail::AndS(lane, 1), 0u);  // even lanes
+  return IfThenElse(from_a, TableLookupLanes(a, src), TableLookupLanes(b, src));
+}
+
+// ------------------------------ ZipLower
+
+template <class V, class DW = RepartitionToWide<DFromV<V>>>
+HWY_API VFromD<DW> ZipLower(DW dw, V a, V b) {
+  using DN = RepartitionToNarrow<DW>;
+  static_assert(IsSame<TFromD<DN>, TFromV<V>>(), "D/V mismatch");
+  return BitCast(dw, InterleaveLower(DN(), a, b));  // pairs seen as wide lanes
+}
+
+template <class V, class DW = RepartitionToWide<DFromV<V>>>
+HWY_API VFromD<DW> ZipLower(V a, V b) {
+  return BitCast(DW(), InterleaveLower(a, b));  // DW defaults to the wide tag
+}
+
+// ------------------------------ ZipUpper
+template <class DW, class V>
+HWY_API VFromD<DW> ZipUpper(DW dw, V a, V b) {
+  using DN = RepartitionToNarrow<DW>;
+  static_assert(IsSame<TFromD<DN>, TFromV<V>>(), "D/V mismatch");
+  return BitCast(dw, InterleaveUpper(DN(), a, b));  // pairs seen as wide lanes
+}
+
+// ================================================== REDUCE
+
+// vector = f(vector, zero_m1): reduce v with m1 seed v0, Set result to all lanes
+#define HWY_RVV_REDUCE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                       MLEN, NAME, OP)                                         \
+  template <class D>                                                           \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                           \
+      NAME(D d, HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(BASE, SEW, m1) v0) {   \
+    return Set(d,                                                              \
+               GetLane(__riscv_v##OP##_vs_##CHAR##SEW##LMUL##_##CHAR##SEW##m1( \
+                   v, v0, Lanes(d))));                                         \
+  }
+
+// ------------------------------ SumOfLanes
+
+namespace detail {
+HWY_RVV_FOREACH_UI(HWY_RVV_REDUCE, RedSum, redsum, _ALL)   // ints: vredsum
+HWY_RVV_FOREACH_F(HWY_RVV_REDUCE, RedSum, fredusum, _ALL)  // floats: vfredusum
+}  // namespace detail
+
+template <class D>
+HWY_API VFromD<D> SumOfLanes(D d, const VFromD<D> v) {
+  const ScalableTag<TFromD<D>> d1;  // always m1
+  return detail::RedSum(d, v, Zero(d1));  // zero is the identity for +
+}
+
+template <class D>
+HWY_API TFromD<D> ReduceSum(D d, const VFromD<D> v) {
+  return GetLane(SumOfLanes(d, v));  // scalar form of SumOfLanes
+}
+
+// ------------------------------ MinOfLanes
+namespace detail {
+HWY_RVV_FOREACH_U(HWY_RVV_REDUCE, RedMin, redminu, _ALL)  // unsigned: vredminu
+HWY_RVV_FOREACH_I(HWY_RVV_REDUCE, RedMin, redmin, _ALL)   // signed: vredmin
+HWY_RVV_FOREACH_F(HWY_RVV_REDUCE, RedMin, fredmin, _ALL)  // float: vfredmin
+}  // namespace detail
+
+template <class D>
+HWY_API VFromD<D> MinOfLanes(D d, const VFromD<D> v) {
+  using T = TFromD<D>;
+  // Seed the reduction with the largest T so the seed never lowers the result.
+  const ScalableTag<T> d_seed;  // always m1
+  return detail::RedMin(d, v, Set(d_seed, HighestValue<T>()));
+}
+
+// ------------------------------ MaxOfLanes
+namespace detail {
+HWY_RVV_FOREACH_U(HWY_RVV_REDUCE, RedMax, redmaxu, _ALL)  // unsigned: vredmaxu
+HWY_RVV_FOREACH_I(HWY_RVV_REDUCE, RedMax, redmax, _ALL)   // signed: vredmax
+HWY_RVV_FOREACH_F(HWY_RVV_REDUCE, RedMax, fredmax, _ALL)  // float: vfredmax
+}  // namespace detail
+
+template <class D>
+HWY_API VFromD<D> MaxOfLanes(D d, const VFromD<D> v) {
+  using T = TFromD<D>;
+  // Seed the reduction with the smallest T so the seed never raises the result.
+  const ScalableTag<T> d_seed;  // always m1
+  return detail::RedMax(d, v, Set(d_seed, LowestValue<T>()));
+}
+
+#undef HWY_RVV_REDUCE
+
+// ================================================== Ops with dependencies
+
+// ------------------------------ LoadInterleaved2
+
+// Per-target flag to prevent generic_ops-inl.h from defining LoadInterleaved2.
+#ifdef HWY_NATIVE_LOAD_STORE_INTERLEAVED
+#undef HWY_NATIVE_LOAD_STORE_INTERLEAVED
+#else
+#define HWY_NATIVE_LOAD_STORE_INTERLEAVED
+#endif
+
+// Requires Clang 16+, GCC 14+; otherwise emulated in generic_ops-inl.h.
+#if HWY_HAVE_TUPLE
+
+#define HWY_RVV_GET(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT,   \
+                    MLEN, NAME, OP)                                           \
+  template <size_t kIndex>                                                    \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                          \
+      NAME##2(HWY_RVV_TUP(BASE, SEW, LMUL, 2) tup) {                          \
+    return __riscv_v##OP##_v_##CHAR##SEW##LMUL##x2_##CHAR##SEW##LMUL(tup,     \
+                                                                     kIndex); \
+  }                                                                           \
+  template <size_t kIndex>                                                    \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                          \
+      NAME##3(HWY_RVV_TUP(BASE, SEW, LMUL, 3) tup) {                          \
+    return __riscv_v##OP##_v_##CHAR##SEW##LMUL##x3_##CHAR##SEW##LMUL(tup,     \
+                                                                     kIndex); \
+  }                                                                           \
+  template <size_t kIndex>                                                    \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                          \
+      NAME##4(HWY_RVV_TUP(BASE, SEW, LMUL, 4) tup) {                          \
+    return __riscv_v##OP##_v_##CHAR##SEW##LMUL##x4_##CHAR##SEW##LMUL(tup,     \
+                                                                     kIndex); \
+  }
+
+HWY_RVV_FOREACH(HWY_RVV_GET, Get, get, _LE2)  // Get2/Get3/Get4<kIndex>(tup)
+#undef HWY_RVV_GET  // helper macro is local to this section
+
+#define HWY_RVV_SET(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                    MLEN, NAME, OP)                                         \
+  template <size_t kIndex>                                                  \
+  HWY_API HWY_RVV_TUP(BASE, SEW, LMUL, 2) NAME##2(                          \
+      HWY_RVV_TUP(BASE, SEW, LMUL, 2) tup, HWY_RVV_V(BASE, SEW, LMUL) v) {  \
+    return __riscv_v##OP##_v_##CHAR##SEW##LMUL##_##CHAR##SEW##LMUL##x2(     \
+        tup, kIndex, v);                                                    \
+  }                                                                         \
+  template <size_t kIndex>                                                  \
+  HWY_API HWY_RVV_TUP(BASE, SEW, LMUL, 3) NAME##3(                          \
+      HWY_RVV_TUP(BASE, SEW, LMUL, 3) tup, HWY_RVV_V(BASE, SEW, LMUL) v) {  \
+    return __riscv_v##OP##_v_##CHAR##SEW##LMUL##_##CHAR##SEW##LMUL##x3(     \
+        tup, kIndex, v);                                                    \
+  }                                                                         \
+  template <size_t kIndex>                                                  \
+  HWY_API HWY_RVV_TUP(BASE, SEW, LMUL, 4) NAME##4(                          \
+      HWY_RVV_TUP(BASE, SEW, LMUL, 4) tup, HWY_RVV_V(BASE, SEW, LMUL) v) {  \
+    return __riscv_v##OP##_v_##CHAR##SEW##LMUL##_##CHAR##SEW##LMUL##x4(     \
+        tup, kIndex, v);                                                    \
+  }
+
+HWY_RVV_FOREACH(HWY_RVV_SET, Set, set, _LE2)  // Set2/Set3/Set4<kIndex>(tup, v)
+#undef HWY_RVV_SET  // helper macro is local to this section
+
+// RVV does not provide vcreate, so implement using Set: start from a
+#define HWY_RVV_CREATE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                       MLEN, NAME, OP)                                         \
+  template <size_t N>                                                          \
+  HWY_API HWY_RVV_TUP(BASE, SEW, LMUL, 2)                                      \
+      NAME##2(HWY_RVV_D(BASE, SEW, N, SHIFT) /*d*/,                            \
+              HWY_RVV_V(BASE, SEW, LMUL) v0, HWY_RVV_V(BASE, SEW, LMUL) v1) {  \
+    HWY_RVV_TUP(BASE, SEW, LMUL, 2) tup{};                                     \
+    tup = Set2<0>(tup, v0);                                                    \
+    tup = Set2<1>(tup, v1);                                                    \
+    return tup;                                                                \
+  }                                                                            \
+  template <size_t N>                                                          \
+  HWY_API HWY_RVV_TUP(BASE, SEW, LMUL, 3) NAME##3(                             \
+      HWY_RVV_D(BASE, SEW, N, SHIFT) /*d*/, HWY_RVV_V(BASE, SEW, LMUL) v0,     \
+      HWY_RVV_V(BASE, SEW, LMUL) v1, HWY_RVV_V(BASE, SEW, LMUL) v2) {          \
+    HWY_RVV_TUP(BASE, SEW, LMUL, 3) tup{};                                     \
+    tup = Set3<0>(tup, v0);                                                    \
+    tup = Set3<1>(tup, v1);                                                    \
+    tup = Set3<2>(tup, v2);                                                    \
+    return tup;                                                                \
+  }                                                                            \
+  template <size_t N>                                                          \
+  HWY_API HWY_RVV_TUP(BASE, SEW, LMUL, 4)                                      \
+      NAME##4(HWY_RVV_D(BASE, SEW, N, SHIFT) /*d*/,                            \
+              HWY_RVV_V(BASE, SEW, LMUL) v0, HWY_RVV_V(BASE, SEW, LMUL) v1,    \
+              HWY_RVV_V(BASE, SEW, LMUL) v2, HWY_RVV_V(BASE, SEW, LMUL) v3) {  \
+    HWY_RVV_TUP(BASE, SEW, LMUL, 4) tup{};                                     \
+    tup = Set4<0>(tup, v0);                                                    \
+    tup = Set4<1>(tup, v1);                                                    \
+    tup = Set4<2>(tup, v2);                                                    \
+    tup = Set4<3>(tup, v3);                                                    \
+    return tup;                                                                \
+  }
+// value-initialized tuple and fill it via SetN<i>. The OP ("xx") is unused.
+HWY_RVV_FOREACH(HWY_RVV_CREATE, Create, xx, _LE2_VIRT)
+#undef HWY_RVV_CREATE
+
+template <class D>  // Vec2<D>: tuple type that Create2 returns for D.
+using Vec2 = decltype(Create2(D(), Zero(D()), Zero(D())));
+template <class D>  // Vec3<D>: tuple type that Create3 returns for D.
+using Vec3 = decltype(Create3(D(), Zero(D()), Zero(D()), Zero(D())));
+template <class D>  // Vec4<D>: tuple type that Create4 returns for D.
+using Vec4 = decltype(Create4(D(), Zero(D()), Zero(D()), Zero(D()), Zero(D())));
+
+#define HWY_RVV_LOAD2(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                      MLEN, NAME, OP)                                         \
+  template <size_t N>                                                         \
+  HWY_API void NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d,                         \
+                    const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT unaligned,      \
+                    HWY_RVV_V(BASE, SEW, LMUL) & v0,                          \
+                    HWY_RVV_V(BASE, SEW, LMUL) & v1) {                        \
+    const HWY_RVV_TUP(BASE, SEW, LMUL, 2) tup =                               \
+        __riscv_v##OP##e##SEW##_v_##CHAR##SEW##LMUL##x2(unaligned, Lanes(d)); \
+    v0 = Get2<0>(tup);                                                        \
+    v1 = Get2<1>(tup);                                                        \
+  }
+// Segments are limited to 8 registers, so we can only go up to LMUL=2.
+HWY_RVV_FOREACH(HWY_RVV_LOAD2, LoadInterleaved2, lseg2, _LE2_VIRT)
+#undef HWY_RVV_LOAD2  // LoadInterleaved2 = vlseg2e<SEW> tuple load + Get2<i>.
+
+// ------------------------------ LoadInterleaved3
+
+#define HWY_RVV_LOAD3(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                      MLEN, NAME, OP)                                         \
+  template <size_t N>                                                         \
+  HWY_API void NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d,                         \
+                    const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT unaligned,      \
+                    HWY_RVV_V(BASE, SEW, LMUL) & v0,                          \
+                    HWY_RVV_V(BASE, SEW, LMUL) & v1,                          \
+                    HWY_RVV_V(BASE, SEW, LMUL) & v2) {                        \
+    const HWY_RVV_TUP(BASE, SEW, LMUL, 3) tup =                               \
+        __riscv_v##OP##e##SEW##_v_##CHAR##SEW##LMUL##x3(unaligned, Lanes(d)); \
+    v0 = Get3<0>(tup);                                                        \
+    v1 = Get3<1>(tup);                                                        \
+    v2 = Get3<2>(tup);                                                        \
+  }
+// Segments are limited to 8 registers, so we can only go up to LMUL=2.
+HWY_RVV_FOREACH(HWY_RVV_LOAD3, LoadInterleaved3, lseg3, _LE2_VIRT)
+#undef HWY_RVV_LOAD3  // LoadInterleaved3 = vlseg3e<SEW> tuple load + Get3<i>.
+
+// ------------------------------ LoadInterleaved4
+
+#define HWY_RVV_LOAD4(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                      MLEN, NAME, OP)                                         \
+  template <size_t N>                                                         \
+  HWY_API void NAME(                                                          \
+      HWY_RVV_D(BASE, SEW, N, SHIFT) d,                                       \
+      const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT unaligned,                    \
+      HWY_RVV_V(BASE, SEW, LMUL) & v0, HWY_RVV_V(BASE, SEW, LMUL) & v1,       \
+      HWY_RVV_V(BASE, SEW, LMUL) & v2, HWY_RVV_V(BASE, SEW, LMUL) & v3) {     \
+    const HWY_RVV_TUP(BASE, SEW, LMUL, 4) tup =                               \
+        __riscv_v##OP##e##SEW##_v_##CHAR##SEW##LMUL##x4(unaligned, Lanes(d)); \
+    v0 = Get4<0>(tup);                                                        \
+    v1 = Get4<1>(tup);                                                        \
+    v2 = Get4<2>(tup);                                                        \
+    v3 = Get4<3>(tup);                                                        \
+  }
+// Segments are limited to 8 registers, so we can only go up to LMUL=2.
+HWY_RVV_FOREACH(HWY_RVV_LOAD4, LoadInterleaved4, lseg4, _LE2_VIRT)
+#undef HWY_RVV_LOAD4  // LoadInterleaved4 = vlseg4e<SEW> tuple load + Get4<i>.
+
+// ------------------------------ StoreInterleaved2
+
+#define HWY_RVV_STORE2(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                       MLEN, NAME, OP)                                         \
+  template <size_t N>                                                          \
+  HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v0,                             \
+                    HWY_RVV_V(BASE, SEW, LMUL) v1,                             \
+                    HWY_RVV_D(BASE, SEW, N, SHIFT) d,                          \
+                    HWY_RVV_T(BASE, SEW) * HWY_RESTRICT unaligned) {           \
+    const HWY_RVV_TUP(BASE, SEW, LMUL, 2) tup = Create2(d, v0, v1);            \
+    __riscv_v##OP##e##SEW##_v_##CHAR##SEW##LMUL##x2(unaligned, tup, Lanes(d)); \
+  }
+// Segments are limited to 8 registers, so we can only go up to LMUL=2.
+HWY_RVV_FOREACH(HWY_RVV_STORE2, StoreInterleaved2, sseg2, _LE2_VIRT)
+#undef HWY_RVV_STORE2  // StoreInterleaved2 = Create2 + vsseg2e<SEW> store.
+
+// ------------------------------ StoreInterleaved3
+
+#define HWY_RVV_STORE3(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                       MLEN, NAME, OP)                                         \
+  template <size_t N>                                                          \
+  HWY_API void NAME(                                                           \
+      HWY_RVV_V(BASE, SEW, LMUL) v0, HWY_RVV_V(BASE, SEW, LMUL) v1,            \
+      HWY_RVV_V(BASE, SEW, LMUL) v2, HWY_RVV_D(BASE, SEW, N, SHIFT) d,         \
+      HWY_RVV_T(BASE, SEW) * HWY_RESTRICT unaligned) {                         \
+    const HWY_RVV_TUP(BASE, SEW, LMUL, 3) tup = Create3(d, v0, v1, v2);        \
+    __riscv_v##OP##e##SEW##_v_##CHAR##SEW##LMUL##x3(unaligned, tup, Lanes(d)); \
+  }
+// Segments are limited to 8 registers, so we can only go up to LMUL=2.
+HWY_RVV_FOREACH(HWY_RVV_STORE3, StoreInterleaved3, sseg3, _LE2_VIRT)
+#undef HWY_RVV_STORE3  // StoreInterleaved3 = Create3 + vsseg3e<SEW> store.
+
+// ------------------------------ StoreInterleaved4
+
+#define HWY_RVV_STORE4(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
+                       MLEN, NAME, OP)                                         \
+  template <size_t N>                                                          \
+  HWY_API void NAME(                                                           \
+      HWY_RVV_V(BASE, SEW, LMUL) v0, HWY_RVV_V(BASE, SEW, LMUL) v1,            \
+      HWY_RVV_V(BASE, SEW, LMUL) v2, HWY_RVV_V(BASE, SEW, LMUL) v3,            \
+      HWY_RVV_D(BASE, SEW, N, SHIFT) d,                                        \
+      HWY_RVV_T(BASE, SEW) * HWY_RESTRICT unaligned) {                         \
+    const HWY_RVV_TUP(BASE, SEW, LMUL, 4) tup = Create4(d, v0, v1, v2, v3);    \
+    __riscv_v##OP##e##SEW##_v_##CHAR##SEW##LMUL##x4(unaligned, tup, Lanes(d)); \
+  }
+// Segments are limited to 8 registers, so we can only go up to LMUL=2.
+HWY_RVV_FOREACH(HWY_RVV_STORE4, StoreInterleaved4, sseg4, _LE2_VIRT)
+#undef HWY_RVV_STORE4  // StoreInterleaved4 = Create4 + vsseg4e<SEW> store.
+
+#else  // !HWY_HAVE_TUPLE
+
+template <class D, typename T = TFromD<D>>
+HWY_API void LoadInterleaved2(D d, const T* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1) {
+  const VFromD<D> A = LoadU(d, unaligned);  // v1[1] v0[1] v1[0] v0[0]
+  const VFromD<D> B = LoadU(d, unaligned + Lanes(d));  // the following vector
+  v0 = ConcatEven(d, B, A);  // even positions of A and B hold v0
+  v1 = ConcatOdd(d, B, A);   // odd positions of A and B hold v1
+}
+
+namespace detail {
+#define HWY_RVV_LOAD_STRIDED(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                             SHIFT, MLEN, NAME, OP)                           \
+  template <size_t N>                                                         \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                          \
+      NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d,                                  \
+           const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p, size_t stride) {      \
+    return __riscv_v##OP##SEW##_v_##CHAR##SEW##LMUL(                          \
+        p, static_cast<ptrdiff_t>(stride), Lanes(d));                         \
+  }
+HWY_RVV_FOREACH(HWY_RVV_LOAD_STRIDED, LoadStrided, lse, _ALL_VIRT)
+#undef HWY_RVV_LOAD_STRIDED  // LoadStrided(d, p, stride): vlse<SEW> wrapper.
+}  // namespace detail (callers pass stride in bytes, k * sizeof(T))
+
+template <class D, typename T = TFromD<D>>
+HWY_API void LoadInterleaved3(D d, const TFromD<D>* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1, VFromD<D>& v2) {
+  // Strides are in bytes (undocumented); each load takes every 3rd element.
+  v0 = detail::LoadStrided(d, unaligned + 0, 3 * sizeof(T));  // 0, 3, 6, ...
+  v1 = detail::LoadStrided(d, unaligned + 1, 3 * sizeof(T));  // 1, 4, 7, ...
+  v2 = detail::LoadStrided(d, unaligned + 2, 3 * sizeof(T));  // 2, 5, 8, ...
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API void LoadInterleaved4(D d, const TFromD<D>* HWY_RESTRICT unaligned,
+                              VFromD<D>& v0, VFromD<D>& v1, VFromD<D>& v2,
+                              VFromD<D>& v3) {
+  // Strides are in bytes (undocumented); each load takes every 4th element.
+  v0 = detail::LoadStrided(d, unaligned + 0, 4 * sizeof(T));  // 0, 4, 8, ...
+  v1 = detail::LoadStrided(d, unaligned + 1, 4 * sizeof(T));  // 1, 5, 9, ...
+  v2 = detail::LoadStrided(d, unaligned + 2, 4 * sizeof(T));  // 2, 6, 10, ...
+  v3 = detail::LoadStrided(d, unaligned + 3, 4 * sizeof(T));  // 3, 7, 11, ...
+}
+
+// Not 64-bit / max LMUL: interleave via promote, slide by one lane, Or.
+template <class D, typename T = TFromD<D>, HWY_IF_NOT_T_SIZE_D(D, 8),
+          HWY_IF_POW2_LE_D(D, 2)>
+HWY_API void StoreInterleaved2(VFromD<D> v0, VFromD<D> v1, D d,
+                               T* HWY_RESTRICT unaligned) {
+  const RebindToUnsigned<D> du;
+  const Twice<RepartitionToWide<decltype(du)>> duw;  // promotion target
+  const Twice<decltype(d)> dt;  // T lanes, twice as many: the stored vector
+  // Interleave with zero by promoting to wider (unsigned) type.
+  const VFromD<decltype(dt)> w0 = BitCast(dt, PromoteTo(duw, BitCast(du, v0)));
+  const VFromD<decltype(dt)> w1 = BitCast(dt, PromoteTo(duw, BitCast(du, v1)));
+  // OR second vector into the zero-valued lanes (faster than OddEven).
+  StoreU(Or(w0, detail::Slide1Up(w1)), dt, unaligned);
+}
+
+// Can promote, max LMUL: split into two half-length interleaved stores.
+template <class D, typename T = TFromD<D>, HWY_IF_NOT_T_SIZE_D(D, 8),
+          HWY_IF_POW2_GT_D(D, 2)>
+HWY_API void StoreInterleaved2(VFromD<D> v0, VFromD<D> v1, D d,
+                               T* HWY_RESTRICT unaligned) {
+  const Half<decltype(d)> dh;  // tag matching the half vectors passed below
+  StoreInterleaved2(LowerHalf(dh, v0), LowerHalf(dh, v1), dh, unaligned);
+  StoreInterleaved2(UpperHalf(dh, v0), UpperHalf(dh, v1), dh,
+                    unaligned + Lanes(d));  // continue after the lower halves
+}
+
+namespace detail {
+#define HWY_RVV_STORE_STRIDED(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                              SHIFT, MLEN, NAME, OP)                           \
+  template <size_t N>                                                          \
+  HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v,                              \
+                    HWY_RVV_D(BASE, SEW, N, SHIFT) d,                          \
+                    HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p, size_t stride) {    \
+    return __riscv_v##OP##SEW##_v_##CHAR##SEW##LMUL(                           \
+        p, static_cast<ptrdiff_t>(stride), v, Lanes(d));                       \
+  }
+HWY_RVV_FOREACH(HWY_RVV_STORE_STRIDED, StoreStrided, sse, _ALL_VIRT)
+#undef HWY_RVV_STORE_STRIDED  // StoreStrided(v, d, p, stride): vsse<SEW>.
+}  // namespace detail (callers pass stride in bytes, k * sizeof(T))
+
+// 64-bit: strided (lanes cannot be promoted to a wider type).
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE_D(D, 8)>
+HWY_API void StoreInterleaved2(VFromD<D> v0, VFromD<D> v1, D d,
+                               T* HWY_RESTRICT unaligned) {
+  // Strides are in bytes (undocumented); each store fills every 2nd element.
+  detail::StoreStrided(v0, d, unaligned + 0, 2 * sizeof(T));  // 0, 2, 4, ...
+  detail::StoreStrided(v1, d, unaligned + 1, 2 * sizeof(T));  // 1, 3, 5, ...
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API void StoreInterleaved3(VFromD<D> v0, VFromD<D> v1, VFromD<D> v2, D d,
+                               T* HWY_RESTRICT unaligned) {
+  // Strides are in bytes (undocumented); each store fills every 3rd element.
+  detail::StoreStrided(v0, d, unaligned + 0, 3 * sizeof(T));  // 0, 3, 6, ...
+  detail::StoreStrided(v1, d, unaligned + 1, 3 * sizeof(T));  // 1, 4, 7, ...
+  detail::StoreStrided(v2, d, unaligned + 2, 3 * sizeof(T));  // 2, 5, 8, ...
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API void StoreInterleaved4(VFromD<D> v0, VFromD<D> v1, VFromD<D> v2,
+                               VFromD<D> v3, D d, T* HWY_RESTRICT unaligned) {
+  // Strides are in bytes (undocumented); each store fills every 4th element.
+  detail::StoreStrided(v0, d, unaligned + 0, 4 * sizeof(T));  // 0, 4, 8, ...
+  detail::StoreStrided(v1, d, unaligned + 1, 4 * sizeof(T));  // 1, 5, 9, ...
+  detail::StoreStrided(v2, d, unaligned + 2, 4 * sizeof(T));  // 2, 6, 10, ...
+  detail::StoreStrided(v3, d, unaligned + 3, 4 * sizeof(T));  // 3, 7, 11, ...
+}
+
+#endif  // HWY_HAVE_TUPLE
+
+// ------------------------------ ResizeBitCast
+
+template <class D, class FromV>  // Reinterprets v as VFromD<D> via u8 vectors.
+HWY_API VFromD<D> ResizeBitCast(D /*d*/, FromV v) {
+  const DFromV<decltype(v)> d_from;
+  const Repartition<uint8_t, decltype(d_from)> du8_from;  // source as bytes
+  const DFromV<VFromD<D>> d_to;
+  const Repartition<uint8_t, decltype(d_to)> du8_to;  // destination as bytes
+  return BitCast(d_to, detail::ChangeLMUL(du8_to, BitCast(du8_from, v)));
+}
+
+// ------------------------------ PopulationCount (ShiftRight)
+
+// Handles LMUL < 2 or capped vectors, which generic_ops-inl cannot.
+template <typename V, class D = DFromV<V>, HWY_IF_U8_D(D),
+          hwy::EnableIf<D().Pow2() < 1 || D().MaxLanes() < 16>* = nullptr>
+HWY_API V PopulationCount(V v) {
+  // See https://arxiv.org/pdf/1611.07612.pdf, Figure 3
+  v = Sub(v, detail::AndS(ShiftRight<1>(v), 0x55));  // counts per 2-bit group
+  v = Add(detail::AndS(ShiftRight<2>(v), 0x33), detail::AndS(v, 0x33));  // 4-bit
+  return detail::AndS(Add(v, ShiftRight<4>(v)), 0x0F);  // count per byte
+}
+
+// ------------------------------ LoadDup128
+
+template <class D>  // Loads at most 16 bytes from p, repeated in every block.
+HWY_API VFromD<D> LoadDup128(D d, const TFromD<D>* const HWY_RESTRICT p) {
+  const RebindToUnsigned<decltype(d)> du;
+
+  // Make sure that no more than 16 bytes are loaded from p
+  constexpr int kLoadPow2 = d.Pow2();
+  constexpr size_t kMaxLanesToLoad =
+      HWY_MIN(HWY_MAX_LANES_D(D), 16 / sizeof(TFromD<D>));
+  constexpr size_t kLoadN = D::template NewN<kLoadPow2, kMaxLanesToLoad>();
+  const Simd<TFromD<D>, kLoadN, kLoadPow2> d_load;  // same Pow2, capped lanes
+  static_assert(d_load.MaxBytes() <= 16,
+                "d_load.MaxBytes() <= 16 must be true");
+  static_assert((d.MaxBytes() < 16) || (d_load.MaxBytes() == 16),
+                "d_load.MaxBytes() == 16 must be true if d.MaxBytes() >= 16 is "
+                "true");
+  static_assert((d.MaxBytes() >= 16) || (d_load.MaxBytes() == d.MaxBytes()),
+                "d_load.MaxBytes() == d.MaxBytes() must be true if "
+                "d.MaxBytes() < 16 is true");
+
+  const VFromD<D> loaded = Load(d_load, p);
+  if (d.MaxBytes() <= 16) return loaded;  // at most one block: nothing to copy
+
+  // idx must be unsigned for TableLookupLanes.
+  using TU = TFromD<decltype(du)>;
+  const TU mask = static_cast<TU>(detail::LanesPerBlock(d) - 1);
+  // Broadcast the first block.
+  const VFromD<RebindToUnsigned<D>> idx = detail::AndS(detail::Iota0(du), mask);
+  // Safe even for 8-bit lanes because indices never exceed 15.
+  return TableLookupLanes(loaded, idx);
+}
+
+// ------------------------------ LoadMaskBits
+
+// Support all combinations of T and SHIFT(LMUL) without explicit overloads for
+// each. First overload for MLEN=1..64.
+namespace detail {
+
+// Maps D to MLEN (wrapped in SizeTag), such that #mask_bits = VLEN/MLEN. MLEN
+// increases with lane size and decreases for increasing LMUL. Cap at 64, the
+// largest supported by HWY_RVV_FOREACH_B (and intrinsics), for virtual LMUL
+// e.g. vuint16mf8_t: (8*2 << 3) == 128.
+template <class D>
+using MaskTag = hwy::SizeTag<HWY_MIN(
+    64, detail::ScaleByPower(8 * sizeof(TFromD<D>), -D().Pow2()))>;
+// Per-MLEN overloads: return __riscv_vlm_v_b<MLEN>(bits, N); tag picks MLEN.
+#define HWY_RVV_LOAD_MASK_BITS(SEW, SHIFT, MLEN, NAME, OP)                \
+  HWY_INLINE HWY_RVV_M(MLEN)                                              \
+      NAME(hwy::SizeTag<MLEN> /* tag */, const uint8_t* bits, size_t N) { \
+    return __riscv_v##OP##_v_b##MLEN(bits, N);                            \
+  }
+HWY_RVV_FOREACH_B(HWY_RVV_LOAD_MASK_BITS, LoadMaskBits, lm)
+#undef HWY_RVV_LOAD_MASK_BITS
+}  // namespace detail
+
+template <class D, class MT = detail::MaskTag<D>>  // MT picks the MLEN overload
+HWY_API auto LoadMaskBits(D d, const uint8_t* bits)
+    -> decltype(detail::LoadMaskBits(MT(), bits, Lanes(d))) {
+  return detail::LoadMaskBits(MT(), bits, Lanes(d));  // reads Lanes(d) bits
+}
+
+// ------------------------------ StoreMaskBits
+#define HWY_RVV_STORE_MASK_BITS(SEW, SHIFT, MLEN, NAME, OP)               \
+  template <class D>                                                      \
+  HWY_API size_t NAME(D d, HWY_RVV_M(MLEN) m, uint8_t* bits) {            \
+    const size_t N = Lanes(d);                                            \
+    __riscv_v##OP##_v_b##MLEN(bits, m, N);                                \
+    /* Non-full byte, need to clear the undefined upper bits. */          \
+    /* Use MaxLanes and sizeof(T) to move some checks to compile-time. */ \
+    constexpr bool kLessThan8 =                                           \
+        detail::ScaleByPower(16 / sizeof(TFromD<D>), d.Pow2()) < 8;       \
+    if (MaxLanes(d) < 8 || (kLessThan8 && N < 8)) {                       \
+      const int mask = (1 << N) - 1;                                      \
+      bits[0] = static_cast<uint8_t>(bits[0] & mask);                     \
+    }                                                                     \
+    return (N + 7) / 8;                                                   \
+  }
+HWY_RVV_FOREACH_B(HWY_RVV_STORE_MASK_BITS, StoreMaskBits, sm)
+#undef HWY_RVV_STORE_MASK_BITS  // vsm store; returns (N + 7) / 8 byte count.
+
+// ------------------------------ CompressBits, CompressBitsStore (LoadMaskBits)
+
+template <class V>  // Compress with a mask loaded from packed bits.
+HWY_INLINE V CompressBits(V v, const uint8_t* HWY_RESTRICT bits) {
+  return Compress(v, LoadMaskBits(DFromV<V>(), bits));
+}
+
+template <class D>  // CompressStore with a mask loaded from packed bits.
+HWY_API size_t CompressBitsStore(VFromD<D> v, const uint8_t* HWY_RESTRICT bits,
+                                 D d, TFromD<D>* HWY_RESTRICT unaligned) {
+  return CompressStore(v, LoadMaskBits(d, bits), d, unaligned);
+}
+
+// ------------------------------ FirstN (Iota0, Lt, RebindMask, SlideUp)
+
+// Disallow for 8-bit because Iota is likely to overflow.
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API MFromD<D> FirstN(const D d, const size_t n) {
+  const RebindToUnsigned<D> du;
+  using TU = TFromD<decltype(du)>;  // n is narrowed to the unsigned lane type
+  return RebindMask(d, detail::LtS(detail::Iota0(du), static_cast<TU>(n)));
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_API MFromD<D> FirstN(const D d, const size_t n) {
+  // 8-bit lanes: slide zeros up by n into a vector of ones, then compare.
+  const auto all_ones = Set(d, 1);
+  return Eq(detail::SlideUp(all_ones, Zero(d), n), all_ones);
+}
+
+// ------------------------------ Neg (Sub)
+
+template <class V, HWY_IF_SIGNED_V(V)>  // Signed-integer negation.
+HWY_API V Neg(const V v) {
+  return detail::ReverseSubS(v, 0);  // presumably computes 0 - v
+}
+
+// vector = f(vector), but argument is repeated
+#define HWY_RVV_RETV_ARGV2(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
+                           SHIFT, MLEN, NAME, OP)                           \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) {   \
+    return __riscv_v##OP##_vv_##CHAR##SEW##LMUL(v, v,                       \
+                                                HWY_RVV_AVL(SEW, SHIFT));   \
+  }
+// Float Neg = vfsgnjn(v, v). The macro stays defined; Abs reuses it below.
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGV2, Neg, fsgnjn, _ALL)
+
+#if !HWY_HAVE_FLOAT16
+// Emulated float16 Neg: XOR each 16-bit lane with the sign-bit mask.
+template <class V, HWY_IF_U16_D(DFromV<V>)>  // float16_t
+HWY_API V Neg(V v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  return BitCast(d, Xor(BitCast(du, v), Set(du, SignMask<TU>())));
+}
+
+#endif  // !HWY_HAVE_FLOAT16
+
+// ------------------------------ Abs (Max, Neg)
+
+template <class V, HWY_IF_SIGNED_V(V)>  // Signed-integer absolute value.
+HWY_API V Abs(const V v) {
+  return Max(v, Neg(v));  // the larger of v and its negation
+}
+// Float Abs = vfsgnjx(v, v), generated for all float types.
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGV2, Abs, fsgnjx, _ALL)
+
+#undef HWY_RVV_RETV_ARGV2
+
+// ------------------------------ AbsDiff (Abs, Sub)
+template <class V, HWY_IF_FLOAT_V(V)>  // Floating-point only.
+HWY_API V AbsDiff(const V a, const V b) {
+  return Abs(Sub(a, b));  // |a - b|
+}
+
+// ------------------------------ Round  (NearestInt, ConvertTo, CopySign)
+
+// IEEE-754 roundToIntegralTiesToEven returns floating-point, but we do not have
+// a dedicated instruction for that. Rounding to integer and converting back to
+// float is correct except when the input magnitude is large, in which case the
+// input was already an integer (because mantissa >> exponent is zero).
+
+namespace detail {
+enum RoundingModes { kNear, kTrunc, kDown, kUp };  // operands given to fsrm
+// Mask of lanes with |v| < MantissaEnd; rounding selects int results there.
+template <class V>
+HWY_INLINE auto UseInt(const V v) -> decltype(MaskFromVec(v)) {
+  return detail::LtS(Abs(v), MantissaEnd<TFromV<V>>());
+}
+
+}  // namespace detail
+
+template <class V>
+HWY_API V Round(const V v) {
+  const DFromV<V> d_float;
+  // Round-trip through integer (current rounding mode), then re-apply the
+  // sign of v. Lanes rejected by UseInt are returned unchanged.
+  const auto round_trip = ConvertTo(d_float, NearestInt(v));
+  const auto signed_result = CopySign(round_trip, v);
+  return IfThenElse(detail::UseInt(v), signed_result, v);
+}
+
+// ------------------------------ Trunc (ConvertTo)
+template <class V>
+HWY_API V Trunc(const V v) {
+  const DFromV<V> d_float;
+  const RebindToSigned<decltype(d_float)> d_int;
+  // Float -> int conversion rounds toward zero; convert back and re-apply the
+  // sign of v. Lanes rejected by UseInt are returned unchanged.
+  const auto round_trip = ConvertTo(d_float, ConvertTo(d_int, v));
+  const auto signed_result = CopySign(round_trip, v);
+  return IfThenElse(detail::UseInt(v), signed_result, v);
+}
+
+// ------------------------------ Ceil
+template <class V>  // Round() evaluated with the rounding mode set to kUp.
+HWY_API V Ceil(const V v) {
+  asm volatile("fsrm %0" ::"r"(detail::kUp));  // select round-up
+  const auto ret = Round(v);
+  asm volatile("fsrm %0" ::"r"(detail::kNear));  // NOTE(review): not prior mode
+  return ret;
+}
+
+// ------------------------------ Floor
+template <class V>  // Round() evaluated with the rounding mode set to kDown.
+HWY_API V Floor(const V v) {
+  asm volatile("fsrm %0" ::"r"(detail::kDown));  // select round-down
+  const auto ret = Round(v);
+  asm volatile("fsrm %0" ::"r"(detail::kNear));  // NOTE(review): not prior mode
+  return ret;
+}
+
+// ------------------------------ Floating-point classification (Ne)
+
+// vfclass does not help because it would require 3 instructions (to AND and
+// then compare the bits), whereas these are just 1-3 integer instructions.
+
+template <class V>
+HWY_API MFromD<DFromV<V>> IsNaN(const V v) {
+  return Ne(v, v);  // only NaN compares unequal to itself
+}
+
+template <class V, class D = DFromV<V>>
+HWY_API MFromD<D> IsInf(const V v) {
+  const D d;
+  const RebindToSigned<decltype(d)> di;
+  using T = TFromD<D>;
+  const VFromD<decltype(di)> vi = BitCast(di, v);  // float bits as integers
+  // 'Shift left' to clear the sign bit, check for exponent=max and mantissa=0.
+  return RebindMask(d, detail::EqS(Add(vi, vi), hwy::MaxExponentTimes2<T>()));
+}
+
+// Returns whether normal/subnormal/zero, i.e. exponent field below its max.
+template <class V, class D = DFromV<V>>
+HWY_API MFromD<D> IsFinite(const V v) {
+  const D d;
+  const RebindToUnsigned<decltype(d)> du;
+  const RebindToSigned<decltype(d)> di;  // cheaper than unsigned comparison
+  using T = TFromD<D>;
+  const VFromD<decltype(du)> vu = BitCast(du, v);
+  // 'Shift left' to clear the sign bit, then right so we can compare with the
+  // max exponent (cannot compare with MaxExponentTimes2 directly because it is
+  // negative and non-negative floats would be greater).
+  const VFromD<decltype(di)> exp =
+      BitCast(di, ShiftRight<hwy::MantissaBits<T>() + 1>(Add(vu, vu)));
+  return RebindMask(d, detail::LtS(exp, hwy::MaxExponentField<T>()));
+}
+
+// ------------------------------ Iota (ConvertTo)
+
+template <class D, HWY_IF_UNSIGNED_D(D)>
+HWY_API VFromD<D> Iota(const D d, TFromD<D> first) {
+  return detail::AddS(detail::Iota0(d), first);  // Iota0 result plus first
+}
+
+template <class D, HWY_IF_SIGNED_D(D)>
+HWY_API VFromD<D> Iota(const D d, TFromD<D> first) {
+  const RebindToUnsigned<D> du;  // Iota0 is called with the unsigned tag
+  return detail::AddS(BitCast(d, detail::Iota0(du)), first);
+}
+
+template <class D, HWY_IF_FLOAT_D(D)>
+HWY_API VFromD<D> Iota(const D d, TFromD<D> first) {
+  const RebindToUnsigned<D> du;  // Iota0 is called with the unsigned tag
+  const RebindToSigned<D> di;    // signed view used for int -> float
+  return detail::AddS(ConvertTo(d, BitCast(di, detail::Iota0(du))), first);
+}
+
+// ------------------------------ MulEven/Odd (Mul, OddEven)
+
+template <class V, HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2) | (1 << 4)),
+          class D = DFromV<V>, class DW = RepartitionToWide<D>>
+HWY_API VFromD<DW> MulEven(const V a, const V b) {
+  const auto lo = Mul(a, b);              // lower half of each product
+  const auto hi = detail::MulHigh(a, b);  // upper half of each product
+  return BitCast(DW(), OddEven(detail::Slide1Up(hi), lo));  // hi next to lo
+}
+
+template <class V, HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2) | (1 << 4)),
+          class D = DFromV<V>, class DW = RepartitionToWide<D>>
+HWY_API VFromD<DW> MulOdd(const V a, const V b) {
+  const auto lo = Mul(a, b);              // lower half of each product
+  const auto hi = detail::MulHigh(a, b);  // upper half of each product
+  return BitCast(DW(), OddEven(hi, detail::Slide1Down(lo)));  // lo next to hi
+}
+
+// There is no 64x64 vwmul. Result stays in V: low half then high half.
+template <class V, HWY_IF_T_SIZE_V(V, 8)>
+HWY_INLINE V MulEven(const V a, const V b) {
+  const auto lo = Mul(a, b);              // lower 64 bits of each product
+  const auto hi = detail::MulHigh(a, b);  // upper 64 bits of each product
+  return OddEven(detail::Slide1Up(hi), lo);
+}
+
+template <class V, HWY_IF_T_SIZE_V(V, 8)>  // 64-bit lanes: result stays in V.
+HWY_INLINE V MulOdd(const V a, const V b) {
+  const auto lo = Mul(a, b);              // lower 64 bits of each product
+  const auto hi = detail::MulHigh(a, b);  // upper 64 bits of each product
+  return OddEven(hi, detail::Slide1Down(lo));
+}
+
+// ------------------------------ ReorderDemote2To (OddEven, Combine)
+
+template <size_t N, int kPow2>  // bf16 result = upper 16 bits of each f32.
+HWY_API VFromD<Simd<bfloat16_t, N, kPow2>> ReorderDemote2To(
+    Simd<bfloat16_t, N, kPow2> dbf16,
+    VFromD<RepartitionToWide<decltype(dbf16)>> a,
+    VFromD<RepartitionToWide<decltype(dbf16)>> b) {
+  const RebindToUnsigned<decltype(dbf16)> du16;
+  const RebindToUnsigned<DFromV<decltype(a)>> du32;
+  const VFromD<decltype(du32)> b_in_even = ShiftRight<16>(BitCast(du32, b));
+  return BitCast(dbf16, OddEven(BitCast(du16, a), BitCast(du16, b_in_even)));
+}
+
+// Signed. If LMUL is not the max, Combine first to avoid another DemoteTo.
+template <class DN, HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<DN>),
+          HWY_IF_POW2_LE_D(DN, 2), class V, HWY_IF_SIGNED_V(V),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2),
+          class V2 = VFromD<Repartition<TFromV<V>, DN>>,
+          hwy::EnableIf<DFromV<V>().Pow2() == DFromV<V2>().Pow2()>* = nullptr>
+HWY_API VFromD<DN> ReorderDemote2To(DN dn, V a, V b) {
+  const Rebind<TFromV<V>, DN> dt;  // wide lane type paired with dn
+  const VFromD<decltype(dt)> ab = Combine(dt, b, a);  // both inputs, one vector
+  return DemoteTo(dn, ab);
+}
+
+template <class DN, HWY_IF_UNSIGNED_D(DN), HWY_IF_POW2_LE_D(DN, 2), class V,
+          HWY_IF_UNSIGNED_V(V), HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2),
+          class V2 = VFromD<Repartition<TFromV<V>, DN>>,
+          hwy::EnableIf<DFromV<V>().Pow2() == DFromV<V2>().Pow2()>* = nullptr>
+HWY_API VFromD<DN> ReorderDemote2To(DN dn, V a, V b) {
+  const Rebind<TFromV<V>, DN> dt;  // wide lane type paired with dn
+  const VFromD<decltype(dt)> ab = Combine(dt, b, a);  // both inputs, one vector
+  return DemoteTo(dn, ab);  // unsigned, non-max LMUL: a single DemoteTo
+}
+
+// Signed, max LMUL: must DemoteTo each input first, then Combine.
+template <class DN, HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<DN>),
+          HWY_IF_POW2_GT_D(DN, 2), class V, HWY_IF_SIGNED_V(V),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2),
+          class V2 = VFromD<Repartition<TFromV<V>, DN>>,
+          hwy::EnableIf<DFromV<V>().Pow2() == DFromV<V2>().Pow2()>* = nullptr>
+HWY_API VFromD<DN> ReorderDemote2To(DN dn, V a, V b) {
+  const Half<decltype(dn)> dnh;  // each input demotes to half of the result
+  const VFromD<decltype(dnh)> demoted_a = DemoteTo(dnh, a);
+  const VFromD<decltype(dnh)> demoted_b = DemoteTo(dnh, b);
+  return Combine(dn, demoted_b, demoted_a);
+}
+
+template <class DN, HWY_IF_UNSIGNED_D(DN), HWY_IF_POW2_GT_D(DN, 2), class V,
+          HWY_IF_UNSIGNED_V(V), HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2),
+          class V2 = VFromD<Repartition<TFromV<V>, DN>>,
+          hwy::EnableIf<DFromV<V>().Pow2() == DFromV<V2>().Pow2()>* = nullptr>
+HWY_API VFromD<DN> ReorderDemote2To(DN dn, V a, V b) {
+  const Half<decltype(dn)> dnh;  // each input demotes to half of the result
+  const VFromD<decltype(dnh)> demoted_a = DemoteTo(dnh, a);
+  const VFromD<decltype(dnh)> demoted_b = DemoteTo(dnh, b);
+  return Combine(dn, demoted_b, demoted_a);  // unsigned, max LMUL variant
+}
+
+// f32 -> bf16. If LMUL is not the max, Combine first to avoid another DemoteTo.
+template <class DN, HWY_IF_BF16_D(DN), HWY_IF_POW2_LE_D(DN, 2), class V,
+          HWY_IF_F32_D(DFromV<V>),
+          class V2 = VFromD<Repartition<TFromV<V>, DN>>,
+          hwy::EnableIf<DFromV<V>().Pow2() == DFromV<V2>().Pow2()>* = nullptr>
+HWY_API VFromD<DN> OrderedDemote2To(DN dn, V a, V b) {
+  const Rebind<TFromV<V>, DN> dt;  // f32 lane type paired with dn
+  const VFromD<decltype(dt)> ab = Combine(dt, b, a);  // both inputs, one vector
+  return DemoteTo(dn, ab);
+}
+
+// f32 -> bf16, max LMUL: must DemoteTo each input first, then Combine.
+template <class DN, HWY_IF_BF16_D(DN), HWY_IF_POW2_GT_D(DN, 2), class V,
+          HWY_IF_F32_D(DFromV<V>),
+          class V2 = VFromD<Repartition<TFromV<V>, DN>>,
+          hwy::EnableIf<DFromV<V>().Pow2() == DFromV<V2>().Pow2()>* = nullptr>
+HWY_API VFromD<DN> OrderedDemote2To(DN dn, V a, V b) {
+  const Half<decltype(dn)> dnh;
+  const RebindToUnsigned<decltype(dn)> dn_u;  // Combine is done on unsigned
+  const RebindToUnsigned<decltype(dnh)> dnh_u;
+  const auto demoted_a = BitCast(dnh_u, DemoteTo(dnh, a));
+  const auto demoted_b = BitCast(dnh_u, DemoteTo(dnh, b));
+  return BitCast(dn, Combine(dn_u, demoted_b, demoted_a));  // back to bf16
+}
+
+template <class DN, HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<DN>), class V,
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2),
+          class V2 = VFromD<Repartition<TFromV<V>, DN>>,
+          hwy::EnableIf<DFromV<V>().Pow2() == DFromV<V2>().Pow2()>* = nullptr>
+HWY_API VFromD<DN> OrderedDemote2To(DN dn, V a, V b) {
+  return ReorderDemote2To(dn, a, b);  // integer types: forward unchanged
+}
+
+// ------------------------------ WidenMulPairwiseAdd
+
+template <class D32, HWY_IF_F32_D(D32),
+          class V16 = VFromD<Repartition<bfloat16_t, D32>>>
+HWY_API VFromD<D32> WidenMulPairwiseAdd(D32 df32, V16 a, V16 b) {
+  const RebindToUnsigned<decltype(df32)> du32;
+  using VU32 = VFromD<decltype(du32)>;
+  const VU32 odd = Set(du32, 0xFFFF0000u);  // bfloat16 is the upper half of f32
+  // Using shift/and instead of Zip leads to the odd/even order that
+  // RearrangeToOddPlusEven prefers. a?/b? suffix: e = even, o = odd bf16 lane.
+  const VU32 ae = ShiftLeft<16>(BitCast(du32, a));
+  const VU32 ao = And(BitCast(du32, a), odd);
+  const VU32 be = ShiftLeft<16>(BitCast(du32, b));
+  const VU32 bo = And(BitCast(du32, b), odd);
+  return MulAdd(BitCast(df32, ae), BitCast(df32, be),
+                Mul(BitCast(df32, ao), BitCast(df32, bo)));  // ae*be + ao*bo
+}
+
+template <class D, HWY_IF_I32_D(D), class VI16>
+HWY_API VFromD<D> WidenMulPairwiseAdd(D d32, VI16 a, VI16 b) {
+  using VI32 = VFromD<decltype(d32)>;
+  // Manual sign extension requires two shifts for even lanes.
+  const VI32 ae = ShiftRight<16>(ShiftLeft<16>(BitCast(d32, a)));
+  const VI32 be = ShiftRight<16>(ShiftLeft<16>(BitCast(d32, b)));
+  const VI32 ao = ShiftRight<16>(BitCast(d32, a));  // odd lanes: one shift
+  const VI32 bo = ShiftRight<16>(BitCast(d32, b));
+  return Add(Mul(ae, be), Mul(ao, bo));  // even product + odd product
+}
+
+template <class D, HWY_IF_U32_D(D), class VI16>
+HWY_API VFromD<D> WidenMulPairwiseAdd(D du32, VI16 a, VI16 b) {
+  using VU32 = VFromD<decltype(du32)>;
+  // Zero extension: mask the low 16 bits for even lanes, shift for odd lanes.
+  const VU32 ae = detail::AndS(BitCast(du32, a), uint32_t{0x0000FFFFu});
+  const VU32 be = detail::AndS(BitCast(du32, b), uint32_t{0x0000FFFFu});
+  const VU32 ao = ShiftRight<16>(BitCast(du32, a));
+  const VU32 bo = ShiftRight<16>(BitCast(du32, b));
+  return Add(Mul(ae, be), Mul(ao, bo));  // even product + odd product
+}
+
+// ------------------------------ ReorderWidenMulAccumulate (MulAdd, ZipLower)
+
+namespace detail {
+
+// Non-overloaded wrapper function so we can define DF32 in template args.
+template <size_t N, int kPow2, class DF32 = Simd<float, N, kPow2>,
+          class VF32 = VFromD<DF32>,
+          class DBF16 = Repartition<bfloat16_t, Simd<float, N, kPow2>>>
+HWY_API VF32 ReorderWidenMulAccumulateBF16(Simd<float, N, kPow2> df32,
+                                           VFromD<DBF16> a, VFromD<DBF16> b,
+                                           const VF32 sum0, VF32& sum1) {
+  const RebindToUnsigned<DF32> du32;
+  using VU32 = VFromD<decltype(du32)>;
+  const VU32 odd = Set(du32, 0xFFFF0000u);  // bfloat16 is the upper half of f32
+  // Using shift/and instead of Zip leads to the odd/even order that
+  // RearrangeToOddPlusEven prefers.
+  const VU32 ae = ShiftLeft<16>(BitCast(du32, a));
+  const VU32 ao = And(BitCast(du32, a), odd);
+  const VU32 be = ShiftLeft<16>(BitCast(du32, b));
+  const VU32 bo = And(BitCast(du32, b), odd);
+  sum1 = MulAdd(BitCast(df32, ao), BitCast(df32, bo), sum1);  // odd -> sum1
+  return MulAdd(BitCast(df32, ae), BitCast(df32, be), sum0);  // even -> sum0
+}
+
+#define HWY_RVV_WIDEN_MACC(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH,    \
+                           SHIFT, MLEN, NAME, OP)                              \
+  template <size_t N>                                                          \
+  HWY_API HWY_RVV_V(BASE, SEWD, LMULD) NAME(                                   \
+      HWY_RVV_D(BASE, SEWD, N, SHIFT + 1) d, HWY_RVV_V(BASE, SEWD, LMULD) sum, \
+      HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_V(BASE, SEW, LMUL) b) {            \
+    return __riscv_v##OP##CHAR##SEWD##LMULD(sum, a, b, Lanes(d));              \
+  }
+
+HWY_RVV_FOREACH_I16(HWY_RVV_WIDEN_MACC, WidenMulAcc, wmacc_vv_, _EXT_VIRT)
+HWY_RVV_FOREACH_U16(HWY_RVV_WIDEN_MACC, WidenMulAcc, wmaccu_vv_, _EXT_VIRT)
+#undef HWY_RVV_WIDEN_MACC  // only needed for the two WidenMulAcc lines above
+
+// i16: if LMUL is not the max, we can WidenMul first (3 instructions).
+template <class D32, HWY_IF_POW2_LE_D(D32, 2), class V32 = VFromD<D32>,
+          class D16 = RepartitionToNarrow<D32>>
+HWY_API VFromD<D32> ReorderWidenMulAccumulateI16(D32 d32, VFromD<D16> a,
+                                                 VFromD<D16> b, const V32 sum0,
+                                                 V32& sum1) {
+  const Twice<decltype(d32)> d32t;
+  using V32T = VFromD<decltype(d32t)>;
+  V32T sum = Combine(d32t, sum1, sum0);  // both accumulators in one vector
+  sum = detail::WidenMulAcc(d32t, sum, a, b);
+  sum1 = UpperHalf(d32, sum);  // split back into the two accumulators
+  return LowerHalf(d32, sum);
+}
+
+// i16, max LMUL: must LowerHalf first (4 instructions).
+template <class D32, HWY_IF_POW2_GT_D(D32, 2), class V32 = VFromD<D32>,
+          class D16 = RepartitionToNarrow<D32>>
+HWY_API VFromD<D32> ReorderWidenMulAccumulateI16(D32 d32, VFromD<D16> a,
+                                                 VFromD<D16> b, const V32 sum0,
+                                                 V32& sum1) {
+  const Half<D16> d16h;
+  using V16H = VFromD<decltype(d16h)>;
+  const V16H a0 = LowerHalf(d16h, a);
+  const V16H a1 = UpperHalf(d16h, a);
+  const V16H b0 = LowerHalf(d16h, b);
+  const V16H b1 = UpperHalf(d16h, b);
+  sum1 = detail::WidenMulAcc(d32, sum1, a1, b1);  // upper halves -> sum1
+  return detail::WidenMulAcc(d32, sum0, a0, b0);  // lower halves -> sum0
+}
+
+// u16: if LMUL is not the max, we can WidenMul first (3 instructions).
+template <class D32, HWY_IF_POW2_LE_D(D32, 2), class V32 = VFromD<D32>,
+          class D16 = RepartitionToNarrow<D32>>
+HWY_API VFromD<D32> ReorderWidenMulAccumulateU16(D32 d32, VFromD<D16> a,
+                                                 VFromD<D16> b, const V32 sum0,
+                                                 V32& sum1) {
+  const Twice<decltype(d32)> d32t;
+  using V32T = VFromD<decltype(d32t)>;
+  V32T sum = Combine(d32t, sum1, sum0);  // both accumulators in one vector
+  sum = detail::WidenMulAcc(d32t, sum, a, b);
+  sum1 = UpperHalf(d32, sum);  // split back into the two accumulators
+  return LowerHalf(d32, sum);
+}
+
+// u16, max LMUL: must LowerHalf first (4 instructions).
+template <class D32, HWY_IF_POW2_GT_D(D32, 2), class V32 = VFromD<D32>,
+          class D16 = RepartitionToNarrow<D32>>
+HWY_API VFromD<D32> ReorderWidenMulAccumulateU16(D32 d32, VFromD<D16> a,
+                                                 VFromD<D16> b, const V32 sum0,
+                                                 V32& sum1) {
+  const Half<D16> d16h;
+  using V16H = VFromD<decltype(d16h)>;
+  const V16H a0 = LowerHalf(d16h, a);
+  const V16H a1 = UpperHalf(d16h, a);
+  const V16H b0 = LowerHalf(d16h, b);
+  const V16H b1 = UpperHalf(d16h, b);
+  sum1 = detail::WidenMulAcc(d32, sum1, a1, b1);  // upper halves -> sum1
+  return detail::WidenMulAcc(d32, sum0, a0, b0);  // lower halves -> sum0
+}
+
+}  // namespace detail
+
+template <size_t N, int kPow2, class VN, class VW>  // f32 tag: bf16 helper
+HWY_API VW ReorderWidenMulAccumulate(Simd<float, N, kPow2> d32, VN a, VN b,
+                                     const VW sum0, VW& sum1) {
+  return detail::ReorderWidenMulAccumulateBF16(d32, a, b, sum0, sum1);
+}
+
+template <size_t N, int kPow2, class VN, class VW>  // i32 tag: i16 helper
+HWY_API VW ReorderWidenMulAccumulate(Simd<int32_t, N, kPow2> d32, VN a, VN b,
+                                     const VW sum0, VW& sum1) {
+  return detail::ReorderWidenMulAccumulateI16(d32, a, b, sum0, sum1);
+}
+
+template <size_t N, int kPow2, class VN, class VW>  // u32 tag: u16 helper
+HWY_API VW ReorderWidenMulAccumulate(Simd<uint32_t, N, kPow2> d32, VN a, VN b,
+                                     const VW sum0, VW& sum1) {
+  return detail::ReorderWidenMulAccumulateU16(d32, a, b, sum0, sum1);
+}
+
+// ------------------------------ RearrangeToOddPlusEven
+
+template <class VW, HWY_IF_SIGNED_V(VW)>  // vint32_t*
+HWY_API VW RearrangeToOddPlusEven(const VW sum0, const VW sum1) {
+  // vwmacc doubles LMUL, so we require a pairwise sum here. This op is
+  // expected to be less frequent than ReorderWidenMulAccumulate, hence it's
+  // preferable to do the extra work here rather than do manual odd/even
+  // extraction there.
+  const DFromV<VW> di32;
+  const RebindToUnsigned<decltype(di32)> du32;
+  const Twice<decltype(di32)> di32x2;
+  const RepartitionToWide<decltype(di32x2)> di64x2;
+  const RebindToUnsigned<decltype(di64x2)> du64x2;
+  const auto combined = BitCast(di64x2, Combine(di32x2, sum1, sum0));
+  // Isolate odd/even int32 in int64 lanes; their sum is truncated to 32 bits.
+  const auto even = ShiftRight<32>(ShiftLeft<32>(combined));  // sign extend
+  const auto odd = ShiftRight<32>(combined);
+  return BitCast(di32, TruncateTo(du32, BitCast(du64x2, Add(even, odd))));
+}
+
+// For max LMUL, we cannot Combine again and instead manually unroll:
+HWY_API vint32m8_t RearrangeToOddPlusEven(vint32m8_t sum0, vint32m8_t sum1) {
+  const DFromV<vint32m8_t> d;
+  const Half<decltype(d)> dh;
+  const vint32m4_t lo =  // pairwise sums within sum0
+      RearrangeToOddPlusEven(LowerHalf(sum0), UpperHalf(dh, sum0));
+  const vint32m4_t hi =  // pairwise sums within sum1
+      RearrangeToOddPlusEven(LowerHalf(sum1), UpperHalf(dh, sum1));
+  return Combine(d, hi, lo);
+}
+
+template <class VW, HWY_IF_UNSIGNED_V(VW)>  // vuint32_t*
+HWY_API VW RearrangeToOddPlusEven(const VW sum0, const VW sum1) {
+  // vwmacc doubles LMUL, so we require a pairwise sum here. This op is
+  // expected to be less frequent than ReorderWidenMulAccumulate, hence it's
+  // preferable to do the extra work here rather than do manual odd/even
+  // extraction there.
+  const DFromV<VW> du32;
+  const Twice<decltype(du32)> du32x2;
+  const RepartitionToWide<decltype(du32x2)> du64x2;
+  const auto combined = BitCast(du64x2, Combine(du32x2, sum1, sum0));
+  // Isolate odd/even uint32 in uint64 lanes (mask/shift = zero extension).
+  const auto even = detail::AndS(combined, uint64_t{0xFFFFFFFFu});
+  const auto odd = ShiftRight<32>(combined);
+  return TruncateTo(du32, Add(even, odd));
+}
+
+// For max LMUL, we cannot Combine again and instead manually unroll:
+HWY_API vuint32m8_t RearrangeToOddPlusEven(vuint32m8_t sum0, vuint32m8_t sum1) {
+  const DFromV<vuint32m8_t> d;
+  const Half<decltype(d)> dh;
+  const vuint32m4_t lo =  // pairwise sums within sum0
+      RearrangeToOddPlusEven(LowerHalf(sum0), UpperHalf(dh, sum0));
+  const vuint32m4_t hi =  // pairwise sums within sum1
+      RearrangeToOddPlusEven(LowerHalf(sum1), UpperHalf(dh, sum1));
+  return Combine(d, hi, lo);
+}
+
+template <class VW, HWY_IF_FLOAT_V(VW)>  // vfloat*
+HWY_API VW RearrangeToOddPlusEven(const VW sum0, const VW sum1) {
+  return Add(sum0, sum1);  // invariant already holds: just add both sums
+}
+
+// ------------------------------ Lt128
+template <class D>
+HWY_INLINE MFromD<D> Lt128(D d, const VFromD<D> a, const VFromD<D> b) {
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "D must be u64");
+  // Truth table of Eq and Compare for Hi and Lo u64.
+  // (removed lines with (=H && cH) or (=L && cL) - cannot both be true)
+  // =H =L cH cL  | out = cH | (=H & cL)
+  //  0  0  0  0  |  0
+  //  0  0  0  1  |  0
+  //  0  0  1  0  |  1
+  //  0  0  1  1  |  1
+  //  0  1  0  0  |  0
+  //  0  1  0  1  |  0
+  //  0  1  1  0  |  1
+  //  1  0  0  0  |  0
+  //  1  0  0  1  |  1
+  //  1  1  0  0  |  0
+  const VFromD<D> eqHL = VecFromMask(d, Eq(a, b));
+  const VFromD<D> ltHL = VecFromMask(d, Lt(a, b));
+  // Shift leftward so L can influence H.
+  const VFromD<D> ltLx = detail::Slide1Up(ltHL);
+  const VFromD<D> vecHx = OrAnd(ltHL, eqHL, ltLx);  // cH | (=H & cL)
+  // Replicate H to its neighbor.
+  return MaskFromVec(OddEven(vecHx, detail::Slide1Down(vecHx)));
+}
+
+// ------------------------------ Lt128Upper
+template <class D>
+HWY_INLINE MFromD<D> Lt128Upper(D d, const VFromD<D> a, const VFromD<D> b) {
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "D must be u64");
+  const VFromD<D> ltHL = VecFromMask(d, Lt(a, b));  // per-lane a < b
+  const VFromD<D> down = detail::Slide1Down(ltHL);
+  // b(267743505): Clang compiler bug, workaround is DoNotOptimize
+  asm volatile("" : : "r,m"(GetLane(down)) : "memory");
+  // Replicate H to its neighbor.
+  return MaskFromVec(OddEven(ltHL, down));
+}
+
+// ------------------------------ Eq128
+template <class D>
+HWY_INLINE MFromD<D> Eq128(D d, const VFromD<D> a, const VFromD<D> b) {
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "D must be u64");
+  const VFromD<D> eqHL = VecFromMask(d, Eq(a, b));
+  const VFromD<D> eqLH = Reverse2(d, eqHL);
+  const VFromD<D> eq = And(eqHL, eqLH);  // lane and its swapped neighbor
+  // b(267743505): Clang compiler bug, workaround is DoNotOptimize
+  asm volatile("" : : "r,m"(GetLane(eq)) : "memory");
+  return MaskFromVec(eq);
+}
+
+// ------------------------------ Eq128Upper
+template <class D>
+HWY_INLINE MFromD<D> Eq128Upper(D d, const VFromD<D> a, const VFromD<D> b) {
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "D must be u64");
+  const VFromD<D> eqHL = VecFromMask(d, Eq(a, b));  // per-lane a == b
+  // Replicate H to its neighbor.
+  return MaskFromVec(OddEven(eqHL, detail::Slide1Down(eqHL)));
+}
+
+// ------------------------------ Ne128
+template <class D>
+HWY_INLINE MFromD<D> Ne128(D d, const VFromD<D> a, const VFromD<D> b) {
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "D must be u64");
+  const VFromD<D> neHL = VecFromMask(d, Ne(a, b));
+  const VFromD<D> neLH = Reverse2(d, neHL);
+  // b(267743505): Clang compiler bug, workaround is DoNotOptimize
+  asm volatile("" : : "r,m"(GetLane(neLH)) : "memory");
+  return MaskFromVec(Or(neHL, neLH));  // lane or its swapped neighbor differs
+}
+
+// ------------------------------ Ne128Upper
+template <class D>
+HWY_INLINE MFromD<D> Ne128Upper(D d, const VFromD<D> a, const VFromD<D> b) {
+  static_assert(IsSame<TFromD<D>, uint64_t>(), "D must be u64");
+  const VFromD<D> neHL = VecFromMask(d, Ne(a, b));  // per-lane a != b
+  const VFromD<D> down = detail::Slide1Down(neHL);
+  // b(267743505): Clang compiler bug, workaround is DoNotOptimize
+  asm volatile("" : : "r,m"(GetLane(down)) : "memory");
+  // Replicate H to its neighbor.
+  return MaskFromVec(OddEven(neHL, down));
+}
+
+// ------------------------------ Min128, Max128 (Lt128)
+
+template <class D>
+HWY_INLINE VFromD<D> Min128(D /* tag */, const VFromD<D> a, const VFromD<D> b) {
+  const VFromD<D> aXH = detail::Slide1Down(a);  // upper lane moved down
+  const VFromD<D> bXH = detail::Slide1Down(b);
+  const VFromD<D> minHL = Min(a, b);
+  const MFromD<D> ltXH = Lt(aXH, bXH);
+  const MFromD<D> eqXH = Eq(aXH, bXH);
+  // If the upper lane is the decider, take lo from the same reg.
+  const VFromD<D> lo = IfThenElse(ltXH, a, b);
+  // The upper lane is just minHL; if they are equal, we also need to use the
+  // actual min of the lower lanes.
+  return OddEven(minHL, IfThenElse(eqXH, minHL, lo));
+}
+
+template <class D>
+HWY_INLINE VFromD<D> Max128(D /* tag */, const VFromD<D> a, const VFromD<D> b) {
+  const VFromD<D> aXH = detail::Slide1Down(a);  // upper lane moved down
+  const VFromD<D> bXH = detail::Slide1Down(b);
+  const VFromD<D> maxHL = Max(a, b);
+  const MFromD<D> ltXH = Lt(aXH, bXH);
+  const MFromD<D> eqXH = Eq(aXH, bXH);
+  // If the upper lane is the decider, take lo from the same reg.
+  const VFromD<D> lo = IfThenElse(ltXH, b, a);
+  // The upper lane is just maxHL; if they are equal, we also need to use the
+  // actual max of the lower lanes.
+  return OddEven(maxHL, IfThenElse(eqXH, maxHL, lo));
+}
+
+template <class D>
+HWY_INLINE VFromD<D> Min128Upper(D d, VFromD<D> a, VFromD<D> b) {
+  return IfThenElse(Lt128Upper(d, a, b), a, b);  // a where a < b, else b
+}
+
+template <class D>
+HWY_INLINE VFromD<D> Max128Upper(D d, VFromD<D> a, VFromD<D> b) {
+  return IfThenElse(Lt128Upper(d, b, a), a, b);  // a where b < a, else b
+}
+
+// ================================================== END MACROS
+namespace detail {  // for code folding
+#undef HWY_RVV_AVL
+#undef HWY_RVV_D
+#undef HWY_RVV_FOREACH
+#undef HWY_RVV_FOREACH_08_ALL
+#undef HWY_RVV_FOREACH_08_ALL_VIRT
+#undef HWY_RVV_FOREACH_08_DEMOTE
+#undef HWY_RVV_FOREACH_08_DEMOTE_VIRT
+#undef HWY_RVV_FOREACH_08_EXT
+#undef HWY_RVV_FOREACH_08_EXT_VIRT
+#undef HWY_RVV_FOREACH_08_TRUNC
+#undef HWY_RVV_FOREACH_08_VIRT
+#undef HWY_RVV_FOREACH_16_ALL
+#undef HWY_RVV_FOREACH_16_ALL_VIRT
+#undef HWY_RVV_FOREACH_16_DEMOTE
+#undef HWY_RVV_FOREACH_16_DEMOTE_VIRT
+#undef HWY_RVV_FOREACH_16_EXT
+#undef HWY_RVV_FOREACH_16_EXT_VIRT
+#undef HWY_RVV_FOREACH_16_TRUNC
+#undef HWY_RVV_FOREACH_16_VIRT
+#undef HWY_RVV_FOREACH_32_ALL
+#undef HWY_RVV_FOREACH_32_ALL_VIRT
+#undef HWY_RVV_FOREACH_32_DEMOTE
+#undef HWY_RVV_FOREACH_32_DEMOTE_VIRT
+#undef HWY_RVV_FOREACH_32_EXT
+#undef HWY_RVV_FOREACH_32_EXT_VIRT
+#undef HWY_RVV_FOREACH_32_TRUNC
+#undef HWY_RVV_FOREACH_32_VIRT
+#undef HWY_RVV_FOREACH_64_ALL
+#undef HWY_RVV_FOREACH_64_ALL_VIRT
+#undef HWY_RVV_FOREACH_64_DEMOTE
+#undef HWY_RVV_FOREACH_64_DEMOTE_VIRT
+#undef HWY_RVV_FOREACH_64_EXT
+#undef HWY_RVV_FOREACH_64_EXT_VIRT
+#undef HWY_RVV_FOREACH_64_TRUNC
+#undef HWY_RVV_FOREACH_64_VIRT
+#undef HWY_RVV_FOREACH_B
+#undef HWY_RVV_FOREACH_F
+#undef HWY_RVV_FOREACH_F16
+#undef HWY_RVV_FOREACH_F32
+#undef HWY_RVV_FOREACH_F3264
+#undef HWY_RVV_FOREACH_F64
+#undef HWY_RVV_FOREACH_I
+#undef HWY_RVV_FOREACH_I08
+#undef HWY_RVV_FOREACH_I16
+#undef HWY_RVV_FOREACH_I163264
+#undef HWY_RVV_FOREACH_I32
+#undef HWY_RVV_FOREACH_I64
+#undef HWY_RVV_FOREACH_U
+#undef HWY_RVV_FOREACH_U08
+#undef HWY_RVV_FOREACH_U16
+#undef HWY_RVV_FOREACH_U163264
+#undef HWY_RVV_FOREACH_U32
+#undef HWY_RVV_FOREACH_U64
+#undef HWY_RVV_FOREACH_UI
+#undef HWY_RVV_FOREACH_UI08
+#undef HWY_RVV_FOREACH_UI16
+#undef HWY_RVV_FOREACH_UI163264
+#undef HWY_RVV_FOREACH_UI32
+#undef HWY_RVV_FOREACH_UI3264
+#undef HWY_RVV_FOREACH_UI64
+#undef HWY_RVV_INSERT_VXRM
+#undef HWY_RVV_M
+#undef HWY_RVV_RETM_ARGM
+#undef HWY_RVV_RETV_ARGV
+#undef HWY_RVV_RETV_ARGVS
+#undef HWY_RVV_RETV_ARGVV
+#undef HWY_RVV_T
+#undef HWY_RVV_V
+}  // namespace detail
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/ops/scalar-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/scalar-inl.h
new file mode 100644
index 0000000000..cfd98f7fab
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/scalar-inl.h
@@ -0,0 +1,1921 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Single-element vectors and operations.
+// External include guard in highway.h - see comment there.
+
+#ifndef HWY_NO_LIBCXX
+#include <math.h>  // sqrtf
+#endif
+
+#include "hwy/ops/shared-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Single instruction, single data: descriptor tag for exactly one lane of T.
+template <typename T>
+using Sisd = Simd<T, 1, 0>;
+
+// One-lane vector. (Wrapper class required for overloading operators.)
+template <typename T>
+struct Vec1 {
+  using PrivateT = T;                     // only for DFromV
+  static constexpr size_t kPrivateN = 1;  // only for DFromV
+
+  HWY_INLINE Vec1() = default;
+  Vec1(const Vec1&) = default;
+  Vec1& operator=(const Vec1&) = default;
+  HWY_INLINE explicit Vec1(const T t) : raw(t) {}
+
+  HWY_INLINE Vec1& operator*=(const Vec1 other) {  // all compound assignments
+    return *this = (*this * other);                // forward to the binary op
+  }
+  HWY_INLINE Vec1& operator/=(const Vec1 other) {
+    return *this = (*this / other);
+  }
+  HWY_INLINE Vec1& operator+=(const Vec1 other) {
+    return *this = (*this + other);
+  }
+  HWY_INLINE Vec1& operator-=(const Vec1 other) {
+    return *this = (*this - other);
+  }
+  HWY_INLINE Vec1& operator&=(const Vec1 other) {
+    return *this = (*this & other);
+  }
+  HWY_INLINE Vec1& operator|=(const Vec1 other) {
+    return *this = (*this | other);
+  }
+  HWY_INLINE Vec1& operator^=(const Vec1 other) {
+    return *this = (*this ^ other);
+  }
+
+  T raw;  // the value of the single lane
+};
+
+// One-lane mask: bits is 0 or FF..FF, same size as Vec1.
+template <typename T>
+class Mask1 {
+  using Raw = hwy::MakeUnsigned<T>;
+
+ public:
+  static HWY_INLINE Mask1<T> FromBool(bool b) {
+    Mask1<T> mask;
+    mask.bits = b ? static_cast<Raw>(~Raw{0}) : 0;  // true -> all bits set
+    return mask;
+  }
+
+  Raw bits;  // unsigned integer of the same size as T
+};
+
+template <class V>  // tag type for vector type V, built from its Private* members
+using DFromV = Simd<typename V::PrivateT, V::kPrivateN, 0>;
+
+template <class V>  // lane type of vector type V
+using TFromV = typename V::PrivateT;
+
+// ------------------------------ BitCast
+
+template <class DTo, typename TTo = TFromD<DTo>, typename TFrom>
+HWY_API Vec1<TTo> BitCast(DTo /* tag */, Vec1<TFrom> v) {
+  static_assert(sizeof(TTo) <= sizeof(TFrom), "Promoting is undefined");
+  TTo to;  // fully overwritten by the byte copy below
+  CopyBytes<sizeof(TTo)>(&v.raw, &to);  // not same size - ok to shrink
+  return Vec1<TTo>(to);
+}
+
+// ------------------------------ Zero
+
+template <class D, HWY_IF_LANES_D(D, 1), typename T = TFromD<D>>
+HWY_API Vec1<T> Zero(D /* tag */) {
+  Vec1<T> v;
+  ZeroBytes<sizeof(v.raw)>(&v.raw);  // zeroes every byte of the lane
+  return v;
+}
+
+template <class D>  // vector type for tag D, deduced from what Zero returns
+using VFromD = decltype(Zero(D()));
+
+// ------------------------------ Tuple (VFromD)
+#include "hwy/ops/tuple-inl.h"
+
+// ------------------------------ Set
+template <class D, HWY_IF_LANES_D(D, 1), typename T = TFromD<D>, typename T2>
+HWY_API Vec1<T> Set(D /* tag */, const T2 t) {
+  return Vec1<T>(static_cast<T>(t));  // t is converted to the lane type T
+}
+
+// ------------------------------ Undefined
+template <class D, HWY_IF_LANES_D(D, 1), typename T = TFromD<D>>
+HWY_API Vec1<T> Undefined(D d) {
+  return Zero(d);  // this target simply returns a zero vector
+}
+
+// ------------------------------ Iota
+template <class D, HWY_IF_LANES_D(D, 1), typename T = TFromD<D>, typename T2>
+HWY_API Vec1<T> Iota(const D /* tag */, const T2 first) {
+  return Vec1<T>(static_cast<T>(first));  // the only lane holds `first`
+}
+
+// ------------------------------ ResizeBitCast
+
+template <class D, typename FromV>
+HWY_API VFromD<D> ResizeBitCast(D /* tag */, FromV v) {
+  using TFrom = TFromV<FromV>;
+  using TTo = TFromD<D>;
+  constexpr size_t kCopyLen = HWY_MIN(sizeof(TFrom), sizeof(TTo));
+  TTo to = TTo{0};  // zero first so bytes beyond kCopyLen remain zero
+  CopyBytes<kCopyLen>(&v.raw, &to);
+  return VFromD<D>(to);
+}
+
+namespace detail {
+
+// ResizeBitCast on the HWY_SCALAR target has zero-extending semantics if
+// sizeof(TFromD<DTo>) is greater than sizeof(TFromV<FromV>).
+template <class FromSizeTag, class ToSizeTag, class DTo, class DFrom>
+HWY_INLINE VFromD<DTo> ZeroExtendResizeBitCast(FromSizeTag /* from_size_tag */,
+                                               ToSizeTag /* to_size_tag */,
+                                               DTo d_to, DFrom /*d_from*/,
+                                               VFromD<DFrom> v) {
+  return ResizeBitCast(d_to, v);  // size tags and d_from are unused here
+}
+
+}  // namespace detail
+
+// ================================================== LOGICAL
+
+// ------------------------------ Not
+
+template <typename T>
+HWY_API Vec1<T> Not(const Vec1<T> v) {
+  using TU = MakeUnsigned<T>;  // operate on the unsigned bit pattern
+  const Sisd<TU> du;
+  return BitCast(Sisd<T>(), Vec1<TU>(static_cast<TU>(~BitCast(du, v).raw)));
+}
+
+// ------------------------------ And
+
+template <typename T>
+HWY_API Vec1<T> And(const Vec1<T> a, const Vec1<T> b) {
+  using TU = MakeUnsigned<T>;  // operate on the unsigned bit pattern
+  const Sisd<TU> du;
+  return BitCast(Sisd<T>(), Vec1<TU>(BitCast(du, a).raw & BitCast(du, b).raw));
+}
+template <typename T>
+HWY_API Vec1<T> operator&(const Vec1<T> a, const Vec1<T> b) {
+  return And(a, b);  // operator form of And
+}
+
+// ------------------------------ AndNot
+
+template <typename T>
+HWY_API Vec1<T> AndNot(const Vec1<T> a, const Vec1<T> b) {
+  using TU = MakeUnsigned<T>;  // computes ~a & b on the unsigned bit pattern
+  const Sisd<TU> du;
+  return BitCast(Sisd<T>(), Vec1<TU>(static_cast<TU>(~BitCast(du, a).raw &
+                                                     BitCast(du, b).raw)));
+}
+
+// ------------------------------ Or
+
+template <typename T>
+HWY_API Vec1<T> Or(const Vec1<T> a, const Vec1<T> b) {
+  using TU = MakeUnsigned<T>;  // operate on the unsigned bit pattern
+  const Sisd<TU> du;
+  return BitCast(Sisd<T>(), Vec1<TU>(BitCast(du, a).raw | BitCast(du, b).raw));
+}
+template <typename T>
+HWY_API Vec1<T> operator|(const Vec1<T> a, const Vec1<T> b) {
+  return Or(a, b);  // operator form of Or
+}
+
+// ------------------------------ Xor
+
+template <typename T>
+HWY_API Vec1<T> Xor(const Vec1<T> a, const Vec1<T> b) {
+  using TU = MakeUnsigned<T>;  // operate on the unsigned bit pattern
+  const Sisd<TU> du;
+  return BitCast(Sisd<T>(), Vec1<TU>(BitCast(du, a).raw ^ BitCast(du, b).raw));
+}
+template <typename T>
+HWY_API Vec1<T> operator^(const Vec1<T> a, const Vec1<T> b) {
+  return Xor(a, b);  // operator form of Xor
+}
+
+// ------------------------------ Xor3
+
+template <typename T>
+HWY_API Vec1<T> Xor3(Vec1<T> x1, Vec1<T> x2, Vec1<T> x3) {
+  return Xor(x1, Xor(x2, x3));  // x1 ^ x2 ^ x3
+}
+
+// ------------------------------ Or3
+
+template <typename T>
+HWY_API Vec1<T> Or3(Vec1<T> o1, Vec1<T> o2, Vec1<T> o3) {
+  return Or(o1, Or(o2, o3));  // o1 | o2 | o3
+}
+
+// ------------------------------ OrAnd
+
+template <typename T>
+HWY_API Vec1<T> OrAnd(const Vec1<T> o, const Vec1<T> a1, const Vec1<T> a2) {
+  return Or(o, And(a1, a2));  // o | (a1 & a2)
+}
+
+// ------------------------------ Mask
+
+template <class DTo, typename TTo = TFromD<DTo>, typename TFrom>
+HWY_API Mask1<TTo> RebindMask(DTo /*tag*/, Mask1<TFrom> m) {
+  static_assert(sizeof(TFrom) == sizeof(TTo), "Must have same size");
+  return Mask1<TTo>{m.bits};  // same bits, retagged with the new lane type
+}
+
+// v must be 0 or FF..FF; its bytes are copied into the mask unchanged.
+template <typename T>
+HWY_API Mask1<T> MaskFromVec(const Vec1<T> v) {
+  Mask1<T> mask;
+  CopySameSize(&v, &mask);
+  return mask;
+}
+
+template <class D>  // mask type for tag D, deduced from MaskFromVec
+using MFromD = decltype(MaskFromVec(VFromD<D>()));
+
+template <typename T>
+Vec1<T> VecFromMask(const Mask1<T> mask) {
+  Vec1<T> v;
+  CopySameSize(&mask, &v);  // mask bytes become the lane's bytes
+  return v;
+}
+
+template <class D, typename T = TFromD<D>>
+Vec1<T> VecFromMask(D /* tag */, const Mask1<T> mask) {
+  Vec1<T> v;
+  CopySameSize(&mask, &v);  // mask bytes become the lane's bytes
+  return v;
+}
+
+template <class D, HWY_IF_LANES_D(D, 1), typename T = TFromD<D>>
+HWY_API Mask1<T> FirstN(D /*tag*/, size_t n) {
+  return Mask1<T>::FromBool(n != 0);  // the only lane is set iff n is nonzero
+}
+
+// ------------------------------ IfVecThenElse
+template <typename T>
+HWY_API Vec1<T> IfVecThenElse(Vec1<T> mask, Vec1<T> yes, Vec1<T> no) {
+  return IfThenElse(MaskFromVec(mask), yes, no);  // vector used as a mask
+}
+
+// ------------------------------ CopySign
+template <typename T>
+HWY_API Vec1<T> CopySign(const Vec1<T> magn, const Vec1<T> sign) {
+  static_assert(IsFloat<T>(), "Only makes sense for floating-point");
+  const DFromV<decltype(magn)> d;
+  return BitwiseIfThenElse(SignBit(d), sign, magn);  // SignBit(d) is the mask
+}
+
+// ------------------------------ CopySignToAbs
+template <typename T>
+HWY_API Vec1<T> CopySignToAbs(const Vec1<T> abs, const Vec1<T> sign) {
+  static_assert(IsFloat<T>(), "Only makes sense for floating-point");
+  const Sisd<T> d;
+  return OrAnd(abs, SignBit(d), sign);  // abs | (SignBit(d) & sign)
+}
+
+// ------------------------------ BroadcastSignBit
+template <typename T>
+HWY_API Vec1<T> BroadcastSignBit(const Vec1<T> v) {
+  // This is used inside ShiftRight, so we cannot implement in terms of it.
+  return v.raw < 0 ? Vec1<T>(T(-1)) : Vec1<T>(0);  // negative -> T(-1), else 0
+}
+
+// ------------------------------ PopulationCount
+
+#ifdef HWY_NATIVE_POPCNT
+#undef HWY_NATIVE_POPCNT
+#else
+#define HWY_NATIVE_POPCNT
+#endif
+
+template <typename T>
+HWY_API Vec1<T> PopulationCount(Vec1<T> v) {
+  return Vec1<T>(static_cast<T>(PopCount(v.raw)));  // PopCount result as T
+}
+
+// ------------------------------ IfThenElse
+
+// Returns mask ? yes : no (any nonzero mask.bits selects `yes`).
+template <typename T>
+HWY_API Vec1<T> IfThenElse(const Mask1<T> mask, const Vec1<T> yes,
+                           const Vec1<T> no) {
+  return mask.bits ? yes : no;
+}
+
+template <typename T>
+HWY_API Vec1<T> IfThenElseZero(const Mask1<T> mask, const Vec1<T> yes) {
+  return mask.bits ? yes : Vec1<T>(0);  // mask ? yes : 0
+}
+
+template <typename T>
+HWY_API Vec1<T> IfThenZeroElse(const Mask1<T> mask, const Vec1<T> no) {
+  return mask.bits ? Vec1<T>(0) : no;  // mask ? 0 : no
+}
+
+template <typename T>
+HWY_API Vec1<T> IfNegativeThenElse(Vec1<T> v, Vec1<T> yes, Vec1<T> no) {
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;
+  const auto vi = BitCast(di, v);  // view v's bits as a signed integer
+
+  return vi.raw < 0 ? yes : no;  // the signed view is negative -> yes
+}
+
+template <typename T>
+HWY_API Vec1<T> ZeroIfNegative(const Vec1<T> v) {
+  return v.raw < 0 ? Vec1<T>(0) : v;  // negative values become 0
+}
+
+// ------------------------------ Mask logical
+
+template <typename T>
+HWY_API Mask1<T> Not(const Mask1<T> m) {
+  return MaskFromVec(Not(VecFromMask(Sisd<T>(), m)));  // via the vector Not
+}
+
+template <typename T>
+HWY_API Mask1<T> And(const Mask1<T> a, Mask1<T> b) {
+  const Sisd<T> d;
+  return MaskFromVec(And(VecFromMask(d, a), VecFromMask(d, b)));  // a & b
+}
+
+template <typename T>
+HWY_API Mask1<T> AndNot(const Mask1<T> a, Mask1<T> b) {
+  const Sisd<T> d;
+  return MaskFromVec(AndNot(VecFromMask(d, a), VecFromMask(d, b)));  // ~a & b
+}
+
+template <typename T>
+HWY_API Mask1<T> Or(const Mask1<T> a, Mask1<T> b) {
+  const Sisd<T> d;
+  return MaskFromVec(Or(VecFromMask(d, a), VecFromMask(d, b)));  // a | b
+}
+
+template <typename T>
+HWY_API Mask1<T> Xor(const Mask1<T> a, Mask1<T> b) {
+  const Sisd<T> d;
+  return MaskFromVec(Xor(VecFromMask(d, a), VecFromMask(d, b)));  // a ^ b
+}
+
+template <typename T>
+HWY_API Mask1<T> ExclusiveNeither(const Mask1<T> a, Mask1<T> b) {
+  const Sisd<T> d;  // result is ~a & ~b
+  return MaskFromVec(AndNot(VecFromMask(d, a), Not(VecFromMask(d, b))));
+}
+
+template <class T>
+HWY_API Mask1<T> SetAtOrAfterFirst(Mask1<T> mask) {
+  return mask;  // single lane: the mask is returned unchanged
+}
+
+template <class T>
+HWY_API Mask1<T> SetBeforeFirst(Mask1<T> mask) {
+  return Not(mask);  // single lane: the inverted mask
+}
+
+template <class T>
+HWY_API Mask1<T> SetOnlyFirst(Mask1<T> mask) {
+  return mask;  // single lane: the mask is returned unchanged
+}
+
+template <class T>
+HWY_API Mask1<T> SetAtOrBeforeFirst(Mask1<T> /*mask*/) {
+  return Mask1<T>::FromBool(true);  // always set, regardless of the input
+}
+
+// ================================================== SHIFTS
+
+// ------------------------------ ShiftLeft/ShiftRight (BroadcastSignBit)
+
+template <int kBits, typename T>
+HWY_API Vec1<T> ShiftLeft(const Vec1<T> v) {
+  static_assert(0 <= kBits && kBits < sizeof(T) * 8, "Invalid shift");
+  return Vec1<T>(  // the shift is performed on the unsigned type
+      static_cast<T>(static_cast<hwy::MakeUnsigned<T>>(v.raw) << kBits));
+}
+
+template <int kBits, typename T>
+HWY_API Vec1<T> ShiftRight(const Vec1<T> v) {
+  static_assert(0 <= kBits && kBits < sizeof(T) * 8, "Invalid shift");
+#if __cplusplus >= 202002L
+  // Signed right shift is now guaranteed to be arithmetic (rounding toward
+  // negative infinity, i.e. shifting in the sign bit).
+  return Vec1<T>(static_cast<T>(v.raw >> kBits));
+#else
+  if (IsSigned<T>()) {
+    // Emulate arithmetic shift using only logical (unsigned) shifts, because
+    // signed shifts are still implementation-defined.
+    using TU = hwy::MakeUnsigned<T>;
+    const Sisd<TU> du;
+    const TU shifted = static_cast<TU>(BitCast(du, v).raw >> kBits);
+    const TU sign = BitCast(du, BroadcastSignBit(v)).raw;  // 0 or all-ones
+    const size_t sign_shift =
+        static_cast<size_t>(static_cast<int>(sizeof(TU)) * 8 - 1 - kBits);
+    const TU upper = static_cast<TU>(sign << sign_shift);  // top kBits+1 bits
+    return BitCast(Sisd<T>(), Vec1<TU>(shifted | upper));
+  } else {  // T is unsigned
+    return Vec1<T>(static_cast<T>(v.raw >> kBits));
+  }
+#endif
+}
+
+// ------------------------------ RotateRight (ShiftRight)
+template <int kBits, typename T>
+HWY_API Vec1<T> RotateRight(const Vec1<T> v) {
+  constexpr size_t kSizeInBits = sizeof(T) * 8;
+  static_assert(0 <= kBits && kBits < kSizeInBits, "Invalid shift");
+  if (kBits == 0) return v;  // nothing to rotate
+  return Or(ShiftRight<kBits>(v),  // HWY_MIN keeps the left-shift count valid
+            ShiftLeft<HWY_MIN(kSizeInBits - 1, kSizeInBits - kBits)>(v));
+}
+
+// ------------------------------ ShiftLeftSame (BroadcastSignBit)
+
+// Shifts the lane left by the runtime count `bits`. The shift is performed
+// on the unsigned counterpart of T and the result is cast back.
+template <typename T>
+HWY_API Vec1<T> ShiftLeftSame(const Vec1<T> v, int bits) {
+  using TU = hwy::MakeUnsigned<T>;
+  const TU lane = static_cast<TU>(v.raw);
+  return Vec1<T>(static_cast<T>(lane << bits));
+}
+
+// Shifts the lane right by the runtime count `bits`. Signed lanes are shifted
+// arithmetically (the sign bit is shifted in); unsigned lanes logically.
+template <typename T>
+HWY_API Vec1<T> ShiftRightSame(const Vec1<T> v, int bits) {
+#if __cplusplus >= 202002L
+  // Since C++20, right-shifting a signed value is guaranteed to be an
+  // arithmetic shift, so the built-in operator suffices for every T.
+  return Vec1<T>(static_cast<T>(v.raw >> bits));
+#else
+  if (!IsSigned<T>()) {
+    return Vec1<T>(static_cast<T>(v.raw >> bits));
+  }
+  // Before C++20, signed right shifts are implementation-defined, so build
+  // the arithmetic shift from logical (unsigned) shifts only.
+  using TU = hwy::MakeUnsigned<T>;
+  const Sisd<TU> du;
+  const TU sign_fill = BitCast(du, BroadcastSignBit(v)).raw;
+  const TU logical = static_cast<TU>(BitCast(du, v).raw >> bits);
+  const size_t fill_shift =
+      static_cast<size_t>(static_cast<int>(sizeof(TU)) * 8 - 1 - bits);
+  const TU high_bits = static_cast<TU>(sign_fill << fill_shift);
+  return BitCast(Sisd<T>(), Vec1<TU>(logical | high_bits));
+#endif
+}
+
+// ------------------------------ Shl
+
+// With one lane, a per-lane variable shift is ShiftLeftSame with the count
+// taken from the lane of `bits`.
+template <typename T>
+HWY_API Vec1<T> operator<<(const Vec1<T> v, const Vec1<T> bits) {
+  const int count = static_cast<int>(bits.raw);
+  return ShiftLeftSame(v, count);
+}
+
+// With one lane, a per-lane variable shift is ShiftRightSame with the count
+// taken from the lane of `bits`.
+template <typename T>
+HWY_API Vec1<T> operator>>(const Vec1<T> v, const Vec1<T> bits) {
+  const int count = static_cast<int>(bits.raw);
+  return ShiftRightSame(v, count);
+}
+
+// ================================================== ARITHMETIC
+
+// Lane addition for the generic overload: the sum is computed in uint64_t
+// (so it cannot trigger signed overflow) and then narrowed back to T.
+template <typename T>
+HWY_API Vec1<T> operator+(Vec1<T> a, Vec1<T> b) {
+  const uint64_t lane_mask = static_cast<uint64_t>(~T(0));
+  const uint64_t sum =
+      static_cast<uint64_t>(a.raw) + static_cast<uint64_t>(b.raw);
+  return Vec1<T>(static_cast<T>(sum & lane_mask));
+}
+// Floating-point overloads use ordinary addition.
+HWY_API Vec1<float> operator+(const Vec1<float> a, const Vec1<float> b) {
+  const float sum = a.raw + b.raw;
+  return Vec1<float>(sum);
+}
+HWY_API Vec1<double> operator+(const Vec1<double> a, const Vec1<double> b) {
+  const double sum = a.raw + b.raw;
+  return Vec1<double>(sum);
+}
+
+// Lane subtraction for the generic overload: the difference is computed in
+// uint64_t (so it cannot trigger signed overflow) and narrowed back to T.
+template <typename T>
+HWY_API Vec1<T> operator-(Vec1<T> a, Vec1<T> b) {
+  const uint64_t lane_mask = static_cast<uint64_t>(~T(0));
+  const uint64_t difference =
+      static_cast<uint64_t>(a.raw) - static_cast<uint64_t>(b.raw);
+  return Vec1<T>(static_cast<T>(difference & lane_mask));
+}
+// Floating-point overloads use ordinary subtraction.
+HWY_API Vec1<float> operator-(const Vec1<float> a, const Vec1<float> b) {
+  const float difference = a.raw - b.raw;
+  return Vec1<float>(difference);
+}
+HWY_API Vec1<double> operator-(const Vec1<double> a, const Vec1<double> b) {
+  const double difference = a.raw - b.raw;
+  return Vec1<double>(difference);
+}
+
+// ------------------------------ SumsOf8
+
+// With a single u8 lane there is nothing to accumulate: the lane is just
+// widened to uint64_t.
+HWY_API Vec1<uint64_t> SumsOf8(const Vec1<uint8_t> v) {
+  const uint64_t widened = v.raw;
+  return Vec1<uint64_t>(widened);
+}
+
+// ------------------------------ SaturatedAdd
+
+// Returns a + b clamped to the range of the lane type. Each overload forms
+// the sum in a wider integer type and clamps it before narrowing.
+
+// Unsigned: clamp to [0, max].
+HWY_API Vec1<uint8_t> SaturatedAdd(const Vec1<uint8_t> a,
+                                   const Vec1<uint8_t> b) {
+  const int sum = a.raw + b.raw;  // operands are promoted to int
+  const int clamped = HWY_MIN(HWY_MAX(0, sum), 255);
+  return Vec1<uint8_t>(static_cast<uint8_t>(clamped));
+}
+HWY_API Vec1<uint16_t> SaturatedAdd(const Vec1<uint16_t> a,
+                                    const Vec1<uint16_t> b) {
+  const int32_t sum = static_cast<int32_t>(a.raw) + b.raw;
+  const int32_t clamped = HWY_MIN(HWY_MAX(0, sum), 65535);
+  return Vec1<uint16_t>(static_cast<uint16_t>(clamped));
+}
+
+// Signed: clamp to [min, max].
+HWY_API Vec1<int8_t> SaturatedAdd(const Vec1<int8_t> a, const Vec1<int8_t> b) {
+  const int sum = a.raw + b.raw;  // operands are promoted to int
+  const int clamped = HWY_MIN(HWY_MAX(-128, sum), 127);
+  return Vec1<int8_t>(static_cast<int8_t>(clamped));
+}
+HWY_API Vec1<int16_t> SaturatedAdd(const Vec1<int16_t> a,
+                                   const Vec1<int16_t> b) {
+  const int32_t sum = static_cast<int32_t>(a.raw) + b.raw;
+  const int32_t clamped = HWY_MIN(HWY_MAX(-32768, sum), 32767);
+  return Vec1<int16_t>(static_cast<int16_t>(clamped));
+}
+
+// ------------------------------ Saturating subtraction
+
+// Returns a - b clamped to the range of the lane type. Each overload forms
+// the difference in a wider integer type and clamps it before narrowing.
+
+// Unsigned: clamp to [0, max].
+HWY_API Vec1<uint8_t> SaturatedSub(const Vec1<uint8_t> a,
+                                   const Vec1<uint8_t> b) {
+  const int difference = a.raw - b.raw;  // operands are promoted to int
+  const int clamped = HWY_MIN(HWY_MAX(0, difference), 255);
+  return Vec1<uint8_t>(static_cast<uint8_t>(clamped));
+}
+HWY_API Vec1<uint16_t> SaturatedSub(const Vec1<uint16_t> a,
+                                    const Vec1<uint16_t> b) {
+  const int32_t difference = static_cast<int32_t>(a.raw) - b.raw;
+  const int32_t clamped = HWY_MIN(HWY_MAX(0, difference), 65535);
+  return Vec1<uint16_t>(static_cast<uint16_t>(clamped));
+}
+
+// Signed: clamp to [min, max].
+HWY_API Vec1<int8_t> SaturatedSub(const Vec1<int8_t> a, const Vec1<int8_t> b) {
+  const int difference = a.raw - b.raw;  // operands are promoted to int
+  const int clamped = HWY_MIN(HWY_MAX(-128, difference), 127);
+  return Vec1<int8_t>(static_cast<int8_t>(clamped));
+}
+HWY_API Vec1<int16_t> SaturatedSub(const Vec1<int16_t> a,
+                                   const Vec1<int16_t> b) {
+  const int32_t difference = static_cast<int32_t>(a.raw) - b.raw;
+  const int32_t clamped = HWY_MIN(HWY_MAX(-32768, difference), 32767);
+  return Vec1<int16_t>(static_cast<int16_t>(clamped));
+}
+
+// ------------------------------ Average
+
+// Returns (a + b + 1) / 2, i.e. the average with halves rounded up. The sum
+// is formed after integer promotion of the operands.
+
+HWY_API Vec1<uint8_t> AverageRound(const Vec1<uint8_t> a,
+                                   const Vec1<uint8_t> b) {
+  const auto biased_sum = a.raw + b.raw + 1;
+  return Vec1<uint8_t>(static_cast<uint8_t>(biased_sum / 2));
+}
+HWY_API Vec1<uint16_t> AverageRound(const Vec1<uint16_t> a,
+                                    const Vec1<uint16_t> b) {
+  const auto biased_sum = a.raw + b.raw + 1;
+  return Vec1<uint16_t>(static_cast<uint16_t>(biased_sum / 2));
+}
+
+// ------------------------------ Absolute value
+
+// Integer absolute value. Non-negative lanes and the minimum value (whose
+// negation is not representable) are returned unchanged.
+template <typename T>
+HWY_API Vec1<T> Abs(const Vec1<T> a) {
+  const T lane = a.raw;
+  const bool needs_negation = lane < 0 && lane != hwy::LimitsMin<T>();
+  if (!needs_negation) return a;
+  return Vec1<T>(static_cast<T>(-lane & T{-1}));
+}
+// Floating-point absolute value: clears the sign bit of the representation.
+HWY_API Vec1<float> Abs(Vec1<float> a) {
+  int32_t bits;
+  CopyBytes<sizeof(bits)>(&a.raw, &bits);
+  bits = bits & 0x7FFFFFFF;
+  CopyBytes<sizeof(bits)>(&bits, &a.raw);
+  return a;
+}
+HWY_API Vec1<double> Abs(Vec1<double> a) {
+  int64_t bits;
+  CopyBytes<sizeof(bits)>(&a.raw, &bits);
+  bits = bits & 0x7FFFFFFFFFFFFFFFL;
+  CopyBytes<sizeof(bits)>(&bits, &a.raw);
+  return a;
+}
+
+// ------------------------------ Min/Max
+
+// <cmath> may be unavailable, so these helpers operate directly on the bit
+// representation of float/double.
+namespace detail {
+
+// Returns |f| by clearing the most significant (sign) bit.
+static inline float Abs(float f) {
+  uint32_t bits;
+  CopyBytes<sizeof(bits)>(&f, &bits);
+  bits &= ~0x80000000u;
+  CopyBytes<sizeof(bits)>(&bits, &f);
+  return f;
+}
+static inline double Abs(double f) {
+  uint64_t bits;
+  CopyBytes<sizeof(bits)>(&f, &bits);
+  bits &= ~(1ull << 63);
+  CopyBytes<sizeof(bits)>(&bits, &f);
+  return f;
+}
+
+// Returns true if the most significant (sign) bit of `f` is set.
+static inline bool SignBit(float f) {
+  uint32_t bits;
+  CopyBytes<sizeof(bits)>(&f, &bits);
+  return (bits & 0x80000000u) != 0;
+}
+static inline bool SignBit(double f) {
+  uint64_t bits;
+  CopyBytes<sizeof(bits)>(&f, &bits);
+  return (bits & (1ull << 63)) != 0;
+}
+
+}  // namespace detail
+
+// Integer minimum: returns `a` if it is strictly smaller, otherwise `b`.
+template <typename T, HWY_IF_NOT_FLOAT(T)>
+HWY_API Vec1<T> Min(const Vec1<T> a, const Vec1<T> b) {
+  return (a.raw < b.raw) ? a : b;
+}
+
+// Floating-point minimum. If `a` is NaN, `b` is returned; otherwise if `b`
+// is NaN, `a` is returned.
+template <typename T, HWY_IF_FLOAT(T)>
+HWY_API Vec1<T> Min(const Vec1<T> a, const Vec1<T> b) {
+  if (isnan(a.raw)) return b;
+  if (isnan(b.raw)) return a;
+  return (a.raw < b.raw) ? a : b;
+}
+
+// Integer maximum: returns `a` if it is strictly larger, otherwise `b`.
+template <typename T, HWY_IF_NOT_FLOAT(T)>
+HWY_API Vec1<T> Max(const Vec1<T> a, const Vec1<T> b) {
+  return (a.raw > b.raw) ? a : b;
+}
+
+// Floating-point maximum. If `a` is NaN, `b` is returned; otherwise if `b`
+// is NaN, `a` is returned.
+template <typename T, HWY_IF_FLOAT(T)>
+HWY_API Vec1<T> Max(const Vec1<T> a, const Vec1<T> b) {
+  if (isnan(a.raw)) return b;
+  if (isnan(b.raw)) return a;
+  return (a.raw > b.raw) ? a : b;
+}
+
+// ------------------------------ Floating-point negate
+
+// Floating-point (and special float) negate: XOR with the SignBit vector.
+template <typename T, HWY_IF_FLOAT_OR_SPECIAL(T)>
+HWY_API Vec1<T> Neg(const Vec1<T> v) {
+  const Sisd<T> d;
+  const auto sign = SignBit(d);
+  return Xor(v, sign);
+}
+
+// Integer negate: subtract from zero.
+template <typename T, HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>
+HWY_API Vec1<T> Neg(const Vec1<T> v) {
+  const Sisd<T> d;
+  const auto zero = Zero(d);
+  return zero - v;
+}
+
+// ------------------------------ mul/div
+
+// Per-target flags to prevent generic_ops-inl.h defining 8/64-bit operator*.
+// Each flag is toggled rather than set: if it is currently defined it gets
+// undefined, otherwise it gets defined.
+#ifdef HWY_NATIVE_MUL_8
+#undef HWY_NATIVE_MUL_8
+#else
+#define HWY_NATIVE_MUL_8
+#endif
+#ifdef HWY_NATIVE_MUL_64
+#undef HWY_NATIVE_MUL_64
+#else
+#define HWY_NATIVE_MUL_64
+#endif
+
+// Floating-point multiply: the product is formed in double, then cast to T.
+template <typename T, HWY_IF_FLOAT(T)>
+HWY_API Vec1<T> operator*(const Vec1<T> a, const Vec1<T> b) {
+  const double product = double{a.raw} * b.raw;
+  return Vec1<T>(static_cast<T>(product));
+}
+
+// Integer multiply: the product is formed in uint64_t (so it cannot trigger
+// signed overflow) and then narrowed back to T.
+template <typename T, HWY_IF_NOT_FLOAT(T)>
+HWY_API Vec1<T> operator*(const Vec1<T> a, const Vec1<T> b) {
+  const uint64_t lhs = static_cast<uint64_t>(a.raw);
+  const uint64_t rhs = static_cast<uint64_t>(b.raw);
+  return Vec1<T>(static_cast<T>(lhs * rhs));
+}
+
+// Lane division using the built-in operator for T.
+template <typename T>
+HWY_API Vec1<T> operator/(const Vec1<T> a, const Vec1<T> b) {
+  const T quotient = a.raw / b.raw;
+  return Vec1<T>(quotient);
+}
+
+// Returns the upper 16 bits of the 32-bit product a * b.
+HWY_API Vec1<int16_t> MulHigh(const Vec1<int16_t> a, const Vec1<int16_t> b) {
+  const int product = a.raw * b.raw;  // operands are promoted to int
+  return Vec1<int16_t>(static_cast<int16_t>(product >> 16));
+}
+HWY_API Vec1<uint16_t> MulHigh(const Vec1<uint16_t> a, const Vec1<uint16_t> b) {
+  // Widen to uint32_t before multiplying: uint16_t operands would otherwise
+  // be promoted to "int", whose product may overflow. The numeric result is
+  // the same in practice, but this form is also well-defined.
+  const uint32_t lhs = static_cast<uint32_t>(a.raw);
+  const uint32_t rhs = static_cast<uint32_t>(b.raw);
+  return Vec1<uint16_t>(static_cast<uint16_t>((lhs * rhs) >> 16));
+}
+
+// Returns (a * b + 16384) >> 15: the product scaled down by 2^15, with
+// 16384 (= 2^14) added beforehand for rounding.
+HWY_API Vec1<int16_t> MulFixedPoint15(Vec1<int16_t> a, Vec1<int16_t> b) {
+  const int biased_product = a.raw * b.raw + 16384;
+  return Vec1<int16_t>(static_cast<int16_t>(biased_product >> 15));
+}
+
+// Multiplies even lanes (0, 2 ..) and returns the double-wide result. With a
+// single lane this is the widening product of lane 0.
+template <class T, HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>
+HWY_API Vec1<MakeWide<T>> MulEven(const Vec1<T> a, const Vec1<T> b) {
+  using TW = MakeWide<T>;
+  const TW lhs = a.raw;
+  const TW rhs = b.raw;
+  return Vec1<TW>(static_cast<TW>(lhs * rhs));
+}
+
+// Approximate reciprocal; this implementation computes 1.0f / v directly.
+HWY_API Vec1<float> ApproximateReciprocal(const Vec1<float> v) {
+  const float x = v.raw;
+  // Zero inputs are permitted, but callers must replace the result themselves
+  // (typically via IfThenElse). Returning an arbitrary value here, instead of
+  // dividing by zero, avoids a ubsan error.
+  const float result = (x == 0.0f) ? 0.0f : 1.0f / x;
+  return Vec1<float>(result);
+}
+
+// Floating-point |a - b|. Integer T is handled by generic_ops.
+template <typename T, HWY_IF_FLOAT(T)>
+HWY_API Vec1<T> AbsDiff(const Vec1<T> a, const Vec1<T> b) {
+  const Vec1<T> difference = a - b;
+  return Abs(difference);
+}
+
+// ------------------------------ Floating-point multiply-add variants
+
+// Returns mul * x + add, computed as a separate multiply and add.
+template <typename T>
+HWY_API Vec1<T> MulAdd(const Vec1<T> mul, const Vec1<T> x, const Vec1<T> add) {
+  const Vec1<T> product = mul * x;
+  return product + add;
+}
+
+// Returns add - mul * x.
+template <typename T>
+HWY_API Vec1<T> NegMulAdd(const Vec1<T> mul, const Vec1<T> x,
+                          const Vec1<T> add) {
+  const Vec1<T> product = mul * x;
+  return add - product;
+}
+
+// Returns mul * x - sub.
+template <typename T>
+HWY_API Vec1<T> MulSub(const Vec1<T> mul, const Vec1<T> x, const Vec1<T> sub) {
+  const Vec1<T> product = mul * x;
+  return product - sub;
+}
+
+// Returns (-mul) * x - sub.
+template <typename T>
+HWY_API Vec1<T> NegMulSub(const Vec1<T> mul, const Vec1<T> x,
+                          const Vec1<T> sub) {
+  const Vec1<T> negated_product = Neg(mul) * x;
+  return negated_product - sub;
+}
+
+// ------------------------------ Floating-point square root
+
+// Approximate reciprocal square root: a bit-level initial estimate refined
+// by a single Newton-Raphson step.
+HWY_API Vec1<float> ApproximateReciprocalSqrt(const Vec1<float> v) {
+  float estimate = v.raw;
+  const float half_input = estimate * 0.5f;
+  uint32_t repr;
+  CopySameSize(&estimate, &repr);
+  // Initial guess based on log2 of the input.
+  repr = 0x5F3759DF - (repr >> 1);
+  CopySameSize(&repr, &estimate);
+  // One Newton-Raphson iteration.
+  return Vec1<float>(estimate * (1.5f - (half_input * estimate * estimate)));
+}
+
+// Square root. Uses the C library when available; when HWY_NO_LIBCXX is
+// defined, falls back to a compiler builtin (GCC) or otherwise to a coarse
+// bit-level approximation.
+HWY_API Vec1<float> Sqrt(Vec1<float> v) {
+#if !defined(HWY_NO_LIBCXX)
+  return Vec1<float>(sqrtf(v.raw));
+#elif HWY_COMPILER_GCC_ACTUAL
+  return Vec1<float>(__builtin_sqrt(v.raw));
+#else
+  uint32_t repr;
+  CopyBytes<sizeof(repr)>(&v, &repr);
+  // Coarse approximation, letting the exponent LSB leak into the mantissa
+  const uint32_t halved = repr >> 1;
+  repr = (1 << 29) + halved - (1 << 22);
+  CopyBytes<sizeof(repr)>(&repr, &v);
+  return v;
+#endif
+}
+HWY_API Vec1<double> Sqrt(Vec1<double> v) {
+#if !defined(HWY_NO_LIBCXX)
+  return Vec1<double>(sqrt(v.raw));
+#elif HWY_COMPILER_GCC_ACTUAL
+  return Vec1<double>(__builtin_sqrt(v.raw));
+#else
+  uint64_t repr;
+  CopyBytes<sizeof(repr)>(&v, &repr);
+  // Coarse approximation, letting the exponent LSB leak into the mantissa
+  const uint64_t halved = repr >> 1;
+  repr = (1ULL << 61) + halved - (1ULL << 51);
+  CopyBytes<sizeof(repr)>(&repr, &v);
+  return v;
+#endif
+}
+
+// ------------------------------ Floating-point rounding
+
+// Rounds to the nearest integer value, with ties going to the even integer.
+// Inputs that are NaN or not below MantissaEnd<T>() are returned unchanged.
+// A result of zero keeps the sign of the input.
+template <typename T>
+HWY_API Vec1<T> Round(const Vec1<T> v) {
+  using TI = MakeSigned<T>;
+  const bool is_small = Abs(v).raw < MantissaEnd<T>();
+  if (!is_small) {  // Huge or NaN
+    return v;
+  }
+  const bool is_negative = v.raw < T(0.0);
+  const T bias = is_negative ? T(-0.5) : T(0.5);
+  // Adding +/-0.5 and truncating rounds halves away from zero.
+  const TI rounded = static_cast<TI>(v.raw + bias);
+  if (rounded == 0) return CopySignToAbs(Vec1<T>(0), v);
+  // Exact tie that landed on an odd integer: step back toward zero so that
+  // the result is even.
+  const bool is_odd = (rounded & 1) != 0;
+  const bool is_tie = detail::Abs(static_cast<T>(rounded) - v.raw) == T(0.5);
+  if (is_odd && is_tie) {
+    return Vec1<T>(static_cast<T>(rounded - (is_negative ? -1 : 1)));
+  }
+  return Vec1<T>(static_cast<T>(rounded));
+}
+
+// Rounds to the nearest integer, with ties going to the even integer, and
+// returns it as int32_t. NaN and values whose magnitude does not fit in
+// int32_t saturate to LimitsMin/LimitsMax<int32_t> according to the sign bit.
+HWY_API Vec1<int32_t> NearestInt(const Vec1<float> v) {
+  using T = float;
+  using TI = int32_t;
+
+  const T abs = Abs(v).raw;
+  const bool is_sign = detail::SignBit(v.raw);
+
+  if (!(abs < MantissaEnd<T>())) {  // Huge or NaN
+    // Check if too large to cast or NaN. LimitsMax<TI>() is not representable
+    // as float and rounds up to 2^31, which is itself outside the range of
+    // TI. The comparison must therefore be strict: with `<=`, an input of
+    // +2^31 would reach the static_cast below, which is undefined behavior
+    // for out-of-range values. -2^31 also saturates, to LimitsMin<TI>(),
+    // which is exactly its value.
+    if (!(abs < static_cast<T>(LimitsMax<TI>()))) {
+      return Vec1<TI>(is_sign ? LimitsMin<TI>() : LimitsMax<TI>());
+    }
+    return Vec1<int32_t>(static_cast<TI>(v.raw));
+  }
+  // Adding +/-0.5 and truncating rounds halves away from zero.
+  const T bias = v.raw < T(0.0) ? T(-0.5) : T(0.5);
+  const TI rounded = static_cast<TI>(v.raw + bias);
+  if (rounded == 0) return Vec1<int32_t>(0);
+  // Round to even: an exact tie that landed on an odd integer steps back
+  // toward zero.
+  if ((rounded & 1) && detail::Abs(static_cast<T>(rounded) - v.raw) == T(0.5)) {
+    return Vec1<TI>(rounded - (is_sign ? -1 : 1));
+  }
+  return Vec1<TI>(rounded);
+}
+
+// Rounds toward zero. Inputs that are NaN or exceed MantissaEnd<T>() in
+// magnitude are returned unchanged. A result of zero keeps the sign of the
+// input.
+template <typename T>
+HWY_API Vec1<T> Trunc(const Vec1<T> v) {
+  using TI = MakeSigned<T>;
+  const bool is_small = Abs(v).raw <= MantissaEnd<T>();
+  if (!is_small) {  // Huge or NaN
+    return v;
+  }
+  const TI integer_part = static_cast<TI>(v.raw);
+  if (integer_part != 0) {
+    return Vec1<T>(static_cast<T>(integer_part));
+  }
+  return CopySignToAbs(Vec1<T>(0), v);
+}
+
+// Rounds `v` toward +infinity by operating on its bit representation.
+// Float/Bits are the lane type and a same-sized unsigned integer type;
+// kMantissaBits/kExponentBits describe the floating-point format.
+template <typename Float, typename Bits, int kMantissaBits, int kExponentBits,
+          class V>
+V Ceiling(const V v) {
+  const Bits kExponentMask = (1ull << kExponentBits) - 1;
+  const Bits kMantissaMask = (1ull << kMantissaBits) - 1;
+  const Bits kBias = kExponentMask / 2;
+
+  Float f = v.raw;
+  const bool positive = f > Float(0.0);
+
+  Bits bits;
+  CopySameSize(&v, &bits);
+
+  // Unbiased exponent; the unsigned subtraction wraps and the cast to int
+  // makes it negative when the biased exponent is below the bias.
+  const int exponent =
+      static_cast<int>(((bits >> kMantissaBits) & kExponentMask) - kBias);
+  // Already an integer.
+  if (exponent >= kMantissaBits) return v;
+  // |v| < 1 => 1 for positive inputs, -0 for negative ones. Zero itself is
+  // returned unchanged; previously +0.0 was turned into -0.0.
+  if (exponent < 0) {
+    if (positive) return V(1);
+    return (f < Float(0.0)) ? V(-0.0) : v;
+  }
+
+  const Bits mantissa_mask = kMantissaMask >> exponent;
+  // Already an integer
+  if ((bits & mantissa_mask) == 0) return v;
+
+  // Clear fractional bits and round up
+  if (positive) bits += (kMantissaMask + 1) >> exponent;
+  bits &= ~mantissa_mask;
+
+  CopySameSize(&bits, &f);
+  return V(f);
+}
+
+// Rounds `v` toward -infinity by operating on its bit representation.
+// Float/Bits are the lane type and a same-sized unsigned integer type;
+// kMantissaBits/kExponentBits describe the floating-point format.
+template <typename Float, typename Bits, int kMantissaBits, int kExponentBits,
+          class V>
+V Floor(const V v) {
+  const Bits kExponentMask = (1ull << kExponentBits) - 1;
+  const Bits kMantissaMask = (1ull << kMantissaBits) - 1;
+  const Bits kBias = kExponentMask / 2;
+
+  Float f = v.raw;
+  const bool negative = f < Float(0.0);
+
+  Bits bits;
+  CopySameSize(&v, &bits);
+
+  // Unbiased exponent; the unsigned subtraction wraps and the cast to int
+  // makes it negative when the biased exponent is below the bias.
+  const int exponent =
+      static_cast<int>(((bits >> kMantissaBits) & kExponentMask) - kBias);
+  // Already an integer.
+  if (exponent >= kMantissaBits) return v;
+  // |v| < 1 => -1 for negative inputs, +0 for positive ones. Zero itself is
+  // returned unchanged; previously -0.0 was turned into +0.0.
+  if (exponent < 0) {
+    if (negative) return V(Float(-1.0));
+    return (f > Float(0.0)) ? V(Float(0.0)) : v;
+  }
+
+  const Bits mantissa_mask = kMantissaMask >> exponent;
+  // Already an integer
+  if ((bits & mantissa_mask) == 0) return v;
+
+  // Clear fractional bits and round down
+  if (negative) bits += (kMantissaMask + 1) >> exponent;
+  bits &= ~mantissa_mask;
+
+  CopySameSize(&bits, &f);
+  return V(f);
+}
+
+// Rounds toward +infinity (ceiling) by forwarding to Ceiling with the
+// mantissa/exponent widths of the lane type.
+HWY_API Vec1<float> Ceil(const Vec1<float> v) {
+  constexpr int kMantissaBits = 23;
+  constexpr int kExponentBits = 8;
+  return Ceiling<float, uint32_t, kMantissaBits, kExponentBits>(v);
+}
+HWY_API Vec1<double> Ceil(const Vec1<double> v) {
+  constexpr int kMantissaBits = 52;
+  constexpr int kExponentBits = 11;
+  return Ceiling<double, uint64_t, kMantissaBits, kExponentBits>(v);
+}
+
+// Rounds toward -infinity (floor) by forwarding to the bit-level Floor
+// template with the mantissa/exponent widths of the lane type.
+HWY_API Vec1<float> Floor(const Vec1<float> v) {
+  constexpr int kMantissaBits = 23;
+  constexpr int kExponentBits = 8;
+  return Floor<float, uint32_t, kMantissaBits, kExponentBits>(v);
+}
+HWY_API Vec1<double> Floor(const Vec1<double> v) {
+  constexpr int kMantissaBits = 52;
+  constexpr int kExponentBits = 11;
+  return Floor<double, uint64_t, kMantissaBits, kExponentBits>(v);
+}
+
+// ================================================== COMPARE
+
+// Mask is true if the lanes compare equal.
+template <typename T>
+HWY_API Mask1<T> operator==(const Vec1<T> a, const Vec1<T> b) {
+  const bool is_equal = a.raw == b.raw;
+  return Mask1<T>::FromBool(is_equal);
+}
+
+// Mask is true if the lanes compare unequal.
+template <typename T>
+HWY_API Mask1<T> operator!=(const Vec1<T> a, const Vec1<T> b) {
+  const bool is_different = a.raw != b.raw;
+  return Mask1<T>::FromBool(is_different);
+}
+
+// Mask is true if every bit set in `bit` is also set in `v`. Integer lanes
+// only.
+template <typename T>
+HWY_API Mask1<T> TestBit(const Vec1<T> v, const Vec1<T> bit) {
+  static_assert(!hwy::IsFloat<T>(), "Only integer vectors supported");
+  const Vec1<T> selected = v & bit;
+  return selected == bit;
+}
+
+// Mask is true if a < b.
+template <typename T>
+HWY_API Mask1<T> operator<(const Vec1<T> a, const Vec1<T> b) {
+  const bool is_less = a.raw < b.raw;
+  return Mask1<T>::FromBool(is_less);
+}
+// Mask is true if a > b.
+template <typename T>
+HWY_API Mask1<T> operator>(const Vec1<T> a, const Vec1<T> b) {
+  const bool is_greater = a.raw > b.raw;
+  return Mask1<T>::FromBool(is_greater);
+}
+
+// Mask is true if a <= b.
+template <typename T>
+HWY_API Mask1<T> operator<=(const Vec1<T> a, const Vec1<T> b) {
+  const bool is_less_or_equal = a.raw <= b.raw;
+  return Mask1<T>::FromBool(is_less_or_equal);
+}
+// Mask is true if a >= b.
+template <typename T>
+HWY_API Mask1<T> operator>=(const Vec1<T> a, const Vec1<T> b) {
+  const bool is_greater_or_equal = a.raw >= b.raw;
+  return Mask1<T>::FromBool(is_greater_or_equal);
+}
+
+// ------------------------------ Floating-point classification (==)
+
+// Mask is true if the lane is NaN. Implemented on the bit representation
+// because std::isnan returns false for 0x7F..FF in clang AVX3 builds.
+template <typename T>
+HWY_API Mask1<T> IsNaN(const Vec1<T> v) {
+  using TU = MakeUnsigned<T>;
+  TU repr;
+  CopySameSize(&v, &repr);
+  // Shift the sign bit out and back in as zero.
+  repr = static_cast<TU>(repr << 1);
+  repr = static_cast<TU>(repr >> 1);
+  // NaN if all exponent bits are set and the mantissa is not zero.
+  const bool is_nan = repr > ExponentMask<T>();
+  return Mask1<T>::FromBool(is_nan);
+}
+
+// Mask is true if the lane is +/-infinity. Doubling the unsigned
+// representation discards the sign bit; infinity then has all exponent bits
+// set and a zero mantissa.
+HWY_API Mask1<float> IsInf(const Vec1<float> v) {
+  const Sisd<float> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const Vec1<uint32_t> repr = BitCast(du, v);
+  const Vec1<uint32_t> without_sign = repr + repr;
+  const Vec1<uint32_t> infinity = Set(du, 0xFF000000u);
+  return RebindMask(d, without_sign == infinity);
+}
+HWY_API Mask1<double> IsInf(const Vec1<double> v) {
+  const Sisd<double> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const Vec1<uint64_t> repr = BitCast(du, v);
+  const Vec1<uint64_t> without_sign = repr + repr;
+  const Vec1<uint64_t> infinity = Set(du, 0xFFE0000000000000ull);
+  return RebindMask(d, without_sign == infinity);
+}
+
+// Mask is true if the lane is neither infinity nor NaN: after shifting out
+// the sign bit, the exponent field must be below its maximum value.
+HWY_API Mask1<float> IsFinite(const Vec1<float> v) {
+  const Vec1<uint32_t> repr = BitCast(Sisd<uint32_t>(), v);
+  const uint32_t without_sign = repr.raw << 1;
+  return Mask1<float>::FromBool(without_sign < 0xFF000000u);
+}
+HWY_API Mask1<double> IsFinite(const Vec1<double> v) {
+  const Vec1<uint64_t> repr = BitCast(Sisd<uint64_t>(), v);
+  const uint64_t without_sign = repr.raw << 1;
+  return Mask1<double>::FromBool(without_sign < 0xFFE0000000000000ull);
+}
+
+// ================================================== MEMORY
+
+// ------------------------------ Load
+
+// Loads one lane from `aligned` via a byte copy.
+template <class D, HWY_IF_LANES_D(D, 1), typename T = TFromD<D>>
+HWY_API Vec1<T> Load(D /* tag */, const T* HWY_RESTRICT aligned) {
+  T lane;
+  CopySameSize(aligned, &lane);
+  return Vec1<T>(lane);
+}
+
+// Loads the lane and returns it if `m` is set, otherwise zero. The load
+// itself is performed regardless of the mask.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec1<T> MaskedLoad(Mask1<T> m, D d, const T* HWY_RESTRICT aligned) {
+  const Vec1<T> loaded = Load(d, aligned);
+  return IfThenElseZero(m, loaded);
+}
+
+// Loads the lane and returns it if `m` is set, otherwise `v`. The load
+// itself is performed regardless of the mask.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec1<T> MaskedLoadOr(Vec1<T> v, Mask1<T> m, D d,
+                             const T* HWY_RESTRICT aligned) {
+  const Vec1<T> loaded = Load(d, aligned);
+  return IfThenElse(m, loaded, v);
+}
+
+// Unaligned load; forwards to Load.
+template <class D, HWY_IF_LANES_D(D, 1), typename T = TFromD<D>>
+HWY_API Vec1<T> LoadU(D d, const T* HWY_RESTRICT p) {
+  const Vec1<T> loaded = Load(d, p);
+  return loaded;
+}
+
+// Loads a single lane by forwarding to Load. In some use cases, "load
+// single lane" is sufficient; otherwise avoid this.
+template <class D, HWY_IF_LANES_D(D, 1), typename T = TFromD<D>>
+HWY_API Vec1<T> LoadDup128(D d, const T* HWY_RESTRICT aligned) {
+  const Vec1<T> loaded = Load(d, aligned);
+  return loaded;
+}
+
+// Toggles HWY_NATIVE_LOAD_N: undefined if currently defined, otherwise
+// defined. NOTE(review): presumably this tells generic_ops-inl.h not to
+// define its own LoadN, like the other per-target flags here - confirm.
+#ifdef HWY_NATIVE_LOAD_N
+#undef HWY_NATIVE_LOAD_N
+#else
+#define HWY_NATIVE_LOAD_N
+#endif
+
+// Loads the lane from `p` if at least one lane may be loaded; otherwise
+// returns zero without touching `p`.
+template <class D, typename T = TFromD<D>>
+HWY_API VFromD<D> LoadN(D d, const T* HWY_RESTRICT p,
+                        size_t max_lanes_to_load) {
+  if (max_lanes_to_load > 0) {
+    return Load(d, p);
+  }
+  return Zero(d);
+}
+
+// ------------------------------ Store
+
+// Stores the lane to `aligned` via a byte copy.
+template <class D, typename T = TFromD<D>>
+HWY_API void Store(const Vec1<T> v, D /* tag */, T* HWY_RESTRICT aligned) {
+  const T* const source = &v.raw;
+  CopySameSize(source, aligned);
+}
+
+// Unaligned store; forwards to Store.
+template <class D, typename T = TFromD<D>>
+HWY_API void StoreU(const Vec1<T> v, D d, T* HWY_RESTRICT p) {
+  Store(v, d, p);
+}
+
+// Stores the lane to `p` only if the mask is set; otherwise leaves memory
+// untouched.
+template <class D, typename T = TFromD<D>>
+HWY_API void BlendedStore(const Vec1<T> v, Mask1<T> m, D d, T* HWY_RESTRICT p) {
+  if (m.bits) {
+    StoreU(v, d, p);
+  }
+}
+
+// Toggles HWY_NATIVE_STORE_N: undefined if currently defined, otherwise
+// defined. NOTE(review): presumably this tells generic_ops-inl.h not to
+// define its own StoreN, like the other per-target flags here - confirm.
+#ifdef HWY_NATIVE_STORE_N
+#undef HWY_NATIVE_STORE_N
+#else
+#define HWY_NATIVE_STORE_N
+#endif
+
+// Stores the lane to `p` if at least one lane may be stored; otherwise does
+// nothing.
+template <class D, typename T = TFromD<D>>
+HWY_API void StoreN(VFromD<D> v, D d, T* HWY_RESTRICT p,
+                    size_t max_lanes_to_store) {
+  if (max_lanes_to_store == 0) return;
+  Store(v, d, p);
+}
+
+// ------------------------------ LoadInterleaved2/3/4
+
+// Per-target flag to prevent generic_ops-inl.h from defining StoreInterleaved2.
+// The flag is toggled rather than set: if it is currently defined it gets
+// undefined, otherwise it gets defined.
+#ifdef HWY_NATIVE_LOAD_STORE_INTERLEAVED
+#undef HWY_NATIVE_LOAD_STORE_INTERLEAVED
+#else
+#define HWY_NATIVE_LOAD_STORE_INTERLEAVED
+#endif
+
+// Loads two consecutive elements from `unaligned` into v0 and v1.
+template <class D, typename T = TFromD<D>>
+HWY_API void LoadInterleaved2(D d, const T* HWY_RESTRICT unaligned, Vec1<T>& v0,
+                              Vec1<T>& v1) {
+  v0 = LoadU(d, &unaligned[0]);
+  v1 = LoadU(d, &unaligned[1]);
+}
+
+// Loads three consecutive elements from `unaligned` into v0, v1 and v2.
+template <class D, typename T = TFromD<D>>
+HWY_API void LoadInterleaved3(D d, const T* HWY_RESTRICT unaligned, Vec1<T>& v0,
+                              Vec1<T>& v1, Vec1<T>& v2) {
+  v0 = LoadU(d, &unaligned[0]);
+  v1 = LoadU(d, &unaligned[1]);
+  v2 = LoadU(d, &unaligned[2]);
+}
+
+// Loads four consecutive elements from `unaligned` into v0 through v3.
+template <class D, typename T = TFromD<D>>
+HWY_API void LoadInterleaved4(D d, const T* HWY_RESTRICT unaligned, Vec1<T>& v0,
+                              Vec1<T>& v1, Vec1<T>& v2, Vec1<T>& v3) {
+  v0 = LoadU(d, &unaligned[0]);
+  v1 = LoadU(d, &unaligned[1]);
+  v2 = LoadU(d, &unaligned[2]);
+  v3 = LoadU(d, &unaligned[3]);
+}
+
+// ------------------------------ StoreInterleaved2/3/4
+
+// Stores v0 and v1 to two consecutive elements starting at `unaligned`.
+template <class D, typename T = TFromD<D>>
+HWY_API void StoreInterleaved2(const Vec1<T> v0, const Vec1<T> v1, D d,
+                               T* HWY_RESTRICT unaligned) {
+  StoreU(v0, d, &unaligned[0]);
+  StoreU(v1, d, &unaligned[1]);
+}
+
+// Stores v0, v1 and v2 to three consecutive elements starting at
+// `unaligned`.
+template <class D, typename T = TFromD<D>>
+HWY_API void StoreInterleaved3(const Vec1<T> v0, const Vec1<T> v1,
+                               const Vec1<T> v2, D d,
+                               T* HWY_RESTRICT unaligned) {
+  StoreU(v0, d, &unaligned[0]);
+  StoreU(v1, d, &unaligned[1]);
+  StoreU(v2, d, &unaligned[2]);
+}
+
+// Stores v0 through v3 to four consecutive elements starting at
+// `unaligned`.
+template <class D, typename T = TFromD<D>>
+HWY_API void StoreInterleaved4(const Vec1<T> v0, const Vec1<T> v1,
+                               const Vec1<T> v2, const Vec1<T> v3, D d,
+                               T* HWY_RESTRICT unaligned) {
+  StoreU(v0, d, &unaligned[0]);
+  StoreU(v1, d, &unaligned[1]);
+  StoreU(v2, d, &unaligned[2]);
+  StoreU(v3, d, &unaligned[3]);
+}
+
+// ------------------------------ Stream
+
+// Implemented as an ordinary Store.
+template <class D, typename T = TFromD<D>>
+HWY_API void Stream(const Vec1<T> v, D d, T* HWY_RESTRICT aligned) {
+  Store(v, d, aligned);
+}
+
+// ------------------------------ Scatter
+
+// Toggles HWY_NATIVE_SCATTER: undefined if currently defined, otherwise
+// defined. NOTE(review): presumably this tells generic_ops-inl.h not to
+// define its own scatter ops, like the other per-target flags - confirm.
+#ifdef HWY_NATIVE_SCATTER
+#undef HWY_NATIVE_SCATTER
+#else
+#define HWY_NATIVE_SCATTER
+#endif
+
+// Stores the lane at `base` plus a byte offset taken from `offset`.
+template <class D, typename T = TFromD<D>, typename TI>
+HWY_API void ScatterOffset(Vec1<T> v, D d, T* base, Vec1<TI> offset) {
+  static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
+  uint8_t* const base_bytes = reinterpret_cast<uint8_t*>(base);
+  uint8_t* const target_bytes = base_bytes + offset.raw;
+  Store(v, d, reinterpret_cast<T*>(target_bytes));
+}
+
+// Stores the lane to the element of `base` selected by `index`.
+template <class D, typename T = TFromD<D>, typename TI>
+HWY_API void ScatterIndex(Vec1<T> v, D d, T* HWY_RESTRICT base,
+                          Vec1<TI> index) {
+  static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
+  T* const target = base + index.raw;
+  Store(v, d, target);
+}
+
+// Stores the lane to the element of `base` selected by `index`, but only if
+// the mask is set.
+template <class D, typename T = TFromD<D>, typename TI>
+HWY_API void MaskedScatterIndex(Vec1<T> v, Mask1<T> m, D d,
+                                T* HWY_RESTRICT base, Vec1<TI> index) {
+  static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
+  if (!m.bits) return;
+  Store(v, d, base + index.raw);
+}
+
+// ------------------------------ Gather
+
+// Toggles HWY_NATIVE_GATHER: undefined if currently defined, otherwise
+// defined. NOTE(review): presumably this tells generic_ops-inl.h not to
+// define its own gather ops, like the other per-target flags - confirm.
+#ifdef HWY_NATIVE_GATHER
+#undef HWY_NATIVE_GATHER
+#else
+#define HWY_NATIVE_GATHER
+#endif
+
+// Loads the lane from `base` plus a byte offset taken from `offset`. The
+// address is computed with integer arithmetic on intptr_t.
+template <class D, typename T = TFromD<D>, typename TI>
+HWY_API Vec1<T> GatherOffset(D d, const T* base, Vec1<TI> offset) {
+  static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
+  const intptr_t base_addr = reinterpret_cast<intptr_t>(base);
+  const intptr_t byte_offset = static_cast<intptr_t>(offset.raw);
+  const intptr_t addr = base_addr + byte_offset;
+  return Load(d, reinterpret_cast<const T*>(addr));
+}
+
+// Loads the lane from the element of `base` selected by `index`.
+template <class D, typename T = TFromD<D>, typename TI>
+HWY_API Vec1<T> GatherIndex(D d, const T* HWY_RESTRICT base, Vec1<TI> index) {
+  static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
+  const T* const source = base + index.raw;
+  return Load(d, source);
+}
+
+// Forwards to MaskedLoad on the element of `base` selected by `index`.
+template <class D, typename T = TFromD<D>, typename TI>
+HWY_API Vec1<T> MaskedGatherIndex(Mask1<T> m, D d, const T* HWY_RESTRICT base,
+                                  Vec1<TI> index) {
+  static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
+  const T* const source = base + index.raw;
+  return MaskedLoad(m, d, source);
+}
+
+// ================================================== CONVERT
+
+// ConvertTo and DemoteTo with floating-point input and integer output truncate
+// (rounding toward zero).
+
+// Converts the lane to the wider type TTo with a plain static_cast.
+template <class DTo, typename TTo = TFromD<DTo>, typename TFrom>
+HWY_API Vec1<TTo> PromoteTo(DTo /* tag */, Vec1<TFrom> from) {
+  static_assert(sizeof(TTo) > sizeof(TFrom), "Not promoting");
+  // For bits Y > X, floatX->floatY and intX->intY are always representable.
+  const TTo widened = static_cast<TTo>(from.raw);
+  return Vec1<TTo>(widened);
+}
+
+// MSVC 19.10 cannot deduce the argument type if HWY_IF_FLOAT(TFrom) is here,
+// so we overload for TFrom=double and TTo={float,int32_t}.
+
+// double -> float. Infinities and values whose magnitude exceeds the largest
+// finite float are replaced by LowestValue/HighestValue<float> according to
+// the sign bit.
+template <class D, HWY_IF_F32_D(D)>
+HWY_API Vec1<float> DemoteTo(D /* tag */, Vec1<double> from) {
+  // Prevent ubsan errors when converting double to the narrower float.
+  const double limit = static_cast<double>(HighestValue<float>());
+  const bool out_of_range = IsInf(from).bits || Abs(from).raw > limit;
+  if (!out_of_range) {
+    return Vec1<float>(static_cast<float>(from.raw));
+  }
+  const bool is_negative = detail::SignBit(from.raw);
+  return Vec1<float>(is_negative ? LowestValue<float>()
+                                 : HighestValue<float>());
+}
+// double -> int32_t, truncating toward zero. Infinities and values whose
+// magnitude exceeds the largest int32_t are replaced by
+// LowestValue/HighestValue<int32_t> according to the sign bit.
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec1<int32_t> DemoteTo(D /* tag */, Vec1<double> from) {
+  // Prevent ubsan errors when converting double to the narrower int32_t.
+  const double limit = static_cast<double>(HighestValue<int32_t>());
+  const bool out_of_range = IsInf(from).bits || Abs(from).raw > limit;
+  if (!out_of_range) {
+    return Vec1<int32_t>(static_cast<int32_t>(from.raw));
+  }
+  const bool is_negative = detail::SignBit(from.raw);
+  return Vec1<int32_t>(is_negative ? LowestValue<int32_t>()
+                                   : HighestValue<int32_t>());
+}
+
+// Signed integer -> narrower integer: the value is clamped to the range of
+// TTo before narrowing.
+template <class DTo, typename TTo = TFromD<DTo>, typename TFrom,
+          HWY_IF_SIGNED(TFrom), HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<DTo>)>
+HWY_API Vec1<TTo> DemoteTo(DTo /* tag */, Vec1<TFrom> from) {
+  static_assert(!IsFloat<TFrom>(), "TFrom=double are handled above");
+  static_assert(sizeof(TTo) < sizeof(TFrom), "Not demoting");
+
+  // Int to int: choose closest value in TTo to `from` (avoids UB). Both
+  // limits of the narrower TTo are representable in the wider signed TFrom.
+  const TFrom lowest = LimitsMin<TTo>();
+  const TFrom highest = LimitsMax<TTo>();
+  TFrom clamped = from.raw;
+  if (clamped < lowest) clamped = lowest;
+  if (clamped > highest) clamped = highest;
+  return Vec1<TTo>(static_cast<TTo>(clamped));
+}
+
+// Unsigned integer -> narrower unsigned integer: the value is clamped to
+// LimitsMax<TTo>() before narrowing.
+template <class DTo, typename TTo = TFromD<DTo>, typename TFrom,
+          HWY_IF_UNSIGNED(TFrom), HWY_IF_UNSIGNED_D(DTo)>
+HWY_API Vec1<TTo> DemoteTo(DTo /* tag */, Vec1<TFrom> from) {
+  static_assert(!IsFloat<TFrom>(), "TFrom=double are handled above");
+  static_assert(sizeof(TTo) < sizeof(TFrom), "Not demoting");
+
+  // Int to int: choose closest value in TTo to `from` (avoids UB)
+  const TFrom highest = LimitsMax<TTo>();
+  const TFrom clamped = (from.raw < highest) ? from.raw : highest;
+  return Vec1<TTo>(static_cast<TTo>(clamped));
+}
+
+// Per-target flag to prevent generic_ops-inl.h from defining f16 conversions;
+// use this scalar version to verify the vector implementation.
+// The flag is toggled rather than set: if it is currently defined it gets
+// undefined, otherwise it gets defined.
+#ifdef HWY_NATIVE_F16C
+#undef HWY_NATIVE_F16C
+#else
+#define HWY_NATIVE_F16C
+#endif
+
+// float16_t -> float via F32FromF16.
+template <class D, HWY_IF_F32_D(D)>
+HWY_API Vec1<float> PromoteTo(D /* tag */, const Vec1<float16_t> v) {
+  const float widened = F32FromF16(v.raw);
+  return Vec1<float>(widened);
+}
+
+// bfloat16_t -> float via F32FromBF16.
+template <class D, HWY_IF_F32_D(D)>
+HWY_API Vec1<float> PromoteTo(D d, const Vec1<bfloat16_t> v) {
+  const float widened = F32FromBF16(v.raw);
+  return Set(d, widened);
+}
+
+// float -> float16_t via F16FromF32.
+template <class D, HWY_IF_F16_D(D)>
+HWY_API Vec1<float16_t> DemoteTo(D /* tag */, const Vec1<float> v) {
+  const float16_t narrowed = F16FromF32(v.raw);
+  return Vec1<float16_t>(narrowed);
+}
+
+// float -> bfloat16_t via BF16FromF32.
+template <class D, HWY_IF_BF16_D(D)>
+HWY_API Vec1<bfloat16_t> DemoteTo(D d, const Vec1<float> v) {
+  const bfloat16_t narrowed = BF16FromF32(v.raw);
+  return Set(d, narrowed);
+}
+
+// Converts a floating-point lane to the same-sized integer type TTo,
+// truncating toward zero. Infinities and values outside the range of TTo
+// saturate to LimitsMin/LimitsMax<TTo> according to the sign bit. NaN inputs
+// are not special-cased here.
+template <class DTo, typename TTo = TFromD<DTo>, typename TFrom,
+          HWY_IF_FLOAT(TFrom)>
+HWY_API Vec1<TTo> ConvertTo(DTo /* tag */, Vec1<TFrom> from) {
+  static_assert(sizeof(TTo) == sizeof(TFrom), "Should have same size");
+  // float## -> int##: return closest representable value. We cannot exactly
+  // represent LimitsMax<TTo> in TFrom, so use double.
+  const double f = static_cast<double>(from.raw);
+  // For 64-bit TTo, LimitsMax<TTo>() is not representable in double either:
+  // it rounds up to 2^63, which is out of range for int64_t. Comparing with
+  // `>=` makes an input of exactly that magnitude saturate instead of
+  // reaching the static_cast below, which is undefined for out-of-range
+  // values. -2^63 then yields LimitsMin<TTo>(), which is exactly its value.
+  // For 32-bit lanes no float input equals LimitsMax<TTo>() exactly, so the
+  // result is unchanged there.
+  if (IsInf(from).bits ||
+      Abs(Vec1<double>(f)).raw >= static_cast<double>(LimitsMax<TTo>())) {
+    return Vec1<TTo>(detail::SignBit(from.raw) ? LimitsMin<TTo>()
+                                               : LimitsMax<TTo>());
+  }
+  return Vec1<TTo>(static_cast<TTo>(from.raw));
+}
+
+// Converts a non-floating-point lane to the same-sized type TTo with a
+// plain static_cast.
+template <class DTo, typename TTo = TFromD<DTo>, typename TFrom,
+          HWY_IF_NOT_FLOAT(TFrom)>
+HWY_API Vec1<TTo> ConvertTo(DTo /* tag */, Vec1<TFrom> from) {
+  static_assert(sizeof(TTo) == sizeof(TFrom), "Should have same size");
+  // int## -> float##: no check needed
+  const TTo converted = static_cast<TTo>(from.raw);
+  return Vec1<TTo>(converted);
+}
+
+// uint32_t -> uint8_t by forwarding to DemoteTo.
+HWY_API Vec1<uint8_t> U8FromU32(const Vec1<uint32_t> v) {
+  const Sisd<uint8_t> d8;
+  return DemoteTo(d8, v);
+}
+
+// ------------------------------ TruncateTo
+
+// TruncateTo keeps only the low bits of the lane that fit in the narrower
+// unsigned destination type; upper bits are discarded.
+
+// From uint64_t.
+template <class D, HWY_IF_U8_D(D)>
+HWY_API Vec1<uint8_t> TruncateTo(D /* tag */, Vec1<uint64_t> v) {
+  const uint64_t low_bits = v.raw & 0xFF;
+  return Vec1<uint8_t>{static_cast<uint8_t>(low_bits)};
+}
+
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec1<uint16_t> TruncateTo(D /* tag */, Vec1<uint64_t> v) {
+  const uint64_t low_bits = v.raw & 0xFFFF;
+  return Vec1<uint16_t>{static_cast<uint16_t>(low_bits)};
+}
+
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec1<uint32_t> TruncateTo(D /* tag */, Vec1<uint64_t> v) {
+  const uint64_t low_bits = v.raw & 0xFFFFFFFFu;
+  return Vec1<uint32_t>{static_cast<uint32_t>(low_bits)};
+}
+
+// From uint32_t.
+template <class D, HWY_IF_U8_D(D)>
+HWY_API Vec1<uint8_t> TruncateTo(D /* tag */, Vec1<uint32_t> v) {
+  const uint32_t low_bits = v.raw & 0xFF;
+  return Vec1<uint8_t>{static_cast<uint8_t>(low_bits)};
+}
+
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec1<uint16_t> TruncateTo(D /* tag */, Vec1<uint32_t> v) {
+  const uint32_t low_bits = v.raw & 0xFFFF;
+  return Vec1<uint16_t>{static_cast<uint16_t>(low_bits)};
+}
+
+// From uint16_t.
+template <class D, HWY_IF_U8_D(D)>
+HWY_API Vec1<uint8_t> TruncateTo(D /* tag */, Vec1<uint16_t> v) {
+  const int low_bits = v.raw & 0xFF;  // operand is promoted to int
+  return Vec1<uint8_t>{static_cast<uint8_t>(low_bits)};
+}
+
+// ================================================== COMBINE
+// UpperHalf, ZeroExtendVector, Combine, Concat* are unsupported.
+
+template <typename T>
+HWY_API Vec1<T> LowerHalf(Vec1<T> v) {
+  return v;  // the input is returned unchanged
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API Vec1<T> LowerHalf(D /* tag */, Vec1<T> v) {
+  return v;  // tag overload: also returns the input unchanged
+}
+
+// ================================================== SWIZZLE
+
+template <typename T>
+HWY_API T GetLane(const Vec1<T> v) {
+  return v.raw;  // the value of the vector's only lane
+}
+
+template <typename T>
+HWY_API T ExtractLane(const Vec1<T> v, size_t i) {
+  HWY_DASSERT(i == 0);  // lane 0 is the only valid index
+  (void)i;  // presumably silences unused warnings when the assert is a no-op
+  return v.raw;
+}
+
+template <typename T>
+HWY_API Vec1<T> InsertLane(Vec1<T> v, size_t i, T t) {
+  HWY_DASSERT(i == 0);  // lane 0 is the only valid index
+  (void)i;  // presumably silences unused warnings when the assert is a no-op
+  v.raw = t;  // `v` is a by-value copy, so the caller's vector is untouched
+  return v;
+}
+
+template <typename T>
+HWY_API Vec1<T> DupEven(Vec1<T> v) {
+  return v;  // lane 0 is the only lane and is even, so nothing changes
+}
+// DupOdd is unsupported.
+
+template <typename T>
+HWY_API Vec1<T> OddEven(Vec1<T> /* odd */, Vec1<T> even) {
+  return even;  // index 0 is even, so `odd` contributes nothing
+}
+
+template <typename T>
+HWY_API Vec1<T> OddEvenBlocks(Vec1<T> /* odd */, Vec1<T> even) {
+  return even;  // index 0 is even, so `odd` contributes nothing
+}
+
+// ------------------------------ SwapAdjacentBlocks
+
+template <typename T>
+HWY_API Vec1<T> SwapAdjacentBlocks(Vec1<T> v) {
+  return v;  // the input is returned unchanged
+}
+
+// ------------------------------ TableLookupLanes
+
+// Returned by SetTableIndices for use by TableLookupLanes.
+template <typename T>
+struct Indices1 {
+  MakeSigned<T> raw;  // the single lane index, held as the signed form of T
+};
+
+template <class D, typename T = TFromD<D>, typename TI>
+HWY_API Indices1<T> IndicesFromVec(D, Vec1<TI> vec) {
+  static_assert(sizeof(T) == sizeof(TI), "Index size must match lane size");
+  HWY_DASSERT(vec.raw <= 1);  // asserts that the index does not exceed 1
+  return Indices1<T>{static_cast<MakeSigned<T>>(vec.raw)};
+}
+
+template <class D, HWY_IF_LANES_D(D, 1), typename T = TFromD<D>, typename TI>
+HWY_API Indices1<T> SetTableIndices(D d, const TI* idx) {
+  return IndicesFromVec(d, LoadU(Sisd<TI>(), idx));  // load, then convert
+}
+
+template <typename T>
+HWY_API Vec1<T> TableLookupLanes(const Vec1<T> v, const Indices1<T> /* idx */) {
+  return v;  // `idx` is ignored; the input is returned as-is
+}
+
+template <typename T>
+HWY_API Vec1<T> TwoTablesLookupLanes(const Vec1<T> a, const Vec1<T> b,
+                                     const Indices1<T> idx) {
+  return (idx.raw != 0) ? b : a;  // index 0 selects `a`, anything else `b`
+}
+
+// ------------------------------ ReverseBlocks
+
+// Single block: nothing to reverse, so the input is returned unchanged.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec1<T> ReverseBlocks(D /* tag */, const Vec1<T> v) {
+  return v;
+}
+
+// ------------------------------ Reverse
+
+template <class D, typename T = TFromD<D>>
+HWY_API Vec1<T> Reverse(D /* tag */, const Vec1<T> v) {
+  return v;  // a single lane reversed is itself
+}
+
+// Per-target flag to prevent generic_ops-inl.h defining 8-bit Reverse2/4/8.
+#ifdef HWY_NATIVE_REVERSE2_8
+#undef HWY_NATIVE_REVERSE2_8
+#else
+#define HWY_NATIVE_REVERSE2_8
+#endif
+
+// Must not be called (each body below just returns `v` unchanged):
+template <class D, typename T = TFromD<D>>
+HWY_API Vec1<T> Reverse2(D /* tag */, const Vec1<T> v) {
+  return v;
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API Vec1<T> Reverse4(D /* tag */, const Vec1<T> v) {
+  return v;
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API Vec1<T> Reverse8(D /* tag */, const Vec1<T> v) {
+  return v;
+}
+
+// ------------------------------ ReverseLaneBytes
+
+#ifdef HWY_NATIVE_REVERSE_LANE_BYTES
+#undef HWY_NATIVE_REVERSE_LANE_BYTES
+#else
+#define HWY_NATIVE_REVERSE_LANE_BYTES
+#endif
+
+HWY_API Vec1<uint16_t> ReverseLaneBytes(Vec1<uint16_t> v) {
+  const uint32_t bits = v.raw;  // widened so both shifts are unsigned 32-bit
+  // Swap the two bytes; the narrowing cast drops everything above bit 15.
+  return Vec1<uint16_t>(static_cast<uint16_t>((bits << 8) | (bits >> 8)));
+}
+
+HWY_API Vec1<uint32_t> ReverseLaneBytes(Vec1<uint32_t> v) {
+  const uint32_t x = v.raw;
+  const uint32_t outer = (x << 24) | (x >> 24);  // bytes 0 <-> 3
+  const uint32_t inner = ((x & 0xFF00u) << 8) | ((x >> 8) & 0xFF00u);  // 1 <-> 2
+  return Vec1<uint32_t>(outer | inner);
+}
+
+HWY_API Vec1<uint64_t> ReverseLaneBytes(Vec1<uint64_t> v) {
+  constexpr uint64_t kLo16 = 0x0000FFFF0000FFFFu;  // low half of each 32 bits
+  constexpr uint64_t kLo8 = 0x00FF00FF00FF00FFu;   // low byte of each 16 bits
+  uint64_t x = v.raw;
+  // Reverse by swapping 32-bit halves, then 16-bit halves, then bytes.
+  x = (x << 32) | (x >> 32);
+  x = ((x & kLo16) << 16) | ((x >> 16) & kLo16);
+  x = ((x & kLo8) << 8) | ((x >> 8) & kLo8);
+  return Vec1<uint64_t>(x);
+}
+
+template <class V, HWY_IF_SIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 2) | (1 << 4) | (1 << 8))>
+HWY_API V ReverseLaneBytes(V v) {  // signed lanes reuse the unsigned overloads
+  const DFromV<V> d_signed;
+  const RebindToUnsigned<decltype(d_signed)> d_unsigned;
+  return BitCast(d_signed, ReverseLaneBytes(BitCast(d_unsigned, v)));
+}
+
+// ------------------------------ ReverseBits
+#ifdef HWY_NATIVE_REVERSE_BITS_UI8
+#undef HWY_NATIVE_REVERSE_BITS_UI8
+#else
+#define HWY_NATIVE_REVERSE_BITS_UI8
+#endif
+
+#ifdef HWY_NATIVE_REVERSE_BITS_UI16_32_64
+#undef HWY_NATIVE_REVERSE_BITS_UI16_32_64
+#else
+#define HWY_NATIVE_REVERSE_BITS_UI16_32_64
+#endif
+
+namespace detail {
+
+// Reverses the bit order within each byte of `val`; byte order is unchanged.
+// Works on the unsigned counterpart of T and casts back at the end.
+template <class T>
+HWY_INLINE T ReverseBitsOfEachByte(T val) {
+  using TU = MakeUnsigned<T>;
+  constexpr TU kAllOnes{LimitsMax<TU>()};
+
+  // Low half of every 2-, 4- and 8-bit group; `& kAllOnes` truncates the
+  // 64-bit patterns to the width of TU.
+  constexpr TU kLow1 = static_cast<TU>(0x5555555555555555u & kAllOnes);
+  constexpr TU kLow2 = static_cast<TU>(0x3333333333333333u & kAllOnes);
+  constexpr TU kLow4 = static_cast<TU>(0x0F0F0F0F0F0F0F0Fu & kAllOnes);
+  // The complements select the high half of each group.
+  constexpr TU kHigh1 = static_cast<TU>(~kLow1);
+  constexpr TU kHigh2 = static_cast<TU>(~kLow2);
+  constexpr TU kHigh4 = static_cast<TU>(~kLow4);
+
+  // Swap adjacent bits, then bit pairs, then nibbles.
+  TU bits = static_cast<TU>(val);
+  bits = static_cast<TU>(((bits << 1) & kHigh1) | ((bits >> 1) & kLow1));
+  bits = static_cast<TU>(((bits << 2) & kHigh2) | ((bits >> 2) & kLow2));
+  bits = static_cast<TU>(((bits << 4) & kHigh4) | ((bits >> 4) & kLow4));
+  return static_cast<T>(bits);
+}
+
+}  // namespace detail
+
+template <class V, HWY_IF_UNSIGNED_V(V), HWY_IF_T_SIZE_V(V, 1)>
+HWY_API V ReverseBits(V v) {  // 8-bit lanes: the per-byte reversal is enough
+  return V(detail::ReverseBitsOfEachByte(v.raw));
+}
+
+template <class V, HWY_IF_UNSIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 2) | (1 << 4) | (1 << 8))>
+HWY_API V ReverseBits(V v) {  // wider lanes: reverse bits per byte, then bytes
+  return ReverseLaneBytes(V(detail::ReverseBitsOfEachByte(v.raw)));
+}
+
+template <class V, HWY_IF_SIGNED_V(V)>
+HWY_API V ReverseBits(V v) {  // signed lanes reuse the unsigned overloads
+  const DFromV<V> d_signed;
+  const RebindToUnsigned<decltype(d_signed)> d_unsigned;
+  return BitCast(d_signed, ReverseBits(BitCast(d_unsigned, v)));
+}
+
+// ------------------------------ SlideUpLanes
+
+template <typename D>
+HWY_API VFromD<D> SlideUpLanes(D /*d*/, VFromD<D> v, size_t /*amt*/) {
+  return v;  // `amt` is ignored; the input is returned unchanged
+}
+
+// ------------------------------ SlideDownLanes
+
+template <typename D>
+HWY_API VFromD<D> SlideDownLanes(D /*d*/, VFromD<D> v, size_t /*amt*/) {
+  return v;  // `amt` is ignored; the input is returned unchanged
+}
+
+// ================================================== BLOCKWISE
+// Shift*Bytes, CombineShiftRightBytes, Interleave*, Shuffle* are unsupported.
+
+// ------------------------------ Broadcast/splat any lane
+
+template <int kLane, typename T>
+HWY_API Vec1<T> Broadcast(const Vec1<T> v) {
+  static_assert(kLane == 0, "Scalar only has one lane");
+  return v;  // kLane is 0 (checked above), so the result is the input itself
+}
+
+// ------------------------------ TableLookupBytes, TableLookupBytesOr0
+
+template <typename T, typename TI>  // T: table lane, TI: index/result lane
+HWY_API Vec1<TI> TableLookupBytes(const Vec1<T> in, const Vec1<TI> indices) {
+  uint8_t in_bytes[sizeof(T)];
+  uint8_t idx_bytes[sizeof(TI)];  // one index byte per output byte
+  uint8_t out_bytes[sizeof(TI)];  // sized for the TI result, not the T table
+  CopyBytes<sizeof(T)>(&in, &in_bytes);  // copy to bytes
+  CopyBytes<sizeof(TI)>(&indices, &idx_bytes);  // never reads past `indices`
+  for (size_t i = 0; i < sizeof(TI); ++i) {
+    out_bytes[i] = in_bytes[idx_bytes[i]];  // NOTE: index is not range-checked
+  }
+  TI out;
+  CopyBytes<sizeof(TI)>(&out_bytes, &out);
+  return Vec1<TI>{out};
+}
+
+template <typename T, typename TI>  // T: table lane, TI: index/result lane
+HWY_API Vec1<TI> TableLookupBytesOr0(const Vec1<T> in, const Vec1<TI> indices) {
+  uint8_t in_bytes[sizeof(T)];
+  uint8_t idx_bytes[sizeof(TI)];  // one index byte per output byte
+  uint8_t out_bytes[sizeof(TI)];  // sized for the TI result, not the T table
+  CopyBytes<sizeof(T)>(&in, &in_bytes);  // copy to bytes
+  CopyBytes<sizeof(TI)>(&indices, &idx_bytes);  // never reads past `indices`
+  for (size_t i = 0; i < sizeof(TI); ++i) {  // index bit 7 set => output 0
+    out_bytes[i] = idx_bytes[i] & 0x80 ? 0 : in_bytes[idx_bytes[i]];
+  }
+  TI out;
+  CopyBytes<sizeof(TI)>(&out_bytes, &out);
+  return Vec1<TI>{out};
+}
+
+// ------------------------------ ZipLower
+// `b` supplies the upper half of each result and `a` the lower half, bitwise.
+HWY_API Vec1<uint16_t> ZipLower(Vec1<uint8_t> a, Vec1<uint8_t> b) {
+  return Vec1<uint16_t>(static_cast<uint16_t>((uint32_t{b.raw} << 8) + a.raw));
+}
+HWY_API Vec1<uint32_t> ZipLower(Vec1<uint16_t> a, Vec1<uint16_t> b) {
+  return Vec1<uint32_t>((uint32_t{b.raw} << 16) + a.raw);
+}
+HWY_API Vec1<uint64_t> ZipLower(Vec1<uint32_t> a, Vec1<uint32_t> b) {
+  return Vec1<uint64_t>((uint64_t{b.raw} << 32) + a.raw);
+}
+HWY_API Vec1<int16_t> ZipLower(Vec1<int8_t> a, Vec1<int8_t> b) {
+  return Vec1<int16_t>(static_cast<int16_t>(b.raw * 0x100 + (a.raw & 0xFF)));
+}  // masking `a` keeps a negative low half from borrowing out of `b`
+HWY_API Vec1<int32_t> ZipLower(Vec1<int16_t> a, Vec1<int16_t> b) {
+  return Vec1<int32_t>(b.raw * 0x10000 + (a.raw & 0xFFFF));
+}  // multiplying instead of shifting avoids left-shifting a negative `b`
+HWY_API Vec1<int64_t> ZipLower(Vec1<int32_t> a, Vec1<int32_t> b) {
+  return Vec1<int64_t>(b.raw * 0x100000000 + (int64_t{a.raw} & 0xFFFFFFFF));
+}  // the sum spans exactly the int64_t range, so it cannot overflow
+template <class DW, typename TW = TFromD<DW>, typename TN = MakeNarrow<TW>>
+HWY_API Vec1<TW> ZipLower(DW /* tag */, Vec1<TN> a, Vec1<TN> b) {
+  return Vec1<TW>(static_cast<TW>(b.raw * (TW{1} << (sizeof(TN) * 8)) +
+                                  static_cast<MakeUnsigned<TN>>(a.raw)));
+}  // same rule for any lane type: zero-extend `a`, scale `b` by 2^bits
+
+// ================================================== MASK
+
+template <class D, typename T = TFromD<D>>
+HWY_API bool AllFalse(D /* tag */, const Mask1<T> mask) {
+  return mask.bits == 0;  // true when the only lane is unset
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API bool AllTrue(D /* tag */, const Mask1<T> mask) {
+  return mask.bits != 0;  // true when the only lane is set
+}
+
+// `bits` points to at least 8 readable bytes, not all of which need be valid.
+template <class D, HWY_IF_LANES_D(D, 1), typename T = TFromD<D>>
+HWY_API Mask1<T> LoadMaskBits(D /* tag */, const uint8_t* HWY_RESTRICT bits) {
+  return Mask1<T>::FromBool((bits[0] & 1) != 0);  // only bit 0 of byte 0 is read
+}
+
+// `bits` points to at least 8 writable bytes.
+template <class D, typename T = TFromD<D>>
+HWY_API size_t StoreMaskBits(D d, const Mask1<T> mask, uint8_t* bits) {
+  *bits = AllTrue(d, mask);  // writes 1 if the lane is set, otherwise 0
+  return 1;  // presumably the number of bytes written; only bits[0] is touched
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API size_t CountTrue(D /* tag */, const Mask1<T> mask) {
+  return mask.bits != 0 ? 1 : 0;  // at most one lane can be set
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API intptr_t FindFirstTrue(D /* tag */, const Mask1<T> mask) {
+  return mask.bits != 0 ? 0 : -1;  // lane 0 if set, -1 if no lane is set
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API size_t FindKnownFirstTrue(D /* tag */, const Mask1<T> /* m */) {
+  return 0;  // Lane 0 is the only lane, and the caller knows it is true.
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API intptr_t FindLastTrue(D /* tag */, const Mask1<T> mask) {
+  return mask.bits != 0 ? 0 : -1;  // lane 0 if set, -1 if no lane is set
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API size_t FindKnownLastTrue(D /* tag */, const Mask1<T> /* m */) {
+  return 0;  // Lane 0 is the only lane, and the caller knows it is true.
+}
+
+// ------------------------------ Compress, CompressBits
+
+template <typename T>
+struct CompressIsPartition {
+  enum { value = 1 };  // 1 for every lane type T on this target
+};
+
+template <typename T>
+HWY_API Vec1<T> Compress(Vec1<T> v, const Mask1<T> /* mask */) {
+  // A single lane is already partitioned by definition; `mask` is ignored.
+  return v;
+}
+
+template <typename T>
+HWY_API Vec1<T> CompressNot(Vec1<T> v, const Mask1<T> /* mask */) {
+  // A single lane is already partitioned by definition; `mask` is ignored.
+  return v;
+}
+
+// ------------------------------ CompressStore
+template <class D, typename T = TFromD<D>>
+HWY_API size_t CompressStore(Vec1<T> v, const Mask1<T> mask, D d,
+                             T* HWY_RESTRICT unaligned) {
+  StoreU(Compress(v, mask), d, unaligned);  // stored regardless of `mask`
+  return CountTrue(d, mask);  // 1 if the lane is selected, otherwise 0
+}
+
+// ------------------------------ CompressBlendedStore
+template <class D, typename T = TFromD<D>>
+HWY_API size_t CompressBlendedStore(Vec1<T> v, const Mask1<T> mask, D d,
+                                    T* HWY_RESTRICT unaligned) {
+  if (mask.bits == 0) return 0;  // lane not selected: memory is left untouched
+  StoreU(v, d, unaligned);
+  return 1;  // one lane was stored
+}
+
+// ------------------------------ CompressBits
+template <typename T>
+HWY_API Vec1<T> CompressBits(Vec1<T> v, const uint8_t* HWY_RESTRICT /*bits*/) {
+  return v;  // `bits` is never read; the input is returned unchanged
+}
+
+// ------------------------------ CompressBitsStore
+template <class D, typename T = TFromD<D>>
+HWY_API size_t CompressBitsStore(Vec1<T> v, const uint8_t* HWY_RESTRICT bits,
+                                 D d, T* HWY_RESTRICT unaligned) {
+  const Mask1<T> lane_mask = LoadMaskBits(d, bits);  // decode the packed bits
+  StoreU(Compress(v, lane_mask), d, unaligned);  // stored regardless of mask
+  return CountTrue(d, lane_mask);
+}
+
+// ------------------------------ Expand
+
+// generic_ops-inl.h requires Vec64/128, so implement [Load]Expand here.
+#ifdef HWY_NATIVE_EXPAND
+#undef HWY_NATIVE_EXPAND
+#else
+#define HWY_NATIVE_EXPAND
+#endif
+
+template <typename T>
+HWY_API Vec1<T> Expand(Vec1<T> v, const Mask1<T> mask) {
+  return IfThenElseZero(mask, v);  // forwards to IfThenElseZero
+}
+
+// ------------------------------ LoadExpand
+template <class D>
+HWY_API VFromD<D> LoadExpand(MFromD<D> mask, D d,
+                             const TFromD<D>* HWY_RESTRICT unaligned) {
+  return MaskedLoad(mask, d, unaligned);  // forwards to MaskedLoad
+}
+
+// ------------------------------ WidenMulPairwiseAdd
+
+template <class D32, HWY_IF_F32_D(D32)>
+HWY_API Vec1<float> WidenMulPairwiseAdd(D32 /* tag */, Vec1<bfloat16_t> a,
+                                        Vec1<bfloat16_t> b) {
+  return Vec1<float>(F32FromBF16(a.raw)) * Vec1<float>(F32FromBF16(b.raw));
+}  // one lane: the result is just the widened product, nothing is added
+
+template <class D32, HWY_IF_I32_D(D32)>
+HWY_API Vec1<int32_t> WidenMulPairwiseAdd(D32 /* tag */, Vec1<int16_t> a,
+                                          Vec1<int16_t> b) {
+  return Vec1<int32_t>(a.raw * b.raw);  // an int16 product always fits int32
+}
+
+// ------------------------------ SatWidenMulPairwiseAdd
+
+#ifdef HWY_NATIVE_U8_I8_SATWIDENMULPAIRWISEADD
+#undef HWY_NATIVE_U8_I8_SATWIDENMULPAIRWISEADD
+#else
+#define HWY_NATIVE_U8_I8_SATWIDENMULPAIRWISEADD
+#endif
+
+template <class DI16, HWY_IF_I16_D(DI16)>
+HWY_API Vec1<int16_t> SatWidenMulPairwiseAdd(DI16 /* tag */, Vec1<uint8_t> a,
+                                             Vec1<int8_t> b) {
+  // Saturation of a.raw * b.raw is not needed on the HWY_SCALAR target as the
+  // input vectors only have 1 lane on the HWY_SCALAR target and as
+  // a.raw * b.raw is between -32640 and 32385, which is already within the
+  // range of an int16_t.
+
+  // On other targets, a saturated addition of a[0]*b[0] + a[1]*b[1] is needed
+  // as it is possible for the addition of a[0]*b[0] + a[1]*b[1] to overflow if
+  // a[0], a[1], b[0], and b[1] are all non-zero and b[0] and b[1] both have the
+  // same sign.
+
+  return Vec1<int16_t>(static_cast<int16_t>(a.raw) *
+                       static_cast<int16_t>(b.raw));  // multiplied as int
+}
+
+// ------------------------------ ReorderWidenMulAccumulate (MulAdd, ZipLower)
+
+template <class D32, HWY_IF_F32_D(D32)>
+HWY_API Vec1<float> ReorderWidenMulAccumulate(D32 /* tag */, Vec1<bfloat16_t> a,
+                                              Vec1<bfloat16_t> b,
+                                              const Vec1<float> sum0,
+                                              Vec1<float>& /* sum1 */) {
+  return MulAdd(Vec1<float>(F32FromBF16(a.raw)),
+                Vec1<float>(F32FromBF16(b.raw)), sum0);  // sum1 is not touched
+}
+
+template <class D32, HWY_IF_I32_D(D32)>
+HWY_API Vec1<int32_t> ReorderWidenMulAccumulate(D32 /* tag */, Vec1<int16_t> a,
+                                                Vec1<int16_t> b,
+                                                const Vec1<int32_t> sum0,
+                                                Vec1<int32_t>& /* sum1 */) {
+  return Vec1<int32_t>(a.raw * b.raw + sum0.raw);  // sum1 is not touched
+}
+
+template <class DU32, HWY_IF_U32_D(DU32)>
+HWY_API Vec1<uint32_t> ReorderWidenMulAccumulate(DU32 /* tag */,
+                                                 Vec1<uint16_t> a,
+                                                 Vec1<uint16_t> b,
+                                                 const Vec1<uint32_t> sum0,
+                                                 Vec1<uint32_t>& /* sum1 */) {
+  return Vec1<uint32_t>(static_cast<uint32_t>(a.raw) * b.raw + sum0.raw);
+}  // the cast makes the multiply unsigned 32-bit; sum1 is not touched
+
+// ------------------------------ RearrangeToOddPlusEven
+template <typename TW>
+HWY_API Vec1<TW> RearrangeToOddPlusEven(Vec1<TW> sum0, Vec1<TW> /* sum1 */) {
+  return sum0;  // invariant already holds; sum1 is ignored
+}
+
+// ================================================== REDUCTIONS
+
+// Sum of all lanes, i.e. the only one.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec1<T> SumOfLanes(D /* tag */, const Vec1<T> v) {
+  return v;
+}
+template <class D, typename T = TFromD<D>>
+HWY_API T ReduceSum(D /* tag */, const Vec1<T> v) {
+  return GetLane(v);  // same sum, returned as a scalar instead of a vector
+}
+template <class D, typename T = TFromD<D>>
+HWY_API Vec1<T> MinOfLanes(D /* tag */, const Vec1<T> v) {
+  return v;  // the only lane is also the minimum
+}
+template <class D, typename T = TFromD<D>>
+HWY_API Vec1<T> MaxOfLanes(D /* tag */, const Vec1<T> v) {
+  return v;  // the only lane is also the maximum
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/ops/set_macros-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/set_macros-inl.h
new file mode 100644
index 0000000000..d8bed3e291
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/set_macros-inl.h
@@ -0,0 +1,578 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Sets macros based on HWY_TARGET.
+
+// This include guard is toggled by foreach_target, so avoid the usual _H_
+// suffix to prevent copybara from renaming it.
+#if defined(HWY_SET_MACROS_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
+#ifdef HWY_SET_MACROS_PER_TARGET
+#undef HWY_SET_MACROS_PER_TARGET
+#else
+#define HWY_SET_MACROS_PER_TARGET
+#endif
+
+#endif  // HWY_SET_MACROS_PER_TARGET
+
+#include "hwy/detect_compiler_arch.h"  // IWYU: export
+#include "hwy/detect_targets.h"        // IWYU: export
+
+#undef HWY_NAMESPACE
+#undef HWY_ALIGN
+#undef HWY_MAX_BYTES
+#undef HWY_LANES
+
+#undef HWY_HAVE_SCALABLE
+#undef HWY_HAVE_TUPLE
+#undef HWY_HAVE_INTEGER64
+#undef HWY_HAVE_FLOAT16
+#undef HWY_HAVE_FLOAT64
+#undef HWY_MEM_OPS_MIGHT_FAULT
+#undef HWY_NATIVE_FMA
+#undef HWY_CAP_GE256
+#undef HWY_CAP_GE512
+
+// Supported on all targets except RVV (requires GCC 14 or upcoming Clang)
+#if HWY_TARGET == HWY_RVV &&                                        \
+    ((HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1400) || \
+     (HWY_COMPILER_CLANG))
+#define HWY_HAVE_TUPLE 0
+#else
+#define HWY_HAVE_TUPLE 1
+#endif
+
+// For internal use (clamping/validating N for Simd<>)
+#undef HWY_MAX_N
+#if HWY_TARGET == HWY_SCALAR
+#define HWY_MAX_N 1
+#else
+#define HWY_MAX_N 65536
+#endif
+
+// For internal use (clamping kPow2 for Simd<>)
+#undef HWY_MAX_POW2
+// For HWY_TARGET == HWY_RVV, LMUL <= 8. Even on other targets, we want to
+// support say Rebind<uint64_t, Simd<uint8_t, 1, 0>> d; whose kPow2 is also 3.
+// However, those other targets do not actually support multiple vectors, and
+// thus Lanes(d) must not exceed Lanes(ScalableTag<T>()).
+#define HWY_MAX_POW2 3
+
+// User-visible. Loose lower bound that guarantees HWY_MAX_BYTES >>
+// (-HWY_MIN_POW2) <= 1. Useful for terminating compile-time recursions.
+#undef HWY_MIN_POW2
+#if HWY_TARGET == HWY_RVV
+#define HWY_MIN_POW2 -16
+#else
+// Tighter bound for other targets, whose vectors are smaller, to potentially
+// save compile time.
+#define HWY_MIN_POW2 -8
+#endif  // HWY_TARGET == HWY_RVV
+
+#undef HWY_TARGET_STR
+
+#if defined(HWY_DISABLE_PCLMUL_AES)
+#define HWY_TARGET_STR_PCLMUL_AES ""
+#else
+#define HWY_TARGET_STR_PCLMUL_AES ",pclmul,aes"
+#endif
+
+#if defined(HWY_DISABLE_BMI2_FMA)
+#define HWY_TARGET_STR_BMI2_FMA ""
+#else
+#define HWY_TARGET_STR_BMI2_FMA ",bmi,bmi2,fma"
+#endif
+
+#if defined(HWY_DISABLE_F16C)
+#define HWY_TARGET_STR_F16C ""
+#else
+#define HWY_TARGET_STR_F16C ",f16c"
+#endif
+
+#define HWY_TARGET_STR_SSE2 "sse2"
+
+#define HWY_TARGET_STR_SSSE3 "sse2,ssse3"
+
+#define HWY_TARGET_STR_SSE4 \
+  HWY_TARGET_STR_SSSE3 ",sse4.1,sse4.2" HWY_TARGET_STR_PCLMUL_AES
+// Include previous targets, which are the half-vectors of the next target.
+#define HWY_TARGET_STR_AVX2 \
+  HWY_TARGET_STR_SSE4 ",avx,avx2" HWY_TARGET_STR_BMI2_FMA HWY_TARGET_STR_F16C
+#define HWY_TARGET_STR_AVX3 \
+  HWY_TARGET_STR_AVX2 ",avx512f,avx512cd,avx512vl,avx512dq,avx512bw"
+#define HWY_TARGET_STR_AVX3_DL                                       \
+  HWY_TARGET_STR_AVX3                                                \
+  ",vpclmulqdq,avx512vbmi,avx512vbmi2,vaes,avx512vnni,avx512bitalg," \
+  "avx512vpopcntdq,gfni"
+
+#define HWY_TARGET_STR_AVX3_SPR HWY_TARGET_STR_AVX3_DL ",avx512fp16"
+
+#if defined(HWY_DISABLE_PPC8_CRYPTO)
+#define HWY_TARGET_STR_PPC8_CRYPTO ""
+#else
+#define HWY_TARGET_STR_PPC8_CRYPTO ",crypto"
+#endif
+
+#define HWY_TARGET_STR_PPC8 \
+  "altivec,vsx,power8-vector" HWY_TARGET_STR_PPC8_CRYPTO
+#define HWY_TARGET_STR_PPC9 HWY_TARGET_STR_PPC8 ",power9-vector"
+
+#if HWY_COMPILER_CLANG
+#define HWY_TARGET_STR_PPC10 HWY_TARGET_STR_PPC9 ",power10-vector"
+#else
+#define HWY_TARGET_STR_PPC10 HWY_TARGET_STR_PPC9 ",cpu=power10"
+#endif
+
+// Outside the include guard so that HWY_TARGET_STR is redefined on each
+// include, governed by the current HWY_TARGET.
+
+//-----------------------------------------------------------------------------
+// SSE2
+#if HWY_TARGET == HWY_SSE2
+
+#define HWY_NAMESPACE N_SSE2
+#define HWY_ALIGN alignas(16)
+#define HWY_MAX_BYTES 16
+#define HWY_LANES(T) (16 / sizeof(T))
+
+#define HWY_HAVE_SCALABLE 0
+#define HWY_HAVE_INTEGER64 1
+#define HWY_HAVE_FLOAT16 0
+#define HWY_HAVE_FLOAT64 1
+#define HWY_MEM_OPS_MIGHT_FAULT 1
+#define HWY_NATIVE_FMA 0
+#define HWY_CAP_GE256 0
+#define HWY_CAP_GE512 0
+
+#define HWY_TARGET_STR HWY_TARGET_STR_SSE2
+//-----------------------------------------------------------------------------
+// SSSE3
+#elif HWY_TARGET == HWY_SSSE3
+
+#define HWY_NAMESPACE N_SSSE3
+#define HWY_ALIGN alignas(16)
+#define HWY_MAX_BYTES 16
+#define HWY_LANES(T) (16 / sizeof(T))
+
+#define HWY_HAVE_SCALABLE 0
+#define HWY_HAVE_INTEGER64 1
+#define HWY_HAVE_FLOAT16 0
+#define HWY_HAVE_FLOAT64 1
+#define HWY_MEM_OPS_MIGHT_FAULT 1
+#define HWY_NATIVE_FMA 0
+#define HWY_CAP_GE256 0
+#define HWY_CAP_GE512 0
+
+#define HWY_TARGET_STR HWY_TARGET_STR_SSSE3
+
+//-----------------------------------------------------------------------------
+// SSE4
+#elif HWY_TARGET == HWY_SSE4
+
+#define HWY_NAMESPACE N_SSE4
+#define HWY_ALIGN alignas(16)
+#define HWY_MAX_BYTES 16
+#define HWY_LANES(T) (16 / sizeof(T))
+
+#define HWY_HAVE_SCALABLE 0
+#define HWY_HAVE_INTEGER64 1
+#define HWY_HAVE_FLOAT16 0
+#define HWY_HAVE_FLOAT64 1
+#define HWY_MEM_OPS_MIGHT_FAULT 1
+#define HWY_NATIVE_FMA 0
+#define HWY_CAP_GE256 0
+#define HWY_CAP_GE512 0
+
+#define HWY_TARGET_STR HWY_TARGET_STR_SSE4
+
+//-----------------------------------------------------------------------------
+// AVX2
+#elif HWY_TARGET == HWY_AVX2
+
+#define HWY_NAMESPACE N_AVX2
+#define HWY_ALIGN alignas(32)
+#define HWY_MAX_BYTES 32
+#define HWY_LANES(T) (32 / sizeof(T))
+
+#define HWY_HAVE_SCALABLE 0
+#define HWY_HAVE_INTEGER64 1
+#define HWY_HAVE_FLOAT16 0
+#define HWY_HAVE_FLOAT64 1
+#define HWY_MEM_OPS_MIGHT_FAULT 1
+
+#ifdef HWY_DISABLE_BMI2_FMA
+#define HWY_NATIVE_FMA 0
+#else
+#define HWY_NATIVE_FMA 1
+#endif
+
+#define HWY_CAP_GE256 1
+#define HWY_CAP_GE512 0
+
+#define HWY_TARGET_STR HWY_TARGET_STR_AVX2
+
+//-----------------------------------------------------------------------------
+// AVX3, AVX3_DL, AVX3_ZEN4, AVX3_SPR
+#elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL || \
+    HWY_TARGET == HWY_AVX3_ZEN4 || HWY_TARGET == HWY_AVX3_SPR
+
+#define HWY_ALIGN alignas(64)
+#define HWY_MAX_BYTES 64
+#define HWY_LANES(T) (64 / sizeof(T))
+
+#define HWY_HAVE_SCALABLE 0
+#define HWY_HAVE_INTEGER64 1
+#if (HWY_TARGET == HWY_AVX3_SPR) && 0  // TODO(janwas): enable after testing
+#define HWY_HAVE_FLOAT16 1
+#else
+#define HWY_HAVE_FLOAT16 0
+#endif
+#define HWY_HAVE_FLOAT64 1
+#define HWY_MEM_OPS_MIGHT_FAULT 0
+#define HWY_NATIVE_FMA 1
+#define HWY_CAP_GE256 1
+#define HWY_CAP_GE512 1
+
+#if HWY_TARGET == HWY_AVX3
+
+#define HWY_NAMESPACE N_AVX3
+#define HWY_TARGET_STR HWY_TARGET_STR_AVX3
+
+#elif HWY_TARGET == HWY_AVX3_DL
+
+#define HWY_NAMESPACE N_AVX3_DL
+#define HWY_TARGET_STR HWY_TARGET_STR_AVX3_DL
+
+#elif HWY_TARGET == HWY_AVX3_ZEN4
+
+#define HWY_NAMESPACE N_AVX3_ZEN4
+// Currently the same as HWY_AVX3_DL: both support Icelake.
+#define HWY_TARGET_STR HWY_TARGET_STR_AVX3_DL
+
+#elif HWY_TARGET == HWY_AVX3_SPR
+
+#define HWY_NAMESPACE N_AVX3_SPR
+#define HWY_TARGET_STR HWY_TARGET_STR_AVX3_SPR
+
+#else
+#error "Logic error"
+#endif  // HWY_TARGET
+
+//-----------------------------------------------------------------------------
+// PPC8, PPC9, PPC10
+#elif HWY_TARGET == HWY_PPC8 || HWY_TARGET == HWY_PPC9 || \
+    HWY_TARGET == HWY_PPC10
+
+#define HWY_ALIGN alignas(16)
+#define HWY_MAX_BYTES 16
+#define HWY_LANES(T) (16 / sizeof(T))
+
+#define HWY_HAVE_SCALABLE 0
+#define HWY_HAVE_INTEGER64 1
+#define HWY_HAVE_FLOAT16 0
+#define HWY_HAVE_FLOAT64 1
+#define HWY_MEM_OPS_MIGHT_FAULT 1
+#define HWY_NATIVE_FMA 1
+#define HWY_CAP_GE256 0
+#define HWY_CAP_GE512 0
+
+#if HWY_TARGET == HWY_PPC8
+
+#define HWY_NAMESPACE N_PPC8
+#define HWY_TARGET_STR HWY_TARGET_STR_PPC8
+
+#elif HWY_TARGET == HWY_PPC9
+
+#define HWY_NAMESPACE N_PPC9
+#define HWY_TARGET_STR HWY_TARGET_STR_PPC9
+
+#elif HWY_TARGET == HWY_PPC10
+
+#define HWY_NAMESPACE N_PPC10
+#define HWY_TARGET_STR HWY_TARGET_STR_PPC10
+
+#else
+#error "Logic error"
+#endif  // HWY_TARGET == HWY_PPC10
+
+//-----------------------------------------------------------------------------
+// NEON
+#elif HWY_TARGET == HWY_NEON || HWY_TARGET == HWY_NEON_WITHOUT_AES
+
+#define HWY_ALIGN alignas(16)
+#define HWY_MAX_BYTES 16
+#define HWY_LANES(T) (16 / sizeof(T))
+
+#define HWY_HAVE_SCALABLE 0
+#define HWY_HAVE_INTEGER64 1
+#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+#define HWY_HAVE_FLOAT16 1
+#else
+#define HWY_HAVE_FLOAT16 0
+#endif
+
+#if HWY_ARCH_ARM_A64
+#define HWY_HAVE_FLOAT64 1
+#else
+#define HWY_HAVE_FLOAT64 0
+#endif
+
+#define HWY_MEM_OPS_MIGHT_FAULT 1
+
+#if defined(__ARM_VFPV4__) || HWY_ARCH_ARM_A64
+#define HWY_NATIVE_FMA 1
+#else
+#define HWY_NATIVE_FMA 0
+#endif
+
+#define HWY_CAP_GE256 0
+#define HWY_CAP_GE512 0
+
+#if HWY_TARGET == HWY_NEON_WITHOUT_AES
+#define HWY_NAMESPACE N_NEON_WITHOUT_AES
+#else
+#define HWY_NAMESPACE N_NEON
+#endif
+
+// Can use pragmas instead of -march compiler flag
+#if HWY_HAVE_RUNTIME_DISPATCH
+#if HWY_ARCH_ARM_V7
+
+// The __attribute__((target("+neon-vfpv4"))) was introduced in gcc >= 8.
+#if HWY_COMPILER_GCC_ACTUAL >= 800
+#define HWY_TARGET_STR "+neon-vfpv4"
+#else   // GCC < 8
+// Do not define HWY_TARGET_STR (no pragma).
+#endif  // HWY_COMPILER_GCC_ACTUAL
+
+#else  // !HWY_ARCH_ARM_V7
+
+#if HWY_TARGET == HWY_NEON_WITHOUT_AES
+// Do not define HWY_TARGET_STR (no pragma).
+#else
+#define HWY_TARGET_STR "+crypto"
+#endif  // HWY_TARGET == HWY_NEON_WITHOUT_AES
+
+#endif  // HWY_ARCH_ARM_V7
+#else   // !HWY_HAVE_RUNTIME_DISPATCH
+// HWY_TARGET_STR remains undefined
+#endif
+
+//-----------------------------------------------------------------------------
+// SVE[2]
+#elif HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE || \
+    HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128
+
+// SVE only requires lane alignment, not natural alignment of the entire vector.
+#define HWY_ALIGN alignas(8)
+
+// Value ensures MaxLanes() is the tightest possible upper bound to reduce
+// overallocation.
+#define HWY_LANES(T) ((HWY_MAX_BYTES) / sizeof(T))
+
+#define HWY_HAVE_INTEGER64 1
+#define HWY_HAVE_FLOAT16 0
+#define HWY_HAVE_FLOAT64 1
+#define HWY_MEM_OPS_MIGHT_FAULT 0
+#define HWY_NATIVE_FMA 1
+#define HWY_CAP_GE256 0
+#define HWY_CAP_GE512 0
+
+#if HWY_TARGET == HWY_SVE2
+#define HWY_NAMESPACE N_SVE2
+#define HWY_MAX_BYTES 256
+#define HWY_HAVE_SCALABLE 1
+#elif HWY_TARGET == HWY_SVE_256
+#define HWY_NAMESPACE N_SVE_256
+#define HWY_MAX_BYTES 32
+#define HWY_HAVE_SCALABLE 0
+#elif HWY_TARGET == HWY_SVE2_128
+#define HWY_NAMESPACE N_SVE2_128
+#define HWY_MAX_BYTES 16
+#define HWY_HAVE_SCALABLE 0
+#else
+#define HWY_NAMESPACE N_SVE
+#define HWY_MAX_BYTES 256
+#define HWY_HAVE_SCALABLE 1
+#endif
+
+// Can use pragmas instead of -march compiler flag
+#if HWY_HAVE_RUNTIME_DISPATCH
+#if HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE2_128
+#define HWY_TARGET_STR "+sve2-aes"
+#else
+#define HWY_TARGET_STR "+sve"
+#endif
+#else
+// HWY_TARGET_STR remains undefined
+#endif
+
+//-----------------------------------------------------------------------------
+// WASM
+#elif HWY_TARGET == HWY_WASM
+
+#define HWY_ALIGN alignas(16)
+#define HWY_MAX_BYTES 16
+#define HWY_LANES(T) (16 / sizeof(T))
+
+#define HWY_HAVE_SCALABLE 0
+#define HWY_HAVE_INTEGER64 1
+#define HWY_HAVE_FLOAT16 0
+#define HWY_HAVE_FLOAT64 1
+#define HWY_MEM_OPS_MIGHT_FAULT 1
+#define HWY_NATIVE_FMA 0
+#define HWY_CAP_GE256 0
+#define HWY_CAP_GE512 0
+
+#define HWY_NAMESPACE N_WASM
+
+#define HWY_TARGET_STR "simd128"
+
+//-----------------------------------------------------------------------------
+// WASM_EMU256
+#elif HWY_TARGET == HWY_WASM_EMU256
+
+#define HWY_ALIGN alignas(32)
+#define HWY_MAX_BYTES 32
+#define HWY_LANES(T) (32 / sizeof(T))
+
+#define HWY_HAVE_SCALABLE 0
+#define HWY_HAVE_INTEGER64 1
+#define HWY_HAVE_FLOAT16 0
+#define HWY_HAVE_FLOAT64 0
+#define HWY_MEM_OPS_MIGHT_FAULT 1
+#define HWY_NATIVE_FMA 0
+#define HWY_CAP_GE256 1
+#define HWY_CAP_GE512 0
+
+#define HWY_NAMESPACE N_WASM_EMU256
+
+#define HWY_TARGET_STR "simd128"
+
+//-----------------------------------------------------------------------------
+// RVV
+#elif HWY_TARGET == HWY_RVV
+
+// RVV only requires lane alignment, not natural alignment of the entire vector,
+// and the compiler already aligns builtin types, so nothing to do here.
+#define HWY_ALIGN
+
+// The spec requires VLEN <= 2^16 bits, so the limit is 2^16 bytes (LMUL=8).
+#define HWY_MAX_BYTES 65536
+
+// = HWY_MAX_BYTES divided by max LMUL=8 because MaxLanes includes the actual
+// LMUL. This is the tightest possible upper bound.
+#define HWY_LANES(T) (8192 / sizeof(T))
+
+#define HWY_HAVE_SCALABLE 1
+#define HWY_HAVE_INTEGER64 1
+#define HWY_HAVE_FLOAT64 1
+#define HWY_MEM_OPS_MIGHT_FAULT 0
+#define HWY_NATIVE_FMA 1
+#define HWY_CAP_GE256 0
+#define HWY_CAP_GE512 0
+
+#if defined(__riscv_zvfh)
+#define HWY_HAVE_FLOAT16 1
+#else
+#define HWY_HAVE_FLOAT16 0
+#endif
+
+#define HWY_NAMESPACE N_RVV
+
+// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
+// (rv64gcv is not a valid target)
+
+//-----------------------------------------------------------------------------
+// EMU128
+#elif HWY_TARGET == HWY_EMU128
+
+#define HWY_ALIGN alignas(16)
+#define HWY_MAX_BYTES 16
+#define HWY_LANES(T) (16 / sizeof(T))
+
+#define HWY_HAVE_SCALABLE 0
+#define HWY_HAVE_INTEGER64 1
+#define HWY_HAVE_FLOAT16 0
+#define HWY_HAVE_FLOAT64 1
+#define HWY_MEM_OPS_MIGHT_FAULT 1
+#define HWY_NATIVE_FMA 0
+#define HWY_CAP_GE256 0
+#define HWY_CAP_GE512 0
+
+#define HWY_NAMESPACE N_EMU128
+
+// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
+
+//-----------------------------------------------------------------------------
+// SCALAR
+#elif HWY_TARGET == HWY_SCALAR
+
+#define HWY_ALIGN
+#define HWY_MAX_BYTES 8
+#define HWY_LANES(T) 1
+
+#define HWY_HAVE_SCALABLE 0
+#define HWY_HAVE_INTEGER64 1
+#define HWY_HAVE_FLOAT16 0
+#define HWY_HAVE_FLOAT64 1
+#define HWY_MEM_OPS_MIGHT_FAULT 0
+#define HWY_NATIVE_FMA 0
+#define HWY_CAP_GE256 0
+#define HWY_CAP_GE512 0
+
+#define HWY_NAMESPACE N_SCALAR
+
+// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
+
+#else
+#pragma message("HWY_TARGET does not match any known target")
+#endif  // HWY_TARGET
+
+// Override this to 1 in asan/msan builds, which will still fault.
+#if HWY_IS_ASAN || HWY_IS_MSAN
+#undef HWY_MEM_OPS_MIGHT_FAULT
+#define HWY_MEM_OPS_MIGHT_FAULT 1
+#endif
+
+// Clang <9 requires this be invoked at file scope, before any namespace.
+#undef HWY_BEFORE_NAMESPACE
+#if defined(HWY_TARGET_STR)
+#define HWY_BEFORE_NAMESPACE()        \
+  HWY_PUSH_ATTRIBUTES(HWY_TARGET_STR) \
+  static_assert(true, "For requiring trailing semicolon")
+#else
+// avoids compiler warning if no HWY_TARGET_STR
+#define HWY_BEFORE_NAMESPACE() \
+  static_assert(true, "For requiring trailing semicolon")
+#endif
+
+// Clang <9 requires any namespaces be closed before this macro.
+#undef HWY_AFTER_NAMESPACE
+#if defined(HWY_TARGET_STR)
+#define HWY_AFTER_NAMESPACE() \
+  HWY_POP_ATTRIBUTES          \
+  static_assert(true, "For requiring trailing semicolon")
+#else
+// avoids compiler warning if no HWY_TARGET_STR
+#define HWY_AFTER_NAMESPACE() \
+  static_assert(true, "For requiring trailing semicolon")
+#endif
+
+#undef HWY_ATTR  // redefined below for the current HWY_TARGET_STR
+#if defined(HWY_TARGET_STR) && HWY_HAS_ATTRIBUTE(target)
+#define HWY_ATTR __attribute__((target(HWY_TARGET_STR)))
+#else  // no target string, or the compiler lacks the `target` attribute
+#define HWY_ATTR
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/ops/shared-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/shared-inl.h
new file mode 100644
index 0000000000..8b4ec41a4a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/shared-inl.h
@@ -0,0 +1,520 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Per-target definitions shared by ops/*.h and user code.
+
+// IWYU pragma: begin_exports
+// Export does not seem to be recursive, so re-export these (also in base.h)
+#include <stddef.h>
+
+#include "hwy/base.h"
+// "IWYU pragma: keep" does not work for this include, so hide it from the IDE.
+#if !HWY_IDE
+#include <stdint.h>
+#endif
+
+#include "hwy/detect_compiler_arch.h"
+
+// Separate header because foreach_target.h re-enables its include guard.
+#include "hwy/ops/set_macros-inl.h"
+
+// IWYU pragma: end_exports
+
+#if HWY_IS_MSAN
+#include <sanitizer/msan_interface.h>
+#endif
+
+// We are covered by the highway.h include guard, but generic_ops-inl.h
+// includes this again #if HWY_IDE.
+#if defined(HIGHWAY_HWY_OPS_SHARED_TOGGLE) == defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_OPS_SHARED_TOGGLE
+#undef HIGHWAY_HWY_OPS_SHARED_TOGGLE
+#else
+#define HIGHWAY_HWY_OPS_SHARED_TOGGLE
+#endif
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// VecArg<V> is the type to use for every vector/mask parameter of functions
+// marked HWY_NOINLINE (and possibly other functions that are not inlined).
+//
+// It is normally V (pass by value). On GCC targeting Windows or aarch64 it is
+// const V& instead, because GCC miscompiles by-value vector arguments there:
+// - Windows, GCC 10.3: crashes due to unaligned loads, see
+//   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412.
+// - aarch64, GCC 9.3.0 or 11.2.1: many (but not all) tests fail.
+//
+// The common by-value case is listed first.
+#if !HWY_COMPILER_GCC_ACTUAL || !(HWY_OS_WIN || HWY_ARCH_ARM_A64)
+template <class Vec>
+using VecArg = Vec;
+#else
+template <class Vec>
+using VecArg = const Vec&;
+#endif
+
+namespace detail {
+
+// Scales a lane count: multiplies N (lanes in a full vector) by 2^pow2 via a
+// left shift, or divides it via a right shift if pow2 is negative. See Simd<>;
+// pow2 is usually in [-3, 3] but user-specified fractions may go lower.
+constexpr size_t ScaleByPower(size_t N, int pow2) {
+  return (pow2 < 0) ? (N >> (-pow2)) : (N << pow2);
+}
+
+template <typename T>
+HWY_INLINE void MaybeUnpoison(T* HWY_RESTRICT unaligned, size_t count) {
+  // Workaround (b/233326619): MSAN does not mark compressstore as initialized.
+#if !HWY_IS_MSAN
+  (void)unaligned;
+  (void)count;
+#else
+  __msan_unpoison(unaligned, sizeof(T) * count);
+#endif
+}
+
+}  // namespace detail
+
+// Highway operations are implemented as overloaded functions selected using a
+// zero-sized tag type D := Simd<T, N, kPow2>. T denotes the lane type.
+//
+// N defines how many lanes are in a 'full' vector, typically equal to
+// HWY_LANES(T) (which is the actual count on targets with vectors of known
+// size, and an upper bound in case of scalable vectors), otherwise a
+// user-specified limit at most that large.
+//
+// 2^kPow2 is a _subsequently_ applied scaling factor that indicates the
+// desired fraction of a 'full' vector: 0 means full, -1 means half; 1,2,3
+// means two/four/eight full vectors ganged together. The largest supported
+// kPow2 is `HWY_MAX_POW2` and the aliases below take care of clamping
+// user-specified values to that. Note that `Simd<T, 1, 0>` and `Simd<T, 2, -1>`
+// have the same `MaxLanes` and `Lanes`.
+//
+// We can theoretically keep halving Lanes(), but recursive instantiations of
+// kPow2 - 1 will eventually fail e.g. because -64 is not a valid shift count.
+// Users must terminate such compile-time recursions at or above HWY_MIN_POW2.
+//
+// WARNING: do not use N directly because it may be a special representation of
+// a fractional MaxLanes. This arises when we Rebind Simd<uint8_t, 1, 0> to
+// Simd<uint32_t, ??, 2>. RVV requires that the last argument (kPow2) be two,
+// but we want MaxLanes to be the same in both cases. Hence ?? is a
+// fixed-point encoding of 1/4.
+//
+// Instead of referring to Simd<> directly, users create D via aliases:
+// - ScalableTag<T> for a full vector;
+// - ScalableTag<T, kPow2> for a fraction/group, where `kPow2` is
+//   interpreted as `HWY_MIN(kPow2, HWY_MAX_POW2)`;
+// - CappedTag<T, kLimit> for a vector with up to kLimit lanes; or
+// - FixedTag<T, kNumLanes> for a vector with exactly kNumLanes lanes.
+//
+// Instead of N, use Lanes(D()) for the actual number of lanes at runtime and
+// D().MaxLanes() for a constexpr upper bound. Both are powers of two.
+template <typename Lane, size_t N, int kPow2>
+struct Simd {
+  constexpr Simd() = default;
+  using T = Lane;  // lane type; TFromD<D> reads this
+
+ private:
+  static_assert(sizeof(Lane) <= 8, "Lanes are up to 64-bit");
+  // 20 bits are sufficient for any HWY_MAX_BYTES. This is the 'normal' value of
+  // N when kFrac == 0, otherwise it is one (see FracN).
+  static constexpr size_t kWhole = N & 0xFFFFF;
+  // Fractional part is in the bits above kWhole.
+  static constexpr int kFrac = static_cast<int>(N >> 20);
+  // Can be 8x larger because kPow2 may be as low as -3 (Rebind of a larger
+  // type to u8 results in fractions).
+  static_assert(kWhole <= 8 * HWY_MAX_N && kFrac <= 3, "Out of range");
+  static_assert(kFrac == 0 || kWhole == 1, "If frac, whole must be 1");
+  static_assert((kWhole & (kWhole - 1)) == 0 && kWhole != 0, "Not 2^x");
+  // Important to check this here because kPow2 <= -64 causes confusing
+  // compile errors (invalid shift count).
+  static_assert(kPow2 >= HWY_MIN_POW2, "Forgot kPow2 recursion terminator?");
+  // However, do NOT verify kPow2 <= HWY_MAX_POW2 - users should be able to
+  // Rebind<uint64_t, ScalableTag<uint8_t, 3>> in order to discover that its
+  // kPow2 is out of bounds.
+
+ public:
+  // Upper bound on the number of lanes (tight if !HWY_HAVE_SCALABLE). In the
+  // common case, N == kWhole, but if kFrac is nonzero, we deduct it from kPow2.
+  // E.g. Rebind<uint32_t, Simd<uint8_t, 1, 0>> is Simd<uint32_t, 0x200001, 2>.
+  // The resulting number of lanes is still 1 because this N represents 1/4
+  // (the ratio of the sizes). Note that RVV requires kPow2 to be the ratio of
+  // the sizes so that the correct LMUL overloads are chosen, even if N is
+  // small enough that it would fit in an LMUL=1 vector.
+  //
+  // Cannot be an enum because GCC warns when using enums and non-enums in the
+  // same expression. Cannot be a static constexpr function (MSVC limitation).
+  // Rounded up to one so this is a valid array length.
+  //
+  // Do not use this directly - only 'public' so it is visible from the accessor
+  // macro required by MSVC.
+  static constexpr size_t kPrivateLanes =
+      HWY_MAX(size_t{1}, detail::ScaleByPower(kWhole, kPow2 - kFrac));
+  // Upper bounds: lane count, total bytes, and 16-byte blocks (rounded up).
+  constexpr size_t MaxLanes() const { return kPrivateLanes; }
+  constexpr size_t MaxBytes() const { return kPrivateLanes * sizeof(Lane); }
+  constexpr size_t MaxBlocks() const { return (MaxBytes() + 15) / 16; }
+  // For SFINAE on RVV.
+  constexpr int Pow2() const { return kPow2; }
+
+  // ------------------------------ Changing lane type or count
+  // Do not use any of these directly. Anything used from member typedefs cannot
+  // be made private, but functions only used within other functions can.
+
+  // Returns number of NewT lanes that fit within MaxBytes().
+  template <typename NewT>
+  static constexpr size_t RepartitionLanes() {
+    // Round up to correctly handle larger NewT.
+    return (kPrivateLanes * sizeof(T) + sizeof(NewT) - 1) / sizeof(NewT);
+  }
+
+  // Returns the new kPow2 for NewT lanes: kPow2 +/- log2 of the size ratio.
+  template <typename NewT>
+  static constexpr int RebindPow2() {
+    return kPow2 +
+           ((sizeof(NewT) >= sizeof(T))
+                ? static_cast<int>(CeilLog2(sizeof(NewT) / sizeof(T)))
+                : -static_cast<int>(CeilLog2(sizeof(T) / sizeof(NewT))));
+  }
+
+ private:
+  // Returns 0 or whole NewN such that kNewMaxLanes = NewN * 2^kNewPow2.
+  template <int kNewPow2, size_t kNewMaxLanes>
+  static constexpr size_t WholeN() {
+    return detail::ScaleByPower(kNewMaxLanes, -kNewPow2);
+  }
+
+  // Returns fractional NewN such that kNewMaxLanes = NewN * 2^kNewPow2.
+  template <int kNewPow2, size_t kNewMaxLanes>
+  static constexpr size_t FracN() {
+    // Only reached if kNewPow2 > CeilLog2(kNewMaxLanes) >= 0 (else WholeN
+    // would not have been zero), but clamp to zero to avoid warnings. kFrac is
+    // the difference, stored in the upper bits of N, and we also set kWhole =
+    // 1 so that the new kPrivateLanes = kNewMaxLanes.
+    static_assert(HWY_MAX_N <= (size_t{1} << 20), "Change bit shift");
+    return static_cast<size_t>(
+        1 + (HWY_MAX(0, kNewPow2 - static_cast<int>(CeilLog2(kNewMaxLanes)))
+             << 20));
+  }
+
+ public:
+  // Returns (whole or fractional) NewN, see above.
+  template <int kNewPow2, size_t kNewMaxLanes>
+  static constexpr size_t NewN() {
+    // We require a fraction if inverting kNewPow2 results in 0.
+    return WholeN<kNewPow2, kNewMaxLanes>() == 0
+               ? FracN<kNewPow2, kNewMaxLanes>()
+               : WholeN<kNewPow2, kNewMaxLanes>();
+  }
+
+  // PromoteTo/DemoteTo() with another lane type, but same number of lanes.
+  template <typename NewT>
+  using Rebind =
+      Simd<NewT, NewN<RebindPow2<NewT>(), kPrivateLanes>(), RebindPow2<NewT>()>;
+
+  // Change lane type while keeping the same vector size, e.g. for MulEven.
+  template <typename NewT>
+  using Repartition =
+      Simd<NewT, NewN<kPow2, RepartitionLanes<NewT>()>(), kPow2>;
+
+  // Half the lanes while keeping the same lane type, e.g. for LowerHalf.
+  using Half = Simd<T, N, kPow2 - 1>;
+
+  // Twice the lanes while keeping the same lane type, e.g. for Combine.
+  using Twice = Simd<T, N, kPow2 + 1>;
+};
+
+namespace detail {
+
+template <typename T, size_t N, int kPow2>
+constexpr bool IsFull(Simd<T, N, kPow2> /* d */) {
+  return N == HWY_LANES(T) && kPow2 == 0;  // exactly HWY_LANES(T), unscaled
+}
+
+// Struct wrappers enable validation of arguments via static_assert.
+template <typename T, size_t N, int kPow2>
+struct ClampNAndPow2 {
+  using type = Simd<T, HWY_MIN(N, HWY_MAX_N), HWY_MIN(kPow2, HWY_MAX_POW2)>;
+};
+// Backs ScalableTag: starts from HWY_LANES(T) lanes, then clamps.
+template <typename T, int kPow2>
+struct ScalableTagChecker {
+  using type = typename ClampNAndPow2<T, HWY_LANES(T), kPow2>::type;
+};
+// Backs CappedTag: the power-of-two limit is further capped at HWY_LANES(T).
+template <typename T, size_t kLimit, int kPow2>
+struct CappedTagChecker {
+  static_assert(kLimit != 0, "Does not make sense to have zero lanes");
+  // Safely handle non-power-of-two inputs by rounding down, which is allowed by
+  // CappedTag. Otherwise, Simd<T, 3, 0> would static_assert.
+  static constexpr size_t kLimitPow2 = size_t{1} << hwy::FloorLog2(kLimit);
+  static constexpr size_t N = HWY_MIN(kLimitPow2, HWY_LANES(T));
+  using type = typename ClampNAndPow2<T, N, kPow2>::type;
+};
+// Backs FixedTag: kNumLanes is used as-is (after validation) with kPow2 = 0.
+template <typename T, size_t kNumLanes>
+struct FixedTagChecker {
+  static_assert(kNumLanes != 0, "Does not make sense to have zero lanes");
+  static_assert(kNumLanes <= HWY_LANES(T), "Too many lanes");
+  using type = Simd<T, kNumLanes, 0>;
+};
+
+}  // namespace detail
+
+// ------------------------------ Aliases for Simd<>
+
+// Tag describing a full vector based on HWY_LANES(T) (kPow2 == 0: the most
+// common usage, e.g. 1D loops where the application does not care about the
+// vector size) or a fraction/multiple of one. Fractions (kPow2 < 0) are useful
+// for arguments or return values of type promotion and demotion. kPow2 above
+// HWY_MAX_POW2 is clamped, i.e. treated as `HWY_MIN(kPow2, HWY_MAX_POW2)`.
+template <typename T, int kPow2 = 0>
+using ScalableTag = typename detail::ScalableTagChecker<T, kPow2>::type;
+
+// Tag describing a vector with *up to* kLimit active lanes, even on targets
+// with scalable vectors and HWY_SCALAR. kLimit must be nonzero; a value that is
+// not a power of two is rounded down to one. The runtime lane count
+// `Lanes(tag)` may be less than kLimit, and is 1 on HWY_SCALAR. Typically used
+// for 1D loops with a low application-defined upper bound, e.g. for 8x8 DCTs.
+// However, vector-length-agnostic data structures are preferable (e.g. a hybrid
+// SoA with chunks of `M >= MaxLanes(d)` DC components followed by M AC1, ..,
+// and M AC63; this enables vector-length-agnostic loops using ScalableTag).
+// User-specified kPow2 is interpreted as `HWY_MIN(kPow2, HWY_MAX_POW2)`.
+template <typename T, size_t kLimit, int kPow2 = 0>
+using CappedTag = typename detail::CappedTagChecker<T, kLimit, kPow2>::type;
+
+#if HWY_HAVE_SCALABLE
+// On scalable targets (RVV/SVE), the cost of clamping Lanes() may exceed the
+// benefit, so kLimit is ignored and the tag is an ordinary ScalableTag.
+template <typename T, size_t kLimit, int kPow2 = 0>
+using CappedTagIfFixed = ScalableTag<T, kPow2>;
+#else  // !HWY_HAVE_SCALABLE
+// With a known vector size, capping at kLimit lanes can be beneficial, e.g.
+// AVX-512 has lower IPC and potentially costlier unaligned load/store than AVX2.
+template <typename T, size_t kLimit, int kPow2 = 0>
+using CappedTagIfFixed = CappedTag<T, kLimit, kPow2>;
+#endif  // HWY_HAVE_SCALABLE
+
+// Alias for a tag describing a vector with *exactly* kNumLanes active lanes,
+// even on targets with scalable vectors. `kNumLanes` must be a nonzero power
+// of two not exceeding `HWY_LANES(T)`; this is checked at compile time.
+//
+// NOTE: if the application does not need to support HWY_SCALAR (+), use this
+// instead of CappedTag to emphasize that there will be exactly kNumLanes lanes.
+// This is useful for data structures that rely on exactly 128-bit SIMD, but
+// these are discouraged because they cannot benefit from wider vectors.
+// Instead, applications would ideally define a larger problem size and loop
+// over it with the (unknown size) vectors from ScalableTag.
+//
+// (+) e.g. if the baseline is known to support SIMD, or the application
+//     requires ops such as TableLookupBytes not supported by HWY_SCALAR.
+template <typename T, size_t kNumLanes>
+using FixedTag = typename detail::FixedTagChecker<T, kNumLanes>::type;
+
+// Convenience tags for fixed total sizes: lane count = byte size / sizeof(T).
+template <typename T>
+using Full16 = Simd<T, 2 / sizeof(T), 0>;  // 2 bytes
+
+template <typename T>
+using Full32 = Simd<T, 4 / sizeof(T), 0>;  // 4 bytes
+
+template <typename T>
+using Full64 = Simd<T, 8 / sizeof(T), 0>;  // 8 bytes
+
+template <typename T>
+using Full128 = Simd<T, 16 / sizeof(T), 0>;  // 16 bytes
+
+// ------------------------------ Accessors for Simd<>
+
+// Lane type of the tag D (its member alias T).
+template <class D>
+using TFromD = typename D::T;
+
+// Upper bound on the number of lanes, typically used for SFINAE conditions and
+// to allocate storage for targets with known vector sizes. Note: this may be a
+// loose bound; use Lanes() instead as the actual size for AllocateAligned.
+// MSVC workaround: use static constant directly instead of a function.
+#define HWY_MAX_LANES_D(D) D::kPrivateLanes
+
+// Function form of HWY_MAX_LANES_D, taking a tag value instead of its type.
+// WARNING: MSVC has limitations on deducing arguments, so the macro form may
+// be required there.
+template <class Tag>
+HWY_INLINE HWY_MAYBE_UNUSED constexpr size_t MaxLanes(Tag /* d */) {
+  return HWY_MAX_LANES_D(Tag);
+}
+
+#if !HWY_HAVE_SCALABLE
+
+// Actual vector length, used when advancing loop counters. constexpr only on
+// non-scalable targets; otherwise the target's header defines a non-constexpr
+// version of this function.
+template <class Tag>
+HWY_INLINE HWY_MAYBE_UNUSED constexpr size_t Lanes(Tag /* d */) {
+  return HWY_MAX_LANES_D(Tag);
+}
+
+#endif  // !HWY_HAVE_SCALABLE
+
+// Tag for the same number of lanes as D, but with lane type T (see PromoteTo).
+template <class T, class D>
+using Rebind = typename D::template Rebind<T>;
+// Rebind to the MakeSigned/MakeUnsigned/MakeFloat form of D's lane type.
+template <class D>
+using RebindToSigned = Rebind<MakeSigned<TFromD<D>>, D>;
+template <class D>
+using RebindToUnsigned = Rebind<MakeUnsigned<TFromD<D>>, D>;
+template <class D>
+using RebindToFloat = Rebind<MakeFloat<TFromD<D>>, D>;
+
+// Tag for the same total size as D, but with lane type T.
+template <class T, class D>
+using Repartition = typename D::template Repartition<T>;
+// Repartition to the MakeWide/MakeNarrow form of D's lane type.
+template <class D>
+using RepartitionToWide = Repartition<MakeWide<TFromD<D>>, D>;
+template <class D>
+using RepartitionToNarrow = Repartition<MakeNarrow<TFromD<D>>, D>;
+
+// Tag for the same lane type as D, but half the lanes.
+template <class D>
+using Half = typename D::Half;
+
+// Tag for the same lane type as D, but twice the lanes.
+template <class D>
+using Twice = typename D::Twice;
+
+// Tag for a 16-byte block (or all of D, if smaller) with the same lane type as D
+#if HWY_HAVE_SCALABLE
+namespace detail {
+
+template <class D>
+class BlockDFromD_t {};  // primary template: no `type` unless D is a Simd<>
+
+template <typename T, size_t N, int kPow2>
+class BlockDFromD_t<Simd<T, N, kPow2>> {
+  using D = Simd<T, N, kPow2>;
+  static constexpr int kNewPow2 = HWY_MIN(kPow2, 0);  // positive kPow2 become 0
+  static constexpr size_t kMaxLpb = HWY_MIN(16 / sizeof(T), HWY_MAX_LANES_D(D));
+  static constexpr size_t kNewN = D::template NewN<kNewPow2, kMaxLpb>();
+
+ public:
+  using type = Simd<T, kNewN, kNewPow2>;
+};
+
+}  // namespace detail
+
+template <class D>
+using BlockDFromD = typename detail::BlockDFromD_t<RemoveConst<D>>::type;
+#else  // !HWY_HAVE_SCALABLE
+template <class D>
+using BlockDFromD =
+    Simd<TFromD<D>, HWY_MIN(16 / sizeof(TFromD<D>), HWY_MAX_LANES_D(D)), 0>;
+#endif  // HWY_HAVE_SCALABLE
+
+// ------------------------------ Choosing overloads (SFINAE)
+
+// Same as base.h macros but with a Simd<T, N, kPow2> argument instead of T:
+#define HWY_IF_UNSIGNED_D(D) HWY_IF_UNSIGNED(TFromD<D>)
+#define HWY_IF_SIGNED_D(D) HWY_IF_SIGNED(TFromD<D>)
+#define HWY_IF_FLOAT_D(D) HWY_IF_FLOAT(TFromD<D>)
+#define HWY_IF_NOT_FLOAT_D(D) HWY_IF_NOT_FLOAT(TFromD<D>)
+#define HWY_IF_FLOAT3264_D(D) HWY_IF_FLOAT3264(TFromD<D>)
+#define HWY_IF_NOT_FLOAT3264_D(D) HWY_IF_NOT_FLOAT3264(TFromD<D>)
+#define HWY_IF_SPECIAL_FLOAT_D(D) HWY_IF_SPECIAL_FLOAT(TFromD<D>)
+#define HWY_IF_NOT_SPECIAL_FLOAT_D(D) HWY_IF_NOT_SPECIAL_FLOAT(TFromD<D>)
+#define HWY_IF_FLOAT_OR_SPECIAL_D(D) HWY_IF_FLOAT_OR_SPECIAL(TFromD<D>)
+#define HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D) \
+  HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<D>)
+
+#define HWY_IF_T_SIZE_D(D, bytes) HWY_IF_T_SIZE(TFromD<D>, bytes)
+#define HWY_IF_NOT_T_SIZE_D(D, bytes) HWY_IF_NOT_T_SIZE(TFromD<D>, bytes)
+#define HWY_IF_T_SIZE_ONE_OF_D(D, bit_array) \
+  HWY_IF_T_SIZE_ONE_OF(TFromD<D>, bit_array)
+// Lane-count conditions; all use the tag's upper bound HWY_MAX_LANES_D(D).
+#define HWY_IF_LANES_D(D, lanes) HWY_IF_LANES(HWY_MAX_LANES_D(D), lanes)
+#define HWY_IF_LANES_LE_D(D, lanes) HWY_IF_LANES_LE(HWY_MAX_LANES_D(D), lanes)
+#define HWY_IF_LANES_GT_D(D, lanes) HWY_IF_LANES_GT(HWY_MAX_LANES_D(D), lanes)
+#define HWY_IF_LANES_PER_BLOCK_D(D, lanes) \
+  HWY_IF_LANES_PER_BLOCK(                  \
+      TFromD<D>, HWY_MIN(HWY_MAX_LANES_D(D), 16 / sizeof(TFromD<D>)), lanes)
+// Conditions on the tag's kPow2, read via Simd::Pow2().
+#define HWY_IF_POW2_LE_D(D, pow2) hwy::EnableIf<D().Pow2() <= pow2>* = nullptr
+#define HWY_IF_POW2_GT_D(D, pow2) hwy::EnableIf<(D().Pow2() > pow2)>* = nullptr
+
+#define HWY_IF_U8_D(D) hwy::EnableIf<IsSame<TFromD<D>, uint8_t>()>* = nullptr
+#define HWY_IF_U16_D(D) hwy::EnableIf<IsSame<TFromD<D>, uint16_t>()>* = nullptr
+#define HWY_IF_U32_D(D) hwy::EnableIf<IsSame<TFromD<D>, uint32_t>()>* = nullptr
+#define HWY_IF_U64_D(D) hwy::EnableIf<IsSame<TFromD<D>, uint64_t>()>* = nullptr
+
+#define HWY_IF_I8_D(D) hwy::EnableIf<IsSame<TFromD<D>, int8_t>()>* = nullptr
+#define HWY_IF_I16_D(D) hwy::EnableIf<IsSame<TFromD<D>, int16_t>()>* = nullptr
+#define HWY_IF_I32_D(D) hwy::EnableIf<IsSame<TFromD<D>, int32_t>()>* = nullptr
+#define HWY_IF_I64_D(D) hwy::EnableIf<IsSame<TFromD<D>, int64_t>()>* = nullptr
+
+// Use instead of HWY_IF_T_SIZE_D to avoid ambiguity with float16_t/float/double
+// overloads.
+#define HWY_IF_UI16_D(D) HWY_IF_UI16(TFromD<D>)
+#define HWY_IF_UI32_D(D) HWY_IF_UI32(TFromD<D>)
+#define HWY_IF_UI64_D(D) HWY_IF_UI64(TFromD<D>)
+
+#define HWY_IF_BF16_D(D) \
+  hwy::EnableIf<IsSame<TFromD<D>, hwy::bfloat16_t>()>* = nullptr
+#define HWY_IF_F16_D(D) \
+  hwy::EnableIf<IsSame<TFromD<D>, hwy::float16_t>()>* = nullptr
+#define HWY_IF_F32_D(D) hwy::EnableIf<IsSame<TFromD<D>, float>()>* = nullptr
+#define HWY_IF_F64_D(D) hwy::EnableIf<IsSame<TFromD<D>, double>()>* = nullptr
+// Vector-size conditions: forward the lane type and HWY_MAX_LANES_D(D).
+#define HWY_IF_V_SIZE_D(D, bytes) \
+  HWY_IF_V_SIZE(TFromD<D>, HWY_MAX_LANES_D(D), bytes)
+#define HWY_IF_V_SIZE_LE_D(D, bytes) \
+  HWY_IF_V_SIZE_LE(TFromD<D>, HWY_MAX_LANES_D(D), bytes)
+#define HWY_IF_V_SIZE_GT_D(D, bytes) \
+  HWY_IF_V_SIZE_GT(TFromD<D>, HWY_MAX_LANES_D(D), bytes)
+
+// Same, but with a vector argument. ops/*-inl.h define their own TFromV.
+#define HWY_IF_UNSIGNED_V(V) HWY_IF_UNSIGNED(TFromV<V>)
+#define HWY_IF_SIGNED_V(V) HWY_IF_SIGNED(TFromV<V>)
+#define HWY_IF_FLOAT_V(V) HWY_IF_FLOAT(TFromV<V>)
+#define HWY_IF_NOT_FLOAT_V(V) HWY_IF_NOT_FLOAT(TFromV<V>)
+#define HWY_IF_SPECIAL_FLOAT_V(V) HWY_IF_SPECIAL_FLOAT(TFromV<V>)
+#define HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V) \
+  HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromV<V>)
+
+#define HWY_IF_T_SIZE_V(V, bytes) HWY_IF_T_SIZE(TFromV<V>, bytes)
+#define HWY_IF_NOT_T_SIZE_V(V, bytes) HWY_IF_NOT_T_SIZE(TFromV<V>, bytes)
+#define HWY_IF_T_SIZE_ONE_OF_V(V, bit_array) \
+  HWY_IF_T_SIZE_ONE_OF(TFromV<V>, bit_array)
+// Upper bound on the lanes of vector type V, obtained via its tag DFromV<V>.
+#define HWY_MAX_LANES_V(V) HWY_MAX_LANES_D(DFromV<V>)
+#define HWY_IF_V_SIZE_V(V, bytes) \
+  HWY_IF_V_SIZE(TFromV<V>, HWY_MAX_LANES_V(V), bytes)
+#define HWY_IF_V_SIZE_LE_V(V, bytes) \
+  HWY_IF_V_SIZE_LE(TFromV<V>, HWY_MAX_LANES_V(V), bytes)
+#define HWY_IF_V_SIZE_GT_V(V, bytes) \
+  HWY_IF_V_SIZE_GT(TFromV<V>, HWY_MAX_LANES_V(V), bytes)
+
+// Old names (deprecated): aliases of HWY_IF_T_SIZE_D / HWY_IF_NOT_T_SIZE_D.
+#define HWY_IF_LANE_SIZE_D(D, bytes) HWY_IF_T_SIZE_D(D, bytes)
+#define HWY_IF_NOT_LANE_SIZE_D(D, bytes) HWY_IF_NOT_T_SIZE_D(D, bytes)
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#endif  // HIGHWAY_HWY_OPS_SHARED_TOGGLE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/ops/tuple-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/tuple-inl.h
new file mode 100644
index 0000000000..9def0610b0
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/tuple-inl.h
@@ -0,0 +1,125 @@
+// Copyright 2023 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Tuple support. Included by those ops/* that lack native tuple types, after
+// they define VFromD and before they use the tuples e.g. for LoadInterleaved2.
+// Assumes we are already in the HWY_NAMESPACE and under an include guard.
+
+// If viewing this header standalone, define a stub VFromD to avoid IDE warnings.
+// HWY_NAMESPACE is normally set by set_macros-inl.h before this is included.
+#if !defined(HWY_NAMESPACE)
+#include "hwy/base.h"
+template <class D>
+using VFromD = int;  // placeholder only, not a real vector type
+#endif  // !defined(HWY_NAMESPACE)
+
+// Plain structs of 2..4 vectors. (On SVE, Vec2..4 are aliases to built-in types.)
+template <class D>
+struct Vec2 {
+  VFromD<D> v0;  // index 0 in Get2/Set2
+  VFromD<D> v1;  // index 1
+};
+
+template <class D>
+struct Vec3 {
+  VFromD<D> v0;  // index 0 in Get3/Set3
+  VFromD<D> v1;  // index 1
+  VFromD<D> v2;  // index 2
+};
+
+template <class D>
+struct Vec4 {
+  VFromD<D> v0;  // index 0 in Get4/Set4
+  VFromD<D> v1;  // index 1
+  VFromD<D> v2;  // index 2
+  VFromD<D> v3;  // index 3
+};
+
+// The tag argument carries no data; it exists only so that D can be deduced.
+template <class D>
+HWY_API Vec2<D> Create2(D /* d */, VFromD<D> v0, VFromD<D> v1) {
+  return {v0, v1};
+}
+
+template <class D>
+HWY_API Vec3<D> Create3(D /* d */, VFromD<D> v0, VFromD<D> v1, VFromD<D> v2) {
+  return {v0, v1, v2};
+}
+
+template <class D>
+HWY_API Vec4<D> Create4(D /* d */, VFromD<D> v0, VFromD<D> v1, VFromD<D> v2,
+                        VFromD<D> v3) {
+  return {v0, v1, v2, v3};
+}
+
+// GetN<kIndex>(tuple) returns a copy of element kIndex of a VecN tuple. The
+// index is a template argument and is range-checked at compile time.
+template <size_t kIndex, class D>
+HWY_API VFromD<D> Get2(Vec2<D> tuple) {
+  static_assert(kIndex < 2, "Tuple index out of bounds");
+  return kIndex != 0 ? tuple.v1 : tuple.v0;
+}
+
+template <size_t kIndex, class D>
+HWY_API VFromD<D> Get3(Vec3<D> tuple) {
+  static_assert(kIndex < 3, "Tuple index out of bounds");
+  return kIndex == 2 ? tuple.v2 : (kIndex == 1 ? tuple.v1 : tuple.v0);
+}
+
+template <size_t kIndex, class D>
+HWY_API VFromD<D> Get4(Vec4<D> tuple) {
+  static_assert(kIndex < 4, "Tuple index out of bounds");
+  return kIndex < 2 ? (kIndex == 0 ? tuple.v0 : tuple.v1)
+                    : (kIndex == 2 ? tuple.v2 : tuple.v3);
+}
+
+template <size_t kIndex, class D>
+HWY_API Vec2<D> Set2(Vec2<D> tuple, VFromD<D> val) {  // returns updated copy
+  static_assert(kIndex < 2, "Tuple index out of bounds");
+  if (kIndex != 0) {
+    tuple.v1 = val;
+  } else {
+    tuple.v0 = val;
+  }
+  return tuple;  // by-value parameter: the caller's tuple is unchanged
+}
+
+template <size_t kIndex, class D>
+HWY_API Vec3<D> Set3(Vec3<D> tuple, VFromD<D> val) {  // returns updated copy
+  static_assert(kIndex < 3, "Tuple index out of bounds");
+  if (kIndex == 2) {
+    tuple.v2 = val;
+  } else if (kIndex == 1) {
+    tuple.v1 = val;
+  } else {  // kIndex == 0
+    tuple.v0 = val;
+  }
+  return tuple;
+}
+
+template <size_t kIndex, class D>
+HWY_API Vec4<D> Set4(Vec4<D> tuple, VFromD<D> val) {  // returns updated copy
+  static_assert(kIndex < 4, "Tuple index out of bounds");
+  if (kIndex == 3) {
+    tuple.v3 = val;
+  } else if (kIndex == 2) {
+    tuple.v2 = val;
+  } else if (kIndex == 1) {
+    tuple.v1 = val;
+  } else {  // kIndex == 0
+    tuple.v0 = val;
+  }
+  return tuple;
+}
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/ops/wasm_128-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/wasm_128-inl.h
new file mode 100644
index 0000000000..b3f1b66d76
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/wasm_128-inl.h
@@ -0,0 +1,5718 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// 128-bit WASM vectors and operations.
+// External include guard in highway.h - see comment there.
+
+#include <wasm_simd128.h>
+
+#include "hwy/base.h"
+#include "hwy/ops/shared-inl.h"
+
+#ifdef HWY_WASM_OLD_NAMES  // map current intrinsic names to their older spellings
+#define wasm_i8x16_shuffle wasm_v8x16_shuffle
+#define wasm_i16x8_shuffle wasm_v16x8_shuffle
+#define wasm_i32x4_shuffle wasm_v32x4_shuffle
+#define wasm_i64x2_shuffle wasm_v64x2_shuffle
+#define wasm_u16x8_extend_low_u8x16 wasm_i16x8_widen_low_u8x16
+#define wasm_u32x4_extend_low_u16x8 wasm_i32x4_widen_low_u16x8
+#define wasm_i32x4_extend_low_i16x8 wasm_i32x4_widen_low_i16x8
+#define wasm_i16x8_extend_low_i8x16 wasm_i16x8_widen_low_i8x16
+#define wasm_u32x4_extend_high_u16x8 wasm_i32x4_widen_high_u16x8
+#define wasm_i32x4_extend_high_i16x8 wasm_i32x4_widen_high_i16x8
+#define wasm_i32x4_trunc_sat_f32x4 wasm_i32x4_trunc_saturate_f32x4
+#define wasm_i64x2_trunc_sat_f64x2 wasm_i64x2_trunc_saturate_f64x2
+#define wasm_u8x16_add_sat wasm_u8x16_add_saturate
+#define wasm_u8x16_sub_sat wasm_u8x16_sub_saturate
+#define wasm_u16x8_add_sat wasm_u16x8_add_saturate
+#define wasm_u16x8_sub_sat wasm_u16x8_sub_saturate
+#define wasm_i8x16_add_sat wasm_i8x16_add_saturate
+#define wasm_i8x16_sub_sat wasm_i8x16_sub_saturate
+#define wasm_i16x8_add_sat wasm_i16x8_add_saturate
+#define wasm_i16x8_sub_sat wasm_i16x8_sub_saturate
+#endif  // HWY_WASM_OLD_NAMES
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+#if HWY_TARGET == HWY_WASM_EMU256
+template <typename T>
+using Full256 = Simd<T, 32 / sizeof(T), 0>;  // tag for 32 bytes of T lanes
+#endif  // HWY_TARGET == HWY_WASM_EMU256
+
+namespace detail {
+// Maps a lane type to the raw vector type used to store it.
+template <typename T>
+struct Raw128 {
+  using type = __v128_u;  // every lane type without a specialization below
+};
+template <>
+struct Raw128<float> {
+  using type = __f32x4;
+};
+template <>
+struct Raw128<double> {
+  using type = __f64x2;
+};
+
+}  // namespace detail
+
+template <typename T, size_t N = 16 / sizeof(T)>
+class Vec128 {  // N lanes of T; by default as many as fit in 16 bytes
+  using Raw = typename detail::Raw128<T>::type;
+
+ public:
+  using PrivateT = T;                     // only for DFromV
+  static constexpr size_t kPrivateN = N;  // only for DFromV
+
+  // Compound assignment, each implemented via the matching binary operator.
+  // Only usable if there is a corresponding non-member binary operator
+  // overload. For example, only f32 and f64 support division.
+  HWY_INLINE Vec128& operator*=(const Vec128 other) {
+    return *this = (*this * other);
+  }
+  HWY_INLINE Vec128& operator/=(const Vec128 other) {
+    return *this = (*this / other);
+  }
+  HWY_INLINE Vec128& operator+=(const Vec128 other) {
+    return *this = (*this + other);
+  }
+  HWY_INLINE Vec128& operator-=(const Vec128 other) {
+    return *this = (*this - other);
+  }
+  HWY_INLINE Vec128& operator&=(const Vec128 other) {
+    return *this = (*this & other);
+  }
+  HWY_INLINE Vec128& operator|=(const Vec128 other) {
+    return *this = (*this | other);
+  }
+  HWY_INLINE Vec128& operator^=(const Vec128 other) {
+    return *this = (*this ^ other);
+  }
+
+  Raw raw;  // underlying vector; brace-initialized directly, e.g. in Zero()
+};
+
+template <typename T>
+using Vec64 = Vec128<T, 8 / sizeof(T)>;  // lanes that fit in 8 bytes
+
+template <typename T>
+using Vec32 = Vec128<T, 4 / sizeof(T)>;  // lanes that fit in 4 bytes
+
+template <typename T>
+using Vec16 = Vec128<T, 2 / sizeof(T)>;  // lanes that fit in 2 bytes
+
+// Mask counterpart of Vec128<T, N>, stored in the same raw type: FF..FF or 0.
+template <typename T, size_t N = 16 / sizeof(T)>
+struct Mask128 {
+  using PrivateT = T;                     // only for DFromM
+  static constexpr size_t kPrivateN = N;  // only for DFromM
+
+  typename detail::Raw128<T>::type raw;
+};
+
+template <class V>
+using DFromV = Simd<typename V::PrivateT, V::kPrivateN, 0>;  // tag of vector V
+
+template <class M>
+using DFromM = Simd<typename M::PrivateT, M::kPrivateN, 0>;  // tag of mask M
+
+template <class V>
+using TFromV = typename V::PrivateT;  // lane type of vector V
+
+// ------------------------------ Zero
+
+// All-zero vector. Spelled with HWY_MAX_LANES_D as VFromD is defined via Zero.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API Vec128<TFromD<D>, HWY_MAX_LANES_D(D)> Zero(D /* tag */) {
+  return Vec128<TFromD<D>, HWY_MAX_LANES_D(D)>{wasm_i32x4_splat(0)};
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API Vec128<TFromD<D>, HWY_MAX_LANES_D(D)> Zero(D /* tag */) {
+  return Vec128<TFromD<D>, HWY_MAX_LANES_D(D)>{wasm_f32x4_splat(0.0f)};
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API Vec128<TFromD<D>, HWY_MAX_LANES_D(D)> Zero(D /* tag */) {
+  return Vec128<TFromD<D>, HWY_MAX_LANES_D(D)>{wasm_f64x2_splat(0.0)};
+}
+// Vector type for tag D: the return type of the matching Zero overload.
+template <class D>
+using VFromD = decltype(Zero(D()));
+
+// ------------------------------ Tuple (VFromD)
+#include "hwy/ops/tuple-inl.h"
+
+// ------------------------------ BitCast
+
+namespace detail {
+
+HWY_INLINE __v128_u BitCastToInteger(__v128_u v) { return v; }
+HWY_INLINE __v128_u BitCastToInteger(__f32x4 v) {
+  return static_cast<__v128_u>(v);
+}
+HWY_INLINE __v128_u BitCastToInteger(__f64x2 v) {
+  return static_cast<__v128_u>(v);
+}
+// Reinterprets v as a vector of N * sizeof(T) uint8_t lanes.
+template <typename T, size_t N>
+HWY_INLINE Vec128<uint8_t, N * sizeof(T)> BitCastToByte(Vec128<T, N> v) {
+  return Vec128<uint8_t, N * sizeof(T)>{BitCastToInteger(v.raw)};
+}
+
+// Cannot rely on function overloading because return types differ.
+template <typename T>
+struct BitCastFromInteger128 {
+  HWY_INLINE __v128_u operator()(__v128_u v) { return v; }
+};
+template <>
+struct BitCastFromInteger128<float> {
+  HWY_INLINE __f32x4 operator()(__v128_u v) { return static_cast<__f32x4>(v); }
+};
+template <>
+struct BitCastFromInteger128<double> {
+  HWY_INLINE __f64x2 operator()(__v128_u v) { return static_cast<__f64x2>(v); }
+};
+// Reinterprets a uint8_t vector of d.MaxBytes() lanes as VFromD<D>.
+template <class D>
+HWY_INLINE VFromD<D> BitCastFromByte(D d, Vec128<uint8_t, d.MaxBytes()> v) {
+  return VFromD<D>{BitCastFromInteger128<TFromD<D>>()(v.raw)};
+}
+
+}  // namespace detail
+
+// Converts v to the vector type of tag D by going through a byte vector:
+// first detail::BitCastToByte, then detail::BitCastFromByte.
+template <class D, typename FromT>
+HWY_API VFromD<D> BitCast(D d,
+                          Vec128<FromT, Repartition<FromT, D>().MaxLanes()> v) {
+  const auto as_bytes = detail::BitCastToByte(v);
+  return detail::BitCastFromByte(d, as_bytes);
+}
+
+// ------------------------------ ResizeBitCast
+
+// Converts v (at most 16 bytes) to the vector type of tag D (at most 16
+// bytes). The source raw value is wrapped in a byte vector sized for the
+// destination tag and then passed to BitCast.
+template <class D, typename FromV, HWY_IF_V_SIZE_LE_V(FromV, 16),
+          HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> ResizeBitCast(D d, FromV v) {
+  using BytesTo = VFromD<Repartition<uint8_t, decltype(d)>>;
+  const BytesTo as_bytes{detail::BitCastToInteger(v.raw)};
+  return BitCast(d, as_bytes);
+}
+
+// ------------------------------ Set
+
+// Returns a vector for tag D built from the wasm splat intrinsic matching the
+// lane size (1/2/4/8 bytes); t is first cast to the signed integer type of
+// that width.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Set(D /* tag */, TFromD<D> t) {
+  using V = VFromD<D>;
+  return V{wasm_i8x16_splat(static_cast<int8_t>(t))};
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Set(D /* tag */, TFromD<D> t) {
+  using V = VFromD<D>;
+  return V{wasm_i16x8_splat(static_cast<int16_t>(t))};
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> Set(D /* tag */, TFromD<D> t) {
+  using V = VFromD<D>;
+  return V{wasm_i32x4_splat(static_cast<int32_t>(t))};
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> Set(D /* tag */, TFromD<D> t) {
+  using V = VFromD<D>;
+  return V{wasm_i64x2_splat(static_cast<int64_t>(t))};
+}
+
+// float / double tags use the floating-point splat intrinsics directly.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> Set(D /* tag */, const float t) {
+  using V = VFromD<D>;
+  return V{wasm_f32x4_splat(t)};
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> Set(D /* tag */, const double t) {
+  using V = VFromD<D>;
+  return V{wasm_f64x2_splat(t)};
+}
+
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4700, ignored "-Wuninitialized")
+
+// For all vector sizes.
+// This implementation simply forwards to Zero(d), so the returned vector is
+// whatever Zero produces for the tag.
+template <class D>
+HWY_API VFromD<D> Undefined(D d) {
+  return Zero(d);
+}
+
+HWY_DIAGNOSTICS(pop)
+
+// For all vector sizes.
+// Fills an aligned scratch array with AddWithWraparound(first, i) for each
+// lane index i, then loads it as a vector via Load.
+template <class D, typename T = TFromD<D>, typename T2>
+HWY_API VFromD<D> Iota(D d, const T2 first) {
+  HWY_ALIGN T lanes[MaxLanes(d)];
+  size_t i = 0;
+  while (i < MaxLanes(d)) {
+    lanes[i] =
+        AddWithWraparound(hwy::IsFloatTag<T>(), static_cast<T>(first), i);
+    ++i;
+  }
+  return Load(d, lanes);
+}
+
+// ================================================== ARITHMETIC
+
+// ------------------------------ Addition
+
+// Lane-wise addition. Each overload forwards to the wasm add intrinsic for
+// its lane width.
+
+// Unsigned
+template <size_t N>
+HWY_API Vec128<uint8_t, N> operator+(const Vec128<uint8_t, N> a,
+                                     const Vec128<uint8_t, N> b) {
+  using V = Vec128<uint8_t, N>;
+  return V{wasm_i8x16_add(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> operator+(const Vec128<uint16_t, N> a,
+                                      const Vec128<uint16_t, N> b) {
+  using V = Vec128<uint16_t, N>;
+  return V{wasm_i16x8_add(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> operator+(const Vec128<uint32_t, N> a,
+                                      const Vec128<uint32_t, N> b) {
+  using V = Vec128<uint32_t, N>;
+  return V{wasm_i32x4_add(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, N> operator+(const Vec128<uint64_t, N> a,
+                                      const Vec128<uint64_t, N> b) {
+  using V = Vec128<uint64_t, N>;
+  return V{wasm_i64x2_add(a.raw, b.raw)};
+}
+
+// Signed
+template <size_t N>
+HWY_API Vec128<int8_t, N> operator+(const Vec128<int8_t, N> a,
+                                    const Vec128<int8_t, N> b) {
+  using V = Vec128<int8_t, N>;
+  return V{wasm_i8x16_add(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> operator+(const Vec128<int16_t, N> a,
+                                     const Vec128<int16_t, N> b) {
+  using V = Vec128<int16_t, N>;
+  return V{wasm_i16x8_add(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> operator+(const Vec128<int32_t, N> a,
+                                     const Vec128<int32_t, N> b) {
+  using V = Vec128<int32_t, N>;
+  return V{wasm_i32x4_add(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int64_t, N> operator+(const Vec128<int64_t, N> a,
+                                     const Vec128<int64_t, N> b) {
+  using V = Vec128<int64_t, N>;
+  return V{wasm_i64x2_add(a.raw, b.raw)};
+}
+
+// Float
+template <size_t N>
+HWY_API Vec128<float, N> operator+(const Vec128<float, N> a,
+                                   const Vec128<float, N> b) {
+  using V = Vec128<float, N>;
+  return V{wasm_f32x4_add(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<double, N> operator+(const Vec128<double, N> a,
+                                    const Vec128<double, N> b) {
+  using V = Vec128<double, N>;
+  return V{wasm_f64x2_add(a.raw, b.raw)};
+}
+
+// ------------------------------ Subtraction
+
+// Lane-wise subtraction (a - b). Each overload forwards to the wasm sub
+// intrinsic for its lane width.
+
+// Unsigned
+template <size_t N>
+HWY_API Vec128<uint8_t, N> operator-(const Vec128<uint8_t, N> a,
+                                     const Vec128<uint8_t, N> b) {
+  using V = Vec128<uint8_t, N>;
+  return V{wasm_i8x16_sub(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> operator-(Vec128<uint16_t, N> a,
+                                      Vec128<uint16_t, N> b) {
+  using V = Vec128<uint16_t, N>;
+  return V{wasm_i16x8_sub(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> operator-(const Vec128<uint32_t, N> a,
+                                      const Vec128<uint32_t, N> b) {
+  using V = Vec128<uint32_t, N>;
+  return V{wasm_i32x4_sub(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, N> operator-(const Vec128<uint64_t, N> a,
+                                      const Vec128<uint64_t, N> b) {
+  using V = Vec128<uint64_t, N>;
+  return V{wasm_i64x2_sub(a.raw, b.raw)};
+}
+
+// Signed
+template <size_t N>
+HWY_API Vec128<int8_t, N> operator-(const Vec128<int8_t, N> a,
+                                    const Vec128<int8_t, N> b) {
+  using V = Vec128<int8_t, N>;
+  return V{wasm_i8x16_sub(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> operator-(const Vec128<int16_t, N> a,
+                                     const Vec128<int16_t, N> b) {
+  using V = Vec128<int16_t, N>;
+  return V{wasm_i16x8_sub(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> operator-(const Vec128<int32_t, N> a,
+                                     const Vec128<int32_t, N> b) {
+  using V = Vec128<int32_t, N>;
+  return V{wasm_i32x4_sub(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int64_t, N> operator-(const Vec128<int64_t, N> a,
+                                     const Vec128<int64_t, N> b) {
+  using V = Vec128<int64_t, N>;
+  return V{wasm_i64x2_sub(a.raw, b.raw)};
+}
+
+// Float
+template <size_t N>
+HWY_API Vec128<float, N> operator-(const Vec128<float, N> a,
+                                   const Vec128<float, N> b) {
+  using V = Vec128<float, N>;
+  return V{wasm_f32x4_sub(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<double, N> operator-(const Vec128<double, N> a,
+                                    const Vec128<double, N> b) {
+  using V = Vec128<double, N>;
+  return V{wasm_f64x2_sub(a.raw, b.raw)};
+}
+
+// ------------------------------ SaturatedAdd
+
+// Returns a + b clamped to the destination range.
+// Provided for 8- and 16-bit lanes via the wasm add_sat intrinsics.
+
+// Unsigned
+template <size_t N>
+HWY_API Vec128<uint8_t, N> SaturatedAdd(const Vec128<uint8_t, N> a,
+                                        const Vec128<uint8_t, N> b) {
+  using V = Vec128<uint8_t, N>;
+  return V{wasm_u8x16_add_sat(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> SaturatedAdd(const Vec128<uint16_t, N> a,
+                                         const Vec128<uint16_t, N> b) {
+  using V = Vec128<uint16_t, N>;
+  return V{wasm_u16x8_add_sat(a.raw, b.raw)};
+}
+
+// Signed
+template <size_t N>
+HWY_API Vec128<int8_t, N> SaturatedAdd(const Vec128<int8_t, N> a,
+                                       const Vec128<int8_t, N> b) {
+  using V = Vec128<int8_t, N>;
+  return V{wasm_i8x16_add_sat(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> SaturatedAdd(const Vec128<int16_t, N> a,
+                                        const Vec128<int16_t, N> b) {
+  using V = Vec128<int16_t, N>;
+  return V{wasm_i16x8_add_sat(a.raw, b.raw)};
+}
+
+// ------------------------------ SaturatedSub
+
+// Returns a - b clamped to the destination range.
+// Provided for 8- and 16-bit lanes via the wasm sub_sat intrinsics.
+
+// Unsigned
+template <size_t N>
+HWY_API Vec128<uint8_t, N> SaturatedSub(const Vec128<uint8_t, N> a,
+                                        const Vec128<uint8_t, N> b) {
+  using V = Vec128<uint8_t, N>;
+  return V{wasm_u8x16_sub_sat(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> SaturatedSub(const Vec128<uint16_t, N> a,
+                                         const Vec128<uint16_t, N> b) {
+  using V = Vec128<uint16_t, N>;
+  return V{wasm_u16x8_sub_sat(a.raw, b.raw)};
+}
+
+// Signed
+template <size_t N>
+HWY_API Vec128<int8_t, N> SaturatedSub(const Vec128<int8_t, N> a,
+                                       const Vec128<int8_t, N> b) {
+  using V = Vec128<int8_t, N>;
+  return V{wasm_i8x16_sub_sat(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> SaturatedSub(const Vec128<int16_t, N> a,
+                                        const Vec128<int16_t, N> b) {
+  using V = Vec128<int16_t, N>;
+  return V{wasm_i16x8_sub_sat(a.raw, b.raw)};
+}
+
+// ------------------------------ Average
+
+// Returns (a + b + 1) / 2
+// Provided for unsigned 8- and 16-bit lanes via the wasm avgr intrinsics.
+
+// Unsigned
+template <size_t N>
+HWY_API Vec128<uint8_t, N> AverageRound(const Vec128<uint8_t, N> a,
+                                        const Vec128<uint8_t, N> b) {
+  using V = Vec128<uint8_t, N>;
+  return V{wasm_u8x16_avgr(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> AverageRound(const Vec128<uint16_t, N> a,
+                                         const Vec128<uint16_t, N> b) {
+  using V = Vec128<uint16_t, N>;
+  return V{wasm_u16x8_avgr(a.raw, b.raw)};
+}
+
+// ------------------------------ Absolute value
+
+// Returns absolute value, except that LimitsMin() maps to LimitsMax() + 1.
+// Signed integer overloads forward to the wasm abs intrinsic for the lane
+// width.
+template <size_t N>
+HWY_API Vec128<int8_t, N> Abs(const Vec128<int8_t, N> v) {
+  using V = Vec128<int8_t, N>;
+  return V{wasm_i8x16_abs(v.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> Abs(const Vec128<int16_t, N> v) {
+  using V = Vec128<int16_t, N>;
+  return V{wasm_i16x8_abs(v.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> Abs(const Vec128<int32_t, N> v) {
+  using V = Vec128<int32_t, N>;
+  return V{wasm_i32x4_abs(v.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int64_t, N> Abs(const Vec128<int64_t, N> v) {
+  using V = Vec128<int64_t, N>;
+  return V{wasm_i64x2_abs(v.raw)};
+}
+
+// Floating-point overloads.
+template <size_t N>
+HWY_API Vec128<float, N> Abs(const Vec128<float, N> v) {
+  using V = Vec128<float, N>;
+  return V{wasm_f32x4_abs(v.raw)};
+}
+template <size_t N>
+HWY_API Vec128<double, N> Abs(const Vec128<double, N> v) {
+  using V = Vec128<double, N>;
+  return V{wasm_f64x2_abs(v.raw)};
+}
+
+// ------------------------------ Shift lanes by constant #bits
+
+// Shifts every lane by the compile-time count kBits (16/32/64-bit lanes).
+// Unsigned right shifts use the wasm_u*_shr intrinsics, signed right shifts
+// the wasm_i*_shr intrinsics.
+
+// Unsigned
+template <int kBits, size_t N>
+HWY_API Vec128<uint16_t, N> ShiftLeft(const Vec128<uint16_t, N> v) {
+  using V = Vec128<uint16_t, N>;
+  return V{wasm_i16x8_shl(v.raw, kBits)};
+}
+template <int kBits, size_t N>
+HWY_API Vec128<uint16_t, N> ShiftRight(const Vec128<uint16_t, N> v) {
+  using V = Vec128<uint16_t, N>;
+  return V{wasm_u16x8_shr(v.raw, kBits)};
+}
+template <int kBits, size_t N>
+HWY_API Vec128<uint32_t, N> ShiftLeft(const Vec128<uint32_t, N> v) {
+  using V = Vec128<uint32_t, N>;
+  return V{wasm_i32x4_shl(v.raw, kBits)};
+}
+template <int kBits, size_t N>
+HWY_API Vec128<uint64_t, N> ShiftLeft(const Vec128<uint64_t, N> v) {
+  using V = Vec128<uint64_t, N>;
+  return V{wasm_i64x2_shl(v.raw, kBits)};
+}
+template <int kBits, size_t N>
+HWY_API Vec128<uint32_t, N> ShiftRight(const Vec128<uint32_t, N> v) {
+  using V = Vec128<uint32_t, N>;
+  return V{wasm_u32x4_shr(v.raw, kBits)};
+}
+template <int kBits, size_t N>
+HWY_API Vec128<uint64_t, N> ShiftRight(const Vec128<uint64_t, N> v) {
+  using V = Vec128<uint64_t, N>;
+  return V{wasm_u64x2_shr(v.raw, kBits)};
+}
+
+// Signed
+template <int kBits, size_t N>
+HWY_API Vec128<int16_t, N> ShiftLeft(const Vec128<int16_t, N> v) {
+  using V = Vec128<int16_t, N>;
+  return V{wasm_i16x8_shl(v.raw, kBits)};
+}
+template <int kBits, size_t N>
+HWY_API Vec128<int16_t, N> ShiftRight(const Vec128<int16_t, N> v) {
+  using V = Vec128<int16_t, N>;
+  return V{wasm_i16x8_shr(v.raw, kBits)};
+}
+template <int kBits, size_t N>
+HWY_API Vec128<int32_t, N> ShiftLeft(const Vec128<int32_t, N> v) {
+  using V = Vec128<int32_t, N>;
+  return V{wasm_i32x4_shl(v.raw, kBits)};
+}
+template <int kBits, size_t N>
+HWY_API Vec128<int64_t, N> ShiftLeft(const Vec128<int64_t, N> v) {
+  using V = Vec128<int64_t, N>;
+  return V{wasm_i64x2_shl(v.raw, kBits)};
+}
+template <int kBits, size_t N>
+HWY_API Vec128<int32_t, N> ShiftRight(const Vec128<int32_t, N> v) {
+  using V = Vec128<int32_t, N>;
+  return V{wasm_i32x4_shr(v.raw, kBits)};
+}
+template <int kBits, size_t N>
+HWY_API Vec128<int64_t, N> ShiftRight(const Vec128<int64_t, N> v) {
+  using V = Vec128<int64_t, N>;
+  return V{wasm_i64x2_shr(v.raw, kBits)};
+}
+
+// 8-bit
+// Shifts through the wider lane type and then masks each byte with
+// (0xFF << kBits) & 0xFF; kBits == 1 is handled as v + v instead.
+template <int kBits, typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T, N> ShiftLeft(const Vec128<T, N> v) {
+  if (kBits == 1) return v + v;
+
+  const DFromV<decltype(v)> d8;
+  // Use raw instead of BitCast to support N=1.
+  const Vec128<MakeWide<T>> wide{v.raw};
+  const Vec128<T, N> shifted{ShiftLeft<kBits>(wide).raw};
+  return shifted & Set(d8, static_cast<T>((0xFF << kBits) & 0xFF));
+}
+
+// uint8_t lanes: shifts as uint16_t lanes, then masks each byte with
+// 0xFF >> kBits.
+template <int kBits, size_t N>
+HWY_API Vec128<uint8_t, N> ShiftRight(const Vec128<uint8_t, N> v) {
+  const DFromV<decltype(v)> d8;
+  // Use raw instead of BitCast to support N=1.
+  const Vec128<uint16_t> wide{v.raw};
+  const Vec128<uint8_t, N> shifted{ShiftRight<kBits>(wide).raw};
+  return shifted & Set(d8, 0xFF >> kBits);
+}
+
+// int8_t lanes: performs the unsigned 8-bit shift, then XORs with and
+// subtracts the shifted sign-bit constant (0x80 >> kBits).
+template <int kBits, size_t N>
+HWY_API Vec128<int8_t, N> ShiftRight(const Vec128<int8_t, N> v) {
+  const DFromV<decltype(v)> di;
+  const RebindToUnsigned<decltype(di)> du;
+  const auto unsigned_shifted = ShiftRight<kBits>(BitCast(du, v));
+  const auto shifted = BitCast(di, unsigned_shifted);
+  const auto shifted_sign = BitCast(di, Set(du, 0x80 >> kBits));
+  const auto flipped = shifted ^ shifted_sign;
+  return flipped - shifted_sign;
+}
+
+// ------------------------------ RotateRight (ShiftRight, Or)
+// Rotates each lane right by kBits: ORs the right-shifted value with the
+// value shifted left by (lane bits - kBits). kBits == 0 returns v unchanged;
+// HWY_MIN keeps the left-shift count below the lane width in that case.
+template <int kBits, typename T, size_t N>
+HWY_API Vec128<T, N> RotateRight(const Vec128<T, N> v) {
+  constexpr size_t kSizeInBits = sizeof(T) * 8;
+  static_assert(0 <= kBits && kBits < kSizeInBits, "Invalid shift count");
+  if (kBits == 0) return v;
+  const auto low_part = ShiftRight<kBits>(v);
+  const auto high_part =
+      ShiftLeft<HWY_MIN(kSizeInBits - 1, kSizeInBits - kBits)>(v);
+  return Or(low_part, high_part);
+}
+
+// ------------------------------ Shift lanes by same variable #bits
+
+// After https://reviews.llvm.org/D108415 shift argument became unsigned.
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4245 4365, ignored "-Wsign-conversion")
+
+// Shifts every lane by the same runtime count `bits` (16/32/64-bit lanes).
+// Unsigned right shifts use the wasm_u*_shr intrinsics, signed right shifts
+// the wasm_i*_shr intrinsics.
+
+// Unsigned
+template <size_t N>
+HWY_API Vec128<uint16_t, N> ShiftLeftSame(const Vec128<uint16_t, N> v,
+                                          const int bits) {
+  using V = Vec128<uint16_t, N>;
+  return V{wasm_i16x8_shl(v.raw, bits)};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> ShiftRightSame(const Vec128<uint16_t, N> v,
+                                           const int bits) {
+  using V = Vec128<uint16_t, N>;
+  return V{wasm_u16x8_shr(v.raw, bits)};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> ShiftLeftSame(const Vec128<uint32_t, N> v,
+                                          const int bits) {
+  using V = Vec128<uint32_t, N>;
+  return V{wasm_i32x4_shl(v.raw, bits)};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> ShiftRightSame(const Vec128<uint32_t, N> v,
+                                           const int bits) {
+  using V = Vec128<uint32_t, N>;
+  return V{wasm_u32x4_shr(v.raw, bits)};
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, N> ShiftLeftSame(const Vec128<uint64_t, N> v,
+                                          const int bits) {
+  using V = Vec128<uint64_t, N>;
+  return V{wasm_i64x2_shl(v.raw, bits)};
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, N> ShiftRightSame(const Vec128<uint64_t, N> v,
+                                           const int bits) {
+  using V = Vec128<uint64_t, N>;
+  return V{wasm_u64x2_shr(v.raw, bits)};
+}
+
+// Signed
+template <size_t N>
+HWY_API Vec128<int16_t, N> ShiftLeftSame(const Vec128<int16_t, N> v,
+                                         const int bits) {
+  using V = Vec128<int16_t, N>;
+  return V{wasm_i16x8_shl(v.raw, bits)};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> ShiftRightSame(const Vec128<int16_t, N> v,
+                                          const int bits) {
+  using V = Vec128<int16_t, N>;
+  return V{wasm_i16x8_shr(v.raw, bits)};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> ShiftLeftSame(const Vec128<int32_t, N> v,
+                                         const int bits) {
+  using V = Vec128<int32_t, N>;
+  return V{wasm_i32x4_shl(v.raw, bits)};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> ShiftRightSame(const Vec128<int32_t, N> v,
+                                          const int bits) {
+  using V = Vec128<int32_t, N>;
+  return V{wasm_i32x4_shr(v.raw, bits)};
+}
+template <size_t N>
+HWY_API Vec128<int64_t, N> ShiftLeftSame(const Vec128<int64_t, N> v,
+                                         const int bits) {
+  using V = Vec128<int64_t, N>;
+  return V{wasm_i64x2_shl(v.raw, bits)};
+}
+template <size_t N>
+HWY_API Vec128<int64_t, N> ShiftRightSame(const Vec128<int64_t, N> v,
+                                          const int bits) {
+  using V = Vec128<int64_t, N>;
+  return V{wasm_i64x2_shr(v.raw, bits)};
+}
+
+// 8-bit
+// Shifts through the wider lane type and then masks each byte with
+// (0xFF << bits) & 0xFF.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T, N> ShiftLeftSame(const Vec128<T, N> v, const int bits) {
+  const DFromV<decltype(v)> d8;
+  // Use raw instead of BitCast to support N=1.
+  const Vec128<MakeWide<T>> wide{v.raw};
+  const Vec128<T, N> shifted{ShiftLeftSame(wide, bits).raw};
+  return shifted & Set(d8, static_cast<T>((0xFF << bits) & 0xFF));
+}
+
+// uint8_t lanes: shifts as uint16_t lanes, then masks each byte with
+// 0xFF >> bits.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> ShiftRightSame(Vec128<uint8_t, N> v,
+                                          const int bits) {
+  const DFromV<decltype(v)> d8;
+  // Use raw instead of BitCast to support N=1.
+  const Vec128<uint16_t> wide{v.raw};
+  const Vec128<uint8_t, N> shifted{ShiftRightSame(wide, bits).raw};
+  return shifted & Set(d8, 0xFF >> bits);
+}
+
+// int8_t lanes: performs the unsigned 8-bit shift, then XORs with and
+// subtracts the shifted sign-bit constant (0x80 >> bits).
+template <size_t N>
+HWY_API Vec128<int8_t, N> ShiftRightSame(Vec128<int8_t, N> v, const int bits) {
+  const DFromV<decltype(v)> di;
+  const RebindToUnsigned<decltype(di)> du;
+  const auto unsigned_shifted = ShiftRightSame(BitCast(du, v), bits);
+  const auto shifted = BitCast(di, unsigned_shifted);
+  const auto shifted_sign = BitCast(di, Set(du, 0x80 >> bits));
+  const auto flipped = shifted ^ shifted_sign;
+  return flipped - shifted_sign;
+}
+
+// ignore Wsign-conversion
+HWY_DIAGNOSTICS(pop)
+
+// ------------------------------ Minimum
+
+// Lane-wise minimum of unsigned vectors. 8/16/32-bit lanes use the wasm min
+// intrinsics; 64-bit lanes are extracted and compared one by one.
+
+// Unsigned
+template <size_t N>
+HWY_API Vec128<uint8_t, N> Min(Vec128<uint8_t, N> a, Vec128<uint8_t, N> b) {
+  using V = Vec128<uint8_t, N>;
+  return V{wasm_u8x16_min(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> Min(Vec128<uint16_t, N> a, Vec128<uint16_t, N> b) {
+  using V = Vec128<uint16_t, N>;
+  return V{wasm_u16x8_min(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> Min(Vec128<uint32_t, N> a, Vec128<uint32_t, N> b) {
+  using V = Vec128<uint32_t, N>;
+  return V{wasm_u32x4_min(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, N> Min(Vec128<uint64_t, N> a, Vec128<uint64_t, N> b) {
+  // Avoid wasm_u64x2_extract_lane - not all implementations have it yet.
+  const uint64_t a_lo =
+      static_cast<uint64_t>(wasm_i64x2_extract_lane(a.raw, 0));
+  const uint64_t b_lo =
+      static_cast<uint64_t>(wasm_i64x2_extract_lane(b.raw, 0));
+  const uint64_t a_hi =
+      static_cast<uint64_t>(wasm_i64x2_extract_lane(a.raw, 1));
+  const uint64_t b_hi =
+      static_cast<uint64_t>(wasm_i64x2_extract_lane(b.raw, 1));
+  alignas(16) uint64_t lanes[2];
+  lanes[0] = HWY_MIN(a_lo, b_lo);
+  lanes[1] = HWY_MIN(a_hi, b_hi);
+  return Vec128<uint64_t, N>{wasm_v128_load(lanes)};
+}
+
+// Signed
+// 8/16/32-bit lanes forward to the signed wasm min intrinsics.
+template <size_t N>
+HWY_API Vec128<int8_t, N> Min(Vec128<int8_t, N> a, Vec128<int8_t, N> b) {
+  using V = Vec128<int8_t, N>;
+  return V{wasm_i8x16_min(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> Min(Vec128<int16_t, N> a, Vec128<int16_t, N> b) {
+  using V = Vec128<int16_t, N>;
+  return V{wasm_i16x8_min(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> Min(Vec128<int32_t, N> a, Vec128<int32_t, N> b) {
+  using V = Vec128<int32_t, N>;
+  return V{wasm_i32x4_min(a.raw, b.raw)};
+}
+// Signed 64-bit lanes: each lane is extracted, compared with HWY_MIN, and the
+// two results are reloaded as a vector.
+template <size_t N>
+HWY_API Vec128<int64_t, N> Min(Vec128<int64_t, N> a, Vec128<int64_t, N> b) {
+  // Only two 64-bit lanes are written and then loaded, so the scratch buffer
+  // holds exactly two elements (previously it was declared with four).
+  alignas(16) int64_t min[2];
+  min[0] = HWY_MIN(wasm_i64x2_extract_lane(a.raw, 0),
+                   wasm_i64x2_extract_lane(b.raw, 0));
+  min[1] = HWY_MIN(wasm_i64x2_extract_lane(a.raw, 1),
+                   wasm_i64x2_extract_lane(b.raw, 1));
+  return Vec128<int64_t, N>{wasm_v128_load(min)};
+}
+
+// Float
+// Both overloads pass the operands to the wasm pmin intrinsic in swapped
+// order (b first), as explained in the per-function notes.
+template <size_t N>
+HWY_API Vec128<float, N> Min(Vec128<float, N> a, Vec128<float, N> b) {
+  using V = Vec128<float, N>;
+  // Equivalent to a < b ? a : b (taking into account our swapped arg order,
+  // so that Min(NaN, x) is x to match x86).
+  return V{wasm_f32x4_pmin(b.raw, a.raw)};
+}
+template <size_t N>
+HWY_API Vec128<double, N> Min(Vec128<double, N> a, Vec128<double, N> b) {
+  using V = Vec128<double, N>;
+  // Equivalent to a < b ? a : b (taking into account our swapped arg order,
+  // so that Min(NaN, x) is x to match x86).
+  return V{wasm_f64x2_pmin(b.raw, a.raw)};
+}
+
+// ------------------------------ Maximum
+
+// Lane-wise maximum of unsigned vectors. 8/16/32-bit lanes use the wasm max
+// intrinsics; 64-bit lanes are extracted and compared one by one.
+
+// Unsigned
+template <size_t N>
+HWY_API Vec128<uint8_t, N> Max(Vec128<uint8_t, N> a, Vec128<uint8_t, N> b) {
+  using V = Vec128<uint8_t, N>;
+  return V{wasm_u8x16_max(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> Max(Vec128<uint16_t, N> a, Vec128<uint16_t, N> b) {
+  using V = Vec128<uint16_t, N>;
+  return V{wasm_u16x8_max(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> Max(Vec128<uint32_t, N> a, Vec128<uint32_t, N> b) {
+  using V = Vec128<uint32_t, N>;
+  return V{wasm_u32x4_max(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, N> Max(Vec128<uint64_t, N> a, Vec128<uint64_t, N> b) {
+  // Avoid wasm_u64x2_extract_lane - not all implementations have it yet.
+  const uint64_t a_lo =
+      static_cast<uint64_t>(wasm_i64x2_extract_lane(a.raw, 0));
+  const uint64_t b_lo =
+      static_cast<uint64_t>(wasm_i64x2_extract_lane(b.raw, 0));
+  const uint64_t a_hi =
+      static_cast<uint64_t>(wasm_i64x2_extract_lane(a.raw, 1));
+  const uint64_t b_hi =
+      static_cast<uint64_t>(wasm_i64x2_extract_lane(b.raw, 1));
+  alignas(16) uint64_t lanes[2];
+  lanes[0] = HWY_MAX(a_lo, b_lo);
+  lanes[1] = HWY_MAX(a_hi, b_hi);
+  return Vec128<uint64_t, N>{wasm_v128_load(lanes)};
+}
+
+// Signed
+// 8/16/32-bit lanes forward to the signed wasm max intrinsics; 64-bit lanes
+// are extracted, compared with HWY_MAX, and reloaded as a vector.
+template <size_t N>
+HWY_API Vec128<int8_t, N> Max(Vec128<int8_t, N> a, Vec128<int8_t, N> b) {
+  using V = Vec128<int8_t, N>;
+  return V{wasm_i8x16_max(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> Max(Vec128<int16_t, N> a, Vec128<int16_t, N> b) {
+  using V = Vec128<int16_t, N>;
+  return V{wasm_i16x8_max(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> Max(Vec128<int32_t, N> a, Vec128<int32_t, N> b) {
+  using V = Vec128<int32_t, N>;
+  return V{wasm_i32x4_max(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int64_t, N> Max(Vec128<int64_t, N> a, Vec128<int64_t, N> b) {
+  const auto a_lo = wasm_i64x2_extract_lane(a.raw, 0);
+  const auto b_lo = wasm_i64x2_extract_lane(b.raw, 0);
+  const auto a_hi = wasm_i64x2_extract_lane(a.raw, 1);
+  const auto b_hi = wasm_i64x2_extract_lane(b.raw, 1);
+  alignas(16) int64_t lanes[2];
+  lanes[0] = HWY_MAX(a_lo, b_lo);
+  lanes[1] = HWY_MAX(a_hi, b_hi);
+  return Vec128<int64_t, N>{wasm_v128_load(lanes)};
+}
+
+// Float
+// Both overloads pass the operands to the wasm pmax intrinsic in swapped
+// order (b first), as explained in the per-function notes.
+template <size_t N>
+HWY_API Vec128<float, N> Max(Vec128<float, N> a, Vec128<float, N> b) {
+  using V = Vec128<float, N>;
+  // Equivalent to b < a ? a : b (taking into account our swapped arg order,
+  // so that Max(NaN, x) is x to match x86).
+  return V{wasm_f32x4_pmax(b.raw, a.raw)};
+}
+template <size_t N>
+HWY_API Vec128<double, N> Max(Vec128<double, N> a, Vec128<double, N> b) {
+  using V = Vec128<double, N>;
+  // Equivalent to b < a ? a : b (taking into account our swapped arg order,
+  // so that Max(NaN, x) is x to match x86).
+  return V{wasm_f64x2_pmax(b.raw, a.raw)};
+}
+
+// ------------------------------ Integer multiplication
+
+// Lane-wise integer multiplication for 16- and 32-bit lanes, forwarding to
+// the wasm mul intrinsic for the lane width.
+
+// Unsigned
+template <size_t N>
+HWY_API Vec128<uint16_t, N> operator*(const Vec128<uint16_t, N> a,
+                                      const Vec128<uint16_t, N> b) {
+  using V = Vec128<uint16_t, N>;
+  return V{wasm_i16x8_mul(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> operator*(const Vec128<uint32_t, N> a,
+                                      const Vec128<uint32_t, N> b) {
+  using V = Vec128<uint32_t, N>;
+  return V{wasm_i32x4_mul(a.raw, b.raw)};
+}
+
+// Signed
+template <size_t N>
+HWY_API Vec128<int16_t, N> operator*(const Vec128<int16_t, N> a,
+                                     const Vec128<int16_t, N> b) {
+  using V = Vec128<int16_t, N>;
+  return V{wasm_i16x8_mul(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> operator*(const Vec128<int32_t, N> a,
+                                     const Vec128<int32_t, N> b) {
+  using V = Vec128<int32_t, N>;
+  return V{wasm_i32x4_mul(a.raw, b.raw)};
+}
+
+// Returns the upper 16 bits of a * b in each lane.
+// The extmul intrinsics produce 32-bit products for the low and high halves;
+// the shuffle then picks the odd-indexed 16-bit elements of both results.
+template <size_t N>
+HWY_API Vec128<uint16_t, N> MulHigh(const Vec128<uint16_t, N> a,
+                                    const Vec128<uint16_t, N> b) {
+  using V = Vec128<uint16_t, N>;
+  const auto products_lo = wasm_u32x4_extmul_low_u16x8(a.raw, b.raw);
+  const auto products_hi = wasm_u32x4_extmul_high_u16x8(a.raw, b.raw);
+  // TODO(eustas): shift-right + narrow?
+  return V{wasm_i16x8_shuffle(products_lo, products_hi, 1, 3, 5, 7, 9, 11, 13,
+                              15)};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> MulHigh(const Vec128<int16_t, N> a,
+                                   const Vec128<int16_t, N> b) {
+  using V = Vec128<int16_t, N>;
+  const auto products_lo = wasm_i32x4_extmul_low_i16x8(a.raw, b.raw);
+  const auto products_hi = wasm_i32x4_extmul_high_i16x8(a.raw, b.raw);
+  // TODO(eustas): shift-right + narrow?
+  return V{wasm_i16x8_shuffle(products_lo, products_hi, 1, 3, 5, 7, 9, 11, 13,
+                              15)};
+}
+
+// int16_t fixed-point multiply, forwarded to wasm_i16x8_q15mulr_sat.
+template <size_t N>
+HWY_API Vec128<int16_t, N> MulFixedPoint15(Vec128<int16_t, N> a,
+                                           Vec128<int16_t, N> b) {
+  using V = Vec128<int16_t, N>;
+  return V{wasm_i16x8_q15mulr_sat(a.raw, b.raw)};
+}
+
+// Multiplies even lanes (0, 2 ..) and returns the double-width result.
+
+// Signed 8/16-bit: view the inputs as wide lanes, shift left then right by
+// the source lane width to isolate the even source lane, then multiply.
+template <class T, size_t N, HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2)),
+          HWY_IF_SIGNED(T)>
+HWY_API Vec128<MakeWide<T>, (N + 1) / 2> MulEven(const Vec128<T, N> a,
+                                                 const Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;
+  const RepartitionToWide<decltype(d)> dw;
+  constexpr int kSrcBits = sizeof(T) * 8;
+
+  const auto a_wide = ResizeBitCast(dw, a);
+  const auto b_wide = ResizeBitCast(dw, b);
+  const auto a_even = ShiftRight<kSrcBits>(ShiftLeft<kSrcBits>(a_wide));
+  const auto b_even = ShiftRight<kSrcBits>(ShiftLeft<kSrcBits>(b_wide));
+  return a_even * b_even;
+}
+// Unsigned 8/16-bit: mask each wide lane with LimitsMax<T>() to keep the even
+// source lane, then multiply.
+template <class T, size_t N, HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2)),
+          HWY_IF_UNSIGNED(T)>
+HWY_API Vec128<MakeWide<T>, (N + 1) / 2> MulEven(const Vec128<T, N> a,
+                                                 const Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;
+  const RepartitionToWide<decltype(d)> dw;
+  const auto kEvenMask = Set(dw, LimitsMax<T>());
+
+  const auto a_even = And(ResizeBitCast(dw, a), kEvenMask);
+  const auto b_even = And(ResizeBitCast(dw, b), kEvenMask);
+  return a_even * b_even;
+}
+// int32_t: same shift approach with a 32-bit shift, multiplied with
+// wasm_i64x2_mul.
+template <size_t N>
+HWY_API Vec128<int64_t, (N + 1) / 2> MulEven(const Vec128<int32_t, N> a,
+                                             const Vec128<int32_t, N> b) {
+  using VW = Vec128<int64_t, (N + 1) / 2>;
+  const DFromV<decltype(a)> d;
+  const RepartitionToWide<decltype(d)> dw;
+  const auto a_wide = ResizeBitCast(dw, a);
+  const auto b_wide = ResizeBitCast(dw, b);
+  const auto a_even = ShiftRight<32>(ShiftLeft<32>(a_wide)).raw;
+  const auto b_even = ShiftRight<32>(ShiftLeft<32>(b_wide)).raw;
+  return VW{wasm_i64x2_mul(a_even, b_even)};
+}
+// uint32_t: AND with a (-1, 0, -1, 0) 32-bit mask, multiplied with
+// wasm_i64x2_mul.
+template <size_t N>
+HWY_API Vec128<uint64_t, (N + 1) / 2> MulEven(const Vec128<uint32_t, N> a,
+                                              const Vec128<uint32_t, N> b) {
+  using VW = Vec128<uint64_t, (N + 1) / 2>;
+  const auto kEvenMask = wasm_i32x4_make(-1, 0, -1, 0);
+  const auto a_even = wasm_v128_and(a.raw, kEvenMask);
+  const auto b_even = wasm_v128_and(b.raw, kEvenMask);
+  return VW{wasm_i64x2_mul(a_even, b_even)};
+}
+
+// Multiplies odd lanes (1, 3 ..) and returns the double-width result.
+
+// 8/16-bit: view the inputs as wide lanes and shift right by the source lane
+// width so the odd source lane remains, then multiply.
+template <class T, size_t N, HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2)),
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>
+HWY_API Vec128<MakeWide<T>, (N + 1) / 2> MulOdd(const Vec128<T, N> a,
+                                                const Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;
+  const RepartitionToWide<decltype(d)> dw;
+  constexpr int kSrcBits = sizeof(T) * 8;
+
+  const auto a_odd = ShiftRight<kSrcBits>(BitCast(dw, a));
+  const auto b_odd = ShiftRight<kSrcBits>(BitCast(dw, b));
+  return a_odd * b_odd;
+}
+// 32-bit: same approach with a 32-bit shift, multiplied with wasm_i64x2_mul.
+template <class T, size_t N, HWY_IF_UI32(T)>
+HWY_API Vec128<MakeWide<T>, (N + 1) / 2> MulOdd(const Vec128<T, N> a,
+                                                const Vec128<T, N> b) {
+  using VW = Vec128<MakeWide<T>, (N + 1) / 2>;
+  const DFromV<decltype(a)> d;
+  const RepartitionToWide<decltype(d)> dw;
+
+  const auto a_odd = ShiftRight<32>(BitCast(dw, a));
+  const auto b_odd = ShiftRight<32>(BitCast(dw, b));
+  return VW{wasm_i64x2_mul(a_odd.raw, b_odd.raw)};
+}
+
+// ------------------------------ Negate
+
+// Float (or special float) lanes: XOR with the SignBit vector for the tag.
+template <typename T, size_t N, HWY_IF_FLOAT_OR_SPECIAL(T)>
+HWY_API Vec128<T, N> Neg(const Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  return Xor(v, SignBit(d));
+}
+
+// Signed integer lanes forward to the wasm neg intrinsic for the lane width.
+template <size_t N>
+HWY_API Vec128<int8_t, N> Neg(const Vec128<int8_t, N> v) {
+  using V = Vec128<int8_t, N>;
+  return V{wasm_i8x16_neg(v.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> Neg(const Vec128<int16_t, N> v) {
+  using V = Vec128<int16_t, N>;
+  return V{wasm_i16x8_neg(v.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> Neg(const Vec128<int32_t, N> v) {
+  using V = Vec128<int32_t, N>;
+  return V{wasm_i32x4_neg(v.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int64_t, N> Neg(const Vec128<int64_t, N> v) {
+  using V = Vec128<int64_t, N>;
+  return V{wasm_i64x2_neg(v.raw)};
+}
+
+// ------------------------------ Floating-point mul / div
+
+// Lane-wise floating-point multiplication via the wasm mul intrinsics.
+template <size_t N>
+HWY_API Vec128<float, N> operator*(Vec128<float, N> a, Vec128<float, N> b) {
+  using V = Vec128<float, N>;
+  return V{wasm_f32x4_mul(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<double, N> operator*(Vec128<double, N> a, Vec128<double, N> b) {
+  using V = Vec128<double, N>;
+  return V{wasm_f64x2_mul(a.raw, b.raw)};
+}
+
+// Lane-wise floating-point division (a / b) via the wasm div intrinsics.
+template <size_t N>
+HWY_API Vec128<float, N> operator/(const Vec128<float, N> a,
+                                   const Vec128<float, N> b) {
+  using V = Vec128<float, N>;
+  return V{wasm_f32x4_div(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<double, N> operator/(const Vec128<double, N> a,
+                                    const Vec128<double, N> b) {
+  using V = Vec128<double, N>;
+  return V{wasm_f64x2_div(a.raw, b.raw)};
+}
+
+// Computes 1 / v using a regular division by a vector of ones.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> ApproximateReciprocal(const Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  const auto one = Set(d, T{1.0});
+  return one / v;
+}
+
+// Integer overload defined in generic_ops-inl.h.
+// Floating-point lanes: Abs of the difference a - b.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> AbsDiff(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const auto difference = a - b;
+  return Abs(difference);
+}
+
+// ------------------------------ Floating-point multiply-add variants
+
+// Returns mul * x + add, computed with the vector operator* and operator+.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> MulAdd(Vec128<T, N> mul, Vec128<T, N> x,
+                            Vec128<T, N> add) {
+  return mul * x + add;
+}
+
+// Returns add - mul * x.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> NegMulAdd(Vec128<T, N> mul, Vec128<T, N> x,
+                               Vec128<T, N> add) {
+  return add - mul * x;
+}
+
+// Returns mul * x - sub.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> MulSub(Vec128<T, N> mul, Vec128<T, N> x,
+                            Vec128<T, N> sub) {
+  return mul * x - sub;
+}
+
+// Returns Neg(mul) * x - sub, i.e. the negated product minus sub.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> NegMulSub(Vec128<T, N> mul, Vec128<T, N> x,
+                               Vec128<T, N> sub) {
+  return Neg(mul) * x - sub;
+}
+
+// ------------------------------ Floating-point square root
+
+// Full-precision square root of each single-precision lane.
+template <size_t N>
+HWY_API Vec128<float, N> Sqrt(const Vec128<float, N> v) {
+  const auto root = wasm_f32x4_sqrt(v.raw);
+  return Vec128<float, N>{root};
+}
+// Full-precision square root of each double-precision lane.
+template <size_t N>
+HWY_API Vec128<double, N> Sqrt(const Vec128<double, N> v) {
+  const auto root = wasm_f64x2_sqrt(v.raw);
+  return Vec128<double, N>{root};
+}
+
+// Reciprocal square root, computed here as 1 / Sqrt(v).
+template <typename T, size_t N>
+HWY_API Vec128<T, N> ApproximateReciprocalSqrt(const Vec128<T, N> v) {
+  // TODO(eustas): find a cheaper way to calculate this.
+  const DFromV<decltype(v)> d;
+  const auto one = Set(d, T{1.0});
+  return one / Sqrt(v);
+}
+
+// ------------------------------ Floating-point rounding
+
+// Rounds each float lane to the nearest integer, ties to even.
+template <size_t N>
+HWY_API Vec128<float, N> Round(const Vec128<float, N> v) {
+  const auto rounded = wasm_f32x4_nearest(v.raw);
+  return Vec128<float, N>{rounded};
+}
+// Rounds each double lane to the nearest integer, ties to even.
+template <size_t N>
+HWY_API Vec128<double, N> Round(const Vec128<double, N> v) {
+  const auto rounded = wasm_f64x2_nearest(v.raw);
+  return Vec128<double, N>{rounded};
+}
+
+// Rounds each float lane toward zero (truncation).
+template <size_t N>
+HWY_API Vec128<float, N> Trunc(const Vec128<float, N> v) {
+  const auto truncated = wasm_f32x4_trunc(v.raw);
+  return Vec128<float, N>{truncated};
+}
+// Rounds each double lane toward zero (truncation).
+template <size_t N>
+HWY_API Vec128<double, N> Trunc(const Vec128<double, N> v) {
+  const auto truncated = wasm_f64x2_trunc(v.raw);
+  return Vec128<double, N>{truncated};
+}
+
+// Rounds each float lane toward +infinity (ceiling).
+template <size_t N>
+HWY_API Vec128<float, N> Ceil(const Vec128<float, N> v) {
+  const auto ceiled = wasm_f32x4_ceil(v.raw);
+  return Vec128<float, N>{ceiled};
+}
+// Rounds each double lane toward +infinity (ceiling).
+template <size_t N>
+HWY_API Vec128<double, N> Ceil(const Vec128<double, N> v) {
+  const auto ceiled = wasm_f64x2_ceil(v.raw);
+  return Vec128<double, N>{ceiled};
+}
+
+// Rounds each float lane toward -infinity (floor).
+template <size_t N>
+HWY_API Vec128<float, N> Floor(const Vec128<float, N> v) {
+  const auto floored = wasm_f32x4_floor(v.raw);
+  return Vec128<float, N>{floored};
+}
+// Rounds each double lane toward -infinity (floor).
+template <size_t N>
+HWY_API Vec128<double, N> Floor(const Vec128<double, N> v) {
+  const auto floored = wasm_f64x2_floor(v.raw);
+  return Vec128<double, N>{floored};
+}
+
+// ------------------------------ Floating-point classification
+// A lane is flagged when it compares unequal to itself.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> IsNaN(const Vec128<T, N> v) {
+  const Mask128<T, N> not_self_equal = (v != v);
+  return not_self_equal;
+}
+
+// Returns a mask of lanes holding +/- infinity.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Mask128<T, N> IsInf(const Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;
+  const VFromD<decltype(di)> bits = BitCast(di, v);
+  // Adding the bits to themselves is a 'shift left' by one, which drops the
+  // sign bit. What remains must be exponent=max and mantissa=0.
+  const auto without_sign = Add(bits, bits);
+  const auto inf_pattern = Set(di, hwy::MaxExponentTimes2<T>());
+  return RebindMask(d, Eq(without_sign, inf_pattern));
+}
+
+// Returns a mask of lanes that are normal, subnormal or zero.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Mask128<T, N> IsFinite(const Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const RebindToSigned<decltype(d)> di;  // cheaper than unsigned comparison
+  const VFromD<decltype(du)> bits = BitCast(du, v);
+  // Doubling is a 'shift left' that clears the sign bit. The subsequent right
+  // shift isolates the exponent field so it can be compared with the max
+  // exponent. (MaxExponentTimes2 cannot be compared against directly because
+  // it is negative and non-negative floats would be greater.)
+  const auto without_sign = Add(bits, bits);
+  const VFromD<decltype(di)> exponent =
+      BitCast(di, ShiftRight<hwy::MantissaBits<T>() + 1>(without_sign));
+  const auto max_exponent = Set(di, hwy::MaxExponentField<T>());
+  return RebindMask(d, Lt(exponent, max_exponent));
+}
+
+// ================================================== COMPARE
+
+// Comparisons fill a lane with 1-bits if the condition is true, else 0.
+
+// Masks share the vector representation (true = FF..FF), so this only
+// re-wraps the raw bits.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> MaskFromVec(const Vec128<T, N> v) {
+  using M = Mask128<T, N>;
+  return M{v.raw};
+}
+
+// Mask type associated with tag D, deduced from what MaskFromVec returns for
+// the vector type of D.
+template <class D>
+using MFromD = decltype(MaskFromVec(VFromD<D>()));
+
+// Reinterprets a mask as the mask type of DTo. Lane sizes must match; the raw
+// bits are carried over unchanged.
+template <typename TFrom, size_t NFrom, class DTo>
+HWY_API MFromD<DTo> RebindMask(DTo /* tag */, Mask128<TFrom, NFrom> m) {
+  using MTo = MFromD<DTo>;
+  static_assert(sizeof(TFrom) == sizeof(TFromD<DTo>), "Must have same size");
+  return MTo{m.raw};
+}
+
+// True for lanes of v in which every bit set in `bit` is also set.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> TestBit(Vec128<T, N> v, Vec128<T, N> bit) {
+  static_assert(!hwy::IsFloat<T>(), "Only integer vectors supported");
+  const Vec128<T, N> common = v & bit;
+  return common == bit;
+}
+
+// ------------------------------ Equality
+
+// Unsigned
+// uint8_t lanes: all-ones where a == b.
+template <size_t N>
+HWY_API Mask128<uint8_t, N> operator==(const Vec128<uint8_t, N> a,
+                                       const Vec128<uint8_t, N> b) {
+  const auto eq = wasm_i8x16_eq(a.raw, b.raw);
+  return Mask128<uint8_t, N>{eq};
+}
+// uint16_t lanes: all-ones where a == b.
+template <size_t N>
+HWY_API Mask128<uint16_t, N> operator==(const Vec128<uint16_t, N> a,
+                                        const Vec128<uint16_t, N> b) {
+  const auto eq = wasm_i16x8_eq(a.raw, b.raw);
+  return Mask128<uint16_t, N>{eq};
+}
+// uint32_t lanes: all-ones where a == b.
+template <size_t N>
+HWY_API Mask128<uint32_t, N> operator==(const Vec128<uint32_t, N> a,
+                                        const Vec128<uint32_t, N> b) {
+  const auto eq = wasm_i32x4_eq(a.raw, b.raw);
+  return Mask128<uint32_t, N>{eq};
+}
+// uint64_t lanes: all-ones where a == b.
+template <size_t N>
+HWY_API Mask128<uint64_t, N> operator==(const Vec128<uint64_t, N> a,
+                                        const Vec128<uint64_t, N> b) {
+  const auto eq = wasm_i64x2_eq(a.raw, b.raw);
+  return Mask128<uint64_t, N>{eq};
+}
+
+// Signed
+// int8_t lanes: all-ones where a == b.
+template <size_t N>
+HWY_API Mask128<int8_t, N> operator==(const Vec128<int8_t, N> a,
+                                      const Vec128<int8_t, N> b) {
+  const auto eq = wasm_i8x16_eq(a.raw, b.raw);
+  return Mask128<int8_t, N>{eq};
+}
+// int16_t lanes: all-ones where a == b.
+template <size_t N>
+HWY_API Mask128<int16_t, N> operator==(Vec128<int16_t, N> a,
+                                       Vec128<int16_t, N> b) {
+  const auto eq = wasm_i16x8_eq(a.raw, b.raw);
+  return Mask128<int16_t, N>{eq};
+}
+// int32_t lanes: all-ones where a == b.
+template <size_t N>
+HWY_API Mask128<int32_t, N> operator==(const Vec128<int32_t, N> a,
+                                       const Vec128<int32_t, N> b) {
+  const auto eq = wasm_i32x4_eq(a.raw, b.raw);
+  return Mask128<int32_t, N>{eq};
+}
+// int64_t lanes: all-ones where a == b.
+template <size_t N>
+HWY_API Mask128<int64_t, N> operator==(const Vec128<int64_t, N> a,
+                                       const Vec128<int64_t, N> b) {
+  const auto eq = wasm_i64x2_eq(a.raw, b.raw);
+  return Mask128<int64_t, N>{eq};
+}
+
+// Float
+// float lanes: all-ones where a == b.
+template <size_t N>
+HWY_API Mask128<float, N> operator==(const Vec128<float, N> a,
+                                     const Vec128<float, N> b) {
+  const auto eq = wasm_f32x4_eq(a.raw, b.raw);
+  return Mask128<float, N>{eq};
+}
+// double lanes: all-ones where a == b.
+template <size_t N>
+HWY_API Mask128<double, N> operator==(const Vec128<double, N> a,
+                                      const Vec128<double, N> b) {
+  const auto eq = wasm_f64x2_eq(a.raw, b.raw);
+  return Mask128<double, N>{eq};
+}
+
+// ------------------------------ Inequality
+
+// Unsigned
+// uint8_t lanes: all-ones where a != b.
+template <size_t N>
+HWY_API Mask128<uint8_t, N> operator!=(const Vec128<uint8_t, N> a,
+                                       const Vec128<uint8_t, N> b) {
+  const auto ne = wasm_i8x16_ne(a.raw, b.raw);
+  return Mask128<uint8_t, N>{ne};
+}
+// uint16_t lanes: all-ones where a != b.
+template <size_t N>
+HWY_API Mask128<uint16_t, N> operator!=(const Vec128<uint16_t, N> a,
+                                        const Vec128<uint16_t, N> b) {
+  const auto ne = wasm_i16x8_ne(a.raw, b.raw);
+  return Mask128<uint16_t, N>{ne};
+}
+// uint32_t lanes: all-ones where a != b.
+template <size_t N>
+HWY_API Mask128<uint32_t, N> operator!=(const Vec128<uint32_t, N> a,
+                                        const Vec128<uint32_t, N> b) {
+  const auto ne = wasm_i32x4_ne(a.raw, b.raw);
+  return Mask128<uint32_t, N>{ne};
+}
+// uint64_t lanes: all-ones where a != b.
+template <size_t N>
+HWY_API Mask128<uint64_t, N> operator!=(const Vec128<uint64_t, N> a,
+                                        const Vec128<uint64_t, N> b) {
+  const auto ne = wasm_i64x2_ne(a.raw, b.raw);
+  return Mask128<uint64_t, N>{ne};
+}
+
+// Signed
+// int8_t lanes: all-ones where a != b.
+template <size_t N>
+HWY_API Mask128<int8_t, N> operator!=(const Vec128<int8_t, N> a,
+                                      const Vec128<int8_t, N> b) {
+  const auto ne = wasm_i8x16_ne(a.raw, b.raw);
+  return Mask128<int8_t, N>{ne};
+}
+// int16_t lanes: all-ones where a != b.
+template <size_t N>
+HWY_API Mask128<int16_t, N> operator!=(const Vec128<int16_t, N> a,
+                                       const Vec128<int16_t, N> b) {
+  const auto ne = wasm_i16x8_ne(a.raw, b.raw);
+  return Mask128<int16_t, N>{ne};
+}
+// int32_t lanes: all-ones where a != b.
+template <size_t N>
+HWY_API Mask128<int32_t, N> operator!=(const Vec128<int32_t, N> a,
+                                       const Vec128<int32_t, N> b) {
+  const auto ne = wasm_i32x4_ne(a.raw, b.raw);
+  return Mask128<int32_t, N>{ne};
+}
+// int64_t lanes: all-ones where a != b.
+template <size_t N>
+HWY_API Mask128<int64_t, N> operator!=(const Vec128<int64_t, N> a,
+                                       const Vec128<int64_t, N> b) {
+  const auto ne = wasm_i64x2_ne(a.raw, b.raw);
+  return Mask128<int64_t, N>{ne};
+}
+
+// Float
+// float lanes: all-ones where a != b.
+template <size_t N>
+HWY_API Mask128<float, N> operator!=(const Vec128<float, N> a,
+                                     const Vec128<float, N> b) {
+  const auto ne = wasm_f32x4_ne(a.raw, b.raw);
+  return Mask128<float, N>{ne};
+}
+// double lanes: all-ones where a != b.
+template <size_t N>
+HWY_API Mask128<double, N> operator!=(const Vec128<double, N> a,
+                                      const Vec128<double, N> b) {
+  const auto ne = wasm_f64x2_ne(a.raw, b.raw);
+  return Mask128<double, N>{ne};
+}
+
+// ------------------------------ Strict inequality
+
+// int8_t lanes: all-ones where a > b (signed compare).
+template <size_t N>
+HWY_API Mask128<int8_t, N> operator>(const Vec128<int8_t, N> a,
+                                     const Vec128<int8_t, N> b) {
+  const auto gt = wasm_i8x16_gt(a.raw, b.raw);
+  return Mask128<int8_t, N>{gt};
+}
+// int16_t lanes: all-ones where a > b (signed compare).
+template <size_t N>
+HWY_API Mask128<int16_t, N> operator>(const Vec128<int16_t, N> a,
+                                      const Vec128<int16_t, N> b) {
+  const auto gt = wasm_i16x8_gt(a.raw, b.raw);
+  return Mask128<int16_t, N>{gt};
+}
+// int32_t lanes: all-ones where a > b (signed compare).
+template <size_t N>
+HWY_API Mask128<int32_t, N> operator>(const Vec128<int32_t, N> a,
+                                      const Vec128<int32_t, N> b) {
+  const auto gt = wasm_i32x4_gt(a.raw, b.raw);
+  return Mask128<int32_t, N>{gt};
+}
+// int64_t lanes: all-ones where a > b (signed compare).
+template <size_t N>
+HWY_API Mask128<int64_t, N> operator>(const Vec128<int64_t, N> a,
+                                      const Vec128<int64_t, N> b) {
+  const auto gt = wasm_i64x2_gt(a.raw, b.raw);
+  return Mask128<int64_t, N>{gt};
+}
+
+// uint8_t lanes: all-ones where a > b (unsigned compare).
+template <size_t N>
+HWY_API Mask128<uint8_t, N> operator>(const Vec128<uint8_t, N> a,
+                                      const Vec128<uint8_t, N> b) {
+  const auto gt = wasm_u8x16_gt(a.raw, b.raw);
+  return Mask128<uint8_t, N>{gt};
+}
+// uint16_t lanes: all-ones where a > b (unsigned compare).
+template <size_t N>
+HWY_API Mask128<uint16_t, N> operator>(const Vec128<uint16_t, N> a,
+                                       const Vec128<uint16_t, N> b) {
+  const auto gt = wasm_u16x8_gt(a.raw, b.raw);
+  return Mask128<uint16_t, N>{gt};
+}
+// uint32_t lanes: all-ones where a > b (unsigned compare).
+template <size_t N>
+HWY_API Mask128<uint32_t, N> operator>(const Vec128<uint32_t, N> a,
+                                       const Vec128<uint32_t, N> b) {
+  const auto gt = wasm_u32x4_gt(a.raw, b.raw);
+  return Mask128<uint32_t, N>{gt};
+}
+// uint64_t lanes: built from unsigned 32-bit comparisons of the two halves of
+// each 64-bit lane.
+template <size_t N>
+HWY_API Mask128<uint64_t, N> operator>(const Vec128<uint64_t, N> a,
+                                       const Vec128<uint64_t, N> b) {
+  const DFromV<decltype(a)> d;
+  const Repartition<uint32_t, decltype(d)> d32;
+  const auto a_halves = BitCast(d32, a);
+  const auto b_halves = BitCast(d32, b);
+  // Per-half results. Where the upper halves differ, half_gt already holds
+  // the answer for that 64-bit lane; where they are equal, the lower half
+  // decides.
+  const auto half_gt = a_halves > b_halves;
+  const auto half_eq = a_halves == b_halves;
+
+  // Duplicate each lower-half result into the position of its upper half.
+  const auto lower_in_upper =
+      wasm_i32x4_shuffle(half_gt.raw, half_gt.raw, 0, 0, 2, 2);
+  const auto lower_decides =
+      And(half_eq, MaskFromVec(VFromD<decltype(d32)>{lower_in_upper}));
+
+  const auto upper_result = Or(lower_decides, half_gt);
+  // Copy result in upper 32 bits to lower 32 bits.
+  const auto broadcast =
+      wasm_i32x4_shuffle(upper_result.raw, upper_result.raw, 1, 1, 3, 3);
+  return Mask128<uint64_t, N>{broadcast};
+}
+
+// float lanes: all-ones where a > b.
+template <size_t N>
+HWY_API Mask128<float, N> operator>(const Vec128<float, N> a,
+                                    const Vec128<float, N> b) {
+  const auto gt = wasm_f32x4_gt(a.raw, b.raw);
+  return Mask128<float, N>{gt};
+}
+// double lanes: all-ones where a > b.
+template <size_t N>
+HWY_API Mask128<double, N> operator>(const Vec128<double, N> a,
+                                     const Vec128<double, N> b) {
+  const auto gt = wasm_f64x2_gt(a.raw, b.raw);
+  return Mask128<double, N>{gt};
+}
+
+// a < b is evaluated as b > a, reusing the operator> overloads.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> operator<(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const Mask128<T, N> swapped_gt = operator>(b, a);
+  return swapped_gt;
+}
+
+// ------------------------------ Weak inequality
+
+// Float >=
+// float lanes: all-ones where a >= b.
+template <size_t N>
+HWY_API Mask128<float, N> operator>=(const Vec128<float, N> a,
+                                     const Vec128<float, N> b) {
+  const auto ge = wasm_f32x4_ge(a.raw, b.raw);
+  return Mask128<float, N>{ge};
+}
+// double lanes: all-ones where a >= b.
+template <size_t N>
+HWY_API Mask128<double, N> operator>=(const Vec128<double, N> a,
+                                      const Vec128<double, N> b) {
+  const auto ge = wasm_f64x2_ge(a.raw, b.raw);
+  return Mask128<double, N>{ge};
+}
+
+// int8_t lanes: all-ones where a >= b (signed compare).
+template <size_t N>
+HWY_API Mask128<int8_t, N> operator>=(const Vec128<int8_t, N> a,
+                                      const Vec128<int8_t, N> b) {
+  const auto ge = wasm_i8x16_ge(a.raw, b.raw);
+  return Mask128<int8_t, N>{ge};
+}
+// int16_t lanes: all-ones where a >= b (signed compare).
+template <size_t N>
+HWY_API Mask128<int16_t, N> operator>=(const Vec128<int16_t, N> a,
+                                       const Vec128<int16_t, N> b) {
+  const auto ge = wasm_i16x8_ge(a.raw, b.raw);
+  return Mask128<int16_t, N>{ge};
+}
+// int32_t lanes: all-ones where a >= b (signed compare).
+template <size_t N>
+HWY_API Mask128<int32_t, N> operator>=(const Vec128<int32_t, N> a,
+                                       const Vec128<int32_t, N> b) {
+  const auto ge = wasm_i32x4_ge(a.raw, b.raw);
+  return Mask128<int32_t, N>{ge};
+}
+// int64_t lanes: all-ones where a >= b (signed compare).
+template <size_t N>
+HWY_API Mask128<int64_t, N> operator>=(const Vec128<int64_t, N> a,
+                                       const Vec128<int64_t, N> b) {
+  const auto ge = wasm_i64x2_ge(a.raw, b.raw);
+  return Mask128<int64_t, N>{ge};
+}
+
+// uint8_t lanes: all-ones where a >= b (unsigned compare).
+template <size_t N>
+HWY_API Mask128<uint8_t, N> operator>=(const Vec128<uint8_t, N> a,
+                                       const Vec128<uint8_t, N> b) {
+  const auto ge = wasm_u8x16_ge(a.raw, b.raw);
+  return Mask128<uint8_t, N>{ge};
+}
+// uint16_t lanes: all-ones where a >= b (unsigned compare).
+template <size_t N>
+HWY_API Mask128<uint16_t, N> operator>=(const Vec128<uint16_t, N> a,
+                                        const Vec128<uint16_t, N> b) {
+  const auto ge = wasm_u16x8_ge(a.raw, b.raw);
+  return Mask128<uint16_t, N>{ge};
+}
+// uint32_t lanes: all-ones where a >= b (unsigned compare).
+template <size_t N>
+HWY_API Mask128<uint32_t, N> operator>=(const Vec128<uint32_t, N> a,
+                                        const Vec128<uint32_t, N> b) {
+  const auto ge = wasm_u32x4_ge(a.raw, b.raw);
+  return Mask128<uint32_t, N>{ge};
+}
+// uint64_t lanes: a >= b is computed as the complement of b > a.
+template <size_t N>
+HWY_API Mask128<uint64_t, N> operator>=(const Vec128<uint64_t, N> a,
+                                        const Vec128<uint64_t, N> b) {
+  const Mask128<uint64_t, N> b_greater = b > a;
+  return Not(b_greater);
+}
+
+// a <= b is evaluated as b >= a, reusing the operator>= overloads.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> operator<=(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const Mask128<T, N> swapped_ge = operator>=(b, a);
+  return swapped_ge;
+}
+
+// ------------------------------ FirstN (Iota, Lt)
+
+// Returns a mask whose first `num` lanes are true; `num` at or above the lane
+// count yields an all-true mask.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API MFromD<D> FirstN(D d, size_t num) {
+  const RebindToSigned<decltype(d)> di;  // Signed comparisons may be cheaper.
+  using TI = TFromD<decltype(di)>;
+  // Clamp before narrowing: without this, a large `num` (e.g. 256 with 8-bit
+  // lanes) would wrap in the cast below and wrongly produce a mostly- or
+  // all-false mask. The lane count (at most 16 here) always fits in TI.
+  const size_t max_lanes = d.MaxBytes() / sizeof(TI);
+  const size_t clamped = num < max_lanes ? num : max_lanes;
+  return RebindMask(d, Iota(di, 0) < Set(di, static_cast<TI>(clamped)));
+}
+
+// ================================================== LOGICAL
+
+// ------------------------------ Not
+
+// Bitwise complement of all 128 bits.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Not(Vec128<T, N> v) {
+  const auto inverted = wasm_v128_not(v.raw);
+  return Vec128<T, N>{inverted};
+}
+
+// ------------------------------ And
+
+// Bitwise a & b.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> And(Vec128<T, N> a, Vec128<T, N> b) {
+  const auto both = wasm_v128_and(a.raw, b.raw);
+  return Vec128<T, N>{both};
+}
+
+// ------------------------------ AndNot
+
+// Returns ~not_mask & mask. Note the swapped argument order in the intrinsic
+// call: `mask` is passed first and `not_mask` second.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> AndNot(Vec128<T, N> not_mask, Vec128<T, N> mask) {
+  const auto kept = wasm_v128_andnot(mask.raw, not_mask.raw);
+  return Vec128<T, N>{kept};
+}
+
+// ------------------------------ Or
+
+// Bitwise a | b.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Or(Vec128<T, N> a, Vec128<T, N> b) {
+  const auto either = wasm_v128_or(a.raw, b.raw);
+  return Vec128<T, N>{either};
+}
+
+// ------------------------------ Xor
+
+// Bitwise a ^ b.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Xor(Vec128<T, N> a, Vec128<T, N> b) {
+  const auto differing = wasm_v128_xor(a.raw, b.raw);
+  return Vec128<T, N>{differing};
+}
+
+// ------------------------------ Xor3
+
+// Returns x1 ^ x2 ^ x3 using two Xor operations.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Xor3(Vec128<T, N> x1, Vec128<T, N> x2, Vec128<T, N> x3) {
+  const Vec128<T, N> x23 = Xor(x2, x3);
+  return Xor(x1, x23);
+}
+
+// ------------------------------ Or3
+
+// Returns o1 | o2 | o3 using two Or operations.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Or3(Vec128<T, N> o1, Vec128<T, N> o2, Vec128<T, N> o3) {
+  const Vec128<T, N> o23 = Or(o2, o3);
+  return Or(o1, o23);
+}
+
+// ------------------------------ OrAnd
+
+// Returns o | (a1 & a2).
+template <typename T, size_t N>
+HWY_API Vec128<T, N> OrAnd(Vec128<T, N> o, Vec128<T, N> a1, Vec128<T, N> a2) {
+  const Vec128<T, N> conjunction = And(a1, a2);
+  return Or(o, conjunction);
+}
+
+// ------------------------------ IfVecThenElse
+
+// Same as IfThenElse, but the selector is supplied as a vector and converted
+// to a mask first.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfVecThenElse(Vec128<T, N> mask, Vec128<T, N> yes,
+                                   Vec128<T, N> no) {
+  const Mask128<T, N> selector = MaskFromVec(mask);
+  return IfThenElse(selector, yes, no);
+}
+
+// ------------------------------ Operator overloads (internal-only if float)
+
+// Operator spelling of And.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator&(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const Vec128<T, N> result = And(a, b);
+  return result;
+}
+
+// Operator spelling of Or.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator|(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const Vec128<T, N> result = Or(a, b);
+  return result;
+}
+
+// Operator spelling of Xor.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator^(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const Vec128<T, N> result = Xor(a, b);
+  return result;
+}
+
+// ------------------------------ CopySign
+// Takes the sign bit from `sign` and all remaining bits from `magn`.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> CopySign(const Vec128<T, N> magn,
+                              const Vec128<T, N> sign) {
+  static_assert(IsFloat<T>(), "Only makes sense for floating-point");
+  const DFromV<decltype(magn)> d;
+  const auto sign_bit = SignBit(d);
+  return BitwiseIfThenElse(sign_bit, sign, magn);
+}
+
+// ------------------------------ CopySignToAbs
+// ORs the sign bit of `sign` into `abs`. NOTE(review): as the name suggests,
+// `abs` is presumably expected to have a clear sign bit - confirm with
+// callers.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> CopySignToAbs(const Vec128<T, N> abs,
+                                   const Vec128<T, N> sign) {
+  static_assert(IsFloat<T>(), "Only makes sense for floating-point");
+  const DFromV<decltype(abs)> d;
+  const auto sign_bit = SignBit(d);
+  return OrAnd(abs, sign_bit, sign);
+}
+
+// ------------------------------ BroadcastSignBit (compare)
+
+// Lanes wider than one byte: shift right by (lane bits - 1).
+template <typename T, size_t N, HWY_IF_NOT_T_SIZE(T, 1)>
+HWY_API Vec128<T, N> BroadcastSignBit(const Vec128<T, N> v) {
+  constexpr size_t kSignShift = sizeof(T) * 8 - 1;
+  return ShiftRight<kSignShift>(v);
+}
+// int8_t lanes: derived from a compare against zero, so negative lanes become
+// all-ones and the rest zero.
+template <size_t N>
+HWY_API Vec128<int8_t, N> BroadcastSignBit(const Vec128<int8_t, N> v) {
+  const DFromV<decltype(v)> d;
+  const auto is_negative = v < Zero(d);
+  return VecFromMask(d, is_negative);
+}
+
+// ------------------------------ Mask
+
+// Re-wraps the raw bits of a mask as a vector of the type selected by D.
+template <class D>
+HWY_API VFromD<D> VecFromMask(D /* tag */, MFromD<D> v) {
+  using V = VFromD<D>;
+  return V{v.raw};
+}
+
+// mask ? yes : no, implemented as a bitwise select.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfThenElse(Mask128<T, N> mask, Vec128<T, N> yes,
+                                Vec128<T, N> no) {
+  const auto selected = wasm_v128_bitselect(yes.raw, no.raw, mask.raw);
+  return Vec128<T, N>{selected};
+}
+
+// mask ? yes : 0, implemented by ANDing `yes` with the mask bits.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfThenElseZero(Mask128<T, N> mask, Vec128<T, N> yes) {
+  const DFromV<decltype(yes)> d;
+  const Vec128<T, N> mask_bits = VecFromMask(d, mask);
+  return yes & mask_bits;
+}
+
+// mask ? 0 : no, implemented as ~mask & no.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfThenZeroElse(Mask128<T, N> mask, Vec128<T, N> no) {
+  const DFromV<decltype(no)> d;
+  const Vec128<T, N> mask_bits = VecFromMask(d, mask);
+  return AndNot(mask_bits, no);
+}
+
+// Selects `yes` for lanes of v whose sign bit is set, otherwise `no`.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfNegativeThenElse(Vec128<T, N> v, Vec128<T, N> yes,
+                                        Vec128<T, N> no) {
+  static_assert(IsSigned<T>(), "Only works for signed/float");
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;
+
+  // Spread each lane's sign bit across the whole lane so that it can serve as
+  // a mask.
+  const Vec128<T, N> sign_lanes =
+      BitCast(d, BroadcastSignBit(BitCast(di, v)));
+  return IfThenElse(MaskFromVec(sign_lanes), yes, no);
+}
+
+// Keeps lanes that compare greater than zero and replaces all others with
+// zero.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> ZeroIfNegative(Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  const auto zero = Zero(d);
+  const auto is_positive = v > zero;
+  return IfThenElse(Mask128<T, N>{is_positive.raw}, v, zero);
+}
+
+// ------------------------------ Mask logical
+
+// Mask complement, computed via the vector representation.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Not(const Mask128<T, N> m) {
+  const DFromM<decltype(m)> d;
+  const auto bits = VecFromMask(d, m);
+  return MaskFromVec(Not(bits));
+}
+
+// Mask a & b, computed via the vector representation.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> And(const Mask128<T, N> a, Mask128<T, N> b) {
+  const DFromM<decltype(a)> d;
+  const auto a_bits = VecFromMask(d, a);
+  const auto b_bits = VecFromMask(d, b);
+  return MaskFromVec(And(a_bits, b_bits));
+}
+
+// Mask ~a & b, computed via the vector representation.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> AndNot(const Mask128<T, N> a, Mask128<T, N> b) {
+  const DFromM<decltype(a)> d;
+  const auto a_bits = VecFromMask(d, a);
+  const auto b_bits = VecFromMask(d, b);
+  return MaskFromVec(AndNot(a_bits, b_bits));
+}
+
+// Mask a | b, computed via the vector representation.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Or(const Mask128<T, N> a, Mask128<T, N> b) {
+  const DFromM<decltype(a)> d;
+  const auto a_bits = VecFromMask(d, a);
+  const auto b_bits = VecFromMask(d, b);
+  return MaskFromVec(Or(a_bits, b_bits));
+}
+
+// Mask a ^ b, computed via the vector representation.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Xor(const Mask128<T, N> a, Mask128<T, N> b) {
+  const DFromM<decltype(a)> d;
+  const auto a_bits = VecFromMask(d, a);
+  const auto b_bits = VecFromMask(d, b);
+  return MaskFromVec(Xor(a_bits, b_bits));
+}
+
+// Computes ~a & ~b via the vector representation, i.e. true for lanes where
+// neither mask is set.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> ExclusiveNeither(const Mask128<T, N> a, Mask128<T, N> b) {
+  const DFromM<decltype(a)> d;
+  const auto a_bits = VecFromMask(d, a);
+  const auto not_b_bits = Not(VecFromMask(d, b));
+  return MaskFromVec(AndNot(a_bits, not_b_bits));
+}
+
+// ------------------------------ Shl (BroadcastSignBit, IfThenElse)
+
+// The x86 multiply-by-Pow2() trick will not work because WASM saturates
+// float->int correctly to 2^31-1 (not 2^31). Because WASM's shifts take a
+// scalar count operand, per-lane shift instructions would require extract_lane
+// for each lane, and hoping that shuffle is correctly mapped to a native
+// instruction. Using non-vector shifts would incur a store-load forwarding
+// stall when loading the result vector. We instead test bits of the shift
+// count to "predicate" a shift of the entire vector by a constant.
+
+// Per-lane variable left shift for 8-bit lanes. Each of the three low bits of
+// the count predicates a constant shift (4, 2, 1) of the whole vector.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T, N> operator<<(Vec128<T, N> v, const Vec128<T, N> bits) {
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;  // BroadcastSignBit needs signed.
+  // Shifting the count left by 5 places its highest valid bit (bit 2) in the
+  // sign bit; every later ShiftLeft<1> exposes the next lower bit.
+  auto count = ShiftLeft<5>(BitCast(di, bits));
+
+  const auto by4 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(by4, ShiftLeft<4>(v), v);
+  count = ShiftLeft<1>(count);
+
+  const auto by2 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(by2, ShiftLeft<2>(v), v);
+  count = ShiftLeft<1>(count);
+
+  const auto by1 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  return IfThenElse(by1, ShiftLeft<1>(v), v);
+}
+
+// Per-lane variable left shift for 16-bit lanes. Each of the four low bits of
+// the count predicates a constant shift (8, 4, 2, 1) of the whole vector.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 2),
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>
+HWY_API Vec128<T, N> operator<<(Vec128<T, N> v, const Vec128<T, N> bits) {
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;  // BroadcastSignBit needs signed.
+  // Shifting the count left by 12 places its highest valid bit (bit 3) in the
+  // sign bit; every later ShiftLeft<1> exposes the next lower bit.
+  auto count = ShiftLeft<12>(BitCast(di, bits));
+
+  const auto by8 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(by8, ShiftLeft<8>(v), v);
+  count = ShiftLeft<1>(count);
+
+  const auto by4 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(by4, ShiftLeft<4>(v), v);
+  count = ShiftLeft<1>(count);
+
+  const auto by2 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(by2, ShiftLeft<2>(v), v);
+  count = ShiftLeft<1>(count);
+
+  const auto by1 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  return IfThenElse(by1, ShiftLeft<1>(v), v);
+}
+
+// Per-lane variable left shift for 32-bit lanes. Each of the five low bits of
+// the count predicates a constant shift (16, 8, 4, 2, 1) of the whole vector.
+template <typename T, size_t N, HWY_IF_UI32(T)>
+HWY_API Vec128<T, N> operator<<(Vec128<T, N> v, const Vec128<T, N> bits) {
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;  // BroadcastSignBit needs signed.
+  // Shifting the count left by 27 places its highest valid bit (bit 4) in the
+  // sign bit; every later ShiftLeft<1> exposes the next lower bit.
+  auto count = ShiftLeft<27>(BitCast(di, bits));
+
+  const auto by16 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(by16, ShiftLeft<16>(v), v);
+  count = ShiftLeft<1>(count);
+
+  const auto by8 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(by8, ShiftLeft<8>(v), v);
+  count = ShiftLeft<1>(count);
+
+  const auto by4 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(by4, ShiftLeft<4>(v), v);
+  count = ShiftLeft<1>(count);
+
+  const auto by2 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(by2, ShiftLeft<2>(v), v);
+  count = ShiftLeft<1>(count);
+
+  const auto by1 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  return IfThenElse(by1, ShiftLeft<1>(v), v);
+}
+
+// Per-lane variable left shift for 64-bit lanes, done with scalar shifts on
+// lanes spilled to the stack. The shift is performed on the unsigned
+// representation and each count is masked to its low six bits.
+template <typename T, size_t N, HWY_IF_UI64(T)>
+HWY_API Vec128<T, N> operator<<(Vec128<T, N> v, const Vec128<T, N> bits) {
+  using TU = MakeUnsigned<T>;
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  alignas(16) TU values[2] = {};
+  alignas(16) TU counts[2] = {};
+  Store(BitCast(du, v), du, values);
+  Store(BitCast(du, bits), du, counts);
+  values[0] <<= (counts[0] & 63);
+  values[1] <<= (counts[1] & 63);
+  return BitCast(d, Load(du, values));
+}
+
+// ------------------------------ Shr (BroadcastSignBit, IfThenElse)
+
+// Per-lane variable right shift for 8-bit lanes. Each of the three low bits
+// of the count predicates a constant shift (4, 2, 1) of the whole vector.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T, N> operator>>(Vec128<T, N> v, const Vec128<T, N> bits) {
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;  // BroadcastSignBit needs signed.
+  // Shifting the count left by 5 places its highest valid bit (bit 2) in the
+  // sign bit; every later ShiftLeft<1> exposes the next lower bit.
+  auto count = ShiftLeft<5>(BitCast(di, bits));
+
+  const auto by4 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(by4, ShiftRight<4>(v), v);
+  count = ShiftLeft<1>(count);
+
+  const auto by2 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(by2, ShiftRight<2>(v), v);
+  count = ShiftLeft<1>(count);
+
+  const auto by1 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  return IfThenElse(by1, ShiftRight<1>(v), v);
+}
+
+// Per-lane variable right shift for 16-bit lanes. Each of the four low bits
+// of the count predicates a constant shift (8, 4, 2, 1) of the whole vector.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 2),
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>
+HWY_API Vec128<T, N> operator>>(Vec128<T, N> v, const Vec128<T, N> bits) {
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;  // BroadcastSignBit needs signed.
+  // Shifting the count left by 12 places its highest valid bit (bit 3) in the
+  // sign bit; every later ShiftLeft<1> exposes the next lower bit.
+  auto count = ShiftLeft<12>(BitCast(di, bits));
+
+  const auto by8 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(by8, ShiftRight<8>(v), v);
+  count = ShiftLeft<1>(count);
+
+  const auto by4 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(by4, ShiftRight<4>(v), v);
+  count = ShiftLeft<1>(count);
+
+  const auto by2 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(by2, ShiftRight<2>(v), v);
+  count = ShiftLeft<1>(count);
+
+  const auto by1 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  return IfThenElse(by1, ShiftRight<1>(v), v);
+}
+
+// Per-lane variable right shift for 32-bit lanes. Each of the five low bits
+// of the count predicates a constant shift (16, 8, 4, 2, 1) of the whole
+// vector.
+template <typename T, size_t N, HWY_IF_UI32(T)>
+HWY_API Vec128<T, N> operator>>(Vec128<T, N> v, const Vec128<T, N> bits) {
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;  // BroadcastSignBit needs signed.
+  // Shifting the count left by 27 places its highest valid bit (bit 4) in the
+  // sign bit; every later ShiftLeft<1> exposes the next lower bit.
+  auto count = ShiftLeft<27>(BitCast(di, bits));
+
+  const auto by16 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(by16, ShiftRight<16>(v), v);
+  count = ShiftLeft<1>(count);
+
+  const auto by8 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(by8, ShiftRight<8>(v), v);
+  count = ShiftLeft<1>(count);
+
+  const auto by4 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(by4, ShiftRight<4>(v), v);
+  count = ShiftLeft<1>(count);
+
+  const auto by2 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(by2, ShiftRight<2>(v), v);
+  count = ShiftLeft<1>(count);
+
+  const auto by1 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  return IfThenElse(by1, ShiftRight<1>(v), v);
+}
+
+// Per-lane variable right shift for 64-bit lanes, done with scalar shifts on
+// lanes spilled to the stack. The shift is applied to T directly and each
+// count is masked to its low six bits.
+template <typename T, size_t N, HWY_IF_UI64(T)>
+HWY_API Vec128<T, N> operator>>(Vec128<T, N> v, const Vec128<T, N> bits) {
+  const DFromV<decltype(v)> d;
+  alignas(16) T values[2] = {};
+  alignas(16) T counts[2] = {};
+  Store(v, d, values);
+  Store(bits, d, counts);
+  values[0] >>= (counts[0] & 63);
+  values[1] >>= (counts[1] & 63);
+  return Load(d, values);
+}
+
+// ================================================== MEMORY
+
+// ------------------------------ Load
+
+// Loads a full 16-byte vector from `aligned`.
+template <class D, HWY_IF_V_SIZE_D(D, 16), typename T = TFromD<D>>
+HWY_API Vec128<T> Load(D /* tag */, const T* HWY_RESTRICT aligned) {
+  const auto loaded = wasm_v128_load(aligned);
+  return Vec128<T>{loaded};
+}
+
+// Partial vectors (at most 8 bytes): copies exactly d.MaxBytes() bytes from p
+// into the start of the result; the remaining bytes are not written.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> Load(D d, const TFromD<D>* HWY_RESTRICT p) {
+  VFromD<D> result;
+  CopyBytes<d.MaxBytes()>(p, &result);
+  return result;
+}
+
+// Unaligned load; forwards to Load on this target.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> LoadU(D d, const TFromD<D>* HWY_RESTRICT p) {
+  const VFromD<D> loaded = Load(d, p);
+  return loaded;
+}
+
+// Vectors are at most 128 bits, so there is no block to duplicate; this is a
+// plain load.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> LoadDup128(D d, const TFromD<D>* HWY_RESTRICT p) {
+  const VFromD<D> loaded = Load(d, p);
+  return loaded;
+}
+
+// Loads the whole vector, then zeroes lanes whose mask is false. Note that
+// memory is read for every lane regardless of the mask.
+template <class D, typename T = TFromD<D>>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D d, const T* HWY_RESTRICT aligned) {
+  const VFromD<D> loaded = Load(d, aligned);
+  return IfThenElseZero(m, loaded);
+}
+
+// Loads the whole vector, then takes lanes from `v` where the mask is false.
+// Note that memory is read for every lane regardless of the mask.
+template <class D, typename T = TFromD<D>>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, MFromD<D> m, D d,
+                               const T* HWY_RESTRICT aligned) {
+  const VFromD<D> loaded = Load(d, aligned);
+  return IfThenElse(m, loaded, v);
+}
+
+// ------------------------------ Store
+
+namespace detail {
+
+// Compile-time-indexed lane extraction, one overload per lane size. The
+// integer overloads cast the intrinsic's result to T.
+
+// 1-byte lanes.
+template <size_t kLane, typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_INLINE T ExtractLane(const Vec128<T, N> v) {
+  const auto lane = wasm_i8x16_extract_lane(v.raw, kLane);
+  return static_cast<T>(lane);
+}
+// 2-byte lanes.
+template <size_t kLane, typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE T ExtractLane(const Vec128<T, N> v) {
+  const auto lane = wasm_i16x8_extract_lane(v.raw, kLane);
+  return static_cast<T>(lane);
+}
+// 4-byte lanes.
+template <size_t kLane, typename T, size_t N, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE T ExtractLane(const Vec128<T, N> v) {
+  const auto lane = wasm_i32x4_extract_lane(v.raw, kLane);
+  return static_cast<T>(lane);
+}
+// 8-byte lanes.
+template <size_t kLane, typename T, size_t N, HWY_IF_T_SIZE(T, 8)>
+HWY_INLINE T ExtractLane(const Vec128<T, N> v) {
+  const auto lane = wasm_i64x2_extract_lane(v.raw, kLane);
+  return static_cast<T>(lane);
+}
+
+// float lanes.
+template <size_t kLane, size_t N>
+HWY_INLINE float ExtractLane(const Vec128<float, N> v) {
+  const float lane = wasm_f32x4_extract_lane(v.raw, kLane);
+  return lane;
+}
+// double lanes.
+template <size_t kLane, size_t N>
+HWY_INLINE double ExtractLane(const Vec128<double, N> v) {
+  const double lane = wasm_f64x2_extract_lane(v.raw, kLane);
+  return lane;
+}
+
+}  // namespace detail
+
+// Stores a full 16-byte vector to `aligned`.
+template <class D, HWY_IF_V_SIZE_D(D, 16)>
+HWY_API void Store(VFromD<D> v, D /* tag */, TFromD<D>* HWY_RESTRICT aligned) {
+  const auto raw = v.raw;
+  wasm_v128_store(aligned, raw);
+}
+
+// Partial vectors with more than one lane: copies exactly d.MaxBytes() bytes
+// of the vector to p.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_LANES_GT_D(D, 1)>
+HWY_API void Store(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT p) {
+  const VFromD<D>* source = &v;
+  CopyBytes<d.MaxBytes()>(source, p);
+}
+
+// Single-lane vectors: writes lane 0 as a scalar.
+template <class D, HWY_IF_LANES_D(D, 1)>
+HWY_API void Store(VFromD<D> v, D /* tag */, TFromD<D>* HWY_RESTRICT p) {
+  const TFromD<D> lane0 = detail::ExtractLane<0>(v);
+  *p = lane0;
+}
+
+// StoreU == Store: the unaligned store simply forwards to Store, which
+// dispatches on the vector size of D.
+template <class D>
+HWY_API void StoreU(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT p) {
+  Store(v, d, p);
+}
+
+// Read-modify-write: loads the current contents of p, replaces lanes whose
+// mask is true with the corresponding lanes of v, and stores the result back.
+template <class D>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D d,
+                          TFromD<D>* HWY_RESTRICT p) {
+  const VFromD<D> previous = LoadU(d, p);
+  const VFromD<D> blended = IfThenElse(m, v, previous);
+  StoreU(blended, d, p);
+}
+
+// ------------------------------ Non-temporal stores
+
+// Same as aligned stores on non-x86: no non-temporal hint is issued here.
+// Forwards to Store so that partial vectors write only their own bytes; an
+// unconditional 16-byte store would write past the end of a partial vector.
+
+template <class D>
+HWY_API void Stream(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT aligned) {
+  Store(v, d, aligned);
+}
+
+// ------------------------------ Scatter in generic_ops-inl.h
+// ------------------------------ Gather in generic_ops-inl.h
+
+// ================================================== SWIZZLE
+
+// ------------------------------ ExtractLane
+
+// One overload per vector length just in case *_extract_lane raise compile
+// errors if their argument is out of bounds (even if that would never be
+// reached at runtime).
+// Single-lane vector: the only valid index is 0 (checked via HWY_DASSERT);
+// `i` is otherwise unused, hence the (void) cast.
+template <typename T>
+HWY_API T ExtractLane(const Vec128<T, 1> v, size_t i) {
+  HWY_DASSERT(i == 0);
+  (void)i;
+  return detail::ExtractLane<0>(v);
+}
+
+// Returns lane `i` of a 2-lane vector.
+// In non-debug GCC/clang builds, an `i` known at compile time is dispatched
+// to the immediate-index detail::ExtractLane. Otherwise (or if the switch
+// matches no case) the vector is stored to an aligned stack array which is
+// then indexed. No bounds check on `i` is performed in this function.
+template <typename T>
+HWY_API T ExtractLane(const Vec128<T, 2> v, size_t i) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::ExtractLane<0>(v);
+      case 1:
+        return detail::ExtractLane<1>(v);
+    }
+  }
+#endif
+  // Fallback for runtime indices: spill to memory and index.
+  alignas(16) T lanes[2];
+  Store(v, DFromV<decltype(v)>(), lanes);
+  return lanes[i];
+}
+
+// Returns lane `i` of a 4-lane vector.
+// In non-debug GCC/clang builds, an `i` known at compile time is dispatched
+// to the immediate-index detail::ExtractLane. Otherwise (or if the switch
+// matches no case) the vector is stored to an aligned stack array which is
+// then indexed. No bounds check on `i` is performed in this function.
+template <typename T>
+HWY_API T ExtractLane(const Vec128<T, 4> v, size_t i) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::ExtractLane<0>(v);
+      case 1:
+        return detail::ExtractLane<1>(v);
+      case 2:
+        return detail::ExtractLane<2>(v);
+      case 3:
+        return detail::ExtractLane<3>(v);
+    }
+  }
+#endif
+  // Fallback for runtime indices: spill to memory and index.
+  alignas(16) T lanes[4];
+  Store(v, DFromV<decltype(v)>(), lanes);
+  return lanes[i];
+}
+
+// Returns lane `i` of an 8-lane vector.
+// In non-debug GCC/clang builds, an `i` known at compile time is dispatched
+// to the immediate-index detail::ExtractLane. Otherwise (or if the switch
+// matches no case) the vector is stored to an aligned stack array which is
+// then indexed. No bounds check on `i` is performed in this function.
+template <typename T>
+HWY_API T ExtractLane(const Vec128<T, 8> v, size_t i) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::ExtractLane<0>(v);
+      case 1:
+        return detail::ExtractLane<1>(v);
+      case 2:
+        return detail::ExtractLane<2>(v);
+      case 3:
+        return detail::ExtractLane<3>(v);
+      case 4:
+        return detail::ExtractLane<4>(v);
+      case 5:
+        return detail::ExtractLane<5>(v);
+      case 6:
+        return detail::ExtractLane<6>(v);
+      case 7:
+        return detail::ExtractLane<7>(v);
+    }
+  }
+#endif
+  // Fallback for runtime indices: spill to memory and index.
+  alignas(16) T lanes[8];
+  Store(v, DFromV<decltype(v)>(), lanes);
+  return lanes[i];
+}
+
+// Returns lane `i` of a 16-lane vector.
+// In non-debug GCC/clang builds, an `i` known at compile time is dispatched
+// to the immediate-index detail::ExtractLane. Otherwise (or if the switch
+// matches no case) the vector is stored to an aligned stack array which is
+// then indexed. No bounds check on `i` is performed in this function.
+template <typename T>
+HWY_API T ExtractLane(const Vec128<T, 16> v, size_t i) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::ExtractLane<0>(v);
+      case 1:
+        return detail::ExtractLane<1>(v);
+      case 2:
+        return detail::ExtractLane<2>(v);
+      case 3:
+        return detail::ExtractLane<3>(v);
+      case 4:
+        return detail::ExtractLane<4>(v);
+      case 5:
+        return detail::ExtractLane<5>(v);
+      case 6:
+        return detail::ExtractLane<6>(v);
+      case 7:
+        return detail::ExtractLane<7>(v);
+      case 8:
+        return detail::ExtractLane<8>(v);
+      case 9:
+        return detail::ExtractLane<9>(v);
+      case 10:
+        return detail::ExtractLane<10>(v);
+      case 11:
+        return detail::ExtractLane<11>(v);
+      case 12:
+        return detail::ExtractLane<12>(v);
+      case 13:
+        return detail::ExtractLane<13>(v);
+      case 14:
+        return detail::ExtractLane<14>(v);
+      case 15:
+        return detail::ExtractLane<15>(v);
+    }
+  }
+#endif
+  // Fallback for runtime indices: spill to memory and index.
+  alignas(16) T lanes[16];
+  Store(v, DFromV<decltype(v)>(), lanes);
+  return lanes[i];
+}
+
+// ------------------------------ GetLane
+// Returns lane 0 of `v` for any lane count N.
+template <typename T, size_t N>
+HWY_API T GetLane(const Vec128<T, N> v) {
+  return detail::ExtractLane<0>(v);
+}
+
+// ------------------------------ InsertLane
+
+// Compile-time-index lane insertion helpers. Each overload returns a copy of
+// `v` with lane kLane replaced by `t`, using the *_replace_lane intrinsic that
+// matches the lane width; the overload is selected by sizeof(T) or by the
+// float/double lane type.
+namespace detail {
+
+// 1-byte lanes: `t` is cast to int8_t for the intrinsic.
+template <size_t kLane, typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_INLINE Vec128<T, N> InsertLane(const Vec128<T, N> v, T t) {
+  static_assert(kLane < N, "Lane index out of bounds");
+  return Vec128<T, N>{
+      wasm_i8x16_replace_lane(v.raw, kLane, static_cast<int8_t>(t))};
+}
+
+// 2-byte lanes: `t` is cast to int16_t for the intrinsic.
+template <size_t kLane, typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE Vec128<T, N> InsertLane(const Vec128<T, N> v, T t) {
+  static_assert(kLane < N, "Lane index out of bounds");
+  return Vec128<T, N>{
+      wasm_i16x8_replace_lane(v.raw, kLane, static_cast<int16_t>(t))};
+}
+
+// 4-byte lanes: `t` is cast to int32_t for the intrinsic.
+template <size_t kLane, typename T, size_t N, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE Vec128<T, N> InsertLane(const Vec128<T, N> v, T t) {
+  static_assert(kLane < N, "Lane index out of bounds");
+  return Vec128<T, N>{
+      wasm_i32x4_replace_lane(v.raw, kLane, static_cast<int32_t>(t))};
+}
+
+// 8-byte lanes: `t` is cast to int64_t for the intrinsic.
+template <size_t kLane, typename T, size_t N, HWY_IF_T_SIZE(T, 8)>
+HWY_INLINE Vec128<T, N> InsertLane(const Vec128<T, N> v, T t) {
+  static_assert(kLane < N, "Lane index out of bounds");
+  return Vec128<T, N>{
+      wasm_i64x2_replace_lane(v.raw, kLane, static_cast<int64_t>(t))};
+}
+
+// float lanes: uses the f32x4 intrinsic without any cast.
+template <size_t kLane, size_t N>
+HWY_INLINE Vec128<float, N> InsertLane(const Vec128<float, N> v, float t) {
+  static_assert(kLane < N, "Lane index out of bounds");
+  return Vec128<float, N>{wasm_f32x4_replace_lane(v.raw, kLane, t)};
+}
+
+// double lanes: uses the f64x2 intrinsic without any cast.
+// NOTE(review): this overload asserts kLane < 2 whereas the other overloads
+// assert kLane < N - confirm whether the difference is intentional.
+template <size_t kLane, size_t N>
+HWY_INLINE Vec128<double, N> InsertLane(const Vec128<double, N> v, double t) {
+  static_assert(kLane < 2, "Lane index out of bounds");
+  return Vec128<double, N>{wasm_f64x2_replace_lane(v.raw, kLane, t)};
+}
+
+}  // namespace detail
+
+// Requires one overload per vector length because InsertLane<3> may be a
+// compile error if it calls wasm_f64x2_replace_lane.
+
+// Single-lane vector: the only valid index is 0 (checked via HWY_DASSERT), so
+// the result is built with Set(d, t) and does not read the lanes of `v`.
+template <typename T>
+HWY_API Vec128<T, 1> InsertLane(const Vec128<T, 1> v, size_t i, T t) {
+  HWY_DASSERT(i == 0);
+  (void)i;
+  return Set(DFromV<decltype(v)>(), t);
+}
+
+// Returns a copy of a 2-lane vector with lane `i` replaced by `t`.
+// In non-debug GCC/clang builds, an `i` known at compile time is dispatched
+// to the immediate-index detail::InsertLane. Otherwise (or if the switch
+// matches no case) the vector round-trips through an aligned stack array.
+// No bounds check on `i` is performed in this function.
+template <typename T>
+HWY_API Vec128<T, 2> InsertLane(const Vec128<T, 2> v, size_t i, T t) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::InsertLane<0>(v, t);
+      case 1:
+        return detail::InsertLane<1>(v, t);
+    }
+  }
+#endif
+  // Fallback for runtime indices: store, overwrite one element, reload.
+  const DFromV<decltype(v)> d;
+  alignas(16) T lanes[2];
+  Store(v, d, lanes);
+  lanes[i] = t;
+  return Load(d, lanes);
+}
+
+// Returns a copy of a 4-lane vector with lane `i` replaced by `t`.
+// In non-debug GCC/clang builds, an `i` known at compile time is dispatched
+// to the immediate-index detail::InsertLane. Otherwise (or if the switch
+// matches no case) the vector round-trips through an aligned stack array.
+// No bounds check on `i` is performed in this function.
+template <typename T>
+HWY_API Vec128<T, 4> InsertLane(const Vec128<T, 4> v, size_t i, T t) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::InsertLane<0>(v, t);
+      case 1:
+        return detail::InsertLane<1>(v, t);
+      case 2:
+        return detail::InsertLane<2>(v, t);
+      case 3:
+        return detail::InsertLane<3>(v, t);
+    }
+  }
+#endif
+  // Fallback for runtime indices: store, overwrite one element, reload.
+  const DFromV<decltype(v)> d;
+  alignas(16) T lanes[4];
+  Store(v, d, lanes);
+  lanes[i] = t;
+  return Load(d, lanes);
+}
+
+// Returns a copy of an 8-lane vector with lane `i` replaced by `t`.
+// In non-debug GCC/clang builds, an `i` known at compile time is dispatched
+// to the immediate-index detail::InsertLane. Otherwise (or if the switch
+// matches no case) the vector round-trips through an aligned stack array.
+// No bounds check on `i` is performed in this function.
+template <typename T>
+HWY_API Vec128<T, 8> InsertLane(const Vec128<T, 8> v, size_t i, T t) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::InsertLane<0>(v, t);
+      case 1:
+        return detail::InsertLane<1>(v, t);
+      case 2:
+        return detail::InsertLane<2>(v, t);
+      case 3:
+        return detail::InsertLane<3>(v, t);
+      case 4:
+        return detail::InsertLane<4>(v, t);
+      case 5:
+        return detail::InsertLane<5>(v, t);
+      case 6:
+        return detail::InsertLane<6>(v, t);
+      case 7:
+        return detail::InsertLane<7>(v, t);
+    }
+  }
+#endif
+  // Fallback for runtime indices: store, overwrite one element, reload.
+  const DFromV<decltype(v)> d;
+  alignas(16) T lanes[8];
+  Store(v, d, lanes);
+  lanes[i] = t;
+  return Load(d, lanes);
+}
+
+// Returns a copy of a 16-lane vector with lane `i` replaced by `t`.
+// In non-debug GCC/clang builds, an `i` known at compile time is dispatched
+// to the immediate-index detail::InsertLane. Otherwise (or if the switch
+// matches no case) the vector round-trips through an aligned stack array.
+// No bounds check on `i` is performed in this function.
+template <typename T>
+HWY_API Vec128<T, 16> InsertLane(const Vec128<T, 16> v, size_t i, T t) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::InsertLane<0>(v, t);
+      case 1:
+        return detail::InsertLane<1>(v, t);
+      case 2:
+        return detail::InsertLane<2>(v, t);
+      case 3:
+        return detail::InsertLane<3>(v, t);
+      case 4:
+        return detail::InsertLane<4>(v, t);
+      case 5:
+        return detail::InsertLane<5>(v, t);
+      case 6:
+        return detail::InsertLane<6>(v, t);
+      case 7:
+        return detail::InsertLane<7>(v, t);
+      case 8:
+        return detail::InsertLane<8>(v, t);
+      case 9:
+        return detail::InsertLane<9>(v, t);
+      case 10:
+        return detail::InsertLane<10>(v, t);
+      case 11:
+        return detail::InsertLane<11>(v, t);
+      case 12:
+        return detail::InsertLane<12>(v, t);
+      case 13:
+        return detail::InsertLane<13>(v, t);
+      case 14:
+        return detail::InsertLane<14>(v, t);
+      case 15:
+        return detail::InsertLane<15>(v, t);
+    }
+  }
+#endif
+  // Fallback for runtime indices: store, overwrite one element, reload.
+  const DFromV<decltype(v)> d;
+  alignas(16) T lanes[16];
+  Store(v, d, lanes);
+  lanes[i] = t;
+  return Load(d, lanes);
+}
+
+// ------------------------------ LowerHalf
+
+// Returns the lower half of `v` as a vector of type VFromD<D>. The raw
+// 128-bit register is reused unchanged; only the vector type is narrowed.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> LowerHalf(D /* tag */, VFromD<Twice<D>> v) {
+  return VFromD<D>{v.raw};
+}
+// Tag-free overload: reinterprets `v` as a vector with N / 2 lanes, reusing
+// the raw register unchanged.
+template <typename T, size_t N>
+HWY_API Vec128<T, N / 2> LowerHalf(Vec128<T, N> v) {
+  return Vec128<T, N / 2>{v.raw};
+}
+
+// ------------------------------ ShiftLeftBytes
+
+// 0x01..0F, kBytes = 1 => 0x02..0F00
+// Shifts the raw 128-bit register left by kBytes bytes (towards higher byte
+// indices), filling the vacated low bytes with zero. Implemented as a
+// two-input byte shuffle of (v.raw, zero): shuffle indices 0..15 select bytes
+// of `v`, and index 16 selects a byte of `zero`. kBytes == 16 matches no case
+// and falls through to the all-zero return after the switch.
+template <int kBytes, class D>
+HWY_API VFromD<D> ShiftLeftBytes(D /* tag */, VFromD<D> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  const __i8x16 zero = wasm_i8x16_splat(0);
+  switch (kBytes) {
+    case 0:
+      return v;
+
+    case 1:
+      return VFromD<D>{wasm_i8x16_shuffle(v.raw, zero, 16, 0, 1, 2, 3, 4, 5, 6,
+                                          7, 8, 9, 10, 11, 12, 13, 14)};
+
+    case 2:
+      return VFromD<D>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 0, 1, 2, 3, 4, 5,
+                                          6, 7, 8, 9, 10, 11, 12, 13)};
+
+    case 3:
+      return VFromD<D>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 0, 1, 2, 3,
+                                          4, 5, 6, 7, 8, 9, 10, 11, 12)};
+
+    case 4:
+      return VFromD<D>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 0, 1, 2,
+                                          3, 4, 5, 6, 7, 8, 9, 10, 11)};
+
+    case 5:
+      return VFromD<D>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 0, 1,
+                                          2, 3, 4, 5, 6, 7, 8, 9, 10)};
+
+    case 6:
+      return VFromD<D>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 16,
+                                          0, 1, 2, 3, 4, 5, 6, 7, 8, 9)};
+
+    case 7:
+      return VFromD<D>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 16,
+                                          16, 0, 1, 2, 3, 4, 5, 6, 7, 8)};
+
+    case 8:
+      return VFromD<D>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 16,
+                                          16, 16, 0, 1, 2, 3, 4, 5, 6, 7)};
+
+    case 9:
+      return VFromD<D>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 16,
+                                          16, 16, 16, 0, 1, 2, 3, 4, 5, 6)};
+
+    case 10:
+      return VFromD<D>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 16,
+                                          16, 16, 16, 16, 0, 1, 2, 3, 4, 5)};
+
+    case 11:
+      return VFromD<D>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 16,
+                                          16, 16, 16, 16, 16, 0, 1, 2, 3, 4)};
+
+    case 12:
+      return VFromD<D>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 16,
+                                          16, 16, 16, 16, 16, 16, 0, 1, 2, 3)};
+
+    case 13:
+      return VFromD<D>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 16,
+                                          16, 16, 16, 16, 16, 16, 16, 0, 1, 2)};
+
+    case 14:
+      return VFromD<D>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 16,
+                                          16, 16, 16, 16, 16, 16, 16, 16, 0,
+                                          1)};
+
+    case 15:
+      return VFromD<D>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 16,
+                                          16, 16, 16, 16, 16, 16, 16, 16, 16,
+                                          0)};
+  }
+  // Reached only for kBytes == 16: every byte has been shifted out.
+  return VFromD<D>{zero};
+}
+
+// Tag-free overload: derives the tag from the vector type and forwards to the
+// tag-taking ShiftLeftBytes.
+template <int kBytes, typename T, size_t N>
+HWY_API Vec128<T, N> ShiftLeftBytes(Vec128<T, N> v) {
+  return ShiftLeftBytes<kBytes>(DFromV<decltype(v)>(), v);
+}
+
+// ------------------------------ ShiftLeftLanes
+
+// Shifts `v` left by kLanes whole lanes: the lane count is converted to a
+// byte count and the shift is performed on the uint8_t view of the vector.
+template <int kLanes, class D>
+HWY_API VFromD<D> ShiftLeftLanes(D d, const VFromD<D> v) {
+  constexpr size_t kShiftBytes = kLanes * sizeof(TFromD<D>);
+  const Repartition<uint8_t, decltype(d)> du8;
+  const auto as_bytes = BitCast(du8, v);
+  return BitCast(d, ShiftLeftBytes<kShiftBytes>(as_bytes));
+}
+
+// Tag-free overload: derives the tag from the vector type and forwards to the
+// tag-taking ShiftLeftLanes.
+template <int kLanes, typename T, size_t N>
+HWY_API Vec128<T, N> ShiftLeftLanes(const Vec128<T, N> v) {
+  return ShiftLeftLanes<kLanes>(DFromV<decltype(v)>(), v);
+}
+
+// ------------------------------ ShiftRightBytes
+namespace detail {
+
+// Helper function allows zeroing invalid lanes in caller.
+// Returns the raw 128-bit register of `v` shifted right by kBytes bytes
+// (towards lower byte indices), with zeros shifted into the upper bytes.
+// Implemented as a two-input byte shuffle of (v.raw, zero): shuffle indices
+// 0..15 select bytes of `v`, and index 16 selects a byte of `zero`.
+// This helper does not mask lanes of partial vectors; that is left to the
+// caller.
+template <int kBytes, typename T, size_t N>
+HWY_API __i8x16 ShrBytes(const Vec128<T, N> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  const __i8x16 zero = wasm_i8x16_splat(0);
+
+  switch (kBytes) {
+    case 0:
+      return v.raw;
+
+    case 1:
+      return wasm_i8x16_shuffle(v.raw, zero, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                                12, 13, 14, 15, 16);
+
+    case 2:
+      return wasm_i8x16_shuffle(v.raw, zero, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+                                13, 14, 15, 16, 16);
+
+    case 3:
+      return wasm_i8x16_shuffle(v.raw, zero, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+                                13, 14, 15, 16, 16, 16);
+
+    case 4:
+      return wasm_i8x16_shuffle(v.raw, zero, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
+                                14, 15, 16, 16, 16, 16);
+
+    case 5:
+      return wasm_i8x16_shuffle(v.raw, zero, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+                                15, 16, 16, 16, 16, 16);
+
+    case 6:
+      return wasm_i8x16_shuffle(v.raw, zero, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                                16, 16, 16, 16, 16, 16);
+
+    case 7:
+      return wasm_i8x16_shuffle(v.raw, zero, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                                16, 16, 16, 16, 16, 16, 16);
+
+    case 8:
+      return wasm_i8x16_shuffle(v.raw, zero, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+                                16, 16, 16, 16, 16, 16, 16);
+
+    case 9:
+      return wasm_i8x16_shuffle(v.raw, zero, 9, 10, 11, 12, 13, 14, 15, 16, 16,
+                                16, 16, 16, 16, 16, 16, 16);
+
+    case 10:
+      return wasm_i8x16_shuffle(v.raw, zero, 10, 11, 12, 13, 14, 15, 16, 16, 16,
+                                16, 16, 16, 16, 16, 16, 16);
+
+    case 11:
+      return wasm_i8x16_shuffle(v.raw, zero, 11, 12, 13, 14, 15, 16, 16, 16, 16,
+                                16, 16, 16, 16, 16, 16, 16);
+
+    case 12:
+      return wasm_i8x16_shuffle(v.raw, zero, 12, 13, 14, 15, 16, 16, 16, 16, 16,
+                                16, 16, 16, 16, 16, 16, 16);
+
+    case 13:
+      return wasm_i8x16_shuffle(v.raw, zero, 13, 14, 15, 16, 16, 16, 16, 16, 16,
+                                16, 16, 16, 16, 16, 16, 16);
+
+    case 14:
+      return wasm_i8x16_shuffle(v.raw, zero, 14, 15, 16, 16, 16, 16, 16, 16, 16,
+                                16, 16, 16, 16, 16, 16, 16);
+
+    case 15:
+      return wasm_i8x16_shuffle(v.raw, zero, 15, 16, 16, 16, 16, 16, 16, 16, 16,
+                                16, 16, 16, 16, 16, 16, 16);
+  }
+  // Reached only for kBytes == 16 (the static_assert excludes other values):
+  // every byte has been shifted out. Returning here rather than from a
+  // `case 16` ensures no control path falls off the end of this non-void
+  // function.
+  return zero;
+}
+
+}  // namespace detail
+
+// 0x01..0F, kBytes = 1 => 0x0001..0E
+// Shifts `v` right by kBytes bytes, shifting in zeros. For partial vectors
+// (d.MaxBytes() != 16), the register is first masked with FirstN over the
+// full-width tag so that only the first MaxLanes(d) lanes are kept, before
+// delegating to detail::ShrBytes.
+template <int kBytes, class D>
+HWY_API VFromD<D> ShiftRightBytes(D d, VFromD<D> v) {
+  // For partial vectors, clear upper lanes so we shift in zeros.
+  if (d.MaxBytes() != 16) {
+    const Full128<TFromD<D>> dfull;
+    const VFromD<decltype(dfull)> vfull{v.raw};
+    v = VFromD<D>{IfThenElseZero(FirstN(dfull, MaxLanes(d)), vfull).raw};
+  }
+  return VFromD<D>{detail::ShrBytes<kBytes>(v)};
+}
+
+// ------------------------------ ShiftRightLanes
+// Shifts `v` right by kLanes whole lanes: the lane count is converted to a
+// byte count and the shift is performed on the uint8_t view of the vector.
+template <int kLanes, class D>
+HWY_API VFromD<D> ShiftRightLanes(D d, const VFromD<D> v) {
+  constexpr size_t kShiftBytes = kLanes * sizeof(TFromD<D>);
+  const Repartition<uint8_t, decltype(d)> du8;
+  const auto as_bytes = BitCast(du8, v);
+  return BitCast(d, ShiftRightBytes<kShiftBytes>(du8, as_bytes));
+}
+
+// ------------------------------ UpperHalf (ShiftRightBytes)
+
+// Full 128-bit input: returns the upper 64 bits as a Vec64. The 32-bit shuffle
+// (2, 3, 2, 3) places input lanes 2 and 3 into output lanes 0 and 1.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec64<T> UpperHalf(D /* tag */, const Vec128<T> v) {
+  return Vec64<T>{wasm_i32x4_shuffle(v.raw, v.raw, 2, 3, 2, 3)};
+}
+
+// Partial
+// The input has twice the size of D: it is shifted right by d.MaxBytes() bytes
+// so that its upper half lands in the low bytes, then LowerHalf is taken.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> UpperHalf(D d, VFromD<Twice<D>> v) {
+  return LowerHalf(d, ShiftRightBytes<d.MaxBytes()>(Twice<D>(), v));
+}
+
+// ------------------------------ CombineShiftRightBytes
+
+// Full 128-bit vectors: treats (hi, lo) as one 32-byte value with `lo` in the
+// low 16 bytes, shifts it right by kBytes bytes and returns the low 16 bytes.
+// Implemented as a two-input byte shuffle of (lo.raw, hi.raw): shuffle indices
+// 0..15 select bytes of `lo` and 16..31 select bytes of `hi`. kBytes == 0
+// returns `lo`; kBytes == 16 matches no case and returns `hi` after the switch.
+template <int kBytes, class D, typename T = TFromD<D>>
+HWY_API Vec128<T> CombineShiftRightBytes(D /* tag */, Vec128<T> hi,
+                                         Vec128<T> lo) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  switch (kBytes) {
+    case 0:
+      return lo;
+
+    case 1:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 1, 2, 3, 4, 5, 6, 7,
+                                          8, 9, 10, 11, 12, 13, 14, 15, 16)};
+
+    case 2:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 2, 3, 4, 5, 6, 7, 8,
+                                          9, 10, 11, 12, 13, 14, 15, 16, 17)};
+
+    case 3:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 3, 4, 5, 6, 7, 8, 9,
+                                          10, 11, 12, 13, 14, 15, 16, 17, 18)};
+
+    case 4:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 4, 5, 6, 7, 8, 9, 10,
+                                          11, 12, 13, 14, 15, 16, 17, 18, 19)};
+
+    case 5:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 5, 6, 7, 8, 9, 10, 11,
+                                          12, 13, 14, 15, 16, 17, 18, 19, 20)};
+
+    case 6:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 6, 7, 8, 9, 10, 11,
+                                          12, 13, 14, 15, 16, 17, 18, 19, 20,
+                                          21)};
+
+    case 7:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 7, 8, 9, 10, 11, 12,
+                                          13, 14, 15, 16, 17, 18, 19, 20, 21,
+                                          22)};
+
+    case 8:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 8, 9, 10, 11, 12, 13,
+                                          14, 15, 16, 17, 18, 19, 20, 21, 22,
+                                          23)};
+
+    case 9:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 9, 10, 11, 12, 13, 14,
+                                          15, 16, 17, 18, 19, 20, 21, 22, 23,
+                                          24)};
+
+    case 10:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 10, 11, 12, 13, 14,
+                                          15, 16, 17, 18, 19, 20, 21, 22, 23,
+                                          24, 25)};
+
+    case 11:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 11, 12, 13, 14, 15,
+                                          16, 17, 18, 19, 20, 21, 22, 23, 24,
+                                          25, 26)};
+
+    case 12:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 12, 13, 14, 15, 16,
+                                          17, 18, 19, 20, 21, 22, 23, 24, 25,
+                                          26, 27)};
+
+    case 13:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 13, 14, 15, 16, 17,
+                                          18, 19, 20, 21, 22, 23, 24, 25, 26,
+                                          27, 28)};
+
+    case 14:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 14, 15, 16, 17, 18,
+                                          19, 20, 21, 22, 23, 24, 25, 26, 27,
+                                          28, 29)};
+
+    case 15:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 15, 16, 17, 18, 19,
+                                          20, 21, 22, 23, 24, 25, 26, 27, 28,
+                                          29, 30)};
+  }
+  // Reached only for kBytes == 16: the result consists entirely of `hi`.
+  return hi;
+}
+
+// Partial vectors (at most 8 bytes); kBytes must satisfy 0 < kBytes < kSize.
+// Reuses the full-width implementation: `lo` is first moved to the top kSize
+// bytes of a 16-byte register so that it is adjacent to `hi` in the combined
+// value, and the shift amount is increased by 16 - kSize to compensate.
+template <int kBytes, class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> CombineShiftRightBytes(D d, VFromD<D> hi, VFromD<D> lo) {
+  constexpr size_t kSize = d.MaxBytes();
+  static_assert(0 < kBytes && kBytes < kSize, "kBytes invalid");
+  const Repartition<uint8_t, decltype(d)> d8;
+  using V8 = Vec128<uint8_t>;
+  const DFromV<V8> dfull8;
+  const Repartition<TFromD<D>, decltype(dfull8)> dfull;
+  const V8 hi8{BitCast(d8, hi).raw};
+  // Move into most-significant bytes
+  const V8 lo8 = ShiftLeftBytes<16 - kSize>(V8{BitCast(d8, lo).raw});
+  const V8 r = CombineShiftRightBytes<16 - kSize + kBytes>(dfull8, hi8, lo8);
+  return VFromD<D>{BitCast(dfull, r).raw};
+}
+
+// ------------------------------ Broadcast/splat any lane
+
+// 1-byte lanes: returns a vector in which every byte lane of the register is
+// a copy of lane kLane of `v` (kLane must be in [0, N)).
+template <int kLane, typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T, N> Broadcast(const Vec128<T, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  return Vec128<T, N>{wasm_i8x16_shuffle(
+      v.raw, v.raw, kLane, kLane, kLane, kLane, kLane, kLane, kLane, kLane,
+      kLane, kLane, kLane, kLane, kLane, kLane, kLane, kLane)};
+}
+
+// 2-byte lanes: returns a vector in which every 16-bit lane of the register
+// is a copy of lane kLane of `v` (kLane must be in [0, N)).
+template <int kLane, typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec128<T, N> Broadcast(const Vec128<T, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  return Vec128<T, N>{wasm_i16x8_shuffle(v.raw, v.raw, kLane, kLane, kLane,
+                                         kLane, kLane, kLane, kLane, kLane)};
+}
+
+// 4-byte lanes: returns a vector in which every 32-bit lane of the register
+// is a copy of lane kLane of `v` (kLane must be in [0, N)).
+template <int kLane, typename T, size_t N, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T, N> Broadcast(const Vec128<T, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  return Vec128<T, N>{
+      wasm_i32x4_shuffle(v.raw, v.raw, kLane, kLane, kLane, kLane)};
+}
+
+// 8-byte lanes: returns a vector in which both 64-bit lanes of the register
+// are a copy of lane kLane of `v` (kLane must be in [0, N)).
+template <int kLane, typename T, size_t N, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T, N> Broadcast(const Vec128<T, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  return Vec128<T, N>{wasm_i64x2_shuffle(v.raw, v.raw, kLane, kLane)};
+}
+
+// ------------------------------ TableLookupBytes
+
+// Returns vector of bytes[from[i]]. "from" is also interpreted as bytes, i.e.
+// lane indices in [0, 16).
+// Implemented with a single wasm_i8x16_swizzle on the raw registers; the
+// result has the type of the index vector `from`.
+template <typename T, size_t N, typename TI, size_t NI>
+HWY_API Vec128<TI, NI> TableLookupBytes(const Vec128<T, N> bytes,
+                                        const Vec128<TI, NI> from) {
+  return Vec128<TI, NI>{wasm_i8x16_swizzle(bytes.raw, from.raw)};
+}
+
+// Like TableLookupBytes, but each result byte whose index byte in `from` is
+// negative when viewed as int8_t (i.e. has its most-significant bit set) is
+// replaced by zero.
+template <typename T, size_t N, typename TI, size_t NI>
+HWY_API Vec128<TI, NI> TableLookupBytesOr0(const Vec128<T, N> bytes,
+                                           const Vec128<TI, NI> from) {
+  const DFromV<decltype(from)> d_from;
+  // The mask type must match the vector it is applied to, so both the table
+  // and the indices are viewed as int8_t lanes.
+  const Repartition<int8_t, decltype(d_from)> d_from8;
+  const Repartition<int8_t, DFromV<decltype(bytes)>> d_table8;
+  const auto from8 = BitCast(d_from8, from);
+  const auto is_negative = from8 < Zero(d_from8);
+  const auto looked_up = TableLookupBytes(BitCast(d_table8, bytes), from8);
+  return BitCast(d_from, IfThenZeroElse(is_negative, looked_up));
+}
+
+// ------------------------------ Hard-coded shuffles
+
+// Notation: let Vec128<int32_t> have lanes 3,2,1,0 (0 is least-significant).
+// Shuffle0321 rotates one lane to the right (the previous least-significant
+// lane is now most-significant). These could also be implemented via
+// CombineShiftRightBytes but the shuffle_abcd notation is more convenient.
+
+// Swap 32-bit halves in 64-bit halves.
+// Output lanes 0..3 take input lanes 1, 0, 3, 2. Restricted to 32-bit lanes
+// and to N == 2 or N == 4 by the static_asserts.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Shuffle2301(const Vec128<T, N> v) {
+  static_assert(sizeof(T) == 4, "Only for 32-bit lanes");
+  static_assert(N == 2 || N == 4, "Does not make sense for N=1");
+  return Vec128<T, N>{wasm_i32x4_shuffle(v.raw, v.raw, 1, 0, 3, 2)};
+}
+
+// These are used by generic_ops-inl to implement LoadInterleaved3.
+// Each ShuffleTwoXXXX builds a 4-lane result from two inputs: output lanes 0
+// and 1 are taken from `a`, output lanes 2 and 3 from `b` (the "+ 16", "+ 8"
+// and "+ 4" terms offset the shuffle index into the second operand for 1-, 2-
+// and 4-byte lanes respectively). For 1- and 2-byte lanes the remaining
+// register lanes are given the indices 0x7F / 0x7FFF.
+// NOTE(review): presumably those remaining lanes are don't-care - confirm.
+namespace detail {
+
+// Lanes 0..3 = a[1], a[0], b[3], b[2]; 1-byte lanes.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T, N> ShuffleTwo2301(const Vec128<T, N> a,
+                                    const Vec128<T, N> b) {
+  static_assert(N == 2 || N == 4, "Does not make sense for N=1");
+  return Vec128<T, N>{wasm_i8x16_shuffle(a.raw, b.raw, 1, 0, 3 + 16, 2 + 16,
+                                         0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F,
+                                         0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F)};
+}
+// Lanes 0..3 = a[1], a[0], b[3], b[2]; 2-byte lanes.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec128<T, N> ShuffleTwo2301(const Vec128<T, N> a,
+                                    const Vec128<T, N> b) {
+  static_assert(N == 2 || N == 4, "Does not make sense for N=1");
+  return Vec128<T, N>{wasm_i16x8_shuffle(a.raw, b.raw, 1, 0, 3 + 8, 2 + 8,
+                                         0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF)};
+}
+// Lanes 0..3 = a[1], a[0], b[3], b[2]; 4-byte lanes.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T, N> ShuffleTwo2301(const Vec128<T, N> a,
+                                    const Vec128<T, N> b) {
+  static_assert(N == 2 || N == 4, "Does not make sense for N=1");
+  return Vec128<T, N>{wasm_i32x4_shuffle(a.raw, b.raw, 1, 0, 3 + 4, 2 + 4)};
+}
+
+// Lanes 0..3 = a[0], a[3], b[2], b[1]; 1-byte lanes.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T, N> ShuffleTwo1230(const Vec128<T, N> a,
+                                    const Vec128<T, N> b) {
+  static_assert(N == 2 || N == 4, "Does not make sense for N=1");
+  return Vec128<T, N>{wasm_i8x16_shuffle(a.raw, b.raw, 0, 3, 2 + 16, 1 + 16,
+                                         0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F,
+                                         0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F)};
+}
+// Lanes 0..3 = a[0], a[3], b[2], b[1]; 2-byte lanes.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec128<T, N> ShuffleTwo1230(const Vec128<T, N> a,
+                                    const Vec128<T, N> b) {
+  static_assert(N == 2 || N == 4, "Does not make sense for N=1");
+  return Vec128<T, N>{wasm_i16x8_shuffle(a.raw, b.raw, 0, 3, 2 + 8, 1 + 8,
+                                         0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF)};
+}
+// Lanes 0..3 = a[0], a[3], b[2], b[1]; 4-byte lanes.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T, N> ShuffleTwo1230(const Vec128<T, N> a,
+                                    const Vec128<T, N> b) {
+  static_assert(N == 2 || N == 4, "Does not make sense for N=1");
+  return Vec128<T, N>{wasm_i32x4_shuffle(a.raw, b.raw, 0, 3, 2 + 4, 1 + 4)};
+}
+
+// Lanes 0..3 = a[2], a[1], b[0], b[3]; 1-byte lanes.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T, N> ShuffleTwo3012(const Vec128<T, N> a,
+                                    const Vec128<T, N> b) {
+  static_assert(N == 2 || N == 4, "Does not make sense for N=1");
+  return Vec128<T, N>{wasm_i8x16_shuffle(a.raw, b.raw, 2, 1, 0 + 16, 3 + 16,
+                                         0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F,
+                                         0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F)};
+}
+// Lanes 0..3 = a[2], a[1], b[0], b[3]; 2-byte lanes.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec128<T, N> ShuffleTwo3012(const Vec128<T, N> a,
+                                    const Vec128<T, N> b) {
+  static_assert(N == 2 || N == 4, "Does not make sense for N=1");
+  return Vec128<T, N>{wasm_i16x8_shuffle(a.raw, b.raw, 2, 1, 0 + 8, 3 + 8,
+                                         0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF)};
+}
+// Lanes 0..3 = a[2], a[1], b[0], b[3]; 4-byte lanes.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T, N> ShuffleTwo3012(const Vec128<T, N> a,
+                                    const Vec128<T, N> b) {
+  static_assert(N == 2 || N == 4, "Does not make sense for N=1");
+  return Vec128<T, N>{wasm_i32x4_shuffle(a.raw, b.raw, 2, 1, 0 + 4, 3 + 4)};
+}
+
+}  // namespace detail
+
+// Swap 64-bit halves
+// Only for 64-bit lanes: output lane 0 is input lane 1 and vice versa.
+template <typename T>
+HWY_API Vec128<T> Shuffle01(const Vec128<T> v) {
+  static_assert(sizeof(T) == 8, "Only for 64-bit lanes");
+  return Vec128<T>{wasm_i64x2_shuffle(v.raw, v.raw, 1, 0)};
+}
+// Only for 32-bit lanes: swaps the two 64-bit halves of the vector via a
+// 64-bit shuffle, so each pair of adjacent 32-bit lanes moves together.
+template <typename T>
+HWY_API Vec128<T> Shuffle1032(const Vec128<T> v) {
+  static_assert(sizeof(T) == 4, "Only for 32-bit lanes");
+  return Vec128<T>{wasm_i64x2_shuffle(v.raw, v.raw, 1, 0)};
+}
+
+// Rotate right 32 bits
+// Output lanes 0..3 take input lanes 1, 2, 3, 0.
+template <typename T>
+HWY_API Vec128<T> Shuffle0321(const Vec128<T> v) {
+  static_assert(sizeof(T) == 4, "Only for 32-bit lanes");
+  return Vec128<T>{wasm_i32x4_shuffle(v.raw, v.raw, 1, 2, 3, 0)};
+}
+
+// Rotate left 32 bits
+// Output lanes 0..3 take input lanes 3, 0, 1, 2.
+template <typename T>
+HWY_API Vec128<T> Shuffle2103(const Vec128<T> v) {
+  static_assert(sizeof(T) == 4, "Only for 32-bit lanes");
+  return Vec128<T>{wasm_i32x4_shuffle(v.raw, v.raw, 3, 0, 1, 2)};
+}
+
+// Reverse
+// Output lanes 0..3 take input lanes 3, 2, 1, 0 (32-bit lanes only).
+template <typename T>
+HWY_API Vec128<T> Shuffle0123(const Vec128<T> v) {
+  static_assert(sizeof(T) == 4, "Only for 32-bit lanes");
+  return Vec128<T>{wasm_i32x4_shuffle(v.raw, v.raw, 3, 2, 1, 0)};
+}
+
+// ------------------------------ TableLookupLanes
+
+// Returned by SetTableIndices for use by TableLookupLanes.
+// Wraps a raw 128-bit register; T and N tag the lane type and lane count of
+// the vectors the indices are meant for (N defaults to a full vector).
+template <typename T, size_t N = 16 / sizeof(T)>
+struct Indices128 {
+  __v128_u raw;
+};
+
+// Constant byte-index vectors used by IndicesFromVec to expand per-lane
+// indices into per-byte indices.
+namespace detail {
+
+// IndicesFromVecBroadcastLaneBytes: for every byte position, the index of the
+// first byte of the lane containing it (see the tables below).
+// 1-byte lanes: returns Iota(d8, 0).
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecBroadcastLaneBytes(
+    D d) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  return Iota(d8, 0);
+}
+
+// 2-byte lanes.
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecBroadcastLaneBytes(
+    D d) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  alignas(16) static constexpr uint8_t kBroadcastLaneBytes[16] = {
+      0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14};
+  return Load(d8, kBroadcastLaneBytes);
+}
+
+// 4-byte lanes.
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecBroadcastLaneBytes(
+    D d) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  alignas(16) static constexpr uint8_t kBroadcastLaneBytes[16] = {
+      0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12};
+  return Load(d8, kBroadcastLaneBytes);
+}
+
+// 8-byte lanes.
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecBroadcastLaneBytes(
+    D d) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  alignas(16) static constexpr uint8_t kBroadcastLaneBytes[16] = {
+      0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8};
+  return Load(d8, kBroadcastLaneBytes);
+}
+
+// IndicesFromVecByteOffsets: for every byte position, its offset within its
+// lane (see the tables below).
+// 1-byte lanes: all offsets are zero.
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecByteOffsets(D d) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  return Zero(d8);
+}
+
+// 2-byte lanes.
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecByteOffsets(D d) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  alignas(16) static constexpr uint8_t kByteOffsets[16] = {
+      0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
+  return Load(d8, kByteOffsets);
+}
+
+// 4-byte lanes.
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecByteOffsets(D d) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  alignas(16) static constexpr uint8_t kByteOffsets[16] = {
+      0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
+  return Load(d8, kByteOffsets);
+}
+
+// 8-byte lanes.
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE VFromD<Repartition<uint8_t, D>> IndicesFromVecByteOffsets(D d) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  alignas(16) static constexpr uint8_t kByteOffsets[16] = {
+      0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7};
+  return Load(d8, kByteOffsets);
+}
+
+}  // namespace detail
+
+// 1-byte lanes: lane indices already are byte indices, so the raw register of
+// `vec` is wrapped unchanged. In debug builds, asserts that every index is
+// below 2 * MaxLanes(d).
+template <class D, typename TI, HWY_IF_V_SIZE_LE_D(D, 16),
+          HWY_IF_T_SIZE_D(D, 1)>
+HWY_API Indices128<TFromD<D>, MaxLanes(D())> IndicesFromVec(
+    D d, Vec128<TI, MaxLanes(D())> vec) {
+  using T = TFromD<D>;
+  static_assert(sizeof(T) == sizeof(TI), "Index size must match lane");
+#if HWY_IS_DEBUG_BUILD
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  HWY_DASSERT(AllTrue(
+      du, Lt(BitCast(du, vec), Set(du, static_cast<TU>(MaxLanes(d) * 2)))));
+#endif
+
+  (void)d;
+  return Indices128<TFromD<D>, MaxLanes(D())>{vec.raw};
+}
+
+template <class D, typename TI, HWY_IF_V_SIZE_LE_D(D, 16),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 2) | (1 << 4) | (1 << 8))>
+HWY_API Indices128<TFromD<D>, MaxLanes(D())> IndicesFromVec(
+    D d, Vec128<TI, MaxLanes(D())> vec) {
+  using T = TFromD<D>;
+  static_assert(sizeof(T) == sizeof(TI), "Index size must match lane");
+#if HWY_IS_DEBUG_BUILD  // debug only: every index must be < 2 * MaxLanes(d)
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  HWY_DASSERT(AllTrue(
+      du, Lt(BitCast(du, vec), Set(du, static_cast<TU>(MaxLanes(d) * 2)))));
+#endif
+
+  const Repartition<uint8_t, decltype(d)> d8;
+  using V8 = VFromD<decltype(d8)>;
+
+  // Copy each lane index into all bytes of its lane, then scale to a byte index
+  const V8 lane_indices = TableLookupBytes(
+      BitCast(d8, vec), detail::IndicesFromVecBroadcastLaneBytes(d));
+  constexpr int kIndexShiftAmt = static_cast<int>(FloorLog2(sizeof(T)));
+  const V8 byte_indices = ShiftLeft<kIndexShiftAmt>(lane_indices);
+  const V8 sum = Add(byte_indices, detail::IndicesFromVecByteOffsets(d));
+  return Indices128<TFromD<D>, MaxLanes(D())>{BitCast(d, sum).raw};
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), typename TI>
+HWY_API Indices128<TFromD<D>, HWY_MAX_LANES_D(D)> SetTableIndices(
+    D d, const TI* idx) {
+  const Rebind<TI, decltype(d)> di;  // tag used to load the TI indices from idx
+  return IndicesFromVec(d, LoadU(di, idx));  // LoadU, then convert for d
+}
+
+template <typename T, size_t N>  // lane lookup done byte-wise using idx.raw
+HWY_API Vec128<T, N> TableLookupLanes(Vec128<T, N> v, Indices128<T, N> idx) {
+  using TI = MakeSigned<T>;
+  const DFromV<decltype(v)> d;
+  const Rebind<TI, decltype(d)> di;
+  return BitCast(d, TableLookupBytes(BitCast(di, v), Vec128<TI, N>{idx.raw}));
+}
+
+template <typename T, size_t N, HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_API Vec128<T, N> TwoTablesLookupLanes(Vec128<T, N> a, Vec128<T, N> b,
+                                          Indices128<T, N> idx) {
+  const DFromV<decltype(a)> d;
+  const Twice<decltype(d)> dt;  // tag for the combined table: a lower, b upper
+// TableLookupLanes currently requires table and index vectors to be the same
+// size, though a half-length index vector would be sufficient here.
+#if HWY_IS_MSAN
+  const Vec128<T, N> idx_vec{idx.raw};
+  const Indices128<T, N * 2> idx2{Combine(dt, idx_vec, idx_vec).raw};
+#else
+  // We only keep LowerHalf of the result, which is valid in idx.
+  const Indices128<T, N * 2> idx2{idx.raw};
+#endif
+  return LowerHalf(d, TableLookupLanes(Combine(dt, b, a), idx2));
+}
+
+template <typename T>
+HWY_API Vec128<T> TwoTablesLookupLanes(Vec128<T> a, Vec128<T> b,
+                                       Indices128<T> idx) {
+  const DFromV<decltype(a)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+
+  const VFromD<decltype(du8)> byte_idx{idx.raw};
+  const auto byte_idx_mod = byte_idx & Set(du8, uint8_t{0x0F});
+  // If ANDing did not change the index (it was < 16), it selects from table a.
+  const auto is_lo = (byte_idx == byte_idx_mod);
+
+  return BitCast(d, IfThenElse(is_lo, TableLookupBytes(a, byte_idx_mod),
+                               TableLookupBytes(b, byte_idx_mod)));
+}
+
+// ------------------------------ Reverse (Shuffle0123, Shuffle2301, Shuffle01)
+
+// Single lane: reversing is a no-op, so v is returned unchanged.
+template <class D, typename T = TFromD<D>, HWY_IF_LANES_D(D, 1)>
+HWY_API Vec128<T, 1> Reverse(D /* tag */, Vec128<T, 1> v) {
+  return v;
+}
+
+// 32-bit x2: apply Shuffle2301 to the raw vector viewed as a full Vec128.
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec64<T> Reverse(D /* tag */, const Vec64<T> v) {
+  return Vec64<T>{Shuffle2301(Vec128<T>{v.raw}).raw};
+}
+
+// 64-bit x2: reversing two lanes is delegated to Shuffle01.
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T> Reverse(D /* tag */, const Vec128<T> v) {
+  return Shuffle01(v);
+}
+
+// 32-bit x4: reversing four lanes is delegated to Shuffle0123.
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T> Reverse(D /* tag */, const Vec128<T> v) {
+  return Shuffle0123(v);
+}
+
+// 16-bit: reverse as u32 lanes, then rotate by 16 to swap each lane's halves.
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Reverse(D d, const VFromD<D> v) {
+  const RepartitionToWide<RebindToUnsigned<decltype(d)>> du32;
+  return BitCast(d, RotateRight<16>(Reverse(du32, BitCast(du32, v))));
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 1), HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> Reverse(D d, const VFromD<D> v) {
+  static constexpr int kN = 16 + Lanes(d);
+  return VFromD<D>{wasm_i8x16_shuffle(
+      v.raw, v.raw,
+      // Both operands are v; kN = 16 + Lanes(d) keeps all indices in [0, 32).
+      kN - 1, kN - 2, kN - 3, kN - 4, kN - 5, kN - 6, kN - 7, kN - 8, kN - 9,
+      kN - 10, kN - 11, kN - 12, kN - 13, kN - 14, kN - 15, kN - 16)};
+}
+
+// ------------------------------ Reverse2
+
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Reverse2(D d, const VFromD<D> v) {
+  const RepartitionToWide<RebindToUnsigned<decltype(d)>> dw;
+  return BitCast(d, RotateRight<16>(BitCast(dw, v)));  // swap u16 pairs
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> Reverse2(D /* tag */, const VFromD<D> v) {
+  return Shuffle2301(v);  // 32-bit lanes: delegated to Shuffle2301
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> Reverse2(D /* tag */, const VFromD<D> v) {
+  return Shuffle01(v);  // 64-bit lanes: delegated to Shuffle01
+}
+
+// ------------------------------ Reverse4
+
+template <class D, HWY_IF_T_SIZE_D(D, 2)>  // 16-bit: reverse each 4-lane group
+HWY_API VFromD<D> Reverse4(D /* tag */, const VFromD<D> v) {
+  return VFromD<D>{wasm_i16x8_shuffle(v.raw, v.raw, 3, 2, 1, 0, 7, 6, 5, 4)};
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> Reverse4(D /* tag */, const VFromD<D> v) {
+  return Shuffle0123(v);  // 32-bit lanes: delegated to Shuffle0123
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> Reverse4(D /* tag */, const VFromD<D>) {
+  HWY_ASSERT(0);  // don't have 4 u64 lanes
+}
+
+// ------------------------------ Reverse8
+
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Reverse8(D d, const VFromD<D> v) {
+  return Reverse(d, v);  // 16-bit lanes: forwarded to the whole-vector Reverse
+}
+
+template <class D, HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 4) | (1 << 8))>
+HWY_API VFromD<D> Reverse8(D /* tag */, const VFromD<D>) {
+  HWY_ASSERT(0);  // don't have 8 lanes for > 16-bit lanes
+}
+
+// ------------------------------ InterleaveLower
+
+template <size_t N>  // result lanes: a0, b0, a1, b1, ... (same for all types)
+HWY_API Vec128<uint8_t, N> InterleaveLower(Vec128<uint8_t, N> a,
+                                           Vec128<uint8_t, N> b) {
+  return Vec128<uint8_t, N>{wasm_i8x16_shuffle(
+      a.raw, b.raw, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23)};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> InterleaveLower(Vec128<uint16_t, N> a,
+                                            Vec128<uint16_t, N> b) {
+  return Vec128<uint16_t, N>{
+      wasm_i16x8_shuffle(a.raw, b.raw, 0, 8, 1, 9, 2, 10, 3, 11)};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> InterleaveLower(Vec128<uint32_t, N> a,
+                                            Vec128<uint32_t, N> b) {
+  return Vec128<uint32_t, N>{wasm_i32x4_shuffle(a.raw, b.raw, 0, 4, 1, 5)};
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, N> InterleaveLower(Vec128<uint64_t, N> a,
+                                            Vec128<uint64_t, N> b) {
+  return Vec128<uint64_t, N>{wasm_i64x2_shuffle(a.raw, b.raw, 0, 2)};
+}
+
+template <size_t N>
+HWY_API Vec128<int8_t, N> InterleaveLower(Vec128<int8_t, N> a,
+                                          Vec128<int8_t, N> b) {
+  return Vec128<int8_t, N>{wasm_i8x16_shuffle(
+      a.raw, b.raw, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23)};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> InterleaveLower(Vec128<int16_t, N> a,
+                                           Vec128<int16_t, N> b) {
+  return Vec128<int16_t, N>{
+      wasm_i16x8_shuffle(a.raw, b.raw, 0, 8, 1, 9, 2, 10, 3, 11)};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> InterleaveLower(Vec128<int32_t, N> a,
+                                           Vec128<int32_t, N> b) {
+  return Vec128<int32_t, N>{wasm_i32x4_shuffle(a.raw, b.raw, 0, 4, 1, 5)};
+}
+template <size_t N>
+HWY_API Vec128<int64_t, N> InterleaveLower(Vec128<int64_t, N> a,
+                                           Vec128<int64_t, N> b) {
+  return Vec128<int64_t, N>{wasm_i64x2_shuffle(a.raw, b.raw, 0, 2)};
+}
+
+template <size_t N>  // float: same lane pattern via the i32x4 shuffle
+HWY_API Vec128<float, N> InterleaveLower(Vec128<float, N> a,
+                                         Vec128<float, N> b) {
+  return Vec128<float, N>{wasm_i32x4_shuffle(a.raw, b.raw, 0, 4, 1, 5)};
+}
+
+template <size_t N>  // double: same lane pattern via the i64x2 shuffle
+HWY_API Vec128<double, N> InterleaveLower(Vec128<double, N> a,
+                                          Vec128<double, N> b) {
+  return Vec128<double, N>{wasm_i64x2_shuffle(a.raw, b.raw, 0, 2)};
+}
+
+// Tag overload (all vector lengths): the tag is ignored and (a, b) forwarded.
+template <class D>
+HWY_API VFromD<D> InterleaveLower(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  return InterleaveLower(a, b);
+}
+
+// ------------------------------ InterleaveUpper (UpperHalf)
+
+// All functions inside detail lack the required D parameter.
+namespace detail {
+
+template <size_t N>  // u8 result lanes: a8, b8, a9, b9, ..., a15, b15
+HWY_API Vec128<uint8_t, N> InterleaveUpper(Vec128<uint8_t, N> a,
+                                           Vec128<uint8_t, N> b) {
+  return Vec128<uint8_t, N>{wasm_i8x16_shuffle(a.raw, b.raw, 8, 24, 9, 25, 10,
+                                               26, 11, 27, 12, 28, 13, 29, 14,
+                                               30, 15, 31)};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> InterleaveUpper(Vec128<uint16_t, N> a,
+                                            Vec128<uint16_t, N> b) {
+  return Vec128<uint16_t, N>{
+      wasm_i16x8_shuffle(a.raw, b.raw, 4, 12, 5, 13, 6, 14, 7, 15)};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> InterleaveUpper(Vec128<uint32_t, N> a,
+                                            Vec128<uint32_t, N> b) {
+  return Vec128<uint32_t, N>{wasm_i32x4_shuffle(a.raw, b.raw, 2, 6, 3, 7)};
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, N> InterleaveUpper(Vec128<uint64_t, N> a,
+                                            Vec128<uint64_t, N> b) {
+  return Vec128<uint64_t, N>{wasm_i64x2_shuffle(a.raw, b.raw, 1, 3)};
+}
+
+template <size_t N>
+HWY_API Vec128<int8_t, N> InterleaveUpper(Vec128<int8_t, N> a,
+                                          Vec128<int8_t, N> b) {
+  return Vec128<int8_t, N>{wasm_i8x16_shuffle(a.raw, b.raw, 8, 24, 9, 25, 10,
+                                              26, 11, 27, 12, 28, 13, 29, 14,
+                                              30, 15, 31)};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> InterleaveUpper(Vec128<int16_t, N> a,
+                                           Vec128<int16_t, N> b) {
+  return Vec128<int16_t, N>{
+      wasm_i16x8_shuffle(a.raw, b.raw, 4, 12, 5, 13, 6, 14, 7, 15)};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> InterleaveUpper(Vec128<int32_t, N> a,
+                                           Vec128<int32_t, N> b) {
+  return Vec128<int32_t, N>{wasm_i32x4_shuffle(a.raw, b.raw, 2, 6, 3, 7)};
+}
+template <size_t N>
+HWY_API Vec128<int64_t, N> InterleaveUpper(Vec128<int64_t, N> a,
+                                           Vec128<int64_t, N> b) {
+  return Vec128<int64_t, N>{wasm_i64x2_shuffle(a.raw, b.raw, 1, 3)};
+}
+
+template <size_t N>  // float: same lane pattern via the i32x4 shuffle
+HWY_API Vec128<float, N> InterleaveUpper(Vec128<float, N> a,
+                                         Vec128<float, N> b) {
+  return Vec128<float, N>{wasm_i32x4_shuffle(a.raw, b.raw, 2, 6, 3, 7)};
+}
+
+template <size_t N>  // double: same lane pattern via the i64x2 shuffle
+HWY_API Vec128<double, N> InterleaveUpper(Vec128<double, N> a,
+                                          Vec128<double, N> b) {
+  return Vec128<double, N>{wasm_i64x2_shuffle(a.raw, b.raw, 1, 3)};
+}
+
+}  // namespace detail
+
+// Full vectors: the tag is ignored; forwards to the typed detail overloads.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec128<T> InterleaveUpper(D /* tag */, Vec128<T> a, Vec128<T> b) {
+  return detail::InterleaveUpper(a, b);
+}
+
+// Partial vectors: take UpperHalf of a and b, then InterleaveLower those.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> InterleaveUpper(D d, VFromD<D> a, VFromD<D> b) {
+  const Half<decltype(d)> d2;
+  return InterleaveLower(d, VFromD<D>{UpperHalf(d2, a).raw},
+                         VFromD<D>{UpperHalf(d2, b).raw});
+}
+
+// ------------------------------ ZipLower/ZipUpper (InterleaveLower)
+
+// Same as Interleave*, except that the return lanes are double-width integers;
+// this is necessary because the single-lane scalar cannot return two values.
+template <class V, class DW = RepartitionToWide<DFromV<V>>>
+HWY_API VFromD<DW> ZipLower(V a, V b) {
+  return BitCast(DW(), InterleaveLower(a, b));  // view (a, b) pairs as DW lanes
+}
+template <class V, class D = DFromV<V>, class DW = RepartitionToWide<D>>
+HWY_API VFromD<DW> ZipLower(DW dw, V a, V b) {
+  return BitCast(dw, InterleaveLower(D(), a, b));  // same, with an explicit tag
+}
+
+template <class V, class D = DFromV<V>, class DW = RepartitionToWide<D>>
+HWY_API VFromD<DW> ZipUpper(DW dw, V a, V b) {
+  return BitCast(dw, InterleaveUpper(D(), a, b));  // view (a, b) pairs as DW
+}
+
+// ------------------------------ Per4LaneBlockShuffle
+namespace detail {
+
+template <size_t kIdx3210, size_t kVectSize, class V,
+          HWY_IF_LANES_LE(kVectSize, 16)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> /*idx_3210_tag*/,
+                                  hwy::SizeTag<1> /*lane_size_tag*/,
+                                  hwy::SizeTag<kVectSize> /*vect_size_tag*/,
+                                  V v) {
+  constexpr int kIdx3 = static_cast<int>((kIdx3210 >> 6) & 3);  // bits 7:6
+  constexpr int kIdx2 = static_cast<int>((kIdx3210 >> 4) & 3);  // bits 5:4
+  constexpr int kIdx1 = static_cast<int>((kIdx3210 >> 2) & 3);  // bits 3:2
+  constexpr int kIdx0 = static_cast<int>(kIdx3210 & 3);         // bits 1:0
+  return V{wasm_i8x16_shuffle(v.raw, v.raw, kIdx0, kIdx1, kIdx2, kIdx3,
+                              kIdx0 + 4, kIdx1 + 4, kIdx2 + 4, kIdx3 + 4,
+                              kIdx0 + 8, kIdx1 + 8, kIdx2 + 8, kIdx3 + 8,
+                              kIdx0 + 12, kIdx1 + 12, kIdx2 + 12, kIdx3 + 12)};
+}
+
+template <size_t kIdx3210, size_t kVectSize, class V,
+          HWY_IF_LANES_LE(kVectSize, 16)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> /*idx_3210_tag*/,
+                                  hwy::SizeTag<2> /*lane_size_tag*/,
+                                  hwy::SizeTag<kVectSize> /*vect_size_tag*/,
+                                  V v) {
+  constexpr int kIdx3 = static_cast<int>((kIdx3210 >> 6) & 3);  // bits 7:6
+  constexpr int kIdx2 = static_cast<int>((kIdx3210 >> 4) & 3);  // bits 5:4
+  constexpr int kIdx1 = static_cast<int>((kIdx3210 >> 2) & 3);  // bits 3:2
+  constexpr int kIdx0 = static_cast<int>(kIdx3210 & 3);         // bits 1:0
+  return V{wasm_i16x8_shuffle(v.raw, v.raw, kIdx0, kIdx1, kIdx2, kIdx3,
+                              kIdx0 + 4, kIdx1 + 4, kIdx2 + 4, kIdx3 + 4)};
+}
+
+template <size_t kIdx3210, size_t kVectSize, class V,
+          HWY_IF_LANES_LE(kVectSize, 16)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> /*idx_3210_tag*/,
+                                  hwy::SizeTag<4> /*lane_size_tag*/,
+                                  hwy::SizeTag<kVectSize> /*vect_size_tag*/,
+                                  V v) {
+  constexpr int kIdx3 = static_cast<int>((kIdx3210 >> 6) & 3);  // bits 7:6
+  constexpr int kIdx2 = static_cast<int>((kIdx3210 >> 4) & 3);  // bits 5:4
+  constexpr int kIdx1 = static_cast<int>((kIdx3210 >> 2) & 3);  // bits 3:2
+  constexpr int kIdx0 = static_cast<int>(kIdx3210 & 3);         // bits 1:0
+  return V{wasm_i32x4_shuffle(v.raw, v.raw, kIdx0, kIdx1, kIdx2, kIdx3)};
+}
+
+}  // namespace detail
+
+// ------------------------------ SlideUpLanes
+
+namespace detail {
+
+template <class V, HWY_IF_V_SIZE_LE_V(V, 8)>
+HWY_INLINE V SlideUpLanes(V v, size_t amt) {
+  const DFromV<decltype(v)> d;
+  const Full64<uint64_t> du64;  // V is at most 8 bytes, so one u64 lane holds it
+  const auto vu64 = ResizeBitCast(du64, v);
+  return ResizeBitCast(  // shift count below is amt lanes expressed in bits
+      d, ShiftLeftSame(vu64, static_cast<int>(amt * sizeof(TFromV<V>) * 8)));
+}
+
+template <class V, HWY_IF_V_SIZE_V(V, 16)>
+HWY_INLINE V SlideUpLanes(V v, size_t amt) {
+  const DFromV<decltype(v)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  const auto idx =  // Iota start is -(amt * sizeof(T)), truncated to uint8_t
+      Iota(du8, static_cast<uint8_t>(size_t{0} - amt * sizeof(TFromV<V>)));
+  return BitCast(d, TableLookupBytesOr0(BitCast(du8, v), idx));
+}
+
+}  // namespace detail
+
+template <class D, HWY_IF_LANES_D(D, 1)>
+HWY_API VFromD<D> SlideUpLanes(D /*d*/, VFromD<D> v, size_t /*amt*/) {
+  return v;  // single-lane vectors: amt is ignored and v is returned as-is
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 2)>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD
+  if (__builtin_constant_p(amt)) {  // compile-time amt: use ShiftLeftLanes<amt>
+    switch (amt) {
+      case 0:
+        return v;
+      case 1:
+        return ShiftLeftLanes<1>(d, v);
+    }
+  }
+#else
+  (void)d;  // d is only used by the non-debug fast path above
+#endif
+
+  return detail::SlideUpLanes(v, amt);  // general path: no case returned above
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 4)>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD
+  if (__builtin_constant_p(amt)) {  // compile-time amt: use ShiftLeftLanes<amt>
+    switch (amt) {
+      case 0:
+        return v;
+      case 1:
+        return ShiftLeftLanes<1>(d, v);
+      case 2:
+        return ShiftLeftLanes<2>(d, v);
+      case 3:
+        return ShiftLeftLanes<3>(d, v);
+    }
+  }
+#else
+  (void)d;  // d is only used by the non-debug fast path above
+#endif
+
+  return detail::SlideUpLanes(v, amt);  // general path: no case returned above
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 8)>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD
+  if (__builtin_constant_p(amt)) {  // compile-time amt: use ShiftLeftLanes<amt>
+    switch (amt) {
+      case 0:
+        return v;
+      case 1:
+        return ShiftLeftLanes<1>(d, v);
+      case 2:
+        return ShiftLeftLanes<2>(d, v);
+      case 3:
+        return ShiftLeftLanes<3>(d, v);
+      case 4:
+        return ShiftLeftLanes<4>(d, v);
+      case 5:
+        return ShiftLeftLanes<5>(d, v);
+      case 6:
+        return ShiftLeftLanes<6>(d, v);
+      case 7:
+        return ShiftLeftLanes<7>(d, v);
+    }
+  }
+#else
+  (void)d;  // d is only used by the non-debug fast path above
+#endif
+
+  return detail::SlideUpLanes(v, amt);  // general path: no case returned above
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_LANES_D(D, 16)>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD
+  if (__builtin_constant_p(amt)) {  // compile-time amt: use ShiftLeftLanes<amt>
+    switch (amt) {
+      case 0:
+        return v;
+      case 1:
+        return ShiftLeftLanes<1>(d, v);
+      case 2:
+        return ShiftLeftLanes<2>(d, v);
+      case 3:
+        return ShiftLeftLanes<3>(d, v);
+      case 4:
+        return ShiftLeftLanes<4>(d, v);
+      case 5:
+        return ShiftLeftLanes<5>(d, v);
+      case 6:
+        return ShiftLeftLanes<6>(d, v);
+      case 7:
+        return ShiftLeftLanes<7>(d, v);
+      case 8:
+        return ShiftLeftLanes<8>(d, v);
+      case 9:
+        return ShiftLeftLanes<9>(d, v);
+      case 10:
+        return ShiftLeftLanes<10>(d, v);
+      case 11:
+        return ShiftLeftLanes<11>(d, v);
+      case 12:
+        return ShiftLeftLanes<12>(d, v);
+      case 13:
+        return ShiftLeftLanes<13>(d, v);
+      case 14:
+        return ShiftLeftLanes<14>(d, v);
+      case 15:
+        return ShiftLeftLanes<15>(d, v);
+    }
+  }
+#else
+  (void)d;  // d is only used by the non-debug fast path above
+#endif
+
+  return detail::SlideUpLanes(v, amt);  // general path: no case returned above
+}
+
+// ------------------------------ SlideDownLanes
+
+namespace detail {
+
+template <class V, HWY_IF_V_SIZE_LE_V(V, 8)>
+HWY_INLINE V SlideDownLanes(V v, size_t amt) {
+  const DFromV<decltype(v)> d;
+  const Repartition<UnsignedFromSize<d.MaxBytes()>, decltype(d)> dv;
+  return BitCast(d,  // shift count below is amt lanes expressed in bits
+                 ShiftRightSame(BitCast(dv, v),
+                                static_cast<int>(amt * sizeof(TFromV<V>) * 8)));
+}
+
+template <class V, HWY_IF_V_SIZE_V(V, 16)>
+HWY_INLINE V SlideDownLanes(V v, size_t amt) {
+  const DFromV<decltype(v)> d;
+  const Repartition<int8_t, decltype(d)> di8;
+  auto idx = Iota(di8, static_cast<int8_t>(amt * sizeof(TFromV<V>)));
+  idx = Or(idx, VecFromMask(di8, idx > Set(di8, int8_t{15})));  // >15 -> -1
+  return BitCast(d, TableLookupBytesOr0(BitCast(di8, v), idx));
+}
+
+}  // namespace detail
+
+template <class D, HWY_IF_LANES_D(D, 1)>
+HWY_API VFromD<D> SlideDownLanes(D /*d*/, VFromD<D> v, size_t /*amt*/) {
+  return v;  // single-lane vectors: amt is ignored and v is returned as-is
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 2)>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD
+  if (__builtin_constant_p(amt)) {  // compile-time amt: ShiftRightLanes<amt>
+    switch (amt) {
+      case 0:
+        return v;
+      case 1:
+        return ShiftRightLanes<1>(d, v);
+    }
+  }
+#else
+  (void)d;  // d is only used by the non-debug fast path above
+#endif
+
+  return detail::SlideDownLanes(v, amt);  // general path: no case returned
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 4)>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD
+  if (__builtin_constant_p(amt)) {  // compile-time amt: ShiftRightLanes<amt>
+    switch (amt) {
+      case 0:
+        return v;
+      case 1:
+        return ShiftRightLanes<1>(d, v);
+      case 2:
+        return ShiftRightLanes<2>(d, v);
+      case 3:
+        return ShiftRightLanes<3>(d, v);
+    }
+  }
+#else
+  (void)d;  // d is only used by the non-debug fast path above
+#endif
+
+  return detail::SlideDownLanes(v, amt);  // general path: no case returned
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 8)>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD
+  if (__builtin_constant_p(amt)) {  // compile-time amt: ShiftRightLanes<amt>
+    switch (amt) {
+      case 0:
+        return v;
+      case 1:
+        return ShiftRightLanes<1>(d, v);
+      case 2:
+        return ShiftRightLanes<2>(d, v);
+      case 3:
+        return ShiftRightLanes<3>(d, v);
+      case 4:
+        return ShiftRightLanes<4>(d, v);
+      case 5:
+        return ShiftRightLanes<5>(d, v);
+      case 6:
+        return ShiftRightLanes<6>(d, v);
+      case 7:
+        return ShiftRightLanes<7>(d, v);
+    }
+  }
+#else
+  (void)d;  // d is only used by the non-debug fast path above
+#endif
+
+  return detail::SlideDownLanes(v, amt);  // general path: no case returned
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_LANES_D(D, 16)>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD
+  if (__builtin_constant_p(amt)) {  // compile-time amt: ShiftRightLanes<amt>
+    switch (amt) {
+      case 0:
+        return v;
+      case 1:
+        return ShiftRightLanes<1>(d, v);
+      case 2:
+        return ShiftRightLanes<2>(d, v);
+      case 3:
+        return ShiftRightLanes<3>(d, v);
+      case 4:
+        return ShiftRightLanes<4>(d, v);
+      case 5:
+        return ShiftRightLanes<5>(d, v);
+      case 6:
+        return ShiftRightLanes<6>(d, v);
+      case 7:
+        return ShiftRightLanes<7>(d, v);
+      case 8:
+        return ShiftRightLanes<8>(d, v);
+      case 9:
+        return ShiftRightLanes<9>(d, v);
+      case 10:
+        return ShiftRightLanes<10>(d, v);
+      case 11:
+        return ShiftRightLanes<11>(d, v);
+      case 12:
+        return ShiftRightLanes<12>(d, v);
+      case 13:
+        return ShiftRightLanes<13>(d, v);
+      case 14:
+        return ShiftRightLanes<14>(d, v);
+      case 15:
+        return ShiftRightLanes<15>(d, v);
+    }
+  }
+#else
+  (void)d;  // d is only used by the non-debug fast path above
+#endif
+
+  return detail::SlideDownLanes(v, amt);  // general path: no case returned
+}
+
+// ================================================== COMBINE
+
+// ------------------------------ Combine (InterleaveLower)
+
+// N = N/2 + N/2 (upper half undefined)
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), class VH = VFromD<Half<D>>>
+HWY_API VFromD<D> Combine(D d, VH hi_half, VH lo_half) {
+  const Half<decltype(d)> dh;
+  const RebindToUnsigned<decltype(dh)> duh;
+  // Treat half-width input as one lane, and expand to two lanes.
+  using VU = Vec128<UnsignedFromSize<dh.MaxBytes()>, 2>;
+  const VU lo{BitCast(duh, lo_half).raw};
+  const VU hi{BitCast(duh, hi_half).raw};
+  return BitCast(d, InterleaveLower(lo, hi));  // lane 0 = lo_half, 1 = hi_half
+}
+
+// ------------------------------ ZeroExtendVector (IfThenElseZero)
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> ZeroExtendVector(D d, VFromD<Half<D>> lo) {
+  const Half<D> dh;  // only the first MaxLanes(dh) lanes of lo.raw are kept
+  return IfThenElseZero(FirstN(d, MaxLanes(dh)), VFromD<D>{lo.raw});
+}
+
+// ------------------------------ ConcatLowerLower
+template <class D, typename T = TFromD<D>>  // {lo's low 64 bits, hi's low 64}
+HWY_API Vec128<T> ConcatLowerLower(D /* tag */, Vec128<T> hi, Vec128<T> lo) {
+  return Vec128<T>{wasm_i64x2_shuffle(lo.raw, hi.raw, 0, 2)};
+}
+
+// ------------------------------ ConcatUpperUpper
+template <class D, typename T = TFromD<D>>  // {lo's high 64 bits, hi's high 64}
+HWY_API Vec128<T> ConcatUpperUpper(D /* tag */, Vec128<T> hi, Vec128<T> lo) {
+  return Vec128<T>{wasm_i64x2_shuffle(lo.raw, hi.raw, 1, 3)};
+}
+
+// ------------------------------ ConcatLowerUpper
+template <class D, typename T = TFromD<D>>
+HWY_API Vec128<T> ConcatLowerUpper(D d, Vec128<T> hi, Vec128<T> lo) {
+  return CombineShiftRightBytes<8>(d, hi, lo);  // 8-byte shift of the hi:lo pair
+}
+
+// ------------------------------ ConcatUpperLower
+template <class D, typename T = TFromD<D>>
+HWY_API Vec128<T> ConcatUpperLower(D d, Vec128<T> hi, Vec128<T> lo) {
+  return IfThenElse(FirstN(d, Lanes(d) / 2), lo, hi);  // first half lo, rest hi
+}
+
+// ------------------------------ Concat partial (Combine, LowerHalf)
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ConcatLowerLower(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Half<decltype(d)> d2;
+  return Combine(d, LowerHalf(d2, hi), LowerHalf(d2, lo));  // Combine(d, hi, lo)
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ConcatUpperUpper(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Half<decltype(d)> d2;
+  return Combine(d, UpperHalf(d2, hi), UpperHalf(d2, lo));  // Combine(d, hi, lo)
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ConcatLowerUpper(D d, const VFromD<D> hi,
+                                   const VFromD<D> lo) {
+  const Half<decltype(d)> d2;
+  return Combine(d, LowerHalf(d2, hi), UpperHalf(d2, lo));  // Combine(d, hi, lo)
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ConcatUpperLower(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Half<decltype(d)> d2;
+  return Combine(d, UpperHalf(d2, hi), LowerHalf(d2, lo));  // Combine(d, hi, lo)
+}
+
+// ------------------------------ ConcatOdd
+
+// 8-bit full: odd bytes of lo (indices 1..15), then of hi (indices 17..31).
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T> ConcatOdd(D /* tag */, Vec128<T> hi, Vec128<T> lo) {
+  return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 1, 3, 5, 7, 9, 11, 13, 15,
+                                      17, 19, 21, 23, 25, 27, 29, 31)};
+}
+
+// 8-bit x8
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec64<T> ConcatOdd(D /* tag */, Vec64<T> hi, Vec64<T> lo) {
+  // Don't care about upper half: it just repeats the lower-half indices.
+  return Vec128<T, 8>{wasm_i8x16_shuffle(lo.raw, hi.raw, 1, 3, 5, 7, 17, 19, 21,
+                                         23, 1, 3, 5, 7, 17, 19, 21, 23)};
+}
+
+// 8-bit x4
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec32<T> ConcatOdd(D /* tag */, Vec32<T> hi, Vec32<T> lo) {
+  // Don't care about upper 3/4: it just repeats the first four indices.
+  return Vec128<T, 4>{wasm_i8x16_shuffle(lo.raw, hi.raw, 1, 3, 17, 19, 1, 3, 17,
+                                         19, 1, 3, 17, 19, 1, 3, 17, 19)};
+}
+
+// 16-bit full
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec128<T> ConcatOdd(D /* tag */, Vec128<T> hi, Vec128<T> lo) {
+  return Vec128<T>{
+      wasm_i16x8_shuffle(lo.raw, hi.raw, 1, 3, 5, 7, 9, 11, 13, 15)};
+}
+
+// 16-bit x4
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec64<T> ConcatOdd(D /* tag */, Vec64<T> hi, Vec64<T> lo) {
+  // Don't care about upper half: it just repeats the lower-half indices.
+  return Vec128<T, 4>{
+      wasm_i16x8_shuffle(lo.raw, hi.raw, 1, 3, 9, 11, 1, 3, 9, 11)};
+}
+
+// 32-bit full
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T> ConcatOdd(D /* tag */, Vec128<T> hi, Vec128<T> lo) {
+  return Vec128<T>{wasm_i32x4_shuffle(lo.raw, hi.raw, 1, 3, 5, 7)};
+}
+
+// Any T x2: the only odd lane of each input is lane 1, i.e. InterleaveUpper.
+template <class D, typename T = TFromD<D>, HWY_IF_LANES_D(D, 2)>
+HWY_API Vec128<T, 2> ConcatOdd(D d, Vec128<T, 2> hi, Vec128<T, 2> lo) {
+  return InterleaveUpper(d, lo, hi);
+}
+
+// ------------------------------ ConcatEven (InterleaveLower)
+
+// 8-bit full: even bytes of lo (indices 0..14), then of hi (indices 16..30).
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T> ConcatEven(D /* tag */, Vec128<T> hi, Vec128<T> lo) {
+  return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 0, 2, 4, 6, 8, 10, 12, 14,
+                                      16, 18, 20, 22, 24, 26, 28, 30)};
+}
+
+// 8-bit x8
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec64<T> ConcatEven(D /* tag */, Vec64<T> hi, Vec64<T> lo) {
+  // Don't care about upper half: it just repeats the lower-half indices.
+  return Vec64<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 0, 2, 4, 6, 16, 18, 20, 22,
+                                     0, 2, 4, 6, 16, 18, 20, 22)};
+}
+
+// 8-bit x4
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec32<T> ConcatEven(D /* tag */, Vec32<T> hi, Vec32<T> lo) {
+  // Don't care about upper 3/4: it just repeats the first four indices.
+  return Vec32<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 0, 2, 16, 18, 0, 2, 16, 18,
+                                     0, 2, 16, 18, 0, 2, 16, 18)};
+}
+
+// 16-bit full
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec128<T> ConcatEven(D /* tag */, Vec128<T> hi, Vec128<T> lo) {
+  return Vec128<T>{
+      wasm_i16x8_shuffle(lo.raw, hi.raw, 0, 2, 4, 6, 8, 10, 12, 14)};
+}
+
+// 16-bit x4
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec64<T> ConcatEven(D /* tag */, Vec64<T> hi, Vec64<T> lo) {
+  // Don't care about upper half: it just repeats the lower-half indices.
+  return Vec64<T>{wasm_i16x8_shuffle(lo.raw, hi.raw, 0, 2, 8, 10, 0, 2, 8, 10)};
+}
+
+// 32-bit full
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T> ConcatEven(D /* tag */, Vec128<T> hi, Vec128<T> lo) {
+  return Vec128<T>{wasm_i32x4_shuffle(lo.raw, hi.raw, 0, 2, 4, 6)};
+}
+
+// Any T x2: the only even lane of each input is lane 0, i.e. InterleaveLower.
+template <typename D, typename T = TFromD<D>, HWY_IF_LANES_D(D, 2)>
+HWY_API Vec128<T, 2> ConcatEven(D d, Vec128<T, 2> hi, Vec128<T, 2> lo) {
+  return InterleaveLower(d, lo, hi);
+}
+
+// ------------------------------ DupEven (InterleaveLower)
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 1)>  // lanes 0,0,2,2,4,4,...
+HWY_API Vec128<T, N> DupEven(Vec128<T, N> v) {
+  return Vec128<T, N>{wasm_i8x16_shuffle(v.raw, v.raw, 0, 0, 2, 2, 4, 4, 6, 6,
+                                         8, 8, 10, 10, 12, 12, 14, 14)};
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec128<T, N> DupEven(Vec128<T, N> v) {
+  return Vec128<T, N>{wasm_i16x8_shuffle(v.raw, v.raw, 0, 0, 2, 2, 4, 4, 6, 6)};
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T, N> DupEven(Vec128<T, N> v) {
+  return Vec128<T, N>{wasm_i32x4_shuffle(v.raw, v.raw, 0, 0, 2, 2)};
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T, N> DupEven(const Vec128<T, N> v) {
+  return InterleaveLower(DFromV<decltype(v)>(), v, v);  // 64-bit: lanes 0, 0
+}
+
+// ------------------------------ DupOdd (InterleaveUpper)
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 1)>  // lanes 1,1,3,3,5,5,...
+HWY_API Vec128<T, N> DupOdd(Vec128<T, N> v) {
+  return Vec128<T, N>{wasm_i8x16_shuffle(v.raw, v.raw, 1, 1, 3, 3, 5, 5, 7, 7,
+                                         9, 9, 11, 11, 13, 13, 15, 15)};
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec128<T, N> DupOdd(Vec128<T, N> v) {
+  return Vec128<T, N>{wasm_i16x8_shuffle(v.raw, v.raw, 1, 1, 3, 3, 5, 5, 7, 7)};
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T, N> DupOdd(Vec128<T, N> v) {
+  return Vec128<T, N>{wasm_i32x4_shuffle(v.raw, v.raw, 1, 1, 3, 3)};
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T, N> DupOdd(const Vec128<T, N> v) {
+  return InterleaveUpper(DFromV<decltype(v)>(), v, v);  // 64-bit: via upper
+}
+
+// ------------------------------ OddEven
+
+namespace detail {
+
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> OddEven(hwy::SizeTag<1> /* tag */, const Vec128<T, N> a,
+                                const Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  alignas(16) static constexpr uint8_t mask[16] = {  // 0xFF marks even bytes
+      0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0};
+  return IfThenElse(MaskFromVec(BitCast(d, Load(d8, mask))), b, a);
+}
+template <typename T, size_t N>  // even lanes from b (indices >= 8), odd from a
+HWY_INLINE Vec128<T, N> OddEven(hwy::SizeTag<2> /* tag */, const Vec128<T, N> a,
+                                const Vec128<T, N> b) {
+  return Vec128<T, N>{
+      wasm_i16x8_shuffle(a.raw, b.raw, 8, 1, 10, 3, 12, 5, 14, 7)};
+}
+template <typename T, size_t N>  // even lanes from b (indices >= 4), odd from a
+HWY_INLINE Vec128<T, N> OddEven(hwy::SizeTag<4> /* tag */, const Vec128<T, N> a,
+                                const Vec128<T, N> b) {
+  return Vec128<T, N>{wasm_i32x4_shuffle(a.raw, b.raw, 4, 1, 6, 3)};
+}
+template <typename T, size_t N>  // lane 0 from b (index 2), lane 1 from a
+HWY_INLINE Vec128<T, N> OddEven(hwy::SizeTag<8> /* tag */, const Vec128<T, N> a,
+                                const Vec128<T, N> b) {
+  return Vec128<T, N>{wasm_i64x2_shuffle(a.raw, b.raw, 2, 1)};
+}
+
+}  // namespace detail
+
+// Public OddEven entry point: dispatches on sizeof(T) to the matching
+// detail::OddEven overload.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> OddEven(const Vec128<T, N> a, const Vec128<T, N> b) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  return detail::OddEven(LaneSizeTag(), a, b);
+}
+// float overload of OddEven: the 32-bit shuffle (4, 1, 6, 3) takes lanes 0 and
+// 2 from `b` and lanes 1 and 3 from `a`.
+template <size_t N>
+HWY_API Vec128<float, N> OddEven(const Vec128<float, N> a,
+                                 const Vec128<float, N> b) {
+  const auto blended = wasm_i32x4_shuffle(a.raw, b.raw, 4, 1, 6, 3);
+  return Vec128<float, N>{blended};
+}
+
+// ------------------------------ OddEvenBlocks
+// Returns `even` unchanged and ignores the odd argument: a Vec128 is at most
+// one 128-bit block, so there is never an odd-indexed block to take.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> OddEvenBlocks(Vec128<T, N> /* odd */, Vec128<T, N> even) {
+  return even;
+}
+
+// ------------------------------ SwapAdjacentBlocks
+
+// Returns `v` unchanged: a Vec128 is at most one 128-bit block, so there is no
+// neighbouring block to swap with.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> SwapAdjacentBlocks(Vec128<T, N> v) {
+  return v;
+}
+
+// ------------------------------ ReverseBlocks
+
+// Single block: no change. The tag argument is unused and `v` is returned
+// as-is because reversing the order of one block is the identity.
+template <class D>
+HWY_API VFromD<D> ReverseBlocks(D /* tag */, VFromD<D> v) {
+  return v;
+}
+
+// ================================================== CONVERT
+
+// ------------------------------ Promotions (part w/ narrow lanes -> full)
+
+// Unsigned: zero-extend.
+// u8 -> u16: widens the low u8 lanes via wasm_u16x8_extend_low_u8x16.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<uint8_t, D>> v) {
+  const auto widened = wasm_u16x8_extend_low_u8x16(v.raw);
+  return VFromD<D>{widened};
+}
+// u16 -> u32: widens the low u16 lanes via wasm_u32x4_extend_low_u16x8.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<uint16_t, D>> v) {
+  const auto widened = wasm_u32x4_extend_low_u16x8(v.raw);
+  return VFromD<D>{widened};
+}
+// u32 -> u64: widens the low u32 lanes via wasm_u64x2_extend_low_u32x4.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_U64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<uint32_t, D>> v) {
+  const auto widened = wasm_u64x2_extend_low_u32x4(v.raw);
+  return VFromD<D>{widened};
+}
+
+// u8 -> u32 in two widening steps: u8 -> u16, then u16 -> u32.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<uint8_t, D>> v) {
+  const auto as_u16 = wasm_u16x8_extend_low_u8x16(v.raw);
+  const auto as_u32 = wasm_u32x4_extend_low_u16x8(as_u16);
+  return VFromD<D>{as_u32};
+}
+
+// u8 -> i16: the source is unsigned, so the unsigned (zero-extending)
+// intrinsic is used even though the result lanes are signed.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<uint8_t, D>> v) {
+  const auto widened = wasm_u16x8_extend_low_u8x16(v.raw);
+  return VFromD<D>{widened};
+}
+// u16 -> i32: the source is unsigned, so the unsigned (zero-extending)
+// intrinsic is used even though the result lanes are signed.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<uint16_t, D>> v) {
+  const auto widened = wasm_u32x4_extend_low_u16x8(v.raw);
+  return VFromD<D>{widened};
+}
+// u32 -> i64: the source is unsigned, so the unsigned (zero-extending)
+// intrinsic is used even though the result lanes are signed.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_I64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<uint32_t, D>> v) {
+  const auto widened = wasm_u64x2_extend_low_u32x4(v.raw);
+  return VFromD<D>{widened};
+}
+
+// u8 -> i32 in two unsigned widening steps: u8 -> u16, then u16 -> u32.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<uint8_t, D>> v) {
+  const auto as_u16 = wasm_u16x8_extend_low_u8x16(v.raw);
+  const auto as_u32 = wasm_u32x4_extend_low_u16x8(as_u16);
+  return VFromD<D>{as_u32};
+}
+
+// U8/U16 to U64/I64: zero-extend to U32 first, then zero-extend that
+// intermediate result to TFromD<D>.
+template <class D, class V, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_UI64_D(D),
+          HWY_IF_LANES_D(D, HWY_MAX_LANES_V(V)), HWY_IF_UNSIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2))>
+HWY_API VFromD<D> PromoteTo(D d, V v) {
+  const Rebind<uint32_t, decltype(d)> du32;
+  const auto as_u32 = PromoteTo(du32, v);
+  return PromoteTo(d, as_u32);
+}
+
+// Signed: replicate sign bit.
+// i8 -> i16: widens the low i8 lanes via wasm_i16x8_extend_low_i8x16.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<int8_t, D>> v) {
+  const auto widened = wasm_i16x8_extend_low_i8x16(v.raw);
+  return VFromD<D>{widened};
+}
+// i16 -> i32: widens the low i16 lanes via wasm_i32x4_extend_low_i16x8.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<int16_t, D>> v) {
+  const auto widened = wasm_i32x4_extend_low_i16x8(v.raw);
+  return VFromD<D>{widened};
+}
+// i32 -> i64: widens the low i32 lanes via wasm_i64x2_extend_low_i32x4.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_I64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<int32_t, D>> v) {
+  const auto widened = wasm_i64x2_extend_low_i32x4(v.raw);
+  return VFromD<D>{widened};
+}
+
+// i8 -> i32 in two signed widening steps: i8 -> i16, then i16 -> i32.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<int8_t, D>> v) {
+  const auto as_i16 = wasm_i16x8_extend_low_i8x16(v.raw);
+  const auto as_i32 = wasm_i32x4_extend_low_i16x8(as_i16);
+  return VFromD<D>{as_i32};
+}
+
+// I8/I16 to I64: promote to I32 first, then promote that intermediate result
+// to I64.
+template <class D, class V, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_I64_D(D),
+          HWY_IF_LANES_D(D, HWY_MAX_LANES_V(V)), HWY_IF_SIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2))>
+HWY_API VFromD<D> PromoteTo(D d, V v) {
+  const Rebind<int32_t, decltype(d)> di32;
+  const auto as_i32 = PromoteTo(di32, v);
+  return PromoteTo(d, as_i32);
+}
+
+// bf16 -> f32: reinterprets the input as u16, widens those bits into 32-bit
+// lanes, shifts them left by 16 and reinterprets the result as f32.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> PromoteTo(D df32, VFromD<Rebind<bfloat16_t, D>> v) {
+  const RebindToSigned<decltype(df32)> di32;
+  const Rebind<uint16_t, decltype(df32)> du16;
+  const auto bits16 = BitCast(du16, v);
+  const auto bits32 = PromoteTo(di32, bits16);
+  return BitCast(df32, ShiftLeft<16>(bits32));
+}
+
+// i32 -> f64: converts the low i32 lanes via wasm_f64x2_convert_low_i32x4.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<int32_t, D>> v) {
+  const auto converted = wasm_f64x2_convert_low_i32x4(v.raw);
+  return VFromD<D>{converted};
+}
+
+// f32 -> f64: promotes the low f32 lanes via wasm_f64x2_promote_low_f32x4.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<float, D>> v) {
+  const auto promoted = wasm_f64x2_promote_low_f32x4(v.raw);
+  return VFromD<D>{promoted};
+}
+
+// ------------------------------ PromoteUpperTo
+
+// Per-target flag to prevent generic_ops-inl.h from defining PromoteUpperTo.
+#ifdef HWY_NATIVE_PROMOTE_UPPER_TO
+#undef HWY_NATIVE_PROMOTE_UPPER_TO
+#else
+#define HWY_NATIVE_PROMOTE_UPPER_TO
+#endif
+
+// Unsigned: zero-extend.
+// u8 -> u16 from the upper half of a full vector, via
+// wasm_u16x8_extend_high_u8x16.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> PromoteUpperTo(D /* tag */,
+                                 VFromD<Repartition<uint8_t, D>> v) {
+  const auto widened = wasm_u16x8_extend_high_u8x16(v.raw);
+  return VFromD<D>{widened};
+}
+// u16 -> u32 from the upper half of a full vector, via
+// wasm_u32x4_extend_high_u16x8.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> PromoteUpperTo(D /* tag */,
+                                 VFromD<Repartition<uint16_t, D>> v) {
+  const auto widened = wasm_u32x4_extend_high_u16x8(v.raw);
+  return VFromD<D>{widened};
+}
+// u32 -> u64 from the upper half of a full vector, via
+// wasm_u64x2_extend_high_u32x4.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U64_D(D)>
+HWY_API VFromD<D> PromoteUpperTo(D /* tag */,
+                                 VFromD<Repartition<uint32_t, D>> v) {
+  const auto widened = wasm_u64x2_extend_high_u32x4(v.raw);
+  return VFromD<D>{widened};
+}
+
+// u8 -> i16 from the upper half: unsigned source, so the unsigned
+// (zero-extending) intrinsic is used for the signed result type.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> PromoteUpperTo(D /* tag */,
+                                 VFromD<Repartition<uint8_t, D>> v) {
+  const auto widened = wasm_u16x8_extend_high_u8x16(v.raw);
+  return VFromD<D>{widened};
+}
+// u16 -> i32 from the upper half: unsigned source, so the unsigned
+// (zero-extending) intrinsic is used for the signed result type.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> PromoteUpperTo(D /* tag */,
+                                 VFromD<Repartition<uint16_t, D>> v) {
+  const auto widened = wasm_u32x4_extend_high_u16x8(v.raw);
+  return VFromD<D>{widened};
+}
+// u32 -> i64 from the upper half: unsigned source, so the unsigned
+// (zero-extending) intrinsic is used for the signed result type.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I64_D(D)>
+HWY_API VFromD<D> PromoteUpperTo(D /* tag */,
+                                 VFromD<Repartition<uint32_t, D>> v) {
+  const auto widened = wasm_u64x2_extend_high_u32x4(v.raw);
+  return VFromD<D>{widened};
+}
+
+// Signed: replicate sign bit.
+// i8 -> i16 from the upper half of a full vector, via
+// wasm_i16x8_extend_high_i8x16.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> PromoteUpperTo(D /* tag */,
+                                 VFromD<Repartition<int8_t, D>> v) {
+  const auto widened = wasm_i16x8_extend_high_i8x16(v.raw);
+  return VFromD<D>{widened};
+}
+// i16 -> i32 from the upper half of a full vector, via
+// wasm_i32x4_extend_high_i16x8.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> PromoteUpperTo(D /* tag */,
+                                 VFromD<Repartition<int16_t, D>> v) {
+  const auto widened = wasm_i32x4_extend_high_i16x8(v.raw);
+  return VFromD<D>{widened};
+}
+// i32 -> i64 from the upper half of a full vector, via
+// wasm_i64x2_extend_high_i32x4.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I64_D(D)>
+HWY_API VFromD<D> PromoteUpperTo(D /* tag */,
+                                 VFromD<Repartition<int32_t, D>> v) {
+  const auto widened = wasm_i64x2_extend_high_i32x4(v.raw);
+  return VFromD<D>{widened};
+}
+
+// f16 -> f32 from the upper half: extracts the upper half of `v` and forwards
+// it to PromoteTo.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> PromoteUpperTo(D df32, VFromD<Repartition<float16_t, D>> v) {
+  const Rebind<float16_t, decltype(df32)> dh;
+  const auto upper = UpperHalf(dh, v);
+  return PromoteTo(df32, upper);
+}
+
+// bf16 -> f32 from the upper half: widens the upper u16 lanes into 32-bit
+// lanes, shifts them left by 16 and reinterprets the result as f32.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> PromoteUpperTo(D df32, VFromD<Repartition<bfloat16_t, D>> v) {
+  const RebindToSigned<decltype(df32)> di32;
+  const Repartition<uint16_t, decltype(df32)> du16;
+  const auto bits16 = BitCast(du16, v);
+  const auto bits32 = PromoteUpperTo(di32, bits16);
+  return BitCast(df32, ShiftLeft<16>(bits32));
+}
+
+// i32 -> f64 from the upper half. There is no wasm_f64x2_convert_high_i32x4,
+// so the upper half is extracted and passed to PromoteTo.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> PromoteUpperTo(D dd, VFromD<Repartition<int32_t, D>> v) {
+  const Rebind<int32_t, D> dh;
+  return PromoteTo(dd, UpperHalf(dh, v));
+}
+
+// f32 -> f64 from the upper half. There is no wasm_f64x2_promote_high_f32x4,
+// so the upper half is extracted and passed to PromoteTo.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> PromoteUpperTo(D dd, VFromD<Repartition<float, D>> v) {
+  const Rebind<float, D> dh;
+  return PromoteTo(dd, UpperHalf(dh, v));
+}
+
+// Generic version for <=64 bit input/output (the _high intrinsics are only
+// for full vectors): take the upper half of `v`, then call PromoteTo.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), class V>
+HWY_API VFromD<D> PromoteUpperTo(D d, V v) {
+  const Rebind<TFromV<V>, decltype(d)> dh;
+  const auto upper = UpperHalf(dh, v);
+  return PromoteTo(d, upper);
+}
+
+// ------------------------------ Demotions (full -> part w/ narrow lanes)
+
+// i32 -> u16: narrows with wasm_u16x8_narrow_i32x4, passing `v` as both
+// inputs of the intrinsic.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int32_t, D>> v) {
+  const auto narrowed = wasm_u16x8_narrow_i32x4(v.raw, v.raw);
+  return VFromD<D>{narrowed};
+}
+
+// i32 -> i16: narrows with wasm_i16x8_narrow_i32x4, passing `v` as both
+// inputs of the intrinsic.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int32_t, D>> v) {
+  const auto narrowed = wasm_i16x8_narrow_i32x4(v.raw, v.raw);
+  return VFromD<D>{narrowed};
+}
+
+// i32 -> u8 in two narrowing steps: i32 -> i16 (signed narrow), then
+// i16 -> u8 (unsigned narrow).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int32_t, D>> v) {
+  const auto as_i16 = wasm_i16x8_narrow_i32x4(v.raw, v.raw);
+  const auto as_u8 = wasm_u8x16_narrow_i16x8(as_i16, as_i16);
+  return VFromD<D>{as_u8};
+}
+
+// i16 -> u8: narrows with wasm_u8x16_narrow_i16x8, passing `v` as both inputs
+// of the intrinsic.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int16_t, D>> v) {
+  const auto narrowed = wasm_u8x16_narrow_i16x8(v.raw, v.raw);
+  return VFromD<D>{narrowed};
+}
+
+// i32 -> i8 in two signed narrowing steps: i32 -> i16, then i16 -> i8.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_I8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int32_t, D>> v) {
+  const auto as_i16 = wasm_i16x8_narrow_i32x4(v.raw, v.raw);
+  const auto as_i8 = wasm_i8x16_narrow_i16x8(as_i16, as_i16);
+  return VFromD<D>{as_i8};
+}
+
+// i16 -> i8: narrows with wasm_i8x16_narrow_i16x8, passing `v` as both inputs
+// of the intrinsic.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int16_t, D>> v) {
+  const auto narrowed = wasm_i8x16_narrow_i16x8(v.raw, v.raw);
+  return VFromD<D>{narrowed};
+}
+
+// u32 -> u8/u16: clamps the input to 0x7FFFFFFF so it is non-negative when
+// reinterpreted as i32, then reuses the i32 DemoteTo overload.
+template <class D, HWY_IF_UNSIGNED_D(D),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2))>
+HWY_API VFromD<D> DemoteTo(D dn, VFromD<Rebind<uint32_t, D>> v) {
+  const DFromV<decltype(v)> du32;
+  const RebindToSigned<decltype(du32)> di32;
+  const auto clamped = Min(v, Set(du32, 0x7FFFFFFF));
+  return DemoteTo(dn, BitCast(di32, clamped));
+}
+
+// u16 -> u8: clamps the input to 0x7FFF so it is non-negative when
+// reinterpreted as i16, then reuses the i16 DemoteTo overload.
+template <class D, HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D du8, VFromD<Rebind<uint16_t, D>> v) {
+  const DFromV<decltype(v)> du16;
+  const RebindToSigned<decltype(du16)> di16;
+  const auto clamped = Min(v, Set(du16, 0x7FFF));
+  return DemoteTo(du8, BitCast(di16, clamped));
+}
+
+// f32 -> bf16: moves the upper 16 bits of each f32 lane into the low half of
+// its 32-bit lane, narrows to u16 and reinterprets the result as bf16.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_BF16_D(D)>
+HWY_API VFromD<D> DemoteTo(D dbf16, VFromD<Rebind<float, D>> v) {
+  const Rebind<uint32_t, decltype(dbf16)> du32;  // for logical shift right
+  const Rebind<int32_t, decltype(dbf16)> di32;
+  const Rebind<uint16_t, decltype(dbf16)> du16;
+  const auto upper_bits = ShiftRight<16>(BitCast(du32, v));
+  const auto narrowed = DemoteTo(du16, BitCast(di32, upper_bits));
+  return BitCast(dbf16, narrowed);
+}
+
+// f64 -> i32 via wasm_i32x4_trunc_sat_f64x2_zero.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<double, D>> v) {
+  const auto truncated = wasm_i32x4_trunc_sat_f64x2_zero(v.raw);
+  return VFromD<D>{truncated};
+}
+
+// f64 -> f32 via wasm_f32x4_demote_f64x2_zero.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<double, D>> v) {
+  const auto demoted = wasm_f32x4_demote_f64x2_zero(v.raw);
+  return VFromD<D>{demoted};
+}
+
+// Two f32 vectors -> one bf16 vector. The upper 16 bits of each `b` lane are
+// shifted into the even u16 position; OddEven then keeps the odd u16 lanes of
+// `a` (its upper 16 bits) and the even u16 lanes of the shifted `b`.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_BF16_D(D),
+          class V32 = VFromD<Repartition<float, D>>>
+HWY_API VFromD<D> ReorderDemote2To(D dbf16, V32 a, V32 b) {
+  const Repartition<uint32_t, decltype(dbf16)> du32;
+  const RebindToUnsigned<decltype(dbf16)> du16;
+  const VFromD<decltype(du32)> b_upper_bits = ShiftRight<16>(BitCast(du32, b));
+  const auto a16 = BitCast(du16, a);
+  const auto b16 = BitCast(du16, b_upper_bits);
+  return BitCast(dbf16, OddEven(a16, b16));
+}
+
+// Specializations for partial vectors because i16x8_narrow_i32x4 sets lanes
+// above 2*N.
+// Vec32<int32_t> x2 -> Vec32<int16_t>: joins both inputs into one
+// double-width vector with Combine and demotes that in a single DemoteTo call.
+template <class D, HWY_IF_I16_D(D)>
+HWY_API Vec32<int16_t> ReorderDemote2To(D dn, Vec32<int32_t> a,
+                                        Vec32<int32_t> b) {
+  const Twice<DFromV<decltype(a)>> dt;
+  const auto combined = Combine(dt, b, a);
+  return DemoteTo(dn, combined);
+}
+// Vec64<int32_t> x2 -> Vec64<int16_t>. The narrow intrinsic operates on full
+// 128-bit registers; viewed as u32 lanes, ConcatEven then gathers the
+// even-indexed lanes so that LowerHalf returns them contiguously.
+template <class D, HWY_IF_I16_D(D)>
+HWY_API Vec64<int16_t> ReorderDemote2To(D dn, Vec64<int32_t> a,
+                                        Vec64<int32_t> b) {
+  const Twice<decltype(dn)> dn_full;
+  const Repartition<uint32_t, decltype(dn_full)> du32_full;
+
+  const Vec128<int16_t> narrowed{wasm_i16x8_narrow_i32x4(a.raw, b.raw)};
+  const auto as_u32 = BitCast(du32_full, narrowed);
+  const auto gathered = ConcatEven(du32_full, as_u32, as_u32);
+  return LowerHalf(BitCast(dn_full, gathered));
+}
+// Full vectors, i32 x2 -> i16: a single wasm_i16x8_narrow_i32x4(a, b).
+template <class D, HWY_IF_I16_D(D)>
+HWY_API Vec128<int16_t> ReorderDemote2To(D /* tag */, Vec128<int32_t> a,
+                                         Vec128<int32_t> b) {
+  const auto narrowed = wasm_i16x8_narrow_i32x4(a.raw, b.raw);
+  return Vec128<int16_t>{narrowed};
+}
+
+// Vec32<int32_t> x2 -> Vec32<uint16_t>: joins both inputs into one
+// double-width vector with Combine and demotes that in a single DemoteTo call.
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec32<uint16_t> ReorderDemote2To(D dn, Vec32<int32_t> a,
+                                         Vec32<int32_t> b) {
+  const Twice<DFromV<decltype(a)>> dt;
+  const auto combined = Combine(dt, b, a);
+  return DemoteTo(dn, combined);
+}
+// Vec64<int32_t> x2 -> Vec64<uint16_t>. The narrow intrinsic operates on full
+// 128-bit registers, so the results of `a` land in u32 lane 0 and those of
+// `b` in u32 lane 2; ConcatEven gathers the even u32 lanes so that LowerHalf
+// returns them contiguously.
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec64<uint16_t> ReorderDemote2To(D dn, Vec64<int32_t> a,
+                                         Vec64<int32_t> b) {
+  const Twice<decltype(dn)> dn_full;
+  const Repartition<uint32_t, decltype(dn_full)> du32_full;
+
+  // wasm_u16x8_narrow_i32x4 produces unsigned 16-bit lanes, so wrap the
+  // result in the unsigned vector type (as the full-vector overload does).
+  const Vec128<uint16_t> v_full{wasm_u16x8_narrow_i32x4(a.raw, b.raw)};
+  const auto vu32_full = BitCast(du32_full, v_full);
+  return LowerHalf(
+      BitCast(dn_full, ConcatEven(du32_full, vu32_full, vu32_full)));
+}
+// Full vectors, i32 x2 -> u16: a single wasm_u16x8_narrow_i32x4(a, b).
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec128<uint16_t> ReorderDemote2To(D /* tag */, Vec128<int32_t> a,
+                                          Vec128<int32_t> b) {
+  const auto narrowed = wasm_u16x8_narrow_i32x4(a.raw, b.raw);
+  return Vec128<uint16_t>{narrowed};
+}
+
+// Full vectors, u32 x2 -> u16: clamps both inputs to 0x7FFFFFFF so they are
+// non-negative as i32, then forwards to the i32 overload.
+template <class D, HWY_IF_U16_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, Vec128<uint32_t> a,
+                                   Vec128<uint32_t> b) {
+  const DFromV<decltype(a)> du32;
+  const RebindToSigned<decltype(du32)> di32;
+  const auto i32_limit = Set(du32, 0x7FFFFFFFu);
+
+  const auto a_i32 = BitCast(di32, Min(a, i32_limit));
+  const auto b_i32 = BitCast(di32, Min(b, i32_limit));
+  return ReorderDemote2To(dn, a_i32, b_i32);
+}
+// Partial vectors, u32 x2 -> u16: joins both inputs into one double-width
+// vector with Combine and demotes that in a single DemoteTo call.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, VFromD<Repartition<uint32_t, D>> a,
+                                   VFromD<Repartition<uint32_t, D>> b) {
+  const Twice<DFromV<decltype(a)>> dt;
+  const auto combined = Combine(dt, b, a);
+  return DemoteTo(dn, combined);
+}
+
+// Specializations for partial vectors because i8x16_narrow_i16x8 sets lanes
+// above 2*N.
+// Results of at most 4 bytes, i16 x2 -> i8: joins both inputs into one
+// double-width vector with Combine and demotes that in a single DemoteTo call.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_I8_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, VFromD<Repartition<int16_t, D>> a,
+                                   VFromD<Repartition<int16_t, D>> b) {
+  const Twice<DFromV<decltype(a)>> dt;
+  const auto combined = Combine(dt, b, a);
+  return DemoteTo(dn, combined);
+}
+// Vec64<int16_t> x2 -> Vec64<int8_t>. The narrow intrinsic operates on full
+// 128-bit registers; viewed as u32 lanes, ConcatEven then gathers the
+// even-indexed lanes so that LowerHalf returns them contiguously.
+template <class D, HWY_IF_I8_D(D)>
+HWY_API Vec64<int8_t> ReorderDemote2To(D dn, Vec64<int16_t> a,
+                                       Vec64<int16_t> b) {
+  const Twice<decltype(dn)> dn_full;
+  const Repartition<uint32_t, decltype(dn_full)> du32_full;
+
+  const Vec128<int8_t> narrowed{wasm_i8x16_narrow_i16x8(a.raw, b.raw)};
+  const auto as_u32 = BitCast(du32_full, narrowed);
+  const auto gathered = ConcatEven(du32_full, as_u32, as_u32);
+  return LowerHalf(BitCast(dn_full, gathered));
+}
+// Full vectors, i16 x2 -> i8: a single wasm_i8x16_narrow_i16x8(a, b).
+template <class D, HWY_IF_I8_D(D)>
+HWY_API Vec128<int8_t> ReorderDemote2To(D /* tag */, Vec128<int16_t> a,
+                                        Vec128<int16_t> b) {
+  const auto narrowed = wasm_i8x16_narrow_i16x8(a.raw, b.raw);
+  return Vec128<int8_t>{narrowed};
+}
+
+// Results of at most 4 bytes, i16 x2 -> u8: joins both inputs into one
+// double-width vector with Combine and demotes that in a single DemoteTo call.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, VFromD<Repartition<int16_t, D>> a,
+                                   VFromD<Repartition<int16_t, D>> b) {
+  const Twice<DFromV<decltype(a)>> dt;
+  const auto combined = Combine(dt, b, a);
+  return DemoteTo(dn, combined);
+}
+// Vec64<int16_t> x2 -> Vec64<uint8_t>. The narrow intrinsic operates on full
+// 128-bit registers; viewed as u32 lanes, ConcatEven then gathers the
+// even-indexed lanes so that LowerHalf returns them contiguously.
+template <class D, HWY_IF_U8_D(D)>
+HWY_API Vec64<uint8_t> ReorderDemote2To(D dn, Vec64<int16_t> a,
+                                        Vec64<int16_t> b) {
+  const Twice<decltype(dn)> dn_full;
+  const Repartition<uint32_t, decltype(dn_full)> du32_full;
+
+  const Vec128<uint8_t> narrowed{wasm_u8x16_narrow_i16x8(a.raw, b.raw)};
+  const auto as_u32 = BitCast(du32_full, narrowed);
+  const auto gathered = ConcatEven(du32_full, as_u32, as_u32);
+  return LowerHalf(BitCast(dn_full, gathered));
+}
+// Full vectors, i16 x2 -> u8: a single wasm_u8x16_narrow_i16x8(a, b).
+template <class D, HWY_IF_U8_D(D)>
+HWY_API Vec128<uint8_t> ReorderDemote2To(D /* tag */, Vec128<int16_t> a,
+                                         Vec128<int16_t> b) {
+  const auto narrowed = wasm_u8x16_narrow_i16x8(a.raw, b.raw);
+  return Vec128<uint8_t>{narrowed};
+}
+
+// Full vectors, u16 x2 -> u8: clamps both inputs to 0x7FFF so they are
+// non-negative as i16, then forwards to the i16 overload.
+template <class D, HWY_IF_U8_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, Vec128<uint16_t> a,
+                                   Vec128<uint16_t> b) {
+  const DFromV<decltype(a)> du16;
+  const RebindToSigned<decltype(du16)> di16;
+  const auto i16_limit = Set(du16, 0x7FFFu);
+
+  const auto a_i16 = BitCast(di16, Min(a, i16_limit));
+  const auto b_i16 = BitCast(di16, Min(b, i16_limit));
+  return ReorderDemote2To(dn, a_i16, b_i16);
+}
+// Partial vectors, u16 x2 -> u8: joins both inputs into one double-width
+// vector with Combine and demotes that in a single DemoteTo call.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, VFromD<Repartition<uint16_t, D>> a,
+                                   VFromD<Repartition<uint16_t, D>> b) {
+  const Twice<DFromV<decltype(a)>> dt;
+  const auto combined = Combine(dt, b, a);
+  return DemoteTo(dn, combined);
+}
+
+// For already range-limited input [0, 255]. Narrows in two steps:
+// 32 -> 16 bits (signed narrow), then 16 -> 8 bits (unsigned narrow).
+template <size_t N>
+HWY_API Vec128<uint8_t, N> U8FromU32(const Vec128<uint32_t, N> v) {
+  const auto as_16 = wasm_i16x8_narrow_i32x4(v.raw, v.raw);
+  const auto as_8 = wasm_u8x16_narrow_i16x8(as_16, as_16);
+  return Vec128<uint8_t, N>{as_8};
+}
+
+// ------------------------------ Truncations
+
+// Single-lane truncation: reinterprets `v` with the destination lane type and
+// reuses its raw register for the result.
+template <typename From, class DTo, HWY_IF_LANES_D(DTo, 1)>
+HWY_API VFromD<DTo> TruncateTo(DTo /* tag */, Vec128<From, 1> v) {
+  // BitCast requires the same size; DTo might be u8x1 and v u16x1.
+  const Repartition<TFromD<DTo>, DFromV<decltype(v)>> dto;
+  const auto reinterpreted = BitCast(dto, v);
+  return VFromD<DTo>{reinterpreted.raw};
+}
+
+// u64x2 -> u8x2: views the input as bytes and applies ConcatEven three times
+// before taking the lowest eighth of the vector.
+template <class D, HWY_IF_U8_D(D)>
+HWY_API Vec16<uint8_t> TruncateTo(D /* tag */, Vec128<uint64_t> v) {
+  const Full128<uint8_t> d;
+  const auto bytes = BitCast(d, v);
+  const auto pass1 = ConcatEven(d, bytes, bytes);
+  const auto pass2 = ConcatEven(d, pass1, pass1);
+  const auto pass3 = ConcatEven(d, pass2, pass2);
+  return LowerHalf(LowerHalf(LowerHalf(pass3)));
+}
+
+// u64x2 -> u16x2: views the input as u16 lanes and applies ConcatEven twice
+// before taking the lowest quarter of the vector.
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec32<uint16_t> TruncateTo(D /* tag */, Vec128<uint64_t> v) {
+  const Full128<uint16_t> d;
+  const auto lanes16 = BitCast(d, v);
+  const auto pass1 = ConcatEven(d, lanes16, lanes16);
+  const auto pass2 = ConcatEven(d, pass1, pass1);
+  return LowerHalf(LowerHalf(pass2));
+}
+
+// u64x2 -> u32x2: views the input as u32 lanes and applies ConcatEven once
+// before taking the lower half of the vector.
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec64<uint32_t> TruncateTo(D /* tag */, Vec128<uint64_t> v) {
+  const Full128<uint32_t> d;
+  const auto lanes32 = BitCast(d, v);
+  const auto even_lanes = ConcatEven(d, lanes32, lanes32);
+  return LowerHalf(even_lanes);
+}
+
+// u32 -> u8: treats the raw register as bytes and applies ConcatEven twice;
+// the result is returned via the raw register.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, VFromD<Rebind<uint32_t, D>> v) {
+  const Repartition<uint8_t, DFromV<decltype(v)>> d;
+  const Vec128<uint8_t> bytes{v.raw};
+  const auto pass1 = ConcatEven(d, bytes, bytes);
+  const auto pass2 = ConcatEven(d, pass1, pass1);
+  return VFromD<D>{pass2.raw};
+}
+
+// u32 -> u16: treats the raw register as u16 lanes and applies ConcatEven
+// once; the result is returned via the raw register.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, VFromD<Rebind<uint32_t, D>> v) {
+  const Repartition<uint16_t, DFromV<decltype(v)>> d;
+  const Vec128<uint16_t> lanes16{v.raw};
+  const auto even_lanes = ConcatEven(d, lanes16, lanes16);
+  return VFromD<D>{even_lanes.raw};
+}
+
+// u16 -> u8: treats the raw register as bytes and applies ConcatEven once;
+// the result is returned via the raw register.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, VFromD<Rebind<uint16_t, D>> v) {
+  const Repartition<uint8_t, DFromV<decltype(v)>> d;
+  const Vec128<uint8_t> bytes{v.raw};
+  const auto even_bytes = ConcatEven(d, bytes, bytes);
+  return VFromD<D>{even_bytes.raw};
+}
+
+// ------------------------------ Demotions to/from i64
+
+namespace detail {
+
+// Unsigned destination: the saturated u64 value needs no further masking.
+template <class D, HWY_IF_UNSIGNED_D(D)>
+HWY_INLINE VFromD<Rebind<uint64_t, D>> DemoteFromU64MaskOutResult(
+    D /*dn*/, VFromD<Rebind<uint64_t, D>> v) {
+  return v;
+}
+
+// Signed destination: keeps only the bits that are set in the highest value
+// representable by the destination lane type.
+template <class D, HWY_IF_SIGNED_D(D)>
+HWY_INLINE VFromD<Rebind<uint64_t, D>> DemoteFromU64MaskOutResult(
+    D /*dn*/, VFromD<Rebind<uint64_t, D>> v) {
+  const DFromV<decltype(v)> du64;
+  const auto highest =
+      Set(du64, static_cast<uint64_t>(hwy::HighestValue<TFromD<D>>()));
+  return And(v, highest);
+}
+
+// Prepares u64 lanes for truncation to TFromD<D>: lanes with any bit set at
+// or above kShiftAmt are ORed with all-ones, then passed through
+// DemoteFromU64MaskOutResult.
+template <class D>
+HWY_INLINE VFromD<Rebind<uint64_t, D>> DemoteFromU64Saturate(
+    D dn, VFromD<Rebind<uint64_t, D>> v) {
+  const Rebind<uint64_t, D> du64;
+  const RebindToSigned<decltype(du64)> di64;
+  // Number of value bits in the destination type (one less when signed).
+  constexpr int kShiftAmt = static_cast<int>(sizeof(TFromD<D>) * 8) -
+                            static_cast<int>(hwy::IsSigned<TFromD<D>>());
+
+  const auto excess_bits = BitCast(di64, ShiftRight<kShiftAmt>(v));
+  const auto overflow_mask = Gt(excess_bits, Zero(di64));
+  const auto too_big = BitCast(du64, VecFromMask(di64, overflow_mask));
+  return DemoteFromU64MaskOutResult(dn, Or(v, too_big));
+}
+
+// Reinterprets both 64-bit vectors with `dn` and merges them via ConcatEven.
+template <class D, class V>
+HWY_INLINE VFromD<D> ReorderDemote2From64To32Combine(D dn, V a, V b) {
+  const auto a_n = BitCast(dn, a);
+  const auto b_n = BitCast(dn, b);
+  return ConcatEven(dn, b_n, a_n);
+}
+
+}  // namespace detail
+
+// i64 -> signed 8/16/32-bit lanes. Negative values are saturated by first
+// saturating their bitwise inverse and then inverting the saturation result;
+// the XOR with the broadcast sign bit performs both inversions.
+template <class D, HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_SIGNED_D(D)>
+HWY_API VFromD<D> DemoteTo(D dn, VFromD<Rebind<int64_t, D>> v) {
+  const DFromV<decltype(v)> di64;
+  const RebindToUnsigned<decltype(di64)> du64;
+  const RebindToUnsigned<decltype(dn)> dn_u;
+
+  const auto sign = BitCast(du64, BroadcastSignBit(v));
+  const auto non_negative = Xor(sign, BitCast(du64, v));
+  const auto saturated =
+      Xor(sign, detail::DemoteFromU64Saturate(dn, non_negative));
+  return BitCast(dn, TruncateTo(dn_u, saturated));
+}
+
+// i64 -> unsigned 8/16/32-bit lanes: negative lanes are zeroed (AndNot with
+// the broadcast sign bit), then the value is saturated and truncated.
+template <class D, HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_UNSIGNED_D(D)>
+HWY_API VFromD<D> DemoteTo(D dn, VFromD<Rebind<int64_t, D>> v) {
+  const DFromV<decltype(v)> di64;
+  const RebindToUnsigned<decltype(di64)> du64;
+
+  const auto clamped_at_zero = BitCast(du64, AndNot(BroadcastSignBit(v), v));
+  const auto saturated = detail::DemoteFromU64Saturate(dn, clamped_at_zero);
+  return TruncateTo(dn, saturated);
+}
+
+// u64 -> unsigned 8/16/32-bit lanes: saturate, then truncate.
+template <class D, HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_UNSIGNED_D(D)>
+HWY_API VFromD<D> DemoteTo(D dn, VFromD<Rebind<uint64_t, D>> v) {
+  const auto saturated = detail::DemoteFromU64Saturate(dn, v);
+  return TruncateTo(dn, saturated);
+}
+
+// Partial vectors, i64 x2 -> 32-bit integer lanes: joins both inputs into one
+// double-width vector with Combine and demotes that in a single DemoteTo call.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_T_SIZE_D(D, 4),
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<D>)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, VFromD<Repartition<int64_t, D>> a,
+                                   VFromD<Repartition<int64_t, D>> b) {
+  const Twice<DFromV<decltype(a)>> dt;
+  const auto combined = Combine(dt, b, a);
+  return DemoteTo(dn, combined);
+}
+
+// Partial vectors, u64 x2 -> u32: joins both inputs into one double-width
+// vector with Combine and demotes that in a single DemoteTo call.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, VFromD<Repartition<uint64_t, D>> a,
+                                   VFromD<Repartition<uint64_t, D>> b) {
+  const Twice<DFromV<decltype(a)>> dt;
+  const auto combined = Combine(dt, b, a);
+  return DemoteTo(dn, combined);
+}
+
+// Full vectors, i64 x2 -> i32. Each input is saturated separately: negative
+// values are handled by saturating their bitwise inverse and inverting the
+// result (XOR with the broadcast sign bit). ConcatEven then gathers the
+// even-indexed 32-bit lanes of both saturated vectors.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I32_D(D)>
+HWY_API Vec128<int32_t> ReorderDemote2To(D dn, Vec128<int64_t> a,
+                                         Vec128<int64_t> b) {
+  const DFromV<decltype(a)> di64;
+  const RebindToUnsigned<decltype(di64)> du64;
+  const Half<decltype(dn)> dnh;
+
+  const auto sign_a = BitCast(du64, BroadcastSignBit(a));
+  const auto sign_b = BitCast(du64, BroadcastSignBit(b));
+
+  const auto flipped_a = Xor(sign_a, BitCast(du64, a));
+  const auto flipped_b = Xor(sign_b, BitCast(du64, b));
+
+  const auto sat_a = Xor(sign_a, detail::DemoteFromU64Saturate(dnh, flipped_a));
+  const auto sat_b = Xor(sign_b, detail::DemoteFromU64Saturate(dnh, flipped_b));
+
+  return ConcatEven(dn, BitCast(dn, sat_b), BitCast(dn, sat_a));
+}
+
+// Full vectors, i64 x2 -> u32: negative lanes are zeroed (AndNot with the
+// broadcast sign bit), each input is saturated, and ConcatEven gathers the
+// even-indexed 32-bit lanes of both results.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U32_D(D)>
+HWY_API Vec128<uint32_t> ReorderDemote2To(D dn, Vec128<int64_t> a,
+                                          Vec128<int64_t> b) {
+  const DFromV<decltype(a)> di64;
+  const RebindToUnsigned<decltype(di64)> du64;
+  const Half<decltype(dn)> dnh;
+
+  const auto a_non_neg = BitCast(du64, AndNot(BroadcastSignBit(a), a));
+  const auto b_non_neg = BitCast(du64, AndNot(BroadcastSignBit(b), b));
+  const auto sat_a = detail::DemoteFromU64Saturate(dnh, a_non_neg);
+  const auto sat_b = detail::DemoteFromU64Saturate(dnh, b_non_neg);
+
+  return ConcatEven(dn, BitCast(dn, sat_b), BitCast(dn, sat_a));
+}
+
+// Full vectors, u64 x2 -> u32: each input is saturated, then ConcatEven
+// gathers the even-indexed 32-bit lanes of both results.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U32_D(D)>
+HWY_API Vec128<uint32_t> ReorderDemote2To(D dn, Vec128<uint64_t> a,
+                                          Vec128<uint64_t> b) {
+  const Half<decltype(dn)> dnh;
+
+  const auto sat_a = detail::DemoteFromU64Saturate(dnh, a);
+  const auto sat_b = detail::DemoteFromU64Saturate(dnh, b);
+
+  const auto a_n = BitCast(dn, sat_a);
+  const auto b_n = BitCast(dn, sat_b);
+  return ConcatEven(dn, b_n, a_n);
+}
+
+// Integer OrderedDemote2To: forwards directly to ReorderDemote2To. The
+// template constraints require non-float lane types, input lanes twice as
+// wide as the output lanes, and twice as many output lanes as input lanes.
+template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<D>), class V,
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<D>) * 2),
+          HWY_IF_LANES_D(D, HWY_MAX_LANES_D(DFromV<V>) * 2)>
+HWY_API VFromD<D> OrderedDemote2To(D d, V a, V b) {
+  return ReorderDemote2To(d, a, b);
+}
+
+// Two f32 vectors -> one bf16 vector: views both inputs as u16 lanes and
+// merges them with ConcatOdd(du16, b, a).
+template <class D, HWY_IF_BF16_D(D), class V32 = VFromD<Repartition<float, D>>>
+HWY_API VFromD<D> OrderedDemote2To(D dbf16, V32 a, V32 b) {
+  const RebindToUnsigned<decltype(dbf16)> du16;
+  const auto a16 = BitCast(du16, a);
+  const auto b16 = BitCast(du16, b);
+  return BitCast(dbf16, ConcatOdd(du16, b16, a16));
+}
+
+// ------------------------------ ConvertTo
+
+// i32 -> f32 via wasm_f32x4_convert_i32x4.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, VFromD<Rebind<int32_t, D>> v) {
+  const auto converted = wasm_f32x4_convert_i32x4(v.raw);
+  return VFromD<D>{converted};
+}
+// u32 -> f32 via wasm_f32x4_convert_u32x4.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, VFromD<Rebind<uint32_t, D>> v) {
+  const auto converted = wasm_f32x4_convert_u32x4(v.raw);
+  return VFromD<D>{converted};
+}
+
+// i64 -> f64 using only integer bit operations and one subtract/add pair.
+// The upper and lower 32 bits of each lane are embedded into the mantissas of
+// two doubles with fixed exponents, and the exponent biases are removed by
+// the final floating-point subtraction and addition.
+template <class D, HWY_IF_F64_D(D)>
+HWY_API VFromD<D> ConvertTo(D dd, VFromD<Rebind<int64_t, D>> v) {
+  // Based on wim's approach (https://stackoverflow.com/questions/41144668/)
+  const Repartition<uint32_t, decltype(dd)> d32;
+  const Repartition<uint64_t, decltype(dd)> d64;
+
+  // Toggle MSB of lower 32-bits and insert exponent for 2^84 + 2^63
+  const auto k84_63 = Set(d64, 0x4530000080000000ULL);
+  const auto v_upper = BitCast(dd, ShiftRight<32>(BitCast(d64, v)) ^ k84_63);
+
+  // Exponent is 2^52, lower 32 bits from v (=> 32-bit OddEven)
+  const auto k52 = Set(d32, 0x43300000);
+  const auto v_lower = BitCast(dd, OddEven(k52, BitCast(d32, v)));
+
+  // Combined bias (2^84 + 2^63 + 2^52) that the two partial values carry.
+  const auto k84_63_52 = BitCast(dd, Set(d64, 0x4530000080100000ULL));
+  return (v_upper - k84_63_52) + v_lower;  // order matters!
+}
+
+namespace detail {
+// Converts u64 lanes to f64 by ORing them into the bit pattern of 2^52 and
+// subtracting 2^52 again.
+// NOTE(review): this presumably requires `w` to fit in the 52 mantissa bits;
+// the callers visible here pass values of at most 32 bits - confirm for any
+// other caller.
+template <class VW>
+HWY_INLINE VFromD<Rebind<double, DFromV<VW>>> U64ToF64VecFast(VW w) {
+  const DFromV<decltype(w)> d64;
+  const RebindToFloat<decltype(d64)> dd;
+  // The integer literal equals 2^52 and is converted to a double by Set.
+  const auto cnst2_52_dbl = Set(dd, 0x0010000000000000);  // 2^52
+  return BitCast(dd, Or(w, BitCast(d64, cnst2_52_dbl))) - cnst2_52_dbl;
+}
+}  // namespace detail
+
+// u64 -> f64: splits each lane into its low and high 32 bits, converts both
+// halves with detail::U64ToF64VecFast and recombines them as
+// 2^32 * hi + lo using MulAdd.
+template <class D, HWY_IF_F64_D(D)>
+HWY_API VFromD<D> ConvertTo(D dd, VFromD<Rebind<uint64_t, D>> v) {
+  // Based on wim's approach (https://stackoverflow.com/questions/41144668/)
+  const RebindToUnsigned<decltype(dd)> d64;
+  using VU = VFromD<decltype(d64)>;
+
+  const VU msk_lo = Set(d64, 0xFFFFFFFF);
+  const auto cnst2_32_dbl = Set(dd, 4294967296.0);  // 2^32
+
+  // Extract the 32 lowest/highest significant bits of v
+  const VU v_lo = And(v, msk_lo);
+  const VU v_hi = ShiftRight<32>(v);
+
+  const auto v_lo_dbl = detail::U64ToF64VecFast(v_lo);
+  return MulAdd(cnst2_32_dbl, detail::U64ToF64VecFast(v_hi), v_lo_dbl);
+}
+
+// f32 -> i32. Truncates (rounds toward zero) via wasm_i32x4_trunc_sat_f32x4.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, VFromD<Rebind<float, D>> v) {
+  const auto truncated = wasm_i32x4_trunc_sat_f32x4(v.raw);
+  return VFromD<D>{truncated};
+}
+
+// f64 -> i64 with truncation toward zero, implemented with integer
+// operations on the IEEE-754 bit pattern: the biased exponent decides whether
+// the value is in range, the 53-bit significand is shifted right or left into
+// place, out-of-range inputs saturate to LimitsMin/LimitsMax, and the sign is
+// applied at the end via two's-complement negation.
+template <class DI, HWY_IF_I64_D(DI)>
+HWY_API VFromD<DI> ConvertTo(DI di, VFromD<Rebind<double, DI>> v) {
+  using VI = VFromD<decltype(di)>;
+  using MI = MFromD<decltype(di)>;
+  const RebindToUnsigned<decltype(di)> du;
+  using VU = VFromD<decltype(du)>;
+  const Repartition<uint16_t, decltype(di)> du16;
+  const VI k1075 = Set(di, 1075);  // biased exponent of 2^52
+
+  // Exponent indicates whether the number can be represented as int64_t.
+  const VU biased_exp = ShiftRight<52>(BitCast(du, v)) & Set(du, 0x7FF);
+  const MI in_range = BitCast(di, biased_exp) < Set(di, 1086);
+
+  // If we were to cap the exponent at 51 and add 2^52, the number would be in
+  // [2^52, 2^53) and mantissa bits could be read out directly. We need to
+  // round-to-0 (truncate).
+  // Use 16-bit saturated unsigned subtraction to compute shift_mnt and
+  // shift_int since biased_exp[i] is a non-negative integer that is less than
+  // or equal to 2047.
+  // The upper 48 bits of both shift_mnt and shift_int are guaranteed to be
+  // zero as the upper 48 bits of both k1075 and biased_exp are zero.
+
+  const VU shift_mnt = BitCast(
+      du, SaturatedSub(BitCast(du16, k1075), BitCast(du16, biased_exp)));
+  const VU shift_int = BitCast(
+      du, SaturatedSub(BitCast(du16, biased_exp), BitCast(du16, k1075)));
+  const VU mantissa = BitCast(du, v) & Set(du, (1ULL << 52) - 1);
+  // Include implicit 1-bit
+  VU int53 = (mantissa | Set(du, 1ULL << 52)) >> shift_mnt;
+  // WASM clamps shift count; zero if greater.
+  const MI tiny = BitCast(di, shift_mnt) > Set(di, 63);
+  int53 = IfThenZeroElse(RebindMask(du, tiny), int53);
+
+  // For inputs larger than 2^53 - 1, insert zeros at the bottom.
+  // For inputs less than 2^63, the implicit 1-bit is guaranteed not to be
+  // shifted out of the left shift result below as shift_int[i] <= 10 is true
+  // for any inputs that are less than 2^63.
+  const VU shifted = int53 << shift_int;
+
+  // Saturate to LimitsMin (unchanged when negating below) or LimitsMax.
+  const VI sign_mask = BroadcastSignBit(BitCast(di, v));
+  const VI limit = Set(di, LimitsMax<int64_t>()) - sign_mask;
+  const VI magnitude = IfThenElse(in_range, BitCast(di, shifted), limit);
+
+  // If the input was negative, negate the integer (two's complement).
+  return (magnitude ^ sign_mask) - sign_mask;
+}
+
+// ------------------------------ NearestInt (Round)
+// Rounds `v` with Round() and converts the result to i32 lanes via ConvertTo.
+template <size_t N>
+HWY_API Vec128<int32_t, N> NearestInt(const Vec128<float, N> v) {
+  const RebindToSigned<DFromV<decltype(v)>> di;
+  const auto rounded = Round(v);
+  return ConvertTo(di, rounded);
+}
+
+// ================================================== MISC
+
+// ------------------------------ SumsOf8 (ShiftRight, Add)
+// Sums each group of eight consecutive u8 lanes into the u64 lane that
+// overlaps them, using three pairwise additions at 16-, 32- and 64-bit
+// granularity.
+template <size_t N>
+HWY_API Vec128<uint64_t, N / 8> SumsOf8(const Vec128<uint8_t, N> v) {
+  const DFromV<decltype(v)> d8;
+  const RepartitionToWide<decltype(d8)> d16;
+  const RepartitionToWide<decltype(d16)> d32;
+  const RepartitionToWide<decltype(d32)> d64;
+  using V16 = VFromD<decltype(d16)>;
+
+  // Step 1: inside every u16, add the upper byte to the lower byte.
+  const V16 upper_bytes = ShiftRight<8>(BitCast(d16, v));
+  const V16 lower_bytes = And(BitCast(d16, v), Set(d16, 0xFF));
+  const V16 sums_of_2 = Add(upper_bytes, lower_bytes);
+
+  // Step 2: inside every u32, add the upper u16 sum onto the lower u16 sum.
+  const V16 upper_of_u32 =
+      BitCast(d16, ShiftRight<16>(BitCast(d32, sums_of_2)));
+  const V16 sums_of_4 = Add(sums_of_2, upper_of_u32);
+
+  // Step 3: inside every u64, add the upper u32 sum onto the lower u32 sum.
+  const V16 upper_of_u64 =
+      BitCast(d16, ShiftRight<32>(BitCast(d64, sums_of_4)));
+  const V16 sums_of_8 = Add(sums_of_4, upper_of_u64);
+
+  // Only the lowest u16 of each u64 holds the complete sum (at most 8 * 255);
+  // the other u16 lanes contain partial sums and are cleared.
+  return And(BitCast(d64, sums_of_8), Set(d64, 0xFFFF));
+}
+
+// ------------------------------ LoadMaskBits (TestBit)
+
+namespace detail {
+
+// 1-byte lanes: builds a mask from the low bits of `bits`, one bit per lane.
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE MFromD<D> LoadMaskBits(D d, uint64_t bits) {
+  // Byte indices for TableLookupBytes: lanes 0..7 read byte 0 of the mask
+  // bits, lanes 8..15 read byte 1.
+  alignas(16) static constexpr uint8_t kRep8[16] = {0, 0, 0, 0, 0, 0, 0, 0,
+                                                    1, 1, 1, 1, 1, 1, 1, 1};
+  // The single bit within that byte which governs each lane.
+  alignas(16) static constexpr uint8_t kBit[16] = {1, 2, 4, 8, 16, 32, 64, 128,
+                                                   1, 2, 4, 8, 16, 32, 64, 128};
+
+  const RebindToUnsigned<decltype(d)> du;
+
+  // Broadcast through a raw 32-bit splat instead of Set(): the latter would
+  // need a lane type wider than 8 bits, which would not compile for
+  // T=uint8_t, N=1.
+  const VFromD<D> broadcast{wasm_i32x4_splat(static_cast<int32_t>(bits))};
+
+  // Replicate bytes 8x such that each byte contains the bit that governs it.
+  const auto governing_byte = TableLookupBytes(broadcast, Load(du, kRep8));
+  return RebindMask(d, TestBit(governing_byte, LoadDup128(du, kBit)));
+}
+
+// 2-byte lanes: broadcasts the low 16 bits of `bits` to every lane and tests
+// lane i against the single bit (1 << i).
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_INLINE MFromD<D> LoadMaskBits(D d, uint64_t bits) {
+  alignas(16) static constexpr uint16_t kBit[8] = {1, 2, 4, 8, 16, 32, 64, 128};
+  const RebindToUnsigned<decltype(d)> du;
+  const auto broadcast = Set(du, static_cast<uint16_t>(bits));
+  const auto lane_bit = Load(du, kBit);
+  return RebindMask(d, TestBit(broadcast, lane_bit));
+}
+
+// 4-byte lanes: broadcasts the low 32 bits of `bits` to every lane and tests
+// lane i against the single bit (1 << i). Only the first four table entries
+// are nonzero; the remaining ones are zero-initialized.
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_INLINE MFromD<D> LoadMaskBits(D d, uint64_t bits) {
+  alignas(16) static constexpr uint32_t kBit[8] = {1, 2, 4, 8};
+  const RebindToUnsigned<decltype(d)> du;
+  const auto broadcast = Set(du, static_cast<uint32_t>(bits));
+  const auto lane_bit = Load(du, kBit);
+  return RebindMask(d, TestBit(broadcast, lane_bit));
+}
+
+// 8-byte lanes: broadcasts `bits` to every lane and tests lane i against the
+// single bit (1 << i). Only the first two table entries are nonzero; the
+// remaining ones are zero-initialized.
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE MFromD<D> LoadMaskBits(D d, uint64_t bits) {
+  alignas(16) static constexpr uint64_t kBit[8] = {1, 2};
+  const RebindToUnsigned<decltype(d)> du;
+  const auto broadcast = Set(du, bits);
+  const auto lane_bit = Load(du, kBit);
+  return RebindMask(d, TestBit(broadcast, lane_bit));
+}
+
+}  // namespace detail
+
+// `bits` points to at least 8 readable bytes, not all of which need be valid.
+// Only the first (MaxLanes(d) + 7) / 8 bytes are copied; they are then
+// expanded into a mask by the lane-size-specific detail::LoadMaskBits.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API MFromD<D> LoadMaskBits(D d, const uint8_t* HWY_RESTRICT bits) {
+  // Zero-initialized so that bytes which are not copied read as "false".
+  uint64_t packed = 0;
+  CopyBytes<(MaxLanes(d) + 7) / 8>(bits, &packed);
+  return detail::LoadMaskBits(d, packed);
+}
+
+// ------------------------------ Mask
+
+namespace detail {
+
+// Full vector of 1-byte lanes: packs the 16 lanes into the low 16 bits of the
+// result, handling the two 64-bit halves of the vector separately.
+template <typename T>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<1> /*tag*/,
+                                 const Mask128<T> mask) {
+  // Multiplier whose product leaves the packed bits of eight mask bytes in
+  // the uppermost byte of the 64-bit result.
+  constexpr uint64_t kMagic = 0x103070F1F3F80ULL;
+
+  alignas(16) uint64_t halves[2];
+  wasm_v128_store(halves, mask.raw);
+
+  // Lower half ends up in bits [0, 8), upper half in bits [8, 16).
+  const uint64_t bits_lo = (halves[0] * kMagic) >> 56;
+  const uint64_t bits_hi = ((halves[1] * kMagic) >> 48) & 0xFF00;
+  return bits_hi + bits_lo;
+}
+
+// 64-bit vector (eight 1-byte lanes): only the lower 64-bit lane of the raw
+// vector is used, so a single multiply-and-shift suffices.
+template <typename T>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<1> /*tag*/,
+                                 const Mask128<T, 8> mask) {
+  constexpr uint64_t kMagic = 0x103070F1F3F80ULL;
+  const uint64_t lower_half =
+      static_cast<uint64_t>(wasm_i64x2_extract_lane(mask.raw, 0));
+  return (lower_half * kMagic) >> 56;
+}
+
+// 32-bit or smaller vectors of 1-byte lanes: bytes past the N valid ones may
+// hold arbitrary values and must be cleared before the multiply.
+template <typename T, size_t N, HWY_IF_V_SIZE_LE(T, N, 4)>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<1> /*tag*/,
+                                 const Mask128<T, N> mask) {
+  constexpr uint64_t kMagic = 0x103070F1F3F80ULL;
+  // Keeps only the N lowest bytes.
+  constexpr uint64_t kValidBytes = (1ULL << (N * 8)) - 1;
+
+  const uint64_t lower_half =
+      static_cast<uint64_t>(wasm_i64x2_extract_lane(mask.raw, 0));
+  const uint64_t defined_bytes = lower_half & kValidBytes;
+  return (defined_bytes * kMagic) >> 56;
+}
+
+// 2-byte lanes: narrows every 16-bit mask lane to 8 bits and then defers to
+// the 1-byte implementation.
+template <typename T, size_t N>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<2> /*tag*/,
+                                 const Mask128<T, N> mask) {
+  // The narrowing drops the useless lower half of each u16 while preserving
+  // its sign bit; the second operand only supplies zeros for the upper lanes.
+  const Mask128<uint8_t, N> as_bytes{
+      wasm_i8x16_narrow_i16x8(mask.raw, wasm_i16x8_splat(0))};
+  return BitsFromMask(hwy::SizeTag<1>(), as_bytes);
+}
+
+// 4-byte lanes: keeps bit i of lane i, then ORs the four lanes together.
+template <typename T, size_t N>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<4> /*tag*/,
+                                 const Mask128<T, N> mask) {
+  const __i32x4 lane_bit = wasm_i32x4_make(1, 2, 4, 8);
+  const __i32x4 selected =
+      wasm_v128_and(static_cast<__i32x4>(mask.raw), lane_bit);
+
+  alignas(16) uint32_t per_lane[4];
+  wasm_v128_store(per_lane, selected);
+  return per_lane[0] | per_lane[1] | per_lane[2] | per_lane[3];
+}
+
+// 8-byte lanes: keeps bit i of lane i, then ORs the two lanes together.
+template <typename T, size_t N>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<8> /*tag*/,
+                                 const Mask128<T, N> mask) {
+  const __i64x2 lane_bit = wasm_i64x2_make(1, 2);
+  const __i64x2 selected =
+      wasm_v128_and(static_cast<__i64x2>(mask.raw), lane_bit);
+
+  alignas(16) uint64_t per_lane[2];
+  wasm_v128_store(per_lane, selected);
+  return per_lane[0] | per_lane[1];
+}
+
+// Restricts a BitsFromMask result to its lowest N bits. A vector occupying
+// all 16 bytes has no inactive lanes, so its bits are returned unchanged.
+template <typename T, size_t N>
+constexpr uint64_t OnlyActive(uint64_t bits) {
+  return ((sizeof(T) * N) != 16) ? (bits & ((1ull << N) - 1)) : bits;
+}
+
+// Returns a vector constant in which every byte with index >= N is 0xFF and
+// every byte below N is 0. Cases are listed in ascending N; each uses the
+// widest lane type that can express the boundary. Any N not listed explicitly
+// falls through to the final case, which has only byte 15 set.
+template <size_t N>
+constexpr __i8x16 BytesAbove() {
+  return /**/
+      (N == 0)   ? wasm_i32x4_make(-1, -1, -1, -1)
+      : (N == 1) ? wasm_i8x16_make(0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+                                   -1, -1, -1, -1, -1)
+      : (N == 2) ? wasm_i16x8_make(0, -1, -1, -1, -1, -1, -1, -1)
+      : (N == 3) ? wasm_i8x16_make(0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+                                   -1, -1, -1, -1)
+      : (N == 4) ? wasm_i32x4_make(0, -1, -1, -1)
+      : (N == 5) ? wasm_i8x16_make(0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1,
+                                   -1, -1, -1, -1)
+      : (N == 6) ? wasm_i16x8_make(0, 0, 0, -1, -1, -1, -1, -1)
+      : (N == 7) ? wasm_i8x16_make(0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1,
+                                   -1, -1, -1)
+      : (N == 8) ? wasm_i32x4_make(0, 0, -1, -1)
+      : (N == 9) ? wasm_i8x16_make(0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1,
+                                   -1, -1, -1)
+      : (N == 10) ? wasm_i16x8_make(0, 0, 0, 0, 0, -1, -1, -1)
+      : (N == 11)
+          ? wasm_i8x16_make(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1)
+      : (N == 12) ? wasm_i32x4_make(0, 0, 0, -1)
+      : (N == 13)
+          ? wasm_i8x16_make(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1)
+      : (N == 14) ? wasm_i16x8_make(0, 0, 0, 0, 0, 0, 0, -1)
+      : (N == 16) ? wasm_i32x4_make(0, 0, 0, 0)
+                  : wasm_i8x16_make(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                    -1);
+}
+
+// Entry point: dispatches on the lane size in bytes, then discards any bits
+// that belong to lanes beyond the N active ones.
+template <typename T, size_t N>
+HWY_INLINE uint64_t BitsFromMask(const Mask128<T, N> mask) {
+  const uint64_t raw_bits = BitsFromMask(hwy::SizeTag<sizeof(T)>(), mask);
+  return OnlyActive<T, N>(raw_bits);
+}
+
+// Full vector, 1-byte lanes: the number of true lanes equals the population
+// count of the packed mask bits.
+template <typename T>
+HWY_INLINE size_t CountTrue(hwy::SizeTag<1> tag, const Mask128<T> m) {
+  const uint64_t lane_bits = BitsFromMask(tag, m);
+  return PopCount(lane_bits);
+}
+
+// Full vector, 2-byte lanes: the number of true lanes equals the population
+// count of the packed mask bits.
+template <typename T>
+HWY_INLINE size_t CountTrue(hwy::SizeTag<2> tag, const Mask128<T> m) {
+  const uint64_t lane_bits = BitsFromMask(tag, m);
+  return PopCount(lane_bits);
+}
+
+// Full vector, 4-byte lanes: keeps a different single bit in each lane so
+// that the two 64-bit halves can be ORed without collisions, then counts the
+// surviving bits.
+template <typename T>
+HWY_INLINE size_t CountTrue(hwy::SizeTag<4> /*tag*/, const Mask128<T> m) {
+  const __i32x4 lane_bit = wasm_i32x4_make(1, 2, 4, 8);
+  const __i32x4 selected = wasm_v128_and(m.raw, lane_bit);
+
+  alignas(16) uint64_t halves[2];
+  wasm_v128_store(halves, selected);
+  return PopCount(halves[0] | halves[1]);
+}
+
+// Full vector, 8-byte lanes: the two lanes are stored as signed integers and
+// the negated sum is returned, which is the count when true lanes are -1.
+template <typename T>
+HWY_INLINE size_t CountTrue(hwy::SizeTag<8> /*tag*/, const Mask128<T> m) {
+  alignas(16) int64_t lanes[2];
+  wasm_v128_store(lanes, m.raw);
+  const int64_t negated_count = lanes[0] + lanes[1];
+  return static_cast<size_t>(-negated_count);
+}
+
+}  // namespace detail
+
+// `bits` points to at least 8 writable bytes. Writes one bit per lane of
+// `mask` (rounded up to whole bytes) and returns the number of bytes written.
+template <class D>
+HWY_API size_t StoreMaskBits(D d, const MFromD<D> mask, uint8_t* bits) {
+  const size_t kNumBytes = (d.MaxLanes() + 7) / 8;
+  const uint64_t packed = detail::BitsFromMask(mask);
+  CopyBytes<kNumBytes>(&packed, bits);
+  return kNumBytes;
+}
+
+// Full vector: forwards to the detail overload chosen by the lane size.
+template <class D, HWY_IF_V_SIZE_D(D, 16)>
+HWY_API size_t CountTrue(D /* tag */, const MFromD<D> m) {
+  using LaneSize = hwy::SizeTag<sizeof(TFromD<D>)>;
+  return detail::CountTrue(LaneSize(), m);
+}
+
+// Partial vector: clears the bytes that lie beyond the vector and then counts
+// as if the mask were a full vector.
+template <class D, typename T = TFromD<D>, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API size_t CountTrue(D d, MFromD<D> m) {
+  // 0xFF in every byte at or above d.MaxBytes(); those bytes are undefined in
+  // `m` and must be zero so that they are not counted.
+  const MFromD<D> unused_bytes{detail::BytesAbove<d.MaxBytes()>()};
+  const Mask128<T> cleared{AndNot(unused_bytes, m).raw};
+  return CountTrue(Full128<T>(), cleared);
+}
+
+// Full vector: true iff the vector form of the mask has no bit set.
+template <class D, HWY_IF_V_SIZE_D(D, 16)>
+HWY_API bool AllFalse(D d, const MFromD<D> m) {
+  const Full128<int8_t> d8;
+  const auto as_bytes = BitCast(d8, VecFromMask(d, m));
+  const bool any_bit_set = wasm_v128_any_true(as_bytes.raw);
+  return !any_bit_set;
+}
+
+// Full vector: one overload per lane size, each forwarding to the all_true
+// intrinsic of the matching lane width.
+namespace detail {
+
+// 1-byte lanes.
+template <typename T>
+HWY_INLINE bool AllTrue(hwy::SizeTag<1> /*lane_size*/, const Mask128<T> m) {
+  const bool every_lane_set = wasm_i8x16_all_true(m.raw);
+  return every_lane_set;
+}
+
+// 2-byte lanes.
+template <typename T>
+HWY_INLINE bool AllTrue(hwy::SizeTag<2> /*lane_size*/, const Mask128<T> m) {
+  const bool every_lane_set = wasm_i16x8_all_true(m.raw);
+  return every_lane_set;
+}
+
+// 4-byte lanes.
+template <typename T>
+HWY_INLINE bool AllTrue(hwy::SizeTag<4> /*lane_size*/, const Mask128<T> m) {
+  const bool every_lane_set = wasm_i32x4_all_true(m.raw);
+  return every_lane_set;
+}
+
+// 8-byte lanes.
+template <typename T>
+HWY_INLINE bool AllTrue(hwy::SizeTag<8> /*lane_size*/, const Mask128<T> m) {
+  const bool every_lane_set = wasm_i64x2_all_true(m.raw);
+  return every_lane_set;
+}
+
+}  // namespace detail
+
+// Full vector: forwards to the detail overload chosen by the lane size.
+template <class D, typename T = TFromD<D>>
+HWY_API bool AllTrue(D /* tag */, const Mask128<T> m) {
+  using LaneSize = hwy::SizeTag<sizeof(T)>;
+  return detail::AllTrue(LaneSize(), m);
+}
+
+// Partial vectors
+
+template <class D, typename T = TFromD<D>, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API bool AllFalse(D d, const MFromD<D> m) {
+  // Ensure all undefined bytes are 0.
+  const MFromD<D> mask{detail::BytesAbove<d.MaxBytes()>()};
+  return AllFalse(Full128<T>(), Mask128<T>{AndNot(mask, m).raw});
+}
+
+template <class D, typename T = TFromD<D>, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API bool AllTrue(D d, const MFromD<D> m) {
+  // Ensure all undefined bytes are FF.
+  const MFromD<D> mask{detail::BytesAbove<d.MaxBytes()>()};
+  return AllTrue(Full128<T>(), Mask128<T>{Or(mask, m).raw});
+}
+
+// Returns the index of the lowest set bit of the packed mask bits. The helper
+// used is the "_Nonzero32" variant, so `mask` must have at least one true
+// lane.
+template <class D>
+HWY_API size_t FindKnownFirstTrue(D /* tag */, const MFromD<D> mask) {
+  const uint64_t lane_bits = detail::BitsFromMask(mask);
+  return Num0BitsBelowLS1Bit_Nonzero32(static_cast<uint32_t>(lane_bits));
+}
+
+// Returns the index of the lowest set bit of the packed mask bits, or -1 if
+// no lane is true.
+template <class D>
+HWY_API intptr_t FindFirstTrue(D /* tag */, const MFromD<D> mask) {
+  const uint32_t lane_bits = static_cast<uint32_t>(detail::BitsFromMask(mask));
+  if (lane_bits == 0) {
+    return -1;
+  }
+  return static_cast<intptr_t>(Num0BitsBelowLS1Bit_Nonzero32(lane_bits));
+}
+
+// Returns the index of the highest set bit of the packed mask bits, computed
+// as 31 minus the number of leading zero bits. The helper used is the
+// "_Nonzero32" variant, so `mask` must have at least one true lane.
+template <class D>
+HWY_API size_t FindKnownLastTrue(D /* tag */, const MFromD<D> mask) {
+  const uint64_t lane_bits = detail::BitsFromMask(mask);
+  return 31 - Num0BitsAboveMS1Bit_Nonzero32(static_cast<uint32_t>(lane_bits));
+}
+
+// Returns the index of the highest set bit of the packed mask bits (31 minus
+// the number of leading zero bits), or -1 if no lane is true.
+template <class D>
+HWY_API intptr_t FindLastTrue(D /* tag */, const MFromD<D> mask) {
+  const uint32_t lane_bits = static_cast<uint32_t>(detail::BitsFromMask(mask));
+  if (lane_bits == 0) {
+    return -1;
+  }
+  return 31 - static_cast<intptr_t>(Num0BitsAboveMS1Bit_Nonzero32(lane_bits));
+}
+
+// ------------------------------ Compress
+
+namespace detail {
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE Vec128<T, N> IdxFromBits(const uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 256);
+  const Simd<T, N, 0> d;
+  const Rebind<uint8_t, decltype(d)> d8;
+  const Simd<uint16_t, N, 0> du;
+
+  // Returns the byte indices for TableLookupBytes that belong to `mask_bits`
+  // (one vector's worth for each of the 256 combinations of 8 mask bits).
+  // Storing byte indices directly would require 4 KiB, so the table instead
+  // holds one entry per lane with the doubling baked in (2*lane); the code
+  // after the table expands each entry to the byte pair 2*lane + 0..1.
+  alignas(16) static constexpr uint8_t table[256 * 8] = {
+      // PrintCompress16x8Tables (two 8-entry rows per source line)
+      0,  2,  4,  6,  8,  10, 12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      2,  0,  4,  6,  8,  10, 12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      4,  0,  2,  6,  8,  10, 12, 14, /**/ 0, 4,  2,  6,  8,  10, 12, 14,  //
+      2,  4,  0,  6,  8,  10, 12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      6,  0,  2,  4,  8,  10, 12, 14, /**/ 0, 6,  2,  4,  8,  10, 12, 14,  //
+      2,  6,  0,  4,  8,  10, 12, 14, /**/ 0, 2,  6,  4,  8,  10, 12, 14,  //
+      4,  6,  0,  2,  8,  10, 12, 14, /**/ 0, 4,  6,  2,  8,  10, 12, 14,  //
+      2,  4,  6,  0,  8,  10, 12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      8,  0,  2,  4,  6,  10, 12, 14, /**/ 0, 8,  2,  4,  6,  10, 12, 14,  //
+      2,  8,  0,  4,  6,  10, 12, 14, /**/ 0, 2,  8,  4,  6,  10, 12, 14,  //
+      4,  8,  0,  2,  6,  10, 12, 14, /**/ 0, 4,  8,  2,  6,  10, 12, 14,  //
+      2,  4,  8,  0,  6,  10, 12, 14, /**/ 0, 2,  4,  8,  6,  10, 12, 14,  //
+      6,  8,  0,  2,  4,  10, 12, 14, /**/ 0, 6,  8,  2,  4,  10, 12, 14,  //
+      2,  6,  8,  0,  4,  10, 12, 14, /**/ 0, 2,  6,  8,  4,  10, 12, 14,  //
+      4,  6,  8,  0,  2,  10, 12, 14, /**/ 0, 4,  6,  8,  2,  10, 12, 14,  //
+      2,  4,  6,  8,  0,  10, 12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      10, 0,  2,  4,  6,  8,  12, 14, /**/ 0, 10, 2,  4,  6,  8,  12, 14,  //
+      2,  10, 0,  4,  6,  8,  12, 14, /**/ 0, 2,  10, 4,  6,  8,  12, 14,  //
+      4,  10, 0,  2,  6,  8,  12, 14, /**/ 0, 4,  10, 2,  6,  8,  12, 14,  //
+      2,  4,  10, 0,  6,  8,  12, 14, /**/ 0, 2,  4,  10, 6,  8,  12, 14,  //
+      6,  10, 0,  2,  4,  8,  12, 14, /**/ 0, 6,  10, 2,  4,  8,  12, 14,  //
+      2,  6,  10, 0,  4,  8,  12, 14, /**/ 0, 2,  6,  10, 4,  8,  12, 14,  //
+      4,  6,  10, 0,  2,  8,  12, 14, /**/ 0, 4,  6,  10, 2,  8,  12, 14,  //
+      2,  4,  6,  10, 0,  8,  12, 14, /**/ 0, 2,  4,  6,  10, 8,  12, 14,  //
+      8,  10, 0,  2,  4,  6,  12, 14, /**/ 0, 8,  10, 2,  4,  6,  12, 14,  //
+      2,  8,  10, 0,  4,  6,  12, 14, /**/ 0, 2,  8,  10, 4,  6,  12, 14,  //
+      4,  8,  10, 0,  2,  6,  12, 14, /**/ 0, 4,  8,  10, 2,  6,  12, 14,  //
+      2,  4,  8,  10, 0,  6,  12, 14, /**/ 0, 2,  4,  8,  10, 6,  12, 14,  //
+      6,  8,  10, 0,  2,  4,  12, 14, /**/ 0, 6,  8,  10, 2,  4,  12, 14,  //
+      2,  6,  8,  10, 0,  4,  12, 14, /**/ 0, 2,  6,  8,  10, 4,  12, 14,  //
+      4,  6,  8,  10, 0,  2,  12, 14, /**/ 0, 4,  6,  8,  10, 2,  12, 14,  //
+      2,  4,  6,  8,  10, 0,  12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      12, 0,  2,  4,  6,  8,  10, 14, /**/ 0, 12, 2,  4,  6,  8,  10, 14,  //
+      2,  12, 0,  4,  6,  8,  10, 14, /**/ 0, 2,  12, 4,  6,  8,  10, 14,  //
+      4,  12, 0,  2,  6,  8,  10, 14, /**/ 0, 4,  12, 2,  6,  8,  10, 14,  //
+      2,  4,  12, 0,  6,  8,  10, 14, /**/ 0, 2,  4,  12, 6,  8,  10, 14,  //
+      6,  12, 0,  2,  4,  8,  10, 14, /**/ 0, 6,  12, 2,  4,  8,  10, 14,  //
+      2,  6,  12, 0,  4,  8,  10, 14, /**/ 0, 2,  6,  12, 4,  8,  10, 14,  //
+      4,  6,  12, 0,  2,  8,  10, 14, /**/ 0, 4,  6,  12, 2,  8,  10, 14,  //
+      2,  4,  6,  12, 0,  8,  10, 14, /**/ 0, 2,  4,  6,  12, 8,  10, 14,  //
+      8,  12, 0,  2,  4,  6,  10, 14, /**/ 0, 8,  12, 2,  4,  6,  10, 14,  //
+      2,  8,  12, 0,  4,  6,  10, 14, /**/ 0, 2,  8,  12, 4,  6,  10, 14,  //
+      4,  8,  12, 0,  2,  6,  10, 14, /**/ 0, 4,  8,  12, 2,  6,  10, 14,  //
+      2,  4,  8,  12, 0,  6,  10, 14, /**/ 0, 2,  4,  8,  12, 6,  10, 14,  //
+      6,  8,  12, 0,  2,  4,  10, 14, /**/ 0, 6,  8,  12, 2,  4,  10, 14,  //
+      2,  6,  8,  12, 0,  4,  10, 14, /**/ 0, 2,  6,  8,  12, 4,  10, 14,  //
+      4,  6,  8,  12, 0,  2,  10, 14, /**/ 0, 4,  6,  8,  12, 2,  10, 14,  //
+      2,  4,  6,  8,  12, 0,  10, 14, /**/ 0, 2,  4,  6,  8,  12, 10, 14,  //
+      10, 12, 0,  2,  4,  6,  8,  14, /**/ 0, 10, 12, 2,  4,  6,  8,  14,  //
+      2,  10, 12, 0,  4,  6,  8,  14, /**/ 0, 2,  10, 12, 4,  6,  8,  14,  //
+      4,  10, 12, 0,  2,  6,  8,  14, /**/ 0, 4,  10, 12, 2,  6,  8,  14,  //
+      2,  4,  10, 12, 0,  6,  8,  14, /**/ 0, 2,  4,  10, 12, 6,  8,  14,  //
+      6,  10, 12, 0,  2,  4,  8,  14, /**/ 0, 6,  10, 12, 2,  4,  8,  14,  //
+      2,  6,  10, 12, 0,  4,  8,  14, /**/ 0, 2,  6,  10, 12, 4,  8,  14,  //
+      4,  6,  10, 12, 0,  2,  8,  14, /**/ 0, 4,  6,  10, 12, 2,  8,  14,  //
+      2,  4,  6,  10, 12, 0,  8,  14, /**/ 0, 2,  4,  6,  10, 12, 8,  14,  //
+      8,  10, 12, 0,  2,  4,  6,  14, /**/ 0, 8,  10, 12, 2,  4,  6,  14,  //
+      2,  8,  10, 12, 0,  4,  6,  14, /**/ 0, 2,  8,  10, 12, 4,  6,  14,  //
+      4,  8,  10, 12, 0,  2,  6,  14, /**/ 0, 4,  8,  10, 12, 2,  6,  14,  //
+      2,  4,  8,  10, 12, 0,  6,  14, /**/ 0, 2,  4,  8,  10, 12, 6,  14,  //
+      6,  8,  10, 12, 0,  2,  4,  14, /**/ 0, 6,  8,  10, 12, 2,  4,  14,  //
+      2,  6,  8,  10, 12, 0,  4,  14, /**/ 0, 2,  6,  8,  10, 12, 4,  14,  //
+      4,  6,  8,  10, 12, 0,  2,  14, /**/ 0, 4,  6,  8,  10, 12, 2,  14,  //
+      2,  4,  6,  8,  10, 12, 0,  14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      14, 0,  2,  4,  6,  8,  10, 12, /**/ 0, 14, 2,  4,  6,  8,  10, 12,  //
+      2,  14, 0,  4,  6,  8,  10, 12, /**/ 0, 2,  14, 4,  6,  8,  10, 12,  //
+      4,  14, 0,  2,  6,  8,  10, 12, /**/ 0, 4,  14, 2,  6,  8,  10, 12,  //
+      2,  4,  14, 0,  6,  8,  10, 12, /**/ 0, 2,  4,  14, 6,  8,  10, 12,  //
+      6,  14, 0,  2,  4,  8,  10, 12, /**/ 0, 6,  14, 2,  4,  8,  10, 12,  //
+      2,  6,  14, 0,  4,  8,  10, 12, /**/ 0, 2,  6,  14, 4,  8,  10, 12,  //
+      4,  6,  14, 0,  2,  8,  10, 12, /**/ 0, 4,  6,  14, 2,  8,  10, 12,  //
+      2,  4,  6,  14, 0,  8,  10, 12, /**/ 0, 2,  4,  6,  14, 8,  10, 12,  //
+      8,  14, 0,  2,  4,  6,  10, 12, /**/ 0, 8,  14, 2,  4,  6,  10, 12,  //
+      2,  8,  14, 0,  4,  6,  10, 12, /**/ 0, 2,  8,  14, 4,  6,  10, 12,  //
+      4,  8,  14, 0,  2,  6,  10, 12, /**/ 0, 4,  8,  14, 2,  6,  10, 12,  //
+      2,  4,  8,  14, 0,  6,  10, 12, /**/ 0, 2,  4,  8,  14, 6,  10, 12,  //
+      6,  8,  14, 0,  2,  4,  10, 12, /**/ 0, 6,  8,  14, 2,  4,  10, 12,  //
+      2,  6,  8,  14, 0,  4,  10, 12, /**/ 0, 2,  6,  8,  14, 4,  10, 12,  //
+      4,  6,  8,  14, 0,  2,  10, 12, /**/ 0, 4,  6,  8,  14, 2,  10, 12,  //
+      2,  4,  6,  8,  14, 0,  10, 12, /**/ 0, 2,  4,  6,  8,  14, 10, 12,  //
+      10, 14, 0,  2,  4,  6,  8,  12, /**/ 0, 10, 14, 2,  4,  6,  8,  12,  //
+      2,  10, 14, 0,  4,  6,  8,  12, /**/ 0, 2,  10, 14, 4,  6,  8,  12,  //
+      4,  10, 14, 0,  2,  6,  8,  12, /**/ 0, 4,  10, 14, 2,  6,  8,  12,  //
+      2,  4,  10, 14, 0,  6,  8,  12, /**/ 0, 2,  4,  10, 14, 6,  8,  12,  //
+      6,  10, 14, 0,  2,  4,  8,  12, /**/ 0, 6,  10, 14, 2,  4,  8,  12,  //
+      2,  6,  10, 14, 0,  4,  8,  12, /**/ 0, 2,  6,  10, 14, 4,  8,  12,  //
+      4,  6,  10, 14, 0,  2,  8,  12, /**/ 0, 4,  6,  10, 14, 2,  8,  12,  //
+      2,  4,  6,  10, 14, 0,  8,  12, /**/ 0, 2,  4,  6,  10, 14, 8,  12,  //
+      8,  10, 14, 0,  2,  4,  6,  12, /**/ 0, 8,  10, 14, 2,  4,  6,  12,  //
+      2,  8,  10, 14, 0,  4,  6,  12, /**/ 0, 2,  8,  10, 14, 4,  6,  12,  //
+      4,  8,  10, 14, 0,  2,  6,  12, /**/ 0, 4,  8,  10, 14, 2,  6,  12,  //
+      2,  4,  8,  10, 14, 0,  6,  12, /**/ 0, 2,  4,  8,  10, 14, 6,  12,  //
+      6,  8,  10, 14, 0,  2,  4,  12, /**/ 0, 6,  8,  10, 14, 2,  4,  12,  //
+      2,  6,  8,  10, 14, 0,  4,  12, /**/ 0, 2,  6,  8,  10, 14, 4,  12,  //
+      4,  6,  8,  10, 14, 0,  2,  12, /**/ 0, 4,  6,  8,  10, 14, 2,  12,  //
+      2,  4,  6,  8,  10, 14, 0,  12, /**/ 0, 2,  4,  6,  8,  10, 14, 12,  //
+      12, 14, 0,  2,  4,  6,  8,  10, /**/ 0, 12, 14, 2,  4,  6,  8,  10,  //
+      2,  12, 14, 0,  4,  6,  8,  10, /**/ 0, 2,  12, 14, 4,  6,  8,  10,  //
+      4,  12, 14, 0,  2,  6,  8,  10, /**/ 0, 4,  12, 14, 2,  6,  8,  10,  //
+      2,  4,  12, 14, 0,  6,  8,  10, /**/ 0, 2,  4,  12, 14, 6,  8,  10,  //
+      6,  12, 14, 0,  2,  4,  8,  10, /**/ 0, 6,  12, 14, 2,  4,  8,  10,  //
+      2,  6,  12, 14, 0,  4,  8,  10, /**/ 0, 2,  6,  12, 14, 4,  8,  10,  //
+      4,  6,  12, 14, 0,  2,  8,  10, /**/ 0, 4,  6,  12, 14, 2,  8,  10,  //
+      2,  4,  6,  12, 14, 0,  8,  10, /**/ 0, 2,  4,  6,  12, 14, 8,  10,  //
+      8,  12, 14, 0,  2,  4,  6,  10, /**/ 0, 8,  12, 14, 2,  4,  6,  10,  //
+      2,  8,  12, 14, 0,  4,  6,  10, /**/ 0, 2,  8,  12, 14, 4,  6,  10,  //
+      4,  8,  12, 14, 0,  2,  6,  10, /**/ 0, 4,  8,  12, 14, 2,  6,  10,  //
+      2,  4,  8,  12, 14, 0,  6,  10, /**/ 0, 2,  4,  8,  12, 14, 6,  10,  //
+      6,  8,  12, 14, 0,  2,  4,  10, /**/ 0, 6,  8,  12, 14, 2,  4,  10,  //
+      2,  6,  8,  12, 14, 0,  4,  10, /**/ 0, 2,  6,  8,  12, 14, 4,  10,  //
+      4,  6,  8,  12, 14, 0,  2,  10, /**/ 0, 4,  6,  8,  12, 14, 2,  10,  //
+      2,  4,  6,  8,  12, 14, 0,  10, /**/ 0, 2,  4,  6,  8,  12, 14, 10,  //
+      10, 12, 14, 0,  2,  4,  6,  8,  /**/ 0, 10, 12, 14, 2,  4,  6,  8,   //
+      2,  10, 12, 14, 0,  4,  6,  8,  /**/ 0, 2,  10, 12, 14, 4,  6,  8,   //
+      4,  10, 12, 14, 0,  2,  6,  8,  /**/ 0, 4,  10, 12, 14, 2,  6,  8,   //
+      2,  4,  10, 12, 14, 0,  6,  8,  /**/ 0, 2,  4,  10, 12, 14, 6,  8,   //
+      6,  10, 12, 14, 0,  2,  4,  8,  /**/ 0, 6,  10, 12, 14, 2,  4,  8,   //
+      2,  6,  10, 12, 14, 0,  4,  8,  /**/ 0, 2,  6,  10, 12, 14, 4,  8,   //
+      4,  6,  10, 12, 14, 0,  2,  8,  /**/ 0, 4,  6,  10, 12, 14, 2,  8,   //
+      2,  4,  6,  10, 12, 14, 0,  8,  /**/ 0, 2,  4,  6,  10, 12, 14, 8,   //
+      8,  10, 12, 14, 0,  2,  4,  6,  /**/ 0, 8,  10, 12, 14, 2,  4,  6,   //
+      2,  8,  10, 12, 14, 0,  4,  6,  /**/ 0, 2,  8,  10, 12, 14, 4,  6,   //
+      4,  8,  10, 12, 14, 0,  2,  6,  /**/ 0, 4,  8,  10, 12, 14, 2,  6,   //
+      2,  4,  8,  10, 12, 14, 0,  6,  /**/ 0, 2,  4,  8,  10, 12, 14, 6,   //
+      6,  8,  10, 12, 14, 0,  2,  4,  /**/ 0, 6,  8,  10, 12, 14, 2,  4,   //
+      2,  6,  8,  10, 12, 14, 0,  4,  /**/ 0, 2,  6,  8,  10, 12, 14, 4,   //
+      4,  6,  8,  10, 12, 14, 0,  2,  /**/ 0, 4,  6,  8,  10, 12, 14, 2,   //
+      2,  4,  6,  8,  10, 12, 14, 0,  /**/ 0, 2,  4,  6,  8,  10, 12, 14};
+
+  const Vec128<uint8_t, 2 * N> byte_idx{Load(d8, table + mask_bits * 8).raw};
+  const Vec128<uint16_t, N> pairs = ZipLower(byte_idx, byte_idx);  // zip w/ self
+  return BitCast(d, pairs + Set(du, 0x0100));  // adds 1 to each upper byte
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE Vec128<T, N> IdxFromNotBits(const uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 256);
+  const Simd<T, N, 0> d;
+  const Rebind<uint8_t, decltype(d)> d8;
+  const Simd<uint16_t, N, 0> du;
+
+  // We need byte indices for TableLookupBytes (one vector's worth for each of
+  // 256 combinations of 8 mask bits). Loading them directly requires 4 KiB. We
+  // can instead store lane indices and convert to byte indices (2*lane + 0..1),
+  // with the doubling baked into the table. Unpacking nibbles is likely more
+  // costly than the higher cache footprint from storing bytes.
+  alignas(16) static constexpr uint8_t table[256 * 8] = {
+      // PrintCompressNot16x8Tables
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  6,  8,  10, 12, 14, 0,   //
+      0, 4,  6,  8,  10, 12, 14, 2,  /**/ 4,  6,  8,  10, 12, 14, 0,  2,   //
+      0, 2,  6,  8,  10, 12, 14, 4,  /**/ 2,  6,  8,  10, 12, 14, 0,  4,   //
+      0, 6,  8,  10, 12, 14, 2,  4,  /**/ 6,  8,  10, 12, 14, 0,  2,  4,   //
+      0, 2,  4,  8,  10, 12, 14, 6,  /**/ 2,  4,  8,  10, 12, 14, 0,  6,   //
+      0, 4,  8,  10, 12, 14, 2,  6,  /**/ 4,  8,  10, 12, 14, 0,  2,  6,   //
+      0, 2,  8,  10, 12, 14, 4,  6,  /**/ 2,  8,  10, 12, 14, 0,  4,  6,   //
+      0, 8,  10, 12, 14, 2,  4,  6,  /**/ 8,  10, 12, 14, 0,  2,  4,  6,   //
+      0, 2,  4,  6,  10, 12, 14, 8,  /**/ 2,  4,  6,  10, 12, 14, 0,  8,   //
+      0, 4,  6,  10, 12, 14, 2,  8,  /**/ 4,  6,  10, 12, 14, 0,  2,  8,   //
+      0, 2,  6,  10, 12, 14, 4,  8,  /**/ 2,  6,  10, 12, 14, 0,  4,  8,   //
+      0, 6,  10, 12, 14, 2,  4,  8,  /**/ 6,  10, 12, 14, 0,  2,  4,  8,   //
+      0, 2,  4,  10, 12, 14, 6,  8,  /**/ 2,  4,  10, 12, 14, 0,  6,  8,   //
+      0, 4,  10, 12, 14, 2,  6,  8,  /**/ 4,  10, 12, 14, 0,  2,  6,  8,   //
+      0, 2,  10, 12, 14, 4,  6,  8,  /**/ 2,  10, 12, 14, 0,  4,  6,  8,   //
+      0, 10, 12, 14, 2,  4,  6,  8,  /**/ 10, 12, 14, 0,  2,  4,  6,  8,   //
+      0, 2,  4,  6,  8,  12, 14, 10, /**/ 2,  4,  6,  8,  12, 14, 0,  10,  //
+      0, 4,  6,  8,  12, 14, 2,  10, /**/ 4,  6,  8,  12, 14, 0,  2,  10,  //
+      0, 2,  6,  8,  12, 14, 4,  10, /**/ 2,  6,  8,  12, 14, 0,  4,  10,  //
+      0, 6,  8,  12, 14, 2,  4,  10, /**/ 6,  8,  12, 14, 0,  2,  4,  10,  //
+      0, 2,  4,  8,  12, 14, 6,  10, /**/ 2,  4,  8,  12, 14, 0,  6,  10,  //
+      0, 4,  8,  12, 14, 2,  6,  10, /**/ 4,  8,  12, 14, 0,  2,  6,  10,  //
+      0, 2,  8,  12, 14, 4,  6,  10, /**/ 2,  8,  12, 14, 0,  4,  6,  10,  //
+      0, 8,  12, 14, 2,  4,  6,  10, /**/ 8,  12, 14, 0,  2,  4,  6,  10,  //
+      0, 2,  4,  6,  12, 14, 8,  10, /**/ 2,  4,  6,  12, 14, 0,  8,  10,  //
+      0, 4,  6,  12, 14, 2,  8,  10, /**/ 4,  6,  12, 14, 0,  2,  8,  10,  //
+      0, 2,  6,  12, 14, 4,  8,  10, /**/ 2,  6,  12, 14, 0,  4,  8,  10,  //
+      0, 6,  12, 14, 2,  4,  8,  10, /**/ 6,  12, 14, 0,  2,  4,  8,  10,  //
+      0, 2,  4,  12, 14, 6,  8,  10, /**/ 2,  4,  12, 14, 0,  6,  8,  10,  //
+      0, 4,  12, 14, 2,  6,  8,  10, /**/ 4,  12, 14, 0,  2,  6,  8,  10,  //
+      0, 2,  12, 14, 4,  6,  8,  10, /**/ 2,  12, 14, 0,  4,  6,  8,  10,  //
+      0, 12, 14, 2,  4,  6,  8,  10, /**/ 12, 14, 0,  2,  4,  6,  8,  10,  //
+      0, 2,  4,  6,  8,  10, 14, 12, /**/ 2,  4,  6,  8,  10, 14, 0,  12,  //
+      0, 4,  6,  8,  10, 14, 2,  12, /**/ 4,  6,  8,  10, 14, 0,  2,  12,  //
+      0, 2,  6,  8,  10, 14, 4,  12, /**/ 2,  6,  8,  10, 14, 0,  4,  12,  //
+      0, 6,  8,  10, 14, 2,  4,  12, /**/ 6,  8,  10, 14, 0,  2,  4,  12,  //
+      0, 2,  4,  8,  10, 14, 6,  12, /**/ 2,  4,  8,  10, 14, 0,  6,  12,  //
+      0, 4,  8,  10, 14, 2,  6,  12, /**/ 4,  8,  10, 14, 0,  2,  6,  12,  //
+      0, 2,  8,  10, 14, 4,  6,  12, /**/ 2,  8,  10, 14, 0,  4,  6,  12,  //
+      0, 8,  10, 14, 2,  4,  6,  12, /**/ 8,  10, 14, 0,  2,  4,  6,  12,  //
+      0, 2,  4,  6,  10, 14, 8,  12, /**/ 2,  4,  6,  10, 14, 0,  8,  12,  //
+      0, 4,  6,  10, 14, 2,  8,  12, /**/ 4,  6,  10, 14, 0,  2,  8,  12,  //
+      0, 2,  6,  10, 14, 4,  8,  12, /**/ 2,  6,  10, 14, 0,  4,  8,  12,  //
+      0, 6,  10, 14, 2,  4,  8,  12, /**/ 6,  10, 14, 0,  2,  4,  8,  12,  //
+      0, 2,  4,  10, 14, 6,  8,  12, /**/ 2,  4,  10, 14, 0,  6,  8,  12,  //
+      0, 4,  10, 14, 2,  6,  8,  12, /**/ 4,  10, 14, 0,  2,  6,  8,  12,  //
+      0, 2,  10, 14, 4,  6,  8,  12, /**/ 2,  10, 14, 0,  4,  6,  8,  12,  //
+      0, 10, 14, 2,  4,  6,  8,  12, /**/ 10, 14, 0,  2,  4,  6,  8,  12,  //
+      0, 2,  4,  6,  8,  14, 10, 12, /**/ 2,  4,  6,  8,  14, 0,  10, 12,  //
+      0, 4,  6,  8,  14, 2,  10, 12, /**/ 4,  6,  8,  14, 0,  2,  10, 12,  //
+      0, 2,  6,  8,  14, 4,  10, 12, /**/ 2,  6,  8,  14, 0,  4,  10, 12,  //
+      0, 6,  8,  14, 2,  4,  10, 12, /**/ 6,  8,  14, 0,  2,  4,  10, 12,  //
+      0, 2,  4,  8,  14, 6,  10, 12, /**/ 2,  4,  8,  14, 0,  6,  10, 12,  //
+      0, 4,  8,  14, 2,  6,  10, 12, /**/ 4,  8,  14, 0,  2,  6,  10, 12,  //
+      0, 2,  8,  14, 4,  6,  10, 12, /**/ 2,  8,  14, 0,  4,  6,  10, 12,  //
+      0, 8,  14, 2,  4,  6,  10, 12, /**/ 8,  14, 0,  2,  4,  6,  10, 12,  //
+      0, 2,  4,  6,  14, 8,  10, 12, /**/ 2,  4,  6,  14, 0,  8,  10, 12,  //
+      0, 4,  6,  14, 2,  8,  10, 12, /**/ 4,  6,  14, 0,  2,  8,  10, 12,  //
+      0, 2,  6,  14, 4,  8,  10, 12, /**/ 2,  6,  14, 0,  4,  8,  10, 12,  //
+      0, 6,  14, 2,  4,  8,  10, 12, /**/ 6,  14, 0,  2,  4,  8,  10, 12,  //
+      0, 2,  4,  14, 6,  8,  10, 12, /**/ 2,  4,  14, 0,  6,  8,  10, 12,  //
+      0, 4,  14, 2,  6,  8,  10, 12, /**/ 4,  14, 0,  2,  6,  8,  10, 12,  //
+      0, 2,  14, 4,  6,  8,  10, 12, /**/ 2,  14, 0,  4,  6,  8,  10, 12,  //
+      0, 14, 2,  4,  6,  8,  10, 12, /**/ 14, 0,  2,  4,  6,  8,  10, 12,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  6,  8,  10, 12, 0,  14,  //
+      0, 4,  6,  8,  10, 12, 2,  14, /**/ 4,  6,  8,  10, 12, 0,  2,  14,  //
+      0, 2,  6,  8,  10, 12, 4,  14, /**/ 2,  6,  8,  10, 12, 0,  4,  14,  //
+      0, 6,  8,  10, 12, 2,  4,  14, /**/ 6,  8,  10, 12, 0,  2,  4,  14,  //
+      0, 2,  4,  8,  10, 12, 6,  14, /**/ 2,  4,  8,  10, 12, 0,  6,  14,  //
+      0, 4,  8,  10, 12, 2,  6,  14, /**/ 4,  8,  10, 12, 0,  2,  6,  14,  //
+      0, 2,  8,  10, 12, 4,  6,  14, /**/ 2,  8,  10, 12, 0,  4,  6,  14,  //
+      0, 8,  10, 12, 2,  4,  6,  14, /**/ 8,  10, 12, 0,  2,  4,  6,  14,  //
+      0, 2,  4,  6,  10, 12, 8,  14, /**/ 2,  4,  6,  10, 12, 0,  8,  14,  //
+      0, 4,  6,  10, 12, 2,  8,  14, /**/ 4,  6,  10, 12, 0,  2,  8,  14,  //
+      0, 2,  6,  10, 12, 4,  8,  14, /**/ 2,  6,  10, 12, 0,  4,  8,  14,  //
+      0, 6,  10, 12, 2,  4,  8,  14, /**/ 6,  10, 12, 0,  2,  4,  8,  14,  //
+      0, 2,  4,  10, 12, 6,  8,  14, /**/ 2,  4,  10, 12, 0,  6,  8,  14,  //
+      0, 4,  10, 12, 2,  6,  8,  14, /**/ 4,  10, 12, 0,  2,  6,  8,  14,  //
+      0, 2,  10, 12, 4,  6,  8,  14, /**/ 2,  10, 12, 0,  4,  6,  8,  14,  //
+      0, 10, 12, 2,  4,  6,  8,  14, /**/ 10, 12, 0,  2,  4,  6,  8,  14,  //
+      0, 2,  4,  6,  8,  12, 10, 14, /**/ 2,  4,  6,  8,  12, 0,  10, 14,  //
+      0, 4,  6,  8,  12, 2,  10, 14, /**/ 4,  6,  8,  12, 0,  2,  10, 14,  //
+      0, 2,  6,  8,  12, 4,  10, 14, /**/ 2,  6,  8,  12, 0,  4,  10, 14,  //
+      0, 6,  8,  12, 2,  4,  10, 14, /**/ 6,  8,  12, 0,  2,  4,  10, 14,  //
+      0, 2,  4,  8,  12, 6,  10, 14, /**/ 2,  4,  8,  12, 0,  6,  10, 14,  //
+      0, 4,  8,  12, 2,  6,  10, 14, /**/ 4,  8,  12, 0,  2,  6,  10, 14,  //
+      0, 2,  8,  12, 4,  6,  10, 14, /**/ 2,  8,  12, 0,  4,  6,  10, 14,  //
+      0, 8,  12, 2,  4,  6,  10, 14, /**/ 8,  12, 0,  2,  4,  6,  10, 14,  //
+      0, 2,  4,  6,  12, 8,  10, 14, /**/ 2,  4,  6,  12, 0,  8,  10, 14,  //
+      0, 4,  6,  12, 2,  8,  10, 14, /**/ 4,  6,  12, 0,  2,  8,  10, 14,  //
+      0, 2,  6,  12, 4,  8,  10, 14, /**/ 2,  6,  12, 0,  4,  8,  10, 14,  //
+      0, 6,  12, 2,  4,  8,  10, 14, /**/ 6,  12, 0,  2,  4,  8,  10, 14,  //
+      0, 2,  4,  12, 6,  8,  10, 14, /**/ 2,  4,  12, 0,  6,  8,  10, 14,  //
+      0, 4,  12, 2,  6,  8,  10, 14, /**/ 4,  12, 0,  2,  6,  8,  10, 14,  //
+      0, 2,  12, 4,  6,  8,  10, 14, /**/ 2,  12, 0,  4,  6,  8,  10, 14,  //
+      0, 12, 2,  4,  6,  8,  10, 14, /**/ 12, 0,  2,  4,  6,  8,  10, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  6,  8,  10, 0,  12, 14,  //
+      0, 4,  6,  8,  10, 2,  12, 14, /**/ 4,  6,  8,  10, 0,  2,  12, 14,  //
+      0, 2,  6,  8,  10, 4,  12, 14, /**/ 2,  6,  8,  10, 0,  4,  12, 14,  //
+      0, 6,  8,  10, 2,  4,  12, 14, /**/ 6,  8,  10, 0,  2,  4,  12, 14,  //
+      0, 2,  4,  8,  10, 6,  12, 14, /**/ 2,  4,  8,  10, 0,  6,  12, 14,  //
+      0, 4,  8,  10, 2,  6,  12, 14, /**/ 4,  8,  10, 0,  2,  6,  12, 14,  //
+      0, 2,  8,  10, 4,  6,  12, 14, /**/ 2,  8,  10, 0,  4,  6,  12, 14,  //
+      0, 8,  10, 2,  4,  6,  12, 14, /**/ 8,  10, 0,  2,  4,  6,  12, 14,  //
+      0, 2,  4,  6,  10, 8,  12, 14, /**/ 2,  4,  6,  10, 0,  8,  12, 14,  //
+      0, 4,  6,  10, 2,  8,  12, 14, /**/ 4,  6,  10, 0,  2,  8,  12, 14,  //
+      0, 2,  6,  10, 4,  8,  12, 14, /**/ 2,  6,  10, 0,  4,  8,  12, 14,  //
+      0, 6,  10, 2,  4,  8,  12, 14, /**/ 6,  10, 0,  2,  4,  8,  12, 14,  //
+      0, 2,  4,  10, 6,  8,  12, 14, /**/ 2,  4,  10, 0,  6,  8,  12, 14,  //
+      0, 4,  10, 2,  6,  8,  12, 14, /**/ 4,  10, 0,  2,  6,  8,  12, 14,  //
+      0, 2,  10, 4,  6,  8,  12, 14, /**/ 2,  10, 0,  4,  6,  8,  12, 14,  //
+      0, 10, 2,  4,  6,  8,  12, 14, /**/ 10, 0,  2,  4,  6,  8,  12, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  6,  8,  0,  10, 12, 14,  //
+      0, 4,  6,  8,  2,  10, 12, 14, /**/ 4,  6,  8,  0,  2,  10, 12, 14,  //
+      0, 2,  6,  8,  4,  10, 12, 14, /**/ 2,  6,  8,  0,  4,  10, 12, 14,  //
+      0, 6,  8,  2,  4,  10, 12, 14, /**/ 6,  8,  0,  2,  4,  10, 12, 14,  //
+      0, 2,  4,  8,  6,  10, 12, 14, /**/ 2,  4,  8,  0,  6,  10, 12, 14,  //
+      0, 4,  8,  2,  6,  10, 12, 14, /**/ 4,  8,  0,  2,  6,  10, 12, 14,  //
+      0, 2,  8,  4,  6,  10, 12, 14, /**/ 2,  8,  0,  4,  6,  10, 12, 14,  //
+      0, 8,  2,  4,  6,  10, 12, 14, /**/ 8,  0,  2,  4,  6,  10, 12, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  6,  0,  8,  10, 12, 14,  //
+      0, 4,  6,  2,  8,  10, 12, 14, /**/ 4,  6,  0,  2,  8,  10, 12, 14,  //
+      0, 2,  6,  4,  8,  10, 12, 14, /**/ 2,  6,  0,  4,  8,  10, 12, 14,  //
+      0, 6,  2,  4,  8,  10, 12, 14, /**/ 6,  0,  2,  4,  8,  10, 12, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  0,  6,  8,  10, 12, 14,  //
+      0, 4,  2,  6,  8,  10, 12, 14, /**/ 4,  0,  2,  6,  8,  10, 12, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  0,  4,  6,  8,  10, 12, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 0,  2,  4,  6,  8,  10, 12, 14};
+
+  const Vec128<uint8_t, 2 * N> byte_idx{Load(d8, table + mask_bits * 8).raw};
+  const Vec128<uint16_t, N> pairs = ZipLower(byte_idx, byte_idx);
+  return BitCast(d, pairs + Set(du, 0x0100));
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE Vec128<T, N> IdxFromBits(const uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 16);  // one bit per 32-bit lane, at most 4 lanes
+
+  // Only 4 lanes => 16 masks, so a ready-made byte shuffle per mask is cheap.
+  alignas(16) static constexpr uint8_t u8_indices[16 * 16] = {
+      // PrintCompress32x4Tables: row = mask_bits; bytes of set lanes first.
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+      4,  5,  6,  7,  0,  1,  2,  3,  8,  9,  10, 11, 12, 13, 14, 15,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+      8,  9,  10, 11, 0,  1,  2,  3,  4,  5,  6,  7,  12, 13, 14, 15,  //
+      0,  1,  2,  3,  8,  9,  10, 11, 4,  5,  6,  7,  12, 13, 14, 15,  //
+      4,  5,  6,  7,  8,  9,  10, 11, 0,  1,  2,  3,  12, 13, 14, 15,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+      12, 13, 14, 15, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,  //
+      0,  1,  2,  3,  12, 13, 14, 15, 4,  5,  6,  7,  8,  9,  10, 11,  //
+      4,  5,  6,  7,  12, 13, 14, 15, 0,  1,  2,  3,  8,  9,  10, 11,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  12, 13, 14, 15, 8,  9,  10, 11,  //
+      8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  4,  5,  6,  7,   //
+      0,  1,  2,  3,  8,  9,  10, 11, 12, 13, 14, 15, 4,  5,  6,  7,   //
+      4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,   //
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15};
+  const Simd<T, N, 0> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  return BitCast(d, Load(d8, u8_indices + 16 * mask_bits));  // 16-byte rows
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE Vec128<T, N> IdxFromNotBits(const uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 16);  // one bit per 32-bit lane, at most 4 lanes
+
+  // Only 4 lanes => 16 masks, so a ready-made byte shuffle per mask is cheap.
+  alignas(16) static constexpr uint8_t u8_indices[16 * 16] = {
+      // PrintCompressNot32x4Tables: 16 bytes per mask; bytes of clear lanes first.
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 4,  5,
+      6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  0,  1,  2,  3,
+      8,  9,  10, 11, 12, 13, 14, 15, 4,  5,  6,  7,  8,  9,  10, 11, 12, 13,
+      14, 15, 0,  1,  2,  3,  4,  5,  6,  7,  0,  1,  2,  3,  4,  5,  6,  7,
+      12, 13, 14, 15, 8,  9,  10, 11, 4,  5,  6,  7,  12, 13, 14, 15, 0,  1,
+      2,  3,  8,  9,  10, 11, 0,  1,  2,  3,  12, 13, 14, 15, 4,  5,  6,  7,
+      8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
+      10, 11, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
+      4,  5,  6,  7,  8,  9,  10, 11, 0,  1,  2,  3,  12, 13, 14, 15, 0,  1,
+      2,  3,  8,  9,  10, 11, 4,  5,  6,  7,  12, 13, 14, 15, 8,  9,  10, 11,
+      0,  1,  2,  3,  4,  5,  6,  7,  12, 13, 14, 15, 0,  1,  2,  3,  4,  5,
+      6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 4,  5,  6,  7,  0,  1,  2,  3,
+      8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
+      10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,
+      12, 13, 14, 15};
+  const Simd<T, N, 0> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  return BitCast(d, Load(d8, u8_indices + 16 * mask_bits));  // 16-byte rows
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 8)>
+HWY_INLINE Vec128<T, N> IdxFromBits(const uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 4);  // one bit per 64-bit lane, at most 2 lanes
+
+  // Only 2 lanes => 4 masks, so a ready-made byte shuffle per mask is cheap.
+  alignas(16) static constexpr uint8_t u8_indices[4 * 16] = {
+      // PrintCompress64x2Tables: row = mask_bits; bytes of set lanes first.
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15,  // 0b00: keep
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15,  // 0b01: keep
+      8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2,  3,  4,  5,  6,  7,   // 0b10: swap
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15};  // 0b11: keep
+
+  const Simd<T, N, 0> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  return BitCast(d, Load(d8, u8_indices + 16 * mask_bits));  // 16-byte rows
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 8)>
+HWY_INLINE Vec128<T, N> IdxFromNotBits(const uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 4);  // one bit per 64-bit lane, at most 2 lanes
+
+  // Only 2 lanes => 4 masks, so a ready-made byte shuffle per mask is cheap.
+  alignas(16) static constexpr uint8_t u8_indices[4 * 16] = {
+      // PrintCompressNot64x2Tables: row = mask_bits; bytes of clear lanes first.
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15,  // 0b00: keep
+      8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2,  3,  4,  5,  6,  7,   // 0b01: swap
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15,  // 0b10: keep
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15};  // 0b11: keep
+
+  const Simd<T, N, 0> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  return BitCast(d, Load(d8, u8_indices + 16 * mask_bits));  // 16-byte rows
+}
+
+// Helper functions called by both Compress and CompressStore - avoids a
+// redundant BitsFromMask in the latter.
+
+template <typename T, size_t N>  // byte-shuffles v by the table row of mask_bits
+HWY_INLINE Vec128<T, N> Compress(Vec128<T, N> v, const uint64_t mask_bits) {
+  const DFromV<decltype(v)> dv;
+  const RebindToSigned<decltype(dv)> ds;
+  const auto shuffle = BitCast(ds, detail::IdxFromBits<T, N>(mask_bits));
+  return BitCast(dv, TableLookupBytes(BitCast(ds, v), shuffle));
+}
+
+template <typename T, size_t N>  // as Compress, but via the IdxFromNotBits rows
+HWY_INLINE Vec128<T, N> CompressNot(Vec128<T, N> v, const uint64_t mask_bits) {
+  const DFromV<decltype(v)> dv;
+  const RebindToSigned<decltype(dv)> ds;
+  const auto shuffle = BitCast(ds, detail::IdxFromNotBits<T, N>(mask_bits));
+  return BitCast(dv, TableLookupBytes(BitCast(ds, v), shuffle));
+}
+
+}  // namespace detail
+
+template <typename T>  // NOTE(review): presumably a compile-time Compress trait
+struct CompressIsPartition {
+#if HWY_TARGET == HWY_WASM_EMU256
+  enum { value = 0 };  // always 0 when built for the HWY_WASM_EMU256 target
+#else
+  enum { value = (sizeof(T) != 1) };  // 1 for every lane size except one byte
+#endif
+};
+
+// Single lane: nothing to reorder, so v is returned as is; the mask is unused.
+template <typename T>
+HWY_API Vec128<T, 1> Compress(Vec128<T, 1> v, Mask128<T, 1> /*m*/) {
+  return v;
+}
+
+// Two 64-bit lanes: the only possible reordering is exchanging the two lanes.
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T> Compress(Vec128<T> v, Mask128<T> mask) {
+  const Full128<T> d_full;
+  const Vec128<T> mask_vec = VecFromMask(d_full, mask);
+  const Vec128<T> lo_set = DupEven(mask_vec);  // mask[0] in both lanes
+  const Vec128<T> hi_set = DupOdd(mask_vec);   // mask[1] in both lanes
+  // Exchange only if lane 1 is selected and lane 0 is not; else keep v as is.
+  const Vec128<T> exchange = AndNot(lo_set, hi_set);
+  return IfVecThenElse(exchange, Shuffle01(v), v);
+}
+
+// General case, 2 or 4 byte lanes: convert mask to bits, then shuffle by table.
+template <typename T, size_t N, HWY_IF_T_SIZE_ONE_OF(T, (1 << 4) | (1 << 2))>
+HWY_API Vec128<T, N> Compress(Vec128<T, N> v, Mask128<T, N> mask) {
+  return detail::Compress(v, detail::BitsFromMask(mask));
+}
+
+// Single lane: nothing to reorder, so v is returned as is; the mask is unused.
+template <typename T>
+HWY_API Vec128<T, 1> CompressNot(Vec128<T, 1> v, Mask128<T, 1> /*m*/) {
+  return v;
+}
+
+// Two 64-bit lanes: the only possible reordering is exchanging the two lanes.
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T> CompressNot(Vec128<T> v, Mask128<T> mask) {
+  const Full128<T> d_full;
+  const Vec128<T> mask_vec = VecFromMask(d_full, mask);
+  const Vec128<T> lo_set = DupEven(mask_vec);  // mask[0] in both lanes
+  const Vec128<T> hi_set = DupOdd(mask_vec);   // mask[1] in both lanes
+  // Exchange only if lane 0 is selected and lane 1 is not; else keep v as is.
+  const Vec128<T> exchange = AndNot(hi_set, lo_set);
+  return IfVecThenElse(exchange, Shuffle01(v), v);
+}
+
+// General case, 2 or 4 byte lanes: lanes with a clear mask bit move to the front.
+template <typename T, size_t N, HWY_IF_T_SIZE_ONE_OF(T, (1 << 2) | (1 << 4))>
+HWY_API Vec128<T, N> CompressNot(Vec128<T, N> v, Mask128<T, N> mask) {
+  // The Not() is folded into the CompressNot tables only for full vectors.
+  // For partial vectors BitsFromMask clears the upper bits, so the mask is
+  // negated explicitly and the regular Compress tables are used instead.
+  constexpr bool kIsPartial = N < 16 / sizeof(T);
+  return kIsPartial ? detail::Compress(v, detail::BitsFromMask(Not(mask)))
+                    : detail::CompressNot(v, detail::BitsFromMask(mask));
+}
+
+// ------------------------------ CompressBlocksNot
+HWY_API Vec128<uint64_t> CompressBlocksNot(Vec128<uint64_t> v,
+                                           Mask128<uint64_t> /* m */) {
+  return v;  // mask is ignored; a Vec128 is a single 128-bit block
+}
+
+// ------------------------------ CompressBits
+template <typename T, size_t N, HWY_IF_NOT_T_SIZE(T, 1)>
+HWY_API Vec128<T, N> CompressBits(Vec128<T, N> v,
+                                  const uint8_t* HWY_RESTRICT bits) {
+  // Packed mask: one bit per lane, read from ceil(N/8) bytes of `bits`.
+  uint64_t packed = 0;
+  CopyBytes<(N + 7) / 8>(bits, &packed);
+  // A partial byte may carry bits beyond lane N-1; drop them before the table
+  // lookup. With N >= 8 every copied bit belongs to a lane and all are kept.
+  constexpr uint64_t kLaneMask = (N < 8) ? ((1ull << N) - 1) : ~0ull;
+  packed &= kLaneMask;
+  return detail::Compress(v, packed);
+}
+
+// ------------------------------ CompressStore
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API size_t CompressStore(VFromD<D> v, MFromD<D> mask, D d,
+                             TFromD<D>* HWY_RESTRICT unaligned) {
+  // Compute the bits once; they drive both the shuffle and the lane count.
+  const uint64_t selected = detail::BitsFromMask(mask);
+  StoreU(detail::Compress(v, selected), d, unaligned);
+  return PopCount(selected);  // number of lanes whose mask bit is set
+}
+
+// ------------------------------ CompressBlendedStore
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API size_t CompressBlendedStore(VFromD<D> v, MFromD<D> m, D d,
+                                    TFromD<D>* HWY_RESTRICT unaligned) {
+  // Shuffle in the unsigned domain so that fp16/bf16 lanes are supported.
+  const RebindToUnsigned<decltype(d)> du;
+  const uint64_t selected = detail::BitsFromMask(m);
+  const size_t num_selected = PopCount(selected);
+  const auto packed = BitCast(d, detail::Compress(BitCast(du, v), selected));
+  // Only the first num_selected lanes are covered by the store mask.
+  BlendedStore(packed, RebindMask(d, FirstN(du, num_selected)), d, unaligned);
+  return num_selected;
+}
+
+// ------------------------------ CompressBitsStore
+
+template <class D, HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API size_t CompressBitsStore(VFromD<D> v, const uint8_t* HWY_RESTRICT bits,
+                                 D d, TFromD<D>* HWY_RESTRICT unaligned) {
+  constexpr size_t kLanes = MaxLanes(d);
+  constexpr size_t kMaskBytes = (kLanes + 7) / 8;
+  // Bits past the last lane (possible when kLanes < 8) must not reach Compress.
+  constexpr uint64_t kKeep = (kLanes < 8) ? ((1ull << kLanes) - 1) : ~0ull;
+
+  uint64_t packed = 0;  // one mask bit per lane
+  CopyBytes<kMaskBytes>(bits, &packed);
+  packed &= kKeep;
+  StoreU(detail::Compress(v, packed), d, unaligned);
+  return PopCount(packed);  // number of lanes whose mask bit is set
+}
+
+// ------------------------------ StoreInterleaved2/3/4
+
+// HWY_NATIVE_LOAD_STORE_INTERLEAVED not set, hence defined in
+// generic_ops-inl.h.
+
+// ------------------------------ Additional mask logical operations
+template <class T>  // single lane: the mask is returned unchanged
+HWY_API Mask128<T, 1> SetAtOrAfterFirst(Mask128<T, 1> mask) {
+  return mask;
+}
+template <class T>  // two lanes: result = {m0, m0 | m1}
+HWY_API Mask128<T, 2> SetAtOrAfterFirst(Mask128<T, 2> mask) {
+  const FixedTag<T, 2> d2;
+  const auto m = VecFromMask(d2, mask);
+  const auto lane0_in_both = InterleaveLower(m, m);  // {m0, m0}
+  return MaskFromVec(Or(m, lane0_in_both));
+}
+template <class T, size_t N, HWY_IF_LANES_GT(N, 2), HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_API Mask128<T, N> SetAtOrAfterFirst(Mask128<T, N> mask) {
+  const Simd<T, N, 0> d;
+  const Full64<int64_t> di64;  // the whole (<= 8 byte) mask as one integer
+  const auto m = VecFromMask(d, mask);
+  // x | -x sets every bit from the lowest set bit of x upwards.
+  const auto negated = ResizeBitCast(d, Neg(ResizeBitCast(di64, m)));
+  return MaskFromVec(Or(m, negated));
+}
+template <class T, HWY_IF_NOT_T_SIZE(T, 8)>  // full vector, lanes < 8 bytes
+HWY_API Mask128<T> SetAtOrAfterFirst(Mask128<T> mask) {
+  const Full128<T> d;
+  const Repartition<int64_t, decltype(d)> di64;  // mask viewed as int64 lanes
+
+  auto vmask = BitCast(di64, VecFromMask(d, mask));
+  vmask = Or(vmask, Neg(vmask));  // per i64: lowest set bit and all above it
+
+  // Copy the sign bit of the first int64_t lane to the second int64_t lane
+  const auto vmask2 = BroadcastSignBit(InterleaveLower(Zero(di64), vmask));
+  return MaskFromVec(BitCast(d, Or(vmask, vmask2)));
+}
+
+template <class T, size_t N>  // logical complement of SetAtOrAfterFirst
+HWY_API Mask128<T, N> SetBeforeFirst(Mask128<T, N> mask) {
+  return Not(SetAtOrAfterFirst(mask));
+}
+
+template <class T>  // single lane: the mask is returned unchanged
+HWY_API Mask128<T, 1> SetOnlyFirst(Mask128<T, 1> mask) {
+  return mask;
+}
+template <class T>  // two lanes: result = {m0, m1 & ~m0}
+HWY_API Mask128<T, 2> SetOnlyFirst(Mask128<T, 2> mask) {
+  const FixedTag<T, 2> d2;
+  const RebindToSigned<decltype(d2)> di2;
+  const auto zeros = Zero(di2);
+  const auto m = BitCast(di2, VecFromMask(d2, mask));
+  // {0, m0} == 0 yields {true, !m0}: lane 1 survives only if lane 0 is unset.
+  const auto lane0_clear = InterleaveLower(zeros, m) == zeros;
+  return MaskFromVec(BitCast(d2, And(m, VecFromMask(di2, lane0_clear))));
+}
+template <class T, size_t N, HWY_IF_LANES_GT(N, 2), HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_API Mask128<T, N> SetOnlyFirst(Mask128<T, N> mask) {
+  const Simd<T, N, 0> d;
+  const RebindToSigned<decltype(d)> di;
+  const Full64<int64_t> di64;  // the whole (<= 8 byte) mask as one integer
+  const auto bits = ResizeBitCast(di64, VecFromMask(d, mask));
+  // x & -x keeps only the lowest set bit, i.e. bit 0 of the first true lane.
+  const auto lowest = ResizeBitCast(di, And(bits, Neg(bits)));
+  return MaskFromVec(BitCast(d, Neg(lowest)));  // lane value 1 -> all-ones
+}
+template <class T, HWY_IF_NOT_T_SIZE(T, 8)>  // full vector, lanes < 8 bytes
+HWY_API Mask128<T> SetOnlyFirst(Mask128<T> mask) {
+  const Full128<T> d;
+  const RebindToSigned<decltype(d)> di;
+  const Repartition<int64_t, decltype(d)> di64;  // mask viewed as int64 lanes
+  // vmask2: i64 lane 0 always true; i64 lane 1 true only if lane 0 of vmask is 0.
+  const auto zero = Zero(di64);
+  const auto vmask = BitCast(di64, VecFromMask(d, mask));
+  const auto vmask2 = VecFromMask(di64, InterleaveLower(zero, vmask) == zero);
+  const auto only_first_vmask = Neg(BitCast(di, And(vmask, Neg(vmask))));
+  return MaskFromVec(BitCast(d, And(only_first_vmask, BitCast(di, vmask2))));
+}
+
+template <class T>  // single lane: the result is all-true regardless of input
+HWY_API Mask128<T, 1> SetAtOrBeforeFirst(Mask128<T, 1> /*mask*/) {
+  using SignedT = MakeSigned<T>;
+  const FixedTag<T, 1> d1;
+  const RebindToSigned<decltype(d1)> di1;
+  const auto all_ones = Set(di1, SignedT(-1));  // every bit of the lane set
+  return RebindMask(d1, MaskFromVec(all_ones));
+}
+template <class T, size_t N, HWY_IF_LANES_GT(N, 1)>
+HWY_API Mask128<T, N> SetAtOrBeforeFirst(Mask128<T, N> mask) {
+  // SetBeforeFirst applied to the mask moved up by one lane.
+  const auto up_one = ShiftLeftLanes<1>(VecFromMask(Simd<T, N, 0>(), mask));
+  return SetBeforeFirst(MaskFromVec(up_one));
+}
+
+// ------------------------------ MulEven/Odd (Load)
+
+HWY_INLINE Vec128<uint64_t> MulEven(const Vec128<uint64_t> a,
+                                    const Vec128<uint64_t> b) {
+  const uint64_t a0 = static_cast<uint64_t>(wasm_i64x2_extract_lane(a.raw, 0));
+  const uint64_t b0 = static_cast<uint64_t>(wasm_i64x2_extract_lane(b.raw, 0));
+  // Mul128's return value fills lane 0 and its out-parameter fills lane 1.
+  alignas(16) uint64_t product[2];
+  product[0] = Mul128(a0, b0, &product[1]);
+  return Load(Full128<uint64_t>(), product);
+}
+
+HWY_INLINE Vec128<uint64_t> MulOdd(const Vec128<uint64_t> a,
+                                   const Vec128<uint64_t> b) {
+  const uint64_t a1 = static_cast<uint64_t>(wasm_i64x2_extract_lane(a.raw, 1));
+  const uint64_t b1 = static_cast<uint64_t>(wasm_i64x2_extract_lane(b.raw, 1));
+  // Mul128's return value fills lane 0 and its out-parameter fills lane 1.
+  alignas(16) uint64_t product[2];
+  product[0] = Mul128(a1, b1, &product[1]);
+  return Load(Full128<uint64_t>(), product);
+}
+
+// ------------------------------ ReorderWidenMulAccumulate (MulAdd, ZipLower)
+
+// Generic for all vector lengths: f32 lane = low*low + high*high of a bf16 pair.
+template <class D32, HWY_IF_F32_D(D32),
+          class V16 = VFromD<Repartition<bfloat16_t, D32>>>
+HWY_API VFromD<D32> WidenMulPairwiseAdd(D32 df32, V16 a, V16 b) {
+  const Rebind<uint32_t, decltype(df32)> du32;
+  using VU32 = VFromD<decltype(du32)>;
+  const VU32 odd = Set(du32, 0xFFFF0000u);  // bfloat16 is the upper half of f32
+  // Using shift/and instead of Zip leads to the odd/even order that
+  // RearrangeToOddPlusEven prefers.
+  const VU32 ae = ShiftLeft<16>(BitCast(du32, a));  // low 16 bits -> f32 bits
+  const VU32 ao = And(BitCast(du32, a), odd);       // high 16 bits kept in place
+  const VU32 be = ShiftLeft<16>(BitCast(du32, b));
+  const VU32 bo = And(BitCast(du32, b), odd);
+  return Mul(BitCast(df32, ae), BitCast(df32, be)) +
+         Mul(BitCast(df32, ao), BitCast(df32, bo));
+}
+
+template <class D32, HWY_IF_F32_D(D32),
+          class V16 = VFromD<Repartition<bfloat16_t, D32>>>
+HWY_API VFromD<D32> ReorderWidenMulAccumulate(D32 df32, V16 a, V16 b,
+                                              const VFromD<D32> sum0,
+                                              VFromD<D32>& sum1) {
+  const Rebind<uint32_t, decltype(df32)> du32;
+  using VU32 = VFromD<decltype(du32)>;
+  const VU32 odd = Set(du32, 0xFFFF0000u);  // bfloat16 is the upper half of f32
+  // Using shift/and instead of Zip leads to the odd/even order that
+  // RearrangeToOddPlusEven prefers.
+  const VU32 ae = ShiftLeft<16>(BitCast(du32, a));  // low 16 bits -> f32 bits
+  const VU32 ao = And(BitCast(du32, a), odd);       // high 16 bits kept in place
+  const VU32 be = ShiftLeft<16>(BitCast(du32, b));
+  const VU32 bo = And(BitCast(du32, b), odd);
+  sum1 = MulAdd(BitCast(df32, ao), BitCast(df32, bo), sum1);  // in/out sum
+  return MulAdd(BitCast(df32, ae), BitCast(df32, be), sum0);  // returned sum
+}
+
+// Even if N=1, the input is always at least 2 lanes, hence i32x4_dot_i16x8 is
+// safe. The intrinsic is applied to the raw registers of a and b; tag unused.
+template <class D32, HWY_IF_I32_D(D32), HWY_IF_V_SIZE_LE_D(D32, 16),
+          class V16 = VFromD<RepartitionToNarrow<D32>>>
+HWY_API VFromD<D32> WidenMulPairwiseAdd(D32 /* tag */, V16 a, V16 b) {
+  return VFromD<D32>{wasm_i32x4_dot_i16x8(a.raw, b.raw)};
+}
+
+template <class DU32, HWY_IF_U32_D(DU32), HWY_IF_V_SIZE_LE_D(DU32, 16),
+          class VU16 = VFromD<RepartitionToNarrow<DU32>>>
+HWY_API VFromD<DU32> WidenMulPairwiseAdd(DU32 du32, VU16 a, VU16 b) {
+  // View each pair of adjacent u16 lanes as one u32 lane.
+  const auto a_pairs = BitCast(du32, a);
+  const auto b_pairs = BitCast(du32, b);
+  const auto keep_low = Set(du32, 0x0000FFFFu);
+  // Low-half and high-half u16 of every pair, each zero-extended to u32.
+  const auto low_product = And(a_pairs, keep_low) * And(b_pairs, keep_low);
+  const auto a_high = ShiftRight<16>(a_pairs);
+  const auto b_high = ShiftRight<16>(b_pairs);
+  return MulAdd(a_high, b_high, low_product);
+}
+
+// Even if N=1, the input is always at least 2 lanes, hence i32x4_dot_i16x8 is
+// safe. The whole pairwise product is added to sum0; sum1 is left untouched.
+template <class D32, HWY_IF_I32_D(D32), HWY_IF_V_SIZE_LE_D(D32, 16),
+          class V16 = VFromD<RepartitionToNarrow<D32>>>
+HWY_API VFromD<D32> ReorderWidenMulAccumulate(D32 d, V16 a, V16 b,
+                                              const VFromD<D32> sum0,
+                                              VFromD<D32>& /*sum1*/) {
+  return sum0 + WidenMulPairwiseAdd(d, a, b);
+}
+
+// Unsigned variant: forwards to the u32 WidenMulPairwiseAdd overload.
+// The whole pairwise product is added to sum0; sum1 is left untouched.
+template <class DU32, HWY_IF_U32_D(DU32), HWY_IF_V_SIZE_LE_D(DU32, 16),
+          class VU16 = VFromD<RepartitionToNarrow<DU32>>>
+HWY_API VFromD<DU32> ReorderWidenMulAccumulate(DU32 d, VU16 a, VU16 b,
+                                               const VFromD<DU32> sum0,
+                                               VFromD<DU32>& /*sum1*/) {
+  return sum0 + WidenMulPairwiseAdd(d, a, b);
+}
+
+// ------------------------------ RearrangeToOddPlusEven
+template <size_t N>  // i32: sum1 is ignored and sum0 is returned as is
+HWY_API Vec128<int32_t, N> RearrangeToOddPlusEven(
+    const Vec128<int32_t, N> sum0, const Vec128<int32_t, N> /*sum1*/) {
+  return sum0;  // invariant already holds
+}
+
+template <size_t N>  // u32: sum1 is ignored and sum0 is returned as is
+HWY_API Vec128<uint32_t, N> RearrangeToOddPlusEven(
+    const Vec128<uint32_t, N> sum0, const Vec128<uint32_t, N> /*sum1*/) {
+  return sum0;  // invariant already holds
+}
+
+template <size_t N>  // float: the two partial sums are simply added
+HWY_API Vec128<float, N> RearrangeToOddPlusEven(const Vec128<float, N> sum0,
+                                                const Vec128<float, N> sum1) {
+  return Add(sum0, sum1);
+}
+
+// ------------------------------ Reductions
+
+namespace detail {
+
+// N=1: a single lane already is its own sum, minimum and maximum.
+template <typename T>
+HWY_INLINE Vec128<T, 1> SumOfLanes(Vec128<T, 1> v) {
+  return v;
+}
+template <typename T>
+HWY_INLINE Vec128<T, 1> MinOfLanes(Vec128<T, 1> v) {
+  return v;
+}
+template <typename T>
+HWY_INLINE Vec128<T, 1> MaxOfLanes(Vec128<T, 1> v) {
+  return v;
+}
+
+// N=2: combine v with its lane-swapped copy; both lanes then hold the result.
+template <typename T>
+HWY_INLINE Vec128<T, 2> SumOfLanes(Vec128<T, 2> v10) {
+  const DFromV<decltype(v10)> d;
+  return Add(v10, Reverse2(d, v10));
+}
+template <typename T>
+HWY_INLINE Vec128<T, 2> MinOfLanes(Vec128<T, 2> v10) {
+  const DFromV<decltype(v10)> d;
+  return Min(v10, Reverse2(d, v10));
+}
+template <typename T>
+HWY_INLINE Vec128<T, 2> MaxOfLanes(Vec128<T, 2> v10) {
+  const DFromV<decltype(v10)> d;
+  return Max(v10, Reverse2(d, v10));
+}
+
+// N=4 (only 16/32-bit, else >128-bit): fold with Reverse4, then with Reverse2.
+template <typename T, HWY_IF_T_SIZE_ONE_OF(T, (1 << 2) | (1 << 4))>
+HWY_INLINE Vec128<T, 4> SumOfLanes(Vec128<T, 4> v3210) {
+  using V = decltype(v3210);
+  const DFromV<V> d;
+  const V v0123 = Reverse4(d, v3210);
+  const V v03_12_12_03 = Add(v3210, v0123);
+  const V v12_03_03_12 = Reverse2(d, v03_12_12_03);
+  return Add(v03_12_12_03, v12_03_03_12);  // every lane: all four combined
+}
+template <typename T, HWY_IF_T_SIZE_ONE_OF(T, (1 << 2) | (1 << 4))>
+HWY_INLINE Vec128<T, 4> MinOfLanes(Vec128<T, 4> v3210) {
+  using V = decltype(v3210);
+  const DFromV<V> d;
+  const V v0123 = Reverse4(d, v3210);
+  const V v03_12_12_03 = Min(v3210, v0123);
+  const V v12_03_03_12 = Reverse2(d, v03_12_12_03);
+  return Min(v03_12_12_03, v12_03_03_12);  // every lane: all four combined
+}
+template <typename T, HWY_IF_T_SIZE_ONE_OF(T, (1 << 2) | (1 << 4))>
+HWY_INLINE Vec128<T, 4> MaxOfLanes(Vec128<T, 4> v3210) {
+  using V = decltype(v3210);
+  const DFromV<V> d;
+  const V v0123 = Reverse4(d, v3210);
+  const V v03_12_12_03 = Max(v3210, v0123);
+  const V v12_03_03_12 = Reverse2(d, v03_12_12_03);
+  return Max(v03_12_12_03, v12_03_03_12);  // every lane: all four combined
+}
+
+// N=8 (only 16-bit, else >128-bit): fold with Reverse8, Reverse4, Reverse2.
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE Vec128<T, 8> SumOfLanes(Vec128<T, 8> v76543210) {
+  using V = decltype(v76543210);
+  const DFromV<V> d;
+  // The upper half is reversed from the lower half; omit for brevity.
+  const V v34_25_16_07 = Add(v76543210, Reverse8(d, v76543210));
+  const V v0347_1625_1625_0347 = Add(v34_25_16_07, Reverse4(d, v34_25_16_07));
+  return Add(v0347_1625_1625_0347, Reverse2(d, v0347_1625_1625_0347));
+}
+template <typename T, HWY_IF_T_SIZE(T, 2)>  // 16-bit only, as for SumOfLanes
+HWY_INLINE Vec128<T, 8> MinOfLanes(Vec128<T, 8> v76543210) {
+  using V = decltype(v76543210);
+  const DFromV<V> d;
+  // The upper half is reversed from the lower half; omit for brevity.
+  const V v34_25_16_07 = Min(v76543210, Reverse8(d, v76543210));
+  const V v0347_1625_1625_0347 = Min(v34_25_16_07, Reverse4(d, v34_25_16_07));
+  return Min(v0347_1625_1625_0347, Reverse2(d, v0347_1625_1625_0347));
+}
+template <typename T, HWY_IF_T_SIZE(T, 2)>  // 16-bit only, as for SumOfLanes
+HWY_INLINE Vec128<T, 8> MaxOfLanes(Vec128<T, 8> v76543210) {
+  using V = decltype(v76543210);
+  const DFromV<V> d;
+  // The upper half is reversed from the lower half; omit for brevity.
+  const V v34_25_16_07 = Max(v76543210, Reverse8(d, v76543210));
+  const V v0347_1625_1625_0347 = Max(v34_25_16_07, Reverse4(d, v34_25_16_07));
+  return Max(v0347_1625_1625_0347, Reverse2(d, v0347_1625_1625_0347));
+}
+
+template <typename T, size_t N, HWY_IF_NOT_T_SIZE(T, 1)>
+HWY_INLINE T ReduceSum(Vec128<T, N> v) {
+  return GetLane(SumOfLanes(v));  // scalar extracted from the broadcast sum
+}
+
+}  // namespace detail
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>  // tag only selects the overload
+HWY_API VFromD<D> SumOfLanes(D /* tag */, VFromD<D> v) {
+  return detail::SumOfLanes(v);  // dispatches on the lane count of v
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>  // tag only selects the overload
+HWY_API TFromD<D> ReduceSum(D /* tag */, VFromD<D> v) {
+  return detail::ReduceSum(v);  // scalar result rather than a vector
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>  // tag only selects the overload
+HWY_API VFromD<D> MinOfLanes(D /* tag */, VFromD<D> v) {
+  return detail::MinOfLanes(v);  // dispatches on the lane count of v
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>  // tag only selects the overload
+HWY_API VFromD<D> MaxOfLanes(D /* tag */, VFromD<D> v) {
+  return detail::MaxOfLanes(v);  // dispatches on the lane count of v
+}
+
+// ------------------------------ Lt128
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_U64_D(D)>
+HWY_INLINE MFromD<D> Lt128(D d, VFromD<D> a, VFromD<D> b) {
+  // Truth table of Eq and Lt for Hi and Lo u64.
+  // (removed lines with (=H && cH) or (=L && cL) - cannot both be true)
+  // =H =L cH cL  | out = cH | (=H & cL)
+  //  0  0  0  0  |  0
+  //  0  0  0  1  |  0
+  //  0  0  1  0  |  1
+  //  0  0  1  1  |  1
+  //  0  1  0  0  |  0
+  //  0  1  0  1  |  0
+  //  0  1  1  0  |  1
+  //  1  0  0  0  |  0
+  //  1  0  0  1  |  1
+  //  1  1  0  0  |  0
+  const MFromD<D> eqHL = Eq(a, b);                  // per-u64 a == b
+  const VFromD<D> ltHL = VecFromMask(d, Lt(a, b));  // per-u64 a < b
+  // We need to bring cL to the upper lane/bit corresponding to cH. Comparing
+  // the result of InterleaveUpper/Lower requires 9 ops, whereas shifting the
+  // comparison result leftwards requires only 4. IfThenElse compiles to the
+  // same code as OrAnd().
+  const VFromD<D> ltLx = DupEven(ltHL);  // cL copied into the upper lane
+  const VFromD<D> outHx = IfThenElse(eqHL, ltLx, ltHL);  // upper: =H ? cL : cH
+  return MaskFromVec(DupOdd(outHx));  // upper-lane verdict in both lanes
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>  // compares the upper lanes only
+HWY_INLINE MFromD<D> Lt128Upper(D d, VFromD<D> a, VFromD<D> b) {
+  const VFromD<D> ltHL = VecFromMask(d, Lt(a, b));     // per-lane a < b
+  return MaskFromVec(InterleaveUpper(d, ltHL, ltHL));  // upper result in both
+}
+
+// ------------------------------ Eq128
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_U64_D(D)>
+HWY_INLINE MFromD<D> Eq128(D d, VFromD<D> a, VFromD<D> b) {
+  const VFromD<D> eqHL = VecFromMask(d, Eq(a, b));   // per-u64 a == b
+  return MaskFromVec(And(Reverse2(d, eqHL), eqHL));  // true iff both u64 match
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>  // compares the upper lanes only
+HWY_INLINE MFromD<D> Eq128Upper(D d, VFromD<D> a, VFromD<D> b) {
+  const VFromD<D> eqHL = VecFromMask(d, Eq(a, b));     // per-lane a == b
+  return MaskFromVec(InterleaveUpper(d, eqHL, eqHL));  // upper result in both
+}
+
+// ------------------------------ Ne128
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_U64_D(D)>
+HWY_INLINE MFromD<D> Ne128(D d, VFromD<D> a, VFromD<D> b) {
+  const VFromD<D> neHL = VecFromMask(d, Ne(a, b));  // per-u64 a != b
+  return MaskFromVec(Or(Reverse2(d, neHL), neHL));  // true if either u64 differs
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>  // compares the upper lanes only
+HWY_INLINE MFromD<D> Ne128Upper(D d, VFromD<D> a, VFromD<D> b) {
+  const VFromD<D> neHL = VecFromMask(d, Ne(a, b));     // per-lane a != b
+  return MaskFromVec(InterleaveUpper(d, neHL, neHL));  // upper result in both
+}
+
+// ------------------------------ Min128, Max128 (Lt128)
+
+// Without a native OddEven, it seems infeasible to go faster than Lt128.
+template <class D>
+HWY_INLINE VFromD<D> Min128(D d, const VFromD<D> a, const VFromD<D> b) {
+  return IfThenElse(Lt128(d, a, b), a, b);  // a if Lt128(a, b) holds, else b
+}
+
+template <class D>
+HWY_INLINE VFromD<D> Max128(D d, const VFromD<D> a, const VFromD<D> b) {
+  return IfThenElse(Lt128(d, b, a), a, b);  // a if Lt128(b, a) holds, else b
+}
+
+template <class D>  // like Min128, but ordered by Lt128Upper
+HWY_INLINE VFromD<D> Min128Upper(D d, const VFromD<D> a, const VFromD<D> b) {
+  return IfThenElse(Lt128Upper(d, a, b), a, b);  // a if upper(a) < upper(b)
+}
+
+template <class D>  // like Max128, but ordered by Lt128Upper
+HWY_INLINE VFromD<D> Max128Upper(D d, const VFromD<D> a, const VFromD<D> b) {
+  return IfThenElse(Lt128Upper(d, b, a), a, b);  // a if upper(b) < upper(a)
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/ops/wasm_256-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/wasm_256-inl.h
new file mode 100644
index 0000000000..29084e64e8
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/wasm_256-inl.h
@@ -0,0 +1,2408 @@
+// Copyright 2021 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// 256-bit WASM vectors and operations. Experimental.
+// External include guard in highway.h - see comment there.
+
+// For half-width vectors. Already includes base.h and shared-inl.h.
+#include "hwy/ops/wasm_128-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+template <typename T>  // 256-bit vector emulated as two Vec128<T> halves.
+class Vec256 {
+ public:
+  using PrivateT = T;                                  // only for DFromV
+  static constexpr size_t kPrivateN = 32 / sizeof(T);  // only for DFromV
+
+  // Compound assignment. Only usable if there is a corresponding non-member
+  // binary operator overload. For example, only f32 and f64 support division.
+  HWY_INLINE Vec256& operator*=(const Vec256 other) {
+    return *this = (*this * other);
+  }
+  HWY_INLINE Vec256& operator/=(const Vec256 other) {
+    return *this = (*this / other);
+  }
+  HWY_INLINE Vec256& operator+=(const Vec256 other) {
+    return *this = (*this + other);
+  }
+  HWY_INLINE Vec256& operator-=(const Vec256 other) {
+    return *this = (*this - other);
+  }
+  HWY_INLINE Vec256& operator&=(const Vec256 other) {
+    return *this = (*this & other);
+  }
+  HWY_INLINE Vec256& operator|=(const Vec256 other) {
+    return *this = (*this | other);
+  }
+  HWY_INLINE Vec256& operator^=(const Vec256 other) {
+    return *this = (*this ^ other);
+  }
+
+  Vec128<T> v0;  // lower half (what LowerHalf returns)
+  Vec128<T> v1;  // upper half (what UpperHalf returns)
+};
+
+template <typename T>  // Mask for Vec256<T>: one Mask128<T> per half.
+struct Mask256 {
+  Mask128<T> m0;  // mask for Vec256::v0
+  Mask128<T> m1;  // mask for Vec256::v1
+};
+
+// ------------------------------ Zero
+
+// Avoid VFromD here because it is defined in terms of Zero.
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API Vec256<TFromD<D>> Zero(D d) {
+  const Half<decltype(d)> dh;  // tag for one 128-bit half
+  Vec256<TFromD<D>> ret;
+  ret.v0 = ret.v1 = Zero(dh);  // both halves are all-zero
+  return ret;
+}
+
+// ------------------------------ BitCast
+template <class D, typename TFrom>  // Reinterprets v as VFromD<D>, per half.
+HWY_API VFromD<D> BitCast(D d, Vec256<TFrom> v) {
+  const Half<decltype(d)> dh;
+  VFromD<D> ret;
+  ret.v0 = BitCast(dh, v.v0);
+  ret.v1 = BitCast(dh, v.v1);
+  return ret;
+}
+
+// ------------------------------ ResizeBitCast
+
+// 32-byte vector to 32-byte vector: Same as BitCast
+template <class D, typename FromV, HWY_IF_V_SIZE_V(FromV, 32),
+          HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> ResizeBitCast(D d, FromV v) {
+  return BitCast(d, v);  // equal sizes, so a plain BitCast suffices
+}
+
+// <= 16-byte vector to 32-byte vector
+template <class D, typename FromV, HWY_IF_V_SIZE_LE_V(FromV, 16),
+          HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> ResizeBitCast(D d, FromV v) {
+  const Half<decltype(d)> dh;
+  VFromD<D> ret;
+  ret.v0 = ResizeBitCast(dh, v);  // input goes into the lower half
+  ret.v1 = Zero(dh);              // upper half is zero-filled
+  return ret;
+}
+
+// 32-byte vector to <= 16-byte vector
+template <class D, typename FromV, HWY_IF_V_SIZE_V(FromV, 32),
+          HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> ResizeBitCast(D d, FromV v) {
+  return ResizeBitCast(d, v.v0);  // only the lower half is used
+}
+
+// ------------------------------ Set
+template <class D, HWY_IF_V_SIZE_D(D, 32), typename T2>
+HWY_API VFromD<D> Set(D d, const T2 t) {
+  const Half<decltype(d)> dh;
+  VFromD<D> ret;
+  ret.v0 = ret.v1 = Set(dh, static_cast<TFromD<D>>(t));  // same in both halves
+  return ret;
+}
+
+// Undefined, Iota defined in wasm_128.
+
+// ================================================== ARITHMETIC
+
+template <typename T>  // Adds the two vectors half by half.
+HWY_API Vec256<T> operator+(Vec256<T> a, const Vec256<T> b) {
+  a.v0 += b.v0;
+  a.v1 += b.v1;
+  return a;
+}
+
+template <typename T>  // Subtracts b from a, half by half.
+HWY_API Vec256<T> operator-(Vec256<T> a, const Vec256<T> b) {
+  a.v0 -= b.v0;
+  a.v1 -= b.v1;
+  return a;
+}
+
+// ------------------------------ SumsOf8
+HWY_API Vec256<uint64_t> SumsOf8(const Vec256<uint8_t> v) {
+  Vec256<uint64_t> ret;
+  ret.v0 = SumsOf8(v.v0);  // delegate to the 128-bit op for each half
+  ret.v1 = SumsOf8(v.v1);
+  return ret;
+}
+
+template <typename T>  // Applies the 128-bit SaturatedAdd to each half.
+HWY_API Vec256<T> SaturatedAdd(Vec256<T> a, const Vec256<T> b) {
+  a.v0 = SaturatedAdd(a.v0, b.v0);
+  a.v1 = SaturatedAdd(a.v1, b.v1);
+  return a;
+}
+
+template <typename T>  // Applies the 128-bit SaturatedSub to each half.
+HWY_API Vec256<T> SaturatedSub(Vec256<T> a, const Vec256<T> b) {
+  a.v0 = SaturatedSub(a.v0, b.v0);
+  a.v1 = SaturatedSub(a.v1, b.v1);
+  return a;
+}
+
+template <typename T>  // Applies the 128-bit AverageRound to each half.
+HWY_API Vec256<T> AverageRound(Vec256<T> a, const Vec256<T> b) {
+  a.v0 = AverageRound(a.v0, b.v0);
+  a.v1 = AverageRound(a.v1, b.v1);
+  return a;
+}
+
+template <typename T>  // Applies the 128-bit Abs to each half.
+HWY_API Vec256<T> Abs(Vec256<T> v) {
+  v.v0 = Abs(v.v0);
+  v.v1 = Abs(v.v1);
+  return v;
+}
+
+// ------------------------------ Shift lanes by constant #bits
+
+template <int kBits, typename T>  // Shifts lanes left by kBits, per half.
+HWY_API Vec256<T> ShiftLeft(Vec256<T> v) {
+  v.v0 = ShiftLeft<kBits>(v.v0);
+  v.v1 = ShiftLeft<kBits>(v.v1);
+  return v;
+}
+
+template <int kBits, typename T>  // Shifts lanes right by kBits, per half.
+HWY_API Vec256<T> ShiftRight(Vec256<T> v) {
+  v.v0 = ShiftRight<kBits>(v.v0);
+  v.v1 = ShiftRight<kBits>(v.v1);
+  return v;
+}
+
+// ------------------------------ RotateRight (ShiftRight, Or)
+template <int kBits, typename T>  // Rotates each lane right by kBits.
+HWY_API Vec256<T> RotateRight(const Vec256<T> v) {
+  constexpr size_t kSizeInBits = sizeof(T) * 8;  // lane width in bits
+  static_assert(0 <= kBits && kBits < kSizeInBits, "Invalid shift count");
+  if (kBits == 0) return v;  // rotating by zero leaves v unchanged
+  return Or(ShiftRight<kBits>(v), ShiftLeft<kSizeInBits - kBits>(v));
+}
+
+// ------------------------------ Shift lanes by same variable #bits
+
+template <typename T>  // Shifts lanes left by the runtime count `bits`.
+HWY_API Vec256<T> ShiftLeftSame(Vec256<T> v, const int bits) {
+  v.v0 = ShiftLeftSame(v.v0, bits);
+  v.v1 = ShiftLeftSame(v.v1, bits);
+  return v;
+}
+
+template <typename T>  // Shifts lanes right by the runtime count `bits`.
+HWY_API Vec256<T> ShiftRightSame(Vec256<T> v, const int bits) {
+  v.v0 = ShiftRightSame(v.v0, bits);
+  v.v1 = ShiftRightSame(v.v1, bits);
+  return v;
+}
+
+// ------------------------------ Min, Max
+template <typename T>  // Applies the 128-bit Min to each half.
+HWY_API Vec256<T> Min(Vec256<T> a, const Vec256<T> b) {
+  a.v0 = Min(a.v0, b.v0);
+  a.v1 = Min(a.v1, b.v1);
+  return a;
+}
+
+template <typename T>  // Applies the 128-bit Max to each half.
+HWY_API Vec256<T> Max(Vec256<T> a, const Vec256<T> b) {
+  a.v0 = Max(a.v0, b.v0);
+  a.v1 = Max(a.v1, b.v1);
+  return a;
+}
+// ------------------------------ Integer multiplication
+
+template <typename T>  // Multiplies the two vectors half by half.
+HWY_API Vec256<T> operator*(Vec256<T> a, const Vec256<T> b) {
+  a.v0 *= b.v0;
+  a.v1 *= b.v1;
+  return a;
+}
+
+template <typename T>  // Applies the 128-bit MulHigh to each half.
+HWY_API Vec256<T> MulHigh(Vec256<T> a, const Vec256<T> b) {
+  a.v0 = MulHigh(a.v0, b.v0);
+  a.v1 = MulHigh(a.v1, b.v1);
+  return a;
+}
+
+template <typename T>  // Applies the 128-bit MulFixedPoint15 to each half.
+HWY_API Vec256<T> MulFixedPoint15(Vec256<T> a, const Vec256<T> b) {
+  a.v0 = MulFixedPoint15(a.v0, b.v0);
+  a.v1 = MulFixedPoint15(a.v1, b.v1);
+  return a;
+}
+
+// Cannot use MakeWide because that returns uint128_t for uint64_t, but we want
+// uint64_t.
+template <class T, HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>
+HWY_API Vec256<MakeWide<T>> MulEven(Vec256<T> a, const Vec256<T> b) {
+  Vec256<MakeWide<T>> ret;  // result lanes are twice as wide as T
+  ret.v0 = MulEven(a.v0, b.v0);
+  ret.v1 = MulEven(a.v1, b.v1);
+  return ret;
+}
+HWY_API Vec256<uint64_t> MulEven(Vec256<uint64_t> a, const Vec256<uint64_t> b) {
+  Vec256<uint64_t> ret;  // u64 overload keeps u64 lanes (no MakeWide)
+  ret.v0 = MulEven(a.v0, b.v0);
+  ret.v1 = MulEven(a.v1, b.v1);
+  return ret;
+}
+
+template <class T, HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>
+HWY_API Vec256<MakeWide<T>> MulOdd(Vec256<T> a, const Vec256<T> b) {
+  Vec256<MakeWide<T>> ret;  // result lanes are twice as wide as T
+  ret.v0 = MulOdd(a.v0, b.v0);
+  ret.v1 = MulOdd(a.v1, b.v1);
+  return ret;
+}
+HWY_API Vec256<uint64_t> MulOdd(Vec256<uint64_t> a, const Vec256<uint64_t> b) {
+  Vec256<uint64_t> ret;  // u64 overload keeps u64 lanes (no MakeWide)
+  ret.v0 = MulOdd(a.v0, b.v0);
+  ret.v1 = MulOdd(a.v1, b.v1);
+  return ret;
+}
+
+// ------------------------------ Negate
+template <typename T>  // Applies the 128-bit Neg to each half.
+HWY_API Vec256<T> Neg(Vec256<T> v) {
+  v.v0 = Neg(v.v0);
+  v.v1 = Neg(v.v1);
+  return v;
+}
+
+// ------------------------------ AbsDiff
+// generic_ops takes care of integer T.
+template <typename T, HWY_IF_FLOAT(T)>  // Float-only: |a - b| per lane.
+HWY_API Vec256<T> AbsDiff(const Vec256<T> a, const Vec256<T> b) {
+  return Abs(a - b);
+}
+
+// ------------------------------ Floating-point division
+template <typename T>  // Divides a by b, half by half.
+HWY_API Vec256<T> operator/(Vec256<T> a, const Vec256<T> b) {
+  a.v0 /= b.v0;
+  a.v1 /= b.v1;
+  return a;
+}
+
+// Approximate reciprocal
+HWY_API Vec256<float> ApproximateReciprocal(const Vec256<float> v) {
+  const Vec256<float> one = Set(Full256<float>(), 1.0f);
+  return one / v;  // implemented as a plain division of 1.0f by v
+}
+
+// ------------------------------ Floating-point multiply-add variants
+
+HWY_API Vec256<float> MulAdd(Vec256<float> mul, Vec256<float> x,
+                             Vec256<float> add) {
+  mul.v0 = MulAdd(mul.v0, x.v0, add.v0);  // result reuses the `mul` argument
+  mul.v1 = MulAdd(mul.v1, x.v1, add.v1);
+  return mul;
+}
+
+HWY_API Vec256<float> NegMulAdd(Vec256<float> mul, Vec256<float> x,
+                                Vec256<float> add) {
+  mul.v0 = NegMulAdd(mul.v0, x.v0, add.v0);  // 128-bit op on each half
+  mul.v1 = NegMulAdd(mul.v1, x.v1, add.v1);
+  return mul;
+}
+
+HWY_API Vec256<float> MulSub(Vec256<float> mul, Vec256<float> x,
+                             Vec256<float> sub) {
+  mul.v0 = MulSub(mul.v0, x.v0, sub.v0);  // 128-bit op on each half
+  mul.v1 = MulSub(mul.v1, x.v1, sub.v1);
+  return mul;
+}
+
+HWY_API Vec256<float> NegMulSub(Vec256<float> mul, Vec256<float> x,
+                                Vec256<float> sub) {
+  mul.v0 = NegMulSub(mul.v0, x.v0, sub.v0);  // 128-bit op on each half
+  mul.v1 = NegMulSub(mul.v1, x.v1, sub.v1);
+  return mul;
+}
+
+// ------------------------------ Floating-point square root
+
+template <typename T>  // Applies the 128-bit Sqrt to each half.
+HWY_API Vec256<T> Sqrt(Vec256<T> v) {
+  v.v0 = Sqrt(v.v0);
+  v.v1 = Sqrt(v.v1);
+  return v;
+}
+
+// Approximate reciprocal square root
+HWY_API Vec256<float> ApproximateReciprocalSqrt(const Vec256<float> v) {
+  // TODO(eustas): find a cheaper way to calculate this.
+  const Vec256<float> one = Set(Full256<float>(), 1.0f);
+  return one / Sqrt(v);  // full division and square root, no approximation
+}
+
+// ------------------------------ Floating-point rounding
+
+// Toward nearest integer, ties to even
+HWY_API Vec256<float> Round(Vec256<float> v) {
+  v.v0 = Round(v.v0);  // 128-bit Round on each half
+  v.v1 = Round(v.v1);
+  return v;
+}
+
+// Toward zero, aka truncate
+HWY_API Vec256<float> Trunc(Vec256<float> v) {
+  v.v0 = Trunc(v.v0);  // 128-bit Trunc on each half
+  v.v1 = Trunc(v.v1);
+  return v;
+}
+
+// Toward +infinity, aka ceiling
+HWY_API Vec256<float> Ceil(Vec256<float> v) {
+  v.v0 = Ceil(v.v0);  // 128-bit Ceil on each half
+  v.v1 = Ceil(v.v1);
+  return v;
+}
+
+// Toward -infinity, aka floor
+HWY_API Vec256<float> Floor(Vec256<float> v) {
+  v.v0 = Floor(v.v0);  // 128-bit Floor on each half
+  v.v1 = Floor(v.v1);
+  return v;
+}
+
+// ------------------------------ Floating-point classification
+
+template <typename T>  // Mask is true in lanes where v compares != itself.
+HWY_API Mask256<T> IsNaN(const Vec256<T> v) {
+  return v != v;
+}
+
+template <typename T, HWY_IF_FLOAT(T)>
+HWY_API Mask256<T> IsInf(const Vec256<T> v) {
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;  // same-size signed integer lanes
+  const VFromD<decltype(di)> vi = BitCast(di, v);
+  // 'Shift left' to clear the sign bit, check for exponent=max and mantissa=0.
+  return RebindMask(d, Eq(Add(vi, vi), Set(di, hwy::MaxExponentTimes2<T>())));
+}
+
+// Returns whether normal/subnormal/zero.
+template <typename T, HWY_IF_FLOAT(T)>
+HWY_API Mask256<T> IsFinite(const Vec256<T> v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const RebindToSigned<decltype(d)> di;  // cheaper than unsigned comparison
+  const VFromD<decltype(du)> vu = BitCast(du, v);
+  // 'Shift left' to clear the sign bit, then right so we can compare with the
+  // max exponent (cannot compare with MaxExponentTimes2 directly because it is
+  // negative and non-negative floats would be greater).
+  const VFromD<decltype(di)> exp =
+      BitCast(di, ShiftRight<hwy::MantissaBits<T>() + 1>(Add(vu, vu)));
+  return RebindMask(d, Lt(exp, Set(di, hwy::MaxExponentField<T>())));  // < max
+}
+
+// ================================================== COMPARE
+
+// Comparisons fill a lane with 1-bits if the condition is true, else 0.
+
+template <class DTo, typename TFrom, typename TTo = TFromD<DTo>>
+HWY_API MFromD<DTo> RebindMask(DTo /*tag*/, Mask256<TFrom> m) {
+  static_assert(sizeof(TFrom) == sizeof(TTo), "Must have same size");
+  return MFromD<DTo>{Mask128<TTo>{m.m0.raw}, Mask128<TTo>{m.m1.raw}};  // reuse raw
+}
+
+template <typename T>  // True in lanes where every bit set in `bit` is set in v.
+HWY_API Mask256<T> TestBit(Vec256<T> v, Vec256<T> bit) {
+  static_assert(!hwy::IsFloat<T>(), "Only integer vectors supported");
+  return (v & bit) == bit;
+}
+
+template <typename T>  // Compares a == b separately for each half.
+HWY_API Mask256<T> operator==(Vec256<T> a, const Vec256<T> b) {
+  Mask256<T> m;
+  m.m0 = operator==(a.v0, b.v0);
+  m.m1 = operator==(a.v1, b.v1);
+  return m;
+}
+
+template <typename T>  // Compares a != b separately for each half.
+HWY_API Mask256<T> operator!=(Vec256<T> a, const Vec256<T> b) {
+  Mask256<T> m;
+  m.m0 = operator!=(a.v0, b.v0);
+  m.m1 = operator!=(a.v1, b.v1);
+  return m;
+}
+
+template <typename T>  // Compares a < b separately for each half.
+HWY_API Mask256<T> operator<(Vec256<T> a, const Vec256<T> b) {
+  Mask256<T> m;
+  m.m0 = operator<(a.v0, b.v0);
+  m.m1 = operator<(a.v1, b.v1);
+  return m;
+}
+
+template <typename T>  // Compares a > b separately for each half.
+HWY_API Mask256<T> operator>(Vec256<T> a, const Vec256<T> b) {
+  Mask256<T> m;
+  m.m0 = operator>(a.v0, b.v0);
+  m.m1 = operator>(a.v1, b.v1);
+  return m;
+}
+
+template <typename T>  // Compares a <= b separately for each half.
+HWY_API Mask256<T> operator<=(Vec256<T> a, const Vec256<T> b) {
+  Mask256<T> m;
+  m.m0 = operator<=(a.v0, b.v0);
+  m.m1 = operator<=(a.v1, b.v1);
+  return m;
+}
+
+template <typename T>  // Compares a >= b separately for each half.
+HWY_API Mask256<T> operator>=(Vec256<T> a, const Vec256<T> b) {
+  Mask256<T> m;
+  m.m0 = operator>=(a.v0, b.v0);
+  m.m1 = operator>=(a.v1, b.v1);
+  return m;
+}
+
+// ------------------------------ FirstN (Iota, Lt)
+
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API MFromD<D> FirstN(const D d, size_t num) {
+  const RebindToSigned<decltype(d)> di;  // Signed comparisons may be cheaper.
+  using TI = TFromD<decltype(di)>;
+  return RebindMask(d, Iota(di, 0) < Set(di, static_cast<TI>(num)));  // idx < num
+}
+
+// ================================================== LOGICAL
+
+template <typename T>  // Applies the 128-bit Not to each half.
+HWY_API Vec256<T> Not(Vec256<T> v) {
+  v.v0 = Not(v.v0);
+  v.v1 = Not(v.v1);
+  return v;
+}
+
+template <typename T>  // Applies the 128-bit And to each half.
+HWY_API Vec256<T> And(Vec256<T> a, Vec256<T> b) {
+  a.v0 = And(a.v0, b.v0);
+  a.v1 = And(a.v1, b.v1);
+  return a;
+}
+
+template <typename T>  // Applies the 128-bit AndNot to each half.
+HWY_API Vec256<T> AndNot(Vec256<T> not_mask, Vec256<T> mask) {
+  not_mask.v0 = AndNot(not_mask.v0, mask.v0);
+  not_mask.v1 = AndNot(not_mask.v1, mask.v1);
+  return not_mask;
+}
+
+template <typename T>  // Applies the 128-bit Or to each half.
+HWY_API Vec256<T> Or(Vec256<T> a, Vec256<T> b) {
+  a.v0 = Or(a.v0, b.v0);
+  a.v1 = Or(a.v1, b.v1);
+  return a;
+}
+
+template <typename T>  // Applies the 128-bit Xor to each half.
+HWY_API Vec256<T> Xor(Vec256<T> a, Vec256<T> b) {
+  a.v0 = Xor(a.v0, b.v0);
+  a.v1 = Xor(a.v1, b.v1);
+  return a;
+}
+
+template <typename T>  // x1 ^ x2 ^ x3, built from two Xor calls.
+HWY_API Vec256<T> Xor3(Vec256<T> x1, Vec256<T> x2, Vec256<T> x3) {
+  return Xor(x1, Xor(x2, x3));
+}
+
+template <typename T>  // o1 | o2 | o3, built from two Or calls.
+HWY_API Vec256<T> Or3(Vec256<T> o1, Vec256<T> o2, Vec256<T> o3) {
+  return Or(o1, Or(o2, o3));
+}
+
+template <typename T>  // o | (a1 & a2).
+HWY_API Vec256<T> OrAnd(Vec256<T> o, Vec256<T> a1, Vec256<T> a2) {
+  return Or(o, And(a1, a2));
+}
+
+template <typename T>  // IfThenElse with the mask supplied as a vector.
+HWY_API Vec256<T> IfVecThenElse(Vec256<T> mask, Vec256<T> yes, Vec256<T> no) {
+  return IfThenElse(MaskFromVec(mask), yes, no);
+}
+
+// ------------------------------ Operator overloads (internal-only if float)
+
+template <typename T>  // Operator form of And.
+HWY_API Vec256<T> operator&(const Vec256<T> a, const Vec256<T> b) {
+  return And(a, b);
+}
+
+template <typename T>  // Operator form of Or.
+HWY_API Vec256<T> operator|(const Vec256<T> a, const Vec256<T> b) {
+  return Or(a, b);
+}
+
+template <typename T>  // Operator form of Xor.
+HWY_API Vec256<T> operator^(const Vec256<T> a, const Vec256<T> b) {
+  return Xor(a, b);
+}
+
+// ------------------------------ CopySign
+template <typename T>
+HWY_API Vec256<T> CopySign(const Vec256<T> magn, const Vec256<T> sign) {
+  static_assert(IsFloat<T>(), "Only makes sense for floating-point");
+  const DFromV<decltype(magn)> d;
+  return BitwiseIfThenElse(SignBit(d), sign, magn);  // SignBit selects `sign`
+}
+
+// ------------------------------ CopySignToAbs
+template <typename T>
+HWY_API Vec256<T> CopySignToAbs(const Vec256<T> abs, const Vec256<T> sign) {
+  static_assert(IsFloat<T>(), "Only makes sense for floating-point");
+  const DFromV<decltype(sign)> d;
+  return OrAnd(abs, SignBit(d), sign);  // abs | (SignBit(d) & sign)
+}
+
+// ------------------------------ Mask
+
+// Mask and Vec are the same (true = FF..FF).
+template <typename T>  // Converts each vector half into the matching mask.
+HWY_API Mask256<T> MaskFromVec(const Vec256<T> v) {
+  Mask256<T> m;
+  m.m0 = MaskFromVec(v.v0);
+  m.m1 = MaskFromVec(v.v1);
+  return m;
+}
+
+template <class D, typename T = TFromD<D>>  // Inverse of MaskFromVec, per half.
+HWY_API Vec256<T> VecFromMask(D d, Mask256<T> m) {
+  const Half<decltype(d)> dh;
+  Vec256<T> v;
+  v.v0 = VecFromMask(dh, m.m0);
+  v.v1 = VecFromMask(dh, m.m1);
+  return v;
+}
+
+// mask ? yes : no
+template <typename T>  // Each half is selected with its own half-mask.
+HWY_API Vec256<T> IfThenElse(Mask256<T> mask, Vec256<T> yes, Vec256<T> no) {
+  yes.v0 = IfThenElse(mask.m0, yes.v0, no.v0);
+  yes.v1 = IfThenElse(mask.m1, yes.v1, no.v1);
+  return yes;
+}
+
+// mask ? yes : 0
+template <typename T>  // Implemented as `yes` ANDed with the mask as a vector.
+HWY_API Vec256<T> IfThenElseZero(Mask256<T> mask, Vec256<T> yes) {
+  return yes & VecFromMask(DFromV<decltype(yes)>(), mask);
+}
+
+// mask ? 0 : no
+template <typename T>  // Implemented as AndNot(mask-as-vector, no).
+HWY_API Vec256<T> IfThenZeroElse(Mask256<T> mask, Vec256<T> no) {
+  return AndNot(VecFromMask(DFromV<decltype(no)>(), mask), no);
+}
+
+template <typename T>  // Applies the 128-bit IfNegativeThenElse to each half.
+HWY_API Vec256<T> IfNegativeThenElse(Vec256<T> v, Vec256<T> yes, Vec256<T> no) {
+  v.v0 = IfNegativeThenElse(v.v0, yes.v0, no.v0);
+  v.v1 = IfNegativeThenElse(v.v1, yes.v1, no.v1);
+  return v;
+}
+
+template <typename T, HWY_IF_FLOAT(T)>  // Zeroes lanes where v < 0.
+HWY_API Vec256<T> ZeroIfNegative(Vec256<T> v) {
+  return IfThenZeroElse(v < Zero(DFromV<decltype(v)>()), v);
+}
+
+// ------------------------------ Mask logical
+
+template <typename T>  // Mask negation via a round trip through vectors.
+HWY_API Mask256<T> Not(const Mask256<T> m) {
+  return MaskFromVec(Not(VecFromMask(Full256<T>(), m)));
+}
+
+template <typename T>  // Mask AND via a round trip through vectors.
+HWY_API Mask256<T> And(const Mask256<T> a, Mask256<T> b) {
+  const Full256<T> d;
+  return MaskFromVec(And(VecFromMask(d, a), VecFromMask(d, b)));
+}
+
+template <typename T>  // Mask AndNot via a round trip through vectors.
+HWY_API Mask256<T> AndNot(const Mask256<T> a, Mask256<T> b) {
+  const Full256<T> d;
+  return MaskFromVec(AndNot(VecFromMask(d, a), VecFromMask(d, b)));
+}
+
+template <typename T>  // Mask OR via a round trip through vectors.
+HWY_API Mask256<T> Or(const Mask256<T> a, Mask256<T> b) {
+  const Full256<T> d;
+  return MaskFromVec(Or(VecFromMask(d, a), VecFromMask(d, b)));
+}
+
+template <typename T>  // Mask XOR via a round trip through vectors.
+HWY_API Mask256<T> Xor(const Mask256<T> a, Mask256<T> b) {
+  const Full256<T> d;
+  return MaskFromVec(Xor(VecFromMask(d, a), VecFromMask(d, b)));
+}
+
+template <typename T>  // Computed as AndNot(a, Not(b)) on the mask vectors.
+HWY_API Mask256<T> ExclusiveNeither(const Mask256<T> a, Mask256<T> b) {
+  const Full256<T> d;
+  return MaskFromVec(AndNot(VecFromMask(d, a), Not(VecFromMask(d, b))));
+}
+
+// ------------------------------ Shl (BroadcastSignBit, IfThenElse)
+template <typename T, HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>  // per-lane counts
+HWY_API Vec256<T> operator<<(Vec256<T> v, const Vec256<T> bits) {
+  v.v0 = operator<<(v.v0, bits.v0);
+  v.v1 = operator<<(v.v1, bits.v1);
+  return v;
+}
+
+// ------------------------------ Shr (BroadcastSignBit, IfThenElse)
+template <typename T, HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>  // per-lane counts
+HWY_API Vec256<T> operator>>(Vec256<T> v, const Vec256<T> bits) {
+  v.v0 = operator>>(v.v0, bits.v0);
+  v.v1 = operator>>(v.v1, bits.v1);
+  return v;
+}
+
+// ------------------------------ BroadcastSignBit (compare, VecFromMask)
+
+template <typename T, HWY_IF_NOT_T_SIZE(T, 1)>
+HWY_API Vec256<T> BroadcastSignBit(const Vec256<T> v) {
+  return ShiftRight<sizeof(T) * 8 - 1>(v);  // shift by (lane bits - 1)
+}
+HWY_API Vec256<int8_t> BroadcastSignBit(const Vec256<int8_t> v) {
+  const DFromV<decltype(v)> d;
+  return VecFromMask(d, v < Zero(d));  // int8 uses a compare, not a shift
+}
+
+// ================================================== MEMORY
+
+// ------------------------------ Load
+
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> Load(D d, const TFromD<D>* HWY_RESTRICT aligned) {
+  const Half<decltype(d)> dh;
+  VFromD<D> ret;
+  ret.v0 = Load(dh, aligned);              // first Lanes(dh) elements
+  ret.v1 = Load(dh, aligned + Lanes(dh));  // next Lanes(dh) elements
+  return ret;
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API Vec256<T> MaskedLoad(Mask256<T> m, D d, const T* HWY_RESTRICT aligned) {
+  return IfThenElseZero(m, Load(d, aligned));  // full Load, then zero by mask
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API Vec256<T> MaskedLoadOr(Vec256<T> v, Mask256<T> m, D d,
+                               const T* HWY_RESTRICT aligned) {
+  return IfThenElse(m, Load(d, aligned), v);  // full Load; v where m is false
+}
+
+// LoadU == Load.
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> LoadU(D d, const TFromD<D>* HWY_RESTRICT p) {
+  return Load(d, p);  // forwards to Load unchanged
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> LoadDup128(D d, const TFromD<D>* HWY_RESTRICT p) {
+  const Half<decltype(d)> dh;
+  VFromD<D> ret;
+  ret.v0 = ret.v1 = Load(dh, p);  // one 128-bit load copied into both halves
+  return ret;
+}
+
+// ------------------------------ Store
+
+template <class D, typename T = TFromD<D>>
+HWY_API void Store(Vec256<T> v, D d, T* HWY_RESTRICT aligned) {
+  const Half<decltype(d)> dh;
+  Store(v.v0, dh, aligned);              // lower half first
+  Store(v.v1, dh, aligned + Lanes(dh));  // upper half right after it
+}
+
+// StoreU == Store.
+template <class D, typename T = TFromD<D>>
+HWY_API void StoreU(Vec256<T> v, D d, T* HWY_RESTRICT p) {
+  Store(v, d, p);  // forwards to Store unchanged
+}
+
+template <class D, typename T = TFromD<D>>
+HWY_API void BlendedStore(Vec256<T> v, Mask256<T> m, D d, T* HWY_RESTRICT p) {
+  StoreU(IfThenElse(m, v, LoadU(d, p)), d, p);  // load, blend, store all lanes
+}
+
+// ------------------------------ Stream
+template <class D, typename T = TFromD<D>>
+HWY_API void Stream(Vec256<T> v, D d, T* HWY_RESTRICT aligned) {
+  // Same as aligned stores.
+  Store(v, d, aligned);  // no separate streaming path; forwards to Store
+}
+
+// ------------------------------ Scatter, Gather defined in wasm_128
+
+// ================================================== SWIZZLE
+
+// ------------------------------ ExtractLane
+template <typename T>  // Returns lane i; i is not range-checked here.
+HWY_API T ExtractLane(const Vec256<T> v, size_t i) {
+  alignas(32) T lanes[32 / sizeof(T)];  // scratch copy of all lanes
+  Store(v, DFromV<decltype(v)>(), lanes);
+  return lanes[i];
+}
+
+// ------------------------------ InsertLane
+template <typename T>  // Returns v with lane i set to t; i is not range-checked.
+HWY_API Vec256<T> InsertLane(const Vec256<T> v, size_t i, T t) {
+  DFromV<decltype(v)> d;
+  alignas(32) T lanes[32 / sizeof(T)];  // scratch copy of all lanes
+  Store(v, d, lanes);
+  lanes[i] = t;
+  return Load(d, lanes);
+}
+
+// ------------------------------ ExtractBlock
+template <int kBlockIdx, class T>  // Block 0 is v0, block 1 is v1.
+HWY_API Vec128<T> ExtractBlock(Vec256<T> v) {
+  static_assert(kBlockIdx == 0 || kBlockIdx == 1, "Invalid block index");
+  return (kBlockIdx == 0) ? v.v0 : v.v1;
+}
+
+// ------------------------------ InsertBlock
+template <int kBlockIdx, class T>  // Replaces one 128-bit block of v.
+HWY_API Vec256<T> InsertBlock(Vec256<T> v, Vec128<T> blk_to_insert) {
+  static_assert(kBlockIdx == 0 || kBlockIdx == 1, "Invalid block index");
+  Vec256<T> result;
+  if (kBlockIdx == 0) {
+    result.v0 = blk_to_insert;  // new lower block
+    result.v1 = v.v1;           // upper block kept from v
+  } else {
+    result.v0 = v.v0;           // lower block kept from v
+    result.v1 = blk_to_insert;  // new upper block
+  }
+  return result;
+}
+
+// ------------------------------ BroadcastBlock
+template <int kBlockIdx, class T>  // Copies the chosen block into both halves.
+HWY_API Vec256<T> BroadcastBlock(Vec256<T> v) {
+  static_assert(kBlockIdx == 0 || kBlockIdx == 1, "Invalid block index");
+  Vec256<T> result;
+  result.v0 = result.v1 = (kBlockIdx == 0 ? v.v0 : v.v1);
+  return result;
+}
+
+// ------------------------------ LowerHalf
+
+template <class D, typename T = TFromD<D>>  // Tag overload; returns v.v0.
+HWY_API Vec128<T> LowerHalf(D /* tag */, Vec256<T> v) {
+  return v.v0;
+}
+
+template <typename T>  // Tag-less overload; returns v.v0.
+HWY_API Vec128<T> LowerHalf(Vec256<T> v) {
+  return v.v0;
+}
+
+// ------------------------------ GetLane (LowerHalf)
+template <typename T>  // GetLane of the lower 128-bit half.
+HWY_API T GetLane(const Vec256<T> v) {
+  return GetLane(LowerHalf(v));
+}
+
+// ------------------------------ ShiftLeftBytes
+
+template <int kBytes, class D, typename T = TFromD<D>>
+HWY_API Vec256<T> ShiftLeftBytes(D d, Vec256<T> v) {
+  const Half<decltype(d)> dh;
+  v.v0 = ShiftLeftBytes<kBytes>(dh, v.v0);  // each half shifted on its own
+  v.v1 = ShiftLeftBytes<kBytes>(dh, v.v1);
+  return v;
+}
+
+template <int kBytes, typename T>  // Tag-less form; derives the tag from v.
+HWY_API Vec256<T> ShiftLeftBytes(Vec256<T> v) {
+  return ShiftLeftBytes<kBytes>(DFromV<decltype(v)>(), v);
+}
+
+// ------------------------------ ShiftLeftLanes
+
+template <int kLanes, class D, typename T = TFromD<D>>
+HWY_API Vec256<T> ShiftLeftLanes(D d, const Vec256<T> v) {
+  const Repartition<uint8_t, decltype(d)> d8;  // byte view of the vector
+  return BitCast(d, ShiftLeftBytes<kLanes * sizeof(T)>(BitCast(d8, v)));
+}
+
+template <int kLanes, typename T>  // Tag-less form; derives the tag from v.
+HWY_API Vec256<T> ShiftLeftLanes(const Vec256<T> v) {
+  return ShiftLeftLanes<kLanes>(DFromV<decltype(v)>(), v);
+}
+
+// ------------------------------ ShiftRightBytes
+template <int kBytes, class D, typename T = TFromD<D>>
+HWY_API Vec256<T> ShiftRightBytes(D d, Vec256<T> v) {
+  const Half<decltype(d)> dh;
+  v.v0 = ShiftRightBytes<kBytes>(dh, v.v0);  // each half shifted on its own
+  v.v1 = ShiftRightBytes<kBytes>(dh, v.v1);
+  return v;
+}
+
+// ------------------------------ ShiftRightLanes
+template <int kLanes, class D, typename T = TFromD<D>>
+HWY_API Vec256<T> ShiftRightLanes(D d, const Vec256<T> v) {
+  const Repartition<uint8_t, decltype(d)> d8;  // byte view of the vector
+  return BitCast(d, ShiftRightBytes<kLanes * sizeof(T)>(d8, BitCast(d8, v)));
+}
+
+// ------------------------------ UpperHalf (ShiftRightBytes)
+template <class D, typename T = TFromD<D>>  // Returns v.v1.
+HWY_API Vec128<T> UpperHalf(D /* tag */, const Vec256<T> v) {
+  return v.v1;
+}
+
+// ------------------------------ CombineShiftRightBytes
+
+template <int kBytes, class D, typename T = TFromD<D>>
+HWY_API Vec256<T> CombineShiftRightBytes(D d, Vec256<T> hi, Vec256<T> lo) {
+  const Half<decltype(d)> dh;
+  hi.v0 = CombineShiftRightBytes<kBytes>(dh, hi.v0, lo.v0);  // per 128-bit half
+  hi.v1 = CombineShiftRightBytes<kBytes>(dh, hi.v1, lo.v1);
+  return hi;
+}
+
+// ------------------------------ Broadcast/splat any lane
+
+template <int kLane, typename T>  // kLane indexes within each 128-bit half.
+HWY_API Vec256<T> Broadcast(const Vec256<T> v) {
+  Vec256<T> ret;
+  ret.v0 = Broadcast<kLane>(v.v0);
+  ret.v1 = Broadcast<kLane>(v.v1);
+  return ret;
+}
+
+template <int kLane, typename T>  // kLane indexes the whole 256-bit vector.
+HWY_API Vec256<T> BroadcastLane(const Vec256<T> v) {
+  constexpr int kLanesPerBlock = static_cast<int>(16 / sizeof(T));
+  static_assert(0 <= kLane && kLane < kLanesPerBlock * 2, "Invalid lane");
+  constexpr int kLaneInBlkIdx = kLane & (kLanesPerBlock - 1);  // index in block
+  Vec256<T> ret;
+  ret.v0 = ret.v1 =
+      Broadcast<kLaneInBlkIdx>(kLane >= kLanesPerBlock ? v.v1 : v.v0);
+  return ret;
+}
+
+// ------------------------------ TableLookupBytes
+
+// Both full
+template <typename T, typename TI>  // Each half looks up only its own half.
+HWY_API Vec256<TI> TableLookupBytes(const Vec256<T> bytes, Vec256<TI> from) {
+  from.v0 = TableLookupBytes(bytes.v0, from.v0);
+  from.v1 = TableLookupBytes(bytes.v1, from.v1);
+  return from;
+}
+
+// Partial index vector
+template <typename T, typename TI, size_t NI>
+HWY_API Vec128<TI, NI> TableLookupBytes(Vec256<T> bytes,
+                                        const Vec128<TI, NI> from) {
+  // First expand to full 128, then 256.
+  const auto from_256 = ZeroExtendVector(Full256<TI>(), Vec128<TI>{from.raw});
+  const auto tbl_full = TableLookupBytes(bytes, from_256);  // 256-bit overload
+  // Shrink to 128, then partial.
+  return Vec128<TI, NI>{LowerHalf(Full128<TI>(), tbl_full).raw};
+}
+
+// Partial table vector
+template <typename T, size_t N, typename TI>
+HWY_API Vec256<TI> TableLookupBytes(Vec128<T, N> bytes, const Vec256<TI> from) {
+  // First expand to full 128, then 256.
+  const auto bytes_256 = ZeroExtendVector(Full256<T>(), Vec128<T>{bytes.raw});
+  return TableLookupBytes(bytes_256, from);  // 256-bit overload
+}
+
+// Partial both are handled by wasm_128.
+
+template <class V, class VI>  // Forwards to TableLookupBytes.
+HWY_API VI TableLookupBytesOr0(V bytes, VI from) {
+  // wasm out-of-bounds policy already zeros, so TableLookupBytes is fine.
+  return TableLookupBytes(bytes, from);
+}
+
+// ------------------------------ Hard-coded shuffles
+
+template <typename T>  // Applies the 128-bit Shuffle01 to each half.
+HWY_API Vec256<T> Shuffle01(Vec256<T> v) {
+  v.v0 = Shuffle01(v.v0);
+  v.v1 = Shuffle01(v.v1);
+  return v;
+}
+
+template <typename T>  // Applies the 128-bit Shuffle2301 to each half.
+HWY_API Vec256<T> Shuffle2301(Vec256<T> v) {
+  v.v0 = Shuffle2301(v.v0);
+  v.v1 = Shuffle2301(v.v1);
+  return v;
+}
+
+template <typename T>  // Applies the 128-bit Shuffle1032 to each half.
+HWY_API Vec256<T> Shuffle1032(Vec256<T> v) {
+  v.v0 = Shuffle1032(v.v0);
+  v.v1 = Shuffle1032(v.v1);
+  return v;
+}
+
+template <typename T>  // Applies the 128-bit Shuffle0321 to each half.
+HWY_API Vec256<T> Shuffle0321(Vec256<T> v) {
+  v.v0 = Shuffle0321(v.v0);
+  v.v1 = Shuffle0321(v.v1);
+  return v;
+}
+
+template <typename T>  // Applies the 128-bit Shuffle2103 to each half.
+HWY_API Vec256<T> Shuffle2103(Vec256<T> v) {
+  v.v0 = Shuffle2103(v.v0);
+  v.v1 = Shuffle2103(v.v1);
+  return v;
+}
+
+template <typename T>  // Applies the 128-bit Shuffle0123 to each half.
+HWY_API Vec256<T> Shuffle0123(Vec256<T> v) {
+  v.v0 = Shuffle0123(v.v0);
+  v.v1 = Shuffle0123(v.v1);
+  return v;
+}
+
+// Used by generic_ops-inl.h
+namespace detail {  // two-input shuffles for 4-byte lanes, applied per half
+
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec256<T> ShuffleTwo2301(Vec256<T> a, const Vec256<T> b) {
+  a.v0 = ShuffleTwo2301(a.v0, b.v0);  // 128-bit op on matching halves
+  a.v1 = ShuffleTwo2301(a.v1, b.v1);
+  return a;
+}
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec256<T> ShuffleTwo1230(Vec256<T> a, const Vec256<T> b) {
+  a.v0 = ShuffleTwo1230(a.v0, b.v0);  // 128-bit op on matching halves
+  a.v1 = ShuffleTwo1230(a.v1, b.v1);
+  return a;
+}
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec256<T> ShuffleTwo3012(Vec256<T> a, const Vec256<T> b) {
+  a.v0 = ShuffleTwo3012(a.v0, b.v0);  // 128-bit op on matching halves
+  a.v1 = ShuffleTwo3012(a.v1, b.v1);
+  return a;
+}
+
+}  // namespace detail
+
+// ------------------------------ TableLookupLanes
+
+// Returned by SetTableIndices for use by TableLookupLanes.
+template <typename T>  // Raw index vectors, one per 128-bit half.
+struct Indices256 {
+  __v128_u i0;  // indices that produce result half v0
+  __v128_u i1;  // indices that produce result half v1
+};
+
+template <class D, typename T = TFromD<D>, typename TI>
+HWY_API Indices256<T> IndicesFromVec(D /* tag */, Vec256<TI> vec) {
+  static_assert(sizeof(T) == sizeof(TI), "Index size must match lane");
+  Indices256<T> ret;
+  ret.i0 = vec.v0.raw;  // raw halves are copied without any range check
+  ret.i1 = vec.v1.raw;
+  return ret;
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), typename TI>
+HWY_API Indices256<TFromD<D>> SetTableIndices(D d, const TI* idx) {
+  const Rebind<TI, decltype(d)> di;  // same lane count, index lane type
+  return IndicesFromVec(d, LoadU(di, idx));
+}
+
+template <typename T>  // Each result half may read lanes from v.v0 and v.v1.
+HWY_API Vec256<T> TableLookupLanes(const Vec256<T> v, Indices256<T> idx) {
+  const DFromV<decltype(v)> d;
+  const Half<decltype(d)> dh;
+  const auto idx_i0 = IndicesFromVec(dh, Vec128<T>{idx.i0});  // for result v0
+  const auto idx_i1 = IndicesFromVec(dh, Vec128<T>{idx.i1});  // for result v1
+
+  Vec256<T> result;
+  result.v0 = TwoTablesLookupLanes(v.v0, v.v1, idx_i0);
+  result.v1 = TwoTablesLookupLanes(v.v0, v.v1, idx_i1);
+  return result;
+}
+
+template <typename T>  // Or0 variant; delegates to TableLookupLanes.
+HWY_API Vec256<T> TableLookupLanesOr0(Vec256<T> v, Indices256<T> idx) {
+  // The out of bounds behavior will already zero lanes.
+  return TableLookupLanes(v, idx);  // was a self-call (endless recursion)
+}
+
+template <typename T>  // Looks up lanes from the concatenated tables a and b.
+HWY_API Vec256<T> TwoTablesLookupLanes(const Vec256<T> a, const Vec256<T> b,
+                                       Indices256<T> idx) {
+  const DFromV<decltype(a)> d;
+  const Half<decltype(d)> dh;
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = MakeUnsigned<T>;
+  constexpr size_t kLanesPerVect = 32 / sizeof(TU);  // lanes in one Vec256
+
+  Vec256<TU> vi;
+  vi.v0 = Vec128<TU>{idx.i0};
+  vi.v1 = Vec128<TU>{idx.i1};
+  const auto vmod = vi & Set(du, TU{kLanesPerVect - 1});  // index within table
+  const auto is_lo = RebindMask(d, vi == vmod);  // unchanged => refers to a
+
+  const auto idx_i0 = IndicesFromVec(dh, vmod.v0);
+  const auto idx_i1 = IndicesFromVec(dh, vmod.v1);
+
+  Vec256<T> result_lo;  // lookups into a
+  Vec256<T> result_hi;  // lookups into b
+  result_lo.v0 = TwoTablesLookupLanes(a.v0, a.v1, idx_i0);
+  result_lo.v1 = TwoTablesLookupLanes(a.v0, a.v1, idx_i1);
+  result_hi.v0 = TwoTablesLookupLanes(b.v0, b.v1, idx_i0);
+  result_hi.v1 = TwoTablesLookupLanes(b.v0, b.v1, idx_i1);
+  return IfThenElse(is_lo, result_lo, result_hi);
+}
+
+// ------------------------------ Reverse
+// Reverses all lanes: each half is reversed and the halves trade places.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec256<T> Reverse(D d, const Vec256<T> v) {
+  const Half<decltype(d)> d_half;
+  Vec256<T> reversed;
+  // The reversed upper half becomes the new lower half and vice versa.
+  reversed.v0 = Reverse(d_half, v.v1);
+  reversed.v1 = Reverse(d_half, v.v0);
+  return reversed;
+}
+
+// ------------------------------ Reverse2
+// Applies the 128-bit Reverse2 to each half independently.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec256<T> Reverse2(D d, Vec256<T> v) {
+  const Half<decltype(d)> d_half;
+  Vec256<T> out;
+  out.v0 = Reverse2(d_half, v.v0);
+  out.v1 = Reverse2(d_half, v.v1);
+  return out;
+}
+
+// ------------------------------ Reverse4
+
+// Each block has only 2 lanes, so swap blocks and their lanes.
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec256<T> Reverse4(D d, const Vec256<T> v) {
+  const Half<decltype(d)> d_half;
+  Vec256<T> out;
+  // The four 64-bit lanes span both halves: exchange the halves and reverse
+  // the two lanes inside each.
+  out.v1 = Reverse2(d_half, v.v0);
+  out.v0 = Reverse2(d_half, v.v1);
+  return out;
+}
+
+// Lanes narrower or wider than 8 bytes are excluded from the overload above;
+// here the 128-bit Reverse4 is applied to each half independently.
+template <class D, typename T = TFromD<D>, HWY_IF_NOT_T_SIZE(T, 8)>
+HWY_API Vec256<T> Reverse4(D d, Vec256<T> v) {
+  const Half<decltype(d)> d_half;
+  Vec256<T> out;
+  out.v0 = Reverse4(d_half, v.v0);
+  out.v1 = Reverse4(d_half, v.v1);
+  return out;
+}
+
+// ------------------------------ Reverse8
+
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec256<T> Reverse8(D /* tag */, Vec256<T> /* v */) {
+  HWY_ASSERT(0);  // unsupported: 32 bytes hold only four 8-byte lanes
+}
+
+// Each block has only 4 lanes, so swap blocks and their lanes.
+template <class D, typename T = TFromD<D>, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec256<T> Reverse8(D d, const Vec256<T> v) {
+  const Half<decltype(d)> d_half;
+  Vec256<T> out;
+  // The eight 32-bit lanes span both halves: exchange the halves and reverse
+  // the four lanes inside each.
+  out.v1 = Reverse4(d_half, v.v0);
+  out.v0 = Reverse4(d_half, v.v1);
+  return out;
+}
+
+// 8- and 16-bit lanes: applies the 128-bit Reverse8 to each half
+// independently.
+template <class D, typename T = TFromD<D>,
+          HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2))>
+HWY_API Vec256<T> Reverse8(D d, Vec256<T> v) {
+  const Half<decltype(d)> d_half;
+  Vec256<T> out;
+  out.v0 = Reverse8(d_half, v.v0);
+  out.v1 = Reverse8(d_half, v.v1);
+  return out;
+}
+
+// ------------------------------ InterleaveLower
+
+// Applies the 128-bit InterleaveLower to each pair of corresponding halves.
+template <typename T>
+HWY_API Vec256<T> InterleaveLower(Vec256<T> a, Vec256<T> b) {
+  Vec256<T> out;
+  out.v0 = InterleaveLower(a.v0, b.v0);
+  out.v1 = InterleaveLower(a.v1, b.v1);
+  return out;
+}
+
+// wasm_128 already defines a template with D, V, V args.
+
+// ------------------------------ InterleaveUpper (UpperHalf)
+
+// Applies the 128-bit InterleaveUpper to each pair of corresponding halves.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec256<T> InterleaveUpper(D d, Vec256<T> a, Vec256<T> b) {
+  const Half<decltype(d)> d_half;
+  Vec256<T> out;
+  out.v0 = InterleaveUpper(d_half, a.v0, b.v0);
+  out.v1 = InterleaveUpper(d_half, a.v1, b.v1);
+  return out;
+}
+
+// ------------------------------ ZipLower/ZipUpper defined in wasm_128
+
+// ================================================== COMBINE
+
+// ------------------------------ Combine (InterleaveLower)
+// Builds a 256-bit vector whose lower half is `lo` and upper half is `hi`.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec256<T> Combine(D /* d */, Vec128<T> hi, Vec128<T> lo) {
+  Vec256<T> combined;
+  combined.v0 = lo;
+  combined.v1 = hi;
+  return combined;
+}
+
+// ------------------------------ ZeroExtendVector (Combine)
+// Returns a 256-bit vector with `lo` as lower half and a zero upper half.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec256<T> ZeroExtendVector(D d, Vec128<T> lo) {
+  const Half<decltype(d)> d_half;
+  const auto zero_upper = Zero(d_half);
+  return Combine(d, zero_upper, lo);
+}
+
+// ------------------------------ ZeroExtendResizeBitCast
+
+namespace detail {
+
+// Overload for 32-byte destinations, enabled by
+// HWY_IF_LANES_LE(kFromVectSize, 8). Works in two steps: resize to the
+// half-width destination tag via the three-argument ZeroExtendResizeBitCast,
+// then zero-extend that result to the full 32 bytes.
+template <size_t kFromVectSize, class DTo, class DFrom,
+          HWY_IF_LANES_LE(kFromVectSize, 8)>
+HWY_INLINE VFromD<DTo> ZeroExtendResizeBitCast(
+    hwy::SizeTag<kFromVectSize> /* from_size_tag */,
+    hwy::SizeTag<32> /* to_size_tag */, DTo d_to, DFrom d_from,
+    VFromD<DFrom> v) {
+  const Half<decltype(d_to)> d_to_half;
+  const auto lower_half = ZeroExtendResizeBitCast(d_to_half, d_from, v);
+  return ZeroExtendVector(d_to, lower_half);
+}
+
+}  // namespace detail
+
+// ------------------------------ ConcatLowerLower
+// Result: lower half from lo's lower half, upper half from hi's lower half.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec256<T> ConcatLowerLower(D /* tag */, Vec256<T> hi, Vec256<T> lo) {
+  Vec256<T> out;
+  out.v0 = lo.v0;
+  out.v1 = hi.v0;
+  return out;
+}
+
+// ------------------------------ ConcatUpperUpper
+// Result: lower half from lo's upper half, upper half from hi's upper half.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec256<T> ConcatUpperUpper(D /* tag */, Vec256<T> hi, Vec256<T> lo) {
+  Vec256<T> out;
+  out.v0 = lo.v1;
+  out.v1 = hi.v1;
+  return out;
+}
+
+// ------------------------------ ConcatLowerUpper
+// Result: lower half from lo's upper half, upper half from hi's lower half.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec256<T> ConcatLowerUpper(D /* tag */, Vec256<T> hi, Vec256<T> lo) {
+  Vec256<T> out;
+  out.v0 = lo.v1;
+  out.v1 = hi.v0;
+  return out;
+}
+
+// ------------------------------ ConcatUpperLower
+// Result: lower half from lo's lower half, upper half from hi's upper half.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec256<T> ConcatUpperLower(D /* tag */, Vec256<T> hi, Vec256<T> lo) {
+  Vec256<T> out;
+  out.v0 = lo.v0;
+  out.v1 = hi.v1;
+  return out;
+}
+
+// ------------------------------ ConcatOdd
+// The 128-bit ConcatOdd over both halves of `lo` forms the lower result half;
+// the same over both halves of `hi` forms the upper result half.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec256<T> ConcatOdd(D d, Vec256<T> hi, Vec256<T> lo) {
+  const Half<decltype(d)> d_half;
+  Vec256<T> out;
+  out.v1 = ConcatOdd(d_half, hi.v1, hi.v0);
+  out.v0 = ConcatOdd(d_half, lo.v1, lo.v0);
+  return out;
+}
+
+// ------------------------------ ConcatEven
+// The 128-bit ConcatEven over both halves of `lo` forms the lower result half;
+// the same over both halves of `hi` forms the upper result half.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec256<T> ConcatEven(D d, Vec256<T> hi, Vec256<T> lo) {
+  const Half<decltype(d)> d_half;
+  Vec256<T> out;
+  out.v1 = ConcatEven(d_half, hi.v1, hi.v0);
+  out.v0 = ConcatEven(d_half, lo.v1, lo.v0);
+  return out;
+}
+
+// ------------------------------ DupEven
+// Applies the 128-bit DupEven to each half independently.
+template <typename T>
+HWY_API Vec256<T> DupEven(Vec256<T> v) {
+  Vec256<T> out;
+  out.v0 = DupEven(v.v0);
+  out.v1 = DupEven(v.v1);
+  return out;
+}
+
+// ------------------------------ DupOdd
+// Applies the 128-bit DupOdd to each half independently.
+template <typename T>
+HWY_API Vec256<T> DupOdd(Vec256<T> v) {
+  Vec256<T> out;
+  out.v0 = DupOdd(v.v0);
+  out.v1 = DupOdd(v.v1);
+  return out;
+}
+
+// ------------------------------ OddEven
+// Applies the 128-bit OddEven to each pair of corresponding halves.
+template <typename T>
+HWY_API Vec256<T> OddEven(Vec256<T> a, const Vec256<T> b) {
+  Vec256<T> out;
+  out.v0 = OddEven(a.v0, b.v0);
+  out.v1 = OddEven(a.v1, b.v1);
+  return out;
+}
+
+// ------------------------------ OddEvenBlocks
+// Block 0 (lower half) comes from `even`, block 1 (upper half) from `odd`.
+template <typename T>
+HWY_API Vec256<T> OddEvenBlocks(Vec256<T> odd, Vec256<T> even) {
+  Vec256<T> out;
+  out.v0 = even.v0;
+  out.v1 = odd.v1;
+  return out;
+}
+
+// ------------------------------ SwapAdjacentBlocks
+// Exchanges the lower and upper 128-bit halves.
+template <typename T>
+HWY_API Vec256<T> SwapAdjacentBlocks(Vec256<T> v) {
+  Vec256<T> swapped;
+  swapped.v1 = v.v0;
+  swapped.v0 = v.v1;
+  return swapped;
+}
+
+// ------------------------------ ReverseBlocks
+// With only two blocks, reversing the block order is the same as exchanging
+// the two halves.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec256<T> ReverseBlocks(D /* tag */, const Vec256<T> v) {
+  Vec256<T> swapped;
+  swapped.v0 = v.v1;
+  swapped.v1 = v.v0;
+  return swapped;
+}
+
+// ------------------------------ Per4LaneBlockShuffle
+namespace detail {
+
+// Per4LaneBlockShuffle overloads for 32-byte vectors, chosen by lane size.
+// kIdx3210 packs four 2-bit source lane selectors: bits [1:0] pick the source
+// for output lane 0 of each group of four lanes, bits [7:6] for output lane 3.
+
+// 8-bit lanes: the pattern is repeated for every group of four bytes within
+// each 128-bit half.
+template <size_t kIdx3210, class V>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> /*idx_3210_tag*/,
+                                  hwy::SizeTag<1> /*lane_size_tag*/,
+                                  hwy::SizeTag<32> /*vect_size_tag*/, V v) {
+  const DFromV<decltype(v)> d;
+  const Half<decltype(d)> dh;
+  using VHalf = VFromD<decltype(dh)>;
+
+  constexpr int kSel0 = static_cast<int>(kIdx3210 & 3);
+  constexpr int kSel1 = static_cast<int>((kIdx3210 >> 2) & 3);
+  constexpr int kSel2 = static_cast<int>((kIdx3210 >> 4) & 3);
+  constexpr int kSel3 = static_cast<int>((kIdx3210 >> 6) & 3);
+
+  V out;
+  out.v0 = VHalf{wasm_i8x16_shuffle(
+      v.v0.raw, v.v0.raw, kSel0, kSel1, kSel2, kSel3, kSel0 + 4, kSel1 + 4,
+      kSel2 + 4, kSel3 + 4, kSel0 + 8, kSel1 + 8, kSel2 + 8, kSel3 + 8,
+      kSel0 + 12, kSel1 + 12, kSel2 + 12, kSel3 + 12)};
+  out.v1 = VHalf{wasm_i8x16_shuffle(
+      v.v1.raw, v.v1.raw, kSel0, kSel1, kSel2, kSel3, kSel0 + 4, kSel1 + 4,
+      kSel2 + 4, kSel3 + 4, kSel0 + 8, kSel1 + 8, kSel2 + 8, kSel3 + 8,
+      kSel0 + 12, kSel1 + 12, kSel2 + 12, kSel3 + 12)};
+  return out;
+}
+
+// 16-bit lanes: two groups of four lanes per 128-bit half.
+template <size_t kIdx3210, class V>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> /*idx_3210_tag*/,
+                                  hwy::SizeTag<2> /*lane_size_tag*/,
+                                  hwy::SizeTag<32> /*vect_size_tag*/, V v) {
+  const DFromV<decltype(v)> d;
+  const Half<decltype(d)> dh;
+  using VHalf = VFromD<decltype(dh)>;
+
+  constexpr int kSel0 = static_cast<int>(kIdx3210 & 3);
+  constexpr int kSel1 = static_cast<int>((kIdx3210 >> 2) & 3);
+  constexpr int kSel2 = static_cast<int>((kIdx3210 >> 4) & 3);
+  constexpr int kSel3 = static_cast<int>((kIdx3210 >> 6) & 3);
+
+  V out;
+  out.v0 =
+      VHalf{wasm_i16x8_shuffle(v.v0.raw, v.v0.raw, kSel0, kSel1, kSel2, kSel3,
+                               kSel0 + 4, kSel1 + 4, kSel2 + 4, kSel3 + 4)};
+  out.v1 =
+      VHalf{wasm_i16x8_shuffle(v.v1.raw, v.v1.raw, kSel0, kSel1, kSel2, kSel3,
+                               kSel0 + 4, kSel1 + 4, kSel2 + 4, kSel3 + 4)};
+  return out;
+}
+
+// 32-bit lanes: exactly one group of four lanes per 128-bit half.
+template <size_t kIdx3210, class V>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> /*idx_3210_tag*/,
+                                  hwy::SizeTag<4> /*lane_size_tag*/,
+                                  hwy::SizeTag<32> /*vect_size_tag*/, V v) {
+  const DFromV<decltype(v)> d;
+  const Half<decltype(d)> dh;
+  using VHalf = VFromD<decltype(dh)>;
+
+  constexpr int kSel0 = static_cast<int>(kIdx3210 & 3);
+  constexpr int kSel1 = static_cast<int>((kIdx3210 >> 2) & 3);
+  constexpr int kSel2 = static_cast<int>((kIdx3210 >> 4) & 3);
+  constexpr int kSel3 = static_cast<int>((kIdx3210 >> 6) & 3);
+
+  V out;
+  out.v0 = VHalf{
+      wasm_i32x4_shuffle(v.v0.raw, v.v0.raw, kSel0, kSel1, kSel2, kSel3)};
+  out.v1 = VHalf{
+      wasm_i32x4_shuffle(v.v1.raw, v.v1.raw, kSel0, kSel1, kSel2, kSel3)};
+  return out;
+}
+
+// 64-bit lanes: the four lanes span both halves, so each shuffle reads from
+// the pair (v0, v1) and selectors 0..3 address lanes across that pair.
+template <size_t kIdx3210, class V>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> /*idx_3210_tag*/,
+                                  hwy::SizeTag<8> /*lane_size_tag*/,
+                                  hwy::SizeTag<32> /*vect_size_tag*/, V v) {
+  const DFromV<decltype(v)> d;
+  const Half<decltype(d)> dh;
+  using VHalf = VFromD<decltype(dh)>;
+
+  constexpr int kSel0 = static_cast<int>(kIdx3210 & 3);
+  constexpr int kSel1 = static_cast<int>((kIdx3210 >> 2) & 3);
+  constexpr int kSel2 = static_cast<int>((kIdx3210 >> 4) & 3);
+  constexpr int kSel3 = static_cast<int>((kIdx3210 >> 6) & 3);
+
+  V out;
+  out.v0 = VHalf{wasm_i64x2_shuffle(v.v0.raw, v.v1.raw, kSel0, kSel1)};
+  out.v1 = VHalf{wasm_i64x2_shuffle(v.v0.raw, v.v1.raw, kSel2, kSel3)};
+  return out;
+}
+
+}  // namespace detail
+
+// ------------------------------ SlideUpBlocks
+// Shifts whole 128-bit blocks upward by kBlocks (0 or 1). For kBlocks == 1 the
+// lower block moves to the upper position and the lower block becomes zero.
+template <int kBlocks, class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> SlideUpBlocks(D d, VFromD<D> v) {
+  static_assert(0 <= kBlocks && kBlocks <= 1,
+                "kBlocks must be between 0 and 1");
+  if (kBlocks == 1) {
+    return ConcatLowerLower(d, v, Zero(d));
+  }
+  return v;
+}
+
+// ------------------------------ SlideDownBlocks
+// Shifts whole 128-bit blocks downward by kBlocks (0 or 1). For kBlocks == 1
+// the upper block moves to the lower position and the upper block becomes
+// zero.
+template <int kBlocks, class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> SlideDownBlocks(D d, VFromD<D> v) {
+  static_assert(0 <= kBlocks && kBlocks <= 1,
+                "kBlocks must be between 0 and 1");
+  const Half<decltype(d)> d_half;
+  if (kBlocks == 1) {
+    return ZeroExtendVector(d, UpperHalf(d_half, v));
+  }
+  return v;
+}
+
+// ------------------------------ SlideUpLanes
+
+// Shifts lanes toward higher indices by `amt` lanes, filling vacated lanes
+// with zero. Outside debug builds, compile-time-constant amounts use fixed
+// byte shifts; all other cases use byte table lookups.
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+  const Half<decltype(d)> dh;
+  const RebindToUnsigned<decltype(d)> du;
+  const RebindToUnsigned<decltype(dh)> dh_u;
+  const auto vu = BitCast(du, v);
+  VFromD<D> out;
+
+#if !HWY_IS_DEBUG_BUILD
+  constexpr size_t kLanesPerBlock = 16 / sizeof(TFromD<D>);
+  // Constant shift within one block: the lower half is shifted left by N
+  // bytes, the upper half takes the 16 bytes starting (16 - N) bytes into the
+  // concatenation of both halves.
+  if (__builtin_constant_p(amt) && amt < kLanesPerBlock) {
+    switch (amt * sizeof(TFromD<D>)) {
+      case 0:
+        return v;
+      case 1:
+        out.v0 = BitCast(dh, ShiftLeftBytes<1>(dh_u, vu.v0));
+        out.v1 = BitCast(dh, CombineShiftRightBytes<15>(dh_u, vu.v1, vu.v0));
+        return out;
+      case 2:
+        out.v0 = BitCast(dh, ShiftLeftBytes<2>(dh_u, vu.v0));
+        out.v1 = BitCast(dh, CombineShiftRightBytes<14>(dh_u, vu.v1, vu.v0));
+        return out;
+      case 3:
+        out.v0 = BitCast(dh, ShiftLeftBytes<3>(dh_u, vu.v0));
+        out.v1 = BitCast(dh, CombineShiftRightBytes<13>(dh_u, vu.v1, vu.v0));
+        return out;
+      case 4:
+        out.v0 = BitCast(dh, ShiftLeftBytes<4>(dh_u, vu.v0));
+        out.v1 = BitCast(dh, CombineShiftRightBytes<12>(dh_u, vu.v1, vu.v0));
+        return out;
+      case 5:
+        out.v0 = BitCast(dh, ShiftLeftBytes<5>(dh_u, vu.v0));
+        out.v1 = BitCast(dh, CombineShiftRightBytes<11>(dh_u, vu.v1, vu.v0));
+        return out;
+      case 6:
+        out.v0 = BitCast(dh, ShiftLeftBytes<6>(dh_u, vu.v0));
+        out.v1 = BitCast(dh, CombineShiftRightBytes<10>(dh_u, vu.v1, vu.v0));
+        return out;
+      case 7:
+        out.v0 = BitCast(dh, ShiftLeftBytes<7>(dh_u, vu.v0));
+        out.v1 = BitCast(dh, CombineShiftRightBytes<9>(dh_u, vu.v1, vu.v0));
+        return out;
+      case 8:
+        out.v0 = BitCast(dh, ShiftLeftBytes<8>(dh_u, vu.v0));
+        out.v1 = BitCast(dh, CombineShiftRightBytes<8>(dh_u, vu.v1, vu.v0));
+        return out;
+      case 9:
+        out.v0 = BitCast(dh, ShiftLeftBytes<9>(dh_u, vu.v0));
+        out.v1 = BitCast(dh, CombineShiftRightBytes<7>(dh_u, vu.v1, vu.v0));
+        return out;
+      case 10:
+        out.v0 = BitCast(dh, ShiftLeftBytes<10>(dh_u, vu.v0));
+        out.v1 = BitCast(dh, CombineShiftRightBytes<6>(dh_u, vu.v1, vu.v0));
+        return out;
+      case 11:
+        out.v0 = BitCast(dh, ShiftLeftBytes<11>(dh_u, vu.v0));
+        out.v1 = BitCast(dh, CombineShiftRightBytes<5>(dh_u, vu.v1, vu.v0));
+        return out;
+      case 12:
+        out.v0 = BitCast(dh, ShiftLeftBytes<12>(dh_u, vu.v0));
+        out.v1 = BitCast(dh, CombineShiftRightBytes<4>(dh_u, vu.v1, vu.v0));
+        return out;
+      case 13:
+        out.v0 = BitCast(dh, ShiftLeftBytes<13>(dh_u, vu.v0));
+        out.v1 = BitCast(dh, CombineShiftRightBytes<3>(dh_u, vu.v1, vu.v0));
+        return out;
+      case 14:
+        out.v0 = BitCast(dh, ShiftLeftBytes<14>(dh_u, vu.v0));
+        out.v1 = BitCast(dh, CombineShiftRightBytes<2>(dh_u, vu.v1, vu.v0));
+        return out;
+      case 15:
+        out.v0 = BitCast(dh, ShiftLeftBytes<15>(dh_u, vu.v0));
+        out.v1 = BitCast(dh, CombineShiftRightBytes<1>(dh_u, vu.v1, vu.v0));
+        return out;
+    }
+  }
+
+  // Shift known to span at least one block: the lower half becomes zero and
+  // the upper half is the lower half slid up by the remaining lanes.
+  if (__builtin_constant_p(amt >= kLanesPerBlock) && amt >= kLanesPerBlock) {
+    out.v0 = Zero(dh);
+    out.v1 = SlideUpLanes(dh, LowerHalf(dh, v), amt - kLanesPerBlock);
+    return out;
+  }
+#endif
+
+  const Repartition<uint8_t, decltype(d)> du8;
+  const RebindToSigned<decltype(du8)> di8;
+  const Half<decltype(di8)> dh_i8;
+
+  // Per-byte source index: output byte position minus the shift in bytes,
+  // computed in uint8_t arithmetic and then viewed as signed.
+  const auto src_byte_idx = BitCast(
+      di8,
+      Iota(du8, static_cast<uint8_t>(size_t{0} - amt * sizeof(TFromD<D>))));
+
+  const auto upper_idx = UpperHalf(dh_i8, src_byte_idx);
+  const auto upper_rel_idx = upper_idx - Set(dh_i8, int8_t{16});
+  // Upper-half bytes whose source index exceeds 15 come from v's upper half.
+  const auto from_upper = upper_idx > Set(dh_i8, int8_t{15});
+
+  // First fill both halves from v's lower half, then patch the upper half.
+  // The second statement reads out.v1, so this order must be kept.
+  out = BitCast(
+      d, TableLookupBytesOr0(ConcatLowerLower(du, vu, vu), src_byte_idx));
+  out.v1 = BitCast(
+      dh, IfThenElse(from_upper,
+                     TableLookupBytes(UpperHalf(dh_u, vu), upper_rel_idx),
+                     BitCast(dh_i8, out.v1)));
+  return out;
+}
+
+// ------------------------------ Slide1Up
+// Shifts all lanes up by one lane. The lower half is shifted by one lane;
+// the upper half is taken from the byte-wise concatenation of both halves.
+template <typename D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> Slide1Up(D d, VFromD<D> v) {
+  constexpr int kShrByteAmt = static_cast<int>(16 - sizeof(TFromD<D>));
+  const Half<decltype(d)> d_half;
+  VFromD<D> out;
+  out.v1 = CombineShiftRightBytes<kShrByteAmt>(d_half, v.v1, v.v0);
+  out.v0 = ShiftLeftLanes<1>(d_half, v.v0);
+  return out;
+}
+
+// ------------------------------ SlideDownLanes
+
+// Shifts lanes toward lower indices by `amt` lanes, filling vacated lanes
+// with zero. Outside debug builds, compile-time-constant amounts use fixed
+// byte shifts; all other cases use byte table lookups.
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+  const Half<decltype(d)> dh;
+  const RebindToUnsigned<decltype(d)> du;
+  const RebindToUnsigned<decltype(dh)> dh_u;
+  VFromD<D> out;
+
+  const auto vu = BitCast(du, v);
+
+#if !HWY_IS_DEBUG_BUILD
+  constexpr size_t kLanesPerBlock = 16 / sizeof(TFromD<D>);
+  // Constant shift within one block: the lower half takes the 16 bytes
+  // starting N bytes into the concatenation of both halves, the upper half is
+  // shifted right by N bytes.
+  if (__builtin_constant_p(amt) && amt < kLanesPerBlock) {
+    switch (amt * sizeof(TFromD<D>)) {
+      case 0:
+        return v;
+      case 1:
+        out.v0 = BitCast(dh, CombineShiftRightBytes<1>(dh_u, vu.v1, vu.v0));
+        out.v1 = BitCast(dh, ShiftRightBytes<1>(dh_u, vu.v1));
+        return out;
+      case 2:
+        out.v0 = BitCast(dh, CombineShiftRightBytes<2>(dh_u, vu.v1, vu.v0));
+        out.v1 = BitCast(dh, ShiftRightBytes<2>(dh_u, vu.v1));
+        return out;
+      case 3:
+        out.v0 = BitCast(dh, CombineShiftRightBytes<3>(dh_u, vu.v1, vu.v0));
+        out.v1 = BitCast(dh, ShiftRightBytes<3>(dh_u, vu.v1));
+        return out;
+      case 4:
+        out.v0 = BitCast(dh, CombineShiftRightBytes<4>(dh_u, vu.v1, vu.v0));
+        out.v1 = BitCast(dh, ShiftRightBytes<4>(dh_u, vu.v1));
+        return out;
+      case 5:
+        out.v0 = BitCast(dh, CombineShiftRightBytes<5>(dh_u, vu.v1, vu.v0));
+        out.v1 = BitCast(dh, ShiftRightBytes<5>(dh_u, vu.v1));
+        return out;
+      case 6:
+        out.v0 = BitCast(dh, CombineShiftRightBytes<6>(dh_u, vu.v1, vu.v0));
+        out.v1 = BitCast(dh, ShiftRightBytes<6>(dh_u, vu.v1));
+        return out;
+      case 7:
+        out.v0 = BitCast(dh, CombineShiftRightBytes<7>(dh_u, vu.v1, vu.v0));
+        out.v1 = BitCast(dh, ShiftRightBytes<7>(dh_u, vu.v1));
+        return out;
+      case 8:
+        out.v0 = BitCast(dh, CombineShiftRightBytes<8>(dh_u, vu.v1, vu.v0));
+        out.v1 = BitCast(dh, ShiftRightBytes<8>(dh_u, vu.v1));
+        return out;
+      case 9:
+        out.v0 = BitCast(dh, CombineShiftRightBytes<9>(dh_u, vu.v1, vu.v0));
+        out.v1 = BitCast(dh, ShiftRightBytes<9>(dh_u, vu.v1));
+        return out;
+      case 10:
+        out.v0 = BitCast(dh, CombineShiftRightBytes<10>(dh_u, vu.v1, vu.v0));
+        out.v1 = BitCast(dh, ShiftRightBytes<10>(dh_u, vu.v1));
+        return out;
+      case 11:
+        out.v0 = BitCast(dh, CombineShiftRightBytes<11>(dh_u, vu.v1, vu.v0));
+        out.v1 = BitCast(dh, ShiftRightBytes<11>(dh_u, vu.v1));
+        return out;
+      case 12:
+        out.v0 = BitCast(dh, CombineShiftRightBytes<12>(dh_u, vu.v1, vu.v0));
+        out.v1 = BitCast(dh, ShiftRightBytes<12>(dh_u, vu.v1));
+        return out;
+      case 13:
+        out.v0 = BitCast(dh, CombineShiftRightBytes<13>(dh_u, vu.v1, vu.v0));
+        out.v1 = BitCast(dh, ShiftRightBytes<13>(dh_u, vu.v1));
+        return out;
+      case 14:
+        out.v0 = BitCast(dh, CombineShiftRightBytes<14>(dh_u, vu.v1, vu.v0));
+        out.v1 = BitCast(dh, ShiftRightBytes<14>(dh_u, vu.v1));
+        return out;
+      case 15:
+        out.v0 = BitCast(dh, CombineShiftRightBytes<15>(dh_u, vu.v1, vu.v0));
+        out.v1 = BitCast(dh, ShiftRightBytes<15>(dh_u, vu.v1));
+        return out;
+    }
+  }
+
+  // Shift known to span at least one block: the upper half becomes zero and
+  // the lower half is the upper half slid down by the remaining lanes.
+  if (__builtin_constant_p(amt >= kLanesPerBlock) && amt >= kLanesPerBlock) {
+    out.v0 = SlideDownLanes(dh, UpperHalf(dh, v), amt - kLanesPerBlock);
+    out.v1 = Zero(dh);
+    return out;
+  }
+#endif
+
+  const Repartition<uint8_t, decltype(d)> du8;
+  const Half<decltype(du8)> dh_u8;
+
+  // Per-byte source index: output byte position plus the shift in bytes.
+  const auto src_byte_idx =
+      Iota(du8, static_cast<uint8_t>(amt * sizeof(TFromD<D>)));
+  const auto sixteen = Set(du8, uint8_t{16});
+  // Index relative to v's upper half; uint8_t subtraction wraps for source
+  // indices below 16, which the `< sixteen` test below then rejects.
+  const auto upper_rel_idx = src_byte_idx - sixteen;
+
+  // Lower-half bytes whose source index is below 16 come from v's lower half.
+  const auto from_lower =
+      LowerHalf(dh_u8, src_byte_idx) < LowerHalf(dh_u8, sixteen);
+  // First fill both halves from v's upper half, then patch the lower half.
+  // The second statement reads `out`, so this order must be kept.
+  out = BitCast(d, IfThenElseZero(upper_rel_idx < sixteen,
+                                  TableLookupBytes(ConcatUpperUpper(du, vu, vu),
+                                                   upper_rel_idx)));
+  out.v0 =
+      BitCast(dh, IfThenElse(from_lower,
+                             TableLookupBytes(LowerHalf(dh_u, vu),
+                                              LowerHalf(dh_u8, src_byte_idx)),
+                             BitCast(dh_u8, LowerHalf(dh, out))));
+  return out;
+}
+
+// ------------------------------ Slide1Down
+// Shifts all lanes down by one lane. The lower half is taken from the
+// byte-wise concatenation of both halves; the upper half is shifted right by
+// one lane's worth of bytes.
+template <typename D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> Slide1Down(D d, VFromD<D> v) {
+  constexpr int kShrByteAmt = static_cast<int>(sizeof(TFromD<D>));
+  const Half<decltype(d)> d_half;
+  VFromD<D> out;
+  out.v1 = ShiftRightBytes<kShrByteAmt>(d_half, v.v1);
+  out.v0 = CombineShiftRightBytes<kShrByteAmt>(d_half, v.v1, v.v0);
+  return out;
+}
+
+// ================================================== CONVERT
+
+// ------------------------------ PromoteTo
+
+// 2x promotion: a full 128-bit input fills both halves of the 256-bit result.
+template <class D, HWY_IF_V_SIZE_D(D, 32), typename TN,
+          HWY_IF_T_SIZE_D(D, sizeof(TN) * 2)>
+HWY_API VFromD<D> PromoteTo(D d, Vec128<TN> v) {
+  const Half<decltype(d)> d_half;
+  VFromD<D> out;
+  // PromoteLowerTo is only defined later (generic_ops-inl.h), so the lower
+  // half is promoted via PromoteTo on LowerHalf(v).
+  out.v0 = PromoteTo(d_half, LowerHalf(v));
+  out.v1 = PromoteUpperTo(d_half, v);
+  return out;
+}
+
+// 4x promotion: 8-bit to 32-bit or 16-bit to 64-bit
+// Two-step promotion: first to lanes of twice the input width, then each half
+// of that intermediate is promoted once more.
+template <class DW, HWY_IF_V_SIZE_D(DW, 32),
+          HWY_IF_T_SIZE_ONE_OF_D(DW, (1 << 4) | (1 << 8)),
+          HWY_IF_NOT_FLOAT_D(DW), typename TN,
+          HWY_IF_T_SIZE_D(DW, sizeof(TN) * 4), HWY_IF_NOT_FLOAT_NOR_SPECIAL(TN)>
+HWY_API Vec256<TFromD<DW>> PromoteTo(DW d, Vec64<TN> v) {
+  const Half<decltype(d)> d_half;
+  // 16-bit lanes for UI8->UI32, 32-bit lanes for UI16->UI64
+  const Rebind<MakeWide<TN>, decltype(d)> d_mid;
+  const auto widened_once = PromoteTo(d_mid, v);
+  Vec256<TFromD<DW>> out;
+  // PromoteLowerTo is only defined later (generic_ops-inl.h), so the lower
+  // half is promoted via PromoteTo on LowerHalf.
+  out.v0 = PromoteTo(d_half, LowerHalf(widened_once));
+  out.v1 = PromoteUpperTo(d_half, widened_once);
+  return out;
+}
+
+// 8x promotion: 8-bit to 64-bit
+// Two-step promotion: 8-bit lanes are first widened to 32-bit lanes, then
+// each half of that intermediate is promoted to 64-bit lanes.
+template <class DW, HWY_IF_V_SIZE_D(DW, 32), HWY_IF_T_SIZE_D(DW, 8),
+          HWY_IF_NOT_FLOAT_D(DW), typename TN, HWY_IF_T_SIZE(TN, 1)>
+HWY_API Vec256<TFromD<DW>> PromoteTo(DW d, Vec32<TN> v) {
+  const Half<decltype(d)> d_half;
+  // 32-bit lanes
+  const Repartition<MakeWide<MakeWide<TN>>, decltype(d_half)> d_quad;
+  const auto widened_to_32 = PromoteTo(d_quad, v);
+  Vec256<TFromD<DW>> out;
+  // PromoteLowerTo is only defined later (generic_ops-inl.h), so the lower
+  // half is promoted via PromoteTo on LowerHalf.
+  out.v0 = PromoteTo(d_half, LowerHalf(widened_to_32));
+  out.v1 = PromoteUpperTo(d_half, widened_to_32);
+  return out;
+}
+
+// ------------------------------ PromoteUpperTo
+
+// Not native, but still define this here because wasm_128 toggles
+// HWY_NATIVE_PROMOTE_UPPER_TO.
+// Promotes the upper half of `v` to the lane type and size described by `d`.
+template <class D, class T>
+HWY_API VFromD<D> PromoteUpperTo(D d, Vec256<T> v) {
+  // Lanes(d) may differ from Lanes(DFromV<decltype(v)>()). The source tag is
+  // built from v's lane type because that type cannot be deduced from D
+  // (could be either bf16 or f16).
+  const Rebind<T, decltype(d)> d_src;
+  const auto upper = UpperHalf(d_src, v);
+  return PromoteTo(d, upper);
+}
+
+// ------------------------------ DemoteTo
+
+// i32 -> u16: a single narrowing instruction consumes both halves.
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec128<uint16_t> DemoteTo(D /* tag */, Vec256<int32_t> v) {
+  const auto narrowed = wasm_u16x8_narrow_i32x4(v.v0.raw, v.v1.raw);
+  return Vec128<uint16_t>{narrowed};
+}
+
+// i32 -> i16: a single narrowing instruction consumes both halves.
+template <class D, HWY_IF_I16_D(D)>
+HWY_API Vec128<int16_t> DemoteTo(D /* tag */, Vec256<int32_t> v) {
+  const auto narrowed = wasm_i16x8_narrow_i32x4(v.v0.raw, v.v1.raw);
+  return Vec128<int16_t>{narrowed};
+}
+
+// i32 -> u8 in two narrowing steps: i32 -> i16, then i16 -> u8. The second
+// step is fed the same intermediate twice; the result type is a 64-bit vector.
+template <class D, HWY_IF_U8_D(D)>
+HWY_API Vec64<uint8_t> DemoteTo(D /* tag */, Vec256<int32_t> v) {
+  const auto as_i16 = wasm_i16x8_narrow_i32x4(v.v0.raw, v.v1.raw);
+  const auto as_u8 = wasm_u8x16_narrow_i16x8(as_i16, as_i16);
+  return Vec64<uint8_t>{as_u8};
+}
+
+// i16 -> u8: a single narrowing instruction consumes both halves.
+template <class D, HWY_IF_U8_D(D)>
+HWY_API Vec128<uint8_t> DemoteTo(D /* tag */, Vec256<int16_t> v) {
+  const auto narrowed = wasm_u8x16_narrow_i16x8(v.v0.raw, v.v1.raw);
+  return Vec128<uint8_t>{narrowed};
+}
+
+// i32 -> i8 in two narrowing steps: i32 -> i16, then i16 -> i8. The second
+// step is fed the same intermediate twice; the result type is a 64-bit vector.
+template <class D, HWY_IF_I8_D(D)>
+HWY_API Vec64<int8_t> DemoteTo(D /* tag */, Vec256<int32_t> v) {
+  const auto as_i16 = wasm_i16x8_narrow_i32x4(v.v0.raw, v.v1.raw);
+  const auto as_i8 = wasm_i8x16_narrow_i16x8(as_i16, as_i16);
+  return Vec64<int8_t>{as_i8};
+}
+
+// i16 -> i8: a single narrowing instruction consumes both halves.
+template <class D, HWY_IF_I8_D(D)>
+HWY_API Vec128<int8_t> DemoteTo(D /* tag */, Vec256<int16_t> v) {
+  const auto narrowed = wasm_i8x16_narrow_i16x8(v.v0.raw, v.v1.raw);
+  return Vec128<int8_t>{narrowed};
+}
+
+// f64 -> i32: each half is converted with wasm_i32x4_trunc_sat_f64x2_zero and
+// the two 64-bit results are combined.
+template <class D, HWY_IF_I32_D(D)>
+HWY_API Vec128<int32_t> DemoteTo(D di, Vec256<double> v) {
+  const Vec64<int32_t> from_lower{wasm_i32x4_trunc_sat_f64x2_zero(v.v0.raw)};
+  const Vec64<int32_t> from_upper{wasm_i32x4_trunc_sat_f64x2_zero(v.v1.raw)};
+  return Combine(di, from_upper, from_lower);
+}
+
+// f32 -> f16: demotes each half with the 128-bit DemoteTo, then combines.
+template <class D, HWY_IF_F16_D(D)>
+HWY_API Vec128<float16_t> DemoteTo(D d16, Vec256<float> v) {
+  const Half<decltype(d16)> d16_half;
+  const Vec64<float16_t> from_lower = DemoteTo(d16_half, v.v0);
+  const Vec64<float16_t> from_upper = DemoteTo(d16_half, v.v1);
+  return Combine(d16, from_upper, from_lower);
+}
+
+// f32 -> bf16: demotes each half with the 128-bit DemoteTo, then combines.
+template <class D, HWY_IF_BF16_D(D)>
+HWY_API Vec128<bfloat16_t> DemoteTo(D dbf16, Vec256<float> v) {
+  const Half<decltype(dbf16)> dbf16_half;
+  const Vec64<bfloat16_t> from_lower = DemoteTo(dbf16_half, v.v0);
+  const Vec64<bfloat16_t> from_upper = DemoteTo(dbf16_half, v.v1);
+  return Combine(dbf16, from_upper, from_lower);
+}
+
+// For already range-limited input [0, 255].
+HWY_API Vec64<uint8_t> U8FromU32(Vec256<uint32_t> v) {
+  // There is no unsigned-source DemoteTo, so reinterpret the lanes as int32_t
+  // and use the signed one.
+  const Full256<int32_t> d_i32;
+  const Full64<uint8_t> d_u8;
+  const auto as_i32 = BitCast(d_i32, v);
+  return DemoteTo(d_u8, as_i32);
+}
+
+// ------------------------------ Truncations
+
+// u64 -> u8: selects byte 0 of each 64-bit lane (shuffle indices 0 and 8 from
+// v0, 16 and 24 from v1); the pattern repeats to fill the shuffle result.
+template <class D, HWY_IF_U8_D(D)>
+HWY_API Vec32<uint8_t> TruncateTo(D /* tag */, Vec256<uint64_t> v) {
+  const auto packed =
+      wasm_i8x16_shuffle(v.v0.raw, v.v1.raw, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8,
+                         16, 24, 0, 8, 16, 24);
+  return Vec32<uint8_t>{packed};
+}
+
+// u64 -> u16: selects bytes 0..1 of each 64-bit lane; the pattern repeats to
+// fill the shuffle result.
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec64<uint16_t> TruncateTo(D /* tag */, Vec256<uint64_t> v) {
+  const auto packed =
+      wasm_i8x16_shuffle(v.v0.raw, v.v1.raw, 0, 1, 8, 9, 16, 17, 24, 25, 0, 1,
+                         8, 9, 16, 17, 24, 25);
+  return Vec64<uint16_t>{packed};
+}
+
+// u64 -> u32: selects bytes 0..3 of each 64-bit lane.
+template <class D, HWY_IF_U32_D(D)>
+HWY_API Vec128<uint32_t> TruncateTo(D /* tag */, Vec256<uint64_t> v) {
+  const auto packed =
+      wasm_i8x16_shuffle(v.v0.raw, v.v1.raw, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17,
+                         18, 19, 24, 25, 26, 27);
+  return Vec128<uint32_t>{packed};
+}
+
+// u32 -> u8: selects byte 0 of each 32-bit lane; the pattern repeats to fill
+// the shuffle result.
+template <class D, HWY_IF_U8_D(D)>
+HWY_API Vec64<uint8_t> TruncateTo(D /* tag */, Vec256<uint32_t> v) {
+  const auto packed =
+      wasm_i8x16_shuffle(v.v0.raw, v.v1.raw, 0, 4, 8, 12, 16, 20, 24, 28, 0, 4,
+                         8, 12, 16, 20, 24, 28);
+  return Vec64<uint8_t>{packed};
+}
+
+// u32 -> u16: selects bytes 0..1 of each 32-bit lane.
+template <class D, HWY_IF_U16_D(D)>
+HWY_API Vec128<uint16_t> TruncateTo(D /* tag */, Vec256<uint32_t> v) {
+  const auto packed =
+      wasm_i8x16_shuffle(v.v0.raw, v.v1.raw, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17,
+                         20, 21, 24, 25, 28, 29);
+  return Vec128<uint16_t>{packed};
+}
+
+// u16 -> u8: selects byte 0 of each 16-bit lane (all even byte indices).
+template <class D, HWY_IF_U8_D(D)>
+HWY_API Vec128<uint8_t> TruncateTo(D /* tag */, Vec256<uint16_t> v) {
+  const auto packed =
+      wasm_i8x16_shuffle(v.v0.raw, v.v1.raw, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18,
+                         20, 22, 24, 26, 28, 30);
+  return Vec128<uint8_t>{packed};
+}
+
+// ------------------------------ ReorderDemote2To
+// f32 -> bf16 for two inputs: views both as 16-bit lanes and keeps the
+// odd-indexed ones via ConcatOdd, with `b` as the upper and `a` as the lower
+// argument.
+template <class DBF16, HWY_IF_BF16_D(DBF16)>
+HWY_API Vec256<bfloat16_t> ReorderDemote2To(DBF16 dbf16, Vec256<float> a,
+                                            Vec256<float> b) {
+  const RebindToUnsigned<decltype(dbf16)> du16;
+  const auto a_u16 = BitCast(du16, a);
+  const auto b_u16 = BitCast(du16, b);
+  return BitCast(dbf16, ConcatOdd(du16, b_u16, a_u16));
+}
+
+// Signed inputs: demotes `a` into the lower half and `b` into the upper half
+// of the result.
+template <class DN, typename V, HWY_IF_V_SIZE_D(DN, 32),
+          HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<DN>), HWY_IF_SIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_D(DN, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2)>
+HWY_API VFromD<DN> ReorderDemote2To(DN dn, V a, V b) {
+  const Half<decltype(dn)> dn_half;
+  VFromD<DN> out;
+  out.v1 = DemoteTo(dn_half, b);
+  out.v0 = DemoteTo(dn_half, a);
+  return out;
+}
+
+// Unsigned inputs to unsigned output: demotes `a` into the lower half and `b`
+// into the upper half of the result.
+template <class DN, typename V, HWY_IF_V_SIZE_D(DN, 32), HWY_IF_UNSIGNED_D(DN),
+          HWY_IF_UNSIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_D(DN, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<DN>) * 2)>
+HWY_API VFromD<DN> ReorderDemote2To(DN dn, V a, V b) {
+  const Half<decltype(dn)> dn_half;
+  VFromD<DN> out;
+  out.v1 = DemoteTo(dn_half, b);
+  out.v0 = DemoteTo(dn_half, a);
+  return out;
+}
+
+// ------------------------------ Convert i32 <=> f32 (Round)
+
+// Applies the 128-bit ConvertTo to each half independently.
+template <class DTo, typename TFrom, typename TTo = TFromD<DTo>>
+HWY_API Vec256<TTo> ConvertTo(DTo d, const Vec256<TFrom> v) {
+  const Half<decltype(d)> d_half;
+  Vec256<TTo> converted;
+  converted.v0 = ConvertTo(d_half, v.v0);
+  converted.v1 = ConvertTo(d_half, v.v1);
+  return converted;
+}
+
+// Rounds with Round(), then converts the result to int32_t lanes.
+HWY_API Vec256<int32_t> NearestInt(const Vec256<float> v) {
+  const Full256<int32_t> d_i32;
+  const auto rounded = Round(v);
+  return ConvertTo(d_i32, rounded);
+}
+
+// ================================================== MISC
+
+// ------------------------------ LoadMaskBits (TestBit)
+
+// `bits` points to at least 8 readable bytes, not all of which need be valid.
+// 32- and 64-bit lanes: the mask bits of both halves share bits[0].
+template <class D, HWY_IF_V_SIZE_D(D, 32),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 4) | (1 << 8))>
+HWY_API MFromD<D> LoadMaskBits(D d, const uint8_t* HWY_RESTRICT bits) {
+  // If size=4, one 128-bit vector has 4 mask bits; otherwise 2 for size=8.
+  // Both halves fit in one byte's worth of mask bits.
+  constexpr size_t kBitsPerHalf = 16 / sizeof(TFromD<D>);
+  const Half<decltype(d)> d_half;
+  MFromD<D> mask;
+  mask.m0 = LoadMaskBits(d_half, bits);
+  // Move the upper half's bits down to bit 0 in a zero-padded 8-byte buffer
+  // for the 128-bit LoadMaskBits.
+  const uint8_t upper_bits[8] = {
+      static_cast<uint8_t>(bits[0] >> kBitsPerHalf)};
+  mask.m1 = LoadMaskBits(d_half, upper_bits);
+  return mask;
+}
+
+// 8- and 16-bit lanes: each half owns at least one whole byte of mask bits,
+// so the upper half is loaded from a byte offset into `bits`.
+template <class D, HWY_IF_V_SIZE_D(D, 32),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2))>
+HWY_API MFromD<D> LoadMaskBits(D d, const uint8_t* HWY_RESTRICT bits) {
+  constexpr size_t kLanesPerHalf = 16 / sizeof(TFromD<D>);
+  constexpr size_t kBytesPerHalf = kLanesPerHalf / 8;
+  static_assert(kBytesPerHalf != 0, "Lane size <= 16 bits => at least 8 lanes");
+  const Half<decltype(d)> d_half;
+  MFromD<D> mask;
+  mask.m0 = LoadMaskBits(d_half, bits);
+  mask.m1 = LoadMaskBits(d_half, bits + kBytesPerHalf);
+  return mask;
+}
+
+// ------------------------------ Mask
+
+// `bits` points to at least 8 writable bytes.
+// 32- and 64-bit lanes: packs the mask bits of both halves into bits[0] and
+// returns the number of bytes that hold them.
+template <class D, typename T = TFromD<D>,
+          HWY_IF_T_SIZE_ONE_OF(T, (1 << 4) | (1 << 8))>
+HWY_API size_t StoreMaskBits(D d, const Mask256<T> mask, uint8_t* bits) {
+  // If size=4, one 128-bit vector has 4 mask bits; otherwise 2 for size=8.
+  // Both halves fit in one byte's worth of mask bits.
+  constexpr size_t kBitsPerHalf = 16 / sizeof(T);
+  const Half<decltype(d)> d_half;
+  // `bits` doubles as scratch space: save the lower half's bits before the
+  // second store overwrites them.
+  StoreMaskBits(d_half, mask.m0, bits);
+  const uint8_t lower_bits = bits[0];
+  StoreMaskBits(d_half, mask.m1, bits);
+  const uint8_t upper_bits = bits[0];
+  bits[0] = static_cast<uint8_t>(lower_bits | (upper_bits << kBitsPerHalf));
+  return (kBitsPerHalf * 2 + 7) / 8;
+}
+
+// 8- and 16-bit lanes: each half writes its own whole bytes; returns the
+// total number of bytes for both halves.
+template <class D, typename T = TFromD<D>,
+          HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2))>
+HWY_API size_t StoreMaskBits(D d, const Mask256<T> mask, uint8_t* bits) {
+  constexpr size_t kLanesPerHalf = 16 / sizeof(T);
+  constexpr size_t kBytesPerHalf = kLanesPerHalf / 8;
+  static_assert(kBytesPerHalf != 0, "Lane size <= 16 bits => at least 8 lanes");
+  const Half<decltype(d)> d_half;
+  uint8_t* const upper_bits = bits + kBytesPerHalf;
+  StoreMaskBits(d_half, mask.m0, bits);
+  StoreMaskBits(d_half, mask.m1, upper_bits);
+  return kBytesPerHalf * 2;
+}
+
+// Sum of the true-lane counts of both halves.
+template <class D, typename T = TFromD<D>>
+HWY_API size_t CountTrue(D d, const Mask256<T> m) {
+  const Half<decltype(d)> d_half;
+  const size_t num_lower = CountTrue(d_half, m.m0);
+  const size_t num_upper = CountTrue(d_half, m.m1);
+  return num_lower + num_upper;
+}
+
+// True if both halves of the mask are all-false.
+template <class D, typename T = TFromD<D>>
+HWY_API bool AllFalse(D d, const Mask256<T> m) {
+  const Half<decltype(d)> d_half;
+  if (!AllFalse(d_half, m.m0)) return false;
+  return AllFalse(d_half, m.m1);
+}
+
+// True if both halves of the mask are all-true.
+template <class D, typename T = TFromD<D>>
+HWY_API bool AllTrue(D d, const Mask256<T> m) {
+  const Half<decltype(d)> d_half;
+  if (!AllTrue(d_half, m.m0)) return false;
+  return AllTrue(d_half, m.m1);
+}
+
+// Returns the index of the first true lane. "Known" presumably means the
+// caller guarantees at least one lane is true; if the lower half has none,
+// the search continues in the upper half on that assumption.
+template <class D, typename T = TFromD<D>>
+HWY_API size_t FindKnownFirstTrue(D d, const Mask256<T> mask) {
+  const Half<decltype(d)> d_half;
+  constexpr size_t kHalfLanes = 16 / sizeof(T);
+
+  // The lower half alone is not known to contain a true lane, so use the
+  // variant that can report failure via a negative result.
+  const intptr_t lower_idx = FindFirstTrue(d_half, mask.m0);
+  if (lower_idx >= 0) return static_cast<size_t>(lower_idx);
+
+  // Upper-half indices are offset by the number of lanes in the lower half.
+  return kHalfLanes + FindKnownFirstTrue(d_half, mask.m1);
+}
+
+// Returns the index of the first true lane, or the (negative) result of the
+// upper-half search if neither half contains a true lane.
+template <class D, typename T = TFromD<D>>
+HWY_API intptr_t FindFirstTrue(D d, const Mask256<T> mask) {
+  const Half<decltype(d)> d_half;
+  constexpr int kHalfLanes = 16 / sizeof(T);
+
+  const intptr_t lower_idx = FindFirstTrue(d_half, mask.m0);
+  if (lower_idx >= 0) return lower_idx;
+
+  const intptr_t upper_idx = FindFirstTrue(d_half, mask.m1);
+  // Pass a negative result through unchanged; otherwise offset the index by
+  // the number of lanes in the lower half.
+  if (upper_idx < 0) return upper_idx;
+  return upper_idx + kHalfLanes;
+}
+
+// Returns the index of the last true lane. "Known" presumably means the
+// caller guarantees at least one lane is true; if the upper half has none,
+// the search continues in the lower half on that assumption.
+template <class D, typename T = TFromD<D>>
+HWY_API size_t FindKnownLastTrue(D d, const Mask256<T> mask) {
+  const Half<decltype(d)> d_half;
+  constexpr size_t kHalfLanes = 16 / sizeof(T);
+
+  // The upper half alone is not known to contain a true lane, so use the
+  // variant that can report failure via a negative result.
+  const intptr_t upper_idx = FindLastTrue(d_half, mask.m1);
+  if (upper_idx >= 0) return kHalfLanes + static_cast<size_t>(upper_idx);
+
+  return FindKnownLastTrue(d_half, mask.m0);
+}
+
+// Returns the index of the last true lane, searching the upper half first and
+// falling back to the result of the lower-half search.
+template <class D, typename T = TFromD<D>>
+HWY_API intptr_t FindLastTrue(D d, const Mask256<T> mask) {
+  const Half<decltype(d)> d_half;
+  constexpr int kHalfLanes = 16 / sizeof(T);
+
+  const intptr_t upper_idx = FindLastTrue(d_half, mask.m1);
+  // Upper-half indices are offset by the number of lanes in the lower half.
+  if (upper_idx >= 0) return kHalfLanes + upper_idx;
+
+  return FindLastTrue(d_half, mask.m0);
+}
+
+// ------------------------------ CompressStore
+// Runs the 128-bit CompressStore on the lower half, then on the upper half,
+// writing the upper half's output directly after the lower half's. Returns the
+// sum of the two half-width return values.
+template <class D, typename T = TFromD<D>>
+HWY_API size_t CompressStore(Vec256<T> v, const Mask256<T> mask, D d,
+                             T* HWY_RESTRICT unaligned) {
+  const Half<decltype(d)> d_half;
+  const size_t num_lower = CompressStore(v.v0, mask.m0, d_half, unaligned);
+  T* const upper_dst = unaligned + num_lower;
+  const size_t num_upper = CompressStore(v.v1, mask.m1, d_half, upper_dst);
+  return num_lower + num_upper;
+}
+
+// ------------------------------ CompressBlendedStore
+// Runs the 128-bit CompressBlendedStore on the lower half, then on the upper
+// half, writing the upper half's output directly after the lower half's.
+// Returns the sum of the two half-width return values.
+template <class D, typename T = TFromD<D>>
+HWY_API size_t CompressBlendedStore(Vec256<T> v, const Mask256<T> m, D d,
+                                    T* HWY_RESTRICT unaligned) {
+  const Half<decltype(d)> d_half;
+  const size_t num_lower = CompressBlendedStore(v.v0, m.m0, d_half, unaligned);
+  T* const upper_dst = unaligned + num_lower;
+  const size_t num_upper = CompressBlendedStore(v.v1, m.m1, d_half, upper_dst);
+  return num_lower + num_upper;
+}
+
+// ------------------------------ CompressBitsStore
+
+// Converts the packed `bits` into a mask via LoadMaskBits, then forwards to
+// CompressStore and returns its result.
+template <class D, typename T = TFromD<D>>
+HWY_API size_t CompressBitsStore(Vec256<T> v, const uint8_t* HWY_RESTRICT bits,
+                                 D d, T* HWY_RESTRICT unaligned) {
+  const Mask256<T> loaded_mask = LoadMaskBits(d, bits);
+  return CompressStore(v, loaded_mask, d, unaligned);
+}
+
+// ------------------------------ Compress
+// Implemented via memory: CompressStore writes into a zero-initialized,
+// 32-byte-aligned buffer, which is then reloaded as the result vector.
+template <typename T>
+HWY_API Vec256<T> Compress(const Vec256<T> v, const Mask256<T> mask) {
+  const DFromV<decltype(v)> d;
+  constexpr size_t kNumLanes = 32 / sizeof(T);
+  alignas(32) T compacted[kNumLanes] = {};
+  // The returned count is not needed here.
+  (void)CompressStore(v, mask, d, compacted);
+  return Load(d, compacted);
+}
+
+// ------------------------------ CompressNot
+// Equivalent to Compress with the inverted mask.
+template <typename T>
+HWY_API Vec256<T> CompressNot(Vec256<T> v, const Mask256<T> mask) {
+  const auto inverted = Not(mask);
+  return Compress(v, inverted);
+}
+
+// ------------------------------ CompressBlocksNot
+// Block-granularity CompressNot for u64 lanes: returns `v` with its two
+// 128-bit blocks swapped in exactly one case (see below), else `v` unchanged.
+HWY_API Vec256<uint64_t> CompressBlocksNot(Vec256<uint64_t> v,
+                                           Mask256<uint64_t> mask) {
+  const Full128<uint64_t> dh;
+  // Non-selected (mask=1) blocks are undefined in the output, so the input can
+  // be returned as-is unless mask = 01, i.e. the lower block's mask is fully
+  // set and the upper block's is clear. Only then must the upper block be
+  // brought down.
+  const bool must_swap = AllTrue(dh, AndNot(mask.m1, mask.m0));
+  if (must_swap) return SwapAdjacentBlocks(v);
+  return v;
+}
+
+// ------------------------------ CompressBits
+// Converts the packed `bits` into a mask via LoadMaskBits, then forwards to
+// Compress.
+template <typename T>
+HWY_API Vec256<T> CompressBits(Vec256<T> v, const uint8_t* HWY_RESTRICT bits) {
+  const DFromV<decltype(v)> d;
+  const Mask256<T> loaded_mask = LoadMaskBits(d, bits);
+  return Compress(v, loaded_mask);
+}
+
+// ------------------------------ Expand
+// Expands each half with the 128-bit Expand. The lower half reads its inputs
+// from v.v0; the upper half reads from the lanes of `v` that start at index
+// CountTrue(lower mask), obtained by spilling `v` to memory and reloading.
+template <typename T>
+HWY_API Vec256<T> Expand(const Vec256<T> v, const Mask256<T> mask) {
+  const Full256<T> d;
+  const Half<decltype(d)> d_half;
+
+  alignas(32) T spilled[32 / sizeof(T)] = {};
+  Store(v, d, spilled);
+  // Number of input lanes consumed by the lower half.
+  const size_t num_lower = CountTrue(d_half, mask.m0);
+
+  Vec256<T> expanded;
+  expanded.v0 = Expand(v.v0, mask.m0);
+  expanded.v1 = Expand(LoadU(d_half, spilled + num_lower), mask.m1);
+  return expanded;
+}
+
+// ------------------------------ LoadExpand
+// Loads a full (unaligned) vector from `unaligned` and expands it under
+// `mask`.
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> LoadExpand(MFromD<D> mask, D d,
+                             const TFromD<D>* HWY_RESTRICT unaligned) {
+  const auto loaded = LoadU(d, unaligned);
+  return Expand(loaded, mask);
+}
+
+// ------------------------------ LoadInterleaved3/4
+
+// Implemented in generic_ops, we just overload LoadTransposedBlocks3/4.
+
+namespace detail {
+
+// Input:
+// 1 0 (<- first block of unaligned)
+// 3 2
+// 5 4
+// Output:
+// 3 0
+// 4 1
+// 5 2
+// Loads three consecutive vectors from `unaligned` and regroups their 128-bit
+// blocks into A, B and C as shown in the diagram above.
+template <class D, typename T = TFromD<D>>
+HWY_API void LoadTransposedBlocks3(D d, const T* HWY_RESTRICT unaligned,
+                                   Vec256<T>& A, Vec256<T>& B, Vec256<T>& C) {
+  const size_t lanes_per_vec = MaxLanes(d);
+  // Names list the block indices held by each vector, upper block first.
+  const Vec256<T> blocks10 = LoadU(d, unaligned);
+  const Vec256<T> blocks32 = LoadU(d, unaligned + lanes_per_vec);
+  const Vec256<T> blocks54 = LoadU(d, unaligned + 2 * lanes_per_vec);
+
+  A = ConcatUpperLower(d, blocks32, blocks10);
+  B = ConcatLowerUpper(d, blocks54, blocks10);
+  C = ConcatUpperLower(d, blocks54, blocks32);
+}
+
+// Input (128-bit blocks):
+// 1 0 (first block of unaligned)
+// 3 2
+// 5 4
+// 7 6
+// Output:
+// 4 0 (LSB of A)
+// 5 1
+// 6 2
+// 7 3
+// Loads four consecutive vectors from `unaligned` and regroups their 128-bit
+// blocks into vA..vD as shown in the diagram above.
+template <class D, typename T = TFromD<D>>
+HWY_API void LoadTransposedBlocks4(D d, const T* HWY_RESTRICT unaligned,
+                                   Vec256<T>& vA, Vec256<T>& vB, Vec256<T>& vC,
+                                   Vec256<T>& vD) {
+  const size_t lanes_per_vec = MaxLanes(d);
+  // Names list the block indices held by each vector, upper block first.
+  const Vec256<T> blocks10 = LoadU(d, unaligned);
+  const Vec256<T> blocks32 = LoadU(d, unaligned + lanes_per_vec);
+  const Vec256<T> blocks54 = LoadU(d, unaligned + 2 * lanes_per_vec);
+  const Vec256<T> blocks76 = LoadU(d, unaligned + 3 * lanes_per_vec);
+
+  vA = ConcatLowerLower(d, blocks54, blocks10);
+  vB = ConcatUpperUpper(d, blocks54, blocks10);
+  vC = ConcatLowerLower(d, blocks76, blocks32);
+  vD = ConcatUpperUpper(d, blocks76, blocks32);
+}
+
+}  // namespace detail
+
+// ------------------------------ StoreInterleaved2/3/4 (ConcatUpperLower)
+
+// Implemented in generic_ops, we just overload StoreTransposedBlocks2/3/4.
+
+namespace detail {
+
+// Input (128-bit blocks):
+// 2 0 (LSB of i)
+// 3 1
+// Output:
+// 1 0
+// 3 2
+// Regroups the 128-bit blocks of `i` and `j` (lower blocks first, then upper
+// blocks) and stores the two resulting vectors consecutively to `unaligned`.
+template <class D, typename T = TFromD<D>>
+HWY_API void StoreTransposedBlocks2(Vec256<T> i, Vec256<T> j, D d,
+                                    T* HWY_RESTRICT unaligned) {
+  const size_t lanes_per_vec = MaxLanes(d);
+  const Vec256<T> lower_blocks = ConcatLowerLower(d, j, i);
+  const Vec256<T> upper_blocks = ConcatUpperUpper(d, j, i);
+  StoreU(lower_blocks, d, unaligned);
+  StoreU(upper_blocks, d, unaligned + lanes_per_vec);
+}
+
+// Input (128-bit blocks):
+// 3 0 (LSB of i)
+// 4 1
+// 5 2
+// Output:
+// 1 0
+// 3 2
+// 5 4
+// Regroups the 128-bit blocks of `i`, `j` and `k` as shown in the diagram
+// above and stores the three resulting vectors consecutively to `unaligned`.
+template <class D, typename T = TFromD<D>>
+HWY_API void StoreTransposedBlocks3(Vec256<T> i, Vec256<T> j, Vec256<T> k, D d,
+                                    T* HWY_RESTRICT unaligned) {
+  const size_t lanes_per_vec = MaxLanes(d);
+  const Vec256<T> first = ConcatLowerLower(d, j, i);
+  const Vec256<T> second = ConcatUpperLower(d, i, k);
+  const Vec256<T> third = ConcatUpperUpper(d, k, j);
+  StoreU(first, d, unaligned);
+  StoreU(second, d, unaligned + lanes_per_vec);
+  StoreU(third, d, unaligned + 2 * lanes_per_vec);
+}
+
+// Input (128-bit blocks):
+// 4 0 (LSB of i)
+// 5 1
+// 6 2
+// 7 3
+// Output:
+// 1 0
+// 3 2
+// 5 4
+// 7 6
+// Regroups the 128-bit blocks of `i`, `j`, `k` and `l` as shown in the diagram
+// above and stores the four resulting vectors consecutively to `unaligned`.
+template <class D, typename T = TFromD<D>>
+HWY_API void StoreTransposedBlocks4(Vec256<T> i, Vec256<T> j, Vec256<T> k,
+                                    Vec256<T> l, D d,
+                                    T* HWY_RESTRICT unaligned) {
+  const size_t lanes_per_vec = MaxLanes(d);
+
+  // The lower blocks of all inputs form the first two output vectors.
+  const Vec256<T> lower_ji = ConcatLowerLower(d, j, i);
+  const Vec256<T> lower_lk = ConcatLowerLower(d, l, k);
+  // The upper blocks form the last two output vectors.
+  const Vec256<T> upper_ji = ConcatUpperUpper(d, j, i);
+  const Vec256<T> upper_lk = ConcatUpperUpper(d, l, k);
+
+  StoreU(lower_ji, d, unaligned);
+  StoreU(lower_lk, d, unaligned + lanes_per_vec);
+  StoreU(upper_ji, d, unaligned + 2 * lanes_per_vec);
+  StoreU(upper_lk, d, unaligned + 3 * lanes_per_vec);
+}
+
+}  // namespace detail
+
+// ------------------------------ Additional mask logical operations
+
+// Applies the 128-bit SetAtOrAfterFirst to each half independently, then ORs
+// into the upper half a mask derived from the lower half's result so that a
+// true lane in the lower half also affects the upper half.
+template <class T>
+HWY_API Mask256<T> SetAtOrAfterFirst(Mask256<T> mask) {
+  const Full256<T> d;
+  const Half<decltype(d)> dh;
+  const Repartition<int64_t, decltype(dh)> dh_i64;
+
+  // Per-half results; the upper half does not yet know about the lower half.
+  Mask256<T> result;
+  result.m0 = SetAtOrAfterFirst(mask.m0);
+  result.m1 = SetAtOrAfterFirst(mask.m1);
+
+  // Copy the sign bit of the lower 128-bit half to the upper 128-bit half
+  // NOTE(review): InterleaveUpper duplicates the upper 64-bit lane of the
+  // lower result and BroadcastSignBit spreads its top bit over the whole lane;
+  // presumably that bit is set whenever the lower half had any true lane.
+  const auto vmask_lo = BitCast(dh_i64, VecFromMask(dh, result.m0));
+  result.m1 =
+      Or(result.m1, MaskFromVec(BitCast(dh, BroadcastSignBit(InterleaveUpper(
+                                                dh_i64, vmask_lo, vmask_lo)))));
+
+  return result;
+}
+
+// Defined as the complement of SetAtOrAfterFirst.
+template <class T>
+HWY_API Mask256<T> SetBeforeFirst(Mask256<T> mask) {
+  const auto at_or_after = SetAtOrAfterFirst(mask);
+  return Not(at_or_after);
+}
+
+// Returns a mask derived from `mask`; per the name, presumably only the first
+// true lane remains set. Works on the mask viewed as four 64-bit lanes.
+template <class T>
+HWY_API Mask256<T> SetOnlyFirst(Mask256<T> mask) {
+  const Full256<T> d;
+  const RebindToSigned<decltype(d)> di;
+  const Repartition<int64_t, decltype(d)> di64;
+  const Half<decltype(di64)> dh_i64;
+
+  const auto zero = Zero(di64);
+  const auto vmask = BitCast(di64, VecFromMask(d, mask));
+
+  // All-ones in each 64-bit lane of vmask that is entirely zero.
+  const auto vmask_eq_0 = VecFromMask(di64, vmask == zero);
+  auto vmask2_lo = LowerHalf(dh_i64, vmask_eq_0);
+  auto vmask2_hi = UpperHalf(dh_i64, vmask_eq_0);
+
+  // NOTE(review): the shuffles below appear to build a per-64-bit-lane gate
+  // (vmask2) whose lane i is all-ones only if every earlier 64-bit lane of
+  // vmask is zero; lane 0 is always all-ones. Confirm against the op
+  // definitions of InterleaveLower/Upper and ConcatLowerUpper.
+  vmask2_lo = And(vmask2_lo, InterleaveLower(vmask2_lo, vmask2_lo));
+  vmask2_hi = And(ConcatLowerUpper(dh_i64, vmask2_hi, vmask2_lo),
+                  InterleaveUpper(dh_i64, vmask2_lo, vmask2_lo));
+  vmask2_lo = InterleaveLower(Set(dh_i64, int64_t{-1}), vmask2_lo);
+
+  const auto vmask2 = Combine(di64, vmask2_hi, vmask2_lo);
+  // And(vmask, Neg(vmask)) keeps only the lowest set bit of each 64-bit lane;
+  // negating that in T-sized lanes presumably turns the lane holding that bit
+  // into all-ones and leaves the others zero.
+  const auto only_first_vmask = Neg(BitCast(di, And(vmask, Neg(vmask))));
+  // Keep the per-lane candidate only where the gate is set.
+  return MaskFromVec(BitCast(d, And(only_first_vmask, BitCast(di, vmask2))));
+}
+
+// Computed as SetBeforeFirst of a byte-shifted copy of the mask vector.
+// NOTE(review): the shift amount of (kLanesPerBlock - 1) lanes combined with
+// vmask_lo presumably moves every mask lane up by one position across the
+// block boundary - confirm against CombineShiftRightBytes semantics.
+template <class T>
+HWY_API Mask256<T> SetAtOrBeforeFirst(Mask256<T> mask) {
+  const Full256<T> d;
+  // Number of lanes in one 128-bit block (half of the 256-bit vector).
+  constexpr size_t kLanesPerBlock = MaxLanes(d) / 2;
+
+  const auto vmask = VecFromMask(d, mask);
+  // Built from the lower blocks of `vmask` and of a zero vector.
+  const auto vmask_lo = ConcatLowerLower(d, vmask, Zero(d));
+  return SetBeforeFirst(
+      MaskFromVec(CombineShiftRightBytes<(kLanesPerBlock - 1) * sizeof(T)>(
+          d, vmask, vmask_lo)));
+}
+
+// ------------------------------ WidenMulPairwiseAdd
+// Applies the 128-bit WidenMulPairwiseAdd to the lower and upper halves of
+// `a` and `b` independently.
+template <class D32, typename T16, typename T32 = TFromD<D32>>
+HWY_API Vec256<T32> WidenMulPairwiseAdd(D32 d32, Vec256<T16> a, Vec256<T16> b) {
+  const Half<decltype(d32)> d32_half;
+  Vec256<T32> widened;
+  widened.v0 = WidenMulPairwiseAdd(d32_half, a.v0, b.v0);
+  widened.v1 = WidenMulPairwiseAdd(d32_half, a.v1, b.v1);
+  return widened;
+}
+
+// ------------------------------ ReorderWidenMulAccumulate
+// Applies the 128-bit ReorderWidenMulAccumulate to each half. The halves of
+// `sum1` are handed to the half-width calls, and the updated `sum0` (taken by
+// value) is returned.
+template <class D32, typename T16, typename T32 = TFromD<D32>>
+HWY_API Vec256<T32> ReorderWidenMulAccumulate(D32 d32, Vec256<T16> a,
+                                              Vec256<T16> b, Vec256<T32> sum0,
+                                              Vec256<T32>& sum1) {
+  const Half<decltype(d32)> d32_half;
+  // Lower half first, then upper half.
+  sum0.v0 = ReorderWidenMulAccumulate(d32_half, a.v0, b.v0, sum0.v0, sum1.v0);
+  sum0.v1 = ReorderWidenMulAccumulate(d32_half, a.v1, b.v1, sum0.v1, sum1.v1);
+  return sum0;
+}
+
+// ------------------------------ RearrangeToOddPlusEven
+// Applies the 128-bit RearrangeToOddPlusEven to each half, reusing the
+// by-value `sum0` as the result.
+template <typename TW>
+HWY_API Vec256<TW> RearrangeToOddPlusEven(Vec256<TW> sum0, Vec256<TW> sum1) {
+  // Lower half first, then upper half.
+  sum0.v0 = RearrangeToOddPlusEven(sum0.v0, sum1.v0);
+  sum0.v1 = RearrangeToOddPlusEven(sum0.v1, sum1.v1);
+  return sum0;
+}
+
+// ------------------------------ Reductions
+
+// Adds the two halves lane-wise, reduces with the 128-bit SumOfLanes, and
+// places that result in both halves of the returned vector.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec256<T> SumOfLanes(D d, const Vec256<T> v) {
+  const Half<decltype(d)> d_half;
+  const Vec128<T> pairwise = Add(v.v0, v.v1);
+  const Vec128<T> total = SumOfLanes(d_half, pairwise);
+  return Combine(d, total, total);
+}
+
+// Adds the two halves lane-wise and returns the 128-bit ReduceSum of that.
+template <class D, typename T = TFromD<D>>
+HWY_API T ReduceSum(D d, const Vec256<T> v) {
+  const Half<decltype(d)> d_half;
+  const auto pairwise = Add(v.v0, v.v1);
+  return ReduceSum(d_half, pairwise);
+}
+
+// Takes the lane-wise minimum of the two halves, reduces with the 128-bit
+// MinOfLanes, and places that result in both halves of the returned vector.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec256<T> MinOfLanes(D d, const Vec256<T> v) {
+  const Half<decltype(d)> d_half;
+  const Vec128<T> pairwise = Min(v.v0, v.v1);
+  const Vec128<T> smallest = MinOfLanes(d_half, pairwise);
+  return Combine(d, smallest, smallest);
+}
+
+// Takes the lane-wise maximum of the two halves, reduces with the 128-bit
+// MaxOfLanes, and places that result in both halves of the returned vector.
+template <class D, typename T = TFromD<D>>
+HWY_API Vec256<T> MaxOfLanes(D d, const Vec256<T> v) {
+  const Half<decltype(d)> d_half;
+  const Vec128<T> pairwise = Max(v.v0, v.v1);
+  const Vec128<T> largest = MaxOfLanes(d_half, pairwise);
+  return Combine(d, largest, largest);
+}
+
+// ------------------------------ Lt128
+
+// Applies the 128-bit Lt128 to the lower and upper halves independently.
+template <class D, typename T = TFromD<D>>
+HWY_INLINE Mask256<T> Lt128(D d, Vec256<T> a, Vec256<T> b) {
+  const Half<decltype(d)> d_half;
+  Mask256<T> result;
+  result.m0 = Lt128(d_half, a.v0, b.v0);
+  result.m1 = Lt128(d_half, a.v1, b.v1);
+  return result;
+}
+
+// Applies the 128-bit Lt128Upper to the lower and upper halves independently.
+template <class D, typename T = TFromD<D>>
+HWY_INLINE Mask256<T> Lt128Upper(D d, Vec256<T> a, Vec256<T> b) {
+  const Half<decltype(d)> d_half;
+  Mask256<T> result;
+  result.m0 = Lt128Upper(d_half, a.v0, b.v0);
+  result.m1 = Lt128Upper(d_half, a.v1, b.v1);
+  return result;
+}
+
+// Applies the 128-bit Eq128 to the lower and upper halves independently.
+template <class D, typename T = TFromD<D>>
+HWY_INLINE Mask256<T> Eq128(D d, Vec256<T> a, Vec256<T> b) {
+  const Half<decltype(d)> d_half;
+  Mask256<T> result;
+  result.m0 = Eq128(d_half, a.v0, b.v0);
+  result.m1 = Eq128(d_half, a.v1, b.v1);
+  return result;
+}
+
+// Applies the 128-bit Eq128Upper to the lower and upper halves independently.
+template <class D, typename T = TFromD<D>>
+HWY_INLINE Mask256<T> Eq128Upper(D d, Vec256<T> a, Vec256<T> b) {
+  const Half<decltype(d)> d_half;
+  Mask256<T> result;
+  result.m0 = Eq128Upper(d_half, a.v0, b.v0);
+  result.m1 = Eq128Upper(d_half, a.v1, b.v1);
+  return result;
+}
+
+// Applies the 128-bit Ne128 to the lower and upper halves independently.
+template <class D, typename T = TFromD<D>>
+HWY_INLINE Mask256<T> Ne128(D d, Vec256<T> a, Vec256<T> b) {
+  const Half<decltype(d)> d_half;
+  Mask256<T> result;
+  result.m0 = Ne128(d_half, a.v0, b.v0);
+  result.m1 = Ne128(d_half, a.v1, b.v1);
+  return result;
+}
+
+// Applies the 128-bit Ne128Upper to the lower and upper halves independently.
+template <class D, typename T = TFromD<D>>
+HWY_INLINE Mask256<T> Ne128Upper(D d, Vec256<T> a, Vec256<T> b) {
+  const Half<decltype(d)> d_half;
+  Mask256<T> result;
+  result.m0 = Ne128Upper(d_half, a.v0, b.v0);
+  result.m1 = Ne128Upper(d_half, a.v1, b.v1);
+  return result;
+}
+
+// Applies the 128-bit Min128 to the lower and upper halves independently.
+template <class D, typename T = TFromD<D>>
+HWY_INLINE Vec256<T> Min128(D d, Vec256<T> a, Vec256<T> b) {
+  const Half<decltype(d)> d_half;
+  Vec256<T> result;
+  result.v0 = Min128(d_half, a.v0, b.v0);
+  result.v1 = Min128(d_half, a.v1, b.v1);
+  return result;
+}
+
+// Applies the 128-bit Max128 to the lower and upper halves independently.
+template <class D, typename T = TFromD<D>>
+HWY_INLINE Vec256<T> Max128(D d, Vec256<T> a, Vec256<T> b) {
+  const Half<decltype(d)> d_half;
+  Vec256<T> result;
+  result.v0 = Max128(d_half, a.v0, b.v0);
+  result.v1 = Max128(d_half, a.v1, b.v1);
+  return result;
+}
+
+// Applies the 128-bit Min128Upper to the lower and upper halves independently.
+template <class D, typename T = TFromD<D>>
+HWY_INLINE Vec256<T> Min128Upper(D d, Vec256<T> a, Vec256<T> b) {
+  const Half<decltype(d)> d_half;
+  Vec256<T> result;
+  result.v0 = Min128Upper(d_half, a.v0, b.v0);
+  result.v1 = Min128Upper(d_half, a.v1, b.v1);
+  return result;
+}
+
+// Applies the 128-bit Max128Upper to the lower and upper halves independently.
+template <class D, typename T = TFromD<D>>
+HWY_INLINE Vec256<T> Max128Upper(D d, Vec256<T> a, Vec256<T> b) {
+  const Half<decltype(d)> d_half;
+  Vec256<T> result;
+  result.v0 = Max128Upper(d_half, a.v0, b.v0);
+  result.v1 = Max128Upper(d_half, a.v1, b.v1);
+  return result;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/ops/x86_128-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/x86_128-inl.h
new file mode 100644
index 0000000000..fd944ffef2
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/x86_128-inl.h
@@ -0,0 +1,10756 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// 128-bit vectors and SSE4 instructions, plus some AVX2 and AVX512-VL
+// operations when compiling for those targets.
+// External include guard in highway.h - see comment there.
+
+// Must come before HWY_DIAGNOSTICS and HWY_COMPILER_GCC_ACTUAL
+#include "hwy/base.h"
+
+// Avoid uninitialized warnings in GCC's emmintrin.h - see
+// https://github.com/google/highway/issues/710 and pull/902
+HWY_DIAGNOSTICS(push)
+#if HWY_COMPILER_GCC_ACTUAL
+HWY_DIAGNOSTICS_OFF(disable : 4700, ignored "-Wuninitialized")
+HWY_DIAGNOSTICS_OFF(disable : 4701 4703 6001 26494,
+                    ignored "-Wmaybe-uninitialized")
+#endif
+
+#include <emmintrin.h>
+#include <stdio.h>
+#if HWY_TARGET == HWY_SSSE3
+#include <tmmintrin.h>  // SSSE3
+#elif HWY_TARGET <= HWY_SSE4
+#include <smmintrin.h>  // SSE4
+#ifndef HWY_DISABLE_PCLMUL_AES
+#include <wmmintrin.h>  // CLMUL
+#endif
+#endif
+
+#include "hwy/ops/shared-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+namespace detail {
+
+// Maps a lane type T to the raw intrinsic vector type stored in Vec128. The
+// primary template selects __m128i; the specializations below override it.
+template <typename T>
+struct Raw128 {
+  using type = __m128i;
+};
+#if HWY_HAVE_FLOAT16
+// float16_t lanes use __m128h (only when HWY_HAVE_FLOAT16 is nonzero).
+template <>
+struct Raw128<float16_t> {
+  using type = __m128h;
+};
+#endif  // HWY_HAVE_FLOAT16
+// float lanes use __m128.
+template <>
+struct Raw128<float> {
+  using type = __m128;
+};
+// double lanes use __m128d.
+template <>
+struct Raw128<double> {
+  using type = __m128d;
+};
+
+}  // namespace detail
+
+// Wrapper around a raw intrinsic vector holding lanes of type T. N is the
+// lane count and defaults to the number of T that fit in 16 bytes.
+template <typename T, size_t N = 16 / sizeof(T)>
+class Vec128 {
+  using Raw = typename detail::Raw128<T>::type;
+
+ public:
+  using PrivateT = T;                     // only for DFromV
+  static constexpr size_t kPrivateN = N;  // only for DFromV
+
+  // Compound assignment operators. Each one is defined in terms of the
+  // matching non-member binary operator and is therefore only usable when
+  // such an overload exists; e.g. division is only provided for f32 and f64.
+  HWY_INLINE Vec128& operator*=(const Vec128 other) {
+    *this = *this * other;
+    return *this;
+  }
+  HWY_INLINE Vec128& operator/=(const Vec128 other) {
+    *this = *this / other;
+    return *this;
+  }
+  HWY_INLINE Vec128& operator+=(const Vec128 other) {
+    *this = *this + other;
+    return *this;
+  }
+  HWY_INLINE Vec128& operator-=(const Vec128 other) {
+    *this = *this - other;
+    return *this;
+  }
+  HWY_INLINE Vec128& operator&=(const Vec128 other) {
+    *this = *this & other;
+    return *this;
+  }
+  HWY_INLINE Vec128& operator|=(const Vec128 other) {
+    *this = *this | other;
+    return *this;
+  }
+  HWY_INLINE Vec128& operator^=(const Vec128 other) {
+    *this = *this ^ other;
+    return *this;
+  }
+
+  // Underlying intrinsic vector (type chosen by detail::Raw128<T>).
+  Raw raw;
+};
+
+// Vec128 with as many lanes of T as fit in 8 bytes.
+template <typename T>
+using Vec64 = Vec128<T, 8 / sizeof(T)>;
+
+// Vec128 with as many lanes of T as fit in 4 bytes.
+template <typename T>
+using Vec32 = Vec128<T, 4 / sizeof(T)>;
+
+// Vec128 with as many lanes of T as fit in 2 bytes.
+template <typename T>
+using Vec16 = Vec128<T, 2 / sizeof(T)>;
+
+#if HWY_TARGET <= HWY_AVX3
+
+namespace detail {
+
+// Selects the raw mask register type from the lane size in bytes.
+// Template arg: sizeof(lane type)
+template <size_t size>
+struct RawMask128 {};
+// 1-byte lanes: up to 16 lanes per 128-bit vector, hence a 16-bit mask.
+template <>
+struct RawMask128<1> {
+  using type = __mmask16;
+};
+// 2-byte lanes use an 8-bit mask.
+template <>
+struct RawMask128<2> {
+  using type = __mmask8;
+};
+// 4-byte lanes use an 8-bit mask.
+template <>
+struct RawMask128<4> {
+  using type = __mmask8;
+};
+// 8-byte lanes use an 8-bit mask.
+template <>
+struct RawMask128<8> {
+  using type = __mmask8;
+};
+
+}  // namespace detail
+
+// Mask for targets where HWY_TARGET <= HWY_AVX3: wraps a raw mask register
+// value whose type is chosen by detail::RawMask128 from sizeof(T).
+template <typename T, size_t N = 16 / sizeof(T)>
+struct Mask128 {
+  using Raw = typename detail::RawMask128<sizeof(T)>::type;
+
+  // Builds a mask from `mask_bits`, converted to the raw mask type.
+  static Mask128<T, N> FromBits(uint64_t mask_bits) {
+    const Raw raw_bits = static_cast<Raw>(mask_bits);
+    return Mask128<T, N>{raw_bits};
+  }
+
+  Raw raw;
+};
+
+#else  // AVX2 or below
+
+// Mask for AVX2 or below: stored in the same raw vector type as Vec128<T>.
+// FF..FF or 0.
+template <typename T, size_t N = 16 / sizeof(T)>
+struct Mask128 {
+  typename detail::Raw128<T>::type raw;
+};
+
+#endif  // AVX2 or below
+
+namespace detail {
+
+// Returns the lowest N of the _mm_movemask* bits. Full 16-byte vectors
+// (N * sizeof(T) == 16) are returned unchanged; for partial vectors, all bits
+// at index N and above are cleared.
+template <typename T, size_t N>
+constexpr uint64_t OnlyActive(uint64_t mask_bits) {
+  return ((N * sizeof(T)) != 16) ? (mask_bits & ((1ull << N) - 1)) : mask_bits;
+}
+
+}  // namespace detail
+
+#if HWY_TARGET <= HWY_AVX3
+namespace detail {
+
+// Returns the raw mask register value widened to 64 bits and restricted to
+// the N active lanes.
+// Used by Expand() emulation, which is required for both AVX3 and AVX2.
+template <typename T, size_t N>
+HWY_INLINE uint64_t BitsFromMask(const Mask128<T, N> mask) {
+  const uint64_t raw_bits = mask.raw;
+  return OnlyActive<T, N>(raw_bits);
+}
+
+}  // namespace detail
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// Descriptor tag (Simd) for vector type V, built from the lane type and lane
+// count that V exposes via PrivateT / kPrivateN.
+template <class V>
+using DFromV = Simd<typename V::PrivateT, V::kPrivateN, 0>;
+
+// Lane type of vector type V.
+template <class V>
+using TFromV = typename V::PrivateT;
+
+// ------------------------------ Zero
+
+// Returns a vector with all bits zero, using the setzero intrinsic that
+// matches the raw type for the lane type of D.
+// Use HWY_MAX_LANES_D here because VFromD is defined in terms of Zero.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+HWY_API Vec128<TFromD<D>, HWY_MAX_LANES_D(D)> Zero(D /* tag */) {
+  using V = Vec128<TFromD<D>, HWY_MAX_LANES_D(D)>;
+  return V{_mm_setzero_si128()};
+}
+// bfloat16_t lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_BF16_D(D)>
+HWY_API Vec128<bfloat16_t, HWY_MAX_LANES_D(D)> Zero(D /* tag */) {
+  using V = Vec128<bfloat16_t, HWY_MAX_LANES_D(D)>;
+  return V{_mm_setzero_si128()};
+}
+// float16_t lanes: the intrinsic depends on HWY_HAVE_FLOAT16.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F16_D(D)>
+HWY_API Vec128<float16_t, HWY_MAX_LANES_D(D)> Zero(D /* tag */) {
+  using V = Vec128<float16_t, HWY_MAX_LANES_D(D)>;
+#if HWY_HAVE_FLOAT16
+  return V{_mm_setzero_ph()};
+#else
+  return V{_mm_setzero_si128()};
+#endif
+}
+// float lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API Vec128<float, HWY_MAX_LANES_D(D)> Zero(D /* tag */) {
+  using V = Vec128<float, HWY_MAX_LANES_D(D)>;
+  return V{_mm_setzero_ps()};
+}
+// double lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API Vec128<double, HWY_MAX_LANES_D(D)> Zero(D /* tag */) {
+  using V = Vec128<double, HWY_MAX_LANES_D(D)>;
+  return V{_mm_setzero_pd()};
+}
+
+// Vector type for descriptor D, deduced as the return type of Zero(D()).
+// Using the existing Zero function instead of a dedicated function for
+// deduction avoids having to forward-declare Vec256 here.
+template <class D>
+using VFromD = decltype(Zero(D()));
+
+// ------------------------------ Tuple (VFromD)
+#include "hwy/ops/tuple-inl.h"
+
+// ------------------------------ BitCast
+
+namespace detail {
+
+// Overloads that reinterpret any supported raw vector type as __m128i.
+// Already an integer vector: returned unchanged.
+HWY_INLINE __m128i BitCastToInteger(__m128i v) { return v; }
+#if HWY_HAVE_FLOAT16
+// Half-precision vector, only when HWY_HAVE_FLOAT16 is nonzero.
+HWY_INLINE __m128i BitCastToInteger(__m128h v) { return _mm_castph_si128(v); }
+#endif  // HWY_HAVE_FLOAT16
+// Single-precision vector.
+HWY_INLINE __m128i BitCastToInteger(__m128 v) { return _mm_castps_si128(v); }
+// Double-precision vector.
+HWY_INLINE __m128i BitCastToInteger(__m128d v) { return _mm_castpd_si128(v); }
+
+// Reinterprets `v` as a vector of uint8_t with N * sizeof(T) lanes.
+template <typename T, size_t N>
+HWY_INLINE Vec128<uint8_t, N * sizeof(T)> BitCastToByte(Vec128<T, N> v) {
+  using VU8 = Vec128<uint8_t, N * sizeof(T)>;
+  return VU8{BitCastToInteger(v.raw)};
+}
+
+// Functor that reinterprets __m128i as the raw vector type for lane type T.
+// Cannot rely on function overloading because return types differ.
+template <typename T>
+struct BitCastFromInteger128 {
+  // Integer lane types: returned unchanged.
+  HWY_INLINE __m128i operator()(__m128i v) { return v; }
+};
+#if HWY_HAVE_FLOAT16
+// float16_t lanes, only when HWY_HAVE_FLOAT16 is nonzero.
+template <>
+struct BitCastFromInteger128<float16_t> {
+  HWY_INLINE __m128h operator()(__m128i v) { return _mm_castsi128_ph(v); }
+};
+#endif  // HWY_HAVE_FLOAT16
+// float lanes.
+template <>
+struct BitCastFromInteger128<float> {
+  HWY_INLINE __m128 operator()(__m128i v) { return _mm_castsi128_ps(v); }
+};
+// double lanes.
+template <>
+struct BitCastFromInteger128<double> {
+  HWY_INLINE __m128d operator()(__m128i v) { return _mm_castsi128_pd(v); }
+};
+
+// Reinterprets a byte vector as the vector type of D, using the
+// BitCastFromInteger128 functor for D's lane type.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_INLINE VFromD<D> BitCastFromByte(D /* tag */,
+                                     Vec128<uint8_t, D().MaxBytes()> v) {
+  using TTo = TFromD<D>;
+  return VFromD<D>{BitCastFromInteger128<TTo>()(v.raw)};
+}
+
+}  // namespace detail
+
+// Reinterprets the bits of `v` as the vector type of D by going through a
+// byte vector: BitCastToByte followed by BitCastFromByte.
+template <class D, typename FromT, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> BitCast(D d,
+                          Vec128<FromT, Repartition<FromT, D>().MaxLanes()> v) {
+  const auto as_bytes = detail::BitCastToByte(v);
+  return detail::BitCastFromByte(d, as_bytes);
+}
+
+// ------------------------------ Set
+
+// Returns a vector with `t` passed to the set1 intrinsic matching the lane
+// size/type of D. Integer arguments are first cast to the intrinsic's
+// parameter type.
+// 1-byte lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Set(D /* tag */, TFromD<D> t) {
+  const char lane = static_cast<char>(t);  // NOLINT
+  return VFromD<D>{_mm_set1_epi8(lane)};
+}
+// 16-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_UI16_D(D)>
+HWY_API VFromD<D> Set(D /* tag */, TFromD<D> t) {
+  const short lane = static_cast<short>(t);  // NOLINT
+  return VFromD<D>{_mm_set1_epi16(lane)};
+}
+// 32-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_UI32_D(D)>
+HWY_API VFromD<D> Set(D /* tag */, TFromD<D> t) {
+  const int lane = static_cast<int>(t);
+  return VFromD<D>{_mm_set1_epi32(lane)};
+}
+// 64-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_UI64_D(D)>
+HWY_API VFromD<D> Set(D /* tag */, TFromD<D> t) {
+  const long long lane = static_cast<long long>(t);  // NOLINT
+  return VFromD<D>{_mm_set1_epi64x(lane)};
+}
+#if HWY_HAVE_FLOAT16
+// float16_t lanes, only when HWY_HAVE_FLOAT16 is nonzero.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F16_D(D)>
+HWY_API VFromD<D> Set(D /* tag */, float16_t t) {
+  const __m128h raw = _mm_set1_ph(t);
+  return VFromD<D>{raw};
+}
+#endif  // HWY_HAVE_FLOAT16
+// float lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> Set(D /* tag */, float t) {
+  const __m128 raw = _mm_set1_ps(t);
+  return VFromD<D>{raw};
+}
+// double lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> Set(D /* tag */, double t) {
+  const __m128d raw = _mm_set1_pd(t);
+  return VFromD<D>{raw};
+}
+
+// Set for special (16-bit) float types: copies the two bytes of `t` into a
+// uint16_t, sets the unsigned vector from that, and bit-casts back to D.
+// Generic for all vector lengths.
+template <class D, HWY_IF_SPECIAL_FLOAT_D(D)>
+HWY_API VFromD<D> Set(D df, TFromD<D> t) {
+  static_assert(sizeof(TFromD<D>) == 2, "Expecting [b]f16");
+  const RebindToUnsigned<decltype(df)> du;
+  uint16_t lane_bits;
+  CopyBytes<2>(&t, &lane_bits);
+  const auto as_unsigned = Set(du, lane_bits);
+  return BitCast(df, as_unsigned);
+}
+
+// ------------------------------ Undefined
+
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4700, ignored "-Wuninitialized")
+
+// Returns a vector with uninitialized elements.
+// Each overload uses the undefined intrinsic that matches the raw type for
+// the lane type of D. This overload covers the non-float, non-special types.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+HWY_API VFromD<D> Undefined(D /* tag */) {
+  // Available on Clang 6.0, GCC 6.2, ICC 16.03, MSVC 19.14. All but ICC
+  // generate an XOR instruction.
+  return VFromD<D>{_mm_undefined_si128()};
+}
+// bfloat16_t lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_BF16_D(D)>
+HWY_API VFromD<D> Undefined(D /* tag */) {
+  return VFromD<D>{_mm_undefined_si128()};
+}
+// float16_t lanes: the intrinsic depends on HWY_HAVE_FLOAT16.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F16_D(D)>
+HWY_API VFromD<D> Undefined(D /* tag */) {
+#if HWY_HAVE_FLOAT16
+  return VFromD<D>{_mm_undefined_ph()};
+#else
+  return VFromD<D>{_mm_undefined_si128()};
+#endif
+}
+// float lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> Undefined(D /* tag */) {
+  return VFromD<D>{_mm_undefined_ps()};
+}
+// double lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> Undefined(D /* tag */) {
+  return VFromD<D>{_mm_undefined_pd()};
+}
+
+HWY_DIAGNOSTICS(pop)
+
+// ------------------------------ GetLane
+
+// Returns the first (lowest) lane of `v`.
+// 1-byte lanes: extract the low 32 bits and keep only the lowest byte.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_API T GetLane(const Vec128<T, N> v) {
+  const int low32 = _mm_cvtsi128_si32(v.raw);
+  return static_cast<T>(low32 & 0xFF);
+}
+// 2-byte lanes: extract the low 32 bits and keep only the lowest 16 bits.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_API T GetLane(const Vec128<T, N> v) {
+  const int low32 = _mm_cvtsi128_si32(v.raw);
+  return static_cast<T>(low32 & 0xFFFF);
+}
+// 4-byte lanes: the low 32 bits are the lane.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 4)>
+HWY_API T GetLane(const Vec128<T, N> v) {
+  const int low32 = _mm_cvtsi128_si32(v.raw);
+  return static_cast<T>(low32);
+}
+// float lanes.
+template <size_t N>
+HWY_API float GetLane(const Vec128<float, N> v) {
+  const float lane = _mm_cvtss_f32(v.raw);
+  return lane;
+}
+// 8-byte lanes: 32-bit x86 builds go through memory instead of using
+// _mm_cvtsi128_si64.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 8)>
+HWY_API T GetLane(const Vec128<T, N> v) {
+#if HWY_ARCH_X86_32
+  const DFromV<decltype(v)> d;
+  alignas(16) T stored[2];
+  Store(v, d, stored);
+  return stored[0];
+#else
+  const auto low64 = _mm_cvtsi128_si64(v.raw);
+  return static_cast<T>(low64);
+#endif
+}
+// double lanes.
+template <size_t N>
+HWY_API double GetLane(const Vec128<double, N> v) {
+  const double lane = _mm_cvtsd_f64(v.raw);
+  return lane;
+}
+
+// ------------------------------ ResizeBitCast
+
+// Reinterprets the raw bits of `v` as the vector type of D (both at most 16
+// bytes), going through a uint8_t vector repartitioned from D.
+template <class D, class FromV, HWY_IF_V_SIZE_LE_V(FromV, 16),
+          HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> ResizeBitCast(D d, FromV v) {
+  const Repartition<uint8_t, decltype(d)> du8;
+  using VU8 = VFromD<decltype(du8)>;
+  const VU8 as_bytes{detail::BitCastToInteger(v.raw)};
+  return BitCast(d, as_bytes);
+}
+
+// ================================================== LOGICAL
+
+// ------------------------------ And
+
+// Bitwise AND. The generic overload uses the integer intrinsic and casts the
+// result back to the lane type via the unsigned descriptor.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> And(Vec128<T, N> a, Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;  // for float16_t
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const VU anded{_mm_and_si128(a.raw, b.raw)};
+  return BitCast(d, anded);
+}
+// float lanes.
+template <size_t N>
+HWY_API Vec128<float, N> And(Vec128<float, N> a, Vec128<float, N> b) {
+  const __m128 anded = _mm_and_ps(a.raw, b.raw);
+  return Vec128<float, N>{anded};
+}
+// double lanes.
+template <size_t N>
+HWY_API Vec128<double, N> And(Vec128<double, N> a, Vec128<double, N> b) {
+  const __m128d anded = _mm_and_pd(a.raw, b.raw);
+  return Vec128<double, N>{anded};
+}
+
+// ------------------------------ AndNot
+
+// Returns ~not_mask & mask.
+// The generic overload uses the integer intrinsic and casts the result back
+// to the lane type via the unsigned descriptor.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> AndNot(Vec128<T, N> not_mask, Vec128<T, N> mask) {
+  const DFromV<decltype(mask)> d;  // for float16_t
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const VU cleared{_mm_andnot_si128(not_mask.raw, mask.raw)};
+  return BitCast(d, cleared);
+}
+// float lanes.
+template <size_t N>
+HWY_API Vec128<float, N> AndNot(Vec128<float, N> not_mask,
+                                Vec128<float, N> mask) {
+  const __m128 cleared = _mm_andnot_ps(not_mask.raw, mask.raw);
+  return Vec128<float, N>{cleared};
+}
+// double lanes.
+template <size_t N>
+HWY_API Vec128<double, N> AndNot(Vec128<double, N> not_mask,
+                                 Vec128<double, N> mask) {
+  const __m128d cleared = _mm_andnot_pd(not_mask.raw, mask.raw);
+  return Vec128<double, N>{cleared};
+}
+
+// ------------------------------ Or
+
+// Bitwise OR. The generic overload uses the integer intrinsic and casts the
+// result back to the lane type via the unsigned descriptor.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Or(Vec128<T, N> a, Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;  // for float16_t
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const VU ored{_mm_or_si128(a.raw, b.raw)};
+  return BitCast(d, ored);
+}
+
+// float lanes.
+template <size_t N>
+HWY_API Vec128<float, N> Or(Vec128<float, N> a, Vec128<float, N> b) {
+  const __m128 ored = _mm_or_ps(a.raw, b.raw);
+  return Vec128<float, N>{ored};
+}
+// double lanes.
+template <size_t N>
+HWY_API Vec128<double, N> Or(Vec128<double, N> a, Vec128<double, N> b) {
+  const __m128d ored = _mm_or_pd(a.raw, b.raw);
+  return Vec128<double, N>{ored};
+}
+
+// ------------------------------ Xor
+
+// Bitwise XOR. The generic overload uses the integer intrinsic and casts the
+// result back to the lane type via the unsigned descriptor.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Xor(Vec128<T, N> a, Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;  // for float16_t
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const VU xored{_mm_xor_si128(a.raw, b.raw)};
+  return BitCast(d, xored);
+}
+
+// float lanes.
+template <size_t N>
+HWY_API Vec128<float, N> Xor(Vec128<float, N> a, Vec128<float, N> b) {
+  const __m128 xored = _mm_xor_ps(a.raw, b.raw);
+  return Vec128<float, N>{xored};
+}
+// double lanes.
+template <size_t N>
+HWY_API Vec128<double, N> Xor(Vec128<double, N> a, Vec128<double, N> b) {
+  const __m128d xored = _mm_xor_pd(a.raw, b.raw);
+  return Vec128<double, N>{xored};
+}
+
+// ------------------------------ Not
+// Bitwise NOT. AVX3 targets use a single ternary-logic instruction; other
+// targets XOR with an all-ones vector.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Not(const Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+#if HWY_TARGET <= HWY_AVX3
+  const __m128i bits = BitCast(du, v).raw;
+  // All three operands are the same vector; 0x55 is the truth table used for
+  // the inversion.
+  const VU inverted{_mm_ternarylogic_epi32(bits, bits, bits, 0x55)};
+  return BitCast(d, inverted);
+#else
+  const VU all_ones{_mm_set1_epi32(-1)};
+  return Xor(v, BitCast(d, all_ones));
+#endif
+}
+
+// ------------------------------ Xor3
+// Returns x1 ^ x2 ^ x3. AVX3 targets use a single ternary-logic instruction
+// (truth table 0x96); other targets use two Xor operations.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Xor3(Vec128<T, N> x1, Vec128<T, N> x2, Vec128<T, N> x3) {
+#if HWY_TARGET <= HWY_AVX3
+  const DFromV<decltype(x1)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const __m128i bits1 = BitCast(du, x1).raw;
+  const __m128i bits2 = BitCast(du, x2).raw;
+  const __m128i bits3 = BitCast(du, x3).raw;
+  const VU xored{_mm_ternarylogic_epi64(bits1, bits2, bits3, 0x96)};
+  return BitCast(d, xored);
+#else
+  return Xor(x1, Xor(x2, x3));
+#endif
+}
+
+// ------------------------------ Or3
+// Returns o1 | o2 | o3. AVX3 targets use a single ternary-logic instruction
+// (truth table 0xFE); other targets use two Or operations.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Or3(Vec128<T, N> o1, Vec128<T, N> o2, Vec128<T, N> o3) {
+#if HWY_TARGET <= HWY_AVX3
+  const DFromV<decltype(o1)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const __m128i bits1 = BitCast(du, o1).raw;
+  const __m128i bits2 = BitCast(du, o2).raw;
+  const __m128i bits3 = BitCast(du, o3).raw;
+  const VU ored{_mm_ternarylogic_epi64(bits1, bits2, bits3, 0xFE)};
+  return BitCast(d, ored);
+#else
+  return Or(o1, Or(o2, o3));
+#endif
+}
+
+// ------------------------------ OrAnd
+template <typename T, size_t N>  // Returns o | (a1 & a2) (bitwise).
+HWY_API Vec128<T, N> OrAnd(Vec128<T, N> o, Vec128<T, N> a1, Vec128<T, N> a2) {
+#if HWY_TARGET <= HWY_AVX3  // AVX3 path: one ternary-logic op on the unsigned views.
+  const DFromV<decltype(o)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const __m128i ret = _mm_ternarylogic_epi64(
+      BitCast(du, o).raw, BitCast(du, a1).raw, BitCast(du, a2).raw, 0xF8);  // 0xF8 = A | (B & C) table
+  return BitCast(d, VU{ret});
+#else  // Fallback: separate And followed by Or.
+  return Or(o, And(a1, a2));
+#endif
+}
+
+// ------------------------------ IfVecThenElse
+template <typename T, size_t N>  // Selects between yes and no using a vector-typed mask.
+HWY_API Vec128<T, N> IfVecThenElse(Vec128<T, N> mask, Vec128<T, N> yes,
+                                   Vec128<T, N> no) {
+#if HWY_TARGET <= HWY_AVX3  // AVX3 path: per-bit select in one ternary-logic op.
+  const DFromV<decltype(no)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  return BitCast(
+      d, VU{_mm_ternarylogic_epi64(BitCast(du, mask).raw, BitCast(du, yes).raw,
+                                   BitCast(du, no).raw, 0xCA)});  // 0xCA = A ? B : C table
+#else  // Fallback: convert the vector to a mask and use IfThenElse.
+  return IfThenElse(MaskFromVec(mask), yes, no);
+#endif
+}
+
+// ------------------------------ BitwiseIfThenElse
+#if HWY_TARGET <= HWY_AVX3  // Only provided here for AVX3 targets.
+
+#ifdef HWY_NATIVE_BITWISE_IF_THEN_ELSE  // Toggle: undefine if defined, else define.
+#undef HWY_NATIVE_BITWISE_IF_THEN_ELSE
+#else
+#define HWY_NATIVE_BITWISE_IF_THEN_ELSE  // presumably tells generic code a native version exists; confirm
+#endif
+
+template <class V>  // Per-bit select: forwards to IfVecThenElse(mask, yes, no).
+HWY_API V BitwiseIfThenElse(V mask, V yes, V no) {
+  return IfVecThenElse(mask, yes, no);
+}
+
+#endif
+
+// ------------------------------ Operator overloads (internal-only if float)
+
+template <typename T, size_t N>  // a & b: forwards to And(a, b).
+HWY_API Vec128<T, N> operator&(const Vec128<T, N> a, const Vec128<T, N> b) {
+  return And(a, b);
+}
+
+template <typename T, size_t N>  // a | b: forwards to Or(a, b).
+HWY_API Vec128<T, N> operator|(const Vec128<T, N> a, const Vec128<T, N> b) {
+  return Or(a, b);
+}
+
+template <typename T, size_t N>  // a ^ b: forwards to Xor(a, b).
+HWY_API Vec128<T, N> operator^(const Vec128<T, N> a, const Vec128<T, N> b) {
+  return Xor(a, b);
+}
+
+// ------------------------------ PopulationCount
+
+// 8/16 require BITALG, 32/64 require VPOPCNTDQ.
+#if HWY_TARGET <= HWY_AVX3_DL
+
+#ifdef HWY_NATIVE_POPCNT  // Toggle: undefine if already defined, else define.
+#undef HWY_NATIVE_POPCNT
+#else
+#define HWY_NATIVE_POPCNT  // presumably signals a native PopulationCount to generic code; confirm
+#endif
+
+namespace detail {  // Per-lane-size popcount helpers, selected by SizeTag.
+
+template <typename T, size_t N>  // 1-byte lanes.
+HWY_INLINE Vec128<T, N> PopulationCount(hwy::SizeTag<1> /* tag */,
+                                        Vec128<T, N> v) {
+  return Vec128<T, N>{_mm_popcnt_epi8(v.raw)};
+}
+template <typename T, size_t N>  // 2-byte lanes.
+HWY_INLINE Vec128<T, N> PopulationCount(hwy::SizeTag<2> /* tag */,
+                                        Vec128<T, N> v) {
+  return Vec128<T, N>{_mm_popcnt_epi16(v.raw)};
+}
+template <typename T, size_t N>  // 4-byte lanes.
+HWY_INLINE Vec128<T, N> PopulationCount(hwy::SizeTag<4> /* tag */,
+                                        Vec128<T, N> v) {
+  return Vec128<T, N>{_mm_popcnt_epi32(v.raw)};
+}
+template <typename T, size_t N>  // 8-byte lanes.
+HWY_INLINE Vec128<T, N> PopulationCount(hwy::SizeTag<8> /* tag */,
+                                        Vec128<T, N> v) {
+  return Vec128<T, N>{_mm_popcnt_epi64(v.raw)};
+}
+
+}  // namespace detail
+
+template <typename T, size_t N>  // Per-lane count of set bits.
+HWY_API Vec128<T, N> PopulationCount(Vec128<T, N> v) {
+  return detail::PopulationCount(hwy::SizeTag<sizeof(T)>(), v);  // dispatch on lane size in bytes
+}
+
+#endif  // HWY_TARGET <= HWY_AVX3_DL
+
+// ================================================== SIGN
+
+// ------------------------------ Neg
+
+// Tag dispatch instead of SFINAE for MSVC 2017 compatibility
+namespace detail {  // Neg helpers, selected by a type-category tag.
+
+template <typename T, size_t N>  // FloatTag: XOR with SignBit(d).
+HWY_INLINE Vec128<T, N> Neg(hwy::FloatTag /*tag*/, const Vec128<T, N> v) {
+  return Xor(v, SignBit(DFromV<decltype(v)>()));
+}
+
+template <typename T, size_t N>  // SpecialTag: same XOR with SignBit(d) as FloatTag.
+HWY_INLINE Vec128<T, N> Neg(hwy::SpecialTag /*tag*/, const Vec128<T, N> v) {
+  return Xor(v, SignBit(DFromV<decltype(v)>()));
+}
+
+template <typename T, size_t N>  // SignedTag: computes 0 - v.
+HWY_INLINE Vec128<T, N> Neg(hwy::SignedTag /*tag*/, const Vec128<T, N> v) {
+  return Zero(DFromV<decltype(v)>()) - v;
+}
+
+}  // namespace detail
+
+template <typename T, size_t N>  // Negates v; implementation chosen via hwy::TypeTag<T>.
+HWY_INLINE Vec128<T, N> Neg(const Vec128<T, N> v) {  // NOTE(review): HWY_INLINE here, sibling public ops use HWY_API; confirm intentional
+  return detail::Neg(hwy::TypeTag<T>(), v);
+}
+
+// ------------------------------ Floating-point Abs
+template <typename T, size_t N, HWY_IF_FLOAT(T)>  // Abs, enabled only when HWY_IF_FLOAT(T) holds.
+HWY_API Vec128<T, N> Abs(const Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;
+  using TI = TFromD<decltype(di)>;
+  return v & BitCast(d, Set(di, static_cast<TI>(~SignMask<TI>())));  // AND with ~SignMask (presumably clears only the sign bit)
+}
+
+// ------------------------------ CopySign
+// Generic for all vector lengths.
+template <class V>  // Combines bits of magn and sign, selected by SignBit(d).
+HWY_API V CopySign(const V magn, const V sign) {
+  static_assert(IsFloat<TFromV<V>>(), "Only makes sense for floating-point");
+
+  const DFromV<decltype(magn)> d;
+  const auto msb = SignBit(d);
+
+  // Truth table for msb, magn, sign | bitwise msb ? sign : magn
+  //                  0    0     0   |  0
+  //                  0    0     1   |  0
+  //                  0    1     0   |  1
+  //                  0    1     1   |  1
+  //                  1    0     0   |  0
+  //                  1    0     1   |  1
+  //                  1    1     0   |  0
+  //                  1    1     1   |  1
+  return BitwiseIfThenElse(msb, sign, magn);  // bits set in msb come from sign, the rest from magn
+}
+
+// ------------------------------ CopySignToAbs
+// Generic for all vector lengths.
+template <class V>  // Returns abs | (SignBit(d) & sign).
+HWY_API V CopySignToAbs(const V abs, const V sign) {
+  const DFromV<decltype(abs)> d;
+  return OrAnd(abs, SignBit(d), sign);  // presumably expects abs to have a clear sign bit; confirm with callers
+}
+
+// ================================================== MASK
+
+#if HWY_TARGET <= HWY_AVX3
+
+// ------------------------------ IfThenElse
+
+// Returns mask ? yes : no.
+
+namespace detail {  // Per-lane-size blend helpers, selected by SizeTag.
+
+// Templates for signed/unsigned integer of a particular size.
+template <typename T, size_t N>  // 1-byte lanes.
+HWY_INLINE Vec128<T, N> IfThenElse(hwy::SizeTag<1> /* tag */,
+                                   Mask128<T, N> mask, Vec128<T, N> yes,
+                                   Vec128<T, N> no) {
+  return Vec128<T, N>{_mm_mask_blend_epi8(mask.raw, no.raw, yes.raw)};  // yes where the mask bit is set, else no
+}
+template <typename T, size_t N>  // 2-byte lanes.
+HWY_INLINE Vec128<T, N> IfThenElse(hwy::SizeTag<2> /* tag */,
+                                   Mask128<T, N> mask, Vec128<T, N> yes,
+                                   Vec128<T, N> no) {
+  return Vec128<T, N>{_mm_mask_blend_epi16(mask.raw, no.raw, yes.raw)};
+}
+template <typename T, size_t N>  // 4-byte lanes.
+HWY_INLINE Vec128<T, N> IfThenElse(hwy::SizeTag<4> /* tag */,
+                                   Mask128<T, N> mask, Vec128<T, N> yes,
+                                   Vec128<T, N> no) {
+  return Vec128<T, N>{_mm_mask_blend_epi32(mask.raw, no.raw, yes.raw)};
+}
+template <typename T, size_t N>  // 8-byte lanes.
+HWY_INLINE Vec128<T, N> IfThenElse(hwy::SizeTag<8> /* tag */,
+                                   Mask128<T, N> mask, Vec128<T, N> yes,
+                                   Vec128<T, N> no) {
+  return Vec128<T, N>{_mm_mask_blend_epi64(mask.raw, no.raw, yes.raw)};
+}
+
+}  // namespace detail
+
+template <typename T, size_t N>  // Per-lane select: mask ? yes : no.
+HWY_API Vec128<T, N> IfThenElse(Mask128<T, N> mask, Vec128<T, N> yes,
+                                Vec128<T, N> no) {
+  return detail::IfThenElse(hwy::SizeTag<sizeof(T)>(), mask, yes, no);  // dispatch on lane size in bytes
+}
+
+#if HWY_HAVE_FLOAT16  // float16_t overload exists only with native float16 support.
+template <size_t N>
+HWY_API Vec128<float16_t, N> IfThenElse(Mask128<float16_t, N> mask,
+                                        Vec128<float16_t, N> yes,
+                                        Vec128<float16_t, N> no) {
+  return Vec128<float16_t, N>{_mm_mask_blend_ph(mask.raw, no.raw, yes.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+
+template <size_t N>  // float overload using the _ps blend.
+HWY_API Vec128<float, N> IfThenElse(Mask128<float, N> mask,
+                                    Vec128<float, N> yes, Vec128<float, N> no) {
+  return Vec128<float, N>{_mm_mask_blend_ps(mask.raw, no.raw, yes.raw)};
+}
+
+template <size_t N>  // double overload using the _pd blend.
+HWY_API Vec128<double, N> IfThenElse(Mask128<double, N> mask,
+                                     Vec128<double, N> yes,
+                                     Vec128<double, N> no) {
+  return Vec128<double, N>{_mm_mask_blend_pd(mask.raw, no.raw, yes.raw)};
+}
+
+namespace detail {  // Per-lane-size zero-masking move helpers, selected by SizeTag.
+
+template <typename T, size_t N>  // 1-byte lanes.
+HWY_INLINE Vec128<T, N> IfThenElseZero(hwy::SizeTag<1> /* tag */,
+                                       Mask128<T, N> mask, Vec128<T, N> yes) {
+  return Vec128<T, N>{_mm_maskz_mov_epi8(mask.raw, yes.raw)};  // lanes with a clear mask bit become zero
+}
+template <typename T, size_t N>  // 2-byte lanes.
+HWY_INLINE Vec128<T, N> IfThenElseZero(hwy::SizeTag<2> /* tag */,
+                                       Mask128<T, N> mask, Vec128<T, N> yes) {
+  return Vec128<T, N>{_mm_maskz_mov_epi16(mask.raw, yes.raw)};
+}
+template <typename T, size_t N>  // 4-byte lanes.
+HWY_INLINE Vec128<T, N> IfThenElseZero(hwy::SizeTag<4> /* tag */,
+                                       Mask128<T, N> mask, Vec128<T, N> yes) {
+  return Vec128<T, N>{_mm_maskz_mov_epi32(mask.raw, yes.raw)};
+}
+template <typename T, size_t N>  // 8-byte lanes.
+HWY_INLINE Vec128<T, N> IfThenElseZero(hwy::SizeTag<8> /* tag */,
+                                       Mask128<T, N> mask, Vec128<T, N> yes) {
+  return Vec128<T, N>{_mm_maskz_mov_epi64(mask.raw, yes.raw)};
+}
+
+}  // namespace detail
+
+template <typename T, size_t N>  // Per-lane select: mask ? yes : 0.
+HWY_API Vec128<T, N> IfThenElseZero(Mask128<T, N> mask, Vec128<T, N> yes) {
+  return detail::IfThenElseZero(hwy::SizeTag<sizeof(T)>(), mask, yes);  // dispatch on lane size in bytes
+}
+
+template <size_t N>  // float overload using the _ps zero-masking move.
+HWY_API Vec128<float, N> IfThenElseZero(Mask128<float, N> mask,
+                                        Vec128<float, N> yes) {
+  return Vec128<float, N>{_mm_maskz_mov_ps(mask.raw, yes.raw)};
+}
+
+template <size_t N>  // double overload using the _pd zero-masking move.
+HWY_API Vec128<double, N> IfThenElseZero(Mask128<double, N> mask,
+                                         Vec128<double, N> yes) {
+  return Vec128<double, N>{_mm_maskz_mov_pd(mask.raw, yes.raw)};
+}
+
+namespace detail {  // Per-lane-size helpers: masked lanes get no-no or no^no (zero), others keep no.
+
+template <typename T, size_t N>  // 1-byte lanes.
+HWY_INLINE Vec128<T, N> IfThenZeroElse(hwy::SizeTag<1> /* tag */,
+                                       Mask128<T, N> mask, Vec128<T, N> no) {
+  // xor_epi8/16 are missing, but we have sub, which is just as fast for u8/16.
+  return Vec128<T, N>{_mm_mask_sub_epi8(no.raw, mask.raw, no.raw, no.raw)};  // set bit: no - no; clear bit: src (no)
+}
+template <typename T, size_t N>  // 2-byte lanes; same masked-subtract approach.
+HWY_INLINE Vec128<T, N> IfThenZeroElse(hwy::SizeTag<2> /* tag */,
+                                       Mask128<T, N> mask, Vec128<T, N> no) {
+  return Vec128<T, N>{_mm_mask_sub_epi16(no.raw, mask.raw, no.raw, no.raw)};
+}
+template <typename T, size_t N>  // 4-byte lanes; masked XOR of no with itself.
+HWY_INLINE Vec128<T, N> IfThenZeroElse(hwy::SizeTag<4> /* tag */,
+                                       Mask128<T, N> mask, Vec128<T, N> no) {
+  return Vec128<T, N>{_mm_mask_xor_epi32(no.raw, mask.raw, no.raw, no.raw)};
+}
+template <typename T, size_t N>  // 8-byte lanes; masked XOR of no with itself.
+HWY_INLINE Vec128<T, N> IfThenZeroElse(hwy::SizeTag<8> /* tag */,
+                                       Mask128<T, N> mask, Vec128<T, N> no) {
+  return Vec128<T, N>{_mm_mask_xor_epi64(no.raw, mask.raw, no.raw, no.raw)};
+}
+
+}  // namespace detail
+
+template <typename T, size_t N>  // Per-lane select: mask ? 0 : no.
+HWY_API Vec128<T, N> IfThenZeroElse(Mask128<T, N> mask, Vec128<T, N> no) {
+  return detail::IfThenZeroElse(hwy::SizeTag<sizeof(T)>(), mask, no);  // dispatch on lane size in bytes
+}
+
+template <size_t N>  // float overload: masked XOR of no with itself.
+HWY_API Vec128<float, N> IfThenZeroElse(Mask128<float, N> mask,
+                                        Vec128<float, N> no) {
+  return Vec128<float, N>{_mm_mask_xor_ps(no.raw, mask.raw, no.raw, no.raw)};
+}
+
+template <size_t N>  // double overload: masked XOR of no with itself.
+HWY_API Vec128<double, N> IfThenZeroElse(Mask128<double, N> mask,
+                                         Vec128<double, N> no) {
+  return Vec128<double, N>{_mm_mask_xor_pd(no.raw, mask.raw, no.raw, no.raw)};
+}
+
+// ------------------------------ Mask logical
+
+// For Clang and GCC, mask intrinsics (KORTEST) weren't added until recently.
+#if !defined(HWY_COMPILER_HAS_MASK_INTRINSICS)  // Respect a value defined earlier.
+#if HWY_COMPILER_MSVC != 0 || HWY_COMPILER_GCC_ACTUAL >= 700 || \
+    HWY_COMPILER_CLANG >= 800
+#define HWY_COMPILER_HAS_MASK_INTRINSICS 1  // mask ops below use the _k*_mask* intrinsics
+#else
+#define HWY_COMPILER_HAS_MASK_INTRINSICS 0  // mask ops below use plain integer operators on .raw
+#endif
+#endif  // HWY_COMPILER_HAS_MASK_INTRINSICS
+
+namespace detail {  // Mask logic helpers by lane size: SizeTag<1> uses 16-bit masks, the rest 8-bit.
+
+template <typename T, size_t N>  // And: a & b on the mask bits.
+HWY_INLINE Mask128<T, N> And(hwy::SizeTag<1> /*tag*/, const Mask128<T, N> a,
+                             const Mask128<T, N> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask128<T, N>{_kand_mask16(a.raw, b.raw)};
+#else
+  return Mask128<T, N>{static_cast<__mmask16>(a.raw & b.raw)};
+#endif
+}
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> And(hwy::SizeTag<2> /*tag*/, const Mask128<T, N> a,
+                             const Mask128<T, N> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask128<T, N>{_kand_mask8(a.raw, b.raw)};
+#else
+  return Mask128<T, N>{static_cast<__mmask8>(a.raw & b.raw)};
+#endif
+}
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> And(hwy::SizeTag<4> /*tag*/, const Mask128<T, N> a,
+                             const Mask128<T, N> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask128<T, N>{_kand_mask8(a.raw, b.raw)};
+#else
+  return Mask128<T, N>{static_cast<__mmask8>(a.raw & b.raw)};
+#endif
+}
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> And(hwy::SizeTag<8> /*tag*/, const Mask128<T, N> a,
+                             const Mask128<T, N> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask128<T, N>{_kand_mask8(a.raw, b.raw)};
+#else
+  return Mask128<T, N>{static_cast<__mmask8>(a.raw & b.raw)};
+#endif
+}
+
+template <typename T, size_t N>  // AndNot: ~a & b (the first operand is negated).
+HWY_INLINE Mask128<T, N> AndNot(hwy::SizeTag<1> /*tag*/, const Mask128<T, N> a,
+                                const Mask128<T, N> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask128<T, N>{_kandn_mask16(a.raw, b.raw)};
+#else
+  return Mask128<T, N>{static_cast<__mmask16>(~a.raw & b.raw)};
+#endif
+}
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> AndNot(hwy::SizeTag<2> /*tag*/, const Mask128<T, N> a,
+                                const Mask128<T, N> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask128<T, N>{_kandn_mask8(a.raw, b.raw)};
+#else
+  return Mask128<T, N>{static_cast<__mmask8>(~a.raw & b.raw)};
+#endif
+}
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> AndNot(hwy::SizeTag<4> /*tag*/, const Mask128<T, N> a,
+                                const Mask128<T, N> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask128<T, N>{_kandn_mask8(a.raw, b.raw)};
+#else
+  return Mask128<T, N>{static_cast<__mmask8>(~a.raw & b.raw)};
+#endif
+}
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> AndNot(hwy::SizeTag<8> /*tag*/, const Mask128<T, N> a,
+                                const Mask128<T, N> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask128<T, N>{_kandn_mask8(a.raw, b.raw)};
+#else
+  return Mask128<T, N>{static_cast<__mmask8>(~a.raw & b.raw)};
+#endif
+}
+
+template <typename T, size_t N>  // Or: a | b on the mask bits.
+HWY_INLINE Mask128<T, N> Or(hwy::SizeTag<1> /*tag*/, const Mask128<T, N> a,
+                            const Mask128<T, N> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask128<T, N>{_kor_mask16(a.raw, b.raw)};
+#else
+  return Mask128<T, N>{static_cast<__mmask16>(a.raw | b.raw)};
+#endif
+}
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> Or(hwy::SizeTag<2> /*tag*/, const Mask128<T, N> a,
+                            const Mask128<T, N> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask128<T, N>{_kor_mask8(a.raw, b.raw)};
+#else
+  return Mask128<T, N>{static_cast<__mmask8>(a.raw | b.raw)};
+#endif
+}
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> Or(hwy::SizeTag<4> /*tag*/, const Mask128<T, N> a,
+                            const Mask128<T, N> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask128<T, N>{_kor_mask8(a.raw, b.raw)};
+#else
+  return Mask128<T, N>{static_cast<__mmask8>(a.raw | b.raw)};
+#endif
+}
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> Or(hwy::SizeTag<8> /*tag*/, const Mask128<T, N> a,
+                            const Mask128<T, N> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask128<T, N>{_kor_mask8(a.raw, b.raw)};
+#else
+  return Mask128<T, N>{static_cast<__mmask8>(a.raw | b.raw)};
+#endif
+}
+
+template <typename T, size_t N>  // Xor: a ^ b on the mask bits.
+HWY_INLINE Mask128<T, N> Xor(hwy::SizeTag<1> /*tag*/, const Mask128<T, N> a,
+                             const Mask128<T, N> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask128<T, N>{_kxor_mask16(a.raw, b.raw)};
+#else
+  return Mask128<T, N>{static_cast<__mmask16>(a.raw ^ b.raw)};
+#endif
+}
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> Xor(hwy::SizeTag<2> /*tag*/, const Mask128<T, N> a,
+                             const Mask128<T, N> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask128<T, N>{_kxor_mask8(a.raw, b.raw)};
+#else
+  return Mask128<T, N>{static_cast<__mmask8>(a.raw ^ b.raw)};
+#endif
+}
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> Xor(hwy::SizeTag<4> /*tag*/, const Mask128<T, N> a,
+                             const Mask128<T, N> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask128<T, N>{_kxor_mask8(a.raw, b.raw)};
+#else
+  return Mask128<T, N>{static_cast<__mmask8>(a.raw ^ b.raw)};
+#endif
+}
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> Xor(hwy::SizeTag<8> /*tag*/, const Mask128<T, N> a,
+                             const Mask128<T, N> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask128<T, N>{_kxor_mask8(a.raw, b.raw)};
+#else
+  return Mask128<T, N>{static_cast<__mmask8>(a.raw ^ b.raw)};
+#endif
+}
+
+template <typename T, size_t N>  // ExclusiveNeither: computed as XNOR, ~(a ^ b).
+HWY_INLINE Mask128<T, N> ExclusiveNeither(hwy::SizeTag<1> /*tag*/,
+                                          const Mask128<T, N> a,
+                                          const Mask128<T, N> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask128<T, N>{_kxnor_mask16(a.raw, b.raw)};
+#else
+  return Mask128<T, N>{static_cast<__mmask16>(~(a.raw ^ b.raw) & 0xFFFF)};
+#endif
+}
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> ExclusiveNeither(hwy::SizeTag<2> /*tag*/,
+                                          const Mask128<T, N> a,
+                                          const Mask128<T, N> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask128<T, N>{_kxnor_mask8(a.raw, b.raw)};
+#else
+  return Mask128<T, N>{static_cast<__mmask8>(~(a.raw ^ b.raw) & 0xFF)};
+#endif
+}
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> ExclusiveNeither(hwy::SizeTag<4> /*tag*/,
+                                          const Mask128<T, N> a,
+                                          const Mask128<T, N> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask128<T, N>{static_cast<__mmask8>(_kxnor_mask8(a.raw, b.raw) & 0xF)};  // keep only the low 4 bits
+#else
+  return Mask128<T, N>{static_cast<__mmask8>(~(a.raw ^ b.raw) & 0xF)};
+#endif
+}
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> ExclusiveNeither(hwy::SizeTag<8> /*tag*/,
+                                          const Mask128<T, N> a,
+                                          const Mask128<T, N> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask128<T, N>{static_cast<__mmask8>(_kxnor_mask8(a.raw, b.raw) & 0x3)};  // keep only the low 2 bits
+#else
+  return Mask128<T, N>{static_cast<__mmask8>(~(a.raw ^ b.raw) & 0x3)};
+#endif
+}
+
+}  // namespace detail
+
+template <typename T, size_t N>  // Mask AND; dispatches on lane size in bytes.
+HWY_API Mask128<T, N> And(const Mask128<T, N> a, Mask128<T, N> b) {
+  return detail::And(hwy::SizeTag<sizeof(T)>(), a, b);
+}
+
+template <typename T, size_t N>  // Mask AndNot (~a & b); dispatches on lane size.
+HWY_API Mask128<T, N> AndNot(const Mask128<T, N> a, Mask128<T, N> b) {
+  return detail::AndNot(hwy::SizeTag<sizeof(T)>(), a, b);
+}
+
+template <typename T, size_t N>  // Mask OR; dispatches on lane size.
+HWY_API Mask128<T, N> Or(const Mask128<T, N> a, Mask128<T, N> b) {
+  return detail::Or(hwy::SizeTag<sizeof(T)>(), a, b);
+}
+
+template <typename T, size_t N>  // Mask XOR; dispatches on lane size.
+HWY_API Mask128<T, N> Xor(const Mask128<T, N> a, Mask128<T, N> b) {
+  return detail::Xor(hwy::SizeTag<sizeof(T)>(), a, b);
+}
+
+template <typename T, size_t N>  // Mask NOT restricted to the N lane bits.
+HWY_API Mask128<T, N> Not(const Mask128<T, N> m) {
+  // Flip only the valid bits.
+  // TODO(janwas): use _knot intrinsics if N >= 8.
+  return Xor(m, Mask128<T, N>::FromBits((1ull << N) - 1));  // (1 << N) - 1 sets the low N bits
+}
+
+template <typename T, size_t N>  // Dispatches on lane size; helpers compute XNOR of a and b.
+HWY_API Mask128<T, N> ExclusiveNeither(const Mask128<T, N> a, Mask128<T, N> b) {
+  return detail::ExclusiveNeither(hwy::SizeTag<sizeof(T)>(), a, b);
+}
+
+#else  // AVX2 or below
+
+// ------------------------------ Mask
+
+// Mask and Vec are the same (true = FF..FF).
+template <typename T, size_t N>  // Wraps the vector's raw register as a mask, unchanged.
+HWY_API Mask128<T, N> MaskFromVec(const Vec128<T, N> v) {
+  return Mask128<T, N>{v.raw};
+}
+
+template <class D>  // Mask type for tag D, deduced from MaskFromVec's return type.
+using MFromD = decltype(MaskFromVec(VFromD<D>()));
+
+template <typename T, size_t N>  // Wraps the mask's raw register as a vector, unchanged.
+HWY_API Vec128<T, N> VecFromMask(const Mask128<T, N> v) {
+  return Vec128<T, N>{v.raw};
+}
+
+// Generic for all vector lengths.
+template <class D>  // Tag-taking variant; forwards to the overload above.
+HWY_API VFromD<D> VecFromMask(D /* tag */, MFromD<D> v) {
+  return VecFromMask(v);
+}
+
+#if HWY_TARGET >= HWY_SSSE3
+
+// mask ? yes : no
+template <typename T, size_t N>  // Select built from And/AndNot/Or (branch without blendv).
+HWY_API Vec128<T, N> IfThenElse(Mask128<T, N> mask, Vec128<T, N> yes,
+                                Vec128<T, N> no) {
+  const auto vmask = VecFromMask(DFromV<decltype(no)>(), mask);
+  return Or(And(vmask, yes), AndNot(vmask, no));  // (vmask & yes) | (~vmask & no)
+}
+
+#else  // HWY_TARGET < HWY_SSSE3
+
+// mask ? yes : no
+template <typename T, size_t N>  // Generic overload using the byte-wise blendv intrinsic.
+HWY_API Vec128<T, N> IfThenElse(Mask128<T, N> mask, Vec128<T, N> yes,
+                                Vec128<T, N> no) {
+  return Vec128<T, N>{_mm_blendv_epi8(no.raw, yes.raw, mask.raw)};
+}
+template <size_t N>  // float overload using _mm_blendv_ps.
+HWY_API Vec128<float, N> IfThenElse(Mask128<float, N> mask,
+                                    Vec128<float, N> yes, Vec128<float, N> no) {
+  return Vec128<float, N>{_mm_blendv_ps(no.raw, yes.raw, mask.raw)};
+}
+template <size_t N>  // double overload using _mm_blendv_pd.
+HWY_API Vec128<double, N> IfThenElse(Mask128<double, N> mask,
+                                     Vec128<double, N> yes,
+                                     Vec128<double, N> no) {
+  return Vec128<double, N>{_mm_blendv_pd(no.raw, yes.raw, mask.raw)};
+}
+
+#endif  // HWY_TARGET >= HWY_SSSE3
+
+// mask ? yes : 0
+template <typename T, size_t N>  // ANDs yes with the mask's vector form.
+HWY_API Vec128<T, N> IfThenElseZero(Mask128<T, N> mask, Vec128<T, N> yes) {
+  return yes & VecFromMask(DFromV<decltype(yes)>(), mask);
+}
+
+// mask ? 0 : no
+template <typename T, size_t N>  // AndNot with the mask's vector form: ~mask & no.
+HWY_API Vec128<T, N> IfThenZeroElse(Mask128<T, N> mask, Vec128<T, N> no) {
+  return AndNot(VecFromMask(DFromV<decltype(no)>(), mask), no);
+}
+
+// ------------------------------ Mask logical
+
+template <typename T, size_t N>  // Mask NOT via the vector Not.
+HWY_API Mask128<T, N> Not(const Mask128<T, N> m) {
+  const Simd<T, N, 0> d;
+  return MaskFromVec(Not(VecFromMask(d, m)));
+}
+
+template <typename T, size_t N>  // Mask AND via the vector And.
+HWY_API Mask128<T, N> And(const Mask128<T, N> a, Mask128<T, N> b) {
+  const Simd<T, N, 0> d;
+  return MaskFromVec(And(VecFromMask(d, a), VecFromMask(d, b)));
+}
+
+template <typename T, size_t N>  // Mask AndNot via the vector AndNot.
+HWY_API Mask128<T, N> AndNot(const Mask128<T, N> a, Mask128<T, N> b) {
+  const Simd<T, N, 0> d;
+  return MaskFromVec(AndNot(VecFromMask(d, a), VecFromMask(d, b)));
+}
+
+template <typename T, size_t N>  // Mask OR via the vector Or.
+HWY_API Mask128<T, N> Or(const Mask128<T, N> a, Mask128<T, N> b) {
+  const Simd<T, N, 0> d;
+  return MaskFromVec(Or(VecFromMask(d, a), VecFromMask(d, b)));
+}
+
+template <typename T, size_t N>  // Mask XOR via the vector Xor.
+HWY_API Mask128<T, N> Xor(const Mask128<T, N> a, Mask128<T, N> b) {
+  const Simd<T, N, 0> d;
+  return MaskFromVec(Xor(VecFromMask(d, a), VecFromMask(d, b)));
+}
+
+template <typename T, size_t N>  // Computes AndNot(a, Not(b)) on the vector forms.
+HWY_API Mask128<T, N> ExclusiveNeither(const Mask128<T, N> a, Mask128<T, N> b) {
+  const Simd<T, N, 0> d;
+  return MaskFromVec(AndNot(VecFromMask(d, a), Not(VecFromMask(d, b))));
+}
+
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// ------------------------------ ShiftLeft
+
+template <int kBits, size_t N>  // Left shift by the compile-time count kBits; u16 lanes.
+HWY_API Vec128<uint16_t, N> ShiftLeft(const Vec128<uint16_t, N> v) {
+  return Vec128<uint16_t, N>{_mm_slli_epi16(v.raw, kBits)};
+}
+
+template <int kBits, size_t N>  // u32 lanes.
+HWY_API Vec128<uint32_t, N> ShiftLeft(const Vec128<uint32_t, N> v) {
+  return Vec128<uint32_t, N>{_mm_slli_epi32(v.raw, kBits)};
+}
+
+template <int kBits, size_t N>  // u64 lanes.
+HWY_API Vec128<uint64_t, N> ShiftLeft(const Vec128<uint64_t, N> v) {
+  return Vec128<uint64_t, N>{_mm_slli_epi64(v.raw, kBits)};
+}
+
+template <int kBits, size_t N>  // i16 lanes; same intrinsic as u16.
+HWY_API Vec128<int16_t, N> ShiftLeft(const Vec128<int16_t, N> v) {
+  return Vec128<int16_t, N>{_mm_slli_epi16(v.raw, kBits)};
+}
+template <int kBits, size_t N>  // i32 lanes; same intrinsic as u32.
+HWY_API Vec128<int32_t, N> ShiftLeft(const Vec128<int32_t, N> v) {
+  return Vec128<int32_t, N>{_mm_slli_epi32(v.raw, kBits)};
+}
+template <int kBits, size_t N>  // i64 lanes; same intrinsic as u64.
+HWY_API Vec128<int64_t, N> ShiftLeft(const Vec128<int64_t, N> v) {
+  return Vec128<int64_t, N>{_mm_slli_epi64(v.raw, kBits)};
+}
+
+template <int kBits, typename T, size_t N, HWY_IF_T_SIZE(T, 1)>  // 1-byte lanes: shift as wider lanes, then mask.
+HWY_API Vec128<T, N> ShiftLeft(const Vec128<T, N> v) {
+  const DFromV<decltype(v)> d8;
+  // Use raw instead of BitCast to support N=1.
+  const Vec128<T, N> shifted{ShiftLeft<kBits>(Vec128<MakeWide<T>>{v.raw}).raw};
+  return kBits == 1
+             ? (v + v)  // shifting by one is done as v + v
+             : (shifted & Set(d8, static_cast<T>((0xFF << kBits) & 0xFF)));  // clear the low kBits of each byte
+}
+
+// ------------------------------ ShiftRight
+
+template <int kBits, size_t N>  // Logical right shift by kBits; u16 lanes.
+HWY_API Vec128<uint16_t, N> ShiftRight(const Vec128<uint16_t, N> v) {
+  return Vec128<uint16_t, N>{_mm_srli_epi16(v.raw, kBits)};
+}
+template <int kBits, size_t N>  // u32 lanes.
+HWY_API Vec128<uint32_t, N> ShiftRight(const Vec128<uint32_t, N> v) {
+  return Vec128<uint32_t, N>{_mm_srli_epi32(v.raw, kBits)};
+}
+template <int kBits, size_t N>  // u64 lanes.
+HWY_API Vec128<uint64_t, N> ShiftRight(const Vec128<uint64_t, N> v) {
+  return Vec128<uint64_t, N>{_mm_srli_epi64(v.raw, kBits)};
+}
+
+template <int kBits, size_t N>  // u8 lanes: shift as u16 lanes, then mask each byte.
+HWY_API Vec128<uint8_t, N> ShiftRight(const Vec128<uint8_t, N> v) {
+  const DFromV<decltype(v)> d8;
+  // Use raw instead of BitCast to support N=1.
+  const Vec128<uint8_t, N> shifted{
+      ShiftRight<kBits>(Vec128<uint16_t>{v.raw}).raw};
+  return shifted & Set(d8, 0xFF >> kBits);  // clear the top kBits of each byte
+}
+
+template <int kBits, size_t N>  // Arithmetic right shift by kBits; i16 lanes.
+HWY_API Vec128<int16_t, N> ShiftRight(const Vec128<int16_t, N> v) {
+  return Vec128<int16_t, N>{_mm_srai_epi16(v.raw, kBits)};
+}
+template <int kBits, size_t N>  // Arithmetic right shift by kBits; i32 lanes.
+HWY_API Vec128<int32_t, N> ShiftRight(const Vec128<int32_t, N> v) {
+  return Vec128<int32_t, N>{_mm_srai_epi32(v.raw, kBits)};
+}
+
+template <int kBits, size_t N>  // i8 lanes: unsigned shift, then sign-extend manually.
+HWY_API Vec128<int8_t, N> ShiftRight(const Vec128<int8_t, N> v) {
+  const DFromV<decltype(v)> di;
+  const RebindToUnsigned<decltype(di)> du;
+  const auto shifted = BitCast(di, ShiftRight<kBits>(BitCast(du, v)));  // shift done on the unsigned view
+  const auto shifted_sign = BitCast(di, Set(du, 0x80 >> kBits));  // where the sign bit lands after the shift
+  return (shifted ^ shifted_sign) - shifted_sign;  // (x ^ s) - s extends that bit upward
+}
+
+// i64 is implemented after BroadcastSignBit.
+
+// ================================================== MEMORY (1)
+
+// Clang static analysis claims the memory immediately after a partial vector
+// store is uninitialized, and also flags the input to partial loads (at least
+// for loadl_pd) as "garbage". This is a false alarm because msan does not
+// raise errors. We work around this by using CopyBytes instead of intrinsics,
+// but only for the analyzer to avoid potentially bad code generation.
+// Unfortunately __clang_analyzer__ was not defined for clang-tidy prior to v7.
+#ifndef HWY_SAFE_PARTIAL_LOAD_STORE  // Respect a value defined earlier.
+#if defined(__clang_analyzer__) || \
+    (HWY_COMPILER_CLANG != 0 && HWY_COMPILER_CLANG < 700)
+#define HWY_SAFE_PARTIAL_LOAD_STORE 1  // partial loads below use CopyBytes
+#else
+#define HWY_SAFE_PARTIAL_LOAD_STORE 0  // partial loads below use intrinsics
+#endif
+#endif  // HWY_SAFE_PARTIAL_LOAD_STORE
+
+// ------------------------------ Load
+
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>  // 16-byte load via _mm_load_si128.
+HWY_API VFromD<D> Load(D /* tag */, const TFromD<D>* HWY_RESTRICT aligned) {
+  return VFromD<D>{_mm_load_si128(reinterpret_cast<const __m128i*>(aligned))};  // intrinsic requires 16-byte alignment
+}
+// Generic for all vector lengths greater than or equal to 16 bytes.
+template <class D, HWY_IF_V_SIZE_GT_D(D, 8), HWY_IF_BF16_D(D)>  // bfloat16_t: load as uint16_t, cast back.
+HWY_API VFromD<D> Load(D d, const bfloat16_t* HWY_RESTRICT aligned) {
+  const RebindToUnsigned<decltype(d)> du;
+  return BitCast(d, Load(du, reinterpret_cast<const uint16_t*>(aligned)));
+}
+// Aligned load of a full 128-bit float16_t vector. With native float16
+// support this uses _mm_load_ph; otherwise the data is loaded as uint16_t
+// lanes and bit-cast back to d.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F16_D(D)>
+HWY_API Vec128<float16_t> Load(D d, const float16_t* HWY_RESTRICT aligned) {
+#if HWY_HAVE_FLOAT16
+  (void)d;  // Tag is unused on this path; same idiom as the f16 LoadU/Store.
+  return Vec128<float16_t>{_mm_load_ph(aligned)};
+#else
+  const RebindToUnsigned<decltype(d)> du;
+  return BitCast(d, Load(du, reinterpret_cast<const uint16_t*>(aligned)));
+#endif  // HWY_HAVE_FLOAT16
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F32_D(D)>  // 16-byte float load via _mm_load_ps.
+HWY_API Vec128<float> Load(D /* tag */, const float* HWY_RESTRICT aligned) {
+  return Vec128<float>{_mm_load_ps(aligned)};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F64_D(D)>  // 16-byte double load via _mm_load_pd.
+HWY_API Vec128<double> Load(D /* tag */, const double* HWY_RESTRICT aligned) {
+  return Vec128<double>{_mm_load_pd(aligned)};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>  // 16-byte load via _mm_loadu_si128.
+HWY_API VFromD<D> LoadU(D /* tag */, const TFromD<D>* HWY_RESTRICT p) {
+  return VFromD<D>{_mm_loadu_si128(reinterpret_cast<const __m128i*>(p))};  // unaligned-load intrinsic
+}
+// Generic for all vector lengths greater than or equal to 16 bytes.
+template <class D, HWY_IF_V_SIZE_GT_D(D, 8), HWY_IF_BF16_D(D)>  // bfloat16_t: load as uint16_t, cast back.
+HWY_API VFromD<D> LoadU(D d, const bfloat16_t* HWY_RESTRICT p) {
+  const RebindToUnsigned<decltype(d)> du;
+  return BitCast(d, LoadU(du, reinterpret_cast<const uint16_t*>(p)));
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F16_D(D)>  // float16_t: native if available, else via uint16_t.
+HWY_API Vec128<float16_t> LoadU(D d, const float16_t* HWY_RESTRICT p) {
+#if HWY_HAVE_FLOAT16
+  (void)d;  // tag is not needed on this path
+  return Vec128<float16_t>{_mm_loadu_ph(p)};
+#else
+  const RebindToUnsigned<decltype(d)> du;
+  return BitCast(d, LoadU(du, reinterpret_cast<const uint16_t*>(p)));
+#endif  // HWY_HAVE_FLOAT16
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F32_D(D)>  // 16-byte float load via _mm_loadu_ps.
+HWY_API Vec128<float> LoadU(D /* tag */, const float* HWY_RESTRICT p) {
+  return Vec128<float>{_mm_loadu_ps(p)};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F64_D(D)>  // 16-byte double load via _mm_loadu_pd.
+HWY_API Vec128<double> LoadU(D /* tag */, const double* HWY_RESTRICT p) {
+  return Vec128<double>{_mm_loadu_pd(p)};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_NOT_FLOAT3264_D(D)>  // 8-byte load for non-float/double lanes.
+HWY_API VFromD<D> Load(D d, const TFromD<D>* HWY_RESTRICT p) {
+  const RebindToUnsigned<decltype(d)> du;  // for float16_t
+#if HWY_SAFE_PARTIAL_LOAD_STORE  // Analyzer-friendly path: zero, then copy 8 bytes.
+  __m128i v = _mm_setzero_si128();
+  CopyBytes<8>(p, &v);  // not same size
+#else
+  const __m128i v = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(p));  // low 64 bits loaded, upper zeroed
+#endif
+  return BitCast(d, VFromD<decltype(du)>{v});
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_F32_D(D)>  // 8-byte load of two floats.
+HWY_API Vec64<float> Load(D /* tag */, const float* HWY_RESTRICT p) {
+#if HWY_SAFE_PARTIAL_LOAD_STORE  // Analyzer-friendly path: zero, then copy 8 bytes.
+  __m128 v = _mm_setzero_ps();
+  CopyBytes<8>(p, &v);  // not same size
+  return Vec64<float>{v};
+#else
+  const __m128 hi = _mm_setzero_ps();  // supplies zeros for the upper half
+  return Vec64<float>{_mm_loadl_pi(hi, reinterpret_cast<const __m64*>(p))};  // lower half from p, upper from hi
+#endif
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_F64_D(D)>  // 8-byte load of one double.
+HWY_API Vec64<double> Load(D /* tag */, const double* HWY_RESTRICT p) {
+#if HWY_SAFE_PARTIAL_LOAD_STORE  // Analyzer-friendly path: zero, then copy 8 bytes.
+  __m128d v = _mm_setzero_pd();
+  CopyBytes<8>(p, &v);  // not same size
+  return Vec64<double>{v};
+#else
+  return Vec64<double>{_mm_load_sd(p)};  // low lane loaded, upper lane zeroed
+#endif
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_F32_D(D)>  // 4-byte load of one float.
+HWY_API Vec32<float> Load(D /* tag */, const float* HWY_RESTRICT p) {
+#if HWY_SAFE_PARTIAL_LOAD_STORE  // Analyzer-friendly path: zero, then copy 4 bytes.
+  __m128 v = _mm_setzero_ps();
+  CopyBytes<4>(p, &v);  // not same size
+  return Vec32<float>{v};
+#else
+  return Vec32<float>{_mm_load_ss(p)};  // low lane loaded, upper lanes zeroed
+#endif
+}
+
+// Any <= 32 bit except <float, 1>
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_NOT_FLOAT3264_D(D)>  // Loads d.MaxBytes() (<= 4) bytes.
+HWY_API VFromD<D> Load(D d, const TFromD<D>* HWY_RESTRICT p) {
+  const RebindToUnsigned<decltype(d)> du;  // for float16_t
+  // Clang ArgumentPromotionPass seems to break this code. We can unpoison
+  // before SetTableIndices -> LoadU -> Load and the memory is poisoned again.
+  detail::MaybeUnpoison(p, Lanes(d));
+
+#if HWY_SAFE_PARTIAL_LOAD_STORE  // Analyzer-friendly path: zero vector, then copy the bytes in.
+  __m128i v = Zero(Full128<TFromD<decltype(du)>>()).raw;
+  CopyBytes<d.MaxBytes()>(p, &v);  // not same size as VFromD
+#else
+  int32_t bits = 0;  // zero-initialized so bytes beyond MaxBytes() stay zero
+  CopyBytes<d.MaxBytes()>(p, &bits);  // not same size as VFromD
+  const __m128i v = _mm_cvtsi32_si128(bits);  // bits into the low 32, upper bits zeroed
+#endif
+  return BitCast(d, VFromD<decltype(du)>{v});
+}
+
+// Below 128 bits there is no separate unaligned path: LoadU forwards to Load.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> LoadU(D d, const TFromD<D>* HWY_RESTRICT p) {
+  VFromD<D> loaded = Load(d, p);
+  return loaded;
+}
+
+// With 128-bit SIMD there is no block to duplicate, so this is simply an
+// unaligned load.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> LoadDup128(D d, const TFromD<D>* HWY_RESTRICT p) {
+  VFromD<D> loaded = LoadU(d, p);
+  return loaded;
+}
+
+// ------------------------------ Store
+
+// Aligned 128-bit store for lane types that are neither floating-point nor
+// "special" (see the enable_if condition).
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+HWY_API void Store(VFromD<D> v, D /* tag */, TFromD<D>* HWY_RESTRICT aligned) {
+  __m128i* const dst = reinterpret_cast<__m128i*>(aligned);
+  _mm_store_si128(dst, v.raw);
+}
+// bfloat16_t: generic for every vector length of 16 bytes or more. Forwards
+// to the uint16_t store after a bit-cast.
+template <class D, HWY_IF_V_SIZE_GT_D(D, 8), HWY_IF_BF16_D(D)>
+HWY_API void Store(VFromD<D> v, D d, bfloat16_t* HWY_RESTRICT aligned) {
+  const RebindToUnsigned<decltype(d)> du;
+  uint16_t* const dst = reinterpret_cast<uint16_t*>(aligned);
+  Store(BitCast(du, v), du, dst);
+}
+// float16_t: native store when HWY_HAVE_FLOAT16, otherwise via uint16_t.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F16_D(D)>
+HWY_API void Store(Vec128<float16_t> v, D d, float16_t* HWY_RESTRICT aligned) {
+#if HWY_HAVE_FLOAT16
+  (void)d;  // only needed by the fallback below
+  _mm_store_ph(aligned, v.raw);
+#else
+  const RebindToUnsigned<decltype(d)> du;
+  uint16_t* const dst = reinterpret_cast<uint16_t*>(aligned);
+  Store(BitCast(du, v), du, dst);
+#endif  // HWY_HAVE_FLOAT16
+}
+// float
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API void Store(Vec128<float> v, D /* tag */, float* HWY_RESTRICT aligned) {
+  const __m128 raw = v.raw;
+  _mm_store_ps(aligned, raw);
+}
+// double
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API void Store(Vec128<double> v, D /* tag */,
+                   double* HWY_RESTRICT aligned) {
+  const __m128d raw = v.raw;
+  _mm_store_pd(aligned, raw);
+}
+
+// Unaligned 128-bit store for lane types that are neither floating-point nor
+// "special" (see the enable_if condition).
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+HWY_API void StoreU(VFromD<D> v, D /* tag */, TFromD<D>* HWY_RESTRICT p) {
+  __m128i* const dst = reinterpret_cast<__m128i*>(p);
+  _mm_storeu_si128(dst, v.raw);
+}
+// bfloat16_t: generic for every vector length of 16 bytes or more. Forwards
+// to the uint16_t store after a bit-cast.
+template <class D, HWY_IF_V_SIZE_GT_D(D, 8), HWY_IF_BF16_D(D)>
+HWY_API void StoreU(VFromD<D> v, D d, bfloat16_t* HWY_RESTRICT p) {
+  const RebindToUnsigned<decltype(d)> du;
+  uint16_t* const dst = reinterpret_cast<uint16_t*>(p);
+  StoreU(BitCast(du, v), du, dst);
+}
+// float16_t: native store when HWY_HAVE_FLOAT16, otherwise via uint16_t.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F16_D(D)>
+HWY_API void StoreU(Vec128<float16_t> v, D d, float16_t* HWY_RESTRICT p) {
+#if HWY_HAVE_FLOAT16
+  (void)d;  // only needed by the fallback below
+  _mm_storeu_ph(p, v.raw);
+#else
+  const RebindToUnsigned<decltype(d)> du;
+  uint16_t* const dst = reinterpret_cast<uint16_t*>(p);
+  StoreU(BitCast(du, v), du, dst);
+#endif  // HWY_HAVE_FLOAT16
+}
+// float
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API void StoreU(Vec128<float> v, D /* tag */, float* HWY_RESTRICT p) {
+  const __m128 raw = v.raw;
+  _mm_storeu_ps(p, raw);
+}
+// double
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API void StoreU(Vec128<double> v, D /* tag */, double* HWY_RESTRICT p) {
+  const __m128d raw = v.raw;
+  _mm_storeu_pd(p, raw);
+}
+
+// 64-bit store for every lane type other than float/double.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API void Store(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT p) {
+#if !HWY_SAFE_PARTIAL_LOAD_STORE
+  // Go through the unsigned vector type so float16_t lanes work too.
+  const RebindToUnsigned<decltype(d)> du;
+  const __m128i raw = BitCast(du, v).raw;
+  _mm_storel_epi64(reinterpret_cast<__m128i*>(p), raw);
+#else
+  (void)d;
+  // Copy only eight bytes; the vector object is not the same size.
+  CopyBytes<8>(&v, p);
+#endif
+}
+// 64-bit store of two floats.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_F32_D(D)>
+HWY_API void Store(Vec64<float> v, D /* tag */, float* HWY_RESTRICT p) {
+#if !HWY_SAFE_PARTIAL_LOAD_STORE
+  __m64* const dst = reinterpret_cast<__m64*>(p);
+  _mm_storel_pi(dst, v.raw);
+#else
+  // Copy only eight bytes; the vector object is not the same size.
+  CopyBytes<8>(&v, p);
+#endif
+}
+// 64-bit store of one double.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_F64_D(D)>
+HWY_API void Store(Vec64<double> v, D /* tag */, double* HWY_RESTRICT p) {
+#if !HWY_SAFE_PARTIAL_LOAD_STORE
+  const __m128d raw = v.raw;
+  _mm_storel_pd(p, raw);
+#else
+  // Copy only eight bytes; the vector object is not the same size.
+  CopyBytes<8>(&v, p);
+#endif
+}
+
+// Stores of at most 32 bits, for every lane type except a single float.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API void Store(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT p) {
+  // Copy only as many bytes as the descriptor allows; the vector object is not
+  // the same size.
+  constexpr size_t kBytes = d.MaxBytes();
+  CopyBytes<kBytes>(&v, p);
+}
+// 32-bit store of a single float.
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_F32_D(D)>
+HWY_API void Store(Vec32<float> v, D /* tag */, float* HWY_RESTRICT p) {
+#if !HWY_SAFE_PARTIAL_LOAD_STORE
+  const __m128 raw = v.raw;
+  _mm_store_ss(p, raw);
+#else
+  // Copy only four bytes; the vector object is not the same size.
+  CopyBytes<4>(&v, p);
+#endif
+}
+
+// Below 128 bits there is no separate unaligned path: StoreU forwards to
+// Store.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API void StoreU(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT p) {
+  return Store(v, d, p);
+}
+
+// ================================================== SWIZZLE (1)
+
+// ------------------------------ TableLookupBytes
+// Returns a vector whose byte i is bytes[from[i]]. `bytes` is the table and
+// `from` holds the byte indices; the result has the type of `from`.
+template <typename T, size_t N, typename TI, size_t NI>
+HWY_API Vec128<TI, NI> TableLookupBytes(const Vec128<T, N> bytes,
+                                        const Vec128<TI, NI> from) {
+#if HWY_TARGET != HWY_SSE2
+  // SSSE3 or newer: a single byte-shuffle instruction.
+  return Vec128<TI, NI>{_mm_shuffle_epi8(bytes.raw, from.raw)};
+#elif HWY_COMPILER_GCC_ACTUAL && HWY_HAS_BUILTIN(__builtin_shuffle)
+  // SSE2 with GCC: let the compiler generate the shuffle.
+  typedef uint8_t U8x16 __attribute__((__vector_size__(16)));
+  using RawTI = typename detail::Raw128<TI>::type;
+  return Vec128<TI, NI>{reinterpret_cast<RawTI>(
+      __builtin_shuffle(reinterpret_cast<U8x16>(bytes.raw),
+                        reinterpret_cast<U8x16>(from.raw)))};
+#else
+  // SSE2 without __builtin_shuffle: spill both vectors to memory and perform
+  // the lookup one byte at a time.
+  const DFromV<decltype(from)> d_idx;
+  const Repartition<uint8_t, decltype(d_idx)> du8_idx;
+  const DFromV<decltype(bytes)> d_tbl;
+  const Repartition<uint8_t, decltype(d_tbl)> du8_tbl;
+  const Full128<uint8_t> du8_full;
+
+  alignas(16) uint8_t table[16];
+  alignas(16) uint8_t indices[16];
+  alignas(16) uint8_t looked_up[16];
+
+  Store(Vec128<uint8_t>{BitCast(du8_tbl, bytes).raw}, du8_full, table);
+  Store(Vec128<uint8_t>{BitCast(du8_idx, from).raw}, du8_full, indices);
+
+  for (size_t i = 0; i < 16; ++i) {
+    // Only the low four index bits select a table entry.
+    looked_up[i] = table[indices[i] & 15];
+  }
+
+  const auto result = Load(du8_full, looked_up);
+  return BitCast(d_idx, VFromD<decltype(du8_idx)>{result.raw});
+#endif
+}
+
+// ------------------------------ TableLookupBytesOr0
+// Works for all vector widths. On SSSE3/SSE4/AVX2/AVX3, x86 already returns
+// zero for indices >= 0x80, so only SSE2 needs explicit zeroing.
+template <class V, class VI>
+HWY_API VI TableLookupBytesOr0(const V bytes, const VI from) {
+#if HWY_TARGET == HWY_SSE2
+  const DFromV<decltype(from)> d;
+  const Repartition<int8_t, decltype(d)> di8;
+
+  // Viewed as int8_t, an index with its top bit set is negative.
+  const auto idx = BitCast(di8, from);
+  const auto is_negative = idx < Zero(di8);
+  const auto looked_up = TableLookupBytes(bytes, idx);
+  return BitCast(d, IfThenZeroElse(is_negative, looked_up));
+#else
+  return TableLookupBytes(bytes, from);
+#endif
+}
+
+// ------------------------------ Shuffles (ShiftRight, TableLookupBytes)
+
+// Notation: let Vec128<int32_t> have lanes 3,2,1,0 (0 is least-significant).
+// Shuffle0321 rotates one lane to the right (the previous least-significant
+// lane is now most-significant). These could also be implemented via
+// CombineShiftRightBytes but the shuffle_abcd notation is more convenient.
+
+// Exchanges the two 32-bit lanes inside each 64-bit half.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Shuffle2301(const Vec128<T, N> v) {
+  static_assert(sizeof(T) == 4, "Only for 32-bit lanes");
+  static_assert(N == 2 || N == 4, "Does not make sense for N=1");
+  // _MM_SHUFFLE(2, 3, 0, 1) == 0xB1.
+  const __m128i raw = _mm_shuffle_epi32(v.raw, _MM_SHUFFLE(2, 3, 0, 1));
+  return Vec128<T, N>{raw};
+}
+// Float overload of the above.
+template <size_t N>
+HWY_API Vec128<float, N> Shuffle2301(const Vec128<float, N> v) {
+  static_assert(N == 2 || N == 4, "Does not make sense for N=1");
+  const __m128 raw = _mm_shuffle_ps(v.raw, v.raw, _MM_SHUFFLE(2, 3, 0, 1));
+  return Vec128<float, N>{raw};
+}
+
+// These are used by generic_ops-inl to implement LoadInterleaved3. As with
+// Intel's shuffle* intrinsics and InterleaveLower, the lower half of the output
+// comes from the first argument.
+namespace detail {
+
+// 8-bit lanes.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec32<T> ShuffleTwo2301(const Vec32<T> a, const Vec32<T> b) {
+  const DFromV<decltype(a)> d;
+  const Twice<decltype(d)> d2;
+  const auto joined = Combine(d2, b, a);
+#if HWY_TARGET == HWY_SSE2
+  // Pick 16-bit lanes 0 and 3, then swap the two bytes within each lane.
+  Vec32<uint16_t> picked{
+      _mm_shufflelo_epi16(joined.raw, _MM_SHUFFLE(3, 0, 3, 0))};
+  const auto up = ShiftLeft<8>(picked);
+  const auto down = ShiftRight<8>(picked);
+  return BitCast(d, Or(up, down));
+#else
+  // Byte indices into the joined vector.
+  alignas(16) const T kIdx[8] = {1, 0, 7, 6};
+  const auto idx = Load(d2, kIdx);
+  return Vec32<T>{TableLookupBytes(joined, idx).raw};
+#endif
+}
+// 16-bit lanes.
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec64<T> ShuffleTwo2301(const Vec64<T> a, const Vec64<T> b) {
+  const DFromV<decltype(a)> d;
+  const Twice<decltype(d)> d2;
+  const auto joined = Combine(d2, b, a);
+#if HWY_TARGET == HWY_SSE2
+  // Pick 32-bit lanes 0 and 3, then swap the 16-bit lanes within each.
+  Vec64<uint32_t> picked{
+      _mm_shuffle_epi32(joined.raw, _MM_SHUFFLE(3, 0, 3, 0))};
+  const __m128i swapped =
+      _mm_shufflelo_epi16(picked.raw, _MM_SHUFFLE(2, 3, 0, 1));
+  return Vec64<T>{swapped};
+#else
+  // Each entry packs the two byte indices of one 16-bit output lane.
+  alignas(16) const T kIdx[8] = {0x0302, 0x0100, 0x0f0e, 0x0d0c};
+  const auto idx = Load(d2, kIdx);
+  return Vec64<T>{TableLookupBytes(joined, idx).raw};
+#endif
+}
+// 32-bit lanes: one float shuffle taking its lower half from `a` and its
+// upper half from `b`.
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T> ShuffleTwo2301(const Vec128<T> a, const Vec128<T> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToFloat<decltype(d)> df;
+  constexpr int kSelect = _MM_SHUFFLE(2, 3, 0, 1);
+  const __m128 fa = BitCast(df, a).raw;
+  const __m128 fb = BitCast(df, b).raw;
+  return BitCast(d, Vec128<float>{_mm_shuffle_ps(fa, fb, kSelect)});
+}
+
+// 8-bit lanes.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec32<T> ShuffleTwo1230(const Vec32<T> a, const Vec32<T> b) {
+  const DFromV<decltype(a)> d;
+#if HWY_TARGET == HWY_SSE2
+  // Widen bytes to 16-bit lanes by interleaving with zero, shuffle each input
+  // separately, then pack the combined result back down to bytes.
+  const auto zero = Zero(d);
+  const Rebind<int16_t, decltype(d)> di16;
+  const __m128i a_wide = _mm_unpacklo_epi8(a.raw, zero.raw);
+  const __m128i b_wide = _mm_unpacklo_epi8(b.raw, zero.raw);
+  const Vec32<int16_t> a_picked{
+      _mm_shufflelo_epi16(a_wide, _MM_SHUFFLE(3, 0, 3, 0))};
+  const Vec32<int16_t> b_picked{
+      _mm_shufflelo_epi16(b_wide, _MM_SHUFFLE(1, 2, 1, 2))};
+  const auto joined = Combine(di16, b_picked, a_picked);
+  return Vec32<T>{_mm_packus_epi16(joined.raw, joined.raw)};
+#else
+  const Twice<decltype(d)> d2;
+  const auto joined = Combine(d2, b, a);
+  // Byte indices into the joined vector.
+  alignas(16) const T kIdx[8] = {0, 3, 6, 5};
+  const auto idx = Load(d2, kIdx);
+  return Vec32<T>{TableLookupBytes(joined, idx).raw};
+#endif
+}
+// 16-bit lanes.
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec64<T> ShuffleTwo1230(const Vec64<T> a, const Vec64<T> b) {
+  const DFromV<decltype(a)> d;
+#if HWY_TARGET == HWY_SSE2
+  // Shuffle each input separately and then concatenate the halves.
+  const Vec32<T> a_picked{
+      _mm_shufflelo_epi16(a.raw, _MM_SHUFFLE(3, 0, 3, 0))};
+  const Vec32<T> b_picked{
+      _mm_shufflelo_epi16(b.raw, _MM_SHUFFLE(1, 2, 1, 2))};
+  return Combine(d, b_picked, a_picked);
+#else
+  const Twice<decltype(d)> d2;
+  const auto joined = Combine(d2, b, a);
+  // Each entry packs the two byte indices of one 16-bit output lane.
+  alignas(16) const T kIdx[8] = {0x0100, 0x0706, 0x0d0c, 0x0b0a};
+  const auto idx = Load(d2, kIdx);
+  return Vec64<T>{TableLookupBytes(joined, idx).raw};
+#endif
+}
+// 32-bit lanes: one float shuffle taking its lower half from `a` and its
+// upper half from `b`.
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T> ShuffleTwo1230(const Vec128<T> a, const Vec128<T> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToFloat<decltype(d)> df;
+  constexpr int kSelect = _MM_SHUFFLE(1, 2, 3, 0);
+  const __m128 fa = BitCast(df, a).raw;
+  const __m128 fb = BitCast(df, b).raw;
+  return BitCast(d, Vec128<float>{_mm_shuffle_ps(fa, fb, kSelect)});
+}
+
+// 8-bit lanes.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec32<T> ShuffleTwo3012(const Vec32<T> a, const Vec32<T> b) {
+  const DFromV<decltype(a)> d;
+#if HWY_TARGET == HWY_SSE2
+  // Widen bytes to 16-bit lanes by interleaving with zero, shuffle each input
+  // separately, then pack the combined result back down to bytes.
+  const auto zero = Zero(d);
+  const Rebind<int16_t, decltype(d)> di16;
+  const __m128i a_wide = _mm_unpacklo_epi8(a.raw, zero.raw);
+  const __m128i b_wide = _mm_unpacklo_epi8(b.raw, zero.raw);
+  const Vec32<int16_t> a_picked{
+      _mm_shufflelo_epi16(a_wide, _MM_SHUFFLE(1, 2, 1, 2))};
+  const Vec32<int16_t> b_picked{
+      _mm_shufflelo_epi16(b_wide, _MM_SHUFFLE(3, 0, 3, 0))};
+  const auto joined = Combine(di16, b_picked, a_picked);
+  return Vec32<T>{_mm_packus_epi16(joined.raw, joined.raw)};
+#else
+  const Twice<decltype(d)> d2;
+  const auto joined = Combine(d2, b, a);
+  // Byte indices into the joined vector.
+  alignas(16) const T kIdx[8] = {2, 1, 4, 7};
+  const auto idx = Load(d2, kIdx);
+  return Vec32<T>{TableLookupBytes(joined, idx).raw};
+#endif
+}
+// 16-bit lanes.
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec64<T> ShuffleTwo3012(const Vec64<T> a, const Vec64<T> b) {
+  const DFromV<decltype(a)> d;
+#if HWY_TARGET == HWY_SSE2
+  // Shuffle each input separately and then concatenate the halves.
+  const Vec32<T> a_picked{
+      _mm_shufflelo_epi16(a.raw, _MM_SHUFFLE(1, 2, 1, 2))};
+  const Vec32<T> b_picked{
+      _mm_shufflelo_epi16(b.raw, _MM_SHUFFLE(3, 0, 3, 0))};
+  return Combine(d, b_picked, a_picked);
+#else
+  const Twice<decltype(d)> d2;
+  const auto joined = Combine(d2, b, a);
+  // Each entry packs the two byte indices of one 16-bit output lane.
+  alignas(16) const T kIdx[8] = {0x0504, 0x0302, 0x0908, 0x0f0e};
+  const auto idx = Load(d2, kIdx);
+  return Vec64<T>{TableLookupBytes(joined, idx).raw};
+#endif
+}
+// 32-bit lanes: one float shuffle taking its lower half from `a` and its
+// upper half from `b`.
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T> ShuffleTwo3012(const Vec128<T> a, const Vec128<T> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToFloat<decltype(d)> df;
+  constexpr int kSelect = _MM_SHUFFLE(3, 0, 1, 2);
+  const __m128 fa = BitCast(df, a).raw;
+  const __m128 fb = BitCast(df, b).raw;
+  return BitCast(d, Vec128<float>{_mm_shuffle_ps(fa, fb, kSelect)});
+}
+
+}  // namespace detail
+
+// Exchanges the upper and lower 64-bit halves of a vector of 32-bit lanes.
+// _MM_SHUFFLE(1, 0, 3, 2) == 0x4E.
+HWY_API Vec128<uint32_t> Shuffle1032(const Vec128<uint32_t> v) {
+  const __m128i raw = _mm_shuffle_epi32(v.raw, _MM_SHUFFLE(1, 0, 3, 2));
+  return Vec128<uint32_t>{raw};
+}
+HWY_API Vec128<int32_t> Shuffle1032(const Vec128<int32_t> v) {
+  const __m128i raw = _mm_shuffle_epi32(v.raw, _MM_SHUFFLE(1, 0, 3, 2));
+  return Vec128<int32_t>{raw};
+}
+HWY_API Vec128<float> Shuffle1032(const Vec128<float> v) {
+  const __m128 raw = _mm_shuffle_ps(v.raw, v.raw, _MM_SHUFFLE(1, 0, 3, 2));
+  return Vec128<float>{raw};
+}
+// Same half-swap for 64-bit lanes. The integer overloads reuse the 32-bit
+// shuffle with _MM_SHUFFLE(1, 0, 3, 2) == 0x4E.
+HWY_API Vec128<uint64_t> Shuffle01(const Vec128<uint64_t> v) {
+  const __m128i raw = _mm_shuffle_epi32(v.raw, _MM_SHUFFLE(1, 0, 3, 2));
+  return Vec128<uint64_t>{raw};
+}
+HWY_API Vec128<int64_t> Shuffle01(const Vec128<int64_t> v) {
+  const __m128i raw = _mm_shuffle_epi32(v.raw, _MM_SHUFFLE(1, 0, 3, 2));
+  return Vec128<int64_t>{raw};
+}
+HWY_API Vec128<double> Shuffle01(const Vec128<double> v) {
+  const __m128d raw = _mm_shuffle_pd(v.raw, v.raw, 1);
+  return Vec128<double>{raw};
+}
+
+// Rotates the four 32-bit lanes right by one lane.
+// _MM_SHUFFLE(0, 3, 2, 1) == 0x39.
+HWY_API Vec128<uint32_t> Shuffle0321(const Vec128<uint32_t> v) {
+  const __m128i raw = _mm_shuffle_epi32(v.raw, _MM_SHUFFLE(0, 3, 2, 1));
+  return Vec128<uint32_t>{raw};
+}
+HWY_API Vec128<int32_t> Shuffle0321(const Vec128<int32_t> v) {
+  const __m128i raw = _mm_shuffle_epi32(v.raw, _MM_SHUFFLE(0, 3, 2, 1));
+  return Vec128<int32_t>{raw};
+}
+HWY_API Vec128<float> Shuffle0321(const Vec128<float> v) {
+  const __m128 raw = _mm_shuffle_ps(v.raw, v.raw, _MM_SHUFFLE(0, 3, 2, 1));
+  return Vec128<float>{raw};
+}
+// Rotates the four 32-bit lanes left by one lane.
+// _MM_SHUFFLE(2, 1, 0, 3) == 0x93.
+HWY_API Vec128<uint32_t> Shuffle2103(const Vec128<uint32_t> v) {
+  const __m128i raw = _mm_shuffle_epi32(v.raw, _MM_SHUFFLE(2, 1, 0, 3));
+  return Vec128<uint32_t>{raw};
+}
+HWY_API Vec128<int32_t> Shuffle2103(const Vec128<int32_t> v) {
+  const __m128i raw = _mm_shuffle_epi32(v.raw, _MM_SHUFFLE(2, 1, 0, 3));
+  return Vec128<int32_t>{raw};
+}
+HWY_API Vec128<float> Shuffle2103(const Vec128<float> v) {
+  const __m128 raw = _mm_shuffle_ps(v.raw, v.raw, _MM_SHUFFLE(2, 1, 0, 3));
+  return Vec128<float>{raw};
+}
+
+// Reverses the order of the four 32-bit lanes.
+// _MM_SHUFFLE(0, 1, 2, 3) == 0x1B.
+HWY_API Vec128<uint32_t> Shuffle0123(const Vec128<uint32_t> v) {
+  const __m128i raw = _mm_shuffle_epi32(v.raw, _MM_SHUFFLE(0, 1, 2, 3));
+  return Vec128<uint32_t>{raw};
+}
+HWY_API Vec128<int32_t> Shuffle0123(const Vec128<int32_t> v) {
+  const __m128i raw = _mm_shuffle_epi32(v.raw, _MM_SHUFFLE(0, 1, 2, 3));
+  return Vec128<int32_t>{raw};
+}
+HWY_API Vec128<float> Shuffle0123(const Vec128<float> v) {
+  const __m128 raw = _mm_shuffle_ps(v.raw, v.raw, _MM_SHUFFLE(0, 1, 2, 3));
+  return Vec128<float>{raw};
+}
+
+// ================================================== COMPARE
+
+#if HWY_TARGET <= HWY_AVX3
+
+// Comparisons set a mask bit to 1 if the condition is true, else 0.
+
+// ------------------------------ MaskFromVec
+
+namespace detail {
+
+// One overload per lane size in bytes; the SizeTag argument picks the
+// matching vector-to-mask instruction.
+
+// 8-bit lanes
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> MaskFromVec(hwy::SizeTag<1> /*tag*/,
+                                     const Vec128<T, N> v) {
+  const auto bits = _mm_movepi8_mask(v.raw);
+  return Mask128<T, N>{bits};
+}
+// 16-bit lanes
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> MaskFromVec(hwy::SizeTag<2> /*tag*/,
+                                     const Vec128<T, N> v) {
+  const auto bits = _mm_movepi16_mask(v.raw);
+  return Mask128<T, N>{bits};
+}
+// 32-bit lanes
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> MaskFromVec(hwy::SizeTag<4> /*tag*/,
+                                     const Vec128<T, N> v) {
+  const auto bits = _mm_movepi32_mask(v.raw);
+  return Mask128<T, N>{bits};
+}
+// 64-bit lanes
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> MaskFromVec(hwy::SizeTag<8> /*tag*/,
+                                     const Vec128<T, N> v) {
+  const auto bits = _mm_movepi64_mask(v.raw);
+  return Mask128<T, N>{bits};
+}
+
+}  // namespace detail
+
+// Dispatches to the detail overload matching the lane size.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> MaskFromVec(const Vec128<T, N> v) {
+  const hwy::SizeTag<sizeof(T)> lane_size_tag;
+  return detail::MaskFromVec(lane_size_tag, v);
+}
+// There do not appear to be native floating-point forms of these
+// instructions, so float/double go through the same-sized signed integer type.
+template <size_t N>
+HWY_API Mask128<float, N> MaskFromVec(const Vec128<float, N> v) {
+  const RebindToSigned<DFromV<decltype(v)>> di;
+  const auto int_mask = MaskFromVec(BitCast(di, v));
+  return Mask128<float, N>{int_mask.raw};
+}
+template <size_t N>
+HWY_API Mask128<double, N> MaskFromVec(const Vec128<double, N> v) {
+  const RebindToSigned<DFromV<decltype(v)>> di;
+  const auto int_mask = MaskFromVec(BitCast(di, v));
+  return Mask128<double, N>{int_mask.raw};
+}
+
+template <class D>  // MFromD<D>: the type MaskFromVec returns for VFromD<D>.
+using MFromD = decltype(MaskFromVec(VFromD<D>()));
+
+// ------------------------------ VecFromMask
+
+// 8-bit lanes
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T, N> VecFromMask(const Mask128<T, N> v) {
+  const __m128i raw = _mm_movm_epi8(v.raw);
+  return Vec128<T, N>{raw};
+}
+
+// 16-bit integer lanes
+template <typename T, size_t N, HWY_IF_UI16(T)>
+HWY_API Vec128<T, N> VecFromMask(const Mask128<T, N> v) {
+  const __m128i raw = _mm_movm_epi16(v.raw);
+  return Vec128<T, N>{raw};
+}
+
+// 32-bit integer lanes
+template <typename T, size_t N, HWY_IF_UI32(T)>
+HWY_API Vec128<T, N> VecFromMask(const Mask128<T, N> v) {
+  const __m128i raw = _mm_movm_epi32(v.raw);
+  return Vec128<T, N>{raw};
+}
+
+// 64-bit integer lanes
+template <typename T, size_t N, HWY_IF_UI64(T)>
+HWY_API Vec128<T, N> VecFromMask(const Mask128<T, N> v) {
+  const __m128i raw = _mm_movm_epi64(v.raw);
+  return Vec128<T, N>{raw};
+}
+
+// Floating-point lanes expand the mask as integers of the same width and then
+// reinterpret the register.
+#if HWY_HAVE_FLOAT16
+template <size_t N>
+HWY_API Vec128<float16_t, N> VecFromMask(const Mask128<float16_t, N> v) {
+  const __m128i int_raw = _mm_movm_epi16(v.raw);
+  return Vec128<float16_t, N>{_mm_castsi128_ph(int_raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+
+template <size_t N>
+HWY_API Vec128<float, N> VecFromMask(const Mask128<float, N> v) {
+  const __m128i int_raw = _mm_movm_epi32(v.raw);
+  return Vec128<float, N>{_mm_castsi128_ps(int_raw)};
+}
+
+template <size_t N>
+HWY_API Vec128<double, N> VecFromMask(const Mask128<double, N> v) {
+  const __m128i int_raw = _mm_movm_epi64(v.raw);
+  return Vec128<double, N>{_mm_castsi128_pd(int_raw)};
+}
+
+// Tag-taking form, generic for all vector lengths; the tag only selects the
+// types and the work is done by the single-argument overload.
+template <class D>
+HWY_API VFromD<D> VecFromMask(D /* tag */, MFromD<D> v) {
+  VFromD<D> expanded = VecFromMask(v);
+  return expanded;
+}
+
+// ------------------------------ RebindMask (MaskFromVec)
+
+// Reinterprets a mask as the mask type of DTo by reusing its raw mask bits;
+// both lane types must have the same size.
+template <typename TFrom, size_t NFrom, class DTo, HWY_IF_V_SIZE_LE_D(DTo, 16)>
+HWY_API MFromD<DTo> RebindMask(DTo /* tag */, Mask128<TFrom, NFrom> m) {
+  static_assert(sizeof(TFrom) == sizeof(TFromD<DTo>), "Must have same size");
+  using MTo = MFromD<DTo>;
+  return MTo{m.raw};
+}
+
+// ------------------------------ TestBit
+
+namespace detail {
+
+// One overload per lane size in bytes; the SizeTag argument picks the
+// matching test-to-mask instruction.
+
+// 8-bit lanes
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> TestBit(hwy::SizeTag<1> /*tag*/, const Vec128<T, N> v,
+                                 const Vec128<T, N> bit) {
+  const auto bits = _mm_test_epi8_mask(v.raw, bit.raw);
+  return Mask128<T, N>{bits};
+}
+// 16-bit lanes
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> TestBit(hwy::SizeTag<2> /*tag*/, const Vec128<T, N> v,
+                                 const Vec128<T, N> bit) {
+  const auto bits = _mm_test_epi16_mask(v.raw, bit.raw);
+  return Mask128<T, N>{bits};
+}
+// 32-bit lanes
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> TestBit(hwy::SizeTag<4> /*tag*/, const Vec128<T, N> v,
+                                 const Vec128<T, N> bit) {
+  const auto bits = _mm_test_epi32_mask(v.raw, bit.raw);
+  return Mask128<T, N>{bits};
+}
+// 64-bit lanes
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> TestBit(hwy::SizeTag<8> /*tag*/, const Vec128<T, N> v,
+                                 const Vec128<T, N> bit) {
+  const auto bits = _mm_test_epi64_mask(v.raw, bit.raw);
+  return Mask128<T, N>{bits};
+}
+
+}  // namespace detail
+
+// Integer-only entry point; dispatches on lane size to the detail overloads.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> TestBit(const Vec128<T, N> v, const Vec128<T, N> bit) {
+  static_assert(!hwy::IsFloat<T>(), "Only integer vectors supported");
+  const hwy::SizeTag<sizeof(T)> lane_size_tag;
+  return detail::TestBit(lane_size_tag, v, bit);
+}
+
+// ------------------------------ Equality
+
+// 8-bit lanes
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_API Mask128<T, N> operator==(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const auto eq = _mm_cmpeq_epi8_mask(a.raw, b.raw);
+  return Mask128<T, N>{eq};
+}
+
+// 16-bit integer lanes
+template <typename T, size_t N, HWY_IF_UI16(T)>
+HWY_API Mask128<T, N> operator==(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const auto eq = _mm_cmpeq_epi16_mask(a.raw, b.raw);
+  return Mask128<T, N>{eq};
+}
+
+// 32-bit integer lanes
+template <typename T, size_t N, HWY_IF_UI32(T)>
+HWY_API Mask128<T, N> operator==(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const auto eq = _mm_cmpeq_epi32_mask(a.raw, b.raw);
+  return Mask128<T, N>{eq};
+}
+
+// 64-bit integer lanes
+template <typename T, size_t N, HWY_IF_UI64(T)>
+HWY_API Mask128<T, N> operator==(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const auto eq = _mm_cmpeq_epi64_mask(a.raw, b.raw);
+  return Mask128<T, N>{eq};
+}
+
+// Floating-point lanes use the _CMP_EQ_OQ predicate.
+#if HWY_HAVE_FLOAT16
+template <size_t N>
+HWY_API Mask128<float16_t, N> operator==(Vec128<float16_t, N> a,
+                                         Vec128<float16_t, N> b) {
+  const auto eq = _mm_cmp_ph_mask(a.raw, b.raw, _CMP_EQ_OQ);
+  return Mask128<float16_t, N>{eq};
+}
+#endif  // HWY_HAVE_FLOAT16
+template <size_t N>
+HWY_API Mask128<float, N> operator==(Vec128<float, N> a, Vec128<float, N> b) {
+  const auto eq = _mm_cmp_ps_mask(a.raw, b.raw, _CMP_EQ_OQ);
+  return Mask128<float, N>{eq};
+}
+
+template <size_t N>
+HWY_API Mask128<double, N> operator==(Vec128<double, N> a,
+                                      Vec128<double, N> b) {
+  const auto eq = _mm_cmp_pd_mask(a.raw, b.raw, _CMP_EQ_OQ);
+  return Mask128<double, N>{eq};
+}
+
+// ------------------------------ Inequality
+
+// 8-bit lanes
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_API Mask128<T, N> operator!=(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const auto ne = _mm_cmpneq_epi8_mask(a.raw, b.raw);
+  return Mask128<T, N>{ne};
+}
+
+// 16-bit integer lanes
+template <typename T, size_t N, HWY_IF_UI16(T)>
+HWY_API Mask128<T, N> operator!=(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const auto ne = _mm_cmpneq_epi16_mask(a.raw, b.raw);
+  return Mask128<T, N>{ne};
+}
+
+// 32-bit integer lanes
+template <typename T, size_t N, HWY_IF_UI32(T)>
+HWY_API Mask128<T, N> operator!=(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const auto ne = _mm_cmpneq_epi32_mask(a.raw, b.raw);
+  return Mask128<T, N>{ne};
+}
+
+// 64-bit integer lanes
+template <typename T, size_t N, HWY_IF_UI64(T)>
+HWY_API Mask128<T, N> operator!=(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const auto ne = _mm_cmpneq_epi64_mask(a.raw, b.raw);
+  return Mask128<T, N>{ne};
+}
+
+// Floating-point lanes use the _CMP_NEQ_OQ predicate.
+#if HWY_HAVE_FLOAT16
+template <size_t N>
+HWY_API Mask128<float16_t, N> operator!=(Vec128<float16_t, N> a,
+                                         Vec128<float16_t, N> b) {
+  const auto ne = _mm_cmp_ph_mask(a.raw, b.raw, _CMP_NEQ_OQ);
+  return Mask128<float16_t, N>{ne};
+}
+#endif  // HWY_HAVE_FLOAT16
+template <size_t N>
+HWY_API Mask128<float, N> operator!=(Vec128<float, N> a, Vec128<float, N> b) {
+  const auto ne = _mm_cmp_ps_mask(a.raw, b.raw, _CMP_NEQ_OQ);
+  return Mask128<float, N>{ne};
+}
+
+template <size_t N>
+HWY_API Mask128<double, N> operator!=(Vec128<double, N> a,
+                                      Vec128<double, N> b) {
+  const auto ne = _mm_cmp_pd_mask(a.raw, b.raw, _CMP_NEQ_OQ);
+  return Mask128<double, N>{ne};
+}
+
+// ------------------------------ Strict inequality
+
+// Signed integer >
+template <size_t N>
+HWY_API Mask128<int8_t, N> operator>(Vec128<int8_t, N> a, Vec128<int8_t, N> b) {
+  const auto gt = _mm_cmpgt_epi8_mask(a.raw, b.raw);
+  return Mask128<int8_t, N>{gt};
+}
+template <size_t N>
+HWY_API Mask128<int16_t, N> operator>(Vec128<int16_t, N> a,
+                                      Vec128<int16_t, N> b) {
+  const auto gt = _mm_cmpgt_epi16_mask(a.raw, b.raw);
+  return Mask128<int16_t, N>{gt};
+}
+template <size_t N>
+HWY_API Mask128<int32_t, N> operator>(Vec128<int32_t, N> a,
+                                      Vec128<int32_t, N> b) {
+  const auto gt = _mm_cmpgt_epi32_mask(a.raw, b.raw);
+  return Mask128<int32_t, N>{gt};
+}
+template <size_t N>
+HWY_API Mask128<int64_t, N> operator>(Vec128<int64_t, N> a,
+                                      Vec128<int64_t, N> b) {
+  const auto gt = _mm_cmpgt_epi64_mask(a.raw, b.raw);
+  return Mask128<int64_t, N>{gt};
+}
+
+// Unsigned integer >
+template <size_t N>
+HWY_API Mask128<uint8_t, N> operator>(Vec128<uint8_t, N> a,
+                                      Vec128<uint8_t, N> b) {
+  const auto gt = _mm_cmpgt_epu8_mask(a.raw, b.raw);
+  return Mask128<uint8_t, N>{gt};
+}
+template <size_t N>
+HWY_API Mask128<uint16_t, N> operator>(Vec128<uint16_t, N> a,
+                                       Vec128<uint16_t, N> b) {
+  const auto gt = _mm_cmpgt_epu16_mask(a.raw, b.raw);
+  return Mask128<uint16_t, N>{gt};
+}
+template <size_t N>
+HWY_API Mask128<uint32_t, N> operator>(Vec128<uint32_t, N> a,
+                                       Vec128<uint32_t, N> b) {
+  const auto gt = _mm_cmpgt_epu32_mask(a.raw, b.raw);
+  return Mask128<uint32_t, N>{gt};
+}
+template <size_t N>
+HWY_API Mask128<uint64_t, N> operator>(Vec128<uint64_t, N> a,
+                                       Vec128<uint64_t, N> b) {
+  const auto gt = _mm_cmpgt_epu64_mask(a.raw, b.raw);
+  return Mask128<uint64_t, N>{gt};
+}
+
+// Floating-point > uses the _CMP_GT_OQ predicate.
+#if HWY_HAVE_FLOAT16
+template <size_t N>
+HWY_API Mask128<float16_t, N> operator>(Vec128<float16_t, N> a,
+                                        Vec128<float16_t, N> b) {
+  const auto gt = _mm_cmp_ph_mask(a.raw, b.raw, _CMP_GT_OQ);
+  return Mask128<float16_t, N>{gt};
+}
+#endif  // HWY_HAVE_FLOAT16
+template <size_t N>
+HWY_API Mask128<float, N> operator>(Vec128<float, N> a, Vec128<float, N> b) {
+  const auto gt = _mm_cmp_ps_mask(a.raw, b.raw, _CMP_GT_OQ);
+  return Mask128<float, N>{gt};
+}
+template <size_t N>
+HWY_API Mask128<double, N> operator>(Vec128<double, N> a, Vec128<double, N> b) {
+  const auto gt = _mm_cmp_pd_mask(a.raw, b.raw, _CMP_GT_OQ);
+  return Mask128<double, N>{gt};
+}
+
+// ------------------------------ Weak inequality
+
+// Floating-point >= uses the _CMP_GE_OQ predicate.
+#if HWY_HAVE_FLOAT16
+template <size_t N>
+HWY_API Mask128<float16_t, N> operator>=(Vec128<float16_t, N> a,
+                                         Vec128<float16_t, N> b) {
+  const auto ge = _mm_cmp_ph_mask(a.raw, b.raw, _CMP_GE_OQ);
+  return Mask128<float16_t, N>{ge};
+}
+#endif  // HWY_HAVE_FLOAT16
+template <size_t N>
+HWY_API Mask128<float, N> operator>=(Vec128<float, N> a, Vec128<float, N> b) {
+  const auto ge = _mm_cmp_ps_mask(a.raw, b.raw, _CMP_GE_OQ);
+  return Mask128<float, N>{ge};
+}
+template <size_t N>
+HWY_API Mask128<double, N> operator>=(Vec128<double, N> a,
+                                      Vec128<double, N> b) {
+  const auto ge = _mm_cmp_pd_mask(a.raw, b.raw, _CMP_GE_OQ);
+  return Mask128<double, N>{ge};
+}
+
+// Signed integer >=
+template <size_t N>
+HWY_API Mask128<int8_t, N> operator>=(Vec128<int8_t, N> a,
+                                      Vec128<int8_t, N> b) {
+  const auto ge = _mm_cmpge_epi8_mask(a.raw, b.raw);
+  return Mask128<int8_t, N>{ge};
+}
+template <size_t N>
+HWY_API Mask128<int16_t, N> operator>=(Vec128<int16_t, N> a,
+                                       Vec128<int16_t, N> b) {
+  const auto ge = _mm_cmpge_epi16_mask(a.raw, b.raw);
+  return Mask128<int16_t, N>{ge};
+}
+template <size_t N>
+HWY_API Mask128<int32_t, N> operator>=(Vec128<int32_t, N> a,
+                                       Vec128<int32_t, N> b) {
+  const auto ge = _mm_cmpge_epi32_mask(a.raw, b.raw);
+  return Mask128<int32_t, N>{ge};
+}
+template <size_t N>
+HWY_API Mask128<int64_t, N> operator>=(Vec128<int64_t, N> a,
+                                       Vec128<int64_t, N> b) {
+  const auto ge = _mm_cmpge_epi64_mask(a.raw, b.raw);
+  return Mask128<int64_t, N>{ge};
+}
+
+// Unsigned integer >=
+template <size_t N>
+HWY_API Mask128<uint8_t, N> operator>=(Vec128<uint8_t, N> a,
+                                       Vec128<uint8_t, N> b) {
+  const auto ge = _mm_cmpge_epu8_mask(a.raw, b.raw);
+  return Mask128<uint8_t, N>{ge};
+}
+template <size_t N>
+HWY_API Mask128<uint16_t, N> operator>=(Vec128<uint16_t, N> a,
+                                        Vec128<uint16_t, N> b) {
+  const auto ge = _mm_cmpge_epu16_mask(a.raw, b.raw);
+  return Mask128<uint16_t, N>{ge};
+}
+template <size_t N>
+HWY_API Mask128<uint32_t, N> operator>=(Vec128<uint32_t, N> a,
+                                        Vec128<uint32_t, N> b) {
+  const auto ge = _mm_cmpge_epu32_mask(a.raw, b.raw);
+  return Mask128<uint32_t, N>{ge};
+}
+template <size_t N>
+HWY_API Mask128<uint64_t, N> operator>=(Vec128<uint64_t, N> a,
+                                        Vec128<uint64_t, N> b) {
+  const auto ge = _mm_cmpge_epu64_mask(a.raw, b.raw);
+  return Mask128<uint64_t, N>{ge};
+}
+
+#else  // AVX2 or below
+
+// Comparisons fill a lane with 1-bits if the condition is true, else 0.
+
+// Converts a mask to the mask type of DTo by round-tripping through vectors:
+// mask -> vector -> bit-cast -> mask. Lane sizes must match.
+template <class DTo, typename TFrom, size_t NFrom, HWY_IF_V_SIZE_LE_D(DTo, 16)>
+HWY_API MFromD<DTo> RebindMask(DTo dto, Mask128<TFrom, NFrom> m) {
+  static_assert(sizeof(TFrom) == sizeof(TFromD<DTo>), "Must have same size");
+  const Simd<TFrom, NFrom, 0> dfrom;
+  const auto vfrom = VecFromMask(dfrom, m);
+  return MaskFromVec(BitCast(dto, vfrom));
+}
+
+// A lane is true when (v & bit) equals `bit`. Integer vectors only.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> TestBit(Vec128<T, N> v, Vec128<T, N> bit) {
+  static_assert(!hwy::IsFloat<T>(), "Only integer vectors supported");
+  const auto masked = v & bit;
+  return masked == bit;
+}
+
+// ------------------------------ Equality
+
+// Unsigned integer ==
+template <size_t N>
+HWY_API Mask128<uint8_t, N> operator==(Vec128<uint8_t, N> a,
+                                       Vec128<uint8_t, N> b) {
+  const auto eq = _mm_cmpeq_epi8(a.raw, b.raw);
+  return Mask128<uint8_t, N>{eq};
+}
+template <size_t N>
+HWY_API Mask128<uint16_t, N> operator==(Vec128<uint16_t, N> a,
+                                        Vec128<uint16_t, N> b) {
+  const auto eq = _mm_cmpeq_epi16(a.raw, b.raw);
+  return Mask128<uint16_t, N>{eq};
+}
+template <size_t N>
+HWY_API Mask128<uint32_t, N> operator==(Vec128<uint32_t, N> a,
+                                        Vec128<uint32_t, N> b) {
+  const auto eq = _mm_cmpeq_epi32(a.raw, b.raw);
+  return Mask128<uint32_t, N>{eq};
+}
+template <size_t N>
+HWY_API Mask128<uint64_t, N> operator==(const Vec128<uint64_t, N> a,
+                                        const Vec128<uint64_t, N> b) {
+#if HWY_TARGET >= HWY_SSSE3
+  // No 64-bit compare is used on these targets: compare as 32-bit lanes, then
+  // AND each 32-bit result with that of its neighbor in the same 64-bit lane,
+  // so a 64-bit lane is all-ones only if both of its halves matched.
+  const DFromV<decltype(a)> d64;
+  const RepartitionToNarrow<decltype(d64)> d32;
+  const auto a32 = BitCast(d32, a);
+  const auto b32 = BitCast(d32, b);
+  const auto eq32 = VecFromMask(d32, Eq(a32, b32));
+  const auto eq64 = eq32 & Shuffle2301(eq32);
+  return MaskFromVec(BitCast(d64, eq64));
+#else
+  const auto eq = _mm_cmpeq_epi64(a.raw, b.raw);
+  return Mask128<uint64_t, N>{eq};
+#endif
+}
+
+// Signed integer ==
+template <size_t N>
+HWY_API Mask128<int8_t, N> operator==(Vec128<int8_t, N> a,
+                                      Vec128<int8_t, N> b) {
+  const auto eq = _mm_cmpeq_epi8(a.raw, b.raw);
+  return Mask128<int8_t, N>{eq};
+}
+template <size_t N>
+HWY_API Mask128<int16_t, N> operator==(Vec128<int16_t, N> a,
+                                       Vec128<int16_t, N> b) {
+  const auto eq = _mm_cmpeq_epi16(a.raw, b.raw);
+  return Mask128<int16_t, N>{eq};
+}
+template <size_t N>
+HWY_API Mask128<int32_t, N> operator==(Vec128<int32_t, N> a,
+                                       Vec128<int32_t, N> b) {
+  const auto eq = _mm_cmpeq_epi32(a.raw, b.raw);
+  return Mask128<int32_t, N>{eq};
+}
+template <size_t N>
+HWY_API Mask128<int64_t, N> operator==(const Vec128<int64_t, N> a,
+                                       const Vec128<int64_t, N> b) {
+  // Forward to the unsigned == so that its SSSE3 fallback is not duplicated
+  // here.
+  const DFromV<decltype(a)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto eq_unsigned = BitCast(du, a) == BitCast(du, b);
+  return RebindMask(d, eq_unsigned);
+}
+
+// Floating-point ==
+template <size_t N>
+HWY_API Mask128<float, N> operator==(Vec128<float, N> a, Vec128<float, N> b) {
+  const auto eq = _mm_cmpeq_ps(a.raw, b.raw);
+  return Mask128<float, N>{eq};
+}
+template <size_t N>
+HWY_API Mask128<double, N> operator==(Vec128<double, N> a,
+                                      Vec128<double, N> b) {
+  const auto eq = _mm_cmpeq_pd(a.raw, b.raw);
+  return Mask128<double, N>{eq};
+}
+
+// ------------------------------ Inequality
+
+// Integer inequality is the negation of equality. These are deliberately one
+// overload per lane type instead of a template on T: with T as a template
+// argument they are not more specialized than the rewritten operator== in
+// C++20, leading to compile errors: https://gcc.godbolt.org/z/xsrPhPvPT.
+template <size_t N>
+HWY_API Mask128<uint8_t, N> operator!=(Vec128<uint8_t, N> a,
+                                       Vec128<uint8_t, N> b) {
+  const auto equal = (a == b);
+  return Not(equal);
+}
+template <size_t N>
+HWY_API Mask128<uint16_t, N> operator!=(Vec128<uint16_t, N> a,
+                                        Vec128<uint16_t, N> b) {
+  const auto equal = (a == b);
+  return Not(equal);
+}
+template <size_t N>
+HWY_API Mask128<uint32_t, N> operator!=(Vec128<uint32_t, N> a,
+                                        Vec128<uint32_t, N> b) {
+  const auto equal = (a == b);
+  return Not(equal);
+}
+template <size_t N>
+HWY_API Mask128<uint64_t, N> operator!=(Vec128<uint64_t, N> a,
+                                        Vec128<uint64_t, N> b) {
+  const auto equal = (a == b);
+  return Not(equal);
+}
+template <size_t N>
+HWY_API Mask128<int8_t, N> operator!=(Vec128<int8_t, N> a,
+                                      Vec128<int8_t, N> b) {
+  const auto equal = (a == b);
+  return Not(equal);
+}
+template <size_t N>
+HWY_API Mask128<int16_t, N> operator!=(Vec128<int16_t, N> a,
+                                       Vec128<int16_t, N> b) {
+  const auto equal = (a == b);
+  return Not(equal);
+}
+template <size_t N>
+HWY_API Mask128<int32_t, N> operator!=(Vec128<int32_t, N> a,
+                                       Vec128<int32_t, N> b) {
+  const auto equal = (a == b);
+  return Not(equal);
+}
+template <size_t N>
+HWY_API Mask128<int64_t, N> operator!=(Vec128<int64_t, N> a,
+                                       Vec128<int64_t, N> b) {
+  const auto equal = (a == b);
+  return Not(equal);
+}
+
+// Lane-wise inequality of single-precision lanes (native compare).
+template <size_t N>
+HWY_API Mask128<float, N> operator!=(Vec128<float, N> a, Vec128<float, N> b) {
+  const auto neq_bits = _mm_cmpneq_ps(a.raw, b.raw);
+  return Mask128<float, N>{neq_bits};
+}
+// Lane-wise inequality of double-precision lanes (native compare).
+template <size_t N>
+HWY_API Mask128<double, N> operator!=(Vec128<double, N> a,
+                                      Vec128<double, N> b) {
+  const auto neq_bits = _mm_cmpneq_pd(a.raw, b.raw);
+  return Mask128<double, N>{neq_bits};
+}
+
+// ------------------------------ Strict inequality
+
+namespace detail {
+
+// Signed a > b for 8-bit lanes.
+template <size_t N>
+HWY_INLINE Mask128<int8_t, N> Gt(hwy::SignedTag /*tag*/, Vec128<int8_t, N> a,
+                                 Vec128<int8_t, N> b) {
+  const auto gt_bits = _mm_cmpgt_epi8(a.raw, b.raw);
+  return Mask128<int8_t, N>{gt_bits};
+}
+// Signed a > b for 16-bit lanes.
+template <size_t N>
+HWY_INLINE Mask128<int16_t, N> Gt(hwy::SignedTag /*tag*/, Vec128<int16_t, N> a,
+                                  Vec128<int16_t, N> b) {
+  const auto gt_bits = _mm_cmpgt_epi16(a.raw, b.raw);
+  return Mask128<int16_t, N>{gt_bits};
+}
+// Signed a > b for 32-bit lanes.
+template <size_t N>
+HWY_INLINE Mask128<int32_t, N> Gt(hwy::SignedTag /*tag*/, Vec128<int32_t, N> a,
+                                  Vec128<int32_t, N> b) {
+  const auto gt_bits = _mm_cmpgt_epi32(a.raw, b.raw);
+  return Mask128<int32_t, N>{gt_bits};
+}
+
+// Signed a > b for 64-bit lanes. When HWY_TARGET >= HWY_SSSE3 the result is
+// assembled from 32-bit compares and a 64-bit subtraction; otherwise the
+// native 64-bit compare intrinsic is used.
+template <size_t N>
+HWY_INLINE Mask128<int64_t, N> Gt(hwy::SignedTag /*tag*/,
+                                  const Vec128<int64_t, N> a,
+                                  const Vec128<int64_t, N> b) {
+#if HWY_TARGET >= HWY_SSSE3
+  // See https://stackoverflow.com/questions/65166174/:
+  const DFromV<decltype(a)> d;
+  const RepartitionToNarrow<decltype(d)> d32;
+  // Per-32-bit-lane == and > results, reinterpreted as 64-bit vectors so they
+  // can be combined with the 64-bit difference below.
+  const Vec128<int64_t, N> m_eq32{Eq(BitCast(d32, a), BitCast(d32, b)).raw};
+  const Vec128<int64_t, N> m_gt32{Gt(BitCast(d32, a), BitCast(d32, b)).raw};
+  // If a.upper is greater, upper := true. Otherwise, if a.upper == b.upper:
+  // upper := b-a (unsigned comparison result of lower). Otherwise: upper := 0.
+  const __m128i upper = OrAnd(m_gt32, m_eq32, Sub(b, a)).raw;
+  // Duplicate upper to lower half.
+  // _MM_SHUFFLE(3, 3, 1, 1) copies 32-bit elements 1 and 3 (the upper half of
+  // each 64-bit lane) into both halves of that lane.
+  return Mask128<int64_t, N>{_mm_shuffle_epi32(upper, _MM_SHUFFLE(3, 3, 1, 1))};
+#else
+  return Mask128<int64_t, N>{_mm_cmpgt_epi64(a.raw, b.raw)};  // SSE4.2
+#endif
+}
+
+// Unsigned a > b, implemented with the signed overloads: toggling the most
+// significant bit of both operands maps unsigned ordering onto signed
+// ordering.
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> Gt(hwy::UnsignedTag /*tag*/, Vec128<T, N> a,
+                            Vec128<T, N> b) {
+  const DFromV<decltype(a)> du;
+  const RebindToSigned<decltype(du)> di;
+  const Vec128<T, N> top_bit = Set(du, (LimitsMax<T>() >> 1) + 1);
+  const auto a_flipped = BitCast(di, Xor(a, top_bit));
+  const auto b_flipped = BitCast(di, Xor(b, top_bit));
+  const auto gt_signed = Gt(hwy::SignedTag(), a_flipped, b_flipped);
+  return RebindMask(du, gt_signed);
+}
+
+// a > b for single-precision lanes.
+template <size_t N>
+HWY_INLINE Mask128<float, N> Gt(hwy::FloatTag /*tag*/, Vec128<float, N> a,
+                                Vec128<float, N> b) {
+  const auto gt_bits = _mm_cmpgt_ps(a.raw, b.raw);
+  return Mask128<float, N>{gt_bits};
+}
+// a > b for double-precision lanes.
+template <size_t N>
+HWY_INLINE Mask128<double, N> Gt(hwy::FloatTag /*tag*/, Vec128<double, N> a,
+                                 Vec128<double, N> b) {
+  const auto gt_bits = _mm_cmpgt_pd(a.raw, b.raw);
+  return Mask128<double, N>{gt_bits};
+}
+
+}  // namespace detail
+
+// Lane-wise a > b. Dispatches on the lane type's tag (signed, unsigned or
+// float) to the matching detail::Gt overload.
+// Declared HWY_API, like operator>= and the other public comparison
+// operators in this section, rather than HWY_INLINE.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> operator>(Vec128<T, N> a, Vec128<T, N> b) {
+  return detail::Gt(hwy::TypeTag<T>(), a, b);
+}
+
+// ------------------------------ Weak inequality
+
+namespace detail {
+// Signed a >= b, computed as !(b > a).
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> Ge(hwy::SignedTag tag, Vec128<T, N> a,
+                            Vec128<T, N> b) {
+  const auto b_greater = Gt(tag, b, a);
+  return Not(b_greater);
+}
+
+// Unsigned a >= b, computed as !(b > a).
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> Ge(hwy::UnsignedTag tag, Vec128<T, N> a,
+                            Vec128<T, N> b) {
+  const auto b_greater = Gt(tag, b, a);
+  return Not(b_greater);
+}
+
+// a >= b for single-precision lanes (native compare).
+template <size_t N>
+HWY_INLINE Mask128<float, N> Ge(hwy::FloatTag /*tag*/, Vec128<float, N> a,
+                                Vec128<float, N> b) {
+  const auto ge_bits = _mm_cmpge_ps(a.raw, b.raw);
+  return Mask128<float, N>{ge_bits};
+}
+// a >= b for double-precision lanes (native compare).
+template <size_t N>
+HWY_INLINE Mask128<double, N> Ge(hwy::FloatTag /*tag*/, Vec128<double, N> a,
+                                 Vec128<double, N> b) {
+  const auto ge_bits = _mm_cmpge_pd(a.raw, b.raw);
+  return Mask128<double, N>{ge_bits};
+}
+
+}  // namespace detail
+
+// Lane-wise a >= b; selects the detail::Ge overload from the lane type's tag.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> operator>=(Vec128<T, N> a, Vec128<T, N> b) {
+  const auto tag = hwy::TypeTag<T>();
+  return detail::Ge(tag, a, b);
+}
+
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// ------------------------------ Reversed comparisons
+
+// a < b is evaluated as b > a.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> operator<(Vec128<T, N> a, Vec128<T, N> b) {
+  const auto b_greater = (b > a);
+  return b_greater;
+}
+
+// a <= b is evaluated as b >= a.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> operator<=(Vec128<T, N> a, Vec128<T, N> b) {
+  const auto b_greater_or_equal = (b >= a);
+  return b_greater_or_equal;
+}
+
+// ------------------------------ Iota (Load)
+
+namespace detail {
+
+// Iota0 returns a vector whose lane i holds the value i. The _mm_setr_*
+// intrinsics take their arguments starting from lane 0.
+
+// 8-bit lanes: 0..15.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {
+  return VFromD<D>{_mm_setr_epi8(
+      static_cast<char>(0), static_cast<char>(1), static_cast<char>(2),
+      static_cast<char>(3), static_cast<char>(4), static_cast<char>(5),
+      static_cast<char>(6), static_cast<char>(7), static_cast<char>(8),
+      static_cast<char>(9), static_cast<char>(10), static_cast<char>(11),
+      static_cast<char>(12), static_cast<char>(13), static_cast<char>(14),
+      static_cast<char>(15))};
+}
+
+// 16-bit integer lanes: 0..7.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_UI16_D(D)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {
+  return VFromD<D>{_mm_setr_epi16(int16_t{0}, int16_t{1}, int16_t{2},
+                                  int16_t{3}, int16_t{4}, int16_t{5},
+                                  int16_t{6}, int16_t{7})};
+}
+
+#if HWY_HAVE_FLOAT16
+// float16 lanes: 0..7 (arguments listed from the highest lane down).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F16_D(D)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {
+  return VFromD<D>{_mm_set_ph(float16_t{7}, float16_t{6}, float16_t{5},
+                              float16_t{4}, float16_t{3}, float16_t{2},
+                              float16_t{1}, float16_t{0})};
+}
+#endif  // HWY_HAVE_FLOAT16
+
+// 32-bit integer lanes: 0..3.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_UI32_D(D)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {
+  return VFromD<D>{
+      _mm_setr_epi32(int32_t{0}, int32_t{1}, int32_t{2}, int32_t{3})};
+}
+
+// 64-bit integer lanes: 0..1 (arguments listed from the highest lane down).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_UI64_D(D)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {
+  return VFromD<D>{_mm_set_epi64x(int64_t{1}, int64_t{0})};
+}
+
+// Single-precision lanes: 0..3.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {
+  return VFromD<D>{_mm_setr_ps(0.0f, 1.0f, 2.0f, 3.0f)};
+}
+
+// Double-precision lanes: 0..1.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {
+  return VFromD<D>{_mm_setr_pd(0.0, 1.0)};
+}
+
+#if HWY_COMPILER_MSVC
+// MSVC-only helpers used by Iota: each overload clears every bit of the
+// 128-bit register above the vector's own size and leaves the low part
+// unchanged. NOTE(review): presumably a workaround for an MSVC code
+// generation issue - the reason is not visible here; confirm before removing.
+
+// 1-byte vectors: keep only the lowest 8 bits.
+template <class V, HWY_IF_V_SIZE_V(V, 1)>
+static HWY_INLINE V MaskOutVec128Iota(V v) {
+  const V mask_out_mask{_mm_set_epi32(0, 0, 0, 0xFF)};
+  return v & mask_out_mask;
+}
+// 2-byte vectors: keep only the lowest 16 bits.
+template <class V, HWY_IF_V_SIZE_V(V, 2)>
+static HWY_INLINE V MaskOutVec128Iota(V v) {
+#if HWY_TARGET <= HWY_SSE4
+  // Blend mask 0xFE takes 16-bit elements 1..7 from the zero vector and
+  // element 0 from v.
+  return V{_mm_blend_epi16(v.raw, _mm_setzero_si128(), 0xFE)};
+#else
+  const V mask_out_mask{_mm_set_epi32(0, 0, 0, 0xFFFF)};
+  return v & mask_out_mask;
+#endif
+}
+// 4-byte vectors: keep only the lowest 32 bits (moved into a zeroed register
+// through the float domain).
+template <class V, HWY_IF_V_SIZE_V(V, 4)>
+static HWY_INLINE V MaskOutVec128Iota(V v) {
+  const DFromV<decltype(v)> d;
+  const Repartition<float, decltype(d)> df;
+  using VF = VFromD<decltype(df)>;
+  return BitCast(d, VF{_mm_move_ss(_mm_setzero_ps(), BitCast(df, v).raw)});
+}
+// 8-byte vectors: keep only the lowest 64 bits.
+template <class V, HWY_IF_V_SIZE_V(V, 8)>
+static HWY_INLINE V MaskOutVec128Iota(V v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  return BitCast(d, VU{_mm_move_epi64(BitCast(du, v).raw)});
+}
+// Vectors larger than 8 bytes: nothing to clear.
+template <class V, HWY_IF_V_SIZE_GT_V(V, 8)>
+static HWY_INLINE V MaskOutVec128Iota(V v) {
+  return v;
+}
+#endif
+
+}  // namespace detail
+
+// Returns a vector whose lane i holds `first` (converted to the lane type)
+// plus i. Under MSVC the result is additionally passed through
+// detail::MaskOutVec128Iota.
+template <class D, typename T2, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> Iota(D d, const T2 first) {
+  using T = TFromD<D>;
+  const auto start = Set(d, static_cast<T>(first));
+  const auto lanes = detail::Iota0(d) + start;
+#if HWY_COMPILER_MSVC
+  return detail::MaskOutVec128Iota(lanes);
+#else
+  return lanes;
+#endif
+}
+
+// ------------------------------ FirstN (Iota, Lt)
+
+// Returns a mask whose first `num` lanes are set and whose remaining lanes
+// are clear. Any `num` at or above the lane count yields an all-true mask.
+template <class D, class M = MFromD<D>, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API M FirstN(D d, size_t num) {
+  constexpr size_t kN = MaxLanes(d);
+  // Clamp to the lane count. For AVX3 this guarantees num <= 255, as BZHI
+  // requires (it only looks at the lower 8 bits of n). For other targets it
+  // guarantees num is representable in the signed lane type; without the
+  // clamp, e.g. num = 128 with 8-bit lanes would wrap to a negative value and
+  // produce an empty mask instead of a full one.
+  num = HWY_MIN(num, kN);
+#if HWY_TARGET <= HWY_AVX3
+#if HWY_ARCH_X86_64
+  const uint64_t all = (1ull << kN) - 1;
+  return M::FromBits(_bzhi_u64(all, num));
+#else
+  const uint32_t all = static_cast<uint32_t>((1ull << kN) - 1);
+  return M::FromBits(_bzhi_u32(all, static_cast<uint32_t>(num)));
+#endif  // HWY_ARCH_X86_64
+#else   // HWY_TARGET > HWY_AVX3
+  const RebindToSigned<decltype(d)> di;  // Signed comparisons are cheaper.
+  using TI = TFromD<decltype(di)>;
+  return RebindMask(d, detail::Iota0(di) < Set(di, static_cast<TI>(num)));
+#endif  // HWY_TARGET <= HWY_AVX3
+}
+
+// ================================================== MEMORY (2)
+
+// ------------------------------ MaskedLoad
+
+#if HWY_TARGET <= HWY_AVX3
+
+// AVX3 masked loads: lanes whose mask bit is clear are zeroed (maskz forms).
+
+// 8-bit lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D /* tag */,
+                             const TFromD<D>* HWY_RESTRICT p) {
+  const auto loaded = _mm_maskz_loadu_epi8(m.raw, p);
+  return VFromD<D>{loaded};
+}
+
+// 16-bit lanes; loads as unsigned and casts back so float16_t also works.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D d,
+                             const TFromD<D>* HWY_RESTRICT p) {
+  const RebindToUnsigned<decltype(d)> du;
+  const VFromD<decltype(du)> loaded_u{_mm_maskz_loadu_epi16(m.raw, p)};
+  return BitCast(d, loaded_u);
+}
+
+// 32-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_UI32_D(D)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D /* tag */,
+                             const TFromD<D>* HWY_RESTRICT p) {
+  const auto loaded = _mm_maskz_loadu_epi32(m.raw, p);
+  return VFromD<D>{loaded};
+}
+
+// 64-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_UI64_D(D)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D /* tag */,
+                             const TFromD<D>* HWY_RESTRICT p) {
+  const auto loaded = _mm_maskz_loadu_epi64(m.raw, p);
+  return VFromD<D>{loaded};
+}
+
+// Single-precision lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D /* tag */,
+                             const float* HWY_RESTRICT p) {
+  const auto loaded = _mm_maskz_loadu_ps(m.raw, p);
+  return VFromD<D>{loaded};
+}
+
+// Double-precision lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D /* tag */,
+                             const double* HWY_RESTRICT p) {
+  const auto loaded = _mm_maskz_loadu_pd(m.raw, p);
+  return VFromD<D>{loaded};
+}
+
+// AVX3 masked load of 8-bit lanes; lanes whose mask bit is clear keep the
+// corresponding lane of `v`.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, MFromD<D> m, D /* tag */,
+                               const TFromD<D>* HWY_RESTRICT p) {
+  const auto merged = _mm_mask_loadu_epi8(v.raw, m.raw, p);
+  return VFromD<D>{merged};
+}
+
+// AVX3 masked load of 16-bit lanes; lanes whose mask bit is clear keep the
+// corresponding lane of `v`.
+// The intrinsic operates on integer registers, so both the fallback vector
+// and the result go through the unsigned tag. This mirrors the 16-bit
+// MaskedLoad overload and keeps the function usable for float16_t vectors,
+// whose raw type need not be an integer register.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, MFromD<D> m, D d,
+                               const TFromD<D>* HWY_RESTRICT p) {
+  const RebindToUnsigned<decltype(d)> du;  // for float16_t
+  return BitCast(d, VFromD<decltype(du)>{_mm_mask_loadu_epi16(
+                        BitCast(du, v).raw, m.raw, p)});
+}
+
+// AVX3 masked loads that keep the lane of `v` wherever the mask bit is clear.
+
+// 32-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_UI32_D(D)>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, MFromD<D> m, D /* tag */,
+                               const TFromD<D>* HWY_RESTRICT p) {
+  const auto merged = _mm_mask_loadu_epi32(v.raw, m.raw, p);
+  return VFromD<D>{merged};
+}
+
+// 64-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_UI64_D(D)>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, MFromD<D> m, D /* tag */,
+                               const TFromD<D>* HWY_RESTRICT p) {
+  const auto merged = _mm_mask_loadu_epi64(v.raw, m.raw, p);
+  return VFromD<D>{merged};
+}
+
+// Single-precision lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, MFromD<D> m, D /* tag */,
+                               const float* HWY_RESTRICT p) {
+  const auto merged = _mm_mask_loadu_ps(v.raw, m.raw, p);
+  return VFromD<D>{merged};
+}
+
+// Double-precision lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, MFromD<D> m, D /* tag */,
+                               const double* HWY_RESTRICT p) {
+  const auto merged = _mm_mask_loadu_pd(v.raw, m.raw, p);
+  return VFromD<D>{merged};
+}
+
+#elif HWY_TARGET == HWY_AVX2
+
+// AVX2 masked load of 32-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_UI32_D(D)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D /* tag */,
+                             const TFromD<D>* HWY_RESTRICT p) {
+  const int* const src = reinterpret_cast<const int*>(p);  // NOLINT
+  const auto loaded = _mm_maskload_epi32(src, m.raw);
+  return VFromD<D>{loaded};
+}
+
+// AVX2 masked load of 64-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_UI64_D(D)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D /* tag */,
+                             const TFromD<D>* HWY_RESTRICT p) {
+  const long long* const src =
+      reinterpret_cast<const long long*>(p);  // NOLINT
+  const auto loaded = _mm_maskload_epi64(src, m.raw);
+  return VFromD<D>{loaded};
+}
+
+// AVX2 masked load of single-precision lanes; the intrinsic takes its mask as
+// an integer vector.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D d, const float* HWY_RESTRICT p) {
+  const RebindToSigned<decltype(d)> di;
+  const auto mask_as_int = BitCast(di, VecFromMask(d, m));
+  return VFromD<D>{_mm_maskload_ps(p, mask_as_int.raw)};
+}
+
+// AVX2 masked load of double-precision lanes; the intrinsic takes its mask as
+// an integer vector.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D d, const double* HWY_RESTRICT p) {
+  const RebindToSigned<decltype(d)> di;
+  const auto mask_as_int = BitCast(di, VecFromMask(d, m));
+  return VFromD<D>{_mm_maskload_pd(p, mask_as_int.raw)};
+}
+
+// 8/16-bit lanes: there is no maskload_epi8/16, so load the whole vector and
+// zero the lanes that are not selected.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2))>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D d,
+                             const TFromD<D>* HWY_RESTRICT p) {
+  const VFromD<D> whole = LoadU(d, p);
+  return IfThenElseZero(m, whole);
+}
+
+#else  // <= SSE4
+
+// Loads the whole vector and zeroes the unselected lanes. maskmov* is
+// deliberately avoided: its nontemporal 'hint' causes it to bypass caches
+// (slow).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D d,
+                             const TFromD<D>* HWY_RESTRICT p) {
+  const VFromD<D> whole = LoadU(d, p);
+  return IfThenElseZero(m, whole);
+}
+
+#endif
+
+// ------------------------------ MaskedLoadOr
+
+#if HWY_TARGET > HWY_AVX3  // else: native
+
+// Generic for all vector lengths: loads the whole vector, then takes the
+// loaded lane where the mask is set and the lane of `v` elsewhere.
+template <class D>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, MFromD<D> m, D d,
+                               const TFromD<D>* HWY_RESTRICT p) {
+  const VFromD<D> whole = LoadU(d, p);
+  return IfThenElse(m, whole, v);
+}
+
+#endif  // HWY_TARGET > HWY_AVX3
+
+// ------------------------------ LoadN
+
+#if HWY_TARGET <= HWY_AVX2
+#ifdef HWY_NATIVE_LOAD_N
+#undef HWY_NATIVE_LOAD_N
+#else
+#define HWY_NATIVE_LOAD_N
+#endif
+
+// Loads at most `max_lanes_to_load` lanes (capped at the lane count of D)
+// through a masked load on a tag widened to at least 16 bytes, then resizes
+// the result back to D. Covers 32/64-bit lanes, plus 8/16-bit lanes on AVX3.
+template <class D,
+          HWY_IF_T_SIZE_ONE_OF_D(
+              D, (HWY_TARGET <= HWY_AVX3 ? ((1 << 1) | (1 << 2)) : 0) |
+                     (1 << 4) | (1 << 8)),
+          typename T = TFromD<D>>
+HWY_API VFromD<D> LoadN(D d, const T* HWY_RESTRICT p,
+                        size_t max_lanes_to_load) {
+  const FixedTag<TFromD<D>, HWY_MAX(HWY_MAX_LANES_D(D), 16 / sizeof(TFromD<D>))>
+      d_full;
+  const size_t lane_count = HWY_MIN(max_lanes_to_load, HWY_MAX_LANES_D(D));
+  const auto wanted = FirstN(d_full, lane_count);
+  const auto loaded = MaskedLoad(wanted, d_full, p);
+  return ResizeBitCast(d, loaded);
+}
+
+#if HWY_TARGET > HWY_AVX3
+namespace detail {
+
+// Helpers for the 8/16-bit LoadN on targets without AVX3. The "trailing"
+// helpers produce the lanes that do not fill a whole 32-bit group; the
+// "leading" helpers merge in the whole 32-bit groups via a 32-bit masked load.
+
+// Vectors of at most 2 bytes: nothing is merged in, the trailing result is
+// returned unchanged.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 2), typename T = TFromD<D>>
+HWY_INLINE VFromD<D> AVX2UIF8Or16LoadLeadingN(VFromD<D> /*load_mask*/, D /*d*/,
+                                              const T* HWY_RESTRICT /*p*/,
+                                              VFromD<D> v_trailing) {
+  return v_trailing;
+}
+
+// Larger vectors: reinterprets load_mask as 32-bit lanes, performs a 32-bit
+// masked load with it, and keeps the loaded 32-bit lane wherever the mask
+// lane is negative; other lanes come from v_trailing.
+template <class D, HWY_IF_V_SIZE_GT_D(D, 2), typename T = TFromD<D>>
+HWY_INLINE VFromD<D> AVX2UIF8Or16LoadLeadingN(VFromD<D> load_mask, D d,
+                                              const T* HWY_RESTRICT p,
+                                              VFromD<D> v_trailing) {
+  using DI32 = Repartition<int32_t, D>;
+  const FixedTag<int32_t, HWY_MAX(HWY_MAX_LANES_D(DI32), 4)> di32_full;
+
+  // ResizeBitCast of load_mask to di32 is okay below if
+  // d.MaxBytes() < di32.MaxBytes() is true as any lanes of load_mask.raw past
+  // the first (lowest-index) lanes of load_mask.raw will have already been
+  // zeroed out
+  return ResizeBitCast(
+      d, IfNegativeThenElse(
+             ResizeBitCast(di32_full, load_mask),
+             MaskedLoad(MaskFromVec(ResizeBitCast(di32_full, load_mask)),
+                        di32_full, reinterpret_cast<const int32_t*>(p)),
+             ResizeBitCast(di32_full, v_trailing)));
+}
+
+// Single-lane 8/16-bit vector: load the lane if any lane was requested,
+// otherwise return zero.
+template <class D, HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2)),
+          HWY_IF_LANES_D(D, 1), typename T = TFromD<D>>
+HWY_INLINE VFromD<D> AVX2UIF8Or16LoadTrailingN(VFromD<D> /*load_mask*/, D d,
+                                               const T* HWY_RESTRICT p,
+                                               size_t num_of_lanes_to_load) {
+  return (num_of_lanes_to_load > 0) ? LoadU(d, p) : Zero(d);
+}
+
+// Two-lane 8-bit vector: load both lanes, only the first one, or none.
+template <class D, HWY_IF_T_SIZE_D(D, 1), HWY_IF_LANES_D(D, 2),
+          typename T = TFromD<D>>
+HWY_INLINE VFromD<D> AVX2UIF8Or16LoadTrailingN(VFromD<D> /*load_mask*/, D d,
+                                               const T* HWY_RESTRICT p,
+                                               size_t num_of_lanes_to_load) {
+  if (num_of_lanes_to_load > 1) {
+    return LoadU(d, p);
+  } else {
+    const FixedTag<TFromD<D>, 1> d1;
+    return (num_of_lanes_to_load == 1) ? ResizeBitCast(d, LoadU(d1, p))
+                                       : Zero(d);
+  }
+}
+
+// 8-bit vectors with more than two lanes: handles the 1..3 bytes left over
+// after the whole 4-byte groups; returns zero if the count is a multiple of 4.
+template <class D, HWY_IF_T_SIZE_D(D, 1), HWY_IF_LANES_GT_D(D, 2),
+          typename T = TFromD<D>>
+HWY_INLINE VFromD<D> AVX2UIF8Or16LoadTrailingN(VFromD<D> load_mask, D d,
+                                               const T* HWY_RESTRICT p,
+                                               size_t num_of_lanes_to_load) {
+  const size_t trailing_n = num_of_lanes_to_load & 3;
+  if (trailing_n != 0) {
+    // Broadcast the last requested byte, limited to the lanes in load_mask.
+    VFromD<D> v_trailing = And(load_mask, Set(d, p[num_of_lanes_to_load - 1]));
+
+    if ((trailing_n & 2) != 0) {
+      // Two or three leftover bytes: read the first two of them as one 16-bit
+      // value and place it in the 16-bit lanes where load_mask is negative.
+      const Repartition<int16_t, decltype(d)> di16;
+      int16_t i16_bits;
+      CopyBytes<sizeof(int16_t)>(p + num_of_lanes_to_load - trailing_n,
+                                 &i16_bits);
+      v_trailing = BitCast(
+          d, IfNegativeThenElse(BitCast(di16, load_mask), Set(di16, i16_bits),
+                                BitCast(di16, v_trailing)));
+    }
+
+    return v_trailing;
+  } else {
+    return Zero(d);
+  }
+}
+
+// 16-bit vectors with more than one lane: for an odd count, broadcast the
+// last requested element limited to the lanes in load_mask; otherwise zero.
+template <class D, HWY_IF_T_SIZE_D(D, 2), HWY_IF_LANES_GT_D(D, 1),
+          typename T = TFromD<D>>
+HWY_INLINE VFromD<D> AVX2UIF8Or16LoadTrailingN(VFromD<D> load_mask, D d,
+                                               const T* HWY_RESTRICT p,
+                                               size_t num_of_lanes_to_load) {
+  if ((num_of_lanes_to_load & 1) != 0) {
+    return And(load_mask, Set(d, p[num_of_lanes_to_load - 1]));
+  } else {
+    return Zero(d);
+  }
+}
+
+}  // namespace detail
+
+// LoadN for 8/16-bit lanes on targets without AVX3: loads at most N lanes
+// (capped at the lane count of D). The result is assembled from a "trailing"
+// part (lanes that do not fill a whole 32-bit group) and a "leading" part
+// merged in by a 32-bit masked load.
+template <class D, HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2)),
+          typename T = TFromD<D>>
+HWY_API VFromD<D> LoadN(D d, const T* HWY_RESTRICT p, size_t N) {
+  const size_t num_of_lanes_to_load = HWY_MIN(N, HWY_MAX_LANES_D(D));
+  const FixedTag<TFromD<D>, HWY_MAX(HWY_MAX_LANES_D(D), 16 / sizeof(TFromD<D>))>
+      d_full;
+
+  // Vector form of FirstN, computed on the widened tag and resized to D.
+  const auto load_mask = ResizeBitCast(
+      d, VecFromMask(d_full, FirstN(d_full, num_of_lanes_to_load)));
+  const auto v_trailing =
+      detail::AVX2UIF8Or16LoadTrailingN(load_mask, d, p, num_of_lanes_to_load);
+
+#if HWY_COMPILER_GCC && !HWY_IS_DEBUG_BUILD
+  // If the compiler can prove that fewer than 4 bytes are requested, the
+  // trailing part is already the complete result; skip the masked load.
+  if (__builtin_constant_p(num_of_lanes_to_load < (4 / sizeof(TFromD<D>))) &&
+      num_of_lanes_to_load < (4 / sizeof(TFromD<D>))) {
+    return v_trailing;
+  }
+#endif
+
+  return detail::AVX2UIF8Or16LoadLeadingN(load_mask, d, p, v_trailing);
+}
+
+#endif  // HWY_TARGET > HWY_AVX3
+#endif  // HWY_TARGET <= HWY_AVX2
+
+// ------------------------------ BlendedStore
+
+namespace detail {
+
+// Scalar fallback for BlendedStore: spills the vector and the mask to stack
+// arrays and copies only the selected lanes to `p`.
+// There is no maskload_epi8/16 with which BlendedStore could be implemented
+// safely, and manual blending is also unsafe because loading a full vector
+// that crosses the array end causes asan faults. Callers should prefer memcpy
+// when m is FirstN(d, n).
+template <class D>
+HWY_API void ScalarMaskedStore(VFromD<D> v, MFromD<D> m, D d,
+                               TFromD<D>* HWY_RESTRICT p) {
+  const RebindToSigned<decltype(d)> di;  // for testing mask if T=bfloat16_t.
+  using TI = TFromD<decltype(di)>;
+  constexpr size_t kLanes = MaxLanes(d);
+  alignas(16) TI lane_values[kLanes];
+  alignas(16) TI lane_selected[kLanes];
+  Store(BitCast(di, v), di, lane_values);
+  Store(BitCast(di, VecFromMask(d, m)), di, lane_selected);
+  for (size_t lane = 0; lane < kLanes; ++lane) {
+    if (!lane_selected[lane]) continue;
+    CopySameSize(lane_values + lane, p + lane);
+  }
+}
+}  // namespace detail
+
+#if HWY_TARGET <= HWY_AVX3
+
+// AVX3 masked stores: only lanes whose mask bit is set are written to `p`.
+
+// 8-bit lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D /* tag */,
+                          TFromD<D>* HWY_RESTRICT p) {
+  const auto selected = m.raw;
+  _mm_mask_storeu_epi8(p, selected, v.raw);
+}
+// 16-bit lanes; goes through the unsigned tag so float16_t also works.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D d,
+                          TFromD<D>* HWY_RESTRICT p) {
+  const RebindToUnsigned<decltype(d)> du;
+  uint16_t* const dst = reinterpret_cast<uint16_t*>(p);
+  const auto selected = RebindMask(du, m);
+  const auto value_u = BitCast(du, v);
+  _mm_mask_storeu_epi16(dst, selected.raw, value_u.raw);
+}
+
+// 32-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_UI32_D(D)>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D /* tag */,
+                          TFromD<D>* HWY_RESTRICT p) {
+  int* const dst = reinterpret_cast<int*>(p);  // NOLINT
+  _mm_mask_storeu_epi32(dst, m.raw, v.raw);
+}
+
+// 64-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_UI64_D(D)>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D /* tag */,
+                          TFromD<D>* HWY_RESTRICT p) {
+  long long* const dst = reinterpret_cast<long long*>(p);  // NOLINT
+  _mm_mask_storeu_epi64(dst, m.raw, v.raw);
+}
+
+// Single-precision lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D, float* HWY_RESTRICT p) {
+  const auto selected = m.raw;
+  _mm_mask_storeu_ps(p, selected, v.raw);
+}
+
+// Double-precision lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D, double* HWY_RESTRICT p) {
+  const auto selected = m.raw;
+  _mm_mask_storeu_pd(p, selected, v.raw);
+}
+
+#elif HWY_TARGET == HWY_AVX2
+
+// AVX2, 8/16-bit lanes: delegates to the scalar per-lane store.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2))>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D d,
+                          TFromD<D>* HWY_RESTRICT p) {
+  TFromD<D>* HWY_RESTRICT const dst = p;
+  detail::ScalarMaskedStore(v, m, d, dst);
+}
+
+namespace detail {
+
+// Thin wrappers over the AVX2 maskstore intrinsics, selected by the lane type
+// of D. `m` is passed straight through as the intrinsic's mask operand.
+
+// 32-bit integer lanes.
+template <class D, class V, class M, HWY_IF_UI32_D(D)>
+HWY_INLINE void NativeBlendedStore(V v, M m, TFromD<D>* HWY_RESTRICT p) {
+  int* const dst = reinterpret_cast<int*>(p);  // NOLINT
+  _mm_maskstore_epi32(dst, m.raw, v.raw);
+}
+
+// 64-bit integer lanes.
+template <class D, class V, class M, HWY_IF_UI64_D(D)>
+HWY_INLINE void NativeBlendedStore(V v, M m, TFromD<D>* HWY_RESTRICT p) {
+  long long* const dst = reinterpret_cast<long long*>(p);  // NOLINT
+  _mm_maskstore_epi64(dst, m.raw, v.raw);
+}
+
+// Single-precision lanes.
+template <class D, class V, class M, HWY_IF_F32_D(D)>
+HWY_INLINE void NativeBlendedStore(V v, M m, float* HWY_RESTRICT p) {
+  const auto selected = m.raw;
+  _mm_maskstore_ps(p, selected, v.raw);
+}
+
+// Double-precision lanes.
+template <class D, class V, class M, HWY_IF_F64_D(D)>
+HWY_INLINE void NativeBlendedStore(V v, M m, double* HWY_RESTRICT p) {
+  const auto selected = m.raw;
+  _mm_maskstore_pd(p, selected, v.raw);
+}
+
+}  // namespace detail
+
+// AVX2, 32/64-bit lanes: masked store through the native maskstore wrappers.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 4) | (1 << 8))>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D d,
+                          TFromD<D>* HWY_RESTRICT p) {
+  const RebindToSigned<decltype(d)> di;
+  if (d.MaxBytes() < 16) {
+    // Partial vector: clear the mask of lanes beyond D so they are not
+    // written.
+    const Full128<TFromD<D>> d128;
+    const Mask128<TFromD<D>> m128{m.raw};
+    const auto in_range = FirstN(d128, MaxLanes(d));
+    m = MFromD<D>{And(m128, in_range).raw};
+  }
+
+  // Float/double require, and unsigned ints tolerate, signed int masks.
+  const auto signed_mask = RebindMask(di, m);
+  detail::NativeBlendedStore<D>(v, signed_mask, p);
+}
+
+#else  // <= SSE4
+
+// Delegates to the scalar per-lane store. maskmov* is deliberately avoided:
+// its nontemporal 'hint' causes it to bypass caches (slow).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D d,
+                          TFromD<D>* HWY_RESTRICT p) {
+  TFromD<D>* HWY_RESTRICT const dst = p;
+  detail::ScalarMaskedStore(v, m, d, dst);
+}
+
+#endif  // SSE4
+
+// ================================================== ARITHMETIC
+
+// ------------------------------ Addition
+
+// Unsigned
+// Lane-wise addition of unsigned 8-bit lanes.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> operator+(const Vec128<uint8_t, N> a,
+                                     const Vec128<uint8_t, N> b) {
+  const auto sum = _mm_add_epi8(a.raw, b.raw);
+  return Vec128<uint8_t, N>{sum};
+}
+// Lane-wise addition of unsigned 16-bit lanes.
+template <size_t N>
+HWY_API Vec128<uint16_t, N> operator+(const Vec128<uint16_t, N> a,
+                                      const Vec128<uint16_t, N> b) {
+  const auto sum = _mm_add_epi16(a.raw, b.raw);
+  return Vec128<uint16_t, N>{sum};
+}
+// Lane-wise addition of unsigned 32-bit lanes.
+template <size_t N>
+HWY_API Vec128<uint32_t, N> operator+(const Vec128<uint32_t, N> a,
+                                      const Vec128<uint32_t, N> b) {
+  const auto sum = _mm_add_epi32(a.raw, b.raw);
+  return Vec128<uint32_t, N>{sum};
+}
+// Lane-wise addition of unsigned 64-bit lanes.
+template <size_t N>
+HWY_API Vec128<uint64_t, N> operator+(const Vec128<uint64_t, N> a,
+                                      const Vec128<uint64_t, N> b) {
+  const auto sum = _mm_add_epi64(a.raw, b.raw);
+  return Vec128<uint64_t, N>{sum};
+}
+
+// Signed
+// Lane-wise addition of signed 8-bit lanes.
+template <size_t N>
+HWY_API Vec128<int8_t, N> operator+(const Vec128<int8_t, N> a,
+                                    const Vec128<int8_t, N> b) {
+  const auto sum = _mm_add_epi8(a.raw, b.raw);
+  return Vec128<int8_t, N>{sum};
+}
+// Lane-wise addition of signed 16-bit lanes.
+template <size_t N>
+HWY_API Vec128<int16_t, N> operator+(const Vec128<int16_t, N> a,
+                                     const Vec128<int16_t, N> b) {
+  const auto sum = _mm_add_epi16(a.raw, b.raw);
+  return Vec128<int16_t, N>{sum};
+}
+// Lane-wise addition of signed 32-bit lanes.
+template <size_t N>
+HWY_API Vec128<int32_t, N> operator+(const Vec128<int32_t, N> a,
+                                     const Vec128<int32_t, N> b) {
+  const auto sum = _mm_add_epi32(a.raw, b.raw);
+  return Vec128<int32_t, N>{sum};
+}
+// Lane-wise addition of signed 64-bit lanes.
+template <size_t N>
+HWY_API Vec128<int64_t, N> operator+(const Vec128<int64_t, N> a,
+                                     const Vec128<int64_t, N> b) {
+  const auto sum = _mm_add_epi64(a.raw, b.raw);
+  return Vec128<int64_t, N>{sum};
+}
+
+// Float
+#if HWY_HAVE_FLOAT16
+// Lane-wise addition of half-precision lanes (only with native float16).
+template <size_t N>
+HWY_API Vec128<float16_t, N> operator+(const Vec128<float16_t, N> a,
+                                       const Vec128<float16_t, N> b) {
+  const auto sum = _mm_add_ph(a.raw, b.raw);
+  return Vec128<float16_t, N>{sum};
+}
+#endif  // HWY_HAVE_FLOAT16
+// Lane-wise addition of single-precision lanes.
+template <size_t N>
+HWY_API Vec128<float, N> operator+(const Vec128<float, N> a,
+                                   const Vec128<float, N> b) {
+  const auto sum = _mm_add_ps(a.raw, b.raw);
+  return Vec128<float, N>{sum};
+}
+// Lane-wise addition of double-precision lanes.
+template <size_t N>
+HWY_API Vec128<double, N> operator+(const Vec128<double, N> a,
+                                    const Vec128<double, N> b) {
+  const auto sum = _mm_add_pd(a.raw, b.raw);
+  return Vec128<double, N>{sum};
+}
+
+// ------------------------------ Subtraction
+
+// Unsigned
+// Lane-wise subtraction of unsigned 8-bit lanes.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> operator-(const Vec128<uint8_t, N> a,
+                                     const Vec128<uint8_t, N> b) {
+  const auto diff = _mm_sub_epi8(a.raw, b.raw);
+  return Vec128<uint8_t, N>{diff};
+}
+// Lane-wise subtraction of unsigned 16-bit lanes.
+template <size_t N>
+HWY_API Vec128<uint16_t, N> operator-(Vec128<uint16_t, N> a,
+                                      Vec128<uint16_t, N> b) {
+  const auto diff = _mm_sub_epi16(a.raw, b.raw);
+  return Vec128<uint16_t, N>{diff};
+}
+// Lane-wise subtraction of unsigned 32-bit lanes.
+template <size_t N>
+HWY_API Vec128<uint32_t, N> operator-(const Vec128<uint32_t, N> a,
+                                      const Vec128<uint32_t, N> b) {
+  const auto diff = _mm_sub_epi32(a.raw, b.raw);
+  return Vec128<uint32_t, N>{diff};
+}
+// Lane-wise subtraction of unsigned 64-bit lanes.
+template <size_t N>
+HWY_API Vec128<uint64_t, N> operator-(const Vec128<uint64_t, N> a,
+                                      const Vec128<uint64_t, N> b) {
+  const auto diff = _mm_sub_epi64(a.raw, b.raw);
+  return Vec128<uint64_t, N>{diff};
+}
+
+// Signed
+// Lane-wise subtraction of signed 8-bit lanes.
+template <size_t N>
+HWY_API Vec128<int8_t, N> operator-(const Vec128<int8_t, N> a,
+                                    const Vec128<int8_t, N> b) {
+  const auto diff = _mm_sub_epi8(a.raw, b.raw);
+  return Vec128<int8_t, N>{diff};
+}
+// Lane-wise subtraction of signed 16-bit lanes.
+template <size_t N>
+HWY_API Vec128<int16_t, N> operator-(const Vec128<int16_t, N> a,
+                                     const Vec128<int16_t, N> b) {
+  const auto diff = _mm_sub_epi16(a.raw, b.raw);
+  return Vec128<int16_t, N>{diff};
+}
+// Lane-wise subtraction of signed 32-bit lanes.
+template <size_t N>
+HWY_API Vec128<int32_t, N> operator-(const Vec128<int32_t, N> a,
+                                     const Vec128<int32_t, N> b) {
+  const auto diff = _mm_sub_epi32(a.raw, b.raw);
+  return Vec128<int32_t, N>{diff};
+}
+// Lane-wise subtraction of signed 64-bit lanes.
+template <size_t N>
+HWY_API Vec128<int64_t, N> operator-(const Vec128<int64_t, N> a,
+                                     const Vec128<int64_t, N> b) {
+  const auto diff = _mm_sub_epi64(a.raw, b.raw);
+  return Vec128<int64_t, N>{diff};
+}
+
+// Float
+#if HWY_HAVE_FLOAT16
+// Lane-wise subtraction of half-precision lanes (only with native float16).
+template <size_t N>
+HWY_API Vec128<float16_t, N> operator-(const Vec128<float16_t, N> a,
+                                       const Vec128<float16_t, N> b) {
+  const auto diff = _mm_sub_ph(a.raw, b.raw);
+  return Vec128<float16_t, N>{diff};
+}
+#endif  // HWY_HAVE_FLOAT16
+// Lane-wise subtraction of single-precision lanes.
+template <size_t N>
+HWY_API Vec128<float, N> operator-(const Vec128<float, N> a,
+                                   const Vec128<float, N> b) {
+  const auto diff = _mm_sub_ps(a.raw, b.raw);
+  return Vec128<float, N>{diff};
+}
+// Lane-wise subtraction of double-precision lanes.
+template <size_t N>
+HWY_API Vec128<double, N> operator-(const Vec128<double, N> a,
+                                    const Vec128<double, N> b) {
+  const auto diff = _mm_sub_pd(a.raw, b.raw);
+  return Vec128<double, N>{diff};
+}
+
+// ------------------------------ SumsOf8
+// Sums each group of 8 consecutive uint8_t lanes into one uint64_t lane,
+// using sum-of-absolute-differences against zero.
+template <size_t N>
+HWY_API Vec128<uint64_t, N / 8> SumsOf8(const Vec128<uint8_t, N> v) {
+  const auto zero = _mm_setzero_si128();
+  return Vec128<uint64_t, N / 8>{_mm_sad_epu8(v.raw, zero)};
+}
+
+#ifdef HWY_NATIVE_SUMS_OF_8_ABS_DIFF
+#undef HWY_NATIVE_SUMS_OF_8_ABS_DIFF
+#else
+#define HWY_NATIVE_SUMS_OF_8_ABS_DIFF
+#endif
+
+// For each group of 8 consecutive uint8_t lanes, returns the sum of
+// |a[i] - b[i]| in one uint64_t lane.
+template <size_t N>
+HWY_API Vec128<uint64_t, N / 8> SumsOf8AbsDiff(const Vec128<uint8_t, N> a,
+                                               const Vec128<uint8_t, N> b) {
+  const auto sums = _mm_sad_epu8(a.raw, b.raw);
+  return Vec128<uint64_t, N / 8>{sums};
+}
+
+// ------------------------------ SaturatedAdd
+
+// Returns a + b clamped to the destination range.
+
+// Unsigned
+// Saturating addition of unsigned 8-bit lanes.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> SaturatedAdd(const Vec128<uint8_t, N> a,
+                                        const Vec128<uint8_t, N> b) {
+  const auto clamped = _mm_adds_epu8(a.raw, b.raw);
+  return Vec128<uint8_t, N>{clamped};
+}
+// Saturating addition of unsigned 16-bit lanes.
+template <size_t N>
+HWY_API Vec128<uint16_t, N> SaturatedAdd(const Vec128<uint16_t, N> a,
+                                         const Vec128<uint16_t, N> b) {
+  const auto clamped = _mm_adds_epu16(a.raw, b.raw);
+  return Vec128<uint16_t, N>{clamped};
+}
+
+// Signed
+// Saturating addition of signed 8-bit lanes.
+template <size_t N>
+HWY_API Vec128<int8_t, N> SaturatedAdd(const Vec128<int8_t, N> a,
+                                       const Vec128<int8_t, N> b) {
+  const auto clamped = _mm_adds_epi8(a.raw, b.raw);
+  return Vec128<int8_t, N>{clamped};
+}
+// Saturating addition of signed 16-bit lanes.
+template <size_t N>
+HWY_API Vec128<int16_t, N> SaturatedAdd(const Vec128<int16_t, N> a,
+                                        const Vec128<int16_t, N> b) {
+  const auto clamped = _mm_adds_epi16(a.raw, b.raw);
+  return Vec128<int16_t, N>{clamped};
+}
+
+#if HWY_TARGET <= HWY_AVX3  // i32/i64 SaturatedAdd built on ternary logic.
+#ifdef HWY_NATIVE_I32_SATURATED_ADDSUB  // Toggles the flag's defined-ness.
+#undef HWY_NATIVE_I32_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_I32_SATURATED_ADDSUB
+#endif
+
+#ifdef HWY_NATIVE_I64_SATURATED_ADDSUB  // Toggles the flag's defined-ness.
+#undef HWY_NATIVE_I64_SATURATED_ADDSUB
+#else
+#define HWY_NATIVE_I64_SATURATED_ADDSUB
+#endif
+
+template <size_t N>  // Overflow clamps to max, or to min if a is negative.
+HWY_API Vec128<int32_t, N> SaturatedAdd(Vec128<int32_t, N> a,
+                                        Vec128<int32_t, N> b) {
+  const DFromV<decltype(a)> d;
+  const auto sum = a + b;  // Unclamped sum; replaced below on overflow.
+  const auto overflow_mask = MaskFromVec(  // 0x42: sign(a)==sign(b)!=sign(sum)
+      Vec128<int32_t, N>{_mm_ternarylogic_epi32(a.raw, b.raw, sum.raw, 0x42)});
+  const auto i32_max = Set(d, LimitsMax<int32_t>());  // 0x55: ~max == min
+  const Vec128<int32_t, N> overflow_result{_mm_mask_ternarylogic_epi32(
+      i32_max.raw, MaskFromVec(a).raw, i32_max.raw, i32_max.raw, 0x55)};
+  return IfThenElse(overflow_mask, overflow_result, sum);
+}
+
+template <size_t N>  // Overflow clamps to max, or to min if a is negative.
+HWY_API Vec128<int64_t, N> SaturatedAdd(Vec128<int64_t, N> a,
+                                        Vec128<int64_t, N> b) {
+  const DFromV<decltype(a)> d;
+  const auto sum = a + b;  // Unclamped sum; replaced below on overflow.
+  const auto overflow_mask = MaskFromVec(  // 0x42: sign(a)==sign(b)!=sign(sum)
+      Vec128<int64_t, N>{_mm_ternarylogic_epi64(a.raw, b.raw, sum.raw, 0x42)});
+  const auto i64_max = Set(d, LimitsMax<int64_t>());  // 0x55: ~max == min
+  const Vec128<int64_t, N> overflow_result{_mm_mask_ternarylogic_epi64(
+      i64_max.raw, MaskFromVec(a).raw, i64_max.raw, i64_max.raw, 0x55)};
+  return IfThenElse(overflow_mask, overflow_result, sum);
+}
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// ------------------------------ SaturatedSub
+
+// Returns a - b clamped to the destination range.
+
+// Unsigned
+template <size_t N>  // u8 lanes, unsigned saturation.
+HWY_API Vec128<uint8_t, N> SaturatedSub(const Vec128<uint8_t, N> a,
+                                        const Vec128<uint8_t, N> b) {
+  return decltype(a){_mm_subs_epu8(a.raw, b.raw)};
+}
+template <size_t N>  // u16 lanes, unsigned saturation.
+HWY_API Vec128<uint16_t, N> SaturatedSub(const Vec128<uint16_t, N> a,
+                                         const Vec128<uint16_t, N> b) {
+  return decltype(a){_mm_subs_epu16(a.raw, b.raw)};
+}
+
+// Signed (8- and 16-bit lanes use the native saturating instructions)
+template <size_t N>
+HWY_API Vec128<int8_t, N> SaturatedSub(const Vec128<int8_t, N> a,
+                                       const Vec128<int8_t, N> b) {
+  return decltype(a){_mm_subs_epi8(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> SaturatedSub(const Vec128<int16_t, N> a,
+                                        const Vec128<int16_t, N> b) {
+  return decltype(a){_mm_subs_epi16(a.raw, b.raw)};
+}
+
+#if HWY_TARGET <= HWY_AVX3  // i32/i64 SaturatedSub built on ternary logic.
+template <size_t N>  // Overflow clamps to max, or to min if a is negative.
+HWY_API Vec128<int32_t, N> SaturatedSub(Vec128<int32_t, N> a,
+                                        Vec128<int32_t, N> b) {
+  const DFromV<decltype(a)> d;
+  const auto diff = a - b;  // Unclamped result; replaced below on overflow.
+  const auto overflow_mask = MaskFromVec(  // 0x18: sign(a)!=sign(b)==sign(diff)
+      Vec128<int32_t, N>{_mm_ternarylogic_epi32(a.raw, b.raw, diff.raw, 0x18)});
+  const auto i32_max = Set(d, LimitsMax<int32_t>());  // 0x55: ~max == min
+  const Vec128<int32_t, N> overflow_result{_mm_mask_ternarylogic_epi32(
+      i32_max.raw, MaskFromVec(a).raw, i32_max.raw, i32_max.raw, 0x55)};
+  return IfThenElse(overflow_mask, overflow_result, diff);
+}
+
+template <size_t N>  // Overflow clamps to max, or to min if a is negative.
+HWY_API Vec128<int64_t, N> SaturatedSub(Vec128<int64_t, N> a,
+                                        Vec128<int64_t, N> b) {
+  const DFromV<decltype(a)> d;
+  const auto diff = a - b;  // Unclamped result; replaced below on overflow.
+  const auto overflow_mask = MaskFromVec(  // 0x18: sign(a)!=sign(b)==sign(diff)
+      Vec128<int64_t, N>{_mm_ternarylogic_epi64(a.raw, b.raw, diff.raw, 0x18)});
+  const auto i64_max = Set(d, LimitsMax<int64_t>());  // 0x55: ~max == min
+  const Vec128<int64_t, N> overflow_result{_mm_mask_ternarylogic_epi64(
+      i64_max.raw, MaskFromVec(a).raw, i64_max.raw, i64_max.raw, 0x55)};
+  return IfThenElse(overflow_mask, overflow_result, diff);
+}
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// ------------------------------ AverageRound
+
+// Returns (a + b + 1) / 2
+
+// Unsigned
+template <size_t N>  // u8 lanes
+HWY_API Vec128<uint8_t, N> AverageRound(const Vec128<uint8_t, N> a,
+                                        const Vec128<uint8_t, N> b) {
+  return decltype(a){_mm_avg_epu8(a.raw, b.raw)};
+}
+template <size_t N>  // u16 lanes
+HWY_API Vec128<uint16_t, N> AverageRound(const Vec128<uint16_t, N> a,
+                                         const Vec128<uint16_t, N> b) {
+  return decltype(a){_mm_avg_epu16(a.raw, b.raw)};
+}
+
+// ------------------------------ Integer multiplication
+
+template <size_t N>  // u16 lanes: keeps the low 16 bits of each product.
+HWY_API Vec128<uint16_t, N> operator*(const Vec128<uint16_t, N> a,
+                                      const Vec128<uint16_t, N> b) {
+  return decltype(a){_mm_mullo_epi16(a.raw, b.raw)};
+}
+template <size_t N>  // i16 lanes: keeps the low 16 bits of each product.
+HWY_API Vec128<int16_t, N> operator*(const Vec128<int16_t, N> a,
+                                     const Vec128<int16_t, N> b) {
+  return decltype(a){_mm_mullo_epi16(a.raw, b.raw)};
+}
+
+// Per lane, returns the upper 16 bits of the 32-bit product a * b.
+template <size_t N>  // Unsigned product.
+HWY_API Vec128<uint16_t, N> MulHigh(const Vec128<uint16_t, N> a,
+                                    const Vec128<uint16_t, N> b) {
+  return decltype(a){_mm_mulhi_epu16(a.raw, b.raw)};
+}
+template <size_t N>  // Signed product.
+HWY_API Vec128<int16_t, N> MulHigh(const Vec128<int16_t, N> a,
+                                   const Vec128<int16_t, N> b) {
+  return decltype(a){_mm_mulhi_epi16(a.raw, b.raw)};
+}
+
+// Multiplies even lanes (0, 2, ..) and returns the double-wide products: the
+// low half lands in the even lane and the high half in its odd neighbor.
+template <class V, HWY_IF_U8_D(DFromV<V>)>
+HWY_API VFromD<RepartitionToWide<DFromV<V>>> MulEven(V a, V b) {
+  const RepartitionToWide<DFromV<decltype(a)>> d16;
+  const auto even_bytes = Set(d16, uint16_t{0x00FF});
+  const auto a_even = And(ResizeBitCast(d16, a), even_bytes);
+  const auto b_even = And(ResizeBitCast(d16, b), even_bytes);
+  return a_even * b_even;
+}
+
+template <class V, HWY_IF_I8_D(DFromV<V>)>
+HWY_API VFromD<RepartitionToWide<DFromV<V>>> MulEven(V a, V b) {
+  const RepartitionToWide<DFromV<decltype(a)>> d16;
+  const auto a_even = ShiftRight<8>(ShiftLeft<8>(ResizeBitCast(d16, a)));
+  const auto b_even = ShiftRight<8>(ShiftLeft<8>(ResizeBitCast(d16, b)));
+  return a_even * b_even;
+}
+
+template <class V, HWY_IF_UI16_D(DFromV<V>)>
+HWY_API VFromD<RepartitionToWide<DFromV<V>>> MulEven(V a, V b) {
+  const RepartitionToWide<DFromV<decltype(a)>> d32;
+  const RepartitionToNarrow<decltype(d32)> d16;
+  // Odd 16-bit lanes take the high product halves, even lanes the low ones.
+  const auto lo16 = BitCast(d16, ResizeBitCast(d32, a * b));
+  const auto hi16 =
+      BitCast(d16, ShiftLeft<16>(ResizeBitCast(d32, MulHigh(a, b))));
+  return BitCast(d32, OddEven(hi16, lo16));
+}
+
+template <size_t N>  // u32: full 64-bit products of the even lanes.
+HWY_API Vec128<uint64_t, (N + 1) / 2> MulEven(const Vec128<uint32_t, N> a,
+                                              const Vec128<uint32_t, N> b) {
+  return Vec128<uint64_t, (N + 1) / 2>{_mm_mul_epu32(a.raw, b.raw)};
+}
+
+template <size_t N>  // i32: full signed 64-bit products of the even lanes.
+HWY_API Vec128<int64_t, (N + 1) / 2> MulEven(const Vec128<int32_t, N> a,
+                                             const Vec128<int32_t, N> b) {
+#if HWY_TARGET >= HWY_SSSE3  // SSE2/SSSE3: emulate via the unsigned multiply.
+  const DFromV<decltype(a)> d;
+  const RepartitionToWide<decltype(d)> dw;
+  const RebindToUnsigned<decltype(d)> du;
+
+  // p[i] = (((a[i] >> 31) * (b[i] >> 31)) << 64) +
+  //        (((a[i] >> 31) * b[i]) << 32) +
+  //        (((b[i] >> 31) * a[i]) << 32) +
+  //        ((a[i] & int64_t{0xFFFFFFFF}) * (b[i] & int64_t{0xFFFFFFFF}))
+
+  // ((a[i] >> 31) * (b[i] >> 31)) << 64 does not need to be computed as the
+  // lower 64 bits of ((a[i] >> 31) * (b[i] >> 31)) << 64 is zero.
+
+  // (((a[i] >> 31) * b[i]) << 32) + (((b[i] >> 31) * a[i]) << 32) ==
+  // -((((a[i] >> 31) & b[i]) + ((b[i] >> 31) & a[i])) << 32)
+
+  // ((a[i] & int64_t{0xFFFFFFFF}) * (b[i] & int64_t{0xFFFFFFFF})) can be
+  // computed using MulEven(BitCast(du, a), BitCast(du, b))
+
+  const auto neg_p_hi = ShiftLeft<32>(
+      ResizeBitCast(dw, And(ShiftRight<31>(a), b) + And(ShiftRight<31>(b), a)));
+  const auto p_lo = BitCast(dw, MulEven(BitCast(du, a), BitCast(du, b)));
+  return p_lo - neg_p_hi;  // Subtract because neg_p_hi is the negated term.
+#else
+  return Vec128<int64_t, (N + 1) / 2>{_mm_mul_epi32(a.raw, b.raw)};
+#endif
+}
+
+template <class V, HWY_IF_T_SIZE_V(V, 1)>
+HWY_API VFromD<RepartitionToWide<DFromV<V>>> MulOdd(V a, V b) {
+  const RepartitionToWide<DFromV<decltype(a)>> d16;
+  const auto a_odd = ShiftRight<8>(ResizeBitCast(d16, a));
+  const auto b_odd = ShiftRight<8>(ResizeBitCast(d16, b));
+  return a_odd * b_odd;
+}
+
+template <class V, HWY_IF_UI16_D(DFromV<V>)>
+HWY_API VFromD<RepartitionToWide<DFromV<V>>> MulOdd(V a, V b) {
+  const RepartitionToWide<DFromV<decltype(a)>> d32;
+  const RebindToUnsigned<decltype(d32)> du32;
+  const RepartitionToNarrow<decltype(d32)> d16;
+  // Logical shift moves the odd-lane low halves down into the even lanes.
+  const auto lo32 = ShiftRight<16>(BitCast(du32, ResizeBitCast(d32, a * b)));
+  const auto lo16 = BitCast(d16, lo32);
+  const auto hi16 = BitCast(d16, ResizeBitCast(d32, MulHigh(a, b)));
+  return BitCast(d32, OddEven(hi16, lo16));
+}
+
+template <class V, HWY_IF_UI32_D(DFromV<V>)>
+HWY_API VFromD<RepartitionToWide<DFromV<V>>> MulOdd(V a, V b) {
+  return MulEven(DupOdd(a), DupOdd(b));  // Odd lanes copied to even positions.
+}
+
+template <size_t N>
+HWY_API Vec128<uint32_t, N> operator*(const Vec128<uint32_t, N> a,
+                                      const Vec128<uint32_t, N> b) {
+#if HWY_TARGET < HWY_SSSE3
+  return decltype(a){_mm_mullo_epi32(a.raw, b.raw)};
+#else
+  // Not as slow as it looks: _mm_mullo_epi32 itself has 10 cycle latency.
+  // A 64-bit right shift would also work but needs port 5 too: no benefit.
+  // Lane notation: x=don't care, z=0.
+  const __m128i a_odd = _mm_shuffle_epi32(a.raw, _MM_SHUFFLE(3, 3, 1, 1));
+  const __m128i b_odd = _mm_shuffle_epi32(b.raw, _MM_SHUFFLE(3, 3, 1, 1));
+  const auto prod_x2x0 = MulEven(a, b);
+  const auto prod_x3x1 =
+      MulEven(Vec128<uint32_t, N>{a_odd}, Vec128<uint32_t, N>{b_odd});
+  // Shifting by 32 to get 3z1z and ORing with z2z0 would also work, but
+  // producing z2z0 costs one more instruction or a constant.
+  const __m128i low_20 =
+      _mm_shuffle_epi32(prod_x2x0.raw, _MM_SHUFFLE(2, 0, 2, 0));
+  const __m128i low_31 =
+      _mm_shuffle_epi32(prod_x3x1.raw, _MM_SHUFFLE(2, 0, 2, 0));
+  return decltype(a){_mm_unpacklo_epi32(low_20, low_31)};
+#endif
+}
+
+template <size_t N>
+HWY_API Vec128<int32_t, N> operator*(const Vec128<int32_t, N> a,
+                                     const Vec128<int32_t, N> b) {
+  // Reuse the unsigned overload rather than duplicating the SSSE3 code.
+  const RebindToUnsigned<DFromV<decltype(a)>> du;
+  const auto product_u = BitCast(du, a) * BitCast(du, b);
+  return BitCast(DFromV<decltype(a)>(), product_u);
+}
+
+// ------------------------------ RotateRight (ShiftRight, Or)
+
+template <int kBits, typename T, size_t N,
+          HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2))>
+HWY_API Vec128<T, N> RotateRight(const Vec128<T, N> v) {
+  constexpr size_t kLaneBits = sizeof(T) * 8;
+  static_assert(0 <= kBits && kBits < kLaneBits, "Invalid shift count");
+  if (kBits == 0) return v;
+  // No 8/16-bit rotate on AVX3; HWY_MIN keeps the count valid when kBits == 0.
+  const auto wrapped = ShiftLeft<HWY_MIN(kLaneBits - 1, kLaneBits - kBits)>(v);
+  return Or(ShiftRight<kBits>(v), wrapped);
+}
+
+template <int kBits, size_t N>
+HWY_API Vec128<uint32_t, N> RotateRight(const Vec128<uint32_t, N> v) {
+  static_assert(0 <= kBits && kBits < 32, "Invalid shift count");
+#if HWY_TARGET > HWY_AVX3
+  if (kBits == 0) return v;
+  return Or(ShiftRight<kBits>(v), ShiftLeft<HWY_MIN(31, 32 - kBits)>(v));
+#else
+  return decltype(v){_mm_ror_epi32(v.raw, kBits)};  // Native rotate.
+#endif
+}
+
+template <int kBits, size_t N>
+HWY_API Vec128<uint64_t, N> RotateRight(const Vec128<uint64_t, N> v) {
+  static_assert(0 <= kBits && kBits < 64, "Invalid shift count");
+#if HWY_TARGET > HWY_AVX3
+  if (kBits == 0) return v;
+  return Or(ShiftRight<kBits>(v), ShiftLeft<HWY_MIN(63, 64 - kBits)>(v));
+#else
+  return decltype(v){_mm_ror_epi64(v.raw, kBits)};  // Native rotate.
+#endif
+}
+
+// ------------------------------ BroadcastSignBit (ShiftRight, compare, mask)
+
+template <size_t N>
+HWY_API Vec128<int8_t, N> BroadcastSignBit(const Vec128<int8_t, N> v) {
+  // Lanes < 0 become all-ones via the compare mask; others become zero.
+  return VecFromMask(v < Zero(DFromV<decltype(v)>()));
+}
+
+template <size_t N>
+HWY_API Vec128<int16_t, N> BroadcastSignBit(const Vec128<int16_t, N> v) {
+  return ShiftRight<15>(v);  // Signed shift replicates the sign bit.
+}
+
+template <size_t N>
+HWY_API Vec128<int32_t, N> BroadcastSignBit(const Vec128<int32_t, N> v) {
+  return ShiftRight<31>(v);  // Signed shift replicates the sign bit.
+}
+
+template <size_t N>
+HWY_API Vec128<int64_t, N> BroadcastSignBit(const Vec128<int64_t, N> v) {
+#if HWY_TARGET <= HWY_AVX3
+  // Only AVX3 uses the 64-bit arithmetic shift intrinsic here.
+  return decltype(v){_mm_srai_epi64(v.raw, 63)};
+#elif HWY_TARGET == HWY_AVX2 || HWY_TARGET == HWY_SSE4
+  return VecFromMask(v < Zero(DFromV<decltype(v)>()));
+#else
+  // Efficient Lt() requires SSE4.2 and BLENDVPD requires SSE4.1. 32-bit shift
+  // avoids generating a zero. The shuffle then copies the sign words (odd
+  // 32-bit lanes) into both halves of each 64-bit lane.
+  const RepartitionToNarrow<DFromV<decltype(v)>> d32;
+  const auto sign = ShiftRight<31>(BitCast(d32, v));
+  return decltype(v){
+      _mm_shuffle_epi32(sign.raw, _MM_SHUFFLE(3, 3, 1, 1))};
+#endif
+}
+
+// ------------------------------ Integer Abs
+
+// Returns |v| per lane; LimitsMin() maps to LimitsMax() + 1 (i.e. wraps).
+template <size_t N>
+HWY_API Vec128<int8_t, N> Abs(const Vec128<int8_t, N> v) {
+#if !HWY_COMPILER_MSVC && HWY_TARGET != HWY_SSE2
+  return decltype(v){_mm_abs_epi8(v.raw)};
+#else
+  // Viewed as u8, the smaller of v and -v is the magnitude.
+  const DFromV<decltype(v)> di;
+  const RebindToUnsigned<decltype(di)> du;
+  const auto u = BitCast(du, v);
+  return BitCast(di, Min(u, Zero(du) - u));
+#endif
+}
+
+template <size_t N>
+HWY_API Vec128<int16_t, N> Abs(const Vec128<int16_t, N> v) {
+#if HWY_TARGET != HWY_SSE2
+  return decltype(v){_mm_abs_epi16(v.raw)};
+#else
+  // SSE2 fallback: max(v, -v).
+  return Max(v, Zero(DFromV<decltype(v)>()) - v);
+#endif
+}
+
+template <size_t N>
+HWY_API Vec128<int32_t, N> Abs(const Vec128<int32_t, N> v) {
+#if HWY_TARGET > HWY_SSSE3
+  const auto is_negative = MaskFromVec(BroadcastSignBit(v));
+  return IfThenElse(is_negative, Zero(DFromV<decltype(v)>()) - v, v);
+#else
+  return decltype(v){_mm_abs_epi32(v.raw)};
+#endif
+}
+
+template <size_t N>
+HWY_API Vec128<int64_t, N> Abs(const Vec128<int64_t, N> v) {
+#if HWY_TARGET > HWY_AVX3
+  const auto is_negative = MaskFromVec(BroadcastSignBit(v));
+  return IfThenElse(is_negative, Zero(DFromV<decltype(v)>()) - v, v);
+#else
+  return decltype(v){_mm_abs_epi64(v.raw)};
+#endif
+}
+
+// Count type for 64-bit immediate shifts. GCC <14 and Clang <11 do not follow
+// the Intel documentation for AVX-512VL srli_epi64 (count: unsigned int). Not
+// the same as the Shift3264Count in x86_512-inl.h (GCC also requires int).
+#if (HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1100) || \
+    (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1400)
+using Shift64Count = int;  // Used for the older compilers listed above.
+#else
+// Assume documented behavior. Clang 12, GCC 14 and MSVC 14.28.29910 match this.
+using Shift64Count = unsigned int;
+#endif
+
+template <int kBits, size_t N>
+HWY_API Vec128<int64_t, N> ShiftRight(const Vec128<int64_t, N> v) {
+#if HWY_TARGET > HWY_AVX3  // Emulate: logical shift OR shifted sign mask.
+  const DFromV<decltype(v)> di;
+  const RebindToUnsigned<decltype(di)> du;
+  const auto logical = BitCast(di, ShiftRight<kBits>(BitCast(du, v)));
+  const auto sign_fill = ShiftLeft<64 - kBits>(BroadcastSignBit(v));
+  return logical | sign_fill;
+#else
+  return decltype(v){
+      _mm_srai_epi64(v.raw, static_cast<Shift64Count>(kBits))};
+#endif
+}
+
+// ------------------------------ ZeroIfNegative (BroadcastSignBit)
+template <typename T, size_t N>
+HWY_API Vec128<T, N> ZeroIfNegative(Vec128<T, N> v) {
+  static_assert(IsFloat<T>(), "Only works for float");
+  const DFromV<decltype(v)> d;
+#if HWY_TARGET < HWY_SSSE3
+  const auto neg = MaskFromVec(v);  // MSB is sufficient for BLENDVPS
+#else
+  const RebindToSigned<decltype(d)> di;
+  const auto neg = MaskFromVec(BitCast(d, BroadcastSignBit(BitCast(di, v))));
+#endif
+  return IfThenElse(neg, Zero(d), v);
+}
+
+// ------------------------------ IfNegativeThenElse
+template <size_t N>  // Per lane: `yes` where v has its sign bit set, else `no`.
+HWY_API Vec128<int8_t, N> IfNegativeThenElse(const Vec128<int8_t, N> v,
+                                             const Vec128<int8_t, N> yes,
+                                             const Vec128<int8_t, N> no) {
+// int8: IfThenElse only looks at the MSB on SSE4 or newer
+#if HWY_TARGET <= HWY_SSE4
+  const auto mask = MaskFromVec(v);
+#else  // Otherwise expand the sign bit to a full-lane mask first.
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;
+  const auto mask = MaskFromVec(BitCast(d, BroadcastSignBit(BitCast(di, v))));
+#endif
+
+  return IfThenElse(mask, yes, no);
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec128<T, N> IfNegativeThenElse(Vec128<T, N> v, Vec128<T, N> yes,
+                                        Vec128<T, N> no) {
+  static_assert(IsSigned<T>(), "Only works for signed/float");
+
+// 16-bit: no native blendv on AVX2 or earlier, so copy sign to lower byte's
+// MSB.
+#if HWY_TARGET <= HWY_AVX3
+  const auto mask = MaskFromVec(v);
+#else  // AVX2 or earlier: broadcast the sign across the whole lane.
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;
+  const auto mask = MaskFromVec(BitCast(d, BroadcastSignBit(BitCast(di, v))));
+#endif
+
+  return IfThenElse(mask, yes, no);
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE_ONE_OF(T, (1 << 4) | (1 << 8))>
+HWY_API Vec128<T, N> IfNegativeThenElse(Vec128<T, N> v, Vec128<T, N> yes,
+                                        Vec128<T, N> no) {
+  static_assert(IsSigned<T>(), "Only works for signed/float");
+  const DFromV<decltype(v)> d;
+
+#if HWY_TARGET > HWY_AVX3 && HWY_TARGET <= HWY_SSE4  // SSE4 or AVX2
+  // 32/64-bit: use float IfThenElse on SSE4/AVX2, which only looks at the MSB
+  // on SSE4 or later.
+  const RebindToFloat<decltype(d)> df;
+  const auto mask = MaskFromVec(BitCast(df, v));
+  return BitCast(d, IfThenElse(mask, BitCast(df, yes), BitCast(df, no)));
+#else  // SSE2, SSSE3, or AVX3
+
+#if HWY_TARGET <= HWY_AVX3
+  // No need to cast to float or broadcast sign bit on AVX3 as IfThenElse only
+  // looks at the MSB on AVX3
+  (void)d;
+  const auto mask = MaskFromVec(v);
+#else  // SSE2 or SSSE3: broadcast the sign across the whole lane.
+  const RebindToSigned<decltype(d)> di;
+  const auto mask = MaskFromVec(BitCast(d, BroadcastSignBit(BitCast(di, v))));
+#endif
+
+  return IfThenElse(mask, yes, no);
+#endif
+}
+
+// ------------------------------ ShiftLeftSame
+
+template <size_t N>  // u16: every lane is shifted left by the same `bits`.
+HWY_API Vec128<uint16_t, N> ShiftLeftSame(const Vec128<uint16_t, N> v,
+                                          const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Constant count: immediate form.
+    return decltype(v){_mm_slli_epi16(v.raw, bits)};
+  }
+#endif
+  return decltype(v){_mm_sll_epi16(v.raw, _mm_cvtsi32_si128(bits))};
+}
+template <size_t N>  // u32
+HWY_API Vec128<uint32_t, N> ShiftLeftSame(const Vec128<uint32_t, N> v,
+                                          const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Constant count: immediate form.
+    return decltype(v){_mm_slli_epi32(v.raw, bits)};
+  }
+#endif
+  return decltype(v){_mm_sll_epi32(v.raw, _mm_cvtsi32_si128(bits))};
+}
+template <size_t N>  // u64
+HWY_API Vec128<uint64_t, N> ShiftLeftSame(const Vec128<uint64_t, N> v,
+                                          const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Constant count: immediate form.
+    return decltype(v){_mm_slli_epi64(v.raw, bits)};
+  }
+#endif
+  return decltype(v){_mm_sll_epi64(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+template <size_t N>  // i16
+HWY_API Vec128<int16_t, N> ShiftLeftSame(const Vec128<int16_t, N> v,
+                                         const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Constant count: immediate form.
+    return decltype(v){_mm_slli_epi16(v.raw, bits)};
+  }
+#endif
+  return decltype(v){_mm_sll_epi16(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+template <size_t N>  // i32
+HWY_API Vec128<int32_t, N> ShiftLeftSame(const Vec128<int32_t, N> v,
+                                         const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Constant count: immediate form.
+    return decltype(v){_mm_slli_epi32(v.raw, bits)};
+  }
+#endif
+  return decltype(v){_mm_sll_epi32(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+template <size_t N>  // i64
+HWY_API Vec128<int64_t, N> ShiftLeftSame(const Vec128<int64_t, N> v,
+                                         const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Constant count: immediate form.
+    return decltype(v){_mm_slli_epi64(v.raw, bits)};
+  }
+#endif
+  return decltype(v){_mm_sll_epi64(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T, N> ShiftLeftSame(const Vec128<T, N> v, const int bits) {
+  const DFromV<decltype(v)> d8;
+  // Shift 16-bit lanes (raw, not BitCast, to support N=1), then clear the bits
+  // that spilled in from the lower neighboring byte.
+  const Vec128<T, N> wide{ShiftLeftSame(Vec128<MakeWide<T>>{v.raw}, bits).raw};
+  return wide & Set(d8, static_cast<T>((0xFF << bits) & 0xFF));
+}
+
+// ------------------------------ ShiftRightSame (BroadcastSignBit)
+
+template <size_t N>  // u16: logical right shift of every lane by `bits`.
+HWY_API Vec128<uint16_t, N> ShiftRightSame(const Vec128<uint16_t, N> v,
+                                           const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Constant count: immediate form.
+    return decltype(v){_mm_srli_epi16(v.raw, bits)};
+  }
+#endif
+  return decltype(v){_mm_srl_epi16(v.raw, _mm_cvtsi32_si128(bits))};
+}
+template <size_t N>  // u32
+HWY_API Vec128<uint32_t, N> ShiftRightSame(const Vec128<uint32_t, N> v,
+                                           const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Constant count: immediate form.
+    return decltype(v){_mm_srli_epi32(v.raw, bits)};
+  }
+#endif
+  return decltype(v){_mm_srl_epi32(v.raw, _mm_cvtsi32_si128(bits))};
+}
+template <size_t N>  // u64
+HWY_API Vec128<uint64_t, N> ShiftRightSame(const Vec128<uint64_t, N> v,
+                                           const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Constant count: immediate form.
+    return decltype(v){_mm_srli_epi64(v.raw, bits)};
+  }
+#endif
+  return decltype(v){_mm_srl_epi64(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+template <size_t N>
+HWY_API Vec128<uint8_t, N> ShiftRightSame(Vec128<uint8_t, N> v,
+                                          const int bits) {
+  const DFromV<decltype(v)> d8;
+  // Shift 16-bit lanes (raw, not BitCast, to support N=1), then clear the bits
+  // that spilled in from the upper neighboring byte.
+  const decltype(v) wide{ShiftRightSame(Vec128<uint16_t>{v.raw}, bits).raw};
+  return wide & Set(d8, static_cast<uint8_t>(0xFF >> bits));
+}
+
+template <size_t N>  // i16: arithmetic right shift of every lane by `bits`.
+HWY_API Vec128<int16_t, N> ShiftRightSame(const Vec128<int16_t, N> v,
+                                          const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Constant count: immediate form.
+    return decltype(v){_mm_srai_epi16(v.raw, bits)};
+  }
+#endif
+  return decltype(v){_mm_sra_epi16(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+template <size_t N>  // i32
+HWY_API Vec128<int32_t, N> ShiftRightSame(const Vec128<int32_t, N> v,
+                                          const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Constant count: immediate form.
+    return decltype(v){_mm_srai_epi32(v.raw, bits)};
+  }
+#endif
+  return decltype(v){_mm_sra_epi32(v.raw, _mm_cvtsi32_si128(bits))};
+}
+template <size_t N>  // i64
+HWY_API Vec128<int64_t, N> ShiftRightSame(const Vec128<int64_t, N> v,
+                                          const int bits) {
+#if HWY_TARGET > HWY_AVX3
+  // Logical shift, then OR in the sign mask moved into the vacated bits.
+  const DFromV<decltype(v)> di;
+  const RebindToUnsigned<decltype(di)> du;
+  const auto logical = BitCast(di, ShiftRightSame(BitCast(du, v), bits));
+  return logical | ShiftLeftSame(BroadcastSignBit(v), 64 - bits);
+#else
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {
+    return decltype(v){
+        _mm_srai_epi64(v.raw, static_cast<Shift64Count>(bits))};
+  }
+#endif
+  return decltype(v){_mm_sra_epi64(v.raw, _mm_cvtsi32_si128(bits))};
+#endif
+}
+
+template <size_t N>
+HWY_API Vec128<int8_t, N> ShiftRightSame(Vec128<int8_t, N> v, const int bits) {
+  const DFromV<decltype(v)> di;
+  const RebindToUnsigned<decltype(di)> du;
+  // Logical shift, then sign-extend via (x ^ m) - m with m = 0x80 >> bits.
+  const auto logical = BitCast(di, ShiftRightSame(BitCast(du, v), bits));
+  const auto sign_pos = BitCast(di, Set(du, static_cast<uint8_t>(0x80 >> bits)));
+  return (logical ^ sign_pos) - sign_pos;
+}
+
+// ------------------------------ Floating-point mul / div
+
+#if HWY_HAVE_FLOAT16
+template <size_t N>  // f16 lanes; only compiled when HWY_HAVE_FLOAT16 is set.
+HWY_API Vec128<float16_t, N> operator*(Vec128<float16_t, N> a,
+                                       Vec128<float16_t, N> b) {
+  return decltype(a){_mm_mul_ph(a.raw, b.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+template <size_t N>  // f32 lanes: returns a * b per lane.
+HWY_API Vec128<float, N> operator*(Vec128<float, N> a, Vec128<float, N> b) {
+  return decltype(a){_mm_mul_ps(a.raw, b.raw)};
+}
+HWY_API Vec128<float, 1> operator*(const Vec128<float, 1> a,
+                                   const Vec128<float, 1> b) {
+  return decltype(a){_mm_mul_ss(a.raw, b.raw)};  // Single-lane (scalar) form.
+}
+template <size_t N>  // f64 lanes: returns a * b per lane.
+HWY_API Vec128<double, N> operator*(const Vec128<double, N> a,
+                                    const Vec128<double, N> b) {
+  return decltype(a){_mm_mul_pd(a.raw, b.raw)};
+}
+HWY_API Vec64<double> operator*(const Vec64<double> a, const Vec64<double> b) {
+  return decltype(a){_mm_mul_sd(a.raw, b.raw)};  // Single-lane (scalar) form.
+}
+
+#if HWY_HAVE_FLOAT16
+template <size_t N>  // f16 lanes; only compiled when HWY_HAVE_FLOAT16 is set.
+HWY_API Vec128<float16_t, N> operator/(const Vec128<float16_t, N> a,
+                                       const Vec128<float16_t, N> b) {
+  return decltype(a){_mm_div_ph(a.raw, b.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+template <size_t N>  // f32 lanes: returns a / b per lane.
+HWY_API Vec128<float, N> operator/(const Vec128<float, N> a,
+                                   const Vec128<float, N> b) {
+  return decltype(a){_mm_div_ps(a.raw, b.raw)};
+}
+HWY_API Vec128<float, 1> operator/(const Vec128<float, 1> a,
+                                   const Vec128<float, 1> b) {
+  return decltype(a){_mm_div_ss(a.raw, b.raw)};  // Single-lane (scalar) form.
+}
+template <size_t N>  // f64 lanes: returns a / b per lane.
+HWY_API Vec128<double, N> operator/(const Vec128<double, N> a,
+                                    const Vec128<double, N> b) {
+  return decltype(a){_mm_div_pd(a.raw, b.raw)};
+}
+HWY_API Vec64<double> operator/(const Vec64<double> a, const Vec64<double> b) {
+  return decltype(a){_mm_div_sd(a.raw, b.raw)};  // Single-lane (scalar) form.
+}
+
+// Approximate reciprocal
+#if HWY_HAVE_FLOAT16
+template <size_t N>  // f16 lanes; only compiled when HWY_HAVE_FLOAT16 is set.
+HWY_API Vec128<float16_t, N> ApproximateReciprocal(
+    const Vec128<float16_t, N> v) {
+  return decltype(v){_mm_rcp_ph(v.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+template <size_t N>  // f32 lanes: approximation of 1 / v per lane.
+HWY_API Vec128<float, N> ApproximateReciprocal(const Vec128<float, N> v) {
+  return decltype(v){_mm_rcp_ps(v.raw)};
+}
+HWY_API Vec128<float, 1> ApproximateReciprocal(const Vec128<float, 1> v) {
+  return decltype(v){_mm_rcp_ss(v.raw)};  // Single-lane (scalar) form.
+}
+
+#if HWY_TARGET <= HWY_AVX3  // f64 versions use the AVX3 rcp14 intrinsics.
+#ifdef HWY_NATIVE_F64_APPROX_RECIP  // Toggles the flag's defined-ness.
+#undef HWY_NATIVE_F64_APPROX_RECIP
+#else
+#define HWY_NATIVE_F64_APPROX_RECIP
+#endif
+
+HWY_API Vec128<double> ApproximateReciprocal(Vec128<double> v) {
+  return decltype(v){_mm_rcp14_pd(v.raw)};
+}
+HWY_API Vec64<double> ApproximateReciprocal(Vec64<double> v) {
+  return decltype(v){_mm_rcp14_sd(v.raw, v.raw)};
+}
+#endif
+
+// Returns |a - b| per floating-point lane. Generic for all vector lengths.
+template <class V, HWY_IF_FLOAT_V(V)>
+HWY_API V AbsDiff(V a, V b) {
+  return Abs(a - b);
+}
+
+// ------------------------------ Floating-point multiply-add variants
+
+#if HWY_HAVE_FLOAT16
+template <size_t N>  // f16 fused multiply-add: mul * x + add
+HWY_API Vec128<float16_t, N> MulAdd(Vec128<float16_t, N> mul,
+                                    Vec128<float16_t, N> x,
+                                    Vec128<float16_t, N> add) {
+  return decltype(add){_mm_fmadd_ph(mul.raw, x.raw, add.raw)};
+}
+
+template <size_t N>  // f16: add - mul * x
+HWY_API Vec128<float16_t, N> NegMulAdd(Vec128<float16_t, N> mul,
+                                       Vec128<float16_t, N> x,
+                                       Vec128<float16_t, N> add) {
+  return decltype(add){_mm_fnmadd_ph(mul.raw, x.raw, add.raw)};
+}
+
+template <size_t N>  // f16: mul * x - sub
+HWY_API Vec128<float16_t, N> MulSub(Vec128<float16_t, N> mul,
+                                    Vec128<float16_t, N> x,
+                                    Vec128<float16_t, N> sub) {
+  return decltype(sub){_mm_fmsub_ph(mul.raw, x.raw, sub.raw)};
+}
+
+template <size_t N>  // f16: -mul * x - sub
+HWY_API Vec128<float16_t, N> NegMulSub(Vec128<float16_t, N> mul,
+                                       Vec128<float16_t, N> x,
+                                       Vec128<float16_t, N> sub) {
+  return decltype(sub){_mm_fnmsub_ph(mul.raw, x.raw, sub.raw)};
+}
+
+#endif  // HWY_HAVE_FLOAT16
+template <size_t N>  // Returns mul * x + add
+HWY_API Vec128<float, N> MulAdd(Vec128<float, N> mul, Vec128<float, N> x,
+                                Vec128<float, N> add) {
+#if HWY_TARGET < HWY_SSE4
+  return decltype(add){_mm_fmadd_ps(mul.raw, x.raw, add.raw)};
+#else
+  return mul * x + add;  // Unfused fallback.
+#endif
+}
+template <size_t N>  // Returns mul * x + add
+HWY_API Vec128<double, N> MulAdd(Vec128<double, N> mul, Vec128<double, N> x,
+                                 Vec128<double, N> add) {
+#if HWY_TARGET < HWY_SSE4
+  return decltype(add){_mm_fmadd_pd(mul.raw, x.raw, add.raw)};
+#else
+  return mul * x + add;  // Unfused fallback.
+#endif
+}
+
+// Returns add - mul * x (fused where the target has FMA)
+template <size_t N>
+HWY_API Vec128<float, N> NegMulAdd(Vec128<float, N> mul, Vec128<float, N> x,
+                                   Vec128<float, N> add) {
+#if HWY_TARGET < HWY_SSE4
+  return decltype(add){_mm_fnmadd_ps(mul.raw, x.raw, add.raw)};
+#else
+  return add - mul * x;  // Unfused fallback.
+#endif
+}
+template <size_t N>
+HWY_API Vec128<double, N> NegMulAdd(Vec128<double, N> mul, Vec128<double, N> x,
+                                    Vec128<double, N> add) {
+#if HWY_TARGET < HWY_SSE4
+  return decltype(add){_mm_fnmadd_pd(mul.raw, x.raw, add.raw)};
+#else
+  return add - mul * x;  // Unfused fallback.
+#endif
+}
+
+// Returns mul * x - sub (fused where the target has FMA)
+template <size_t N>
+HWY_API Vec128<float, N> MulSub(Vec128<float, N> mul, Vec128<float, N> x,
+                                Vec128<float, N> sub) {
+#if HWY_TARGET < HWY_SSE4
+  return decltype(sub){_mm_fmsub_ps(mul.raw, x.raw, sub.raw)};
+#else
+  return mul * x - sub;  // Unfused fallback.
+#endif
+}
+template <size_t N>
+HWY_API Vec128<double, N> MulSub(Vec128<double, N> mul, Vec128<double, N> x,
+                                 Vec128<double, N> sub) {
+#if HWY_TARGET < HWY_SSE4
+  return decltype(sub){_mm_fmsub_pd(mul.raw, x.raw, sub.raw)};
+#else
+  return mul * x - sub;  // Unfused fallback.
+#endif
+}
+
+// Returns -mul * x - sub (fused where the target has FMA)
+template <size_t N>
+HWY_API Vec128<float, N> NegMulSub(Vec128<float, N> mul, Vec128<float, N> x,
+                                   Vec128<float, N> sub) {
+#if HWY_TARGET < HWY_SSE4
+  return decltype(sub){_mm_fnmsub_ps(mul.raw, x.raw, sub.raw)};
+#else
+  return Neg(mul) * x - sub;  // Unfused fallback.
+#endif
+}
+template <size_t N>
+HWY_API Vec128<double, N> NegMulSub(Vec128<double, N> mul, Vec128<double, N> x,
+                                    Vec128<double, N> sub) {
+#if HWY_TARGET < HWY_SSE4
+  return decltype(sub){_mm_fnmsub_pd(mul.raw, x.raw, sub.raw)};
+#else
+  return Neg(mul) * x - sub;  // Unfused fallback.
+#endif
+}
+
+// ------------------------------ Floating-point square root
+
+// Full precision square root
+#if HWY_HAVE_FLOAT16
+template <size_t N>  // f16 lanes; only compiled when HWY_HAVE_FLOAT16 is set.
+HWY_API Vec128<float16_t, N> Sqrt(Vec128<float16_t, N> v) {
+  return decltype(v){_mm_sqrt_ph(v.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+template <size_t N>  // f32 lanes
+HWY_API Vec128<float, N> Sqrt(Vec128<float, N> v) {
+  return decltype(v){_mm_sqrt_ps(v.raw)};
+}
+HWY_API Vec128<float, 1> Sqrt(Vec128<float, 1> v) {
+  return decltype(v){_mm_sqrt_ss(v.raw)};  // Single-lane (scalar) form.
+}
+template <size_t N>  // f64 lanes
+HWY_API Vec128<double, N> Sqrt(Vec128<double, N> v) {
+  return decltype(v){_mm_sqrt_pd(v.raw)};
+}
+HWY_API Vec64<double> Sqrt(Vec64<double> v) {
+  return decltype(v){_mm_sqrt_sd(_mm_setzero_pd(), v.raw)};  // Upper half: 0.
+}
+
+// Approximate 1/sqrt(v) via the rsqrt intrinsics; the single-lane float
+// overload uses the scalar (_ss) form.
+#if HWY_HAVE_FLOAT16
+template <size_t N>
+HWY_API Vec128<float16_t, N> ApproximateReciprocalSqrt(Vec128<float16_t, N> v) {
+  const auto approx = _mm_rsqrt_ph(v.raw);
+  return Vec128<float16_t, N>{approx};
+}
+#endif  // HWY_HAVE_FLOAT16
+template <size_t N>
+HWY_API Vec128<float, N> ApproximateReciprocalSqrt(Vec128<float, N> v) {
+  const auto approx = _mm_rsqrt_ps(v.raw);
+  return Vec128<float, N>{approx};
+}
+HWY_API Vec128<float, 1> ApproximateReciprocalSqrt(Vec128<float, 1> v) {
+  const auto approx = _mm_rsqrt_ss(v.raw);
+  return Vec128<float, 1>{approx};
+}
+
+// Double-precision approximate reciprocal square root, only compiled when
+// HWY_TARGET <= HWY_AVX3 (uses the rsqrt14 intrinsics).
+#if HWY_TARGET <= HWY_AVX3
+// Toggle HWY_NATIVE_F64_APPROX_RSQRT: undefine it if already defined,
+// otherwise define it.
+// NOTE(review): presumably lets generic code detect that this target provides
+// the op natively - confirm against the code that tests this macro.
+#ifdef HWY_NATIVE_F64_APPROX_RSQRT
+#undef HWY_NATIVE_F64_APPROX_RSQRT
+#else
+#define HWY_NATIVE_F64_APPROX_RSQRT
+#endif
+
+// Single-lane double: scalar form; `v` is passed for both operands.
+HWY_API Vec64<double> ApproximateReciprocalSqrt(Vec64<double> v) {
+  return Vec64<double>{_mm_rsqrt14_sd(v.raw, v.raw)};
+}
+// Full two-lane double vector.
+HWY_API Vec128<double> ApproximateReciprocalSqrt(Vec128<double> v) {
+#if HWY_COMPILER_MSVC
+  // MSVC path: masked form with an all-ones mask (0xFF) and an undefined
+  // pass-through source, so every lane is taken from the computed result.
+  // NOTE(review): presumably works around the unmasked intrinsic being
+  // unavailable on MSVC - confirm.
+  const DFromV<decltype(v)> d;
+  return Vec128<double>{_mm_mask_rsqrt14_pd(
+      Undefined(d).raw, static_cast<__mmask8>(0xFF), v.raw)};
+#else
+  return Vec128<double>{_mm_rsqrt14_pd(v.raw)};
+#endif
+}
+#endif
+
+// ------------------------------ Min (Gt, IfThenElse)
+
+namespace detail {
+
+// Unsigned minimum built from a signed comparison: both inputs are XORed with
+// the most significant bit of T, reinterpreted as signed and compared with >.
+// Lanes where `a` compares greater take `b`; all other lanes take `a`.
+template <typename T, size_t N>
+HWY_INLINE HWY_MAYBE_UNUSED Vec128<T, N> MinU(const Vec128<T, N> a,
+                                              const Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const RebindToSigned<decltype(d)> di;
+  const auto top_bit = Set(du, static_cast<T>(T(1) << (sizeof(T) * 8 - 1)));
+  const auto a_flipped = BitCast(di, a ^ top_bit);
+  const auto b_flipped = BitCast(di, b ^ top_bit);
+  const auto a_is_greater = RebindMask(du, a_flipped > b_flipped);
+  return IfThenElse(a_is_greater, b, a);
+}
+
+}  // namespace detail
+
+// Unsigned minimum. Where the code does not use a native instruction for the
+// selected target, it falls back to detail::MinU.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> Min(Vec128<uint8_t, N> a, Vec128<uint8_t, N> b) {
+  const auto smaller = _mm_min_epu8(a.raw, b.raw);
+  return Vec128<uint8_t, N>{smaller};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> Min(Vec128<uint16_t, N> a, Vec128<uint16_t, N> b) {
+#if HWY_TARGET < HWY_SSSE3
+  const auto smaller = _mm_min_epu16(a.raw, b.raw);
+  return Vec128<uint16_t, N>{smaller};
+#else
+  return detail::MinU(a, b);
+#endif
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> Min(Vec128<uint32_t, N> a, Vec128<uint32_t, N> b) {
+#if HWY_TARGET < HWY_SSSE3
+  const auto smaller = _mm_min_epu32(a.raw, b.raw);
+  return Vec128<uint32_t, N>{smaller};
+#else
+  return detail::MinU(a, b);
+#endif
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, N> Min(Vec128<uint64_t, N> a, Vec128<uint64_t, N> b) {
+#if HWY_TARGET > HWY_AVX3
+  return detail::MinU(a, b);
+#else
+  const auto smaller = _mm_min_epu64(a.raw, b.raw);
+  return Vec128<uint64_t, N>{smaller};
+#endif
+}
+
+// Signed minimum. Where the code does not use a native instruction for the
+// selected target, it selects `a` in lanes where a < b and `b` elsewhere.
+template <size_t N>
+HWY_API Vec128<int8_t, N> Min(Vec128<int8_t, N> a, Vec128<int8_t, N> b) {
+#if HWY_TARGET < HWY_SSSE3
+  const auto smaller = _mm_min_epi8(a.raw, b.raw);
+  return Vec128<int8_t, N>{smaller};
+#else
+  const auto a_is_less = a < b;
+  return IfThenElse(a_is_less, a, b);
+#endif
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> Min(Vec128<int16_t, N> a, Vec128<int16_t, N> b) {
+  const auto smaller = _mm_min_epi16(a.raw, b.raw);
+  return Vec128<int16_t, N>{smaller};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> Min(Vec128<int32_t, N> a, Vec128<int32_t, N> b) {
+#if HWY_TARGET < HWY_SSSE3
+  const auto smaller = _mm_min_epi32(a.raw, b.raw);
+  return Vec128<int32_t, N>{smaller};
+#else
+  const auto a_is_less = a < b;
+  return IfThenElse(a_is_less, a, b);
+#endif
+}
+template <size_t N>
+HWY_API Vec128<int64_t, N> Min(Vec128<int64_t, N> a, Vec128<int64_t, N> b) {
+#if HWY_TARGET > HWY_AVX3
+  const auto a_is_less = a < b;
+  return IfThenElse(a_is_less, a, b);
+#else
+  const auto smaller = _mm_min_epi64(a.raw, b.raw);
+  return Vec128<int64_t, N>{smaller};
+#endif
+}
+
+// Floating-point minimum via the _mm_min_* intrinsics.
+#if HWY_HAVE_FLOAT16
+template <size_t N>
+HWY_API Vec128<float16_t, N> Min(Vec128<float16_t, N> a,
+                                 Vec128<float16_t, N> b) {
+  const auto smaller = _mm_min_ph(a.raw, b.raw);
+  return Vec128<float16_t, N>{smaller};
+}
+#endif  // HWY_HAVE_FLOAT16
+template <size_t N>
+HWY_API Vec128<float, N> Min(Vec128<float, N> a, Vec128<float, N> b) {
+  const auto smaller = _mm_min_ps(a.raw, b.raw);
+  return Vec128<float, N>{smaller};
+}
+template <size_t N>
+HWY_API Vec128<double, N> Min(Vec128<double, N> a, Vec128<double, N> b) {
+  const auto smaller = _mm_min_pd(a.raw, b.raw);
+  return Vec128<double, N>{smaller};
+}
+
+// ------------------------------ Max (Gt, IfThenElse)
+
+namespace detail {
+// Unsigned maximum built from a signed comparison: both inputs are XORed with
+// the most significant bit of T, reinterpreted as signed and compared with >.
+// Lanes where `a` compares greater take `a`; all other lanes take `b`.
+template <typename T, size_t N>
+HWY_INLINE HWY_MAYBE_UNUSED Vec128<T, N> MaxU(const Vec128<T, N> a,
+                                              const Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const RebindToSigned<decltype(d)> di;
+  const auto top_bit = Set(du, static_cast<T>(T(1) << (sizeof(T) * 8 - 1)));
+  const auto a_flipped = BitCast(di, a ^ top_bit);
+  const auto b_flipped = BitCast(di, b ^ top_bit);
+  const auto a_is_greater = RebindMask(du, a_flipped > b_flipped);
+  return IfThenElse(a_is_greater, a, b);
+}
+
+}  // namespace detail
+
+// Unsigned maximum. Where the code does not use a native instruction for the
+// selected target, it falls back to detail::MaxU.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> Max(Vec128<uint8_t, N> a, Vec128<uint8_t, N> b) {
+  const auto larger = _mm_max_epu8(a.raw, b.raw);
+  return Vec128<uint8_t, N>{larger};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> Max(Vec128<uint16_t, N> a, Vec128<uint16_t, N> b) {
+#if HWY_TARGET < HWY_SSSE3
+  const auto larger = _mm_max_epu16(a.raw, b.raw);
+  return Vec128<uint16_t, N>{larger};
+#else
+  return detail::MaxU(a, b);
+#endif
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> Max(Vec128<uint32_t, N> a, Vec128<uint32_t, N> b) {
+#if HWY_TARGET < HWY_SSSE3
+  const auto larger = _mm_max_epu32(a.raw, b.raw);
+  return Vec128<uint32_t, N>{larger};
+#else
+  return detail::MaxU(a, b);
+#endif
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, N> Max(Vec128<uint64_t, N> a, Vec128<uint64_t, N> b) {
+#if HWY_TARGET > HWY_AVX3
+  return detail::MaxU(a, b);
+#else
+  const auto larger = _mm_max_epu64(a.raw, b.raw);
+  return Vec128<uint64_t, N>{larger};
+#endif
+}
+
+// Signed maximum. Where the code does not use a native instruction for the
+// selected target, it selects `b` in lanes where a < b and `a` elsewhere.
+template <size_t N>
+HWY_API Vec128<int8_t, N> Max(Vec128<int8_t, N> a, Vec128<int8_t, N> b) {
+#if HWY_TARGET < HWY_SSSE3
+  const auto larger = _mm_max_epi8(a.raw, b.raw);
+  return Vec128<int8_t, N>{larger};
+#else
+  const auto a_is_less = a < b;
+  return IfThenElse(a_is_less, b, a);
+#endif
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> Max(Vec128<int16_t, N> a, Vec128<int16_t, N> b) {
+  const auto larger = _mm_max_epi16(a.raw, b.raw);
+  return Vec128<int16_t, N>{larger};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> Max(Vec128<int32_t, N> a, Vec128<int32_t, N> b) {
+#if HWY_TARGET < HWY_SSSE3
+  const auto larger = _mm_max_epi32(a.raw, b.raw);
+  return Vec128<int32_t, N>{larger};
+#else
+  const auto a_is_less = a < b;
+  return IfThenElse(a_is_less, b, a);
+#endif
+}
+template <size_t N>
+HWY_API Vec128<int64_t, N> Max(Vec128<int64_t, N> a, Vec128<int64_t, N> b) {
+#if HWY_TARGET > HWY_AVX3
+  const auto a_is_less = a < b;
+  return IfThenElse(a_is_less, b, a);
+#else
+  const auto larger = _mm_max_epi64(a.raw, b.raw);
+  return Vec128<int64_t, N>{larger};
+#endif
+}
+
+// Floating-point maximum via the _mm_max_* intrinsics.
+#if HWY_HAVE_FLOAT16
+template <size_t N>
+HWY_API Vec128<float16_t, N> Max(Vec128<float16_t, N> a,
+                                 Vec128<float16_t, N> b) {
+  const auto larger = _mm_max_ph(a.raw, b.raw);
+  return Vec128<float16_t, N>{larger};
+}
+#endif  // HWY_HAVE_FLOAT16
+template <size_t N>
+HWY_API Vec128<float, N> Max(Vec128<float, N> a, Vec128<float, N> b) {
+  const auto larger = _mm_max_ps(a.raw, b.raw);
+  return Vec128<float, N>{larger};
+}
+template <size_t N>
+HWY_API Vec128<double, N> Max(Vec128<double, N> a, Vec128<double, N> b) {
+  const auto larger = _mm_max_pd(a.raw, b.raw);
+  return Vec128<double, N>{larger};
+}
+
+// ================================================== MEMORY (3)
+
+// ------------------------------ Non-temporal stores
+
+// Non-temporal stores. Because clang6 was seen to generate incorrect code for
+// _mm_stream_pi, even partial vectors are written with the 16-byte
+// instruction.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API void Stream(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT aligned) {
+  const RebindToUnsigned<decltype(d)> du;  // for float16_t
+  const auto bits = BitCast(du, v);
+  __m128i* const dst = reinterpret_cast<__m128i*>(aligned);
+  _mm_stream_si128(dst, bits.raw);
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API void Stream(VFromD<D> v, D /* tag */, float* HWY_RESTRICT aligned) {
+  const auto lanes = v.raw;
+  _mm_stream_ps(aligned, lanes);
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API void Stream(VFromD<D> v, D /* tag */, double* HWY_RESTRICT aligned) {
+  const auto lanes = v.raw;
+  _mm_stream_pd(aligned, lanes);
+}
+
+// ------------------------------ Scatter
+
+// Work around warnings in the intrinsic definitions (passing -1 as a mask).
+// The matching HWY_DIAGNOSTICS(pop) follows the Gather section below.
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4245 4365, ignored "-Wsign-conversion")
+
+// Unfortunately the GCC/Clang intrinsics do not accept int64_t*.
+// GatherIndex64 is the pointer element type used when passing 64-bit lane
+// pointers to the gather intrinsics; its size is verified to be 8 bytes.
+using GatherIndex64 = long long int;  // NOLINT(runtime/int)
+static_assert(sizeof(GatherIndex64) == 8, "Must be 64-bit type");
+
+#if HWY_TARGET <= HWY_AVX3
+
+// Toggle HWY_NATIVE_SCATTER: undefine it if already defined, otherwise define
+// it.
+// NOTE(review): presumably signals to generic code that this target supplies
+// its own Scatter ops - confirm against the code that tests this macro.
+#ifdef HWY_NATIVE_SCATTER
+#undef HWY_NATIVE_SCATTER
+#else
+#define HWY_NATIVE_SCATTER
+#endif
+
+namespace detail {
+
+// NativeScatter128: stores the lanes of `v` to `base`, addressed by `index`
+// with byte scale `kScale`. Full 16-byte vectors use the unmasked intrinsic;
+// partial vectors use the masked form with only the low MaxLanes(d) mask bits
+// set.
+
+// 32-bit integer lanes.
+template <int kScale, class D, class VI, HWY_IF_UI32_D(D)>
+HWY_INLINE void NativeScatter128(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT base,
+                                 VI index) {
+  if (d.MaxBytes() != 16) {
+    const __mmask8 valid_lanes = (1u << MaxLanes(d)) - 1;
+    _mm_mask_i32scatter_epi32(base, valid_lanes, index.raw, v.raw, kScale);
+    return;
+  }
+  _mm_i32scatter_epi32(base, index.raw, v.raw, kScale);
+}
+
+// 64-bit integer lanes.
+template <int kScale, class D, class VI, HWY_IF_UI64_D(D)>
+HWY_INLINE void NativeScatter128(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT base,
+                                 VI index) {
+  if (d.MaxBytes() != 16) {
+    const __mmask8 valid_lanes = (1u << MaxLanes(d)) - 1;
+    _mm_mask_i64scatter_epi64(base, valid_lanes, index.raw, v.raw, kScale);
+    return;
+  }
+  _mm_i64scatter_epi64(base, index.raw, v.raw, kScale);
+}
+
+// float lanes.
+template <int kScale, class D, class VI, HWY_IF_F32_D(D)>
+HWY_INLINE void NativeScatter128(VFromD<D> v, D d, float* HWY_RESTRICT base,
+                                 VI index) {
+  if (d.MaxBytes() != 16) {
+    const __mmask8 valid_lanes = (1u << MaxLanes(d)) - 1;
+    _mm_mask_i32scatter_ps(base, valid_lanes, index.raw, v.raw, kScale);
+    return;
+  }
+  _mm_i32scatter_ps(base, index.raw, v.raw, kScale);
+}
+
+// double lanes.
+template <int kScale, class D, class VI, HWY_IF_F64_D(D)>
+HWY_INLINE void NativeScatter128(VFromD<D> v, D d, double* HWY_RESTRICT base,
+                                 VI index) {
+  if (d.MaxBytes() != 16) {
+    const __mmask8 valid_lanes = (1u << MaxLanes(d)) - 1;
+    _mm_mask_i64scatter_pd(base, valid_lanes, index.raw, v.raw, kScale);
+    return;
+  }
+  _mm_i64scatter_pd(base, index.raw, v.raw, kScale);
+}
+
+// NativeMaskedScatter128: as above, but only lanes whose bit is set in `m` are
+// stored. For partial vectors the mask is first ANDed with FirstN so that the
+// upper mask lanes are zero, which prevents faults.
+
+// 32-bit integer lanes.
+template <int kScale, class D, class VI, HWY_IF_UI32_D(D)>
+HWY_INLINE void NativeMaskedScatter128(VFromD<D> v, MFromD<D> m, D d,
+                                       TFromD<D>* HWY_RESTRICT base, VI index) {
+  if (!detail::IsFull(d)) {
+    const auto in_range = FirstN(d, Lanes(d));
+    m = And(m, in_range);
+  }
+  _mm_mask_i32scatter_epi32(base, m.raw, index.raw, v.raw, kScale);
+}
+
+// 64-bit integer lanes.
+template <int kScale, class D, class VI, HWY_IF_UI64_D(D)>
+HWY_INLINE void NativeMaskedScatter128(VFromD<D> v, MFromD<D> m, D d,
+                                       TFromD<D>* HWY_RESTRICT base, VI index) {
+  if (!detail::IsFull(d)) {
+    const auto in_range = FirstN(d, Lanes(d));
+    m = And(m, in_range);
+  }
+  _mm_mask_i64scatter_epi64(base, m.raw, index.raw, v.raw, kScale);
+}
+
+// float lanes.
+template <int kScale, class D, class VI, HWY_IF_F32_D(D)>
+HWY_INLINE void NativeMaskedScatter128(VFromD<D> v, MFromD<D> m, D d,
+                                       float* HWY_RESTRICT base, VI index) {
+  if (!detail::IsFull(d)) {
+    const auto in_range = FirstN(d, Lanes(d));
+    m = And(m, in_range);
+  }
+  _mm_mask_i32scatter_ps(base, m.raw, index.raw, v.raw, kScale);
+}
+
+// double lanes.
+template <int kScale, class D, class VI, HWY_IF_F64_D(D)>
+HWY_INLINE void NativeMaskedScatter128(VFromD<D> v, MFromD<D> m, D d,
+                                       double* HWY_RESTRICT base, VI index) {
+  if (!detail::IsFull(d)) {
+    const auto in_range = FirstN(d, Lanes(d));
+    m = And(m, in_range);
+  }
+  _mm_mask_i64scatter_pd(base, m.raw, index.raw, v.raw, kScale);
+}
+
+}  // namespace detail
+
+// Scatters `v` to `base` using `offset` with a scale of 1 (offsets are taken
+// as-is rather than multiplied by the lane size).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API void ScatterOffset(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT base,
+                           VFromD<RebindToSigned<D>> offset) {
+  constexpr int kByteScale = 1;
+  detail::NativeScatter128<kByteScale>(v, d, base, offset);
+}
+// Scatters `v` to `base` using `index` scaled by the lane size in bytes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API void ScatterIndex(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT base,
+                          VFromD<RebindToSigned<D>> index) {
+  using T = TFromD<D>;
+  detail::NativeScatter128<sizeof(T)>(v, d, base, index);
+}
+// Masked variant of ScatterIndex: forwards `m` to the masked scatter helper,
+// with `index` scaled by the lane size in bytes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API void MaskedScatterIndex(VFromD<D> v, MFromD<D> m, D d,
+                                TFromD<D>* HWY_RESTRICT base,
+                                VFromD<RebindToSigned<D>> index) {
+  using T = TFromD<D>;
+  detail::NativeMaskedScatter128<sizeof(T)>(v, m, d, base, index);
+}
+
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// ------------------------------ Gather (Load/Store)
+
+#if HWY_TARGET <= HWY_AVX2
+
+// Toggle HWY_NATIVE_GATHER: undefine it if already defined, otherwise define
+// it.
+// NOTE(review): presumably signals to generic code that this target supplies
+// its own Gather ops - confirm against the code that tests this macro.
+#ifdef HWY_NATIVE_GATHER
+#undef HWY_NATIVE_GATHER
+#else
+#define HWY_NATIVE_GATHER
+#endif
+
+namespace detail {
+
+// NativeGather128: loads lanes from `base`, addressed by `index` with byte
+// scale `kScale`, and returns them as a vector. One overload per lane type.
+
+// 32-bit integer lanes; `base` is cast to const int32_t* for the intrinsic.
+template <int kScale, class D, class VI, HWY_IF_UI32_D(D)>
+HWY_INLINE VFromD<D> NativeGather128(D /* tag */,
+                                     const TFromD<D>* HWY_RESTRICT base,
+                                     VI index) {
+  return VFromD<D>{_mm_i32gather_epi32(reinterpret_cast<const int32_t*>(base),
+                                       index.raw, kScale)};
+}
+
+// 64-bit integer lanes; `base` is cast to const GatherIndex64* for the
+// intrinsic.
+template <int kScale, class D, class VI, HWY_IF_UI64_D(D)>
+HWY_INLINE VFromD<D> NativeGather128(D /* tag */,
+                                     const TFromD<D>* HWY_RESTRICT base,
+                                     VI index) {
+  return VFromD<D>{_mm_i64gather_epi64(
+      reinterpret_cast<const GatherIndex64*>(base), index.raw, kScale)};
+}
+
+// float lanes.
+template <int kScale, class D, class VI, HWY_IF_F32_D(D)>
+HWY_INLINE VFromD<D> NativeGather128(D /* tag */,
+                                     const float* HWY_RESTRICT base, VI index) {
+  return VFromD<D>{_mm_i32gather_ps(base, index.raw, kScale)};
+}
+
+// double lanes.
+template <int kScale, class D, class VI, HWY_IF_F64_D(D)>
+HWY_INLINE VFromD<D> NativeGather128(D /* tag */,
+                                     const double* HWY_RESTRICT base,
+                                     VI index) {
+  return VFromD<D>{_mm_i64gather_pd(base, index.raw, kScale)};
+}
+
+// NativeMaskedGather128: masked gather with Zero(d) as the pass-through
+// source operand. Note that the two target branches use different intrinsics
+// with different argument orders: the HWY_TARGET <= HWY_AVX3 branch calls
+// _mm_mmask_* as (src, mask, index, base, scale), the other branch calls
+// _mm_mask_* as (src, base, index, mask, scale).
+
+// 32-bit integer lanes.
+template <int kScale, class D, class VI, HWY_IF_UI32_D(D)>
+HWY_INLINE VFromD<D> NativeMaskedGather128(MFromD<D> m, D d,
+                                           const TFromD<D>* HWY_RESTRICT base,
+                                           VI index) {
+  // For partial vectors, ensure upper mask lanes are zero to prevent faults.
+  if (!detail::IsFull(d)) m = And(m, FirstN(d, Lanes(d)));
+#if HWY_TARGET <= HWY_AVX3
+  return VFromD<D>{_mm_mmask_i32gather_epi32(
+      Zero(d).raw, m.raw, index.raw, reinterpret_cast<const int32_t*>(base),
+      kScale)};
+#else
+  return VFromD<D>{_mm_mask_i32gather_epi32(
+      Zero(d).raw, reinterpret_cast<const int32_t*>(base), index.raw, m.raw,
+      kScale)};
+#endif
+}
+
+// 64-bit integer lanes.
+template <int kScale, class D, class VI, HWY_IF_UI64_D(D)>
+HWY_INLINE VFromD<D> NativeMaskedGather128(MFromD<D> m, D d,
+                                           const TFromD<D>* HWY_RESTRICT base,
+                                           VI index) {
+  // For partial vectors, ensure upper mask lanes are zero to prevent faults.
+  if (!detail::IsFull(d)) m = And(m, FirstN(d, Lanes(d)));
+#if HWY_TARGET <= HWY_AVX3
+  return VFromD<D>{_mm_mmask_i64gather_epi64(
+      Zero(d).raw, m.raw, index.raw,
+      reinterpret_cast<const GatherIndex64*>(base), kScale)};
+#else
+  return VFromD<D>{_mm_mask_i64gather_epi64(
+      Zero(d).raw, reinterpret_cast<const GatherIndex64*>(base), index.raw,
+      m.raw, kScale)};
+#endif
+}
+
+// float lanes.
+template <int kScale, class D, class VI, HWY_IF_F32_D(D)>
+HWY_INLINE VFromD<D> NativeMaskedGather128(MFromD<D> m, D d,
+                                           const float* HWY_RESTRICT base,
+                                           VI index) {
+  // For partial vectors, ensure upper mask lanes are zero to prevent faults.
+  if (!detail::IsFull(d)) m = And(m, FirstN(d, Lanes(d)));
+#if HWY_TARGET <= HWY_AVX3
+  return VFromD<D>{
+      _mm_mmask_i32gather_ps(Zero(d).raw, m.raw, index.raw, base, kScale)};
+#else
+  return VFromD<D>{
+      _mm_mask_i32gather_ps(Zero(d).raw, base, index.raw, m.raw, kScale)};
+#endif
+}
+
+// double lanes.
+template <int kScale, class D, class VI, HWY_IF_F64_D(D)>
+HWY_INLINE VFromD<D> NativeMaskedGather128(MFromD<D> m, D d,
+                                           const double* HWY_RESTRICT base,
+                                           VI index) {
+  // For partial vectors, ensure upper mask lanes are zero to prevent faults.
+  if (!detail::IsFull(d)) m = And(m, FirstN(d, Lanes(d)));
+#if HWY_TARGET <= HWY_AVX3
+  return VFromD<D>{
+      _mm_mmask_i64gather_pd(Zero(d).raw, m.raw, index.raw, base, kScale)};
+#else
+  return VFromD<D>{
+      _mm_mask_i64gather_pd(Zero(d).raw, base, index.raw, m.raw, kScale)};
+#endif
+}
+
+}  // namespace detail
+
+// Gathers lanes from `base` using `offset` with a scale of 1 (offsets are
+// taken as-is rather than multiplied by the lane size). Index lanes must have
+// the same size as T.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), typename T = TFromD<D>, class VI>
+HWY_API VFromD<D> GatherOffset(D d, const T* HWY_RESTRICT base, VI offset) {
+  using TI = TFromV<VI>;
+  static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
+  constexpr int kByteScale = 1;
+  return detail::NativeGather128<kByteScale>(d, base, offset);
+}
+// Gathers lanes from `base` using `index` scaled by sizeof(T). Index lanes
+// must have the same size as T.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), typename T = TFromD<D>, class VI>
+HWY_API VFromD<D> GatherIndex(D d, const T* HWY_RESTRICT base, VI index) {
+  using TI = TFromV<VI>;
+  static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
+  const auto gathered = detail::NativeGather128<sizeof(T)>(d, base, index);
+  return gathered;
+}
+// Masked variant of GatherIndex: forwards `m` to the masked gather helper,
+// with `index` scaled by sizeof(T). Index lanes must have the same size as T.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), typename T = TFromD<D>, class VI>
+HWY_API VFromD<D> MaskedGatherIndex(MFromD<D> m, D d,
+                                    const T* HWY_RESTRICT base, VI index) {
+  using TI = TFromV<VI>;
+  static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
+  const auto gathered =
+      detail::NativeMaskedGather128<sizeof(T)>(m, d, base, index);
+  return gathered;
+}
+
+#endif  // HWY_TARGET <= HWY_AVX2
+
+HWY_DIAGNOSTICS(pop)
+
+// ================================================== SWIZZLE (2)
+
+// ------------------------------ LowerHalf
+
+// Returns the lower half of `v`. The raw register is reused unchanged; only
+// the vector type (lane count) differs.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> LowerHalf(D /* tag */, VFromD<Twice<D>> v) {
+  const auto raw = v.raw;
+  return VFromD<D>{raw};
+}
+// Overload without a tag argument; the result has N / 2 lanes.
+template <typename T, size_t N>
+HWY_API Vec128<T, N / 2> LowerHalf(Vec128<T, N> v) {
+  const auto raw = v.raw;
+  return Vec128<T, N / 2>{raw};
+}
+
+// ------------------------------ ShiftLeftBytes
+
+// Shifts the whole vector left by kBytes bytes (0..16) using _mm_slli_si128
+// on the unsigned reinterpretation of `v`.
+template <int kBytes, class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> ShiftLeftBytes(D d, VFromD<D> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  const RebindToUnsigned<decltype(d)> du;
+  const auto bits = BitCast(du, v);
+  const VFromD<decltype(du)> shifted{_mm_slli_si128(bits.raw, kBytes)};
+  return BitCast(d, shifted);
+}
+
+// Generic for all vector lengths: derives the tag from the vector type.
+template <int kBytes, class V>
+HWY_API V ShiftLeftBytes(const V v) {
+  const DFromV<decltype(v)> d;
+  return ShiftLeftBytes<kBytes>(d, v);
+}
+
+// ------------------------------ ShiftLeftLanes
+
+// Generic for all vector lengths: shifts left by kLanes lanes by converting
+// the lane count to a byte count and shifting the byte view of `v`.
+template <int kLanes, class D>
+HWY_API VFromD<D> ShiftLeftLanes(D d, const VFromD<D> v) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  constexpr size_t kBytes = kLanes * sizeof(TFromD<D>);
+  const auto as_bytes = BitCast(d8, v);
+  return BitCast(d, ShiftLeftBytes<kBytes>(as_bytes));
+}
+
+// Generic for all vector lengths: derives the tag from the vector type.
+template <int kLanes, class V>
+HWY_API V ShiftLeftLanes(const V v) {
+  const DFromV<decltype(v)> d;
+  return ShiftLeftLanes<kLanes>(d, v);
+}
+
+// ------------------------------ ShiftRightBytes
+// Shifts the whole vector right by kBytes bytes (0..16) using _mm_srli_si128.
+template <int kBytes, class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> ShiftRightBytes(D d, VFromD<D> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  const RebindToUnsigned<decltype(d)> du;
+  if (d.MaxBytes() != 16) {
+    // Partial vector: zero the lanes above MaxLanes(d) first, so that the
+    // shift brings in zeros rather than whatever the upper lanes hold.
+    const Full128<TFromD<D>> dfull;
+    const VFromD<decltype(dfull)> widened{v.raw};
+    const auto in_range = FirstN(dfull, MaxLanes(d));
+    v = VFromD<D>{IfThenElseZero(in_range, widened).raw};
+  }
+  const auto bits = BitCast(du, v);
+  const VFromD<decltype(du)> shifted{_mm_srli_si128(bits.raw, kBytes)};
+  return BitCast(d, shifted);
+}
+
+// ------------------------------ ShiftRightLanes
+// Generic for all vector lengths: shifts right by kLanes lanes by converting
+// the lane count to a byte count and shifting the byte view of `v`.
+template <int kLanes, class D>
+HWY_API VFromD<D> ShiftRightLanes(D d, const VFromD<D> v) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  const auto as_bytes = BitCast(d8, v);
+  const auto shifted =
+      ShiftRightBytes<kLanes * sizeof(TFromD<D>)>(d8, as_bytes);
+  return BitCast(d, shifted);
+}
+
+// ------------------------------ UpperHalf (ShiftRightBytes)
+
+// Full (16-byte) input: copy the upper 64 bits into the lower 64 bits, which
+// has a smaller instruction encoding than shifts.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API VFromD<D> UpperHalf(D d, VFromD<Twice<D>> v) {
+  const Twice<RebindToUnsigned<decltype(d)>> dut;
+  using VUT = VFromD<decltype(dut)>;  // for float16_t
+  const VUT bits = BitCast(dut, v);
+  const VUT hi_in_both{_mm_unpackhi_epi64(bits.raw, bits.raw)};
+  return BitCast(d, LowerHalf(hi_in_both));
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_F32_D(D)>
+HWY_API Vec64<float> UpperHalf(D /* tag */, Vec128<float> v) {
+  const auto hi_in_lo = _mm_movehl_ps(v.raw, v.raw);
+  return Vec64<float>{hi_in_lo};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_F64_D(D)>
+HWY_API Vec64<double> UpperHalf(D /* tag */, Vec128<double> v) {
+  const auto hi_in_both = _mm_unpackhi_pd(v.raw, v.raw);
+  return Vec64<double>{hi_in_both};
+}
+
+// Partial input: shift right by the size of the half, then take the lower
+// half of the result.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4)>
+HWY_API VFromD<D> UpperHalf(D d, VFromD<Twice<D>> v) {
+  const Twice<D> dt;
+  const auto shifted = ShiftRightBytes<d.MaxBytes()>(dt, v);
+  return LowerHalf(d, shifted);
+}
+
+// ------------------------------ ExtractLane (UpperHalf)
+
+namespace detail {
+
+// Compile-time-indexed lane extraction; one overload per lane size/type.
+
+// 1-byte lanes. For HWY_TARGET >= HWY_SSSE3, the containing 16-bit pair is
+// extracted and the wanted byte is shifted down; otherwise _mm_extract_epi8.
+template <size_t kLane, typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_INLINE T ExtractLane(const Vec128<T, N> v) {
+  static_assert(kLane < N, "Lane index out of bounds");
+#if HWY_TARGET < HWY_SSSE3
+  const int byte = _mm_extract_epi8(v.raw, kLane) & 0xFF;
+  return static_cast<T>(byte);
+#else
+  constexpr int kShift = (kLane & 1) ? 8 : 0;
+  const int pair = _mm_extract_epi16(v.raw, kLane / 2);
+  const int byte = (pair >> kShift) & 0xFF;
+  return static_cast<T>(byte);
+#endif
+}
+
+// 2-byte lanes: extract as uint16_t bits, then copy the bits into T.
+template <size_t kLane, typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE T ExtractLane(const Vec128<T, N> v) {
+  static_assert(kLane < N, "Lane index out of bounds");
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto as_unsigned = BitCast(du, v);
+  const uint16_t lane_bits = static_cast<uint16_t>(
+      _mm_extract_epi16(as_unsigned.raw, kLane) & 0xFFFF);
+  T result;
+  CopySameSize(&lane_bits, &result);  // for float16_t
+  return result;
+}
+
+// 32-bit integer lanes. For HWY_TARGET >= HWY_SSSE3, a non-zero lane is first
+// shuffled into the lowest position and then read via _mm_cvtsi128_si32.
+template <size_t kLane, typename T, size_t N, HWY_IF_UI32(T)>
+HWY_INLINE T ExtractLane(const Vec128<T, N> v) {
+  static_assert(kLane < N, "Lane index out of bounds");
+#if HWY_TARGET < HWY_SSSE3
+  return static_cast<T>(_mm_extract_epi32(v.raw, kLane));
+#else
+  const __m128i lane_in_lowest =
+      (kLane == 0) ? v.raw : _mm_shuffle_epi32(v.raw, kLane);
+  return static_cast<T>(_mm_cvtsi128_si32(lane_in_lowest));
+#endif
+}
+
+// 64-bit integer lanes. On 32-bit x86 the vector is stored to memory and the
+// lane is read back from the array.
+template <size_t kLane, typename T, size_t N, HWY_IF_UI64(T)>
+HWY_INLINE T ExtractLane(const Vec128<T, N> v) {
+  static_assert(kLane < N, "Lane index out of bounds");
+#if HWY_ARCH_X86_32
+  const DFromV<decltype(v)> d;
+  alignas(16) T lanes[2];
+  Store(v, d, lanes);
+  return lanes[kLane];
+#elif HWY_TARGET >= HWY_SSSE3
+  const __m128i lane_in_lowest =
+      (kLane == 0) ? v.raw : _mm_shuffle_epi32(v.raw, 0xEE);
+  return static_cast<T>(_mm_cvtsi128_si64(lane_in_lowest));
+#else
+  return static_cast<T>(_mm_extract_epi64(v.raw, kLane));
+#endif
+}
+
+// float lanes.
+template <size_t kLane, size_t N>
+HWY_INLINE float ExtractLane(const Vec128<float, N> v) {
+  static_assert(kLane < N, "Lane index out of bounds");
+#if HWY_TARGET < HWY_SSSE3
+  // Bug in the intrinsic, returns int but should be float.
+  const int32_t lane_bits = _mm_extract_ps(v.raw, kLane);
+  float result;
+  CopySameSize(&lane_bits, &result);
+  return result;
+#else
+  const __m128 lane_in_lowest =
+      (kLane == 0) ? v.raw : _mm_shuffle_ps(v.raw, v.raw, kLane);
+  return _mm_cvtss_f32(lane_in_lowest);
+#endif
+}
+
+// There is no extract_pd; two overloads because there is no UpperHalf for N=1.
+template <size_t kLane>
+HWY_INLINE double ExtractLane(const Vec64<double> v) {
+  static_assert(kLane == 0, "Lane index out of bounds");
+  const double only_lane = GetLane(v);
+  return only_lane;
+}
+
+template <size_t kLane>
+HWY_INLINE double ExtractLane(const Vec128<double> v) {
+  static_assert(kLane < 2, "Lane index out of bounds");
+  const Half<DFromV<decltype(v)>> dh;
+  if (kLane == 0) {
+    return GetLane(v);
+  }
+  return GetLane(UpperHalf(dh, v));
+}
+
+}  // namespace detail
+
+// One ExtractLane overload exists per vector length, because instantiating
+// e.g. ExtractLane<3> may be a compile error if it calls _mm_extract_epi64.
+// Single-lane vectors: the only valid index is 0.
+template <typename T>
+HWY_API T ExtractLane(const Vec128<T, 1> v, size_t i) {
+  HWY_DASSERT(i == 0);
+  (void)i;
+  const T only_lane = GetLane(v);
+  return only_lane;
+}
+
+// Returns lane `i` (0 or 1) of a two-lane vector.
+template <typename T>
+HWY_API T ExtractLane(const Vec128<T, 2> v, size_t i) {
+  // Bounds check via HWY_DASSERT, as in the single-lane overload; an
+  // out-of-range `i` would otherwise index past the end of `lanes` below.
+  HWY_DASSERT(i < 2);
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  // If the compiler knows `i` is constant, dispatch to the compile-time
+  // indexed implementation instead of going through memory.
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::ExtractLane<0>(v);
+      case 1:
+        return detail::ExtractLane<1>(v);
+    }
+  }
+#endif
+  // Runtime index: store all lanes to an aligned array and read one back.
+  alignas(16) T lanes[2];
+  Store(v, DFromV<decltype(v)>(), lanes);
+  return lanes[i];
+}
+
+// Returns lane `i` (0..3) of a four-lane vector.
+template <typename T>
+HWY_API T ExtractLane(const Vec128<T, 4> v, size_t i) {
+  // Bounds check via HWY_DASSERT, as in the single-lane overload; an
+  // out-of-range `i` would otherwise index past the end of `lanes` below.
+  HWY_DASSERT(i < 4);
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  // If the compiler knows `i` is constant, dispatch to the compile-time
+  // indexed implementation instead of going through memory.
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::ExtractLane<0>(v);
+      case 1:
+        return detail::ExtractLane<1>(v);
+      case 2:
+        return detail::ExtractLane<2>(v);
+      case 3:
+        return detail::ExtractLane<3>(v);
+    }
+  }
+#endif
+  // Runtime index: store all lanes to an aligned array and read one back.
+  alignas(16) T lanes[4];
+  Store(v, DFromV<decltype(v)>(), lanes);
+  return lanes[i];
+}
+
+// Returns lane `i` (0..7) of an eight-lane vector.
+template <typename T>
+HWY_API T ExtractLane(const Vec128<T, 8> v, size_t i) {
+  // Bounds check via HWY_DASSERT, as in the single-lane overload; an
+  // out-of-range `i` would otherwise index past the end of `lanes` below.
+  HWY_DASSERT(i < 8);
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  // If the compiler knows `i` is constant, dispatch to the compile-time
+  // indexed implementation instead of going through memory.
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::ExtractLane<0>(v);
+      case 1:
+        return detail::ExtractLane<1>(v);
+      case 2:
+        return detail::ExtractLane<2>(v);
+      case 3:
+        return detail::ExtractLane<3>(v);
+      case 4:
+        return detail::ExtractLane<4>(v);
+      case 5:
+        return detail::ExtractLane<5>(v);
+      case 6:
+        return detail::ExtractLane<6>(v);
+      case 7:
+        return detail::ExtractLane<7>(v);
+    }
+  }
+#endif
+  // Runtime index: store all lanes to an aligned array and read one back.
+  alignas(16) T lanes[8];
+  Store(v, DFromV<decltype(v)>(), lanes);
+  return lanes[i];
+}
+
+// Returns lane `i` (0..15) of a sixteen-lane vector.
+template <typename T>
+HWY_API T ExtractLane(const Vec128<T, 16> v, size_t i) {
+  // Bounds check via HWY_DASSERT, as in the single-lane overload; an
+  // out-of-range `i` would otherwise index past the end of `lanes` below.
+  HWY_DASSERT(i < 16);
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  // If the compiler knows `i` is constant, dispatch to the compile-time
+  // indexed implementation instead of going through memory.
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::ExtractLane<0>(v);
+      case 1:
+        return detail::ExtractLane<1>(v);
+      case 2:
+        return detail::ExtractLane<2>(v);
+      case 3:
+        return detail::ExtractLane<3>(v);
+      case 4:
+        return detail::ExtractLane<4>(v);
+      case 5:
+        return detail::ExtractLane<5>(v);
+      case 6:
+        return detail::ExtractLane<6>(v);
+      case 7:
+        return detail::ExtractLane<7>(v);
+      case 8:
+        return detail::ExtractLane<8>(v);
+      case 9:
+        return detail::ExtractLane<9>(v);
+      case 10:
+        return detail::ExtractLane<10>(v);
+      case 11:
+        return detail::ExtractLane<11>(v);
+      case 12:
+        return detail::ExtractLane<12>(v);
+      case 13:
+        return detail::ExtractLane<13>(v);
+      case 14:
+        return detail::ExtractLane<14>(v);
+      case 15:
+        return detail::ExtractLane<15>(v);
+    }
+  }
+#endif
+  // Runtime index: store all lanes to an aligned array and read one back.
+  alignas(16) T lanes[16];
+  Store(v, DFromV<decltype(v)>(), lanes);
+  return lanes[i];
+}
+
+// ------------------------------ InsertLane (UpperHalf)
+
+namespace detail {
+
+// Returns `v` with lane `i` replaced by `t`, for a runtime index: builds a
+// mask that is set only in lane `i` and blends a broadcast of `t` into `v`.
+template <class V>
+HWY_INLINE V InsertLaneUsingBroadcastAndBlend(V v, size_t i, TFromV<V> t) {
+  const DFromV<decltype(v)> d;
+
+#if HWY_TARGET <= HWY_AVX3
+  // Mask is built directly from the single bit at position `i`.
+  using RawMask = decltype(MaskFromVec(VFromD<decltype(d)>()).raw);
+  const auto mask = MFromD<decltype(d)>{static_cast<RawMask>(uint64_t{1} << i)};
+#else
+  // Mask is the result of comparing the lane indices 0, 1, ... against `i`.
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  const auto mask = RebindMask(d, Iota(du, 0) == Set(du, static_cast<TU>(i)));
+#endif
+
+  return IfThenElse(mask, Set(d, t), v);
+}
+
+// Compile-time-indexed lane insertion; one overload per lane size/type.
+
+// 1-byte lanes.
+template <size_t kLane, typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_INLINE Vec128<T, N> InsertLane(const Vec128<T, N> v, T t) {
+  static_assert(kLane < N, "Lane index out of bounds");
+#if HWY_TARGET >= HWY_SSSE3
+  return InsertLaneUsingBroadcastAndBlend(v, kLane, t);
+#else
+  return Vec128<T, N>{_mm_insert_epi8(v.raw, t, kLane)};
+#endif
+}
+
+// 2-byte lanes: the bits of `t` are copied into a uint16_t and inserted into
+// the unsigned reinterpretation of `v`.
+template <size_t kLane, typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE Vec128<T, N> InsertLane(const Vec128<T, N> v, T t) {
+  static_assert(kLane < N, "Lane index out of bounds");
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  uint16_t bits;
+  CopySameSize(&t, &bits);  // for float16_t
+  return BitCast(d, VFromD<decltype(du)>{
+                        _mm_insert_epi16(BitCast(du, v).raw, bits, kLane)});
+}
+
+// 32-bit integer lanes.
+template <size_t kLane, typename T, size_t N, HWY_IF_UI32(T)>
+HWY_INLINE Vec128<T, N> InsertLane(const Vec128<T, N> v, T t) {
+  static_assert(kLane < N, "Lane index out of bounds");
+#if HWY_TARGET >= HWY_SSSE3
+  return InsertLaneUsingBroadcastAndBlend(v, kLane, t);
+#else
+  MakeSigned<T> ti;
+  CopySameSize(&t, &ti);  // don't just cast because T might be float.
+  return Vec128<T, N>{_mm_insert_epi32(v.raw, ti, kLane)};
+#endif
+}
+
+// 64-bit integer lanes. For HWY_TARGET >= HWY_SSSE3 or 32-bit x86, `t` is
+// broadcast and combined with `v` via _mm_shuffle_pd on the double
+// reinterpretation.
+template <size_t kLane, typename T, size_t N, HWY_IF_UI64(T)>
+HWY_INLINE Vec128<T, N> InsertLane(const Vec128<T, N> v, T t) {
+  static_assert(kLane < N, "Lane index out of bounds");
+#if HWY_TARGET >= HWY_SSSE3 || HWY_ARCH_X86_32
+  const DFromV<decltype(v)> d;
+  const RebindToFloat<decltype(d)> df;
+  const auto vt = BitCast(df, Set(d, t));
+  if (kLane == 0) {
+    // Immediate 2: lower lane from vt (first operand), upper lane from v.
+    return BitCast(
+        d, Vec128<double, N>{_mm_shuffle_pd(vt.raw, BitCast(df, v).raw, 2)});
+  }
+  // Immediate 0: lower lane from v (first operand), upper lane from vt's
+  // lower lane.
+  return BitCast(
+      d, Vec128<double, N>{_mm_shuffle_pd(BitCast(df, v).raw, vt.raw, 0)});
+#else
+  MakeSigned<T> ti;
+  CopySameSize(&t, &ti);  // don't just cast because T might be float.
+  return Vec128<T, N>{_mm_insert_epi64(v.raw, ti, kLane)};
+#endif
+}
+
+// float lanes.
+template <size_t kLane, size_t N>
+HWY_INLINE Vec128<float, N> InsertLane(const Vec128<float, N> v, float t) {
+  static_assert(kLane < N, "Lane index out of bounds");
+#if HWY_TARGET >= HWY_SSSE3
+  return InsertLaneUsingBroadcastAndBlend(v, kLane, t);
+#else
+  // kLane << 4 moves the lane index into bits 4 and up of the immediate.
+  // NOTE(review): per the _mm_insert_ps documentation these bits select the
+  // destination lane - confirm.
+  return Vec128<float, N>{_mm_insert_ps(v.raw, _mm_set_ss(t), kLane << 4)};
+#endif
+}
+
+// There is no insert_pd; two overloads because there is no UpperHalf for N=1.
+template <size_t kLane>
+HWY_INLINE Vec128<double, 1> InsertLane(const Vec128<double, 1> v, double t) {
+  static_assert(kLane == 0, "Lane index out of bounds");
+  // Only one lane exists, so the result is simply `t` broadcast.
+  return Set(DFromV<decltype(v)>(), t);
+}
+
+template <size_t kLane>
+HWY_INLINE Vec128<double> InsertLane(const Vec128<double> v, double t) {
+  static_assert(kLane < 2, "Lane index out of bounds");
+  const DFromV<decltype(v)> d;
+  const Vec128<double> vt = Set(d, t);
+  if (kLane == 0) {
+    // Immediate 2: lower lane from vt (first operand), upper lane from v.
+    return Vec128<double>{_mm_shuffle_pd(vt.raw, v.raw, 2)};
+  }
+  // Immediate 0: lower lane from v (first operand), upper lane from vt's
+  // lower lane.
+  return Vec128<double>{_mm_shuffle_pd(v.raw, vt.raw, 0)};
+}
+
+}  // namespace detail
+
+// One InsertLane overload exists per vector length, because instantiating
+// e.g. InsertLane<3> may be a compile error if it calls _mm_insert_epi64.
+
+// Single-lane vectors: the only valid index is 0, so the result is `t`
+// broadcast to the (single-lane) vector.
+template <typename T>
+HWY_API Vec128<T, 1> InsertLane(const Vec128<T, 1> v, size_t i, T t) {
+  HWY_DASSERT(i == 0);
+  (void)i;
+  const DFromV<decltype(v)> d;
+  return Set(d, t);
+}
+
+// Returns `v` with lane `i` (0 or 1) replaced by `t`.
+template <typename T>
+HWY_API Vec128<T, 2> InsertLane(const Vec128<T, 2> v, size_t i, T t) {
+  // Bounds check via HWY_DASSERT, as in the single-lane overload; the runtime
+  // fallback below builds its lane mask from `i` without validating it.
+  HWY_DASSERT(i < 2);
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  // If the compiler knows `i` is constant, dispatch to the compile-time
+  // indexed implementation.
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::InsertLane<0>(v, t);
+      case 1:
+        return detail::InsertLane<1>(v, t);
+    }
+  }
+#endif
+  // Runtime index: broadcast `t` and blend it into lane `i`.
+  return detail::InsertLaneUsingBroadcastAndBlend(v, i, t);
+}
+
+// Returns `v` (four lanes) with lane `i` replaced by `t`; `i` is a runtime
+// value.
+template <typename T>
+HWY_API Vec128<T, 4> InsertLane(const Vec128<T, 4> v, size_t i, T t) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  // Fast path: if the compiler can prove `i` is constant, dispatch to the
+  // overload that takes the lane index as a template argument.
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::InsertLane<0>(v, t);
+      case 1:
+        return detail::InsertLane<1>(v, t);
+      case 2:
+        return detail::InsertLane<2>(v, t);
+      case 3:
+        return detail::InsertLane<3>(v, t);
+    }
+  }
+#endif
+  // General path for indices not known at compile time (and any index not
+  // handled by the switch above).
+  return detail::InsertLaneUsingBroadcastAndBlend(v, i, t);
+}
+
+// Returns `v` (eight lanes) with lane `i` replaced by `t`; `i` is a runtime
+// value.
+template <typename T>
+HWY_API Vec128<T, 8> InsertLane(const Vec128<T, 8> v, size_t i, T t) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  // Fast path: if the compiler can prove `i` is constant, dispatch to the
+  // overload that takes the lane index as a template argument.
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::InsertLane<0>(v, t);
+      case 1:
+        return detail::InsertLane<1>(v, t);
+      case 2:
+        return detail::InsertLane<2>(v, t);
+      case 3:
+        return detail::InsertLane<3>(v, t);
+      case 4:
+        return detail::InsertLane<4>(v, t);
+      case 5:
+        return detail::InsertLane<5>(v, t);
+      case 6:
+        return detail::InsertLane<6>(v, t);
+      case 7:
+        return detail::InsertLane<7>(v, t);
+    }
+  }
+#endif
+  // General path for indices not known at compile time (and any index not
+  // handled by the switch above).
+  return detail::InsertLaneUsingBroadcastAndBlend(v, i, t);
+}
+
+// Returns `v` (sixteen lanes) with lane `i` replaced by `t`; `i` is a runtime
+// value.
+template <typename T>
+HWY_API Vec128<T, 16> InsertLane(const Vec128<T, 16> v, size_t i, T t) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  // Fast path: if the compiler can prove `i` is constant, dispatch to the
+  // overload that takes the lane index as a template argument.
+  if (__builtin_constant_p(i)) {
+    switch (i) {
+      case 0:
+        return detail::InsertLane<0>(v, t);
+      case 1:
+        return detail::InsertLane<1>(v, t);
+      case 2:
+        return detail::InsertLane<2>(v, t);
+      case 3:
+        return detail::InsertLane<3>(v, t);
+      case 4:
+        return detail::InsertLane<4>(v, t);
+      case 5:
+        return detail::InsertLane<5>(v, t);
+      case 6:
+        return detail::InsertLane<6>(v, t);
+      case 7:
+        return detail::InsertLane<7>(v, t);
+      case 8:
+        return detail::InsertLane<8>(v, t);
+      case 9:
+        return detail::InsertLane<9>(v, t);
+      case 10:
+        return detail::InsertLane<10>(v, t);
+      case 11:
+        return detail::InsertLane<11>(v, t);
+      case 12:
+        return detail::InsertLane<12>(v, t);
+      case 13:
+        return detail::InsertLane<13>(v, t);
+      case 14:
+        return detail::InsertLane<14>(v, t);
+      case 15:
+        return detail::InsertLane<15>(v, t);
+    }
+  }
+#endif
+  // General path for indices not known at compile time (and any index not
+  // handled by the switch above).
+  return detail::InsertLaneUsingBroadcastAndBlend(v, i, t);
+}
+
+// ------------------------------ CombineShiftRightBytes
+
+#if HWY_TARGET == HWY_SSE2
+// SSE2, full vector: combines `lo` shifted right by kBytes with `hi` shifted
+// left by the remaining (16 - kBytes) bytes.
+template <int kBytes, class D, HWY_IF_V_SIZE_D(D, 16)>
+HWY_API VFromD<D> CombineShiftRightBytes(D d, VFromD<D> hi, VFromD<D> lo) {
+  static_assert(0 < kBytes && kBytes < 16, "kBytes invalid");
+  const auto from_lo = ShiftRightBytes<kBytes>(d, lo);
+  const auto from_hi = ShiftLeftBytes<16 - kBytes>(d, hi);
+  return Or(from_lo, from_hi);
+}
+// SSE2, partial vector: concatenates hi:lo into a vector of twice the size,
+// shifts that right by kBytes and keeps the raw result.
+template <int kBytes, class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> CombineShiftRightBytes(D d, VFromD<D> hi, VFromD<D> lo) {
+  constexpr size_t kSize = d.MaxBytes();
+  static_assert(0 < kBytes && kBytes < kSize, "kBytes invalid");
+  const Twice<decltype(d)> dt;
+  const auto combined = Combine(dt, hi, lo);
+  const auto shifted = ShiftRightBytes<kBytes>(dt, combined);
+  return VFromD<D>{shifted.raw};
+}
+#else
+// Full vector: a single byte-wise align instruction on the u8 views of
+// `hi` and `lo`.
+template <int kBytes, class D, HWY_IF_V_SIZE_D(D, 16)>
+HWY_API VFromD<D> CombineShiftRightBytes(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  const auto hi_bytes = BitCast(d8, hi);
+  const auto lo_bytes = BitCast(d8, lo);
+  const Vec128<uint8_t> aligned{
+      _mm_alignr_epi8(hi_bytes.raw, lo_bytes.raw, kBytes)};
+  return BitCast(d, aligned);
+}
+
+// Partial vector: reuses the full-vector overload after moving `lo` to the
+// top of a full register.
+template <int kBytes, class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> CombineShiftRightBytes(D d, VFromD<D> hi, VFromD<D> lo) {
+  constexpr size_t kSize = d.MaxBytes();
+  static_assert(0 < kBytes && kBytes < kSize, "kBytes invalid");
+  using V8 = Vec128<uint8_t>;
+  const Repartition<uint8_t, decltype(d)> d8;
+  const DFromV<V8> d8_full;
+  const Repartition<TFromD<D>, decltype(d8_full)> d_full;
+  const V8 hi_bytes{BitCast(d8, hi).raw};
+  const V8 lo_bytes{BitCast(d8, lo).raw};
+  // Shift lo into the most-significant bytes so it sits directly below hi
+  // in the hi:lo concatenation; the extra (16 - kSize) is added to the shift.
+  const V8 lo_top = ShiftLeftBytes<16 - kSize>(lo_bytes);
+  const V8 shifted =
+      CombineShiftRightBytes<16 - kSize + kBytes>(d8_full, hi_bytes, lo_top);
+  return VFromD<D>{BitCast(d_full, shifted).raw};
+}
+#endif
+
+// ------------------------------ Broadcast/splat any lane
+
+// Broadcasts 16-bit lane `kLane` to all lanes.
+template <int kLane, typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec128<T, N> Broadcast(const Vec128<T, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const VU bits = BitCast(du, v);  // for float16_t
+  // Step 1: replicate the lane within its 64-bit half (0x55 * k repeats the
+  // 2-bit index k in all four selector fields). Step 2: copy that half to
+  // both halves.
+  if (kLane >= 4) {
+    const __m128i upper =
+        _mm_shufflehi_epi16(bits.raw, (0x55 * (kLane - 4)) & 0xFF);
+    return BitCast(d, VU{_mm_unpackhi_epi64(upper, upper)});
+  }
+  const __m128i lower = _mm_shufflelo_epi16(bits.raw, (0x55 * kLane) & 0xFF);
+  return BitCast(d, VU{_mm_unpacklo_epi64(lower, lower)});
+}
+
+// Broadcasts 32-bit integer lane `kLane` to all lanes.
+template <int kLane, typename T, size_t N, HWY_IF_UI32(T)>
+HWY_API Vec128<T, N> Broadcast(const Vec128<T, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  // 0x55 * k repeats the 2-bit index k in all four selector fields.
+  constexpr int kShuffle = 0x55 * kLane;
+  return Vec128<T, N>{_mm_shuffle_epi32(v.raw, kShuffle)};
+}
+
+// Broadcasts 64-bit integer lane `kLane` to both lanes.
+template <int kLane, typename T, size_t N, HWY_IF_UI64(T)>
+HWY_API Vec128<T, N> Broadcast(const Vec128<T, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  // Expressed as a 32-bit shuffle: 0x44 selects dwords {0,1,0,1} (lane 0),
+  // 0xEE selects dwords {2,3,2,3} (lane 1).
+  constexpr int kShuffle = kLane ? 0xEE : 0x44;
+  return Vec128<T, N>{_mm_shuffle_epi32(v.raw, kShuffle)};
+}
+
+// Broadcasts float lane `kLane` to all lanes.
+template <int kLane, size_t N>
+HWY_API Vec128<float, N> Broadcast(const Vec128<float, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  // 0x55 * k repeats the 2-bit index k in all four selector fields.
+  constexpr int kShuffle = 0x55 * kLane;
+  return Vec128<float, N>{_mm_shuffle_ps(v.raw, v.raw, kShuffle)};
+}
+
+// Broadcasts double lane `kLane` to both lanes.
+template <int kLane, size_t N>
+HWY_API Vec128<double, N> Broadcast(const Vec128<double, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  // 3 * k sets both 1-bit selectors to k.
+  constexpr int kShuffle = 3 * kLane;
+  return Vec128<double, N>{_mm_shuffle_pd(v.raw, v.raw, kShuffle)};
+}
+
+// ------------------------------ TableLookupLanes (Shuffle01)
+
+// Returned by SetTableIndices/IndicesFromVec for use by TableLookupLanes.
+template <typename T, size_t N = 16 / sizeof(T)>
+struct Indices128 {
+  // Index vector as produced by IndicesFromVec: either the caller's lane
+  // indices unchanged or byte indices derived from them, depending on the
+  // lane size and target.
+  __m128i raw;
+};
+
+// 8-bit lanes: wraps `vec` as table indices without modification.
+template <class D, typename T = TFromD<D>, typename TI, size_t kN,
+          HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE(T, 1)>
+HWY_API Indices128<T, kN> IndicesFromVec(D d, Vec128<TI, kN> vec) {
+  static_assert(sizeof(T) == sizeof(TI), "Index size must match lane");
+#if HWY_IS_DEBUG_BUILD
+  // Debug-only range check: every index must lie in [0, 2 * kN).
+  const Rebind<TI, decltype(d)> di;
+  HWY_DASSERT(AllFalse(di, Lt(vec, Zero(di))) &&
+              AllTrue(di, Lt(vec, Set(di, kN * 2))));
+#endif
+  // For 8-bit lanes a lane index already is a byte index.
+  (void)d;
+  const Indices128<T, kN> indices{vec.raw};
+  return indices;
+}
+
+// 16-bit lanes: converts lane indices in `vec` to the form expected by
+// TableLookupLanes on the current target.
+template <class D, typename T = TFromD<D>, typename TI, size_t kN,
+          HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE(T, 2)>
+HWY_API Indices128<T, kN> IndicesFromVec(D d, Vec128<TI, kN> vec) {
+  static_assert(sizeof(T) == sizeof(TI), "Index size must match lane");
+#if HWY_IS_DEBUG_BUILD
+  // Debug-only range check: every index must lie in [0, 2 * kN).
+  const Rebind<TI, decltype(d)> di;
+  HWY_DASSERT(AllFalse(di, Lt(vec, Zero(di))) &&
+              AllTrue(di, Lt(vec, Set(di, kN * 2))));
+#endif
+
+#if HWY_TARGET <= HWY_AVX3 || HWY_TARGET == HWY_SSE2
+  // These targets look up by lane index, so no conversion is needed.
+  (void)d;
+  return Indices128<T, kN>{vec.raw};
+#else   // SSSE3, SSE4, or AVX2
+  // Byte shuffles are used here, so build byte indices:
+  // byte_index = lane_index * 2 + (offset of the byte within its lane).
+  const Repartition<uint8_t, decltype(d)> d8;
+  using V8 = VFromD<decltype(d8)>;
+
+  // Copies the low byte of each lane index into both bytes of its lane.
+  alignas(16) static constexpr uint8_t kDupLowByte[16] = {
+      0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14};
+  // Offset of each byte within its 16-bit lane.
+  alignas(16) static constexpr uint8_t kOffsetInLane[16] = {
+      0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
+
+  const V8 lane_idx = TableLookupBytes(vec, Load(d8, kDupLowByte));
+
+  // Multiply by sizeof(T) == 2 to get the index of each lane's first byte.
+  const Repartition<uint16_t, decltype(d)> d16;
+  const V8 first_byte = BitCast(d8, ShiftLeft<1>(BitCast(d16, lane_idx)));
+
+  return Indices128<T, kN>{Add(first_byte, Load(d8, kOffsetInLane)).raw};
+#endif  // HWY_TARGET <= HWY_AVX3 || HWY_TARGET == HWY_SSE2
+}
+
+// 32-bit lanes: converts lane indices in `vec` to the form expected by
+// TableLookupLanes on the current target.
+template <class D, typename T = TFromD<D>, typename TI, size_t kN,
+          HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE(T, 4)>
+HWY_API Indices128<T, kN> IndicesFromVec(D d, Vec128<TI, kN> vec) {
+  static_assert(sizeof(T) == sizeof(TI), "Index size must match lane");
+#if HWY_IS_DEBUG_BUILD
+  // Debug-only range check: every index must lie in [0, 2 * kN).
+  const Rebind<TI, decltype(d)> di;
+  HWY_DASSERT(AllFalse(di, Lt(vec, Zero(di))) &&
+              AllTrue(di, Lt(vec, Set(di, kN * 2))));
+#endif
+
+#if HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_SSE2
+  // These targets look up by lane index, so no conversion is needed.
+  (void)d;
+  return Indices128<T, kN>{vec.raw};
+#else
+  // Byte shuffles are used here, so build byte indices:
+  // byte_index = lane_index * 4 + (offset of the byte within its lane).
+  const Repartition<uint8_t, decltype(d)> d8;
+  using V8 = VFromD<decltype(d8)>;
+
+  // Copies the low byte of each lane index into all 4 bytes of its lane.
+  alignas(16) static constexpr uint8_t kDupLowByte[16] = {
+      0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12};
+  // Offset of each byte within its 32-bit lane.
+  alignas(16) static constexpr uint8_t kOffsetInLane[16] = {
+      0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
+
+  const V8 lane_idx = TableLookupBytes(vec, Load(d8, kDupLowByte));
+
+  // Multiply by sizeof(T) == 4 to get the index of each lane's first byte.
+  const Repartition<uint16_t, decltype(d)> d16;
+  const V8 first_byte = BitCast(d8, ShiftLeft<2>(BitCast(d16, lane_idx)));
+
+  return Indices128<T, kN>{Add(first_byte, Load(d8, kOffsetInLane)).raw};
+#endif
+}
+
+// 64-bit lanes: wraps `vec` as table indices without modification.
+template <class D, typename T = TFromD<D>, typename TI, size_t kN,
+          HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE(T, 8)>
+HWY_API Indices128<T, kN> IndicesFromVec(D d, Vec128<TI, kN> vec) {
+  static_assert(sizeof(T) == sizeof(TI), "Index size must match lane");
+#if HWY_IS_DEBUG_BUILD
+  // Debug-only range check: every index must lie in [0, 2 * kN).
+  const Rebind<TI, decltype(d)> di;
+  const auto limit = Set(di, static_cast<TI>(kN * 2));
+  HWY_DASSERT(AllFalse(di, Lt(vec, Zero(di))) && AllTrue(di, Lt(vec, limit)));
+#else
+  (void)d;
+#endif
+  // No change - even without AVX3, we can shuffle+blend.
+  const Indices128<T, kN> indices{vec.raw};
+  return indices;
+}
+
+// Loads lane indices from the (possibly unaligned) array `idx` and converts
+// them via IndicesFromVec.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), typename TI>
+HWY_API Indices128<TFromD<D>, HWY_MAX_LANES_D(D)> SetTableIndices(
+    D d, const TI* idx) {
+  static_assert(sizeof(TFromD<D>) == sizeof(TI), "Index size must match lane");
+  const Rebind<TI, decltype(d)> di;
+  const auto lane_indices = LoadU(di, idx);
+  return IndicesFromVec(d, lane_indices);
+}
+
+// 8-bit lanes: lane indices are byte indices, so this is a byte lookup.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T, N> TableLookupLanes(Vec128<T, N> v, Indices128<T, N> idx) {
+  const Vec128<T, N> byte_idx{idx.raw};
+  return TableLookupBytes(v, byte_idx);
+}
+
+// Permutes the 16-bit integer lanes of `v` according to `idx` (as produced
+// by IndicesFromVec / SetTableIndices).
+template <typename T, size_t N, HWY_IF_UI16(T)>
+HWY_API Vec128<T, N> TableLookupLanes(Vec128<T, N> v, Indices128<T, N> idx) {
+#if HWY_TARGET <= HWY_AVX3
+  return Vec128<T, N>{_mm_permutexvar_epi16(idx.raw, v.raw)};
+#elif HWY_TARGET == HWY_SSE2
+#if HWY_COMPILER_GCC_ACTUAL && HWY_HAS_BUILTIN(__builtin_shuffle)
+  // Let GCC's generic vector shuffle builtin choose the instruction sequence.
+  typedef uint16_t GccU16RawVectType __attribute__((__vector_size__(16)));
+  const auto shuffled =
+      __builtin_shuffle(reinterpret_cast<GccU16RawVectType>(v.raw),
+                        reinterpret_cast<GccU16RawVectType>(idx.raw));
+  return Vec128<T, N>{
+      reinterpret_cast<typename detail::Raw128<T>::type>(shuffled)};
+#else
+  // Fallback: permute through aligned stack buffers, one lane at a time.
+  const Full128<T> d_full;
+  alignas(16) T in[8];
+  alignas(16) uint16_t lane_idx[8];
+  alignas(16) T out[8];
+
+  Store(Vec128<T>{v.raw}, d_full, in);
+  _mm_store_si128(reinterpret_cast<__m128i*>(lane_idx), idx.raw);
+
+  for (int lane = 0; lane < 8; ++lane) {
+    // Masking keeps the read inside `in`.
+    out[lane] = in[lane_idx[lane] & 7u];
+  }
+
+  return Vec128<T, N>{Load(d_full, out).raw};
+#endif  // HWY_COMPILER_GCC_ACTUAL && HWY_HAS_BUILTIN(__builtin_shuffle)
+#else
+  // Remaining targets: `idx` is used as byte indices for a byte lookup.
+  const Vec128<T, N> byte_idx{idx.raw};
+  return TableLookupBytes(v, byte_idx);
+#endif
+}
+
+#if HWY_HAVE_FLOAT16
+// float16_t lanes (vectors larger than 2 bytes): half-precision permute
+// intrinsic applied directly to `idx`.
+template <size_t N, HWY_IF_V_SIZE_GT(float16_t, N, 2)>
+HWY_API Vec128<float16_t, N> TableLookupLanes(Vec128<float16_t, N> v,
+                                              Indices128<float16_t, N> idx) {
+  return Vec128<float16_t, N>{_mm_permutexvar_ph(idx.raw, v.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+
+// Permutes the 32-bit lanes of `v` according to `idx` (as produced by
+// IndicesFromVec / SetTableIndices).
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec128<T, N> TableLookupLanes(Vec128<T, N> v, Indices128<T, N> idx) {
+#if HWY_TARGET <= HWY_AVX2
+  // Use the float permute on a float view of v, then cast back.
+  const DFromV<decltype(v)> d;
+  const RebindToFloat<decltype(d)> df;
+  const Vec128<float, N> as_float = BitCast(df, v);
+  const Vec128<float, N> permuted{_mm_permutevar_ps(as_float.raw, idx.raw)};
+  return BitCast(d, permuted);
+#elif HWY_TARGET == HWY_SSE2
+#if HWY_COMPILER_GCC_ACTUAL && HWY_HAS_BUILTIN(__builtin_shuffle)
+  // Let GCC's generic vector shuffle builtin choose the instruction sequence.
+  typedef uint32_t GccU32RawVectType __attribute__((__vector_size__(16)));
+  const auto shuffled =
+      __builtin_shuffle(reinterpret_cast<GccU32RawVectType>(v.raw),
+                        reinterpret_cast<GccU32RawVectType>(idx.raw));
+  return Vec128<T, N>{
+      reinterpret_cast<typename detail::Raw128<T>::type>(shuffled)};
+#else
+  // Fallback: permute through aligned stack buffers, one lane at a time.
+  const Full128<T> d_full;
+  alignas(16) T in[4];
+  alignas(16) uint32_t lane_idx[4];
+  alignas(16) T out[4];
+
+  Store(Vec128<T>{v.raw}, d_full, in);
+  _mm_store_si128(reinterpret_cast<__m128i*>(lane_idx), idx.raw);
+
+  for (int lane = 0; lane < 4; ++lane) {
+    // Masking keeps the read inside `in`.
+    out[lane] = in[lane_idx[lane] & 3u];
+  }
+
+  return Vec128<T, N>{Load(d_full, out).raw};
+#endif  // HWY_COMPILER_GCC_ACTUAL && HWY_HAS_BUILTIN(__builtin_shuffle)
+#else   // SSSE3 or SSE4
+  // `idx` is used as byte indices for a byte lookup.
+  const Vec128<T, N> byte_idx{idx.raw};
+  return TableLookupBytes(v, byte_idx);
+#endif
+}
+
+#if HWY_TARGET <= HWY_SSSE3
+// float lanes (vectors larger than 4 bytes).
+template <size_t N, HWY_IF_V_SIZE_GT(float, N, 4)>
+HWY_API Vec128<float, N> TableLookupLanes(Vec128<float, N> v,
+                                          Indices128<float, N> idx) {
+#if HWY_TARGET <= HWY_AVX2
+  return Vec128<float, N>{_mm_permutevar_ps(v.raw, idx.raw)};
+#else   // SSSE3 or SSE4
+  // Do the byte lookup on an int32 view of v, then cast back to float.
+  const DFromV<decltype(v)> df;
+  const RebindToSigned<decltype(df)> di;
+  const Vec128<int32_t, N> byte_idx{idx.raw};
+  const auto permuted = TableLookupBytes(BitCast(di, v), byte_idx);
+  return BitCast(df, permuted);
+#endif  // HWY_TARGET <= HWY_AVX2
+}
+#endif  // HWY_TARGET <= HWY_SSSE3
+
+// Single lane: no change
+template <typename T>
+HWY_API Vec128<T, 1> TableLookupLanes(Vec128<T, 1> v,
+                                      Indices128<T, 1> /* idx */) {
+  // With one lane there is nothing to permute; the indices are ignored and
+  // `v` is returned as-is.
+  return v;
+}
+
+// Permutes the two 64-bit integer lanes of `v` according to `idx`.
+template <typename T, HWY_IF_UI64(T)>
+HWY_API Vec128<T> TableLookupLanes(Vec128<T> v, Indices128<T> idx) {
+  const DFromV<decltype(v)> d;
+  const Vec128<int64_t> lane_idx{idx.raw};
+#if HWY_TARGET <= HWY_AVX2
+  // There is no _mm_permute[x]var_epi64, so use the double-precision permute.
+  // It reads its selector from bit 1 (unusual), hence the doubling.
+  const Vec128<int64_t> selector = lane_idx + lane_idx;
+  const RebindToFloat<decltype(d)> df;
+  const Vec128<double> permuted{
+      _mm_permutevar_pd(BitCast(df, v).raw, selector.raw)};
+  return BitCast(d, permuted);
+#else
+  // Only 2 lanes: can swap+blend. Choose v where the index equals its own
+  // position. To avoid a 64-bit comparison (expensive on SSSE3), XOR with
+  // iota and subtract 1 to obtain an all-zero or all-one mask.
+  const RebindToSigned<decltype(d)> di;
+  const Vec128<int64_t> is_same = (lane_idx ^ Iota(di, 0)) - Set(di, 1);
+  const Mask128<T> keep = RebindMask(d, MaskFromVec(is_same));
+  return IfThenElse(keep, v, Shuffle01(v));
+#endif
+}
+
+// Permutes the two double lanes of `v` according to `idx`.
+HWY_API Vec128<double> TableLookupLanes(Vec128<double> v,
+                                        Indices128<double> idx) {
+  const Vec128<int64_t> lane_idx{idx.raw};
+#if HWY_TARGET <= HWY_AVX2
+  // The permute reads its selector from bit 1 (unusual), hence the doubling.
+  const Vec128<int64_t> selector = lane_idx + lane_idx;
+  return Vec128<double>{_mm_permutevar_pd(v.raw, selector.raw)};
+#else
+  // Only 2 lanes: can swap+blend. Choose v where the index equals its own
+  // position. To avoid a 64-bit comparison (expensive on SSSE3), XOR with
+  // iota and subtract 1 to obtain an all-zero or all-one mask.
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;
+  const Vec128<int64_t> is_same = (lane_idx ^ Iota(di, 0)) - Set(di, 1);
+  const Mask128<double> keep = RebindMask(d, MaskFromVec(is_same));
+  return IfThenElse(keep, v, Shuffle01(v));
+#endif
+}
+
+// ------------------------------ ReverseBlocks
+
+// Single block: no change
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> ReverseBlocks(D /* tag */, VFromD<D> v) {
+  // Vectors of at most 16 bytes are treated as a single block, so the input
+  // is returned unchanged.
+  return v;
+}
+
+// ------------------------------ Reverse (Shuffle0123, Shuffle2301)
+
+// Single lane: no change
+template <class D, HWY_IF_LANES_D(D, 1)>
+HWY_API VFromD<D> Reverse(D /* tag */, VFromD<D> v) {
+  // Reversing one lane is the identity.
+  return v;
+}
+
+// 32-bit x2: shuffle
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> Reverse(D /* tag */, const VFromD<D> v) {
+  // Shuffle2301 is applied to a full-width view of the register; only the
+  // raw result is kept for the two-lane vector.
+  const Vec128<TFromD<D>> full{v.raw};
+  const Vec128<TFromD<D>> swapped = Shuffle2301(full);
+  return VFromD<D>{swapped.raw};
+}
+
+// 64-bit x2: shuffle
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> Reverse(D /* tag */, const VFromD<D> v) {
+  // Two 64-bit lanes: reversing is delegated to Shuffle01.
+  return Shuffle01(v);
+}
+
+// 32-bit x4: shuffle
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> Reverse(D /* tag */, const VFromD<D> v) {
+  // Four 32-bit lanes: reversing is delegated to Shuffle0123.
+  return Shuffle0123(v);
+}
+
+// 16-bit
+// Reverses the order of 16-bit lanes (2, 4 or 8 of them).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE_D(D, 2),
+          HWY_IF_LANES_GT_D(D, 1)>
+HWY_API VFromD<D> Reverse(D d, const VFromD<D> v) {
+  constexpr size_t kN = MaxLanes(d);
+  // Defensive only: HWY_IF_LANES_GT_D already excludes single-lane vectors.
+  if (kN == 1) return v;
+
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const VU bits = BitCast(du, v);  // for float16_t
+
+  // Partial vectors occupy only the lower 64 bits: one shufflelo suffices.
+  if (kN == 2) {
+    const __m128i swapped =
+        _mm_shufflelo_epi16(bits.raw, _MM_SHUFFLE(0, 1, 0, 1));
+    return BitCast(d, VU{swapped});
+  }
+  if (kN == 4) {
+    const __m128i reversed =
+        _mm_shufflelo_epi16(bits.raw, _MM_SHUFFLE(0, 1, 2, 3));
+    return BitCast(d, VU{reversed});
+  }
+
+#if HWY_TARGET == HWY_SSE2
+  // Reverse the four lanes inside each 64-bit half, then swap the halves.
+  const __m128i lo_reversed =
+      _mm_shufflelo_epi16(bits.raw, _MM_SHUFFLE(0, 1, 2, 3));
+  const VU halves_reversed{
+      _mm_shufflehi_epi16(lo_reversed, _MM_SHUFFLE(0, 1, 2, 3))};
+  return BitCast(
+      d, VU{_mm_shuffle_epi32(halves_reversed.raw, _MM_SHUFFLE(1, 0, 3, 2))});
+#else
+  // Byte shuffle; each 16-bit entry packs the two source byte indices of one
+  // output lane.
+  const RebindToSigned<decltype(d)> di;
+  alignas(16) static constexpr int16_t kByteIdx[8] = {
+      0x0F0E, 0x0D0C, 0x0B0A, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100};
+  return BitCast(d, TableLookupBytes(v, LoadDup128(di, kByteIdx)));
+#endif
+}
+
+// Reverses the order of 8-bit lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE_D(D, 1),
+          HWY_IF_LANES_GT_D(D, 1)>
+HWY_API VFromD<D> Reverse(D d, const VFromD<D> v) {
+  constexpr int kN = static_cast<int>(MaxLanes(d));
+  // Defensive only: HWY_IF_LANES_GT_D already excludes single-lane vectors.
+  if (kN == 1) return v;
+#if HWY_TARGET <= HWY_SSSE3
+  // Entry i selects source byte kN - 1 - i.
+  // NOTE: Lanes with negative shuffle control mask values are set to zero.
+  alignas(16) static constexpr int8_t kControl[16] = {
+      kN - 1, kN - 2,  kN - 3,  kN - 4,  kN - 5,  kN - 6,  kN - 7,  kN - 8,
+      kN - 9, kN - 10, kN - 11, kN - 12, kN - 13, kN - 14, kN - 15, kN - 16};
+  const RebindToSigned<decltype(d)> di;
+  const auto bytes = BitCast(di, v);
+  const VFromD<decltype(di)> control = Load(di, kControl);
+  return VFromD<D>{_mm_shuffle_epi8(bytes.raw, control.raw)};
+#else
+  // Swap the two bytes inside every 16-bit lane, then reverse those lanes.
+  const RepartitionToWide<decltype(d)> d16;
+  const auto bytes_swapped = RotateRight<8>(BitCast(d16, v));
+  return BitCast(d, Reverse(d16, bytes_swapped));
+#endif
+}
+
+// ------------------------------ Reverse2
+
+// Single lane: no change
+template <class D, HWY_IF_LANES_D(D, 1)>
+HWY_API VFromD<D> Reverse2(D /* tag */, VFromD<D> v) {
+  // A single lane has no neighbour to swap with; return the input as-is.
+  return v;
+}
+
+// Generic for all vector lengths (128-bit sufficient if SSE2).
+// Swaps adjacent pairs of 16-bit lanes.
+template <class D, HWY_IF_T_SIZE_D(D, 2), HWY_IF_LANES_GT_D(D, 1)>
+HWY_API VFromD<D> Reverse2(D d, VFromD<D> v) {
+#if HWY_TARGET <= HWY_AVX3
+  // Rotating each 32-bit lane by 16 bits swaps its two 16-bit halves.
+  const Repartition<uint32_t, decltype(d)> du32;
+  const auto pairs = BitCast(du32, v);
+  return BitCast(d, RotateRight<16>(pairs));
+#elif HWY_TARGET == HWY_SSE2
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const VU bits = BitCast(du, v);  // for float16_t
+  constexpr size_t kN = MaxLanes(d);
+  // Lower four lanes always; upper four only if the vector has them.
+  __m128i swapped = _mm_shufflelo_epi16(bits.raw, _MM_SHUFFLE(2, 3, 0, 1));
+  if (kN > 4) {
+    swapped = _mm_shufflehi_epi16(swapped, _MM_SHUFFLE(2, 3, 0, 1));
+  }
+  return BitCast(d, VU{swapped});
+#else
+  // Byte shuffle; each 16-bit entry packs the two source byte indices of one
+  // output lane.
+  const RebindToSigned<decltype(d)> di;
+  alignas(16) static constexpr int16_t kByteIdx[8] = {
+      0x0302, 0x0100, 0x0706, 0x0504, 0x0B0A, 0x0908, 0x0F0E, 0x0D0C};
+  return BitCast(d, TableLookupBytes(v, LoadDup128(di, kByteIdx)));
+#endif
+}
+
+// Generic for all vector lengths.
+template <class D, HWY_IF_T_SIZE_D(D, 4), HWY_IF_LANES_GT_D(D, 1)>
+HWY_API VFromD<D> Reverse2(D /* tag */, VFromD<D> v) {
+  // 32-bit lanes: swapping adjacent pairs is delegated to Shuffle2301.
+  return Shuffle2301(v);
+}
+
+// Generic for all vector lengths.
+template <class D, HWY_IF_T_SIZE_D(D, 8), HWY_IF_LANES_GT_D(D, 1)>
+HWY_API VFromD<D> Reverse2(D /* tag */, VFromD<D> v) {
+  // 64-bit lanes: swapping adjacent pairs is delegated to Shuffle01.
+  return Shuffle01(v);
+}
+
+// ------------------------------ Reverse4
+
+// Reverses each group of four 16-bit lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Reverse4(D d, VFromD<D> v) {
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const VU bits = BitCast(du, v);  // for float16_t
+  constexpr size_t kN = MaxLanes(d);
+  // 4x 16-bit: a single shufflelo suffices.
+  if (kN <= 4) {
+    const __m128i reversed =
+        _mm_shufflelo_epi16(bits.raw, _MM_SHUFFLE(0, 1, 2, 3));
+    return BitCast(d, VU{reversed});
+  }
+
+#if HWY_TARGET == HWY_SSE2
+  // Reverse the lower four lanes, then the upper four.
+  const __m128i lo_reversed =
+      _mm_shufflelo_epi16(bits.raw, _MM_SHUFFLE(0, 1, 2, 3));
+  const __m128i both_reversed =
+      _mm_shufflehi_epi16(lo_reversed, _MM_SHUFFLE(0, 1, 2, 3));
+  return BitCast(d, VU{both_reversed});
+#else
+  // Byte shuffle; each 16-bit entry packs the two source byte indices of one
+  // output lane.
+  const RebindToSigned<decltype(d)> di;
+  alignas(16) static constexpr int16_t kByteIdx[8] = {
+      0x0706, 0x0504, 0x0302, 0x0100, 0x0F0E, 0x0D0C, 0x0B0A, 0x0908};
+  return BitCast(d, TableLookupBytes(v, LoadDup128(di, kByteIdx)));
+#endif
+}
+
+// Generic for all vector lengths.
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> Reverse4(D /* tag */, const VFromD<D> v) {
+  // 32-bit lanes: reversing groups of four is delegated to Shuffle0123.
+  return Shuffle0123(v);
+}
+
+// Exists so that calls with 64-bit lanes compile; it must never be reached
+// at runtime because a vector of at most 16 bytes cannot hold four such
+// lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> Reverse4(D /* tag */, VFromD<D> /* v */) {
+  HWY_ASSERT(0);  // don't have 4 u64 lanes
+}
+
+// ------------------------------ Reverse8
+
+// Reverses each group of eight 16-bit lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Reverse8(D d, const VFromD<D> v) {
+#if HWY_TARGET == HWY_SSE2
+  // Reverse the order of the 32-bit pairs, then swap the two 16-bit lanes
+  // within each pair.
+  const RepartitionToWide<decltype(d)> dw;
+  const auto pairs_reversed = Shuffle0123(BitCast(dw, v));
+  return Reverse2(d, BitCast(d, pairs_reversed));
+#else
+  // Byte shuffle; each 16-bit entry packs the two source byte indices of one
+  // output lane.
+  const RebindToSigned<decltype(d)> di;
+  alignas(16) static constexpr int16_t kByteIdx[8] = {
+      0x0F0E, 0x0D0C, 0x0B0A, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100};
+  return BitCast(d, TableLookupBytes(v, LoadDup128(di, kByteIdx)));
+#endif
+}
+
+// Exists so that calls with 32- or 64-bit lanes compile; it must never be
+// reached at runtime because such vectors have fewer than eight lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 4) | (1 << 8))>
+HWY_API VFromD<D> Reverse8(D /* tag */, VFromD<D> /* v */) {
+  HWY_ASSERT(0);  // don't have 8 lanes if larger than 16-bit
+}
+
+// ------------------------------ ReverseBits
+
+#if HWY_TARGET <= HWY_AVX3_DL
+
+#ifdef HWY_NATIVE_REVERSE_BITS_UI8
+#undef HWY_NATIVE_REVERSE_BITS_UI8
+#else
+#define HWY_NATIVE_REVERSE_BITS_UI8
+#endif
+
+// Reverses the bit order within every 8-bit lane using the GF(2) affine
+// transform instruction.
+template <class V, HWY_IF_T_SIZE_V(V, 1), HWY_IF_V_SIZE_LE_D(DFromV<V>, 16)>
+HWY_API V ReverseBits(V v) {
+  const Full128<uint64_t> du64;
+  // 8x8 bit matrix, replicated into both 64-bit lanes; the constant term of
+  // the affine transform (last argument) is zero.
+  const auto bit_matrix = Set(du64, 0x8040201008040201u);
+  return V{_mm_gf2p8affine_epi64_epi8(v.raw, bit_matrix.raw, 0)};
+}
+#endif  // HWY_TARGET <= HWY_AVX3_DL
+
+// ------------------------------ InterleaveLower
+
+// Interleaves lanes from halves of the 128-bit blocks of "a" (which provides
+// the least-significant lane) and "b". To concatenate two half-width integers
+// into one, use ZipLower/Upper instead (also works with scalar).
+
+// 8-bit lanes.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T, N> InterleaveLower(Vec128<T, N> a, Vec128<T, N> b) {
+  const __m128i interleaved = _mm_unpacklo_epi8(a.raw, b.raw);
+  return Vec128<T, N>{interleaved};
+}
+// 16-bit lanes; operates on the unsigned view so float16_t is supported.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec128<T, N> InterleaveLower(Vec128<T, N> a, Vec128<T, N> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;  // for float16_t
+  const VU a_bits = BitCast(du, a);
+  const VU b_bits = BitCast(du, b);
+  return BitCast(d, VU{_mm_unpacklo_epi16(a_bits.raw, b_bits.raw)});
+}
+// 32-bit integer lanes.
+template <typename T, size_t N, HWY_IF_UI32(T)>
+HWY_API Vec128<T, N> InterleaveLower(Vec128<T, N> a, Vec128<T, N> b) {
+  const __m128i interleaved = _mm_unpacklo_epi32(a.raw, b.raw);
+  return Vec128<T, N>{interleaved};
+}
+// 64-bit integer lanes.
+template <typename T, size_t N, HWY_IF_UI64(T)>
+HWY_API Vec128<T, N> InterleaveLower(Vec128<T, N> a, Vec128<T, N> b) {
+  const __m128i interleaved = _mm_unpacklo_epi64(a.raw, b.raw);
+  return Vec128<T, N>{interleaved};
+}
+
+// float lanes.
+template <size_t N>
+HWY_API Vec128<float, N> InterleaveLower(Vec128<float, N> a,
+                                         Vec128<float, N> b) {
+  const __m128 interleaved = _mm_unpacklo_ps(a.raw, b.raw);
+  return Vec128<float, N>{interleaved};
+}
+// double lanes.
+template <size_t N>
+HWY_API Vec128<double, N> InterleaveLower(Vec128<double, N> a,
+                                          Vec128<double, N> b) {
+  const __m128d interleaved = _mm_unpacklo_pd(a.raw, b.raw);
+  return Vec128<double, N>{interleaved};
+}
+
+// Generic for all vector lengths.
+template <class D>
+HWY_API VFromD<D> InterleaveLower(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  // Tag-taking convenience overload: the tag is ignored and the call is
+  // forwarded to the two-argument InterleaveLower for the vector type.
+  return InterleaveLower(a, b);
+}
+
+// ------------------------------ InterleaveUpper (UpperHalf)
+
+// Full
+// 8-bit lanes, full vector.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> InterleaveUpper(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  const __m128i interleaved = _mm_unpackhi_epi8(a.raw, b.raw);
+  return VFromD<D>{interleaved};
+}
+// 16-bit lanes, full vector; operates on the unsigned view so float16_t is
+// supported.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> InterleaveUpper(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;  // for float16_t
+  const VU a_bits = BitCast(du, a);
+  const VU b_bits = BitCast(du, b);
+  return BitCast(d, VU{_mm_unpackhi_epi16(a_bits.raw, b_bits.raw)});
+}
+// 32-bit integer lanes, full vector.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_UI32_D(D)>
+HWY_API VFromD<D> InterleaveUpper(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  const __m128i interleaved = _mm_unpackhi_epi32(a.raw, b.raw);
+  return VFromD<D>{interleaved};
+}
+// 64-bit integer lanes, full vector.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_UI64_D(D)>
+HWY_API VFromD<D> InterleaveUpper(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  const __m128i interleaved = _mm_unpackhi_epi64(a.raw, b.raw);
+  return VFromD<D>{interleaved};
+}
+// float lanes, full vector.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> InterleaveUpper(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  const __m128 interleaved = _mm_unpackhi_ps(a.raw, b.raw);
+  return VFromD<D>{interleaved};
+}
+// double lanes, full vector.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> InterleaveUpper(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  const __m128d interleaved = _mm_unpackhi_pd(a.raw, b.raw);
+  return VFromD<D>{interleaved};
+}
+
+// Partial
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> InterleaveUpper(D d, VFromD<D> a, VFromD<D> b) {
+  // Extract the upper half of each input, re-wrap its raw register in the
+  // original vector type, then interleave the (now lower) lanes.
+  const Half<decltype(d)> dh;
+  const VFromD<D> a_upper{UpperHalf(dh, a).raw};
+  const VFromD<D> b_upper{UpperHalf(dh, b).raw};
+  return InterleaveLower(d, a_upper, b_upper);
+}
+
+// -------------------------- I8/U8 Broadcast (InterleaveLower, InterleaveUpper)
+
+// Broadcasts 8-bit lane `kLane` to all lanes.
+template <int kLane, class T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T, N> Broadcast(const Vec128<T, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  const DFromV<decltype(v)> d;
+
+#if HWY_TARGET == HWY_SSE2
+  // Interleaving v with itself doubles every byte, so byte kLane fills
+  // 16-bit lane (kLane & 7) of the lower or upper interleave; the 16-bit
+  // Broadcast then replicates that pair.
+  const Full128<T> d_full;
+  const Full128<uint16_t> du16_full;
+  const Vec128<T> full{v.raw};
+  const Vec128<T> doubled = (kLane < 8)
+                                ? InterleaveLower(d_full, full, full)
+                                : InterleaveUpper(d_full, full, full);
+  return ResizeBitCast(d, Broadcast<kLane & 7>(BitCast(du16_full, doubled)));
+#else
+  // Byte lookup with every index equal to kLane.
+  const auto all_klane = Set(d, static_cast<T>(kLane));
+  return TableLookupBytes(v, all_klane);
+#endif
+}
+
+// ------------------------------ ZipLower/ZipUpper (InterleaveLower)
+
+// Same as Interleave*, except that the return lanes are double-width integers;
+// this is necessary because the single-lane scalar cannot return two values.
+// Generic for all vector lengths.
+template <class V, class DW = RepartitionToWide<DFromV<V>>>
+HWY_API VFromD<DW> ZipLower(V a, V b) {
+  // Interleave, then reinterpret each pair of lanes as one wide lane.
+  const DW dw;
+  const auto interleaved = InterleaveLower(a, b);
+  return BitCast(dw, interleaved);
+}
+// Overload taking the wide tag explicitly.
+template <class V, class D = DFromV<V>, class DW = RepartitionToWide<D>>
+HWY_API VFromD<DW> ZipLower(DW dw, V a, V b) {
+  const D d;
+  const auto interleaved = InterleaveLower(d, a, b);
+  return BitCast(dw, interleaved);
+}
+
+// Interleaves the upper lanes of a and b, then reinterprets each pair as
+// one wide lane.
+template <class V, class D = DFromV<V>, class DW = RepartitionToWide<D>>
+HWY_API VFromD<DW> ZipUpper(DW dw, V a, V b) {
+  const D d;
+  const auto interleaved = InterleaveUpper(d, a, b);
+  return BitCast(dw, interleaved);
+}
+
+// ------------------------------ Per4LaneBlockShuffle
+namespace detail {
+
+#ifdef HWY_NATIVE_PER4LANEBLKSHUF_DUP32
+#undef HWY_NATIVE_PER4LANEBLKSHUF_DUP32
+#else
+#define HWY_NATIVE_PER4LANEBLKSHUF_DUP32
+#endif
+
+// Builds a vector of type VFromD<D> from four u32 values; x0 is passed as
+// the last argument of _mm_set_epi32 (the least-significant 32 bits).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_INLINE VFromD<D> Per4LaneBlkShufDupSet4xU32(D d, const uint32_t x3,
+                                                const uint32_t x2,
+                                                const uint32_t x1,
+                                                const uint32_t x0) {
+  const Vec128<uint32_t> packed{_mm_set_epi32(
+      static_cast<int32_t>(x3), static_cast<int32_t>(x2),
+      static_cast<int32_t>(x1), static_cast<int32_t>(x0))};
+  return ResizeBitCast(d, packed);
+}
+
+// 16-bit lanes, 8-byte vector: all four lanes are in the lower 64 bits, so a
+// single shufflelo applies the pattern.
+template <size_t kIdx3210, class V>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> /*idx_3210_tag*/,
+                                  hwy::SizeTag<2> /*lane_size_tag*/,
+                                  hwy::SizeTag<8> /*vect_size_tag*/, V v) {
+  constexpr int kShuffle = static_cast<int>(kIdx3210 & 0xFF);
+  return V{_mm_shufflelo_epi16(v.raw, kShuffle)};
+}
+
+#if HWY_TARGET == HWY_SSE2
+// 16-bit lanes, 16-byte vector: apply the same pattern to the lower four
+// lanes and then to the upper four.
+template <size_t kIdx3210, class V>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> /*idx_3210_tag*/,
+                                  hwy::SizeTag<2> /*lane_size_tag*/,
+                                  hwy::SizeTag<16> /*vect_size_tag*/, V v) {
+  constexpr int kShuffle = static_cast<int>(kIdx3210 & 0xFF);
+  const __m128i lo_done = _mm_shufflelo_epi16(v.raw, kShuffle);
+  const __m128i both_done = _mm_shufflehi_epi16(lo_done, kShuffle);
+  return V{both_done};
+}
+
+// 8-bit lanes, 4- or 8-byte vector: promote to 16-bit lanes, reuse the
+// 16-bit shuffle on the doubled vector size, then demote back to bytes.
+template <size_t kIdx3210, size_t kVectSize, class V,
+          hwy::EnableIf<(kVectSize == 4 || kVectSize == 8)>* = nullptr>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> idx_3210_tag,
+                                  hwy::SizeTag<1> /*lane_size_tag*/,
+                                  hwy::SizeTag<kVectSize> /*vect_size_tag*/,
+                                  V v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const Rebind<uint16_t, decltype(d)> du16;
+  const RebindToSigned<decltype(du16)> di16;
+
+  const auto widened = PromoteTo(du16, BitCast(du, v));
+  const hwy::SizeTag<2> lane_size_16;
+  const hwy::SizeTag<kVectSize * 2> widened_size;
+  const auto shuffled =
+      Per4LaneBlockShuffle(idx_3210_tag, lane_size_16, widened_size, widened);
+  return BitCast(d, DemoteTo(du, BitCast(di16, shuffled)));
+}
+
+// 8-bit lanes, 16-byte vector: zero-extend each half of the vector to 16-bit
+// lanes, apply the 16-bit shuffle to both halves, then narrow the two
+// results back into one byte vector.
+//
+// kVectSize does not appear in any function parameter and thus cannot be
+// deduced. Without a default, template argument deduction fails and this
+// overload is never viable for the usual tag-dispatched call. The default
+// makes it selectable while still accepting explicitly specified arguments.
+template <size_t kIdx3210, size_t kVectSize = 16, class V>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> idx_3210_tag,
+                                  hwy::SizeTag<1> /*lane_size_tag*/,
+                                  hwy::SizeTag<16> /*vect_size_tag*/, V v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const Repartition<uint16_t, decltype(d)> du16;
+  const RebindToSigned<decltype(du16)> di16;
+
+  // Interleaving with zero places each byte in the low half of a 16-bit
+  // lane, so every widened lane is in [0, 255].
+  const auto zero = Zero(d);
+  const auto v_lo16 = BitCast(du16, InterleaveLower(d, v, zero));
+  const auto v_hi16 = BitCast(du16, InterleaveUpper(d, v, zero));
+
+  const auto lo_shuf_result = Per4LaneBlockShuffle(
+      idx_3210_tag, hwy::SizeTag<2>(), hwy::SizeTag<16>(), v_lo16);
+  const auto hi_shuf_result = Per4LaneBlockShuffle(
+      idx_3210_tag, hwy::SizeTag<2>(), hwy::SizeTag<16>(), v_hi16);
+
+  // Narrow back to bytes, lower half first.
+  return BitCast(d, OrderedDemote2To(du, BitCast(di16, lo_shuf_result),
+                                     BitCast(di16, hi_shuf_result)));
+}
+#endif
+
+// 32-bit non-float lanes, 16-byte vector: one integer dword shuffle.
+template <size_t kIdx3210, class V, HWY_IF_NOT_FLOAT(TFromV<V>)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> /*idx_3210_tag*/,
+                                  hwy::SizeTag<4> /*lane_size_tag*/,
+                                  hwy::SizeTag<16> /*vect_size_tag*/, V v) {
+  constexpr int kShuffle = static_cast<int>(kIdx3210 & 0xFF);
+  return V{_mm_shuffle_epi32(v.raw, kShuffle)};
+}
+
+// 32-bit float lanes, 16-byte vector: one float shuffle with `v` as both
+// sources.
+template <size_t kIdx3210, class V, HWY_IF_FLOAT(TFromV<V>)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> /*idx_3210_tag*/,
+                                  hwy::SizeTag<4> /*lane_size_tag*/,
+                                  hwy::SizeTag<16> /*vect_size_tag*/, V v) {
+  constexpr int kShuffle = static_cast<int>(kIdx3210 & 0xFF);
+  return V{_mm_shuffle_ps(v.raw, v.raw, kShuffle)};
+}
+
+}  // namespace detail
+
+// ------------------------------ SlideUpLanes
+
+namespace detail {
+
+// Runtime-amount SlideUpLanes for vectors of at most 8 bytes: the vector is
+// viewed as one 64-bit lane and shifted left by amt lanes' worth of bits.
+template <class V, HWY_IF_V_SIZE_LE_V(V, 8)>
+HWY_INLINE V SlideUpLanes(V v, size_t amt) {
+  const DFromV<decltype(v)> d;
+  const Full64<uint64_t> du64;
+  const size_t shift_bits = amt * sizeof(TFromV<V>) * 8;
+  const auto as_u64 = ResizeBitCast(du64, v);
+  const auto shifted = ShiftLeftSame(as_u64, static_cast<int>(shift_bits));
+  return ResizeBitCast(d, shifted);
+}
+
+#if HWY_TARGET <= HWY_SSSE3
+// Runtime-amount SlideUpLanes for 16-byte vectors using a byte table lookup.
+// Byte indices start at (0 - byte offset), truncated to uint8_t, and are
+// handed to TableLookupBytesOr0.
+template <class V, HWY_IF_V_SIZE_V(V, 16)>
+HWY_INLINE V SlideUpLanes(V v, size_t amt) {
+  const DFromV<decltype(v)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  const size_t byte_offset = amt * sizeof(TFromV<V>);
+  const uint8_t first_index = static_cast<uint8_t>(size_t{0} - byte_offset);
+  const auto byte_indices = Iota(du8, first_index);
+  return BitCast(d, TableLookupBytesOr0(BitCast(du8, v), byte_indices));
+}
+#else
+// Runtime-amount SlideUpLanes for 16-byte vectors without a byte table lookup.
+// A whole-u64 move is applied first when amt covers at least one u64, then the
+// remaining (amt bits mod 64) is handled with a pair of 64-bit shifts.
+template <class V, HWY_IF_V_SIZE_V(V, 16)>
+HWY_INLINE V SlideUpLanes(V v, size_t amt) {
+  const DFromV<decltype(v)> d;
+  const Repartition<int32_t, decltype(d)> di32;
+  const Repartition<uint64_t, decltype(d)> du64;
+  constexpr size_t kNumOfLanesPerU64 = 8 / sizeof(TFromV<V>);
+
+  const auto vu64 = BitCast(du64, v);
+
+  // All-ones when amt spans at least one full u64, otherwise all-zero.
+  const int32_t whole_u64_flag =
+      -static_cast<int32_t>(amt >= kNumOfLanesPerU64);
+  const auto whole_u64_mask = BitCast(du64, Set(di32, whole_u64_flag));
+
+  const auto moved_by_u64 = BitCast(du64, ShiftLeftBytes<8>(du64, vu64));
+  const auto upper_src = IfVecThenElse(whole_u64_mask, moved_by_u64, vu64);
+  const auto carry_src = ShiftLeftBytes<8>(du64, upper_src);
+
+  const size_t total_bits = amt * sizeof(TFromV<V>) * 8;
+  const int shl_amt = static_cast<int>(total_bits & 63);
+
+  // Combine the in-lane shift with the bits carried in from the lower u64.
+  const auto shifted_in_lane = ShiftLeftSame(upper_src, shl_amt);
+  const auto carried_bits = ShiftRightSame(carry_src, 64 - shl_amt);
+  return BitCast(d, Or(shifted_in_lane, carried_bits));
+}
+#endif
+
+}  // namespace detail
+
+// Single-lane descriptors: the input vector is returned as-is and the amount
+// is ignored.
+template <class D, HWY_IF_LANES_D(D, 1)>
+HWY_API VFromD<D> SlideUpLanes(D /* tag */, VFromD<D> v, size_t /* amt */) {
+  return v;
+}
+
+// SlideUpLanes for two-lane descriptors of at most 16 bytes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 2)>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  // When the compiler knows amt, dispatch to the compile-time ShiftLeftLanes.
+  if (__builtin_constant_p(amt)) {
+    switch (amt) {
+      case 0: return v;
+      case 1: return ShiftLeftLanes<1>(d, v);
+      default: break;  // other amounts take the runtime path below
+    }
+  }
+#else
+  (void)d;
+#endif
+
+  // Runtime amount.
+  return detail::SlideUpLanes(v, amt);
+}
+
+// SlideUpLanes for four-lane descriptors of at most 16 bytes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 4)>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  // When the compiler knows amt, dispatch to the compile-time ShiftLeftLanes.
+  if (__builtin_constant_p(amt)) {
+    switch (amt) {
+      case 0: return v;
+      case 1: return ShiftLeftLanes<1>(d, v);
+      case 2: return ShiftLeftLanes<2>(d, v);
+      case 3: return ShiftLeftLanes<3>(d, v);
+      default: break;  // other amounts take the runtime path below
+    }
+  }
+#else
+  (void)d;
+#endif
+
+  // Runtime amount.
+  return detail::SlideUpLanes(v, amt);
+}
+
+// SlideUpLanes for eight-lane descriptors of at most 16 bytes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 8)>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  // When the compiler knows amt, dispatch to the compile-time ShiftLeftLanes.
+  if (__builtin_constant_p(amt)) {
+    switch (amt) {
+      case 0: return v;
+      case 1: return ShiftLeftLanes<1>(d, v);
+      case 2: return ShiftLeftLanes<2>(d, v);
+      case 3: return ShiftLeftLanes<3>(d, v);
+      case 4: return ShiftLeftLanes<4>(d, v);
+      case 5: return ShiftLeftLanes<5>(d, v);
+      case 6: return ShiftLeftLanes<6>(d, v);
+      case 7: return ShiftLeftLanes<7>(d, v);
+      default: break;  // other amounts take the runtime path below
+    }
+  }
+#else
+  (void)d;
+#endif
+
+  // Runtime amount.
+  return detail::SlideUpLanes(v, amt);
+}
+
+// SlideUpLanes for 16-byte descriptors with sixteen lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_LANES_D(D, 16)>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  // When the compiler knows amt, dispatch to the compile-time ShiftLeftLanes.
+  if (__builtin_constant_p(amt)) {
+    switch (amt) {
+      case 0: return v;
+      case 1: return ShiftLeftLanes<1>(d, v);
+      case 2: return ShiftLeftLanes<2>(d, v);
+      case 3: return ShiftLeftLanes<3>(d, v);
+      case 4: return ShiftLeftLanes<4>(d, v);
+      case 5: return ShiftLeftLanes<5>(d, v);
+      case 6: return ShiftLeftLanes<6>(d, v);
+      case 7: return ShiftLeftLanes<7>(d, v);
+      case 8: return ShiftLeftLanes<8>(d, v);
+      case 9: return ShiftLeftLanes<9>(d, v);
+      case 10: return ShiftLeftLanes<10>(d, v);
+      case 11: return ShiftLeftLanes<11>(d, v);
+      case 12: return ShiftLeftLanes<12>(d, v);
+      case 13: return ShiftLeftLanes<13>(d, v);
+      case 14: return ShiftLeftLanes<14>(d, v);
+      case 15: return ShiftLeftLanes<15>(d, v);
+      default: break;  // other amounts take the runtime path below
+    }
+  }
+#else
+  (void)d;
+#endif
+
+  // Runtime amount.
+  return detail::SlideUpLanes(v, amt);
+}
+
+// ------------------------------ SlideDownLanes
+
+namespace detail {
+
+// Runtime-amount SlideDownLanes for vectors of at most 8 bytes: the vector is
+// viewed as a single unsigned lane spanning d.MaxBytes() and shifted right by
+// amt lanes' worth of bits.
+template <class V, HWY_IF_V_SIZE_LE_V(V, 8)>
+HWY_INLINE V SlideDownLanes(V v, size_t amt) {
+  const DFromV<decltype(v)> d;
+  const Repartition<UnsignedFromSize<d.MaxBytes()>, decltype(d)> dv;
+  const size_t shift_bits = amt * sizeof(TFromV<V>) * 8;
+  const auto as_single_lane = BitCast(dv, v);
+  const auto shifted =
+      ShiftRightSame(as_single_lane, static_cast<int>(shift_bits));
+  return BitCast(d, shifted);
+}
+
+#if HWY_TARGET <= HWY_SSSE3
+// Runtime-amount SlideDownLanes for 16-byte vectors using a byte table lookup.
+// Indices start at the byte offset; any index greater than 15 is OR-ed with
+// all-ones before being handed to TableLookupBytesOr0.
+template <class V, HWY_IF_V_SIZE_V(V, 16)>
+HWY_INLINE V SlideDownLanes(V v, size_t amt) {
+  const DFromV<decltype(v)> d;
+  const Repartition<int8_t, decltype(d)> di8;
+  const auto first_index = static_cast<int8_t>(amt * sizeof(TFromV<V>));
+  const auto raw_indices = Iota(di8, first_index);
+  const auto past_end = VecFromMask(di8, raw_indices > Set(di8, int8_t{15}));
+  const auto byte_indices = Or(raw_indices, past_end);
+  return BitCast(d, TableLookupBytesOr0(BitCast(di8, v), byte_indices));
+}
+#else
+// Runtime-amount SlideDownLanes for 16-byte vectors without a byte table
+// lookup. A whole-u64 move is applied first when amt covers at least one u64,
+// then the remaining (amt bits mod 64) is handled with a pair of 64-bit shifts.
+template <class V, HWY_IF_V_SIZE_V(V, 16)>
+HWY_INLINE V SlideDownLanes(V v, size_t amt) {
+  const DFromV<decltype(v)> d;
+  const Repartition<int32_t, decltype(d)> di32;
+  const Repartition<uint64_t, decltype(d)> du64;
+  constexpr size_t kNumOfLanesPerU64 = 8 / sizeof(TFromV<V>);
+
+  const auto vu64 = BitCast(du64, v);
+
+  // All-ones when amt spans at least one full u64, otherwise all-zero.
+  const int32_t whole_u64_flag =
+      -static_cast<int32_t>(amt >= kNumOfLanesPerU64);
+  const auto whole_u64_mask = BitCast(du64, Set(di32, whole_u64_flag));
+
+  const auto moved_by_u64 = BitCast(du64, ShiftRightBytes<8>(du64, vu64));
+  const auto lower_src = IfVecThenElse(whole_u64_mask, moved_by_u64, vu64);
+  const auto carry_src = ShiftRightBytes<8>(du64, lower_src);
+
+  const size_t total_bits = amt * sizeof(TFromV<V>) * 8;
+  const int shr_amt = static_cast<int>(total_bits & 63);
+
+  // Combine the in-lane shift with the bits carried in from the upper u64.
+  const auto shifted_in_lane = ShiftRightSame(lower_src, shr_amt);
+  const auto carried_bits = ShiftLeftSame(carry_src, 64 - shr_amt);
+  return BitCast(d, Or(shifted_in_lane, carried_bits));
+}
+#endif
+
+}  // namespace detail
+
+// Single-lane descriptors: the input vector is returned as-is and the amount
+// is ignored.
+template <class D, HWY_IF_LANES_D(D, 1)>
+HWY_API VFromD<D> SlideDownLanes(D /* tag */, VFromD<D> v, size_t /* amt */) {
+  return v;
+}
+
+// SlideDownLanes for two-lane descriptors of at most 16 bytes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 2)>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  // When the compiler knows amt, dispatch to the compile-time ShiftRightLanes.
+  if (__builtin_constant_p(amt)) {
+    switch (amt) {
+      case 0: return v;
+      case 1: return ShiftRightLanes<1>(d, v);
+      default: break;  // other amounts take the runtime path below
+    }
+  }
+#else
+  (void)d;
+#endif
+
+  // Runtime amount.
+  return detail::SlideDownLanes(v, amt);
+}
+
+// SlideDownLanes for four-lane descriptors of at most 16 bytes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 4)>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  // When the compiler knows amt, dispatch to the compile-time ShiftRightLanes.
+  if (__builtin_constant_p(amt)) {
+    switch (amt) {
+      case 0: return v;
+      case 1: return ShiftRightLanes<1>(d, v);
+      case 2: return ShiftRightLanes<2>(d, v);
+      case 3: return ShiftRightLanes<3>(d, v);
+      default: break;  // other amounts take the runtime path below
+    }
+  }
+#else
+  (void)d;
+#endif
+
+  // Runtime amount.
+  return detail::SlideDownLanes(v, amt);
+}
+
+// SlideDownLanes for eight-lane descriptors of at most 16 bytes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_LANES_D(D, 8)>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  // When the compiler knows amt, dispatch to the compile-time ShiftRightLanes.
+  if (__builtin_constant_p(amt)) {
+    switch (amt) {
+      case 0: return v;
+      case 1: return ShiftRightLanes<1>(d, v);
+      case 2: return ShiftRightLanes<2>(d, v);
+      case 3: return ShiftRightLanes<3>(d, v);
+      case 4: return ShiftRightLanes<4>(d, v);
+      case 5: return ShiftRightLanes<5>(d, v);
+      case 6: return ShiftRightLanes<6>(d, v);
+      case 7: return ShiftRightLanes<7>(d, v);
+      default: break;  // other amounts take the runtime path below
+    }
+  }
+#else
+  (void)d;
+#endif
+
+  // Runtime amount.
+  return detail::SlideDownLanes(v, amt);
+}
+
+// SlideDownLanes for 16-byte descriptors with sixteen lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_LANES_D(D, 16)>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  // When the compiler knows amt, dispatch to the compile-time ShiftRightLanes.
+  if (__builtin_constant_p(amt)) {
+    switch (amt) {
+      case 0: return v;
+      case 1: return ShiftRightLanes<1>(d, v);
+      case 2: return ShiftRightLanes<2>(d, v);
+      case 3: return ShiftRightLanes<3>(d, v);
+      case 4: return ShiftRightLanes<4>(d, v);
+      case 5: return ShiftRightLanes<5>(d, v);
+      case 6: return ShiftRightLanes<6>(d, v);
+      case 7: return ShiftRightLanes<7>(d, v);
+      case 8: return ShiftRightLanes<8>(d, v);
+      case 9: return ShiftRightLanes<9>(d, v);
+      case 10: return ShiftRightLanes<10>(d, v);
+      case 11: return ShiftRightLanes<11>(d, v);
+      case 12: return ShiftRightLanes<12>(d, v);
+      case 13: return ShiftRightLanes<13>(d, v);
+      case 14: return ShiftRightLanes<14>(d, v);
+      case 15: return ShiftRightLanes<15>(d, v);
+      default: break;  // other amounts take the runtime path below
+    }
+  }
+#else
+  (void)d;
+#endif
+
+  // Runtime amount.
+  return detail::SlideDownLanes(v, amt);
+}
+
+// ================================================== MEMORY (4)
+
+// ------------------------------ StoreN (ExtractLane)
+
+#if HWY_TARGET <= HWY_AVX2
+
+#ifdef HWY_NATIVE_STORE_N
+#undef HWY_NATIVE_STORE_N
+#else
+#define HWY_NATIVE_STORE_N
+#endif
+
+// StoreN via a masked store. Enabled for 4- and 8-byte lanes, and additionally
+// for 1- and 2-byte lanes when HWY_TARGET <= HWY_AVX3. Stores the first
+// min(max_lanes_to_store, HWY_MAX_LANES_D(D)) lanes of v to p.
+template <class D, HWY_IF_T_SIZE_ONE_OF_D(
+                       D, (HWY_TARGET <= HWY_AVX3 ? ((1 << 1) | (1 << 2)) : 0) |
+                              (1 << 4) | (1 << 8))>
+HWY_API void StoreN(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT p,
+                    size_t max_lanes_to_store) {
+  // Clamp the request to the number of lanes the vector can hold.
+  const size_t clamped_count = HWY_MIN(max_lanes_to_store, HWY_MAX_LANES_D(D));
+
+#if HWY_COMPILER_MSVC
+  // Work around MSVC compiler bug by using a HWY_FENCE before the BlendedStore
+  HWY_FENCE;
+#endif
+
+  const auto store_mask = FirstN(d, clamped_count);
+  BlendedStore(v, store_mask, d, p);
+
+#if HWY_COMPILER_MSVC
+  // Work around MSVC compiler bug by using a HWY_FENCE after the BlendedStore
+  HWY_FENCE;
+#endif
+}
+
+#if HWY_TARGET > HWY_AVX3
+// StoreN for single-lane vectors of 1- or 2-byte lanes: stores the lane only
+// when at least one lane was requested.
+template <class D, HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2)),
+          HWY_IF_LANES_D(D, 1)>
+HWY_API void StoreN(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT p,
+                    size_t max_lanes_to_store) {
+  if (max_lanes_to_store == 0) return;
+  StoreU(v, d, p);
+}
+
+// StoreN for two-lane vectors of 1- or 2-byte lanes. Lane 1 is written first,
+// to p[1] when more than one lane is requested and to p[0] otherwise; lane 0 is
+// then written to p[0], overwriting lane 1 in the single-lane case.
+template <class D, HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2)),
+          HWY_IF_LANES_D(D, 2)>
+HWY_API void StoreN(VFromD<D> v, D /*d*/, TFromD<D>* HWY_RESTRICT p,
+                    size_t max_lanes_to_store) {
+  if (max_lanes_to_store < 1) return;
+
+  const size_t upper_lane_dest = static_cast<size_t>(max_lanes_to_store > 1);
+  p[upper_lane_dest] = detail::ExtractLane<1>(v);
+  p[0] = GetLane(v);
+}
+
+namespace detail {
+
+// Stores the trailing 1..3 lanes of an I8/U8 vector that do not fill a whole
+// 32-bit unit. v_trailing holds those lanes starting at lane 0.
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_API void AVX2UIF8Or16StoreTrailingN(VFromD<D> v_trailing, D /*d*/,
+                                        TFromD<D>* HWY_RESTRICT p,
+                                        size_t num_of_lanes_to_store) {
+  // AVX2UIF8Or16StoreTrailingN should only be called for an I8/U8 vector if
+  // (num_of_lanes_to_store & 3) != 0 is true
+  const auto v_full128 = ResizeBitCast(Full128<TFromD<D>>(), v_trailing);
+  const size_t last_index = num_of_lanes_to_store - 1;
+
+  if ((num_of_lanes_to_store & 2) == 0) {
+    // Bit 1 clear: only lane 0 is written, to the last requested position.
+    p[last_index] = GetLane(v_full128);
+    return;
+  }
+
+  // Bit 1 set: lane 2 goes to the last position first, then lanes 0 and 1 are
+  // copied as one 16-bit value to the start of the trailing group.
+  const uint16_t u16_bits = GetLane(BitCast(Full128<uint16_t>(), v_full128));
+  p[last_index] = detail::ExtractLane<2>(v_full128);
+  TFromD<D>* HWY_RESTRICT group_start =
+      p + (num_of_lanes_to_store & ~size_t{3});
+  CopyBytes<sizeof(uint16_t)>(&u16_bits, group_start);
+}
+
+// Stores the single trailing lane of a vector with 2-byte lanes.
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_API void AVX2UIF8Or16StoreTrailingN(VFromD<D> v_trailing, D /*d*/,
+                                        TFromD<D>* HWY_RESTRICT p,
+                                        size_t num_of_lanes_to_store) {
+  // AVX2UIF8Or16StoreTrailingN should only be called for an I16/U16/F16/BF16
+  // vector if (num_of_lanes_to_store & 1) == 1 is true
+  const size_t last_index = num_of_lanes_to_store - 1;
+  p[last_index] = GetLane(v_trailing);
+}
+
+}  // namespace detail
+
+// StoreN for vectors of 1- or 2-byte lanes with more than two lanes. The
+// vector is stored through a masked int32_t store, and any lanes left over
+// after the last 32-bit boundary are written by
+// detail::AVX2UIF8Or16StoreTrailingN.
+template <class D, HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2)),
+          HWY_IF_LANES_GT_D(D, 2)>
+HWY_API void StoreN(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT p,
+                    size_t max_lanes_to_store) {
+  // Clamp the request to the number of lanes the vector can hold.
+  const size_t clamped_count = HWY_MIN(max_lanes_to_store, HWY_MAX_LANES_D(D));
+
+  // Descriptors spanning at least 16 bytes, in lane, unsigned and i32 views.
+  const FixedTag<TFromD<D>, HWY_MAX(HWY_MAX_LANES_D(D), 16 / sizeof(TFromD<D>))>
+      d_full;
+  const RebindToUnsigned<decltype(d_full)> du_full;
+  const Repartition<int32_t, decltype(d_full)> di32_full;
+
+  const auto lane_mask_vec =
+      VecFromMask(du_full, FirstN(du_full, clamped_count));
+  const auto i32_store_mask = BitCast(di32_full, lane_mask_vec);
+  const auto vi32 = ResizeBitCast(di32_full, v);
+
+#if HWY_COMPILER_MSVC
+  // Work around MSVC compiler bug by using a HWY_FENCE before the BlendedStore
+  HWY_FENCE;
+#endif
+
+  BlendedStore(vi32, MaskFromVec(i32_store_mask), di32_full,
+               reinterpret_cast<int32_t*>(p));
+
+  constexpr size_t kNumOfLanesPerI32 = 4 / sizeof(TFromD<D>);
+  constexpr size_t kTrailingLenMask = kNumOfLanesPerI32 - 1;
+  const size_t leftover_lanes = (clamped_count & kTrailingLenMask);
+
+  if (leftover_lanes != 0) {
+    // Move the 32-bit unit holding the leftover lanes down to lane 0.
+    const size_t first_partial_i32 = clamped_count / kNumOfLanesPerI32;
+    const auto v_trailing =
+        ResizeBitCast(d, SlideDownLanes(di32_full, vi32, first_partial_i32));
+    detail::AVX2UIF8Or16StoreTrailingN(v_trailing, d, p, clamped_count);
+  }
+
+#if HWY_COMPILER_MSVC
+  // Work around MSVC compiler bug by using a HWY_FENCE after the BlendedStore
+  HWY_FENCE;
+#endif
+}
+#endif  // HWY_TARGET > HWY_AVX3
+#endif  // HWY_TARGET <= HWY_AVX2
+
+// ================================================== COMBINE
+
+// ------------------------------ Combine (InterleaveLower)
+
+// N = N/2 + N/2 (upper half undefined)
+// Builds a vector from two half-width vectors; lo_half is the first operand of
+// the interleave and hi_half the second.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), class VH = VFromD<Half<D>>>
+HWY_API VFromD<D> Combine(D d, VH hi_half, VH lo_half) {
+  const Half<decltype(d)> dh;
+  const RebindToUnsigned<decltype(dh)> duh;
+  // Each half is reinterpreted as a single unsigned lane as wide as the half,
+  // held in a two-lane vector type so the halves can be interleaved.
+  using HalfAsLane = Vec128<UnsignedFromSize<dh.MaxBytes()>, 2>;
+  const HalfAsLane lower_lane{BitCast(duh, lo_half).raw};
+  const HalfAsLane upper_lane{BitCast(duh, hi_half).raw};
+  const auto joined = InterleaveLower(lower_lane, upper_lane);
+  return BitCast(d, joined);
+}
+
+// ------------------------------ ZeroExtendVector (Combine, IfThenElseZero)
+
+// ZeroExtendVector for 16-byte results: passes the half vector's register
+// through _mm_move_epi64.
+template <class D, HWY_IF_V_SIZE_D(D, 16)>
+HWY_API VFromD<D> ZeroExtendVector(D d, VFromD<Half<D>> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+  const Half<decltype(du)> duh;
+  const auto lo_unsigned = BitCast(duh, lo);
+  const VFromD<decltype(du)> extended{_mm_move_epi64(lo_unsigned.raw)};
+  return BitCast(d, extended);
+}
+
+// ZeroExtendVector for results of at most 8 bytes: keeps the first
+// MaxLanes(Half<D>) lanes of the input register and zeroes the rest.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ZeroExtendVector(D d, VFromD<Half<D>> lo) {
+  const Half<D> dh;
+  const auto lower_lanes_mask = FirstN(d, MaxLanes(dh));
+  const VFromD<D> reinterpreted{lo.raw};
+  return IfThenElseZero(lower_lanes_mask, reinterpreted);
+}
+
+// ------------------------------ Concat full (InterleaveLower)
+
+// hiH,hiL loH,loL |-> hiL,loL (= lower halves)
+// Full 16-byte vectors: interleaves the lower 64-bit lanes of lo and hi.
+template <class D, HWY_IF_V_SIZE_D(D, 16)>
+HWY_API VFromD<D> ConcatLowerLower(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Repartition<uint64_t, decltype(d)> d64;
+  const auto lo64 = BitCast(d64, lo);
+  const auto hi64 = BitCast(d64, hi);
+  return BitCast(d, InterleaveLower(lo64, hi64));
+}
+
+// hiH,hiL loH,loL |-> hiH,loH (= upper halves)
+// Full 16-byte vectors: interleaves the upper 64-bit lanes of lo and hi.
+template <class D, HWY_IF_V_SIZE_D(D, 16)>
+HWY_API VFromD<D> ConcatUpperUpper(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Repartition<uint64_t, decltype(d)> d64;
+  const auto lo64 = BitCast(d64, lo);
+  const auto hi64 = BitCast(d64, hi);
+  return BitCast(d, InterleaveUpper(d64, lo64, hi64));
+}
+
+// hiH,hiL loH,loL |-> hiL,loH (= inner halves)
+// Full 16-byte vectors: delegates to an 8-byte CombineShiftRightBytes of
+// (hi, lo).
+template <class D, HWY_IF_V_SIZE_D(D, 16)>
+HWY_API VFromD<D> ConcatLowerUpper(D d, VFromD<D> hi, VFromD<D> lo) {
+  constexpr int kHalfVectorBytes = 8;
+  return CombineShiftRightBytes<kHalfVectorBytes>(d, hi, lo);
+}
+
+// hiH,hiL loH,loL |-> hiH,loL (= outer halves)
+// Full 16-byte vectors of non-float32/64 lanes: both inputs are viewed as
+// double vectors and merged with either a shuffle or a blend, depending on
+// the target.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API VFromD<D> ConcatUpperLower(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Repartition<double, decltype(d)> dd;
+  const auto hi_pd = BitCast(dd, hi).raw;
+  const auto lo_pd = BitCast(dd, lo).raw;
+#if HWY_TARGET >= HWY_SSSE3
+  const Vec128<double> merged{
+      _mm_shuffle_pd(lo_pd, hi_pd, _MM_SHUFFLE2(1, 0))};
+  return BitCast(d, merged);
+#else
+  // _mm_blend_epi16 has throughput 1/cycle on SKX, whereas _pd can do 3/cycle.
+  const Vec128<double> merged{_mm_blend_pd(hi_pd, lo_pd, 1)};
+  return BitCast(d, merged);
+#endif
+}
+// Full float vectors: merged with a float shuffle, or with a double blend on
+// targets where the blend branch is compiled.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API Vec128<float> ConcatUpperLower(D d, Vec128<float> hi,
+                                       Vec128<float> lo) {
+#if HWY_TARGET >= HWY_SSSE3
+  (void)d;
+  const auto merged = _mm_shuffle_ps(lo.raw, hi.raw, _MM_SHUFFLE(3, 2, 1, 0));
+  return Vec128<float>{merged};
+#else
+  // _mm_shuffle_ps has throughput 1/cycle on SKX, whereas blend can do 3/cycle.
+  const RepartitionToWide<decltype(d)> dd;
+  const auto hi_pd = BitCast(dd, hi).raw;
+  const auto lo_pd = BitCast(dd, lo).raw;
+  return BitCast(d, Vec128<double>{_mm_blend_pd(hi_pd, lo_pd, 1)});
+#endif
+}
+// Full double vectors: merged with a shuffle or a blend, depending on the
+// target.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API Vec128<double> ConcatUpperLower(D /* tag */, Vec128<double> hi,
+                                        Vec128<double> lo) {
+#if HWY_TARGET >= HWY_SSSE3
+  const auto merged = _mm_shuffle_pd(lo.raw, hi.raw, _MM_SHUFFLE2(1, 0));
+  return Vec128<double>{merged};
+#else
+  // _mm_shuffle_pd has throughput 1/cycle on SKX, whereas blend can do 3/cycle.
+  const auto merged = _mm_blend_pd(hi.raw, lo.raw, 1);
+  return Vec128<double>{merged};
+#endif
+}
+
+// ------------------------------ Concat partial (Combine, LowerHalf)
+
+// Partial vectors (at most 8 bytes): combines the lower halves of hi and lo.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ConcatLowerLower(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Half<decltype(d)> d2;
+  const auto hi_lower = LowerHalf(d2, hi);
+  const auto lo_lower = LowerHalf(d2, lo);
+  return Combine(d, hi_lower, lo_lower);
+}
+
+// Partial vectors (at most 8 bytes): combines the upper halves of hi and lo.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ConcatUpperUpper(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Half<decltype(d)> d2;
+  const auto hi_upper = UpperHalf(d2, hi);
+  const auto lo_upper = UpperHalf(d2, lo);
+  return Combine(d, hi_upper, lo_upper);
+}
+
+// Partial vectors (at most 8 bytes): combines the lower half of hi with the
+// upper half of lo.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ConcatLowerUpper(D d, const VFromD<D> hi,
+                                   const VFromD<D> lo) {
+  const Half<decltype(d)> d2;
+  const auto hi_lower = LowerHalf(d2, hi);
+  const auto lo_upper = UpperHalf(d2, lo);
+  return Combine(d, hi_lower, lo_upper);
+}
+
+// Partial vectors (at most 8 bytes): combines the upper half of hi with the
+// lower half of lo.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ConcatUpperLower(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Half<decltype(d)> d2;
+  const auto hi_upper = UpperHalf(d2, hi);
+  const auto lo_lower = LowerHalf(d2, lo);
+  return Combine(d, hi_upper, lo_lower);
+}
+
+// ------------------------------ ConcatOdd
+
+// 8-bit full
+// Moves the odd bytes into the low byte of each u16 lane, then packs lo and hi
+// into one vector.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Repartition<uint16_t, decltype(d)> dw;
+  // Right-shift 8 bits per u16 so we can pack.
+  const Vec128<uint16_t> hi_odd = ShiftRight<8>(BitCast(dw, hi));
+  const Vec128<uint16_t> lo_odd = ShiftRight<8>(BitCast(dw, lo));
+  const auto packed = _mm_packus_epi16(lo_odd.raw, hi_odd.raw);
+  return VFromD<D>{packed};
+}
+
+// 8-bit x8
+// ConcatOdd for eight 8-bit lanes: shift-and-pack on SSE2, byte table lookup
+// plus a 32-bit interleave on other targets.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+#if HWY_TARGET == HWY_SSE2
+  const Repartition<uint16_t, decltype(d)> dw;
+  // Right-shift 8 bits per u16 so we can pack.
+  const Vec64<uint16_t> hi_odd = ShiftRight<8>(BitCast(dw, hi));
+  const Vec64<uint16_t> lo_odd = ShiftRight<8>(BitCast(dw, lo));
+  const auto packed = _mm_packus_epi16(lo_odd.raw, hi_odd.raw);
+  // Gather 32-bit elements 0 and 2 of the packed result.
+  return VFromD<D>{_mm_shuffle_epi32(packed, _MM_SHUFFLE(2, 0, 2, 0))};
+#else
+  const Repartition<uint32_t, decltype(d)> du32;
+  // Don't care about upper half, no need to zero.
+  alignas(16) const uint8_t kCompactOddU8[8] = {1, 3, 5, 7};
+  const VFromD<D> odd_selector =
+      BitCast(d, Load(Full64<uint8_t>(), kCompactOddU8));
+  const VFromD<D> lo_compacted = TableLookupBytes(lo, odd_selector);
+  const VFromD<D> hi_compacted = TableLookupBytes(hi, odd_selector);
+  const auto joined = InterleaveLower(du32, BitCast(du32, lo_compacted),
+                                      BitCast(du32, hi_compacted));
+  return BitCast(d, joined);
+#endif
+}
+
+// 8-bit x4
+// ConcatOdd for four 8-bit lanes: shift, combine and pack on SSE2, byte table
+// lookup plus a 16-bit interleave on other targets.
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+#if HWY_TARGET == HWY_SSE2
+  const Repartition<uint16_t, decltype(d)> dw;
+  const Twice<decltype(dw)> dw_2;
+  // Right-shift 8 bits per u16 so we can pack.
+  const Vec32<uint16_t> hi_odd = ShiftRight<8>(BitCast(dw, hi));
+  const Vec32<uint16_t> lo_odd = ShiftRight<8>(BitCast(dw, lo));
+  const Vec64<uint16_t> hi_then_lo = Combine(dw_2, hi_odd, lo_odd);
+  return VFromD<D>{_mm_packus_epi16(hi_then_lo.raw, hi_then_lo.raw)};
+#else
+  const Repartition<uint16_t, decltype(d)> du16;
+  // Don't care about upper half, no need to zero.
+  alignas(16) const uint8_t kCompactOddU8[4] = {1, 3};
+  const VFromD<D> odd_selector =
+      BitCast(d, Load(Full32<uint8_t>(), kCompactOddU8));
+  const VFromD<D> lo_compacted = TableLookupBytes(lo, odd_selector);
+  const VFromD<D> hi_compacted = TableLookupBytes(hi, odd_selector);
+  const auto joined = InterleaveLower(du16, BitCast(du16, lo_compacted),
+                                      BitCast(du16, hi_compacted));
+  return BitCast(d, joined);
+#endif
+}
+
+// 16-bit full
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+  // Right-shift 16 bits per i32 - a *signed* shift of 0x8000xxxx returns
+  // 0xFFFF8000, which correctly saturates to 0x8000.
+  const Repartition<int32_t, decltype(d)> dw;
+  const Vec128<int32_t> hi_odd = ShiftRight<16>(BitCast(dw, hi));
+  const Vec128<int32_t> lo_odd = ShiftRight<16>(BitCast(dw, lo));
+  const auto packed = _mm_packs_epi32(lo_odd.raw, hi_odd.raw);
+  return VFromD<D>{packed};
+}
+
+// 16-bit x4
+// ConcatOdd for four 16-bit lanes: signed shift and pack on SSE2, byte table
+// lookup plus a 32-bit interleave on other targets.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+#if HWY_TARGET == HWY_SSE2
+  // Right-shift 16 bits per i32 - a *signed* shift of 0x8000xxxx returns
+  // 0xFFFF8000, which correctly saturates to 0x8000.
+  const Repartition<int32_t, decltype(d)> dw;
+  const Vec64<int32_t> hi_odd = ShiftRight<16>(BitCast(dw, hi));
+  const Vec64<int32_t> lo_odd = ShiftRight<16>(BitCast(dw, lo));
+  const auto packed = _mm_packs_epi32(lo_odd.raw, hi_odd.raw);
+  // Gather 32-bit elements 0 and 2 of the packed result.
+  return VFromD<D>{_mm_shuffle_epi32(packed, _MM_SHUFFLE(2, 0, 2, 0))};
+#else
+  const Repartition<uint32_t, decltype(d)> du32;
+  // Don't care about upper half, no need to zero.
+  alignas(16) const uint8_t kCompactOddU16[8] = {2, 3, 6, 7};
+  const VFromD<D> odd_selector =
+      BitCast(d, Load(Full64<uint8_t>(), kCompactOddU16));
+  const VFromD<D> lo_compacted = TableLookupBytes(lo, odd_selector);
+  const VFromD<D> hi_compacted = TableLookupBytes(hi, odd_selector);
+  const auto joined = InterleaveLower(du32, BitCast(du32, lo_compacted),
+                                      BitCast(du32, hi_compacted));
+  return BitCast(d, joined);
+#endif
+}
+
+// 32-bit full
+// Selects elements 1 and 3 of lo followed by elements 1 and 3 of hi using a
+// float shuffle on the bit-cast inputs.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToFloat<decltype(d)> df;
+  const auto lo_ps = BitCast(df, lo).raw;
+  const auto hi_ps = BitCast(df, hi).raw;
+  const Vec128<float> odd_lanes{
+      _mm_shuffle_ps(lo_ps, hi_ps, _MM_SHUFFLE(3, 1, 3, 1))};
+  return BitCast(d, odd_lanes);
+}
+
+// Any type x2
+// Two-lane vectors of any lane type: delegates to InterleaveUpper(d, lo, hi).
+template <class D, HWY_IF_LANES_D(D, 2)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+  const VFromD<D> odd_lanes = InterleaveUpper(d, lo, hi);
+  return odd_lanes;
+}
+
+// ------------------------------ ConcatEven (InterleaveLower)
+
+// 8-bit full
+// Masks each u16 lane down to its low byte, then packs lo and hi into one
+// vector.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Repartition<uint16_t, decltype(d)> dw;
+  // Isolate lower 8 bits per u16 so we can pack.
+  const Vec128<uint16_t> low_byte_mask = Set(dw, 0x00FF);
+  const Vec128<uint16_t> hi_even = And(BitCast(dw, hi), low_byte_mask);
+  const Vec128<uint16_t> lo_even = And(BitCast(dw, lo), low_byte_mask);
+  const auto packed = _mm_packus_epi16(lo_even.raw, hi_even.raw);
+  return VFromD<D>{packed};
+}
+
+// 8-bit x8
+// ConcatEven for eight 8-bit lanes: mask-and-pack on SSE2, byte table lookup
+// plus a 32-bit interleave on other targets.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+#if HWY_TARGET == HWY_SSE2
+  const Repartition<uint16_t, decltype(d)> dw;
+  // Isolate lower 8 bits per u16 so we can pack.
+  const Vec64<uint16_t> low_byte_mask = Set(dw, 0x00FF);
+  const Vec64<uint16_t> hi_even = And(BitCast(dw, hi), low_byte_mask);
+  const Vec64<uint16_t> lo_even = And(BitCast(dw, lo), low_byte_mask);
+  const auto packed = _mm_packus_epi16(lo_even.raw, hi_even.raw);
+  // Gather 32-bit elements 0 and 2 of the packed result.
+  return VFromD<D>{_mm_shuffle_epi32(packed, _MM_SHUFFLE(2, 0, 2, 0))};
+#else
+  const Repartition<uint32_t, decltype(d)> du32;
+  // Don't care about upper half, no need to zero.
+  alignas(16) const uint8_t kCompactEvenU8[8] = {0, 2, 4, 6};
+  const VFromD<D> even_selector =
+      BitCast(d, Load(Full64<uint8_t>(), kCompactEvenU8));
+  const VFromD<D> lo_compacted = TableLookupBytes(lo, even_selector);
+  const VFromD<D> hi_compacted = TableLookupBytes(hi, even_selector);
+  const auto joined = InterleaveLower(du32, BitCast(du32, lo_compacted),
+                                      BitCast(du32, hi_compacted));
+  return BitCast(d, joined);
+#endif
+}
+
+// 8-bit x4
+// ConcatEven for four 8-bit lanes: mask, combine and pack on SSE2, byte table
+// lookup plus a 16-bit interleave on other targets.
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+#if HWY_TARGET == HWY_SSE2
+  const Repartition<uint16_t, decltype(d)> dw;
+  const Twice<decltype(dw)> dw_2;
+  // Isolate lower 8 bits per u16 so we can pack.
+  const Vec32<uint16_t> low_byte_mask = Set(dw, 0x00FF);
+  const Vec32<uint16_t> hi_even = And(BitCast(dw, hi), low_byte_mask);
+  const Vec32<uint16_t> lo_even = And(BitCast(dw, lo), low_byte_mask);
+  const Vec64<uint16_t> hi_then_lo = Combine(dw_2, hi_even, lo_even);
+  return VFromD<D>{_mm_packus_epi16(hi_then_lo.raw, hi_then_lo.raw)};
+#else
+  const Repartition<uint16_t, decltype(d)> du16;
+  // Don't care about upper half, no need to zero.
+  alignas(16) const uint8_t kCompactEvenU8[4] = {0, 2};
+  const VFromD<D> even_selector =
+      BitCast(d, Load(Full32<uint8_t>(), kCompactEvenU8));
+  const VFromD<D> lo_compacted = TableLookupBytes(lo, even_selector);
+  const VFromD<D> hi_compacted = TableLookupBytes(hi, even_selector);
+  const auto joined = InterleaveLower(du16, BitCast(du16, lo_compacted),
+                                      BitCast(du16, hi_compacted));
+  return BitCast(d, joined);
+#endif
+}
+
+// 16-bit full
+// ConcatEven for eight 16-bit lanes, with three target-dependent strategies:
+// mask and unsigned pack, shift left then ConcatOdd, or byte table lookup
+// followed by ConcatLowerLower.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+#if HWY_TARGET <= HWY_SSE4
+  // Isolate lower 16 bits per u32 so we can pack.
+  const Repartition<uint32_t, decltype(d)> dw;
+  const Vec128<uint32_t> low_half_mask = Set(dw, 0x0000FFFF);
+  const Vec128<uint32_t> hi_even = And(BitCast(dw, hi), low_half_mask);
+  const Vec128<uint32_t> lo_even = And(BitCast(dw, lo), low_half_mask);
+  const auto packed = _mm_packus_epi32(lo_even.raw, hi_even.raw);
+  return VFromD<D>{packed};
+#elif HWY_TARGET == HWY_SSE2
+  // Move the even 16-bit lanes into the odd positions and reuse ConcatOdd.
+  const Repartition<uint32_t, decltype(d)> dw;
+  const auto hi_moved = BitCast(d, ShiftLeft<16>(BitCast(dw, hi)));
+  const auto lo_moved = BitCast(d, ShiftLeft<16>(BitCast(dw, lo)));
+  return ConcatOdd(d, hi_moved, lo_moved);
+#else
+  const RebindToUnsigned<decltype(d)> du;
+  // packs_epi32 saturates 0x8000 to 0x7FFF. Instead ConcatEven within the two
+  // inputs, then concatenate them.
+  alignas(16)
+      const uint16_t kCompactEvenU16[8] = {0x0100, 0x0504, 0x0908, 0x0D0C};
+  const VFromD<D> even_selector = BitCast(d, Load(du, kCompactEvenU16));
+  const VFromD<D> lo_compacted = TableLookupBytes(lo, even_selector);
+  const VFromD<D> hi_compacted = TableLookupBytes(hi, even_selector);
+  return ConcatLowerLower(d, hi_compacted, lo_compacted);
+#endif
+}
+
+// 16-bit x4
+// ConcatEven for four 16-bit lanes: shift left then ConcatOdd on SSE2, byte
+// table lookup plus a 32-bit interleave on other targets.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+#if HWY_TARGET == HWY_SSE2
+  // Move the even 16-bit lanes into the odd positions and reuse ConcatOdd.
+  const Repartition<uint32_t, decltype(d)> dw;
+  const auto hi_moved = BitCast(d, ShiftLeft<16>(BitCast(dw, hi)));
+  const auto lo_moved = BitCast(d, ShiftLeft<16>(BitCast(dw, lo)));
+  return ConcatOdd(d, hi_moved, lo_moved);
+#else
+  const Repartition<uint32_t, decltype(d)> du32;
+  // Don't care about upper half, no need to zero.
+  alignas(16) const uint8_t kCompactEvenU16[8] = {0, 1, 4, 5};
+  const VFromD<D> even_selector =
+      BitCast(d, Load(Full64<uint8_t>(), kCompactEvenU16));
+  const VFromD<D> lo_compacted = TableLookupBytes(lo, even_selector);
+  const VFromD<D> hi_compacted = TableLookupBytes(hi, even_selector);
+  const auto joined = InterleaveLower(du32, BitCast(du32, lo_compacted),
+                                      BitCast(du32, hi_compacted));
+  return BitCast(d, joined);
+#endif
+}
+
+// 32-bit full
+// 32-bit integer lanes: selects elements 0 and 2 of lo followed by elements 0
+// and 2 of hi using a float shuffle on the bit-cast inputs.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_UI32_D(D)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToFloat<decltype(d)> df;
+  const auto lo_ps = BitCast(df, lo).raw;
+  const auto hi_ps = BitCast(df, hi).raw;
+  const Vec128<float> even_lanes{
+      _mm_shuffle_ps(lo_ps, hi_ps, _MM_SHUFFLE(2, 0, 2, 0))};
+  return BitCast(d, even_lanes);
+}
+// Float lanes: selects elements 0 and 2 of lo followed by elements 0 and 2 of
+// hi with a single float shuffle.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ConcatEven(D /* d */, VFromD<D> hi, VFromD<D> lo) {
+  const auto even_lanes =
+      _mm_shuffle_ps(lo.raw, hi.raw, _MM_SHUFFLE(2, 0, 2, 0));
+  return VFromD<D>{even_lanes};
+}
+
+// Any T x2
+// Two-lane vectors of any lane type: delegates to InterleaveLower(d, lo, hi).
+template <class D, HWY_IF_LANES_D(D, 2)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+  const VFromD<D> even_lanes = InterleaveLower(d, lo, hi);
+  return even_lanes;
+}
+
+// ------------------------------ DupEven (InterleaveLower)
+
+// Single-lane vectors are returned unchanged.
+template <typename T>
+HWY_API Vec128<T, 1> DupEven(const Vec128<T, 1> v) {
+  const Vec128<T, 1> unchanged = v;
+  return unchanged;
+}
+
+// Two-lane vectors: interleaves the lower lanes of v with itself.
+template <typename T>
+HWY_API Vec128<T, 2> DupEven(const Vec128<T, 2> v) {
+  const DFromV<decltype(v)> d;
+  return InterleaveLower(d, v, v);
+}
+
+// DupEven for 8-bit lanes in vectors larger than 2 bytes: byte table lookup
+// where available, otherwise a 16-bit shift merged through a per-u16 byte
+// mask.
+template <typename V, HWY_IF_T_SIZE_V(V, 1), HWY_IF_V_SIZE_GT_V(V, 2)>
+HWY_API V DupEven(V v) {
+  const DFromV<decltype(v)> d;
+
+#if HWY_TARGET <= HWY_SSSE3
+  const RebindToUnsigned<decltype(d)> du;
+  // Each even byte index appears twice.
+  alignas(16) static constexpr uint8_t kShuffle[16] = {
+      0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14};
+  const auto byte_selector = BitCast(d, LoadDup128(du, kShuffle));
+  return TableLookupBytes(v, byte_selector);
+#else
+  const Repartition<uint16_t, decltype(d)> du16;
+  // Where the mask bits are set (upper byte of each u16), take the copy of v
+  // shifted left by 8 bits; elsewhere keep v.
+  const auto upper_byte_mask = BitCast(d, Set(du16, uint16_t{0xFF00}));
+  const auto shifted_up = BitCast(d, ShiftLeft<8>(BitCast(du16, v)));
+  return IfVecThenElse(upper_byte_mask, shifted_up, v);
+#endif
+}
+
+// DupEven for 64-bit vectors of 16-bit lanes: shuffles the low four 16-bit
+// elements with selector (2, 2, 0, 0).
+// The input is bit-cast to the unsigned descriptor before the integer
+// intrinsic, matching the generic 16-bit DupEven and the DupOdd overloads, so
+// this overload does not depend on T's raw register type being __m128i.
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec64<T> DupEven(const Vec64<T> v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;  // for float16_t
+  return BitCast(d, VFromD<decltype(du)>{_mm_shufflelo_epi16(
+                        BitCast(du, v).raw, _MM_SHUFFLE(2, 2, 0, 0))});
+}
+
+// Generic for all vector lengths.
+// DupEven for 16-bit lanes (generic over vector length): byte table lookup
+// where available, otherwise low and high 16-bit shuffles with selector
+// (2, 2, 0, 0).
+template <class V, HWY_IF_T_SIZE_V(V, 2)>
+HWY_API V DupEven(const V v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;  // for float16_t
+#if HWY_TARGET <= HWY_SSSE3
+  // Each entry is a pair of byte indices; every even lane's pair is repeated.
+  alignas(16) static constexpr uint16_t kShuffle[8] = {
+      0x0100, 0x0100, 0x0504, 0x0504, 0x0908, 0x0908, 0x0d0c, 0x0d0c};
+  const auto byte_selector = BitCast(d, LoadDup128(du, kShuffle));
+  return TableLookupBytes(v, byte_selector);
+#else
+  const auto low_done =
+      _mm_shufflelo_epi16(BitCast(du, v).raw, _MM_SHUFFLE(2, 2, 0, 0));
+  const auto both_done = _mm_shufflehi_epi16(low_done, _MM_SHUFFLE(2, 2, 0, 0));
+  return BitCast(d, VFromD<decltype(du)>{both_done});
+#endif
+}
+
+// Full vectors of 32-bit integers: 32-bit shuffle with selector (2, 2, 0, 0).
+template <typename T, HWY_IF_UI32(T)>
+HWY_API Vec128<T> DupEven(Vec128<T> v) {
+  const auto duplicated = _mm_shuffle_epi32(v.raw, _MM_SHUFFLE(2, 2, 0, 0));
+  return Vec128<T>{duplicated};
+}
+
+// Full float vectors: float shuffle of v with itself, selector (2, 2, 0, 0).
+HWY_API Vec128<float> DupEven(Vec128<float> v) {
+  const auto duplicated = _mm_shuffle_ps(v.raw, v.raw, _MM_SHUFFLE(2, 2, 0, 0));
+  return Vec128<float>{duplicated};
+}
+
+// ------------------------------ DupOdd (InterleaveUpper)
+
+// Single-lane vectors of 8-bit lanes are returned unchanged.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T, 1> DupOdd(Vec128<T, 1> v) {
+  const Vec128<T, 1> unchanged = v;
+  return unchanged;
+}
+
+// DupOdd for 8-bit lanes in vectors larger than 1 byte: byte table lookup
+// where available, otherwise a 16-bit shift merged through a per-u16 byte
+// mask.
+template <typename V, HWY_IF_T_SIZE_V(V, 1), HWY_IF_V_SIZE_GT_V(V, 1)>
+HWY_API V DupOdd(V v) {
+  const DFromV<decltype(v)> d;
+
+#if HWY_TARGET <= HWY_SSSE3
+  const RebindToUnsigned<decltype(d)> du;
+  // Each odd byte index appears twice.
+  alignas(16) static constexpr uint8_t kShuffle[16] = {
+      1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15};
+  const auto byte_selector = BitCast(d, LoadDup128(du, kShuffle));
+  return TableLookupBytes(v, byte_selector);
+#else
+  const Repartition<uint16_t, decltype(d)> du16;
+  // Where the mask bits are set (lower byte of each u16), take the copy of v
+  // shifted right by 8 bits; elsewhere keep v.
+  const auto lower_byte_mask = BitCast(d, Set(du16, uint16_t{0x00FF}));
+  const auto shifted_down = BitCast(d, ShiftRight<8>(BitCast(du16, v)));
+  return IfVecThenElse(lower_byte_mask, shifted_down, v);
+#endif
+}
+
+// DupOdd for at most four 16-bit lanes: shuffles the low four 16-bit elements
+// with selector (3, 3, 1, 1).
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 2), HWY_IF_LANES_LE(N, 4)>
+HWY_API Vec128<T, N> DupOdd(Vec128<T, N> v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;  // for float16_t
+  const auto duplicated =
+      _mm_shufflelo_epi16(BitCast(du, v).raw, _MM_SHUFFLE(3, 3, 1, 1));
+  return BitCast(d, VFromD<decltype(du)>{duplicated});
+}
+
+// Generic for all vector lengths.
+// DupOdd for 16-bit lanes in vectors larger than 8 bytes: byte table lookup
+// where available, otherwise low and high 16-bit shuffles with selector
+// (3, 3, 1, 1).
+template <typename V, HWY_IF_T_SIZE_V(V, 2), HWY_IF_V_SIZE_GT_V(V, 8)>
+HWY_API V DupOdd(V v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;  // for float16_t
+#if HWY_TARGET <= HWY_SSSE3
+  // Each entry is a pair of byte indices; every odd lane's pair is repeated.
+  alignas(16) static constexpr uint16_t kShuffle[8] = {
+      0x0302, 0x0302, 0x0706, 0x0706, 0x0b0a, 0x0b0a, 0x0f0e, 0x0f0e};
+  const auto byte_selector = BitCast(d, LoadDup128(du, kShuffle));
+  return TableLookupBytes(v, byte_selector);
+#else
+  const auto low_done =
+      _mm_shufflelo_epi16(BitCast(du, v).raw, _MM_SHUFFLE(3, 3, 1, 1));
+  const auto both_done = _mm_shufflehi_epi16(low_done, _MM_SHUFFLE(3, 3, 1, 1));
+  return BitCast(d, VFromD<decltype(du)>{both_done});
+#endif
+}
+
+template <typename T, size_t N, HWY_IF_UI32(T)>
+HWY_API Vec128<T, N> DupOdd(Vec128<T, N> v) {  // copies every odd 32-bit lane into the even lane below it
+  return Vec128<T, N>{_mm_shuffle_epi32(v.raw, _MM_SHUFFLE(3, 3, 1, 1))};  // result lanes (hi..lo) = v[3], v[3], v[1], v[1]
+}
+template <size_t N>
+HWY_API Vec128<float, N> DupOdd(Vec128<float, N> v) {  // float overload of the 32-bit DupOdd
+  return Vec128<float, N>{
+      _mm_shuffle_ps(v.raw, v.raw, _MM_SHUFFLE(3, 3, 1, 1))};  // v is both sources; result lanes (hi..lo) = v[3], v[3], v[1], v[1]
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T, N> DupOdd(const Vec128<T, N> v) {  // 64-bit lanes: delegates to InterleaveUpper
+  return InterleaveUpper(DFromV<decltype(v)>(), v, v);  // v is passed as both inputs
+}
+
+// ------------------------------ TwoTablesLookupLanes (DupEven)
+
+template <typename T, size_t N, HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_API Vec128<T, N> TwoTablesLookupLanes(Vec128<T, N> a, Vec128<T, N> b,
+                                          Indices128<T, N> idx) {  // partial vectors (<= 8 bytes): one lookup in a double-width table
+  const DFromV<decltype(a)> d;
+  const Twice<decltype(d)> dt;  // tag for the combined, twice-as-wide table
+// TableLookupLanes currently requires table and index vectors to be the same
+// size, though a half-length index vector would be sufficient here.
+#if HWY_IS_MSAN  // NOTE(review): presumably duplicates idx so the unused upper half is initialized - confirm
+  const Vec128<T, N> idx_vec{idx.raw};
+  const Indices128<T, N * 2> idx2{Combine(dt, idx_vec, idx_vec).raw};
+#else
+  // We only keep LowerHalf of the result, which is valid in idx.
+  const Indices128<T, N * 2> idx2{idx.raw};
+#endif
+  return LowerHalf(d, TableLookupLanes(Combine(dt, b, a), idx2));  // a and b joined into one table; only the lower half of the lookup is returned
+}
+
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec128<T> TwoTablesLookupLanes(Vec128<T> a, Vec128<T> b,
+                                       Indices128<T> idx) {  // byte lanes: indices up to 15 read from a, larger ones from b
+#if HWY_TARGET <= HWY_AVX3_DL
+  return Vec128<T>{_mm_permutex2var_epi8(a.raw, idx.raw, b.raw)};  // single two-source byte permute
+#else  // AVX3 or below
+  const DFromV<decltype(a)> d;
+  const Vec128<T> idx_vec{idx.raw};
+
+#if HWY_TARGET <= HWY_SSE4
+  const Repartition<uint16_t, decltype(d)> du16;
+  const auto sel_hi_mask =
+      MaskFromVec(BitCast(d, ShiftLeft<3>(BitCast(du16, idx_vec))));  // moves index bit 4 (value 16) into bit 7 of each byte
+#else
+  const RebindToSigned<decltype(d)> di;
+  const auto sel_hi_mask =
+      RebindMask(d, BitCast(di, idx_vec) > Set(di, int8_t{15}));  // true where the index exceeds 15, i.e. selects b
+#endif
+
+  const auto lo_lookup_result = TableLookupBytes(a, idx_vec);  // lookup in a for every lane
+#if HWY_TARGET <= HWY_AVX3
+  const Vec128<T> lookup_result{_mm_mask_shuffle_epi8(
+      lo_lookup_result.raw, sel_hi_mask.raw, b.raw, idx_vec.raw)};  // masked lanes are shuffled from b; unmasked lanes keep the a result
+  return lookup_result;
+#else
+  const auto hi_lookup_result = TableLookupBytes(b, idx_vec);  // lookup in b for every lane
+  return IfThenElse(sel_hi_mask, hi_lookup_result, lo_lookup_result);  // pick the b result where the mask is set
+#endif  // HWY_TARGET <= HWY_AVX3
+#endif  // HWY_TARGET <= HWY_AVX3_DL
+}
+
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec128<T> TwoTablesLookupLanes(Vec128<T> a, Vec128<T> b,
+                                       Indices128<T> idx) {  // 16-bit lanes: indices up to 7 read from a, larger ones from b
+#if HWY_TARGET <= HWY_AVX3
+  return Vec128<T>{_mm_permutex2var_epi16(a.raw, idx.raw, b.raw)};  // single two-source 16-bit permute
+#elif HWY_TARGET == HWY_SSE2
+  const DFromV<decltype(a)> d;
+  const RebindToSigned<decltype(d)> di;
+  const Vec128<T> idx_vec{idx.raw};
+  const auto sel_hi_mask =
+      RebindMask(d, BitCast(di, idx_vec) > Set(di, int16_t{7}));  // true where the index exceeds 7, i.e. selects b
+  const auto lo_lookup_result = TableLookupLanes(a, idx);
+  const auto hi_lookup_result = TableLookupLanes(b, idx);
+  return IfThenElse(sel_hi_mask, hi_lookup_result, lo_lookup_result);
+#else  // remaining targets: reinterpret as bytes and reuse the byte-lane overload
+  const DFromV<decltype(a)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  return BitCast(d, TwoTablesLookupLanes(BitCast(du8, a), BitCast(du8, b),
+                                         Indices128<uint8_t>{idx.raw}));  // NOTE(review): assumes idx.raw already holds byte-granular indices on these targets - confirm
+#endif
+}
+
+template <typename T, HWY_IF_UI32(T)>
+HWY_API Vec128<T> TwoTablesLookupLanes(Vec128<T> a, Vec128<T> b,
+                                       Indices128<T> idx) {  // 32-bit integer lanes: indices up to 3 read from a, larger ones from b
+#if HWY_TARGET <= HWY_AVX3
+  return Vec128<T>{_mm_permutex2var_epi32(a.raw, idx.raw, b.raw)};  // single two-source 32-bit permute
+#else  // AVX2 or below
+  const DFromV<decltype(a)> d;
+
+#if HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_SSE2
+  const Vec128<T> idx_vec{idx.raw};
+
+#if HWY_TARGET <= HWY_AVX2
+  const RebindToFloat<decltype(d)> d_sel;
+  const auto sel_hi_mask = MaskFromVec(BitCast(d_sel, ShiftLeft<29>(idx_vec)));  // moves index bit 2 (value 4) into the sign bit
+#else
+  const RebindToSigned<decltype(d)> d_sel;
+  const auto sel_hi_mask = BitCast(d_sel, idx_vec) > Set(d_sel, int32_t{3});  // true where the index exceeds 3, i.e. selects b
+#endif
+
+  const auto lo_lookup_result = BitCast(d_sel, TableLookupLanes(a, idx));
+  const auto hi_lookup_result = BitCast(d_sel, TableLookupLanes(b, idx));
+  return BitCast(d,
+                 IfThenElse(sel_hi_mask, hi_lookup_result, lo_lookup_result));  // select is done in d_sel's lane type, then cast back
+#else   // SSSE3 or SSE4
+  const Repartition<uint8_t, decltype(d)> du8;
+  return BitCast(d, TwoTablesLookupLanes(BitCast(du8, a), BitCast(du8, b),
+                                         Indices128<uint8_t>{idx.raw}));  // NOTE(review): assumes idx.raw already holds byte-granular indices on these targets - confirm
+#endif  // HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_SSE2
+#endif  // HWY_TARGET <= HWY_AVX3
+}
+
+#if HWY_HAVE_FLOAT16  // overload exists only when HWY_HAVE_FLOAT16 is nonzero
+HWY_API Vec128<float16_t> TwoTablesLookupLanes(Vec128<float16_t> a,
+                                               Vec128<float16_t> b,
+                                               Indices128<float16_t> idx) {
+  return Vec128<float16_t>{_mm_permutex2var_ph(a.raw, idx.raw, b.raw)};  // single two-source half-precision permute
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec128<float> TwoTablesLookupLanes(Vec128<float> a, Vec128<float> b,
+                                           Indices128<float> idx) {  // float lanes: indices up to 3 read from a, larger ones from b
+#if HWY_TARGET <= HWY_AVX3
+  return Vec128<float>{_mm_permutex2var_ps(a.raw, idx.raw, b.raw)};  // single two-source float permute
+#elif HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_SSE2
+  const DFromV<decltype(a)> d;
+
+#if HWY_TARGET <= HWY_AVX2
+  const auto sel_hi_mask =
+      MaskFromVec(BitCast(d, ShiftLeft<29>(Vec128<int32_t>{idx.raw})));  // moves index bit 2 (value 4) into the sign bit
+#else
+  const RebindToSigned<decltype(d)> di;
+  const auto sel_hi_mask =
+      RebindMask(d, Vec128<int32_t>{idx.raw} > Set(di, int32_t{3}));  // true where the index exceeds 3, i.e. selects b
+#endif
+
+  const auto lo_lookup_result = TableLookupLanes(a, idx);
+  const auto hi_lookup_result = TableLookupLanes(b, idx);
+  return IfThenElse(sel_hi_mask, hi_lookup_result, lo_lookup_result);
+#else  // SSSE3 or SSE4
+  const DFromV<decltype(a)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  return BitCast(d, TwoTablesLookupLanes(BitCast(du8, a), BitCast(du8, b),
+                                         Indices128<uint8_t>{idx.raw}));  // NOTE(review): assumes idx.raw already holds byte-granular indices on these targets - confirm
+#endif
+}
+
+template <typename T, HWY_IF_UI64(T)>
+HWY_API Vec128<T> TwoTablesLookupLanes(Vec128<T> a, Vec128<T> b,
+                                       Indices128<T> idx) {  // 64-bit integer lanes: index bit 0 picks the lane, bit 1 picks the table
+#if HWY_TARGET <= HWY_AVX3
+  return Vec128<T>{_mm_permutex2var_epi64(a.raw, idx.raw, b.raw)};  // single two-source 64-bit permute
+#else
+  const DFromV<decltype(a)> d;
+  const Vec128<T> idx_vec{idx.raw};
+  const Indices128<T> idx_mod{And(idx_vec, Set(d, T{1})).raw};  // index reduced to 0/1, i.e. the lane within one table
+
+#if HWY_TARGET <= HWY_SSE4
+  const RebindToFloat<decltype(d)> d_sel;
+  const auto sel_hi_mask = MaskFromVec(BitCast(d_sel, ShiftLeft<62>(idx_vec)));  // moves index bit 1 (value 2) into the sign bit
+#else   // SSE2 or SSSE3
+  const Repartition<int32_t, decltype(d)> di32;
+  const RebindToSigned<decltype(d)> d_sel;
+  const auto sel_hi_mask = MaskFromVec(
+      BitCast(d_sel, VecFromMask(di32, DupEven(BitCast(di32, idx_vec)) >
+                                           Set(di32, int32_t{1}))));  // low 32 bits of each index duplicated, then compared against 1 as 32-bit lanes
+#endif  // HWY_TARGET <= HWY_SSE4
+
+  const auto lo_lookup_result = BitCast(d_sel, TableLookupLanes(a, idx_mod));
+  const auto hi_lookup_result = BitCast(d_sel, TableLookupLanes(b, idx_mod));
+  return BitCast(d,
+                 IfThenElse(sel_hi_mask, hi_lookup_result, lo_lookup_result));  // select is done in d_sel's lane type, then cast back
+#endif  // HWY_TARGET <= HWY_AVX3
+}
+
+HWY_API Vec128<double> TwoTablesLookupLanes(Vec128<double> a, Vec128<double> b,
+                                            Indices128<double> idx) {  // double lanes: index bit 0 picks the lane, bit 1 picks the table
+#if HWY_TARGET <= HWY_AVX3
+  return Vec128<double>{_mm_permutex2var_pd(a.raw, idx.raw, b.raw)};  // single two-source double permute
+#else
+  const DFromV<decltype(a)> d;
+  const RebindToSigned<decltype(d)> di;
+  const Vec128<int64_t> idx_vec{idx.raw};
+  const Indices128<double> idx_mod{And(idx_vec, Set(di, int64_t{1})).raw};  // index reduced to 0/1, i.e. the lane within one table
+
+#if HWY_TARGET <= HWY_SSE4
+  const auto sel_hi_mask = MaskFromVec(BitCast(d, ShiftLeft<62>(idx_vec)));  // moves index bit 1 (value 2) into the sign bit
+#else   // SSE2 or SSSE3
+  const Repartition<int32_t, decltype(d)> di32;
+  const auto sel_hi_mask =
+      MaskFromVec(BitCast(d, VecFromMask(di32, DupEven(BitCast(di32, idx_vec)) >
+                                                   Set(di32, int32_t{1}))));  // low 32 bits of each index duplicated, then compared against 1 as 32-bit lanes
+#endif  // HWY_TARGET <= HWY_SSE4
+
+  const auto lo_lookup_result = TableLookupLanes(a, idx_mod);
+  const auto hi_lookup_result = TableLookupLanes(b, idx_mod);
+  return IfThenElse(sel_hi_mask, hi_lookup_result, lo_lookup_result);  // pick the b result where the mask is set
+#endif  // HWY_TARGET <= HWY_AVX3
+}
+
+// ------------------------------ OddEven (IfThenElse)
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 1)>
+HWY_INLINE Vec128<T, N> OddEven(const Vec128<T, N> a, const Vec128<T, N> b) {  // byte lanes: odd lanes from a, even lanes from b
+  const DFromV<decltype(a)> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  alignas(16) static constexpr uint8_t mask[16] = {  // 0xFF at every even byte position
+      0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0};
+  return IfThenElse(MaskFromVec(BitCast(d, Load(d8, mask))), b, a);  // masked (even) lanes take b, the rest take a
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE Vec128<T, N> OddEven(const Vec128<T, N> a, const Vec128<T, N> b) {  // 16-bit lanes: odd lanes from a, even lanes from b
+#if HWY_TARGET >= HWY_SSSE3  // SSSE3/SSE2: mask-based select
+  const DFromV<decltype(a)> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  alignas(16) static constexpr uint8_t mask[16] = {  // 0xFF over both bytes of every even 16-bit lane
+      0xFF, 0xFF, 0, 0, 0xFF, 0xFF, 0, 0, 0xFF, 0xFF, 0, 0, 0xFF, 0xFF, 0, 0};
+  return IfThenElse(MaskFromVec(BitCast(d, Load(d8, mask))), b, a);
+#else  // newer targets: immediate blend
+  return Vec128<T, N>{_mm_blend_epi16(a.raw, b.raw, 0x55)};  // 0x55 = bits 0,2,4,6 set: even lanes come from b
+#endif
+}
+
+template <typename T, size_t N, HWY_IF_UI32(T)>
+HWY_INLINE Vec128<T, N> OddEven(const Vec128<T, N> a, const Vec128<T, N> b) {  // 32-bit integer lanes: odd lanes from a, even lanes from b
+#if HWY_TARGET >= HWY_SSSE3  // SSSE3/SSE2: two shuffles plus an unpack
+  const __m128i odd = _mm_shuffle_epi32(a.raw, _MM_SHUFFLE(3, 1, 3, 1));  // a[1], a[3] gathered into the low two lanes
+  const __m128i even = _mm_shuffle_epi32(b.raw, _MM_SHUFFLE(2, 0, 2, 0));  // b[0], b[2] gathered into the low two lanes
+  return Vec128<T, N>{_mm_unpacklo_epi32(even, odd)};  // interleave: b[0], a[1], b[2], a[3] (lo..hi)
+#else
+  // _mm_blend_epi16 has throughput 1/cycle on SKX, whereas _ps can do 3/cycle.
+  const DFromV<decltype(a)> d;
+  const RebindToFloat<decltype(d)> df;
+  return BitCast(d, Vec128<float, N>{_mm_blend_ps(BitCast(df, a).raw,
+                                                  BitCast(df, b).raw, 5)});  // 5 = bits 0 and 2 set: even lanes come from b
+#endif
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 8)>
+HWY_INLINE Vec128<T, N> OddEven(const Vec128<T, N> a, const Vec128<T, N> b) {  // 64-bit lanes: upper lane from a, lower lane from b
+  // Same as ConcatUpperLower for full vectors; do not call that because this
+  // is more efficient for 64x1 vectors.
+  const DFromV<decltype(a)> d;
+  const RebindToFloat<decltype(d)> dd;  // the shuffle/blend below operates on double lanes
+#if HWY_TARGET >= HWY_SSSE3  // SSSE3/SSE2: no blend instruction
+  return BitCast(
+      d, Vec128<double, N>{_mm_shuffle_pd(
+             BitCast(dd, b).raw, BitCast(dd, a).raw, _MM_SHUFFLE2(1, 0))});  // low lane = b[0], high lane = a[1]
+#else
+  // _mm_shuffle_pd has throughput 1/cycle on SKX, whereas blend can do 3/cycle.
+  return BitCast(d, Vec128<double, N>{_mm_blend_pd(BitCast(dd, a).raw,
+                                                   BitCast(dd, b).raw, 1)});  // 1 = bit 0 set: lane 0 comes from b
+#endif
+}
+
+template <size_t N>
+HWY_API Vec128<float, N> OddEven(Vec128<float, N> a, Vec128<float, N> b) {  // float lanes: odd lanes from a, even lanes from b
+#if HWY_TARGET >= HWY_SSSE3
+  // SHUFPS must fill the lower half of the output from one input, so we
+  // need another shuffle. Unpack avoids another immediate byte.
+  const __m128 odd = _mm_shuffle_ps(a.raw, a.raw, _MM_SHUFFLE(3, 1, 3, 1));  // a[1], a[3] gathered into the low two lanes
+  const __m128 even = _mm_shuffle_ps(b.raw, b.raw, _MM_SHUFFLE(2, 0, 2, 0));  // b[0], b[2] gathered into the low two lanes
+  return Vec128<float, N>{_mm_unpacklo_ps(even, odd)};  // interleave: b[0], a[1], b[2], a[3] (lo..hi)
+#else
+  return Vec128<float, N>{_mm_blend_ps(a.raw, b.raw, 5)};  // 5 = bits 0 and 2 set: even lanes come from b
+#endif
+}
+
+// ------------------------------ OddEvenBlocks
+template <typename T, size_t N>
+HWY_API Vec128<T, N> OddEvenBlocks(Vec128<T, N> /* odd */, Vec128<T, N> even) {  // the `odd` argument is unused for these vectors
+  return even;  // result is always the `even` argument
+}
+
+// ------------------------------ SwapAdjacentBlocks
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> SwapAdjacentBlocks(Vec128<T, N> v) {  // identity for these vectors
+  return v;  // input is returned unchanged
+}
+
+// ------------------------------ Shl (ZipLower, Mul)
+
+// Use AVX2/3 variable shifts where available, otherwise multiply by powers of
+// two from loading float exponents, which is considerably faster (according
+// to LLVM-MCA) than scalar or testing bits: https://gcc.godbolt.org/z/9G7Y9v.
+
+namespace detail {
+#if HWY_TARGET == HWY_AVX2  // Unused for AVX3 - we use sllv directly
+template <class V>
+HWY_API V AVX2ShlU16Vec128(V v, V bits) {  // per-lane 16-bit left shift carried out in 32-bit lanes
+  const Rebind<uint32_t, DFromV<V>> d_wide;
+  const auto wide_result = PromoteTo(d_wide, v) << PromoteTo(d_wide, bits);
+  return TruncateTo(DFromV<V>(), wide_result);  // narrow back to the input lane type
+}
+#elif HWY_TARGET > HWY_AVX2
+// Returns 2^v for use as per-lane multipliers to emulate 16-bit shifts.
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE Vec128<MakeUnsigned<T>> Pow2(const Vec128<T> v) {  // full-vector overload: eight 16-bit lanes
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const RepartitionToWide<decltype(d)> dw;  // 32-bit lanes covering the same bits
+  const Rebind<float, decltype(dw)> df;
+  const auto zero = Zero(d);
+  // Move into exponent (this u16 will become the upper half of an f32)
+  const auto exp = ShiftLeft<23 - 16>(v);
+  const auto upper = exp + Set(d, 0x3F80);  // upper half of 1.0f
+  // Insert 0 into lower halves for reinterpreting as binary32.
+  const auto f0 = ZipLower(dw, zero, upper);  // built from the lower four input lanes
+  const auto f1 = ZipUpper(dw, zero, upper);  // built from the upper four input lanes
+  // See cvtps comment below.
+  const VFromD<decltype(dw)> bits0{_mm_cvtps_epi32(BitCast(df, f0).raw)};  // float 2^v converted to a 32-bit integer
+  const VFromD<decltype(dw)> bits1{_mm_cvtps_epi32(BitCast(df, f1).raw)};
+#if HWY_TARGET <= HWY_SSE4
+  return VFromD<decltype(du)>{_mm_packus_epi32(bits0.raw, bits1.raw)};  // pack both halves back to u16 with unsigned saturation
+#else
+  return ConcatEven(du, BitCast(du, bits1), BitCast(du, bits0));  // keep the even u16 of each half, i.e. the low 16 bits of every i32
+#endif
+}
+
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 2), HWY_IF_LANES_LE(N, 4)>
+HWY_INLINE Vec128<MakeUnsigned<T>, N> Pow2(const Vec128<T, N> v) {  // partial-vector overload: at most four 16-bit lanes, so one f32 vector suffices
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const Twice<decltype(du)> dt_u;  // twice as many u16 lanes
+  const RepartitionToWide<decltype(dt_u)> dt_w;  // the same bits viewed as 32-bit lanes
+  const RebindToFloat<decltype(dt_w)> dt_f;
+  // Move into exponent (this u16 will become the upper half of an f32)
+  const auto exp = ShiftLeft<23 - 16>(v);
+  const auto upper = exp + Set(d, 0x3F80);  // upper half of 1.0f
+  // Insert 0 into lower halves for reinterpreting as binary32.
+  const auto f0 = ZipLower(dt_w, Zero(dt_u), ResizeBitCast(dt_u, upper));
+  // See cvtps comment below.
+  const VFromD<decltype(dt_w)> bits0{_mm_cvtps_epi32(BitCast(dt_f, f0).raw)};  // float 2^v converted to a 32-bit integer
+#if HWY_TARGET <= HWY_SSE4
+  return VFromD<decltype(du)>{_mm_packus_epi32(bits0.raw, bits0.raw)};  // pack to u16 with unsigned saturation; same input used for both halves
+#elif HWY_TARGET == HWY_SSSE3
+  alignas(16)
+      const uint16_t kCompactEvenU16[8] = {0x0100, 0x0504, 0x0908, 0x0D0C};  // byte indices of the low u16 of each i32; remaining entries are zero-initialized
+  return TableLookupBytes(bits0, Load(du, kCompactEvenU16));
+#else  // SSE2: no unsigned pack available here
+  const RebindToSigned<decltype(dt_w)> dt_i32;
+  const auto bits0_i32 = ShiftRight<16>(BitCast(dt_i32, ShiftLeft<16>(bits0)));  // sign-extend the low 16 bits so the signed pack below does not saturate
+  return VFromD<decltype(du)>{_mm_packs_epi32(bits0_i32.raw, bits0_i32.raw)};
+#endif
+}
+
+// Same, for 32-bit shifts.
+template <typename T, size_t N, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE Vec128<MakeUnsigned<T>, N> Pow2(const Vec128<T, N> v) {  // returns 2^v per 32-bit lane
+  const DFromV<decltype(v)> d;
+  const auto exp = ShiftLeft<23>(v);  // v shifted up by 23 bits, into the binary32 exponent field
+  const auto f = exp + Set(d, 0x3F800000);  // 1.0f
+  // Do not use ConvertTo because we rely on the native 0x80..00 overflow
+  // behavior.
+  return Vec128<MakeUnsigned<T>, N>{_mm_cvtps_epi32(_mm_castsi128_ps(f.raw))};  // reinterpret the bits as float, then convert to integer
+}
+
+#endif  // HWY_TARGET > HWY_AVX2
+
+template <size_t N>
+HWY_API Vec128<uint16_t, N> Shl(hwy::UnsignedTag /*tag*/, Vec128<uint16_t, N> v,
+                                Vec128<uint16_t, N> bits) {  // per-lane variable left shift of u16 lanes
+#if HWY_TARGET <= HWY_AVX3
+  return Vec128<uint16_t, N>{_mm_sllv_epi16(v.raw, bits.raw)};  // native variable 16-bit shift
+#elif HWY_TARGET == HWY_AVX2
+  return AVX2ShlU16Vec128(v, bits);  // helper shifts in 32-bit lanes
+#else
+  return v * Pow2(bits);  // multiply by 2^bits in place of a variable shift
+#endif
+}
+
+#if HWY_TARGET > HWY_AVX3
+HWY_API Vec16<uint16_t> Shl(hwy::UnsignedTag /*tag*/, Vec16<uint16_t> v,
+                            Vec16<uint16_t> bits) {  // single u16 lane: one count applies to the whole register
+#if HWY_TARGET <= HWY_SSE4
+  const Vec16<uint16_t> bits16{_mm_cvtepu16_epi64(bits.raw)};  // zero-extend the count to 64 bits
+#else
+  const auto bits16 = And(bits, Vec16<uint16_t>{_mm_set_epi64x(0, 0xFFFF)});  // keep only the low 16 bits of the count
+#endif
+  return Vec16<uint16_t>{_mm_sll_epi16(v.raw, bits16.raw)};
+}
+#endif
+
+#if HWY_TARGET <= HWY_AVX3
+template <class V>
+HWY_INLINE V AVX2ShlU8Vec128(V v, V bits) {  // per-lane 8-bit left shift carried out in 16-bit lanes
+  const Rebind<uint16_t, DFromV<V>> d_wide;
+  const auto wide_result = PromoteTo(d_wide, v) << PromoteTo(d_wide, bits);
+  return TruncateTo(DFromV<V>(), wide_result);  // narrow back to the input lane type
+}
+#elif HWY_TARGET <= HWY_AVX2
+template <class V, HWY_IF_V_SIZE_LE_V(V, 8)>
+HWY_INLINE V AVX2ShlU8Vec128(V v, V bits) {  // vectors of at most 8 bytes: shift in 32-bit lanes
+  const Rebind<uint32_t, DFromV<V>> d_wide;
+  const auto wide_result = PromoteTo(d_wide, v) << PromoteTo(d_wide, bits);
+  return TruncateTo(DFromV<V>(), wide_result);  // narrow back to the input lane type
+}
+template <class V, HWY_IF_V_SIZE_V(V, 16)>
+HWY_INLINE V AVX2ShlU8Vec128(V v, V bits) {  // 16-byte vectors: each half is promoted to u32 and shifted separately
+  const DFromV<decltype(v)> d;
+  const Half<decltype(d)> dh;
+  const Rebind<uint16_t, decltype(d)> du16;
+  const Rebind<uint32_t, decltype(dh)> dh_u32;
+
+  const VFromD<decltype(dh_u32)> lo_shl_result =
+      PromoteTo(dh_u32, LowerHalf(dh, v))
+      << PromoteTo(dh_u32, LowerHalf(dh, bits));  // lower 8 lanes, shifted as u32
+  const VFromD<decltype(dh_u32)> hi_shl_result =
+      PromoteTo(dh_u32, UpperHalf(dh, v))
+      << PromoteTo(dh_u32, UpperHalf(dh, bits));  // upper 8 lanes, shifted as u32
+  const VFromD<decltype(du16)> u16_shl_result = ConcatEven(
+      du16, BitCast(du16, hi_shl_result), BitCast(du16, lo_shl_result));  // even u16 of each u32 = its low 16 bits
+  return TruncateTo(d, u16_shl_result);  // final narrowing from u16 to the input lane type
+}
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// 8-bit: may use the Shl overload for uint16_t.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> Shl(hwy::UnsignedTag tag, Vec128<uint8_t, N> v,
+                               Vec128<uint8_t, N> bits) {  // per-lane variable left shift of u8 lanes
+  const DFromV<decltype(v)> d;
+#if HWY_TARGET <= HWY_AVX3_DL
+  (void)tag;
+  // kMasks[i] = 0xFF >> i
+  alignas(16) static constexpr uint8_t kMasks[16] = {
+      0xFF, 0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01, 0x00};  // entries past the listed ones are zero-initialized
+  // kShl[i] = 1 << i
+  alignas(16) static constexpr uint8_t kShl[16] = {1,    2,    4,    8,   0x10,
+                                                   0x20, 0x40, 0x80, 0x00};
+  v = And(v, TableLookupBytes(Load(Full64<uint8_t>(), kMasks), bits));  // clear the top `bits` bits of each lane before multiplying
+  const VFromD<decltype(d)> mul =
+      TableLookupBytes(Load(Full64<uint8_t>(), kShl), bits);  // per-lane multiplier 1 << bits
+  return VFromD<decltype(d)>{_mm_gf2p8mul_epi8(v.raw, mul.raw)};  // NOTE(review): GF(2^8) multiply presumably equals a plain shift once the top bits are masked off - confirm
+#elif HWY_TARGET <= HWY_AVX2
+  (void)tag;
+  (void)d;
+  return AVX2ShlU8Vec128(v, bits);
+#else
+  const Repartition<uint16_t, decltype(d)> dw;
+  using VW = VFromD<decltype(dw)>;
+  const VW even_mask = Set(dw, 0x00FF);  // low byte of each u16 = even u8 lane
+  const VW odd_mask = Set(dw, 0xFF00);  // high byte of each u16 = odd u8 lane
+  const VW vw = BitCast(dw, v);
+  const VW bits16 = BitCast(dw, bits);
+  // Shift even lanes in-place
+  const VW evens = Shl(tag, vw, And(bits16, even_mask));  // count taken from the even byte
+  const VW odds = Shl(tag, And(vw, odd_mask), ShiftRight<8>(bits16));  // odd byte isolated; count taken from the odd byte
+  return OddEven(BitCast(d, odds), BitCast(d, evens));  // odd lanes from `odds`, even lanes from `evens`
+#endif
+}
+HWY_API Vec128<uint8_t, 1> Shl(hwy::UnsignedTag /*tag*/, Vec128<uint8_t, 1> v,
+                               Vec128<uint8_t, 1> bits) {  // single u8 lane: shifted with the 16-bit instruction
+#if HWY_TARGET <= HWY_SSE4
+  const Vec16<uint16_t> bits8{_mm_cvtepu8_epi64(bits.raw)};  // zero-extend the count to 64 bits
+#else
+  const Vec16<uint16_t> bits8 =
+      And(Vec16<uint16_t>{bits.raw}, Vec16<uint16_t>{_mm_set_epi64x(0, 0xFF)});  // keep only the low 8 bits of the count
+#endif
+  return Vec128<uint8_t, 1>{_mm_sll_epi16(v.raw, bits8.raw)};
+}
+
+template <size_t N>
+HWY_API Vec128<uint32_t, N> Shl(hwy::UnsignedTag /*tag*/, Vec128<uint32_t, N> v,
+                                Vec128<uint32_t, N> bits) {  // per-lane variable left shift of u32 lanes
+#if HWY_TARGET >= HWY_SSE4  // SSE4 and older: no variable shift instruction
+  return v * Pow2(bits);  // multiply by 2^bits instead
+#else
+  return Vec128<uint32_t, N>{_mm_sllv_epi32(v.raw, bits.raw)};  // native variable 32-bit shift
+#endif
+}
+
+#if HWY_TARGET >= HWY_SSE4
+HWY_API Vec32<uint32_t> Shl(hwy::UnsignedTag /*tag*/, Vec32<uint32_t> v,
+                            const Vec32<uint32_t> bits) {  // single u32 lane: one count applies to the whole register
+#if HWY_TARGET == HWY_SSE4
+  const Vec32<uint32_t> bits32{_mm_cvtepu32_epi64(bits.raw)};  // zero-extend the count to 64 bits
+#else
+  const auto bits32 =
+      Combine(Full64<uint32_t>(), Zero(Full32<uint32_t>()), bits);  // count in the low lane, zero in the other lane
+#endif
+  return Vec32<uint32_t>{_mm_sll_epi32(v.raw, bits32.raw)};
+}
+#endif
+
+HWY_API Vec128<uint64_t> Shl(hwy::UnsignedTag /*tag*/, Vec128<uint64_t> v,
+                             Vec128<uint64_t> bits) {  // per-lane variable left shift of two u64 lanes
+#if HWY_TARGET >= HWY_SSE4  // SSE4 and older: no variable shift instruction
+  const DFromV<decltype(v)> d;
+  // Individual shifts and combine
+  const Vec128<uint64_t> out0{_mm_sll_epi64(v.raw, bits.raw)};  // whole vector shifted by the lower lane's count
+  const __m128i bits1 = _mm_unpackhi_epi64(bits.raw, bits.raw);  // upper lane's count copied into the low 64 bits
+  const Vec128<uint64_t> out1{_mm_sll_epi64(v.raw, bits1)};  // whole vector shifted by the upper lane's count
+  return ConcatUpperLower(d, out1, out0);  // upper lane from out1, lower lane from out0
+#else
+  return Vec128<uint64_t>{_mm_sllv_epi64(v.raw, bits.raw)};  // native variable 64-bit shift
+#endif
+}
+HWY_API Vec64<uint64_t> Shl(hwy::UnsignedTag /*tag*/, Vec64<uint64_t> v,
+                            Vec64<uint64_t> bits) {  // single u64 lane
+  return Vec64<uint64_t>{_mm_sll_epi64(v.raw, bits.raw)};  // count is read from the low 64 bits of bits.raw
+}
+
+// Signed lanes reuse the unsigned overloads: the shifted bits are the same.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Shl(hwy::SignedTag /*tag*/, Vec128<T, N> v,
+                         Vec128<T, N> bits) {
+  const DFromV<decltype(v)> d_signed;
+  const RebindToUnsigned<decltype(d_signed)> du;
+  const auto out = Shl(hwy::UnsignedTag(), BitCast(du, v), BitCast(du, bits));
+  return BitCast(d_signed, out);  // back to the caller's signed lane type
+}
+
+}  // namespace detail
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator<<(Vec128<T, N> v, Vec128<T, N> bits) {  // public per-lane variable left shift
+  return detail::Shl(hwy::TypeTag<T>(), v, bits);  // the tag built from T picks the matching detail::Shl overload
+}
+
+// ------------------------------ Shr (mul, mask, BroadcastSignBit)
+
+// Use AVX2+ variable shifts except for SSE2/SSSE3/SSE4. There, we use
+// widening multiplication by powers of two obtained by loading float exponents,
+// followed by a constant right-shift. This is still faster than a scalar or
+// bit-test approach: https://gcc.godbolt.org/z/9G7Y9v.
+
+#if HWY_TARGET <= HWY_AVX2
+namespace detail {
+
+#if HWY_TARGET <= HWY_AVX3
+template <class V>
+HWY_INLINE V AVX2ShrU8Vec128(V v, V bits) {
+  const DFromV<V> d;
+  const Rebind<uint16_t, decltype(d)> dw;
+  // Shift in 16-bit lanes, then view the result as signed for DemoteTo.
+  const auto shifted = PromoteTo(dw, v) >> PromoteTo(dw, bits);
+  return DemoteTo(d, BitCast(RebindToSigned<decltype(dw)>(), shifted));
+}
+#else   // AVX2
+template <class V>
+HWY_INLINE V AVX2ShrU16Vec128(V v, V bits) {
+  const DFromV<V> d;
+  const Rebind<uint32_t, decltype(d)> dw;
+  // Shift in 32-bit lanes, then view the result as signed for DemoteTo.
+  const auto shifted = PromoteTo(dw, v) >> PromoteTo(dw, bits);
+  return DemoteTo(d, BitCast(RebindToSigned<decltype(dw)>(), shifted));
+}
+template <class V, HWY_IF_V_SIZE_LE_V(V, 8)>
+HWY_INLINE V AVX2ShrU8Vec128(V v, V bits) {
+  const DFromV<V> d;
+  const Rebind<uint32_t, decltype(d)> dw;
+  // Shift in 32-bit lanes, then view the result as signed for DemoteTo.
+  const auto shifted = PromoteTo(dw, v) >> PromoteTo(dw, bits);
+  return DemoteTo(d, BitCast(RebindToSigned<decltype(dw)>(), shifted));
+}
+template <class V, HWY_IF_V_SIZE_V(V, 16)>
+HWY_INLINE V AVX2ShrU8Vec128(V v, V bits) {  // 16-byte vectors: each half is promoted to u32 and shifted separately
+  const DFromV<decltype(v)> d;
+  const Half<decltype(d)> dh;
+  const Rebind<int16_t, decltype(d)> di16;
+  const Rebind<uint16_t, decltype(d)> du16;
+  const Rebind<int32_t, decltype(dh)> dh_i32;
+  const Rebind<uint32_t, decltype(dh)> dh_u32;
+
+  const auto lo_shr_result =
+      BitCast(dh_i32, PromoteTo(dh_u32, LowerHalf(dh, v)) >>
+                          PromoteTo(dh_u32, LowerHalf(dh, bits)));  // lower 8 lanes, shifted as u32, viewed as i32
+  const auto hi_shr_result =
+      BitCast(dh_i32, PromoteTo(dh_u32, UpperHalf(dh, v)) >>
+                          PromoteTo(dh_u32, UpperHalf(dh, bits)));  // upper 8 lanes, shifted as u32, viewed as i32
+  const auto i16_shr_result =
+      BitCast(di16, OrderedDemote2To(du16, lo_shr_result, hi_shr_result));  // both halves narrowed into one 16-bit vector, lower half first
+  return DemoteTo(d, i16_shr_result);  // final narrowing to the input lane type
+}
+#endif  // HWY_TARGET <= HWY_AVX3
+
+}  // namespace detail
+#endif  // HWY_TARGET <= HWY_AVX2
+
+template <size_t N>
+HWY_API Vec128<uint16_t, N> operator>>(Vec128<uint16_t, N> in,
+                                       const Vec128<uint16_t, N> bits) {  // per-lane variable logical right shift of u16 lanes
+#if HWY_TARGET <= HWY_AVX3
+  return Vec128<uint16_t, N>{_mm_srlv_epi16(in.raw, bits.raw)};  // native variable 16-bit shift
+#elif HWY_TARGET <= HWY_AVX2
+  return detail::AVX2ShrU16Vec128(in, bits);  // helper shifts in 32-bit lanes
+#else
+  const DFromV<decltype(in)> d;
+  // For bits=0, we cannot mul by 2^16, so fix the result later.
+  const auto out = MulHigh(in, detail::Pow2(Set(d, 16) - bits));  // multiply by 2^(16 - bits) and keep the high half of the product
+  // Replace output with input where bits == 0.
+  return IfThenElse(bits == Zero(d), in, out);
+#endif
+}
+
+#if HWY_TARGET > HWY_AVX3
+HWY_API Vec16<uint16_t> operator>>(const Vec16<uint16_t> in,
+                                   const Vec16<uint16_t> bits) {  // single u16 lane: one count applies to the whole register
+#if HWY_TARGET <= HWY_SSE4
+  const Vec16<uint16_t> bits16{_mm_cvtepu16_epi64(bits.raw)};  // zero-extend the count to 64 bits
+#else
+  const auto bits16 = And(bits, Vec16<uint16_t>{_mm_set_epi64x(0, 0xFFFF)});  // keep only the low 16 bits of the count
+#endif
+  return Vec16<uint16_t>{_mm_srl_epi16(in.raw, bits16.raw)};
+}
+#endif
+
+// 8-bit uses 16-bit shifts.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> operator>>(Vec128<uint8_t, N> in,
+                                      const Vec128<uint8_t, N> bits) {  // per-lane variable logical right shift of u8 lanes
+#if HWY_TARGET <= HWY_AVX2
+  return detail::AVX2ShrU8Vec128(in, bits);  // helper shifts in wider lanes
+#else
+  const DFromV<decltype(in)> d;
+  const Repartition<uint16_t, decltype(d)> dw;
+  using VW = VFromD<decltype(dw)>;
+  const VW mask = Set(dw, 0x00FF);  // low byte of each u16 = even u8 lane
+  const VW vw = BitCast(dw, in);
+  const VW bits16 = BitCast(dw, bits);
+  const VW evens = And(vw, mask) >> And(bits16, mask);  // even byte isolated so the odd byte cannot shift into it
+  // Shift odd lanes in-place
+  const VW odds = vw >> ShiftRight<8>(bits16);  // count taken from the odd byte
+  return OddEven(BitCast(d, odds), BitCast(d, evens));  // odd lanes from `odds`, even lanes from `evens`
+#endif
+}
+HWY_API Vec128<uint8_t, 1> operator>>(const Vec128<uint8_t, 1> in,
+                                      const Vec128<uint8_t, 1> bits) {  // single u8 lane: shifted with the 16-bit instruction
+#if HWY_TARGET <= HWY_SSE4
+  const Vec16<uint16_t> in8{_mm_cvtepu8_epi16(in.raw)};  // zero-extend the value so no neighbouring byte shifts in
+  const Vec16<uint16_t> bits8{_mm_cvtepu8_epi64(bits.raw)};  // zero-extend the count to 64 bits
+#else
+  const Vec16<uint16_t> mask{_mm_set_epi64x(0, 0xFF)};  // keeps only the lowest byte
+  const Vec16<uint16_t> in8 = And(Vec16<uint16_t>{in.raw}, mask);
+  const Vec16<uint16_t> bits8 = And(Vec16<uint16_t>{bits.raw}, mask);
+#endif
+  return Vec128<uint8_t, 1>{_mm_srl_epi16(in8.raw, bits8.raw)};
+}
+
+template <size_t N>
+HWY_API Vec128<uint32_t, N> operator>>(const Vec128<uint32_t, N> in,
+                                       const Vec128<uint32_t, N> bits) {  // per-lane variable logical right shift of u32 lanes
+#if HWY_TARGET >= HWY_SSE4  // SSE4 and older: no variable shift instruction
+  // 32x32 -> 64 bit mul, then shift right by 32.
+  const DFromV<decltype(in)> d32;
+  // Move odd lanes into position for the second mul. Shuffle more gracefully
+  // handles N=1 than repartitioning to u64 and shifting 32 bits right.
+  const Vec128<uint32_t, N> in31{_mm_shuffle_epi32(in.raw, 0x31)};
+  // For bits=0, we cannot mul by 2^32, so fix the result later.
+  const auto mul = detail::Pow2(Set(d32, 32) - bits);  // per-lane multiplier 2^(32 - bits)
+  const auto out20 = ShiftRight<32>(MulEven(in, mul));  // z 2 z 0
+  const Vec128<uint32_t, N> mul31{_mm_shuffle_epi32(mul.raw, 0x31)};  // same lane move for the multipliers
+  // No need to shift right, already in the correct position.
+  const auto out31 = BitCast(d32, MulEven(in31, mul31));  // 3 ? 1 ?
+  const Vec128<uint32_t, N> out = OddEven(out31, BitCast(d32, out20));  // odd lanes from out31, even lanes from out20
+  // Replace output with input where bits == 0.
+  return IfThenElse(bits == Zero(d32), in, out);
+#else
+  return Vec128<uint32_t, N>{_mm_srlv_epi32(in.raw, bits.raw)};  // native variable 32-bit shift
+#endif
+}
+
+#if HWY_TARGET >= HWY_SSE4
+HWY_API Vec128<uint32_t, 1> operator>>(const Vec128<uint32_t, 1> in,
+                                       const Vec128<uint32_t, 1> bits) {  // single u32 lane: one count applies to the whole register
+#if HWY_TARGET == HWY_SSE4
+  const Vec32<uint32_t> bits32{_mm_cvtepu32_epi64(bits.raw)};  // zero-extend the count to 64 bits
+#else
+  const auto bits32 =
+      Combine(Full64<uint32_t>(), Zero(Full32<uint32_t>()), bits);  // count in the low lane, zero in the other lane
+#endif
+  return Vec128<uint32_t, 1>{_mm_srl_epi32(in.raw, bits32.raw)};
+}
+#endif
+
+HWY_API Vec128<uint64_t> operator>>(const Vec128<uint64_t> v,
+                                    const Vec128<uint64_t> bits) {  // per-lane variable logical right shift of two u64 lanes
+#if HWY_TARGET >= HWY_SSE4  // SSE4 and older: no variable shift instruction
+  const DFromV<decltype(v)> d;
+  // Individual shifts and combine
+  const Vec128<uint64_t> out0{_mm_srl_epi64(v.raw, bits.raw)};  // whole vector shifted by the lower lane's count
+  const __m128i bits1 = _mm_unpackhi_epi64(bits.raw, bits.raw);  // upper lane's count copied into the low 64 bits
+  const Vec128<uint64_t> out1{_mm_srl_epi64(v.raw, bits1)};  // whole vector shifted by the upper lane's count
+  return ConcatUpperLower(d, out1, out0);  // upper lane from out1, lower lane from out0
+#else
+  return Vec128<uint64_t>{_mm_srlv_epi64(v.raw, bits.raw)};  // native variable 64-bit shift
+#endif
+}
+HWY_API Vec64<uint64_t> operator>>(const Vec64<uint64_t> v,
+                                   const Vec64<uint64_t> bits) {  // single u64 lane
+  return Vec64<uint64_t>{_mm_srl_epi64(v.raw, bits.raw)};  // count is read from the low 64 bits of bits.raw
+}
+
+namespace detail {
+
+#if HWY_TARGET <= HWY_AVX3
+template <class V>
+HWY_INLINE V AVX2ShrI8Vec128(V v, V bits) {  // per-lane signed 8-bit right shift carried out in 16-bit lanes
+  const Rebind<int16_t, DFromV<V>> d_wide;
+  const auto wide_result = PromoteTo(d_wide, v) >> PromoteTo(d_wide, bits);
+  return DemoteTo(DFromV<V>(), wide_result);  // narrow back to the input lane type
+}
+#elif HWY_TARGET <= HWY_AVX2  // AVX2
+template <class V>
+HWY_INLINE V AVX2ShrI16Vec128(V v, V bits) {  // per-lane signed 16-bit right shift carried out in 32-bit lanes
+  const Rebind<int32_t, DFromV<V>> d_wide;
+  const auto wide_result = PromoteTo(d_wide, v) >> PromoteTo(d_wide, bits);
+  return DemoteTo(DFromV<V>(), wide_result);  // narrow back to the input lane type
+}
+template <class V, HWY_IF_V_SIZE_LE_V(V, 8)>
+HWY_INLINE V AVX2ShrI8Vec128(V v, V bits) {  // vectors of at most 8 bytes: shift in 32-bit lanes
+  const Rebind<int32_t, DFromV<V>> d_wide;
+  const auto wide_result = PromoteTo(d_wide, v) >> PromoteTo(d_wide, bits);
+  return DemoteTo(DFromV<V>(), wide_result);  // narrow back to the input lane type
+}
+template <class V, HWY_IF_V_SIZE_V(V, 16)>
+HWY_INLINE V AVX2ShrI8Vec128(V v, V bits) {  // 16-byte vectors: each half is promoted to i32 and shifted separately
+  const DFromV<decltype(v)> d;
+  const Half<decltype(d)> dh;
+  const Rebind<int16_t, decltype(d)> di16;
+  const Rebind<int32_t, decltype(dh)> dh_i32;
+
+  const auto lo_shr_result = PromoteTo(dh_i32, LowerHalf(dh, v)) >>
+                             PromoteTo(dh_i32, LowerHalf(dh, bits));  // lower 8 lanes, shifted as i32
+  const auto hi_shr_result = PromoteTo(dh_i32, UpperHalf(dh, v)) >>
+                             PromoteTo(dh_i32, UpperHalf(dh, bits));  // upper 8 lanes, shifted as i32
+  const auto i16_shr_result =
+      OrderedDemote2To(di16, lo_shr_result, hi_shr_result);  // both halves narrowed into one i16 vector, lower half first
+  return DemoteTo(d, i16_shr_result);  // final narrowing to the input lane type
+}
+#endif
+
+#if HWY_TARGET > HWY_AVX3
+// Also used in x86_256-inl.h.
+template <class DI, class V>
+HWY_INLINE V SignedShr(const DI di, const V v, const V count_i) {
+  const RebindToUnsigned<DI> du;
+  // Flip negative inputs via XOR with the broadcast sign, shift the result as
+  // unsigned (cheaper than moving the MSB down), then XOR the sign back in.
+  const auto sign_mask = BroadcastSignBit(v);
+  const auto flipped = BitCast(du, v ^ sign_mask);  // off by one vs. |v|; undone by the final XOR
+  const auto shift = BitCast(du, count_i);  // same lane type as `flipped`
+  return BitCast(di, flipped >> shift) ^ sign_mask;
+}
+#endif
+
+}  // namespace detail
+
+template <size_t N>
+HWY_API Vec128<int16_t, N> operator>>(Vec128<int16_t, N> v,
+                                      Vec128<int16_t, N> bits) {  // per-lane variable arithmetic right shift of i16 lanes
+#if HWY_TARGET <= HWY_AVX3
+  return Vec128<int16_t, N>{_mm_srav_epi16(v.raw, bits.raw)};  // native variable 16-bit arithmetic shift
+#elif HWY_TARGET <= HWY_AVX2
+  return detail::AVX2ShrI16Vec128(v, bits);  // helper shifts in 32-bit lanes
+#else
+  const DFromV<decltype(v)> d;
+  return detail::SignedShr(d, v, bits);  // emulated via the unsigned shift
+#endif
+}
+
+#if HWY_TARGET > HWY_AVX3
+HWY_API Vec16<int16_t> operator>>(Vec16<int16_t> v, Vec16<int16_t> bits) {  // single i16 lane: one count applies to the whole register
+#if HWY_TARGET <= HWY_SSE4
+  const Vec16<int16_t> bits16{_mm_cvtepu16_epi64(bits.raw)};  // zero-extend the count to 64 bits
+#else
+  const auto bits16 = And(bits, Vec16<int16_t>{_mm_set_epi64x(0, 0xFFFF)});  // keep only the low 16 bits of the count
+#endif
+  return Vec16<int16_t>{_mm_sra_epi16(v.raw, bits16.raw)};
+}
+#endif
+
+template <size_t N>
+HWY_API Vec128<int8_t, N> operator>>(Vec128<int8_t, N> v,
+                                     Vec128<int8_t, N> bits) {  // per-lane variable arithmetic right shift of i8 lanes
+#if HWY_TARGET <= HWY_AVX2
+  return detail::AVX2ShrI8Vec128(v, bits);  // helper shifts in wider lanes
+#else
+  const DFromV<decltype(v)> d;
+  return detail::SignedShr(d, v, bits);  // emulated via the unsigned shift
+#endif
+}
+HWY_API Vec128<int8_t, 1> operator>>(Vec128<int8_t, 1> v,
+                                     Vec128<int8_t, 1> bits) {  // single i8 lane: shifted with the 16-bit arithmetic instruction
+#if HWY_TARGET <= HWY_SSE4
+  const Vec16<int16_t> vi16{_mm_cvtepi8_epi16(v.raw)};  // sign-extend the value to 16 bits
+  const Vec16<uint16_t> bits8{_mm_cvtepu8_epi64(bits.raw)};  // zero-extend the count to 64 bits
+#else
+  const DFromV<decltype(v)> d;
+  const Rebind<int16_t, decltype(d)> di16;
+  const Twice<decltype(d)> dt;
+
+  const auto vi16 = ShiftRight<8>(BitCast(di16, Combine(dt, v, v)));  // v in both bytes of an i16; arithmetic shift by 8 sign-extends it
+  const Vec16<uint16_t> bits8 =
+      And(Vec16<uint16_t>{bits.raw}, Vec16<uint16_t>{_mm_set_epi64x(0, 0xFF)});  // keep only the low 8 bits of the count
+#endif
+  return Vec128<int8_t, 1>{_mm_sra_epi16(vi16.raw, bits8.raw)};
+}
+
+// Right shift of i32 lanes by the per-lane counts in `bits`.
+template <size_t N>
+HWY_API Vec128<int32_t, N> operator>>(Vec128<int32_t, N> v,
+                                      Vec128<int32_t, N> bits) {
+#if HWY_TARGET <= HWY_AVX2
+  const __m128i shifted = _mm_srav_epi32(v.raw, bits.raw);
+  return Vec128<int32_t, N>{shifted};
+#else
+  // Emulate via detail::SignedShr.
+  return detail::SignedShr(DFromV<decltype(v)>(), v, bits);
+#endif
+}
+
+#if HWY_TARGET > HWY_AVX2
+// Single-lane i32 overload: shifts with _mm_sra_epi32 after widening the
+// lane-0 count to 64 bits.
+HWY_API Vec32<int32_t> operator>>(Vec32<int32_t> v, Vec32<int32_t> bits) {
+#if HWY_TARGET == HWY_SSE4
+  // Zero-extend the 32-bit count to 64 bits.
+  const Vec32<uint32_t> bits32{_mm_cvtepu32_epi64(bits.raw)};
+#else
+  // Without the SSE4 intrinsic: combine the count with a zeroed Full32 half.
+  const auto bits32 = Combine(Full64<int32_t>(), Zero(Full32<int32_t>()), bits);
+#endif
+  return Vec32<int32_t>{_mm_sra_epi32(v.raw, bits32.raw)};
+}
+#endif
+
+// Right shift of i64 lanes by the per-lane counts in `bits`.
+template <size_t N>
+HWY_API Vec128<int64_t, N> operator>>(Vec128<int64_t, N> v,
+                                      Vec128<int64_t, N> bits) {
+#if HWY_TARGET <= HWY_AVX3
+  const __m128i shifted = _mm_srav_epi64(v.raw, bits.raw);
+  return Vec128<int64_t, N>{shifted};
+#else
+  // Emulate via detail::SignedShr.
+  return detail::SignedShr(DFromV<decltype(v)>(), v, bits);
+#endif
+}
+
+// ------------------------------ MulEven/Odd 64x64 (UpperHalf)
+
+// 64x64-bit multiply of lane 0 of `a` and `b`. Mul128's return value goes to
+// lane 0 and the value it writes through its pointer argument to lane 1
+// (presumably the lower/upper halves of the product - confirm at Mul128).
+HWY_INLINE Vec128<uint64_t> MulEven(Vec128<uint64_t> a, Vec128<uint64_t> b) {
+  const uint64_t a0 = GetLane(a);
+  const uint64_t b0 = GetLane(b);
+  alignas(16) uint64_t product[2];
+  product[0] = Mul128(a0, b0, &product[1]);
+  return Load(DFromV<decltype(a)>(), product);
+}
+
+// 64x64-bit multiply of the upper lanes of `a` and `b`. Mul128's return value
+// goes to lane 0 and the value it writes through its pointer argument to
+// lane 1 (presumably the lower/upper halves of the product - confirm at
+// Mul128).
+HWY_INLINE Vec128<uint64_t> MulOdd(Vec128<uint64_t> a, Vec128<uint64_t> b) {
+  const DFromV<decltype(a)> d;
+  const Half<decltype(d)> dh;
+  const uint64_t a_upper = GetLane(UpperHalf(dh, a));
+  const uint64_t b_upper = GetLane(UpperHalf(dh, b));
+  alignas(16) uint64_t product[2];
+  product[0] = Mul128(a_upper, b_upper, &product[1]);
+  return Load(d, product);
+}
+
+// ------------------------------ WidenMulPairwiseAdd
+
+// Generic for all vector lengths.
+// Widens the bf16 lanes of `a` and `b` to f32 and returns, per 32-bit lane,
+// the even-lane product plus the odd-lane product.
+template <class D32, HWY_IF_F32_D(D32),
+          class V16 = VFromD<Repartition<bfloat16_t, D32>>>
+HWY_API VFromD<D32> WidenMulPairwiseAdd(D32 df32, V16 a, V16 b) {
+  // TODO(janwas): _mm_dpbf16_ps when available
+  const RebindToUnsigned<decltype(df32)> du32;
+  using VU32 = VFromD<decltype(du32)>;
+  const VU32 a_bits = BitCast(du32, a);
+  const VU32 b_bits = BitCast(du32, b);
+  const VU32 upper16 = Set(du32, 0xFFFF0000u);
+  // Lane order within sum0/1 is undefined, hence we can avoid the
+  // longer-latency lane-crossing PromoteTo. Using shift/and instead of Zip
+  // leads to the odd/even order that RearrangeToOddPlusEven prefers.
+  // Even lanes: move the lower 16 bits into the upper half of the f32.
+  const auto a_even = BitCast(df32, ShiftLeft<16>(a_bits));
+  const auto b_even = BitCast(df32, ShiftLeft<16>(b_bits));
+  // Odd lanes: already in the upper half, just clear the lower 16 bits.
+  const auto a_odd = BitCast(df32, And(a_bits, upper16));
+  const auto b_odd = BitCast(df32, And(b_bits, upper16));
+  return MulAdd(a_even, b_even, Mul(a_odd, b_odd));
+}
+
+// i16 x i16 pairwise multiply-add into i32 lanes via _mm_madd_epi16.
+// Even if N=1, the input is always at least 2 lanes, hence madd_epi16 is safe.
+template <class D32, HWY_IF_I32_D(D32), HWY_IF_V_SIZE_LE_D(D32, 16),
+          class V16 = VFromD<RepartitionToNarrow<D32>>>
+HWY_API VFromD<D32> WidenMulPairwiseAdd(D32 /* tag */, V16 a, V16 b) {
+  const __m128i pair_sums = _mm_madd_epi16(a.raw, b.raw);
+  return VFromD<D32>{pair_sums};
+}
+
+// Generic for all vector lengths.
+// u16 x u16 products of adjacent lane pairs, summed into each u32 lane.
+template <class DU32, HWY_IF_U32_D(DU32),
+          class VU16 = VFromD<RepartitionToNarrow<DU32>>>
+HWY_API VFromD<DU32> WidenMulPairwiseAdd(DU32 du32, VU16 a, VU16 b) {
+  // Lower and upper 16 bits of every 16x16-bit product.
+  const auto lo16 = a * b;
+  const auto hi16 = MulHigh(a, b);
+
+  // Each 32-bit lane below pairs the upper half of one product with the lower
+  // half of the other, so adding the two lanes gives the sum of both products.
+  const auto hi_odd_lo_even = BitCast(du32, OddEven(hi16, lo16));
+  const auto hi_even = ShiftLeft<16>(BitCast(du32, hi16));
+  const auto lo_odd = ShiftRight<16>(BitCast(du32, lo16));
+  return Add(hi_odd_lo_even, Or(hi_even, lo_odd));
+}
+
+// ------------------------------ SatWidenMulPairwiseAdd
+
+#if HWY_TARGET <= HWY_SSSE3
+
+#ifdef HWY_NATIVE_U8_I8_SATWIDENMULPAIRWISEADD
+#undef HWY_NATIVE_U8_I8_SATWIDENMULPAIRWISEADD
+#else
+#define HWY_NATIVE_U8_I8_SATWIDENMULPAIRWISEADD
+#endif
+
+// u8 x i8 pairwise multiply-add into i16 lanes via _mm_maddubs_epi16.
+// Even if N=1, the input is always at least 2 lanes, hence _mm_maddubs_epi16
+// is safe.
+template <class DI16, HWY_IF_I16_D(DI16), HWY_IF_V_SIZE_LE_D(DI16, 16)>
+HWY_API VFromD<DI16> SatWidenMulPairwiseAdd(
+    DI16 /* tag */, VFromD<Repartition<uint8_t, DI16>> a,
+    VFromD<Repartition<int8_t, DI16>> b) {
+  const __m128i pair_sums = _mm_maddubs_epi16(a.raw, b.raw);
+  return VFromD<DI16>{pair_sums};
+}
+
+#endif
+
+// ------------------------------ ReorderWidenMulAccumulate (MulAdd, ShiftLeft)
+
+// Generic for all vector lengths.
+// Widens bf16 lanes to f32; even-lane products are accumulated into the
+// returned copy of `sum0`, odd-lane products into `sum1` (updated in place).
+template <class D32, HWY_IF_F32_D(D32),
+          class V16 = VFromD<Repartition<bfloat16_t, D32>>>
+HWY_API VFromD<D32> ReorderWidenMulAccumulate(D32 df32, V16 a, V16 b,
+                                              const VFromD<D32> sum0,
+                                              VFromD<D32>& sum1) {
+  // TODO(janwas): _mm_dpbf16_ps when available
+  const RebindToUnsigned<decltype(df32)> du32;
+  using VU32 = VFromD<decltype(du32)>;
+  const VU32 a_bits = BitCast(du32, a);
+  const VU32 b_bits = BitCast(du32, b);
+  const VU32 upper16 = Set(du32, 0xFFFF0000u);
+  // Lane order within sum0/1 is undefined, hence we can avoid the
+  // longer-latency lane-crossing PromoteTo. Using shift/and instead of Zip
+  // leads to the odd/even order that RearrangeToOddPlusEven prefers.
+  const auto a_even = BitCast(df32, ShiftLeft<16>(a_bits));
+  const auto b_even = BitCast(df32, ShiftLeft<16>(b_bits));
+  const auto a_odd = BitCast(df32, And(a_bits, upper16));
+  const auto b_odd = BitCast(df32, And(b_bits, upper16));
+  sum1 = MulAdd(a_odd, b_odd, sum1);
+  return MulAdd(a_even, b_even, sum0);
+}
+
+// i16 pairwise multiply-add accumulated into `sum0`; `sum1` is not touched.
+// Even if N=1, the input is always at least 2 lanes, hence madd_epi16 is safe.
+template <class D32, HWY_IF_I32_D(D32), HWY_IF_V_SIZE_LE_D(D32, 16),
+          class V16 = VFromD<RepartitionToNarrow<D32>>>
+HWY_API VFromD<D32> ReorderWidenMulAccumulate(D32 d, V16 a, V16 b,
+                                              const VFromD<D32> sum0,
+                                              VFromD<D32>& /*sum1*/) {
+#if HWY_TARGET <= HWY_AVX3_DL
+  (void)d;
+  const __m128i accumulated = _mm_dpwssd_epi32(sum0.raw, a.raw, b.raw);
+  return VFromD<D32>{accumulated};
+#else
+  const VFromD<D32> pair_sums = WidenMulPairwiseAdd(d, a, b);
+  return sum0 + pair_sums;
+#endif
+}
+
+// u16 pairwise multiply-add accumulated into `sum0`; `sum1` is not touched.
+template <class DU32, HWY_IF_U32_D(DU32),
+          class VU16 = VFromD<RepartitionToNarrow<DU32>>>
+HWY_API VFromD<DU32> ReorderWidenMulAccumulate(DU32 d, VU16 a, VU16 b,
+                                               const VFromD<DU32> sum0,
+                                               VFromD<DU32>& /*sum1*/) {
+  const VFromD<DU32> pair_sums = WidenMulPairwiseAdd(d, a, b);
+  return sum0 + pair_sums;
+}
+
+// ------------------------------ RearrangeToOddPlusEven
+// i32 overload: returns `sum0` unchanged and ignores `sum1`.
+template <size_t N>
+HWY_API Vec128<int32_t, N> RearrangeToOddPlusEven(const Vec128<int32_t, N> sum0,
+                                                  Vec128<int32_t, N> /*sum1*/) {
+  return sum0;  // invariant already holds
+}
+
+// u32 overload: returns `sum0` unchanged and ignores `sum1`.
+template <size_t N>
+HWY_API Vec128<uint32_t, N> RearrangeToOddPlusEven(
+    const Vec128<uint32_t, N> sum0, Vec128<uint32_t, N> /*sum1*/) {
+  return sum0;  // invariant already holds
+}
+
+// Overload for any other vector type: returns the lane-wise sum of both
+// accumulators.
+template <class VW>
+HWY_API VW RearrangeToOddPlusEven(const VW sum0, const VW sum1) {
+  return Add(sum0, sum1);
+}
+
+// ------------------------------ SumOfMulQuadAccumulate
+#if HWY_TARGET <= HWY_AVX3_DL
+
+#ifdef HWY_NATIVE_U8_I8_SUMOFMULQUADACCUMULATE
+#undef HWY_NATIVE_U8_I8_SUMOFMULQUADACCUMULATE
+#else
+#define HWY_NATIVE_U8_I8_SUMOFMULQUADACCUMULATE
+#endif
+
+// u8 x i8 products of each group of four bytes, accumulated into the i32
+// lanes of `sum` via _mm_dpbusd_epi32.
+template <class DI32, HWY_IF_I32_D(DI32), HWY_IF_V_SIZE_LE_D(DI32, 16)>
+HWY_API VFromD<DI32> SumOfMulQuadAccumulate(
+    DI32 /*di32*/, VFromD<Repartition<uint8_t, DI32>> a_u,
+    VFromD<Repartition<int8_t, DI32>> b_i, VFromD<DI32> sum) {
+  const __m128i accumulated = _mm_dpbusd_epi32(sum.raw, a_u.raw, b_i.raw);
+  return VFromD<DI32>{accumulated};
+}
+
+#ifdef HWY_NATIVE_I8_I8_SUMOFMULQUADACCUMULATE
+#undef HWY_NATIVE_I8_I8_SUMOFMULQUADACCUMULATE
+#else
+#define HWY_NATIVE_I8_I8_SUMOFMULQUADACCUMULATE
+#endif
+// i8 x i8 variant, built from two calls to the u8 x i8 overload.
+template <class DI32, HWY_IF_I32_D(DI32)>
+HWY_API VFromD<DI32> SumOfMulQuadAccumulate(DI32 di32,
+                                            VFromD<Repartition<int8_t, DI32>> a,
+                                            VFromD<Repartition<int8_t, DI32>> b,
+                                            VFromD<DI32> sum) {
+  const Repartition<uint8_t, decltype(di32)> du8;
+
+  // Reinterpreting `a` as u8 adds 256 to every negative lane, so this first
+  // result is too large by 256 * b for those lanes.
+  const auto a_as_u8 = BitCast(du8, a);
+  const auto unsigned_dot = SumOfMulQuadAccumulate(di32, a_as_u8, b, sum);
+  // ShiftRight<7> on u8 lanes leaves 1 exactly where `a` was negative; that
+  // dot product with b, times 256 (ShiftLeft<8>), is the excess to remove.
+  const auto a_was_negative = ShiftRight<7>(a_as_u8);
+  const auto excess = ShiftLeft<8>(
+      SumOfMulQuadAccumulate(di32, a_was_negative, b, Zero(di32)));
+  return unsigned_dot - excess;
+}
+
+#ifdef HWY_NATIVE_U8_U8_SUMOFMULQUADACCUMULATE
+#undef HWY_NATIVE_U8_U8_SUMOFMULQUADACCUMULATE
+#else
+#define HWY_NATIVE_U8_U8_SUMOFMULQUADACCUMULATE
+#endif
+// u8 x u8 variant, built from two calls to the u8 x i8 overload.
+template <class DU32, HWY_IF_U32_D(DU32)>
+HWY_API VFromD<DU32> SumOfMulQuadAccumulate(
+    DU32 du32, VFromD<Repartition<uint8_t, DU32>> a,
+    VFromD<Repartition<uint8_t, DU32>> b, VFromD<DU32> sum) {
+  const RebindToSigned<decltype(du32)> di32;
+  const RebindToSigned<Repartition<uint8_t, decltype(du32)>> di8;
+
+  // Reinterpreting `b` as i8 subtracts 256 from every lane >= 128, so this
+  // first result is too small by 256 * a for those lanes.
+  const auto b_as_i8 = BitCast(di8, b);
+  const auto signed_dot =
+      SumOfMulQuadAccumulate(di32, a, b_as_i8, BitCast(di32, sum));
+  // BroadcastSignBit yields -1 in exactly those lanes; subtracting 256 times
+  // that dot product adds the missing amount back.
+  const auto b_sign = BroadcastSignBit(b_as_i8);
+  const auto shortfall =
+      ShiftLeft<8>(SumOfMulQuadAccumulate(di32, a, b_sign, Zero(di32)));
+
+  return BitCast(du32, signed_dot - shortfall);
+}
+
+#endif  // HWY_TARGET <= HWY_AVX3_DL
+
+// ================================================== CONVERT
+
+// ------------------------------ Promotions (part w/ narrow lanes -> full)
+
+// Unsigned: zero-extend.
+// u8 -> u16.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<uint8_t, D>> v) {
+#if HWY_TARGET < HWY_SSSE3
+  return VFromD<D>{_mm_cvtepu8_epi16(v.raw)};
+#else
+  // SSE2/SSSE3: interleave every byte with a zero byte.
+  return VFromD<D>{_mm_unpacklo_epi8(v.raw, _mm_setzero_si128())};
+#endif
+}
+// u16 -> u32 (zero-extend).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<uint16_t, D>> v) {
+#if HWY_TARGET < HWY_SSSE3
+  return VFromD<D>{_mm_cvtepu16_epi32(v.raw)};
+#else
+  // SSE2/SSSE3: interleave every 16-bit lane with a zero lane.
+  const __m128i zero = _mm_setzero_si128();
+  return VFromD<D>{_mm_unpacklo_epi16(v.raw, zero)};
+#endif
+}
+// u32 -> u64 (zero-extend).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_U64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<uint32_t, D>> v) {
+#if HWY_TARGET < HWY_SSSE3
+  return VFromD<D>{_mm_cvtepu32_epi64(v.raw)};
+#else
+  // SSE2/SSSE3: interleave every 32-bit lane with a zero lane.
+  const __m128i zero = _mm_setzero_si128();
+  return VFromD<D>{_mm_unpacklo_epi32(v.raw, zero)};
+#endif
+}
+// u8 -> u32 (zero-extend).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<uint8_t, D>> v) {
+#if HWY_TARGET < HWY_SSSE3
+  return VFromD<D>{_mm_cvtepu8_epi32(v.raw)};
+#else
+  // SSE2/SSSE3: two interleaves with zero, u8 -> u16 -> u32.
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i as_u16 = _mm_unpacklo_epi8(v.raw, zero);
+  const __m128i as_u32 = _mm_unpacklo_epi16(as_u16, zero);
+  return VFromD<D>{as_u32};
+#endif
+}
+// u8 -> u64 (zero-extend), with one implementation per target group.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_U64_D(D)>
+HWY_API VFromD<D> PromoteTo(D d, VFromD<Rebind<uint8_t, D>> v) {
+#if HWY_TARGET > HWY_SSSE3
+  // Two-step promotion: u8 -> u32 -> u64.
+  const Rebind<uint32_t, decltype(d)> du32;
+  return PromoteTo(d, PromoteTo(du32, v));
+#elif HWY_TARGET == HWY_SSSE3
+  // Byte shuffle: source bytes 0 and 1 land in the lowest byte of the first
+  // and second 64-bit lane; all other indices are -1.
+  alignas(16) static constexpr int8_t kShuffle[16] = {
+      0, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1};
+  const Repartition<int8_t, decltype(d)> di8;
+  return TableLookupBytesOr0(v, BitCast(d, Load(di8, kShuffle)));
+#else
+  (void)d;
+  return VFromD<D>{_mm_cvtepu8_epi64(v.raw)};
+#endif
+}
+// u16 -> u64 (zero-extend), with one implementation per target group.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_U64_D(D)>
+HWY_API VFromD<D> PromoteTo(D d, VFromD<Rebind<uint16_t, D>> v) {
+#if HWY_TARGET > HWY_SSSE3
+  // Two-step promotion: u16 -> u32 -> u64.
+  const Rebind<uint32_t, decltype(d)> du32;
+  return PromoteTo(d, PromoteTo(du32, v));
+#elif HWY_TARGET == HWY_SSSE3
+  // Byte shuffle: source bytes 0-1 and 2-3 land in the lowest two bytes of
+  // the first and second 64-bit lane; all other indices are -1.
+  alignas(16) static constexpr int8_t kShuffle[16] = {
+      0, 1, -1, -1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, -1, -1};
+  const Repartition<int8_t, decltype(d)> di8;
+  return TableLookupBytesOr0(v, BitCast(d, Load(di8, kShuffle)));
+#else
+  (void)d;
+  return VFromD<D>{_mm_cvtepu16_epi64(v.raw)};
+#endif
+}
+
+// Unsigned to signed: same plus cast.
+template <class D, class V, HWY_IF_SIGNED_D(D), HWY_IF_UNSIGNED_V(V),
+          HWY_IF_LANES_GT(sizeof(TFromD<D>), sizeof(TFromV<V>)),
+          HWY_IF_LANES_D(D, HWY_MAX_LANES_V(V))>
+HWY_API VFromD<D> PromoteTo(D di, V v) {
+  // Promote to unsigned lanes of the target width, then reinterpret as signed.
+  const auto widened = PromoteTo(RebindToUnsigned<decltype(di)>(), v);
+  return BitCast(di, widened);
+}
+
+// Signed: replicate sign bit.
+// i8 -> i16.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<int8_t, D>> v) {
+#if HWY_TARGET < HWY_SSSE3
+  return VFromD<D>{_mm_cvtepi8_epi16(v.raw)};
+#else
+  // SSE2/SSSE3: duplicate every byte into a 16-bit lane, then shift the
+  // signed lanes right by 8.
+  const VFromD<D> doubled{_mm_unpacklo_epi8(v.raw, v.raw)};
+  return ShiftRight<8>(doubled);
+#endif
+}
+// i16 -> i32 (sign-extend).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<int16_t, D>> v) {
+#if HWY_TARGET < HWY_SSSE3
+  return VFromD<D>{_mm_cvtepi16_epi32(v.raw)};
+#else
+  // SSE2/SSSE3: duplicate every 16-bit lane into a 32-bit lane, then shift
+  // the signed lanes right by 16.
+  const VFromD<D> doubled{_mm_unpacklo_epi16(v.raw, v.raw)};
+  return ShiftRight<16>(doubled);
+#endif
+}
+// i32 -> i64 (sign-extend).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_I64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<int32_t, D>> v) {
+#if HWY_TARGET < HWY_SSSE3
+  return VFromD<D>{_mm_cvtepi32_epi64(v.raw)};
+#else
+  // SSE2/SSSE3: duplicate every 32-bit lane into a 64-bit lane, then shift
+  // the signed lanes right by 32.
+  const VFromD<D> doubled{_mm_unpacklo_epi32(v.raw, v.raw)};
+  return ShiftRight<32>(doubled);
+#endif
+}
+// i8 -> i32 (sign-extend).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<int8_t, D>> v) {
+#if HWY_TARGET < HWY_SSSE3
+  return VFromD<D>{_mm_cvtepi8_epi32(v.raw)};
+#else
+  // SSE2/SSSE3: replicate every byte four times to fill a 32-bit lane, then
+  // shift the signed lanes right by 24.
+  const __m128i twice = _mm_unpacklo_epi8(v.raw, v.raw);
+  const __m128i four_times = _mm_unpacklo_epi16(twice, twice);
+  const VFromD<D> replicated{four_times};
+  return ShiftRight<24>(replicated);
+#endif
+}
+// i8 -> i64 (sign-extend).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_I64_D(D)>
+HWY_API VFromD<D> PromoteTo(D d, VFromD<Rebind<int8_t, D>> v) {
+#if HWY_TARGET >= HWY_SSSE3
+  const Repartition<int32_t, decltype(d)> di32;
+  const Half<decltype(di32)> dh_i32;
+  // First promote to i32 (lower half of the vector).
+  const VFromD<decltype(di32)> x4{PromoteTo(dh_i32, v).raw};
+  // Fill each of the two lower 32-bit lanes with copies of the upper 16 bits
+  // of the corresponding i32, which serve as the upper half of the i64.
+  const VFromD<decltype(di32)> s4{
+      _mm_shufflelo_epi16(x4.raw, _MM_SHUFFLE(3, 3, 1, 1))};
+  // Interleave value and upper-half lanes to form the 64-bit results.
+  return ZipLower(d, x4, s4);
+#else
+  (void)d;
+  return VFromD<D>{_mm_cvtepi8_epi64(v.raw)};
+#endif
+}
+// i16 -> i64 (sign-extend).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_I64_D(D)>
+HWY_API VFromD<D> PromoteTo(D d, VFromD<Rebind<int16_t, D>> v) {
+#if HWY_TARGET >= HWY_SSSE3
+  const Repartition<int32_t, decltype(d)> di32;
+  const Half<decltype(di32)> dh_i32;
+  // First promote to i32 (lower half of the vector).
+  const VFromD<decltype(di32)> x2{PromoteTo(dh_i32, v).raw};
+  // Fill each of the two lower 32-bit lanes with copies of the upper 16 bits
+  // of the corresponding i32, which serve as the upper half of the i64.
+  const VFromD<decltype(di32)> s2{
+      _mm_shufflelo_epi16(x2.raw, _MM_SHUFFLE(3, 3, 1, 1))};
+  // Interleave value and upper-half lanes to form the 64-bit results.
+  return ZipLower(d, x2, s2);
+#else
+  (void)d;
+  return VFromD<D>{_mm_cvtepi16_epi64(v.raw)};
+#endif
+}
+
+#if HWY_TARGET < HWY_SSE4 && !defined(HWY_DISABLE_F16C)
+
+// Per-target flag to prevent generic_ops-inl.h from defining f16 conversions.
+#ifdef HWY_NATIVE_F16C
+#undef HWY_NATIVE_F16C
+#else
+#define HWY_NATIVE_F16C
+#endif
+
+// Workaround for origin tracking bug in Clang msan prior to 11.0
+// (spurious "uninitialized memory" for TestF16 with "ORIGIN: invalid")
+#if HWY_IS_MSAN && (HWY_COMPILER_CLANG != 0 && HWY_COMPILER_CLANG < 1100)
+#define HWY_INLINE_F16 HWY_NOINLINE
+#else
+#define HWY_INLINE_F16 HWY_INLINE
+#endif
+// f16 -> f32 via _mm_cvtph_ps.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_INLINE_F16 VFromD<D> PromoteTo(D /*tag*/, VFromD<Rebind<float16_t, D>> v) {
+  const auto widened = _mm_cvtph_ps(v.raw);
+  return VFromD<D>{widened};
+}
+
+#endif  // HWY_NATIVE_F16C
+
+// bf16 -> f32: the 16 bits of each lane become the upper half of the f32.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> PromoteTo(D df32, VFromD<Rebind<bfloat16_t, D>> v) {
+  const RebindToSigned<decltype(df32)> di32;
+  const Rebind<uint16_t, decltype(df32)> du16;
+  const auto bits16 = BitCast(du16, v);
+  const auto bits32 = PromoteTo(di32, bits16);
+  return BitCast(df32, ShiftLeft<16>(bits32));
+}
+
+// f32 -> f64 via _mm_cvtps_pd.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<float, D>> v) {
+  const __m128d widened = _mm_cvtps_pd(v.raw);
+  return VFromD<D>{widened};
+}
+
+// i32 -> f64 via _mm_cvtepi32_pd.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, VFromD<Rebind<int32_t, D>> v) {
+  const __m128d converted = _mm_cvtepi32_pd(v.raw);
+  return VFromD<D>{converted};
+}
+
+// ------------------------------ Demotions (full -> part w/ narrow lanes)
+
+// i32 -> i16 via _mm_packs_epi32; the input serves as both operands.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int32_t, D>> v) {
+  const __m128i packed = _mm_packs_epi32(v.raw, v.raw);
+  return VFromD<D>{packed};
+}
+
+// i32 -> u16 with clamping to [0, 0xFFFF]. SSE2/SSSE3 emulate the clamp;
+// other targets use _mm_packus_epi32.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int32_t, D>> v) {
+#if HWY_TARGET >= HWY_SSSE3
+  const Rebind<int32_t, D> di32;
+  // Clears lanes whose sign bit is set (ShiftRight<31> on i32, then AndNot).
+  const auto zero_if_neg = AndNot(ShiftRight<31>(v), v);
+  // VecFromMask of the lanes greater than 0xFFFF; OR-ing it in sets the lower
+  // 16 bits of those lanes.
+  const auto too_big = VecFromMask(di32, Gt(v, Set(di32, 0xFFFF)));
+  const auto clamped = Or(zero_if_neg, too_big);
+#if HWY_TARGET == HWY_SSE2
+  const Rebind<uint16_t, decltype(di32)> du16;
+  const RebindToSigned<decltype(du16)> di16;
+  // Shift the lower 16 bits up and back down in the i32 lane before handing
+  // them to the i32 -> i16 demotion.
+  return BitCast(du16, DemoteTo(di16, ShiftRight<16>(ShiftLeft<16>(clamped))));
+#else
+  const Repartition<uint16_t, decltype(di32)> du16;
+  // Lower 2 bytes from each 32-bit lane; same as return type for fewer casts.
+  // du16 has at most eight lanes (16 bytes), so eight entries are all that
+  // Load can read.
+  alignas(16) static constexpr uint16_t kLower2Bytes[8] = {
+      0x0100, 0x0504, 0x0908, 0x0D0C, 0x8080, 0x8080, 0x8080, 0x8080};
+  const auto lo2 = Load(du16, kLower2Bytes);
+  return VFromD<D>{TableLookupBytes(BitCast(du16, clamped), lo2).raw};
+#endif
+#else
+  return VFromD<D>{_mm_packus_epi32(v.raw, v.raw)};
+#endif
+}
+
+// u32 -> u16 with clamping to 0xFFFF. SSE2/SSSE3 emulate the clamp; other
+// targets limit the input to 0x7FFFFFFF and reuse the i32 -> u16 demotion.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> DemoteTo(D du16, VFromD<Rebind<uint32_t, D>> v) {
+  const DFromV<decltype(v)> du32;
+  const RebindToSigned<decltype(du32)> di32;
+#if HWY_TARGET >= HWY_SSSE3
+  // VecFromMask of the lanes with any of the upper 16 bits set; OR-ing it in
+  // sets the lower 16 bits of those lanes.
+  const auto too_big =
+      VecFromMask(di32, Gt(BitCast(di32, ShiftRight<16>(v)), Zero(di32)));
+  const auto clamped = Or(BitCast(di32, v), too_big);
+#if HWY_TARGET == HWY_SSE2
+  const RebindToSigned<decltype(du16)> di16;
+  // Shift the lower 16 bits up and back down in the i32 lane before handing
+  // them to the i32 -> i16 demotion.
+  return BitCast(du16, DemoteTo(di16, ShiftRight<16>(ShiftLeft<16>(clamped))));
+#else
+  (void)du16;
+  const Repartition<uint16_t, decltype(di32)> du16_full;
+  // Lower 2 bytes from each 32-bit lane; same as return type for fewer casts.
+  // du16_full has at most eight lanes (16 bytes), so eight entries are all
+  // that Load can read.
+  alignas(16) static constexpr uint16_t kLower2Bytes[8] = {
+      0x0100, 0x0504, 0x0908, 0x0D0C, 0x8080, 0x8080, 0x8080, 0x8080};
+  const auto lo2 = Load(du16_full, kLower2Bytes);
+  return VFromD<D>{TableLookupBytes(BitCast(du16_full, clamped), lo2).raw};
+#endif
+#else
+  return DemoteTo(du16, BitCast(di32, Min(v, Set(du32, 0x7FFFFFFF))));
+#endif
+}
+
+// i32 -> u8 in two pack steps: _mm_packs_epi32 then _mm_packus_epi16.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int32_t, D>> v) {
+  const __m128i as_i16 = _mm_packs_epi32(v.raw, v.raw);
+  const __m128i as_u8 = _mm_packus_epi16(as_i16, as_i16);
+  return VFromD<D>{as_u8};
+}
+
+// i16 -> u8 via _mm_packus_epi16; the input serves as both operands.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int16_t, D>> v) {
+  const __m128i packed = _mm_packus_epi16(v.raw, v.raw);
+  return VFromD<D>{packed};
+}
+
+// i32 -> i8 in two pack steps: _mm_packs_epi32 then _mm_packs_epi16.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_I8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int32_t, D>> v) {
+  const __m128i as_i16 = _mm_packs_epi32(v.raw, v.raw);
+  const __m128i as_i8 = _mm_packs_epi16(as_i16, as_i16);
+  return VFromD<D>{as_i8};
+}
+
+// i16 -> i8 via _mm_packs_epi16; the input serves as both operands.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int16_t, D>> v) {
+  const __m128i packed = _mm_packs_epi16(v.raw, v.raw);
+  return VFromD<D>{packed};
+}
+
+// u32 -> u8. AVX3 targets convert directly; the others first limit the input
+// to the i32 range and then reuse the i32 -> u8 demotion.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D du8, VFromD<Rebind<uint32_t, D>> v) {
+#if HWY_TARGET <= HWY_AVX3
+  // NOTE: _mm_cvtusepi32_epi8 is a saturated conversion of 32-bit unsigned
+  // integers to 8-bit unsigned integers
+  (void)du8;
+  return VFromD<D>{_mm_cvtusepi32_epi8(v.raw)};
+#else
+  const DFromV<decltype(v)> du32;
+  const RebindToSigned<decltype(du32)> di32;
+  const auto i32_limit = Set(du32, 0x7FFFFFFFu);
+
+#if HWY_TARGET >= HWY_SSSE3
+  // SSE2/SSSE3 can do a u8 Min in a single instruction, so clamp bytewise
+  // against 0x7FFFFFFF instead of using a u32 Min:
+  // - the lower 24 bits of each 32-bit lane are left unchanged;
+  // - values <= 0x7FFFFFFF are left unchanged;
+  // - values >= 0x80000000 get their upper 8 bits forced to 0x7F, i.e. they
+  //   end up between 0x7F000000 and 0x7FFFFFFF, which the i32->u8 demotion
+  //   below then converts to 0xFF.
+  const Repartition<uint8_t, decltype(du32)> d_bytes;
+  const auto v_bytes = BitCast(d_bytes, v);
+  const auto limit_bytes = BitCast(d_bytes, i32_limit);
+  const auto clamped = BitCast(di32, Min(v_bytes, limit_bytes));
+#else
+  const auto clamped = BitCast(di32, Min(v, i32_limit));
+#endif
+
+  return DemoteTo(du8, clamped);
+#endif
+}
+
+// u16 -> u8: limits the input to the i16 range and then reuses the i16 -> u8
+// demotion.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D du8, VFromD<Rebind<uint16_t, D>> v) {
+  const DFromV<decltype(v)> du16;
+  const RebindToSigned<decltype(du16)> di16;
+  const auto i16_limit = Set(du16, 0x7FFF);
+
+#if HWY_TARGET >= HWY_SSSE3
+  // SSE2/SSSE3 can do a u8 Min in a single instruction, so clamp bytewise
+  // against 0x7FFF instead of using a u16 Min:
+  // - the lower 8 bits of each 16-bit lane are left unchanged;
+  // - values <= 0x7FFF are left unchanged;
+  // - values >= 0x8000 get their upper 8 bits forced to 0x7F, i.e. they end
+  //   up between 0x7F00 and 0x7FFF, which the i16->u8 demotion below then
+  //   converts to 0xFF.
+  const Repartition<uint8_t, decltype(du16)> d_bytes;
+  const auto v_bytes = BitCast(d_bytes, v);
+  const auto limit_bytes = BitCast(d_bytes, i16_limit);
+  const auto clamped = BitCast(di16, Min(v_bytes, limit_bytes));
+#else
+  const auto clamped = BitCast(di16, Min(v, i16_limit));
+#endif
+
+  return DemoteTo(du8, clamped);
+}
+
+#if HWY_TARGET < HWY_SSE4 && !defined(HWY_DISABLE_F16C)
+
+// HWY_NATIVE_F16C was already toggled above.
+
+// Work around MSVC warning for _mm_cvtps_ph (8 is actually a valid immediate).
+// clang-cl requires a non-empty string, so we 'ignore' the irrelevant -Wmain.
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4556, ignored "-Wmain")
+
+// f32 -> f16 via _mm_cvtps_ph with the _MM_FROUND_NO_EXC immediate.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_F16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /*tag*/, VFromD<Rebind<float, D>> v) {
+  const auto half_bits = _mm_cvtps_ph(v.raw, _MM_FROUND_NO_EXC);
+  return VFromD<D>{half_bits};
+}
+
+HWY_DIAGNOSTICS(pop)
+
+#endif  // F16C
+
+// f32 -> bf16: keeps the upper 16 bits of each f32 lane.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_BF16_D(D)>
+HWY_API VFromD<D> DemoteTo(D dbf16, VFromD<Rebind<float, D>> v) {
+  // TODO(janwas): _mm_cvtneps_pbh once we have avx512bf16.
+  const Rebind<uint16_t, decltype(dbf16)> du16;
+  const Rebind<uint32_t, decltype(dbf16)> du32;  // for logical shift right
+  const Rebind<int32_t, decltype(dbf16)> di32;
+  const auto upper_bits = ShiftRight<16>(BitCast(du32, v));
+  const auto as_i32 = BitCast(di32, upper_bits);
+  return BitCast(dbf16, DemoteTo(du16, as_i32));
+}
+
+// Two f32 vectors -> one bf16 vector: odd 16-bit lanes come from `a`, even
+// lanes from `b`.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_BF16_D(D),
+          class V32 = VFromD<Repartition<float, D>>>
+HWY_API VFromD<D> ReorderDemote2To(D dbf16, V32 a, V32 b) {
+  // TODO(janwas): _mm_cvtne2ps_pbh once we have avx512bf16.
+  const RebindToUnsigned<decltype(dbf16)> du16;
+  const Repartition<uint32_t, decltype(dbf16)> du32;
+  // Move the upper 16 bits of every f32 in `b` down into the even u16 lane.
+  const VFromD<decltype(du32)> b_upper = ShiftRight<16>(BitCast(du32, b));
+  // The upper 16 bits of `a` already sit in the odd u16 lanes.
+  const auto a_lanes = BitCast(du16, a);
+  const auto b_lanes = BitCast(du16, b_upper);
+  return BitCast(dbf16, OddEven(a_lanes, b_lanes));
+}
+
+// Specializations for partial vectors because packs_epi32 sets lanes above 2*N.
+// 2 x Vec32<i32> -> i16: combine both inputs, then use the ordinary demotion.
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, Vec32<int32_t> a, Vec32<int32_t> b) {
+  const Twice<DFromV<decltype(a)>> dt;
+  const auto joined = Combine(dt, b, a);
+  return DemoteTo(dn, joined);
+}
+// 2 x Vec64<i32> -> i16: pack both inputs, then gather 32-bit elements 0 and
+// 2 of the packed result into the lower 64 bits.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D /* tag */, Vec64<int32_t> a,
+                                   Vec64<int32_t> b) {
+  const __m128i packed = _mm_packs_epi32(a.raw, b.raw);
+  const __m128i gathered = _mm_shuffle_epi32(packed, _MM_SHUFFLE(2, 0, 2, 0));
+  return VFromD<D>{gathered};
+}
+// 2 x Vec128<i32> -> i16: a single _mm_packs_epi32.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D /* tag */, Vec128<int32_t> a,
+                                   Vec128<int32_t> b) {
+  const __m128i packed = _mm_packs_epi32(a.raw, b.raw);
+  return VFromD<D>{packed};
+}
+
+// 2 x Vec32<i32> -> u16: combine both inputs, then use the ordinary demotion.
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, Vec32<int32_t> a, Vec32<int32_t> b) {
+  const Twice<DFromV<decltype(a)>> dt;
+  const auto joined = Combine(dt, b, a);
+  return DemoteTo(dn, joined);
+}
+// 2 x Vec64<i32> -> u16.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, Vec64<int32_t> a, Vec64<int32_t> b) {
+#if HWY_TARGET >= HWY_SSSE3
+  // SSE2/SSSE3: combine both inputs, then use the ordinary demotion.
+  const Twice<DFromV<decltype(a)>> dt;
+  const auto joined = Combine(dt, b, a);
+  return DemoteTo(dn, joined);
+#else
+  (void)dn;
+  // Pack both inputs, then gather 32-bit elements 0 and 2 of the packed
+  // result into the lower 64 bits.
+  const __m128i packed = _mm_packus_epi32(a.raw, b.raw);
+  const __m128i gathered = _mm_shuffle_epi32(packed, _MM_SHUFFLE(2, 0, 2, 0));
+  return VFromD<D>{gathered};
+#endif
+}
+// 2 x Vec128<i32> -> u16.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, Vec128<int32_t> a, Vec128<int32_t> b) {
+#if HWY_TARGET >= HWY_SSSE3
+  // SSE2/SSSE3: demote each input to a half vector and combine the halves.
+  const Half<decltype(dn)> dn_half;
+  const auto a_half = DemoteTo(dn_half, a);
+  const auto b_half = DemoteTo(dn_half, b);
+  return Combine(dn, b_half, a_half);
+#else
+  (void)dn;
+  const __m128i packed = _mm_packus_epi32(a.raw, b.raw);
+  return VFromD<D>{packed};
+#endif
+}
+
+// 2 x Vec128<u32> -> u16: limit both inputs to the i32 range, then reuse the
+// i32 overload.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, Vec128<uint32_t> a,
+                                   Vec128<uint32_t> b) {
+  const DFromV<decltype(a)> du32;
+  const RebindToSigned<decltype(du32)> di32;
+  const auto i32_limit = Set(du32, 0x7FFFFFFFu);
+
+#if HWY_TARGET >= HWY_SSSE3
+  // On SSE2/SSSE3, clamp a and b using u8 Min operation
+  const Repartition<uint8_t, decltype(du32)> d_bytes;
+  const auto limit_bytes = BitCast(d_bytes, i32_limit);
+  const auto a_clamped =
+      BitCast(di32, Min(BitCast(d_bytes, a), limit_bytes));
+  const auto b_clamped =
+      BitCast(di32, Min(BitCast(d_bytes, b), limit_bytes));
+#else
+  const auto a_clamped = BitCast(di32, Min(a, i32_limit));
+  const auto b_clamped = BitCast(di32, Min(b, i32_limit));
+#endif
+
+  return ReorderDemote2To(dn, a_clamped, b_clamped);
+}
+
+// Partial u32 vectors -> u16: combine both inputs, then use the ordinary
+// demotion.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, VFromD<Repartition<uint32_t, D>> a,
+                                   VFromD<Repartition<uint32_t, D>> b) {
+  const Twice<DFromV<decltype(a)>> dt;
+  const auto joined = Combine(dt, b, a);
+  return DemoteTo(dn, joined);
+}
+
+// Specializations for partial vectors because packs_epi32 sets lanes above 2*N.
+// Partial i16 vectors -> i8: combine both inputs, then use the ordinary
+// demotion.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_I8_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, VFromD<Repartition<int16_t, D>> a,
+                                   VFromD<Repartition<int16_t, D>> b) {
+  const Twice<DFromV<decltype(a)>> dt;
+  const auto joined = Combine(dt, b, a);
+  return DemoteTo(dn, joined);
+}
+// 2 x Vec64<i16> -> i8: pack both inputs, then gather 32-bit elements 0 and 2
+// of the packed result into the lower 64 bits.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_I8_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D /* tag */, Vec64<int16_t> a,
+                                   Vec64<int16_t> b) {
+  const __m128i packed = _mm_packs_epi16(a.raw, b.raw);
+  const __m128i gathered = _mm_shuffle_epi32(packed, _MM_SHUFFLE(2, 0, 2, 0));
+  return VFromD<D>{gathered};
+}
+// 2 x Vec128<i16> -> i8: a single _mm_packs_epi16.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I8_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D /* tag */, Vec128<int16_t> a,
+                                   Vec128<int16_t> b) {
+  const __m128i packed = _mm_packs_epi16(a.raw, b.raw);
+  return VFromD<D>{packed};
+}
+
+// Partial i16 vectors -> u8: combine both inputs, then use the ordinary
+// demotion.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, VFromD<Repartition<int16_t, D>> a,
+                                   VFromD<Repartition<int16_t, D>> b) {
+  const Twice<DFromV<decltype(a)>> dt;
+  const auto joined = Combine(dt, b, a);
+  return DemoteTo(dn, joined);
+}
+// 2 x Vec64<i16> -> u8: pack both inputs, then gather 32-bit elements 0 and 2
+// of the packed result into the lower 64 bits.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D /* tag */, Vec64<int16_t> a,
+                                   Vec64<int16_t> b) {
+  const __m128i packed = _mm_packus_epi16(a.raw, b.raw);
+  const __m128i gathered = _mm_shuffle_epi32(packed, _MM_SHUFFLE(2, 0, 2, 0));
+  return VFromD<D>{gathered};
+}
+// 2 x Vec128<i16> -> u8: a single _mm_packus_epi16.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D /* tag */, Vec128<int16_t> a,
+                                   Vec128<int16_t> b) {
+  const __m128i packed = _mm_packus_epi16(a.raw, b.raw);
+  return VFromD<D>{packed};
+}
+
+// 2 x Vec128<u16> -> u8: limit both inputs to the i16 range, then reuse the
+// i16 overload.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, Vec128<uint16_t> a,
+                                   Vec128<uint16_t> b) {
+  const DFromV<decltype(a)> du16;
+  const RebindToSigned<decltype(du16)> di16;
+  const auto i16_limit = Set(du16, 0x7FFFu);
+
+#if HWY_TARGET >= HWY_SSSE3
+  // On SSE2/SSSE3, clamp a and b using u8 Min operation
+  const Repartition<uint8_t, decltype(du16)> d_bytes;
+  const auto limit_bytes = BitCast(d_bytes, i16_limit);
+  const auto a_clamped =
+      BitCast(di16, Min(BitCast(d_bytes, a), limit_bytes));
+  const auto b_clamped =
+      BitCast(di16, Min(BitCast(d_bytes, b), limit_bytes));
+#else
+  const auto a_clamped = BitCast(di16, Min(a, i16_limit));
+  const auto b_clamped = BitCast(di16, Min(b, i16_limit));
+#endif
+
+  return ReorderDemote2To(dn, a_clamped, b_clamped);
+}
+
+// Partial u16 vectors -> u8: combine both inputs, then use the ordinary
+// demotion.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, VFromD<Repartition<uint16_t, D>> a,
+                                   VFromD<Repartition<uint16_t, D>> b) {
+  const Twice<DFromV<decltype(a)>> dt;
+  const auto joined = Combine(dt, b, a);
+  return DemoteTo(dn, joined);
+}
+
+// Integer OrderedDemote2To for vectors of up to 16 bytes, where V has lanes
+// twice as wide as D and half as many of them: forwards to ReorderDemote2To.
+template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<D>),
+          HWY_IF_V_SIZE_LE_D(D, 16), class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<D>) * 2),
+          HWY_IF_LANES_D(D, HWY_MAX_LANES_D(DFromV<V>) * 2)>
+HWY_API VFromD<D> OrderedDemote2To(D d, V a, V b) {
+  return ReorderDemote2To(d, a, b);
+}
+
+// Two f32 vectors -> one bf16 vector: concatenates the odd 16-bit lanes (the
+// upper half of each f32) of `b` and `a` via ConcatOdd.
+template <class D, HWY_IF_BF16_D(D), class V32 = VFromD<Repartition<float, D>>>
+HWY_API VFromD<D> OrderedDemote2To(D dbf16, V32 a, V32 b) {
+  const RebindToUnsigned<decltype(dbf16)> du16;
+  const auto a_lanes = BitCast(du16, a);
+  const auto b_lanes = BitCast(du16, b);
+  return BitCast(dbf16, ConcatOdd(du16, b_lanes, a_lanes));
+}
+
+// f64 -> f32 via _mm_cvtpd_ps.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<double, D>> v) {
+  const __m128 narrowed = _mm_cvtpd_ps(v.raw);
+  return VFromD<D>{narrowed};
+}
+
+namespace detail {
+
+// Generic for all vector lengths.
+// Limits every f64 lane to at most 2147483647.0.
+template <class D>
+HWY_INLINE VFromD<D> ClampF64ToI32Max(D d, VFromD<D> v) {
+  // The max can be exactly represented in binary64, so clamping beforehand
+  // prevents x86 conversion from raising an exception and returning 80..00.
+  const VFromD<D> i32_max = Set(d, 2147483647.0);
+  return Min(v, i32_max);
+}
+
+// For ConvertTo float->int of same size, clamping before conversion would
+// change the result because the max integer value is not exactly representable.
+// Instead detect the overflow result after conversion and fix it.
+// Generic for all vector lengths.
+//
+// `original` is the floating-point input and `converted` the integer result
+// of the conversion. Returns `converted`, with lanes replaced by 7F..FF where
+// the input's sign bit was clear but the result's sign bit is set.
+template <class DI>
+HWY_INLINE VFromD<DI> FixConversionOverflow(DI di,
+                                            VFromD<RebindToFloat<DI>> original,
+                                            VFromD<DI> converted) {
+  // Combinations of original and output sign:
+  //   --: normal <0 or -huge_val to 80..00: OK
+  //   -+: -0 to 0                         : OK
+  //   +-: +huge_val to 80..00             : xor with FF..FF to get 7F..FF
+  //   ++: normal >0                       : OK
+  // Sign bit of sign_wrong is set only in the "+-" case (~original &
+  // converted).
+  const VFromD<DI> sign_wrong = AndNot(BitCast(di, original), converted);
+#if HWY_COMPILER_GCC_ACTUAL
+  // Critical GCC 11 compiler bug (possibly also GCC 10): omits the Xor; also
+  // Add() if using that instead. Work around with one more instruction.
+  const RebindToUnsigned<DI> du;
+  const VFromD<DI> mask = BroadcastSignBit(sign_wrong);
+  // Logical shift of the all-ones mask by one yields 7F..FF in affected lanes.
+  const VFromD<DI> max = BitCast(di, ShiftRight<1>(BitCast(du, mask)));
+  return IfVecThenElse(mask, max, converted);
+#else
+  return Xor(converted, BroadcastSignBit(sign_wrong));
+#endif
+}
+
+}  // namespace detail
+
+// Converts f64 lanes to i32 with _mm_cvttpd_epi32. Lanes are first capped at
+// the int32_t maximum by detail::ClampF64ToI32Max.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I32_D(D),
+          class DF = Rebind<double, D>>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<DF> v) {
+  const DF df;
+  const VFromD<DF> capped = detail::ClampF64ToI32Max(df, v);
+  const auto as_i32 = _mm_cvttpd_epi32(capped.raw);
+  return VFromD<D>{as_i32};
+}
+
+// For already range-limited input [0, 255].
+// Packs each u32 lane into a u8 lane.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> U8FromU32(const Vec128<uint32_t, N> v) {
+#if HWY_TARGET == HWY_SSE2
+  // No byte shuffle on this path: reinterpret as i32 and narrow via DemoteTo.
+  const RebindToSigned<DFromV<decltype(v)>> d_i32;
+  const Rebind<uint8_t, decltype(d_i32)> d_u8;
+  const auto as_signed = BitCast(d_i32, v);
+  return DemoteTo(d_u8, as_signed);
+#else
+  const DFromV<decltype(v)> d_u32;
+  const Repartition<uint8_t, decltype(d_u32)> d_bytes;
+  // Byte indices 0, 4, 8, 12: the lowest byte of each 32-bit lane.
+  alignas(16) static constexpr uint32_t kLowByteIdx[4] = {
+      0x0C080400u, 0x0C080400u, 0x0C080400u, 0x0C080400u};
+  // Also replicate bytes into all 32 bit lanes for safety.
+  const auto shuffled = TableLookupBytes(v, Load(d_u32, kLowByteIdx));
+  const auto as_bytes = BitCast(d_bytes, shuffled);
+  return LowerHalf(LowerHalf(as_bytes));
+#endif
+}
+
+// ------------------------------ MulFixedPoint15
+
+#if HWY_TARGET == HWY_SSE2
+// SSE2 version for full vectors: computes (a * b + 0x4000) >> 15 per lane by
+// rebuilding the 32-bit products and demoting them back to i16.
+HWY_API Vec128<int16_t> MulFixedPoint15(const Vec128<int16_t> a,
+                                        const Vec128<int16_t> b) {
+  const DFromV<decltype(a)> d;
+  const Repartition<int32_t, decltype(d)> di32;
+
+  // The two 16-bit halves of every product.
+  const auto prod_lo16 = a * b;
+  const auto prod_hi16 = MulHigh(a, b);
+
+  // Interleave the halves to obtain 32-bit products.
+  const VFromD<decltype(di32)> prod32_lower{
+      _mm_unpacklo_epi16(prod_lo16.raw, prod_hi16.raw)};
+  const VFromD<decltype(di32)> prod32_upper{
+      _mm_unpackhi_epi16(prod_lo16.raw, prod_hi16.raw)};
+
+  // Adding 0x4000 before shifting right by 15 rounds the result.
+  const auto rounding = Set(di32, 0x4000);
+  const auto q_lower = ShiftRight<15>(prod32_lower + rounding);
+  const auto q_upper = ShiftRight<15>(prod32_upper + rounding);
+  return ReorderDemote2To(d, q_lower, q_upper);
+}
+
+// SSE2 version for vectors of at most 8 bytes: all 32-bit products fit into a
+// single vector, so one unpack and one DemoteTo suffice.
+template <size_t N, HWY_IF_V_SIZE_LE(int16_t, N, 8)>
+HWY_API Vec128<int16_t, N> MulFixedPoint15(const Vec128<int16_t, N> a,
+                                           const Vec128<int16_t, N> b) {
+  const DFromV<decltype(a)> d;
+  const Rebind<int32_t, decltype(d)> di32;
+
+  // The two 16-bit halves of every product, interleaved into 32-bit lanes.
+  const auto prod_lo16 = a * b;
+  const auto prod_hi16 = MulHigh(a, b);
+  const VFromD<decltype(di32)> prod32{
+      _mm_unpacklo_epi16(prod_lo16.raw, prod_hi16.raw)};
+
+  // Adding 0x4000 before shifting right by 15 rounds the result.
+  const auto rounded = ShiftRight<15>(prod32 + Set(di32, 0x4000));
+  return DemoteTo(d, rounded);
+}
+#else
+// Targets other than SSE2: a single _mm_mulhrs_epi16 does the whole operation.
+template <size_t N>
+HWY_API Vec128<int16_t, N> MulFixedPoint15(const Vec128<int16_t, N> a,
+                                           const Vec128<int16_t, N> b) {
+  const auto product = _mm_mulhrs_epi16(a.raw, b.raw);
+  return Vec128<int16_t, N>{product};
+}
+#endif
+
+// ------------------------------ Truncations
+
+// Single-lane truncation: reinterprets the input register as lanes of the
+// target type and wraps the same raw register in the narrower vector type.
+template <typename From, class DTo, HWY_IF_LANES_D(DTo, 1)>
+HWY_API VFromD<DTo> TruncateTo(DTo /* tag */, Vec128<From, 1> v) {
+  // BitCast requires the same size; DTo might be u8x1 and v u16x1.
+  const Repartition<TFromD<DTo>, DFromV<decltype(v)>> d_same_size;
+  const auto recast = BitCast(d_same_size, v);
+  return VFromD<DTo>{recast.raw};
+}
+
+// Truncates both u64 lanes to u8, producing a two-lane u8 vector.
+template <class D, HWY_IF_V_SIZE_D(D, 2), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> TruncateTo(D d, Vec128<uint64_t> v) {
+#if HWY_TARGET == HWY_SSE2
+  // View each 64-bit lane as a one-lane u8 vector, then join the two.
+  const Vec128<uint8_t, 1> from_upper{_mm_unpackhi_epi64(v.raw, v.raw)};
+  const Vec128<uint8_t, 1> from_lower{v.raw};
+  return Combine(d, from_upper, from_lower);
+#else
+  (void)d;
+  const Repartition<uint8_t, DFromV<decltype(v)>> d_bytes;
+  // Byte indices 0 and 8 are the first byte of each 64-bit lane.
+  alignas(16) static constexpr uint8_t kFirstBytes[16] = {
+      0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8};
+  const Vec128<uint8_t> picked =
+      TableLookupBytes(v, Load(d_bytes, kFirstBytes));
+  return LowerHalf(LowerHalf(LowerHalf(picked)));
+#endif
+}
+
+// Truncates both u64 lanes to u16, producing a two-lane u16 vector.
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> TruncateTo(D d, Vec128<uint64_t> v) {
+#if HWY_TARGET == HWY_SSE2
+  // View each 64-bit lane as a one-lane u16 vector, then join the two.
+  const Vec128<uint16_t, 1> from_upper{_mm_unpackhi_epi64(v.raw, v.raw)};
+  const Vec128<uint16_t, 1> from_lower{v.raw};
+  return Combine(d, from_upper, from_lower);
+#else
+  (void)d;
+  const Repartition<uint16_t, DFromV<decltype(v)>> d_u16;
+  // Each u16 entry packs two byte indices: 0x100 -> bytes 0,1 and
+  // 0x908 -> bytes 8,9.
+  alignas(16) static constexpr uint16_t kFirstWords[8] = {
+      0x100u, 0x908u, 0x100u, 0x908u, 0x100u, 0x908u, 0x100u, 0x908u};
+  const Vec128<uint16_t> picked =
+      TableLookupBytes(v, Load(d_u16, kFirstWords));
+  return LowerHalf(LowerHalf(picked));
+#endif
+}
+
+// Truncates both u64 lanes to u32 with a single 32-bit shuffle: immediate 0x88
+// selects 32-bit elements 0, 2, 0, 2.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, Vec128<uint64_t> v) {
+  const auto even_u32 = _mm_shuffle_epi32(v.raw, 0x88);
+  return VFromD<D>{even_u32};
+}
+
+// Truncates u32 lanes to u8 (result at most 4 bytes).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, VFromD<Rebind<uint32_t, D>> v) {
+  const DFromV<decltype(v)> d_u32;
+#if HWY_TARGET == HWY_SSE2
+  const RebindToSigned<decltype(d_u32)> d_i32;
+  const Rebind<uint8_t, decltype(d_i32)> d_u8;
+  // Shifting left then right by 24 zeroes everything above the low byte, so
+  // the subsequent DemoteTo sees values in [0, 255].
+  const auto low_byte = ShiftRight<24>(ShiftLeft<24>(v));
+  return DemoteTo(d_u8, BitCast(d_i32, low_byte));
+#else
+  const Repartition<uint8_t, decltype(d_u32)> d_bytes;
+  // Byte indices 0, 4, 8, 12: the first byte of each 32-bit lane.
+  alignas(16) static constexpr uint8_t kFirstBytes[16] = {
+      0x0u, 0x4u, 0x8u, 0xCu, 0x0u, 0x4u, 0x8u, 0xCu,
+      0x0u, 0x4u, 0x8u, 0xCu, 0x0u, 0x4u, 0x8u, 0xCu};
+  const auto picked = TableLookupBytes(v, Load(d_bytes, kFirstBytes));
+  return LowerHalf(LowerHalf(picked));
+#endif
+}
+
+// Truncates u32 lanes to u16 (result at most 8 bytes).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, VFromD<Rebind<uint32_t, D>> v) {
+  const DFromV<decltype(v)> d_u32;
+#if HWY_TARGET == HWY_SSE2
+  const RebindToSigned<decltype(d_u32)> d_i32;
+  const Rebind<uint16_t, decltype(d_i32)> d_u16;
+  const RebindToSigned<decltype(d_u16)> d_i16;
+  // Move the low 16 bits to the top, then shift back as a signed value so the
+  // lane fits the i16 range before the signed DemoteTo.
+  const auto at_top = BitCast(d_i32, ShiftLeft<16>(v));
+  const auto sign_extended = ShiftRight<16>(at_top);
+  return BitCast(d_u16, DemoteTo(d_i16, sign_extended));
+#else
+  const Repartition<uint16_t, decltype(d_u32)> d_u16;
+  const auto as_u16 = BitCast(d_u16, v);
+  return LowerHalf(ConcatEven(d_u16, as_u16, as_u16));
+#endif
+}
+
+// Truncates u16 lanes to u8 (result at most 8 bytes).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, VFromD<Rebind<uint16_t, D>> v) {
+  const DFromV<decltype(v)> d_u16;
+#if HWY_TARGET == HWY_SSE2
+  const RebindToSigned<decltype(d_u16)> d_i16;
+  const Rebind<uint8_t, decltype(d_i16)> d_u8;
+  const RebindToSigned<decltype(d_u8)> d_i8;
+  // Move the low 8 bits to the top, then shift back as a signed value so the
+  // lane fits the i8 range before the signed DemoteTo.
+  const auto at_top = BitCast(d_i16, ShiftLeft<8>(v));
+  const auto sign_extended = ShiftRight<8>(at_top);
+  return BitCast(d_u8, DemoteTo(d_i8, sign_extended));
+#else
+  const Repartition<uint8_t, decltype(d_u16)> d_bytes;
+  const auto as_bytes = BitCast(d_bytes, v);
+  return LowerHalf(ConcatEven(d_bytes, as_bytes, as_bytes));
+#endif
+}
+
+// ------------------------------ Demotions to/from i64
+
+#if HWY_TARGET <= HWY_AVX3
+// AVX3: demote i64 lanes to i32 / i16 / i8 with the _mm_cvtsepi64_* intrinsics.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int64_t, D>> v) {
+  const auto narrowed = _mm_cvtsepi64_epi32(v.raw);
+  return VFromD<D>{narrowed};
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int64_t, D>> v) {
+  const auto narrowed = _mm_cvtsepi64_epi16(v.raw);
+  return VFromD<D>{narrowed};
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 2), HWY_IF_I8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int64_t, D>> v) {
+  const auto narrowed = _mm_cvtsepi64_epi8(v.raw);
+  return VFromD<D>{narrowed};
+}
+
+// AVX3: demote i64 lanes to u32 / u16 / u8. The mask obtained from `v` (its
+// negative lanes) is inverted and passed to the zero-masking
+// _mm_maskz_cvtusepi64_* intrinsics, so negative lanes produce zero.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int64_t, D>> v) {
+  const auto is_negative = MaskFromVec(v);
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask8 keep = _knot_mask8(is_negative.raw);
+#else
+  const __mmask8 keep = static_cast<__mmask8>(~is_negative.raw);
+#endif
+  const auto narrowed = _mm_maskz_cvtusepi64_epi32(keep, v.raw);
+  return VFromD<D>{narrowed};
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int64_t, D>> v) {
+  const auto is_negative = MaskFromVec(v);
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask8 keep = _knot_mask8(is_negative.raw);
+#else
+  const __mmask8 keep = static_cast<__mmask8>(~is_negative.raw);
+#endif
+  const auto narrowed = _mm_maskz_cvtusepi64_epi16(keep, v.raw);
+  return VFromD<D>{narrowed};
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 2), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<int64_t, D>> v) {
+  const auto is_negative = MaskFromVec(v);
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask8 keep = _knot_mask8(is_negative.raw);
+#else
+  const __mmask8 keep = static_cast<__mmask8>(~is_negative.raw);
+#endif
+  const auto narrowed = _mm_maskz_cvtusepi64_epi8(keep, v.raw);
+  return VFromD<D>{narrowed};
+}
+
+// AVX3: demote u64 lanes to u32 / u16 / u8 with the _mm_cvtusepi64_*
+// intrinsics.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<uint64_t, D>> v) {
+  const auto narrowed = _mm_cvtusepi64_epi32(v.raw);
+  return VFromD<D>{narrowed};
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 4), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<uint64_t, D>> v) {
+  const auto narrowed = _mm_cvtusepi64_epi16(v.raw);
+  return VFromD<D>{narrowed};
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 2), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, VFromD<Rebind<uint64_t, D>> v) {
+  const auto narrowed = _mm_cvtusepi64_epi8(v.raw);
+  return VFromD<D>{narrowed};
+}
+#else   // AVX2 or below
+namespace detail {
+// Unsigned target type: no masking is required, so the u64 lanes handed over
+// by DemoteFromU64Saturate are returned unchanged.
+template <class D, HWY_IF_UNSIGNED_D(D)>
+HWY_INLINE VFromD<Rebind<uint64_t, D>> DemoteFromU64MaskOutResult(
+    D /*dn*/, VFromD<Rebind<uint64_t, D>> v) {
+  return v;
+}
+
+// Signed target type: keeps only the bits of hwy::HighestValue<TFromD<D>>() in
+// every u64 lane, so an all-ones (saturated) lane becomes that highest value.
+template <class D, HWY_IF_SIGNED_D(D)>
+HWY_INLINE VFromD<Rebind<uint64_t, D>> DemoteFromU64MaskOutResult(
+    D /*dn*/, VFromD<Rebind<uint64_t, D>> v) {
+  const DFromV<decltype(v)> du64;
+  const uint64_t highest =
+      static_cast<uint64_t>(hwy::HighestValue<TFromD<D>>());
+  return And(v, Set(du64, highest));
+}
+
+// Saturates u64 lanes to the value range of TFromD<D> while keeping them
+// 64 bits wide. A lane with any bit set at or above kValueBits is turned into
+// all-ones; DemoteFromU64MaskOutResult then post-processes the result
+// according to the signedness of D.
+template <class D>
+HWY_INLINE VFromD<Rebind<uint64_t, D>> DemoteFromU64Saturate(
+    D dn, VFromD<Rebind<uint64_t, D>> v) {
+  const Rebind<uint64_t, D> du64;
+  const RebindToSigned<decltype(du64)> di64;
+  // Number of value bits in the target type (one fewer if it is signed).
+  constexpr int kValueBits = static_cast<int>(sizeof(TFromD<D>) * 8) -
+                             static_cast<int>(hwy::IsSigned<TFromD<D>>());
+
+  // Whatever remains after shifting out the value bits means "too large".
+  const auto excess = BitCast(di64, ShiftRight<kValueBits>(v));
+  const auto overflowed = Gt(excess, Zero(di64));
+  const auto all_ones_if_big = BitCast(du64, VecFromMask(di64, overflowed));
+  return DemoteFromU64MaskOutResult(dn, Or(v, all_ones_if_big));
+}
+
+// Reinterprets `a` and `b` with the tag `dn` and merges their even-indexed
+// lanes via ConcatEven(dn, b, a).
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), class V>
+HWY_INLINE VFromD<D> ReorderDemote2From64To32Combine(D dn, V a, V b) {
+  const auto a_narrow = BitCast(dn, a);
+  const auto b_narrow = BitCast(dn, b);
+  return ConcatEven(dn, b_narrow, a_narrow);
+}
+
+}  // namespace detail
+
+// AVX2 or below: demotes i64 lanes to a signed 1/2/4-byte type.
+template <class D, HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_SIGNED_D(D)>
+HWY_API VFromD<D> DemoteTo(D dn, VFromD<Rebind<int64_t, D>> v) {
+  const DFromV<decltype(v)> di64;
+  const RebindToUnsigned<decltype(di64)> du64;
+  const RebindToUnsigned<decltype(dn)> dn_u;
+
+  // Negative values are saturated by first saturating their bitwise inverse
+  // and then inverting the saturation result
+  const auto sign_fill = BitCast(du64, BroadcastSignBit(v));
+  const auto non_negative = Xor(sign_fill, BitCast(du64, v));
+  const auto clamped = detail::DemoteFromU64Saturate(dn, non_negative);
+  const auto restored = Xor(sign_fill, clamped);
+  return BitCast(dn, TruncateTo(dn_u, restored));
+}
+
+// AVX2 or below: demotes i64 lanes to an unsigned 1/2/4-byte type. Negative
+// lanes are zeroed first; the rest is saturated and truncated.
+template <class D, HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_UNSIGNED_D(D)>
+HWY_API VFromD<D> DemoteTo(D dn, VFromD<Rebind<int64_t, D>> v) {
+  const DFromV<decltype(v)> di64;
+  const RebindToUnsigned<decltype(di64)> du64;
+
+  // AndNot with the broadcast sign clears every lane that was negative.
+  const auto zeroed_negatives = AndNot(BroadcastSignBit(v), v);
+  const auto as_unsigned = BitCast(du64, zeroed_negatives);
+  const auto clamped = detail::DemoteFromU64Saturate(dn, as_unsigned);
+  return TruncateTo(dn, clamped);
+}
+
+// AVX2 or below: demotes u64 lanes to an unsigned 1/2/4-byte type by
+// saturating within 64 bits and then truncating.
+template <class D, HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2) | (1 << 4)),
+          HWY_IF_UNSIGNED_D(D)>
+HWY_API VFromD<D> DemoteTo(D dn, VFromD<Rebind<uint64_t, D>> v) {
+  const auto clamped = detail::DemoteFromU64Saturate(dn, v);
+  return TruncateTo(dn, clamped);
+}
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// Demotes two i64 vectors to one vector of 4-byte integer lanes (at most half
+// of HWY_MAX_BYTES): the inputs are joined into a double-width i64 vector and
+// narrowed with DemoteTo.
+template <class D, HWY_IF_V_SIZE_LE_D(D, HWY_MAX_BYTES / 2),
+          HWY_IF_T_SIZE_D(D, 4), HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<D>)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, VFromD<Repartition<int64_t, D>> a,
+                                   VFromD<Repartition<int64_t, D>> b) {
+  const DFromV<decltype(a)> d_in;
+  const Twice<decltype(d_in)> d_wide;
+  const auto joined = Combine(d_wide, b, a);
+  return DemoteTo(dn, joined);
+}
+
+// Demotes two u64 vectors to one u32 vector (at most half of HWY_MAX_BYTES):
+// the inputs are joined into a double-width u64 vector and narrowed with
+// DemoteTo.
+template <class D, HWY_IF_V_SIZE_LE_D(D, HWY_MAX_BYTES / 2), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, VFromD<Repartition<uint64_t, D>> a,
+                                   VFromD<Repartition<uint64_t, D>> b) {
+  const DFromV<decltype(a)> d_in;
+  const Twice<decltype(d_in)> d_wide;
+  const auto joined = Combine(d_wide, b, a);
+  return DemoteTo(dn, joined);
+}
+
+#if HWY_TARGET > HWY_AVX2
+// Full 128-bit i64 -> i32 demotion of two vectors on targets without AVX2.
+// Each input is saturated while still 64 bits wide; ConcatEven then keeps the
+// even-indexed 32-bit lanes of both.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I32_D(D)>
+HWY_API Vec128<int32_t> ReorderDemote2To(D dn, Vec128<int64_t> a,
+                                         Vec128<int64_t> b) {
+  const DFromV<decltype(a)> di64;
+  const RebindToUnsigned<decltype(di64)> du64;
+  const Half<decltype(dn)> dnh;
+
+  // Negative values are saturated by first saturating their bitwise inverse
+  // and then inverting the saturation result
+  const auto sign_fill_a = BitCast(du64, BroadcastSignBit(a));
+  const auto clamped_a =
+      detail::DemoteFromU64Saturate(dnh, Xor(sign_fill_a, BitCast(du64, a)));
+  const auto result_a = Xor(sign_fill_a, clamped_a);
+
+  const auto sign_fill_b = BitCast(du64, BroadcastSignBit(b));
+  const auto clamped_b =
+      detail::DemoteFromU64Saturate(dnh, Xor(sign_fill_b, BitCast(du64, b)));
+  const auto result_b = Xor(sign_fill_b, clamped_b);
+
+  return ConcatEven(dn, BitCast(dn, result_b), BitCast(dn, result_a));
+}
+
+// Full 128-bit i64 -> u32 demotion of two vectors on targets without AVX2.
+// Negative lanes are zeroed, the rest is saturated while still 64 bits wide,
+// and ConcatEven keeps the even-indexed 32-bit lanes of both inputs.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U32_D(D)>
+HWY_API Vec128<uint32_t> ReorderDemote2To(D dn, Vec128<int64_t> a,
+                                          Vec128<int64_t> b) {
+  const DFromV<decltype(a)> di64;
+  const RebindToUnsigned<decltype(di64)> du64;
+  const Half<decltype(dn)> dnh;
+
+  const auto non_negative_a = BitCast(du64, AndNot(BroadcastSignBit(a), a));
+  const auto result_a = detail::DemoteFromU64Saturate(dnh, non_negative_a);
+
+  const auto non_negative_b = BitCast(du64, AndNot(BroadcastSignBit(b), b));
+  const auto result_b = detail::DemoteFromU64Saturate(dnh, non_negative_b);
+
+  return ConcatEven(dn, BitCast(dn, result_b), BitCast(dn, result_a));
+}
+
+// Full 128-bit u64 -> u32 demotion of two vectors on targets without AVX2:
+// saturate each input while still 64 bits wide, then keep the even-indexed
+// 32-bit lanes of both via ConcatEven.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U32_D(D)>
+HWY_API Vec128<uint32_t> ReorderDemote2To(D dn, Vec128<uint64_t> a,
+                                          Vec128<uint64_t> b) {
+  const Half<decltype(dn)> dnh;
+
+  const auto result_a = BitCast(dn, detail::DemoteFromU64Saturate(dnh, a));
+  const auto result_b = BitCast(dn, detail::DemoteFromU64Saturate(dnh, b));
+
+  return ConcatEven(dn, result_b, result_a);
+}
+#endif  // HWY_TARGET > HWY_AVX2
+
+// ------------------------------ Integer <=> fp (ShiftRight, OddEven)
+
+#if HWY_HAVE_FLOAT16
+// u16 -> f16 and i16 -> f16 conversions using _mm_cvtepu16_ph and
+// _mm_cvtepi16_ph.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F16_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, VFromD<Rebind<uint16_t, D>> v) {
+  const auto as_f16 = _mm_cvtepu16_ph(v.raw);
+  return VFromD<D>{as_f16};
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F16_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, VFromD<Rebind<int16_t, D>> v) {
+  const auto as_f16 = _mm_cvtepi16_ph(v.raw);
+  return VFromD<D>{as_f16};
+}
+#endif  // HWY_HAVE_FLOAT16
+
+// i32 -> f32 conversion using _mm_cvtepi32_ps.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, VFromD<Rebind<int32_t, D>> v) {
+  const auto as_f32 = _mm_cvtepi32_ps(v.raw);
+  return VFromD<D>{as_f32};
+}
+
+#if HWY_TARGET <= HWY_AVX3
+// AVX3: direct conversion intrinsics exist for u32 -> f32, i64 -> f64 and
+// u64 -> f64.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ConvertTo(D /*df*/, VFromD<Rebind<uint32_t, D>> v) {
+  const auto as_f32 = _mm_cvtepu32_ps(v.raw);
+  return VFromD<D>{as_f32};
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> ConvertTo(D /*dd*/, VFromD<Rebind<int64_t, D>> v) {
+  const auto as_f64 = _mm_cvtepi64_pd(v.raw);
+  return VFromD<D>{as_f64};
+}
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> ConvertTo(D /*dd*/, VFromD<Rebind<uint64_t, D>> v) {
+  const auto as_f64 = _mm_cvtepu64_pd(v.raw);
+  return VFromD<D>{as_f64};
+}
+#else   // AVX2 or below
+// Generic for all vector lengths.
+// u32 -> f32 without a native unsigned conversion: the two 16-bit halves of
+// each lane are converted separately through the signed i32 path and
+// recombined as hi * 2^16 + lo.
+template <class D, HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ConvertTo(D df, VFromD<Rebind<uint32_t, D>> v) {
+  // Based on wim's approach (https://stackoverflow.com/questions/34066228/)
+  const RebindToUnsigned<decltype(df)> du32;
+  const RebindToSigned<decltype(df)> d32;
+
+  // Split into the 16 lowest/highest significant bits and cast to signed int;
+  // both halves are at most 0xFFFF.
+  const auto low_half = BitCast(d32, And(v, Set(du32, 0xFFFF)));
+  const auto high_half = BitCast(d32, ShiftRight<16>(v));
+
+  const auto low_f = ConvertTo(df, low_half);
+  const auto high_f = ConvertTo(df, high_half);
+  const auto two_pow_16 = Set(df, 65536.0f);  // 2^16
+  return MulAdd(two_pow_16, high_f, low_f);
+}
+
+// Generic for all vector lengths.
+// i64 -> f64 without a native conversion. The upper and lower 32 bits of each
+// lane are placed into the mantissas of two doubles with known exponents (see
+// the constants below); subtracting the combined bias and adding the two parts
+// yields the converted value.
+template <class D, HWY_IF_F64_D(D)>
+HWY_API VFromD<D> ConvertTo(D dd, VFromD<Rebind<int64_t, D>> v) {
+  // Based on wim's approach (https://stackoverflow.com/questions/41144668/)
+  const Repartition<uint32_t, decltype(dd)> d32;
+  const Repartition<uint64_t, decltype(dd)> d64;
+
+  // Toggle MSB of lower 32-bits and insert exponent for 2^84 + 2^63
+  const auto k84_63 = Set(d64, 0x4530000080000000ULL);
+  const auto v_upper = BitCast(dd, ShiftRight<32>(BitCast(d64, v)) ^ k84_63);
+
+  // Exponent is 2^52, lower 32 bits from v (=> 32-bit OddEven)
+  const auto k52 = Set(d32, 0x43300000);
+  const auto v_lower = BitCast(dd, OddEven(k52, BitCast(d32, v)));
+
+  // Bit pattern of the bias to remove: 2^84 + 2^63 + 2^52.
+  const auto k84_63_52 = BitCast(dd, Set(d64, 0x4530000080100000ULL));
+  return (v_upper - k84_63_52) + v_lower;  // order matters!
+}
+
+namespace detail {
+// Fast u64 -> f64 helper: ORs the integer bits into the bit pattern of the
+// double 2^52 and subtracts 2^52 again. Presumably only valid while `w` fits
+// into the mantissa; the caller visible here passes 32-bit values.
+template <class VW>
+HWY_INLINE VFromD<Rebind<double, DFromV<VW>>> U64ToF64VecFast(VW w) {
+  const DFromV<decltype(w)> d64;
+  const RebindToFloat<decltype(d64)> dd;
+  const auto two_pow_52 = Set(dd, 0x0010000000000000);  // 2^52
+  const auto merged_bits = Or(w, BitCast(d64, two_pow_52));
+  return BitCast(dd, merged_bits) - two_pow_52;
+}
+}  // namespace detail
+
+// Generic for all vector lengths.
+// u64 -> f64 without a native conversion: each lane is split into two 32-bit
+// halves, both are converted with detail::U64ToF64VecFast, and the result is
+// hi * 2^32 + lo.
+template <class D, HWY_IF_F64_D(D)>
+HWY_API VFromD<D> ConvertTo(D dd, VFromD<Rebind<uint64_t, D>> v) {
+  // Based on wim's approach (https://stackoverflow.com/questions/41144668/)
+  const RebindToUnsigned<decltype(dd)> d64;
+
+  // Split into the 32 lowest/highest significant bits of v
+  const VFromD<decltype(d64)> low_half = And(v, Set(d64, 0xFFFFFFFF));
+  const VFromD<decltype(d64)> high_half = ShiftRight<32>(v);
+
+  const auto low_dbl = detail::U64ToF64VecFast(low_half);
+  const auto high_dbl = detail::U64ToF64VecFast(high_half);
+  const auto two_pow_32 = Set(dd, 4294967296.0);  // 2^32
+  return MulAdd(two_pow_32, high_dbl, low_dbl);
+}
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// Truncates (rounds toward zero).
+
+#if HWY_HAVE_FLOAT16
+// f16 -> i16 using _mm_cvttph_epi16, with positive overflow repaired by
+// detail::FixConversionOverflow.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> ConvertTo(D di, VFromD<RebindToFloat<D>> v) {
+  const VFromD<RebindToSigned<D>> raw_result{_mm_cvttph_epi16(v.raw)};
+  return detail::FixConversionOverflow(di, v, raw_result);
+}
+#endif  // HWY_HAVE_FLOAT16
+
+// f32 -> i32 using _mm_cvttps_epi32, with positive overflow repaired by
+// detail::FixConversionOverflow.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> ConvertTo(D di, VFromD<RebindToFloat<D>> v) {
+  const VFromD<RebindToSigned<D>> raw_result{_mm_cvttps_epi32(v.raw)};
+  return detail::FixConversionOverflow(di, v, raw_result);
+}
+
+#if HWY_TARGET <= HWY_AVX3
+// AVX3: f64 -> i64 using _mm_cvttpd_epi64, with positive overflow repaired by
+// detail::FixConversionOverflow.
+template <class DI, HWY_IF_V_SIZE_LE_D(DI, 16), HWY_IF_I64_D(DI)>
+HWY_API VFromD<DI> ConvertTo(DI di, VFromD<RebindToFloat<DI>> v) {
+  const VFromD<DI> raw_result{_mm_cvttpd_epi64(v.raw)};
+  return detail::FixConversionOverflow(di, v, raw_result);
+}
+
+#else  // AVX2 or below
+
+#if HWY_ARCH_X86_64
+// x86-64, single f64 lane -> i64: uses the scalar _mm_cvttsd_si64 and moves
+// the result back into a vector register before fixing overflow.
+template <class DI, HWY_IF_V_SIZE_D(DI, 8), HWY_IF_I64_D(DI)>
+HWY_API VFromD<DI> ConvertTo(DI di, Vec64<double> v) {
+  const auto scalar = _mm_cvttsd_si64(v.raw);
+  const Vec64<int64_t> raw_result{_mm_cvtsi64_si128(scalar)};
+  return detail::FixConversionOverflow(di, v, raw_result);
+}
+// x86-64, two f64 lanes -> i64: each lane is converted with the scalar
+// _mm_cvttsd_si64, the two results are interleaved into one vector, and
+// overflow is fixed afterwards.
+template <class DI, HWY_IF_V_SIZE_D(DI, 16), HWY_IF_I64_D(DI)>
+HWY_API VFromD<DI> ConvertTo(DI di, Vec128<double> v) {
+  const Full64<double> d_half;
+  const auto upper = UpperHalf(d_half, v);
+  const __m128i lane0 = _mm_cvtsi64_si128(_mm_cvttsd_si64(v.raw));
+  const __m128i lane1 = _mm_cvtsi64_si128(_mm_cvttsd_si64(upper.raw));
+  const Vec128<int64_t> raw_result{_mm_unpacklo_epi64(lane0, lane1)};
+  return detail::FixConversionOverflow(di, v, raw_result);
+}
+#endif  // HWY_ARCH_X86_64
+
+#if !HWY_ARCH_X86_64 || HWY_TARGET <= HWY_AVX2
+// f64 -> i64 with truncation toward zero, implemented purely with integer bit
+// manipulation: the biased exponent decides how far the 53-bit significand is
+// shifted right or left, lanes whose exponent is out of range saturate, and
+// the sign is applied at the end.
+template <class DI, HWY_IF_V_SIZE_GT_D(DI, (HWY_ARCH_X86_64 ? 16 : 0)),
+          HWY_IF_I64_D(DI)>
+HWY_API VFromD<DI> ConvertTo(DI di, VFromD<Rebind<double, DI>> v) {
+  using VI = VFromD<decltype(di)>;
+  const RebindToUnsigned<decltype(di)> du;
+  using VU = VFromD<decltype(du)>;
+  const Repartition<uint16_t, decltype(di)> du16;
+  const VI k1075 = Set(di, 1075); /* biased exponent of 2^52 */
+
+  // Exponent indicates whether the number can be represented as int64_t.
+  const VU biased_exp = ShiftRight<52>(BitCast(du, v)) & Set(du, 0x7FF);
+  // 1086 = 1075 + 11, i.e. the biased exponent of 2^63.
+#if HWY_TARGET <= HWY_SSE4
+  const auto in_range = BitCast(di, biased_exp) < Set(di, 1086);
+#else
+  // This path compares 32-bit lanes instead; DupEven copies the low half of
+  // each 64-bit lane (which holds the whole exponent) into both halves.
+  const Repartition<int32_t, decltype(di)> di32;
+  const auto in_range = MaskFromVec(BitCast(
+      di,
+      VecFromMask(di32, DupEven(BitCast(di32, biased_exp)) < Set(di32, 1086))));
+#endif
+
+  // If we were to cap the exponent at 51 and add 2^52, the number would be in
+  // [2^52, 2^53) and mantissa bits could be read out directly. We need to
+  // round-to-0 (truncate), but changing rounding mode in MXCSR hits a
+  // compiler reordering bug: https://gcc.godbolt.org/z/4hKj6c6qc . We instead
+  // manually shift the mantissa into place (we already have many of the
+  // inputs anyway).
+
+  // Use 16-bit saturated unsigned subtraction to compute shift_mnt and
+  // shift_int since biased_exp[i] is a non-negative integer that is less than
+  // or equal to 2047.
+
+  // 16-bit saturated unsigned subtraction is also more efficient than a
+  // 64-bit subtraction followed by a 64-bit signed Max operation on
+  // SSE2/SSSE3/SSE4/AVX2.
+
+  // The upper 48 bits of both shift_mnt and shift_int are guaranteed to be
+  // zero as the upper 48 bits of both k1075 and biased_exp are zero.
+
+  const VU shift_mnt = BitCast(
+      du, SaturatedSub(BitCast(du16, k1075), BitCast(du16, biased_exp)));
+  const VU shift_int = BitCast(
+      du, SaturatedSub(BitCast(du16, biased_exp), BitCast(du16, k1075)));
+  const VU mantissa = BitCast(du, v) & Set(du, (1ULL << 52) - 1);
+  // Include implicit 1-bit. NOTE: the shift count may exceed 63; we rely on x86
+  // returning zero in that case.
+  const VU int53 = (mantissa | Set(du, 1ULL << 52)) >> shift_mnt;
+
+  // For inputs larger than 2^53 - 1, insert zeros at the bottom.
+
+  // For inputs less than 2^63, the implicit 1-bit is guaranteed not to be
+  // shifted out of the left shift result below as shift_int[i] <= 10 is true
+  // for any inputs that are less than 2^63.
+
+  const VU shifted = int53 << shift_int;
+
+  // Saturate to LimitsMin (unchanged when negating below) or LimitsMax.
+  // sign_mask is all-ones (-1) for negative inputs, so subtracting it from
+  // LimitsMax wraps around to LimitsMin for those lanes.
+  const VI sign_mask = BroadcastSignBit(BitCast(di, v));
+  const VI limit = Set(di, LimitsMax<int64_t>()) - sign_mask;
+  const VI magnitude = IfThenElse(in_range, BitCast(di, shifted), limit);
+
+  // If the input was negative, negate the integer (two's complement).
+  return (magnitude ^ sign_mask) - sign_mask;
+}
+#endif  // !HWY_ARCH_X86_64 || HWY_TARGET <= HWY_AVX2
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// f32 -> i32 using _mm_cvtps_epi32 (no explicit truncation, unlike the cvtt
+// variants above), with positive overflow repaired by
+// detail::FixConversionOverflow.
+template <size_t N>
+HWY_API Vec128<int32_t, N> NearestInt(const Vec128<float, N> v) {
+  const RebindToSigned<DFromV<decltype(v)>> di;
+  const VFromD<decltype(di)> raw_result{_mm_cvtps_epi32(v.raw)};
+  return detail::FixConversionOverflow(di, v, raw_result);
+}
+
+// ------------------------------ Floating-point rounding (ConvertTo)
+
+#if HWY_TARGET >= HWY_SSSE3
+
+// Toward nearest integer, ties to even
+// Fallback for targets where HWY_TARGET >= HWY_SSSE3. Returns `v` unchanged in
+// lanes where Abs(v) < MantissaEnd<T>() does not hold (NaN or large
+// magnitude); otherwise returns the rounded value.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Round(const Vec128<T, N> v) {
+  static_assert(IsFloat<T>(), "Only for float");
+  // Rely on rounding after addition with a large value such that no mantissa
+  // bits remain (assuming the current mode is nearest-even). We may need a
+  // compiler flag for precise floating-point to prevent "optimizing" this out.
+  const DFromV<decltype(v)> df;
+  const auto max = Set(df, MantissaEnd<T>());
+  // `large` has the magnitude of `max` and the sign of `v`.
+  const auto large = CopySignToAbs(max, v);
+  // Adding and then subtracting `large` leaves the rounded value.
+  const auto added = large + v;
+  const auto rounded = added - large;
+  // Keep original if NaN or the magnitude is large (already an int).
+  return IfThenElse(Abs(v) < max, rounded, v);
+}
+
+namespace detail {
+
+// Mask of lanes whose magnitude is below MantissaEnd<T>(). For those lanes,
+// truncating to integer and converting back to float is correct; larger
+// inputs were already integers (because mantissa >> exponent is zero) and
+// callers keep them as they are.
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> UseInt(const Vec128<T, N> v) {
+  static_assert(IsFloat<T>(), "Only for float");
+  const DFromV<decltype(v)> d;
+  const auto threshold = Set(d, MantissaEnd<T>());
+  return Abs(v) < threshold;
+}
+
+}  // namespace detail
+
+// Toward zero, aka truncate
+// Fallback: round-trips through the signed integer type and copies the sign
+// of the input onto the result. Lanes rejected by detail::UseInt are returned
+// unchanged.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Trunc(const Vec128<T, N> v) {
+  static_assert(IsFloat<T>(), "Only for float");
+  const DFromV<decltype(v)> df;
+  const RebindToSigned<decltype(df)> di;
+
+  const auto as_int = ConvertTo(di, v);  // round toward 0
+  const auto truncated = ConvertTo(df, as_int);
+  const auto with_sign = CopySign(truncated, v);
+
+  return IfThenElse(detail::UseInt(v), with_sign, v);
+}
+
+// Toward +infinity, aka ceiling
+// Fallback: truncates via the signed integer type and adds one where the
+// truncated value ended up below the input. Lanes rejected by detail::UseInt
+// are returned unchanged.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Ceil(const Vec128<T, N> v) {
+  static_assert(IsFloat<T>(), "Only for float");
+  const DFromV<decltype(v)> df;
+  const RebindToSigned<decltype(df)> di;
+
+  const auto as_int = ConvertTo(di, v);  // round toward 0
+  const auto truncated = ConvertTo(df, as_int);
+
+  // Truncating a positive non-integer ends up smaller; if so, add 1. The mask
+  // converted to float is -1 in those lanes and 0 elsewhere, hence subtract.
+  const auto too_small = RebindMask(di, truncated < v);
+  const auto minus_one_or_zero = ConvertTo(df, VecFromMask(di, too_small));
+
+  return IfThenElse(detail::UseInt(v), truncated - minus_one_or_zero, v);
+}
+
+// Toward -infinity, aka floor
+// Fallback: truncates via the signed integer type and subtracts one where the
+// truncated value ended up above the input. Lanes rejected by detail::UseInt
+// are returned unchanged.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Floor(const Vec128<T, N> v) {
+  static_assert(IsFloat<T>(), "Only for float");
+  const DFromV<decltype(v)> df;
+  const RebindToSigned<decltype(df)> di;
+
+  const auto as_int = ConvertTo(di, v);  // round toward 0
+  const auto truncated = ConvertTo(df, as_int);
+
+  // Truncating a negative non-integer ends up larger; if so, subtract 1. The
+  // mask converted to float is -1 in those lanes and 0 elsewhere, hence add.
+  const auto too_large = RebindMask(di, truncated > v);
+  const auto minus_one_or_zero = ConvertTo(df, VecFromMask(di, too_large));
+
+  return IfThenElse(detail::UseInt(v), truncated + minus_one_or_zero, v);
+}
+
+#else
+
+// Toward nearest integer, ties to even
+#if HWY_HAVE_FLOAT16
+// f16: round to nearest via _mm_roundscale_ph, exceptions suppressed.
+template <size_t N>
+HWY_API Vec128<float16_t, N> Round(const Vec128<float16_t, N> v) {
+  const auto rounded = _mm_roundscale_ph(
+      v.raw, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  return Vec128<float16_t, N>{rounded};
+}
+#endif  // HWY_HAVE_FLOAT16
+// f32 / f64: round to nearest via _mm_round_ps / _mm_round_pd, exceptions
+// suppressed.
+template <size_t N>
+HWY_API Vec128<float, N> Round(const Vec128<float, N> v) {
+  const auto rounded =
+      _mm_round_ps(v.raw, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  return Vec128<float, N>{rounded};
+}
+template <size_t N>
+HWY_API Vec128<double, N> Round(const Vec128<double, N> v) {
+  const auto rounded =
+      _mm_round_pd(v.raw, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  return Vec128<double, N>{rounded};
+}
+
+// Toward zero, aka truncate
+#if HWY_HAVE_FLOAT16
+// f16: round toward zero via _mm_roundscale_ph, exceptions suppressed.
+template <size_t N>
+HWY_API Vec128<float16_t, N> Trunc(const Vec128<float16_t, N> v) {
+  const auto truncated =
+      _mm_roundscale_ph(v.raw, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+  return Vec128<float16_t, N>{truncated};
+}
+#endif  // HWY_HAVE_FLOAT16
+// f32 / f64: round toward zero via _mm_round_ps / _mm_round_pd, exceptions
+// suppressed.
+template <size_t N>
+HWY_API Vec128<float, N> Trunc(const Vec128<float, N> v) {
+  const auto truncated =
+      _mm_round_ps(v.raw, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+  return Vec128<float, N>{truncated};
+}
+template <size_t N>
+HWY_API Vec128<double, N> Trunc(const Vec128<double, N> v) {
+  const auto truncated =
+      _mm_round_pd(v.raw, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+  return Vec128<double, N>{truncated};
+}
+
+// Toward +infinity, aka ceiling
+#if HWY_HAVE_FLOAT16
+// f16: round toward +infinity via _mm_roundscale_ph, exceptions suppressed.
+template <size_t N>
+HWY_API Vec128<float16_t, N> Ceil(const Vec128<float16_t, N> v) {
+  const auto rounded_up =
+      _mm_roundscale_ph(v.raw, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  return Vec128<float16_t, N>{rounded_up};
+}
+#endif  // HWY_HAVE_FLOAT16
+// f32 / f64: round toward +infinity via _mm_round_ps / _mm_round_pd,
+// exceptions suppressed.
+template <size_t N>
+HWY_API Vec128<float, N> Ceil(const Vec128<float, N> v) {
+  const auto rounded_up =
+      _mm_round_ps(v.raw, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  return Vec128<float, N>{rounded_up};
+}
+template <size_t N>
+HWY_API Vec128<double, N> Ceil(const Vec128<double, N> v) {
+  const auto rounded_up =
+      _mm_round_pd(v.raw, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  return Vec128<double, N>{rounded_up};
+}
+
+// Toward -infinity, aka floor
+#if HWY_HAVE_FLOAT16
+// f16: round toward -infinity via _mm_roundscale_ph, exceptions suppressed.
+template <size_t N>
+HWY_API Vec128<float16_t, N> Floor(const Vec128<float16_t, N> v) {
+  const auto rounded_down =
+      _mm_roundscale_ph(v.raw, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  return Vec128<float16_t, N>{rounded_down};
+}
+#endif  // HWY_HAVE_FLOAT16
+// f32 / f64: round toward -infinity via _mm_round_ps / _mm_round_pd,
+// exceptions suppressed.
+template <size_t N>
+HWY_API Vec128<float, N> Floor(const Vec128<float, N> v) {
+  const auto rounded_down =
+      _mm_round_ps(v.raw, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  return Vec128<float, N>{rounded_down};
+}
+template <size_t N>
+HWY_API Vec128<double, N> Floor(const Vec128<double, N> v) {
+  const auto rounded_down =
+      _mm_round_pd(v.raw, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  return Vec128<double, N>{rounded_down};
+}
+
+#endif  // !HWY_SSSE3
+
+// ------------------------------ Floating-point classification
+
+// Category bits for the immediate argument of the _mm_fpclass_*_mask
+// intrinsics used below. They are OR-ed together to test for several
+// categories at once.
+#define HWY_X86_FPCLASS_QNAN 0x01
+#define HWY_X86_FPCLASS_POS0 0x02
+#define HWY_X86_FPCLASS_NEG0 0x04
+#define HWY_X86_FPCLASS_POS_INF 0x08
+#define HWY_X86_FPCLASS_NEG_INF 0x10
+#define HWY_X86_FPCLASS_SUBNORMAL 0x20
+#define HWY_X86_FPCLASS_NEG 0x40
+#define HWY_X86_FPCLASS_SNAN 0x80
+
+#if HWY_HAVE_FLOAT16 || HWY_IDE
+
+// f16: true in lanes classified as signalling or quiet NaN.
+template <size_t N>
+HWY_API Mask128<float16_t, N> IsNaN(const Vec128<float16_t, N> v) {
+  const auto nan_bits = _mm_fpclass_ph_mask(
+      v.raw, HWY_X86_FPCLASS_SNAN | HWY_X86_FPCLASS_QNAN);
+  return Mask128<float16_t, N>{nan_bits};
+}
+
+// f16: true in lanes classified as positive or negative infinity.
+template <size_t N>
+HWY_API Mask128<float16_t, N> IsInf(const Vec128<float16_t, N> v) {
+  const auto inf_bits = _mm_fpclass_ph_mask(
+      v.raw, HWY_X86_FPCLASS_NEG_INF | HWY_X86_FPCLASS_POS_INF);
+  return Mask128<float16_t, N>{inf_bits};
+}
+
+// f16: true in lanes that are neither NaN nor infinite.
+template <size_t N>
+HWY_API Mask128<float16_t, N> IsFinite(const Vec128<float16_t, N> v) {
+  // None of the fpclass categories means "finite", so test for inf/NaN and
+  // negate the mask.
+  const Mask128<float16_t, N> inf_or_nan{_mm_fpclass_ph_mask(
+      v.raw, HWY_X86_FPCLASS_SNAN | HWY_X86_FPCLASS_QNAN |
+                 HWY_X86_FPCLASS_NEG_INF | HWY_X86_FPCLASS_POS_INF)};
+  return Not(inf_or_nan);
+}
+
+#endif  // HWY_HAVE_FLOAT16
+
+// f32 / f64 NaN test. AVX3 uses fpclass with both NaN categories; older
+// targets compare the vector with itself using the "unordered" predicate.
+template <size_t N>
+HWY_API Mask128<float, N> IsNaN(const Vec128<float, N> v) {
+#if HWY_TARGET <= HWY_AVX3
+  const auto nan_bits = _mm_fpclass_ps_mask(
+      v.raw, HWY_X86_FPCLASS_SNAN | HWY_X86_FPCLASS_QNAN);
+  return Mask128<float, N>{nan_bits};
+#else
+  const auto unordered = _mm_cmpunord_ps(v.raw, v.raw);
+  return Mask128<float, N>{unordered};
+#endif
+}
+template <size_t N>
+HWY_API Mask128<double, N> IsNaN(const Vec128<double, N> v) {
+#if HWY_TARGET <= HWY_AVX3
+  const auto nan_bits = _mm_fpclass_pd_mask(
+      v.raw, HWY_X86_FPCLASS_SNAN | HWY_X86_FPCLASS_QNAN);
+  return Mask128<double, N>{nan_bits};
+#else
+  const auto unordered = _mm_cmpunord_pd(v.raw, v.raw);
+  return Mask128<double, N>{unordered};
+#endif
+}
+
+#if HWY_TARGET <= HWY_AVX3
+
+// AVX3 f32 / f64: true in lanes classified as positive or negative infinity.
+template <size_t N>
+HWY_API Mask128<float, N> IsInf(const Vec128<float, N> v) {
+  const auto inf_bits = _mm_fpclass_ps_mask(
+      v.raw, HWY_X86_FPCLASS_NEG_INF | HWY_X86_FPCLASS_POS_INF);
+  return Mask128<float, N>{inf_bits};
+}
+template <size_t N>
+HWY_API Mask128<double, N> IsInf(const Vec128<double, N> v) {
+  const auto inf_bits = _mm_fpclass_pd_mask(
+      v.raw, HWY_X86_FPCLASS_NEG_INF | HWY_X86_FPCLASS_POS_INF);
+  return Mask128<double, N>{inf_bits};
+}
+
+// Returns whether each lane is normal/subnormal/zero, i.e. neither NaN nor
+// infinite.
+template <size_t N>
+HWY_API Mask128<float, N> IsFinite(const Vec128<float, N> v) {
+  // Classify the non-finite lanes (signaling/quiet NaN, -inf, +inf) first; the
+  // finite lanes are the complement of that mask.
+  const Mask128<float, N> non_finite{_mm_fpclass_ps_mask(
+      v.raw, HWY_X86_FPCLASS_SNAN | HWY_X86_FPCLASS_QNAN |
+                 HWY_X86_FPCLASS_NEG_INF | HWY_X86_FPCLASS_POS_INF)};
+  return Not(non_finite);
+}
+// Returns whether each lane is neither NaN nor infinite.
+template <size_t N>
+HWY_API Mask128<double, N> IsFinite(const Vec128<double, N> v) {
+  // Classify the non-finite lanes (signaling/quiet NaN, -inf, +inf) first; the
+  // finite lanes are the complement of that mask.
+  const Mask128<double, N> non_finite{_mm_fpclass_pd_mask(
+      v.raw, HWY_X86_FPCLASS_SNAN | HWY_X86_FPCLASS_QNAN |
+                 HWY_X86_FPCLASS_NEG_INF | HWY_X86_FPCLASS_POS_INF)};
+  return Not(non_finite);
+}
+
+#else
+
+// Returns a mask that is true for lanes of `v` holding -inf or +inf. Only
+// instantiable for floating-point T.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> IsInf(const Vec128<T, N> v) {
+  static_assert(IsFloat<T>(), "Only for float");
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;
+  const VFromD<decltype(di)> bits = BitCast(di, v);
+  // Doubling the integer representation shifts out the sign bit. What remains
+  // must equal the pattern with exponent=max and mantissa=0.
+  const VFromD<decltype(di)> doubled = Add(bits, bits);
+  const VFromD<decltype(di)> inf_pattern =
+      Set(di, hwy::MaxExponentTimes2<T>());
+  return RebindMask(d, Eq(doubled, inf_pattern));
+}
+
+// Returns whether normal/subnormal/zero, i.e. a mask that is true for lanes
+// whose exponent field is below the maximum (which is reserved for inf/NaN).
+// Only instantiable for floating-point T.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> IsFinite(const Vec128<T, N> v) {
+  static_assert(IsFloat<T>(), "Only for float");
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const RebindToSigned<decltype(d)> di;  // cheaper than unsigned comparison
+  // Reinterpret the float lanes as unsigned integers of the same width.
+  const VFromD<decltype(du)> vu = BitCast(du, v);
+  // Shift left to clear the sign bit, then right so we can compare with the
+  // max exponent (cannot compare with MaxExponentTimes2 directly because it is
+  // negative and non-negative floats would be greater). MSVC seems to generate
+  // incorrect code if we instead add vu + vu.
+  // After both shifts, `exp` holds only the exponent field in its low bits.
+  const VFromD<decltype(di)> exp =
+      BitCast(di, ShiftRight<hwy::MantissaBits<T>() + 1>(ShiftLeft<1>(vu)));
+  // Finite iff the exponent field is strictly less than its maximum value.
+  return RebindMask(d, Lt(exp, Set(di, hwy::MaxExponentField<T>())));
+}
+
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// ================================================== CRYPTO
+
+#if !defined(HWY_DISABLE_PCLMUL_AES) && HWY_TARGET <= HWY_SSE4
+
+// Per-target flag to prevent generic_ops-inl.h from defining AESRound.
+#ifdef HWY_NATIVE_AES
+#undef HWY_NATIVE_AES
+#else
+#define HWY_NATIVE_AES
+#endif
+
+// Applies `_mm_aesenc_si128` to `state` with `round_key` and wraps the result.
+HWY_API Vec128<uint8_t> AESRound(Vec128<uint8_t> state,
+                                 Vec128<uint8_t> round_key) {
+  const auto next_state = _mm_aesenc_si128(state.raw, round_key.raw);
+  return Vec128<uint8_t>{next_state};
+}
+
+// Applies `_mm_aesenclast_si128` to `state` with `round_key` and wraps the
+// result.
+HWY_API Vec128<uint8_t> AESLastRound(Vec128<uint8_t> state,
+                                     Vec128<uint8_t> round_key) {
+  const auto final_state = _mm_aesenclast_si128(state.raw, round_key.raw);
+  return Vec128<uint8_t>{final_state};
+}
+
+// Applies `_mm_aesimc_si128` to `state` and wraps the result.
+HWY_API Vec128<uint8_t> AESInvMixColumns(Vec128<uint8_t> state) {
+  const auto mixed = _mm_aesimc_si128(state.raw);
+  return Vec128<uint8_t>{mixed};
+}
+
+// Applies `_mm_aesdec_si128` to `state` with `round_key` and wraps the result.
+HWY_API Vec128<uint8_t> AESRoundInv(Vec128<uint8_t> state,
+                                    Vec128<uint8_t> round_key) {
+  const auto prev_state = _mm_aesdec_si128(state.raw, round_key.raw);
+  return Vec128<uint8_t>{prev_state};
+}
+
+// Applies `_mm_aesdeclast_si128` to `state` with `round_key` and wraps the
+// result.
+HWY_API Vec128<uint8_t> AESLastRoundInv(Vec128<uint8_t> state,
+                                        Vec128<uint8_t> round_key) {
+  const auto final_state = _mm_aesdeclast_si128(state.raw, round_key.raw);
+  return Vec128<uint8_t>{final_state};
+}
+
+// Applies `_mm_aeskeygenassist_si128` to `v`. The round constant `kRcon` is a
+// template argument so that it reaches the intrinsic as a compile-time value.
+template <uint8_t kRcon>
+HWY_API Vec128<uint8_t> AESKeyGenAssist(Vec128<uint8_t> v) {
+  const auto assisted = _mm_aeskeygenassist_si128(v.raw, kRcon);
+  return Vec128<uint8_t>{assisted};
+}
+
+// Carry-less multiply via `_mm_clmulepi64_si128` with selector 0x00, which
+// picks the lower 64-bit half of both `a` and `b`.
+template <size_t N>
+HWY_API Vec128<uint64_t, N> CLMulLower(Vec128<uint64_t, N> a,
+                                       Vec128<uint64_t, N> b) {
+  const auto product = _mm_clmulepi64_si128(a.raw, b.raw, 0x00);
+  return Vec128<uint64_t, N>{product};
+}
+
+// Carry-less multiply via `_mm_clmulepi64_si128` with selector 0x11, which
+// picks the upper 64-bit half of both `a` and `b`.
+template <size_t N>
+HWY_API Vec128<uint64_t, N> CLMulUpper(Vec128<uint64_t, N> a,
+                                       Vec128<uint64_t, N> b) {
+  const auto product = _mm_clmulepi64_si128(a.raw, b.raw, 0x11);
+  return Vec128<uint64_t, N>{product};
+}
+
+#endif  // !defined(HWY_DISABLE_PCLMUL_AES) && HWY_TARGET <= HWY_SSE4
+
+// ================================================== MISC
+
+// ------------------------------ LoadMaskBits (TestBit)
+
+#if HWY_TARGET > HWY_AVX3
+namespace detail {
+
+// Each overload expands the low bits of `mask_bits` into a lane mask for `d`:
+// lane i is true iff bit i is set. The overload is chosen by lane size.
+
+// 8-bit lanes.
+template <class D, HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE MFromD<D> LoadMaskBits128(D d, uint64_t mask_bits) {
+  const RebindToUnsigned<decltype(d)> du;
+  // Place the bits into a vector directly. This is easier than Set(), which
+  // would require an >8-bit type, which would not compile for T=uint8_t,
+  // kN=1.
+  const VFromD<D> bits_vec{_mm_cvtsi32_si128(static_cast<int>(mask_bits))};
+
+#if HWY_TARGET == HWY_SSE2
+  // Replicate each of the two low bytes 8x by interleaving the vector with
+  // itself three times:
+  // {b0, b1, ...} ===> {b0, b0, b1, b1, ...}
+  const __m128i dup2 = _mm_unpacklo_epi8(bits_vec.raw, bits_vec.raw);
+  // {b0, b0, b1, b1, ...} ==> {b0, b0, b0, b0, b1, b1, b1, b1, ...}
+  const __m128i dup4 = _mm_unpacklo_epi16(dup2, dup2);
+  // {b0, b0, b0, b0, b1, b1, b1, b1, ...} ==>
+  // {b0, b0, b0, b0, b0, b0, b0, b0, b1, b1, b1, b1, b1, b1, b1, b1}
+  const VFromD<decltype(du)> rep8{_mm_unpacklo_epi32(dup4, dup4)};
+#else
+  // Byte shuffle: lanes 0-7 receive byte 0 and lanes 8-15 receive byte 1, so
+  // that every lane holds the byte containing the bit that governs it.
+  alignas(16) static constexpr uint8_t kRep8[16] = {0, 0, 0, 0, 0, 0, 0, 0,
+                                                    1, 1, 1, 1, 1, 1, 1, 1};
+  const auto rep8 = TableLookupBytes(bits_vec, Load(du, kRep8));
+#endif
+
+  // Within its replicated byte, lane i is governed by bit (i % 8).
+  alignas(16) static constexpr uint8_t kBit[16] = {1, 2, 4, 8, 16, 32, 64, 128,
+                                                   1, 2, 4, 8, 16, 32, 64, 128};
+  const auto lane_bit = LoadDup128(du, kBit);
+  return RebindMask(d, TestBit(rep8, lane_bit));
+}
+
+// 16-bit lanes.
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_INLINE MFromD<D> LoadMaskBits128(D d, uint64_t mask_bits) {
+  const RebindToUnsigned<decltype(d)> du;
+  // Broadcast the bits to every lane, then let lane i test bit i.
+  const auto broadcast = Set(du, static_cast<uint16_t>(mask_bits));
+  alignas(16) static constexpr uint16_t kBit[8] = {1, 2, 4, 8, 16, 32, 64, 128};
+  const auto lane_bit = Load(du, kBit);
+  return RebindMask(d, TestBit(broadcast, lane_bit));
+}
+
+// 32-bit lanes.
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_INLINE MFromD<D> LoadMaskBits128(D d, uint64_t mask_bits) {
+  const RebindToUnsigned<decltype(d)> du;
+  // Broadcast the bits to every lane, then let lane i test bit i.
+  const auto broadcast = Set(du, static_cast<uint32_t>(mask_bits));
+  alignas(16) static constexpr uint32_t kBit[8] = {1, 2, 4, 8};
+  const auto lane_bit = Load(du, kBit);
+  return RebindMask(d, TestBit(broadcast, lane_bit));
+}
+
+// 64-bit lanes.
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE MFromD<D> LoadMaskBits128(D d, uint64_t mask_bits) {
+  const RebindToUnsigned<decltype(d)> du;
+  // Broadcast the bits to every lane, then let lane i test bit i.
+  const auto broadcast = Set(du, mask_bits);
+  alignas(16) static constexpr uint64_t kBit[8] = {1, 2};
+  const auto lane_bit = Load(du, kBit);
+  return RebindMask(d, TestBit(broadcast, lane_bit));
+}
+
+}  // namespace detail
+#endif  // HWY_TARGET > HWY_AVX3
+
+// Builds a mask for `d` from packed bits. `bits` points to at least 8 readable
+// bytes, not all of which need be valid; (MaxLanes(d) + 7) / 8 of them are
+// read here.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API MFromD<D> LoadMaskBits(D d, const uint8_t* HWY_RESTRICT bits) {
+  constexpr size_t kN = MaxLanes(d);
+  constexpr size_t kNumBytes = (kN + 7) / 8;
+
+  // Gather the packed bits into an integer; this part is target-independent.
+  uint64_t mask_bits = 0;
+  CopyBytes<kNumBytes>(bits, &mask_bits);
+  // Fewer lanes than bits in the byte: discard the bits beyond the last lane.
+  if (kN < 8) {
+    mask_bits &= (1ull << kN) - 1;
+  }
+
+#if HWY_TARGET <= HWY_AVX3
+  (void)d;
+  return MFromD<D>::FromBits(mask_bits);
+#else
+  return detail::LoadMaskBits128(d, mask_bits);
+#endif
+}
+
+// Compile-time trait: `value` is nonzero if Compress for lane type T also
+// moves the mask=false lanes to the top ('partitions'), as described below.
+template <typename T>
+struct CompressIsPartition {
+#if HWY_TARGET <= HWY_AVX3
+  // AVX3 supports native compress, but a table-based approach allows
+  // 'partitioning' (also moving mask=false lanes to the top), which helps
+  // vqsort. This is only feasible for eight or less lanes, i.e. sizeof(T) == 8
+  // on AVX3. For simplicity, we only use tables for 64-bit lanes (not AVX3
+  // u32x8 etc.).
+  enum { value = (sizeof(T) == 8) };
+#else
+  // generic_ops-inl does not guarantee IsPartition for 8-bit.
+  enum { value = (sizeof(T) != 1) };
+#endif
+};
+
+#if HWY_TARGET <= HWY_AVX3
+
+// ------------------------------ StoreMaskBits
+
+// Copies the bits of `mask` to `bits` and returns the number of bytes written,
+// (MaxLanes(d) + 7) / 8. `bits` points to at least 8 writable bytes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API size_t StoreMaskBits(D d, MFromD<D> mask, uint8_t* bits) {
+  constexpr size_t kLanes = MaxLanes(d);
+  constexpr size_t kBytesWritten = (kLanes + 7) / 8;
+  CopyBytes<kBytesWritten>(&mask.raw, bits);
+
+  // With fewer than 8 lanes, the only byte is not full and its upper bits are
+  // undefined, so clear them.
+  if (kLanes < 8) {
+    const int valid_bits = (1 << kLanes) - 1;
+    bits[0] = static_cast<uint8_t>(bits[0] & valid_bits);
+  }
+
+  return kBytesWritten;
+}
+
+// ------------------------------ Mask testing
+
+// Beware: the suffix indicates the number of mask bits, not lane size!
+
+// Returns the number of true lanes among the first MaxLanes(d) mask bits.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API size_t CountTrue(D d, MFromD<D> mask) {
+  // Only bits below MaxLanes(d) belong to the mask; ignore the rest.
+  constexpr uint64_t kValidBits = (1ull << MaxLanes(d)) - 1;
+  return PopCount(uint64_t{mask.raw} & kValidBits);
+}
+
+// Returns the index of the lowest true lane. Presumably requires at least one
+// true lane, because the `_Nonzero32` bit-scan helper is used unconditionally.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API size_t FindKnownFirstTrue(D d, MFromD<D> mask) {
+  // Only bits below MaxLanes(d) belong to the mask; ignore the rest.
+  constexpr uint32_t kValidBits = (1u << MaxLanes(d)) - 1;
+  const uint32_t active = uint32_t{mask.raw} & kValidBits;
+  return Num0BitsBelowLS1Bit_Nonzero32(active);
+}
+
+// Returns the index of the lowest true lane, or -1 if no lane is true.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API intptr_t FindFirstTrue(D d, MFromD<D> mask) {
+  // Only bits below MaxLanes(d) belong to the mask; ignore the rest.
+  constexpr uint32_t kValidBits = (1u << MaxLanes(d)) - 1;
+  const uint32_t active = uint32_t{mask.raw} & kValidBits;
+  if (active == 0) {
+    return -1;
+  }
+  return static_cast<intptr_t>(Num0BitsBelowLS1Bit_Nonzero32(active));
+}
+
+// Returns the index of the highest true lane. Presumably requires at least one
+// true lane, because the `_Nonzero32` bit-scan helper is used unconditionally.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API size_t FindKnownLastTrue(D d, MFromD<D> mask) {
+  // Only bits below MaxLanes(d) belong to the mask; ignore the rest.
+  constexpr uint32_t kValidBits = (1u << MaxLanes(d)) - 1;
+  const uint32_t active = uint32_t{mask.raw} & kValidBits;
+  // Index of the most significant set bit of a 32-bit value.
+  const auto leading_zeros = Num0BitsAboveMS1Bit_Nonzero32(active);
+  return 31 - leading_zeros;
+}
+
+// Returns the index of the highest true lane, or -1 if no lane is true.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API intptr_t FindLastTrue(D d, MFromD<D> mask) {
+  // Only bits below MaxLanes(d) belong to the mask; ignore the rest.
+  constexpr uint32_t kValidBits = (1u << MaxLanes(d)) - 1;
+  const uint32_t active = uint32_t{mask.raw} & kValidBits;
+  if (active == 0) {
+    return -1;
+  }
+  // Index of the most significant set bit of a 32-bit value.
+  return static_cast<intptr_t>(31 - Num0BitsAboveMS1Bit_Nonzero32(active));
+}
+
+// Returns true if none of the first MaxLanes(d) mask bits is set.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API bool AllFalse(D d, MFromD<D> mask) {
+  // Only bits below MaxLanes(d) belong to the mask; ignore the rest.
+  constexpr uint64_t kValidBits = (1ull << MaxLanes(d)) - 1;
+  return (uint64_t{mask.raw} & kValidBits) == 0;
+}
+
+// Returns true if all of the first MaxLanes(d) mask bits are set.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API bool AllTrue(D d, MFromD<D> mask) {
+  // Compare explicitly against the valid-lane pattern. _kortestc cannot be
+  // used because we may have less than 8 mask bits.
+  constexpr uint64_t kValidBits = (1ull << MaxLanes(d)) - 1;
+  return (uint64_t{mask.raw} & kValidBits) == kValidBits;
+}
+
+// ------------------------------ Compress
+
+// 8-16 bit Compress, CompressStore defined in x86_512 because they use Vec512.
+
+// Single lane: no-op. The mask is ignored and `v` is returned unchanged.
+template <typename T>
+HWY_API Vec128<T, 1> Compress(Vec128<T, 1> v, Mask128<T, 1> /*m*/) {
+  return v;
+}
+
+// Compress for float vectors larger than 4 bytes, implemented with
+// `_mm_maskz_compress_ps` (the zero-masking form of the native compress).
+template <size_t N, HWY_IF_V_SIZE_GT(float, N, 4)>
+HWY_API Vec128<float, N> Compress(Vec128<float, N> v, Mask128<float, N> mask) {
+  const auto packed = _mm_maskz_compress_ps(mask.raw, v.raw);
+  return Vec128<float, N>{packed};
+}
+
+// Compress for full vectors of two 64-bit lanes, implemented as a byte shuffle
+// whose indices are looked up by the mask bits.
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T> Compress(Vec128<T> v, Mask128<T> mask) {
+  HWY_DASSERT(mask.raw < 4);
+
+  // With only 2 lanes there are 4 possible masks, so one 16-byte shuffle
+  // vector per mask value is affordable. Row 2 (only the upper lane is true)
+  // swaps the two lanes; all other rows are the identity.
+  alignas(16) static constexpr uint8_t kByteIndices[64] = {
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15,
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15,
+      8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2,  3,  4,  5,  6,  7,
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15};
+
+  const DFromV<decltype(v)> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  const auto shuffle = Load(d8, kByteIndices + 16 * mask.raw);
+  const auto bytes = BitCast(d8, v);
+  return BitCast(d, TableLookupBytes(bytes, shuffle));
+}
+
+// ------------------------------ CompressNot (Compress)
+
+// Single lane: no-op. The mask is ignored and `v` is returned unchanged.
+template <typename T>
+HWY_API Vec128<T, 1> CompressNot(Vec128<T, 1> v, Mask128<T, 1> /*m*/) {
+  return v;
+}
+
+// CompressNot for full vectors of two 64-bit lanes, implemented as a lane
+// permutation whose indices are looked up by the mask bits.
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T> CompressNot(Vec128<T> v, Mask128<T> mask) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du64;
+
+  // See CompressIsPartition, PrintCompressNot64x2NibbleTables. Each entry
+  // packs one 4-bit lane index per output lane; only the entry for mask=1
+  // (lane 0 true) swaps the lanes.
+  alignas(16) static constexpr uint64_t packed_array[16] = {
+      0x00000010, 0x00000001, 0x00000010, 0x00000010};
+  alignas(16) static constexpr uint64_t shifts[2] = {0, 4};
+
+  // Broadcast the packed indices, then for lane i shift the i-th 4-bit index
+  // down to bits [0, 2) - _mm_permutexvar_epi64 will ignore the upper bits.
+  const auto packed = Set(du64, packed_array[mask.raw]);
+  const auto shifted = packed >> Load(du64, shifts);
+  const Indices128<T> indices{shifted.raw};
+  return TableLookupLanes(v, indices);
+}
+
+// ------------------------------ CompressBlocksNot
+// No-op for a 128-bit vector: the mask is ignored and `v` is returned
+// unchanged.
+HWY_API Vec128<uint64_t> CompressBlocksNot(Vec128<uint64_t> v,
+                                           Mask128<uint64_t> /* m */) {
+  return v;
+}
+
+// ------------------------------ CompressStore (defined in x86_512)
+
+// ------------------------------ CompressBlendedStore (CompressStore)
+// Compresses `v` according to `m` and stores the result to `unaligned`. In the
+// fallback branch, exactly CountTrue(d, m) lanes are written and that count is
+// returned; the first branch forwards to CompressStore and returns its result.
+// Only for vectors of at most 8 bytes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API size_t CompressBlendedStore(VFromD<D> v, MFromD<D> m, D d,
+                                    TFromD<D>* HWY_RESTRICT unaligned) {
+  // AVX-512 already does the blending at no extra cost (latency 11,
+  // rthroughput 2 - same as compress plus store).
+  // The condition only involves compile-time constants (target and lane size).
+  if (HWY_TARGET == HWY_AVX3_DL ||
+      (HWY_TARGET != HWY_AVX3_ZEN4 && sizeof(TFromD<D>) > 2)) {
+    // We're relying on the mask to blend. Clear the undefined upper bits.
+    constexpr size_t kN = MaxLanes(d);
+    // Only needed for partial vectors, i.e. fewer lanes than fit in 16 bytes.
+    if (kN != 16 / sizeof(TFromD<D>)) {
+      m = And(m, FirstN(d, kN));
+    }
+    return CompressStore(v, m, d, unaligned);
+  } else {
+    // Compress in registers, then write only the first `count` lanes.
+    const size_t count = CountTrue(d, m);
+    const VFromD<D> compressed = Compress(v, m);
+#if HWY_MEM_OPS_MIGHT_FAULT
+    // BlendedStore tests mask for each lane, but we know that the mask is
+    // FirstN, so we can just copy.
+    alignas(16) TFromD<D> buf[MaxLanes(d)];
+    Store(compressed, d, buf);
+    CopyBytes(buf, unaligned, count * sizeof(TFromD<D>));
+#else
+    BlendedStore(compressed, FirstN(d, count), d, unaligned);
+#endif
+    detail::MaybeUnpoison(unaligned, count);
+    return count;
+  }
+}
+
+// ------------------------------ CompressBitsStore (defined in x86_512)
+
+#else  // AVX2 or below
+
+// ------------------------------ StoreMaskBits
+
+namespace detail {
+
+// Converts a movemask result to uint64_t. Going through `unsigned` first
+// avoids sign extension into the upper 32 bits.
+constexpr HWY_INLINE uint64_t U64FromInt(int mask_bits) {
+  return static_cast<uint64_t>(static_cast<unsigned>(mask_bits));
+}
+
+// The tagged overloads below return one bit per lane, taken from each lane's
+// most significant bit. They are selected by lane size in bytes.
+
+// 8-bit lanes.
+template <typename T, size_t N>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<1> /*tag*/,
+                                 const Mask128<T, N> mask) {
+  const Simd<T, N, 0> d;
+  const auto mask_vec = BitCast(d, VecFromMask(d, mask));
+  const int lane_msbs = _mm_movemask_epi8(mask_vec.raw);
+  return U64FromInt(lane_msbs);
+}
+
+// 16-bit lanes.
+template <typename T, size_t N>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<2> /*tag*/,
+                                 const Mask128<T, N> mask) {
+  // Narrow each u16 to a byte (signed saturation keeps the sign bit) so that
+  // the byte-wise movemask yields one bit per original lane.
+  const auto narrowed = _mm_packs_epi16(mask.raw, _mm_setzero_si128());
+  const int lane_msbs = _mm_movemask_epi8(narrowed);
+  return U64FromInt(lane_msbs);
+}
+
+// 32-bit lanes.
+template <typename T, size_t N>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<4> /*tag*/, Mask128<T, N> mask) {
+  const Simd<T, N, 0> d;
+  const Simd<float, N, 0> df;
+  // Reinterpret as float so that the 32-bit movemask can be used.
+  const auto mask_vec = BitCast(df, VecFromMask(d, mask));
+  const int lane_msbs = _mm_movemask_ps(mask_vec.raw);
+  return U64FromInt(lane_msbs);
+}
+
+// 64-bit lanes.
+template <typename T, size_t N>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<8> /*tag*/, Mask128<T, N> mask) {
+  const Simd<T, N, 0> d;
+  const Simd<double, N, 0> df;
+  // Reinterpret as double so that the 64-bit movemask can be used.
+  const auto mask_vec = BitCast(df, VecFromMask(d, mask));
+  const int lane_msbs = _mm_movemask_pd(mask_vec.raw);
+  return U64FromInt(lane_msbs);
+}
+
+// Dispatches on sizeof(T), then passes the bits through OnlyActive<T, N>.
+template <typename T, size_t N>
+HWY_INLINE uint64_t BitsFromMask(const Mask128<T, N> mask) {
+  const uint64_t all_bits = BitsFromMask(hwy::SizeTag<sizeof(T)>(), mask);
+  return OnlyActive<T, N>(all_bits);
+}
+
+}  // namespace detail
+
+// Copies the bits of `mask` to `bits` and returns the number of bytes written,
+// (MaxLanes(d) + 7) / 8. `bits` points to at least 8 writable bytes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API size_t StoreMaskBits(D d, MFromD<D> mask, uint8_t* bits) {
+  const uint64_t lane_bits = detail::BitsFromMask(mask);
+  constexpr size_t kBytesWritten = (MaxLanes(d) + 7) / 8;
+  CopyBytes<kBytesWritten>(&lane_bits, bits);
+  return kBytesWritten;
+}
+
+// ------------------------------ Mask testing
+
+// Returns true if no lane of `mask` is true.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API bool AllFalse(D /* tag */, MFromD<D> mask) {
+  // Extracting the bits is cheaper than PTEST, which is 2 uop / 3L.
+  const uint64_t lane_bits = detail::BitsFromMask(mask);
+  return lane_bits == 0;
+}
+
+// Returns true if all MaxLanes(d) lanes of `mask` are true.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API bool AllTrue(D d, MFromD<D> mask) {
+  const uint64_t lane_bits = detail::BitsFromMask(mask);
+  // One set bit for every lane of `d`.
+  constexpr uint64_t kAllBits = (1ull << MaxLanes(d)) - 1;
+  return lane_bits == kAllBits;
+}
+
+// Returns the number of true lanes in `mask`.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API size_t CountTrue(D /* tag */, MFromD<D> mask) {
+  const uint64_t lane_bits = detail::BitsFromMask(mask);
+  return PopCount(lane_bits);
+}
+
+// Returns the index of the lowest true lane. Presumably requires at least one
+// true lane, because the `_Nonzero32` bit-scan helper is used unconditionally.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API size_t FindKnownFirstTrue(D /* tag */, MFromD<D> mask) {
+  const uint32_t lane_bits = static_cast<uint32_t>(detail::BitsFromMask(mask));
+  return Num0BitsBelowLS1Bit_Nonzero32(lane_bits);
+}
+
+// Returns the index of the lowest true lane, or -1 if no lane is true.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API intptr_t FindFirstTrue(D /* tag */, MFromD<D> mask) {
+  const uint32_t lane_bits = static_cast<uint32_t>(detail::BitsFromMask(mask));
+  if (lane_bits == 0) {
+    return -1;
+  }
+  return static_cast<intptr_t>(Num0BitsBelowLS1Bit_Nonzero32(lane_bits));
+}
+
+// Returns the index of the highest true lane. Presumably requires at least one
+// true lane, because the `_Nonzero32` bit-scan helper is used unconditionally.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API size_t FindKnownLastTrue(D /* tag */, MFromD<D> mask) {
+  const uint32_t lane_bits = static_cast<uint32_t>(detail::BitsFromMask(mask));
+  // Index of the most significant set bit of a 32-bit value.
+  const auto leading_zeros = Num0BitsAboveMS1Bit_Nonzero32(lane_bits);
+  return 31 - leading_zeros;
+}
+
+// Returns the index of the highest true lane, or -1 if no lane is true.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API intptr_t FindLastTrue(D /* tag */, MFromD<D> mask) {
+  const uint32_t lane_bits = static_cast<uint32_t>(detail::BitsFromMask(mask));
+  if (lane_bits == 0) {
+    return -1;
+  }
+  // Index of the most significant set bit of a 32-bit value.
+  return static_cast<intptr_t>(31 - Num0BitsAboveMS1Bit_Nonzero32(lane_bits));
+}
+
+// ------------------------------ Compress, CompressBits
+
+namespace detail {
+
+// Returns, for 16-bit lanes, a vector of byte indices selected by `mask_bits`
+// (which must be < 256). The result is assembled from the lookup table below;
+// NOTE(review): presumably it is consumed by a byte shuffle in Compress -
+// confirm against the caller.
+// Also works for N < 8 because the first 16 4-tuples only reference bytes 0-6.
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_INLINE VFromD<D> IndicesFromBits128(D d, uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 256);
+  const Rebind<uint8_t, decltype(d)> d8;
+  const Twice<decltype(d8)> d8t;
+  const RebindToUnsigned<decltype(d)> du;
+
+  // compress_epi16 requires VBMI2 and there is no permutevar_epi16, so we need
+  // byte indices for PSHUFB (one vector's worth for each of 256 combinations of
+  // 8 mask bits). Loading them directly would require 4 KiB. We can instead
+  // store lane indices and convert to byte indices (2*lane + 0..1), with the
+  // doubling baked into the table. AVX2 Compress32 stores eight 4-bit lane
+  // indices (total 1 KiB), broadcasts them into each 32-bit lane and shifts.
+  // Here, 16-bit lanes are too narrow to hold all bits, and unpacking nibbles
+  // is likely more costly than the higher cache footprint from storing bytes.
+  // Layout: 256 entries of 8 bytes each (two entries per source line); entry
+  // `mask_bits` holds the already-doubled lane index for each output lane.
+  alignas(16) static constexpr uint8_t table[2048] = {
+      // PrintCompress16x8Tables
+      0,  2,  4,  6,  8,  10, 12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      2,  0,  4,  6,  8,  10, 12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      4,  0,  2,  6,  8,  10, 12, 14, /**/ 0, 4,  2,  6,  8,  10, 12, 14,  //
+      2,  4,  0,  6,  8,  10, 12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      6,  0,  2,  4,  8,  10, 12, 14, /**/ 0, 6,  2,  4,  8,  10, 12, 14,  //
+      2,  6,  0,  4,  8,  10, 12, 14, /**/ 0, 2,  6,  4,  8,  10, 12, 14,  //
+      4,  6,  0,  2,  8,  10, 12, 14, /**/ 0, 4,  6,  2,  8,  10, 12, 14,  //
+      2,  4,  6,  0,  8,  10, 12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      8,  0,  2,  4,  6,  10, 12, 14, /**/ 0, 8,  2,  4,  6,  10, 12, 14,  //
+      2,  8,  0,  4,  6,  10, 12, 14, /**/ 0, 2,  8,  4,  6,  10, 12, 14,  //
+      4,  8,  0,  2,  6,  10, 12, 14, /**/ 0, 4,  8,  2,  6,  10, 12, 14,  //
+      2,  4,  8,  0,  6,  10, 12, 14, /**/ 0, 2,  4,  8,  6,  10, 12, 14,  //
+      6,  8,  0,  2,  4,  10, 12, 14, /**/ 0, 6,  8,  2,  4,  10, 12, 14,  //
+      2,  6,  8,  0,  4,  10, 12, 14, /**/ 0, 2,  6,  8,  4,  10, 12, 14,  //
+      4,  6,  8,  0,  2,  10, 12, 14, /**/ 0, 4,  6,  8,  2,  10, 12, 14,  //
+      2,  4,  6,  8,  0,  10, 12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      10, 0,  2,  4,  6,  8,  12, 14, /**/ 0, 10, 2,  4,  6,  8,  12, 14,  //
+      2,  10, 0,  4,  6,  8,  12, 14, /**/ 0, 2,  10, 4,  6,  8,  12, 14,  //
+      4,  10, 0,  2,  6,  8,  12, 14, /**/ 0, 4,  10, 2,  6,  8,  12, 14,  //
+      2,  4,  10, 0,  6,  8,  12, 14, /**/ 0, 2,  4,  10, 6,  8,  12, 14,  //
+      6,  10, 0,  2,  4,  8,  12, 14, /**/ 0, 6,  10, 2,  4,  8,  12, 14,  //
+      2,  6,  10, 0,  4,  8,  12, 14, /**/ 0, 2,  6,  10, 4,  8,  12, 14,  //
+      4,  6,  10, 0,  2,  8,  12, 14, /**/ 0, 4,  6,  10, 2,  8,  12, 14,  //
+      2,  4,  6,  10, 0,  8,  12, 14, /**/ 0, 2,  4,  6,  10, 8,  12, 14,  //
+      8,  10, 0,  2,  4,  6,  12, 14, /**/ 0, 8,  10, 2,  4,  6,  12, 14,  //
+      2,  8,  10, 0,  4,  6,  12, 14, /**/ 0, 2,  8,  10, 4,  6,  12, 14,  //
+      4,  8,  10, 0,  2,  6,  12, 14, /**/ 0, 4,  8,  10, 2,  6,  12, 14,  //
+      2,  4,  8,  10, 0,  6,  12, 14, /**/ 0, 2,  4,  8,  10, 6,  12, 14,  //
+      6,  8,  10, 0,  2,  4,  12, 14, /**/ 0, 6,  8,  10, 2,  4,  12, 14,  //
+      2,  6,  8,  10, 0,  4,  12, 14, /**/ 0, 2,  6,  8,  10, 4,  12, 14,  //
+      4,  6,  8,  10, 0,  2,  12, 14, /**/ 0, 4,  6,  8,  10, 2,  12, 14,  //
+      2,  4,  6,  8,  10, 0,  12, 14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      12, 0,  2,  4,  6,  8,  10, 14, /**/ 0, 12, 2,  4,  6,  8,  10, 14,  //
+      2,  12, 0,  4,  6,  8,  10, 14, /**/ 0, 2,  12, 4,  6,  8,  10, 14,  //
+      4,  12, 0,  2,  6,  8,  10, 14, /**/ 0, 4,  12, 2,  6,  8,  10, 14,  //
+      2,  4,  12, 0,  6,  8,  10, 14, /**/ 0, 2,  4,  12, 6,  8,  10, 14,  //
+      6,  12, 0,  2,  4,  8,  10, 14, /**/ 0, 6,  12, 2,  4,  8,  10, 14,  //
+      2,  6,  12, 0,  4,  8,  10, 14, /**/ 0, 2,  6,  12, 4,  8,  10, 14,  //
+      4,  6,  12, 0,  2,  8,  10, 14, /**/ 0, 4,  6,  12, 2,  8,  10, 14,  //
+      2,  4,  6,  12, 0,  8,  10, 14, /**/ 0, 2,  4,  6,  12, 8,  10, 14,  //
+      8,  12, 0,  2,  4,  6,  10, 14, /**/ 0, 8,  12, 2,  4,  6,  10, 14,  //
+      2,  8,  12, 0,  4,  6,  10, 14, /**/ 0, 2,  8,  12, 4,  6,  10, 14,  //
+      4,  8,  12, 0,  2,  6,  10, 14, /**/ 0, 4,  8,  12, 2,  6,  10, 14,  //
+      2,  4,  8,  12, 0,  6,  10, 14, /**/ 0, 2,  4,  8,  12, 6,  10, 14,  //
+      6,  8,  12, 0,  2,  4,  10, 14, /**/ 0, 6,  8,  12, 2,  4,  10, 14,  //
+      2,  6,  8,  12, 0,  4,  10, 14, /**/ 0, 2,  6,  8,  12, 4,  10, 14,  //
+      4,  6,  8,  12, 0,  2,  10, 14, /**/ 0, 4,  6,  8,  12, 2,  10, 14,  //
+      2,  4,  6,  8,  12, 0,  10, 14, /**/ 0, 2,  4,  6,  8,  12, 10, 14,  //
+      10, 12, 0,  2,  4,  6,  8,  14, /**/ 0, 10, 12, 2,  4,  6,  8,  14,  //
+      2,  10, 12, 0,  4,  6,  8,  14, /**/ 0, 2,  10, 12, 4,  6,  8,  14,  //
+      4,  10, 12, 0,  2,  6,  8,  14, /**/ 0, 4,  10, 12, 2,  6,  8,  14,  //
+      2,  4,  10, 12, 0,  6,  8,  14, /**/ 0, 2,  4,  10, 12, 6,  8,  14,  //
+      6,  10, 12, 0,  2,  4,  8,  14, /**/ 0, 6,  10, 12, 2,  4,  8,  14,  //
+      2,  6,  10, 12, 0,  4,  8,  14, /**/ 0, 2,  6,  10, 12, 4,  8,  14,  //
+      4,  6,  10, 12, 0,  2,  8,  14, /**/ 0, 4,  6,  10, 12, 2,  8,  14,  //
+      2,  4,  6,  10, 12, 0,  8,  14, /**/ 0, 2,  4,  6,  10, 12, 8,  14,  //
+      8,  10, 12, 0,  2,  4,  6,  14, /**/ 0, 8,  10, 12, 2,  4,  6,  14,  //
+      2,  8,  10, 12, 0,  4,  6,  14, /**/ 0, 2,  8,  10, 12, 4,  6,  14,  //
+      4,  8,  10, 12, 0,  2,  6,  14, /**/ 0, 4,  8,  10, 12, 2,  6,  14,  //
+      2,  4,  8,  10, 12, 0,  6,  14, /**/ 0, 2,  4,  8,  10, 12, 6,  14,  //
+      6,  8,  10, 12, 0,  2,  4,  14, /**/ 0, 6,  8,  10, 12, 2,  4,  14,  //
+      2,  6,  8,  10, 12, 0,  4,  14, /**/ 0, 2,  6,  8,  10, 12, 4,  14,  //
+      4,  6,  8,  10, 12, 0,  2,  14, /**/ 0, 4,  6,  8,  10, 12, 2,  14,  //
+      2,  4,  6,  8,  10, 12, 0,  14, /**/ 0, 2,  4,  6,  8,  10, 12, 14,  //
+      14, 0,  2,  4,  6,  8,  10, 12, /**/ 0, 14, 2,  4,  6,  8,  10, 12,  //
+      2,  14, 0,  4,  6,  8,  10, 12, /**/ 0, 2,  14, 4,  6,  8,  10, 12,  //
+      4,  14, 0,  2,  6,  8,  10, 12, /**/ 0, 4,  14, 2,  6,  8,  10, 12,  //
+      2,  4,  14, 0,  6,  8,  10, 12, /**/ 0, 2,  4,  14, 6,  8,  10, 12,  //
+      6,  14, 0,  2,  4,  8,  10, 12, /**/ 0, 6,  14, 2,  4,  8,  10, 12,  //
+      2,  6,  14, 0,  4,  8,  10, 12, /**/ 0, 2,  6,  14, 4,  8,  10, 12,  //
+      4,  6,  14, 0,  2,  8,  10, 12, /**/ 0, 4,  6,  14, 2,  8,  10, 12,  //
+      2,  4,  6,  14, 0,  8,  10, 12, /**/ 0, 2,  4,  6,  14, 8,  10, 12,  //
+      8,  14, 0,  2,  4,  6,  10, 12, /**/ 0, 8,  14, 2,  4,  6,  10, 12,  //
+      2,  8,  14, 0,  4,  6,  10, 12, /**/ 0, 2,  8,  14, 4,  6,  10, 12,  //
+      4,  8,  14, 0,  2,  6,  10, 12, /**/ 0, 4,  8,  14, 2,  6,  10, 12,  //
+      2,  4,  8,  14, 0,  6,  10, 12, /**/ 0, 2,  4,  8,  14, 6,  10, 12,  //
+      6,  8,  14, 0,  2,  4,  10, 12, /**/ 0, 6,  8,  14, 2,  4,  10, 12,  //
+      2,  6,  8,  14, 0,  4,  10, 12, /**/ 0, 2,  6,  8,  14, 4,  10, 12,  //
+      4,  6,  8,  14, 0,  2,  10, 12, /**/ 0, 4,  6,  8,  14, 2,  10, 12,  //
+      2,  4,  6,  8,  14, 0,  10, 12, /**/ 0, 2,  4,  6,  8,  14, 10, 12,  //
+      10, 14, 0,  2,  4,  6,  8,  12, /**/ 0, 10, 14, 2,  4,  6,  8,  12,  //
+      2,  10, 14, 0,  4,  6,  8,  12, /**/ 0, 2,  10, 14, 4,  6,  8,  12,  //
+      4,  10, 14, 0,  2,  6,  8,  12, /**/ 0, 4,  10, 14, 2,  6,  8,  12,  //
+      2,  4,  10, 14, 0,  6,  8,  12, /**/ 0, 2,  4,  10, 14, 6,  8,  12,  //
+      6,  10, 14, 0,  2,  4,  8,  12, /**/ 0, 6,  10, 14, 2,  4,  8,  12,  //
+      2,  6,  10, 14, 0,  4,  8,  12, /**/ 0, 2,  6,  10, 14, 4,  8,  12,  //
+      4,  6,  10, 14, 0,  2,  8,  12, /**/ 0, 4,  6,  10, 14, 2,  8,  12,  //
+      2,  4,  6,  10, 14, 0,  8,  12, /**/ 0, 2,  4,  6,  10, 14, 8,  12,  //
+      8,  10, 14, 0,  2,  4,  6,  12, /**/ 0, 8,  10, 14, 2,  4,  6,  12,  //
+      2,  8,  10, 14, 0,  4,  6,  12, /**/ 0, 2,  8,  10, 14, 4,  6,  12,  //
+      4,  8,  10, 14, 0,  2,  6,  12, /**/ 0, 4,  8,  10, 14, 2,  6,  12,  //
+      2,  4,  8,  10, 14, 0,  6,  12, /**/ 0, 2,  4,  8,  10, 14, 6,  12,  //
+      6,  8,  10, 14, 0,  2,  4,  12, /**/ 0, 6,  8,  10, 14, 2,  4,  12,  //
+      2,  6,  8,  10, 14, 0,  4,  12, /**/ 0, 2,  6,  8,  10, 14, 4,  12,  //
+      4,  6,  8,  10, 14, 0,  2,  12, /**/ 0, 4,  6,  8,  10, 14, 2,  12,  //
+      2,  4,  6,  8,  10, 14, 0,  12, /**/ 0, 2,  4,  6,  8,  10, 14, 12,  //
+      12, 14, 0,  2,  4,  6,  8,  10, /**/ 0, 12, 14, 2,  4,  6,  8,  10,  //
+      2,  12, 14, 0,  4,  6,  8,  10, /**/ 0, 2,  12, 14, 4,  6,  8,  10,  //
+      4,  12, 14, 0,  2,  6,  8,  10, /**/ 0, 4,  12, 14, 2,  6,  8,  10,  //
+      2,  4,  12, 14, 0,  6,  8,  10, /**/ 0, 2,  4,  12, 14, 6,  8,  10,  //
+      6,  12, 14, 0,  2,  4,  8,  10, /**/ 0, 6,  12, 14, 2,  4,  8,  10,  //
+      2,  6,  12, 14, 0,  4,  8,  10, /**/ 0, 2,  6,  12, 14, 4,  8,  10,  //
+      4,  6,  12, 14, 0,  2,  8,  10, /**/ 0, 4,  6,  12, 14, 2,  8,  10,  //
+      2,  4,  6,  12, 14, 0,  8,  10, /**/ 0, 2,  4,  6,  12, 14, 8,  10,  //
+      8,  12, 14, 0,  2,  4,  6,  10, /**/ 0, 8,  12, 14, 2,  4,  6,  10,  //
+      2,  8,  12, 14, 0,  4,  6,  10, /**/ 0, 2,  8,  12, 14, 4,  6,  10,  //
+      4,  8,  12, 14, 0,  2,  6,  10, /**/ 0, 4,  8,  12, 14, 2,  6,  10,  //
+      2,  4,  8,  12, 14, 0,  6,  10, /**/ 0, 2,  4,  8,  12, 14, 6,  10,  //
+      6,  8,  12, 14, 0,  2,  4,  10, /**/ 0, 6,  8,  12, 14, 2,  4,  10,  //
+      2,  6,  8,  12, 14, 0,  4,  10, /**/ 0, 2,  6,  8,  12, 14, 4,  10,  //
+      4,  6,  8,  12, 14, 0,  2,  10, /**/ 0, 4,  6,  8,  12, 14, 2,  10,  //
+      2,  4,  6,  8,  12, 14, 0,  10, /**/ 0, 2,  4,  6,  8,  12, 14, 10,  //
+      10, 12, 14, 0,  2,  4,  6,  8,  /**/ 0, 10, 12, 14, 2,  4,  6,  8,   //
+      2,  10, 12, 14, 0,  4,  6,  8,  /**/ 0, 2,  10, 12, 14, 4,  6,  8,   //
+      4,  10, 12, 14, 0,  2,  6,  8,  /**/ 0, 4,  10, 12, 14, 2,  6,  8,   //
+      2,  4,  10, 12, 14, 0,  6,  8,  /**/ 0, 2,  4,  10, 12, 14, 6,  8,   //
+      6,  10, 12, 14, 0,  2,  4,  8,  /**/ 0, 6,  10, 12, 14, 2,  4,  8,   //
+      2,  6,  10, 12, 14, 0,  4,  8,  /**/ 0, 2,  6,  10, 12, 14, 4,  8,   //
+      4,  6,  10, 12, 14, 0,  2,  8,  /**/ 0, 4,  6,  10, 12, 14, 2,  8,   //
+      2,  4,  6,  10, 12, 14, 0,  8,  /**/ 0, 2,  4,  6,  10, 12, 14, 8,   //
+      8,  10, 12, 14, 0,  2,  4,  6,  /**/ 0, 8,  10, 12, 14, 2,  4,  6,   //
+      2,  8,  10, 12, 14, 0,  4,  6,  /**/ 0, 2,  8,  10, 12, 14, 4,  6,   //
+      4,  8,  10, 12, 14, 0,  2,  6,  /**/ 0, 4,  8,  10, 12, 14, 2,  6,   //
+      2,  4,  8,  10, 12, 14, 0,  6,  /**/ 0, 2,  4,  8,  10, 12, 14, 6,   //
+      6,  8,  10, 12, 14, 0,  2,  4,  /**/ 0, 6,  8,  10, 12, 14, 2,  4,   //
+      2,  6,  8,  10, 12, 14, 0,  4,  /**/ 0, 2,  6,  8,  10, 12, 14, 4,   //
+      4,  6,  8,  10, 12, 14, 0,  2,  /**/ 0, 4,  6,  8,  10, 12, 14, 2,   //
+      2,  4,  6,  8,  10, 12, 14, 0,  /**/ 0, 2,  4,  6,  8,  10, 12, 14};
+
+  // Load the 8-byte table entry for this mask (one doubled lane index per
+  // output lane).
+  const VFromD<decltype(d8t)> byte_idx{Load(d8, table + mask_bits * 8).raw};
+  // Interleave the indices with themselves so that both bytes of each 16-bit
+  // lane hold the same doubled index.
+  const VFromD<decltype(du)> pairs = ZipLower(byte_idx, byte_idx);
+  // Adding 0x0100 increments only the upper byte, turning each pair into the
+  // byte indices (2*lane, 2*lane + 1).
+  return BitCast(d, pairs + Set(du, 0x0100));
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 2)>
+HWY_INLINE VFromD<D> IndicesFromNotBits128(D d, uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 256);
+  const Rebind<uint8_t, decltype(d)> d8;
+  const Twice<decltype(d8)> d8t;
+  const RebindToUnsigned<decltype(d)> du;
+
+  // compress_epi16 requires VBMI2 and there is no permutevar_epi16, so we need
+  // byte indices for PSHUFB (one vector's worth for each of 256 combinations of
+  // 8 mask bits). Loading them directly would require 4 KiB. We can instead
+  // store lane indices and convert to byte indices (2*lane + 0..1), with the
+  // doubling baked into the table. AVX2 Compress32 stores eight 4-bit lane
+  // indices (total 1 KiB), broadcasts them into each 32-bit lane and shifts.
+  // Here, 16-bit lanes are too narrow to hold all bits, and unpacking nibbles
+  // is likely more costly than the higher cache footprint from storing bytes.
+  alignas(16) static constexpr uint8_t table[2048] = {
+      // PrintCompressNot16x8Tables
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  6,  8,  10, 12, 14, 0,   //
+      0, 4,  6,  8,  10, 12, 14, 2,  /**/ 4,  6,  8,  10, 12, 14, 0,  2,   //
+      0, 2,  6,  8,  10, 12, 14, 4,  /**/ 2,  6,  8,  10, 12, 14, 0,  4,   //
+      0, 6,  8,  10, 12, 14, 2,  4,  /**/ 6,  8,  10, 12, 14, 0,  2,  4,   //
+      0, 2,  4,  8,  10, 12, 14, 6,  /**/ 2,  4,  8,  10, 12, 14, 0,  6,   //
+      0, 4,  8,  10, 12, 14, 2,  6,  /**/ 4,  8,  10, 12, 14, 0,  2,  6,   //
+      0, 2,  8,  10, 12, 14, 4,  6,  /**/ 2,  8,  10, 12, 14, 0,  4,  6,   //
+      0, 8,  10, 12, 14, 2,  4,  6,  /**/ 8,  10, 12, 14, 0,  2,  4,  6,   //
+      0, 2,  4,  6,  10, 12, 14, 8,  /**/ 2,  4,  6,  10, 12, 14, 0,  8,   //
+      0, 4,  6,  10, 12, 14, 2,  8,  /**/ 4,  6,  10, 12, 14, 0,  2,  8,   //
+      0, 2,  6,  10, 12, 14, 4,  8,  /**/ 2,  6,  10, 12, 14, 0,  4,  8,   //
+      0, 6,  10, 12, 14, 2,  4,  8,  /**/ 6,  10, 12, 14, 0,  2,  4,  8,   //
+      0, 2,  4,  10, 12, 14, 6,  8,  /**/ 2,  4,  10, 12, 14, 0,  6,  8,   //
+      0, 4,  10, 12, 14, 2,  6,  8,  /**/ 4,  10, 12, 14, 0,  2,  6,  8,   //
+      0, 2,  10, 12, 14, 4,  6,  8,  /**/ 2,  10, 12, 14, 0,  4,  6,  8,   //
+      0, 10, 12, 14, 2,  4,  6,  8,  /**/ 10, 12, 14, 0,  2,  4,  6,  8,   //
+      0, 2,  4,  6,  8,  12, 14, 10, /**/ 2,  4,  6,  8,  12, 14, 0,  10,  //
+      0, 4,  6,  8,  12, 14, 2,  10, /**/ 4,  6,  8,  12, 14, 0,  2,  10,  //
+      0, 2,  6,  8,  12, 14, 4,  10, /**/ 2,  6,  8,  12, 14, 0,  4,  10,  //
+      0, 6,  8,  12, 14, 2,  4,  10, /**/ 6,  8,  12, 14, 0,  2,  4,  10,  //
+      0, 2,  4,  8,  12, 14, 6,  10, /**/ 2,  4,  8,  12, 14, 0,  6,  10,  //
+      0, 4,  8,  12, 14, 2,  6,  10, /**/ 4,  8,  12, 14, 0,  2,  6,  10,  //
+      0, 2,  8,  12, 14, 4,  6,  10, /**/ 2,  8,  12, 14, 0,  4,  6,  10,  //
+      0, 8,  12, 14, 2,  4,  6,  10, /**/ 8,  12, 14, 0,  2,  4,  6,  10,  //
+      0, 2,  4,  6,  12, 14, 8,  10, /**/ 2,  4,  6,  12, 14, 0,  8,  10,  //
+      0, 4,  6,  12, 14, 2,  8,  10, /**/ 4,  6,  12, 14, 0,  2,  8,  10,  //
+      0, 2,  6,  12, 14, 4,  8,  10, /**/ 2,  6,  12, 14, 0,  4,  8,  10,  //
+      0, 6,  12, 14, 2,  4,  8,  10, /**/ 6,  12, 14, 0,  2,  4,  8,  10,  //
+      0, 2,  4,  12, 14, 6,  8,  10, /**/ 2,  4,  12, 14, 0,  6,  8,  10,  //
+      0, 4,  12, 14, 2,  6,  8,  10, /**/ 4,  12, 14, 0,  2,  6,  8,  10,  //
+      0, 2,  12, 14, 4,  6,  8,  10, /**/ 2,  12, 14, 0,  4,  6,  8,  10,  //
+      0, 12, 14, 2,  4,  6,  8,  10, /**/ 12, 14, 0,  2,  4,  6,  8,  10,  //
+      0, 2,  4,  6,  8,  10, 14, 12, /**/ 2,  4,  6,  8,  10, 14, 0,  12,  //
+      0, 4,  6,  8,  10, 14, 2,  12, /**/ 4,  6,  8,  10, 14, 0,  2,  12,  //
+      0, 2,  6,  8,  10, 14, 4,  12, /**/ 2,  6,  8,  10, 14, 0,  4,  12,  //
+      0, 6,  8,  10, 14, 2,  4,  12, /**/ 6,  8,  10, 14, 0,  2,  4,  12,  //
+      0, 2,  4,  8,  10, 14, 6,  12, /**/ 2,  4,  8,  10, 14, 0,  6,  12,  //
+      0, 4,  8,  10, 14, 2,  6,  12, /**/ 4,  8,  10, 14, 0,  2,  6,  12,  //
+      0, 2,  8,  10, 14, 4,  6,  12, /**/ 2,  8,  10, 14, 0,  4,  6,  12,  //
+      0, 8,  10, 14, 2,  4,  6,  12, /**/ 8,  10, 14, 0,  2,  4,  6,  12,  //
+      0, 2,  4,  6,  10, 14, 8,  12, /**/ 2,  4,  6,  10, 14, 0,  8,  12,  //
+      0, 4,  6,  10, 14, 2,  8,  12, /**/ 4,  6,  10, 14, 0,  2,  8,  12,  //
+      0, 2,  6,  10, 14, 4,  8,  12, /**/ 2,  6,  10, 14, 0,  4,  8,  12,  //
+      0, 6,  10, 14, 2,  4,  8,  12, /**/ 6,  10, 14, 0,  2,  4,  8,  12,  //
+      0, 2,  4,  10, 14, 6,  8,  12, /**/ 2,  4,  10, 14, 0,  6,  8,  12,  //
+      0, 4,  10, 14, 2,  6,  8,  12, /**/ 4,  10, 14, 0,  2,  6,  8,  12,  //
+      0, 2,  10, 14, 4,  6,  8,  12, /**/ 2,  10, 14, 0,  4,  6,  8,  12,  //
+      0, 10, 14, 2,  4,  6,  8,  12, /**/ 10, 14, 0,  2,  4,  6,  8,  12,  //
+      0, 2,  4,  6,  8,  14, 10, 12, /**/ 2,  4,  6,  8,  14, 0,  10, 12,  //
+      0, 4,  6,  8,  14, 2,  10, 12, /**/ 4,  6,  8,  14, 0,  2,  10, 12,  //
+      0, 2,  6,  8,  14, 4,  10, 12, /**/ 2,  6,  8,  14, 0,  4,  10, 12,  //
+      0, 6,  8,  14, 2,  4,  10, 12, /**/ 6,  8,  14, 0,  2,  4,  10, 12,  //
+      0, 2,  4,  8,  14, 6,  10, 12, /**/ 2,  4,  8,  14, 0,  6,  10, 12,  //
+      0, 4,  8,  14, 2,  6,  10, 12, /**/ 4,  8,  14, 0,  2,  6,  10, 12,  //
+      0, 2,  8,  14, 4,  6,  10, 12, /**/ 2,  8,  14, 0,  4,  6,  10, 12,  //
+      0, 8,  14, 2,  4,  6,  10, 12, /**/ 8,  14, 0,  2,  4,  6,  10, 12,  //
+      0, 2,  4,  6,  14, 8,  10, 12, /**/ 2,  4,  6,  14, 0,  8,  10, 12,  //
+      0, 4,  6,  14, 2,  8,  10, 12, /**/ 4,  6,  14, 0,  2,  8,  10, 12,  //
+      0, 2,  6,  14, 4,  8,  10, 12, /**/ 2,  6,  14, 0,  4,  8,  10, 12,  //
+      0, 6,  14, 2,  4,  8,  10, 12, /**/ 6,  14, 0,  2,  4,  8,  10, 12,  //
+      0, 2,  4,  14, 6,  8,  10, 12, /**/ 2,  4,  14, 0,  6,  8,  10, 12,  //
+      0, 4,  14, 2,  6,  8,  10, 12, /**/ 4,  14, 0,  2,  6,  8,  10, 12,  //
+      0, 2,  14, 4,  6,  8,  10, 12, /**/ 2,  14, 0,  4,  6,  8,  10, 12,  //
+      0, 14, 2,  4,  6,  8,  10, 12, /**/ 14, 0,  2,  4,  6,  8,  10, 12,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  6,  8,  10, 12, 0,  14,  //
+      0, 4,  6,  8,  10, 12, 2,  14, /**/ 4,  6,  8,  10, 12, 0,  2,  14,  //
+      0, 2,  6,  8,  10, 12, 4,  14, /**/ 2,  6,  8,  10, 12, 0,  4,  14,  //
+      0, 6,  8,  10, 12, 2,  4,  14, /**/ 6,  8,  10, 12, 0,  2,  4,  14,  //
+      0, 2,  4,  8,  10, 12, 6,  14, /**/ 2,  4,  8,  10, 12, 0,  6,  14,  //
+      0, 4,  8,  10, 12, 2,  6,  14, /**/ 4,  8,  10, 12, 0,  2,  6,  14,  //
+      0, 2,  8,  10, 12, 4,  6,  14, /**/ 2,  8,  10, 12, 0,  4,  6,  14,  //
+      0, 8,  10, 12, 2,  4,  6,  14, /**/ 8,  10, 12, 0,  2,  4,  6,  14,  //
+      0, 2,  4,  6,  10, 12, 8,  14, /**/ 2,  4,  6,  10, 12, 0,  8,  14,  //
+      0, 4,  6,  10, 12, 2,  8,  14, /**/ 4,  6,  10, 12, 0,  2,  8,  14,  //
+      0, 2,  6,  10, 12, 4,  8,  14, /**/ 2,  6,  10, 12, 0,  4,  8,  14,  //
+      0, 6,  10, 12, 2,  4,  8,  14, /**/ 6,  10, 12, 0,  2,  4,  8,  14,  //
+      0, 2,  4,  10, 12, 6,  8,  14, /**/ 2,  4,  10, 12, 0,  6,  8,  14,  //
+      0, 4,  10, 12, 2,  6,  8,  14, /**/ 4,  10, 12, 0,  2,  6,  8,  14,  //
+      0, 2,  10, 12, 4,  6,  8,  14, /**/ 2,  10, 12, 0,  4,  6,  8,  14,  //
+      0, 10, 12, 2,  4,  6,  8,  14, /**/ 10, 12, 0,  2,  4,  6,  8,  14,  //
+      0, 2,  4,  6,  8,  12, 10, 14, /**/ 2,  4,  6,  8,  12, 0,  10, 14,  //
+      0, 4,  6,  8,  12, 2,  10, 14, /**/ 4,  6,  8,  12, 0,  2,  10, 14,  //
+      0, 2,  6,  8,  12, 4,  10, 14, /**/ 2,  6,  8,  12, 0,  4,  10, 14,  //
+      0, 6,  8,  12, 2,  4,  10, 14, /**/ 6,  8,  12, 0,  2,  4,  10, 14,  //
+      0, 2,  4,  8,  12, 6,  10, 14, /**/ 2,  4,  8,  12, 0,  6,  10, 14,  //
+      0, 4,  8,  12, 2,  6,  10, 14, /**/ 4,  8,  12, 0,  2,  6,  10, 14,  //
+      0, 2,  8,  12, 4,  6,  10, 14, /**/ 2,  8,  12, 0,  4,  6,  10, 14,  //
+      0, 8,  12, 2,  4,  6,  10, 14, /**/ 8,  12, 0,  2,  4,  6,  10, 14,  //
+      0, 2,  4,  6,  12, 8,  10, 14, /**/ 2,  4,  6,  12, 0,  8,  10, 14,  //
+      0, 4,  6,  12, 2,  8,  10, 14, /**/ 4,  6,  12, 0,  2,  8,  10, 14,  //
+      0, 2,  6,  12, 4,  8,  10, 14, /**/ 2,  6,  12, 0,  4,  8,  10, 14,  //
+      0, 6,  12, 2,  4,  8,  10, 14, /**/ 6,  12, 0,  2,  4,  8,  10, 14,  //
+      0, 2,  4,  12, 6,  8,  10, 14, /**/ 2,  4,  12, 0,  6,  8,  10, 14,  //
+      0, 4,  12, 2,  6,  8,  10, 14, /**/ 4,  12, 0,  2,  6,  8,  10, 14,  //
+      0, 2,  12, 4,  6,  8,  10, 14, /**/ 2,  12, 0,  4,  6,  8,  10, 14,  //
+      0, 12, 2,  4,  6,  8,  10, 14, /**/ 12, 0,  2,  4,  6,  8,  10, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  6,  8,  10, 0,  12, 14,  //
+      0, 4,  6,  8,  10, 2,  12, 14, /**/ 4,  6,  8,  10, 0,  2,  12, 14,  //
+      0, 2,  6,  8,  10, 4,  12, 14, /**/ 2,  6,  8,  10, 0,  4,  12, 14,  //
+      0, 6,  8,  10, 2,  4,  12, 14, /**/ 6,  8,  10, 0,  2,  4,  12, 14,  //
+      0, 2,  4,  8,  10, 6,  12, 14, /**/ 2,  4,  8,  10, 0,  6,  12, 14,  //
+      0, 4,  8,  10, 2,  6,  12, 14, /**/ 4,  8,  10, 0,  2,  6,  12, 14,  //
+      0, 2,  8,  10, 4,  6,  12, 14, /**/ 2,  8,  10, 0,  4,  6,  12, 14,  //
+      0, 8,  10, 2,  4,  6,  12, 14, /**/ 8,  10, 0,  2,  4,  6,  12, 14,  //
+      0, 2,  4,  6,  10, 8,  12, 14, /**/ 2,  4,  6,  10, 0,  8,  12, 14,  //
+      0, 4,  6,  10, 2,  8,  12, 14, /**/ 4,  6,  10, 0,  2,  8,  12, 14,  //
+      0, 2,  6,  10, 4,  8,  12, 14, /**/ 2,  6,  10, 0,  4,  8,  12, 14,  //
+      0, 6,  10, 2,  4,  8,  12, 14, /**/ 6,  10, 0,  2,  4,  8,  12, 14,  //
+      0, 2,  4,  10, 6,  8,  12, 14, /**/ 2,  4,  10, 0,  6,  8,  12, 14,  //
+      0, 4,  10, 2,  6,  8,  12, 14, /**/ 4,  10, 0,  2,  6,  8,  12, 14,  //
+      0, 2,  10, 4,  6,  8,  12, 14, /**/ 2,  10, 0,  4,  6,  8,  12, 14,  //
+      0, 10, 2,  4,  6,  8,  12, 14, /**/ 10, 0,  2,  4,  6,  8,  12, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  6,  8,  0,  10, 12, 14,  //
+      0, 4,  6,  8,  2,  10, 12, 14, /**/ 4,  6,  8,  0,  2,  10, 12, 14,  //
+      0, 2,  6,  8,  4,  10, 12, 14, /**/ 2,  6,  8,  0,  4,  10, 12, 14,  //
+      0, 6,  8,  2,  4,  10, 12, 14, /**/ 6,  8,  0,  2,  4,  10, 12, 14,  //
+      0, 2,  4,  8,  6,  10, 12, 14, /**/ 2,  4,  8,  0,  6,  10, 12, 14,  //
+      0, 4,  8,  2,  6,  10, 12, 14, /**/ 4,  8,  0,  2,  6,  10, 12, 14,  //
+      0, 2,  8,  4,  6,  10, 12, 14, /**/ 2,  8,  0,  4,  6,  10, 12, 14,  //
+      0, 8,  2,  4,  6,  10, 12, 14, /**/ 8,  0,  2,  4,  6,  10, 12, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  6,  0,  8,  10, 12, 14,  //
+      0, 4,  6,  2,  8,  10, 12, 14, /**/ 4,  6,  0,  2,  8,  10, 12, 14,  //
+      0, 2,  6,  4,  8,  10, 12, 14, /**/ 2,  6,  0,  4,  8,  10, 12, 14,  //
+      0, 6,  2,  4,  8,  10, 12, 14, /**/ 6,  0,  2,  4,  8,  10, 12, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  4,  0,  6,  8,  10, 12, 14,  //
+      0, 4,  2,  6,  8,  10, 12, 14, /**/ 4,  0,  2,  6,  8,  10, 12, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 2,  0,  4,  6,  8,  10, 12, 14,  //
+      0, 2,  4,  6,  8,  10, 12, 14, /**/ 0,  2,  4,  6,  8,  10, 12, 14};
+
+  const VFromD<decltype(d8t)> byte_idx{Load(d8, table + mask_bits * 8).raw};
+  const VFromD<decltype(du)> pairs = ZipLower(byte_idx, byte_idx);
+  return BitCast(d, pairs + Set(du, 0x0100));
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_INLINE VFromD<D> IndicesFromBits128(D d, uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 16);
+
+  // Byte-shuffle indices for compressing 32-bit lanes, one 16-byte row per
+  // value of mask_bits. In each row the selected lanes come first, followed by
+  // the remaining lanes. With at most 4 lanes there are only 16 rows, so the
+  // whole index vector is stored and loaded directly.
+  alignas(16) static constexpr uint8_t kByteIndices[256] = {
+      // PrintCompress32x4Tables
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  // 0b0000
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  // 0b0001
+      4,  5,  6,  7,  0,  1,  2,  3,  8,  9,  10, 11, 12, 13, 14, 15,  // 0b0010
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  // 0b0011
+      8,  9,  10, 11, 0,  1,  2,  3,  4,  5,  6,  7,  12, 13, 14, 15,  // 0b0100
+      0,  1,  2,  3,  8,  9,  10, 11, 4,  5,  6,  7,  12, 13, 14, 15,  // 0b0101
+      4,  5,  6,  7,  8,  9,  10, 11, 0,  1,  2,  3,  12, 13, 14, 15,  // 0b0110
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  // 0b0111
+      12, 13, 14, 15, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,  // 0b1000
+      0,  1,  2,  3,  12, 13, 14, 15, 4,  5,  6,  7,  8,  9,  10, 11,  // 0b1001
+      4,  5,  6,  7,  12, 13, 14, 15, 0,  1,  2,  3,  8,  9,  10, 11,  // 0b1010
+      0,  1,  2,  3,  4,  5,  6,  7,  12, 13, 14, 15, 8,  9,  10, 11,  // 0b1011
+      8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  4,  5,  6,  7,   // 0b1100
+      0,  1,  2,  3,  8,  9,  10, 11, 12, 13, 14, 15, 4,  5,  6,  7,   // 0b1101
+      4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,   // 0b1110
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15};  // 0b1111
+
+  const Repartition<uint8_t, decltype(d)> d8;
+  const uint8_t* row = kByteIndices + 16 * mask_bits;
+  return BitCast(d, Load(d8, row));
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 4)>
+HWY_INLINE VFromD<D> IndicesFromNotBits128(D d, uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 16);
+
+  // Byte-shuffle indices for CompressNot of 32-bit lanes, one 16-byte row per
+  // value of mask_bits. In each row the lanes whose mask bit is clear come
+  // first, followed by the lanes whose bit is set. With at most 4 lanes there
+  // are only 16 rows, so the whole index vector is stored and loaded directly.
+  alignas(16) static constexpr uint8_t kByteIndices[256] = {
+      // PrintCompressNot32x4Tables
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  // 0b0000
+      4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,   // 0b0001
+      0,  1,  2,  3,  8,  9,  10, 11, 12, 13, 14, 15, 4,  5,  6,  7,   // 0b0010
+      8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  4,  5,  6,  7,   // 0b0011
+      0,  1,  2,  3,  4,  5,  6,  7,  12, 13, 14, 15, 8,  9,  10, 11,  // 0b0100
+      4,  5,  6,  7,  12, 13, 14, 15, 0,  1,  2,  3,  8,  9,  10, 11,  // 0b0101
+      0,  1,  2,  3,  12, 13, 14, 15, 4,  5,  6,  7,  8,  9,  10, 11,  // 0b0110
+      12, 13, 14, 15, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,  // 0b0111
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  // 0b1000
+      4,  5,  6,  7,  8,  9,  10, 11, 0,  1,  2,  3,  12, 13, 14, 15,  // 0b1001
+      0,  1,  2,  3,  8,  9,  10, 11, 4,  5,  6,  7,  12, 13, 14, 15,  // 0b1010
+      8,  9,  10, 11, 0,  1,  2,  3,  4,  5,  6,  7,  12, 13, 14, 15,  // 0b1011
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  // 0b1100
+      4,  5,  6,  7,  0,  1,  2,  3,  8,  9,  10, 11, 12, 13, 14, 15,  // 0b1101
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  // 0b1110
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15};  // 0b1111
+
+  const Repartition<uint8_t, decltype(d)> d8;
+  const uint8_t* row = kByteIndices + 16 * mask_bits;
+  return BitCast(d, Load(d8, row));
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE VFromD<D> IndicesFromBits128(D d, uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 4);
+
+  // Byte-shuffle indices for compressing 64-bit lanes, one 16-byte row per
+  // value of mask_bits. Only the row for 0b10 (upper lane selected, lower lane
+  // not) swaps the two halves. With 2 lanes there are only 4 rows, so the
+  // whole index vector is stored and loaded directly.
+  alignas(16) static constexpr uint8_t kByteIndices[64] = {
+      // PrintCompress64x2Tables
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15,  // 0b00
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15,  // 0b01
+      8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2,  3,  4,  5,  6,  7,   // 0b10
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15};  // 0b11
+
+  const Repartition<uint8_t, decltype(d)> d8;
+  const uint8_t* row = kByteIndices + 16 * mask_bits;
+  return BitCast(d, Load(d8, row));
+}
+
+template <class D, HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE VFromD<D> IndicesFromNotBits128(D d, uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 4);
+
+  // Byte-shuffle indices for CompressNot of 64-bit lanes, one 16-byte row per
+  // value of mask_bits. Only the row for 0b01 (lower lane set, upper lane
+  // clear) swaps the two halves. With 2 lanes there are only 4 rows, so the
+  // whole index vector is stored and loaded directly.
+  alignas(16) static constexpr uint8_t kByteIndices[64] = {
+      // PrintCompressNot64x2Tables
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15,  // 0b00
+      8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2,  3,  4,  5,  6,  7,   // 0b01
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15,  // 0b10
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15};  // 0b11
+
+  const Repartition<uint8_t, decltype(d)> d8;
+  const uint8_t* row = kByteIndices + 16 * mask_bits;
+  return BitCast(d, Load(d8, row));
+}
+
+template <typename T, size_t N, HWY_IF_NOT_T_SIZE(T, 1)>
+HWY_API Vec128<T, N> CompressBits(Vec128<T, N> v, uint64_t mask_bits) {
+  // mask_bits holds one bit per lane; bits at or above N must be clear.
+  HWY_DASSERT(mask_bits < (1ull << N));
+
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+
+  // Fetch the byte-shuffle pattern for this mask, then permute the bytes of v.
+  const auto shuffle = BitCast(du, detail::IndicesFromBits128(d, mask_bits));
+  const auto bytes = BitCast(du, v);
+  return BitCast(d, TableLookupBytes(bytes, shuffle));
+}
+
+template <typename T, size_t N, HWY_IF_NOT_T_SIZE(T, 1)>
+HWY_API Vec128<T, N> CompressNotBits(Vec128<T, N> v, uint64_t mask_bits) {
+  // mask_bits holds one bit per lane; bits at or above N must be clear.
+  HWY_DASSERT(mask_bits < (1ull << N));
+
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+
+  // Same as CompressBits, but the pattern comes from the "Not" tables.
+  const auto shuffle =
+      BitCast(du, detail::IndicesFromNotBits128(d, mask_bits));
+  const auto bytes = BitCast(du, v);
+  return BitCast(d, TableLookupBytes(bytes, shuffle));
+}
+
+}  // namespace detail
+
+// Single-lane vectors have nothing to reorder, so the input is returned
+// unchanged and the mask is ignored.
+template <typename T>
+HWY_API Vec128<T, 1> Compress(Vec128<T, 1> v, Mask128<T, 1> /*m*/) {
+  return v;
+}
+
+// Two 64-bit lanes: the only reordering Compress can require is a swap.
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T> Compress(Vec128<T> v, Mask128<T> mask) {
+  const DFromV<decltype(v)> d;
+  const Vec128<T> vm = VecFromMask(d, mask);
+  const Vec128<T> lower_set = DupEven(vm);
+  const Vec128<T> upper_set = DupOdd(vm);
+  // Swap the halves only if mask[1] = 1 and mask[0] = 0; otherwise keep v.
+  const Vec128<T> need_swap = AndNot(lower_set, upper_set);
+  const Vec128<T> swapped = Shuffle01(v);
+  return IfVecThenElse(need_swap, swapped, v);
+}
+
+// 2- or 4-byte lanes: convert the mask to bits and use the table-driven
+// detail::CompressBits.
+template <typename T, size_t N, HWY_IF_T_SIZE_ONE_OF(T, (1 << 2) | (1 << 4))>
+HWY_API Vec128<T, N> Compress(Vec128<T, N> v, Mask128<T, N> mask) {
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  return detail::CompressBits(v, mask_bits);
+}
+
+// ------------------------------ CompressNot
+
+// Single-lane vectors have nothing to reorder, so the input is returned
+// unchanged and the mask is ignored.
+template <typename T>
+HWY_API Vec128<T, 1> CompressNot(Vec128<T, 1> v, Mask128<T, 1> /*m*/) {
+  return v;
+}
+
+// Two 64-bit lanes: the only reordering CompressNot can require is a swap.
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec128<T> CompressNot(Vec128<T> v, Mask128<T> mask) {
+  const DFromV<decltype(v)> d;
+  const Vec128<T> vm = VecFromMask(d, mask);
+  const Vec128<T> lower_set = DupEven(vm);
+  const Vec128<T> upper_set = DupOdd(vm);
+  // Swap the halves only if mask[1] = 0 and mask[0] = 1; otherwise keep v.
+  const Vec128<T> need_swap = AndNot(upper_set, lower_set);
+  const Vec128<T> swapped = Shuffle01(v);
+  return IfVecThenElse(need_swap, swapped, v);
+}
+
+// 2- or 4-byte lanes: table-driven CompressNot.
+template <typename T, size_t N, HWY_IF_T_SIZE_ONE_OF(T, (1 << 2) | (1 << 4))>
+HWY_API Vec128<T, N> CompressNot(Vec128<T, N> v, Mask128<T, N> mask) {
+  if (N < 16 / sizeof(T)) {
+    // Partial vector: BitsFromMask clears the upper bits, so the Not() cannot
+    // be folded into the table. Negate the mask and use the regular tables.
+    const uint64_t inverted_bits = detail::BitsFromMask(Not(mask));
+    return detail::CompressBits(v, inverted_bits);
+  } else {
+    // Full vector: the dedicated "Not" tables already encode the inversion.
+    const uint64_t mask_bits = detail::BitsFromMask(mask);
+    return detail::CompressNotBits(v, mask_bits);
+  }
+}
+
+// ------------------------------ CompressBlocksNot
+// A 128-bit vector is returned as-is; the mask is ignored.
+HWY_API Vec128<uint64_t> CompressBlocksNot(Vec128<uint64_t> v,
+                                           Mask128<uint64_t> /* m */) {
+  return v;
+}
+
+// Compress variant whose mask is read from packed bits in memory (one bit per
+// lane, (N + 7) / 8 bytes are read from `bits`).
+template <typename T, size_t N, HWY_IF_NOT_T_SIZE(T, 1)>
+HWY_API Vec128<T, N> CompressBits(Vec128<T, N> v,
+                                  const uint8_t* HWY_RESTRICT bits) {
+  constexpr size_t kNumBytes = (N + 7) / 8;
+
+  uint64_t mask_bits = 0;
+  CopyBytes<kNumBytes>(bits, &mask_bits);
+  if (N < 8) {
+    // A whole byte was copied; discard the bits beyond the N lanes.
+    const uint64_t valid_lanes = (1ull << N) - 1;
+    mask_bits &= valid_lanes;
+  }
+
+  return detail::CompressBits(v, mask_bits);
+}
+
+// ------------------------------ CompressStore, CompressBitsStore
+
+// Compresses v according to m, stores the whole resulting vector to
+// `unaligned` via StoreU, and returns the number of set mask bits.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API size_t CompressStore(VFromD<D> v, MFromD<D> m, D d,
+                             TFromD<D>* HWY_RESTRICT unaligned) {
+  const uint64_t mask_bits = detail::BitsFromMask(m);
+  HWY_DASSERT(mask_bits < (1ull << MaxLanes(d)));
+  const size_t num_selected = PopCount(mask_bits);
+
+  // Shuffle + plain store instead of _mm_maskmoveu_si128, which has >500 cycle
+  // latency because it bypasses caches.
+  const RebindToUnsigned<decltype(d)> du;
+  const auto shuffle = BitCast(du, detail::IndicesFromBits128(d, mask_bits));
+  const auto packed = BitCast(d, TableLookupBytes(BitCast(du, v), shuffle));
+  StoreU(packed, d, unaligned);
+
+  detail::MaybeUnpoison(unaligned, num_selected);
+  return num_selected;
+}
+
+// Compresses v according to m and writes only the first `count` lanes to
+// `unaligned` (BlendedStore with FirstN), where count is the number of set
+// mask bits. Returns count.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API size_t CompressBlendedStore(VFromD<D> v, MFromD<D> m, D d,
+                                    TFromD<D>* HWY_RESTRICT unaligned) {
+  const uint64_t mask_bits = detail::BitsFromMask(m);
+  HWY_DASSERT(mask_bits < (1ull << MaxLanes(d)));
+  const size_t num_selected = PopCount(mask_bits);
+
+  // Shuffle + blended store instead of _mm_maskmoveu_si128, which has >500
+  // cycle latency because it bypasses caches.
+  const RebindToUnsigned<decltype(d)> du;
+  const auto shuffle = BitCast(du, detail::IndicesFromBits128(d, mask_bits));
+  const auto packed = BitCast(d, TableLookupBytes(BitCast(du, v), shuffle));
+  BlendedStore(packed, FirstN(d, num_selected), d, unaligned);
+
+  detail::MaybeUnpoison(unaligned, num_selected);
+  return num_selected;
+}
+
+// Like CompressStore, but the mask is read from packed bits in memory (one bit
+// per lane). Stores the whole compressed vector and returns the number of set
+// bits among the valid lanes.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API size_t CompressBitsStore(VFromD<D> v, const uint8_t* HWY_RESTRICT bits,
+                                 D d, TFromD<D>* HWY_RESTRICT unaligned) {
+  constexpr size_t kN = MaxLanes(d);
+  constexpr size_t kNumBytes = (kN + 7) / 8;
+
+  uint64_t mask_bits = 0;
+  CopyBytes<kNumBytes>(bits, &mask_bits);
+  if (kN < 8) {
+    // A whole byte was copied; discard the bits beyond the kN lanes.
+    const uint64_t valid_lanes = (1ull << kN) - 1;
+    mask_bits &= valid_lanes;
+  }
+  const size_t num_selected = PopCount(mask_bits);
+
+  // Shuffle + plain store instead of _mm_maskmoveu_si128, which has >500 cycle
+  // latency because it bypasses caches.
+  const RebindToUnsigned<decltype(d)> du;
+  const auto shuffle = BitCast(du, detail::IndicesFromBits128(d, mask_bits));
+  const auto packed = BitCast(d, TableLookupBytes(BitCast(du, v), shuffle));
+  StoreU(packed, d, unaligned);
+
+  detail::MaybeUnpoison(unaligned, num_selected);
+  return num_selected;
+}
+
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// ------------------------------ Expand
+
+// Otherwise, use the generic_ops-inl.h fallback.
+#if HWY_TARGET <= HWY_AVX3 || HWY_IDE
+
+// The native instructions for 8/16-bit actually require VBMI2 (HWY_AVX3_DL),
+// but we still want to override generic_ops-inl's table-based implementation
+// whenever we have the 32-bit expand provided by AVX3.
+#ifdef HWY_NATIVE_EXPAND
+#undef HWY_NATIVE_EXPAND
+#else
+#define HWY_NATIVE_EXPAND
+#endif
+
+namespace detail {
+
+#if HWY_TARGET <= HWY_AVX3_DL || HWY_IDE  // VBMI2
+
+// 8-bit lanes: thin wrapper over the zero-masking expand intrinsic.
+template <size_t N>
+HWY_INLINE Vec128<uint8_t, N> NativeExpand(Vec128<uint8_t, N> v,
+                                           Mask128<uint8_t, N> mask) {
+  const auto expanded = _mm_maskz_expand_epi8(mask.raw, v.raw);
+  return Vec128<uint8_t, N>{expanded};
+}
+
+// 16-bit lanes: thin wrapper over the zero-masking expand intrinsic.
+template <size_t N>
+HWY_INLINE Vec128<uint16_t, N> NativeExpand(Vec128<uint16_t, N> v,
+                                            Mask128<uint16_t, N> mask) {
+  const auto expanded = _mm_maskz_expand_epi16(mask.raw, v.raw);
+  return Vec128<uint16_t, N>{expanded};
+}
+
+// 8-bit lanes: thin wrapper over the zero-masking expand-load intrinsic, which
+// reads directly from `unaligned`.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_U8_D(D)>
+HWY_INLINE VFromD<D> NativeLoadExpand(MFromD<D> mask, D /* d */,
+                                      const uint8_t* HWY_RESTRICT unaligned) {
+  const auto expanded = _mm_maskz_expandloadu_epi8(mask.raw, unaligned);
+  return VFromD<D>{expanded};
+}
+
+// 16-bit lanes: thin wrapper over the zero-masking expand-load intrinsic,
+// which reads directly from `unaligned`.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_U16_D(D)>
+HWY_INLINE VFromD<D> NativeLoadExpand(MFromD<D> mask, D /* d */,
+                                      const uint16_t* HWY_RESTRICT unaligned) {
+  const auto expanded = _mm_maskz_expandloadu_epi16(mask.raw, unaligned);
+  return VFromD<D>{expanded};
+}
+
+#endif  // HWY_TARGET <= HWY_AVX3_DL
+
+// 32-bit lanes: thin wrapper over the zero-masking expand intrinsic.
+template <size_t N>
+HWY_INLINE Vec128<uint32_t, N> NativeExpand(Vec128<uint32_t, N> v,
+                                            Mask128<uint32_t, N> mask) {
+  const auto expanded = _mm_maskz_expand_epi32(mask.raw, v.raw);
+  return Vec128<uint32_t, N>{expanded};
+}
+
+// 64-bit lanes: thin wrapper over the zero-masking expand intrinsic.
+template <size_t N>
+HWY_INLINE Vec128<uint64_t, N> NativeExpand(Vec128<uint64_t, N> v,
+                                            Mask128<uint64_t, N> mask) {
+  const auto expanded = _mm_maskz_expand_epi64(mask.raw, v.raw);
+  return Vec128<uint64_t, N>{expanded};
+}
+
+// 32-bit lanes: thin wrapper over the zero-masking expand-load intrinsic,
+// which reads directly from `unaligned`.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_U32_D(D)>
+HWY_INLINE VFromD<D> NativeLoadExpand(MFromD<D> mask, D /* d */,
+                                      const uint32_t* HWY_RESTRICT unaligned) {
+  const auto expanded = _mm_maskz_expandloadu_epi32(mask.raw, unaligned);
+  return VFromD<D>{expanded};
+}
+
+// 64-bit lanes: thin wrapper over the zero-masking expand-load intrinsic,
+// which reads directly from `unaligned`.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16), HWY_IF_U64_D(D)>
+HWY_INLINE VFromD<D> NativeLoadExpand(MFromD<D> mask, D /* d */,
+                                      const uint64_t* HWY_RESTRICT unaligned) {
+  const auto expanded = _mm_maskz_expandloadu_epi64(mask.raw, unaligned);
+  return VFromD<D>{expanded};
+}
+
+}  // namespace detail
+
+// Otherwise, 8/16-bit are implemented in x86_512 using PromoteTo.
+#if HWY_TARGET <= HWY_AVX3_DL || HWY_IDE  // VBMI2
+
+// 8/16-bit lanes: reinterpret as unsigned and forward to detail::NativeExpand.
+template <typename T, size_t N, HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2))>
+HWY_API Vec128<T, N> Expand(Vec128<T, N> v, Mask128<T, N> mask) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto vu = BitCast(du, v);
+  const MFromD<decltype(du)> mu = RebindMask(du, mask);
+  const auto expanded = detail::NativeExpand(vu, mu);
+  return BitCast(d, expanded);
+}
+
+#endif  // HWY_TARGET <= HWY_AVX3_DL
+
+// 32/64-bit lanes: reinterpret as unsigned and forward to
+// detail::NativeExpand.
+template <typename T, size_t N, HWY_IF_T_SIZE_ONE_OF(T, (1 << 4) | (1 << 8))>
+HWY_API Vec128<T, N> Expand(Vec128<T, N> v, Mask128<T, N> mask) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto vu = BitCast(du, v);
+  const MFromD<decltype(du)> mu = RebindMask(du, mask);
+  const auto expanded = detail::NativeExpand(vu, mu);
+  return BitCast(d, expanded);
+}
+
+// ------------------------------ LoadExpand
+
+// 8/16-bit lanes. With VBMI2 the expand-load is done natively on the unsigned
+// reinterpretation; otherwise the vector is loaded with LoadU and then passed
+// to Expand.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2))>
+HWY_API VFromD<D> LoadExpand(MFromD<D> mask, D d,
+                             const TFromD<D>* HWY_RESTRICT unaligned) {
+#if HWY_TARGET <= HWY_AVX3_DL  // VBMI2
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  const MFromD<decltype(du)> mu = RebindMask(du, mask);
+  const TU* HWY_RESTRICT pu = reinterpret_cast<const TU*>(unaligned);
+  const auto expanded = detail::NativeLoadExpand(mu, du, pu);
+  return BitCast(d, expanded);
+#else
+  const auto loaded = LoadU(d, unaligned);
+  return Expand(loaded, mask);
+#endif
+}
+
+// 32/64-bit lanes. On AVX3 the expand-load is done natively on the unsigned
+// reinterpretation; otherwise the vector is loaded with LoadU and then passed
+// to Expand.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 4) | (1 << 8))>
+HWY_API VFromD<D> LoadExpand(MFromD<D> mask, D d,
+                             const TFromD<D>* HWY_RESTRICT unaligned) {
+#if HWY_TARGET <= HWY_AVX3
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  const MFromD<decltype(du)> mu = RebindMask(du, mask);
+  const TU* HWY_RESTRICT pu = reinterpret_cast<const TU*>(unaligned);
+  const auto expanded = detail::NativeLoadExpand(mu, du, pu);
+  return BitCast(d, expanded);
+#else
+  const auto loaded = LoadU(d, unaligned);
+  return Expand(loaded, mask);
+#endif
+}
+
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// ------------------------------ StoreInterleaved2/3/4
+
+// HWY_NATIVE_LOAD_STORE_INTERLEAVED not set, hence defined in
+// generic_ops-inl.h.
+
+// ------------------------------ Additional mask logical operations
+
+#if HWY_TARGET <= HWY_AVX3
+namespace detail {
+
+// Isolates the lowest set bit of x (x & -x) for types of at most 4 bytes.
+// clang-cl uses the portable expression instead of the intrinsic.
+template <class T, HWY_IF_LANES_LE(sizeof(T), 4)>
+static HWY_INLINE uint32_t AVX3Blsi(T x) {
+  using TU = MakeUnsigned<T>;
+  // Convert via the unsigned type of the same size, then widen to 32 bits.
+  const uint32_t bits = static_cast<uint32_t>(static_cast<TU>(x));
+#if HWY_COMPILER_CLANGCL
+  const uint32_t lowest = static_cast<uint32_t>(bits & (0u - bits));
+#else
+  const uint32_t lowest = static_cast<uint32_t>(_blsi_u32(bits));
+#endif
+  return lowest;
+}
+// Isolates the lowest set bit of x (x & -x) for 8-byte types. clang-cl and
+// 32-bit x86 builds use the portable expression instead of the intrinsic.
+template <class T, HWY_IF_T_SIZE(T, 8)>
+static HWY_INLINE uint64_t AVX3Blsi(T x) {
+  const uint64_t bits = static_cast<uint64_t>(x);
+#if HWY_COMPILER_CLANGCL || HWY_ARCH_X86_32
+  const uint64_t lowest = static_cast<uint64_t>(bits & (0ULL - bits));
+#else
+  const uint64_t lowest = static_cast<uint64_t>(_blsi_u64(bits));
+#endif
+  return lowest;
+}
+
+// Returns x ^ (x - 1) for types of at most 4 bytes: all bits up to and
+// including the lowest set bit of x. clang-cl uses the portable expression
+// instead of the intrinsic.
+template <class T, HWY_IF_LANES_LE(sizeof(T), 4)>
+static HWY_INLINE uint32_t AVX3Blsmsk(T x) {
+  using TU = MakeUnsigned<T>;
+  // Convert via the unsigned type of the same size, then widen to 32 bits.
+  const uint32_t bits = static_cast<uint32_t>(static_cast<TU>(x));
+#if HWY_COMPILER_CLANGCL
+  const uint32_t up_to_lowest = static_cast<uint32_t>(bits ^ (bits - 1u));
+#else
+  const uint32_t up_to_lowest = static_cast<uint32_t>(_blsmsk_u32(bits));
+#endif
+  return up_to_lowest;
+}
+// Returns x ^ (x - 1) for 8-byte types: all bits up to and including the
+// lowest set bit of x. clang-cl and 32-bit x86 builds use the portable
+// expression instead of the intrinsic.
+template <class T, HWY_IF_T_SIZE(T, 8)>
+static HWY_INLINE uint64_t AVX3Blsmsk(T x) {
+  const uint64_t bits = static_cast<uint64_t>(x);
+#if HWY_COMPILER_CLANGCL || HWY_ARCH_X86_32
+  const uint64_t up_to_lowest = static_cast<uint64_t>(bits ^ (bits - 1ULL));
+#else
+  const uint64_t up_to_lowest = static_cast<uint64_t>(_blsmsk_u64(bits));
+#endif
+  return up_to_lowest;
+}
+
+}  // namespace detail
+
+// Returns a mask that is set from the first set lane of `mask` upwards.
+// Negating the isolated lowest bit sets that bit and every bit above it; the
+// result is then limited to the N valid lanes.
+template <class T, size_t N>
+HWY_API Mask128<T, N> SetAtOrAfterFirst(Mask128<T, N> mask) {
+  using Raw = typename Mask128<T, N>::Raw;
+  constexpr uint32_t kActiveElemMask = (uint32_t{1} << N) - 1;
+  const auto lowest = detail::AVX3Blsi(mask.raw);
+  const auto at_or_after = (0u - lowest) & kActiveElemMask;
+  return Mask128<T, N>{static_cast<Raw>(at_or_after)};
+}
+// Returns a mask that is set only in the lanes below the first set lane of
+// `mask`. Subtracting one from the isolated lowest bit sets exactly the bits
+// below it (all bits when the input is zero); the result is then limited to
+// the N valid lanes.
+template <class T, size_t N>
+HWY_API Mask128<T, N> SetBeforeFirst(Mask128<T, N> mask) {
+  using Raw = typename Mask128<T, N>::Raw;
+  constexpr uint32_t kActiveElemMask = (uint32_t{1} << N) - 1;
+  const auto lowest = detail::AVX3Blsi(mask.raw);
+  const auto before = (lowest - 1u) & kActiveElemMask;
+  return Mask128<T, N>{static_cast<Raw>(before)};
+}
+// Returns a mask that is set up to and including the first set lane of
+// `mask`, limited to the N valid lanes.
+template <class T, size_t N>
+HWY_API Mask128<T, N> SetAtOrBeforeFirst(Mask128<T, N> mask) {
+  using Raw = typename Mask128<T, N>::Raw;
+  constexpr uint32_t kActiveElemMask = (uint32_t{1} << N) - 1;
+  const auto at_or_before = detail::AVX3Blsmsk(mask.raw) & kActiveElemMask;
+  return Mask128<T, N>{static_cast<Raw>(at_or_before)};
+}
+// Returns a mask in which only the first set lane of `mask` remains set.
+template <class T, size_t N>
+HWY_API Mask128<T, N> SetOnlyFirst(Mask128<T, N> mask) {
+  using Raw = typename Mask128<T, N>::Raw;
+  const auto lowest = detail::AVX3Blsi(mask.raw);
+  return Mask128<T, N>{static_cast<Raw>(lowest)};
+}
+#else   // AVX2 or below
+// Single lane: the mask is returned unchanged.
+template <class T>
+HWY_API Mask128<T, 1> SetAtOrAfterFirst(Mask128<T, 1> mask) {
+  return mask;
+}
+// Two lanes: OR the mask with InterleaveLower of itself, so that a set lower
+// lane also sets the upper lane.
+template <class T>
+HWY_API Mask128<T, 2> SetAtOrAfterFirst(Mask128<T, 2> mask) {
+  const FixedTag<T, 2> d;
+  const auto vm = VecFromMask(d, mask);
+  const auto lower_dup = InterleaveLower(vm, vm);
+  return MaskFromVec(Or(vm, lower_dup));
+}
+// More than two lanes in at most 8 bytes: view the mask vector as one 64-bit
+// integer x and compute x | -x, which keeps the lowest set bit and sets every
+// bit above it.
+template <class T, size_t N, HWY_IF_LANES_GT(N, 2), HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_API Mask128<T, N> SetAtOrAfterFirst(Mask128<T, N> mask) {
+  const Simd<T, N, 0> d;
+  const Full64<int64_t> di64;
+  const auto vm = VecFromMask(d, mask);
+  const auto as_i64 = ResizeBitCast(di64, vm);
+  const auto negated = ResizeBitCast(d, Neg(as_i64));
+  return MaskFromVec(Or(vm, negated));
+}
+// Full 128-bit vectors with lanes smaller than 8 bytes. Each 64-bit half is
+// handled with x | -x; a set bit in the lower half must additionally fill the
+// whole upper half.
+template <class T, HWY_IF_NOT_T_SIZE(T, 8)>
+HWY_API Mask128<T> SetAtOrAfterFirst(Mask128<T> mask) {
+  const Full128<T> d;
+  const Repartition<int64_t, decltype(d)> di64;
+  const Repartition<float, decltype(d)> df32;
+  const Repartition<int32_t, decltype(d)> di32;
+  using VF = VFromD<decltype(df32)>;
+
+  // Per 64-bit half: keep the lowest set bit and set everything above it.
+  const auto halves = BitCast(di64, VecFromMask(d, mask));
+  const auto at_or_after = Or(halves, Neg(halves));
+
+  // Copy the sign bit of the first int64_t lane to the second int64_t lane:
+  // the shuffle leaves zero in the lower half and places the top 32 bits of
+  // the first int64_t lane in both 32-bit lanes of the upper half.
+  const VF shuffled{_mm_shuffle_ps(Zero(df32).raw,
+                                   BitCast(df32, at_or_after).raw,
+                                   _MM_SHUFFLE(1, 1, 0, 0))};
+  const auto carry = BroadcastSignBit(BitCast(di32, shuffled));
+
+  return MaskFromVec(BitCast(d, Or(at_or_after, BitCast(di64, carry))));
+}
+
+// The lanes before the first set lane are the complement of the lanes at or
+// after it.
+template <class T, size_t N>
+HWY_API Mask128<T, N> SetBeforeFirst(Mask128<T, N> mask) {
+  const auto at_or_after = SetAtOrAfterFirst(mask);
+  return Not(at_or_after);
+}
+
+// Single lane: the mask is returned unchanged.
+template <class T>
+HWY_API Mask128<T, 1> SetOnlyFirst(Mask128<T, 1> mask) {
+  return mask;
+}
+// Two lanes: lane 0 keeps its value; lane 1 is kept only if lane 0 is clear.
+template <class T>
+HWY_API Mask128<T, 2> SetOnlyFirst(Mask128<T, 2> mask) {
+  const FixedTag<T, 2> d;
+  const RebindToSigned<decltype(d)> di;
+
+  const auto vm = BitCast(di, VecFromMask(d, mask));
+  const auto zero = Zero(di);
+  // InterleaveLower(zero, vm) is compared against zero: lane 0 always passes,
+  // lane 1 passes only when lane 0 of the mask is zero.
+  const auto allowed = VecFromMask(di, InterleaveLower(zero, vm) == zero);
+  return MaskFromVec(BitCast(d, And(vm, allowed)));
+}
+// More than two lanes in at most 8 bytes: view the mask vector as one 64-bit
+// integer x, isolate its lowest set bit with x & -x, then negate per lane so
+// that the lane holding that bit becomes all-ones and the others stay zero.
+template <class T, size_t N, HWY_IF_LANES_GT(N, 2), HWY_IF_V_SIZE_LE(T, N, 8)>
+HWY_API Mask128<T, N> SetOnlyFirst(Mask128<T, N> mask) {
+  const Simd<T, N, 0> d;
+  const RebindToSigned<decltype(d)> di;
+  const Full64<int64_t> di64;
+
+  const auto as_i64 = ResizeBitCast(di64, VecFromMask(d, mask));
+  const auto lowest_bit = And(as_i64, Neg(as_i64));
+  const auto first_lane = BitCast(d, Neg(ResizeBitCast(di, lowest_bit)));
+  return MaskFromVec(first_lane);
+}
+// Full 128-bit vectors with lanes smaller than 8 bytes. Each 64-bit half
+// isolates its own first set lane; the upper half's result is discarded when
+// the lower half of the mask is non-zero.
+template <class T, HWY_IF_NOT_T_SIZE(T, 8)>
+HWY_API Mask128<T> SetOnlyFirst(Mask128<T> mask) {
+  const Full128<T> d;
+  const RebindToSigned<decltype(d)> di;
+  const Repartition<int64_t, decltype(d)> di64;
+
+  const auto zero = Zero(di64);
+  const auto halves = BitCast(di64, VecFromMask(d, mask));
+  // Lower half always passes; upper half passes only if the lower is zero.
+  const auto allowed =
+      VecFromMask(di64, InterleaveLower(zero, halves) == zero);
+  // x & -x per 64-bit half, then per-lane negation to widen the bit to a lane.
+  const auto lowest_bits = And(halves, Neg(halves));
+  const auto first_lanes = Neg(BitCast(di, lowest_bits));
+  return MaskFromVec(BitCast(d, And(first_lanes, BitCast(di, allowed))));
+}
+
+// Single lane: the result is a set mask regardless of the input, built from a
+// signed vector holding -1.
+template <class T>
+HWY_API Mask128<T, 1> SetAtOrBeforeFirst(Mask128<T, 1> /*mask*/) {
+  const FixedTag<T, 1> d;
+  const RebindToSigned<decltype(d)> di;
+  using TI = MakeSigned<T>;
+
+  const auto all_ones = Set(di, TI(-1));
+  return RebindMask(d, MaskFromVec(all_ones));
+}
+// Multiple lanes: move the mask up by one lane, then take the lanes before the
+// first set lane of the shifted mask.
+template <class T, size_t N, HWY_IF_LANES_GT(N, 1)>
+HWY_API Mask128<T, N> SetAtOrBeforeFirst(Mask128<T, N> mask) {
+  const Simd<T, N, 0> d;
+  const auto shifted = ShiftLeftLanes<1>(VecFromMask(d, mask));
+  return SetBeforeFirst(MaskFromVec(shifted));
+}
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// ------------------------------ Reductions
+
+namespace detail {
+
+// N=1: no-op
+// One lane: the vector already is its own sum.
+template <typename T>
+HWY_INLINE Vec128<T, 1> SumOfLanes(Vec128<T, 1> v) {
+  return v;
+}
+// One lane: the vector already is its own minimum.
+template <typename T>
+HWY_INLINE Vec128<T, 1> MinOfLanes(Vec128<T, 1> v) {
+  return v;
+}
+// One lane: the vector already is its own maximum.
+template <typename T>
+HWY_INLINE Vec128<T, 1> MaxOfLanes(Vec128<T, 1> v) {
+  return v;
+}
+
+// N=2
+// Two lanes: add the vector to its lane-swapped copy so both lanes hold the
+// sum.
+template <typename T>
+HWY_INLINE Vec128<T, 2> SumOfLanes(Vec128<T, 2> v10) {
+  const DFromV<decltype(v10)> d;
+  const Vec128<T, 2> v01 = Reverse2(d, v10);
+  return Add(v10, v01);
+}
+// Two lanes: take the minimum of the vector and its lane-swapped copy so both
+// lanes hold the result.
+template <typename T>
+HWY_INLINE Vec128<T, 2> MinOfLanes(Vec128<T, 2> v10) {
+  const DFromV<decltype(v10)> d;
+  const Vec128<T, 2> v01 = Reverse2(d, v10);
+  return Min(v10, v01);
+}
+// Two lanes: take the maximum of the vector and its lane-swapped copy so both
+// lanes hold the result.
+template <typename T>
+HWY_INLINE Vec128<T, 2> MaxOfLanes(Vec128<T, 2> v10) {
+  const DFromV<decltype(v10)> d;
+  const Vec128<T, 2> v01 = Reverse2(d, v10);
+  return Max(v10, v01);
+}
+
+// N=4 (only 16/32-bit, else >128-bit)
+// Four lanes: two folding steps. Adding the 4-reversed vector yields the pair
+// sums (0+3, 1+2, 1+2, 0+3); adding the 2-reversed pair sums then leaves the
+// total in every lane.
+template <typename T, HWY_IF_T_SIZE_ONE_OF(T, (1 << 2) | (1 << 4))>
+HWY_INLINE Vec128<T, 4> SumOfLanes(Vec128<T, 4> v3210) {
+  using V = decltype(v3210);
+  const DFromV<V> d;
+  const V pairs = Add(v3210, Reverse4(d, v3210));
+  const V pairs_swapped = Reverse2(d, pairs);
+  return Add(pairs, pairs_swapped);
+}
+// Four lanes: two folding steps. Min with the 4-reversed vector yields the
+// pairwise minima of lanes (0,3) and (1,2); Min with the 2-reversed result
+// then leaves the overall minimum in every lane.
+template <typename T, HWY_IF_T_SIZE_ONE_OF(T, (1 << 2) | (1 << 4))>
+HWY_INLINE Vec128<T, 4> MinOfLanes(Vec128<T, 4> v3210) {
+  using V = decltype(v3210);
+  const DFromV<V> d;
+  const V pairs = Min(v3210, Reverse4(d, v3210));
+  const V pairs_swapped = Reverse2(d, pairs);
+  return Min(pairs, pairs_swapped);
+}
+// Four lanes: two folding steps. Max with the 4-reversed vector yields the
+// pairwise maxima of lanes (0,3) and (1,2); Max with the 2-reversed result
+// then leaves the overall maximum in every lane.
+template <typename T, HWY_IF_T_SIZE_ONE_OF(T, (1 << 2) | (1 << 4))>
+HWY_INLINE Vec128<T, 4> MaxOfLanes(Vec128<T, 4> v3210) {
+  using V = decltype(v3210);
+  const DFromV<V> d;
+  const V pairs = Max(v3210, Reverse4(d, v3210));
+  const V pairs_swapped = Reverse2(d, pairs);
+  return Max(pairs, pairs_swapped);
+}
+
+#undef HWY_X86_IF_NOT_MINPOS
+#if HWY_TARGET <= HWY_SSE4
+// Skip the T_SIZE = 2 overload in favor of the following two.
+#define HWY_X86_IF_NOT_MINPOS(T) \
+  hwy::EnableIf<!IsSame<T, uint16_t>()>* = nullptr
+
+// Full u16 vector: a single _mm_minpos_epu16, whose lane 0 is then broadcast
+// to all lanes.
+HWY_INLINE Vec128<uint16_t> MinOfLanes(Vec128<uint16_t> v) {
+  const Vec128<uint16_t> minpos{_mm_minpos_epu16(v.raw)};
+  return Broadcast<0>(minpos);
+}
+
+HWY_INLINE Vec128<uint16_t> MaxOfLanes(Vec128<uint16_t> v) {
+  // max(v) == limit - min(limit - v), which reuses the u16 MinOfLanes.
+  const auto all_max = Set(DFromV<decltype(v)>(), LimitsMax<uint16_t>());
+  return all_max - MinOfLanes(all_max - v);
+}
+#else
+#define HWY_X86_IF_NOT_MINPOS(T) hwy::EnableIf<true>* = nullptr
+#endif  // HWY_TARGET <= HWY_SSE4
+
+// N=8 (only 16-bit, else >128-bit)
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE Vec128<T, 8> SumOfLanes(Vec128<T, 8> v76543210) {
+  const DFromV<decltype(v76543210)> d;
+  // Three folding steps (Reverse8, Reverse4, Reverse2); afterwards every
+  // lane holds the sum of all eight input lanes.
+  const auto sum2 = Add(v76543210, Reverse8(d, v76543210));  // pairs
+  const auto sum4 = Add(sum2, Reverse4(d, sum2));            // groups of 4
+  return Add(sum4, Reverse2(d, sum4));                       // all 8
+}
+template <typename T, HWY_IF_T_SIZE(T, 2), HWY_X86_IF_NOT_MINPOS(T)>
+HWY_INLINE Vec128<T, 8> MinOfLanes(Vec128<T, 8> v76543210) {
+  const DFromV<decltype(v76543210)> d;
+  // Three folding steps (Reverse8, Reverse4, Reverse2); afterwards every
+  // lane holds the minimum of all eight input lanes.
+  const auto min2 = Min(v76543210, Reverse8(d, v76543210));  // pairs
+  const auto min4 = Min(min2, Reverse4(d, min2));            // groups of 4
+  return Min(min4, Reverse2(d, min4));                       // all 8
+}
+template <typename T, HWY_IF_T_SIZE(T, 2), HWY_X86_IF_NOT_MINPOS(T)>
+HWY_INLINE Vec128<T, 8> MaxOfLanes(Vec128<T, 8> v76543210) {
+  const DFromV<decltype(v76543210)> d;
+  // Three folding steps (Reverse8, Reverse4, Reverse2); afterwards every
+  // lane holds the maximum of all eight input lanes.
+  const auto max2 = Max(v76543210, Reverse8(d, v76543210));  // pairs
+  const auto max4 = Max(max2, Reverse4(d, max2));            // groups of 4
+  return Max(max4, Reverse2(d, max4));                       // all 8
+}
+
+template <typename T, size_t N, HWY_IF_NOT_T_SIZE(T, 1)>
+HWY_INLINE T ReduceSum(Vec128<T, N> v) {
+  return GetLane(SumOfLanes(v));  // non-8-bit lanes: scalar from SumOfLanes
+}
+
+// u8, N=8, N=16:
+HWY_INLINE uint8_t ReduceSum(Vec64<uint8_t> v) {  // keeps the low 8 bits
+  return static_cast<uint8_t>(GetLane(SumsOf8(v)) & 0xFF);
+}
+HWY_INLINE Vec64<uint8_t> SumOfLanes(Vec64<uint8_t> v) {
+  const uint8_t total = ReduceSum(v);  // scalar sum, then broadcast
+  return Set(Full64<uint8_t>(), total);
+}
+HWY_INLINE uint8_t ReduceSum(Vec128<uint8_t> v) {
+  const uint64_t total = ReduceSum(SumsOf8(v));  // sum of SumsOf8 partials
+  return static_cast<uint8_t>(total & 0xFF);     // keep the low 8 bits
+}
+HWY_INLINE Vec128<uint8_t> SumOfLanes(Vec128<uint8_t> v) {
+  const uint8_t total = ReduceSum(v);  // scalar sum, then broadcast
+  return Set(DFromV<decltype(v)>(), total);
+}
+template <size_t N, HWY_IF_V_SIZE_GT(int8_t, N, 4)>
+HWY_INLINE int8_t ReduceSum(const Vec128<int8_t, N> v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto is_neg = v < Zero(d);
+
+  // Sum non-negative lanes and |negative lanes| separately as u8, then subtract.
+  const auto positive = SumsOf8(BitCast(du, IfThenZeroElse(is_neg, v)));
+  const auto negative = SumsOf8(BitCast(du, IfThenElseZero(is_neg, Abs(v))));
+  return static_cast<int8_t>(ReduceSum(positive - negative) & 0xFF);  // low 8
+}
+template <size_t N, HWY_IF_V_SIZE_GT(int8_t, N, 4)>
+HWY_INLINE Vec128<int8_t, N> SumOfLanes(const Vec128<int8_t, N> v) {
+  const int8_t total = ReduceSum(v);  // scalar sum, then broadcast
+  return Set(DFromV<decltype(v)>(), total);
+}
+
+#if HWY_TARGET <= HWY_SSE4
+HWY_INLINE Vec64<uint8_t> MinOfLanes(Vec64<uint8_t> v) {
+  const DFromV<decltype(v)> d;
+  const Rebind<uint16_t, decltype(d)> d16;  // same lane count, u16 lanes
+  return TruncateTo(d, MinOfLanes(PromoteTo(d16, v)));  // u16 min, then narrow
+}
+HWY_INLINE Vec128<uint8_t> MinOfLanes(Vec128<uint8_t> v) {
+  const Half<DFromV<decltype(v)>> d;  // tag for one half of the vector
+  Vec64<uint8_t> result =
+      Min(MinOfLanes(UpperHalf(d, v)), MinOfLanes(LowerHalf(d, v)));
+  return Combine(DFromV<decltype(v)>(), result, result);  // same in both halves
+}
+
+HWY_INLINE Vec64<uint8_t> MaxOfLanes(Vec64<uint8_t> v) {
+  const Vec64<uint8_t> m(Set(DFromV<decltype(v)>(), LimitsMax<uint8_t>()));
+  return m - MinOfLanes(m - v);  // max(v) == limit - min(limit - v)
+}
+HWY_INLINE Vec128<uint8_t> MaxOfLanes(Vec128<uint8_t> v) {
+  const Vec128<uint8_t> m(Set(DFromV<decltype(v)>(), LimitsMax<uint8_t>()));
+  return m - MinOfLanes(m - v);  // max(v) == limit - min(limit - v)
+}
+#elif HWY_TARGET >= HWY_SSSE3
+template <size_t N, HWY_IF_V_SIZE_GT(uint8_t, N, 4)>
+HWY_API Vec128<uint8_t, N> MaxOfLanes(Vec128<uint8_t, N> v) {
+  const DFromV<decltype(v)> d;
+  const RepartitionToWide<decltype(d)> d16;
+  const RepartitionToWide<decltype(d16)> d32;
+  Vec128<uint8_t, N> vm = Max(v, Reverse2(d, v));  // vs. neighboring u8
+  vm = Max(vm, BitCast(d, Reverse2(d16, BitCast(d16, vm))));  // vs. next u16
+  vm = Max(vm, BitCast(d, Reverse2(d32, BitCast(d32, vm))));  // vs. next u32
+  if (N > 8) {  // more than 8 lanes: one more step across the u64 halves
+    const RepartitionToWide<decltype(d32)> d64;
+    vm = Max(vm, BitCast(d, Reverse2(d64, BitCast(d64, vm))));
+  }
+  return vm;
+}
+
+template <size_t N, HWY_IF_V_SIZE_GT(uint8_t, N, 4)>
+HWY_API Vec128<uint8_t, N> MinOfLanes(Vec128<uint8_t, N> v) {
+  const DFromV<decltype(v)> d;
+  const RepartitionToWide<decltype(d)> d16;
+  const RepartitionToWide<decltype(d16)> d32;
+  Vec128<uint8_t, N> vm = Min(v, Reverse2(d, v));  // vs. neighboring u8
+  vm = Min(vm, BitCast(d, Reverse2(d16, BitCast(d16, vm))));  // vs. next u16
+  vm = Min(vm, BitCast(d, Reverse2(d32, BitCast(d32, vm))));  // vs. next u32
+  if (N > 8) {  // more than 8 lanes: one more step across the u64 halves
+    const RepartitionToWide<decltype(d32)> d64;
+    vm = Min(vm, BitCast(d, Reverse2(d64, BitCast(d64, vm))));
+  }
+  return vm;
+}
+#endif
+
+// Implement min/max of i8 in terms of u8 by toggling the sign bit.
+template <size_t N, HWY_IF_V_SIZE_GT(int8_t, N, 4)>
+HWY_INLINE Vec128<int8_t, N> MinOfLanes(Vec128<int8_t, N> v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto sign = SignBit(du);  // toggled before and after the u8 reduction
+  const auto min_biased = MinOfLanes(Xor(BitCast(du, v), sign));
+  return BitCast(d, Xor(min_biased, sign));
+}
+template <size_t N, HWY_IF_V_SIZE_GT(int8_t, N, 4)>
+HWY_INLINE Vec128<int8_t, N> MaxOfLanes(Vec128<int8_t, N> v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto sign = SignBit(du);  // toggled before and after the u8 reduction
+  const auto max_biased = MaxOfLanes(Xor(BitCast(du, v), sign));
+  return BitCast(d, Xor(max_biased, sign));
+}
+
+}  // namespace detail
+
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> SumOfLanes(D /* tag */, VFromD<D> v) {  // <= 16-byte vectors
+  return detail::SumOfLanes(v);  // tag is unused; overload picked by v's type
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API TFromD<D> ReduceSum(D /* tag */, VFromD<D> v) {  // scalar result
+  return detail::ReduceSum(v);
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> MinOfLanes(D /* tag */, VFromD<D> v) {  // <= 16-byte vectors
+  return detail::MinOfLanes(v);
+}
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_API VFromD<D> MaxOfLanes(D /* tag */, VFromD<D> v) {  // <= 16-byte vectors
+  return detail::MaxOfLanes(v);
+}
+
+// ------------------------------ Lt128
+
+namespace detail {
+
+// Returns vector-mask for Lt128. Generic for all vector lengths.
+template <class D, HWY_IF_U64_D(D)>
+HWY_INLINE VFromD<D> Lt128Vec(const D d, VFromD<D> a, VFromD<D> b) {
+  // Truth table of Eq and Lt for Hi and Lo u64.
+  // (removed lines with (=H && cH) or (=L && cL) - cannot both be true)
+  // =H =L cH cL  | out = cH | (=H & cL)
+  //  0  0  0  0  |  0
+  //  0  0  0  1  |  0
+  //  0  0  1  0  |  1
+  //  0  0  1  1  |  1
+  //  0  1  0  0  |  0
+  //  0  1  0  1  |  0
+  //  0  1  1  0  |  1
+  //  1  0  0  0  |  0
+  //  1  0  0  1  |  1
+  //  1  1  0  0  |  0
+  const auto eqHL = Eq(a, b);
+  const VFromD<D> ltHL = VecFromMask(d, Lt(a, b));
+  const VFromD<D> ltLX = ShiftLeftLanes<1>(ltHL);  // cL moved to the H lane
+  const VFromD<D> vecHx = IfThenElse(eqHL, ltLX, ltHL);  // H lane: =H ? cL : cH
+  return InterleaveUpper(d, vecHx, vecHx);  // H-lane result in both lanes
+}
+
+// Returns vector-mask for Eq128. Generic for all vector lengths.
+template <class D, HWY_IF_U64_D(D)>
+HWY_INLINE VFromD<D> Eq128Vec(D d, VFromD<D> a, VFromD<D> b) {
+  const VFromD<D> lane_eq = VecFromMask(d, Eq(a, b));  // per-u64 equality
+  // A lane stays set only if its neighbor in the pair is set as well.
+  return And(lane_eq, Reverse2(d, lane_eq));
+}
+
+template <class D, HWY_IF_U64_D(D)>
+HWY_INLINE VFromD<D> Ne128Vec(D d, VFromD<D> a, VFromD<D> b) {
+  const VFromD<D> lane_ne = VecFromMask(d, Ne(a, b));  // per-u64 inequality
+  // A lane is set if it or its neighbor in the pair is set.
+  return Or(lane_ne, Reverse2(d, lane_ne));
+}
+
+template <class D, HWY_IF_U64_D(D)>
+HWY_INLINE VFromD<D> Lt128UpperVec(D d, VFromD<D> a, VFromD<D> b) {
+  // AVX-512 needs no special case: Mask <-> Vec conversion is fast, and
+  // copying mask bits to the neighboring lane seems infeasible.
+  const auto lane_lt = VecFromMask(d, Lt(a, b));
+  return InterleaveUpper(d, lane_lt, lane_lt);  // upper-lane result in both
+}
+
+template <class D, HWY_IF_U64_D(D)>
+HWY_INLINE VFromD<D> Eq128UpperVec(D d, VFromD<D> a, VFromD<D> b) {
+  // AVX-512 needs no special case: Mask <-> Vec conversion is fast, and
+  // copying mask bits to the neighboring lane seems infeasible.
+  const auto lane_eq = VecFromMask(d, Eq(a, b));
+  return InterleaveUpper(d, lane_eq, lane_eq);  // upper-lane result in both
+}
+
+template <class D, HWY_IF_U64_D(D)>
+HWY_INLINE VFromD<D> Ne128UpperVec(D d, VFromD<D> a, VFromD<D> b) {
+  // AVX-512 needs no special case: Mask <-> Vec conversion is fast, and
+  // copying mask bits to the neighboring lane seems infeasible.
+  const auto lane_ne = VecFromMask(d, Ne(a, b));
+  return InterleaveUpper(d, lane_ne, lane_ne);  // upper-lane result in both
+}
+
+}  // namespace detail
+
+template <class D, HWY_IF_U64_D(D)>
+HWY_API MFromD<D> Lt128(D d, VFromD<D> a, VFromD<D> b) {
+  return MaskFromVec(detail::Lt128Vec(d, a, b));  // vector-mask -> mask
+}
+
+template <class D, HWY_IF_U64_D(D)>
+HWY_API MFromD<D> Eq128(D d, VFromD<D> a, VFromD<D> b) {
+  return MaskFromVec(detail::Eq128Vec(d, a, b));  // vector-mask -> mask
+}
+
+template <class D, HWY_IF_U64_D(D)>
+HWY_API MFromD<D> Ne128(D d, VFromD<D> a, VFromD<D> b) {
+  return MaskFromVec(detail::Ne128Vec(d, a, b));  // vector-mask -> mask
+}
+
+template <class D, HWY_IF_U64_D(D)>
+HWY_API MFromD<D> Lt128Upper(D d, VFromD<D> a, VFromD<D> b) {
+  return MaskFromVec(detail::Lt128UpperVec(d, a, b));  // vector-mask -> mask
+}
+
+template <class D, HWY_IF_U64_D(D)>
+HWY_API MFromD<D> Eq128Upper(D d, VFromD<D> a, VFromD<D> b) {
+  return MaskFromVec(detail::Eq128UpperVec(d, a, b));  // vector-mask -> mask
+}
+
+template <class D, HWY_IF_U64_D(D)>
+HWY_API MFromD<D> Ne128Upper(D d, VFromD<D> a, VFromD<D> b) {
+  return MaskFromVec(detail::Ne128UpperVec(d, a, b));  // vector-mask -> mask
+}
+
+// ------------------------------ Min128, Max128 (Lt128)
+
+// Avoids the extra MaskFromVec in Lt128.
+template <class D, HWY_IF_U64_D(D)>
+HWY_API VFromD<D> Min128(D d, VFromD<D> a, VFromD<D> b) {
+  return IfVecThenElse(detail::Lt128Vec(d, a, b), a, b);  // a < b ? a : b
+}
+
+template <class D, HWY_IF_U64_D(D)>
+HWY_API VFromD<D> Max128(D d, VFromD<D> a, VFromD<D> b) {
+  return IfVecThenElse(detail::Lt128Vec(d, b, a), a, b);  // b < a ? a : b
+}
+
+template <class D, HWY_IF_U64_D(D)>
+HWY_API VFromD<D> Min128Upper(D d, VFromD<D> a, VFromD<D> b) {
+  return IfVecThenElse(detail::Lt128UpperVec(d, a, b), a, b);  // a < b ? a : b
+}
+
+template <class D, HWY_IF_U64_D(D)>
+HWY_API VFromD<D> Max128Upper(D d, VFromD<D> a, VFromD<D> b) {
+  return IfVecThenElse(detail::Lt128UpperVec(d, b, a), a, b);  // b < a ? a : b
+}
+
+// -------------------- LeadingZeroCount, TrailingZeroCount, HighestSetBitIndex
+
+#if HWY_TARGET <= HWY_AVX3
+
+#ifdef HWY_NATIVE_LEADING_ZERO_COUNT
+#undef HWY_NATIVE_LEADING_ZERO_COUNT
+#else
+#define HWY_NATIVE_LEADING_ZERO_COUNT
+#endif
+
+template <class V, HWY_IF_UI32(TFromV<V>), HWY_IF_V_SIZE_LE_D(DFromV<V>, 16)>
+HWY_API V LeadingZeroCount(V v) {
+  return V{_mm_lzcnt_epi32(v.raw)};  // 32-bit lanes, vectors <= 16 bytes
+}
+
+template <class V, HWY_IF_UI64(TFromV<V>), HWY_IF_V_SIZE_LE_D(DFromV<V>, 16)>
+HWY_API V LeadingZeroCount(V v) {
+  return V{_mm_lzcnt_epi64(v.raw)};  // 64-bit lanes, vectors <= 16 bytes
+}
+
+// HighestSetBitIndex and TrailingZeroCount are implemented in x86_512-inl.h
+// for AVX3 targets.
+
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+// Note that the GCC warnings are not suppressed if we only wrap the *intrin.h -
+// the warning seems to be issued at the call site of intrinsics, i.e. our code.
+HWY_DIAGNOSTICS(pop)
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/ops/x86_256-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/x86_256-inl.h
new file mode 100644
index 0000000000..2f188e7273
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/x86_256-inl.h
@@ -0,0 +1,7428 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// 256-bit vectors and AVX2 instructions, plus some AVX512-VL operations when
+// compiling for that target.
+// External include guard in highway.h - see comment there.
+
+// WARNING: most operations do not cross 128-bit block boundaries. In
+// particular, "Broadcast", pack and zip behavior may be surprising.
+
+// Must come before HWY_DIAGNOSTICS and HWY_COMPILER_CLANGCL
+#include "hwy/base.h"
+
+// Avoid uninitialized warnings in GCC's avx512fintrin.h (see
+// https://github.com/google/highway/issues/710)
+HWY_DIAGNOSTICS(push)
+#if HWY_COMPILER_GCC_ACTUAL
+HWY_DIAGNOSTICS_OFF(disable : 4700, ignored "-Wuninitialized")
+HWY_DIAGNOSTICS_OFF(disable : 4701 4703 6001 26494,
+                    ignored "-Wmaybe-uninitialized")
+#endif
+
+// Must come before HWY_COMPILER_CLANGCL
+#include <immintrin.h>  // AVX2+
+
+#if HWY_COMPILER_CLANGCL
+// Including <immintrin.h> should be enough, but Clang's headers helpfully skip
+// including these headers when _MSC_VER is defined, like when using clang-cl.
+// Include these directly here.
+#include <avxintrin.h>
+// avxintrin defines __m256i and must come before avx2intrin.
+#include <avx2intrin.h>
+#include <bmi2intrin.h>  // _pext_u64
+#include <f16cintrin.h>
+#include <fmaintrin.h>
+#include <smmintrin.h>
+#endif  // HWY_COMPILER_CLANGCL
+
+// For half-width vectors. Already includes base.h.
+#include "hwy/ops/shared-inl.h"
+// Already included by shared-inl, but do it again to avoid IDE warnings.
+#include "hwy/ops/x86_128-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+namespace detail {
+
+template <typename T>
+struct Raw256 {
+  using type = __m256i;  // default for every T without a specialization
+};
+#if HWY_HAVE_FLOAT16
+template <>
+struct Raw256<float16_t> {
+  using type = __m256h;  // only when HWY_HAVE_FLOAT16 is set
+};
+#endif  // HWY_HAVE_FLOAT16
+template <>
+struct Raw256<float> {
+  using type = __m256;  // single-precision register type
+};
+template <>
+struct Raw256<double> {
+  using type = __m256d;  // double-precision register type
+};
+
+}  // namespace detail
+
+template <typename T>
+class Vec256 {
+  using Raw = typename detail::Raw256<T>::type;  // register type chosen by T
+
+ public:
+  using PrivateT = T;                                  // only for DFromV
+  static constexpr size_t kPrivateN = 32 / sizeof(T);  // only for DFromV
+
+  // Compound assignment. Only usable if there is a corresponding non-member
+  // binary operator overload. For example, only f32 and f64 support division.
+  HWY_INLINE Vec256& operator*=(const Vec256 other) {
+    return *this = (*this * other);
+  }
+  HWY_INLINE Vec256& operator/=(const Vec256 other) {
+    return *this = (*this / other);
+  }
+  HWY_INLINE Vec256& operator+=(const Vec256 other) {
+    return *this = (*this + other);
+  }
+  HWY_INLINE Vec256& operator-=(const Vec256 other) {
+    return *this = (*this - other);
+  }
+  HWY_INLINE Vec256& operator&=(const Vec256 other) {
+    return *this = (*this & other);
+  }
+  HWY_INLINE Vec256& operator|=(const Vec256 other) {
+    return *this = (*this | other);
+  }
+  HWY_INLINE Vec256& operator^=(const Vec256 other) {
+    return *this = (*this ^ other);
+  }
+
+  Raw raw;  // the wrapped intrinsic value; public so ops can access it
+};
+
+#if HWY_TARGET <= HWY_AVX3
+
+namespace detail {
+
+// Template arg: sizeof(lane type)
+template <size_t size>
+struct RawMask256 {};  // no `type` for unsupported lane sizes
+template <>
+struct RawMask256<1> {
+  using type = __mmask32;  // 32 one-byte lanes
+};
+template <>
+struct RawMask256<2> {
+  using type = __mmask16;  // 16 two-byte lanes
+};
+template <>
+struct RawMask256<4> {
+  using type = __mmask8;  // 8 four-byte lanes
+};
+template <>
+struct RawMask256<8> {
+  using type = __mmask8;  // 4 eight-byte lanes; same mask type as 4-byte lanes
+};
+
+}  // namespace detail
+
+template <typename T>
+struct Mask256 {
+  using Raw = typename detail::RawMask256<sizeof(T)>::type;  // by lane size
+
+  static Mask256<T> FromBits(uint64_t mask_bits) {
+    return Mask256<T>{static_cast<Raw>(mask_bits)};  // narrows to Raw
+  }
+
+  Raw raw;  // AVX3 path: a __mmask* value
+};
+
+#else  // AVX2
+
+// FF..FF or 0.
+template <typename T>
+struct Mask256 {
+  typename detail::Raw256<T>::type raw;  // same register type as Vec256<T>
+};
+
+#endif  // AVX2
+
+#if HWY_TARGET <= HWY_AVX3
+namespace detail {
+
+// Used by Expand() emulation, which is required for both AVX3 and AVX2.
+template <typename T>
+HWY_INLINE uint64_t BitsFromMask(const Mask256<T> mask) {
+  return mask.raw;  // the __mmask* value converts implicitly to uint64_t
+}
+
+}  // namespace detail
+#endif  // HWY_TARGET <= HWY_AVX3
+
+template <typename T>
+using Full256 = Simd<T, 32 / sizeof(T), 0>;  // tag: as many T as fit in 32 bytes
+
+// ------------------------------ BitCast
+
+namespace detail {
+
+HWY_INLINE __m256i BitCastToInteger(__m256i v) { return v; }  // identity
+#if HWY_HAVE_FLOAT16
+HWY_INLINE __m256i BitCastToInteger(__m256h v) {
+  return _mm256_castph_si256(v);  // f16 register -> integer register
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_INLINE __m256i BitCastToInteger(__m256 v) { return _mm256_castps_si256(v); }
+HWY_INLINE __m256i BitCastToInteger(__m256d v) {
+  return _mm256_castpd_si256(v);  // f64 register -> integer register
+}
+
+template <typename T>
+HWY_INLINE Vec256<uint8_t> BitCastToByte(Vec256<T> v) {
+  return Vec256<uint8_t>{BitCastToInteger(v.raw)};  // wrapped as u8 lanes
+}
+
+// Cannot rely on function overloading because return types differ.
+template <typename T>
+struct BitCastFromInteger256 {
+  HWY_INLINE __m256i operator()(__m256i v) { return v; }  // default: identity
+};
+#if HWY_HAVE_FLOAT16
+template <>
+struct BitCastFromInteger256<float16_t> {
+  HWY_INLINE __m256h operator()(__m256i v) { return _mm256_castsi256_ph(v); }
+};
+#endif  // HWY_HAVE_FLOAT16
+template <>
+struct BitCastFromInteger256<float> {  // integer register -> f32 register
+  HWY_INLINE __m256 operator()(__m256i v) { return _mm256_castsi256_ps(v); }
+};
+template <>
+struct BitCastFromInteger256<double> {  // integer register -> f64 register
+  HWY_INLINE __m256d operator()(__m256i v) { return _mm256_castsi256_pd(v); }
+};
+
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_INLINE VFromD<D> BitCastFromByte(D /* tag */, Vec256<uint8_t> v) {
+  return VFromD<D>{BitCastFromInteger256<TFromD<D>>()(v.raw)};  // by lane type
+}
+
+}  // namespace detail
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), typename FromT>
+HWY_API VFromD<D> BitCast(D d, Vec256<FromT> v) {
+  return detail::BitCastFromByte(d, detail::BitCastToByte(v));  // via u8
+}
+
+// ------------------------------ Zero
+
+// Cannot use VFromD here because it is defined in terms of Zero.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+HWY_API Vec256<TFromD<D>> Zero(D /* tag */) {
+  return Vec256<TFromD<D>>{_mm256_setzero_si256()};  // integer lanes
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_BF16_D(D)>
+HWY_API Vec256<bfloat16_t> Zero(D /* tag */) {
+  return Vec256<bfloat16_t>{_mm256_setzero_si256()};  // raw type is __m256i
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F16_D(D)>
+HWY_API Vec256<float16_t> Zero(D /* tag */) {
+#if HWY_HAVE_FLOAT16
+  return Vec256<float16_t>{_mm256_setzero_ph()};  // raw type is __m256h
+#else
+  return Vec256<float16_t>{_mm256_setzero_si256()};  // raw type is __m256i
+#endif  // HWY_HAVE_FLOAT16
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API Vec256<float> Zero(D /* tag */) {
+  return Vec256<float>{_mm256_setzero_ps()};  // raw type is __m256
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API Vec256<double> Zero(D /* tag */) {
+  return Vec256<double>{_mm256_setzero_pd()};  // raw type is __m256d
+}
+
+// ------------------------------ Set
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Set(D /* tag */, TFromD<D> t) {  // 1-byte lanes
+  return VFromD<D>{_mm256_set1_epi8(static_cast<char>(t))};  // NOLINT
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI16_D(D)>
+HWY_API VFromD<D> Set(D /* tag */, TFromD<D> t) {  // u16/i16 lanes
+  return VFromD<D>{_mm256_set1_epi16(static_cast<short>(t))};  // NOLINT
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI32_D(D)>
+HWY_API VFromD<D> Set(D /* tag */, TFromD<D> t) {  // u32/i32 lanes
+  return VFromD<D>{_mm256_set1_epi32(static_cast<int>(t))};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI64_D(D)>
+HWY_API VFromD<D> Set(D /* tag */, TFromD<D> t) {  // u64/i64 lanes
+  return VFromD<D>{_mm256_set1_epi64x(static_cast<long long>(t))};  // NOLINT
+}
+// bfloat16_t is handled by x86_128-inl.h.
+#if HWY_HAVE_FLOAT16
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F16_D(D)>
+HWY_API Vec256<float16_t> Set(D /* tag */, float16_t t) {
+  return Vec256<float16_t>{_mm256_set1_ph(t)};  // only with HWY_HAVE_FLOAT16
+}
+#endif  // HWY_HAVE_FLOAT16
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API Vec256<float> Set(D /* tag */, float t) {
+  return Vec256<float>{_mm256_set1_ps(t)};  // every f32 lane set to t
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API Vec256<double> Set(D /* tag */, double t) {
+  return Vec256<double>{_mm256_set1_pd(t)};  // every f64 lane set to t
+}
+
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4700, ignored "-Wuninitialized")
+
+// Returns a vector with uninitialized elements.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+HWY_API VFromD<D> Undefined(D /* tag */) {
+  // Available on Clang 6.0, GCC 6.2, ICC 16.03, MSVC 19.14. All but ICC
+  // generate an XOR instruction.
+  return VFromD<D>{_mm256_undefined_si256()};  // integer lanes
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_BF16_D(D)>
+HWY_API Vec256<bfloat16_t> Undefined(D /* tag */) {
+  return Vec256<bfloat16_t>{_mm256_undefined_si256()};  // raw type is __m256i
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F16_D(D)>
+HWY_API Vec256<float16_t> Undefined(D /* tag */) {
+#if HWY_HAVE_FLOAT16
+  return Vec256<float16_t>{_mm256_undefined_ph()};  // raw type is __m256h
+#else
+  return Vec256<float16_t>{_mm256_undefined_si256()};  // raw type is __m256i
+#endif
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API Vec256<float> Undefined(D /* tag */) {
+  return Vec256<float>{_mm256_undefined_ps()};  // raw type is __m256
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API Vec256<double> Undefined(D /* tag */) {
+  return Vec256<double>{_mm256_undefined_pd()};  // raw type is __m256d
+}
+
+HWY_DIAGNOSTICS(pop)
+
+// ------------------------------ ResizeBitCast
+
+// 32-byte vector to 32-byte vector (or 64-byte vector to 64-byte vector on
+// AVX3)
+template <class D, class FromV, HWY_IF_V_SIZE_GT_V(FromV, 16),
+          HWY_IF_V_SIZE_D(D, HWY_MAX_LANES_V(FromV) * sizeof(TFromV<FromV>))>
+HWY_API VFromD<D> ResizeBitCast(D d, FromV v) {
+  return BitCast(d, v);  // source and target have the same size
+}
+
+// 32-byte vector to 16-byte vector (or 64-byte vector to 32-byte vector on
+// AVX3)
+template <class D, class FromV, HWY_IF_V_SIZE_GT_V(FromV, 16),
+          HWY_IF_V_SIZE_D(D,
+                          (HWY_MAX_LANES_V(FromV) * sizeof(TFromV<FromV>)) / 2)>
+HWY_API VFromD<D> ResizeBitCast(D d, FromV v) {
+  // Keep only the lower half of the source, then reinterpret its lanes.
+  const Half<DFromV<FromV>> d_lower;
+  return BitCast(d, LowerHalf(d_lower, v));
+}
+
+// 32-byte vector (or 64-byte vector on AVX3) to <= 8-byte vector
+template <class D, class FromV, HWY_IF_V_SIZE_GT_V(FromV, 16),
+          HWY_IF_V_SIZE_LE_D(D, 8)>
+HWY_API VFromD<D> ResizeBitCast(D /*d*/, FromV v) {  // via Full128<TFromD<D>>
+  return VFromD<D>{ResizeBitCast(Full128<TFromD<D>>(), v).raw};
+}
+
+// <= 16-byte vector to 32-byte vector
+template <class D, class FromV, HWY_IF_V_SIZE_LE_V(FromV, 16),
+          HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> ResizeBitCast(D d, FromV v) {  // widen via a 128-bit u8 vector
+  return BitCast(d, Vec256<uint8_t>{_mm256_castsi128_si256(
+                        ResizeBitCast(Full128<uint8_t>(), v).raw)});
+}
+
+// ================================================== LOGICAL
+
+// ------------------------------ And
+
+template <typename T>
+HWY_API Vec256<T> And(Vec256<T> a, Vec256<T> b) {
+  const RebindToUnsigned<DFromV<decltype(a)>> du;  // float16_t raw: __m256h
+  const __m256i ret = _mm256_and_si256(BitCast(du, a).raw, BitCast(du, b).raw);
+  return BitCast(DFromV<decltype(a)>(), VFromD<decltype(du)>{ret});
+}
+
+HWY_API Vec256<float> And(Vec256<float> a, Vec256<float> b) {
+  return Vec256<float>{_mm256_and_ps(a.raw, b.raw)};  // f32-typed intrinsic
+}
+HWY_API Vec256<double> And(Vec256<double> a, Vec256<double> b) {
+  return Vec256<double>{_mm256_and_pd(a.raw, b.raw)};  // f64-typed intrinsic
+}
+
+// ------------------------------ AndNot
+
+// Returns ~not_mask & mask.
+template <typename T>
+HWY_API Vec256<T> AndNot(Vec256<T> not_mask, Vec256<T> mask) {
+  const RebindToUnsigned<DFromV<decltype(mask)>> du;  // float16_t raw: __m256h
+  const __m256i ret = _mm256_andnot_si256(BitCast(du, not_mask).raw,
+                                          BitCast(du, mask).raw);
+  return BitCast(DFromV<decltype(mask)>(), VFromD<decltype(du)>{ret});
+}
+HWY_API Vec256<float> AndNot(Vec256<float> not_mask, Vec256<float> mask) {
+  return Vec256<float>{_mm256_andnot_ps(not_mask.raw, mask.raw)};  // ~nm & m
+}
+HWY_API Vec256<double> AndNot(Vec256<double> not_mask, Vec256<double> mask) {
+  return Vec256<double>{_mm256_andnot_pd(not_mask.raw, mask.raw)};  // ~nm & m
+}
+
+// ------------------------------ Or
+
+template <typename T>
+HWY_API Vec256<T> Or(Vec256<T> a, Vec256<T> b) {
+  const RebindToUnsigned<DFromV<decltype(a)>> du;  // float16_t raw: __m256h
+  const __m256i ret = _mm256_or_si256(BitCast(du, a).raw, BitCast(du, b).raw);
+  return BitCast(DFromV<decltype(a)>(), VFromD<decltype(du)>{ret});
+}
+
+HWY_API Vec256<float> Or(Vec256<float> a, Vec256<float> b) {
+  return Vec256<float>{_mm256_or_ps(a.raw, b.raw)};  // f32-typed intrinsic
+}
+HWY_API Vec256<double> Or(Vec256<double> a, Vec256<double> b) {
+  return Vec256<double>{_mm256_or_pd(a.raw, b.raw)};  // f64-typed intrinsic
+}
+
+// ------------------------------ Xor
+
+template <typename T>
+HWY_API Vec256<T> Xor(Vec256<T> a, Vec256<T> b) {
+  const RebindToUnsigned<DFromV<decltype(a)>> du;  // float16_t raw: __m256h
+  const __m256i ret = _mm256_xor_si256(BitCast(du, a).raw, BitCast(du, b).raw);
+  return BitCast(DFromV<decltype(a)>(), VFromD<decltype(du)>{ret});
+}
+
+HWY_API Vec256<float> Xor(Vec256<float> a, Vec256<float> b) {
+  return Vec256<float>{_mm256_xor_ps(a.raw, b.raw)};  // f32-typed intrinsic
+}
+HWY_API Vec256<double> Xor(Vec256<double> a, Vec256<double> b) {
+  return Vec256<double>{_mm256_xor_pd(a.raw, b.raw)};  // f64-typed intrinsic
+}
+
+// ------------------------------ Not
+template <typename T>
+HWY_API Vec256<T> Not(const Vec256<T> v) {
+  const DFromV<decltype(v)> d;
+  using TU = MakeUnsigned<T>;  // unsigned lane type used for the raw bits
+#if HWY_TARGET <= HWY_AVX3  // one ternary-logic op; imm 0x55 = NOT of operand 3
+  const __m256i vu = BitCast(RebindToUnsigned<decltype(d)>(), v).raw;
+  return BitCast(d, Vec256<TU>{_mm256_ternarylogic_epi32(vu, vu, vu, 0x55)});
+#else  // Xor with an all-ones vector
+  return Xor(v, BitCast(d, Vec256<TU>{_mm256_set1_epi32(-1)}));
+#endif
+}
+
+// ------------------------------ Xor3
+template <typename T>
+HWY_API Vec256<T> Xor3(Vec256<T> x1, Vec256<T> x2, Vec256<T> x3) {
+#if HWY_TARGET <= HWY_AVX3  // one ternary-logic op; imm 0x96 = a ^ b ^ c
+  const DFromV<decltype(x1)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const __m256i ret = _mm256_ternarylogic_epi64(
+      BitCast(du, x1).raw, BitCast(du, x2).raw, BitCast(du, x3).raw, 0x96);
+  return BitCast(d, VU{ret});
+#else  // two Xor ops
+  return Xor(x1, Xor(x2, x3));
+#endif
+}
+
+// ------------------------------ Or3
+template <typename T>
+HWY_API Vec256<T> Or3(Vec256<T> o1, Vec256<T> o2, Vec256<T> o3) {
+#if HWY_TARGET <= HWY_AVX3  // one ternary-logic op; imm 0xFE = a | b | c
+  const DFromV<decltype(o1)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const __m256i ret = _mm256_ternarylogic_epi64(
+      BitCast(du, o1).raw, BitCast(du, o2).raw, BitCast(du, o3).raw, 0xFE);
+  return BitCast(d, VU{ret});
+#else  // two Or ops
+  return Or(o1, Or(o2, o3));
+#endif
+}
+
+// ------------------------------ OrAnd
+template <typename T>
+HWY_API Vec256<T> OrAnd(Vec256<T> o, Vec256<T> a1, Vec256<T> a2) {
+#if HWY_TARGET <= HWY_AVX3  // one ternary-logic op; imm 0xF8 = a | (b & c)
+  const DFromV<decltype(o)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const __m256i ret = _mm256_ternarylogic_epi64(
+      BitCast(du, o).raw, BitCast(du, a1).raw, BitCast(du, a2).raw, 0xF8);
+  return BitCast(d, VU{ret});
+#else  // And followed by Or
+  return Or(o, And(a1, a2));
+#endif
+}
+
+// ------------------------------ IfVecThenElse
+template <typename T>
+HWY_API Vec256<T> IfVecThenElse(Vec256<T> mask, Vec256<T> yes, Vec256<T> no) {
+#if HWY_TARGET <= HWY_AVX3  // one ternary-logic op; imm 0xCA = a ? b : c
+  const DFromV<decltype(yes)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  return BitCast(d, VU{_mm256_ternarylogic_epi64(BitCast(du, mask).raw,
+                                                 BitCast(du, yes).raw,
+                                                 BitCast(du, no).raw, 0xCA)});
+#else  // convert the vector to a mask, then select
+  return IfThenElse(MaskFromVec(mask), yes, no);
+#endif
+}
+
+// ------------------------------ Operator overloads (internal-only if float)
+
+template <typename T>
+HWY_API Vec256<T> operator&(const Vec256<T> a, const Vec256<T> b) {
+  return And(a, b);  // forwards to And
+}
+
+template <typename T>
+HWY_API Vec256<T> operator|(const Vec256<T> a, const Vec256<T> b) {
+  return Or(a, b);  // forwards to Or
+}
+
+template <typename T>
+HWY_API Vec256<T> operator^(const Vec256<T> a, const Vec256<T> b) {
+  return Xor(a, b);  // forwards to Xor
+}
+
+// ------------------------------ PopulationCount
+
+// 8/16 require BITALG, 32/64 require VPOPCNTDQ.
+#if HWY_TARGET <= HWY_AVX3_DL
+
+#ifdef HWY_NATIVE_POPCNT
+#undef HWY_NATIVE_POPCNT
+#else
+#define HWY_NATIVE_POPCNT
+#endif
+
+namespace detail {
+
+template <typename T>
+HWY_INLINE Vec256<T> PopulationCount(hwy::SizeTag<1> /* tag */, Vec256<T> v) {
+  return Vec256<T>{_mm256_popcnt_epi8(v.raw)};  // 1-byte lanes
+}
+template <typename T>
+HWY_INLINE Vec256<T> PopulationCount(hwy::SizeTag<2> /* tag */, Vec256<T> v) {
+  return Vec256<T>{_mm256_popcnt_epi16(v.raw)};  // 2-byte lanes
+}
+template <typename T>
+HWY_INLINE Vec256<T> PopulationCount(hwy::SizeTag<4> /* tag */, Vec256<T> v) {
+  return Vec256<T>{_mm256_popcnt_epi32(v.raw)};  // 4-byte lanes
+}
+template <typename T>
+HWY_INLINE Vec256<T> PopulationCount(hwy::SizeTag<8> /* tag */, Vec256<T> v) {
+  return Vec256<T>{_mm256_popcnt_epi64(v.raw)};  // 8-byte lanes
+}
+
+}  // namespace detail
+
+template <typename T>
+HWY_API Vec256<T> PopulationCount(Vec256<T> v) {
+  return detail::PopulationCount(hwy::SizeTag<sizeof(T)>(), v);  // by lane size
+}
+
+#endif  // HWY_TARGET <= HWY_AVX3_DL
+
+// ================================================== MASK
+
+#if HWY_TARGET <= HWY_AVX3
+
+// ------------------------------ IfThenElse
+
+// Returns mask ? yes : no.
+
+namespace detail {
+
+// Templates for signed/unsigned integer of a particular size.
+template <typename T>
+HWY_INLINE Vec256<T> IfThenElse(hwy::SizeTag<1> /* tag */, Mask256<T> mask,
+                                Vec256<T> yes, Vec256<T> no) {  // 1-byte lanes
+  return Vec256<T>{_mm256_mask_blend_epi8(mask.raw, no.raw, yes.raw)};
+}
+template <typename T>
+HWY_INLINE Vec256<T> IfThenElse(hwy::SizeTag<2> /* tag */, Mask256<T> mask,
+                                Vec256<T> yes, Vec256<T> no) {  // 2-byte lanes
+  return Vec256<T>{_mm256_mask_blend_epi16(mask.raw, no.raw, yes.raw)};
+}
+template <typename T>
+HWY_INLINE Vec256<T> IfThenElse(hwy::SizeTag<4> /* tag */, Mask256<T> mask,
+                                Vec256<T> yes, Vec256<T> no) {  // 4-byte lanes
+  return Vec256<T>{_mm256_mask_blend_epi32(mask.raw, no.raw, yes.raw)};
+}
+template <typename T>
+HWY_INLINE Vec256<T> IfThenElse(hwy::SizeTag<8> /* tag */, Mask256<T> mask,
+                                Vec256<T> yes, Vec256<T> no) {  // 8-byte lanes
+  return Vec256<T>{_mm256_mask_blend_epi64(mask.raw, no.raw, yes.raw)};
+}
+
+}  // namespace detail
+
+template <typename T>
+HWY_API Vec256<T> IfThenElse(Mask256<T> mask, Vec256<T> yes, Vec256<T> no) {
+  return detail::IfThenElse(hwy::SizeTag<sizeof(T)>(), mask, yes, no);  // by size
+}
+#if HWY_HAVE_FLOAT16
+HWY_API Vec256<float16_t> IfThenElse(Mask256<float16_t> mask,
+                                     Vec256<float16_t> yes,
+                                     Vec256<float16_t> no) {  // f16 lanes
+  return Vec256<float16_t>{_mm256_mask_blend_ph(mask.raw, no.raw, yes.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec256<float> IfThenElse(Mask256<float> mask, Vec256<float> yes,
+                                 Vec256<float> no) {  // f32 lanes
+  return Vec256<float>{_mm256_mask_blend_ps(mask.raw, no.raw, yes.raw)};
+}
+HWY_API Vec256<double> IfThenElse(Mask256<double> mask, Vec256<double> yes,
+                                  Vec256<double> no) {  // f64 lanes
+  return Vec256<double>{_mm256_mask_blend_pd(mask.raw, no.raw, yes.raw)};
+}
+
+namespace detail {
+
+template <typename T>
+HWY_INLINE Vec256<T> IfThenElseZero(hwy::SizeTag<1> /* tag */, Mask256<T> mask,
+                                    Vec256<T> yes) {  // 1-byte lanes
+  return Vec256<T>{_mm256_maskz_mov_epi8(mask.raw, yes.raw)};
+}
+template <typename T>
+HWY_INLINE Vec256<T> IfThenElseZero(hwy::SizeTag<2> /* tag */, Mask256<T> mask,
+                                    Vec256<T> yes) {  // 2-byte lanes
+  return Vec256<T>{_mm256_maskz_mov_epi16(mask.raw, yes.raw)};
+}
+template <typename T>
+HWY_INLINE Vec256<T> IfThenElseZero(hwy::SizeTag<4> /* tag */, Mask256<T> mask,
+                                    Vec256<T> yes) {  // 4-byte lanes
+  return Vec256<T>{_mm256_maskz_mov_epi32(mask.raw, yes.raw)};
+}
+template <typename T>
+HWY_INLINE Vec256<T> IfThenElseZero(hwy::SizeTag<8> /* tag */, Mask256<T> mask,
+                                    Vec256<T> yes) {  // 8-byte lanes
+  return Vec256<T>{_mm256_maskz_mov_epi64(mask.raw, yes.raw)};
+}
+
+}  // namespace detail
+
+// mask ? yes : 0. Lane types other than float/double dispatch on lane size.
+template <typename T, HWY_IF_NOT_FLOAT3264(T)>
+HWY_API Vec256<T> IfThenElseZero(Mask256<T> mask, Vec256<T> yes) {
+  const auto lane_size = hwy::SizeTag<sizeof(T)>();
+  return detail::IfThenElseZero(lane_size, mask, yes);
+}
+// float lanes.
+HWY_API Vec256<float> IfThenElseZero(Mask256<float> mask, Vec256<float> yes) {
+  const auto kept = _mm256_maskz_mov_ps(mask.raw, yes.raw);
+  return Vec256<float>{kept};
+}
+// double lanes.
+HWY_API Vec256<double> IfThenElseZero(Mask256<double> mask,
+                                      Vec256<double> yes) {
+  const auto kept = _mm256_maskz_mov_pd(mask.raw, yes.raw);
+  return Vec256<double>{kept};
+}
+
+namespace detail {
+
+// Size-tagged helpers for mask ? 0 : no. Each computes `no OP no` (which is
+// zero) only in lanes whose mask bit is set; the other lanes are copied from
+// the first argument, i.e. `no`.
+template <typename T>
+HWY_INLINE Vec256<T> IfThenZeroElse(hwy::SizeTag<1> /* tag */, Mask256<T> mask,
+                                    Vec256<T> no) {
+  // There is no masked xor for 8/16-bit lanes; a masked subtract is equally
+  // fast for u8/16 and also yields zero.
+  const auto cleared = _mm256_mask_sub_epi8(no.raw, mask.raw, no.raw, no.raw);
+  return Vec256<T>{cleared};
+}
+template <typename T>
+HWY_INLINE Vec256<T> IfThenZeroElse(hwy::SizeTag<2> /* tag */, Mask256<T> mask,
+                                    Vec256<T> no) {
+  const auto cleared = _mm256_mask_sub_epi16(no.raw, mask.raw, no.raw, no.raw);
+  return Vec256<T>{cleared};
+}
+template <typename T>
+HWY_INLINE Vec256<T> IfThenZeroElse(hwy::SizeTag<4> /* tag */, Mask256<T> mask,
+                                    Vec256<T> no) {
+  const auto cleared = _mm256_mask_xor_epi32(no.raw, mask.raw, no.raw, no.raw);
+  return Vec256<T>{cleared};
+}
+template <typename T>
+HWY_INLINE Vec256<T> IfThenZeroElse(hwy::SizeTag<8> /* tag */, Mask256<T> mask,
+                                    Vec256<T> no) {
+  const auto cleared = _mm256_mask_xor_epi64(no.raw, mask.raw, no.raw, no.raw);
+  return Vec256<T>{cleared};
+}
+
+}  // namespace detail
+
+// mask ? 0 : no. Lane types other than float/double dispatch on lane size.
+template <typename T, HWY_IF_NOT_FLOAT3264(T)>
+HWY_API Vec256<T> IfThenZeroElse(Mask256<T> mask, Vec256<T> no) {
+  const auto lane_size = hwy::SizeTag<sizeof(T)>();
+  return detail::IfThenZeroElse(lane_size, mask, no);
+}
+// float lanes: masked xor of `no` with itself zeroes the selected lanes.
+HWY_API Vec256<float> IfThenZeroElse(Mask256<float> mask, Vec256<float> no) {
+  const auto cleared = _mm256_mask_xor_ps(no.raw, mask.raw, no.raw, no.raw);
+  return Vec256<float>{cleared};
+}
+// double lanes.
+HWY_API Vec256<double> IfThenZeroElse(Mask256<double> mask, Vec256<double> no) {
+  const auto cleared = _mm256_mask_xor_pd(no.raw, mask.raw, no.raw, no.raw);
+  return Vec256<double>{cleared};
+}
+
+// Returns `v` with lanes whose sign bit is set replaced by zero. Note that the
+// assertion checks IsSigned<T>(), which is broader than its message suggests.
+template <typename T>
+HWY_API Vec256<T> ZeroIfNegative(const Vec256<T> v) {
+  static_assert(IsSigned<T>(), "Only for float");
+  // AVX3 MaskFromVec only inspects the MSB, so it directly yields the
+  // "is negative" mask.
+  const auto negative = MaskFromVec(v);
+  return IfThenZeroElse(negative, v);
+}
+
+// ------------------------------ Mask logical
+
+namespace detail {
+
+// Bitwise AND of two masks, one overload per lane size. The mask-register
+// intrinsics are used when the compiler provides them; otherwise the same
+// result is computed with a plain integer AND.
+template <typename T>
+HWY_INLINE Mask256<T> And(hwy::SizeTag<1> /*tag*/, const Mask256<T> a,
+                          const Mask256<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask32 bits = _kand_mask32(a.raw, b.raw);
+#else
+  const __mmask32 bits = static_cast<__mmask32>(a.raw & b.raw);
+#endif
+  return Mask256<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask256<T> And(hwy::SizeTag<2> /*tag*/, const Mask256<T> a,
+                          const Mask256<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask16 bits = _kand_mask16(a.raw, b.raw);
+#else
+  const __mmask16 bits = static_cast<__mmask16>(a.raw & b.raw);
+#endif
+  return Mask256<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask256<T> And(hwy::SizeTag<4> /*tag*/, const Mask256<T> a,
+                          const Mask256<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask8 bits = _kand_mask8(a.raw, b.raw);
+#else
+  const __mmask8 bits = static_cast<__mmask8>(a.raw & b.raw);
+#endif
+  return Mask256<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask256<T> And(hwy::SizeTag<8> /*tag*/, const Mask256<T> a,
+                          const Mask256<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask8 bits = _kand_mask8(a.raw, b.raw);
+#else
+  const __mmask8 bits = static_cast<__mmask8>(a.raw & b.raw);
+#endif
+  return Mask256<T>{bits};
+}
+
+// (~a) & b for masks, one overload per lane size. Falls back to integer
+// arithmetic when the mask-register intrinsics are unavailable.
+template <typename T>
+HWY_INLINE Mask256<T> AndNot(hwy::SizeTag<1> /*tag*/, const Mask256<T> a,
+                             const Mask256<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask32 bits = _kandn_mask32(a.raw, b.raw);
+#else
+  const __mmask32 bits = static_cast<__mmask32>(~a.raw & b.raw);
+#endif
+  return Mask256<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask256<T> AndNot(hwy::SizeTag<2> /*tag*/, const Mask256<T> a,
+                             const Mask256<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask16 bits = _kandn_mask16(a.raw, b.raw);
+#else
+  const __mmask16 bits = static_cast<__mmask16>(~a.raw & b.raw);
+#endif
+  return Mask256<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask256<T> AndNot(hwy::SizeTag<4> /*tag*/, const Mask256<T> a,
+                             const Mask256<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask8 bits = _kandn_mask8(a.raw, b.raw);
+#else
+  const __mmask8 bits = static_cast<__mmask8>(~a.raw & b.raw);
+#endif
+  return Mask256<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask256<T> AndNot(hwy::SizeTag<8> /*tag*/, const Mask256<T> a,
+                             const Mask256<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask8 bits = _kandn_mask8(a.raw, b.raw);
+#else
+  const __mmask8 bits = static_cast<__mmask8>(~a.raw & b.raw);
+#endif
+  return Mask256<T>{bits};
+}
+
+// Bitwise OR of two masks, one overload per lane size. Falls back to integer
+// arithmetic when the mask-register intrinsics are unavailable.
+template <typename T>
+HWY_INLINE Mask256<T> Or(hwy::SizeTag<1> /*tag*/, const Mask256<T> a,
+                         const Mask256<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask32 bits = _kor_mask32(a.raw, b.raw);
+#else
+  const __mmask32 bits = static_cast<__mmask32>(a.raw | b.raw);
+#endif
+  return Mask256<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask256<T> Or(hwy::SizeTag<2> /*tag*/, const Mask256<T> a,
+                         const Mask256<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask16 bits = _kor_mask16(a.raw, b.raw);
+#else
+  const __mmask16 bits = static_cast<__mmask16>(a.raw | b.raw);
+#endif
+  return Mask256<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask256<T> Or(hwy::SizeTag<4> /*tag*/, const Mask256<T> a,
+                         const Mask256<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask8 bits = _kor_mask8(a.raw, b.raw);
+#else
+  const __mmask8 bits = static_cast<__mmask8>(a.raw | b.raw);
+#endif
+  return Mask256<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask256<T> Or(hwy::SizeTag<8> /*tag*/, const Mask256<T> a,
+                         const Mask256<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask8 bits = _kor_mask8(a.raw, b.raw);
+#else
+  const __mmask8 bits = static_cast<__mmask8>(a.raw | b.raw);
+#endif
+  return Mask256<T>{bits};
+}
+
+// Bitwise XOR of two masks, one overload per lane size. Falls back to integer
+// arithmetic when the mask-register intrinsics are unavailable.
+template <typename T>
+HWY_INLINE Mask256<T> Xor(hwy::SizeTag<1> /*tag*/, const Mask256<T> a,
+                          const Mask256<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask32 bits = _kxor_mask32(a.raw, b.raw);
+#else
+  const __mmask32 bits = static_cast<__mmask32>(a.raw ^ b.raw);
+#endif
+  return Mask256<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask256<T> Xor(hwy::SizeTag<2> /*tag*/, const Mask256<T> a,
+                          const Mask256<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask16 bits = _kxor_mask16(a.raw, b.raw);
+#else
+  const __mmask16 bits = static_cast<__mmask16>(a.raw ^ b.raw);
+#endif
+  return Mask256<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask256<T> Xor(hwy::SizeTag<4> /*tag*/, const Mask256<T> a,
+                          const Mask256<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask8 bits = _kxor_mask8(a.raw, b.raw);
+#else
+  const __mmask8 bits = static_cast<__mmask8>(a.raw ^ b.raw);
+#endif
+  return Mask256<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask256<T> Xor(hwy::SizeTag<8> /*tag*/, const Mask256<T> a,
+                          const Mask256<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask8 bits = _kxor_mask8(a.raw, b.raw);
+#else
+  const __mmask8 bits = static_cast<__mmask8>(a.raw ^ b.raw);
+#endif
+  return Mask256<T>{bits};
+}
+
+// XNOR of two masks, one overload per lane size. The 64-bit-lane overload
+// additionally clears everything above the low four bits, because a 256-bit
+// vector holds only four such lanes.
+template <typename T>
+HWY_INLINE Mask256<T> ExclusiveNeither(hwy::SizeTag<1> /*tag*/,
+                                       const Mask256<T> a, const Mask256<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask32 bits = _kxnor_mask32(a.raw, b.raw);
+#else
+  const __mmask32 bits =
+      static_cast<__mmask32>(~(a.raw ^ b.raw) & 0xFFFFFFFF);
+#endif
+  return Mask256<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask256<T> ExclusiveNeither(hwy::SizeTag<2> /*tag*/,
+                                       const Mask256<T> a, const Mask256<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask16 bits = _kxnor_mask16(a.raw, b.raw);
+#else
+  const __mmask16 bits = static_cast<__mmask16>(~(a.raw ^ b.raw) & 0xFFFF);
+#endif
+  return Mask256<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask256<T> ExclusiveNeither(hwy::SizeTag<4> /*tag*/,
+                                       const Mask256<T> a, const Mask256<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask8 bits = _kxnor_mask8(a.raw, b.raw);
+#else
+  const __mmask8 bits = static_cast<__mmask8>(~(a.raw ^ b.raw) & 0xFF);
+#endif
+  return Mask256<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask256<T> ExclusiveNeither(hwy::SizeTag<8> /*tag*/,
+                                       const Mask256<T> a, const Mask256<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask8 bits =
+      static_cast<__mmask8>(_kxnor_mask8(a.raw, b.raw) & 0xF);
+#else
+  const __mmask8 bits = static_cast<__mmask8>(~(a.raw ^ b.raw) & 0xF);
+#endif
+  return Mask256<T>{bits};
+}
+
+}  // namespace detail
+
+// Mask a & b; forwards to the detail overload for this lane size.
+template <typename T>
+HWY_API Mask256<T> And(const Mask256<T> a, Mask256<T> b) {
+  const auto lane_size = hwy::SizeTag<sizeof(T)>();
+  return detail::And(lane_size, a, b);
+}
+
+// Mask (~a) & b; forwards to the detail overload for this lane size.
+template <typename T>
+HWY_API Mask256<T> AndNot(const Mask256<T> a, Mask256<T> b) {
+  const auto lane_size = hwy::SizeTag<sizeof(T)>();
+  return detail::AndNot(lane_size, a, b);
+}
+
+// Mask a | b; forwards to the detail overload for this lane size.
+template <typename T>
+HWY_API Mask256<T> Or(const Mask256<T> a, Mask256<T> b) {
+  const auto lane_size = hwy::SizeTag<sizeof(T)>();
+  return detail::Or(lane_size, a, b);
+}
+
+// Mask a ^ b; forwards to the detail overload for this lane size.
+template <typename T>
+HWY_API Mask256<T> Xor(const Mask256<T> a, Mask256<T> b) {
+  const auto lane_size = hwy::SizeTag<sizeof(T)>();
+  return detail::Xor(lane_size, a, b);
+}
+
+// Inverts a mask by XOR-ing it with a mask that has one bit set per lane, so
+// bits beyond the lane count are left unchanged.
+template <typename T>
+HWY_API Mask256<T> Not(const Mask256<T> m) {
+  constexpr size_t kLanes = 32 / sizeof(T);  // lanes in a 32-byte vector
+  const auto all_lanes = Mask256<T>::FromBits((1ull << kLanes) - 1);
+  return Xor(m, all_lanes);
+}
+
+// Mask XNOR of a and b; forwards to the detail overload for this lane size.
+template <typename T>
+HWY_API Mask256<T> ExclusiveNeither(const Mask256<T> a, Mask256<T> b) {
+  const auto lane_size = hwy::SizeTag<sizeof(T)>();
+  return detail::ExclusiveNeither(lane_size, a, b);
+}
+
+#else  // AVX2
+
+// ------------------------------ Mask
+
+// On AVX2 a mask has the same representation as a vector (true = FF..FF), so
+// converting just re-wraps the raw register.
+template <typename T>
+HWY_API Mask256<T> MaskFromVec(const Vec256<T> v) {
+  const auto raw = v.raw;
+  return Mask256<T>{raw};
+}
+
+// Inverse of MaskFromVec: re-wraps the mask's raw register as a vector.
+template <typename T>
+HWY_API Vec256<T> VecFromMask(const Mask256<T> v) {
+  const auto raw = v.raw;
+  return Vec256<T>{raw};
+}
+
+// ------------------------------ IfThenElse
+
+// mask ? yes : no
+// Lane types other than float/double use the byte-granular blend.
+template <typename T, HWY_IF_NOT_FLOAT3264(T)>
+HWY_API Vec256<T> IfThenElse(Mask256<T> mask, Vec256<T> yes, Vec256<T> no) {
+  const auto blended = _mm256_blendv_epi8(no.raw, yes.raw, mask.raw);
+  return Vec256<T>{blended};
+}
+// float lanes.
+HWY_API Vec256<float> IfThenElse(Mask256<float> mask, Vec256<float> yes,
+                                 Vec256<float> no) {
+  const auto blended = _mm256_blendv_ps(no.raw, yes.raw, mask.raw);
+  return Vec256<float>{blended};
+}
+// double lanes.
+HWY_API Vec256<double> IfThenElse(Mask256<double> mask, Vec256<double> yes,
+                                  Vec256<double> no) {
+  const auto blended = _mm256_blendv_pd(no.raw, yes.raw, mask.raw);
+  return Vec256<double>{blended};
+}
+
+// mask ? yes : 0
+// AND-ing with the mask's vector form keeps `yes` only in the true lanes.
+template <typename T>
+HWY_API Vec256<T> IfThenElseZero(Mask256<T> mask, Vec256<T> yes) {
+  const DFromV<decltype(yes)> d;
+  const auto mask_bits = VecFromMask(d, mask);
+  return yes & mask_bits;
+}
+
+// mask ? 0 : no
+// AndNot with the mask's vector form clears `no` in the true lanes.
+template <typename T>
+HWY_API Vec256<T> IfThenZeroElse(Mask256<T> mask, Vec256<T> no) {
+  const DFromV<decltype(no)> d;
+  const auto mask_bits = VecFromMask(d, mask);
+  return AndNot(mask_bits, no);
+}
+
+// Returns `v` with every lane whose sign bit is set replaced by zero.
+//
+// The assertion admits all signed lane types, not only float/double. For
+// those other types IfThenElse maps to _mm256_blendv_epi8, which selects per
+// BYTE using the MSB of each mask byte. Passing the raw lanes as the mask
+// would therefore clear only the bytes whose own MSB is set (e.g. an int32
+// lane 0x80000001 would become 1 rather than 0). Build a full-lane mask with
+// a signed integer comparison of the bit pattern instead; this tests exactly
+// the sign bit for any lane type.
+template <typename T>
+HWY_API Vec256<T> ZeroIfNegative(Vec256<T> v) {
+  static_assert(IsSigned<T>(), "Only for float");
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;
+  const auto is_negative = RebindMask(d, BitCast(di, v) < Zero(di));
+  return IfThenElse(is_negative, Zero(d), v);
+}
+// float lanes: AVX2 IfThenElse only looks at the MSB for 32/64-bit float
+// lanes, so the raw lanes can serve directly as the mask.
+HWY_API Vec256<float> ZeroIfNegative(Vec256<float> v) {
+  const DFromV<decltype(v)> d;
+  const auto zero = Zero(d);
+  return IfThenElse(MaskFromVec(v), zero, v);
+}
+// double lanes: same MSB-only blend as for float.
+HWY_API Vec256<double> ZeroIfNegative(Vec256<double> v) {
+  const DFromV<decltype(v)> d;
+  const auto zero = Zero(d);
+  return IfThenElse(MaskFromVec(v), zero, v);
+}
+
+// ------------------------------ Mask logical
+
+// Mask negation, computed on the mask's vector representation.
+template <typename T>
+HWY_API Mask256<T> Not(const Mask256<T> m) {
+  const Full256<T> d;
+  const auto inverted = Not(VecFromMask(d, m));
+  return MaskFromVec(inverted);
+}
+
+// Mask a & b, computed on the masks' vector representations.
+template <typename T>
+HWY_API Mask256<T> And(const Mask256<T> a, Mask256<T> b) {
+  const Full256<T> d;
+  const auto va = VecFromMask(d, a);
+  const auto vb = VecFromMask(d, b);
+  return MaskFromVec(And(va, vb));
+}
+
+// Mask AndNot(a, b), computed on the masks' vector representations.
+template <typename T>
+HWY_API Mask256<T> AndNot(const Mask256<T> a, Mask256<T> b) {
+  const Full256<T> d;
+  const auto va = VecFromMask(d, a);
+  const auto vb = VecFromMask(d, b);
+  return MaskFromVec(AndNot(va, vb));
+}
+
+// Mask a | b, computed on the masks' vector representations.
+template <typename T>
+HWY_API Mask256<T> Or(const Mask256<T> a, Mask256<T> b) {
+  const Full256<T> d;
+  const auto va = VecFromMask(d, a);
+  const auto vb = VecFromMask(d, b);
+  return MaskFromVec(Or(va, vb));
+}
+
+// Mask a ^ b, computed on the masks' vector representations.
+template <typename T>
+HWY_API Mask256<T> Xor(const Mask256<T> a, Mask256<T> b) {
+  const Full256<T> d;
+  const auto va = VecFromMask(d, a);
+  const auto vb = VecFromMask(d, b);
+  return MaskFromVec(Xor(va, vb));
+}
+
+// True in lanes where neither a nor b is set: AndNot(a, Not(b)) evaluated on
+// the masks' vector representations.
+template <typename T>
+HWY_API Mask256<T> ExclusiveNeither(const Mask256<T> a, Mask256<T> b) {
+  const Full256<T> d;
+  const auto va = VecFromMask(d, a);
+  const auto not_b = Not(VecFromMask(d, b));
+  return MaskFromVec(AndNot(va, not_b));
+}
+
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// ================================================== COMPARE
+
+#if HWY_TARGET <= HWY_AVX3
+
+// Comparisons set a mask bit to 1 if the condition is true, else 0.
+
+// Reinterprets a mask for another lane type of the same size. The mask bits
+// are carried over unchanged.
+template <class DTo, HWY_IF_V_SIZE_D(DTo, 32), typename TFrom>
+HWY_API MFromD<DTo> RebindMask(DTo /*tag*/, Mask256<TFrom> m) {
+  static_assert(sizeof(TFrom) == sizeof(TFromD<DTo>), "Must have same size");
+  const auto bits = m.raw;
+  return MFromD<DTo>{bits};
+}
+
+namespace detail {
+
+// Size-tagged helpers for TestBit: a lane's mask bit is set when
+// (v & bit) is non-zero in that lane.
+template <typename T>
+HWY_INLINE Mask256<T> TestBit(hwy::SizeTag<1> /*tag*/, const Vec256<T> v,
+                              const Vec256<T> bit) {
+  const auto hits = _mm256_test_epi8_mask(v.raw, bit.raw);
+  return Mask256<T>{hits};
+}
+template <typename T>
+HWY_INLINE Mask256<T> TestBit(hwy::SizeTag<2> /*tag*/, const Vec256<T> v,
+                              const Vec256<T> bit) {
+  const auto hits = _mm256_test_epi16_mask(v.raw, bit.raw);
+  return Mask256<T>{hits};
+}
+template <typename T>
+HWY_INLINE Mask256<T> TestBit(hwy::SizeTag<4> /*tag*/, const Vec256<T> v,
+                              const Vec256<T> bit) {
+  const auto hits = _mm256_test_epi32_mask(v.raw, bit.raw);
+  return Mask256<T>{hits};
+}
+template <typename T>
+HWY_INLINE Mask256<T> TestBit(hwy::SizeTag<8> /*tag*/, const Vec256<T> v,
+                              const Vec256<T> bit) {
+  const auto hits = _mm256_test_epi64_mask(v.raw, bit.raw);
+  return Mask256<T>{hits};
+}
+
+}  // namespace detail
+
+// Per-lane test of whether `v` has the bit(s) of `bit` set; integer lanes
+// only. Forwards to the detail overload for this lane size.
+template <typename T>
+HWY_API Mask256<T> TestBit(const Vec256<T> v, const Vec256<T> bit) {
+  static_assert(!hwy::IsFloat<T>(), "Only integer vectors supported");
+  const auto lane_size = hwy::SizeTag<sizeof(T)>();
+  return detail::TestBit(lane_size, v, bit);
+}
+
+// ------------------------------ Equality
+
+// Integer equality, one overload per lane width.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Mask256<T> operator==(const Vec256<T> a, const Vec256<T> b) {
+  const auto eq = _mm256_cmpeq_epi8_mask(a.raw, b.raw);
+  return Mask256<T>{eq};
+}
+template <typename T, HWY_IF_UI16(T)>
+HWY_API Mask256<T> operator==(const Vec256<T> a, const Vec256<T> b) {
+  const auto eq = _mm256_cmpeq_epi16_mask(a.raw, b.raw);
+  return Mask256<T>{eq};
+}
+template <typename T, HWY_IF_UI32(T)>
+HWY_API Mask256<T> operator==(const Vec256<T> a, const Vec256<T> b) {
+  const auto eq = _mm256_cmpeq_epi32_mask(a.raw, b.raw);
+  return Mask256<T>{eq};
+}
+template <typename T, HWY_IF_UI64(T)>
+HWY_API Mask256<T> operator==(const Vec256<T> a, const Vec256<T> b) {
+  const auto eq = _mm256_cmpeq_epi64_mask(a.raw, b.raw);
+  return Mask256<T>{eq};
+}
+
+// Floating-point equality uses the ordered, non-signaling predicate: lanes
+// where either input is NaN compare false.
+#if HWY_HAVE_FLOAT16
+HWY_API Mask256<float16_t> operator==(Vec256<float16_t> a,
+                                      Vec256<float16_t> b) {
+  const auto eq = _mm256_cmp_ph_mask(a.raw, b.raw, _CMP_EQ_OQ);
+  return Mask256<float16_t>{eq};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Mask256<float> operator==(Vec256<float> a, Vec256<float> b) {
+  const auto eq = _mm256_cmp_ps_mask(a.raw, b.raw, _CMP_EQ_OQ);
+  return Mask256<float>{eq};
+}
+
+HWY_API Mask256<double> operator==(Vec256<double> a, Vec256<double> b) {
+  const auto eq = _mm256_cmp_pd_mask(a.raw, b.raw, _CMP_EQ_OQ);
+  return Mask256<double>{eq};
+}
+
+// ------------------------------ Inequality
+
+// Integer inequality, one overload per lane width.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Mask256<T> operator!=(const Vec256<T> a, const Vec256<T> b) {
+  const auto ne = _mm256_cmpneq_epi8_mask(a.raw, b.raw);
+  return Mask256<T>{ne};
+}
+template <typename T, HWY_IF_UI16(T)>
+HWY_API Mask256<T> operator!=(const Vec256<T> a, const Vec256<T> b) {
+  const auto ne = _mm256_cmpneq_epi16_mask(a.raw, b.raw);
+  return Mask256<T>{ne};
+}
+template <typename T, HWY_IF_UI32(T)>
+HWY_API Mask256<T> operator!=(const Vec256<T> a, const Vec256<T> b) {
+  const auto ne = _mm256_cmpneq_epi32_mask(a.raw, b.raw);
+  return Mask256<T>{ne};
+}
+template <typename T, HWY_IF_UI64(T)>
+HWY_API Mask256<T> operator!=(const Vec256<T> a, const Vec256<T> b) {
+  const auto ne = _mm256_cmpneq_epi64_mask(a.raw, b.raw);
+  return Mask256<T>{ne};
+}
+
+// Floating-point inequality uses the ordered, non-signaling predicate
+// (_CMP_NEQ_OQ): lanes where either input is NaN compare false.
+#if HWY_HAVE_FLOAT16
+HWY_API Mask256<float16_t> operator!=(Vec256<float16_t> a,
+                                      Vec256<float16_t> b) {
+  const auto ne = _mm256_cmp_ph_mask(a.raw, b.raw, _CMP_NEQ_OQ);
+  return Mask256<float16_t>{ne};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Mask256<float> operator!=(Vec256<float> a, Vec256<float> b) {
+  const auto ne = _mm256_cmp_ps_mask(a.raw, b.raw, _CMP_NEQ_OQ);
+  return Mask256<float>{ne};
+}
+
+HWY_API Mask256<double> operator!=(Vec256<double> a, Vec256<double> b) {
+  const auto ne = _mm256_cmp_pd_mask(a.raw, b.raw, _CMP_NEQ_OQ);
+  return Mask256<double>{ne};
+}
+
+// ------------------------------ Strict inequality
+
+// Signed integer a > b.
+HWY_API Mask256<int8_t> operator>(Vec256<int8_t> a, Vec256<int8_t> b) {
+  const auto gt = _mm256_cmpgt_epi8_mask(a.raw, b.raw);
+  return Mask256<int8_t>{gt};
+}
+HWY_API Mask256<int16_t> operator>(Vec256<int16_t> a, Vec256<int16_t> b) {
+  const auto gt = _mm256_cmpgt_epi16_mask(a.raw, b.raw);
+  return Mask256<int16_t>{gt};
+}
+HWY_API Mask256<int32_t> operator>(Vec256<int32_t> a, Vec256<int32_t> b) {
+  const auto gt = _mm256_cmpgt_epi32_mask(a.raw, b.raw);
+  return Mask256<int32_t>{gt};
+}
+HWY_API Mask256<int64_t> operator>(Vec256<int64_t> a, Vec256<int64_t> b) {
+  const auto gt = _mm256_cmpgt_epi64_mask(a.raw, b.raw);
+  return Mask256<int64_t>{gt};
+}
+
+// Unsigned integer a > b.
+HWY_API Mask256<uint8_t> operator>(Vec256<uint8_t> a, Vec256<uint8_t> b) {
+  const auto gt = _mm256_cmpgt_epu8_mask(a.raw, b.raw);
+  return Mask256<uint8_t>{gt};
+}
+HWY_API Mask256<uint16_t> operator>(Vec256<uint16_t> a, Vec256<uint16_t> b) {
+  const auto gt = _mm256_cmpgt_epu16_mask(a.raw, b.raw);
+  return Mask256<uint16_t>{gt};
+}
+HWY_API Mask256<uint32_t> operator>(Vec256<uint32_t> a, Vec256<uint32_t> b) {
+  const auto gt = _mm256_cmpgt_epu32_mask(a.raw, b.raw);
+  return Mask256<uint32_t>{gt};
+}
+HWY_API Mask256<uint64_t> operator>(Vec256<uint64_t> a, Vec256<uint64_t> b) {
+  const auto gt = _mm256_cmpgt_epu64_mask(a.raw, b.raw);
+  return Mask256<uint64_t>{gt};
+}
+
+// Floating-point a > b with the ordered, non-signaling predicate.
+#if HWY_HAVE_FLOAT16
+HWY_API Mask256<float16_t> operator>(Vec256<float16_t> a, Vec256<float16_t> b) {
+  const auto gt = _mm256_cmp_ph_mask(a.raw, b.raw, _CMP_GT_OQ);
+  return Mask256<float16_t>{gt};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Mask256<float> operator>(Vec256<float> a, Vec256<float> b) {
+  const auto gt = _mm256_cmp_ps_mask(a.raw, b.raw, _CMP_GT_OQ);
+  return Mask256<float>{gt};
+}
+HWY_API Mask256<double> operator>(Vec256<double> a, Vec256<double> b) {
+  const auto gt = _mm256_cmp_pd_mask(a.raw, b.raw, _CMP_GT_OQ);
+  return Mask256<double>{gt};
+}
+
+// ------------------------------ Weak inequality
+
+// Floating-point a >= b with the ordered, non-signaling predicate.
+#if HWY_HAVE_FLOAT16
+HWY_API Mask256<float16_t> operator>=(Vec256<float16_t> a,
+                                      Vec256<float16_t> b) {
+  const auto ge = _mm256_cmp_ph_mask(a.raw, b.raw, _CMP_GE_OQ);
+  return Mask256<float16_t>{ge};
+}
+#endif  // HWY_HAVE_FLOAT16
+
+HWY_API Mask256<float> operator>=(Vec256<float> a, Vec256<float> b) {
+  const auto ge = _mm256_cmp_ps_mask(a.raw, b.raw, _CMP_GE_OQ);
+  return Mask256<float>{ge};
+}
+HWY_API Mask256<double> operator>=(Vec256<double> a, Vec256<double> b) {
+  const auto ge = _mm256_cmp_pd_mask(a.raw, b.raw, _CMP_GE_OQ);
+  return Mask256<double>{ge};
+}
+
+// Signed integer a >= b.
+HWY_API Mask256<int8_t> operator>=(Vec256<int8_t> a, Vec256<int8_t> b) {
+  const auto ge = _mm256_cmpge_epi8_mask(a.raw, b.raw);
+  return Mask256<int8_t>{ge};
+}
+HWY_API Mask256<int16_t> operator>=(Vec256<int16_t> a, Vec256<int16_t> b) {
+  const auto ge = _mm256_cmpge_epi16_mask(a.raw, b.raw);
+  return Mask256<int16_t>{ge};
+}
+HWY_API Mask256<int32_t> operator>=(Vec256<int32_t> a, Vec256<int32_t> b) {
+  const auto ge = _mm256_cmpge_epi32_mask(a.raw, b.raw);
+  return Mask256<int32_t>{ge};
+}
+HWY_API Mask256<int64_t> operator>=(Vec256<int64_t> a, Vec256<int64_t> b) {
+  const auto ge = _mm256_cmpge_epi64_mask(a.raw, b.raw);
+  return Mask256<int64_t>{ge};
+}
+
+// Unsigned integer a >= b.
+HWY_API Mask256<uint8_t> operator>=(Vec256<uint8_t> a, Vec256<uint8_t> b) {
+  const auto ge = _mm256_cmpge_epu8_mask(a.raw, b.raw);
+  return Mask256<uint8_t>{ge};
+}
+HWY_API Mask256<uint16_t> operator>=(const Vec256<uint16_t> a,
+                                     const Vec256<uint16_t> b) {
+  const auto ge = _mm256_cmpge_epu16_mask(a.raw, b.raw);
+  return Mask256<uint16_t>{ge};
+}
+HWY_API Mask256<uint32_t> operator>=(const Vec256<uint32_t> a,
+                                     const Vec256<uint32_t> b) {
+  const auto ge = _mm256_cmpge_epu32_mask(a.raw, b.raw);
+  return Mask256<uint32_t>{ge};
+}
+HWY_API Mask256<uint64_t> operator>=(const Vec256<uint64_t> a,
+                                     const Vec256<uint64_t> b) {
+  const auto ge = _mm256_cmpge_epu64_mask(a.raw, b.raw);
+  return Mask256<uint64_t>{ge};
+}
+
+// ------------------------------ Mask
+
+namespace detail {
+
+// Size-tagged helpers that gather each lane's most significant bit into the
+// corresponding mask bit.
+template <typename T>
+HWY_INLINE Mask256<T> MaskFromVec(hwy::SizeTag<1> /*tag*/, const Vec256<T> v) {
+  const auto msbs = _mm256_movepi8_mask(v.raw);
+  return Mask256<T>{msbs};
+}
+template <typename T>
+HWY_INLINE Mask256<T> MaskFromVec(hwy::SizeTag<2> /*tag*/, const Vec256<T> v) {
+  const auto msbs = _mm256_movepi16_mask(v.raw);
+  return Mask256<T>{msbs};
+}
+template <typename T>
+HWY_INLINE Mask256<T> MaskFromVec(hwy::SizeTag<4> /*tag*/, const Vec256<T> v) {
+  const auto msbs = _mm256_movepi32_mask(v.raw);
+  return Mask256<T>{msbs};
+}
+template <typename T>
+HWY_INLINE Mask256<T> MaskFromVec(hwy::SizeTag<8> /*tag*/, const Vec256<T> v) {
+  const auto msbs = _mm256_movepi64_mask(v.raw);
+  return Mask256<T>{msbs};
+}
+
+}  // namespace detail
+
+// Converts a vector to a mask. Non-float lanes dispatch on lane size.
+template <typename T, HWY_IF_NOT_FLOAT(T)>
+HWY_API Mask256<T> MaskFromVec(const Vec256<T> v) {
+  const auto lane_size = hwy::SizeTag<sizeof(T)>();
+  return detail::MaskFromVec(lane_size, v);
+}
+// There do not seem to be native floating-point versions of these
+// instructions, so float lanes are bit-cast to same-size signed integers and
+// the resulting mask bits are re-wrapped for T.
+template <typename T, HWY_IF_FLOAT(T)>
+HWY_API Mask256<T> MaskFromVec(const Vec256<T> v) {
+  const RebindToSigned<DFromV<decltype(v)>> di;
+  const auto int_mask = MaskFromVec(BitCast(di, v));
+  return Mask256<T>{int_mask.raw};
+}
+
+// Expands a mask into a vector: lanes whose mask bit is set become all-ones,
+// the others zero. One overload per lane width.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec256<T> VecFromMask(const Mask256<T> v) {
+  const auto lanes = _mm256_movm_epi8(v.raw);
+  return Vec256<T>{lanes};
+}
+
+template <typename T, HWY_IF_UI16(T)>
+HWY_API Vec256<T> VecFromMask(const Mask256<T> v) {
+  const auto lanes = _mm256_movm_epi16(v.raw);
+  return Vec256<T>{lanes};
+}
+
+template <typename T, HWY_IF_UI32(T)>
+HWY_API Vec256<T> VecFromMask(const Mask256<T> v) {
+  const auto lanes = _mm256_movm_epi32(v.raw);
+  return Vec256<T>{lanes};
+}
+
+template <typename T, HWY_IF_UI64(T)>
+HWY_API Vec256<T> VecFromMask(const Mask256<T> v) {
+  const auto lanes = _mm256_movm_epi64(v.raw);
+  return Vec256<T>{lanes};
+}
+
+// Floating-point lanes reuse the integer expansion of matching width and cast
+// the register type.
+#if HWY_HAVE_FLOAT16
+HWY_API Vec256<float16_t> VecFromMask(const Mask256<float16_t> v) {
+  const auto lanes = _mm256_movm_epi16(v.raw);
+  return Vec256<float16_t>{_mm256_castsi256_ph(lanes)};
+}
+#endif  // HWY_HAVE_FLOAT16
+
+HWY_API Vec256<float> VecFromMask(const Mask256<float> v) {
+  const auto lanes = _mm256_movm_epi32(v.raw);
+  return Vec256<float>{_mm256_castsi256_ps(lanes)};
+}
+
+HWY_API Vec256<double> VecFromMask(const Mask256<double> v) {
+  const auto lanes = _mm256_movm_epi64(v.raw);
+  return Vec256<double>{_mm256_castsi256_pd(lanes)};
+}
+
+#else  // AVX2
+
+// Comparisons fill a lane with 1-bits if the condition is true, else 0.
+
+// Reinterprets a mask for another lane type of the same size by round-tripping
+// through the vector representation.
+template <class DTo, HWY_IF_V_SIZE_D(DTo, 32), typename TFrom>
+HWY_API MFromD<DTo> RebindMask(DTo d_to, Mask256<TFrom> m) {
+  static_assert(sizeof(TFrom) == sizeof(TFromD<DTo>), "Must have same size");
+  const Full256<TFrom> dfrom;
+  const auto from_lanes = VecFromMask(dfrom, m);
+  return MaskFromVec(BitCast(d_to, from_lanes));
+}
+
+// Per-lane test of whether all bits of `bit` are set in `v`; integer lanes
+// only.
+template <typename T>
+HWY_API Mask256<T> TestBit(const Vec256<T> v, const Vec256<T> bit) {
+  static_assert(!hwy::IsFloat<T>(), "Only integer vectors supported");
+  const auto selected = v & bit;
+  return selected == bit;
+}
+
+// ------------------------------ Equality
+
+// Integer equality, one overload per lane width. The result register is the
+// mask itself.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Mask256<T> operator==(Vec256<T> a, Vec256<T> b) {
+  const auto eq = _mm256_cmpeq_epi8(a.raw, b.raw);
+  return Mask256<T>{eq};
+}
+
+template <typename T, HWY_IF_UI16(T)>
+HWY_API Mask256<T> operator==(Vec256<T> a, Vec256<T> b) {
+  const auto eq = _mm256_cmpeq_epi16(a.raw, b.raw);
+  return Mask256<T>{eq};
+}
+
+template <typename T, HWY_IF_UI32(T)>
+HWY_API Mask256<T> operator==(Vec256<T> a, Vec256<T> b) {
+  const auto eq = _mm256_cmpeq_epi32(a.raw, b.raw);
+  return Mask256<T>{eq};
+}
+
+template <typename T, HWY_IF_UI64(T)>
+HWY_API Mask256<T> operator==(Vec256<T> a, Vec256<T> b) {
+  const auto eq = _mm256_cmpeq_epi64(a.raw, b.raw);
+  return Mask256<T>{eq};
+}
+
+// Floating-point equality uses the ordered, non-signaling predicate: lanes
+// where either input is NaN compare false.
+HWY_API Mask256<float> operator==(Vec256<float> a, Vec256<float> b) {
+  const auto eq = _mm256_cmp_ps(a.raw, b.raw, _CMP_EQ_OQ);
+  return Mask256<float>{eq};
+}
+
+HWY_API Mask256<double> operator==(Vec256<double> a, Vec256<double> b) {
+  const auto eq = _mm256_cmp_pd(a.raw, b.raw, _CMP_EQ_OQ);
+  return Mask256<double>{eq};
+}
+
+// ------------------------------ Inequality
+
+// Lane types other than float/double: negate the equality mask.
+template <typename T, HWY_IF_NOT_FLOAT3264(T)>
+HWY_API Mask256<T> operator!=(Vec256<T> a, Vec256<T> b) {
+  const auto equal = (a == b);
+  return Not(equal);
+}
+// float/double use the ordered, non-signaling predicate (_CMP_NEQ_OQ): lanes
+// where either input is NaN compare false.
+HWY_API Mask256<float> operator!=(Vec256<float> a, Vec256<float> b) {
+  const auto ne = _mm256_cmp_ps(a.raw, b.raw, _CMP_NEQ_OQ);
+  return Mask256<float>{ne};
+}
+HWY_API Mask256<double> operator!=(Vec256<double> a, Vec256<double> b) {
+  const auto ne = _mm256_cmp_pd(a.raw, b.raw, _CMP_NEQ_OQ);
+  return Mask256<double>{ne};
+}
+
+// ------------------------------ Strict inequality
+
+// Tag dispatch instead of SFINAE for MSVC 2017 compatibility
+namespace detail {
+
+// Before 9.3, GCC's immintrin.h declares the operands as plain char, which
+// may be unsigned and then turns cmpgt_epi8 into an unsigned comparison
+// rather than the intended signed one. The workaround casts to an explicitly
+// signed vector type. See https://godbolt.org/z/PL7Ujy
+#if HWY_COMPILER_GCC_ACTUAL != 0 && HWY_COMPILER_GCC_ACTUAL < 903
+#define HWY_AVX2_GCC_CMPGT8_WORKAROUND 1
+#else
+#define HWY_AVX2_GCC_CMPGT8_WORKAROUND 0
+#endif
+
+// Signed integer a > b, one overload per lane width.
+HWY_API Mask256<int8_t> Gt(hwy::SignedTag /*tag*/, Vec256<int8_t> a,
+                           Vec256<int8_t> b) {
+#if HWY_AVX2_GCC_CMPGT8_WORKAROUND
+  using i8x32 = signed char __attribute__((__vector_size__(32)));
+  const auto gt = static_cast<__m256i>(reinterpret_cast<i8x32>(a.raw) >
+                                       reinterpret_cast<i8x32>(b.raw));
+#else
+  const auto gt = _mm256_cmpgt_epi8(a.raw, b.raw);
+#endif
+  return Mask256<int8_t>{gt};
+}
+HWY_API Mask256<int16_t> Gt(hwy::SignedTag /*tag*/, Vec256<int16_t> a,
+                            Vec256<int16_t> b) {
+  const auto gt = _mm256_cmpgt_epi16(a.raw, b.raw);
+  return Mask256<int16_t>{gt};
+}
+HWY_API Mask256<int32_t> Gt(hwy::SignedTag /*tag*/, Vec256<int32_t> a,
+                            Vec256<int32_t> b) {
+  const auto gt = _mm256_cmpgt_epi32(a.raw, b.raw);
+  return Mask256<int32_t>{gt};
+}
+HWY_API Mask256<int64_t> Gt(hwy::SignedTag /*tag*/, Vec256<int64_t> a,
+                            Vec256<int64_t> b) {
+  const auto gt = _mm256_cmpgt_epi64(a.raw, b.raw);
+  return Mask256<int64_t>{gt};
+}
+
+// Unsigned a > b: flipping the top bit of both operands maps unsigned order
+// onto signed order, so the signed comparison can be reused.
+template <typename T>
+HWY_INLINE Mask256<T> Gt(hwy::UnsignedTag /*tag*/, Vec256<T> a, Vec256<T> b) {
+  const Full256<T> du;
+  const RebindToSigned<decltype(du)> di;
+  const Vec256<T> msb = Set(du, (LimitsMax<T>() >> 1) + 1);
+  const auto a_flipped = BitCast(di, Xor(a, msb));
+  const auto b_flipped = BitCast(di, Xor(b, msb));
+  return RebindMask(du, a_flipped > b_flipped);
+}
+
+// Floating-point a > b with the ordered, non-signaling predicate.
+HWY_API Mask256<float> Gt(hwy::FloatTag /*tag*/, Vec256<float> a,
+                          Vec256<float> b) {
+  const auto gt = _mm256_cmp_ps(a.raw, b.raw, _CMP_GT_OQ);
+  return Mask256<float>{gt};
+}
+HWY_API Mask256<double> Gt(hwy::FloatTag /*tag*/, Vec256<double> a,
+                           Vec256<double> b) {
+  const auto gt = _mm256_cmp_pd(a.raw, b.raw, _CMP_GT_OQ);
+  return Mask256<double>{gt};
+}
+
+}  // namespace detail
+
+// a > b; dispatches on the type category (signed/unsigned/float) of T.
+template <typename T>
+HWY_API Mask256<T> operator>(Vec256<T> a, Vec256<T> b) {
+  const auto type_tag = hwy::TypeTag<T>();
+  return detail::Gt(type_tag, a, b);
+}
+
+// ------------------------------ Weak inequality
+
+namespace detail {
+
+// Integer a >= b is computed as !(b > a).
+template <typename T>
+HWY_INLINE Mask256<T> Ge(hwy::SignedTag tag, Vec256<T> a, Vec256<T> b) {
+  const auto b_greater = Gt(tag, b, a);
+  return Not(b_greater);
+}
+
+template <typename T>
+HWY_INLINE Mask256<T> Ge(hwy::UnsignedTag tag, Vec256<T> a, Vec256<T> b) {
+  const auto b_greater = Gt(tag, b, a);
+  return Not(b_greater);
+}
+
+// Floating-point a >= b with the ordered, non-signaling predicate.
+HWY_INLINE Mask256<float> Ge(hwy::FloatTag /*tag*/, Vec256<float> a,
+                             Vec256<float> b) {
+  const auto ge = _mm256_cmp_ps(a.raw, b.raw, _CMP_GE_OQ);
+  return Mask256<float>{ge};
+}
+HWY_INLINE Mask256<double> Ge(hwy::FloatTag /*tag*/, Vec256<double> a,
+                              Vec256<double> b) {
+  const auto ge = _mm256_cmp_pd(a.raw, b.raw, _CMP_GE_OQ);
+  return Mask256<double>{ge};
+}
+
+}  // namespace detail
+
+// a >= b; dispatches on the type category (signed/unsigned/float) of T.
+template <typename T>
+HWY_API Mask256<T> operator>=(Vec256<T> a, Vec256<T> b) {
+  const auto type_tag = hwy::TypeTag<T>();
+  return detail::Ge(type_tag, a, b);
+}
+
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// ------------------------------ Reversed comparisons
+
+// a < b, expressed as b > a.
+template <typename T>
+HWY_API Mask256<T> operator<(const Vec256<T> a, const Vec256<T> b) {
+  const auto b_greater = (b > a);
+  return b_greater;
+}
+
+// a <= b, expressed as b >= a.
+template <typename T>
+HWY_API Mask256<T> operator<=(const Vec256<T> a, const Vec256<T> b) {
+  const auto b_not_less = (b >= a);
+  return b_not_less;
+}
+
+// ------------------------------ Min (Gt, IfThenElse)
+
+// Unsigned lanes
+HWY_API Vec256<uint8_t> Min(const Vec256<uint8_t> a, const Vec256<uint8_t> b) {
+  const auto smaller = _mm256_min_epu8(a.raw, b.raw);
+  return Vec256<uint8_t>{smaller};
+}
+HWY_API Vec256<uint16_t> Min(const Vec256<uint16_t> a,
+                             const Vec256<uint16_t> b) {
+  const auto smaller = _mm256_min_epu16(a.raw, b.raw);
+  return Vec256<uint16_t>{smaller};
+}
+HWY_API Vec256<uint32_t> Min(const Vec256<uint32_t> a,
+                             const Vec256<uint32_t> b) {
+  const auto smaller = _mm256_min_epu32(a.raw, b.raw);
+  return Vec256<uint32_t>{smaller};
+}
+HWY_API Vec256<uint64_t> Min(const Vec256<uint64_t> a,
+                             const Vec256<uint64_t> b) {
+#if HWY_TARGET <= HWY_AVX3
+  return Vec256<uint64_t>{_mm256_min_epu64(a.raw, b.raw)};
+#else
+  // No native 64-bit unsigned min here: flip the top bit of both operands so
+  // a signed comparison gives the unsigned order, then select the smaller.
+  const Full256<uint64_t> du;
+  const Full256<int64_t> di;
+  const auto msb = Set(du, 1ull << 63);
+  const auto a_flipped = BitCast(di, a ^ msb);
+  const auto b_flipped = BitCast(di, b ^ msb);
+  const auto a_greater = RebindMask(du, a_flipped > b_flipped);
+  return IfThenElse(a_greater, b, a);
+#endif
+}
+
+// Signed lanes
+HWY_API Vec256<int8_t> Min(const Vec256<int8_t> a, const Vec256<int8_t> b) {
+  const auto smaller = _mm256_min_epi8(a.raw, b.raw);
+  return Vec256<int8_t>{smaller};
+}
+HWY_API Vec256<int16_t> Min(const Vec256<int16_t> a, const Vec256<int16_t> b) {
+  const auto smaller = _mm256_min_epi16(a.raw, b.raw);
+  return Vec256<int16_t>{smaller};
+}
+HWY_API Vec256<int32_t> Min(const Vec256<int32_t> a, const Vec256<int32_t> b) {
+  const auto smaller = _mm256_min_epi32(a.raw, b.raw);
+  return Vec256<int32_t>{smaller};
+}
+HWY_API Vec256<int64_t> Min(const Vec256<int64_t> a, const Vec256<int64_t> b) {
+#if HWY_TARGET <= HWY_AVX3
+  return Vec256<int64_t>{_mm256_min_epi64(a.raw, b.raw)};
+#else
+  // No native 64-bit signed min here: compare, then select.
+  const auto a_less = (a < b);
+  return IfThenElse(a_less, a, b);
+#endif
+}
+
+// Floating-point lanes
+#if HWY_HAVE_FLOAT16
+HWY_API Vec256<float16_t> Min(Vec256<float16_t> a, Vec256<float16_t> b) {
+  const auto smaller = _mm256_min_ph(a.raw, b.raw);
+  return Vec256<float16_t>{smaller};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec256<float> Min(const Vec256<float> a, const Vec256<float> b) {
+  const auto smaller = _mm256_min_ps(a.raw, b.raw);
+  return Vec256<float>{smaller};
+}
+HWY_API Vec256<double> Min(const Vec256<double> a, const Vec256<double> b) {
+  const auto smaller = _mm256_min_pd(a.raw, b.raw);
+  return Vec256<double>{smaller};
+}
+
+// ------------------------------ Max (Gt, IfThenElse)
+
+// Unsigned: lane-wise maximum; 8/16/32-bit lanes map directly to one intrinsic.
+HWY_API Vec256<uint8_t> Max(const Vec256<uint8_t> a, const Vec256<uint8_t> b) {
+  return Vec256<uint8_t>{_mm256_max_epu8(a.raw, b.raw)};
+}
+HWY_API Vec256<uint16_t> Max(const Vec256<uint16_t> a,
+                             const Vec256<uint16_t> b) {
+  return Vec256<uint16_t>{_mm256_max_epu16(a.raw, b.raw)};
+}
+HWY_API Vec256<uint32_t> Max(const Vec256<uint32_t> a,
+                             const Vec256<uint32_t> b) {
+  return Vec256<uint32_t>{_mm256_max_epu32(a.raw, b.raw)};
+}
+HWY_API Vec256<uint64_t> Max(const Vec256<uint64_t> a,
+                             const Vec256<uint64_t> b) {
+#if HWY_TARGET <= HWY_AVX3  // AVX3 targets use the 64-bit unsigned max intrinsic.
+  return Vec256<uint64_t>{_mm256_max_epu64(a.raw, b.raw)};
+#else  // Otherwise emulate the unsigned comparison with a signed one.
+  const Full256<uint64_t> du;
+  const Full256<int64_t> di;
+  const auto msb = Set(du, 1ull << 63);  // Only the top bit of each lane is set.
+  const auto gt = RebindMask(du, BitCast(di, a ^ msb) > BitCast(di, b ^ msb));  // Flipping the top bit of both inputs makes signed '>' order them as unsigned.
+  return IfThenElse(gt, a, b);  // a where a > b, else b.
+#endif
+}
+
+// Signed: lane-wise maximum; 8/16/32-bit lanes map directly to one intrinsic.
+HWY_API Vec256<int8_t> Max(const Vec256<int8_t> a, const Vec256<int8_t> b) {
+  return Vec256<int8_t>{_mm256_max_epi8(a.raw, b.raw)};
+}
+HWY_API Vec256<int16_t> Max(const Vec256<int16_t> a, const Vec256<int16_t> b) {
+  return Vec256<int16_t>{_mm256_max_epi16(a.raw, b.raw)};
+}
+HWY_API Vec256<int32_t> Max(const Vec256<int32_t> a, const Vec256<int32_t> b) {
+  return Vec256<int32_t>{_mm256_max_epi32(a.raw, b.raw)};
+}
+HWY_API Vec256<int64_t> Max(const Vec256<int64_t> a, const Vec256<int64_t> b) {
+#if HWY_TARGET <= HWY_AVX3  // The 64-bit max intrinsic is only used on AVX3 targets.
+  return Vec256<int64_t>{_mm256_max_epi64(a.raw, b.raw)};
+#else  // Otherwise compare and select: b where a < b, else a.
+  return IfThenElse(a < b, b, a);
+#endif
+}
+
+// Float: lane-wise maximum via the packed max_ph / max_ps / max_pd intrinsics.
+#if HWY_HAVE_FLOAT16  // The float16_t overload is compiled only when enabled.
+HWY_API Vec256<float16_t> Max(Vec256<float16_t> a, Vec256<float16_t> b) {
+  return Vec256<float16_t>{_mm256_max_ph(a.raw, b.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec256<float> Max(const Vec256<float> a, const Vec256<float> b) {
+  return Vec256<float>{_mm256_max_ps(a.raw, b.raw)};
+}
+HWY_API Vec256<double> Max(const Vec256<double> a, const Vec256<double> b) {
+  return Vec256<double>{_mm256_max_pd(a.raw, b.raw)};
+}
+
+// ------------------------------ Iota
+
+namespace detail {  // Iota0(d): constant vectors 0, 1, 2, ..; one overload per lane type.
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {  // 32 values, 31 down to 0; set_* takes the highest lane first, so lane i holds i.
+  return VFromD<D>{_mm256_set_epi8(
+      static_cast<char>(31), static_cast<char>(30), static_cast<char>(29),
+      static_cast<char>(28), static_cast<char>(27), static_cast<char>(26),
+      static_cast<char>(25), static_cast<char>(24), static_cast<char>(23),
+      static_cast<char>(22), static_cast<char>(21), static_cast<char>(20),
+      static_cast<char>(19), static_cast<char>(18), static_cast<char>(17),
+      static_cast<char>(16), static_cast<char>(15), static_cast<char>(14),
+      static_cast<char>(13), static_cast<char>(12), static_cast<char>(11),
+      static_cast<char>(10), static_cast<char>(9), static_cast<char>(8),
+      static_cast<char>(7), static_cast<char>(6), static_cast<char>(5),
+      static_cast<char>(4), static_cast<char>(3), static_cast<char>(2),
+      static_cast<char>(1), static_cast<char>(0))};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI16_D(D)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {  // 16 values, 15 down to 0.
+  return VFromD<D>{_mm256_set_epi16(
+      int16_t{15}, int16_t{14}, int16_t{13}, int16_t{12}, int16_t{11},
+      int16_t{10}, int16_t{9}, int16_t{8}, int16_t{7}, int16_t{6}, int16_t{5},
+      int16_t{4}, int16_t{3}, int16_t{2}, int16_t{1}, int16_t{0})};
+}
+
+#if HWY_HAVE_FLOAT16
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F16_D(D)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {  // 16 float16_t values, 15 down to 0.
+  return VFromD<D>{
+      _mm256_set_ph(float16_t{15}, float16_t{14}, float16_t{13}, float16_t{12},
+                    float16_t{11}, float16_t{10}, float16_t{9}, float16_t{8},
+                    float16_t{7}, float16_t{6}, float16_t{5}, float16_t{4},
+                    float16_t{3}, float16_t{2}, float16_t{1}, float16_t{0})};
+}
+#endif  // HWY_HAVE_FLOAT16
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI32_D(D)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {  // 8 values, 7 down to 0.
+  return VFromD<D>{_mm256_set_epi32(int32_t{7}, int32_t{6}, int32_t{5},
+                                    int32_t{4}, int32_t{3}, int32_t{2},
+                                    int32_t{1}, int32_t{0})};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI64_D(D)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {  // 4 values, 3 down to 0.
+  return VFromD<D>{
+      _mm256_set_epi64x(int64_t{3}, int64_t{2}, int64_t{1}, int64_t{0})};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {  // 8 floats, 7.0f down to 0.0f.
+  return VFromD<D>{
+      _mm256_set_ps(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f)};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {  // 4 doubles, 3.0 down to 0.0.
+  return VFromD<D>{_mm256_set_pd(3.0, 2.0, 1.0, 0.0)};
+}
+
+}  // namespace detail
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), typename T2>
+HWY_API VFromD<D> Iota(D d, const T2 first) {  // Returns Iota0(d) plus `first` broadcast to every lane.
+  return detail::Iota0(d) + Set(d, static_cast<TFromD<D>>(first));  // `first` is cast to the lane type before the add.
+}
+
+// ------------------------------ FirstN (Iota, Lt)
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), class M = MFromD<D>>
+HWY_API M FirstN(const D d, size_t n) {  // Returns a mask selecting the first n lanes.
+#if HWY_TARGET <= HWY_AVX3  // AVX3: build the mask directly from a bit pattern.
+  (void)d;
+  constexpr size_t kN = MaxLanes(d);
+#if HWY_ARCH_X86_64
+  const uint64_t all = (1ull << kN) - 1;  // Low kN bits set.
+  // BZHI only looks at the lower 8 bits of n!
+  return M::FromBits((n > 255) ? all : _bzhi_u64(all, n));  // Hence n > 255 is handled explicitly as "all lanes".
+#else
+  const uint32_t all = static_cast<uint32_t>((1ull << kN) - 1);  // Low kN bits set.
+  // BZHI only looks at the lower 8 bits of n!
+  return M::FromBits((n > 255) ? all
+                               : _bzhi_u32(all, static_cast<uint32_t>(n)));
+#endif  // HWY_ARCH_X86_64
+#else  // Otherwise: compare the lane indices 0, 1, .. against n.
+  const RebindToSigned<decltype(d)> di;  // Signed comparisons are cheaper.
+  using TI = TFromD<decltype(di)>;
+  return RebindMask(d, detail::Iota0(di) < Set(di, static_cast<TI>(n)));
+#endif
+}
+
+// ================================================== ARITHMETIC
+
+// ------------------------------ Addition
+
+// Unsigned: lane-wise a + b (not clamped; see SaturatedAdd for clamping).
+HWY_API Vec256<uint8_t> operator+(Vec256<uint8_t> a, Vec256<uint8_t> b) {
+  return Vec256<uint8_t>{_mm256_add_epi8(a.raw, b.raw)};
+}
+HWY_API Vec256<uint16_t> operator+(Vec256<uint16_t> a, Vec256<uint16_t> b) {
+  return Vec256<uint16_t>{_mm256_add_epi16(a.raw, b.raw)};
+}
+HWY_API Vec256<uint32_t> operator+(Vec256<uint32_t> a, Vec256<uint32_t> b) {
+  return Vec256<uint32_t>{_mm256_add_epi32(a.raw, b.raw)};
+}
+HWY_API Vec256<uint64_t> operator+(Vec256<uint64_t> a, Vec256<uint64_t> b) {
+  return Vec256<uint64_t>{_mm256_add_epi64(a.raw, b.raw)};
+}
+
+
+// Signed: lane-wise a + b using the add_epi8/16/32/64 intrinsics (not clamped).
+HWY_API Vec256<int8_t> operator+(Vec256<int8_t> a, Vec256<int8_t> b) {
+  return Vec256<int8_t>{_mm256_add_epi8(a.raw, b.raw)};
+}
+HWY_API Vec256<int16_t> operator+(Vec256<int16_t> a, Vec256<int16_t> b) {
+  return Vec256<int16_t>{_mm256_add_epi16(a.raw, b.raw)};
+}
+HWY_API Vec256<int32_t> operator+(Vec256<int32_t> a, Vec256<int32_t> b) {
+  return Vec256<int32_t>{_mm256_add_epi32(a.raw, b.raw)};
+}
+HWY_API Vec256<int64_t> operator+(Vec256<int64_t> a, Vec256<int64_t> b) {
+  return Vec256<int64_t>{_mm256_add_epi64(a.raw, b.raw)};
+}
+
+
+// Float: lane-wise a + b via the add_ph / add_ps / add_pd intrinsics.
+#if HWY_HAVE_FLOAT16  // The float16_t overload is compiled only when enabled.
+HWY_API Vec256<float16_t> operator+(Vec256<float16_t> a, Vec256<float16_t> b) {
+  return Vec256<float16_t>{_mm256_add_ph(a.raw, b.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec256<float> operator+(Vec256<float> a, Vec256<float> b) {
+  return Vec256<float>{_mm256_add_ps(a.raw, b.raw)};
+}
+HWY_API Vec256<double> operator+(Vec256<double> a, Vec256<double> b) {
+  return Vec256<double>{_mm256_add_pd(a.raw, b.raw)};
+}
+
+// ------------------------------ Subtraction
+
+// Unsigned: lane-wise a - b (not clamped; see SaturatedSub for clamping).
+HWY_API Vec256<uint8_t> operator-(Vec256<uint8_t> a, Vec256<uint8_t> b) {
+  return Vec256<uint8_t>{_mm256_sub_epi8(a.raw, b.raw)};
+}
+HWY_API Vec256<uint16_t> operator-(Vec256<uint16_t> a, Vec256<uint16_t> b) {
+  return Vec256<uint16_t>{_mm256_sub_epi16(a.raw, b.raw)};
+}
+HWY_API Vec256<uint32_t> operator-(Vec256<uint32_t> a, Vec256<uint32_t> b) {
+  return Vec256<uint32_t>{_mm256_sub_epi32(a.raw, b.raw)};
+}
+HWY_API Vec256<uint64_t> operator-(Vec256<uint64_t> a, Vec256<uint64_t> b) {
+  return Vec256<uint64_t>{_mm256_sub_epi64(a.raw, b.raw)};
+}
+
+
+// Signed: lane-wise a - b using the sub_epi8/16/32/64 intrinsics (not clamped).
+HWY_API Vec256<int8_t> operator-(Vec256<int8_t> a, Vec256<int8_t> b) {
+  return Vec256<int8_t>{_mm256_sub_epi8(a.raw, b.raw)};
+}
+HWY_API Vec256<int16_t> operator-(Vec256<int16_t> a, Vec256<int16_t> b) {
+  return Vec256<int16_t>{_mm256_sub_epi16(a.raw, b.raw)};
+}
+HWY_API Vec256<int32_t> operator-(Vec256<int32_t> a, Vec256<int32_t> b) {
+  return Vec256<int32_t>{_mm256_sub_epi32(a.raw, b.raw)};
+}
+HWY_API Vec256<int64_t> operator-(Vec256<int64_t> a, Vec256<int64_t> b) {
+  return Vec256<int64_t>{_mm256_sub_epi64(a.raw, b.raw)};
+}
+
+
+// Float: lane-wise a - b via the sub_ph / sub_ps / sub_pd intrinsics.
+#if HWY_HAVE_FLOAT16  // The float16_t overload is compiled only when enabled.
+HWY_API Vec256<float16_t> operator-(Vec256<float16_t> a, Vec256<float16_t> b) {
+  return Vec256<float16_t>{_mm256_sub_ph(a.raw, b.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec256<float> operator-(Vec256<float> a, Vec256<float> b) {
+  return Vec256<float>{_mm256_sub_ps(a.raw, b.raw)};
+}
+HWY_API Vec256<double> operator-(Vec256<double> a, Vec256<double> b) {
+  return Vec256<double>{_mm256_sub_pd(a.raw, b.raw)};
+}
+
+// ------------------------------ SumsOf8
+HWY_API Vec256<uint64_t> SumsOf8(Vec256<uint8_t> v) {  // Sums each group of 8 bytes into one 64-bit lane.
+  return Vec256<uint64_t>{_mm256_sad_epu8(v.raw, _mm256_setzero_si256())};  // Sum of absolute differences against zero is the plain sum.
+}
+
+HWY_API Vec256<uint64_t> SumsOf8AbsDiff(Vec256<uint8_t> a, Vec256<uint8_t> b) {  // Per 64-bit lane: sum of |a - b| over its 8 bytes.
+  return Vec256<uint64_t>{_mm256_sad_epu8(a.raw, b.raw)};
+}
+
+// ------------------------------ SaturatedAdd
+
+// Returns a + b clamped to the destination range.
+
+// Unsigned: 8/16-bit lanes use the adds_epu8 / adds_epu16 intrinsics.
+HWY_API Vec256<uint8_t> SaturatedAdd(Vec256<uint8_t> a, Vec256<uint8_t> b) {
+  return Vec256<uint8_t>{_mm256_adds_epu8(a.raw, b.raw)};
+}
+HWY_API Vec256<uint16_t> SaturatedAdd(Vec256<uint16_t> a, Vec256<uint16_t> b) {
+  return Vec256<uint16_t>{_mm256_adds_epu16(a.raw, b.raw)};
+}
+
+// Signed: 8/16-bit lanes use the adds_epi8 / adds_epi16 intrinsics.
+HWY_API Vec256<int8_t> SaturatedAdd(Vec256<int8_t> a, Vec256<int8_t> b) {
+  return Vec256<int8_t>{_mm256_adds_epi8(a.raw, b.raw)};
+}
+HWY_API Vec256<int16_t> SaturatedAdd(Vec256<int16_t> a, Vec256<int16_t> b) {
+  return Vec256<int16_t>{_mm256_adds_epi16(a.raw, b.raw)};
+}
+
+#if HWY_TARGET <= HWY_AVX3  // 32/64-bit signed overloads are defined here only for AVX3 targets.
+HWY_API Vec256<int32_t> SaturatedAdd(Vec256<int32_t> a, Vec256<int32_t> b) {
+  const DFromV<decltype(a)> d;
+  const auto sum = a + b;  // Unclamped sum; replaced below in lanes that overflowed.
+  const auto overflow_mask = MaskFromVec(  // 0x42: bit set where a and b agree but sum differs, i.e. a sign flip.
+      Vec256<int32_t>{_mm256_ternarylogic_epi32(a.raw, b.raw, sum.raw, 0x42)});
+  const auto i32_max = Set(d, LimitsMax<int32_t>());
+  const Vec256<int32_t> overflow_result{_mm256_mask_ternarylogic_epi32(  // 0x55 = NOT of the last operand: ~max where a is negative, else max.
+      i32_max.raw, MaskFromVec(a).raw, i32_max.raw, i32_max.raw, 0x55)};
+  return IfThenElse(overflow_mask, overflow_result, sum);
+}
+
+HWY_API Vec256<int64_t> SaturatedAdd(Vec256<int64_t> a, Vec256<int64_t> b) {
+  const DFromV<decltype(a)> d;
+  const auto sum = a + b;  // Unclamped sum; replaced below in lanes that overflowed.
+  const auto overflow_mask = MaskFromVec(  // 0x42: bit set where a and b agree but sum differs, i.e. a sign flip.
+      Vec256<int64_t>{_mm256_ternarylogic_epi64(a.raw, b.raw, sum.raw, 0x42)});
+  const auto i64_max = Set(d, LimitsMax<int64_t>());
+  const Vec256<int64_t> overflow_result{_mm256_mask_ternarylogic_epi64(  // 0x55 = NOT of the last operand: ~max where a is negative, else max.
+      i64_max.raw, MaskFromVec(a).raw, i64_max.raw, i64_max.raw, 0x55)};
+  return IfThenElse(overflow_mask, overflow_result, sum);
+}
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// ------------------------------ SaturatedSub
+
+// Returns a - b clamped to the destination range.
+
+// Unsigned: 8/16-bit lanes use the subs_epu8 / subs_epu16 intrinsics.
+HWY_API Vec256<uint8_t> SaturatedSub(Vec256<uint8_t> a, Vec256<uint8_t> b) {
+  return Vec256<uint8_t>{_mm256_subs_epu8(a.raw, b.raw)};
+}
+HWY_API Vec256<uint16_t> SaturatedSub(Vec256<uint16_t> a, Vec256<uint16_t> b) {
+  return Vec256<uint16_t>{_mm256_subs_epu16(a.raw, b.raw)};
+}
+
+// Signed: 8/16-bit lanes use the subs_epi8 / subs_epi16 intrinsics.
+HWY_API Vec256<int8_t> SaturatedSub(Vec256<int8_t> a, Vec256<int8_t> b) {
+  return Vec256<int8_t>{_mm256_subs_epi8(a.raw, b.raw)};
+}
+HWY_API Vec256<int16_t> SaturatedSub(Vec256<int16_t> a, Vec256<int16_t> b) {
+  return Vec256<int16_t>{_mm256_subs_epi16(a.raw, b.raw)};
+}
+
+#if HWY_TARGET <= HWY_AVX3  // 32/64-bit signed overloads are defined here only for AVX3 targets.
+HWY_API Vec256<int32_t> SaturatedSub(Vec256<int32_t> a, Vec256<int32_t> b) {
+  const DFromV<decltype(a)> d;
+  const auto diff = a - b;  // Unclamped difference; replaced below in lanes that overflowed.
+  const auto overflow_mask = MaskFromVec(  // 0x18: bit set where a differs from b and diff differs from a.
+      Vec256<int32_t>{_mm256_ternarylogic_epi32(a.raw, b.raw, diff.raw, 0x18)});
+  const auto i32_max = Set(d, LimitsMax<int32_t>());
+  const Vec256<int32_t> overflow_result{_mm256_mask_ternarylogic_epi32(  // 0x55 = NOT of the last operand: ~max where a is negative, else max.
+      i32_max.raw, MaskFromVec(a).raw, i32_max.raw, i32_max.raw, 0x55)};
+  return IfThenElse(overflow_mask, overflow_result, diff);
+}
+
+HWY_API Vec256<int64_t> SaturatedSub(Vec256<int64_t> a, Vec256<int64_t> b) {
+  const DFromV<decltype(a)> d;
+  const auto diff = a - b;  // Unclamped difference; replaced below in lanes that overflowed.
+  const auto overflow_mask = MaskFromVec(  // 0x18: bit set where a differs from b and diff differs from a.
+      Vec256<int64_t>{_mm256_ternarylogic_epi64(a.raw, b.raw, diff.raw, 0x18)});
+  const auto i64_max = Set(d, LimitsMax<int64_t>());
+  const Vec256<int64_t> overflow_result{_mm256_mask_ternarylogic_epi64(  // 0x55 = NOT of the last operand: ~max where a is negative, else max.
+      i64_max.raw, MaskFromVec(a).raw, i64_max.raw, i64_max.raw, 0x55)};
+  return IfThenElse(overflow_mask, overflow_result, diff);
+}
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// ------------------------------ Average
+
+// Returns (a + b + 1) / 2
+
+// Unsigned: only 8/16-bit overloads appear here, via avg_epu8 / avg_epu16.
+HWY_API Vec256<uint8_t> AverageRound(Vec256<uint8_t> a, Vec256<uint8_t> b) {
+  return Vec256<uint8_t>{_mm256_avg_epu8(a.raw, b.raw)};
+}
+HWY_API Vec256<uint16_t> AverageRound(Vec256<uint16_t> a, Vec256<uint16_t> b) {
+  return Vec256<uint16_t>{_mm256_avg_epu16(a.raw, b.raw)};
+}
+
+// ------------------------------ Abs (Sub)
+
+// Returns absolute value, except that LimitsMin() maps to LimitsMax() + 1.
+HWY_API Vec256<int8_t> Abs(Vec256<int8_t> v) {
+#if HWY_COMPILER_MSVC
+  // Workaround for incorrect codegen? (wrong result)
+  const DFromV<decltype(v)> d;
+  const auto zero = Zero(d);
+  return Vec256<int8_t>{_mm256_max_epi8(v.raw, (zero - v).raw)};  // max(v, 0 - v) instead of the abs intrinsic.
+#else  // Other compilers use the abs intrinsic directly.
+  return Vec256<int8_t>{_mm256_abs_epi8(v.raw)};
+#endif
+}
+HWY_API Vec256<int16_t> Abs(const Vec256<int16_t> v) {
+  return Vec256<int16_t>{_mm256_abs_epi16(v.raw)};
+}
+HWY_API Vec256<int32_t> Abs(const Vec256<int32_t> v) {
+  return Vec256<int32_t>{_mm256_abs_epi32(v.raw)};
+}
+// i64 is implemented after BroadcastSignBit.
+
+template <typename T, HWY_IF_FLOAT(T)>
+HWY_API Vec256<T> Abs(const Vec256<T> v) {  // Floating-point Abs, implemented as a bitwise AND.
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;  // Same-width signed integer lanes, used to build the bit mask.
+  using TI = TFromD<decltype(di)>;
+  return v & BitCast(d, Set(di, static_cast<TI>(~SignMask<TI>())));  // AND with the complement of SignMask in every lane.
+}
+
+// ------------------------------ Integer multiplication
+
+// Unsigned: 16/32-bit lanes via the mullo intrinsics.
+HWY_API Vec256<uint16_t> operator*(Vec256<uint16_t> a, Vec256<uint16_t> b) {
+  return Vec256<uint16_t>{_mm256_mullo_epi16(a.raw, b.raw)};
+}
+HWY_API Vec256<uint32_t> operator*(Vec256<uint32_t> a, Vec256<uint32_t> b) {
+  return Vec256<uint32_t>{_mm256_mullo_epi32(a.raw, b.raw)};
+}
+
+// Signed: 16/32-bit lanes via the same mullo intrinsics as the unsigned overloads.
+HWY_API Vec256<int16_t> operator*(Vec256<int16_t> a, Vec256<int16_t> b) {
+  return Vec256<int16_t>{_mm256_mullo_epi16(a.raw, b.raw)};
+}
+HWY_API Vec256<int32_t> operator*(Vec256<int32_t> a, Vec256<int32_t> b) {
+  return Vec256<int32_t>{_mm256_mullo_epi32(a.raw, b.raw)};
+}
+
+// Returns the upper 16 bits of a * b in each lane (mulhi_epu16 / mulhi_epi16).
+HWY_API Vec256<uint16_t> MulHigh(Vec256<uint16_t> a, Vec256<uint16_t> b) {
+  return Vec256<uint16_t>{_mm256_mulhi_epu16(a.raw, b.raw)};
+}
+HWY_API Vec256<int16_t> MulHigh(Vec256<int16_t> a, Vec256<int16_t> b) {
+  return Vec256<int16_t>{_mm256_mulhi_epi16(a.raw, b.raw)};
+}
+
+HWY_API Vec256<int16_t> MulFixedPoint15(Vec256<int16_t> a, Vec256<int16_t> b) {  // Thin wrapper over the mulhrs_epi16 intrinsic.
+  return Vec256<int16_t>{_mm256_mulhrs_epi16(a.raw, b.raw)};
+}
+
+// Multiplies even lanes (0, 2, ..) and stores the double-wide product: the lower
+// half lands in the even lane and the upper half in its odd neighbor lane.
+HWY_API Vec256<int64_t> MulEven(Vec256<int32_t> a, Vec256<int32_t> b) {
+  return Vec256<int64_t>{_mm256_mul_epi32(a.raw, b.raw)};
+}
+HWY_API Vec256<uint64_t> MulEven(Vec256<uint32_t> a, Vec256<uint32_t> b) {
+  return Vec256<uint64_t>{_mm256_mul_epu32(a.raw, b.raw)};
+}
+
+// ------------------------------ ShiftLeft
+
+template <int kBits>  // kBits is a compile-time count, passed to the immediate-shift intrinsics.
+HWY_API Vec256<uint16_t> ShiftLeft(Vec256<uint16_t> v) {
+  return Vec256<uint16_t>{_mm256_slli_epi16(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec256<uint32_t> ShiftLeft(Vec256<uint32_t> v) {
+  return Vec256<uint32_t>{_mm256_slli_epi32(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec256<uint64_t> ShiftLeft(Vec256<uint64_t> v) {
+  return Vec256<uint64_t>{_mm256_slli_epi64(v.raw, kBits)};
+}
+
+template <int kBits>  // Signed overloads use the same slli intrinsics as the unsigned ones.
+HWY_API Vec256<int16_t> ShiftLeft(Vec256<int16_t> v) {
+  return Vec256<int16_t>{_mm256_slli_epi16(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec256<int32_t> ShiftLeft(Vec256<int32_t> v) {
+  return Vec256<int32_t>{_mm256_slli_epi32(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec256<int64_t> ShiftLeft(Vec256<int64_t> v) {
+  return Vec256<int64_t>{_mm256_slli_epi64(v.raw, kBits)};
+}
+
+template <int kBits, typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec256<T> ShiftLeft(const Vec256<T> v) {  // 8-bit lanes: implemented on top of the 16-bit shift.
+  const Full256<T> d8;
+  const RepartitionToWide<decltype(d8)> d16;
+  const auto shifted = BitCast(d8, ShiftLeft<kBits>(BitCast(d16, v)));  // Shift as wider lanes; bits cross byte boundaries.
+  return kBits == 1
+             ? (v + v)  // A shift by one is just v + v on the byte lanes.
+             : (shifted & Set(d8, static_cast<T>((0xFF << kBits) & 0xFF)));  // Clear the low kBits of each byte (bits from the neighbor).
+}
+
+// ------------------------------ ShiftRight
+
+template <int kBits>  // Unsigned lanes use the logical right-shift (srli) intrinsics.
+HWY_API Vec256<uint16_t> ShiftRight(Vec256<uint16_t> v) {
+  return Vec256<uint16_t>{_mm256_srli_epi16(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec256<uint32_t> ShiftRight(Vec256<uint32_t> v) {
+  return Vec256<uint32_t>{_mm256_srli_epi32(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec256<uint64_t> ShiftRight(Vec256<uint64_t> v) {
+  return Vec256<uint64_t>{_mm256_srli_epi64(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec256<uint8_t> ShiftRight(Vec256<uint8_t> v) {  // 8-bit lanes: implemented on top of the 16-bit shift.
+  const Full256<uint8_t> d8;
+  // Use raw instead of BitCast to support N=1.
+  const Vec256<uint8_t> shifted{ShiftRight<kBits>(Vec256<uint16_t>{v.raw}).raw};
+  return shifted & Set(d8, 0xFF >> kBits);  // Clear the top kBits of each byte (bits from the neighbor).
+}
+
+template <int kBits>  // Signed lanes use the arithmetic right-shift (srai) intrinsics.
+HWY_API Vec256<int16_t> ShiftRight(Vec256<int16_t> v) {
+  return Vec256<int16_t>{_mm256_srai_epi16(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec256<int32_t> ShiftRight(Vec256<int32_t> v) {
+  return Vec256<int32_t>{_mm256_srai_epi32(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec256<int8_t> ShiftRight(Vec256<int8_t> v) {  // Signed 8-bit: unsigned shift followed by sign extension.
+  const Full256<int8_t> di;
+  const Full256<uint8_t> du;
+  const auto shifted = BitCast(di, ShiftRight<kBits>(BitCast(du, v)));  // Shift as unsigned bytes.
+  const auto shifted_sign = BitCast(di, Set(du, 0x80 >> kBits));  // Where the original sign bit ends up after the shift.
+  return (shifted ^ shifted_sign) - shifted_sign;  // XOR-then-subtract extends that bit into the upper bits.
+}
+
+// i64 is implemented after BroadcastSignBit.
+
+// ------------------------------ RotateRight
+
+template <int kBits, typename T, HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2))>
+HWY_API Vec256<T> RotateRight(const Vec256<T> v) {  // 8/16-bit lanes: built from two shifts.
+  constexpr size_t kSizeInBits = sizeof(T) * 8;
+  static_assert(0 <= kBits && kBits < kSizeInBits, "Invalid shift count");
+  if (kBits == 0) return v;
+  // AVX3 does not support 8/16-bit.
+  return Or(ShiftRight<kBits>(v),
+            ShiftLeft<HWY_MIN(kSizeInBits - 1, kSizeInBits - kBits)>(v));  // HWY_MIN keeps the count below the lane width when kBits == 0.
+}
+
+template <int kBits>
+HWY_API Vec256<uint32_t> RotateRight(const Vec256<uint32_t> v) {
+  static_assert(0 <= kBits && kBits < 32, "Invalid shift count");
+#if HWY_TARGET <= HWY_AVX3  // AVX3 targets use the rotate intrinsic.
+  return Vec256<uint32_t>{_mm256_ror_epi32(v.raw, kBits)};
+#else  // Otherwise combine a right and a left shift.
+  if (kBits == 0) return v;
+  return Or(ShiftRight<kBits>(v), ShiftLeft<HWY_MIN(31, 32 - kBits)>(v));  // HWY_MIN keeps the count at most 31 when kBits == 0.
+#endif
+}
+
+template <int kBits>
+HWY_API Vec256<uint64_t> RotateRight(const Vec256<uint64_t> v) {
+  static_assert(0 <= kBits && kBits < 64, "Invalid shift count");
+#if HWY_TARGET <= HWY_AVX3  // AVX3 targets use the rotate intrinsic.
+  return Vec256<uint64_t>{_mm256_ror_epi64(v.raw, kBits)};
+#else  // Otherwise combine a right and a left shift.
+  if (kBits == 0) return v;
+  return Or(ShiftRight<kBits>(v), ShiftLeft<HWY_MIN(63, 64 - kBits)>(v));  // HWY_MIN keeps the count at most 63 when kBits == 0.
+#endif
+}
+
+// ------------------------------ BroadcastSignBit (ShiftRight, compare, mask)
+
+HWY_API Vec256<int8_t> BroadcastSignBit(const Vec256<int8_t> v) {  // 8-bit: via a compare against zero.
+  const DFromV<decltype(v)> d;
+  return VecFromMask(v < Zero(d));
+}
+
+HWY_API Vec256<int16_t> BroadcastSignBit(const Vec256<int16_t> v) {
+  return ShiftRight<15>(v);  // Signed shift by (lane bits - 1).
+}
+
+HWY_API Vec256<int32_t> BroadcastSignBit(const Vec256<int32_t> v) {
+  return ShiftRight<31>(v);  // Signed shift by (lane bits - 1).
+}
+
+HWY_API Vec256<int64_t> BroadcastSignBit(const Vec256<int64_t> v) {
+#if HWY_TARGET == HWY_AVX2  // AVX2: via a compare against zero.
+  const DFromV<decltype(v)> d;
+  return VecFromMask(v < Zero(d));
+#else  // Other targets: 64-bit arithmetic shift by 63.
+  return Vec256<int64_t>{_mm256_srai_epi64(v.raw, 63)};
+#endif
+}
+
+template <int kBits>
+HWY_API Vec256<int64_t> ShiftRight(const Vec256<int64_t> v) {  // Signed 64-bit shift right by a compile-time count.
+#if HWY_TARGET <= HWY_AVX3  // AVX3 targets use the 64-bit arithmetic shift intrinsic.
+  return Vec256<int64_t>{
+      _mm256_srai_epi64(v.raw, static_cast<Shift64Count>(kBits))};
+#else  // Otherwise: unsigned shift, then OR in the sign bits.
+  const Full256<int64_t> di;
+  const Full256<uint64_t> du;
+  const auto right = BitCast(di, ShiftRight<kBits>(BitCast(du, v)));  // Shift as unsigned lanes.
+  const auto sign = ShiftLeft<64 - kBits>(BroadcastSignBit(v));  // Broadcast sign moved into the top kBits.
+  return right | sign;
+#endif
+}
+
+HWY_API Vec256<int64_t> Abs(const Vec256<int64_t> v) {  // 64-bit Abs; defined here because the fallback needs BroadcastSignBit.
+#if HWY_TARGET <= HWY_AVX3  // AVX3 targets use the 64-bit abs intrinsic.
+  return Vec256<int64_t>{_mm256_abs_epi64(v.raw)};
+#else  // Otherwise select 0 - v in negative lanes, v elsewhere.
+  const DFromV<decltype(v)> d;
+  const auto zero = Zero(d);
+  return IfThenElse(MaskFromVec(BroadcastSignBit(v)), zero - v, v);
+#endif
+}
+
+// ------------------------------ IfNegativeThenElse (BroadcastSignBit)
+HWY_API Vec256<int8_t> IfNegativeThenElse(Vec256<int8_t> v, Vec256<int8_t> yes,
+                                          Vec256<int8_t> no) {  // Selects between yes and no using v itself as the mask.
+  // int8: AVX2 IfThenElse only looks at the MSB.
+  return IfThenElse(MaskFromVec(v), yes, no);
+}
+
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec256<T> IfNegativeThenElse(Vec256<T> v, Vec256<T> yes, Vec256<T> no) {  // 16-bit lanes.
+  static_assert(IsSigned<T>(), "Only works for signed/float");
+
+#if HWY_TARGET <= HWY_AVX3  // AVX3: v is converted to a mask directly.
+  const auto mask = MaskFromVec(v);
+#else
+  // 16-bit: no native blendv on AVX2, so copy sign to lower byte's MSB.
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;
+  const auto mask = MaskFromVec(BitCast(d, BroadcastSignBit(BitCast(di, v))));  // Sign is broadcast across the lane before conversion.
+#endif
+
+  return IfThenElse(mask, yes, no);
+}
+
+template <typename T, HWY_IF_T_SIZE_ONE_OF(T, (1 << 4) | (1 << 8))>
+HWY_API Vec256<T> IfNegativeThenElse(Vec256<T> v, Vec256<T> yes, Vec256<T> no) {  // 32/64-bit lanes.
+  static_assert(IsSigned<T>(), "Only works for signed/float");
+
+#if HWY_TARGET <= HWY_AVX3
+  // No need to cast to float on AVX3 as IfThenElse only looks at the MSB on
+  // AVX3
+  return IfThenElse(MaskFromVec(v), yes, no);
+#else
+  const DFromV<decltype(v)> d;
+  const RebindToFloat<decltype(d)> df;
+  // 32/64-bit: use float IfThenElse, which only looks at the MSB.
+  const MFromD<decltype(df)> msb = MaskFromVec(BitCast(df, v));
+  return BitCast(d, IfThenElse(msb, BitCast(df, yes), BitCast(df, no)));  // Select in the float domain, then cast back to T.
+#endif
+}
+
+// ------------------------------ ShiftLeftSame
+
+HWY_API Vec256<uint16_t> ShiftLeftSame(const Vec256<uint16_t> v,
+                                       const int bits) {  // Shifts every lane left by the same runtime count.
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Count known at compile time: use the immediate form.
+    return Vec256<uint16_t>{_mm256_slli_epi16(v.raw, bits)};
+  }
+#endif
+  return Vec256<uint16_t>{_mm256_sll_epi16(v.raw, _mm_cvtsi32_si128(bits))};  // Otherwise pass the count in a 128-bit vector.
+}
+HWY_API Vec256<uint32_t> ShiftLeftSame(const Vec256<uint32_t> v,
+                                       const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Count known at compile time: use the immediate form.
+    return Vec256<uint32_t>{_mm256_slli_epi32(v.raw, bits)};
+  }
+#endif
+  return Vec256<uint32_t>{_mm256_sll_epi32(v.raw, _mm_cvtsi32_si128(bits))};
+}
+HWY_API Vec256<uint64_t> ShiftLeftSame(const Vec256<uint64_t> v,
+                                       const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Count known at compile time: use the immediate form.
+    return Vec256<uint64_t>{_mm256_slli_epi64(v.raw, bits)};
+  }
+#endif
+  return Vec256<uint64_t>{_mm256_sll_epi64(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+HWY_API Vec256<int16_t> ShiftLeftSame(const Vec256<int16_t> v, const int bits) {  // Signed lanes: same intrinsics as the unsigned overloads.
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Count known at compile time: use the immediate form.
+    return Vec256<int16_t>{_mm256_slli_epi16(v.raw, bits)};
+  }
+#endif
+  return Vec256<int16_t>{_mm256_sll_epi16(v.raw, _mm_cvtsi32_si128(bits))};  // Otherwise pass the count in a 128-bit vector.
+}
+
+HWY_API Vec256<int32_t> ShiftLeftSame(const Vec256<int32_t> v, const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Count known at compile time: use the immediate form.
+    return Vec256<int32_t>{_mm256_slli_epi32(v.raw, bits)};
+  }
+#endif
+  return Vec256<int32_t>{_mm256_sll_epi32(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+HWY_API Vec256<int64_t> ShiftLeftSame(const Vec256<int64_t> v, const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Count known at compile time: use the immediate form.
+    return Vec256<int64_t>{_mm256_slli_epi64(v.raw, bits)};
+  }
+#endif
+  return Vec256<int64_t>{_mm256_sll_epi64(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec256<T> ShiftLeftSame(const Vec256<T> v, const int bits) {  // 8-bit lanes: implemented on top of the 16-bit shift.
+  const Full256<T> d8;
+  const RepartitionToWide<decltype(d8)> d16;
+  const auto shifted = BitCast(d8, ShiftLeftSame(BitCast(d16, v), bits));  // Shift as wider lanes; bits cross byte boundaries.
+  return shifted & Set(d8, static_cast<T>((0xFF << bits) & 0xFF));  // Clear the low `bits` bits of each byte (bits from the neighbor).
+}
+
+// ------------------------------ ShiftRightSame (BroadcastSignBit)
+
+HWY_API Vec256<uint16_t> ShiftRightSame(const Vec256<uint16_t> v,
+                                        const int bits) {  // Logical right shift of every lane by the same runtime count.
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Count known at compile time: use the immediate form.
+    return Vec256<uint16_t>{_mm256_srli_epi16(v.raw, bits)};
+  }
+#endif
+  return Vec256<uint16_t>{_mm256_srl_epi16(v.raw, _mm_cvtsi32_si128(bits))};  // Otherwise pass the count in a 128-bit vector.
+}
+HWY_API Vec256<uint32_t> ShiftRightSame(const Vec256<uint32_t> v,
+                                        const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Count known at compile time: use the immediate form.
+    return Vec256<uint32_t>{_mm256_srli_epi32(v.raw, bits)};
+  }
+#endif
+  return Vec256<uint32_t>{_mm256_srl_epi32(v.raw, _mm_cvtsi32_si128(bits))};
+}
+HWY_API Vec256<uint64_t> ShiftRightSame(const Vec256<uint64_t> v,
+                                        const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Count known at compile time: use the immediate form.
+    return Vec256<uint64_t>{_mm256_srli_epi64(v.raw, bits)};
+  }
+#endif
+  return Vec256<uint64_t>{_mm256_srl_epi64(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+HWY_API Vec256<uint8_t> ShiftRightSame(Vec256<uint8_t> v, const int bits) {  // 8-bit lanes: implemented on top of the 16-bit shift.
+  const Full256<uint8_t> d8;
+  const RepartitionToWide<decltype(d8)> d16;
+  const auto shifted = BitCast(d8, ShiftRightSame(BitCast(d16, v), bits));  // Shift as wider lanes; bits cross byte boundaries.
+  return shifted & Set(d8, static_cast<uint8_t>(0xFF >> bits));  // Clear the top `bits` bits of each byte (bits from the neighbor).
+}
+
+HWY_API Vec256<int16_t> ShiftRightSame(const Vec256<int16_t> v,
+                                       const int bits) {  // Signed lanes use the arithmetic shift (sra/srai) intrinsics.
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Count known at compile time: use the immediate form.
+    return Vec256<int16_t>{_mm256_srai_epi16(v.raw, bits)};
+  }
+#endif
+  return Vec256<int16_t>{_mm256_sra_epi16(v.raw, _mm_cvtsi32_si128(bits))};  // Otherwise pass the count in a 128-bit vector.
+}
+
+HWY_API Vec256<int32_t> ShiftRightSame(const Vec256<int32_t> v,
+                                       const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Count known at compile time: use the immediate form.
+    return Vec256<int32_t>{_mm256_srai_epi32(v.raw, bits)};
+  }
+#endif
+  return Vec256<int32_t>{_mm256_sra_epi32(v.raw, _mm_cvtsi32_si128(bits))};
+}
+HWY_API Vec256<int64_t> ShiftRightSame(const Vec256<int64_t> v,
+                                       const int bits) {  // Signed 64-bit shift right by a runtime count.
+#if HWY_TARGET <= HWY_AVX3  // AVX3 targets use the 64-bit arithmetic shift intrinsics.
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {  // Count known at compile time: use the immediate form.
+    return Vec256<int64_t>{
+        _mm256_srai_epi64(v.raw, static_cast<Shift64Count>(bits))};
+  }
+#endif
+  return Vec256<int64_t>{_mm256_sra_epi64(v.raw, _mm_cvtsi32_si128(bits))};
+#else  // Otherwise: unsigned shift, then OR in the sign bits.
+  const Full256<int64_t> di;
+  const Full256<uint64_t> du;
+  const auto right = BitCast(di, ShiftRightSame(BitCast(du, v), bits));  // Shift as unsigned lanes.
+  const auto sign = ShiftLeftSame(BroadcastSignBit(v), 64 - bits);  // Broadcast sign moved into the top `bits` bits.
+  return right | sign;
+#endif
+}
+
+HWY_API Vec256<int8_t> ShiftRightSame(Vec256<int8_t> v, const int bits) {  // Signed 8-bit: unsigned shift followed by sign extension.
+  const Full256<int8_t> di;
+  const Full256<uint8_t> du;
+  const auto shifted = BitCast(di, ShiftRightSame(BitCast(du, v), bits));  // Shift as unsigned bytes.
+  const auto shifted_sign =  // Where the original sign bit ends up after the shift.
+      BitCast(di, Set(du, static_cast<uint8_t>(0x80 >> bits)));
+  return (shifted ^ shifted_sign) - shifted_sign;  // XOR-then-subtract extends that bit into the upper bits.
+}
+
+// ------------------------------ Neg (Xor, Sub)
+
+// Tag dispatch instead of SFINAE for MSVC 2017 compatibility
+namespace detail {  // One Neg overload per type tag; the public Neg selects via hwy::TypeTag<T>.
+
+template <typename T>
+HWY_INLINE Vec256<T> Neg(hwy::FloatTag /*tag*/, const Vec256<T> v) {
+  const DFromV<decltype(v)> d;
+  return Xor(v, SignBit(d));  // Negate by flipping the sign bit.
+}
+
+template <typename T>
+HWY_INLINE Vec256<T> Neg(hwy::SpecialTag /*tag*/, const Vec256<T> v) {
+  const DFromV<decltype(v)> d;
+  return Xor(v, SignBit(d));  // Same sign-bit flip as the FloatTag overload.
+}
+
+// Not floating-point
+template <typename T>
+HWY_INLINE Vec256<T> Neg(hwy::SignedTag /*tag*/, const Vec256<T> v) {
+  const DFromV<decltype(v)> d;
+  return Zero(d) - v;  // Negate by subtracting from zero.
+}
+
+}  // namespace detail
+
+template <typename T>
+HWY_API Vec256<T> Neg(const Vec256<T> v) {  // Public entry point: returns the negated vector.
+  return detail::Neg(hwy::TypeTag<T>(), v);  // The tag type picks the matching detail::Neg overload.
+}
+
+// ------------------------------ Floating-point mul / div
+
+#if HWY_HAVE_FLOAT16  // The float16_t overload is compiled only when enabled.
+HWY_API Vec256<float16_t> operator*(Vec256<float16_t> a, Vec256<float16_t> b) {
+  return Vec256<float16_t>{_mm256_mul_ph(a.raw, b.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec256<float> operator*(Vec256<float> a, Vec256<float> b) {  // Lane-wise a * b.
+  return Vec256<float>{_mm256_mul_ps(a.raw, b.raw)};
+}
+HWY_API Vec256<double> operator*(Vec256<double> a, Vec256<double> b) {
+  return Vec256<double>{_mm256_mul_pd(a.raw, b.raw)};
+}
+
+#if HWY_HAVE_FLOAT16  // The float16_t overload is compiled only when enabled.
+HWY_API Vec256<float16_t> operator/(Vec256<float16_t> a, Vec256<float16_t> b) {
+  return Vec256<float16_t>{_mm256_div_ph(a.raw, b.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec256<float> operator/(Vec256<float> a, Vec256<float> b) {  // Lane-wise a / b.
+  return Vec256<float>{_mm256_div_ps(a.raw, b.raw)};
+}
+HWY_API Vec256<double> operator/(Vec256<double> a, Vec256<double> b) {
+  return Vec256<double>{_mm256_div_pd(a.raw, b.raw)};
+}
+
+// Approximate reciprocal, via the rcp_ph / rcp_ps / rcp14_pd intrinsics.
+#if HWY_HAVE_FLOAT16
+HWY_API Vec256<float16_t> ApproximateReciprocal(Vec256<float16_t> v) {
+  return Vec256<float16_t>{_mm256_rcp_ph(v.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+
+HWY_API Vec256<float> ApproximateReciprocal(Vec256<float> v) {
+  return Vec256<float>{_mm256_rcp_ps(v.raw)};
+}
+
+#if HWY_TARGET <= HWY_AVX3  // The double overload is only defined for AVX3 targets.
+HWY_API Vec256<double> ApproximateReciprocal(Vec256<double> v) {
+  return Vec256<double>{_mm256_rcp14_pd(v.raw)};
+}
+#endif
+
+// ------------------------------ Floating-point multiply-add variants
+
+#if HWY_HAVE_FLOAT16  // float16_t multiply-add variants; each wraps one fused intrinsic.
+
+HWY_API Vec256<float16_t> MulAdd(Vec256<float16_t> mul, Vec256<float16_t> x,
+                                 Vec256<float16_t> add) {  // mul * x + add
+  return Vec256<float16_t>{_mm256_fmadd_ph(mul.raw, x.raw, add.raw)};
+}
+
+HWY_API Vec256<float16_t> NegMulAdd(Vec256<float16_t> mul, Vec256<float16_t> x,
+                                    Vec256<float16_t> add) {  // add - mul * x
+  return Vec256<float16_t>{_mm256_fnmadd_ph(mul.raw, x.raw, add.raw)};
+}
+
+HWY_API Vec256<float16_t> MulSub(Vec256<float16_t> mul, Vec256<float16_t> x,
+                                 Vec256<float16_t> sub) {  // mul * x - sub
+  return Vec256<float16_t>{_mm256_fmsub_ph(mul.raw, x.raw, sub.raw)};
+}
+
+HWY_API Vec256<float16_t> NegMulSub(Vec256<float16_t> mul, Vec256<float16_t> x,
+                                    Vec256<float16_t> sub) {  // -(mul * x) - sub
+  return Vec256<float16_t>{_mm256_fnmsub_ph(mul.raw, x.raw, sub.raw)};
+}
+
+#endif  // HWY_HAVE_FLOAT16
+
+HWY_API Vec256<float> MulAdd(Vec256<float> mul, Vec256<float> x,
+                             Vec256<float> add) {  // Returns mul * x + add.
+#ifdef HWY_DISABLE_BMI2_FMA  // FMA disabled: separate multiply and add.
+  return mul * x + add;
+#else  // Otherwise a single fused multiply-add intrinsic.
+  return Vec256<float>{_mm256_fmadd_ps(mul.raw, x.raw, add.raw)};
+#endif
+}
+HWY_API Vec256<double> MulAdd(Vec256<double> mul, Vec256<double> x,
+                              Vec256<double> add) {  // Returns mul * x + add.
+#ifdef HWY_DISABLE_BMI2_FMA  // FMA disabled: separate multiply and add.
+  return mul * x + add;
+#else
+  return Vec256<double>{_mm256_fmadd_pd(mul.raw, x.raw, add.raw)};
+#endif
+}
+
+HWY_API Vec256<float> NegMulAdd(Vec256<float> mul, Vec256<float> x,
+                                Vec256<float> add) {  // Returns add - mul * x.
+#ifdef HWY_DISABLE_BMI2_FMA  // FMA disabled: separate multiply and subtract.
+  return add - mul * x;
+#else  // Otherwise a single fused intrinsic.
+  return Vec256<float>{_mm256_fnmadd_ps(mul.raw, x.raw, add.raw)};
+#endif
+}
+HWY_API Vec256<double> NegMulAdd(Vec256<double> mul, Vec256<double> x,
+                                 Vec256<double> add) {  // Returns add - mul * x.
+#ifdef HWY_DISABLE_BMI2_FMA  // FMA disabled: separate multiply and subtract.
+  return add - mul * x;
+#else
+  return Vec256<double>{_mm256_fnmadd_pd(mul.raw, x.raw, add.raw)};
+#endif
+}
+
+// Returns mul * x - sub. Uses the FMA intrinsic unless HWY_DISABLE_BMI2_FMA is
+// defined, in which case the multiply and subtract are separate operations.
+HWY_API Vec256<float> MulSub(Vec256<float> mul, Vec256<float> x,
+                             Vec256<float> sub) {
+#ifndef HWY_DISABLE_BMI2_FMA
+  const __m256 fused = _mm256_fmsub_ps(mul.raw, x.raw, sub.raw);
+  return Vec256<float>{fused};
+#else
+  const Vec256<float> product = mul * x;
+  return product - sub;
+#endif
+}
+// Double-precision overload of the above.
+HWY_API Vec256<double> MulSub(Vec256<double> mul, Vec256<double> x,
+                              Vec256<double> sub) {
+#ifndef HWY_DISABLE_BMI2_FMA
+  const __m256d fused = _mm256_fmsub_pd(mul.raw, x.raw, sub.raw);
+  return Vec256<double>{fused};
+#else
+  const Vec256<double> product = mul * x;
+  return product - sub;
+#endif
+}
+
+// Returns -(mul * x) - sub. Uses the FMA intrinsic unless
+// HWY_DISABLE_BMI2_FMA is defined, in which case the product is negated and
+// the subtraction is performed as separate operations.
+HWY_API Vec256<float> NegMulSub(Vec256<float> mul, Vec256<float> x,
+                                Vec256<float> sub) {
+#ifndef HWY_DISABLE_BMI2_FMA
+  const __m256 fused = _mm256_fnmsub_ps(mul.raw, x.raw, sub.raw);
+  return Vec256<float>{fused};
+#else
+  const Vec256<float> neg_product = Neg(mul * x);
+  return neg_product - sub;
+#endif
+}
+// Double-precision overload of the above.
+HWY_API Vec256<double> NegMulSub(Vec256<double> mul, Vec256<double> x,
+                                 Vec256<double> sub) {
+#ifndef HWY_DISABLE_BMI2_FMA
+  const __m256d fused = _mm256_fnmsub_pd(mul.raw, x.raw, sub.raw);
+  return Vec256<double>{fused};
+#else
+  const Vec256<double> neg_product = Neg(mul * x);
+  return neg_product - sub;
+#endif
+}
+
+// ------------------------------ Floating-point square root
+
+// Full-precision per-lane square root; one overload per lane type, each
+// forwarding to the matching packed sqrt intrinsic.
+#if HWY_HAVE_FLOAT16
+HWY_API Vec256<float16_t> Sqrt(Vec256<float16_t> v) {
+  const auto root = _mm256_sqrt_ph(v.raw);
+  return Vec256<float16_t>{root};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec256<float> Sqrt(Vec256<float> v) {
+  const __m256 root = _mm256_sqrt_ps(v.raw);
+  return Vec256<float>{root};
+}
+HWY_API Vec256<double> Sqrt(Vec256<double> v) {
+  const __m256d root = _mm256_sqrt_pd(v.raw);
+  return Vec256<double>{root};
+}
+
+// Approximate per-lane reciprocal square root (1 / sqrt(v)), computed by the
+// rsqrt family of intrinsics.
+#if HWY_HAVE_FLOAT16
+HWY_API Vec256<float16_t> ApproximateReciprocalSqrt(Vec256<float16_t> v) {
+  const auto approx = _mm256_rsqrt_ph(v.raw);
+  return Vec256<float16_t>{approx};
+}
+#endif
+HWY_API Vec256<float> ApproximateReciprocalSqrt(Vec256<float> v) {
+  const __m256 approx = _mm256_rsqrt_ps(v.raw);
+  return Vec256<float>{approx};
+}
+
+// The double-precision variant only exists on AVX3 targets (rsqrt14).
+#if HWY_TARGET <= HWY_AVX3
+HWY_API Vec256<double> ApproximateReciprocalSqrt(Vec256<double> v) {
+#if !HWY_COMPILER_MSVC
+  const __m256d approx = _mm256_rsqrt14_pd(v.raw);
+  return Vec256<double>{approx};
+#else
+  // MSVC path: call the merge-masked form with an all-ones mask so the
+  // (undefined) source operand does not contribute to any lane.
+  const DFromV<decltype(v)> d;
+  const __mmask8 all_lanes = static_cast<__mmask8>(0xFF);
+  return Vec256<double>{
+      _mm256_mask_rsqrt14_pd(Undefined(d).raw, all_lanes, v.raw)};
+#endif
+}
+#endif
+
+// ------------------------------ Floating-point rounding
+
+// Rounds each lane to the nearest integer value (ties to even), with
+// floating-point exceptions suppressed (_MM_FROUND_NO_EXC).
+#if HWY_HAVE_FLOAT16
+HWY_API Vec256<float16_t> Round(Vec256<float16_t> v) {
+  const auto rounded = _mm256_roundscale_ph(
+      v.raw, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  return Vec256<float16_t>{rounded};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec256<float> Round(Vec256<float> v) {
+  const __m256 rounded =
+      _mm256_round_ps(v.raw, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  return Vec256<float>{rounded};
+}
+HWY_API Vec256<double> Round(Vec256<double> v) {
+  const __m256d rounded =
+      _mm256_round_pd(v.raw, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  return Vec256<double>{rounded};
+}
+
+// Rounds each lane toward zero (truncation), with floating-point exceptions
+// suppressed (_MM_FROUND_NO_EXC).
+#if HWY_HAVE_FLOAT16
+HWY_API Vec256<float16_t> Trunc(Vec256<float16_t> v) {
+  const auto truncated =
+      _mm256_roundscale_ph(v.raw, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+  return Vec256<float16_t>{truncated};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec256<float> Trunc(Vec256<float> v) {
+  const __m256 truncated =
+      _mm256_round_ps(v.raw, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+  return Vec256<float>{truncated};
+}
+HWY_API Vec256<double> Trunc(Vec256<double> v) {
+  const __m256d truncated =
+      _mm256_round_pd(v.raw, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+  return Vec256<double>{truncated};
+}
+
+// Rounds each lane toward +infinity (ceiling), with floating-point exceptions
+// suppressed (_MM_FROUND_NO_EXC).
+#if HWY_HAVE_FLOAT16
+HWY_API Vec256<float16_t> Ceil(Vec256<float16_t> v) {
+  const auto raised =
+      _mm256_roundscale_ph(v.raw, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  return Vec256<float16_t>{raised};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec256<float> Ceil(Vec256<float> v) {
+  const __m256 raised =
+      _mm256_round_ps(v.raw, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  return Vec256<float>{raised};
+}
+HWY_API Vec256<double> Ceil(Vec256<double> v) {
+  const __m256d raised =
+      _mm256_round_pd(v.raw, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  return Vec256<double>{raised};
+}
+
+// Rounds each lane toward -infinity (floor), with floating-point exceptions
+// suppressed (_MM_FROUND_NO_EXC).
+#if HWY_HAVE_FLOAT16
+HWY_API Vec256<float16_t> Floor(Vec256<float16_t> v) {
+  const auto lowered =
+      _mm256_roundscale_ph(v.raw, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  return Vec256<float16_t>{lowered};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec256<float> Floor(Vec256<float> v) {
+  const __m256 lowered =
+      _mm256_round_ps(v.raw, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  return Vec256<float>{lowered};
+}
+HWY_API Vec256<double> Floor(Vec256<double> v) {
+  const __m256d lowered =
+      _mm256_round_pd(v.raw, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  return Vec256<double>{lowered};
+}
+
+// ------------------------------ Floating-point classification
+
+#if HWY_HAVE_FLOAT16 || HWY_IDE
+
+// Mask of half-precision lanes that are NaN (signalling or quiet).
+HWY_API Mask256<float16_t> IsNaN(Vec256<float16_t> v) {
+  const auto nan_bits = _mm256_fpclass_ph_mask(
+      v.raw, HWY_X86_FPCLASS_SNAN | HWY_X86_FPCLASS_QNAN);
+  return Mask256<float16_t>{nan_bits};
+}
+
+// Mask of half-precision lanes that are +infinity or -infinity.
+HWY_API Mask256<float16_t> IsInf(Vec256<float16_t> v) {
+  const auto inf_bits = _mm256_fpclass_ph_mask(
+      v.raw, HWY_X86_FPCLASS_NEG_INF | HWY_X86_FPCLASS_POS_INF);
+  return Mask256<float16_t>{inf_bits};
+}
+
+// Mask of half-precision lanes that are neither NaN nor infinite.
+HWY_API Mask256<float16_t> IsFinite(Vec256<float16_t> v) {
+  // There is no single fpclass category for "finite", so classify the
+  // non-finite lanes (NaN or Inf) and invert that mask.
+  const Mask256<float16_t> inf_or_nan{_mm256_fpclass_ph_mask(
+      v.raw, HWY_X86_FPCLASS_SNAN | HWY_X86_FPCLASS_QNAN |
+                 HWY_X86_FPCLASS_NEG_INF | HWY_X86_FPCLASS_POS_INF)};
+  return Not(inf_or_nan);
+}
+
+#endif  // HWY_HAVE_FLOAT16
+
+// Mask of lanes that are NaN. AVX3 targets classify via fpclass; otherwise a
+// lane is NaN exactly when it compares unordered with itself.
+HWY_API Mask256<float> IsNaN(Vec256<float> v) {
+#if HWY_TARGET <= HWY_AVX3
+  const auto nan_bits = _mm256_fpclass_ps_mask(
+      v.raw, HWY_X86_FPCLASS_SNAN | HWY_X86_FPCLASS_QNAN);
+  return Mask256<float>{nan_bits};
+#else
+  const __m256 unordered = _mm256_cmp_ps(v.raw, v.raw, _CMP_UNORD_Q);
+  return Mask256<float>{unordered};
+#endif
+}
+// Double-precision overload of the above.
+HWY_API Mask256<double> IsNaN(Vec256<double> v) {
+#if HWY_TARGET <= HWY_AVX3
+  const auto nan_bits = _mm256_fpclass_pd_mask(
+      v.raw, HWY_X86_FPCLASS_SNAN | HWY_X86_FPCLASS_QNAN);
+  return Mask256<double>{nan_bits};
+#else
+  const __m256d unordered = _mm256_cmp_pd(v.raw, v.raw, _CMP_UNORD_Q);
+  return Mask256<double>{unordered};
+#endif
+}
+
+#if HWY_TARGET <= HWY_AVX3
+
+// Mask of lanes that are +infinity or -infinity.
+HWY_API Mask256<float> IsInf(Vec256<float> v) {
+  const auto inf_bits = _mm256_fpclass_ps_mask(
+      v.raw, HWY_X86_FPCLASS_NEG_INF | HWY_X86_FPCLASS_POS_INF);
+  return Mask256<float>{inf_bits};
+}
+HWY_API Mask256<double> IsInf(Vec256<double> v) {
+  const auto inf_bits = _mm256_fpclass_pd_mask(
+      v.raw, HWY_X86_FPCLASS_NEG_INF | HWY_X86_FPCLASS_POS_INF);
+  return Mask256<double>{inf_bits};
+}
+
+// Mask of lanes that are neither NaN nor infinite.
+HWY_API Mask256<float> IsFinite(Vec256<float> v) {
+  // There is no single fpclass category for "finite", so classify the
+  // non-finite lanes (NaN or Inf) and invert that mask.
+  const Mask256<float> inf_or_nan{_mm256_fpclass_ps_mask(
+      v.raw, HWY_X86_FPCLASS_SNAN | HWY_X86_FPCLASS_QNAN |
+                 HWY_X86_FPCLASS_NEG_INF | HWY_X86_FPCLASS_POS_INF)};
+  return Not(inf_or_nan);
+}
+HWY_API Mask256<double> IsFinite(Vec256<double> v) {
+  // Same approach as the float overload: invert the NaN-or-Inf mask.
+  const Mask256<double> inf_or_nan{_mm256_fpclass_pd_mask(
+      v.raw, HWY_X86_FPCLASS_SNAN | HWY_X86_FPCLASS_QNAN |
+                 HWY_X86_FPCLASS_NEG_INF | HWY_X86_FPCLASS_POS_INF)};
+  return Not(inf_or_nan);
+}
+
+#else
+
+// Returns a mask of lanes that are +/- infinity. Generic integer-based
+// implementation used when the fpclass path above is not compiled in
+// (HWY_TARGET > HWY_AVX3).
+template <typename T>
+HWY_API Mask256<T> IsInf(const Vec256<T> v) {
+  static_assert(IsFloat<T>(), "Only for float");
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;
+  // Reinterpret the float bits as same-width signed integers.
+  const VFromD<decltype(di)> vi = BitCast(di, v);
+  // 'Shift left' to clear the sign bit, check for exponent=max and mantissa=0.
+  // (vi + vi doubles the integer, i.e. shifts the bit pattern left by one.)
+  return RebindMask(d, Eq(Add(vi, vi), Set(di, hwy::MaxExponentTimes2<T>())));
+}
+
+// Returns whether normal/subnormal/zero.
+// Generic integer-based implementation for targets without the fpclass path:
+// a lane is finite when its exponent field is below the maximum.
+template <typename T>
+HWY_API Mask256<T> IsFinite(const Vec256<T> v) {
+  static_assert(IsFloat<T>(), "Only for float");
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const RebindToSigned<decltype(d)> di;  // cheaper than unsigned comparison
+  const VFromD<decltype(du)> vu = BitCast(du, v);
+  // Shift left to clear the sign bit, then right so we can compare with the
+  // max exponent (cannot compare with MaxExponentTimes2 directly because it is
+  // negative and non-negative floats would be greater). MSVC seems to generate
+  // incorrect code if we instead add vu + vu.
+  const VFromD<decltype(di)> exp =
+      BitCast(di, ShiftRight<hwy::MantissaBits<T>() + 1>(ShiftLeft<1>(vu)));
+  // exp now holds only the exponent field; Inf/NaN have the maximum value.
+  return RebindMask(d, Lt(exp, Set(di, hwy::MaxExponentField<T>())));
+}
+
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// ================================================== MEMORY
+
+// ------------------------------ Load
+
+// Loads a full 256-bit vector from `aligned` using the aligned-load
+// intrinsics. Integer lane types share the si256 load.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+HWY_API VFromD<D> Load(D /* tag */, const TFromD<D>* HWY_RESTRICT aligned) {
+  const __m256i* src = reinterpret_cast<const __m256i*>(aligned);
+  return VFromD<D>{_mm256_load_si256(src)};
+}
+// bfloat16_t is handled by x86_128-inl.h.
+// float16_t: native load when HWY_HAVE_FLOAT16, otherwise load the bits as
+// uint16_t lanes and reinterpret.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F16_D(D)>
+HWY_API Vec256<float16_t> Load(D d, const float16_t* HWY_RESTRICT aligned) {
+#if !HWY_HAVE_FLOAT16
+  const RebindToUnsigned<decltype(d)> du;
+  const uint16_t* bits = reinterpret_cast<const uint16_t*>(aligned);
+  return BitCast(d, Load(du, bits));
+#else
+  (void)d;
+  return Vec256<float16_t>{_mm256_load_ph(aligned)};
+#endif  // !HWY_HAVE_FLOAT16
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API Vec256<float> Load(D /* tag */, const float* HWY_RESTRICT aligned) {
+  const __m256 loaded = _mm256_load_ps(aligned);
+  return Vec256<float>{loaded};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API Vec256<double> Load(D /* tag */, const double* HWY_RESTRICT aligned) {
+  const __m256d loaded = _mm256_load_pd(aligned);
+  return Vec256<double>{loaded};
+}
+
+// Loads a full 256-bit vector from `p` using the unaligned-load intrinsics.
+// Integer lane types share the si256 load.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+HWY_API VFromD<D> LoadU(D /* tag */, const TFromD<D>* HWY_RESTRICT p) {
+  const __m256i* src = reinterpret_cast<const __m256i*>(p);
+  return VFromD<D>{_mm256_loadu_si256(src)};
+}
+// bfloat16_t is handled by x86_128-inl.h.
+// float16_t: native load when HWY_HAVE_FLOAT16, otherwise load the bits as
+// uint16_t lanes and reinterpret.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F16_D(D)>
+HWY_API Vec256<float16_t> LoadU(D d, const float16_t* HWY_RESTRICT p) {
+#if !HWY_HAVE_FLOAT16
+  const RebindToUnsigned<decltype(d)> du;
+  const uint16_t* bits = reinterpret_cast<const uint16_t*>(p);
+  return BitCast(d, LoadU(du, bits));
+#else
+  (void)d;
+  return Vec256<float16_t>{_mm256_loadu_ph(p)};
+#endif  // !HWY_HAVE_FLOAT16
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API Vec256<float> LoadU(D /* tag */, const float* HWY_RESTRICT p) {
+  const __m256 loaded = _mm256_loadu_ps(p);
+  return Vec256<float>{loaded};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API Vec256<double> LoadU(D /* tag */, const double* HWY_RESTRICT p) {
+  const __m256d loaded = _mm256_loadu_pd(p);
+  return Vec256<double>{loaded};
+}
+
+// ------------------------------ MaskedLoad
+
+#if HWY_TARGET <= HWY_AVX3
+
+// AVX3 MaskedLoad: zero-masked unaligned loads (maskz_loadu), one overload per
+// lane width / type. `m` selects which lanes are read from `p`.
+
+// 8-bit lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D /* tag */,
+                             const TFromD<D>* HWY_RESTRICT p) {
+  const __m256i loaded = _mm256_maskz_loadu_epi8(m.raw, p);
+  return VFromD<D>{loaded};
+}
+
+// 16-bit lanes; loaded as unsigned integers and reinterpreted so that
+// float16_t is also supported.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D d,
+                             const TFromD<D>* HWY_RESTRICT p) {
+  const RebindToUnsigned<decltype(d)> du;  // for float16_t
+  const VFromD<decltype(du)> loaded{_mm256_maskz_loadu_epi16(m.raw, p)};
+  return BitCast(d, loaded);
+}
+
+// 32-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI32_D(D)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D /* tag */,
+                             const TFromD<D>* HWY_RESTRICT p) {
+  const __m256i loaded = _mm256_maskz_loadu_epi32(m.raw, p);
+  return VFromD<D>{loaded};
+}
+
+// 64-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI64_D(D)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D /* tag */,
+                             const TFromD<D>* HWY_RESTRICT p) {
+  const __m256i loaded = _mm256_maskz_loadu_epi64(m.raw, p);
+  return VFromD<D>{loaded};
+}
+
+// float lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API Vec256<float> MaskedLoad(Mask256<float> m, D /* tag */,
+                                 const float* HWY_RESTRICT p) {
+  const __m256 loaded = _mm256_maskz_loadu_ps(m.raw, p);
+  return Vec256<float>{loaded};
+}
+
+// double lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API Vec256<double> MaskedLoad(Mask256<double> m, D /* tag */,
+                                  const double* HWY_RESTRICT p) {
+  const __m256d loaded = _mm256_maskz_loadu_pd(m.raw, p);
+  return Vec256<double>{loaded};
+}
+
+// AVX3 MaskedLoadOr for 8-bit lanes: merge-masked unaligned load with `v` as
+// the merge source and `m` selecting the lanes read from `p`.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, MFromD<D> m, D /* tag */,
+                               const TFromD<D>* HWY_RESTRICT p) {
+  const __m256i merged = _mm256_mask_loadu_epi8(v.raw, m.raw, p);
+  return VFromD<D>{merged};
+}
+
+// AVX3 MaskedLoadOr for 16-bit lanes (including float16_t): merge-masked
+// unaligned load with `v` as the merge source and `m` selecting the lanes
+// read from `p`. The load is done on unsigned 16-bit lanes and the result is
+// reinterpreted as D's lane type.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, MFromD<D> m, D d,
+                               const TFromD<D>* HWY_RESTRICT p) {
+  const RebindToUnsigned<decltype(d)> du;  // for float16_t
+  // Pass the merge source as an integer vector: for float16_t lanes, v.raw is
+  // not necessarily the __m256i that the epi16 intrinsic expects. This matches
+  // how the 16-bit BlendedStore converts its vector operand.
+  return BitCast(d, VFromD<decltype(du)>{_mm256_mask_loadu_epi16(
+                        BitCast(du, v).raw, m.raw, p)});
+}
+
+// AVX3 MaskedLoadOr for 32/64-bit integer and float/double lanes:
+// merge-masked unaligned loads with `v` as the merge source and `m` selecting
+// the lanes read from `p`.
+
+// 32-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI32_D(D)>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, MFromD<D> m, D /* tag */,
+                               const TFromD<D>* HWY_RESTRICT p) {
+  const __m256i merged = _mm256_mask_loadu_epi32(v.raw, m.raw, p);
+  return VFromD<D>{merged};
+}
+
+// 64-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI64_D(D)>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, MFromD<D> m, D /* tag */,
+                               const TFromD<D>* HWY_RESTRICT p) {
+  const __m256i merged = _mm256_mask_loadu_epi64(v.raw, m.raw, p);
+  return VFromD<D>{merged};
+}
+
+// float lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API Vec256<float> MaskedLoadOr(VFromD<D> v, Mask256<float> m, D /* tag */,
+                                   const float* HWY_RESTRICT p) {
+  const __m256 merged = _mm256_mask_loadu_ps(v.raw, m.raw, p);
+  return Vec256<float>{merged};
+}
+
+// double lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API Vec256<double> MaskedLoadOr(VFromD<D> v, Mask256<double> m, D /* tag */,
+                                    const double* HWY_RESTRICT p) {
+  const __m256d merged = _mm256_mask_loadu_pd(v.raw, m.raw, p);
+  return Vec256<double>{merged};
+}
+
+#else  //  AVX2
+
+// AVX2 has no maskload for 8/16-bit lanes, so load the whole vector and zero
+// the lanes that `m` does not select.
+template <class D, HWY_IF_V_SIZE_D(D, 32),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2))>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D d,
+                             const TFromD<D>* HWY_RESTRICT p) {
+  const VFromD<D> all_lanes = LoadU(d, p);
+  return IfThenElseZero(m, all_lanes);
+}
+
+// 32-bit integer lanes: maskload takes an `int` pointer.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI32_D(D)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D /* tag */,
+                             const TFromD<D>* HWY_RESTRICT p) {
+  const int* src = reinterpret_cast<const int*>(p);  // NOLINT
+  const __m256i loaded = _mm256_maskload_epi32(src, m.raw);
+  return VFromD<D>{loaded};
+}
+
+// 64-bit integer lanes: maskload takes a `long long` pointer.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI64_D(D)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D /* tag */,
+                             const TFromD<D>* HWY_RESTRICT p) {
+  const long long* src = reinterpret_cast<const long long*>(p);  // NOLINT
+  const __m256i loaded = _mm256_maskload_epi64(src, m.raw);
+  return VFromD<D>{loaded};
+}
+
+// float lanes: the intrinsic wants the mask as an integer vector, so convert
+// the mask to a vector and reinterpret it as int32 lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API Vec256<float> MaskedLoad(Mask256<float> m, D d,
+                                 const float* HWY_RESTRICT p) {
+  const RebindToSigned<decltype(d)> di;
+  const Vec256<int32_t> mask_bits = BitCast(di, VecFromMask(d, m));
+  return Vec256<float>{_mm256_maskload_ps(p, mask_bits.raw)};
+}
+
+// double lanes: as above, with the mask reinterpreted as int64 lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API Vec256<double> MaskedLoad(Mask256<double> m, D d,
+                                  const double* HWY_RESTRICT p) {
+  const RebindToSigned<decltype(d)> di;
+  const Vec256<int64_t> mask_bits = BitCast(di, VecFromMask(d, m));
+  return Vec256<double>{_mm256_maskload_pd(p, mask_bits.raw)};
+}
+
+#endif
+
+// ------------------------------ LoadDup128
+
+// Loads 128 bit and duplicates into both 128-bit halves. This avoids the
+// 3-cycle cost of moving data between 128-bit halves and avoids port 5.
+// This overload covers every lane type except float and double.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API VFromD<D> LoadDup128(D /* tag */, const TFromD<D>* HWY_RESTRICT p) {
+  const Full128<TFromD<D>> d128;
+#if HWY_COMPILER_MSVC && HWY_COMPILER_MSVC < 1931
+  // Workaround for incorrect results with _mm256_broadcastsi128_si256. Note
+  // that MSVC also lacks _mm256_zextsi128_si256, but cast (which leaves the
+  // upper half undefined) is fine because we're overwriting that anyway.
+  // This workaround seems in turn to generate incorrect code in MSVC 2022
+  // (19.31), so use broadcastsi128 there.
+  const __m128i v128 = LoadU(d128, p).raw;
+  // Widen to 256 bits, then insert the same 128 bits into the upper half.
+  return VFromD<D>{
+      _mm256_inserti128_si256(_mm256_castsi128_si256(v128), v128, 1)};
+#else
+  // The preferred path. This is perhaps surprising, because vbroadcasti128
+  // with xmm input has 7 cycle latency on Intel, but Clang >= 7 is able to
+  // pattern-match this to vbroadcastf128 with a memory operand as desired.
+  return VFromD<D>{_mm256_broadcastsi128_si256(LoadU(d128, p).raw)};
+#endif
+}
+// float overload: same MSVC (< 19.31) load-and-insert workaround as above;
+// otherwise broadcasts the 128 bits directly from memory.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API Vec256<float> LoadDup128(D /* tag */, const float* HWY_RESTRICT p) {
+#if HWY_COMPILER_MSVC && HWY_COMPILER_MSVC < 1931
+  const Full128<float> d128;
+  const __m128 v128 = LoadU(d128, p).raw;
+  return Vec256<float>{
+      _mm256_insertf128_ps(_mm256_castps128_ps256(v128), v128, 1)};
+#else
+  return Vec256<float>{_mm256_broadcast_ps(reinterpret_cast<const __m128*>(p))};
+#endif
+}
+// double overload: same structure as the float overload.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API Vec256<double> LoadDup128(D /* tag */, const double* HWY_RESTRICT p) {
+#if HWY_COMPILER_MSVC && HWY_COMPILER_MSVC < 1931
+  const Full128<double> d128;
+  const __m128d v128 = LoadU(d128, p).raw;
+  return Vec256<double>{
+      _mm256_insertf128_pd(_mm256_castpd128_pd256(v128), v128, 1)};
+#else
+  return Vec256<double>{
+      _mm256_broadcast_pd(reinterpret_cast<const __m128d*>(p))};
+#endif
+}
+
+// ------------------------------ Store
+
+// Stores a full 256-bit vector to `aligned` using the aligned-store
+// intrinsics. Integer lane types share the si256 store.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+HWY_API void Store(VFromD<D> v, D /* tag */, TFromD<D>* HWY_RESTRICT aligned) {
+  __m256i* dst = reinterpret_cast<__m256i*>(aligned);
+  _mm256_store_si256(dst, v.raw);
+}
+// float16_t: native store when HWY_HAVE_FLOAT16, otherwise store the bits as
+// uint16_t lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F16_D(D)>
+HWY_API void Store(Vec256<float16_t> v, D d, float16_t* HWY_RESTRICT aligned) {
+#if !HWY_HAVE_FLOAT16
+  const RebindToUnsigned<decltype(d)> du;
+  uint16_t* bits = reinterpret_cast<uint16_t*>(aligned);
+  Store(BitCast(du, v), du, bits);
+#else
+  (void)d;
+  _mm256_store_ph(aligned, v.raw);
+#endif  // !HWY_HAVE_FLOAT16
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API void Store(Vec256<float> v, D /* tag */, float* HWY_RESTRICT aligned) {
+  const __m256 lanes = v.raw;
+  _mm256_store_ps(aligned, lanes);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API void Store(Vec256<double> v, D /* tag */,
+                   double* HWY_RESTRICT aligned) {
+  const __m256d lanes = v.raw;
+  _mm256_store_pd(aligned, lanes);
+}
+
+// Stores a full 256-bit vector to `p` using the unaligned-store intrinsics.
+// Integer lane types share the si256 store.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+HWY_API void StoreU(VFromD<D> v, D /* tag */, TFromD<D>* HWY_RESTRICT p) {
+  __m256i* dst = reinterpret_cast<__m256i*>(p);
+  _mm256_storeu_si256(dst, v.raw);
+}
+// float16_t: native store when HWY_HAVE_FLOAT16, otherwise store the bits as
+// uint16_t lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F16_D(D)>
+HWY_API void StoreU(Vec256<float16_t> v, D d, float16_t* HWY_RESTRICT p) {
+#if !HWY_HAVE_FLOAT16
+  const RebindToUnsigned<decltype(d)> du;
+  uint16_t* bits = reinterpret_cast<uint16_t*>(p);
+  StoreU(BitCast(du, v), du, bits);
+#else
+  (void)d;
+  _mm256_storeu_ph(p, v.raw);
+#endif  // !HWY_HAVE_FLOAT16
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API void StoreU(Vec256<float> v, D /* tag */, float* HWY_RESTRICT p) {
+  const __m256 lanes = v.raw;
+  _mm256_storeu_ps(p, lanes);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API void StoreU(Vec256<double> v, D /* tag */, double* HWY_RESTRICT p) {
+  const __m256d lanes = v.raw;
+  _mm256_storeu_pd(p, lanes);
+}
+
+// ------------------------------ BlendedStore
+
+#if HWY_TARGET <= HWY_AVX3
+
+// AVX3 BlendedStore: masked unaligned stores (mask_storeu), one overload per
+// lane width / type. `m` selects which lanes of `v` are written to `p`.
+
+// 8-bit lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D /* tag */,
+                          TFromD<D>* HWY_RESTRICT p) {
+  const __m256i lanes = v.raw;
+  _mm256_mask_storeu_epi8(p, m.raw, lanes);
+}
+
+// 16-bit lanes; vector and mask are rebound to unsigned integers so that
+// float16_t is also supported.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D d,
+                          TFromD<D>* HWY_RESTRICT p) {
+  const RebindToUnsigned<decltype(d)> du;  // for float16_t
+  uint16_t* dst = reinterpret_cast<uint16_t*>(p);
+  const VFromD<decltype(du)> lanes = BitCast(du, v);
+  _mm256_mask_storeu_epi16(dst, RebindMask(du, m).raw, lanes.raw);
+}
+
+// 32-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI32_D(D)>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D /* tag */,
+                          TFromD<D>* HWY_RESTRICT p) {
+  const __m256i lanes = v.raw;
+  _mm256_mask_storeu_epi32(p, m.raw, lanes);
+}
+
+// 64-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI64_D(D)>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D /* tag */,
+                          TFromD<D>* HWY_RESTRICT p) {
+  const __m256i lanes = v.raw;
+  _mm256_mask_storeu_epi64(p, m.raw, lanes);
+}
+
+// float lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API void BlendedStore(Vec256<float> v, Mask256<float> m, D /* tag */,
+                          float* HWY_RESTRICT p) {
+  const __m256 lanes = v.raw;
+  _mm256_mask_storeu_ps(p, m.raw, lanes);
+}
+
+// double lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API void BlendedStore(Vec256<double> v, Mask256<double> m, D /* tag */,
+                          double* HWY_RESTRICT p) {
+  const __m256d lanes = v.raw;
+  _mm256_mask_storeu_pd(p, m.raw, lanes);
+}
+
+#else  //  AVX2
+
+// Intel SDM says "No AC# reported for any mask bit combinations". However, AMD
+// allows AC# if "Alignment checking enabled and: 256-bit memory operand not
+// 32-byte aligned". Fortunately AC# is not enabled by default and requires both
+// OS support (CR0) and the application to set rflags.AC. We assume these remain
+// disabled because x86/x64 code and compiler output often contain misaligned
+// scalar accesses, which would also fault.
+//
+// Caveat: these are slow on AMD Jaguar/Bulldozer.
+
+// AVX2 BlendedStore for 8/16-bit lanes. Writes lane i of `v` to p[i] only
+// where the corresponding lane of `m` is set.
+template <class D, HWY_IF_V_SIZE_D(D, 32),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2))>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D d,
+                          TFromD<D>* HWY_RESTRICT p) {
+  // AVX2 offers no masked store for 8/16-bit lanes. Load+blend+store is also
+  // unsafe because touching a full vector that crosses the array end causes
+  // asan faults. Resort to scalar code; the caller should instead use memcpy,
+  // assuming m is FirstN(d, n).
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  alignas(32) TU lane_bits[MaxLanes(d)];
+  alignas(32) TU selected[MaxLanes(d)];
+  // Spill both the values and the mask (as a vector) to stack buffers.
+  Store(BitCast(du, v), du, lane_bits);
+  Store(BitCast(du, VecFromMask(d, m)), du, selected);
+  for (size_t i = 0; i < MaxLanes(d); ++i) {
+    if (!selected[i]) continue;  // lane not selected: leave p[i] untouched
+    CopySameSize(lane_bits + i, p + i);
+  }
+}
+
+// AVX2 BlendedStore for 32/64-bit lanes via the maskstore intrinsics.
+
+// 32-bit integer lanes: maskstore takes an `int` pointer.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI32_D(D)>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D /* tag */,
+                          TFromD<D>* HWY_RESTRICT p) {
+  int* dst = reinterpret_cast<int*>(p);  // NOLINT
+  _mm256_maskstore_epi32(dst, m.raw, v.raw);
+}
+
+// 64-bit integer lanes: maskstore takes a `long long` pointer.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI64_D(D)>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D /* tag */,
+                          TFromD<D>* HWY_RESTRICT p) {
+  long long* dst = reinterpret_cast<long long*>(p);  // NOLINT
+  _mm256_maskstore_epi64(dst, m.raw, v.raw);
+}
+
+// float lanes: the intrinsic wants the mask as an integer vector, so convert
+// the mask to a vector and reinterpret it as int32 lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API void BlendedStore(Vec256<float> v, Mask256<float> m, D d,
+                          float* HWY_RESTRICT p) {
+  const RebindToSigned<decltype(d)> di;
+  const Vec256<int32_t> mask_bits = BitCast(di, VecFromMask(d, m));
+  _mm256_maskstore_ps(p, mask_bits.raw, v.raw);
+}
+
+// double lanes: as above, with the mask reinterpreted as int64 lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API void BlendedStore(Vec256<double> v, Mask256<double> m, D d,
+                          double* HWY_RESTRICT p) {
+  const RebindToSigned<decltype(d)> di;
+  const Vec256<int64_t> mask_bits = BitCast(di, VecFromMask(d, m));
+  _mm256_maskstore_pd(p, mask_bits.raw, v.raw);
+}
+
+#endif
+
+// ------------------------------ Non-temporal stores
+
+// Non-temporal (streaming) stores of a full 256-bit vector to `aligned`.
+
+// All lane types except float/double: store the bits as an integer vector.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API void Stream(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT aligned) {
+  const RebindToUnsigned<decltype(d)> du;  // for float16_t
+  __m256i* dst = reinterpret_cast<__m256i*>(aligned);
+  _mm256_stream_si256(dst, BitCast(du, v).raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API void Stream(Vec256<float> v, D /* tag */, float* HWY_RESTRICT aligned) {
+  const __m256 lanes = v.raw;
+  _mm256_stream_ps(aligned, lanes);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API void Stream(Vec256<double> v, D /* tag */,
+                    double* HWY_RESTRICT aligned) {
+  const __m256d lanes = v.raw;
+  _mm256_stream_pd(aligned, lanes);
+}
+
+// ------------------------------ ScatterOffset
+
+// Work around warnings in the intrinsic definitions (passing -1 as a mask).
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4245 4365, ignored "-Wsign-conversion")
+
+#if HWY_TARGET <= HWY_AVX3
+
+// ScatterOffset: stores each lane of `v` at `base` plus the matching lane of
+// `offset`. The scale passed to the intrinsic is 1, i.e. offsets are bytes.
+
+// 32-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI32_D(D)>
+HWY_API void ScatterOffset(VFromD<D> v, D /* tag */,
+                           TFromD<D>* HWY_RESTRICT base,
+                           Vec256<int32_t> offset) {
+  const __m256i byte_offsets = offset.raw;
+  _mm256_i32scatter_epi32(base, byte_offsets, v.raw, 1);
+}
+
+// 64-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI64_D(D)>
+HWY_API void ScatterOffset(VFromD<D> v, D /* tag */,
+                           TFromD<D>* HWY_RESTRICT base,
+                           Vec256<int64_t> offset) {
+  const __m256i byte_offsets = offset.raw;
+  _mm256_i64scatter_epi64(base, byte_offsets, v.raw, 1);
+}
+
+// float lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API void ScatterOffset(VFromD<D> v, D /* tag */, float* HWY_RESTRICT base,
+                           const Vec256<int32_t> offset) {
+  const __m256i byte_offsets = offset.raw;
+  _mm256_i32scatter_ps(base, byte_offsets, v.raw, 1);
+}
+
+// double lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API void ScatterOffset(VFromD<D> v, D /* tag */, double* HWY_RESTRICT base,
+                           const Vec256<int64_t> offset) {
+  const __m256i byte_offsets = offset.raw;
+  _mm256_i64scatter_pd(base, byte_offsets, v.raw, 1);
+}
+
+// ------------------------------ ScatterIndex
+
+// ScatterIndex: stores each lane of `v` to base[index lane]. The scale passed
+// to the intrinsic is the lane size in bytes (4 or 8), so indices count
+// elements rather than bytes.
+
+// 32-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI32_D(D)>
+HWY_API void ScatterIndex(VFromD<D> v, D /* tag */,
+                          TFromD<D>* HWY_RESTRICT base,
+                          VFromD<RebindToSigned<D>> index) {
+  const __m256i lane_indices = index.raw;
+  _mm256_i32scatter_epi32(base, lane_indices, v.raw, 4);
+}
+
+// 64-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI64_D(D)>
+HWY_API void ScatterIndex(VFromD<D> v, D /* tag */,
+                          TFromD<D>* HWY_RESTRICT base,
+                          VFromD<RebindToSigned<D>> index) {
+  const __m256i lane_indices = index.raw;
+  _mm256_i64scatter_epi64(base, lane_indices, v.raw, 8);
+}
+
+// float lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API void ScatterIndex(VFromD<D> v, D /* tag */, float* HWY_RESTRICT base,
+                          VFromD<RebindToSigned<D>> index) {
+  const __m256i lane_indices = index.raw;
+  _mm256_i32scatter_ps(base, lane_indices, v.raw, 4);
+}
+
+// double lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API void ScatterIndex(VFromD<D> v, D /* tag */, double* HWY_RESTRICT base,
+                          VFromD<RebindToSigned<D>> index) {
+  const __m256i lane_indices = index.raw;
+  _mm256_i64scatter_pd(base, lane_indices, v.raw, 8);
+}
+
+// ------------------------------ MaskedScatterIndex
+
+// MaskedScatterIndex: like ScatterIndex, but uses the masked scatter
+// intrinsics with `m` selecting which lanes are stored. The scale is the lane
+// size in bytes (4 or 8).
+
+// 32-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI32_D(D)>
+HWY_API void MaskedScatterIndex(VFromD<D> v, MFromD<D> m, D /* tag */,
+                                TFromD<D>* HWY_RESTRICT base,
+                                VFromD<RebindToSigned<D>> index) {
+  const __m256i lane_indices = index.raw;
+  _mm256_mask_i32scatter_epi32(base, m.raw, lane_indices, v.raw, 4);
+}
+
+// 64-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI64_D(D)>
+HWY_API void MaskedScatterIndex(VFromD<D> v, MFromD<D> m, D /* tag */,
+                                TFromD<D>* HWY_RESTRICT base,
+                                VFromD<RebindToSigned<D>> index) {
+  const __m256i lane_indices = index.raw;
+  _mm256_mask_i64scatter_epi64(base, m.raw, lane_indices, v.raw, 8);
+}
+
+// float lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API void MaskedScatterIndex(VFromD<D> v, MFromD<D> m, D /* tag */,
+                                float* HWY_RESTRICT base,
+                                VFromD<RebindToSigned<D>> index) {
+  const __m256i lane_indices = index.raw;
+  _mm256_mask_i32scatter_ps(base, m.raw, lane_indices, v.raw, 4);
+}
+
+// double lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API void MaskedScatterIndex(VFromD<D> v, MFromD<D> m, D /* tag */,
+                                double* HWY_RESTRICT base,
+                                VFromD<RebindToSigned<D>> index) {
+  const __m256i lane_indices = index.raw;
+  _mm256_mask_i64scatter_pd(base, m.raw, lane_indices, v.raw, 8);
+}
+
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// ------------------------------ Gather
+
+// GatherOffset / GatherIndex: load one lane per element of `offset`/`index`
+// relative to `base`. GatherOffset passes scale 1 (byte offsets); GatherIndex
+// passes the lane size in bytes (element indices).
+
+// 32-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI32_D(D)>
+HWY_INLINE VFromD<D> GatherOffset(D /* tag */,
+                                  const TFromD<D>* HWY_RESTRICT base,
+                                  Vec256<int32_t> offset) {
+  const int32_t* src = reinterpret_cast<const int32_t*>(base);
+  return VFromD<D>{_mm256_i32gather_epi32(src, offset.raw, 1)};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI32_D(D)>
+HWY_INLINE VFromD<D> GatherIndex(D /* tag */,
+                                 const TFromD<D>* HWY_RESTRICT base,
+                                 Vec256<int32_t> index) {
+  const int32_t* src = reinterpret_cast<const int32_t*>(base);
+  return VFromD<D>{_mm256_i32gather_epi32(src, index.raw, 4)};
+}
+
+// 64-bit integer lanes; the base pointer is cast to GatherIndex64.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI64_D(D)>
+HWY_INLINE VFromD<D> GatherOffset(D /* tag */,
+                                  const TFromD<D>* HWY_RESTRICT base,
+                                  Vec256<int64_t> offset) {
+  const GatherIndex64* src = reinterpret_cast<const GatherIndex64*>(base);
+  return VFromD<D>{_mm256_i64gather_epi64(src, offset.raw, 1)};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI64_D(D)>
+HWY_INLINE VFromD<D> GatherIndex(D /* tag */,
+                                 const TFromD<D>* HWY_RESTRICT base,
+                                 Vec256<int64_t> index) {
+  const GatherIndex64* src = reinterpret_cast<const GatherIndex64*>(base);
+  return VFromD<D>{_mm256_i64gather_epi64(src, index.raw, 8)};
+}
+
+// float lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API Vec256<float> GatherOffset(D /* tag */, const float* HWY_RESTRICT base,
+                                   Vec256<int32_t> offset) {
+  const __m256i byte_offsets = offset.raw;
+  return Vec256<float>{_mm256_i32gather_ps(base, byte_offsets, 1)};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API Vec256<float> GatherIndex(D /* tag */, const float* HWY_RESTRICT base,
+                                  Vec256<int32_t> index) {
+  const __m256i lane_indices = index.raw;
+  return Vec256<float>{_mm256_i32gather_ps(base, lane_indices, 4)};
+}
+// double lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API Vec256<double> GatherOffset(D /* tag */,
+                                    const double* HWY_RESTRICT base,
+                                    Vec256<int64_t> offset) {
+  const __m256i byte_offsets = offset.raw;
+  return Vec256<double>{_mm256_i64gather_pd(base, byte_offsets, 1)};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API Vec256<double> GatherIndex(D /* tag */, const double* HWY_RESTRICT base,
+                                   Vec256<int64_t> index) {
+  const __m256i lane_indices = index.raw;
+  return Vec256<double>{_mm256_i64gather_pd(base, lane_indices, 8)};
+}
+
+// ------------------------------ MaskedGatherIndex
+
+// MaskedGatherIndex: gathers base[index lane] for lanes selected by `m`,
+// passing a zero vector as the source operand for the remaining lanes. AVX3
+// targets use the mmask gather intrinsics; otherwise the AVX2 mask gather
+// intrinsics are used. The scale is the lane size in bytes.
+
+// 32-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI32_D(D)>
+HWY_INLINE VFromD<D> MaskedGatherIndex(MFromD<D> m, D d,
+                                       const TFromD<D>* HWY_RESTRICT base,
+                                       Vec256<int32_t> index) {
+  const int32_t* src = reinterpret_cast<const int32_t*>(base);
+  const VFromD<D> no_value = Zero(d);
+#if HWY_TARGET <= HWY_AVX3
+  return VFromD<D>{
+      _mm256_mmask_i32gather_epi32(no_value.raw, m.raw, index.raw, src, 4)};
+#else
+  return VFromD<D>{
+      _mm256_mask_i32gather_epi32(no_value.raw, src, index.raw, m.raw, 4)};
+#endif
+}
+
+// 64-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI64_D(D)>
+HWY_INLINE VFromD<D> MaskedGatherIndex(MFromD<D> m, D d,
+                                       const TFromD<D>* HWY_RESTRICT base,
+                                       Vec256<int64_t> index) {
+#if HWY_TARGET <= HWY_AVX3
+  const GatherIndex64* src = reinterpret_cast<const GatherIndex64*>(base);
+  return VFromD<D>{_mm256_mmask_i64gather_epi64(Zero(d).raw, m.raw, index.raw,
+                                                src, 8)};
+#else
+  // For reasons unknown, _mm256_mask_i64gather_epi64 returns all-zeros, so
+  // gather as doubles instead and reinterpret the bits as D's lane type.
+  const RebindToFloat<D> df;
+  const Vec256<double> gathered{_mm256_mask_i64gather_pd(
+      Zero(df).raw, reinterpret_cast<const double*>(base), index.raw,
+      RebindMask(df, m).raw, 8)};
+  return BitCast(d, gathered);
+#endif
+}
+
+// float lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API Vec256<float> MaskedGatherIndex(MFromD<D> m, D d,
+                                        const float* HWY_RESTRICT base,
+                                        Vec256<int32_t> index) {
+  const Vec256<float> no_value = Zero(d);
+#if HWY_TARGET <= HWY_AVX3
+  return Vec256<float>{
+      _mm256_mmask_i32gather_ps(no_value.raw, m.raw, index.raw, base, 4)};
+#else
+  return Vec256<float>{
+      _mm256_mask_i32gather_ps(no_value.raw, base, index.raw, m.raw, 4)};
+#endif
+}
+
+// double lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API Vec256<double> MaskedGatherIndex(MFromD<D> m, D d,
+                                         const double* HWY_RESTRICT base,
+                                         Vec256<int64_t> index) {
+  const Vec256<double> no_value = Zero(d);
+#if HWY_TARGET <= HWY_AVX3
+  return Vec256<double>{
+      _mm256_mmask_i64gather_pd(no_value.raw, m.raw, index.raw, base, 8)};
+#else
+  return Vec256<double>{
+      _mm256_mask_i64gather_pd(no_value.raw, base, index.raw, m.raw, 8)};
+#endif
+}
+
+HWY_DIAGNOSTICS(pop)
+
+// ================================================== SWIZZLE
+
+// ------------------------------ LowerHalf
+
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+HWY_API VFromD<D> LowerHalf(D /* tag */, VFromD<Twice<D>> v) {
+  return VFromD<D>{_mm256_castsi256_si128(v.raw)};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_BF16_D(D)>
+HWY_API Vec128<bfloat16_t> LowerHalf(D /* tag */, Vec256<bfloat16_t> v) {
+  return Vec128<bfloat16_t>{_mm256_castsi256_si128(v.raw)};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F16_D(D)>
+HWY_API Vec128<float16_t> LowerHalf(D /* tag */, Vec256<float16_t> v) {
+#if HWY_HAVE_FLOAT16
+  return Vec128<float16_t>{_mm256_castph256_ph128(v.raw)};
+#else
+  return Vec128<float16_t>{_mm256_castsi256_si128(v.raw)};
+#endif  // HWY_HAVE_FLOAT16
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API Vec128<float> LowerHalf(D /* tag */, Vec256<float> v) {
+  return Vec128<float>{_mm256_castps256_ps128(v.raw)};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API Vec128<double> LowerHalf(D /* tag */, Vec256<double> v) {
+  return Vec128<double>{_mm256_castpd256_pd128(v.raw)};
+}
+
+template <typename T>
+HWY_API Vec128<T> LowerHalf(Vec256<T> v) {
+  const Full128<T> dh;
+  return LowerHalf(dh, v);
+}
+
+// ------------------------------ UpperHalf
+
+// Returns the upper 128-bit block of `v`. This overload handles all lane types
+// other than float and double.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API VFromD<D> UpperHalf(D d, VFromD<Twice<D>> v) {
+  const RebindToUnsigned<decltype(d)> du;  // for float16_t
+  // Must be derived from `du`, not `d`: _mm256_extracti128_si256 requires an
+  // integer (__m256i) operand, so `v` is first cast to the unsigned 256-bit
+  // vector type. Using Twice<decltype(d)> would make the BitCast a no-op.
+  const Twice<decltype(du)> dut;
+  return BitCast(d, VFromD<decltype(du)>{
+                        _mm256_extracti128_si256(BitCast(dut, v).raw, 1)});
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> UpperHalf(D /* tag */, Vec256<float> v) {
+  return VFromD<D>{_mm256_extractf128_ps(v.raw, 1)};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> UpperHalf(D /* tag */, Vec256<double> v) {
+  return VFromD<D>{_mm256_extractf128_pd(v.raw, 1)};
+}
+
+// ------------------------------ ExtractLane (Store)
+// Returns lane `i` of `v`. `i` must be less than the number of lanes; this is
+// checked by HWY_DASSERT.
+template <typename T>
+HWY_API T ExtractLane(const Vec256<T> v, size_t i) {
+  using D = DFromV<decltype(v)>;
+  const D d;
+  HWY_DASSERT(i < Lanes(d));
+
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  // When the compiler can prove at compile time that `i` addresses the lower
+  // 128-bit block, forward to the 128-bit ExtractLane.
+  constexpr size_t kLanesInLowerBlock = 16 / sizeof(T);
+  if (__builtin_constant_p(i < kLanesInLowerBlock) &&
+      (i < kLanesInLowerBlock)) {
+    const Half<D> dh;
+    return ExtractLane(LowerHalf(dh, v), i);
+  }
+#endif
+
+  // Otherwise store all lanes to an aligned buffer and index into it.
+  alignas(32) T stored[32 / sizeof(T)];
+  Store(v, d, stored);
+  return stored[i];
+}
+
+// ------------------------------ InsertLane (Store)
+template <typename T>
+HWY_API Vec256<T> InsertLane(const Vec256<T> v, size_t i, T t) {
+  return detail::InsertLaneUsingBroadcastAndBlend(v, i, t);
+}
+
+// ------------------------------ GetLane (LowerHalf)
+template <typename T>
+HWY_API T GetLane(const Vec256<T> v) {
+  return GetLane(LowerHalf(v));
+}
+
+// ------------------------------ ExtractBlock (LowerHalf, UpperHalf)
+
+// Returns the 128-bit block of `v` chosen at compile time: kBlockIdx == 0
+// yields the lower half, kBlockIdx == 1 the upper half.
+template <int kBlockIdx, class T>
+HWY_API Vec128<T> ExtractBlock(Vec256<T> v) {
+  static_assert(kBlockIdx == 0 || kBlockIdx == 1, "Invalid block index");
+  const Half<DFromV<decltype(v)>> d_half;
+  if (kBlockIdx == 0) {
+    return LowerHalf(d_half, v);
+  }
+  return UpperHalf(d_half, v);
+}
+
+// ------------------------------ ZeroExtendVector
+
+// Unfortunately the initial _mm256_castsi128_si256 intrinsic leaves the upper
+// bits undefined. Although it makes sense for them to be zero (VEX encoded
+// 128-bit instructions zero the upper lanes to avoid large penalties), a
+// compiler could decide to optimize out code that relies on this.
+//
+// The newer _mm256_zextsi128_si256 intrinsic fixes this by specifying the
+// zeroing, but it is not available on MSVC until 15.7 nor GCC until 10.1. For
+// older GCC, we can still obtain the desired code thanks to pattern
+// recognition; note that the expensive insert instruction is not actually
+// generated, see https://gcc.godbolt.org/z/1MKGaP.
+
+#if !defined(HWY_HAVE_ZEXT)  // keep any existing definition
+#if (HWY_COMPILER_MSVC && HWY_COMPILER_MSVC >= 1915) ||  \
+    (HWY_COMPILER_CLANG && HWY_COMPILER_CLANG >= 500) || \
+    (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL >= 1000)
+#define HWY_HAVE_ZEXT 1  // ZeroExtendVector uses the _mm256_zext* intrinsics
+#else
+#define HWY_HAVE_ZEXT 0  // ZeroExtendVector inserts into a zero vector
+#endif
+#endif  // !defined(HWY_HAVE_ZEXT)
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+HWY_API VFromD<D> ZeroExtendVector(D /* tag */, VFromD<Half<D>> lo) {
+#if HWY_HAVE_ZEXT
+  return VFromD<D>{_mm256_zextsi128_si256(lo.raw)};
+#else
+  return VFromD<D>{_mm256_inserti128_si256(_mm256_setzero_si256(), lo.raw, 0)};
+#endif
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_BF16_D(D)>
+HWY_API Vec256<bfloat16_t> ZeroExtendVector(D d, Vec128<bfloat16_t> lo) {
+  (void)d;
+#if HWY_HAVE_ZEXT
+  return VFromD<D>{_mm256_zextsi128_si256(lo.raw)};
+#else
+  return VFromD<D>{_mm256_inserti128_si256(_mm256_setzero_si256(), lo.raw, 0)};
+#endif  // HWY_HAVE_ZEXT
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F16_D(D)>
+HWY_API Vec256<float16_t> ZeroExtendVector(D d, Vec128<float16_t> lo) {
+#if HWY_HAVE_FLOAT16
+#if HWY_HAVE_ZEXT
+  (void)d;
+  return Vec256<float16_t>{_mm256_zextph128_ph256(lo.raw)};
+#else
+  const RebindToUnsigned<D> du;
+  return BitCast(d, ZeroExtendVector(du, BitCast(du, lo)));
+#endif  // HWY_HAVE_ZEXT
+#else
+  (void)d;
+#if HWY_HAVE_ZEXT
+  return VFromD<D>{_mm256_zextsi128_si256(lo.raw)};
+#else
+  return VFromD<D>{_mm256_inserti128_si256(_mm256_setzero_si256(), lo.raw, 0)};
+#endif  // HWY_HAVE_ZEXT
+#endif  // HWY_HAVE_FLOAT16
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API Vec256<float> ZeroExtendVector(D /* tag */, Vec128<float> lo) {
+#if HWY_HAVE_ZEXT
+  return Vec256<float>{_mm256_zextps128_ps256(lo.raw)};
+#else
+  return Vec256<float>{_mm256_insertf128_ps(_mm256_setzero_ps(), lo.raw, 0)};
+#endif
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API Vec256<double> ZeroExtendVector(D /* tag */, Vec128<double> lo) {
+#if HWY_HAVE_ZEXT
+  return Vec256<double>{_mm256_zextpd128_pd256(lo.raw)};
+#else
+  return Vec256<double>{_mm256_insertf128_pd(_mm256_setzero_pd(), lo.raw, 0)};
+#endif
+}
+
+// ------------------------------ ZeroExtendResizeBitCast
+
+namespace detail {
+
+template <class DTo, class DFrom>
+HWY_INLINE VFromD<DTo> ZeroExtendResizeBitCast(
+    hwy::SizeTag<8> /* from_size_tag */, hwy::SizeTag<32> /* to_size_tag */,
+    DTo d_to, DFrom d_from, VFromD<DFrom> v) {
+  const Twice<decltype(d_from)> dt_from;
+  const Twice<decltype(dt_from)> dq_from;
+  return BitCast(d_to, ZeroExtendVector(dq_from, ZeroExtendVector(dt_from, v)));
+}
+
+}  // namespace detail
+
+// ------------------------------ Combine
+
+// Concatenates two 128-bit vectors into one 256-bit vector: `lo` becomes the
+// lower block and `hi` the upper block. This overload handles all lane types
+// other than float and double.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API VFromD<D> Combine(D d, VFromD<Half<D>> hi, VFromD<Half<D>> lo) {
+  // Work on unsigned lanes so that every .raw passed to
+  // _mm256_inserti128_si256 is an integer vector, even when float16_t has its
+  // own raw type (the same approach UpperHalf takes).
+  const RebindToUnsigned<decltype(d)> du;  // for float16_t
+  const Half<decltype(du)> dhu;
+  const auto lo256 = ZeroExtendVector(du, BitCast(dhu, lo));
+  return BitCast(d, VFromD<decltype(du)>{_mm256_inserti128_si256(
+                        lo256.raw, BitCast(dhu, hi).raw, 1)});
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API Vec256<float> Combine(D d, Vec128<float> hi, Vec128<float> lo) {
+  const auto lo256 = ZeroExtendVector(d, lo);
+  return Vec256<float>{_mm256_insertf128_ps(lo256.raw, hi.raw, 1)};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API Vec256<double> Combine(D d, Vec128<double> hi, Vec128<double> lo) {
+  const auto lo256 = ZeroExtendVector(d, lo);
+  return Vec256<double>{_mm256_insertf128_pd(lo256.raw, hi.raw, 1)};
+}
+
+// ------------------------------ ShiftLeftBytes
+template <int kBytes, class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> ShiftLeftBytes(D /* tag */, VFromD<D> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  // This is the same operation as _mm256_bslli_epi128.
+  return VFromD<D>{_mm256_slli_si256(v.raw, kBytes)};
+}
+
+// ------------------------------ ShiftRightBytes
+template <int kBytes, class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> ShiftRightBytes(D /* tag */, VFromD<D> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  // This is the same operation as _mm256_bsrli_epi128.
+  return VFromD<D>{_mm256_srli_si256(v.raw, kBytes)};
+}
+
+// ------------------------------ CombineShiftRightBytes
+template <int kBytes, class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> CombineShiftRightBytes(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  return BitCast(d, Vec256<uint8_t>{_mm256_alignr_epi8(
+                        BitCast(d8, hi).raw, BitCast(d8, lo).raw, kBytes)});
+}
+
+// ------------------------------ Broadcast
+
+template <int kLane, typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec256<T> Broadcast(const Vec256<T> v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const VU vu = BitCast(du, v);  // for float16_t
+  static_assert(0 <= kLane && kLane < 8, "Invalid lane");
+  if (kLane < 4) {
+    const __m256i lo = _mm256_shufflelo_epi16(vu.raw, (0x55 * kLane) & 0xFF);
+    return BitCast(d, VU{_mm256_unpacklo_epi64(lo, lo)});
+  } else {
+    const __m256i hi =
+        _mm256_shufflehi_epi16(vu.raw, (0x55 * (kLane - 4)) & 0xFF);
+    return BitCast(d, VU{_mm256_unpackhi_epi64(hi, hi)});
+  }
+}
+template <int kLane, typename T, HWY_IF_UI32(T)>
+HWY_API Vec256<T> Broadcast(const Vec256<T> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  return Vec256<T>{_mm256_shuffle_epi32(v.raw, 0x55 * kLane)};
+}
+
+template <int kLane, typename T, HWY_IF_UI64(T)>
+HWY_API Vec256<T> Broadcast(const Vec256<T> v) {
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  return Vec256<T>{_mm256_shuffle_epi32(v.raw, kLane ? 0xEE : 0x44)};
+}
+
+template <int kLane>
+HWY_API Vec256<float> Broadcast(Vec256<float> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  return Vec256<float>{_mm256_shuffle_ps(v.raw, v.raw, 0x55 * kLane)};
+}
+
+template <int kLane>
+HWY_API Vec256<double> Broadcast(const Vec256<double> v) {
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  return Vec256<double>{_mm256_shuffle_pd(v.raw, v.raw, 15 * kLane)};
+}
+
+// ------------------------------ BroadcastBlock
+
+// Returns a vector in which both 128-bit blocks are copies of block
+// `kBlockIdx` (0 = lower, 1 = upper) of `v`.
+template <int kBlockIdx, class T>
+HWY_API Vec256<T> BroadcastBlock(Vec256<T> v) {
+  static_assert(kBlockIdx == 0 || kBlockIdx == 1, "Invalid block index");
+  const DFromV<decltype(v)> d;
+  if (kBlockIdx == 0) {
+    return ConcatLowerLower(d, v, v);
+  }
+  return ConcatUpperUpper(d, v, v);
+}
+
+// ------------------------------ BroadcastLane
+
+namespace detail {
+
+template <class T, HWY_IF_T_SIZE(T, 1)>
+HWY_INLINE Vec256<T> BroadcastLane(hwy::SizeTag<0> /* lane_idx_tag */,
+                                   Vec256<T> v) {
+  const Half<DFromV<decltype(v)>> dh;
+  return Vec256<T>{_mm256_broadcastb_epi8(LowerHalf(dh, v).raw)};
+}
+
+template <class T, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE Vec256<T> BroadcastLane(hwy::SizeTag<0> /* lane_idx_tag */,
+                                   Vec256<T> v) {
+  const Half<DFromV<decltype(v)>> dh;
+  return Vec256<T>{_mm256_broadcastw_epi16(LowerHalf(dh, v).raw)};
+}
+
+template <class T, HWY_IF_UI32(T)>
+HWY_INLINE Vec256<T> BroadcastLane(hwy::SizeTag<0> /* lane_idx_tag */,
+                                   Vec256<T> v) {
+  const Half<DFromV<decltype(v)>> dh;
+  return Vec256<T>{_mm256_broadcastd_epi32(LowerHalf(dh, v).raw)};
+}
+
+template <class T, HWY_IF_UI64(T)>
+HWY_INLINE Vec256<T> BroadcastLane(hwy::SizeTag<0> /* lane_idx_tag */,
+                                   Vec256<T> v) {
+  const Half<DFromV<decltype(v)>> dh;
+  return Vec256<T>{_mm256_broadcastq_epi64(LowerHalf(dh, v).raw)};
+}
+
+HWY_INLINE Vec256<float> BroadcastLane(hwy::SizeTag<0> /* lane_idx_tag */,
+                                       Vec256<float> v) {
+  const Half<DFromV<decltype(v)>> dh;
+  return Vec256<float>{_mm256_broadcastss_ps(LowerHalf(dh, v).raw)};
+}
+
+HWY_INLINE Vec256<double> BroadcastLane(hwy::SizeTag<0> /* lane_idx_tag */,
+                                        Vec256<double> v) {
+  const Half<DFromV<decltype(v)>> dh;
+  return Vec256<double>{_mm256_broadcastsd_pd(LowerHalf(dh, v).raw)};
+}
+
+template <size_t kLaneIdx, class T, hwy::EnableIf<kLaneIdx != 0>* = nullptr,
+          HWY_IF_NOT_T_SIZE(T, 8)>
+HWY_INLINE Vec256<T> BroadcastLane(hwy::SizeTag<kLaneIdx> /* lane_idx_tag */,
+                                   Vec256<T> v) {
+  constexpr size_t kLanesPerBlock = 16 / sizeof(T);
+  constexpr int kBlockIdx = static_cast<int>(kLaneIdx / kLanesPerBlock);
+  constexpr int kLaneInBlkIdx =
+      static_cast<int>(kLaneIdx) & (kLanesPerBlock - 1);
+  return Broadcast<kLaneInBlkIdx>(BroadcastBlock<kBlockIdx>(v));
+}
+
+template <size_t kLaneIdx, class T, hwy::EnableIf<kLaneIdx != 0>* = nullptr,
+          HWY_IF_UI64(T)>
+HWY_INLINE Vec256<T> BroadcastLane(hwy::SizeTag<kLaneIdx> /* lane_idx_tag */,
+                                   Vec256<T> v) {
+  static_assert(kLaneIdx <= 3, "Invalid lane");
+  return Vec256<T>{
+      _mm256_permute4x64_epi64(v.raw, static_cast<int>(0x55 * kLaneIdx))};
+}
+
+template <size_t kLaneIdx, hwy::EnableIf<kLaneIdx != 0>* = nullptr>
+HWY_INLINE Vec256<double> BroadcastLane(
+    hwy::SizeTag<kLaneIdx> /* lane_idx_tag */, Vec256<double> v) {
+  static_assert(kLaneIdx <= 3, "Invalid lane");
+  return Vec256<double>{
+      _mm256_permute4x64_pd(v.raw, static_cast<int>(0x55 * kLaneIdx))};
+}
+
+}  // namespace detail
+
+// Public entry point: rejects negative lane indices at compile time, then
+// dispatches to the detail::BroadcastLane overload chosen by the lane-index
+// tag and lane type.
+template <int kLaneIdx, class T>
+HWY_API Vec256<T> BroadcastLane(Vec256<T> v) {
+  static_assert(kLaneIdx >= 0, "Invalid lane");
+  using LaneIdxTag = hwy::SizeTag<static_cast<size_t>(kLaneIdx)>;
+  return detail::BroadcastLane(LaneIdxTag(), v);
+}
+
+// ------------------------------ Hard-coded shuffles
+
+// Notation: let Vec256<int32_t> have lanes 7,6,5,4,3,2,1,0 (0 is
+// least-significant). Shuffle0321 rotates four-lane blocks one lane to the
+// right (the previous least-significant lane is now most-significant =>
+// 47650321). These could also be implemented via CombineShiftRightBytes but
+// the shuffle_abcd notation is more convenient.
+
+// Swap 32-bit halves in 64-bit halves.
+template <typename T, HWY_IF_UI32(T)>
+HWY_API Vec256<T> Shuffle2301(const Vec256<T> v) {
+  return Vec256<T>{_mm256_shuffle_epi32(v.raw, 0xB1)};
+}
+HWY_API Vec256<float> Shuffle2301(const Vec256<float> v) {
+  return Vec256<float>{_mm256_shuffle_ps(v.raw, v.raw, 0xB1)};
+}
+
+// Used by generic_ops-inl.h
+namespace detail {
+
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec256<T> ShuffleTwo2301(const Vec256<T> a, const Vec256<T> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToFloat<decltype(d)> df;
+  constexpr int m = _MM_SHUFFLE(2, 3, 0, 1);
+  return BitCast(d, Vec256<float>{_mm256_shuffle_ps(BitCast(df, a).raw,
+                                                    BitCast(df, b).raw, m)});
+}
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec256<T> ShuffleTwo1230(const Vec256<T> a, const Vec256<T> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToFloat<decltype(d)> df;
+  constexpr int m = _MM_SHUFFLE(1, 2, 3, 0);
+  return BitCast(d, Vec256<float>{_mm256_shuffle_ps(BitCast(df, a).raw,
+                                                    BitCast(df, b).raw, m)});
+}
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec256<T> ShuffleTwo3012(const Vec256<T> a, const Vec256<T> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToFloat<decltype(d)> df;
+  constexpr int m = _MM_SHUFFLE(3, 0, 1, 2);
+  return BitCast(d, Vec256<float>{_mm256_shuffle_ps(BitCast(df, a).raw,
+                                                    BitCast(df, b).raw, m)});
+}
+
+}  // namespace detail
+
+// Swap 64-bit halves
+HWY_API Vec256<uint32_t> Shuffle1032(const Vec256<uint32_t> v) {
+  return Vec256<uint32_t>{_mm256_shuffle_epi32(v.raw, 0x4E)};
+}
+HWY_API Vec256<int32_t> Shuffle1032(const Vec256<int32_t> v) {
+  return Vec256<int32_t>{_mm256_shuffle_epi32(v.raw, 0x4E)};
+}
+HWY_API Vec256<float> Shuffle1032(const Vec256<float> v) {
+  // Shorter encoding than _mm256_permute_ps.
+  return Vec256<float>{_mm256_shuffle_ps(v.raw, v.raw, 0x4E)};
+}
+HWY_API Vec256<uint64_t> Shuffle01(const Vec256<uint64_t> v) {
+  return Vec256<uint64_t>{_mm256_shuffle_epi32(v.raw, 0x4E)};
+}
+HWY_API Vec256<int64_t> Shuffle01(const Vec256<int64_t> v) {
+  return Vec256<int64_t>{_mm256_shuffle_epi32(v.raw, 0x4E)};
+}
+HWY_API Vec256<double> Shuffle01(const Vec256<double> v) {
+  // Shorter encoding than _mm256_permute_pd.
+  return Vec256<double>{_mm256_shuffle_pd(v.raw, v.raw, 5)};
+}
+
+// Rotate right 32 bits
+HWY_API Vec256<uint32_t> Shuffle0321(const Vec256<uint32_t> v) {
+  return Vec256<uint32_t>{_mm256_shuffle_epi32(v.raw, 0x39)};
+}
+HWY_API Vec256<int32_t> Shuffle0321(const Vec256<int32_t> v) {
+  return Vec256<int32_t>{_mm256_shuffle_epi32(v.raw, 0x39)};
+}
+HWY_API Vec256<float> Shuffle0321(const Vec256<float> v) {
+  return Vec256<float>{_mm256_shuffle_ps(v.raw, v.raw, 0x39)};
+}
+// Rotate left 32 bits
+HWY_API Vec256<uint32_t> Shuffle2103(const Vec256<uint32_t> v) {
+  return Vec256<uint32_t>{_mm256_shuffle_epi32(v.raw, 0x93)};
+}
+HWY_API Vec256<int32_t> Shuffle2103(const Vec256<int32_t> v) {
+  return Vec256<int32_t>{_mm256_shuffle_epi32(v.raw, 0x93)};
+}
+HWY_API Vec256<float> Shuffle2103(const Vec256<float> v) {
+  return Vec256<float>{_mm256_shuffle_ps(v.raw, v.raw, 0x93)};
+}
+
+// Reverse
+HWY_API Vec256<uint32_t> Shuffle0123(const Vec256<uint32_t> v) {
+  return Vec256<uint32_t>{_mm256_shuffle_epi32(v.raw, 0x1B)};
+}
+HWY_API Vec256<int32_t> Shuffle0123(const Vec256<int32_t> v) {
+  return Vec256<int32_t>{_mm256_shuffle_epi32(v.raw, 0x1B)};
+}
+HWY_API Vec256<float> Shuffle0123(const Vec256<float> v) {
+  return Vec256<float>{_mm256_shuffle_ps(v.raw, v.raw, 0x1B)};
+}
+
+// ------------------------------ TableLookupLanes
+
+// Returned by SetTableIndices/IndicesFromVec for use by TableLookupLanes.
+template <typename T>
+struct Indices256 {
+  __m256i raw;
+};
+
+// 8-bit lanes: indices remain unchanged
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 1), typename TI>
+HWY_API Indices256<TFromD<D>> IndicesFromVec(D /* tag */, Vec256<TI> vec) {
+  static_assert(sizeof(TFromD<D>) == sizeof(TI), "Index size must match lane");
+#if HWY_IS_DEBUG_BUILD
+  const Full256<TI> di;
+  HWY_DASSERT(AllFalse(di, Lt(vec, Zero(di))) &&
+              AllTrue(di, Lt(vec, Set(di, static_cast<TI>(2 * Lanes(di))))));
+#endif
+  return Indices256<TFromD<D>>{vec.raw};
+}
+
+// 16-bit lanes: convert indices to 32x8 unless AVX3 is available
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 2), typename TI>
+HWY_API Indices256<TFromD<D>> IndicesFromVec(D /* tag */, Vec256<TI> vec) {
+  static_assert(sizeof(TFromD<D>) == sizeof(TI), "Index size must match lane");
+  const Full256<TI> di;
+#if HWY_IS_DEBUG_BUILD
+  HWY_DASSERT(AllFalse(di, Lt(vec, Zero(di))) &&
+              AllTrue(di, Lt(vec, Set(di, static_cast<TI>(2 * Lanes(di))))));
+#endif
+
+#if HWY_TARGET <= HWY_AVX3
+  (void)di;
+  return Indices256<TFromD<D>>{vec.raw};
+#else
+  const Repartition<uint8_t, decltype(di)> d8;
+  using V8 = VFromD<decltype(d8)>;
+  alignas(32) static constexpr uint8_t kByteOffsets[32] = {
+      0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+      0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
+
+  // Broadcast each lane index to all 2 bytes of T
+  alignas(32) static constexpr uint8_t kBroadcastLaneBytes[32] = {
+      0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14,
+      0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14};
+  const V8 lane_indices = TableLookupBytes(vec, Load(d8, kBroadcastLaneBytes));
+
+  // Shift to bytes
+  const Repartition<uint16_t, decltype(di)> d16;
+  const V8 byte_indices = BitCast(d8, ShiftLeft<1>(BitCast(d16, lane_indices)));
+
+  return Indices256<TFromD<D>>{Add(byte_indices, Load(d8, kByteOffsets)).raw};
+#endif  // HWY_TARGET <= HWY_AVX3
+}
+
+// Native 8x32 instruction: indices remain unchanged
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 4), typename TI>
+HWY_API Indices256<TFromD<D>> IndicesFromVec(D /* tag */, Vec256<TI> vec) {
+  static_assert(sizeof(TFromD<D>) == sizeof(TI), "Index size must match lane");
+#if HWY_IS_DEBUG_BUILD
+  const Full256<TI> di;
+  HWY_DASSERT(AllFalse(di, Lt(vec, Zero(di))) &&
+              AllTrue(di, Lt(vec, Set(di, static_cast<TI>(2 * Lanes(di))))));
+#endif
+  return Indices256<TFromD<D>>{vec.raw};
+}
+
+// 64-bit lanes: convert indices to 8x32 unless AVX3 is available
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 8), typename TI>
+HWY_API Indices256<TFromD<D>> IndicesFromVec(D d, Vec256<TI> idx64) {
+  static_assert(sizeof(TFromD<D>) == sizeof(TI), "Index size must match lane");
+  const Rebind<TI, decltype(d)> di;  // tag for the index type TI
+  (void)di;  // potentially unused
+#if HWY_IS_DEBUG_BUILD  // indices must lie in [0, 2 * Lanes(di))
+  HWY_DASSERT(AllFalse(di, Lt(idx64, Zero(di))) &&
+              AllTrue(di, Lt(idx64, Set(di, static_cast<TI>(2 * Lanes(di))))));
+#endif
+
+#if HWY_TARGET <= HWY_AVX3
+  (void)d;
+  return Indices256<TFromD<D>>{idx64.raw};  // 64-bit indices are used as-is
+#else
+  const Repartition<float, decltype(d)> df;  // 32-bit!
+  // Copy the low 32 bits of each 64-bit index into its upper 32 bits.
+  const Vec256<TI> dup =
+      BitCast(di, Vec256<float>{_mm256_moveldup_ps(BitCast(df, idx64).raw)});
+  // For each idx64 i, idx32 are 2*i (low half) and 2*i+1 (upper half).
+  const Vec256<TI> idx32 = dup + dup + Set(di, TI(1) << 32);
+  return Indices256<TFromD<D>>{idx32.raw};
+#endif
+}
+
+// Loads one vector of indices from `idx` via LoadU and converts it with
+// IndicesFromVec into the form expected by TableLookupLanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), typename TI>
+HWY_API Indices256<TFromD<D>> SetTableIndices(D d, const TI* idx) {
+  using DI = Rebind<TI, D>;
+  const DI di;
+  const auto idx_vec = LoadU(di, idx);
+  return IndicesFromVec(d, idx_vec);
+}
+
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec256<T> TableLookupLanes(Vec256<T> v, Indices256<T> idx) {
+#if HWY_TARGET <= HWY_AVX3_DL
+  return Vec256<T>{_mm256_permutexvar_epi8(idx.raw, v.raw)};
+#else
+  const Vec256<T> idx_vec{idx.raw};
+  const DFromV<decltype(v)> d;
+  const Repartition<uint16_t, decltype(d)> du16;
+  const auto sel_hi_mask =  // index bit 4 (idx >= 16) moved to each byte's MSB
+      MaskFromVec(BitCast(d, ShiftLeft<3>(BitCast(du16, idx_vec))));
+
+  const auto a = ConcatLowerLower(d, v, v);  // lower block of v in both blocks
+  const auto b = ConcatUpperUpper(d, v, v);  // upper block of v in both blocks
+  const auto lo_lookup_result = TableLookupBytes(a, idx_vec);
+
+#if HWY_TARGET <= HWY_AVX3
+  return Vec256<T>{_mm256_mask_shuffle_epi8(
+      lo_lookup_result.raw, sel_hi_mask.raw, b.raw, idx_vec.raw)};
+#else
+  const auto hi_lookup_result = TableLookupBytes(b, idx_vec);
+  return IfThenElse(sel_hi_mask, hi_lookup_result, lo_lookup_result);
+#endif  // HWY_TARGET <= HWY_AVX3
+#endif  // HWY_TARGET <= HWY_AVX3_DL
+}
+
+template <typename T, HWY_IF_T_SIZE(T, 2), HWY_IF_NOT_SPECIAL_FLOAT(T)>
+HWY_API Vec256<T> TableLookupLanes(Vec256<T> v, Indices256<T> idx) {
+#if HWY_TARGET <= HWY_AVX3
+  return Vec256<T>{_mm256_permutexvar_epi16(idx.raw, v.raw)};
+#else
+  const DFromV<decltype(v)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  return BitCast(
+      d, TableLookupLanes(BitCast(du8, v), Indices256<uint8_t>{idx.raw}));
+#endif
+}
+
+#if HWY_HAVE_FLOAT16
+HWY_API Vec256<float16_t> TableLookupLanes(Vec256<float16_t> v,
+                                           Indices256<float16_t> idx) {
+  return Vec256<float16_t>{_mm256_permutexvar_ph(idx.raw, v.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec256<T> TableLookupLanes(Vec256<T> v, Indices256<T> idx) {
+  return Vec256<T>{_mm256_permutevar8x32_epi32(v.raw, idx.raw)};
+}
+
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec256<T> TableLookupLanes(Vec256<T> v, Indices256<T> idx) {
+#if HWY_TARGET <= HWY_AVX3
+  return Vec256<T>{_mm256_permutexvar_epi64(idx.raw, v.raw)};
+#else
+  return Vec256<T>{_mm256_permutevar8x32_epi32(v.raw, idx.raw)};
+#endif
+}
+
+HWY_API Vec256<float> TableLookupLanes(const Vec256<float> v,
+                                       const Indices256<float> idx) {
+  return Vec256<float>{_mm256_permutevar8x32_ps(v.raw, idx.raw)};
+}
+
+HWY_API Vec256<double> TableLookupLanes(const Vec256<double> v,
+                                        const Indices256<double> idx) {
+#if HWY_TARGET <= HWY_AVX3
+  return Vec256<double>{_mm256_permutexvar_pd(idx.raw, v.raw)};
+#else
+  const Full256<double> df;
+  const Full256<uint64_t> du;
+  return BitCast(df, Vec256<uint64_t>{_mm256_permutevar8x32_epi32(
+                         BitCast(du, v).raw, idx.raw)});
+#endif
+}
+
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec256<T> TwoTablesLookupLanes(Vec256<T> a, Vec256<T> b,
+                                       Indices256<T> idx) {
+#if HWY_TARGET <= HWY_AVX3_DL
+  return Vec256<T>{_mm256_permutex2var_epi8(a.raw, idx.raw, b.raw)};
+#else
+  const DFromV<decltype(a)> d;
+  const auto sel_hi_mask =
+      MaskFromVec(BitCast(d, ShiftLeft<2>(Vec256<uint16_t>{idx.raw})));
+  const auto lo_lookup_result = TableLookupLanes(a, idx);
+  const auto hi_lookup_result = TableLookupLanes(b, idx);
+  return IfThenElse(sel_hi_mask, hi_lookup_result, lo_lookup_result);
+#endif
+}
+
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec256<T> TwoTablesLookupLanes(Vec256<T> a, Vec256<T> b,
+                                       Indices256<T> idx) {
+#if HWY_TARGET <= HWY_AVX3
+  return Vec256<T>{_mm256_permutex2var_epi16(a.raw, idx.raw, b.raw)};
+#else
+  const DFromV<decltype(a)> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  return BitCast(d, TwoTablesLookupLanes(BitCast(du8, a), BitCast(du8, b),
+                                         Indices256<uint8_t>{idx.raw}));
+#endif
+}
+
+template <typename T, HWY_IF_UI32(T)>
+HWY_API Vec256<T> TwoTablesLookupLanes(Vec256<T> a, Vec256<T> b,
+                                       Indices256<T> idx) {
+#if HWY_TARGET <= HWY_AVX3
+  return Vec256<T>{_mm256_permutex2var_epi32(a.raw, idx.raw, b.raw)};
+#else
+  const DFromV<decltype(a)> d;
+  const RebindToFloat<decltype(d)> df;
+  const Vec256<T> idx_vec{idx.raw};
+
+  const auto sel_hi_mask = MaskFromVec(BitCast(df, ShiftLeft<28>(idx_vec)));
+  const auto lo_lookup_result = BitCast(df, TableLookupLanes(a, idx));
+  const auto hi_lookup_result = BitCast(df, TableLookupLanes(b, idx));
+  return BitCast(d,
+                 IfThenElse(sel_hi_mask, hi_lookup_result, lo_lookup_result));
+#endif
+}
+
+#if HWY_HAVE_FLOAT16
+HWY_API Vec256<float16_t> TwoTablesLookupLanes(Vec256<float16_t> a,
+                                               Vec256<float16_t> b,
+                                               Indices256<float16_t> idx) {
+  return Vec256<float16_t>{_mm256_permutex2var_ph(a.raw, idx.raw, b.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec256<float> TwoTablesLookupLanes(Vec256<float> a, Vec256<float> b,
+                                           Indices256<float> idx) {
+#if HWY_TARGET <= HWY_AVX3
+  return Vec256<float>{_mm256_permutex2var_ps(a.raw, idx.raw, b.raw)};
+#else
+  const DFromV<decltype(a)> d;
+  const auto sel_hi_mask =
+      MaskFromVec(BitCast(d, ShiftLeft<28>(Vec256<uint32_t>{idx.raw})));
+  const auto lo_lookup_result = TableLookupLanes(a, idx);
+  const auto hi_lookup_result = TableLookupLanes(b, idx);
+  return IfThenElse(sel_hi_mask, hi_lookup_result, lo_lookup_result);
+#endif
+}
+
+template <typename T, HWY_IF_UI64(T)>
+HWY_API Vec256<T> TwoTablesLookupLanes(Vec256<T> a, Vec256<T> b,
+                                       Indices256<T> idx) {
+#if HWY_TARGET <= HWY_AVX3
+  return Vec256<T>{_mm256_permutex2var_epi64(a.raw, idx.raw, b.raw)};
+#else
+  const DFromV<decltype(a)> d;
+  const Repartition<uint32_t, decltype(d)> du32;
+  return BitCast(d, TwoTablesLookupLanes(BitCast(du32, a), BitCast(du32, b),
+                                         Indices256<uint32_t>{idx.raw}));
+#endif
+}
+
+HWY_API Vec256<double> TwoTablesLookupLanes(Vec256<double> a, Vec256<double> b,
+                                            Indices256<double> idx) {
+#if HWY_TARGET <= HWY_AVX3
+  return Vec256<double>{_mm256_permutex2var_pd(a.raw, idx.raw, b.raw)};
+#else
+  const DFromV<decltype(a)> d;
+  const Repartition<uint32_t, decltype(d)> du32;
+  return BitCast(d, TwoTablesLookupLanes(BitCast(du32, a), BitCast(du32, b),
+                                         Indices256<uint32_t>{idx.raw}));
+#endif
+}
+
+// ------------------------------ SwapAdjacentBlocks
+
+template <typename T>
+HWY_API Vec256<T> SwapAdjacentBlocks(Vec256<T> v) {
+  return Vec256<T>{_mm256_permute4x64_epi64(v.raw, _MM_SHUFFLE(1, 0, 3, 2))};
+}
+
+HWY_API Vec256<double> SwapAdjacentBlocks(Vec256<double> v) {
+  return Vec256<double>{_mm256_permute4x64_pd(v.raw, _MM_SHUFFLE(1, 0, 3, 2))};
+}
+
+HWY_API Vec256<float> SwapAdjacentBlocks(Vec256<float> v) {
+  // Assume no domain-crossing penalty between float/double (true on SKX).
+  const DFromV<decltype(v)> d;
+  const RepartitionToWide<decltype(d)> dw;
+  return BitCast(d, SwapAdjacentBlocks(BitCast(dw, v)));
+}
+
+// ------------------------------ Reverse (RotateRight)
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> Reverse(D d, const VFromD<D> v) {
+  alignas(32) static constexpr int32_t kReverse[8] = {7, 6, 5, 4, 3, 2, 1, 0};
+  return TableLookupLanes(v, SetTableIndices(d, kReverse));
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> Reverse(D d, const VFromD<D> v) {
+  alignas(32) static constexpr int64_t kReverse[4] = {3, 2, 1, 0};
+  return TableLookupLanes(v, SetTableIndices(d, kReverse));
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Reverse(D d, const VFromD<D> v) {
+#if HWY_TARGET <= HWY_AVX3
+  const RebindToSigned<decltype(d)> di;
+  alignas(32) static constexpr int16_t kReverse[16] = {
+      15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
+  const Vec256<int16_t> idx = Load(di, kReverse);
+  return BitCast(d, Vec256<int16_t>{
+                        _mm256_permutexvar_epi16(idx.raw, BitCast(di, v).raw)});
+#else
+  const RebindToSigned<decltype(d)> di;
+  alignas(16) static constexpr int16_t kShuffle[8] = {
+      0x0F0E, 0x0D0C, 0x0B0A, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100};
+  const auto rev128 = TableLookupBytes(v, LoadDup128(di, kShuffle));
+  return VFromD<D>{
+      _mm256_permute4x64_epi64(rev128.raw, _MM_SHUFFLE(1, 0, 3, 2))};
+#endif
+}
+
+// Reverses the order of the thirty-two 8-bit lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Reverse(D d, const VFromD<D> v) {
+#if HWY_TARGET <= HWY_AVX3_DL
+  // Lane table lookup over the full vector with descending indices.
+  alignas(32) static constexpr TFromD<D> kDescending[32] = {
+      31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
+      15, 14, 13, 12, 11, 10, 9,  8,  7,  6,  5,  4,  3,  2,  1,  0};
+  const auto lane_indices = SetTableIndices(d, kDescending);
+  return TableLookupLanes(v, lane_indices);
+#else
+  // First reverse bytes within blocks via PSHUFB, then swap blocks.
+  alignas(32) static constexpr TFromD<D> kDescendingPerBlock[32] = {
+      15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+      15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
+  const auto rev_in_block = TableLookupBytes(v, Load(d, kDescendingPerBlock));
+  return SwapAdjacentBlocks(rev_in_block);
+#endif
+}
+
+// ------------------------------ Reverse2 (in x86_128)
+
+// ------------------------------ Reverse4 (SwapAdjacentBlocks)
+
+// Reverses every group of four consecutive 16-bit lanes with a per-block byte
+// shuffle; the same shuffle pattern is applied to both 128-bit blocks.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Reverse4(D d, const VFromD<D> v) {
+  const RebindToSigned<decltype(d)> di;
+  // Each int16 packs the two byte indices of one source lane.
+  alignas(16) static constexpr int16_t kByteIdx[8] = {
+      0x0706, 0x0504, 0x0302, 0x0100, 0x0F0E, 0x0D0C, 0x0B0A, 0x0908};
+  const auto shuffle = LoadDup128(di, kByteIdx);
+  return BitCast(d, TableLookupBytes(v, shuffle));
+}
+
+// 32 bit Reverse4 defined in x86_128.
+
+// Reverses all four 64-bit lanes: Shuffle01 within each block, followed by an
+// exchange of the two blocks.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> Reverse4(D /* tag */, const VFromD<D> v) {
+  // Could also use _mm256_permute4x64_epi64.
+  const auto swapped_in_block = Shuffle01(v);
+  return SwapAdjacentBlocks(swapped_in_block);
+}
+
+// ------------------------------ Reverse8
+
+// Reverses every group of eight consecutive 16-bit lanes. Eight such lanes
+// fill one 128-bit block, so a per-block byte shuffle suffices.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Reverse8(D d, const VFromD<D> v) {
+  const RebindToSigned<decltype(d)> di;
+  // Each int16 packs the two byte indices of one source lane.
+  alignas(16) static constexpr int16_t kByteIdx[8] = {
+      0x0F0E, 0x0D0C, 0x0B0A, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100};
+  const auto shuffle = LoadDup128(di, kByteIdx);
+  return BitCast(d, TableLookupBytes(v, shuffle));
+}
+
+// A 256-bit vector has exactly eight 32-bit lanes, so this is a full Reverse.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> Reverse8(D d, const VFromD<D> v) {
+  return Reverse(d, v);
+}
+
+// Provided only so the overload exists; calling it always fails the assert.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> Reverse8(D /* tag */, const VFromD<D> /* v */) {
+  HWY_ASSERT(0);  // AVX2 does not have 8 64-bit lanes
+}
+
+// ------------------------------ ReverseBits
+
+#if HWY_TARGET <= HWY_AVX3_DL
+// ReverseBits for 8-bit lanes, implemented as a single GF(2) affine transform
+// with a constant matrix broadcast to every 64-bit lane and a zero offset.
+template <class V, HWY_IF_T_SIZE_V(V, 1), HWY_IF_V_SIZE_D(DFromV<V>, 32)>
+HWY_API V ReverseBits(V v) {
+  const Full256<uint64_t> du64;
+  const auto matrix = Set(du64, 0x8040201008040201u);
+  const auto transformed =
+      _mm256_gf2p8affine_epi64_epi8(v.raw, matrix.raw, 0);
+  return V{transformed};
+}
+#endif  // HWY_TARGET <= HWY_AVX3_DL
+
+// ------------------------------ InterleaveLower
+
+// Interleaves lanes from halves of the 128-bit blocks of "a" (which provides
+// the least-significant lane) and "b". To concatenate two half-width integers
+// into one, use ZipLower/Upper instead (also works with scalar).
+
+// 8-bit lanes.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec256<T> InterleaveLower(Vec256<T> a, Vec256<T> b) {
+  const auto interleaved = _mm256_unpacklo_epi8(a.raw, b.raw);
+  return Vec256<T>{interleaved};
+}
+// 16-bit lanes; goes through the unsigned type so float16_t is also handled.
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec256<T> InterleaveLower(Vec256<T> a, Vec256<T> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;  // for float16_t
+  const VU a_bits = BitCast(du, a);
+  const VU b_bits = BitCast(du, b);
+  return BitCast(d, VU{_mm256_unpacklo_epi16(a_bits.raw, b_bits.raw)});
+}
+// 32-bit integer lanes.
+template <typename T, HWY_IF_UI32(T)>
+HWY_API Vec256<T> InterleaveLower(Vec256<T> a, Vec256<T> b) {
+  const auto interleaved = _mm256_unpacklo_epi32(a.raw, b.raw);
+  return Vec256<T>{interleaved};
+}
+// 64-bit integer lanes.
+template <typename T, HWY_IF_UI64(T)>
+HWY_API Vec256<T> InterleaveLower(Vec256<T> a, Vec256<T> b) {
+  const auto interleaved = _mm256_unpacklo_epi64(a.raw, b.raw);
+  return Vec256<T>{interleaved};
+}
+
+// Single-precision lanes.
+HWY_API Vec256<float> InterleaveLower(Vec256<float> a, Vec256<float> b) {
+  const auto interleaved = _mm256_unpacklo_ps(a.raw, b.raw);
+  return Vec256<float>{interleaved};
+}
+// Double-precision lanes.
+HWY_API Vec256<double> InterleaveLower(Vec256<double> a, Vec256<double> b) {
+  const auto interleaved = _mm256_unpacklo_pd(a.raw, b.raw);
+  return Vec256<double>{interleaved};
+}
+
+// ------------------------------ InterleaveUpper
+
+// 8-bit lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> InterleaveUpper(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  const auto interleaved = _mm256_unpackhi_epi8(a.raw, b.raw);
+  return VFromD<D>{interleaved};
+}
+// 16-bit lanes; goes through the unsigned type so float16_t is also handled.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> InterleaveUpper(D d, VFromD<D> a, VFromD<D> b) {
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;  // for float16_t
+  const VU a_bits = BitCast(du, a);
+  const VU b_bits = BitCast(du, b);
+  return BitCast(d, VU{_mm256_unpackhi_epi16(a_bits.raw, b_bits.raw)});
+}
+// 32-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI32_D(D)>
+HWY_API VFromD<D> InterleaveUpper(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  const auto interleaved = _mm256_unpackhi_epi32(a.raw, b.raw);
+  return VFromD<D>{interleaved};
+}
+// 64-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI64_D(D)>
+HWY_API VFromD<D> InterleaveUpper(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  const auto interleaved = _mm256_unpackhi_epi64(a.raw, b.raw);
+  return VFromD<D>{interleaved};
+}
+
+// Single-precision lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> InterleaveUpper(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  const auto interleaved = _mm256_unpackhi_ps(a.raw, b.raw);
+  return VFromD<D>{interleaved};
+}
+// Double-precision lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> InterleaveUpper(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  const auto interleaved = _mm256_unpackhi_pd(a.raw, b.raw);
+  return VFromD<D>{interleaved};
+}
+
+// ------------------------------ Blocks (LowerHalf, ZeroExtendVector)
+
+// _mm256_broadcastsi128_si256 has 7 cycle latency on ICL.
+// _mm256_permute2x128_si256 is slow on Zen1 (8 uops), so we avoid it (at no
+// extra cost) for LowerLower and UpperLower.
+
+// hiH,hiL loH,loL |-> hiL,loL (= lower halves)
+// Integer (and other non-f32/f64) lanes: insert hi's lower block above lo's.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API VFromD<D> ConcatLowerLower(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Half<decltype(d)> dh;
+  const auto hi_lower = LowerHalf(dh, hi);
+  return VFromD<D>{_mm256_inserti128_si256(lo.raw, hi_lower.raw, 1)};
+}
+// Single-precision variant of the above.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API Vec256<float> ConcatLowerLower(D d, Vec256<float> hi,
+                                       Vec256<float> lo) {
+  const Half<decltype(d)> dh;
+  const auto hi_lower = LowerHalf(dh, hi);
+  return Vec256<float>{_mm256_insertf128_ps(lo.raw, hi_lower.raw, 1)};
+}
+// Double-precision variant of the above.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API Vec256<double> ConcatLowerLower(D d, Vec256<double> hi,
+                                        Vec256<double> lo) {
+  const Half<decltype(d)> dh;
+  const auto hi_lower = LowerHalf(dh, hi);
+  return Vec256<double>{_mm256_insertf128_pd(lo.raw, hi_lower.raw, 1)};
+}
+
+// hiH,hiL loH,loL |-> hiL,loH (= inner halves / swap blocks)
+// Integer (and other non-f32/f64) lanes. The immediate 0x21 selects block 1
+// of the first operand (lo) for the lower half and block 0 of the second
+// operand (hi) for the upper half.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API VFromD<D> ConcatLowerUpper(D /* tag */, VFromD<D> hi, VFromD<D> lo) {
+  constexpr int kLoUpperThenHiLower = 0x21;
+  return VFromD<D>{
+      _mm256_permute2x128_si256(lo.raw, hi.raw, kLoUpperThenHiLower)};
+}
+// Single-precision variant of the above.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API Vec256<float> ConcatLowerUpper(D /* tag */, Vec256<float> hi,
+                                       Vec256<float> lo) {
+  constexpr int kLoUpperThenHiLower = 0x21;
+  return Vec256<float>{
+      _mm256_permute2f128_ps(lo.raw, hi.raw, kLoUpperThenHiLower)};
+}
+// Double-precision variant of the above.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API Vec256<double> ConcatLowerUpper(D /* tag */, Vec256<double> hi,
+                                        Vec256<double> lo) {
+  constexpr int kLoUpperThenHiLower = 0x21;
+  return Vec256<double>{
+      _mm256_permute2f128_pd(lo.raw, hi.raw, kLoUpperThenHiLower)};
+}
+
+// hiH,hiL loH,loL |-> hiH,loL (= outer halves)
+// Integer (and other non-f32/f64) lanes: a blend whose mask takes the four
+// low 32-bit lanes from `lo` and the rest from `hi`.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API VFromD<D> ConcatUpperLower(D /* tag */, VFromD<D> hi, VFromD<D> lo) {
+  constexpr int kLowerFromLo = 0x0F;
+  return VFromD<D>{_mm256_blend_epi32(hi.raw, lo.raw, kLowerFromLo)};
+}
+// Single-precision variant: same mask over eight float lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API Vec256<float> ConcatUpperLower(D /* tag */, Vec256<float> hi,
+                                       Vec256<float> lo) {
+  constexpr int kLowerFromLo = 0x0F;
+  return Vec256<float>{_mm256_blend_ps(hi.raw, lo.raw, kLowerFromLo)};
+}
+// Double-precision variant: one mask bit per 64-bit lane, so two bits cover
+// the lower block.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API Vec256<double> ConcatUpperLower(D /* tag */, Vec256<double> hi,
+                                        Vec256<double> lo) {
+  constexpr int kLowerFromLo = 3;
+  return Vec256<double>{_mm256_blend_pd(hi.raw, lo.raw, kLowerFromLo)};
+}
+
+// hiH,hiL loH,loL |-> hiH,loH (= upper halves)
+// Integer (and other non-f32/f64) lanes. The immediate 0x31 selects block 1
+// of the first operand (lo) for the lower half and block 1 of the second
+// operand (hi) for the upper half.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API VFromD<D> ConcatUpperUpper(D /* tag */, VFromD<D> hi, VFromD<D> lo) {
+  constexpr int kLoUpperThenHiUpper = 0x31;
+  return VFromD<D>{
+      _mm256_permute2x128_si256(lo.raw, hi.raw, kLoUpperThenHiUpper)};
+}
+// Single-precision variant of the above.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API Vec256<float> ConcatUpperUpper(D /* tag */, Vec256<float> hi,
+                                       Vec256<float> lo) {
+  constexpr int kLoUpperThenHiUpper = 0x31;
+  return Vec256<float>{
+      _mm256_permute2f128_ps(lo.raw, hi.raw, kLoUpperThenHiUpper)};
+}
+// Double-precision variant of the above.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API Vec256<double> ConcatUpperUpper(D /* tag */, Vec256<double> hi,
+                                        Vec256<double> lo) {
+  constexpr int kLoUpperThenHiUpper = 0x31;
+  return Vec256<double>{
+      _mm256_permute2f128_pd(lo.raw, hi.raw, kLoUpperThenHiUpper)};
+}
+
+// ---------------------------- InsertBlock (ConcatLowerLower, ConcatUpperLower)
+// Returns `v` with its 128-bit block number kBlockIdx replaced by
+// `blk_to_insert`; the other block is kept.
+template <int kBlockIdx, class T>
+HWY_API Vec256<T> InsertBlock(Vec256<T> v, Vec128<T> blk_to_insert) {
+  static_assert(kBlockIdx == 0 || kBlockIdx == 1, "Invalid block index");
+
+  const DFromV<decltype(v)> d;
+  // Only the lower block of the widened vector is consumed below.
+  const auto widened = ResizeBitCast(d, blk_to_insert);
+  return (kBlockIdx == 0)
+             ? ConcatUpperLower(d, v, widened)   // keep upper block of v
+             : ConcatLowerLower(d, widened, v);  // keep lower block of v
+}
+
+// ------------------------------ ConcatOdd
+
+// Returns the odd-indexed 8-bit lanes of `lo` in the lower half of the result
+// followed by the odd-indexed 8-bit lanes of `hi` in the upper half.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+#if HWY_TARGET <= HWY_AVX3_DL
+  // Indices 0..31 select from the first table (lo), 32..63 from the second
+  // table (hi).
+  alignas(32) static constexpr uint8_t kIdx[32] = {
+      1,  3,  5,  7,  9,  11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
+      33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63};
+  // The permutation produces 8-bit lanes, so wrap the result in the vector
+  // type that matches `du` instead of an unrelated 16-bit vector type. The
+  // bits are unchanged; this only keeps the lane type consistent.
+  return BitCast(
+      d, VFromD<decltype(du)>{_mm256_permutex2var_epi8(
+             BitCast(du, lo).raw, Load(du, kIdx).raw, BitCast(du, hi).raw)});
+#else
+  const RepartitionToWide<decltype(du)> dw;
+  // Unsigned 8-bit shift so we can pack.
+  const Vec256<uint16_t> uH = ShiftRight<8>(BitCast(dw, hi));
+  const Vec256<uint16_t> uL = ShiftRight<8>(BitCast(dw, lo));
+  // The pack works per 128-bit block, giving [L0 H0 L1 H1] in 64-bit units;
+  // the permute reorders that to [L0 L1 H0 H1].
+  const __m256i u8 = _mm256_packus_epi16(uL.raw, uH.raw);
+  return VFromD<D>{_mm256_permute4x64_epi64(u8, _MM_SHUFFLE(3, 1, 2, 0))};
+#endif
+}
+
+// Returns the odd-indexed 16-bit lanes of `lo` in the lower half of the
+// result followed by the odd-indexed 16-bit lanes of `hi` in the upper half.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+#if HWY_TARGET <= HWY_AVX3
+  // Indices 0..15 select from lo, 16..31 from hi.
+  alignas(32) static constexpr uint16_t kOddLanes[16] = {
+      1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
+  const auto lo_bits = BitCast(du, lo);
+  const auto hi_bits = BitCast(du, hi);
+  const Vec256<uint16_t> picked{_mm256_permutex2var_epi16(
+      lo_bits.raw, Load(du, kOddLanes).raw, hi_bits.raw)};
+  return BitCast(d, picked);
+#else
+  const RepartitionToWide<decltype(du)> dw;
+  // Unsigned 16-bit shift so we can pack.
+  const Vec256<uint32_t> odd_hi = ShiftRight<16>(BitCast(dw, hi));
+  const Vec256<uint32_t> odd_lo = ShiftRight<16>(BitCast(dw, lo));
+  // The pack works per 128-bit block; the permute then groups the two `lo`
+  // quarters ahead of the two `hi` quarters.
+  const __m256i packed = _mm256_packus_epi32(odd_lo.raw, odd_hi.raw);
+  return VFromD<D>{_mm256_permute4x64_epi64(packed, _MM_SHUFFLE(3, 1, 2, 0))};
+#endif
+}
+
+// Returns the odd-indexed 32-bit integer lanes of `lo` (lower half of the
+// result) followed by those of `hi` (upper half).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI32_D(D)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+#if HWY_TARGET <= HWY_AVX3
+  // Indices 0..7 select from lo, 8..15 from hi.
+  alignas(32) static constexpr uint32_t kOddLanes[8] = {1, 3,  5,  7,
+                                                        9, 11, 13, 15};
+  const auto lo_bits = BitCast(du, lo);
+  const auto hi_bits = BitCast(du, hi);
+  const Vec256<uint32_t> picked{_mm256_permutex2var_epi32(
+      lo_bits.raw, Load(du, kOddLanes).raw, hi_bits.raw)};
+  return BitCast(d, picked);
+#else
+  const RebindToFloat<decltype(d)> df;
+  // Per-block shuffle picks lanes 1 and 3 of lo, then lanes 1 and 3 of hi.
+  const auto lo_f = BitCast(df, lo);
+  const auto hi_f = BitCast(df, hi);
+  const Vec256<float> odd_per_block{
+      _mm256_shuffle_ps(lo_f.raw, hi_f.raw, _MM_SHUFFLE(3, 1, 3, 1))};
+  // Regroup the 64-bit quarters so both `lo` quarters precede the `hi` ones.
+  return VFromD<D>{_mm256_permute4x64_epi64(BitCast(du, odd_per_block).raw,
+                                            _MM_SHUFFLE(3, 1, 2, 0))};
+#endif
+}
+
+// Single-precision variant of the above.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+#if HWY_TARGET <= HWY_AVX3
+  // Indices 0..7 select from lo, 8..15 from hi.
+  alignas(32) static constexpr uint32_t kOddLanes[8] = {1, 3,  5,  7,
+                                                        9, 11, 13, 15};
+  const auto lane_idx = Load(du, kOddLanes);
+  return VFromD<D>{_mm256_permutex2var_ps(lo.raw, lane_idx.raw, hi.raw)};
+#else
+  // Per-block shuffle picks lanes 1 and 3 of lo, then lanes 1 and 3 of hi.
+  const VFromD<D> odd_per_block{
+      _mm256_shuffle_ps(lo.raw, hi.raw, _MM_SHUFFLE(3, 1, 3, 1))};
+  // Regroup the 64-bit quarters so both `lo` quarters precede the `hi` ones.
+  const Vec256<uint32_t> regrouped{_mm256_permute4x64_epi64(
+      BitCast(du, odd_per_block).raw, _MM_SHUFFLE(3, 1, 2, 0))};
+  return BitCast(d, regrouped);
+#endif
+}
+
+// Returns the odd-indexed 64-bit integer lanes of `lo` (lower half of the
+// result) followed by those of `hi` (upper half).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI64_D(D)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+#if HWY_TARGET <= HWY_AVX3
+  // Indices 0..3 select from lo, 4..7 from hi.
+  alignas(64) static constexpr uint64_t kOddLanes[4] = {1, 3, 5, 7};
+  const auto lo_bits = BitCast(du, lo);
+  const auto hi_bits = BitCast(du, hi);
+  const Vec256<uint64_t> picked{_mm256_permutex2var_epi64(
+      lo_bits.raw, Load(du, kOddLanes).raw, hi_bits.raw)};
+  return BitCast(d, picked);
+#else
+  const RebindToFloat<decltype(d)> df;
+  // Mask 15 takes the upper element of every pair, alternating lo and hi.
+  const auto lo_f = BitCast(df, lo);
+  const auto hi_f = BitCast(df, hi);
+  const Vec256<double> odd_per_block{
+      _mm256_shuffle_pd(lo_f.raw, hi_f.raw, 15)};
+  // Regroup so both `lo` lanes precede the `hi` lanes.
+  return VFromD<D>{_mm256_permute4x64_epi64(BitCast(du, odd_per_block).raw,
+                                            _MM_SHUFFLE(3, 1, 2, 0))};
+#endif
+}
+
+// Double-precision variant of the above.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API Vec256<double> ConcatOdd(D d, Vec256<double> hi, Vec256<double> lo) {
+#if HWY_TARGET <= HWY_AVX3
+  const RebindToUnsigned<decltype(d)> du;
+  // Indices 0..3 select from lo, 4..7 from hi.
+  alignas(64) static constexpr uint64_t kOddLanes[4] = {1, 3, 5, 7};
+  const auto lane_idx = Load(du, kOddLanes);
+  return Vec256<double>{
+      _mm256_permutex2var_pd(lo.raw, lane_idx.raw, hi.raw)};
+#else
+  (void)d;
+  // Mask 15 takes the upper element of every pair, alternating lo and hi.
+  const Vec256<double> odd_per_block{_mm256_shuffle_pd(lo.raw, hi.raw, 15)};
+  // Regroup so both `lo` lanes precede the `hi` lanes.
+  return Vec256<double>{
+      _mm256_permute4x64_pd(odd_per_block.raw, _MM_SHUFFLE(3, 1, 2, 0))};
+#endif
+}
+
+// ------------------------------ ConcatEven
+
+// Returns the even-indexed 8-bit lanes of `lo` in the lower half of the
+// result followed by the even-indexed 8-bit lanes of `hi` in the upper half.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+#if HWY_TARGET <= HWY_AVX3_DL
+  // Indices 0..31 select from the first table (lo), 32..63 from the second
+  // table (hi).
+  alignas(64) static constexpr uint8_t kIdx[32] = {
+      0,  2,  4,  6,  8,  10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
+      32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62};
+  // The permutation produces 8-bit lanes, so wrap the result in the vector
+  // type that matches `du` instead of an unrelated 32-bit vector type. The
+  // bits are unchanged; this only keeps the lane type consistent.
+  return BitCast(
+      d, VFromD<decltype(du)>{_mm256_permutex2var_epi8(
+             BitCast(du, lo).raw, Load(du, kIdx).raw, BitCast(du, hi).raw)});
+#else
+  const RepartitionToWide<decltype(du)> dw;
+  // Isolate lower 8 bits per u16 so we can pack.
+  const Vec256<uint16_t> mask = Set(dw, 0x00FF);
+  const Vec256<uint16_t> uH = And(BitCast(dw, hi), mask);
+  const Vec256<uint16_t> uL = And(BitCast(dw, lo), mask);
+  // The pack works per 128-bit block, giving [L0 H0 L1 H1] in 64-bit units;
+  // the permute reorders that to [L0 L1 H0 H1].
+  const __m256i u8 = _mm256_packus_epi16(uL.raw, uH.raw);
+  return VFromD<D>{_mm256_permute4x64_epi64(u8, _MM_SHUFFLE(3, 1, 2, 0))};
+#endif
+}
+
+// Returns the even-indexed 16-bit lanes of `lo` in the lower half of the
+// result followed by the even-indexed 16-bit lanes of `hi` in the upper half.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+#if HWY_TARGET <= HWY_AVX3
+  // Indices 0..15 select from the first table (lo), 16..31 from the second
+  // table (hi).
+  alignas(64) static constexpr uint16_t kIdx[16] = {
+      0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30};
+  // The permutation produces 16-bit lanes, so wrap the result in the vector
+  // type that matches `du` instead of an unrelated 32-bit vector type. The
+  // bits are unchanged; this only keeps the lane type consistent.
+  return BitCast(
+      d, VFromD<decltype(du)>{_mm256_permutex2var_epi16(
+             BitCast(du, lo).raw, Load(du, kIdx).raw, BitCast(du, hi).raw)});
+#else
+  const RepartitionToWide<decltype(du)> dw;
+  // Isolate lower 16 bits per u32 so we can pack.
+  const Vec256<uint32_t> mask = Set(dw, 0x0000FFFF);
+  const Vec256<uint32_t> uH = And(BitCast(dw, hi), mask);
+  const Vec256<uint32_t> uL = And(BitCast(dw, lo), mask);
+  // The pack works per 128-bit block, giving [L0 H0 L1 H1] in 64-bit units;
+  // the permute reorders that to [L0 L1 H0 H1].
+  const __m256i u16 = _mm256_packus_epi32(uL.raw, uH.raw);
+  return VFromD<D>{_mm256_permute4x64_epi64(u16, _MM_SHUFFLE(3, 1, 2, 0))};
+#endif
+}
+
+// Returns the even-indexed 32-bit integer lanes of `lo` (lower half of the
+// result) followed by those of `hi` (upper half).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI32_D(D)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+#if HWY_TARGET <= HWY_AVX3
+  // Indices 0..7 select from lo, 8..15 from hi.
+  alignas(64) static constexpr uint32_t kEvenLanes[8] = {0, 2,  4,  6,
+                                                         8, 10, 12, 14};
+  const auto lo_bits = BitCast(du, lo);
+  const auto hi_bits = BitCast(du, hi);
+  const Vec256<uint32_t> picked{_mm256_permutex2var_epi32(
+      lo_bits.raw, Load(du, kEvenLanes).raw, hi_bits.raw)};
+  return BitCast(d, picked);
+#else
+  const RebindToFloat<decltype(d)> df;
+  // Per-block shuffle picks lanes 0 and 2 of lo, then lanes 0 and 2 of hi.
+  const auto lo_f = BitCast(df, lo);
+  const auto hi_f = BitCast(df, hi);
+  const Vec256<float> even_per_block{
+      _mm256_shuffle_ps(lo_f.raw, hi_f.raw, _MM_SHUFFLE(2, 0, 2, 0))};
+  // Regroup the 64-bit quarters so both `lo` quarters precede the `hi` ones.
+  return VFromD<D>{_mm256_permute4x64_epi64(BitCast(du, even_per_block).raw,
+                                            _MM_SHUFFLE(3, 1, 2, 0))};
+#endif
+}
+
+// Single-precision variant of the above.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+#if HWY_TARGET <= HWY_AVX3
+  // Indices 0..7 select from lo, 8..15 from hi.
+  alignas(64) static constexpr uint32_t kEvenLanes[8] = {0, 2,  4,  6,
+                                                         8, 10, 12, 14};
+  const auto lane_idx = Load(du, kEvenLanes);
+  return VFromD<D>{_mm256_permutex2var_ps(lo.raw, lane_idx.raw, hi.raw)};
+#else
+  // Per-block shuffle picks lanes 0 and 2 of lo, then lanes 0 and 2 of hi.
+  const VFromD<D> even_per_block{
+      _mm256_shuffle_ps(lo.raw, hi.raw, _MM_SHUFFLE(2, 0, 2, 0))};
+  // Regroup the 64-bit quarters so both `lo` quarters precede the `hi` ones.
+  const Vec256<uint32_t> regrouped{_mm256_permute4x64_epi64(
+      BitCast(du, even_per_block).raw, _MM_SHUFFLE(3, 1, 2, 0))};
+  return BitCast(d, regrouped);
+#endif
+}
+
+// Returns the even-indexed 64-bit integer lanes of `lo` (lower half of the
+// result) followed by those of `hi` (upper half).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_UI64_D(D)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+#if HWY_TARGET <= HWY_AVX3
+  // Indices 0..3 select from lo, 4..7 from hi.
+  alignas(64) static constexpr uint64_t kEvenLanes[4] = {0, 2, 4, 6};
+  const auto lo_bits = BitCast(du, lo);
+  const auto hi_bits = BitCast(du, hi);
+  const Vec256<uint64_t> picked{_mm256_permutex2var_epi64(
+      lo_bits.raw, Load(du, kEvenLanes).raw, hi_bits.raw)};
+  return BitCast(d, picked);
+#else
+  const RebindToFloat<decltype(d)> df;
+  // Mask 0 takes the lower element of every pair, alternating lo and hi.
+  const auto lo_f = BitCast(df, lo);
+  const auto hi_f = BitCast(df, hi);
+  const Vec256<double> even_per_block{
+      _mm256_shuffle_pd(lo_f.raw, hi_f.raw, 0)};
+  // Regroup so both `lo` lanes precede the `hi` lanes.
+  return VFromD<D>{_mm256_permute4x64_epi64(BitCast(du, even_per_block).raw,
+                                            _MM_SHUFFLE(3, 1, 2, 0))};
+#endif
+}
+
+// Double-precision variant of the above.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API Vec256<double> ConcatEven(D d, Vec256<double> hi, Vec256<double> lo) {
+#if HWY_TARGET <= HWY_AVX3
+  const RebindToUnsigned<decltype(d)> du;
+  // Indices 0..3 select from lo, 4..7 from hi.
+  alignas(64) static constexpr uint64_t kEvenLanes[4] = {0, 2, 4, 6};
+  const auto lane_idx = Load(du, kEvenLanes);
+  return Vec256<double>{
+      _mm256_permutex2var_pd(lo.raw, lane_idx.raw, hi.raw)};
+#else
+  (void)d;
+  // Mask 0 takes the lower element of every pair, alternating lo and hi.
+  const Vec256<double> even_per_block{_mm256_shuffle_pd(lo.raw, hi.raw, 0)};
+  // Regroup so both `lo` lanes precede the `hi` lanes.
+  return Vec256<double>{
+      _mm256_permute4x64_pd(even_per_block.raw, _MM_SHUFFLE(3, 1, 2, 0))};
+#endif
+}
+
+// ------------------------------ DupEven (InterleaveLower)
+
+// 32-bit integer lanes: within each block, lane 0 is copied to lane 1 and
+// lane 2 to lane 3.
+template <typename T, HWY_IF_UI32(T)>
+HWY_API Vec256<T> DupEven(Vec256<T> v) {
+  constexpr int kEvenTwice = _MM_SHUFFLE(2, 2, 0, 0);
+  return Vec256<T>{_mm256_shuffle_epi32(v.raw, kEvenTwice)};
+}
+// Single-precision variant of the above.
+HWY_API Vec256<float> DupEven(Vec256<float> v) {
+  constexpr int kEvenTwice = _MM_SHUFFLE(2, 2, 0, 0);
+  return Vec256<float>{_mm256_shuffle_ps(v.raw, v.raw, kEvenTwice)};
+}
+
+// 64-bit lanes: interleaving the lower lane of each block with itself
+// duplicates the even lanes.
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec256<T> DupEven(const Vec256<T> v) {
+  const DFromV<decltype(v)> tag;
+  return InterleaveLower(tag, v, v);
+}
+
+// ------------------------------ DupOdd (InterleaveUpper)
+
+// 32-bit integer lanes: within each block, lane 1 is copied to lane 0 and
+// lane 3 to lane 2.
+template <typename T, HWY_IF_UI32(T)>
+HWY_API Vec256<T> DupOdd(Vec256<T> v) {
+  constexpr int kOddTwice = _MM_SHUFFLE(3, 3, 1, 1);
+  return Vec256<T>{_mm256_shuffle_epi32(v.raw, kOddTwice)};
+}
+// Single-precision variant of the above.
+HWY_API Vec256<float> DupOdd(Vec256<float> v) {
+  constexpr int kOddTwice = _MM_SHUFFLE(3, 3, 1, 1);
+  return Vec256<float>{_mm256_shuffle_ps(v.raw, v.raw, kOddTwice)};
+}
+
+// 64-bit lanes: interleaving the upper lane of each block with itself
+// duplicates the odd lanes.
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec256<T> DupOdd(const Vec256<T> v) {
+  const DFromV<decltype(v)> tag;
+  return InterleaveUpper(tag, v, v);
+}
+
+// ------------------------------ OddEven
+
+// All overloads return a vector whose odd lanes come from `a` and whose even
+// lanes come from `b`.
+
+// 8-bit lanes: there is no byte blend with an immediate, so build a mask.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_INLINE Vec256<T> OddEven(Vec256<T> a, Vec256<T> b) {
+  const DFromV<decltype(a)> d;
+  const Full256<uint8_t> d8;
+  // 0xFF marks the even byte positions, which are taken from `b`.
+  alignas(32) static constexpr uint8_t kEvenBytes[16] = {
+      0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0};
+  const auto take_b = MaskFromVec(BitCast(d, LoadDup128(d8, kEvenBytes)));
+  return IfThenElse(take_b, b, a);
+}
+
+// 16-bit integer lanes: set mask bits select the lane from `b`.
+template <typename T, HWY_IF_UI16(T)>
+HWY_INLINE Vec256<T> OddEven(Vec256<T> a, Vec256<T> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToUnsigned<decltype(d)> du;  // for float16_t
+  const auto a_bits = BitCast(du, a);
+  const auto b_bits = BitCast(du, b);
+  const VFromD<decltype(du)> blended{
+      _mm256_blend_epi16(a_bits.raw, b_bits.raw, 0x55)};
+  return BitCast(d, blended);
+}
+
+#if HWY_HAVE_FLOAT16
+// Native float16 lanes.
+HWY_INLINE Vec256<float16_t> OddEven(Vec256<float16_t> a, Vec256<float16_t> b) {
+  return Vec256<float16_t>{_mm256_mask_blend_ph(a.raw, b.raw, 0x55)};
+}
+#endif  // HWY_HAVE_FLOAT16
+
+// 32-bit integer lanes.
+template <typename T, HWY_IF_UI32(T)>
+HWY_INLINE Vec256<T> OddEven(Vec256<T> a, Vec256<T> b) {
+  constexpr int kEvenFromB = 0x55;
+  return Vec256<T>{_mm256_blend_epi32(a.raw, b.raw, kEvenFromB)};
+}
+
+// 64-bit integer lanes: the blend works on 32-bit lanes, so each 64-bit lane
+// needs two adjacent mask bits.
+template <typename T, HWY_IF_UI64(T)>
+HWY_INLINE Vec256<T> OddEven(Vec256<T> a, Vec256<T> b) {
+  constexpr int kEvenFromB = 0x33;
+  return Vec256<T>{_mm256_blend_epi32(a.raw, b.raw, kEvenFromB)};
+}
+
+// Single-precision lanes.
+HWY_API Vec256<float> OddEven(Vec256<float> a, Vec256<float> b) {
+  constexpr int kEvenFromB = 0x55;
+  return Vec256<float>{_mm256_blend_ps(a.raw, b.raw, kEvenFromB)};
+}
+
+// Double-precision lanes: one mask bit per 64-bit lane.
+HWY_API Vec256<double> OddEven(Vec256<double> a, Vec256<double> b) {
+  constexpr int kEvenFromB = 5;
+  return Vec256<double>{_mm256_blend_pd(a.raw, b.raw, kEvenFromB)};
+}
+
+// ------------------------------ OddEvenBlocks
+
+// Returns a vector whose lower (even) 128-bit block comes from `even` and
+// whose upper (odd) block comes from `odd`.
+
+// Integer (and other non-f32/f64) lanes. HWY_API is applied here as well so
+// this overload carries the same attributes as the float/double overloads
+// below and the rest of the public ops in this file.
+template <typename T, HWY_IF_NOT_FLOAT3264(T)>
+HWY_API Vec256<T> OddEvenBlocks(Vec256<T> odd, Vec256<T> even) {
+  // The four low 32-bit lanes (= lower block) are taken from `even`.
+  return Vec256<T>{_mm256_blend_epi32(odd.raw, even.raw, 0xFu)};
+}
+
+// Single-precision lanes.
+HWY_API Vec256<float> OddEvenBlocks(Vec256<float> odd, Vec256<float> even) {
+  return Vec256<float>{_mm256_blend_ps(odd.raw, even.raw, 0xFu)};
+}
+
+// Double-precision lanes: one mask bit per 64-bit lane.
+HWY_API Vec256<double> OddEvenBlocks(Vec256<double> odd, Vec256<double> even) {
+  return Vec256<double>{_mm256_blend_pd(odd.raw, even.raw, 0x3u)};
+}
+
+// ------------------------------ ReverseBlocks (SwapAdjacentBlocks)
+
+// A 256-bit vector has exactly two 128-bit blocks, so reversing the block
+// order is the same as swapping them. The tag argument is unused.
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> ReverseBlocks(D /*d*/, VFromD<D> v) {
+  return SwapAdjacentBlocks(v);
+}
+
+// ------------------------------ TableLookupBytes (ZeroExtendVector)
+
+// Both full
+// Byte shuffle within each 128-bit block; the result has the index vector's
+// lane type.
+template <typename T, typename TI>
+HWY_API Vec256<TI> TableLookupBytes(Vec256<T> bytes, Vec256<TI> from) {
+  const auto shuffled = _mm256_shuffle_epi8(bytes.raw, from.raw);
+  return Vec256<TI>{shuffled};
+}
+
+// Partial index vector
+template <typename T, typename TI, size_t NI>
+HWY_API Vec128<TI, NI> TableLookupBytes(Vec256<T> bytes, Vec128<TI, NI> from) {
+  const Full256<TI> di;
+  const Half<decltype(di)> dih;
+  // First expand to full 128, then 256.
+  const Vec128<TI> from_128{from.raw};
+  const auto from_256 = ZeroExtendVector(di, from_128);
+  const auto looked_up = TableLookupBytes(bytes, from_256);
+  // Shrink to 128, then partial.
+  const auto lower = LowerHalf(dih, looked_up);
+  return Vec128<TI, NI>{lower.raw};
+}
+
+// Partial table vector
+template <typename T, size_t N, typename TI>
+HWY_API Vec256<TI> TableLookupBytes(Vec128<T, N> bytes, Vec256<TI> from) {
+  const Full256<T> d;
+  // First expand to full 128, then 256.
+  const Vec128<T> bytes_128{bytes.raw};
+  const auto bytes_256 = ZeroExtendVector(d, bytes_128);
+  return TableLookupBytes(bytes_256, from);
+}
+
+// Partial both are handled by x86_128.
+
+// ------------------------------ I8/U8 Broadcast (TableLookupBytes)
+
+// Broadcasts 8-bit lane kLane of each 128-bit block to every lane of that
+// block, using a byte shuffle whose indices all equal kLane.
+template <int kLane, class T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec256<T> Broadcast(const Vec256<T> v) {
+  static_assert(0 <= kLane && kLane < 16, "Invalid lane");
+  const Full256<T> d;
+  const auto lane_idx = Set(d, static_cast<T>(kLane));
+  return TableLookupBytes(v, lane_idx);
+}
+
+// ------------------------------ Per4LaneBlockShuffle
+
+namespace detail {
+
+// Builds a 256-bit vector in which both 128-bit blocks hold the 32-bit values
+// x0 (lowest lane) through x3, then reinterprets it as VFromD<D>.
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_INLINE VFromD<D> Per4LaneBlkShufDupSet4xU32(D d, const uint32_t x3,
+                                                const uint32_t x2,
+                                                const uint32_t x1,
+                                                const uint32_t x0) {
+  // The set intrinsic takes signed arguments, listed from highest lane down.
+  const int32_t i3 = static_cast<int32_t>(x3);
+  const int32_t i2 = static_cast<int32_t>(x2);
+  const int32_t i1 = static_cast<int32_t>(x1);
+  const int32_t i0 = static_cast<int32_t>(x0);
+  const Vec256<uint32_t> both_blocks{
+      _mm256_set_epi32(i3, i2, i1, i0, i3, i2, i1, i0)};
+  return BitCast(d, both_blocks);
+}
+
+// 32-bit non-float lanes: kIdx3210 (low eight bits) is used directly as the
+// immediate of the per-block 32-bit shuffle.
+template <size_t kIdx3210, class V, HWY_IF_NOT_FLOAT(TFromV<V>)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> /*idx_3210_tag*/,
+                                  hwy::SizeTag<4> /*lane_size_tag*/,
+                                  hwy::SizeTag<32> /*vect_size_tag*/, V v) {
+  constexpr int kImm = static_cast<int>(kIdx3210 & 0xFF);
+  return V{_mm256_shuffle_epi32(v.raw, kImm)};
+}
+
+// 32-bit float lanes: same immediate, with `v` supplied as both shuffle
+// operands.
+template <size_t kIdx3210, class V, HWY_IF_FLOAT(TFromV<V>)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> /*idx_3210_tag*/,
+                                  hwy::SizeTag<4> /*lane_size_tag*/,
+                                  hwy::SizeTag<32> /*vect_size_tag*/, V v) {
+  constexpr int kImm = static_cast<int>(kIdx3210 & 0xFF);
+  return V{_mm256_shuffle_ps(v.raw, v.raw, kImm)};
+}
+
+// 64-bit lanes, index pattern 0x44 (lanes 1,0,1,0): both result blocks are
+// the lower block of `v`.
+template <class V>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0x44> /*idx_3210_tag*/,
+                                  hwy::SizeTag<8> /*lane_size_tag*/,
+                                  hwy::SizeTag<32> /*vect_size_tag*/, V v) {
+  const DFromV<V> tag;
+  return ConcatLowerLower(tag, v, v);
+}
+
+// 64-bit lanes, index pattern 0xEE (lanes 3,2,3,2): both result blocks are
+// the upper block of `v`.
+template <class V>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<0xEE> /*idx_3210_tag*/,
+                                  hwy::SizeTag<8> /*lane_size_tag*/,
+                                  hwy::SizeTag<32> /*vect_size_tag*/, V v) {
+  const DFromV<V> tag;
+  return ConcatUpperUpper(tag, v, v);
+}
+
+// 64-bit non-float lanes: kIdx3210 (low eight bits) is used directly as the
+// immediate of the cross-block 64-bit permute.
+template <size_t kIdx3210, class V, HWY_IF_NOT_FLOAT(TFromV<V>)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> /*idx_3210_tag*/,
+                                  hwy::SizeTag<8> /*lane_size_tag*/,
+                                  hwy::SizeTag<32> /*vect_size_tag*/, V v) {
+  constexpr int kImm = static_cast<int>(kIdx3210 & 0xFF);
+  return V{_mm256_permute4x64_epi64(v.raw, kImm)};
+}
+
+// 64-bit float lanes: same immediate with the double-precision permute.
+template <size_t kIdx3210, class V, HWY_IF_FLOAT(TFromV<V>)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> /*idx_3210_tag*/,
+                                  hwy::SizeTag<8> /*lane_size_tag*/,
+                                  hwy::SizeTag<32> /*vect_size_tag*/, V v) {
+  constexpr int kImm = static_cast<int>(kIdx3210 & 0xFF);
+  return V{_mm256_permute4x64_pd(v.raw, kImm)};
+}
+
+}  // namespace detail
+
+// ------------------------------ SlideUpLanes
+
+namespace detail {
+
+#if HWY_TARGET <= HWY_AVX3
+// Concatenates hi:lo, shifts the pair right by kI32Lanes 32-bit lanes and
+// returns the low 256 bits, for any lane type (via bit casts to u32).
+template <int kI32Lanes, class V, HWY_IF_V_SIZE_V(V, 32)>
+HWY_INLINE V CombineShiftRightI32Lanes(V hi, V lo) {
+  const DFromV<decltype(hi)> d;
+  const Repartition<uint32_t, decltype(d)> du32;
+  const auto hi_u32 = BitCast(du32, hi);
+  const auto lo_u32 = BitCast(du32, lo);
+  const Vec256<uint32_t> shifted{
+      _mm256_alignr_epi32(hi_u32.raw, lo_u32.raw, kI32Lanes)};
+  return BitCast(d, shifted);
+}
+
+// Same as above, with the shift counted in 64-bit lanes.
+template <int kI64Lanes, class V, HWY_IF_V_SIZE_V(V, 32)>
+HWY_INLINE V CombineShiftRightI64Lanes(V hi, V lo) {
+  const DFromV<decltype(hi)> d;
+  const Repartition<uint64_t, decltype(d)> du64;
+  const auto hi_u64 = BitCast(du64, hi);
+  const auto lo_u64 = BitCast(du64, lo);
+  const Vec256<uint64_t> shifted{
+      _mm256_alignr_epi64(hi_u64.raw, lo_u64.raw, kI64Lanes)};
+  return BitCast(d, shifted);
+}
+
+// Shifts `v` up by kI64Lanes 64-bit lanes, filling the vacated low lanes with
+// zero. kI64Lanes == 0 returns `v` unchanged.
+template <int kI64Lanes, class V, HWY_IF_V_SIZE_V(V, 32)>
+HWY_INLINE V SlideUpI64Lanes(V v) {
+  static_assert(0 <= kI64Lanes && kI64Lanes <= 3,
+                "kI64Lanes must be between 0 and 3");
+  const DFromV<decltype(v)> d;
+  // The 256-bit VALIGNQ only consumes the low two bits of its immediate, so
+  // the count 4 - 0 = 4 would wrap to 0 and yield the all-zero low operand
+  // rather than `v`. Handle the identity case explicitly and keep the
+  // immediate within [0, 3]; counts for kI64Lanes 1..3 are unchanged.
+  return (kI64Lanes == 0)
+             ? v
+             : CombineShiftRightI64Lanes<(4 - kI64Lanes) & 3>(v, Zero(d));
+}
+#else   // AVX2
+// AVX2, non-float lanes: shifts `v` up by kI64Lanes 64-bit lanes and zeroes
+// the vacated low lanes. Implemented as a lane rotation followed by a blend
+// with zero.
+template <int kI64Lanes, class V, HWY_IF_V_SIZE_V(V, 32),
+          HWY_IF_NOT_FLOAT_D(DFromV<V>)>
+HWY_INLINE V SlideUpI64Lanes(V v) {
+  static_assert(0 <= kI64Lanes && kI64Lanes <= 3,
+                "kI64Lanes must be between 0 and 3");
+  // Output lane i reads input lane (i - kI64Lanes) mod 4.
+  constexpr int kSrc0 = (-kI64Lanes) & 3;
+  constexpr int kSrc1 = (-kI64Lanes + 1) & 3;
+  constexpr int kSrc2 = (-kI64Lanes + 2) & 3;
+  constexpr int kSrc3 = (-kI64Lanes + 3) & 3;
+  constexpr int kRotate = _MM_SHUFFLE(kSrc3, kSrc2, kSrc1, kSrc0);
+  // The blend works on 32-bit lanes, hence two mask bits per 64-bit lane.
+  constexpr int kZeroLow = (1 << (kI64Lanes * 2)) - 1;
+
+  const DFromV<decltype(v)> d;
+  const auto rotated = _mm256_permute4x64_epi64(v.raw, kRotate);
+  return V{_mm256_blend_epi32(rotated, Zero(d).raw, kZeroLow)};
+}
+
+// AVX2, float lanes: same rotation and blend, carried out on the vector
+// reinterpreted as doubles.
+template <int kI64Lanes, class V, HWY_IF_V_SIZE_V(V, 32),
+          HWY_IF_FLOAT_D(DFromV<V>)>
+HWY_INLINE V SlideUpI64Lanes(V v) {
+  static_assert(0 <= kI64Lanes && kI64Lanes <= 3,
+                "kI64Lanes must be between 0 and 3");
+  // Output lane i reads input lane (i - kI64Lanes) mod 4.
+  constexpr int kSrc0 = (-kI64Lanes) & 3;
+  constexpr int kSrc1 = (-kI64Lanes + 1) & 3;
+  constexpr int kSrc2 = (-kI64Lanes + 2) & 3;
+  constexpr int kSrc3 = (-kI64Lanes + 3) & 3;
+  constexpr int kRotate = _MM_SHUFFLE(kSrc3, kSrc2, kSrc1, kSrc0);
+  // One mask bit per 64-bit lane for the double-precision blend.
+  constexpr int kZeroLow = (1 << kI64Lanes) - 1;
+
+  const DFromV<decltype(v)> d;
+  const Repartition<double, decltype(d)> dd;
+  const auto rotated = _mm256_permute4x64_pd(BitCast(dd, v).raw, kRotate);
+  const Vec256<double> slid{
+      _mm256_blend_pd(rotated, Zero(dd).raw, kZeroLow)};
+  return BitCast(d, slid);
+}
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// Runtime-`amt` slide-up for 8-bit lanes (and, on targets without AVX3, also
+// 16-bit lanes), driven by a byte-granular index vector.
+template <class D, HWY_IF_V_SIZE_D(D, 32),
+          HWY_IF_T_SIZE_ONE_OF_D(
+              D, (1 << 1) | ((HWY_TARGET > HWY_AVX3) ? (1 << 2) : 0))>
+HWY_INLINE VFromD<D> TableLookupSlideUpLanes(D d, VFromD<D> v, size_t amt) {
+  using T = TFromD<D>;
+  const Repartition<uint8_t, decltype(d)> du8;
+
+  // Byte indices ascend from the negated byte offset, wrapped to 8 bits.
+  const size_t first_byte_idx = size_t{0} - amt * sizeof(T);
+  const auto byte_indices = Iota(du8, static_cast<uint8_t>(first_byte_idx));
+  const Indices256<T> idx{byte_indices.raw};
+
+#if HWY_TARGET <= HWY_AVX3_DL
+  // Second table is all-zero.
+  return TwoTablesLookupLanes(v, Zero(d), idx);
+#else
+  return TableLookupLanes(v, idx);
+#endif
+}
+
+// Runtime-`amt` slide-up for 32-bit lanes (and, on AVX3 targets, also 16- and
+// 64-bit lanes), driven by a lane-granular index vector.
+template <class D, HWY_IF_V_SIZE_GT_D(D, 16),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 4) | ((HWY_TARGET <= HWY_AVX3)
+                                                    ? ((1 << 2) | (1 << 8))
+                                                    : 0))>
+HWY_INLINE VFromD<D> TableLookupSlideUpLanes(D d, VFromD<D> v, size_t amt) {
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+
+  // Lane indices ascend from -amt (wrapped to TU).
+  const auto raw_idx = Iota(du, static_cast<TU>(size_t{0} - amt));
+#if HWY_TARGET <= HWY_AVX3
+  // Keep one extra index bit so that wrapped indices select the second,
+  // all-zero table.
+  const auto two_table_mask = Set(du, static_cast<TU>(MaxLanes(d) * 2 - 1));
+  const auto wrapped_idx = And(raw_idx, two_table_mask);
+  return TwoTablesLookupLanes(v, Zero(d), IndicesFromVec(d, wrapped_idx));
+#else
+  // Lanes whose index changed under masking were out of range: zero them.
+  const auto in_range_mask = Set(du, static_cast<TU>(MaxLanes(d) - 1));
+  const auto wrapped_idx = And(raw_idx, in_range_mask);
+  const auto is_valid = RebindMask(d, raw_idx == wrapped_idx);
+  return IfThenElseZero(is_valid,
+                        TableLookupLanes(v, IndicesFromVec(d, wrapped_idx)));
+#endif
+}
+
+#if HWY_TARGET > HWY_AVX3
+// 64-bit lanes without AVX3: reuse the overload for the half-width lane type,
+// sliding by twice as many of those narrower lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE VFromD<D> TableLookupSlideUpLanes(D d, VFromD<D> v, size_t amt) {
+  const RepartitionToNarrow<D> dn;
+  const auto narrow = BitCast(dn, v);
+  const auto slid = TableLookupSlideUpLanes(dn, narrow, amt * 2);
+  return BitCast(d, slid);
+}
+#endif  // HWY_TARGET > HWY_AVX3
+
+}  // namespace detail
+
+// Shifts `v` up by kBlocks 128-bit blocks. With kBlocks == 1 the lower block
+// of `v` becomes the upper block and the lower block is zero; with
+// kBlocks == 0 `v` is returned unchanged.
+template <int kBlocks, class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> SlideUpBlocks(D d, VFromD<D> v) {
+  static_assert(0 <= kBlocks && kBlocks <= 1,
+                "kBlocks must be between 0 and 1");
+  return (kBlocks != 1) ? v : ConcatLowerLower(d, v, Zero(d));
+}
+
+// Shifts the lanes of `v` up by `amt` lanes, via
+// detail::TableLookupSlideUpLanes in the general case.
+//
+// On GCC/clang non-debug builds, an `amt` known at compile time is instead
+// dispatched on its byte offset (amt * sizeof(T)) to fixed-immediate
+// shuffles. Byte offsets without a case fall out of the switch and continue
+// with the code after it.
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  constexpr size_t kLanesPerBlock = 16 / sizeof(TFromD<D>);
+  if (__builtin_constant_p(amt)) {
+    // Upper block = lower block of v, lower block = zero. This is the second
+    // operand of the byte shifts below.
+    const auto v_lo = ConcatLowerLower(d, v, Zero(d));
+    switch (amt * sizeof(TFromD<D>)) {
+      case 0:
+        return v;
+      case 1:
+        return CombineShiftRightBytes<15>(d, v, v_lo);
+      case 2:
+        return CombineShiftRightBytes<14>(d, v, v_lo);
+      case 3:
+        return CombineShiftRightBytes<13>(d, v, v_lo);
+      case 4:
+#if HWY_TARGET <= HWY_AVX3
+        return detail::CombineShiftRightI32Lanes<7>(v, Zero(d));
+#else
+        return CombineShiftRightBytes<12>(d, v, v_lo);
+#endif
+      case 5:
+        return CombineShiftRightBytes<11>(d, v, v_lo);
+      case 6:
+        return CombineShiftRightBytes<10>(d, v, v_lo);
+      case 7:
+        return CombineShiftRightBytes<9>(d, v, v_lo);
+      case 8:
+        return detail::SlideUpI64Lanes<1>(v);
+      case 9:
+        return CombineShiftRightBytes<7>(d, v, v_lo);
+      case 10:
+        return CombineShiftRightBytes<6>(d, v, v_lo);
+      case 11:
+        return CombineShiftRightBytes<5>(d, v, v_lo);
+      case 12:
+#if HWY_TARGET <= HWY_AVX3
+        return detail::CombineShiftRightI32Lanes<5>(v, Zero(d));
+#else
+        return CombineShiftRightBytes<4>(d, v, v_lo);
+#endif
+      case 13:
+        return CombineShiftRightBytes<3>(d, v, v_lo);
+      case 14:
+        return CombineShiftRightBytes<2>(d, v, v_lo);
+      case 15:
+        return CombineShiftRightBytes<1>(d, v, v_lo);
+      case 16:
+        return ConcatLowerLower(d, v, Zero(d));
+#if HWY_TARGET <= HWY_AVX3
+      case 20:
+        return detail::CombineShiftRightI32Lanes<3>(v, Zero(d));
+#endif
+      case 24:
+        return detail::SlideUpI64Lanes<3>(v);
+#if HWY_TARGET <= HWY_AVX3
+      case 28:
+        return detail::CombineShiftRightI32Lanes<1>(v, Zero(d));
+#endif
+    }
+  }
+
+  // If `amt` is known to be at least one block: slide the lower half by the
+  // remaining lanes and place it in the upper half above a zero lower half.
+  if (__builtin_constant_p(amt >= kLanesPerBlock) && amt >= kLanesPerBlock) {
+    const Half<decltype(d)> dh;
+    return Combine(d, SlideUpLanes(dh, LowerHalf(dh, v), amt - kLanesPerBlock),
+                   Zero(dh));
+  }
+#endif
+
+  // General case (runtime `amt`, or no constant-time shortcut applied).
+  return detail::TableLookupSlideUpLanes(d, v, amt);
+}
+
+// ------------------------------ Slide1Up
+
+// 1-byte lanes: a 15-byte CombineShiftRightBytes of `v` with the vector built
+// from the lower halves of `v` and zero.
+template <typename D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Slide1Up(D d, VFromD<D> v) {
+  return CombineShiftRightBytes<15>(d, v, ConcatLowerLower(d, v, Zero(d)));
+}
+
+// 2-byte lanes: same construction as the 1-byte overload, with a 14-byte
+// CombineShiftRightBytes.
+template <typename D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Slide1Up(D d, VFromD<D> v) {
+  return CombineShiftRightBytes<14>(d, v, ConcatLowerLower(d, v, Zero(d)));
+}
+
+// 4-byte lanes: AVX3 targets use a 32-bit-lane combine-shift against zero;
+// other targets use the byte-granular combine-shift like the narrower lanes.
+template <typename D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> Slide1Up(D d, VFromD<D> v) {
+#if HWY_TARGET > HWY_AVX3
+  return CombineShiftRightBytes<12>(d, v, ConcatLowerLower(d, v, Zero(d)));
+#else
+  const auto zero = Zero(d);
+  return detail::CombineShiftRightI32Lanes<7>(v, zero);
+#endif
+}
+
+// 8-byte lanes: delegates to the 64-bit-lane slide helper with a count of 1.
+template <typename D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> Slide1Up(D /*d*/, VFromD<D> v) {
+  const auto slid = detail::SlideUpI64Lanes<1>(v);
+  return slid;
+}
+
+// ------------------------------ SlideDownLanes
+
+namespace detail {
+
+#if HWY_TARGET <= HWY_AVX3
+// AVX3 variant: slides down by kI64Lanes 64-bit lanes via a 64-bit-lane
+// combine-shift of an all-zero vector with `v`.
+template <int kI64Lanes, class V, HWY_IF_V_SIZE_V(V, 32)>
+HWY_INLINE V SlideDownI64Lanes(V v) {
+  static_assert(0 <= kI64Lanes && kI64Lanes <= 3,
+                "kI64Lanes must be between 0 and 3");
+  const DFromV<V> d;
+  const auto zero = Zero(d);
+  return CombineShiftRightI64Lanes<kI64Lanes>(zero, v);
+}
+#else   // AVX2
+// AVX2 variant for non-float lane types: permutes the four 64-bit lanes of `v`
+// and then blends in zero.
+template <int kI64Lanes, class V, HWY_IF_V_SIZE_V(V, 32),
+          HWY_IF_NOT_FLOAT_D(DFromV<V>)>
+HWY_INLINE V SlideDownI64Lanes(V v) {
+  static_assert(0 <= kI64Lanes && kI64Lanes <= 3,
+                "kI64Lanes must be between 0 and 3");
+  // Permutation: output lane i reads input lane (kI64Lanes + i) mod 4.
+  constexpr int kIdx1 = (kI64Lanes + 1) & 3;
+  constexpr int kIdx2 = (kI64Lanes + 2) & 3;
+  constexpr int kIdx3 = (kI64Lanes + 3) & 3;
+  constexpr int kIdx3210 = _MM_SHUFFLE(kIdx3, kIdx2, kIdx1, kI64Lanes);
+  // One mask bit per 32-bit element: the upper 2 * kI64Lanes bits are set, so
+  // the blend takes zero for the upper kI64Lanes 64-bit lanes (those that
+  // wrapped around in the permutation).
+  constexpr int kBlendMask =
+      static_cast<int>((0xFFu << ((4 - kI64Lanes) * 2)) & 0xFFu);
+
+  const DFromV<decltype(v)> d;
+  return V{_mm256_blend_epi32(_mm256_permute4x64_epi64(v.raw, kIdx3210),
+                              Zero(d).raw, kBlendMask)};
+}
+
+// AVX2 variant for float lane types: same permute-then-blend scheme as the
+// integer overload, but using the double-precision permute/blend intrinsics on
+// a bit-cast of `v`.
+template <int kI64Lanes, class V, HWY_IF_V_SIZE_V(V, 32),
+          HWY_IF_FLOAT_D(DFromV<V>)>
+HWY_INLINE V SlideDownI64Lanes(V v) {
+  static_assert(0 <= kI64Lanes && kI64Lanes <= 3,
+                "kI64Lanes must be between 0 and 3");
+  // Permutation: output lane i reads input lane (kI64Lanes + i) mod 4.
+  constexpr int kIdx1 = (kI64Lanes + 1) & 3;
+  constexpr int kIdx2 = (kI64Lanes + 2) & 3;
+  constexpr int kIdx3 = (kI64Lanes + 3) & 3;
+  constexpr int kIdx3210 = _MM_SHUFFLE(kIdx3, kIdx2, kIdx1, kI64Lanes);
+  // One mask bit per 64-bit lane: the upper kI64Lanes bits are set, so the
+  // blend takes zero for the lanes that wrapped around.
+  constexpr int kBlendMask = (0x0F << (4 - kI64Lanes)) & 0x0F;
+
+  const DFromV<decltype(v)> d;
+  const Repartition<double, decltype(d)> dd;
+  return BitCast(d, Vec256<double>{_mm256_blend_pd(
+                        _mm256_permute4x64_pd(BitCast(dd, v).raw, kIdx3210),
+                        Zero(dd).raw, kBlendMask)});
+}
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// Generic slide-down using byte indices. Enabled for 1-byte lanes and, when
+// HWY_TARGET > HWY_AVX3, also for 2-byte lanes.
+//
+// d:   descriptor tag of the 32-byte vector.
+// v:   input vector.
+// amt: number of lanes to slide down by.
+template <class D, HWY_IF_V_SIZE_D(D, 32),
+          HWY_IF_T_SIZE_ONE_OF_D(
+              D, (1 << 1) | ((HWY_TARGET > HWY_AVX3) ? (1 << 2) : 0))>
+HWY_INLINE VFromD<D> TableLookupSlideDownLanes(D d, VFromD<D> v, size_t amt) {
+  const Repartition<uint8_t, decltype(d)> du8;
+
+  // Byte i of the result reads source byte (amt * sizeof(T) + i); the start
+  // value is truncated to uint8_t.
+  auto idx_vec = Iota(du8, static_cast<uint8_t>(amt * sizeof(TFromD<D>)));
+
+#if HWY_TARGET <= HWY_AVX3_DL
+  // Zero-masking permute: bytes whose index is not below 32 become zero.
+  const auto result_mask = idx_vec < Set(du8, uint8_t{32});
+  return VFromD<D>{
+      _mm256_maskz_permutexvar_epi8(result_mask.raw, idx_vec.raw, v.raw)};
+#else
+  const RebindToSigned<decltype(du8)> di8;
+  // Indices that compare greater than 31 as signed bytes are OR-ed with
+  // all-ones (0xFF). NOTE(review): presumably TableLookupLanes yields zero for
+  // index bytes with the top bit set - confirm against its implementation.
+  idx_vec =
+      Or(idx_vec, BitCast(du8, VecFromMask(di8, BitCast(di8, idx_vec) >
+                                                    Set(di8, int8_t{31}))));
+  return TableLookupLanes(v, Indices256<TFromD<D>>{idx_vec.raw});
+#endif
+}
+
+// Generic slide-down using lane indices. Enabled for 4-byte lanes and, on AVX3
+// targets, also for 2- and 8-byte lanes.
+template <class D, HWY_IF_V_SIZE_GT_D(D, 16),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 4) | ((HWY_TARGET <= HWY_AVX3)
+                                                    ? ((1 << 2) | (1 << 8))
+                                                    : 0))>
+HWY_INLINE VFromD<D> TableLookupSlideDownLanes(D d, VFromD<D> v, size_t amt) {
+  const RebindToUnsigned<D> du;
+  using TU = TFromD<decltype(du)>;
+
+  // Source lane index for each output lane, and the same index masked with
+  // MaxLanes(d) - 1.
+  const TU lane_mask = static_cast<TU>(MaxLanes(d) - 1);
+  const auto src_idx = Iota(du, static_cast<TU>(amt));
+  const auto wrapped_idx = And(src_idx, Set(du, lane_mask));
+
+  // Lanes whose index was changed by the masking are zeroed in the result.
+  const auto unchanged = RebindMask(d, src_idx == wrapped_idx);
+  const auto looked_up = TableLookupLanes(v, IndicesFromVec(d, wrapped_idx));
+  return IfThenElseZero(unchanged, looked_up);
+}
+
+#if HWY_TARGET > HWY_AVX3
+// 8-byte-lane overload (only compiled when HWY_TARGET > HWY_AVX3): forwards to
+// the overload for the narrower lane type obtained via RepartitionToNarrow.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 8)>
+HWY_INLINE VFromD<D> TableLookupSlideDownLanes(D d, VFromD<D> v, size_t amt) {
+  const RepartitionToNarrow<D> d_narrow;
+  const auto v_narrow = BitCast(d_narrow, v);
+  // The slide amount is doubled for the narrow-lane call (presumably because
+  // each 8-byte lane covers two narrow lanes).
+  const auto slid_narrow =
+      TableLookupSlideDownLanes(d_narrow, v_narrow, amt * 2);
+  return BitCast(d, slid_narrow);
+}
+#endif  // HWY_TARGET > HWY_AVX3
+
+}  // namespace detail
+
+// Slides `v` down by kBlocks blocks; kBlocks must be 0 or 1 for 32-byte
+// vectors.
+template <int kBlocks, class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> SlideDownBlocks(D d, VFromD<D> v) {
+  static_assert(kBlocks == 0 || kBlocks == 1,
+                "kBlocks must be between 0 and 1");
+  const Half<D> d_half;
+  // Zero blocks: return the input unchanged. One block: zero-extend the upper
+  // half of `v` to a full vector.
+  return (kBlocks == 0) ? v : ZeroExtendVector(d, UpperHalf(d_half, v));
+}
+
+// Slides the lanes of the 32-byte vector `v` down by `amt` lanes. Every fast
+// path below fills with zero (Zero(d) or a zero-extended upper half); the
+// generic fallback is detail::TableLookupSlideDownLanes.
+//
+// d:   descriptor tag of the 32-byte vector.
+// v:   input vector.
+// amt: number of lanes to slide by (runtime value; special-cased when the
+//      compiler can prove it constant).
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  constexpr size_t kLanesPerBlock = 16 / sizeof(TFromD<D>);
+  const Half<decltype(d)> dh;
+  // Fast paths are only taken when `amt` is a compile-time constant after
+  // inlining, so the switch folds to a single case.
+  if (__builtin_constant_p(amt)) {
+    const auto v_hi = ZeroExtendVector(d, UpperHalf(dh, v));
+    // Dispatch on the slide distance in bytes. There is no default: byte
+    // offsets without a dedicated case fall through to the code below.
+    switch (amt * sizeof(TFromD<D>)) {
+      case 0:
+        return v;
+      case 1:
+        return CombineShiftRightBytes<1>(d, v_hi, v);
+      case 2:
+        return CombineShiftRightBytes<2>(d, v_hi, v);
+      case 3:
+        return CombineShiftRightBytes<3>(d, v_hi, v);
+      case 4:
+#if HWY_TARGET <= HWY_AVX3
+        return detail::CombineShiftRightI32Lanes<1>(Zero(d), v);
+#else
+        return CombineShiftRightBytes<4>(d, v_hi, v);
+#endif
+      case 5:
+        return CombineShiftRightBytes<5>(d, v_hi, v);
+      case 6:
+        return CombineShiftRightBytes<6>(d, v_hi, v);
+      case 7:
+        return CombineShiftRightBytes<7>(d, v_hi, v);
+      case 8:
+        return detail::SlideDownI64Lanes<1>(v);
+      case 9:
+        return CombineShiftRightBytes<9>(d, v_hi, v);
+      case 10:
+        return CombineShiftRightBytes<10>(d, v_hi, v);
+      case 11:
+        return CombineShiftRightBytes<11>(d, v_hi, v);
+      case 12:
+#if HWY_TARGET <= HWY_AVX3
+        return detail::CombineShiftRightI32Lanes<3>(Zero(d), v);
+#else
+        return CombineShiftRightBytes<12>(d, v_hi, v);
+#endif
+      case 13:
+        return CombineShiftRightBytes<13>(d, v_hi, v);
+      case 14:
+        return CombineShiftRightBytes<14>(d, v_hi, v);
+      case 15:
+        return CombineShiftRightBytes<15>(d, v_hi, v);
+      case 16:
+        return v_hi;
+#if HWY_TARGET <= HWY_AVX3
+      case 20:
+        return detail::CombineShiftRightI32Lanes<5>(Zero(d), v);
+#endif
+      case 24:
+        return detail::SlideDownI64Lanes<3>(v);
+#if HWY_TARGET <= HWY_AVX3
+      case 28:
+        return detail::CombineShiftRightI32Lanes<7>(Zero(d), v);
+#endif
+    }
+  }
+
+  // If the compiler can prove the slide is at least one full 16-byte block,
+  // slide only the upper half by the remainder and zero-extend the result.
+  if (__builtin_constant_p(amt >= kLanesPerBlock) && amt >= kLanesPerBlock) {
+    return ZeroExtendVector(
+        d, SlideDownLanes(dh, UpperHalf(dh, v), amt - kLanesPerBlock));
+  }
+#endif
+
+  // Generic path for amounts not known at compile time (and debug builds).
+  return detail::TableLookupSlideDownLanes(d, v, amt);
+}
+
+// ------------------------------ Slide1Down
+
+// 1-byte lanes: a 1-byte CombineShiftRightBytes of the zero-extended upper
+// half of `v` with `v` itself.
+template <typename D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Slide1Down(D d, VFromD<D> v) {
+  const Half<D> d_half;
+  const auto upper_zero_ext = ZeroExtendVector(d, UpperHalf(d_half, v));
+  return CombineShiftRightBytes<1>(d, upper_zero_ext, v);
+}
+
+// 2-byte lanes: same construction as the 1-byte overload, shifting by 2 bytes.
+template <typename D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Slide1Down(D d, VFromD<D> v) {
+  const Half<D> d_half;
+  const auto upper_zero_ext = ZeroExtendVector(d, UpperHalf(d_half, v));
+  return CombineShiftRightBytes<2>(d, upper_zero_ext, v);
+}
+
+// 4-byte lanes: AVX3 targets use a 32-bit-lane combine-shift against zero;
+// other targets use the byte-granular combine-shift like the narrower lanes.
+template <typename D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> Slide1Down(D d, VFromD<D> v) {
+#if HWY_TARGET > HWY_AVX3
+  const Half<D> d_half;
+  const auto upper_zero_ext = ZeroExtendVector(d, UpperHalf(d_half, v));
+  return CombineShiftRightBytes<4>(d, upper_zero_ext, v);
+#else
+  const auto zero = Zero(d);
+  return detail::CombineShiftRightI32Lanes<1>(zero, v);
+#endif
+}
+
+// 8-byte lanes: delegates to the 64-bit-lane slide helper with a count of 1.
+template <typename D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> Slide1Down(D /*d*/, VFromD<D> v) {
+  const auto slid = detail::SlideDownI64Lanes<1>(v);
+  return slid;
+}
+
+// ------------------------------ Shl (Mul, ZipLower)
+
+namespace detail {
+
+#if HWY_TARGET > HWY_AVX3 && !HWY_IDE  // AVX2 or older
+// Per-lane variable left shift of 16-bit lanes for targets without a native
+// 16-bit variable shift: each half is promoted to 32-bit lanes, shifted there,
+// and the two results are recombined with ConcatEven.
+template <class V>
+HWY_INLINE V AVX2ShlU16Vec256(V v, V bits) {
+  const DFromV<V> d;
+  const Half<decltype(d)> d_half;
+  const Rebind<uint32_t, decltype(d_half)> du32;
+
+  const auto lo_vals = PromoteTo(du32, LowerHalf(d_half, v));
+  const auto lo_counts = PromoteTo(du32, LowerHalf(d_half, bits));
+  const auto lo_shifted = lo_vals << lo_counts;
+
+  const auto hi_vals = PromoteTo(du32, UpperHalf(d_half, v));
+  const auto hi_counts = PromoteTo(du32, UpperHalf(d_half, bits));
+  const auto hi_shifted = hi_vals << hi_counts;
+
+  return ConcatEven(d, BitCast(d, hi_shifted), BitCast(d, lo_shifted));
+}
+#endif
+
+// uint16_t lanes: native variable shift on AVX3 (or in IDE mode), otherwise
+// the 32-bit emulation in AVX2ShlU16Vec256.
+HWY_INLINE Vec256<uint16_t> Shl(hwy::UnsignedTag /*tag*/, Vec256<uint16_t> v,
+                                Vec256<uint16_t> bits) {
+#if HWY_TARGET > HWY_AVX3 && !HWY_IDE
+  return AVX2ShlU16Vec256(v, bits);
+#else
+  const __m256i shifted = _mm256_sllv_epi16(v.raw, bits.raw);
+  return Vec256<uint16_t>{shifted};
+#endif
+}
+
+// 8-bit: may use the Shl overload for uint16_t.
+// Per-lane variable left shift of uint8_t lanes. `tag` is only forwarded to
+// the 16-bit Shl overload on the non-AVX3_DL path.
+HWY_API Vec256<uint8_t> Shl(hwy::UnsignedTag tag, Vec256<uint8_t> v,
+                            Vec256<uint8_t> bits) {
+  const DFromV<decltype(v)> d;
+#if HWY_TARGET <= HWY_AVX3_DL
+  (void)tag;
+  // kMasks[i] = 0xFF >> i (entries without an initializer are zero)
+  alignas(16) static constexpr uint8_t kMasks[16] = {
+      0xFF, 0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01, 0x00};
+  // kShl[i] = 1 << i (entries without an initializer are zero)
+  alignas(16) static constexpr uint8_t kShl[16] = {1,    2,    4,    8,   0x10,
+                                                   0x20, 0x40, 0x80, 0x00};
+  // Clear the bits that would be shifted out. The GF(2^8) product with
+  // 1 << bits then has no term of degree >= 8, so no polynomial reduction
+  // occurs and the multiply is equivalent to a plain left shift.
+  v = And(v, TableLookupBytes(LoadDup128(d, kMasks), bits));
+  const VFromD<decltype(d)> mul = TableLookupBytes(LoadDup128(d, kShl), bits);
+  return VFromD<decltype(d)>{_mm256_gf2p8mul_epi8(v.raw, mul.raw)};
+#else
+  const Repartition<uint16_t, decltype(d)> dw;
+  using VW = VFromD<decltype(dw)>;
+  const VW even_mask = Set(dw, 0x00FF);
+  const VW odd_mask = Set(dw, 0xFF00);
+  const VW vw = BitCast(dw, v);
+  const VW bits16 = BitCast(dw, bits);
+  // Shift even lanes in-place
+  const VW evens = Shl(tag, vw, And(bits16, even_mask));
+  // Odd lanes: clear the even byte first so that only zeros are shifted into
+  // the odd byte; the shift count comes from the odd byte of `bits`.
+  const VW odds = Shl(tag, And(vw, odd_mask), ShiftRight<8>(bits16));
+  return OddEven(BitCast(d, odds), BitCast(d, evens));
+#endif
+}
+
+// uint32_t lanes: per-lane variable left shift via _mm256_sllv_epi32.
+HWY_INLINE Vec256<uint32_t> Shl(hwy::UnsignedTag /*tag*/, Vec256<uint32_t> v,
+                                Vec256<uint32_t> bits) {
+  const __m256i shifted = _mm256_sllv_epi32(v.raw, bits.raw);
+  return Vec256<uint32_t>{shifted};
+}
+
+// uint64_t lanes: per-lane variable left shift via _mm256_sllv_epi64.
+HWY_INLINE Vec256<uint64_t> Shl(hwy::UnsignedTag /*tag*/, Vec256<uint64_t> v,
+                                Vec256<uint64_t> bits) {
+  const __m256i shifted = _mm256_sllv_epi64(v.raw, bits.raw);
+  return Vec256<uint64_t>{shifted};
+}
+
+// Signed lanes: reinterpret as unsigned, shift with the unsigned overload,
+// then reinterpret back (signed left shifts are the same as unsigned).
+template <typename T>
+HWY_INLINE Vec256<T> Shl(hwy::SignedTag /*tag*/, Vec256<T> v, Vec256<T> bits) {
+  const Full256<T> di;
+  const Full256<MakeUnsigned<T>> du;
+  const auto v_unsigned = BitCast(du, v);
+  const auto counts_unsigned = BitCast(du, bits);
+  const auto shifted = Shl(hwy::UnsignedTag(), v_unsigned, counts_unsigned);
+  return BitCast(di, shifted);
+}
+
+}  // namespace detail
+
+// Per-lane variable left shift; dispatches on hwy::TypeTag<T> to the matching
+// detail::Shl overload.
+template <typename T>
+HWY_API Vec256<T> operator<<(Vec256<T> v, Vec256<T> bits) {
+  const auto type_tag = hwy::TypeTag<T>();
+  return detail::Shl(type_tag, v, bits);
+}
+
+// ------------------------------ Shr (MulHigh, IfThenElse, Not)
+
+#if HWY_TARGET > HWY_AVX3  // AVX2
+namespace detail {
+
+// Per-lane variable logical right shift of 16-bit lanes for AVX2: each half is
+// promoted to uint32_t lanes, shifted there, and the two results are demoted
+// back with OrderedDemote2To (after a bit-cast to int32_t).
+template <class V>
+HWY_INLINE V AVX2ShrU16Vec256(V v, V bits) {
+  const DFromV<V> d;
+  const Half<decltype(d)> d_half;
+  const Rebind<int32_t, decltype(d_half)> di32;
+  const Rebind<uint32_t, decltype(d_half)> du32;
+
+  const auto lo_vals = PromoteTo(du32, LowerHalf(d_half, v));
+  const auto lo_counts = PromoteTo(du32, LowerHalf(d_half, bits));
+  const auto lo_shifted = lo_vals >> lo_counts;
+
+  const auto hi_vals = PromoteTo(du32, UpperHalf(d_half, v));
+  const auto hi_counts = PromoteTo(du32, UpperHalf(d_half, bits));
+  const auto hi_shifted = hi_vals >> hi_counts;
+
+  return OrderedDemote2To(d, BitCast(di32, lo_shifted),
+                          BitCast(di32, hi_shifted));
+}
+
+}  // namespace detail
+#endif
+
+// uint16_t lanes: native variable shift on AVX3, otherwise the 32-bit
+// emulation in detail::AVX2ShrU16Vec256.
+HWY_API Vec256<uint16_t> operator>>(Vec256<uint16_t> v, Vec256<uint16_t> bits) {
+#if HWY_TARGET > HWY_AVX3
+  return detail::AVX2ShrU16Vec256(v, bits);
+#else
+  const __m256i shifted = _mm256_srlv_epi16(v.raw, bits.raw);
+  return Vec256<uint16_t>{shifted};
+#endif
+}
+
+// uint8_t lanes are shifted as pairs inside 16-bit lanes.
+HWY_API Vec256<uint8_t> operator>>(Vec256<uint8_t> v, Vec256<uint8_t> bits) {
+  const DFromV<decltype(v)> d;
+  const RepartitionToWide<decltype(d)> dw;
+  using VW = VFromD<decltype(dw)>;
+  const VW low_byte = Set(dw, 0x00FF);
+  const VW v16 = BitCast(dw, v);
+  const VW counts16 = BitCast(dw, bits);
+
+  // Odd bytes stay in place in the upper byte; their shift count is moved
+  // down from the upper byte of `bits`.
+  const VW odd_counts = ShiftRight<8>(counts16);
+  const VW odds = v16 >> odd_counts;
+
+  // Even bytes: isolate both the value and its count in the low byte first.
+  const VW even_vals = And(v16, low_byte);
+  const VW even_counts = And(counts16, low_byte);
+  const VW evens = even_vals >> even_counts;
+
+  return OddEven(BitCast(d, odds), BitCast(d, evens));
+}
+
+// uint32_t lanes: per-lane variable logical right shift.
+HWY_API Vec256<uint32_t> operator>>(Vec256<uint32_t> v, Vec256<uint32_t> bits) {
+  const __m256i shifted = _mm256_srlv_epi32(v.raw, bits.raw);
+  return Vec256<uint32_t>{shifted};
+}
+
+// uint64_t lanes: per-lane variable logical right shift.
+HWY_API Vec256<uint64_t> operator>>(Vec256<uint64_t> v, Vec256<uint64_t> bits) {
+  const __m256i shifted = _mm256_srlv_epi64(v.raw, bits.raw);
+  return Vec256<uint64_t>{shifted};
+}
+
+#if HWY_TARGET > HWY_AVX3  // AVX2
+namespace detail {
+
+// Per-lane variable right shift of 16-bit lanes for AVX2, performed on
+// int32_t promotions of each half and demoted back with OrderedDemote2To.
+template <class V>
+HWY_INLINE V AVX2ShrI16Vec256(V v, V bits) {
+  const DFromV<V> d;
+  const Half<decltype(d)> d_half;
+  const Rebind<int32_t, decltype(d_half)> di32;
+
+  const auto lo_vals = PromoteTo(di32, LowerHalf(d_half, v));
+  const auto lo_counts = PromoteTo(di32, LowerHalf(d_half, bits));
+  const auto lo_shifted = lo_vals >> lo_counts;
+
+  const auto hi_vals = PromoteTo(di32, UpperHalf(d_half, v));
+  const auto hi_counts = PromoteTo(di32, UpperHalf(d_half, bits));
+  const auto hi_shifted = hi_vals >> hi_counts;
+
+  return OrderedDemote2To(d, lo_shifted, hi_shifted);
+}
+
+}  // namespace detail
+#endif
+
+// int16_t lanes: native variable arithmetic shift on AVX3, otherwise the
+// 32-bit emulation in detail::AVX2ShrI16Vec256.
+HWY_API Vec256<int16_t> operator>>(Vec256<int16_t> v, Vec256<int16_t> bits) {
+#if HWY_TARGET > HWY_AVX3
+  return detail::AVX2ShrI16Vec256(v, bits);
+#else
+  const __m256i shifted = _mm256_srav_epi16(v.raw, bits.raw);
+  return Vec256<int16_t>{shifted};
+#endif
+}
+
+// int8_t lanes are shifted as pairs inside int16_t lanes.
+HWY_API Vec256<int8_t> operator>>(Vec256<int8_t> v, Vec256<int8_t> bits) {
+  const DFromV<decltype(v)> d;
+  const RepartitionToWide<decltype(d)> dw;
+  const RebindToUnsigned<decltype(dw)> dw_u;
+  using VW = VFromD<decltype(dw)>;
+  const VW low_byte = Set(dw, 0x00FF);
+  const VW v16 = BitCast(dw, v);
+  const VW counts16 = BitCast(dw, bits);
+
+  // Even bytes: shift left then right by 8 in the signed 16-bit lane to
+  // replicate the byte's sign into the upper byte, then apply the count held
+  // in the low byte of `bits`.
+  const VW even_vals = ShiftRight<8>(ShiftLeft<8>(v16));
+  const VW even_counts = And(counts16, low_byte);
+  const VW evens = even_vals >> even_counts;
+
+  // Odd bytes stay in place; their count is moved down from the upper byte
+  // using an unsigned shift.
+  const VW odd_counts = BitCast(dw, ShiftRight<8>(BitCast(dw_u, counts16)));
+  const VW odds = v16 >> odd_counts;
+
+  return OddEven(BitCast(d, odds), BitCast(d, evens));
+}
+
+// int32_t lanes: per-lane variable arithmetic right shift.
+HWY_API Vec256<int32_t> operator>>(Vec256<int32_t> v, Vec256<int32_t> bits) {
+  const __m256i shifted = _mm256_srav_epi32(v.raw, bits.raw);
+  return Vec256<int32_t>{shifted};
+}
+
+// int64_t lanes: native variable arithmetic shift on AVX3, otherwise the
+// detail::SignedShr helper.
+HWY_API Vec256<int64_t> operator>>(Vec256<int64_t> v, Vec256<int64_t> bits) {
+#if HWY_TARGET > HWY_AVX3
+  const DFromV<decltype(v)> d;
+  return detail::SignedShr(d, v, bits);
+#else
+  const __m256i shifted = _mm256_srav_epi64(v.raw, bits.raw);
+  return Vec256<int64_t>{shifted};
+#endif
+}
+
+// 64x64-bit multiply built from four 32x32 -> 64-bit MulEven partial products.
+// Returns InterleaveLower(mulL, mulH), where mulL/mulH hold the low/high 64
+// bits of each lane's product.
+HWY_INLINE Vec256<uint64_t> MulEven(const Vec256<uint64_t> a,
+                                    const Vec256<uint64_t> b) {
+  const Full256<uint64_t> du64;
+  const RepartitionToNarrow<decltype(du64)> du32;
+  const auto maskL = Set(du64, 0xFFFFFFFFULL);
+  const auto a32 = BitCast(du32, a);
+  const auto b32 = BitCast(du32, b);
+  // Inputs for MulEven: we only need the lower 32 bits
+  const auto aH = Shuffle2301(a32);
+  const auto bH = Shuffle2301(b32);
+
+  // Knuth double-word multiplication. We use 32x32 = 64 MulEven and only need
+  // the even (lower 64 bits of every 128-bit block) results. See
+  // https://github.com/hcs0/Hackers-Delight/blob/master/muldwu.c.txt
+  const auto aLbL = MulEven(a32, b32);
+  const auto w3 = aLbL & maskL;
+
+  const auto t2 = MulEven(aH, b32) + ShiftRight<32>(aLbL);
+  const auto w2 = t2 & maskL;
+  const auto w1 = ShiftRight<32>(t2);
+
+  const auto t = MulEven(a32, bH) + w2;
+  const auto k = ShiftRight<32>(t);
+
+  // Assemble the high and low 64-bit halves of each product.
+  const auto mulH = MulEven(aH, bH) + w1 + k;
+  const auto mulL = ShiftLeft<32>(t) + w3;
+  return InterleaveLower(mulL, mulH);
+}
+
+// Counterpart of the uint64_t MulEven: computes the same per-lane partial
+// products but returns InterleaveUpper(du64, mulL, mulH), i.e. the results
+// taken from the upper 64 bits of each block.
+HWY_INLINE Vec256<uint64_t> MulOdd(const Vec256<uint64_t> a,
+                                   const Vec256<uint64_t> b) {
+  const Full256<uint64_t> du64;
+  const RepartitionToNarrow<decltype(du64)> du32;
+  const auto maskL = Set(du64, 0xFFFFFFFFULL);
+  const auto a32 = BitCast(du32, a);
+  const auto b32 = BitCast(du32, b);
+  // Inputs for MulEven: we only need bits [95:64] (= upper half of input)
+  const auto aH = Shuffle2301(a32);
+  const auto bH = Shuffle2301(b32);
+
+  // Same as above, but we're using the odd results (upper 64 bits per block).
+  const auto aLbL = MulEven(a32, b32);
+  const auto w3 = aLbL & maskL;
+
+  const auto t2 = MulEven(aH, b32) + ShiftRight<32>(aLbL);
+  const auto w2 = t2 & maskL;
+  const auto w1 = ShiftRight<32>(t2);
+
+  const auto t = MulEven(a32, bH) + w2;
+  const auto k = ShiftRight<32>(t);
+
+  // Assemble the high and low 64-bit halves of each product.
+  const auto mulH = MulEven(aH, bH) + w1 + k;
+  const auto mulL = ShiftLeft<32>(t) + w3;
+  return InterleaveUpper(du64, mulL, mulH);
+}
+
+// ------------------------------ WidenMulPairwiseAdd
+// int16_t x int16_t -> int32_t: multiplies corresponding lanes and adds
+// adjacent pairs of products (_mm256_madd_epi16).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> WidenMulPairwiseAdd(D /*d32*/, Vec256<int16_t> a,
+                                      Vec256<int16_t> b) {
+  const __m256i pair_sums = _mm256_madd_epi16(a.raw, b.raw);
+  return VFromD<D>{pair_sums};
+}
+
+// ------------------------------ SatWidenMulPairwiseAdd
+
+// uint8_t x int8_t -> int16_t: multiplies corresponding lanes and adds
+// adjacent pairs of products with signed saturation (_mm256_maddubs_epi16).
+template <class DI16, HWY_IF_V_SIZE_D(DI16, 32), HWY_IF_I16_D(DI16)>
+HWY_API VFromD<DI16> SatWidenMulPairwiseAdd(
+    DI16 /* tag */, VFromD<Repartition<uint8_t, DI16>> a,
+    VFromD<Repartition<int8_t, DI16>> b) {
+  const __m256i pair_sums = _mm256_maddubs_epi16(a.raw, b.raw);
+  return VFromD<DI16>{pair_sums};
+}
+
+// ------------------------------ ReorderWidenMulAccumulate
+// Adds the pairwise-summed int16_t products of `a` and `b` to `sum0` and
+// returns the result; `sum1` is not used by this implementation.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> ReorderWidenMulAccumulate(D d, Vec256<int16_t> a,
+                                            Vec256<int16_t> b,
+                                            const VFromD<D> sum0,
+                                            VFromD<D>& /*sum1*/) {
+#if HWY_TARGET > HWY_AVX3_DL
+  const VFromD<D> pair_sums = WidenMulPairwiseAdd(d, a, b);
+  return sum0 + pair_sums;
+#else
+  (void)d;
+  // Single fused multiply-accumulate instruction.
+  const __m256i accumulated = _mm256_dpwssd_epi32(sum0.raw, a.raw, b.raw);
+  return VFromD<D>{accumulated};
+#endif
+}
+
+// ------------------------------ RearrangeToOddPlusEven
+// int32_t overload: returns `sum0` unchanged and ignores `sum1`; no
+// rearrangement is performed here.
+HWY_API Vec256<int32_t> RearrangeToOddPlusEven(const Vec256<int32_t> sum0,
+                                               Vec256<int32_t> /*sum1*/) {
+  return sum0;  // invariant already holds
+}
+
+// uint32_t overload: returns `sum0` unchanged and ignores `sum1`; no
+// rearrangement is performed here.
+HWY_API Vec256<uint32_t> RearrangeToOddPlusEven(const Vec256<uint32_t> sum0,
+                                                Vec256<uint32_t> /*sum1*/) {
+  return sum0;  // invariant already holds
+}
+
+// ------------------------------ SumOfMulQuadAccumulate
+
+#if HWY_TARGET <= HWY_AVX3_DL
+
+// Accumulates into `sum` the dot products of groups of four uint8_t (a_u) and
+// int8_t (b_i) lanes via _mm256_dpbusd_epi32.
+template <class DI32, HWY_IF_V_SIZE_D(DI32, 32)>
+HWY_API VFromD<DI32> SumOfMulQuadAccumulate(
+    DI32 /*di32*/, VFromD<Repartition<uint8_t, DI32>> a_u,
+    VFromD<Repartition<int8_t, DI32>> b_i, VFromD<DI32> sum) {
+  const __m256i accumulated = _mm256_dpbusd_epi32(sum.raw, a_u.raw, b_i.raw);
+  return VFromD<DI32>{accumulated};
+}
+
+#endif
+
+// ================================================== CONVERT
+
+// ------------------------------ Promotions (part w/ narrow lanes -> full)
+
+// float -> double.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec128<float> v) {
+  const __m256d widened = _mm256_cvtps_pd(v.raw);
+  return VFromD<D>{widened};
+}
+
+// int32_t -> double.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec128<int32_t> v) {
+  const __m256d widened = _mm256_cvtepi32_pd(v.raw);
+  return VFromD<D>{widened};
+}
+
+// Unsigned: zero-extend.
+// Note: these have 3 cycle latency; if inputs are already split across the
+// 128 bit blocks (in their upper/lower halves), then Zip* would be faster.
+// uint8_t -> uint16_t (zero-extend).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec128<uint8_t> v) {
+  const __m256i widened = _mm256_cvtepu8_epi16(v.raw);
+  return VFromD<D>{widened};
+}
+// uint8_t (8 lanes) -> uint32_t (zero-extend).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec128<uint8_t, 8> v) {
+  const __m256i widened = _mm256_cvtepu8_epi32(v.raw);
+  return VFromD<D>{widened};
+}
+// uint16_t -> uint32_t (zero-extend).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec128<uint16_t> v) {
+  const __m256i widened = _mm256_cvtepu16_epi32(v.raw);
+  return VFromD<D>{widened};
+}
+// uint32_t -> uint64_t (zero-extend).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec128<uint32_t> v) {
+  const __m256i widened = _mm256_cvtepu32_epi64(v.raw);
+  return VFromD<D>{widened};
+}
+// uint16_t (64-bit vector) -> uint64_t (zero-extend).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec64<uint16_t> v) {
+  const __m256i widened = _mm256_cvtepu16_epi64(v.raw);
+  return VFromD<D>{widened};
+}
+// uint8_t (32-bit vector) -> uint64_t (zero-extend).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec32<uint8_t> v) {
+  const __m256i widened = _mm256_cvtepu8_epi64(v.raw);
+  return VFromD<D>{widened};
+}
+
+// Signed: replicate sign bit.
+// Note: these have 3 cycle latency; if inputs are already split across the
+// 128 bit blocks (in their upper/lower halves), then ZipUpper/lo followed by
+// signed shift would be faster.
+// int8_t -> int16_t (sign-extend).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec128<int8_t> v) {
+  const __m256i widened = _mm256_cvtepi8_epi16(v.raw);
+  return VFromD<D>{widened};
+}
+// int8_t (8 lanes) -> int32_t (sign-extend).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec128<int8_t, 8> v) {
+  const __m256i widened = _mm256_cvtepi8_epi32(v.raw);
+  return VFromD<D>{widened};
+}
+// int16_t -> int32_t (sign-extend).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec128<int16_t> v) {
+  const __m256i widened = _mm256_cvtepi16_epi32(v.raw);
+  return VFromD<D>{widened};
+}
+// int32_t -> int64_t (sign-extend).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_I64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec128<int32_t> v) {
+  const __m256i widened = _mm256_cvtepi32_epi64(v.raw);
+  return VFromD<D>{widened};
+}
+// int16_t (64-bit vector) -> int64_t (sign-extend).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_I64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec64<int16_t> v) {
+  const __m256i widened = _mm256_cvtepi16_epi64(v.raw);
+  return VFromD<D>{widened};
+}
+// int8_t (32-bit vector) -> int64_t (sign-extend).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_I64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec32<int8_t> v) {
+  const __m256i widened = _mm256_cvtepi8_epi64(v.raw);
+  return VFromD<D>{widened};
+}
+
+// ------------------------------ Demotions (full -> part w/ narrow lanes)
+
+// int32_t -> uint16_t with unsigned saturation.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec256<int32_t> v) {
+  // The pack works within each 128-bit block, so `v` is packed with itself.
+  const __m256i packed = _mm256_packus_epi32(v.raw, v.raw);
+  // Gathering the lower 64 bits of both blocks afterward is more efficient
+  // than an extra input with low block = high block of v.
+  const __m256i gathered = _mm256_permute4x64_epi64(packed, 0x88);
+  return VFromD<D>{_mm256_castsi256_si128(gathered)};
+}
+
+// uint32_t -> uint16_t: clamp to the int32_t maximum, then reuse the int32_t
+// overload.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> DemoteTo(D dn, Vec256<uint32_t> v) {
+  const DFromV<decltype(v)> du32;
+  const RebindToSigned<decltype(du32)> di32;
+  const auto clamped = Min(v, Set(du32, 0x7FFFFFFFu));
+  return DemoteTo(dn, BitCast(di32, clamped));
+}
+
+// int32_t -> int16_t with signed saturation.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec256<int32_t> v) {
+  const __m256i packed = _mm256_packs_epi32(v.raw, v.raw);
+  // Gather the lower 64 bits of each 128-bit block into the lower half.
+  const __m256i gathered = _mm256_permute4x64_epi64(packed, 0x88);
+  return VFromD<D>{_mm256_castsi256_si128(gathered)};
+}
+
+// int32_t -> uint8_t: signed-saturating pack to int16_t, then
+// unsigned-saturating pack to uint8_t.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec256<int32_t> v) {
+  const __m256i packed16 = _mm256_packs_epi32(v.raw, v.raw);
+  // Concatenate lower 64 bits of each 128-bit block and keep the lower half.
+  const __m128i i16 =
+      _mm256_castsi256_si128(_mm256_permute4x64_epi64(packed16, 0x88));
+  const __m128i u8 = _mm_packus_epi16(i16, i16);
+  return VFromD<D>{u8};
+}
+
+// uint32_t -> uint8_t: native saturating conversion on AVX3; otherwise clamp
+// to the int32_t maximum and reuse the int32_t overload.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D dn, Vec256<uint32_t> v) {
+#if HWY_TARGET > HWY_AVX3
+  const DFromV<decltype(v)> du32;
+  const RebindToSigned<decltype(du32)> di32;
+  const auto clamped = Min(v, Set(du32, 0x7FFFFFFFu));
+  return DemoteTo(dn, BitCast(di32, clamped));
+#else
+  (void)dn;
+  const __m128i narrowed = _mm256_cvtusepi32_epi8(v.raw);
+  return VFromD<D>{narrowed};
+#endif
+}
+
+// int16_t -> uint8_t with unsigned saturation.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec256<int16_t> v) {
+  const __m256i packed = _mm256_packus_epi16(v.raw, v.raw);
+  // Gather the lower 64 bits of each 128-bit block into the lower half.
+  const __m256i gathered = _mm256_permute4x64_epi64(packed, 0x88);
+  return VFromD<D>{_mm256_castsi256_si128(gathered)};
+}
+
+// uint16_t -> uint8_t: clamp to the int16_t maximum, then reuse the int16_t
+// overload.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D dn, Vec256<uint16_t> v) {
+  const DFromV<decltype(v)> du16;
+  const RebindToSigned<decltype(du16)> di16;
+  const auto clamped = Min(v, Set(du16, 0x7FFFu));
+  return DemoteTo(dn, BitCast(di16, clamped));
+}
+
+// int32_t -> int8_t: two signed-saturating packs (32 -> 16 -> 8 bits).
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_I8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec256<int32_t> v) {
+  const __m256i packed16 = _mm256_packs_epi32(v.raw, v.raw);
+  // Concatenate lower 64 bits of each 128-bit block and keep the lower half.
+  const __m128i i16 =
+      _mm256_castsi256_si128(_mm256_permute4x64_epi64(packed16, 0x88));
+  const __m128i i8 = _mm_packs_epi16(i16, i16);
+  return VFromD<D>{i8};
+}
+
+// int16_t -> int8_t with signed saturation.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec256<int16_t> v) {
+  const __m256i packed = _mm256_packs_epi16(v.raw, v.raw);
+  // Gather the lower 64 bits of each 128-bit block into the lower half.
+  const __m256i gathered = _mm256_permute4x64_epi64(packed, 0x88);
+  return VFromD<D>{_mm256_castsi256_si128(gathered)};
+}
+
+#if HWY_TARGET <= HWY_AVX3
+// int64_t -> int32_t with signed saturation (AVX3).
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec256<int64_t> v) {
+  const __m128i narrowed = _mm256_cvtsepi64_epi32(v.raw);
+  return VFromD<D>{narrowed};
+}
+// int64_t -> int16_t with signed saturation (AVX3).
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec256<int64_t> v) {
+  const __m128i narrowed = _mm256_cvtsepi64_epi16(v.raw);
+  return VFromD<D>{narrowed};
+}
+// int64_t -> int8_t with signed saturation (AVX3).
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_I8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec256<int64_t> v) {
+  const __m128i narrowed = _mm256_cvtsepi64_epi8(v.raw);
+  return VFromD<D>{narrowed};
+}
+
+// int64_t -> uint32_t (AVX3): lanes selected by MaskFromVec(v) are zeroed by
+// the mask; the remaining lanes use the unsigned-saturating conversion.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec256<int64_t> v) {
+  const auto is_negative = MaskFromVec(v);
+#if !HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask8 keep = static_cast<__mmask8>(~is_negative.raw);
+#else
+  const __mmask8 keep = _knot_mask8(is_negative.raw);
+#endif
+  const __m128i narrowed = _mm256_maskz_cvtusepi64_epi32(keep, v.raw);
+  return VFromD<D>{narrowed};
+}
+// int64_t -> uint16_t (AVX3): lanes selected by MaskFromVec(v) are zeroed by
+// the mask; the remaining lanes use the unsigned-saturating conversion.
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec256<int64_t> v) {
+  const auto is_negative = MaskFromVec(v);
+#if !HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask8 keep = static_cast<__mmask8>(~is_negative.raw);
+#else
+  const __mmask8 keep = _knot_mask8(is_negative.raw);
+#endif
+  const __m128i narrowed = _mm256_maskz_cvtusepi64_epi16(keep, v.raw);
+  return VFromD<D>{narrowed};
+}
+// int64_t -> uint8_t (AVX3): lanes selected by MaskFromVec(v) are zeroed by
+// the mask; the remaining lanes use the unsigned-saturating conversion.
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec256<int64_t> v) {
+  const auto is_negative = MaskFromVec(v);
+#if !HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask8 keep = static_cast<__mmask8>(~is_negative.raw);
+#else
+  const __mmask8 keep = _knot_mask8(is_negative.raw);
+#endif
+  const __m128i narrowed = _mm256_maskz_cvtusepi64_epi8(keep, v.raw);
+  return VFromD<D>{narrowed};
+}
+
+// uint64_t -> uint32_t with unsigned saturation (AVX3).
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec256<uint64_t> v) {
+  const __m128i narrowed = _mm256_cvtusepi64_epi32(v.raw);
+  return VFromD<D>{narrowed};
+}
+// uint64_t -> uint16_t with unsigned saturation (AVX3).
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec256<uint64_t> v) {
+  const __m128i narrowed = _mm256_cvtusepi64_epi16(v.raw);
+  return VFromD<D>{narrowed};
+}
+// uint64_t -> uint8_t with unsigned saturation (AVX3).
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec256<uint64_t> v) {
+  const __m128i narrowed = _mm256_cvtusepi64_epi8(v.raw);
+  return VFromD<D>{narrowed};
+}
+#endif  // HWY_TARGET <= HWY_AVX3
+
+#ifndef HWY_DISABLE_F16C
+
+// Avoid "value of intrinsic immediate argument '8' is out of range '0 - 7'".
+// 8 is the correct value of _MM_FROUND_NO_EXC, which is allowed here.
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4556, ignored "-Wsign-conversion")
+
+// float -> float16 via the F16C conversion; _MM_FROUND_NO_EXC is passed as the
+// rounding/exception immediate.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F16_D(D)>
+HWY_API VFromD<D> DemoteTo(D df16, Vec256<float> v) {
+  (void)df16;
+  const auto halves = _mm256_cvtps_ph(v.raw, _MM_FROUND_NO_EXC);
+  return VFromD<D>{halves};
+}
+
+HWY_DIAGNOSTICS(pop)
+
+#endif  // HWY_DISABLE_F16C
+
+// float -> bfloat16: keeps the upper 16 bits of each float (moved down with a
+// logical shift) and demotes the resulting 32-bit lanes to 16 bits.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_BF16_D(D)>
+HWY_API VFromD<D> DemoteTo(D dbf16, Vec256<float> v) {
+  // TODO(janwas): _mm256_cvtneps_pbh once we have avx512bf16.
+  const Rebind<uint32_t, D> du32;  // unsigned so that ShiftRight is logical
+  const Rebind<int32_t, D> di32;
+  const Rebind<uint16_t, D> du16;
+  const auto upper_bits = ShiftRight<16>(BitCast(du32, v));
+  const auto as_i32 = BitCast(di32, upper_bits);
+  return BitCast(dbf16, DemoteTo(du16, as_i32));
+}
+
+// Two float vectors -> one bfloat16 vector (lane order not preserved): odd
+// 16-bit lanes come from the bits of `a`, even lanes from the upper 16 bits of
+// `b` moved down.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_BF16_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dbf16, Vec256<float> a, Vec256<float> b) {
+  // TODO(janwas): _mm256_cvtne2ps_pbh once we have avx512bf16.
+  const Repartition<uint32_t, D> du32;
+  const RebindToUnsigned<D> du16;
+  const Vec256<uint32_t> b_upper_in_even = ShiftRight<16>(BitCast(du32, b));
+  const auto a_bits = BitCast(du16, a);
+  const auto b_bits = BitCast(du16, b_upper_in_even);
+  return BitCast(dbf16, OddEven(a_bits, b_bits));
+}
+
+// Two int32_t vectors -> int16_t with signed saturation (per-block pack).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D /*d16*/, Vec256<int32_t> a,
+                                   Vec256<int32_t> b) {
+  const __m256i packed = _mm256_packs_epi32(a.raw, b.raw);
+  return VFromD<D>{packed};
+}
+
+// Two int32_t vectors -> uint16_t with unsigned saturation (per-block pack).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D /*d16*/, Vec256<int32_t> a,
+                                   Vec256<int32_t> b) {
+  const __m256i packed = _mm256_packus_epi32(a.raw, b.raw);
+  return VFromD<D>{packed};
+}
+
+// Two uint32_t vectors -> uint16_t: clamp both to the int32_t maximum, then
+// reuse the int32_t overload.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, Vec256<uint32_t> a,
+                                   Vec256<uint32_t> b) {
+  const DFromV<decltype(a)> du32;
+  const RebindToSigned<decltype(du32)> di32;
+  const auto limit = Set(du32, 0x7FFFFFFFu);
+  const auto a_clamped = BitCast(di32, Min(a, limit));
+  const auto b_clamped = BitCast(di32, Min(b, limit));
+  return ReorderDemote2To(dn, a_clamped, b_clamped);
+}
+
+// Two int16_t vectors -> int8_t with signed saturation (per-block pack).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_I8_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D /*d16*/, Vec256<int16_t> a,
+                                   Vec256<int16_t> b) {
+  const __m256i packed = _mm256_packs_epi16(a.raw, b.raw);
+  return VFromD<D>{packed};
+}
+
+// Two int16_t vectors -> uint8_t with unsigned saturation (per-block pack).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D /*d16*/, Vec256<int16_t> a,
+                                   Vec256<int16_t> b) {
+  const __m256i packed = _mm256_packus_epi16(a.raw, b.raw);
+  return VFromD<D>{packed};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, Vec256<uint16_t> a,
+                                   Vec256<uint16_t> b) {  // u16 -> u8
+  const DFromV<decltype(a)> d;
+  const RebindToSigned<decltype(d)> di;
+  const auto max_i16 = Set(d, 0x7FFFu);  // clamp so the i16 reinterpretation below is never negative
+  return ReorderDemote2To(dn, BitCast(di, Min(a, max_i16)),  // forwards to the Vec256<int16_t> overload
+                          BitCast(di, Min(b, max_i16)));
+}
+
+#if HWY_TARGET > HWY_AVX3
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_I32_D(D)>
+HWY_API Vec256<int32_t> ReorderDemote2To(D dn, Vec256<int64_t> a,
+                                         Vec256<int64_t> b) {  // i64 -> i32
+  const DFromV<decltype(a)> di64;
+  const RebindToUnsigned<decltype(di64)> du64;
+  const Half<decltype(dn)> dnh;
+  const Repartition<float, decltype(dn)> dn_f;  // float view, only so that _mm256_shuffle_ps can be used
+
+  // Negative values are saturated by first saturating their bitwise inverse
+  // and then inverting the saturation result
+  const auto invert_mask_a = BitCast(du64, BroadcastSignBit(a));  // all-ones in negative lanes, else zero
+  const auto invert_mask_b = BitCast(du64, BroadcastSignBit(b));
+  const auto saturated_a = Xor(
+      invert_mask_a,
+      detail::DemoteFromU64Saturate(dnh, Xor(invert_mask_a, BitCast(du64, a))));
+  const auto saturated_b = Xor(
+      invert_mask_b,
+      detail::DemoteFromU64Saturate(dnh, Xor(invert_mask_b, BitCast(du64, b))));
+
+  return BitCast(dn,
+                 Vec256<float>{_mm256_shuffle_ps(BitCast(dn_f, saturated_a).raw,
+                                                 BitCast(dn_f, saturated_b).raw,
+                                                 _MM_SHUFFLE(2, 0, 2, 0))});  // per 128-bit block: 32-bit elements a[0], a[2], b[0], b[2]
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U32_D(D)>
+HWY_API Vec256<uint32_t> ReorderDemote2To(D dn, Vec256<int64_t> a,
+                                          Vec256<int64_t> b) {  // i64 -> u32
+  const DFromV<decltype(a)> di64;
+  const RebindToUnsigned<decltype(di64)> du64;
+  const Half<decltype(dn)> dnh;
+  const Repartition<float, decltype(dn)> dn_f;  // float view, only so that _mm256_shuffle_ps can be used
+
+  const auto saturated_a = detail::DemoteFromU64Saturate(
+      dnh, BitCast(du64, AndNot(BroadcastSignBit(a), a)));  // AndNot with the sign mask zeroes negative lanes
+  const auto saturated_b = detail::DemoteFromU64Saturate(
+      dnh, BitCast(du64, AndNot(BroadcastSignBit(b), b)));
+
+  return BitCast(dn,
+                 Vec256<float>{_mm256_shuffle_ps(BitCast(dn_f, saturated_a).raw,
+                                                 BitCast(dn_f, saturated_b).raw,
+                                                 _MM_SHUFFLE(2, 0, 2, 0))});  // per 128-bit block: 32-bit elements a[0], a[2], b[0], b[2]
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U32_D(D)>
+HWY_API Vec256<uint32_t> ReorderDemote2To(D dn, Vec256<uint64_t> a,
+                                          Vec256<uint64_t> b) {  // u64 -> u32
+  const Half<decltype(dn)> dnh;
+  const Repartition<float, decltype(dn)> dn_f;  // float view, only so that _mm256_shuffle_ps can be used
+
+  const auto saturated_a = detail::DemoteFromU64Saturate(dnh, a);
+  const auto saturated_b = detail::DemoteFromU64Saturate(dnh, b);
+
+  return BitCast(dn,
+                 Vec256<float>{_mm256_shuffle_ps(BitCast(dn_f, saturated_a).raw,
+                                                 BitCast(dn_f, saturated_b).raw,
+                                                 _MM_SHUFFLE(2, 0, 2, 0))});  // per 128-bit block: 32-bit elements a[0], a[2], b[0], b[2]
+}
+#endif  // HWY_TARGET > HWY_AVX3
+
+template <class D, class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<D>),
+          HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<D>) * 2),
+          HWY_IF_LANES_D(D, HWY_MAX_LANES_D(DFromV<V>) * 2),
+          HWY_IF_T_SIZE_ONE_OF_V(V,
+                                 (1 << 1) | (1 << 2) | (1 << 4) |
+                                     ((HWY_TARGET > HWY_AVX3) ? (1 << 8) : 0))>  // 8-byte inputs only on targets without AVX3
+HWY_API VFromD<D> OrderedDemote2To(D d, V a, V b) {
+  return VFromD<D>{_mm256_permute4x64_epi64(ReorderDemote2To(d, a, b).raw,
+                                            _MM_SHUFFLE(3, 1, 2, 0))};  // swap the two middle 64-bit quarters: [a_lo, b_lo, a_hi, b_hi] -> [a_lo, a_hi, b_lo, b_hi]
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec256<double> v) {  // 4 x f64 -> 4 x f32 (128-bit result)
+  return VFromD<D>{_mm256_cvtpd_ps(v.raw)};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec256<double> v) {  // 4 x f64 -> 4 x i32 (128-bit result)
+  const Full256<double> d64;
+  const auto clamped = detail::ClampF64ToI32Max(d64, v);  // NOTE(review): helper is defined elsewhere; by its name it limits inputs to the i32 maximum - confirm
+  return VFromD<D>{_mm256_cvttpd_epi32(clamped.raw)};  // cvtt = convert with truncation (toward zero)
+}
+
+// For already range-limited input [0, 255].
+HWY_API Vec128<uint8_t, 8> U8FromU32(const Vec256<uint32_t> v) {
+  const Full256<uint32_t> d32;
+  const Full64<uint8_t> d8;
+  alignas(32) static constexpr uint32_t k8From32[8] = {
+      0x0C080400u, ~0u, ~0u, ~0u, ~0u, 0x0C080400u, ~0u, ~0u};  // 0x0C080400 selects bytes 0, 4, 8, 12 (low byte of each u32)
+  // Place first four bytes in lo[0], remaining 4 in hi[1].
+  const auto quad = TableLookupBytes(v, Load(d32, k8From32));
+  // Interleave both quadruplets - OR instead of unpack reduces port5 pressure.
+  const auto lo = LowerHalf(quad);
+  const auto hi = UpperHalf(Half<decltype(d32)>(), quad);
+  return BitCast(d8, LowerHalf(lo | hi));  // the OR relies on the ~0u table entries producing zero bytes
+}
+
+// ------------------------------ Truncations
+
+namespace detail {
+
+// LO and HI each hold four indices of bytes within a 128-bit block.
+template <uint32_t LO, uint32_t HI, typename T>
+HWY_INLINE Vec128<uint32_t> LookupAndConcatHalves(Vec256<T> v) {
+  const Full256<uint32_t> d32;
+
+#if HWY_TARGET <= HWY_AVX3_DL
+  alignas(32) static constexpr uint32_t kMap[8] = {
+      LO, HI, 0x10101010 + LO, 0x10101010 + HI, 0, 0, 0, 0};  // +0x10 per byte index addresses the upper 128-bit block
+  const auto result = _mm256_permutexvar_epi8(Load(d32, kMap).raw, v.raw);  // byte permute across the whole 256-bit vector
+#else
+  alignas(32) static constexpr uint32_t kMap[8] = {LO,  HI,  ~0u, ~0u,
+                                                   ~0u, ~0u, LO,  HI};
+  const auto quad = TableLookupBytes(v, Load(d32, kMap));
+  const auto result = _mm256_permute4x64_epi64(quad.raw, 0xCC);  // 0xCC selects 64-bit quarters {0, 3, 0, 3}
+  // Possible alternative:
+  // const auto lo = LowerHalf(quad);
+  // const auto hi = UpperHalf(Half<decltype(d32)>(), quad);
+  // const auto result = lo | hi;
+#endif
+
+  return Vec128<uint32_t>{_mm256_castsi256_si128(result)};  // only the lower 128 bits are returned
+}
+
+// LO and HI each hold two indices of bytes within a 128-bit block.
+template <uint16_t LO, uint16_t HI, typename T>
+HWY_INLINE Vec128<uint32_t, 2> LookupAndConcatQuarters(Vec256<T> v) {
+  const Full256<uint16_t> d16;
+
+#if HWY_TARGET <= HWY_AVX3_DL
+  alignas(32) static constexpr uint16_t kMap[16] = {
+      LO, HI, 0x1010 + LO, 0x1010 + HI, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};  // +0x10 per byte index addresses the upper 128-bit block
+  const auto result = _mm256_permutexvar_epi8(Load(d16, kMap).raw, v.raw);
+  return LowerHalf(Vec128<uint32_t>{_mm256_castsi256_si128(result)});
+#else
+  constexpr uint16_t ff = static_cast<uint16_t>(~0u);
+  alignas(32) static constexpr uint16_t kMap[16] = {
+      LO, ff, HI, ff, ff, ff, ff, ff, ff, ff, ff, ff, LO, ff, HI, ff};
+  const auto quad = TableLookupBytes(v, Load(d16, kMap));
+  const auto mixed = _mm256_permute4x64_epi64(quad.raw, 0xCC);  // 0xCC selects 64-bit quarters {0, 3, 0, 3}
+  const auto half = _mm256_castsi256_si128(mixed);
+  return LowerHalf(Vec128<uint32_t>{_mm_packus_epi32(half, half)});  // narrows each 32-bit element to 16 bits, closing the gaps left by the `ff` entries
+#endif
+}
+
+}  // namespace detail
+
+template <class D, HWY_IF_V_SIZE_D(D, 4), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, Vec256<uint64_t> v) {  // keeps the low byte of each u64
+  const Full256<uint32_t> d32;
+#if HWY_TARGET <= HWY_AVX3_DL
+  alignas(32) static constexpr uint32_t kMap[8] = {0x18100800u, 0, 0, 0,
+                                                   0,           0, 0, 0};  // byte indices 0, 8, 16, 24
+  const auto result = _mm256_permutexvar_epi8(Load(d32, kMap).raw, v.raw);
+  return LowerHalf(LowerHalf(LowerHalf(Vec256<uint8_t>{result})));
+#else
+  alignas(32) static constexpr uint32_t kMap[8] = {0xFFFF0800u, ~0u, ~0u, ~0u,
+                                                   0x0800FFFFu, ~0u, ~0u, ~0u};  // lower block fills bytes 0-1, upper block fills bytes 2-3
+  const auto quad = TableLookupBytes(v, Load(d32, kMap));
+  const auto lo = LowerHalf(quad);
+  const auto hi = UpperHalf(Half<decltype(d32)>(), quad);
+  const auto result = lo | hi;  // merge the two byte pairs into one 32-bit element
+  return LowerHalf(LowerHalf(Vec128<uint8_t>{result.raw}));
+#endif
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, Vec256<uint64_t> v) {  // keeps the low 16 bits of each u64
+  const auto result = detail::LookupAndConcatQuarters<0x100, 0x908>(v);  // byte indices {0, 1} and {8, 9} of each 128-bit block
+  return VFromD<D>{result.raw};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, Vec256<uint64_t> v) {  // keeps the low 32 bits of each u64
+  const Full256<uint32_t> d32;
+  alignas(32) static constexpr uint32_t kEven[8] = {0, 2, 4, 6, 0, 2, 4, 6};  // even u32 lanes = low halves of the u64 lanes
+  const auto v32 =
+      TableLookupLanes(BitCast(d32, v), SetTableIndices(d32, kEven));
+  return LowerHalf(Vec256<uint32_t>{v32.raw});
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, Vec256<uint32_t> v) {  // keeps the low byte of each u32
+  const auto full = detail::LookupAndConcatQuarters<0x400, 0xC08>(v);  // byte indices {0, 4} and {8, 12} of each 128-bit block
+  return VFromD<D>{full.raw};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, Vec256<uint32_t> v) {  // keeps the low 16 bits of each u32
+  const auto full = detail::LookupAndConcatHalves<0x05040100, 0x0D0C0908>(v);  // byte indices {0,1,4,5} and {8,9,12,13} of each 128-bit block
+  return VFromD<D>{full.raw};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, Vec256<uint16_t> v) {  // keeps the low byte of each u16
+  const auto full = detail::LookupAndConcatHalves<0x06040200, 0x0E0C0A08>(v);  // even byte indices 0..14 of each 128-bit block
+  return VFromD<D>{full.raw};
+}
+
+// ------------------------------ Integer <=> fp (ShiftRight, OddEven)
+
+#if HWY_HAVE_FLOAT16
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F16_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, Vec256<uint16_t> v) {  // u16 -> f16, lane by lane
+  return VFromD<D>{_mm256_cvtepu16_ph(v.raw)};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F16_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, Vec256<int16_t> v) {  // i16 -> f16, lane by lane
+  return VFromD<D>{_mm256_cvtepi16_ph(v.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, Vec256<int32_t> v) {  // i32 -> f32, lane by lane
+  return VFromD<D>{_mm256_cvtepi32_ps(v.raw)};
+}
+
+#if HWY_TARGET <= HWY_AVX3
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ConvertTo(D /*df*/, Vec256<uint32_t> v) {  // u32 -> f32; only compiled for AVX3 targets (see enclosing #if)
+  return VFromD<D>{_mm256_cvtepu32_ps(v.raw)};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> ConvertTo(D /*dd*/, Vec256<int64_t> v) {  // i64 -> f64; only compiled for AVX3 targets (see enclosing #if)
+  return VFromD<D>{_mm256_cvtepi64_pd(v.raw)};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> ConvertTo(D /*dd*/, Vec256<uint64_t> v) {  // u64 -> f64; only compiled for AVX3 targets (see enclosing #if)
+  return VFromD<D>{_mm256_cvtepu64_pd(v.raw)};
+}
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// Truncates (rounds toward zero).
+
+#if HWY_HAVE_FLOAT16
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> ConvertTo(D d, Vec256<float16_t> v) {  // f16 -> i16, truncating
+  return detail::FixConversionOverflow(d, v,  // NOTE(review): helper defined elsewhere; presumably repairs lanes whose input was out of range - confirm
+                                       VFromD<D>{_mm256_cvttph_epi16(v.raw)});
+}
+#endif  // HWY_HAVE_FLOAT16
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> ConvertTo(D d, Vec256<float> v) {  // f32 -> i32, truncating
+  return detail::FixConversionOverflow(d, v,  // NOTE(review): helper defined elsewhere; presumably repairs lanes whose input was out of range - confirm
+                                       VFromD<D>{_mm256_cvttps_epi32(v.raw)});
+}
+
+#if HWY_TARGET <= HWY_AVX3
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_I64_D(D)>
+HWY_API VFromD<D> ConvertTo(D di, Vec256<double> v) {  // f64 -> i64, truncating; AVX3 targets only (see enclosing #if)
+  return detail::FixConversionOverflow(di, v,  // NOTE(review): helper defined elsewhere; presumably repairs lanes whose input was out of range - confirm
+                                       VFromD<D>{_mm256_cvttpd_epi64(v.raw)});
+}
+#endif  // HWY_TARGET <= HWY_AVX3
+
+HWY_API Vec256<int32_t> NearestInt(const Vec256<float> v) {  // f32 -> i32 with rounding instead of truncation
+  const Full256<int32_t> di;
+  return detail::FixConversionOverflow(
+      di, v, Vec256<int32_t>{_mm256_cvtps_epi32(v.raw)});  // cvtps (no extra 't') rounds using the current MXCSR rounding mode
+}
+
+#ifndef HWY_DISABLE_F16C
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> PromoteTo(D df32, Vec128<float16_t> v) {  // 8 x f16 -> 8 x f32
+  (void)df32;  // the tag only selects this overload; its value is unused
+#if HWY_HAVE_FLOAT16
+  const RebindToUnsigned<DFromV<decltype(v)>> du16;
+  return VFromD<D>{_mm256_cvtph_ps(BitCast(du16, v).raw)};  // bit-cast to u16 first so `.raw` is passed to the intrinsic as an integer vector
+#else
+  return VFromD<D>{_mm256_cvtph_ps(v.raw)};
+#endif  // HWY_HAVE_FLOAT16
+}
+
+#endif  // HWY_DISABLE_F16C
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> PromoteTo(D df32, Vec128<bfloat16_t> v) {  // 8 x bf16 -> 8 x f32
+  const Rebind<uint16_t, decltype(df32)> du16;
+  const RebindToSigned<decltype(df32)> di32;
+  return BitCast(df32, ShiftLeft<16>(PromoteTo(di32, BitCast(du16, v))));  // widen the 16 bits to 32, then move them into the upper half of each lane
+}
+
+// ================================================== CRYPTO
+
+#if !defined(HWY_DISABLE_PCLMUL_AES)
+
+HWY_API Vec256<uint8_t> AESRound(Vec256<uint8_t> state,
+                                 Vec256<uint8_t> round_key) {  // one AES encryption round on each 128-bit block
+#if HWY_TARGET <= HWY_AVX3_DL
+  return Vec256<uint8_t>{_mm256_aesenc_epi128(state.raw, round_key.raw)};
+#else
+  const Full256<uint8_t> d;
+  const Half<decltype(d)> d2;
+  return Combine(d, AESRound(UpperHalf(d2, state), UpperHalf(d2, round_key)),  // no 256-bit instruction here: process each half with the narrower overload
+                 AESRound(LowerHalf(state), LowerHalf(round_key)));
+#endif
+}
+
+HWY_API Vec256<uint8_t> AESLastRound(Vec256<uint8_t> state,
+                                     Vec256<uint8_t> round_key) {  // final AES encryption round on each 128-bit block
+#if HWY_TARGET <= HWY_AVX3_DL
+  return Vec256<uint8_t>{_mm256_aesenclast_epi128(state.raw, round_key.raw)};
+#else
+  const Full256<uint8_t> d;
+  const Half<decltype(d)> d2;
+  return Combine(d,  // no 256-bit instruction here: process each half with the narrower overload
+                 AESLastRound(UpperHalf(d2, state), UpperHalf(d2, round_key)),
+                 AESLastRound(LowerHalf(state), LowerHalf(round_key)));
+#endif
+}
+
+HWY_API Vec256<uint8_t> AESRoundInv(Vec256<uint8_t> state,
+                                    Vec256<uint8_t> round_key) {  // one AES decryption round on each 128-bit block
+#if HWY_TARGET <= HWY_AVX3_DL
+  return Vec256<uint8_t>{_mm256_aesdec_epi128(state.raw, round_key.raw)};
+#else
+  const Full256<uint8_t> d;
+  const Half<decltype(d)> d2;
+  return Combine(d, AESRoundInv(UpperHalf(d2, state), UpperHalf(d2, round_key)),  // no 256-bit instruction here: process each half with the narrower overload
+                 AESRoundInv(LowerHalf(state), LowerHalf(round_key)));
+#endif
+}
+
+HWY_API Vec256<uint8_t> AESLastRoundInv(Vec256<uint8_t> state,
+                                        Vec256<uint8_t> round_key) {  // final AES decryption round on each 128-bit block
+#if HWY_TARGET <= HWY_AVX3_DL
+  return Vec256<uint8_t>{_mm256_aesdeclast_epi128(state.raw, round_key.raw)};
+#else
+  const Full256<uint8_t> d;
+  const Half<decltype(d)> d2;
+  return Combine(  // no 256-bit instruction here: process each half with the narrower overload
+      d, AESLastRoundInv(UpperHalf(d2, state), UpperHalf(d2, round_key)),
+      AESLastRoundInv(LowerHalf(state), LowerHalf(round_key)));
+#endif
+}
+
+template <class V, HWY_IF_V_SIZE_GT_V(V, 16), HWY_IF_U8_D(DFromV<V>)>
+HWY_API V AESInvMixColumns(V state) {  // overload for u8 vectors wider than 16 bytes
+  const DFromV<decltype(state)> d;
+#if HWY_TARGET <= HWY_AVX3_DL
+  // On AVX3_DL, it is more efficient to do an InvMixColumns operation for a
+  // 256-bit or 512-bit vector by doing a AESLastRound operation
+  // (_mm256_aesenclast_epi128/_mm512_aesenclast_epi128) followed by a
+  // AESRoundInv operation (_mm256_aesdec_epi128/_mm512_aesdec_epi128) than to
+  // split the vector into 128-bit vectors, carrying out multiple
+  // _mm_aesimc_si128 operations, and then combining the _mm_aesimc_si128
+  // results back into a 256-bit or 512-bit vector.
+  const auto zero = Zero(d);  // all-zero round key for both rounds
+  return AESRoundInv(AESLastRound(state, zero), zero);
+#else
+  const Half<decltype(d)> dh;
+  return Combine(d, AESInvMixColumns(UpperHalf(dh, state)),  // apply the half-width overload to each half
+                 AESInvMixColumns(LowerHalf(dh, state)));
+#endif
+}
+
+template <uint8_t kRcon>
+HWY_API Vec256<uint8_t> AESKeyGenAssist(Vec256<uint8_t> v) {  // kRcon: round constant, supplied at compile time
+  const Full256<uint8_t> d;
+#if HWY_TARGET <= HWY_AVX3_DL
+  alignas(16) static constexpr uint8_t kRconXorMask[16] = {
+      0, kRcon, 0, 0, 0, 0, 0, 0, 0, kRcon, 0, 0, 0, 0, 0, 0};  // passed as the round key, so kRcon is XORed in at bytes 1 and 9
+  alignas(16) static constexpr uint8_t kRotWordShuffle[16] = {
+      0, 13, 10, 7, 1, 14, 11, 4, 8, 5, 2, 15, 9, 6, 3, 12};
+  const Repartition<uint32_t, decltype(d)> du32;
+  const auto w13 = BitCast(d, DupOdd(BitCast(du32, v)));  // duplicate the odd u32 lanes (words 1 and 3 of each block)
+  const auto sub_word_result = AESLastRound(w13, LoadDup128(d, kRconXorMask));
+  return TableLookupBytes(sub_word_result, LoadDup128(d, kRotWordShuffle));  // byte shuffle applied within each 128-bit block
+#else
+  const Half<decltype(d)> d2;
+  return Combine(d, AESKeyGenAssist<kRcon>(UpperHalf(d2, v)),  // apply the 128-bit overload to each half
+                 AESKeyGenAssist<kRcon>(LowerHalf(v)));
+#endif
+}
+
+HWY_API Vec256<uint64_t> CLMulLower(Vec256<uint64_t> a, Vec256<uint64_t> b) {  // carry-less multiply of the lower u64 of each 128-bit block
+#if HWY_TARGET <= HWY_AVX3_DL
+  return Vec256<uint64_t>{_mm256_clmulepi64_epi128(a.raw, b.raw, 0x00)};  // imm 0x00: low qword of `a` times low qword of `b`
+#else
+  const Full256<uint64_t> d;
+  const Half<decltype(d)> d2;
+  return Combine(d, CLMulLower(UpperHalf(d2, a), UpperHalf(d2, b)),  // apply the 128-bit overload to each half
+                 CLMulLower(LowerHalf(a), LowerHalf(b)));
+#endif
+}
+
+HWY_API Vec256<uint64_t> CLMulUpper(Vec256<uint64_t> a, Vec256<uint64_t> b) {  // carry-less multiply of the upper u64 of each 128-bit block
+#if HWY_TARGET <= HWY_AVX3_DL
+  return Vec256<uint64_t>{_mm256_clmulepi64_epi128(a.raw, b.raw, 0x11)};  // imm 0x11: high qword of `a` times high qword of `b`
+#else
+  const Full256<uint64_t> d;
+  const Half<decltype(d)> d2;
+  return Combine(d, CLMulUpper(UpperHalf(d2, a), UpperHalf(d2, b)),  // apply the 128-bit overload to each half
+                 CLMulUpper(LowerHalf(a), LowerHalf(b)));
+#endif
+}
+
+#endif  // HWY_DISABLE_PCLMUL_AES
+
+// ================================================== MISC
+
+#if HWY_TARGET <= HWY_AVX3
+
+// ------------------------------ LoadMaskBits
+
+// `bits` points to at least 8 readable bytes, not all of which need be valid.
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API MFromD<D> LoadMaskBits(D d, const uint8_t* HWY_RESTRICT bits) {
+  constexpr size_t kN = MaxLanes(d);
+  constexpr size_t kNumBytes = (kN + 7) / 8;  // bytes needed for one bit per lane
+
+  uint64_t mask_bits = 0;
+  CopyBytes<kNumBytes>(bits, &mask_bits);
+
+  if (kN < 8) {  // fewer lanes than bits in the copied byte
+    mask_bits &= (1ull << kN) - 1;  // discard bits that do not correspond to a lane
+  }
+
+  return MFromD<D>::FromBits(mask_bits);
+}
+
+// ------------------------------ StoreMaskBits
+
+// `bits` points to at least 8 writable bytes.
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API size_t StoreMaskBits(D d, MFromD<D> mask, uint8_t* bits) {  // returns the number of bytes written
+  constexpr size_t kN = MaxLanes(d);
+  constexpr size_t kNumBytes = (kN + 7) / 8;  // bytes needed for one bit per lane
+
+  CopyBytes<kNumBytes>(&mask.raw, bits);
+
+  // Non-full byte, need to clear the undefined upper bits.
+  if (kN < 8) {
+    const int mask_bits = static_cast<int>((1ull << kN) - 1);
+    bits[0] = static_cast<uint8_t>(bits[0] & mask_bits);
+  }
+  return kNumBytes;
+}
+
+// ------------------------------ Mask testing
+
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API size_t CountTrue(D /* tag */, MFromD<D> mask) {  // number of set bits in the raw mask
+  return PopCount(static_cast<uint64_t>(mask.raw));
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API size_t FindKnownFirstTrue(D /* tag */, MFromD<D> mask) {  // index of the lowest set mask bit
+  return Num0BitsBelowLS1Bit_Nonzero32(mask.raw);  // the helper's name indicates the mask must be nonzero
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API intptr_t FindFirstTrue(D d, MFromD<D> mask) {  // index of the lowest set mask bit, or -1 if none is set
+  return mask.raw ? static_cast<intptr_t>(FindKnownFirstTrue(d, mask))
+                  : intptr_t{-1};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API size_t FindKnownLastTrue(D /* tag */, MFromD<D> mask) {  // index of the highest set mask bit
+  return 31 - Num0BitsAboveMS1Bit_Nonzero32(mask.raw);  // 31 minus the leading-zero count; the helper's name indicates the mask must be nonzero
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API intptr_t FindLastTrue(D d, MFromD<D> mask) {  // index of the highest set mask bit, or -1 if none is set
+  return mask.raw ? static_cast<intptr_t>(FindKnownLastTrue(d, mask))
+                  : intptr_t{-1};
+}
+
+// Beware: the suffix indicates the number of mask bits, not lane size!
+
+namespace detail {
+
+template <typename T>
+HWY_INLINE bool AllFalse(hwy::SizeTag<1> /*tag*/, const Mask256<T> mask) {  // 1-byte lanes
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestz_mask32_u8(mask.raw, mask.raw);  // 1 iff (mask | mask) is all zero
+#else
+  return mask.raw == 0;
+#endif
+}
+template <typename T>
+HWY_INLINE bool AllFalse(hwy::SizeTag<2> /*tag*/, const Mask256<T> mask) {  // 2-byte lanes
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestz_mask16_u8(mask.raw, mask.raw);  // 1 iff (mask | mask) is all zero
+#else
+  return mask.raw == 0;
+#endif
+}
+template <typename T>
+HWY_INLINE bool AllFalse(hwy::SizeTag<4> /*tag*/, const Mask256<T> mask) {  // 4-byte lanes
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestz_mask8_u8(mask.raw, mask.raw);  // 1 iff (mask | mask) is all zero
+#else
+  return mask.raw == 0;
+#endif
+}
+template <typename T>
+HWY_INLINE bool AllFalse(hwy::SizeTag<8> /*tag*/, const Mask256<T> mask) {  // 8-byte lanes
+  return (uint64_t{mask.raw} & 0xF) == 0;  // only the low 4 bits are tested
+}
+
+}  // namespace detail
+
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API bool AllFalse(D /* tag */, MFromD<D> mask) {  // true iff no lane of `mask` is set
+  return detail::AllFalse(hwy::SizeTag<sizeof(TFromD<D>)>(), mask);  // dispatch on the lane size in bytes
+}
+
+namespace detail {
+
+template <typename T>
+HWY_INLINE bool AllTrue(hwy::SizeTag<1> /*tag*/, const Mask256<T> mask) {  // 1-byte lanes
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestc_mask32_u8(mask.raw, mask.raw);  // 1 iff (mask | mask) is all ones
+#else
+  return mask.raw == 0xFFFFFFFFu;
+#endif
+}
+template <typename T>
+HWY_INLINE bool AllTrue(hwy::SizeTag<2> /*tag*/, const Mask256<T> mask) {  // 2-byte lanes
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestc_mask16_u8(mask.raw, mask.raw);  // 1 iff (mask | mask) is all ones
+#else
+  return mask.raw == 0xFFFFu;
+#endif
+}
+template <typename T>
+HWY_INLINE bool AllTrue(hwy::SizeTag<4> /*tag*/, const Mask256<T> mask) {  // 4-byte lanes
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestc_mask8_u8(mask.raw, mask.raw);  // 1 iff (mask | mask) is all ones
+#else
+  return mask.raw == 0xFFu;
+#endif
+}
+template <typename T>
+HWY_INLINE bool AllTrue(hwy::SizeTag<8> /*tag*/, const Mask256<T> mask) {  // 8-byte lanes
+  // Cannot use _kortestc because we have less than 8 mask bits.
+  return mask.raw == 0xFu;  // NOTE(review): exact compare, so this relies on mask bits above bit 3 being zero - confirm
+}
+
+}  // namespace detail
+
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API bool AllTrue(D /* tag */, const MFromD<D> mask) {  // true iff every lane of `mask` is set
+  return detail::AllTrue(hwy::SizeTag<sizeof(TFromD<D>)>(), mask);  // dispatch on the lane size in bytes
+}
+
+// ------------------------------ Compress
+
+// 16-bit is defined in x86_512 so we can use 512-bit vectors.
+
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec256<T> Compress(Vec256<T> v, Mask256<T> mask) {  // 4-byte lanes
+  return Vec256<T>{_mm256_maskz_compress_epi32(mask.raw, v.raw)};  // selected lanes packed to the low end, remaining lanes zeroed
+}
+
+HWY_API Vec256<float> Compress(Vec256<float> v, Mask256<float> mask) {  // float overload of the 4-byte case
+  return Vec256<float>{_mm256_maskz_compress_ps(mask.raw, v.raw)};  // selected lanes packed to the low end, remaining lanes zeroed
+}
+
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec256<T> Compress(Vec256<T> v, Mask256<T> mask) {  // 8-byte lanes: table-driven permutation
+  // See CompressIsPartition.
+  alignas(16) static constexpr uint64_t packed_array[16] = {  // one entry per 4-bit mask value; nibble i = source lane for output lane i
+      // PrintCompress64x4NibbleTables
+      0x00003210, 0x00003210, 0x00003201, 0x00003210, 0x00003102, 0x00003120,
+      0x00003021, 0x00003210, 0x00002103, 0x00002130, 0x00002031, 0x00002310,
+      0x00001032, 0x00001320, 0x00000321, 0x00003210};
+
+  // For lane i, shift the i-th 4-bit index down to bits [0, 2) -
+  // _mm256_permutexvar_epi64 will ignore the upper bits.
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du64;
+  const auto packed = Set(du64, packed_array[mask.raw]);  // NOTE(review): indexes a 16-entry table, so mask.raw must be < 16 - confirm upper mask bits are always clear
+  alignas(64) static constexpr uint64_t shifts[4] = {0, 4, 8, 12};  // per-lane right-shift amounts
+  const auto indices = Indices256<T>{(packed >> Load(du64, shifts)).raw};
+  return TableLookupLanes(v, indices);
+}
+
+// ------------------------------ CompressNot (Compress)
+
+// Implemented in x86_512 for lane size != 8.
+
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec256<T> CompressNot(Vec256<T> v, Mask256<T> mask) {  // 8-byte lanes: table-driven permutation
+  // See CompressIsPartition.
+  alignas(16) static constexpr uint64_t packed_array[16] = {  // one entry per 4-bit mask value; nibble i = source lane for output lane i
+      // PrintCompressNot64x4NibbleTables
+      0x00003210, 0x00000321, 0x00001320, 0x00001032, 0x00002310, 0x00002031,
+      0x00002130, 0x00002103, 0x00003210, 0x00003021, 0x00003120, 0x00003102,
+      0x00003210, 0x00003201, 0x00003210, 0x00003210};
+
+  // For lane i, shift the i-th 4-bit index down to bits [0, 2) -
+  // _mm256_permutexvar_epi64 will ignore the upper bits.
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du64;
+  const auto packed = Set(du64, packed_array[mask.raw]);  // NOTE(review): indexes a 16-entry table, so mask.raw must be < 16 - confirm upper mask bits are always clear
+  alignas(32) static constexpr uint64_t shifts[4] = {0, 4, 8, 12};  // per-lane right-shift amounts
+  const auto indices = Indices256<T>{(packed >> Load(du64, shifts)).raw};
+  return TableLookupLanes(v, indices);
+}
+
+// ------------------------------ CompressStore (defined in x86_512)
+// ------------------------------ CompressBlendedStore (defined in x86_512)
+// ------------------------------ CompressBitsStore (defined in x86_512)
+
+#else  // AVX2
+
+// ------------------------------ LoadMaskBits (TestBit)
+
+namespace detail {
+
+// 256 suffix avoids ambiguity with x86_128 without needing HWY_IF_V_SIZE.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_INLINE Mask256<T> LoadMaskBits256(uint64_t mask_bits) {  // 1-byte lanes: uses the low 32 bits of mask_bits
+  const Full256<T> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const Repartition<uint32_t, decltype(d)> du32;
+  const auto vbits = BitCast(du, Set(du32, static_cast<uint32_t>(mask_bits)));  // every u32 lane holds the same 4 mask bytes
+
+  // Replicate bytes 8x such that each byte contains the bit that governs it.
+  const Repartition<uint64_t, decltype(d)> du64;
+  alignas(32) static constexpr uint64_t kRep8[4] = {
+      0x0000000000000000ull, 0x0101010101010101ull, 0x0202020202020202ull,
+      0x0303030303030303ull};  // byte indices: mask byte 0 for lanes 0-7, byte 1 for lanes 8-15, and so on
+  const auto rep8 = TableLookupBytes(vbits, BitCast(du, Load(du64, kRep8)));
+
+  alignas(32) static constexpr uint8_t kBit[16] = {1, 2, 4, 8, 16, 32, 64, 128,
+                                                   1, 2, 4, 8, 16, 32, 64, 128};  // bit to test in each byte; repeated for both blocks by LoadDup128
+  return RebindMask(d, TestBit(rep8, LoadDup128(du, kBit)));
+}
+
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE Mask256<T> LoadMaskBits256(uint64_t mask_bits) {  // 2-byte lanes: uses the low 16 bits of mask_bits
+  const Full256<T> d;
+  const RebindToUnsigned<decltype(d)> du;
+  alignas(32) static constexpr uint16_t kBit[16] = {
+      1,     2,     4,     8,     16,     32,     64,     128,
+      0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000};  // lane i tests bit i
+  const auto vmask_bits = Set(du, static_cast<uint16_t>(mask_bits));  // broadcast the bits to every lane
+  return RebindMask(d, TestBit(vmask_bits, Load(du, kBit)));
+}
+
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE Mask256<T> LoadMaskBits256(uint64_t mask_bits) {  // 4-byte lanes: uses the low 8 bits of mask_bits
+  const Full256<T> d;
+  const RebindToUnsigned<decltype(d)> du;
+  alignas(32) static constexpr uint32_t kBit[8] = {1, 2, 4, 8, 16, 32, 64, 128};  // lane i tests bit i
+  const auto vmask_bits = Set(du, static_cast<uint32_t>(mask_bits));  // broadcast the bits to every lane
+  return RebindMask(d, TestBit(vmask_bits, Load(du, kBit)));
+}
+
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_INLINE Mask256<T> LoadMaskBits256(uint64_t mask_bits) {  // 8-byte lanes: uses the low 4 bits of mask_bits
+  const Full256<T> d;
+  const RebindToUnsigned<decltype(d)> du;
+  alignas(32) static constexpr uint64_t kBit[4] = {1, 2, 4, 8};  // lane i tests bit i; exactly the 4 lanes that Load reads
+  return RebindMask(d, TestBit(Set(du, mask_bits), Load(du, kBit)));
+}
+
+}  // namespace detail
+
+// `bits` points to at least 8 readable bytes, not all of which need be valid.
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API MFromD<D> LoadMaskBits(D d, const uint8_t* HWY_RESTRICT bits) {
+  constexpr size_t kN = MaxLanes(d);
+  constexpr size_t kNumBytes = (kN + 7) / 8;  // bytes needed for one bit per lane
+
+  uint64_t mask_bits = 0;
+  CopyBytes<kNumBytes>(bits, &mask_bits);
+
+  if (kN < 8) {  // fewer lanes than bits in the copied byte
+    mask_bits &= (1ull << kN) - 1;  // discard bits that do not correspond to a lane
+  }
+
+  return detail::LoadMaskBits256<TFromD<D>>(mask_bits);  // expand the bits into a vector mask, chosen by lane size
+}
+
+// ------------------------------ StoreMaskBits
+
+namespace detail {
+
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_INLINE uint64_t BitsFromMask(const Mask256<T> mask) {  // 1-byte lanes: 32 result bits
+  const Full256<T> d;
+  const Full256<uint8_t> d8;
+  const auto sign_bits = BitCast(d8, VecFromMask(d, mask)).raw;
+  // Prevent sign-extension of 32-bit masks because the intrinsic returns int.
+  return static_cast<uint32_t>(_mm256_movemask_epi8(sign_bits));
+}
+
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE uint64_t BitsFromMask(const Mask256<T> mask) {  // 2-byte lanes: 16 result bits
+#if !defined(HWY_DISABLE_BMI2_FMA) && !defined(HWY_DISABLE_PEXT_ON_AVX2)
+  const Full256<T> d;
+  const Full256<uint8_t> d8;
+  const Mask256<uint8_t> mask8 = MaskFromVec(BitCast(d8, VecFromMask(d, mask)));
+  const uint64_t sign_bits8 = BitsFromMask(mask8);
+  // Skip the bits from the lower byte of each u16 (better not to use the
+  // same packs_epi16 as SSE4, because that requires an extra swizzle here).
+  return _pext_u32(static_cast<uint32_t>(sign_bits8), 0xAAAAAAAAu);  // 0xAAAAAAAA keeps the odd bit positions, i.e. each u16's upper byte
+#else
+  // Slow workaround for when BMI2 is disabled
+  // Remove useless lower half of each u16 while preserving the sign bit.
+  // Bytes [0, 8) and [16, 24) have the same sign bits as the input lanes.
+  const auto sign_bits = _mm256_packs_epi16(mask.raw, _mm256_setzero_si256());
+  // Move odd qwords (value zero) to top so they don't affect the mask value.
+  const auto compressed = _mm256_castsi256_si128(
+      _mm256_permute4x64_epi64(sign_bits, _MM_SHUFFLE(3, 1, 2, 0)));
+  return static_cast<unsigned>(_mm_movemask_epi8(compressed));
+#endif  // !HWY_DISABLE_BMI2_FMA && !HWY_DISABLE_PEXT_ON_AVX2
+}
+
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE uint64_t BitsFromMask(const Mask256<T> mask) {  // 4-byte lanes: 8 result bits
+  const Full256<T> d;
+  const Full256<float> df;
+  const auto sign_bits = BitCast(df, VecFromMask(d, mask)).raw;
+  return static_cast<unsigned>(_mm256_movemask_ps(sign_bits));  // one bit per 32-bit lane (its sign bit)
+}
+
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_INLINE uint64_t BitsFromMask(const Mask256<T> mask) {  // 8-byte lanes: 4 result bits
+  const Full256<T> d;
+  const Full256<double> df;
+  const auto sign_bits = BitCast(df, VecFromMask(d, mask)).raw;
+  return static_cast<unsigned>(_mm256_movemask_pd(sign_bits));  // one bit per 64-bit lane (its sign bit)
+}
+
+}  // namespace detail
+
+// `bits` points to at least 8 writable bytes.
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API size_t StoreMaskBits(D d, MFromD<D> mask, uint8_t* bits) {  // returns the number of bytes written
+  constexpr size_t N = Lanes(d);
+  constexpr size_t kNumBytes = (N + 7) / 8;  // bytes needed for one bit per lane
+
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  CopyBytes<kNumBytes>(&mask_bits, bits);
+  return kNumBytes;
+}
+
+// ------------------------------ Mask testing
+
+// Specialize for 16-bit lanes to avoid unnecessary pext. This assumes each mask
+// lane is 0 or ~0.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API bool AllFalse(D d, MFromD<D> mask) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  const Mask256<uint8_t> mask8 = MaskFromVec(BitCast(d8, VecFromMask(d, mask)));  // view the same mask as 32 byte lanes
+  return detail::BitsFromMask(mask8) == 0;
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_T_SIZE_D(D, 2)>
+HWY_API bool AllFalse(D /* tag */, MFromD<D> mask) {  // all lane sizes except 2 bytes
+  // Cheaper than PTEST, which is 2 uop / 3L.
+  return detail::BitsFromMask(mask) == 0;
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API bool AllTrue(D d, MFromD<D> mask) {  // 2-byte lanes, tested through a byte-lane view
+  const Repartition<uint8_t, decltype(d)> d8;
+  const Mask256<uint8_t> mask8 = MaskFromVec(BitCast(d8, VecFromMask(d, mask)));  // view the same mask as 32 byte lanes
+  return detail::BitsFromMask(mask8) == (1ull << 32) - 1;  // all 32 byte-lane bits set
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_T_SIZE_D(D, 2)>
+HWY_API bool AllTrue(D d, MFromD<D> mask) {  // all lane sizes except 2 bytes
+  constexpr uint64_t kAllBits = (1ull << Lanes(d)) - 1;  // one set bit per lane
+  return detail::BitsFromMask(mask) == kAllBits;
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API size_t CountTrue(D d, MFromD<D> mask) {  // 2-byte lanes, counted through a byte-lane view
+  const Repartition<uint8_t, decltype(d)> d8;
+  const Mask256<uint8_t> mask8 = MaskFromVec(BitCast(d8, VecFromMask(d, mask)));  // view the same mask as 32 byte lanes
+  return PopCount(detail::BitsFromMask(mask8)) >> 1;  // each true u16 lane contributes two byte bits, hence the halving
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_T_SIZE_D(D, 2)>
+HWY_API size_t CountTrue(D /* tag */, MFromD<D> mask) {  // all lane sizes except 2 bytes
+  return PopCount(detail::BitsFromMask(mask));  // one bit per lane
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API size_t FindKnownFirstTrue(D /* tag */, MFromD<D> mask) {  // index of the first true lane
+  const uint32_t mask_bits = static_cast<uint32_t>(detail::BitsFromMask(mask));
+  return Num0BitsBelowLS1Bit_Nonzero32(mask_bits);  // the helper's name indicates the mask must be nonzero
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API intptr_t FindFirstTrue(D /* tag */, MFromD<D> mask) {  // index of the first true lane, or -1 if none
+  const uint32_t mask_bits = static_cast<uint32_t>(detail::BitsFromMask(mask));
+  return mask_bits ? intptr_t(Num0BitsBelowLS1Bit_Nonzero32(mask_bits)) : -1;
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API size_t FindKnownLastTrue(D /* tag */, MFromD<D> mask) {  // index of the last true lane
+  const uint32_t mask_bits = static_cast<uint32_t>(detail::BitsFromMask(mask));
+  return 31 - Num0BitsAboveMS1Bit_Nonzero32(mask_bits);  // 31 minus the leading-zero count; the helper's name indicates the mask must be nonzero
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API intptr_t FindLastTrue(D /* tag */, MFromD<D> mask) {  // index of the last true lane, or -1 if none
+  const uint32_t mask_bits = static_cast<uint32_t>(detail::BitsFromMask(mask));
+  return mask_bits ? intptr_t(31 - Num0BitsAboveMS1Bit_Nonzero32(mask_bits))
+                   : -1;
+}
+
+// ------------------------------ Compress, CompressBits
+
+namespace detail {
+
+// Builds the lane permutation used by Compress for vectors of eight 4-byte
+// lanes. `mask_bits` indexes the 256-entry table below directly, so it must be
+// below 256. `T` only selects this overload; the result is always a vector of
+// uint32_t. Lane i of the result is the table entry shifted right by 4 * i,
+// which places nibble i in the low bits of lane i.
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE Vec256<uint32_t> IndicesFromBits256(uint64_t mask_bits) {
+  const Full256<uint32_t> d32;
+  // We need a masked Iota(). With 8 lanes, there are 256 combinations and a LUT
+  // of SetTableIndices would require 8 KiB, a large part of L1D. The other
+  // alternative is _pext_u64, but this is extremely slow on Zen2 (18 cycles)
+  // and unavailable in 32-bit builds. We instead compress each index into 4
+  // bits, for a total of 1 KiB.
+  // Each entry holds eight nibbles, least-significant nibble first. The low
+  // three bits of a nibble are a source lane; CompressBlendedStore in this
+  // file additionally reads bit 3 of each nibble as a FirstN flag.
+  alignas(16) static constexpr uint32_t packed_array[256] = {
+      // PrintCompress32x8Tables
+      0x76543210, 0x76543218, 0x76543209, 0x76543298, 0x7654310a, 0x765431a8,
+      0x765430a9, 0x76543a98, 0x7654210b, 0x765421b8, 0x765420b9, 0x76542b98,
+      0x765410ba, 0x76541ba8, 0x76540ba9, 0x7654ba98, 0x7653210c, 0x765321c8,
+      0x765320c9, 0x76532c98, 0x765310ca, 0x76531ca8, 0x76530ca9, 0x7653ca98,
+      0x765210cb, 0x76521cb8, 0x76520cb9, 0x7652cb98, 0x76510cba, 0x7651cba8,
+      0x7650cba9, 0x765cba98, 0x7643210d, 0x764321d8, 0x764320d9, 0x76432d98,
+      0x764310da, 0x76431da8, 0x76430da9, 0x7643da98, 0x764210db, 0x76421db8,
+      0x76420db9, 0x7642db98, 0x76410dba, 0x7641dba8, 0x7640dba9, 0x764dba98,
+      0x763210dc, 0x76321dc8, 0x76320dc9, 0x7632dc98, 0x76310dca, 0x7631dca8,
+      0x7630dca9, 0x763dca98, 0x76210dcb, 0x7621dcb8, 0x7620dcb9, 0x762dcb98,
+      0x7610dcba, 0x761dcba8, 0x760dcba9, 0x76dcba98, 0x7543210e, 0x754321e8,
+      0x754320e9, 0x75432e98, 0x754310ea, 0x75431ea8, 0x75430ea9, 0x7543ea98,
+      0x754210eb, 0x75421eb8, 0x75420eb9, 0x7542eb98, 0x75410eba, 0x7541eba8,
+      0x7540eba9, 0x754eba98, 0x753210ec, 0x75321ec8, 0x75320ec9, 0x7532ec98,
+      0x75310eca, 0x7531eca8, 0x7530eca9, 0x753eca98, 0x75210ecb, 0x7521ecb8,
+      0x7520ecb9, 0x752ecb98, 0x7510ecba, 0x751ecba8, 0x750ecba9, 0x75ecba98,
+      0x743210ed, 0x74321ed8, 0x74320ed9, 0x7432ed98, 0x74310eda, 0x7431eda8,
+      0x7430eda9, 0x743eda98, 0x74210edb, 0x7421edb8, 0x7420edb9, 0x742edb98,
+      0x7410edba, 0x741edba8, 0x740edba9, 0x74edba98, 0x73210edc, 0x7321edc8,
+      0x7320edc9, 0x732edc98, 0x7310edca, 0x731edca8, 0x730edca9, 0x73edca98,
+      0x7210edcb, 0x721edcb8, 0x720edcb9, 0x72edcb98, 0x710edcba, 0x71edcba8,
+      0x70edcba9, 0x7edcba98, 0x6543210f, 0x654321f8, 0x654320f9, 0x65432f98,
+      0x654310fa, 0x65431fa8, 0x65430fa9, 0x6543fa98, 0x654210fb, 0x65421fb8,
+      0x65420fb9, 0x6542fb98, 0x65410fba, 0x6541fba8, 0x6540fba9, 0x654fba98,
+      0x653210fc, 0x65321fc8, 0x65320fc9, 0x6532fc98, 0x65310fca, 0x6531fca8,
+      0x6530fca9, 0x653fca98, 0x65210fcb, 0x6521fcb8, 0x6520fcb9, 0x652fcb98,
+      0x6510fcba, 0x651fcba8, 0x650fcba9, 0x65fcba98, 0x643210fd, 0x64321fd8,
+      0x64320fd9, 0x6432fd98, 0x64310fda, 0x6431fda8, 0x6430fda9, 0x643fda98,
+      0x64210fdb, 0x6421fdb8, 0x6420fdb9, 0x642fdb98, 0x6410fdba, 0x641fdba8,
+      0x640fdba9, 0x64fdba98, 0x63210fdc, 0x6321fdc8, 0x6320fdc9, 0x632fdc98,
+      0x6310fdca, 0x631fdca8, 0x630fdca9, 0x63fdca98, 0x6210fdcb, 0x621fdcb8,
+      0x620fdcb9, 0x62fdcb98, 0x610fdcba, 0x61fdcba8, 0x60fdcba9, 0x6fdcba98,
+      0x543210fe, 0x54321fe8, 0x54320fe9, 0x5432fe98, 0x54310fea, 0x5431fea8,
+      0x5430fea9, 0x543fea98, 0x54210feb, 0x5421feb8, 0x5420feb9, 0x542feb98,
+      0x5410feba, 0x541feba8, 0x540feba9, 0x54feba98, 0x53210fec, 0x5321fec8,
+      0x5320fec9, 0x532fec98, 0x5310feca, 0x531feca8, 0x530feca9, 0x53feca98,
+      0x5210fecb, 0x521fecb8, 0x520fecb9, 0x52fecb98, 0x510fecba, 0x51fecba8,
+      0x50fecba9, 0x5fecba98, 0x43210fed, 0x4321fed8, 0x4320fed9, 0x432fed98,
+      0x4310feda, 0x431feda8, 0x430feda9, 0x43feda98, 0x4210fedb, 0x421fedb8,
+      0x420fedb9, 0x42fedb98, 0x410fedba, 0x41fedba8, 0x40fedba9, 0x4fedba98,
+      0x3210fedc, 0x321fedc8, 0x320fedc9, 0x32fedc98, 0x310fedca, 0x31fedca8,
+      0x30fedca9, 0x3fedca98, 0x210fedcb, 0x21fedcb8, 0x20fedcb9, 0x2fedcb98,
+      0x10fedcba, 0x1fedcba8, 0x0fedcba9, 0xfedcba98};
+
+  // No need to mask because _mm256_permutevar8x32_epi32 ignores bits 3..31.
+  // Just shift each copy of the 32 bit LUT to extract its 4-bit fields.
+  // If broadcasting 32-bit from memory incurs the 3-cycle block-crossing
+  // latency, it may be faster to use LoadDup128 and PSHUFB.
+  const auto packed = Set(d32, packed_array[mask_bits]);
+  // Per-lane shift counts: lane i shifts by 4 * i bits.
+  alignas(32) static constexpr uint32_t shifts[8] = {0,  4,  8,  12,
+                                                     16, 20, 24, 28};
+  return packed >> Load(d32, shifts);
+}
+
+// Builds the lane permutation used by Compress for vectors of four 8-byte
+// lanes. The result still consists of eight uint32_t indices: each 64-bit
+// lane is addressed as a pair of adjacent 32-bit lanes. `mask_bits` selects
+// one 8-entry row of the table, so it must be below 16. `T` only selects this
+// overload.
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_INLINE Vec256<uint32_t> IndicesFromBits256(uint64_t mask_bits) {
+  // There is no 64-bit permutevar, hence 32-bit indices. With only 4 lanes
+  // (16 mask combinations) the complete index vectors fit in a small table,
+  // so no nibble unpacking is required: a row is loaded as-is.
+  alignas(32) static constexpr uint32_t kPairIndices[128] = {
+      // PrintCompress64x4PairTables
+      0,  1,  2,  3,  4,  5,  6, 7, 8, 9, 2,  3,  4,  5,  6,  7,
+      10, 11, 0,  1,  4,  5,  6, 7, 8, 9, 10, 11, 4,  5,  6,  7,
+      12, 13, 0,  1,  2,  3,  6, 7, 8, 9, 12, 13, 2,  3,  6,  7,
+      10, 11, 12, 13, 0,  1,  6, 7, 8, 9, 10, 11, 12, 13, 6,  7,
+      14, 15, 0,  1,  2,  3,  4, 5, 8, 9, 14, 15, 2,  3,  4,  5,
+      10, 11, 14, 15, 0,  1,  4, 5, 8, 9, 10, 11, 14, 15, 4,  5,
+      12, 13, 14, 15, 0,  1,  2, 3, 8, 9, 12, 13, 14, 15, 2,  3,
+      10, 11, 12, 13, 14, 15, 0, 1, 8, 9, 10, 11, 12, 13, 14, 15};
+
+  const Full256<uint32_t> du32;
+  const uint32_t* row = kPairIndices + 8 * mask_bits;
+  return Load(du32, row);
+}
+
+// Builds the lane permutation used by CompressNot for vectors of eight 4-byte
+// lanes. `mask_bits` indexes the 256-entry table below directly, so it must be
+// below 256. `T` only selects this overload; the result is always a vector of
+// uint32_t. Lane i of the result is the table entry shifted right by 4 * i,
+// which places nibble i in the low bits of lane i.
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE Vec256<uint32_t> IndicesFromNotBits256(uint64_t mask_bits) {
+  const Full256<uint32_t> d32;
+  // We need a masked Iota(). With 8 lanes, there are 256 combinations and a LUT
+  // of SetTableIndices would require 8 KiB, a large part of L1D. The other
+  // alternative is _pext_u64, but this is extremely slow on Zen2 (18 cycles)
+  // and unavailable in 32-bit builds. We instead compress each index into 4
+  // bits, for a total of 1 KiB.
+  // Each entry holds eight nibbles, least-significant nibble first. Every
+  // nibble in this table is in the range 8..f; the low three bits are the
+  // source lane and bit 3 is ignored by the permute (see below).
+  alignas(16) static constexpr uint32_t packed_array[256] = {
+      // PrintCompressNot32x8Tables
+      0xfedcba98, 0x8fedcba9, 0x9fedcba8, 0x98fedcba, 0xafedcb98, 0xa8fedcb9,
+      0xa9fedcb8, 0xa98fedcb, 0xbfedca98, 0xb8fedca9, 0xb9fedca8, 0xb98fedca,
+      0xbafedc98, 0xba8fedc9, 0xba9fedc8, 0xba98fedc, 0xcfedba98, 0xc8fedba9,
+      0xc9fedba8, 0xc98fedba, 0xcafedb98, 0xca8fedb9, 0xca9fedb8, 0xca98fedb,
+      0xcbfeda98, 0xcb8feda9, 0xcb9feda8, 0xcb98feda, 0xcbafed98, 0xcba8fed9,
+      0xcba9fed8, 0xcba98fed, 0xdfecba98, 0xd8fecba9, 0xd9fecba8, 0xd98fecba,
+      0xdafecb98, 0xda8fecb9, 0xda9fecb8, 0xda98fecb, 0xdbfeca98, 0xdb8feca9,
+      0xdb9feca8, 0xdb98feca, 0xdbafec98, 0xdba8fec9, 0xdba9fec8, 0xdba98fec,
+      0xdcfeba98, 0xdc8feba9, 0xdc9feba8, 0xdc98feba, 0xdcafeb98, 0xdca8feb9,
+      0xdca9feb8, 0xdca98feb, 0xdcbfea98, 0xdcb8fea9, 0xdcb9fea8, 0xdcb98fea,
+      0xdcbafe98, 0xdcba8fe9, 0xdcba9fe8, 0xdcba98fe, 0xefdcba98, 0xe8fdcba9,
+      0xe9fdcba8, 0xe98fdcba, 0xeafdcb98, 0xea8fdcb9, 0xea9fdcb8, 0xea98fdcb,
+      0xebfdca98, 0xeb8fdca9, 0xeb9fdca8, 0xeb98fdca, 0xebafdc98, 0xeba8fdc9,
+      0xeba9fdc8, 0xeba98fdc, 0xecfdba98, 0xec8fdba9, 0xec9fdba8, 0xec98fdba,
+      0xecafdb98, 0xeca8fdb9, 0xeca9fdb8, 0xeca98fdb, 0xecbfda98, 0xecb8fda9,
+      0xecb9fda8, 0xecb98fda, 0xecbafd98, 0xecba8fd9, 0xecba9fd8, 0xecba98fd,
+      0xedfcba98, 0xed8fcba9, 0xed9fcba8, 0xed98fcba, 0xedafcb98, 0xeda8fcb9,
+      0xeda9fcb8, 0xeda98fcb, 0xedbfca98, 0xedb8fca9, 0xedb9fca8, 0xedb98fca,
+      0xedbafc98, 0xedba8fc9, 0xedba9fc8, 0xedba98fc, 0xedcfba98, 0xedc8fba9,
+      0xedc9fba8, 0xedc98fba, 0xedcafb98, 0xedca8fb9, 0xedca9fb8, 0xedca98fb,
+      0xedcbfa98, 0xedcb8fa9, 0xedcb9fa8, 0xedcb98fa, 0xedcbaf98, 0xedcba8f9,
+      0xedcba9f8, 0xedcba98f, 0xfedcba98, 0xf8edcba9, 0xf9edcba8, 0xf98edcba,
+      0xfaedcb98, 0xfa8edcb9, 0xfa9edcb8, 0xfa98edcb, 0xfbedca98, 0xfb8edca9,
+      0xfb9edca8, 0xfb98edca, 0xfbaedc98, 0xfba8edc9, 0xfba9edc8, 0xfba98edc,
+      0xfcedba98, 0xfc8edba9, 0xfc9edba8, 0xfc98edba, 0xfcaedb98, 0xfca8edb9,
+      0xfca9edb8, 0xfca98edb, 0xfcbeda98, 0xfcb8eda9, 0xfcb9eda8, 0xfcb98eda,
+      0xfcbaed98, 0xfcba8ed9, 0xfcba9ed8, 0xfcba98ed, 0xfdecba98, 0xfd8ecba9,
+      0xfd9ecba8, 0xfd98ecba, 0xfdaecb98, 0xfda8ecb9, 0xfda9ecb8, 0xfda98ecb,
+      0xfdbeca98, 0xfdb8eca9, 0xfdb9eca8, 0xfdb98eca, 0xfdbaec98, 0xfdba8ec9,
+      0xfdba9ec8, 0xfdba98ec, 0xfdceba98, 0xfdc8eba9, 0xfdc9eba8, 0xfdc98eba,
+      0xfdcaeb98, 0xfdca8eb9, 0xfdca9eb8, 0xfdca98eb, 0xfdcbea98, 0xfdcb8ea9,
+      0xfdcb9ea8, 0xfdcb98ea, 0xfdcbae98, 0xfdcba8e9, 0xfdcba9e8, 0xfdcba98e,
+      0xfedcba98, 0xfe8dcba9, 0xfe9dcba8, 0xfe98dcba, 0xfeadcb98, 0xfea8dcb9,
+      0xfea9dcb8, 0xfea98dcb, 0xfebdca98, 0xfeb8dca9, 0xfeb9dca8, 0xfeb98dca,
+      0xfebadc98, 0xfeba8dc9, 0xfeba9dc8, 0xfeba98dc, 0xfecdba98, 0xfec8dba9,
+      0xfec9dba8, 0xfec98dba, 0xfecadb98, 0xfeca8db9, 0xfeca9db8, 0xfeca98db,
+      0xfecbda98, 0xfecb8da9, 0xfecb9da8, 0xfecb98da, 0xfecbad98, 0xfecba8d9,
+      0xfecba9d8, 0xfecba98d, 0xfedcba98, 0xfed8cba9, 0xfed9cba8, 0xfed98cba,
+      0xfedacb98, 0xfeda8cb9, 0xfeda9cb8, 0xfeda98cb, 0xfedbca98, 0xfedb8ca9,
+      0xfedb9ca8, 0xfedb98ca, 0xfedbac98, 0xfedba8c9, 0xfedba9c8, 0xfedba98c,
+      0xfedcba98, 0xfedc8ba9, 0xfedc9ba8, 0xfedc98ba, 0xfedcab98, 0xfedca8b9,
+      0xfedca9b8, 0xfedca98b, 0xfedcba98, 0xfedcb8a9, 0xfedcb9a8, 0xfedcb98a,
+      0xfedcba98, 0xfedcba89, 0xfedcba98, 0xfedcba98};
+
+  // No need to mask because <_mm256_permutevar8x32_epi32> ignores bits 3..31.
+  // Just shift each copy of the 32 bit LUT to extract its 4-bit fields.
+  // If broadcasting 32-bit from memory incurs the 3-cycle block-crossing
+  // latency, it may be faster to use LoadDup128 and PSHUFB.
+  const Vec256<uint32_t> packed = Set(d32, packed_array[mask_bits]);
+  // Per-lane shift counts: lane i shifts by 4 * i bits.
+  alignas(32) static constexpr uint32_t shifts[8] = {0,  4,  8,  12,
+                                                     16, 20, 24, 28};
+  return packed >> Load(d32, shifts);
+}
+
+// Builds the lane permutation used by CompressNot for vectors of four 8-byte
+// lanes. The result consists of eight uint32_t indices: each 64-bit lane is
+// addressed as a pair of adjacent 32-bit lanes. `mask_bits` selects one
+// 8-entry row of the table, so it must be below 16. `T` only selects this
+// overload.
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_INLINE Vec256<uint32_t> IndicesFromNotBits256(uint64_t mask_bits) {
+  // There is no 64-bit permutevar, hence 32-bit indices. With only 4 lanes
+  // (16 mask combinations) the complete index vectors fit in a small table,
+  // so no nibble unpacking is required: a row is loaded as-is.
+  alignas(32) static constexpr uint32_t kPairIndices[128] = {
+      // PrintCompressNot64x4PairTables
+      8, 9, 10, 11, 12, 13, 14, 15, 10, 11, 12, 13, 14, 15, 8,  9,
+      8, 9, 12, 13, 14, 15, 10, 11, 12, 13, 14, 15, 8,  9,  10, 11,
+      8, 9, 10, 11, 14, 15, 12, 13, 10, 11, 14, 15, 8,  9,  12, 13,
+      8, 9, 14, 15, 10, 11, 12, 13, 14, 15, 8,  9,  10, 11, 12, 13,
+      8, 9, 10, 11, 12, 13, 14, 15, 10, 11, 12, 13, 8,  9,  14, 15,
+      8, 9, 12, 13, 10, 11, 14, 15, 12, 13, 8,  9,  10, 11, 14, 15,
+      8, 9, 10, 11, 12, 13, 14, 15, 10, 11, 8,  9,  12, 13, 14, 15,
+      8, 9, 10, 11, 12, 13, 14, 15, 8,  9,  10, 11, 12, 13, 14, 15};
+
+  const Full256<uint32_t> du32;
+  const uint32_t* row = kPairIndices + 8 * mask_bits;
+  return Load(du32, row);
+}
+
+// Permutes the lanes of `v` according to the table entry for `mask_bits`
+// (one bit per lane). Only 4- and 8-byte lanes have an IndicesFromBits256
+// overload in this section.
+template <typename T, HWY_IF_NOT_T_SIZE(T, 2)>
+HWY_INLINE Vec256<T> Compress(Vec256<T> v, const uint64_t mask_bits) {
+  const DFromV<decltype(v)> d_in;
+  const Repartition<uint32_t, decltype(d_in)> d_words;
+
+  HWY_DASSERT(mask_bits < (1ull << Lanes(d_in)));
+  // The permutation is always expressed in 32-bit lanes: only
+  // _mm256_permutevar8x32_epi32 is available (no instruction for 4x64).
+  const Vec256<uint32_t> lane_order = IndicesFromBits256<T>(mask_bits);
+  const Indices256<uint32_t> permutation{lane_order.raw};
+  const auto words = BitCast(d_words, v);
+  return BitCast(d_in, TableLookupLanes(words, permutation));
+}
+
+// LUTs are infeasible for 2^16 possible masks, so splice together two
+// half-vector Compress.
+//
+// `v` has sixteen 2-byte lanes and `mask_bits` holds one bit per lane: the low
+// 8 bits belong to the lower half, the next 8 bits to the upper half. Each
+// half is compressed separately via detail::CompressBits (presumably the
+// 128-bit overload defined elsewhere - not visible here) and the two results
+// are merged through stack buffers.
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE Vec256<T> Compress(Vec256<T> v, const uint64_t mask_bits) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto vu16 = BitCast(du, v);  // (required for float16_t inputs)
+  const Half<decltype(du)> duh;
+  const auto half0 = LowerHalf(duh, vu16);
+  const auto half1 = UpperHalf(duh, vu16);
+
+  // Split the 16 mask bits into one byte per half.
+  const uint64_t mask_bits0 = mask_bits & 0xFF;
+  const uint64_t mask_bits1 = mask_bits >> 8;
+  const auto compressed0 = detail::CompressBits(half0, mask_bits0);
+  const auto compressed1 = detail::CompressBits(half1, mask_bits1);
+
+  alignas(32) uint16_t all_true[16] = {};
+  // Store mask=true lanes, left to right.
+  // The second store starts right after the true lanes of the first half and
+  // overwrites whatever followed them; num_true0 <= 8, so the 8-lane store
+  // stays within the 16-lane buffer.
+  const size_t num_true0 = PopCount(mask_bits0);
+  Store(compressed0, duh, all_true);
+  StoreU(compressed1, duh, all_true + num_true0);
+
+  if (hwy::HWY_NAMESPACE::CompressIsPartition<T>::value) {
+    // Store mask=false lanes, right to left. The second vector fills the upper
+    // half with right-aligned false lanes. The first vector is shifted
+    // rightwards to overwrite the true lanes of the second.
+    alignas(32) uint16_t all_false[16] = {};
+    const size_t num_true1 = PopCount(mask_bits1);
+    Store(compressed1, duh, all_false + 8);
+    StoreU(compressed0, duh, all_false + num_true1);
+
+    // The first (num_true0 + num_true1) lanes come from all_true, the
+    // remaining lanes from all_false.
+    const auto mask = FirstN(du, num_true0 + num_true1);
+    return BitCast(d,
+                   IfThenElse(mask, Load(du, all_true), Load(du, all_false)));
+  } else {
+    // Only care about the mask=true lanes.
+    return BitCast(d, Load(du, all_true));
+  }
+}
+
+// Permutes the 4- or 8-byte lanes of `v` according to the CompressNot table
+// entry for `mask_bits` (one bit per lane).
+template <typename T, HWY_IF_T_SIZE_ONE_OF(T, (1 << 4) | (1 << 8))>
+HWY_INLINE Vec256<T> CompressNot(Vec256<T> v, const uint64_t mask_bits) {
+  const DFromV<decltype(v)> d_in;
+  const Repartition<uint32_t, decltype(d_in)> d_words;
+
+  HWY_DASSERT(mask_bits < (1ull << Lanes(d_in)));
+  // The permutation is always expressed in 32-bit lanes: only
+  // _mm256_permutevar8x32_epi32 is available (no instruction for 4x64).
+  const Vec256<uint32_t> lane_order = IndicesFromNotBits256<T>(mask_bits);
+  const Indices256<uint32_t> permutation{lane_order.raw};
+  const auto words = BitCast(d_words, v);
+  return BitCast(d_in, TableLookupLanes(words, permutation));
+}
+
+// 16-bit lanes: a table over 2^16 masks is impractical, so CompressNot is
+// expressed as Compress with the mask inverted.
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE Vec256<T> CompressNot(Vec256<T> v, const uint64_t mask_bits) {
+  // Only the low 16 bits (one per lane) are flipped, which keeps the upper
+  // bits of the value passed to Compress unchanged.
+  const uint64_t inverted_bits = mask_bits ^ 0xFFFF;
+  return Compress(v, inverted_bits);
+}
+
+}  // namespace detail
+
+// Mask-based entry point for lanes wider than one byte: converts `m` to bits
+// and forwards to the bit-based implementation in detail.
+template <typename T, HWY_IF_NOT_T_SIZE(T, 1)>
+HWY_API Vec256<T> Compress(Vec256<T> v, Mask256<T> m) {
+  const uint64_t lane_bits = detail::BitsFromMask(m);
+  return detail::Compress(v, lane_bits);
+}
+
+// Mask-based entry point for lanes wider than one byte: converts `m` to bits
+// and forwards to the bit-based CompressNot implementation in detail.
+template <typename T, HWY_IF_NOT_T_SIZE(T, 1)>
+HWY_API Vec256<T> CompressNot(Vec256<T> v, Mask256<T> m) {
+  const uint64_t lane_bits = detail::BitsFromMask(m);
+  return detail::CompressNot(v, lane_bits);
+}
+
+// For 256-bit vectors of uint64_t this simply forwards to CompressNot with
+// the same vector and mask.
+HWY_API Vec256<uint64_t> CompressBlocksNot(Vec256<uint64_t> v,
+                                           Mask256<uint64_t> mask) {
+  const Vec256<uint64_t> result = CompressNot(v, mask);
+  return result;
+}
+
+// Compress variant whose mask is supplied as packed bits in memory. Reads
+// ceil(lanes / 8) bytes from `bits`, clears any bits beyond the lane count
+// when there are fewer than 8 lanes, and forwards to detail::Compress.
+template <typename T, HWY_IF_NOT_T_SIZE(T, 1)>
+HWY_API Vec256<T> CompressBits(Vec256<T> v, const uint8_t* HWY_RESTRICT bits) {
+  constexpr size_t kLanes = 32 / sizeof(T);
+  constexpr size_t kMaskBytes = (kLanes + 7) / 8;
+  // With 8 or more lanes, every bit that was read is meaningful.
+  constexpr uint64_t kValidBits =
+      (kLanes < 8) ? ((1ull << kLanes) - 1) : ~uint64_t{0};
+
+  uint64_t lane_bits = 0;
+  CopyBytes<kMaskBytes>(bits, &lane_bits);
+  lane_bits &= kValidBits;
+
+  return detail::Compress(v, lane_bits);
+}
+
+// ------------------------------ CompressStore, CompressBitsStore
+
+// Compresses `v` under mask `m`, writes the whole resulting vector to
+// `unaligned` and returns the number of true lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API size_t CompressStore(VFromD<D> v, MFromD<D> m, D d,
+                             TFromD<D>* HWY_RESTRICT unaligned) {
+  const uint64_t lane_bits = detail::BitsFromMask(m);
+  const size_t num_kept = PopCount(lane_bits);
+  const auto packed = detail::Compress(v, lane_bits);
+  StoreU(packed, d, unaligned);
+  detail::MaybeUnpoison(unaligned, num_kept);
+  return num_kept;
+}
+
+// Compresses the 4- or 8-byte lanes of `v` under mask `m` and stores the
+// result to `unaligned` through BlendedStore, using a store mask derived from
+// the same table entry as the permutation. Returns the number of true lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 4) | (1 << 8))>
+HWY_API size_t CompressBlendedStore(VFromD<D> v, MFromD<D> m, D d,
+                                    TFromD<D>* HWY_RESTRICT unaligned) {
+  const uint64_t lane_bits = detail::BitsFromMask(m);
+  const size_t num_kept = PopCount(lane_bits);
+
+  const RebindToUnsigned<decltype(d)> d_unsigned;
+  const Repartition<uint32_t, decltype(d)> d_words;
+  HWY_DASSERT(lane_bits < (1ull << Lanes(d)));
+  // Indices are 32-bit because only _mm256_permutevar8x32_epi32 exists (no
+  // instruction for 4x64). Bit 3 of each nibble doubles as the FirstN flag.
+  const Vec256<uint32_t> packed_idx =
+      detail::IndicesFromBits256<TFromD<D>>(lane_bits);
+
+  // Move the nibble's top bit (bit 3) up to bit 31 so it acts as a lane mask.
+  const Mask256<uint32_t> store_mask_u32 =
+      MaskFromVec(ShiftLeft<28>(packed_idx));
+  // RebindMask cannot change the lane size, so go through the unsigned mask
+  // type of D first.
+  const MFromD<decltype(d_unsigned)> store_mask_u{store_mask_u32.raw};
+  const MFromD<D> store_mask = RebindMask(d, store_mask_u);
+
+  const Indices256<uint32_t> permutation{packed_idx.raw};
+  const auto words = BitCast(d_words, v);
+  const VFromD<D> packed = BitCast(d, TableLookupLanes(words, permutation));
+
+  BlendedStore(packed, store_mask, d, unaligned);
+  detail::MaybeUnpoison(unaligned, num_kept);
+  return num_kept;
+}
+
+// Compresses the 2-byte lanes of `v` under mask `m` and writes only the first
+// `count` lanes of the result to `unaligned`. Returns the number of true
+// lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API size_t CompressBlendedStore(VFromD<D> v, MFromD<D> m, D d,
+                                    TFromD<D>* HWY_RESTRICT unaligned) {
+  const uint64_t lane_bits = detail::BitsFromMask(m);
+  const size_t num_kept = PopCount(lane_bits);
+  const VFromD<D> packed = detail::Compress(v, lane_bits);
+
+#if HWY_MEM_OPS_MIGHT_FAULT  // true if HWY_IS_MSAN
+  // The store mask is known to be FirstN, so instead of a per-lane masked
+  // store, spill the vector and copy exactly the kept bytes.
+  alignas(32) TFromD<D> staging[16];
+  Store(packed, d, staging);
+  CopyBytes(staging, unaligned, num_kept * sizeof(TFromD<D>));
+#else
+  const auto first_kept = FirstN(d, num_kept);
+  BlendedStore(packed, first_kept, d, unaligned);
+#endif
+  return num_kept;
+}
+
+// CompressStore variant whose mask is supplied as packed bits in memory.
+// Reads ceil(lanes / 8) bytes from `bits`, clears bits beyond the lane count
+// when there are fewer than 8 lanes, stores the whole compressed vector to
+// `unaligned` and returns the number of set mask bits.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API size_t CompressBitsStore(VFromD<D> v, const uint8_t* HWY_RESTRICT bits,
+                                 D d, TFromD<D>* HWY_RESTRICT unaligned) {
+  constexpr size_t kLanes = Lanes(d);
+  constexpr size_t kMaskBytes = (kLanes + 7) / 8;
+
+  uint64_t lane_bits = 0;
+  CopyBytes<kMaskBytes>(bits, &lane_bits);
+  if (kLanes < 8) {
+    lane_bits &= (1ull << kLanes) - 1;
+  }
+
+  const size_t num_kept = PopCount(lane_bits);
+  const auto packed = detail::Compress(v, lane_bits);
+  StoreU(packed, d, unaligned);
+  detail::MaybeUnpoison(unaligned, num_kept);
+  return num_kept;
+}
+
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// ------------------------------ Expand
+
+// Always define Expand/LoadExpand because generic_ops only does so for Vec128.
+
+namespace detail {
+
+// Thin wrappers around the zero-masking expand / expand-load intrinsics, one
+// overload per unsigned lane type. Each passes the raw mask and raw vector (or
+// pointer) straight to the intrinsic and wraps the result.
+
+#if HWY_TARGET <= HWY_AVX3_DL || HWY_IDE  // VBMI2
+
+// 8-bit lanes, register source.
+HWY_INLINE Vec256<uint8_t> NativeExpand(Vec256<uint8_t> v,
+                                        Mask256<uint8_t> mask) {
+  const auto expanded = _mm256_maskz_expand_epi8(mask.raw, v.raw);
+  return Vec256<uint8_t>{expanded};
+}
+
+// 16-bit lanes, register source.
+HWY_INLINE Vec256<uint16_t> NativeExpand(Vec256<uint16_t> v,
+                                         Mask256<uint16_t> mask) {
+  const auto expanded = _mm256_maskz_expand_epi16(mask.raw, v.raw);
+  return Vec256<uint16_t>{expanded};
+}
+
+// 8-bit lanes, memory source.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U8_D(D)>
+HWY_INLINE VFromD<D> NativeLoadExpand(MFromD<D> mask, D /* d */,
+                                      const uint8_t* HWY_RESTRICT unaligned) {
+  const auto expanded = _mm256_maskz_expandloadu_epi8(mask.raw, unaligned);
+  return VFromD<D>{expanded};
+}
+
+// 16-bit lanes, memory source.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U16_D(D)>
+HWY_INLINE VFromD<D> NativeLoadExpand(MFromD<D> mask, D /* d */,
+                                      const uint16_t* HWY_RESTRICT unaligned) {
+  const auto expanded = _mm256_maskz_expandloadu_epi16(mask.raw, unaligned);
+  return VFromD<D>{expanded};
+}
+
+#endif  // HWY_TARGET <= HWY_AVX3_DL
+#if HWY_TARGET <= HWY_AVX3 || HWY_IDE
+
+// 32-bit lanes, register source.
+HWY_INLINE Vec256<uint32_t> NativeExpand(Vec256<uint32_t> v,
+                                         Mask256<uint32_t> mask) {
+  const auto expanded = _mm256_maskz_expand_epi32(mask.raw, v.raw);
+  return Vec256<uint32_t>{expanded};
+}
+
+// 64-bit lanes, register source.
+HWY_INLINE Vec256<uint64_t> NativeExpand(Vec256<uint64_t> v,
+                                         Mask256<uint64_t> mask) {
+  const auto expanded = _mm256_maskz_expand_epi64(mask.raw, v.raw);
+  return Vec256<uint64_t>{expanded};
+}
+
+// 32-bit lanes, memory source.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U32_D(D)>
+HWY_INLINE VFromD<D> NativeLoadExpand(MFromD<D> mask, D /* d */,
+                                      const uint32_t* HWY_RESTRICT unaligned) {
+  const auto expanded = _mm256_maskz_expandloadu_epi32(mask.raw, unaligned);
+  return VFromD<D>{expanded};
+}
+
+// 64-bit lanes, memory source.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U64_D(D)>
+HWY_INLINE VFromD<D> NativeLoadExpand(MFromD<D> mask, D /* d */,
+                                      const uint64_t* HWY_RESTRICT unaligned) {
+  const auto expanded = _mm256_maskz_expandloadu_epi64(mask.raw, unaligned);
+  return VFromD<D>{expanded};
+}
+
+#endif  // HWY_TARGET <= HWY_AVX3
+
+}  // namespace detail
+
+// Expand for 1-byte lanes. With VBMI2 the native instruction is used;
+// otherwise the vector is processed as two 128-bit halves, where the upper
+// half reads its input starting after the lanes consumed by the lower half.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec256<T> Expand(Vec256<T> v, Mask256<T> mask) {
+  const DFromV<decltype(v)> d;
+#if HWY_TARGET <= HWY_AVX3_DL  // VBMI2
+  const RebindToUnsigned<decltype(d)> du;
+  const MFromD<decltype(du)> mask_u = RebindMask(du, mask);
+  const auto v_u = BitCast(du, v);
+  return BitCast(d, detail::NativeExpand(v_u, mask_u));
+#else
+  // A lookup table over this many mask combinations is impractical, hence the
+  // split into two half-vector Expand calls that are then combined.
+  constexpr size_t kLanes = 32 / sizeof(T);
+  const Half<decltype(d)> d_half;
+  const uint64_t lane_bits = detail::BitsFromMask(mask);
+  // Number of input lanes the lower half consumes.
+  const size_t consumed_lo = PopCount(lane_bits & ((1 << (kLanes / 2)) - 1));
+
+  const Mask128<T> mask_lo = MaskFromVec(LowerHalf(VecFromMask(d, mask)));
+  const Vec128<T> expanded_lo = Expand(LowerHalf(v), mask_lo);
+
+  // The input must be shifted by a variable number of bytes. There is no
+  // table-driven way to do that before VBMI, and CPUs with VBMI likely also
+  // have VBMI2 (native Expand), so spill to memory and reload at an offset.
+  alignas(32) T spilled[kLanes];
+  Store(v, d, spilled);
+  const Mask128<T> mask_hi =
+      MaskFromVec(UpperHalf(d_half, VecFromMask(d, mask)));
+  const Vec128<T> input_hi = LoadU(d_half, spilled + consumed_lo);
+  const Vec128<T> expanded_hi = Expand(input_hi, mask_hi);
+  return Combine(d, expanded_hi, expanded_lo);
+#endif
+}
+
+// If AVX3, this is already implemented by x86_512.
+#if HWY_TARGET != HWY_AVX3
+
+// Expand for 2-byte lanes. With VBMI2 the native instruction is used;
+// otherwise the vector is processed as two 128-bit halves, where the upper
+// half reads its input starting after the lanes consumed by the lower half.
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec256<T> Expand(Vec256<T> v, Mask256<T> mask) {
+  const Full256<T> d;
+#if HWY_TARGET <= HWY_AVX3_DL  // VBMI2
+  const RebindToUnsigned<decltype(d)> du;
+  const auto v_u = BitCast(du, v);
+  return BitCast(d, detail::NativeExpand(v_u, RebindMask(du, mask)));
+#else   // AVX2
+  // A lookup table over 2^16 masks is impractical, hence the split into two
+  // half-vector Expand calls that are then combined.
+  const Half<decltype(d)> d_half;
+  const Mask128<T> mask_lo = MaskFromVec(LowerHalf(VecFromMask(d, mask)));
+  const Vec128<T> expanded_lo = Expand(LowerHalf(v), mask_lo);
+
+  // The input must be shifted by a variable number of u16 lanes.
+  // permutevar_epi16 requires AVX3, and with AVX3 the native u32 Expand would
+  // be used instead. The remaining option is to spill and reload at an
+  // offset, at the cost of a store-to-load forwarding stall.
+  alignas(32) T spilled[32 / sizeof(T)];
+  Store(v, d, spilled);
+  const size_t consumed_lo = CountTrue(d_half, mask_lo);
+  const Vec128<T> input_hi = LoadU(d_half, spilled + consumed_lo);
+  const Mask128<T> mask_hi =
+      MaskFromVec(UpperHalf(d_half, VecFromMask(d, mask)));
+  const Vec128<T> expanded_hi = Expand(input_hi, mask_hi);
+  return Combine(d, expanded_hi, expanded_lo);
+#endif  // AVX2
+}
+
+#endif  // HWY_TARGET != HWY_AVX3
+
+// Expand for 4-byte lanes. With AVX3 the native instruction is used; otherwise
+// a 256-entry table indexed by the mask bits supplies, for every output lane,
+// the input lane to read, and masked-off lanes are zeroed afterwards.
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec256<T> Expand(Vec256<T> v, Mask256<T> mask) {
+  const Full256<T> d;
+#if HWY_TARGET <= HWY_AVX3
+  const RebindToUnsigned<decltype(d)> du;
+  const MFromD<decltype(du)> mu = RebindMask(du, mask);
+  return BitCast(d, detail::NativeExpand(BitCast(du, v), mu));
+#else
+  const RebindToUnsigned<decltype(d)> du;
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+
+  // Each entry holds eight nibbles, least-significant nibble = output lane 0.
+  // A nibble is the input lane to read; 0xf marks a masked-off output lane
+  // (its value is discarded by the IfThenElseZero below).
+  alignas(16) constexpr uint32_t packed_array[256] = {
+      // PrintExpand32x8Nibble.
+      0xffffffff, 0xfffffff0, 0xffffff0f, 0xffffff10, 0xfffff0ff, 0xfffff1f0,
+      0xfffff10f, 0xfffff210, 0xffff0fff, 0xffff1ff0, 0xffff1f0f, 0xffff2f10,
+      0xffff10ff, 0xffff21f0, 0xffff210f, 0xffff3210, 0xfff0ffff, 0xfff1fff0,
+      0xfff1ff0f, 0xfff2ff10, 0xfff1f0ff, 0xfff2f1f0, 0xfff2f10f, 0xfff3f210,
+      0xfff10fff, 0xfff21ff0, 0xfff21f0f, 0xfff32f10, 0xfff210ff, 0xfff321f0,
+      0xfff3210f, 0xfff43210, 0xff0fffff, 0xff1ffff0, 0xff1fff0f, 0xff2fff10,
+      0xff1ff0ff, 0xff2ff1f0, 0xff2ff10f, 0xff3ff210, 0xff1f0fff, 0xff2f1ff0,
+      0xff2f1f0f, 0xff3f2f10, 0xff2f10ff, 0xff3f21f0, 0xff3f210f, 0xff4f3210,
+      0xff10ffff, 0xff21fff0, 0xff21ff0f, 0xff32ff10, 0xff21f0ff, 0xff32f1f0,
+      0xff32f10f, 0xff43f210, 0xff210fff, 0xff321ff0, 0xff321f0f, 0xff432f10,
+      0xff3210ff, 0xff4321f0, 0xff43210f, 0xff543210, 0xf0ffffff, 0xf1fffff0,
+      0xf1ffff0f, 0xf2ffff10, 0xf1fff0ff, 0xf2fff1f0, 0xf2fff10f, 0xf3fff210,
+      0xf1ff0fff, 0xf2ff1ff0, 0xf2ff1f0f, 0xf3ff2f10, 0xf2ff10ff, 0xf3ff21f0,
+      0xf3ff210f, 0xf4ff3210, 0xf1f0ffff, 0xf2f1fff0, 0xf2f1ff0f, 0xf3f2ff10,
+      0xf2f1f0ff, 0xf3f2f1f0, 0xf3f2f10f, 0xf4f3f210, 0xf2f10fff, 0xf3f21ff0,
+      0xf3f21f0f, 0xf4f32f10, 0xf3f210ff, 0xf4f321f0, 0xf4f3210f, 0xf5f43210,
+      0xf10fffff, 0xf21ffff0, 0xf21fff0f, 0xf32fff10, 0xf21ff0ff, 0xf32ff1f0,
+      0xf32ff10f, 0xf43ff210, 0xf21f0fff, 0xf32f1ff0, 0xf32f1f0f, 0xf43f2f10,
+      0xf32f10ff, 0xf43f21f0, 0xf43f210f, 0xf54f3210, 0xf210ffff, 0xf321fff0,
+      0xf321ff0f, 0xf432ff10, 0xf321f0ff, 0xf432f1f0, 0xf432f10f, 0xf543f210,
+      0xf3210fff, 0xf4321ff0, 0xf4321f0f, 0xf5432f10, 0xf43210ff, 0xf54321f0,
+      0xf543210f, 0xf6543210, 0x0fffffff, 0x1ffffff0, 0x1fffff0f, 0x2fffff10,
+      0x1ffff0ff, 0x2ffff1f0, 0x2ffff10f, 0x3ffff210, 0x1fff0fff, 0x2fff1ff0,
+      0x2fff1f0f, 0x3fff2f10, 0x2fff10ff, 0x3fff21f0, 0x3fff210f, 0x4fff3210,
+      0x1ff0ffff, 0x2ff1fff0, 0x2ff1ff0f, 0x3ff2ff10, 0x2ff1f0ff, 0x3ff2f1f0,
+      0x3ff2f10f, 0x4ff3f210, 0x2ff10fff, 0x3ff21ff0, 0x3ff21f0f, 0x4ff32f10,
+      0x3ff210ff, 0x4ff321f0, 0x4ff3210f, 0x5ff43210, 0x1f0fffff, 0x2f1ffff0,
+      0x2f1fff0f, 0x3f2fff10, 0x2f1ff0ff, 0x3f2ff1f0, 0x3f2ff10f, 0x4f3ff210,
+      0x2f1f0fff, 0x3f2f1ff0, 0x3f2f1f0f, 0x4f3f2f10, 0x3f2f10ff, 0x4f3f21f0,
+      0x4f3f210f, 0x5f4f3210, 0x2f10ffff, 0x3f21fff0, 0x3f21ff0f, 0x4f32ff10,
+      0x3f21f0ff, 0x4f32f1f0, 0x4f32f10f, 0x5f43f210, 0x3f210fff, 0x4f321ff0,
+      0x4f321f0f, 0x5f432f10, 0x4f3210ff, 0x5f4321f0, 0x5f43210f, 0x6f543210,
+      0x10ffffff, 0x21fffff0, 0x21ffff0f, 0x32ffff10, 0x21fff0ff, 0x32fff1f0,
+      0x32fff10f, 0x43fff210, 0x21ff0fff, 0x32ff1ff0, 0x32ff1f0f, 0x43ff2f10,
+      0x32ff10ff, 0x43ff21f0, 0x43ff210f, 0x54ff3210, 0x21f0ffff, 0x32f1fff0,
+      0x32f1ff0f, 0x43f2ff10, 0x32f1f0ff, 0x43f2f1f0, 0x43f2f10f, 0x54f3f210,
+      0x32f10fff, 0x43f21ff0, 0x43f21f0f, 0x54f32f10, 0x43f210ff, 0x54f321f0,
+      0x54f3210f, 0x65f43210, 0x210fffff, 0x321ffff0, 0x321fff0f, 0x432fff10,
+      0x321ff0ff, 0x432ff1f0, 0x432ff10f, 0x543ff210, 0x321f0fff, 0x432f1ff0,
+      0x432f1f0f, 0x543f2f10, 0x432f10ff, 0x543f21f0, 0x543f210f, 0x654f3210,
+      0x3210ffff, 0x4321fff0, 0x4321ff0f, 0x5432ff10, 0x4321f0ff, 0x5432f1f0,
+      0x5432f10f, 0x6543f210, 0x43210fff, 0x54321ff0, 0x54321f0f, 0x65432f10,
+      0x543210ff, 0x654321f0, 0x6543210f, 0x76543210,
+  };
+
+  // For lane i, shift the i-th 4-bit index down to the low bits of the lane.
+  // Higher nibbles remain above it; the lookup only consumes the low bits.
+  const Vec256<uint32_t> packed = Set(du, packed_array[mask_bits]);
+  alignas(32) constexpr uint32_t shifts[8] = {0, 4, 8, 12, 16, 20, 24, 28};
+  // TableLookupLanes ignores upper bits; avoid bounds-check in IndicesFromVec.
+  const Indices256<uint32_t> indices{(packed >> Load(du, shifts)).raw};
+  const Vec256<uint32_t> expand = TableLookupLanes(BitCast(du, v), indices);
+  // TableLookupLanes cannot also zero masked-off lanes, so do that now.
+  return IfThenElseZero(mask, BitCast(d, expand));
+#endif
+}
+
+// Expand for 8-byte lanes. With AVX3 the native instruction is used; otherwise
+// a 16-entry table indexed by the mask bits supplies, for every output lane,
+// the input lane to read, and masked-off lanes are zeroed afterwards.
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec256<T> Expand(Vec256<T> v, Mask256<T> mask) {
+  const Full256<T> d;
+#if HWY_TARGET <= HWY_AVX3
+  const RebindToUnsigned<decltype(d)> du;
+  const MFromD<decltype(du)> mu = RebindMask(du, mask);
+  return BitCast(d, detail::NativeExpand(BitCast(du, v), mu));
+#else
+  // This branch is only compiled when HWY_TARGET > HWY_AVX3, so it needs no
+  // further check for a native 64-bit TableLookupLanes.
+  const RebindToUnsigned<decltype(d)> du;
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+
+  // Each entry holds four nibbles, least-significant nibble = output lane 0.
+  // A nibble is the input lane to read; 0xf marks a masked-off output lane
+  // (its value is discarded by the IfThenElseZero below).
+  alignas(16) constexpr uint64_t packed_array[16] = {
+      // PrintExpand64x4Nibble.
+      0x0000ffff, 0x0000fff0, 0x0000ff0f, 0x0000ff10, 0x0000f0ff, 0x0000f1f0,
+      0x0000f10f, 0x0000f210, 0x00000fff, 0x00001ff0, 0x00001f0f, 0x00002f10,
+      0x000010ff, 0x000021f0, 0x0000210f, 0x00003210};
+
+  // For lane i, shift the i-th 4-bit index down to the low bits of the lane;
+  // only bits [0, 2) form a valid lane number.
+  const Vec256<uint64_t> packed = Set(du, packed_array[mask_bits]);
+  // One shift count per 64-bit lane: exactly the four values Load() reads.
+  alignas(32) constexpr uint64_t shifts[4] = {0, 4, 8, 12};
+  // 64-bit TableLookupLanes on AVX2 requires IndicesFromVec, which checks
+  // bounds, so clear the upper bits.
+  const Vec256<uint64_t> masked = And(packed >> Load(du, shifts), Set(du, 3));
+  const Indices256<uint64_t> indices = IndicesFromVec(du, masked);
+  const Vec256<uint64_t> expand = TableLookupLanes(BitCast(du, v), indices);
+  // TableLookupLanes cannot also zero masked-off lanes, so do that now.
+  return IfThenElseZero(mask, BitCast(d, expand));
+#endif
+}
+
+// ------------------------------ LoadExpand
+
+// LoadExpand for 1- and 2-byte lanes. With VBMI2 the native expand-load is
+// used on the unsigned lane type; otherwise the vector is loaded unaligned
+// and passed to Expand.
+template <class D, HWY_IF_V_SIZE_D(D, 32),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2))>
+HWY_API VFromD<D> LoadExpand(MFromD<D> mask, D d,
+                             const TFromD<D>* HWY_RESTRICT unaligned) {
+#if HWY_TARGET <= HWY_AVX3_DL  // VBMI2
+  const RebindToUnsigned<decltype(d)> d_unsigned;
+  using Unsigned = TFromD<decltype(d_unsigned)>;
+  const MFromD<decltype(d_unsigned)> mask_unsigned =
+      RebindMask(d_unsigned, mask);
+  const Unsigned* HWY_RESTRICT src =
+      reinterpret_cast<const Unsigned*>(unaligned);
+  return BitCast(d,
+                 detail::NativeLoadExpand(mask_unsigned, d_unsigned, src));
+#else
+  const VFromD<D> loaded = LoadU(d, unaligned);
+  return Expand(loaded, mask);
+#endif
+}
+
+// LoadExpand for 4- and 8-byte lanes. With AVX3 the native expand-load is
+// used on the unsigned lane type; otherwise the vector is loaded unaligned
+// and passed to Expand.
+template <class D, HWY_IF_V_SIZE_D(D, 32),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 4) | (1 << 8))>
+HWY_API VFromD<D> LoadExpand(MFromD<D> mask, D d,
+                             const TFromD<D>* HWY_RESTRICT unaligned) {
+#if HWY_TARGET <= HWY_AVX3
+  const RebindToUnsigned<decltype(d)> d_unsigned;
+  using Unsigned = TFromD<decltype(d_unsigned)>;
+  const MFromD<decltype(d_unsigned)> mask_unsigned =
+      RebindMask(d_unsigned, mask);
+  const Unsigned* HWY_RESTRICT src =
+      reinterpret_cast<const Unsigned*>(unaligned);
+  return BitCast(d,
+                 detail::NativeLoadExpand(mask_unsigned, d_unsigned, src));
+#else
+  const VFromD<D> loaded = LoadU(d, unaligned);
+  return Expand(loaded, mask);
+#endif
+}
+
+// ------------------------------ LoadInterleaved3/4
+
+// Implemented in generic_ops, we just overload LoadTransposedBlocks3/4.
+
+namespace detail {
+// Loads three consecutive vectors and regroups their 128-bit blocks.
+// Blocks are numbered in memory order; each row below is one vector, written
+// as "upper-block lower-block".
+// Loaded:
+// 1 0 (<- first block of unaligned)
+// 3 2
+// 5 4
+// Returned in A, B, C:
+// 3 0
+// 4 1
+// 5 2
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API void LoadTransposedBlocks3(D d, const TFromD<D>* HWY_RESTRICT unaligned,
+                                   VFromD<D>& A, VFromD<D>& B, VFromD<D>& C) {
+  constexpr size_t kLanes = Lanes(d);
+  const VFromD<D> blocks_1_0 = LoadU(d, unaligned + 0 * kLanes);
+  const VFromD<D> blocks_3_2 = LoadU(d, unaligned + 1 * kLanes);
+  const VFromD<D> blocks_5_4 = LoadU(d, unaligned + 2 * kLanes);
+
+  A = ConcatUpperLower(d, blocks_3_2, blocks_1_0);  // 3 0
+  B = ConcatLowerUpper(d, blocks_5_4, blocks_1_0);  // 4 1
+  C = ConcatUpperLower(d, blocks_5_4, blocks_3_2);  // 5 2
+}
+
+// Loads four consecutive vectors and regroups their 128-bit blocks.
+// Loaded (128-bit blocks):
+// 1 0 (first block of unaligned)
+// 3 2
+// 5 4
+// 7 6
+// Returned in vA, vB, vC, vD:
+// 4 0 (LSB of vA)
+// 5 1
+// 6 2
+// 7 3
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API void LoadTransposedBlocks4(D d, const TFromD<D>* HWY_RESTRICT unaligned,
+                                   VFromD<D>& vA, VFromD<D>& vB, VFromD<D>& vC,
+                                   VFromD<D>& vD) {
+  constexpr size_t kLanes = Lanes(d);
+  const VFromD<D> blocks_1_0 = LoadU(d, unaligned + 0 * kLanes);
+  const VFromD<D> blocks_3_2 = LoadU(d, unaligned + 1 * kLanes);
+  const VFromD<D> blocks_5_4 = LoadU(d, unaligned + 2 * kLanes);
+  const VFromD<D> blocks_7_6 = LoadU(d, unaligned + 3 * kLanes);
+
+  vA = ConcatLowerLower(d, blocks_5_4, blocks_1_0);  // 4 0
+  vB = ConcatUpperUpper(d, blocks_5_4, blocks_1_0);  // 5 1
+  vC = ConcatLowerLower(d, blocks_7_6, blocks_3_2);  // 6 2
+  vD = ConcatUpperUpper(d, blocks_7_6, blocks_3_2);  // 7 3
+}
+}  // namespace detail
+
+// ------------------------------ StoreInterleaved2/3/4 (ConcatUpperLower)
+
+// Implemented in generic_ops, we just overload StoreTransposedBlocks2/3/4.
+
+namespace detail {
+// Regroups the 128-bit blocks of two vectors and stores them consecutively.
+// Block numbering, lower block written on the right:
+//   inputs: i = 2 0, j = 3 1
+//   stored: 1 0 | 3 2
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API void StoreTransposedBlocks2(VFromD<D> i, VFromD<D> j, D d,
+                                    TFromD<D>* HWY_RESTRICT unaligned) {
+  constexpr size_t kStride = Lanes(d);  // lanes per stored vector
+  const VFromD<D> blocks10 = ConcatLowerLower(d, j, i);
+  const VFromD<D> blocks32 = ConcatUpperUpper(d, j, i);
+  StoreU(blocks10, d, unaligned);
+  StoreU(blocks32, d, unaligned + kStride);
+}
+
+// Regroups the 128-bit blocks of three vectors and stores them consecutively.
+// Block numbering, lower block written on the right:
+//   inputs: i = 3 0, j = 4 1, k = 5 2
+//   stored: 1 0 | 3 2 | 5 4
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API void StoreTransposedBlocks3(VFromD<D> i, VFromD<D> j, VFromD<D> k, D d,
+                                    TFromD<D>* HWY_RESTRICT unaligned) {
+  constexpr size_t kStride = Lanes(d);  // lanes per stored vector
+  const VFromD<D> blocks10 = ConcatLowerLower(d, j, i);
+  const VFromD<D> blocks32 = ConcatUpperLower(d, i, k);
+  const VFromD<D> blocks54 = ConcatUpperUpper(d, k, j);
+  StoreU(blocks10, d, unaligned);
+  StoreU(blocks32, d, unaligned + kStride);
+  StoreU(blocks54, d, unaligned + 2 * kStride);
+}
+
+// Regroups the 128-bit blocks of four vectors and stores them consecutively.
+// Block numbering, lower block written on the right:
+//   inputs: i = 4 0, j = 5 1, k = 6 2, l = 7 3
+//   stored: 1 0 | 3 2 | 5 4 | 7 6
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API void StoreTransposedBlocks4(VFromD<D> i, VFromD<D> j, VFromD<D> k,
+                                    VFromD<D> l, D d,
+                                    TFromD<D>* HWY_RESTRICT unaligned) {
+  constexpr size_t kStride = Lanes(d);  // lanes per stored vector
+  // The lower halves of the inputs form the first two outputs, the upper
+  // halves the last two.
+  const VFromD<D> blocks10 = ConcatLowerLower(d, j, i);
+  const VFromD<D> blocks32 = ConcatLowerLower(d, l, k);
+  const VFromD<D> blocks54 = ConcatUpperUpper(d, j, i);
+  const VFromD<D> blocks76 = ConcatUpperUpper(d, l, k);
+  StoreU(blocks10, d, unaligned);
+  StoreU(blocks32, d, unaligned + kStride);
+  StoreU(blocks54, d, unaligned + 2 * kStride);
+  StoreU(blocks76, d, unaligned + 3 * kStride);
+}
+}  // namespace detail
+
+// ------------------------------ Additional mask logical operations
+
+#if HWY_TARGET <= HWY_AVX3
+// AVX3 (mask register) version: negates the value returned by
+// detail::AVX3Blsi and keeps only the bits that correspond to valid lanes.
+template <class T>
+HWY_API Mask256<T> SetAtOrAfterFirst(Mask256<T> mask) {
+  using RawM = typename Mask256<T>::Raw;
+  constexpr size_t kNumLanes = Lanes(Full256<T>());
+  // One bit per lane, all set.
+  constexpr uint32_t kValidBits =
+      static_cast<uint32_t>((uint64_t{1} << kNumLanes) - 1);
+  const auto blsi = detail::AVX3Blsi(mask.raw);
+  return Mask256<T>{static_cast<RawM>((0u - blsi) & kValidBits)};
+}
+// AVX3 (mask register) version: subtracts one from the value returned by
+// detail::AVX3Blsi and keeps only the bits that correspond to valid lanes.
+template <class T>
+HWY_API Mask256<T> SetBeforeFirst(Mask256<T> mask) {
+  using RawM = typename Mask256<T>::Raw;
+  constexpr size_t kNumLanes = Lanes(Full256<T>());
+  // One bit per lane, all set.
+  constexpr uint32_t kValidBits =
+      static_cast<uint32_t>((uint64_t{1} << kNumLanes) - 1);
+  const auto blsi = detail::AVX3Blsi(mask.raw);
+  return Mask256<T>{static_cast<RawM>((blsi - 1u) & kValidBits)};
+}
+// AVX3 (mask register) version: takes the value returned by
+// detail::AVX3Blsmsk and keeps only the bits that correspond to valid lanes.
+template <class T>
+HWY_API Mask256<T> SetAtOrBeforeFirst(Mask256<T> mask) {
+  using RawM = typename Mask256<T>::Raw;
+  constexpr size_t kNumLanes = Lanes(Full256<T>());
+  // One bit per lane, all set.
+  constexpr uint32_t kValidBits =
+      static_cast<uint32_t>((uint64_t{1} << kNumLanes) - 1);
+  const auto blsmsk = detail::AVX3Blsmsk(mask.raw);
+  return Mask256<T>{static_cast<RawM>(blsmsk & kValidBits)};
+}
+// AVX3 (mask register) version: the result is the value returned by
+// detail::AVX3Blsi, converted back to the raw mask type.
+template <class T>
+HWY_API Mask256<T> SetOnlyFirst(Mask256<T> mask) {
+  using RawM = typename Mask256<T>::Raw;
+  const auto blsi = detail::AVX3Blsi(mask.raw);
+  return Mask256<T>{static_cast<RawM>(blsi)};
+}
+#else   // AVX2
+// AVX2 (vector mask) version. Works on the mask viewed as int64 lanes and
+// widens the result in three stages: within each 64-bit lane, then from even
+// to odd 64-bit lanes, then from the lower to the upper 128-bit half.
+template <class T>
+HWY_API Mask256<T> SetAtOrAfterFirst(Mask256<T> mask) {
+  const Full256<T> d;
+  const Repartition<int64_t, decltype(d)> di64;
+  const Repartition<float, decltype(d)> df32;
+  const Repartition<int32_t, decltype(d)> di32;
+  const Half<decltype(di64)> dh_i64;
+  const Half<decltype(di32)> dh_i32;
+  using VF32 = VFromD<decltype(df32)>;
+
+  // Stage 1: x | -x within each 64-bit lane.
+  const auto bits = BitCast(di64, VecFromMask(d, mask));
+  const auto within_lane = Or(bits, Neg(bits));
+
+  // Stage 2: copy the sign bit of the even int64_t lanes to the odd int64_t
+  // lanes.
+  const __m256 shuffled =
+      _mm256_shuffle_ps(Zero(df32).raw, BitCast(df32, within_lane).raw,
+                        _MM_SHUFFLE(1, 1, 0, 0));
+  const auto even_sign = BroadcastSignBit(BitCast(di32, VF32{shuffled}));
+  const auto within_block = Or(within_lane, BitCast(di64, even_sign));
+
+  // Stage 3: copy the sign bit of the lower 128-bit half to the upper 128-bit
+  // half.
+  const auto lower_i32 = BitCast(dh_i32, LowerHalf(dh_i64, within_block));
+  const auto lower_sign = BroadcastSignBit(Broadcast<3>(lower_i32));
+  const auto carry_to_upper =
+      BitCast(di64, Combine(di32, lower_sign, Zero(dh_i32)));
+  const auto result = Or(within_block, carry_to_upper);
+  return MaskFromVec(BitCast(d, result));
+}
+
+// AVX2 version: the complement of SetAtOrAfterFirst.
+template <class T>
+HWY_API Mask256<T> SetBeforeFirst(Mask256<T> mask) {
+  const Mask256<T> at_or_after = SetAtOrAfterFirst(mask);
+  return Not(at_or_after);
+}
+
+// AVX2 (vector mask) version. Builds a per-64-bit-lane gate from "is this
+// 64-bit lane zero" comparisons and ANDs it with a per-lane value derived
+// from x & -x of the mask viewed as int64 lanes.
+template <class T>
+HWY_API Mask256<T> SetOnlyFirst(Mask256<T> mask) {
+  const Full256<T> d;
+  const RebindToSigned<decltype(d)> di;
+  const Repartition<int64_t, decltype(d)> di64;
+  const Half<decltype(di64)> dh_i64;
+
+  const auto bits = BitCast(di64, VecFromMask(d, mask));
+
+  // All-ones in every 64-bit lane of the mask that is entirely zero.
+  const auto lane_is_zero = VecFromMask(di64, bits == Zero(di64));
+  const auto zero_lo = LowerHalf(dh_i64, lane_is_zero);
+  const auto zero_hi = UpperHalf(dh_i64, lane_is_zero);
+
+  // Combine the "is zero" flags of the 64-bit lanes into a gate per lane.
+  const auto lo_and = And(zero_lo, InterleaveLower(zero_lo, zero_lo));
+  const auto gate_hi = And(ConcatLowerUpper(dh_i64, zero_hi, lo_and),
+                           InterleaveUpper(dh_i64, lo_and, lo_and));
+  // Lowest 64-bit lane of the gate is always all-ones.
+  const auto gate_lo = InterleaveLower(Set(dh_i64, int64_t{-1}), lo_and);
+  const auto gate = Combine(di64, gate_hi, gate_lo);
+
+  // x & -x per 64-bit lane, then negated per lane of T.
+  const auto lowest_bit = And(bits, Neg(bits));
+  const auto first_in_lane = Neg(BitCast(di, lowest_bit));
+  return MaskFromVec(BitCast(d, And(first_in_lane, BitCast(di, gate))));
+}
+
+// AVX2 version: shifts the mask vector by one lane (filling with zero via
+// CombineShiftRightBytes) and forwards the result to SetBeforeFirst.
+template <class T>
+HWY_API Mask256<T> SetAtOrBeforeFirst(Mask256<T> mask) {
+  const Full256<T> d;
+  constexpr size_t kLanesPerBlock = MaxLanes(d) / 2;
+  constexpr size_t kShiftBytes = (kLanesPerBlock - 1) * sizeof(T);
+
+  const auto mask_vec = VecFromMask(d, mask);
+  // Lower block of the mask moved to the upper block, lower block zeroed.
+  const auto lower_in_upper = ConcatLowerLower(d, mask_vec, Zero(d));
+  const auto shifted =
+      CombineShiftRightBytes<kShiftBytes>(d, mask_vec, lower_in_upper);
+  return SetBeforeFirst(MaskFromVec(shifted));
+}
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// ------------------------------ Reductions
+
+namespace detail {
+
+// Per-block reduction helpers. On entry, each lane of a 128-bit block has
+// already been reduced with the corresponding lane of the other block, so the
+// same logic as x86_128 applies, running on both blocks at the same time.
+// A block holds between two (64-bit) and eight (16-bit) lanes.
+
+// ---- 64-bit lanes: one swap of the two lanes per block.
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_INLINE Vec256<T> SumOfLanes(Vec256<T> v10) {
+  const DFromV<Vec256<T>> d;
+  const Vec256<T> v01 = Reverse2(d, v10);
+  return Add(v10, v01);
+}
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_INLINE Vec256<T> MinOfLanes(Vec256<T> v10) {
+  const DFromV<Vec256<T>> d;
+  const Vec256<T> v01 = Reverse2(d, v10);
+  return Min(v10, v01);
+}
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_INLINE Vec256<T> MaxOfLanes(Vec256<T> v10) {
+  const DFromV<Vec256<T>> d;
+  const Vec256<T> v01 = Reverse2(d, v10);
+  return Max(v10, v01);
+}
+
+// ---- 32-bit lanes: reverse groups of four, then groups of two.
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE Vec256<T> SumOfLanes(Vec256<T> v3210) {
+  const DFromV<Vec256<T>> d;
+  const Vec256<T> pairs = Add(v3210, Reverse4(d, v3210));  // 03 12 12 03
+  const Vec256<T> pairs_swapped = Reverse2(d, pairs);      // 12 03 03 12
+  return Add(pairs, pairs_swapped);
+}
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE Vec256<T> MinOfLanes(Vec256<T> v3210) {
+  const DFromV<Vec256<T>> d;
+  const Vec256<T> pairs = Min(v3210, Reverse4(d, v3210));  // 03 12 12 03
+  const Vec256<T> pairs_swapped = Reverse2(d, pairs);      // 12 03 03 12
+  return Min(pairs, pairs_swapped);
+}
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_INLINE Vec256<T> MaxOfLanes(Vec256<T> v3210) {
+  const DFromV<Vec256<T>> d;
+  const Vec256<T> pairs = Max(v3210, Reverse4(d, v3210));  // 03 12 12 03
+  const Vec256<T> pairs_swapped = Reverse2(d, pairs);      // 12 03 03 12
+  return Max(pairs, pairs_swapped);
+}
+
+// ---- 16-bit lanes: reverse groups of eight, then four, then two.
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE Vec256<T> SumOfLanes(Vec256<T> v76543210) {
+  const DFromV<Vec256<T>> d;
+  // Lane contents after each step (upper half mirrors the lower half):
+  // 34 25 16 07, then 0347 1625 1625 0347.
+  const Vec256<T> step8 = Add(v76543210, Reverse8(d, v76543210));
+  const Vec256<T> step4 = Add(step8, Reverse4(d, step8));
+  const Vec256<T> step4_swapped = Reverse2(d, step4);
+  return Add(step4, step4_swapped);
+}
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE Vec256<T> MinOfLanes(Vec256<T> v76543210) {
+  const DFromV<Vec256<T>> d;
+  // Lane contents after each step (upper half mirrors the lower half):
+  // 34 25 16 07, then 0347 1625 1625 0347.
+  const Vec256<T> step8 = Min(v76543210, Reverse8(d, v76543210));
+  const Vec256<T> step4 = Min(step8, Reverse4(d, step8));
+  const Vec256<T> step4_swapped = Reverse2(d, step4);
+  return Min(step4, step4_swapped);
+}
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE Vec256<T> MaxOfLanes(Vec256<T> v76543210) {
+  const DFromV<Vec256<T>> d;
+  // Lane contents after each step (upper half mirrors the lower half):
+  // 34 25 16 07, then 0347 1625 1625 0347.
+  const Vec256<T> step8 = Max(v76543210, Reverse8(d, v76543210));
+  const Vec256<T> step4 = Max(step8, Reverse4(d, step8));
+  const Vec256<T> step4_swapped = Reverse2(d, step4);
+  return Max(step4, step4_swapped);
+}
+
+}  // namespace detail
+
+// Supported for lane types wider than 8 bits. The result is broadcast to all
+// lanes of the returned vector.
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> SumOfLanes(D /*d*/, VFromD<D> vHL) {
+  // Combine the two 128-bit blocks first, then reduce within each block.
+  const VFromD<D> across_blocks = Add(SwapAdjacentBlocks(vHL), vHL);
+  return detail::SumOfLanes(across_blocks);
+}
+// Returns the sum of all lanes of `v` as a scalar: lane 0 of SumOfLanes.
+//
+// When HWY_HAVE_FLOAT16 is set, float16_t lanes are excluded here and handled
+// by the overload below. Without the exclusion both templates would accept
+// exactly the same arguments (they would differ only in return type), making
+// every 256-bit ReduceSum call ambiguous.
+template <class D, HWY_IF_V_SIZE_D(D, 32),
+          hwy::EnableIf<!(HWY_HAVE_FLOAT16 &&
+                          hwy::IsSame<TFromD<D>, float16_t>())>* = nullptr>
+HWY_API TFromD<D> ReduceSum(D d, VFromD<D> v) {
+  return GetLane(SumOfLanes(d, v));
+}
+#if HWY_HAVE_FLOAT16
+// float16_t lanes: uses the dedicated reduction intrinsic.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F16_D(D)>
+HWY_API float16_t ReduceSum(D, VFromD<D> v) {
+  return _mm256_reduce_add_ph(v.raw);
+}
+#endif  // HWY_HAVE_FLOAT16
+// Minimum of all lanes, present in every lane of the returned vector.
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> MinOfLanes(D /*d*/, VFromD<D> vHL) {
+  // Combine the two 128-bit blocks first, then reduce within each block.
+  const VFromD<D> across_blocks = Min(SwapAdjacentBlocks(vHL), vHL);
+  return detail::MinOfLanes(across_blocks);
+}
+// Maximum of all lanes, present in every lane of the returned vector.
+template <class D, HWY_IF_V_SIZE_D(D, 32)>
+HWY_API VFromD<D> MaxOfLanes(D /*d*/, VFromD<D> vHL) {
+  // Combine the two 128-bit blocks first, then reduce within each block.
+  const VFromD<D> across_blocks = Max(SwapAdjacentBlocks(vHL), vHL);
+  return detail::MaxOfLanes(across_blocks);
+}
+
+// -------------------- LeadingZeroCount, TrailingZeroCount, HighestSetBitIndex
+
+#if HWY_TARGET <= HWY_AVX3
+// 32-bit lanes (signed or unsigned), 256-bit vectors.
+template <class V, HWY_IF_UI32(TFromV<V>), HWY_IF_V_SIZE_D(DFromV<V>, 32)>
+HWY_API V LeadingZeroCount(V v) {
+  const __m256i counts = _mm256_lzcnt_epi32(v.raw);
+  return V{counts};
+}
+
+// 64-bit lanes (signed or unsigned), 256-bit vectors.
+template <class V, HWY_IF_UI64(TFromV<V>), HWY_IF_V_SIZE_D(DFromV<V>, 32)>
+HWY_API V LeadingZeroCount(V v) {
+  const __m256i counts = _mm256_lzcnt_epi64(v.raw);
+  return V{counts};
+}
+#endif  // HWY_TARGET <= HWY_AVX3
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+// Note that the GCC warnings are not suppressed if we only wrap the *intrin.h -
+// the warning seems to be issued at the call site of intrinsics, i.e. our code.
+HWY_DIAGNOSTICS(pop)
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/ops/x86_512-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/x86_512-inl.h
new file mode 100644
index 0000000000..189d58dc3c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/ops/x86_512-inl.h
@@ -0,0 +1,6733 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// 512-bit AVX512 vectors and operations.
+// External include guard in highway.h - see comment there.
+
+// WARNING: most operations do not cross 128-bit block boundaries. In
+// particular, "Broadcast", pack and zip behavior may be surprising.
+
+// Must come before HWY_DIAGNOSTICS and HWY_COMPILER_CLANGCL
+#include "hwy/base.h"
+
+// Avoid uninitialized warnings in GCC's avx512fintrin.h - see
+// https://github.com/google/highway/issues/710
+HWY_DIAGNOSTICS(push)
+#if HWY_COMPILER_GCC_ACTUAL
+HWY_DIAGNOSTICS_OFF(disable : 4700, ignored "-Wuninitialized")
+HWY_DIAGNOSTICS_OFF(disable : 4701 4703 6001 26494,
+                    ignored "-Wmaybe-uninitialized")
+#endif
+
+#include <immintrin.h>  // AVX2+
+
+#if HWY_COMPILER_CLANGCL
+// Including <immintrin.h> should be enough, but Clang's headers helpfully skip
+// including these headers when _MSC_VER is defined, like when using clang-cl.
+// Include these directly here.
+// clang-format off
+#include <smmintrin.h>
+
+#include <avxintrin.h>
+// avxintrin defines __m256i and must come before avx2intrin.
+#include <avx2intrin.h>
+#include <f16cintrin.h>
+#include <fmaintrin.h>
+
+#include <avx512fintrin.h>
+#include <avx512vlintrin.h>
+#include <avx512bwintrin.h>
+#include <avx512vlbwintrin.h>
+#include <avx512dqintrin.h>
+#include <avx512vldqintrin.h>
+#include <avx512cdintrin.h>
+#include <avx512vlcdintrin.h>
+
+#if HWY_TARGET <= HWY_AVX3_DL
+#include <avx512bitalgintrin.h>
+#include <avx512vlbitalgintrin.h>
+#include <avx512vbmiintrin.h>
+#include <avx512vbmivlintrin.h>
+#include <avx512vbmi2intrin.h>
+#include <avx512vlvbmi2intrin.h>
+#include <avx512vpopcntdqintrin.h>
+#include <avx512vpopcntdqvlintrin.h>
+#include <avx512vnniintrin.h>
+#include <avx512vlvnniintrin.h>
+// Must come after avx512fintrin, else will not define 512-bit intrinsics.
+#include <vaesintrin.h>
+#include <vpclmulqdqintrin.h>
+#include <gfniintrin.h>
+#endif  // HWY_TARGET <= HWY_AVX3_DL
+
+#if HWY_TARGET <= HWY_AVX3_SPR
+#include <avx512fp16intrin.h>
+#include <avx512vlfp16intrin.h>
+#endif  // HWY_TARGET <= HWY_AVX3_SPR
+
+// clang-format on
+#endif  // HWY_COMPILER_CLANGCL
+
+// For half-width vectors. Already includes base.h and shared-inl.h.
+#include "hwy/ops/x86_256-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+namespace detail {
+
+// Maps a lane type to the raw 512-bit register type that stores it.
+// Default (all types without a specialization below): integer register.
+template <typename T>
+struct Raw512 {
+  typedef __m512i type;
+};
+#if HWY_HAVE_FLOAT16
+template <>
+struct Raw512<float16_t> {
+  typedef __m512h type;
+};
+#endif  // HWY_HAVE_FLOAT16
+template <>
+struct Raw512<float> {
+  typedef __m512 type;
+};
+template <>
+struct Raw512<double> {
+  typedef __m512d type;
+};
+
+// Maps sizeof(lane type) to the raw mask register type. A 64-byte vector has
+// 64 / size lanes, and each specialization picks the mask type with that
+// many bits.
+template <size_t size>
+struct RawMask512 {};
+template <>
+struct RawMask512<1> {
+  typedef __mmask64 type;
+};
+template <>
+struct RawMask512<2> {
+  typedef __mmask32 type;
+};
+template <>
+struct RawMask512<4> {
+  typedef __mmask16 type;
+};
+template <>
+struct RawMask512<8> {
+  typedef __mmask8 type;
+};
+
+}  // namespace detail
+
+// Thin wrapper around the raw 512-bit register that holds lanes of type T.
+template <typename T>
+class Vec512 {
+  using Raw = typename detail::Raw512<T>::type;
+
+ public:
+  using PrivateT = T;                                  // only for DFromV
+  static constexpr size_t kPrivateN = 64 / sizeof(T);  // only for DFromV
+
+  // Compound assignment operators. Each one forwards to the matching
+  // non-member binary operator, so it is only usable for lane types that
+  // provide that operator (for example, division exists only for f32/f64).
+  HWY_INLINE Vec512& operator*=(const Vec512 other) {
+    *this = *this * other;
+    return *this;
+  }
+  HWY_INLINE Vec512& operator/=(const Vec512 other) {
+    *this = *this / other;
+    return *this;
+  }
+  HWY_INLINE Vec512& operator+=(const Vec512 other) {
+    *this = *this + other;
+    return *this;
+  }
+  HWY_INLINE Vec512& operator-=(const Vec512 other) {
+    *this = *this - other;
+    return *this;
+  }
+  HWY_INLINE Vec512& operator&=(const Vec512 other) {
+    *this = *this & other;
+    return *this;
+  }
+  HWY_INLINE Vec512& operator|=(const Vec512 other) {
+    *this = *this | other;
+    return *this;
+  }
+  HWY_INLINE Vec512& operator^=(const Vec512 other) {
+    *this = *this ^ other;
+    return *this;
+  }
+
+  Raw raw;
+};
+
+// Mask for 512-bit vectors, held in a mask register with one bit per lane.
+// The raw register type is selected by the size of the lane type.
+template <typename T>
+struct Mask512 {
+  typedef typename detail::RawMask512<sizeof(T)>::type Raw;
+  Raw raw;
+};
+
+template <typename T>
+using Full512 = Simd<T, 64 / sizeof(T), 0>;
+
+// ------------------------------ BitCast
+
+namespace detail {
+
+// Reinterprets any raw 512-bit register as the integer register type.
+HWY_INLINE __m512i BitCastToInteger(__m512i v) {
+  return v;  // already an integer register
+}
+#if HWY_HAVE_FLOAT16
+HWY_INLINE __m512i BitCastToInteger(__m512h v) {
+  return _mm512_castph_si512(v);
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_INLINE __m512i BitCastToInteger(__m512 v) {
+  return _mm512_castps_si512(v);
+}
+HWY_INLINE __m512i BitCastToInteger(__m512d v) {
+  return _mm512_castpd_si512(v);
+}
+
+// Reinterprets a vector of any lane type as a vector of bytes.
+template <typename T>
+HWY_INLINE Vec512<uint8_t> BitCastToByte(Vec512<T> v) {
+  const __m512i bits = BitCastToInteger(v.raw);
+  return Vec512<uint8_t>{bits};
+}
+
+// Functor converting an integer register to the raw register type of T.
+// A struct is used because the return types differ, so plain function
+// overloading on the (identical) argument type is not possible.
+template <typename T>
+struct BitCastFromInteger512 {
+  HWY_INLINE __m512i operator()(__m512i v) {
+    return v;  // integer lanes: nothing to do
+  }
+};
+#if HWY_HAVE_FLOAT16
+template <>
+struct BitCastFromInteger512<float16_t> {
+  HWY_INLINE __m512h operator()(__m512i v) {
+    return _mm512_castsi512_ph(v);
+  }
+};
+#endif  // HWY_HAVE_FLOAT16
+template <>
+struct BitCastFromInteger512<float> {
+  HWY_INLINE __m512 operator()(__m512i v) {
+    return _mm512_castsi512_ps(v);
+  }
+};
+template <>
+struct BitCastFromInteger512<double> {
+  HWY_INLINE __m512d operator()(__m512i v) {
+    return _mm512_castsi512_pd(v);
+  }
+};
+
+// Reinterprets a vector of bytes as a vector of the lane type of D.
+template <class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_INLINE VFromD<D> BitCastFromByte(D /* tag */, Vec512<uint8_t> v) {
+  using T = TFromD<D>;
+  BitCastFromInteger512<T> from_integer;
+  return VFromD<D>{from_integer(v.raw)};
+}
+
+}  // namespace detail
+
+// Reinterprets the bits of `v` as a 64-byte vector with the lane type of D,
+// going through a byte vector as the common intermediate representation.
+template <class D, HWY_IF_V_SIZE_D(D, 64), typename FromT>
+HWY_API VFromD<D> BitCast(D d, Vec512<FromT> v) {
+  const Vec512<uint8_t> bytes = detail::BitCastToByte(v);
+  return detail::BitCastFromByte(d, bytes);
+}
+
+// ------------------------------ Set
+
+// Set: returns a 64-byte vector with every lane equal to `t`. One overload
+// per lane size/kind, each forwarding to the matching set1 intrinsic.
+
+// 8-bit lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Set(D /* tag */, TFromD<D> t) {
+  const char lane = static_cast<char>(t);  // NOLINT
+  return VFromD<D>{_mm512_set1_epi8(lane)};
+}
+// 16-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI16_D(D)>
+HWY_API VFromD<D> Set(D /* tag */, TFromD<D> t) {
+  const short lane = static_cast<short>(t);  // NOLINT
+  return VFromD<D>{_mm512_set1_epi16(lane)};
+}
+// 32-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI32_D(D)>
+HWY_API VFromD<D> Set(D /* tag */, TFromD<D> t) {
+  const int lane = static_cast<int>(t);
+  return VFromD<D>{_mm512_set1_epi32(lane)};
+}
+// 64-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI64_D(D)>
+HWY_API VFromD<D> Set(D /* tag */, TFromD<D> t) {
+  const long long lane = static_cast<long long>(t);  // NOLINT
+  return VFromD<D>{_mm512_set1_epi64(lane)};
+}
+// bfloat16_t is handled by x86_128-inl.h.
+#if HWY_HAVE_FLOAT16
+// float16_t lanes, only when the target has native float16 support.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F16_D(D)>
+HWY_API Vec512<float16_t> Set(D /* tag */, float16_t t) {
+  const __m512h lanes = _mm512_set1_ph(t);
+  return Vec512<float16_t>{lanes};
+}
+#endif  // HWY_HAVE_FLOAT16
+// float lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API Vec512<float> Set(D /* tag */, float t) {
+  const __m512 lanes = _mm512_set1_ps(t);
+  return Vec512<float>{lanes};
+}
+// double lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API Vec512<double> Set(D /* tag */, double t) {
+  const __m512d lanes = _mm512_set1_pd(t);
+  return Vec512<double>{lanes};
+}
+
+// ------------------------------ Zero (Set)
+
+// Zero: returns a 64-byte vector with all bits clear.
+// GCC before 9.1 lacked the setzero intrinsics, so that branch uses Set.
+#if HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 900
+
+// The return type is spelled Vec512<> because VFromD cannot be used here: it
+// is defined in terms of Zero.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_NOT_SPECIAL_FLOAT_D(D)>
+HWY_API Vec512<TFromD<D>> Zero(D d) {
+  using T = TFromD<D>;
+  return Set(d, T{0});
+}
+// bfloat16_t: build the zero as an unsigned vector and reuse its raw
+// register, which has the same Raw type.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_BF16_D(D)>
+HWY_API Vec512<bfloat16_t> Zero(D /* tag */) {
+  const RebindToUnsigned<D> du;
+  const auto zero_bits = Set(du, 0);
+  return Vec512<bfloat16_t>{zero_bits.raw};
+}
+// float16_t: same approach as bfloat16_t.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F16_D(D)>
+HWY_API Vec512<float16_t> Zero(D /* tag */) {
+  const RebindToUnsigned<D> du;
+  const auto zero_bits = Set(du, 0);
+  return Vec512<float16_t>{zero_bits.raw};
+}
+
+#else
+
+// Integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+HWY_API Vec512<TFromD<D>> Zero(D /* tag */) {
+  const __m512i zero = _mm512_setzero_si512();
+  return Vec512<TFromD<D>>{zero};
+}
+// bfloat16_t lanes (stored in an integer register).
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_BF16_D(D)>
+HWY_API Vec512<bfloat16_t> Zero(D /* tag */) {
+  const __m512i zero = _mm512_setzero_si512();
+  return Vec512<bfloat16_t>{zero};
+}
+// float16_t lanes: the register type depends on native float16 support.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F16_D(D)>
+HWY_API Vec512<float16_t> Zero(D /* tag */) {
+#if HWY_HAVE_FLOAT16
+  const __m512h zero = _mm512_setzero_ph();
+#else
+  const __m512i zero = _mm512_setzero_si512();
+#endif
+  return Vec512<float16_t>{zero};
+}
+// float lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API Vec512<float> Zero(D /* tag */) {
+  const __m512 zero = _mm512_setzero_ps();
+  return Vec512<float>{zero};
+}
+// double lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API Vec512<double> Zero(D /* tag */) {
+  const __m512d zero = _mm512_setzero_pd();
+  return Vec512<double>{zero};
+}
+
+#endif  // HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 900
+
+// ------------------------------ Undefined
+
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4700, ignored "-Wuninitialized")
+
+// Returns a vector with uninitialized elements.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+HWY_API Vec512<TFromD<D>> Undefined(D /* tag */) {
+  // Available on Clang 6.0, GCC 6.2, ICC 16.03, MSVC 19.14. All but ICC
+  // generate an XOR instruction.
+  return Vec512<TFromD<D>>{_mm512_undefined_epi32()};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_BF16_D(D)>
+HWY_API Vec512<bfloat16_t> Undefined(D /* tag */) {
+  return Vec512<bfloat16_t>{_mm512_undefined_epi32()};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F16_D(D)>
+HWY_API Vec512<float16_t> Undefined(D /* tag */) {
+#if HWY_HAVE_FLOAT16
+  return Vec512<float16_t>{_mm512_undefined_ph()};
+#else
+  return Vec512<float16_t>{_mm512_undefined_epi32()};
+#endif
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API Vec512<float> Undefined(D /* tag */) {
+  return Vec512<float>{_mm512_undefined_ps()};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API Vec512<double> Undefined(D /* tag */) {
+  return Vec512<double>{_mm512_undefined_pd()};
+}
+
+HWY_DIAGNOSTICS(pop)
+
+// ------------------------------ ResizeBitCast
+
+// ResizeBitCast: reinterprets a vector as a vector of a different size.
+// Each overload goes through byte vectors and a cast intrinsic.
+
+// 64-byte source to 16-byte result.
+template <class D, class FromV, HWY_IF_V_SIZE_V(FromV, 64),
+          HWY_IF_V_SIZE_D(D, 16)>
+HWY_API VFromD<D> ResizeBitCast(D d, FromV v) {
+  const Full512<uint8_t> du8_from;
+  const __m128i narrowed = _mm512_castsi512_si128(BitCast(du8_from, v).raw);
+  return BitCast(d, Vec128<uint8_t>{narrowed});
+}
+
+// Source of at most 16 bytes to 64-byte result.
+template <class D, class FromV, HWY_IF_V_SIZE_LE_V(FromV, 16),
+          HWY_IF_V_SIZE_D(D, 64)>
+HWY_API VFromD<D> ResizeBitCast(D d, FromV v) {
+  // First bring the source to a full 16-byte byte vector.
+  const Full128<uint8_t> du8_from;
+  const __m512i widened =
+      _mm512_castsi128_si512(ResizeBitCast(du8_from, v).raw);
+  return BitCast(d, Vec512<uint8_t>{widened});
+}
+
+// 32-byte source to 64-byte result.
+template <class D, class FromV, HWY_IF_V_SIZE_V(FromV, 32),
+          HWY_IF_V_SIZE_D(D, 64)>
+HWY_API VFromD<D> ResizeBitCast(D d, FromV v) {
+  const Full256<uint8_t> du8_from;
+  const __m512i widened = _mm512_castsi256_si512(BitCast(du8_from, v).raw);
+  return BitCast(d, Vec512<uint8_t>{widened});
+}
+
+// ----------------------------- Iota
+
+namespace detail {
+
+// Iota0: returns a 64-byte vector whose lane i holds the value i. The set_*
+// intrinsics take their arguments from the highest lane down to lane 0.
+
+// 8-bit lanes: 0..63.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE VFromD<D> Iota0(D d) {
+#if HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 900
+  // Missing set_epi8/16: load the lane indices from a table instead.
+  alignas(64) static constexpr TFromD<D> kLaneIndices[64] = {
+      0,  1,  2,  3,  4,  5,  6,  7,   //
+      8,  9,  10, 11, 12, 13, 14, 15,  //
+      16, 17, 18, 19, 20, 21, 22, 23,  //
+      24, 25, 26, 27, 28, 29, 30, 31,  //
+      32, 33, 34, 35, 36, 37, 38, 39,  //
+      40, 41, 42, 43, 44, 45, 46, 47,  //
+      48, 49, 50, 51, 52, 53, 54, 55,  //
+      56, 57, 58, 59, 60, 61, 62, 63};
+  return Load(d, kLaneIndices);
+#else
+  (void)d;
+  return VFromD<D>{_mm512_set_epi8(
+      char{63}, char{62}, char{61}, char{60},  //
+      char{59}, char{58}, char{57}, char{56},  //
+      char{55}, char{54}, char{53}, char{52},  //
+      char{51}, char{50}, char{49}, char{48},  //
+      char{47}, char{46}, char{45}, char{44},  //
+      char{43}, char{42}, char{41}, char{40},  //
+      char{39}, char{38}, char{37}, char{36},  //
+      char{35}, char{34}, char{33}, char{32},  //
+      char{31}, char{30}, char{29}, char{28},  //
+      char{27}, char{26}, char{25}, char{24},  //
+      char{23}, char{22}, char{21}, char{20},  //
+      char{19}, char{18}, char{17}, char{16},  //
+      char{15}, char{14}, char{13}, char{12},  //
+      char{11}, char{10}, char{9}, char{8},    //
+      char{7}, char{6}, char{5}, char{4},      //
+      char{3}, char{2}, char{1}, char{0})};
+#endif
+}
+
+// 16-bit integer lanes: 0..31.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI16_D(D)>
+HWY_INLINE VFromD<D> Iota0(D d) {
+#if HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 900
+  // Missing set_epi8/16: load the lane indices from a table instead.
+  alignas(64) static constexpr TFromD<D> kLaneIndices[32] = {
+      0,  1,  2,  3,  4,  5,  6,  7,   //
+      8,  9,  10, 11, 12, 13, 14, 15,  //
+      16, 17, 18, 19, 20, 21, 22, 23,  //
+      24, 25, 26, 27, 28, 29, 30, 31};
+  return Load(d, kLaneIndices);
+#else
+  (void)d;
+  return VFromD<D>{_mm512_set_epi16(
+      int16_t{31}, int16_t{30}, int16_t{29}, int16_t{28},  //
+      int16_t{27}, int16_t{26}, int16_t{25}, int16_t{24},  //
+      int16_t{23}, int16_t{22}, int16_t{21}, int16_t{20},  //
+      int16_t{19}, int16_t{18}, int16_t{17}, int16_t{16},  //
+      int16_t{15}, int16_t{14}, int16_t{13}, int16_t{12},  //
+      int16_t{11}, int16_t{10}, int16_t{9}, int16_t{8},    //
+      int16_t{7}, int16_t{6}, int16_t{5}, int16_t{4},      //
+      int16_t{3}, int16_t{2}, int16_t{1}, int16_t{0})};
+#endif
+}
+
+#if HWY_HAVE_FLOAT16
+// float16_t lanes: 0..31, only when the target has native float16 support.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F16_D(D)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {
+  return VFromD<D>{_mm512_set_ph(
+      float16_t{31}, float16_t{30}, float16_t{29}, float16_t{28},  //
+      float16_t{27}, float16_t{26}, float16_t{25}, float16_t{24},  //
+      float16_t{23}, float16_t{22}, float16_t{21}, float16_t{20},  //
+      float16_t{19}, float16_t{18}, float16_t{17}, float16_t{16},  //
+      float16_t{15}, float16_t{14}, float16_t{13}, float16_t{12},  //
+      float16_t{11}, float16_t{10}, float16_t{9}, float16_t{8},    //
+      float16_t{7}, float16_t{6}, float16_t{5}, float16_t{4},      //
+      float16_t{3}, float16_t{2}, float16_t{1}, float16_t{0})};
+}
+#endif  // HWY_HAVE_FLOAT16
+
+// 32-bit integer lanes: 0..15.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI32_D(D)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {
+  return VFromD<D>{_mm512_set_epi32(
+      int32_t{15}, int32_t{14}, int32_t{13}, int32_t{12},  //
+      int32_t{11}, int32_t{10}, int32_t{9}, int32_t{8},    //
+      int32_t{7}, int32_t{6}, int32_t{5}, int32_t{4},      //
+      int32_t{3}, int32_t{2}, int32_t{1}, int32_t{0})};
+}
+
+// 64-bit integer lanes: 0..7.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI64_D(D)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {
+  return VFromD<D>{_mm512_set_epi64(
+      int64_t{7}, int64_t{6}, int64_t{5}, int64_t{4},  //
+      int64_t{3}, int64_t{2}, int64_t{1}, int64_t{0})};
+}
+
+// float lanes: 0..15.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {
+  return VFromD<D>{_mm512_set_ps(
+      15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f,  //
+      7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f)};
+}
+
+// double lanes: 0..7.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_INLINE VFromD<D> Iota0(D /*d*/) {
+  const __m512d lanes = _mm512_set_pd(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
+  return VFromD<D>{lanes};
+}
+
+}  // namespace detail
+
+// Returns a vector whose lane i holds `first` (converted to the lane type)
+// plus i.
+template <class D, typename T2, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API VFromD<D> Iota(D d, const T2 first) {
+  using T = TFromD<D>;
+  const VFromD<D> lane_indices = detail::Iota0(d);
+  const VFromD<D> start = Set(d, static_cast<T>(first));
+  return lane_indices + start;
+}
+
+// ================================================== LOGICAL
+
+// ------------------------------ Not
+
+// Bitwise NOT of every lane, computed on the unsigned-integer view of `v`.
+template <typename T>
+HWY_API Vec512<T> Not(const Vec512<T> v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const __m512i bits = BitCast(du, v).raw;
+  // Ternary-logic table 0x55 yields the complement of the third operand; all
+  // three operands are the same register here.
+  const __m512i inverted = _mm512_ternarylogic_epi32(bits, bits, bits, 0x55);
+  return BitCast(d, VFromD<decltype(du)>{inverted});
+}
+
+// ------------------------------ And
+
+// Bitwise AND of `a` and `b`.
+//
+// Generic overload: the operands are first viewed as unsigned-integer vectors
+// so that the integer intrinsic always receives __m512i, including for lane
+// types whose raw register is not __m512i (float16_t when HWY_HAVE_FLOAT16).
+// This matches how Not/Xor3/Or3 access their operands.
+template <typename T>
+HWY_API Vec512<T> And(const Vec512<T> a, const Vec512<T> b) {
+  const DFromV<decltype(a)> d;  // for float16_t
+  const RebindToUnsigned<decltype(d)> du;
+  return BitCast(d, VFromD<decltype(du)>{_mm512_and_si512(
+                        BitCast(du, a).raw, BitCast(du, b).raw)});
+}
+
+// float and double use the dedicated floating-point intrinsics.
+HWY_API Vec512<float> And(const Vec512<float> a, const Vec512<float> b) {
+  return Vec512<float>{_mm512_and_ps(a.raw, b.raw)};
+}
+HWY_API Vec512<double> And(const Vec512<double> a, const Vec512<double> b) {
+  return Vec512<double>{_mm512_and_pd(a.raw, b.raw)};
+}
+
+// ------------------------------ AndNot
+
+// Returns ~not_mask & mask.
+//
+// Generic overload: the operands are first viewed as unsigned-integer vectors
+// so that the integer intrinsic always receives __m512i, including for lane
+// types whose raw register is not __m512i (float16_t when HWY_HAVE_FLOAT16).
+// This matches how Not/Xor3/Or3 access their operands.
+template <typename T>
+HWY_API Vec512<T> AndNot(const Vec512<T> not_mask, const Vec512<T> mask) {
+  const DFromV<decltype(mask)> d;  // for float16_t
+  const RebindToUnsigned<decltype(d)> du;
+  return BitCast(d, VFromD<decltype(du)>{_mm512_andnot_si512(
+                        BitCast(du, not_mask).raw, BitCast(du, mask).raw)});
+}
+// float and double use the dedicated floating-point intrinsics.
+HWY_API Vec512<float> AndNot(const Vec512<float> not_mask,
+                             const Vec512<float> mask) {
+  return Vec512<float>{_mm512_andnot_ps(not_mask.raw, mask.raw)};
+}
+HWY_API Vec512<double> AndNot(const Vec512<double> not_mask,
+                              const Vec512<double> mask) {
+  return Vec512<double>{_mm512_andnot_pd(not_mask.raw, mask.raw)};
+}
+
+// ------------------------------ Or
+
+// Bitwise OR of `a` and `b`.
+//
+// Generic overload: the operands are first viewed as unsigned-integer vectors
+// so that the integer intrinsic always receives __m512i, including for lane
+// types whose raw register is not __m512i (float16_t when HWY_HAVE_FLOAT16).
+// This matches how Not/Xor3/Or3 access their operands.
+template <typename T>
+HWY_API Vec512<T> Or(const Vec512<T> a, const Vec512<T> b) {
+  const DFromV<decltype(a)> d;  // for float16_t
+  const RebindToUnsigned<decltype(d)> du;
+  return BitCast(d, VFromD<decltype(du)>{_mm512_or_si512(
+                        BitCast(du, a).raw, BitCast(du, b).raw)});
+}
+
+// float and double use the dedicated floating-point intrinsics.
+HWY_API Vec512<float> Or(const Vec512<float> a, const Vec512<float> b) {
+  return Vec512<float>{_mm512_or_ps(a.raw, b.raw)};
+}
+HWY_API Vec512<double> Or(const Vec512<double> a, const Vec512<double> b) {
+  return Vec512<double>{_mm512_or_pd(a.raw, b.raw)};
+}
+
+// ------------------------------ Xor
+
+// Bitwise XOR of `a` and `b`.
+//
+// Generic overload: the operands are first viewed as unsigned-integer vectors
+// so that the integer intrinsic always receives __m512i, including for lane
+// types whose raw register is not __m512i (float16_t when HWY_HAVE_FLOAT16).
+// This matches how Not/Xor3/Or3 access their operands.
+template <typename T>
+HWY_API Vec512<T> Xor(const Vec512<T> a, const Vec512<T> b) {
+  const DFromV<decltype(a)> d;  // for float16_t
+  const RebindToUnsigned<decltype(d)> du;
+  return BitCast(d, VFromD<decltype(du)>{_mm512_xor_si512(
+                        BitCast(du, a).raw, BitCast(du, b).raw)});
+}
+
+// float and double use the dedicated floating-point intrinsics.
+HWY_API Vec512<float> Xor(const Vec512<float> a, const Vec512<float> b) {
+  return Vec512<float>{_mm512_xor_ps(a.raw, b.raw)};
+}
+HWY_API Vec512<double> Xor(const Vec512<double> a, const Vec512<double> b) {
+  return Vec512<double>{_mm512_xor_pd(a.raw, b.raw)};
+}
+
+// ------------------------------ Xor3
+// Returns x1 ^ x2 ^ x3 using a single ternary-logic operation.
+template <typename T>
+HWY_API Vec512<T> Xor3(Vec512<T> x1, Vec512<T> x2, Vec512<T> x3) {
+  const DFromV<decltype(x1)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const __m512i a = BitCast(du, x1).raw;
+  const __m512i b = BitCast(du, x2).raw;
+  const __m512i c = BitCast(du, x3).raw;
+  // Truth table 0x96: set where an odd number of the inputs is set.
+  return BitCast(d,
+                 VFromD<decltype(du)>{_mm512_ternarylogic_epi64(a, b, c, 0x96)});
+}
+
+// ------------------------------ Or3
+// Returns o1 | o2 | o3 in a single ternary-logic operation.
+template <typename T>
+HWY_API Vec512<T> Or3(Vec512<T> o1, Vec512<T> o2, Vec512<T> o3) {
+  const DFromV<decltype(o1)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  // Truth table 0xFE has every bit set except bit 0, i.e. the result bit is 0
+  // only when all three input bits are 0.
+  return BitCast(d, VFromD<decltype(du)>{_mm512_ternarylogic_epi64(
+                        BitCast(du, o1).raw, BitCast(du, o2).raw,
+                        BitCast(du, o3).raw, 0xFE)});
+}
+
+// ------------------------------ OrAnd
+// Returns o | (a1 & a2) in a single ternary-logic operation.
+template <typename T>
+HWY_API Vec512<T> OrAnd(Vec512<T> o, Vec512<T> a1, Vec512<T> a2) {
+  const DFromV<decltype(o)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  // Truth table 0xF8 has bits 3..7 set: the result bit is 1 when the first
+  // input bit is 1, or when both the second and third are 1.
+  return BitCast(d, VFromD<decltype(du)>{_mm512_ternarylogic_epi64(
+                        BitCast(du, o).raw, BitCast(du, a1).raw,
+                        BitCast(du, a2).raw, 0xF8)});
+}
+
+// ------------------------------ IfVecThenElse
+// Bitwise select: each result bit comes from `yes` where the corresponding
+// bit of `mask` is 1 and from `no` where it is 0.
+template <typename T>
+HWY_API Vec512<T> IfVecThenElse(Vec512<T> mask, Vec512<T> yes, Vec512<T> no) {
+  const DFromV<decltype(yes)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  // Truth table 0xCA has bits 1, 3, 6 and 7 set, which is (first ? second :
+  // third) for every bit position.
+  const __m512i selected = _mm512_ternarylogic_epi64(
+      BitCast(du, mask).raw, BitCast(du, yes).raw, BitCast(du, no).raw, 0xCA);
+  return BitCast(d, VU{selected});
+}
+
+// ------------------------------ Operator overloads (internal-only if float)
+
+// Operator spellings of the named bitwise functions; each one forwards
+// unchanged to And / Or / Xor respectively.
+template <typename T>
+HWY_API Vec512<T> operator&(const Vec512<T> a, const Vec512<T> b) {
+  return And(a, b);
+}
+
+template <typename T>
+HWY_API Vec512<T> operator|(const Vec512<T> a, const Vec512<T> b) {
+  return Or(a, b);
+}
+
+template <typename T>
+HWY_API Vec512<T> operator^(const Vec512<T> a, const Vec512<T> b) {
+  return Xor(a, b);
+}
+
+// ------------------------------ PopulationCount
+
+// 8/16 require BITALG, 32/64 require VPOPCNTDQ.
+#if HWY_TARGET <= HWY_AVX3_DL
+
+#ifdef HWY_NATIVE_POPCNT
+#undef HWY_NATIVE_POPCNT
+#else
+#define HWY_NATIVE_POPCNT
+#endif
+
+namespace detail {
+
+// One overload per lane size in bytes; the public PopulationCount selects
+// among them by passing hwy::SizeTag<sizeof(T)>.
+
+// 8-bit lanes.
+template <typename T>
+HWY_INLINE Vec512<T> PopulationCount(hwy::SizeTag<1> /* tag */, Vec512<T> v) {
+  const __m512i bit_counts = _mm512_popcnt_epi8(v.raw);
+  return Vec512<T>{bit_counts};
+}
+// 16-bit lanes.
+template <typename T>
+HWY_INLINE Vec512<T> PopulationCount(hwy::SizeTag<2> /* tag */, Vec512<T> v) {
+  const __m512i bit_counts = _mm512_popcnt_epi16(v.raw);
+  return Vec512<T>{bit_counts};
+}
+// 32-bit lanes.
+template <typename T>
+HWY_INLINE Vec512<T> PopulationCount(hwy::SizeTag<4> /* tag */, Vec512<T> v) {
+  const __m512i bit_counts = _mm512_popcnt_epi32(v.raw);
+  return Vec512<T>{bit_counts};
+}
+// 64-bit lanes.
+template <typename T>
+HWY_INLINE Vec512<T> PopulationCount(hwy::SizeTag<8> /* tag */, Vec512<T> v) {
+  const __m512i bit_counts = _mm512_popcnt_epi64(v.raw);
+  return Vec512<T>{bit_counts};
+}
+
+}  // namespace detail
+
+// Per-lane population count; picks the detail:: overload that matches the
+// lane size in bytes.
+template <typename T>
+HWY_API Vec512<T> PopulationCount(Vec512<T> v) {
+  const auto lane_size_tag = hwy::SizeTag<sizeof(T)>();
+  return detail::PopulationCount(lane_size_tag, v);
+}
+
+#endif  // HWY_TARGET <= HWY_AVX3_DL
+
+// ================================================== MASK
+
+// ------------------------------ FirstN
+
+// Possibilities for constructing a bitmask of N ones:
+// - kshift* only consider the lowest byte of the shift count, so they would
+//   not correctly handle large n.
+// - Scalar shifts >= 64 are UB.
+// - BZHI has the desired semantics; we assume AVX-512 implies BMI2. However,
+//   we need 64-bit masks for sizeof(T) == 1, so special-case 32-bit builds.
+
+#if HWY_ARCH_X86_32
+namespace detail {
+
+// Helpers for FirstN on 32-bit builds. Only the 32-bit BZHI is used here, so
+// the 64-lane mask (sizeof(T) == 1) is assembled from two 32-bit halves.
+
+// 32 bit mask is sufficient for lane size >= 2.
+template <typename T, HWY_IF_NOT_T_SIZE(T, 1)>
+HWY_INLINE Mask512<T> FirstN(size_t n) {
+  Mask512<T> m;
+  const uint32_t all = ~uint32_t{0};
+  // BZHI only looks at the lower 8 bits of n!
+  m.raw = static_cast<decltype(m.raw)>((n > 255) ? all : _bzhi_u32(all, n));
+  return m;
+}
+
+// 8-bit lanes need a 64-bit mask: lo_mask covers lanes 0..31 and hi_mask
+// covers lanes 32..63.
+#if HWY_COMPILER_MSVC >= 1920 || HWY_COMPILER_GCC_ACTUAL >= 900 || \
+    HWY_COMPILER_CLANG || HWY_COMPILER_ICC
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_INLINE Mask512<T> FirstN(size_t n) {
+  uint32_t lo_mask;
+  uint32_t hi_mask;
+  uint32_t hi_mask_len;
+  // Fast path when the compiler can prove n >= 32: the low half is all-ones
+  // and only the length of the high half remains to be computed.
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(n >= 32) && n >= 32) {
+    if (__builtin_constant_p(n >= 64) && n >= 64) {
+      hi_mask_len = 32u;
+    } else {
+      // Clamping n to 287 keeps hi_mask_len <= 255, the largest index BZHI
+      // distinguishes (it only reads the lower 8 bits).
+      hi_mask_len = ((n <= 287) ? static_cast<uint32_t>(n) : 287u) - 32u;
+    }
+    lo_mask = hi_mask = 0xFFFFFFFFu;
+  } else  // NOLINT(readability/braces)
+#endif
+  {
+    // Clamp to 255 for the same BZHI lower-8-bits reason.
+    const uint32_t lo_mask_len = (n <= 255) ? static_cast<uint32_t>(n) : 255u;
+    lo_mask = _bzhi_u32(0xFFFFFFFFu, lo_mask_len);
+
+    // If it is known at compile time that at most 32 lanes are requested, the
+    // high half is empty and the low half alone is the answer.
+#if HWY_COMPILER_GCC
+    if (__builtin_constant_p(lo_mask_len <= 32) && lo_mask_len <= 32) {
+      return Mask512<T>{static_cast<__mmask64>(lo_mask)};
+    }
+#endif
+
+    // hi_mask_len = lo_mask_len - 32. The borrow out (set when
+    // lo_mask_len < 32) becomes the carry-in of 0xFFFFFFFF + 0, so hi_mask
+    // wraps to 0 when there is a borrow and stays all-ones otherwise.
+    _addcarry_u32(_subborrow_u32(0, lo_mask_len, 32u, &hi_mask_len),
+                  0xFFFFFFFFu, 0u, &hi_mask);
+  }
+  hi_mask = _bzhi_u32(hi_mask, hi_mask_len);
+  // If the combined value is a compile-time constant, build the mask from the
+  // 64-bit integer; otherwise join the two halves with kunpackd.
+#if HWY_COMPILER_GCC && !HWY_COMPILER_ICC
+  if (__builtin_constant_p((static_cast<uint64_t>(hi_mask) << 32) | lo_mask))
+#endif
+    return Mask512<T>{static_cast<__mmask64>(
+        (static_cast<uint64_t>(hi_mask) << 32) | lo_mask)};
+#if HWY_COMPILER_GCC && !HWY_COMPILER_ICC
+  else
+    return Mask512<T>{_mm512_kunpackd(static_cast<__mmask64>(hi_mask),
+                                      static_cast<__mmask64>(lo_mask))};
+#endif
+}
+#else
+// Fallback for compilers not matched above: plain 64-bit scalar arithmetic,
+// with n >= 64 special-cased because a shift by 64 or more is undefined.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_INLINE Mask512<T> FirstN(size_t n) {
+  const uint64_t bits = n < 64 ? ((1ULL << n) - 1) : ~uint64_t{0};
+  return Mask512<T>{static_cast<__mmask64>(bits)};
+}
+#endif
+}  // namespace detail
+#endif  // HWY_ARCH_X86_32
+
+// Returns a mask whose lowest `n` bits are set; the result is narrowed to the
+// mask type of D. On x86-64 a single 64-bit BZHI builds the bitmask; other
+// builds defer to the 32-bit helpers in detail::.
+template <class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API MFromD<D> FirstN(D /* tag */, size_t n) {
+#if HWY_ARCH_X86_64
+  MFromD<D> m;
+  const uint64_t all = ~uint64_t{0};
+  // BZHI only looks at the lower 8 bits of n!
+  m.raw = static_cast<decltype(m.raw)>((n > 255) ? all : _bzhi_u64(all, n));
+  return m;
+#else
+  // This template is parameterized on the tag D only, so the lane type has to
+  // be derived from it; there is no `T` in scope here.
+  return detail::FirstN<TFromD<D>>(n);
+#endif  // HWY_ARCH_X86_64
+}
+
+// ------------------------------ IfThenElse
+
+// Returns mask ? yes : no.
+
+namespace detail {
+
+// Integer lanes (signed or unsigned), one overload per lane size in bytes.
+// The blend intrinsics take the operand for cleared mask bits first, hence
+// the (no, yes) argument order.
+template <typename T>
+HWY_INLINE Vec512<T> IfThenElse(hwy::SizeTag<1> /* tag */,
+                                const Mask512<T> mask, const Vec512<T> yes,
+                                const Vec512<T> no) {
+  const __m512i blended = _mm512_mask_blend_epi8(mask.raw, no.raw, yes.raw);
+  return Vec512<T>{blended};
+}
+template <typename T>
+HWY_INLINE Vec512<T> IfThenElse(hwy::SizeTag<2> /* tag */,
+                                const Mask512<T> mask, const Vec512<T> yes,
+                                const Vec512<T> no) {
+  const __m512i blended = _mm512_mask_blend_epi16(mask.raw, no.raw, yes.raw);
+  return Vec512<T>{blended};
+}
+template <typename T>
+HWY_INLINE Vec512<T> IfThenElse(hwy::SizeTag<4> /* tag */,
+                                const Mask512<T> mask, const Vec512<T> yes,
+                                const Vec512<T> no) {
+  const __m512i blended = _mm512_mask_blend_epi32(mask.raw, no.raw, yes.raw);
+  return Vec512<T>{blended};
+}
+template <typename T>
+HWY_INLINE Vec512<T> IfThenElse(hwy::SizeTag<8> /* tag */,
+                                const Mask512<T> mask, const Vec512<T> yes,
+                                const Vec512<T> no) {
+  const __m512i blended = _mm512_mask_blend_epi64(mask.raw, no.raw, yes.raw);
+  return Vec512<T>{blended};
+}
+
+}  // namespace detail
+
+// Non-float lanes: dispatch to the detail:: overload for this lane size.
+template <typename T, HWY_IF_NOT_FLOAT(T)>
+HWY_API Vec512<T> IfThenElse(const Mask512<T> mask, const Vec512<T> yes,
+                             const Vec512<T> no) {
+  const auto lane_size_tag = hwy::SizeTag<sizeof(T)>();
+  return detail::IfThenElse(lane_size_tag, mask, yes, no);
+}
+// Floating-point overloads; as with the integer blends, the operand for
+// cleared mask bits (`no`) is passed first.
+#if HWY_HAVE_FLOAT16
+HWY_API Vec512<float16_t> IfThenElse(Mask512<float16_t> mask,
+                                     Vec512<float16_t> yes,
+                                     Vec512<float16_t> no) {
+  const auto blended = _mm512_mask_blend_ph(mask.raw, no.raw, yes.raw);
+  return Vec512<float16_t>{blended};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec512<float> IfThenElse(Mask512<float> mask, Vec512<float> yes,
+                                 Vec512<float> no) {
+  const auto blended = _mm512_mask_blend_ps(mask.raw, no.raw, yes.raw);
+  return Vec512<float>{blended};
+}
+HWY_API Vec512<double> IfThenElse(Mask512<double> mask, Vec512<double> yes,
+                                  Vec512<double> no) {
+  const auto blended = _mm512_mask_blend_pd(mask.raw, no.raw, yes.raw);
+  return Vec512<double>{blended};
+}
+
+namespace detail {
+
+// Integer lanes by size in bytes: a zero-masked move keeps `yes` where the
+// mask bit is set and produces zero elsewhere.
+template <typename T>
+HWY_INLINE Vec512<T> IfThenElseZero(hwy::SizeTag<1> /* tag */,
+                                    const Mask512<T> mask,
+                                    const Vec512<T> yes) {
+  const __m512i kept = _mm512_maskz_mov_epi8(mask.raw, yes.raw);
+  return Vec512<T>{kept};
+}
+template <typename T>
+HWY_INLINE Vec512<T> IfThenElseZero(hwy::SizeTag<2> /* tag */,
+                                    const Mask512<T> mask,
+                                    const Vec512<T> yes) {
+  const __m512i kept = _mm512_maskz_mov_epi16(mask.raw, yes.raw);
+  return Vec512<T>{kept};
+}
+template <typename T>
+HWY_INLINE Vec512<T> IfThenElseZero(hwy::SizeTag<4> /* tag */,
+                                    const Mask512<T> mask,
+                                    const Vec512<T> yes) {
+  const __m512i kept = _mm512_maskz_mov_epi32(mask.raw, yes.raw);
+  return Vec512<T>{kept};
+}
+template <typename T>
+HWY_INLINE Vec512<T> IfThenElseZero(hwy::SizeTag<8> /* tag */,
+                                    const Mask512<T> mask,
+                                    const Vec512<T> yes) {
+  const __m512i kept = _mm512_maskz_mov_epi64(mask.raw, yes.raw);
+  return Vec512<T>{kept};
+}
+
+}  // namespace detail
+
+// Returns mask ? yes : 0. Lane types other than float/double dispatch on lane
+// size; float and double have dedicated overloads.
+template <typename T, HWY_IF_NOT_FLOAT3264(T)>
+HWY_API Vec512<T> IfThenElseZero(const Mask512<T> mask, const Vec512<T> yes) {
+  const auto lane_size_tag = hwy::SizeTag<sizeof(T)>();
+  return detail::IfThenElseZero(lane_size_tag, mask, yes);
+}
+HWY_API Vec512<float> IfThenElseZero(Mask512<float> mask, Vec512<float> yes) {
+  const auto kept = _mm512_maskz_mov_ps(mask.raw, yes.raw);
+  return Vec512<float>{kept};
+}
+HWY_API Vec512<double> IfThenElseZero(Mask512<double> mask,
+                                      Vec512<double> yes) {
+  const auto kept = _mm512_maskz_mov_pd(mask.raw, yes.raw);
+  return Vec512<double>{kept};
+}
+
+namespace detail {
+
+// Integer lanes by size in bytes. Each overload uses a masked op with `no` as
+// the pass-through source: lanes whose mask bit is set receive no OP no,
+// which is zero for both subtraction and xor; the rest keep `no`.
+template <typename T>
+HWY_INLINE Vec512<T> IfThenZeroElse(hwy::SizeTag<1> /* tag */,
+                                    const Mask512<T> mask, const Vec512<T> no) {
+  // There is no 8/16-bit masked xor, so subtract instead (equally fast).
+  const __m512i zeroed =
+      _mm512_mask_sub_epi8(no.raw, mask.raw, no.raw, no.raw);
+  return Vec512<T>{zeroed};
+}
+template <typename T>
+HWY_INLINE Vec512<T> IfThenZeroElse(hwy::SizeTag<2> /* tag */,
+                                    const Mask512<T> mask, const Vec512<T> no) {
+  const __m512i zeroed =
+      _mm512_mask_sub_epi16(no.raw, mask.raw, no.raw, no.raw);
+  return Vec512<T>{zeroed};
+}
+template <typename T>
+HWY_INLINE Vec512<T> IfThenZeroElse(hwy::SizeTag<4> /* tag */,
+                                    const Mask512<T> mask, const Vec512<T> no) {
+  const __m512i zeroed =
+      _mm512_mask_xor_epi32(no.raw, mask.raw, no.raw, no.raw);
+  return Vec512<T>{zeroed};
+}
+template <typename T>
+HWY_INLINE Vec512<T> IfThenZeroElse(hwy::SizeTag<8> /* tag */,
+                                    const Mask512<T> mask, const Vec512<T> no) {
+  const __m512i zeroed =
+      _mm512_mask_xor_epi64(no.raw, mask.raw, no.raw, no.raw);
+  return Vec512<T>{zeroed};
+}
+
+}  // namespace detail
+
+// Returns mask ? 0 : no. Lane types other than float/double dispatch on lane
+// size; float and double use a masked xor of `no` with itself.
+template <typename T, HWY_IF_NOT_FLOAT3264(T)>
+HWY_API Vec512<T> IfThenZeroElse(const Mask512<T> mask, const Vec512<T> no) {
+  const auto lane_size_tag = hwy::SizeTag<sizeof(T)>();
+  return detail::IfThenZeroElse(lane_size_tag, mask, no);
+}
+HWY_API Vec512<float> IfThenZeroElse(Mask512<float> mask, Vec512<float> no) {
+  const auto zeroed = _mm512_mask_xor_ps(no.raw, mask.raw, no.raw, no.raw);
+  return Vec512<float>{zeroed};
+}
+HWY_API Vec512<double> IfThenZeroElse(Mask512<double> mask, Vec512<double> no) {
+  const auto zeroed = _mm512_mask_xor_pd(no.raw, mask.raw, no.raw, no.raw);
+  return Vec512<double>{zeroed};
+}
+
+// Selects `yes` in lanes where `v` is negative and `no` elsewhere.
+template <typename T>
+HWY_API Vec512<T> IfNegativeThenElse(Vec512<T> v, Vec512<T> yes, Vec512<T> no) {
+  static_assert(IsSigned<T>(), "Only works for signed/float");
+  // On AVX3, MaskFromVec inspects only the most significant bit of each lane,
+  // which is the sign bit.
+  const auto is_negative = MaskFromVec(v);
+  return IfThenElse(is_negative, yes, no);
+}
+
+// Replaces lanes whose sign bit is set with zero; other lanes pass through.
+template <typename T, HWY_IF_FLOAT(T)>
+HWY_API Vec512<T> ZeroIfNegative(const Vec512<T> v) {
+  // On AVX3, MaskFromVec inspects only the most significant bit of each lane.
+  const auto is_negative = MaskFromVec(v);
+  return IfThenZeroElse(is_negative, v);
+}
+
+// ================================================== ARITHMETIC
+
+// ------------------------------ Addition
+
+// Unsigned
+// Lane-wise addition, one overload per lane type. Each forwards to the
+// _mm512_add_* intrinsic of matching lane width; signed and unsigned integers
+// share the same epi8/16/32/64 intrinsics.
+HWY_API Vec512<uint8_t> operator+(Vec512<uint8_t> a, Vec512<uint8_t> b) {
+  return Vec512<uint8_t>{_mm512_add_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<uint16_t> operator+(Vec512<uint16_t> a, Vec512<uint16_t> b) {
+  return Vec512<uint16_t>{_mm512_add_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<uint32_t> operator+(Vec512<uint32_t> a, Vec512<uint32_t> b) {
+  return Vec512<uint32_t>{_mm512_add_epi32(a.raw, b.raw)};
+}
+HWY_API Vec512<uint64_t> operator+(Vec512<uint64_t> a, Vec512<uint64_t> b) {
+  return Vec512<uint64_t>{_mm512_add_epi64(a.raw, b.raw)};
+}
+
+// Signed
+HWY_API Vec512<int8_t> operator+(Vec512<int8_t> a, Vec512<int8_t> b) {
+  return Vec512<int8_t>{_mm512_add_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<int16_t> operator+(Vec512<int16_t> a, Vec512<int16_t> b) {
+  return Vec512<int16_t>{_mm512_add_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<int32_t> operator+(Vec512<int32_t> a, Vec512<int32_t> b) {
+  return Vec512<int32_t>{_mm512_add_epi32(a.raw, b.raw)};
+}
+HWY_API Vec512<int64_t> operator+(Vec512<int64_t> a, Vec512<int64_t> b) {
+  return Vec512<int64_t>{_mm512_add_epi64(a.raw, b.raw)};
+}
+
+// Float
+#if HWY_HAVE_FLOAT16
+HWY_API Vec512<float16_t> operator+(Vec512<float16_t> a, Vec512<float16_t> b) {
+  return Vec512<float16_t>{_mm512_add_ph(a.raw, b.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec512<float> operator+(Vec512<float> a, Vec512<float> b) {
+  return Vec512<float>{_mm512_add_ps(a.raw, b.raw)};
+}
+HWY_API Vec512<double> operator+(Vec512<double> a, Vec512<double> b) {
+  return Vec512<double>{_mm512_add_pd(a.raw, b.raw)};
+}
+
+// ------------------------------ Subtraction
+
+// Unsigned
+// Lane-wise subtraction (a - b), one overload per lane type. Each forwards to
+// the _mm512_sub_* intrinsic of matching lane width; signed and unsigned
+// integers share the same epi8/16/32/64 intrinsics.
+HWY_API Vec512<uint8_t> operator-(Vec512<uint8_t> a, Vec512<uint8_t> b) {
+  return Vec512<uint8_t>{_mm512_sub_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<uint16_t> operator-(Vec512<uint16_t> a, Vec512<uint16_t> b) {
+  return Vec512<uint16_t>{_mm512_sub_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<uint32_t> operator-(Vec512<uint32_t> a, Vec512<uint32_t> b) {
+  return Vec512<uint32_t>{_mm512_sub_epi32(a.raw, b.raw)};
+}
+HWY_API Vec512<uint64_t> operator-(Vec512<uint64_t> a, Vec512<uint64_t> b) {
+  return Vec512<uint64_t>{_mm512_sub_epi64(a.raw, b.raw)};
+}
+
+// Signed
+HWY_API Vec512<int8_t> operator-(Vec512<int8_t> a, Vec512<int8_t> b) {
+  return Vec512<int8_t>{_mm512_sub_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<int16_t> operator-(Vec512<int16_t> a, Vec512<int16_t> b) {
+  return Vec512<int16_t>{_mm512_sub_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<int32_t> operator-(Vec512<int32_t> a, Vec512<int32_t> b) {
+  return Vec512<int32_t>{_mm512_sub_epi32(a.raw, b.raw)};
+}
+HWY_API Vec512<int64_t> operator-(Vec512<int64_t> a, Vec512<int64_t> b) {
+  return Vec512<int64_t>{_mm512_sub_epi64(a.raw, b.raw)};
+}
+
+// Float
+#if HWY_HAVE_FLOAT16
+HWY_API Vec512<float16_t> operator-(Vec512<float16_t> a, Vec512<float16_t> b) {
+  return Vec512<float16_t>{_mm512_sub_ph(a.raw, b.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec512<float> operator-(Vec512<float> a, Vec512<float> b) {
+  return Vec512<float>{_mm512_sub_ps(a.raw, b.raw)};
+}
+HWY_API Vec512<double> operator-(Vec512<double> a, Vec512<double> b) {
+  return Vec512<double>{_mm512_sub_pd(a.raw, b.raw)};
+}
+
+// ------------------------------ SumsOf8
+// Sums each group of eight consecutive u8 lanes into one u64 lane, using the
+// sum-of-absolute-differences instruction against an all-zero vector.
+HWY_API Vec512<uint64_t> SumsOf8(const Vec512<uint8_t> v) {
+  const Full512<uint8_t> d;
+  const auto zero = Zero(d);
+  return Vec512<uint64_t>{_mm512_sad_epu8(v.raw, zero.raw)};
+}
+
+// For each group of eight consecutive u8 lanes, sums |a - b| into one u64
+// lane.
+HWY_API Vec512<uint64_t> SumsOf8AbsDiff(Vec512<uint8_t> a, Vec512<uint8_t> b) {
+  const __m512i sums = _mm512_sad_epu8(a.raw, b.raw);
+  return Vec512<uint64_t>{sums};
+}
+
+// ------------------------------ SaturatedAdd
+
+// Returns a + b clamped to the destination range.
+
+// Unsigned
+// Only 8- and 16-bit lanes have overloads here: unsigned lanes use the
+// adds_epu* intrinsics and signed lanes the adds_epi* intrinsics.
+HWY_API Vec512<uint8_t> SaturatedAdd(Vec512<uint8_t> a, Vec512<uint8_t> b) {
+  return Vec512<uint8_t>{_mm512_adds_epu8(a.raw, b.raw)};
+}
+HWY_API Vec512<uint16_t> SaturatedAdd(Vec512<uint16_t> a, Vec512<uint16_t> b) {
+  return Vec512<uint16_t>{_mm512_adds_epu16(a.raw, b.raw)};
+}
+
+// Signed
+HWY_API Vec512<int8_t> SaturatedAdd(Vec512<int8_t> a, Vec512<int8_t> b) {
+  return Vec512<int8_t>{_mm512_adds_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<int16_t> SaturatedAdd(Vec512<int16_t> a, Vec512<int16_t> b) {
+  return Vec512<int16_t>{_mm512_adds_epi16(a.raw, b.raw)};
+}
+
+// ------------------------------ SaturatedSub
+
+// Returns a - b clamped to the destination range.
+
+// Unsigned
+// Only 8- and 16-bit lanes have overloads here: unsigned lanes use the
+// subs_epu* intrinsics and signed lanes the subs_epi* intrinsics.
+HWY_API Vec512<uint8_t> SaturatedSub(Vec512<uint8_t> a, Vec512<uint8_t> b) {
+  return Vec512<uint8_t>{_mm512_subs_epu8(a.raw, b.raw)};
+}
+HWY_API Vec512<uint16_t> SaturatedSub(Vec512<uint16_t> a, Vec512<uint16_t> b) {
+  return Vec512<uint16_t>{_mm512_subs_epu16(a.raw, b.raw)};
+}
+
+// Signed
+HWY_API Vec512<int8_t> SaturatedSub(Vec512<int8_t> a, Vec512<int8_t> b) {
+  return Vec512<int8_t>{_mm512_subs_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<int16_t> SaturatedSub(Vec512<int16_t> a, Vec512<int16_t> b) {
+  return Vec512<int16_t>{_mm512_subs_epi16(a.raw, b.raw)};
+}
+
+// ------------------------------ Average
+
+// Returns (a + b + 1) / 2
+
+// Unsigned
+// Only unsigned 8- and 16-bit overloads are provided here; both forward to
+// the avg_epu* intrinsics.
+HWY_API Vec512<uint8_t> AverageRound(Vec512<uint8_t> a, Vec512<uint8_t> b) {
+  return Vec512<uint8_t>{_mm512_avg_epu8(a.raw, b.raw)};
+}
+HWY_API Vec512<uint16_t> AverageRound(Vec512<uint16_t> a, Vec512<uint16_t> b) {
+  return Vec512<uint16_t>{_mm512_avg_epu16(a.raw, b.raw)};
+}
+
+// ------------------------------ Abs (Sub)
+
+// Returns absolute value, except that LimitsMin() maps to LimitsMax() + 1.
+// Signed-integer overloads. The 8-bit one carries an MSVC-only workaround
+// that computes max(v, 0 - v) instead of using the abs intrinsic.
+HWY_API Vec512<int8_t> Abs(const Vec512<int8_t> v) {
+#if HWY_COMPILER_MSVC
+  // Workaround for incorrect codegen? (untested due to internal compiler error)
+  const DFromV<decltype(v)> d;
+  const auto zero = Zero(d);
+  return Vec512<int8_t>{_mm512_max_epi8(v.raw, (zero - v).raw)};
+#else
+  return Vec512<int8_t>{_mm512_abs_epi8(v.raw)};
+#endif
+}
+HWY_API Vec512<int16_t> Abs(const Vec512<int16_t> v) {
+  return Vec512<int16_t>{_mm512_abs_epi16(v.raw)};
+}
+HWY_API Vec512<int32_t> Abs(const Vec512<int32_t> v) {
+  return Vec512<int32_t>{_mm512_abs_epi32(v.raw)};
+}
+HWY_API Vec512<int64_t> Abs(const Vec512<int64_t> v) {
+  return Vec512<int64_t>{_mm512_abs_epi64(v.raw)};
+}
+
+// These aren't native instructions, they also involve AND with constant.
+// Floating-point overloads of Abs.
+#if HWY_HAVE_FLOAT16
+HWY_API Vec512<float16_t> Abs(const Vec512<float16_t> v) {
+  return Vec512<float16_t>{_mm512_abs_ph(v.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+
+HWY_API Vec512<float> Abs(const Vec512<float> v) {
+  return Vec512<float>{_mm512_abs_ps(v.raw)};
+}
+HWY_API Vec512<double> Abs(const Vec512<double> v) {
+// Workaround: _mm512_abs_pd expects __m512, so implement it ourselves.
+// Applies to GCC versions below 8.3 (encoded as 803): clear the sign bit by
+// ANDing with 0x7FFF...F.
+#if HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 803
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  return And(v, BitCast(d, Set(du, 0x7FFFFFFFFFFFFFFFULL)));
+#else
+  return Vec512<double>{_mm512_abs_pd(v.raw)};
+#endif
+}
+// ------------------------------ ShiftLeft
+
+// Shifts every lane left by the compile-time constant kBits. 16/32/64-bit
+// lanes map directly to the slli intrinsics (signed and unsigned alike).
+template <int kBits>
+HWY_API Vec512<uint16_t> ShiftLeft(const Vec512<uint16_t> v) {
+  return Vec512<uint16_t>{_mm512_slli_epi16(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec512<uint32_t> ShiftLeft(const Vec512<uint32_t> v) {
+  return Vec512<uint32_t>{_mm512_slli_epi32(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec512<uint64_t> ShiftLeft(const Vec512<uint64_t> v) {
+  return Vec512<uint64_t>{_mm512_slli_epi64(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec512<int16_t> ShiftLeft(const Vec512<int16_t> v) {
+  return Vec512<int16_t>{_mm512_slli_epi16(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec512<int32_t> ShiftLeft(const Vec512<int32_t> v) {
+  return Vec512<int32_t>{_mm512_slli_epi32(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec512<int64_t> ShiftLeft(const Vec512<int64_t> v) {
+  return Vec512<int64_t>{_mm512_slli_epi64(v.raw, kBits)};
+}
+
+// 8-bit lanes are emulated: shift the data as 16-bit lanes, then mask each
+// byte with (0xFF << kBits) & 0xFF to clear the bits that crossed over from
+// the neighbouring lower byte. A shift by one is done as v + v instead.
+template <int kBits, typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec512<T> ShiftLeft(const Vec512<T> v) {
+  const DFromV<decltype(v)> d8;
+  const RepartitionToWide<decltype(d8)> d16;
+  const auto shifted = BitCast(d8, ShiftLeft<kBits>(BitCast(d16, v)));
+  return kBits == 1
+             ? (v + v)
+             : (shifted & Set(d8, static_cast<T>((0xFF << kBits) & 0xFF)));
+}
+
+// ------------------------------ ShiftRight
+
+// Shifts every lane right by the compile-time constant kBits. Unsigned lanes
+// use the logical srli intrinsics; signed lanes use the arithmetic srai ones.
+template <int kBits>
+HWY_API Vec512<uint16_t> ShiftRight(const Vec512<uint16_t> v) {
+  return Vec512<uint16_t>{_mm512_srli_epi16(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec512<uint32_t> ShiftRight(const Vec512<uint32_t> v) {
+  return Vec512<uint32_t>{_mm512_srli_epi32(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec512<uint64_t> ShiftRight(const Vec512<uint64_t> v) {
+  return Vec512<uint64_t>{_mm512_srli_epi64(v.raw, kBits)};
+}
+
+// u8 lanes are emulated: shift as u16 lanes, then mask each byte with
+// 0xFF >> kBits to clear the bits that crossed over from the neighbouring
+// upper byte.
+template <int kBits>
+HWY_API Vec512<uint8_t> ShiftRight(const Vec512<uint8_t> v) {
+  const DFromV<decltype(v)> d8;
+  // Use raw instead of BitCast to support N=1.
+  const Vec512<uint8_t> shifted{ShiftRight<kBits>(Vec512<uint16_t>{v.raw}).raw};
+  return shifted & Set(d8, 0xFF >> kBits);
+}
+
+template <int kBits>
+HWY_API Vec512<int16_t> ShiftRight(const Vec512<int16_t> v) {
+  return Vec512<int16_t>{_mm512_srai_epi16(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec512<int32_t> ShiftRight(const Vec512<int32_t> v) {
+  return Vec512<int32_t>{_mm512_srai_epi32(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec512<int64_t> ShiftRight(const Vec512<int64_t> v) {
+  return Vec512<int64_t>{_mm512_srai_epi64(v.raw, kBits)};
+}
+
+// i8 lanes: do the logical u8 shift, then sign-extend. shifted_sign holds the
+// position the original sign bit moved to (0x80 >> kBits); (x ^ s) - s
+// propagates that bit into all higher bits of the lane.
+template <int kBits>
+HWY_API Vec512<int8_t> ShiftRight(const Vec512<int8_t> v) {
+  const DFromV<decltype(v)> di;
+  const RebindToUnsigned<decltype(di)> du;
+  const auto shifted = BitCast(di, ShiftRight<kBits>(BitCast(du, v)));
+  const auto shifted_sign = BitCast(di, Set(du, 0x80 >> kBits));
+  return (shifted ^ shifted_sign) - shifted_sign;
+}
+
+// ------------------------------ RotateRight
+
+// Rotates each 8- or 16-bit lane right by the compile-time constant kBits,
+// built from a right shift ORed with the complementary left shift.
+template <int kBits, typename T, HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2))>
+HWY_API Vec512<T> RotateRight(const Vec512<T> v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  constexpr size_t kSizeInBits = sizeof(T) * 8;
+  static_assert(0 <= kBits && kBits < kSizeInBits, "Invalid shift count");
+  if (kBits == 0) return v;
+  // AVX3 does not support 8/16-bit.
+  // The right shift is done on the unsigned view: for signed T, ShiftRight is
+  // arithmetic and would copy the sign bit into the positions that must
+  // receive the rotated-in low bits. For unsigned T the casts are no-ops.
+  // HWY_MIN keeps the ShiftLeft count below the lane width even when this
+  // template is instantiated with kBits == 0 (which returned above).
+  return Or(BitCast(d, ShiftRight<kBits>(BitCast(du, v))),
+            ShiftLeft<HWY_MIN(kSizeInBits - 1, kSizeInBits - kBits)>(v));
+}
+
+// Rotates each u32 lane right by kBits; a zero count returns the input as-is.
+template <int kBits>
+HWY_API Vec512<uint32_t> RotateRight(const Vec512<uint32_t> v) {
+  static_assert(0 <= kBits && kBits < 32, "Invalid shift count");
+  return (kBits == 0) ? v : Vec512<uint32_t>{_mm512_ror_epi32(v.raw, kBits)};
+}
+
+// Rotates each u64 lane right by kBits; a zero count returns the input as-is.
+template <int kBits>
+HWY_API Vec512<uint64_t> RotateRight(const Vec512<uint64_t> v) {
+  static_assert(0 <= kBits && kBits < 64, "Invalid shift count");
+  return (kBits == 0) ? v : Vec512<uint64_t>{_mm512_ror_epi64(v.raw, kBits)};
+}
+
+// ------------------------------ ShiftLeftSame
+
+// GCC <14 and Clang <11 do not follow the Intel documentation for AVX-512
+// shift-with-immediate: the counts should all be unsigned int.
+#if HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1100
+using Shift16Count = int;
+using Shift3264Count = int;
+#elif HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1400
+// GCC 11.0 requires these, prior versions used a macro+cast and don't care.
+using Shift16Count = int;
+using Shift3264Count = unsigned int;
+#else
+// Assume documented behavior. Clang 11, GCC 14 and MSVC 14.28.29910 match this.
+using Shift16Count = unsigned int;
+using Shift3264Count = unsigned int;
+#endif
+
+// Shifts every lane left by the same runtime count `bits`. Where
+// HWY_COMPILER_GCC is set and `bits` is a compile-time constant, the
+// immediate (slli) form is used; otherwise the count is passed in the low
+// 32 bits of an __m128i to the sll form.
+HWY_API Vec512<uint16_t> ShiftLeftSame(const Vec512<uint16_t> v,
+                                       const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {
+    return Vec512<uint16_t>{
+        _mm512_slli_epi16(v.raw, static_cast<Shift16Count>(bits))};
+  }
+#endif
+  return Vec512<uint16_t>{_mm512_sll_epi16(v.raw, _mm_cvtsi32_si128(bits))};
+}
+HWY_API Vec512<uint32_t> ShiftLeftSame(const Vec512<uint32_t> v,
+                                       const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {
+    return Vec512<uint32_t>{
+        _mm512_slli_epi32(v.raw, static_cast<Shift3264Count>(bits))};
+  }
+#endif
+  return Vec512<uint32_t>{_mm512_sll_epi32(v.raw, _mm_cvtsi32_si128(bits))};
+}
+HWY_API Vec512<uint64_t> ShiftLeftSame(const Vec512<uint64_t> v,
+                                       const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {
+    return Vec512<uint64_t>{
+        _mm512_slli_epi64(v.raw, static_cast<Shift3264Count>(bits))};
+  }
+#endif
+  return Vec512<uint64_t>{_mm512_sll_epi64(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+HWY_API Vec512<int16_t> ShiftLeftSame(const Vec512<int16_t> v, const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {
+    return Vec512<int16_t>{
+        _mm512_slli_epi16(v.raw, static_cast<Shift16Count>(bits))};
+  }
+#endif
+  return Vec512<int16_t>{_mm512_sll_epi16(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+HWY_API Vec512<int32_t> ShiftLeftSame(const Vec512<int32_t> v, const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {
+    return Vec512<int32_t>{
+        _mm512_slli_epi32(v.raw, static_cast<Shift3264Count>(bits))};
+  }
+#endif
+  return Vec512<int32_t>{_mm512_sll_epi32(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+HWY_API Vec512<int64_t> ShiftLeftSame(const Vec512<int64_t> v, const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {
+    return Vec512<int64_t>{
+        _mm512_slli_epi64(v.raw, static_cast<Shift3264Count>(bits))};
+  }
+#endif
+  return Vec512<int64_t>{_mm512_sll_epi64(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+// 8-bit lanes are emulated: shift as 16-bit lanes, then mask each byte with
+// (0xFF << bits) & 0xFF to clear bits that crossed over from the neighbouring
+// lower byte.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec512<T> ShiftLeftSame(const Vec512<T> v, const int bits) {
+  const DFromV<decltype(v)> d8;
+  const RepartitionToWide<decltype(d8)> d16;
+  const auto shifted = BitCast(d8, ShiftLeftSame(BitCast(d16, v), bits));
+  return shifted & Set(d8, static_cast<T>((0xFF << bits) & 0xFF));
+}
+
+// ------------------------------ ShiftRightSame
+
+// Shifts every lane right by the same runtime count `bits`. Unsigned lanes
+// use the logical srl/srli intrinsics and signed lanes the arithmetic
+// sra/srai ones. Where HWY_COMPILER_GCC is set and `bits` is a compile-time
+// constant, the immediate form is used; otherwise the count is passed in the
+// low 32 bits of an __m128i.
+HWY_API Vec512<uint16_t> ShiftRightSame(const Vec512<uint16_t> v,
+                                        const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {
+    return Vec512<uint16_t>{
+        _mm512_srli_epi16(v.raw, static_cast<Shift16Count>(bits))};
+  }
+#endif
+  return Vec512<uint16_t>{_mm512_srl_epi16(v.raw, _mm_cvtsi32_si128(bits))};
+}
+HWY_API Vec512<uint32_t> ShiftRightSame(const Vec512<uint32_t> v,
+                                        const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {
+    return Vec512<uint32_t>{
+        _mm512_srli_epi32(v.raw, static_cast<Shift3264Count>(bits))};
+  }
+#endif
+  return Vec512<uint32_t>{_mm512_srl_epi32(v.raw, _mm_cvtsi32_si128(bits))};
+}
+HWY_API Vec512<uint64_t> ShiftRightSame(const Vec512<uint64_t> v,
+                                        const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {
+    return Vec512<uint64_t>{
+        _mm512_srli_epi64(v.raw, static_cast<Shift3264Count>(bits))};
+  }
+#endif
+  return Vec512<uint64_t>{_mm512_srl_epi64(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+// u8 lanes are emulated: shift as u16 lanes, then mask each byte with
+// 0xFF >> bits to clear bits that crossed over from the neighbouring upper
+// byte.
+HWY_API Vec512<uint8_t> ShiftRightSame(Vec512<uint8_t> v, const int bits) {
+  const DFromV<decltype(v)> d8;
+  const RepartitionToWide<decltype(d8)> d16;
+  const auto shifted = BitCast(d8, ShiftRightSame(BitCast(d16, v), bits));
+  return shifted & Set(d8, static_cast<uint8_t>(0xFF >> bits));
+}
+
+HWY_API Vec512<int16_t> ShiftRightSame(const Vec512<int16_t> v,
+                                       const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {
+    return Vec512<int16_t>{
+        _mm512_srai_epi16(v.raw, static_cast<Shift16Count>(bits))};
+  }
+#endif
+  return Vec512<int16_t>{_mm512_sra_epi16(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+HWY_API Vec512<int32_t> ShiftRightSame(const Vec512<int32_t> v,
+                                       const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {
+    return Vec512<int32_t>{
+        _mm512_srai_epi32(v.raw, static_cast<Shift3264Count>(bits))};
+  }
+#endif
+  return Vec512<int32_t>{_mm512_sra_epi32(v.raw, _mm_cvtsi32_si128(bits))};
+}
+HWY_API Vec512<int64_t> ShiftRightSame(const Vec512<int64_t> v,
+                                       const int bits) {
+#if HWY_COMPILER_GCC
+  if (__builtin_constant_p(bits)) {
+    return Vec512<int64_t>{
+        _mm512_srai_epi64(v.raw, static_cast<Shift3264Count>(bits))};
+  }
+#endif
+  return Vec512<int64_t>{_mm512_sra_epi64(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+// i8 lanes: do the logical u8 shift, then sign-extend. shifted_sign holds the
+// position the original sign bit moved to (0x80 >> bits); (x ^ s) - s
+// propagates that bit into all higher bits of the lane.
+HWY_API Vec512<int8_t> ShiftRightSame(Vec512<int8_t> v, const int bits) {
+  const DFromV<decltype(v)> di;
+  const RebindToUnsigned<decltype(di)> du;
+  const auto shifted = BitCast(di, ShiftRightSame(BitCast(du, v), bits));
+  const auto shifted_sign =
+      BitCast(di, Set(du, static_cast<uint8_t>(0x80 >> bits)));
+  return (shifted ^ shifted_sign) - shifted_sign;
+}
+
+// ------------------------------ Minimum
+
+// Unsigned
+// Lane-wise minimum, one overload per lane type. Unsigned integers use the
+// min_epu* intrinsics, signed integers min_epi*, and floating-point lanes
+// min_ph/ps/pd.
+HWY_API Vec512<uint8_t> Min(Vec512<uint8_t> a, Vec512<uint8_t> b) {
+  return Vec512<uint8_t>{_mm512_min_epu8(a.raw, b.raw)};
+}
+HWY_API Vec512<uint16_t> Min(Vec512<uint16_t> a, Vec512<uint16_t> b) {
+  return Vec512<uint16_t>{_mm512_min_epu16(a.raw, b.raw)};
+}
+HWY_API Vec512<uint32_t> Min(Vec512<uint32_t> a, Vec512<uint32_t> b) {
+  return Vec512<uint32_t>{_mm512_min_epu32(a.raw, b.raw)};
+}
+HWY_API Vec512<uint64_t> Min(Vec512<uint64_t> a, Vec512<uint64_t> b) {
+  return Vec512<uint64_t>{_mm512_min_epu64(a.raw, b.raw)};
+}
+
+// Signed
+HWY_API Vec512<int8_t> Min(Vec512<int8_t> a, Vec512<int8_t> b) {
+  return Vec512<int8_t>{_mm512_min_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<int16_t> Min(Vec512<int16_t> a, Vec512<int16_t> b) {
+  return Vec512<int16_t>{_mm512_min_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<int32_t> Min(Vec512<int32_t> a, Vec512<int32_t> b) {
+  return Vec512<int32_t>{_mm512_min_epi32(a.raw, b.raw)};
+}
+HWY_API Vec512<int64_t> Min(Vec512<int64_t> a, Vec512<int64_t> b) {
+  return Vec512<int64_t>{_mm512_min_epi64(a.raw, b.raw)};
+}
+
+// Float
+#if HWY_HAVE_FLOAT16
+HWY_API Vec512<float16_t> Min(Vec512<float16_t> a, Vec512<float16_t> b) {
+  return Vec512<float16_t>{_mm512_min_ph(a.raw, b.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec512<float> Min(Vec512<float> a, Vec512<float> b) {
+  return Vec512<float>{_mm512_min_ps(a.raw, b.raw)};
+}
+HWY_API Vec512<double> Min(Vec512<double> a, Vec512<double> b) {
+  return Vec512<double>{_mm512_min_pd(a.raw, b.raw)};
+}
+
+// ------------------------------ Maximum
+
+// Unsigned
+// Lane-wise maximum, one overload per lane type. Unsigned integers use the
+// max_epu* intrinsics, signed integers max_epi*, and floating-point lanes
+// max_ph/ps/pd.
+HWY_API Vec512<uint8_t> Max(Vec512<uint8_t> a, Vec512<uint8_t> b) {
+  return Vec512<uint8_t>{_mm512_max_epu8(a.raw, b.raw)};
+}
+HWY_API Vec512<uint16_t> Max(Vec512<uint16_t> a, Vec512<uint16_t> b) {
+  return Vec512<uint16_t>{_mm512_max_epu16(a.raw, b.raw)};
+}
+HWY_API Vec512<uint32_t> Max(Vec512<uint32_t> a, Vec512<uint32_t> b) {
+  return Vec512<uint32_t>{_mm512_max_epu32(a.raw, b.raw)};
+}
+HWY_API Vec512<uint64_t> Max(Vec512<uint64_t> a, Vec512<uint64_t> b) {
+  return Vec512<uint64_t>{_mm512_max_epu64(a.raw, b.raw)};
+}
+
+// Signed
+HWY_API Vec512<int8_t> Max(Vec512<int8_t> a, Vec512<int8_t> b) {
+  return Vec512<int8_t>{_mm512_max_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<int16_t> Max(Vec512<int16_t> a, Vec512<int16_t> b) {
+  return Vec512<int16_t>{_mm512_max_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<int32_t> Max(Vec512<int32_t> a, Vec512<int32_t> b) {
+  return Vec512<int32_t>{_mm512_max_epi32(a.raw, b.raw)};
+}
+HWY_API Vec512<int64_t> Max(Vec512<int64_t> a, Vec512<int64_t> b) {
+  return Vec512<int64_t>{_mm512_max_epi64(a.raw, b.raw)};
+}
+
+// Float
+#if HWY_HAVE_FLOAT16
+HWY_API Vec512<float16_t> Max(Vec512<float16_t> a, Vec512<float16_t> b) {
+  return Vec512<float16_t>{_mm512_max_ph(a.raw, b.raw)};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec512<float> Max(Vec512<float> a, Vec512<float> b) {
+  return Vec512<float>{_mm512_max_ps(a.raw, b.raw)};
+}
+HWY_API Vec512<double> Max(Vec512<double> a, Vec512<double> b) {
+  return Vec512<double>{_mm512_max_pd(a.raw, b.raw)};
+}
+
+// ------------------------------ Integer multiplication
+
+// Per-target flag to prevent generic_ops-inl.h from defining 64-bit operator*.
+#ifdef HWY_NATIVE_MUL_64
+#undef HWY_NATIVE_MUL_64
+#else
+#define HWY_NATIVE_MUL_64
+#endif
+
+// Unsigned lanes: each overload returns the result of the matching mullo
+// intrinsic. The 64-bit overloads are also provided here for 256-bit and
+// 128-bit vectors.
+HWY_API Vec512<uint16_t> operator*(Vec512<uint16_t> a, Vec512<uint16_t> b) {
+  const auto product = _mm512_mullo_epi16(a.raw, b.raw);
+  return Vec512<uint16_t>{product};
+}
+HWY_API Vec512<uint32_t> operator*(Vec512<uint32_t> a, Vec512<uint32_t> b) {
+  const auto product = _mm512_mullo_epi32(a.raw, b.raw);
+  return Vec512<uint32_t>{product};
+}
+HWY_API Vec512<uint64_t> operator*(Vec512<uint64_t> a, Vec512<uint64_t> b) {
+  const auto product = _mm512_mullo_epi64(a.raw, b.raw);
+  return Vec512<uint64_t>{product};
+}
+HWY_API Vec256<uint64_t> operator*(Vec256<uint64_t> a, Vec256<uint64_t> b) {
+  const auto product = _mm256_mullo_epi64(a.raw, b.raw);
+  return Vec256<uint64_t>{product};
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, N> operator*(Vec128<uint64_t, N> a,
+                                      Vec128<uint64_t, N> b) {
+  const auto product = _mm_mullo_epi64(a.raw, b.raw);
+  return Vec128<uint64_t, N>{product};
+}
+
+// Signed lanes: same intrinsics as the unsigned overloads above.
+HWY_API Vec512<int16_t> operator*(Vec512<int16_t> a, Vec512<int16_t> b) {
+  const auto product = _mm512_mullo_epi16(a.raw, b.raw);
+  return Vec512<int16_t>{product};
+}
+HWY_API Vec512<int32_t> operator*(Vec512<int32_t> a, Vec512<int32_t> b) {
+  const auto product = _mm512_mullo_epi32(a.raw, b.raw);
+  return Vec512<int32_t>{product};
+}
+HWY_API Vec512<int64_t> operator*(Vec512<int64_t> a, Vec512<int64_t> b) {
+  const auto product = _mm512_mullo_epi64(a.raw, b.raw);
+  return Vec512<int64_t>{product};
+}
+HWY_API Vec256<int64_t> operator*(Vec256<int64_t> a, Vec256<int64_t> b) {
+  const auto product = _mm256_mullo_epi64(a.raw, b.raw);
+  return Vec256<int64_t>{product};
+}
+template <size_t N>
+HWY_API Vec128<int64_t, N> operator*(Vec128<int64_t, N> a,
+                                     Vec128<int64_t, N> b) {
+  const auto product = _mm_mullo_epi64(a.raw, b.raw);
+  return Vec128<int64_t, N>{product};
+}
+// Per 16-bit lane: the upper 16 bits of a * b.
+HWY_API Vec512<uint16_t> MulHigh(Vec512<uint16_t> a, Vec512<uint16_t> b) {
+  const auto upper_half = _mm512_mulhi_epu16(a.raw, b.raw);
+  return Vec512<uint16_t>{upper_half};
+}
+HWY_API Vec512<int16_t> MulHigh(Vec512<int16_t> a, Vec512<int16_t> b) {
+  const auto upper_half = _mm512_mulhi_epi16(a.raw, b.raw);
+  return Vec512<int16_t>{upper_half};
+}
+
+// Fixed-point multiply of int16 lanes; thin wrapper over _mm512_mulhrs_epi16.
+HWY_API Vec512<int16_t> MulFixedPoint15(Vec512<int16_t> a, Vec512<int16_t> b) {
+  const auto scaled = _mm512_mulhrs_epi16(a.raw, b.raw);
+  return Vec512<int16_t>{scaled};
+}
+
+// Multiplies the even-indexed 32-bit lanes (0, 2, ..) of a and b. Each
+// double-wide 64-bit product occupies the even lane plus its odd neighbor
+// (which holds the upper half).
+HWY_API Vec512<int64_t> MulEven(Vec512<int32_t> a, Vec512<int32_t> b) {
+  const auto wide = _mm512_mul_epi32(a.raw, b.raw);
+  return Vec512<int64_t>{wide};
+}
+HWY_API Vec512<uint64_t> MulEven(Vec512<uint32_t> a, Vec512<uint32_t> b) {
+  const auto wide = _mm512_mul_epu32(a.raw, b.raw);
+  return Vec512<uint64_t>{wide};
+}
+
+// ------------------------------ Neg (Sub)
+
+// Float (and special float) lanes: negate by XOR-ing with SignBit(d).
+template <typename T, HWY_IF_FLOAT_OR_SPECIAL(T)>
+HWY_API Vec512<T> Neg(const Vec512<T> v) {
+  const DFromV<decltype(v)> d;
+  const auto sign_mask = SignBit(d);
+  return Xor(v, sign_mask);
+}
+
+// All other lane types: negate by subtracting from zero.
+template <typename T, HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>
+HWY_API Vec512<T> Neg(const Vec512<T> v) {
+  const DFromV<decltype(v)> d;
+  const auto zero = Zero(d);
+  return zero - v;
+}
+
+// ------------------------------ Floating-point mul / div
+
+// Per-lane floating-point product. The float16_t overload is compiled only
+// when HWY_HAVE_FLOAT16 is set.
+#if HWY_HAVE_FLOAT16
+HWY_API Vec512<float16_t> operator*(Vec512<float16_t> a, Vec512<float16_t> b) {
+  const auto product = _mm512_mul_ph(a.raw, b.raw);
+  return Vec512<float16_t>{product};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec512<float> operator*(Vec512<float> a, Vec512<float> b) {
+  const auto product = _mm512_mul_ps(a.raw, b.raw);
+  return Vec512<float>{product};
+}
+HWY_API Vec512<double> operator*(Vec512<double> a, Vec512<double> b) {
+  const auto product = _mm512_mul_pd(a.raw, b.raw);
+  return Vec512<double>{product};
+}
+
+// Per-lane floating-point quotient a / b. The float16_t overload is compiled
+// only when HWY_HAVE_FLOAT16 is set.
+#if HWY_HAVE_FLOAT16
+HWY_API Vec512<float16_t> operator/(Vec512<float16_t> a, Vec512<float16_t> b) {
+  const auto quotient = _mm512_div_ph(a.raw, b.raw);
+  return Vec512<float16_t>{quotient};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec512<float> operator/(Vec512<float> a, Vec512<float> b) {
+  const auto quotient = _mm512_div_ps(a.raw, b.raw);
+  return Vec512<float>{quotient};
+}
+HWY_API Vec512<double> operator/(Vec512<double> a, Vec512<double> b) {
+  const auto quotient = _mm512_div_pd(a.raw, b.raw);
+  return Vec512<double>{quotient};
+}
+
+// Approximate 1/v per lane. float/double use the rcp14 intrinsics; float16_t
+// (only when HWY_HAVE_FLOAT16 is set) uses _mm512_rcp_ph.
+#if HWY_HAVE_FLOAT16
+HWY_API Vec512<float16_t> ApproximateReciprocal(const Vec512<float16_t> v) {
+  const auto approx = _mm512_rcp_ph(v.raw);
+  return Vec512<float16_t>{approx};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec512<float> ApproximateReciprocal(const Vec512<float> v) {
+  const auto approx = _mm512_rcp14_ps(v.raw);
+  return Vec512<float>{approx};
+}
+
+HWY_API Vec512<double> ApproximateReciprocal(Vec512<double> v) {
+  const auto approx = _mm512_rcp14_pd(v.raw);
+  return Vec512<double>{approx};
+}
+
+// ------------------------------ Floating-point multiply-add variants
+
+#if HWY_HAVE_FLOAT16
+
+// float16_t fused multiply-add family; each function forwards to the
+// fmadd/fnmadd/fmsub/fnmsub _ph intrinsic of the same shape as the
+// float/double overloads below.
+HWY_API Vec512<float16_t> MulAdd(Vec512<float16_t> mul, Vec512<float16_t> x,
+                                 Vec512<float16_t> add) {
+  const auto fused = _mm512_fmadd_ph(mul.raw, x.raw, add.raw);
+  return Vec512<float16_t>{fused};
+}
+
+HWY_API Vec512<float16_t> NegMulAdd(Vec512<float16_t> mul, Vec512<float16_t> x,
+                                    Vec512<float16_t> add) {
+  const auto fused = _mm512_fnmadd_ph(mul.raw, x.raw, add.raw);
+  return Vec512<float16_t>{fused};
+}
+
+HWY_API Vec512<float16_t> MulSub(Vec512<float16_t> mul, Vec512<float16_t> x,
+                                 Vec512<float16_t> sub) {
+  const auto fused = _mm512_fmsub_ph(mul.raw, x.raw, sub.raw);
+  return Vec512<float16_t>{fused};
+}
+
+HWY_API Vec512<float16_t> NegMulSub(Vec512<float16_t> mul, Vec512<float16_t> x,
+                                    Vec512<float16_t> sub) {
+  const auto fused = _mm512_fnmsub_ph(mul.raw, x.raw, sub.raw);
+  return Vec512<float16_t>{fused};
+}
+
+#endif  // HWY_HAVE_FLOAT16
+
+// Fused multiply-add: mul * x + add, per lane.
+HWY_API Vec512<float> MulAdd(Vec512<float> mul, Vec512<float> x,
+                             Vec512<float> add) {
+  const auto fused = _mm512_fmadd_ps(mul.raw, x.raw, add.raw);
+  return Vec512<float>{fused};
+}
+HWY_API Vec512<double> MulAdd(Vec512<double> mul, Vec512<double> x,
+                              Vec512<double> add) {
+  const auto fused = _mm512_fmadd_pd(mul.raw, x.raw, add.raw);
+  return Vec512<double>{fused};
+}
+
+// Fused negated multiply-add: add - mul * x, per lane.
+HWY_API Vec512<float> NegMulAdd(Vec512<float> mul, Vec512<float> x,
+                                Vec512<float> add) {
+  const auto fused = _mm512_fnmadd_ps(mul.raw, x.raw, add.raw);
+  return Vec512<float>{fused};
+}
+HWY_API Vec512<double> NegMulAdd(Vec512<double> mul, Vec512<double> x,
+                                 Vec512<double> add) {
+  const auto fused = _mm512_fnmadd_pd(mul.raw, x.raw, add.raw);
+  return Vec512<double>{fused};
+}
+
+// Fused multiply-subtract: mul * x - sub, per lane.
+HWY_API Vec512<float> MulSub(Vec512<float> mul, Vec512<float> x,
+                             Vec512<float> sub) {
+  const auto fused = _mm512_fmsub_ps(mul.raw, x.raw, sub.raw);
+  return Vec512<float>{fused};
+}
+HWY_API Vec512<double> MulSub(Vec512<double> mul, Vec512<double> x,
+                              Vec512<double> sub) {
+  const auto fused = _mm512_fmsub_pd(mul.raw, x.raw, sub.raw);
+  return Vec512<double>{fused};
+}
+
+// Fused negated multiply-subtract: -(mul * x) - sub, per lane.
+HWY_API Vec512<float> NegMulSub(Vec512<float> mul, Vec512<float> x,
+                                Vec512<float> sub) {
+  const auto fused = _mm512_fnmsub_ps(mul.raw, x.raw, sub.raw);
+  return Vec512<float>{fused};
+}
+HWY_API Vec512<double> NegMulSub(Vec512<double> mul, Vec512<double> x,
+                                 Vec512<double> sub) {
+  const auto fused = _mm512_fnmsub_pd(mul.raw, x.raw, sub.raw);
+  return Vec512<double>{fused};
+}
+
+// ------------------------------ Floating-point square root
+
+// Full-precision square root per lane (as opposed to the approximate
+// reciprocal-sqrt overloads). float16_t only when HWY_HAVE_FLOAT16 is set.
+#if HWY_HAVE_FLOAT16
+HWY_API Vec512<float16_t> Sqrt(const Vec512<float16_t> v) {
+  const auto root = _mm512_sqrt_ph(v.raw);
+  return Vec512<float16_t>{root};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec512<float> Sqrt(const Vec512<float> v) {
+  const auto root = _mm512_sqrt_ps(v.raw);
+  return Vec512<float>{root};
+}
+HWY_API Vec512<double> Sqrt(const Vec512<double> v) {
+  const auto root = _mm512_sqrt_pd(v.raw);
+  return Vec512<double>{root};
+}
+
+// Approximate 1/sqrt(v) per lane. float/double use the rsqrt14 intrinsics;
+// float16_t (only when HWY_HAVE_FLOAT16 is set) uses _mm512_rsqrt_ph.
+#if HWY_HAVE_FLOAT16
+HWY_API Vec512<float16_t> ApproximateReciprocalSqrt(Vec512<float16_t> v) {
+  const auto approx = _mm512_rsqrt_ph(v.raw);
+  return Vec512<float16_t>{approx};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec512<float> ApproximateReciprocalSqrt(Vec512<float> v) {
+  const auto approx = _mm512_rsqrt14_ps(v.raw);
+  return Vec512<float>{approx};
+}
+
+HWY_API Vec512<double> ApproximateReciprocalSqrt(Vec512<double> v) {
+  const auto approx = _mm512_rsqrt14_pd(v.raw);
+  return Vec512<double>{approx};
+}
+
+// ------------------------------ Floating-point rounding
+
+// Work around warnings in the intrinsic definitions (passing -1 as a mask).
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4245 4365, ignored "-Wsign-conversion")
+
+// Rounds each lane to the nearest integer (ties to even) using roundscale
+// with _MM_FROUND_TO_NEAREST_INT; _MM_FROUND_NO_EXC is passed alongside.
+#if HWY_HAVE_FLOAT16
+HWY_API Vec512<float16_t> Round(Vec512<float16_t> v) {
+  const auto rounded = _mm512_roundscale_ph(
+      v.raw, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  return Vec512<float16_t>{rounded};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec512<float> Round(Vec512<float> v) {
+  const auto rounded = _mm512_roundscale_ps(
+      v.raw, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  return Vec512<float>{rounded};
+}
+HWY_API Vec512<double> Round(Vec512<double> v) {
+  const auto rounded = _mm512_roundscale_pd(
+      v.raw, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  return Vec512<double>{rounded};
+}
+
+// Truncates each lane toward zero using roundscale with _MM_FROUND_TO_ZERO;
+// _MM_FROUND_NO_EXC is passed alongside.
+#if HWY_HAVE_FLOAT16
+HWY_API Vec512<float16_t> Trunc(Vec512<float16_t> v) {
+  const auto truncated =
+      _mm512_roundscale_ph(v.raw, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+  return Vec512<float16_t>{truncated};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec512<float> Trunc(Vec512<float> v) {
+  const auto truncated =
+      _mm512_roundscale_ps(v.raw, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+  return Vec512<float>{truncated};
+}
+HWY_API Vec512<double> Trunc(Vec512<double> v) {
+  const auto truncated =
+      _mm512_roundscale_pd(v.raw, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+  return Vec512<double>{truncated};
+}
+
+// Rounds each lane up (toward +infinity) using roundscale with
+// _MM_FROUND_TO_POS_INF; _MM_FROUND_NO_EXC is passed alongside.
+#if HWY_HAVE_FLOAT16
+HWY_API Vec512<float16_t> Ceil(Vec512<float16_t> v) {
+  const auto raised =
+      _mm512_roundscale_ph(v.raw, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  return Vec512<float16_t>{raised};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec512<float> Ceil(Vec512<float> v) {
+  const auto raised =
+      _mm512_roundscale_ps(v.raw, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  return Vec512<float>{raised};
+}
+HWY_API Vec512<double> Ceil(Vec512<double> v) {
+  const auto raised =
+      _mm512_roundscale_pd(v.raw, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  return Vec512<double>{raised};
+}
+
+// Rounds each lane down (toward -infinity) using roundscale with
+// _MM_FROUND_TO_NEG_INF; _MM_FROUND_NO_EXC is passed alongside.
+#if HWY_HAVE_FLOAT16
+HWY_API Vec512<float16_t> Floor(Vec512<float16_t> v) {
+  const auto lowered =
+      _mm512_roundscale_ph(v.raw, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  return Vec512<float16_t>{lowered};
+}
+#endif  // HWY_HAVE_FLOAT16
+HWY_API Vec512<float> Floor(Vec512<float> v) {
+  const auto lowered =
+      _mm512_roundscale_ps(v.raw, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  return Vec512<float>{lowered};
+}
+HWY_API Vec512<double> Floor(Vec512<double> v) {
+  const auto lowered =
+      _mm512_roundscale_pd(v.raw, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  return Vec512<double>{lowered};
+}
+
+HWY_DIAGNOSTICS(pop)
+
+// ================================================== COMPARE
+
+// Comparisons set a mask bit to 1 if the condition is true, else 0.
+
+// Reinterprets a 512-bit mask as the mask type of DTo. The raw mask bits are
+// copied unchanged, so source and destination lanes must be the same size.
+template <class DTo, typename TFrom>
+HWY_API MFromD<DTo> RebindMask(DTo /*tag*/, Mask512<TFrom> m) {
+  using TTo = TFromD<DTo>;
+  static_assert(sizeof(TFrom) == sizeof(TTo), "Must have same size");
+  return MFromD<DTo>{m.raw};
+}
+
+namespace detail {
+
+// Lane-size-dispatched helpers for TestBit: the SizeTag argument selects the
+// _mm512_test_epi{8,16,32,64}_mask intrinsic matching sizeof(T).
+template <typename T>
+HWY_INLINE Mask512<T> TestBit(hwy::SizeTag<1> /*tag*/, Vec512<T> v,
+                              Vec512<T> bit) {
+  const auto hits = _mm512_test_epi8_mask(v.raw, bit.raw);
+  return Mask512<T>{hits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> TestBit(hwy::SizeTag<2> /*tag*/, Vec512<T> v,
+                              Vec512<T> bit) {
+  const auto hits = _mm512_test_epi16_mask(v.raw, bit.raw);
+  return Mask512<T>{hits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> TestBit(hwy::SizeTag<4> /*tag*/, Vec512<T> v,
+                              Vec512<T> bit) {
+  const auto hits = _mm512_test_epi32_mask(v.raw, bit.raw);
+  return Mask512<T>{hits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> TestBit(hwy::SizeTag<8> /*tag*/, Vec512<T> v,
+                              Vec512<T> bit) {
+  const auto hits = _mm512_test_epi64_mask(v.raw, bit.raw);
+  return Mask512<T>{hits};
+}
+
+}  // namespace detail
+
+// Integer-only entry point: forwards to the detail::TestBit overload chosen
+// by the lane size of T.
+template <typename T>
+HWY_API Mask512<T> TestBit(const Vec512<T> v, const Vec512<T> bit) {
+  static_assert(!hwy::IsFloat<T>(), "Only integer vectors supported");
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  return detail::TestBit(LaneSizeTag(), v, bit);
+}
+
+// ------------------------------ Equality
+
+// Integer lanes: one overload per lane width, each using the matching
+// cmpeq mask intrinsic.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Mask512<T> operator==(Vec512<T> a, Vec512<T> b) {
+  const auto equal = _mm512_cmpeq_epi8_mask(a.raw, b.raw);
+  return Mask512<T>{equal};
+}
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_API Mask512<T> operator==(Vec512<T> a, Vec512<T> b) {
+  const auto equal = _mm512_cmpeq_epi16_mask(a.raw, b.raw);
+  return Mask512<T>{equal};
+}
+template <typename T, HWY_IF_UI32(T)>
+HWY_API Mask512<T> operator==(Vec512<T> a, Vec512<T> b) {
+  const auto equal = _mm512_cmpeq_epi32_mask(a.raw, b.raw);
+  return Mask512<T>{equal};
+}
+template <typename T, HWY_IF_UI64(T)>
+HWY_API Mask512<T> operator==(Vec512<T> a, Vec512<T> b) {
+  const auto equal = _mm512_cmpeq_epi64_mask(a.raw, b.raw);
+  return Mask512<T>{equal};
+}
+
+// Floating-point lanes: compared with the _CMP_EQ_OQ predicate.
+#if HWY_HAVE_FLOAT16
+HWY_API Mask512<float16_t> operator==(Vec512<float16_t> a,
+                                      Vec512<float16_t> b) {
+  const auto equal = _mm512_cmp_ph_mask(a.raw, b.raw, _CMP_EQ_OQ);
+  return Mask512<float16_t>{equal};
+}
+#endif  // HWY_HAVE_FLOAT16
+
+HWY_API Mask512<float> operator==(Vec512<float> a, Vec512<float> b) {
+  const auto equal = _mm512_cmp_ps_mask(a.raw, b.raw, _CMP_EQ_OQ);
+  return Mask512<float>{equal};
+}
+
+HWY_API Mask512<double> operator==(Vec512<double> a, Vec512<double> b) {
+  const auto equal = _mm512_cmp_pd_mask(a.raw, b.raw, _CMP_EQ_OQ);
+  return Mask512<double>{equal};
+}
+
+// ------------------------------ Inequality
+
+// Integer lanes: one overload per lane width, each using the matching
+// cmpneq mask intrinsic.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Mask512<T> operator!=(Vec512<T> a, Vec512<T> b) {
+  const auto differ = _mm512_cmpneq_epi8_mask(a.raw, b.raw);
+  return Mask512<T>{differ};
+}
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_API Mask512<T> operator!=(Vec512<T> a, Vec512<T> b) {
+  const auto differ = _mm512_cmpneq_epi16_mask(a.raw, b.raw);
+  return Mask512<T>{differ};
+}
+template <typename T, HWY_IF_UI32(T)>
+HWY_API Mask512<T> operator!=(Vec512<T> a, Vec512<T> b) {
+  const auto differ = _mm512_cmpneq_epi32_mask(a.raw, b.raw);
+  return Mask512<T>{differ};
+}
+template <typename T, HWY_IF_UI64(T)>
+HWY_API Mask512<T> operator!=(Vec512<T> a, Vec512<T> b) {
+  const auto differ = _mm512_cmpneq_epi64_mask(a.raw, b.raw);
+  return Mask512<T>{differ};
+}
+
+// Floating-point lanes: compared with the _CMP_NEQ_OQ predicate.
+// NOTE(review): the ordered predicate reports false when either operand is
+// NaN, unlike scalar `!=` -- confirm this is the intended NaN behavior.
+#if HWY_HAVE_FLOAT16
+HWY_API Mask512<float16_t> operator!=(Vec512<float16_t> a,
+                                      Vec512<float16_t> b) {
+  const auto differ = _mm512_cmp_ph_mask(a.raw, b.raw, _CMP_NEQ_OQ);
+  return Mask512<float16_t>{differ};
+}
+#endif  // HWY_HAVE_FLOAT16
+
+HWY_API Mask512<float> operator!=(Vec512<float> a, Vec512<float> b) {
+  const auto differ = _mm512_cmp_ps_mask(a.raw, b.raw, _CMP_NEQ_OQ);
+  return Mask512<float>{differ};
+}
+
+HWY_API Mask512<double> operator!=(Vec512<double> a, Vec512<double> b) {
+  const auto differ = _mm512_cmp_pd_mask(a.raw, b.raw, _CMP_NEQ_OQ);
+  return Mask512<double>{differ};
+}
+
+// ------------------------------ Strict inequality
+
+// Unsigned lanes: cmpgt_epu* compares.
+HWY_API Mask512<uint8_t> operator>(Vec512<uint8_t> a, Vec512<uint8_t> b) {
+  const auto greater = _mm512_cmpgt_epu8_mask(a.raw, b.raw);
+  return Mask512<uint8_t>{greater};
+}
+HWY_API Mask512<uint16_t> operator>(Vec512<uint16_t> a, Vec512<uint16_t> b) {
+  const auto greater = _mm512_cmpgt_epu16_mask(a.raw, b.raw);
+  return Mask512<uint16_t>{greater};
+}
+HWY_API Mask512<uint32_t> operator>(Vec512<uint32_t> a, Vec512<uint32_t> b) {
+  const auto greater = _mm512_cmpgt_epu32_mask(a.raw, b.raw);
+  return Mask512<uint32_t>{greater};
+}
+HWY_API Mask512<uint64_t> operator>(Vec512<uint64_t> a, Vec512<uint64_t> b) {
+  const auto greater = _mm512_cmpgt_epu64_mask(a.raw, b.raw);
+  return Mask512<uint64_t>{greater};
+}
+
+// Signed lanes: cmpgt_epi* compares.
+HWY_API Mask512<int8_t> operator>(Vec512<int8_t> a, Vec512<int8_t> b) {
+  const auto greater = _mm512_cmpgt_epi8_mask(a.raw, b.raw);
+  return Mask512<int8_t>{greater};
+}
+HWY_API Mask512<int16_t> operator>(Vec512<int16_t> a, Vec512<int16_t> b) {
+  const auto greater = _mm512_cmpgt_epi16_mask(a.raw, b.raw);
+  return Mask512<int16_t>{greater};
+}
+HWY_API Mask512<int32_t> operator>(Vec512<int32_t> a, Vec512<int32_t> b) {
+  const auto greater = _mm512_cmpgt_epi32_mask(a.raw, b.raw);
+  return Mask512<int32_t>{greater};
+}
+HWY_API Mask512<int64_t> operator>(Vec512<int64_t> a, Vec512<int64_t> b) {
+  const auto greater = _mm512_cmpgt_epi64_mask(a.raw, b.raw);
+  return Mask512<int64_t>{greater};
+}
+
+// Floating-point lanes: compared with the _CMP_GT_OQ predicate.
+#if HWY_HAVE_FLOAT16
+HWY_API Mask512<float16_t> operator>(Vec512<float16_t> a, Vec512<float16_t> b) {
+  const auto greater = _mm512_cmp_ph_mask(a.raw, b.raw, _CMP_GT_OQ);
+  return Mask512<float16_t>{greater};
+}
+#endif  // HWY_HAVE_FLOAT16
+
+HWY_API Mask512<float> operator>(Vec512<float> a, Vec512<float> b) {
+  const auto greater = _mm512_cmp_ps_mask(a.raw, b.raw, _CMP_GT_OQ);
+  return Mask512<float>{greater};
+}
+HWY_API Mask512<double> operator>(Vec512<double> a, Vec512<double> b) {
+  const auto greater = _mm512_cmp_pd_mask(a.raw, b.raw, _CMP_GT_OQ);
+  return Mask512<double>{greater};
+}
+
+// ------------------------------ Weak inequality
+
+// Floating-point lanes: compared with the _CMP_GE_OQ predicate.
+#if HWY_HAVE_FLOAT16
+HWY_API Mask512<float16_t> operator>=(Vec512<float16_t> a,
+                                      Vec512<float16_t> b) {
+  const auto at_least = _mm512_cmp_ph_mask(a.raw, b.raw, _CMP_GE_OQ);
+  return Mask512<float16_t>{at_least};
+}
+#endif  // HWY_HAVE_FLOAT16
+
+HWY_API Mask512<float> operator>=(Vec512<float> a, Vec512<float> b) {
+  const auto at_least = _mm512_cmp_ps_mask(a.raw, b.raw, _CMP_GE_OQ);
+  return Mask512<float>{at_least};
+}
+HWY_API Mask512<double> operator>=(Vec512<double> a, Vec512<double> b) {
+  const auto at_least = _mm512_cmp_pd_mask(a.raw, b.raw, _CMP_GE_OQ);
+  return Mask512<double>{at_least};
+}
+
+// Unsigned lanes: cmpge_epu* compares.
+HWY_API Mask512<uint8_t> operator>=(Vec512<uint8_t> a, Vec512<uint8_t> b) {
+  const auto at_least = _mm512_cmpge_epu8_mask(a.raw, b.raw);
+  return Mask512<uint8_t>{at_least};
+}
+HWY_API Mask512<uint16_t> operator>=(Vec512<uint16_t> a, Vec512<uint16_t> b) {
+  const auto at_least = _mm512_cmpge_epu16_mask(a.raw, b.raw);
+  return Mask512<uint16_t>{at_least};
+}
+HWY_API Mask512<uint32_t> operator>=(Vec512<uint32_t> a, Vec512<uint32_t> b) {
+  const auto at_least = _mm512_cmpge_epu32_mask(a.raw, b.raw);
+  return Mask512<uint32_t>{at_least};
+}
+HWY_API Mask512<uint64_t> operator>=(Vec512<uint64_t> a, Vec512<uint64_t> b) {
+  const auto at_least = _mm512_cmpge_epu64_mask(a.raw, b.raw);
+  return Mask512<uint64_t>{at_least};
+}
+
+// Signed lanes: cmpge_epi* compares.
+HWY_API Mask512<int8_t> operator>=(Vec512<int8_t> a, Vec512<int8_t> b) {
+  const auto at_least = _mm512_cmpge_epi8_mask(a.raw, b.raw);
+  return Mask512<int8_t>{at_least};
+}
+HWY_API Mask512<int16_t> operator>=(Vec512<int16_t> a, Vec512<int16_t> b) {
+  const auto at_least = _mm512_cmpge_epi16_mask(a.raw, b.raw);
+  return Mask512<int16_t>{at_least};
+}
+HWY_API Mask512<int32_t> operator>=(Vec512<int32_t> a, Vec512<int32_t> b) {
+  const auto at_least = _mm512_cmpge_epi32_mask(a.raw, b.raw);
+  return Mask512<int32_t>{at_least};
+}
+HWY_API Mask512<int64_t> operator>=(Vec512<int64_t> a, Vec512<int64_t> b) {
+  const auto at_least = _mm512_cmpge_epi64_mask(a.raw, b.raw);
+  return Mask512<int64_t>{at_least};
+}
+
+// ------------------------------ Reversed comparisons
+
+// a < b is evaluated as b > a, reusing the operator> overloads.
+template <typename T>
+HWY_API Mask512<T> operator<(Vec512<T> a, Vec512<T> b) {
+  const Mask512<T> b_is_greater = (b > a);
+  return b_is_greater;
+}
+
+// a <= b is evaluated as b >= a, reusing the operator>= overloads.
+template <typename T>
+HWY_API Mask512<T> operator<=(Vec512<T> a, Vec512<T> b) {
+  const Mask512<T> b_is_at_least = (b >= a);
+  return b_is_at_least;
+}
+
+// ------------------------------ Mask
+
+namespace detail {
+
+// Lane-size-dispatched helpers for MaskFromVec: the SizeTag argument selects
+// the _mm512_movepi{8,16,32,64}_mask intrinsic matching sizeof(T).
+template <typename T>
+HWY_INLINE Mask512<T> MaskFromVec(hwy::SizeTag<1> /*tag*/, Vec512<T> v) {
+  const auto bits = _mm512_movepi8_mask(v.raw);
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> MaskFromVec(hwy::SizeTag<2> /*tag*/, Vec512<T> v) {
+  const auto bits = _mm512_movepi16_mask(v.raw);
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> MaskFromVec(hwy::SizeTag<4> /*tag*/, Vec512<T> v) {
+  const auto bits = _mm512_movepi32_mask(v.raw);
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> MaskFromVec(hwy::SizeTag<8> /*tag*/, Vec512<T> v) {
+  const auto bits = _mm512_movepi64_mask(v.raw);
+  return Mask512<T>{bits};
+}
+
+}  // namespace detail
+
+// Non-float lanes: dispatch on lane size to the detail:: helper.
+template <typename T, HWY_IF_NOT_FLOAT(T)>
+HWY_API Mask512<T> MaskFromVec(Vec512<T> v) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  return detail::MaskFromVec(LaneSizeTag(), v);
+}
+// Float lanes: bit-cast to the same-width signed integer type, take that
+// mask, and rewrap its raw bits as a Mask512<T>.
+template <typename T, HWY_IF_FLOAT(T)>
+HWY_API Mask512<T> MaskFromVec(Vec512<T> v) {
+  const RebindToSigned<DFromV<decltype(v)>> di;
+  const auto as_int = BitCast(di, v);
+  const auto int_mask = MaskFromVec(as_int);
+  return Mask512<T>{int_mask.raw};
+}
+
+// Expands a mask into a vector via the movm_epi* intrinsics. Floating-point
+// overloads build the integer vector first and then cast its bits.
+
+// 8-bit lanes.
+HWY_API Vec512<uint8_t> VecFromMask(Mask512<uint8_t> v) {
+  const auto lanes = _mm512_movm_epi8(v.raw);
+  return Vec512<uint8_t>{lanes};
+}
+HWY_API Vec512<int8_t> VecFromMask(Mask512<int8_t> v) {
+  const auto lanes = _mm512_movm_epi8(v.raw);
+  return Vec512<int8_t>{lanes};
+}
+
+// 16-bit lanes.
+HWY_API Vec512<uint16_t> VecFromMask(Mask512<uint16_t> v) {
+  const auto lanes = _mm512_movm_epi16(v.raw);
+  return Vec512<uint16_t>{lanes};
+}
+HWY_API Vec512<int16_t> VecFromMask(Mask512<int16_t> v) {
+  const auto lanes = _mm512_movm_epi16(v.raw);
+  return Vec512<int16_t>{lanes};
+}
+#if HWY_HAVE_FLOAT16
+HWY_API Vec512<float16_t> VecFromMask(Mask512<float16_t> v) {
+  const auto lanes = _mm512_movm_epi16(v.raw);
+  return Vec512<float16_t>{_mm512_castsi512_ph(lanes)};
+}
+#endif  // HWY_HAVE_FLOAT16
+
+// 32-bit lanes.
+HWY_API Vec512<uint32_t> VecFromMask(Mask512<uint32_t> v) {
+  const auto lanes = _mm512_movm_epi32(v.raw);
+  return Vec512<uint32_t>{lanes};
+}
+HWY_API Vec512<int32_t> VecFromMask(Mask512<int32_t> v) {
+  const auto lanes = _mm512_movm_epi32(v.raw);
+  return Vec512<int32_t>{lanes};
+}
+HWY_API Vec512<float> VecFromMask(Mask512<float> v) {
+  const auto lanes = _mm512_movm_epi32(v.raw);
+  return Vec512<float>{_mm512_castsi512_ps(lanes)};
+}
+
+// 64-bit lanes.
+HWY_API Vec512<uint64_t> VecFromMask(Mask512<uint64_t> v) {
+  const auto lanes = _mm512_movm_epi64(v.raw);
+  return Vec512<uint64_t>{lanes};
+}
+HWY_API Vec512<int64_t> VecFromMask(Mask512<int64_t> v) {
+  const auto lanes = _mm512_movm_epi64(v.raw);
+  return Vec512<int64_t>{lanes};
+}
+HWY_API Vec512<double> VecFromMask(Mask512<double> v) {
+  const auto lanes = _mm512_movm_epi64(v.raw);
+  return Vec512<double>{_mm512_castsi512_pd(lanes)};
+}
+
+// ------------------------------ Mask logical
+
+namespace detail {
+
+// Mask logical helpers, dispatched on lane size via SizeTag. Lane sizes
+// 1/2/4/8 use the 64/32/16/8-bit mask operations respectively. When
+// HWY_COMPILER_HAS_MASK_INTRINSICS is set the _k*_mask intrinsics are used;
+// otherwise plain integer operators on the raw mask value are used.
+
+// ---- Not: ~m
+template <typename T>
+HWY_INLINE Mask512<T> Not(hwy::SizeTag<1> /*tag*/, Mask512<T> m) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _knot_mask64(m.raw);
+#else
+  const auto bits = ~m.raw;
+#endif
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> Not(hwy::SizeTag<2> /*tag*/, Mask512<T> m) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _knot_mask32(m.raw);
+#else
+  const auto bits = ~m.raw;
+#endif
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> Not(hwy::SizeTag<4> /*tag*/, Mask512<T> m) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _knot_mask16(m.raw);
+#else
+  const auto bits = static_cast<uint16_t>(~m.raw & 0xFFFF);
+#endif
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> Not(hwy::SizeTag<8> /*tag*/, Mask512<T> m) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _knot_mask8(m.raw);
+#else
+  const auto bits = static_cast<uint8_t>(~m.raw & 0xFF);
+#endif
+  return Mask512<T>{bits};
+}
+
+// ---- And: a & b
+template <typename T>
+HWY_INLINE Mask512<T> And(hwy::SizeTag<1> /*tag*/, Mask512<T> a, Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _kand_mask64(a.raw, b.raw);
+#else
+  const auto bits = a.raw & b.raw;
+#endif
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> And(hwy::SizeTag<2> /*tag*/, Mask512<T> a, Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _kand_mask32(a.raw, b.raw);
+#else
+  const auto bits = a.raw & b.raw;
+#endif
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> And(hwy::SizeTag<4> /*tag*/, Mask512<T> a, Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _kand_mask16(a.raw, b.raw);
+#else
+  const auto bits = static_cast<uint16_t>(a.raw & b.raw);
+#endif
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> And(hwy::SizeTag<8> /*tag*/, Mask512<T> a, Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _kand_mask8(a.raw, b.raw);
+#else
+  const auto bits = static_cast<uint8_t>(a.raw & b.raw);
+#endif
+  return Mask512<T>{bits};
+}
+
+// ---- AndNot: ~a & b (note the first operand is the one inverted)
+template <typename T>
+HWY_INLINE Mask512<T> AndNot(hwy::SizeTag<1> /*tag*/, Mask512<T> a,
+                             Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _kandn_mask64(a.raw, b.raw);
+#else
+  const auto bits = ~a.raw & b.raw;
+#endif
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> AndNot(hwy::SizeTag<2> /*tag*/, Mask512<T> a,
+                             Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _kandn_mask32(a.raw, b.raw);
+#else
+  const auto bits = ~a.raw & b.raw;
+#endif
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> AndNot(hwy::SizeTag<4> /*tag*/, Mask512<T> a,
+                             Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _kandn_mask16(a.raw, b.raw);
+#else
+  const auto bits = static_cast<uint16_t>(~a.raw & b.raw);
+#endif
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> AndNot(hwy::SizeTag<8> /*tag*/, Mask512<T> a,
+                             Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _kandn_mask8(a.raw, b.raw);
+#else
+  const auto bits = static_cast<uint8_t>(~a.raw & b.raw);
+#endif
+  return Mask512<T>{bits};
+}
+
+// ---- Or: a | b
+template <typename T>
+HWY_INLINE Mask512<T> Or(hwy::SizeTag<1> /*tag*/, Mask512<T> a, Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _kor_mask64(a.raw, b.raw);
+#else
+  const auto bits = a.raw | b.raw;
+#endif
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> Or(hwy::SizeTag<2> /*tag*/, Mask512<T> a, Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _kor_mask32(a.raw, b.raw);
+#else
+  const auto bits = a.raw | b.raw;
+#endif
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> Or(hwy::SizeTag<4> /*tag*/, Mask512<T> a, Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _kor_mask16(a.raw, b.raw);
+#else
+  const auto bits = static_cast<uint16_t>(a.raw | b.raw);
+#endif
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> Or(hwy::SizeTag<8> /*tag*/, Mask512<T> a, Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _kor_mask8(a.raw, b.raw);
+#else
+  const auto bits = static_cast<uint8_t>(a.raw | b.raw);
+#endif
+  return Mask512<T>{bits};
+}
+
+// ---- Xor: a ^ b
+template <typename T>
+HWY_INLINE Mask512<T> Xor(hwy::SizeTag<1> /*tag*/, Mask512<T> a, Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _kxor_mask64(a.raw, b.raw);
+#else
+  const auto bits = a.raw ^ b.raw;
+#endif
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> Xor(hwy::SizeTag<2> /*tag*/, Mask512<T> a, Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _kxor_mask32(a.raw, b.raw);
+#else
+  const auto bits = a.raw ^ b.raw;
+#endif
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> Xor(hwy::SizeTag<4> /*tag*/, Mask512<T> a, Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _kxor_mask16(a.raw, b.raw);
+#else
+  const auto bits = static_cast<uint16_t>(a.raw ^ b.raw);
+#endif
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> Xor(hwy::SizeTag<8> /*tag*/, Mask512<T> a, Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _kxor_mask8(a.raw, b.raw);
+#else
+  const auto bits = static_cast<uint8_t>(a.raw ^ b.raw);
+#endif
+  return Mask512<T>{bits};
+}
+
+// ---- ExclusiveNeither: ~(a ^ b), i.e. the XNOR of the two masks
+template <typename T>
+HWY_INLINE Mask512<T> ExclusiveNeither(hwy::SizeTag<1> /*tag*/, Mask512<T> a,
+                                       Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _kxnor_mask64(a.raw, b.raw);
+#else
+  const auto bits = ~(a.raw ^ b.raw);
+#endif
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> ExclusiveNeither(hwy::SizeTag<2> /*tag*/, Mask512<T> a,
+                                       Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _kxnor_mask32(a.raw, b.raw);
+#else
+  const auto bits = static_cast<__mmask32>(~(a.raw ^ b.raw) & 0xFFFFFFFF);
+#endif
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> ExclusiveNeither(hwy::SizeTag<4> /*tag*/, Mask512<T> a,
+                                       Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _kxnor_mask16(a.raw, b.raw);
+#else
+  const auto bits = static_cast<__mmask16>(~(a.raw ^ b.raw) & 0xFFFF);
+#endif
+  return Mask512<T>{bits};
+}
+template <typename T>
+HWY_INLINE Mask512<T> ExclusiveNeither(hwy::SizeTag<8> /*tag*/, Mask512<T> a,
+                                       Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const auto bits = _kxnor_mask8(a.raw, b.raw);
+#else
+  const auto bits = static_cast<__mmask8>(~(a.raw ^ b.raw) & 0xFF);
+#endif
+  return Mask512<T>{bits};
+}
+
+}  // namespace detail
+
+// Mask complement; forwards to the detail::Not overload for T's lane size.
+template <typename T>
+HWY_API Mask512<T> Not(Mask512<T> m) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  return detail::Not(LaneSizeTag(), m);
+}
+
+// Mask intersection; forwards to the detail::And overload for T's lane size.
+template <typename T>
+HWY_API Mask512<T> And(Mask512<T> a, Mask512<T> b) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  return detail::And(LaneSizeTag(), a, b);
+}
+
+// Forwards to the detail::AndNot overload for T's lane size, whose fallback
+// computes ~a & b.
+template <typename T>
+HWY_API Mask512<T> AndNot(Mask512<T> a, Mask512<T> b) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  return detail::AndNot(LaneSizeTag(), a, b);
+}
+
+// Mask union; forwards to the detail::Or overload for T's lane size.
+template <typename T>
+HWY_API Mask512<T> Or(Mask512<T> a, Mask512<T> b) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  return detail::Or(LaneSizeTag(), a, b);
+}
+
+// Mask exclusive-or; forwards to the detail::Xor overload for T's lane size.
+template <typename T>
+HWY_API Mask512<T> Xor(Mask512<T> a, Mask512<T> b) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  return detail::Xor(LaneSizeTag(), a, b);
+}
+
+// Forwards to the detail::ExclusiveNeither overload for T's lane size, whose
+// fallback computes ~(a ^ b).
+template <typename T>
+HWY_API Mask512<T> ExclusiveNeither(Mask512<T> a, Mask512<T> b) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  return detail::ExclusiveNeither(LaneSizeTag(), a, b);
+}
+
+// ------------------------------ BroadcastSignBit (ShiftRight, compare, mask)
+
+// 8-bit lanes: built from a `v < 0` compare expanded back into a vector.
+HWY_API Vec512<int8_t> BroadcastSignBit(Vec512<int8_t> v) {
+  const DFromV<decltype(v)> d;
+  const auto is_negative = v < Zero(d);
+  return VecFromMask(is_negative);
+}
+
+// 16-bit lanes: shift right by 15.
+HWY_API Vec512<int16_t> BroadcastSignBit(Vec512<int16_t> v) {
+  const auto filled = ShiftRight<15>(v);
+  return filled;
+}
+
+// 32-bit lanes: shift right by 31.
+HWY_API Vec512<int32_t> BroadcastSignBit(Vec512<int32_t> v) {
+  const auto filled = ShiftRight<31>(v);
+  return filled;
+}
+
+// 64-bit lanes: calls _mm512_srai_epi64 directly with a shift count of 63.
+HWY_API Vec512<int64_t> BroadcastSignBit(Vec512<int64_t> v) {
+  const auto filled = _mm512_srai_epi64(v.raw, 63);
+  return Vec512<int64_t>{filled};
+}
+
+// ------------------------------ Floating-point classification (Not)
+
+#if HWY_HAVE_FLOAT16 || HWY_IDE
+
+// float16_t classification via _mm512_fpclass_ph_mask. The second argument
+// is a set of HWY_X86_FPCLASS_* category flags; a lane's mask bit is set
+// when the lane falls into any of the requested categories.
+
+// True for lanes holding a signaling or quiet NaN.
+HWY_API Mask512<float16_t> IsNaN(Vec512<float16_t> v) {
+  return Mask512<float16_t>{_mm512_fpclass_ph_mask(
+      v.raw, HWY_X86_FPCLASS_SNAN | HWY_X86_FPCLASS_QNAN)};
+}
+
+// True for lanes holding +infinity or -infinity. Uses the named category
+// flags (rather than the literal 0x18) so this overload reads the same as
+// the float/double IsInf overloads.
+HWY_API Mask512<float16_t> IsInf(Vec512<float16_t> v) {
+  return Mask512<float16_t>{_mm512_fpclass_ph_mask(
+      v.raw, HWY_X86_FPCLASS_NEG_INF | HWY_X86_FPCLASS_POS_INF)};
+}
+
+// Returns whether normal/subnormal/zero. fpclass doesn't have a flag for
+// positive, so we have to check for inf/NaN and negate.
+HWY_API Mask512<float16_t> IsFinite(Vec512<float16_t> v) {
+  return Not(Mask512<float16_t>{_mm512_fpclass_ph_mask(
+      v.raw, HWY_X86_FPCLASS_SNAN | HWY_X86_FPCLASS_QNAN |
+                 HWY_X86_FPCLASS_NEG_INF | HWY_X86_FPCLASS_POS_INF)});
+}
+
+#endif  // HWY_HAVE_FLOAT16 || HWY_IDE
+
+// True for lanes that fpclass reports as signaling or quiet NaN.
+HWY_API Mask512<float> IsNaN(Vec512<float> v) {
+  const auto nan_bits = _mm512_fpclass_ps_mask(
+      v.raw, HWY_X86_FPCLASS_SNAN | HWY_X86_FPCLASS_QNAN);
+  return Mask512<float>{nan_bits};
+}
+HWY_API Mask512<double> IsNaN(Vec512<double> v) {
+  const auto nan_bits = _mm512_fpclass_pd_mask(
+      v.raw, HWY_X86_FPCLASS_SNAN | HWY_X86_FPCLASS_QNAN);
+  return Mask512<double>{nan_bits};
+}
+
+// True for lanes that fpclass reports as negative or positive infinity.
+HWY_API Mask512<float> IsInf(Vec512<float> v) {
+  const auto inf_bits = _mm512_fpclass_ps_mask(
+      v.raw, HWY_X86_FPCLASS_NEG_INF | HWY_X86_FPCLASS_POS_INF);
+  return Mask512<float>{inf_bits};
+}
+HWY_API Mask512<double> IsInf(Vec512<double> v) {
+  const auto inf_bits = _mm512_fpclass_pd_mask(
+      v.raw, HWY_X86_FPCLASS_NEG_INF | HWY_X86_FPCLASS_POS_INF);
+  return Mask512<double>{inf_bits};
+}
+
+// True for normal, subnormal and zero lanes. This is computed as the
+// complement of "NaN or infinity", since those are the categories requested
+// from fpclass here.
+HWY_API Mask512<float> IsFinite(Vec512<float> v) {
+  const Mask512<float> nan_or_inf{_mm512_fpclass_ps_mask(
+      v.raw, HWY_X86_FPCLASS_SNAN | HWY_X86_FPCLASS_QNAN |
+                 HWY_X86_FPCLASS_NEG_INF | HWY_X86_FPCLASS_POS_INF)};
+  return Not(nan_or_inf);
+}
+HWY_API Mask512<double> IsFinite(Vec512<double> v) {
+  const Mask512<double> nan_or_inf{_mm512_fpclass_pd_mask(
+      v.raw, HWY_X86_FPCLASS_SNAN | HWY_X86_FPCLASS_QNAN |
+                 HWY_X86_FPCLASS_NEG_INF | HWY_X86_FPCLASS_POS_INF)};
+  return Not(nan_or_inf);
+}
+
+// ================================================== MEMORY
+
+// ------------------------------ Load
+
+// Loads a full 64-byte vector through the aligned-load intrinsics.
+// NOTE(review): `aligned` is presumably required to be 64-byte aligned, as
+// the parameter name and intrinsic choice suggest -- confirm against callers.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+HWY_API VFromD<D> Load(D /* tag */, const TFromD<D>* HWY_RESTRICT aligned) {
+  const auto loaded = _mm512_load_si512(aligned);
+  return VFromD<D>{loaded};
+}
+// float16_t: native _ph load when HWY_HAVE_FLOAT16 is set, otherwise load
+// the same bytes as uint16_t lanes and bit-cast.
+// bfloat16_t is handled by x86_128-inl.h.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F16_D(D)>
+HWY_API Vec512<float16_t> Load(D d, const float16_t* HWY_RESTRICT aligned) {
+#if HWY_HAVE_FLOAT16
+  (void)d;
+  const auto loaded = _mm512_load_ph(aligned);
+  return Vec512<float16_t>{loaded};
+#else
+  const RebindToUnsigned<decltype(d)> du;
+  const auto* as_u16 = reinterpret_cast<const uint16_t*>(aligned);
+  return BitCast(d, Load(du, as_u16));
+#endif  // HWY_HAVE_FLOAT16
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API Vec512<float> Load(D /* tag */, const float* HWY_RESTRICT aligned) {
+  const auto loaded = _mm512_load_ps(aligned);
+  return Vec512<float>{loaded};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> Load(D /* tag */, const double* HWY_RESTRICT aligned) {
+  const auto loaded = _mm512_load_pd(aligned);
+  return VFromD<D>{loaded};
+}
+
+// 512-bit loads via the unaligned-load intrinsics, one overload per lane
+// category (integer, float16_t, float, double).
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+HWY_API VFromD<D> LoadU(D /* tag */, const TFromD<D>* HWY_RESTRICT p) {
+  const __m512i raw = _mm512_loadu_si512(p);
+  return VFromD<D>{raw};
+}
+
+// bfloat16_t is handled by x86_128-inl.h.
+template <class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API Vec512<float16_t> LoadU(D d, const float16_t* HWY_RESTRICT p) {
+#if HWY_HAVE_FLOAT16
+  (void)d;
+  return Vec512<float16_t>{_mm512_loadu_ph(p)};
+#else
+  // Without native float16 support, load the bits as u16 lanes and
+  // reinterpret them.
+  const RebindToUnsigned<decltype(d)> du;
+  const uint16_t* bits = reinterpret_cast<const uint16_t*>(p);
+  return BitCast(d, LoadU(du, bits));
+#endif  // HWY_HAVE_FLOAT16
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API Vec512<float> LoadU(D /* tag */, const float* HWY_RESTRICT p) {
+  const __m512 raw = _mm512_loadu_ps(p);
+  return Vec512<float>{raw};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> LoadU(D /* tag */, const double* HWY_RESTRICT p) {
+  const __m512d raw = _mm512_loadu_pd(p);
+  return VFromD<D>{raw};
+}
+
+// ------------------------------ MaskedLoad
+
+// Masked loads using the zero-masking (maskz) unaligned-load intrinsics, one
+// overload per lane size/category.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D /* tag */,
+                             const TFromD<D>* HWY_RESTRICT p) {
+  const __m512i raw = _mm512_maskz_loadu_epi8(m.raw, p);
+  return VFromD<D>{raw};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D d,
+                             const TFromD<D>* HWY_RESTRICT p) {
+  const RebindToUnsigned<D> du;  // for float16_t
+  const uint16_t* pu = reinterpret_cast<const uint16_t*>(p);
+  const VFromD<decltype(du)> loaded{_mm512_maskz_loadu_epi16(m.raw, pu)};
+  return BitCast(d, loaded);
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI32_D(D)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D /* tag */,
+                             const TFromD<D>* HWY_RESTRICT p) {
+  const __m512i raw = _mm512_maskz_loadu_epi32(m.raw, p);
+  return VFromD<D>{raw};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI64_D(D)>
+HWY_API VFromD<D> MaskedLoad(MFromD<D> m, D /* tag */,
+                             const TFromD<D>* HWY_RESTRICT p) {
+  const __m512i raw = _mm512_maskz_loadu_epi64(m.raw, p);
+  return VFromD<D>{raw};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API Vec512<float> MaskedLoad(Mask512<float> m, D /* tag */,
+                                 const float* HWY_RESTRICT p) {
+  const __m512 raw = _mm512_maskz_loadu_ps(m.raw, p);
+  return Vec512<float>{raw};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API Vec512<double> MaskedLoad(Mask512<double> m, D /* tag */,
+                                  const double* HWY_RESTRICT p) {
+  const __m512d raw = _mm512_maskz_loadu_pd(m.raw, p);
+  return Vec512<double>{raw};
+}
+
+// ------------------------------ MaskedLoadOr
+
+// Masked load of 8-bit lanes using the merge-masking intrinsic, with `v`
+// passed as the source operand.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, MFromD<D> m, D /* tag */,
+                               const TFromD<D>* HWY_RESTRICT p) {
+  const __m512i merged = _mm512_mask_loadu_epi8(v.raw, m.raw, p);
+  return VFromD<D>{merged};
+}
+
+// Masked load of 16-bit lanes using the merge-masking intrinsic, with `v`
+// passed as the source operand.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, MFromD<D> m, D d,
+                               const TFromD<D>* HWY_RESTRICT p) {
+  const RebindToUnsigned<decltype(d)> du;  // for float16_t
+  // The intrinsic yields an integer vector. Wrap it in the unsigned vector
+  // type and BitCast back to D (as the 16-bit MaskedLoad does) instead of
+  // constructing VFromD<D> directly, whose raw type can differ for float16_t.
+  return BitCast(
+      d, VFromD<decltype(du)>{_mm512_mask_loadu_epi16(
+             BitCast(du, v).raw, m.raw, reinterpret_cast<const uint16_t*>(p))});
+}
+
+// Masked loads of 32/64-bit integer, float and double lanes using the
+// merge-masking intrinsics, with `v` passed as the source operand.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI32_D(D)>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, MFromD<D> m, D /* tag */,
+                               const TFromD<D>* HWY_RESTRICT p) {
+  const __m512i merged = _mm512_mask_loadu_epi32(v.raw, m.raw, p);
+  return VFromD<D>{merged};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI64_D(D)>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, MFromD<D> m, D /* tag */,
+                               const TFromD<D>* HWY_RESTRICT p) {
+  const __m512i merged = _mm512_mask_loadu_epi64(v.raw, m.raw, p);
+  return VFromD<D>{merged};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, Mask512<float> m, D /* tag */,
+                               const float* HWY_RESTRICT p) {
+  const __m512 merged = _mm512_mask_loadu_ps(v.raw, m.raw, p);
+  return VFromD<D>{merged};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> MaskedLoadOr(VFromD<D> v, Mask512<double> m, D /* tag */,
+                               const double* HWY_RESTRICT p) {
+  const __m512d merged = _mm512_mask_loadu_pd(v.raw, m.raw, p);
+  return VFromD<D>{merged};
+}
+
+// ------------------------------ LoadDup128
+
+// Loads 128 bits from `p` (unaligned) and passes them to the 128-bit
+// broadcast intrinsics. This avoids the 3-cycle cost of moving data between
+// 128-bit halves and avoids port 5.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API VFromD<D> LoadDup128(D /* tag */,
+                             const TFromD<D>* const HWY_RESTRICT p) {
+  const Full128<TFromD<D>> d128;
+  const auto block = LoadU(d128, p);
+  return VFromD<D>{_mm512_broadcast_i32x4(block.raw)};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> LoadDup128(D /* tag */, const float* HWY_RESTRICT p) {
+  const __m512 repeated = _mm512_broadcast_f32x4(_mm_loadu_ps(p));
+  return VFromD<D>{repeated};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> LoadDup128(D /* tag */, const double* HWY_RESTRICT p) {
+  const __m512d repeated = _mm512_broadcast_f64x2(_mm_loadu_pd(p));
+  return VFromD<D>{repeated};
+}
+
+// ------------------------------ Store
+
+// 512-bit stores via the aligned-store intrinsics, one overload per lane
+// category (integer, float16_t, float, double).
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+HWY_API void Store(VFromD<D> v, D /* tag */, TFromD<D>* HWY_RESTRICT aligned) {
+  __m512i* dst = reinterpret_cast<__m512i*>(aligned);
+  _mm512_store_si512(dst, v.raw);
+}
+// bfloat16_t is handled by x86_128-inl.h.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F16_D(D)>
+HWY_API void Store(Vec512<float16_t> v, D /* tag */,
+                   float16_t* HWY_RESTRICT aligned) {
+#if HWY_HAVE_FLOAT16
+  _mm512_store_ph(aligned, v.raw);
+#else
+  // Without native float16 support, store through the integer intrinsic.
+  __m512i* dst = reinterpret_cast<__m512i*>(aligned);
+  _mm512_store_si512(dst, v.raw);
+#endif
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API void Store(Vec512<float> v, D /* tag */, float* HWY_RESTRICT aligned) {
+  const __m512 raw = v.raw;
+  _mm512_store_ps(aligned, raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API void Store(VFromD<D> v, D /* tag */, double* HWY_RESTRICT aligned) {
+  const __m512d raw = v.raw;
+  _mm512_store_pd(aligned, raw);
+}
+
+// 512-bit stores via the unaligned-store intrinsics, one overload per lane
+// category (integer, float16_t, float, double).
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+HWY_API void StoreU(VFromD<D> v, D /* tag */, TFromD<D>* HWY_RESTRICT p) {
+  __m512i* dst = reinterpret_cast<__m512i*>(p);
+  _mm512_storeu_si512(dst, v.raw);
+}
+// bfloat16_t is handled by x86_128-inl.h.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F16_D(D)>
+HWY_API void StoreU(Vec512<float16_t> v, D /* tag */,
+                    float16_t* HWY_RESTRICT p) {
+#if HWY_HAVE_FLOAT16
+  _mm512_storeu_ph(p, v.raw);
+#else
+  // Without native float16 support, store through the integer intrinsic.
+  __m512i* dst = reinterpret_cast<__m512i*>(p);
+  _mm512_storeu_si512(dst, v.raw);
+#endif  // HWY_HAVE_FLOAT16
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API void StoreU(Vec512<float> v, D /* tag */, float* HWY_RESTRICT p) {
+  const __m512 raw = v.raw;
+  _mm512_storeu_ps(p, raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API void StoreU(Vec512<double> v, D /* tag */, double* HWY_RESTRICT p) {
+  const __m512d raw = v.raw;
+  _mm512_storeu_pd(p, raw);
+}
+
+// ------------------------------ BlendedStore
+
+// Masked stores via the mask_storeu intrinsics, one overload per lane
+// size/category.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D /* tag */,
+                          TFromD<D>* HWY_RESTRICT p) {
+  const __m512i raw = v.raw;
+  _mm512_mask_storeu_epi8(p, m.raw, raw);
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D d,
+                          TFromD<D>* HWY_RESTRICT p) {
+  const RebindToUnsigned<decltype(d)> du;  // for float16_t
+  const VFromD<decltype(du)> vu = BitCast(du, v);
+  uint16_t* pu = reinterpret_cast<uint16_t*>(p);
+  _mm512_mask_storeu_epi16(pu, m.raw, vu.raw);
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI32_D(D)>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D /* tag */,
+                          TFromD<D>* HWY_RESTRICT p) {
+  const __m512i raw = v.raw;
+  _mm512_mask_storeu_epi32(p, m.raw, raw);
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI64_D(D)>
+HWY_API void BlendedStore(VFromD<D> v, MFromD<D> m, D /* tag */,
+                          TFromD<D>* HWY_RESTRICT p) {
+  const __m512i raw = v.raw;
+  _mm512_mask_storeu_epi64(p, m.raw, raw);
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API void BlendedStore(Vec512<float> v, Mask512<float> m, D /* tag */,
+                          float* HWY_RESTRICT p) {
+  const __m512 raw = v.raw;
+  _mm512_mask_storeu_ps(p, m.raw, raw);
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API void BlendedStore(Vec512<double> v, Mask512<double> m, D /* tag */,
+                          double* HWY_RESTRICT p) {
+  const __m512d raw = v.raw;
+  _mm512_mask_storeu_pd(p, m.raw, raw);
+}
+
+// ------------------------------ Non-temporal stores
+
+// Stores `v` to `aligned` using the stream (non-temporal) store intrinsics.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API void Stream(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT aligned) {
+  const RebindToUnsigned<decltype(d)> du;  // for float16_t
+  const VFromD<decltype(du)> vu = BitCast(du, v);
+  __m512i* dst = reinterpret_cast<__m512i*>(aligned);
+  _mm512_stream_si512(dst, vu.raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API void Stream(VFromD<D> v, D /* tag */, float* HWY_RESTRICT aligned) {
+  const __m512 raw = v.raw;
+  _mm512_stream_ps(aligned, raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API void Stream(VFromD<D> v, D /* tag */, double* HWY_RESTRICT aligned) {
+  const __m512d raw = v.raw;
+  _mm512_stream_pd(aligned, raw);
+}
+
+// ------------------------------ ScatterOffset
+
+// Work around warnings in the intrinsic definitions (passing -1 as a mask).
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4245 4365, ignored "-Wsign-conversion")
+
+// Scatters the lanes of `v` relative to `base`. `offset` is passed to the
+// scatter intrinsics with a scale of 1.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI32_D(D)>
+HWY_API void ScatterOffset(VFromD<D> v, D /* tag */,
+                           TFromD<D>* HWY_RESTRICT base,
+                           VFromD<RebindToSigned<D>> offset) {
+  constexpr int kScale = 1;
+  _mm512_i32scatter_epi32(base, offset.raw, v.raw, kScale);
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI64_D(D)>
+HWY_API void ScatterOffset(VFromD<D> v, D /* tag */,
+                           TFromD<D>* HWY_RESTRICT base,
+                           VFromD<RebindToSigned<D>> offset) {
+  constexpr int kScale = 1;
+  _mm512_i64scatter_epi64(base, offset.raw, v.raw, kScale);
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API void ScatterOffset(VFromD<D> v, D /* tag */, float* HWY_RESTRICT base,
+                           Vec512<int32_t> offset) {
+  constexpr int kScale = 1;
+  _mm512_i32scatter_ps(base, offset.raw, v.raw, kScale);
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API void ScatterOffset(VFromD<D> v, D /* tag */, double* HWY_RESTRICT base,
+                           Vec512<int64_t> offset) {
+  constexpr int kScale = 1;
+  _mm512_i64scatter_pd(base, offset.raw, v.raw, kScale);
+}
+
+// ------------------------------ ScatterIndex
+
+// Scatters the lanes of `v` relative to `base`. `index` is passed to the
+// scatter intrinsics with a scale of 4 (32-bit lanes) or 8 (64-bit lanes).
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI32_D(D)>
+HWY_API void ScatterIndex(VFromD<D> v, D /* tag */,
+                          TFromD<D>* HWY_RESTRICT base,
+                          VFromD<RebindToSigned<D>> index) {
+  constexpr int kScale = 4;
+  _mm512_i32scatter_epi32(base, index.raw, v.raw, kScale);
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI64_D(D)>
+HWY_API void ScatterIndex(VFromD<D> v, D /* tag */,
+                          TFromD<D>* HWY_RESTRICT base,
+                          VFromD<RebindToSigned<D>> index) {
+  constexpr int kScale = 8;
+  _mm512_i64scatter_epi64(base, index.raw, v.raw, kScale);
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API void ScatterIndex(VFromD<D> v, D /* tag */, float* HWY_RESTRICT base,
+                          Vec512<int32_t> index) {
+  constexpr int kScale = 4;
+  _mm512_i32scatter_ps(base, index.raw, v.raw, kScale);
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API void ScatterIndex(VFromD<D> v, D /* tag */, double* HWY_RESTRICT base,
+                          Vec512<int64_t> index) {
+  constexpr int kScale = 8;
+  _mm512_i64scatter_pd(base, index.raw, v.raw, kScale);
+}
+
+// ------------------------------ MaskedScatterIndex
+
+// Masked variant of ScatterIndex: forwards `m` to the mask_*scatter
+// intrinsics, with a scale of 4 (32-bit lanes) or 8 (64-bit lanes).
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI32_D(D)>
+HWY_API void MaskedScatterIndex(VFromD<D> v, MFromD<D> m, D /* tag */,
+                                TFromD<D>* HWY_RESTRICT base,
+                                VFromD<RebindToSigned<D>> index) {
+  constexpr int kScale = 4;
+  _mm512_mask_i32scatter_epi32(base, m.raw, index.raw, v.raw, kScale);
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI64_D(D)>
+HWY_API void MaskedScatterIndex(VFromD<D> v, MFromD<D> m, D /* tag */,
+                                TFromD<D>* HWY_RESTRICT base,
+                                VFromD<RebindToSigned<D>> index) {
+  constexpr int kScale = 8;
+  _mm512_mask_i64scatter_epi64(base, m.raw, index.raw, v.raw, kScale);
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API void MaskedScatterIndex(VFromD<D> v, MFromD<D> m, D /* tag */,
+                                float* HWY_RESTRICT base,
+                                Vec512<int32_t> index) {
+  constexpr int kScale = 4;
+  _mm512_mask_i32scatter_ps(base, m.raw, index.raw, v.raw, kScale);
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API void MaskedScatterIndex(VFromD<D> v, MFromD<D> m, D /* tag */,
+                                double* HWY_RESTRICT base,
+                                Vec512<int64_t> index) {
+  constexpr int kScale = 8;
+  _mm512_mask_i64scatter_pd(base, m.raw, index.raw, v.raw, kScale);
+}
+
+// ------------------------------ Gather
+
+namespace detail {
+
+// Thin wrappers over the gather intrinsics; kScale is forwarded as the
+// intrinsic's scale argument.
+template <int kScale, typename T, HWY_IF_UI32(T)>
+HWY_INLINE Vec512<T> NativeGather(const T* HWY_RESTRICT base,
+                                  Vec512<int32_t> index) {
+  const __m512i gathered = _mm512_i32gather_epi32(index.raw, base, kScale);
+  return Vec512<T>{gathered};
+}
+
+template <int kScale, typename T, HWY_IF_UI64(T)>
+HWY_INLINE Vec512<T> NativeGather(const T* HWY_RESTRICT base,
+                                  Vec512<int64_t> index) {
+  const __m512i gathered = _mm512_i64gather_epi64(index.raw, base, kScale);
+  return Vec512<T>{gathered};
+}
+
+template <int kScale>
+HWY_INLINE Vec512<float> NativeGather(const float* HWY_RESTRICT base,
+                                      Vec512<int32_t> index) {
+  const __m512 gathered = _mm512_i32gather_ps(index.raw, base, kScale);
+  return Vec512<float>{gathered};
+}
+
+template <int kScale>
+HWY_INLINE Vec512<double> NativeGather(const double* HWY_RESTRICT base,
+                                       Vec512<int64_t> index) {
+  const __m512d gathered = _mm512_i64gather_pd(index.raw, base, kScale);
+  return Vec512<double>{gathered};
+}
+
+// Masked gathers: a zero vector is passed as the intrinsic's source operand.
+template <int kScale, typename T, HWY_IF_UI32(T)>
+HWY_INLINE Vec512<T> NativeMaskedGather(Mask512<T> m,
+                                        const T* HWY_RESTRICT base,
+                                        Vec512<int32_t> index) {
+  const Vec512<T> zero = Zero(Full512<T>());
+  return Vec512<T>{
+      _mm512_mask_i32gather_epi32(zero.raw, m.raw, index.raw, base, kScale)};
+}
+
+template <int kScale, typename T, HWY_IF_UI64(T)>
+HWY_INLINE Vec512<T> NativeMaskedGather(Mask512<T> m,
+                                        const T* HWY_RESTRICT base,
+                                        Vec512<int64_t> index) {
+  const Vec512<T> zero = Zero(Full512<T>());
+  return Vec512<T>{
+      _mm512_mask_i64gather_epi64(zero.raw, m.raw, index.raw, base, kScale)};
+}
+
+template <int kScale>
+HWY_INLINE Vec512<float> NativeMaskedGather(Mask512<float> m,
+                                            const float* HWY_RESTRICT base,
+                                            Vec512<int32_t> index) {
+  const Vec512<float> zero = Zero(Full512<float>());
+  return Vec512<float>{
+      _mm512_mask_i32gather_ps(zero.raw, m.raw, index.raw, base, kScale)};
+}
+
+template <int kScale>
+HWY_INLINE Vec512<double> NativeMaskedGather(Mask512<double> m,
+                                             const double* HWY_RESTRICT base,
+                                             Vec512<int64_t> index) {
+  const Vec512<double> zero = Zero(Full512<double>());
+  return Vec512<double>{
+      _mm512_mask_i64gather_pd(zero.raw, m.raw, index.raw, base, kScale)};
+}
+}  // namespace detail
+
+// Public gather entry points. Each checks that the lane and index types have
+// the same size, then forwards to the detail wrappers with the scale: 1 for
+// GatherOffset, sizeof(lane) for the index-based variants.
+template <class D, HWY_IF_V_SIZE_D(D, 64), typename TI>
+HWY_API VFromD<D> GatherOffset(D /* tag */, const TFromD<D>* HWY_RESTRICT base,
+                               Vec512<TI> offset) {
+  static_assert(sizeof(TFromD<D>) == sizeof(TI), "Must match for portability");
+  constexpr int kScale = 1;
+  return detail::NativeGather<kScale>(base, offset);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), typename TI>
+HWY_API VFromD<D> GatherIndex(D /* tag */, const TFromD<D>* HWY_RESTRICT base,
+                              Vec512<TI> index) {
+  static_assert(sizeof(TFromD<D>) == sizeof(TI), "Must match for portability");
+  constexpr int kScale = static_cast<int>(sizeof(TFromD<D>));
+  return detail::NativeGather<kScale>(base, index);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), typename TI>
+HWY_API VFromD<D> MaskedGatherIndex(MFromD<D> m, D /* tag */,
+                                    const TFromD<D>* HWY_RESTRICT base,
+                                    Vec512<TI> index) {
+  static_assert(sizeof(TFromD<D>) == sizeof(TI), "Must match for portability");
+  constexpr int kScale = static_cast<int>(sizeof(TFromD<D>));
+  return detail::NativeMaskedGather<kScale>(m, base, index);
+}
+
+HWY_DIAGNOSTICS(pop)
+
+// ================================================== SWIZZLE
+
+// ------------------------------ LowerHalf
+
+// Returns the lower 256 bits of a 512-bit vector via the cast intrinsics.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
+HWY_API VFromD<D> LowerHalf(D /* tag */, VFromD<Twice<D>> v) {
+  const __m256i lower = _mm512_castsi512_si256(v.raw);
+  return VFromD<D>{lower};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_BF16_D(D)>
+HWY_API VFromD<D> LowerHalf(D /* tag */, Vec512<bfloat16_t> v) {
+  const __m256i lower = _mm512_castsi512_si256(v.raw);
+  return VFromD<D>{lower};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F16_D(D)>
+HWY_API VFromD<D> LowerHalf(D /* tag */, Vec512<float16_t> v) {
+#if HWY_HAVE_FLOAT16
+  return VFromD<D>{_mm512_castph512_ph256(v.raw)};
+#else
+  const __m256i lower = _mm512_castsi512_si256(v.raw);
+  return VFromD<D>{lower};
+#endif  // HWY_HAVE_FLOAT16
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> LowerHalf(D /* tag */, Vec512<float> v) {
+  const __m256 lower = _mm512_castps512_ps256(v.raw);
+  return VFromD<D>{lower};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> LowerHalf(D /* tag */, Vec512<double> v) {
+  const __m256d lower = _mm512_castpd512_pd256(v.raw);
+  return VFromD<D>{lower};
+}
+
+// Tag-less convenience overload: derives the half-width tag from `v`.
+template <typename T>
+HWY_API Vec256<T> LowerHalf(Vec512<T> v) {
+  return LowerHalf(Half<DFromV<decltype(v)>>(), v);
+}
+
+// ------------------------------ UpperHalf
+
+// Returns the upper 256 bits of `v` for lane types other than float/double.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API VFromD<D> UpperHalf(D d, VFromD<Twice<D>> v) {
+  const RebindToUnsigned<decltype(d)> du;  // for float16_t
+  // Must be the *unsigned* full-width tag so the integer extract intrinsic
+  // receives an integer raw vector whatever the lane type of `v` is.
+  const Twice<decltype(du)> dut;
+  return BitCast(d, VFromD<decltype(du)>{
+                        _mm512_extracti32x8_epi32(BitCast(dut, v).raw, 1)});
+}
+// Returns the upper 256 bits of a float/double vector (extract index 1).
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> UpperHalf(D /* tag */, VFromD<Twice<D>> v) {
+  const __m256 upper = _mm512_extractf32x8_ps(v.raw, 1);
+  return VFromD<D>{upper};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> UpperHalf(D /* tag */, VFromD<Twice<D>> v) {
+  const __m256d upper = _mm512_extractf64x4_pd(v.raw, 1);
+  return VFromD<D>{upper};
+}
+
+// ------------------------------ ExtractLane (Store)
+// Returns lane `i` of `v`. `i` is checked against Lanes(d) via HWY_DASSERT.
+template <typename T>
+HWY_API T ExtractLane(const Vec512<T> v, size_t i) {
+  const DFromV<decltype(v)> d;
+  HWY_DASSERT(i < Lanes(d));
+
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  // Fast path: if the compiler can prove at compile time that `i` lies within
+  // the first 16 bytes, defer to the 128-bit ExtractLane.
+  constexpr size_t kLanesPerBlock = 16 / sizeof(T);
+  if (__builtin_constant_p(i < kLanesPerBlock) && (i < kLanesPerBlock)) {
+    return ExtractLane(ResizeBitCast(Full128<T>(), v), i);
+  }
+#endif
+
+  // General path: store the whole vector to an aligned array and index it.
+  alignas(64) T lanes[Lanes(d)];
+  Store(v, d, lanes);
+  return lanes[i];
+}
+
+// ------------------------------ ExtractBlock
+// Blocks 0 and 1 are in the lower half, so defer to the 256-bit overload.
+template <int kBlockIdx, class T, hwy::EnableIf<(kBlockIdx <= 1)>* = nullptr>
+HWY_API Vec128<T> ExtractBlock(Vec512<T> v) {
+  const Half<DFromV<decltype(v)>> dh;
+  const auto lower = LowerHalf(dh, v);
+  return ExtractBlock<kBlockIdx>(lower);
+}
+
+// Blocks 2 and 3 are extracted directly with the 128-bit extract intrinsics.
+template <int kBlockIdx, class T, hwy::EnableIf<(kBlockIdx > 1)>* = nullptr>
+HWY_API Vec128<T> ExtractBlock(Vec512<T> v) {
+  static_assert(kBlockIdx <= 3, "Invalid block index");
+  const __m128i block = _mm512_extracti32x4_epi32(v.raw, kBlockIdx);
+  return Vec128<T>{block};
+}
+
+template <int kBlockIdx, hwy::EnableIf<(kBlockIdx > 1)>* = nullptr>
+HWY_API Vec128<float> ExtractBlock(Vec512<float> v) {
+  static_assert(kBlockIdx <= 3, "Invalid block index");
+  const __m128 block = _mm512_extractf32x4_ps(v.raw, kBlockIdx);
+  return Vec128<float>{block};
+}
+
+template <int kBlockIdx, hwy::EnableIf<(kBlockIdx > 1)>* = nullptr>
+HWY_API Vec128<double> ExtractBlock(Vec512<double> v) {
+  static_assert(kBlockIdx <= 3, "Invalid block index");
+  const __m128d block = _mm512_extractf64x2_pd(v.raw, kBlockIdx);
+  return Vec128<double>{block};
+}
+
+// ------------------------------ InsertLane (Store)
+// Forwards `v`, `i` and `t` to detail::InsertLaneUsingBroadcastAndBlend and
+// returns its result. NOTE(review): presumably that helper yields `v` with
+// lane `i` replaced by `t`; it is defined outside this section, so confirm.
+template <typename T>
+HWY_API Vec512<T> InsertLane(const Vec512<T> v, size_t i, T t) {
+  return detail::InsertLaneUsingBroadcastAndBlend(v, i, t);
+}
+
+// ------------------------------ InsertBlock
+namespace detail {
+
+// Block 0: blend the resized block into the first 16 bytes' worth of lanes.
+template <typename T>
+HWY_INLINE Vec512<T> InsertBlock(hwy::SizeTag<0> /* blk_idx_tag */, Vec512<T> v,
+                                 Vec128<T> blk_to_insert) {
+  const DFromV<decltype(v)> d;
+  constexpr size_t kLanesPerBlock = 16 / sizeof(T);
+  const auto is_block0 = FirstN(d, kLanesPerBlock);
+  const Vec512<T> widened = ResizeBitCast(d, blk_to_insert);
+  return IfThenElse(is_block0, widened, v);
+}
+
+// Other blocks: use the 128-bit insert intrinsics with the block index as
+// immediate.
+template <size_t kBlockIdx, typename T>
+HWY_INLINE Vec512<T> InsertBlock(hwy::SizeTag<kBlockIdx> /* blk_idx_tag */,
+                                 Vec512<T> v, Vec128<T> blk_to_insert) {
+  constexpr int kImm = static_cast<int>(kBlockIdx & 3);
+  return Vec512<T>{_mm512_inserti32x4(v.raw, blk_to_insert.raw, kImm)};
+}
+
+template <size_t kBlockIdx, hwy::EnableIf<kBlockIdx != 0>* = nullptr>
+HWY_INLINE Vec512<float> InsertBlock(hwy::SizeTag<kBlockIdx> /* blk_idx_tag */,
+                                     Vec512<float> v,
+                                     Vec128<float> blk_to_insert) {
+  constexpr int kImm = static_cast<int>(kBlockIdx & 3);
+  return Vec512<float>{_mm512_insertf32x4(v.raw, blk_to_insert.raw, kImm)};
+}
+
+template <size_t kBlockIdx, hwy::EnableIf<kBlockIdx != 0>* = nullptr>
+HWY_INLINE Vec512<double> InsertBlock(hwy::SizeTag<kBlockIdx> /* blk_idx_tag */,
+                                      Vec512<double> v,
+                                      Vec128<double> blk_to_insert) {
+  constexpr int kImm = static_cast<int>(kBlockIdx & 3);
+  return Vec512<double>{_mm512_insertf64x2(v.raw, blk_to_insert.raw, kImm)};
+}
+
+}  // namespace detail
+
+// Validates kBlockIdx (0..3) and dispatches on it to the detail overloads.
+template <int kBlockIdx, class T>
+HWY_API Vec512<T> InsertBlock(Vec512<T> v, Vec128<T> blk_to_insert) {
+  static_assert(0 <= kBlockIdx && kBlockIdx <= 3, "Invalid block index");
+  constexpr size_t kIdx = static_cast<size_t>(kBlockIdx);
+  return detail::InsertBlock(hwy::SizeTag<kIdx>(), v, blk_to_insert);
+}
+
+// ------------------------------ GetLane (LowerHalf)
+// Takes the lower half of `v` and defers to the 256-bit GetLane.
+template <typename T>
+HWY_API T GetLane(const Vec512<T> v) {
+  const Vec256<T> lower = LowerHalf(v);
+  return GetLane(lower);
+}
+
+// ------------------------------ ZeroExtendVector
+
+// Widens a 256-bit vector to 512 bits: zext intrinsic when available,
+// otherwise `lo` is inserted at position 0 of a zero vector.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_NOT_FLOAT_D(D)>
+HWY_API VFromD<D> ZeroExtendVector(D d, VFromD<Half<D>> lo) {
+#if HWY_HAVE_ZEXT  // See definition/comment in x86_256-inl.h.
+  (void)d;
+  const __m512i widened = _mm512_zextsi256_si512(lo.raw);
+#else
+  const __m512i widened = _mm512_inserti32x8(Zero(d).raw, lo.raw, 0);
+#endif
+  return VFromD<D>{widened};
+}
+#if HWY_HAVE_FLOAT16
+// float16_t overload: zext intrinsic when available, otherwise defer to the
+// unsigned 16-bit overload and reinterpret the result.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F16_D(D)>
+HWY_API VFromD<D> ZeroExtendVector(D d, VFromD<Half<D>> lo) {
+#if HWY_HAVE_ZEXT
+  (void)d;
+  return VFromD<D>{_mm512_zextph256_ph512(lo.raw)};
+#else
+  const RebindToUnsigned<D> du;
+  // `lo` is a half-width vector, so it must be cast with the half-width
+  // unsigned tag; `du` is the full-width tag expected by ZeroExtendVector.
+  const Half<decltype(du)> duh;
+  return BitCast(d, ZeroExtendVector(du, BitCast(duh, lo)));
+#endif
+}
+#endif  // HWY_HAVE_FLOAT16
+// float/double overloads: zext intrinsic when available, otherwise `lo` is
+// inserted at position 0 of a zero vector.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ZeroExtendVector(D d, VFromD<Half<D>> lo) {
+#if HWY_HAVE_ZEXT
+  (void)d;
+  const __m512 widened = _mm512_zextps256_ps512(lo.raw);
+#else
+  const __m512 widened = _mm512_insertf32x8(Zero(d).raw, lo.raw, 0);
+#endif
+  return VFromD<D>{widened};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> ZeroExtendVector(D d, VFromD<Half<D>> lo) {
+#if HWY_HAVE_ZEXT
+  (void)d;
+  const __m512d widened = _mm512_zextpd256_pd512(lo.raw);
+#else
+  const __m512d widened = _mm512_insertf64x4(Zero(d).raw, lo.raw, 0);
+#endif
+  return VFromD<D>{widened};
+}
+
+// ------------------------------ ZeroExtendResizeBitCast
+
+namespace detail {
+
+// 16-byte -> 64-byte widening: zext intrinsic when available, otherwise the
+// bits are inserted at position 0 of a zero vector.
+template <class DTo, class DFrom, HWY_IF_NOT_FLOAT3264_D(DTo)>
+HWY_INLINE VFromD<DTo> ZeroExtendResizeBitCast(
+    hwy::SizeTag<16> /* from_size_tag */, hwy::SizeTag<64> /* to_size_tag */,
+    DTo d_to, DFrom d_from, VFromD<DFrom> v) {
+  const Repartition<uint8_t, decltype(d_from)> du8_from;
+  const __m128i bits = BitCast(du8_from, v).raw;
+#if HWY_HAVE_ZEXT
+  (void)d_to;
+  const __m512i widened = _mm512_zextsi128_si512(bits);
+#else
+  const __m512i widened = _mm512_inserti32x4(Zero(d_to).raw, bits, 0);
+#endif
+  return VFromD<DTo>{widened};
+}
+
+template <class DTo, class DFrom, HWY_IF_F32_D(DTo)>
+HWY_INLINE VFromD<DTo> ZeroExtendResizeBitCast(
+    hwy::SizeTag<16> /* from_size_tag */, hwy::SizeTag<64> /* to_size_tag */,
+    DTo d_to, DFrom d_from, VFromD<DFrom> v) {
+  const Repartition<float, decltype(d_from)> df32_from;
+  const __m128 bits = BitCast(df32_from, v).raw;
+#if HWY_HAVE_ZEXT
+  (void)d_to;
+  const __m512 widened = _mm512_zextps128_ps512(bits);
+#else
+  const __m512 widened = _mm512_insertf32x4(Zero(d_to).raw, bits, 0);
+#endif
+  return Vec512<float>{widened};
+}
+
+template <class DTo, class DFrom, HWY_IF_F64_D(DTo)>
+HWY_INLINE Vec512<double> ZeroExtendResizeBitCast(
+    hwy::SizeTag<16> /* from_size_tag */, hwy::SizeTag<64> /* to_size_tag */,
+    DTo d_to, DFrom d_from, VFromD<DFrom> v) {
+  const Repartition<double, decltype(d_from)> df64_from;
+  const __m128d bits = BitCast(df64_from, v).raw;
+#if HWY_HAVE_ZEXT
+  (void)d_to;
+  const __m512d widened = _mm512_zextpd128_pd512(bits);
+#else
+  const __m512d widened = _mm512_insertf64x2(Zero(d_to).raw, bits, 0);
+#endif
+  return Vec512<double>{widened};
+}
+
+// 8-byte sources are first zero-extended to 16 bytes, then handled by the
+// 16 -> 64 byte overloads.
+template <class DTo, class DFrom>
+HWY_INLINE VFromD<DTo> ZeroExtendResizeBitCast(
+    hwy::SizeTag<8> /* from_size_tag */, hwy::SizeTag<64> /* to_size_tag */,
+    DTo d_to, DFrom d_from, VFromD<DFrom> v) {
+  const Twice<decltype(d_from)> dt_from;
+  const auto v128 = ZeroExtendVector(dt_from, v);
+  return ZeroExtendResizeBitCast(hwy::SizeTag<16>(), hwy::SizeTag<64>(), d_to,
+                                 dt_from, v128);
+}
+
+}  // namespace detail
+
+// ------------------------------ Combine
+
+// Concatenates two 256-bit vectors (`hi` becomes the upper half, `lo` the
+// lower half) for lane types other than float/double.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API VFromD<D> Combine(D d, VFromD<Half<D>> hi, VFromD<Half<D>> lo) {
+  const RebindToUnsigned<decltype(d)> du;  // for float16_t
+  const Half<decltype(du)> duh;
+  const __m512i lo512 = ZeroExtendVector(du, BitCast(duh, lo)).raw;
+  // The insert intrinsic yields an integer vector. Wrap it in the unsigned
+  // vector type and BitCast back to D instead of constructing VFromD<D>
+  // directly, whose raw type can differ for float16_t.
+  return BitCast(d, VFromD<decltype(du)>{
+                        _mm512_inserti32x8(lo512, BitCast(duh, hi).raw, 1)});
+}
+// float/double: zero-extend `lo`, then insert `hi` at position 1.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> Combine(D d, VFromD<Half<D>> hi, VFromD<Half<D>> lo) {
+  const __m512 lo512 = ZeroExtendVector(d, lo).raw;
+  return VFromD<D>{_mm512_insertf32x8(lo512, hi.raw, 1)};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> Combine(D d, VFromD<Half<D>> hi, VFromD<Half<D>> lo) {
+  const __m512d lo512 = ZeroExtendVector(d, lo).raw;
+  return VFromD<D>{_mm512_insertf64x4(lo512, hi.raw, 1)};
+}
+
+// ------------------------------ ShiftLeftBytes
+// Byte-wise left shift by kBytes (0..16) via _mm512_bslli_epi128.
+template <int kBytes, class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API VFromD<D> ShiftLeftBytes(D /* tag */, const VFromD<D> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  const __m512i shifted = _mm512_bslli_epi128(v.raw, kBytes);
+  return VFromD<D>{shifted};
+}
+
+// ------------------------------ ShiftRightBytes
+// Byte-wise right shift by kBytes (0..16) via _mm512_bsrli_epi128.
+template <int kBytes, class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API VFromD<D> ShiftRightBytes(D /* tag */, const VFromD<D> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  const __m512i shifted = _mm512_bsrli_epi128(v.raw, kBytes);
+  return VFromD<D>{shifted};
+}
+
+// ------------------------------ CombineShiftRightBytes
+
+// Reinterprets `hi` and `lo` as bytes, applies _mm512_alignr_epi8 with
+// kBytes, and casts the result back to D.
+template <int kBytes, class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API VFromD<D> CombineShiftRightBytes(D d, VFromD<D> hi, VFromD<D> lo) {
+  const Repartition<uint8_t, decltype(d)> d8;
+  const __m512i hi_bytes = BitCast(d8, hi).raw;
+  const __m512i lo_bytes = BitCast(d8, lo).raw;
+  const Vec512<uint8_t> aligned{
+      _mm512_alignr_epi8(hi_bytes, lo_bytes, kBytes)};
+  return BitCast(d, aligned);
+}
+
+// ------------------------------ Broadcast/splat any lane
+
+// 16-bit lanes: shuffle within the low or high four lanes of each 64-bit
+// half, then duplicate that half via unpack.
+template <int kLane, typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec512<T> Broadcast(const Vec512<T> v) {
+  static_assert(0 <= kLane && kLane < 8, "Invalid lane");
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;
+  const VU vu = BitCast(du, v);  // for float16_t
+  if (kLane >= 4) {
+    const __m512i hi =
+        _mm512_shufflehi_epi16(vu.raw, (0x55 * (kLane - 4)) & 0xFF);
+    return BitCast(d, VU{_mm512_unpackhi_epi64(hi, hi)});
+  }
+  const __m512i lo = _mm512_shufflelo_epi16(vu.raw, (0x55 * kLane) & 0xFF);
+  return BitCast(d, VU{_mm512_unpacklo_epi64(lo, lo)});
+}
+
+// 32-bit integer lanes: every 2-bit selector field of the shuffle immediate
+// is set to kLane.
+template <int kLane, typename T, HWY_IF_UI32(T)>
+HWY_API Vec512<T> Broadcast(const Vec512<T> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  constexpr _MM_PERM_ENUM kSplat = static_cast<_MM_PERM_ENUM>(0x55 * kLane);
+  const __m512i splat = _mm512_shuffle_epi32(v.raw, kSplat);
+  return Vec512<T>{splat};
+}
+
+// 64-bit integer lanes: select the low (BABA) or high (DCDC) pair of 32-bit
+// halves.
+template <int kLane, typename T, HWY_IF_UI64(T)>
+HWY_API Vec512<T> Broadcast(const Vec512<T> v) {
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  constexpr _MM_PERM_ENUM kSplat =
+      (kLane == 0) ? _MM_PERM_BABA : _MM_PERM_DCDC;
+  const __m512i splat = _mm512_shuffle_epi32(v.raw, kSplat);
+  return Vec512<T>{splat};
+}
+
+template <int kLane>
+HWY_API Vec512<float> Broadcast(const Vec512<float> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  constexpr _MM_PERM_ENUM kSplat = static_cast<_MM_PERM_ENUM>(0x55 * kLane);
+  const __m512 splat = _mm512_shuffle_ps(v.raw, v.raw, kSplat);
+  return Vec512<float>{splat};
+}
+
+template <int kLane>
+HWY_API Vec512<double> Broadcast(const Vec512<double> v) {
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  constexpr _MM_PERM_ENUM kSplat = static_cast<_MM_PERM_ENUM>(0xFF * kLane);
+  const __m512d splat = _mm512_shuffle_pd(v.raw, v.raw, kSplat);
+  return Vec512<double>{splat};
+}
+
+// ------------------------------ BroadcastBlock
+// Block-granularity shuffle whose immediate has every 2-bit selector field
+// set to kBlockIdx (0x55 * kBlockIdx).
+template <int kBlockIdx, class T>
+HWY_API Vec512<T> BroadcastBlock(Vec512<T> v) {
+  static_assert(0 <= kBlockIdx && kBlockIdx <= 3, "Invalid block index");
+  constexpr int kSelect = 0x55 * kBlockIdx;
+  return Vec512<T>{_mm512_shuffle_i32x4(v.raw, v.raw, kSelect)};
+}
+
+template <int kBlockIdx>
+HWY_API Vec512<float> BroadcastBlock(Vec512<float> v) {
+  static_assert(0 <= kBlockIdx && kBlockIdx <= 3, "Invalid block index");
+  constexpr int kSelect = 0x55 * kBlockIdx;
+  return Vec512<float>{_mm512_shuffle_f32x4(v.raw, v.raw, kSelect)};
+}
+
+template <int kBlockIdx>
+HWY_API Vec512<double> BroadcastBlock(Vec512<double> v) {
+  static_assert(0 <= kBlockIdx && kBlockIdx <= 3, "Invalid block index");
+  constexpr int kSelect = 0x55 * kBlockIdx;
+  return Vec512<double>{_mm512_shuffle_f64x2(v.raw, v.raw, kSelect)};
+}
+
+// ------------------------------ BroadcastLane
+
+namespace detail {
+
+// Lane 0: resize `v` to 128 bits and use the dedicated broadcast intrinsic
+// for the lane size/type.
+template <class T, HWY_IF_T_SIZE(T, 1)>
+HWY_INLINE Vec512<T> BroadcastLane(hwy::SizeTag<0> /* lane_idx_tag */,
+                                   Vec512<T> v) {
+  const Vec128<T> lowest = ResizeBitCast(Full128<T>(), v);
+  return Vec512<T>{_mm512_broadcastb_epi8(lowest.raw)};
+}
+
+template <class T, HWY_IF_T_SIZE(T, 2)>
+HWY_INLINE Vec512<T> BroadcastLane(hwy::SizeTag<0> /* lane_idx_tag */,
+                                   Vec512<T> v) {
+  const Vec128<T> lowest = ResizeBitCast(Full128<T>(), v);
+  return Vec512<T>{_mm512_broadcastw_epi16(lowest.raw)};
+}
+
+template <class T, HWY_IF_UI32(T)>
+HWY_INLINE Vec512<T> BroadcastLane(hwy::SizeTag<0> /* lane_idx_tag */,
+                                   Vec512<T> v) {
+  const Vec128<T> lowest = ResizeBitCast(Full128<T>(), v);
+  return Vec512<T>{_mm512_broadcastd_epi32(lowest.raw)};
+}
+
+template <class T, HWY_IF_UI64(T)>
+HWY_INLINE Vec512<T> BroadcastLane(hwy::SizeTag<0> /* lane_idx_tag */,
+                                   Vec512<T> v) {
+  const Vec128<T> lowest = ResizeBitCast(Full128<T>(), v);
+  return Vec512<T>{_mm512_broadcastq_epi64(lowest.raw)};
+}
+
+HWY_INLINE Vec512<float> BroadcastLane(hwy::SizeTag<0> /* lane_idx_tag */,
+                                       Vec512<float> v) {
+  const Vec128<float> lowest = ResizeBitCast(Full128<float>(), v);
+  return Vec512<float>{_mm512_broadcastss_ps(lowest.raw)};
+}
+
+HWY_INLINE Vec512<double> BroadcastLane(hwy::SizeTag<0> /* lane_idx_tag */,
+                                        Vec512<double> v) {
+  const Vec128<double> lowest = ResizeBitCast(Full128<double>(), v);
+  return Vec512<double>{_mm512_broadcastsd_pd(lowest.raw)};
+}
+
+// Other lanes: broadcast the 128-bit block containing the lane, then
+// broadcast the lane within each block.
+template <size_t kLaneIdx, class T, hwy::EnableIf<kLaneIdx != 0>* = nullptr>
+HWY_INLINE Vec512<T> BroadcastLane(hwy::SizeTag<kLaneIdx> /* lane_idx_tag */,
+                                   Vec512<T> v) {
+  constexpr size_t kLanesPerBlock = 16 / sizeof(T);
+  constexpr int kBlockIdx = static_cast<int>(kLaneIdx / kLanesPerBlock);
+  constexpr int kLaneInBlkIdx =
+      static_cast<int>(kLaneIdx & (kLanesPerBlock - 1));
+  const Vec512<T> block = BroadcastBlock<kBlockIdx>(v);
+  return Broadcast<kLaneInBlkIdx>(block);
+}
+
+}  // namespace detail
+
+// Returns a vector in which every lane equals lane kLaneIdx of v.
+// kLaneIdx counts lanes across the whole 512-bit vector, so it must lie in
+// [0, 64 / sizeof(T)). Checking the upper bound here reports a bad index as
+// "Invalid lane" at the API boundary instead of as a block-index failure inside
+// the helpers.
+template <int kLaneIdx, class T>
+HWY_API Vec512<T> BroadcastLane(Vec512<T> v) {
+  static_assert(0 <= kLaneIdx &&
+                    static_cast<size_t>(kLaneIdx) < 64 / sizeof(T),
+                "Invalid lane");
+  return detail::BroadcastLane(hwy::SizeTag<static_cast<size_t>(kLaneIdx)>(),
+                               v);
+}
+
+// ------------------------------ Hard-coded shuffles
+
+// Notation: let the lower eight int32_t lanes of a Vec512 be 7,6,5,4,3,2,1,0
+// (0 is least-significant); the upper eight lanes are treated the same way.
+// Shuffle0321 rotates each four-lane block one lane to the right (the previous
+// least-significant lane is now most-significant => 47650321). These could
+// also be implemented via CombineShiftRightBytes but the shuffle_abcd notation
+// is more convenient.
+
+// Swap 32-bit halves in 64-bit halves.
+// Integer 32-bit lanes: per-block shuffle with the CDAB pattern.
+template <typename T, HWY_IF_UI32(T)>
+HWY_API Vec512<T> Shuffle2301(const Vec512<T> v) {
+  const __m512i raw = _mm512_shuffle_epi32(v.raw, _MM_PERM_CDAB);
+  return Vec512<T>{raw};
+}
+// f32 lanes: same pattern, with v supplied as both shuffle inputs.
+HWY_API Vec512<float> Shuffle2301(const Vec512<float> v) {
+  const __m512 raw = _mm512_shuffle_ps(v.raw, v.raw, _MM_PERM_CDAB);
+  return Vec512<float>{raw};
+}
+
+namespace detail {
+
+// Two-input shuffles for 32-bit lanes. Each reinterprets a and b as f32,
+// applies _mm512_shuffle_ps with a fixed pattern and casts the result back.
+
+// Pattern CDAB.
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec512<T> ShuffleTwo2301(const Vec512<T> a, const Vec512<T> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToFloat<decltype(d)> df;
+  const Vec512<float> af = BitCast(df, a);
+  const Vec512<float> bf = BitCast(df, b);
+  const Vec512<float> shuffled{_mm512_shuffle_ps(af.raw, bf.raw, _MM_PERM_CDAB)};
+  return BitCast(d, shuffled);
+}
+// Pattern BCDA.
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec512<T> ShuffleTwo1230(const Vec512<T> a, const Vec512<T> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToFloat<decltype(d)> df;
+  const Vec512<float> af = BitCast(df, a);
+  const Vec512<float> bf = BitCast(df, b);
+  const Vec512<float> shuffled{_mm512_shuffle_ps(af.raw, bf.raw, _MM_PERM_BCDA)};
+  return BitCast(d, shuffled);
+}
+// Pattern DABC.
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec512<T> ShuffleTwo3012(const Vec512<T> a, const Vec512<T> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToFloat<decltype(d)> df;
+  const Vec512<float> af = BitCast(df, a);
+  const Vec512<float> bf = BitCast(df, b);
+  const Vec512<float> shuffled{_mm512_shuffle_ps(af.raw, bf.raw, _MM_PERM_DABC)};
+  return BitCast(d, shuffled);
+}
+
+}  // namespace detail
+
+// Swap 64-bit halves
+// All three overloads apply the BADC pattern to the 32-bit lanes of each block.
+HWY_API Vec512<uint32_t> Shuffle1032(const Vec512<uint32_t> v) {
+  const __m512i raw = _mm512_shuffle_epi32(v.raw, _MM_PERM_BADC);
+  return Vec512<uint32_t>{raw};
+}
+HWY_API Vec512<int32_t> Shuffle1032(const Vec512<int32_t> v) {
+  const __m512i raw = _mm512_shuffle_epi32(v.raw, _MM_PERM_BADC);
+  return Vec512<int32_t>{raw};
+}
+HWY_API Vec512<float> Shuffle1032(const Vec512<float> v) {
+  // shuffle_ps is preferred: shorter encoding than _mm512_permute_ps.
+  const __m512 raw = _mm512_shuffle_ps(v.raw, v.raw, _MM_PERM_BADC);
+  return Vec512<float>{raw};
+}
+// 64-bit lanes: the integer overloads reuse the 32-bit BADC shuffle, which
+// exchanges the two 64-bit halves of each block.
+HWY_API Vec512<uint64_t> Shuffle01(const Vec512<uint64_t> v) {
+  const __m512i raw = _mm512_shuffle_epi32(v.raw, _MM_PERM_BADC);
+  return Vec512<uint64_t>{raw};
+}
+HWY_API Vec512<int64_t> Shuffle01(const Vec512<int64_t> v) {
+  const __m512i raw = _mm512_shuffle_epi32(v.raw, _MM_PERM_BADC);
+  return Vec512<int64_t>{raw};
+}
+HWY_API Vec512<double> Shuffle01(const Vec512<double> v) {
+  // shuffle_pd is preferred: shorter encoding than _mm512_permute_pd.
+  const __m512d raw = _mm512_shuffle_pd(v.raw, v.raw, _MM_PERM_BBBB);
+  return Vec512<double>{raw};
+}
+
+// Rotate right 32 bits
+// All three overloads apply the ADCB pattern to the 32-bit lanes of each block.
+HWY_API Vec512<uint32_t> Shuffle0321(const Vec512<uint32_t> v) {
+  const __m512i raw = _mm512_shuffle_epi32(v.raw, _MM_PERM_ADCB);
+  return Vec512<uint32_t>{raw};
+}
+HWY_API Vec512<int32_t> Shuffle0321(const Vec512<int32_t> v) {
+  const __m512i raw = _mm512_shuffle_epi32(v.raw, _MM_PERM_ADCB);
+  return Vec512<int32_t>{raw};
+}
+HWY_API Vec512<float> Shuffle0321(const Vec512<float> v) {
+  const __m512 raw = _mm512_shuffle_ps(v.raw, v.raw, _MM_PERM_ADCB);
+  return Vec512<float>{raw};
+}
+// Rotate left 32 bits
+// All three overloads apply the CBAD pattern to the 32-bit lanes of each block.
+HWY_API Vec512<uint32_t> Shuffle2103(const Vec512<uint32_t> v) {
+  const __m512i raw = _mm512_shuffle_epi32(v.raw, _MM_PERM_CBAD);
+  return Vec512<uint32_t>{raw};
+}
+HWY_API Vec512<int32_t> Shuffle2103(const Vec512<int32_t> v) {
+  const __m512i raw = _mm512_shuffle_epi32(v.raw, _MM_PERM_CBAD);
+  return Vec512<int32_t>{raw};
+}
+HWY_API Vec512<float> Shuffle2103(const Vec512<float> v) {
+  const __m512 raw = _mm512_shuffle_ps(v.raw, v.raw, _MM_PERM_CBAD);
+  return Vec512<float>{raw};
+}
+
+// Reverse
+// All three overloads apply the ABCD pattern, reversing the four 32-bit lanes
+// of each block.
+HWY_API Vec512<uint32_t> Shuffle0123(const Vec512<uint32_t> v) {
+  const __m512i raw = _mm512_shuffle_epi32(v.raw, _MM_PERM_ABCD);
+  return Vec512<uint32_t>{raw};
+}
+HWY_API Vec512<int32_t> Shuffle0123(const Vec512<int32_t> v) {
+  const __m512i raw = _mm512_shuffle_epi32(v.raw, _MM_PERM_ABCD);
+  return Vec512<int32_t>{raw};
+}
+HWY_API Vec512<float> Shuffle0123(const Vec512<float> v) {
+  const __m512 raw = _mm512_shuffle_ps(v.raw, v.raw, _MM_PERM_ABCD);
+  return Vec512<float>{raw};
+}
+
+// ------------------------------ TableLookupLanes
+
+// Returned by SetTableIndices/IndicesFromVec for use by TableLookupLanes.
+template <typename T>
+struct Indices512 {
+  __m512i raw;  // Index lanes; always an integer vector, T only tags the type.
+};
+
+// Wraps an index vector as Indices512<T>. TI must have the same size as T.
+// Debug builds additionally assert that every index, viewed as unsigned, is
+// below 128 / sizeof(T), i.e. twice the lane count of a 64-byte vector.
+template <class D, typename T = TFromD<D>, typename TI>
+HWY_API Indices512<T> IndicesFromVec(D /* tag */, Vec512<TI> vec) {
+  static_assert(sizeof(T) == sizeof(TI), "Index size must match lane");
+#if HWY_IS_DEBUG_BUILD
+  using TU = MakeUnsigned<T>;
+  const RebindToUnsigned<DFromV<decltype(vec)>> du;
+  const auto limit = Set(du, static_cast<TU>(128 / sizeof(T)));
+  HWY_DASSERT(AllTrue(du, Lt(BitCast(du, vec), limit)));
+#endif
+  return Indices512<T>{vec.raw};
+}
+
+// Loads indices from (possibly unaligned) memory and converts them via
+// IndicesFromVec.
+template <class D, HWY_IF_V_SIZE_D(D, 64), typename TI>
+HWY_API Indices512<TFromD<D>> SetTableIndices(D d, const TI* idx) {
+  const Rebind<TI, decltype(d)> di;
+  const auto idx_vec = LoadU(di, idx);
+  return IndicesFromVec(d, idx_vec);
+}
+
+// 8-bit lanes. AVX3_DL targets use a single byte permute; other targets
+// emulate it with per-block byte shuffles and two levels of selection.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec512<T> TableLookupLanes(Vec512<T> v, Indices512<T> idx) {
+#if HWY_TARGET <= HWY_AVX3_DL
+  return Vec512<T>{_mm512_permutexvar_epi8(idx.raw, v.raw)};
+#else
+  const DFromV<decltype(v)> d;
+  const Repartition<uint16_t, decltype(d)> du16;
+  const Vec512<T> indices{idx.raw};
+  const auto indices16 = BitCast(du16, indices);
+
+  // Each 128-bit block of v (0..3), replicated across the whole vector.
+  const Vec512<T> blk0{_mm512_shuffle_i32x4(v.raw, v.raw, 0x00)};
+  const Vec512<T> blk1{_mm512_shuffle_i32x4(v.raw, v.raw, 0x55)};
+  const Vec512<T> blk2{_mm512_shuffle_i32x4(v.raw, v.raw, 0xAA)};
+  const Vec512<T> blk3{_mm512_shuffle_i32x4(v.raw, v.raw, 0xFF)};
+
+  // Shifting left by 3 (resp. 2) moves index bit 4 (resp. bit 5) into the top
+  // bit of each byte, from which the masks are formed.
+  const auto odd_block = MaskFromVec(BitCast(d, ShiftLeft<3>(indices16)));
+  const auto upper_pair = MaskFromVec(BitCast(d, ShiftLeft<2>(indices16)));
+
+  // Look up in blocks 0 and 2, then overwrite with blocks 1 and 3 where the
+  // odd-block mask is set.
+  const auto from0 = TableLookupBytes(blk0, indices);
+  const auto from2 = TableLookupBytes(blk2, indices);
+  const Vec512<T> from01{_mm512_mask_shuffle_epi8(from0.raw, odd_block.raw,
+                                                  blk1.raw, indices.raw)};
+  const Vec512<T> from23{_mm512_mask_shuffle_epi8(from2.raw, odd_block.raw,
+                                                  blk3.raw, indices.raw)};
+  return IfThenElse(upper_pair, from23, from01);
+#endif
+}
+
+// 16-bit lanes (excluding special float types).
+template <typename T, HWY_IF_T_SIZE(T, 2), HWY_IF_NOT_SPECIAL_FLOAT(T)>
+HWY_API Vec512<T> TableLookupLanes(Vec512<T> v, Indices512<T> idx) {
+  const auto raw = _mm512_permutexvar_epi16(idx.raw, v.raw);
+  return Vec512<T>{raw};
+}
+#if HWY_HAVE_FLOAT16
+HWY_API Vec512<float16_t> TableLookupLanes(Vec512<float16_t> v,
+                                           Indices512<float16_t> idx) {
+  const auto raw = _mm512_permutexvar_ph(idx.raw, v.raw);
+  return Vec512<float16_t>{raw};
+}
+#endif  // HWY_HAVE_FLOAT16
+// 32-bit lanes.
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec512<T> TableLookupLanes(Vec512<T> v, Indices512<T> idx) {
+  const auto raw = _mm512_permutexvar_epi32(idx.raw, v.raw);
+  return Vec512<T>{raw};
+}
+
+// 64-bit lanes.
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec512<T> TableLookupLanes(Vec512<T> v, Indices512<T> idx) {
+  const auto raw = _mm512_permutexvar_epi64(idx.raw, v.raw);
+  return Vec512<T>{raw};
+}
+
+// f32 lanes.
+HWY_API Vec512<float> TableLookupLanes(Vec512<float> v, Indices512<float> idx) {
+  const __m512 raw = _mm512_permutexvar_ps(idx.raw, v.raw);
+  return Vec512<float>{raw};
+}
+
+// f64 lanes.
+HWY_API Vec512<double> TableLookupLanes(Vec512<double> v,
+                                        Indices512<double> idx) {
+  const __m512d raw = _mm512_permutexvar_pd(idx.raw, v.raw);
+  return Vec512<double>{raw};
+}
+
+// 8-bit lanes. AVX3_DL targets use the two-source byte permute; other targets
+// look up in both tables and choose per lane.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec512<T> TwoTablesLookupLanes(Vec512<T> a, Vec512<T> b,
+                                       Indices512<T> idx) {
+#if HWY_TARGET <= HWY_AVX3_DL
+  return Vec512<T>{_mm512_permutex2var_epi8(a.raw, idx.raw, b.raw)};
+#else
+  const DFromV<decltype(a)> d;
+  // Shifting left by 1 moves index bit 6 into the top bit of each byte; the
+  // resulting mask marks lanes that are taken from b.
+  const Vec512<uint16_t> idx16{idx.raw};
+  const auto use_b = MaskFromVec(BitCast(d, ShiftLeft<1>(idx16)));
+  const Vec512<T> from_b = TableLookupLanes(b, idx);
+  const Vec512<T> from_a = TableLookupLanes(a, idx);
+  return IfThenElse(use_b, from_b, from_a);
+#endif
+}
+
+// 16-bit lanes.
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec512<T> TwoTablesLookupLanes(Vec512<T> a, Vec512<T> b,
+                                       Indices512<T> idx) {
+  const auto raw = _mm512_permutex2var_epi16(a.raw, idx.raw, b.raw);
+  return Vec512<T>{raw};
+}
+
+// 32-bit integer lanes.
+template <typename T, HWY_IF_UI32(T)>
+HWY_API Vec512<T> TwoTablesLookupLanes(Vec512<T> a, Vec512<T> b,
+                                       Indices512<T> idx) {
+  const auto raw = _mm512_permutex2var_epi32(a.raw, idx.raw, b.raw);
+  return Vec512<T>{raw};
+}
+
+#if HWY_HAVE_FLOAT16
+HWY_API Vec512<float16_t> TwoTablesLookupLanes(Vec512<float16_t> a,
+                                               Vec512<float16_t> b,
+                                               Indices512<float16_t> idx) {
+  const auto raw = _mm512_permutex2var_ph(a.raw, idx.raw, b.raw);
+  return Vec512<float16_t>{raw};
+}
+#endif  // HWY_HAVE_FLOAT16
+// f32 lanes.
+HWY_API Vec512<float> TwoTablesLookupLanes(Vec512<float> a, Vec512<float> b,
+                                           Indices512<float> idx) {
+  const __m512 raw = _mm512_permutex2var_ps(a.raw, idx.raw, b.raw);
+  return Vec512<float>{raw};
+}
+
+// 64-bit integer lanes.
+template <typename T, HWY_IF_UI64(T)>
+HWY_API Vec512<T> TwoTablesLookupLanes(Vec512<T> a, Vec512<T> b,
+                                       Indices512<T> idx) {
+  const auto raw = _mm512_permutex2var_epi64(a.raw, idx.raw, b.raw);
+  return Vec512<T>{raw};
+}
+
+// f64 lanes.
+HWY_API Vec512<double> TwoTablesLookupLanes(Vec512<double> a, Vec512<double> b,
+                                            Indices512<double> idx) {
+  const __m512d raw = _mm512_permutex2var_pd(a.raw, idx.raw, b.raw);
+  return Vec512<double>{raw};
+}
+
+// ------------------------------ Reverse
+
+// 8-bit lanes: reverses the order of all 64 lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Reverse(D d, const VFromD<D> v) {
+#if HWY_TARGET <= HWY_AVX3_DL
+  const RebindToSigned<decltype(d)> di;
+  // Descending lane indices 63..0.
+  alignas(64) static constexpr int8_t kDescending[64] = {
+      63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48,
+      47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32,
+      31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
+      15, 14, 13, 12, 11, 10, 9,  8,  7,  6,  5,  4,  3,  2,  1,  0};
+  const Vec512<int8_t> indices = Load(di, kDescending);
+  const Vec512<int8_t> permuted{
+      _mm512_permutexvar_epi8(indices.raw, BitCast(di, v).raw)};
+  return BitCast(d, permuted);
+#else
+  // Swap the two bytes of every 16-bit lane, then reverse the 16-bit lanes.
+  const RepartitionToWide<decltype(d)> d16;
+  const auto bytes_swapped = RotateRight<8>(BitCast(d16, v));
+  return BitCast(d, Reverse(d16, bytes_swapped));
+#endif
+}
+
+// 16-bit lanes: reverses the order of all 32 lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Reverse(D d, const VFromD<D> v) {
+  const RebindToSigned<decltype(d)> di;
+  // Descending lane indices 31..0.
+  alignas(64) static constexpr int16_t kDescending[32] = {
+      31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
+      15, 14, 13, 12, 11, 10, 9,  8,  7,  6,  5,  4,  3,  2,  1,  0};
+  const Vec512<int16_t> indices = Load(di, kDescending);
+  const Vec512<int16_t> permuted{
+      _mm512_permutexvar_epi16(indices.raw, BitCast(di, v).raw)};
+  return BitCast(d, permuted);
+}
+
+// 32-bit lanes: reverses the order of all 16 lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> Reverse(D d, const VFromD<D> v) {
+  alignas(64) static constexpr int32_t kDescending[16] = {
+      15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
+  const auto indices = SetTableIndices(d, kDescending);
+  return TableLookupLanes(v, indices);
+}
+
+// 64-bit lanes: reverses the order of all 8 lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> Reverse(D d, const VFromD<D> v) {
+  alignas(64) static constexpr int64_t kDescending[8] = {7, 6, 5, 4, 3, 2, 1, 0};
+  const auto indices = SetTableIndices(d, kDescending);
+  return TableLookupLanes(v, indices);
+}
+
+// ------------------------------ Reverse2 (in x86_128)
+
+// ------------------------------ Reverse4
+
+// 16-bit lanes: reverses each consecutive group of four lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Reverse4(D d, const VFromD<D> v) {
+  const RebindToSigned<decltype(d)> di;
+  // Indices descend within every group of four.
+  alignas(64) static constexpr int16_t kGroupsOf4[32] = {
+      3,  2,  1,  0,  7,  6,  5,  4,  11, 10, 9,  8,  15, 14, 13, 12,
+      19, 18, 17, 16, 23, 22, 21, 20, 27, 26, 25, 24, 31, 30, 29, 28};
+  const Vec512<int16_t> indices = Load(di, kGroupsOf4);
+  const Vec512<int16_t> permuted{
+      _mm512_permutexvar_epi16(indices.raw, BitCast(di, v).raw)};
+  return BitCast(d, permuted);
+}
+
+// 32 bit Reverse4 defined in x86_128.
+
+// 64-bit integer lanes: immediate-controlled permute with reversed order.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI64_D(D)>
+HWY_API VFromD<D> Reverse4(D /* tag */, const VFromD<D> v) {
+  const __m512i raw = _mm512_permutex_epi64(v.raw, _MM_SHUFFLE(0, 1, 2, 3));
+  return VFromD<D>{raw};
+}
+// f64 lanes: same permute in its floating-point form.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> Reverse4(D /* tag */, VFromD<D> v) {
+  const __m512d raw = _mm512_permutex_pd(v.raw, _MM_SHUFFLE(0, 1, 2, 3));
+  return VFromD<D>{raw};
+}
+
+// ------------------------------ Reverse8
+
+// 16-bit lanes: reverses each consecutive group of eight lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Reverse8(D d, const VFromD<D> v) {
+  const RebindToSigned<decltype(d)> di;
+  // Indices descend within every group of eight.
+  alignas(64) static constexpr int16_t kGroupsOf8[32] = {
+      7,  6,  5,  4,  3,  2,  1,  0,  15, 14, 13, 12, 11, 10, 9,  8,
+      23, 22, 21, 20, 19, 18, 17, 16, 31, 30, 29, 28, 27, 26, 25, 24};
+  const Vec512<int16_t> indices = Load(di, kGroupsOf8);
+  const Vec512<int16_t> permuted{
+      _mm512_permutexvar_epi16(indices.raw, BitCast(di, v).raw)};
+  return BitCast(d, permuted);
+}
+
+// 32-bit lanes: reverses each of the two groups of eight lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> Reverse8(D d, const VFromD<D> v) {
+  const RebindToSigned<decltype(d)> di;
+  alignas(64) static constexpr int32_t kGroupsOf8[16] = {
+      7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8};
+  const Vec512<int32_t> indices = Load(di, kGroupsOf8);
+  const Vec512<int32_t> permuted{
+      _mm512_permutexvar_epi32(indices.raw, BitCast(di, v).raw)};
+  return BitCast(d, permuted);
+}
+
+// 64-bit lanes: a 64-byte vector holds exactly eight, so this is Reverse.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> Reverse8(D d, const VFromD<D> v) {
+  return Reverse(d, v);
+}
+
+// ------------------------------ ReverseBits
+
+#if HWY_TARGET <= HWY_AVX3_DL
+// 8-bit lanes: applies a GF(2) affine transform with a fixed 8x8 bit matrix
+// (broadcast to every 64-bit lane) and a zero constant term.
+template <class V, HWY_IF_T_SIZE_V(V, 1), HWY_IF_V_SIZE_D(DFromV<V>, 64)>
+HWY_API V ReverseBits(V v) {
+  const Full512<uint64_t> du64;
+  const auto bit_matrix = Set(du64, 0x8040201008040201u);
+  const __m512i raw = _mm512_gf2p8affine_epi64_epi8(v.raw, bit_matrix.raw, 0);
+  return V{raw};
+}
+#endif  // HWY_TARGET <= HWY_AVX3_DL
+
+// ------------------------------ InterleaveLower
+
+// Thin wrappers over the unpacklo intrinsic that matches the lane type.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec512<T> InterleaveLower(Vec512<T> a, Vec512<T> b) {
+  const auto raw = _mm512_unpacklo_epi8(a.raw, b.raw);
+  return Vec512<T>{raw};
+}
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec512<T> InterleaveLower(Vec512<T> a, Vec512<T> b) {
+  const DFromV<decltype(a)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;  // for float16_t
+  // Operate on the unsigned reinterpretation and cast the result back.
+  const VU au = BitCast(du, a);
+  const VU bu = BitCast(du, b);
+  return BitCast(d, VU{_mm512_unpacklo_epi16(au.raw, bu.raw)});
+}
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec512<T> InterleaveLower(Vec512<T> a, Vec512<T> b) {
+  const auto raw = _mm512_unpacklo_epi32(a.raw, b.raw);
+  return Vec512<T>{raw};
+}
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec512<T> InterleaveLower(Vec512<T> a, Vec512<T> b) {
+  const auto raw = _mm512_unpacklo_epi64(a.raw, b.raw);
+  return Vec512<T>{raw};
+}
+HWY_API Vec512<float> InterleaveLower(Vec512<float> a, Vec512<float> b) {
+  const __m512 raw = _mm512_unpacklo_ps(a.raw, b.raw);
+  return Vec512<float>{raw};
+}
+HWY_API Vec512<double> InterleaveLower(Vec512<double> a, Vec512<double> b) {
+  const __m512d raw = _mm512_unpacklo_pd(a.raw, b.raw);
+  return Vec512<double>{raw};
+}
+
+// ------------------------------ InterleaveUpper
+
+// Thin wrappers over the unpackhi intrinsic that matches the lane type.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> InterleaveUpper(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  const auto raw = _mm512_unpackhi_epi8(a.raw, b.raw);
+  return VFromD<D>{raw};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> InterleaveUpper(D d, VFromD<D> a, VFromD<D> b) {
+  const RebindToUnsigned<decltype(d)> du;
+  using VU = VFromD<decltype(du)>;  // for float16_t
+  // Operate on the unsigned reinterpretation and cast the result back.
+  const VU au = BitCast(du, a);
+  const VU bu = BitCast(du, b);
+  return BitCast(d, VU{_mm512_unpackhi_epi16(au.raw, bu.raw)});
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI32_D(D)>
+HWY_API VFromD<D> InterleaveUpper(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  const auto raw = _mm512_unpackhi_epi32(a.raw, b.raw);
+  return VFromD<D>{raw};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI64_D(D)>
+HWY_API VFromD<D> InterleaveUpper(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  const auto raw = _mm512_unpackhi_epi64(a.raw, b.raw);
+  return VFromD<D>{raw};
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> InterleaveUpper(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  const __m512 raw = _mm512_unpackhi_ps(a.raw, b.raw);
+  return VFromD<D>{raw};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> InterleaveUpper(D /* tag */, VFromD<D> a, VFromD<D> b) {
+  const __m512d raw = _mm512_unpackhi_pd(a.raw, b.raw);
+  return VFromD<D>{raw};
+}
+
+// ------------------------------ Concat* halves
+
+// hiH,hiL loH,loL |-> hiL,loL (= lower halves)
+// Block shuffle with pattern BABA; lo is passed as the first source and hi as
+// the second. One overload per raw register type.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API VFromD<D> ConcatLowerLower(D /* tag */, VFromD<D> hi, VFromD<D> lo) {
+  const auto raw = _mm512_shuffle_i32x4(lo.raw, hi.raw, _MM_PERM_BABA);
+  return VFromD<D>{raw};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ConcatLowerLower(D /* tag */, VFromD<D> hi, VFromD<D> lo) {
+  const __m512 raw = _mm512_shuffle_f32x4(lo.raw, hi.raw, _MM_PERM_BABA);
+  return VFromD<D>{raw};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API Vec512<double> ConcatLowerLower(D /* tag */, Vec512<double> hi,
+                                        Vec512<double> lo) {
+  const __m512d raw = _mm512_shuffle_f64x2(lo.raw, hi.raw, _MM_PERM_BABA);
+  return Vec512<double>{raw};
+}
+
+// hiH,hiL loH,loL |-> hiH,loH (= upper halves)
+// Block shuffle with pattern DCDC; lo is passed as the first source and hi as
+// the second. One overload per raw register type.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API VFromD<D> ConcatUpperUpper(D /* tag */, VFromD<D> hi, VFromD<D> lo) {
+  const auto raw = _mm512_shuffle_i32x4(lo.raw, hi.raw, _MM_PERM_DCDC);
+  return VFromD<D>{raw};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ConcatUpperUpper(D /* tag */, VFromD<D> hi, VFromD<D> lo) {
+  const __m512 raw = _mm512_shuffle_f32x4(lo.raw, hi.raw, _MM_PERM_DCDC);
+  return VFromD<D>{raw};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API Vec512<double> ConcatUpperUpper(D /* tag */, Vec512<double> hi,
+                                        Vec512<double> lo) {
+  const __m512d raw = _mm512_shuffle_f64x2(lo.raw, hi.raw, _MM_PERM_DCDC);
+  return Vec512<double>{raw};
+}
+
+// hiH,hiL loH,loL |-> hiL,loH (= inner halves / swap blocks)
+// Block shuffle with pattern BADC; lo is passed as the first source and hi as
+// the second. One overload per raw register type.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API VFromD<D> ConcatLowerUpper(D /* tag */, VFromD<D> hi, VFromD<D> lo) {
+  const auto raw = _mm512_shuffle_i32x4(lo.raw, hi.raw, _MM_PERM_BADC);
+  return VFromD<D>{raw};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ConcatLowerUpper(D /* tag */, VFromD<D> hi, VFromD<D> lo) {
+  const __m512 raw = _mm512_shuffle_f32x4(lo.raw, hi.raw, _MM_PERM_BADC);
+  return VFromD<D>{raw};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API Vec512<double> ConcatLowerUpper(D /* tag */, Vec512<double> hi,
+                                        Vec512<double> lo) {
+  const __m512d raw = _mm512_shuffle_f64x2(lo.raw, hi.raw, _MM_PERM_BADC);
+  return Vec512<double>{raw};
+}
+
+// hiH,hiL loH,loL |-> hiH,loL (= outer halves)
+// Masked blend: lanes whose mask bit is set (the lower half of the vector) come
+// from lo, the remaining lanes from hi.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API VFromD<D> ConcatUpperLower(D /* tag */, VFromD<D> hi, VFromD<D> lo) {
+  // AVX512 has no imm8 blend. Use blend16 because 32-bit masks are
+  // efficiently loaded from 32-bit regs.
+  const __mmask32 lower_half = /*_cvtu32_mask32 */ (0x0000FFFF);
+  const auto raw = _mm512_mask_blend_epi16(lower_half, hi.raw, lo.raw);
+  return VFromD<D>{raw};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ConcatUpperLower(D /* tag */, VFromD<D> hi, VFromD<D> lo) {
+  const __mmask16 lower_half = /*_cvtu32_mask16 */ (0x00FF);
+  const __m512 raw = _mm512_mask_blend_ps(lower_half, hi.raw, lo.raw);
+  return VFromD<D>{raw};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API Vec512<double> ConcatUpperLower(D /* tag */, Vec512<double> hi,
+                                        Vec512<double> lo) {
+  const __mmask8 lower_half = /*_cvtu32_mask8 */ (0x0F);
+  const __m512d raw = _mm512_mask_blend_pd(lower_half, hi.raw, lo.raw);
+  return Vec512<double>{raw};
+}
+
+// ------------------------------ ConcatOdd
+
+// 8-bit lanes: gathers the odd-indexed lanes of lo (lower half of the result)
+// followed by those of hi (upper half).
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+#if HWY_TARGET <= HWY_AVX3_DL
+  // Odd indices 1..127 into the two-source table (lo first, then hi).
+  alignas(64) static constexpr uint8_t kOddIdx[64] = {
+      1,   3,   5,   7,   9,   11,  13,  15,  17,  19,  21,  23,  25,
+      27,  29,  31,  33,  35,  37,  39,  41,  43,  45,  47,  49,  51,
+      53,  55,  57,  59,  61,  63,  65,  67,  69,  71,  73,  75,  77,
+      79,  81,  83,  85,  87,  89,  91,  93,  95,  97,  99,  101, 103,
+      105, 107, 109, 111, 113, 115, 117, 119, 121, 123, 125, 127};
+  const auto indices = Load(du, kOddIdx);
+  const Vec512<uint8_t> permuted{_mm512_permutex2var_epi8(
+      BitCast(du, lo).raw, indices.raw, BitCast(du, hi).raw)};
+  return BitCast(d, permuted);
+#else
+  const RepartitionToWide<decltype(du)> dw;
+  // Move the upper (odd) byte of every u16 into the low byte so we can pack.
+  const Vec512<uint16_t> odd_hi = ShiftRight<8>(BitCast(dw, hi));
+  const Vec512<uint16_t> odd_lo = ShiftRight<8>(BitCast(dw, lo));
+  const Vec512<uint64_t> packed{_mm512_packus_epi16(odd_lo.raw, odd_hi.raw)};
+  // Undo block interleave: lower half = even u64 lanes, upper = odd u64 lanes.
+  const Full512<uint64_t> du64;
+  alignas(64) static constexpr uint64_t kDeinterleave[8] = {0, 2, 4, 6,
+                                                            1, 3, 5, 7};
+  const auto indices = SetTableIndices(du64, kDeinterleave);
+  return BitCast(d, TableLookupLanes(packed, indices));
+#endif
+}
+
+// Wider lanes: a two-source permute with odd indices; lo is the first table
+// and hi the second.
+
+// 16-bit lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+  alignas(64) static constexpr uint16_t kOddIdx[32] = {
+      1,  3,  5,  7,  9,  11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
+      33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63};
+  const auto indices = Load(du, kOddIdx);
+  const Vec512<uint16_t> permuted{_mm512_permutex2var_epi16(
+      BitCast(du, lo).raw, indices.raw, BitCast(du, hi).raw)};
+  return BitCast(d, permuted);
+}
+
+// 32-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI32_D(D)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+  alignas(64) static constexpr uint32_t kOddIdx[16] = {
+      1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
+  const auto indices = Load(du, kOddIdx);
+  const Vec512<uint32_t> permuted{_mm512_permutex2var_epi32(
+      BitCast(du, lo).raw, indices.raw, BitCast(du, hi).raw)};
+  return BitCast(d, permuted);
+}
+
+// f32 lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+  alignas(64) static constexpr uint32_t kOddIdx[16] = {
+      1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
+  const auto indices = Load(du, kOddIdx);
+  return VFromD<D>{_mm512_permutex2var_ps(lo.raw, indices.raw, hi.raw)};
+}
+
+// 64-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI64_D(D)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+  alignas(64) static constexpr uint64_t kOddIdx[8] = {1, 3, 5, 7, 9, 11, 13, 15};
+  const auto indices = Load(du, kOddIdx);
+  const Vec512<uint64_t> permuted{_mm512_permutex2var_epi64(
+      BitCast(du, lo).raw, indices.raw, BitCast(du, hi).raw)};
+  return BitCast(d, permuted);
+}
+
+// f64 lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> ConcatOdd(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+  alignas(64) static constexpr uint64_t kOddIdx[8] = {1, 3, 5, 7, 9, 11, 13, 15};
+  const auto indices = Load(du, kOddIdx);
+  return VFromD<D>{_mm512_permutex2var_pd(lo.raw, indices.raw, hi.raw)};
+}
+
+// ------------------------------ ConcatEven
+
+// 8-bit lanes: gathers the even-indexed lanes of lo (lower half of the result)
+// followed by those of hi (upper half).
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+#if HWY_TARGET <= HWY_AVX3_DL
+  // Even indices 0..126 into the two-source table (lo first, then hi).
+  alignas(64) static constexpr uint8_t kIdx[64] = {
+      0,   2,   4,   6,   8,   10,  12,  14,  16,  18,  20,  22,  24,
+      26,  28,  30,  32,  34,  36,  38,  40,  42,  44,  46,  48,  50,
+      52,  54,  56,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
+      78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98,  100, 102,
+      104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126};
+  // The byte permute yields u8 lanes, so wrap it as Vec512<uint8_t> (matching
+  // ConcatOdd) rather than as a 32-bit vector; the bits are unchanged.
+  return BitCast(
+      d, Vec512<uint8_t>{_mm512_permutex2var_epi8(
+             BitCast(du, lo).raw, Load(du, kIdx).raw, BitCast(du, hi).raw)});
+#else
+  const RepartitionToWide<decltype(du)> dw;
+  // Isolate lower 8 bits per u16 so we can pack.
+  const Vec512<uint16_t> mask = Set(dw, 0x00FF);
+  const Vec512<uint16_t> uH = And(BitCast(dw, hi), mask);
+  const Vec512<uint16_t> uL = And(BitCast(dw, lo), mask);
+  const Vec512<uint64_t> u8{_mm512_packus_epi16(uL.raw, uH.raw)};
+  // Undo block interleave: lower half = even u64 lanes, upper = odd u64 lanes.
+  const Full512<uint64_t> du64;
+  alignas(64) static constexpr uint64_t kIdx[8] = {0, 2, 4, 6, 1, 3, 5, 7};
+  return BitCast(d, TableLookupLanes(u8, SetTableIndices(du64, kIdx)));
+#endif
+}
+
+// 16-bit lanes: two-source permute with even indices; lo is the first table
+// and hi the second.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+  alignas(64) static constexpr uint16_t kIdx[32] = {
+      0,  2,  4,  6,  8,  10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
+      32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62};
+  // The epi16 permute yields u16 lanes, so wrap it as Vec512<uint16_t>
+  // (matching ConcatOdd) rather than as a 32-bit vector; the bits are unchanged.
+  return BitCast(
+      d, Vec512<uint16_t>{_mm512_permutex2var_epi16(
+             BitCast(du, lo).raw, Load(du, kIdx).raw, BitCast(du, hi).raw)});
+}
+
+// 32/64-bit lanes: a two-source permute with even indices; lo is the first
+// table and hi the second.
+
+// 32-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI32_D(D)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+  alignas(64) static constexpr uint32_t kEvenIdx[16] = {
+      0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30};
+  const auto indices = Load(du, kEvenIdx);
+  const Vec512<uint32_t> permuted{_mm512_permutex2var_epi32(
+      BitCast(du, lo).raw, indices.raw, BitCast(du, hi).raw)};
+  return BitCast(d, permuted);
+}
+
+// f32 lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+  alignas(64) static constexpr uint32_t kEvenIdx[16] = {
+      0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30};
+  const auto indices = Load(du, kEvenIdx);
+  return VFromD<D>{_mm512_permutex2var_ps(lo.raw, indices.raw, hi.raw)};
+}
+
+// 64-bit integer lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI64_D(D)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+  alignas(64) static constexpr uint64_t kEvenIdx[8] = {0, 2,  4,  6,
+                                                       8, 10, 12, 14};
+  const auto indices = Load(du, kEvenIdx);
+  const Vec512<uint64_t> permuted{_mm512_permutex2var_epi64(
+      BitCast(du, lo).raw, indices.raw, BitCast(du, hi).raw)};
+  return BitCast(d, permuted);
+}
+
+// f64 lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {
+  const RebindToUnsigned<decltype(d)> du;
+  alignas(64) static constexpr uint64_t kEvenIdx[8] = {0, 2,  4,  6,
+                                                       8, 10, 12, 14};
+  const auto indices = Load(du, kEvenIdx);
+  return VFromD<D>{_mm512_permutex2var_pd(lo.raw, indices.raw, hi.raw)};
+}
+
+// ------------------------------ DupEven (InterleaveLower)
+
+// 32-bit lanes: per-block shuffle with the CCAA pattern.
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec512<T> DupEven(Vec512<T> v) {
+  const auto raw = _mm512_shuffle_epi32(v.raw, _MM_PERM_CCAA);
+  return Vec512<T>{raw};
+}
+HWY_API Vec512<float> DupEven(Vec512<float> v) {
+  const __m512 raw = _mm512_shuffle_ps(v.raw, v.raw, _MM_PERM_CCAA);
+  return Vec512<float>{raw};
+}
+
+// 64-bit lanes: interleaving the lower lanes of v with themselves.
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec512<T> DupEven(const Vec512<T> v) {
+  const DFromV<decltype(v)> tag;
+  return InterleaveLower(tag, v, v);
+}
+
+// ------------------------------ DupOdd (InterleaveUpper)
+
+// 32-bit lanes: per-block shuffle with the DDBB pattern.
+template <typename T, HWY_IF_T_SIZE(T, 4)>
+HWY_API Vec512<T> DupOdd(Vec512<T> v) {
+  const auto raw = _mm512_shuffle_epi32(v.raw, _MM_PERM_DDBB);
+  return Vec512<T>{raw};
+}
+HWY_API Vec512<float> DupOdd(Vec512<float> v) {
+  const __m512 raw = _mm512_shuffle_ps(v.raw, v.raw, _MM_PERM_DDBB);
+  return Vec512<float>{raw};
+}
+
+// 64-bit lanes: interleaving the upper lanes of v with themselves.
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec512<T> DupOdd(const Vec512<T> v) {
+  const DFromV<decltype(v)> tag;
+  return InterleaveUpper(tag, v, v);
+}
+
+// ------------------------------ OddEven (IfThenElse)
+
+// Returns a vector whose even-indexed lanes come from b and odd-indexed lanes
+// from a.
+template <typename T>
+HWY_API Vec512<T> OddEven(const Vec512<T> a, const Vec512<T> b) {
+  constexpr size_t kSize = sizeof(T);
+  // The 64-bit 0101.. pattern is shifted down to one bit per lane:
+  // 64, 32, 16 or 8 lanes for 1, 2, 4 or 8-byte T.
+  constexpr int kShift = kSize == 1 ? 0 : kSize == 2 ? 32 : kSize == 4 ? 48 : 56;
+  const Mask512<T> even_lanes{0x5555555555555555ull >> kShift};
+  return IfThenElse(even_lanes, b, a);
+}
+
+// ------------------------------ OddEvenBlocks
+
+// Masked blend at block granularity: lanes whose mask bit is set (blocks 0 and
+// 2) come from `even`, the rest from `odd`.
+template <typename T>
+HWY_API Vec512<T> OddEvenBlocks(Vec512<T> odd, Vec512<T> even) {
+  const auto raw =
+      _mm512_mask_blend_epi64(__mmask8{0x33u}, odd.raw, even.raw);
+  return Vec512<T>{raw};
+}
+
+HWY_API Vec512<float> OddEvenBlocks(Vec512<float> odd, Vec512<float> even) {
+  const __m512 raw =
+      _mm512_mask_blend_ps(__mmask16{0x0F0Fu}, odd.raw, even.raw);
+  return Vec512<float>{raw};
+}
+
+HWY_API Vec512<double> OddEvenBlocks(Vec512<double> odd, Vec512<double> even) {
+  const __m512d raw =
+      _mm512_mask_blend_pd(__mmask8{0x33u}, odd.raw, even.raw);
+  return Vec512<double>{raw};
+}
+
+// ------------------------------ SwapAdjacentBlocks
+
+// Block shuffle with pattern CDAB, i.e. each pair of neighbouring 128-bit
+// blocks trades places. One overload per raw register type.
+template <typename T>
+HWY_API Vec512<T> SwapAdjacentBlocks(Vec512<T> v) {
+  const auto raw = _mm512_shuffle_i32x4(v.raw, v.raw, _MM_PERM_CDAB);
+  return Vec512<T>{raw};
+}
+
+HWY_API Vec512<float> SwapAdjacentBlocks(Vec512<float> v) {
+  const __m512 raw = _mm512_shuffle_f32x4(v.raw, v.raw, _MM_PERM_CDAB);
+  return Vec512<float>{raw};
+}
+
+HWY_API Vec512<double> SwapAdjacentBlocks(Vec512<double> v) {
+  const __m512d raw = _mm512_shuffle_f64x2(v.raw, v.raw, _MM_PERM_CDAB);
+  return Vec512<double>{raw};
+}
+
+// ------------------------------ ReverseBlocks
+
+// Block shuffle with pattern ABCD, which reverses the order of the four
+// 128-bit blocks. One overload per raw register type.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_NOT_FLOAT3264_D(D)>
+HWY_API VFromD<D> ReverseBlocks(D /* tag */, VFromD<D> v) {
+  const auto raw = _mm512_shuffle_i32x4(v.raw, v.raw, _MM_PERM_ABCD);
+  return VFromD<D>{raw};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ReverseBlocks(D /* tag */, VFromD<D> v) {
+  const __m512 raw = _mm512_shuffle_f32x4(v.raw, v.raw, _MM_PERM_ABCD);
+  return VFromD<D>{raw};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> ReverseBlocks(D /* tag */, VFromD<D> v) {
+  const __m512d raw = _mm512_shuffle_f64x2(v.raw, v.raw, _MM_PERM_ABCD);
+  return VFromD<D>{raw};
+}
+
+// ------------------------------ TableLookupBytes (ZeroExtendVector)
+
+// Both full
+// Full-width table and indices: a single byte shuffle. The result carries the
+// index lane type TI.
+template <typename T, typename TI>
+HWY_API Vec512<TI> TableLookupBytes(Vec512<T> bytes, Vec512<TI> indices) {
+  const __m512i raw = _mm512_shuffle_epi8(bytes.raw, indices.raw);
+  return Vec512<TI>{raw};
+}
+
+// Partial index vector
+template <typename T, typename TI, size_t NI>
+HWY_API Vec128<TI, NI> TableLookupBytes(Vec512<T> bytes, Vec128<TI, NI> from) {
+  const Full512<TI> d512;
+  const Half<decltype(d512)> d256;
+  const Half<decltype(d256)> d128;
+  // First expand to full 128, then 256, then 512.
+  const Vec128<TI> from_full{from.raw};
+  const auto from_512 =
+      ZeroExtendVector(d512, ZeroExtendVector(d256, from_full));
+  const auto tbl_full = TableLookupBytes(bytes, from_512);
+  // Shrink to 256, then 128, then partial.
+  return Vec128<TI, NI>{LowerHalf(d128, LowerHalf(d256, tbl_full)).raw};
+}
+template <typename T, typename TI>
+HWY_API Vec256<TI> TableLookupBytes(Vec512<T> bytes, Vec256<TI> from) {
+  const DFromV<decltype(from)> dih;
+  const Twice<decltype(dih)> di;
+  const auto from_512 = ZeroExtendVector(di, from);
+  return LowerHalf(dih, TableLookupBytes(bytes, from_512));
+}
+
+// Partial table vector (at most 128 bits) with full-width indices.
+//
+// The table is zero-extended to 512 bits and the full-width overload does the
+// lookup. The descriptors used for the extension are built from the *table*
+// lane type T (matching the Vec256 table overload), not from the index lane
+// type TI: the vector being extended has lanes of type T, so TI-typed
+// descriptors are only usable when T and TI happen to be the same type.
+template <typename T, size_t N, typename TI>
+HWY_API Vec512<TI> TableLookupBytes(Vec128<T, N> bytes, Vec512<TI> from) {
+  const Full512<T> d512;
+  const Half<decltype(d512)> d256;
+  // First expand to full 128, then 256, then 512.
+  const Vec128<T> bytes_full{bytes.raw};
+  const auto bytes_512 =
+      ZeroExtendVector(d512, ZeroExtendVector(d256, bytes_full));
+  return TableLookupBytes(bytes_512, from);
+}
+// 256-bit table with full-width indices: zero-fill the table up to 512 bits,
+// then defer to the full-width overload.
+template <typename T, typename TI>
+HWY_API Vec512<TI> TableLookupBytes(Vec256<T> bytes, Vec512<TI> from) {
+  const Full512<T> d512;
+  const auto table_512 = ZeroExtendVector(d512, bytes);
+  return TableLookupBytes(table_512, from);
+}
+
+// Partial both are handled by x86_128/256.
+
+// ------------------------------ I8/U8 Broadcast (TableLookupBytes)
+
+// Broadcast for 1-byte lanes: builds an index vector with every byte equal to
+// kLane and feeds it to TableLookupBytes with `v` as the table.
+template <int kLane, class T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec512<T> Broadcast(const Vec512<T> v) {
+  static_assert(0 <= kLane && kLane < 16, "Invalid lane");
+  const Full512<T> d;
+  const auto lane_idx = Set(d, static_cast<T>(kLane));
+  return TableLookupBytes(v, lane_idx);
+}
+
+// ------------------------------ Per4LaneBlockShuffle
+
+namespace detail {
+
+// Builds a 512-bit vector in which the four 32-bit values x3..x0 (x0 in the
+// lowest lane) are repeated four times, then bit-casts it to D's vector type.
+template <class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_INLINE VFromD<D> Per4LaneBlkShufDupSet4xU32(D d, const uint32_t x3,
+                                                const uint32_t x2,
+                                                const uint32_t x1,
+                                                const uint32_t x0) {
+  const int32_t i3 = static_cast<int32_t>(x3);
+  const int32_t i2 = static_cast<int32_t>(x2);
+  const int32_t i1 = static_cast<int32_t>(x1);
+  const int32_t i0 = static_cast<int32_t>(x0);
+  const auto repeated = _mm512_set_epi32(i3, i2, i1, i0, i3, i2, i1, i0,  //
+                                         i3, i2, i1, i0, i3, i2, i1, i0);
+  return BitCast(d, Vec512<uint32_t>{repeated});
+}
+
+// The four overloads below pass the low 8 bits of kIdx3210 as the immediate of
+// a permute/shuffle intrinsic chosen by lane size (4 or 8 bytes) and by
+// whether the lane type is floating-point.
+
+// 4-byte non-float lanes.
+template <size_t kIdx3210, class V, HWY_IF_NOT_FLOAT(TFromV<V>)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> /*idx_3210_tag*/,
+                                  hwy::SizeTag<4> /*lane_size_tag*/,
+                                  hwy::SizeTag<64> /*vect_size_tag*/, V v) {
+  const auto shuffled =
+      _mm512_shuffle_epi32(v.raw, static_cast<_MM_PERM_ENUM>(kIdx3210 & 0xFF));
+  return V{shuffled};
+}
+
+// 4-byte float lanes.
+template <size_t kIdx3210, class V, HWY_IF_FLOAT(TFromV<V>)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> /*idx_3210_tag*/,
+                                  hwy::SizeTag<4> /*lane_size_tag*/,
+                                  hwy::SizeTag<64> /*vect_size_tag*/, V v) {
+  const auto shuffled =
+      _mm512_shuffle_ps(v.raw, v.raw, static_cast<int>(kIdx3210 & 0xFF));
+  return V{shuffled};
+}
+
+// 8-byte non-float lanes.
+template <size_t kIdx3210, class V, HWY_IF_NOT_FLOAT(TFromV<V>)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> /*idx_3210_tag*/,
+                                  hwy::SizeTag<8> /*lane_size_tag*/,
+                                  hwy::SizeTag<64> /*vect_size_tag*/, V v) {
+  const auto permuted =
+      _mm512_permutex_epi64(v.raw, static_cast<int>(kIdx3210 & 0xFF));
+  return V{permuted};
+}
+
+// 8-byte float lanes.
+template <size_t kIdx3210, class V, HWY_IF_FLOAT(TFromV<V>)>
+HWY_INLINE V Per4LaneBlockShuffle(hwy::SizeTag<kIdx3210> /*idx_3210_tag*/,
+                                  hwy::SizeTag<8> /*lane_size_tag*/,
+                                  hwy::SizeTag<64> /*vect_size_tag*/, V v) {
+  const auto permuted =
+      _mm512_permutex_pd(v.raw, static_cast<int>(kIdx3210 & 0xFF));
+  return V{permuted};
+}
+
+}  // namespace detail
+
+// ------------------------------ SlideUpLanes
+
+namespace detail {
+
+// Treats `hi:lo` as one sequence of 32-bit elements (`lo` in the lower half),
+// shifts it right by kI32Lanes elements and returns the lower 16 elements,
+// bit-cast back to V. Note that the underlying instruction only uses the low
+// four bits of the immediate as the shift count.
+template <int kI32Lanes, class V, HWY_IF_V_SIZE_V(V, 64)>
+HWY_INLINE V CombineShiftRightI32Lanes(V hi, V lo) {
+  const DFromV<decltype(hi)> d;
+  const Repartition<uint32_t, decltype(d)> du32;
+  return BitCast(d,
+                 Vec512<uint32_t>{_mm512_alignr_epi32(
+                     BitCast(du32, hi).raw, BitCast(du32, lo).raw, kI32Lanes)});
+}
+
+// Same as above for 64-bit elements: returns the lower 8 elements of `hi:lo`
+// shifted right by kI64Lanes elements. Only the low three bits of the
+// immediate are used as the shift count.
+template <int kI64Lanes, class V, HWY_IF_V_SIZE_V(V, 64)>
+HWY_INLINE V CombineShiftRightI64Lanes(V hi, V lo) {
+  const DFromV<decltype(hi)> d;
+  const Repartition<uint64_t, decltype(d)> du64;
+  return BitCast(d,
+                 Vec512<uint64_t>{_mm512_alignr_epi64(
+                     BitCast(du64, hi).raw, BitCast(du64, lo).raw, kI64Lanes)});
+}
+
+// Moves the 32-bit elements of `v` up by kI32Lanes positions, shifting in
+// zeros at the bottom.
+template <int kI32Lanes, class V, HWY_IF_V_SIZE_V(V, 64)>
+HWY_INLINE V SlideUpI32Lanes(V v) {
+  static_assert(0 <= kI32Lanes && kI32Lanes <= 15,
+                "kI32Lanes must be between 0 and 15");
+  const DFromV<decltype(v)> d;
+  // For kI32Lanes == 0 the shift count would be 16, which the instruction
+  // reduces to 0 and which would therefore select the all-zero low operand.
+  // Return `v` directly in that case and keep the immediate within 0..15.
+  return (kI32Lanes == 0)
+             ? v
+             : CombineShiftRightI32Lanes<(16 - kI32Lanes) & 15>(v, Zero(d));
+}
+
+// Moves the 64-bit elements of `v` up by kI64Lanes positions, shifting in
+// zeros at the bottom.
+template <int kI64Lanes, class V, HWY_IF_V_SIZE_V(V, 64)>
+HWY_INLINE V SlideUpI64Lanes(V v) {
+  static_assert(0 <= kI64Lanes && kI64Lanes <= 7,
+                "kI64Lanes must be between 0 and 7");
+  const DFromV<decltype(v)> d;
+  // For kI64Lanes == 0 the shift count would be 8, which the instruction
+  // reduces to 0 and which would therefore select the all-zero low operand.
+  // Return `v` directly in that case and keep the immediate within 0..7.
+  return (kI64Lanes == 0)
+             ? v
+             : CombineShiftRightI64Lanes<(8 - kI64Lanes) & 7>(v, Zero(d));
+}
+
+// Slide-up by a runtime number of 1-byte lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE VFromD<D> TableLookupSlideUpLanes(D d, VFromD<D> v, size_t amt) {
+  const Repartition<uint8_t, decltype(d)> du8;
+
+#if HWY_TARGET <= HWY_AVX3_DL
+  // Single lookup across `v` and a zero vector with byte indices that start
+  // at -amt (mod 256).
+  const auto byte_idx = Iota(du8, static_cast<uint8_t>(size_t{0} - amt));
+  return TwoTablesLookupLanes(v, Zero(d), Indices512<TFromD<D>>{byte_idx.raw});
+#else
+  // Two steps: first slide by (amt & 15) bytes using per-block byte shuffles,
+  // then slide by (amt >> 4) whole blocks using a 64-bit lane lookup.
+  const Repartition<uint16_t, decltype(d)> du16;
+  const Repartition<uint64_t, decltype(d)> du64;
+  const auto byte_idx = Iota(du8, static_cast<uint8_t>(size_t{0} - (amt & 15)));
+  // Two 64-bit lanes per block, hence the shift left by one.
+  const auto blk_u64_idx =
+      Iota(du64, static_cast<uint64_t>(uint64_t{0} - ((amt >> 4) << 1)));
+
+  // Block arrangements of `v` to shuffle from: (0, 0, 2, 2) and (3, 1, 1, 3),
+  // listed from block 0 upwards.
+  const VFromD<D> even_blocks{
+      _mm512_shuffle_i32x4(v.raw, v.raw, _MM_SHUFFLE(2, 2, 0, 0))};
+  const VFromD<D> odd_blocks{
+      _mm512_shuffle_i32x4(v.raw, v.raw, _MM_SHUFFLE(3, 1, 1, 3))};
+  // Shifting each u16 left by 3 moves bit 4 of every index byte into that
+  // byte's top bit, from which the mask is derived: bit 4 decides whether a
+  // byte is taken from odd_blocks or even_blocks.
+  const auto odd_sel_mask =
+      MaskFromVec(BitCast(d, ShiftLeft<3>(BitCast(du16, byte_idx))));
+  const auto even_blk_lookup_result =
+      BitCast(d, TableLookupBytes(even_blocks, byte_idx));
+  // Overwrite the bytes selected by odd_sel_mask with the odd_blocks lookup.
+  const VFromD<D> blockwise_slide_up_result{
+      _mm512_mask_shuffle_epi8(even_blk_lookup_result.raw, odd_sel_mask.raw,
+                               odd_blocks.raw, byte_idx.raw)};
+  // Whole-block slide; the second table is all-zero.
+  return BitCast(d, TwoTablesLookupLanes(
+                        BitCast(du64, blockwise_slide_up_result), Zero(du64),
+                        Indices512<uint64_t>{blk_u64_idx.raw}));
+#endif
+}
+
+}  // namespace detail
+
+// Moves the 128-bit blocks of `v` up by kBlocks positions, filling the vacated
+// low blocks with zero. One block corresponds to two 64-bit lanes.
+template <int kBlocks, class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API VFromD<D> SlideUpBlocks(D d, VFromD<D> v) {
+  static_assert(0 <= kBlocks && kBlocks <= 3,
+                "kBlocks must be between 0 and 3");
+  switch (kBlocks) {
+    case 3:
+      return detail::SlideUpI64Lanes<6>(v);
+    case 2:
+      // Lower half of `v` on top of a zero lower half.
+      return ConcatLowerLower(d, v, Zero(d));
+    case 1:
+      return detail::SlideUpI64Lanes<2>(v);
+    default:
+      break;  // kBlocks == 0: nothing to move.
+  }
+
+  return v;
+}
+
+// SlideUpLanes for 4-byte lanes: moves the lanes of `v` up by `amt` positions.
+// When the compiler can prove `amt` is a constant (non-debug GCC/clang builds
+// only), amounts 0..15 are dispatched to fixed-immediate helpers; even amounts
+// use the 64-bit helper with half the count. Everything else goes through the
+// table-lookup implementation.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(amt)) {
+    switch (amt) {
+      case 0:
+        return v;
+      case 1:
+        return detail::SlideUpI32Lanes<1>(v);
+      case 2:
+        return detail::SlideUpI64Lanes<1>(v);
+      case 3:
+        return detail::SlideUpI32Lanes<3>(v);
+      case 4:
+        return detail::SlideUpI64Lanes<2>(v);
+      case 5:
+        return detail::SlideUpI32Lanes<5>(v);
+      case 6:
+        return detail::SlideUpI64Lanes<3>(v);
+      case 7:
+        return detail::SlideUpI32Lanes<7>(v);
+      case 8:
+        // Half a vector: lower half of `v` on top of a zero lower half.
+        return ConcatLowerLower(d, v, Zero(d));
+      case 9:
+        return detail::SlideUpI32Lanes<9>(v);
+      case 10:
+        return detail::SlideUpI64Lanes<5>(v);
+      case 11:
+        return detail::SlideUpI32Lanes<11>(v);
+      case 12:
+        return detail::SlideUpI64Lanes<6>(v);
+      case 13:
+        return detail::SlideUpI32Lanes<13>(v);
+      case 14:
+        return detail::SlideUpI64Lanes<7>(v);
+      case 15:
+        return detail::SlideUpI32Lanes<15>(v);
+    }
+  }
+#endif
+
+  // Runtime amount, or a constant not handled above.
+  return detail::TableLookupSlideUpLanes(d, v, amt);
+}
+
+// SlideUpLanes for 8-byte lanes: moves the lanes of `v` up by `amt` positions.
+// Compile-time-constant amounts 0..7 (non-debug GCC/clang builds only) use
+// fixed-immediate helpers; everything else uses the table-lookup
+// implementation.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(amt)) {
+    switch (amt) {
+      case 0:
+        return v;
+      case 1:
+        return detail::SlideUpI64Lanes<1>(v);
+      case 2:
+        return detail::SlideUpI64Lanes<2>(v);
+      case 3:
+        return detail::SlideUpI64Lanes<3>(v);
+      case 4:
+        // Half a vector: lower half of `v` on top of a zero lower half.
+        return ConcatLowerLower(d, v, Zero(d));
+      case 5:
+        return detail::SlideUpI64Lanes<5>(v);
+      case 6:
+        return detail::SlideUpI64Lanes<6>(v);
+      case 7:
+        return detail::SlideUpI64Lanes<7>(v);
+    }
+  }
+#endif
+
+  // Runtime amount, or a constant not handled above.
+  return detail::TableLookupSlideUpLanes(d, v, amt);
+}
+
+// SlideUpLanes for 1-byte lanes: moves the bytes of `v` up by `amt` positions.
+// For compile-time-constant amounts (non-debug GCC/clang builds only):
+//  - multiples of 4 are forwarded to the 32-bit overload,
+//  - other even amounts use the 16-bit table lookup,
+//  - odd amounts up to 63, on targets where HWY_TARGET > HWY_AVX3_DL, combine
+//    a whole-block slide with a per-block byte shift.
+// All remaining cases use the byte table-lookup implementation.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(amt)) {
+    if ((amt & 3) == 0) {
+      const Repartition<uint32_t, decltype(d)> du32;
+      return BitCast(d, SlideUpLanes(du32, BitCast(du32, v), amt >> 2));
+    } else if ((amt & 1) == 0) {
+      const Repartition<uint16_t, decltype(d)> du16;
+      return BitCast(
+          d, detail::TableLookupSlideUpLanes(du16, BitCast(du16, v), amt >> 1));
+    }
+#if HWY_TARGET > HWY_AVX3_DL
+    else if (amt <= 63) {  // NOLINT(readability/braces)
+      const Repartition<uint64_t, decltype(d)> du64;
+      // Number of whole blocks to slide, expressed in 64-bit lanes.
+      const size_t blk_u64_slideup_amt = (amt >> 4) << 1;
+      const auto vu64 = BitCast(du64, v);
+      // v_hi: `v` slid up by the whole-block amount.
+      const auto v_hi =
+          BitCast(d, SlideUpLanes(du64, vu64, blk_u64_slideup_amt));
+      // v_lo: `v` slid up by one more block (two more 64-bit lanes); zero once
+      // that would be a slide by 8 or more 64-bit lanes.
+      const auto v_lo =
+          (blk_u64_slideup_amt <= 4)
+              ? BitCast(d, SlideUpLanes(du64, vu64, blk_u64_slideup_amt + 2))
+              : Zero(d);
+      // The remaining (amt & 15) bytes are handled by a byte shift of
+      // v_hi:v_lo with count 16 - (amt & 15). Only odd remainders reach this
+      // point because even amounts returned above.
+      switch (amt & 15) {
+        case 1:
+          return CombineShiftRightBytes<15>(d, v_hi, v_lo);
+        case 3:
+          return CombineShiftRightBytes<13>(d, v_hi, v_lo);
+        case 5:
+          return CombineShiftRightBytes<11>(d, v_hi, v_lo);
+        case 7:
+          return CombineShiftRightBytes<9>(d, v_hi, v_lo);
+        case 9:
+          return CombineShiftRightBytes<7>(d, v_hi, v_lo);
+        case 11:
+          return CombineShiftRightBytes<5>(d, v_hi, v_lo);
+        case 13:
+          return CombineShiftRightBytes<3>(d, v_hi, v_lo);
+        case 15:
+          return CombineShiftRightBytes<1>(d, v_hi, v_lo);
+      }
+    }
+#endif  // HWY_TARGET > HWY_AVX3_DL
+  }
+#endif
+
+  // Runtime amount, or a constant not handled above.
+  return detail::TableLookupSlideUpLanes(d, v, amt);
+}
+
+// SlideUpLanes for 2-byte lanes: compile-time-constant even amounts
+// (non-debug GCC/clang builds only) are forwarded to the 32-bit overload with
+// half the count; everything else uses the table-lookup implementation.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> SlideUpLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(amt) && (amt & 1) == 0) {
+    const Repartition<uint32_t, decltype(d)> du32;
+    return BitCast(d, SlideUpLanes(du32, BitCast(du32, v), amt >> 1));
+  }
+#endif
+
+  return detail::TableLookupSlideUpLanes(d, v, amt);
+}
+
+// ------------------------------ Slide1Up
+
+// Slide1Up: moves every lane of `v` up by exactly one position. One overload
+// per lane size.
+
+// 1-byte lanes.
+template <typename D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Slide1Up(D d, VFromD<D> v) {
+#if HWY_TARGET <= HWY_AVX3_DL
+  return detail::TableLookupSlideUpLanes(d, v, 1);
+#else
+  // `v` moved up by one block (two 64-bit lanes) supplies the byte that
+  // crosses each block boundary.
+  const auto one_block_up = detail::SlideUpI64Lanes<2>(v);
+  return CombineShiftRightBytes<15>(d, v, one_block_up);
+#endif
+}
+
+// 2-byte lanes.
+template <typename D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Slide1Up(D d, VFromD<D> v) {
+  const size_t kOneLane = 1;
+  return detail::TableLookupSlideUpLanes(d, v, kOneLane);
+}
+
+// 4-byte lanes.
+template <typename D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> Slide1Up(D /*d*/, VFromD<D> v) {
+  const auto slid = detail::SlideUpI32Lanes<1>(v);
+  return slid;
+}
+
+// 8-byte lanes.
+template <typename D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> Slide1Up(D /*d*/, VFromD<D> v) {
+  const auto slid = detail::SlideUpI64Lanes<1>(v);
+  return slid;
+}
+
+// ------------------------------ SlideDownLanes
+
+namespace detail {
+
+// Moves the 32-bit elements of `v` down by kI32Lanes positions, shifting in
+// zeros at the top: the lower 16 elements of `zero:v` shifted right by
+// kI32Lanes elements.
+template <int kI32Lanes, class V, HWY_IF_V_SIZE_V(V, 64)>
+HWY_INLINE V SlideDownI32Lanes(V v) {
+  static_assert(0 <= kI32Lanes && kI32Lanes <= 15,
+                "kI32Lanes must be between 0 and 15");
+  const DFromV<decltype(v)> d;
+  return CombineShiftRightI32Lanes<kI32Lanes>(Zero(d), v);
+}
+
+// Moves the 64-bit elements of `v` down by kI64Lanes positions, shifting in
+// zeros at the top.
+template <int kI64Lanes, class V, HWY_IF_V_SIZE_V(V, 64)>
+HWY_INLINE V SlideDownI64Lanes(V v) {
+  static_assert(0 <= kI64Lanes && kI64Lanes <= 7,
+                "kI64Lanes must be between 0 and 7");
+  const DFromV<decltype(v)> d;
+  return CombineShiftRightI64Lanes<kI64Lanes>(Zero(d), v);
+}
+
+// Slide-down by a runtime number of 1-byte lanes.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 1)>
+HWY_INLINE VFromD<D> TableLookupSlideDownLanes(D d, VFromD<D> v, size_t amt) {
+  const Repartition<uint8_t, decltype(d)> du8;
+
+#if HWY_TARGET <= HWY_AVX3_DL
+  // Single lookup across `v` and a zero vector with byte indices that start
+  // at amt.
+  auto byte_idx = Iota(du8, static_cast<uint8_t>(amt));
+  return TwoTablesLookupLanes(v, Zero(d), Indices512<TFromD<D>>{byte_idx.raw});
+#else
+  // Two steps: first slide by (amt & 15) bytes using per-block byte shuffles,
+  // then slide by (amt >> 4) whole blocks using a 64-bit lane lookup.
+  const Repartition<uint16_t, decltype(d)> du16;
+  const Repartition<uint64_t, decltype(d)> du64;
+  const auto byte_idx = Iota(du8, static_cast<uint8_t>(amt & 15));
+  // Two 64-bit lanes per block, hence the shift left by one.
+  const auto blk_u64_idx = Iota(du64, static_cast<uint64_t>(((amt >> 4) << 1)));
+
+  // Block arrangements of `v` to shuffle from: (0, 2, 2, 0) and (1, 1, 3, 3),
+  // listed from block 0 upwards.
+  const VFromD<D> even_blocks{
+      _mm512_shuffle_i32x4(v.raw, v.raw, _MM_SHUFFLE(0, 2, 2, 0))};
+  const VFromD<D> odd_blocks{
+      _mm512_shuffle_i32x4(v.raw, v.raw, _MM_SHUFFLE(3, 3, 1, 1))};
+  // Shifting each u16 left by 3 moves bit 4 of every index byte into that
+  // byte's top bit, from which the mask is derived: bit 4 decides whether a
+  // byte is taken from odd_blocks or even_blocks.
+  const auto odd_sel_mask =
+      MaskFromVec(BitCast(d, ShiftLeft<3>(BitCast(du16, byte_idx))));
+  // The zero-mask clears the top 16 bytes of the even_blocks lookup; that
+  // block of even_blocks holds block 0 of `v`, which must not wrap around into
+  // the top of the result.
+  const VFromD<D> even_blk_lookup_result{
+      _mm512_maskz_shuffle_epi8(static_cast<__mmask64>(0x0000FFFFFFFFFFFFULL),
+                                even_blocks.raw, byte_idx.raw)};
+  // NOTE: despite its name, this holds the per-block slide-*down* result.
+  const VFromD<D> blockwise_slide_up_result{
+      _mm512_mask_shuffle_epi8(even_blk_lookup_result.raw, odd_sel_mask.raw,
+                               odd_blocks.raw, byte_idx.raw)};
+  // Whole-block slide; the second table is all-zero.
+  return BitCast(d, TwoTablesLookupLanes(
+                        BitCast(du64, blockwise_slide_up_result), Zero(du64),
+                        Indices512<uint64_t>{blk_u64_idx.raw}));
+#endif
+}
+
+}  // namespace detail
+
+// Moves the 128-bit blocks of `v` down by kBlocks positions, filling the
+// vacated high blocks with zero. One block corresponds to two 64-bit lanes.
+template <int kBlocks, class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API VFromD<D> SlideDownBlocks(D d, VFromD<D> v) {
+  static_assert(0 <= kBlocks && kBlocks <= 3,
+                "kBlocks must be between 0 and 3");
+  const Half<decltype(d)> d_half;
+  switch (kBlocks) {
+    case 3:
+      return detail::SlideDownI64Lanes<6>(v);
+    case 2:
+      // Upper half of `v` becomes the lower half; the rest is zero.
+      return ZeroExtendVector(d, UpperHalf(d_half, v));
+    case 1:
+      return detail::SlideDownI64Lanes<2>(v);
+    default:
+      break;  // kBlocks == 0: nothing to move.
+  }
+
+  return v;
+}
+
+// SlideDownLanes for 4-byte lanes: moves the lanes of `v` down by `amt`
+// positions. When the compiler can prove `amt` is a constant (non-debug
+// GCC/clang builds only), amounts 0..15 are dispatched to fixed-immediate
+// helpers; even amounts use the 64-bit helper with half the count. Everything
+// else goes through the table-lookup implementation.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(amt)) {
+    const Half<decltype(d)> dh;
+    switch (amt) {
+      case 0:
+        // Nothing to move; same fast path as the other constant-amount
+        // switches, and it avoids a table lookup for a no-op.
+        return v;
+      case 1:
+        return detail::SlideDownI32Lanes<1>(v);
+      case 2:
+        return detail::SlideDownI64Lanes<1>(v);
+      case 3:
+        return detail::SlideDownI32Lanes<3>(v);
+      case 4:
+        return detail::SlideDownI64Lanes<2>(v);
+      case 5:
+        return detail::SlideDownI32Lanes<5>(v);
+      case 6:
+        return detail::SlideDownI64Lanes<3>(v);
+      case 7:
+        return detail::SlideDownI32Lanes<7>(v);
+      case 8:
+        // Half a vector: upper half of `v` becomes the lower half.
+        return ZeroExtendVector(d, UpperHalf(dh, v));
+      case 9:
+        return detail::SlideDownI32Lanes<9>(v);
+      case 10:
+        return detail::SlideDownI64Lanes<5>(v);
+      case 11:
+        return detail::SlideDownI32Lanes<11>(v);
+      case 12:
+        return detail::SlideDownI64Lanes<6>(v);
+      case 13:
+        return detail::SlideDownI32Lanes<13>(v);
+      case 14:
+        return detail::SlideDownI64Lanes<7>(v);
+      case 15:
+        return detail::SlideDownI32Lanes<15>(v);
+    }
+  }
+#endif
+
+  // Runtime amount, or a constant not handled above.
+  return detail::TableLookupSlideDownLanes(d, v, amt);
+}
+
+// SlideDownLanes for 8-byte lanes: moves the lanes of `v` down by `amt`
+// positions. Compile-time-constant amounts 0..7 (non-debug GCC/clang builds
+// only) use fixed-immediate helpers; everything else uses the table-lookup
+// implementation.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(amt)) {
+    const Half<decltype(d)> dh;
+    switch (amt) {
+      case 0:
+        return v;
+      case 1:
+        return detail::SlideDownI64Lanes<1>(v);
+      case 2:
+        return detail::SlideDownI64Lanes<2>(v);
+      case 3:
+        return detail::SlideDownI64Lanes<3>(v);
+      case 4:
+        // Half a vector: upper half of `v` becomes the lower half.
+        return ZeroExtendVector(d, UpperHalf(dh, v));
+      case 5:
+        return detail::SlideDownI64Lanes<5>(v);
+      case 6:
+        return detail::SlideDownI64Lanes<6>(v);
+      case 7:
+        return detail::SlideDownI64Lanes<7>(v);
+    }
+  }
+#endif
+
+  // Runtime amount, or a constant not handled above.
+  return detail::TableLookupSlideDownLanes(d, v, amt);
+}
+
+// SlideDownLanes for 1-byte lanes: moves the bytes of `v` down by `amt`
+// positions. For compile-time-constant amounts (non-debug GCC/clang builds
+// only):
+//  - multiples of 4 are forwarded to the 32-bit overload,
+//  - other even amounts use the 16-bit table lookup,
+//  - odd amounts up to 63, on targets where HWY_TARGET > HWY_AVX3_DL, combine
+//    a whole-block slide with a per-block byte shift.
+// All remaining cases use the byte table-lookup implementation.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(amt)) {
+    if ((amt & 3) == 0) {
+      const Repartition<uint32_t, decltype(d)> du32;
+      return BitCast(d, SlideDownLanes(du32, BitCast(du32, v), amt >> 2));
+    } else if ((amt & 1) == 0) {
+      const Repartition<uint16_t, decltype(d)> du16;
+      return BitCast(d, detail::TableLookupSlideDownLanes(
+                            du16, BitCast(du16, v), amt >> 1));
+    }
+#if HWY_TARGET > HWY_AVX3_DL
+    else if (amt <= 63) {  // NOLINT(readability/braces)
+      const Repartition<uint64_t, decltype(d)> du64;
+      // Number of whole blocks to slide, expressed in 64-bit lanes.
+      const size_t blk_u64_slidedown_amt = (amt >> 4) << 1;
+      const auto vu64 = BitCast(du64, v);
+      // v_lo: `v` slid down by the whole-block amount.
+      const auto v_lo =
+          BitCast(d, SlideDownLanes(du64, vu64, blk_u64_slidedown_amt));
+      // v_hi: `v` slid down by one more block (two more 64-bit lanes); zero
+      // once that would be a slide by 8 or more 64-bit lanes.
+      const auto v_hi =
+          (blk_u64_slidedown_amt <= 4)
+              ? BitCast(d,
+                        SlideDownLanes(du64, vu64, blk_u64_slidedown_amt + 2))
+              : Zero(d);
+      // The remaining (amt & 15) bytes are handled by a byte shift of
+      // v_hi:v_lo. Only odd remainders reach this point because even amounts
+      // returned above.
+      switch (amt & 15) {
+        case 1:
+          return CombineShiftRightBytes<1>(d, v_hi, v_lo);
+        case 3:
+          return CombineShiftRightBytes<3>(d, v_hi, v_lo);
+        case 5:
+          return CombineShiftRightBytes<5>(d, v_hi, v_lo);
+        case 7:
+          return CombineShiftRightBytes<7>(d, v_hi, v_lo);
+        case 9:
+          return CombineShiftRightBytes<9>(d, v_hi, v_lo);
+        case 11:
+          return CombineShiftRightBytes<11>(d, v_hi, v_lo);
+        case 13:
+          return CombineShiftRightBytes<13>(d, v_hi, v_lo);
+        case 15:
+          return CombineShiftRightBytes<15>(d, v_hi, v_lo);
+      }
+    }
+#endif  // HWY_TARGET > HWY_AVX3_DL
+  }
+#endif
+
+  // Runtime amount, or a constant not handled above.
+  return detail::TableLookupSlideDownLanes(d, v, amt);
+}
+
+// SlideDownLanes for 2-byte lanes: compile-time-constant even amounts
+// (non-debug GCC/clang builds only) are forwarded to the 32-bit overload with
+// half the count; everything else uses the table-lookup implementation.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> SlideDownLanes(D d, VFromD<D> v, size_t amt) {
+#if !HWY_IS_DEBUG_BUILD && HWY_COMPILER_GCC  // includes clang
+  if (__builtin_constant_p(amt) && (amt & 1) == 0) {
+    const Repartition<uint32_t, decltype(d)> du32;
+    return BitCast(d, SlideDownLanes(du32, BitCast(du32, v), amt >> 1));
+  }
+#endif
+
+  return detail::TableLookupSlideDownLanes(d, v, amt);
+}
+
+// ------------------------------ Slide1Down
+
+// Slide1Down: moves every lane of `v` down by exactly one position. One
+// overload per lane size.
+
+// 1-byte lanes.
+template <typename D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API VFromD<D> Slide1Down(D d, VFromD<D> v) {
+#if HWY_TARGET <= HWY_AVX3_DL
+  return detail::TableLookupSlideDownLanes(d, v, 1);
+#else
+  // `v` moved down by one block (two 64-bit lanes) supplies the byte that
+  // crosses each block boundary.
+  const auto one_block_down = detail::SlideDownI64Lanes<2>(v);
+  return CombineShiftRightBytes<1>(d, one_block_down, v);
+#endif
+}
+
+// 2-byte lanes.
+template <typename D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 2)>
+HWY_API VFromD<D> Slide1Down(D d, VFromD<D> v) {
+  const size_t kOneLane = 1;
+  return detail::TableLookupSlideDownLanes(d, v, kOneLane);
+}
+
+// 4-byte lanes.
+template <typename D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 4)>
+HWY_API VFromD<D> Slide1Down(D /*d*/, VFromD<D> v) {
+  const auto slid = detail::SlideDownI32Lanes<1>(v);
+  return slid;
+}
+
+// 8-byte lanes.
+template <typename D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 8)>
+HWY_API VFromD<D> Slide1Down(D /*d*/, VFromD<D> v) {
+  const auto slid = detail::SlideDownI64Lanes<1>(v);
+  return slid;
+}
+
+// ================================================== CONVERT
+
+// ------------------------------ Promotions (part w/ narrow lanes -> full)
+
+// Unsigned: zero-extend.
+// Note: these have 3 cycle latency; if inputs are already split across the
+// 128 bit blocks (in their upper/lower halves), then Zip* would be faster.
+// u8 -> u16
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec256<uint8_t> v) {
+  const auto widened = _mm512_cvtepu8_epi16(v.raw);
+  return VFromD<D>{widened};
+}
+// u8 -> u32
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec128<uint8_t> v) {
+  const auto widened = _mm512_cvtepu8_epi32(v.raw);
+  return VFromD<D>{widened};
+}
+// u16 -> u32
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec256<uint16_t> v) {
+  const auto widened = _mm512_cvtepu16_epi32(v.raw);
+  return VFromD<D>{widened};
+}
+// u32 -> u64
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec256<uint32_t> v) {
+  const auto widened = _mm512_cvtepu32_epi64(v.raw);
+  return VFromD<D>{widened};
+}
+// u16 -> u64
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec128<uint16_t> v) {
+  const auto widened = _mm512_cvtepu16_epi64(v.raw);
+  return VFromD<D>{widened};
+}
+// u8 -> u64
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec64<uint8_t> v) {
+  const auto widened = _mm512_cvtepu8_epi64(v.raw);
+  return VFromD<D>{widened};
+}
+
+// Signed: replicate sign bit.
+// Note: these have 3 cycle latency; if inputs are already split across the
+// 128 bit blocks (in their upper/lower halves), then ZipUpper/lo followed by
+// signed shift would be faster.
+// i8 -> i16
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec256<int8_t> v) {
+  const auto widened = _mm512_cvtepi8_epi16(v.raw);
+  return VFromD<D>{widened};
+}
+// i8 -> i32
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec128<int8_t> v) {
+  const auto widened = _mm512_cvtepi8_epi32(v.raw);
+  return VFromD<D>{widened};
+}
+// i16 -> i32
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec256<int16_t> v) {
+  const auto widened = _mm512_cvtepi16_epi32(v.raw);
+  return VFromD<D>{widened};
+}
+// i32 -> i64
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec256<int32_t> v) {
+  const auto widened = _mm512_cvtepi32_epi64(v.raw);
+  return VFromD<D>{widened};
+}
+// i16 -> i64
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec128<int16_t> v) {
+  const auto widened = _mm512_cvtepi16_epi64(v.raw);
+  return VFromD<D>{widened};
+}
+// i8 -> i64
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec64<int8_t> v) {
+  const auto widened = _mm512_cvtepi8_epi64(v.raw);
+  return VFromD<D>{widened};
+}
+
+// Float
+// float16 -> float. When HWY_HAVE_FLOAT16 is set, the input is first bit-cast
+// to u16 so that the raw value handed to the conversion intrinsic comes from
+// the u16 vector; otherwise the raw value of `v` is passed directly.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec256<float16_t> v) {
+#if HWY_HAVE_FLOAT16
+  const RebindToUnsigned<DFromV<decltype(v)>> du16;
+  const auto half_bits = BitCast(du16, v);
+  return VFromD<D>{_mm512_cvtph_ps(half_bits.raw)};
+#else
+  return VFromD<D>{_mm512_cvtph_ps(v.raw)};
+#endif  // HWY_HAVE_FLOAT16
+}
+
+// bfloat16 -> float: widen the 16 bits of each input lane to 32 bits and move
+// them into the upper half of the lane; the lower 16 bits become zero.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> PromoteTo(D df32, Vec256<bfloat16_t> v) {
+  const RebindToSigned<decltype(df32)> di32;
+  const Rebind<uint16_t, decltype(df32)> du16;
+  const auto bits16 = BitCast(du16, v);
+  const auto bits32 = PromoteTo(di32, bits16);
+  return BitCast(df32, ShiftLeft<16>(bits32));
+}
+
+// float -> double
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec256<float> v) {
+  const auto widened = _mm512_cvtps_pd(v.raw);
+  return VFromD<D>{widened};
+}
+
+// int32 -> double
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> PromoteTo(D /* tag */, Vec256<int32_t> v) {
+  const auto converted = _mm512_cvtepi32_pd(v.raw);
+  return VFromD<D>{converted};
+}
+
+// ------------------------------ Demotions (full -> part w/ narrow lanes)
+
+// i32 -> u16 with unsigned saturation.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec512<int32_t> v) {
+  // The pack works within each 128-bit block; with `v` as both inputs, the
+  // two 64-bit halves of every block hold the same four results.
+  const Vec512<uint16_t> packed{_mm512_packus_epi32(v.raw, v.raw)};
+
+  // Gather the even 64-bit lanes into the lower 256 bits.
+  alignas(64) static constexpr uint64_t kEvenU64[8] = {0, 2, 4, 6, 0, 2, 4, 6};
+  const Full512<uint64_t> du64;
+  const auto even_idx = Load(du64, kEvenU64);
+  const Vec512<uint16_t> gathered{
+      _mm512_permutexvar_epi64(even_idx.raw, packed.raw)};
+  return LowerHalf(gathered);
+}
+
+// u32 -> u16: clamp to the largest non-negative i32 first so the value can be
+// reinterpreted as signed and handled by the i32 overload.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> DemoteTo(D dn, Vec512<uint32_t> v) {
+  const DFromV<decltype(v)> du32;
+  const RebindToSigned<decltype(du32)> di32;
+  const auto clamped = Min(v, Set(du32, 0x7FFFFFFFu));
+  return DemoteTo(dn, BitCast(di32, clamped));
+}
+
+// i32 -> i16 with signed saturation.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec512<int32_t> v) {
+  // Same per-block duplication as in the unsigned case above.
+  const Vec512<int16_t> packed{_mm512_packs_epi32(v.raw, v.raw)};
+
+  // Gather the even 64-bit lanes into the lower 256 bits.
+  alignas(64) static constexpr uint64_t kEvenU64[8] = {0, 2, 4, 6, 0, 2, 4, 6};
+  const Full512<uint64_t> du64;
+  const auto even_idx = Load(du64, kEvenU64);
+  const Vec512<int16_t> gathered{
+      _mm512_permutexvar_epi64(even_idx.raw, packed.raw)};
+  return LowerHalf(gathered);
+}
+
+// i32 -> u8: signed pack to i16, then unsigned-saturating pack to u8.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec512<int32_t> v) {
+  const Vec512<int16_t> packed16{_mm512_packs_epi32(v.raw, v.raw)};
+  const Vec512<uint8_t> packed8{
+      _mm512_packus_epi16(packed16.raw, packed16.raw)};
+
+  // After both packs the first 32-bit lane of every block holds that block's
+  // four results; collect those lanes (0, 4, 8, 12) at the bottom.
+  alignas(16) static constexpr uint32_t kBlockFirstU32[4] = {0, 4, 8, 12};
+  const Full512<uint32_t> du32;
+  const auto first_idx = LoadDup128(du32, kBlockFirstU32);
+  const Vec512<uint8_t> gathered{
+      _mm512_permutexvar_epi32(first_idx.raw, packed8.raw)};
+  return LowerHalf(LowerHalf(gathered));
+}
+
+// u32 -> u8 via the unsigned-saturating down-convert.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec512<uint32_t> v) {
+  const auto narrowed = _mm512_cvtusepi32_epi8(v.raw);
+  return VFromD<D>{narrowed};
+}
+
+// i16 -> u8 with unsigned saturation.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec512<int16_t> v) {
+  const Vec512<uint8_t> packed{_mm512_packus_epi16(v.raw, v.raw)};
+
+  // Gather the even 64-bit lanes into the lower 256 bits.
+  alignas(64) static constexpr uint64_t kEvenU64[8] = {0, 2, 4, 6, 0, 2, 4, 6};
+  const Full512<uint64_t> du64;
+  const auto even_idx = Load(du64, kEvenU64);
+  const Vec512<uint8_t> gathered{
+      _mm512_permutexvar_epi64(even_idx.raw, packed.raw)};
+  return LowerHalf(gathered);
+}
+
+// u16 -> u8: clamp to the largest non-negative i16 first so the value can be
+// reinterpreted as signed and handled by the i16 overload.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D dn, Vec512<uint16_t> v) {
+  const DFromV<decltype(v)> du16;
+  const RebindToSigned<decltype(du16)> di16;
+  const auto clamped = Min(v, Set(du16, 0x7FFFu));
+  return DemoteTo(dn, BitCast(di16, clamped));
+}
+
+// i32 -> i8: two signed-saturating packs (i32 -> i16 -> i8), then the first
+// 32-bit lane of every 128-bit block (which holds that block's four results)
+// is collected at the bottom of the vector.
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec512<int32_t> v) {
+  const Full512<uint32_t> du32;
+  const Vec512<int16_t> i16{_mm512_packs_epi32(v.raw, v.raw)};
+  const Vec512<int8_t> i8{_mm512_packs_epi16(i16.raw, i16.raw)};
+
+  // Only 128 bits are read by LoadDup128, so four indices suffice (same table
+  // as the u8 overload); the loaded block is repeated across the vector.
+  alignas(16) static constexpr uint32_t kLanes[4] = {0, 4, 8, 12};
+  const auto idx32 = LoadDup128(du32, kLanes);
+  const Vec512<int8_t> fixed{_mm512_permutexvar_epi32(idx32.raw, i8.raw)};
+  return LowerHalf(LowerHalf(fixed));
+}
+
+// i16 -> i8 with signed saturation.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_I8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec512<int16_t> v) {
+  // The pack works within each 128-bit block; with `v` as both inputs, the
+  // two 64-bit halves of every block hold the same eight results.
+  const Vec512<int8_t> packed{_mm512_packs_epi16(v.raw, v.raw)};
+
+  // Gather the even 64-bit lanes into the lower 256 bits.
+  alignas(64) static constexpr uint64_t kEvenU64[8] = {0, 2, 4, 6, 0, 2, 4, 6};
+  const Full512<uint64_t> du64;
+  const auto even_idx = Load(du64, kEvenU64);
+  const Vec512<int8_t> gathered{
+      _mm512_permutexvar_epi64(even_idx.raw, packed.raw)};
+  return LowerHalf(gathered);
+}
+
+// i64 -> i32 / i16 / i8 via the signed-saturating down-convert intrinsics.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec512<int64_t> v) {
+  const auto narrowed = _mm512_cvtsepi64_epi32(v.raw);
+  return VFromD<D>{narrowed};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec512<int64_t> v) {
+  const auto narrowed = _mm512_cvtsepi64_epi16(v.raw);
+  return VFromD<D>{narrowed};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_I8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec512<int64_t> v) {
+  const auto narrowed = _mm512_cvtsepi64_epi8(v.raw);
+  return VFromD<D>{narrowed};
+}
+
+// i64 -> u32 / u16 / u8. The unsigned-saturating down-convert is applied only
+// to lanes selected by the complement of the mask derived from `v`; all other
+// lanes are zeroed by the maskz form of the intrinsic.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec512<int64_t> v) {
+  const auto is_negative = MaskFromVec(v);
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask8 keep = _knot_mask8(is_negative.raw);
+#else
+  const __mmask8 keep = static_cast<__mmask8>(~is_negative.raw);
+#endif
+  const auto narrowed = _mm512_maskz_cvtusepi64_epi32(keep, v.raw);
+  return VFromD<D>{narrowed};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec512<int64_t> v) {
+  const auto is_negative = MaskFromVec(v);
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask8 keep = _knot_mask8(is_negative.raw);
+#else
+  const __mmask8 keep = static_cast<__mmask8>(~is_negative.raw);
+#endif
+  const auto narrowed = _mm512_maskz_cvtusepi64_epi16(keep, v.raw);
+  return VFromD<D>{narrowed};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec512<int64_t> v) {
+  const auto is_negative = MaskFromVec(v);
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  const __mmask8 keep = _knot_mask8(is_negative.raw);
+#else
+  const __mmask8 keep = static_cast<__mmask8>(~is_negative.raw);
+#endif
+  const auto narrowed = _mm512_maskz_cvtusepi64_epi8(keep, v.raw);
+  return VFromD<D>{narrowed};
+}
+
+// u64 -> u32 / u16 / u8 via the unsigned-saturating down-convert intrinsics.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec512<uint64_t> v) {
+  const auto narrowed = _mm512_cvtusepi64_epi32(v.raw);
+  return VFromD<D>{narrowed};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec512<uint64_t> v) {
+  const auto narrowed = _mm512_cvtusepi64_epi16(v.raw);
+  return VFromD<D>{narrowed};
+}
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec512<uint64_t> v) {
+  const auto narrowed = _mm512_cvtusepi64_epi8(v.raw);
+  return VFromD<D>{narrowed};
+}
+
+// float -> float16 using _mm512_cvtps_ph with _MM_FROUND_NO_EXC as the
+// rounding/exception control argument.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F16_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec512<float> v) {
+  // Work around warnings in the intrinsic definitions (passing -1 as a mask).
+  // The diagnostic state is saved here and restored by the pop below, which
+  // is placed after the return statement so that the suppression covers it.
+  HWY_DIAGNOSTICS(push)
+  HWY_DIAGNOSTICS_OFF(disable : 4245 4365, ignored "-Wsign-conversion")
+  return VFromD<D>{_mm512_cvtps_ph(v.raw, _MM_FROUND_NO_EXC)};
+  HWY_DIAGNOSTICS(pop)
+}
+
+// float -> bfloat16 by keeping the upper 16 bits of each float: logical shift
+// right by 16, then demote the resulting 32-bit values to 16 bits.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_BF16_D(D)>
+HWY_API VFromD<D> DemoteTo(D dbf16, Vec512<float> v) {
+  // TODO(janwas): _mm512_cvtneps_pbh once we have avx512bf16.
+  const Rebind<uint16_t, decltype(dbf16)> du16;
+  const Rebind<uint32_t, decltype(dbf16)> du32;  // for logical shift right
+  const Rebind<int32_t, decltype(dbf16)> di32;
+  const auto upper_bits = ShiftRight<16>(BitCast(du32, v));
+  const auto upper_as_i32 = BitCast(di32, upper_bits);
+  return BitCast(dbf16, DemoteTo(du16, upper_as_i32));
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_BF16_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dbf16, Vec512<float> a, Vec512<float> b) {
+  // TODO(janwas): _mm512_cvtne2ps_pbh once we have avx512bf16.
+  const Repartition<uint32_t, D> d_u32;
+  const RebindToUnsigned<D> d_u16;
+  const auto b_upper = BitCast(d_u16, ShiftRight<16>(BitCast(d_u32, b)));
+  return BitCast(dbf16, OddEven(BitCast(d_u16, a), b_upper));
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D /* tag */, Vec512<int32_t> a,
+                                   Vec512<int32_t> b) {
+  return VFromD<D>{_mm512_packs_epi32(a.raw, b.raw)};  // signed saturation
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D /* tag */, Vec512<int32_t> a,
+                                   Vec512<int32_t> b) {
+  return VFromD<D>{_mm512_packus_epi32(a.raw, b.raw)};  // unsigned saturation
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, Vec512<uint32_t> a,
+                                   Vec512<uint32_t> b) {
+  const DFromV<decltype(a)> d_u32;
+  const RebindToSigned<decltype(d_u32)> d_i32;
+  const auto i32_limit = Set(d_u32, 0x7FFFFFFFu);
+  // Cap at the largest int32_t so the BitCast to signed stays non-negative.
+  return ReorderDemote2To(dn, BitCast(d_i32, Min(a, i32_limit)),
+                          BitCast(d_i32, Min(b, i32_limit)));
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I8_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D /* tag */, Vec512<int16_t> a,
+                                   Vec512<int16_t> b) {
+  return VFromD<D>{_mm512_packs_epi16(a.raw, b.raw)};  // signed saturation
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D /* tag */, Vec512<int16_t> a,
+                                   Vec512<int16_t> b) {
+  return VFromD<D>{_mm512_packus_epi16(a.raw, b.raw)};  // unsigned saturation
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, Vec512<uint16_t> a,
+                                   Vec512<uint16_t> b) {
+  const DFromV<decltype(a)> d_u16;
+  const RebindToSigned<decltype(d_u16)> d_i16;
+  const auto i16_limit = Set(d_u16, 0x7FFFu);
+  // Cap at the largest int16_t so the BitCast to signed stays non-negative.
+  return ReorderDemote2To(dn, BitCast(d_i16, Min(a, i16_limit)),
+                          BitCast(d_i16, Min(b, i16_limit)));
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_UI32_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, Vec512<int64_t> a, Vec512<int64_t> b) {
+  const Half<decltype(dn)> dnh;  // half-width tag for each demoted input
+  return Combine(dn, DemoteTo(dnh, b), DemoteTo(dnh, a));  // b passed first, then a
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> ReorderDemote2To(D dn, Vec512<uint64_t> a,
+                                   Vec512<uint64_t> b) {
+  const Half<decltype(dn)> dnh;  // half-width tag for each demoted input
+  return Combine(dn, DemoteTo(dnh, b), DemoteTo(dnh, a));  // b passed first, then a
+}
+
+template <class D, class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<D>),
+          HWY_IF_V_SIZE_D(D, 64), HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<D>) * 2),
+          HWY_IF_LANES_D(D, HWY_MAX_LANES_D(DFromV<V>) * 2),
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2) | (1 << 4))>
+HWY_API VFromD<D> OrderedDemote2To(D d, V a, V b) {
+  alignas(64) static constexpr uint64_t kUnshuffle[8] = {0, 2, 4, 6, 1, 3, 5, 7};
+  const Full512<uint64_t> du64;
+  const auto packed = BitCast(du64, ReorderDemote2To(d, a, b));
+  return BitCast(d, TableLookupLanes(packed, SetTableIndices(du64, kUnshuffle)));
+}
+
+template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<D>),
+          HWY_IF_V_SIZE_GT_D(D, 16), class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V),
+          HWY_IF_T_SIZE_V(V, sizeof(TFromD<D>) * 2),
+          HWY_IF_LANES_D(D, HWY_MAX_LANES_D(DFromV<V>) * 2),
+          HWY_IF_T_SIZE_V(V, 8)>
+HWY_API VFromD<D> OrderedDemote2To(D d, V a, V b) {
+  return ReorderDemote2To(d, a, b);  // 8-byte source lanes: forwarded unchanged
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec512<double> v) {
+  return VFromD<D>{_mm512_cvtpd_ps(v.raw)};  // f64 -> f32
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> DemoteTo(D /* tag */, Vec512<double> v) {
+  // Clamp via the helper (per its name: to the int32 maximum), then truncate.
+  const auto in_range = detail::ClampF64ToI32Max(Full512<double>(), v);
+  return VFromD<D>{_mm512_cvttpd_epi32(in_range.raw)};
+}
+
+// Narrows u32 lanes to u8; the input must already be limited to [0, 255].
+HWY_API Vec128<uint8_t> U8FromU32(const Vec512<uint32_t> v) {
+  const DFromV<decltype(v)> du32;
+  // Step 1: within every 128-bit block, move the low byte of each of the four
+  // u32 lanes into the block's first four bytes.
+  alignas(16) static constexpr uint32_t kLowBytes[4] = {0x0C080400u, ~0u, ~0u,
+                                                        ~0u};
+  const auto per_block = TableLookupBytes(v, LoadDup128(du32, kLowBytes));
+  // Step 2: collect the first u32 of each of the four 128-bit blocks.
+  alignas(16) static constexpr uint32_t kBlockStarts[4] = {0, 4, 8, 12};
+  const Vec512<uint8_t> packed{
+      _mm512_permutexvar_epi32(LoadDup128(du32, kBlockStarts).raw, per_block.raw)};
+  return LowerHalf(LowerHalf(packed));
+}
+
+// ------------------------------ Truncations
+
+template <class D, HWY_IF_V_SIZE_D(D, 8), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> TruncateTo(D d, const Vec512<uint64_t> v) {
+#if HWY_TARGET <= HWY_AVX3_DL
+  (void)d;
+  const Full512<uint8_t> du8;
+  alignas(16) static constexpr uint8_t kLowByteOfU64[16] = {
+      0, 8, 16, 24, 32, 40, 48, 56, 0, 8, 16, 24, 32, 40, 48, 56};
+  const Vec512<uint8_t> gathered{
+      _mm512_permutexvar_epi8(LoadDup128(du8, kLowByteOfU64).raw, v.raw)};
+  return LowerHalf(LowerHalf(LowerHalf(gathered)));
+#else
+  const Full512<uint32_t> du32;  // no byte permute: narrow to u32 first, then recurse
+  alignas(64) static constexpr uint32_t kEvenU32[16] = {0, 2, 4, 6, 8, 10, 12, 14,
+                                                        0, 2, 4, 6, 8, 10, 12, 14};
+  const Vec512<uint32_t> low_halves{
+      _mm512_permutexvar_epi32(Load(du32, kEvenU32).raw, v.raw)};
+  return TruncateTo(d, LowerHalf(low_halves));
+#endif
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, const Vec512<uint64_t> v) {
+  // Index, in u16 units, of the lowest 16 bits of each of the eight u64 lanes.
+  alignas(16) static constexpr uint16_t kLowU16[8] = {0, 4, 8, 12, 16, 20, 24, 28};
+  const Full512<uint16_t> du16;
+  const Vec512<uint16_t> gathered{
+      _mm512_permutexvar_epi16(LoadDup128(du16, kLowU16).raw, v.raw)};
+  return LowerHalf(LowerHalf(gathered));
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, const Vec512<uint64_t> v) {
+  alignas(64) static constexpr uint32_t kEvenU32[16] = {0, 2, 4, 6, 8, 10, 12, 14,
+                                                        0, 2, 4, 6, 8, 10, 12, 14};
+  const Full512<uint32_t> du32;  // even u32 lanes are the low halves of the u64 lanes
+  const Vec512<uint32_t> low_halves{
+      _mm512_permutexvar_epi32(Load(du32, kEvenU32).raw, v.raw)};
+  return LowerHalf(low_halves);
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 16), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, const Vec512<uint32_t> v) {
+#if HWY_TARGET <= HWY_AVX3_DL
+  const Full512<uint8_t> du8;
+  alignas(16) static constexpr uint8_t kLowByteOfU32[16] = {
+      0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60};
+  const Vec512<uint8_t> packed{
+      _mm512_permutexvar_epi8(LoadDup128(du8, kLowByteOfU32).raw, v.raw)};
+#else
+  const Full512<uint32_t> du32;
+  // Step 1: within every 128-bit block, move the low byte of each of the four
+  // u32 lanes into the block's first four bytes.
+  alignas(16) static constexpr uint32_t kLowBytes[4] = {0x0C080400u, ~0u, ~0u,
+                                                        ~0u};
+  const auto per_block = TableLookupBytes(v, LoadDup128(du32, kLowBytes));
+  // Step 2: collect the first u32 of each of the four 128-bit blocks.
+  alignas(16) static constexpr uint32_t kBlockStarts[4] = {0, 4, 8, 12};
+  const Vec512<uint8_t> packed{
+      _mm512_permutexvar_epi32(LoadDup128(du32, kBlockStarts).raw, per_block.raw)};
+#endif
+  return LowerHalf(LowerHalf(packed));
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, const Vec512<uint32_t> v) {
+  const Full512<uint16_t> du16;
+  alignas(64) static constexpr uint16_t kEvenU16[32] = {
+      0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
+      0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30};
+  const Vec512<uint16_t> low_halves{
+      _mm512_permutexvar_epi16(Load(du16, kEvenU16).raw, v.raw)};
+  return LowerHalf(low_halves);  // the index table repeats, so only half is needed
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U8_D(D)>
+HWY_API VFromD<D> TruncateTo(D /* tag */, const Vec512<uint16_t> v) {
+#if HWY_TARGET <= HWY_AVX3_DL
+  const Full512<uint8_t> du8;
+  alignas(64) static constexpr uint8_t kEvenBytes[64] = {
+      0,  2,  4,  6,  8,  10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
+      32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62,
+      0,  2,  4,  6,  8,  10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
+      32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62};
+  const Vec512<uint8_t> packed{
+      _mm512_permutexvar_epi8(Load(du8, kEvenBytes).raw, v.raw)};
+#else
+  const Full512<uint32_t> du32;
+  alignas(16) static constexpr uint32_t kEvenBytesInBlock[4] = {
+      0x06040200u, 0x0E0C0A08u, 0x06040200u, 0x0E0C0A08u};
+  const auto per_block = TableLookupBytes(v, LoadDup128(du32, kEvenBytesInBlock));
+  alignas(64) static constexpr uint32_t kLowU64OfBlocks[16] = {
+      0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
+  const Vec512<uint8_t> packed{
+      _mm512_permutexvar_epi32(Load(du32, kLowU64OfBlocks).raw, per_block.raw)};
+#endif
+  return LowerHalf(packed);
+}
+
+// ------------------------------ Convert integer <=> floating point
+
+#if HWY_HAVE_FLOAT16
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F16_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, Vec512<uint16_t> v) {
+  return VFromD<D>{_mm512_cvtepu16_ph(v.raw)};  // u16 -> f16
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F16_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, Vec512<int16_t> v) {
+  return VFromD<D>{_mm512_cvtepi16_ph(v.raw)};  // i16 -> f16
+}
+#endif  // HWY_HAVE_FLOAT16
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, Vec512<int32_t> v) {
+  return VFromD<D>{_mm512_cvtepi32_ps(v.raw)};  // i32 -> f32
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, Vec512<int64_t> v) {
+  return VFromD<D>{_mm512_cvtepi64_pd(v.raw)};  // i64 -> f64
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, Vec512<uint32_t> v) {
+  return VFromD<D>{_mm512_cvtepu32_ps(v.raw)};  // u32 -> f32
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> ConvertTo(D /* tag */, Vec512<uint64_t> v) {
+  return VFromD<D>{_mm512_cvtepu64_pd(v.raw)};  // u64 -> f64
+}
+
+// Float -> integer. Truncates (rounds toward zero); the raw result is passed through FixConversionOverflow.
+#if HWY_HAVE_FLOAT16
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> ConvertTo(D d, Vec512<float16_t> v) {
+  return detail::FixConversionOverflow(d, v,
+                                       VFromD<D>{_mm512_cvttph_epi16(v.raw)});  // f16 -> i16
+}
+#endif  // HWY_HAVE_FLOAT16
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> ConvertTo(D d, Vec512<float> v) {
+  return detail::FixConversionOverflow(d, v,
+                                       VFromD<D>{_mm512_cvttps_epi32(v.raw)});  // f32 -> i32
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I64_D(D)>
+HWY_API VFromD<D> ConvertTo(D di, Vec512<double> v) {
+  return detail::FixConversionOverflow(di, v,
+                                       VFromD<D>{_mm512_cvttpd_epi64(v.raw)});  // f64 -> i64
+}
+
+HWY_API Vec512<int32_t> NearestInt(const Vec512<float> v) {
+  const Vec512<int32_t> rounded{_mm512_cvtps_epi32(v.raw)};  // non-truncating convert
+  const Full512<int32_t> di;
+  return detail::FixConversionOverflow(di, v, rounded);
+}
+
+// ================================================== CRYPTO
+
+#if !defined(HWY_DISABLE_PCLMUL_AES)
+
+HWY_API Vec512<uint8_t> AESRound(Vec512<uint8_t> state,
+                                 Vec512<uint8_t> round_key) {
+#if HWY_TARGET <= HWY_AVX3_DL
+  return Vec512<uint8_t>{_mm512_aesenc_epi128(state.raw, round_key.raw)};
+#else
+  const DFromV<decltype(state)> d;  // fallback: run the half-width overload twice
+  const Half<decltype(d)> dh;
+  const auto hi = AESRound(UpperHalf(dh, state), UpperHalf(dh, round_key));
+  return Combine(d, hi, AESRound(LowerHalf(state), LowerHalf(round_key)));
+#endif
+}
+
+HWY_API Vec512<uint8_t> AESLastRound(Vec512<uint8_t> state,
+                                     Vec512<uint8_t> round_key) {
+#if HWY_TARGET <= HWY_AVX3_DL
+  return Vec512<uint8_t>{_mm512_aesenclast_epi128(state.raw, round_key.raw)};
+#else
+  const DFromV<decltype(state)> d;  // fallback: run the half-width overload twice
+  const Half<decltype(d)> dh;
+  const auto hi = AESLastRound(UpperHalf(dh, state), UpperHalf(dh, round_key));
+  const auto lo = AESLastRound(LowerHalf(state), LowerHalf(round_key));
+  return Combine(d, hi, lo);
+#endif
+}
+
+HWY_API Vec512<uint8_t> AESRoundInv(Vec512<uint8_t> state,
+                                    Vec512<uint8_t> round_key) {
+#if HWY_TARGET <= HWY_AVX3_DL
+  return Vec512<uint8_t>{_mm512_aesdec_epi128(state.raw, round_key.raw)};
+#else
+  const Full512<uint8_t> d;  // fallback: run the half-width overload twice
+  const Half<decltype(d)> dh;
+  const auto hi = AESRoundInv(UpperHalf(dh, state), UpperHalf(dh, round_key));
+  return Combine(d, hi, AESRoundInv(LowerHalf(state), LowerHalf(round_key)));
+#endif
+}
+
+HWY_API Vec512<uint8_t> AESLastRoundInv(Vec512<uint8_t> state,
+                                        Vec512<uint8_t> round_key) {
+#if HWY_TARGET <= HWY_AVX3_DL
+  return Vec512<uint8_t>{_mm512_aesdeclast_epi128(state.raw, round_key.raw)};
+#else
+  const Full512<uint8_t> d;  // fallback: run the half-width overload twice
+  const Half<decltype(d)> dh;
+  const auto hi = AESLastRoundInv(UpperHalf(dh, state), UpperHalf(dh, round_key));
+  const auto lo = AESLastRoundInv(LowerHalf(state), LowerHalf(round_key));
+  return Combine(d, hi, lo);
+#endif
+}
+
+template <uint8_t kRcon>
+HWY_API Vec512<uint8_t> AESKeyGenAssist(Vec512<uint8_t> v) {
+  const Full512<uint8_t> d;
+#if HWY_TARGET <= HWY_AVX3_DL
+  alignas(16) static constexpr uint8_t kRconXorMask[16] = {
+      0, kRcon, 0, 0, 0, 0, 0, 0, 0, kRcon, 0, 0, 0, 0, 0, 0};
+  alignas(16) static constexpr uint8_t kRotWordShuffle[16] = {
+      0, 13, 10, 7, 1, 14, 11, 4, 8, 5, 2, 15, 9, 6, 3, 12};
+  const Repartition<uint32_t, decltype(d)> du32;
+  const auto w13 = BitCast(d, DupOdd(BitCast(du32, v)));  // odd u32 lanes duplicated
+  const auto sub_word_result = AESLastRound(w13, LoadDup128(d, kRconXorMask));
+  return TableLookupBytes(sub_word_result, LoadDup128(d, kRotWordShuffle));
+#else
+  const Half<decltype(d)> d2;  // fallback: apply the half-width overload to each half
+  return Combine(d, AESKeyGenAssist<kRcon>(UpperHalf(d2, v)),
+                 AESKeyGenAssist<kRcon>(LowerHalf(v)));
+#endif
+}
+
+HWY_API Vec512<uint64_t> CLMulLower(Vec512<uint64_t> va, Vec512<uint64_t> vb) {
+#if HWY_TARGET <= HWY_AVX3_DL
+  return Vec512<uint64_t>{_mm512_clmulepi64_epi128(va.raw, vb.raw, 0x00)};
+#else
+  const DFromV<decltype(va)> d512;
+  const Half<Half<decltype(d512)>> d128;
+  alignas(64) uint64_t lanes_a[8];
+  alignas(64) uint64_t lanes_b[8];
+  Store(va, d512, lanes_a);
+  Store(vb, d512, lanes_b);
+  for (size_t blk = 0; blk < 8; blk += 2) {  // one 128-bit block per iteration
+    const auto prod = CLMulLower(Load(d128, lanes_a + blk), Load(d128, lanes_b + blk));
+    Store(prod, d128, lanes_a + blk);  // result overwrites the consumed input
+  }
+  return Load(d512, lanes_a);
+#endif
+}
+
+HWY_API Vec512<uint64_t> CLMulUpper(Vec512<uint64_t> va, Vec512<uint64_t> vb) {
+#if HWY_TARGET <= HWY_AVX3_DL
+  return Vec512<uint64_t>{_mm512_clmulepi64_epi128(va.raw, vb.raw, 0x11)};
+#else
+  const DFromV<decltype(va)> d512;
+  const Half<Half<decltype(d512)>> d128;
+  alignas(64) uint64_t lanes_a[8];
+  alignas(64) uint64_t lanes_b[8];
+  Store(va, d512, lanes_a);
+  Store(vb, d512, lanes_b);
+  for (size_t blk = 0; blk < 8; blk += 2) {  // one 128-bit block per iteration
+    const auto prod = CLMulUpper(Load(d128, lanes_a + blk), Load(d128, lanes_b + blk));
+    Store(prod, d128, lanes_a + blk);  // result overwrites the consumed input
+  }
+  return Load(d512, lanes_a);
+#endif
+}
+
+#endif  // HWY_DISABLE_PCLMUL_AES
+
+// ================================================== MISC
+
+// ------------------------------ I32/I64 SaturatedAdd (MaskFromVec)
+
+HWY_API Vec512<int32_t> SaturatedAdd(Vec512<int32_t> a, Vec512<int32_t> b) {
+  const auto wrapped = a + b;
+  // 0x42: a result bit is set iff a and b agree there but `wrapped` differs.
+  const auto did_overflow = MaskFromVec(Vec512<int32_t>{
+      _mm512_ternarylogic_epi32(a.raw, b.raw, wrapped.raw, 0x42)});
+  const auto lim = Set(DFromV<decltype(a)>(), LimitsMax<int32_t>());
+  const Vec512<int32_t> clamp{_mm512_mask_ternarylogic_epi32(  // 0x55 = NOT
+      lim.raw, MaskFromVec(a).raw, lim.raw, lim.raw, 0x55)};
+  return IfThenElse(did_overflow, clamp, wrapped);
+}
+
+HWY_API Vec512<int64_t> SaturatedAdd(Vec512<int64_t> a, Vec512<int64_t> b) {
+  const auto wrapped = a + b;
+  // 0x42: a result bit is set iff a and b agree there but `wrapped` differs.
+  const auto did_overflow = MaskFromVec(Vec512<int64_t>{
+      _mm512_ternarylogic_epi64(a.raw, b.raw, wrapped.raw, 0x42)});
+  const auto lim = Set(DFromV<decltype(a)>(), LimitsMax<int64_t>());
+  const Vec512<int64_t> clamp{_mm512_mask_ternarylogic_epi64(  // 0x55 = NOT
+      lim.raw, MaskFromVec(a).raw, lim.raw, lim.raw, 0x55)};
+  return IfThenElse(did_overflow, clamp, wrapped);
+}
+
+// ------------------------------ I32/I64 SaturatedSub (MaskFromVec)
+
+HWY_API Vec512<int32_t> SaturatedSub(Vec512<int32_t> a, Vec512<int32_t> b) {
+  const auto wrapped = a - b;
+  // 0x18: a result bit is set iff a and b differ there and `wrapped` matches b.
+  const auto did_overflow = MaskFromVec(Vec512<int32_t>{
+      _mm512_ternarylogic_epi32(a.raw, b.raw, wrapped.raw, 0x18)});
+  const auto lim = Set(DFromV<decltype(a)>(), LimitsMax<int32_t>());
+  const Vec512<int32_t> clamp{_mm512_mask_ternarylogic_epi32(  // 0x55 = NOT
+      lim.raw, MaskFromVec(a).raw, lim.raw, lim.raw, 0x55)};
+  return IfThenElse(did_overflow, clamp, wrapped);
+}
+
+HWY_API Vec512<int64_t> SaturatedSub(Vec512<int64_t> a, Vec512<int64_t> b) {
+  const auto wrapped = a - b;
+  // 0x18: a result bit is set iff a and b differ there and `wrapped` matches b.
+  const auto did_overflow = MaskFromVec(Vec512<int64_t>{
+      _mm512_ternarylogic_epi64(a.raw, b.raw, wrapped.raw, 0x18)});
+  const auto lim = Set(DFromV<decltype(a)>(), LimitsMax<int64_t>());
+  const Vec512<int64_t> clamp{_mm512_mask_ternarylogic_epi64(  // 0x55 = NOT
+      lim.raw, MaskFromVec(a).raw, lim.raw, lim.raw, 0x55)};
+  return IfThenElse(did_overflow, clamp, wrapped);
+}
+
+// ------------------------------ Mask testing
+
+// Beware: the suffix indicates the number of mask bits, not lane size!
+
+namespace detail {
+
+template <typename T>
+HWY_INLINE bool AllFalse(hwy::SizeTag<1> /*tag*/, const Mask512<T> mask) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestz_mask64_u8(mask.raw, mask.raw);  // 1-byte lanes: 64 mask bits
+#else
+  return mask.raw == 0;
+#endif
+}
+template <typename T>
+HWY_INLINE bool AllFalse(hwy::SizeTag<2> /*tag*/, const Mask512<T> mask) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestz_mask32_u8(mask.raw, mask.raw);  // 2-byte lanes: 32 mask bits
+#else
+  return mask.raw == 0;
+#endif
+}
+template <typename T>
+HWY_INLINE bool AllFalse(hwy::SizeTag<4> /*tag*/, const Mask512<T> mask) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestz_mask16_u8(mask.raw, mask.raw);  // 4-byte lanes: 16 mask bits
+#else
+  return mask.raw == 0;
+#endif
+}
+template <typename T>
+HWY_INLINE bool AllFalse(hwy::SizeTag<8> /*tag*/, const Mask512<T> mask) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestz_mask8_u8(mask.raw, mask.raw);  // 8-byte lanes: 8 mask bits
+#else
+  return mask.raw == 0;
+#endif
+}
+
+}  // namespace detail
+
+template <class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API bool AllFalse(D /* tag */, const MFromD<D> mask) {
+  return detail::AllFalse(hwy::SizeTag<sizeof(TFromD<D>)>(), mask);  // dispatch on lane size
+}
+
+namespace detail {
+
+template <typename T>
+HWY_INLINE bool AllTrue(hwy::SizeTag<1> /*tag*/, const Mask512<T> mask) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestc_mask64_u8(mask.raw, mask.raw);  // 1-byte lanes: 64 mask bits
+#else
+  return mask.raw == 0xFFFFFFFFFFFFFFFFull;
+#endif
+}
+template <typename T>
+HWY_INLINE bool AllTrue(hwy::SizeTag<2> /*tag*/, const Mask512<T> mask) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestc_mask32_u8(mask.raw, mask.raw);  // 2-byte lanes: 32 mask bits
+#else
+  return mask.raw == 0xFFFFFFFFull;
+#endif
+}
+template <typename T>
+HWY_INLINE bool AllTrue(hwy::SizeTag<4> /*tag*/, const Mask512<T> mask) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestc_mask16_u8(mask.raw, mask.raw);  // 4-byte lanes: 16 mask bits
+#else
+  return mask.raw == 0xFFFFull;
+#endif
+}
+template <typename T>
+HWY_INLINE bool AllTrue(hwy::SizeTag<8> /*tag*/, const Mask512<T> mask) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestc_mask8_u8(mask.raw, mask.raw);  // 8-byte lanes: 8 mask bits
+#else
+  return mask.raw == 0xFFull;
+#endif
+}
+
+}  // namespace detail
+
+template <class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API bool AllTrue(D /* tag */, const MFromD<D> mask) {
+  return detail::AllTrue(hwy::SizeTag<sizeof(TFromD<D>)>(), mask);  // dispatch on lane size
+}
+
+// `bits` points to at least 8 readable bytes, not all of which need be valid.
+template <class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API MFromD<D> LoadMaskBits(D /* tag */, const uint8_t* HWY_RESTRICT bits) {
+  MFromD<D> mask;
+  CopyBytes<8 / sizeof(TFromD<D>)>(bits, &mask.raw);  // one bit per lane: 8/sizeof(T) bytes
+  // N >= 8 (= 512 / 64), so no need to mask invalid bits.
+  return mask;
+}
+
+// `bits` points to at least 8 writable bytes. Returns the number of bytes written.
+template <class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API size_t StoreMaskBits(D /* tag */, MFromD<D> mask, uint8_t* bits) {
+  const size_t kNumBytes = 8 / sizeof(TFromD<D>);  // one bit per lane
+  CopyBytes<kNumBytes>(&mask.raw, bits);
+  // N >= 8 (= 512 / 64), so no need to mask invalid bits.
+  return kNumBytes;
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API size_t CountTrue(D /* tag */, const MFromD<D> mask) {
+  return PopCount(static_cast<uint64_t>(mask.raw));  // number of set mask bits
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API size_t FindKnownFirstTrue(D /* tag */, MFromD<D> mask) {
+  return Num0BitsBelowLS1Bit_Nonzero32(mask.raw);  // lanes wider than one byte
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API size_t FindKnownFirstTrue(D /* tag */, MFromD<D> mask) {
+  return Num0BitsBelowLS1Bit_Nonzero64(mask.raw);  // one-byte lanes: 64 mask bits
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API intptr_t FindFirstTrue(D d, MFromD<D> mask) {
+  if (!mask.raw) return intptr_t{-1};  // no mask bit is set
+  return static_cast<intptr_t>(FindKnownFirstTrue(d, mask));
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_NOT_T_SIZE_D(D, 1)>
+HWY_API size_t FindKnownLastTrue(D /* tag */, MFromD<D> mask) {
+  return 31 - Num0BitsAboveMS1Bit_Nonzero32(mask.raw);  // lanes wider than one byte
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_T_SIZE_D(D, 1)>
+HWY_API size_t FindKnownLastTrue(D /* tag */, MFromD<D> mask) {
+  return 63 - Num0BitsAboveMS1Bit_Nonzero64(mask.raw);  // one-byte lanes: 64 mask bits
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API intptr_t FindLastTrue(D d, MFromD<D> mask) {
+  if (!mask.raw) return intptr_t{-1};  // no mask bit is set
+  return static_cast<intptr_t>(FindKnownLastTrue(d, mask));
+}
+
+// ------------------------------ Compress
+
+// Always implement 8-bit here even if we lack VBMI2 because we can do better
+// than generic_ops (8 at a time) via the native 32-bit compress (16 at a time).
+#ifdef HWY_NATIVE_COMPRESS8
+#undef HWY_NATIVE_COMPRESS8
+#else
+#define HWY_NATIVE_COMPRESS8
+#endif
+
+namespace detail {
+
+#if HWY_TARGET <= HWY_AVX3_DL  // VBMI2
+template <size_t N>
+HWY_INLINE Vec128<uint8_t, N> NativeCompress(const Vec128<uint8_t, N> v,
+                                             const Mask128<uint8_t, N> mask) {
+  return Vec128<uint8_t, N>{_mm_maskz_compress_epi8(mask.raw, v.raw)};  // u8, 128-bit
+}
+HWY_INLINE Vec256<uint8_t> NativeCompress(const Vec256<uint8_t> v,
+                                          const Mask256<uint8_t> mask) {
+  return Vec256<uint8_t>{_mm256_maskz_compress_epi8(mask.raw, v.raw)};  // u8, 256-bit
+}
+HWY_INLINE Vec512<uint8_t> NativeCompress(const Vec512<uint8_t> v,
+                                          const Mask512<uint8_t> mask) {
+  return Vec512<uint8_t>{_mm512_maskz_compress_epi8(mask.raw, v.raw)};  // u8, 512-bit
+}
+
+template <size_t N>
+HWY_INLINE Vec128<uint16_t, N> NativeCompress(const Vec128<uint16_t, N> v,
+                                              const Mask128<uint16_t, N> mask) {
+  return Vec128<uint16_t, N>{_mm_maskz_compress_epi16(mask.raw, v.raw)};  // u16, 128-bit
+}
+HWY_INLINE Vec256<uint16_t> NativeCompress(const Vec256<uint16_t> v,
+                                           const Mask256<uint16_t> mask) {
+  return Vec256<uint16_t>{_mm256_maskz_compress_epi16(mask.raw, v.raw)};  // u16, 256-bit
+}
+HWY_INLINE Vec512<uint16_t> NativeCompress(const Vec512<uint16_t> v,
+                                           const Mask512<uint16_t> mask) {
+  return Vec512<uint16_t>{_mm512_maskz_compress_epi16(mask.raw, v.raw)};  // u16, 512-bit
+}
+
+// Slow on Zen4, do not even define these to prevent accidental usage.
+#if HWY_TARGET != HWY_AVX3_ZEN4
+
+template <size_t N>
+HWY_INLINE void NativeCompressStore(Vec128<uint8_t, N> v,
+                                    Mask128<uint8_t, N> mask,
+                                    uint8_t* HWY_RESTRICT unaligned) {
+  _mm_mask_compressstoreu_epi8(unaligned, mask.raw, v.raw);  // u8, 128-bit
+}
+HWY_INLINE void NativeCompressStore(Vec256<uint8_t> v, Mask256<uint8_t> mask,
+                                    uint8_t* HWY_RESTRICT unaligned) {
+  _mm256_mask_compressstoreu_epi8(unaligned, mask.raw, v.raw);  // u8, 256-bit
+}
+HWY_INLINE void NativeCompressStore(Vec512<uint8_t> v, Mask512<uint8_t> mask,
+                                    uint8_t* HWY_RESTRICT unaligned) {
+  _mm512_mask_compressstoreu_epi8(unaligned, mask.raw, v.raw);  // u8, 512-bit
+}
+
+template <size_t N>
+HWY_INLINE void NativeCompressStore(Vec128<uint16_t, N> v,
+                                    Mask128<uint16_t, N> mask,
+                                    uint16_t* HWY_RESTRICT unaligned) {
+  _mm_mask_compressstoreu_epi16(unaligned, mask.raw, v.raw);  // u16, 128-bit
+}
+HWY_INLINE void NativeCompressStore(Vec256<uint16_t> v, Mask256<uint16_t> mask,
+                                    uint16_t* HWY_RESTRICT unaligned) {
+  _mm256_mask_compressstoreu_epi16(unaligned, mask.raw, v.raw);  // u16, 256-bit
+}
+HWY_INLINE void NativeCompressStore(Vec512<uint16_t> v, Mask512<uint16_t> mask,
+                                    uint16_t* HWY_RESTRICT unaligned) {
+  _mm512_mask_compressstoreu_epi16(unaligned, mask.raw, v.raw);  // u16, 512-bit
+}
+
+#endif  // HWY_TARGET != HWY_AVX3_ZEN4
+
+HWY_INLINE Vec512<uint8_t> NativeExpand(Vec512<uint8_t> v,
+                                        Mask512<uint8_t> mask) {
+  return Vec512<uint8_t>{_mm512_maskz_expand_epi8(mask.raw, v.raw)};  // u8 lanes
+}
+
+HWY_INLINE Vec512<uint16_t> NativeExpand(Vec512<uint16_t> v,
+                                         Mask512<uint16_t> mask) {
+  return Vec512<uint16_t>{_mm512_maskz_expand_epi16(mask.raw, v.raw)};  // u16 lanes
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U8_D(D)>
+HWY_INLINE VFromD<D> NativeLoadExpand(Mask512<uint8_t> mask, D /* d */,
+                                      const uint8_t* HWY_RESTRICT unaligned) {
+  return VFromD<D>{_mm512_maskz_expandloadu_epi8(mask.raw, unaligned)};  // u8, from memory
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U16_D(D)>
+HWY_INLINE VFromD<D> NativeLoadExpand(Mask512<uint16_t> mask, D /* d */,
+                                      const uint16_t* HWY_RESTRICT unaligned) {
+  return VFromD<D>{_mm512_maskz_expandloadu_epi16(mask.raw, unaligned)};  // u16, from memory
+}
+
+#endif  // HWY_TARGET <= HWY_AVX3_DL
+
+template <size_t N>
+HWY_INLINE Vec128<uint32_t, N> NativeCompress(Vec128<uint32_t, N> v,
+                                              Mask128<uint32_t, N> mask) {
+  return Vec128<uint32_t, N>{_mm_maskz_compress_epi32(mask.raw, v.raw)};  // u32, 128-bit
+}
+HWY_INLINE Vec256<uint32_t> NativeCompress(Vec256<uint32_t> v,
+                                           Mask256<uint32_t> mask) {
+  return Vec256<uint32_t>{_mm256_maskz_compress_epi32(mask.raw, v.raw)};  // u32, 256-bit
+}
+HWY_INLINE Vec512<uint32_t> NativeCompress(Vec512<uint32_t> v,
+                                           Mask512<uint32_t> mask) {
+  return Vec512<uint32_t>{_mm512_maskz_compress_epi32(mask.raw, v.raw)};  // u32, 512-bit
+}
+// We use table-based compress for 64-bit lanes, see CompressIsPartition.
+
+// Slow on Zen4, do not even define these to prevent accidental usage.
+#if HWY_TARGET != HWY_AVX3_ZEN4
+
+template <size_t N>
+HWY_INLINE void NativeCompressStore(Vec128<uint32_t, N> v,
+                                    Mask128<uint32_t, N> mask,
+                                    uint32_t* HWY_RESTRICT unaligned) {
+  _mm_mask_compressstoreu_epi32(unaligned, mask.raw, v.raw);  // u32, 128-bit
+}
+HWY_INLINE void NativeCompressStore(Vec256<uint32_t> v, Mask256<uint32_t> mask,
+                                    uint32_t* HWY_RESTRICT unaligned) {
+  _mm256_mask_compressstoreu_epi32(unaligned, mask.raw, v.raw);  // u32, 256-bit
+}
+HWY_INLINE void NativeCompressStore(Vec512<uint32_t> v, Mask512<uint32_t> mask,
+                                    uint32_t* HWY_RESTRICT unaligned) {
+  _mm512_mask_compressstoreu_epi32(unaligned, mask.raw, v.raw);  // u32, 512-bit
+}
+
+template <size_t N>
+HWY_INLINE void NativeCompressStore(Vec128<uint64_t, N> v,
+                                    Mask128<uint64_t, N> mask,
+                                    uint64_t* HWY_RESTRICT unaligned) {
+  _mm_mask_compressstoreu_epi64(unaligned, mask.raw, v.raw);  // u64, 128-bit
+}
+HWY_INLINE void NativeCompressStore(Vec256<uint64_t> v, Mask256<uint64_t> mask,
+                                    uint64_t* HWY_RESTRICT unaligned) {
+  _mm256_mask_compressstoreu_epi64(unaligned, mask.raw, v.raw);  // u64, 256-bit
+}
+HWY_INLINE void NativeCompressStore(Vec512<uint64_t> v, Mask512<uint64_t> mask,
+                                    uint64_t* HWY_RESTRICT unaligned) {
+  _mm512_mask_compressstoreu_epi64(unaligned, mask.raw, v.raw);  // u64, 512-bit
+}
+
+template <size_t N>
+HWY_INLINE void NativeCompressStore(Vec128<float, N> v, Mask128<float, N> mask,
+                                    float* HWY_RESTRICT unaligned) {
+  _mm_mask_compressstoreu_ps(unaligned, mask.raw, v.raw);  // f32, 128-bit
+}
+HWY_INLINE void NativeCompressStore(Vec256<float> v, Mask256<float> mask,
+                                    float* HWY_RESTRICT unaligned) {
+  _mm256_mask_compressstoreu_ps(unaligned, mask.raw, v.raw);  // f32, 256-bit
+}
+HWY_INLINE void NativeCompressStore(Vec512<float> v, Mask512<float> mask,
+                                    float* HWY_RESTRICT unaligned) {
+  _mm512_mask_compressstoreu_ps(unaligned, mask.raw, v.raw);  // f32, 512-bit
+}
+
+template <size_t N>
+HWY_INLINE void NativeCompressStore(Vec128<double, N> v,
+                                    Mask128<double, N> mask,
+                                    double* HWY_RESTRICT unaligned) {
+  _mm_mask_compressstoreu_pd(unaligned, mask.raw, v.raw);  // f64, 128-bit
+}
+HWY_INLINE void NativeCompressStore(Vec256<double> v, Mask256<double> mask,
+                                    double* HWY_RESTRICT unaligned) {
+  _mm256_mask_compressstoreu_pd(unaligned, mask.raw, v.raw);  // f64, 256-bit
+}
+HWY_INLINE void NativeCompressStore(Vec512<double> v, Mask512<double> mask,
+                                    double* HWY_RESTRICT unaligned) {
+  _mm512_mask_compressstoreu_pd(unaligned, mask.raw, v.raw);  // f64, 512-bit
+}
+
+#endif  // HWY_TARGET != HWY_AVX3_ZEN4
+
+HWY_INLINE Vec512<uint32_t> NativeExpand(Vec512<uint32_t> v,
+                                         Mask512<uint32_t> mask) {
+  return Vec512<uint32_t>{_mm512_maskz_expand_epi32(mask.raw, v.raw)};  // u32 lanes
+}
+
+HWY_INLINE Vec512<uint64_t> NativeExpand(Vec512<uint64_t> v,
+                                         Mask512<uint64_t> mask) {
+  return Vec512<uint64_t>{_mm512_maskz_expand_epi64(mask.raw, v.raw)};  // u64 lanes
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U32_D(D)>
+HWY_INLINE VFromD<D> NativeLoadExpand(Mask512<uint32_t> mask, D /* d */,
+                                      const uint32_t* HWY_RESTRICT unaligned) {
+  return VFromD<D>{_mm512_maskz_expandloadu_epi32(mask.raw, unaligned)};  // u32, from memory
+}
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U64_D(D)>
+HWY_INLINE VFromD<D> NativeLoadExpand(Mask512<uint64_t> mask, D /* d */,
+                                      const uint64_t* HWY_RESTRICT unaligned) {
+  return VFromD<D>{_mm512_maskz_expandloadu_epi64(mask.raw, unaligned)};  // u64, from memory
+}
+
+// For u8x16 and <= u16x16 we can avoid store+load for Compress because there is
+// only a single compressed vector (u32x16). Other EmuCompress are implemented
+// after the EmuCompressStore they build upon.
+// u8, up to 16 lanes: widen to u32, compress there, then truncate back.
+template <size_t N>
+HWY_INLINE Vec128<uint8_t, N> EmuCompress(Vec128<uint8_t, N> v,
+                                          Mask128<uint8_t, N> mask) {
+  using D8 = DFromV<decltype(v)>;
+  using D32 = Rebind<uint32_t, D8>;
+  // Mask type is __mmask16 if v is full 128, else __mmask8.
+  using M32 = MFromD<D32>;
+  using Raw32 = typename M32::Raw;
+  const D8 d8;
+  const D32 d32;
+
+  const uint64_t bits{mask.raw};
+  const M32 wide_mask{static_cast<Raw32>(bits)};
+  const VFromD<D32> widened = PromoteTo(d32, v);
+  const auto packed = Compress(widened, wide_mask);
+  return TruncateTo(d8, packed);
+}
+
+// u16, up to 8 lanes: widen to 32-bit lanes, compress natively, then narrow
+// back to u16.
+template <size_t N>
+HWY_INLINE Vec128<uint16_t, N> EmuCompress(Vec128<uint16_t, N> v,
+                                           Mask128<uint16_t, N> mask) {
+  using D16 = DFromV<decltype(v)>;
+  using DI32 = Rebind<int32_t, D16>;
+  using DU32 = RebindToUnsigned<DI32>;
+  const D16 d16;
+  const DI32 di32;
+  const DU32 du32;
+
+  const MFromD<DU32> wide_mask{static_cast<__mmask8>(mask.raw)};
+  // NativeCompress expects u32, whereas only i32 -> u16 demotion is supported,
+  // hence the BitCast on either side. DemoteTo is 2 ops, but likely lower
+  // latency than TruncateTo on SKX.
+  const VFromD<DU32> widened = BitCast(du32, PromoteTo(di32, v));
+  const auto packed = NativeCompress(widened, wide_mask);
+  return DemoteTo(d16, BitCast(di32, packed));
+}
+
+// u16x16: widen to u32x16, compress natively, then demote back to u16.
+HWY_INLINE Vec256<uint16_t> EmuCompress(Vec256<uint16_t> v,
+                                        Mask256<uint16_t> mask) {
+  using D16 = DFromV<decltype(v)>;
+  using DI32 = Rebind<int32_t, D16>;
+  using DU32 = RebindToUnsigned<DI32>;
+  const D16 d16;
+  const DI32 di32;
+  const DU32 du32;
+
+  const Mask512<uint32_t> wide_mask{static_cast<__mmask16>(mask.raw)};
+  const Vec512<uint32_t> widened = BitCast(du32, PromoteTo(di32, v));
+  const Vec512<uint32_t> packed = NativeCompress(widened, wide_mask);
+  return DemoteTo(d16, BitCast(di32, packed));
+}
+
+// See above - small-vector EmuCompressStore are implemented via EmuCompress.
+// Vectors of at most 16 bytes: compress in registers, then store the whole
+// result vector at `unaligned`.
+template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
+HWY_INLINE void EmuCompressStore(VFromD<D> v, MFromD<D> mask, D d,
+                                 TFromD<D>* HWY_RESTRICT unaligned) {
+  const auto packed = EmuCompress(v, mask);
+  StoreU(packed, d, unaligned);
+}
+
+// u16x16: compress in registers, then store the whole result vector at
+// `unaligned`.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U16_D(D)>
+HWY_INLINE void EmuCompressStore(VFromD<D> v, MFromD<D> mask, D d,
+                                 TFromD<D>* HWY_RESTRICT unaligned) {
+  const auto packed = EmuCompress(v, mask);
+  StoreU(packed, d, unaligned);
+}
+
+// Main emulation logic for wider vector, starting with EmuCompressStore because
+// it is most convenient to merge pieces using memory (concatenating vectors at
+// byte offsets is difficult).
+// u8x32: compress each 16-lane half separately (promoted to u32x16) and
+// concatenate the two results in memory.
+template <class D, HWY_IF_V_SIZE_D(D, 32), HWY_IF_U8_D(D)>
+HWY_INLINE void EmuCompressStore(VFromD<D> v, MFromD<D> mask, D d,
+                                 TFromD<D>* HWY_RESTRICT unaligned) {
+  using DH = Half<decltype(d)>;
+  using D32 = Rebind<uint32_t, DH>;
+  const DH dh;
+  const D32 d32;
+
+  // Split the 32 mask bits into one 16-bit mask per half.
+  const uint64_t all_bits{mask.raw};
+  const Mask512<uint32_t> mask_lo{static_cast<__mmask16>(all_bits & 0xFFFFu)};
+  const Mask512<uint32_t> mask_hi{static_cast<__mmask16>(all_bits >> 16)};
+
+  const Vec512<uint32_t> wide_lo = PromoteTo(d32, LowerHalf(v));
+  const Vec512<uint32_t> wide_hi = PromoteTo(d32, UpperHalf(dh, v));
+  const Vec128<uint8_t> packed_lo =
+      TruncateTo(dh, NativeCompress(wide_lo, mask_lo));
+  const Vec128<uint8_t> packed_hi =
+      TruncateTo(dh, NativeCompress(wide_hi, mask_hi));
+
+  // The second store begins right after the selected lanes of the first, so
+  // the order of the two stores matters.
+  uint8_t* HWY_RESTRICT out = unaligned;
+  StoreU(packed_lo, dh, out);
+  StoreU(packed_hi, dh, out + CountTrue(d32, mask_lo));
+}
+
+// u8x64: compress each 16-lane quarter separately (promoted to u32x16) and
+// concatenate the four results in memory.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U8_D(D)>
+HWY_INLINE void EmuCompressStore(VFromD<D> v, MFromD<D> mask, D d,
+                                 TFromD<D>* HWY_RESTRICT unaligned) {
+  using DQ = Half<Half<decltype(d)>>;
+  using D32 = Rebind<uint32_t, DQ>;
+  const DQ dq;
+  const D32 d32;
+
+  // One 16-bit mask per quarter of the 64 mask bits.
+  const uint64_t all_bits{mask.raw};
+  const Mask512<uint32_t> mask0{static_cast<__mmask16>(all_bits & 0xFFFFu)};
+  const Mask512<uint32_t> mask1{
+      static_cast<__mmask16>((all_bits >> 16) & 0xFFFFu)};
+  const Mask512<uint32_t> mask2{
+      static_cast<__mmask16>((all_bits >> 32) & 0xFFFFu)};
+  const Mask512<uint32_t> mask3{static_cast<__mmask16>(all_bits >> 48)};
+
+  // The upper three quarters are read back from a stack copy of the input.
+  alignas(64) uint8_t staged[64];
+  Store(v, d, staged);
+  const Vec512<uint32_t> wide0 = PromoteTo(d32, LowerHalf(LowerHalf(v)));
+  const Vec512<uint32_t> wide1 = PromoteTo(d32, Load(dq, staged + 16));
+  const Vec512<uint32_t> wide2 = PromoteTo(d32, Load(dq, staged + 32));
+  const Vec512<uint32_t> wide3 = PromoteTo(d32, Load(dq, staged + 48));
+
+  const Vec128<uint8_t> packed0 = TruncateTo(dq, NativeCompress(wide0, mask0));
+  const Vec128<uint8_t> packed1 = TruncateTo(dq, NativeCompress(wide1, mask1));
+  const Vec128<uint8_t> packed2 = TruncateTo(dq, NativeCompress(wide2, mask2));
+  const Vec128<uint8_t> packed3 = TruncateTo(dq, NativeCompress(wide3, mask3));
+
+  // Each store begins right after the selected lanes of the previous one, so
+  // the stores must stay in this order.
+  uint8_t* HWY_RESTRICT out = unaligned;
+  StoreU(packed0, dq, out);
+  out += CountTrue(d32, mask0);
+  StoreU(packed1, dq, out);
+  out += CountTrue(d32, mask1);
+  StoreU(packed2, dq, out);
+  out += CountTrue(d32, mask2);
+  StoreU(packed3, dq, out);
+}
+
+// u16x32: compress each 16-lane half separately (promoted to 32-bit lanes)
+// and concatenate the two results in memory.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U16_D(D)>
+HWY_INLINE void EmuCompressStore(VFromD<D> v, MFromD<D> mask, D d,
+                                 TFromD<D>* HWY_RESTRICT unaligned) {
+  using DI32 = Repartition<int32_t, decltype(d)>;
+  using DU32 = RebindToUnsigned<DI32>;
+  using DH = Half<decltype(d)>;
+  const DI32 di32;
+  const DU32 du32;
+  const DH dh;
+
+  // Split the 32 mask bits into one 16-bit mask per half.
+  const uint64_t all_bits{mask.raw};
+  const uint64_t bits_lo = all_bits & 0xFFFF;
+  const uint64_t bits_hi = all_bits >> 16;
+  const Mask512<uint32_t> mask_lo{static_cast<__mmask16>(bits_lo)};
+  const Mask512<uint32_t> mask_hi{static_cast<__mmask16>(bits_hi)};
+
+  const Vec512<uint32_t> wide_lo =
+      BitCast(du32, PromoteTo(di32, LowerHalf(dh, v)));
+  const Vec512<uint32_t> wide_hi =
+      BitCast(du32, PromoteTo(di32, UpperHalf(dh, v)));
+  const Vec512<uint32_t> packed_lo = NativeCompress(wide_lo, mask_lo);
+  const Vec512<uint32_t> packed_hi = NativeCompress(wide_hi, mask_hi);
+
+  const Vec256<uint16_t> narrow_lo = DemoteTo(dh, BitCast(di32, packed_lo));
+  const Vec256<uint16_t> narrow_hi = DemoteTo(dh, BitCast(di32, packed_hi));
+
+  // The upper result lands right after the selected lanes of the lower one.
+  StoreU(narrow_lo, dh, unaligned);
+  StoreU(narrow_hi, dh, unaligned + PopCount(bits_lo));
+}
+
+// Finally, the remaining EmuCompress for wide vectors, using EmuCompressStore.
+// 512-bit vectors of 1- or 2-byte lanes: run EmuCompressStore into a stack
+// buffer of twice the vector size, then reload the first vector's worth.
+template <typename T>  // 1 or 2 bytes
+HWY_INLINE Vec512<T> EmuCompress(Vec512<T> v, Mask512<T> mask) {
+  const DFromV<decltype(v)> d;
+  constexpr size_t kBufLanes = 2 * Lanes(d);
+  alignas(64) T scratch[kBufLanes];
+  EmuCompressStore(v, mask, d, scratch);
+  return Load(d, scratch);
+}
+
+// u8x32: run EmuCompressStore into a stack buffer of twice the vector size,
+// then reload the first vector's worth.
+HWY_INLINE Vec256<uint8_t> EmuCompress(Vec256<uint8_t> v,
+                                       const Mask256<uint8_t> mask) {
+  const DFromV<decltype(v)> d;
+  constexpr size_t kBufLanes = 2 * 32 / sizeof(uint8_t);
+  alignas(32) uint8_t scratch[kBufLanes];
+  EmuCompressStore(v, mask, d, scratch);
+  return Load(d, scratch);
+}
+
+}  // namespace detail
+
+// 1- and 2-byte lanes: operate on the unsigned equivalent, using the native
+// instruction on VBMI2 targets and the emulation otherwise.
+template <class V, class M, HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2))>
+HWY_API V Compress(V v, const M mask) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto unsigned_v = BitCast(du, v);
+  const auto unsigned_m = RebindMask(du, mask);
+#if HWY_TARGET <= HWY_AVX3_DL  // VBMI2
+  const auto packed = detail::NativeCompress(unsigned_v, unsigned_m);
+#else
+  const auto packed = detail::EmuCompress(unsigned_v, unsigned_m);
+#endif
+  return BitCast(d, packed);
+}
+
+// 4-byte lanes: always native, operating on the unsigned equivalent.
+template <class V, class M, HWY_IF_T_SIZE_V(V, 4)>
+HWY_API V Compress(V v, const M mask) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto unsigned_v = BitCast(du, v);
+  const auto unsigned_m = RebindMask(du, mask);
+  const auto packed = detail::NativeCompress(unsigned_v, unsigned_m);
+  return BitCast(d, packed);
+}
+
+// Compress for 512-bit vectors of 8-byte lanes (eight lanes). The 8-bit raw
+// mask selects one of 256 table entries; each entry packs eight 4-bit lane
+// indices (lowest nibble = source lane for output lane 0). The entry is
+// broadcast, each lane shifts its own nibble down to the low bits, and the
+// result is used as the index vector for TableLookupLanes.
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec512<T> Compress(Vec512<T> v, Mask512<T> mask) {
+  // See CompressIsPartition. u64 is faster than u32.
+  alignas(16) static constexpr uint64_t packed_array[256] = {
+      // From PrintCompress32x8Tables, without the FirstN extension (there is
+      // no benefit to including them because 64-bit CompressStore is anyway
+      // masked, but also no harm because TableLookupLanes ignores the MSB).
+      0x76543210, 0x76543210, 0x76543201, 0x76543210, 0x76543102, 0x76543120,
+      0x76543021, 0x76543210, 0x76542103, 0x76542130, 0x76542031, 0x76542310,
+      0x76541032, 0x76541320, 0x76540321, 0x76543210, 0x76532104, 0x76532140,
+      0x76532041, 0x76532410, 0x76531042, 0x76531420, 0x76530421, 0x76534210,
+      0x76521043, 0x76521430, 0x76520431, 0x76524310, 0x76510432, 0x76514320,
+      0x76504321, 0x76543210, 0x76432105, 0x76432150, 0x76432051, 0x76432510,
+      0x76431052, 0x76431520, 0x76430521, 0x76435210, 0x76421053, 0x76421530,
+      0x76420531, 0x76425310, 0x76410532, 0x76415320, 0x76405321, 0x76453210,
+      0x76321054, 0x76321540, 0x76320541, 0x76325410, 0x76310542, 0x76315420,
+      0x76305421, 0x76354210, 0x76210543, 0x76215430, 0x76205431, 0x76254310,
+      0x76105432, 0x76154320, 0x76054321, 0x76543210, 0x75432106, 0x75432160,
+      0x75432061, 0x75432610, 0x75431062, 0x75431620, 0x75430621, 0x75436210,
+      0x75421063, 0x75421630, 0x75420631, 0x75426310, 0x75410632, 0x75416320,
+      0x75406321, 0x75463210, 0x75321064, 0x75321640, 0x75320641, 0x75326410,
+      0x75310642, 0x75316420, 0x75306421, 0x75364210, 0x75210643, 0x75216430,
+      0x75206431, 0x75264310, 0x75106432, 0x75164320, 0x75064321, 0x75643210,
+      0x74321065, 0x74321650, 0x74320651, 0x74326510, 0x74310652, 0x74316520,
+      0x74306521, 0x74365210, 0x74210653, 0x74216530, 0x74206531, 0x74265310,
+      0x74106532, 0x74165320, 0x74065321, 0x74653210, 0x73210654, 0x73216540,
+      0x73206541, 0x73265410, 0x73106542, 0x73165420, 0x73065421, 0x73654210,
+      0x72106543, 0x72165430, 0x72065431, 0x72654310, 0x71065432, 0x71654320,
+      0x70654321, 0x76543210, 0x65432107, 0x65432170, 0x65432071, 0x65432710,
+      0x65431072, 0x65431720, 0x65430721, 0x65437210, 0x65421073, 0x65421730,
+      0x65420731, 0x65427310, 0x65410732, 0x65417320, 0x65407321, 0x65473210,
+      0x65321074, 0x65321740, 0x65320741, 0x65327410, 0x65310742, 0x65317420,
+      0x65307421, 0x65374210, 0x65210743, 0x65217430, 0x65207431, 0x65274310,
+      0x65107432, 0x65174320, 0x65074321, 0x65743210, 0x64321075, 0x64321750,
+      0x64320751, 0x64327510, 0x64310752, 0x64317520, 0x64307521, 0x64375210,
+      0x64210753, 0x64217530, 0x64207531, 0x64275310, 0x64107532, 0x64175320,
+      0x64075321, 0x64753210, 0x63210754, 0x63217540, 0x63207541, 0x63275410,
+      0x63107542, 0x63175420, 0x63075421, 0x63754210, 0x62107543, 0x62175430,
+      0x62075431, 0x62754310, 0x61075432, 0x61754320, 0x60754321, 0x67543210,
+      0x54321076, 0x54321760, 0x54320761, 0x54327610, 0x54310762, 0x54317620,
+      0x54307621, 0x54376210, 0x54210763, 0x54217630, 0x54207631, 0x54276310,
+      0x54107632, 0x54176320, 0x54076321, 0x54763210, 0x53210764, 0x53217640,
+      0x53207641, 0x53276410, 0x53107642, 0x53176420, 0x53076421, 0x53764210,
+      0x52107643, 0x52176430, 0x52076431, 0x52764310, 0x51076432, 0x51764320,
+      0x50764321, 0x57643210, 0x43210765, 0x43217650, 0x43207651, 0x43276510,
+      0x43107652, 0x43176520, 0x43076521, 0x43765210, 0x42107653, 0x42176530,
+      0x42076531, 0x42765310, 0x41076532, 0x41765320, 0x40765321, 0x47653210,
+      0x32107654, 0x32176540, 0x32076541, 0x32765410, 0x31076542, 0x31765420,
+      0x30765421, 0x37654210, 0x21076543, 0x21765430, 0x20765431, 0x27654310,
+      0x10765432, 0x17654320, 0x07654321, 0x76543210};
+
+  // For lane i, shift the i-th 4-bit index down to bits [0, 3) -
+  // _mm512_permutexvar_epi64 will ignore the upper bits.
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du64;
+  // Broadcast the table entry chosen by the raw mask bits to every lane.
+  const auto packed = Set(du64, packed_array[mask.raw]);
+  // Lane i shifts right by 4 * i so that its own nibble lands in the low bits.
+  alignas(64) static constexpr uint64_t shifts[8] = {0,  4,  8,  12,
+                                                     16, 20, 24, 28};
+  const auto indices = Indices512<T>{(packed >> Load(du64, shifts)).raw};
+  return TableLookupLanes(v, indices);
+}
+
+// ------------------------------ Expand
+
+// 1-byte lanes, 512-bit vectors. Uses the native instruction on VBMI2
+// targets; otherwise expands each 256-bit half separately and recombines.
+template <typename T, HWY_IF_T_SIZE(T, 1)>
+HWY_API Vec512<T> Expand(Vec512<T> v, const Mask512<T> mask) {
+  const Full512<T> d;
+#if HWY_TARGET <= HWY_AVX3_DL  // VBMI2
+  const RebindToUnsigned<decltype(d)> du;
+  const auto unsigned_v = BitCast(du, v);
+  const auto unsigned_m = RebindMask(du, mask);
+  return BitCast(d, detail::NativeExpand(unsigned_v, unsigned_m));
+#else
+  // LUTs are infeasible for 2^64 possible masks, so splice together two
+  // half-vector Expand.
+  const Full256<T> dh;
+  constexpr size_t kLanes = Lanes(d);
+  constexpr size_t kHalfLanes = kLanes / 2;
+  using HalfBits = typename Mask256<T>::Raw;
+  const Mask256<T> mask_lo{
+      static_cast<HalfBits>(mask.raw & HalfBits{(1ULL << kHalfLanes) - 1})};
+  const Mask256<T> mask_hi{static_cast<HalfBits>(mask.raw >> kHalfLanes)};
+
+  // The upper half consumes the input lanes that follow those used by the
+  // lower half, i.e. the input shifted by a variable number of u8. Shuffling
+  // requires VBMI2, in which case we would already have NativeExpand. We
+  // instead load at an offset, which may incur a store to load forwarding
+  // stall.
+  alignas(64) T staged[kLanes];
+  Store(v, d, staged);
+  const size_t num_lo = CountTrue(dh, mask_lo);
+  const Vec256<T> expanded_lo = Expand(LowerHalf(v), mask_lo);
+  const Vec256<T> expanded_hi = Expand(LoadU(dh, staged + num_lo), mask_hi);
+  return Combine(d, expanded_hi, expanded_lo);
+#endif
+}
+
+// 2-byte lanes, 512-bit vectors. Uses the native instruction on VBMI2
+// targets; otherwise expands each 256-bit half separately and recombines.
+template <typename T, HWY_IF_T_SIZE(T, 2)>
+HWY_API Vec512<T> Expand(Vec512<T> v, const Mask512<T> mask) {
+  const Full512<T> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const Vec512<uint16_t> vu = BitCast(du, v);
+#if HWY_TARGET <= HWY_AVX3_DL  // VBMI2
+  return BitCast(d, detail::NativeExpand(vu, RebindMask(du, mask)));
+#else   // AVX3
+  // LUTs are infeasible for 2^32 possible masks, so splice together two
+  // half-vector Expand.
+  const Full256<T> dh;
+  constexpr size_t N = Lanes(d);
+  using Bits = typename Mask256<T>::Raw;
+  const Mask256<T> maskL{
+      static_cast<Bits>(mask.raw & Bits{(1ULL << (N / 2)) - 1})};
+  const Mask256<T> maskH{static_cast<Bits>(mask.raw >> (N / 2))};
+  // In AVX3 we can permutevar, which avoids a potential store to load
+  // forwarding stall vs. reloading the input.
+  // The table is read-only, so keep it in static storage (like the other
+  // lookup tables in this file) instead of rebuilding it on the stack on
+  // every call. Entries 32..63 are zero; the load below starts at an offset
+  // of at most N / 2 and therefore stays within the array.
+  alignas(64) static constexpr uint16_t kIota[64] = {
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
+      16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
+  // Lane i of `indices` is i + countL, so the permute moves the input lanes
+  // not consumed by the lower half down to lane 0.
+  const Vec512<uint16_t> indices = LoadU(du, kIota + CountTrue(dh, maskL));
+  const Vec512<uint16_t> shifted{_mm512_permutexvar_epi16(indices.raw, vu.raw)};
+  const Vec256<T> expandL = Expand(LowerHalf(v), maskL);
+  const Vec256<T> expandH = Expand(LowerHalf(BitCast(d, shifted)), maskH);
+  return Combine(d, expandH, expandL);
+#endif  // AVX3
+}
+
+// 4- and 8-byte lanes: always native, operating on the unsigned equivalent.
+template <class V, class M, HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 4) | (1 << 8))>
+HWY_API V Expand(V v, const M mask) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  const auto unsigned_v = BitCast(du, v);
+  const auto unsigned_m = RebindMask(du, mask);
+  const auto expanded = detail::NativeExpand(unsigned_v, unsigned_m);
+  return BitCast(d, expanded);
+}
+
+// For smaller vectors, it is likely more efficient to promote to 32-bit.
+// This works for u8x16, u16x8, u16x16 (can be promoted to u32x16), but is
+// unnecessary if HWY_AVX3_DL, which provides native instructions.
+#if HWY_TARGET > HWY_AVX3_DL  // no VBMI2
+
+// 1- and 2-byte lanes, at most 16 lanes: promote to u32, expand there, then
+// truncate back to the original lane width.
+template <class V, class M, HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2)),
+          HWY_IF_LANES_LE_D(DFromV<V>, 16)>
+HWY_API V Expand(V v, M mask) {
+  using D = DFromV<V>;
+  using DU = RebindToUnsigned<D>;
+  using DU32 = Rebind<uint32_t, D>;
+  using M32 = MFromD<DU32>;
+  using Raw32 = typename M32::Raw;
+  const D d;
+  const DU du;
+  const DU32 du32;
+
+  const M32 wide_mask{static_cast<Raw32>(mask.raw)};
+  const VFromD<DU> unsigned_v = BitCast(du, v);
+  const auto wide_expanded = Expand(PromoteTo(du32, unsigned_v), wide_mask);
+  return BitCast(d, TruncateTo(du, wide_expanded));
+}
+
+#endif  // HWY_TARGET > HWY_AVX3_DL
+
+// ------------------------------ LoadExpand
+
+// 1- and 2-byte lanes: fused native load+expand on VBMI2 targets, otherwise
+// an unaligned load followed by Expand.
+template <class D, HWY_IF_V_SIZE_D(D, 64),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2))>
+HWY_API VFromD<D> LoadExpand(MFromD<D> mask, D d,
+                             const TFromD<D>* HWY_RESTRICT unaligned) {
+#if HWY_TARGET <= HWY_AVX3_DL  // VBMI2
+  using DU = RebindToUnsigned<decltype(d)>;
+  using TU = TFromD<DU>;
+  const DU du;
+  const MFromD<DU> unsigned_m = RebindMask(du, mask);
+  const TU* HWY_RESTRICT src = reinterpret_cast<const TU*>(unaligned);
+  return BitCast(d, detail::NativeLoadExpand(unsigned_m, du, src));
+#else
+  const VFromD<D> loaded = LoadU(d, unaligned);
+  return Expand(loaded, mask);
+#endif
+}
+
+// 4- and 8-byte lanes: always the fused native load+expand, operating on the
+// unsigned equivalent.
+template <class D, HWY_IF_V_SIZE_D(D, 64),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 4) | (1 << 8))>
+HWY_API VFromD<D> LoadExpand(MFromD<D> mask, D d,
+                             const TFromD<D>* HWY_RESTRICT unaligned) {
+  using DU = RebindToUnsigned<decltype(d)>;
+  using TU = TFromD<DU>;
+  const DU du;
+  const MFromD<DU> unsigned_m = RebindMask(du, mask);
+  const TU* HWY_RESTRICT src = reinterpret_cast<const TU*>(unaligned);
+  return BitCast(d, detail::NativeLoadExpand(unsigned_m, du, src));
+}
+
+// ------------------------------ CompressNot
+
+// Lanes other than 8 bytes: compress with the inverted mask.
+template <class V, class M, HWY_IF_NOT_T_SIZE_V(V, 8)>
+HWY_API V CompressNot(V v, const M mask) {
+  const M inverted = Not(mask);
+  return Compress(v, inverted);
+}
+
+// CompressNot for 512-bit vectors of 8-byte lanes (eight lanes). Same lookup
+// scheme as the 8-byte Compress: the 8-bit raw mask selects one of 256 table
+// entries, each packing eight 4-bit lane indices; here the table is the one
+// for the inverted mask, so no separate Not() is required.
+template <typename T, HWY_IF_T_SIZE(T, 8)>
+HWY_API Vec512<T> CompressNot(Vec512<T> v, Mask512<T> mask) {
+  // See CompressIsPartition. u64 is faster than u32.
+  alignas(16) static constexpr uint64_t packed_array[256] = {
+      // From PrintCompressNot32x8Tables, without the FirstN extension (there is
+      // no benefit to including them because 64-bit CompressStore is anyway
+      // masked, but also no harm because TableLookupLanes ignores the MSB).
+      0x76543210, 0x07654321, 0x17654320, 0x10765432, 0x27654310, 0x20765431,
+      0x21765430, 0x21076543, 0x37654210, 0x30765421, 0x31765420, 0x31076542,
+      0x32765410, 0x32076541, 0x32176540, 0x32107654, 0x47653210, 0x40765321,
+      0x41765320, 0x41076532, 0x42765310, 0x42076531, 0x42176530, 0x42107653,
+      0x43765210, 0x43076521, 0x43176520, 0x43107652, 0x43276510, 0x43207651,
+      0x43217650, 0x43210765, 0x57643210, 0x50764321, 0x51764320, 0x51076432,
+      0x52764310, 0x52076431, 0x52176430, 0x52107643, 0x53764210, 0x53076421,
+      0x53176420, 0x53107642, 0x53276410, 0x53207641, 0x53217640, 0x53210764,
+      0x54763210, 0x54076321, 0x54176320, 0x54107632, 0x54276310, 0x54207631,
+      0x54217630, 0x54210763, 0x54376210, 0x54307621, 0x54317620, 0x54310762,
+      0x54327610, 0x54320761, 0x54321760, 0x54321076, 0x67543210, 0x60754321,
+      0x61754320, 0x61075432, 0x62754310, 0x62075431, 0x62175430, 0x62107543,
+      0x63754210, 0x63075421, 0x63175420, 0x63107542, 0x63275410, 0x63207541,
+      0x63217540, 0x63210754, 0x64753210, 0x64075321, 0x64175320, 0x64107532,
+      0x64275310, 0x64207531, 0x64217530, 0x64210753, 0x64375210, 0x64307521,
+      0x64317520, 0x64310752, 0x64327510, 0x64320751, 0x64321750, 0x64321075,
+      0x65743210, 0x65074321, 0x65174320, 0x65107432, 0x65274310, 0x65207431,
+      0x65217430, 0x65210743, 0x65374210, 0x65307421, 0x65317420, 0x65310742,
+      0x65327410, 0x65320741, 0x65321740, 0x65321074, 0x65473210, 0x65407321,
+      0x65417320, 0x65410732, 0x65427310, 0x65420731, 0x65421730, 0x65421073,
+      0x65437210, 0x65430721, 0x65431720, 0x65431072, 0x65432710, 0x65432071,
+      0x65432170, 0x65432107, 0x76543210, 0x70654321, 0x71654320, 0x71065432,
+      0x72654310, 0x72065431, 0x72165430, 0x72106543, 0x73654210, 0x73065421,
+      0x73165420, 0x73106542, 0x73265410, 0x73206541, 0x73216540, 0x73210654,
+      0x74653210, 0x74065321, 0x74165320, 0x74106532, 0x74265310, 0x74206531,
+      0x74216530, 0x74210653, 0x74365210, 0x74306521, 0x74316520, 0x74310652,
+      0x74326510, 0x74320651, 0x74321650, 0x74321065, 0x75643210, 0x75064321,
+      0x75164320, 0x75106432, 0x75264310, 0x75206431, 0x75216430, 0x75210643,
+      0x75364210, 0x75306421, 0x75316420, 0x75310642, 0x75326410, 0x75320641,
+      0x75321640, 0x75321064, 0x75463210, 0x75406321, 0x75416320, 0x75410632,
+      0x75426310, 0x75420631, 0x75421630, 0x75421063, 0x75436210, 0x75430621,
+      0x75431620, 0x75431062, 0x75432610, 0x75432061, 0x75432160, 0x75432106,
+      0x76543210, 0x76054321, 0x76154320, 0x76105432, 0x76254310, 0x76205431,
+      0x76215430, 0x76210543, 0x76354210, 0x76305421, 0x76315420, 0x76310542,
+      0x76325410, 0x76320541, 0x76321540, 0x76321054, 0x76453210, 0x76405321,
+      0x76415320, 0x76410532, 0x76425310, 0x76420531, 0x76421530, 0x76421053,
+      0x76435210, 0x76430521, 0x76431520, 0x76431052, 0x76432510, 0x76432051,
+      0x76432150, 0x76432105, 0x76543210, 0x76504321, 0x76514320, 0x76510432,
+      0x76524310, 0x76520431, 0x76521430, 0x76521043, 0x76534210, 0x76530421,
+      0x76531420, 0x76531042, 0x76532410, 0x76532041, 0x76532140, 0x76532104,
+      0x76543210, 0x76540321, 0x76541320, 0x76541032, 0x76542310, 0x76542031,
+      0x76542130, 0x76542103, 0x76543210, 0x76543021, 0x76543120, 0x76543102,
+      0x76543210, 0x76543201, 0x76543210, 0x76543210};
+
+  // For lane i, shift the i-th 4-bit index down to bits [0, 3) -
+  // _mm512_permutexvar_epi64 will ignore the upper bits.
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du64;
+  // Broadcast the table entry chosen by the raw mask bits to every lane.
+  const auto packed = Set(du64, packed_array[mask.raw]);
+  // Lane i shifts right by 4 * i so that its own nibble lands in the low bits.
+  alignas(64) static constexpr uint64_t shifts[8] = {0,  4,  8,  12,
+                                                     16, 20, 24, 28};
+  const auto indices = Indices512<T>{(packed >> Load(du64, shifts)).raw};
+  return TableLookupLanes(v, indices);
+}
+
+// uint64_t lanes. Only implement for 256 and 512-bit vectors because this is a
+// no-op for 128-bit.
+// Vectors wider than 128 bits: forwards unchanged to CompressNot.
+template <class V, class M, HWY_IF_V_SIZE_GT_D(DFromV<V>, 16)>
+HWY_API V CompressBlocksNot(V v, M mask) {
+  const V compressed = CompressNot(v, mask);
+  return compressed;
+}
+
+// ------------------------------ CompressBits
+// Builds a mask from the packed bits at `bits`, then compresses `v` with it.
+template <class V>
+HWY_API V CompressBits(V v, const uint8_t* HWY_RESTRICT bits) {
+  const DFromV<V> d;
+  const auto mask = LoadMaskBits(d, bits);
+  return Compress(v, mask);
+}
+
+// ------------------------------ CompressStore
+
+// Generic for all vector lengths.
+
+// 1- and 2-byte lanes. Stores the compressed lanes at `unaligned` and returns
+// how many lanes were selected by `mask`. Zen4 uses Compress + StoreU;
+// VBMI2 targets use the native compress-store; others use the emulation.
+template <class D, HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 1) | (1 << 2))>
+HWY_API size_t CompressStore(VFromD<D> v, MFromD<D> mask, D d,
+                             TFromD<D>* HWY_RESTRICT unaligned) {
+  const size_t num_selected = CountTrue(d, mask);
+#if HWY_TARGET == HWY_AVX3_ZEN4
+  const VFromD<D> packed = Compress(v, mask);
+  StoreU(packed, d, unaligned);
+#else
+  using DU = RebindToUnsigned<decltype(d)>;
+  using TU = TFromD<DU>;
+  const DU du;
+  const auto unsigned_v = BitCast(du, v);
+  const auto unsigned_m = RebindMask(du, mask);
+  TU* HWY_RESTRICT dst = reinterpret_cast<TU*>(unaligned);
+
+#if HWY_TARGET <= HWY_AVX3_DL  // VBMI2
+  detail::NativeCompressStore(unsigned_v, unsigned_m, dst);
+#else
+  detail::EmuCompressStore(unsigned_v, unsigned_m, du, dst);
+#endif
+#endif  // HWY_TARGET != HWY_AVX3_ZEN4
+  detail::MaybeUnpoison(unaligned, num_selected);
+  return num_selected;
+}
+
+// 4- and 8-byte non-float lanes. Stores the compressed lanes at `unaligned`
+// and returns how many lanes were selected by `mask`. Zen4 uses Compress +
+// StoreU; all other targets use the native compress-store.
+template <class D, HWY_IF_NOT_FLOAT_D(D),
+          HWY_IF_T_SIZE_ONE_OF_D(D, (1 << 4) | (1 << 8))>
+HWY_API size_t CompressStore(VFromD<D> v, MFromD<D> mask, D d,
+                             TFromD<D>* HWY_RESTRICT unaligned) {
+  const size_t num_selected = CountTrue(d, mask);
+#if HWY_TARGET == HWY_AVX3_ZEN4
+  const VFromD<D> packed = Compress(v, mask);
+  StoreU(packed, d, unaligned);
+#else
+  using DU = RebindToUnsigned<decltype(d)>;
+  using TU = TFromD<DU>;
+  const DU du;
+  const auto unsigned_v = BitCast(du, v);
+  const auto unsigned_m = RebindMask(du, mask);
+  TU* HWY_RESTRICT dst = reinterpret_cast<TU*>(unaligned);
+  detail::NativeCompressStore(unsigned_v, unsigned_m, dst);
+#endif  // HWY_TARGET != HWY_AVX3_ZEN4
+  detail::MaybeUnpoison(unaligned, num_selected);
+  return num_selected;
+}
+
+// Additional overloads to avoid casting to uint32_t (delay?).
+// f32/f64 lanes: calls the float-typed native compress-store directly, with
+// no cast to an unsigned vector. Returns the number of set mask bits.
+template <class D, HWY_IF_FLOAT3264_D(D)>
+HWY_API size_t CompressStore(VFromD<D> v, MFromD<D> mask, D d,
+                             TFromD<D>* HWY_RESTRICT unaligned) {
+#if HWY_TARGET == HWY_AVX3_ZEN4
+  const VFromD<D> packed = Compress(v, mask);
+  StoreU(packed, d, unaligned);
+#else
+  (void)d;
+  detail::NativeCompressStore(v, mask, unaligned);
+#endif  // HWY_TARGET != HWY_AVX3_ZEN4
+  const uint64_t mask_bits{mask.raw};
+  const size_t num_selected = PopCount(mask_bits);
+  detail::MaybeUnpoison(unaligned, num_selected);
+  return num_selected;
+}
+
+// ------------------------------ CompressBlendedStore
+// Like CompressStore, but the fallback path only writes the first `count`
+// lanes (via BlendedStore with a FirstN mask). Returns the number of lanes
+// selected by `m`.
+template <class D, HWY_IF_V_SIZE_GT_D(D, 8)>
+HWY_API size_t CompressBlendedStore(VFromD<D> v, MFromD<D> m, D d,
+                                    TFromD<D>* HWY_RESTRICT unaligned) {
+  // Native CompressStore already does the blending at no extra cost (latency
+  // 11, rthroughput 2 - same as compress plus store). That applies on AVX3_DL
+  // for all lane sizes, and on non-Zen4 targets for lanes wider than 2 bytes.
+  constexpr bool kStoreAlreadyBlends =
+      HWY_TARGET == HWY_AVX3_DL ||
+      (HWY_TARGET != HWY_AVX3_ZEN4 && sizeof(TFromD<D>) > 2);
+  if (kStoreAlreadyBlends) {
+    return CompressStore(v, m, d, unaligned);
+  }
+
+  const size_t count = CountTrue(d, m);
+  const auto packed = Compress(v, m);
+  BlendedStore(packed, FirstN(d, count), d, unaligned);
+  detail::MaybeUnpoison(unaligned, count);
+  return count;
+}
+
+// ------------------------------ CompressBitsStore
+// Generic for all vector lengths.
+// Builds a mask from the packed bits at `bits`, then forwards to
+// CompressStore and returns its lane count.
+template <class D>
+HWY_API size_t CompressBitsStore(VFromD<D> v, const uint8_t* HWY_RESTRICT bits,
+                                 D d, TFromD<D>* HWY_RESTRICT unaligned) {
+  const MFromD<D> mask = LoadMaskBits(d, bits);
+  return CompressStore(v, mask, d, unaligned);
+}
+
+// ------------------------------ LoadInterleaved4
+
+// Actually implemented in generic_ops, we just overload
+// LoadTransposedBlocks3/4.
+namespace detail {
+
+// Typed front-ends for the 128-bit-block shuffles: each overload forwards to
+// the intrinsic matching the vector's raw register type. kPerm selects which
+// blocks of `lo` (lower two output blocks) and `hi` (upper two) are taken.
+template <_MM_PERM_ENUM kPerm, typename T>
+Vec512<T> Shuffle128(const Vec512<T> lo, const Vec512<T> hi) {
+  const __m512i blocks = _mm512_shuffle_i64x2(lo.raw, hi.raw, kPerm);
+  return Vec512<T>{blocks};
+}
+template <_MM_PERM_ENUM kPerm>
+Vec512<float> Shuffle128(const Vec512<float> lo, const Vec512<float> hi) {
+  const __m512 blocks = _mm512_shuffle_f32x4(lo.raw, hi.raw, kPerm);
+  return Vec512<float>{blocks};
+}
+template <_MM_PERM_ENUM kPerm>
+Vec512<double> Shuffle128(const Vec512<double> lo, const Vec512<double> hi) {
+  const __m512d blocks = _mm512_shuffle_f64x2(lo.raw, hi.raw, kPerm);
+  return Vec512<double>{blocks};
+}
+
+// Loads three consecutive vectors and regroups their 128-bit blocks with
+// stride 3.
+// Input (128-bit blocks):
+// 3 2 1 0 (<- first block in unaligned)
+// 7 6 5 4
+// b a 9 8
+// Output:
+// 9 6 3 0 (LSB of A)
+// a 7 4 1
+// b 8 5 2
+template <class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API void LoadTransposedBlocks3(D d, const TFromD<D>* HWY_RESTRICT unaligned,
+                                   VFromD<D>& A, VFromD<D>& B, VFromD<D>& C) {
+  constexpr size_t kStride = Lanes(d);
+  const VFromD<D> row_3210 = LoadU(d, unaligned + 0 * kStride);
+  const VFromD<D> row_7654 = LoadU(d, unaligned + 1 * kStride);
+  const VFromD<D> row_ba98 = LoadU(d, unaligned + 2 * kStride);
+
+  // Intermediate mixes; the suffix lists the blocks from MSB to LSB.
+  const VFromD<D> mix_5421 =
+      detail::Shuffle128<_MM_PERM_BACB>(row_3210, row_7654);
+  const VFromD<D> mix_a976 =
+      detail::Shuffle128<_MM_PERM_CBDC>(row_7654, row_ba98);
+
+  A = detail::Shuffle128<_MM_PERM_CADA>(row_3210, mix_a976);
+  B = detail::Shuffle128<_MM_PERM_DBCA>(mix_5421, mix_a976);
+  C = detail::Shuffle128<_MM_PERM_DADB>(mix_5421, row_ba98);
+}
+
+// Loads four consecutive vectors and transposes their 128-bit blocks (4x4).
+// Input (128-bit blocks):
+// 3 2 1 0 (<- first block in unaligned)
+// 7 6 5 4
+// b a 9 8
+// f e d c
+// Output:
+// c 8 4 0 (LSB of A)
+// d 9 5 1
+// e a 6 2
+// f b 7 3
+template <class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API void LoadTransposedBlocks4(D d, const TFromD<D>* HWY_RESTRICT unaligned,
+                                   VFromD<D>& vA, VFromD<D>& vB, VFromD<D>& vC,
+                                   VFromD<D>& vD) {
+  constexpr size_t kStride = Lanes(d);
+  const VFromD<D> row_3210 = LoadU(d, unaligned + 0 * kStride);
+  const VFromD<D> row_7654 = LoadU(d, unaligned + 1 * kStride);
+  const VFromD<D> row_ba98 = LoadU(d, unaligned + 2 * kStride);
+  const VFromD<D> row_fedc = LoadU(d, unaligned + 3 * kStride);
+
+  // First pass: gather the lower and upper block pairs of adjacent rows.
+  const VFromD<D> lo_5410 =
+      detail::Shuffle128<_MM_PERM_BABA>(row_3210, row_7654);
+  const VFromD<D> lo_dc98 =
+      detail::Shuffle128<_MM_PERM_BABA>(row_ba98, row_fedc);
+  const VFromD<D> hi_7632 =
+      detail::Shuffle128<_MM_PERM_DCDC>(row_3210, row_7654);
+  const VFromD<D> hi_feba =
+      detail::Shuffle128<_MM_PERM_DCDC>(row_ba98, row_fedc);
+
+  // Second pass: pick the even or odd blocks of each pair.
+  vA = detail::Shuffle128<_MM_PERM_CACA>(lo_5410, lo_dc98);
+  vB = detail::Shuffle128<_MM_PERM_DBDB>(lo_5410, lo_dc98);
+  vC = detail::Shuffle128<_MM_PERM_CACA>(hi_7632, hi_feba);
+  vD = detail::Shuffle128<_MM_PERM_DBDB>(hi_7632, hi_feba);
+}
+
+}  // namespace detail
+
+// ------------------------------ StoreInterleaved2
+
+// Implemented in generic_ops, we just overload StoreTransposedBlocks2/3/4.
+
+namespace detail {
+
+// Interleaves the 128-bit blocks of two vectors and stores the two results
+// consecutively.
+// Input (128-bit blocks):
+// 6 4 2 0 (LSB of i)
+// 7 5 3 1
+// Output:
+// 3 2 1 0
+// 7 6 5 4
+template <class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API void StoreTransposedBlocks2(const VFromD<D> i, const VFromD<D> j, D d,
+                                    TFromD<D>* HWY_RESTRICT unaligned) {
+  constexpr size_t kStride = Lanes(d);
+
+  // First pass: lower/upper block pairs of both inputs (names list the
+  // blocks from MSB to LSB).
+  const auto lower_j1_j0_i1_i0 = detail::Shuffle128<_MM_PERM_BABA>(i, j);
+  const auto upper_j3_j2_i3_i2 = detail::Shuffle128<_MM_PERM_DCDC>(i, j);
+
+  // Second pass: swap the middle blocks to alternate i and j.
+  const auto out0 =  // j1 i1 j0 i0
+      detail::Shuffle128<_MM_PERM_DBCA>(lower_j1_j0_i1_i0, lower_j1_j0_i1_i0);
+  const auto out1 =  // j3 i3 j2 i2
+      detail::Shuffle128<_MM_PERM_DBCA>(upper_j3_j2_i3_i2, upper_j3_j2_i3_i2);
+
+  StoreU(out0, d, unaligned + 0 * kStride);
+  StoreU(out1, d, unaligned + 1 * kStride);
+}
+
+// Interleaves the 128-bit blocks of three vectors and stores the three
+// results consecutively.
+// Input (128-bit blocks):
+// 9 6 3 0 (LSB of i)
+// a 7 4 1
+// b 8 5 2
+// Output:
+// 3 2 1 0
+// 7 6 5 4
+// b a 9 8
+template <class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API void StoreTransposedBlocks3(const VFromD<D> i, const VFromD<D> j,
+                                    const VFromD<D> k, D d,
+                                    TFromD<D>* HWY_RESTRICT unaligned) {
+  constexpr size_t kStride = Lanes(d);
+
+  // Intermediate mixes; names list the blocks from MSB to LSB.
+  const VFromD<D> mix_j2_j0_i2_i0 = detail::Shuffle128<_MM_PERM_CACA>(i, j);
+  const VFromD<D> mix_i3_i1_k2_k0 = detail::Shuffle128<_MM_PERM_DBCA>(k, i);
+  const VFromD<D> mix_j3_j1_k3_k1 = detail::Shuffle128<_MM_PERM_DBDB>(k, j);
+
+  const VFromD<D> first =  // i1 k0 j0 i0
+      detail::Shuffle128<_MM_PERM_CACA>(mix_j2_j0_i2_i0, mix_i3_i1_k2_k0);
+  const VFromD<D> second =  // j2 i2 k1 j1
+      detail::Shuffle128<_MM_PERM_DBAC>(mix_j3_j1_k3_k1, mix_j2_j0_i2_i0);
+  const VFromD<D> third =  // k3 j3 i3 k2
+      detail::Shuffle128<_MM_PERM_BDDB>(mix_i3_i1_k2_k0, mix_j3_j1_k3_k1);
+
+  StoreU(first, d, unaligned + 0 * kStride);
+  StoreU(second, d, unaligned + 1 * kStride);
+  StoreU(third, d, unaligned + 2 * kStride);
+}
+
+// Transposes the 128-bit blocks of four vectors (4x4) and stores the four
+// results consecutively.
+// Input (128-bit blocks):
+// c 8 4 0 (LSB of i)
+// d 9 5 1
+// e a 6 2
+// f b 7 3
+// Output:
+// 3 2 1 0
+// 7 6 5 4
+// b a 9 8
+// f e d c
+template <class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API void StoreTransposedBlocks4(const VFromD<D> i, const VFromD<D> j,
+                                    const VFromD<D> k, const VFromD<D> l, D d,
+                                    TFromD<D>* HWY_RESTRICT unaligned) {
+  constexpr size_t kStride = Lanes(d);
+
+  // First pass: lower/upper block pairs of (i, j) and (k, l); names list the
+  // blocks from MSB to LSB.
+  const VFromD<D> lower_j1_j0_i1_i0 = detail::Shuffle128<_MM_PERM_BABA>(i, j);
+  const VFromD<D> lower_l1_l0_k1_k0 = detail::Shuffle128<_MM_PERM_BABA>(k, l);
+  const VFromD<D> upper_j3_j2_i3_i2 = detail::Shuffle128<_MM_PERM_DCDC>(i, j);
+  const VFromD<D> upper_l3_l2_k3_k2 = detail::Shuffle128<_MM_PERM_DCDC>(k, l);
+
+  // Second pass: pick the even or odd blocks of each pair.
+  const VFromD<D> first =
+      detail::Shuffle128<_MM_PERM_CACA>(lower_j1_j0_i1_i0, lower_l1_l0_k1_k0);
+  const VFromD<D> second =
+      detail::Shuffle128<_MM_PERM_DBDB>(lower_j1_j0_i1_i0, lower_l1_l0_k1_k0);
+  const VFromD<D> third =
+      detail::Shuffle128<_MM_PERM_CACA>(upper_j3_j2_i3_i2, upper_l3_l2_k3_k2);
+  const VFromD<D> fourth =
+      detail::Shuffle128<_MM_PERM_DBDB>(upper_j3_j2_i3_i2, upper_l3_l2_k3_k2);
+
+  StoreU(first, d, unaligned + 0 * kStride);
+  StoreU(second, d, unaligned + 1 * kStride);
+  StoreU(third, d, unaligned + 2 * kStride);
+  StoreU(fourth, d, unaligned + 3 * kStride);
+}
+
+}  // namespace detail
+
+// ------------------------------ Additional mask logical operations
+
+// Computes 0 - AVX3Blsi(mask.raw) and narrows it to the raw mask type.
+template <class T>
+HWY_API Mask512<T> SetAtOrAfterFirst(Mask512<T> mask) {
+  using Raw = typename Mask512<T>::Raw;
+  const auto lowest = detail::AVX3Blsi(mask.raw);
+  return Mask512<T>{static_cast<Raw>(0u - lowest)};
+}
+// Computes AVX3Blsi(mask.raw) - 1 and narrows it to the raw mask type.
+template <class T>
+HWY_API Mask512<T> SetBeforeFirst(Mask512<T> mask) {
+  using Raw = typename Mask512<T>::Raw;
+  const auto lowest = detail::AVX3Blsi(mask.raw);
+  return Mask512<T>{static_cast<Raw>(lowest - 1u)};
+}
+// Returns AVX3Blsmsk(mask.raw), narrowed to the raw mask type.
+template <class T>
+HWY_API Mask512<T> SetAtOrBeforeFirst(Mask512<T> mask) {
+  using Raw = typename Mask512<T>::Raw;
+  const auto up_to_lowest = detail::AVX3Blsmsk(mask.raw);
+  return Mask512<T>{static_cast<Raw>(up_to_lowest)};
+}
+// Returns AVX3Blsi(mask.raw), narrowed to the raw mask type.
+template <class T>
+HWY_API Mask512<T> SetOnlyFirst(Mask512<T> mask) {
+  using Raw = typename Mask512<T>::Raw;
+  const auto lowest = detail::AVX3Blsi(mask.raw);
+  return Mask512<T>{static_cast<Raw>(lowest)};
+}
+
+// ------------------------------ Shl (LoadDup128)
+
+// u16: per-lane variable left shift via the native 16-bit instruction.
+HWY_API Vec512<uint16_t> operator<<(Vec512<uint16_t> v, Vec512<uint16_t> bits) {
+  const __m512i shifted = _mm512_sllv_epi16(v.raw, bits.raw);
+  return Vec512<uint16_t>{shifted};
+}
+
+// 8-bit: may use the << overload for uint16_t.
+// u8: per-lane variable left shift. AVX3_DL targets use table lookups plus a
+// GF(2^8) multiply; other targets shift the even and odd bytes separately as
+// u16 lanes.
+HWY_API Vec512<uint8_t> operator<<(Vec512<uint8_t> v, Vec512<uint8_t> bits) {
+  const DFromV<decltype(v)> d;
+#if HWY_TARGET <= HWY_AVX3_DL
+  // kMasks[i] = 0xFF >> i
+  alignas(16) static constexpr uint8_t kMasks[16] = {
+      0xFF, 0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01, 0x00};
+  // kShl[i] = 1 << i
+  alignas(16) static constexpr uint8_t kShl[16] = {0x01, 0x02, 0x04, 0x08,
+                                                   0x10, 0x20, 0x40, 0x80};
+  // Clear the bits that would be shifted out, then multiply by 1 << bits.
+  const VFromD<decltype(d)> keep = TableLookupBytes(LoadDup128(d, kMasks), bits);
+  const VFromD<decltype(d)> masked = And(v, keep);
+  const VFromD<decltype(d)> factor = TableLookupBytes(LoadDup128(d, kShl), bits);
+  return VFromD<decltype(d)>{_mm512_gf2p8mul_epi8(masked.raw, factor.raw)};
+#else
+  const Repartition<uint16_t, decltype(d)> dw;
+  using VW = VFromD<decltype(dw)>;
+  const VW low_byte = Set(dw, 0x00FF);
+  const VW high_byte = Set(dw, 0xFF00);
+  const VW v16 = BitCast(dw, v);
+  const VW bits16 = BitCast(dw, bits);
+  // Even bytes: shift the whole u16 by the even byte's count; only the low
+  // byte of the result is kept by OddEven below.
+  const VW even_count = And(bits16, low_byte);
+  const VW even_shifted = v16 << even_count;
+  // Odd bytes: isolate the high byte and shift it by the odd byte's count.
+  const VW odd_count = ShiftRight<8>(bits16);
+  const VW odd_shifted = And(v16, high_byte) << odd_count;
+  return OddEven(BitCast(d, odd_shifted), BitCast(d, even_shifted));
+#endif
+}
+
+HWY_API Vec512<uint32_t> operator<<(const Vec512<uint32_t> v,
+                                    const Vec512<uint32_t> bits) {
+  return Vec512<uint32_t>{_mm512_sllv_epi32(v.raw, bits.raw)};  // per-lane count
+}
+
+HWY_API Vec512<uint64_t> operator<<(const Vec512<uint64_t> v,
+                                    const Vec512<uint64_t> bits) {
+  return Vec512<uint64_t>{_mm512_sllv_epi64(v.raw, bits.raw)};  // per-lane count
+}
+
+// Signed left shift is the same as unsigned: forward to the unsigned overload.
+template <typename T, HWY_IF_SIGNED(T)>
+HWY_API Vec512<T> operator<<(const Vec512<T> v, const Vec512<T> bits) {
+  const DFromV<decltype(v)> di;
+  const RebindToUnsigned<decltype(di)> du;  // same lane size, unsigned type
+  return BitCast(di, BitCast(du, v) << BitCast(du, bits));
+}
+
+// ------------------------------ Shr (IfVecThenElse)
+
+HWY_API Vec512<uint16_t> operator>>(const Vec512<uint16_t> v,
+                                    const Vec512<uint16_t> bits) {
+  return Vec512<uint16_t>{_mm512_srlv_epi16(v.raw, bits.raw)};  // logical shift
+}
+
+// 8-bit uses 16-bit shifts: the even and odd bytes are shifted separately.
+HWY_API Vec512<uint8_t> operator>>(Vec512<uint8_t> v, Vec512<uint8_t> bits) {
+  const DFromV<decltype(v)> d;
+  const RepartitionToWide<decltype(d)> dw;
+  using VW = VFromD<decltype(dw)>;
+  const VW mask = Set(dw, 0x00FF);  // selects the even (low) byte
+  const VW vw = BitCast(dw, v);
+  const VW bits16 = BitCast(dw, bits);
+  const VW evens = And(vw, mask) >> And(bits16, mask);  // odd byte cleared first
+  // Shift odd lanes in-place; bits reaching the even byte are dropped by OddEven.
+  const VW odds = vw >> ShiftRight<8>(bits16);
+  return OddEven(BitCast(d, odds), BitCast(d, evens));
+}
+
+HWY_API Vec512<uint32_t> operator>>(const Vec512<uint32_t> v,
+                                    const Vec512<uint32_t> bits) {
+  return Vec512<uint32_t>{_mm512_srlv_epi32(v.raw, bits.raw)};  // logical shift
+}
+
+HWY_API Vec512<uint64_t> operator>>(const Vec512<uint64_t> v,
+                                    const Vec512<uint64_t> bits) {
+  return Vec512<uint64_t>{_mm512_srlv_epi64(v.raw, bits.raw)};  // logical shift
+}
+
+HWY_API Vec512<int16_t> operator>>(const Vec512<int16_t> v,
+                                   const Vec512<int16_t> bits) {
+  return Vec512<int16_t>{_mm512_srav_epi16(v.raw, bits.raw)};  // arithmetic shift
+}
+
+// 8-bit uses 16-bit shifts: the even and odd bytes are shifted separately.
+HWY_API Vec512<int8_t> operator>>(Vec512<int8_t> v, Vec512<int8_t> bits) {
+  const DFromV<decltype(v)> d;
+  const RepartitionToWide<decltype(d)> dw;
+  const RebindToUnsigned<decltype(dw)> dw_u;  // for shifting the counts down
+  using VW = VFromD<decltype(dw)>;
+  const VW mask = Set(dw, 0x00FF);  // selects the even (low) byte
+  const VW vw = BitCast(dw, v);
+  const VW bits16 = BitCast(dw, bits);
+  const VW evens = ShiftRight<8>(ShiftLeft<8>(vw)) >> And(bits16, mask);
+  // Shift odd lanes in-place; their counts are moved down via the unsigned tag.
+  const VW odds = vw >> BitCast(dw, ShiftRight<8>(BitCast(dw_u, bits16)));
+  return OddEven(BitCast(d, odds), BitCast(d, evens));
+}
+
+HWY_API Vec512<int32_t> operator>>(const Vec512<int32_t> v,
+                                   const Vec512<int32_t> bits) {
+  return Vec512<int32_t>{_mm512_srav_epi32(v.raw, bits.raw)};  // arithmetic shift
+}
+
+HWY_API Vec512<int64_t> operator>>(const Vec512<int64_t> v,
+                                   const Vec512<int64_t> bits) {
+  return Vec512<int64_t>{_mm512_srav_epi64(v.raw, bits.raw)};  // arithmetic shift
+}
+
+// ------------------------------ MulEven/Odd (Shuffle2301, InterleaveLower)
+
+HWY_INLINE Vec512<uint64_t> MulEven(const Vec512<uint64_t> a,
+                                    const Vec512<uint64_t> b) {
+  const DFromV<decltype(a)> du64;
+  const RepartitionToNarrow<decltype(du64)> du32;
+  const auto maskL = Set(du64, 0xFFFFFFFFULL);  // keeps the low 32 bits
+  const auto a32 = BitCast(du32, a);
+  const auto b32 = BitCast(du32, b);
+  // Inputs for MulEven: we only need the lower 32 bits
+  const auto aH = Shuffle2301(a32);
+  const auto bH = Shuffle2301(b32);
+
+  // Knuth double-word multiplication. We use 32x32 = 64 MulEven and only need
+  // the even (lower 64 bits of every 128-bit block) results. See
+  // https://github.com/hcs0/Hackers-Delight/blob/master/muldwu.c.txt
+  const auto aLbL = MulEven(a32, b32);
+  const auto w3 = aLbL & maskL;
+
+  const auto t2 = MulEven(aH, b32) + ShiftRight<32>(aLbL);
+  const auto w2 = t2 & maskL;
+  const auto w1 = ShiftRight<32>(t2);
+
+  const auto t = MulEven(a32, bH) + w2;
+  const auto k = ShiftRight<32>(t);
+
+  const auto mulH = MulEven(aH, bH) + w1 + k;  // upper 64 bits of the product
+  const auto mulL = ShiftLeft<32>(t) + w3;     // lower 64 bits of the product
+  return InterleaveLower(mulL, mulH);
+}
+
+HWY_INLINE Vec512<uint64_t> MulOdd(const Vec512<uint64_t> a,
+                                   const Vec512<uint64_t> b) {
+  const DFromV<decltype(a)> du64;
+  const RepartitionToNarrow<decltype(du64)> du32;
+  const auto maskL = Set(du64, 0xFFFFFFFFULL);  // keeps the low 32 bits
+  const auto a32 = BitCast(du32, a);
+  const auto b32 = BitCast(du32, b);
+  // Inputs for MulEven: we only need bits [95:64] (= upper half of input)
+  const auto aH = Shuffle2301(a32);
+  const auto bH = Shuffle2301(b32);
+
+  // Same steps as MulEven, but we use the odd results (upper 64 bits per block).
+  const auto aLbL = MulEven(a32, b32);
+  const auto w3 = aLbL & maskL;
+
+  const auto t2 = MulEven(aH, b32) + ShiftRight<32>(aLbL);
+  const auto w2 = t2 & maskL;
+  const auto w1 = ShiftRight<32>(t2);
+
+  const auto t = MulEven(a32, bH) + w2;
+  const auto k = ShiftRight<32>(t);
+
+  const auto mulH = MulEven(aH, bH) + w1 + k;  // upper 64 bits of the product
+  const auto mulL = ShiftLeft<32>(t) + w3;     // lower 64 bits of the product
+  return InterleaveUpper(du64, mulL, mulH);
+}
+
+// ------------------------------ WidenMulPairwiseAdd
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> WidenMulPairwiseAdd(D /*d32*/, Vec512<int16_t> a,
+                                      Vec512<int16_t> b) {
+  return VFromD<D>{_mm512_madd_epi16(a.raw, b.raw)};  // i16 pair products -> i32
+}
+
+// ------------------------------ SatWidenMulPairwiseAdd
+
+template <class DI16, HWY_IF_V_SIZE_D(DI16, 64), HWY_IF_I16_D(DI16)>
+HWY_API VFromD<DI16> SatWidenMulPairwiseAdd(
+    DI16 /* tag */, VFromD<Repartition<uint8_t, DI16>> a,
+    VFromD<Repartition<int8_t, DI16>> b) {  // a is unsigned, b is signed
+  return VFromD<DI16>{_mm512_maddubs_epi16(a.raw, b.raw)};  // saturating i16 sums
+}
+
+// ------------------------------ ReorderWidenMulAccumulate
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> ReorderWidenMulAccumulate(D d, Vec512<int16_t> a,
+                                            Vec512<int16_t> b,
+                                            const VFromD<D> sum0,
+                                            VFromD<D>& /*sum1*/) {
+  (void)d;  // only used by the fallback branch below
+#if HWY_TARGET <= HWY_AVX3_DL
+  return VFromD<D>{_mm512_dpwssd_epi32(sum0.raw, a.raw, b.raw)};
+#else
+  return sum0 + WidenMulPairwiseAdd(d, a, b);  // same result in two steps
+#endif
+}
+
+HWY_API Vec512<int32_t> RearrangeToOddPlusEven(const Vec512<int32_t> sum0,
+                                               Vec512<int32_t> /*sum1*/) {
+  return sum0;  // invariant already holds; sum1 is ignored
+}
+
+HWY_API Vec512<uint32_t> RearrangeToOddPlusEven(const Vec512<uint32_t> sum0,
+                                                Vec512<uint32_t> /*sum1*/) {
+  return sum0;  // invariant already holds; sum1 is ignored
+}
+
+// ------------------------------ SumOfMulQuadAccumulate
+
+#if HWY_TARGET <= HWY_AVX3_DL
+
+template <class DI32, HWY_IF_V_SIZE_D(DI32, 64)>
+HWY_API VFromD<DI32> SumOfMulQuadAccumulate(  // sum + groups of four u8*i8
+    DI32 /*di32*/, VFromD<Repartition<uint8_t, DI32>> a_u,
+    VFromD<Repartition<int8_t, DI32>> b_i, VFromD<DI32> sum) {
+  return VFromD<DI32>{_mm512_dpbusd_epi32(sum.raw, a_u.raw, b_i.raw)};
+}
+
+#endif
+
+// ------------------------------ Reductions
+
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I32_D(D)>
+HWY_API TFromD<D> ReduceSum(D, VFromD<D> v) {
+  return _mm512_reduce_add_epi32(v.raw);  // scalar sum of all lanes
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I64_D(D)>
+HWY_API TFromD<D> ReduceSum(D, VFromD<D> v) {
+  return _mm512_reduce_add_epi64(v.raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U32_D(D)>
+HWY_API TFromD<D> ReduceSum(D, VFromD<D> v) {
+  return static_cast<uint32_t>(_mm512_reduce_add_epi32(v.raw));  // reuses signed
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U64_D(D)>
+HWY_API TFromD<D> ReduceSum(D, VFromD<D> v) {
+  return static_cast<uint64_t>(_mm512_reduce_add_epi64(v.raw));  // reuses signed
+}
+#if HWY_HAVE_FLOAT16
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F16_D(D)>
+HWY_API TFromD<D> ReduceSum(D, VFromD<D> v) {
+  return _mm512_reduce_add_ph(v.raw);
+}
+#endif  // HWY_HAVE_FLOAT16
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API TFromD<D> ReduceSum(D, VFromD<D> v) {
+  return _mm512_reduce_add_ps(v.raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API TFromD<D> ReduceSum(D, VFromD<D> v) {
+  return _mm512_reduce_add_pd(v.raw);
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U16_D(D)>
+HWY_API TFromD<D> ReduceSum(D d, VFromD<D> v) {
+  const RepartitionToWide<decltype(d)> d32;
+  const auto even = And(BitCast(d32, v), Set(d32, 0xFFFF));  // zero-extended
+  const auto odd = ShiftRight<16>(BitCast(d32, v));  // odd lanes moved down
+  const auto sum = ReduceSum(d32, even + odd);
+  return static_cast<uint16_t>(sum);  // truncated back to 16 bits
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I16_D(D)>
+HWY_API TFromD<D> ReduceSum(D d, VFromD<D> v) {
+  const RepartitionToWide<decltype(d)> d32;
+  // Sign-extend the even lanes: move them to the top half, then shift back down.
+  const auto even = ShiftRight<16>(ShiftLeft<16>(BitCast(d32, v)));
+  const auto odd = ShiftRight<16>(BitCast(d32, v));  // odd lanes moved down
+  const auto sum = ReduceSum(d32, even + odd);
+  return static_cast<int16_t>(sum);  // truncated back to 16 bits
+}
+
+// Returns the sum of all lanes, broadcast to every lane.
+template <class D, HWY_IF_V_SIZE_D(D, 64)>
+HWY_API VFromD<D> SumOfLanes(D d, VFromD<D> v) {
+  return Set(d, ReduceSum(d, v));  // scalar reduction, then broadcast
+}
+
+// Returns the minimum of all lanes, broadcast to every lane.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> MinOfLanes(D d, VFromD<D> v) {
+  return Set(d, _mm512_reduce_min_epi32(v.raw));  // scalar min, then broadcast
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I64_D(D)>
+HWY_API VFromD<D> MinOfLanes(D d, VFromD<D> v) {
+  return Set(d, _mm512_reduce_min_epi64(v.raw));
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> MinOfLanes(D d, VFromD<D> v) {
+  return Set(d, _mm512_reduce_min_epu32(v.raw));
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U64_D(D)>
+HWY_API VFromD<D> MinOfLanes(D d, VFromD<D> v) {
+  return Set(d, _mm512_reduce_min_epu64(v.raw));
+}
+#if HWY_HAVE_FLOAT16
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F16_D(D)>
+HWY_API VFromD<D> MinOfLanes(D d, VFromD<D> v) {
+  return Set(d, _mm512_reduce_min_ph(v.raw));
+}
+#endif  // HWY_HAVE_FLOAT16
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> MinOfLanes(D d, VFromD<D> v) {
+  return Set(d, _mm512_reduce_min_ps(v.raw));
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> MinOfLanes(D d, VFromD<D> v) {
+  return Set(d, _mm512_reduce_min_pd(v.raw));
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> MinOfLanes(D d, VFromD<D> v) {
+  const RepartitionToWide<decltype(d)> d32;
+  const auto even = And(BitCast(d32, v), Set(d32, 0xFFFF));  // zero-extended
+  const auto odd = ShiftRight<16>(BitCast(d32, v));  // odd lanes moved down
+  const auto min = MinOfLanes(d32, Min(even, odd));
+  // min sits in the even 16-bit lanes; also broadcast into odd lanes.
+  return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> MinOfLanes(D d, VFromD<D> v) {
+  const RepartitionToWide<decltype(d)> d32;
+  // Sign-extend the even lanes: move them to the top half, then shift back down.
+  const auto even = ShiftRight<16>(ShiftLeft<16>(BitCast(d32, v)));
+  const auto odd = ShiftRight<16>(BitCast(d32, v));  // odd lanes moved down
+  const auto min = MinOfLanes(d32, Min(even, odd));
+  // min sits in the even 16-bit lanes; also broadcast into odd lanes.
+  return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
+}
+
+// Returns the maximum of all lanes, broadcast to every lane.
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I32_D(D)>
+HWY_API VFromD<D> MaxOfLanes(D d, VFromD<D> v) {
+  return Set(d, _mm512_reduce_max_epi32(v.raw));  // scalar max, then broadcast
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I64_D(D)>
+HWY_API VFromD<D> MaxOfLanes(D d, VFromD<D> v) {
+  return Set(d, _mm512_reduce_max_epi64(v.raw));
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U32_D(D)>
+HWY_API VFromD<D> MaxOfLanes(D d, VFromD<D> v) {
+  return Set(d, _mm512_reduce_max_epu32(v.raw));
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U64_D(D)>
+HWY_API VFromD<D> MaxOfLanes(D d, VFromD<D> v) {
+  return Set(d, _mm512_reduce_max_epu64(v.raw));
+}
+#if HWY_HAVE_FLOAT16
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F16_D(D)>
+HWY_API VFromD<D> MaxOfLanes(D d, VFromD<D> v) {
+  return Set(d, _mm512_reduce_max_ph(v.raw));
+}
+#endif  // HWY_HAVE_FLOAT16
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F32_D(D)>
+HWY_API VFromD<D> MaxOfLanes(D d, VFromD<D> v) {
+  return Set(d, _mm512_reduce_max_ps(v.raw));
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_F64_D(D)>
+HWY_API VFromD<D> MaxOfLanes(D d, VFromD<D> v) {
+  return Set(d, _mm512_reduce_max_pd(v.raw));
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_U16_D(D)>
+HWY_API VFromD<D> MaxOfLanes(D d, VFromD<D> v) {
+  const RepartitionToWide<decltype(d)> d32;
+  const auto odd = ShiftRight<16>(BitCast(d32, v));
+  const auto even = And(BitCast(d32, v), Set(d32, 0xFFFF));
+  const auto max = MaxOfLanes(d32, Max(even, odd));
+  // The result sits in the even 16-bit lanes; copy it into the odd ones too.
+  return OddEven(BitCast(d, ShiftLeft<16>(max)), BitCast(d, max));
+}
+template <class D, HWY_IF_V_SIZE_D(D, 64), HWY_IF_I16_D(D)>
+HWY_API VFromD<D> MaxOfLanes(D d, VFromD<D> v) {
+  const RepartitionToWide<decltype(d)> d32;
+  // Widen with sign extension: odd lanes shift down, even lanes go up and back.
+  const auto odd = ShiftRight<16>(BitCast(d32, v));
+  const auto even = ShiftRight<16>(ShiftLeft<16>(BitCast(d32, v)));
+  const auto max = MaxOfLanes(d32, Max(even, odd));
+  // The result sits in the even 16-bit lanes; copy it into the odd ones too.
+  return OddEven(BitCast(d, ShiftLeft<16>(max)), BitCast(d, max));
+}
+
+// -------------------- LeadingZeroCount, TrailingZeroCount, HighestSetBitIndex
+
+template <class V, HWY_IF_UI32(TFromV<V>), HWY_IF_V_SIZE_D(DFromV<V>, 64)>
+HWY_API V LeadingZeroCount(V v) {
+  return V{_mm512_lzcnt_epi32(v.raw)};  // native per-lane count, 32-bit lanes
+}
+
+template <class V, HWY_IF_UI64(TFromV<V>), HWY_IF_V_SIZE_D(DFromV<V>, 64)>
+HWY_API V LeadingZeroCount(V v) {
+  return V{_mm512_lzcnt_epi64(v.raw)};  // native per-lane count, 64-bit lanes
+}
+
+namespace detail {
+
+template <class V, HWY_IF_UNSIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2)),
+          HWY_IF_LANES_LE_D(DFromV<V>, 16)>
+HWY_INLINE V Lzcnt32ForU8OrU16(V v) {
+  const DFromV<decltype(v)> d;
+  const Rebind<int32_t, decltype(d)> di32;
+  const Rebind<uint32_t, decltype(d)> du32;
+
+  const auto v_lz_count = LeadingZeroCount(PromoteTo(du32, v));  // 32-bit count
+  return DemoteTo(d, BitCast(di32, v_lz_count));  // back to the input lane type
+}
+
+template <class V, HWY_IF_UNSIGNED_V(V),
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2)),
+          HWY_IF_LANES_D(DFromV<V>, 32)>
+HWY_INLINE VFromD<Rebind<uint16_t, DFromV<V>>> Lzcnt32ForU8OrU16AsU16(V v) {
+  const DFromV<decltype(v)> d;
+  const Half<decltype(d)> dh;  // each half is promoted to u32 separately
+  const Rebind<int32_t, decltype(dh)> di32;
+  const Rebind<uint32_t, decltype(dh)> du32;
+  const Rebind<uint16_t, decltype(d)> du16;
+
+  const auto lo_v_lz_count =
+      LeadingZeroCount(PromoteTo(du32, LowerHalf(dh, v)));
+  const auto hi_v_lz_count =
+      LeadingZeroCount(PromoteTo(du32, UpperHalf(dh, v)));
+  return OrderedDemote2To(du16, BitCast(di32, lo_v_lz_count),  // lo, then hi
+                          BitCast(di32, hi_v_lz_count));
+}
+
+HWY_INLINE Vec256<uint8_t> Lzcnt32ForU8OrU16(Vec256<uint8_t> v) {
+  const DFromV<decltype(v)> d;
+  const Rebind<int16_t, decltype(d)> di16;
+  return DemoteTo(d, BitCast(di16, Lzcnt32ForU8OrU16AsU16(v)));  // u16 -> u8
+}
+
+HWY_INLINE Vec512<uint8_t> Lzcnt32ForU8OrU16(Vec512<uint8_t> v) {
+  const DFromV<decltype(v)> d;
+  const Half<decltype(d)> dh;  // each half is counted separately
+  const Rebind<int16_t, decltype(dh)> di16;
+
+  const auto lo_half = LowerHalf(dh, v);
+  const auto hi_half = UpperHalf(dh, v);
+
+  const auto lo_v_lz_count = BitCast(di16, Lzcnt32ForU8OrU16AsU16(lo_half));
+  const auto hi_v_lz_count = BitCast(di16, Lzcnt32ForU8OrU16AsU16(hi_half));
+  return OrderedDemote2To(d, lo_v_lz_count, hi_v_lz_count);  // lo, then hi
+}
+
+HWY_INLINE Vec512<uint16_t> Lzcnt32ForU8OrU16(Vec512<uint16_t> v) {
+  return Lzcnt32ForU8OrU16AsU16(v);  // result is already in u16 lanes
+}
+
+}  // namespace detail
+
+template <class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V),
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2))>
+HWY_API V LeadingZeroCount(V v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+
+  constexpr TU kNumOfBitsInT{sizeof(TU) * 8};  // lane width in bits
+  const auto v_lzcnt32 = detail::Lzcnt32ForU8OrU16(BitCast(du, v));
+  return BitCast(d, Min(v_lzcnt32 - Set(du, TU{32 - kNumOfBitsInT}),  // un-widen
+                        Set(du, TU{kNumOfBitsInT})));  // capped at lane width
+}
+
+template <class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V),
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 1) | (1 << 2))>
+HWY_API V HighestSetBitIndex(V v) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+  using TU = TFromD<decltype(du)>;
+  return BitCast(d,  // 31 minus the count taken at 32-bit width
+                 Set(du, TU{31}) - detail::Lzcnt32ForU8OrU16(BitCast(du, v)));
+}
+
+template <class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V),
+          HWY_IF_T_SIZE_ONE_OF_V(V, (1 << 4) | (1 << 8))>
+HWY_API V HighestSetBitIndex(V v) {
+  const DFromV<decltype(v)> d;
+  using T = TFromD<decltype(d)>;  // index = (bits in T - 1) - leading zeros
+  return BitCast(d, Set(d, T{sizeof(T) * 8 - 1}) - LeadingZeroCount(v));
+}
+
+template <class V, HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V)>
+HWY_API V TrailingZeroCount(V v) {
+  const DFromV<decltype(v)> d;
+  const RebindToSigned<decltype(d)> di;
+  using T = TFromD<decltype(d)>;
+
+  const auto vi = BitCast(di, v);
+  const auto lowest_bit = BitCast(d, And(vi, Neg(vi)));  // v & -v
+  constexpr T kNumOfBitsInT{sizeof(T) * 8};  // returned where the mask is set
+  const auto bit_idx = HighestSetBitIndex(lowest_bit);  // presumably -1 if v == 0
+  return IfThenElse(MaskFromVec(bit_idx), Set(d, kNumOfBitsInT), bit_idx);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+// Note that the GCC warnings are not suppressed if we only wrap the *intrin.h -
+// the warning seems to be issued at the call site of intrinsics, i.e. our code.
+HWY_DIAGNOSTICS(pop)
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/per_target.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/per_target.cc
new file mode 100644
index 0000000000..63e696163b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/per_target.cc
@@ -0,0 +1,56 @@
+// Copyright 2022 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/per_target.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/per_target.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+#include "hwy/highway.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+size_t GetVectorBytes() { return Lanes(ScalableTag<uint8_t>()); }  // u8 lanes
+bool GetHaveFloat16() { return HWY_HAVE_FLOAT16 != 0; }  // macro as a bool
+bool GetHaveFloat64() { return HWY_HAVE_FLOAT64 != 0; }  // macro as a bool
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(GetVectorBytes);
+HWY_EXPORT(GetHaveFloat16);
+HWY_EXPORT(GetHaveFloat64);
+}  // namespace
+
+HWY_DLLEXPORT size_t VectorBytes() {
+  return HWY_DYNAMIC_DISPATCH(GetVectorBytes)();  // forwards to GetVectorBytes
+}
+
+HWY_DLLEXPORT bool HaveFloat16() {
+  return HWY_DYNAMIC_DISPATCH(GetHaveFloat16)();  // forwards to GetHaveFloat16
+}
+
+HWY_DLLEXPORT bool HaveFloat64() {
+  return HWY_DYNAMIC_DISPATCH(GetHaveFloat64)();  // forwards to GetHaveFloat64
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/per_target.h b/third-party/libjxl/libjxl/third_party/highway/hwy/per_target.h
new file mode 100644
index 0000000000..52c316ec58
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/per_target.h
@@ -0,0 +1,44 @@
+// Copyright 2022 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_PER_TARGET_H_
+#define HIGHWAY_HWY_PER_TARGET_H_
+
+#include <stddef.h>
+
+#include "hwy/highway_export.h"
+
+// Functions to query the capabilities of the target that will be called by
+// HWY_DYNAMIC_DISPATCH, which is not necessarily the current target.
+
+namespace hwy {
+
+// Returns size in bytes of a vector, i.e. `Lanes(ScalableTag<uint8_t>())`.
+//
+// Do not cache the result, which may change after calling DisableTargets, or
+// if software requests a different vector size (e.g. when entering/exiting SME
+// streaming mode). Instead call this right before the code that depends on the
+// result, without any DisableTargets or SME transition in-between. Note that
+// this involves an indirect call, so prefer not to call this frequently nor
+// unnecessarily.
+HWY_DLLEXPORT size_t VectorBytes();
+
+// Returns whether 16/64-bit floats are a supported lane type.
+HWY_DLLEXPORT bool HaveFloat16();
+HWY_DLLEXPORT bool HaveFloat64();
+
+}  // namespace hwy
+
+#endif  // HIGHWAY_HWY_PER_TARGET_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/print-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/print-inl.h
new file mode 100644
index 0000000000..46881a29da
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/print-inl.h
@@ -0,0 +1,62 @@
+// Copyright 2022 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Print() function
+
+#include "hwy/highway.h"
+#include "hwy/print.h"
+
+// Per-target include guard
+#if defined(HIGHWAY_HWY_PRINT_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_PRINT_INL_H_
+#undef HIGHWAY_HWY_PRINT_INL_H_
+#else
+#define HIGHWAY_HWY_PRINT_INL_H_
+#endif
+
+#if HWY_TARGET == HWY_RVV
+#include "hwy/aligned_allocator.h"
+#endif
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Prints up to `max_lanes` lanes of `v` near index `lane_u`, in memory order.
+template <class D, class V = VFromD<D>>
+HWY_API void Print(const D d, const char* caption, V v, size_t lane_u = 0,
+                   size_t max_lanes = 7) {
+  const size_t N = Lanes(d);
+  using T = TFromD<D>;
+#if HWY_TARGET == HWY_RVV
+  auto storage = AllocateAligned<T>(N);
+  T* HWY_RESTRICT lanes = storage.get();
+#else
+  // This works around an SVE compile error on GCC 11 and 12. Calling
+  // AllocateAligned here would seem to require it be marked with HWY_ATTR.
+  HWY_ALIGN T lanes[MaxLanes(d)];
+#endif
+  Store(v, d, lanes);  // copy the lanes to memory for the type-erased printer
+
+  const auto info = hwy::detail::MakeTypeInfo<T>();
+  hwy::detail::PrintArray(info, caption, lanes, N, lane_u, max_lanes);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#endif  // per-target include guard
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/print.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/print.cc
new file mode 100644
index 0000000000..ac9d3f873d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/print.cc
@@ -0,0 +1,112 @@
+// Copyright 2022 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/print.h"
+
+#include <stdio.h>
+
+#include "hwy/base.h"
+#include "hwy/detect_compiler_arch.h"
+
+namespace hwy {
+namespace detail {
+
+HWY_DLLEXPORT void TypeName(const TypeInfo& info, size_t N, char* string100) {
+  // Writes e.g. "f32x4", or "u8" when N == 1, into the caller's buffer.
+  const int bits = static_cast<int>(info.sizeof_t * 8);
+  // Prefix: 'f' for float, otherwise 'i' (signed) or 'u' (unsigned) integer.
+  const char prefix = !info.is_float ? (info.is_signed ? 'i' : 'u') : 'f';
+  if (N != 1) {
+    // NOLINTNEXTLINE
+    snprintf(string100, 64, "%c%dx%d", prefix, bits, static_cast<int>(N));
+  } else {
+    // Scalar: omit the xN suffix.
+    snprintf(string100, 64, "%c%d", prefix, bits);  // NOLINT
+  }
+}
+
+HWY_DLLEXPORT void ToString(const TypeInfo& info, const void* ptr,  // one lane
+                            char* string100) {
+  if (info.sizeof_t == 1) {
+    uint8_t byte;  // 8-bit lanes are printed as hex without checking signedness
+    CopyBytes<1>(ptr, &byte);  // endian-safe: we ensured sizeof(T)=1.
+    snprintf(string100, 100, "0x%02X", byte);  // NOLINT
+  } else if (info.sizeof_t == 2) {
+    if (info.is_bf16) {
+      const double value = static_cast<double>(F32FromBF16Mem(ptr));
+      snprintf(string100, 100, "%.3f", value);  // NOLINT
+    } else if (info.is_float) {
+      const double value = static_cast<double>(F32FromF16Mem(ptr));
+      snprintf(string100, 100, "%.4f", value);  // NOLINT
+    } else {
+      uint16_t bits;  // 16-bit integers are printed as hex
+      CopyBytes<2>(ptr, &bits);
+      snprintf(string100, 100, "0x%04X", bits);  // NOLINT
+    }
+  } else if (info.sizeof_t == 4) {
+    if (info.is_float) {
+      float value;
+      CopyBytes<4>(ptr, &value);
+      snprintf(string100, 100, "%.9f", static_cast<double>(value));  // NOLINT
+    } else if (info.is_signed) {
+      int32_t value;
+      CopyBytes<4>(ptr, &value);
+      snprintf(string100, 100, "%d", value);  // NOLINT
+    } else {
+      uint32_t value;
+      CopyBytes<4>(ptr, &value);
+      snprintf(string100, 100, "%u", value);  // NOLINT
+    }
+  } else {
+    HWY_ASSERT(info.sizeof_t == 8);  // only 1, 2, 4 and 8-byte lanes are handled
+    if (info.is_float) {
+      double value;
+      CopyBytes<8>(ptr, &value);
+      snprintf(string100, 100, "%.18f", value);  // NOLINT
+    } else {
+      const uint8_t* ptr8 = reinterpret_cast<const uint8_t*>(ptr);
+      uint32_t lo, hi;  // printed as two 32-bit hex halves, hi first
+      CopyBytes<4>(ptr8 + (HWY_IS_LITTLE_ENDIAN ? 0 : 4), &lo);
+      CopyBytes<4>(ptr8 + (HWY_IS_LITTLE_ENDIAN ? 4 : 0), &hi);
+      snprintf(string100, 100, "0x%08x%08x", hi, lo);  // NOLINT
+    }
+  }
+}
+
+HWY_DLLEXPORT void PrintArray(const TypeInfo& info, const char* caption,
+                              const void* array_void, size_t N, size_t lane_u,
+                              size_t max_lanes) {
+  const uint8_t* array_bytes = reinterpret_cast<const uint8_t*>(array_void);
+
+  char type_name[100];
+  TypeName(info, N, type_name);
+
+  const intptr_t lane = intptr_t(lane_u);  // signed so that lane - 2 can go below 0
+  const size_t begin = static_cast<size_t>(HWY_MAX(0, lane - 2));
+  const size_t end = HWY_MIN(begin + max_lanes, N);  // exclusive upper bound
+  fprintf(stderr, "%s %s [%d+ ->]:\n  ", type_name, caption,
+          static_cast<int>(begin));
+  for (size_t i = begin; i < end; ++i) {
+    const void* ptr = array_bytes + i * info.sizeof_t;  // address of lane i
+    char str[100];
+    ToString(info, ptr, str);
+    fprintf(stderr, "%s,", str);
+  }
+  if (begin >= end) fprintf(stderr, "(out of bounds)");  // no lane was printed
+  fprintf(stderr, "\n");
+}
+
+}  // namespace detail
+}  // namespace hwy
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/print.h b/third-party/libjxl/libjxl/third_party/highway/hwy/print.h
new file mode 100644
index 0000000000..e61631e650
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/print.h
@@ -0,0 +1,75 @@
+// Copyright 2022 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HWY_PRINT_H_
+#define HWY_PRINT_H_
+
+// Helpers for printing vector lanes.
+
+#include <stddef.h>
+#include <stdio.h>
+
+#include "hwy/base.h"
+#include "hwy/highway_export.h"
+
+namespace hwy {
+
+namespace detail {
+
+// For implementing value comparisons etc. as type-erased functions to reduce
+// template bloat.
+struct TypeInfo {
+  size_t sizeof_t;  // sizeof(T) in bytes
+  bool is_float;    // IsFloat<T>()
+  bool is_signed;   // IsSigned<T>()
+  bool is_bf16;     // T is bfloat16_t
+};
+
+template <typename T>
+HWY_INLINE TypeInfo MakeTypeInfo() {  // fills every TypeInfo field from T
+  TypeInfo info;
+  info.sizeof_t = sizeof(T);
+  info.is_float = IsFloat<T>();
+  info.is_signed = IsSigned<T>();
+  info.is_bf16 = IsSame<T, bfloat16_t>();
+  return info;
+}
+
+HWY_DLLEXPORT void TypeName(const TypeInfo& info, size_t N, char* string100);
+HWY_DLLEXPORT void ToString(const TypeInfo& info, const void* ptr,
+                            char* string100);
+
+HWY_DLLEXPORT void PrintArray(const TypeInfo& info, const char* caption,
+                              const void* array_void, size_t N,
+                              size_t lane_u = 0, size_t max_lanes = 7);
+
+}  // namespace detail
+
+template <typename T>
+HWY_NOINLINE void PrintValue(T value) {
+  char str[100];  // ToString writes at most 100 bytes
+  detail::ToString(hwy::detail::MakeTypeInfo<T>(), &value, str);
+  fprintf(stderr, "%s,", str);  // value plus a trailing comma, no newline
+}
+
+template <typename T>
+HWY_NOINLINE void PrintArray(const T* value, size_t count) {
+  detail::PrintArray(hwy::detail::MakeTypeInfo<T>(), "", value, count, 0,
+                     count);  // empty caption; lane_u = 0, max_lanes = count
+}
+
+}  // namespace hwy
+
+#endif  // HWY_PRINT_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/robust_statistics.h b/third-party/libjxl/libjxl/third_party/highway/hwy/robust_statistics.h
new file mode 100644
index 0000000000..1cf3e5d2f6
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/robust_statistics.h
@@ -0,0 +1,148 @@
+// Copyright 2023 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_ROBUST_STATISTICS_H_
+#define HIGHWAY_HWY_ROBUST_STATISTICS_H_
+
+#include <algorithm>  // std::sort, std::find_if
+#include <limits>
+#include <utility>  // std::pair
+#include <vector>
+
+#include "hwy/base.h"
+
+namespace hwy {
+namespace robust_statistics {
+
+// Sorts integral values in ascending order (e.g. for Mode). About 3x faster
+// than std::sort for input distributions with very few unique values.
+template <class T>
+void CountingSort(T* values, size_t num_values) {
+  // Histogram of the distinct values, kept as (value, occurrences) pairs and
+  // searched linearly (similar to flat_map).
+  using Entry = std::pair<T, int>;
+  std::vector<Entry> histogram;
+  for (const T* it = values; it != values + num_values; ++it) {
+    const T current = *it;
+    auto match = histogram.begin();
+    while (match != histogram.end() && !(match->first == current)) {
+      ++match;
+    }
+    if (match != histogram.end()) {
+      match->second += 1;
+    } else {
+      histogram.emplace_back(current, 1);
+    }
+  }
+
+  // Order the entries; pairs compare by their value (pair.first) first.
+  std::sort(histogram.begin(), histogram.end());
+
+  // Overwrite the input with one run per distinct value, in ascending order.
+  T* HWY_RESTRICT out = values;
+  for (const Entry& entry : histogram) {
+    T* const run_end = out + entry.second;
+    std::fill(out, run_end, entry.first);
+    out = run_end;
+  }
+  HWY_ASSERT(out == values + num_values);
+}
+
+// @return i in [idx_begin, idx_begin + half_count) that minimizes
+// sorted[i + half_count] - sorted[i]. Ties are resolved in favor of the
+// smallest such i. "sorted" must be in ascending order and contain at least
+// idx_begin + 2 * half_count elements.
+template <typename T>
+size_t MinRange(const T* const HWY_RESTRICT sorted, const size_t idx_begin,
+                const size_t half_count) {
+  T min_range = std::numeric_limits<T>::max();
+  // Start at idx_begin rather than 0: if no range is strictly smaller than
+  // numeric_limits<T>::max() (or half_count is 0), the loop never updates
+  // min_idx, and the result must still lie in the documented interval.
+  size_t min_idx = idx_begin;
+
+  for (size_t idx = idx_begin; idx < idx_begin + half_count; ++idx) {
+    HWY_ASSERT(sorted[idx] <= sorted[idx + half_count]);
+    const T range = sorted[idx + half_count] - sorted[idx];
+    if (range < min_range) {
+      min_range = range;
+      min_idx = idx;
+    }
+  }
+
+  return min_idx;
+}
+
+// Returns an estimate of the mode by calling MinRange on successively
+// halved intervals. "sorted" must be in ascending order and non-empty. This
+// is the Half Sample Mode estimator proposed by Bickel in "On a fast, robust
+// estimator of the mode", with complexity O(N log N). The mode is less
+// affected by outliers in highly-skewed distributions than the median.
+// The averaging operation below assumes "T" is an unsigned integer type.
+template <typename T>
+T ModeOfSorted(const T* const HWY_RESTRICT sorted, const size_t num_values) {
+  // sorted[0] is read unconditionally below, so empty input is invalid
+  // (same precondition as Median).
+  HWY_ASSERT(num_values != 0);
+
+  size_t idx_begin = 0;
+  size_t half_count = num_values / 2;
+  while (half_count > 1) {
+    idx_begin = MinRange(sorted, idx_begin, half_count);
+    half_count >>= 1;
+  }
+
+  const T x = sorted[idx_begin + 0];
+  if (half_count == 0) {
+    return x;  // Only reached for num_values == 1.
+  }
+  HWY_ASSERT(half_count == 1);
+  // Rounded-up average of x and its successor y. For unsigned integers this
+  // equals (x + y + 1) / 2, but it is derived from the difference so that the
+  // intermediate sum cannot wrap around when x and y are large.
+  const T y = sorted[idx_begin + 1];
+  const T diff = static_cast<T>(y - x);  // >= 0 because input is ascending.
+  return static_cast<T>(x + (diff - diff / 2));
+}
+
+// Returns the mode estimate of ModeOfSorted. Side effect: sorts "values" (via
+// CountingSort).
+template <typename T>
+T Mode(T* values, const size_t num_values) {
+  CountingSort(values, num_values);
+  const T mode = ModeOfSorted(values, num_values);
+  return mode;
+}
+
+// Array overload: deduces the element count N and forwards to the
+// pointer/count overload. Side effect: sorts "values".
+template <typename T, size_t N>
+T Mode(T (&values)[N]) {
+  T* const first = values;
+  return Mode(first, N);
+}
+
+// Returns the median value; "values" must not be empty. Side effect: sorts
+// "values".
+template <typename T>
+T Median(T* values, const size_t num_values) {
+  HWY_ASSERT(num_values != 0);
+  std::sort(values, values + num_values);
+  const size_t mid = num_values / 2;
+  const bool is_even = (num_values % 2) == 0;
+  if (is_even) {
+    // Two middle elements: return (their sum + 1) / 2.
+    return (values[mid] + values[mid - 1] + 1) / 2;
+  }
+  // Odd count: there is a single middle element.
+  return values[mid];
+}
+
+// Returns a robust measure of variability: the Median of the absolute
+// differences between each value and "median". Differences are computed in
+// int64_t and converted back to T. "values" must not be empty.
+template <typename T>
+T MedianAbsoluteDeviation(const T* values, const size_t num_values,
+                          const T median) {
+  HWY_ASSERT(num_values != 0);
+  const int64_t center = static_cast<int64_t>(median);
+  std::vector<T> deviations;
+  deviations.reserve(num_values);
+  for (const T* it = values; it != values + num_values; ++it) {
+    const int64_t delta = static_cast<int64_t>(*it) - center;
+    deviations.push_back(static_cast<T>(delta < 0 ? -delta : delta));
+  }
+  return Median(deviations.data(), num_values);
+}
+
+}  // namespace robust_statistics
+}  // namespace hwy
+
+#endif  // HIGHWAY_HWY_ROBUST_STATISTICS_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/targets.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/targets.cc
new file mode 100644
index 0000000000..8cf2005fca
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/targets.cc
@@ -0,0 +1,567 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/targets.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>  // abort / exit
+
+#include "hwy/highway.h"
+#include "hwy/per_target.h"  // VectorBytes
+
+#if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
+#include "sanitizer/common_interface_defs.h"  // __sanitizer_print_stack_trace
+#endif
+
+#if HWY_ARCH_X86
+#include <xmmintrin.h>
+#if HWY_COMPILER_MSVC
+#include <intrin.h>
+#else  // !HWY_COMPILER_MSVC
+#include <cpuid.h>
+#endif  // HWY_COMPILER_MSVC
+
+#elif (HWY_ARCH_ARM || HWY_ARCH_PPC) && HWY_OS_LINUX
+// sys/auxv.h does not always include asm/hwcap.h, or define HWCAP*, hence we
+// still include this directly. See #1199.
+#ifndef TOOLCHAIN_MISS_ASM_HWCAP_H
+#include <asm/hwcap.h>
+#endif
+#ifndef TOOLCHAIN_MISS_SYS_AUXV_H
+#include <sys/auxv.h>
+#endif
+
+#endif  // HWY_ARCH_*
+
+namespace hwy {
+namespace {
+
+// When running tests, this value can be set to the mocked supported targets
+// mask. Only written to from a single thread before the test starts.
+// Zero means no mock is active (SupportedTargets then detects the targets).
+int64_t supported_targets_for_test_ = 0;
+
+// Mask of targets that remain enabled at runtime: DisableTargets stores the
+// bitwise complement of its `disabled_targets` argument here, and
+// SupportedTargets ANDs its result with this mask.
+int64_t supported_mask_ = LimitsMax<int64_t>();
+
+#if HWY_ARCH_X86 && HWY_HAVE_RUNTIME_DISPATCH
+namespace x86 {
+
+// Executes the CPUID instruction with eax=level and ecx=count and stores the
+// resulting registers into abcd = {eax, ebx, ecx, edx} (hence the name abcd).
+HWY_INLINE void Cpuid(const uint32_t level, const uint32_t count,
+                      uint32_t* HWY_RESTRICT abcd) {
+#if HWY_COMPILER_MSVC
+  // The intrinsic writes int; copy each register into the unsigned output.
+  int regs[4];
+  __cpuidex(regs, level, count);
+  abcd[0] = regs[0];
+  abcd[1] = regs[1];
+  abcd[2] = regs[2];
+  abcd[3] = regs[3];
+#else   // HWY_COMPILER_MSVC
+  uint32_t eax;
+  uint32_t ebx;
+  uint32_t ecx;
+  uint32_t edx;
+  __cpuid_count(level, count, eax, ebx, ecx, edx);
+  abcd[0] = eax;
+  abcd[1] = ebx;
+  abcd[2] = ecx;
+  abcd[3] = edx;
+#endif  // HWY_COMPILER_MSVC
+}
+
+// Returns whether bit `index` (0 = least significant) of `reg` is set.
+HWY_INLINE bool IsBitSet(const uint32_t reg, const int index) {
+  const uint32_t mask = 1U << index;
+  return (reg & mask) != 0;
+}
+
+// Returns the lower 32 bits of extended control register 0.
+// Requires CPU support for "OSXSAVE" (see below).
+uint32_t ReadXCR0() {
+#if HWY_COMPILER_MSVC
+  return static_cast<uint32_t>(_xgetbv(0));
+#else   // HWY_COMPILER_MSVC
+  uint32_t xcr0, xcr0_high;
+  const uint32_t index = 0;
+  // 0F 01 D0 is the encoding of XGETBV, emitted as raw bytes (presumably so
+  // that assemblers lacking the mnemonic still accept it). ecx selects the
+  // register (0 = XCR0); the result arrives in edx:eax and the upper half
+  // (xcr0_high) is intentionally discarded.
+  asm volatile(".byte 0x0F, 0x01, 0xD0"
+               : "=a"(xcr0), "=d"(xcr0_high)
+               : "c"(index));
+  return xcr0;
+#endif  // HWY_COMPILER_MSVC
+}
+
+// Returns whether CPUID leaf 0 reports a maximum level of at least 1 and the
+// vendor string "AuthenticAMD".
+bool IsAMD() {
+  // Four vendor-string characters per register, least significant byte first.
+  constexpr uint32_t kAuth = 0x68747541;  // "Auth", expected in ebx
+  constexpr uint32_t kCAMD = 0x444d4163;  // "cAMD", expected in ecx
+  constexpr uint32_t kEnti = 0x69746e65;  // "enti", expected in edx
+
+  uint32_t abcd[4];
+  Cpuid(0, 0, abcd);
+  if (abcd[0] < 1) {  // eax holds the maximum supported level.
+    return false;
+  }
+  return abcd[1] == kAuth && abcd[2] == kCAMD && abcd[3] == kEnti;
+}
+
+// Arbitrary bit indices indicating which instruction set extensions are
+// supported. Use enum to ensure values are distinct.
+// Each enumerator is the position of one flag in the u64 returned by
+// FlagsFromCPUID (see Bit). kSentinel is not a feature: it is one past the
+// last index and only serves the static_assert below.
+enum class FeatureIndex : uint32_t {
+  kSSE = 0,
+  kSSE2,
+  kSSE3,
+  kSSSE3,
+
+  kSSE41,
+  kSSE42,
+  kCLMUL,
+  kAES,
+
+  kAVX,
+  kAVX2,
+  kF16C,
+  kFMA,
+  kLZCNT,
+  kBMI,
+  kBMI2,
+
+  kAVX512F,
+  kAVX512VL,
+  kAVX512CD,
+  kAVX512DQ,
+  kAVX512BW,
+  kAVX512FP16,
+
+  kVNNI,
+  kVPCLMULQDQ,
+  kVBMI,
+  kVBMI2,
+  kVAES,
+  kPOPCNTDQ,
+  kBITALG,
+  kGFNI,
+
+  kSentinel
+};
+// Every index must be usable as a shift count for a 64-bit flag word.
+static_assert(static_cast<size_t>(FeatureIndex::kSentinel) < 64,
+              "Too many bits for u64");
+
+// Returns a u64 in which only the bit at position `index` is set.
+HWY_INLINE constexpr uint64_t Bit(FeatureIndex index) {
+  return uint64_t{1} << static_cast<size_t>(index);
+}
+
+// Returns bit array of FeatureIndex from CPUID feature flags.
+// Register mapping (see Cpuid): abcd[0] = eax, abcd[1] = ebx, abcd[2] = ecx,
+// abcd[3] = edx. Each line below tests one CPUID bit and, if set, ORs in the
+// corresponding FeatureIndex bit.
+uint64_t FlagsFromCPUID() {
+  uint64_t flags = 0;  // return value
+  uint32_t abcd[4];
+  Cpuid(0, 0, abcd);
+  const uint32_t max_level = abcd[0];
+
+  // Standard feature flags
+  // (leaf 1: edx for SSE/SSE2, ecx for the rest)
+  Cpuid(1, 0, abcd);
+  flags |= IsBitSet(abcd[3], 25) ? Bit(FeatureIndex::kSSE) : 0;
+  flags |= IsBitSet(abcd[3], 26) ? Bit(FeatureIndex::kSSE2) : 0;
+  flags |= IsBitSet(abcd[2], 0) ? Bit(FeatureIndex::kSSE3) : 0;
+  flags |= IsBitSet(abcd[2], 1) ? Bit(FeatureIndex::kCLMUL) : 0;
+  flags |= IsBitSet(abcd[2], 9) ? Bit(FeatureIndex::kSSSE3) : 0;
+  flags |= IsBitSet(abcd[2], 12) ? Bit(FeatureIndex::kFMA) : 0;
+  flags |= IsBitSet(abcd[2], 19) ? Bit(FeatureIndex::kSSE41) : 0;
+  flags |= IsBitSet(abcd[2], 20) ? Bit(FeatureIndex::kSSE42) : 0;
+  flags |= IsBitSet(abcd[2], 25) ? Bit(FeatureIndex::kAES) : 0;
+  flags |= IsBitSet(abcd[2], 28) ? Bit(FeatureIndex::kAVX) : 0;
+  flags |= IsBitSet(abcd[2], 29) ? Bit(FeatureIndex::kF16C) : 0;
+
+  // Extended feature flags
+  // NOTE(review): leaf 0x80000001 is queried without first checking the
+  // maximum extended leaf (0x80000000), unlike leaf 7 below which is guarded
+  // by max_level - confirm this is acceptable for all supported CPUs.
+  Cpuid(0x80000001U, 0, abcd);
+  flags |= IsBitSet(abcd[2], 5) ? Bit(FeatureIndex::kLZCNT) : 0;
+
+  // Extended features
+  // (leaf 7, sub-leaf 0: ebx, then ecx, then edx)
+  if (max_level >= 7) {
+    Cpuid(7, 0, abcd);
+    flags |= IsBitSet(abcd[1], 3) ? Bit(FeatureIndex::kBMI) : 0;
+    flags |= IsBitSet(abcd[1], 5) ? Bit(FeatureIndex::kAVX2) : 0;
+    flags |= IsBitSet(abcd[1], 8) ? Bit(FeatureIndex::kBMI2) : 0;
+
+    flags |= IsBitSet(abcd[1], 16) ? Bit(FeatureIndex::kAVX512F) : 0;
+    flags |= IsBitSet(abcd[1], 17) ? Bit(FeatureIndex::kAVX512DQ) : 0;
+    flags |= IsBitSet(abcd[1], 28) ? Bit(FeatureIndex::kAVX512CD) : 0;
+    flags |= IsBitSet(abcd[1], 30) ? Bit(FeatureIndex::kAVX512BW) : 0;
+    flags |= IsBitSet(abcd[1], 31) ? Bit(FeatureIndex::kAVX512VL) : 0;
+
+    flags |= IsBitSet(abcd[2], 1) ? Bit(FeatureIndex::kVBMI) : 0;
+    flags |= IsBitSet(abcd[2], 6) ? Bit(FeatureIndex::kVBMI2) : 0;
+    flags |= IsBitSet(abcd[2], 8) ? Bit(FeatureIndex::kGFNI) : 0;
+    flags |= IsBitSet(abcd[2], 9) ? Bit(FeatureIndex::kVAES) : 0;
+    flags |= IsBitSet(abcd[2], 10) ? Bit(FeatureIndex::kVPCLMULQDQ) : 0;
+    flags |= IsBitSet(abcd[2], 11) ? Bit(FeatureIndex::kVNNI) : 0;
+    flags |= IsBitSet(abcd[2], 12) ? Bit(FeatureIndex::kBITALG) : 0;
+    flags |= IsBitSet(abcd[2], 14) ? Bit(FeatureIndex::kPOPCNTDQ) : 0;
+
+    flags |= IsBitSet(abcd[3], 23) ? Bit(FeatureIndex::kAVX512FP16) : 0;
+  }
+
+  return flags;
+}
+
+// Each Highway target requires a 'group' of multiple features/flags.
+// Every group below ORs in the previous group, so a target's group is only
+// fully present if all lower targets' groups are as well.
+constexpr uint64_t kGroupSSE2 =
+    Bit(FeatureIndex::kSSE) | Bit(FeatureIndex::kSSE2);
+
+constexpr uint64_t kGroupSSSE3 =
+    Bit(FeatureIndex::kSSE3) | Bit(FeatureIndex::kSSSE3) | kGroupSSE2;
+
+constexpr uint64_t kGroupSSE4 =
+    Bit(FeatureIndex::kSSE41) | Bit(FeatureIndex::kSSE42) |
+    Bit(FeatureIndex::kCLMUL) | Bit(FeatureIndex::kAES) | kGroupSSSE3;
+
+// We normally assume BMI/BMI2/FMA are available if AVX2 is. This allows us to
+// use BZHI and (compiler-generated) MULX. However, VirtualBox lacks them
+// [https://www.virtualbox.org/ticket/15471]. Thus we provide the option of
+// avoiding using and requiring these so AVX2 can still be used.
+#ifdef HWY_DISABLE_BMI2_FMA
+constexpr uint64_t kGroupBMI2_FMA = 0;
+#else
+constexpr uint64_t kGroupBMI2_FMA = Bit(FeatureIndex::kBMI) |
+                                    Bit(FeatureIndex::kBMI2) |
+                                    Bit(FeatureIndex::kFMA);
+#endif
+
+// Likewise, defining HWY_DISABLE_F16C removes F16C from the AVX2 requirements.
+#ifdef HWY_DISABLE_F16C
+constexpr uint64_t kGroupF16C = 0;
+#else
+constexpr uint64_t kGroupF16C = Bit(FeatureIndex::kF16C);
+#endif
+
+constexpr uint64_t kGroupAVX2 =
+    Bit(FeatureIndex::kAVX) | Bit(FeatureIndex::kAVX2) |
+    Bit(FeatureIndex::kLZCNT) | kGroupBMI2_FMA | kGroupF16C | kGroupSSE4;
+
+constexpr uint64_t kGroupAVX3 =
+    Bit(FeatureIndex::kAVX512F) | Bit(FeatureIndex::kAVX512VL) |
+    Bit(FeatureIndex::kAVX512DQ) | Bit(FeatureIndex::kAVX512BW) |
+    Bit(FeatureIndex::kAVX512CD) | kGroupAVX2;
+
+constexpr uint64_t kGroupAVX3_DL =
+    Bit(FeatureIndex::kVNNI) | Bit(FeatureIndex::kVPCLMULQDQ) |
+    Bit(FeatureIndex::kVBMI) | Bit(FeatureIndex::kVBMI2) |
+    Bit(FeatureIndex::kVAES) | Bit(FeatureIndex::kPOPCNTDQ) |
+    Bit(FeatureIndex::kBITALG) | Bit(FeatureIndex::kGFNI) | kGroupAVX3;
+
+constexpr uint64_t kGroupAVX3_SPR =
+    Bit(FeatureIndex::kAVX512FP16) | kGroupAVX3_DL;
+
+// Returns the x86 targets whose required CPUID feature groups are all present
+// and whose register state the OS preserves across context switches (as
+// reported by OSXSAVE/XCR0).
+int64_t DetectTargets() {
+  int64_t bits = 0;  // return value of supported targets.
+#if HWY_ARCH_X86_64
+  bits |= HWY_SSE2;  // always present in x64
+#endif
+
+  const uint64_t flags = FlagsFromCPUID();
+  // Set target bit(s) if all their group's flags are all set.
+  if ((flags & kGroupAVX3_SPR) == kGroupAVX3_SPR) {
+    bits |= HWY_AVX3_SPR;
+  }
+  if ((flags & kGroupAVX3_DL) == kGroupAVX3_DL) {
+    bits |= HWY_AVX3_DL;
+  }
+  if ((flags & kGroupAVX3) == kGroupAVX3) {
+    bits |= HWY_AVX3;
+  }
+  if ((flags & kGroupAVX2) == kGroupAVX2) {
+    bits |= HWY_AVX2;
+  }
+  if ((flags & kGroupSSE4) == kGroupSSE4) {
+    bits |= HWY_SSE4;
+  }
+  if ((flags & kGroupSSSE3) == kGroupSSSE3) {
+    bits |= HWY_SSSE3;
+  }
+#if HWY_ARCH_X86_32
+  if ((flags & kGroupSSE2) == kGroupSSE2) {
+    bits |= HWY_SSE2;
+  }
+#endif
+
+  // Targets that require YMM (AVX2 and above) or ZMM/opmask (AVX3 and above)
+  // state to be saved by the OS.
+  const int64_t min_avx3 = HWY_AVX3 | HWY_AVX3_DL | HWY_AVX3_SPR;
+  const int64_t min_avx2 = HWY_AVX2 | min_avx3;
+
+  // Clear bits if the OS does not support XSAVE - otherwise, registers
+  // are not preserved across context switches.
+  uint32_t abcd[4];
+  Cpuid(1, 0, abcd);
+  const bool has_osxsave = IsBitSet(abcd[2], 27);
+  if (has_osxsave) {
+    const uint32_t xcr0 = ReadXCR0();
+    // XMM
+    if (!IsBitSet(xcr0, 1)) {
+#if HWY_ARCH_X86_64
+      // The HWY_SSE2, HWY_SSSE3, and HWY_SSE4 bits do not need to be
+      // cleared on x86_64, even if bit 1 of XCR0 is not set, as
+      // the lower 128 bits of XMM0-XMM15 are guaranteed to be
+      // preserved across context switches on x86_64
+
+      // Only clear the AVX2/AVX3 bits on x86_64 if bit 1 of XCR0 is not set
+      bits &= ~min_avx2;
+#else
+      bits &= ~(HWY_SSE2 | HWY_SSSE3 | HWY_SSE4 | min_avx2);
+#endif
+    }
+    // YMM
+    if (!IsBitSet(xcr0, 2)) {
+      bits &= ~min_avx2;
+    }
+    // opmask, ZMM lo/hi
+    if (!IsBitSet(xcr0, 5) || !IsBitSet(xcr0, 6) || !IsBitSet(xcr0, 7)) {
+      bits &= ~min_avx3;
+    }
+  } else {
+    // Without OSXSAVE the OS has not enabled XSAVE, so XCR0 cannot be read
+    // and YMM/ZMM/opmask state is not being preserved: AVX2 and above must
+    // not be used even if CPUID reports the instructions. The SSE targets do
+    // not depend on XSAVE and are left unchanged.
+    bits &= ~min_avx2;
+  }  // has_osxsave
+
+  // This is mainly to work around the slow Zen4 CompressStore. It's unclear
+  // whether subsequent AMD models will be affected; assume yes.
+  if ((bits & HWY_AVX3_DL) && IsAMD()) {
+    bits |= HWY_AVX3_ZEN4;
+  }
+
+  return bits;
+}
+
+}  // namespace x86
+#elif HWY_ARCH_ARM && HWY_HAVE_RUNTIME_DISPATCH
+namespace arm {
+// Returns the Arm targets supported according to the capability bits that
+// getauxval reports. Each check is compiled in only if the corresponding
+// HWCAP* macro is defined by the toolchain headers.
+int64_t DetectTargets() {
+  int64_t bits = 0;               // return value of supported targets.
+  using CapBits = unsigned long;  // NOLINT
+  const CapBits hw = getauxval(AT_HWCAP);
+  (void)hw;  // may otherwise be unused if none of the HWCAP* macros exist.
+
+#if HWY_ARCH_ARM_A64
+  bits |= HWY_NEON_WITHOUT_AES;  // aarch64 always has NEON and VFPv4..
+
+  // .. but not necessarily AES, which is required for HWY_NEON.
+#if defined(HWCAP_AES)
+  if (hw & HWCAP_AES) {
+    bits |= HWY_NEON;
+  }
+#endif  // HWCAP_AES
+
+#if defined(HWCAP_SVE)
+  if (hw & HWCAP_SVE) {
+    bits |= HWY_SVE;
+  }
+#endif
+
+  // HWY_SVE2 requires both the SVE2 and SVE-AES capability bits (AT_HWCAP2).
+#if defined(HWCAP2_SVE2) && defined(HWCAP2_SVEAES)
+  const CapBits hw2 = getauxval(AT_HWCAP2);
+  if ((hw2 & HWCAP2_SVE2) && (hw2 & HWCAP2_SVEAES)) {
+    bits |= HWY_SVE2;
+  }
+#endif
+
+#else  // !HWY_ARCH_ARM_A64
+
+// Some old auxv.h / hwcap.h do not define these. If not, treat as unsupported.
+#if defined(HWCAP_NEON) && defined(HWCAP_VFPv4)
+  if ((hw & HWCAP_NEON) && (hw & HWCAP_VFPv4)) {
+    bits |= HWY_NEON_WITHOUT_AES;
+  }
+#endif
+
+  // aarch32 would check getauxval(AT_HWCAP2) & HWCAP2_AES, but we do not yet
+  // support that platform, and Armv7 lacks AES entirely. Because HWY_NEON
+  // requires native AES instructions, we do not enable that target here.
+
+#endif  // HWY_ARCH_ARM_A64
+  return bits;
+}
+}  // namespace arm
+#elif HWY_ARCH_PPC && HWY_HAVE_RUNTIME_DISPATCH
+namespace ppc {
+
+// Fallback definitions of the capability bits, used only if the included
+// headers did not already define them.
+// NOTE(review): the values presumably mirror the Linux kernel's PPC_FEATURE*
+// constants - confirm against the kernel headers.
+#ifndef PPC_FEATURE_HAS_ALTIVEC
+#define PPC_FEATURE_HAS_ALTIVEC 0x10000000
+#endif
+
+#ifndef PPC_FEATURE_HAS_VSX
+#define PPC_FEATURE_HAS_VSX 0x00000080
+#endif
+
+#ifndef PPC_FEATURE2_ARCH_2_07
+#define PPC_FEATURE2_ARCH_2_07 0x80000000
+#endif
+
+#ifndef PPC_FEATURE2_VEC_CRYPTO
+#define PPC_FEATURE2_VEC_CRYPTO 0x02000000
+#endif
+
+#ifndef PPC_FEATURE2_ARCH_3_00
+#define PPC_FEATURE2_ARCH_3_00 0x00800000
+#endif
+
+#ifndef PPC_FEATURE2_ARCH_3_1
+#define PPC_FEATURE2_ARCH_3_1 0x00040000
+#endif
+
+using CapBits = unsigned long;  // NOLINT
+
+// For AT_HWCAP, the others are for AT_HWCAP2
+constexpr CapBits kGroupVSX = PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_VSX;
+
+// Defining HWY_DISABLE_PPC8_CRYPTO drops the vector-crypto requirement from
+// PPC8, and hence from PPC9/PPC10, which include the PPC8 group.
+#if defined(HWY_DISABLE_PPC8_CRYPTO)
+constexpr CapBits kGroupPPC8 = PPC_FEATURE2_ARCH_2_07;
+#else
+constexpr CapBits kGroupPPC8 = PPC_FEATURE2_ARCH_2_07 | PPC_FEATURE2_VEC_CRYPTO;
+#endif
+constexpr CapBits kGroupPPC9 = kGroupPPC8 | PPC_FEATURE2_ARCH_3_00;
+constexpr CapBits kGroupPPC10 = kGroupPPC9 | PPC_FEATURE2_ARCH_3_1;
+
+// Returns the PPC targets whose capability groups are fully reported by
+// getauxval; returns 0 if AT_HWCAP/AT_HWCAP2 are not defined.
+int64_t DetectTargets() {
+  int64_t supported = 0;  // accumulated target bits
+
+#if defined(AT_HWCAP) && defined(AT_HWCAP2)
+  // AltiVec + VSX (reported via AT_HWCAP) are required by every target.
+  const CapBits hwcap = getauxval(AT_HWCAP);
+  const bool has_vsx = (hwcap & kGroupVSX) == kGroupVSX;
+
+  if (has_vsx) {
+    const CapBits hwcap2 = getauxval(AT_HWCAP2);
+    // True if every bit of `group` is present in AT_HWCAP2.
+    const auto has_group = [hwcap2](const CapBits group) {
+      return (hwcap2 & group) == group;
+    };
+    if (has_group(kGroupPPC8)) {
+      supported |= HWY_PPC8;
+    }
+    if (has_group(kGroupPPC9)) {
+      supported |= HWY_PPC9;
+    }
+    if (has_group(kGroupPPC10)) {
+      supported |= HWY_PPC10;
+    }
+  }  // VSX
+#endif  // defined(AT_HWCAP) && defined(AT_HWCAP2)
+
+  return supported;
+}
+}  // namespace ppc
+#endif  // HWY_ARCH_X86
+
+// Returns targets supported by the CPU, independently of DisableTargets.
+// Factored out of SupportedTargets to make its structure more obvious. Note
+// that x86 CPUID may take several hundred cycles.
+// Prints a warning to stderr if any target in HWY_ENABLED_BASELINE is missing
+// from the result.
+int64_t DetectTargets() {
+  // Apps will use only one of these (the default is EMU128), but compile flags
+  // for this TU may differ from that of the app, so allow both.
+  int64_t bits = HWY_SCALAR | HWY_EMU128;
+
+#if HWY_ARCH_X86 && HWY_HAVE_RUNTIME_DISPATCH
+  bits |= x86::DetectTargets();
+#elif HWY_ARCH_ARM && HWY_HAVE_RUNTIME_DISPATCH
+  bits |= arm::DetectTargets();
+#elif HWY_ARCH_PPC && HWY_HAVE_RUNTIME_DISPATCH
+  bits |= ppc::DetectTargets();
+
+#else
+  // TODO(janwas): detect support for WASM/RVV.
+  // This file is typically compiled without HWY_IS_TEST, but targets_test has
+  // it set, and will expect all of its HWY_TARGETS (= all attainable) to be
+  // supported.
+  bits |= HWY_ENABLED_BASELINE;
+#endif  // HWY_ARCH_*
+
+  const bool baseline_missing =
+      (bits & HWY_ENABLED_BASELINE) != HWY_ENABLED_BASELINE;
+  if (baseline_missing) {
+    // Print both 64-bit masks as two 32-bit halves, upper half first.
+    const uint64_t detected = static_cast<uint64_t>(bits);
+    const uint64_t required = static_cast<uint64_t>(HWY_ENABLED_BASELINE);
+    const uint32_t detected_hi = static_cast<uint32_t>(detected >> 32);
+    const uint32_t detected_lo = static_cast<uint32_t>(detected & 0xFFFFFFFF);
+    const uint32_t required_hi = static_cast<uint32_t>(required >> 32);
+    const uint32_t required_lo = static_cast<uint32_t>(required & 0xFFFFFFFF);
+    fprintf(stderr,
+            "WARNING: CPU supports 0x%08x%08x, software requires 0x%08x%08x\n",
+            detected_hi, detected_lo, required_hi, required_lo);
+  }
+
+  return bits;
+}
+
+}  // namespace
+
+// Formats the printf-style message, prints it to stderr together with the
+// given file and line, and terminates the program; never returns.
+HWY_DLLEXPORT HWY_NORETURN void HWY_FORMAT(3, 4)
+    Abort(const char* file, int line, const char* format, ...) {
+  // vsnprintf bounds the output to the buffer, so longer messages are
+  // truncated rather than overflowing.
+  char buf[2000];
+  va_list args;
+  va_start(args, format);
+  vsnprintf(buf, sizeof(buf), format, args);
+  va_end(args);
+
+  fprintf(stderr, "Abort at %s:%d: %s\n", file, line, buf);
+
+// If compiled with any sanitizer, they can also print a stack trace.
+#if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
+  __sanitizer_print_stack_trace();
+#endif  // HWY_IS_*
+  fflush(stderr);
+
+// Now terminate the program:
+#if HWY_ARCH_RVV
+  exit(1);  // trap/abort just freeze Spike.
+#elif HWY_IS_DEBUG_BUILD && !HWY_COMPILER_MSVC
+  // Facilitates breaking into a debugger, but don't use this in non-debug
+  // builds because it looks like "illegal instruction", which is misleading.
+  __builtin_trap();
+#else
+  abort();  // Compile error without this due to HWY_NORETURN.
+#endif
+}
+
+// Records which targets remain enabled and resets the chosen target.
+HWY_DLLEXPORT void DisableTargets(int64_t disabled_targets) {
+  // Store the complement: SupportedTargets ANDs its result with this mask.
+  const int64_t still_enabled = static_cast<int64_t>(~disabled_targets);
+  supported_mask_ = still_enabled;
+
+  // The new mask takes effect on the next call to SupportedTargets, which is
+  // called right before GetChosenTarget::Update. We do not call Update here
+  // because that would make it appear that HWY_DYNAMIC_DISPATCH was called,
+  // which tests want to check. De-initializing instead ensures the next
+  // HWY_DYNAMIC_DISPATCH calls GetChosenTarget::Update via FunctionCache.
+  GetChosenTarget().DeInit();
+}
+
+// Installs `targets` as the mocked set of supported targets (0 deactivates
+// the mock) and resets the chosen target.
+HWY_DLLEXPORT void SetSupportedTargetsForTest(int64_t targets) {
+  supported_targets_for_test_ = targets;
+  // De-initialize rather than Update, so that the next HWY_DYNAMIC_DISPATCH
+  // performs the update itself.
+  ChosenTarget& chosen = GetChosenTarget();
+  chosen.DeInit();
+}
+
+// Returns the mocked targets if a mock is active, otherwise the detected
+// targets; in both cases masked by supported_mask_. Never returns 0: falls
+// back to HWY_STATIC_TARGET if nothing remains.
+HWY_DLLEXPORT int64_t SupportedTargets() {
+  int64_t detected = supported_targets_for_test_;
+  if (HWY_LIKELY(detected == 0)) {
+    // Mock not active. Re-detect instead of caching just in case we're on a
+    // heterogeneous ISA (also requires some app support to pin threads). This
+    // is only reached on the first HWY_DYNAMIC_DISPATCH or after each call to
+    // DisableTargets or SetSupportedTargetsForTest.
+    detected = DetectTargets();
+
+    // VectorBytes invokes HWY_DYNAMIC_DISPATCH. To prevent infinite recursion,
+    // ChosenTarget must be set up first. There is no need to Update() again
+    // with the final targets - a caller of this function does that.
+    GetChosenTarget().Update(detected);
+
+    // With VectorBytes now callable, set or clear the fixed-size SVE targets.
+    if (HWY_ARCH_ARM_A64) {
+      const size_t vec_bytes = VectorBytes();  // uncached, see declaration
+
+      const bool is_sve_256 = (detected & HWY_SVE) && vec_bytes == 32;
+      detected = static_cast<int64_t>(is_sve_256 ? (detected | HWY_SVE_256)
+                                                 : (detected & ~HWY_SVE_256));
+
+      const bool is_sve2_128 = (detected & HWY_SVE2) && vec_bytes == 16;
+      detected = static_cast<int64_t>(is_sve2_128 ? (detected | HWY_SVE2_128)
+                                                  : (detected & ~HWY_SVE2_128));
+    }  // HWY_ARCH_ARM_A64
+  }
+
+  detected &= supported_mask_;
+  if (detected == 0) {
+    return HWY_STATIC_TARGET;
+  }
+  return detected;
+}
+
+// Returns a reference to the single ChosenTarget instance, a function-local
+// static that is constructed on the first call.
+HWY_DLLEXPORT ChosenTarget& GetChosenTarget() {
+  static ChosenTarget instance;
+  return instance;
+}
+
+}  // namespace hwy
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/targets.h b/third-party/libjxl/libjxl/third_party/highway/hwy/targets.h
new file mode 100644
index 0000000000..693e2e80c5
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/targets.h
@@ -0,0 +1,338 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_TARGETS_H_
+#define HIGHWAY_HWY_TARGETS_H_
+
+// Allows opting out of C++ standard library usage, which is not available in
+// some Compiler Explorer environments.
+#ifndef HWY_NO_LIBCXX
+#include <vector>
+#endif
+
+// For SIMD module implementations and their callers. Defines which targets to
+// generate and call.
+
+#include "hwy/base.h"
+#include "hwy/detect_targets.h"
+#include "hwy/highway_export.h"
+
+#if !HWY_ARCH_RVV && !defined(HWY_NO_LIBCXX)
+#include <atomic>
+#endif
+
+namespace hwy {
+
+// Returns bitfield of enabled targets that are supported on this CPU; there is
+// always at least one such target, hence the return value is never 0. The
+// targets returned may change after calling DisableTargets. This function is
+// always defined, but the HWY_SUPPORTED_TARGETS wrapper may allow eliding
+// calls to it if there is only a single target enabled.
+HWY_DLLEXPORT int64_t SupportedTargets();
+
+// Evaluates to a function call, or literal if there is a single target.
+// (X & (X - 1)) == 0 holds when at most one bit of X is set, i.e. when
+// HWY_TARGETS names a single target and no runtime query is needed.
+#if (HWY_TARGETS & (HWY_TARGETS - 1)) == 0
+#define HWY_SUPPORTED_TARGETS HWY_TARGETS
+#else
+#define HWY_SUPPORTED_TARGETS hwy::SupportedTargets()
+#endif
+
+// Subsequent SupportedTargets will not return targets whose bit(s) are set in
+// `disabled_targets`. Exception: if SupportedTargets would return 0, it will
+// instead return HWY_STATIC_TARGET (there must always be one target to call).
+//
+// This function is useful for disabling targets known to be buggy, or if the
+// best available target is undesirable (perhaps due to throttling or memory
+// bandwidth limitations). Use SetSupportedTargetsForTest instead of this
+// function for iteratively enabling specific targets for testing.
+HWY_DLLEXPORT void DisableTargets(int64_t disabled_targets);
+
+// Subsequent SupportedTargets will return the given set of targets, except
+// those disabled via DisableTargets. Call with a mask of 0 to disable the mock
+// and return to the normal SupportedTargets behavior. Used to run tests for
+// all targets.
+HWY_DLLEXPORT void SetSupportedTargetsForTest(int64_t targets);
+
+#ifndef HWY_NO_LIBCXX
+
+// Returns the targets that are both in HWY_TARGETS and supported by the CPU,
+// as a list of individual HWY_* target macros such as HWY_SCALAR or HWY_NEON,
+// ordered from the lowest set bit to the highest. The list reflects the
+// current SetSupportedTargetsForTest() mock, if any.
+HWY_INLINE std::vector<int64_t> SupportedAndGeneratedTargets() {
+  std::vector<int64_t> ret;
+  int64_t remaining = SupportedTargets() & HWY_TARGETS;
+  while (remaining != 0) {
+    // Isolate the lowest set bit, then remove it from the remaining mask.
+    const int64_t lowest = remaining & ~(remaining - 1);
+    ret.push_back(lowest);
+    remaining = remaining & (remaining - 1);
+  }
+  return ret;
+}
+
+#endif  // HWY_NO_LIBCXX
+
+// Returns a human-readable name for a single HWY_* target bit. Only the
+// targets of the architecture being compiled for (plus EMU128/SCALAR) are
+// recognized; any other value, including a mask with several bits set that
+// matches no case, yields "Unknown".
+static inline HWY_MAYBE_UNUSED const char* TargetName(int64_t target) {
+  switch (target) {
+#if HWY_ARCH_X86
+    case HWY_SSE2:
+      return "SSE2";
+    case HWY_SSSE3:
+      return "SSSE3";
+    case HWY_SSE4:
+      return "SSE4";
+    case HWY_AVX2:
+      return "AVX2";
+    case HWY_AVX3:
+      return "AVX3";
+    case HWY_AVX3_DL:
+      return "AVX3_DL";
+    case HWY_AVX3_ZEN4:
+      return "AVX3_ZEN4";
+    case HWY_AVX3_SPR:
+      return "AVX3_SPR";
+#endif
+
+#if HWY_ARCH_ARM
+    case HWY_SVE2_128:
+      return "SVE2_128";
+    case HWY_SVE_256:
+      return "SVE_256";
+    case HWY_SVE2:
+      return "SVE2";
+    case HWY_SVE:
+      return "SVE";
+    case HWY_NEON:
+      return "NEON";
+    case HWY_NEON_WITHOUT_AES:
+      return "NEON_WITHOUT_AES";
+#endif
+
+#if HWY_ARCH_PPC
+    case HWY_PPC8:
+      return "PPC8";
+    case HWY_PPC9:
+      return "PPC9";
+    case HWY_PPC10:
+      return "PPC10";
+#endif
+
+#if HWY_ARCH_WASM
+    case HWY_WASM:
+      return "WASM";
+    case HWY_WASM_EMU256:
+      return "WASM_EMU256";
+#endif
+
+#if HWY_ARCH_RVV
+    case HWY_RVV:
+      return "RVV";
+#endif
+
+    // Available on every architecture.
+    case HWY_EMU128:
+      return "EMU128";
+    case HWY_SCALAR:
+      return "SCALAR";
+
+    default:
+      return "Unknown";  // must satisfy gtest IsValidParamName()
+  }
+}
+
+// The maximum number of dynamic targets on any architecture is defined by
+// HWY_MAX_DYNAMIC_TARGETS and depends on the arch.
+
+// For the ChosenTarget mask and index we use a different bit arrangement than
+// in the HWY_TARGETS mask. Only the targets involved in the current
+// architecture are used in this mask, and therefore only the least significant
+// (HWY_MAX_DYNAMIC_TARGETS + 2) bits of the int64_t mask are used. The least
+// significant bit is set when the mask is not initialized, the next
+// HWY_MAX_DYNAMIC_TARGETS more significant bits are a range of bits from the
+// HWY_TARGETS or SupportedTargets() mask for the given architecture shifted to
+// that position and the next more significant bit is used for HWY_SCALAR (if
+// HWY_COMPILE_ONLY_SCALAR is defined) or HWY_EMU128. Because of this we need to
+// define equivalent values for HWY_TARGETS in this representation.
+// This mask representation allows calling ctz() on the mask to obtain a small
+// number that is used as an index into the table for dynamic dispatch. In
+// this way, the first entry is used when the mask is uninitialized, the
+// following HWY_MAX_DYNAMIC_TARGETS entries are for dynamic dispatch, and the
+// last one is for scalar.
+
+// The HWY_SCALAR/HWY_EMU128 bit in the ChosenTarget mask format.
+// Bit 0 is the "uninitialized" flag and bits 1..HWY_MAX_DYNAMIC_TARGETS hold
+// the dynamic targets, hence this is bit HWY_MAX_DYNAMIC_TARGETS + 1.
+#define HWY_CHOSEN_TARGET_MASK_SCALAR (1LL << (HWY_MAX_DYNAMIC_TARGETS + 1))
+
+// Converts from a HWY_TARGETS mask to a ChosenTarget mask format for the
+// current architecture.
+// Steps: shift right so the architecture's lowest target bit
+// (HWY_HIGHEST_TARGET_BIT + 1 - HWY_MAX_DYNAMIC_TARGETS) lands at bit 0, keep
+// only HWY_MAX_DYNAMIC_TARGETS bits, then shift left by one to leave bit 0
+// free for the "uninitialized" flag.
+#define HWY_CHOSEN_TARGET_SHIFT(X)                                    \
+  ((((X) >> (HWY_HIGHEST_TARGET_BIT + 1 - HWY_MAX_DYNAMIC_TARGETS)) & \
+    ((1LL << HWY_MAX_DYNAMIC_TARGETS) - 1))                           \
+   << 1)
+
+// The HWY_TARGETS mask in the ChosenTarget mask format.
+// Always includes the scalar bit and the "uninitialized" bit (1LL).
+#define HWY_CHOSEN_TARGET_MASK_TARGETS \
+  (HWY_CHOSEN_TARGET_SHIFT(HWY_TARGETS) | HWY_CHOSEN_TARGET_MASK_SCALAR | 1LL)
+
+#if HWY_ARCH_X86
+// Maximum number of dynamic targets, changing this value is an ABI incompatible
+// change
+#define HWY_MAX_DYNAMIC_TARGETS 15
+#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_X86
+// These must match the order in which the HWY_TARGETS are defined
+// starting by the least significant (HWY_HIGHEST_TARGET_BIT + 1 -
+// HWY_MAX_DYNAMIC_TARGETS) bit. This list must contain exactly
+// HWY_MAX_DYNAMIC_TARGETS elements and does not include SCALAR. The first entry
+// corresponds to the best target. Don't include a "," at the end of the list.
+// The list below has 15 entries; nullptr fills the slots for which there is
+// no HWY_CHOOSE_* macro (reserved bits, AVX and SSE3).
+#define HWY_CHOOSE_TARGET_LIST(func_name)                     \
+  nullptr,                             /* reserved */         \
+      nullptr,                         /* reserved */         \
+      nullptr,                         /* reserved */         \
+      nullptr,                         /* reserved */         \
+      HWY_CHOOSE_AVX3_SPR(func_name),  /* AVX3_SPR */         \
+      nullptr,                         /* reserved */         \
+      HWY_CHOOSE_AVX3_ZEN4(func_name), /* AVX3_ZEN4 */        \
+      HWY_CHOOSE_AVX3_DL(func_name),   /* AVX3_DL */          \
+      HWY_CHOOSE_AVX3(func_name),      /* AVX3 */             \
+      HWY_CHOOSE_AVX2(func_name),      /* AVX2 */             \
+      nullptr,                         /* AVX */              \
+      HWY_CHOOSE_SSE4(func_name),      /* SSE4 */             \
+      HWY_CHOOSE_SSSE3(func_name),     /* SSSE3 */            \
+      nullptr,                         /* reserved - SSE3? */ \
+      HWY_CHOOSE_SSE2(func_name)       /* SSE2 */
+
+#elif HWY_ARCH_ARM
+// See HWY_ARCH_X86 above for details.
+// 15 slots: nine reserved (nullptr) entries followed by the six Arm targets.
+#define HWY_MAX_DYNAMIC_TARGETS 15
+#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_ARM
+#define HWY_CHOOSE_TARGET_LIST(func_name)                       \
+  nullptr,                                   /* reserved */     \
+      nullptr,                               /* reserved */     \
+      nullptr,                               /* reserved */     \
+      nullptr,                               /* reserved */     \
+      nullptr,                               /* reserved */     \
+      nullptr,                               /* reserved */     \
+      nullptr,                               /* reserved */     \
+      nullptr,                               /* reserved */     \
+      nullptr,                               /* reserved */     \
+      HWY_CHOOSE_SVE2_128(func_name),        /* SVE2 128-bit */ \
+      HWY_CHOOSE_SVE_256(func_name),         /* SVE 256-bit */  \
+      HWY_CHOOSE_SVE2(func_name),            /* SVE2 */         \
+      HWY_CHOOSE_SVE(func_name),             /* SVE */          \
+      HWY_CHOOSE_NEON(func_name),            /* NEON */         \
+      HWY_CHOOSE_NEON_WITHOUT_AES(func_name) /* NEON without AES */
+
+#elif HWY_ARCH_RVV
+// See HWY_ARCH_X86 above for details.
+// 9 slots: only the second-to-last one (RVV) is populated.
+#define HWY_MAX_DYNAMIC_TARGETS 9
+#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_RVV
+#define HWY_CHOOSE_TARGET_LIST(func_name)       \
+  nullptr,                       /* reserved */ \
+      nullptr,                   /* reserved */ \
+      nullptr,                   /* reserved */ \
+      nullptr,                   /* reserved */ \
+      nullptr,                   /* reserved */ \
+      nullptr,                   /* reserved */ \
+      nullptr,                   /* reserved */ \
+      HWY_CHOOSE_RVV(func_name), /* RVV */      \
+      nullptr                    /* reserved */
+
+#elif HWY_ARCH_PPC
+// See HWY_ARCH_X86 above for details.
+// 9 slots: four reserved, then PPC10/PPC9/PPC8, then two reserved.
+#define HWY_MAX_DYNAMIC_TARGETS 9
+#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_PPC
+#define HWY_CHOOSE_TARGET_LIST(func_name)                          \
+  nullptr,                         /* reserved */                  \
+      nullptr,                     /* reserved */                  \
+      nullptr,                     /* reserved */                  \
+      nullptr,                     /* reserved */                  \
+      HWY_CHOOSE_PPC10(func_name), /* PPC10 */                     \
+      HWY_CHOOSE_PPC9(func_name),  /* PPC9 */                      \
+      HWY_CHOOSE_PPC8(func_name),  /* PPC8 */                      \
+      nullptr,                     /* reserved (VSX or AltiVec) */ \
+      nullptr                      /* reserved (VSX or AltiVec) */
+
+#elif HWY_ARCH_WASM
+// See HWY_ARCH_X86 above for details.
+// 9 slots: six reserved, then WASM_EMU256 and WASM, then one reserved.
+#define HWY_MAX_DYNAMIC_TARGETS 9
+#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_WASM
+#define HWY_CHOOSE_TARGET_LIST(func_name)                  \
+  nullptr,                               /* reserved */    \
+      nullptr,                           /* reserved */    \
+      nullptr,                           /* reserved */    \
+      nullptr,                           /* reserved */    \
+      nullptr,                           /* reserved */    \
+      nullptr,                           /* reserved */    \
+      HWY_CHOOSE_WASM_EMU256(func_name), /* WASM_EMU256 */ \
+      HWY_CHOOSE_WASM(func_name),        /* WASM */        \
+      nullptr                            /* reserved */
+
+#else
+// Unknown architecture, will use HWY_SCALAR without dynamic dispatch, though
+// still creating single-entry tables in HWY_EXPORT to ensure portability.
+// Note that this branch defines no HWY_CHOOSE_TARGET_LIST, unlike the
+// architecture-specific branches above.
+#define HWY_MAX_DYNAMIC_TARGETS 1
+#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_SCALAR
+#endif
+
+// Record of which dispatch-table slots may be used. The bit layout differs
+// from HWY_TARGETS: bit 0 stands for the first function pointer in the tables
+// created by HWY_EXPORT, which is special in that it calls FunctionCache, then
+// Update, then dispatches to the actual implementation. Monostate (see
+// GetChosenTarget). The mask is a std::atomic unless building for RVV or with
+// HWY_NO_LIBCXX, in which case it is a plain integer without synchronization.
+struct ChosenTarget {
+ public:
+  // Replaces the mask with `targets` (typically the return value of
+  // SupportedTargets()) converted to the ChosenTarget format, plus the SCALAR
+  // bit (last entry in the function table) as fallback.
+  // Postcondition: IsInitialized() == true.
+  void Update(int64_t targets) {
+    const int64_t shifted = HWY_CHOSEN_TARGET_SHIFT(targets);
+    StoreMask(shifted | HWY_CHOSEN_TARGET_MASK_SCALAR);
+  }
+
+  // Returns to the uninitialized state (mask == 1): FunctionCache will call
+  // Update during the next HWY_DYNAMIC_DISPATCH and IsInitialized reports
+  // false until then.
+  void DeInit() { StoreMask(1); }
+
+  // True once Update has replaced the initial mask value of 1. Tests use this
+  // to check whether any HWY_DYNAMIC_DISPATCH function was called.
+  bool IsInitialized() const {
+    const int64_t current = LoadMask();
+    return current != 1;
+  }
+
+  // Index into the dynamic dispatch table for the current CPU: the position
+  // of the lowest set bit among those also present in
+  // HWY_CHOSEN_TARGET_MASK_TARGETS. This must stay in the header so that it
+  // picks up the HWY_CHOSEN_TARGET_MASK_TARGETS of the calling translation
+  // unit, which may differ between modules; only targets actually compiled
+  // into that module are thereby enabled.
+  size_t HWY_INLINE GetIndex() const {
+    const int64_t usable = LoadMask() & HWY_CHOSEN_TARGET_MASK_TARGETS;
+    return hwy::Num0BitsBelowLS1Bit_Nonzero64(static_cast<uint64_t>(usable));
+  }
+
+ private:
+  // TODO(janwas): remove RVV once <atomic> is available
+#if HWY_ARCH_RVV || defined(HWY_NO_LIBCXX)
+  int64_t mask_{1};  // Starts at 1 so GetIndex() returns 0.
+
+  int64_t LoadMask() const { return mask_; }
+  void StoreMask(int64_t value) { mask_ = value; }
+#else
+  std::atomic<int64_t> mask_{1};  // Starts at 1 so GetIndex() returns 0.
+
+  int64_t LoadMask() const { return mask_.load(); }
+  void StoreMask(int64_t value) { mask_.store(value); }
+#endif  // HWY_ARCH_RVV || defined(HWY_NO_LIBCXX)
+};
+
+// For internal use (e.g. by FunctionCache and DisableTargets).
+HWY_DLLEXPORT ChosenTarget& GetChosenTarget();
+
+}  // namespace hwy
+
+#endif  // HIGHWAY_HWY_TARGETS_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/targets_test.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/targets_test.cc
new file mode 100644
index 0000000000..d7fdeaf858
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/targets_test.cc
@@ -0,0 +1,159 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/targets.h"
+
+#include "hwy/detect_targets.h"
+#include "hwy/tests/test_util-inl.h"
+
+namespace fake {
+
+// Defines namespace N_<TGT> containing a FakeFunction that returns the
+// HWY_<TGT> target constant, so a test can tell which per-target
+// implementation was invoked.
+#define DECLARE_FUNCTION(TGT)                                                \
+  namespace N_##TGT {                                                        \
+    /* Function argument is just to ensure/demonstrate they are possible. */ \
+    int64_t FakeFunction(int) { return HWY_##TGT; }                          \
+  }
+
+DECLARE_FUNCTION(AVX3_SPR)
+DECLARE_FUNCTION(AVX3_ZEN4)
+DECLARE_FUNCTION(AVX3_DL)
+DECLARE_FUNCTION(AVX3)
+DECLARE_FUNCTION(AVX2)
+DECLARE_FUNCTION(SSE4)
+DECLARE_FUNCTION(SSSE3)
+DECLARE_FUNCTION(SSE2)
+
+DECLARE_FUNCTION(SVE2_128)
+DECLARE_FUNCTION(SVE_256)
+DECLARE_FUNCTION(SVE2)
+DECLARE_FUNCTION(SVE)
+DECLARE_FUNCTION(NEON)
+DECLARE_FUNCTION(NEON_WITHOUT_AES)
+
+DECLARE_FUNCTION(PPC10)
+DECLARE_FUNCTION(PPC9)
+DECLARE_FUNCTION(PPC8)
+
+DECLARE_FUNCTION(WASM)
+DECLARE_FUNCTION(WASM_EMU256)
+
+DECLARE_FUNCTION(RVV)
+
+DECLARE_FUNCTION(SCALAR)
+DECLARE_FUNCTION(EMU128)
+
+HWY_EXPORT(FakeFunction);
+
+// Checks that dispatching FakeFunction reaches the implementation for
+// `target`. `line` is the caller's __LINE__ and is appended to each failure
+// message. Does nothing if `target` is not part of HWY_TARGETS.
+void CallFunctionForTarget(int64_t target, int line) {
+  if ((HWY_TARGETS & target) == 0) return;
+  // Presumably makes SupportedTargets() report only `target` - confirm
+  // against the definition of SetSupportedTargetsForTest.
+  hwy::SetSupportedTargetsForTest(target);
+
+  // Call Update() first to make &HWY_DYNAMIC_DISPATCH() return
+  // the pointer to the already cached function.
+  hwy::GetChosenTarget().Update(hwy::SupportedTargets());
+
+  EXPECT_EQ(target, HWY_DYNAMIC_DISPATCH(FakeFunction)(42)) << line;
+
+  // Calling DeInit() will test that the initializer function
+  // also calls the right function.
+  hwy::GetChosenTarget().DeInit();
+
+  // With HWY_DISPATCH_WORKAROUND, the first call after DeInit() is expected
+  // to return the static target rather than `target`.
+#if HWY_DISPATCH_WORKAROUND
+  EXPECT_EQ(HWY_STATIC_TARGET, HWY_DYNAMIC_DISPATCH(FakeFunction)(42)) << line;
+#else
+  EXPECT_EQ(target, HWY_DYNAMIC_DISPATCH(FakeFunction)(42)) << line;
+#endif
+
+  // Second call uses the cached value from the previous call.
+  EXPECT_EQ(target, HWY_DYNAMIC_DISPATCH(FakeFunction)(42)) << line;
+}
+
+// Runs CallFunctionForTarget once per known target. Targets that are not part
+// of HWY_TARGETS are skipped inside CallFunctionForTarget, so listing the
+// targets of every architecture here is harmless.
+void CheckFakeFunction() {
+  // When adding a target, also add to DECLARE_FUNCTION above.
+  CallFunctionForTarget(HWY_AVX3_SPR, __LINE__);
+  CallFunctionForTarget(HWY_AVX3_ZEN4, __LINE__);
+  CallFunctionForTarget(HWY_AVX3_DL, __LINE__);
+  CallFunctionForTarget(HWY_AVX3, __LINE__);
+  CallFunctionForTarget(HWY_AVX2, __LINE__);
+  CallFunctionForTarget(HWY_SSE4, __LINE__);
+  CallFunctionForTarget(HWY_SSSE3, __LINE__);
+  CallFunctionForTarget(HWY_SSE2, __LINE__);
+
+  CallFunctionForTarget(HWY_SVE2_128, __LINE__);
+  CallFunctionForTarget(HWY_SVE_256, __LINE__);
+  CallFunctionForTarget(HWY_SVE2, __LINE__);
+  CallFunctionForTarget(HWY_SVE, __LINE__);
+  CallFunctionForTarget(HWY_NEON, __LINE__);
+  CallFunctionForTarget(HWY_NEON_WITHOUT_AES, __LINE__);
+
+  CallFunctionForTarget(HWY_PPC10, __LINE__);
+  CallFunctionForTarget(HWY_PPC9, __LINE__);
+  CallFunctionForTarget(HWY_PPC8, __LINE__);
+
+  CallFunctionForTarget(HWY_WASM, __LINE__);
+  CallFunctionForTarget(HWY_WASM_EMU256, __LINE__);
+
+  CallFunctionForTarget(HWY_RVV, __LINE__);
+  // The tables only have space for either HWY_SCALAR or HWY_EMU128; the former
+  // is opt-in only.
+#if defined(HWY_COMPILE_ONLY_SCALAR) || HWY_BROKEN_EMU128
+  CallFunctionForTarget(HWY_SCALAR, __LINE__);
+#else
+  CallFunctionForTarget(HWY_EMU128, __LINE__);
+#endif
+}
+
+}  // namespace fake
+
+namespace hwy {
+
+// Fixture for the tests below; its TearDown undoes the target overrides that
+// a test may have installed.
+class HwyTargetsTest : public testing::Test {
+ protected:
+  void TearDown() override {
+    // Presumably 0 clears the override set by CallFunctionForTarget - confirm
+    // against the definition of SetSupportedTargetsForTest.
+    SetSupportedTargetsForTest(0);
+    DisableTargets(0);  // Reset the mask.
+  }
+};
+
+// Test that the order in the HWY_EXPORT static array matches the expected
+// value of the target bits. This is only checked for the targets that are
+// enabled in the current compilation.
+TEST_F(HwyTargetsTest, ChosenTargetOrderTest) { fake::CheckFakeFunction(); }
+
+// Checks that DisableTargets removes targets from SupportedTargets(): first
+// with everything disabled, then with only the best target disabled.
+TEST_F(HwyTargetsTest, DisabledTargetsTest) {
+  DisableTargets(~0LL);
+  // Check that disabling everything at least leaves the static target.
+  HWY_ASSERT(HWY_STATIC_TARGET == SupportedTargets());
+
+  DisableTargets(0);  // Reset the mask.
+  const int64_t current_targets = SupportedTargets();
+  const int64_t enabled_baseline = static_cast<int64_t>(HWY_ENABLED_BASELINE);
+  // Exclude these two because they are always returned by SupportedTargets.
+  const int64_t fallback = HWY_SCALAR | HWY_EMU128;
+  if ((current_targets & ~enabled_baseline & ~fallback) == 0) {
+    // We can't test anything else if the only compiled target is the baseline.
+    return;
+  }
+
+  // Get the lowest bit in the mask (the best target) and disable that one.
+  // x & (~x + 1) isolates the lowest set bit of x.
+  const int64_t best_target = current_targets & (~current_targets + 1);
+  DisableTargets(best_target);
+
+  // Check that the other targets are still enabled.
+  HWY_ASSERT((best_target ^ current_targets) == SupportedTargets());
+  DisableTargets(0);  // Reset the mask.
+}
+
+}  // namespace hwy
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/timer-inl.h b/third-party/libjxl/libjxl/third_party/highway/hwy/timer-inl.h
new file mode 100644
index 0000000000..c286b0a8fe
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/timer-inl.h
@@ -0,0 +1,200 @@
+// Copyright 2023 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// High-resolution and high-precision timer
+
+// Per-target include guard
+#if defined(HIGHWAY_HWY_TIMER_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_TIMER_INL_H_
+#undef HIGHWAY_HWY_TIMER_INL_H_
+#else
+#define HIGHWAY_HWY_TIMER_INL_H_
+#endif
+
+#include "hwy/highway.h"
+#include "hwy/timer.h"
+
+#if defined(_WIN32) || defined(_WIN64)
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif  // NOMINMAX
+#include <windows.h>
+#endif
+
+#if defined(__APPLE__)
+#include <mach/mach.h>
+#include <mach/mach_time.h>
+#endif
+
+#if defined(__HAIKU__)
+#include <OS.h>
+#endif
+
+#if HWY_ARCH_PPC && defined(__GLIBC__) && defined(__powerpc64__)
+#include <sys/platform/ppc.h>  // NOLINT __ppc_get_timebase_freq
+#endif
+
+#if HWY_ARCH_X86 && HWY_COMPILER_MSVC
+#include <intrin.h>
+#endif
+
+#include <time.h>  // clock_gettime
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+namespace timer {
+
+// Ticks := platform-specific timer values (CPU cycles on x86). Must be
+// unsigned to guarantee wraparound on overflow.
+using Ticks = uint64_t;
+
+// Start/Stop return absolute timestamps and must be placed immediately before
+// and after the region to measure. We provide separate Start/Stop functions
+// because they use different fences.
+//
+// Background: RDTSC is not 'serializing'; earlier instructions may complete
+// after it, and/or later instructions may complete before it. 'Fences' ensure
+// regions' elapsed times are independent of such reordering. The only
+// documented unprivileged serializing instruction is CPUID, which acts as a
+// full fence (no reordering across it in either direction). Unfortunately
+// the latency of CPUID varies wildly (perhaps made worse by not initializing
+// its EAX input). Because it cannot reliably be deducted from the region's
+// elapsed time, it must not be included in the region to measure (i.e.
+// between the two RDTSC).
+//
+// The newer RDTSCP is sometimes described as serializing, but it actually
+// only serves as a half-fence with release semantics. Although all
+// instructions in the region will complete before the final timestamp is
+// captured, subsequent instructions may leak into the region and increase the
+// elapsed time. Inserting another fence after the final RDTSCP would prevent
+// such reordering without affecting the measured region.
+//
+// Fortunately, such a fence exists. The LFENCE instruction is only documented
+// to delay later loads until earlier loads are visible. However, Intel's
+// reference manual says it acts as a full fence (waiting until all earlier
+// instructions have completed, and delaying later instructions until it
+// completes). AMD assigns the same behavior to MFENCE.
+//
+// We need a fence before the initial RDTSC to prevent earlier instructions
+// from leaking into the region, and arguably another after RDTSC to avoid
+// region instructions from completing before the timestamp is recorded.
+// When surrounded by fences, the additional RDTSCP half-fence provides no
+// benefit, so the initial timestamp can be recorded via RDTSC, which has
+// lower overhead than RDTSCP because it does not read TSC_AUX. In summary,
+// we define Start = LFENCE/RDTSC/LFENCE; Stop = RDTSCP/LFENCE.
+//
+// Using Start+Start leads to higher variance and overhead than Stop+Stop.
+// However, Stop+Stop includes an LFENCE in the region measurements, which
+// adds a delay dependent on earlier loads. The combination of Start+Stop
+// is faster than Start+Start and more consistent than Stop+Stop because
+// the first LFENCE already delayed subsequent loads before the measured
+// region. This combination seems not to have been considered in prior work:
+// http://akaros.cs.berkeley.edu/lxr/akaros/kern/arch/x86/rdtsc_test.c
+//
+// Note: performance counters can measure 'exact' instructions-retired or
+// (unhalted) cycle counts. The RDPMC instruction is not serializing and also
+// requires fences. Unfortunately, it is not accessible on all OSes and we
+// prefer to avoid kernel-mode drivers. Performance counters are also affected
+// by several under/over-count errata, so we use the TSC instead.
+
+// Returns a 64-bit timestamp in unit of 'ticks'; to convert to seconds,
+// divide by InvariantTicksPerSecond.
+// Exactly one of the preprocessor branches below is compiled; each assigns
+// `t` from a platform-specific counter.
+inline Ticks Start() {
+  Ticks t;
+#if HWY_ARCH_PPC && defined(__GLIBC__) && defined(__powerpc64__)
+  // Reads special-purpose register 268 - presumably the time base that
+  // __ppc_get_timebase_freq describes; confirm against the Power ISA.
+  asm volatile("mfspr %0, %1" : "=r"(t) : "i"(268));
+#elif HWY_ARCH_ARM_A64 && !HWY_COMPILER_MSVC
+  // pmccntr_el0 is privileged but cntvct_el0 is accessible in Linux and QEMU.
+  asm volatile("mrs %0, cntvct_el0" : "=r"(t));
+#elif HWY_ARCH_X86 && HWY_COMPILER_MSVC
+  // LFENCE / RDTSC / LFENCE via intrinsics, each step separated by a
+  // _ReadWriteBarrier() call.
+  _ReadWriteBarrier();
+  _mm_lfence();
+  _ReadWriteBarrier();
+  t = __rdtsc();
+  _ReadWriteBarrier();
+  _mm_lfence();
+  _ReadWriteBarrier();
+#elif HWY_ARCH_X86_64
+  // LFENCE / RDTSC / LFENCE; the upper half of the TSC arrives in rdx and is
+  // shifted and ORed into the output register.
+  asm volatile(
+      "lfence\n\t"
+      "rdtsc\n\t"
+      "shl $32, %%rdx\n\t"
+      "or %%rdx, %0\n\t"
+      "lfence"
+      : "=a"(t)
+      :
+      // "memory" avoids reordering. rdx = TSC >> 32.
+      // "cc" = flags modified by SHL.
+      : "rdx", "memory", "cc");
+#elif HWY_ARCH_RVV
+  asm volatile("rdtime %0" : "=r"(t));
+#elif defined(_WIN32) || defined(_WIN64)
+  LARGE_INTEGER counter;
+  (void)QueryPerformanceCounter(&counter);
+  t = counter.QuadPart;
+#elif defined(__APPLE__)
+  t = mach_absolute_time();
+#elif defined(__HAIKU__)
+  t = system_time_nsecs();  // since boot
+#else  // POSIX
+  // Nanoseconds from the monotonic clock: seconds scaled to ns plus the
+  // nanosecond remainder.
+  timespec ts;
+  clock_gettime(CLOCK_MONOTONIC, &ts);
+  t = static_cast<Ticks>(ts.tv_sec * 1000000000LL + ts.tv_nsec);
+#endif
+  return t;
+}
+
+// WARNING: on x86, caller must check HasRDTSCP before using this!
+// Returns a timestamp in the same unit as Start(). Only the x86 branches
+// differ from Start(): they use RDTSCP followed by LFENCE. The PPC and
+// AArch64 branches repeat Start()'s instruction and every other platform
+// calls Start() directly.
+inline Ticks Stop() {
+  uint64_t t;
+#if HWY_ARCH_PPC && defined(__GLIBC__) && defined(__powerpc64__)
+  asm volatile("mfspr %0, %1" : "=r"(t) : "i"(268));
+#elif HWY_ARCH_ARM_A64 && !HWY_COMPILER_MSVC
+  // pmccntr_el0 is privileged but cntvct_el0 is accessible in Linux and QEMU.
+  asm volatile("mrs %0, cntvct_el0" : "=r"(t));
+#elif HWY_ARCH_X86 && HWY_COMPILER_MSVC
+  _ReadWriteBarrier();
+  unsigned aux;  // receives TSC_AUX; the value is not used
+  t = __rdtscp(&aux);
+  _ReadWriteBarrier();
+  _mm_lfence();
+  _ReadWriteBarrier();
+#elif HWY_ARCH_X86_64
+  // Use inline asm because __rdtscp generates code to store TSC_AUX (ecx).
+  asm volatile(
+      "rdtscp\n\t"
+      "shl $32, %%rdx\n\t"
+      "or %%rdx, %0\n\t"
+      "lfence"
+      : "=a"(t)
+      :
+      // "memory" avoids reordering. rcx = TSC_AUX. rdx = TSC >> 32.
+      // "cc" = flags modified by SHL.
+      : "rcx", "rdx", "memory", "cc");
+#else
+  t = Start();
+#endif
+  return t;
+}
+
+}  // namespace timer
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#endif  // per-target include guard
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/timer.cc b/third-party/libjxl/libjxl/third_party/highway/hwy/timer.cc
new file mode 100644
index 0000000000..28b5892e9e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/timer.cc
@@ -0,0 +1,186 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/timer.h"
+
+#include <stdlib.h>
+
+#include <chrono>  //NOLINT
+
+#include "hwy/robust_statistics.h"
+#include "hwy/timer-inl.h"
+
+#if HWY_ARCH_X86 && !HWY_COMPILER_MSVC
+#include <cpuid.h>  // NOLINT
+#endif
+
+namespace hwy {
+namespace timer = hwy::HWY_NAMESPACE::timer;
+
+namespace platform {
+namespace {
+
+// Measures the actual current frequency of Ticks. We cannot rely on the nominal
+// frequency encoded in x86 BrandString because it is misleading on M1 Rosetta,
+// and not reported by AMD. CPUID 0x15 is also not yet widely supported. Also
+// used on RISC-V and aarch64.
+//
+// Runs three measurements of at least 10 ms each, comparing the tick counter
+// against std::chrono::steady_clock, and returns the largest ticks-per-second
+// value observed.
+HWY_MAYBE_UNUSED double MeasureNominalClockRate() {
+  double max_ticks_per_sec = 0.0;
+  // Arbitrary, enough to ignore 2 outliers without excessive init time.
+  for (int rep = 0; rep < 3; ++rep) {
+    auto time0 = std::chrono::steady_clock::now();
+    using Time = decltype(time0);
+    const timer::Ticks ticks0 = timer::Start();
+    const Time time_min = time0 + std::chrono::milliseconds(10);
+
+    // Busy-wait until at least 10 ms have elapsed on the steady clock.
+    Time time1;
+    timer::Ticks ticks1;
+    for (;;) {
+      time1 = std::chrono::steady_clock::now();
+      // Ideally this would be Stop, but that requires RDTSCP on x86. To avoid
+      // another codepath, just use Start instead. now() presumably has its own
+      // fence-like behavior.
+      ticks1 = timer::Start();  // Do not use Stop, see comment above
+      if (time1 >= time_min) break;
+    }
+
+    const double dticks = static_cast<double>(ticks1 - ticks0);
+    std::chrono::duration<double, std::ratio<1>> dtime = time1 - time0;
+    const double ticks_per_sec = dticks / dtime.count();
+    // Plain comparison instead of std::max: this file does not include
+    // <algorithm> itself, so std::max was only available if another header
+    // happened to pull it in.
+    if (ticks_per_sec > max_ticks_per_sec) max_ticks_per_sec = ticks_per_sec;
+  }
+  return max_ticks_per_sec;
+}
+
+#if HWY_ARCH_X86
+
+// Executes CPUID for leaf `level` and sub-leaf `count` and stores the four
+// result registers in abcd[0..3], in a, b, c, d order.
+void Cpuid(const uint32_t level, const uint32_t count,
+           uint32_t* HWY_RESTRICT abcd) {
+#if HWY_COMPILER_MSVC
+  // The intrinsic reports its results as signed int; convert each one.
+  int regs[4];
+  __cpuidex(regs, level, count);
+  abcd[0] = static_cast<uint32_t>(regs[0]);
+  abcd[1] = static_cast<uint32_t>(regs[1]);
+  abcd[2] = static_cast<uint32_t>(regs[2]);
+  abcd[3] = static_cast<uint32_t>(regs[3]);
+#else
+  // The macro's output operands only need to be lvalues, so write straight
+  // into the caller's array.
+  __cpuid_count(level, count, abcd[0], abcd[1], abcd[2], abcd[3]);
+#endif
+}
+
+// Returns whether CPUID reports support for the RDTSCP instruction (bit 27 of
+// the fourth register of extended leaf 0x80000001).
+bool HasRDTSCP() {
+  uint32_t abcd[4];
+
+  // Requesting an extended leaf above the supported maximum does not yield
+  // meaningful feature bits, so first confirm that leaf 0x80000001 exists.
+  // GetBrandString performs the same kind of check for its leaves.
+  Cpuid(0x80000000U, 0, abcd);
+  if (abcd[0] < 0x80000001U) return false;
+
+  Cpuid(0x80000001U, 0, abcd);         // Extended feature flags
+  return (abcd[3] & (1u << 27)) != 0;  // RDTSCP
+}
+
+// Writes the CPU brand string into `cpu100` as a NUL-terminated string of 48
+// bytes plus terminator, or an empty string if the CPU does not report the
+// brand-string leaves.
+void GetBrandString(char* cpu100) {
+  uint32_t regs[4];
+
+  // Check if brand string is supported (it is on all reasonable Intel/AMD):
+  // the first register of leaf 0x80000000 must be at least 0x80000004.
+  Cpuid(0x80000000U, 0, regs);
+  const bool has_brand_leaves = regs[0] >= 0x80000004U;
+  if (!has_brand_leaves) {
+    cpu100[0] = 0;
+    return;
+  }
+
+  // Leaves 0x80000002..0x80000004 each contribute sizeof(regs) == 16 bytes.
+  char* out = cpu100;
+  for (uint32_t leaf = 0x80000002U; leaf < 0x80000005U; ++leaf) {
+    Cpuid(leaf, 0, regs);
+    CopyBytes<sizeof(regs)>(&regs[0], out);  // not same size
+    out += sizeof(regs);
+  }
+  *out = 0;  // out == cpu100 + 48 here
+}
+
+#endif  // HWY_ARCH_X86
+
+}  // namespace
+
+// Returns the current timer::Start() reading converted to seconds.
+HWY_DLLEXPORT double Now() {
+  // Function-local static: the reciprocal tick rate is computed on the first
+  // call only and reused afterwards.
+  static const double seconds_per_tick = 1.0 / InvariantTicksPerSecond();
+  const timer::Ticks ticks = timer::Start();
+  return static_cast<double>(ticks) * seconds_per_tick;
+}
+
+// Returns whether timer::Stop may be used. On x86 this requires RDTSCP; if it
+// is missing, `cpu100` receives the brand string and false is returned. On
+// every other architecture the result is true and `cpu100` is left untouched.
+HWY_DLLEXPORT bool HaveTimerStop(char* cpu100) {
+#if HWY_ARCH_X86
+  if (HasRDTSCP()) return true;
+  GetBrandString(cpu100);
+  return false;
+#else
+  (void)cpu100;
+  return true;
+#endif
+}
+
+// Returns the number of timer ticks per second for whichever counter the
+// current platform uses; the preprocessor picks exactly one branch.
+HWY_DLLEXPORT double InvariantTicksPerSecond() {
+#if HWY_ARCH_PPC && defined(__GLIBC__) && defined(__powerpc64__)
+  const auto timebase_hz = __ppc_get_timebase_freq();
+  return static_cast<double>(timebase_hz);
+#elif HWY_ARCH_X86 || HWY_ARCH_RVV || (HWY_ARCH_ARM_A64 && !HWY_COMPILER_MSVC)
+  // We assume the x86 TSC is invariant; it is on all recent Intel/AMD CPUs.
+  // Measured once on first use and cached in a function-local static.
+  static const double measured_rate = MeasureNominalClockRate();
+  return measured_rate;
+#elif defined(_WIN32) || defined(_WIN64)
+  LARGE_INTEGER counts_per_sec;
+  (void)QueryPerformanceFrequency(&counts_per_sec);
+  return static_cast<double>(counts_per_sec.QuadPart);
+#elif defined(__APPLE__)
+  // https://developer.apple.com/library/mac/qa/qa1398/_index.html
+  mach_timebase_info_data_t info;
+  (void)mach_timebase_info(&info);
+  const double denom = static_cast<double>(info.denom);
+  return denom / info.numer * 1E9;
+#else
+  return 1E9;  // Haiku and clock_gettime return nanoseconds.
+#endif
+}
+
+// Estimates the tick difference between two back-to-back timer reads:
+// collects kTimerSamples batches of kTimerSamples differences, reduces each
+// batch with robust_statistics::Mode and then reduces the per-batch results
+// the same way. (Mode is presumably the statistical mode - confirm in
+// robust_statistics.h.)
+HWY_DLLEXPORT uint64_t TimerResolution() {
+  char cpu100[100];
+  // The brand string written to cpu100 on failure is not used here.
+  bool can_use_stop = HaveTimerStop(cpu100);
+
+  // For measuring timer overhead/resolution. Used in a nested loop =>
+  // quadratic time, acceptable because we know timer overhead is "low".
+  // constexpr because this is used to define array bounds.
+  constexpr size_t kTimerSamples = 256;
+
+  // Nested loop avoids exceeding stack/L1 capacity.
+  timer::Ticks repetitions[kTimerSamples];
+  for (size_t rep = 0; rep < kTimerSamples; ++rep) {
+    timer::Ticks samples[kTimerSamples];
+    // The two loops differ only in how the second timestamp is taken.
+    if (can_use_stop) {
+      for (size_t i = 0; i < kTimerSamples; ++i) {
+        const timer::Ticks t0 = timer::Start();
+        const timer::Ticks t1 = timer::Stop();  // we checked HasRDTSCP above
+        samples[i] = t1 - t0;
+      }
+    } else {
+      for (size_t i = 0; i < kTimerSamples; ++i) {
+        const timer::Ticks t0 = timer::Start();
+        const timer::Ticks t1 = timer::Start();  // do not use Stop, see above
+        samples[i] = t1 - t0;
+      }
+    }
+    repetitions[rep] = robust_statistics::Mode(samples);
+  }
+  return robust_statistics::Mode(repetitions);
+}
+
+}  // namespace platform
+}  // namespace hwy
diff --git a/third-party/libjxl/libjxl/third_party/highway/hwy/timer.h b/third-party/libjxl/libjxl/third_party/highway/hwy/timer.h
new file mode 100644
index 0000000000..0ca46e24e0
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/hwy/timer.h
@@ -0,0 +1,55 @@
+// Copyright 2023 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_TIMER_H_
+#define HIGHWAY_HWY_TIMER_H_
+
+// Platform-specific timer functions. Provides Now() and functions for
+// interpreting and converting the timer-inl.h Ticks.
+
+#include <stdint.h>
+
+#include "hwy/highway_export.h"
+
+namespace hwy {
+namespace platform {
+
+// Returns current timestamp [in seconds] relative to an unspecified origin.
+// Features: monotonic (no negative elapsed time), steady (unaffected by system
+// time changes), high-resolution (on the order of microseconds).
+// Uses InvariantTicksPerSecond and the baseline version of timer::Start().
+HWY_DLLEXPORT double Now();
+
+// Functions for use with timer-inl.h:
+
+// Returns whether it is safe to call timer::Stop without executing an illegal
+// instruction; if false, fills cpu100 (a pointer to a 100 character buffer)
+// with the CPU brand string or an empty string if unknown.
+HWY_DLLEXPORT bool HaveTimerStop(char* cpu100);
+
+// Returns tick rate, useful for converting timer::Ticks to seconds. Invariant
+// means the tick counter frequency is independent of CPU throttling or sleep.
+// This call may be expensive, callers should cache the result.
+HWY_DLLEXPORT double InvariantTicksPerSecond();
+
+// Returns ticks elapsed in back to back timer calls, i.e. a function of the
+// timer resolution (minimum measurable difference) and overhead.
+// This call is expensive, callers should cache the result.
+HWY_DLLEXPORT uint64_t TimerResolution();
+
+}  // namespace platform
+}  // namespace hwy
+
+#endif  // HIGHWAY_HWY_TIMER_H_
diff --git a/third-party/libjxl/libjxl/third_party/highway/libhwy-contrib.pc.in b/third-party/libjxl/libjxl/third_party/highway/libhwy-contrib.pc.in
new file mode 100644
index 0000000000..89c45f5e42
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/libhwy-contrib.pc.in
@@ -0,0 +1,10 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix}
+libdir=${exec_prefix}/@CMAKE_INSTALL_LIBDIR@
+includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
+
+Name: libhwy-contrib
+Description: Additions to Highway: dot product, image, math, sort
+Version: @HWY_LIBRARY_VERSION@
+Libs: -L${libdir} -lhwy_contrib
+Cflags: -I${includedir}
diff --git a/third-party/libjxl/libjxl/third_party/highway/libhwy-test.pc.in b/third-party/libjxl/libjxl/third_party/highway/libhwy-test.pc.in
new file mode 100644
index 0000000000..0416b10df3
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/libhwy-test.pc.in
@@ -0,0 +1,11 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix}
+libdir=${exec_prefix}/@CMAKE_INSTALL_LIBDIR@
+includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
+
+Name: libhwy-test
+Description: Efficient and performance-portable SIMD wrapper, test helpers.
+Requires: gtest
+Version: @HWY_LIBRARY_VERSION@
+Libs: -L${libdir} -lhwy_test
+Cflags: -I${includedir}
diff --git a/third-party/libjxl/libjxl/third_party/highway/libhwy.pc.in b/third-party/libjxl/libjxl/third_party/highway/libhwy.pc.in
new file mode 100644
index 0000000000..643989275d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/libhwy.pc.in
@@ -0,0 +1,10 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix}
+libdir=${exec_prefix}/@CMAKE_INSTALL_LIBDIR@
+includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
+
+Name: libhwy
+Description: Efficient and performance-portable SIMD wrapper
+Version: @HWY_LIBRARY_VERSION@
+Libs: -L${libdir} -lhwy
+Cflags: -I${includedir} -D@DLLEXPORT_TO_DEFINE@
diff --git a/third-party/libjxl/libjxl/third_party/highway/preamble.js.lds b/third-party/libjxl/libjxl/third_party/highway/preamble.js.lds
new file mode 100644
index 0000000000..f484a19d2c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/preamble.js.lds
@@ -0,0 +1,9 @@
+/*
+ * Copyright 2019 Google LLC
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+/* mock crypto module for benchmarks and unit tests or std::random_device fails at runtime */
+var crypto = { getRandomValues: function(array) { for (var i = 0; i < array.length; i++) array[i] = (Math.random()*256)|0 } };
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/highway/run_tests.bat b/third-party/libjxl/libjxl/third_party/highway/run_tests.bat
new file mode 100644
index 0000000000..26600a2c4f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/run_tests.bat
@@ -0,0 +1,20 @@
+@echo off
+REM Switch to the drive and directory of this batch file (quoted for spaces)
+cd /d "%~dp0"
+
+if not exist build_win mkdir build_win
+
+cd build_win
+cmake .. -DHWY_WARNINGS_ARE_ERRORS:BOOL=ON -G Ninja || goto error
+ninja || goto error
+ctest -j || goto error
+
+cd ..
+echo Success
+goto end
+
+:error
+echo Failure
+exit /b 1
+
+:end
diff --git a/third-party/libjxl/libjxl/third_party/highway/run_tests.sh b/third-party/libjxl/libjxl/third_party/highway/run_tests.sh
new file mode 100755
index 0000000000..54bae91a7e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/highway/run_tests.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+
+# Switch to directory of this script
+MYDIR=$(dirname $(realpath "$0"))
+cd "${MYDIR}"
+
+# Exit if anything fails
+set -e
+
+#######################################
+echo RELEASE
+rm -rf build && mkdir build && cd build
+cmake .. -DHWY_WARNINGS_ARE_ERRORS:BOOL=ON -DCMAKE_BUILD_TYPE=Release
+make -j && ctest -j && cd .. && rm -rf build
+
+#######################################
+echo DEBUG Clang 9
+rm -rf build_dbg && mkdir build_dbg && cd build_dbg
+CXX=clang++-9 CC=clang-9 cmake .. -DHWY_WARNINGS_ARE_ERRORS:BOOL=ON -DCMAKE_BUILD_TYPE=Debug
+make -j && ctest -j && cd .. && rm -rf build_dbg
+
+#######################################
+echo 32-bit GCC
+rm -rf build_32 && mkdir build_32 && cd build_32
+CFLAGS=-m32 CXXFLAGS=-m32 LDFLAGS=-m32 CXX=g++ CC=gcc cmake .. -DHWY_WARNINGS_ARE_ERRORS:BOOL=ON -DHWY_CMAKE_SSE2:BOOL=ON  -DCMAKE_BUILD_TYPE=Release
+make -j && ctest -j && cd .. && rm -rf build_32
+
+#######################################
+for VER in 10 11 12; do
+  echo GCC $VER
+  rm -rf build_g$VER && mkdir build_g$VER && cd build_g$VER
+  CC=gcc-$VER CXX=g++-$VER cmake .. -DHWY_WARNINGS_ARE_ERRORS:BOOL=ON -DCMAKE_BUILD_TYPE=Release
+  make -j && make test && cd .. && rm -rf build_g$VER
+done
+
+#######################################
+echo Armv7 GCC
+export QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf
+rm -rf build_arm7 && mkdir build_arm7 && cd build_arm7
+CC=arm-linux-gnueabihf-gcc-11 CXX=arm-linux-gnueabihf-g++-11 cmake .. -DHWY_CMAKE_ARM7:BOOL=ON -DHWY_WARNINGS_ARE_ERRORS:BOOL=ON -DCMAKE_BUILD_TYPE=Release
+make -j && ctest -j && cd .. && rm -rf build_arm7
+
+#######################################
+echo Armv8 GCC
+export QEMU_LD_PREFIX=/usr/aarch64-linux-gnu
+rm -rf build_arm8 && mkdir build_arm8 && cd build_arm8
+CC=aarch64-linux-gnu-gcc-11 CXX=aarch64-linux-gnu-g++-11 cmake .. -DHWY_WARNINGS_ARE_ERRORS:BOOL=ON -DCMAKE_BUILD_TYPE=Release
+make -j && ctest -j && cd .. && rm -rf build_arm8
+
+#######################################
+echo POWER8 GCC
+export QEMU_LD_PREFIX=/usr/powerpc64le-linux-gnu
+rm -rf build_ppc8 && mkdir build_ppc8 && cd build_ppc8
+CC=powerpc64le-linux-gnu-gcc-12 CXX=powerpc64le-linux-gnu-g++-12 cmake .. -DCMAKE_BUILD_TYPE=Release -DHWY_WARNINGS_ARE_ERRORS:BOOL=ON -DCMAKE_CROSSCOMPILING_EMULATOR=/usr/bin/qemu-ppc64le-static -DCMAKE_C_COMPILER_TARGET="powerpc64le-linux-gnu" -DCMAKE_CXX_COMPILER_TARGET="powerpc64le-linux-gnu" -DCMAKE_CROSSCOMPILING=true -DCMAKE_CXX_FLAGS='-mcpu=power9 -mno-power9-vector -mpower8-vector'
+clear && make -j && ctest -j && cd .. && rm -rf build_ppc8
+
+#######################################
+echo POWER9 clang
+export QEMU_LD_PREFIX=/usr/powerpc64le-linux-gnu
+rm -rf build_ppc9 && mkdir build_ppc9 && cd build_ppc9
+CC=clang-15 CXX=clang++-15 cmake .. -DCMAKE_BUILD_TYPE=Release -DHWY_WARNINGS_ARE_ERRORS:BOOL=ON -DCMAKE_C_COMPILER_TARGET="powerpc64le-linux-gnu" -DCMAKE_CXX_COMPILER_TARGET="powerpc64le-linux-gnu" -DCMAKE_CROSSCOMPILING=true -DCMAKE_CXX_FLAGS='-mcpu=power9'
+clear && make -j && ctest -j && cd .. && rm -rf build_ppc9
+
+#######################################
+echo POWER9 big endian GCC
+export QEMU_LD_PREFIX=/usr/powerpc64-linux-gnu
+rm -rf build_ppc9be && mkdir build_ppc9be && cd build_ppc9be
+CC=powerpc64-linux-gnu-gcc-11 CXX=powerpc64-linux-gnu-g++-11 cmake .. -DCMAKE_BUILD_TYPE=Release -DHWY_WARNINGS_ARE_ERRORS:BOOL=ON -DCMAKE_CROSSCOMPILING_EMULATOR=/usr/bin/qemu-ppc64 -DCMAKE_C_COMPILER_TARGET="powerpc64-linux-musl" -DCMAKE_CXX_COMPILER_TARGET="powerpc64-linux-musl" -DCMAKE_CROSSCOMPILING=true  -DCMAKE_CXX_FLAGS='-mcpu=power9'
+clear && make -j && ctest -j && cd .. && rm -rf build_ppc9be
+
+#######################################
+echo POWER10 requires QEMU 7_2 and gcc because clang 15 crashes
+export QEMU_LD_PREFIX=/usr/powerpc64le-linux-gnu
+rm -rf build_ppc10 && mkdir build_ppc10 && cd build_ppc10
+CC=powerpc64le-linux-gnu-gcc-12 CXX=powerpc64le-linux-gnu-g++-12 cmake .. -DCMAKE_BUILD_TYPE=Release -DHWY_WARNINGS_ARE_ERRORS:BOOL=ON -DCMAKE_C_COMPILER_TARGET="powerpc64le-linux-gnu" -DCMAKE_CXX_COMPILER_TARGET="powerpc64le-linux-gnu" -DCMAKE_CROSSCOMPILING=true -DCMAKE_CXX_FLAGS='-mcpu=power10'
+clear && make -j && ctest -j && cd .. && rm -rf build_ppc10
+
+
+echo Success
diff --git a/third-party/libjxl/libjxl/third_party/lcms/AUTHORS b/third-party/libjxl/libjxl/third_party/lcms/AUTHORS
new file mode 100644
index 0000000000..dd801a9d1a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/AUTHORS
@@ -0,0 +1,52 @@
+
+Main Author
+------------
+Marti Maria 
+
+
+Contributors 
+------------
+Bob Friesenhahn 
+Kai-Uwe Behrmann
+Stuart Nixon
+Jordi Vilar
+Richard Hughes
+Auke Nauta
+Chris Evans (Google)
+Lorenzo Ridolfi 
+Robin Watts (Artifex)
+Shawn Pedersen 
+Andrew Brygin 
+Samuli Suominen 
+Florian Höch
+Aurelien Jarno 
+Claudiu Cebuc
+Michael Vrhel (Artifex)
+Michal Cihar 
+Daniel Kaneider 
+Mateusz Jurczyk (Google)
+Paul Miller
+Sébastien Léon
+Christian Schmitz
+XhmikosR
+Stanislav Brabec (SuSe)
+Leonhard Gruenschloss (Google)
+Patrick Noffke
+Christopher James Halse Rogers
+John Hein
+Thomas Weber (Debian)
+Mark Allen
+Noel Carboni
+Sergei Trofimovic
+
+Special Thanks 
+--------------
+Artifex software
+AlienSkin software
+Jan Morovic
+Jos Vernon (WebSupergoo)
+Harald Schneider (Maxon)
+Christian Albrecht 
+Dimitrios Anastassakis 
+Lemke Software 
+Tim Zaman
diff --git a/third-party/libjxl/libjxl/third_party/lcms/COPYING b/third-party/libjxl/libjxl/third_party/lcms/COPYING
new file mode 100644
index 0000000000..fda5c9eb57
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/COPYING
@@ -0,0 +1,8 @@
+Little CMS
+Copyright (c) 1998-2011 Marti Maria Saguer
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/third-party/libjxl/libjxl/third_party/lcms/ChangeLog b/third-party/libjxl/libjxl/third_party/lcms/ChangeLog
new file mode 100644
index 0000000000..6739c229ec
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/ChangeLog
@@ -0,0 +1,193 @@
+
+-----------------------
+2.9 Maintenance release
+-----------------------
+Several fixes related to security, and therefore not detailed here.
+C++ compiles now without warnings
+Added OSX and clang in travis
+Added a travis-ci test matrix for many compilers and OS. Thanks to Thomas Weber (debian) for this great improvement.
+testbed can now work with dynamic versions of library 
+Fixed wrong planar formatters regarding linestride interpretation
+
+-----------------------
+2.8 Featured release
+-----------------------
+
+Changed ChangeLog direction
+Fixed many typos in comments, thanks to Stefan Weil for doing that.
+Fixed localization bug, added a new test case crayons.icc thanks to Richard Hughes for providing the profile. 
+Fixed a bug in optimizer that made some formats (i.e, bits planar) unavailable
+Fixed misalignment problems on Alpha. The compiler does not align strings, and accessing the beginning of a string as a uint16 makes the code fail.
+Added some extra checks to the tools and examples.
+Fixed a bug that prevented reading the luminance tag
+BIG amount of functionality contributed/Sponsored  by Alien Skin Software: TransformStride, copyAlpha, performance plug-ins. Fixes some warnings as well.
+Added an extra _ to _stdcall to make it more portable
+Fixed a bug in transicc for named color profiles
+Fixed several compiler warnings
+Added support for Visual Studio 2015
+Fixed for XCODE project
+
+-----------------------
+2.7 Maintenance release
+-----------------------
+
+Added a version retrieval function 
+Added an option in transicc for working in bounded mode
+Fixed wrong handling of extra channels in some formatters.
+Added a project for VS2013
+Added license for iccjpeg.c
+New project for mac
+Added a global optimization that merges consecutive matrices in pipelines. Fixes loss of precision in some transforms
+Added a flag  to clip negative values in unbounded transforms (only gray, rgb, cmyk)
+Move unused var suppressor before the `return` statements.
+Remove dead code.
+Add missing comma in CGATS parser    
+utils/jpgicc/iccjpeg.c: Fix check if unsigned variable 'total_length'… …
+Some maintenance fixes
+Remove unused vcproj files
+Added a function to retrieve the iohandler of a given profile object
+Added a safety check on named color lists
+Fixed a macro clash on SNONE. 
+Fixed a possible segmentation fault in a non-happy path
+
+-----------------------
+2.6 Featured release
+-----------------------
+
+Added pthread dependency. From now lcms supports multithreading
+Fix for delete tag memory corruption
+Added directories for tiff, jpeg in configure script
+New locking plug-in, from Artifex
+Big revamp on Contexts, from Artifex
+Fixed memory leaks on error handling
+Changed endianness detection for PowerPC
+Added a way to retrieve matrix shaper always, no matter LUT is present
+Fixed a bug in PCS/Colorspace order when reading V2 Lab devicelinks
+Fixed some indexing out of bounds in floating point interpolation
+Fixed a double free in recovering from a previous error in default intent handler.
+
+-----------------------
+2.5 Maintenance release
+-----------------------
+
+Added some checks for non-happy path, mostly failing mallocs
+Transform2Devicelink now keeps white point when guessing deviceclass is enabled
+Rendering intent used when creating the transform is now propagated to profile header in cmsTransform2Devicelink. This is because 7.2.15 in spec 4.3
+Added a simple project for cppcheck
+Added support for VS2012
+Remove spurious tabs added by git merge of pull request
+Fixed a bug in parametric curves
+Added some fixes from XhmikosR
+Added TIFF Lab16 handling on tificc
+More changes from Artifex
+Added identity curves support for write V2 LUT 
+Added a way to read the profile creator from header
+Added a reference for Mac MLU tag
+Fixed devicelink generation for 8 bits
+Several minor issues found by cppcheck
+Several improvements in cgats parser.
+Fixed some bugs on floating point curves.
+Fixed a bug on  big endian platforms not supporting uint64 or long long.
+Added error descriptions on cmsSmoothToneCurve 
+Added new cmsPlugInTHR() and fixed some race conditions (thanks to Artifex)
+update black point detection algorithm to reflect ICC changes
+Fixed some 64 bit warnings on size_t to uint32 conversions
+Fixed a multithread bug on optimization (StageDEF)
+RGB profiles using same tone curves for several channels are storing now only one copy of the curve (saves space)
+User defined parametric curves can now be saved in ICC profiles.
+
+--------------------
+2.4 Featured release
+--------------------
+
+Added a check for maximum input channels
+Fixed an uninitialized read on PatchLUT
+Fixed a bug in XYZ floating point PCS
+added half float variants (ABGR and so)
+Added formatter resolution after xform optimization plugin
+Fixed a bug in transicc when clot tables are present
+Added a conditional compilation flag for "half" support
+Fixed a bug on named color profiles.
+Fixed a typo on tificc and jpgicc names, thanks to Elle Stone for reporting.
+Added half float support
+Increased security checks, thanks to Mateusz Jurczyk, from Google.
+Fixed a bug on IT8 reading of negative numbers.
+Fixed a bug on ending zero when saving a IT8 to memory
+Internal stage structs are now accessible through plug-in API
+Added a new plug-in type
+Added getPipelineContextID
+Fixed a bug in pipeline duplication
+gamma 1.0 can now operate in unbounded mode
+Exposed internal overview table for tone curves
+Added a new plug in entry for full transform
+Added support for transforms on planar data with different stride
+Added black point detection algorithm from Adobe paper
+Fixed a bug in black preservation checking
+Added performance improvements from several contributors, mostly Artifex
+Fixed uint64 to work in systems without long long native type
+Fixed a bug in the named color devicelink generation
+
+-----------------------
+2.3 Maintenance release
+-----------------------
+
+Added compatibility with Argyll's CGATS parser
+Updated to ICC spec 4.3
+Adding a memory alignment macro for CGATS parser
+Fixed a bug on the range of data in transicc, when colorant tag is specified
+Fixed Absolute colorimetric intent issues
+Fixed encoding for floating point tags in Lab/XYZ 
+Fixed a 0 byte allocation issue in _cmsCreateSubAllocChunk
+
+-----------------------
+2.2 Maintenance release
+-----------------------
+
+Pascal unit now is supported by Free Pascal Compiler
+Fixed a bug on ReadRAWtag 
+Added dictionary metatag support
+Fixed a bug in black preservation and slightly non-monotonic curves
+Added named color functionality
+Fixed a bug that made crash black preservation on CMYK2CMYK devicelinks
+Added functions to retrieve formatters from transforms
+Profiles with weird curves are not prone to prelinearization optimization.
+changed memmove to memcpy in cache for xput improvement 
+Fixed GBD bug (out of bounds memory overwrite) 
+Fixed some potential issues as NULL dereferencing
+Updated linkicc to 2.1, cleanup
+Removed pthreads need
+Fixed several bugs in absolute colorimetric intent
+
+-----------------------
+2.1 Maintenance release
+-----------------------
+
+Added bound check in floating point interpolation
+Fixed a bug on curve reversing when source curves have few points
+Added Duotone support (Bilinear interpolation)
+Fixed delphi interface
+linkicc now stores the rendering intent in the profile header
+Fixed several integer overflow and other integrity checks, thanks to Chris Evans
+Fixed an issue on curve inversion
+Fixed memory leaks when recovering from errors
+Fixed a bug in psid and profile sequence tags 
+Fixed a bug in device link creation on v4 profiles
+Fixed a bug in tificc in floating point formats
+Preliminary Delphi wrapper
+Fixed some typos in error messages
+Added cmsTagLinkedTo
+Fixed VC2010, VC2008 projects
+Added a check on jpgicc for NULL transforms
+Added UTILS_UNUSED_PARAMETER for samples
+Added cmsChangeBufferFormat for backwards compatibility
+Fixed a bug on Lab + Alpha float formatters, added such predefined formatters as well
+Fixed a bug on transicc that made profiles with output colorants info to malfunction
+Fixed a bug that prevented linkicc to work
+Fixed a bug on V2 CHAD construction, affects absolute colorimetric intent
+
+-----------------------
+2.0 Major version bump
+-----------------------
+
+
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/INSTALL b/third-party/libjxl/libjxl/third_party/lcms/INSTALL
new file mode 100644
index 0000000000..2e5ee4adf9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/INSTALL
@@ -0,0 +1,2 @@
+
+ Please see the documentation in doc folder
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Lib/BC/BC.txt b/third-party/libjxl/libjxl/third_party/lcms/Lib/BC/BC.txt
new file mode 100644
index 0000000000..146228d6af
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Lib/BC/BC.txt
@@ -0,0 +1 @@
+BC
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Lib/MS/MS.TXT b/third-party/libjxl/libjxl/third_party/lcms/Lib/MS/MS.TXT
new file mode 100644
index 0000000000..32dfbc9fa6
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Lib/MS/MS.TXT
@@ -0,0 +1 @@
+MS
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Makefile.am b/third-party/libjxl/libjxl/third_party/lcms/Makefile.am
new file mode 100644
index 0000000000..bf3c37b0a4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Makefile.am
@@ -0,0 +1,55 @@
+#
+# Top-Level Makefile for building LittleCMS 2
+#
+
+# Automake options: require automake >= 1.7.2, also build a .zip dist archive, and use 'foreign' strictness so not all GNU mandated files are required
+AUTOMAKE_OPTIONS = 1.7.2 dist-zip foreign
+
+ACLOCAL_AMFLAGS = -I m4
+
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+
+# Subdirectories whose Makefiles are run recursively (library sources, headers, command-line utilities, testbed)
+SUBDIRS = src include utils/tificc utils/transicc utils/linkicc utils/jpgicc utils/psicc testbed
+
+# Additional files and directories to include in the distribution archive
+EXTRA_DIST = AUTHORS COPYING ChangeLog doc Projects include bin Lib INSTALL README.1ST autogen.sh lcms2.pc.in
+
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = lcms2.pc
+
+# Sorted list of the directories found under plugins/ (GNU make wildcard/dir/sort); empty when no plug-ins are present
+PLUGIN_DIRECTORIES = $(sort $(dir $(wildcard plugins/*/)))
+
+# Remove Visual C++ build artifacts (user settings, Release/Debug output, IDE caches, logs) from Projects,
+# then clean every plug-in; a plug-in directory that cannot be entered or cleaned fails the target.
+clean-local:
+	find Projects -name "*.user" | xargs rm -rf
+	find Projects -name "Release" | xargs rm -rf
+	find Projects -name "Debug" | xargs rm -rf
+	find Projects -name "*.aps" | xargs rm -rf
+	find Projects -name "*.suo" | xargs rm -rf
+	find Projects -name "*.log" | xargs rm -rf
+	find Projects -name "*.sdf" | xargs rm -rf
+	find Projects -name "*.opensdf" | xargs rm -rf
+	find Projects -name "ipch" | xargs rm -rf
+	@list='$(PLUGIN_DIRECTORIES)'; for d in $$list; do (cd "$$d" && $(MAKE) $(AM_MAKEFLAGS) clean) || exit 1; done
+
+# Handle plug-ins: forward all/check/install-exec/uninstall to every plug-in directory and stop at the first one that fails ('cd &&' keeps make from running in the wrong directory; the $$list form tolerates an empty plug-in list).
+all-local:
+	@list='$(PLUGIN_DIRECTORIES)'; for d in $$list; do (cd "$$d" && $(MAKE) $(AM_MAKEFLAGS) all) || exit 1; done
+
+check-local:
+	@list='$(PLUGIN_DIRECTORIES)'; for d in $$list; do (cd "$$d" && $(MAKE) $(AM_MAKEFLAGS) check) || exit 1; done
+
+install-exec-local:
+	@list='$(PLUGIN_DIRECTORIES)'; for d in $$list; do (cd "$$d" && $(MAKE) $(AM_MAKEFLAGS) install-exec) || exit 1; done
+
+uninstall-local:
+	@list='$(PLUGIN_DIRECTORIES)'; for d in $$list; do (cd "$$d" && $(MAKE) $(AM_MAKEFLAGS) uninstall) || exit 1; done
+
+
+
+	
+	
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Makefile.in b/third-party/libjxl/libjxl/third_party/lcms/Makefile.in
new file mode 100644
index 0000000000..5ba7745977
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Makefile.in
@@ -0,0 +1,923 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Top-Level Makefile for building LittleCMS 2
+#
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = .
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
+	$(top_srcdir)/m4/ax_append_compile_flags.m4 \
+	$(top_srcdir)/m4/ax_append_flag.m4 \
+	$(top_srcdir)/m4/ax_check_compile_flag.m4 \
+	$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+	$(top_srcdir)/m4/ax_require_defined.m4 \
+	$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+	$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+	$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \
+	$(am__configure_deps) $(am__DIST_COMMON)
+am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
+ configure.lineno config.status.lineno
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES = lcms2.pc
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+SOURCES =
+DIST_SOURCES =
+RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
+	ctags-recursive dvi-recursive html-recursive info-recursive \
+	install-data-recursive install-dvi-recursive \
+	install-exec-recursive install-html-recursive \
+	install-info-recursive install-pdf-recursive \
+	install-ps-recursive install-recursive installcheck-recursive \
+	installdirs-recursive pdf-recursive ps-recursive \
+	tags-recursive uninstall-recursive
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
+am__installdirs = "$(DESTDIR)$(pkgconfigdir)"
+DATA = $(pkgconfig_DATA)
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive	\
+  distclean-recursive maintainer-clean-recursive
+am__recursive_targets = \
+  $(RECURSIVE_TARGETS) \
+  $(RECURSIVE_CLEAN_TARGETS) \
+  $(am__extra_recursive_targets)
+AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
+	cscope distdir dist dist-all distcheck
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+CSCOPE = cscope
+DIST_SUBDIRS = $(SUBDIRS)
+am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/lcms2.pc.in AUTHORS \
+	COPYING ChangeLog INSTALL compile config.guess config.sub \
+	depcomp install-sh ltmain.sh missing
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+distdir = $(PACKAGE)-$(VERSION)
+top_distdir = $(distdir)
+am__remove_distdir = \
+  if test -d "$(distdir)"; then \
+    find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
+      && rm -rf "$(distdir)" \
+      || { sleep 5 && rm -rf "$(distdir)"; }; \
+  else :; fi
+am__post_remove_distdir = $(am__remove_distdir)
+am__relativize = \
+  dir0=`pwd`; \
+  sed_first='s,^\([^/]*\)/.*$$,\1,'; \
+  sed_rest='s,^[^/]*/*,,'; \
+  sed_last='s,^.*/\([^/]*\)$$,\1,'; \
+  sed_butlast='s,/*[^/]*$$,,'; \
+  while test -n "$$dir1"; do \
+    first=`echo "$$dir1" | sed -e "$$sed_first"`; \
+    if test "$$first" != "."; then \
+      if test "$$first" = ".."; then \
+        dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
+        dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
+      else \
+        first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
+        if test "$$first2" = "$$first"; then \
+          dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
+        else \
+          dir2="../$$dir2"; \
+        fi; \
+        dir0="$$dir0"/"$$first"; \
+      fi; \
+    fi; \
+    dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
+  done; \
+  reldir="$$dir2"
+DIST_ARCHIVES = $(distdir).tar.gz $(distdir).zip
+GZIP_ENV = --best
+DIST_TARGETS = dist-gzip dist-zip
+distuninstallcheck_listfiles = find . -type f -print
+am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \
+  | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$'
+distcleancheck_listfiles = find . -type f -print
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JPEGICC_DEPLIBS = @JPEGICC_DEPLIBS@
+LCMS_LIB_DEPLIBS = @LCMS_LIB_DEPLIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBRARY_AGE = @LIBRARY_AGE@
+LIBRARY_CURRENT = @LIBRARY_CURRENT@
+LIBRARY_REVISION = @LIBRARY_REVISION@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBTOOL_DEPS = @LIBTOOL_DEPS@
+LIB_JPEG = @LIB_JPEG@
+LIB_MATH = @LIB_MATH@
+LIB_THREAD = @LIB_THREAD@
+LIB_TIFF = @LIB_TIFF@
+LIB_ZLIB = @LIB_ZLIB@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_CXX = @PTHREAD_CXX@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TIFFICC_DEPLIBS = @TIFFICC_DEPLIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+inline = @inline@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7.2 dist-zip foreign
+ACLOCAL_AMFLAGS = -I m4
+
+# Directories containing Makefiles to 'make'
+SUBDIRS = src include utils/tificc utils/transicc utils/linkicc utils/jpgicc utils/psicc testbed
+
+# Additional files to distribute
+EXTRA_DIST = AUTHORS COPYING ChangeLog doc Projects include bin Lib INSTALL README.1ST autogen.sh lcms2.pc.in
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = lcms2.pc
+
+# Get names of plug-ins in the source package
+PLUGIN_DIRECTORIES = $(sort $(dir $(wildcard plugins/*/)))
+all: all-recursive
+
+.SUFFIXES:
+am--refresh: Makefile
+	@:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      echo ' cd $(srcdir) && $(AUTOMAKE) --foreign'; \
+	      $(am__cd) $(srcdir) && $(AUTOMAKE) --foreign \
+		&& exit 0; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    echo ' $(SHELL) ./config.status'; \
+	    $(SHELL) ./config.status;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	$(SHELL) ./config.status --recheck
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	$(am__cd) $(srcdir) && $(AUTOCONF)
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	$(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
+$(am__aclocal_m4_deps):
+lcms2.pc: $(top_builddir)/config.status $(srcdir)/lcms2.pc.in
+	cd $(top_builddir) && $(SHELL) ./config.status $@
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+distclean-libtool:
+	-rm -f libtool config.lt
+install-pkgconfigDATA: $(pkgconfig_DATA)
+	@$(NORMAL_INSTALL)
+	@list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(pkgconfigdir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)" || exit 1; \
+	fi; \
+	for p in $$list; do \
+	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+	  echo "$$d$$p"; \
+	done | $(am__base_list) | \
+	while read files; do \
+	  echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgconfigdir)'"; \
+	  $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgconfigdir)" || exit $$?; \
+	done
+
+uninstall-pkgconfigDATA:
+	@$(NORMAL_UNINSTALL)
+	@list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \
+	files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+	dir='$(DESTDIR)$(pkgconfigdir)'; $(am__uninstall_files_from_dir)
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run 'make' without going through this Makefile.
+# To change the values of 'make' variables: instead of editing Makefiles,
+# (1) if the variable is set in 'config.status', edit 'config.status'
+#     (which will cause the Makefiles to be regenerated when you run 'make');
+# (2) otherwise, pass the desired values on the 'make' command line.
+$(am__recursive_targets):
+	@fail=; \
+	if $(am__make_keepgoing); then \
+	  failcom='fail=yes'; \
+	else \
+	  failcom='exit 1'; \
+	fi; \
+	dot_seen=no; \
+	target=`echo $@ | sed s/-recursive//`; \
+	case "$@" in \
+	  distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+	  *) list='$(SUBDIRS)' ;; \
+	esac; \
+	for subdir in $$list; do \
+	  echo "Making $$target in $$subdir"; \
+	  if test "$$subdir" = "."; then \
+	    dot_seen=yes; \
+	    local_target="$$target-am"; \
+	  else \
+	    local_target="$$target"; \
+	  fi; \
+	  ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+	  || eval $$failcom; \
+	done; \
+	if test "$$dot_seen" = "no"; then \
+	  $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+	fi; test -z "$$fail"
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-recursive
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+	  include_option=--etags-include; \
+	  empty_fix=.; \
+	else \
+	  include_option=--include; \
+	  empty_fix=; \
+	fi; \
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    test ! -f $$subdir/TAGS || \
+	      set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
+	  fi; \
+	done; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-recursive
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscope: cscope.files
+	test ! -s cscope.files \
+	  || $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS)
+clean-cscope:
+	-rm -f cscope.files
+cscope.files: clean-cscope cscopelist
+cscopelist: cscopelist-recursive
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+	-rm -f cscope.out cscope.in.out cscope.po.out cscope.files
+
+distdir: $(DISTFILES)
+	$(am__remove_distdir)
+	test -d "$(distdir)" || mkdir "$(distdir)"
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+	@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    $(am__make_dryrun) \
+	      || test -d "$(distdir)/$$subdir" \
+	      || $(MKDIR_P) "$(distdir)/$$subdir" \
+	      || exit 1; \
+	    dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
+	    $(am__relativize); \
+	    new_distdir=$$reldir; \
+	    dir1=$$subdir; dir2="$(top_distdir)"; \
+	    $(am__relativize); \
+	    new_top_distdir=$$reldir; \
+	    echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
+	    echo "     am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
+	    ($(am__cd) $$subdir && \
+	      $(MAKE) $(AM_MAKEFLAGS) \
+	        top_distdir="$$new_top_distdir" \
+	        distdir="$$new_distdir" \
+		am__remove_distdir=: \
+		am__skip_length_check=: \
+		am__skip_mode_fix=: \
+	        distdir) \
+	      || exit 1; \
+	  fi; \
+	done
+	-test -n "$(am__skip_mode_fix)" \
+	|| find "$(distdir)" -type d ! -perm -755 \
+		-exec chmod u+rwx,go+rx {} \; -o \
+	  ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
+	  ! -type d ! -perm -400 -exec chmod a+r {} \; -o \
+	  ! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \
+	|| chmod -R a+r "$(distdir)"
+dist-gzip: distdir
+	tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
+	$(am__post_remove_distdir)
+
+dist-bzip2: distdir
+	tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2
+	$(am__post_remove_distdir)
+
+dist-lzip: distdir
+	tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz
+	$(am__post_remove_distdir)
+
+dist-xz: distdir
+	tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz
+	$(am__post_remove_distdir)
+
+dist-tarZ: distdir
+	@echo WARNING: "Support for distribution archives compressed with" \
+		       "legacy program 'compress' is deprecated." >&2
+	@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
+	tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
+	$(am__post_remove_distdir)
+
+dist-shar: distdir
+	@echo WARNING: "Support for shar distribution archives is" \
+	               "deprecated." >&2
+	@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
+	shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
+	$(am__post_remove_distdir)
+dist-zip: distdir
+	-rm -f $(distdir).zip
+	zip -rq $(distdir).zip $(distdir)
+	$(am__post_remove_distdir)
+
+dist dist-all:
+	$(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:'
+	$(am__post_remove_distdir)
+
+# This target untars the dist file and tries a VPATH configuration.  Then
+# it guarantees that the distribution is self-contained by making another
+# tarfile.
+distcheck: dist
+	case '$(DIST_ARCHIVES)' in \
+	*.tar.gz*) \
+	  GZIP=$(GZIP_ENV) gzip -dc $(distdir).tar.gz | $(am__untar) ;;\
+	*.tar.bz2*) \
+	  bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
+	*.tar.lz*) \
+	  lzip -dc $(distdir).tar.lz | $(am__untar) ;;\
+	*.tar.xz*) \
+	  xz -dc $(distdir).tar.xz | $(am__untar) ;;\
+	*.tar.Z*) \
+	  uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
+	*.shar.gz*) \
+	  GZIP=$(GZIP_ENV) gzip -dc $(distdir).shar.gz | unshar ;;\
+	*.zip*) \
+	  unzip $(distdir).zip ;;\
+	esac
+	chmod -R a-w $(distdir)
+	chmod u+w $(distdir)
+	mkdir $(distdir)/_build $(distdir)/_build/sub $(distdir)/_inst
+	chmod a-w $(distdir)
+	test -d $(distdir)/_build || exit 0; \
+	dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
+	  && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
+	  && am__cwd=`pwd` \
+	  && $(am__cd) $(distdir)/_build/sub \
+	  && ../../configure \
+	    $(AM_DISTCHECK_CONFIGURE_FLAGS) \
+	    $(DISTCHECK_CONFIGURE_FLAGS) \
+	    --srcdir=../.. --prefix="$$dc_install_base" \
+	  && $(MAKE) $(AM_MAKEFLAGS) \
+	  && $(MAKE) $(AM_MAKEFLAGS) dvi \
+	  && $(MAKE) $(AM_MAKEFLAGS) check \
+	  && $(MAKE) $(AM_MAKEFLAGS) install \
+	  && $(MAKE) $(AM_MAKEFLAGS) installcheck \
+	  && $(MAKE) $(AM_MAKEFLAGS) uninstall \
+	  && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \
+	        distuninstallcheck \
+	  && chmod -R a-w "$$dc_install_base" \
+	  && ({ \
+	       (cd ../.. && umask 077 && mkdir "$$dc_destdir") \
+	       && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \
+	       && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \
+	       && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \
+	            distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \
+	      } || { rm -rf "$$dc_destdir"; exit 1; }) \
+	  && rm -rf "$$dc_destdir" \
+	  && $(MAKE) $(AM_MAKEFLAGS) dist \
+	  && rm -rf $(DIST_ARCHIVES) \
+	  && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \
+	  && cd "$$am__cwd" \
+	  || exit 1
+	$(am__post_remove_distdir)
+	@(echo "$(distdir) archives ready for distribution: "; \
+	  list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
+	  sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
+distuninstallcheck:
+	@test -n '$(distuninstallcheck_dir)' || { \
+	  echo 'ERROR: trying to run $@ with an empty' \
+	       '$$(distuninstallcheck_dir)' >&2; \
+	  exit 1; \
+	}; \
+	$(am__cd) '$(distuninstallcheck_dir)' || { \
+	  echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \
+	  exit 1; \
+	}; \
+	test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \
+	   || { echo "ERROR: files left after uninstall:" ; \
+	        if test -n "$(DESTDIR)"; then \
+	          echo "  (check DESTDIR support)"; \
+	        fi ; \
+	        $(distuninstallcheck_listfiles) ; \
+	        exit 1; } >&2
+distcleancheck: distclean
+	@if test '$(srcdir)' = . ; then \
+	  echo "ERROR: distcleancheck can only run from a VPATH build" ; \
+	  exit 1 ; \
+	fi
+	@test `$(distcleancheck_listfiles) | wc -l` -eq 0 \
+	  || { echo "ERROR: files left in build directory after distclean:" ; \
+	       $(distcleancheck_listfiles) ; \
+	       exit 1; } >&2
+check-am: all-am
+	$(MAKE) $(AM_MAKEFLAGS) check-local
+check: check-recursive
+all-am: Makefile $(DATA) all-local
+installdirs: installdirs-recursive
+installdirs-am:
+	for dir in "$(DESTDIR)$(pkgconfigdir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-recursive
+
+clean-am: clean-generic clean-libtool clean-local mostlyclean-am
+
+distclean: distclean-recursive
+	-rm -f $(am__CONFIG_DISTCLEAN_FILES)
+	-rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-libtool \
+	distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+html: html-recursive
+
+html-am:
+
+info: info-recursive
+
+info-am:
+
+install-data-am: install-pkgconfigDATA
+
+install-dvi: install-dvi-recursive
+
+install-dvi-am:
+
+install-exec-am: install-exec-local
+
+install-html: install-html-recursive
+
+install-html-am:
+
+install-info: install-info-recursive
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-recursive
+
+install-pdf-am:
+
+install-ps: install-ps-recursive
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-recursive
+	-rm -f $(am__CONFIG_DISTCLEAN_FILES)
+	-rm -rf $(top_srcdir)/autom4te.cache
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-recursive
+
+pdf-am:
+
+ps: ps-recursive
+
+ps-am:
+
+uninstall-am: uninstall-local uninstall-pkgconfigDATA
+
+.MAKE: $(am__recursive_targets) check-am install-am install-strip
+
+.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am all-local \
+	am--refresh check check-am check-local clean clean-cscope \
+	clean-generic clean-libtool clean-local cscope cscopelist-am \
+	ctags ctags-am dist dist-all dist-bzip2 dist-gzip dist-lzip \
+	dist-shar dist-tarZ dist-xz dist-zip distcheck distclean \
+	distclean-generic distclean-libtool distclean-tags \
+	distcleancheck distdir distuninstallcheck dvi dvi-am html \
+	html-am info info-am install install-am install-data \
+	install-data-am install-dvi install-dvi-am install-exec \
+	install-exec-am install-exec-local install-html \
+	install-html-am install-info install-info-am install-man \
+	install-pdf install-pdf-am install-pkgconfigDATA install-ps \
+	install-ps-am install-strip installcheck installcheck-am \
+	installdirs installdirs-am maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-generic \
+	mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \
+	uninstall-am uninstall-local uninstall-pkgconfigDATA
+
+.PRECIOUS: Makefile
+
+
+# Make sure to get rid of the Visual C++ leftovers under Projects, then clean
+# every plug-in directory ('&&' keeps make from running here if cd fails).
+clean-local:
+	find Projects -name "*.user" | xargs rm -rf
+	find Projects -name "Release" | xargs rm -rf
+	find Projects -name "Debug" | xargs rm -rf
+	find Projects -name "*.aps" | xargs rm -rf
+	find Projects -name "*.suo" | xargs rm -rf
+	find Projects -name "*.log" | xargs rm -rf
+	find Projects -name "*.sdf" | xargs rm -rf
+	find Projects -name "*.opensdf" | xargs rm -rf
+	find Projects -name "ipch" | xargs rm -rf
+	@for d in $(PLUGIN_DIRECTORIES); do (cd $$d && $(MAKE) clean ); done
+
+# Handle plug-ins: run each local target in every PLUGIN_DIRECTORIES entry; '&&' skips make if cd fails.
+all-local:
+	@for d in $(PLUGIN_DIRECTORIES); do (cd $$d && $(MAKE) $(AM_MAKEFLAGS) all ); done
+
+check-local:
+	@for d in $(PLUGIN_DIRECTORIES); do (cd $$d && $(MAKE) $(AM_MAKEFLAGS) check ); done
+
+install-exec-local:
+	@for d in $(PLUGIN_DIRECTORIES); do (cd $$d && $(MAKE) $(AM_MAKEFLAGS) install-exec ); done
+
+uninstall-local:
+	@for d in $(PLUGIN_DIRECTORIES); do (cd $$d && $(MAKE) $(AM_MAKEFLAGS) uninstall ); done
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/.gitignore b/third-party/libjxl/libjxl/third_party/lcms/Projects/.gitignore
new file mode 100644
index 0000000000..d684b5c0f6
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/.gitignore
@@ -0,0 +1,6 @@
+**.opensdf
+**.sdf
+**.suo
+**.user
+Debug/
+Release/
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/BorlandC_5.5/lcms2.rc b/third-party/libjxl/libjxl/third_party/lcms/Projects/BorlandC_5.5/lcms2.rc
new file mode 100644
index 0000000000..ed94a0c2af
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/BorlandC_5.5/lcms2.rc
@@ -0,0 +1,30 @@
+
+
+1 VERSIONINFO
+FILEVERSION 2, 8, 0, 0
+PRODUCTVERSION 2, 8, 0, 0
+FILEOS VOS_NT_WINDOWS32
+FILETYPE VFT_DLL
+{
+ BLOCK "StringFileInfo"
+ {
+  BLOCK "040904E4"
+  {
+   VALUE "CompanyName", "Marti Maria\000\000"
+   VALUE "FileDescription", "lcms color engine\000"
+   VALUE "FileVersion", "2.08\000\000"
+   VALUE "InternalName", "lcms2\000"
+   VALUE "LegalCopyright", "Copyright � Marti Maria 2015\000\000"
+   VALUE "OriginalFilename", "lcms2.dll\000"
+  }
+
+ }
+
+ BLOCK "VarFileInfo"
+ {
+  VALUE "Translation", 0x409, 1252
+ }
+
+}
+
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/BorlandC_5.5/lcmsdll.lk b/third-party/libjxl/libjxl/third_party/lcms/Projects/BorlandC_5.5/lcmsdll.lk
new file mode 100644
index 0000000000..ed4c0fcf63
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/BorlandC_5.5/lcmsdll.lk
@@ -0,0 +1,31 @@
+/x/aa/c/Tpd C0D32.OBJ+
+cmsalpha.obj+
+cmscam02.obj+
+cmscgats.obj+
+cmscnvrt.obj+
+cmserr.obj+
+cmsgamma.obj+
+cmsgmt.obj+
+cmshalf.obj+
+cmsintrp.obj+
+cmsio0.obj+
+cmsio1.obj+
+cmslut.obj+
+cmsmd5.obj+
+cmsmtrx.obj+
+cmsnamed.obj+
+cmsopt.obj+
+cmspack.obj+
+cmspcs.obj+
+cmsplugin.obj+
+cmsps2.obj+
+cmssamp.obj+
+cmssm.obj+
+cmstypes.obj+
+cmsvirt.obj+
+cmswtpnt.obj+
+cmsxform.obj
+..\..\bin\lcms2.dll
+
+cw32mt.lib import32.lib
+..\..\src\lcms2.def
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/BorlandC_5.5/lcmsdll.lst b/third-party/libjxl/libjxl/third_party/lcms/Projects/BorlandC_5.5/lcmsdll.lst
new file mode 100644
index 0000000000..1c14947614
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/BorlandC_5.5/lcmsdll.lst
@@ -0,0 +1,29 @@
+-5 -C -DCMS_DLL -DCMS_DLL_BUILD
+-I..\..\include -K -O2 -a8 -d -ff -w -wucp -wsig -wdef -wnod -wamb				
+-OS	-RT- -R- -tWM -tWD  -w- -x- -c 			
+..\..\src\cmscam02.c
+..\..\src\cmscgats.c
+..\..\src\cmscnvrt.c
+..\..\src\cmserr.c
+..\..\src\cmsgamma.c
+..\..\src\cmsgmt.c
+..\..\src\cmsintrp.c
+..\..\src\cmsio0.c
+..\..\src\cmsio1.c
+..\..\src\cmslut.c
+..\..\src\cmsmd5.c
+..\..\src\cmsmtrx.c
+..\..\src\cmsnamed.c
+..\..\src\cmsopt.c
+..\..\src\cmspack.c
+..\..\src\cmspcs.c
+..\..\src\cmsplugin.c
+..\..\src\cmsps2.c
+..\..\src\cmssamp.c
+..\..\src\cmssm.c
+..\..\src\cmstypes.c
+..\..\src\cmsvirt.c
+..\..\src\cmswtpnt.c
+..\..\src\cmsxform.c
+..\..\src\cmshalf.c
+..\..\src\cmsalpha.c
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/BorlandC_5.5/mklcmsdll.bat b/third-party/libjxl/libjxl/third_party/lcms/Projects/BorlandC_5.5/mklcmsdll.bat
new file mode 100644
index 0000000000..6db2f72470
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/BorlandC_5.5/mklcmsdll.bat
@@ -0,0 +1,14 @@
+@echo off
+rem Builds ..\..\bin\lcms2.dll with the Borland C 5.5 tools: compile, link, then bind resources.
+echo.
+echo This will build the littlecms DLL using Borland C 5.5 compiler.
+echo.
+echo Press Ctrl-C to abort, or
+pause
+bcc32 @lcmsdll.lst
+rem "errorlevel 0" matches every exit code, failures included, so test "not errorlevel 1" instead.
+if not errorlevel 1 ilink32 @lcmsdll.lk
+if not errorlevel 1 brc32 -fe ..\..\bin\lcms2.dll lcms2.rc
+del *.obj
+del *.res
+echo Done!
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/jpegicc/jpegicc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/jpegicc/jpegicc.vcxproj
new file mode 100644
index 0000000000..3ab16a26ce
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/jpegicc/jpegicc.vcxproj
@@ -0,0 +1,201 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{62812507-F926-4968-96A9-17678460AD90}</ProjectGuid>
+    <RootNamespace>jpegicc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IncludePath);;C:\jpeg-8d</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IncludePath);;C:\jpeg-8d</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(LibraryPath);C:\jpeg-8d\</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(LibraryPath);C:\jpeg-8d\</LibraryPath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IncludePath);;C:\jpeg-8d</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IncludePath);;C:\jpeg-8d</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(LibraryPath);C:\jpeg-8d\</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(LibraryPath);C:\jpeg-8d\</LibraryPath>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libjpeg.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libjpeg.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libjpeg.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libjpeg.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+    <ClCompile Include="..\..\..\utils\jpgicc\iccjpeg.c" />
+    <ClCompile Include="..\..\..\utils\jpgicc\jpgicc.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/jpegicc/jpegicc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/jpegicc/jpegicc.vcxproj.filters
new file mode 100644
index 0000000000..a05c36d6d9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/jpegicc/jpegicc.vcxproj.filters
@@ -0,0 +1,31 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\jpgicc\iccjpeg.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\jpgicc\jpgicc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/lcms2.rc b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/lcms2.rc
new file mode 100644
index 0000000000..75cb57a19b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/lcms2.rc
@@ -0,0 +1,104 @@
+// Microsoft Visual C++ generated resource script.
+//
+#include "resource.h"
+
+#define APSTUDIO_READONLY_SYMBOLS
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 2 resource.
+//
+#define APSTUDIO_HIDDEN_SYMBOLS
+#include "windows.h"
+#undef APSTUDIO_HIDDEN_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+#undef APSTUDIO_READONLY_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+// Spanish (Spain, International Sort) resources
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ESN)
+LANGUAGE LANG_SPANISH, SUBLANG_SPANISH_MODERN
+#pragma code_page(1252)
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+1 VERSIONINFO
+ FILEVERSION 2,8,0,0
+ PRODUCTVERSION 2,8,0,0
+ FILEFLAGSMASK 0x0L
+#ifdef _DEBUG
+ FILEFLAGS 0x1L
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS 0x40004L
+ FILETYPE 0x2L
+ FILESUBTYPE 0x0L
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040904e4"
+        BEGIN
+            VALUE "CompanyName", "Marti Maria"
+            VALUE "FileDescription", "lcms color engine"
+            VALUE "FileVersion", "2.9.0.0"
+            VALUE "InternalName", "lcms"
+            VALUE "LegalCopyright", "Copyright � Marti Maria 2017"
+            VALUE "OriginalFilename", "lcms2.dll"
+            VALUE "ProductName", "LittleCMS color engine"
+            VALUE "ProductVersion", "2.9.0.0"
+        END
+    END
+    BLOCK "VarFileInfo"
+    BEGIN
+        VALUE "Translation", 0x409, 1252
+    END
+END
+
+
+#ifdef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// TEXTINCLUDE
+//
+
+1 TEXTINCLUDE 
+BEGIN
+    "resource.h\0"
+END
+
+2 TEXTINCLUDE 
+BEGIN
+    "#define APSTUDIO_HIDDEN_SYMBOLS\r\n"
+    "#include ""windows.h""\r\n"
+    "#undef APSTUDIO_HIDDEN_SYMBOLS\r\n"
+    "\0"
+END
+
+3 TEXTINCLUDE 
+BEGIN
+    "\r\n"
+    "\0"
+END
+
+#endif    // APSTUDIO_INVOKED
+
+#endif    // Spanish (Spain, International Sort) resources
+/////////////////////////////////////////////////////////////////////////////
+
+
+
+#ifndef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 3 resource.
+//
+
+
+/////////////////////////////////////////////////////////////////////////////
+#endif    // not APSTUDIO_INVOKED
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/lcms2.sln b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/lcms2.sln
new file mode 100755
index 0000000000..3bf82f4882
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/lcms2.sln differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/lcms2_DLL/lcms2_DLL.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/lcms2_DLL/lcms2_DLL.vcxproj
new file mode 100644
index 0000000000..9c67c79ce8
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/lcms2_DLL/lcms2_DLL.vcxproj
@@ -0,0 +1,236 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{8C51BE48-ADB8-4089-A9EC-F6BF993A0548}</ProjectGuid>
+    <RootNamespace>lcms2_DLL</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">lcms2</TargetName>
+    <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">lcms2</TargetName>
+    <TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">lcms2</TargetName>
+    <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">lcms2</TargetName>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;CMS_DLL_BUILD;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <ModuleDefinitionFile>..\..\..\src\lcms2.def</ModuleDefinitionFile>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;CMS_DLL_BUILD;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <ModuleDefinitionFile>
+      </ModuleDefinitionFile>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;CMS_DLL_BUILD;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <StringPooling>true</StringPooling>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <ModuleDefinitionFile>..\..\..\src\lcms2.def</ModuleDefinitionFile>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;CMS_DLL_BUILD;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <StringPooling>true</StringPooling>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <ModuleDefinitionFile>
+      </ModuleDefinitionFile>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\src\cmscam02.c" />
+    <ClCompile Include="..\..\..\src\cmscgats.c" />
+    <ClCompile Include="..\..\..\src\cmscnvrt.c" />
+    <ClCompile Include="..\..\..\src\cmserr.c" />
+    <ClCompile Include="..\..\..\src\cmsgamma.c" />
+    <ClCompile Include="..\..\..\src\cmsgmt.c" />
+    <ClCompile Include="..\..\..\src\cmshalf.c" />
+    <ClCompile Include="..\..\..\src\cmsintrp.c" />
+    <ClCompile Include="..\..\..\src\cmsio0.c" />
+    <ClCompile Include="..\..\..\src\cmsio1.c" />
+    <ClCompile Include="..\..\..\src\cmslut.c" />
+    <ClCompile Include="..\..\..\src\cmsmd5.c" />
+    <ClCompile Include="..\..\..\src\cmsmtrx.c" />
+    <ClCompile Include="..\..\..\src\cmsnamed.c" />
+    <ClCompile Include="..\..\..\src\cmsopt.c" />
+    <ClCompile Include="..\..\..\src\cmspack.c" />
+    <ClCompile Include="..\..\..\src\cmspcs.c" />
+    <ClCompile Include="..\..\..\src\cmsplugin.c" />
+    <ClCompile Include="..\..\..\src\cmsps2.c" />
+    <ClCompile Include="..\..\..\src\cmssamp.c" />
+    <ClCompile Include="..\..\..\src\cmssm.c" />
+    <ClCompile Include="..\..\..\src\cmstypes.c" />
+    <ClCompile Include="..\..\..\src\cmsvirt.c" />
+    <ClCompile Include="..\..\..\src\cmswtpnt.c" />
+    <ClCompile Include="..\..\..\src\cmsxform.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="..\..\..\src\lcms2.def" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\src\lcms2_internal.h" />
+    <ClInclude Include="..\..\..\include\lcms2.h" />
+    <ClInclude Include="..\..\..\include\lcms2_plugin.h" />
+    <ClInclude Include="..\resource.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="..\lcms2.rc" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/lcms2_DLL/lcms2_DLL.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/lcms2_DLL/lcms2_DLL.vcxproj.filters
new file mode 100644
index 0000000000..5ac8610d9e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/lcms2_DLL/lcms2_DLL.vcxproj.filters
@@ -0,0 +1,118 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\src\cmscam02.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmscgats.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmscnvrt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmserr.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsgamma.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsgmt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsintrp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsio0.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsio1.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmslut.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsmd5.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsmtrx.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsnamed.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmspack.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmspcs.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsplugin.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsps2.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmssamp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmssm.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmstypes.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsvirt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmswtpnt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsxform.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmshalf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\include\lcms2.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\include\lcms2_plugin.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\src\lcms2_internal.h">
+      <Filter>Source Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\resource.h">
+      <Filter>Resource Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="..\lcms2.rc">
+      <Filter>Resource Files</Filter>
+    </ResourceCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="..\..\..\src\lcms2.def">
+      <Filter>Source Files</Filter>
+    </CustomBuild>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/lcms2_static/lcms2_static.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/lcms2_static/lcms2_static.vcxproj
new file mode 100644
index 0000000000..f0aebf96e7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/lcms2_static/lcms2_static.vcxproj
@@ -0,0 +1,193 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{71DEDE59-3F1E-486B-A899-4283000F76B5}</ProjectGuid>
+    <RootNamespace>lcms2_static</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\Lib\MS\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\Lib\MS\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\Lib\MS\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\Lib\MS\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <FunctionLevelLinking>
+      </FunctionLevelLinking>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <MinimalRebuild>
+      </MinimalRebuild>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FunctionLevelLinking>
+      </FunctionLevelLinking>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <ExceptionHandling>false</ExceptionHandling>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <ExceptionHandling>false</ExceptionHandling>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\src\cmscam02.c" />
+    <ClCompile Include="..\..\..\src\cmscgats.c" />
+    <ClCompile Include="..\..\..\src\cmscnvrt.c" />
+    <ClCompile Include="..\..\..\src\cmserr.c" />
+    <ClCompile Include="..\..\..\src\cmsgamma.c" />
+    <ClCompile Include="..\..\..\src\cmsgmt.c" />
+    <ClCompile Include="..\..\..\src\cmshalf.c" />
+    <ClCompile Include="..\..\..\src\cmsintrp.c" />
+    <ClCompile Include="..\..\..\src\cmsio0.c" />
+    <ClCompile Include="..\..\..\src\cmsio1.c" />
+    <ClCompile Include="..\..\..\src\cmslut.c" />
+    <ClCompile Include="..\..\..\src\cmsmd5.c" />
+    <ClCompile Include="..\..\..\src\cmsmtrx.c" />
+    <ClCompile Include="..\..\..\src\cmsnamed.c" />
+    <ClCompile Include="..\..\..\src\cmsopt.c" />
+    <ClCompile Include="..\..\..\src\cmspack.c" />
+    <ClCompile Include="..\..\..\src\cmspcs.c" />
+    <ClCompile Include="..\..\..\src\cmsplugin.c" />
+    <ClCompile Include="..\..\..\src\cmsps2.c" />
+    <ClCompile Include="..\..\..\src\cmssamp.c" />
+    <ClCompile Include="..\..\..\src\cmssm.c" />
+    <ClCompile Include="..\..\..\src\cmstypes.c" />
+    <ClCompile Include="..\..\..\src\cmsvirt.c" />
+    <ClCompile Include="..\..\..\src\cmswtpnt.c" />
+    <ClCompile Include="..\..\..\src\cmsxform.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\src\lcms2_internal.h" />
+    <ClInclude Include="..\..\..\include\lcms2.h" />
+    <ClInclude Include="..\..\..\include\lcms2_plugin.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/lcms2_static/lcms2_static.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/lcms2_static/lcms2_static.vcxproj.filters
new file mode 100644
index 0000000000..c7e6901fb5
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/lcms2_static/lcms2_static.vcxproj.filters
@@ -0,0 +1,105 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\src\cmscam02.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmscgats.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmscnvrt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmserr.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsgamma.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsgmt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsintrp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsio0.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsio1.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmslut.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsmd5.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsmtrx.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsnamed.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmspack.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmspcs.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsplugin.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsps2.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmssamp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmssm.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmstypes.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsvirt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmswtpnt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsxform.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmshalf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\include\lcms2.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\include\lcms2_plugin.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\src\lcms2_internal.h">
+      <Filter>Source Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/linkicc/linkicc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/linkicc/linkicc.vcxproj
new file mode 100644
index 0000000000..5db9ea1837
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/linkicc/linkicc.vcxproj
@@ -0,0 +1,182 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{FBFBE1DC-DB84-4BA1-9552-B4780F457849}</ProjectGuid>
+    <RootNamespace>linkicc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\linkicc\linkicc.c" />
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/linkicc/linkicc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/linkicc/linkicc.vcxproj.filters
new file mode 100644
index 0000000000..95c77cdbe2
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/linkicc/linkicc.vcxproj.filters
@@ -0,0 +1,28 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\linkicc\linkicc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/psicc/psicc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/psicc/psicc.vcxproj
new file mode 100644
index 0000000000..2b4610f5c1
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/psicc/psicc.vcxproj
@@ -0,0 +1,182 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{EF6A8851-65FE-46F5-B9EF-14F0B671F693}</ProjectGuid>
+    <RootNamespace>psicc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\psicc\psicc.c" />
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/psicc/psicc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/psicc/psicc.vcxproj.filters
new file mode 100644
index 0000000000..c42429d8be
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/psicc/psicc.vcxproj.filters
@@ -0,0 +1,28 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\psicc\psicc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/resource.h b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/resource.h
new file mode 100644
index 0000000000..7655978dd4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/resource.h
@@ -0,0 +1,16 @@
+//{{NO_DEPENDENCIES}}
+// Microsoft Visual C++ generated include file.
+// Used by lcms2.rc
+//
+
+// Next default values for new objects
+// 
+#ifdef APSTUDIO_INVOKED
+#ifndef APSTUDIO_READONLY_SYMBOLS
+#define _APS_NO_MFC                     1
+#define _APS_NEXT_RESOURCE_VALUE        101
+#define _APS_NEXT_COMMAND_VALUE         40001
+#define _APS_NEXT_CONTROL_VALUE         1000
+#define _APS_NEXT_SYMED_VALUE           101
+#endif
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/testbed/testbed.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/testbed/testbed.vcxproj
new file mode 100644
index 0000000000..c26e3ccb76
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/testbed/testbed.vcxproj
@@ -0,0 +1,201 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{928A3A2B-46EF-4279-959C-513B3652FF0E}</ProjectGuid>
+    <RootNamespace>testbed</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\testbed\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\testbed\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\testbed\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\testbed\</OutDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+      <Profile>false</Profile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <Profile>false</Profile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <AdditionalIncludeDirectories>../../../include;../../../src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CallingConvention>Cdecl</CallingConvention>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+      <Profile>false</Profile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <AdditionalIncludeDirectories>../../../include;../../../src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CallingConvention>Cdecl</CallingConvention>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <Profile>false</Profile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\testbed\testcms2.c" />
+    <ClCompile Include="..\..\..\testbed\testplugin.c" />
+    <ClCompile Include="..\..\..\testbed\zoo_icc.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\testbed\testcms2.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/testbed/testbed.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/testbed/testbed.vcxproj.filters
new file mode 100644
index 0000000000..fecb121905
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/testbed/testbed.vcxproj.filters
@@ -0,0 +1,33 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\testbed\testcms2.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\testbed\testplugin.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\testbed\zoo_icc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\testbed\testcms2.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/tiffdiff/tiffdiff.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/tiffdiff/tiffdiff.vcxproj
new file mode 100644
index 0000000000..311f83630a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/tiffdiff/tiffdiff.vcxproj
@@ -0,0 +1,196 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{75B91835-CCD7-48BE-A606-A9C997D5DBEE}</ProjectGuid>
+    <RootNamespace>tiffdiff</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\jpeg-8d;c:\tiff-4.0.2\libtiff;$(IncludePath)</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">C:\jpeg-8d;c:\tiff-4.0.2\libtiff;$(IncludePath)</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\jpeg-8d;C:\tiff-4.0.2\libtiff;$(LibraryPath)</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">C:\jpeg-8d;C:\tiff-4.0.2\libtiff;$(LibraryPath)</LibraryPath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\jpeg-8d;c:\tiff-4.0.2\libtiff;$(IncludePath)</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">C:\jpeg-8d;c:\tiff-4.0.2\libtiff;$(IncludePath)</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\jpeg-8d;C:\tiff-4.0.2\libtiff;$(LibraryPath)</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">C:\jpeg-8d;C:\tiff-4.0.2\libtiff;$(LibraryPath)</LibraryPath>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+    <ClCompile Include="..\..\..\utils\tificc\tifdiff.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/tiffdiff/tiffdiff.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/tiffdiff/tiffdiff.vcxproj.filters
new file mode 100644
index 0000000000..b7f9a80d10
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/tiffdiff/tiffdiff.vcxproj.filters
@@ -0,0 +1,28 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\tificc\tifdiff.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/tifficc/tifficc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/tifficc/tifficc.vcxproj
new file mode 100644
index 0000000000..9abce9210a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/tifficc/tifficc.vcxproj
@@ -0,0 +1,197 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{2256DE16-ED92-4A6F-9C54-F65BB61E64A2}</ProjectGuid>
+    <RootNamespace>tifficc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IncludePath);;C:\tiff-4.0.2\libtiff</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IncludePath);;C:\tiff-4.0.2\libtiff</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(LibraryPath);;C:\tiff-4.0.2\libtiff</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(LibraryPath);;C:\tiff-4.0.2\libtiff</LibraryPath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IncludePath);;C:\tiff-4.0.2\libtiff</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IncludePath);;C:\tiff-4.0.2\libtiff</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(LibraryPath);;C:\tiff-4.0.2\libtiff</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(LibraryPath);;C:\tiff-4.0.2\libtiff</LibraryPath>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+    <ClCompile Include="..\..\..\utils\tificc\tificc.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\utils\common\utils.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/tifficc/tifficc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/tifficc/tifficc.vcxproj.filters
new file mode 100644
index 0000000000..2e0e44d1ea
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/tifficc/tifficc.vcxproj.filters
@@ -0,0 +1,33 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\tificc\tificc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\utils\common\utils.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/transicc/transicc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/transicc/transicc.vcxproj
new file mode 100644
index 0000000000..5c27ff68d8
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/transicc/transicc.vcxproj
@@ -0,0 +1,185 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{9EE22D66-C849-474C-9ED5-C3E141DAB160}</ProjectGuid>
+    <RootNamespace>transicc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\transicc\transicc.c" />
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\utils\common\utils.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/transicc/transicc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/transicc/transicc.vcxproj.filters
new file mode 100644
index 0000000000..3d45443026
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2010/transicc/transicc.vcxproj.filters
@@ -0,0 +1,33 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\transicc\transicc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\utils\common\utils.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/jpegicc/jpegicc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/jpegicc/jpegicc.vcxproj
new file mode 100755
index 0000000000..b13b0e10e1
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/jpegicc/jpegicc.vcxproj
@@ -0,0 +1,214 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{62812507-F926-4968-96A9-17678460AD90}</ProjectGuid>
+    <RootNamespace>jpegicc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IncludePath);;C:\jpeg-8d</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IncludePath);;C:\jpeg-8d</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(LibraryPath);;C:\jpeg-8d</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(LibraryPath);;C:\jpeg-8d</LibraryPath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IncludePath);;C:\jpeg-8d</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IncludePath);;C:\jpeg-8d</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(LibraryPath);;C:\jpeg-8d</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(LibraryPath);;C:\jpeg-8d</LibraryPath>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libjpeg.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libjpeg.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libjpeg.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libjpeg.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+    <ClCompile Include="..\..\..\utils\jpgicc\iccjpeg.c" />
+    <ClCompile Include="..\..\..\utils\jpgicc\jpgicc.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/jpegicc/jpegicc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/jpegicc/jpegicc.vcxproj.filters
new file mode 100755
index 0000000000..a05c36d6d9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/jpegicc/jpegicc.vcxproj.filters
@@ -0,0 +1,31 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\jpgicc\iccjpeg.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\jpgicc\jpgicc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/lcms2.rc b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/lcms2.rc
new file mode 100755
index 0000000000..af5db8274b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/lcms2.rc
@@ -0,0 +1,104 @@
+// Microsoft Visual C++ generated resource script.
+//
+#include "resource.h"
+
+#define APSTUDIO_READONLY_SYMBOLS
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 2 resource.
+//
+#define APSTUDIO_HIDDEN_SYMBOLS
+#include "windows.h"
+#undef APSTUDIO_HIDDEN_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+#undef APSTUDIO_READONLY_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+// Spanish (Spain, International Sort) resources
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ESN)
+LANGUAGE LANG_SPANISH, SUBLANG_SPANISH_MODERN
+#pragma code_page(1252)
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+1 VERSIONINFO
+ FILEVERSION 2,9,0,0
+ PRODUCTVERSION 2,9,0,0
+ FILEFLAGSMASK 0x0L
+#ifdef _DEBUG
+ FILEFLAGS 0x1L
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS 0x40004L
+ FILETYPE 0x2L
+ FILESUBTYPE 0x0L
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040904e4"
+        BEGIN
+            VALUE "CompanyName", "Marti Maria"
+            VALUE "FileDescription", "lcms color engine"
+            VALUE "FileVersion", "2.9.0.0"
+            VALUE "InternalName", "lcms"
+            VALUE "LegalCopyright", "Copyright � Marti Maria 2017"
+            VALUE "OriginalFilename", "lcms2.dll"
+            VALUE "ProductName", "LittleCMS color engine"
+            VALUE "ProductVersion", "2.9.0.0"
+        END
+    END
+    BLOCK "VarFileInfo"
+    BEGIN
+        VALUE "Translation", 0x409, 1252
+    END
+END
+
+
+#ifdef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// TEXTINCLUDE
+//
+
+1 TEXTINCLUDE 
+BEGIN
+    "resource.h\0"
+END
+
+2 TEXTINCLUDE 
+BEGIN
+    "#define APSTUDIO_HIDDEN_SYMBOLS\r\n"
+    "#include ""windows.h""\r\n"
+    "#undef APSTUDIO_HIDDEN_SYMBOLS\r\n"
+    "\0"
+END
+
+3 TEXTINCLUDE 
+BEGIN
+    "\r\n"
+    "\0"
+END
+
+#endif    // APSTUDIO_INVOKED
+
+#endif    // Spanish (Spain, International Sort) resources
+/////////////////////////////////////////////////////////////////////////////
+
+
+
+#ifndef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 3 resource.
+//
+
+
+/////////////////////////////////////////////////////////////////////////////
+#endif    // not APSTUDIO_INVOKED
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/lcms2.sln b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/lcms2.sln
new file mode 100755
index 0000000000..aaf8d22f3e
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/lcms2.sln differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/lcms2_DLL/lcms2_DLL.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/lcms2_DLL/lcms2_DLL.vcxproj
new file mode 100755
index 0000000000..9dcc657441
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/lcms2_DLL/lcms2_DLL.vcxproj
@@ -0,0 +1,253 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{8C51BE48-ADB8-4089-A9EC-F6BF993A0548}</ProjectGuid>
+    <RootNamespace>lcms2_DLL</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <TargetName>lcms2</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <TargetName>lcms2</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <TargetName>lcms2</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <TargetName>lcms2</TargetName>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;CMS_DLL_BUILD;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <ModuleDefinitionFile>..\..\..\src\lcms2.def</ModuleDefinitionFile>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;CMS_DLL_BUILD;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <ModuleDefinitionFile>
+      </ModuleDefinitionFile>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;CMS_DLL_BUILD;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <StringPooling>true</StringPooling>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <ModuleDefinitionFile>..\..\..\src\lcms2.def</ModuleDefinitionFile>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;CMS_DLL_BUILD;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <StringPooling>true</StringPooling>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <ModuleDefinitionFile>
+      </ModuleDefinitionFile>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\src\cmsalpha.c" />
+    <ClCompile Include="..\..\..\src\cmscam02.c" />
+    <ClCompile Include="..\..\..\src\cmscgats.c" />
+    <ClCompile Include="..\..\..\src\cmscnvrt.c" />
+    <ClCompile Include="..\..\..\src\cmserr.c" />
+    <ClCompile Include="..\..\..\src\cmsgamma.c" />
+    <ClCompile Include="..\..\..\src\cmsgmt.c" />
+    <ClCompile Include="..\..\..\src\cmshalf.c" />
+    <ClCompile Include="..\..\..\src\cmsintrp.c" />
+    <ClCompile Include="..\..\..\src\cmsio0.c" />
+    <ClCompile Include="..\..\..\src\cmsio1.c" />
+    <ClCompile Include="..\..\..\src\cmslut.c" />
+    <ClCompile Include="..\..\..\src\cmsmd5.c" />
+    <ClCompile Include="..\..\..\src\cmsmtrx.c" />
+    <ClCompile Include="..\..\..\src\cmsnamed.c" />
+    <ClCompile Include="..\..\..\src\cmsopt.c" />
+    <ClCompile Include="..\..\..\src\cmspack.c" />
+    <ClCompile Include="..\..\..\src\cmspcs.c" />
+    <ClCompile Include="..\..\..\src\cmsplugin.c" />
+    <ClCompile Include="..\..\..\src\cmsps2.c" />
+    <ClCompile Include="..\..\..\src\cmssamp.c" />
+    <ClCompile Include="..\..\..\src\cmssm.c" />
+    <ClCompile Include="..\..\..\src\cmstypes.c" />
+    <ClCompile Include="..\..\..\src\cmsvirt.c" />
+    <ClCompile Include="..\..\..\src\cmswtpnt.c" />
+    <ClCompile Include="..\..\..\src\cmsxform.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="..\..\..\src\lcms2.def">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+    </CustomBuild>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\src\lcms2_internal.h" />
+    <ClInclude Include="..\..\..\include\lcms2.h" />
+    <ClInclude Include="..\..\..\include\lcms2_plugin.h" />
+    <ClInclude Include="..\resource.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="..\lcms2.rc" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/lcms2_DLL/lcms2_DLL.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/lcms2_DLL/lcms2_DLL.vcxproj.filters
new file mode 100755
index 0000000000..5ac8610d9e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/lcms2_DLL/lcms2_DLL.vcxproj.filters
@@ -0,0 +1,118 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\src\cmscam02.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmscgats.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmscnvrt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmserr.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsgamma.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsgmt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsintrp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsio0.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsio1.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmslut.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsmd5.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsmtrx.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsnamed.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmspack.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmspcs.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsplugin.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsps2.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmssamp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmssm.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmstypes.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsvirt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmswtpnt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsxform.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmshalf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\include\lcms2.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\include\lcms2_plugin.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\src\lcms2_internal.h">
+      <Filter>Source Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\resource.h">
+      <Filter>Resource Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="..\lcms2.rc">
+      <Filter>Resource Files</Filter>
+    </ResourceCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="..\..\..\src\lcms2.def">
+      <Filter>Source Files</Filter>
+    </CustomBuild>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/lcms2_static/lcms2_static.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/lcms2_static/lcms2_static.vcxproj
new file mode 100755
index 0000000000..8e865864e7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/lcms2_static/lcms2_static.vcxproj
@@ -0,0 +1,206 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{71DEDE59-3F1E-486B-A899-4283000F76B5}</ProjectGuid>
+    <RootNamespace>lcms2_static</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\Lib\MS\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\Lib\MS\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\Lib\MS\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\Lib\MS\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <FunctionLevelLinking>
+      </FunctionLevelLinking>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FunctionLevelLinking>
+      </FunctionLevelLinking>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>false</EnableFiberSafeOptimizations>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <ExceptionHandling>false</ExceptionHandling>
+      <StringPooling>true</StringPooling>
+      <FloatingPointModel>Precise</FloatingPointModel>
+      <FloatingPointExceptions>false</FloatingPointExceptions>
+      <RuntimeTypeInfo>false</RuntimeTypeInfo>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <ExceptionHandling>false</ExceptionHandling>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <EnableParallelCodeGeneration>true</EnableParallelCodeGeneration>
+      <StringPooling>true</StringPooling>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\src\cmsalpha.c" />
+    <ClCompile Include="..\..\..\src\cmscam02.c" />
+    <ClCompile Include="..\..\..\src\cmscgats.c" />
+    <ClCompile Include="..\..\..\src\cmscnvrt.c" />
+    <ClCompile Include="..\..\..\src\cmserr.c" />
+    <ClCompile Include="..\..\..\src\cmsgamma.c" />
+    <ClCompile Include="..\..\..\src\cmsgmt.c" />
+    <ClCompile Include="..\..\..\src\cmshalf.c" />
+    <ClCompile Include="..\..\..\src\cmsintrp.c" />
+    <ClCompile Include="..\..\..\src\cmsio0.c" />
+    <ClCompile Include="..\..\..\src\cmsio1.c" />
+    <ClCompile Include="..\..\..\src\cmslut.c" />
+    <ClCompile Include="..\..\..\src\cmsmd5.c" />
+    <ClCompile Include="..\..\..\src\cmsmtrx.c" />
+    <ClCompile Include="..\..\..\src\cmsnamed.c" />
+    <ClCompile Include="..\..\..\src\cmsopt.c" />
+    <ClCompile Include="..\..\..\src\cmspack.c" />
+    <ClCompile Include="..\..\..\src\cmspcs.c" />
+    <ClCompile Include="..\..\..\src\cmsplugin.c" />
+    <ClCompile Include="..\..\..\src\cmsps2.c" />
+    <ClCompile Include="..\..\..\src\cmssamp.c" />
+    <ClCompile Include="..\..\..\src\cmssm.c" />
+    <ClCompile Include="..\..\..\src\cmstypes.c" />
+    <ClCompile Include="..\..\..\src\cmsvirt.c" />
+    <ClCompile Include="..\..\..\src\cmswtpnt.c" />
+    <ClCompile Include="..\..\..\src\cmsxform.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\src\lcms2_internal.h" />
+    <ClInclude Include="..\..\..\include\lcms2.h" />
+    <ClInclude Include="..\..\..\include\lcms2_plugin.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/lcms2_static/lcms2_static.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/lcms2_static/lcms2_static.vcxproj.filters
new file mode 100755
index 0000000000..c7e6901fb5
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/lcms2_static/lcms2_static.vcxproj.filters
@@ -0,0 +1,105 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\src\cmscam02.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmscgats.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmscnvrt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmserr.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsgamma.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsgmt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsintrp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsio0.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsio1.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmslut.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsmd5.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsmtrx.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsnamed.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmspack.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmspcs.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsplugin.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsps2.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmssamp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmssm.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmstypes.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsvirt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmswtpnt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsxform.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmshalf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\include\lcms2.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\include\lcms2_plugin.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\src\lcms2_internal.h">
+      <Filter>Source Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/linkicc/linkicc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/linkicc/linkicc.vcxproj
new file mode 100755
index 0000000000..28a151c8ca
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/linkicc/linkicc.vcxproj
@@ -0,0 +1,191 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{FBFBE1DC-DB84-4BA1-9552-B4780F457849}</ProjectGuid>
+    <RootNamespace>linkicc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\linkicc\linkicc.c" />
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/linkicc/linkicc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/linkicc/linkicc.vcxproj.filters
new file mode 100755
index 0000000000..95c77cdbe2
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/linkicc/linkicc.vcxproj.filters
@@ -0,0 +1,28 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\linkicc\linkicc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/psicc/psicc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/psicc/psicc.vcxproj
new file mode 100755
index 0000000000..5860571a4c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/psicc/psicc.vcxproj
@@ -0,0 +1,191 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{EF6A8851-65FE-46F5-B9EF-14F0B671F693}</ProjectGuid>
+    <RootNamespace>psicc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\psicc\psicc.c" />
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/psicc/psicc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/psicc/psicc.vcxproj.filters
new file mode 100755
index 0000000000..c42429d8be
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/psicc/psicc.vcxproj.filters
@@ -0,0 +1,28 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\psicc\psicc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/resource.h b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/resource.h
new file mode 100755
index 0000000000..7655978dd4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/resource.h
@@ -0,0 +1,16 @@
+//{{NO_DEPENDENCIES}}
+// Microsoft Visual C++ generated include file.
+// Used by lcms2.rc
+//
+
+// Next default values for new objects
+// 
+#ifdef APSTUDIO_INVOKED
+#ifndef APSTUDIO_READONLY_SYMBOLS
+#define _APS_NO_MFC                     1
+#define _APS_NEXT_RESOURCE_VALUE        101
+#define _APS_NEXT_COMMAND_VALUE         40001
+#define _APS_NEXT_CONTROL_VALUE         1000
+#define _APS_NEXT_SYMED_VALUE           101
+#endif
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/testbed/testbed.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/testbed/testbed.vcxproj
new file mode 100755
index 0000000000..dabba79d55
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/testbed/testbed.vcxproj
@@ -0,0 +1,207 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{928A3A2B-46EF-4279-959C-513B3652FF0E}</ProjectGuid>
+    <RootNamespace>testbed</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\testbed\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\testbed\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\testbed\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\testbed\</OutDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+      <Profile>false</Profile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <Profile>false</Profile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>false</EnableFiberSafeOptimizations>
+      <AdditionalIncludeDirectories>../../../include;../../../src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CallingConvention>Cdecl</CallingConvention>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+      <Profile>false</Profile>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <AdditionalIncludeDirectories>../../../include;../../../src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CallingConvention>Cdecl</CallingConvention>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <Profile>false</Profile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\testbed\testcms2.c" />
+    <ClCompile Include="..\..\..\testbed\testplugin.c" />
+    <ClCompile Include="..\..\..\testbed\zoo_icc.c" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/testbed/testbed.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/testbed/testbed.vcxproj.filters
new file mode 100755
index 0000000000..be19e1ee29
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/testbed/testbed.vcxproj.filters
@@ -0,0 +1,28 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\testbed\testcms2.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\testbed\testplugin.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\testbed\zoo_icc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/tiffdiff/tiffdiff.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/tiffdiff/tiffdiff.vcxproj
new file mode 100755
index 0000000000..d4e8a4570e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/tiffdiff/tiffdiff.vcxproj
@@ -0,0 +1,209 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{75B91835-CCD7-48BE-A606-A9C997D5DBEE}</ProjectGuid>
+    <RootNamespace>tiffdiff</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\jpeg-8d;c:\tiff-4.0.2\libtiff;$(IncludePath)</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">C:\jpeg-8d;c:\tiff-4.0.2\libtiff;$(IncludePath)</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\jpeg-8d;C:\tiff-4.0.2\libtiff;$(LibraryPath)</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">C:\jpeg-8d;C:\tiff-4.0.2\libtiff;$(LibraryPath)</LibraryPath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\jpeg-8d;c:\tiff-4.0.2\libtiff;$(IncludePath)</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">C:\jpeg-8d;c:\tiff-4.0.2\libtiff;$(IncludePath)</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\jpeg-8d;C:\tiff-4.0.2\libtiff;$(LibraryPath)</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">C:\jpeg-8d;C:\tiff-4.0.2\libtiff;$(LibraryPath)</LibraryPath>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+    <ClCompile Include="..\..\..\utils\tificc\tifdiff.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/tiffdiff/tiffdiff.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/tiffdiff/tiffdiff.vcxproj.filters
new file mode 100755
index 0000000000..b7f9a80d10
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/tiffdiff/tiffdiff.vcxproj.filters
@@ -0,0 +1,28 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\tificc\tifdiff.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/tifficc/tifficc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/tifficc/tifficc.vcxproj
new file mode 100755
index 0000000000..b2f6d2fb4a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/tifficc/tifficc.vcxproj
@@ -0,0 +1,210 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{2256DE16-ED92-4A6F-9C54-F65BB61E64A2}</ProjectGuid>
+    <RootNamespace>tifficc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IncludePath);C:\tiff-4.0.2\libtiff</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IncludePath);C:\tiff-4.0.2\libtiff</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(LibraryPath);C:\tiff-4.0.2\libtiff</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(LibraryPath);C:\tiff-4.0.2\libtiff</LibraryPath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IncludePath);C:\tiff-4.0.2\libtiff</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IncludePath);C:\tiff-4.0.2\libtiff</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(LibraryPath);C:\tiff-4.0.2\libtiff</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(LibraryPath);C:\tiff-4.0.2\libtiff</LibraryPath>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+    <ClCompile Include="..\..\..\utils\tificc\tificc.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\utils\common\utils.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/tifficc/tifficc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/tifficc/tifficc.vcxproj.filters
new file mode 100755
index 0000000000..2e0e44d1ea
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/tifficc/tifficc.vcxproj.filters
@@ -0,0 +1,33 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\tificc\tificc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\utils\common\utils.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/transicc/transicc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/transicc/transicc.vcxproj
new file mode 100755
index 0000000000..86affa2dac
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/transicc/transicc.vcxproj
@@ -0,0 +1,194 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{9EE22D66-C849-474C-9ED5-C3E141DAB160}</ProjectGuid>
+    <RootNamespace>transicc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v110</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\transicc\transicc.c" />
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\utils\common\utils.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/transicc/transicc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/transicc/transicc.vcxproj.filters
new file mode 100755
index 0000000000..3d45443026
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2012/transicc/transicc.vcxproj.filters
@@ -0,0 +1,33 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\transicc\transicc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\utils\common\utils.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/jpegicc/jpegicc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/jpegicc/jpegicc.vcxproj
new file mode 100755
index 0000000000..e0d92b3822
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/jpegicc/jpegicc.vcxproj
@@ -0,0 +1,214 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{62812507-F926-4968-96A9-17678460AD90}</ProjectGuid>
+    <RootNamespace>jpegicc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IncludePath);;C:\jpeg-8d</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IncludePath);;C:\jpeg-8d</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(LibraryPath);;C:\jpeg-8d</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(LibraryPath);;C:\jpeg-8d</LibraryPath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IncludePath);;C:\jpeg-8d</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IncludePath);;C:\jpeg-8d</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(LibraryPath);;C:\jpeg-8d</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(LibraryPath);;C:\jpeg-8d</LibraryPath>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libjpeg.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libjpeg.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libjpeg.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libjpeg.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+    <ClCompile Include="..\..\..\utils\jpgicc\iccjpeg.c" />
+    <ClCompile Include="..\..\..\utils\jpgicc\jpgicc.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/jpegicc/jpegicc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/jpegicc/jpegicc.vcxproj.filters
new file mode 100755
index 0000000000..a05c36d6d9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/jpegicc/jpegicc.vcxproj.filters
@@ -0,0 +1,31 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\jpgicc\iccjpeg.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\jpgicc\jpgicc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/lcms2.rc b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/lcms2.rc
new file mode 100755
index 0000000000..af5db8274b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/lcms2.rc
@@ -0,0 +1,104 @@
+// Microsoft Visual C++ generated resource script.
+//
+#include "resource.h"
+
+#define APSTUDIO_READONLY_SYMBOLS
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 2 resource.
+//
+#define APSTUDIO_HIDDEN_SYMBOLS
+#include "windows.h"
+#undef APSTUDIO_HIDDEN_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+#undef APSTUDIO_READONLY_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+// Spanish (Spain, International Sort) resources
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ESN)
+LANGUAGE LANG_SPANISH, SUBLANG_SPANISH_MODERN
+#pragma code_page(1252)
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+1 VERSIONINFO
+ FILEVERSION 2,9,0,0
+ PRODUCTVERSION 2,9,0,0
+ FILEFLAGSMASK 0x0L
+#ifdef _DEBUG
+ FILEFLAGS 0x1L
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS 0x40004L
+ FILETYPE 0x2L
+ FILESUBTYPE 0x0L
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040904e4"
+        BEGIN
+            VALUE "CompanyName", "Marti Maria"
+            VALUE "FileDescription", "lcms color engine"
+            VALUE "FileVersion", "2.9.0.0"
+            VALUE "InternalName", "lcms"
+            VALUE "LegalCopyright", "Copyright � Marti Maria 2017"
+            VALUE "OriginalFilename", "lcms2.dll"
+            VALUE "ProductName", "LittleCMS color engine"
+            VALUE "ProductVersion", "2.9.0.0"
+        END
+    END
+    BLOCK "VarFileInfo"
+    BEGIN
+        VALUE "Translation", 0x409, 1252
+    END
+END
+
+
+#ifdef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// TEXTINCLUDE
+//
+
+1 TEXTINCLUDE 
+BEGIN
+    "resource.h\0"
+END
+
+2 TEXTINCLUDE 
+BEGIN
+    "#define APSTUDIO_HIDDEN_SYMBOLS\r\n"
+    "#include ""windows.h""\r\n"
+    "#undef APSTUDIO_HIDDEN_SYMBOLS\r\n"
+    "\0"
+END
+
+3 TEXTINCLUDE 
+BEGIN
+    "\r\n"
+    "\0"
+END
+
+#endif    // APSTUDIO_INVOKED
+
+#endif    // Spanish (Spain, International Sort) resources
+/////////////////////////////////////////////////////////////////////////////
+
+
+
+#ifndef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 3 resource.
+//
+
+
+/////////////////////////////////////////////////////////////////////////////
+#endif    // not APSTUDIO_INVOKED
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/lcms2.sln b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/lcms2.sln
new file mode 100755
index 0000000000..d1b3869a94
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/lcms2.sln differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/lcms2_DLL/lcms2_DLL.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/lcms2_DLL/lcms2_DLL.vcxproj
new file mode 100755
index 0000000000..efa7a9e278
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/lcms2_DLL/lcms2_DLL.vcxproj
@@ -0,0 +1,253 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{8C51BE48-ADB8-4089-A9EC-F6BF993A0548}</ProjectGuid>
+    <RootNamespace>lcms2_DLL</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <TargetName>lcms2</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <TargetName>lcms2</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <TargetName>lcms2</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <TargetName>lcms2</TargetName>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;CMS_DLL_BUILD;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <ModuleDefinitionFile>..\..\..\src\lcms2.def</ModuleDefinitionFile>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;CMS_DLL_BUILD;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <ModuleDefinitionFile>
+      </ModuleDefinitionFile>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;CMS_DLL_BUILD;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <StringPooling>true</StringPooling>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <ModuleDefinitionFile>..\..\..\src\lcms2.def</ModuleDefinitionFile>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;CMS_DLL_BUILD;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <StringPooling>true</StringPooling>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <ModuleDefinitionFile>
+      </ModuleDefinitionFile>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\src\cmsalpha.c" />
+    <ClCompile Include="..\..\..\src\cmscam02.c" />
+    <ClCompile Include="..\..\..\src\cmscgats.c" />
+    <ClCompile Include="..\..\..\src\cmscnvrt.c" />
+    <ClCompile Include="..\..\..\src\cmserr.c" />
+    <ClCompile Include="..\..\..\src\cmsgamma.c" />
+    <ClCompile Include="..\..\..\src\cmsgmt.c" />
+    <ClCompile Include="..\..\..\src\cmshalf.c" />
+    <ClCompile Include="..\..\..\src\cmsintrp.c" />
+    <ClCompile Include="..\..\..\src\cmsio0.c" />
+    <ClCompile Include="..\..\..\src\cmsio1.c" />
+    <ClCompile Include="..\..\..\src\cmslut.c" />
+    <ClCompile Include="..\..\..\src\cmsmd5.c" />
+    <ClCompile Include="..\..\..\src\cmsmtrx.c" />
+    <ClCompile Include="..\..\..\src\cmsnamed.c" />
+    <ClCompile Include="..\..\..\src\cmsopt.c" />
+    <ClCompile Include="..\..\..\src\cmspack.c" />
+    <ClCompile Include="..\..\..\src\cmspcs.c" />
+    <ClCompile Include="..\..\..\src\cmsplugin.c" />
+    <ClCompile Include="..\..\..\src\cmsps2.c" />
+    <ClCompile Include="..\..\..\src\cmssamp.c" />
+    <ClCompile Include="..\..\..\src\cmssm.c" />
+    <ClCompile Include="..\..\..\src\cmstypes.c" />
+    <ClCompile Include="..\..\..\src\cmsvirt.c" />
+    <ClCompile Include="..\..\..\src\cmswtpnt.c" />
+    <ClCompile Include="..\..\..\src\cmsxform.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="..\..\..\src\lcms2.def">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+    </CustomBuild>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\src\lcms2_internal.h" />
+    <ClInclude Include="..\..\..\include\lcms2.h" />
+    <ClInclude Include="..\..\..\include\lcms2_plugin.h" />
+    <ClInclude Include="..\resource.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="..\lcms2.rc" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/lcms2_DLL/lcms2_DLL.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/lcms2_DLL/lcms2_DLL.vcxproj.filters
new file mode 100755
index 0000000000..16408a1caa
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/lcms2_DLL/lcms2_DLL.vcxproj.filters
@@ -0,0 +1,121 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\src\cmscam02.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmscgats.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmscnvrt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmserr.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsgamma.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsgmt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsintrp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsio0.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsio1.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmslut.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsmd5.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsmtrx.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsnamed.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmspack.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmspcs.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsplugin.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsps2.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmssamp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmssm.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmstypes.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsvirt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmswtpnt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsxform.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmshalf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsalpha.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\include\lcms2.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\include\lcms2_plugin.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\src\lcms2_internal.h">
+      <Filter>Source Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\resource.h">
+      <Filter>Resource Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="..\lcms2.rc">
+      <Filter>Resource Files</Filter>
+    </ResourceCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="..\..\..\src\lcms2.def">
+      <Filter>Source Files</Filter>
+    </CustomBuild>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/lcms2_static/lcms2_static.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/lcms2_static/lcms2_static.vcxproj
new file mode 100755
index 0000000000..d86e3c0eed
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/lcms2_static/lcms2_static.vcxproj
@@ -0,0 +1,207 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{71DEDE59-3F1E-486B-A899-4283000F76B5}</ProjectGuid>
+    <RootNamespace>lcms2_static</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\Lib\MS\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\Lib\MS\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\Lib\MS\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\Lib\MS\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <FunctionLevelLinking>
+      </FunctionLevelLinking>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FunctionLevelLinking>
+      </FunctionLevelLinking>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <CompileAs>Default</CompileAs>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>false</EnableFiberSafeOptimizations>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <ExceptionHandling>false</ExceptionHandling>
+      <StringPooling>true</StringPooling>
+      <FloatingPointModel>Precise</FloatingPointModel>
+      <FloatingPointExceptions>false</FloatingPointExceptions>
+      <RuntimeTypeInfo>false</RuntimeTypeInfo>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <ExceptionHandling>false</ExceptionHandling>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <EnableParallelCodeGeneration>true</EnableParallelCodeGeneration>
+      <StringPooling>true</StringPooling>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\src\cmsalpha.c" />
+    <ClCompile Include="..\..\..\src\cmscam02.c" />
+    <ClCompile Include="..\..\..\src\cmscgats.c" />
+    <ClCompile Include="..\..\..\src\cmscnvrt.c" />
+    <ClCompile Include="..\..\..\src\cmserr.c" />
+    <ClCompile Include="..\..\..\src\cmsgamma.c" />
+    <ClCompile Include="..\..\..\src\cmsgmt.c" />
+    <ClCompile Include="..\..\..\src\cmshalf.c" />
+    <ClCompile Include="..\..\..\src\cmsintrp.c" />
+    <ClCompile Include="..\..\..\src\cmsio0.c" />
+    <ClCompile Include="..\..\..\src\cmsio1.c" />
+    <ClCompile Include="..\..\..\src\cmslut.c" />
+    <ClCompile Include="..\..\..\src\cmsmd5.c" />
+    <ClCompile Include="..\..\..\src\cmsmtrx.c" />
+    <ClCompile Include="..\..\..\src\cmsnamed.c" />
+    <ClCompile Include="..\..\..\src\cmsopt.c" />
+    <ClCompile Include="..\..\..\src\cmspack.c" />
+    <ClCompile Include="..\..\..\src\cmspcs.c" />
+    <ClCompile Include="..\..\..\src\cmsplugin.c" />
+    <ClCompile Include="..\..\..\src\cmsps2.c" />
+    <ClCompile Include="..\..\..\src\cmssamp.c" />
+    <ClCompile Include="..\..\..\src\cmssm.c" />
+    <ClCompile Include="..\..\..\src\cmstypes.c" />
+    <ClCompile Include="..\..\..\src\cmsvirt.c" />
+    <ClCompile Include="..\..\..\src\cmswtpnt.c" />
+    <ClCompile Include="..\..\..\src\cmsxform.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\src\lcms2_internal.h" />
+    <ClInclude Include="..\..\..\include\lcms2.h" />
+    <ClInclude Include="..\..\..\include\lcms2_plugin.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/lcms2_static/lcms2_static.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/lcms2_static/lcms2_static.vcxproj.filters
new file mode 100755
index 0000000000..5366b06f21
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/lcms2_static/lcms2_static.vcxproj.filters
@@ -0,0 +1,108 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\src\cmscam02.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmscgats.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmscnvrt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmserr.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsgamma.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsgmt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsintrp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsio0.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsio1.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmslut.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsmd5.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsmtrx.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsnamed.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmspack.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmspcs.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsplugin.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsps2.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmssamp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmssm.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmstypes.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsvirt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmswtpnt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsxform.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmshalf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsalpha.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\include\lcms2.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\include\lcms2_plugin.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\src\lcms2_internal.h"> <!-- internal header: grouped with the other .h files -->
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/linkicc/linkicc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/linkicc/linkicc.vcxproj
new file mode 100755
index 0000000000..fe4695269a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/linkicc/linkicc.vcxproj
@@ -0,0 +1,191 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{FBFBE1DC-DB84-4BA1-9552-B4780F457849}</ProjectGuid>
+    <RootNamespace>linkicc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- compiler/linker settings for the 32-bit debug build of linkicc -->
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary> <!-- debug DLL CRT (/MDd): this configuration defines _DEBUG, so it should not link the release CRT -->
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\linkicc\linkicc.c" />
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/linkicc/linkicc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/linkicc/linkicc.vcxproj.filters
new file mode 100755
index 0000000000..95c77cdbe2
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/linkicc/linkicc.vcxproj.filters
@@ -0,0 +1,28 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\linkicc\linkicc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/psicc/psicc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/psicc/psicc.vcxproj
new file mode 100755
index 0000000000..0f24ed946a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/psicc/psicc.vcxproj
@@ -0,0 +1,191 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{EF6A8851-65FE-46F5-B9EF-14F0B671F693}</ProjectGuid>
+    <RootNamespace>psicc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\psicc\psicc.c" />
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/psicc/psicc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/psicc/psicc.vcxproj.filters
new file mode 100755
index 0000000000..c42429d8be
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/psicc/psicc.vcxproj.filters
@@ -0,0 +1,28 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\psicc\psicc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/resource.h b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/resource.h
new file mode 100755
index 0000000000..7655978dd4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/resource.h
@@ -0,0 +1,16 @@
+//{{NO_DEPENDENCIES}}
+// Microsoft Visual C++ generated include file.
+// Used by lcms2.rc
+//
+
+// Next default values for new objects
+// 
+#ifdef APSTUDIO_INVOKED
+#ifndef APSTUDIO_READONLY_SYMBOLS
+#define _APS_NO_MFC                     1
+#define _APS_NEXT_RESOURCE_VALUE        101
+#define _APS_NEXT_COMMAND_VALUE         40001
+#define _APS_NEXT_CONTROL_VALUE         1000
+#define _APS_NEXT_SYMED_VALUE           101
+#endif
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/testbed/testbed.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/testbed/testbed.vcxproj
new file mode 100755
index 0000000000..502a1fe4af
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/testbed/testbed.vcxproj
@@ -0,0 +1,207 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{928A3A2B-46EF-4279-959C-513B3652FF0E}</ProjectGuid>
+    <RootNamespace>testbed</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\testbed\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\testbed\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\testbed\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\testbed\</OutDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+      <Profile>false</Profile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <Profile>false</Profile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>false</EnableFiberSafeOptimizations>
+      <AdditionalIncludeDirectories>../../../include;../../../src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CallingConvention>Cdecl</CallingConvention>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+      <Profile>false</Profile>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <AdditionalIncludeDirectories>../../../include;../../../src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CallingConvention>Cdecl</CallingConvention>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <Profile>false</Profile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\testbed\testcms2.c" />
+    <ClCompile Include="..\..\..\testbed\testplugin.c" />
+    <ClCompile Include="..\..\..\testbed\zoo_icc.c" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/testbed/testbed.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/testbed/testbed.vcxproj.filters
new file mode 100755
index 0000000000..be19e1ee29
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/testbed/testbed.vcxproj.filters
@@ -0,0 +1,28 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\testbed\testcms2.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\testbed\testplugin.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\testbed\zoo_icc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/tiffdiff/tiffdiff.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/tiffdiff/tiffdiff.vcxproj
new file mode 100755
index 0000000000..841a9d6a82
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/tiffdiff/tiffdiff.vcxproj
@@ -0,0 +1,209 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{75B91835-CCD7-48BE-A606-A9C997D5DBEE}</ProjectGuid>
+    <RootNamespace>tiffdiff</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\jpeg-8d;c:\tiff-4.0.2\libtiff;$(IncludePath)</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">C:\jpeg-8d;c:\tiff-4.0.2\libtiff;$(IncludePath)</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\jpeg-8d;C:\tiff-4.0.2\libtiff;$(LibraryPath)</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">C:\jpeg-8d;C:\tiff-4.0.2\libtiff;$(LibraryPath)</LibraryPath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\jpeg-8d;c:\tiff-4.0.2\libtiff;$(IncludePath)</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">C:\jpeg-8d;c:\tiff-4.0.2\libtiff;$(IncludePath)</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\jpeg-8d;C:\tiff-4.0.2\libtiff;$(LibraryPath)</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">C:\jpeg-8d;C:\tiff-4.0.2\libtiff;$(LibraryPath)</LibraryPath>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+    <ClCompile Include="..\..\..\utils\tificc\tifdiff.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/tiffdiff/tiffdiff.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/tiffdiff/tiffdiff.vcxproj.filters
new file mode 100755
index 0000000000..b7f9a80d10
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/tiffdiff/tiffdiff.vcxproj.filters
@@ -0,0 +1,28 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\tificc\tifdiff.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/tifficc/tifficc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/tifficc/tifficc.vcxproj
new file mode 100755
index 0000000000..b5a215e621
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/tifficc/tifficc.vcxproj
@@ -0,0 +1,210 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{2256DE16-ED92-4A6F-9C54-F65BB61E64A2}</ProjectGuid>
+    <RootNamespace>tifficc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IncludePath);C:\tiff-4.0.2\libtiff</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IncludePath);C:\tiff-4.0.2\libtiff</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(LibraryPath);C:\tiff-4.0.2\libtiff</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(LibraryPath);C:\tiff-4.0.2\libtiff</LibraryPath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IncludePath);C:\tiff-4.0.2\libtiff</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IncludePath);C:\tiff-4.0.2\libtiff</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(LibraryPath);C:\tiff-4.0.2\libtiff</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(LibraryPath);C:\tiff-4.0.2\libtiff</LibraryPath>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+    <ClCompile Include="..\..\..\utils\tificc\tificc.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\utils\common\utils.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/tifficc/tifficc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/tifficc/tifficc.vcxproj.filters
new file mode 100755
index 0000000000..2e0e44d1ea
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/tifficc/tifficc.vcxproj.filters
@@ -0,0 +1,33 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\tificc\tificc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\utils\common\utils.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/transicc/transicc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/transicc/transicc.vcxproj
new file mode 100755
index 0000000000..e5cbe738d8
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/transicc/transicc.vcxproj
@@ -0,0 +1,194 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{9EE22D66-C849-474C-9ED5-C3E141DAB160}</ProjectGuid>
+    <RootNamespace>transicc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\transicc\transicc.c" />
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\utils\common\utils.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/transicc/transicc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/transicc/transicc.vcxproj.filters
new file mode 100755
index 0000000000..3d45443026
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2013/transicc/transicc.vcxproj.filters
@@ -0,0 +1,33 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\transicc\transicc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\utils\common\utils.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/jpegicc/jpegicc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/jpegicc/jpegicc.vcxproj
new file mode 100644
index 0000000000..ebe1e29197
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/jpegicc/jpegicc.vcxproj
@@ -0,0 +1,214 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{62812507-F926-4968-96A9-17678460AD90}</ProjectGuid>
+    <RootNamespace>jpegicc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IncludePath);;C:\code\jpeg-9a</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IncludePath);;C:\code\jpeg-9a</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(LibraryPath);;C:\code\jpeg-9a</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(LibraryPath);;C:\code\jpeg-9a</LibraryPath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IncludePath);;C:\code\jpeg-9a</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IncludePath);;C:\code\jpeg-9a</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(LibraryPath);;C:\code\jpeg-9a</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(LibraryPath);;C:\code\jpeg-9a</LibraryPath>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libjpeg.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libjpeg.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libjpeg.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libjpeg.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+    <ClCompile Include="..\..\..\utils\jpgicc\iccjpeg.c" />
+    <ClCompile Include="..\..\..\utils\jpgicc\jpgicc.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/jpegicc/jpegicc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/jpegicc/jpegicc.vcxproj.filters
new file mode 100644
index 0000000000..a05c36d6d9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/jpegicc/jpegicc.vcxproj.filters
@@ -0,0 +1,31 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\jpgicc\iccjpeg.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\jpgicc\jpgicc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/lcms2.rc b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/lcms2.rc
new file mode 100644
index 0000000000..2a762d0b23
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/lcms2.rc
@@ -0,0 +1,104 @@
+// Microsoft Visual C++ generated resource script.
+//
+#include "resource.h"
+
+#define APSTUDIO_READONLY_SYMBOLS
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 2 resource.
+//
+#define APSTUDIO_HIDDEN_SYMBOLS
+#include "windows.h"
+#undef APSTUDIO_HIDDEN_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+#undef APSTUDIO_READONLY_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+// Spanish (Spain, International Sort) resources
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ESN)
+LANGUAGE LANG_SPANISH, SUBLANG_SPANISH_MODERN
+#pragma code_page(1252)
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+1 VERSIONINFO
+ FILEVERSION 2,9,0,0
+ PRODUCTVERSION 2,9,0,0
+ FILEFLAGSMASK 0x0L
+#ifdef _DEBUG
+ FILEFLAGS 0x1L
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS 0x40004L
+ FILETYPE 0x2L
+ FILESUBTYPE 0x0L
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040904e4"
+        BEGIN
+            VALUE "CompanyName", "Marti Maria"
+            VALUE "FileDescription", "lcms color engine"
+            VALUE "FileVersion", "2.9.0.0"
+            VALUE "InternalName", "lcms"
+            VALUE "LegalCopyright", "Copyright � Marti Maria 2017"
+            VALUE "OriginalFilename", "lcms2.dll"
+            VALUE "ProductName", "LittleCMS color engine"
+            VALUE "ProductVersion", "2.9.0.0"
+        END
+    END
+    BLOCK "VarFileInfo"
+    BEGIN
+        VALUE "Translation", 0x409, 1252
+    END
+END
+
+
+#ifdef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// TEXTINCLUDE
+//
+
+1 TEXTINCLUDE 
+BEGIN
+    "resource.h\0"
+END
+
+2 TEXTINCLUDE 
+BEGIN
+    "#define APSTUDIO_HIDDEN_SYMBOLS\r\n"
+    "#include ""windows.h""\r\n"
+    "#undef APSTUDIO_HIDDEN_SYMBOLS\r\n"
+    "\0"
+END
+
+3 TEXTINCLUDE 
+BEGIN
+    "\r\n"
+    "\0"
+END
+
+#endif    // APSTUDIO_INVOKED
+
+#endif    // Spanish (Spain, International Sort) resources
+/////////////////////////////////////////////////////////////////////////////
+
+
+
+#ifndef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 3 resource.
+//
+
+
+/////////////////////////////////////////////////////////////////////////////
+#endif    // not APSTUDIO_INVOKED
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/lcms2.sln b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/lcms2.sln
new file mode 100644
index 0000000000..b48c204a95
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/lcms2.sln differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/lcms2_DLL/lcms2_DLL.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/lcms2_DLL/lcms2_DLL.vcxproj
new file mode 100644
index 0000000000..8ea37b97d8
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/lcms2_DLL/lcms2_DLL.vcxproj
@@ -0,0 +1,251 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{8C51BE48-ADB8-4089-A9EC-F6BF993A0548}</ProjectGuid>
+    <RootNamespace>lcms2_DLL</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <TargetName>lcms2</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <TargetName>lcms2</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <TargetName>lcms2</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <TargetName>lcms2</TargetName>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;CMS_DLL_BUILD;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <StringPooling>false</StringPooling>
+    </ClCompile>
+    <Link>
+      <ModuleDefinitionFile>..\..\..\src\lcms2.def</ModuleDefinitionFile>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;CMS_DLL_BUILD;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <StringPooling>false</StringPooling>
+    </ClCompile>
+    <Link>
+      <ModuleDefinitionFile>
+      </ModuleDefinitionFile>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;CMS_DLL_BUILD;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <StringPooling>true</StringPooling>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <ModuleDefinitionFile>..\..\..\src\lcms2.def</ModuleDefinitionFile>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;CMS_DLL_BUILD;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <StringPooling>true</StringPooling>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <ModuleDefinitionFile>
+      </ModuleDefinitionFile>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\src\cmsalpha.c" />
+    <ClCompile Include="..\..\..\src\cmscam02.c" />
+    <ClCompile Include="..\..\..\src\cmscgats.c" />
+    <ClCompile Include="..\..\..\src\cmscnvrt.c" />
+    <ClCompile Include="..\..\..\src\cmserr.c" />
+    <ClCompile Include="..\..\..\src\cmsgamma.c" />
+    <ClCompile Include="..\..\..\src\cmsgmt.c" />
+    <ClCompile Include="..\..\..\src\cmshalf.c" />
+    <ClCompile Include="..\..\..\src\cmsintrp.c" />
+    <ClCompile Include="..\..\..\src\cmsio0.c" />
+    <ClCompile Include="..\..\..\src\cmsio1.c" />
+    <ClCompile Include="..\..\..\src\cmslut.c" />
+    <ClCompile Include="..\..\..\src\cmsmd5.c" />
+    <ClCompile Include="..\..\..\src\cmsmtrx.c" />
+    <ClCompile Include="..\..\..\src\cmsnamed.c" />
+    <ClCompile Include="..\..\..\src\cmsopt.c" />
+    <ClCompile Include="..\..\..\src\cmspack.c" />
+    <ClCompile Include="..\..\..\src\cmspcs.c" />
+    <ClCompile Include="..\..\..\src\cmsplugin.c" />
+    <ClCompile Include="..\..\..\src\cmsps2.c" />
+    <ClCompile Include="..\..\..\src\cmssamp.c" />
+    <ClCompile Include="..\..\..\src\cmssm.c" />
+    <ClCompile Include="..\..\..\src\cmstypes.c" />
+    <ClCompile Include="..\..\..\src\cmsvirt.c" />
+    <ClCompile Include="..\..\..\src\cmswtpnt.c" />
+    <ClCompile Include="..\..\..\src\cmsxform.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="..\..\..\src\lcms2.def">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+    </CustomBuild>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\src\lcms2_internal.h" />
+    <ClInclude Include="..\..\..\include\lcms2.h" />
+    <ClInclude Include="..\..\..\include\lcms2_plugin.h" />
+    <ClInclude Include="..\resource.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="..\lcms2.rc" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/lcms2_DLL/lcms2_DLL.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/lcms2_DLL/lcms2_DLL.vcxproj.filters
new file mode 100644
index 0000000000..255a147077
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/lcms2_DLL/lcms2_DLL.vcxproj.filters
@@ -0,0 +1,121 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\src\cmscam02.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmscgats.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmscnvrt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmserr.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsgamma.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsgmt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsintrp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsio0.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsio1.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmslut.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsmd5.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsmtrx.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsnamed.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmspack.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmspcs.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsplugin.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsps2.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmssamp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmssm.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmstypes.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsvirt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmswtpnt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsxform.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmshalf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsalpha.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\include\lcms2.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\include\lcms2_plugin.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\src\lcms2_internal.h">
+      <Filter>Source Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\resource.h">
+      <Filter>Resource Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="..\lcms2.rc">
+      <Filter>Resource Files</Filter>
+    </ResourceCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="..\..\..\src\lcms2.def">
+      <Filter>Source Files</Filter>
+    </CustomBuild>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/lcms2_static/lcms2_static.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/lcms2_static/lcms2_static.vcxproj
new file mode 100644
index 0000000000..bcfb7db62a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/lcms2_static/lcms2_static.vcxproj
@@ -0,0 +1,208 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{71DEDE59-3F1E-486B-A899-4283000F76B5}</ProjectGuid>
+    <RootNamespace>lcms2_static</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\Lib\MS\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\Lib\MS\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\Lib\MS\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\Lib\MS\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <FunctionLevelLinking>
+      </FunctionLevelLinking>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FunctionLevelLinking>
+      </FunctionLevelLinking>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <CompileAs>Default</CompileAs>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>false</EnableFiberSafeOptimizations>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <ExceptionHandling>false</ExceptionHandling>
+      <StringPooling>true</StringPooling>
+      <FloatingPointModel>Precise</FloatingPointModel>
+      <FloatingPointExceptions>false</FloatingPointExceptions>
+      <RuntimeTypeInfo>false</RuntimeTypeInfo>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <ExceptionHandling>false</ExceptionHandling>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <EnableParallelCodeGeneration>true</EnableParallelCodeGeneration>
+      <StringPooling>true</StringPooling>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\src\cmsalpha.c" />
+    <ClCompile Include="..\..\..\src\cmscam02.c" />
+    <ClCompile Include="..\..\..\src\cmscgats.c" />
+    <ClCompile Include="..\..\..\src\cmscnvrt.c" />
+    <ClCompile Include="..\..\..\src\cmserr.c" />
+    <ClCompile Include="..\..\..\src\cmsgamma.c" />
+    <ClCompile Include="..\..\..\src\cmsgmt.c" />
+    <ClCompile Include="..\..\..\src\cmshalf.c" />
+    <ClCompile Include="..\..\..\src\cmsintrp.c" />
+    <ClCompile Include="..\..\..\src\cmsio0.c" />
+    <ClCompile Include="..\..\..\src\cmsio1.c" />
+    <ClCompile Include="..\..\..\src\cmslut.c" />
+    <ClCompile Include="..\..\..\src\cmsmd5.c" />
+    <ClCompile Include="..\..\..\src\cmsmtrx.c" />
+    <ClCompile Include="..\..\..\src\cmsnamed.c" />
+    <ClCompile Include="..\..\..\src\cmsopt.c" />
+    <ClCompile Include="..\..\..\src\cmspack.c" />
+    <ClCompile Include="..\..\..\src\cmspcs.c" />
+    <ClCompile Include="..\..\..\src\cmsplugin.c" />
+    <ClCompile Include="..\..\..\src\cmsps2.c" />
+    <ClCompile Include="..\..\..\src\cmssamp.c" />
+    <ClCompile Include="..\..\..\src\cmssm.c" />
+    <ClCompile Include="..\..\..\src\cmstypes.c" />
+    <ClCompile Include="..\..\..\src\cmsvirt.c" />
+    <ClCompile Include="..\..\..\src\cmswtpnt.c" />
+    <ClCompile Include="..\..\..\src\cmsxform.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\src\lcms2_internal.h" />
+    <ClInclude Include="..\..\..\include\lcms2.h" />
+    <ClInclude Include="..\..\..\include\lcms2_plugin.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/lcms2_static/lcms2_static.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/lcms2_static/lcms2_static.vcxproj.filters
new file mode 100644
index 0000000000..58d3cb7ee7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/lcms2_static/lcms2_static.vcxproj.filters
@@ -0,0 +1,108 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\src\cmscam02.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmscgats.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmscnvrt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmserr.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsgamma.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsgmt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsintrp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsio0.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsio1.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmslut.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsmd5.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsmtrx.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsnamed.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmspack.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmspcs.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsplugin.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsps2.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmssamp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmssm.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmstypes.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsvirt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmswtpnt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsxform.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmshalf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsalpha.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\include\lcms2.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\include\lcms2_plugin.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\src\lcms2_internal.h">
+      <Filter>Source Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/linkicc/linkicc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/linkicc/linkicc.vcxproj
new file mode 100644
index 0000000000..18ee95568e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/linkicc/linkicc.vcxproj
@@ -0,0 +1,192 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{FBFBE1DC-DB84-4BA1-9552-B4780F457849}</ProjectGuid>
+    <RootNamespace>linkicc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\linkicc\linkicc.c" />
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/linkicc/linkicc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/linkicc/linkicc.vcxproj.filters
new file mode 100644
index 0000000000..95c77cdbe2
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/linkicc/linkicc.vcxproj.filters
@@ -0,0 +1,28 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\linkicc\linkicc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/psicc/psicc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/psicc/psicc.vcxproj
new file mode 100644
index 0000000000..938c31264d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/psicc/psicc.vcxproj
@@ -0,0 +1,192 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{EF6A8851-65FE-46F5-B9EF-14F0B671F693}</ProjectGuid>
+    <RootNamespace>psicc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\psicc\psicc.c" />
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/psicc/psicc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/psicc/psicc.vcxproj.filters
new file mode 100644
index 0000000000..c42429d8be
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/psicc/psicc.vcxproj.filters
@@ -0,0 +1,28 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\psicc\psicc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/resource.h b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/resource.h
new file mode 100644
index 0000000000..7655978dd4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/resource.h
@@ -0,0 +1,16 @@
+//{{NO_DEPENDENCIES}}
+// Microsoft Visual C++ generated include file.
+// Used by lcms2.rc
+//
+
+// Next default values for new objects
+// 
+#ifdef APSTUDIO_INVOKED
+#ifndef APSTUDIO_READONLY_SYMBOLS
+#define _APS_NO_MFC                     1
+#define _APS_NEXT_RESOURCE_VALUE        101
+#define _APS_NEXT_COMMAND_VALUE         40001
+#define _APS_NEXT_CONTROL_VALUE         1000
+#define _APS_NEXT_SYMED_VALUE           101
+#endif
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/testbed/testbed.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/testbed/testbed.vcxproj
new file mode 100644
index 0000000000..63e979457d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/testbed/testbed.vcxproj
@@ -0,0 +1,208 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{928A3A2B-46EF-4279-959C-513B3652FF0E}</ProjectGuid>
+    <RootNamespace>testbed</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\testbed\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\testbed\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\testbed\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\testbed\</OutDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+      <Profile>false</Profile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <Profile>false</Profile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>false</EnableFiberSafeOptimizations>
+      <AdditionalIncludeDirectories>../../../include;../../../src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CallingConvention>Cdecl</CallingConvention>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+      <Profile>false</Profile>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <AdditionalIncludeDirectories>../../../include;../../../src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CallingConvention>Cdecl</CallingConvention>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <Profile>false</Profile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\testbed\testcms2.c" />
+    <ClCompile Include="..\..\..\testbed\testplugin.c" />
+    <ClCompile Include="..\..\..\testbed\zoo_icc.c" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/testbed/testbed.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/testbed/testbed.vcxproj.filters
new file mode 100644
index 0000000000..993ee15119
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/testbed/testbed.vcxproj.filters
@@ -0,0 +1,28 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\testbed\testcms2.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\testbed\testplugin.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\testbed\zoo_icc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/tiffdiff/tiffdiff.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/tiffdiff/tiffdiff.vcxproj
new file mode 100644
index 0000000000..63a5eef0c6
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/tiffdiff/tiffdiff.vcxproj
@@ -0,0 +1,210 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{75B91835-CCD7-48BE-A606-A9C997D5DBEE}</ProjectGuid>
+    <RootNamespace>tiffdiff</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\jpeg-8d;c:\tiff-4.0.2\libtiff;$(IncludePath)</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">C:\jpeg-8d;c:\tiff-4.0.2\libtiff;$(IncludePath)</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\jpeg-8d;C:\tiff-4.0.2\libtiff;$(LibraryPath)</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">C:\jpeg-8d;C:\tiff-4.0.2\libtiff;$(LibraryPath)</LibraryPath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\jpeg-8d;c:\tiff-4.0.2\libtiff;$(IncludePath)</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">C:\jpeg-8d;c:\tiff-4.0.2\libtiff;$(IncludePath)</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\jpeg-8d;C:\tiff-4.0.2\libtiff;$(LibraryPath)</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">C:\jpeg-8d;C:\tiff-4.0.2\libtiff;$(LibraryPath)</LibraryPath>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+    <ClCompile Include="..\..\..\utils\tificc\tifdiff.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/tiffdiff/tiffdiff.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/tiffdiff/tiffdiff.vcxproj.filters
new file mode 100644
index 0000000000..b7f9a80d10
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/tiffdiff/tiffdiff.vcxproj.filters
@@ -0,0 +1,28 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\tificc\tifdiff.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/tifficc/tifficc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/tifficc/tifficc.vcxproj
new file mode 100644
index 0000000000..7de14c4b9a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/tifficc/tifficc.vcxproj
@@ -0,0 +1,211 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{2256DE16-ED92-4A6F-9C54-F65BB61E64A2}</ProjectGuid>
+    <RootNamespace>tifficc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IncludePath);C:\code\tiff-4.0.3\libtiff</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IncludePath);C:\code\tiff-4.0.3\libtiff</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(LibraryPath);C:\code\tiff-4.0.3\libtiff</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(LibraryPath);C:\code\tiff-4.0.3\libtiff</LibraryPath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IncludePath);C:\code\tiff-4.0.3\libtiff</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IncludePath);C:\code\tiff-4.0.3\libtiff</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(LibraryPath);C:\code\tiff-4.0.3\libtiff</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(LibraryPath);C:\code\tiff-4.0.3\libtiff</LibraryPath>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+    <ClCompile Include="..\..\..\utils\tificc\tificc.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\utils\common\utils.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/tifficc/tifficc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/tifficc/tifficc.vcxproj.filters
new file mode 100644
index 0000000000..2e0e44d1ea
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/tifficc/tifficc.vcxproj.filters
@@ -0,0 +1,33 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\tificc\tificc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\utils\common\utils.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/transicc/transicc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/transicc/transicc.vcxproj
new file mode 100644
index 0000000000..f96500605d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/transicc/transicc.vcxproj
@@ -0,0 +1,196 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{9EE22D66-C849-474C-9ED5-C3E141DAB160}</ProjectGuid>
+    <RootNamespace>transicc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\transicc\transicc.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\utils\common\utils.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/transicc/transicc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/transicc/transicc.vcxproj.filters
new file mode 100644
index 0000000000..3d45443026
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2015/transicc/transicc.vcxproj.filters
@@ -0,0 +1,33 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\transicc\transicc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\utils\common\utils.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/jpegicc/jpegicc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/jpegicc/jpegicc.vcxproj
new file mode 100644
index 0000000000..39cfd00ca7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/jpegicc/jpegicc.vcxproj
@@ -0,0 +1,215 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{62812507-F926-4968-96A9-17678460AD90}</ProjectGuid>
+    <RootNamespace>jpegicc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IncludePath);;C:\code\jpeg-9a</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IncludePath);;C:\code\jpeg-9a</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(LibraryPath);;C:\code\jpeg-9a</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(LibraryPath);;C:\code\jpeg-9a</LibraryPath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IncludePath);;C:\code\jpeg-9a</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IncludePath);;C:\code\jpeg-9a</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(LibraryPath);;C:\code\jpeg-9a</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(LibraryPath);;C:\code\jpeg-9a</LibraryPath>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libjpeg.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libjpeg.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libjpeg.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libjpeg.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+    <ClCompile Include="..\..\..\utils\jpgicc\iccjpeg.c" />
+    <ClCompile Include="..\..\..\utils\jpgicc\jpgicc.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/jpegicc/jpegicc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/jpegicc/jpegicc.vcxproj.filters
new file mode 100644
index 0000000000..a05c36d6d9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/jpegicc/jpegicc.vcxproj.filters
@@ -0,0 +1,31 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\jpgicc\iccjpeg.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\jpgicc\jpgicc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/lcms2.rc b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/lcms2.rc
new file mode 100644
index 0000000000..2a762d0b23
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/lcms2.rc
@@ -0,0 +1,104 @@
+// Microsoft Visual C++ generated resource script.
+//
+#include "resource.h"
+
+#define APSTUDIO_READONLY_SYMBOLS
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 2 resource.
+//
+#define APSTUDIO_HIDDEN_SYMBOLS
+#include "windows.h"
+#undef APSTUDIO_HIDDEN_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+#undef APSTUDIO_READONLY_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+// Spanish (Spain, International Sort) resources
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ESN)
+LANGUAGE LANG_SPANISH, SUBLANG_SPANISH_MODERN
+#pragma code_page(1252)
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+1 VERSIONINFO
+ FILEVERSION 2,9,0,0
+ PRODUCTVERSION 2,9,0,0
+ FILEFLAGSMASK 0x0L
+#ifdef _DEBUG
+ FILEFLAGS 0x1L
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS 0x40004L
+ FILETYPE 0x2L
+ FILESUBTYPE 0x0L
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040904e4"
+        BEGIN
+            VALUE "CompanyName", "Marti Maria"
+            VALUE "FileDescription", "lcms color engine"
+            VALUE "FileVersion", "2.9.0.0"
+            VALUE "InternalName", "lcms"
+            VALUE "LegalCopyright", "Copyright � Marti Maria 2017"
+            VALUE "OriginalFilename", "lcms2.dll"
+            VALUE "ProductName", "LittleCMS color engine"
+            VALUE "ProductVersion", "2.9.0.0"
+        END
+    END
+    BLOCK "VarFileInfo"
+    BEGIN
+        VALUE "Translation", 0x409, 1252
+    END
+END
+
+
+#ifdef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// TEXTINCLUDE
+//
+
+1 TEXTINCLUDE 
+BEGIN
+    "resource.h\0"
+END
+
+2 TEXTINCLUDE 
+BEGIN
+    "#define APSTUDIO_HIDDEN_SYMBOLS\r\n"
+    "#include ""windows.h""\r\n"
+    "#undef APSTUDIO_HIDDEN_SYMBOLS\r\n"
+    "\0"
+END
+
+3 TEXTINCLUDE 
+BEGIN
+    "\r\n"
+    "\0"
+END
+
+#endif    // APSTUDIO_INVOKED
+
+#endif    // Spanish (Spain, International Sort) resources
+/////////////////////////////////////////////////////////////////////////////
+
+
+
+#ifndef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 3 resource.
+//
+
+
+/////////////////////////////////////////////////////////////////////////////
+#endif    // not APSTUDIO_INVOKED
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/lcms2.sln b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/lcms2.sln
new file mode 100644
index 0000000000..b48c204a95
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/lcms2.sln differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/lcms2_DLL/lcms2_DLL.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/lcms2_DLL/lcms2_DLL.vcxproj
new file mode 100644
index 0000000000..d1bf3eb616
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/lcms2_DLL/lcms2_DLL.vcxproj
@@ -0,0 +1,251 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{8C51BE48-ADB8-4089-A9EC-F6BF993A0548}</ProjectGuid>
+    <RootNamespace>lcms2_DLL</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <TargetName>lcms2</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <TargetName>lcms2</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <TargetName>lcms2</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <TargetName>lcms2</TargetName>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;CMS_DLL_BUILD;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <StringPooling>false</StringPooling>
+    </ClCompile>
+    <Link>
+      <ModuleDefinitionFile>..\..\..\src\lcms2.def</ModuleDefinitionFile>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;CMS_DLL_BUILD;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <StringPooling>false</StringPooling>
+    </ClCompile>
+    <Link>
+      <ModuleDefinitionFile>
+      </ModuleDefinitionFile>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;CMS_DLL_BUILD;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <StringPooling>true</StringPooling>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <ModuleDefinitionFile>..\..\..\src\lcms2.def</ModuleDefinitionFile>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;CMS_DLL_BUILD;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <StringPooling>true</StringPooling>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <ModuleDefinitionFile>
+      </ModuleDefinitionFile>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Windows</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\src\cmsalpha.c" />
+    <ClCompile Include="..\..\..\src\cmscam02.c" />
+    <ClCompile Include="..\..\..\src\cmscgats.c" />
+    <ClCompile Include="..\..\..\src\cmscnvrt.c" />
+    <ClCompile Include="..\..\..\src\cmserr.c" />
+    <ClCompile Include="..\..\..\src\cmsgamma.c" />
+    <ClCompile Include="..\..\..\src\cmsgmt.c" />
+    <ClCompile Include="..\..\..\src\cmshalf.c" />
+    <ClCompile Include="..\..\..\src\cmsintrp.c" />
+    <ClCompile Include="..\..\..\src\cmsio0.c" />
+    <ClCompile Include="..\..\..\src\cmsio1.c" />
+    <ClCompile Include="..\..\..\src\cmslut.c" />
+    <ClCompile Include="..\..\..\src\cmsmd5.c" />
+    <ClCompile Include="..\..\..\src\cmsmtrx.c" />
+    <ClCompile Include="..\..\..\src\cmsnamed.c" />
+    <ClCompile Include="..\..\..\src\cmsopt.c" />
+    <ClCompile Include="..\..\..\src\cmspack.c" />
+    <ClCompile Include="..\..\..\src\cmspcs.c" />
+    <ClCompile Include="..\..\..\src\cmsplugin.c" />
+    <ClCompile Include="..\..\..\src\cmsps2.c" />
+    <ClCompile Include="..\..\..\src\cmssamp.c" />
+    <ClCompile Include="..\..\..\src\cmssm.c" />
+    <ClCompile Include="..\..\..\src\cmstypes.c" />
+    <ClCompile Include="..\..\..\src\cmsvirt.c" />
+    <ClCompile Include="..\..\..\src\cmswtpnt.c" />
+    <ClCompile Include="..\..\..\src\cmsxform.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="..\..\..\src\lcms2.def">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+    </CustomBuild>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\src\lcms2_internal.h" />
+    <ClInclude Include="..\..\..\include\lcms2.h" />
+    <ClInclude Include="..\..\..\include\lcms2_plugin.h" />
+    <ClInclude Include="..\resource.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="..\lcms2.rc" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/lcms2_DLL/lcms2_DLL.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/lcms2_DLL/lcms2_DLL.vcxproj.filters
new file mode 100644
index 0000000000..255a147077
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/lcms2_DLL/lcms2_DLL.vcxproj.filters
@@ -0,0 +1,121 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\src\cmscam02.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmscgats.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmscnvrt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmserr.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsgamma.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsgmt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsintrp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsio0.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsio1.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmslut.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsmd5.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsmtrx.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsnamed.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmspack.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmspcs.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsplugin.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsps2.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmssamp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmssm.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmstypes.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsvirt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmswtpnt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsxform.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmshalf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsalpha.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\include\lcms2.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\include\lcms2_plugin.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\src\lcms2_internal.h">
+      <Filter>Source Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\resource.h">
+      <Filter>Resource Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="..\lcms2.rc">
+      <Filter>Resource Files</Filter>
+    </ResourceCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="..\..\..\src\lcms2.def">
+      <Filter>Source Files</Filter>
+    </CustomBuild>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/lcms2_static/lcms2_static.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/lcms2_static/lcms2_static.vcxproj
new file mode 100644
index 0000000000..9fc05ce35d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/lcms2_static/lcms2_static.vcxproj
@@ -0,0 +1,208 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{71DEDE59-3F1E-486B-A899-4283000F76B5}</ProjectGuid>
+    <RootNamespace>lcms2_static</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\Lib\MS\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\Lib\MS\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\Lib\MS\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\Lib\MS\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <FunctionLevelLinking>
+      </FunctionLevelLinking>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FunctionLevelLinking>
+      </FunctionLevelLinking>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <CompileAs>Default</CompileAs>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>false</EnableFiberSafeOptimizations>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <ExceptionHandling>false</ExceptionHandling>
+      <StringPooling>true</StringPooling>
+      <FloatingPointModel>Precise</FloatingPointModel>
+      <FloatingPointExceptions>false</FloatingPointExceptions>
+      <RuntimeTypeInfo>false</RuntimeTypeInfo>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <ExceptionHandling>false</ExceptionHandling>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <EnableParallelCodeGeneration>true</EnableParallelCodeGeneration>
+      <StringPooling>true</StringPooling>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\src\cmsalpha.c" />
+    <ClCompile Include="..\..\..\src\cmscam02.c" />
+    <ClCompile Include="..\..\..\src\cmscgats.c" />
+    <ClCompile Include="..\..\..\src\cmscnvrt.c" />
+    <ClCompile Include="..\..\..\src\cmserr.c" />
+    <ClCompile Include="..\..\..\src\cmsgamma.c" />
+    <ClCompile Include="..\..\..\src\cmsgmt.c" />
+    <ClCompile Include="..\..\..\src\cmshalf.c" />
+    <ClCompile Include="..\..\..\src\cmsintrp.c" />
+    <ClCompile Include="..\..\..\src\cmsio0.c" />
+    <ClCompile Include="..\..\..\src\cmsio1.c" />
+    <ClCompile Include="..\..\..\src\cmslut.c" />
+    <ClCompile Include="..\..\..\src\cmsmd5.c" />
+    <ClCompile Include="..\..\..\src\cmsmtrx.c" />
+    <ClCompile Include="..\..\..\src\cmsnamed.c" />
+    <ClCompile Include="..\..\..\src\cmsopt.c" />
+    <ClCompile Include="..\..\..\src\cmspack.c" />
+    <ClCompile Include="..\..\..\src\cmspcs.c" />
+    <ClCompile Include="..\..\..\src\cmsplugin.c" />
+    <ClCompile Include="..\..\..\src\cmsps2.c" />
+    <ClCompile Include="..\..\..\src\cmssamp.c" />
+    <ClCompile Include="..\..\..\src\cmssm.c" />
+    <ClCompile Include="..\..\..\src\cmstypes.c" />
+    <ClCompile Include="..\..\..\src\cmsvirt.c" />
+    <ClCompile Include="..\..\..\src\cmswtpnt.c" />
+    <ClCompile Include="..\..\..\src\cmsxform.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\src\lcms2_internal.h" />
+    <ClInclude Include="..\..\..\include\lcms2.h" />
+    <ClInclude Include="..\..\..\include\lcms2_plugin.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/lcms2_static/lcms2_static.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/lcms2_static/lcms2_static.vcxproj.filters
new file mode 100644
index 0000000000..58d3cb7ee7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/lcms2_static/lcms2_static.vcxproj.filters
@@ -0,0 +1,108 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\src\cmscam02.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmscgats.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmscnvrt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmserr.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsgamma.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsgmt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsintrp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsio0.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsio1.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmslut.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsmd5.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsmtrx.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsnamed.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmspack.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmspcs.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsplugin.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsps2.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmssamp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmssm.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmstypes.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsvirt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmswtpnt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsxform.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmshalf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\src\cmsalpha.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\include\lcms2.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\include\lcms2_plugin.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\src\lcms2_internal.h">
+      <Filter>Source Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/linkicc/linkicc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/linkicc/linkicc.vcxproj
new file mode 100644
index 0000000000..51586ddb61
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/linkicc/linkicc.vcxproj
@@ -0,0 +1,192 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{FBFBE1DC-DB84-4BA1-9552-B4780F457849}</ProjectGuid>
+    <RootNamespace>linkicc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\linkicc\linkicc.c" />
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/linkicc/linkicc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/linkicc/linkicc.vcxproj.filters
new file mode 100644
index 0000000000..95c77cdbe2
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/linkicc/linkicc.vcxproj.filters
@@ -0,0 +1,28 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\linkicc\linkicc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/psicc/psicc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/psicc/psicc.vcxproj
new file mode 100644
index 0000000000..8f26e12874
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/psicc/psicc.vcxproj
@@ -0,0 +1,192 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{EF6A8851-65FE-46F5-B9EF-14F0B671F693}</ProjectGuid>
+    <RootNamespace>psicc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\psicc\psicc.c" />
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/psicc/psicc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/psicc/psicc.vcxproj.filters
new file mode 100644
index 0000000000..c42429d8be
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/psicc/psicc.vcxproj.filters
@@ -0,0 +1,28 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\psicc\psicc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/resource.h b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/resource.h
new file mode 100644
index 0000000000..7655978dd4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/resource.h
@@ -0,0 +1,16 @@
+//{{NO_DEPENDENCIES}}
+// Microsoft Visual C++ generated include file.
+// Used by lcms2.rc
+//
+
+// Next default values for new objects
+// 
+#ifdef APSTUDIO_INVOKED
+#ifndef APSTUDIO_READONLY_SYMBOLS
+#define _APS_NO_MFC                     1
+#define _APS_NEXT_RESOURCE_VALUE        101
+#define _APS_NEXT_COMMAND_VALUE         40001
+#define _APS_NEXT_CONTROL_VALUE         1000
+#define _APS_NEXT_SYMED_VALUE           101
+#endif
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/testbed/testbed.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/testbed/testbed.vcxproj
new file mode 100644
index 0000000000..3f6aea3f6c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/testbed/testbed.vcxproj
@@ -0,0 +1,208 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{928A3A2B-46EF-4279-959C-513B3652FF0E}</ProjectGuid>
+    <RootNamespace>testbed</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\testbed\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\testbed\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\testbed\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\testbed\</OutDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+      <Profile>false</Profile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <Profile>false</Profile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>false</EnableFiberSafeOptimizations>
+      <AdditionalIncludeDirectories>../../../include;../../../src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CallingConvention>Cdecl</CallingConvention>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+      <Profile>false</Profile>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
+      <AdditionalIncludeDirectories>../../../include;../../../src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CallingConvention>Cdecl</CallingConvention>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <Profile>false</Profile>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\testbed\testcms2.c" />
+    <ClCompile Include="..\..\..\testbed\testplugin.c" />
+    <ClCompile Include="..\..\..\testbed\zoo_icc.c" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/testbed/testbed.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/testbed/testbed.vcxproj.filters
new file mode 100644
index 0000000000..993ee15119
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/testbed/testbed.vcxproj.filters
@@ -0,0 +1,28 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\testbed\testcms2.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\testbed\testplugin.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\testbed\zoo_icc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/tiffdiff/tiffdiff.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/tiffdiff/tiffdiff.vcxproj
new file mode 100644
index 0000000000..3a6d837eb9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/tiffdiff/tiffdiff.vcxproj
@@ -0,0 +1,210 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{75B91835-CCD7-48BE-A606-A9C997D5DBEE}</ProjectGuid>
+    <RootNamespace>tiffdiff</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\jpeg-8d;c:\tiff-4.0.2\libtiff;$(IncludePath)</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">C:\jpeg-8d;c:\tiff-4.0.2\libtiff;$(IncludePath)</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\jpeg-8d;C:\tiff-4.0.2\libtiff;$(LibraryPath)</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">C:\jpeg-8d;C:\tiff-4.0.2\libtiff;$(LibraryPath)</LibraryPath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\jpeg-8d;c:\tiff-4.0.2\libtiff;$(IncludePath)</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">C:\jpeg-8d;c:\tiff-4.0.2\libtiff;$(IncludePath)</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\jpeg-8d;C:\tiff-4.0.2\libtiff;$(LibraryPath)</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">C:\jpeg-8d;C:\tiff-4.0.2\libtiff;$(LibraryPath)</LibraryPath>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+    <ClCompile Include="..\..\..\utils\tificc\tifdiff.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/tiffdiff/tiffdiff.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/tiffdiff/tiffdiff.vcxproj.filters
new file mode 100644
index 0000000000..b7f9a80d10
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/tiffdiff/tiffdiff.vcxproj.filters
@@ -0,0 +1,28 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\tificc\tifdiff.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/tifficc/tifficc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/tifficc/tifficc.vcxproj
new file mode 100644
index 0000000000..5ef954fde9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/tifficc/tifficc.vcxproj
@@ -0,0 +1,211 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{2256DE16-ED92-4A6F-9C54-F65BB61E64A2}</ProjectGuid>
+    <RootNamespace>tifficc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IncludePath);C:\tiff-4.0.2\libtiff</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IncludePath);C:\tiff-4.0.2\libtiff</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(LibraryPath);C:\tiff-4.0.2\libtiff</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(LibraryPath);C:\tiff-4.0.2\libtiff</LibraryPath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IncludePath);C:\tiff-4.0.2\libtiff</IncludePath>
+    <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IncludePath);C:\tiff-4.0.2\libtiff</IncludePath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(LibraryPath);C:\tiff-4.0.2\libtiff</LibraryPath>
+    <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(LibraryPath);C:\tiff-4.0.2\libtiff</LibraryPath>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>libtiff.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+    <ClCompile Include="..\..\..\utils\tificc\tificc.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\utils\common\utils.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/tifficc/tifficc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/tifficc/tifficc.vcxproj.filters
new file mode 100644
index 0000000000..2e0e44d1ea
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/tifficc/tifficc.vcxproj.filters
@@ -0,0 +1,33 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\tificc\tificc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\utils\common\utils.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/transicc/transicc.vcxproj b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/transicc/transicc.vcxproj
new file mode 100644
index 0000000000..b3173d8602
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/transicc/transicc.vcxproj
@@ -0,0 +1,197 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{9EE22D66-C849-474C-9ED5-C3E141DAB160}</ProjectGuid>
+    <RootNamespace>transicc</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\bin\</OutDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\bin\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)_$(Platform)\</IntDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)_$(Platform)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <BrowseInformation>true</BrowseInformation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+    <Bscmake>
+      <PreserveSbr>true</PreserveSbr>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>../../../include;../../../utils/common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\transicc\transicc.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\vprf.c" />
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\utils\common\utils.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\lcms2_static\lcms2_static.vcxproj">
+      <Project>{71dede59-3f1e-486b-a899-4283000f76b5}</Project>
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/transicc/transicc.vcxproj.filters b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/transicc/transicc.vcxproj.filters
new file mode 100644
index 0000000000..3d45443026
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/VC2017/transicc/transicc.vcxproj.filters
@@ -0,0 +1,33 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\utils\common\vprf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\common\xgetopt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\utils\transicc\transicc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\utils\common\utils.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/cppcheck/lcms2.cppcheck b/third-party/libjxl/libjxl/third_party/lcms/Projects/cppcheck/lcms2.cppcheck
new file mode 100755
index 0000000000..751884ff45
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/cppcheck/lcms2.cppcheck
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="1">
+    <includedir>
+        <dir name="../../include/"/>
+    </includedir>
+    <paths>
+        <dir name="../../src"/>
+    </paths>
+</project>
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/mac/LittleCMS/English.lproj/InfoPlist.strings b/third-party/libjxl/libjxl/third_party/lcms/Projects/mac/LittleCMS/English.lproj/InfoPlist.strings
new file mode 100755
index 0000000000..045f111f6b
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/Projects/mac/LittleCMS/English.lproj/InfoPlist.strings differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/mac/LittleCMS/Info.plist b/third-party/libjxl/libjxl/third_party/lcms/Projects/mac/LittleCMS/Info.plist
new file mode 100755
index 0000000000..1eaae0782e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/mac/LittleCMS/Info.plist
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>English</string>
+	<key>CFBundleExecutable</key>
+	<string>LittleCMS</string>
+	<key>CFBundleIconFile</key>
+	<string></string>
+	<key>CFBundleIdentifier</key>
+	<string>com.apple.carbonframeworktemplate</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundlePackageType</key>
+	<string>FMWK</string>
+	<key>CFBundleShortVersionString</key>
+	<string>2.7</string>
+	<key>CFBundleSignature</key>
+	<string>lcms</string>
+	<key>CFBundleVersion</key>
+	<string>2.7</string>
+	<key>CSResourcesFileMapped</key>
+	<true/>
+</dict>
+</plist>
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/mac/LittleCMS/LittleCMS_Prefix.pch b/third-party/libjxl/libjxl/third_party/lcms/Projects/mac/LittleCMS/LittleCMS_Prefix.pch
new file mode 100755
index 0000000000..40cdb9b313
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/mac/LittleCMS/LittleCMS_Prefix.pch
@@ -0,0 +1,5 @@
+//
+// Prefix header for all source files of the 'LittleCMS' target in the 'LittleCMS' project.
+//
+
+#include <Carbon/Carbon.h>
diff --git a/third-party/libjxl/libjxl/third_party/lcms/Projects/mac/LittleCMS/TestBed-Info.plist b/third-party/libjxl/libjxl/third_party/lcms/Projects/mac/LittleCMS/TestBed-Info.plist
new file mode 100755
index 0000000000..aed5b9f416
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/Projects/mac/LittleCMS/TestBed-Info.plist
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>English</string>
+	<key>CFBundleExecutable</key>
+	<string>${EXECUTABLE_NAME}</string>
+	<key>CFBundleIdentifier</key>
+	<string>com.yourcompany.TestBed</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundlePackageType</key>
+	<string>BNDL</string>
+	<key>CFBundleSignature</key>
+	<string>????</string>
+	<key>CFBundleVersion</key>
+	<string>1.0</string>
+	<key>CSResourcesFileMapped</key>
+	<string>yes</string>
+</dict>
+</plist>
diff --git a/third-party/libjxl/libjxl/third_party/lcms/README.1ST b/third-party/libjxl/libjxl/third_party/lcms/README.1ST
new file mode 100644
index 0000000000..2e5ee4adf9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/README.1ST
@@ -0,0 +1,2 @@
+
+ Please see the documentation in doc folder
diff --git a/third-party/libjxl/libjxl/third_party/lcms/aclocal.m4 b/third-party/libjxl/libjxl/third_party/lcms/aclocal.m4
new file mode 100644
index 0000000000..766134617e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/aclocal.m4
@@ -0,0 +1,1199 @@
+# generated automatically by aclocal 1.15 -*- Autoconf -*-
+
+# Copyright (C) 1996-2014 Free Software Foundation, Inc.
+
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])])
+m4_ifndef([AC_AUTOCONF_VERSION],
+  [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
+m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],,
+[m4_warning([this file was generated for autoconf 2.69.
+You have another version of autoconf.  It may work, but is not guaranteed to.
+If you have problems, you may need to regenerate the build system entirely.
+To do so, use the procedure documented by the package, typically 'autoreconf'.])])
+
+# Copyright (C) 2002-2014 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_AUTOMAKE_VERSION(VERSION)
+# ----------------------------
+# Automake X.Y traces this macro to ensure aclocal.m4 has been
+# generated from the m4 files accompanying Automake X.Y.
+# (This private macro should not be called outside this file.)
+AC_DEFUN([AM_AUTOMAKE_VERSION],
+[am__api_version='1.15'
+dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
+dnl require some minimum version.  Point them to the right macro.
+m4_if([$1], [1.15], [],
+      [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
+])
+
+# _AM_AUTOCONF_VERSION(VERSION)
+# -----------------------------
+# aclocal traces this macro to find the Autoconf version.
+# This is a private macro too.  Using m4_define simplifies
+# the logic in aclocal, which can simply ignore this definition.
+m4_define([_AM_AUTOCONF_VERSION], [])
+
+# AM_SET_CURRENT_AUTOMAKE_VERSION
+# -------------------------------
+# Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced.
+# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
+AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
+[AM_AUTOMAKE_VERSION([1.15])dnl
+m4_ifndef([AC_AUTOCONF_VERSION],
+  [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
+_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
+
+# AM_AUX_DIR_EXPAND                                         -*- Autoconf -*-
+
+# Copyright (C) 2001-2014 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets
+# $ac_aux_dir to '$srcdir/foo'.  In other projects, it is set to
+# '$srcdir', '$srcdir/..', or '$srcdir/../..'.
+#
+# Of course, Automake must honor this variable whenever it calls a
+# tool from the auxiliary directory.  The problem is that $srcdir (and
+# therefore $ac_aux_dir as well) can be either absolute or relative,
+# depending on how configure is run.  This is pretty annoying, since
+# it makes $ac_aux_dir quite unusable in subdirectories: in the top
+# source directory, any form will work fine, but in subdirectories a
+# relative path needs to be adjusted first.
+#
+# $ac_aux_dir/missing
+#    fails when called from a subdirectory if $ac_aux_dir is relative
+# $top_srcdir/$ac_aux_dir/missing
+#    fails if $ac_aux_dir is absolute,
+#    fails when called from a subdirectory in a VPATH build with
+#          a relative $ac_aux_dir
+#
+# The reason of the latter failure is that $top_srcdir and $ac_aux_dir
+# are both prefixed by $srcdir.  In an in-source build this is usually
+# harmless because $srcdir is '.', but things will break when you
+# start a VPATH build or use an absolute $srcdir.
+#
+# So we could use something similar to $top_srcdir/$ac_aux_dir/missing,
+# iff we strip the leading $srcdir from $ac_aux_dir.  That would be:
+#   am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"`
+# and then we would define $MISSING as
+#   MISSING="\${SHELL} $am_aux_dir/missing"
+# This will work as long as MISSING is not called from configure, because
+# unfortunately $(top_srcdir) has no meaning in configure.
+# However there are other variables, like CC, which are often used in
+# configure, and could therefore not use this "fixed" $ac_aux_dir.
+#
+# Another solution, used here, is to always expand $ac_aux_dir to an
+# absolute PATH.  The drawback is that using absolute paths prevents a
+# configured tree from being moved without reconfiguration.
+
+AC_DEFUN([AM_AUX_DIR_EXPAND],
+[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl
+# Expand $ac_aux_dir to an absolute path.
+am_aux_dir=`cd "$ac_aux_dir" && pwd`
+])
+
+# AM_CONDITIONAL                                            -*- Autoconf -*-
+
+# Copyright (C) 1997-2014 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_CONDITIONAL(NAME, SHELL-CONDITION)
+# -------------------------------------
+# Define a conditional.
+AC_DEFUN([AM_CONDITIONAL],
+[AC_PREREQ([2.52])dnl
+ m4_if([$1], [TRUE],  [AC_FATAL([$0: invalid condition: $1])],
+       [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl
+AC_SUBST([$1_TRUE])dnl
+AC_SUBST([$1_FALSE])dnl
+_AM_SUBST_NOTMAKE([$1_TRUE])dnl
+_AM_SUBST_NOTMAKE([$1_FALSE])dnl
+m4_define([_AM_COND_VALUE_$1], [$2])dnl
+if $2; then
+  $1_TRUE=
+  $1_FALSE='#'
+else
+  $1_TRUE='#'
+  $1_FALSE=
+fi
+AC_CONFIG_COMMANDS_PRE(
+[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then
+  AC_MSG_ERROR([[conditional "$1" was never defined.
+Usually this means the macro was only invoked conditionally.]])
+fi])])
+
+# Copyright (C) 1999-2014 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+
+# There are a few dirty hacks below to avoid letting 'AC_PROG_CC' be
+# written in clear, in which case automake, when reading aclocal.m4,
+# will think it sees a *use*, and therefore will trigger all its
+# C support machinery.  Also note that it means that autoscan, seeing
+# CC etc. in the Makefile, will ask for an AC_PROG_CC use...
+
+
+# _AM_DEPENDENCIES(NAME)
+# ----------------------
+# See how the compiler implements dependency checking.
+# NAME is "CC", "CXX", "OBJC", "OBJCXX", "UPC", or "GCJ".
+# We try a few techniques and use that to set a single cache variable.
+#
+# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was
+# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular
+# dependency, and given that the user is not expected to run this macro,
+# just rely on AC_PROG_CC.
+AC_DEFUN([_AM_DEPENDENCIES],
+[AC_REQUIRE([AM_SET_DEPDIR])dnl
+AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl
+AC_REQUIRE([AM_MAKE_INCLUDE])dnl
+AC_REQUIRE([AM_DEP_TRACK])dnl
+
+m4_if([$1], [CC],   [depcc="$CC"   am_compiler_list=],
+      [$1], [CXX],  [depcc="$CXX"  am_compiler_list=],
+      [$1], [OBJC], [depcc="$OBJC" am_compiler_list='gcc3 gcc'],
+      [$1], [OBJCXX], [depcc="$OBJCXX" am_compiler_list='gcc3 gcc'],
+      [$1], [UPC],  [depcc="$UPC"  am_compiler_list=],
+      [$1], [GCJ],  [depcc="$GCJ"  am_compiler_list='gcc3 gcc'],
+                    [depcc="$$1"   am_compiler_list=])
+
+AC_CACHE_CHECK([dependency style of $depcc],
+               [am_cv_$1_dependencies_compiler_type],
+[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
+  # We make a subdir and do the tests there.  Otherwise we can end up
+  # making bogus files that we don't know about and never remove.  For
+  # instance it was reported that on HP-UX the gcc test will end up
+  # making a dummy file named 'D' -- because '-MD' means "put the output
+  # in D".
+  rm -rf conftest.dir
+  mkdir conftest.dir
+  # Copy depcomp to subdir because otherwise we won't find it if we're
+  # using a relative directory.
+  cp "$am_depcomp" conftest.dir
+  cd conftest.dir
+  # We will build objects and dependencies in a subdirectory because
+  # it helps to detect inapplicable dependency modes.  For instance
+  # both Tru64's cc and ICC support -MD to output dependencies as a
+  # side effect of compilation, but ICC will put the dependencies in
+  # the current directory while Tru64 will put them in the object
+  # directory.
+  mkdir sub
+
+  am_cv_$1_dependencies_compiler_type=none
+  if test "$am_compiler_list" = ""; then
+     am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp`
+  fi
+  am__universal=false
+  m4_case([$1], [CC],
+    [case " $depcc " in #(
+     *\ -arch\ *\ -arch\ *) am__universal=true ;;
+     esac],
+    [CXX],
+    [case " $depcc " in #(
+     *\ -arch\ *\ -arch\ *) am__universal=true ;;
+     esac])
+
+  for depmode in $am_compiler_list; do
+    # Setup a source with many dependencies, because some compilers
+    # like to wrap large dependency lists on column 80 (with \), and
+    # we should not choose a depcomp mode which is confused by this.
+    #
+    # We need to recreate these files for each test, as the compiler may
+    # overwrite some of them when testing with obscure command lines.
+    # This happens at least with the AIX C compiler.
+    : > sub/conftest.c
+    for i in 1 2 3 4 5 6; do
+      echo '#include "conftst'$i'.h"' >> sub/conftest.c
+      # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with
+      # Solaris 10 /bin/sh.
+      echo '/* dummy */' > sub/conftst$i.h
+    done
+    echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
+
+    # We check with '-c' and '-o' for the sake of the "dashmstdout"
+    # mode.  It turns out that the SunPro C++ compiler does not properly
+    # handle '-M -o', and we need to detect this.  Also, some Intel
+    # versions had trouble with output in subdirs.
+    am__obj=sub/conftest.${OBJEXT-o}
+    am__minus_obj="-o $am__obj"
+    case $depmode in
+    gcc)
+      # This depmode causes a compiler race in universal mode.
+      test "$am__universal" = false || continue
+      ;;
+    nosideeffect)
+      # After this tag, mechanisms are not by side-effect, so they'll
+      # only be used when explicitly requested.
+      if test "x$enable_dependency_tracking" = xyes; then
+	continue
+      else
+	break
+      fi
+      ;;
+    msvc7 | msvc7msys | msvisualcpp | msvcmsys)
+      # This compiler won't grok '-c -o', but also, the minuso test has
+      # not run yet.  These depmodes are late enough in the game, and
+      # so weak that their functioning should not be impacted.
+      am__obj=conftest.${OBJEXT-o}
+      am__minus_obj=
+      ;;
+    none) break ;;
+    esac
+    if depmode=$depmode \
+       source=sub/conftest.c object=$am__obj \
+       depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
+       $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \
+         >/dev/null 2>conftest.err &&
+       grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep $am__obj sub/conftest.Po > /dev/null 2>&1 &&
+       ${MAKE-make} -s -f confmf > /dev/null 2>&1; then
+      # icc doesn't choke on unknown options, it will just issue warnings
+      # or remarks (even with -Werror).  So we grep stderr for any message
+      # that says an option was ignored or not supported.
+      # When given -MP, icc 7.0 and 7.1 complain thusly:
+      #   icc: Command line warning: ignoring option '-M'; no argument required
+      # The diagnosis changed in icc 8.0:
+      #   icc: Command line remark: option '-MP' not supported
+      if (grep 'ignoring option' conftest.err ||
+          grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
+        am_cv_$1_dependencies_compiler_type=$depmode
+        break
+      fi
+    fi
+  done
+
+  cd ..
+  rm -rf conftest.dir
+else
+  am_cv_$1_dependencies_compiler_type=none
+fi
+])
+AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type])
+AM_CONDITIONAL([am__fastdep$1], [
+  test "x$enable_dependency_tracking" != xno \
+  && test "$am_cv_$1_dependencies_compiler_type" = gcc3])
+])
+
+
+# AM_SET_DEPDIR
+# -------------
+# Choose a directory name for dependency files.
+# This macro is AC_REQUIREd in _AM_DEPENDENCIES.
+AC_DEFUN([AM_SET_DEPDIR],
+[AC_REQUIRE([AM_SET_LEADING_DOT])dnl
+AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl
+])
+
+
+# AM_DEP_TRACK
+# ------------
+AC_DEFUN([AM_DEP_TRACK],
+[AC_ARG_ENABLE([dependency-tracking], [dnl
+AS_HELP_STRING(
+  [--enable-dependency-tracking],
+  [do not reject slow dependency extractors])
+AS_HELP_STRING(
+  [--disable-dependency-tracking],
+  [speeds up one-time build])])
+if test "x$enable_dependency_tracking" != xno; then
+  am_depcomp="$ac_aux_dir/depcomp"
+  AMDEPBACKSLASH='\'
+  am__nodep='_no'
+fi
+AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno])
+AC_SUBST([AMDEPBACKSLASH])dnl
+_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl
+AC_SUBST([am__nodep])dnl
+_AM_SUBST_NOTMAKE([am__nodep])dnl
+])
+
+# Generate code to set up dependency tracking.              -*- Autoconf -*-
+
+# Copyright (C) 1999-2014 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+
+# _AM_OUTPUT_DEPENDENCY_COMMANDS
+# ------------------------------
+AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
+[{
+  # Older Autoconf quotes --file arguments for eval, but not when files
+  # are listed without --file.  Let's play safe and only enable the eval
+  # if we detect the quoting.
+  case $CONFIG_FILES in
+  *\'*) eval set x "$CONFIG_FILES" ;;
+  *)   set x $CONFIG_FILES ;;
+  esac
+  shift
+  for mf
+  do
+    # Strip MF so we end up with the name of the file.
+    mf=`echo "$mf" | sed -e 's/:.*$//'`
+    # Check whether this is an Automake generated Makefile or not.
+    # We used to match only the files named 'Makefile.in', but
+    # some people rename them; so instead we look at the file content.
+    # Grep'ing the first line is not enough: some people post-process
+    # each Makefile.in and add a new line on top of each file to say so.
+    # Grep'ing the whole file is not good either: AIX grep has a line
+    # limit of 2048, but all sed's we know understand at least 4000.
+    if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then
+      dirpart=`AS_DIRNAME("$mf")`
+    else
+      continue
+    fi
+    # Extract the definition of DEPDIR, am__include, and am__quote
+    # from the Makefile without running 'make'.
+    DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
+    test -z "$DEPDIR" && continue
+    am__include=`sed -n 's/^am__include = //p' < "$mf"`
+    test -z "$am__include" && continue
+    am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
+    # Find all dependency output files, they are included files with
+    # $(DEPDIR) in their names.  We invoke sed twice because it is the
+    # simplest approach to changing $(DEPDIR) to its actual value in the
+    # expansion.
+    for file in `sed -n "
+      s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
+	 sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g'`; do
+      # Make sure the directory exists.
+      test -f "$dirpart/$file" && continue
+      fdir=`AS_DIRNAME(["$file"])`
+      AS_MKDIR_P([$dirpart/$fdir])
+      # echo "creating $dirpart/$file"
+      echo '# dummy' > "$dirpart/$file"
+    done
+  done
+}
+])# _AM_OUTPUT_DEPENDENCY_COMMANDS
+
+
+# AM_OUTPUT_DEPENDENCY_COMMANDS
+# -----------------------------
+# This macro should only be invoked once -- use via AC_REQUIRE.
+#
+# This code is only required when automatic dependency tracking
+# is enabled.  FIXME.  This creates each '.P' file that we will
+# need in order to bootstrap the dependency handling code.
+AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
+[AC_CONFIG_COMMANDS([depfiles],
+     [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS],
+     [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"])
+])
+
+# Do all the work for Automake.                             -*- Autoconf -*-
+
+# Copyright (C) 1996-2014 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This macro actually does too much.  Some checks are only needed if
+# your package does certain things.  But this isn't really a big deal.
+
+dnl Redefine AC_PROG_CC to automatically invoke _AM_PROG_CC_C_O.
+m4_define([AC_PROG_CC],
+m4_defn([AC_PROG_CC])
+[_AM_PROG_CC_C_O
+])
+
+# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE])
+# AM_INIT_AUTOMAKE([OPTIONS])
+# -----------------------------------------------
+# The call with PACKAGE and VERSION arguments is the old style
+# call (pre autoconf-2.50), which is being phased out.  PACKAGE
+# and VERSION should now be passed to AC_INIT and removed from
+# the call to AM_INIT_AUTOMAKE.
+# We support both call styles for the transition.  After
+# the next Automake release, Autoconf can make the AC_INIT
+# arguments mandatory, and then we can depend on a new Autoconf
+# release and drop the old call support.
+AC_DEFUN([AM_INIT_AUTOMAKE],
+[AC_PREREQ([2.65])dnl
+dnl Autoconf wants to disallow AM_ names.  We explicitly allow
+dnl the ones we care about.
+m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl
+AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl
+AC_REQUIRE([AC_PROG_INSTALL])dnl
+if test "`cd $srcdir && pwd`" != "`pwd`"; then
+  # Use -I$(srcdir) only when $(srcdir) != ., so that make's output
+  # is not polluted with repeated "-I."
+  AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl
+  # test to see if srcdir already configured
+  if test -f $srcdir/config.status; then
+    AC_MSG_ERROR([source directory already configured; run "make distclean" there first])
+  fi
+fi
+
+# test whether we have cygpath
+if test -z "$CYGPATH_W"; then
+  if (cygpath --version) >/dev/null 2>/dev/null; then
+    CYGPATH_W='cygpath -w'
+  else
+    CYGPATH_W=echo
+  fi
+fi
+AC_SUBST([CYGPATH_W])
+
+# Define the identity of the package.
+dnl Distinguish between old-style and new-style calls.
+m4_ifval([$2],
+[AC_DIAGNOSE([obsolete],
+             [$0: two- and three-arguments forms are deprecated.])
+m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl
+ AC_SUBST([PACKAGE], [$1])dnl
+ AC_SUBST([VERSION], [$2])],
+[_AM_SET_OPTIONS([$1])dnl
+dnl Diagnose old-style AC_INIT with new-style AM_INIT_AUTOMAKE.
+m4_if(
+  m4_ifdef([AC_PACKAGE_NAME], [ok]):m4_ifdef([AC_PACKAGE_VERSION], [ok]),
+  [ok:ok],,
+  [m4_fatal([AC_INIT should be called with package and version arguments])])dnl
+ AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl
+ AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl
+
+_AM_IF_OPTION([no-define],,
+[AC_DEFINE_UNQUOTED([PACKAGE], ["$PACKAGE"], [Name of package])
+ AC_DEFINE_UNQUOTED([VERSION], ["$VERSION"], [Version number of package])])dnl
+
+# Some tools Automake needs.
+AC_REQUIRE([AM_SANITY_CHECK])dnl
+AC_REQUIRE([AC_ARG_PROGRAM])dnl
+AM_MISSING_PROG([ACLOCAL], [aclocal-${am__api_version}])
+AM_MISSING_PROG([AUTOCONF], [autoconf])
+AM_MISSING_PROG([AUTOMAKE], [automake-${am__api_version}])
+AM_MISSING_PROG([AUTOHEADER], [autoheader])
+AM_MISSING_PROG([MAKEINFO], [makeinfo])
+AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
+AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl
+AC_REQUIRE([AC_PROG_MKDIR_P])dnl
+# For better backward compatibility.  To be removed once Automake 1.9.x
+# dies out for good.  For more background, see:
+# <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
+# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
+AC_SUBST([mkdir_p], ['$(MKDIR_P)'])
+# We need awk for the "check" target (and possibly the TAP driver).  The
+# system "awk" is bad on some platforms.
+AC_REQUIRE([AC_PROG_AWK])dnl
+AC_REQUIRE([AC_PROG_MAKE_SET])dnl
+AC_REQUIRE([AM_SET_LEADING_DOT])dnl
+_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])],
+	      [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])],
+			     [_AM_PROG_TAR([v7])])])
+_AM_IF_OPTION([no-dependencies],,
+[AC_PROVIDE_IFELSE([AC_PROG_CC],
+		  [_AM_DEPENDENCIES([CC])],
+		  [m4_define([AC_PROG_CC],
+			     m4_defn([AC_PROG_CC])[_AM_DEPENDENCIES([CC])])])dnl
+AC_PROVIDE_IFELSE([AC_PROG_CXX],
+		  [_AM_DEPENDENCIES([CXX])],
+		  [m4_define([AC_PROG_CXX],
+			     m4_defn([AC_PROG_CXX])[_AM_DEPENDENCIES([CXX])])])dnl
+AC_PROVIDE_IFELSE([AC_PROG_OBJC],
+		  [_AM_DEPENDENCIES([OBJC])],
+		  [m4_define([AC_PROG_OBJC],
+			     m4_defn([AC_PROG_OBJC])[_AM_DEPENDENCIES([OBJC])])])dnl
+AC_PROVIDE_IFELSE([AC_PROG_OBJCXX],
+		  [_AM_DEPENDENCIES([OBJCXX])],
+		  [m4_define([AC_PROG_OBJCXX],
+			     m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])dnl
+])
+AC_REQUIRE([AM_SILENT_RULES])dnl
+dnl The testsuite driver may need to know about EXEEXT, so add the
+dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen.  This
+dnl macro is hooked onto _AC_COMPILER_EXEEXT early, see below.
+AC_CONFIG_COMMANDS_PRE(dnl
+[m4_provide_if([_AM_COMPILER_EXEEXT],
+  [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl
+
+# POSIX will say in a future version that running "rm -f" with no argument
+# is OK; and we want to be able to make that assumption in our Makefile
+# recipes.  So use an aggressive probe to check that the usage we want is
+# actually supported "in the wild" to an acceptable degree.
+# See automake bug#10828.
+# To make any issue more visible, cause the running configure to be aborted
+# by default if the 'rm' program in use doesn't match our expectations; the
+# user can still override this though.
+if rm -f && rm -fr && rm -rf; then : OK; else
+  cat >&2 <<'END'
+Oops!
+
+Your 'rm' program seems unable to run without file operands specified
+on the command line, even when the '-f' option is present.  This is contrary
+to the behaviour of most rm programs out there, and not conforming with
+the upcoming POSIX standard: <http://austingroupbugs.net/view.php?id=542>
+
+Please tell bug-automake@gnu.org about your system, including the value
+of your $PATH and any error possibly output before this message.  This
+can help us improve future automake versions.
+
+END
+  if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then
+    echo 'Configuration will proceed anyway, since you have set the' >&2
+    echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2
+    echo >&2
+  else
+    cat >&2 <<'END'
+Aborting the configuration process, to ensure you take notice of the issue.
+
+You can download and install GNU coreutils to get an 'rm' implementation
+that behaves properly: <http://www.gnu.org/software/coreutils/>.
+
+If you want to complete the configuration process using your problematic
+'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
+to "yes", and re-run configure.
+
+END
+    AC_MSG_ERROR([Your 'rm' program is bad, sorry.])
+  fi
+fi
+dnl The trailing newline in this macro's definition is deliberate, for
+dnl backward compatibility and to allow trailing 'dnl'-style comments
+dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841.
+])
+
+dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion.  Do not
+dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further
+dnl mangled by Autoconf and run in a shell conditional statement.
+m4_define([_AC_COMPILER_EXEEXT],
+m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])])
+
+# When config.status generates a header, we must update the stamp-h file.
+# This file resides in the same directory as the config header
+# that is generated.  The stamp files are numbered to have different names.
+
+# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the
+# loop where config.status creates the headers, so we can generate
+# our stamp files there.
+AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK],
+[# Compute $1's index in $config_headers.
+_am_arg=$1
+_am_stamp_count=1
+for _am_header in $config_headers :; do
+  case $_am_header in
+    $_am_arg | $_am_arg:* )
+      break ;;
+    * )
+      _am_stamp_count=`expr $_am_stamp_count + 1` ;;
+  esac
+done
+echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
+
+# Copyright (C) 2001-2014 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PROG_INSTALL_SH
+# ------------------
+# Define $install_sh.
+AC_DEFUN([AM_PROG_INSTALL_SH],
+[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
+if test x"${install_sh+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\	*)
+    install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
+  *)
+    install_sh="\${SHELL} $am_aux_dir/install-sh"
+  esac
+fi
+AC_SUBST([install_sh])])
+
+# Copyright (C) 2003-2014 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# Check whether the underlying file-system supports filenames
+# with a leading dot.  For instance MS-DOS doesn't.
+AC_DEFUN([AM_SET_LEADING_DOT],
+[rm -rf .tst 2>/dev/null
+mkdir .tst 2>/dev/null
+if test -d .tst; then
+  am__leading_dot=.
+else
+  am__leading_dot=_
+fi
+rmdir .tst 2>/dev/null
+AC_SUBST([am__leading_dot])])
+
+# Add --enable-maintainer-mode option to configure.         -*- Autoconf -*-
+# From Jim Meyering
+
+# Copyright (C) 1996-2014 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_MAINTAINER_MODE([DEFAULT-MODE])
+# ----------------------------------
+# Control maintainer-specific portions of Makefiles.
+# Default is to disable them, unless 'enable' is passed literally.
+# For symmetry, 'disable' may be passed as well.  Anyway, the user
+# can override the default with the --enable/--disable switch.
+AC_DEFUN([AM_MAINTAINER_MODE],
+[m4_case(m4_default([$1], [disable]),
+       [enable], [m4_define([am_maintainer_other], [disable])],
+       [disable], [m4_define([am_maintainer_other], [enable])],
+       [m4_define([am_maintainer_other], [enable])
+        m4_warn([syntax], [unexpected argument to AM@&t@_MAINTAINER_MODE: $1])])
+AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
+  dnl maintainer-mode's default is 'disable' unless 'enable' is passed
+  AC_ARG_ENABLE([maintainer-mode],
+    [AS_HELP_STRING([--]am_maintainer_other[-maintainer-mode],
+      am_maintainer_other[ make rules and dependencies not useful
+      (and sometimes confusing) to the casual installer])],
+    [USE_MAINTAINER_MODE=$enableval],
+    [USE_MAINTAINER_MODE=]m4_if(am_maintainer_other, [enable], [no], [yes]))
+  AC_MSG_RESULT([$USE_MAINTAINER_MODE])
+  AM_CONDITIONAL([MAINTAINER_MODE], [test $USE_MAINTAINER_MODE = yes])
+  MAINT=$MAINTAINER_MODE_TRUE
+  AC_SUBST([MAINT])dnl
+]
+)
+
+# Check to see how 'make' treats includes.	            -*- Autoconf -*-
+
+# Copyright (C) 2001-2014 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_MAKE_INCLUDE()
+# -----------------
+# Check to see how make treats includes.
+AC_DEFUN([AM_MAKE_INCLUDE],
+[am_make=${MAKE-make}
+cat > confinc << 'END'
+am__doit:
+	@echo this is the am__doit target
+.PHONY: am__doit
+END
+# If we don't find an include directive, just comment out the code.
+AC_MSG_CHECKING([for style of include used by $am_make])
+am__include="#"
+am__quote=
+_am_result=none
+# First try GNU make style include.
+echo "include confinc" > confmf
+# Ignore all kinds of additional output from 'make'.
+case `$am_make -s -f confmf 2> /dev/null` in #(
+*the\ am__doit\ target*)
+  am__include=include
+  am__quote=
+  _am_result=GNU
+  ;;
+esac
+# Now try BSD make style include.
+if test "$am__include" = "#"; then
+   echo '.include "confinc"' > confmf
+   case `$am_make -s -f confmf 2> /dev/null` in #(
+   *the\ am__doit\ target*)
+     am__include=.include
+     am__quote="\""
+     _am_result=BSD
+     ;;
+   esac
+fi
+AC_SUBST([am__include])
+AC_SUBST([am__quote])
+AC_MSG_RESULT([$_am_result])
+rm -f confinc confmf
+])
+
+# Fake the existence of programs that GNU maintainers use.  -*- Autoconf -*-
+
+# Copyright (C) 1997-2014 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_MISSING_PROG(NAME, PROGRAM)
+# ------------------------------
+AC_DEFUN([AM_MISSING_PROG],
+[AC_REQUIRE([AM_MISSING_HAS_RUN])
+$1=${$1-"${am_missing_run}$2"}
+AC_SUBST($1)])
+
+# AM_MISSING_HAS_RUN
+# ------------------
+# Define MISSING if not defined so far and test if it is modern enough.
+# If it is, set am_missing_run to use it, otherwise, to nothing.
+AC_DEFUN([AM_MISSING_HAS_RUN],
+[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
+AC_REQUIRE_AUX_FILE([missing])dnl
+if test x"${MISSING+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\	*)
+    MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;;
+  *)
+    MISSING="\${SHELL} $am_aux_dir/missing" ;;
+  esac
+fi
+# Use eval to expand $SHELL
+if eval "$MISSING --is-lightweight"; then
+  am_missing_run="$MISSING "
+else
+  am_missing_run=
+  AC_MSG_WARN(['missing' script is too old or missing])
+fi
+])
+
+# Helper functions for option handling.                     -*- Autoconf -*-
+
+# Copyright (C) 2001-2014 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_MANGLE_OPTION(NAME)
+# -----------------------
+AC_DEFUN([_AM_MANGLE_OPTION],
+[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
+
+# _AM_SET_OPTION(NAME)
+# --------------------
+# Set option NAME.  Presently that only means defining a flag for this option.
+AC_DEFUN([_AM_SET_OPTION],
+[m4_define(_AM_MANGLE_OPTION([$1]), [1])])
+
+# _AM_SET_OPTIONS(OPTIONS)
+# ------------------------
+# OPTIONS is a space-separated list of Automake options.
+AC_DEFUN([_AM_SET_OPTIONS],
+[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
+
+# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET])
+# -------------------------------------------
+# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
+AC_DEFUN([_AM_IF_OPTION],
+[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
+
+# Copyright (C) 1999-2014 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_PROG_CC_C_O
+# ---------------
+# Like AC_PROG_CC_C_O, but changed for automake (AC_PROG_CC is rewritten to call
+# this).  Caches the answer in am_cv_prog_cc_c_o; unless "yes", CC is wrapped with 'compile'.
+AC_DEFUN([_AM_PROG_CC_C_O],
+[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
+AC_REQUIRE_AUX_FILE([compile])dnl
+AC_LANG_PUSH([C])dnl
+AC_CACHE_CHECK(
+  [whether $CC understands -c and -o together],
+  [am_cv_prog_cc_c_o],
+  [AC_LANG_CONFTEST([AC_LANG_PROGRAM([])])
+  # Make sure it works both with $CC and with simple cc.
+  # Following AC_PROG_CC_C_O, we do the test twice because some
+  # compilers refuse to overwrite an existing .o file with -o,
+  # though they will create one.
+  am_cv_prog_cc_c_o=yes
+  for am_i in 1 2; do
+    if AM_RUN_LOG([$CC -c conftest.$ac_ext -o conftest2.$ac_objext]) \
+         && test -f conftest2.$ac_objext; then
+      : OK
+    else
+      am_cv_prog_cc_c_o=no
+      break
+    fi
+  done
+  rm -f core conftest*
+  unset am_i])
+if test "$am_cv_prog_cc_c_o" != yes; then
+   # Losing compiler, so override with the script.
+   # FIXME: It is wrong to rewrite CC.
+   # But if we don't then we get into trouble of one sort or another.
+   # A longer-term fix would be to have automake use am__CC in this case,
+   # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)"
+   CC="$am_aux_dir/compile $CC"
+fi
+AC_LANG_POP([C])])
+
+# For backward compatibility: AM_PROG_CC_C_O expands (once) to a requirement on AC_PROG_CC.
+AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
+
+# Copyright (C) 2001-2014 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_RUN_LOG(COMMAND)
+# -------------------
+# Run COMMAND in a subshell with stdout and stderr sent to AS_MESSAGE_LOG_FD, save its exit
+# status in ac_status, log it, and yield it.  (Adapted from Autoconf's _AC_RUN_LOG macro.)
+AC_DEFUN([AM_RUN_LOG],
+[{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD
+   ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
+   (exit $ac_status); }])
+
+# Check to make sure that the build environment is sane.    -*- Autoconf -*-
+
+# Copyright (C) 1996-2014 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_SANITY_CHECK: abort configure if the working directory or $srcdir contains unsafe
+# characters, or if a freshly created file does not sort as newer than $srcdir/configure.
+AC_DEFUN([AM_SANITY_CHECK],
+[AC_MSG_CHECKING([whether build environment is sane])
+# Reject unsafe characters in $srcdir or the absolute working directory
+# name.  Accept space and tab only in the latter.
+am_lf='
+'
+case `pwd` in
+  *[[\\\"\#\$\&\'\`$am_lf]]*)
+    AC_MSG_ERROR([unsafe absolute working directory name]);;
+esac
+case $srcdir in
+  *[[\\\"\#\$\&\'\`$am_lf\ \	]]*)
+    AC_MSG_ERROR([unsafe srcdir value: '$srcdir']);;
+esac
+
+# Do 'set' in a subshell so we don't clobber the current shell's
+# arguments.  Must try -L first in case configure is actually a
+# symlink; some systems play weird games with the mod time of symlinks
+# (eg FreeBSD returns the mod time of the symlink's containing
+# directory).
+if (
+   am_has_slept=no
+   for am_try in 1 2; do
+     echo "timestamp, slept: $am_has_slept" > conftest.file
+     set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null`
+     if test "$[*]" = "X"; then
+	# -L didn't work.
+	set X `ls -t "$srcdir/configure" conftest.file`
+     fi
+     if test "$[*]" != "X $srcdir/configure conftest.file" \
+	&& test "$[*]" != "X conftest.file $srcdir/configure"; then
+
+	# If neither matched, then we have a broken ls.  This can happen
+	# if, for instance, CONFIG_SHELL is bash and it inherits a
+	# broken ls alias from the environment.  This has actually
+	# happened.  Such a system could not be considered "sane".
+	AC_MSG_ERROR([ls -t appears to fail.  Make sure there is not a broken
+  alias in your environment])
+     fi
+     if test "$[2]" = conftest.file || test $am_try -eq 2; then
+       break
+     fi
+     # Just in case.
+     sleep 1
+     am_has_slept=yes
+   done
+   test "$[2]" = conftest.file
+   )
+then
+   # Ok.
+   :
+else
+   AC_MSG_ERROR([newly created file is older than distributed files!
+Check your system clock])
+fi
+AC_MSG_RESULT([yes])
+# If we didn't sleep, we still need to ensure time stamps of config.status and
+# generated files are strictly newer.
+am_sleep_pid=
+if grep 'slept: no' conftest.file >/dev/null 2>&1; then
+  ( sleep 1 ) &
+  am_sleep_pid=$!
+fi
+AC_CONFIG_COMMANDS_PRE(
+  [AC_MSG_CHECKING([that generated files are newer than configure])
+   if test -n "$am_sleep_pid"; then
+     # Hide warnings about reused PIDs.
+     wait $am_sleep_pid 2>/dev/null
+   fi
+   AC_MSG_RESULT([done])])
+rm -f conftest.file
+])
+
+# Copyright (C) 2009-2014 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_SILENT_RULES([DEFAULT])
+# --------------------------
+# Add --enable-silent-rules/--disable-silent-rules and substitute AM_V, AM_DEFAULT_V,
+# AM_DEFAULT_VERBOSITY and AM_BACKSLASH.  DEFAULT "yes" means less verbose; anything else, verbose.
+AC_DEFUN([AM_SILENT_RULES],
+[AC_ARG_ENABLE([silent-rules], [dnl
+AS_HELP_STRING(
+  [--enable-silent-rules],
+  [less verbose build output (undo: "make V=1")])
+AS_HELP_STRING(
+  [--disable-silent-rules],
+  [verbose build output (undo: "make V=0")])dnl
+])
+case $enable_silent_rules in @%:@ (((
+  yes) AM_DEFAULT_VERBOSITY=0;;
+   no) AM_DEFAULT_VERBOSITY=1;;
+    *) AM_DEFAULT_VERBOSITY=m4_if([$1], [yes], [0], [1]);;
+esac
+dnl
+dnl A few 'make' implementations (e.g., NonStop OS and NextStep)
+dnl do not support nested variable expansions.
+dnl See automake bug#9928 and bug#10237.
+am_make=${MAKE-make}
+AC_CACHE_CHECK([whether $am_make supports nested variables],
+   [am_cv_make_support_nested_variables],
+   [if AS_ECHO([['TRUE=$(BAR$(V))
+BAR0=false
+BAR1=true
+V=1
+am__doit:
+	@$(TRUE)
+.PHONY: am__doit']]) | $am_make -f - >/dev/null 2>&1; then
+  am_cv_make_support_nested_variables=yes
+else
+  am_cv_make_support_nested_variables=no
+fi])
+if test $am_cv_make_support_nested_variables = yes; then
+  dnl Using '$V' instead of '$(V)' breaks IRIX make.
+  AM_V='$(V)'
+  AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)'
+else
+  AM_V=$AM_DEFAULT_VERBOSITY
+  AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY
+fi
+AC_SUBST([AM_V])dnl
+AM_SUBST_NOTMAKE([AM_V])dnl
+AC_SUBST([AM_DEFAULT_V])dnl
+AM_SUBST_NOTMAKE([AM_DEFAULT_V])dnl
+AC_SUBST([AM_DEFAULT_VERBOSITY])dnl
+AM_BACKSLASH='\'
+AC_SUBST([AM_BACKSLASH])dnl
+_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
+])
+
+# Copyright (C) 2001-2014 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PROG_INSTALL_STRIP
+# ---------------------
+# One issue with vendor 'install' (even GNU) is that you can't
+# specify the program used to strip binaries.  This is especially
+# annoying in cross-compiling environments, where the build's strip
+# is unlikely to handle the host's binaries.
+# Fortunately install-sh will honor a STRIPPROG variable, so we always use
+# install-sh in "make install-strip" (INSTALL_STRIP_PROGRAM is substituted as
+# "$(install_sh) -c -s") and initialize STRIPPROG from the user-set STRIP variable.
+AC_DEFUN([AM_PROG_INSTALL_STRIP],
+[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
+# Installed binaries are usually stripped using 'strip' when the user
+# run "make install-strip".  However 'strip' might not be the right
+# tool to use in cross-compilation environments, therefore Automake
+# will honor the 'STRIP' environment variable to overrule this program.
+dnl Don't test for $cross_compiling = yes, because it might be 'maybe'.
+if test "$cross_compiling" != no; then
+  AC_CHECK_TOOL([STRIP], [strip], :)
+fi
+INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
+AC_SUBST([INSTALL_STRIP_PROGRAM])])
+
+# Copyright (C) 2006-2014 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_SUBST_NOTMAKE(VARIABLE)
+# ---------------------------
+# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in.
+# This macro is traced by Automake; its own expansion is empty.
+AC_DEFUN([_AM_SUBST_NOTMAKE])
+
+# AM_SUBST_NOTMAKE(VARIABLE)
+# --------------------------
+# Public sister of _AM_SUBST_NOTMAKE; forwards all of its arguments to it.
+AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
+
+# Check how to create a tarball.                            -*- Autoconf -*-
+
+# Copyright (C) 2004-2014 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_PROG_TAR(FORMAT)
+# --------------------
+# Check how to create a tarball in format FORMAT, caching the chosen tool in
+# am_cv_prog_tar_FORMAT.  FORMAT must be 'v7', 'ustar', or 'pax' (else m4_fatal).
+#
+# Substitute a variable $(am__tar) that is a command
+# writing to stdout a FORMAT-tarball containing the directory
+# $tardir.
+#     tardir=directory && $(am__tar) > result.tar
+#
+# Substitute a variable $(am__untar) that extract such
+# a tarball read from stdin.
+#     $(am__untar) < result.tar
+#
+AC_DEFUN([_AM_PROG_TAR],
+[# Always define AMTAR for backward compatibility.  Yes, it's still used
+# in the wild :-(  We should find a proper way to deprecate it ...
+AC_SUBST([AMTAR], ['$${TAR-tar}'])
+
+# We'll loop over all known methods to create a tar archive until one works.
+_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
+
+m4_if([$1], [v7],
+  [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'],
+
+  [m4_case([$1],
+    [ustar],
+     [# The POSIX 1988 'ustar' format is defined with fixed-size fields.
+      # There is notably a 21 bits limit for the UID and the GID.  In fact,
+      # the 'pax' utility can hang on bigger UID/GID (see automake bug#8343
+      # and bug#13588).
+      am_max_uid=2097151 # 2^21 - 1
+      am_max_gid=$am_max_uid
+      # The $UID and $GID variables are not portable, so we need to resort
+      # to the POSIX-mandated id(1) utility.  Errors in the 'id' calls
+      # below are definitely unexpected, so allow the users to see them
+      # (that is, avoid stderr redirection).
+      am_uid=`id -u || echo unknown`
+      am_gid=`id -g || echo unknown`
+      AC_MSG_CHECKING([whether UID '$am_uid' is supported by ustar format])
+      if test $am_uid -le $am_max_uid; then
+         AC_MSG_RESULT([yes])
+      else
+         AC_MSG_RESULT([no])
+         _am_tools=none
+      fi
+      AC_MSG_CHECKING([whether GID '$am_gid' is supported by ustar format])
+      if test $am_gid -le $am_max_gid; then
+         AC_MSG_RESULT([yes])
+      else
+        AC_MSG_RESULT([no])
+        _am_tools=none
+      fi],
+
+  [pax],
+    [],
+
+  [m4_fatal([Unknown tar format])])
+
+  AC_MSG_CHECKING([how to create a $1 tar archive])
+
+  # Go ahead even if we have the value already cached.  We do so because we
+  # need to set the values for the 'am__tar' and 'am__untar' variables.
+  _am_tools=${am_cv_prog_tar_$1-$_am_tools}
+
+  for _am_tool in $_am_tools; do
+    case $_am_tool in
+    gnutar)
+      for _am_tar in tar gnutar gtar; do
+        AM_RUN_LOG([$_am_tar --version]) && break
+      done
+      am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
+      am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
+      am__untar="$_am_tar -xf -"
+      ;;
+    plaintar)
+      # Must skip GNU tar: if it does not support --format= it doesn't create
+      # ustar tarball either.
+      (tar --version) >/dev/null 2>&1 && continue
+      am__tar='tar chf - "$$tardir"'
+      am__tar_='tar chf - "$tardir"'
+      am__untar='tar xf -'
+      ;;
+    pax)
+      am__tar='pax -L -x $1 -w "$$tardir"'
+      am__tar_='pax -L -x $1 -w "$tardir"'
+      am__untar='pax -r'
+      ;;
+    cpio)
+      am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
+      am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
+      am__untar='cpio -i -H $1 -d'
+      ;;
+    none)
+      am__tar=false
+      am__tar_=false
+      am__untar=false
+      ;;
+    esac
+
+    # If the value was cached, stop now.  We just wanted to have am__tar
+    # and am__untar set.
+    test -n "${am_cv_prog_tar_$1}" && break
+
+    # tar/untar a dummy directory, and stop if the command works.
+    rm -rf conftest.dir
+    mkdir conftest.dir
+    echo GrepMe > conftest.dir/file
+    AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
+    rm -rf conftest.dir
+    if test -s conftest.tar; then
+      AM_RUN_LOG([$am__untar <conftest.tar])
+      AM_RUN_LOG([cat conftest.dir/file])
+      grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
+    fi
+  done
+  rm -rf conftest.dir
+
+  AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
+  AC_MSG_RESULT([$am_cv_prog_tar_$1])])
+
+AC_SUBST([am__tar])
+AC_SUBST([am__untar])
+]) # _AM_PROG_TAR
+
+m4_include([m4/acx_pthread.m4])
+m4_include([m4/ax_append_compile_flags.m4])
+m4_include([m4/ax_append_flag.m4])
+m4_include([m4/ax_check_compile_flag.m4])
+m4_include([m4/ax_gcc_func_attribute.m4])
+m4_include([m4/ax_require_defined.m4])
+m4_include([m4/libtool.m4])
+m4_include([m4/ltoptions.m4])
+m4_include([m4/ltsugar.m4])
+m4_include([m4/ltversion.m4])
+m4_include([m4/lt~obsolete.m4])
diff --git a/third-party/libjxl/libjxl/third_party/lcms/autogen.sh b/third-party/libjxl/libjxl/third_party/lcms/autogen.sh
new file mode 100755
index 0000000000..0b8b74f0c4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/autogen.sh
@@ -0,0 +1,93 @@
+#!/bin/sh
+# Run this to generate all the initial makefiles, etc.  Set NOCONFIGURE to skip configure, NO_LIBTOOLIZE to skip libtoolize.
+
+srcdir=`dirname "$0"`
+test -z "$srcdir" && srcdir=.
+
+DIE=0
+ACLOCAL_FLAGS="-I m4"
+
+(test -f "$srcdir/configure.ac") || {
+    # A single echo: 'echo -n' is not portable across /bin/sh implementations.
+    echo "**Error**: Directory $srcdir does not look like the top-level package directory"
+    exit 1
+}
+
+(autoconf --version) < /dev/null > /dev/null 2>&1 || {
+  echo
+  echo "**Error**: You must have autoconf installed."
+  echo "Download the appropriate package for your distribution,"
+  echo "or get the source tarball at ftp://ftp.gnu.org/pub/gnu/"
+  DIE=1
+}
+
+(grep "^LT_INIT" "$srcdir/configure.ac" >/dev/null) && {
+  (libtool --version) < /dev/null > /dev/null 2>&1 || {
+    echo
+    echo "**Error**: You must have libtool installed."
+    echo "You can get it from: ftp://ftp.gnu.org/pub/gnu/"
+    DIE=1
+  }
+}
+
+(automake --version) < /dev/null > /dev/null 2>&1 || {
+  echo
+  echo "**Error**: You must have automake installed."
+  echo "You can get it from: ftp://ftp.gnu.org/pub/gnu/"
+  DIE=1
+  NO_AUTOMAKE=yes
+}
+
+# if no automake, don't bother testing for aclocal
+test -n "$NO_AUTOMAKE" || (aclocal --version) < /dev/null > /dev/null 2>&1 || {
+  echo
+  echo "**Error**: Missing aclocal.  The version of automake"
+  echo "installed doesn't appear recent enough."
+  echo "You can get automake from ftp://ftp.gnu.org/pub/gnu/"
+  DIE=1
+}
+
+if test "$DIE" -eq 1; then
+  exit 1
+fi
+
+if test -z "$*"; then
+  echo "**Warning**: I am going to run configure with no arguments."
+  echo "If you wish to pass any to it, please specify them on the"
+  echo "$0" " command line."
+  echo
+fi
+
+case $CC in
+xlc )
+  am_opt=--include-deps;;
+esac
+
+# Run the autotools in $srcdir (where configure.ac was checked); the subshell keeps the caller's cwd.
+(
+  cd "$srcdir" || exit 1
+  if grep "^LT_INIT" configure.ac >/dev/null && test -z "$NO_LIBTOOLIZE"; then
+    echo "Running libtoolize..."
+    libtoolize --force --copy
+  fi
+  echo "Running aclocal $ACLOCAL_FLAGS ..."
+  aclocal $ACLOCAL_FLAGS
+  if grep "^AC_CONFIG_HEADERS" configure.ac >/dev/null; then
+    echo "Running autoheader..."
+    autoheader
+  fi
+  echo "Running automake --add-missing --copy --gnu -Wno-portability $am_opt ..."
+  automake --add-missing --copy --gnu -Wno-portability $am_opt
+  echo "Running autoconf ..."
+  autoconf
+)
+
+conf_flags="--enable-maintainer-mode"
+
+if test "x$NOCONFIGURE" = x; then
+  echo "Running $srcdir/configure $conf_flags $* ..."
+  "$srcdir/configure" $conf_flags "$@" \
+  && echo "Now type make to compile." || exit 1
+else
+  echo "Skipping configure process."
+fi
diff --git a/third-party/libjxl/libjxl/third_party/lcms/bin/Bin.txt b/third-party/libjxl/libjxl/third_party/lcms/bin/Bin.txt
new file mode 100644
index 0000000000..a8dac7a604
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/bin/Bin.txt
@@ -0,0 +1 @@
+Bin
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/compile b/third-party/libjxl/libjxl/third_party/lcms/compile
new file mode 100644
index 0000000000..531136b068
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/compile
@@ -0,0 +1,347 @@
+#! /bin/sh
+# Wrapper for compilers which do not understand '-c -o'.
+
+scriptversion=2012-10-14.11; # UTC
+
+# Copyright (C) 1999-2013 Free Software Foundation, Inc.
+# Written by Tom Tromey <tromey@cygnus.com>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# This file is maintained in Automake, please report
+# bugs to <bug-automake@gnu.org> or send patches to
+# <automake-patches@gnu.org>.
+
+nl='
+'
+
+# We need space, tab and new line, in precisely that order.  Quoting is
+# there to prevent tools from complaining about whitespace usage.
+IFS=" ""	$nl"
+
+file_conv=  # conversion type (mingw, cygwin or wine); empty until func_file_conv determines it
+
+# func_file_conv build_file lazy
+# Convert a $build file to $host form and store it in $file
+# Currently only supports Windows hosts. If the determined conversion
+# type is listed in (the comma separated) LAZY, no conversion will
+# take place.
+func_file_conv ()
+{
+  file=$1
+  case $file in
+    / | /[!/]*) # absolute file, and not a UNC file
+      if test -z "$file_conv"; then
+	# lazily determine how to convert abs files
+	case `uname -s` in
+	  MINGW*)
+	    file_conv=mingw
+	    ;;
+	  CYGWIN*)
+	    file_conv=cygwin
+	    ;;
+	  *)
+	    file_conv=wine
+	    ;;
+	esac
+      fi
+      case $file_conv/,$2, in
+	*,$file_conv,*)
+	  ;;
+	mingw/*)
+	  file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
+	  ;;
+	cygwin/*)
+	  file=`cygpath -m "$file" || echo "$file"`
+	  ;;
+	wine/*)
+	  file=`winepath -w "$file" || echo "$file"`
+	  ;;
+      esac
+      ;;
+  esac
+}
+
+# func_cl_dashL linkdir
+# Make cl look for libraries in LINKDIR
+func_cl_dashL ()
+{
+  func_file_conv "$1"
+  if test -z "$lib_path"; then
+    lib_path=$file
+  else
+    lib_path="$lib_path;$file"
+  fi
+  linker_opts="$linker_opts -LIBPATH:$file"
+}
+
+# func_cl_dashl library
+# Do a library search-path lookup for cl
+func_cl_dashl ()
+{
+  lib=$1
+  found=no
+  save_IFS=$IFS
+  IFS=';'
+  for dir in $lib_path $LIB
+  do
+    IFS=$save_IFS
+    if $shared && test -f "$dir/$lib.dll.lib"; then
+      found=yes
+      lib=$dir/$lib.dll.lib
+      break
+    fi
+    if test -f "$dir/$lib.lib"; then
+      found=yes
+      lib=$dir/$lib.lib
+      break
+    fi
+    if test -f "$dir/lib$lib.a"; then
+      found=yes
+      lib=$dir/lib$lib.a
+      break
+    fi
+  done
+  IFS=$save_IFS
+
+  if test "$found" != yes; then
+    lib=$lib.lib
+  fi
+}
+
+# func_cl_wrapper cl arg...
+# Adjust compile command to suit cl: rewrite -o/-I/-l/-L/-static/-Wl,/-Xlinker and source-file arguments, then exec the result.
+func_cl_wrapper ()
+{
+  # Assume a capable shell
+  lib_path=
+  shared=:  # run as a command by func_cl_dashl; -static switches it to false
+  linker_opts=
+  for arg
+  do  # each pass drops $1 and, unless it is consumed, re-appends it (possibly rewritten) to "$@"
+    if test -n "$eat"; then
+      eat=
+    else
+      case $1 in
+	-o)
+	  # configure might choose to run compile as 'compile cc -o foo foo.c'.
+	  eat=1
+	  case $2 in
+	    *.o | *.[oO][bB][jJ])
+	      func_file_conv "$2"
+	      set x "$@" -Fo"$file"
+	      shift
+	      ;;
+	    *)
+	      func_file_conv "$2"
+	      set x "$@" -Fe"$file"
+	      shift
+	      ;;
+	  esac
+	  ;;
+	-I)
+	  eat=1
+	  func_file_conv "$2" mingw
+	  set x "$@" -I"$file"
+	  shift
+	  ;;
+	-I*)
+	  func_file_conv "${1#-I}" mingw
+	  set x "$@" -I"$file"
+	  shift
+	  ;;
+	-l)
+	  eat=1
+	  func_cl_dashl "$2"
+	  set x "$@" "$lib"
+	  shift
+	  ;;
+	-l*)
+	  func_cl_dashl "${1#-l}"
+	  set x "$@" "$lib"
+	  shift
+	  ;;
+	-L)
+	  eat=1
+	  func_cl_dashL "$2"
+	  ;;
+	-L*)
+	  func_cl_dashL "${1#-L}"
+	  ;;
+	-static)
+	  shared=false
+	  ;;
+	-Wl,*)
+	  arg=${1#-Wl,}
+	  save_ifs="$IFS"; IFS=','
+	  for flag in $arg; do
+	    IFS="$save_ifs"
+	    linker_opts="$linker_opts $flag"
+	  done
+	  IFS="$save_ifs"
+	  ;;
+	-Xlinker)
+	  eat=1
+	  linker_opts="$linker_opts $2"
+	  ;;
+	-*)
+	  set x "$@" "$1"
+	  shift
+	  ;;
+	*.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
+	  func_file_conv "$1"
+	  set x "$@" -Tp"$file"
+	  shift
+	  ;;
+	*.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
+	  func_file_conv "$1" mingw
+	  set x "$@" "$file"
+	  shift
+	  ;;
+	*)
+	  set x "$@" "$1"
+	  shift
+	  ;;
+      esac
+    fi
+    shift
+  done
+  if test -n "$linker_opts"; then
+    linker_opts="-link$linker_opts"
+  fi
+  exec "$@" $linker_opts
+  exit 1
+}
+
+eat=  # when non-empty, the argument loops skip the next argument instead of processing it
+
+case $1 in
+  '')
+     echo "$0: No command.  Try '$0 --help' for more information." 1>&2
+     exit 1;
+     ;;
+  -h | --h*)
+    cat <<\EOF
+Usage: compile [--help] [--version] PROGRAM [ARGS]
+
+Wrapper for compilers which do not understand '-c -o'.
+Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
+arguments, and rename the output as expected.
+
+If you are trying to build a whole package this is not the
+right script to run: please start by reading the file 'INSTALL'.
+
+Report bugs to <bug-automake@gnu.org>.
+EOF
+    exit $?
+    ;;
+  -v | --v*)
+    echo "compile $scriptversion"
+    exit $?
+    ;;
+  cl | *[/\\]cl | cl.exe | *[/\\]cl.exe )
+    func_cl_wrapper "$@"      # Doesn't return...
+    ;;
+esac
+
+ofile=
+cfile=
+
+for arg
+do
+  if test -n "$eat"; then
+    eat=
+  else
+    case $1 in
+      -o)
+	# configure might choose to run compile as 'compile cc -o foo foo.c'.
+	# So we strip '-o arg' only if arg is an object.
+	eat=1
+	case $2 in
+	  *.o | *.obj)
+	    ofile=$2
+	    ;;
+	  *)
+	    set x "$@" -o "$2"
+	    shift
+	    ;;
+	esac
+	;;
+      *.c)
+	cfile=$1
+	set x "$@" "$1"
+	shift
+	;;
+      *)
+	set x "$@" "$1"
+	shift
+	;;
+    esac
+  fi
+  shift
+done
+
+if test -z "$ofile" || test -z "$cfile"; then
+  # If no '-o' option was seen then we might have been invoked from a
+  # pattern rule where we don't need one.  That is ok -- this is a
+  # normal compilation that the losing compiler can handle.  If no
+  # '.c' file was seen then we are probably linking.  That is also
+  # ok.
+  exec "$@"
+fi
+
+# Name of file we expect compiler to create.
+cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
+
+# Create the lock directory.
+# Note: use '[/\\:.-]' here to ensure that we don't use the same name
+# that we are using for the .o file.  Also, base the name on the expected
+# object file name, since that is what matters with a parallel build.
+lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
+while true; do
+  if mkdir "$lockdir" >/dev/null 2>&1; then
+    break
+  fi
+  sleep 1  # mkdir failed (e.g. the lock directory already exists); retry
+done
+# FIXME: race condition here if user kills between mkdir and trap.
+trap "rmdir '$lockdir'; exit 1" 1 2 15
+
+# Run the compile.
+"$@"
+ret=$?
+
+if test -f "$cofile"; then
+  test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
+elif test -f "${cofile}bj"; then  # the compiler wrote <name>.obj rather than <name>.o
+  test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
+fi
+
+rmdir "$lockdir"
+exit $ret
+
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/third-party/libjxl/libjxl/third_party/lcms/config.guess b/third-party/libjxl/libjxl/third_party/lcms/config.guess
new file mode 100755
index 0000000000..e7464614ef
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/config.guess
@@ -0,0 +1,1421 @@
+#! /bin/sh
+# Attempt to guess a canonical system name.
+#   Copyright 1992-2014 Free Software Foundation, Inc.
+
+timestamp='2014-11-04'
+
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that
+# program.  This Exception is an additional permission under section 7
+# of the GNU General Public License, version 3 ("GPLv3").
+#
+# Originally written by Per Bothner; maintained since 2000 by Ben Elliston.
+#
+# You can get the latest version of this script from:
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
+#
+# Please send patches to <config-patches@gnu.org>.
+
+
+me=`echo "$0" | sed -e 's,.*/,,'`
+
+usage="\
+Usage: $0 [OPTION]
+
+Output the configuration name of the system \`$me' is run on.
+
+Operation modes:
+  -h, --help         print this help, then exit
+  -t, --time-stamp   print date of last modification, then exit
+  -v, --version      print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+
+version="\
+GNU config.guess ($timestamp)
+
+Originally written by Per Bothner.
+Copyright 1992-2014 Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+
+help="
+Try \`$me --help' for more information."
+
+# Parse command line
+while test $# -gt 0 ; do
+  case $1 in
+    --time-stamp | --time* | -t )
+       echo "$timestamp" ; exit ;;
+    --version | -v )
+       echo "$version" ; exit ;;
+    --help | --h* | -h )
+       echo "$usage"; exit ;;
+    -- )     # Stop option processing
+       shift; break ;;
+    - )	# Use stdin as input.
+       break ;;
+    -* )
+       echo "$me: invalid option $1$help" >&2
+       exit 1 ;;
+    * )
+       break ;;
+  esac
+done
+
+if test $# != 0; then
+  echo "$me: too many arguments$help" >&2
+  exit 1
+fi
+
+trap 'exit 1' 1 2 15
+
+# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
+# compiler to aid in system detection is discouraged as it requires
+# temporary files to be created and, as you can see below, it is a
+# headache to deal with in a portable fashion.
+
+# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still
+# use `HOST_CC' if defined, but it is deprecated.
+
+# Portable tmp directory creation inspired by the Autoconf team.
+
+set_cc_for_build='
+trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
+trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
+: ${TMPDIR=/tmp} ;
+ { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
+ { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
+ { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
+ { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
+dummy=$tmp/dummy ;
+tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ;
+case $CC_FOR_BUILD,$HOST_CC,$CC in
+ ,,)    echo "int x;" > $dummy.c ;
+	for c in cc gcc c89 c99 ; do
+	  if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then
+	     CC_FOR_BUILD="$c"; break ;
+	  fi ;
+	done ;
+	if test x"$CC_FOR_BUILD" = x ; then
+	  CC_FOR_BUILD=no_compiler_found ;
+	fi
+	;;
+ ,,*)   CC_FOR_BUILD=$CC ;;
+ ,*,*)  CC_FOR_BUILD=$HOST_CC ;;
+esac ; set_cc_for_build= ;'
+
+# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
+# (ghazi@noc.rutgers.edu 1994-08-24)
+if (test -f /.attbin/uname) >/dev/null 2>&1 ; then
+	PATH=$PATH:/.attbin ; export PATH
+fi
+
+UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
+UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
+UNAME_SYSTEM=`(uname -s) 2>/dev/null`  || UNAME_SYSTEM=unknown
+UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
+
+case "${UNAME_SYSTEM}" in
+Linux|GNU|GNU/*)
+	# If the system lacks a compiler, then just pick glibc.
+	# We could probably try harder.
+	LIBC=gnu
+
+	eval $set_cc_for_build
+	cat <<-EOF > $dummy.c
+	#include <features.h>
+	#if defined(__UCLIBC__)
+	LIBC=uclibc
+	#elif defined(__dietlibc__)
+	LIBC=dietlibc
+	#else
+	LIBC=gnu
+	#endif
+	EOF
+	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`
+	;;
+esac
+
+# Note: order is significant - the case branches are not exclusive.
+
+case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
+    *:NetBSD:*:*)
+	# NetBSD (nbsd) targets should (where applicable) match one or
+	# more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
+	# *-*-netbsdecoff* and *-*-netbsd*.  For targets that recently
+	# switched to ELF, *-*-netbsd* would select the old
+	# object file format.  This provides both forward
+	# compatibility and a consistent mechanism for selecting the
+	# object file format.
+	#
+	# Note: NetBSD doesn't particularly care about the vendor
+	# portion of the name.  We always set it to "unknown".
+	sysctl="sysctl -n hw.machine_arch"
+	UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \
+	    /usr/sbin/$sysctl 2>/dev/null || echo unknown)`
+	case "${UNAME_MACHINE_ARCH}" in
+	    armeb) machine=armeb-unknown ;;
+	    arm*) machine=arm-unknown ;;
+	    sh3el) machine=shl-unknown ;;
+	    sh3eb) machine=sh-unknown ;;
+	    sh5el) machine=sh5le-unknown ;;
+	    *) machine=${UNAME_MACHINE_ARCH}-unknown ;;
+	esac
+	# The Operating System including object format, if it has switched
+	# to ELF recently, or will in the future.
+	case "${UNAME_MACHINE_ARCH}" in
+	    arm*|i386|m68k|ns32k|sh3*|sparc|vax)
+		eval $set_cc_for_build
+		if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
+			| grep -q __ELF__
+		then
+		    # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
+		    # Return netbsd for either.  FIX?
+		    os=netbsd
+		else
+		    os=netbsdelf
+		fi
+		;;
+	    *)
+		os=netbsd
+		;;
+	esac
+	# The OS release
+	# Debian GNU/NetBSD machines have a different userland, and
+	# thus, need a distinct triplet. However, they do not need
+	# kernel version information, so it can be replaced with a
+	# suitable tag, in the style of linux-gnu.
+	case "${UNAME_VERSION}" in
+	    Debian*)
+		release='-gnu'
+		;;
+	    *)
+		release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
+		;;
+	esac
+	# Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
+	# contains redundant information, the shorter form:
+	# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
+	echo "${machine}-${os}${release}"
+	exit ;;
+    *:Bitrig:*:*)
+	UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`
+	echo ${UNAME_MACHINE_ARCH}-unknown-bitrig${UNAME_RELEASE}
+	exit ;;
+    *:OpenBSD:*:*)
+	UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
+	echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
+	exit ;;
+    *:ekkoBSD:*:*)
+	echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE}
+	exit ;;
+    *:SolidBSD:*:*)
+	echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE}
+	exit ;;
+    macppc:MirBSD:*:*)
+	echo powerpc-unknown-mirbsd${UNAME_RELEASE}
+	exit ;;
+    *:MirBSD:*:*)
+	echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE}
+	exit ;;
+    alpha:OSF1:*:*)
+	case $UNAME_RELEASE in
+	*4.0)
+		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
+		;;
+	*5.*)
+		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
+		;;
+	esac
+	# According to Compaq, /usr/sbin/psrinfo has been available on
+	# OSF/1 and Tru64 systems produced since 1995.  I hope that
+	# covers most systems running today.  This code pipes the CPU
+	# types through head -n 1, so we only detect the type of CPU 0.
+	ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^  The alpha \(.*\) processor.*$/\1/p' | head -n 1`
+	case "$ALPHA_CPU_TYPE" in
+	    "EV4 (21064)")
+		UNAME_MACHINE="alpha" ;;
+	    "EV4.5 (21064)")
+		UNAME_MACHINE="alpha" ;;
+	    "LCA4 (21066/21068)")
+		UNAME_MACHINE="alpha" ;;
+	    "EV5 (21164)")
+		UNAME_MACHINE="alphaev5" ;;
+	    "EV5.6 (21164A)")
+		UNAME_MACHINE="alphaev56" ;;
+	    "EV5.6 (21164PC)")
+		UNAME_MACHINE="alphapca56" ;;
+	    "EV5.7 (21164PC)")
+		UNAME_MACHINE="alphapca57" ;;
+	    "EV6 (21264)")
+		UNAME_MACHINE="alphaev6" ;;
+	    "EV6.7 (21264A)")
+		UNAME_MACHINE="alphaev67" ;;
+	    "EV6.8CB (21264C)")
+		UNAME_MACHINE="alphaev68" ;;
+	    "EV6.8AL (21264B)")
+		UNAME_MACHINE="alphaev68" ;;
+	    "EV6.8CX (21264D)")
+		UNAME_MACHINE="alphaev68" ;;
+	    "EV6.9A (21264/EV69A)")
+		UNAME_MACHINE="alphaev69" ;;
+	    "EV7 (21364)")
+		UNAME_MACHINE="alphaev7" ;;
+	    "EV7.9 (21364A)")
+		UNAME_MACHINE="alphaev79" ;;
+	esac
+	# A Pn.n version is a patched version.
+	# A Vn.n version is a released version.
+	# A Tn.n version is a released field test version.
+	# A Xn.n version is an unreleased experimental baselevel.
+	# 1.2 uses "1.2" for uname -r.
+	echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
+	# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
+	exitcode=$?
+	trap '' 0
+	exit $exitcode ;;
+    Alpha\ *:Windows_NT*:*)
+	# How do we know it's Interix rather than the generic POSIX subsystem?
+	# Should we change UNAME_MACHINE based on the output of uname instead
+	# of the specific Alpha model?
+	echo alpha-pc-interix
+	exit ;;
+    21064:Windows_NT:50:3)
+	echo alpha-dec-winnt3.5
+	exit ;;
+    Amiga*:UNIX_System_V:4.0:*)
+	echo m68k-unknown-sysv4
+	exit ;;
+    *:[Aa]miga[Oo][Ss]:*:*)
+	echo ${UNAME_MACHINE}-unknown-amigaos
+	exit ;;
+    *:[Mm]orph[Oo][Ss]:*:*)
+	echo ${UNAME_MACHINE}-unknown-morphos
+	exit ;;
+    *:OS/390:*:*)
+	echo i370-ibm-openedition
+	exit ;;
+    *:z/VM:*:*)
+	echo s390-ibm-zvmoe
+	exit ;;
+    *:OS400:*:*)
+	echo powerpc-ibm-os400
+	exit ;;
+    arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
+	echo arm-acorn-riscix${UNAME_RELEASE}
+	exit ;;
+    arm*:riscos:*:*|arm*:RISCOS:*:*)
+	echo arm-unknown-riscos
+	exit ;;
+    SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
+	echo hppa1.1-hitachi-hiuxmpp
+	exit ;;
+    Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
+	# akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
+	if test "`(/bin/universe) 2>/dev/null`" = att ; then
+		echo pyramid-pyramid-sysv3
+	else
+		echo pyramid-pyramid-bsd
+	fi
+	exit ;;
+    NILE*:*:*:dcosx)
+	echo pyramid-pyramid-svr4
+	exit ;;
+    DRS?6000:unix:4.0:6*)
+	echo sparc-icl-nx6
+	exit ;;
+    DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*)
+	case `/usr/bin/uname -p` in
+	    sparc) echo sparc-icl-nx7; exit ;;
+	esac ;;
+    s390x:SunOS:*:*)
+	echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+	exit ;;
+    sun4H:SunOS:5.*:*)
+	echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+	exit ;;
+    sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
+	echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+	exit ;;
+    i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
+	echo i386-pc-auroraux${UNAME_RELEASE}
+	exit ;;
+    i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
+	eval $set_cc_for_build
+	SUN_ARCH="i386"
+	# If there is a compiler, see if it is configured for 64-bit objects.
+	# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
+	# This test works for both compilers.
+	if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
+	    if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
+		(CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
+		grep IS_64BIT_ARCH >/dev/null
+	    then
+		SUN_ARCH="x86_64"
+	    fi
+	fi
+	echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+	exit ;;
+    sun4*:SunOS:6*:*)
+	# According to config.sub, this is the proper way to canonicalize
+	# SunOS6.  Hard to guess exactly what SunOS6 will be like, but
+	# it's likely to be more like Solaris than SunOS4.
+	echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+	exit ;;
+    sun4*:SunOS:*:*)
+	case "`/usr/bin/arch -k`" in
+	    Series*|S4*)
+		UNAME_RELEASE=`uname -v`
+		;;
+	esac
+	# Japanese Language versions have a version number like `4.1.3-JL'.
+	echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`
+	exit ;;
+    sun3*:SunOS:*:*)
+	echo m68k-sun-sunos${UNAME_RELEASE}
+	exit ;;
+    sun*:*:4.2BSD:*)
+	UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
+	test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3
+	case "`/bin/arch`" in
+	    sun3)
+		echo m68k-sun-sunos${UNAME_RELEASE}
+		;;
+	    sun4)
+		echo sparc-sun-sunos${UNAME_RELEASE}
+		;;
+	esac
+	exit ;;
+    aushp:SunOS:*:*)
+	echo sparc-auspex-sunos${UNAME_RELEASE}
+	exit ;;
+    # The situation for MiNT is a little confusing.  The machine name
+    # can be virtually everything (everything which is not
+    # "atarist" or "atariste" at least should have a processor
+    # > m68000).  The system name ranges from "MiNT" over "FreeMiNT"
+    # to the lowercase version "mint" (or "freemint").  Finally
+    # the system name "TOS" denotes a system which is actually not
+    # MiNT.  But MiNT is downward compatible to TOS, so this should
+    # be no problem.
+    atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
+	echo m68k-atari-mint${UNAME_RELEASE}
+	exit ;;
+    atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
+	echo m68k-atari-mint${UNAME_RELEASE}
+	exit ;;
+    *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
+	echo m68k-atari-mint${UNAME_RELEASE}
+	exit ;;
+    milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
+	echo m68k-milan-mint${UNAME_RELEASE}
+	exit ;;
+    hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
+	echo m68k-hades-mint${UNAME_RELEASE}
+	exit ;;
+    *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
+	echo m68k-unknown-mint${UNAME_RELEASE}
+	exit ;;
+    m68k:machten:*:*)
+	echo m68k-apple-machten${UNAME_RELEASE}
+	exit ;;
+    powerpc:machten:*:*)
+	echo powerpc-apple-machten${UNAME_RELEASE}
+	exit ;;
+    RISC*:Mach:*:*)
+	echo mips-dec-mach_bsd4.3
+	exit ;;
+    RISC*:ULTRIX:*:*)
+	echo mips-dec-ultrix${UNAME_RELEASE}
+	exit ;;
+    VAX*:ULTRIX*:*:*)
+	echo vax-dec-ultrix${UNAME_RELEASE}
+	exit ;;
+    2020:CLIX:*:* | 2430:CLIX:*:*)
+	echo clipper-intergraph-clix${UNAME_RELEASE}
+	exit ;;
+    mips:*:*:UMIPS | mips:*:*:RISCos)
+	eval $set_cc_for_build
+	sed 's/^	//' << EOF >$dummy.c
+#ifdef __cplusplus
+#include <stdio.h>  /* for printf() prototype */
+	int main (int argc, char *argv[]) {
+#else
+	int main (argc, argv) int argc; char *argv[]; {
+#endif
+	#if defined (host_mips) && defined (MIPSEB)
+	#if defined (SYSTYPE_SYSV)
+	  printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0);
+	#endif
+	#if defined (SYSTYPE_SVR4)
+	  printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0);
+	#endif
+	#if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
+	  printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0);
+	#endif
+	#endif
+	  exit (-1);
+	}
+EOF
+	$CC_FOR_BUILD -o $dummy $dummy.c &&
+	  dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` &&
+	  SYSTEM_NAME=`$dummy $dummyarg` &&
+	    { echo "$SYSTEM_NAME"; exit; }
+	echo mips-mips-riscos${UNAME_RELEASE}
+	exit ;;
+    Motorola:PowerMAX_OS:*:*)
+	echo powerpc-motorola-powermax
+	exit ;;
+    Motorola:*:4.3:PL8-*)
+	echo powerpc-harris-powermax
+	exit ;;
+    Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
+	echo powerpc-harris-powermax
+	exit ;;
+    Night_Hawk:Power_UNIX:*:*)
+	echo powerpc-harris-powerunix
+	exit ;;
+    m88k:CX/UX:7*:*)
+	echo m88k-harris-cxux7
+	exit ;;
+    m88k:*:4*:R4*)
+	echo m88k-motorola-sysv4
+	exit ;;
+    m88k:*:3*:R3*)
+	echo m88k-motorola-sysv3
+	exit ;;
+    AViiON:dgux:*:*)
+	# DG/UX returns AViiON for all architectures, so the processor
+	# name printed by uname -p is used to tell the machines apart.
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
+	# The expansions are quoted so that an empty or multi-word value
+	# compares as an ordinary string instead of making the [ command
+	# fail with a syntax error.
+	if [ "$UNAME_PROCESSOR" = mc88100 ] || [ "$UNAME_PROCESSOR" = mc88110 ]
+	then
+	    # m88k: an empty TARGET_BINARY_INTERFACE or "m88kdguxelf"
+	    # selects plain dgux; any other value selects dguxbcs.
+	    if [ "${TARGET_BINARY_INTERFACE}x" = m88kdguxelfx ] || \
+	       [ "${TARGET_BINARY_INTERFACE}x" = x ]
+	    then
+		echo m88k-dg-dgux${UNAME_RELEASE}
+	    else
+		echo m88k-dg-dguxbcs${UNAME_RELEASE}
+	    fi
+	else
+	    # Every other processor name is reported as i586.
+	    echo i586-dg-dgux${UNAME_RELEASE}
+	fi
+	exit ;;
+    M88*:DolphinOS:*:*)	# DolphinOS (SVR3)
+	echo m88k-dolphin-sysv3
+	exit ;;
+    M88*:*:R3*:*)
+	# Delta 88k system running SVR3
+	echo m88k-motorola-sysv3
+	exit ;;
+    XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
+	echo m88k-tektronix-sysv3
+	exit ;;
+    Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
+	echo m68k-tektronix-bsd
+	exit ;;
+    *:IRIX*:*:*)
+	echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'`
+	exit ;;
+    ????????:AIX?:[12].1:2)   # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
+	echo romp-ibm-aix     # uname -m gives an 8 hex-code CPU id
+	exit ;;               # Note that: echo "'`uname -s`'" gives 'AIX '
+    i*86:AIX:*:*)
+	echo i386-ibm-aix
+	exit ;;
+    ia64:AIX:*:*)
+	if [ -x /usr/bin/oslevel ] ; then
+		IBM_REV=`/usr/bin/oslevel`
+	else
+		IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
+	fi
+	echo ${UNAME_MACHINE}-ibm-aix${IBM_REV}
+	exit ;;
+    *:AIX:2:3)
+	if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
+		eval $set_cc_for_build
+		sed 's/^		//' << EOF >$dummy.c
+		#include <sys/systemcfg.h>
+
+		main()
+			{
+			if (!__power_pc())
+				exit(1);
+			puts("powerpc-ibm-aix3.2.5");
+			exit(0);
+			}
+EOF
+		if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy`
+		then
+			echo "$SYSTEM_NAME"
+		else
+			echo rs6000-ibm-aix3.2.5
+		fi
+	elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
+		echo rs6000-ibm-aix3.2.4
+	else
+		echo rs6000-ibm-aix3.2
+	fi
+	exit ;;
+    *:AIX:*:[4567])
+	# AIX with a uname version of 4 through 7.
+	# Use the first word of the first line that lsdev prints for
+	# available processor-class devices as the CPU device name.
+	IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
+	# If that device's attribute listing contains " POWER" the
+	# architecture is reported as rs6000, otherwise as powerpc.
+	if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
+		IBM_ARCH=rs6000
+	else
+		IBM_ARCH=powerpc
+	fi
+	# Release: the third colon-separated field of the lslpp record for
+	# bos.rte.libc with its trailing digits replaced by 0; when lslpp is
+	# not executable, fall back to "<uname version>.<uname release>".
+	if [ -x /usr/bin/lslpp ] ; then
+		IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc |
+			   awk -F: '{ print $3 }' | sed s/[0-9]*$/0/`
+	else
+		IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
+	fi
+	echo ${IBM_ARCH}-ibm-aix${IBM_REV}
+	exit ;;
+    *:AIX:*:*)
+	echo rs6000-ibm-aix
+	exit ;;
+    ibmrt:4.4BSD:*|romp-ibm:BSD:*)
+	echo romp-ibm-bsd4.4
+	exit ;;
+    ibmrt:*BSD:*|romp-ibm:BSD:*)            # covers RT/PC BSD and
+	echo romp-ibm-bsd${UNAME_RELEASE}   # 4.3 with uname added to
+	exit ;;                             # report: romp-ibm BSD 4.3
+    *:BOSX:*:*)
+	echo rs6000-bull-bosx
+	exit ;;
+    DPX/2?00:B.O.S.:*:*)
+	echo m68k-bull-sysv3
+	exit ;;
+    9000/[34]??:4.3bsd:1.*:*)
+	echo m68k-hp-bsd
+	exit ;;
+    hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
+	echo m68k-hp-bsd4.4
+	exit ;;
+    9000/[34678]??:HP-UX:*:*)
+	HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
+	case "${UNAME_MACHINE}" in
+	    9000/31? )            HP_ARCH=m68000 ;;
+	    9000/[34]?? )         HP_ARCH=m68k ;;
+	    9000/[678][0-9][0-9])
+		if [ -x /usr/bin/getconf ]; then
+		    sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
+		    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
+		    case "${sc_cpu_version}" in
+		      523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
+		      528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
+		      532)                      # CPU_PA_RISC2_0
+			case "${sc_kernel_bits}" in
+			  32) HP_ARCH="hppa2.0n" ;;
+			  64) HP_ARCH="hppa2.0w" ;;
+			  '') HP_ARCH="hppa2.0" ;;   # HP-UX 10.20
+			esac ;;
+		    esac
+		fi
+		if [ "${HP_ARCH}" = "" ]; then
+		    eval $set_cc_for_build
+		    sed 's/^		//' << EOF >$dummy.c
+
+		#define _HPUX_SOURCE
+		#include <stdlib.h>
+		#include <unistd.h>
+
+		int main ()
+		{
+		#if defined(_SC_KERNEL_BITS)
+		    long bits = sysconf(_SC_KERNEL_BITS);
+		#endif
+		    long cpu  = sysconf (_SC_CPU_VERSION);
+
+		    switch (cpu)
+			{
+			case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
+			case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
+			case CPU_PA_RISC2_0:
+		#if defined(_SC_KERNEL_BITS)
+			    switch (bits)
+				{
+				case 64: puts ("hppa2.0w"); break;
+				case 32: puts ("hppa2.0n"); break;
+				default: puts ("hppa2.0"); break;
+				} break;
+		#else  /* !defined(_SC_KERNEL_BITS) */
+			    puts ("hppa2.0"); break;
+		#endif
+			default: puts ("hppa1.0"); break;
+			}
+		    exit (0);
+		}
+EOF
+		    (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
+		    test -z "$HP_ARCH" && HP_ARCH=hppa
+		fi ;;
+	esac
+	# HP_ARCH is quoted because it can still be empty here (the model
+	# patterns above do not cover every machine string the enclosing
+	# arm accepts); unquoted, an empty value makes the [ command fail
+	# with a syntax error instead of simply comparing unequal.
+	if [ "${HP_ARCH}" = "hppa2.0w" ]
+	then
+	    eval $set_cc_for_build
+
+	    # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
+	    # 32-bit code.  hppa64-hp-hpux* has the same kernel and a compiler
+	    # generating 64-bit code.  GNU and HP use different nomenclature:
+	    #
+	    # $ CC_FOR_BUILD=cc ./config.guess
+	    # => hppa2.0w-hp-hpux11.23
+	    # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
+	    # => hppa64-hp-hpux11.23
+
+	    # If the token __LP64__ survives preprocessing, the compiler does
+	    # not define it, so keep hppa2.0w; otherwise report hppa64.
+	    if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
+		grep -q __LP64__
+	    then
+		HP_ARCH="hppa2.0w"
+	    else
+		HP_ARCH="hppa64"
+	    fi
+	fi
+	echo ${HP_ARCH}-hp-hpux${HPUX_REV}
+	exit ;;
+    ia64:HP-UX:*:*)
+	HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
+	echo ia64-hp-hpux${HPUX_REV}
+	exit ;;
+    3050*:HI-UX:*:*)
+	eval $set_cc_for_build
+	sed 's/^	//' << EOF >$dummy.c
+	#include <unistd.h>
+	int
+	main ()
+	{
+	  long cpu = sysconf (_SC_CPU_VERSION);
+	  /* The order matters, because CPU_IS_HP_MC68K erroneously returns
+	     true for CPU_PA_RISC1_0.  CPU_IS_PA_RISC returns correct
+	     results, however.  */
+	  if (CPU_IS_PA_RISC (cpu))
+	    {
+	      switch (cpu)
+		{
+		  case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break;
+		  case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break;
+		  case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break;
+		  default: puts ("hppa-hitachi-hiuxwe2"); break;
+		}
+	    }
+	  else if (CPU_IS_HP_MC68K (cpu))
+	    puts ("m68k-hitachi-hiuxwe2");
+	  else puts ("unknown-hitachi-hiuxwe2");
+	  exit (0);
+	}
+EOF
+	$CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` &&
+		{ echo "$SYSTEM_NAME"; exit; }
+	echo unknown-hitachi-hiuxwe2
+	exit ;;
+    9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* )
+	echo hppa1.1-hp-bsd
+	exit ;;
+    9000/8??:4.3bsd:*:*)
+	echo hppa1.0-hp-bsd
+	exit ;;
+    *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
+	echo hppa1.0-hp-mpeix
+	exit ;;
+    hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* )
+	echo hppa1.1-hp-osf
+	exit ;;
+    hp8??:OSF1:*:*)
+	echo hppa1.0-hp-osf
+	exit ;;
+    i*86:OSF1:*:*)
+	# The OS name is osf1mk when /usr/sbin/sysversion is executable,
+	# and plain osf1 otherwise.
+	osf1_suffix=
+	test -x /usr/sbin/sysversion && osf1_suffix=mk
+	echo ${UNAME_MACHINE}-unknown-osf1${osf1_suffix}
+	exit ;;
+    parisc*:Lites*:*:*)
+	echo hppa1.1-hp-lites
+	exit ;;
+    C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
+	echo c1-convex-bsd
+	exit ;;
+    C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
+	if getsysinfo -f scalar_acc
+	then echo c32-convex-bsd
+	else echo c2-convex-bsd
+	fi
+	exit ;;
+    C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
+	echo c34-convex-bsd
+	exit ;;
+    C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
+	echo c38-convex-bsd
+	exit ;;
+    C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
+	echo c4-convex-bsd
+	exit ;;
+    CRAY*Y-MP:*:*:*)
+	echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
+    CRAY*[A-Z]90:*:*:*)
+	echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \
+	| sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
+	      -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \
+	      -e 's/\.[^.]*$/.X/'
+	exit ;;
+    CRAY*TS:*:*:*)
+	echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
+    CRAY*T3E:*:*:*)
+	echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
+    CRAY*SV1:*:*:*)
+	echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
+    *:UNICOS/mp:*:*)
+	echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
+    F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
+	FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
+	FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+	FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
+	echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	exit ;;
+    5000:UNIX_System_V:4.*:*)
+	FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+	FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
+	echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	exit ;;
+    i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
+	echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
+	exit ;;
+    sparc*:BSD/OS:*:*)
+	echo sparc-unknown-bsdi${UNAME_RELEASE}
+	exit ;;
+    *:BSD/OS:*:*)
+	echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
+	exit ;;
+    *:FreeBSD:*:*)
+	# The CPU field is the processor name from uname -p, except that
+	# "amd64" is reported as x86_64.  The release is cut at the first
+	# "-" or "(".
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
+	if test "x${UNAME_PROCESSOR}" = xamd64 ; then
+	    UNAME_PROCESSOR=x86_64
+	fi
+	echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
+	exit ;;
+    i*:CYGWIN*:*)
+	echo ${UNAME_MACHINE}-pc-cygwin
+	exit ;;
+    *:MINGW64*:*)
+	echo ${UNAME_MACHINE}-pc-mingw64
+	exit ;;
+    *:MINGW*:*)
+	echo ${UNAME_MACHINE}-pc-mingw32
+	exit ;;
+    *:MSYS*:*)
+	echo ${UNAME_MACHINE}-pc-msys
+	exit ;;
+    i*:windows32*:*)
+	# uname -m includes "-pc" on this system.
+	echo ${UNAME_MACHINE}-mingw32
+	exit ;;
+    i*:PW*:*)
+	echo ${UNAME_MACHINE}-pc-pw32
+	exit ;;
+    *:Interix*:*)
+	case ${UNAME_MACHINE} in
+	    x86)
+		echo i586-pc-interix${UNAME_RELEASE}
+		exit ;;
+	    authenticamd | genuineintel | EM64T)
+		echo x86_64-unknown-interix${UNAME_RELEASE}
+		exit ;;
+	    IA64)
+		echo ia64-unknown-interix${UNAME_RELEASE}
+		exit ;;
+	esac ;;
+    [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
+	echo i${UNAME_MACHINE}-pc-mks
+	exit ;;
+    8664:Windows_NT:*)
+	echo x86_64-pc-mks
+	exit ;;
+    i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
+	# How do we know it's Interix rather than the generic POSIX subsystem?
+	# It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
+	# change UNAME_MACHINE based on the output of uname instead of i386?
+	echo i586-pc-interix
+	exit ;;
+    i*:UWIN*:*)
+	echo ${UNAME_MACHINE}-pc-uwin
+	exit ;;
+    amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
+	echo x86_64-unknown-cygwin
+	exit ;;
+    p*:CYGWIN*:*)
+	echo powerpcle-unknown-cygwin
+	exit ;;
+    prep*:SunOS:5.*:*)
+	echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+	exit ;;
+    *:GNU:*:*)
+	# the GNU system
+	echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
+	exit ;;
+    *:GNU/*:*:*)
+	# other systems with GNU libc and userland
+	echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC}
+	exit ;;
+    i*86:Minix:*:*)
+	echo ${UNAME_MACHINE}-pc-minix
+	exit ;;
+    aarch64:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    aarch64_be:Linux:*:*)
+	UNAME_MACHINE=aarch64_be
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    alpha:Linux:*:*)
+	# Refine the generic "alpha" machine name using the "cpu model"
+	# line of /proc/cpuinfo.  Models not listed below leave
+	# UNAME_MACHINE unchanged.
+	# NOTE(review): PCA57 maps to alphapca56 here, while the OSF1 arm
+	# reports alphapca57 for its PCA57-class CPU -- confirm intended.
+	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
+	  EV5)   UNAME_MACHINE=alphaev5 ;;
+	  EV56)  UNAME_MACHINE=alphaev56 ;;
+	  PCA56) UNAME_MACHINE=alphapca56 ;;
+	  PCA57) UNAME_MACHINE=alphapca56 ;;
+	  EV6)   UNAME_MACHINE=alphaev6 ;;
+	  EV67)  UNAME_MACHINE=alphaev67 ;;
+	  EV68*) UNAME_MACHINE=alphaev68 ;;
+	esac
+	# If the private headers of /bin/sh mention ld.so.1, override LIBC
+	# with gnulibc1; otherwise the previously determined LIBC is kept.
+	objdump --private-headers /bin/sh | grep -q ld.so.1
+	if test "$?" = 0 ; then LIBC="gnulibc1" ; fi
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    arc:Linux:*:* | arceb:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    arm*:Linux:*:*)
+	# Pick the ABI suffix by feeding marker tokens through the build
+	# compiler's preprocessor.  A token that comes back unchanged is a
+	# macro the compiler does not define, so each grep succeeding
+	# means "macro NOT defined".
+	eval $set_cc_for_build
+	if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
+	    | grep -q __ARM_EABI__
+	then
+	    # __ARM_EABI__ is not defined: no eabi suffix.
+	    echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	else
+	    if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
+		| grep -q __ARM_PCS_VFP
+	    then
+		# __ARM_PCS_VFP is not defined: plain eabi suffix.
+		echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi
+	    else
+		# __ARM_PCS_VFP was expanded (or the preprocessor printed
+		# nothing at all): eabihf suffix.
+		echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf
+	    fi
+	fi
+	exit ;;
+    avr32*:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    cris:Linux:*:*)
+	echo ${UNAME_MACHINE}-axis-linux-${LIBC}
+	exit ;;
+    crisv32:Linux:*:*)
+	echo ${UNAME_MACHINE}-axis-linux-${LIBC}
+	exit ;;
+    frv:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    hexagon:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    i*86:Linux:*:*)
+	echo ${UNAME_MACHINE}-pc-linux-${LIBC}
+	exit ;;
+    ia64:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    m32r*:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    m68*:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    mips:Linux:*:* | mips64:Linux:*:*)
+	eval $set_cc_for_build
+	sed 's/^	//' << EOF >$dummy.c
+	#undef CPU
+	#undef ${UNAME_MACHINE}
+	#undef ${UNAME_MACHINE}el
+	#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
+	CPU=${UNAME_MACHINE}el
+	#else
+	#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
+	CPU=${UNAME_MACHINE}
+	#else
+	CPU=
+	#endif
+	#endif
+EOF
+	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
+	test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
+	;;
+    openrisc*:Linux:*:*)
+	echo or1k-unknown-linux-${LIBC}
+	exit ;;
+    or32:Linux:*:* | or1k*:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    padre:Linux:*:*)
+	echo sparc-unknown-linux-${LIBC}
+	exit ;;
+    parisc64:Linux:*:* | hppa64:Linux:*:*)
+	echo hppa64-unknown-linux-${LIBC}
+	exit ;;
+    parisc:Linux:*:* | hppa:Linux:*:*)
+	# Look for CPU level
+	case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
+	  PA7*) echo hppa1.1-unknown-linux-${LIBC} ;;
+	  PA8*) echo hppa2.0-unknown-linux-${LIBC} ;;
+	  *)    echo hppa-unknown-linux-${LIBC} ;;
+	esac
+	exit ;;
+    ppc64:Linux:*:*)
+	echo powerpc64-unknown-linux-${LIBC}
+	exit ;;
+    ppc:Linux:*:*)
+	echo powerpc-unknown-linux-${LIBC}
+	exit ;;
+    ppc64le:Linux:*:*)
+	echo powerpc64le-unknown-linux-${LIBC}
+	exit ;;
+    ppcle:Linux:*:*)
+	echo powerpcle-unknown-linux-${LIBC}
+	exit ;;
+    s390:Linux:*:* | s390x:Linux:*:*)
+	echo ${UNAME_MACHINE}-ibm-linux-${LIBC}
+	exit ;;
+    sh64*:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    sh*:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    sparc:Linux:*:* | sparc64:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    tile*:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    vax:Linux:*:*)
+	echo ${UNAME_MACHINE}-dec-linux-${LIBC}
+	exit ;;
+    x86_64:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    xtensa*:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    i*86:DYNIX/ptx:4*:*)
+	# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
+	# earlier versions are messed up and put the nodename in both
+	# sysname and nodename.
+	echo i386-sequent-sysv4
+	exit ;;
+    i*86:UNIX_SV:4.2MP:2.*)
+	# Unixware is an offshoot of SVR4, but it has its own version
+	# number series starting with 2...
+	# I am not positive that other SVR4 systems won't match this,
+	# I just have to hope.  -- rms.
+	# Use sysv4.2uw... so that sysv4* matches it.
+	echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
+	exit ;;
+    i*86:OS/2:*:*)
+	# If we were able to find `uname', then EMX Unix compatibility
+	# is probably installed.
+	echo ${UNAME_MACHINE}-pc-os2-emx
+	exit ;;
+    i*86:XTS-300:*:STOP)
+	echo ${UNAME_MACHINE}-unknown-stop
+	exit ;;
+    i*86:atheos:*:*)
+	echo ${UNAME_MACHINE}-unknown-atheos
+	exit ;;
+    i*86:syllable:*:*)
+	echo ${UNAME_MACHINE}-pc-syllable
+	exit ;;
+    i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
+	echo i386-unknown-lynxos${UNAME_RELEASE}
+	exit ;;
+    i*86:*DOS:*:*)
+	echo ${UNAME_MACHINE}-pc-msdosdjgpp
+	exit ;;
+    i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*)
+	UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'`
+	if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
+		echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL}
+	else
+		echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL}
+	fi
+	exit ;;
+    i*86:*:5:[678]*)
+	# UnixWare 7.x, OpenUNIX and OpenServer 6.
+	case `/bin/uname -X | grep "^Machine"` in
+	    *486*)	     UNAME_MACHINE=i486 ;;
+	    *Pentium)	     UNAME_MACHINE=i586 ;;
+	    *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
+	esac
+	echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}
+	exit ;;
+    i*86:*:3.2:*)
+	if test -f /usr/options/cb.name; then
+		UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
+		echo ${UNAME_MACHINE}-pc-isc$UNAME_REL
+	elif /bin/uname -X 2>/dev/null >/dev/null ; then
+		UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`
+		(/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
+		(/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \
+			&& UNAME_MACHINE=i586
+		(/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \
+			&& UNAME_MACHINE=i686
+		(/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
+			&& UNAME_MACHINE=i686
+		echo ${UNAME_MACHINE}-pc-sco$UNAME_REL
+	else
+		echo ${UNAME_MACHINE}-pc-sysv32
+	fi
+	exit ;;
+    pc:*:*:*)
+	# Left here for compatibility:
+	# uname -m prints for DJGPP always 'pc', but it prints nothing about
+	# the processor, so we play safe by assuming i586.
+	# Note: whatever this is, it MUST be the same as what config.sub
+	# prints for the "djgpp" host, or else GDB configuration will decide that
+	# this is a cross-build.
+	echo i586-pc-msdosdjgpp
+	exit ;;
+    Intel:Mach:3*:*)
+	echo i386-pc-mach3
+	exit ;;
+    paragon:*:*:*)
+	echo i860-intel-osf1
+	exit ;;
+    i860:*:4.*:*) # i860-SVR4
+	if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
+	  echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4
+	else # Add other i860-SVR4 vendors below as they are discovered.
+	  echo i860-unknown-sysv${UNAME_RELEASE}  # Unknown i860-SVR4
+	fi
+	exit ;;
+    mini*:CTIX:SYS*5:*)
+	# "miniframe"
+	echo m68010-convergent-sysv
+	exit ;;
+    mc68k:UNIX:SYSTEM5:3.51m)
+	echo m68k-convergent-sysv
+	exit ;;
+    M680?0:D-NIX:5.3:*)
+	echo m68k-diab-dnix
+	exit ;;
+    M68*:*:R3V[5678]*:*)
+	test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;;
+    3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)
+	OS_REL=''
+	test -r /etc/.relid \
+	&& OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+	  && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
+	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
+	  && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
+    3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
+	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+	  && { echo i486-ncr-sysv4; exit; } ;;
+    NCR*:*:4.2:* | MPRAS*:*:4.2:*)
+	OS_REL='.3'
+	test -r /etc/.relid \
+	    && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+	    && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
+	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
+	    && { echo i586-ncr-sysv4.3${OS_REL}; exit; }
+	/bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \
+	    && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
+    m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
+	echo m68k-unknown-lynxos${UNAME_RELEASE}
+	exit ;;
+    mc68030:UNIX_System_V:4.*:*)
+	echo m68k-atari-sysv4
+	exit ;;
+    TSUNAMI:LynxOS:2.*:*)
+	echo sparc-unknown-lynxos${UNAME_RELEASE}
+	exit ;;
+    rs6000:LynxOS:2.*:*)
+	echo rs6000-unknown-lynxos${UNAME_RELEASE}
+	exit ;;
+    PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
+	echo powerpc-unknown-lynxos${UNAME_RELEASE}
+	exit ;;
+    SM[BE]S:UNIX_SV:*:*)
+	echo mips-dde-sysv${UNAME_RELEASE}
+	exit ;;
+    RM*:ReliantUNIX-*:*:*)
+	echo mips-sni-sysv4
+	exit ;;
+    RM*:SINIX-*:*:*)
+	echo mips-sni-sysv4
+	exit ;;
+    *:SINIX-*:*:*)
+	if uname -p 2>/dev/null >/dev/null ; then
+		UNAME_MACHINE=`(uname -p) 2>/dev/null`
+		echo ${UNAME_MACHINE}-sni-sysv4
+	else
+		echo ns32k-sni-sysv
+	fi
+	exit ;;
+    PENTIUM:*:4.0*:*)	# Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+			# says <Richard.M.Bartel@ccMail.Census.GOV>
+	echo i586-unisys-sysv4
+	exit ;;
+    *:UNIX_System_V:4*:FTX*)
+	# From Gerald Hewes <hewes@openmarket.com>.
+	# How about differentiating between stratus architectures? -djm
+	echo hppa1.1-stratus-sysv4
+	exit ;;
+    *:*:*:FTX*)
+	# From seanf@swdc.stratus.com.
+	echo i860-stratus-sysv4
+	exit ;;
+    i*86:VOS:*:*)
+	# From Paul.Green@stratus.com.
+	echo ${UNAME_MACHINE}-stratus-vos
+	exit ;;
+    *:VOS:*:*)
+	# From Paul.Green@stratus.com.
+	echo hppa1.1-stratus-vos
+	exit ;;
+    mc68*:A/UX:*:*)
+	echo m68k-apple-aux${UNAME_RELEASE}
+	exit ;;
+    news*:NEWS-OS:6*:*)
+	echo mips-sony-newsos6
+	exit ;;
+    R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
+	if [ -d /usr/nec ]; then
+		echo mips-nec-sysv${UNAME_RELEASE}
+	else
+		echo mips-unknown-sysv${UNAME_RELEASE}
+	fi
+	exit ;;
+    BeBox:BeOS:*:*)	# BeOS running on hardware made by Be, PPC only.
+	echo powerpc-be-beos
+	exit ;;
+    BeMac:BeOS:*:*)	# BeOS running on Mac or Mac clone, PPC only.
+	echo powerpc-apple-beos
+	exit ;;
+    BePC:BeOS:*:*)	# BeOS running on Intel PC compatible.
+	echo i586-pc-beos
+	exit ;;
+    BePC:Haiku:*:*)	# Haiku running on Intel PC compatible.
+	echo i586-pc-haiku
+	exit ;;
+    x86_64:Haiku:*:*)
+	echo x86_64-unknown-haiku
+	exit ;;
+    SX-4:SUPER-UX:*:*)
+	echo sx4-nec-superux${UNAME_RELEASE}
+	exit ;;
+    SX-5:SUPER-UX:*:*)
+	echo sx5-nec-superux${UNAME_RELEASE}
+	exit ;;
+    SX-6:SUPER-UX:*:*)
+	echo sx6-nec-superux${UNAME_RELEASE}
+	exit ;;
+    SX-7:SUPER-UX:*:*)
+	echo sx7-nec-superux${UNAME_RELEASE}
+	exit ;;
+    SX-8:SUPER-UX:*:*)
+	echo sx8-nec-superux${UNAME_RELEASE}
+	exit ;;
+    SX-8R:SUPER-UX:*:*)
+	echo sx8r-nec-superux${UNAME_RELEASE}
+	exit ;;
+    Power*:Rhapsody:*:*)
+	echo powerpc-apple-rhapsody${UNAME_RELEASE}
+	exit ;;
+    *:Rhapsody:*:*)
+	echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE}
+	exit ;;
+    *:Darwin:*:*)
+	UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
+	eval $set_cc_for_build
+	if test "$UNAME_PROCESSOR" = unknown ; then
+	    UNAME_PROCESSOR=powerpc
+	fi
+	if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then
+	    if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
+		if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
+		    (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
+		    grep IS_64BIT_ARCH >/dev/null
+		then
+		    case $UNAME_PROCESSOR in
+			i386) UNAME_PROCESSOR=x86_64 ;;
+			powerpc) UNAME_PROCESSOR=powerpc64 ;;
+		    esac
+		fi
+	    fi
+	elif test "$UNAME_PROCESSOR" = i386 ; then
+	    # Avoid executing cc on OS X 10.9, as it ships with a stub
+	    # that puts up a graphical alert prompting to install
+	    # developer tools.  Any system running Mac OS X 10.7 or
+	    # later (Darwin 11 and later) is required to have a 64-bit
+	    # processor. This is not true of the ARM version of Darwin
+	    # that Apple uses in portable devices.
+	    UNAME_PROCESSOR=x86_64
+	fi
+	echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
+	exit ;;
+    *:procnto*:*:* | *:QNX:[0123456789]*:*)
+	UNAME_PROCESSOR=`uname -p`
+	if test "$UNAME_PROCESSOR" = "x86"; then
+		UNAME_PROCESSOR=i386
+		UNAME_MACHINE=pc
+	fi
+	echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE}
+	exit ;;
+    *:QNX:*:4*)
+	echo i386-pc-qnx
+	exit ;;
+    NEO-?:NONSTOP_KERNEL:*:*)
+	echo neo-tandem-nsk${UNAME_RELEASE}
+	exit ;;
+    NSE-*:NONSTOP_KERNEL:*:*)
+	echo nse-tandem-nsk${UNAME_RELEASE}
+	exit ;;
+    NSR-?:NONSTOP_KERNEL:*:*)
+	echo nsr-tandem-nsk${UNAME_RELEASE}
+	exit ;;
+    *:NonStop-UX:*:*)
+	echo mips-compaq-nonstopux
+	exit ;;
+    BS2000:POSIX*:*:*)
+	echo bs2000-siemens-sysv
+	exit ;;
+    DS/*:UNIX_System_V:*:*)
+	echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE}
+	exit ;;
+    *:Plan9:*:*)
+	# "uname -m" is not consistent, so use $cputype instead. 386
+	# is converted to i386 for consistency with other x86
+	# operating systems.
+	if test "$cputype" = "386"; then
+	    UNAME_MACHINE=i386
+	else
+	    UNAME_MACHINE="$cputype"
+	fi
+	echo ${UNAME_MACHINE}-unknown-plan9
+	exit ;;
+    *:TOPS-10:*:*)
+	echo pdp10-unknown-tops10
+	exit ;;
+    *:TENEX:*:*)
+	echo pdp10-unknown-tenex
+	exit ;;
+    KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
+	echo pdp10-dec-tops20
+	exit ;;
+    XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
+	echo pdp10-xkl-tops20
+	exit ;;
+    *:TOPS-20:*:*)
+	echo pdp10-unknown-tops20
+	exit ;;
+    *:ITS:*:*)
+	echo pdp10-unknown-its
+	exit ;;
+    SEI:*:*:SEIUX)
+	echo mips-sei-seiux${UNAME_RELEASE}
+	exit ;;
+    *:DragonFly:*:*)
+	echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
+	exit ;;
+    *:*VMS:*:*)
+	UNAME_MACHINE=`(uname -p) 2>/dev/null`
+	case "${UNAME_MACHINE}" in
+	    A*) echo alpha-dec-vms ; exit ;;
+	    I*) echo ia64-dec-vms ; exit ;;
+	    V*) echo vax-dec-vms ; exit ;;
+	esac ;;
+    *:XENIX:*:SysV)
+	echo i386-pc-xenix
+	exit ;;
+    i*86:skyos:*:*)
+	echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//'
+	exit ;;
+    i*86:rdos:*:*)
+	echo ${UNAME_MACHINE}-pc-rdos
+	exit ;;
+    i*86:AROS:*:*)
+	echo ${UNAME_MACHINE}-pc-aros
+	exit ;;
+    x86_64:VMkernel:*:*)
+	echo ${UNAME_MACHINE}-unknown-esx
+	exit ;;
+esac
+
+cat >&2 <<EOF	# unquoted delimiter: $0, the variables and the backquoted commands below are expanded when printed
+$0: unable to guess system type
+
+This script, last modified $timestamp, has failed to recognize
+the operating system you are using. It is advised that you
+download the most up to date version of the config scripts from
+
+  http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
+and
+  http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
+
+If the version you run ($0) is already up to date, please
+send the following data and any information you think might be
+pertinent to <config-patches@gnu.org> in order to provide the needed
+information to handle your system.
+
+config.guess timestamp = $timestamp
+
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null`
+/bin/uname -X     = `(/bin/uname -X) 2>/dev/null`
+
+hostinfo               = `(hostinfo) 2>/dev/null`
+/bin/universe          = `(/bin/universe) 2>/dev/null`
+/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null`
+/bin/arch              = `(/bin/arch) 2>/dev/null`
+/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null`
+
+UNAME_MACHINE = ${UNAME_MACHINE}
+UNAME_RELEASE = ${UNAME_RELEASE}
+UNAME_SYSTEM  = ${UNAME_SYSTEM}
+UNAME_VERSION = ${UNAME_VERSION}
+EOF
+
+exit 1	# failure status accompanying the "unable to guess system type" report above
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "timestamp='"
+# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-end: "'"
+# End:
diff --git a/third-party/libjxl/libjxl/third_party/lcms/config.sub b/third-party/libjxl/libjxl/third_party/lcms/config.sub
new file mode 100755
index 0000000000..7ffe373784
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/config.sub
@@ -0,0 +1,1807 @@
+#! /bin/sh
+# Configuration validation subroutine script.
+#   Copyright 1992-2014 Free Software Foundation, Inc.
+
+timestamp='2014-12-03'
+
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that
+# program.  This Exception is an additional permission under section 7
+# of the GNU General Public License, version 3 ("GPLv3").
+
+
+# Please send patches to <config-patches@gnu.org>.
+#
+# Configuration subroutine to validate and canonicalize a configuration type.
+# Supply the specified configuration type as an argument.
+# If it is invalid, we print an error message on stderr and exit with code 1.
+# Otherwise, we print the canonical config type on stdout and succeed.
+
+# You can get the latest version of this script from:
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
+
+# This file is supposed to be the same for all GNU packages
+# and recognize all the CPU types, system types and aliases
+# that are meaningful with *any* GNU software.
+# Each package is responsible for reporting which valid configurations
+# it does not support.  The user should be able to distinguish
+# a failure to support a valid configuration from a meaningless
+# configuration.
+
+# The goal of this file is to map all the various variations of a given
+# machine specification into a single specification in the form:
+#	CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
+# or in some cases, the newer four-part form:
+#	CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
+# It is wrong to echo any other type of specification.
+
+me=`echo "$0" | sed -e 's,.*/,,'`	# script name with any leading directory part removed; prefixes the diagnostics
+
+usage="\
+Usage: $0 [OPTION] CPU-MFR-OPSYS
+       $0 [OPTION] ALIAS
+
+Canonicalize a configuration name.
+
+Operation modes:
+  -h, --help         print this help, then exit
+  -t, --time-stamp   print date of last modification, then exit
+  -v, --version      print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+
+version="\
+GNU config.sub ($timestamp)
+
+Copyright 1992-2014 Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+
+help="
+Try \`$me --help' for more information."
+
+# Parse command line.  Every arm either exits or breaks, so at most one word is examined.
+while test $# -gt 0 ; do
+  case $1 in
+    --time-stamp | --time* | -t )
+       echo "$timestamp" ; exit ;;
+    --version | -v )
+       echo "$version" ; exit ;;
+    --help | --h* | -h )
+       echo "$usage"; exit ;;
+    -- )     # Stop option processing
+       shift; break ;;
+    - )	# Use stdin as input.
+       break ;;
+    -* )	# Unknown option: diagnose on stderr so stdout only ever carries a result.
+       echo "$me: invalid option $1$help" >&2
+       exit 1 ;;
+
+    *local*)
+       # First pass through any local machine types.
+       echo $1
+       exit ;;
+
+    * )
+       break ;;
+  esac
+done
+
+case $# in
+ 0) echo "$me: missing argument$help" >&2
+    exit 1;;
+ 1) ;;
+ *) echo "$me: too many arguments$help" >&2
+    exit 1;;
+esac
+
+# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
+# Here we must recognize all the valid KERNEL-OS combinations.
+maybe_os=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`	# last two dash-separated fields, else the whole argument
+case $maybe_os in
+  nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
+  linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
+  knetbsd*-gnu* | netbsd*-gnu* | \
+  kopensolaris*-gnu* | \
+  storm-chaos* | os2-emx* | rtmk-nova*)
+    os=-$maybe_os
+    basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
+    ;;
+  android-linux)
+    os=-linux-android
+    basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown
+    ;;
+  *)
+    basic_machine=`echo "$1" | sed 's/-[^-]*$//'`
+    if [ "$basic_machine" != "$1" ]	# quoted: an argument with blanks or glob characters must not break the test
+    then os=`echo "$1" | sed 's/.*-/-/'`
+    else os=; fi
+    ;;
+esac
+
+### Let's recognize common machines as not being operating systems so
+### that things like config.sub decstation-3100 work.  We also
+### recognize some manufacturers as not being operating systems, so we
+### can provide default operating systems below.
+case $os in
+	-sun*os*)
+		# Prevent following clause from handling this invalid input.
+		;;
+	-dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \
+	-att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \
+	-unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \
+	-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
+	-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
+	-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
+	-apple | -axis | -knuth | -cray | -microblaze*)
+		os=	# last field is a machine or vendor word, not an OS: keep the whole argument as the machine
+		basic_machine=$1
+		;;
+	-bluegene*)
+		os=-cnk
+		;;
+	-sim | -cisco | -oki | -wec | -winbond)
+		os=
+		basic_machine=$1
+		;;
+	-scout)	# no change: $os and $basic_machine stay as split above
+		;;
+	-wrs)
+		os=-vxworks
+		basic_machine=$1
+		;;
+	-chorusos*)
+		os=-chorusos
+		basic_machine=$1
+		;;
+	-chorusrdb)
+		os=-chorusrdb
+		basic_machine=$1
+		;;
+	-hiux*)
+		os=-hiuxwe2
+		;;
+	-sco6)
+		os=-sco5v6
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco5)
+		os=-sco3.2v5
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco4)
+		os=-sco3.2v4
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco3.2.[4-9]*)
+		os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco3.2v[4-9]*)
+		# $os already carries the version (3.2v4 or newer); only rewrite the machine to *86-pc.
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco5v6*)
+		# $os is kept as given (sco5v6*); only rewrite the machine to *86-pc.
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco*)
+		os=-sco3.2v2	# any SCO spelling not matched above falls back to 3.2v2
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-udk*)
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-isc)
+		os=-isc2.2	# bare "isc" falls back to 2.2; versioned spellings are handled by -isc* below
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-clix*)
+		basic_machine=clipper-intergraph
+		;;
+	-isc*)
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-lynx*178)
+		os=-lynxos178
+		;;
+	-lynx*5)
+		os=-lynxos5
+		;;
+	-lynx*)
+		os=-lynxos
+		;;
+	-ptx*)
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'`
+		;;
+	-windowsnt*)
+		os=`echo $os | sed -e 's/windowsnt/winnt/'`
+		;;
+	-psos*)
+		os=-psos
+		;;
+	-mint | -mint[0-9]*)
+		basic_machine=m68k-atari
+		os=-mint
+		;;
+esac
+
+# Decode aliases for certain CPU-COMPANY combinations.
+case $basic_machine in
+	# Recognize the basic CPU types without company name.
+	# Some are omitted here because they have special meanings below.
+	1750a | 580 \
+	| a29k \
+	| aarch64 | aarch64_be \
+	| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
+	| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
+	| am33_2.0 \
+	| arc | arceb \
+	| arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \
+	| avr | avr32 \
+	| be32 | be64 \
+	| bfin \
+	| c4x | c8051 | clipper \
+	| d10v | d30v | dlx | dsp16xx \
+	| epiphany \
+	| fido | fr30 | frv \
+	| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
+	| hexagon \
+	| i370 | i860 | i960 | ia64 \
+	| ip2k | iq2000 \
+	| k1om \
+	| le32 | le64 \
+	| lm32 \
+	| m32c | m32r | m32rle | m68000 | m68k | m88k \
+	| maxq | mb | microblaze | microblazeel | mcore | mep | metag \
+	| mips | mipsbe | mipseb | mipsel | mipsle \
+	| mips16 \
+	| mips64 | mips64el \
+	| mips64octeon | mips64octeonel \
+	| mips64orion | mips64orionel \
+	| mips64r5900 | mips64r5900el \
+	| mips64vr | mips64vrel \
+	| mips64vr4100 | mips64vr4100el \
+	| mips64vr4300 | mips64vr4300el \
+	| mips64vr5000 | mips64vr5000el \
+	| mips64vr5900 | mips64vr5900el \
+	| mipsisa32 | mipsisa32el \
+	| mipsisa32r2 | mipsisa32r2el \
+	| mipsisa32r6 | mipsisa32r6el \
+	| mipsisa64 | mipsisa64el \
+	| mipsisa64r2 | mipsisa64r2el \
+	| mipsisa64r6 | mipsisa64r6el \
+	| mipsisa64sb1 | mipsisa64sb1el \
+	| mipsisa64sr71k | mipsisa64sr71kel \
+	| mipsr5900 | mipsr5900el \
+	| mipstx39 | mipstx39el \
+	| mn10200 | mn10300 \
+	| moxie \
+	| mt \
+	| msp430 \
+	| nds32 | nds32le | nds32be \
+	| nios | nios2 | nios2eb | nios2el \
+	| ns16k | ns32k \
+	| open8 | or1k | or1knd | or32 \
+	| pdp10 | pdp11 | pj | pjl \
+	| powerpc | powerpc64 | powerpc64le | powerpcle \
+	| pyramid \
+	| riscv32 | riscv64 \
+	| rl78 | rx \
+	| score \
+	| sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
+	| sh64 | sh64le \
+	| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
+	| sparcv8 | sparcv9 | sparcv9b | sparcv9v \
+	| spu \
+	| tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
+	| ubicom32 \
+	| v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \
+	| visium \
+	| we32k \
+	| x86 | xc16x | xstormy16 | xtensa \
+	| z8k | z80)
+		basic_machine=$basic_machine-unknown
+		;;
+	c54x)
+		basic_machine=tic54x-unknown
+		;;
+	c55x)
+		basic_machine=tic55x-unknown
+		;;
+	c6x)
+		basic_machine=tic6x-unknown
+		;;
+	leon|leon[3-9])
+		basic_machine=sparc-$basic_machine
+		;;
+	m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip)
+		basic_machine=$basic_machine-unknown
+		os=-none
+		;;
+	m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k)
+		;;
+	ms1)
+		basic_machine=mt-unknown
+		;;
+
+	strongarm | thumb | xscale)
+		basic_machine=arm-unknown
+		;;
+	xgate)
+		basic_machine=$basic_machine-unknown
+		os=-none
+		;;
+	xscaleeb)
+		basic_machine=armeb-unknown
+		;;
+
+	xscaleel)
+		basic_machine=armel-unknown
+		;;
+
+	# We use `pc' rather than `unknown'
+	# because (1) that's what they normally are, and
+	# (2) the word "unknown" tends to confuse beginning users.
+	i*86 | x86_64)
+	  basic_machine=$basic_machine-pc
+	  ;;
+	# Object if more than one company name word.
+	*-*-*)
+		echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
+		exit 1
+		;;
+	# Recognize the basic CPU types with company name.
+	580-* \
+	| a29k-* \
+	| aarch64-* | aarch64_be-* \
+	| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
+	| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
+	| alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \
+	| arm-*  | armbe-* | armle-* | armeb-* | armv*-* \
+	| avr-* | avr32-* \
+	| be32-* | be64-* \
+	| bfin-* | bs2000-* \
+	| c[123]* | c30-* | [cjt]90-* | c4x-* \
+	| c8051-* | clipper-* | craynv-* | cydra-* \
+	| d10v-* | d30v-* | dlx-* \
+	| elxsi-* \
+	| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
+	| h8300-* | h8500-* \
+	| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
+	| hexagon-* \
+	| i*86-* | i860-* | i960-* | ia64-* \
+	| ip2k-* | iq2000-* \
+	| k1om-* \
+	| le32-* | le64-* \
+	| lm32-* \
+	| m32c-* | m32r-* | m32rle-* \
+	| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
+	| m88110-* | m88k-* | maxq-* | mcore-* | metag-* \
+	| microblaze-* | microblazeel-* \
+	| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
+	| mips16-* \
+	| mips64-* | mips64el-* \
+	| mips64octeon-* | mips64octeonel-* \
+	| mips64orion-* | mips64orionel-* \
+	| mips64r5900-* | mips64r5900el-* \
+	| mips64vr-* | mips64vrel-* \
+	| mips64vr4100-* | mips64vr4100el-* \
+	| mips64vr4300-* | mips64vr4300el-* \
+	| mips64vr5000-* | mips64vr5000el-* \
+	| mips64vr5900-* | mips64vr5900el-* \
+	| mipsisa32-* | mipsisa32el-* \
+	| mipsisa32r2-* | mipsisa32r2el-* \
+	| mipsisa32r6-* | mipsisa32r6el-* \
+	| mipsisa64-* | mipsisa64el-* \
+	| mipsisa64r2-* | mipsisa64r2el-* \
+	| mipsisa64r6-* | mipsisa64r6el-* \
+	| mipsisa64sb1-* | mipsisa64sb1el-* \
+	| mipsisa64sr71k-* | mipsisa64sr71kel-* \
+	| mipsr5900-* | mipsr5900el-* \
+	| mipstx39-* | mipstx39el-* \
+	| mmix-* \
+	| mt-* \
+	| msp430-* \
+	| nds32-* | nds32le-* | nds32be-* \
+	| nios-* | nios2-* | nios2eb-* | nios2el-* \
+	| none-* | np1-* | ns16k-* | ns32k-* \
+	| open8-* \
+	| or1k*-* \
+	| orion-* \
+	| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
+	| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
+	| pyramid-* \
+	| rl78-* | romp-* | rs6000-* | rx-* \
+	| sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
+	| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
+	| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
+	| sparclite-* \
+	| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \
+	| tahoe-* \
+	| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
+	| tile*-* \
+	| tron-* \
+	| ubicom32-* \
+	| v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \
+	| vax-* \
+	| visium-* \
+	| we32k-* \
+	| x86-* | x86_64-* | xc16x-* | xps100-* \
+	| xstormy16-* | xtensa*-* \
+	| ymp-* \
+	| z8k-* | z80-*)
+		;;
+	# Recognize the basic CPU types without company name, with glob match.
+	xtensa*)
+		basic_machine=$basic_machine-unknown
+		;;
+	# Recognize the various machine names and aliases which stand
+	# for a CPU type and a company and sometimes even an OS.
+	386bsd)
+		basic_machine=i386-unknown
+		os=-bsd
+		;;
+	3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
+		basic_machine=m68000-att
+		;;
+	3b*)
+		basic_machine=we32k-att
+		;;
+	a29khif)
+		basic_machine=a29k-amd
+		os=-udi
+		;;
+	abacus)
+		basic_machine=abacus-unknown
+		;;
+	adobe68k)
+		basic_machine=m68010-adobe
+		os=-scout
+		;;
+	alliant | fx80)
+		basic_machine=fx80-alliant
+		;;
+	altos | altos3068)
+		basic_machine=m68k-altos
+		;;
+	am29k)
+		basic_machine=a29k-none
+		os=-bsd
+		;;
+	amd64)
+		basic_machine=x86_64-pc
+		;;
+	amd64-*)
+		basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	amdahl)
+		basic_machine=580-amdahl
+		os=-sysv
+		;;
+	amiga | amiga-*)
+		basic_machine=m68k-unknown
+		;;
+	amigaos | amigados)
+		basic_machine=m68k-unknown
+		os=-amigaos
+		;;
+	amigaunix | amix)
+		basic_machine=m68k-unknown
+		os=-sysv4
+		;;
+	apollo68)
+		basic_machine=m68k-apollo
+		os=-sysv
+		;;
+	apollo68bsd)
+		basic_machine=m68k-apollo
+		os=-bsd
+		;;
+	aros)
+		basic_machine=i386-pc
+		os=-aros
+		;;
+	aux)
+		basic_machine=m68k-apple
+		os=-aux
+		;;
+	balance)
+		basic_machine=ns32k-sequent
+		os=-dynix
+		;;
+	blackfin)
+		basic_machine=bfin-unknown
+		os=-linux
+		;;
+	blackfin-*)
+		basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'`
+		os=-linux
+		;;
+	bluegene*)
+		basic_machine=powerpc-ibm
+		os=-cnk
+		;;
+	c54x-*)
+		basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	c55x-*)
+		basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	c6x-*)
+		basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	c90)
+		basic_machine=c90-cray
+		os=-unicos
+		;;
+	cegcc)
+		basic_machine=arm-unknown
+		os=-cegcc
+		;;
+	convex-c1)
+		basic_machine=c1-convex
+		os=-bsd
+		;;
+	convex-c2)
+		basic_machine=c2-convex
+		os=-bsd
+		;;
+	convex-c32)
+		basic_machine=c32-convex
+		os=-bsd
+		;;
+	convex-c34)
+		basic_machine=c34-convex
+		os=-bsd
+		;;
+	convex-c38)
+		basic_machine=c38-convex
+		os=-bsd
+		;;
+	cray | j90)
+		basic_machine=j90-cray
+		os=-unicos
+		;;
+	craynv)
+		basic_machine=craynv-cray
+		os=-unicosmp
+		;;
+	cr16 | cr16-*)
+		basic_machine=cr16-unknown
+		os=-elf
+		;;
+	crds | unos)
+		basic_machine=m68k-crds
+		;;
+	crisv32 | crisv32-* | etraxfs*)
+		basic_machine=crisv32-axis
+		;;
+	cris | cris-* | etrax*)
+		basic_machine=cris-axis
+		;;
+	crx)
+		basic_machine=crx-unknown
+		os=-elf
+		;;
+	da30 | da30-*)
+		basic_machine=m68k-da30
+		;;
+	decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)
+		basic_machine=mips-dec
+		;;
+	decsystem10* | dec10*)
+		basic_machine=pdp10-dec
+		os=-tops10
+		;;
+	decsystem20* | dec20*)
+		basic_machine=pdp10-dec
+		os=-tops20
+		;;
+	delta | 3300 | motorola-3300 | motorola-delta \
+	      | 3300-motorola | delta-motorola)
+		basic_machine=m68k-motorola
+		;;
+	delta88)
+		basic_machine=m88k-motorola
+		os=-sysv3
+		;;
+	dicos)
+		basic_machine=i686-pc
+		os=-dicos
+		;;
+	djgpp)
+		basic_machine=i586-pc
+		os=-msdosdjgpp
+		;;
+	dpx20 | dpx20-*)
+		basic_machine=rs6000-bull
+		os=-bosx
+		;;
+	dpx2* | dpx2*-bull)
+		basic_machine=m68k-bull
+		os=-sysv3
+		;;
+	ebmon29k)
+		basic_machine=a29k-amd
+		os=-ebmon
+		;;
+	elxsi)
+		basic_machine=elxsi-elxsi
+		os=-bsd
+		;;
+	encore | umax | mmax)
+		basic_machine=ns32k-encore
+		;;
+	es1800 | OSE68k | ose68k | ose | OSE)
+		basic_machine=m68k-ericsson
+		os=-ose
+		;;
+	fx2800)
+		basic_machine=i860-alliant
+		;;
+	genix)
+		basic_machine=ns32k-ns
+		;;
+	gmicro)
+		basic_machine=tron-gmicro
+		os=-sysv
+		;;
+	go32)
+		basic_machine=i386-pc
+		os=-go32
+		;;
+	h3050r* | hiux*)
+		basic_machine=hppa1.1-hitachi
+		os=-hiuxwe2
+		;;
+	h8300hms)
+		basic_machine=h8300-hitachi
+		os=-hms
+		;;
+	h8300xray)
+		basic_machine=h8300-hitachi
+		os=-xray
+		;;
+	h8500hms)
+		basic_machine=h8500-hitachi
+		os=-hms
+		;;
+	harris)
+		basic_machine=m88k-harris
+		os=-sysv3
+		;;
+	hp300-*)
+		basic_machine=m68k-hp
+		;;
+	hp300bsd)
+		basic_machine=m68k-hp
+		os=-bsd
+		;;
+	hp300hpux)
+		basic_machine=m68k-hp
+		os=-hpux
+		;;
+	hp3k9[0-9][0-9] | hp9[0-9][0-9])
+		basic_machine=hppa1.0-hp
+		;;
+	hp9k2[0-9][0-9] | hp9k31[0-9])
+		basic_machine=m68000-hp
+		;;
+	hp9k3[2-9][0-9])
+		basic_machine=m68k-hp
+		;;
+	hp9k6[0-9][0-9] | hp6[0-9][0-9])
+		basic_machine=hppa1.0-hp
+		;;
+	hp9k7[0-79][0-9] | hp7[0-79][0-9])
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k78[0-9] | hp78[0-9])
+		# FIXME: really hppa2.0-hp
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
+		# FIXME: really hppa2.0-hp
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k8[0-9][13679] | hp8[0-9][13679])
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k8[0-9][0-9] | hp8[0-9][0-9])
+		basic_machine=hppa1.0-hp
+		;;
+	hppa-next)
+		os=-nextstep3
+		;;
+	hppaosf)
+		basic_machine=hppa1.1-hp
+		os=-osf
+		;;
+	hppro)
+		basic_machine=hppa1.1-hp
+		os=-proelf
+		;;
+	i370-ibm* | ibm*)
+		basic_machine=i370-ibm
+		;;
+	i*86v32)
+		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+		os=-sysv32
+		;;
+	i*86v4*)
+		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+		os=-sysv4
+		;;
+	i*86v)
+		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+		os=-sysv
+		;;
+	i*86sol2)
+		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+		os=-solaris2
+		;;
+	i386mach)
+		basic_machine=i386-mach
+		os=-mach
+		;;
+	i386-vsta | vsta)
+		basic_machine=i386-unknown
+		os=-vsta
+		;;
+	iris | iris4d)
+		basic_machine=mips-sgi
+		case $os in
+		    -irix*)
+			;;
+		    *)
+			os=-irix4
+			;;
+		esac
+		;;
+	isi68 | isi)
+		basic_machine=m68k-isi
+		os=-sysv
+		;;
+	leon-*|leon[3-9]-*)
+		basic_machine=sparc-`echo $basic_machine | sed 's/-.*//'`
+		;;
+	m68knommu)
+		basic_machine=m68k-unknown
+		os=-linux
+		;;
+	m68knommu-*)
+		basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'`
+		os=-linux
+		;;
+	m88k-omron*)
+		basic_machine=m88k-omron
+		;;
+	magnum | m3230)
+		basic_machine=mips-mips
+		os=-sysv
+		;;
+	merlin)
+		basic_machine=ns32k-utek
+		os=-sysv
+		;;
+	microblaze*)
+		basic_machine=microblaze-xilinx
+		;;
+	mingw64)
+		basic_machine=x86_64-pc
+		os=-mingw64
+		;;
+	mingw32)
+		basic_machine=i686-pc
+		os=-mingw32
+		;;
+	mingw32ce)
+		basic_machine=arm-unknown
+		os=-mingw32ce
+		;;
+	miniframe)
+		basic_machine=m68000-convergent
+		;;
+	*mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*)
+		basic_machine=m68k-atari
+		os=-mint
+		;;
+	mips3*-*)
+		basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
+		;;
+	mips3*)
+		basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
+		;;
+	monitor)
+		basic_machine=m68k-rom68k
+		os=-coff
+		;;
+	morphos)
+		basic_machine=powerpc-unknown
+		os=-morphos
+		;;
+	moxiebox)
+		basic_machine=moxie-unknown
+		os=-moxiebox
+		;;
+	msdos)
+		basic_machine=i386-pc
+		os=-msdos
+		;;
+	ms1-*)
+		basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
+		;;
+	msys)
+		basic_machine=i686-pc
+		os=-msys
+		;;
+	mvs)
+		basic_machine=i370-ibm
+		os=-mvs
+		;;
+	nacl)
+		basic_machine=le32-unknown
+		os=-nacl
+		;;
+	ncr3000)
+		basic_machine=i486-ncr
+		os=-sysv4
+		;;
+	netbsd386)
+		basic_machine=i386-unknown
+		os=-netbsd
+		;;
+	netwinder)
+		basic_machine=armv4l-rebel
+		os=-linux
+		;;
+	news | news700 | news800 | news900)
+		basic_machine=m68k-sony
+		os=-newsos
+		;;
+	news1000)
+		basic_machine=m68030-sony
+		os=-newsos
+		;;
+	news-3600 | risc-news)
+		basic_machine=mips-sony
+		os=-newsos
+		;;
+	necv70)
+		basic_machine=v70-nec
+		os=-sysv
+		;;
+	next | m*-next )
+		basic_machine=m68k-next
+		case $os in
+		    -nextstep* )
+			;;
+		    -ns2*)
+		      os=-nextstep2
+			;;
+		    *)
+		      os=-nextstep3
+			;;
+		esac
+		;;
+	nh3000)
+		basic_machine=m68k-harris
+		os=-cxux
+		;;
+	nh[45]000)
+		basic_machine=m88k-harris
+		os=-cxux
+		;;
+	nindy960)
+		basic_machine=i960-intel
+		os=-nindy
+		;;
+	mon960)
+		basic_machine=i960-intel
+		os=-mon960
+		;;
+	nonstopux)
+		basic_machine=mips-compaq
+		os=-nonstopux
+		;;
+	np1)
+		basic_machine=np1-gould
+		;;
+	neo-tandem)
+		basic_machine=neo-tandem
+		;;
+	nse-tandem)
+		basic_machine=nse-tandem
+		;;
+	nsr-tandem)
+		basic_machine=nsr-tandem
+		;;
+	op50n-* | op60c-*)
+		basic_machine=hppa1.1-oki
+		os=-proelf
+		;;
+	openrisc | openrisc-*)
+		basic_machine=or32-unknown
+		;;
+	os400)
+		basic_machine=powerpc-ibm
+		os=-os400
+		;;
+	OSE68000 | ose68000)
+		basic_machine=m68000-ericsson
+		os=-ose
+		;;
+	os68k)
+		basic_machine=m68k-none
+		os=-os68k
+		;;
+	pa-hitachi)
+		basic_machine=hppa1.1-hitachi
+		os=-hiuxwe2
+		;;
+	paragon)
+		basic_machine=i860-intel
+		os=-osf
+		;;
+	parisc)
+		basic_machine=hppa-unknown
+		os=-linux
+		;;
+	parisc-*)
+		basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'`
+		os=-linux
+		;;
+	pbd)
+		basic_machine=sparc-tti
+		;;
+	pbb)
+		basic_machine=m68k-tti
+		;;
+	pc532 | pc532-*)
+		basic_machine=ns32k-pc532
+		;;
+	pc98)
+		basic_machine=i386-pc
+		;;
+	pc98-*)
+		basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	pentium | p5 | k5 | k6 | nexgen | viac3)
+		basic_machine=i586-pc
+		;;
+	pentiumpro | p6 | 6x86 | athlon | athlon_*)
+		basic_machine=i686-pc
+		;;
+	pentiumii | pentium2 | pentiumiii | pentium3)
+		basic_machine=i686-pc
+		;;
+	pentium4)
+		basic_machine=i786-pc
+		;;
+	pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
+		basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	pentiumpro-* | p6-* | 6x86-* | athlon-*)
+		basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
+		basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	pentium4-*)
+		basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	pn)
+		basic_machine=pn-gould
+		;;
+	power)	basic_machine=power-ibm
+		;;
+	ppc | ppcbe)	basic_machine=powerpc-unknown
+		;;
+	ppc-* | ppcbe-*)
+		basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	ppcle | powerpclittle | ppc-le | powerpc-little)
+		basic_machine=powerpcle-unknown
+		;;
+	ppcle-* | powerpclittle-*)
+		basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	ppc64)	basic_machine=powerpc64-unknown
+		;;
+	ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	ppc64le | powerpc64little | ppc64-le | powerpc64-little)
+		basic_machine=powerpc64le-unknown
+		;;
+	ppc64le-* | powerpc64little-*)
+		basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	ps2)
+		basic_machine=i386-ibm
+		;;
+	pw32)
+		basic_machine=i586-unknown
+		os=-pw32
+		;;
+	rdos | rdos64)
+		basic_machine=x86_64-pc
+		os=-rdos
+		;;
+	rdos32)
+		basic_machine=i386-pc
+		os=-rdos
+		;;
+	rom68k)
+		basic_machine=m68k-rom68k
+		os=-coff
+		;;
+	rm[46]00)
+		basic_machine=mips-siemens
+		;;
+	rtpc | rtpc-*)
+		basic_machine=romp-ibm
+		;;
+	s390 | s390-*)
+		basic_machine=s390-ibm
+		;;
+	s390x | s390x-*)
+		basic_machine=s390x-ibm
+		;;
+	sa29200)
+		basic_machine=a29k-amd
+		os=-udi
+		;;
+	sb1)
+		basic_machine=mipsisa64sb1-unknown
+		;;
+	sb1el)
+		basic_machine=mipsisa64sb1el-unknown
+		;;
+	sde)
+		basic_machine=mipsisa32-sde
+		os=-elf
+		;;
+	sei)
+		basic_machine=mips-sei
+		os=-seiux
+		;;
+	sequent)
+		basic_machine=i386-sequent
+		;;
+	sh)
+		basic_machine=sh-hitachi
+		os=-hms
+		;;
+	sh5el)
+		basic_machine=sh5le-unknown
+		;;
+	sh64)
+		basic_machine=sh64-unknown
+		;;
+	sparclite-wrs | simso-wrs)
+		basic_machine=sparclite-wrs
+		os=-vxworks
+		;;
+	sps7)
+		basic_machine=m68k-bull
+		os=-sysv2
+		;;
+	spur)
+		basic_machine=spur-unknown
+		;;
+	st2000)
+		basic_machine=m68k-tandem
+		;;
+	stratus)
+		basic_machine=i860-stratus
+		os=-sysv4
+		;;
+	strongarm-* | thumb-*)
+		basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	sun2)
+		basic_machine=m68000-sun
+		;;
+	sun2os3)
+		basic_machine=m68000-sun
+		os=-sunos3
+		;;
+	sun2os4)
+		basic_machine=m68000-sun
+		os=-sunos4
+		;;
+	sun3os3)
+		basic_machine=m68k-sun
+		os=-sunos3
+		;;
+	sun3os4)
+		basic_machine=m68k-sun
+		os=-sunos4
+		;;
+	sun4os3)
+		basic_machine=sparc-sun
+		os=-sunos3
+		;;
+	sun4os4)
+		basic_machine=sparc-sun
+		os=-sunos4
+		;;
+	sun4sol2)
+		basic_machine=sparc-sun
+		os=-solaris2
+		;;
+	sun3 | sun3-*)
+		basic_machine=m68k-sun
+		;;
+	sun4)
+		basic_machine=sparc-sun
+		;;
+	sun386 | sun386i | roadrunner)
+		basic_machine=i386-sun
+		;;
+	sv1)
+		basic_machine=sv1-cray
+		os=-unicos
+		;;
+	symmetry)
+		basic_machine=i386-sequent
+		os=-dynix
+		;;
+	t3e)
+		basic_machine=alphaev5-cray
+		os=-unicos
+		;;
+	t90)
+		basic_machine=t90-cray
+		os=-unicos
+		;;
+	tile*)
+		basic_machine=$basic_machine-unknown
+		os=-linux-gnu
+		;;
+	tx39)
+		basic_machine=mipstx39-unknown
+		;;
+	tx39el)
+		basic_machine=mipstx39el-unknown
+		;;
+	toad1)
+		basic_machine=pdp10-xkl
+		os=-tops20
+		;;
+	tower | tower-32)
+		basic_machine=m68k-ncr
+		;;
+	tpf)
+		basic_machine=s390x-ibm
+		os=-tpf
+		;;
+	udi29k)
+		basic_machine=a29k-amd
+		os=-udi
+		;;
+	ultra3)
+		basic_machine=a29k-nyu
+		os=-sym1
+		;;
+	v810 | necv810)
+		basic_machine=v810-nec
+		os=-none
+		;;
+	vaxv)
+		basic_machine=vax-dec
+		os=-sysv
+		;;
+	vms)
+		basic_machine=vax-dec
+		os=-vms
+		;;
+	vpp*|vx|vx-*)
+		basic_machine=f301-fujitsu
+		;;
+	vxworks960)
+		basic_machine=i960-wrs
+		os=-vxworks
+		;;
+	vxworks68)
+		basic_machine=m68k-wrs
+		os=-vxworks
+		;;
+	vxworks29k)
+		basic_machine=a29k-wrs
+		os=-vxworks
+		;;
+	w65*)
+		basic_machine=w65-wdc
+		os=-none
+		;;
+	w89k-*)
+		basic_machine=hppa1.1-winbond
+		os=-proelf
+		;;
+	xbox)
+		basic_machine=i686-pc
+		os=-mingw32
+		;;
+	xps | xps100)
+		basic_machine=xps100-honeywell
+		;;
+	xscale-* | xscalee[bl]-*)
+		basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'`
+		;;
+	ymp)
+		basic_machine=ymp-cray
+		os=-unicos
+		;;
+	z8k-*-coff)
+		basic_machine=z8k-unknown
+		os=-sim
+		;;
+	z80-*-coff)
+		basic_machine=z80-unknown
+		os=-sim
+		;;
+	none)
+		basic_machine=none-none
+		os=-none
+		;;
+
+# Here we handle the default manufacturer of certain CPU types.  In some
+# cases it is the only manufacturer; in others, it is the most popular.
+	w89k)
+		basic_machine=hppa1.1-winbond
+		;;
+	op50n)
+		basic_machine=hppa1.1-oki
+		;;
+	op60c)
+		basic_machine=hppa1.1-oki
+		;;
+	romp)
+		basic_machine=romp-ibm
+		;;
+	mmix)
+		basic_machine=mmix-knuth
+		;;
+	rs6000)
+		basic_machine=rs6000-ibm
+		;;
+	vax)
+		basic_machine=vax-dec
+		;;
+	pdp10)
+		# there are many clones, so DEC is not a safe bet
+		basic_machine=pdp10-unknown
+		;;
+	pdp11)
+		basic_machine=pdp11-dec
+		;;
+	we32k)
+		basic_machine=we32k-att
+		;;
+	sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele)
+		basic_machine=sh-unknown
+		;;
+	sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v)
+		basic_machine=sparc-sun
+		;;
+	cydra)
+		basic_machine=cydra-cydrome
+		;;
+	orion)
+		basic_machine=orion-highlevel
+		;;
+	orion105)
+		basic_machine=clipper-highlevel
+		;;
+	mac | mpw | mac-mpw)
+		basic_machine=m68k-apple
+		;;
+	pmac | pmac-mpw)
+		basic_machine=powerpc-apple
+		;;
+	*-unknown)
+		# Make sure to match an already-canonicalized machine name.
+		;;
+	*)
+		echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
+		exit 1
+		;;
+esac
+
+# Here we canonicalize certain aliases for manufacturers.
+case $basic_machine in
+	*-digital*)
+		basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'`
+		;;
+	*-commodore*)
+		basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'`
+		;;
+	*)
+		;;
+esac
+
+# Decode manufacturer-specific aliases for certain operating systems.
+
+if [ x"$os" != x"" ]
+then
+case $os in
+	# First match some system type aliases
+	# that might get confused with valid system types.
+	# -solaris* is a basic system type, with this one exception.
+	-auroraux)
+		os=-auroraux
+		;;
+	-solaris1 | -solaris1.*)
+		os=`echo $os | sed -e 's|solaris1|sunos4|'`
+		;;
+	-solaris)
+		os=-solaris2
+		;;
+	-svr4*)
+		os=-sysv4
+		;;
+	-unixware*)
+		os=-sysv4.2uw
+		;;
+	-gnu/linux*)
+		os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`
+		;;
+	# Now accept the basic system types.
+	# The portable systems come first.
+	# Each alternative MUST END IN A *, to match a version number.
+	# -sysv* is not here because it comes later, after sysvr4.
+	-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
+	      | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
+	      | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
+	      | -sym* | -kopensolaris* | -plan9* \
+	      | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
+	      | -aos* | -aros* \
+	      | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
+	      | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
+	      | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
+	      | -bitrig* | -openbsd* | -solidbsd* \
+	      | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
+	      | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
+	      | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
+	      | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
+	      | -chorusos* | -chorusrdb* | -cegcc* \
+	      | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
+	      | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
+	      | -linux-newlib* | -linux-musl* | -linux-uclibc* \
+	      | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \
+	      | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
+	      | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
+	      | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
+	      | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
+	      | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
+	      | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
+	      | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* | -tirtos*)
+	# Remember, each alternative MUST END IN *, to match a version number.
+		;;
+	-qnx*)
+		case $basic_machine in
+		    x86-* | i*86-*)
+			;;
+		    *)
+			os=-nto$os
+			;;
+		esac
+		;;
+	-nto-qnx*)
+		;;
+	-nto*)
+		os=`echo $os | sed -e 's|nto|nto-qnx|'`
+		;;
+	-sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
+	      | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \
+	      | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
+		;;
+	-mac*)
+		os=`echo $os | sed -e 's|mac|macos|'`
+		;;
+	-linux-dietlibc)
+		os=-linux-dietlibc
+		;;
+	-linux*)
+		os=`echo $os | sed -e 's|linux|linux-gnu|'`
+		;;
+	-sunos5*)
+		os=`echo $os | sed -e 's|sunos5|solaris2|'`
+		;;
+	-sunos6*)
+		os=`echo $os | sed -e 's|sunos6|solaris3|'`
+		;;
+	-opened*)
+		os=-openedition
+		;;
+	-os400*)
+		os=-os400
+		;;
+	-wince*)
+		os=-wince
+		;;
+	-osfrose*)
+		os=-osfrose
+		;;
+	-osf*)
+		os=-osf
+		;;
+	-utek*)
+		os=-bsd
+		;;
+	-dynix*)
+		os=-bsd
+		;;
+	-acis*)
+		os=-aos
+		;;
+	-atheos*)
+		os=-atheos
+		;;
+	-syllable*)
+		os=-syllable
+		;;
+	-386bsd)
+		os=-bsd
+		;;
+	-ctix* | -uts*)
+		os=-sysv
+		;;
+	-nova*)
+		os=-rtmk-nova
+		;;
+	-ns2 )
+		os=-nextstep2
+		;;
+	-nsk*)
+		os=-nsk
+		;;
+	# Preserve the version number of sinix5.
+	-sinix5.*)
+		os=`echo $os | sed -e 's|sinix|sysv|'`
+		;;
+	-sinix*)
+		os=-sysv4
+		;;
+	-tpf*)
+		os=-tpf
+		;;
+	-triton*)
+		os=-sysv3
+		;;
+	-oss*)
+		os=-sysv3
+		;;
+	-svr4)
+		os=-sysv4
+		;;
+	-svr3)
+		os=-sysv3
+		;;
+	-sysvr4)
+		os=-sysv4
+		;;
+	# This must come after -sysvr4.
+	-sysv*)
+		;;
+	-ose*)
+		os=-ose
+		;;
+	-es1800*)
+		os=-ose
+		;;
+	-xenix)
+		os=-xenix
+		;;
+	-*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
+		os=-mint
+		;;
+	-aros*)
+		os=-aros
+		;;
+	-zvmoe)
+		os=-zvmoe
+		;;
+	-dicos*)
+		os=-dicos
+		;;
+	-nacl*)
+		;;
+	-none)
+		;;
+	*)
+		# Get rid of the `-' at the beginning of $os.
+		os=`echo $os | sed 's/[^-]*-//'`
+		echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2
+		exit 1
+		;;
+esac
+else
+
+# Here we handle the default operating systems that come with various machines.
+# The value should be what the vendor currently ships out the door with their
+# machine or, put another way, the most popular OS provided with the machine.
+
+# Note that if you're going to try to match "-MANUFACTURER" here (say,
+# "-sun"), then you have to tell the case statement up towards the top
+# that MANUFACTURER isn't an operating system.  Otherwise, code above
+# will signal an error saying that MANUFACTURER isn't an operating
+# system, and we'll never get to this point.
+
+case $basic_machine in
+	score-*)
+		os=-elf
+		;;
+	spu-*)
+		os=-elf
+		;;
+	*-acorn)
+		os=-riscix1.2
+		;;
+	arm*-rebel)
+		os=-linux
+		;;
+	arm*-semi)
+		os=-aout
+		;;
+	c4x-* | tic4x-*)
+		os=-coff
+		;;
+	c8051-*)
+		os=-elf
+		;;
+	hexagon-*)
+		os=-elf
+		;;
+	tic54x-*)
+		os=-coff
+		;;
+	tic55x-*)
+		os=-coff
+		;;
+	tic6x-*)
+		os=-coff
+		;;
+	# This must come before the *-dec entry.
+	pdp10-*)
+		os=-tops20
+		;;
+	pdp11-*)
+		os=-none
+		;;
+	*-dec | vax-*)
+		os=-ultrix4.2
+		;;
+	m68*-apollo)
+		os=-domain
+		;;
+	i386-sun)
+		os=-sunos4.0.2
+		;;
+	m68000-sun)
+		os=-sunos3
+		;;
+	m68*-cisco)
+		os=-aout
+		;;
+	mep-*)
+		os=-elf
+		;;
+	mips*-cisco)
+		os=-elf
+		;;
+	mips*-*)
+		os=-elf
+		;;
+	or32-*)
+		os=-coff
+		;;
+	*-tti)	# must be before sparc entry or we get the wrong os.
+		os=-sysv3
+		;;
+	sparc-* | *-sun)
+		os=-sunos4.1.1
+		;;
+	*-be)
+		os=-beos
+		;;
+	*-haiku)
+		os=-haiku
+		;;
+	*-ibm)
+		os=-aix
+		;;
+	*-knuth)
+		os=-mmixware
+		;;
+	*-wec)
+		os=-proelf
+		;;
+	*-winbond)
+		os=-proelf
+		;;
+	*-oki)
+		os=-proelf
+		;;
+	*-hp)
+		os=-hpux
+		;;
+	*-hitachi)
+		os=-hiux
+		;;
+	i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
+		os=-sysv
+		;;
+	*-cbm)
+		os=-amigaos
+		;;
+	*-dg)
+		os=-dgux
+		;;
+	*-dolphin)
+		os=-sysv3
+		;;
+	m68k-ccur)
+		os=-rtu
+		;;
+	m88k-omron*)
+		os=-luna
+		;;
+	*-next )
+		os=-nextstep
+		;;
+	*-sequent)
+		os=-ptx
+		;;
+	*-crds)
+		os=-unos
+		;;
+	*-ns)
+		os=-genix
+		;;
+	i370-*)
+		os=-mvs
+		;;
+	*-next)
+		os=-nextstep3
+		;;
+	*-gould)
+		os=-sysv
+		;;
+	*-highlevel)
+		os=-bsd
+		;;
+	*-encore)
+		os=-bsd
+		;;
+	*-sgi)
+		os=-irix
+		;;
+	*-siemens)
+		os=-sysv4
+		;;
+	*-masscomp)
+		os=-rtu
+		;;
+	f30[01]-fujitsu | f700-fujitsu)
+		os=-uxpv
+		;;
+	*-rom68k)
+		os=-coff
+		;;
+	*-*bug)
+		os=-coff
+		;;
+	*-apple)
+		os=-macos
+		;;
+	*-atari*)
+		os=-mint
+		;;
+	*)
+		os=-none
+		;;
+esac
+fi
+
+# Here we handle the case where we know the os, and the CPU type, but not the
+# manufacturer.  We pick the logical manufacturer.
+vendor=unknown
+case $basic_machine in
+	*-unknown)
+		case $os in
+			-riscix*)
+				vendor=acorn
+				;;
+			-sunos*)
+				vendor=sun
+				;;
+			-cnk*|-aix*)
+				vendor=ibm
+				;;
+			-beos*)
+				vendor=be
+				;;
+			-hpux*)
+				vendor=hp
+				;;
+			-mpeix*)
+				vendor=hp
+				;;
+			-hiux*)
+				vendor=hitachi
+				;;
+			-unos*)
+				vendor=crds
+				;;
+			-dgux*)
+				vendor=dg
+				;;
+			-luna*)
+				vendor=omron
+				;;
+			-genix*)
+				vendor=ns
+				;;
+			-mvs* | -opened*)
+				vendor=ibm
+				;;
+			-os400*)
+				vendor=ibm
+				;;
+			-ptx*)
+				vendor=sequent
+				;;
+			-tpf*)
+				vendor=ibm
+				;;
+			-vxsim* | -vxworks* | -windiss*)
+				vendor=wrs
+				;;
+			-aux*)
+				vendor=apple
+				;;
+			-hms*)
+				vendor=hitachi
+				;;
+			-mpw* | -macos*)
+				vendor=apple
+				;;
+			-*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
+				vendor=atari
+				;;
+			-vos*)
+				vendor=stratus
+				;;
+		esac
+		basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"`
+		;;
+esac
+
+echo $basic_machine$os
+exit
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "timestamp='"
+# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-end: "'"
+# End:
diff --git a/third-party/libjxl/libjxl/third_party/lcms/configure b/third-party/libjxl/libjxl/third_party/lcms/configure
new file mode 100755
index 0000000000..952d80b801
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/configure
@@ -0,0 +1,20146 @@
+#! /bin/sh
+# Guess values for system-dependent variables and create Makefiles.
+# Generated by GNU Autoconf 2.69 for lcms2 2.9.
+#
+#
+# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
+#
+#
+# This configure script is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+## -------------------- ##
+## M4sh Initialization. ##
+## -------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in #(
+  *posix*) :
+    set -o posix ;; #(
+  *) :
+     ;;
+esac
+fi
+
+
+as_nl='
+'
+export as_nl
+# Printing a long string crashes Solaris 7 /usr/bin/printf.
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
+# Prefer a ksh shell builtin over an external printf program on Solaris,
+# but without wasting forks for bash or zsh.
+if test -z "$BASH_VERSION$ZSH_VERSION" \
+    && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='print -r --'
+  as_echo_n='print -rn --'
+elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='printf %s\n'
+  as_echo_n='printf %s'
+else
+  if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then
+    as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"'
+    as_echo_n='/usr/ucb/echo -n'
+  else
+    as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
+    as_echo_n_body='eval
+      arg=$1;
+      case $arg in #(
+      *"$as_nl"*)
+	expr "X$arg" : "X\\(.*\\)$as_nl";
+	arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
+      esac;
+      expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl"
+    '
+    export as_echo_n_body
+    as_echo_n='sh -c $as_echo_n_body as_echo'
+  fi
+  export as_echo_body
+  as_echo='sh -c $as_echo_body as_echo'
+fi
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+  PATH_SEPARATOR=:
+  (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
+    (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+      PATH_SEPARATOR=';'
+  }
+fi
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.  Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+IFS=" ""	$as_nl"
+
+# Find who we are.  Look in the path if we contain no directory separator.
+as_myself=
+case $0 in #((
+  *[\\/]* ) as_myself=$0 ;;
+  *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+  done
+IFS=$as_save_IFS
+
+     ;;
+esac
+# We did not find ourselves; most probably we were run as `sh COMMAND',
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+  as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+  $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+  exit 1
+fi
+
+# Unset variables that we do not need and which cause bugs (e.g. in
+# pre-3.0 UWIN ksh).  But do not cause bugs in bash 2.01; the "|| exit 1"
+# suppresses any "Segmentation fault" message there.  '((' could
+# trigger a bug in pdksh 5.2.14.
+for as_var in BASH_ENV ENV MAIL MAILPATH
+do eval test x\${$as_var+set} = xset \
+  && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+done
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+LC_ALL=C
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# CDPATH.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+# Use a proper internal environment variable to ensure we don't fall
+  # into an infinite loop, continuously re-executing ourselves.
+  if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then
+    _as_can_reexec=no; export _as_can_reexec;
+    # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+  *v*x* | *x*v* ) as_opts=-vx ;;
+  *v* ) as_opts=-v ;;
+  *x* ) as_opts=-x ;;
+  * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
+as_fn_exit 255
+  fi
+  # We don't want this to propagate to other subprocesses.
+          { _as_can_reexec=; unset _as_can_reexec;}
+if test "x$CONFIG_SHELL" = x; then
+  as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then :
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '\${1+\"\$@\"}'='\"\$@\"'
+  setopt NO_GLOB_SUBST
+else
+  case \`(set -o) 2>/dev/null\` in #(
+  *posix*) :
+    set -o posix ;; #(
+  *) :
+     ;;
+esac
+fi
+"
+  as_required="as_fn_return () { (exit \$1); }
+as_fn_success () { as_fn_return 0; }
+as_fn_failure () { as_fn_return 1; }
+as_fn_ret_success () { return 0; }
+as_fn_ret_failure () { return 1; }
+
+exitcode=0
+as_fn_success || { exitcode=1; echo as_fn_success failed.; }
+as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; }
+as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; }
+as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; }
+if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then :
+
+else
+  exitcode=1; echo positional parameters were not saved.
+fi
+test x\$exitcode = x0 || exit 1
+test -x / || exit 1"
+  as_suggested="  as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO
+  as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO
+  eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" &&
+  test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1
+
+  test -n \"\${ZSH_VERSION+set}\${BASH_VERSION+set}\" || (
+    ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+    ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO
+    ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO
+    PATH=/empty FPATH=/empty; export PATH FPATH
+    test \"X\`printf %s \$ECHO\`\" = \"X\$ECHO\" \\
+      || test \"X\`print -r -- \$ECHO\`\" = \"X\$ECHO\" ) || exit 1
+test \$(( 1 + 1 )) = 2 || exit 1"
+  if (eval "$as_required") 2>/dev/null; then :
+  as_have_required=yes
+else
+  as_have_required=no
+fi
+  if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then :
+
+else
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+as_found=false
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  as_found=:
+  case $as_dir in #(
+	 /*)
+	   for as_base in sh bash ksh sh5; do
+	     # Try only shells that exist, to save several forks.
+	     as_shell=$as_dir/$as_base
+	     if { test -f "$as_shell" || test -f "$as_shell.exe"; } &&
+		    { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then :
+  CONFIG_SHELL=$as_shell as_have_required=yes
+		   if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then :
+  break 2
+fi
+fi
+	   done;;
+       esac
+  as_found=false
+done
+$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } &&
+	      { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then :
+  CONFIG_SHELL=$SHELL as_have_required=yes
+fi; }
+IFS=$as_save_IFS
+
+
+      if test "x$CONFIG_SHELL" != x; then :
+  export CONFIG_SHELL
+             # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+  *v*x* | *x*v* ) as_opts=-vx ;;
+  *v* ) as_opts=-v ;;
+  *x* ) as_opts=-x ;;
+  * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
+exit 255
+fi
+
+    if test x$as_have_required = xno; then :
+  $as_echo "$0: This script requires a shell more modern than all"
+  $as_echo "$0: the shells that I found on your system."
+  if test x${ZSH_VERSION+set} = xset ; then
+    $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should"
+    $as_echo "$0: be upgraded to zsh 4.3.4 or later."
+  else
+    $as_echo "$0: Please tell bug-autoconf@gnu.org about your system,
+$0: including any error possibly output before this
+$0: message. Then install a modern shell, or manually run
+$0: the script under such a shell if you do have one."
+  fi
+  exit 1
+fi
+fi
+fi
+SHELL=${CONFIG_SHELL-/bin/sh}
+export SHELL
+# Unset more variables known to interfere with behavior of common tools.
+CLICOLOR_FORCE= GREP_OPTIONS=
+unset CLICOLOR_FORCE GREP_OPTIONS
+
+## --------------------- ##
+## M4sh Shell Functions. ##
+## --------------------- ##
+# as_fn_unset VAR
+# ---------------
+# Portably unset VAR.
+as_fn_unset ()
+{
+  { eval $1=; unset $1;}
+}
+as_unset=as_fn_unset
+
+# as_fn_set_status STATUS
+# -----------------------
+# Set $? to STATUS, without forking.
+as_fn_set_status ()
+{
+  return $1
+} # as_fn_set_status
+
+# as_fn_exit STATUS
+# -----------------
+# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
+as_fn_exit ()
+{
+  set +e
+  as_fn_set_status $1
+  exit $1
+} # as_fn_exit
+
+# as_fn_mkdir_p
+# -------------
+# Create "$as_dir" as a directory, including parents if necessary.
+as_fn_mkdir_p ()
+{
+
+  case $as_dir in #(
+  -*) as_dir=./$as_dir;;
+  esac
+  test -d "$as_dir" || eval $as_mkdir_p || {
+    as_dirs=
+    while :; do
+      case $as_dir in #(
+      *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+      *) as_qdir=$as_dir;;
+      esac
+      as_dirs="'$as_qdir' $as_dirs"
+      as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$as_dir" : 'X\(//\)[^/]' \| \
+	 X"$as_dir" : 'X\(//\)$' \| \
+	 X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_dir" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+      test -d "$as_dir" && break
+    done
+    test -z "$as_dirs" || eval "mkdir $as_dirs"
+  } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
+
+
+} # as_fn_mkdir_p
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+as_fn_executable_p ()
+{
+  test -f "$1" && test -x "$1"
+} # as_fn_executable_p
+# as_fn_append VAR VALUE
+# ----------------------
+# Append the text in VALUE to the end of the definition contained in VAR. Take
+# advantage of any shell optimizations that allow amortized linear growth over
+# repeated appends, instead of the typical quadratic growth present in naive
+# implementations.
+if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :
+  eval 'as_fn_append ()
+  {
+    eval $1+=\$2
+  }'
+else
+  as_fn_append ()
+  {
+    eval $1=\$$1\$2
+  }
+fi # as_fn_append
+
+# as_fn_arith ARG...
+# ------------------
+# Perform arithmetic evaluation on the ARGs, and store the result in the
+# global $as_val. Take advantage of shells that can avoid forks. The arguments
+# must be portable across $(()) and expr.
+if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :
+  eval 'as_fn_arith ()
+  {
+    as_val=$(( $* ))
+  }'
+else
+  as_fn_arith ()
+  {
+    as_val=`expr "$@" || test $? -eq 1`
+  }
+fi # as_fn_arith
+
+
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
+# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
+# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
+# script with STATUS, using 1 if that was 0.
+as_fn_error ()
+{
+  as_status=$1; test $as_status -eq 0 && as_status=1
+  if test "$4"; then
+    as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+    $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
+  fi
+  $as_echo "$as_me: error: $2" >&2
+  as_fn_exit $as_status
+} # as_fn_error
+
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then
+  as_expr=expr
+else
+  as_expr=false
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+  as_basename=basename
+else
+  as_basename=false
+fi
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+  as_dirname=dirname
+else
+  as_dirname=false
+fi
+
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+	 X"$0" : 'X\(//\)$' \| \
+	 X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+
+  as_lineno_1=$LINENO as_lineno_1a=$LINENO
+  as_lineno_2=$LINENO as_lineno_2a=$LINENO
+  eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" &&
+  test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || {
+  # Blame Lee E. McMahon (1931-1989) for sed's syntax.  :-)
+  sed -n '
+    p
+    /[$]LINENO/=
+  ' <$as_myself |
+    sed '
+      s/[$]LINENO.*/&-/
+      t lineno
+      b
+      :lineno
+      N
+      :loop
+      s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/
+      t loop
+      s/-\n.*//
+    ' >$as_me.lineno &&
+  chmod +x "$as_me.lineno" ||
+    { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; }
+
+  # If we had to re-execute with $CONFIG_SHELL, we are guaranteed to have
+  # already done that, so make sure we don't try to do so again and fall
+  # into an infinite loop.  This has already happened in practice.
+  _as_can_reexec=no; export _as_can_reexec
+  # Don't try to exec as it changes $0, causing all sorts of problems
+  # (the dirname of $0 is not the place where we might find the
+  # original, and so on).  Autoconf is especially sensitive to this.
+  . "./$as_me.lineno"
+  # Exit status is that of the last command.
+  exit
+}
+
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in #(((((
+-n*)
+  case `echo 'xy\c'` in
+  *c*) ECHO_T='	';;	# ECHO_T is single tab character.
+  xy)  ECHO_C='\c';;
+  *)   echo `echo ksh88 bug on AIX 6.1` > /dev/null
+       ECHO_T='	';;
+  esac;;
+*)
+  ECHO_N='-n';;
+esac
+
+rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+  rm -f conf$$.dir/conf$$.file
+else
+  rm -f conf$$.dir
+  mkdir conf$$.dir 2>/dev/null
+fi
+if (echo >conf$$.file) 2>/dev/null; then
+  if ln -s conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s='ln -s'
+    # ... but there are two gotchas:
+    # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+    # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+    # In both cases, we have to default to `cp -pR'.
+    ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+      as_ln_s='cp -pR'
+  elif ln conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s=ln
+  else
+    as_ln_s='cp -pR'
+  fi
+else
+  as_ln_s='cp -pR'
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+if mkdir -p . 2>/dev/null; then
+  as_mkdir_p='mkdir -p "$as_dir"'
+else
+  test -d ./-p && rmdir ./-p
+  as_mkdir_p=false
+fi
+
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+SHELL=${CONFIG_SHELL-/bin/sh}
+
+
+test -n "$DJDIR" || exec 7<&0 </dev/null
+exec 6>&1
+
+# Name of the host.
+# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status,
+# so uname gets run too.
+ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
+
+#
+# Initializations.
+#
+ac_default_prefix=/usr/local
+ac_clean_files=
+ac_config_libobj_dir=.
+LIBOBJS=
+cross_compiling=no
+subdirs=
+MFLAGS=
+MAKEFLAGS=
+
+# Identity of this package.
+PACKAGE_NAME='lcms2'
+PACKAGE_TARNAME='lcms2'
+PACKAGE_VERSION='2.9'
+PACKAGE_STRING='lcms2 2.9'
+PACKAGE_BUGREPORT=''
+PACKAGE_URL=''
+
+# Factoring default headers for most tests.
+ac_includes_default="\
+#include <stdio.h>
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+#ifdef STDC_HEADERS
+# include <stdlib.h>
+# include <stddef.h>
+#else
+# ifdef HAVE_STDLIB_H
+#  include <stdlib.h>
+# endif
+#endif
+#ifdef HAVE_STRING_H
+# if !defined STDC_HEADERS && defined HAVE_MEMORY_H
+#  include <memory.h>
+# endif
+# include <string.h>
+#endif
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif"
+
+ac_subst_vars='am__EXEEXT_FALSE
+am__EXEEXT_TRUE
+LTLIBOBJS
+LIBOBJS
+TIFFICC_DEPLIBS
+JPEGICC_DEPLIBS
+LCMS_LIB_DEPLIBS
+LIB_TIFF
+HasTIFF_FALSE
+HasTIFF_TRUE
+LIB_ZLIB
+HasZLIB_FALSE
+HasZLIB_TRUE
+LIB_JPEG
+HasJPEG_FALSE
+HasJPEG_TRUE
+LIB_THREAD
+LIB_MATH
+PTHREAD_CFLAGS
+PTHREAD_LIBS
+PTHREAD_CXX
+PTHREAD_CC
+acx_pthread_config
+inline
+MAINT
+MAINTAINER_MODE_FALSE
+MAINTAINER_MODE_TRUE
+LIBTOOL_DEPS
+CXXCPP
+OTOOL64
+OTOOL
+LIPO
+NMEDIT
+DSYMUTIL
+MANIFEST_TOOL
+RANLIB
+ac_ct_AR
+AR
+LN_S
+NM
+ac_ct_DUMPBIN
+DUMPBIN
+LD
+FGREP
+EGREP
+GREP
+SED
+LIBTOOL
+OBJDUMP
+DLLTOOL
+AS
+am__fastdepCXX_FALSE
+am__fastdepCXX_TRUE
+CXXDEPMODE
+ac_ct_CXX
+CXXFLAGS
+CXX
+CPP
+am__fastdepCC_FALSE
+am__fastdepCC_TRUE
+CCDEPMODE
+am__nodep
+AMDEPBACKSLASH
+AMDEP_FALSE
+AMDEP_TRUE
+am__quote
+am__include
+DEPDIR
+OBJEXT
+EXEEXT
+ac_ct_CC
+CPPFLAGS
+LDFLAGS
+CFLAGS
+CC
+AM_BACKSLASH
+AM_DEFAULT_VERBOSITY
+AM_DEFAULT_V
+AM_V
+am__untar
+am__tar
+AMTAR
+am__leading_dot
+SET_MAKE
+AWK
+mkdir_p
+MKDIR_P
+INSTALL_STRIP_PROGRAM
+STRIP
+install_sh
+MAKEINFO
+AUTOHEADER
+AUTOMAKE
+AUTOCONF
+ACLOCAL
+VERSION
+PACKAGE
+CYGPATH_W
+am__isrc
+INSTALL_DATA
+INSTALL_SCRIPT
+INSTALL_PROGRAM
+host_os
+host_vendor
+host_cpu
+host
+build_os
+build_vendor
+build_cpu
+build
+LIBRARY_AGE
+LIBRARY_REVISION
+LIBRARY_CURRENT
+target_alias
+host_alias
+build_alias
+LIBS
+ECHO_T
+ECHO_N
+ECHO_C
+DEFS
+mandir
+localedir
+libdir
+psdir
+pdfdir
+dvidir
+htmldir
+infodir
+docdir
+oldincludedir
+includedir
+runstatedir
+localstatedir
+sharedstatedir
+sysconfdir
+datadir
+datarootdir
+libexecdir
+sbindir
+bindir
+program_transform_name
+prefix
+exec_prefix
+PACKAGE_URL
+PACKAGE_BUGREPORT
+PACKAGE_STRING
+PACKAGE_VERSION
+PACKAGE_TARNAME
+PACKAGE_NAME
+PATH_SEPARATOR
+SHELL'
+ac_subst_files=''
+ac_user_opts='
+enable_option_checking
+enable_silent_rules
+enable_dependency_tracking
+enable_shared
+enable_static
+with_pic
+enable_fast_install
+with_gnu_ld
+with_sysroot
+enable_libtool_lock
+enable_maintainer_mode
+with_jpeg
+with_tiff
+with_zlib
+with_threads
+'
+      ac_precious_vars='build_alias
+host_alias
+target_alias
+CC
+CFLAGS
+LDFLAGS
+LIBS
+CPPFLAGS
+CPP
+CXX
+CXXFLAGS
+CCC
+CXXCPP'
+
+
+# Initialize some variables set by options.
+ac_init_help=  # set to long, short or recursive by the --help variants
+ac_init_version=false  # replaced by ":" on --version; later executed as a command
+ac_unrecognized_opts=  # collects unknown --enable/--with/--disable/--without options for the post-parse report
+ac_unrecognized_sep=  # becomes ', ' after the first unknown option has been recorded
+# The variables have the same names as the options, with
+# dashes changed to underlines.
+cache_file=/dev/null  # changed only by --cache-file=FILE or --config-cache/-C
+exec_prefix=NONE
+no_create=
+no_recursion=
+prefix=NONE
+program_prefix=NONE
+program_suffix=NONE
+program_transform_name=s,x,x,  # default sed program: replaces "x" with "x"
+silent=
+site=
+srcdir=  # empty means "locate the sources automatically" further down
+verbose=
+x_includes=NONE
+x_libraries=NONE
+
+# Installation directory options.
+# These are left unexpanded so users can "make install exec_prefix=/foo"
+# and all the variables that are supposed to be based on exec_prefix
+# by default will actually change.
+# Use braces instead of parens because sh, perl, etc. also accept them.
+# (The list follows the same order as the GNU Coding Standards.)
+bindir='${exec_prefix}/bin'  # single quotes keep ${exec_prefix} literal
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datarootdir='${prefix}/share'
+datadir='${datarootdir}'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+runstatedir='${localstatedir}/run'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'  # the only directory default that is a fixed absolute path
+docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
+infodir='${datarootdir}/info'
+htmldir='${docdir}'
+dvidir='${docdir}'
+pdfdir='${docdir}'
+psdir='${docdir}'
+libdir='${exec_prefix}/lib'
+localedir='${datarootdir}/locale'
+mandir='${datarootdir}/man'
+
+ac_prev=  # name of the variable that takes the next word as its value ("--opt VALUE" form)
+ac_dashdash=  # becomes "yes" once a bare "--" has been seen
+for ac_option
+do  # no "in" list: the loop walks the script's positional parameters
+  # If the previous option needs an argument, assign it.
+  if test -n "$ac_prev"; then
+    eval $ac_prev=\$ac_option  # the word is stored verbatim in the variable named by ac_prev
+    ac_prev=
+    continue
+  fi
+
+  case $ac_option in
+  *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;  # value is everything after the first "="
+  *=)   ac_optarg= ;;  # "name=" gives an empty value
+  *)    ac_optarg=yes ;;  # no "=": the value defaults to "yes"
+  esac
+
+  # Accept the important Cygnus configure options, so we can diagnose typos.
+
+  case $ac_dashdash$ac_option in  # after "--" the "yes" prefix keeps the "-..." patterns below from matching
+  --)
+    ac_dashdash=yes ;;
+
+  -bindir | --bindir | --bindi | --bind | --bin | --bi)
+    ac_prev=bindir ;;
+  -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+    bindir=$ac_optarg ;;
+
+  -build | --build | --buil | --bui | --bu)
+    ac_prev=build_alias ;;
+  -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+    build_alias=$ac_optarg ;;
+
+  -cache-file | --cache-file | --cache-fil | --cache-fi \
+  | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+    ac_prev=cache_file ;;
+  -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+  | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+    cache_file=$ac_optarg ;;
+
+  --config-cache | -C)
+    cache_file=config.cache ;;
+
+  -datadir | --datadir | --datadi | --datad)
+    ac_prev=datadir ;;
+  -datadir=* | --datadir=* | --datadi=* | --datad=*)
+    datadir=$ac_optarg ;;
+
+  -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \
+  | --dataroo | --dataro | --datar)
+    ac_prev=datarootdir ;;
+  -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \
+  | --dataroot=* | --dataroo=* | --dataro=* | --datar=*)
+    datarootdir=$ac_optarg ;;
+
+  -disable-* | --disable-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid feature name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"enable_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig"
+	 ac_unrecognized_sep=', ';;
+    esac
+    eval enable_$ac_useropt=no ;;  # set even when the feature name is not in ac_user_opts
+
+  -docdir | --docdir | --docdi | --doc | --do)
+    ac_prev=docdir ;;
+  -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*)
+    docdir=$ac_optarg ;;
+
+  -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv)
+    ac_prev=dvidir ;;
+  -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*)
+    dvidir=$ac_optarg ;;
+
+  -enable-* | --enable-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid feature name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"enable_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig"
+	 ac_unrecognized_sep=', ';;
+    esac
+    eval enable_$ac_useropt=\$ac_optarg ;;  # "yes" when no =ARG was supplied
+
+  -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+  | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+  | --exec | --exe | --ex)
+    ac_prev=exec_prefix ;;
+  -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+  | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+  | --exec=* | --exe=* | --ex=*)
+    exec_prefix=$ac_optarg ;;
+
+  -gas | --gas | --ga | --g)
+    # Obsolete; use --with-gas.
+    with_gas=yes ;;
+
+  -help | --help | --hel | --he | -h)
+    ac_init_help=long ;;
+  -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
+    ac_init_help=recursive ;;
+  -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
+    ac_init_help=short ;;
+
+  -host | --host | --hos | --ho)
+    ac_prev=host_alias ;;
+  -host=* | --host=* | --hos=* | --ho=*)
+    host_alias=$ac_optarg ;;
+
+  -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht)
+    ac_prev=htmldir ;;
+  -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \
+  | --ht=*)
+    htmldir=$ac_optarg ;;
+
+  -includedir | --includedir | --includedi | --included | --include \
+  | --includ | --inclu | --incl | --inc)
+    ac_prev=includedir ;;
+  -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+  | --includ=* | --inclu=* | --incl=* | --inc=*)
+    includedir=$ac_optarg ;;
+
+  -infodir | --infodir | --infodi | --infod | --info | --inf)
+    ac_prev=infodir ;;
+  -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+    infodir=$ac_optarg ;;
+
+  -libdir | --libdir | --libdi | --libd)
+    ac_prev=libdir ;;
+  -libdir=* | --libdir=* | --libdi=* | --libd=*)
+    libdir=$ac_optarg ;;
+
+  -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+  | --libexe | --libex | --libe)
+    ac_prev=libexecdir ;;
+  -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+  | --libexe=* | --libex=* | --libe=*)
+    libexecdir=$ac_optarg ;;
+
+  -localedir | --localedir | --localedi | --localed | --locale)
+    ac_prev=localedir ;;
+  -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*)
+    localedir=$ac_optarg ;;
+
+  -localstatedir | --localstatedir | --localstatedi | --localstated \
+  | --localstate | --localstat | --localsta | --localst | --locals)
+    ac_prev=localstatedir ;;
+  -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+  | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*)
+    localstatedir=$ac_optarg ;;
+
+  -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+    ac_prev=mandir ;;
+  -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+    mandir=$ac_optarg ;;
+
+  -nfp | --nfp | --nf)
+    # Obsolete; use --without-fp.
+    with_fp=no ;;
+
+  -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+  | --no-cr | --no-c | -n)
+    no_create=yes ;;
+
+  -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+  | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+    no_recursion=yes ;;
+
+  -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+  | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+  | --oldin | --oldi | --old | --ol | --o)
+    ac_prev=oldincludedir ;;
+  -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+  | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+  | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+    oldincludedir=$ac_optarg ;;
+
+  -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+    ac_prev=prefix ;;
+  -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+    prefix=$ac_optarg ;;
+
+  -program-prefix | --program-prefix | --program-prefi | --program-pref \
+  | --program-pre | --program-pr | --program-p)
+    ac_prev=program_prefix ;;
+  -program-prefix=* | --program-prefix=* | --program-prefi=* \
+  | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+    program_prefix=$ac_optarg ;;
+
+  -program-suffix | --program-suffix | --program-suffi | --program-suff \
+  | --program-suf | --program-su | --program-s)
+    ac_prev=program_suffix ;;
+  -program-suffix=* | --program-suffix=* | --program-suffi=* \
+  | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+    program_suffix=$ac_optarg ;;
+
+  -program-transform-name | --program-transform-name \
+  | --program-transform-nam | --program-transform-na \
+  | --program-transform-n | --program-transform- \
+  | --program-transform | --program-transfor \
+  | --program-transfo | --program-transf \
+  | --program-trans | --program-tran \
+  | --progr-tra | --program-tr | --program-t)  # NOTE(review): "--progr-tra" looks like a typo for "--program-tra" (which therefore is not accepted); left as generated
+    ac_prev=program_transform_name ;;
+  -program-transform-name=* | --program-transform-name=* \
+  | --program-transform-nam=* | --program-transform-na=* \
+  | --program-transform-n=* | --program-transform-=* \
+  | --program-transform=* | --program-transfor=* \
+  | --program-transfo=* | --program-transf=* \
+  | --program-trans=* | --program-tran=* \
+  | --progr-tra=* | --program-tr=* | --program-t=*)  # NOTE(review): same "--progr-tra" spelling as in the arm above
+    program_transform_name=$ac_optarg ;;
+
+  -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd)
+    ac_prev=pdfdir ;;
+  -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*)
+    pdfdir=$ac_optarg ;;
+
+  -psdir | --psdir | --psdi | --psd | --ps)
+    ac_prev=psdir ;;
+  -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*)
+    psdir=$ac_optarg ;;
+
+  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+  | -silent | --silent | --silen | --sile | --sil)
+    silent=yes ;;
+
+  -runstatedir | --runstatedir | --runstatedi | --runstated \
+  | --runstate | --runstat | --runsta | --runst | --runs \
+  | --run | --ru | --r)
+    ac_prev=runstatedir ;;
+  -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
+  | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
+  | --run=* | --ru=* | --r=*)
+    runstatedir=$ac_optarg ;;
+
+  -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+    ac_prev=sbindir ;;
+  -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+  | --sbi=* | --sb=*)
+    sbindir=$ac_optarg ;;
+
+  -sharedstatedir | --sharedstatedir | --sharedstatedi \
+  | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+  | --sharedst | --shareds | --shared | --share | --shar \
+  | --sha | --sh)
+    ac_prev=sharedstatedir ;;
+  -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+  | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+  | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+  | --sha=* | --sh=*)
+    sharedstatedir=$ac_optarg ;;
+
+  -site | --site | --sit)
+    ac_prev=site ;;
+  -site=* | --site=* | --sit=*)
+    site=$ac_optarg ;;
+
+  -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+    ac_prev=srcdir ;;
+  -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+    srcdir=$ac_optarg ;;
+
+  -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+  | --syscon | --sysco | --sysc | --sys | --sy)
+    ac_prev=sysconfdir ;;
+  -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+  | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+    sysconfdir=$ac_optarg ;;
+
+  -target | --target | --targe | --targ | --tar | --ta | --t)
+    ac_prev=target_alias ;;
+  -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+    target_alias=$ac_optarg ;;
+
+  -v | -verbose | --verbose | --verbos | --verbo | --verb)
+    verbose=yes ;;
+
+  -version | --version | --versio | --versi | --vers | -V)
+    ac_init_version=: ;;
+
+  -with-* | --with-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid package name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"with_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig"
+	 ac_unrecognized_sep=', ';;
+    esac
+    eval with_$ac_useropt=\$ac_optarg ;;  # "yes" when no =ARG was supplied
+
+  -without-* | --without-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid package name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"with_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig"
+	 ac_unrecognized_sep=', ';;
+    esac
+    eval with_$ac_useropt=no ;;  # set even when the package name is not in ac_user_opts
+
+  --x)
+    # Obsolete; use --with-x.
+    with_x=yes ;;
+
+  -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+  | --x-incl | --x-inc | --x-in | --x-i)
+    ac_prev=x_includes ;;
+  -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+  | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+    x_includes=$ac_optarg ;;
+
+  -x-libraries | --x-libraries | --x-librarie | --x-librari \
+  | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+    ac_prev=x_libraries ;;
+  -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+  | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+    x_libraries=$ac_optarg ;;
+
+  -*) as_fn_error $? "unrecognized option: \`$ac_option'
+Try \`$0 --help' for more information"
+    ;;
+
+  *=*)
+    ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
+    # Reject names that are not valid shell variable names.
+    case $ac_envvar in #(
+      '' | [0-9]* | *[!_$as_cr_alnum]* )
+      as_fn_error $? "invalid variable name: \`$ac_envvar'" ;;
+    esac
+    eval $ac_envvar=\$ac_optarg
+    export $ac_envvar ;;  # VAR=VALUE arguments are assigned and exported
+
+  *)
+    # FIXME: should be removed in autoconf 3.0.
+    $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2
+    expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+      $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2
+    : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}"  # ${var=word} assigns only to aliases that are still unset
+    ;;
+
+  esac
+done
+
+if test -n "$ac_prev"; then  # the final option was still waiting for its argument
+  ac_option=--`echo $ac_prev | sed 's/_/-/g'`
+  as_fn_error $? "missing argument to $ac_option"
+fi
+
+if test -n "$ac_unrecognized_opts"; then  # unknown --enable/--with style options were collected during parsing
+  case $enable_option_checking in
+    no) ;;  # --disable-option-checking: say nothing
+    fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;;  # --enable-option-checking=fatal: report through as_fn_error
+    *)     $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;;  # any other value: warn on stderr and carry on
+  esac
+fi
+
+# Check all directory arguments for consistency.
+for ac_var in	exec_prefix prefix bindir sbindir libexecdir datarootdir \
+		datadir sysconfdir sharedstatedir localstatedir includedir \
+		oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
+		libdir localedir mandir runstatedir
+do
+  eval ac_val=\$$ac_var  # current value of the variable named by ac_var
+  # Remove trailing slashes.
+  case $ac_val in
+    */ )
+      ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'`
+      eval $ac_var=\$ac_val;;  # write the trimmed value back
+  esac
+  # Be sure to have absolute directory names.
+  case $ac_val in
+    [\\/$]* | ?:[\\/]* )  continue;;  # starts with "/", "\" or "$", or with a "X:/" style drive prefix
+    NONE | '' ) case $ac_var in *prefix ) continue;; esac;;  # NONE/empty is tolerated only for prefix and exec_prefix
+  esac
+  as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val"
+done
+
+# There might be people who depend on the old broken behavior: `$host'
+# used to hold the argument of --host etc.
+# FIXME: To remove some day.
+build=$build_alias
+host=$host_alias
+target=$target_alias
+
+# FIXME: To remove some day.
+if test "x$host_alias" != x; then
+  if test "x$build_alias" = x; then
+    cross_compiling=maybe  # --host given without --build
+  elif test "x$build_alias" != "x$host_alias"; then
+    cross_compiling=yes  # --build and --host were both given and differ
+  fi
+fi
+
+ac_tool_prefix=
+test -n "$host_alias" && ac_tool_prefix=$host_alias-  # "<host_alias>-" when a host alias is set, otherwise empty
+
+test "$silent" = yes && exec 6>/dev/null  # --quiet/--silent: everything later written to fd 6 is discarded
+
+
+ac_pwd=`pwd` && test -n "$ac_pwd" &&
+ac_ls_di=`ls -di .` &&
+ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` ||
+  as_fn_error $? "working directory cannot be determined"  # pwd, ls -di, or cd into the reported directory failed
+test "X$ac_ls_di" = "X$ac_pwd_ls_di" ||
+  as_fn_error $? "pwd does not report name of working directory"  # "ls -di ." output differs between "." and the directory pwd printed
+
+
+# Find the source files, if location was not specified.
+if test -z "$srcdir"; then
+  ac_srcdir_defaulted=yes  # remembered so the error message below can name both candidates
+  # Try the directory containing this script, then the parent directory.
+  ac_confdir=`$as_dirname -- "$as_myself" ||
+$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$as_myself" : 'X\(//\)[^/]' \| \
+	 X"$as_myself" : 'X\(//\)$' \| \
+	 X"$as_myself" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_myself" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+  srcdir=$ac_confdir
+  if test ! -r "$srcdir/$ac_unique_file"; then
+    srcdir=..  # $ac_unique_file is not readable next to the script: fall back to the parent directory
+  fi
+else
+  ac_srcdir_defaulted=no  # --srcdir was given explicitly
+fi
+if test ! -r "$srcdir/$ac_unique_file"; then
+  test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .."  # only to make the error message list both tried locations
+  as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir"
+fi
+ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work"
+ac_abs_confdir=`(
+	cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg"
+	pwd)`
+# When building in place, set srcdir=.
+if test "$ac_abs_confdir" = "$ac_pwd"; then
+  srcdir=.
+fi
+# Remove unnecessary trailing slashes from srcdir.
+# Double slashes in file names in object file debugging info
+# mess up M-x gdb in Emacs.
+case $srcdir in
+*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;;
+esac
+for ac_var in $ac_precious_vars; do  # record the state of every precious variable as it stands after option parsing
+  eval ac_env_${ac_var}_set=\${${ac_var}+set}  # "set" if the variable is set (even to empty), otherwise empty
+  eval ac_env_${ac_var}_value=\$${ac_var}
+  eval ac_cv_env_${ac_var}_set=\${${ac_var}+set}  # ac_cv_env_* receives the same two values as ac_env_*
+  eval ac_cv_env_${ac_var}_value=\$${ac_var}
+done
+
+# Report the --help message.  The first here-document below is unquoted, so
+# $0 and $ac_default_prefix are expanded and \` / \$ are escaped by hand; the
+# second is quoted (<<\_ACEOF) and is printed exactly as written.
+if test "$ac_init_help" = "long"; then  # plain --help / -h only; short and recursive skip this part
+  # Omit some internal or obsolete options to make the list less imposing.
+  # This message is too long to be a string in the A/UX 3.1 sh.
+  cat <<_ACEOF
+\`configure' configures lcms2 2.9 to adapt to many kinds of systems.
+
+Usage: $0 [OPTION]... [VAR=VALUE]...
+
+To assign environment variables (e.g., CC, CFLAGS...), specify them as
+VAR=VALUE.  See below for descriptions of some of the useful variables.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+  -h, --help              display this help and exit
+      --help=short        display options specific to this package
+      --help=recursive    display the short help of all the included packages
+  -V, --version           display version information and exit
+  -q, --quiet, --silent   do not print \`checking ...' messages
+      --cache-file=FILE   cache test results in FILE [disabled]
+  -C, --config-cache      alias for \`--cache-file=config.cache'
+  -n, --no-create         do not create output files
+      --srcdir=DIR        find the sources in DIR [configure dir or \`..']
+
+Installation directories:
+  --prefix=PREFIX         install architecture-independent files in PREFIX
+                          [$ac_default_prefix]
+  --exec-prefix=EPREFIX   install architecture-dependent files in EPREFIX
+                          [PREFIX]
+
+By default, \`make install' will install all the files in
+\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc.  You can specify
+an installation prefix other than \`$ac_default_prefix' using \`--prefix',
+for instance \`--prefix=\$HOME'.
+
+For better control, use the options below.
+
+Fine tuning of the installation directories:
+  --bindir=DIR            user executables [EPREFIX/bin]
+  --sbindir=DIR           system admin executables [EPREFIX/sbin]
+  --libexecdir=DIR        program executables [EPREFIX/libexec]
+  --sysconfdir=DIR        read-only single-machine data [PREFIX/etc]
+  --sharedstatedir=DIR    modifiable architecture-independent data [PREFIX/com]
+  --localstatedir=DIR     modifiable single-machine data [PREFIX/var]
+  --runstatedir=DIR       modifiable per-process data [LOCALSTATEDIR/run]
+  --libdir=DIR            object code libraries [EPREFIX/lib]
+  --includedir=DIR        C header files [PREFIX/include]
+  --oldincludedir=DIR     C header files for non-gcc [/usr/include]
+  --datarootdir=DIR       read-only arch.-independent data root [PREFIX/share]
+  --datadir=DIR           read-only architecture-independent data [DATAROOTDIR]
+  --infodir=DIR           info documentation [DATAROOTDIR/info]
+  --localedir=DIR         locale-dependent data [DATAROOTDIR/locale]
+  --mandir=DIR            man documentation [DATAROOTDIR/man]
+  --docdir=DIR            documentation root [DATAROOTDIR/doc/lcms2]
+  --htmldir=DIR           html documentation [DOCDIR]
+  --dvidir=DIR            dvi documentation [DOCDIR]
+  --pdfdir=DIR            pdf documentation [DOCDIR]
+  --psdir=DIR             ps documentation [DOCDIR]
+_ACEOF
+
+  cat <<\_ACEOF
+
+Program names:
+  --program-prefix=PREFIX            prepend PREFIX to installed program names
+  --program-suffix=SUFFIX            append SUFFIX to installed program names
+  --program-transform-name=PROGRAM   run sed PROGRAM on installed program names
+
+System types:
+  --build=BUILD     configure for building on BUILD [guessed]
+  --host=HOST       cross-compile to build programs to run on HOST [BUILD]
+_ACEOF
+fi
+
+if test -n "$ac_init_help"; then  # printed for every --help variant (long, short, recursive)
+  case $ac_init_help in
+     short | recursive ) echo "Configuration of lcms2 2.9:";;  # title line for the modes that skipped the long preamble
+   esac
+  cat <<\_ACEOF
+
+Optional Features:
+  --disable-option-checking  ignore unrecognized --enable/--with options
+  --disable-FEATURE       do not include FEATURE (same as --enable-FEATURE=no)
+  --enable-FEATURE[=ARG]  include FEATURE [ARG=yes]
+  --enable-silent-rules   less verbose build output (undo: "make V=1")
+  --disable-silent-rules  verbose build output (undo: "make V=0")
+  --enable-dependency-tracking
+                          do not reject slow dependency extractors
+  --disable-dependency-tracking
+                          speeds up one-time build
+  --enable-shared[=PKGS]  build shared libraries [default=yes]
+  --enable-static[=PKGS]  build static libraries [default=yes]
+  --enable-fast-install[=PKGS]
+                          optimize for fast installation [default=yes]
+  --disable-libtool-lock  avoid locking (might break parallel builds)
+  --enable-maintainer-mode
+                          enable make rules and dependencies not useful (and
+                          sometimes confusing) to the casual installer
+
+Optional Packages:
+  --with-PACKAGE[=ARG]    use PACKAGE [ARG=yes]
+  --without-PACKAGE       do not use PACKAGE (same as --with-PACKAGE=no)
+  --with-pic[=PKGS]       try to use only PIC/non-PIC objects [default=use
+                          both]
+  --with-gnu-ld           assume the C compiler uses GNU ld [default=no]
+  --with-sysroot=DIR Search for dependent libraries within DIR
+                        (or the compiler's sysroot if not specified).
+  --with-jpeg=DIR         use jpeg installed in DIR
+  --with-tiff=DIR         use tiff installed in DIR
+  --without-zlib          disable ZLIB support
+  --without-threads       disable POSIX threads API support
+
+Some influential environment variables:
+  CC          C compiler command
+  CFLAGS      C compiler flags
+  LDFLAGS     linker flags, e.g. -L<lib dir> if you have libraries in a
+              nonstandard directory <lib dir>
+  LIBS        libraries to pass to the linker, e.g. -l<library>
+  CPPFLAGS    (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if
+              you have headers in a nonstandard directory <include dir>
+  CPP         C preprocessor
+  CXX         C++ compiler command
+  CXXFLAGS    C++ compiler flags
+  CXXCPP      C++ preprocessor
+
+Use these variables to override the choices made by `configure' or to help
+it to find libraries and programs with nonstandard names/locations.
+
+Report bugs to the package provider.
+_ACEOF
+ac_status=$?  # exit status of the cat above; the script later exits with it once help has been printed
+fi
+
+if test "$ac_init_help" = "recursive"; then
+  # If there are subdirs, report their specific --help.
+  for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue  # the leading ":" entry is a placeholder and is skipped
+    test -d "$ac_dir" ||
+      { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } ||
+      continue
+    ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+  ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`
+  # A ".." for each directory in $ac_dir_suffix.
+  ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
+  case $ac_top_builddir_sub in
+  "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+  *)  ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+  esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+  .)  # We are building in place.
+    ac_srcdir=.
+    ac_top_srcdir=$ac_top_builddir_sub
+    ac_abs_top_srcdir=$ac_pwd ;;
+  [\\/]* | ?:[\\/]* )  # Absolute name.
+    ac_srcdir=$srcdir$ac_dir_suffix;
+    ac_top_srcdir=$srcdir
+    ac_abs_top_srcdir=$srcdir ;;
+  *) # Relative name.
+    ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_build_prefix$srcdir
+    ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+    cd "$ac_dir" || { ac_status=$?; continue; }
+    # Look for a nested configure script; configure.gnu is preferred over configure.
+    if test -f "$ac_srcdir/configure.gnu"; then
+      echo &&
+      $SHELL "$ac_srcdir/configure.gnu" --help=recursive
+    elif test -f "$ac_srcdir/configure"; then
+      echo &&
+      $SHELL "$ac_srcdir/configure" --help=recursive
+    else
+      $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
+    fi || ac_status=$?  # remember a failing status from the nested help run
+    cd "$ac_pwd" || { ac_status=$?; break; }  # return to the starting directory before the next subdir
+  done
+fi
+
+test -n "$ac_init_help" && exit $ac_status  # any --help variant stops the script here
+if $ac_init_version; then  # runs "false" by default, or ":" when --version/-V was given
+  cat <<\_ACEOF
+lcms2 configure 2.9
+generated by GNU Autoconf 2.69
+
+Copyright (C) 2012 Free Software Foundation, Inc.
+This configure script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it.
+_ACEOF
+  exit
+fi
+
+## ------------------------ ##
+## Autoconf initialization. ##
+## ------------------------ ##
+
+# ac_fn_c_try_compile LINENO
+# --------------------------
+# Run $ac_compile; succeed only if it exits 0, leaves a non-empty conftest.$ac_objext and, when ac_c_werror_flag is set, an empty conftest.err.
+ac_fn_c_try_compile ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack  # keep an already-set as_lineno, else use $1; save the previous stack value
+  rm -f conftest.$ac_objext  # remove any earlier object so the final "test -s" reflects this run
+  if { { ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_compile") 2>conftest.err  # run the command in a subshell, capturing its stderr
+  ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1  # drop lines that begin with optional blanks and a "+"
+    cat conftest.er1 >&5  # fd 5 is opened outside this chunk (presumably the log file)
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_retval=1
+fi
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno  # restore the saved stack value; as_lineno is unset only when that value is empty
+  as_fn_set_status $ac_retval  # helper defined outside this chunk; presumably makes ac_retval the function's status
+
+} # ac_fn_c_try_compile
+
+# ac_fn_c_try_cpp LINENO
+# ----------------------
+# Run "$ac_cpp conftest.$ac_ext"; succeed only if it exits 0 and, when ac_c_preproc_warn_flag or ac_c_werror_flag is set, conftest.err is empty.
+ac_fn_c_try_cpp ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack  # keep an already-set as_lineno, else use $1; save the previous stack value
+  if { { ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err  # stderr is captured; stdout of the whole group goes to conftest.i below
+  ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1  # drop lines that begin with optional blanks and a "+"
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } > conftest.i && {
+	 test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       }; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+    ac_retval=1
+fi
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno  # restore the saved stack value; as_lineno is unset only when that value is empty
+  as_fn_set_status $ac_retval  # helper defined outside this chunk; presumably makes ac_retval the function's status
+
+} # ac_fn_c_try_cpp
+
+# ac_fn_cxx_try_compile LINENO
+# ----------------------------
+# Run $ac_compile; succeed only if it exits 0, leaves a non-empty conftest.$ac_objext and, when ac_cxx_werror_flag is set, an empty conftest.err.
+ac_fn_cxx_try_compile ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack  # keep an already-set as_lineno, else use $1; save the previous stack value
+  rm -f conftest.$ac_objext  # remove any earlier object so the final "test -s" reflects this run
+  if { { ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_compile") 2>conftest.err  # run the command in a subshell, capturing its stderr
+  ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1  # drop lines that begin with optional blanks and a "+"
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && {
+	 test -z "$ac_cxx_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_retval=1
+fi
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno  # restore the saved stack value; as_lineno is unset only when that value is empty
+  as_fn_set_status $ac_retval  # helper defined outside this chunk; presumably makes ac_retval the function's status
+
+} # ac_fn_cxx_try_compile
+
+# ac_fn_c_try_link LINENO
+# -----------------------
+# Run $ac_link; succeed only if it exits 0, conftest$ac_exeext is non-empty (and executable unless cross_compiling=yes) and, when ac_c_werror_flag is set, conftest.err is empty.
+ac_fn_c_try_link ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack  # keep an already-set as_lineno, else use $1; save the previous stack value
+  rm -f conftest.$ac_objext conftest$ac_exeext  # remove earlier outputs so the checks below reflect this run
+  if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>conftest.err  # run the command in a subshell, capturing its stderr
+  ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1  # drop lines that begin with optional blanks and a "+"
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+	 test "$cross_compiling" = yes ||
+	 test -x conftest$ac_exeext
+       }; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_retval=1
+fi
+  # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
+  # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
+  # interfere with the next link command; also delete a directory that is
+  # left behind by Apple's compiler.  We do this before executing the actions.
+  rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno  # restore the saved stack value; as_lineno is unset only when that value is empty
+  as_fn_set_status $ac_retval  # helper defined outside this chunk; presumably makes ac_retval the function's status
+
+} # ac_fn_c_try_link
+
+# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists and can be compiled using the include files in
+# INCLUDES, setting the cache variable VAR accordingly.
+ac_fn_c_check_header_compile ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  eval "$3=yes"
+else
+  eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+eval ac_res=\$$3
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_header_compile
+
+# ac_fn_c_try_run LINENO
+# ----------------------
+# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
+# that executables *can* be run.
+ac_fn_c_try_run ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
+  { { case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: program exited with status $ac_status" >&5
+       $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+       ac_retval=$ac_status
+fi
+  rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_run
+
+# ac_fn_c_check_func LINENO FUNC VAR
+# ----------------------------------
+# Tests whether FUNC exists, setting the cache variable VAR accordingly
+ac_fn_c_check_func ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+/* Define $2 to an innocuous variant, in case <limits.h> declares $2.
+   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
+#define $2 innocuous_$2
+
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char $2 (); below.
+    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+    <limits.h> exists even on freestanding compilers.  */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $2
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $2 ();
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined __stub_$2 || defined __stub___$2
+choke me
+#endif
+
+int
+main ()
+{
+return $2 ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  eval "$3=yes"
+else
+  eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+eval ac_res=\$$3
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_func
+
+# ac_fn_cxx_try_cpp LINENO
+# ------------------------
+# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
+ac_fn_cxx_try_cpp ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  if { { ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err
+  ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } > conftest.i && {
+	 test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" ||
+	 test ! -s conftest.err
+       }; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+    ac_retval=1
+fi
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+  as_fn_set_status $ac_retval
+
+} # ac_fn_cxx_try_cpp
+
+# ac_fn_cxx_try_link LINENO
+# -------------------------
+# Try to link conftest.$ac_ext, and return whether this succeeded.
+ac_fn_cxx_try_link ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  rm -f conftest.$ac_objext conftest$ac_exeext
+  if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>conftest.err
+  ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && {
+	 test -z "$ac_cxx_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+	 test "$cross_compiling" = yes ||
+	 test -x conftest$ac_exeext
+       }; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_retval=1
+fi
+  # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
+  # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
+  # interfere with the next link command; also delete a directory that is
+  # left behind by Apple's compiler.  We do this before executing the actions.
+  rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+  as_fn_set_status $ac_retval
+
+} # ac_fn_cxx_try_link
+
+# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists, giving a warning if it cannot be compiled using
+# the include files in INCLUDES and setting the cache variable VAR
+# accordingly.
+ac_fn_c_check_header_mongrel ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  if eval \${$3+:} false; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+  $as_echo_n "(cached) " >&6
+fi
+eval ac_res=\$$3
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+else
+  # Is the header compilable?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
+$as_echo_n "checking $2 usability... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_header_compiler=yes
+else
+  ac_header_compiler=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
+$as_echo "$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
+$as_echo_n "checking $2 presence... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <$2>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+  ac_header_preproc=yes
+else
+  ac_header_preproc=no
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
+$as_echo "$ac_header_preproc" >&6; }
+
+# So?  What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
+  yes:no: )
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
+$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+    ;;
+  no:yes:* )
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
+$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     check for missing prerequisite headers?" >&5
+$as_echo "$as_me: WARNING: $2:     check for missing prerequisite headers?" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
+$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&5
+$as_echo "$as_me: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+    ;;
+esac
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  eval "$3=\$ac_header_compiler"
+fi
+eval ac_res=\$$3
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+fi
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_header_mongrel
+cat >config.log <<_ACEOF
+This file contains any messages produced by compilers while
+running configure, to aid debugging if configure makes a mistake.
+
+It was created by lcms2 $as_me 2.9, which was
+generated by GNU Autoconf 2.69.  Invocation command line was
+
+  $ $0 $@
+
+_ACEOF
+exec 5>>config.log
+{
+cat <<_ASUNAME
+## --------- ##
+## Platform. ##
+## --------- ##
+
+hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
+/bin/uname -X     = `(/bin/uname -X) 2>/dev/null     || echo unknown`
+
+/bin/arch              = `(/bin/arch) 2>/dev/null              || echo unknown`
+/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null       || echo unknown`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
+/usr/bin/hostinfo      = `(/usr/bin/hostinfo) 2>/dev/null      || echo unknown`
+/bin/machine           = `(/bin/machine) 2>/dev/null           || echo unknown`
+/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null       || echo unknown`
+/bin/universe          = `(/bin/universe) 2>/dev/null          || echo unknown`
+
+_ASUNAME
+
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    $as_echo "PATH: $as_dir"
+  done
+IFS=$as_save_IFS
+
+} >&5
+
+cat >&5 <<_ACEOF
+
+
+## ----------- ##
+## Core tests. ##
+## ----------- ##
+
+_ACEOF
+
+
+# Keep a trace of the command line.
+# Strip out --no-create and --no-recursion so they do not pile up.
+# Strip out --silent because we don't want to record it for future runs.
+# Also quote any args containing shell meta-characters.
+# Make two passes to allow for proper duplicate-argument suppression.
+ac_configure_args=
+ac_configure_args0=
+ac_configure_args1=
+ac_must_keep_next=false
+for ac_pass in 1 2
+do
+  for ac_arg
+  do
+    case $ac_arg in
+    -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
+    -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+    | -silent | --silent | --silen | --sile | --sil)
+      continue ;;
+    *\'*)
+      ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    esac
+    case $ac_pass in
+    1) as_fn_append ac_configure_args0 " '$ac_arg'" ;;
+    2)
+      as_fn_append ac_configure_args1 " '$ac_arg'"
+      if test $ac_must_keep_next = true; then
+	ac_must_keep_next=false # Got value, back to normal.
+      else
+	case $ac_arg in
+	  *=* | --config-cache | -C | -disable-* | --disable-* \
+	  | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
+	  | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
+	  | -with-* | --with-* | -without-* | --without-* | --x)
+	    case "$ac_configure_args0 " in
+	      "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;
+	    esac
+	    ;;
+	  -* ) ac_must_keep_next=true ;;
+	esac
+      fi
+      as_fn_append ac_configure_args " '$ac_arg'"
+      ;;
+    esac
+  done
+done
+{ ac_configure_args0=; unset ac_configure_args0;}
+{ ac_configure_args1=; unset ac_configure_args1;}
+
+# When interrupted or exit'd, cleanup temporary files, and complete
+# config.log.  We remove comments because anyway the quotes in there
+# would cause problems or look ugly.
+# WARNING: Use '\'' to represent an apostrophe within the trap.
+# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug.
+trap 'exit_status=$?
+  # Save into config.log some information that might help in debugging.
+  {
+    echo
+
+    $as_echo "## ---------------- ##
+## Cache variables. ##
+## ---------------- ##"
+    echo
+    # The following way of writing the cache mishandles newlines in values,
+(
+  for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do
+    eval ac_val=\$$ac_var
+    case $ac_val in #(
+    *${as_nl}*)
+      case $ac_var in #(
+      *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
+$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
+      esac
+      case $ac_var in #(
+      _ | IFS | as_nl) ;; #(
+      BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
+      *) { eval $ac_var=; unset $ac_var;} ;;
+      esac ;;
+    esac
+  done
+  (set) 2>&1 |
+    case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #(
+    *${as_nl}ac_space=\ *)
+      sed -n \
+	"s/'\''/'\''\\\\'\'''\''/g;
+	  s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p"
+      ;; #(
+    *)
+      sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+      ;;
+    esac |
+    sort
+)
+    echo
+
+    $as_echo "## ----------------- ##
+## Output variables. ##
+## ----------------- ##"
+    echo
+    for ac_var in $ac_subst_vars
+    do
+      eval ac_val=\$$ac_var
+      case $ac_val in
+      *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+      esac
+      $as_echo "$ac_var='\''$ac_val'\''"
+    done | sort
+    echo
+
+    if test -n "$ac_subst_files"; then
+      $as_echo "## ------------------- ##
+## File substitutions. ##
+## ------------------- ##"
+      echo
+      for ac_var in $ac_subst_files
+      do
+	eval ac_val=\$$ac_var
+	case $ac_val in
+	*\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+	esac
+	$as_echo "$ac_var='\''$ac_val'\''"
+      done | sort
+      echo
+    fi
+
+    if test -s confdefs.h; then
+      $as_echo "## ----------- ##
+## confdefs.h. ##
+## ----------- ##"
+      echo
+      cat confdefs.h
+      echo
+    fi
+    test "$ac_signal" != 0 &&
+      $as_echo "$as_me: caught signal $ac_signal"
+    $as_echo "$as_me: exit $exit_status"
+  } >&5
+  rm -f core *.core core.conftest.* &&
+    rm -f -r conftest* confdefs* conf$$* $ac_clean_files &&
+    exit $exit_status
+' 0
+for ac_signal in 1 2 13 15; do
+  trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal
+done
+ac_signal=0
+
+# confdefs.h avoids OS command line length limits that DEFS can exceed.
+rm -f -r conftest* confdefs.h
+
+$as_echo "/* confdefs.h */" > confdefs.h
+
+# Predefined preprocessor variables.
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_NAME "$PACKAGE_NAME"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_VERSION "$PACKAGE_VERSION"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_STRING "$PACKAGE_STRING"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_URL "$PACKAGE_URL"
+_ACEOF
+
+
+# Let the site file select an alternate cache file if it wants to.
+# Prefer an explicitly selected file to automatically selected ones.
+ac_site_file1=NONE
+ac_site_file2=NONE
+if test -n "$CONFIG_SITE"; then
+  # We do not want a PATH search for config.site.
+  case $CONFIG_SITE in #((
+    -*)  ac_site_file1=./$CONFIG_SITE;;
+    */*) ac_site_file1=$CONFIG_SITE;;
+    *)   ac_site_file1=./$CONFIG_SITE;;
+  esac
+elif test "x$prefix" != xNONE; then
+  ac_site_file1=$prefix/share/config.site
+  ac_site_file2=$prefix/etc/config.site
+else
+  ac_site_file1=$ac_default_prefix/share/config.site
+  ac_site_file2=$ac_default_prefix/etc/config.site
+fi
+for ac_site_file in "$ac_site_file1" "$ac_site_file2"
+do
+  test "x$ac_site_file" = xNONE && continue
+  if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5
+$as_echo "$as_me: loading site script $ac_site_file" >&6;}
+    sed 's/^/| /' "$ac_site_file" >&5
+    . "$ac_site_file" \
+      || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "failed to load site script $ac_site_file
+See \`config.log' for more details" "$LINENO" 5; }
+  fi
+done
+
+if test -r "$cache_file"; then
+  # Some versions of bash will fail to source /dev/null (special files
+  # actually), so we avoid doing that.  DJGPP emulates it as a regular file.
+  if test /dev/null != "$cache_file" && test -f "$cache_file"; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5
+$as_echo "$as_me: loading cache $cache_file" >&6;}
+    case $cache_file in
+      [\\/]* | ?:[\\/]* ) . "$cache_file";;
+      *)                      . "./$cache_file";;
+    esac
+  fi
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5
+$as_echo "$as_me: creating cache $cache_file" >&6;}
+  >$cache_file
+fi
+
+# Check that the precious variables saved in the cache have kept the same
+# value.
+ac_cache_corrupted=false
+for ac_var in $ac_precious_vars; do
+  eval ac_old_set=\$ac_cv_env_${ac_var}_set
+  eval ac_new_set=\$ac_env_${ac_var}_set
+  eval ac_old_val=\$ac_cv_env_${ac_var}_value
+  eval ac_new_val=\$ac_env_${ac_var}_value
+  case $ac_old_set,$ac_new_set in
+    set,)
+      { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
+$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
+      ac_cache_corrupted=: ;;
+    ,set)
+      { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5
+$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
+      ac_cache_corrupted=: ;;
+    ,);;
+    *)
+      if test "x$ac_old_val" != "x$ac_new_val"; then
+	# differences in whitespace do not lead to failure.
+	ac_old_val_w=`echo x $ac_old_val`
+	ac_new_val_w=`echo x $ac_new_val`
+	if test "$ac_old_val_w" != "$ac_new_val_w"; then
+	  { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5
+$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
+	  ac_cache_corrupted=:
+	else
+	  { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5
+$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;}
+	  eval $ac_var=\$ac_old_val
+	fi
+	{ $as_echo "$as_me:${as_lineno-$LINENO}:   former value:  \`$ac_old_val'" >&5
+$as_echo "$as_me:   former value:  \`$ac_old_val'" >&2;}
+	{ $as_echo "$as_me:${as_lineno-$LINENO}:   current value: \`$ac_new_val'" >&5
+$as_echo "$as_me:   current value: \`$ac_new_val'" >&2;}
+      fi;;
+  esac
+  # Pass precious variables to config.status.
+  if test "$ac_new_set" = set; then
+    case $ac_new_val in
+    *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
+    *) ac_arg=$ac_var=$ac_new_val ;;
+    esac
+    case " $ac_configure_args " in
+      *" '$ac_arg' "*) ;; # Avoid dups.  Use of quotes ensures accuracy.
+      *) as_fn_append ac_configure_args " '$ac_arg'" ;;
+    esac
+  fi
+done
+if $ac_cache_corrupted; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5
+$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;}
+  as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5
+fi
+## -------------------- ##
+## Main body of script. ##
+## -------------------- ##
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+# Specify directory where m4 macros may be found.
+
+
+#
+# Libtool library revision control info
+# See the libtool documentation under the heading "Libtool's versioning
+# system" in order to understand the meaning of these fields
+#
+# Here are a set of rules to help you update your library version
+# information:
+#
+#  1. Start with version information of `0:0:0' for each libtool library.
+#  2. Update the version information only immediately before a public
+#     release of your software. More frequent updates are unnecessary, and
+#     only guarantee that the current interface number gets larger faster.
+#  3. If the library source code has changed at all since the last update,
+#     then increment revision (`c:r:a' becomes `c:r+1:a').
+#  4. If any interfaces have been added, removed, or changed since the last
+#     update, increment current, and set revision to 0.
+#  5. If any interfaces have been added since the last public release, then
+#     increment age.
+#  6. If any interfaces have been removed since the last public release,
+#     then set age to 0.
+#
+LIBRARY_CURRENT=2
+LIBRARY_REVISION=8
+LIBRARY_AGE=0
+
+
+# Obtain system type by running config.guess
+ac_aux_dir=
+for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do
+  if test -f "$ac_dir/install-sh"; then
+    ac_aux_dir=$ac_dir
+    ac_install_sh="$ac_aux_dir/install-sh -c"
+    break
+  elif test -f "$ac_dir/install.sh"; then
+    ac_aux_dir=$ac_dir
+    ac_install_sh="$ac_aux_dir/install.sh -c"
+    break
+  elif test -f "$ac_dir/shtool"; then
+    ac_aux_dir=$ac_dir
+    ac_install_sh="$ac_aux_dir/shtool install -c"
+    break
+  fi
+done
+if test -z "$ac_aux_dir"; then
+  as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5
+fi
+
+# These three variables are undocumented and unsupported,
+# and are intended to be withdrawn in a future Autoconf release.
+# They can cause serious problems if a builder's source tree is in a directory
+# whose full name contains unusual characters.
+ac_config_guess="$SHELL $ac_aux_dir/config.guess"  # Please don't use this var.
+ac_config_sub="$SHELL $ac_aux_dir/config.sub"  # Please don't use this var.
+ac_configure="$SHELL $ac_aux_dir/configure"  # Please don't use this var.
+
+
+# Make sure we can run config.sub.
+$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 ||
+  as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5
+$as_echo_n "checking build system type... " >&6; }
+if ${ac_cv_build+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_build_alias=$build_alias
+test "x$ac_build_alias" = x &&
+  ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"`
+test "x$ac_build_alias" = x &&
+  as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5
+ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` ||
+  as_fn_error $? "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5
+$as_echo "$ac_cv_build" >&6; }
+case $ac_cv_build in
+*-*-*) ;;
+*) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;;
+esac
+build=$ac_cv_build
+ac_save_IFS=$IFS; IFS='-'
+set x $ac_cv_build
+shift
+build_cpu=$1
+build_vendor=$2
+shift; shift
+# Remember, the first character of IFS is used to create $*,
+# except with old shells:
+build_os=$*
+IFS=$ac_save_IFS
+case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking host system type" >&5
+$as_echo_n "checking host system type... " >&6; }
+if ${ac_cv_host+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test "x$host_alias" = x; then
+  ac_cv_host=$ac_cv_build
+else
+  ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` ||
+    as_fn_error $? "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5
+$as_echo "$ac_cv_host" >&6; }
+case $ac_cv_host in
+*-*-*) ;;
+*) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;;
+esac
+host=$ac_cv_host
+ac_save_IFS=$IFS; IFS='-'
+set x $ac_cv_host
+shift
+host_cpu=$1
+host_vendor=$2
+shift; shift
+# Remember, the first character of IFS is used to create $*,
+# except with old shells:
+host_os=$*
+IFS=$ac_save_IFS
+case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac
+
+
+
+am__api_version='1.15'
+
+# Find a good install program.  We prefer a C program (faster),
+# so one script is as good as another.  But avoid the broken or
+# incompatible versions:
+# SysV /etc/install, /usr/sbin/install
+# SunOS /usr/etc/install
+# IRIX /sbin/install
+# AIX /bin/install
+# AmigaOS /C/install, which installs bootblocks on floppy discs
+# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag
+# AFS /usr/afsws/bin/install, which mishandles nonexistent args
+# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
+# OS/2's system install, which has a completely different semantic
+# ./install, which can be erroneously created by make from ./install.sh.
+# Reject install programs that cannot install multiple files.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5
+$as_echo_n "checking for a BSD-compatible install... " >&6; }
+if test -z "$INSTALL"; then
+if ${ac_cv_path_install+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    # Account for people who put trailing slashes in PATH elements.
+case $as_dir/ in #((
+  ./ | .// | /[cC]/* | \
+  /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \
+  ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \
+  /usr/ucb/* ) ;;
+  *)
+    # OSF1 and SCO ODT 3.0 have their own names for install.
+    # Don't use installbsd from OSF since it installs stuff as root
+    # by default.
+    for ac_prog in ginstall scoinst install; do
+      for ac_exec_ext in '' $ac_executable_extensions; do
+	if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then
+	  if test $ac_prog = install &&
+	    grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+	    # AIX install.  It has an incompatible calling convention.
+	    :
+	  elif test $ac_prog = install &&
+	    grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+	    # program-specific install script used by HP pwplus--don't use.
+	    :
+	  else
+	    rm -rf conftest.one conftest.two conftest.dir
+	    echo one > conftest.one
+	    echo two > conftest.two
+	    mkdir conftest.dir
+	    if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" &&
+	      test -s conftest.one && test -s conftest.two &&
+	      test -s conftest.dir/conftest.one &&
+	      test -s conftest.dir/conftest.two
+	    then
+	      ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c"
+	      break 3
+	    fi
+	  fi
+	fi
+      done
+    done
+    ;;
+esac
+
+  done
+IFS=$as_save_IFS
+
+rm -rf conftest.one conftest.two conftest.dir
+
+fi
+  if test "${ac_cv_path_install+set}" = set; then
+    INSTALL=$ac_cv_path_install
+  else
+    # As a last resort, use the slow shell script.  Don't cache a
+    # value for INSTALL within a source directory, because that will
+    # break other packages using the cache if that directory is
+    # removed, or if the value is a relative name.
+    INSTALL=$ac_install_sh
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5
+$as_echo "$INSTALL" >&6; }
+
+# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
+# It thinks the first close brace ends the variable substitution.
+test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'
+
+test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}'
+
+test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5
+$as_echo_n "checking whether build environment is sane... " >&6; }
+# Reject unsafe characters in $srcdir or the absolute working directory
+# name.  Accept space and tab only in the latter.
+am_lf='
+'
+case `pwd` in
+  *[\\\"\#\$\&\'\`$am_lf]*)
+    as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;;
+esac
+case $srcdir in
+  *[\\\"\#\$\&\'\`$am_lf\ \	]*)
+    as_fn_error $? "unsafe srcdir value: '$srcdir'" "$LINENO" 5;;
+esac
+
+# Do 'set' in a subshell so we don't clobber the current shell's
+# arguments.  Must try -L first in case configure is actually a
+# symlink; some systems play weird games with the mod time of symlinks
+# (eg FreeBSD returns the mod time of the symlink's containing
+# directory).
+if (
+   am_has_slept=no
+   for am_try in 1 2; do
+     echo "timestamp, slept: $am_has_slept" > conftest.file
+     set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null`
+     if test "$*" = "X"; then
+	# -L didn't work.
+	set X `ls -t "$srcdir/configure" conftest.file`
+     fi
+     if test "$*" != "X $srcdir/configure conftest.file" \
+	&& test "$*" != "X conftest.file $srcdir/configure"; then
+
+	# If neither matched, then we have a broken ls.  This can happen
+	# if, for instance, CONFIG_SHELL is bash and it inherits a
+	# broken ls alias from the environment.  This has actually
+	# happened.  Such a system could not be considered "sane".
+	as_fn_error $? "ls -t appears to fail.  Make sure there is not a broken
+  alias in your environment" "$LINENO" 5
+     fi
+     if test "$2" = conftest.file || test $am_try -eq 2; then
+       break
+     fi
+     # Just in case.
+     sleep 1
+     am_has_slept=yes
+   done
+   test "$2" = conftest.file
+   )
+then
+   # Ok.
+   :
+else
+   as_fn_error $? "newly created file is older than distributed files!
+Check your system clock" "$LINENO" 5
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+# If we didn't sleep, we still need to ensure time stamps of config.status and
+# generated files are strictly newer.
+am_sleep_pid=
+if grep 'slept: no' conftest.file >/dev/null 2>&1; then
+  ( sleep 1 ) &
+  am_sleep_pid=$!
+fi
+
+rm -f conftest.file
+
+test "$program_prefix" != NONE &&
+  program_transform_name="s&^&$program_prefix&;$program_transform_name"
+# Use a double $ so make ignores it.
+test "$program_suffix" != NONE &&
+  program_transform_name="s&\$&$program_suffix&;$program_transform_name"
+# Double any \ or $.
+# By default was `s,x,x', remove it if useless.
+ac_script='s/[\\$]/&&/g;s/;s,x,x,$//'
+program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"`
+
+# Expand $ac_aux_dir to an absolute path.
+am_aux_dir=`cd "$ac_aux_dir" && pwd`
+
+if test x"${MISSING+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\	*)
+    MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;;
+  *)
+    MISSING="\${SHELL} $am_aux_dir/missing" ;;
+  esac
+fi
+# Use eval to expand $SHELL
+if eval "$MISSING --is-lightweight"; then
+  am_missing_run="$MISSING "
+else
+  am_missing_run=
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5
+$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;}
+fi
+
+if test x"${install_sh+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\	*)
+    install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
+  *)
+    install_sh="\${SHELL} $am_aux_dir/install-sh"
+  esac
+fi
+
+# Installed binaries are usually stripped using 'strip' when the user
+# runs "make install-strip".  However 'strip' might not be the right
+# tool to use in cross-compilation environments, therefore Automake
+# will honor the 'STRIP' environment variable to overrule this program.
+if test "$cross_compiling" != no; then
+  if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args.
+set dummy ${ac_tool_prefix}strip; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_STRIP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$STRIP"; then
+  ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_STRIP="${ac_tool_prefix}strip"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+STRIP=$ac_cv_prog_STRIP
+if test -n "$STRIP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5
+$as_echo "$STRIP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_STRIP"; then
+  ac_ct_STRIP=$STRIP
+  # Extract the first word of "strip", so it can be a program name with args.
+set dummy strip; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_STRIP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_STRIP"; then
+  ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_STRIP="strip"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP
+if test -n "$ac_ct_STRIP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5
+$as_echo "$ac_ct_STRIP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_STRIP" = x; then
+    STRIP=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    STRIP=$ac_ct_STRIP
+  fi
+else
+  STRIP="$ac_cv_prog_STRIP"
+fi
+
+fi
+INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a thread-safe mkdir -p" >&5
+$as_echo_n "checking for a thread-safe mkdir -p... " >&6; }
+if test -z "$MKDIR_P"; then
+  if ${ac_cv_path_mkdir+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in mkdir gmkdir; do
+	 for ac_exec_ext in '' $ac_executable_extensions; do
+	   as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue
+	   case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #(
+	     'mkdir (GNU coreutils) '* | \
+	     'mkdir (coreutils) '* | \
+	     'mkdir (fileutils) '4.1*)
+	       ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext
+	       break 3;;
+	   esac
+	 done
+       done
+  done
+IFS=$as_save_IFS
+
+fi
+
+  test -d ./--version && rmdir ./--version
+  if test "${ac_cv_path_mkdir+set}" = set; then
+    MKDIR_P="$ac_cv_path_mkdir -p"
+  else
+    # As a last resort, use the slow shell script.  Don't cache a
+    # value for MKDIR_P within a source directory, because that will
+    # break other packages using the cache if that directory is
+    # removed, or if the value is a relative name.
+    MKDIR_P="$ac_install_sh -d"
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5
+$as_echo "$MKDIR_P" >&6; }
+
+for ac_prog in gawk mawk nawk awk
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_AWK+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$AWK"; then
+  ac_cv_prog_AWK="$AWK" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_AWK="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+AWK=$ac_cv_prog_AWK
+if test -n "$AWK"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5
+$as_echo "$AWK" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$AWK" && break
+done
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5
+$as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; }
+set x ${MAKE-make}
+ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'`
+if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.make <<\_ACEOF
+SHELL = /bin/sh
+all:
+	@echo '@@@%%%=$(MAKE)=@@@%%%'
+_ACEOF
+# GNU make sometimes prints "make[1]: Entering ...", which would confuse us.
+case `${MAKE-make} -f conftest.make 2>/dev/null` in
+  *@@@%%%=?*=@@@%%%*)
+    eval ac_cv_prog_make_${ac_make}_set=yes;;
+  *)
+    eval ac_cv_prog_make_${ac_make}_set=no;;
+esac
+rm -f conftest.make
+fi
+if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+  SET_MAKE=
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+  SET_MAKE="MAKE=${MAKE-make}"
+fi
+
+rm -rf .tst 2>/dev/null
+mkdir .tst 2>/dev/null
+if test -d .tst; then
+  am__leading_dot=.
+else
+  am__leading_dot=_
+fi
+rmdir .tst 2>/dev/null
+
+# Check whether --enable-silent-rules was given.
+if test "${enable_silent_rules+set}" = set; then :
+  enableval=$enable_silent_rules;
+fi
+
+case $enable_silent_rules in # (((
+  yes) AM_DEFAULT_VERBOSITY=0;;
+   no) AM_DEFAULT_VERBOSITY=1;;
+    *) AM_DEFAULT_VERBOSITY=1;;
+esac
+am_make=${MAKE-make}
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5
+$as_echo_n "checking whether $am_make supports nested variables... " >&6; }
+if ${am_cv_make_support_nested_variables+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if $as_echo 'TRUE=$(BAR$(V))
+BAR0=false
+BAR1=true
+V=1
+am__doit:
+	@$(TRUE)
+.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then
+  am_cv_make_support_nested_variables=yes
+else
+  am_cv_make_support_nested_variables=no
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5
+$as_echo "$am_cv_make_support_nested_variables" >&6; }
+if test $am_cv_make_support_nested_variables = yes; then
+    AM_V='$(V)'
+  AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)'
+else
+  AM_V=$AM_DEFAULT_VERBOSITY
+  AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY
+fi
+AM_BACKSLASH='\'
+
+if test "`cd $srcdir && pwd`" != "`pwd`"; then
+  # Use -I$(srcdir) only when $(srcdir) != ., so that make's output
+  # is not polluted with repeated "-I."
+  am__isrc=' -I$(srcdir)'
+  # test to see if srcdir already configured
+  if test -f $srcdir/config.status; then
+    as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5
+  fi
+fi
+
+# test whether we have cygpath
+if test -z "$CYGPATH_W"; then
+  if (cygpath --version) >/dev/null 2>/dev/null; then
+    CYGPATH_W='cygpath -w'
+  else
+    CYGPATH_W=echo
+  fi
+fi
+
+
+# Define the identity of the package.
+ PACKAGE='lcms2'
+ VERSION='2.9'
+
+
+# Some tools Automake needs.
+
+ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"}
+
+
+AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"}
+
+
+AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"}
+
+
+AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"}
+
+
+MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
+
+# For better backward compatibility.  To be removed once Automake 1.9.x
+# dies out for good.  For more background, see:
+# <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
+# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
+mkdir_p='$(MKDIR_P)'
+
+# We need awk for the "check" target (and possibly the TAP driver).  The
+# system "awk" is bad on some platforms.
+# Always define AMTAR for backward compatibility.  Yes, it's still used
+# in the wild :-(  We should find a proper way to deprecate it ...
+AMTAR='$${TAR-tar}'
+
+
+# We'll loop over all known methods to create a tar archive until one works.
+_am_tools='gnutar  pax cpio none'
+
+am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'
+
+
+
+
+
+
+# POSIX will say in a future version that running "rm -f" with no argument
+# is OK; and we want to be able to make that assumption in our Makefile
+# recipes.  So use an aggressive probe to check that the usage we want is
+# actually supported "in the wild" to an acceptable degree.
+# See automake bug#10828.
+# To make any issue more visible, cause the running configure to be aborted
+# by default if the 'rm' program in use doesn't match our expectations; the
+# user can still override this though.
+if rm -f && rm -fr && rm -rf; then : OK; else
+  cat >&2 <<'END'
+Oops!
+
+Your 'rm' program seems unable to run without file operands specified
+on the command line, even when the '-f' option is present.  This is contrary
+to the behaviour of most rm programs out there, and not conforming with
+the upcoming POSIX standard: <http://austingroupbugs.net/view.php?id=542>
+
+Please tell bug-automake@gnu.org about your system, including the value
+of your $PATH and any error possibly output before this message.  This
+can help us improve future automake versions.
+
+END
+  if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then
+    echo 'Configuration will proceed anyway, since you have set the' >&2
+    echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2
+    echo >&2
+  else
+    cat >&2 <<'END'
+Aborting the configuration process, to ensure you take notice of the issue.
+
+You can download and install GNU coreutils to get an 'rm' implementation
+that behaves properly: <http://www.gnu.org/software/coreutils/>.
+
+If you want to complete the configuration process using your problematic
+'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
+to "yes", and re-run configure.
+
+END
+    as_fn_error $? "Your 'rm' program is bad, sorry." "$LINENO" 5
+  fi
+fi
+
+
+
+# Check for programs
+DEPDIR="${am__leading_dot}deps"
+
+ac_config_commands="$ac_config_commands depfiles"
+
+
+am_make=${MAKE-make}
+cat > confinc << 'END'
+am__doit:
+	@echo this is the am__doit target
+.PHONY: am__doit
+END
+# If we don't find an include directive, just comment out the code.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for style of include used by $am_make" >&5
+$as_echo_n "checking for style of include used by $am_make... " >&6; }
+am__include="#"
+am__quote=
+_am_result=none
+# First try GNU make style include.
+echo "include confinc" > confmf
+# Ignore all kinds of additional output from 'make'.
+case `$am_make -s -f confmf 2> /dev/null` in #(
+*the\ am__doit\ target*)
+  am__include=include
+  am__quote=
+  _am_result=GNU
+  ;;
+esac
+# Now try BSD make style include.
+if test "$am__include" = "#"; then
+   echo '.include "confinc"' > confmf
+   case `$am_make -s -f confmf 2> /dev/null` in #(
+   *the\ am__doit\ target*)
+     am__include=.include
+     am__quote="\""
+     _am_result=BSD
+     ;;
+   esac
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $_am_result" >&5
+$as_echo "$_am_result" >&6; }
+rm -f confinc confmf
+
+# Check whether --enable-dependency-tracking was given.
+if test "${enable_dependency_tracking+set}" = set; then :
+  enableval=$enable_dependency_tracking;
+fi
+
+if test "x$enable_dependency_tracking" != xno; then
+  am_depcomp="$ac_aux_dir/depcomp"
+  AMDEPBACKSLASH='\'
+  am__nodep='_no'
+fi
+ if test "x$enable_dependency_tracking" != xno; then
+  AMDEP_TRUE=
+  AMDEP_FALSE='#'
+else
+  AMDEP_TRUE='#'
+  AMDEP_FALSE=
+fi
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}gcc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="${ac_tool_prefix}gcc"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_CC"; then
+  ac_ct_CC=$CC
+  # Extract the first word of "gcc", so it can be a program name with args.
+set dummy gcc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_CC="gcc"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_CC" = x; then
+    CC=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CC=$ac_ct_CC
+  fi
+else
+  CC="$ac_cv_prog_CC"
+fi
+
+if test -z "$CC"; then
+          if test -n "$ac_tool_prefix"; then
+    # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}cc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="${ac_tool_prefix}cc"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  fi
+fi
+if test -z "$CC"; then
+  # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+  ac_prog_rejected=no
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
+       ac_prog_rejected=yes
+       continue
+     fi
+    ac_cv_prog_CC="cc"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+if test $ac_prog_rejected = yes; then
+  # We found a bogon in the path, so make sure we never use it.
+  set dummy $ac_cv_prog_CC
+  shift
+  if test $# != 0; then
+    # We chose a different compiler from the bogus one.
+    # However, it has the same basename, so the bogon will be chosen
+    # first if we set CC to just the basename; use the full file name.
+    shift
+    ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@"
+  fi
+fi
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$CC"; then
+  if test -n "$ac_tool_prefix"; then
+  for ac_prog in cl.exe
+  do
+    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+    test -n "$CC" && break
+  done
+fi
+if test -z "$CC"; then
+  ac_ct_CC=$CC
+  for ac_prog in cl.exe
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_CC="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$ac_ct_CC" && break
+done
+
+  if test "x$ac_ct_CC" = x; then
+    CC=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CC=$ac_ct_CC
+  fi
+fi
+
+fi
+
+
+test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "no acceptable C compiler found in \$PATH
+See \`config.log' for more details" "$LINENO" 5; }
+
+# Provide some information about the compiler.
+$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5
+set X $ac_compile
+ac_compiler=$2
+for ac_option in --version -v -V -qversion; do
+  { { ac_try="$ac_compiler $ac_option >&5"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+  ac_status=$?
+  if test -s conftest.err; then
+    sed '10a\
+... rest of stderr output deleted ...
+         10q' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+  fi
+  rm -f conftest.er1 conftest.err
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+done
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out"
+# Try to create an executable without -o first, disregard a.out.
+# It will help us diagnose broken compilers and make an initial guess
+# at exeext.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5
+$as_echo_n "checking whether the C compiler works... " >&6; }
+ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`
+
+# The possible output files:
+ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*"
+
+ac_rmfiles=
+for ac_file in $ac_files
+do
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;
+    * ) ac_rmfiles="$ac_rmfiles $ac_file";;
+  esac
+done
+rm -f $ac_rmfiles
+
+if { { ac_try="$ac_link_default"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link_default") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then :
+  # Autoconf-2.13 could set the ac_cv_exeext variable to `no'.
+# So ignore a value of `no', otherwise this would lead to `EXEEXT = no'
+# in a Makefile.  We should not override ac_cv_exeext if it was cached,
+# so that the user can short-circuit this test for compilers unknown to
+# Autoconf.
+for ac_file in $ac_files ''
+do
+  test -f "$ac_file" || continue
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj )
+	;;
+    [ab].out )
+	# We found the default executable, but exeext='' is most
+	# certainly right.
+	break;;
+    *.* )
+	if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no;
+	then :; else
+	   ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+	fi
+	# We set ac_cv_exeext here because the later test for it is not
+	# safe: cross compilers may not add the suffix if given an `-o'
+	# argument, so we may need to know it at that point already.
+	# Even if this section looks crufty: it has the advantage of
+	# actually working.
+	break;;
+    * )
+	break;;
+  esac
+done
+test "$ac_cv_exeext" = no && ac_cv_exeext=
+
+else
+  ac_file=''
+fi
+if test -z "$ac_file"; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+$as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error 77 "C compiler cannot create executables
+See \`config.log' for more details" "$LINENO" 5; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5
+$as_echo_n "checking for C compiler default output file name... " >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5
+$as_echo "$ac_file" >&6; }
+ac_exeext=$ac_cv_exeext
+
+rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out
+ac_clean_files=$ac_clean_files_save
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5
+$as_echo_n "checking for suffix of executables... " >&6; }
+if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then :
+  # If both `conftest.exe' and `conftest' are `present' (well, observable)
+# catch `conftest.exe'.  For instance with Cygwin, `ls conftest' will
+# work properly (i.e., refer to `conftest.exe'), while it won't with
+# `rm'.
+for ac_file in conftest.exe conftest conftest.*; do
+  test -f "$ac_file" || continue
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;
+    *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+	  break;;
+    * ) break;;
+  esac
+done
+else
+  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+rm -f conftest conftest$ac_cv_exeext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5
+$as_echo "$ac_cv_exeext" >&6; }
+
+rm -f conftest.$ac_ext
+EXEEXT=$ac_cv_exeext
+ac_exeext=$EXEEXT
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdio.h>
+int
+main ()
+{
+FILE *f = fopen ("conftest.out", "w");
+ return ferror (f) || fclose (f) != 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+ac_clean_files="$ac_clean_files conftest.out"
+# Check that the compiler produces executables we can run.  If not, either
+# the compiler is broken, or we cross compile.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5
+$as_echo_n "checking whether we are cross compiling... " >&6; }
+if test "$cross_compiling" != yes; then
+  { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+  if { ac_try='./conftest$ac_cv_exeext'
+  { { case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+    cross_compiling=no
+  else
+    if test "$cross_compiling" = maybe; then
+	cross_compiling=yes
+    else
+	{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details" "$LINENO" 5; }
+    fi
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5
+$as_echo "$cross_compiling" >&6; }
+
+rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out
+ac_clean_files=$ac_clean_files_save
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5
+$as_echo_n "checking for suffix of object files... " >&6; }
+if ${ac_cv_objext+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.o conftest.obj
+if { { ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_compile") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then :
+  for ac_file in conftest.o conftest.obj conftest.*; do
+  test -f "$ac_file" || continue;
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;;
+    *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'`
+       break;;
+  esac
+done
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot compute suffix of object files: cannot compile
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+rm -f conftest.$ac_cv_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5
+$as_echo "$ac_cv_objext" >&6; }
+OBJEXT=$ac_cv_objext
+ac_objext=$OBJEXT
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5
+$as_echo_n "checking whether we are using the GNU C compiler... " >&6; }
+if ${ac_cv_c_compiler_gnu+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+#ifndef __GNUC__
+       choke me
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_compiler_gnu=yes
+else
+  ac_compiler_gnu=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_c_compiler_gnu=$ac_compiler_gnu
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5
+$as_echo "$ac_cv_c_compiler_gnu" >&6; }
+if test $ac_compiler_gnu = yes; then
+  GCC=yes
+else
+  GCC=
+fi
+ac_test_CFLAGS=${CFLAGS+set}
+ac_save_CFLAGS=$CFLAGS
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5
+$as_echo_n "checking whether $CC accepts -g... " >&6; }
+if ${ac_cv_prog_cc_g+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_save_c_werror_flag=$ac_c_werror_flag
+   ac_c_werror_flag=yes
+   ac_cv_prog_cc_g=no
+   CFLAGS="-g"
+   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_prog_cc_g=yes
+else
+  CFLAGS=""
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+else
+  ac_c_werror_flag=$ac_save_c_werror_flag
+	 CFLAGS="-g"
+	 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_prog_cc_g=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+   ac_c_werror_flag=$ac_save_c_werror_flag
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5
+$as_echo "$ac_cv_prog_cc_g" >&6; }
+if test "$ac_test_CFLAGS" = set; then
+  CFLAGS=$ac_save_CFLAGS
+elif test $ac_cv_prog_cc_g = yes; then
+  if test "$GCC" = yes; then
+    CFLAGS="-g -O2"
+  else
+    CFLAGS="-g"
+  fi
+else
+  if test "$GCC" = yes; then
+    CFLAGS="-O2"
+  else
+    CFLAGS=
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
+$as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
+if ${ac_cv_prog_cc_c89+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_cv_prog_cc_c89=no
+ac_save_CC=$CC
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdarg.h>
+#include <stdio.h>
+struct stat;
+/* Most of the following tests are stolen from RCS 5.7's src/conf.sh.  */
+struct buf { int x; };
+FILE * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+     char **p;
+     int i;
+{
+  return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+  char *s;
+  va_list v;
+  va_start (v,p);
+  s = g (p, va_arg (v,int));
+  va_end (v);
+  return s;
+}
+
+/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default.  It has
+   function prototypes and stuff, but not '\xHH' hex character constants.
+   These don't provoke an error unfortunately, instead are silently treated
+   as 'x'.  The following induces an error, until -std is added to get
+   proper ANSI mode.  Curiously '\x00'!='x' always comes out true, for an
+   array size at least.  It's necessary to write '\x00'==0 to get something
+   that's true only with -std.  */
+int osf4_cc_array ['\x00' == 0 ? 1 : -1];
+
+/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters
+   inside strings and character constants.  */
+#define FOO(x) 'x'
+int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1];
+
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
+int argc;
+char **argv;
+int
+main ()
+{
+return f (e, argv, 0) != argv[0]  ||  f (e, argv, 1) != argv[1];
+  ;
+  return 0;
+}
+_ACEOF
+for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \
+	-Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
+do
+  CC="$ac_save_CC $ac_arg"
+  if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_prog_cc_c89=$ac_arg
+fi
+rm -f core conftest.err conftest.$ac_objext
+  test "x$ac_cv_prog_cc_c89" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+
+fi
+# AC_CACHE_VAL
+case "x$ac_cv_prog_cc_c89" in
+  x)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+$as_echo "none needed" >&6; } ;;
+  xno)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+$as_echo "unsupported" >&6; } ;;
+  *)
+    CC="$CC $ac_cv_prog_cc_c89"
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5
+$as_echo "$ac_cv_prog_cc_c89" >&6; } ;;
+esac
+if test "x$ac_cv_prog_cc_c89" != xno; then :
+
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5
+$as_echo_n "checking whether $CC understands -c and -o together... " >&6; }
+if ${am_cv_prog_cc_c_o+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+  # Make sure it works both with $CC and with simple cc.
+  # Following AC_PROG_CC_C_O, we do the test twice because some
+  # compilers refuse to overwrite an existing .o file with -o,
+  # though they will create one.
+  am_cv_prog_cc_c_o=yes
+  for am_i in 1 2; do
+    if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5
+   ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   (exit $ac_status); } \
+         && test -f conftest2.$ac_objext; then
+      : OK
+    else
+      am_cv_prog_cc_c_o=no
+      break
+    fi
+  done
+  rm -f core conftest*
+  unset am_i
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5
+$as_echo "$am_cv_prog_cc_c_o" >&6; }
+if test "$am_cv_prog_cc_c_o" != yes; then
+   # Losing compiler, so override with the script.
+   # FIXME: It is wrong to rewrite CC.
+   # But if we don't then we get into trouble of one sort or another.
+   # A longer-term fix would be to have automake use am__CC in this case,
+   # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)"
+   CC="$am_aux_dir/compile $CC"
+fi
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+depcc="$CC"   am_compiler_list=
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5
+$as_echo_n "checking dependency style of $depcc... " >&6; }
+if ${am_cv_CC_dependencies_compiler_type+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
+  # We make a subdir and do the tests there.  Otherwise we can end up
+  # making bogus files that we don't know about and never remove.  For
+  # instance it was reported that on HP-UX the gcc test will end up
+  # making a dummy file named 'D' -- because '-MD' means "put the output
+  # in D".
+  rm -rf conftest.dir
+  mkdir conftest.dir
+  # Copy depcomp to subdir because otherwise we won't find it if we're
+  # using a relative directory.
+  cp "$am_depcomp" conftest.dir
+  cd conftest.dir
+  # We will build objects and dependencies in a subdirectory because
+  # it helps to detect inapplicable dependency modes.  For instance
+  # both Tru64's cc and ICC support -MD to output dependencies as a
+  # side effect of compilation, but ICC will put the dependencies in
+  # the current directory while Tru64 will put them in the object
+  # directory.
+  mkdir sub
+
+  am_cv_CC_dependencies_compiler_type=none
+  if test "$am_compiler_list" = ""; then
+     am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp`
+  fi
+  am__universal=false
+  case " $depcc " in #(
+     *\ -arch\ *\ -arch\ *) am__universal=true ;;
+     esac
+
+  for depmode in $am_compiler_list; do
+    # Setup a source with many dependencies, because some compilers
+    # like to wrap large dependency lists on column 80 (with \), and
+    # we should not choose a depcomp mode which is confused by this.
+    #
+    # We need to recreate these files for each test, as the compiler may
+    # overwrite some of them when testing with obscure command lines.
+    # This happens at least with the AIX C compiler.
+    : > sub/conftest.c
+    for i in 1 2 3 4 5 6; do
+      echo '#include "conftst'$i'.h"' >> sub/conftest.c
+      # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with
+      # Solaris 10 /bin/sh.
+      echo '/* dummy */' > sub/conftst$i.h
+    done
+    echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
+
+    # We check with '-c' and '-o' for the sake of the "dashmstdout"
+    # mode.  It turns out that the SunPro C++ compiler does not properly
+    # handle '-M -o', and we need to detect this.  Also, some Intel
+    # versions had trouble with output in subdirs.
+    am__obj=sub/conftest.${OBJEXT-o}
+    am__minus_obj="-o $am__obj"
+    case $depmode in
+    gcc)
+      # This depmode causes a compiler race in universal mode.
+      test "$am__universal" = false || continue
+      ;;
+    nosideeffect)
+      # After this tag, mechanisms are not by side-effect, so they'll
+      # only be used when explicitly requested.
+      if test "x$enable_dependency_tracking" = xyes; then
+	continue
+      else
+	break
+      fi
+      ;;
+    msvc7 | msvc7msys | msvisualcpp | msvcmsys)
+      # This compiler won't grok '-c -o', but also, the minuso test has
+      # not run yet.  These depmodes are late enough in the game, and
+      # so weak that their functioning should not be impacted.
+      am__obj=conftest.${OBJEXT-o}
+      am__minus_obj=
+      ;;
+    none) break ;;
+    esac
+    if depmode=$depmode \
+       source=sub/conftest.c object=$am__obj \
+       depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
+       $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \
+         >/dev/null 2>conftest.err &&
+       grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep $am__obj sub/conftest.Po > /dev/null 2>&1 &&
+       ${MAKE-make} -s -f confmf > /dev/null 2>&1; then
+      # icc doesn't choke on unknown options, it will just issue warnings
+      # or remarks (even with -Werror).  So we grep stderr for any message
+      # that says an option was ignored or not supported.
+      # When given -MP, icc 7.0 and 7.1 complain thusly:
+      #   icc: Command line warning: ignoring option '-M'; no argument required
+      # The diagnosis changed in icc 8.0:
+      #   icc: Command line remark: option '-MP' not supported
+      if (grep 'ignoring option' conftest.err ||
+          grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
+        am_cv_CC_dependencies_compiler_type=$depmode
+        break
+      fi
+    fi
+  done
+
+  cd ..
+  rm -rf conftest.dir
+else
+  am_cv_CC_dependencies_compiler_type=none
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5
+$as_echo "$am_cv_CC_dependencies_compiler_type" >&6; }
+CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type
+
+ if
+  test "x$enable_dependency_tracking" != xno \
+  && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then
+  am__fastdepCC_TRUE=
+  am__fastdepCC_FALSE='#'
+else
+  am__fastdepCC_TRUE='#'
+  am__fastdepCC_FALSE=
+fi
+
+
+   case $ac_cv_prog_cc_stdc in #(
+  no) :
+    ac_cv_prog_cc_c99=no; ac_cv_prog_cc_c89=no ;; #(
+  *) :
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C99" >&5
+$as_echo_n "checking for $CC option to accept ISO C99... " >&6; }
+if ${ac_cv_prog_cc_c99+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_cv_prog_cc_c99=no
+ac_save_CC=$CC
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <wchar.h>
+#include <stdio.h>
+
+// Check varargs macros.  These examples are taken from C99 6.10.3.5.
+#define debug(...) fprintf (stderr, __VA_ARGS__)
+#define showlist(...) puts (#__VA_ARGS__)
+#define report(test,...) ((test) ? puts (#test) : printf (__VA_ARGS__))
+static void
+test_varargs_macros (void)
+{
+  int x = 1234;
+  int y = 5678;
+  debug ("Flag");
+  debug ("X = %d\n", x);
+  showlist (The first, second, and third items.);
+  report (x>y, "x is %d but y is %d", x, y);
+}
+
+// Check long long types.
+#define BIG64 18446744073709551615ull
+#define BIG32 4294967295ul
+#define BIG_OK (BIG64 / BIG32 == 4294967297ull && BIG64 % BIG32 == 0)
+#if !BIG_OK
+  your preprocessor is broken;
+#endif
+#if BIG_OK
+#else
+  your preprocessor is broken;
+#endif
+static long long int bignum = -9223372036854775807LL;
+static unsigned long long int ubignum = BIG64;
+
+struct incomplete_array
+{
+  int datasize;
+  double data[];
+};
+
+struct named_init {
+  int number;
+  const wchar_t *name;
+  double average;
+};
+
+typedef const char *ccp;
+
+static inline int
+test_restrict (ccp restrict text)
+{
+  // See if C++-style comments work.
+  // Iterate through items via the restricted pointer.
+  // Also check for declarations in for loops.
+  for (unsigned int i = 0; *(text+i) != '\0'; ++i)
+    continue;
+  return 0;
+}
+
+// Check varargs and va_copy.
+static void
+test_varargs (const char *format, ...)
+{
+  va_list args;
+  va_start (args, format);
+  va_list args_copy;
+  va_copy (args_copy, args);
+
+  const char *str;
+  int number;
+  float fnumber;
+
+  while (*format)
+    {
+      switch (*format++)
+	{
+	case 's': // string
+	  str = va_arg (args_copy, const char *);
+	  break;
+	case 'd': // int
+	  number = va_arg (args_copy, int);
+	  break;
+	case 'f': // float
+	  fnumber = va_arg (args_copy, double);
+	  break;
+	default:
+	  break;
+	}
+    }
+  va_end (args_copy);
+  va_end (args);
+}
+
+int
+main ()
+{
+
+  // Check bool.
+  _Bool success = false;
+
+  // Check restrict.
+  if (test_restrict ("String literal") == 0)
+    success = true;
+  char *restrict newvar = "Another string";
+
+  // Check varargs.
+  test_varargs ("s, d' f .", "string", 65, 34.234);
+  test_varargs_macros ();
+
+  // Check flexible array members.
+  struct incomplete_array *ia =
+    malloc (sizeof (struct incomplete_array) + (sizeof (double) * 10));
+  ia->datasize = 10;
+  for (int i = 0; i < ia->datasize; ++i)
+    ia->data[i] = i * 1.234;
+
+  // Check named initializers.
+  struct named_init ni = {
+    .number = 34,
+    .name = L"Test wide string",
+    .average = 543.34343,
+  };
+
+  ni.number = 58;
+
+  int dynamic_array[ni.number];
+  dynamic_array[ni.number - 1] = 543;
+
+  // work around unused variable warnings
+  return (!success || bignum == 0LL || ubignum == 0uLL || newvar[0] == 'x'
+	  || dynamic_array[ni.number - 1] != 543);
+
+  ;
+  return 0;
+}
+_ACEOF
+for ac_arg in '' -std=gnu99 -std=c99 -c99 -AC99 -D_STDC_C99= -qlanglvl=extc99
+do
+  CC="$ac_save_CC $ac_arg"
+  if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_prog_cc_c99=$ac_arg
+fi
+rm -f core conftest.err conftest.$ac_objext
+  test "x$ac_cv_prog_cc_c99" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+
+fi
+# AC_CACHE_VAL
+case "x$ac_cv_prog_cc_c99" in
+  x)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+$as_echo "none needed" >&6; } ;;
+  xno)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+$as_echo "unsupported" >&6; } ;;
+  *)
+    CC="$CC $ac_cv_prog_cc_c99"
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5
+$as_echo "$ac_cv_prog_cc_c99" >&6; } ;;
+esac
+if test "x$ac_cv_prog_cc_c99" != xno; then :
+  ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c99
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
+$as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
+if ${ac_cv_prog_cc_c89+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_cv_prog_cc_c89=no
+ac_save_CC=$CC
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdarg.h>
+#include <stdio.h>
+struct stat;
+/* Most of the following tests are stolen from RCS 5.7's src/conf.sh.  */
+struct buf { int x; };
+FILE * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+     char **p;
+     int i;
+{
+  return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+  char *s;
+  va_list v;
+  va_start (v,p);
+  s = g (p, va_arg (v,int));
+  va_end (v);
+  return s;
+}
+
+/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default.  It has
+   function prototypes and stuff, but not '\xHH' hex character constants.
+   These don't provoke an error unfortunately, instead are silently treated
+   as 'x'.  The following induces an error, until -std is added to get
+   proper ANSI mode.  Curiously '\x00'!='x' always comes out true, for an
+   array size at least.  It's necessary to write '\x00'==0 to get something
+   that's true only with -std.  */
+int osf4_cc_array ['\x00' == 0 ? 1 : -1];
+
+/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters
+   inside strings and character constants.  */
+#define FOO(x) 'x'
+int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1];
+
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
+int argc;
+char **argv;
+int
+main ()
+{
+return f (e, argv, 0) != argv[0]  ||  f (e, argv, 1) != argv[1];
+  ;
+  return 0;
+}
+_ACEOF
+for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \
+	-Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
+do
+  CC="$ac_save_CC $ac_arg"
+  if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_prog_cc_c89=$ac_arg
+fi
+rm -f core conftest.err conftest.$ac_objext
+  test "x$ac_cv_prog_cc_c89" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+
+fi
+# AC_CACHE_VAL
+case "x$ac_cv_prog_cc_c89" in
+  x)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+$as_echo "none needed" >&6; } ;;
+  xno)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+$as_echo "unsupported" >&6; } ;;
+  *)
+    CC="$CC $ac_cv_prog_cc_c89"
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5
+$as_echo "$ac_cv_prog_cc_c89" >&6; } ;;
+esac
+if test "x$ac_cv_prog_cc_c89" != xno; then :
+  ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c89
+else
+  ac_cv_prog_cc_stdc=no
+fi
+
+fi
+ ;;
+esac
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO Standard C" >&5
+$as_echo_n "checking for $CC option to accept ISO Standard C... " >&6; }
+  if ${ac_cv_prog_cc_stdc+:} false; then :
+  $as_echo_n "(cached) " >&6
+fi
+
+  case $ac_cv_prog_cc_stdc in #(
+  no) :
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+$as_echo "unsupported" >&6; } ;; #(
+  '') :
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+$as_echo "none needed" >&6; } ;; #(
+  *) :
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_stdc" >&5
+$as_echo "$ac_cv_prog_cc_stdc" >&6; } ;;
+esac
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5
+$as_echo_n "checking how to run the C preprocessor... " >&6; }
+# On Suns, sometimes $CPP names a directory.
+if test -n "$CPP" && test -d "$CPP"; then
+  CPP=
+fi
+if test -z "$CPP"; then
+  if ${ac_cv_prog_CPP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+      # Double quotes because CPP needs to be expanded
+    for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp"
+    do
+      ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+  # <limits.h> exists even on freestanding compilers.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+		     Syntax error
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+
+else
+  # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+  # Broken: success on invalid input.
+continue
+else
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.i conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+  break
+fi
+
+    done
+    ac_cv_prog_CPP=$CPP
+
+fi
+  CPP=$ac_cv_prog_CPP
+else
+  ac_cv_prog_CPP=$CPP
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5
+$as_echo "$CPP" >&6; }
+ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+  # <limits.h> exists even on freestanding compilers.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+		     Syntax error
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+
+else
+  # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+  # Broken: success on invalid input.
+continue
+else
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.i conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+
+else
+  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+if test -z "$CXX"; then
+  if test -n "$CCC"; then
+    CXX=$CCC
+  else
+    if test -n "$ac_tool_prefix"; then
+  for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC
+  do
+    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CXX+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CXX"; then
+  ac_cv_prog_CXX="$CXX" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CXX="$ac_tool_prefix$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CXX=$ac_cv_prog_CXX
+if test -n "$CXX"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5
+$as_echo "$CXX" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+    test -n "$CXX" && break
+  done
+fi
+if test -z "$CXX"; then
+  ac_ct_CXX=$CXX
+  for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CXX+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_CXX"; then
+  ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_CXX="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CXX=$ac_cv_prog_ac_ct_CXX
+if test -n "$ac_ct_CXX"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CXX" >&5
+$as_echo "$ac_ct_CXX" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$ac_ct_CXX" && break
+done
+
+  if test "x$ac_ct_CXX" = x; then
+    CXX="g++"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CXX=$ac_ct_CXX
+  fi
+fi
+
+  fi
+fi
+# Provide some information about the compiler.
+$as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler version" >&5
+set X $ac_compile
+ac_compiler=$2
+for ac_option in --version -v -V -qversion; do
+  { { ac_try="$ac_compiler $ac_option >&5"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+  ac_status=$?
+  if test -s conftest.err; then
+    sed '10a\
+... rest of stderr output deleted ...
+         10q' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+  fi
+  rm -f conftest.er1 conftest.err
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+done
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C++ compiler" >&5
+$as_echo_n "checking whether we are using the GNU C++ compiler... " >&6; }
+if ${ac_cv_cxx_compiler_gnu+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+#ifndef __GNUC__
+       choke me
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+  ac_compiler_gnu=yes
+else
+  ac_compiler_gnu=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_cxx_compiler_gnu=$ac_compiler_gnu
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_cxx_compiler_gnu" >&5
+$as_echo "$ac_cv_cxx_compiler_gnu" >&6; }
+if test $ac_compiler_gnu = yes; then
+  GXX=yes
+else
+  GXX=
+fi
+ac_test_CXXFLAGS=${CXXFLAGS+set}
+ac_save_CXXFLAGS=$CXXFLAGS
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5
+$as_echo_n "checking whether $CXX accepts -g... " >&6; }
+if ${ac_cv_prog_cxx_g+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_save_cxx_werror_flag=$ac_cxx_werror_flag
+   ac_cxx_werror_flag=yes
+   ac_cv_prog_cxx_g=no
+   CXXFLAGS="-g"
+   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+  ac_cv_prog_cxx_g=yes
+else
+  CXXFLAGS=""
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+
+else
+  ac_cxx_werror_flag=$ac_save_cxx_werror_flag
+	 CXXFLAGS="-g"
+	 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+  ac_cv_prog_cxx_g=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+   ac_cxx_werror_flag=$ac_save_cxx_werror_flag
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_g" >&5
+$as_echo "$ac_cv_prog_cxx_g" >&6; }
+if test "$ac_test_CXXFLAGS" = set; then
+  CXXFLAGS=$ac_save_CXXFLAGS
+elif test $ac_cv_prog_cxx_g = yes; then
+  if test "$GXX" = yes; then
+    CXXFLAGS="-g -O2"
+  else
+    CXXFLAGS="-g"
+  fi
+else
+  if test "$GXX" = yes; then
+    CXXFLAGS="-O2"
+  else
+    CXXFLAGS=
+  fi
+fi
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+depcc="$CXX"  am_compiler_list=
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5
+$as_echo_n "checking dependency style of $depcc... " >&6; }
+if ${am_cv_CXX_dependencies_compiler_type+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
+  # We make a subdir and do the tests there.  Otherwise we can end up
+  # making bogus files that we don't know about and never remove.  For
+  # instance it was reported that on HP-UX the gcc test will end up
+  # making a dummy file named 'D' -- because '-MD' means "put the output
+  # in D".
+  rm -rf conftest.dir
+  mkdir conftest.dir
+  # Copy depcomp to subdir because otherwise we won't find it if we're
+  # using a relative directory.
+  cp "$am_depcomp" conftest.dir
+  cd conftest.dir
+  # We will build objects and dependencies in a subdirectory because
+  # it helps to detect inapplicable dependency modes.  For instance
+  # both Tru64's cc and ICC support -MD to output dependencies as a
+  # side effect of compilation, but ICC will put the dependencies in
+  # the current directory while Tru64 will put them in the object
+  # directory.
+  mkdir sub
+
+  am_cv_CXX_dependencies_compiler_type=none
+  if test "$am_compiler_list" = ""; then
+     am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp`
+  fi
+  am__universal=false
+  case " $depcc " in #(
+     *\ -arch\ *\ -arch\ *) am__universal=true ;;
+     esac
+
+  for depmode in $am_compiler_list; do
+    # Setup a source with many dependencies, because some compilers
+    # like to wrap large dependency lists on column 80 (with \), and
+    # we should not choose a depcomp mode which is confused by this.
+    #
+    # We need to recreate these files for each test, as the compiler may
+    # overwrite some of them when testing with obscure command lines.
+    # This happens at least with the AIX C compiler.
+    : > sub/conftest.c
+    for i in 1 2 3 4 5 6; do
+      echo '#include "conftst'$i'.h"' >> sub/conftest.c
+      # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with
+      # Solaris 10 /bin/sh.
+      echo '/* dummy */' > sub/conftst$i.h
+    done
+    echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
+
+    # We check with '-c' and '-o' for the sake of the "dashmstdout"
+    # mode.  It turns out that the SunPro C++ compiler does not properly
+    # handle '-M -o', and we need to detect this.  Also, some Intel
+    # versions had trouble with output in subdirs.
+    am__obj=sub/conftest.${OBJEXT-o}
+    am__minus_obj="-o $am__obj"
+    case $depmode in
+    gcc)
+      # This depmode causes a compiler race in universal mode.
+      test "$am__universal" = false || continue
+      ;;
+    nosideeffect)
+      # After this tag, mechanisms are not by side-effect, so they'll
+      # only be used when explicitly requested.
+      if test "x$enable_dependency_tracking" = xyes; then
+	continue
+      else
+	break
+      fi
+      ;;
+    msvc7 | msvc7msys | msvisualcpp | msvcmsys)
+      # This compiler won't grok '-c -o', but also, the minuso test has
+      # not run yet.  These depmodes are late enough in the game, and
+      # so weak that their functioning should not be impacted.
+      am__obj=conftest.${OBJEXT-o}
+      am__minus_obj=
+      ;;
+    none) break ;;
+    esac
+    if depmode=$depmode \
+       source=sub/conftest.c object=$am__obj \
+       depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
+       $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \
+         >/dev/null 2>conftest.err &&
+       grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep $am__obj sub/conftest.Po > /dev/null 2>&1 &&
+       ${MAKE-make} -s -f confmf > /dev/null 2>&1; then
+      # icc doesn't choke on unknown options, it will just issue warnings
+      # or remarks (even with -Werror).  So we grep stderr for any message
+      # that says an option was ignored or not supported.
+      # When given -MP, icc 7.0 and 7.1 complain thusly:
+      #   icc: Command line warning: ignoring option '-M'; no argument required
+      # The diagnosis changed in icc 8.0:
+      #   icc: Command line remark: option '-MP' not supported
+      if (grep 'ignoring option' conftest.err ||
+          grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
+        am_cv_CXX_dependencies_compiler_type=$depmode
+        break
+      fi
+    fi
+  done
+
+  cd ..
+  rm -rf conftest.dir
+else
+  am_cv_CXX_dependencies_compiler_type=none
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CXX_dependencies_compiler_type" >&5
+$as_echo "$am_cv_CXX_dependencies_compiler_type" >&6; }
+# Record the dependency mode detected for the C++ compiler, in the
+# "depmode=<mode>" form.
+CXXDEPMODE=depmode=$am_cv_CXX_dependencies_compiler_type
+
+# Set the am__fastdepCXX_TRUE / am__fastdepCXX_FALSE pair.  Exactly one of
+# the two is empty and the other is '#'.  The TRUE variant is the empty one
+# only when dependency tracking was not disabled (value other than "no") and
+# the detected dependency mode is "gcc3".
+# NOTE(review): presumably these are substituted into Makefiles as line
+# prefixes (the usual automake conditional convention) -- confirm where the
+# substitutions are emitted.
+ if
+  test "x$enable_dependency_tracking" != xno \
+  && test "$am_cv_CXX_dependencies_compiler_type" = gcc3; then
+  am__fastdepCXX_TRUE=
+  am__fastdepCXX_FALSE='#'
+else
+  am__fastdepCXX_TRUE='#'
+  am__fastdepCXX_FALSE=
+fi
+
+
+
+#AM_PROG_LD
+#AC_SUBST(LD)
+#AC_PROG_INSTALL
+#AC_PROG_MAKE_SET
+#AC_PROG_LN_S
+
+#
+# Tests for Windows
+#
+
+
+
+# Configure libtool
+# Check whether --enable-shared was given.
+# Normalizes $enable_shared to "yes" or "no".  Accepted values are "yes",
+# "no", or a list of package names; in the list form the result is "yes"
+# only when the list contains this package's name ($PACKAGE, or the literal
+# "default" when PACKAGE is unset).  When the variable is not set at all,
+# the result is "yes".
+if test "${enable_shared+set}" = set; then :
+  enableval=$enable_shared; p=${PACKAGE-default}
+    case $enableval in
+    yes) enable_shared=yes ;;
+    no) enable_shared=no ;;
+    *)
+      # Start from "no"; flipped to "yes" below if our name is in the list.
+      enable_shared=no
+      # Look at the argument we got.  We use all the common list separators.
+      # IFS is temporarily extended with $PATH_SEPARATOR and "," so that the
+      # unquoted $enableval in the `for' splits on those characters too.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for pkg in $enableval; do
+	# Put the original IFS back for the loop body.
+	IFS="$lt_save_ifs"
+	if test "X$pkg" = "X$p"; then
+	  enable_shared=yes
+	fi
+      done
+      # Restore IFS again: the loop body never runs for an empty list.
+      IFS="$lt_save_ifs"
+      ;;
+    esac
+else
+  enable_shared=yes
+fi
+
+
+
+
+
+
+
+
+
+# Check whether --enable-static was given.
+# Normalizes $enable_static to "yes" or "no".  Accepted values are "yes",
+# "no", or a list of package names; in the list form the result is "yes"
+# only when the list contains this package's name ($PACKAGE, or the literal
+# "default" when PACKAGE is unset).  When the variable is not set at all,
+# the result is "yes".
+if test "${enable_static+set}" = set; then :
+  enableval=$enable_static; p=${PACKAGE-default}
+    case $enableval in
+    yes) enable_static=yes ;;
+    no) enable_static=no ;;
+    *)
+     # Start from "no"; flipped to "yes" below if our name is in the list.
+     enable_static=no
+      # Look at the argument we got.  We use all the common list separators.
+      # IFS is temporarily extended with $PATH_SEPARATOR and "," so that the
+      # unquoted $enableval in the `for' splits on those characters too.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for pkg in $enableval; do
+	# Put the original IFS back for the loop body.
+	IFS="$lt_save_ifs"
+	if test "X$pkg" = "X$p"; then
+	  enable_static=yes
+	fi
+      done
+      # Restore IFS again: the loop body never runs for an empty list.
+      IFS="$lt_save_ifs"
+      ;;
+    esac
+else
+  enable_static=yes
+fi
+
+
+
+
+
+
+
+
+
+enable_win32_dll=yes
+
+case $host in
+*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*)
+  if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}as", so it can be a program name with args.
+set dummy ${ac_tool_prefix}as; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_AS+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$AS"; then
+  ac_cv_prog_AS="$AS" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_AS="${ac_tool_prefix}as"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+AS=$ac_cv_prog_AS
+if test -n "$AS"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AS" >&5
+$as_echo "$AS" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_AS"; then
+  ac_ct_AS=$AS
+  # Extract the first word of "as", so it can be a program name with args.
+set dummy as; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_AS+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_AS"; then
+  ac_cv_prog_ac_ct_AS="$ac_ct_AS" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_AS="as"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_AS=$ac_cv_prog_ac_ct_AS
+if test -n "$ac_ct_AS"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AS" >&5
+$as_echo "$ac_ct_AS" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_AS" = x; then
+    AS="false"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    AS=$ac_ct_AS
+  fi
+else
+  AS="$ac_cv_prog_AS"
+fi
+
+  if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args.
+set dummy ${ac_tool_prefix}dlltool; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_DLLTOOL+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$DLLTOOL"; then
+  ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+DLLTOOL=$ac_cv_prog_DLLTOOL
+if test -n "$DLLTOOL"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5
+$as_echo "$DLLTOOL" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_DLLTOOL"; then
+  ac_ct_DLLTOOL=$DLLTOOL
+  # Extract the first word of "dlltool", so it can be a program name with args.
+set dummy dlltool; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_DLLTOOL+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_DLLTOOL"; then
+  ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_DLLTOOL="dlltool"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL
+if test -n "$ac_ct_DLLTOOL"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5
+$as_echo "$ac_ct_DLLTOOL" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_DLLTOOL" = x; then
+    DLLTOOL="false"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    DLLTOOL=$ac_ct_DLLTOOL
+  fi
+else
+  DLLTOOL="$ac_cv_prog_DLLTOOL"
+fi
+
+  if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args.
+set dummy ${ac_tool_prefix}objdump; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_OBJDUMP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$OBJDUMP"; then
+  ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+OBJDUMP=$ac_cv_prog_OBJDUMP
+if test -n "$OBJDUMP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5
+$as_echo "$OBJDUMP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_OBJDUMP"; then
+  ac_ct_OBJDUMP=$OBJDUMP
+  # Extract the first word of "objdump", so it can be a program name with args.
+set dummy objdump; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_OBJDUMP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_OBJDUMP"; then
+  ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_OBJDUMP="objdump"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP
+if test -n "$ac_ct_OBJDUMP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5
+$as_echo "$ac_ct_OBJDUMP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_OBJDUMP" = x; then
+    OBJDUMP="false"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    OBJDUMP=$ac_ct_OBJDUMP
+  fi
+else
+  OBJDUMP="$ac_cv_prog_OBJDUMP"
+fi
+
+  ;;
+esac
+
+test -z "$AS" && AS=as
+
+
+
+
+
+test -z "$DLLTOOL" && DLLTOOL=dlltool
+
+
+
+
+
+test -z "$OBJDUMP" && OBJDUMP=objdump
+
+
+
+
+
+
+
+
+# Warn when the current working directory's path contains a space or a tab.
+# This only prints a warning (to descriptor 5 and to stderr); it does not
+# stop the script.
+case `pwd` in
+  *\ * | *\	*)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&5
+$as_echo "$as_me: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&2;} ;;
+esac
+
+
+
+macro_version='2.4.2'
+macro_revision='1.3337'
+
+
+
+
+
+
+
+
+
+
+
+
+
+ltmain="$ac_aux_dir/ltmain.sh"
+
+# Backslashify metacharacters that are still active within
+# double-quoted strings.
+sed_quote_subst='s/\(["`$\\]\)/\\\1/g'
+
+# Same as above, but do not quote variable references.
+double_quote_subst='s/\(["`\\]\)/\\\1/g'
+
+# Sed substitution to delay expansion of an escaped shell variable in a
+# double_quote_subst'ed string.
+delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g'
+
+# Sed substitution to delay expansion of an escaped single quote.
+delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g'
+
+# Sed substitution to avoid accidental globbing in evaled expressions
+no_glob_subst='s/\*/\\\*/g'
+
+ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO
+ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO
+
+# Choose the command stored in $ECHO for printing a string verbatim.
+# On entry $ECHO holds the long backslash-only test string assigned just
+# above; a candidate command is accepted only if it reproduces that string
+# unchanged.  Candidates are tried in order: `print -r --', `printf %s\n',
+# and finally the func_fallback_echo shell function defined below.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to print strings" >&5
+$as_echo_n "checking how to print strings... " >&6; }
+# Test print first, because it will be a builtin if present.
+# (The first test also requires that `print -r -- -n' outputs "-n" literally.)
+if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \
+   test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then
+  ECHO='print -r --'
+elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then
+  ECHO='printf %s\n'
+else
+  # Use this function as a fallback that always works.
+  # func_fallback_echo arg
+  # Print the first argument by feeding it to `cat' through a here-document.
+  # The here-document is wrapped in a single-quoted `eval' string, so $1 is
+  # expanded when the function runs rather than when it is defined.
+  func_fallback_echo ()
+  {
+    eval 'cat <<_LTECHO_EOF
+$1
+_LTECHO_EOF'
+  }
+  ECHO='func_fallback_echo'
+fi
+
+# func_echo_all arg...
+# Invoke $ECHO with all args, space-separated.
+func_echo_all ()
+{
+    $ECHO ""
+}
+
+case "$ECHO" in
+  printf*) { $as_echo "$as_me:${as_lineno-$LINENO}: result: printf" >&5
+$as_echo "printf" >&6; } ;;
+  print*) { $as_echo "$as_me:${as_lineno-$LINENO}: result: print -r" >&5
+$as_echo "print -r" >&6; } ;;
+  *) { $as_echo "$as_me:${as_lineno-$LINENO}: result: cat" >&5
+$as_echo "cat" >&6; } ;;
+esac
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5
+$as_echo_n "checking for a sed that does not truncate output... " >&6; }
+if ${ac_cv_path_SED+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+            ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/
+     for ac_i in 1 2 3 4 5 6 7; do
+       ac_script="$ac_script$as_nl$ac_script"
+     done
+     echo "$ac_script" 2>/dev/null | sed 99q >conftest.sed
+     { ac_script=; unset ac_script;}
+     if test -z "$SED"; then
+  ac_path_SED_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in sed gsed; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_SED="$as_dir/$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_SED" || continue
+# Check for GNU ac_path_SED and select it if it is found.
+  # Check for GNU $ac_path_SED
+case `"$ac_path_SED" --version 2>&1` in
+*GNU*)
+  ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;;
+*)
+  ac_count=0
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo '' >> "conftest.nl"
+    "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_SED_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_SED="$ac_path_SED"
+      ac_path_SED_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_SED_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_SED"; then
+    as_fn_error $? "no acceptable sed could be found in \$PATH" "$LINENO" 5
+  fi
+else
+  ac_cv_path_SED=$SED
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5
+$as_echo "$ac_cv_path_SED" >&6; }
+ SED="$ac_cv_path_SED"
+  rm -f conftest.sed
+
+test -z "$SED" && SED=sed
+Xsed="$SED -e 1s/^X//"
+
+
+
+
+
+
+
+
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
+$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
+if ${ac_cv_path_GREP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -z "$GREP"; then
+  ac_path_GREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in grep ggrep; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_GREP" || continue
+# Check for GNU ac_path_GREP and select it if it is found.
+  # Check for GNU $ac_path_GREP
+case `"$ac_path_GREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
+*)
+  ac_count=0
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo 'GREP' >> "conftest.nl"
+    "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_GREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_GREP="$ac_path_GREP"
+      ac_path_GREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_GREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_GREP"; then
+    as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_GREP=$GREP
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5
+$as_echo "$ac_cv_path_GREP" >&6; }
+ GREP="$ac_cv_path_GREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
+$as_echo_n "checking for egrep... " >&6; }
+if ${ac_cv_path_EGREP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
+   then ac_cv_path_EGREP="$GREP -E"
+   else
+     if test -z "$EGREP"; then
+  ac_path_EGREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in egrep; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_EGREP" || continue
+# Check for GNU ac_path_EGREP and select it if it is found.
+  # Check for GNU $ac_path_EGREP
+case `"$ac_path_EGREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
+*)
+  ac_count=0
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo 'EGREP' >> "conftest.nl"
+    "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_EGREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_EGREP="$ac_path_EGREP"
+      ac_path_EGREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_EGREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_EGREP"; then
+    as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_EGREP=$EGREP
+fi
+
+   fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5
+$as_echo "$ac_cv_path_EGREP" >&6; }
+ EGREP="$ac_cv_path_EGREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for fgrep" >&5
+$as_echo_n "checking for fgrep... " >&6; }
+if ${ac_cv_path_FGREP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if echo 'ab*c' | $GREP -F 'ab*c' >/dev/null 2>&1
+   then ac_cv_path_FGREP="$GREP -F"
+   else
+     if test -z "$FGREP"; then
+  ac_path_FGREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in fgrep; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_FGREP="$as_dir/$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_FGREP" || continue
+# Check for GNU ac_path_FGREP and select it if it is found.
+  # Check for GNU $ac_path_FGREP
+case `"$ac_path_FGREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_FGREP="$ac_path_FGREP" ac_path_FGREP_found=:;;
+*)
+  ac_count=0
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo 'FGREP' >> "conftest.nl"
+    "$ac_path_FGREP" FGREP < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_FGREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_FGREP="$ac_path_FGREP"
+      ac_path_FGREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_FGREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_FGREP"; then
+    as_fn_error $? "no acceptable fgrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_FGREP=$FGREP
+fi
+
+   fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_FGREP" >&5
+$as_echo "$ac_cv_path_FGREP" >&6; }
+ FGREP="$ac_cv_path_FGREP"
+
+
+test -z "$GREP" && GREP=grep
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# Check whether --with-gnu-ld was given.
+# Normalizes $with_gnu_ld: when the variable is set, any value other than
+# "no" becomes "yes" (a value of "no" is kept as is); when it is not set at
+# all, it defaults to "no".
+if test "${with_gnu_ld+set}" = set; then :
+  withval=$with_gnu_ld; test "$withval" = no || with_gnu_ld=yes
+else
+  with_gnu_ld=no
+fi
+
+ac_prog=ld
+if test "$GCC" = yes; then
+  # Check if gcc -print-prog-name=ld gives a path.
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5
+$as_echo_n "checking for ld used by $CC... " >&6; }
+  case $host in
+  *-*-mingw*)
+    # gcc leaves a trailing carriage return which upsets mingw
+    ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;;
+  *)
+    ac_prog=`($CC -print-prog-name=ld) 2>&5` ;;
+  esac
+  case $ac_prog in
+    # Accept absolute paths.
+    [\\/]* | ?:[\\/]*)
+      re_direlt='/[^/][^/]*/\.\./'
+      # Canonicalize the pathname of ld
+      ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'`
+      while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do
+	ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"`
+      done
+      test -z "$LD" && LD="$ac_prog"
+      ;;
+  "")
+    # If it fails, then pretend we aren't using GCC.
+    ac_prog=ld
+    ;;
+  *)
+    # If it is relative, then search for the first ld in PATH.
+    with_gnu_ld=unknown
+    ;;
+  esac
+elif test "$with_gnu_ld" = yes; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5
+$as_echo_n "checking for GNU ld... " >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5
+$as_echo_n "checking for non-GNU ld... " >&6; }
+fi
+if ${lt_cv_path_LD+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -z "$LD"; then
+  lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+  for ac_dir in $PATH; do
+    IFS="$lt_save_ifs"
+    test -z "$ac_dir" && ac_dir=.
+    if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
+      lt_cv_path_LD="$ac_dir/$ac_prog"
+      # Check to see if the program is GNU ld.  I'd rather use --version,
+      # but apparently some variants of GNU ld only accept -v.
+      # Break only if it was the GNU/non-GNU ld that we prefer.
+      case `"$lt_cv_path_LD" -v 2>&1 </dev/null` in
+      *GNU* | *'with BFD'*)
+	test "$with_gnu_ld" != no && break
+	;;
+      *)
+	test "$with_gnu_ld" != yes && break
+	;;
+      esac
+    fi
+  done
+  IFS="$lt_save_ifs"
+else
+  lt_cv_path_LD="$LD" # Let the user override the test with a path.
+fi
+fi
+
+LD="$lt_cv_path_LD"
+if test -n "$LD"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LD" >&5
+$as_echo "$LD" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5
+$as_echo_n "checking if the linker ($LD) is GNU ld... " >&6; }
+if ${lt_cv_prog_gnu_ld+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  # I'd rather use --version here, but apparently some GNU lds only accept -v.
+case `$LD -v 2>&1 </dev/null` in
+*GNU* | *'with BFD'*)
+  lt_cv_prog_gnu_ld=yes
+  ;;
+*)
+  lt_cv_prog_gnu_ld=no
+  ;;
+esac
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_gnu_ld" >&5
+$as_echo "$lt_cv_prog_gnu_ld" >&6; }
+with_gnu_ld=$lt_cv_prog_gnu_ld
+
+
+
+
+
+
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for BSD- or MS-compatible name lister (nm)" >&5
+$as_echo_n "checking for BSD- or MS-compatible name lister (nm)... " >&6; }
+if ${lt_cv_path_NM+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$NM"; then
+  # Let the user override the test.
+  lt_cv_path_NM="$NM"
+else
+  lt_nm_to_check="${ac_tool_prefix}nm"
+  if test -n "$ac_tool_prefix" && test "$build" = "$host"; then
+    lt_nm_to_check="$lt_nm_to_check nm"
+  fi
+  for lt_tmp_nm in $lt_nm_to_check; do
+    lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+    for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do
+      IFS="$lt_save_ifs"
+      test -z "$ac_dir" && ac_dir=.
+      tmp_nm="$ac_dir/$lt_tmp_nm"
+      if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then
+	# Check to see if the nm accepts a BSD-compat flag.
+	# Adding the `sed 1q' prevents false positives on HP-UX, which says:
+	#   nm: unknown option "B" ignored
+	# Tru64's nm complains that /dev/null is an invalid object file
+	case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in
+	*/dev/null* | *'Invalid file or object type'*)
+	  lt_cv_path_NM="$tmp_nm -B"
+	  break
+	  ;;
+	*)
+	  case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in
+	  */dev/null*)
+	    lt_cv_path_NM="$tmp_nm -p"
+	    break
+	    ;;
+	  *)
+	    lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but
+	    continue # so that we can try to find one that supports BSD flags
+	    ;;
+	  esac
+	  ;;
+	esac
+      fi
+    done
+    IFS="$lt_save_ifs"
+  done
+  : ${lt_cv_path_NM=no}
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_NM" >&5
+$as_echo "$lt_cv_path_NM" >&6; }
+# Pick the name lister ($NM).  Use the BSD-compatible nm recorded in
+# $lt_cv_path_NM when there is one; otherwise look for MS dumpbin (or
+# "link -dump"), first with the $ac_tool_prefix and then without it.
+# If nothing sets NM, the last line falls back to plain "nm".
+if test "$lt_cv_path_NM" != "no"; then
+  NM="$lt_cv_path_NM"
+else
+  # Didn't find any BSD compatible name lister, look for dumpbin.
+  if test -n "$DUMPBIN"; then :
+    # Let the user override the test.
+  else
+    if test -n "$ac_tool_prefix"; then
+  for ac_prog in dumpbin "link -dump"
+  do
+    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_DUMPBIN+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$DUMPBIN"; then
+  ac_cv_prog_DUMPBIN="$DUMPBIN" # Let the user override the test.
+else
+# Walk every $PATH directory and every executable extension.
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+DUMPBIN=$ac_cv_prog_DUMPBIN
+if test -n "$DUMPBIN"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DUMPBIN" >&5
+$as_echo "$DUMPBIN" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+    test -n "$DUMPBIN" && break
+  done
+fi
+# Still nothing: repeat the search with the unprefixed program names.
+if test -z "$DUMPBIN"; then
+  ac_ct_DUMPBIN=$DUMPBIN
+  for ac_prog in dumpbin "link -dump"
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_DUMPBIN+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_DUMPBIN"; then
+  ac_cv_prog_ac_ct_DUMPBIN="$ac_ct_DUMPBIN" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_DUMPBIN="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_DUMPBIN=$ac_cv_prog_ac_ct_DUMPBIN
+if test -n "$ac_ct_DUMPBIN"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DUMPBIN" >&5
+$as_echo "$ac_ct_DUMPBIN" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$ac_ct_DUMPBIN" && break
+done
+
+  # ":" marks "no dumpbin available"; it is checked again further down.
+  if test "x$ac_ct_DUMPBIN" = x; then
+    DUMPBIN=":"
+  else
+    # Warn once when cross compiling with a tool that lacks the host prefix.
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    DUMPBIN=$ac_ct_DUMPBIN
+  fi
+fi
+
+    # Keep the tool only if the first line it prints for "-symbols /dev/null"
+    # mentions COFF; otherwise disable it by setting DUMPBIN to ":".
+    case `$DUMPBIN -symbols /dev/null 2>&1 | sed '1q'` in
+    *COFF*)
+      DUMPBIN="$DUMPBIN -symbols"
+      ;;
+    *)
+      DUMPBIN=:
+      ;;
+    esac
+  fi
+
+  if test "$DUMPBIN" != ":"; then
+    NM="$DUMPBIN"
+  fi
+fi
+test -z "$NM" && NM=nm
+
+
+
+
+
+
+# Classify the output format of $NM (cached in lt_cv_nm_interface).
+# "BSD nm" is assumed by default.  A one-variable source file is compiled,
+# $NM is run on the object, and the result switches to "MS dumpbin" only when
+# the output contains a line matching 'External.*some_variable'.
+# Commands, stderr and output are all copied to the log on fd 5.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking the name lister ($NM) interface" >&5
+$as_echo_n "checking the name lister ($NM) interface... " >&6; }
+if ${lt_cv_nm_interface+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_nm_interface="BSD nm"
+  echo "int some_variable = 0;" > conftest.$ac_ext
+  (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&5)
+  (eval "$ac_compile" 2>conftest.err)
+  cat conftest.err >&5
+  (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&5)
+  (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out)
+  cat conftest.err >&5
+  (eval echo "\"\$as_me:$LINENO: output\"" >&5)
+  cat conftest.out >&5
+  if $GREP 'External.*some_variable' conftest.out > /dev/null; then
+    lt_cv_nm_interface="MS dumpbin"
+  fi
+  # Remove every temporary file created by this probe.
+  rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_nm_interface" >&5
+$as_echo "$lt_cv_nm_interface" >&6; }
+
+# Report whether symbolic links are usable.  LN_S takes whatever command
+# $as_ln_s holds; only the exact string "ln -s" counts as "yes", anything
+# else is reported as the substitute in use.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ln -s works" >&5
+$as_echo_n "checking whether ln -s works... " >&6; }
+LN_S=$as_ln_s
+case $LN_S in
+  "ln -s")
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+    ;;
+  *)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: no, using $LN_S" >&5
+$as_echo "no, using $LN_S" >&6; }
+    ;;
+esac
+
+# find the maximum length of command line arguments
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking the maximum length of command line arguments" >&5
+$as_echo_n "checking the maximum length of command line arguments... " >&6; }
+if ${lt_cv_sys_max_cmd_len+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+    i=0
+  teststring="ABCD"
+
+  case $build_os in
+  msdosdjgpp*)
+    # On DJGPP, this test can blow up pretty badly due to problems in libc
+    # (any single argument exceeding 2000 bytes causes a buffer overrun
+    # during glob expansion).  Even if it were fixed, the result of this
+    # check would be larger than it should be.
+    lt_cv_sys_max_cmd_len=12288;    # 12K is about right
+    ;;
+
+  gnu*)
+    # Under GNU Hurd, this test is not required because there is
+    # no limit to the length of command line arguments.
+    # Libtool will interpret -1 as no limit whatsoever
+    lt_cv_sys_max_cmd_len=-1;
+    ;;
+
+  cygwin* | mingw* | cegcc*)
+    # On Win9x/ME, this test blows up -- it succeeds, but takes
+    # about 5 minutes as the teststring grows exponentially.
+    # Worse, since 9x/ME are not pre-emptively multitasking,
+    # you end up with a "frozen" computer, even though with patience
+    # the test eventually succeeds (with a max line length of 256k).
+    # Instead, let's just punt: use the minimum linelength reported by
+    # all of the supported platforms: 8192 (on NT/2K/XP).
+    lt_cv_sys_max_cmd_len=8192;
+    ;;
+
+  mint*)
+    # On MiNT this can take a long time and run out of memory.
+    lt_cv_sys_max_cmd_len=8192;
+    ;;
+
+  amigaos*)
+    # On AmigaOS with pdksh, this test takes hours, literally.
+    # So we just punt and use a minimum line length of 8192.
+    lt_cv_sys_max_cmd_len=8192;
+    ;;
+
+  netbsd* | freebsd* | openbsd* | darwin* | dragonfly*)
+    # This has been around since 386BSD, at least.  Likely further.
+    if test -x /sbin/sysctl; then
+      lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax`
+    elif test -x /usr/sbin/sysctl; then
+      lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax`
+    else
+      lt_cv_sys_max_cmd_len=65536	# usable default for all BSDs
+    fi
+    # And add a safety zone
+    lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
+    lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
+    ;;
+
+  interix*)
+    # We know the value 262144 and hardcode it with a safety zone (like BSD)
+    lt_cv_sys_max_cmd_len=196608
+    ;;
+
+  os2*)
+    # The test takes a long time on OS/2.
+    lt_cv_sys_max_cmd_len=8192
+    ;;
+
+  osf*)
+    # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure
+    # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not
+    # nice to cause kernel panics so lets avoid the loop below.
+    # First set a reasonable default.
+    lt_cv_sys_max_cmd_len=16384
+    #
+    if test -x /sbin/sysconfig; then
+      case `/sbin/sysconfig -q proc exec_disable_arg_limit` in
+        *1*) lt_cv_sys_max_cmd_len=-1 ;;
+      esac
+    fi
+    ;;
+  sco3.2v5*)
+    lt_cv_sys_max_cmd_len=102400
+    ;;
+  sysv5* | sco5v6* | sysv4.2uw2*)
+    kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null`
+    if test -n "$kargmax"; then
+      lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[	 ]//'`
+    else
+      lt_cv_sys_max_cmd_len=32768
+    fi
+    ;;
+  *)
+    lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null`
+    if test -n "$lt_cv_sys_max_cmd_len"; then
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
+    else
+      # Make teststring a little bigger before we do anything with it.
+      # a 1K string should be a reasonable start.
+      for i in 1 2 3 4 5 6 7 8 ; do
+        teststring=$teststring$teststring
+      done
+      SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}}
+      # If test is not a shell built-in, we'll probably end up computing a
+      # maximum length that is only half of the actual maximum length, but
+      # we can't tell.
+      while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \
+	         = "X$teststring$teststring"; } >/dev/null 2>&1 &&
+	      test $i != 17 # 1/2 MB should be enough
+      do
+        i=`expr $i + 1`
+        teststring=$teststring$teststring
+      done
+      # Only check the string length outside the loop.
+      lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1`
+      teststring=
+      # Add a significant safety factor because C++ compilers can tack on
+      # massive amounts of additional arguments before passing them to the
+      # linker.  It appears as though 1/2 is a usable value.
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2`
+    fi
+    ;;
+  esac
+
+fi
+
+# Report the detected limit and publish it as $max_cmd_len.
+# The expansion must be quoted: an unquoted empty value collapses the test
+# to `test -n`, which is always true, so the "none" branch could never run.
+if test -n "$lt_cv_sys_max_cmd_len"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sys_max_cmd_len" >&5
+$as_echo "$lt_cv_sys_max_cmd_len" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: none" >&5
+$as_echo "none" >&6; }
+fi
+max_cmd_len=$lt_cv_sys_max_cmd_len
+
+
+
+
+
+
+# Default file-manipulation commands.  A variable that is already set, even
+# to the empty string, is left untouched; only unset ones get the default.
+test "${CP+set}" = set || CP="cp -f"
+test "${MV+set}" = set || MV="mv -f"
+test "${RM+set}" = set || RM="rm -f"
+
+# Probe the running shell for XSI parameter expansions.  The probe runs in a
+# subshell with all output discarded; xsi_shell becomes "yes" only when the
+# ${var##pat}, ${var%pat}, ${var#pat} strippings of "a/b/c" give the expected
+# strings, $(( 1 + 1 )) evaluates to 2, and ${#var} reports length 5.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the shell understands some XSI constructs" >&5
+$as_echo_n "checking whether the shell understands some XSI constructs... " >&6; }
+# Try some XSI features
+xsi_shell=no
+( _lt_dummy="a/b/c"
+  test "${_lt_dummy##*/},${_lt_dummy%/*},${_lt_dummy#??}"${_lt_dummy%"$_lt_dummy"}, \
+      = c,a/b,b/c, \
+    && eval 'test $(( 1 + 1 )) -eq 2 \
+    && test "${#_lt_dummy}" -eq 5' ) >/dev/null 2>&1 \
+  && xsi_shell=yes
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $xsi_shell" >&5
+$as_echo "$xsi_shell" >&6; }
+
+
+# Probe whether the shell supports the "+=" append assignment.  In a
+# subshell, foo starts as "bar", the positional parameters are set to
+# "foo" and "baz", and `eval "$1+=\$2"` therefore runs `foo+=$2`;
+# lt_shell_append becomes "yes" only if foo ends up as "barbaz".
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the shell understands \"+=\"" >&5
+$as_echo_n "checking whether the shell understands \"+=\"... " >&6; }
+lt_shell_append=no
+( foo=bar; set foo baz; eval "$1+=\$2" && test "$foo" = barbaz ) \
+    >/dev/null 2>&1 \
+  && lt_shell_append=yes
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_shell_append" >&5
+$as_echo "$lt_shell_append" >&6; }
+
+
+# Decide which command stands in for "unset": start from "false" and switch
+# to the real "unset" only when unsetting a variable succeeds in a subshell.
+lt_unset=false
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
+  lt_unset=unset
+fi
+
+
+
+
+
+# test EBCDIC or ASCII
+# Translating "X" to octal 101 yields "A" only on an ASCII system.  The
+# result selects the tr commands that turn spaces into newlines (lt_SP2NL)
+# and CR/LF into spaces (lt_NL2SP).
+case `echo X|tr X '\101'` in
+ A) # ASCII based system
+    # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr
+  lt_SP2NL='tr \040 \012'
+  lt_NL2SP='tr \015\012 \040\040'
+  ;;
+ *) # EBCDIC based system
+  lt_SP2NL='tr \100 \n'
+  lt_NL2SP='tr \r\n \100\100'
+  ;;
+esac
+
+
+
+
+
+
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to $host format" >&5
+$as_echo_n "checking how to convert $build file names to $host format... " >&6; }
+if ${lt_cv_to_host_file_cmd+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  case $host in
+  *-*-mingw* )
+    case $build in
+      *-*-mingw* ) # actually msys
+        lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32
+        ;;
+      *-*-cygwin* )
+        lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32
+        ;;
+      * ) # otherwise, assume *nix
+        lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32
+        ;;
+    esac
+    ;;
+  *-*-cygwin* )
+    case $build in
+      *-*-mingw* ) # actually msys
+        lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin
+        ;;
+      *-*-cygwin* )
+        lt_cv_to_host_file_cmd=func_convert_file_noop
+        ;;
+      * ) # otherwise, assume *nix
+        lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin
+        ;;
+    esac
+    ;;
+  * ) # unhandled hosts (and "normal" native builds)
+    lt_cv_to_host_file_cmd=func_convert_file_noop
+    ;;
+esac
+
+fi
+
+to_host_file_cmd=$lt_cv_to_host_file_cmd
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_host_file_cmd" >&5
+$as_echo "$lt_cv_to_host_file_cmd" >&6; }
+
+
+
+
+
+# Choose the shell function that converts $build file names to the format
+# expected by the toolchain (cached in lt_cv_to_tool_file_cmd, exported as
+# $to_tool_file_cmd).  Only a mingw host built from a mingw (msys) build
+# system gets a real converter; every other combination keeps the no-op.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to toolchain format" >&5
+$as_echo_n "checking how to convert $build file names to toolchain format... " >&6; }
+if ${lt_cv_to_tool_file_cmd+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  #assume ordinary cross tools, or native build.
+lt_cv_to_tool_file_cmd=func_convert_file_noop
+case $host in
+  *-*-mingw* )
+    case $build in
+      *-*-mingw* ) # actually msys
+        lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32
+        ;;
+    esac
+    ;;
+esac
+
+fi
+
+to_tool_file_cmd=$lt_cv_to_tool_file_cmd
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_tool_file_cmd" >&5
+$as_echo "$lt_cv_to_tool_file_cmd" >&6; }
+
+
+
+
+
+# Determine $reload_flag (default '-r', cacheable through
+# lt_cv_ld_reload_flag) and the $reload_cmds command template built on it.
+# Overrides by host OS: a non-GCC compiler on cygwin/mingw/pw32/cegcc
+# disables reloading with "false"; GCC on darwin drives it through $LTCC
+# with ${wl}-r instead of calling $LD directly.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $LD option to reload object files" >&5
+$as_echo_n "checking for $LD option to reload object files... " >&6; }
+if ${lt_cv_ld_reload_flag+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_ld_reload_flag='-r'
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_reload_flag" >&5
+$as_echo "$lt_cv_ld_reload_flag" >&6; }
+reload_flag=$lt_cv_ld_reload_flag
+# Make sure a non-empty flag starts with a space, because the template
+# below appends it directly after $LD.
+case $reload_flag in
+"" | " "*) ;;
+*) reload_flag=" $reload_flag" ;;
+esac
+reload_cmds='$LD$reload_flag -o $output$reload_objs'
+case $host_os in
+  cygwin* | mingw* | pw32* | cegcc*)
+    if test "$GCC" != yes; then
+      reload_cmds=false
+    fi
+    ;;
+  darwin*)
+    if test "$GCC" = yes; then
+      reload_cmds='$LTCC $LTCFLAGS -nostdlib ${wl}-r -o $output$reload_objs'
+    else
+      reload_cmds='$LD$reload_flag -o $output$reload_objs'
+    fi
+    ;;
+esac
+
+
+
+
+
+
+
+
+
+# Locate objdump.  With a tool prefix, search $PATH for
+# "${ac_tool_prefix}objdump" first; if that yields nothing, search for plain
+# "objdump".  A user-supplied $OBJDUMP wins over the search, and OBJDUMP is
+# set to "false" when no program is found.
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args.
+set dummy ${ac_tool_prefix}objdump; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_OBJDUMP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$OBJDUMP"; then
+  ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+OBJDUMP=$ac_cv_prog_OBJDUMP
+if test -n "$OBJDUMP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5
+$as_echo "$OBJDUMP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+# Prefixed lookup found nothing (or there is no prefix): try the bare name.
+if test -z "$ac_cv_prog_OBJDUMP"; then
+  ac_ct_OBJDUMP=$OBJDUMP
+  # Extract the first word of "objdump", so it can be a program name with args.
+set dummy objdump; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_OBJDUMP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_OBJDUMP"; then
+  ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_OBJDUMP="objdump"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP
+if test -n "$ac_ct_OBJDUMP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5
+$as_echo "$ac_ct_OBJDUMP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_OBJDUMP" = x; then
+    OBJDUMP="false"
+  else
+    # Warn once when cross compiling with a tool that lacks the host prefix.
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    OBJDUMP=$ac_ct_OBJDUMP
+  fi
+else
+  OBJDUMP="$ac_cv_prog_OBJDUMP"
+fi
+
+test -z "$OBJDUMP" && OBJDUMP=objdump
+
+
+
+
+
+
+# Select, per $host_os, how dependent libraries are recognised.  Three cache
+# variables are filled in: lt_cv_deplibs_check_method (the method, see the
+# list below), lt_cv_file_magic_cmd (the command used by 'file_magic'
+# methods) and lt_cv_file_magic_test_file (a sample library for that
+# command).  Hosts matching no arm keep the method 'unknown'.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to recognize dependent libraries" >&5
+$as_echo_n "checking how to recognize dependent libraries... " >&6; }
+if ${lt_cv_deplibs_check_method+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_file_magic_cmd='$MAGIC_CMD'
+lt_cv_file_magic_test_file=
+lt_cv_deplibs_check_method='unknown'
+# Need to set the preceding variable on all platforms that support
+# interlibrary dependencies.
+# 'none' -- dependencies not supported.
+# `unknown' -- same as none, but documents that we really don't know.
+# 'pass_all' -- all dependencies passed with no checks.
+# 'test_compile' -- check by making test program.
+# 'file_magic [[regex]]' -- check by looking for files in library path
+# which responds to the $file_magic_cmd with a given extended regex.
+# If you have `file' or equivalent on your system and you're not sure
+# whether `pass_all' will *always* work, you probably want this one.
+
+case $host_os in
+aix[4-9]*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+beos*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+bsdi[45]*)
+  lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)'
+  lt_cv_file_magic_cmd='/usr/bin/file -L'
+  lt_cv_file_magic_test_file=/shlib/libc.so
+  ;;
+
+cygwin*)
+  # func_win32_libid is a shell function defined in ltmain.sh
+  lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL'
+  lt_cv_file_magic_cmd='func_win32_libid'
+  ;;
+
+mingw* | pw32*)
+  # Base MSYS/MinGW do not provide the 'file' command needed by
+  # func_win32_libid shell function, so use a weaker test based on 'objdump',
+  # unless we find 'file', for example because we are cross-compiling.
+  # func_win32_libid assumes BSD nm, so disallow it if using MS dumpbin.
+  if ( test "$lt_cv_nm_interface" = "BSD nm" && file / ) >/dev/null 2>&1; then
+    lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL'
+    lt_cv_file_magic_cmd='func_win32_libid'
+  else
+    # Keep this pattern in sync with the one in func_win32_libid.
+    lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)'
+    lt_cv_file_magic_cmd='$OBJDUMP -f'
+  fi
+  ;;
+
+cegcc*)
+  # use the weaker test based on 'objdump'. See mingw*.
+  lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?'
+  lt_cv_file_magic_cmd='$OBJDUMP -f'
+  ;;
+
+darwin* | rhapsody*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+freebsd* | dragonfly*)
+  # The preprocessor output still contains the literal __ELF__ only when the
+  # compiler does not define that macro.  In that case i*86 uses a
+  # file_magic check and other CPUs keep 'unknown'.
+  if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
+    case $host_cpu in
+    i*86 )
+      # Not sure whether the presence of OpenBSD here was a mistake.
+      # Let's accept both of them until this is cleared up.
+      lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[3-9]86 (compact )?demand paged shared library'
+      lt_cv_file_magic_cmd=/usr/bin/file
+      lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*`
+      ;;
+    esac
+  else
+    lt_cv_deplibs_check_method=pass_all
+  fi
+  ;;
+
+gnu*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+haiku*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+hpux10.20* | hpux11*)
+  lt_cv_file_magic_cmd=/usr/bin/file
+  case $host_cpu in
+  ia64*)
+    lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF-[0-9][0-9]) shared object file - IA64'
+    lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so
+    ;;
+  hppa*64*)
+    lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]'
+    lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl
+    ;;
+  *)
+    lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|PA-RISC[0-9]\.[0-9]) shared library'
+    lt_cv_file_magic_test_file=/usr/lib/libc.sl
+    ;;
+  esac
+  ;;
+
+interix[3-9]*)
+  # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here
+  lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|\.a)$'
+  ;;
+
+irix5* | irix6* | nonstopux*)
+  # NOTE(review): libmagic is assigned here but not read anywhere in this
+  # section -- confirm whether a later part of the script uses it.
+  case $LD in
+  *-32|*"-32 ") libmagic=32-bit;;
+  *-n32|*"-n32 ") libmagic=N32;;
+  *-64|*"-64 ") libmagic=64-bit;;
+  *) libmagic=never-match;;
+  esac
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+netbsd*)
+  if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
+    lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$'
+  else
+    lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|_pic\.a)$'
+  fi
+  ;;
+
+newos6*)
+  lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (executable|dynamic lib)'
+  lt_cv_file_magic_cmd=/usr/bin/file
+  lt_cv_file_magic_test_file=/usr/lib/libnls.so
+  ;;
+
+*nto* | *qnx*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+openbsd*)
+  if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+    lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|\.so|_pic\.a)$'
+  else
+    lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$'
+  fi
+  ;;
+
+osf3* | osf4* | osf5*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+rdos*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+solaris*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+sysv4 | sysv4.3*)
+  case $host_vendor in
+  motorola)
+    lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]'
+    lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*`
+    ;;
+  ncr)
+    lt_cv_deplibs_check_method=pass_all
+    ;;
+  sequent)
+    lt_cv_file_magic_cmd='/bin/file'
+    lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )'
+    ;;
+  sni)
+    lt_cv_file_magic_cmd='/bin/file'
+    lt_cv_deplibs_check_method="file_magic ELF [0-9][0-9]*-bit [LM]SB dynamic lib"
+    lt_cv_file_magic_test_file=/lib/libc.so
+    ;;
+  siemens)
+    lt_cv_deplibs_check_method=pass_all
+    ;;
+  pc)
+    lt_cv_deplibs_check_method=pass_all
+    ;;
+  esac
+  ;;
+
+tpf*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+esac
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_deplibs_check_method" >&5
+$as_echo "$lt_cv_deplibs_check_method" >&6; }
+
+# Case-insensitive matching support, set up only for native
+# ($build = $host) mingw/pw32 builds.  If the shell's `shopt` output lists
+# nocaseglob, request it through want_nocaseglob; otherwise build a sed
+# program in file_magic_glob that rewrites each letter into a two-case
+# bracket expression such as [aA].
+# Afterwards the cached file-magic command and check method are copied to
+# their working variables, with 'unknown' as the method of last resort.
+file_magic_glob=
+want_nocaseglob=no
+if test "$build" = "$host"; then
+  case $host_os in
+  mingw* | pw32*)
+    if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then
+      want_nocaseglob=yes
+    else
+      file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[\1]\/[\1]\/g;/g"`
+    fi
+    ;;
+  esac
+fi
+
+file_magic_cmd=$lt_cv_file_magic_cmd
+deplibs_check_method=$lt_cv_deplibs_check_method
+test -z "$deplibs_check_method" && deplibs_check_method=unknown
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# Locate dlltool.  With a tool prefix, search $PATH for
+# "${ac_tool_prefix}dlltool" first; if that yields nothing, search for plain
+# "dlltool".  A user-supplied $DLLTOOL wins over the search, and DLLTOOL is
+# set to "false" when no program is found.
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args.
+set dummy ${ac_tool_prefix}dlltool; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_DLLTOOL+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$DLLTOOL"; then
+  ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+DLLTOOL=$ac_cv_prog_DLLTOOL
+if test -n "$DLLTOOL"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5
+$as_echo "$DLLTOOL" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+# Prefixed lookup found nothing (or there is no prefix): try the bare name.
+if test -z "$ac_cv_prog_DLLTOOL"; then
+  ac_ct_DLLTOOL=$DLLTOOL
+  # Extract the first word of "dlltool", so it can be a program name with args.
+set dummy dlltool; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_DLLTOOL+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_DLLTOOL"; then
+  ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_DLLTOOL="dlltool"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL
+if test -n "$ac_ct_DLLTOOL"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5
+$as_echo "$ac_ct_DLLTOOL" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_DLLTOOL" = x; then
+    DLLTOOL="false"
+  else
+    # Warn once when cross compiling with a tool that lacks the host prefix.
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    DLLTOOL=$ac_ct_DLLTOOL
+  fi
+else
+  DLLTOOL="$ac_cv_prog_DLLTOOL"
+fi
+
+test -z "$DLLTOOL" && DLLTOOL=dlltool
+
+
+
+
+
+
+
+# Choose the command that maps a link library to its runtime (shared)
+# library, cached in lt_cv_sharedlib_from_linklib_cmd.  On
+# cygwin/mingw/pw32/cegcc the choice depends on whether `$DLLTOOL --help`
+# mentions --identify-strict; every other host uses $ECHO.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to associate runtime and link libraries" >&5
+$as_echo_n "checking how to associate runtime and link libraries... " >&6; }
+if ${lt_cv_sharedlib_from_linklib_cmd+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_sharedlib_from_linklib_cmd='unknown'
+
+case $host_os in
+cygwin* | mingw* | pw32* | cegcc*)
+  # two different shell functions defined in ltmain.sh
+  # decide which to use based on capabilities of $DLLTOOL
+  case `$DLLTOOL --help 2>&1` in
+  *--identify-strict*)
+    lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib
+    ;;
+  *)
+    lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback
+    ;;
+  esac
+  ;;
+*)
+  # fallback: assume linklib IS sharedlib
+  lt_cv_sharedlib_from_linklib_cmd="$ECHO"
+  ;;
+esac
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sharedlib_from_linklib_cmd" >&5
+$as_echo "$lt_cv_sharedlib_from_linklib_cmd" >&6; }
+sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd
+test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO
+
+
+
+
+
+
+
+
+# Locate the archiver.  With a tool prefix, search $PATH for
+# "${ac_tool_prefix}ar" first; if AR is still empty, search for plain "ar".
+# A user-supplied $AR wins over the search, and AR is set to "false" when no
+# program is found.  The last two lines supply defaults for AR and AR_FLAGS
+# when they are unset.
+if test -n "$ac_tool_prefix"; then
+  for ac_prog in ar
+  do
+    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_AR+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$AR"; then
+  ac_cv_prog_AR="$AR" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_AR="$ac_tool_prefix$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+AR=$ac_cv_prog_AR
+if test -n "$AR"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AR" >&5
+$as_echo "$AR" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+    test -n "$AR" && break
+  done
+fi
+# Prefixed lookup found nothing (or there is no prefix): try the bare name.
+if test -z "$AR"; then
+  ac_ct_AR=$AR
+  for ac_prog in ar
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_AR+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_AR"; then
+  ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_AR="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_AR=$ac_cv_prog_ac_ct_AR
+if test -n "$ac_ct_AR"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5
+$as_echo "$ac_ct_AR" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$ac_ct_AR" && break
+done
+
+  if test "x$ac_ct_AR" = x; then
+    AR="false"
+  else
+    # Warn once when cross compiling with a tool that lacks the host prefix.
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    AR=$ac_ct_AR
+  fi
+fi
+
+: ${AR=ar}
+: ${AR_FLAGS=cru}
+
+
+
+
+
+
+
+
+
+
+
+# Probe whether the archiver accepts an @FILE argument listing its inputs
+# (cached in lt_cv_ar_at_file, exported as $archiver_list_spec).
+# After compiling an empty program, "$AR $AR_FLAGS libconftest.a
+# @conftest.lst" must succeed; then, with the object file removed, the same
+# command must fail.  Only in that case is "@" recorded; otherwise the
+# result stays "no" and archiver_list_spec is left empty.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for archiver @FILE support" >&5
+$as_echo_n "checking for archiver @FILE support... " >&6; }
+if ${lt_cv_ar_at_file+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_ar_at_file=no
+   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  echo conftest.$ac_objext > conftest.lst
+      lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&5'
+      { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5
+  (eval $lt_ar_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+      if test "$ac_status" -eq 0; then
+	# Ensure the archiver fails upon bogus file names.
+	rm -f conftest.$ac_objext libconftest.a
+	{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5
+  (eval $lt_ar_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+	if test "$ac_status" -ne 0; then
+          lt_cv_ar_at_file=@
+        fi
+      fi
+      rm -f conftest.* libconftest.a
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ar_at_file" >&5
+$as_echo "$lt_cv_ar_at_file" >&6; }
+
+if test "x$lt_cv_ar_at_file" = xno; then
+  archiver_list_spec=
+else
+  archiver_list_spec=$lt_cv_ar_at_file
+fi
+
+
+
+
+
+
+# Locate strip: with a tool prefix set, search PATH for ${ac_tool_prefix}strip; if that finds nothing, search for plain strip.
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args.
+set dummy ${ac_tool_prefix}strip; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_STRIP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$STRIP"; then
+  ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_STRIP="${ac_tool_prefix}strip"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+STRIP=$ac_cv_prog_STRIP
+if test -n "$STRIP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5
+$as_echo "$STRIP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_STRIP"; then
+  ac_ct_STRIP=$STRIP
+  # Extract the first word of "strip", so it can be a program name with args.
+set dummy strip; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_STRIP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_STRIP"; then
+  ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_STRIP="strip"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP
+if test -n "$ac_ct_STRIP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5
+$as_echo "$ac_ct_STRIP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+  # Nothing found: STRIP becomes ":"; otherwise warn (once, via ac_tool_warned) when cross-compiling with the unprefixed tool.
+  if test "x$ac_ct_STRIP" = x; then
+    STRIP=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    STRIP=$ac_ct_STRIP
+  fi
+else
+  STRIP="$ac_cv_prog_STRIP"
+fi
+# Never leave STRIP empty.
+test -z "$STRIP" && STRIP=:
+
+
+
+
+
+# Locate ranlib: with a tool prefix set, search PATH for ${ac_tool_prefix}ranlib; if that finds nothing, search for plain ranlib.
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args.
+set dummy ${ac_tool_prefix}ranlib; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_RANLIB+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$RANLIB"; then
+  ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+RANLIB=$ac_cv_prog_RANLIB
+if test -n "$RANLIB"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5
+$as_echo "$RANLIB" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_RANLIB"; then
+  ac_ct_RANLIB=$RANLIB
+  # Extract the first word of "ranlib", so it can be a program name with args.
+set dummy ranlib; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_RANLIB+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_RANLIB"; then
+  ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_RANLIB="ranlib"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB
+if test -n "$ac_ct_RANLIB"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5
+$as_echo "$ac_ct_RANLIB" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+  # Nothing found: RANLIB becomes ":"; otherwise warn (once, via ac_tool_warned) when cross-compiling with the unprefixed tool.
+  if test "x$ac_ct_RANLIB" = x; then
+    RANLIB=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    RANLIB=$ac_ct_RANLIB
+  fi
+else
+  RANLIB="$ac_cv_prog_RANLIB"
+fi
+# Never leave RANLIB empty.
+test -z "$RANLIB" && RANLIB=:
+
+
+
+
+
+
+# Determine commands to create old-style static archives.
+old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs'
+old_postinstall_cmds='chmod 644 $oldlib'
+old_postuninstall_cmds=
+# With RANLIB non-empty, append a ranlib step to the post-install and archive commands ("-t" is added on openbsd* post-install only).
+if test -n "$RANLIB"; then
+  case $host_os in
+  openbsd*)
+    old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib"
+    ;;
+  *)
+    old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib"
+    ;;
+  esac
+  old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib"
+fi
+# lock_old_archive_extraction is "yes" only for darwin* hosts.
+case $host_os in
+  darwin*)
+    lock_old_archive_extraction=yes ;;
+  *)
+    lock_old_archive_extraction=no ;;
+esac
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# If no C compiler was specified, use CC.
+LTCC=${LTCC-"$CC"}
+
+# If no C compiler flags were specified, use CFLAGS.
+LTCFLAGS=${LTCFLAGS-"$CFLAGS"}
+
+# Allow CC to be a program name with arguments.
+compiler=$CC
+
+
+# Check for command to grab the raw symbol name followed by C symbol from nm.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking command to parse $NM output from $compiler object" >&5
+$as_echo_n "checking command to parse $NM output from $compiler object... " >&6; }
+if ${lt_cv_sys_global_symbol_pipe+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+# Cache miss: derive the nm-output filter below and verify it against a freshly compiled test object.
+# These are sane defaults that work on at least a few old systems.
+# [They come from Ultrix.  What could be older than Ultrix?!! ;)]
+
+# Character class describing NM global symbol codes.
+symcode='[BCDEGRST]'
+
+# Regexp to match symbols that can be accessed directly from C.
+sympat='\([_A-Za-z][_A-Za-z0-9]*\)'
+
+# Define system-specific variables.
+case $host_os in
+aix*)
+  symcode='[BCDT]'
+  ;;
+cygwin* | mingw* | pw32* | cegcc*)
+  symcode='[ABCDGISTW]'
+  ;;
+hpux*)
+  if test "$host_cpu" = ia64; then
+    symcode='[ABCDEGRST]'
+  fi
+  ;;
+irix* | nonstopux*)
+  symcode='[BCDEGRST]'
+  ;;
+osf*)
+  symcode='[BCDEGQRST]'
+  ;;
+solaris*)
+  symcode='[BDRT]'
+  ;;
+sco3.2v5*)
+  symcode='[DT]'
+  ;;
+sysv4.2uw2*)
+  symcode='[DT]'
+  ;;
+sysv5* | sco5v6* | unixware* | OpenUNIX*)
+  symcode='[ABDT]'
+  ;;
+sysv4)
+  symcode='[DFNSTU]'
+  ;;
+esac
+
+# If we're using GNU nm, then use its standard symbol codes.
+case `$NM -V 2>&1` in
+*GNU* | *'with BFD'*)
+  symcode='[ABCDGIRSTW]' ;;
+esac
+
+# Transform an extracted symbol line into a proper C declaration.
+# Some systems (esp. on ia64) link data and code symbols differently,
+# so use this general approach.
+lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
+
+# Transform an extracted symbol line into symbol name and symbol address
+lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\)[ ]*$/  {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/  {\"\2\", (void *) \&\2},/p'"
+lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n -e 's/^: \([^ ]*\)[ ]*$/  {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([^ ]*\) \(lib[^ ]*\)$/  {\"\2\", (void *) \&\2},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/  {\"lib\2\", (void *) \&\2},/p'"
+
+# Handle CRLF in mingw tool chain
+opt_cr=
+case $build_os in
+mingw*)
+  opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp
+  ;;
+esac
+
+# Try without a prefix underscore, then with it.
+for ac_symprfx in "" "_"; do
+
+  # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol.
+  symxfrm="\\1 $ac_symprfx\\2 \\2"
+
+  # Write the raw and C identifiers.
+  if test "$lt_cv_nm_interface" = "MS dumpbin"; then
+    # Fake it for dumpbin and say T for any non-static function
+    # and D for any global variable.
+    # Also find C++ and __fastcall symbols from MSVC++,
+    # which start with @ or ?.
+    lt_cv_sys_global_symbol_pipe="$AWK '"\
+"     {last_section=section; section=\$ 3};"\
+"     /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\
+"     /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\
+"     \$ 0!~/External *\|/{next};"\
+"     / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\
+"     {if(hide[section]) next};"\
+"     {f=0}; \$ 0~/\(\).*\|/{f=1}; {printf f ? \"T \" : \"D \"};"\
+"     {split(\$ 0, a, /\||\r/); split(a[2], s)};"\
+"     s[1]~/^[@?]/{print s[1], s[1]; next};"\
+"     s[1]~prfx {split(s[1],t,\"@\"); print t[1], substr(t[1],length(prfx))}"\
+"     ' prfx=^$ac_symprfx"
+  else
+    lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[	 ]\($symcode$symcode*\)[	 ][	 ]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'"
+  fi
+  lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'"
+  # (The extra sed stage appended above deletes lines containing " __gnu_lto".)
+  # Check to see that the pipe works correctly.
+  pipe_works=no
+
+  rm -f conftest*
+  cat > conftest.$ac_ext <<_LT_EOF
+#ifdef __cplusplus
+extern "C" {
+#endif
+char nm_test_var;
+void nm_test_func(void);
+void nm_test_func(void){}
+#ifdef __cplusplus
+}
+#endif
+int main(){nm_test_var='a';nm_test_func();return(0);}
+_LT_EOF
+  # Compile the test source, pipe $NM output through the candidate filter, then look for both test symbols.
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    # Now try to grab the symbols.
+    nlist=conftest.nm
+    if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist\""; } >&5
+  (eval $NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && test -s "$nlist"; then
+      # Try sorting and uniquifying the output.
+      if sort "$nlist" | uniq > "$nlist"T; then
+	mv -f "$nlist"T "$nlist"
+      else
+	rm -f "$nlist"T
+      fi
+
+      # Make sure that we snagged all the symbols we need.
+      if $GREP ' nm_test_var$' "$nlist" >/dev/null; then
+	if $GREP ' nm_test_func$' "$nlist" >/dev/null; then
+	  cat <<_LT_EOF > conftest.$ac_ext
+/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests.  */
+#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE)
+/* DATA imports from DLLs on WIN32 con't be const, because runtime
+   relocations are performed -- see ld's documentation on pseudo-relocs.  */
+# define LT_DLSYM_CONST
+#elif defined(__osf__)
+/* This system does not cope well with relocations in const data.  */
+# define LT_DLSYM_CONST
+#else
+# define LT_DLSYM_CONST const
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+_LT_EOF
+	  # Now generate the symbol file.
+	  eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext'
+
+	  cat <<_LT_EOF >> conftest.$ac_ext
+
+/* The mapping between symbol names and symbols.  */
+LT_DLSYM_CONST struct {
+  const char *name;
+  void       *address;
+}
+lt__PROGRAM__LTX_preloaded_symbols[] =
+{
+  { "@PROGRAM@", (void *) 0 },
+_LT_EOF
+	  $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/  {\"\2\", (void *) \&\2},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext
+	  cat <<\_LT_EOF >> conftest.$ac_ext
+  {0, (void *) 0}
+};
+
+/* This works around a problem in FreeBSD linker */
+#ifdef FREEBSD_WORKAROUND
+static const void *lt_preloaded_setup() {
+  return lt__PROGRAM__LTX_preloaded_symbols;
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+_LT_EOF
+	  # Now try linking the two files.
+	  mv conftest.$ac_objext conftstm.$ac_objext
+	  lt_globsym_save_LIBS=$LIBS
+	  lt_globsym_save_CFLAGS=$CFLAGS
+	  LIBS="conftstm.$ac_objext"
+	  CFLAGS="$CFLAGS$lt_prog_compiler_no_builtin_flag"
+	  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5
+  (eval $ac_link) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && test -s conftest${ac_exeext}; then
+	    pipe_works=yes
+	  fi
+	  LIBS=$lt_globsym_save_LIBS
+	  CFLAGS=$lt_globsym_save_CFLAGS
+	else
+	  echo "cannot find nm_test_func in $nlist" >&5
+	fi
+      else
+	echo "cannot find nm_test_var in $nlist" >&5
+      fi
+    else
+      echo "cannot run $lt_cv_sys_global_symbol_pipe" >&5
+    fi
+  else
+    echo "$progname: failed program was:" >&5
+    cat conftest.$ac_ext >&5
+  fi
+  rm -rf conftest* conftst*
+
+  # Do not use the global_symbol_pipe unless it works.
+  if test "$pipe_works" = yes; then
+    break
+  else
+    lt_cv_sys_global_symbol_pipe=
+  fi
+done
+# After the loop the pipe variable is empty unless one prefix variant passed the link test.
+fi
+# No working pipe: clear the cdecl transform as well, then report ok/failed.
+if test -z "$lt_cv_sys_global_symbol_pipe"; then
+  lt_cv_sys_global_symbol_to_cdecl=
+fi
+if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: failed" >&5
+$as_echo "failed" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: ok" >&5
+$as_echo "ok" >&6; }
+fi
+
+# Response file support: nm_file_list_spec is '@' for MS dumpbin or when "$NM --help" mentions @FILE; otherwise it is not assigned here.
+if test "$lt_cv_nm_interface" = "MS dumpbin"; then
+  nm_file_list_spec='@'
+elif $NM --help 2>/dev/null | grep '[@]FILE' >/dev/null; then
+  nm_file_list_spec='@'
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for sysroot" >&5
+$as_echo_n "checking for sysroot... " >&6; }
+
+# Check whether --with-sysroot was given.
+if test "${with_sysroot+set}" = set; then :
+  withval=$with_sysroot;
+else
+  with_sysroot=no
+fi
+
+# Resolve lt_sysroot: "yes" asks $CC --print-sysroot (GCC only); a path starting with "/" is used after sed quoting; "no"/empty leaves it empty; anything else is a fatal error.
+lt_sysroot=
+case ${with_sysroot} in #(
+ yes)
+   if test "$GCC" = yes; then
+     lt_sysroot=`$CC --print-sysroot 2>/dev/null`
+   fi
+   ;; #(
+ /*)
+   lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"`
+   ;; #(
+ no|'')
+   ;; #(
+ *)
+   { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${with_sysroot}" >&5
+$as_echo "${with_sysroot}" >&6; }
+   as_fn_error $? "The sysroot must be an absolute path." "$LINENO" 5
+   ;;
+esac
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${lt_sysroot:-no}" >&5
+$as_echo "${lt_sysroot:-no}" >&6; }
+
+
+
+
+
+# Check whether --enable-libtool-lock was given.
+if test "${enable_libtool_lock+set}" = set; then :
+  enableval=$enable_libtool_lock;
+fi
+# Any value other than "no" (including unset) is normalized to "yes".
+test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes
+
+# Some flags need to be propagated to the compiler or linker for good
+# libtool support.
+case $host in
+ia64-*-hpux*)
+  # Find out which ABI we are using.
+  echo 'int i;' > conftest.$ac_ext
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    case `/usr/bin/file conftest.$ac_objext` in
+      *ELF-32*)
+	HPUX_IA64_MODE="32"
+	;;
+      *ELF-64*)
+	HPUX_IA64_MODE="64"
+	;;
+    esac
+  fi
+  rm -rf conftest*
+  ;;
+*-*-irix6*)
+  # Find out which ABI we are using.
+  echo '#line '$LINENO' "configure"' > conftest.$ac_ext
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    if test "$lt_cv_prog_gnu_ld" = yes; then
+      case `/usr/bin/file conftest.$ac_objext` in
+	*32-bit*)
+	  LD="${LD-ld} -melf32bsmip"
+	  ;;
+	*N32*)
+	  LD="${LD-ld} -melf32bmipn32"
+	  ;;
+	*64-bit*)
+	  LD="${LD-ld} -melf64bmip"
+	;;
+      esac
+    else
+      case `/usr/bin/file conftest.$ac_objext` in
+	*32-bit*)
+	  LD="${LD-ld} -32"
+	  ;;
+	*N32*)
+	  LD="${LD-ld} -n32"
+	  ;;
+	*64-bit*)
+	  LD="${LD-ld} -64"
+	  ;;
+      esac
+    fi
+  fi
+  rm -rf conftest*
+  ;;
+# Next case: compile a one-line object and append to LD the -m option matching what /usr/bin/file reports (32-bit vs 64-bit).
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
+s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
+  # Find out which ABI we are using.
+  echo 'int i;' > conftest.$ac_ext
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    case `/usr/bin/file conftest.o` in
+      *32-bit*)
+	case $host in
+	  x86_64-*kfreebsd*-gnu)
+	    LD="${LD-ld} -m elf_i386_fbsd"
+	    ;;
+	  x86_64-*linux*)
+	    LD="${LD-ld} -m elf_i386"
+	    ;;
+	  ppc64-*linux*|powerpc64-*linux*)
+	    LD="${LD-ld} -m elf32ppclinux"
+	    ;;
+	  s390x-*linux*)
+	    LD="${LD-ld} -m elf_s390"
+	    ;;
+	  sparc64-*linux*)
+	    LD="${LD-ld} -m elf32_sparc"
+	    ;;
+	esac
+	;;
+      *64-bit*)
+	case $host in
+	  x86_64-*kfreebsd*-gnu)
+	    LD="${LD-ld} -m elf_x86_64_fbsd"
+	    ;;
+	  x86_64-*linux*)
+	    LD="${LD-ld} -m elf_x86_64"
+	    ;;
+	  ppc*-*linux*|powerpc*-*linux*)
+	    LD="${LD-ld} -m elf64ppc"
+	    ;;
+	  s390*-*linux*|s390*-*tpf*)
+	    LD="${LD-ld} -m elf64_s390"
+	    ;;
+	  sparc*-*linux*)
+	    LD="${LD-ld} -m elf64_sparc"
+	    ;;
+	esac
+	;;
+    esac
+  fi
+  rm -rf conftest*
+  ;;
+# Next case: -belf is added to CFLAGS up front and kept only when the link test below succeeds with it.
+*-*-sco3.2v5*)
+  # On SCO OpenServer 5, we need -belf to get full-featured binaries.
+  SAVE_CFLAGS="$CFLAGS"
+  CFLAGS="$CFLAGS -belf"
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler needs -belf" >&5
+$as_echo_n "checking whether the C compiler needs -belf... " >&6; }
+if ${lt_cv_cc_needs_belf+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+     cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  lt_cv_cc_needs_belf=yes
+else
+  lt_cv_cc_needs_belf=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+     ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_cc_needs_belf" >&5
+$as_echo "$lt_cv_cc_needs_belf" >&6; }
+  if test x"$lt_cv_cc_needs_belf" != x"yes"; then
+    # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf
+    CFLAGS="$SAVE_CFLAGS"
+  fi
+  ;;
+*-*solaris*)
+  # Find out which ABI we are using.
+  echo 'int i;' > conftest.$ac_ext
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    case `/usr/bin/file conftest.o` in
+    *64-bit*)
+      case $lt_cv_prog_gnu_ld in
+      yes*)
+        case $host in
+        i?86-*-solaris*)
+          LD="${LD-ld} -m elf_x86_64"
+          ;;
+        sparc*-*-solaris*)
+          LD="${LD-ld} -m elf64_sparc"
+          ;;
+        esac
+        # GNU ld 2.21 introduced _sol2 emulations.  Use them if available.
+        if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then
+          LD="${LD-ld}_sol2"
+        fi
+        ;;
+      *)
+	if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then
+	  LD="${LD-ld} -64"
+	fi
+	;;
+      esac
+      ;;
+    esac
+  fi
+  rm -rf conftest*
+  ;;
+esac
+# need_locks simply mirrors enable_libtool_lock.
+need_locks="$enable_libtool_lock"
+# Locate the manifest tool: with a tool prefix set, search PATH for ${ac_tool_prefix}mt; if that finds nothing, search for plain mt.
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}mt", so it can be a program name with args.
+set dummy ${ac_tool_prefix}mt; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_MANIFEST_TOOL+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$MANIFEST_TOOL"; then
+  ac_cv_prog_MANIFEST_TOOL="$MANIFEST_TOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_MANIFEST_TOOL="${ac_tool_prefix}mt"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+MANIFEST_TOOL=$ac_cv_prog_MANIFEST_TOOL
+if test -n "$MANIFEST_TOOL"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MANIFEST_TOOL" >&5
+$as_echo "$MANIFEST_TOOL" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_MANIFEST_TOOL"; then
+  ac_ct_MANIFEST_TOOL=$MANIFEST_TOOL
+  # Extract the first word of "mt", so it can be a program name with args.
+set dummy mt; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_MANIFEST_TOOL+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_MANIFEST_TOOL"; then
+  ac_cv_prog_ac_ct_MANIFEST_TOOL="$ac_ct_MANIFEST_TOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_MANIFEST_TOOL="mt"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_MANIFEST_TOOL=$ac_cv_prog_ac_ct_MANIFEST_TOOL
+if test -n "$ac_ct_MANIFEST_TOOL"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_MANIFEST_TOOL" >&5
+$as_echo "$ac_ct_MANIFEST_TOOL" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+  # Nothing found: MANIFEST_TOOL becomes ":"; otherwise warn (once, via ac_tool_warned) when cross-compiling with the unprefixed tool.
+  if test "x$ac_ct_MANIFEST_TOOL" = x; then
+    MANIFEST_TOOL=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    MANIFEST_TOOL=$ac_ct_MANIFEST_TOOL
+  fi
+else
+  MANIFEST_TOOL="$ac_cv_prog_MANIFEST_TOOL"
+fi
+# Keep $MANIFEST_TOOL only if its '-?' output contains "Manifest Tool"; else use ":". (The cache variable really is spelled "mainfest".)
+test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $MANIFEST_TOOL is a manifest tool" >&5
+$as_echo_n "checking if $MANIFEST_TOOL is a manifest tool... " >&6; }
+if ${lt_cv_path_mainfest_tool+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_path_mainfest_tool=no
+  echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&5
+  $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out
+  cat conftest.err >&5
+  if $GREP 'Manifest Tool' conftest.out > /dev/null; then
+    lt_cv_path_mainfest_tool=yes
+  fi
+  rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_mainfest_tool" >&5
+$as_echo "$lt_cv_path_mainfest_tool" >&6; }
+if test "x$lt_cv_path_mainfest_tool" != xyes; then
+  MANIFEST_TOOL=:
+fi
+
+
+
+
+
+
+  case $host_os in
+    rhapsody* | darwin*)
+    if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}dsymutil", so it can be a program name with args.
+set dummy ${ac_tool_prefix}dsymutil; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_DSYMUTIL+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$DSYMUTIL"; then
+  ac_cv_prog_DSYMUTIL="$DSYMUTIL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_DSYMUTIL="${ac_tool_prefix}dsymutil"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+DSYMUTIL=$ac_cv_prog_DSYMUTIL
+if test -n "$DSYMUTIL"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DSYMUTIL" >&5
+$as_echo "$DSYMUTIL" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_DSYMUTIL"; then
+  ac_ct_DSYMUTIL=$DSYMUTIL
+  # Extract the first word of "dsymutil", so it can be a program name with args.
+set dummy dsymutil; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_DSYMUTIL+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_DSYMUTIL"; then
+  ac_cv_prog_ac_ct_DSYMUTIL="$ac_ct_DSYMUTIL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_DSYMUTIL="dsymutil"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_DSYMUTIL=$ac_cv_prog_ac_ct_DSYMUTIL
+if test -n "$ac_ct_DSYMUTIL"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DSYMUTIL" >&5
+$as_echo "$ac_ct_DSYMUTIL" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_DSYMUTIL" = x; then
+    DSYMUTIL=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    DSYMUTIL=$ac_ct_DSYMUTIL
+  fi
+else
+  DSYMUTIL="$ac_cv_prog_DSYMUTIL"
+fi
+
+    if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}nmedit", so it can be a program name with args.
+set dummy ${ac_tool_prefix}nmedit; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_NMEDIT+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$NMEDIT"; then
+  ac_cv_prog_NMEDIT="$NMEDIT" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_NMEDIT="${ac_tool_prefix}nmedit"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+NMEDIT=$ac_cv_prog_NMEDIT
+if test -n "$NMEDIT"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $NMEDIT" >&5
+$as_echo "$NMEDIT" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_NMEDIT"; then
+  ac_ct_NMEDIT=$NMEDIT
+  # Extract the first word of "nmedit", so it can be a program name with args.
+set dummy nmedit; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_NMEDIT+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_NMEDIT"; then
+  ac_cv_prog_ac_ct_NMEDIT="$ac_ct_NMEDIT" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_NMEDIT="nmedit"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_NMEDIT=$ac_cv_prog_ac_ct_NMEDIT
+if test -n "$ac_ct_NMEDIT"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_NMEDIT" >&5
+$as_echo "$ac_ct_NMEDIT" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_NMEDIT" = x; then
+    NMEDIT=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    NMEDIT=$ac_ct_NMEDIT
+  fi
+else
+  NMEDIT="$ac_cv_prog_NMEDIT"
+fi
+
+    if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}lipo", so it can be a program name with args.
+set dummy ${ac_tool_prefix}lipo; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_LIPO+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$LIPO"; then
+  ac_cv_prog_LIPO="$LIPO" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_LIPO="${ac_tool_prefix}lipo"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+LIPO=$ac_cv_prog_LIPO
+if test -n "$LIPO"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LIPO" >&5
+$as_echo "$LIPO" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_LIPO"; then
+  ac_ct_LIPO=$LIPO
+  # Extract the first word of "lipo", so it can be a program name with args.
+set dummy lipo; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_LIPO+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_LIPO"; then
+  ac_cv_prog_ac_ct_LIPO="$ac_ct_LIPO" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_LIPO="lipo"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_LIPO=$ac_cv_prog_ac_ct_LIPO
+if test -n "$ac_ct_LIPO"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_LIPO" >&5
+$as_echo "$ac_ct_LIPO" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_LIPO" = x; then
+    LIPO=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    LIPO=$ac_ct_LIPO
+  fi
+else
+  LIPO="$ac_cv_prog_LIPO"
+fi
+
+    if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}otool", so it can be a program name with args.
+set dummy ${ac_tool_prefix}otool; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_OTOOL+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$OTOOL"; then
+  ac_cv_prog_OTOOL="$OTOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_OTOOL="${ac_tool_prefix}otool"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+OTOOL=$ac_cv_prog_OTOOL
+if test -n "$OTOOL"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL" >&5
+$as_echo "$OTOOL" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_OTOOL"; then
+  ac_ct_OTOOL=$OTOOL
+  # Extract the first word of "otool", so it can be a program name with args.
+set dummy otool; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_OTOOL+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_OTOOL"; then
+  ac_cv_prog_ac_ct_OTOOL="$ac_ct_OTOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_OTOOL="otool"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_OTOOL=$ac_cv_prog_ac_ct_OTOOL
+if test -n "$ac_ct_OTOOL"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL" >&5
+$as_echo "$ac_ct_OTOOL" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_OTOOL" = x; then
+    OTOOL=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    OTOOL=$ac_ct_OTOOL
+  fi
+else
+  OTOOL="$ac_cv_prog_OTOOL"
+fi
+
+    if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}otool64", so it can be a program name with args.
+set dummy ${ac_tool_prefix}otool64; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_OTOOL64+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$OTOOL64"; then
+  ac_cv_prog_OTOOL64="$OTOOL64" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_OTOOL64="${ac_tool_prefix}otool64"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+OTOOL64=$ac_cv_prog_OTOOL64
+if test -n "$OTOOL64"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL64" >&5
+$as_echo "$OTOOL64" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_OTOOL64"; then
+  ac_ct_OTOOL64=$OTOOL64
+  # Extract the first word of "otool64", so it can be a program name with args.
+set dummy otool64; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_OTOOL64+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_OTOOL64"; then
+  ac_cv_prog_ac_ct_OTOOL64="$ac_ct_OTOOL64" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_OTOOL64="otool64"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_OTOOL64=$ac_cv_prog_ac_ct_OTOOL64
+if test -n "$ac_ct_OTOOL64"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL64" >&5
+$as_echo "$ac_ct_OTOOL64" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_OTOOL64" = x; then
+    OTOOL64=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    OTOOL64=$ac_ct_OTOOL64
+  fi
+else
+  OTOOL64="$ac_cv_prog_OTOOL64"
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -single_module linker flag" >&5
+$as_echo_n "checking for -single_module linker flag... " >&6; }
+if ${lt_cv_apple_cc_single_mod+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_apple_cc_single_mod=no
+      if test -z "${LT_MULTI_MODULE}"; then
+	# By default we will add the -single_module flag. You can override
+	# by either setting the environment variable LT_MULTI_MODULE
+	# non-empty at configure time, or by adding -multi_module to the
+	# link flags.
+	rm -rf libconftest.dylib*
+	echo "int foo(void){return 1;}" > conftest.c
+	echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
+-dynamiclib -Wl,-single_module conftest.c" >&5
+	$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
+	  -dynamiclib -Wl,-single_module conftest.c 2>conftest.err
+        _lt_result=$?
+	# If there is a non-empty error log, and "single_module"
+	# appears in it, assume the flag caused a linker warning
+        if test -s conftest.err && $GREP single_module conftest.err; then
+	  cat conftest.err >&5
+	# Otherwise, if the output was created with a 0 exit code from
+	# the compiler, it worked.
+	elif test -f libconftest.dylib && test $_lt_result -eq 0; then
+	  lt_cv_apple_cc_single_mod=yes
+	else
+	  cat conftest.err >&5
+	fi
+	rm -rf libconftest.dylib*
+	rm -f conftest.*
+      fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_apple_cc_single_mod" >&5
+$as_echo "$lt_cv_apple_cc_single_mod" >&6; }
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -exported_symbols_list linker flag" >&5
+$as_echo_n "checking for -exported_symbols_list linker flag... " >&6; }
+if ${lt_cv_ld_exported_symbols_list+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_ld_exported_symbols_list=no
+      save_LDFLAGS=$LDFLAGS
+      echo "_main" > conftest.sym
+      LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym"
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  lt_cv_ld_exported_symbols_list=yes
+else
+  lt_cv_ld_exported_symbols_list=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+	LDFLAGS="$save_LDFLAGS"
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_exported_symbols_list" >&5
+$as_echo "$lt_cv_ld_exported_symbols_list" >&6; }
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -force_load linker flag" >&5
+$as_echo_n "checking for -force_load linker flag... " >&6; }
+if ${lt_cv_ld_force_load+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_ld_force_load=no
+      cat > conftest.c << _LT_EOF
+int forced_loaded() { return 2;}
+_LT_EOF
+      echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&5
+      $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&5
+      echo "$AR cru libconftest.a conftest.o" >&5
+      $AR cru libconftest.a conftest.o 2>&5
+      echo "$RANLIB libconftest.a" >&5
+      $RANLIB libconftest.a 2>&5
+      cat > conftest.c << _LT_EOF
+int main() { return 0;}
+_LT_EOF
+      echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&5
+      $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err
+      _lt_result=$?
+      if test -s conftest.err && $GREP force_load conftest.err; then
+	cat conftest.err >&5
+      elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then
+	lt_cv_ld_force_load=yes
+      else
+	cat conftest.err >&5
+      fi
+        rm -f conftest.err libconftest.a conftest conftest.c
+        rm -rf conftest.dSYM
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_force_load" >&5
+$as_echo "$lt_cv_ld_force_load" >&6; }
+    case $host_os in
+    rhapsody* | darwin1.[012])
+      _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;;
+    darwin1.*)
+      _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;;
+    darwin*) # darwin 5.x on
+      # if running on 10.5 or later, the deployment target defaults
+      # to the OS version, if on x86, and 10.4, the deployment
+      # target defaults to 10.4. Don't you love it?
+      case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in
+	10.0,*86*-darwin8*|10.0,*-darwin[91]*)
+	  _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;;
+	10.[012]*)
+	  _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;;
+	10.*)
+	  _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;;
+      esac
+    ;;
+  esac
+    if test "$lt_cv_apple_cc_single_mod" = "yes"; then
+      _lt_dar_single_mod='$single_module'
+    fi
+    if test "$lt_cv_ld_exported_symbols_list" = "yes"; then
+      _lt_dar_export_syms=' ${wl}-exported_symbols_list,$output_objdir/${libname}-symbols.expsym'
+    else
+      _lt_dar_export_syms='~$NMEDIT -s $output_objdir/${libname}-symbols.expsym ${lib}'
+    fi
+    if test "$DSYMUTIL" != ":" && test "$lt_cv_ld_force_load" = "no"; then
+      _lt_dsymutil='~$DSYMUTIL $lib || :'
+    else
+      _lt_dsymutil=
+    fi
+    ;;
+  esac
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
+$as_echo_n "checking for ANSI C header files... " >&6; }
+if ${ac_cv_header_stdc+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_header_stdc=yes
+else
+  ac_cv_header_stdc=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+if test $ac_cv_header_stdc = yes; then
+  # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <string.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "memchr" >/dev/null 2>&1; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+  # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "free" >/dev/null 2>&1; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+  # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+  if test "$cross_compiling" = yes; then :
+  :
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ctype.h>
+#include <stdlib.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) \
+		   (('a' <= (c) && (c) <= 'i') \
+		     || ('j' <= (c) && (c) <= 'r') \
+		     || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 256; i++)
+    if (XOR (islower (i), ISLOWER (i))
+	|| toupper (i) != TOUPPER (i))
+      return 2;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+  conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5
+$as_echo "$ac_cv_header_stdc" >&6; }
+if test $ac_cv_header_stdc = yes; then
+
+$as_echo "#define STDC_HEADERS 1" >>confdefs.h
+
+fi
+
+# On IRIX 5.3, sys/types and inttypes.h are conflicting.
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
+		  inttypes.h stdint.h unistd.h
+do :  # probe each listed header in turn
+  as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`  # variable name handed to the check helper and tested below
+ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default
+"
+if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :  # eval is needed because the variable name itself is computed
+  cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+for ac_header in dlfcn.h  # single-item loop; the body refers to dlfcn.h by its literal name
+do :
+  ac_fn_c_check_header_compile "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default
+"
+if test "x$ac_cv_header_dlfcn_h" = xyes; then :  # append HAVE_DLFCN_H to confdefs.h when the result variable is yes
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_DLFCN_H 1
+_ACEOF
+
+fi
+
+done
+
+
+
+
+func_stripname_cnf ()  # args: $1 = prefix pattern, $2 = suffix pattern, $3 = name; the result is left in func_stripname_result
+{
+  case ${2} in  # a suffix that starts with a dot gets a backslash put in front of it in the sed script
+  .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;;
+  *)  func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;;
+  esac
+} # func_stripname_cnf: removes $1 anchored at the start and $2 anchored at the end of $3 (both are used as sed patterns)
+
+
+
+
+
+# Set options
+
+
+
+        enable_dlopen=no
+
+
+
+
+
+
+# Check whether --with-pic was given.  The outcome is stored in pic_mode as yes, no or default.
+if test "${with_pic+set}" = set; then :
+  withval=$with_pic; lt_p=${PACKAGE-default}  # lt_p is the name to look for in a list value; "default" when PACKAGE is unset
+    case $withval in
+    yes|no) pic_mode=$withval ;;
+    *)  # any other value is treated as a list of names
+      pic_mode=default
+      # Look at the argument we got.  We use all the common list separators.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for lt_pkg in $withval; do
+	IFS="$lt_save_ifs"
+	if test "X$lt_pkg" = "X$lt_p"; then
+	  pic_mode=yes
+	fi
+      done
+      IFS="$lt_save_ifs"
+      ;;
+    esac
+else
+  pic_mode=default
+fi
+
+
+test -z "$pic_mode" && pic_mode=default  # final fallback if pic_mode is still empty at this point
+
+
+
+
+
+
+
+  # Check whether --enable-fast-install was given.  Without the option the setting defaults to yes.
+if test "${enable_fast_install+set}" = set; then :
+  enableval=$enable_fast_install; p=${PACKAGE-default}  # p is the name to look for in a list value; "default" when PACKAGE is unset
+    case $enableval in
+    yes) enable_fast_install=yes ;;
+    no) enable_fast_install=no ;;
+    *)  # any other value is treated as a list of names
+      enable_fast_install=no
+      # Look at the argument we got.  We use all the common list separators.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for pkg in $enableval; do
+	IFS="$lt_save_ifs"
+	if test "X$pkg" = "X$p"; then
+	  enable_fast_install=yes
+	fi
+      done
+      IFS="$lt_save_ifs"
+      ;;
+    esac
+else
+  enable_fast_install=yes
+fi
+
+
+
+
+
+
+
+
+
+
+
+# This can be used to rebuild libtool when needed
+LIBTOOL_DEPS="$ltmain"
+
+# Always use our own libtool.
+LIBTOOL='$(SHELL) $(top_builddir)/libtool'
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+test -z "$LN_S" && LN_S="ln -s"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+if test -n "${ZSH_VERSION+set}" ; then
+   setopt NO_GLOB_SUBST
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for objdir" >&5
+$as_echo_n "checking for objdir... " >&6; }
+if ${lt_cv_objdir+:} false; then :  # expands to ": false" (success) when lt_cv_objdir is already set, so the probe is skipped
+  $as_echo_n "(cached) " >&6
+else
+  rm -f .libs 2>/dev/null  # clears a plain file named .libs; errors are discarded
+mkdir .libs 2>/dev/null
+if test -d .libs; then  # a directory named .libs exists, so dot-names are usable here
+  lt_cv_objdir=.libs
+else
+  # MS-DOS does not allow filenames that begin with a dot.
+  lt_cv_objdir=_libs
+fi
+rmdir .libs 2>/dev/null  # remove the probe directory again; errors are discarded
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_objdir" >&5
+$as_echo "$lt_cv_objdir" >&6; }
+objdir=$lt_cv_objdir
+
+
+
+
+
+cat >>confdefs.h <<_ACEOF
+#define LT_OBJDIR "$lt_cv_objdir/"
+_ACEOF
+
+
+
+
+case $host_os in
+aix3*)
+  # AIX sometimes has problems with the GCC collect2 program.  For some
+  # reason, if we set the COLLECT_NAMES environment variable, the problems
+  # vanish in a puff of smoke.
+  if test "X${COLLECT_NAMES+set}" != Xset; then
+    COLLECT_NAMES=
+    export COLLECT_NAMES
+  fi
+  ;;
+esac
+
+# Global variables:
+ofile=libtool
+can_build_shared=yes
+
+# All known linkers require a `.a' archive for static linking (except MSVC,
+# which needs '.lib').
+libext=a
+
+with_gnu_ld="$lt_cv_prog_gnu_ld"
+
+old_CC="$CC"
+old_CFLAGS="$CFLAGS"
+
+# Set sane defaults for various variables
+test -z "$CC" && CC=cc
+test -z "$LTCC" && LTCC=$CC
+test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS
+test -z "$LD" && LD=ld
+test -z "$ac_objext" && ac_objext=o
+
+for cc_temp in $compiler""; do  # NOTE(review): compiler is assigned further down in this script; its value here comes from code outside this view
+  case $cc_temp in
+    compile | *[\\/]compile | ccache | *[\\/]ccache ) ;;  # skip wrapper programs
+    distcc | *[\\/]distcc | purify | *[\\/]purify ) ;;  # skip wrapper programs
+    \-*) ;;  # skip words that start with a dash
+    *) break;;  # first remaining word: stop, leaving it in cc_temp
+  esac
+done
+cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"`  # drop any directory part, then a leading "$host_alias-"
+
+
+# Only perform the check for file, if the check method requires it (deplibs_check_method starts with file_magic).
+test -z "$MAGIC_CMD" && MAGIC_CMD=file
+case $deplibs_check_method in
+file_magic*)
+  if test "$file_magic_cmd" = '$MAGIC_CMD'; then  # compares against the literal text $MAGIC_CMD (single-quoted, not expanded)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ${ac_tool_prefix}file" >&5
+$as_echo_n "checking for ${ac_tool_prefix}file... " >&6; }
+if ${lt_cv_path_MAGIC_CMD+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  case $MAGIC_CMD in
+[\\/*] |  ?:[\\/]*)
+  lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. NOTE(review): the first pattern above is a bracket expression that matches one character only, so a path like /usr/bin/file falls through to the search below; confirm against upstream libtool whether [\\/]* was intended.
+  ;;
+*)
+  lt_save_MAGIC_CMD="$MAGIC_CMD"
+  lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+  ac_dummy="/usr/bin$PATH_SEPARATOR$PATH"  # search /usr/bin first, then every PATH entry
+  for ac_dir in $ac_dummy; do
+    IFS="$lt_save_ifs"
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/${ac_tool_prefix}file; then
+      lt_cv_path_MAGIC_CMD="$ac_dir/${ac_tool_prefix}file"
+      if test -n "$file_magic_test_file"; then
+	case $deplibs_check_method in
+	"file_magic "*)
+	  file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"`
+	  MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+	  if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+	    $EGREP "$file_magic_regex" > /dev/null; then
+	    :
+	  else
+	    cat <<_LT_EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such.  This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem.  Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+_LT_EOF
+	  fi ;;
+	esac
+      fi
+      break
+    fi
+  done
+  IFS="$lt_save_ifs"
+  MAGIC_CMD="$lt_save_MAGIC_CMD"  # undo the temporary assignment made for the file_magic test above
+  ;;
+esac
+fi
+
+MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+if test -n "$MAGIC_CMD"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5
+$as_echo "$MAGIC_CMD" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+
+
+
+if test -z "$lt_cv_path_MAGIC_CMD"; then  # nothing was recorded for the prefixed name
+  if test -n "$ac_tool_prefix"; then  # retry with the plain name "file"
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for file" >&5
+$as_echo_n "checking for file... " >&6; }
+if ${lt_cv_path_MAGIC_CMD+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  case $MAGIC_CMD in
+[\\/*] |  ?:[\\/]*)
+  lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. NOTE(review): the first pattern above is a bracket expression that matches one character only, so a path like /usr/bin/file falls through to the search below; confirm against upstream libtool whether [\\/]* was intended.
+  ;;
+*)
+  lt_save_MAGIC_CMD="$MAGIC_CMD"
+  lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+  ac_dummy="/usr/bin$PATH_SEPARATOR$PATH"  # search /usr/bin first, then every PATH entry
+  for ac_dir in $ac_dummy; do
+    IFS="$lt_save_ifs"
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/file; then
+      lt_cv_path_MAGIC_CMD="$ac_dir/file"
+      if test -n "$file_magic_test_file"; then
+	case $deplibs_check_method in
+	"file_magic "*)
+	  file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"`
+	  MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+	  if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+	    $EGREP "$file_magic_regex" > /dev/null; then
+	    :
+	  else
+	    cat <<_LT_EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such.  This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem.  Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+_LT_EOF
+	  fi ;;
+	esac
+      fi
+      break
+    fi
+  done
+  IFS="$lt_save_ifs"
+  MAGIC_CMD="$lt_save_MAGIC_CMD"  # undo the temporary assignment made for the file_magic test above
+  ;;
+esac
+fi
+
+MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+if test -n "$MAGIC_CMD"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5
+$as_echo "$MAGIC_CMD" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  else
+    MAGIC_CMD=:  # no tool prefix, so there is no second name to try
+  fi
+fi
+
+  fi
+  ;;
+esac
+
+# Use C for the default configuration in the libtool script
+
+lt_save_CC="$CC"
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+# Source file extension for C test sources.
+ac_ext=c
+
+# Object file extension for compiled C test sources.
+objext=o
+objext=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code="int some_variable = 0;"
+
+# Code to be used in simple link tests
+lt_simple_link_test_code='int main(){return(0);}'
+
+
+
+
+
+
+
+# If no C compiler was specified, use CC.
+LTCC=${LTCC-"$CC"}
+
+# If no C compiler flags were specified, use CFLAGS.
+LTCFLAGS=${LTCFLAGS-"$CFLAGS"}
+
+# Allow CC to be a program name with arguments.
+compiler=$CC
+
+# Save the default compiler, since it gets overwritten when the other
+# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP.
+compiler_DEFAULT=$CC
+
+# save warnings/boilerplate of simple test code
+ac_outfile=conftest.$ac_objext
+echo "$lt_simple_compile_test_code" >conftest.$ac_ext
+eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
+_lt_compiler_boilerplate=`cat conftest.err`
+$RM conftest*
+
+ac_outfile=conftest.$ac_objext
+echo "$lt_simple_link_test_code" >conftest.$ac_ext
+eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
+_lt_linker_boilerplate=`cat conftest.err`
+$RM -r conftest*
+
+
+## CAVEAT EMPTOR:
+## There is no encapsulation within the following macros, do not change
+## the running order or otherwise move them around unless you know exactly
+## what you are doing...
+if test -n "$compiler"; then
+
+lt_prog_compiler_no_builtin_flag=
+
+if test "$GCC" = yes; then
+  case $cc_basename in
+  nvcc*)
+    lt_prog_compiler_no_builtin_flag=' -Xcompiler -fno-builtin' ;;
+  *)
+    lt_prog_compiler_no_builtin_flag=' -fno-builtin' ;;
+  esac
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -fno-rtti -fno-exceptions" >&5
+$as_echo_n "checking if $compiler supports -fno-rtti -fno-exceptions... " >&6; }
+if ${lt_cv_prog_compiler_rtti_exceptions+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_rtti_exceptions=no
+   ac_outfile=conftest.$ac_objext
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+   lt_compiler_flag="-fno-rtti -fno-exceptions"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   # The option is referenced via a variable to avoid confusing sed.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>conftest.err)
+   ac_status=$?
+   cat conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   if (exit $ac_status) && test -s "$ac_outfile"; then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings other than the usual output.
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp
+     $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+     if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then
+       lt_cv_prog_compiler_rtti_exceptions=yes
+     fi
+   fi
+   $RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_rtti_exceptions" >&5
+$as_echo "$lt_cv_prog_compiler_rtti_exceptions" >&6; }
+
+if test x"$lt_cv_prog_compiler_rtti_exceptions" = xyes; then
+    lt_prog_compiler_no_builtin_flag="$lt_prog_compiler_no_builtin_flag -fno-rtti -fno-exceptions"
+else
+    :
+fi
+
+fi
+
+
+
+
+
+
+  lt_prog_compiler_wl=
+lt_prog_compiler_pic=
+lt_prog_compiler_static=
+
+
+  if test "$GCC" = yes; then
+    lt_prog_compiler_wl='-Wl,'
+    lt_prog_compiler_static='-static'
+
+    case $host_os in
+      aix*)
+      # All AIX code is PIC.
+      if test "$host_cpu" = ia64; then
+	# AIX 5 now supports IA64 processor
+	lt_prog_compiler_static='-Bstatic'
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # see comment about AmigaOS4 .so support
+            lt_prog_compiler_pic='-fPIC'
+        ;;
+      m68k)
+            # FIXME: we need at least 68020 code to build shared libraries, but
+            # adding the `-m68020' flag to GCC prevents building anything better,
+            # like `-m68040'.
+            lt_prog_compiler_pic='-m68020 -resident32 -malways-restore-a4'
+        ;;
+      esac
+      ;;
+
+    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
+      # PIC is the default for these OSes.
+      ;;
+
+    mingw* | cygwin* | pw32* | os2* | cegcc*)
+      # This hack is so that the source file can tell whether it is being
+      # built for inclusion in a dll (and should export symbols for example).
+      # Although the cygwin gcc ignores -fPIC, still need this for old-style
+      # (--disable-auto-import) libraries
+      lt_prog_compiler_pic='-DDLL_EXPORT'
+      ;;
+
+    darwin* | rhapsody*)
+      # PIC is the default on this platform
+      # Common symbols not allowed in MH_DYLIB files
+      lt_prog_compiler_pic='-fno-common'
+      ;;
+
+    haiku*)
+      # PIC is the default for Haiku.
+      # The "-static" flag exists, but is broken.
+      lt_prog_compiler_static=
+      ;;
+
+    hpux*)
+      # PIC is the default for 64-bit PA HP-UX, but not for 32-bit
+      # PA HP-UX.  On IA64 HP-UX, PIC is the default but the pic flag
+      # sets the default TLS model and affects inlining.
+      case $host_cpu in
+      hppa*64*)
+	# +Z the default
+	;;
+      *)
+	lt_prog_compiler_pic='-fPIC'
+	;;
+      esac
+      ;;
+
+    interix[3-9]*)
+      # Interix 3.x gcc -fpic/-fPIC options generate broken code.
+      # Instead, we relocate shared libraries at runtime.
+      ;;
+
+    msdosdjgpp*)
+      # Just because we use GCC doesn't mean we suddenly get shared libraries
+      # on systems that don't support them.
+      lt_prog_compiler_can_build_shared=no
+      enable_shared=no
+      ;;
+
+    *nto* | *qnx*)
+      # QNX uses GNU C++, but need to define -shared option too, otherwise
+      # it will coredump.
+      lt_prog_compiler_pic='-fPIC -shared'
+      ;;
+
+    sysv4*MP*)
+      if test -d /usr/nec; then
+	lt_prog_compiler_pic=-Kconform_pic
+      fi
+      ;;
+
+    *)
+      lt_prog_compiler_pic='-fPIC'
+      ;;
+    esac
+
+    case $cc_basename in
+    nvcc*) # Cuda Compiler Driver 2.2
+      lt_prog_compiler_wl='-Xlinker '
+      if test -n "$lt_prog_compiler_pic"; then
+        lt_prog_compiler_pic="-Xcompiler $lt_prog_compiler_pic"
+      fi
+      ;;
+    esac
+  else
+    # PORTME Check for flag to pass linker flags through the system compiler.
+    case $host_os in
+    aix*)
+      lt_prog_compiler_wl='-Wl,'
+      if test "$host_cpu" = ia64; then
+	# AIX 5 now supports IA64 processor
+	lt_prog_compiler_static='-Bstatic'
+      else
+	lt_prog_compiler_static='-bnso -bI:/lib/syscalls.exp'
+      fi
+      ;;
+
+    mingw* | cygwin* | pw32* | os2* | cegcc*)
+      # This hack is so that the source file can tell whether it is being
+      # built for inclusion in a dll (and should export symbols for example).
+      lt_prog_compiler_pic='-DDLL_EXPORT'
+      ;;
+
+    hpux9* | hpux10* | hpux11*)
+      lt_prog_compiler_wl='-Wl,'
+      # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but
+      # not for PA HP-UX.
+      case $host_cpu in
+      hppa*64*|ia64*)
+	# +Z the default
+	;;
+      *)
+	lt_prog_compiler_pic='+Z'
+	;;
+      esac
+      # Is there a better lt_prog_compiler_static that works with the bundled CC?
+      lt_prog_compiler_static='${wl}-a ${wl}archive'
+      ;;
+
+    irix5* | irix6* | nonstopux*)
+      lt_prog_compiler_wl='-Wl,'
+      # PIC (with -KPIC) is the default.
+      lt_prog_compiler_static='-non_shared'
+      ;;
+
+    linux* | k*bsd*-gnu | kopensolaris*-gnu)
+      case $cc_basename in
+      # old Intel for x86_64 which still supported -KPIC.
+      ecc*)
+	lt_prog_compiler_wl='-Wl,'
+	lt_prog_compiler_pic='-KPIC'
+	lt_prog_compiler_static='-static'
+        ;;
+      # icc used to be incompatible with GCC.
+      # ICC 10 doesn't accept -KPIC any more.
+      icc* | ifort*)
+	lt_prog_compiler_wl='-Wl,'
+	lt_prog_compiler_pic='-fPIC'
+	lt_prog_compiler_static='-static'
+        ;;
+      # Lahey Fortran 8.1.
+      lf95*)
+	lt_prog_compiler_wl='-Wl,'
+	lt_prog_compiler_pic='--shared'
+	lt_prog_compiler_static='--static'
+	;;
+      nagfor*)
+	# NAG Fortran compiler
+	lt_prog_compiler_wl='-Wl,-Wl,,'
+	lt_prog_compiler_pic='-PIC'
+	lt_prog_compiler_static='-Bstatic'
+	;;
+      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*)
+        # Portland Group compilers (*not* the Pentium gcc compiler,
+	# which looks to be a dead project)
+	lt_prog_compiler_wl='-Wl,'
+	lt_prog_compiler_pic='-fpic'
+	lt_prog_compiler_static='-Bstatic'
+        ;;
+      ccc*)
+        lt_prog_compiler_wl='-Wl,'
+        # All Alpha code is PIC.
+        lt_prog_compiler_static='-non_shared'
+        ;;
+      xl* | bgxl* | bgf* | mpixl*)
+	# IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene
+	lt_prog_compiler_wl='-Wl,'
+	lt_prog_compiler_pic='-qpic'
+	lt_prog_compiler_static='-qstaticlink'
+	;;
+      *)
+	case `$CC -V 2>&1 | sed 5q` in
+	*Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*)
+	  # Sun Fortran 8.3 passes all unrecognized flags to the linker
+	  lt_prog_compiler_pic='-KPIC'
+	  lt_prog_compiler_static='-Bstatic'
+	  lt_prog_compiler_wl=''
+	  ;;
+	*Sun\ F* | *Sun*Fortran*)
+	  lt_prog_compiler_pic='-KPIC'
+	  lt_prog_compiler_static='-Bstatic'
+	  lt_prog_compiler_wl='-Qoption ld '
+	  ;;
+	*Sun\ C*)
+	  # Sun C 5.9
+	  lt_prog_compiler_pic='-KPIC'
+	  lt_prog_compiler_static='-Bstatic'
+	  lt_prog_compiler_wl='-Wl,'
+	  ;;
+        *Intel*\ [CF]*Compiler*)
+	  lt_prog_compiler_wl='-Wl,'
+	  lt_prog_compiler_pic='-fPIC'
+	  lt_prog_compiler_static='-static'
+	  ;;
+	*Portland\ Group*)
+	  lt_prog_compiler_wl='-Wl,'
+	  lt_prog_compiler_pic='-fpic'
+	  lt_prog_compiler_static='-Bstatic'
+	  ;;
+	esac
+	;;
+      esac
+      ;;
+
+    newsos6)
+      lt_prog_compiler_pic='-KPIC'
+      lt_prog_compiler_static='-Bstatic'
+      ;;
+
+    *nto* | *qnx*)
+      # QNX uses GNU C++, but the -shared option must be passed too; otherwise
+      # it will core dump.
+      lt_prog_compiler_pic='-fPIC -shared'
+      ;;
+
+    osf3* | osf4* | osf5*)
+      lt_prog_compiler_wl='-Wl,'
+      # All OSF/1 code is PIC.
+      lt_prog_compiler_static='-non_shared'
+      ;;
+
+    rdos*)
+      lt_prog_compiler_static='-non_shared'
+      ;;
+
+    solaris*)
+      lt_prog_compiler_pic='-KPIC'
+      lt_prog_compiler_static='-Bstatic'
+      case $cc_basename in
+      f77* | f90* | f95* | sunf77* | sunf90* | sunf95*)
+	lt_prog_compiler_wl='-Qoption ld ';;
+      *)
+	lt_prog_compiler_wl='-Wl,';;
+      esac
+      ;;
+
+    sunos4*)
+      lt_prog_compiler_wl='-Qoption ld '
+      lt_prog_compiler_pic='-PIC'
+      lt_prog_compiler_static='-Bstatic'
+      ;;
+
+    sysv4 | sysv4.2uw2* | sysv4.3*)
+      lt_prog_compiler_wl='-Wl,'
+      lt_prog_compiler_pic='-KPIC'
+      lt_prog_compiler_static='-Bstatic'
+      ;;
+
+    sysv4*MP*)
+      if test -d /usr/nec ;then
+	lt_prog_compiler_pic='-Kconform_pic'
+	lt_prog_compiler_static='-Bstatic'
+      fi
+      ;;
+
+    sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
+      lt_prog_compiler_wl='-Wl,'
+      lt_prog_compiler_pic='-KPIC'
+      lt_prog_compiler_static='-Bstatic'
+      ;;
+
+    unicos*)
+      lt_prog_compiler_wl='-Wl,'
+      lt_prog_compiler_can_build_shared=no
+      ;;
+
+    uts4*)
+      lt_prog_compiler_pic='-pic'
+      lt_prog_compiler_static='-Bstatic'
+      ;;
+
+    *)
+      lt_prog_compiler_can_build_shared=no
+      ;;
+    esac
+  fi
+# Add -DPIC to the PIC flags on every host except djgpp, where they are cleared.
+case $host_os in
+  # For platforms which do not support PIC, -DPIC is meaningless:
+  *djgpp*)
+    lt_prog_compiler_pic=
+    ;;
+  *)
+    lt_prog_compiler_pic="$lt_prog_compiler_pic -DPIC"
+    ;;
+esac
+# Report the PIC option, reusing lt_cv_prog_compiler_pic when it is already set.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5
+$as_echo_n "checking for $compiler option to produce PIC... " >&6; }
+if ${lt_cv_prog_compiler_pic+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_pic=$lt_prog_compiler_pic
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic" >&5
+$as_echo "$lt_cv_prog_compiler_pic" >&6; }
+lt_prog_compiler_pic=$lt_cv_prog_compiler_pic
+
+# Check to make sure the PIC flag actually works: compile the simple test
+# program with "$lt_prog_compiler_pic -DPIC" and accept the flag only if the
+# compile succeeds with no output beyond the expected compiler boilerplate.
+if test -n "$lt_prog_compiler_pic"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic works" >&5
+$as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic works... " >&6; }
+if ${lt_cv_prog_compiler_pic_works+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_pic_works=no
+   ac_outfile=conftest.$ac_objext
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+   lt_compiler_flag="$lt_prog_compiler_pic -DPIC"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   # The option is referenced via a variable to avoid confusing sed.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>conftest.err)
+   ac_status=$?
+   cat conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   if (exit $ac_status) && test -s "$ac_outfile"; then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings other than the usual output.
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp
+     $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+     if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then
+       lt_cv_prog_compiler_pic_works=yes
+     fi
+   fi
+   $RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works" >&5
+$as_echo "$lt_cv_prog_compiler_pic_works" >&6; }
+
+if test x"$lt_cv_prog_compiler_pic_works" = xyes; then
+    case $lt_prog_compiler_pic in
+     "" | " "*) ;;
+     *) lt_prog_compiler_pic=" $lt_prog_compiler_pic" ;;
+     esac
+else
+    lt_prog_compiler_pic=
+     lt_prog_compiler_can_build_shared=no
+fi
+
+fi
+
+
+
+
+
+
+
+
+
+
+
+# Check to make sure the static flag actually works: link the simple test
+# program with the flag (after expanding ${wl}) appended to LDFLAGS, and clear
+# lt_prog_compiler_static if the link fails or emits unexpected warnings.
+wl=$lt_prog_compiler_wl eval lt_tmp_static_flag=\"$lt_prog_compiler_static\"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5
+$as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; }
+if ${lt_cv_prog_compiler_static_works+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_static_works=no
+   save_LDFLAGS="$LDFLAGS"
+   LDFLAGS="$LDFLAGS $lt_tmp_static_flag"
+   echo "$lt_simple_link_test_code" > conftest.$ac_ext
+   if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
+     # The linker can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     if test -s conftest.err; then
+       # Append any errors to the config.log.
+       cat conftest.err 1>&5
+       $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp
+       $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+       if diff conftest.exp conftest.er2 >/dev/null; then
+         lt_cv_prog_compiler_static_works=yes
+       fi
+     else
+       lt_cv_prog_compiler_static_works=yes
+     fi
+   fi
+   $RM -r conftest*
+   LDFLAGS="$save_LDFLAGS"
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works" >&5
+$as_echo "$lt_cv_prog_compiler_static_works" >&6; }
+
+if test x"$lt_cv_prog_compiler_static_works" = xyes; then
+    :
+else
+    lt_prog_compiler_static=
+fi
+
+
+
+
+
+# Check whether the compiler accepts -c together with -o by compiling the test
+# program inside a scratch conftest/ directory into out/conftest2.$ac_objext.
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
+$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
+if ${lt_cv_prog_compiler_c_o+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_c_o=no
+   $RM -r conftest 2>/dev/null
+   mkdir conftest
+   cd conftest
+   mkdir out
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+   lt_compiler_flag="-o out/conftest2.$ac_objext"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>out/conftest.err)
+   ac_status=$?
+   cat out/conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   if (exit $ac_status) && test -s out/conftest2.$ac_objext
+   then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
+     $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
+     if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
+       lt_cv_prog_compiler_c_o=yes
+     fi
+   fi
+   chmod u+w . 2>&5
+   $RM conftest*
+   # SGI C++ compiler will create directory out/ii_files/ for
+   # template instantiation
+   test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
+   $RM out/* && rmdir out
+   cd ..
+   $RM -r conftest
+   $RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5
+$as_echo "$lt_cv_prog_compiler_c_o" >&6; }
+
+
+
+
+# Repeat of the -c -o check using the same cache variable lt_cv_prog_compiler_c_o;
+# when that variable is already set, only "(cached)" and the stored result print.
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
+$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
+if ${lt_cv_prog_compiler_c_o+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_c_o=no
+   $RM -r conftest 2>/dev/null
+   mkdir conftest
+   cd conftest
+   mkdir out
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+   lt_compiler_flag="-o out/conftest2.$ac_objext"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>out/conftest.err)
+   ac_status=$?
+   cat out/conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   if (exit $ac_status) && test -s out/conftest2.$ac_objext
+   then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
+     $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
+     if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
+       lt_cv_prog_compiler_c_o=yes
+     fi
+   fi
+   chmod u+w . 2>&5
+   $RM conftest*
+   # SGI C++ compiler will create directory out/ii_files/ for
+   # template instantiation
+   test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
+   $RM out/* && rmdir out
+   cd ..
+   $RM -r conftest
+   $RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5
+$as_echo "$lt_cv_prog_compiler_c_o" >&6; }
+
+
+# If the compiler cannot do -c -o and need_locks is not "no", probe whether hard
+# links can be used for locking (warn if not); otherwise set need_locks=no.
+hard_links="nottested"
+if test "$lt_cv_prog_compiler_c_o" = no && test "$need_locks" != no; then
+  # do not overwrite the value of need_locks provided by the user
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5
+$as_echo_n "checking if we can lock with hard links... " >&6; }
+  hard_links=yes
+  $RM conftest*
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  touch conftest.a
+  ln conftest.a conftest.b 2>&5 || hard_links=no
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5
+$as_echo "$hard_links" >&6; }
+  if test "$hard_links" = no; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&5
+$as_echo "$as_me: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2;}
+    need_locks=warn
+  fi
+else
+  need_locks=no
+fi
+
+
+
+
+
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5
+$as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; }
+
+  runpath_var=
+  allow_undefined_flag=
+  always_export_symbols=no
+  archive_cmds=
+  archive_expsym_cmds=
+  compiler_needs_object=no
+  enable_shared_with_static_runtimes=no
+  export_dynamic_flag_spec=
+  export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
+  hardcode_automatic=no
+  hardcode_direct=no
+  hardcode_direct_absolute=no
+  hardcode_libdir_flag_spec=
+  hardcode_libdir_separator=
+  hardcode_minus_L=no
+  hardcode_shlibpath_var=unsupported
+  inherit_rpath=no
+  link_all_deplibs=unknown
+  module_cmds=
+  module_expsym_cmds=
+  old_archive_from_new_cmds=
+  old_archive_from_expsyms_cmds=
+  thread_safe_flag_spec=
+  whole_archive_flag_spec=
+  # include_expsyms should be a list of space-separated symbols to be *always*
+  # included in the symbol list
+  include_expsyms=
+  # exclude_expsyms can be an extended regexp of symbols to exclude
+  # it will be wrapped by ` (' and `)$', so one must not match beginning or
+  # end of line.  Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc',
+  # as well as any symbol that contains `d'.
+  exclude_expsyms='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'
+  # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out
+  # platforms (ab)use it in PIC code, but their linkers get confused if
+  # the symbol is explicitly referenced.  Since portable code cannot
+  # rely on this symbol name, it's probably fine to never include it in
+  # preloaded symbol tables.
+  # Exclude shared library initialization/finalization symbols.
+  extract_expsyms_cmds=
+
+  case $host_os in
+  cygwin* | mingw* | pw32* | cegcc*)
+    # FIXME: the MSVC++ port hasn't been tested in a loooong time
+    # When not using gcc, we currently assume that we are using
+    # Microsoft Visual C++.
+    if test "$GCC" != yes; then
+      with_gnu_ld=no
+    fi
+    ;;
+  interix*)
+    # we just hope/assume this is gcc and not c89 (= MSVC++)
+    with_gnu_ld=yes
+    ;;
+  openbsd*)
+    with_gnu_ld=no
+    ;;
+  esac
+
+  ld_shlibs=yes
+
+  # On some targets, GNU ld is compatible enough with the native linker
+  # that we're better off using the native interface for both.
+  lt_use_gnu_ld_interface=no
+  if test "$with_gnu_ld" = yes; then
+    case $host_os in
+      aix*)
+	# The AIX port of GNU ld has always aspired to compatibility
+	# with the native linker.  However, as the warning in the GNU ld
+	# block says, versions before 2.19.5* couldn't really create working
+	# shared libraries, regardless of the interface used.
+	case `$LD -v 2>&1` in
+	  *\ \(GNU\ Binutils\)\ 2.19.5*) ;;
+	  *\ \(GNU\ Binutils\)\ 2.[2-9]*) ;;
+	  *\ \(GNU\ Binutils\)\ [3-9]*) ;;
+	  *)
+	    lt_use_gnu_ld_interface=yes
+	    ;;
+	esac
+	;;
+      *)
+	lt_use_gnu_ld_interface=yes
+	;;
+    esac
+  fi
+
+  if test "$lt_use_gnu_ld_interface" = yes; then
+    # If archive_cmds runs LD, not CC, wlarc should be empty
+    wlarc='${wl}'
+
+    # Set some defaults for GNU ld with shared library support. These
+    # are reset later if shared libraries are not supported. Putting them
+    # here allows them to be overridden if necessary.
+    runpath_var=LD_RUN_PATH
+    hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+    export_dynamic_flag_spec='${wl}--export-dynamic'
+    # Ancient GNU ld didn't support --whole-archive et al.
+    if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then
+      whole_archive_flag_spec="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+    else
+      whole_archive_flag_spec=
+    fi
+    supports_anon_versioning=no
+    case `$LD -v 2>&1` in
+      *GNU\ gold*) supports_anon_versioning=yes ;;
+      *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11
+      *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ...
+      *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ...
+      *\ 2.11.*) ;; # other 2.11 versions
+      *) supports_anon_versioning=yes ;;
+    esac
+
+    # See if GNU ld supports shared libraries.
+    case $host_os in
+    aix[3-9]*)
+      # On AIX/PPC, the GNU linker is very broken
+      if test "$host_cpu" != ia64; then
+	ld_shlibs=no
+	cat <<_LT_EOF 1>&2
+
+*** Warning: the GNU linker, at least up to release 2.19, is reported
+*** to be unable to reliably create shared libraries on AIX.
+*** Therefore, libtool is disabling shared libraries support.  If you
+*** really care for shared libraries, you may want to install binutils
+*** 2.20 or above, or modify your PATH so that a non-GNU linker is found.
+*** You will then need to restart the configuration process.
+
+_LT_EOF
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # see comment about AmigaOS4 .so support
+            archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+            archive_expsym_cmds=''
+        ;;
+      m68k)
+            archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
+            hardcode_libdir_flag_spec='-L$libdir'
+            hardcode_minus_L=yes
+        ;;
+      esac
+      ;;
+
+    beos*)
+      if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+	allow_undefined_flag=unsupported
+	# Joseph Beckenbach <jrb3@best.com> says some releases of gcc
+	# support --undefined.  This deserves some investigation.  FIXME
+	archive_cmds='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+      else
+	ld_shlibs=no
+      fi
+      ;;
+
+    cygwin* | mingw* | pw32* | cegcc*)
+      # _LT_TAGVAR(hardcode_libdir_flag_spec, ) is actually meaningless,
+      # as there is no search path for DLLs.
+      hardcode_libdir_flag_spec='-L$libdir'
+      export_dynamic_flag_spec='${wl}--export-all-symbols'
+      allow_undefined_flag=unsupported
+      always_export_symbols=no
+      enable_shared_with_static_runtimes=yes
+      export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols'
+      exclude_expsyms='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'
+
+      if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then
+        archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+	# If the export-symbols file already is a .def file (1st line
+	# is EXPORTS), use it as is; otherwise, prepend...
+	archive_expsym_cmds='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+	  cp $export_symbols $output_objdir/$soname.def;
+	else
+	  echo EXPORTS > $output_objdir/$soname.def;
+	  cat $export_symbols >> $output_objdir/$soname.def;
+	fi~
+	$CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+      else
+	ld_shlibs=no
+      fi
+      ;;
+
+    haiku*)
+      archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+      link_all_deplibs=yes
+      ;;
+
+    interix[3-9]*)
+      hardcode_direct=no
+      hardcode_shlibpath_var=no
+      hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+      export_dynamic_flag_spec='${wl}-E'
+      # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc.
+      # Instead, shared libraries are loaded at an image base (0x10000000 by
+      # default) and relocated if they conflict, which is a slow very memory
+      # consuming and fragmenting process.  To avoid this, we pick a random,
+      # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link
+      # time.  Moving up from 0x10000000 also allows more sbrk(2) space.
+      archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+      archive_expsym_cmds='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+      ;;
+
+    gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu)
+      tmp_diet=no
+      if test "$host_os" = linux-dietlibc; then
+	case $cc_basename in
+	  diet\ *) tmp_diet=yes;;	# linux-dietlibc with static linking (!diet-dyn)
+	esac
+      fi
+      if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \
+	 && test "$tmp_diet" = no
+      then
+	tmp_addflag=' $pic_flag'
+	tmp_sharedflag='-shared'
+	case $cc_basename,$host_cpu in
+        pgcc*)				# Portland Group C compiler
+	  whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+	  tmp_addflag=' $pic_flag'
+	  ;;
+	pgf77* | pgf90* | pgf95* | pgfortran*)
+					# Portland Group f77 and f90 compilers
+	  whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+	  tmp_addflag=' $pic_flag -Mnomain' ;;
+	ecc*,ia64* | icc*,ia64*)	# Intel C compiler on ia64
+	  tmp_addflag=' -i_dynamic' ;;
+	efc*,ia64* | ifort*,ia64*)	# Intel Fortran compiler on ia64
+	  tmp_addflag=' -i_dynamic -nofor_main' ;;
+	ifc* | ifort*)			# Intel Fortran compiler
+	  tmp_addflag=' -nofor_main' ;;
+	lf95*)				# Lahey Fortran 8.1
+	  whole_archive_flag_spec=
+	  tmp_sharedflag='--shared' ;;
+	xl[cC]* | bgxl[cC]* | mpixl[cC]*) # IBM XL C 8.0 on PPC (deal with xlf below)
+	  tmp_sharedflag='-qmkshrobj'
+	  tmp_addflag= ;;
+	nvcc*)	# Cuda Compiler Driver 2.2
+	  whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+	  compiler_needs_object=yes
+	  ;;
+	esac
+	case `$CC -V 2>&1 | sed 5q` in
+	*Sun\ C*)			# Sun C 5.9
+	  whole_archive_flag_spec='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+	  compiler_needs_object=yes
+	  tmp_sharedflag='-G' ;;
+	*Sun\ F*)			# Sun Fortran 8.3
+	  tmp_sharedflag='-G' ;;
+	esac
+	archive_cmds='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+
+        if test "x$supports_anon_versioning" = xyes; then
+          archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~
+	    cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
+	    echo "local: *; };" >> $output_objdir/$libname.ver~
+	    $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
+        fi
+
+	case $cc_basename in
+	xlf* | bgf* | bgxlf* | mpixlf*)
+	  # IBM XL Fortran 10.1 on PPC cannot create shared libs itself
+	  whole_archive_flag_spec='--whole-archive$convenience --no-whole-archive'
+	  hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+	  archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib'
+	  if test "x$supports_anon_versioning" = xyes; then
+	    archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~
+	      cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
+	      echo "local: *; };" >> $output_objdir/$libname.ver~
+	      $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib'
+	  fi
+	  ;;
+	esac
+      else
+        ld_shlibs=no
+      fi
+      ;;
+
+    netbsd*)
+      if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+	archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib'
+	wlarc=
+      else
+	archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+	archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+      fi
+      ;;
+
+    solaris*)
+      if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then
+	ld_shlibs=no
+	cat <<_LT_EOF 1>&2
+
+*** Warning: The releases 2.8.* of the GNU linker cannot reliably
+*** create shared libraries on Solaris systems.  Therefore, libtool
+*** is disabling shared libraries support.  We urge you to upgrade GNU
+*** binutils to release 2.9.1 or newer.  Another option is to modify
+*** your PATH or compiler configuration so that the native linker is
+*** used, and then restart.
+
+_LT_EOF
+      elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+	archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+	archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+      else
+	ld_shlibs=no
+      fi
+      ;;
+
+    sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*)
+      case `$LD -v 2>&1` in
+        *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*)
+	ld_shlibs=no
+	cat <<_LT_EOF 1>&2
+
+*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not
+*** reliably create shared libraries on SCO systems.  Therefore, libtool
+*** is disabling shared libraries support.  We urge you to upgrade GNU
+*** binutils to release 2.16.91.0.3 or newer.  Another option is to modify
+*** your PATH or compiler configuration so that the native linker is
+*** used, and then restart.
+
+_LT_EOF
+	;;
+	*)
+	  # For security reasons, it is highly recommended that you always
+	  # use absolute paths for naming shared libraries, and exclude the
+	  # DT_RUNPATH tag from executables and libraries.  But doing so
+	  # requires that you compile everything twice, which is a pain.
+	  if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+	    hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+	    archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+	    archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+	  else
+	    ld_shlibs=no
+	  fi
+	;;
+      esac
+      ;;
+
+    sunos4*)
+      archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+      wlarc=
+      hardcode_direct=yes
+      hardcode_shlibpath_var=no
+      ;;
+
+    *)
+      if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+	archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+	archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+      else
+	ld_shlibs=no
+      fi
+      ;;
+    esac
+
+    if test "$ld_shlibs" = no; then
+      runpath_var=
+      hardcode_libdir_flag_spec=
+      export_dynamic_flag_spec=
+      whole_archive_flag_spec=
+    fi
+  else
+    # PORTME fill in a description of your system's linker (not GNU ld)
+    case $host_os in
+    aix3*)
+      allow_undefined_flag=unsupported
+      always_export_symbols=yes
+      archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname'
+      # Note: this linker hardcodes the directories in LIBPATH if there
+      # are no directories specified by -L.
+      hardcode_minus_L=yes
+      if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then
+	# Neither direct hardcoding nor static linking is supported with a
+	# broken collect2.
+	hardcode_direct=unsupported
+      fi
+      ;;
+
+    aix[4-9]*)
+      if test "$host_cpu" = ia64; then
+	# On IA64, the linker does run time linking by default, so we don't
+	# have to do anything special.
+	aix_use_runtimelinking=no
+	exp_sym_flag='-Bexport'
+	no_entry_flag=""
+      else
+	# If we're using GNU nm, then we don't want the "-C" option.
+	# -C means demangle to AIX nm, but means don't demangle with GNU nm
+	# Also, AIX nm treats weak defined symbols like other global
+	# defined symbols, whereas GNU nm marks them as "W".
+	if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
+	  export_symbols_cmds='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+	else
+	  export_symbols_cmds='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+	fi
+	aix_use_runtimelinking=no
+
+	# Test if we are trying to use run time linking or normal
+	# AIX style linking. If -brtl is somewhere in LDFLAGS, we
+	# need to do runtime linking.
+	case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*)
+	  for ld_flag in $LDFLAGS; do
+	  if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then
+	    aix_use_runtimelinking=yes
+	    break
+	  fi
+	  done
+	  ;;
+	esac
+
+	exp_sym_flag='-bexport'
+	no_entry_flag='-bnoentry'
+      fi
+
+      # When large executables or shared objects are built, AIX ld can
+      # have problems creating the table of contents.  If linking a library
+      # or program results in "error TOC overflow" add -mminimal-toc to
+      # CXXFLAGS/CFLAGS for g++/gcc.  In the cases where that is not
+      # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
+
+      archive_cmds=''
+      hardcode_direct=yes
+      hardcode_direct_absolute=yes
+      hardcode_libdir_separator=':'
+      link_all_deplibs=yes
+      file_list_spec='${wl}-f,'
+
+      if test "$GCC" = yes; then
+	case $host_os in aix4.[012]|aix4.[012].*)
+	# We only want to do this on AIX 4.2 and lower, the check
+	# below for broken collect2 doesn't work under 4.3+
+	  collect2name=`${CC} -print-prog-name=collect2`
+	  if test -f "$collect2name" &&
+	   strings "$collect2name" | $GREP resolve_lib_name >/dev/null
+	  then
+	  # We have reworked collect2
+	  :
+	  else
+	  # We have old collect2
+	  hardcode_direct=unsupported
+	  # It fails to find uninstalled libraries when the uninstalled
+	  # path is not listed in the libpath.  Setting hardcode_minus_L
+	  # to unsupported forces relinking
+	  hardcode_minus_L=yes
+	  hardcode_libdir_flag_spec='-L$libdir'
+	  hardcode_libdir_separator=
+	  fi
+	  ;;
+	esac
+	shared_flag='-shared'
+	if test "$aix_use_runtimelinking" = yes; then
+	  shared_flag="$shared_flag "'${wl}-G'
+	fi
+      else
+	# not using gcc
+	if test "$host_cpu" = ia64; then
+	# VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release
+	# chokes on -Wl,-G. The following line is correct:
+	  shared_flag='-G'
+	else
+	  if test "$aix_use_runtimelinking" = yes; then
+	    shared_flag='${wl}-G'
+	  else
+	    shared_flag='${wl}-bM:SRE'
+	  fi
+	fi
+      fi
+
+      export_dynamic_flag_spec='${wl}-bexpall'
+      # It seems that -bexpall does not export symbols beginning with
+      # underscore (_), so it is better to generate a list of symbols to export.
+      always_export_symbols=yes
+      if test "$aix_use_runtimelinking" = yes; then
+	# Warning - without using the other runtime loading flags (-brtl),
+	# -berok will link without error, but may produce a broken library.
+	allow_undefined_flag='-berok'
+        # Determine the default libpath from the value encoded in an
+        # empty executable.
+        if test "${lt_cv_aix_libpath+set}" = set; then
+  aix_libpath=$lt_cv_aix_libpath
+else
+  if ${lt_cv_aix_libpath_+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+
+  lt_aix_libpath_sed='
+      /Import File Strings/,/^$/ {
+	  /^0/ {
+	      s/^0  *\([^ ]*\) *$/\1/
+	      p
+	  }
+      }'
+  lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  # Check for a 64-bit object if we didn't find anything.
+  if test -z "$lt_cv_aix_libpath_"; then
+    lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  fi
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+  if test -z "$lt_cv_aix_libpath_"; then
+    lt_cv_aix_libpath_="/usr/lib:/lib"
+  fi
+
+fi
+
+  aix_libpath=$lt_cv_aix_libpath_
+fi
+
+        hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath"
+        archive_expsym_cmds='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag"
+      else
+	if test "$host_cpu" = ia64; then
+	  hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib'
+	  allow_undefined_flag="-z nodefs"
+	  archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols"
+	else
+	 # Determine the default libpath from the value encoded in an
+	 # empty executable.
+	 if test "${lt_cv_aix_libpath+set}" = set; then
+  aix_libpath=$lt_cv_aix_libpath
+else
+  if ${lt_cv_aix_libpath_+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+
+  lt_aix_libpath_sed='
+      /Import File Strings/,/^$/ {
+	  /^0/ {
+	      s/^0  *\([^ ]*\) *$/\1/
+	      p
+	  }
+      }'
+  lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  # Check for a 64-bit object if we didn't find anything.
+  if test -z "$lt_cv_aix_libpath_"; then
+    lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  fi
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+  if test -z "$lt_cv_aix_libpath_"; then
+    lt_cv_aix_libpath_="/usr/lib:/lib"
+  fi
+
+fi
+
+  aix_libpath=$lt_cv_aix_libpath_
+fi
+
+	 hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath"
+	  # Warning - without using the other run time loading flags,
+	  # -berok will link without error, but may produce a broken library.
+	  no_undefined_flag=' ${wl}-bernotok'
+	  allow_undefined_flag=' ${wl}-berok'
+	  if test "$with_gnu_ld" = yes; then
+	    # We only use this code for GNU lds that support --whole-archive.
+	    whole_archive_flag_spec='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
+	  else
+	    # Exported symbols can be pulled into shared objects from archives
+	    whole_archive_flag_spec='$convenience'
+	  fi
+	  archive_cmds_need_lc=yes
+	  # This is similar to how AIX traditionally builds its shared libraries.
+	  archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname'
+	fi
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # see comment about AmigaOS4 .so support
+            archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+            archive_expsym_cmds=''
+        ;;
+      m68k)
+            archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
+            hardcode_libdir_flag_spec='-L$libdir'
+            hardcode_minus_L=yes
+        ;;
+      esac
+      ;;
+
+    bsdi[45]*)
+      export_dynamic_flag_spec=-rdynamic
+      ;;
+
+    cygwin* | mingw* | pw32* | cegcc*)
+      # When not using gcc, we currently assume that we are using
+      # Microsoft Visual C++.
+      # hardcode_libdir_flag_spec is actually meaningless, as there is
+      # no search path for DLLs.
+      case $cc_basename in
+      cl*)
+	# Native MSVC
+	hardcode_libdir_flag_spec=' '
+	allow_undefined_flag=unsupported
+	always_export_symbols=yes
+	file_list_spec='@'
+	# Tell ltmain to make .lib files, not .a files.
+	libext=lib
+	# Tell ltmain to make .dll files, not .so files.
+	shrext_cmds=".dll"
+	# FIXME: Setting linknames here is a bad hack.
+	archive_cmds='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames='
+	archive_expsym_cmds='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+	    sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp;
+	  else
+	    sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp;
+	  fi~
+	  $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~
+	  linknames='
+	# The linker will not automatically build a static lib if we build a DLL.
+	# _LT_TAGVAR(old_archive_from_new_cmds, )='true'
+	enable_shared_with_static_runtimes=yes
+	exclude_expsyms='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
+	export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols'
+	# Don't use ranlib
+	old_postinstall_cmds='chmod 644 $oldlib'
+	postlink_cmds='lt_outputfile="@OUTPUT@"~
+	  lt_tool_outputfile="@TOOL_OUTPUT@"~
+	  case $lt_outputfile in
+	    *.exe|*.EXE) ;;
+	    *)
+	      lt_outputfile="$lt_outputfile.exe"
+	      lt_tool_outputfile="$lt_tool_outputfile.exe"
+	      ;;
+	  esac~
+	  if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then
+	    $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1;
+	    $RM "$lt_outputfile.manifest";
+	  fi'
+	;;
+      *)
+	# Assume MSVC wrapper
+	hardcode_libdir_flag_spec=' '
+	allow_undefined_flag=unsupported
+	# Tell ltmain to make .lib files, not .a files.
+	libext=lib
+	# Tell ltmain to make .dll files, not .so files.
+	shrext_cmds=".dll"
+	# FIXME: Setting linknames here is a bad hack.
+	archive_cmds='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames='
+	# The linker will automatically build a .lib file if we build a DLL.
+	old_archive_from_new_cmds='true'
+	# FIXME: Should let the user specify the lib program.
+	old_archive_cmds='lib -OUT:$oldlib$oldobjs$old_deplibs'
+	enable_shared_with_static_runtimes=yes
+	;;
+      esac
+      ;;
+
+    darwin* | rhapsody*)
+
+
+  archive_cmds_need_lc=no
+  hardcode_direct=no
+  hardcode_automatic=yes
+  hardcode_shlibpath_var=unsupported
+  if test "$lt_cv_ld_force_load" = "yes"; then
+    whole_archive_flag_spec='`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
+
+  else
+    whole_archive_flag_spec=''
+  fi
+  link_all_deplibs=yes
+  allow_undefined_flag="$_lt_dar_allow_undefined"
+  case $cc_basename in
+     ifort*) _lt_dar_can_shared=yes ;;
+     *) _lt_dar_can_shared=$GCC ;;
+  esac
+  if test "$_lt_dar_can_shared" = "yes"; then
+    output_verbose_link_cmd=func_echo_all
+    archive_cmds="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}"
+    module_cmds="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}"
+    archive_expsym_cmds="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}"
+    module_expsym_cmds="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}"
+
+  else
+  ld_shlibs=no
+  fi
+
+      ;;
+
+    dgux*)
+      archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_libdir_flag_spec='-L$libdir'
+      hardcode_shlibpath_var=no
+      ;;
+
+    # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
+    # support.  Future versions do this automatically, but an explicit c++rt0.o
+    # does not break anything, and helps significantly (at the cost of a little
+    # extra space).
+    freebsd2.2*)
+      archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o'
+      hardcode_libdir_flag_spec='-R$libdir'
+      hardcode_direct=yes
+      hardcode_shlibpath_var=no
+      ;;
+
+    # Unfortunately, older versions of FreeBSD 2 do not have this feature.
+    freebsd2.*)
+      archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_direct=yes
+      hardcode_minus_L=yes
+      hardcode_shlibpath_var=no
+      ;;
+
+    # FreeBSD 3 and greater uses gcc -shared to do shared libraries.
+    freebsd* | dragonfly*)
+      archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+      hardcode_libdir_flag_spec='-R$libdir'
+      hardcode_direct=yes
+      hardcode_shlibpath_var=no
+      ;;
+
+    hpux9*)
+      if test "$GCC" = yes; then
+	archive_cmds='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+      else
+	archive_cmds='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+      fi
+      hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
+      hardcode_libdir_separator=:
+      hardcode_direct=yes
+
+      # hardcode_minus_L: Not really in the search PATH,
+      # but as the default location of the library.
+      hardcode_minus_L=yes
+      export_dynamic_flag_spec='${wl}-E'
+      ;;
+
+    hpux10*)
+      if test "$GCC" = yes && test "$with_gnu_ld" = no; then
+	archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
+      else
+	archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'
+      fi
+      if test "$with_gnu_ld" = no; then
+	hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
+	hardcode_libdir_separator=:
+	hardcode_direct=yes
+	hardcode_direct_absolute=yes
+	export_dynamic_flag_spec='${wl}-E'
+	# hardcode_minus_L: Not really in the search PATH,
+	# but as the default location of the library.
+	hardcode_minus_L=yes
+      fi
+      ;;
+
+    hpux11*)
+      if test "$GCC" = yes && test "$with_gnu_ld" = no; then
+	case $host_cpu in
+	hppa*64*)
+	  archive_cmds='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+	  ;;
+	ia64*)
+	  archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags'
+	  ;;
+	*)
+	  archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
+	  ;;
+	esac
+      else
+	case $host_cpu in
+	hppa*64*)
+	  archive_cmds='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+	  ;;
+	ia64*)
+	  archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags'
+	  ;;
+	*)
+
+	  # Older versions of the 11.00 compiler do not understand -b yet
+	  # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does)
+	  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $CC understands -b" >&5
+$as_echo_n "checking if $CC understands -b... " >&6; }
+if ${lt_cv_prog_compiler__b+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler__b=no
+   save_LDFLAGS="$LDFLAGS"
+   LDFLAGS="$LDFLAGS -b"
+   echo "$lt_simple_link_test_code" > conftest.$ac_ext
+   if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
+     # The linker can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     if test -s conftest.err; then
+       # Append any errors to the config.log.
+       cat conftest.err 1>&5
+       $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp
+       $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+       if diff conftest.exp conftest.er2 >/dev/null; then
+         lt_cv_prog_compiler__b=yes
+       fi
+     else
+       lt_cv_prog_compiler__b=yes
+     fi
+   fi
+   $RM -r conftest*
+   LDFLAGS="$save_LDFLAGS"
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler__b" >&5
+$as_echo "$lt_cv_prog_compiler__b" >&6; }
+
+if test x"$lt_cv_prog_compiler__b" = xyes; then
+    archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
+else
+    archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'
+fi
+
+	  ;;
+	esac
+      fi
+      if test "$with_gnu_ld" = no; then
+	hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
+	hardcode_libdir_separator=:
+
+	case $host_cpu in
+	hppa*64*|ia64*)
+	  hardcode_direct=no
+	  hardcode_shlibpath_var=no
+	  ;;
+	*)
+	  hardcode_direct=yes
+	  hardcode_direct_absolute=yes
+	  export_dynamic_flag_spec='${wl}-E'
+
+	  # hardcode_minus_L: Not really in the search PATH,
+	  # but as the default location of the library.
+	  hardcode_minus_L=yes
+	  ;;
+	esac
+      fi
+      ;;
+
+    irix5* | irix6* | nonstopux*)
+      if test "$GCC" = yes; then
+	archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+	# Try to use the -exported_symbol ld option, if it does not
+	# work, assume that -exports_file does not work either and
+	# implicitly export all symbols.
+	# This should be the same for all languages, so no per-tag cache variable.
+	{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5
+$as_echo_n "checking whether the $host_os linker accepts -exported_symbol... " >&6; }
+if ${lt_cv_irix_exported_symbol+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  save_LDFLAGS="$LDFLAGS"
+	   LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null"
+	   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+int foo (void) { return 0; }
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  lt_cv_irix_exported_symbol=yes
+else
+  lt_cv_irix_exported_symbol=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+           LDFLAGS="$save_LDFLAGS"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_irix_exported_symbol" >&5
+$as_echo "$lt_cv_irix_exported_symbol" >&6; }
+	if test "$lt_cv_irix_exported_symbol" = yes; then
+          archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib'
+	fi
+      else
+	archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+	archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -exports_file $export_symbols -o $lib'
+      fi
+      archive_cmds_need_lc='no'
+      hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+      hardcode_libdir_separator=:
+      inherit_rpath=yes
+      link_all_deplibs=yes
+      ;;
+
+    netbsd*)
+      if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+	archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'  # a.out
+      else
+	archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags'      # ELF
+      fi
+      hardcode_libdir_flag_spec='-R$libdir'
+      hardcode_direct=yes
+      hardcode_shlibpath_var=no
+      ;;
+
+    newsos6)
+      archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_direct=yes
+      hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+      hardcode_libdir_separator=:
+      hardcode_shlibpath_var=no
+      ;;
+
+    *nto* | *qnx*)
+      ;;
+
+    openbsd*)
+      if test -f /usr/libexec/ld.so; then
+	hardcode_direct=yes
+	hardcode_shlibpath_var=no
+	hardcode_direct_absolute=yes
+	if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+	  archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+	  archive_expsym_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols'
+	  hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+	  export_dynamic_flag_spec='${wl}-E'
+	else
+	  case $host_os in
+	   openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*)
+	     archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+	     hardcode_libdir_flag_spec='-R$libdir'
+	     ;;
+	   *)
+	     archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+	     hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+	     ;;
+	  esac
+	fi
+      else
+	ld_shlibs=no
+      fi
+      ;;
+
+    os2*)
+      hardcode_libdir_flag_spec='-L$libdir'
+      hardcode_minus_L=yes
+      allow_undefined_flag=unsupported
+      archive_cmds='$ECHO "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def'
+      old_archive_from_new_cmds='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def'
+      ;;
+
+    osf3*)
+      if test "$GCC" = yes; then
+	allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*'
+	archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+      else
+	allow_undefined_flag=' -expect_unresolved \*'
+	archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+      fi
+      archive_cmds_need_lc='no'
+      hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+      hardcode_libdir_separator=:
+      ;;
+
+    osf4* | osf5*)	# as osf3* with the addition of -msym flag
+      if test "$GCC" = yes; then
+	allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*'
+	archive_cmds='$CC -shared${allow_undefined_flag} $pic_flag $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+	hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+      else
+	allow_undefined_flag=' -expect_unresolved \*'
+	archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+	archive_expsym_cmds='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~
+	$CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~$RM $lib.exp'
+
+	# Both c and cxx compiler support -rpath directly
+	hardcode_libdir_flag_spec='-rpath $libdir'
+      fi
+      archive_cmds_need_lc='no'
+      hardcode_libdir_separator=:
+      ;;
+
+    solaris*)
+      no_undefined_flag=' -z defs'
+      if test "$GCC" = yes; then
+	wlarc='${wl}'
+	archive_cmds='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+	archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+	  $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp'
+      else
+	case `$CC -V 2>&1` in
+	*"Compilers 5.0"*)
+	  wlarc=''
+	  archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags'
+	  archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+	  $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp'
+	  ;;
+	*)
+	  wlarc='${wl}'
+	  archive_cmds='$CC -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $compiler_flags'
+	  archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+	  $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp'
+	  ;;
+	esac
+      fi
+      hardcode_libdir_flag_spec='-R$libdir'
+      hardcode_shlibpath_var=no
+      case $host_os in
+      solaris2.[0-5] | solaris2.[0-5].*) ;;
+      *)
+	# The compiler driver will combine and reorder linker options,
+	# but understands `-z linker_flag'.  GCC discards it without `$wl',
+	# but is careful enough not to reorder.
+	# Supported since Solaris 2.6 (maybe 2.5.1?)
+	if test "$GCC" = yes; then
+	  whole_archive_flag_spec='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract'
+	else
+	  whole_archive_flag_spec='-z allextract$convenience -z defaultextract'
+	fi
+	;;
+      esac
+      link_all_deplibs=yes
+      ;;
+
+    sunos4*)
+      if test "x$host_vendor" = xsequent; then
+	# Use $CC to link under sequent, because it throws in some extra .o
+	# files that make .init and .fini sections work.
+	archive_cmds='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags'
+      else
+	archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags'
+      fi
+      hardcode_libdir_flag_spec='-L$libdir'
+      hardcode_direct=yes
+      hardcode_minus_L=yes
+      hardcode_shlibpath_var=no
+      ;;
+
+    sysv4)
+      case $host_vendor in
+	sni)
+	  archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+	  hardcode_direct=yes # is this really true???
+	;;
+	siemens)
+	  ## LD is ld; it makes a PLAMLIB.
+	  ## CC just makes a GrossModule.
+	  archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags'
+	  reload_cmds='$CC -r -o $output$reload_objs'
+	  hardcode_direct=no
+        ;;
+	motorola)
+	  archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+	  hardcode_direct=no #Motorola manual says yes, but my tests say they lie
+	;;
+      esac
+      runpath_var='LD_RUN_PATH'
+      hardcode_shlibpath_var=no
+      ;;
+
+    sysv4.3*)
+      archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_shlibpath_var=no
+      export_dynamic_flag_spec='-Bexport'
+      ;;
+
+    sysv4*MP*)
+      if test -d /usr/nec; then
+	archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+	hardcode_shlibpath_var=no
+	runpath_var=LD_RUN_PATH
+	hardcode_runpath_var=yes
+	ld_shlibs=yes
+      fi
+      ;;
+
+    sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*)
+      no_undefined_flag='${wl}-z,text'
+      archive_cmds_need_lc=no
+      hardcode_shlibpath_var=no
+      runpath_var='LD_RUN_PATH'
+
+      if test "$GCC" = yes; then
+	archive_cmds='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	archive_expsym_cmds='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      else
+	archive_cmds='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	archive_expsym_cmds='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      fi
+      ;;
+
+    sysv5* | sco3.2v5* | sco5v6*)
+      # Note: We can NOT use -z defs as we might desire, because we do not
+      # link with -lc, and that would cause any symbols used from libc to
+      # always be unresolved, which means just about no library would
+      # ever link correctly.  If we're not using GNU ld we use -z text
+      # though, which does catch some bad symbols but isn't as heavy-handed
+      # as -z defs.
+      no_undefined_flag='${wl}-z,text'
+      allow_undefined_flag='${wl}-z,nodefs'
+      archive_cmds_need_lc=no
+      hardcode_shlibpath_var=no
+      hardcode_libdir_flag_spec='${wl}-R,$libdir'
+      hardcode_libdir_separator=':'
+      link_all_deplibs=yes
+      export_dynamic_flag_spec='${wl}-Bexport'
+      runpath_var='LD_RUN_PATH'
+
+      if test "$GCC" = yes; then
+	archive_cmds='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	archive_expsym_cmds='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      else
+	archive_cmds='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	archive_expsym_cmds='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      fi
+      ;;
+
+    uts4*)
+      archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_libdir_flag_spec='-L$libdir'
+      hardcode_shlibpath_var=no
+      ;;
+
+    *)
+      ld_shlibs=no
+      ;;
+    esac
+
+    if test x$host_vendor = xsni; then
+      case $host in
+      sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+	export_dynamic_flag_spec='${wl}-Blargedynsym'
+	;;
+      esac
+    fi
+  fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs" >&5
+$as_echo "$ld_shlibs" >&6; }
+test "$ld_shlibs" = no && can_build_shared=no
+
+with_gnu_ld=$with_gnu_ld
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#
+# Do we need to explicitly link libc?
+#
+case "x$archive_cmds_need_lc" in
+x|xyes)
+  # Assume -lc should be added
+  archive_cmds_need_lc=yes
+
+  if test "$enable_shared" = yes && test "$GCC" = yes; then
+    case $archive_cmds in
+    *'~'*)
+      # FIXME: we may have to deal with multi-command sequences.
+      ;;
+    '$CC '*)
+      # Test whether the compiler implicitly links with -lc since on some
+      # systems, -lgcc has to come before -lc. If gcc already passes -lc
+      # to ld, don't add -lc before -lgcc.
+      { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5
+$as_echo_n "checking whether -lc should be explicitly linked in... " >&6; }
+if ${lt_cv_archive_cmds_need_lc+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  $RM conftest*
+	echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+	if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } 2>conftest.err; then
+	  soname=conftest
+	  lib=conftest
+	  libobjs=conftest.$ac_objext
+	  deplibs=
+	  wl=$lt_prog_compiler_wl
+	  pic_flag=$lt_prog_compiler_pic
+	  compiler_flags=-v
+	  linker_flags=-v
+	  verstring=
+	  output_objdir=.
+	  libname=conftest
+	  lt_save_allow_undefined_flag=$allow_undefined_flag
+	  allow_undefined_flag=
+	  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5
+  (eval $archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+	  then
+	    lt_cv_archive_cmds_need_lc=no
+	  else
+	    lt_cv_archive_cmds_need_lc=yes
+	  fi
+	  allow_undefined_flag=$lt_save_allow_undefined_flag
+	else
+	  cat conftest.err 1>&5
+	fi
+	$RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc" >&5
+$as_echo "$lt_cv_archive_cmds_need_lc" >&6; }
+      archive_cmds_need_lc=$lt_cv_archive_cmds_need_lc
+      ;;
+    esac
+  fi
+  ;;
+esac
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5
+$as_echo_n "checking dynamic linker characteristics... " >&6; }
+
+if test "$GCC" = yes; then
+  case $host_os in
+    darwin*) lt_awk_arg="/^libraries:/,/LR/" ;;
+    *) lt_awk_arg="/^libraries:/" ;;
+  esac
+  case $host_os in
+    mingw* | cegcc*) lt_sed_strip_eq="s,=\([A-Za-z]:\),\1,g" ;;
+    *) lt_sed_strip_eq="s,=/,/,g" ;;
+  esac
+  lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq`
+  case $lt_search_path_spec in
+  *\;*)
+    # if the path contains ";" then we assume it to be the separator
+    # otherwise default to the standard path separator (i.e. ":") - it is
+    # assumed that no part of a normal pathname contains ";" but that should
+    # be okay in the real world where ";" in dirpaths is itself problematic.
+    lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'`
+    ;;
+  *)
+    lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"`
+    ;;
+  esac
+  # Ok, now we have the path, separated by spaces, we can step through it
+  # and add multilib dir if necessary.
+  lt_tmp_lt_search_path_spec=
+  lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null`
+  for lt_sys_path in $lt_search_path_spec; do
+    if test -d "$lt_sys_path/$lt_multi_os_dir"; then
+      lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir"
+    else
+      test -d "$lt_sys_path" && \
+	lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path"
+    fi
+  done
+  lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk '
+BEGIN {RS=" "; FS="/|\n";} {
+  lt_foo="";
+  lt_count=0;
+  for (lt_i = NF; lt_i > 0; lt_i--) {
+    if ($lt_i != "" && $lt_i != ".") {
+      if ($lt_i == "..") {
+        lt_count++;
+      } else {
+        if (lt_count == 0) {
+          lt_foo="/" $lt_i lt_foo;
+        } else {
+          lt_count--;
+        }
+      }
+    }
+  }
+  if (lt_foo != "") { lt_freq[lt_foo]++; }
+  if (lt_freq[lt_foo] == 1) { print lt_foo; }
+}'`
+  # AWK program above erroneously prepends '/' to C:/dos/paths
+  # for these hosts.
+  case $host_os in
+    mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\
+      $SED 's,/\([A-Za-z]:\),\1,g'` ;;
+  esac
+  sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP`
+else
+  sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib"
+fi
+library_names_spec=
+libname_spec='lib$name'
+soname_spec=
+shrext_cmds=".so"
+postinstall_cmds=
+postuninstall_cmds=
+finish_cmds=
+finish_eval=
+shlibpath_var=
+shlibpath_overrides_runpath=unknown
+version_type=none
+dynamic_linker="$host_os ld.so"
+sys_lib_dlsearch_path_spec="/lib /usr/lib"
+need_lib_prefix=unknown
+hardcode_into_libs=no
+
+# when you set need_version to no, make sure it does not cause -set_version
+# flags to be left without arguments
+need_version=unknown
+
+case $host_os in
+aix3*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
+  shlibpath_var=LIBPATH
+
+  # AIX 3 has no versioning support, so we append a major version to the name.
+  soname_spec='${libname}${release}${shared_ext}$major'
+  ;;
+
+aix[4-9]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  hardcode_into_libs=yes
+  if test "$host_cpu" = ia64; then
+    # AIX 5 supports IA64
+    library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}'
+    shlibpath_var=LD_LIBRARY_PATH
+  else
+    # With GCC up to 2.95.x, collect2 would create an import file
+    # for dependence libraries.  The import file would start with
+    # the line `#! .'.  This would cause the generated library to
+    # depend on `.', always an invalid library.  This was fixed in
+    # development snapshots of GCC prior to 3.0.
+    case $host_os in
+      aix4 | aix4.[01] | aix4.[01].*)
+      if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
+	   echo ' yes '
+	   echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then
+	:
+      else
+	can_build_shared=no
+      fi
+      ;;
+    esac
+    # AIX (on Power*) has no versioning support, so currently we cannot hardcode
+    # the correct soname into the executable.  Versioning support could probably
+    # be added to collect2, so the additional links may be useful in the future.
+    if test "$aix_use_runtimelinking" = yes; then
+      # If using run time linking (on AIX 4.2 or later) use lib<name>.so
+      # instead of lib<name>.a to let people know that these are not
+      # typical AIX shared libraries.
+      library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    else
+      # We preserve .a as extension for shared libraries through AIX4.2
+      # and later when we are not doing run time linking.
+      library_names_spec='${libname}${release}.a $libname.a'
+      soname_spec='${libname}${release}${shared_ext}$major'
+    fi
+    shlibpath_var=LIBPATH
+  fi
+  ;;
+
+amigaos*)
+  case $host_cpu in
+  powerpc)
+    # Since July 2007 AmigaOS4 officially supports .so libraries.
+    # When compiling the executable, add -use-dynld -Lsobjs: to the compileline.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    ;;
+  m68k)
+    library_names_spec='$libname.ixlibrary $libname.a'
+    # Create ${libname}_ixlibrary.a links in /sys/libs; any stale link is removed first so that $LN_S does not fail on an existing entry.
+    finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done'
+    ;;
+  esac
+  ;;
+
+beos*)
+  library_names_spec='${libname}${shared_ext}'
+  dynamic_linker="$host_os ld.so"
+  shlibpath_var=LIBRARY_PATH
+  ;;
+
+bsdi[45]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib"
+  sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib"
+  # the default ld.so.conf also contains /usr/contrib/lib and
+  # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow
+  # libtool to hard-code these into programs
+  ;;
+
+cygwin* | mingw* | pw32* | cegcc*)
+  version_type=windows
+  shrext_cmds=".dll"
+  need_version=no
+  need_lib_prefix=no
+
+  case $GCC,$cc_basename in
+  yes,*)
+    # gcc
+    library_names_spec='$libname.dll.a'
+    # DLL is installed to $(libdir)/../bin by postinstall_cmds
+    postinstall_cmds='base_file=`basename \${file}`~
+      dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
+      dldir=$destdir/`dirname \$dlpath`~
+      test -d \$dldir || mkdir -p \$dldir~
+      $install_prog $dir/$dlname \$dldir/$dlname~
+      chmod a+x \$dldir/$dlname~
+      if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then
+        eval '\''$striplib \$dldir/$dlname'\'' || exit \$?;
+      fi'
+    postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
+      dlpath=$dir/\$dldll~
+       $RM \$dlpath'
+    shlibpath_overrides_runpath=yes
+
+    case $host_os in
+    cygwin*)
+      # Cygwin DLLs use 'cyg' prefix rather than 'lib'
+      soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+
+      sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"
+      ;;
+    mingw* | cegcc*)
+      # MinGW DLLs use traditional 'lib' prefix
+      soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+      ;;
+    pw32*)
+      # pw32 DLLs use 'pw' prefix rather than 'lib'
+      library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+      ;;
+    esac
+    dynamic_linker='Win32 ld.exe'
+    ;;
+
+  *,cl*)
+    # Native MSVC
+    libname_spec='$name'
+    soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+    library_names_spec='${libname}.dll.lib'
+
+    case $build_os in
+    mingw*)
+      sys_lib_search_path_spec=
+      lt_save_ifs=$IFS
+      IFS=';'
+      for lt_path in $LIB
+      do
+        IFS=$lt_save_ifs
+        # Let DOS variable expansion print the short 8.3 style file name.
+        lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"`
+        sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path"
+      done
+      IFS=$lt_save_ifs
+      # Convert to MSYS style.
+      sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'`
+      ;;
+    cygwin*)
+      # Convert to unix form, then to dos form, then back to unix form
+      # but this time dos style (no spaces!) so that the unix form looks
+      # like /cygdrive/c/PROGRA~1:/cygdr...
+      sys_lib_search_path_spec=`cygpath --path --unix "$LIB"`
+      sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null`
+      sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
+      ;;
+    *)
+      sys_lib_search_path_spec="$LIB"
+      if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then
+        # It is most probably a Windows format PATH.
+        sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
+      else
+        sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
+      fi
+      # FIXME: find the short name or the path components, as spaces are
+      # common. (e.g. "Program Files" -> "PROGRA~1")
+      ;;
+    esac
+
+    # DLL is installed to $(libdir)/../bin by postinstall_cmds
+    postinstall_cmds='base_file=`basename \${file}`~
+      dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
+      dldir=$destdir/`dirname \$dlpath`~
+      test -d \$dldir || mkdir -p \$dldir~
+      $install_prog $dir/$dlname \$dldir/$dlname'
+    postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
+      dlpath=$dir/\$dldll~
+       $RM \$dlpath'
+    shlibpath_overrides_runpath=yes
+    dynamic_linker='Win32 link.exe'
+    ;;
+
+  *)
+    # Assume MSVC wrapper
+    library_names_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib'
+    dynamic_linker='Win32 ld.exe'
+    ;;
+  esac
+  # FIXME: first we should search . and the directory the executable is in
+  shlibpath_var=PATH
+  ;;
+
+darwin* | rhapsody*)
+  dynamic_linker="$host_os dyld"
+  version_type=darwin
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext'
+  soname_spec='${libname}${release}${major}$shared_ext'
+  shlibpath_overrides_runpath=yes
+  shlibpath_var=DYLD_LIBRARY_PATH
+  shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`'
+
+  sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"
+  sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib'
+  ;;
+
+dgux*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+freebsd* | dragonfly*)
+  # DragonFly does not have aout.  When/if they implement a new
+  # versioning mechanism, adjust this.
+  if test -x /usr/bin/objformat; then
+    objformat=`/usr/bin/objformat`
+  else
+    case $host_os in
+    freebsd[23].*) objformat=aout ;;
+    *) objformat=elf ;;
+    esac
+  fi
+  version_type=freebsd-$objformat
+  case $version_type in
+    freebsd-elf*)
+      library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
+      need_version=no
+      need_lib_prefix=no
+      ;;
+    freebsd-*)
+      library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix'
+      need_version=yes
+      ;;
+  esac
+  shlibpath_var=LD_LIBRARY_PATH
+  case $host_os in
+  freebsd2.*)
+    shlibpath_overrides_runpath=yes
+    ;;
+  freebsd3.[01]* | freebsdelf3.[01]*)
+    shlibpath_overrides_runpath=yes
+    hardcode_into_libs=yes
+    ;;
+  freebsd3.[2-9]* | freebsdelf3.[2-9]* | \
+  freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1)
+    shlibpath_overrides_runpath=no
+    hardcode_into_libs=yes
+    ;;
+  *) # from 4.6 on, and DragonFly
+    shlibpath_overrides_runpath=yes
+    hardcode_into_libs=yes
+    ;;
+  esac
+  ;;
+
+gnu*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+haiku*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  dynamic_linker="$host_os runtime_loader"
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib'
+  hardcode_into_libs=yes
+  ;;
+
+hpux9* | hpux10* | hpux11*)
+  # Give a soname corresponding to the major version so that dld.sl refuses to
+  # link against other versions.
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  case $host_cpu in
+  ia64*)
+    shrext_cmds='.so'
+    hardcode_into_libs=yes
+    dynamic_linker="$host_os dld.so"
+    shlibpath_var=LD_LIBRARY_PATH
+    shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    if test "X$HPUX_IA64_MODE" = X32; then
+      sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib"
+    else
+      sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64"
+    fi
+    sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
+    ;;
+  hppa*64*)
+    shrext_cmds='.sl'
+    hardcode_into_libs=yes
+    dynamic_linker="$host_os dld.sl"
+    shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH
+    shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64"
+    sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
+    ;;
+  *)
+    shrext_cmds='.sl'
+    dynamic_linker="$host_os dld.sl"
+    shlibpath_var=SHLIB_PATH
+    shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    ;;
+  esac
+  # HP-UX runs *really* slowly unless shared libraries are mode 555, ...
+  postinstall_cmds='chmod 555 $lib'
+  # or fails outright, so override atomically:
+  install_override_mode=555
+  ;;
+
+interix[3-9]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+irix5* | irix6* | nonstopux*)
+  case $host_os in
+    nonstopux*) version_type=nonstopux ;;
+    *)
+	if test "$lt_cv_prog_gnu_ld" = yes; then
+		version_type=linux # correct to gnu/linux during the next big refactor
+	else
+		version_type=irix
+	fi ;;
+  esac
+  need_lib_prefix=no
+  need_version=no
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}'
+  case $host_os in
+  irix5* | nonstopux*)
+    libsuff= shlibsuff=
+    ;;
+  *)
+    case $LD in # libtool.m4 will add one of these switches to LD
+    *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ")
+      libsuff= shlibsuff= libmagic=32-bit;;
+    *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ")
+      libsuff=32 shlibsuff=N32 libmagic=N32;;
+    *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ")
+      libsuff=64 shlibsuff=64 libmagic=64-bit;;
+    *) libsuff= shlibsuff= libmagic=never-match;;
+    esac
+    ;;
+  esac
+  shlibpath_var=LD_LIBRARY${shlibsuff}_PATH
+  shlibpath_overrides_runpath=no
+  sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}"
+  sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}"
+  hardcode_into_libs=yes
+  ;;
+
+# No shared lib support for Linux oldld, aout, or coff.
+linux*oldld* | linux*aout* | linux*coff*)
+  dynamic_linker=no
+  ;;
+
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+
+  # Some binutils ld are patched to set DT_RUNPATH
+  if ${lt_cv_shlibpath_overrides_runpath+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_shlibpath_overrides_runpath=no
+    save_LDFLAGS=$LDFLAGS
+    save_libdir=$libdir
+    eval "libdir=/foo; wl=\"$lt_prog_compiler_wl\"; \
+	 LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec\""
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  if  ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null; then :
+  lt_cv_shlibpath_overrides_runpath=yes
+fi
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+    LDFLAGS=$save_LDFLAGS
+    libdir=$save_libdir
+
+fi
+
+  shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath
+
+  # This implies no fast_install, which is unacceptable.
+  # Some rework will be needed to allow for fast_install
+  # before this can be enabled.
+  hardcode_into_libs=yes
+
+  # Append ld.so.conf contents to the search path
+  if test -f /etc/ld.so.conf; then
+    lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[	 ]*hwcap[	 ]/d;s/[:,	]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '`
+    sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra"
+  fi
+
+  # We used to test for /lib/ld.so.1 and disable shared libraries on
+  # powerpc, because MkLinux only supported shared libraries with the
+  # GNU dynamic linker.  Since this was broken with cross compilers,
+  # most powerpc-linux boxes support dynamic linking these days and
+  # people can always --disable-shared, the test was removed, and we
+  # assume the GNU/Linux dynamic linker is in use.
+  dynamic_linker='GNU/Linux ld.so'
+  ;;
+
+netbsd*)
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+    finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+    dynamic_linker='NetBSD (a.out) ld.so'
+  else
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    dynamic_linker='NetBSD ld.elf_so'
+  fi
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  ;;
+
+newsos6)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  ;;
+
+*nto* | *qnx*)
+  version_type=qnx
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  dynamic_linker='ldqnx.so'
+  ;;
+
+openbsd*)
+  version_type=sunos
+  sys_lib_dlsearch_path_spec="/usr/lib"
+  need_lib_prefix=no
+  # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs.
+  case $host_os in
+    openbsd3.3 | openbsd3.3.*)	need_version=yes ;;
+    *)				need_version=no  ;;
+  esac
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+    case $host_os in
+      openbsd2.[89] | openbsd2.[89].*)
+	shlibpath_overrides_runpath=no
+	;;
+      *)
+	shlibpath_overrides_runpath=yes
+	;;
+      esac
+  else
+    shlibpath_overrides_runpath=yes
+  fi
+  ;;
+
+os2*)
+  libname_spec='$name'
+  shrext_cmds=".dll"
+  need_lib_prefix=no
+  library_names_spec='$libname${shared_ext} $libname.a'
+  dynamic_linker='OS/2 ld.exe'
+  shlibpath_var=LIBPATH
+  ;;
+
+osf3* | osf4* | osf5*)
+  version_type=osf
+  need_lib_prefix=no
+  need_version=no
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib"
+  sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
+  ;;
+
+rdos*)
+  dynamic_linker=no
+  ;;
+
+solaris*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  # ldd complains unless libraries are executable
+  postinstall_cmds='chmod +x $lib'
+  ;;
+
+sunos4*)
+  version_type=sunos
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+  finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  if test "$with_gnu_ld" = yes; then
+    need_lib_prefix=no
+  fi
+  need_version=yes
+  ;;
+
+sysv4 | sysv4.3*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  case $host_vendor in
+    sni)
+      shlibpath_overrides_runpath=no
+      need_lib_prefix=no
+      runpath_var=LD_RUN_PATH
+      ;;
+    siemens)
+      need_lib_prefix=no
+      ;;
+    motorola)
+      need_lib_prefix=no
+      need_version=no
+      shlibpath_overrides_runpath=no
+      sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib'
+      ;;
+  esac
+  ;;
+
+sysv4*MP*)
+  if test -d /usr/nec ;then
+    version_type=linux # correct to gnu/linux during the next big refactor
+    library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
+    soname_spec='$libname${shared_ext}.$major'
+    shlibpath_var=LD_LIBRARY_PATH
+  fi
+  ;;
+
+sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
+  version_type=freebsd-elf
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  if test "$with_gnu_ld" = yes; then
+    sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib'
+  else
+    sys_lib_search_path_spec='/usr/ccs/lib /usr/lib'
+    case $host_os in
+      sco3.2v5*)
+        sys_lib_search_path_spec="$sys_lib_search_path_spec /lib"
+	;;
+    esac
+  fi
+  sys_lib_dlsearch_path_spec='/usr/lib'
+  ;;
+
+tpf*)
+  # TPF is a cross-target only.  Preferred cross-host = GNU/Linux.
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+uts4*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+*)
+  dynamic_linker=no
+  ;;
+esac
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5
+$as_echo "$dynamic_linker" >&6; }
+test "$dynamic_linker" = no && can_build_shared=no
+
+variables_saved_for_relink="PATH $shlibpath_var $runpath_var"
+if test "$GCC" = yes; then
+  variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH"
+fi
+
+if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then
+  sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec"
+fi
+if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then
+  sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec"
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5
+$as_echo_n "checking how to hardcode library paths into programs... " >&6; }
+hardcode_action=
+if test -n "$hardcode_libdir_flag_spec" ||
+   test -n "$runpath_var" ||
+   test "X$hardcode_automatic" = "Xyes" ; then
+
+  # We can hardcode non-existent directories.
+  if test "$hardcode_direct" != no &&
+     # If the only mechanism to avoid hardcoding is shlibpath_var, we
+     # have to relink, otherwise we might link with an installed library
+     # when we should be linking with a yet-to-be-installed one
+     ## test "$_LT_TAGVAR(hardcode_shlibpath_var, )" != no &&
+     test "$hardcode_minus_L" != no; then
+    # Linking always hardcodes the temporary library directory.
+    hardcode_action=relink
+  else
+    # We can link without hardcoding, and we can hardcode nonexisting dirs.
+    hardcode_action=immediate
+  fi
+else
+  # We cannot hardcode anything, or else we can only hardcode existing
+  # directories.
+  hardcode_action=unsupported
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hardcode_action" >&5
+$as_echo "$hardcode_action" >&6; }
+
+if test "$hardcode_action" = relink ||
+   test "$inherit_rpath" = yes; then
+  # Fast installation is not supported
+  enable_fast_install=no
+elif test "$shlibpath_overrides_runpath" = yes ||
+     test "$enable_shared" = no; then
+  # Fast installation is not necessary
+  enable_fast_install=needless
+fi
+
+
+
+
+
+
+  if test "x$enable_dlopen" != xyes; then
+  enable_dlopen=unknown
+  enable_dlopen_self=unknown
+  enable_dlopen_self_static=unknown
+else
+  lt_cv_dlopen=no
+  lt_cv_dlopen_libs=
+
+  case $host_os in
+  beos*)
+    lt_cv_dlopen="load_add_on"
+    lt_cv_dlopen_libs=
+    lt_cv_dlopen_self=yes
+    ;;
+
+  mingw* | pw32* | cegcc*)
+    lt_cv_dlopen="LoadLibrary"
+    lt_cv_dlopen_libs=
+    ;;
+
+  cygwin*)
+    lt_cv_dlopen="dlopen"
+    lt_cv_dlopen_libs=
+    ;;
+
+  darwin*)
+  # if libdl is installed we need to link against it
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5
+$as_echo_n "checking for dlopen in -ldl... " >&6; }
+if ${ac_cv_lib_dl_dlopen+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-ldl  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dlopen ();
+int
+main ()
+{
+return dlopen ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_dl_dlopen=yes
+else
+  ac_cv_lib_dl_dlopen=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5
+$as_echo "$ac_cv_lib_dl_dlopen" >&6; }
+if test "x$ac_cv_lib_dl_dlopen" = xyes; then :
+  lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"
+else
+
+    lt_cv_dlopen="dyld"
+    lt_cv_dlopen_libs=
+    lt_cv_dlopen_self=yes
+
+fi
+
+    ;;
+
+  *)
+    ac_fn_c_check_func "$LINENO" "shl_load" "ac_cv_func_shl_load"
+if test "x$ac_cv_func_shl_load" = xyes; then :
+  lt_cv_dlopen="shl_load"
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for shl_load in -ldld" >&5
+$as_echo_n "checking for shl_load in -ldld... " >&6; }
+if ${ac_cv_lib_dld_shl_load+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-ldld  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char shl_load ();
+int
+main ()
+{
+return shl_load ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_dld_shl_load=yes
+else
+  ac_cv_lib_dld_shl_load=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_shl_load" >&5
+$as_echo "$ac_cv_lib_dld_shl_load" >&6; }
+if test "x$ac_cv_lib_dld_shl_load" = xyes; then :
+  lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"
+else
+  ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen"
+if test "x$ac_cv_func_dlopen" = xyes; then :
+  lt_cv_dlopen="dlopen"
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5
+$as_echo_n "checking for dlopen in -ldl... " >&6; }
+if ${ac_cv_lib_dl_dlopen+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-ldl  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dlopen ();
+int
+main ()
+{
+return dlopen ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_dl_dlopen=yes
+else
+  ac_cv_lib_dl_dlopen=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5
+$as_echo "$ac_cv_lib_dl_dlopen" >&6; }
+if test "x$ac_cv_lib_dl_dlopen" = xyes; then :
+  lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -lsvld" >&5
+$as_echo_n "checking for dlopen in -lsvld... " >&6; }
+if ${ac_cv_lib_svld_dlopen+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lsvld  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dlopen ();
+int
+main ()
+{
+return dlopen ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_svld_dlopen=yes
+else
+  ac_cv_lib_svld_dlopen=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_svld_dlopen" >&5
+$as_echo "$ac_cv_lib_svld_dlopen" >&6; }
+if test "x$ac_cv_lib_svld_dlopen" = xyes; then :
+  lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dld_link in -ldld" >&5
+$as_echo_n "checking for dld_link in -ldld... " >&6; }
+if ${ac_cv_lib_dld_dld_link+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-ldld  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dld_link ();
+int
+main ()
+{
+return dld_link ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_dld_dld_link=yes
+else
+  ac_cv_lib_dld_dld_link=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_dld_link" >&5
+$as_echo "$ac_cv_lib_dld_dld_link" >&6; }
+if test "x$ac_cv_lib_dld_dld_link" = xyes; then :
+  lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"
+fi
+
+
+fi
+
+
+fi
+
+
+fi
+
+
+fi
+
+
+fi
+
+    ;;
+  esac
+  # Any lt_cv_dlopen value other than "no" means a loader entry point was found.
+  if test "x$lt_cv_dlopen" != xno; then
+    enable_dlopen=yes
+  else
+    enable_dlopen=no
+  fi
+
+  case $lt_cv_dlopen in
+  dlopen)
+    save_CPPFLAGS="$CPPFLAGS"
+    test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H"
+
+    save_LDFLAGS="$LDFLAGS"
+    wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\"
+
+    save_LIBS="$LIBS"
+    LIBS="$lt_cv_dlopen_libs $LIBS"
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program can dlopen itself" >&5
+$as_echo_n "checking whether a program can dlopen itself... " >&6; }
+if ${lt_cv_dlopen_self+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  	  if test "$cross_compiling" = yes; then :
+  lt_cv_dlopen_self=cross
+else
+  lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+  lt_status=$lt_dlunknown
+  cat > conftest.$ac_ext <<_LT_EOF
+#line $LINENO "configure"
+#include "confdefs.h"
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef RTLD_GLOBAL
+#  define LT_DLGLOBAL		RTLD_GLOBAL
+#else
+#  ifdef DL_GLOBAL
+#    define LT_DLGLOBAL		DL_GLOBAL
+#  else
+#    define LT_DLGLOBAL		0
+#  endif
+#endif
+
+/* We may have to define LT_DLLAZY_OR_NOW in the command line if we
+   find out it does not work in some platform. */
+#ifndef LT_DLLAZY_OR_NOW
+#  ifdef RTLD_LAZY
+#    define LT_DLLAZY_OR_NOW		RTLD_LAZY
+#  else
+#    ifdef DL_LAZY
+#      define LT_DLLAZY_OR_NOW		DL_LAZY
+#    else
+#      ifdef RTLD_NOW
+#        define LT_DLLAZY_OR_NOW	RTLD_NOW
+#      else
+#        ifdef DL_NOW
+#          define LT_DLLAZY_OR_NOW	DL_NOW
+#        else
+#          define LT_DLLAZY_OR_NOW	0
+#        endif
+#      endif
+#    endif
+#  endif
+#endif
+
+/* When -fvisbility=hidden is used, assume the code has been annotated
+   correspondingly for the symbols needed.  */
+#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3))
+int fnord () __attribute__((visibility("default")));
+#endif
+
+int fnord () { return 42; }
+int main ()
+{
+  void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW);
+  int status = $lt_dlunknown;
+
+  if (self)
+    {
+      if (dlsym (self,"fnord"))       status = $lt_dlno_uscore;
+      else
+        {
+	  if (dlsym( self,"_fnord"))  status = $lt_dlneed_uscore;
+          else puts (dlerror ());
+	}
+      /* dlclose (self); */
+    }
+  else
+    puts (dlerror ());
+
+  return status;
+}
+_LT_EOF
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5
+  (eval $ac_link) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && test -s conftest${ac_exeext} 2>/dev/null; then
+    (./conftest; exit; ) >&5 2>/dev/null
+    lt_status=$?
+    case x$lt_status in
+      x$lt_dlno_uscore) lt_cv_dlopen_self=yes ;;
+      x$lt_dlneed_uscore) lt_cv_dlopen_self=yes ;;
+      x$lt_dlunknown|x*) lt_cv_dlopen_self=no ;;
+    esac
+  else :
+    # compilation failed
+    lt_cv_dlopen_self=no
+  fi
+fi
+rm -fr conftest*
+
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self" >&5
+$as_echo "$lt_cv_dlopen_self" >&6; }
+
+    if test "x$lt_cv_dlopen_self" = xyes; then
+      wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\"
+      { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a statically linked program can dlopen itself" >&5
+$as_echo_n "checking whether a statically linked program can dlopen itself... " >&6; }
+if ${lt_cv_dlopen_self_static+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  	  if test "$cross_compiling" = yes; then :
+  lt_cv_dlopen_self_static=cross
+else
+  lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+  lt_status=$lt_dlunknown
+  cat > conftest.$ac_ext <<_LT_EOF
+#line $LINENO "configure"
+#include "confdefs.h"
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef RTLD_GLOBAL
+#  define LT_DLGLOBAL		RTLD_GLOBAL
+#else
+#  ifdef DL_GLOBAL
+#    define LT_DLGLOBAL		DL_GLOBAL
+#  else
+#    define LT_DLGLOBAL		0
+#  endif
+#endif
+
+/* We may have to define LT_DLLAZY_OR_NOW in the command line if we
+   find out it does not work in some platform. */
+#ifndef LT_DLLAZY_OR_NOW
+#  ifdef RTLD_LAZY
+#    define LT_DLLAZY_OR_NOW		RTLD_LAZY
+#  else
+#    ifdef DL_LAZY
+#      define LT_DLLAZY_OR_NOW		DL_LAZY
+#    else
+#      ifdef RTLD_NOW
+#        define LT_DLLAZY_OR_NOW	RTLD_NOW
+#      else
+#        ifdef DL_NOW
+#          define LT_DLLAZY_OR_NOW	DL_NOW
+#        else
+#          define LT_DLLAZY_OR_NOW	0
+#        endif
+#      endif
+#    endif
+#  endif
+#endif
+
+/* When -fvisbility=hidden is used, assume the code has been annotated
+   correspondingly for the symbols needed.  */
+#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3))
+int fnord () __attribute__((visibility("default")));
+#endif
+
+int fnord () { return 42; }
+int main ()
+{
+  void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW);
+  int status = $lt_dlunknown;
+
+  if (self)
+    {
+      if (dlsym (self,"fnord"))       status = $lt_dlno_uscore;
+      else
+        {
+	  if (dlsym( self,"_fnord"))  status = $lt_dlneed_uscore;
+          else puts (dlerror ());
+	}
+      /* dlclose (self); */
+    }
+  else
+    puts (dlerror ());
+
+  return status;
+}
+_LT_EOF
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5
+  (eval $ac_link) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && test -s conftest${ac_exeext} 2>/dev/null; then
+    (./conftest; exit; ) >&5 2>/dev/null
+    lt_status=$?
+    case x$lt_status in
+      x$lt_dlno_uscore) lt_cv_dlopen_self_static=yes ;;
+      x$lt_dlneed_uscore) lt_cv_dlopen_self_static=yes ;;
+      x$lt_dlunknown|x*) lt_cv_dlopen_self_static=no ;;
+    esac
+  else :
+    # compilation failed
+    lt_cv_dlopen_self_static=no
+  fi
+fi
+rm -fr conftest*
+
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self_static" >&5
+$as_echo "$lt_cv_dlopen_self_static" >&6; }
+    fi
+
+    CPPFLAGS="$save_CPPFLAGS"
+    LDFLAGS="$save_LDFLAGS"
+    LIBS="$save_LIBS"
+    ;;
+  esac
+  # Only a definite yes/no is exported; anything else (e.g. "cross") is unknown.
+  case $lt_cv_dlopen_self in
+  yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;;
+  *) enable_dlopen_self=unknown ;;
+  esac
+  # Same normalization for the statically linked self-dlopen probe result.
+  case $lt_cv_dlopen_self_static in
+  yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;;
+  *) enable_dlopen_self_static=unknown ;;
+  esac
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# Pick the commands used to strip shared (striplib) and static (old_striplib)
+# libraries; both are left empty when no usable $STRIP is detected below.
+striplib=
+old_striplib=
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether stripping libraries is possible" >&5
+$as_echo_n "checking whether stripping libraries is possible... " >&6; }
+if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then
+  test -z "$old_striplib" && old_striplib="$STRIP --strip-debug"
+  test -z "$striplib" && striplib="$STRIP --strip-unneeded"
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+# FIXME - insert some real tests, host_os isn't really good enough
+  case $host_os in
+  darwin*)
+    if test -n "$STRIP" ; then
+      striplib="$STRIP -x"
+      old_striplib="$STRIP -S"
+      { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+    else
+      { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+    fi
+    ;;
+  *)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+    ;;
+  esac
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+  # Report which library types will actually be built
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if libtool supports shared libraries" >&5
+$as_echo_n "checking if libtool supports shared libraries... " >&6; }
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $can_build_shared" >&5
+$as_echo "$can_build_shared" >&6; }
+  # enable_shared is forced to "no" when shared libraries cannot be built at all.
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build shared libraries" >&5
+$as_echo_n "checking whether to build shared libraries... " >&6; }
+  test "$can_build_shared" = "no" && enable_shared=no
+
+  # On AIX, shared libraries and static libraries use the same namespace, and
+  # are all built from PIC.
+  case $host_os in
+  aix3*)
+    test "$enable_shared" = yes && enable_static=no
+    if test -n "$RANLIB"; then
+      archive_cmds="$archive_cmds~\$RANLIB \$lib"
+      postinstall_cmds='$RANLIB $lib'
+    fi
+    ;;
+
+  aix[4-9]*)
+    if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
+      test "$enable_shared" = yes && enable_static=no
+    fi
+    ;;
+  esac
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_shared" >&5
+$as_echo "$enable_shared" >&6; }
+  # Static libraries are the fallback whenever shared ones are not being built.
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build static libraries" >&5
+$as_echo_n "checking whether to build static libraries... " >&6; }
+  # Make sure either enable_shared or enable_static is yes.
+  test "$enable_shared" = yes || enable_static=yes
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_static" >&5
+$as_echo "$enable_static" >&6; }
+
+
+
+
+fi
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+# The C test-language settings are in effect again; put back the CC held in lt_save_CC.
+CC="$lt_save_CC"
+
+      if test -n "$CXX" && ( test "X$CXX" != "Xno" &&
+    ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) ||
+    (test "X$CXX" != "Xg++"))) ; then
+  ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C++ preprocessor" >&5
+$as_echo_n "checking how to run the C++ preprocessor... " >&6; }
+if test -z "$CXXCPP"; then
+  if ${ac_cv_prog_CXXCPP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+      # Double quotes because CXXCPP needs to be expanded
+    for CXXCPP in "$CXX -E" "/lib/cpp"
+    do
+      ac_preproc_ok=false
+for ac_cxx_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+  # <limits.h> exists even on freestanding compilers.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+		     Syntax error
+_ACEOF
+if ac_fn_cxx_try_cpp "$LINENO"; then :
+
+else
+  # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_cxx_try_cpp "$LINENO"; then :
+  # Broken: success on invalid input.
+continue
+else
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.i conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+  break
+fi
+
+    done
+    ac_cv_prog_CXXCPP=$CXXCPP
+
+fi
+  CXXCPP=$ac_cv_prog_CXXCPP
+else
+  ac_cv_prog_CXXCPP=$CXXCPP
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXXCPP" >&5
+$as_echo "$CXXCPP" >&6; }
+ac_preproc_ok=false
+for ac_cxx_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+  # <limits.h> exists even on freestanding compilers.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+		     Syntax error
+_ACEOF
+if ac_fn_cxx_try_cpp "$LINENO"; then :
+
+else
+  # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_cxx_try_cpp "$LINENO"; then :
+  # Broken: success on invalid input.
+continue
+else
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.i conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+
+else
+  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "C++ preprocessor \"$CXXCPP\" fails sanity check
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+else
+  _lt_caught_CXX_error=yes
+fi
+
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+# Defaults for the CXX-tagged libtool variables; host-specific cases further down override them.
+archive_cmds_need_lc_CXX=no
+allow_undefined_flag_CXX=
+always_export_symbols_CXX=no
+archive_expsym_cmds_CXX=
+compiler_needs_object_CXX=no
+export_dynamic_flag_spec_CXX=
+hardcode_direct_CXX=no
+hardcode_direct_absolute_CXX=no
+hardcode_libdir_flag_spec_CXX=
+hardcode_libdir_separator_CXX=
+hardcode_minus_L_CXX=no
+hardcode_shlibpath_var_CXX=unsupported
+hardcode_automatic_CXX=no
+inherit_rpath_CXX=no
+module_cmds_CXX=
+module_expsym_cmds_CXX=
+link_all_deplibs_CXX=unknown
+old_archive_cmds_CXX=$old_archive_cmds
+reload_flag_CXX=$reload_flag
+reload_cmds_CXX=$reload_cmds
+no_undefined_flag_CXX=
+whole_archive_flag_spec_CXX=
+enable_shared_with_static_runtimes_CXX=no
+
+# Source file extension for C++ test sources.
+ac_ext=cpp
+
+# Object file extension for compiled C++ test sources.
+objext=o
+objext_CXX=$objext
+
+# No sense in running all these tests if we already determined that
+# the CXX compiler isn't working.  Some variables (like enable_shared)
+# are currently assumed to apply to all compilers on this platform,
+# and will be corrupted by setting them based on a non-working compiler.
+if test "$_lt_caught_CXX_error" != yes; then
+  # Code to be used in simple compile tests
+  lt_simple_compile_test_code="int some_variable = 0;"
+
+  # Code to be used in simple link tests
+  lt_simple_link_test_code='int main(int, char *[]) { return(0); }'
+
+  # ltmain only uses $CC for tagged configurations so make sure $CC is set.
+
+
+
+
+
+
+# If no C compiler was specified, use CC.
+LTCC=${LTCC-"$CC"}
+
+# If no C compiler flags were specified, use CFLAGS.
+LTCFLAGS=${LTCFLAGS-"$CFLAGS"}
+
+# Allow CC to be a program name with arguments.
+compiler=$CC
+
+
+  # save warnings/boilerplate of simple test code
+  ac_outfile=conftest.$ac_objext
+echo "$lt_simple_compile_test_code" >conftest.$ac_ext
+eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
+_lt_compiler_boilerplate=`cat conftest.err`
+$RM conftest*
+
+  ac_outfile=conftest.$ac_objext
+echo "$lt_simple_link_test_code" >conftest.$ac_ext
+eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
+_lt_linker_boilerplate=`cat conftest.err`
+$RM -r conftest*
+
+
+  # Allow CC to be a program name with arguments.
+  lt_save_CC=$CC
+  lt_save_CFLAGS=$CFLAGS
+  lt_save_LD=$LD
+  lt_save_GCC=$GCC
+  GCC=$GXX
+  lt_save_with_gnu_ld=$with_gnu_ld
+  lt_save_path_LD=$lt_cv_path_LD
+  if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then
+    lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx
+  else
+    $as_unset lt_cv_prog_gnu_ld
+  fi
+  if test -n "${lt_cv_path_LDCXX+set}"; then
+    lt_cv_path_LD=$lt_cv_path_LDCXX
+  else
+    $as_unset lt_cv_path_LD
+  fi
+  test -z "${LDCXX+set}" || LD=$LDCXX
+  CC=${CXX-"c++"}
+  CFLAGS=$CXXFLAGS
+  compiler=$CC
+  compiler_CXX=$CC
+  for cc_temp in $compiler""; do # stop at the first word that is not a wrapper or an option
+  case $cc_temp in
+    compile | *[\\/]compile | ccache | *[\\/]ccache ) ;;
+    distcc | *[\\/]distcc | purify | *[\\/]purify ) ;;
+    \-*) ;;
+    *) break;;
+  esac
+done
+cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` # drop directory part and "$host_alias-" prefix
+
+
+  if test -n "$compiler"; then
+    # We don't want -fno-exception when compiling C++ code, so set the
+    # no_builtin_flag separately
+    if test "$GXX" = yes; then
+      lt_prog_compiler_no_builtin_flag_CXX=' -fno-builtin'
+    else
+      lt_prog_compiler_no_builtin_flag_CXX=
+    fi
+
+    if test "$GXX" = yes; then
+      # Set up default GNU C++ configuration
+
+
+
+# Check whether --with-gnu-ld was given.
+if test "${with_gnu_ld+set}" = set; then :
+  withval=$with_gnu_ld; test "$withval" = no || with_gnu_ld=yes
+else
+  with_gnu_ld=no
+fi
+
+ac_prog=ld
+if test "$GCC" = yes; then
+  # Check if gcc -print-prog-name=ld gives a path.
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5
+$as_echo_n "checking for ld used by $CC... " >&6; }
+  case $host in
+  *-*-mingw*)
+    # gcc leaves a trailing carriage return which upsets mingw
+    ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;;
+  *)
+    ac_prog=`($CC -print-prog-name=ld) 2>&5` ;;
+  esac
+  case $ac_prog in
+    # Accept absolute paths.
+    [\\/]* | ?:[\\/]*)
+      re_direlt='/[^/][^/]*/\.\./'
+      # Canonicalize the pathname of ld
+      ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'`
+      while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do
+	ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"`
+      done
+      test -z "$LD" && LD="$ac_prog"
+      ;;
+  "")
+    # If it fails, then pretend we aren't using GCC.
+    ac_prog=ld
+    ;;
+  *)
+    # If it is relative, then search for the first ld in PATH.
+    with_gnu_ld=unknown
+    ;;
+  esac
+elif test "$with_gnu_ld" = yes; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5
+$as_echo_n "checking for GNU ld... " >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5
+$as_echo_n "checking for non-GNU ld... " >&6; }
+fi
+if ${lt_cv_path_LD+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -z "$LD"; then
+  lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+  for ac_dir in $PATH; do
+    IFS="$lt_save_ifs"
+    test -z "$ac_dir" && ac_dir=.
+    if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
+      lt_cv_path_LD="$ac_dir/$ac_prog"
+      # Check to see if the program is GNU ld.  I'd rather use --version,
+      # but apparently some variants of GNU ld only accept -v.
+      # Break only if it was the GNU/non-GNU ld that we prefer.
+      case `"$lt_cv_path_LD" -v 2>&1 </dev/null` in
+      *GNU* | *'with BFD'*)
+	test "$with_gnu_ld" != no && break
+	;;
+      *)
+	test "$with_gnu_ld" != yes && break
+	;;
+      esac
+    fi
+  done
+  IFS="$lt_save_ifs"
+else
+  lt_cv_path_LD="$LD" # Let the user override the test with a path.
+fi
+fi
+
+LD="$lt_cv_path_LD"
+if test -n "$LD"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LD" >&5
+$as_echo "$LD" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5
+$as_echo_n "checking if the linker ($LD) is GNU ld... " >&6; }
+if ${lt_cv_prog_gnu_ld+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  # I'd rather use --version here, but apparently some GNU lds only accept -v.
+case `$LD -v 2>&1 </dev/null` in
+*GNU* | *'with BFD'*)
+  lt_cv_prog_gnu_ld=yes
+  ;;
+*)
+  lt_cv_prog_gnu_ld=no
+  ;;
+esac
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_gnu_ld" >&5
+$as_echo "$lt_cv_prog_gnu_ld" >&6; }
+with_gnu_ld=$lt_cv_prog_gnu_ld
+
+
+
+
+
+
+
+      # Check if GNU C++ uses GNU ld as the underlying linker, since the
+      # archiving commands below assume that GNU ld is being used.
+      if test "$with_gnu_ld" = yes; then
+        archive_cmds_CXX='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
+        archive_expsym_cmds_CXX='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+
+        hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir'
+        export_dynamic_flag_spec_CXX='${wl}--export-dynamic'
+
+        # If archive_cmds runs LD, not CC, wlarc should be empty
+        # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to
+        #     investigate it a little bit more. (MM)
+        wlarc='${wl}'
+
+        # ancient GNU ld didn't support --whole-archive et al.
+        if eval "`$CC -print-prog-name=ld` --help 2>&1" |
+	  $GREP 'no-whole-archive' > /dev/null; then
+          whole_archive_flag_spec_CXX="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+        else
+          whole_archive_flag_spec_CXX=
+        fi
+      else
+        with_gnu_ld=no
+        wlarc=
+
+        # A generic and very simple default shared library creation
+        # command for GNU C++ for the case where it uses the native
+        # linker, instead of GNU ld.  If possible, this setting should be
+        # overridden to take advantage of the native linker features on
+        # the platform it is being used on.
+        archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib'
+      fi
+
+      # Commands to make compiler produce verbose output that lists
+      # what "hidden" libraries, object files and flags are used when
+      # linking a shared library.
+      output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
+
+    else
+      GXX=no
+      with_gnu_ld=no
+      wlarc=
+    fi
+
+    # PORTME: fill in a description of your system's C++ link characteristics
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5
+$as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; }
+    ld_shlibs_CXX=yes
+    case $host_os in
+      aix3*)
+        # FIXME: insert proper C++ library support
+        ld_shlibs_CXX=no
+        ;;
+      aix[4-9]*)
+        if test "$host_cpu" = ia64; then
+          # On IA64, the linker does run time linking by default, so we don't
+          # have to do anything special.
+          aix_use_runtimelinking=no
+          exp_sym_flag='-Bexport'
+          no_entry_flag=""
+        else
+          aix_use_runtimelinking=no
+
+          # Test if we are trying to use run time linking or normal
+          # AIX style linking. If -brtl is somewhere in LDFLAGS, we
+          # need to do runtime linking.
+          case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*)
+	    for ld_flag in $LDFLAGS; do
+	      case $ld_flag in
+	      *-brtl*)
+	        aix_use_runtimelinking=yes
+	        break
+	        ;;
+	      esac
+	    done
+	    ;;
+          esac
+
+          exp_sym_flag='-bexport'
+          no_entry_flag='-bnoentry'
+        fi
+
+        # When large executables or shared objects are built, AIX ld can
+        # have problems creating the table of contents.  If linking a library
+        # or program results in "error TOC overflow" add -mminimal-toc to
+        # CXXFLAGS/CFLAGS for g++/gcc.  In the cases where that is not
+        # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
+
+        archive_cmds_CXX=''
+        hardcode_direct_CXX=yes
+        hardcode_direct_absolute_CXX=yes
+        hardcode_libdir_separator_CXX=':'
+        link_all_deplibs_CXX=yes
+        file_list_spec_CXX='${wl}-f,'
+
+        if test "$GXX" = yes; then
+          case $host_os in aix4.[012]|aix4.[012].*)
+          # We only want to do this on AIX 4.2 and lower, the check
+          # below for broken collect2 doesn't work under 4.3+
+	  collect2name=`${CC} -print-prog-name=collect2`
+	  if test -f "$collect2name" &&
+	     strings "$collect2name" | $GREP resolve_lib_name >/dev/null
+	  then
+	    # We have reworked collect2
+	    :
+	  else
+	    # We have old collect2
+	    hardcode_direct_CXX=unsupported
+	    # It fails to find uninstalled libraries when the uninstalled
+	    # path is not listed in the libpath.  Setting hardcode_minus_L
+	    # to unsupported forces relinking
+	    hardcode_minus_L_CXX=yes
+	    hardcode_libdir_flag_spec_CXX='-L$libdir'
+	    hardcode_libdir_separator_CXX=
+	  fi
+          esac
+          shared_flag='-shared'
+	  if test "$aix_use_runtimelinking" = yes; then
+	    shared_flag="$shared_flag "'${wl}-G'
+	  fi
+        else
+          # not using gcc
+          if test "$host_cpu" = ia64; then
+	  # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release
+	  # chokes on -Wl,-G. The following line is correct:
+	  shared_flag='-G'
+          else
+	    if test "$aix_use_runtimelinking" = yes; then
+	      shared_flag='${wl}-G'
+	    else
+	      shared_flag='${wl}-bM:SRE'
+	    fi
+          fi
+        fi
+
+        export_dynamic_flag_spec_CXX='${wl}-bexpall'
+        # It seems that -bexpall does not export symbols beginning with
+        # underscore (_), so it is better to generate a list of symbols to
+	# export.
+        always_export_symbols_CXX=yes
+        if test "$aix_use_runtimelinking" = yes; then
+          # Warning - without using the other runtime loading flags (-brtl),
+          # -berok will link without error, but may produce a broken library.
+          allow_undefined_flag_CXX='-berok'
+          # Determine the default libpath from the value encoded in an empty
+          # executable.
+          if test "${lt_cv_aix_libpath+set}" = set; then
+  aix_libpath=$lt_cv_aix_libpath
+else
+  if ${lt_cv_aix_libpath__CXX+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+
+  lt_aix_libpath_sed='
+      /Import File Strings/,/^$/ {
+	  /^0/ {
+	      s/^0  *\([^ ]*\) *$/\1/
+	      p
+	  }
+      }'
+  lt_cv_aix_libpath__CXX=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  # Check for a 64-bit object if we didn't find anything.
+  if test -z "$lt_cv_aix_libpath__CXX"; then
+    lt_cv_aix_libpath__CXX=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  fi
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+  if test -z "$lt_cv_aix_libpath__CXX"; then
+    lt_cv_aix_libpath__CXX="/usr/lib:/lib"
+  fi
+
+fi
+
+  aix_libpath=$lt_cv_aix_libpath__CXX
+fi
+
+          hardcode_libdir_flag_spec_CXX='${wl}-blibpath:$libdir:'"$aix_libpath"
+
+          archive_expsym_cmds_CXX='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag"
+        else
+          if test "$host_cpu" = ia64; then
+	    hardcode_libdir_flag_spec_CXX='${wl}-R $libdir:/usr/lib:/lib'
+	    allow_undefined_flag_CXX="-z nodefs"
+	    archive_expsym_cmds_CXX="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols"
+          else
+	    # Determine the default libpath from the value encoded in an
+	    # empty executable.
+	    if test "${lt_cv_aix_libpath+set}" = set; then
+  aix_libpath=$lt_cv_aix_libpath
+else
+  if ${lt_cv_aix_libpath__CXX+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+
+  lt_aix_libpath_sed='
+      /Import File Strings/,/^$/ {
+	  /^0/ {
+	      s/^0  *\([^ ]*\) *$/\1/
+	      p
+	  }
+      }'
+  lt_cv_aix_libpath__CXX=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  # Check for a 64-bit object if we didn't find anything.
+  if test -z "$lt_cv_aix_libpath__CXX"; then
+    lt_cv_aix_libpath__CXX=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  fi
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+  if test -z "$lt_cv_aix_libpath__CXX"; then
+    lt_cv_aix_libpath__CXX="/usr/lib:/lib"
+  fi
+
+fi
+
+  aix_libpath=$lt_cv_aix_libpath__CXX
+fi
+
+	    hardcode_libdir_flag_spec_CXX='${wl}-blibpath:$libdir:'"$aix_libpath"
+	    # Warning - without using the other run time loading flags,
+	    # -berok will link without error, but may produce a broken library.
+	    no_undefined_flag_CXX=' ${wl}-bernotok'
+	    allow_undefined_flag_CXX=' ${wl}-berok'
+	    if test "$with_gnu_ld" = yes; then
+	      # We only use this code for GNU lds that support --whole-archive.
+	      whole_archive_flag_spec_CXX='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
+	    else
+	      # Exported symbols can be pulled into shared objects from archives
+	      whole_archive_flag_spec_CXX='$convenience'
+	    fi
+	    archive_cmds_need_lc_CXX=yes
+	    # This is similar to how AIX traditionally builds its shared
+	    # libraries.
+	    archive_expsym_cmds_CXX="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname'
+          fi
+        fi
+        ;;
+
+      beos*)  # archive_cmds_CXX is set only when $LD --help lists an ELF target
+	if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+	  allow_undefined_flag_CXX=unsupported
+	  # Joseph Beckenbach <jrb3@best.com> says some releases of gcc
+	  # support --undefined.  This deserves some investigation.  FIXME
+	  archive_cmds_CXX='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+	else
+	  ld_shlibs_CXX=no  # taken when no ELF target is reported by $LD --help
+	fi
+	;;
+
+      chorus*)  # single catch-all compiler branch: always sets ld_shlibs_CXX=no
+        case $cc_basename in
+          *)
+	  # FIXME: insert proper C++ library support
+	  ld_shlibs_CXX=no
+	  ;;
+        esac
+        ;;
+
+      cygwin* | mingw* | pw32* | cegcc*)  # dispatches on the pair $GXX,$cc_basename
+	case $GXX,$cc_basename in
+	,cl* | no,cl*)  # $GXX empty or "no", and compiler basename starting with cl
+	  # Native MSVC
+	  # hardcode_libdir_flag_spec is actually meaningless, as there is
+	  # no search path for DLLs.
+	  hardcode_libdir_flag_spec_CXX=' '
+	  allow_undefined_flag_CXX=unsupported
+	  always_export_symbols_CXX=yes
+	  file_list_spec_CXX='@'
+	  # Tell ltmain to make .lib files, not .a files.
+	  libext=lib
+	  # Tell ltmain to make .dll files, not .so files.
+	  shrext_cmds=".dll"
+	  # FIXME: Setting linknames here is a bad hack.
+	  archive_cmds_CXX='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames='
+	  archive_expsym_cmds_CXX='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+	      $SED -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp;
+	    else
+	      $SED -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp;
+	    fi~
+	    $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~
+	    linknames='
+	  # The linker will not automatically build a static lib if we build a DLL.
+	  # _LT_TAGVAR(old_archive_from_new_cmds, CXX)='true'
+	  enable_shared_with_static_runtimes_CXX=yes
+	  # Don't use ranlib
+	  old_postinstall_cmds_CXX='chmod 644 $oldlib'
+	  postlink_cmds_CXX='lt_outputfile="@OUTPUT@"~
+	    lt_tool_outputfile="@TOOL_OUTPUT@"~
+	    case $lt_outputfile in
+	      *.exe|*.EXE) ;;
+	      *)
+		lt_outputfile="$lt_outputfile.exe"
+		lt_tool_outputfile="$lt_tool_outputfile.exe"
+		;;
+	    esac~
+	    func_to_tool_file "$lt_outputfile"~
+	    if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then
+	      $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1;
+	      $RM "$lt_outputfile.manifest";
+	    fi'
+	  ;;
+	*)
+	  # g++
+	  # _LT_TAGVAR(hardcode_libdir_flag_spec, CXX) is actually meaningless,
+	  # as there is no search path for DLLs.
+	  hardcode_libdir_flag_spec_CXX='-L$libdir'
+	  export_dynamic_flag_spec_CXX='${wl}--export-all-symbols'
+	  allow_undefined_flag_CXX=unsupported
+	  always_export_symbols_CXX=no
+	  enable_shared_with_static_runtimes_CXX=yes
+
+	  if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then
+	    archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+	    # If the export-symbols file is already a .def file (its first
+	    # line is EXPORTS), use it as is; otherwise, prepend an EXPORTS line.
+	    archive_expsym_cmds_CXX='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+	      cp $export_symbols $output_objdir/$soname.def;
+	    else
+	      echo EXPORTS > $output_objdir/$soname.def;
+	      cat $export_symbols >> $output_objdir/$soname.def;
+	    fi~
+	    $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+	  else
+	    ld_shlibs_CXX=no  # taken when $LD --help does not mention auto-import
+	  fi
+	  ;;
+	esac
+	;;
+      darwin* | rhapsody*)  # reads $_lt_dar_allow_undefined, $_lt_dar_single_mod, $_lt_dar_export_syms and $_lt_dsymutil, none of which are assigned in this arm
+
+
+  archive_cmds_need_lc_CXX=no
+  hardcode_direct_CXX=no
+  hardcode_automatic_CXX=yes
+  hardcode_shlibpath_var_CXX=unsupported
+  if test "$lt_cv_ld_force_load" = "yes"; then  # build one ${wl}-force_load,<archive> entry per convenience archive
+    whole_archive_flag_spec_CXX='`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
+
+  else
+    whole_archive_flag_spec_CXX=''
+  fi
+  link_all_deplibs_CXX=yes
+  allow_undefined_flag_CXX="$_lt_dar_allow_undefined"
+  case $cc_basename in
+     ifort*) _lt_dar_can_shared=yes ;;
+     *) _lt_dar_can_shared=$GCC ;;  # any other compiler: follow the value of $GCC
+  esac
+  if test "$_lt_dar_can_shared" = "yes"; then
+    output_verbose_link_cmd=func_echo_all
+    archive_cmds_CXX="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}"
+    module_cmds_CXX="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}"
+    archive_expsym_cmds_CXX="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}"
+    module_expsym_cmds_CXX="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}"
+       if test "$lt_cv_apple_cc_single_mod" != "yes"; then  # override both archive commands with a two-step link through ${lib}-master.o
+      archive_cmds_CXX="\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dsymutil}"
+      archive_expsym_cmds_CXX="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dar_export_syms}${_lt_dsymutil}"
+    fi
+
+  else
+  ld_shlibs_CXX=no  # taken when $_lt_dar_can_shared is not "yes"
+  fi
+
+	;;
+
+      dgux*)  # all three compiler branches below end with ld_shlibs_CXX=no
+        case $cc_basename in
+          ec++*)
+	    # FIXME: insert proper C++ library support
+	    ld_shlibs_CXX=no
+	    ;;
+          ghcx*)
+	    # Green Hills C++ Compiler
+	    # FIXME: insert proper C++ library support
+	    ld_shlibs_CXX=no
+	    ;;
+          *)
+	    # FIXME: insert proper C++ library support
+	    ld_shlibs_CXX=no
+	    ;;
+        esac
+        ;;
+
+      freebsd2.*)
+        # C++ shared libraries are reported to be fairly broken before the
+	# switch to ELF.
+        ld_shlibs_CXX=no
+        ;;
+
+      freebsd-elf*)  # only archive_cmds_need_lc_CXX is overridden in this arm
+        archive_cmds_need_lc_CXX=no
+        ;;
+
+      freebsd* | dragonfly*)  # reached only by freebsd* hosts not matched by the freebsd2.* and freebsd-elf* arms above
+        # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF
+        # conventions
+        ld_shlibs_CXX=yes
+        ;;
+
+      gnu*)  # empty arm: no variables are assigned for gnu* hosts
+        ;;
+
+      haiku*)  # sets the shared-link command template and link_all_deplibs_CXX only
+        archive_cmds_CXX='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+        link_all_deplibs_CXX=yes
+        ;;
+
+      hpux9*)  # common hardcode/export flags first, then a per-compiler dispatch on $cc_basename
+        hardcode_libdir_flag_spec_CXX='${wl}+b ${wl}$libdir'
+        hardcode_libdir_separator_CXX=:
+        export_dynamic_flag_spec_CXX='${wl}-E'
+        hardcode_direct_CXX=yes
+        hardcode_minus_L_CXX=yes # Not in the search PATH,
+				             # but as the default
+				             # location of the library.
+
+        case $cc_basename in
+          CC*)
+            # FIXME: insert proper C++ library support
+            ld_shlibs_CXX=no
+            ;;
+          aCC*)
+            archive_cmds_CXX='$RM $output_objdir/$soname~$CC -b ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+            # Commands to make compiler produce verbose output that lists
+            # what "hidden" libraries, object files and flags are used when
+            # linking a shared library.
+            #
+            # There doesn't appear to be a way to prevent this compiler from
+            # explicitly linking system object files so we need to strip them
+            # from the output so that they don't get included in the library
+            # dependencies.
+            output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
+            ;;
+          *)
+            if test "$GXX" = yes; then  # link through $output_objdir/$soname, then move to $lib if the names differ
+              archive_cmds_CXX='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+            else
+              # FIXME: insert proper C++ library support
+              ld_shlibs_CXX=no
+            fi
+            ;;
+        esac
+        ;;
+
+      hpux10*|hpux11*)  # flags depend on $with_gnu_ld and $host_cpu; link commands depend on $cc_basename
+        if test "$with_gnu_ld" = no; then  # quoted so an empty value cannot break the test expression
+	  hardcode_libdir_flag_spec_CXX='${wl}+b ${wl}$libdir'
+	  hardcode_libdir_separator_CXX=:
+
+          case $host_cpu in
+            hppa*64*|ia64*)  # export_dynamic_flag_spec_CXX is left untouched on these CPUs
+              ;;
+            *)
+	      export_dynamic_flag_spec_CXX='${wl}-E'
+              ;;
+          esac
+        fi
+        case $host_cpu in
+          hppa*64*|ia64*)
+            hardcode_direct_CXX=no
+            hardcode_shlibpath_var_CXX=no
+            ;;
+          *)
+            hardcode_direct_CXX=yes
+            hardcode_direct_absolute_CXX=yes
+            hardcode_minus_L_CXX=yes # Not in the search PATH,
+					         # but as the default
+					         # location of the library.
+            ;;
+        esac
+
+        case $cc_basename in
+          CC*)
+	    # FIXME: insert proper C++ library support
+	    ld_shlibs_CXX=no
+	    ;;
+          aCC*)
+	    case $host_cpu in
+	      hppa*64*)
+	        archive_cmds_CXX='$CC -b ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+	        ;;
+	      ia64*)
+	        archive_cmds_CXX='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+	        ;;
+	      *)
+	        archive_cmds_CXX='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+	        ;;
+	    esac
+	    # Commands to make compiler produce verbose output that lists
+	    # what "hidden" libraries, object files and flags are used when
+	    # linking a shared library.
+	    #
+	    # There doesn't appear to be a way to prevent this compiler from
+	    # explicitly linking system object files so we need to strip them
+	    # from the output so that they don't get included in the library
+	    # dependencies.
+	    output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
+	    ;;
+          *)
+	    if test "$GXX" = yes; then
+	      if test "$with_gnu_ld" = no; then  # no else branch: archive_cmds_CXX is not assigned here when GNU ld is in use
+	        case $host_cpu in
+	          hppa*64*)
+	            archive_cmds_CXX='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+	            ;;
+	          ia64*)
+	            archive_cmds_CXX='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+	            ;;
+	          *)
+	            archive_cmds_CXX='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+	            ;;
+	        esac
+	      fi
+	    else
+	      # FIXME: insert proper C++ library support
+	      ld_shlibs_CXX=no
+	    fi
+	    ;;
+        esac
+        ;;
+
+      interix[3-9]*)
+	hardcode_direct_CXX=no
+	hardcode_shlibpath_var_CXX=no
+	hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir'
+	export_dynamic_flag_spec_CXX='${wl}-E'
+	# Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc.
+	# Instead, shared libraries are loaded at an image base (0x10000000 by
+	# default) and relocated if they conflict, which is a slow, very memory-
+	# consuming and fragmenting process.  To avoid this, we pick a random,
+	# 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link
+	# time.  Moving up from 0x10000000 also allows more sbrk(2) space.
+	archive_cmds_CXX='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'  # 262144 = 256 KiB, 1342177280 = 0x50000000
+	archive_expsym_cmds_CXX='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+	;;
+      irix5* | irix6*)  # per-compiler link commands, followed by rpath settings shared by every branch
+        case $cc_basename in
+          CC*)
+	    # SGI C++
+	    archive_cmds_CXX='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+
+	    # Archives containing C++ object files must be created using
+	    # "CC -ar", where "CC" is the IRIX C++ compiler.  This is
+	    # necessary to make sure instantiated templates are included
+	    # in the archive.
+	    old_archive_cmds_CXX='$CC -ar -WR,-u -o $oldlib $oldobjs'
+	    ;;
+          *)
+	    if test "$GXX" = yes; then  # no else branch: archive_cmds_CXX is not assigned here when $GXX is not yes
+	      if test "$with_gnu_ld" = no; then
+	        archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+	      else  # same command without the -update_registry option
+	        archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` -o $lib'
+	      fi
+	    fi
+	    link_all_deplibs_CXX=yes
+	    ;;
+        esac
+        hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir'  # this and the next two settings apply to both compiler branches
+        hardcode_libdir_separator_CXX=:
+        inherit_rpath_CXX=yes
+        ;;
+
+      linux* | k*bsd*-gnu | kopensolaris*-gnu)  # dispatches on $cc_basename; each branch sets its own link command templates
+        case $cc_basename in
+          KCC*)
+	    # Kuck and Associates, Inc. (KAI) C++ Compiler
+
+	    # KCC will only create a shared library if the output file
+	    # ends with ".so" (or ".sl" for HP-UX), so rename the library
+	    # to its proper name (with version) after linking.
+	    archive_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib'
+	    archive_expsym_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib ${wl}-retain-symbols-file,$export_symbols; mv \$templib $lib'
+	    # Commands to make compiler produce verbose output that lists
+	    # what "hidden" libraries, object files and flags are used when
+	    # linking a shared library.
+	    #
+	    # There doesn't appear to be a way to prevent this compiler from
+	    # explicitly linking system object files so we need to strip them
+	    # from the output so that they don't get included in the library
+	    # dependencies.
+	    output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
+
+	    hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir'
+	    export_dynamic_flag_spec_CXX='${wl}--export-dynamic'
+
+	    # Archives containing C++ object files must be created using
+	    # "CC -Bstatic", where "CC" is the KAI C++ compiler.
+	    old_archive_cmds_CXX='$CC -Bstatic -o $oldlib $oldobjs'
+	    ;;
+	  icpc* | ecpc* )
+	    # Intel C++
+	    with_gnu_ld=yes
+	    # version 8.0 and above of icpc choke on multiply defined symbols
+	    # if we add $predep_objects and $postdep_objects, however 7.1 and
+	    # earlier do not add the objects themselves.
+	    case `$CC -V 2>&1` in
+	      *"Version 7."*)
+	        archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
+		archive_expsym_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+		;;
+	      *)  # Version 8.0 or newer
+	        tmp_idyn=
+	        case $host_cpu in
+		  ia64*) tmp_idyn=' -i_dynamic';;
+		esac
+	        archive_cmds_CXX='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+		archive_expsym_cmds_CXX='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+		;;
+	    esac
+	    archive_cmds_need_lc_CXX=no
+	    hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir'
+	    export_dynamic_flag_spec_CXX='${wl}--export-dynamic'
+	    whole_archive_flag_spec_CXX='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
+	    ;;
+          pgCC* | pgcpp*)
+            # Portland Group C++ compiler
+	    case `$CC -V` in
+	    *pgCC\ [1-5].* | *pgcpp\ [1-5].*)  # these versions run a --prelink_objects step into Template.dir before archiving or linking
+	      prelink_cmds_CXX='tpldir=Template.dir~
+		rm -rf $tpldir~
+		$CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~
+		compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"'
+	      old_archive_cmds_CXX='tpldir=Template.dir~
+		rm -rf $tpldir~
+		$CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~
+		$AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~
+		$RANLIB $oldlib'
+	      archive_cmds_CXX='tpldir=Template.dir~
+		rm -rf $tpldir~
+		$CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~
+		$CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib'
+	      archive_expsym_cmds_CXX='tpldir=Template.dir~
+		rm -rf $tpldir~
+		$CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~
+		$CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib'
+	      ;;
+	    *) # Version 6 and above use weak symbols
+	      archive_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib'
+	      archive_expsym_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib'
+	      ;;
+	    esac
+
+	    hardcode_libdir_flag_spec_CXX='${wl}--rpath ${wl}$libdir'
+	    export_dynamic_flag_spec_CXX='${wl}--export-dynamic'
+	    whole_archive_flag_spec_CXX='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+            ;;
+	  cxx*)
+	    # Compaq C++
+	    archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
+	    archive_expsym_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname  -o $lib ${wl}-retain-symbols-file $wl$export_symbols'
+
+	    runpath_var=LD_RUN_PATH
+	    hardcode_libdir_flag_spec_CXX='-rpath $libdir'
+	    hardcode_libdir_separator_CXX=:
+
+	    # Commands to make compiler produce verbose output that lists
+	    # what "hidden" libraries, object files and flags are used when
+	    # linking a shared library.
+	    #
+	    # There doesn't appear to be a way to prevent this compiler from
+	    # explicitly linking system object files so we need to strip them
+	    # from the output so that they don't get included in the library
+	    # dependencies.
+	    output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed'
+	    ;;
+	  xl* | mpixl* | bgxl*)
+	    # IBM XL 8.0 on PPC, with GNU ld
+	    hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir'
+	    export_dynamic_flag_spec_CXX='${wl}--export-dynamic'
+	    archive_cmds_CXX='$CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+	    if test "x$supports_anon_versioning" = xyes; then  # export list is written to $libname.ver and passed via -version-script
+	      archive_expsym_cmds_CXX='echo "{ global:" > $output_objdir/$libname.ver~
+		cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
+		echo "local: *; };" >> $output_objdir/$libname.ver~
+		$CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
+	    fi
+	    ;;
+	  *)
+	    case `$CC -V 2>&1 | sed 5q` in  # matches against the first five lines of the version output; only Sun C is handled
+	    *Sun\ C*)
+	      # Sun C++ 5.9
+	      no_undefined_flag_CXX=' -zdefs'
+	      archive_cmds_CXX='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+	      archive_expsym_cmds_CXX='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file ${wl}$export_symbols'
+	      hardcode_libdir_flag_spec_CXX='-R$libdir'
+	      whole_archive_flag_spec_CXX='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+	      compiler_needs_object_CXX=yes
+
+	      # Not sure whether something based on
+	      # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1
+	      # would be better.
+	      output_verbose_link_cmd='func_echo_all'
+
+	      # Archives containing C++ object files must be created using
+	      # "CC -xar", where "CC" is the Sun C++ compiler.  This is
+	      # necessary to make sure instantiated templates are included
+	      # in the archive.
+	      old_archive_cmds_CXX='$CC -xar -o $oldlib $oldobjs'
+	      ;;
+	    esac
+	    ;;
+	esac
+	;;
+
+      lynxos*)  # unconditional: ld_shlibs_CXX=no
+        # FIXME: insert proper C++ library support
+	ld_shlibs_CXX=no
+	;;
+
+      m88k*)  # unconditional: ld_shlibs_CXX=no
+        # FIXME: insert proper C++ library support
+        ld_shlibs_CXX=no
+	;;
+
+      mvs*)  # both compiler branches below are identical: ld_shlibs_CXX=no
+        case $cc_basename in
+          cxx*)
+	    # FIXME: insert proper C++ library support
+	    ld_shlibs_CXX=no
+	    ;;
+	  *)
+	    # FIXME: insert proper C++ library support
+	    ld_shlibs_CXX=no
+	    ;;
+	esac
+	;;
+
+      netbsd*)
+        if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then  # true when __ELF__ survives preprocessing unchanged
+	  archive_cmds_CXX='$LD -Bshareable  -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags'
+	  wlarc=  # NOTE(review): presumably cleared because archive_cmds_CXX calls $LD directly rather than $CC - confirm
+	  hardcode_libdir_flag_spec_CXX='-R$libdir'
+	  hardcode_direct_CXX=yes
+	  hardcode_shlibpath_var_CXX=no
+	fi
+	# Work around some broken pre-1.5 toolchains
+	output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"'
+	;;
+
+      *nto* | *qnx*)  # unconditional: ld_shlibs_CXX=yes
+        ld_shlibs_CXX=yes
+	;;
+
+      openbsd2*)  # listed before the generic openbsd* arm, so openbsd2* hosts never reach it
+        # C++ shared libraries are fairly broken
+	ld_shlibs_CXX=no
+	;;
+
+      openbsd*)  # everything is conditional on /usr/libexec/ld.so existing
+	if test -f /usr/libexec/ld.so; then
+	  hardcode_direct_CXX=yes
+	  hardcode_shlibpath_var_CXX=no
+	  hardcode_direct_absolute_CXX=yes
+	  archive_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib'
+	  hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir'
+	  if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then  # NOTE(review): plain grep here, while the netbsd* arm uses $GREP for the same probe
+	    archive_expsym_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib'
+	    export_dynamic_flag_spec_CXX='${wl}-E'
+	    whole_archive_flag_spec_CXX="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+	  fi
+	  output_verbose_link_cmd=func_echo_all
+	else
+	  ld_shlibs_CXX=no  # taken when /usr/libexec/ld.so is not a regular file
+	fi
+	;;
+
+      osf3* | osf4* | osf5*)  # dispatches on $cc_basename; several branches further split on $host (osf3* versus the rest)
+        case $cc_basename in
+          KCC*)
+	    # Kuck and Associates, Inc. (KAI) C++ Compiler
+
+	    # KCC will only create a shared library if the output file
+	    # ends with ".so" (or ".sl" for HP-UX), so rename the library
+	    # to its proper name (with version) after linking.
+	    archive_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib'
+
+	    hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir'
+	    hardcode_libdir_separator_CXX=:
+
+	    # Archives containing C++ object files must be created using
+	    # the KAI C++ compiler.
+	    case $host in
+	      osf3*) old_archive_cmds_CXX='$CC -Bstatic -o $oldlib $oldobjs' ;;
+	      *) old_archive_cmds_CXX='$CC -o $oldlib $oldobjs' ;;
+	    esac
+	    ;;
+          RCC*)
+	    # Rational C++ 2.4.1
+	    # FIXME: insert proper C++ library support
+	    ld_shlibs_CXX=no
+	    ;;
+          cxx*)
+	    case $host in
+	      osf3*)  # this branch sets no archive_expsym_cmds_CXX, unlike the *) branch below
+	        allow_undefined_flag_CXX=' ${wl}-expect_unresolved ${wl}\*'
+	        archive_cmds_CXX='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $soname `test -n "$verstring" && func_echo_all "${wl}-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+	        hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir'
+		;;
+	      *)
+	        allow_undefined_flag_CXX=' -expect_unresolved \*'
+	        archive_cmds_CXX='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+	        archive_expsym_cmds_CXX='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~
+	          echo "-hidden">> $lib.exp~
+	          $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname ${wl}-input ${wl}$lib.exp  `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~
+	          $RM $lib.exp'  # NOTE(review): the template above calls $ECHO where the sibling templates call func_echo_all - confirm intended
+	        hardcode_libdir_flag_spec_CXX='-rpath $libdir'
+		;;
+	    esac
+
+	    hardcode_libdir_separator_CXX=:
+
+	    # Commands to make compiler produce verbose output that lists
+	    # what "hidden" libraries, object files and flags are used when
+	    # linking a shared library.
+	    #
+	    # There doesn't appear to be a way to prevent this compiler from
+	    # explicitly linking system object files so we need to strip them
+	    # from the output so that they don't get included in the library
+	    # dependencies.
+	    output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
+	    ;;
+	  *)
+	    if test "$GXX" = yes && test "$with_gnu_ld" = no; then  # GNU C++ without GNU ld; any other combination falls to ld_shlibs_CXX=no
+	      allow_undefined_flag_CXX=' ${wl}-expect_unresolved ${wl}\*'
+	      case $host in
+	        osf3*)
+	          archive_cmds_CXX='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+		  ;;
+	        *)  # differs from the osf3* command by adding $pic_flag and ${wl}-msym
+	          archive_cmds_CXX='$CC -shared $pic_flag -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+		  ;;
+	      esac
+
+	      hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir'
+	      hardcode_libdir_separator_CXX=:
+
+	      # Commands to make compiler produce verbose output that lists
+	      # what "hidden" libraries, object files and flags are used when
+	      # linking a shared library.
+	      output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
+
+	    else
+	      # FIXME: insert proper C++ library support
+	      ld_shlibs_CXX=no
+	    fi
+	    ;;
+        esac
+        ;;
+
+      psos*)  # unconditional: ld_shlibs_CXX=no
+        # FIXME: insert proper C++ library support
+        ld_shlibs_CXX=no
+        ;;
+
+      sunos4*)  # all three compiler branches below end with ld_shlibs_CXX=no
+        case $cc_basename in
+          CC*)
+	    # Sun C++ 4.x
+	    # FIXME: insert proper C++ library support
+	    ld_shlibs_CXX=no
+	    ;;
+          lcc*)
+	    # Lucid
+	    # FIXME: insert proper C++ library support
+	    ld_shlibs_CXX=no
+	    ;;
+          *)
+	    # FIXME: insert proper C++ library support
+	    ld_shlibs_CXX=no
+	    ;;
+        esac
+        ;;
+
+      solaris*)
+        case $cc_basename in
+          CC* | sunCC*)
+	    # Sun C++ 4.2, 5.x and Centerline C++
+            archive_cmds_need_lc_CXX=yes
+	    no_undefined_flag_CXX=' -zdefs'
+	    archive_cmds_CXX='$CC -G${allow_undefined_flag}  -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+	    archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+	      $CC -G${allow_undefined_flag} ${wl}-M ${wl}$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
+
+	    hardcode_libdir_flag_spec_CXX='-R$libdir'
+	    hardcode_shlibpath_var_CXX=no
+	    case $host_os in
+	      solaris2.[0-5] | solaris2.[0-5].*) ;;
+	      *)
+		# The compiler driver will combine and reorder linker options,
+		# but understands `-z linker_flag'.
+	        # Supported since Solaris 2.6 (maybe 2.5.1?)
+		whole_archive_flag_spec_CXX='-z allextract$convenience -z defaultextract'
+	        ;;
+	    esac
+	    link_all_deplibs_CXX=yes
+
+	    output_verbose_link_cmd='func_echo_all'
+
+	    # Archives containing C++ object files must be created using
+	    # "CC -xar", where "CC" is the Sun C++ compiler.  This is
+	    # necessary to make sure instantiated templates are included
+	    # in the archive.
+	    old_archive_cmds_CXX='$CC -xar -o $oldlib $oldobjs'
+	    ;;
+          gcx*)
+	    # Green Hills C++ Compiler
+	    archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
+
+	    # The C++ compiler must be used to create the archive.
+	    old_archive_cmds_CXX='$CC $LDFLAGS -archive -o $oldlib $oldobjs'
+	    ;;
+          *)
+	    # GNU C++ compiler with Solaris linker
+	    if test "$GXX" = yes && test "$with_gnu_ld" = no; then
+	      no_undefined_flag_CXX=' ${wl}-z ${wl}defs'
+	      if $CC --version | $GREP -v '^2\.7' > /dev/null; then
+	        archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
+	        archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+		  $CC -shared $pic_flag -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
+
+	        # Commands to make compiler produce verbose output that lists
+	        # what "hidden" libraries, object files and flags are used when
+	        # linking a shared library.
+	        output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
+	      else
+	        # g++ 2.7 appears to require `-G' NOT `-shared' on this
+	        # platform.
+	        archive_cmds_CXX='$CC -G -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
+	        archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+		  $CC -G -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
+
+	        # Commands to make compiler produce verbose output that lists
+	        # what "hidden" libraries, object files and flags are used when
+	        # linking a shared library.
+	        output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
+	      fi
+
+	      hardcode_libdir_flag_spec_CXX='${wl}-R $wl$libdir'
+	      case $host_os in
+		solaris2.[0-5] | solaris2.[0-5].*) ;;
+		*)
+		  whole_archive_flag_spec_CXX='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract'
+		  ;;
+	      esac
+	    fi
+	    ;;
+        esac
+        ;;
+
+    sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*)
+      no_undefined_flag_CXX='${wl}-z,text'
+      archive_cmds_need_lc_CXX=no
+      hardcode_shlibpath_var_CXX=no
+      runpath_var='LD_RUN_PATH'
+
+      case $cc_basename in
+        CC*)
+	  archive_cmds_CXX='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	  archive_expsym_cmds_CXX='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	  ;;
+	*)
+	  archive_cmds_CXX='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	  archive_expsym_cmds_CXX='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	  ;;
+      esac
+      ;;
+
+      sysv5* | sco3.2v5* | sco5v6*)
+	# Note: We can NOT use -z defs as we might desire, because we do not
+	# link with -lc, and that would cause any symbols used from libc to
+	# always be unresolved, which means just about no library would
+	# ever link correctly.  If we're not using GNU ld we use -z text
+	# though, which does catch some bad symbols but isn't as heavy-handed
+	# as -z defs.
+	no_undefined_flag_CXX='${wl}-z,text'
+	allow_undefined_flag_CXX='${wl}-z,nodefs'
+	archive_cmds_need_lc_CXX=no
+	hardcode_shlibpath_var_CXX=no
+	hardcode_libdir_flag_spec_CXX='${wl}-R,$libdir'
+	hardcode_libdir_separator_CXX=':'
+	link_all_deplibs_CXX=yes
+	export_dynamic_flag_spec_CXX='${wl}-Bexport'
+	runpath_var='LD_RUN_PATH'
+
+	case $cc_basename in
+          CC*)
+	    archive_cmds_CXX='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	    archive_expsym_cmds_CXX='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	    old_archive_cmds_CXX='$CC -Tprelink_objects $oldobjs~
+	      '"$old_archive_cmds_CXX"
+	    reload_cmds_CXX='$CC -Tprelink_objects $reload_objs~
+	      '"$reload_cmds_CXX"
+	    ;;
+	  *)
+	    archive_cmds_CXX='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	    archive_expsym_cmds_CXX='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	    ;;
+	esac
+      ;;
+
+      tandem*)
+        case $cc_basename in
+          NCC*)
+	    # NonStop-UX NCC 3.20
+	    # FIXME: insert proper C++ library support
+	    ld_shlibs_CXX=no
+	    ;;
+          *)
+	    # FIXME: insert proper C++ library support
+	    ld_shlibs_CXX=no
+	    ;;
+        esac
+        ;;
+
+      vxworks*)
+        # FIXME: insert proper C++ library support
+        ld_shlibs_CXX=no
+        ;;
+
+      *)
+        # FIXME: insert proper C++ library support
+        ld_shlibs_CXX=no
+        ;;
+    esac
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_CXX" >&5
+$as_echo "$ld_shlibs_CXX" >&6; }
+    test "$ld_shlibs_CXX" = no && can_build_shared=no
+
+    GCC_CXX="$GXX"
+    LD_CXX="$LD"
+
+    ## CAVEAT EMPTOR:
+    ## There is no encapsulation within the following macros, do not change
+    ## the running order or otherwise move them around unless you know exactly
+    ## what you are doing...
+    # Dependencies to place before and after the object being linked:
+predep_objects_CXX=
+postdep_objects_CXX=
+predeps_CXX=
+postdeps_CXX=
+compiler_lib_search_path_CXX=
+
+cat > conftest.$ac_ext <<_LT_EOF
+class Foo
+{
+public:
+  Foo (void) { a = 0; }
+private:
+  int a;
+};
+_LT_EOF
+
+
+_lt_libdeps_save_CFLAGS=$CFLAGS
+case "$CC $CFLAGS " in #(
+*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;;
+*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;;
+*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;;
+esac
+
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  # Parse the compiler output and extract the necessary
+  # objects, libraries and library flags.
+
+  # Sentinel used to keep track of whether or not we are before
+  # the conftest object file.
+  pre_test_object_deps_done=no
+
+  for p in `eval "$output_verbose_link_cmd"`; do
+    case ${prev}${p} in
+
+    -L* | -R* | -l*)
+       # Some compilers place space between "-{L,R}" and the path.
+       # Remove the space.
+       if test $p = "-L" ||
+          test $p = "-R"; then
+	 prev=$p
+	 continue
+       fi
+
+       # Expand the sysroot to ease extracting the directories later.
+       if test -z "$prev"; then
+         case $p in
+         -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;;
+         -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;;
+         -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;;
+         esac
+       fi
+       case $p in
+       =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;;
+       esac
+       if test "$pre_test_object_deps_done" = no; then
+	 case ${prev} in
+	 -L | -R)
+	   # Internal compiler library paths should come after those
+	   # provided by the user.  The postdeps already come after the
+	   # user-supplied libs so there is no need to process them.
+	   if test -z "$compiler_lib_search_path_CXX"; then
+	     compiler_lib_search_path_CXX="${prev}${p}"
+	   else
+	     compiler_lib_search_path_CXX="${compiler_lib_search_path_CXX} ${prev}${p}"
+	   fi
+	   ;;
+	 # The "-l" case would never come before the object being
+	 # linked, so don't bother handling this case.
+	 esac
+       else
+	 if test -z "$postdeps_CXX"; then
+	   postdeps_CXX="${prev}${p}"
+	 else
+	   postdeps_CXX="${postdeps_CXX} ${prev}${p}"
+	 fi
+       fi
+       prev=
+       ;;
+
+    *.lto.$objext) ;; # Ignore GCC LTO objects
+    *.$objext)
+       # This assumes that the test object file only shows up
+       # once in the compiler output.
+       if test "$p" = "conftest.$objext"; then
+	 pre_test_object_deps_done=yes
+	 continue
+       fi
+
+       if test "$pre_test_object_deps_done" = no; then
+	 if test -z "$predep_objects_CXX"; then
+	   predep_objects_CXX="$p"
+	 else
+	   predep_objects_CXX="$predep_objects_CXX $p"
+	 fi
+       else
+	 if test -z "$postdep_objects_CXX"; then
+	   postdep_objects_CXX="$p"
+	 else
+	   postdep_objects_CXX="$postdep_objects_CXX $p"
+	 fi
+       fi
+       ;;
+
+    *) ;; # Ignore the rest.
+
+    esac
+  done
+
+  # Clean up.
+  rm -f a.out a.exe
+else
+  echo "libtool.m4: error: problem compiling CXX test program"
+fi
+
+$RM -f confest.$objext
+CFLAGS=$_lt_libdeps_save_CFLAGS
+
+# PORTME: override above test on systems where it is broken
+case $host_os in
+interix[3-9]*)
+  # Interix 3.5 installs completely hosed .la files for C++, so rather than
+  # hack all around it, let's just trust "g++" to DTRT.
+  predep_objects_CXX=
+  postdep_objects_CXX=
+  postdeps_CXX=
+  ;;
+
+linux*)
+  case `$CC -V 2>&1 | sed 5q` in
+  *Sun\ C*)
+    # Sun C++ 5.9
+
+    # The more standards-conforming stlport4 library is
+    # incompatible with the Cstd library. Avoid specifying
+    # it if it's in CXXFLAGS. Ignore libCrun as
+    # -library=stlport4 depends on it.
+    case " $CXX $CXXFLAGS " in
+    *" -library=stlport4 "*)
+      solaris_use_stlport4=yes
+      ;;
+    esac
+
+    if test "$solaris_use_stlport4" != yes; then
+      postdeps_CXX='-library=Cstd -library=Crun'
+    fi
+    ;;
+  esac
+  ;;
+
+solaris*)
+  case $cc_basename in
+  CC* | sunCC*)
+    # The more standards-conforming stlport4 library is
+    # incompatible with the Cstd library. Avoid specifying
+    # it if it's in CXXFLAGS. Ignore libCrun as
+    # -library=stlport4 depends on it.
+    case " $CXX $CXXFLAGS " in
+    *" -library=stlport4 "*)
+      solaris_use_stlport4=yes
+      ;;
+    esac
+
+    # Adding this requires a known-good setup of shared libraries for
+    # Sun compiler versions before 5.6, else PIC objects from an old
+    # archive will be linked into the output, leading to subtle bugs.
+    if test "$solaris_use_stlport4" != yes; then
+      postdeps_CXX='-library=Cstd -library=Crun'
+    fi
+    ;;
+  esac
+  ;;
+esac
+
+
+# If the detected post-dependencies already contain -lc, record that
+# the archive commands do not need -lc added.
+case " $postdeps_CXX " in
+*" -lc "*) archive_cmds_need_lc_CXX=no ;;
+esac
+# Derive a plain directory list from the compiler library search path:
+# each " -L" is replaced by a single space and the leading space is
+# then dropped.  Only the -L prefix is rewritten by this sed script.
+ compiler_lib_search_dirs_CXX=
+if test -n "${compiler_lib_search_path_CXX}"; then
+ compiler_lib_search_dirs_CXX=`echo " ${compiler_lib_search_path_CXX}" | ${SED} -e 's! -L! !g' -e 's!^ !!'`
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    lt_prog_compiler_wl_CXX=
+lt_prog_compiler_pic_CXX=
+lt_prog_compiler_static_CXX=
+
+
+  # C++ specific cases for pic, static, wl, etc.
+  if test "$GXX" = yes; then
+    lt_prog_compiler_wl_CXX='-Wl,'
+    lt_prog_compiler_static_CXX='-static'
+
+    case $host_os in
+    aix*)
+      # All AIX code is PIC.
+      if test "$host_cpu" = ia64; then
+	# AIX 5 now supports IA64 processor
+	lt_prog_compiler_static_CXX='-Bstatic'
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # see comment about AmigaOS4 .so support
+            lt_prog_compiler_pic_CXX='-fPIC'
+        ;;
+      m68k)
+            # FIXME: we need at least 68020 code to build shared libraries, but
+            # adding the `-m68020' flag to GCC prevents building anything better,
+            # like `-m68040'.
+            lt_prog_compiler_pic_CXX='-m68020 -resident32 -malways-restore-a4'
+        ;;
+      esac
+      ;;
+
+    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
+      # PIC is the default for these OSes.
+      ;;
+    mingw* | cygwin* | os2* | pw32* | cegcc*)
+      # This hack is so that the source file can tell whether it is being
+      # built for inclusion in a dll (and should export symbols for example).
+      # Although the cygwin gcc ignores -fPIC, still need this for old-style
+      # (--disable-auto-import) libraries
+      lt_prog_compiler_pic_CXX='-DDLL_EXPORT'
+      ;;
+    darwin* | rhapsody*)
+      # PIC is the default on this platform
+      # Common symbols not allowed in MH_DYLIB files
+      lt_prog_compiler_pic_CXX='-fno-common'
+      ;;
+    *djgpp*)
+      # DJGPP does not support shared libraries at all
+      lt_prog_compiler_pic_CXX=
+      ;;
+    haiku*)
+      # PIC is the default for Haiku.
+      # The "-static" flag exists, but is broken.
+      lt_prog_compiler_static_CXX=
+      ;;
+    interix[3-9]*)
+      # Interix 3.x gcc -fpic/-fPIC options generate broken code.
+      # Instead, we relocate shared libraries at runtime.
+      ;;
+    sysv4*MP*)
+      if test -d /usr/nec; then
+	lt_prog_compiler_pic_CXX=-Kconform_pic
+      fi
+      ;;
+    hpux*)
+      # PIC is the default for 64-bit PA HP-UX, but not for 32-bit
+      # PA HP-UX.  On IA64 HP-UX, PIC is the default but the pic flag
+      # sets the default TLS model and affects inlining.
+      case $host_cpu in
+      hppa*64*)
+	;;
+      *)
+	lt_prog_compiler_pic_CXX='-fPIC'
+	;;
+      esac
+      ;;
+    *qnx* | *nto*)
+      # QNX uses GNU C++, but need to define -shared option too, otherwise
+      # it will coredump.
+      lt_prog_compiler_pic_CXX='-fPIC -shared'
+      ;;
+    *)
+      lt_prog_compiler_pic_CXX='-fPIC'
+      ;;
+    esac
+  else
+    case $host_os in
+      aix[4-9]*)
+	# All AIX code is PIC.
+	if test "$host_cpu" = ia64; then
+	  # AIX 5 now supports IA64 processor
+	  lt_prog_compiler_static_CXX='-Bstatic'
+	else
+	  lt_prog_compiler_static_CXX='-bnso -bI:/lib/syscalls.exp'
+	fi
+	;;
+      chorus*)
+	case $cc_basename in
+	cxch68*)
+	  # Green Hills C++ Compiler
+	  # _LT_TAGVAR(lt_prog_compiler_static, CXX)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a"
+	  ;;
+	esac
+	;;
+      mingw* | cygwin* | os2* | pw32* | cegcc*)
+	# This hack is so that the source file can tell whether it is being
+	# built for inclusion in a dll (and should export symbols for example).
+	lt_prog_compiler_pic_CXX='-DDLL_EXPORT'
+	;;
+      dgux*)
+	case $cc_basename in
+	  ec++*)
+	    lt_prog_compiler_pic_CXX='-KPIC'
+	    ;;
+	  ghcx*)
+	    # Green Hills C++ Compiler
+	    lt_prog_compiler_pic_CXX='-pic'
+	    ;;
+	  *)
+	    ;;
+	esac
+	;;
+      freebsd* | dragonfly*)
+	# FreeBSD uses GNU C++
+	;;
+      hpux9* | hpux10* | hpux11*)
+	case $cc_basename in
+	  CC*)
+	    lt_prog_compiler_wl_CXX='-Wl,'
+	    lt_prog_compiler_static_CXX='${wl}-a ${wl}archive'
+	    if test "$host_cpu" != ia64; then
+	      lt_prog_compiler_pic_CXX='+Z'
+	    fi
+	    ;;
+	  aCC*)
+	    lt_prog_compiler_wl_CXX='-Wl,'
+	    lt_prog_compiler_static_CXX='${wl}-a ${wl}archive'
+	    case $host_cpu in
+	    hppa*64*|ia64*)
+	      # +Z the default
+	      ;;
+	    *)
+	      lt_prog_compiler_pic_CXX='+Z'
+	      ;;
+	    esac
+	    ;;
+	  *)
+	    ;;
+	esac
+	;;
+      interix*)
+	# This is c89, which is MS Visual C++ (no shared libs)
+	# Anyone wants to do a port?
+	;;
+      irix5* | irix6* | nonstopux*)
+	case $cc_basename in
+	  CC*)
+	    lt_prog_compiler_wl_CXX='-Wl,'
+	    lt_prog_compiler_static_CXX='-non_shared'
+	    # CC pic flag -KPIC is the default.
+	    ;;
+	  *)
+	    ;;
+	esac
+	;;
+      linux* | k*bsd*-gnu | kopensolaris*-gnu)
+	case $cc_basename in
+	  KCC*)
+	    # KAI C++ Compiler
+	    lt_prog_compiler_wl_CXX='--backend -Wl,'
+	    lt_prog_compiler_pic_CXX='-fPIC'
+	    ;;
+	  ecpc* )
+	    # old Intel C++ for x86_64 which still supported -KPIC.
+	    lt_prog_compiler_wl_CXX='-Wl,'
+	    lt_prog_compiler_pic_CXX='-KPIC'
+	    lt_prog_compiler_static_CXX='-static'
+	    ;;
+	  icpc* )
+	    # Intel C++, used to be incompatible with GCC.
+	    # ICC 10 doesn't accept -KPIC any more.
+	    lt_prog_compiler_wl_CXX='-Wl,'
+	    lt_prog_compiler_pic_CXX='-fPIC'
+	    lt_prog_compiler_static_CXX='-static'
+	    ;;
+	  pgCC* | pgcpp*)
+	    # Portland Group C++ compiler
+	    lt_prog_compiler_wl_CXX='-Wl,'
+	    lt_prog_compiler_pic_CXX='-fpic'
+	    lt_prog_compiler_static_CXX='-Bstatic'
+	    ;;
+	  cxx*)
+	    # Compaq C++
+	    # Make sure the PIC flag is empty.  It appears that all Alpha
+	    # Linux and Compaq Tru64 Unix objects are PIC.
+	    lt_prog_compiler_pic_CXX=
+	    lt_prog_compiler_static_CXX='-non_shared'
+	    ;;
+	  xlc* | xlC* | bgxl[cC]* | mpixl[cC]*)
+	    # IBM XL 8.0, 9.0 on PPC and BlueGene
+	    lt_prog_compiler_wl_CXX='-Wl,'
+	    lt_prog_compiler_pic_CXX='-qpic'
+	    lt_prog_compiler_static_CXX='-qstaticlink'
+	    ;;
+	  *)
+	    case `$CC -V 2>&1 | sed 5q` in
+	    *Sun\ C*)
+	      # Sun C++ 5.9
+	      lt_prog_compiler_pic_CXX='-KPIC'
+	      lt_prog_compiler_static_CXX='-Bstatic'
+	      lt_prog_compiler_wl_CXX='-Qoption ld '
+	      ;;
+	    esac
+	    ;;
+	esac
+	;;
+      lynxos*)
+	;;
+      m88k*)
+	;;
+      mvs*)
+	case $cc_basename in
+	  cxx*)
+	    lt_prog_compiler_pic_CXX='-W c,exportall'
+	    ;;
+	  *)
+	    ;;
+	esac
+	;;
+      netbsd*)
+	;;
+      *qnx* | *nto*)
+        # QNX uses GNU C++, but need to define -shared option too, otherwise
+        # it will coredump.
+        lt_prog_compiler_pic_CXX='-fPIC -shared'
+        ;;
+      osf3* | osf4* | osf5*)
+	case $cc_basename in
+	  KCC*)
+	    lt_prog_compiler_wl_CXX='--backend -Wl,'
+	    ;;
+	  RCC*)
+	    # Rational C++ 2.4.1
+	    lt_prog_compiler_pic_CXX='-pic'
+	    ;;
+	  cxx*)
+	    # Digital/Compaq C++
+	    lt_prog_compiler_wl_CXX='-Wl,'
+	    # Make sure the PIC flag is empty.  It appears that all Alpha
+	    # Linux and Compaq Tru64 Unix objects are PIC.
+	    lt_prog_compiler_pic_CXX=
+	    lt_prog_compiler_static_CXX='-non_shared'
+	    ;;
+	  *)
+	    ;;
+	esac
+	;;
+      psos*)
+	;;
+      solaris*)
+	case $cc_basename in
+	  CC* | sunCC*)
+	    # Sun C++ 4.2, 5.x and Centerline C++
+	    lt_prog_compiler_pic_CXX='-KPIC'
+	    lt_prog_compiler_static_CXX='-Bstatic'
+	    lt_prog_compiler_wl_CXX='-Qoption ld '
+	    ;;
+	  gcx*)
+	    # Green Hills C++ Compiler
+	    lt_prog_compiler_pic_CXX='-PIC'
+	    ;;
+	  *)
+	    ;;
+	esac
+	;;
+      sunos4*)
+	case $cc_basename in
+	  CC*)
+	    # Sun C++ 4.x
+	    lt_prog_compiler_pic_CXX='-pic'
+	    lt_prog_compiler_static_CXX='-Bstatic'
+	    ;;
+	  lcc*)
+	    # Lucid
+	    lt_prog_compiler_pic_CXX='-pic'
+	    ;;
+	  *)
+	    ;;
+	esac
+	;;
+      sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
+	case $cc_basename in
+	  CC*)
+	    lt_prog_compiler_wl_CXX='-Wl,'
+	    lt_prog_compiler_pic_CXX='-KPIC'
+	    lt_prog_compiler_static_CXX='-Bstatic'
+	    ;;
+	esac
+	;;
+      tandem*)
+	case $cc_basename in
+	  NCC*)
+	    # NonStop-UX NCC 3.20
+	    lt_prog_compiler_pic_CXX='-KPIC'
+	    ;;
+	  *)
+	    ;;
+	esac
+	;;
+      vxworks*)
+	;;
+      *)
+	lt_prog_compiler_can_build_shared_CXX=no
+	;;
+    esac
+  fi
+
+case $host_os in
+  # For platforms which do not support PIC, -DPIC is meaningless:
+  *djgpp*)
+    lt_prog_compiler_pic_CXX=
+    ;;
+  *)
+    lt_prog_compiler_pic_CXX="$lt_prog_compiler_pic_CXX -DPIC"
+    ;;
+esac
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5
+$as_echo_n "checking for $compiler option to produce PIC... " >&6; }
+if ${lt_cv_prog_compiler_pic_CXX+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_pic_CXX=$lt_prog_compiler_pic_CXX
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_CXX" >&5
+$as_echo "$lt_cv_prog_compiler_pic_CXX" >&6; }
+lt_prog_compiler_pic_CXX=$lt_cv_prog_compiler_pic_CXX
+
+#
+# Check to make sure the PIC flag actually works.
+#
+if test -n "$lt_prog_compiler_pic_CXX"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works" >&5
+$as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works... " >&6; }
+if ${lt_cv_prog_compiler_pic_works_CXX+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_pic_works_CXX=no
+   ac_outfile=conftest.$ac_objext
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+   lt_compiler_flag="$lt_prog_compiler_pic_CXX -DPIC"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   # The option is referenced via a variable to avoid confusing sed.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>conftest.err)
+   ac_status=$?
+   cat conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   if (exit $ac_status) && test -s "$ac_outfile"; then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings other than the usual output.
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp
+     $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+     if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then
+       lt_cv_prog_compiler_pic_works_CXX=yes
+     fi
+   fi
+   $RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works_CXX" >&5
+$as_echo "$lt_cv_prog_compiler_pic_works_CXX" >&6; }
+
+if test x"$lt_cv_prog_compiler_pic_works_CXX" = xyes; then
+    case $lt_prog_compiler_pic_CXX in
+     "" | " "*) ;;
+     *) lt_prog_compiler_pic_CXX=" $lt_prog_compiler_pic_CXX" ;;
+     esac
+else
+    lt_prog_compiler_pic_CXX=
+     lt_prog_compiler_can_build_shared_CXX=no
+fi
+
+fi
+
+
+
+
+
+#
+# Check to make sure the static flag actually works.
+#
+# Expand any ${wl} references inside the static flag, with wl set to
+# $lt_prog_compiler_wl_CXX, and keep the expanded form for the test.
+wl=$lt_prog_compiler_wl_CXX eval lt_tmp_static_flag=\"$lt_prog_compiler_static_CXX\"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5
+$as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; }
+# Reuse a previously stored answer when lt_cv_prog_compiler_static_works_CXX
+# is already set; otherwise run the link test below.
+if ${lt_cv_prog_compiler_static_works_CXX+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_static_works_CXX=no
+   # Temporarily append the flag to LDFLAGS for the duration of the test link.
+   save_LDFLAGS="$LDFLAGS"
+   LDFLAGS="$LDFLAGS $lt_tmp_static_flag"
+   echo "$lt_simple_link_test_code" > conftest.$ac_ext
+   # The link must succeed and leave a non-empty executable.
+   if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
+     # The linker can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     if test -s conftest.err; then
+       # Append any errors to the config.log.
+       cat conftest.err 1>&5
+       # Compare stderr against the recorded linker boilerplate after
+       # dropping blank lines (and, for stderr, lines that start with
+       # optional spaces followed by "+"); only an exact match counts
+       # as success.
+       $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp
+       $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+       if diff conftest.exp conftest.er2 >/dev/null; then
+         lt_cv_prog_compiler_static_works_CXX=yes
+       fi
+     else
+       lt_cv_prog_compiler_static_works_CXX=yes
+     fi
+   fi
+   $RM -r conftest*
+   LDFLAGS="$save_LDFLAGS"
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works_CXX" >&5
+$as_echo "$lt_cv_prog_compiler_static_works_CXX" >&6; }
+
+# Keep the static flag only when the test passed; otherwise clear it.
+if test x"$lt_cv_prog_compiler_static_works_CXX" = xyes; then
+    :
+else
+    lt_prog_compiler_static_CXX=
+fi
+
+
+
+
+    # Determine whether $compiler accepts -c together with -o, storing
+    # the answer (yes/no) in lt_cv_prog_compiler_c_o_CXX.
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
+$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
+if ${lt_cv_prog_compiler_c_o_CXX+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_c_o_CXX=no
+   # Work inside a fresh scratch directory "conftest" with an "out"
+   # subdirectory that receives the object file and the error log.
+   $RM -r conftest 2>/dev/null
+   mkdir conftest
+   cd conftest
+   mkdir out
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+   lt_compiler_flag="-o out/conftest2.$ac_objext"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>out/conftest.err)
+   ac_status=$?
+   cat out/conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   # Success requires a zero exit status and a non-empty object file at
+   # the path requested with -o.
+   if (exit $ac_status) && test -s out/conftest2.$ac_objext
+   then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     # Accept when the filtered stderr is empty or equals the recorded
+     # compiler boilerplate (blank lines removed from both; lines that
+     # start with optional spaces followed by "+" removed from stderr).
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
+     $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
+     if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
+       lt_cv_prog_compiler_c_o_CXX=yes
+     fi
+   fi
+   chmod u+w . 2>&5
+   $RM conftest*
+   # SGI C++ compiler will create directory out/ii_files/ for
+   # template instantiation
+   test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
+   $RM out/* && rmdir out
+   # Leave the scratch directory and remove it along with any stray
+   # conftest* files in the parent directory.
+   cd ..
+   $RM -r conftest
+   $RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_CXX" >&5
+$as_echo "$lt_cv_prog_compiler_c_o_CXX" >&6; }
+
+
+
+    # This section repeats the immediately preceding -c -o check verbatim.
+    # That check always leaves lt_cv_prog_compiler_c_o_CXX set (to no or
+    # yes), so the test below takes the "(cached)" branch here and only
+    # re-reports the stored result.
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
+$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
+if ${lt_cv_prog_compiler_c_o_CXX+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_c_o_CXX=no
+   $RM -r conftest 2>/dev/null
+   mkdir conftest
+   cd conftest
+   mkdir out
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+   lt_compiler_flag="-o out/conftest2.$ac_objext"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>out/conftest.err)
+   ac_status=$?
+   cat out/conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   if (exit $ac_status) && test -s out/conftest2.$ac_objext
+   then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
+     $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
+     if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
+       lt_cv_prog_compiler_c_o_CXX=yes
+     fi
+   fi
+   chmod u+w . 2>&5
+   $RM conftest*
+   # SGI C++ compiler will create directory out/ii_files/ for
+   # template instantiation
+   test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
+   $RM out/* && rmdir out
+   cd ..
+   $RM -r conftest
+   $RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_CXX" >&5
+$as_echo "$lt_cv_prog_compiler_c_o_CXX" >&6; }
+
+
+
+
+# Decide whether hard links can be used for locking.  The probe only
+# runs when the compiler was found not to support -c -o and need_locks
+# has not been set to "no"; in every other case need_locks is forced
+# to "no" and hard_links stays "nottested".
+hard_links="nottested"
+if test "$lt_cv_prog_compiler_c_o_CXX" = no && test "$need_locks" != no; then
+  # do not overwrite the value of need_locks provided by the user
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5
+$as_echo_n "checking if we can lock with hard links... " >&6; }
+  hard_links=yes
+  $RM conftest*
+  # Probe 1: linking a source file that does not exist must fail.
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  touch conftest.a
+  # Probe 2: linking the now-existing file must succeed.
+  ln conftest.a conftest.b 2>&5 || hard_links=no
+  # Probe 3: linking again onto the already existing target must fail.
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5
+$as_echo "$hard_links" >&6; }
+  # Without usable hard links, warn (to the log and to stderr) and
+  # downgrade need_locks to "warn".
+  if test "$hard_links" = no; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&5
+$as_echo "$as_me: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2;}
+    need_locks=warn
+  fi
+else
+  need_locks=no
+fi
+
+
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5
+$as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; }
+
+  export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
+  exclude_expsyms_CXX='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'
+  case $host_os in
+  aix[4-9]*)
+    # If we're using GNU nm, then we don't want the "-C" option.
+    # -C means demangle to AIX nm, but means don't demangle with GNU nm
+    # Also, AIX nm treats weak defined symbols like other global defined
+    # symbols, whereas GNU nm marks them as "W".
+    if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
+      export_symbols_cmds_CXX='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+    else
+      export_symbols_cmds_CXX='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+    fi
+    ;;
+  pw32*)
+    export_symbols_cmds_CXX="$ltdll_cmds"
+    ;;
+  cygwin* | mingw* | cegcc*)
+    case $cc_basename in
+    cl*)
+      exclude_expsyms_CXX='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
+      ;;
+    *)
+      export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols'
+      exclude_expsyms_CXX='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'
+      ;;
+    esac
+    ;;
+  *)
+    export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
+    ;;
+  esac
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_CXX" >&5
+$as_echo "$ld_shlibs_CXX" >&6; }
+test "$ld_shlibs_CXX" = no && can_build_shared=no
+
+with_gnu_ld_CXX=$with_gnu_ld
+
+
+
+
+
+
+#
+# Do we need to explicitly link libc?
+#
+case "x$archive_cmds_need_lc_CXX" in # only probe when the value is still empty or "yes"
+x|xyes)
+  # Assume -lc should be added
+  archive_cmds_need_lc_CXX=yes
+
+  if test "$enable_shared" = yes && test "$GCC" = yes; then # the probe runs only for shared builds with GCC
+    case $archive_cmds_CXX in
+    *'~'*)
+      # FIXME: we may have to deal with multi-command sequences.
+      ;;
+    '$CC '*)
+      # Test whether the compiler implicitly links with -lc since on some
+      # systems, -lgcc has to come before -lc. If gcc already passes -lc
+      # to ld, don't add -lc before -lgcc.
+      { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5
+$as_echo_n "checking whether -lc should be explicitly linked in... " >&6; }
+if ${lt_cv_archive_cmds_need_lc_CXX+:} false; then : # cache variable already set: reuse it and skip the probe
+  $as_echo_n "(cached) " >&6
+else
+  $RM conftest*
+	echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+	if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } 2>conftest.err; then
+	  soname=conftest # the assignments below set variables before $archive_cmds_CXX is eval'ed
+	  lib=conftest
+	  libobjs=conftest.$ac_objext
+	  deplibs=
+	  wl=$lt_prog_compiler_wl_CXX
+	  pic_flag=$lt_prog_compiler_pic_CXX
+	  compiler_flags=-v # presumably makes the compiler echo its link line so " -lc " can be grepped -- TODO confirm
+	  linker_flags=-v
+	  verstring=
+	  output_objdir=.
+	  libname=conftest
+	  lt_save_allow_undefined_flag=$allow_undefined_flag_CXX # saved here, restored after the probe
+	  allow_undefined_flag_CXX=
+	  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds_CXX 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5
+  (eval $archive_cmds_CXX 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+	  then
+	    lt_cv_archive_cmds_need_lc_CXX=no # " -lc " already shows up in the archive command output
+	  else
+	    lt_cv_archive_cmds_need_lc_CXX=yes # " -lc " not found in the output
+	  fi
+	  allow_undefined_flag_CXX=$lt_save_allow_undefined_flag
+	else
+	  cat conftest.err 1>&5 # compile of the test source failed: copy its stderr to fd 5; the cache variable stays unset
+	fi
+	$RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc_CXX" >&5
+$as_echo "$lt_cv_archive_cmds_need_lc_CXX" >&6; }
+      archive_cmds_need_lc_CXX=$lt_cv_archive_cmds_need_lc_CXX
+      ;;
+    esac
+  fi
+  ;;
+esac
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5
+$as_echo_n "checking dynamic linker characteristics... " >&6; }
+
+library_names_spec= # defaults start here; the case on $host_os that follows overrides them per platform
+libname_spec='lib$name'
+soname_spec=
+shrext_cmds=".so"
+postinstall_cmds=
+postuninstall_cmds=
+finish_cmds=
+finish_eval=
+shlibpath_var=
+shlibpath_overrides_runpath=unknown
+version_type=none
+dynamic_linker="$host_os ld.so" # default description; several platform arms replace it, some with "no"
+sys_lib_dlsearch_path_spec="/lib /usr/lib"
+need_lib_prefix=unknown
+hardcode_into_libs=no
+
+# when you set need_version to no, make sure it does not cause -set_version
+# flags to be left without arguments
+need_version=unknown
+
+case $host_os in
+aix3*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
+  shlibpath_var=LIBPATH
+
+  # AIX 3 has no versioning support, so we append a major version to the name.
+  soname_spec='${libname}${release}${shared_ext}$major'
+  ;;
+
+aix[4-9]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  hardcode_into_libs=yes
+  if test "$host_cpu" = ia64; then
+    # AIX 5 supports IA64
+    library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}'
+    shlibpath_var=LD_LIBRARY_PATH
+  else
+    # With GCC up to 2.95.x, collect2 would create an import file
+    # for dependence libraries.  The import file would start with
+    # the line `#! .'.  This would cause the generated library to
+    # depend on `.', always an invalid library.  This was fixed in
+    # development snapshots of GCC prior to 3.0.
+    case $host_os in
+      aix4 | aix4.[01] | aix4.[01].*)
+      if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
+	   echo ' yes '
+	   echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then
+	:
+      else
+	can_build_shared=no
+      fi
+      ;;
+    esac
+    # AIX (on Power*) has no versioning support, so currently we can not hardcode correct
+    # soname into executable. Probably we can add versioning support to
+    # collect2, so additional links can be useful in future.
+    if test "$aix_use_runtimelinking" = yes; then
+      # If using run time linking (on AIX 4.2 or later) use lib<name>.so
+      # instead of lib<name>.a to let people know that these are not
+      # typical AIX shared libraries.
+      library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    else
+      # We preserve .a as extension for shared libraries through AIX4.2
+      # and later when we are not doing run time linking.
+      library_names_spec='${libname}${release}.a $libname.a'
+      soname_spec='${libname}${release}${shared_ext}$major'
+    fi
+    shlibpath_var=LIBPATH
+  fi
+  ;;
+
+amigaos*)
+  case $host_cpu in
+  powerpc)
+    # Since July 2007 AmigaOS4 officially supports .so libraries.
+    # When compiling the executable, add -use-dynld -Lsobjs: to the compileline.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    ;;
+  m68k)
+    library_names_spec='$libname.ixlibrary $libname.a'
+    # Create ${libname}_ixlibrary.a entries in /sys/libs.
+    finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done'
+    ;;
+  esac
+  ;;
+
+beos*)
+  library_names_spec='${libname}${shared_ext}'
+  dynamic_linker="$host_os ld.so"
+  shlibpath_var=LIBRARY_PATH
+  ;;
+
+bsdi[45]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib"
+  sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib"
+  # the default ld.so.conf also contains /usr/contrib/lib and
+  # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow
+  # libtool to hard-code these into programs
+  ;;
+
+cygwin* | mingw* | pw32* | cegcc*)
+  version_type=windows
+  shrext_cmds=".dll"
+  need_version=no
+  need_lib_prefix=no
+
+  case $GCC,$cc_basename in
+  yes,*)
+    # gcc
+    library_names_spec='$libname.dll.a'
+    # DLL is installed to $(libdir)/../bin by postinstall_cmds
+    postinstall_cmds='base_file=`basename \${file}`~
+      dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
+      dldir=$destdir/`dirname \$dlpath`~
+      test -d \$dldir || mkdir -p \$dldir~
+      $install_prog $dir/$dlname \$dldir/$dlname~
+      chmod a+x \$dldir/$dlname~
+      if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then
+        eval '\''$striplib \$dldir/$dlname'\'' || exit \$?;
+      fi'
+    postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
+      dlpath=$dir/\$dldll~
+       $RM \$dlpath'
+    shlibpath_overrides_runpath=yes
+
+    case $host_os in
+    cygwin*)
+      # Cygwin DLLs use 'cyg' prefix rather than 'lib'
+      soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+
+      ;;
+    mingw* | cegcc*)
+      # MinGW DLLs use traditional 'lib' prefix
+      soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+      ;;
+    pw32*)
+      # pw32 DLLs use 'pw' prefix rather than 'lib'
+      library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+      ;;
+    esac
+    dynamic_linker='Win32 ld.exe'
+    ;;
+
+  *,cl*)
+    # Native MSVC
+    libname_spec='$name'
+    soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+    library_names_spec='${libname}.dll.lib'
+
+    case $build_os in
+    mingw*)
+      sys_lib_search_path_spec=
+      lt_save_ifs=$IFS
+      IFS=';'
+      for lt_path in $LIB
+      do
+        IFS=$lt_save_ifs
+        # Let DOS variable expansion print the short 8.3 style file name.
+        lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"`
+        sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path"
+      done
+      IFS=$lt_save_ifs
+      # Convert to MSYS style.
+      sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'`
+      ;;
+    cygwin*)
+      # Convert to unix form, then to dos form, then back to unix form
+      # but this time dos style (no spaces!) so that the unix form looks
+      # like /cygdrive/c/PROGRA~1:/cygdr...
+      sys_lib_search_path_spec=`cygpath --path --unix "$LIB"`
+      sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null`
+      sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
+      ;;
+    *)
+      sys_lib_search_path_spec="$LIB"
+      if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then
+        # It is most probably a Windows format PATH.
+        sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
+      else
+        sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
+      fi
+      # FIXME: find the short name or the path components, as spaces are
+      # common. (e.g. "Program Files" -> "PROGRA~1")
+      ;;
+    esac
+
+    # DLL is installed to $(libdir)/../bin by postinstall_cmds
+    postinstall_cmds='base_file=`basename \${file}`~
+      dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
+      dldir=$destdir/`dirname \$dlpath`~
+      test -d \$dldir || mkdir -p \$dldir~
+      $install_prog $dir/$dlname \$dldir/$dlname'
+    postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
+      dlpath=$dir/\$dldll~
+       $RM \$dlpath'
+    shlibpath_overrides_runpath=yes
+    dynamic_linker='Win32 link.exe'
+    ;;
+
+  *)
+    # Assume MSVC wrapper
+    library_names_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib'
+    dynamic_linker='Win32 ld.exe'
+    ;;
+  esac
+  # FIXME: first we should search . and the directory the executable is in
+  shlibpath_var=PATH
+  ;;
+
+darwin* | rhapsody*)
+  dynamic_linker="$host_os dyld"
+  version_type=darwin
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext' # note: $major comes before $shared_ext in this arm
+  soname_spec='${libname}${release}${major}$shared_ext'
+  shlibpath_overrides_runpath=yes
+  shlibpath_var=DYLD_LIBRARY_PATH
+  shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' # evaluated later: .so when $module is yes, .dylib otherwise
+
+  sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib'
+  ;;
+
+dgux*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+freebsd* | dragonfly*)
+  # DragonFly does not have aout.  When/if they implement a new
+  # versioning mechanism, adjust this.
+  if test -x /usr/bin/objformat; then # prefer the answer of the system's objformat tool when it exists
+    objformat=`/usr/bin/objformat`
+  else
+    case $host_os in
+    freebsd[23].*) objformat=aout ;;
+    *) objformat=elf ;;
+    esac
+  fi
+  version_type=freebsd-$objformat # becomes freebsd-elf or freebsd-aout (or whatever objformat printed)
+  case $version_type in
+    freebsd-elf*)
+      library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
+      need_version=no
+      need_lib_prefix=no
+      ;;
+    freebsd-*)
+      library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix'
+      need_version=yes
+      ;;
+  esac
+  shlibpath_var=LD_LIBRARY_PATH
+  case $host_os in # runpath/hardcode behaviour is keyed on the OS release
+  freebsd2.*)
+    shlibpath_overrides_runpath=yes
+    ;;
+  freebsd3.[01]* | freebsdelf3.[01]*)
+    shlibpath_overrides_runpath=yes
+    hardcode_into_libs=yes
+    ;;
+  freebsd3.[2-9]* | freebsdelf3.[2-9]* | \
+  freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1)
+    shlibpath_overrides_runpath=no
+    hardcode_into_libs=yes
+    ;;
+  *) # from 4.6 on, and DragonFly
+    shlibpath_overrides_runpath=yes
+    hardcode_into_libs=yes
+    ;;
+  esac
+  ;;
+
+gnu*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+haiku*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  dynamic_linker="$host_os runtime_loader"
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib'
+  hardcode_into_libs=yes
+  ;;
+
+hpux9* | hpux10* | hpux11*)
+  # Give a soname corresponding to the major version so that dld.sl refuses to
+  # link against other versions.
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  case $host_cpu in
+  ia64*)
+    shrext_cmds='.so'
+    hardcode_into_libs=yes
+    dynamic_linker="$host_os dld.so"
+    shlibpath_var=LD_LIBRARY_PATH
+    shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    if test "X$HPUX_IA64_MODE" = X32; then
+      sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib"
+    else
+      sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64"
+    fi
+    sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
+    ;;
+  hppa*64*)
+    shrext_cmds='.sl'
+    hardcode_into_libs=yes
+    dynamic_linker="$host_os dld.sl"
+    shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH
+    shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64"
+    sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
+    ;;
+  *)
+    shrext_cmds='.sl'
+    dynamic_linker="$host_os dld.sl"
+    shlibpath_var=SHLIB_PATH
+    shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    ;;
+  esac
+  # HP-UX runs *really* slowly unless shared libraries are mode 555, ...
+  postinstall_cmds='chmod 555 $lib'
+  # or fails outright, so override atomically:
+  install_override_mode=555
+  ;;
+
+interix[3-9]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+irix5* | irix6* | nonstopux*)
+  case $host_os in
+    nonstopux*) version_type=nonstopux ;;
+    *)
+	if test "$lt_cv_prog_gnu_ld" = yes; then
+		version_type=linux # correct to gnu/linux during the next big refactor
+	else
+		version_type=irix
+	fi ;;
+  esac
+  need_lib_prefix=no
+  need_version=no
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}'
+  case $host_os in
+  irix5* | nonstopux*)
+    libsuff= shlibsuff=
+    ;;
+  *)
+    case $LD in # libtool.m4 will add one of these switches to LD
+    *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ")
+      libsuff= shlibsuff= libmagic=32-bit;;
+    *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ")
+      libsuff=32 shlibsuff=N32 libmagic=N32;;
+    *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ")
+      libsuff=64 shlibsuff=64 libmagic=64-bit;;
+    *) libsuff= shlibsuff= libmagic=never-match;;
+    esac
+    ;;
+  esac
+  shlibpath_var=LD_LIBRARY${shlibsuff}_PATH
+  shlibpath_overrides_runpath=no
+  sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}"
+  sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}"
+  hardcode_into_libs=yes
+  ;;
+
+# No shared lib support for Linux oldld, aout, or coff.
+linux*oldld* | linux*aout* | linux*coff*)
+  dynamic_linker=no
+  ;;
+
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no # provisional; replaced below by the probed/cached value
+
+  # Some binutils ld are patched to set DT_RUNPATH
+  if ${lt_cv_shlibpath_overrides_runpath+:} false; then : # cache variable already set: skip the link probe
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_shlibpath_overrides_runpath=no
+    save_LDFLAGS=$LDFLAGS # LDFLAGS and libdir are modified for the probe and restored afterwards
+    save_libdir=$libdir
+    eval "libdir=/foo; wl=\"$lt_prog_compiler_wl_CXX\"; \
+	 LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec_CXX\""
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+  if  ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null; then : # objdump shows a RUNPATH entry mentioning the probe libdir (/foo)
+  lt_cv_shlibpath_overrides_runpath=yes
+fi
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+    LDFLAGS=$save_LDFLAGS
+    libdir=$save_libdir
+
+fi
+
+  shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath
+
+  # This implies no fast_install, which is unacceptable.
+  # Some rework will be needed to allow for fast_install
+  # before this can be enabled.
+  hardcode_into_libs=yes
+
+  # Append ld.so.conf contents to the search path
+  if test -f /etc/ld.so.conf; then # awk inlines "include" lines via cat; sed then drops comments, hwcap lines and quotes
+    lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[	 ]*hwcap[	 ]/d;s/[:,	]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '`
+    sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra"
+  fi
+
+  # We used to test for /lib/ld.so.1 and disable shared libraries on
+  # powerpc, because MkLinux only supported shared libraries with the
+  # GNU dynamic linker.  Since this was broken with cross compilers,
+  # most powerpc-linux boxes support dynamic linking these days and
+  # people can always --disable-shared, the test was removed, and we
+  # assume the GNU/Linux dynamic linker is in use.
+  dynamic_linker='GNU/Linux ld.so'
+  ;;
+
+netbsd*)
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then # literal __ELF__ survives preprocessing: treated as a.out
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+    finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+    dynamic_linker='NetBSD (a.out) ld.so'
+  else
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major' # soname_spec is set only in this (ELF) branch
+    dynamic_linker='NetBSD ld.elf_so'
+  fi
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  ;;
+
+newsos6)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  ;;
+
+*nto* | *qnx*)
+  version_type=qnx
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  dynamic_linker='ldqnx.so'
+  ;;
+
+openbsd*)
+  version_type=sunos
+  sys_lib_dlsearch_path_spec="/usr/lib"
+  need_lib_prefix=no
+  # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs.
+  case $host_os in
+    openbsd3.3 | openbsd3.3.*)	need_version=yes ;;
+    *)				need_version=no  ;;
+  esac
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+    case $host_os in
+      openbsd2.[89] | openbsd2.[89].*)
+	shlibpath_overrides_runpath=no
+	;;
+      *)
+	shlibpath_overrides_runpath=yes
+	;;
+      esac
+  else
+    shlibpath_overrides_runpath=yes
+  fi
+  ;;
+
+os2*)
+  libname_spec='$name'
+  shrext_cmds=".dll"
+  need_lib_prefix=no
+  library_names_spec='$libname${shared_ext} $libname.a'
+  dynamic_linker='OS/2 ld.exe'
+  shlibpath_var=LIBPATH
+  ;;
+
+osf3* | osf4* | osf5*)
+  version_type=osf
+  need_lib_prefix=no
+  need_version=no
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib"
+  sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
+  ;;
+
+rdos*)
+  dynamic_linker=no
+  ;;
+
+solaris*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  # ldd complains unless libraries are executable
+  postinstall_cmds='chmod +x $lib'
+  ;;
+
+sunos4*)
+  version_type=sunos
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+  finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  if test "$with_gnu_ld" = yes; then
+    need_lib_prefix=no
+  fi
+  need_version=yes
+  ;;
+
+sysv4 | sysv4.3*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  case $host_vendor in
+    sni)
+      shlibpath_overrides_runpath=no
+      need_lib_prefix=no
+      runpath_var=LD_RUN_PATH
+      ;;
+    siemens)
+      need_lib_prefix=no
+      ;;
+    motorola)
+      need_lib_prefix=no
+      need_version=no
+      shlibpath_overrides_runpath=no
+      sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib'
+      ;;
+  esac
+  ;;
+
+sysv4*MP*)
+  if test -d /usr/nec ;then
+    version_type=linux # correct to gnu/linux during the next big refactor
+    library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
+    soname_spec='$libname${shared_ext}.$major'
+    shlibpath_var=LD_LIBRARY_PATH
+  fi
+  ;;
+
+sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
+  version_type=freebsd-elf
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  if test "$with_gnu_ld" = yes; then
+    sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib'
+  else
+    sys_lib_search_path_spec='/usr/ccs/lib /usr/lib'
+    case $host_os in
+      sco3.2v5*)
+        sys_lib_search_path_spec="$sys_lib_search_path_spec /lib"
+	;;
+    esac
+  fi
+  sys_lib_dlsearch_path_spec='/usr/lib'
+  ;;
+
+tpf*)
+  # TPF is a cross-target only.  Preferred cross-host = GNU/Linux.
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+uts4*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+*)
+  dynamic_linker=no
+  ;;
+esac
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5
+$as_echo "$dynamic_linker" >&6; }
+test "$dynamic_linker" = no && can_build_shared=no # platforms that set dynamic_linker=no cannot build shared libraries
+
+variables_saved_for_relink="PATH $shlibpath_var $runpath_var"
+if test "$GCC" = yes; then # with GCC, three more variable names are appended to the list
+  variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH"
+fi
+
+if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then # a set lt_cv_ value replaces the search path chosen above
+  sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec"
+fi
+if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then # same override for the dlsearch path
+  sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec"
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5
+$as_echo_n "checking how to hardcode library paths into programs... " >&6; }
+hardcode_action_CXX= # ends up as relink, immediate or unsupported
+if test -n "$hardcode_libdir_flag_spec_CXX" ||
+   test -n "$runpath_var_CXX" ||
+   test "X$hardcode_automatic_CXX" = "Xyes" ; then
+
+  # We can hardcode non-existent directories.
+  if test "$hardcode_direct_CXX" != no &&
+     # If the only mechanism to avoid hardcoding is shlibpath_var, we
+     # have to relink, otherwise we might link with an installed library
+     # when we should be linking with a yet-to-be-installed one
+     ## test "$_LT_TAGVAR(hardcode_shlibpath_var, CXX)" != no &&
+     test "$hardcode_minus_L_CXX" != no; then
+    # Linking always hardcodes the temporary library directory.
+    hardcode_action_CXX=relink
+  else
+    # We can link without hardcoding, and we can hardcode nonexisting dirs.
+    hardcode_action_CXX=immediate
+  fi
+else
+  # We cannot hardcode anything, or else we can only hardcode existing
+  # directories.
+  hardcode_action_CXX=unsupported
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hardcode_action_CXX" >&5
+$as_echo "$hardcode_action_CXX" >&6; }
+
+if test "$hardcode_action_CXX" = relink ||
+   test "$inherit_rpath_CXX" = yes; then
+  # Fast installation is not supported
+  enable_fast_install=no
+elif test "$shlibpath_overrides_runpath" = yes ||
+     test "$enable_shared" = no; then
+  # Fast installation is not necessary
+  enable_fast_install=needless
+fi # when neither condition holds, enable_fast_install keeps its previous value
+
+
+
+
+
+
+
+  fi # test -n "$compiler"
+
+  CC=$lt_save_CC # put back the values saved in the lt_save_* variables (saved outside this section)
+  CFLAGS=$lt_save_CFLAGS
+  LDCXX=$LD # keep the current LD as LDCXX before LD itself is restored
+  LD=$lt_save_LD
+  GCC=$lt_save_GCC
+  with_gnu_ld=$lt_save_with_gnu_ld
+  lt_cv_path_LDCXX=$lt_cv_path_LD # same pattern for the cached linker path: copy to the CXX name, then restore
+  lt_cv_path_LD=$lt_save_path_LD
+  lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld
+  lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld # NOTE(review): restored from the saved with_gnu_ld value, not from a saved lt_cv_prog_gnu_ld -- confirm this is intended
+fi # test "$_lt_caught_CXX_error" != yes
+
+ac_ext=c # test sources get the .c extension again and the command templates below use $CC/$CFLAGS
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+        ac_config_commands="$ac_config_commands libtool"
+
+
+
+
+# Only expand once:
+
+
+
+
+# Add configure option --enable-maintainer-mode which enables dependency
+# checking and generation useful to package maintainers.  This is made an
+# option to avoid confusing end users.
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5
+$as_echo_n "checking whether to enable maintainer-specific portions of Makefiles... " >&6; }
+    # Check whether --enable-maintainer-mode was given.
+if test "${enable_maintainer_mode+set}" = set; then :
+  enableval=$enable_maintainer_mode; USE_MAINTAINER_MODE=$enableval
+else
+  USE_MAINTAINER_MODE=no # option not given: maintainer mode defaults to off
+fi
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $USE_MAINTAINER_MODE" >&5
+$as_echo "$USE_MAINTAINER_MODE" >&6; }
+   if test $USE_MAINTAINER_MODE = yes; then # exactly one of the TRUE/FALSE variables is '#', the other is empty
+  MAINTAINER_MODE_TRUE=
+  MAINTAINER_MODE_FALSE='#' # presumably used as a comment prefix in generated Makefiles -- confirm in Makefile.in
+else
+  MAINTAINER_MODE_TRUE='#'
+  MAINTAINER_MODE_FALSE=
+fi
+
+  MAINT=$MAINTAINER_MODE_TRUE # MAINT mirrors MAINTAINER_MODE_TRUE
+
+
+
+# If the C compiler supports the keyword inline, do nothing. Otherwise
+# define inline to __inline__ or __inline if it accepts one of those,
+# otherwise define inline to be empty.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for inline" >&5
+$as_echo_n "checking for inline... " >&6; }
+if ${ac_cv_c_inline+:} false; then : # cache variable already set: skip the compile tests
+  $as_echo_n "(cached) " >&6
+else
+  ac_cv_c_inline=no # stays "no" if none of the candidate keywords compiles
+for ac_kw in inline __inline__ __inline; do # candidates are tried in this order; the first that compiles wins
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifndef __cplusplus
+typedef int foo_t;
+static $ac_kw foo_t static_foo () {return 0; }
+$ac_kw foo_t foo () {return 0; }
+#endif
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_c_inline=$ac_kw
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+  test "$ac_cv_c_inline" != no && break
+done
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_inline" >&5
+$as_echo "$ac_cv_c_inline" >&6; }
+
+case $ac_cv_c_inline in
+  inline | yes) ;; # plain "inline" works: nothing is appended to confdefs.h
+  *)
+    case $ac_cv_c_inline in
+      no) ac_val=;; # no keyword worked: inline is defined to nothing
+      *) ac_val=$ac_cv_c_inline;; # otherwise inline is defined to the keyword that compiled
+    esac
+    cat >>confdefs.h <<_ACEOF
+#ifndef __cplusplus
+#define inline $ac_val
+#endif
+_ACEOF
+    ;;
+esac
+
+
+
+# Check if the C compiler supports the "visibility" function attribute
+# If supported, defines HAVE_FUNC_ATTRIBUTE_VISIBILITY
+
+
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __attribute__((visibility))" >&5
+$as_echo_n "checking for __attribute__((visibility))... " >&6; }
+if ${ax_cv_have_func_attribute_visibility+:} false; then : # cache variable already set: skip the link test
+  $as_echo_n "(cached) " >&6
+else
+
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+                    int foo_def( void ) __attribute__((visibility("default")));
+                    int foo_hid( void ) __attribute__((visibility("hidden")));
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+                                      if test -s conftest.err; then : # link succeeded but conftest.err is non-empty: counted as unsupported
+  ax_cv_have_func_attribute_visibility=no
+else
+  ax_cv_have_func_attribute_visibility=yes
+fi
+else
+  ax_cv_have_func_attribute_visibility=no # the test program did not link
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_have_func_attribute_visibility" >&5
+$as_echo "$ax_cv_have_func_attribute_visibility" >&6; }
+
+    if test yes = $ax_cv_have_func_attribute_visibility; then : # on success the macro definition is appended to confdefs.h
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_FUNC_ATTRIBUTE_VISIBILITY 1
+_ACEOF
+
+fi
+
+
+
+
+# Check if the compiler supports "-fvisibility=hidden" and if yes, add it to CFLAGS
+# This means that symbols that are not marked explicitly for export (CMSAPI)
+# will not be reachable in the shared library.
+
+
+
+
+for flag in "-fvisibility=hidden"; do
+  as_CACHEVAR=`$as_echo "ax_cv_check_cflags__$flag" | $as_tr_sh`
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $flag" >&5
+$as_echo_n "checking whether C compiler accepts $flag... " >&6; }
+if eval \${$as_CACHEVAR+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+
+  ax_check_save_flags=$CFLAGS
+  CFLAGS="$CFLAGS  $flag"
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  eval "$as_CACHEVAR=yes"
+else
+  eval "$as_CACHEVAR=no"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+  CFLAGS=$ax_check_save_flags
+fi
+eval ac_res=\$$as_CACHEVAR
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+if eval test \"x\$"$as_CACHEVAR"\" = x"yes"; then :
+
+if ${CFLAGS+:} false; then :
+
+  case " $CFLAGS " in #(
+  *" $flag "*) :
+    { { $as_echo "$as_me:${as_lineno-$LINENO}: : CFLAGS already contains \$flag"; } >&5
+  (: CFLAGS already contains $flag) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } ;; #(
+  *) :
+
+     as_fn_append CFLAGS " $flag"
+     { { $as_echo "$as_me:${as_lineno-$LINENO}: : CFLAGS=\"\$CFLAGS\""; } >&5
+  (: CFLAGS="$CFLAGS") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+     ;;
+esac
+
+else
+
+  CFLAGS=$flag
+  { { $as_echo "$as_me:${as_lineno-$LINENO}: : CFLAGS=\"\$CFLAGS\""; } >&5
+  (: CFLAGS="$CFLAGS") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+
+fi
+
+else
+  :
+fi
+
+done
+
+
+# If words are stored with the most significant byte first (like
+# Motorola and SPARC CPUs), define `WORDS_BIGENDIAN'.
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5
+$as_echo_n "checking whether byte ordering is bigendian... " >&6; }
+if ${ac_cv_c_bigendian+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_cv_c_bigendian=unknown
+    # See if we're dealing with a universal compiler.
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifndef __APPLE_CC__
+	       not a universal capable compiler
+	     #endif
+	     typedef int dummy;
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+	# Check for potential -arch flags.  It is not universal unless
+	# there are at least two -arch flags with different values.
+	ac_arch=
+	ac_prev=
+	for ac_word in $CC $CFLAGS $CPPFLAGS $LDFLAGS; do
+	 if test -n "$ac_prev"; then
+	   case $ac_word in
+	     i?86 | x86_64 | ppc | ppc64)
+	       if test -z "$ac_arch" || test "$ac_arch" = "$ac_word"; then
+		 ac_arch=$ac_word
+	       else
+		 ac_cv_c_bigendian=universal
+		 break
+	       fi
+	       ;;
+	   esac
+	   ac_prev=
+	 elif test "x$ac_word" = "x-arch"; then
+	   ac_prev=arch
+	 fi
+       done
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+    if test $ac_cv_c_bigendian = unknown; then
+      # See if sys/param.h defines the BYTE_ORDER macro.
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <sys/types.h>
+	     #include <sys/param.h>
+
+int
+main ()
+{
+#if ! (defined BYTE_ORDER && defined BIG_ENDIAN \
+		     && defined LITTLE_ENDIAN && BYTE_ORDER && BIG_ENDIAN \
+		     && LITTLE_ENDIAN)
+	      bogus endian macros
+	     #endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  # It does; now see whether BYTE_ORDER is defined as BIG_ENDIAN or not.
+	 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <sys/types.h>
+		#include <sys/param.h>
+
+int
+main ()
+{
+#if BYTE_ORDER != BIG_ENDIAN
+		 not big endian
+		#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_c_bigendian=yes
+else
+  ac_cv_c_bigendian=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+    fi
+    if test $ac_cv_c_bigendian = unknown; then
+      # See if <limits.h> defines _LITTLE_ENDIAN or _BIG_ENDIAN (e.g., Solaris).
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <limits.h>
+
+int
+main ()
+{
+#if ! (defined _LITTLE_ENDIAN || defined _BIG_ENDIAN)
+	      bogus endian macros
+	     #endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  # It does; now see whether _BIG_ENDIAN is defined or not.
+	 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <limits.h>
+
+int
+main ()
+{
+#ifndef _BIG_ENDIAN
+		 not big endian
+		#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_c_bigendian=yes
+else
+  ac_cv_c_bigendian=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+    fi
+    if test $ac_cv_c_bigendian = unknown; then
+      # Compile a test program.
+      if test "$cross_compiling" = yes; then :
+  # Try to guess by grepping values from an object file.
+	 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+short int ascii_mm[] =
+		  { 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 };
+		short int ascii_ii[] =
+		  { 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 };
+		int use_ascii (int i) {
+		  return ascii_mm[i] + ascii_ii[i];
+		}
+		short int ebcdic_ii[] =
+		  { 0x89D3, 0xE3E3, 0x8593, 0x95C5, 0x89C4, 0x9581, 0 };
+		short int ebcdic_mm[] =
+		  { 0xC2C9, 0xC785, 0x95C4, 0x8981, 0x95E2, 0xA8E2, 0 };
+		int use_ebcdic (int i) {
+		  return ebcdic_mm[i] + ebcdic_ii[i];
+		}
+		extern int foo;
+
+int
+main ()
+{
+return use_ascii (foo) == use_ebcdic (foo);
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  if grep BIGenDianSyS conftest.$ac_objext >/dev/null; then
+	      ac_cv_c_bigendian=yes
+	    fi
+	    if grep LiTTleEnDian conftest.$ac_objext >/dev/null ; then
+	      if test "$ac_cv_c_bigendian" = unknown; then
+		ac_cv_c_bigendian=no
+	      else
+		# finding both strings is unlikely to happen, but who knows?
+		ac_cv_c_bigendian=unknown
+	      fi
+	    fi
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$ac_includes_default
+int
+main ()
+{
+
+	     /* Are we little or big endian?  From Harbison&Steele.  */
+	     union
+	     {
+	       long int l;
+	       char c[sizeof (long int)];
+	     } u;
+	     u.l = 1;
+	     return u.c[sizeof (long int) - 1] == 1;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+  ac_cv_c_bigendian=no
+else
+  ac_cv_c_bigendian=yes
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+  conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+    fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_bigendian" >&5
+$as_echo "$ac_cv_c_bigendian" >&6; }
+ case $ac_cv_c_bigendian in #(
+   yes)
+     $as_echo "#define WORDS_BIGENDIAN 1" >>confdefs.h
+;; #(
+   no)
+      ;; #(
+   universal)
+
+$as_echo "#define AC_APPLE_UNIVERSAL_BUILD 1" >>confdefs.h
+
+     ;; #(
+   *)
+     as_fn_error $? "unknown endianness
+ presetting ac_cv_c_bigendian=no (or yes) will help" "$LINENO" 5 ;;
+ esac
+
+
+# Point to JPEG installed in DIR or disable JPEG with --without-jpeg.
+
+# Check whether --with-jpeg was given.
+if test "${with_jpeg+set}" = set; then :
+  withval=$with_jpeg;
+            if  test "x$withval" = "xno" ; then
+              with_jpeg='no'
+            else
+              if  test "x$withval" != "xyes" ; then
+                with_jpeg=$withval
+                JPEG_DIR=$withval
+                CPPFLAGS="$CPPFLAGS -I$JPEG_DIR/include"
+                LDFLAGS="$LDFLAGS -L$JPEG_DIR/lib"
+              fi
+              with_jpeg='yes'
+            fi
+
+else
+  with_jpeg='yes'
+fi
+
+
+# Point to TIFF installed in DIR or disable TIFF with --without-tiff.
+
+# Check whether --with-tiff was given.
+if test "${with_tiff+set}" = set; then :
+  withval=$with_tiff;
+            if  test "x$withval" = "xno" ; then
+              with_tiff='no'
+            else
+              if  test "x$withval" != "xyes" ; then
+                with_tiff=$withval
+                TIFF_DIR=$withval
+                CPPFLAGS="$CPPFLAGS -I$TIFF_DIR/include"
+                LDFLAGS="$LDFLAGS -L$TIFF_DIR/lib"
+              fi
+              with_tiff='yes'
+            fi
+
+else
+  with_tiff='yes'
+fi
+
+
+# Enable ZLIB by default; disable it with --without-zlib.
+
+# Check whether --with-zlib was given.
+if test "${with_zlib+set}" = set; then :
+  withval=$with_zlib; with_zlib=$withval
+else
+  with_zlib='yes'
+fi
+
+
+#
+# Determine POSIX threads settings
+#
+# Enable support for POSIX thread APIs
+
+# Check whether --with-threads was given.
+if test "${with_threads+set}" = set; then :
+  withval=$with_threads; with_threads=$withval
+else
+  with_threads='yes'
+fi
+
+
+have_threads=no
+if test "$with_threads" != 'no'
+then
+
+
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+acx_pthread_ok=no
+
+# We used to check for pthread.h first, but this fails if pthread.h
+# requires special compiler flags (e.g. on Tru64 or Sequent).
+# It gets checked for in the link test anyway.
+
+# First of all, check if the user has set any of the PTHREAD_LIBS,
+# etcetera environment variables, and if threads linking works using
+# them:
+if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then
+        save_CFLAGS="$CFLAGS"
+        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+        save_LIBS="$LIBS"
+        LIBS="$PTHREAD_LIBS $LIBS"
+        { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS" >&5
+$as_echo_n "checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS... " >&6; }
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char pthread_join ();
+int
+main ()
+{
+return pthread_join ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  acx_pthread_ok=yes
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: $acx_pthread_ok" >&5
+$as_echo "$acx_pthread_ok" >&6; }
+        if test x"$acx_pthread_ok" = xno; then
+                PTHREAD_LIBS=""
+                PTHREAD_CFLAGS=""
+        fi
+        LIBS="$save_LIBS"
+        CFLAGS="$save_CFLAGS"
+fi
+
+# We must check for the threads library under a number of different
+# names; the ordering is very important because some systems
+# (e.g. DEC) have both -lpthread and -lpthreads, where one of the
+# libraries is broken (non-POSIX).
+
+# Create a list of thread flags to try.  Items starting with a "-" are
+# C compiler flags, and other items are library names, except for "none"
+# which indicates that we try without any flags at all, and "pthread-config"
+# which is a program returning the flags for the Pth emulation library.
+
+acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt lpthread pthread-config"
+
+# The ordering *is* (sometimes) important.  Some notes on the
+# individual items follow:
+
+# pthreads: AIX (must check this before -lpthread)
+# none: in case threads are in libc; should be tried before -Kthread and
+#       other compiler flags to prevent continual compiler warnings
+# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h)
+# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able)
+# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread)
+# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads)
+# -pthreads: Solaris/gcc
+# -mthreads: Mingw32/gcc, Lynx/gcc
+# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it
+#      doesn't hurt to check since this sometimes defines pthreads too;
+#      also defines -D_REENTRANT)
+#      ... -mt is also the pthreads flag for HP/aCC
+# pthread: Linux, etcetera
+# --thread-safe: KAI C++
+# pthread-config: use pthread-config program (for GNU Pth library)
+
+case "${host_cpu}-${host_os}" in
+        *solaris*)
+
+        # On Solaris (at least, for some versions), libc contains stubbed
+        # (non-functional) versions of the pthreads routines, so link-based
+        # tests will erroneously succeed.  (We need to link with -pthreads/-mt/
+        # -lpthread.)  (The stubs are missing pthread_cleanup_push, or rather
+        # a function called by this macro, so we could check for that, but
+        # who knows whether they'll stub that too in a future libc.)  So,
+        # we'll just look for -pthreads and -lpthread first:
+
+        acx_pthread_flags="-pthreads pthread -mt -pthread $acx_pthread_flags"
+        ;;
+
+        # The HP-UX compiler just warns about options it does not understand
+        # but it needs -mt.
+        *-hpux*)
+        acx_pthread_flags="-mt $acx_pthread_flags"
+        ;;
+esac
+
+if test x"$acx_pthread_ok" = xno; then
+for flag in $acx_pthread_flags; do
+
+        case $flag in
+                none)
+                { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pthreads work without any flags" >&5
+$as_echo_n "checking whether pthreads work without any flags... " >&6; }
+                ;;
+
+                -*)
+                { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pthreads work with $flag" >&5
+$as_echo_n "checking whether pthreads work with $flag... " >&6; }
+                PTHREAD_CFLAGS="$flag"
+                ;;
+
+		pthread-config)
+		# Extract the first word of "pthread-config", so it can be a program name with args.
+set dummy pthread-config; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_acx_pthread_config+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$acx_pthread_config"; then
+  ac_cv_prog_acx_pthread_config="$acx_pthread_config" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_acx_pthread_config="yes"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+  test -z "$ac_cv_prog_acx_pthread_config" && ac_cv_prog_acx_pthread_config="no"
+fi
+fi
+acx_pthread_config=$ac_cv_prog_acx_pthread_config
+if test -n "$acx_pthread_config"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $acx_pthread_config" >&5
+$as_echo "$acx_pthread_config" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+		if test x"$acx_pthread_config" = xno; then continue; fi
+		PTHREAD_CFLAGS="`pthread-config --cflags`"
+		PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`"
+		;;
+
+                *)
+                { $as_echo "$as_me:${as_lineno-$LINENO}: checking for the pthreads library -l$flag" >&5
+$as_echo_n "checking for the pthreads library -l$flag... " >&6; }
+                PTHREAD_LIBS="-l$flag"
+                ;;
+        esac
+
+        save_LIBS="$LIBS"
+        save_CFLAGS="$CFLAGS"
+        LIBS="$PTHREAD_LIBS $LIBS"
+        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+
+        # Check for various functions.  We must include pthread.h,
+        # since some functions may be macros.  (On the Sequent, we
+        # need a special flag -Kthread to make this header compile.)
+        # We check for pthread_join because it is in -lpthread on IRIX
+        # while pthread_create is in libc.  We check for pthread_attr_init
+        # due to DEC craziness with -lpthreads.  We check for
+        # pthread_cleanup_push because it is one of the few pthread
+        # functions on Solaris that doesn't have a non-functional libc stub.
+        # We try pthread_create on general principles.
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <pthread.h>
+int
+main ()
+{
+pthread_t th; pthread_join(th, 0);
+                     pthread_attr_init(0); pthread_cleanup_push(0, 0);
+                     pthread_create(0,0,0,0); pthread_cleanup_pop(0);
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  acx_pthread_ok=yes
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+
+        LIBS="$save_LIBS"
+        CFLAGS="$save_CFLAGS"
+
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: $acx_pthread_ok" >&5
+$as_echo "$acx_pthread_ok" >&6; }
+        if test "x$acx_pthread_ok" = xyes; then
+                break;
+        fi
+
+        PTHREAD_LIBS=""
+        PTHREAD_CFLAGS=""
+done
+fi
+
+# Various other checks:
+if test "x$acx_pthread_ok" = xyes; then
+        save_LIBS="$LIBS"
+        LIBS="$PTHREAD_LIBS $LIBS"
+        save_CFLAGS="$CFLAGS"
+        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+
+        # Detect AIX lossage: JOINABLE attribute is called UNDETACHED.
+	{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for joinable pthread attribute" >&5
+$as_echo_n "checking for joinable pthread attribute... " >&6; }
+	attr_name=unknown
+	for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do
+	    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <pthread.h>
+int
+main ()
+{
+int attr=$attr; return attr;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  attr_name=$attr; break
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+	done
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: $attr_name" >&5
+$as_echo "$attr_name" >&6; }
+        if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then
+
+cat >>confdefs.h <<_ACEOF
+#define PTHREAD_CREATE_JOINABLE $attr_name
+_ACEOF
+
+        fi
+
+        { $as_echo "$as_me:${as_lineno-$LINENO}: checking if more special flags are required for pthreads" >&5
+$as_echo_n "checking if more special flags are required for pthreads... " >&6; }
+        flag=no
+        case "${host_cpu}-${host_os}" in
+            *-aix* | *-freebsd* | *-darwin*) flag="-D_THREAD_SAFE";;
+            *solaris* | *-osf* | *-hpux*) flag="-D_REENTRANT";;
+        esac
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${flag}" >&5
+$as_echo "${flag}" >&6; }
+        if test "x$flag" != xno; then
+            PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS"
+        fi
+
+        LIBS="$save_LIBS"
+        CFLAGS="$save_CFLAGS"
+
+        # More AIX lossage: must compile with xlc_r or cc_r
+        case "${host_os}" in
+          aix* )
+            if test x"$GCC" != xyes; then
+              case "$CC" in
+                *xlc )
+                  # Extract the first word of "xlc_r", so it can be a program name with args.
+set dummy xlc_r; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_PTHREAD_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$PTHREAD_CC"; then
+  ac_cv_prog_PTHREAD_CC="$PTHREAD_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_PTHREAD_CC="xlc_r"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+  test -z "$ac_cv_prog_PTHREAD_CC" && ac_cv_prog_PTHREAD_CC="${CC}"
+fi
+fi
+PTHREAD_CC=$ac_cv_prog_PTHREAD_CC
+if test -n "$PTHREAD_CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PTHREAD_CC" >&5
+$as_echo "$PTHREAD_CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+ ;;
+                *cc )
+                  # Extract the first word of "cc_r", so it can be a program name with args.
+set dummy cc_r; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_PTHREAD_CC+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$PTHREAD_CC"; then
+  ac_cv_prog_PTHREAD_CC="$PTHREAD_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_PTHREAD_CC="cc_r"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+  test -z "$ac_cv_prog_PTHREAD_CC" && ac_cv_prog_PTHREAD_CC="${CC}"
+fi
+fi
+PTHREAD_CC=$ac_cv_prog_PTHREAD_CC
+if test -n "$PTHREAD_CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PTHREAD_CC" >&5
+$as_echo "$PTHREAD_CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+ ;;
+              esac
+            fi
+            case "$CXX" in
+              *xlC )
+                # Extract the first word of "xlC_r", so it can be a program name with args.
+set dummy xlC_r; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_PTHREAD_CXX+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$PTHREAD_CXX"; then
+  ac_cv_prog_PTHREAD_CXX="$PTHREAD_CXX" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_PTHREAD_CXX="xlC_r"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+  test -z "$ac_cv_prog_PTHREAD_CXX" && ac_cv_prog_PTHREAD_CXX="${CXX}"
+fi
+fi
+PTHREAD_CXX=$ac_cv_prog_PTHREAD_CXX
+if test -n "$PTHREAD_CXX"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PTHREAD_CXX" >&5
+$as_echo "$PTHREAD_CXX" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+ ;;
+            esac
+            ;;
+        esac
+fi
+
+if test "${PTHREAD_CC}x" = "x"
+then
+  PTHREAD_CC="$CC"
+fi
+if test "${PTHREAD_CXX}x" = "x"
+then
+  PTHREAD_CXX="$CXX"
+fi
+
+
+
+
+
+
+# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
+if test x"$acx_pthread_ok" = xyes; then
+
+$as_echo "#define HAVE_PTHREAD 1" >>confdefs.h
+
+        :
+else
+        acx_pthread_ok=no
+
+fi
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+  if test "$acx_pthread_ok" = yes
+  then
+    have_threads=yes
+
+    DEF_THREAD="$PTHREAD_CFLAGS"
+    CFLAGS="$CFLAGS $DEF_THREAD"
+    CXXFLAGS="$CXXFLAGS $DEF_THREAD"
+
+    if test "$CC" != "$PTHREAD_CC"
+    then
+      { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Replacing compiler $CC with compiler $PTHREAD_CC to support pthreads." >&5
+$as_echo "$as_me: WARNING: Replacing compiler $CC with compiler $PTHREAD_CC to support pthreads." >&2;}
+      CC="$PTHREAD_CC"
+    fi
+    if test "$CXX" != "$PTHREAD_CXX"
+    then
+      { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Replacing compiler $CXX with compiler $PTHREAD_CXX to support pthreads." >&5
+$as_echo "$as_me: WARNING: Replacing compiler $CXX with compiler $PTHREAD_CXX to support pthreads." >&2;}
+      CXX="$PTHREAD_CXX"
+    fi
+  fi
+fi
+
+
+#
+# Find math library
+#
+LIB_MATH=''
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for sqrt in -lm" >&5
+$as_echo_n "checking for sqrt in -lm... " >&6; }
+if ${ac_cv_lib_m_sqrt+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lm  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char sqrt ();
+int
+main ()
+{
+return sqrt ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_m_sqrt=yes
+else
+  ac_cv_lib_m_sqrt=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_sqrt" >&5
+$as_echo "$ac_cv_lib_m_sqrt" >&6; }
+if test "x$ac_cv_lib_m_sqrt" = xyes; then :
+  LIB_MATH="-lm"
+fi
+
+LIBS="$LIB_MATH $LIBS"
+
+
+#
+# Find POSIX threads library
+#
+LIB_THREAD=''
+if test "$with_threads" != 'no' && test "$have_threads" = 'yes'
+then
+  for lib in pthread pthreads
+  do
+    if test "x$PTHREAD_LIBS" = "x" ; then
+      as_ac_Lib=`$as_echo "ac_cv_lib_$lib''_pthread_mutex_lock" | $as_tr_sh`
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_mutex_lock in -l$lib" >&5
+$as_echo_n "checking for pthread_mutex_lock in -l$lib... " >&6; }
+if eval \${$as_ac_Lib+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-l$lib  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char pthread_mutex_lock ();
+int
+main ()
+{
+return pthread_mutex_lock ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  eval "$as_ac_Lib=yes"
+else
+  eval "$as_ac_Lib=no"
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+eval ac_res=\$$as_ac_Lib
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+if eval test \"x\$"$as_ac_Lib"\" = x"yes"; then :
+  PTHREAD_LIBS=-l$lib
+fi
+
+    fi
+  done
+
+  LIB_THREAD="$PTHREAD_LIBS"
+  LIBS="$LIBS $LIB_THREAD"
+
+$as_echo "#define HasTHREADS 1" >>confdefs.h
+
+else
+
+$as_echo "#define HasTHREADS 0" >>confdefs.h
+
+fi
+
+
+#
+# Check for JPEG
+#
+have_jpeg='no'
+LIB_JPEG=''
+if test ! "$with_jpeg" = 'no'
+then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for JPEG support" >&5
+$as_echo_n "checking for JPEG support... " >&6; }
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5
+$as_echo "" >&6; }
+    failed=0;
+    passed=0;
+    ac_fn_c_check_header_mongrel "$LINENO" "jconfig.h" "ac_cv_header_jconfig_h" "$ac_includes_default"
+if test "x$ac_cv_header_jconfig_h" = xyes; then :
+  passed=`expr $passed + 1`
+else
+  failed=`expr $failed + 1`
+fi
+
+
+    ac_fn_c_check_header_mongrel "$LINENO" "jerror.h" "ac_cv_header_jerror_h" "$ac_includes_default"
+if test "x$ac_cv_header_jerror_h" = xyes; then :
+  passed=`expr $passed + 1`
+else
+  failed=`expr $failed + 1`
+fi
+
+
+    ac_fn_c_check_header_mongrel "$LINENO" "jmorecfg.h" "ac_cv_header_jmorecfg_h" "$ac_includes_default"
+if test "x$ac_cv_header_jmorecfg_h" = xyes; then :
+  passed=`expr $passed + 1`
+else
+  failed=`expr $failed + 1`
+fi
+
+
+    ac_fn_c_check_header_mongrel "$LINENO" "jpeglib.h" "ac_cv_header_jpeglib_h" "$ac_includes_default"
+if test "x$ac_cv_header_jpeglib_h" = xyes; then :
+  passed=`expr $passed + 1`
+else
+  failed=`expr $failed + 1`
+fi
+
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for jpeg_read_header in -ljpeg" >&5
+$as_echo_n "checking for jpeg_read_header in -ljpeg... " >&6; }
+if ${ac_cv_lib_jpeg_jpeg_read_header+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-ljpeg  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char jpeg_read_header ();
+int
+main ()
+{
+return jpeg_read_header ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_jpeg_jpeg_read_header=yes
+else
+  ac_cv_lib_jpeg_jpeg_read_header=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_jpeg_jpeg_read_header" >&5
+$as_echo "$ac_cv_lib_jpeg_jpeg_read_header" >&6; }
+if test "x$ac_cv_lib_jpeg_jpeg_read_header" = xyes; then :
+  passed=`expr $passed + 1`
+else
+  failed=`expr $failed + 1`
+fi
+
+
+# Test for compatible JPEG library
+if test ! "$ac_cv_jpeg_version_ok" = 'yes' ; then  # run the probe unless the variable is already 'yes'
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for JPEG library is version 6b or later" >&5
+$as_echo_n "checking for JPEG library is version 6b or later... " >&6; }
+if ${ac_cv_jpeg_version_ok+:} false; then :  # variable already set (e.g. to 'no'): reuse it, skip the compile
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdio.h>
+#include <stdlib.h>
+#include <jpeglib.h>
+
+int
+main ()
+{
+
+#if JPEG_LIB_VERSION < 62
+#error IJG JPEG library must be version 6b or newer!
+#endif
+return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :  # compile-only test: the #error fires when JPEG_LIB_VERSION < 62
+  ac_cv_jpeg_version_ok='yes'
+else
+  ac_cv_jpeg_version_ok='no'
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext  # remove the probe's artifacts
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_jpeg_version_ok" >&5
+$as_echo "$ac_cv_jpeg_version_ok" >&6; }
+if test "$ac_cv_jpeg_version_ok" = 'yes' ; then  # NOTE(review): the verdict was just printed above; this prints yes/no a second time
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+ passed=`expr $passed + 1`  # counted by the "JPEG package is complete" verdict that follows
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ failed=`expr $failed + 1`  # any failure makes the JPEG verdict 'no (failed tests)'
+fi
+fi  # end of the JPEG version probe
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking if JPEG package is complete" >&5
+$as_echo_n "checking if JPEG package is complete... " >&6; }
+    if test $passed -gt 0
+    then
+    if test $failed -gt 0
+    then
+	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no -- some components failed test" >&5
+$as_echo "no -- some components failed test" >&6; }
+        have_jpeg='no (failed tests)'
+    else
+	LIB_JPEG='-ljpeg'
+	LIBS="$LIB_JPEG $LIBS"
+
+$as_echo "#define HasJPEG 1" >>confdefs.h
+
+	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+        have_jpeg='yes'
+    fi
+    else
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+    fi
+fi
+ if test "$have_jpeg" = 'yes'; then  # only the exact string 'yes' enables it; 'no (failed tests)' does not
+  HasJPEG_TRUE=  # empty marks the active branch of the conditional
+  HasJPEG_FALSE='#'  # presumably substituted into Makefile templates so '#' comments the branch out -- confirm in Makefile.in
+else
+  HasJPEG_TRUE='#'
+  HasJPEG_FALSE=
+fi  # exactly one of the pair is empty; a later check errors out if both are
+
+
+
+#
+# Check for ZLIB
+#
+have_zlib='no'
+if test ! "$with_zlib" = 'no' || test ! "$with_png" = 'no'
+then
+  LIB_ZLIB=''
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ZLIB support " >&5
+$as_echo_n "checking for ZLIB support ... " >&6; }
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5
+$as_echo "" >&6; }
+  failed=0;
+  passed=0;
+  ac_fn_c_check_header_mongrel "$LINENO" "zconf.h" "ac_cv_header_zconf_h" "$ac_includes_default"  # helper defined elsewhere; its result is read from ac_cv_header_zconf_h
+if test "x$ac_cv_header_zconf_h" = xyes; then :  # the x prefix guards against an empty or dash-leading value
+  passed=`expr $passed + 1`  # counters were reset to 0 at the start of the ZLIB section
+else
+  failed=`expr $failed + 1`
+fi
+
+
+  ac_fn_c_check_header_mongrel "$LINENO" "zlib.h" "ac_cv_header_zlib_h" "$ac_includes_default"  # same probe for the main zlib header
+if test "x$ac_cv_header_zlib_h" = xyes; then :
+  passed=`expr $passed + 1`
+else
+  failed=`expr $failed + 1`
+fi
+
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for compress in -lz" >&5
+$as_echo_n "checking for compress in -lz... " >&6; }
+if ${ac_cv_lib_z_compress+:} false; then :  # cache variable already set: skip the link test
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS  # saved so the temporary -lz below can be undone
+LIBS="-lz  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char compress ();
+int
+main ()
+{
+return compress ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :  # link test: the program only needs the symbol to resolve with -lz
+  ac_cv_lib_z_compress=yes
+else
+  ac_cv_lib_z_compress=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS  # restore; -lz is added for real only by the completeness verdict
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_compress" >&5
+$as_echo "$ac_cv_lib_z_compress" >&6; }
+if test "x$ac_cv_lib_z_compress" = xyes; then :
+  passed=`expr $passed + 1`  # tallied with the other ZLIB probes
+else
+  failed=`expr $failed + 1`
+fi
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for uncompress in -lz" >&5
+$as_echo_n "checking for uncompress in -lz... " >&6; }
+if ${ac_cv_lib_z_uncompress+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lz  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char uncompress ();
+int
+main ()
+{
+return uncompress ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_z_uncompress=yes
+else
+  ac_cv_lib_z_uncompress=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_uncompress" >&5
+$as_echo "$ac_cv_lib_z_uncompress" >&6; }
+if test "x$ac_cv_lib_z_uncompress" = xyes; then :
+  passed=`expr $passed + 1`
+else
+  failed=`expr $failed + 1`
+fi
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for deflate in -lz" >&5
+$as_echo_n "checking for deflate in -lz... " >&6; }
+if ${ac_cv_lib_z_deflate+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lz  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char deflate ();
+int
+main ()
+{
+return deflate ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_z_deflate=yes
+else
+  ac_cv_lib_z_deflate=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_deflate" >&5
+$as_echo "$ac_cv_lib_z_deflate" >&6; }
+if test "x$ac_cv_lib_z_deflate" = xyes; then :
+  passed=`expr $passed + 1`
+else
+  failed=`expr $failed + 1`
+fi
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for inflate in -lz" >&5
+$as_echo_n "checking for inflate in -lz... " >&6; }
+if ${ac_cv_lib_z_inflate+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lz  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char inflate ();
+int
+main ()
+{
+return inflate ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_z_inflate=yes
+else
+  ac_cv_lib_z_inflate=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_inflate" >&5
+$as_echo "$ac_cv_lib_z_inflate" >&6; }
+if test "x$ac_cv_lib_z_inflate" = xyes; then :
+  passed=`expr $passed + 1`
+else
+  failed=`expr $failed + 1`
+fi
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for gzseek in -lz" >&5
+$as_echo_n "checking for gzseek in -lz... " >&6; }
+if ${ac_cv_lib_z_gzseek+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lz  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char gzseek ();
+int
+main ()
+{
+return gzseek ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_z_gzseek=yes
+else
+  ac_cv_lib_z_gzseek=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_gzseek" >&5
+$as_echo "$ac_cv_lib_z_gzseek" >&6; }
+if test "x$ac_cv_lib_z_gzseek" = xyes; then :
+  passed=`expr $passed + 1`
+else
+  failed=`expr $failed + 1`
+fi
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for gztell in -lz" >&5
+$as_echo_n "checking for gztell in -lz... " >&6; }
+if ${ac_cv_lib_z_gztell+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lz  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char gztell ();
+int
+main ()
+{
+return gztell ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_z_gztell=yes
+else
+  ac_cv_lib_z_gztell=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_gztell" >&5
+$as_echo "$ac_cv_lib_z_gztell" >&6; }
+if test "x$ac_cv_lib_z_gztell" = xyes; then :
+  passed=`expr $passed + 1`
+else
+  failed=`expr $failed + 1`
+fi
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if ZLIB package is complete" >&5
+$as_echo_n "checking if ZLIB package is complete... " >&6; }
+  if test $passed -gt 0  # at least one header/library probe succeeded
+  then
+    if test $failed -gt 0  # but a single failed probe disqualifies the package
+    then
+      { $as_echo "$as_me:${as_lineno-$LINENO}: result: no -- some components failed test" >&5
+$as_echo "no -- some components failed test" >&6; }
+      have_zlib='no (failed tests)'
+    else
+      LIB_ZLIB='-lz'  # read later when TIFFICC_DEPLIBS is assembled
+      LIBS="$LIB_ZLIB $LIBS"
+
+$as_echo "#define HasZLIB 1" >>confdefs.h
+
+      { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+      have_zlib='yes'
+    fi
+  else  # nothing passed: have_zlib keeps the 'no' it was initialised with
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+  fi
+fi
+ if test "$have_zlib" = 'yes'; then  # 'no' and 'no (failed tests)' both take the else branch
+  HasZLIB_TRUE=  # empty marks the active branch of the conditional
+  HasZLIB_FALSE='#'  # presumably a Makefile comment prefix for the inactive branch -- confirm in Makefile.in
+else
+  HasZLIB_TRUE='#'
+  HasZLIB_FALSE=
+fi
+
+
+
+#
+# Check for TIFF
+#
+have_tiff='no'
+LIB_TIFF=''
+if test ! "$with_tiff" = 'no'
+then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for TIFF support" >&5
+$as_echo_n "checking for TIFF support... " >&6; }
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5
+$as_echo "" >&6; }
+    failed=0;
+    passed=0;
+    ac_fn_c_check_header_mongrel "$LINENO" "tiff.h" "ac_cv_header_tiff_h" "$ac_includes_default"
+if test "x$ac_cv_header_tiff_h" = xyes; then :
+  passed=`expr $passed + 1`
+else
+  failed=`expr $failed + 1`
+fi
+
+
+    ac_fn_c_check_header_mongrel "$LINENO" "tiffio.h" "ac_cv_header_tiffio_h" "$ac_includes_default"
+if test "x$ac_cv_header_tiffio_h" = xyes; then :
+  passed=`expr $passed + 1`
+else
+  failed=`expr $failed + 1`
+fi
+
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for TIFFOpen in -ltiff" >&5
+$as_echo_n "checking for TIFFOpen in -ltiff... " >&6; }
+if ${ac_cv_lib_tiff_TIFFOpen+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-ltiff  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char TIFFOpen ();
+int
+main ()
+{
+return TIFFOpen ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_tiff_TIFFOpen=yes
+else
+  ac_cv_lib_tiff_TIFFOpen=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_tiff_TIFFOpen" >&5
+$as_echo "$ac_cv_lib_tiff_TIFFOpen" >&6; }
+if test "x$ac_cv_lib_tiff_TIFFOpen" = xyes; then :
+  passed=`expr $passed + 1`
+else
+  failed=`expr $failed + 1`
+fi
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for TIFFClientOpen in -ltiff" >&5
+$as_echo_n "checking for TIFFClientOpen in -ltiff... " >&6; }
+if ${ac_cv_lib_tiff_TIFFClientOpen+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-ltiff  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char TIFFClientOpen ();
+int
+main ()
+{
+return TIFFClientOpen ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_tiff_TIFFClientOpen=yes
+else
+  ac_cv_lib_tiff_TIFFClientOpen=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_tiff_TIFFClientOpen" >&5
+$as_echo "$ac_cv_lib_tiff_TIFFClientOpen" >&6; }
+if test "x$ac_cv_lib_tiff_TIFFClientOpen" = xyes; then :
+  passed=`expr $passed + 1`
+else
+  failed=`expr $failed + 1`
+fi
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for TIFFIsByteSwapped in -ltiff" >&5
+$as_echo_n "checking for TIFFIsByteSwapped in -ltiff... " >&6; }
+if ${ac_cv_lib_tiff_TIFFIsByteSwapped+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-ltiff  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char TIFFIsByteSwapped ();
+int
+main ()
+{
+return TIFFIsByteSwapped ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_tiff_TIFFIsByteSwapped=yes
+else
+  ac_cv_lib_tiff_TIFFIsByteSwapped=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_tiff_TIFFIsByteSwapped" >&5
+$as_echo "$ac_cv_lib_tiff_TIFFIsByteSwapped" >&6; }
+if test "x$ac_cv_lib_tiff_TIFFIsByteSwapped" = xyes; then :
+  passed=`expr $passed + 1`
+else
+  failed=`expr $failed + 1`
+fi
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking if TIFF package is complete" >&5
+$as_echo_n "checking if TIFF package is complete... " >&6; }
+    if test $passed -gt 0
+    then
+    if test $failed -gt 0
+    then
+	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no -- some components failed test" >&5
+$as_echo "no -- some components failed test" >&6; }
+	have_tiff='no (failed tests)'
+    else
+	LIB_TIFF='-ltiff'
+	LIBS="$LIB_TIFF $LIBS"
+
+$as_echo "#define HasTIFF 1" >>confdefs.h
+
+	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+	have_tiff='yes'
+	for ac_header in tiffconf.h
+do :
+  ac_fn_c_check_header_mongrel "$LINENO" "tiffconf.h" "ac_cv_header_tiffconf_h" "$ac_includes_default"
+if test "x$ac_cv_header_tiffconf_h" = xyes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_TIFFCONF_H 1
+_ACEOF
+
+fi
+
+done
+
+    fi
+    else
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+    fi
+fi
+ if test "$have_tiff" = 'yes'; then  # have_tiff is 'no', 'no (failed tests)' or 'yes' after the TIFF section
+  HasTIFF_TRUE=  # empty marks the active branch of the conditional
+  HasTIFF_FALSE='#'  # presumably a Makefile comment prefix for the inactive branch -- confirm in Makefile.in
+else
+  HasTIFF_TRUE='#'
+  HasTIFF_FALSE=
+fi
+
+
+
+
+# Libraries that the LCMS library depends on
+LCMS_LIB_DEPLIBS="$LIB_MATH $LIB_THREAD"  # LIB_MATH and LIB_THREAD are set outside this chunk
+LCMS_LIB_DEPLIBS=`echo $LCMS_LIB_DEPLIBS | sed -e 's/  */ /g'`  # collapse runs of spaces left by empty variables
+
+
+# Libraries that the jpegicc program depends on
+JPEGICC_DEPLIBS="$LIB_JPEG $LIB_MATH $LIB_THREAD"  # LIB_JPEG is '-ljpeg' only when the JPEG verdict was 'yes'
+JPEGICC_DEPLIBS=`echo $JPEGICC_DEPLIBS | sed -e 's/  */ /g'`
+
+
+# Libraries that the tifficc program depends on
+TIFFICC_DEPLIBS="$LIB_TIFF $LIB_JPEG $LIB_ZLIB $LIB_MATH $LIB_THREAD"  # LIB_TIFF and LIB_ZLIB stay empty unless their verdict was 'yes'
+TIFFICC_DEPLIBS=`echo $TIFFICC_DEPLIBS | sed -e 's/  */ /g'`
+
+
+LIBS=''  # drops the -ljpeg/-lz/-ltiff prepended during probing; the *_DEPLIBS lists above carry them instead
+
+#
+# Perform substitutions
+#
+ac_config_files="$ac_config_files Makefile"  # each line appends one output file; the list is later written to config.status as config_files
+
+ac_config_files="$ac_config_files lcms2.pc"
+
+ac_config_files="$ac_config_files include/Makefile"
+
+ac_config_files="$ac_config_files src/Makefile"
+
+ac_config_files="$ac_config_files utils/tificc/Makefile"
+
+ac_config_files="$ac_config_files utils/transicc/Makefile"
+
+ac_config_files="$ac_config_files utils/linkicc/Makefile"
+
+ac_config_files="$ac_config_files utils/jpgicc/Makefile"
+
+ac_config_files="$ac_config_files utils/psicc/Makefile"
+
+ac_config_files="$ac_config_files testbed/Makefile"
+
+cat >confcache <<\_ACEOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs, see configure's option --config-cache.
+# It is not useful on other systems.  If it contains results you don't
+# want to keep, you may remove or edit it.
+#
+# config.status only pays attention to the cache file if you give it
+# the --recheck option to rerun configure.
+#
+# `ac_cv_env_foo' variables (set or unset) will be overridden when
+# loading this file, other *unset* `ac_cv_foo' will be assigned the
+# following values.
+
+_ACEOF
+
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, we kill variables containing newlines.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+(
+  for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do
+    eval ac_val=\$$ac_var
+    case $ac_val in #(
+    *${as_nl}*)
+      case $ac_var in #(
+      *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
+$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
+      esac
+      case $ac_var in #(
+      _ | IFS | as_nl) ;; #(
+      BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
+      *) { eval $ac_var=; unset $ac_var;} ;;
+      esac ;;
+    esac
+  done
+
+  (set) 2>&1 |
+    case $as_nl`(ac_space=' '; set) 2>&1` in #(
+    *${as_nl}ac_space=\ *)
+      # `set' does not quote correctly, so add quotes: double-quote
+      # substitution turns \\\\ into \\, and sed turns \\ into \.
+      sed -n \
+	"s/'/'\\\\''/g;
+	  s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
+      ;; #(
+    *)
+      # `set' quotes correctly as required by POSIX, so do not add quotes.
+      sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+      ;;
+    esac |
+    sort
+) |
+  sed '
+     /^ac_cv_env_/b end
+     t clear
+     :clear
+     s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/
+     t end
+     s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
+     :end' >>confcache
+if diff "$cache_file" confcache >/dev/null 2>&1; then :; else  # identical cache: nothing to do; otherwise (or if diff fails) try to update
+  if test -w "$cache_file"; then
+    if test "x$cache_file" != "x/dev/null"; then  # /dev/null is writable but is never treated as a real cache
+      { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5
+$as_echo "$as_me: updating cache $cache_file" >&6;}
+      if test ! -f "$cache_file" || test -h "$cache_file"; then  # not a regular file, or a symlink: overwrite in place
+	cat confcache >"$cache_file"
+      else
+        case $cache_file in #(
+        */* | ?:*)
+	  mv -f confcache "$cache_file"$$ &&
+	  mv -f "$cache_file"$$ "$cache_file" ;; #(
+        *)
+	  mv -f confcache "$cache_file" ;;
+	esac  # paths with a '/' or an 'X:' prefix are staged under a $$-suffixed name beside the target, then renamed
+      fi
+    fi
+  else
+    { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5
+$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;}
+  fi
+fi
+rm -f confcache  # the scratch copy is removed on every path
+
+test "x$prefix" = xNONE && prefix=$ac_default_prefix  # NONE is the sentinel tested here for "prefix not chosen"
+# Let make expand exec_prefix.
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'  # single quotes keep ${prefix} literal, unexpanded by this shell
+
+# Transform confdefs.h into DEFS.
+# Protect against shell expansion while executing Makefile rules.
+# Protect against Makefile macro expansion.
+#
+# If the first sed substitution is executed (which looks for macros that
+# take arguments), then branch to the quote section.  Otherwise,
+# look for a macro that doesn't take arguments.
+ac_script='
+:mline
+/\\$/{
+ N
+ s,\\\n,,
+ b mline
+}
+t clear
+:clear
+s/^[	 ]*#[	 ]*define[	 ][	 ]*\([^	 (][^	 (]*([^)]*)\)[	 ]*\(.*\)/-D\1=\2/g
+t quote
+s/^[	 ]*#[	 ]*define[	 ][	 ]*\([^	 ][^	 ]*\)[	 ]*\(.*\)/-D\1=\2/g
+t quote
+b any
+:quote
+s/[	 `~#$^&*(){}\\|;'\''"<>?]/\\&/g
+s/\[/\\&/g
+s/\]/\\&/g
+s/\$/$$/g
+H
+:any
+${
+	g
+	s/^\n//
+	s/\n/ /g
+	p
+}
+'
+DEFS=`sed -n "$ac_script" confdefs.h`
+
+
+ac_libobjs=  # accumulator for the rewritten LIBOBJS list
+ac_ltlibobjs=  # accumulator for the .lo (LTLIBOBJS) counterpart
+U=
+for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue  # the leading ':' keeps the word list non-empty and is skipped at once
+  # 1. Remove the extension, and $U if already installed.
+  ac_script='s/\$U\././;s/\.o$//;s/\.obj$//'
+  ac_i=`$as_echo "$ac_i" | sed "$ac_script"`
+  # 2. Prepend LIBOBJDIR.  When used with automake>=1.10 LIBOBJDIR
+  #    will be set to the directory where LIBOBJS objects are built.
+  as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext"  # \$ keeps ${LIBOBJDIR} and $U literal in the stored value
+  as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo'
+done
+LIBOBJS=$ac_libobjs  # replace the original list with the rewritten entries
+
+LTLIBOBJS=$ac_ltlibobjs
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking that generated files are newer than configure" >&5
+$as_echo_n "checking that generated files are newer than configure... " >&6; }
+   if test -n "$am_sleep_pid"; then  # am_sleep_pid is set outside this chunk; presumably a background sleep -- confirm
+     # Hide warnings about reused PIDs.
+     wait $am_sleep_pid 2>/dev/null  # wait for that process before reporting 'done'
+   fi
+   { $as_echo "$as_me:${as_lineno-$LINENO}: result: done" >&5
+$as_echo "done" >&6; }
+ if test -n "$EXEEXT"; then  # conditional is active only when executables have a non-empty suffix
+  am__EXEEXT_TRUE=
+  am__EXEEXT_FALSE='#'
+else
+  am__EXEEXT_TRUE='#'
+  am__EXEEXT_FALSE=
+fi
+
+if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then  # a defined conditional has exactly one of the pair set to '#'; both empty means it was never set
+  as_fn_error $? "conditional \"AMDEP\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then  # same check for each conditional below
+  as_fn_error $? "conditional \"am__fastdepCC\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${am__fastdepCXX_TRUE}" && test -z "${am__fastdepCXX_FALSE}"; then
+  as_fn_error $? "conditional \"am__fastdepCXX\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then
+  as_fn_error $? "conditional \"MAINTAINER_MODE\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+
+if test -z "${HasJPEG_TRUE}" && test -z "${HasJPEG_FALSE}"; then  # the three Has* conditionals are the ones set by the JPEG/ZLIB/TIFF sections
+  as_fn_error $? "conditional \"HasJPEG\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${HasZLIB_TRUE}" && test -z "${HasZLIB_FALSE}"; then
+  as_fn_error $? "conditional \"HasZLIB\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${HasTIFF_TRUE}" && test -z "${HasTIFF_FALSE}"; then
+  as_fn_error $? "conditional \"HasTIFF\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+
+: "${CONFIG_STATUS=./config.status}"  # assign the default only when CONFIG_STATUS is unset
+ac_write_fail=0  # set to 1 by any failed write to $CONFIG_STATUS below
+ac_clean_files_save=$ac_clean_files  # keep the previous list before extending it
+ac_clean_files="$ac_clean_files $CONFIG_STATUS"  # NOTE(review): presumably lets exit cleanup remove a half-written file -- confirm against the exit trap
+{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5
+$as_echo "$as_me: creating $CONFIG_STATUS" >&6;}
+as_write_fail=0  # tracks failures while the M4sh prologue is written; checked before chmod +x
+cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1
+#! $SHELL
+# Generated by $as_me.
+# Run this file to recreate the current configuration.
+# Compiler output produced by configure, useful for debugging
+# configure, is in config.log if it exists.
+
+debug=false
+ac_cs_recheck=false
+ac_cs_silent=false
+
+SHELL=\${CONFIG_SHELL-$SHELL}
+export SHELL
+_ASEOF
+cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1
+## -------------------- ##
+## M4sh Initialization. ##
+## -------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in #(
+  *posix*) :
+    set -o posix ;; #(
+  *) :
+     ;;
+esac
+fi
+
+
+as_nl='
+'
+export as_nl
+# Printing a long string crashes Solaris 7 /usr/bin/printf.
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
+# Prefer a ksh shell builtin over an external printf program on Solaris,
+# but without wasting forks for bash or zsh.
+if test -z "$BASH_VERSION$ZSH_VERSION" \
+    && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='print -r --'
+  as_echo_n='print -rn --'
+elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='printf %s\n'
+  as_echo_n='printf %s'
+else
+  if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then
+    as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"'
+    as_echo_n='/usr/ucb/echo -n'
+  else
+    as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
+    as_echo_n_body='eval
+      arg=$1;
+      case $arg in #(
+      *"$as_nl"*)
+	expr "X$arg" : "X\\(.*\\)$as_nl";
+	arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
+      esac;
+      expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl"
+    '
+    export as_echo_n_body
+    as_echo_n='sh -c $as_echo_n_body as_echo'
+  fi
+  export as_echo_body
+  as_echo='sh -c $as_echo_body as_echo'
+fi
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+  PATH_SEPARATOR=:
+  (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
+    (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+      PATH_SEPARATOR=';'
+  }
+fi
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.  Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+IFS=" ""	$as_nl"
+
+# Find who we are.  Look in the path if we contain no directory separator.
+as_myself=
+case $0 in #((
+  *[\\/]* ) as_myself=$0 ;;
+  *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+  done
+IFS=$as_save_IFS
+
+     ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+  as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+  $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+  exit 1
+fi
+
+# Unset variables that we do not need and which cause bugs (e.g. in
+# pre-3.0 UWIN ksh).  But do not cause bugs in bash 2.01; the "|| exit 1"
+# suppresses any "Segmentation fault" message there.  '((' could
+# trigger a bug in pdksh 5.2.14.
+for as_var in BASH_ENV ENV MAIL MAILPATH
+do eval test x\${$as_var+set} = xset \
+  && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+done
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+LC_ALL=C
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# CDPATH.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
+# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
+# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
+# script with STATUS, using 1 if that was 0.
+as_fn_error ()
+{
+  as_status=$1; test $as_status -eq 0 && as_status=1
+  if test "$4"; then
+    as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+    $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
+  fi
+  $as_echo "$as_me: error: $2" >&2
+  as_fn_exit $as_status
+} # as_fn_error
+
+
+# as_fn_set_status STATUS
+# -----------------------
+# Set $? to STATUS, without forking.
+as_fn_set_status ()
+{
+  return $1
+} # as_fn_set_status
+
+# as_fn_exit STATUS
+# -----------------
+# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
+as_fn_exit ()
+{
+  set +e
+  as_fn_set_status $1
+  exit $1
+} # as_fn_exit
+
+# as_fn_unset VAR
+# ---------------
+# Portably unset VAR.
+as_fn_unset ()
+{
+  { eval $1=; unset $1;}
+}
+as_unset=as_fn_unset
+# as_fn_append VAR VALUE
+# ----------------------
+# Append the text in VALUE to the end of the definition contained in VAR. Take
+# advantage of any shell optimizations that allow amortized linear growth over
+# repeated appends, instead of the typical quadratic growth present in naive
+# implementations.
+if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :
+  eval 'as_fn_append ()
+  {
+    eval $1+=\$2
+  }'
+else
+  as_fn_append ()
+  {
+    eval $1=\$$1\$2
+  }
+fi # as_fn_append
+
+# as_fn_arith ARG...
+# ------------------
+# Perform arithmetic evaluation on the ARGs, and store the result in the
+# global $as_val. Take advantage of shells that can avoid forks. The arguments
+# must be portable across $(()) and expr.
+if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :
+  eval 'as_fn_arith ()
+  {
+    as_val=$(( $* ))
+  }'
+else
+  as_fn_arith ()
+  {
+    as_val=`expr "$@" || test $? -eq 1`
+  }
+fi # as_fn_arith
+
+
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then
+  as_expr=expr
+else
+  as_expr=false
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+  as_basename=basename
+else
+  as_basename=false
+fi
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+  as_dirname=dirname
+else
+  as_dirname=false
+fi
+
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+	 X"$0" : 'X\(//\)$' \| \
+	 X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in #(((((
+-n*)
+  case `echo 'xy\c'` in
+  *c*) ECHO_T='	';;	# ECHO_T is single tab character.
+  xy)  ECHO_C='\c';;
+  *)   echo `echo ksh88 bug on AIX 6.1` > /dev/null
+       ECHO_T='	';;
+  esac;;
+*)
+  ECHO_N='-n';;
+esac
+
+rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+  rm -f conf$$.dir/conf$$.file
+else
+  rm -f conf$$.dir
+  mkdir conf$$.dir 2>/dev/null
+fi
+if (echo >conf$$.file) 2>/dev/null; then
+  if ln -s conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s='ln -s'
+    # ... but there are two gotchas:
+    # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+    # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+    # In both cases, we have to default to `cp -pR'.
+    ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+      as_ln_s='cp -pR'
+  elif ln conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s=ln
+  else
+    as_ln_s='cp -pR'
+  fi
+else
+  as_ln_s='cp -pR'
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+
+# as_fn_mkdir_p
+# -------------
+# Create "$as_dir" as a directory, including parents if necessary.
+as_fn_mkdir_p ()
+{
+
+  case $as_dir in #(
+  -*) as_dir=./$as_dir;;
+  esac
+  test -d "$as_dir" || eval $as_mkdir_p || {
+    as_dirs=
+    while :; do
+      case $as_dir in #(
+      *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+      *) as_qdir=$as_dir;;
+      esac
+      as_dirs="'$as_qdir' $as_dirs"
+      as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$as_dir" : 'X\(//\)[^/]' \| \
+	 X"$as_dir" : 'X\(//\)$' \| \
+	 X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_dir" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+      test -d "$as_dir" && break
+    done
+    test -z "$as_dirs" || eval "mkdir $as_dirs"
+  } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
+
+
+} # as_fn_mkdir_p
+if mkdir -p . 2>/dev/null; then
+  as_mkdir_p='mkdir -p "$as_dir"'
+else
+  test -d ./-p && rmdir ./-p
+  as_mkdir_p=false
+fi
+
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+as_fn_executable_p ()
+{
+  test -f "$1" && test -x "$1"
+} # as_fn_executable_p
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+exec 6>&1
+## ----------------------------------- ##
+## Main body of $CONFIG_STATUS script. ##
+## ----------------------------------- ##
+_ASEOF
+test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# Save the log message, to keep $0 and so on meaningful, and to
+# report actual input values of CONFIG_FILES etc. instead of their
+# values after options handling.
+ac_log="
+This file was extended by lcms2 $as_me 2.9, which was
+generated by GNU Autoconf 2.69.  Invocation command line was
+
+  CONFIG_FILES    = $CONFIG_FILES
+  CONFIG_HEADERS  = $CONFIG_HEADERS
+  CONFIG_LINKS    = $CONFIG_LINKS
+  CONFIG_COMMANDS = $CONFIG_COMMANDS
+  $ $0 $@
+
+on `(hostname || uname -n) 2>/dev/null | sed 1q`
+"
+
+_ACEOF
+
+case $ac_config_files in *"
+"*) set x $ac_config_files; shift; ac_config_files=$*;;
+esac
+
+
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+# Files that config.status was made for.
+config_files="$ac_config_files"
+config_commands="$ac_config_commands"
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+ac_cs_usage="\
+\`$as_me' instantiates files and other configuration actions
+from templates according to the current configuration.  Unless the files
+and actions are specified as TAGs, all are instantiated by default.
+
+Usage: $0 [OPTION]... [TAG]...
+
+  -h, --help       print this help, then exit
+  -V, --version    print version number and configuration settings, then exit
+      --config     print configuration, then exit
+  -q, --quiet, --silent
+                   do not print progress messages
+  -d, --debug      don't remove temporary files
+      --recheck    update $as_me by reconfiguring in the same conditions
+      --file=FILE[:TEMPLATE]
+                   instantiate the configuration file FILE
+
+Configuration files:
+$config_files
+
+Configuration commands:
+$config_commands
+
+Report bugs to the package provider."
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
+ac_cs_version="\\
+lcms2 config.status 2.9
+configured by $0, generated by GNU Autoconf 2.69,
+  with options \\"\$ac_cs_config\\"
+
+Copyright (C) 2012 Free Software Foundation, Inc.
+This config.status script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it."
+
+ac_pwd='$ac_pwd'
+srcdir='$srcdir'
+INSTALL='$INSTALL'
+MKDIR_P='$MKDIR_P'
+AWK='$AWK'
+test -n "\$AWK" || AWK=awk
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# The default lists apply if the user does not specify any file.
+ac_need_defaults=:
+while test $# != 0
+do
+  case $1 in
+  --*=?*)
+    ac_option=`expr "X$1" : 'X\([^=]*\)='`
+    ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'`
+    ac_shift=:
+    ;;
+  --*=)
+    ac_option=`expr "X$1" : 'X\([^=]*\)='`
+    ac_optarg=
+    ac_shift=:
+    ;;
+  *)
+    ac_option=$1
+    ac_optarg=$2
+    ac_shift=shift
+    ;;
+  esac
+
+  case $ac_option in
+  # Handling of the options.
+  -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+    ac_cs_recheck=: ;;
+  --version | --versio | --versi | --vers | --ver | --ve | --v | -V )
+    $as_echo "$ac_cs_version"; exit ;;
+  --config | --confi | --conf | --con | --co | --c )
+    $as_echo "$ac_cs_config"; exit ;;
+  --debug | --debu | --deb | --de | --d | -d )
+    debug=: ;;
+  --file | --fil | --fi | --f )
+    $ac_shift
+    case $ac_optarg in
+    *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    '') as_fn_error $? "missing file argument" ;;
+    esac
+    as_fn_append CONFIG_FILES " '$ac_optarg'"
+    ac_need_defaults=false;;
+  --he | --h |  --help | --hel | -h )
+    $as_echo "$ac_cs_usage"; exit ;;
+  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+  | -silent | --silent | --silen | --sile | --sil | --si | --s)
+    ac_cs_silent=: ;;
+
+  # This is an error.
+  -*) as_fn_error $? "unrecognized option: \`$1'
+Try \`$0 --help' for more information." ;;
+
+  *) as_fn_append ac_config_targets " $1"
+     ac_need_defaults=false ;;
+
+  esac
+  shift
+done
+
+ac_configure_extra_args=
+
+if $ac_cs_silent; then
+  exec 6>/dev/null
+  ac_configure_extra_args="$ac_configure_extra_args --silent"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+if \$ac_cs_recheck; then
+  set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
+  shift
+  \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6
+  CONFIG_SHELL='$SHELL'
+  export CONFIG_SHELL
+  exec "\$@"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+exec 5>>config.log
+{
+  echo
+  sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
+## Running $as_me. ##
+_ASBOX
+  $as_echo "$ac_log"
+} >&5
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+#
+# INIT-COMMANDS
+#
+AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"
+
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+sed_quote_subst='$sed_quote_subst'
+double_quote_subst='$double_quote_subst'
+delay_variable_subst='$delay_variable_subst'
+enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`'
+enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`'
+AS='`$ECHO "$AS" | $SED "$delay_single_quote_subst"`'
+DLLTOOL='`$ECHO "$DLLTOOL" | $SED "$delay_single_quote_subst"`'
+OBJDUMP='`$ECHO "$OBJDUMP" | $SED "$delay_single_quote_subst"`'
+macro_version='`$ECHO "$macro_version" | $SED "$delay_single_quote_subst"`'
+macro_revision='`$ECHO "$macro_revision" | $SED "$delay_single_quote_subst"`'
+pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`'
+enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`'
+SHELL='`$ECHO "$SHELL" | $SED "$delay_single_quote_subst"`'
+ECHO='`$ECHO "$ECHO" | $SED "$delay_single_quote_subst"`'
+PATH_SEPARATOR='`$ECHO "$PATH_SEPARATOR" | $SED "$delay_single_quote_subst"`'
+host_alias='`$ECHO "$host_alias" | $SED "$delay_single_quote_subst"`'
+host='`$ECHO "$host" | $SED "$delay_single_quote_subst"`'
+host_os='`$ECHO "$host_os" | $SED "$delay_single_quote_subst"`'
+build_alias='`$ECHO "$build_alias" | $SED "$delay_single_quote_subst"`'
+build='`$ECHO "$build" | $SED "$delay_single_quote_subst"`'
+build_os='`$ECHO "$build_os" | $SED "$delay_single_quote_subst"`'
+SED='`$ECHO "$SED" | $SED "$delay_single_quote_subst"`'
+Xsed='`$ECHO "$Xsed" | $SED "$delay_single_quote_subst"`'
+GREP='`$ECHO "$GREP" | $SED "$delay_single_quote_subst"`'
+EGREP='`$ECHO "$EGREP" | $SED "$delay_single_quote_subst"`'
+FGREP='`$ECHO "$FGREP" | $SED "$delay_single_quote_subst"`'
+LD='`$ECHO "$LD" | $SED "$delay_single_quote_subst"`'
+NM='`$ECHO "$NM" | $SED "$delay_single_quote_subst"`'
+LN_S='`$ECHO "$LN_S" | $SED "$delay_single_quote_subst"`'
+max_cmd_len='`$ECHO "$max_cmd_len" | $SED "$delay_single_quote_subst"`'
+ac_objext='`$ECHO "$ac_objext" | $SED "$delay_single_quote_subst"`'
+exeext='`$ECHO "$exeext" | $SED "$delay_single_quote_subst"`'
+lt_unset='`$ECHO "$lt_unset" | $SED "$delay_single_quote_subst"`'
+lt_SP2NL='`$ECHO "$lt_SP2NL" | $SED "$delay_single_quote_subst"`'
+lt_NL2SP='`$ECHO "$lt_NL2SP" | $SED "$delay_single_quote_subst"`'
+lt_cv_to_host_file_cmd='`$ECHO "$lt_cv_to_host_file_cmd" | $SED "$delay_single_quote_subst"`'
+lt_cv_to_tool_file_cmd='`$ECHO "$lt_cv_to_tool_file_cmd" | $SED "$delay_single_quote_subst"`'
+reload_flag='`$ECHO "$reload_flag" | $SED "$delay_single_quote_subst"`'
+reload_cmds='`$ECHO "$reload_cmds" | $SED "$delay_single_quote_subst"`'
+deplibs_check_method='`$ECHO "$deplibs_check_method" | $SED "$delay_single_quote_subst"`'
+file_magic_cmd='`$ECHO "$file_magic_cmd" | $SED "$delay_single_quote_subst"`'
+file_magic_glob='`$ECHO "$file_magic_glob" | $SED "$delay_single_quote_subst"`'
+want_nocaseglob='`$ECHO "$want_nocaseglob" | $SED "$delay_single_quote_subst"`'
+sharedlib_from_linklib_cmd='`$ECHO "$sharedlib_from_linklib_cmd" | $SED "$delay_single_quote_subst"`'
+AR='`$ECHO "$AR" | $SED "$delay_single_quote_subst"`'
+AR_FLAGS='`$ECHO "$AR_FLAGS" | $SED "$delay_single_quote_subst"`'
+archiver_list_spec='`$ECHO "$archiver_list_spec" | $SED "$delay_single_quote_subst"`'
+STRIP='`$ECHO "$STRIP" | $SED "$delay_single_quote_subst"`'
+RANLIB='`$ECHO "$RANLIB" | $SED "$delay_single_quote_subst"`'
+old_postinstall_cmds='`$ECHO "$old_postinstall_cmds" | $SED "$delay_single_quote_subst"`'
+old_postuninstall_cmds='`$ECHO "$old_postuninstall_cmds" | $SED "$delay_single_quote_subst"`'
+old_archive_cmds='`$ECHO "$old_archive_cmds" | $SED "$delay_single_quote_subst"`'
+lock_old_archive_extraction='`$ECHO "$lock_old_archive_extraction" | $SED "$delay_single_quote_subst"`'
+CC='`$ECHO "$CC" | $SED "$delay_single_quote_subst"`'
+CFLAGS='`$ECHO "$CFLAGS" | $SED "$delay_single_quote_subst"`'
+compiler='`$ECHO "$compiler" | $SED "$delay_single_quote_subst"`'
+GCC='`$ECHO "$GCC" | $SED "$delay_single_quote_subst"`'
+lt_cv_sys_global_symbol_pipe='`$ECHO "$lt_cv_sys_global_symbol_pipe" | $SED "$delay_single_quote_subst"`'
+lt_cv_sys_global_symbol_to_cdecl='`$ECHO "$lt_cv_sys_global_symbol_to_cdecl" | $SED "$delay_single_quote_subst"`'
+lt_cv_sys_global_symbol_to_c_name_address='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address" | $SED "$delay_single_quote_subst"`'
+lt_cv_sys_global_symbol_to_c_name_address_lib_prefix='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address_lib_prefix" | $SED "$delay_single_quote_subst"`'
+nm_file_list_spec='`$ECHO "$nm_file_list_spec" | $SED "$delay_single_quote_subst"`'
+lt_sysroot='`$ECHO "$lt_sysroot" | $SED "$delay_single_quote_subst"`'
+objdir='`$ECHO "$objdir" | $SED "$delay_single_quote_subst"`'
+MAGIC_CMD='`$ECHO "$MAGIC_CMD" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_no_builtin_flag='`$ECHO "$lt_prog_compiler_no_builtin_flag" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_pic='`$ECHO "$lt_prog_compiler_pic" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_wl='`$ECHO "$lt_prog_compiler_wl" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_static='`$ECHO "$lt_prog_compiler_static" | $SED "$delay_single_quote_subst"`'
+lt_cv_prog_compiler_c_o='`$ECHO "$lt_cv_prog_compiler_c_o" | $SED "$delay_single_quote_subst"`'
+need_locks='`$ECHO "$need_locks" | $SED "$delay_single_quote_subst"`'
+MANIFEST_TOOL='`$ECHO "$MANIFEST_TOOL" | $SED "$delay_single_quote_subst"`'
+DSYMUTIL='`$ECHO "$DSYMUTIL" | $SED "$delay_single_quote_subst"`'
+NMEDIT='`$ECHO "$NMEDIT" | $SED "$delay_single_quote_subst"`'
+LIPO='`$ECHO "$LIPO" | $SED "$delay_single_quote_subst"`'
+OTOOL='`$ECHO "$OTOOL" | $SED "$delay_single_quote_subst"`'
+OTOOL64='`$ECHO "$OTOOL64" | $SED "$delay_single_quote_subst"`'
+libext='`$ECHO "$libext" | $SED "$delay_single_quote_subst"`'
+shrext_cmds='`$ECHO "$shrext_cmds" | $SED "$delay_single_quote_subst"`'
+extract_expsyms_cmds='`$ECHO "$extract_expsyms_cmds" | $SED "$delay_single_quote_subst"`'
+archive_cmds_need_lc='`$ECHO "$archive_cmds_need_lc" | $SED "$delay_single_quote_subst"`'
+enable_shared_with_static_runtimes='`$ECHO "$enable_shared_with_static_runtimes" | $SED "$delay_single_quote_subst"`'
+export_dynamic_flag_spec='`$ECHO "$export_dynamic_flag_spec" | $SED "$delay_single_quote_subst"`'
+whole_archive_flag_spec='`$ECHO "$whole_archive_flag_spec" | $SED "$delay_single_quote_subst"`'
+compiler_needs_object='`$ECHO "$compiler_needs_object" | $SED "$delay_single_quote_subst"`'
+old_archive_from_new_cmds='`$ECHO "$old_archive_from_new_cmds" | $SED "$delay_single_quote_subst"`'
+old_archive_from_expsyms_cmds='`$ECHO "$old_archive_from_expsyms_cmds" | $SED "$delay_single_quote_subst"`'
+archive_cmds='`$ECHO "$archive_cmds" | $SED "$delay_single_quote_subst"`'
+archive_expsym_cmds='`$ECHO "$archive_expsym_cmds" | $SED "$delay_single_quote_subst"`'
+module_cmds='`$ECHO "$module_cmds" | $SED "$delay_single_quote_subst"`'
+module_expsym_cmds='`$ECHO "$module_expsym_cmds" | $SED "$delay_single_quote_subst"`'
+with_gnu_ld='`$ECHO "$with_gnu_ld" | $SED "$delay_single_quote_subst"`'
+allow_undefined_flag='`$ECHO "$allow_undefined_flag" | $SED "$delay_single_quote_subst"`'
+no_undefined_flag='`$ECHO "$no_undefined_flag" | $SED "$delay_single_quote_subst"`'
+hardcode_libdir_flag_spec='`$ECHO "$hardcode_libdir_flag_spec" | $SED "$delay_single_quote_subst"`'
+hardcode_libdir_separator='`$ECHO "$hardcode_libdir_separator" | $SED "$delay_single_quote_subst"`'
+hardcode_direct='`$ECHO "$hardcode_direct" | $SED "$delay_single_quote_subst"`'
+hardcode_direct_absolute='`$ECHO "$hardcode_direct_absolute" | $SED "$delay_single_quote_subst"`'
+hardcode_minus_L='`$ECHO "$hardcode_minus_L" | $SED "$delay_single_quote_subst"`'
+hardcode_shlibpath_var='`$ECHO "$hardcode_shlibpath_var" | $SED "$delay_single_quote_subst"`'
+hardcode_automatic='`$ECHO "$hardcode_automatic" | $SED "$delay_single_quote_subst"`'
+inherit_rpath='`$ECHO "$inherit_rpath" | $SED "$delay_single_quote_subst"`'
+link_all_deplibs='`$ECHO "$link_all_deplibs" | $SED "$delay_single_quote_subst"`'
+always_export_symbols='`$ECHO "$always_export_symbols" | $SED "$delay_single_quote_subst"`'
+export_symbols_cmds='`$ECHO "$export_symbols_cmds" | $SED "$delay_single_quote_subst"`'
+exclude_expsyms='`$ECHO "$exclude_expsyms" | $SED "$delay_single_quote_subst"`'
+include_expsyms='`$ECHO "$include_expsyms" | $SED "$delay_single_quote_subst"`'
+prelink_cmds='`$ECHO "$prelink_cmds" | $SED "$delay_single_quote_subst"`'
+postlink_cmds='`$ECHO "$postlink_cmds" | $SED "$delay_single_quote_subst"`'
+file_list_spec='`$ECHO "$file_list_spec" | $SED "$delay_single_quote_subst"`'
+variables_saved_for_relink='`$ECHO "$variables_saved_for_relink" | $SED "$delay_single_quote_subst"`'
+need_lib_prefix='`$ECHO "$need_lib_prefix" | $SED "$delay_single_quote_subst"`'
+need_version='`$ECHO "$need_version" | $SED "$delay_single_quote_subst"`'
+version_type='`$ECHO "$version_type" | $SED "$delay_single_quote_subst"`'
+runpath_var='`$ECHO "$runpath_var" | $SED "$delay_single_quote_subst"`'
+shlibpath_var='`$ECHO "$shlibpath_var" | $SED "$delay_single_quote_subst"`'
+shlibpath_overrides_runpath='`$ECHO "$shlibpath_overrides_runpath" | $SED "$delay_single_quote_subst"`'
+libname_spec='`$ECHO "$libname_spec" | $SED "$delay_single_quote_subst"`'
+library_names_spec='`$ECHO "$library_names_spec" | $SED "$delay_single_quote_subst"`'
+soname_spec='`$ECHO "$soname_spec" | $SED "$delay_single_quote_subst"`'
+install_override_mode='`$ECHO "$install_override_mode" | $SED "$delay_single_quote_subst"`'
+postinstall_cmds='`$ECHO "$postinstall_cmds" | $SED "$delay_single_quote_subst"`'
+postuninstall_cmds='`$ECHO "$postuninstall_cmds" | $SED "$delay_single_quote_subst"`'
+finish_cmds='`$ECHO "$finish_cmds" | $SED "$delay_single_quote_subst"`'
+finish_eval='`$ECHO "$finish_eval" | $SED "$delay_single_quote_subst"`'
+hardcode_into_libs='`$ECHO "$hardcode_into_libs" | $SED "$delay_single_quote_subst"`'
+sys_lib_search_path_spec='`$ECHO "$sys_lib_search_path_spec" | $SED "$delay_single_quote_subst"`'
+sys_lib_dlsearch_path_spec='`$ECHO "$sys_lib_dlsearch_path_spec" | $SED "$delay_single_quote_subst"`'
+hardcode_action='`$ECHO "$hardcode_action" | $SED "$delay_single_quote_subst"`'
+enable_dlopen='`$ECHO "$enable_dlopen" | $SED "$delay_single_quote_subst"`'
+enable_dlopen_self='`$ECHO "$enable_dlopen_self" | $SED "$delay_single_quote_subst"`'
+enable_dlopen_self_static='`$ECHO "$enable_dlopen_self_static" | $SED "$delay_single_quote_subst"`'
+old_striplib='`$ECHO "$old_striplib" | $SED "$delay_single_quote_subst"`'
+striplib='`$ECHO "$striplib" | $SED "$delay_single_quote_subst"`'
+compiler_lib_search_dirs='`$ECHO "$compiler_lib_search_dirs" | $SED "$delay_single_quote_subst"`'
+predep_objects='`$ECHO "$predep_objects" | $SED "$delay_single_quote_subst"`'
+postdep_objects='`$ECHO "$postdep_objects" | $SED "$delay_single_quote_subst"`'
+predeps='`$ECHO "$predeps" | $SED "$delay_single_quote_subst"`'
+postdeps='`$ECHO "$postdeps" | $SED "$delay_single_quote_subst"`'
+compiler_lib_search_path='`$ECHO "$compiler_lib_search_path" | $SED "$delay_single_quote_subst"`'
+LD_CXX='`$ECHO "$LD_CXX" | $SED "$delay_single_quote_subst"`'
+reload_flag_CXX='`$ECHO "$reload_flag_CXX" | $SED "$delay_single_quote_subst"`'
+reload_cmds_CXX='`$ECHO "$reload_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+old_archive_cmds_CXX='`$ECHO "$old_archive_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+compiler_CXX='`$ECHO "$compiler_CXX" | $SED "$delay_single_quote_subst"`'
+GCC_CXX='`$ECHO "$GCC_CXX" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_no_builtin_flag_CXX='`$ECHO "$lt_prog_compiler_no_builtin_flag_CXX" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_pic_CXX='`$ECHO "$lt_prog_compiler_pic_CXX" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_wl_CXX='`$ECHO "$lt_prog_compiler_wl_CXX" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_static_CXX='`$ECHO "$lt_prog_compiler_static_CXX" | $SED "$delay_single_quote_subst"`'
+lt_cv_prog_compiler_c_o_CXX='`$ECHO "$lt_cv_prog_compiler_c_o_CXX" | $SED "$delay_single_quote_subst"`'
+archive_cmds_need_lc_CXX='`$ECHO "$archive_cmds_need_lc_CXX" | $SED "$delay_single_quote_subst"`'
+enable_shared_with_static_runtimes_CXX='`$ECHO "$enable_shared_with_static_runtimes_CXX" | $SED "$delay_single_quote_subst"`'
+export_dynamic_flag_spec_CXX='`$ECHO "$export_dynamic_flag_spec_CXX" | $SED "$delay_single_quote_subst"`'
+whole_archive_flag_spec_CXX='`$ECHO "$whole_archive_flag_spec_CXX" | $SED "$delay_single_quote_subst"`'
+compiler_needs_object_CXX='`$ECHO "$compiler_needs_object_CXX" | $SED "$delay_single_quote_subst"`'
+old_archive_from_new_cmds_CXX='`$ECHO "$old_archive_from_new_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+old_archive_from_expsyms_cmds_CXX='`$ECHO "$old_archive_from_expsyms_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+archive_cmds_CXX='`$ECHO "$archive_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+archive_expsym_cmds_CXX='`$ECHO "$archive_expsym_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+module_cmds_CXX='`$ECHO "$module_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+module_expsym_cmds_CXX='`$ECHO "$module_expsym_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+with_gnu_ld_CXX='`$ECHO "$with_gnu_ld_CXX" | $SED "$delay_single_quote_subst"`'
+allow_undefined_flag_CXX='`$ECHO "$allow_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`'
+no_undefined_flag_CXX='`$ECHO "$no_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`'
+hardcode_libdir_flag_spec_CXX='`$ECHO "$hardcode_libdir_flag_spec_CXX" | $SED "$delay_single_quote_subst"`'
+hardcode_libdir_separator_CXX='`$ECHO "$hardcode_libdir_separator_CXX" | $SED "$delay_single_quote_subst"`'
+hardcode_direct_CXX='`$ECHO "$hardcode_direct_CXX" | $SED "$delay_single_quote_subst"`'
+hardcode_direct_absolute_CXX='`$ECHO "$hardcode_direct_absolute_CXX" | $SED "$delay_single_quote_subst"`'
+hardcode_minus_L_CXX='`$ECHO "$hardcode_minus_L_CXX" | $SED "$delay_single_quote_subst"`'
+hardcode_shlibpath_var_CXX='`$ECHO "$hardcode_shlibpath_var_CXX" | $SED "$delay_single_quote_subst"`'
+hardcode_automatic_CXX='`$ECHO "$hardcode_automatic_CXX" | $SED "$delay_single_quote_subst"`'
+inherit_rpath_CXX='`$ECHO "$inherit_rpath_CXX" | $SED "$delay_single_quote_subst"`'
+link_all_deplibs_CXX='`$ECHO "$link_all_deplibs_CXX" | $SED "$delay_single_quote_subst"`'
+always_export_symbols_CXX='`$ECHO "$always_export_symbols_CXX" | $SED "$delay_single_quote_subst"`'
+export_symbols_cmds_CXX='`$ECHO "$export_symbols_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+exclude_expsyms_CXX='`$ECHO "$exclude_expsyms_CXX" | $SED "$delay_single_quote_subst"`'
+include_expsyms_CXX='`$ECHO "$include_expsyms_CXX" | $SED "$delay_single_quote_subst"`'
+prelink_cmds_CXX='`$ECHO "$prelink_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+postlink_cmds_CXX='`$ECHO "$postlink_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+file_list_spec_CXX='`$ECHO "$file_list_spec_CXX" | $SED "$delay_single_quote_subst"`'
+hardcode_action_CXX='`$ECHO "$hardcode_action_CXX" | $SED "$delay_single_quote_subst"`'
+compiler_lib_search_dirs_CXX='`$ECHO "$compiler_lib_search_dirs_CXX" | $SED "$delay_single_quote_subst"`'
+predep_objects_CXX='`$ECHO "$predep_objects_CXX" | $SED "$delay_single_quote_subst"`'
+postdep_objects_CXX='`$ECHO "$postdep_objects_CXX" | $SED "$delay_single_quote_subst"`'
+predeps_CXX='`$ECHO "$predeps_CXX" | $SED "$delay_single_quote_subst"`'
+postdeps_CXX='`$ECHO "$postdeps_CXX" | $SED "$delay_single_quote_subst"`'
+compiler_lib_search_path_CXX='`$ECHO "$compiler_lib_search_path_CXX" | $SED "$delay_single_quote_subst"`'
+
+LTCC='$LTCC'
+LTCFLAGS='$LTCFLAGS'
+compiler='$compiler_DEFAULT'
+
+# A function that is used when there is no print builtin or printf.
+func_fallback_echo ()
+{
+  eval 'cat <<_LTECHO_EOF
+\$1
+_LTECHO_EOF'
+}
+
+# Quote evaled strings.
+for var in AS \
+DLLTOOL \
+OBJDUMP \
+SHELL \
+ECHO \
+PATH_SEPARATOR \
+SED \
+GREP \
+EGREP \
+FGREP \
+LD \
+NM \
+LN_S \
+lt_SP2NL \
+lt_NL2SP \
+reload_flag \
+deplibs_check_method \
+file_magic_cmd \
+file_magic_glob \
+want_nocaseglob \
+sharedlib_from_linklib_cmd \
+AR \
+AR_FLAGS \
+archiver_list_spec \
+STRIP \
+RANLIB \
+CC \
+CFLAGS \
+compiler \
+lt_cv_sys_global_symbol_pipe \
+lt_cv_sys_global_symbol_to_cdecl \
+lt_cv_sys_global_symbol_to_c_name_address \
+lt_cv_sys_global_symbol_to_c_name_address_lib_prefix \
+nm_file_list_spec \
+lt_prog_compiler_no_builtin_flag \
+lt_prog_compiler_pic \
+lt_prog_compiler_wl \
+lt_prog_compiler_static \
+lt_cv_prog_compiler_c_o \
+need_locks \
+MANIFEST_TOOL \
+DSYMUTIL \
+NMEDIT \
+LIPO \
+OTOOL \
+OTOOL64 \
+shrext_cmds \
+export_dynamic_flag_spec \
+whole_archive_flag_spec \
+compiler_needs_object \
+with_gnu_ld \
+allow_undefined_flag \
+no_undefined_flag \
+hardcode_libdir_flag_spec \
+hardcode_libdir_separator \
+exclude_expsyms \
+include_expsyms \
+file_list_spec \
+variables_saved_for_relink \
+libname_spec \
+library_names_spec \
+soname_spec \
+install_override_mode \
+finish_eval \
+old_striplib \
+striplib \
+compiler_lib_search_dirs \
+predep_objects \
+postdep_objects \
+predeps \
+postdeps \
+compiler_lib_search_path \
+LD_CXX \
+reload_flag_CXX \
+compiler_CXX \
+lt_prog_compiler_no_builtin_flag_CXX \
+lt_prog_compiler_pic_CXX \
+lt_prog_compiler_wl_CXX \
+lt_prog_compiler_static_CXX \
+lt_cv_prog_compiler_c_o_CXX \
+export_dynamic_flag_spec_CXX \
+whole_archive_flag_spec_CXX \
+compiler_needs_object_CXX \
+with_gnu_ld_CXX \
+allow_undefined_flag_CXX \
+no_undefined_flag_CXX \
+hardcode_libdir_flag_spec_CXX \
+hardcode_libdir_separator_CXX \
+exclude_expsyms_CXX \
+include_expsyms_CXX \
+file_list_spec_CXX \
+compiler_lib_search_dirs_CXX \
+predep_objects_CXX \
+postdep_objects_CXX \
+predeps_CXX \
+postdeps_CXX \
+compiler_lib_search_path_CXX; do
+    case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in
+    *[\\\\\\\`\\"\\\$]*)
+      eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\""
+      ;;
+    *)
+      eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\""
+      ;;
+    esac
+done
+
+# Double-quote double-evaled strings.
+for var in reload_cmds \
+old_postinstall_cmds \
+old_postuninstall_cmds \
+old_archive_cmds \
+extract_expsyms_cmds \
+old_archive_from_new_cmds \
+old_archive_from_expsyms_cmds \
+archive_cmds \
+archive_expsym_cmds \
+module_cmds \
+module_expsym_cmds \
+export_symbols_cmds \
+prelink_cmds \
+postlink_cmds \
+postinstall_cmds \
+postuninstall_cmds \
+finish_cmds \
+sys_lib_search_path_spec \
+sys_lib_dlsearch_path_spec \
+reload_cmds_CXX \
+old_archive_cmds_CXX \
+old_archive_from_new_cmds_CXX \
+old_archive_from_expsyms_cmds_CXX \
+archive_cmds_CXX \
+archive_expsym_cmds_CXX \
+module_cmds_CXX \
+module_expsym_cmds_CXX \
+export_symbols_cmds_CXX \
+prelink_cmds_CXX \
+postlink_cmds_CXX; do
+    case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in
+    *[\\\\\\\`\\"\\\$]*)
+      eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\""
+      ;;
+    *)
+      eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\""
+      ;;
+    esac
+done
+
+ac_aux_dir='$ac_aux_dir'
+xsi_shell='$xsi_shell'
+lt_shell_append='$lt_shell_append'
+
+# See if we are running on zsh, and set the options which allow our
+# commands through without removal of \ escapes INIT.
+if test -n "\${ZSH_VERSION+set}" ; then
+   setopt NO_GLOB_SUBST
+fi
+
+
+    PACKAGE='$PACKAGE'
+    VERSION='$VERSION'
+    TIMESTAMP='$TIMESTAMP'
+    RM='$RM'
+    ofile='$ofile'
+
+
+
+
+
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+
+# Handling of arguments.
+for ac_config_target in $ac_config_targets
+do
+  case $ac_config_target in
+    "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;;
+    "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;;
+    "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
+    "lcms2.pc") CONFIG_FILES="$CONFIG_FILES lcms2.pc" ;;
+    "include/Makefile") CONFIG_FILES="$CONFIG_FILES include/Makefile" ;;
+    "src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;;
+    "utils/tificc/Makefile") CONFIG_FILES="$CONFIG_FILES utils/tificc/Makefile" ;;
+    "utils/transicc/Makefile") CONFIG_FILES="$CONFIG_FILES utils/transicc/Makefile" ;;
+    "utils/linkicc/Makefile") CONFIG_FILES="$CONFIG_FILES utils/linkicc/Makefile" ;;
+    "utils/jpgicc/Makefile") CONFIG_FILES="$CONFIG_FILES utils/jpgicc/Makefile" ;;
+    "utils/psicc/Makefile") CONFIG_FILES="$CONFIG_FILES utils/psicc/Makefile" ;;
+    "testbed/Makefile") CONFIG_FILES="$CONFIG_FILES testbed/Makefile" ;;
+
+  *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
+  esac
+done
+
+
+# If the user did not use the arguments to specify the items to instantiate,
+# then the envvar interface is used.  Set only those that are not.
+# We use the long form for the default assignment because of an extremely
+# bizarre bug on SunOS 4.1.3.
+if $ac_need_defaults; then
+  test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
+  test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands
+fi
+
+# Have a temporary directory for convenience.  Make it in the build tree
+# simply because there is no reason against having it here, and in addition,
+# creating and moving files from /tmp can sometimes cause problems.
+# Hook for its removal unless debugging.
+# Note that there is a small window in which the directory will not be cleaned:
+# after its creation but before its name has been assigned to `$tmp'.
+$debug ||
+{
+  tmp= ac_tmp=
+  trap 'exit_status=$?
+  : "${ac_tmp:=$tmp}"
+  { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status
+' 0
+  trap 'as_fn_exit 1' 1 2 13 15
+}
+# Create a (secure) tmp directory for tmp files.
+
+{
+  tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` &&
+  test -d "$tmp"
+}  ||
+{
+  tmp=./conf$$-$RANDOM
+  (umask 077 && mkdir "$tmp")
+} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5
+ac_tmp=$tmp
+
+# Set up the scripts for CONFIG_FILES section.
+# No need to generate them if there are no CONFIG_FILES.
+# This happens for instance with `./config.status config.h'.
+if test -n "$CONFIG_FILES"; then
+
+
+ac_cr=`echo X | tr X '\015'`
+# On cygwin, bash can eat \r inside `` if the user requested igncr.
+# But we know of no other shell where ac_cr would be empty at this
+# point, so we can use a bashism as a fallback.
+if test "x$ac_cr" = x; then
+  eval ac_cr=\$\'\\r\'
+fi
+ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' </dev/null 2>/dev/null`
+if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then
+  ac_cs_awk_cr='\\r'
+else
+  ac_cs_awk_cr=$ac_cr
+fi
+
+echo 'BEGIN {' >"$ac_tmp/subs1.awk" &&
+_ACEOF
+
+
+{
+  echo "cat >conf$$subs.awk <<_ACEOF" &&
+  echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' &&
+  echo "_ACEOF"
+} >conf$$subs.sh ||
+  as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'`
+ac_delim='%!_!# '
+for ac_last_try in false false false false false :; do
+  . ./conf$$subs.sh ||
+    as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+
+  ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X`
+  if test $ac_delim_n = $ac_delim_num; then
+    break
+  elif $ac_last_try; then
+    as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+  else
+    ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+  fi
+done
+rm -f conf$$subs.sh
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK &&
+_ACEOF
+sed -n '
+h
+s/^/S["/; s/!.*/"]=/
+p
+g
+s/^[^!]*!//
+:repl
+t repl
+s/'"$ac_delim"'$//
+t delim
+:nl
+h
+s/\(.\{148\}\)..*/\1/
+t more1
+s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/
+p
+n
+b repl
+:more1
+s/["\\]/\\&/g; s/^/"/; s/$/"\\/
+p
+g
+s/.\{148\}//
+t nl
+:delim
+h
+s/\(.\{148\}\)..*/\1/
+t more2
+s/["\\]/\\&/g; s/^/"/; s/$/"/
+p
+b
+:more2
+s/["\\]/\\&/g; s/^/"/; s/$/"\\/
+p
+g
+s/.\{148\}//
+t delim
+' <conf$$subs.awk | sed '
+/^[^""]/{
+  N
+  s/\n//
+}
+' >>$CONFIG_STATUS || ac_write_fail=1
+rm -f conf$$subs.awk
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+_ACAWK
+cat >>"\$ac_tmp/subs1.awk" <<_ACAWK &&
+  for (key in S) S_is_set[key] = 1
+  FS = ""
+
+}
+{
+  line = $ 0
+  nfields = split(line, field, "@")
+  substed = 0
+  len = length(field[1])
+  for (i = 2; i < nfields; i++) {
+    key = field[i]
+    keylen = length(key)
+    if (S_is_set[key]) {
+      value = S[key]
+      line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3)
+      len += length(value) + length(field[++i])
+      substed = 1
+    } else
+      len += 1 + keylen
+  }
+
+  print line
+}
+
+_ACAWK
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then
+  sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g"
+else
+  cat
+fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \
+  || as_fn_error $? "could not setup config files machinery" "$LINENO" 5
+_ACEOF
+
+# VPATH may cause trouble with some makes, so we remove sole $(srcdir),
+# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and
+# trailing colons and then remove the whole line if VPATH becomes empty
+# (actually we leave an empty line to preserve line numbers).
+if test "x$srcdir" = x.; then
+  ac_vpsub='/^[	 ]*VPATH[	 ]*=[	 ]*/{
+h
+s///
+s/^/:/
+s/[	 ]*$/:/
+s/:\$(srcdir):/:/g
+s/:\${srcdir}:/:/g
+s/:@srcdir@:/:/g
+s/^:*//
+s/:*$//
+x
+s/\(=[	 ]*\).*/\1/
+G
+s/\n//
+s/^[^=]*=[	 ]*$//
+}'
+fi
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+fi # test -n "$CONFIG_FILES"
+
+
+eval set X "  :F $CONFIG_FILES      :C $CONFIG_COMMANDS"
+shift
+for ac_tag
+do
+  case $ac_tag in
+  :[FHLC]) ac_mode=$ac_tag; continue;;
+  esac
+  case $ac_mode$ac_tag in
+  :[FHL]*:*);;
+  :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;;
+  :[FH]-) ac_tag=-:-;;
+  :[FH]*) ac_tag=$ac_tag:$ac_tag.in;;
+  esac
+  ac_save_IFS=$IFS
+  IFS=:
+  set x $ac_tag
+  IFS=$ac_save_IFS
+  shift
+  ac_file=$1
+  shift
+
+  case $ac_mode in
+  :L) ac_source=$1;;
+  :[FH])
+    ac_file_inputs=
+    for ac_f
+    do
+      case $ac_f in
+      -) ac_f="$ac_tmp/stdin";;
+      *) # Look for the file first in the build tree, then in the source tree
+	 # (if the path is not absolute).  The absolute path cannot be DOS-style,
+	 # because $ac_f cannot contain `:'.
+	 test -f "$ac_f" ||
+	   case $ac_f in
+	   [\\/$]*) false;;
+	   *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";;
+	   esac ||
+	   as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;;
+      esac
+      case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac
+      as_fn_append ac_file_inputs " '$ac_f'"
+    done
+
+    # Let's still pretend it is `configure' which instantiates (i.e., don't
+    # use $as_me), people would be surprised to read:
+    #    /* config.h.  Generated by config.status.  */
+    configure_input='Generated from '`
+	  $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g'
+	`' by configure.'
+    if test x"$ac_file" != x-; then
+      configure_input="$ac_file.  $configure_input"
+      { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5
+$as_echo "$as_me: creating $ac_file" >&6;}
+    fi
+    # Neutralize special characters interpreted by sed in replacement strings.
+    case $configure_input in #(
+    *\&* | *\|* | *\\* )
+       ac_sed_conf_input=`$as_echo "$configure_input" |
+       sed 's/[\\\\&|]/\\\\&/g'`;; #(
+    *) ac_sed_conf_input=$configure_input;;
+    esac
+
+    case $ac_tag in
+    *:-:* | *:-) cat >"$ac_tmp/stdin" \
+      || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;;
+    esac
+    ;;
+  esac
+
+  ac_dir=`$as_dirname -- "$ac_file" ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$ac_file" : 'X\(//\)[^/]' \| \
+	 X"$ac_file" : 'X\(//\)$' \| \
+	 X"$ac_file" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$ac_file" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+  as_dir="$ac_dir"; as_fn_mkdir_p
+  ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+  ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`
+  # A ".." for each directory in $ac_dir_suffix.
+  ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
+  case $ac_top_builddir_sub in
+  "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+  *)  ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+  esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+  .)  # We are building in place.
+    ac_srcdir=.
+    ac_top_srcdir=$ac_top_builddir_sub
+    ac_abs_top_srcdir=$ac_pwd ;;
+  [\\/]* | ?:[\\/]* )  # Absolute name.
+    ac_srcdir=$srcdir$ac_dir_suffix;
+    ac_top_srcdir=$srcdir
+    ac_abs_top_srcdir=$srcdir ;;
+  *) # Relative name.
+    ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_build_prefix$srcdir
+    ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+
+  case $ac_mode in
+  :F)
+  #
+  # CONFIG_FILE
+  #
+
+  case $INSTALL in
+  [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;;
+  *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;;
+  esac
+  ac_MKDIR_P=$MKDIR_P
+  case $MKDIR_P in
+  [\\/$]* | ?:[\\/]* ) ;;
+  */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;;
+  esac
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# If the template does not know about datarootdir, expand it.
+# FIXME: This hack should be removed a few years after 2.60.
+ac_datarootdir_hack=; ac_datarootdir_seen=
+ac_sed_dataroot='
+/datarootdir/ {
+  p
+  q
+}
+/@datadir@/p
+/@docdir@/p
+/@infodir@/p
+/@localedir@/p
+/@mandir@/p'
+case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in
+*datarootdir*) ac_datarootdir_seen=yes;;
+*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*)
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5
+$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;}
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+  ac_datarootdir_hack='
+  s&@datadir@&$datadir&g
+  s&@docdir@&$docdir&g
+  s&@infodir@&$infodir&g
+  s&@localedir@&$localedir&g
+  s&@mandir@&$mandir&g
+  s&\\\${datarootdir}&$datarootdir&g' ;;
+esac
+_ACEOF
+
+# Neutralize VPATH when `$srcdir' = `.'.
+# Shell code in configure.ac might set extrasub.
+# FIXME: do we really want to maintain this feature?
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ac_sed_extra="$ac_vpsub
+$extrasub
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+:t
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+s|@configure_input@|$ac_sed_conf_input|;t t
+s&@top_builddir@&$ac_top_builddir_sub&;t t
+s&@top_build_prefix@&$ac_top_build_prefix&;t t
+s&@srcdir@&$ac_srcdir&;t t
+s&@abs_srcdir@&$ac_abs_srcdir&;t t
+s&@top_srcdir@&$ac_top_srcdir&;t t
+s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t
+s&@builddir@&$ac_builddir&;t t
+s&@abs_builddir@&$ac_abs_builddir&;t t
+s&@abs_top_builddir@&$ac_abs_top_builddir&;t t
+s&@INSTALL@&$ac_INSTALL&;t t
+s&@MKDIR_P@&$ac_MKDIR_P&;t t
+$ac_datarootdir_hack
+"
+eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \
+  >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+
+test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
+  { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } &&
+  { ac_out=`sed -n '/^[	 ]*datarootdir[	 ]*:*=/p' \
+      "$ac_tmp/out"`; test -z "$ac_out"; } &&
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined.  Please make sure it is defined" >&5
+$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined.  Please make sure it is defined" >&2;}
+
+  rm -f "$ac_tmp/stdin"
+  case $ac_file in
+  -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";;
+  *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";;
+  esac \
+  || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+ ;;
+
+
+  :C)  { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5
+$as_echo "$as_me: executing $ac_file commands" >&6;}
+ ;;
+  esac
+
+
+  case $ac_file$ac_mode in
+    "depfiles":C) test x"$AMDEP_TRUE" != x"" || {
+  # Older Autoconf quotes --file arguments for eval, but not when files
+  # are listed without --file.  Let's play safe and only enable the eval
+  # if we detect the quoting.
+  case $CONFIG_FILES in
+  *\'*) eval set x "$CONFIG_FILES" ;;
+  *)   set x $CONFIG_FILES ;;
+  esac
+  shift
+  for mf
+  do
+    # Strip MF so we end up with the name of the file.
+    mf=`echo "$mf" | sed -e 's/:.*$//'`
+    # Check whether this is an Automake generated Makefile or not.
+    # We used to match only the files named 'Makefile.in', but
+    # some people rename them; so instead we look at the file content.
+    # Grep'ing the first line is not enough: some people post-process
+    # each Makefile.in and add a new line on top of each file to say so.
+    # Grep'ing the whole file is not good either: AIX grep has a line
+    # limit of 2048, but all sed's we know understand at least 4000.
+    if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then
+      dirpart=`$as_dirname -- "$mf" ||
+$as_expr X"$mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$mf" : 'X\(//\)[^/]' \| \
+	 X"$mf" : 'X\(//\)$' \| \
+	 X"$mf" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$mf" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+    else
+      continue
+    fi
+    # Extract the definition of DEPDIR, am__include, and am__quote
+    # from the Makefile without running 'make'.
+    DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
+    test -z "$DEPDIR" && continue
+    am__include=`sed -n 's/^am__include = //p' < "$mf"`
+    test -z "$am__include" && continue
+    am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
+    # Find all dependency output files, they are included files with
+    # $(DEPDIR) in their names.  We invoke sed twice because it is the
+    # simplest approach to changing $(DEPDIR) to its actual value in the
+    # expansion.
+    for file in `sed -n "
+      s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
+	 sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g'`; do
+      # Make sure the directory exists.
+      test -f "$dirpart/$file" && continue
+      fdir=`$as_dirname -- "$file" ||
+$as_expr X"$file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$file" : 'X\(//\)[^/]' \| \
+	 X"$file" : 'X\(//\)$' \| \
+	 X"$file" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$file" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+      as_dir=$dirpart/$fdir; as_fn_mkdir_p
+      # echo "creating $dirpart/$file"
+      echo '# dummy' > "$dirpart/$file"
+    done
+  done
+}
+ ;;
+    "libtool":C)
+
+    # See if we are running on zsh, and set the options which allow our
+    # commands through without removal of \ escapes.
+    if test -n "${ZSH_VERSION+set}" ; then
+      setopt NO_GLOB_SUBST
+    fi
+
+    cfgfile="${ofile}T"
+    trap "$RM \"$cfgfile\"; exit 1" 1 2 15
+    $RM "$cfgfile"
+
+    cat <<_LT_EOF >> "$cfgfile"
+#! $SHELL
+
+# `$ECHO "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services.
+# Generated automatically by $as_me ($PACKAGE$TIMESTAMP) $VERSION
+# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
+# NOTE: Changes made to this file will be lost: look at ltmain.sh.
+#
+#   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
+#                 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+#                 Foundation, Inc.
+#   Written by Gordon Matzigkeit, 1996
+#
+#   This file is part of GNU Libtool.
+#
+# GNU Libtool is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# As a special exception to the GNU General Public License,
+# if you distribute this file as part of a program or library that
+# is built using GNU Libtool, you may include this file under the
+# same distribution terms that you use for the rest of that program.
+#
+# GNU Libtool is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Libtool; see the file COPYING.  If not, a copy
+# can be downloaded from http://www.gnu.org/licenses/gpl.html, or
+# obtained by writing to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+
+# The names of the tagged configurations supported by this script.
+available_tags="CXX "
+
+# ### BEGIN LIBTOOL CONFIG
+
+# Whether or not to build shared libraries.
+build_libtool_libs=$enable_shared
+
+# Whether or not to build static libraries.
+build_old_libs=$enable_static
+
+# Assembler program.
+AS=$lt_AS
+
+# DLL creation program.
+DLLTOOL=$lt_DLLTOOL
+
+# Object dumper program.
+OBJDUMP=$lt_OBJDUMP
+
+# Which release of libtool.m4 was used?
+macro_version=$macro_version
+macro_revision=$macro_revision
+
+# What type of objects to build.
+pic_mode=$pic_mode
+
+# Whether or not to optimize for fast installation.
+fast_install=$enable_fast_install
+
+# Shell to use when invoking shell scripts.
+SHELL=$lt_SHELL
+
+# An echo program that protects backslashes.
+ECHO=$lt_ECHO
+
+# The PATH separator for the build system.
+PATH_SEPARATOR=$lt_PATH_SEPARATOR
+
+# The host system.
+host_alias=$host_alias
+host=$host
+host_os=$host_os
+
+# The build system.
+build_alias=$build_alias
+build=$build
+build_os=$build_os
+
+# A sed program that does not truncate output.
+SED=$lt_SED
+
+# Sed that helps us avoid accidentally triggering echo(1) options like -n.
+Xsed="\$SED -e 1s/^X//"
+
+# A grep program that handles long lines.
+GREP=$lt_GREP
+
+# An ERE matcher.
+EGREP=$lt_EGREP
+
+# A literal string matcher.
+FGREP=$lt_FGREP
+
+# A BSD- or MS-compatible name lister.
+NM=$lt_NM
+
+# Whether we need soft or hard links.
+LN_S=$lt_LN_S
+
+# What is the maximum length of a command?
+max_cmd_len=$max_cmd_len
+
+# Object file suffix (normally "o").
+objext=$ac_objext
+
+# Executable file suffix (normally "").
+exeext=$exeext
+
+# whether the shell understands "unset".
+lt_unset=$lt_unset
+
+# turn spaces into newlines.
+SP2NL=$lt_lt_SP2NL
+
+# turn newlines into spaces.
+NL2SP=$lt_lt_NL2SP
+
+# convert \$build file names to \$host format.
+to_host_file_cmd=$lt_cv_to_host_file_cmd
+
+# convert \$build files to toolchain format.
+to_tool_file_cmd=$lt_cv_to_tool_file_cmd
+
+# Method to check whether dependent libraries are shared objects.
+deplibs_check_method=$lt_deplibs_check_method
+
+# Command to use when deplibs_check_method = "file_magic".
+file_magic_cmd=$lt_file_magic_cmd
+
+# How to find potential files when deplibs_check_method = "file_magic".
+file_magic_glob=$lt_file_magic_glob
+
+# Find potential files using nocaseglob when deplibs_check_method = "file_magic".
+want_nocaseglob=$lt_want_nocaseglob
+
+# Command to associate shared and link libraries.
+sharedlib_from_linklib_cmd=$lt_sharedlib_from_linklib_cmd
+
+# The archiver.
+AR=$lt_AR
+
+# Flags to create an archive.
+AR_FLAGS=$lt_AR_FLAGS
+
+# How to feed a file listing to the archiver.
+archiver_list_spec=$lt_archiver_list_spec
+
+# A symbol stripping program.
+STRIP=$lt_STRIP
+
+# Commands used to install an old-style archive.
+RANLIB=$lt_RANLIB
+old_postinstall_cmds=$lt_old_postinstall_cmds
+old_postuninstall_cmds=$lt_old_postuninstall_cmds
+
+# Whether to use a lock for old archive extraction.
+lock_old_archive_extraction=$lock_old_archive_extraction
+
+# A C compiler.
+LTCC=$lt_CC
+
+# LTCC compiler flags.
+LTCFLAGS=$lt_CFLAGS
+
+# Take the output of nm and produce a listing of raw symbols and C names.
+global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe
+
+# Transform the output of nm in a proper C declaration.
+global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl
+
+# Transform the output of nm in a C name address pair.
+global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address
+
+# Transform the output of nm in a C name address pair when lib prefix is needed.
+global_symbol_to_c_name_address_lib_prefix=$lt_lt_cv_sys_global_symbol_to_c_name_address_lib_prefix
+
+# Specify filename containing input files for \$NM.
+nm_file_list_spec=$lt_nm_file_list_spec
+
+# The root where to search for dependent libraries,and in which our libraries should be installed.
+lt_sysroot=$lt_sysroot
+
+# The name of the directory that contains temporary libtool files.
+objdir=$objdir
+
+# Used to examine libraries when file_magic_cmd begins with "file".
+MAGIC_CMD=$MAGIC_CMD
+
+# Must we lock files when doing compilation?
+need_locks=$lt_need_locks
+
+# Manifest tool.
+MANIFEST_TOOL=$lt_MANIFEST_TOOL
+
+# Tool to manipulate archived DWARF debug symbol files on Mac OS X.
+DSYMUTIL=$lt_DSYMUTIL
+
+# Tool to change global to local symbols on Mac OS X.
+NMEDIT=$lt_NMEDIT
+
+# Tool to manipulate fat objects and archives on Mac OS X.
+LIPO=$lt_LIPO
+
+# ldd/readelf like tool for Mach-O binaries on Mac OS X.
+OTOOL=$lt_OTOOL
+
+# ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4.
+OTOOL64=$lt_OTOOL64
+
+# Old archive suffix (normally "a").
+libext=$libext
+
+# Shared library suffix (normally ".so").
+shrext_cmds=$lt_shrext_cmds
+
+# The commands to extract the exported symbol list from a shared archive.
+extract_expsyms_cmds=$lt_extract_expsyms_cmds
+
+# Variables whose values should be saved in libtool wrapper scripts and
+# restored at link time.
+variables_saved_for_relink=$lt_variables_saved_for_relink
+
+# Do we need the "lib" prefix for modules?
+need_lib_prefix=$need_lib_prefix
+
+# Do we need a version for libraries?
+need_version=$need_version
+
+# Library versioning type.
+version_type=$version_type
+
+# Shared library runtime path variable.
+runpath_var=$runpath_var
+
+# Shared library path variable.
+shlibpath_var=$shlibpath_var
+
+# Is shlibpath searched before the hard-coded library search path?
+shlibpath_overrides_runpath=$shlibpath_overrides_runpath
+
+# Format of library name prefix.
+libname_spec=$lt_libname_spec
+
+# List of archive names.  First name is the real one, the rest are links.
+# The last name is the one that the linker finds with -lNAME
+library_names_spec=$lt_library_names_spec
+
+# The coded name of the library, if different from the real name.
+soname_spec=$lt_soname_spec
+
+# Permission mode override for installation of shared libraries.
+install_override_mode=$lt_install_override_mode
+
+# Command to use after installation of a shared archive.
+postinstall_cmds=$lt_postinstall_cmds
+
+# Command to use after uninstallation of a shared archive.
+postuninstall_cmds=$lt_postuninstall_cmds
+
+# Commands used to finish a libtool library installation in a directory.
+finish_cmds=$lt_finish_cmds
+
+# As "finish_cmds", except a single script fragment to be evaled but
+# not shown.
+finish_eval=$lt_finish_eval
+
+# Whether we should hardcode library paths into libraries.
+hardcode_into_libs=$hardcode_into_libs
+
+# Compile-time system search path for libraries.
+sys_lib_search_path_spec=$lt_sys_lib_search_path_spec
+
+# Run-time system search path for libraries.
+sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec
+
+# Whether dlopen is supported.
+dlopen_support=$enable_dlopen
+
+# Whether dlopen of programs is supported.
+dlopen_self=$enable_dlopen_self
+
+# Whether dlopen of statically linked programs is supported.
+dlopen_self_static=$enable_dlopen_self_static
+
+# Commands to strip libraries.
+old_striplib=$lt_old_striplib
+striplib=$lt_striplib
+
+
+# The linker used to build libraries.
+LD=$lt_LD
+
+# How to create reloadable object files.
+reload_flag=$lt_reload_flag
+reload_cmds=$lt_reload_cmds
+
+# Commands used to build an old-style archive.
+old_archive_cmds=$lt_old_archive_cmds
+
+# A language specific compiler.
+CC=$lt_compiler
+
+# Is the compiler the GNU compiler?
+with_gcc=$GCC
+
+# Compiler flag to turn off builtin functions.
+no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag
+
+# Additional compiler flags for building library objects.
+pic_flag=$lt_lt_prog_compiler_pic
+
+# How to pass a linker flag through the compiler.
+wl=$lt_lt_prog_compiler_wl
+
+# Compiler flag to prevent dynamic linking.
+link_static_flag=$lt_lt_prog_compiler_static
+
+# Does compiler simultaneously support -c and -o options?
+compiler_c_o=$lt_lt_cv_prog_compiler_c_o
+
+# Whether or not to add -lc for building shared libraries.
+build_libtool_need_lc=$archive_cmds_need_lc
+
+# Whether or not to disallow shared libs when runtime libs are static.
+allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes
+
+# Compiler flag to allow reflexive dlopens.
+export_dynamic_flag_spec=$lt_export_dynamic_flag_spec
+
+# Compiler flag to generate shared objects directly from archives.
+whole_archive_flag_spec=$lt_whole_archive_flag_spec
+
+# Whether the compiler copes with passing no objects directly.
+compiler_needs_object=$lt_compiler_needs_object
+
+# Create an old-style archive from a shared archive.
+old_archive_from_new_cmds=$lt_old_archive_from_new_cmds
+
+# Create a temporary old-style archive to link instead of a shared archive.
+old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds
+
+# Commands used to build a shared archive.
+archive_cmds=$lt_archive_cmds
+archive_expsym_cmds=$lt_archive_expsym_cmds
+
+# Commands used to build a loadable module if different from building
+# a shared archive.
+module_cmds=$lt_module_cmds
+module_expsym_cmds=$lt_module_expsym_cmds
+
+# Whether we are building with GNU ld or not.
+with_gnu_ld=$lt_with_gnu_ld
+
+# Flag that allows shared libraries with undefined symbols to be built.
+allow_undefined_flag=$lt_allow_undefined_flag
+
+# Flag that enforces no undefined symbols.
+no_undefined_flag=$lt_no_undefined_flag
+
+# Flag to hardcode \$libdir into a binary during linking.
+# This must work even if \$libdir does not exist
+hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec
+
+# Whether we need a single "-rpath" flag with a separated argument.
+hardcode_libdir_separator=$lt_hardcode_libdir_separator
+
+# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes
+# DIR into the resulting binary.
+hardcode_direct=$hardcode_direct
+
+# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes
+# DIR into the resulting binary and the resulting library dependency is
+# "absolute",i.e impossible to change by setting \${shlibpath_var} if the
+# library is relocated.
+hardcode_direct_absolute=$hardcode_direct_absolute
+
+# Set to "yes" if using the -LDIR flag during linking hardcodes DIR
+# into the resulting binary.
+hardcode_minus_L=$hardcode_minus_L
+
+# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR
+# into the resulting binary.
+hardcode_shlibpath_var=$hardcode_shlibpath_var
+
+# Set to "yes" if building a shared library automatically hardcodes DIR
+# into the library and all subsequent libraries and executables linked
+# against it.
+hardcode_automatic=$hardcode_automatic
+
+# Set to yes if linker adds runtime paths of dependent libraries
+# to runtime path list.
+inherit_rpath=$inherit_rpath
+
+# Whether libtool must link a program against all its dependency libraries.
+link_all_deplibs=$link_all_deplibs
+
+# Set to "yes" if exported symbols are required.
+always_export_symbols=$always_export_symbols
+
+# The commands to list exported symbols.
+export_symbols_cmds=$lt_export_symbols_cmds
+
+# Symbols that should not be listed in the preloaded symbols.
+exclude_expsyms=$lt_exclude_expsyms
+
+# Symbols that must always be exported.
+include_expsyms=$lt_include_expsyms
+
+# Commands necessary for linking programs (against libraries) with templates.
+prelink_cmds=$lt_prelink_cmds
+
+# Commands necessary for finishing linking programs.
+postlink_cmds=$lt_postlink_cmds
+
+# Specify filename containing input files.
+file_list_spec=$lt_file_list_spec
+
+# How to hardcode a shared library path into an executable.
+hardcode_action=$hardcode_action
+
+# The directories searched by this compiler when creating a shared library.
+compiler_lib_search_dirs=$lt_compiler_lib_search_dirs
+
+# Dependencies to place before and after the objects being linked to
+# create a shared library.
+predep_objects=$lt_predep_objects
+postdep_objects=$lt_postdep_objects
+predeps=$lt_predeps
+postdeps=$lt_postdeps
+
+# The library search path used internally by the compiler when linking
+# a shared library.
+compiler_lib_search_path=$lt_compiler_lib_search_path
+
+# ### END LIBTOOL CONFIG
+
+_LT_EOF
+
+  case $host_os in
+  aix3*)
+    cat <<\_LT_EOF >> "$cfgfile"
+# AIX sometimes has problems with the GCC collect2 program.  For some
+# reason, if we set the COLLECT_NAMES environment variable, the problems
+# vanish in a puff of smoke.
+if test "X${COLLECT_NAMES+set}" != Xset; then
+  COLLECT_NAMES=
+  export COLLECT_NAMES
+fi
+_LT_EOF
+    ;;
+  esac
+
+
+ltmain="$ac_aux_dir/ltmain.sh"
+
+
+  # We use sed instead of cat because bash on DJGPP gets confused if
+  # it finds mixed CR/LF and LF-only lines.  Since sed operates in
+  # text mode, it properly converts lines to CR/LF.  This bash problem
+  # is reportedly fixed, but why not run on old versions too?
+  sed '$q' "$ltmain" >> "$cfgfile" \
+     || (rm -f "$cfgfile"; exit 1)
+
+  if test x"$xsi_shell" = xyes; then
+  sed -e '/^func_dirname ()$/,/^} # func_dirname /c\
+func_dirname ()\
+{\
+\    case ${1} in\
+\      */*) func_dirname_result="${1%/*}${2}" ;;\
+\      *  ) func_dirname_result="${3}" ;;\
+\    esac\
+} # Extended-shell func_dirname implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_basename ()$/,/^} # func_basename /c\
+func_basename ()\
+{\
+\    func_basename_result="${1##*/}"\
+} # Extended-shell func_basename implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_dirname_and_basename ()$/,/^} # func_dirname_and_basename /c\
+func_dirname_and_basename ()\
+{\
+\    case ${1} in\
+\      */*) func_dirname_result="${1%/*}${2}" ;;\
+\      *  ) func_dirname_result="${3}" ;;\
+\    esac\
+\    func_basename_result="${1##*/}"\
+} # Extended-shell func_dirname_and_basename implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_stripname ()$/,/^} # func_stripname /c\
+func_stripname ()\
+{\
+\    # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are\
+\    # positional parameters, so assign one to ordinary parameter first.\
+\    func_stripname_result=${3}\
+\    func_stripname_result=${func_stripname_result#"${1}"}\
+\    func_stripname_result=${func_stripname_result%"${2}"}\
+} # Extended-shell func_stripname implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_split_long_opt ()$/,/^} # func_split_long_opt /c\
+func_split_long_opt ()\
+{\
+\    func_split_long_opt_name=${1%%=*}\
+\    func_split_long_opt_arg=${1#*=}\
+} # Extended-shell func_split_long_opt implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_split_short_opt ()$/,/^} # func_split_short_opt /c\
+func_split_short_opt ()\
+{\
+\    func_split_short_opt_arg=${1#??}\
+\    func_split_short_opt_name=${1%"$func_split_short_opt_arg"}\
+} # Extended-shell func_split_short_opt implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_lo2o ()$/,/^} # func_lo2o /c\
+func_lo2o ()\
+{\
+\    case ${1} in\
+\      *.lo) func_lo2o_result=${1%.lo}.${objext} ;;\
+\      *)    func_lo2o_result=${1} ;;\
+\    esac\
+} # Extended-shell func_lo2o implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_xform ()$/,/^} # func_xform /c\
+func_xform ()\
+{\
+    func_xform_result=${1%.*}.lo\
+} # Extended-shell func_xform implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_arith ()$/,/^} # func_arith /c\
+func_arith ()\
+{\
+    func_arith_result=$(( $* ))\
+} # Extended-shell func_arith implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_len ()$/,/^} # func_len /c\
+func_len ()\
+{\
+    func_len_result=${#1}\
+} # Extended-shell func_len implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+fi
+
+if test x"$lt_shell_append" = xyes; then
+  sed -e '/^func_append ()$/,/^} # func_append /c\
+func_append ()\
+{\
+    eval "${1}+=\\${2}"\
+} # Extended-shell func_append implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_append_quoted ()$/,/^} # func_append_quoted /c\
+func_append_quoted ()\
+{\
+\    func_quote_for_eval "${2}"\
+\    eval "${1}+=\\\\ \\$func_quote_for_eval_result"\
+} # Extended-shell func_append_quoted implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  # Save a `func_append' function call where possible by direct use of '+='
+  sed -e 's%func_append \([a-zA-Z_]\{1,\}\) "%\1+="%g' $cfgfile > $cfgfile.tmp \
+    && mv -f "$cfgfile.tmp" "$cfgfile" \
+      || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+  test 0 -eq $? || _lt_function_replace_fail=:
+else
+  # Save a `func_append' function call even when '+=' is not available
+  sed -e 's%func_append \([a-zA-Z_]\{1,\}\) "%\1="$\1%g' $cfgfile > $cfgfile.tmp \
+    && mv -f "$cfgfile.tmp" "$cfgfile" \
+      || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+  test 0 -eq $? || _lt_function_replace_fail=:
+fi
+
+if test x"$_lt_function_replace_fail" = x":"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Unable to substitute extended shell functions in $ofile" >&5
+$as_echo "$as_me: WARNING: Unable to substitute extended shell functions in $ofile" >&2;}
+fi
+
+
+   mv -f "$cfgfile" "$ofile" ||
+    (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile")
+  chmod +x "$ofile"
+
+
+    cat <<_LT_EOF >> "$ofile"
+
+# ### BEGIN LIBTOOL TAG CONFIG: CXX
+
+# The linker used to build libraries.
+LD=$lt_LD_CXX
+
+# How to create reloadable object files.
+reload_flag=$lt_reload_flag_CXX
+reload_cmds=$lt_reload_cmds_CXX
+
+# Commands used to build an old-style archive.
+old_archive_cmds=$lt_old_archive_cmds_CXX
+
+# A language specific compiler.
+CC=$lt_compiler_CXX
+
+# Is the compiler the GNU compiler?
+with_gcc=$GCC_CXX
+
+# Compiler flag to turn off builtin functions.
+no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag_CXX
+
+# Additional compiler flags for building library objects.
+pic_flag=$lt_lt_prog_compiler_pic_CXX
+
+# How to pass a linker flag through the compiler.
+wl=$lt_lt_prog_compiler_wl_CXX
+
+# Compiler flag to prevent dynamic linking.
+link_static_flag=$lt_lt_prog_compiler_static_CXX
+
+# Does compiler simultaneously support -c and -o options?
+compiler_c_o=$lt_lt_cv_prog_compiler_c_o_CXX
+
+# Whether or not to add -lc for building shared libraries.
+build_libtool_need_lc=$archive_cmds_need_lc_CXX
+
+# Whether or not to disallow shared libs when runtime libs are static.
+allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes_CXX
+
+# Compiler flag to allow reflexive dlopens.
+export_dynamic_flag_spec=$lt_export_dynamic_flag_spec_CXX
+
+# Compiler flag to generate shared objects directly from archives.
+whole_archive_flag_spec=$lt_whole_archive_flag_spec_CXX
+
+# Whether the compiler copes with passing no objects directly.
+compiler_needs_object=$lt_compiler_needs_object_CXX
+
+# Create an old-style archive from a shared archive.
+old_archive_from_new_cmds=$lt_old_archive_from_new_cmds_CXX
+
+# Create a temporary old-style archive to link instead of a shared archive.
+old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds_CXX
+
+# Commands used to build a shared archive.
+archive_cmds=$lt_archive_cmds_CXX
+archive_expsym_cmds=$lt_archive_expsym_cmds_CXX
+
+# Commands used to build a loadable module if different from building
+# a shared archive.
+module_cmds=$lt_module_cmds_CXX
+module_expsym_cmds=$lt_module_expsym_cmds_CXX
+
+# Whether we are building with GNU ld or not.
+with_gnu_ld=$lt_with_gnu_ld_CXX
+
+# Flag that allows shared libraries with undefined symbols to be built.
+allow_undefined_flag=$lt_allow_undefined_flag_CXX
+
+# Flag that enforces no undefined symbols.
+no_undefined_flag=$lt_no_undefined_flag_CXX
+
+# Flag to hardcode \$libdir into a binary during linking.
+# This must work even if \$libdir does not exist
+hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_CXX
+
+# Whether we need a single "-rpath" flag with a separated argument.
+hardcode_libdir_separator=$lt_hardcode_libdir_separator_CXX
+
+# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes
+# DIR into the resulting binary.
+hardcode_direct=$hardcode_direct_CXX
+
+# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes
+# DIR into the resulting binary and the resulting library dependency is
+# "absolute",i.e impossible to change by setting \${shlibpath_var} if the
+# library is relocated.
+hardcode_direct_absolute=$hardcode_direct_absolute_CXX
+
+# Set to "yes" if using the -LDIR flag during linking hardcodes DIR
+# into the resulting binary.
+hardcode_minus_L=$hardcode_minus_L_CXX
+
+# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR
+# into the resulting binary.
+hardcode_shlibpath_var=$hardcode_shlibpath_var_CXX
+
+# Set to "yes" if building a shared library automatically hardcodes DIR
+# into the library and all subsequent libraries and executables linked
+# against it.
+hardcode_automatic=$hardcode_automatic_CXX
+
+# Set to yes if linker adds runtime paths of dependent libraries
+# to runtime path list.
+inherit_rpath=$inherit_rpath_CXX
+
+# Whether libtool must link a program against all its dependency libraries.
+link_all_deplibs=$link_all_deplibs_CXX
+
+# Set to "yes" if exported symbols are required.
+always_export_symbols=$always_export_symbols_CXX
+
+# The commands to list exported symbols.
+export_symbols_cmds=$lt_export_symbols_cmds_CXX
+
+# Symbols that should not be listed in the preloaded symbols.
+exclude_expsyms=$lt_exclude_expsyms_CXX
+
+# Symbols that must always be exported.
+include_expsyms=$lt_include_expsyms_CXX
+
+# Commands necessary for linking programs (against libraries) with templates.
+prelink_cmds=$lt_prelink_cmds_CXX
+
+# Commands necessary for finishing linking programs.
+postlink_cmds=$lt_postlink_cmds_CXX
+
+# Specify filename containing input files.
+file_list_spec=$lt_file_list_spec_CXX
+
+# How to hardcode a shared library path into an executable.
+hardcode_action=$hardcode_action_CXX
+
+# The directories searched by this compiler when creating a shared library.
+compiler_lib_search_dirs=$lt_compiler_lib_search_dirs_CXX
+
+# Dependencies to place before and after the objects being linked to
+# create a shared library.
+predep_objects=$lt_predep_objects_CXX
+postdep_objects=$lt_postdep_objects_CXX
+predeps=$lt_predeps_CXX
+postdeps=$lt_postdeps_CXX
+
+# The library search path used internally by the compiler when linking
+# a shared library.
+compiler_lib_search_path=$lt_compiler_lib_search_path_CXX
+
+# ### END LIBTOOL TAG CONFIG: CXX
+_LT_EOF
+
+ ;;
+
+  esac
+done # for ac_tag
+
+
+as_fn_exit 0
+_ACEOF
+ac_clean_files=$ac_clean_files_save
+
+test $ac_write_fail = 0 ||
+  as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5
+
+
+# configure is writing to config.log, and then calls config.status.
+# config.status does its own redirection, appending to config.log.
+# Unfortunately, on DOS this fails, as config.log is still kept open
+# by configure, so config.status won't be able to write to it; its
+# output is simply discarded.  So we exec the FD to /dev/null,
+# effectively closing config.log, so it can be properly (re)opened and
+# appended to by config.status.  When coming back to configure, we
+# need to make the FD available again.
+if test "$no_create" != yes; then
+  ac_cs_success=:
+  ac_config_status_args=
+  test "$silent" = yes &&
+    ac_config_status_args="$ac_config_status_args --quiet"
+  exec 5>/dev/null
+  $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false
+  exec 5>>config.log
+  # Use ||, not &&, to avoid exiting from the if with $? = 1, which
+  # would make configure fail if this is the last instruction.
+  $ac_cs_success || as_fn_exit 1
+fi
+if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5
+$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}
+fi
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/configure.ac b/third-party/libjxl/libjxl/third_party/lcms/configure.ac
new file mode 100644
index 0000000000..c59c230903
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/configure.ac
@@ -0,0 +1,382 @@
+#
+# LittleCMS 2 configure script
+#
+
+AC_PREREQ(2.60)
+
+#
+# Set the package name and version
+#
+AC_INIT(lcms2,2.9)
+
+# Specify directory where m4 macros may be found.
+AC_CONFIG_MACRO_DIR([m4])
+
+#
+# Libtool library revision control info
+# See the libtool documentation under the heading "Libtool's versioning
+# system" in order to understand the meaning of these fields
+#
+# Here is a set of rules to help you update your library version
+# information:
+#
+#  1. Start with version information of `0:0:0' for each libtool library.
+#  2. Update the version information only immediately before a public
+#     release of your software. More frequent updates are unnecessary, and
+#     only guarantee that the current interface number gets larger faster.
+#  3. If the library source code has changed at all since the last update,
+#     then increment revision (`c:r:a' becomes `c:r+1:a').
+#  4. If any interfaces have been added, removed, or changed since the last
+#     update, increment current, and set revision to 0.
+#  5. If any interfaces have been added since the last public release, then
+#     increment age.
+#  6. If any interfaces have been removed since the last public release,
+#     then set age to 0.
+#
+LIBRARY_CURRENT=2
+LIBRARY_REVISION=8
+LIBRARY_AGE=0
+
+AC_SUBST(LIBRARY_CURRENT)dnl
+AC_SUBST(LIBRARY_REVISION)dnl
+AC_SUBST(LIBRARY_AGE)dnl
+
+# Obtain system type by running config.guess
+AC_CANONICAL_HOST
+
+AM_INIT_AUTOMAKE([foreign 1.7.2 no-define dist-zip subdir-objects])
+
+
+# Check for programs
+AC_PROG_CC_STDC
+AC_PROG_CPP
+AC_PROG_CXX
+
+#AM_PROG_LD
+#AC_SUBST(LD)
+#AC_PROG_INSTALL
+#AC_PROG_MAKE_SET
+#AC_PROG_LN_S
+
+#
+# Tests for Windows
+#
+AC_EXEEXT
+AC_OBJEXT
+
+# Configure libtool
+AC_ENABLE_SHARED
+AC_ENABLE_STATIC
+AC_LIBTOOL_WIN32_DLL
+AC_LIBTOOL_SETUP
+AC_PROG_LIBTOOL
+AC_SUBST(LIBTOOL_DEPS)
+
+# Add configure option --enable-maintainer-mode which enables dependency
+# checking and generation useful to package maintainers.  This is made an
+# option to avoid confusing end users.
+AM_MAINTAINER_MODE
+
+# If the C compiler supports the keyword inline, do nothing. Otherwise
+# define inline to __inline__ or __inline if it accepts one of those,
+# otherwise define inline to be empty.
+AC_C_INLINE
+AC_SUBST(inline)
+
+# Check if the C compiler supports the "visibility" function attribute
+# If supported, defines HAVE_FUNC_ATTRIBUTE_VISIBILITY
+AX_GCC_FUNC_ATTRIBUTE(visibility)
+
+# Check if the compiler supports "-fvisibility=hidden" and if yes, add it to CFLAGS
+# This means that symbols that are not marked explicitly for export (CMSAPI)
+# will not be reachable in the shared library.
+AX_APPEND_COMPILE_FLAGS(["-fvisibility=hidden"])
+
+# If words are stored with the most significant byte first (like
+# Motorola and SPARC CPUs), define `WORDS_BIGENDIAN'.
+AC_C_BIGENDIAN
+
+# Point to JPEG installed in DIR or disable JPEG with --without-jpeg.
+AC_ARG_WITH(jpeg,
+            [  --with-jpeg=DIR         use jpeg installed in DIR],
+            [
+            if [ test "x$withval" = "xno" ]; then
+              [with_jpeg='no']
+            else
+              if [ test "x$withval" != "xyes" ]; then
+                with_jpeg=$withval
+                JPEG_DIR=$withval
+                CPPFLAGS="$CPPFLAGS -I$JPEG_DIR/include"
+                LDFLAGS="$LDFLAGS -L$JPEG_DIR/lib"
+              fi
+              [with_jpeg='yes']
+            fi
+            ],
+	        [with_jpeg='yes'])
+
+# Point to TIFF installed in DIR or disable TIFF with --without-tiff.
+AC_ARG_WITH(tiff,
+            [  --with-tiff=DIR         use tiff installed in DIR],
+            [
+            if [ test "x$withval" = "xno" ]; then
+              [with_tiff='no']
+            else
+              if [ test "x$withval" != "xyes" ]; then
+                with_tiff=$withval
+                TIFF_DIR=$withval
+                CPPFLAGS="$CPPFLAGS -I$TIFF_DIR/include"
+                LDFLAGS="$LDFLAGS -L$TIFF_DIR/lib"
+              fi
+              [with_tiff='yes']  
+            fi
+            ],
+	        [with_tiff='yes'])
+
+# Disable ZLIB
+AC_ARG_WITH(zlib,
+	      [  --without-zlib          disable ZLIB support],
+	      [with_zlib=$withval],
+	      [with_zlib='yes'])
+
+#
+# Determine POSIX threads settings
+#
+# Enable support for POSIX thread APIs
+AC_ARG_WITH(threads,
+	      AS_HELP_STRING([--without-threads],
+                             [disable POSIX threads API support]),
+	      [with_threads=$withval],
+	      [with_threads='yes'])
+
+have_threads=no
+if test "$with_threads" != 'no'
+then
+
+  ACX_PTHREAD()
+ 
+  if test "$acx_pthread_ok" = yes
+  then
+    have_threads=yes
+
+    DEF_THREAD="$PTHREAD_CFLAGS"
+    CFLAGS="$CFLAGS $DEF_THREAD"
+    CXXFLAGS="$CXXFLAGS $DEF_THREAD"
+
+    if test "$CC" != "$PTHREAD_CC"
+    then
+      AC_MSG_WARN([Replacing compiler $CC with compiler $PTHREAD_CC to support pthreads.])
+      CC="$PTHREAD_CC"
+    fi
+    if test "$CXX" != "$PTHREAD_CXX"
+    then
+      AC_MSG_WARN([Replacing compiler $CXX with compiler $PTHREAD_CXX to support pthreads.])
+      CXX="$PTHREAD_CXX"
+    fi
+  fi
+fi
+
+
+#
+# Find math library
+#
+LIB_MATH=''
+AC_CHECK_LIB(m,sqrt,LIB_MATH="-lm",,)
+LIBS="$LIB_MATH $LIBS"
+AC_SUBST(LIB_MATH)
+
+#
+# Find Posix threads library
+#
+LIB_THREAD=''
+if test "$with_threads" != 'no' && test "$have_threads" = 'yes'
+then
+  for lib in pthread pthreads
+  do
+    if test "x$PTHREAD_LIBS" = "x" ; then
+      AC_CHECK_LIB([$lib],pthread_mutex_lock,[PTHREAD_LIBS=-l$lib],,)
+    fi
+  done
+
+  LIB_THREAD="$PTHREAD_LIBS"
+  LIBS="$LIBS $LIB_THREAD"
+  AC_DEFINE(HasTHREADS,1,[Define if you have pthreads library])
+else
+  AC_DEFINE(HasTHREADS,0,[Define if you don't have pthreads library])
+fi
+AC_SUBST(LIB_THREAD)
+
+#
+# Check for JPEG
+#
+have_jpeg='no'
+LIB_JPEG=''
+if test ! "$with_jpeg" = 'no'
+then
+    AC_MSG_CHECKING([for JPEG support])
+    AC_MSG_RESULT()
+    failed=0;
+    passed=0;
+    AC_CHECK_HEADER(jconfig.h,passed=`expr $passed + 1`,failed=`expr $failed + 1`)
+    AC_CHECK_HEADER(jerror.h,passed=`expr $passed + 1`,failed=`expr $failed + 1`)
+    AC_CHECK_HEADER(jmorecfg.h,passed=`expr $passed + 1`,failed=`expr $failed + 1`)
+    AC_CHECK_HEADER(jpeglib.h,passed=`expr $passed + 1`,failed=`expr $failed + 1`)
+    AC_CHECK_LIB(jpeg,jpeg_read_header,passed=`expr $passed + 1`,failed=`expr $failed + 1`,)
+
+# Test for compatible JPEG library
+if test ! "$ac_cv_jpeg_version_ok" = 'yes' ; then
+AC_CACHE_CHECK(for JPEG library is version 6b or later, ac_cv_jpeg_version_ok,
+[AC_TRY_COMPILE(
+#include <stdio.h>
+#include <stdlib.h>
+#include <jpeglib.h>
+,
+changequote(<<, >>)dnl
+<<
+#if JPEG_LIB_VERSION < 62
+#error IJG JPEG library must be version 6b or newer!
+#endif
+return 0;
+>>,
+changequote([, ])dnl
+ac_cv_jpeg_version_ok='yes',
+ac_cv_jpeg_version_ok='no')])
+if test "$ac_cv_jpeg_version_ok" = 'yes' ; then
+ AC_MSG_RESULT(yes)
+ passed=`expr $passed + 1`
+else
+ AC_MSG_RESULT(no)
+ failed=`expr $failed + 1`
+fi
+fi
+    AC_MSG_CHECKING(if JPEG package is complete)
+    if test $passed -gt 0
+    then
+    if test $failed -gt 0
+    then
+	AC_MSG_RESULT(no -- some components failed test)
+        have_jpeg='no (failed tests)'
+    else
+	LIB_JPEG='-ljpeg'
+	LIBS="$LIB_JPEG $LIBS"
+	AC_DEFINE(HasJPEG,1,Define if you have JPEG library)
+	AC_MSG_RESULT(yes)
+        have_jpeg='yes'
+    fi
+    else
+        AC_MSG_RESULT(no)
+    fi
+fi
+AM_CONDITIONAL(HasJPEG, test "$have_jpeg" = 'yes')
+AC_SUBST(LIB_JPEG)
+
+#
+# Check for ZLIB (skipped when --without-zlib is given and PNG is not requested)
+#
+have_zlib='no'
+dnl PNG needs zlib; with_png has no option here, so treat unset as 'no'
+if test ! "$with_zlib" = 'no' || test ! "${with_png-no}" = 'no'
+then
+  LIB_ZLIB=''
+  AC_MSG_CHECKING(for ZLIB support )
+  AC_MSG_RESULT()
+  failed=0;
+  passed=0;
+  AC_CHECK_HEADER(zconf.h,passed=`expr $passed + 1`,failed=`expr $failed + 1`)
+  AC_CHECK_HEADER(zlib.h,passed=`expr $passed + 1`,failed=`expr $failed + 1`)
+  AC_CHECK_LIB(z,compress,passed=`expr $passed + 1`,failed=`expr $failed + 1`,)
+  AC_CHECK_LIB(z,uncompress,passed=`expr $passed + 1`,failed=`expr $failed + 1`,)
+  AC_CHECK_LIB(z,deflate,passed=`expr $passed + 1`,failed=`expr $failed + 1`,)
+  AC_CHECK_LIB(z,inflate,passed=`expr $passed + 1`,failed=`expr $failed + 1`,)
+  AC_CHECK_LIB(z,gzseek,passed=`expr $passed + 1`,failed=`expr $failed + 1`,)
+  AC_CHECK_LIB(z,gztell,passed=`expr $passed + 1`,failed=`expr $failed + 1`,)
+  AC_MSG_CHECKING(if ZLIB package is complete)
+  if test $passed -gt 0
+  then
+    if test $failed -gt 0
+    then
+      AC_MSG_RESULT(no -- some components failed test)
+      have_zlib='no (failed tests)'
+    else
+      LIB_ZLIB='-lz'
+      LIBS="$LIB_ZLIB $LIBS"
+      AC_DEFINE(HasZLIB,1,Define if you have zlib compression library)
+      AC_MSG_RESULT(yes)
+      have_zlib='yes'
+    fi
+  else
+    AC_MSG_RESULT(no)
+  fi
+fi
+AM_CONDITIONAL(HasZLIB, test "$have_zlib" = 'yes')
+AC_SUBST(LIB_ZLIB)
+
+#
+# Check for TIFF
+#
+have_tiff='no'
+LIB_TIFF=''
+if test ! "$with_tiff" = 'no'
+then
+    AC_MSG_CHECKING([for TIFF support])
+    AC_MSG_RESULT()
+    failed=0;
+    passed=0;
+    AC_CHECK_HEADER(tiff.h,passed=`expr $passed + 1`,failed=`expr $failed + 1`)
+    AC_CHECK_HEADER(tiffio.h,passed=`expr $passed + 1`,failed=`expr $failed + 1`)
+    AC_CHECK_LIB(tiff,TIFFOpen,passed=`expr $passed + 1`,failed=`expr $failed + 1`,)
+    AC_CHECK_LIB(tiff,TIFFClientOpen,passed=`expr $passed + 1`,failed=`expr $failed + 1`,)
+    AC_CHECK_LIB(tiff,TIFFIsByteSwapped,passed=`expr $passed + 1`,failed=`expr $failed + 1`,)
+    AC_MSG_CHECKING(if TIFF package is complete)
+    if test $passed -gt 0
+    then
+    if test $failed -gt 0
+    then
+	AC_MSG_RESULT(no -- some components failed test)
+	have_tiff='no (failed tests)'
+    else
+	LIB_TIFF='-ltiff'
+	LIBS="$LIB_TIFF $LIBS"
+	AC_DEFINE(HasTIFF,1,Define if you have TIFF library)
+	AC_MSG_RESULT(yes)
+	have_tiff='yes'
+	AC_CHECK_HEADERS(tiffconf.h)
+    fi
+    else
+    AC_MSG_RESULT(no)
+    fi
+fi
+AM_CONDITIONAL(HasTIFF, test "$have_tiff" = 'yes')
+AC_SUBST(LIB_TIFF)
+
+
+# Libraries that the LCMS library depends on
+LCMS_LIB_DEPLIBS="$LIB_MATH $LIB_THREAD"
+LCMS_LIB_DEPLIBS=`echo $LCMS_LIB_DEPLIBS | sed -e 's/  */ /g'`
+AC_SUBST(LCMS_LIB_DEPLIBS)
+
+# Libraries that the jpegicc program depends on
+JPEGICC_DEPLIBS="$LIB_JPEG $LIB_MATH $LIB_THREAD"
+JPEGICC_DEPLIBS=`echo $JPEGICC_DEPLIBS | sed -e 's/  */ /g'`
+AC_SUBST(JPEGICC_DEPLIBS)
+
+# Libraries that the tifficc program depends on
+TIFFICC_DEPLIBS="$LIB_TIFF $LIB_JPEG $LIB_ZLIB $LIB_MATH $LIB_THREAD"
+TIFFICC_DEPLIBS=`echo $TIFFICC_DEPLIBS | sed -e 's/  */ /g'`
+AC_SUBST(TIFFICC_DEPLIBS)
+
+LIBS=''
+
+#
+# Perform substitutions
+#
+AC_CONFIG_FILES([Makefile])
+AC_CONFIG_FILES([lcms2.pc])
+AC_CONFIG_FILES([include/Makefile])
+AC_CONFIG_FILES([src/Makefile])
+AC_CONFIG_FILES([utils/tificc/Makefile])
+AC_CONFIG_FILES([utils/transicc/Makefile])
+AC_CONFIG_FILES([utils/linkicc/Makefile])
+AC_CONFIG_FILES([utils/jpgicc/Makefile])
+AC_CONFIG_FILES([utils/psicc/Makefile])
+AC_CONFIG_FILES([testbed/Makefile])
+AC_OUTPUT
diff --git a/third-party/libjxl/libjxl/third_party/lcms/depcomp b/third-party/libjxl/libjxl/third_party/lcms/depcomp
new file mode 100755
index 0000000000..fc98710e2a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/depcomp
@@ -0,0 +1,791 @@
+#! /bin/sh
+# depcomp - compile a program generating dependencies as side-effects
+
+scriptversion=2013-05-30.07; # UTC
+
+# Copyright (C) 1999-2014 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
+
+case $1 in
+  '')
+    echo "$0: No command.  Try '$0 --help' for more information." 1>&2
+    exit 1;
+    ;;
+  -h | --h*)
+    cat <<\EOF
+Usage: depcomp [--help] [--version] PROGRAM [ARGS]
+
+Run PROGRAMS ARGS to compile a file, generating dependencies
+as side-effects.
+
+Environment variables:
+  depmode     Dependency tracking mode.
+  source      Source file read by 'PROGRAMS ARGS'.
+  object      Object file output by 'PROGRAMS ARGS'.
+  DEPDIR      directory where to store dependencies.
+  depfile     Dependency file to output.
+  tmpdepfile  Temporary file to use when outputting dependencies.
+  libtool     Whether libtool is used (yes/no).
+
+Report bugs to <bug-automake@gnu.org>.
+EOF
+    exit $?
+    ;;
+  -v | --v*)
+    echo "depcomp $scriptversion"
+    exit $?
+    ;;
+esac
+
+# Store the directory part of path $1 in the global variable '$dir'.
+# The result is either empty (no '/' in $1) or ends with a '/', e.g.
+# 'sub/bar.o' gives 'sub/'.  The trailing slash is deliberate.
+set_dir_from ()
+{
+  case $1 in
+    */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;;
+      *) dir=;;
+  esac
+}
+
+# Strip the directory part and the last '.suffix' from path $1, and save
+# the result in the global variable '$base' (e.g. 'sub/bar.o' -> 'bar').
+set_base_from ()
+{
+  base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'`
+}
+
+# Write a placeholder depfile (the single line '#dummy') to "$depfile".
+# Used when the compiler invocation created no dependency file, to avoid
+# errors with the Makefile "include basename.Plo" scheme.
+make_dummy_depfile ()
+{
+  echo "#dummy" > "$depfile"
+}
+
+# Convert the compiler-written "$tmpdepfile" into "$depfile" and remove it,
+# or write a dummy depfile if it is missing.  Also reads $object/$lower/$tab.
+aix_post_process_depfile ()
+{
+  # If the compiler actually managed to produce a dependency file,
+  # post-process it.
+  if test -f "$tmpdepfile"; then
+    # Each line is of the form 'foo.o: dependency.h'.
+    # Do two passes, one to just change these to
+    #   $object: dependency.h
+    # and one to simply output
+    #   dependency.h:
+    # which is needed to avoid the deleted-header problem.
+    { sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile"
+      sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile"
+    } > "$depfile"
+    rm -f "$tmpdepfile"
+  else
+    make_dummy_depfile
+  fi
+}
+
+# A tabulation character.
+tab='	'
+# A newline character.
+nl='
+'
+# Character ranges might be problematic outside the C locale.
+# These definitions help.
+upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ
+lower=abcdefghijklmnopqrstuvwxyz
+digits=0123456789
+alpha=${upper}${lower}
+
+if test -z "$depmode" || test -z "$source" || test -z "$object"; then
+  echo "depcomp: Variables source, object and depmode must be set" 1>&2
+  exit 1
+fi
+
+# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
+depfile=${depfile-`echo "$object" |
+  sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
+tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
+
+rm -f "$tmpdepfile"
+
+# Avoid interferences from the environment.
+gccflag= dashmflag=
+
+# Some modes work just like other modes, but use different flags.  We
+# parameterize here, but still list the modes in the big case below,
+# to make depend.m4 easier to write.  Note that we *cannot* use a case
+# here, because this file can only contain one case statement.
+if test "$depmode" = hp; then
+  # HP compiler uses -M and no extra arg.
+  gccflag=-M
+  depmode=gcc
+fi
+
+if test "$depmode" = dashXmstdout; then
+  # This is just like dashmstdout with a different argument.
+  dashmflag=-xM
+  depmode=dashmstdout
+fi
+
+cygpath_u="cygpath -u -f -"
+if test "$depmode" = msvcmsys; then
+  # This is just like msvisualcpp but w/o cygpath translation.
+  # Just convert the backslash-escaped backslashes to single forward
+  # slashes to satisfy depend.m4
+  cygpath_u='sed s,\\\\,/,g'
+  depmode=msvisualcpp
+fi
+
+if test "$depmode" = msvc7msys; then
+  # This is just like msvc7 but w/o cygpath translation.
+  # Just convert the backslash-escaped backslashes to single forward
+  # slashes to satisfy depend.m4
+  cygpath_u='sed s,\\\\,/,g'
+  depmode=msvc7
+fi
+
+if test "$depmode" = xlc; then
+  # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
+  gccflag=-qmakedep=gcc,-MF
+  depmode=gcc
+fi
+
+case "$depmode" in
+gcc3)
+## gcc 3 implements dependency tracking that does exactly what
+## we want.  Yay!  Note: for some reason libtool 1.4 doesn't like
+## it if -MD -MP comes after the -MF stuff.  Hmm.
+## Unfortunately, FreeBSD c89 acceptance of flags depends upon
+## the command line argument order; so add the flags where they
+## appear in depend2.am.  Note that the slowdown incurred here
+## affects only configure: in makefiles, %FASTDEP% shortcuts this.
+  for arg
+  do
+    case $arg in
+    -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
+    *)  set fnord "$@" "$arg" ;;
+    esac
+    shift # fnord
+    shift # $arg
+  done
+  "$@"
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  mv "$tmpdepfile" "$depfile"
+  ;;
+
+gcc)
+## Note that this doesn't just cater to obsolete pre-3.x GCC compilers,
+## but also to in-use compilers like IBM xlc/xlC and the HP C compiler.
+## (see the conditional assignment to $gccflag above).
+## There are various ways to get dependency output from gcc.  Here's
+## why we pick this rather obscure method:
+## - Don't want to use -MD because we'd like the dependencies to end
+##   up in a subdir.  Having to rename by hand is ugly.
+##   (We might end up doing this anyway to support other compilers.)
+## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
+##   -MM, not -M (despite what the docs say).  Also, it might not be
+##   supported by the other compilers which use the 'gcc' depmode.
+## - Using -M directly means running the compiler twice (even worse
+##   than renaming).
+  if test -z "$gccflag"; then
+    gccflag=-MD,
+  fi
+  "$@" -Wp,"$gccflag$tmpdepfile"
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  # The second -e expression handles DOS-style file names with drive
+  # letters.
+  sed -e 's/^[^:]*: / /' \
+      -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
+## This next piece of magic avoids the "deleted header file" problem.
+## The problem is that when a header file which appears in a .P file
+## is deleted, the dependency causes make to die (because there is
+## typically no way to rebuild the header).  We avoid this by adding
+## dummy dependencies for each header file.  Too bad gcc doesn't do
+## this for us directly.
+## Some versions of gcc put a space before the ':'.  On the theory
+## that the space means something, we add a space to the output as
+## well.  hp depmode also adds that space, but also prefixes the VPATH
+## to the object.  Take care to not repeat it in the output.
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly.  Breaking it into two sed invocations is a workaround.
+  tr ' ' "$nl" < "$tmpdepfile" \
+    | sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
+    | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+hp)
+  # This case exists only to let depend.m4 do its work.  It works by
+  # looking at the text of this script.  This case will never be run,
+  # since it is checked for above.
+  exit 1
+  ;;
+
+sgi)
+  if test "$libtool" = yes; then
+    "$@" "-Wp,-MDupdate,$tmpdepfile"
+  else
+    "$@" -MDupdate "$tmpdepfile"
+  fi
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+
+  if test -f "$tmpdepfile"; then  # yes, the sourcefile depend on other files
+    echo "$object : \\" > "$depfile"
+    # Clip off the initial element (the dependent).  Don't try to be
+    # clever and replace this with sed code, as IRIX sed won't handle
+    # lines with more than a fixed number of characters (4096 in
+    # IRIX 6.2 sed, 8192 in IRIX 6.5).  We also remove comment lines;
+    # the IRIX cc adds comments like '#:fec' to the end of the
+    # dependency line.
+    tr ' ' "$nl" < "$tmpdepfile" \
+      | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \
+      | tr "$nl" ' ' >> "$depfile"
+    echo >> "$depfile"
+    # The second pass generates a dummy entry for each header file.
+    tr ' ' "$nl" < "$tmpdepfile" \
+      | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
+      >> "$depfile"
+  else
+    make_dummy_depfile
+  fi
+  rm -f "$tmpdepfile"
+  ;;
+
+xlc)
+  # This case exists only to let depend.m4 do its work.  It works by
+  # looking at the text of this script.  This case will never be run,
+  # since it is checked for above.
+  exit 1
+  ;;
+
+aix)
+  # The C for AIX Compiler uses -M and outputs the dependencies
+  # in a .u file.  In older versions, this file always lives in the
+  # current directory.  Also, the AIX compiler puts '$object:' at the
+  # start of each line; $object doesn't have directory information.
+  # Version 6 uses the directory in both cases.
+  set_dir_from "$object"
+  set_base_from "$object"
+  if test "$libtool" = yes; then
+    tmpdepfile1=$dir$base.u
+    tmpdepfile2=$base.u
+    tmpdepfile3=$dir.libs/$base.u
+    "$@" -Wc,-M
+  else
+    tmpdepfile1=$dir$base.u
+    tmpdepfile2=$dir$base.u
+    tmpdepfile3=$dir$base.u
+    "$@" -M
+  fi
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+    exit $stat
+  fi
+
+  for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+  do
+    test -f "$tmpdepfile" && break
+  done
+  aix_post_process_depfile
+  ;;
+
+tcc)
+  # tcc (Tiny C Compiler) understands '-MD -MF file' since version 0.9.26
+  # FIXME: That version still under development at the moment of writing.
+  #        Make that this statement remains true also for stable, released
+  #        versions.
+  # It will wrap lines (doesn't matter whether long or short) with a
+  # trailing '\', as in:
+  #
+  #   foo.o : \
+  #    foo.c \
+  #    foo.h \
+  #
+  # It will put a trailing '\' even on the last line, and will use leading
+  # spaces rather than leading tabs (at least since its commit 0394caf7
+  # "Emit spaces for -MD").
+  "$@" -MD -MF "$tmpdepfile"
+  stat=$?
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+  # Each non-empty line is of the form 'foo.o : \' or ' dep.h \'.
+  # We have to change lines of the first kind to '$object: \'.
+  sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile"
+  # And for each line of the second kind, we have to emit a 'dep.h:'
+  # dummy dependency, to avoid the deleted-header problem.
+  sed -n -e 's|^  *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+## The order of this option in the case statement is important, since the
+## shell code in configure will try each of these formats in the order
+## listed in this file.  A plain '-MD' option would be understood by many
+## compilers, so we must ensure this comes after the gcc and icc options.
+pgcc)
+  # Portland's C compiler understands '-MD'.
+  # Will always output deps to 'file.d' where file is the root name of the
+  # source file under compilation, even if file resides in a subdirectory.
+  # The object file name does not affect the name of the '.d' file.
+  # pgcc 10.2 will output
+  #    foo.o: sub/foo.c sub/foo.h
+  # and will wrap long lines using '\' :
+  #    foo.o: sub/foo.c ... \
+  #     sub/foo.h ... \
+  #     ...
+  set_dir_from "$object"
+  # Use the source, not the object, to determine the base name, since
+  # that's sadly what pgcc will do too.
+  set_base_from "$source"
+  tmpdepfile=$base.d
+
+  # For projects that build the same source file twice into different object
+  # files, the pgcc approach of using the *source* file root name can cause
+  # problems in parallel builds.  Use a locking strategy to avoid stomping on
+  # the same $tmpdepfile.
+  lockdir=$base.d-lock
+  trap "
+    echo '$0: caught signal, cleaning up...' >&2
+    rmdir '$lockdir'
+    exit 1
+  " 1 2 13 15
+  numtries=100
+  i=$numtries
+  while test $i -gt 0; do
+    # mkdir is a portable test-and-set.
+    if mkdir "$lockdir" 2>/dev/null; then
+      # This process acquired the lock.
+      "$@" -MD
+      stat=$?
+      # Release the lock.
+      rmdir "$lockdir"
+      break
+    else
+      # If the lock is being held by a different process, wait
+      # until the winning process is done or we timeout.
+      while test -d "$lockdir" && test $i -gt 0; do
+        sleep 1
+        i=`expr $i - 1`
+      done
+    fi
+    i=`expr $i - 1`
+  done
+  trap - 1 2 13 15
+  if test $i -le 0; then
+    echo "$0: failed to acquire lock after $numtries attempts" >&2
+    echo "$0: check lockdir '$lockdir'" >&2
+    exit 1
+  fi
+
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+  # Each line is of the form `foo.o: dependent.h',
+  # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
+  # Do two passes, one to just change these to
+  # `$object: dependent.h' and one to simply `dependent.h:'.
+  sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
+  # Some versions of the HPUX 10.20 sed can't process this invocation
+  # correctly.  Breaking it into two sed invocations is a workaround.
+  sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \
+    | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+hp2)
+  # The "hp" stanza above does not work with aCC (C++) and HP's ia64
+  # compilers, which have integrated preprocessors.  The correct option
+  # to use with these is +Maked; it writes dependencies to a file named
+  # 'foo.d', which lands next to the object file, wherever that
+  # happens to be.
+  # Much of this is similar to the tru64 case; see comments there.
+  set_dir_from  "$object"
+  set_base_from "$object"
+  if test "$libtool" = yes; then
+    tmpdepfile1=$dir$base.d
+    tmpdepfile2=$dir.libs/$base.d
+    "$@" -Wc,+Maked
+  else
+    tmpdepfile1=$dir$base.d
+    tmpdepfile2=$dir$base.d  # same file as tmpdepfile1 when libtool is not used
+    "$@" +Maked
+  fi
+  stat=$?  # status of the compile command run in the branch taken above
+  if test $stat -ne 0; then
+     rm -f "$tmpdepfile1" "$tmpdepfile2"
+     exit $stat
+  fi
+
+  for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
+  do
+    test -f "$tmpdepfile" && break  # keep the first candidate that exists
+  done
+  if test -f "$tmpdepfile"; then
+    sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile"  # rewrite the rule target as $object
+    # Add 'dependent.h:' lines.
+    sed -ne '2,${
+               s/^ *//
+               s/ \\*$//
+               s/$/:/
+               p
+             }' "$tmpdepfile" >> "$depfile"
+  else
+    make_dummy_depfile  # neither candidate exists; helper is defined outside this branch
+  fi
+  rm -f "$tmpdepfile" "$tmpdepfile2"
+  ;;
+
+tru64)
+  # The Tru64 compiler uses -MD to generate dependencies as a side
+  # effect.  'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
+  # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
+  # dependencies in 'foo.d' instead, so we check for that too.
+  # Subdirectories are respected.
+  set_dir_from  "$object"
+  set_base_from "$object"
+
+  if test "$libtool" = yes; then
+    # Libtool generates 2 separate objects for the 2 libraries.  These
+    # two compilations output dependencies in $dir.libs/$base.o.d and
+    # in $dir$base.o.d.  We have to check for both files, because
+    # one of the two compilations can be disabled.  We should prefer
+    # $dir$base.o.d over $dir.libs/$base.o.d because the latter is
+    # automatically cleaned when .libs/ is deleted, while ignoring
+    # the former would cause a distcleancheck panic.
+    tmpdepfile1=$dir$base.o.d          # libtool 1.5
+    tmpdepfile2=$dir.libs/$base.o.d    # Likewise.
+    tmpdepfile3=$dir.libs/$base.d      # Compaq CCC V6.2-504
+    "$@" -Wc,-MD
+  else
+    tmpdepfile1=$dir$base.d  # without libtool all three candidates name the same file
+    tmpdepfile2=$dir$base.d
+    tmpdepfile3=$dir$base.d
+    "$@" -MD
+  fi
+
+  stat=$?  # status of the compile command run in the branch taken above
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+    exit $stat
+  fi
+
+  for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+  do
+    test -f "$tmpdepfile" && break  # keep the first candidate that exists
+  done
+  # Same post-processing that is required for AIX mode.
+  aix_post_process_depfile
+  ;;
+
+msvc7)
+  if test "$libtool" = yes; then
+    showIncludes=-Wc,-showIncludes
+  else
+    showIncludes=-showIncludes
+  fi
+  "$@" $showIncludes > "$tmpdepfile"  # compiler stdout is captured in $tmpdepfile
+  stat=$?
+  grep -v '^Note: including file: ' "$tmpdepfile"  # replay every other captured line on stdout
+  if test $stat -ne 0; then
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  # The first sed program below extracts the file names and escapes
+  # backslashes for cygpath.  The second sed program outputs the file
+  # name when reading, but also accumulates all include files in the
+  # hold buffer in order to output them again at the end.  This only
+  # works with sed implementations that can handle large buffers.
+  sed < "$tmpdepfile" -n '
+/^Note: including file:  *\(.*\)/ {
+  s//\1/
+  s/\\/\\\\/g
+  p
+}' | $cygpath_u | sort -u | sed -n '
+s/ /\\ /g
+s/\(.*\)/'"$tab"'\1 \\/p
+s/.\(.*\) \\/\1:/
+H
+$ {
+  s/.*/'"$tab"'/
+  G
+  p
+}' >> "$depfile"
+  echo >> "$depfile" # make sure the fragment doesn't end with a backslash
+  rm -f "$tmpdepfile"
+  ;;
+
+msvc7msys)
+  # This case exists only to let depend.m4 do its work.  It works by
+  # looking at the text of this script.  This case will never be run,
+  # since it is checked for above.
+  exit 1
+  ;;
+
+#nosideeffect)
+  # This comment above is used by automake to tell side-effect
+  # dependency tracking mechanisms from slower ones.
+
+dashmstdout)
+  # Important note: in order to support this mode, a compiler *must*
+  # always write the preprocessed file to stdout, regardless of -o.
+  "$@" || exit $?  # run the real compile first; on failure exit with its status
+
+  # Remove the call to Libtool.
+  if test "$libtool" = yes; then
+    while test "X$1" != 'X--mode=compile'; do
+      shift
+    done
+    shift
+  fi
+
+  # Remove '-o $object': each word is either dropped or rotated to the end of "$@".
+  IFS=" "
+  for arg
+  do
+    case $arg in
+    -o)
+      shift
+      ;;
+    $object)
+      shift
+      ;;
+    *)
+      set fnord "$@" "$arg"
+      shift # fnord
+      shift # $arg
+      ;;
+    esac
+  done
+
+  test -z "$dashmflag" && dashmflag=-M  # fall back to -M when $dashmflag is empty
+  # Require at least two characters before searching for ':'
+  # in the target name.  This is to cope with DOS-style filenames:
+  # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
+  "$@" $dashmflag |
+    sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile"
+  rm -f "$depfile"
+  cat < "$tmpdepfile" > "$depfile"
+  # Some versions of the HPUX 10.20 sed can't process this sed invocation
+  # correctly.  Breaking it into two sed invocations is a workaround.
+  tr ' ' "$nl" < "$tmpdepfile" \
+    | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
+    | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+dashXmstdout)
+  # This case only exists to satisfy depend.m4.  It is never actually
+  # run, as this mode is specially recognized in the preamble.
+  exit 1
+  ;;
+
+makedepend)
+  "$@" || exit $?  # run the real compile first; on failure exit with its status
+  # Remove any Libtool call
+  if test "$libtool" = yes; then
+    while test "X$1" != 'X--mode=compile'; do
+      shift
+    done
+    shift
+  fi
+  # X makedepend: drop the leading command word so only its arguments remain
+  shift
+  cleared=no eat=no  # cleared: has "$@" been emptied yet?  eat: skip the next word?
+  for arg
+  do
+    case $cleared in
+    no)
+      set ""; shift  # empty the positional parameters before rebuilding them
+      cleared=yes ;;
+    esac
+    if test $eat = yes; then
+      eat=no
+      continue
+    fi
+    case "$arg" in
+    -D*|-I*)
+      set fnord "$@" "$arg"; shift ;;  # keep: append to the rebuilt "$@"
+    # Strip any option that makedepend may not understand.  Remove
+    # the object too, otherwise makedepend will parse it as a source file.
+    -arch)
+      eat=yes ;;  # also drop the word that follows -arch
+    -*|$object)
+      ;;
+    *)
+      set fnord "$@" "$arg"; shift ;;
+    esac
+  done
+  obj_suffix=`echo "$object" | sed 's/^.*\././'`  # e.g. foo.o -> .o
+  touch "$tmpdepfile"
+  ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
+  rm -f "$depfile"
+  # makedepend may prepend the VPATH from the source file name to the object.
+  # No need to regex-escape $object, excess matching of '.' is harmless.
+  sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
+  # Some versions of the HPUX 10.20 sed can't process the last invocation
+  # correctly.  Breaking it into two sed invocations is a workaround.
+  sed '1,2d' "$tmpdepfile" \
+    | tr ' ' "$nl" \
+    | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
+    | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile" "$tmpdepfile".bak
+  ;;
+
+cpp)
+  # Important note: in order to support this mode, a compiler *must*
+  # always write the preprocessed file to stdout.
+  "$@" || exit $?  # run the real compile first; on failure exit with its status
+
+  # Remove the call to Libtool.
+  if test "$libtool" = yes; then
+    while test "X$1" != 'X--mode=compile'; do
+      shift
+    done
+    shift
+  fi
+
+  # Remove '-o $object': each word is either dropped or rotated to the end of "$@".
+  IFS=" "
+  for arg
+  do
+    case $arg in
+    -o)
+      shift
+      ;;
+    $object)
+      shift
+      ;;
+    *)
+      set fnord "$@" "$arg"
+      shift # fnord
+      shift # $arg
+      ;;
+    esac
+  done
+
+  "$@" -E \
+    | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
+             -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
+    | sed '$ s: \\$::' > "$tmpdepfile"
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  cat < "$tmpdepfile" >> "$depfile"
+  sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"  # emit each dependency as an empty 'dep :' rule
+  rm -f "$tmpdepfile"
+  ;;
+
+msvisualcpp)
+  # Important note: in order to support this mode, a compiler *must*
+  # always write the preprocessed file to stdout.
+  "$@" || exit $?  # run the real compile first; on failure exit with its status
+
+  # Remove the call to Libtool.
+  if test "$libtool" = yes; then
+    while test "X$1" != 'X--mode=compile'; do
+      shift
+    done
+    shift
+  fi
+
+  IFS=" "
+  for arg
+  do
+    case "$arg" in
+    -o)
+      shift
+      ;;
+    $object)
+      shift
+      ;;
+    "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
+        set fnord "$@"  # nothing is re-appended, so this flag is dropped
+        shift
+        shift
+        ;;
+    *)
+        set fnord "$@" "$arg"  # keep: re-append the word at the end of "$@"
+        shift
+        shift
+        ;;
+    esac
+  done
+  "$@" -E 2>/dev/null |
+  sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
+  echo "$tab" >> "$depfile"
+  sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+msvcmsys)
+  # This case exists only to let depend.m4 do its work.  It works by
+  # looking at the text of this script.  This case will never be run,
+  # since it is checked for above.
+  exit 1
+  ;;
+
+none)
+  exec "$@"
+  ;;
+
+*)
+  echo "Unknown depmode $depmode" 1>&2
+  exit 1
+  ;;
+esac
+
+exit 0
+
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/third-party/libjxl/libjxl/third_party/lcms/doc/LittleCMS2.9 API.pdf b/third-party/libjxl/libjxl/third_party/lcms/doc/LittleCMS2.9 API.pdf
new file mode 100644
index 0000000000..16c16f6b39
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/doc/LittleCMS2.9 API.pdf differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/doc/LittleCMS2.9 Plugin API.pdf b/third-party/libjxl/libjxl/third_party/lcms/doc/LittleCMS2.9 Plugin API.pdf
new file mode 100644
index 0000000000..83f3043834
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/doc/LittleCMS2.9 Plugin API.pdf differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/doc/LittleCMS2.9 tutorial.pdf b/third-party/libjxl/libjxl/third_party/lcms/doc/LittleCMS2.9 tutorial.pdf
new file mode 100644
index 0000000000..2fd3335179
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/doc/LittleCMS2.9 tutorial.pdf differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/doc/src.zip b/third-party/libjxl/libjxl/third_party/lcms/doc/src.zip
new file mode 100755
index 0000000000..13f3aeb675
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/doc/src.zip differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/include/Makefile.am b/third-party/libjxl/libjxl/third_party/lcms/include/Makefile.am
new file mode 100644
index 0000000000..7dbe0e43bd
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/include/Makefile.am
@@ -0,0 +1,7 @@
+#
+# Makefile for include directory
+# Based on a work by Bob Friesenhahn
+
+include_HEADERS = lcms2.h lcms2_plugin.h
+
+EXTRA_DIST = lcms2.h lcms2_plugin.h
diff --git a/third-party/libjxl/libjxl/third_party/lcms/include/Makefile.in b/third-party/libjxl/libjxl/third_party/lcms/include/Makefile.in
new file mode 100644
index 0000000000..22901ea0af
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/include/Makefile.in
@@ -0,0 +1,590 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Makefile for include directory
+# Based on a work by Bob Friesenhahn
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = include
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
+	$(top_srcdir)/m4/ax_append_compile_flags.m4 \
+	$(top_srcdir)/m4/ax_append_flag.m4 \
+	$(top_srcdir)/m4/ax_check_compile_flag.m4 \
+	$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+	$(top_srcdir)/m4/ax_require_defined.m4 \
+	$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+	$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+	$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(include_HEADERS) \
+	$(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+SOURCES =
+DIST_SOURCES =
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
+am__installdirs = "$(DESTDIR)$(includedir)"
+HEADERS = $(include_HEADERS)
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JPEGICC_DEPLIBS = @JPEGICC_DEPLIBS@
+LCMS_LIB_DEPLIBS = @LCMS_LIB_DEPLIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBRARY_AGE = @LIBRARY_AGE@
+LIBRARY_CURRENT = @LIBRARY_CURRENT@
+LIBRARY_REVISION = @LIBRARY_REVISION@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBTOOL_DEPS = @LIBTOOL_DEPS@
+LIB_JPEG = @LIB_JPEG@
+LIB_MATH = @LIB_MATH@
+LIB_THREAD = @LIB_THREAD@
+LIB_TIFF = @LIB_TIFF@
+LIB_ZLIB = @LIB_ZLIB@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_CXX = @PTHREAD_CXX@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TIFFICC_DEPLIBS = @TIFFICC_DEPLIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+inline = @inline@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+include_HEADERS = lcms2.h lcms2_plugin.h
+EXTRA_DIST = lcms2.h lcms2_plugin.h
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign include/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign include/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+install-includeHEADERS: $(include_HEADERS)
+	@$(NORMAL_INSTALL)
+	@list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \
+	fi; \
+	for p in $$list; do \
+	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+	  echo "$$d$$p"; \
+	done | $(am__base_list) | \
+	while read files; do \
+	  echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \
+	  $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \
+	done
+
+uninstall-includeHEADERS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
+	files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+	dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir)
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(HEADERS)
+installdirs:
+	for dir in "$(DESTDIR)$(includedir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-includeHEADERS
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-includeHEADERS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+	clean-libtool cscopelist-am ctags ctags-am distclean \
+	distclean-generic distclean-libtool distclean-tags distdir dvi \
+	dvi-am html html-am info info-am install install-am \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-includeHEADERS install-info install-info-am \
+	install-man install-pdf install-pdf-am install-ps \
+	install-ps-am install-strip installcheck installcheck-am \
+	installdirs maintainer-clean maintainer-clean-generic \
+	mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
+	ps ps-am tags tags-am uninstall uninstall-am \
+	uninstall-includeHEADERS
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/third-party/libjxl/libjxl/third_party/lcms/include/lcms2.h b/third-party/libjxl/libjxl/third_party/lcms/include/lcms2.h
new file mode 100644
index 0000000000..d7e8c1653e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/include/lcms2.h
@@ -0,0 +1,1921 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+// Version 2.10alpha
+//
+
+#ifndef _lcms2_H
+
+// ********** Configuration toggles ****************************************
+
+// Uncomment this one if you are using big endian machines
+// #define CMS_USE_BIG_ENDIAN   1
+
+// Uncomment this one if your compiler/machine does NOT support the
+// "long long" type.
+// #define CMS_DONT_USE_INT64        1
+
+// Uncomment this if your compiler doesn't work with fast floor function
+// #define CMS_DONT_USE_FAST_FLOOR 1
+
+// Uncomment this line if you want lcms to use the black point tag in the profile;
+// if left commented out, lcms will compute the black point on its own.
+// It is safer to leave it commented out.
+// #define CMS_USE_PROFILE_BLACK_POINT_TAG    1
+
+// Uncomment this line if you are compiling as C++ and want a C++ API
+// #define CMS_USE_CPP_API
+
+// Uncomment this line if you need strict CGATS syntax. This makes CGATS files
+// require "KEYWORD" on undefined identifiers; keep it commented out unless needed.
+// #define CMS_STRICT_CGATS  1
+
+// Uncomment to get rid of the tables for "half" float support
+// #define CMS_NO_HALF_SUPPORT 1
+
+// Uncomment to get rid of pthreads/windows dependency
+// #define CMS_NO_PTHREADS  1
+
+// Uncomment this for special windows mutex initialization (see lcms2_internal.h)
+// #define CMS_RELY_ON_WINDOWS_STATIC_MUTEX_INIT
+
+// Uncomment this to remove the "CMSREGISTER" storage class
+// #define CMS_NO_REGISTER_KEYWORD 1
+
+// ********** End of configuration toggles ******************************
+
+// Needed for streams
+#include <stdio.h>
+
+// Needed for portability (C99 per 7.1.2)
+#include <limits.h>
+#include <time.h>
+#include <stddef.h>
+
+#ifndef CMS_USE_CPP_API
+#   ifdef __cplusplus
+extern "C" {
+#   endif
+#endif
+
+// Version/release
+#define LCMS_VERSION        2100
+
+// Allow the basic types to be supplied externally (define CMS_BASIC_TYPES_ALREADY_DEFINED) for compilers that are not fully C99 compliant
+#ifndef CMS_BASIC_TYPES_ALREADY_DEFINED
+
+// Base types
+typedef unsigned char        cmsUInt8Number;   // That is guaranteed by the C99 spec
+typedef signed char          cmsInt8Number;    // That is guaranteed by the C99 spec
+
+#if CHAR_BIT != 8
+#  error "Unable to find 8 bit type, unsupported compiler"
+#endif
+
+// IEEE float storage numbers
+typedef float                cmsFloat32Number;
+typedef double               cmsFloat64Number;
+
+// 16-bit base types
+#if (USHRT_MAX == 65535U)
+ typedef unsigned short      cmsUInt16Number;
+#elif (UINT_MAX == 65535U)
+ typedef unsigned int        cmsUInt16Number;
+#else
+#  error "Unable to find 16 bits unsigned type, unsupported compiler"
+#endif
+
+#if (SHRT_MAX == 32767)
+  typedef  short             cmsInt16Number;
+#elif (INT_MAX == 32767)
+  typedef  int               cmsInt16Number;
+#else
+#  error "Unable to find 16 bits signed type, unsupported compiler"
+#endif
+
+// 32-bit base type
+#if (UINT_MAX == 4294967295U)
+ typedef unsigned int        cmsUInt32Number;
+#elif (ULONG_MAX == 4294967295U)
+ typedef unsigned long       cmsUInt32Number;
+#else
+#  error "Unable to find 32 bit unsigned type, unsupported compiler"
+#endif
+
+#if (INT_MAX == +2147483647)
+ typedef  int                cmsInt32Number;
+#elif (LONG_MAX == +2147483647)
+ typedef  long               cmsInt32Number;
+#else
+#  error "Unable to find 32 bit signed type, unsupported compiler"
+#endif
+
+// 64-bit base types
+#ifndef CMS_DONT_USE_INT64
+#  if (ULONG_MAX  == 18446744073709551615U)
+    typedef unsigned long   cmsUInt64Number;
+#  elif (ULLONG_MAX == 18446744073709551615U)
+      typedef unsigned long long   cmsUInt64Number;
+#  else
+#     define CMS_DONT_USE_INT64 1
+#  endif
+#  if (LONG_MAX == +9223372036854775807)
+      typedef  long          cmsInt64Number;
+#  elif (LLONG_MAX == +9223372036854775807)
+      typedef  long long     cmsInt64Number;
+#  else
+#     define CMS_DONT_USE_INT64 1
+#  endif
+#endif
+#endif
+
+// Handle "register" keyword
+#if defined(CMS_NO_REGISTER_KEYWORD) && !defined(CMS_DLL) && !defined(CMS_DLL_BUILD) 
+#  define CMSREGISTER
+#else
+#  define CMSREGISTER register
+#endif
+
+// In the case 64 bit numbers are not supported by the compiler
+#ifdef CMS_DONT_USE_INT64
+    typedef cmsUInt32Number      cmsUInt64Number[2];
+    typedef cmsInt32Number       cmsInt64Number[2];
+#endif
+
+// Derivative types
+typedef cmsUInt32Number      cmsSignature;
+typedef cmsUInt16Number      cmsU8Fixed8Number;
+typedef cmsInt32Number       cmsS15Fixed16Number;
+typedef cmsUInt32Number      cmsU16Fixed16Number;
+
+// Boolean type, which will be using the native integer
+typedef int                  cmsBool;
+
+// Try to detect windows
+#if defined (_WIN32) || defined(_WIN64) || defined(WIN32) || defined(_WIN32_)
+#  define CMS_IS_WINDOWS_ 1
+#endif
+
+#ifdef _MSC_VER
+#  define CMS_IS_WINDOWS_ 1
+#endif
+
+#ifdef __BORLANDC__
+#  define CMS_IS_WINDOWS_ 1
+#endif
+
+// Try to detect big endian platforms. This list can be endless, so primarily rely on the configure script
+// on Unix-like systems, and allow it to be set on the compiler command line using
+// -DCMS_USE_BIG_ENDIAN or something similar
+#ifdef CMS_USE_BIG_ENDIAN // set at compiler command line takes overall precedence
+
+#  if CMS_USE_BIG_ENDIAN == 0
+#    undef CMS_USE_BIG_ENDIAN
+#  endif
+
+#else // CMS_USE_BIG_ENDIAN
+
+#  ifdef WORDS_BIGENDIAN // set by configure (or explicitly on compiler command line)
+#    define CMS_USE_BIG_ENDIAN 1
+#  else // WORDS_BIGENDIAN
+// Fall back to platform/compiler specific tests
+#    if defined(__sgi__) || defined(__sgi) || defined(sparc)
+#      define CMS_USE_BIG_ENDIAN      1
+#    endif
+
+#    if defined(__s390__) || defined(__s390x__)
+#      define CMS_USE_BIG_ENDIAN   1
+#    endif
+
+#    ifdef macintosh
+#      ifdef __BIG_ENDIAN__
+#        define CMS_USE_BIG_ENDIAN      1
+#      endif
+#      ifdef __LITTLE_ENDIAN__
+#        undef CMS_USE_BIG_ENDIAN
+#      endif
+#    endif
+#  endif  // WORDS_BIGENDIAN
+
+#  if defined(_HOST_BIG_ENDIAN) || defined(__BIG_ENDIAN__)
+#    define CMS_USE_BIG_ENDIAN      1
+#  endif
+
+#endif  // CMS_USE_BIG_ENDIAN
+
+
+// Calling convention -- this is highly platform and compiler dependent
+#ifdef CMS_IS_WINDOWS_
+#  if defined(CMS_DLL) || defined(CMS_DLL_BUILD)
+#     ifdef __BORLANDC__
+#        define CMSEXPORT       __stdcall _export
+#        define CMSAPI
+#     else
+#        define CMSEXPORT      __stdcall
+#        ifdef CMS_DLL_BUILD
+#            define CMSAPI    __declspec(dllexport)
+#        else
+#           define CMSAPI     __declspec(dllimport)
+#        endif
+#     endif
+#  else
+#     define CMSEXPORT
+#     define CMSAPI
+#  endif
+#else  // not Windows
+#  ifdef HAVE_FUNC_ATTRIBUTE_VISIBILITY
+#     define CMSEXPORT
+#     define CMSAPI    __attribute__((visibility("default")))
+#  else
+#     define CMSEXPORT
+#     define CMSAPI
+#  endif
+#endif  // CMS_IS_WINDOWS_
+
+#ifdef HasTHREADS
+# if HasTHREADS == 1
+#    undef CMS_NO_PTHREADS
+# else
+#    define CMS_NO_PTHREADS 1
+# endif
+#endif
+
+// Some common definitions
+#define cmsMAX_PATH     256
+
+#ifndef FALSE
+#       define FALSE 0
+#endif
+#ifndef TRUE
+#       define TRUE  1
+#endif
+
+// D50 XYZ normalized to Y=1.0
+#define cmsD50X  0.9642
+#define cmsD50Y  1.0
+#define cmsD50Z  0.8249
+
+// V4 perceptual black
+#define cmsPERCEPTUAL_BLACK_X  0.00336
+#define cmsPERCEPTUAL_BLACK_Y  0.0034731
+#define cmsPERCEPTUAL_BLACK_Z  0.00287
+
+// Definitions in ICC spec
+#define cmsMagicNumber  0x61637370     // 'acsp'
+#define lcmsSignature   0x6c636d73     // 'lcms'
+
+
+// Base ICC type definitions
+typedef enum {
+    cmsSigChromaticityType                  = 0x6368726D,  // 'chrm'
+    cmsSigColorantOrderType                 = 0x636C726F,  // 'clro'
+    cmsSigColorantTableType                 = 0x636C7274,  // 'clrt'
+    cmsSigCrdInfoType                       = 0x63726469,  // 'crdi'
+    cmsSigCurveType                         = 0x63757276,  // 'curv'
+    cmsSigDataType                          = 0x64617461,  // 'data'
+    cmsSigDictType                          = 0x64696374,  // 'dict'
+    cmsSigDateTimeType                      = 0x6474696D,  // 'dtim'
+    cmsSigDeviceSettingsType                = 0x64657673,  // 'devs'
+    cmsSigLut16Type                         = 0x6d667432,  // 'mft2'
+    cmsSigLut8Type                          = 0x6d667431,  // 'mft1'
+    cmsSigLutAtoBType                       = 0x6d414220,  // 'mAB '
+    cmsSigLutBtoAType                       = 0x6d424120,  // 'mBA '
+    cmsSigMeasurementType                   = 0x6D656173,  // 'meas'
+    cmsSigMultiLocalizedUnicodeType         = 0x6D6C7563,  // 'mluc'
+    cmsSigMultiProcessElementType           = 0x6D706574,  // 'mpet'
+    cmsSigNamedColorType                    = 0x6E636f6C,  // 'ncol' -- DEPRECATED!
+    cmsSigNamedColor2Type                   = 0x6E636C32,  // 'ncl2'
+    cmsSigParametricCurveType               = 0x70617261,  // 'para'
+    cmsSigProfileSequenceDescType           = 0x70736571,  // 'pseq'
+    cmsSigProfileSequenceIdType             = 0x70736964,  // 'psid'
+    cmsSigResponseCurveSet16Type            = 0x72637332,  // 'rcs2'
+    cmsSigS15Fixed16ArrayType               = 0x73663332,  // 'sf32'
+    cmsSigScreeningType                     = 0x7363726E,  // 'scrn'
+    cmsSigSignatureType                     = 0x73696720,  // 'sig '
+    cmsSigTextType                          = 0x74657874,  // 'text'
+    cmsSigTextDescriptionType               = 0x64657363,  // 'desc'
+    cmsSigU16Fixed16ArrayType               = 0x75663332,  // 'uf32'
+    cmsSigUcrBgType                         = 0x62666420,  // 'bfd '
+    cmsSigUInt16ArrayType                   = 0x75693136,  // 'ui16'
+    cmsSigUInt32ArrayType                   = 0x75693332,  // 'ui32'
+    cmsSigUInt64ArrayType                   = 0x75693634,  // 'ui64'
+    cmsSigUInt8ArrayType                    = 0x75693038,  // 'ui08'
+    cmsSigVcgtType                          = 0x76636774,  // 'vcgt'
+    cmsSigViewingConditionsType             = 0x76696577,  // 'view'
+    cmsSigXYZType                           = 0x58595A20   // 'XYZ '
+
+
+} cmsTagTypeSignature;
+
+// Base ICC tag definitions
+typedef enum {
+    cmsSigAToB0Tag                          = 0x41324230,  // 'A2B0'
+    cmsSigAToB1Tag                          = 0x41324231,  // 'A2B1'
+    cmsSigAToB2Tag                          = 0x41324232,  // 'A2B2'
+    cmsSigBlueColorantTag                   = 0x6258595A,  // 'bXYZ'
+    cmsSigBlueMatrixColumnTag               = 0x6258595A,  // 'bXYZ'
+    cmsSigBlueTRCTag                        = 0x62545243,  // 'bTRC'
+    cmsSigBToA0Tag                          = 0x42324130,  // 'B2A0'
+    cmsSigBToA1Tag                          = 0x42324131,  // 'B2A1'
+    cmsSigBToA2Tag                          = 0x42324132,  // 'B2A2'
+    cmsSigCalibrationDateTimeTag            = 0x63616C74,  // 'calt'
+    cmsSigCharTargetTag                     = 0x74617267,  // 'targ'
+    cmsSigChromaticAdaptationTag            = 0x63686164,  // 'chad'
+    cmsSigChromaticityTag                   = 0x6368726D,  // 'chrm'
+    cmsSigColorantOrderTag                  = 0x636C726F,  // 'clro'
+    cmsSigColorantTableTag                  = 0x636C7274,  // 'clrt'
+    cmsSigColorantTableOutTag               = 0x636C6F74,  // 'clot'
+    cmsSigColorimetricIntentImageStateTag   = 0x63696973,  // 'ciis'
+    cmsSigCopyrightTag                      = 0x63707274,  // 'cprt'
+    cmsSigCrdInfoTag                        = 0x63726469,  // 'crdi'
+    cmsSigDataTag                           = 0x64617461,  // 'data'
+    cmsSigDateTimeTag                       = 0x6474696D,  // 'dtim'
+    cmsSigDeviceMfgDescTag                  = 0x646D6E64,  // 'dmnd'
+    cmsSigDeviceModelDescTag                = 0x646D6464,  // 'dmdd'
+    cmsSigDeviceSettingsTag                 = 0x64657673,  // 'devs'
+    cmsSigDToB0Tag                          = 0x44324230,  // 'D2B0'
+    cmsSigDToB1Tag                          = 0x44324231,  // 'D2B1'
+    cmsSigDToB2Tag                          = 0x44324232,  // 'D2B2'
+    cmsSigDToB3Tag                          = 0x44324233,  // 'D2B3'
+    cmsSigBToD0Tag                          = 0x42324430,  // 'B2D0'
+    cmsSigBToD1Tag                          = 0x42324431,  // 'B2D1'
+    cmsSigBToD2Tag                          = 0x42324432,  // 'B2D2'
+    cmsSigBToD3Tag                          = 0x42324433,  // 'B2D3'
+    cmsSigGamutTag                          = 0x67616D74,  // 'gamt'
+    cmsSigGrayTRCTag                        = 0x6b545243,  // 'kTRC'
+    cmsSigGreenColorantTag                  = 0x6758595A,  // 'gXYZ'
+    cmsSigGreenMatrixColumnTag              = 0x6758595A,  // 'gXYZ'
+    cmsSigGreenTRCTag                       = 0x67545243,  // 'gTRC'
+    cmsSigLuminanceTag                      = 0x6C756d69,  // 'lumi'
+    cmsSigMeasurementTag                    = 0x6D656173,  // 'meas'
+    cmsSigMediaBlackPointTag                = 0x626B7074,  // 'bkpt'
+    cmsSigMediaWhitePointTag                = 0x77747074,  // 'wtpt'
+    cmsSigNamedColorTag                     = 0x6E636f6C,  // 'ncol' // Deprecated by the ICC
+    cmsSigNamedColor2Tag                    = 0x6E636C32,  // 'ncl2'
+    cmsSigOutputResponseTag                 = 0x72657370,  // 'resp'
+    cmsSigPerceptualRenderingIntentGamutTag = 0x72696730,  // 'rig0'
+    cmsSigPreview0Tag                       = 0x70726530,  // 'pre0'
+    cmsSigPreview1Tag                       = 0x70726531,  // 'pre1'
+    cmsSigPreview2Tag                       = 0x70726532,  // 'pre2'
+    cmsSigProfileDescriptionTag             = 0x64657363,  // 'desc'
+    cmsSigProfileDescriptionMLTag           = 0x6473636d,  // 'dscm'
+    cmsSigProfileSequenceDescTag            = 0x70736571,  // 'pseq'
+    cmsSigProfileSequenceIdTag              = 0x70736964,  // 'psid'
+    cmsSigPs2CRD0Tag                        = 0x70736430,  // 'psd0'
+    cmsSigPs2CRD1Tag                        = 0x70736431,  // 'psd1'
+    cmsSigPs2CRD2Tag                        = 0x70736432,  // 'psd2'
+    cmsSigPs2CRD3Tag                        = 0x70736433,  // 'psd3'
+    cmsSigPs2CSATag                         = 0x70733273,  // 'ps2s'
+    cmsSigPs2RenderingIntentTag             = 0x70733269,  // 'ps2i'
+    cmsSigRedColorantTag                    = 0x7258595A,  // 'rXYZ'
+    cmsSigRedMatrixColumnTag                = 0x7258595A,  // 'rXYZ'
+    cmsSigRedTRCTag                         = 0x72545243,  // 'rTRC'
+    cmsSigSaturationRenderingIntentGamutTag = 0x72696732,  // 'rig2'
+    cmsSigScreeningDescTag                  = 0x73637264,  // 'scrd'
+    cmsSigScreeningTag                      = 0x7363726E,  // 'scrn'
+    cmsSigTechnologyTag                     = 0x74656368,  // 'tech'
+    cmsSigUcrBgTag                          = 0x62666420,  // 'bfd '
+    cmsSigViewingCondDescTag                = 0x76756564,  // 'vued'
+    cmsSigViewingConditionsTag              = 0x76696577,  // 'view'
+    cmsSigVcgtTag                           = 0x76636774,  // 'vcgt'
+    cmsSigMetaTag                           = 0x6D657461,  // 'meta'
+    cmsSigArgyllArtsTag                     = 0x61727473   // 'arts'
+
+} cmsTagSignature;
+
+
+// ICC Technology tag values: each constant packs the four ASCII characters in its comment, first character in the high byte
+typedef enum {
+    cmsSigDigitalCamera                     = 0x6463616D,  // 'dcam'
+    cmsSigFilmScanner                       = 0x6673636E,  // 'fscn'
+    cmsSigReflectiveScanner                 = 0x7273636E,  // 'rscn'
+    cmsSigInkJetPrinter                     = 0x696A6574,  // 'ijet'
+    cmsSigThermalWaxPrinter                 = 0x74776178,  // 'twax'
+    cmsSigElectrophotographicPrinter        = 0x6570686F,  // 'epho'
+    cmsSigElectrostaticPrinter              = 0x65737461,  // 'esta'
+    cmsSigDyeSublimationPrinter             = 0x64737562,  // 'dsub'
+    cmsSigPhotographicPaperPrinter          = 0x7270686F,  // 'rpho'
+    cmsSigFilmWriter                        = 0x6670726E,  // 'fprn'
+    cmsSigVideoMonitor                      = 0x7669646D,  // 'vidm'
+    cmsSigVideoCamera                       = 0x76696463,  // 'vidc'
+    cmsSigProjectionTelevision              = 0x706A7476,  // 'pjtv'
+    cmsSigCRTDisplay                        = 0x43525420,  // 'CRT '
+    cmsSigPMDisplay                         = 0x504D4420,  // 'PMD '
+    cmsSigAMDisplay                         = 0x414D4420,  // 'AMD '
+    cmsSigPhotoCD                           = 0x4B504344,  // 'KPCD'
+    cmsSigPhotoImageSetter                  = 0x696D6773,  // 'imgs'
+    cmsSigGravure                           = 0x67726176,  // 'grav'
+    cmsSigOffsetLithography                 = 0x6F666673,  // 'offs'
+    cmsSigSilkscreen                        = 0x73696C6B,  // 'silk'
+    cmsSigFlexography                       = 0x666C6578,  // 'flex'
+    cmsSigMotionPictureFilmScanner          = 0x6D706673,  // 'mpfs'
+    cmsSigMotionPictureFilmRecorder         = 0x6D706672,  // 'mpfr'
+    cmsSigDigitalMotionPictureCamera        = 0x646D7063,  // 'dmpc'
+    cmsSigDigitalCinemaProjector            = 0x6463706A   // 'dcpj'
+
+} cmsTechnologySignature;
+
+
+// ICC Color spaces
+typedef enum {
+    cmsSigXYZData                           = 0x58595A20,  // 'XYZ '
+    cmsSigLabData                           = 0x4C616220,  // 'Lab '
+    cmsSigLuvData                           = 0x4C757620,  // 'Luv '
+    cmsSigYCbCrData                         = 0x59436272,  // 'YCbr'
+    cmsSigYxyData                           = 0x59787920,  // 'Yxy '
+    cmsSigRgbData                           = 0x52474220,  // 'RGB '
+    cmsSigGrayData                          = 0x47524159,  // 'GRAY'
+    cmsSigHsvData                           = 0x48535620,  // 'HSV '
+    cmsSigHlsData                           = 0x484C5320,  // 'HLS '
+    cmsSigCmykData                          = 0x434D594B,  // 'CMYK'
+    cmsSigCmyData                           = 0x434D5920,  // 'CMY '
+    cmsSigMCH1Data                          = 0x4D434831,  // 'MCH1'
+    cmsSigMCH2Data                          = 0x4D434832,  // 'MCH2'
+    cmsSigMCH3Data                          = 0x4D434833,  // 'MCH3'
+    cmsSigMCH4Data                          = 0x4D434834,  // 'MCH4'
+    cmsSigMCH5Data                          = 0x4D434835,  // 'MCH5'
+    cmsSigMCH6Data                          = 0x4D434836,  // 'MCH6'
+    cmsSigMCH7Data                          = 0x4D434837,  // 'MCH7'
+    cmsSigMCH8Data                          = 0x4D434838,  // 'MCH8'
+    cmsSigMCH9Data                          = 0x4D434839,  // 'MCH9'
+    cmsSigMCHAData                          = 0x4D434841,  // 'MCHA'
+    cmsSigMCHBData                          = 0x4D434842,  // 'MCHB'
+    cmsSigMCHCData                          = 0x4D434843,  // 'MCHC'
+    cmsSigMCHDData                          = 0x4D434844,  // 'MCHD'
+    cmsSigMCHEData                          = 0x4D434845,  // 'MCHE'
+    cmsSigMCHFData                          = 0x4D434846,  // 'MCHF'
+    cmsSigNamedData                         = 0x6e6d636c,  // 'nmcl'
+    cmsSig1colorData                        = 0x31434C52,  // '1CLR'
+    cmsSig2colorData                        = 0x32434C52,  // '2CLR'
+    cmsSig3colorData                        = 0x33434C52,  // '3CLR'
+    cmsSig4colorData                        = 0x34434C52,  // '4CLR'
+    cmsSig5colorData                        = 0x35434C52,  // '5CLR'
+    cmsSig6colorData                        = 0x36434C52,  // '6CLR'
+    cmsSig7colorData                        = 0x37434C52,  // '7CLR'
+    cmsSig8colorData                        = 0x38434C52,  // '8CLR'
+    cmsSig9colorData                        = 0x39434C52,  // '9CLR'
+    cmsSig10colorData                       = 0x41434C52,  // 'ACLR'
+    cmsSig11colorData                       = 0x42434C52,  // 'BCLR'
+    cmsSig12colorData                       = 0x43434C52,  // 'CCLR'
+    cmsSig13colorData                       = 0x44434C52,  // 'DCLR'
+    cmsSig14colorData                       = 0x45434C52,  // 'ECLR'
+    cmsSig15colorData                       = 0x46434C52,  // 'FCLR'
+    cmsSigLuvKData                          = 0x4C75764B   // 'LuvK'
+
+} cmsColorSpaceSignature;
+
+// ICC Profile Class signatures; cmsICCHeader.deviceClass holds one of these
+typedef enum {
+    cmsSigInputClass                        = 0x73636E72,  // 'scnr'
+    cmsSigDisplayClass                      = 0x6D6E7472,  // 'mntr'
+    cmsSigOutputClass                       = 0x70727472,  // 'prtr'
+    cmsSigLinkClass                         = 0x6C696E6B,  // 'link'
+    cmsSigAbstractClass                     = 0x61627374,  // 'abst'
+    cmsSigColorSpaceClass                   = 0x73706163,  // 'spac'
+    cmsSigNamedColorClass                   = 0x6e6d636c   // 'nmcl'
+
+} cmsProfileClassSignature;
+
+// ICC Platforms: primary platform signatures; cmsICCHeader.platform holds one of these
+typedef enum {
+    cmsSigMacintosh                         = 0x4150504C,  // 'APPL'
+    cmsSigMicrosoft                         = 0x4D534654,  // 'MSFT'
+    cmsSigSolaris                           = 0x53554E57,  // 'SUNW'
+    cmsSigSGI                               = 0x53474920,  // 'SGI '
+    cmsSigTaligent                          = 0x54474E54,  // 'TGNT'
+    cmsSigUnices                            = 0x2A6E6978   // '*nix'   // From argyll -- Not official
+
+} cmsPlatformSignature;
+
+// Reference gamut
+#define  cmsSigPerceptualReferenceMediumGamut         0x70726d67  //'prmg'
+
+// For cmsSigColorimetricIntentImageStateTag
+#define  cmsSigSceneColorimetryEstimates              0x73636F65  //'scoe'
+#define  cmsSigSceneAppearanceEstimates               0x73617065  //'sape'
+#define  cmsSigFocalPlaneColorimetryEstimates         0x66706365  //'fpce'
+#define  cmsSigReflectionHardcopyOriginalColorimetry  0x72686F63  //'rhoc'
+#define  cmsSigReflectionPrintOutputColorimetry       0x72706F63  //'rpoc'
+
+// Multi process elements types
+typedef enum {
+    cmsSigCurveSetElemType              = 0x63767374,  //'cvst'
+    cmsSigMatrixElemType                = 0x6D617466,  //'matf'
+    cmsSigCLutElemType                  = 0x636C7574,  //'clut'
+
+    cmsSigBAcsElemType                  = 0x62414353,  // 'bACS'
+    cmsSigEAcsElemType                  = 0x65414353,  // 'eACS'
+
+    // Custom from here, not in the ICC Spec
+    cmsSigXYZ2LabElemType               = 0x6C327820,  // 'l2x '
+    cmsSigLab2XYZElemType               = 0x78326C20,  // 'x2l '
+    cmsSigNamedColorElemType            = 0x6E636C20,  // 'ncl '
+    cmsSigLabV2toV4                     = 0x32203420,  // '2 4 '
+    cmsSigLabV4toV2                     = 0x34203220,  // '4 2 '
+  
+    // Identities
+    cmsSigIdentityElemType              = 0x69646E20,  // 'idn '
+
+    // Float to floatPCS
+    cmsSigLab2FloatPCS                  = 0x64326C20,  // 'd2l '
+    cmsSigFloatPCS2Lab                  = 0x6C326420,  // 'l2d '
+    cmsSigXYZ2FloatPCS                  = 0x64327820,  // 'd2x '
+    cmsSigFloatPCS2XYZ                  = 0x78326420,  // 'x2d '  
+    cmsSigClipNegativesElemType         = 0x636c7020   // 'clp '
+
+} cmsStageSignature;
+
+// Types of CurveElements
+typedef enum {
+
+    cmsSigFormulaCurveSeg               = 0x70617266, // 'parf'
+    cmsSigSampledCurveSeg               = 0x73616D66, // 'samf'
+    cmsSigSegmentedCurve                = 0x63757266  // 'curf'
+
+} cmsCurveSegSignature;
+
+// Used in ResponseCurveType
+#define  cmsSigStatusA                    0x53746141 //'StaA'
+#define  cmsSigStatusE                    0x53746145 //'StaE'
+#define  cmsSigStatusI                    0x53746149 //'StaI'
+#define  cmsSigStatusT                    0x53746154 //'StaT'
+#define  cmsSigStatusM                    0x5374614D //'StaM'
+#define  cmsSigDN                         0x444E2020 //'DN  '
+#define  cmsSigDNP                        0x444E2050 //'DN P'
+#define  cmsSigDNN                        0x444E4E20 //'DNN '
+#define  cmsSigDNNP                       0x444E4E50 //'DNNP'
+
+// Device attributes, currently defined values correspond to the low 4 bytes
+// of the 8 byte attribute quantity
+#define cmsReflective     0
+#define cmsTransparency   1
+#define cmsGlossy         0
+#define cmsMatte          2
+
+// Common structures in ICC tags
+typedef struct {
+    cmsUInt32Number len;        // presumably the number of bytes held in data -- TODO confirm
+    cmsUInt32Number flag;       // NOTE(review): presumably distinguishes ASCII from binary payloads -- confirm
+    cmsUInt8Number  data[1];    // declared with one element; presumably the payload extends past it -- confirm
+
+} cmsICCData;
+
+// ICC date time: six unsigned 16-bit fields, from year down to seconds
+typedef struct {
+    cmsUInt16Number      year;
+    cmsUInt16Number      month;
+    cmsUInt16Number      day;
+    cmsUInt16Number      hours;
+    cmsUInt16Number      minutes;
+    cmsUInt16Number      seconds;
+
+} cmsDateTimeNumber;
+
+// ICC XYZ: the three components held as cmsS15Fixed16Number (32-bit signed) values
+typedef struct {
+    cmsS15Fixed16Number  X;
+    cmsS15Fixed16Number  Y;
+    cmsS15Fixed16Number  Z;
+
+} cmsEncodedXYZNumber;
+
+
+// Profile ID as computed by MD5 algorithm: 16 bytes, also addressable as 16-bit or 32-bit words
+typedef union {
+    cmsUInt8Number       ID8[16];
+    cmsUInt16Number      ID16[8];
+    cmsUInt32Number      ID32[4];
+
+} cmsProfileID;
+
+
+// ----------------------------------------------------------------------------------------------
+// ICC profile internal base types. Strictly speaking, these shouldn't be declared in this header,
+// but somebody may want to use this info to access the profile header directly, so here they are.
+
+// Profile header -- it is 32-bit aligned, so no issues are expected on alignment
+typedef struct {
+    cmsUInt32Number              size;           // Profile size in bytes
+    cmsSignature                 cmmId;          // CMM for this profile
+    cmsUInt32Number              version;        // Format version number
+    cmsProfileClassSignature     deviceClass;    // Type of profile
+    cmsColorSpaceSignature       colorSpace;     // Color space of data
+    cmsColorSpaceSignature       pcs;            // PCS, XYZ or Lab only
+    cmsDateTimeNumber            date;           // Date profile was created
+    cmsSignature                 magic;          // Magic Number to identify an ICC profile
+    cmsPlatformSignature         platform;       // Primary Platform
+    cmsUInt32Number              flags;          // Various bit settings
+    cmsSignature                 manufacturer;   // Device manufacturer
+    cmsUInt32Number              model;          // Device model number
+    cmsUInt64Number              attributes;     // Device attributes
+    cmsUInt32Number              renderingIntent;// Rendering intent
+    cmsEncodedXYZNumber          illuminant;     // Profile illuminant
+    cmsSignature                 creator;        // Profile creator
+    cmsProfileID                 profileID;      // Profile ID using MD5
+    cmsInt8Number                reserved[28];   // Reserved for future use
+
+} cmsICCHeader;
+
+// ICC base tag: the tag type signature followed by four reserved bytes
+typedef struct {
+    cmsTagTypeSignature  sig;
+    cmsInt8Number        reserved[4];
+
+} cmsTagBase;
+
+// A tag entry in the tag directory: which tag it is, where it starts and how large it is
+typedef struct {
+    cmsTagSignature      sig;            // The tag signature
+    cmsUInt32Number      offset;         // Start of tag (presumably a byte offset within the profile -- confirm)
+    cmsUInt32Number      size;           // Size in bytes
+
+} cmsTagEntry;
+
+// ----------------------------------------------------------------------------------------------
+
+// Little CMS specific typedefs
+
+typedef void* cmsHANDLE ;              // Generic handle
+typedef void* cmsHPROFILE;             // Opaque typedefs to hide internals
+typedef void* cmsHTRANSFORM;
+
+#define cmsMAXCHANNELS  16                // Maximum number of channels in ICC profiles
+
+// Format of pixel is defined by one cmsUInt32Number, using bit fields as follows
+//
+//                               2                1          0
+//                          3 2 10987 6 5 4 3 2 1 098 7654 321
+//                          A O TTTTT U Y F P X S EEE CCCC BBB
+//
+//            A: Floating point -- With this flag we can differentiate 16 bits as float and as int
+//            O: Optimized -- previous optimization already returns the final 8-bit value
+//            T: Pixeltype
+//            F: Flavor  0=MinIsBlack(Chocolate) 1=MinIsWhite(Vanilla)
+//            P: Planar? 0=Chunky, 1=Planar
+//            X: swap 16 bps endianness?
+//            S: Do swap? ie, BGR, KYMC
+//            E: Extra samples
+//            C: Channels (Samples per pixel)
+//            B: bytes per sample
+//            Y: Swap first - changes ABGR to BGRA and KCMY to CMYK
+
+#define FLOAT_SH(a)            ((a) << 22)   // A: floating point flag
+#define OPTIMIZED_SH(s)        ((s) << 21)   // O: optimized flag
+#define COLORSPACE_SH(s)       ((s) << 16)   // T: pixel type (one of the PT_* values)
+#define SWAPFIRST_SH(s)        ((s) << 14)   // Y: swap first
+#define FLAVOR_SH(s)           ((s) << 13)   // F: flavor (0 = min is black, 1 = min is white)
+#define PLANAR_SH(p)           ((p) << 12)   // P: planar (0 = chunky, 1 = planar)
+#define ENDIAN16_SH(e)         ((e) << 11)   // X: swap 16 bps endianness
+#define DOSWAP_SH(e)           ((e) << 10)   // S: do swap (e.g. BGR, KYMC)
+#define EXTRA_SH(e)            ((e) << 7)    // E: extra samples
+#define CHANNELS_SH(c)         ((c) << 3)    // C: channels (samples per pixel)
+#define BYTES_SH(b)            (b)           // B: bytes per sample (lowest bits, no shift)
+
+// These macros unpack format specifiers into integers (each one reverses the matching *_SH macro)
+#define T_FLOAT(a)            (((a)>>22)&1)    // 1 bit
+#define T_OPTIMIZED(o)        (((o)>>21)&1)    // 1 bit
+#define T_COLORSPACE(s)       (((s)>>16)&31)   // 5 bits: the PT_* pixel type
+#define T_SWAPFIRST(s)        (((s)>>14)&1)    // 1 bit
+#define T_FLAVOR(s)           (((s)>>13)&1)    // 1 bit
+#define T_PLANAR(p)           (((p)>>12)&1)    // 1 bit
+#define T_ENDIAN16(e)         (((e)>>11)&1)    // 1 bit
+#define T_DOSWAP(e)           (((e)>>10)&1)    // 1 bit
+#define T_EXTRA(e)            (((e)>>7)&7)     // 3 bits
+#define T_CHANNELS(c)         (((c)>>3)&15)    // 4 bits
+#define T_BYTES(b)            ((b)&7)          // 3 bits
+
+
+// Pixel types
+#define PT_ANY       0    // Don't check colorspace
+                          // 1 & 2 are reserved
+#define PT_GRAY      3
+#define PT_RGB       4
+#define PT_CMY       5
+#define PT_CMYK      6
+#define PT_YCbCr     7
+#define PT_YUV       8      // Lu'v'
+#define PT_XYZ       9
+#define PT_Lab       10
+#define PT_YUVK      11     // Lu'v'K
+#define PT_HSV       12
+#define PT_HLS       13
+#define PT_Yxy       14
+
+#define PT_MCH1      15
+#define PT_MCH2      16
+#define PT_MCH3      17
+#define PT_MCH4      18
+#define PT_MCH5      19
+#define PT_MCH6      20
+#define PT_MCH7      21
+#define PT_MCH8      22
+#define PT_MCH9      23
+#define PT_MCH10     24
+#define PT_MCH11     25
+#define PT_MCH12     26
+#define PT_MCH13     27
+#define PT_MCH14     28
+#define PT_MCH15     29
+
+#define PT_LabV2     30     // Identical to PT_Lab, but using the V2 old encoding
+
+// Some (not all!) representations
+
+#ifndef TYPE_RGB_8      // TYPE_RGB_8 is a very common identifier, so don't include ours
+                        // if user has it already defined.
+
+#define TYPE_GRAY_8            (COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(1))
+#define TYPE_GRAY_8_REV        (COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(1)|FLAVOR_SH(1))
+#define TYPE_GRAY_16           (COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(2))
+#define TYPE_GRAY_16_REV       (COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(2)|FLAVOR_SH(1))
+#define TYPE_GRAY_16_SE        (COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_GRAYA_8           (COLORSPACE_SH(PT_GRAY)|EXTRA_SH(1)|CHANNELS_SH(1)|BYTES_SH(1))
+#define TYPE_GRAYA_16          (COLORSPACE_SH(PT_GRAY)|EXTRA_SH(1)|CHANNELS_SH(1)|BYTES_SH(2))
+#define TYPE_GRAYA_16_SE       (COLORSPACE_SH(PT_GRAY)|EXTRA_SH(1)|CHANNELS_SH(1)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_GRAYA_8_PLANAR    (COLORSPACE_SH(PT_GRAY)|EXTRA_SH(1)|CHANNELS_SH(1)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_GRAYA_16_PLANAR   (COLORSPACE_SH(PT_GRAY)|EXTRA_SH(1)|CHANNELS_SH(1)|BYTES_SH(2)|PLANAR_SH(1))
+
+#define TYPE_RGB_8             (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_RGB_8_PLANAR      (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_BGR_8             (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_BGR_8_PLANAR      (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|PLANAR_SH(1))
+#define TYPE_RGB_16            (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_RGB_16_PLANAR     (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_RGB_16_SE         (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_BGR_16            (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_BGR_16_PLANAR     (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1)|PLANAR_SH(1))
+#define TYPE_BGR_16_SE         (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+
+#define TYPE_RGBA_8            (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_RGBA_8_PLANAR     (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_RGBA_16           (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_RGBA_16_PLANAR    (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_RGBA_16_SE        (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+#define TYPE_ARGB_8            (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_ARGB_8_PLANAR     (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|SWAPFIRST_SH(1)|PLANAR_SH(1))
+#define TYPE_ARGB_16           (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|SWAPFIRST_SH(1))
+
+#define TYPE_ABGR_8            (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_ABGR_8_PLANAR     (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|PLANAR_SH(1))
+#define TYPE_ABGR_16           (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_ABGR_16_PLANAR    (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1)|PLANAR_SH(1))
+#define TYPE_ABGR_16_SE        (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+
+#define TYPE_BGRA_8            (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_BGRA_8_PLANAR     (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1)|PLANAR_SH(1))
+#define TYPE_BGRA_16           (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_BGRA_16_SE        (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1))
+
+#define TYPE_CMY_8             (COLORSPACE_SH(PT_CMY)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_CMY_8_PLANAR      (COLORSPACE_SH(PT_CMY)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_CMY_16            (COLORSPACE_SH(PT_CMY)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_CMY_16_PLANAR     (COLORSPACE_SH(PT_CMY)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_CMY_16_SE         (COLORSPACE_SH(PT_CMY)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+#define TYPE_CMYK_8            (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(1))
+#define TYPE_CMYKA_8           (COLORSPACE_SH(PT_CMYK)|EXTRA_SH(1)|CHANNELS_SH(4)|BYTES_SH(1))
+#define TYPE_CMYK_8_REV        (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(1)|FLAVOR_SH(1))
+#define TYPE_YUVK_8            TYPE_CMYK_8_REV
+#define TYPE_CMYK_8_PLANAR     (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_CMYK_16           (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2))
+#define TYPE_CMYK_16_REV       (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|FLAVOR_SH(1))
+#define TYPE_YUVK_16           TYPE_CMYK_16_REV
+#define TYPE_CMYK_16_PLANAR    (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_CMYK_16_SE        (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+#define TYPE_KYMC_8            (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC_16           (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC_16_SE        (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+
+#define TYPE_KCMY_8            (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_KCMY_8_REV        (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(1)|FLAVOR_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_KCMY_16           (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|SWAPFIRST_SH(1))
+#define TYPE_KCMY_16_REV       (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|FLAVOR_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_KCMY_16_SE        (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|ENDIAN16_SH(1)|SWAPFIRST_SH(1))
+
+#define TYPE_CMYK5_8           (COLORSPACE_SH(PT_MCH5)|CHANNELS_SH(5)|BYTES_SH(1))
+#define TYPE_CMYK5_16          (COLORSPACE_SH(PT_MCH5)|CHANNELS_SH(5)|BYTES_SH(2))
+#define TYPE_CMYK5_16_SE       (COLORSPACE_SH(PT_MCH5)|CHANNELS_SH(5)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC5_8           (COLORSPACE_SH(PT_MCH5)|CHANNELS_SH(5)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC5_16          (COLORSPACE_SH(PT_MCH5)|CHANNELS_SH(5)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC5_16_SE       (COLORSPACE_SH(PT_MCH5)|CHANNELS_SH(5)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+#define TYPE_CMYK6_8           (COLORSPACE_SH(PT_MCH6)|CHANNELS_SH(6)|BYTES_SH(1))
+#define TYPE_CMYK6_8_PLANAR    (COLORSPACE_SH(PT_MCH6)|CHANNELS_SH(6)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_CMYK6_16          (COLORSPACE_SH(PT_MCH6)|CHANNELS_SH(6)|BYTES_SH(2))
+#define TYPE_CMYK6_16_PLANAR   (COLORSPACE_SH(PT_MCH6)|CHANNELS_SH(6)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_CMYK6_16_SE       (COLORSPACE_SH(PT_MCH6)|CHANNELS_SH(6)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_CMYK7_8           (COLORSPACE_SH(PT_MCH7)|CHANNELS_SH(7)|BYTES_SH(1))
+#define TYPE_CMYK7_16          (COLORSPACE_SH(PT_MCH7)|CHANNELS_SH(7)|BYTES_SH(2))
+#define TYPE_CMYK7_16_SE       (COLORSPACE_SH(PT_MCH7)|CHANNELS_SH(7)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC7_8           (COLORSPACE_SH(PT_MCH7)|CHANNELS_SH(7)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC7_16          (COLORSPACE_SH(PT_MCH7)|CHANNELS_SH(7)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC7_16_SE       (COLORSPACE_SH(PT_MCH7)|CHANNELS_SH(7)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+#define TYPE_CMYK8_8           (COLORSPACE_SH(PT_MCH8)|CHANNELS_SH(8)|BYTES_SH(1))
+#define TYPE_CMYK8_16          (COLORSPACE_SH(PT_MCH8)|CHANNELS_SH(8)|BYTES_SH(2))
+#define TYPE_CMYK8_16_SE       (COLORSPACE_SH(PT_MCH8)|CHANNELS_SH(8)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC8_8           (COLORSPACE_SH(PT_MCH8)|CHANNELS_SH(8)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC8_16          (COLORSPACE_SH(PT_MCH8)|CHANNELS_SH(8)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC8_16_SE       (COLORSPACE_SH(PT_MCH8)|CHANNELS_SH(8)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+#define TYPE_CMYK9_8           (COLORSPACE_SH(PT_MCH9)|CHANNELS_SH(9)|BYTES_SH(1))
+#define TYPE_CMYK9_16          (COLORSPACE_SH(PT_MCH9)|CHANNELS_SH(9)|BYTES_SH(2))
+#define TYPE_CMYK9_16_SE       (COLORSPACE_SH(PT_MCH9)|CHANNELS_SH(9)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC9_8           (COLORSPACE_SH(PT_MCH9)|CHANNELS_SH(9)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC9_16          (COLORSPACE_SH(PT_MCH9)|CHANNELS_SH(9)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC9_16_SE       (COLORSPACE_SH(PT_MCH9)|CHANNELS_SH(9)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+#define TYPE_CMYK10_8          (COLORSPACE_SH(PT_MCH10)|CHANNELS_SH(10)|BYTES_SH(1))
+#define TYPE_CMYK10_16         (COLORSPACE_SH(PT_MCH10)|CHANNELS_SH(10)|BYTES_SH(2))
+#define TYPE_CMYK10_16_SE      (COLORSPACE_SH(PT_MCH10)|CHANNELS_SH(10)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC10_8          (COLORSPACE_SH(PT_MCH10)|CHANNELS_SH(10)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC10_16         (COLORSPACE_SH(PT_MCH10)|CHANNELS_SH(10)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC10_16_SE      (COLORSPACE_SH(PT_MCH10)|CHANNELS_SH(10)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+#define TYPE_CMYK11_8          (COLORSPACE_SH(PT_MCH11)|CHANNELS_SH(11)|BYTES_SH(1))
+#define TYPE_CMYK11_16         (COLORSPACE_SH(PT_MCH11)|CHANNELS_SH(11)|BYTES_SH(2))
+#define TYPE_CMYK11_16_SE      (COLORSPACE_SH(PT_MCH11)|CHANNELS_SH(11)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC11_8          (COLORSPACE_SH(PT_MCH11)|CHANNELS_SH(11)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC11_16         (COLORSPACE_SH(PT_MCH11)|CHANNELS_SH(11)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC11_16_SE      (COLORSPACE_SH(PT_MCH11)|CHANNELS_SH(11)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+#define TYPE_CMYK12_8          (COLORSPACE_SH(PT_MCH12)|CHANNELS_SH(12)|BYTES_SH(1))
+#define TYPE_CMYK12_16         (COLORSPACE_SH(PT_MCH12)|CHANNELS_SH(12)|BYTES_SH(2))
+#define TYPE_CMYK12_16_SE      (COLORSPACE_SH(PT_MCH12)|CHANNELS_SH(12)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC12_8          (COLORSPACE_SH(PT_MCH12)|CHANNELS_SH(12)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC12_16         (COLORSPACE_SH(PT_MCH12)|CHANNELS_SH(12)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC12_16_SE      (COLORSPACE_SH(PT_MCH12)|CHANNELS_SH(12)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+
+// Colorimetric
+#define TYPE_XYZ_16            (COLORSPACE_SH(PT_XYZ)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_Lab_8             (COLORSPACE_SH(PT_Lab)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_LabV2_8           (COLORSPACE_SH(PT_LabV2)|CHANNELS_SH(3)|BYTES_SH(1))
+
+#define TYPE_ALab_8            (COLORSPACE_SH(PT_Lab)|CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_ALabV2_8          (COLORSPACE_SH(PT_LabV2)|CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_Lab_16            (COLORSPACE_SH(PT_Lab)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_LabV2_16          (COLORSPACE_SH(PT_LabV2)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_Yxy_16            (COLORSPACE_SH(PT_Yxy)|CHANNELS_SH(3)|BYTES_SH(2))
+
+// YCbCr
+#define TYPE_YCbCr_8           (COLORSPACE_SH(PT_YCbCr)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_YCbCr_8_PLANAR    (COLORSPACE_SH(PT_YCbCr)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_YCbCr_16          (COLORSPACE_SH(PT_YCbCr)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_YCbCr_16_PLANAR   (COLORSPACE_SH(PT_YCbCr)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_YCbCr_16_SE       (COLORSPACE_SH(PT_YCbCr)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+// YUV
+#define TYPE_YUV_8             (COLORSPACE_SH(PT_YUV)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_YUV_8_PLANAR      (COLORSPACE_SH(PT_YUV)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_YUV_16            (COLORSPACE_SH(PT_YUV)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_YUV_16_PLANAR     (COLORSPACE_SH(PT_YUV)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_YUV_16_SE         (COLORSPACE_SH(PT_YUV)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+// HLS
+#define TYPE_HLS_8             (COLORSPACE_SH(PT_HLS)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_HLS_8_PLANAR      (COLORSPACE_SH(PT_HLS)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_HLS_16            (COLORSPACE_SH(PT_HLS)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_HLS_16_PLANAR     (COLORSPACE_SH(PT_HLS)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_HLS_16_SE         (COLORSPACE_SH(PT_HLS)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+// HSV
+#define TYPE_HSV_8             (COLORSPACE_SH(PT_HSV)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_HSV_8_PLANAR      (COLORSPACE_SH(PT_HSV)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_HSV_16            (COLORSPACE_SH(PT_HSV)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_HSV_16_PLANAR     (COLORSPACE_SH(PT_HSV)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_HSV_16_SE         (COLORSPACE_SH(PT_HSV)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+// Named color index. Only 16 bits allowed (don't check colorspace)
+#define TYPE_NAMED_COLOR_INDEX (CHANNELS_SH(1)|BYTES_SH(2))
+
+// Float formatters.
+#define TYPE_XYZ_FLT          (FLOAT_SH(1)|COLORSPACE_SH(PT_XYZ)|CHANNELS_SH(3)|BYTES_SH(4))
+#define TYPE_Lab_FLT          (FLOAT_SH(1)|COLORSPACE_SH(PT_Lab)|CHANNELS_SH(3)|BYTES_SH(4))
+#define TYPE_LabA_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_Lab)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(4))
+#define TYPE_GRAY_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(4))
+#define TYPE_RGB_FLT          (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(4))
+
+#define TYPE_RGBA_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(4))
+#define TYPE_ARGB_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(4)|SWAPFIRST_SH(1))
+#define TYPE_BGR_FLT          (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(4)|DOSWAP_SH(1))
+#define TYPE_BGRA_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(4)|DOSWAP_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_ABGR_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(4)|DOSWAP_SH(1))
+
+#define TYPE_CMYK_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(4))
+
+// Floating point formatters.
+// NOTE THAT 'BYTES' FIELD IS SET TO ZERO ON DBL because 8 bytes overflows the bitfield
+#define TYPE_XYZ_DBL          (FLOAT_SH(1)|COLORSPACE_SH(PT_XYZ)|CHANNELS_SH(3)|BYTES_SH(0))
+#define TYPE_Lab_DBL          (FLOAT_SH(1)|COLORSPACE_SH(PT_Lab)|CHANNELS_SH(3)|BYTES_SH(0))
+#define TYPE_GRAY_DBL         (FLOAT_SH(1)|COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(0))
+#define TYPE_RGB_DBL          (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(0))
+#define TYPE_BGR_DBL          (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(0)|DOSWAP_SH(1))
+#define TYPE_CMYK_DBL         (FLOAT_SH(1)|COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(0))
+
+// IEEE 754-2008 "half"
+#define TYPE_GRAY_HALF_FLT    (FLOAT_SH(1)|COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(2))
+#define TYPE_RGB_HALF_FLT     (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_RGBA_HALF_FLT    (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_CMYK_HALF_FLT    (FLOAT_SH(1)|COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2))
+
+// Remaining RGB orderings (TYPE_RGBA_HALF_FLT is already defined above, so it is not repeated here)
+#define TYPE_ARGB_HALF_FLT    (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|SWAPFIRST_SH(1))
+#define TYPE_BGR_HALF_FLT     (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_BGRA_HALF_FLT    (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_ABGR_HALF_FLT    (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1))  // alpha needs EXTRA_SH(1), as in TYPE_ABGR_8/_16/_FLT
+
+#endif
+
+// Colorspaces
+typedef struct {                      // XYZ color value; also the type used for white points (see cmsXYZ2Lab)
+        cmsFloat64Number X;
+        cmsFloat64Number Y;
+        cmsFloat64Number Z;
+
+    } cmsCIEXYZ;
+
+typedef struct {                      // xyY value (lowercase x, y plus uppercase Y); converted to/from cmsCIEXYZ by cmsXYZ2xyY / cmsxyY2XYZ
+        cmsFloat64Number x;
+        cmsFloat64Number y;
+        cmsFloat64Number Y;
+
+    } cmsCIExyY;
+
+typedef struct {                      // L, a, b value; converted to/from cmsCIEXYZ by cmsXYZ2Lab / cmsLab2XYZ
+        cmsFloat64Number L;
+        cmsFloat64Number a;
+        cmsFloat64Number b;
+
+    } cmsCIELab;
+
+typedef struct {                      // L, C, h value; converted to/from cmsCIELab by cmsLab2LCh / cmsLCh2Lab
+        cmsFloat64Number L;
+        cmsFloat64Number C;
+        cmsFloat64Number h;
+
+    } cmsCIELCh;
+
+typedef struct {                      // J, C, h value written by cmsCIECAM02Forward and read by cmsCIECAM02Reverse
+        cmsFloat64Number J;
+        cmsFloat64Number C;
+        cmsFloat64Number h;
+
+    } cmsJCh;
+
+typedef struct {                      // One cmsCIEXYZ for each of the Red, Green and Blue entries
+        cmsCIEXYZ  Red;
+        cmsCIEXYZ  Green;
+        cmsCIEXYZ  Blue;
+
+    } cmsCIEXYZTRIPLE;
+
+typedef struct {                      // One cmsCIExyY for each of the Red, Green and Blue entries
+        cmsCIExyY  Red;
+        cmsCIExyY  Green;
+        cmsCIExyY  Blue;
+
+    } cmsCIExyYTRIPLE;
+
+// Illuminant types for structs below
+#define cmsILLUMINANT_TYPE_UNKNOWN 0x0000000
+#define cmsILLUMINANT_TYPE_D50     0x0000001
+#define cmsILLUMINANT_TYPE_D65     0x0000002
+#define cmsILLUMINANT_TYPE_D93     0x0000003
+#define cmsILLUMINANT_TYPE_F2      0x0000004
+#define cmsILLUMINANT_TYPE_D55     0x0000005
+#define cmsILLUMINANT_TYPE_A       0x0000006
+#define cmsILLUMINANT_TYPE_E       0x0000007
+#define cmsILLUMINANT_TYPE_F8      0x0000008
+
+typedef struct {
+        cmsUInt32Number  Observer;    // 0 = unknown, 1=CIE 1931, 2=CIE 1964
+        cmsCIEXYZ        Backing;     // Value of backing
+        cmsUInt32Number  Geometry;    // 0=unknown, 1=45/0 or 0/45, 2=0/d or d/0
+        cmsFloat64Number Flare;       // 0..1.0
+        cmsUInt32Number  IlluminantType;   // One of the cmsILLUMINANT_TYPE_* values defined above
+
+    } cmsICCMeasurementConditions;
+
+typedef struct {
+        cmsCIEXYZ       IlluminantXYZ;   // Not the same struct as CAM02,
+        cmsCIEXYZ       SurroundXYZ;     // This is for storing the tag
+        cmsUInt32Number IlluminantType;  // viewing condition
+
+    } cmsICCViewingConditions;
+
+// Get LittleCMS version (for shared objects) -----------------------------------------------------------------------------
+
+CMSAPI int               CMSEXPORT cmsGetEncodedCMMversion(void);
+
+// Support of non-standard functions --------------------------------------------------------------------------------------
+
+CMSAPI int               CMSEXPORT cmsstrcasecmp(const char* s1, const char* s2);
+CMSAPI long int          CMSEXPORT cmsfilelength(FILE* f);
+
+
+// Context handling --------------------------------------------------------------------------------------------------------
+
+// Each context holds its own globals and its own plug-ins. There is a global context with the id = 0 for legacy compatibility
+// though using the global context is not recommended. Proper context handling makes lcms more thread-safe.
+
+typedef struct _cmsContext_struct* cmsContext;
+
+CMSAPI cmsContext       CMSEXPORT cmsCreateContext(void* Plugin, void* UserData);
+CMSAPI void             CMSEXPORT cmsDeleteContext(cmsContext ContextID);
+CMSAPI cmsContext       CMSEXPORT cmsDupContext(cmsContext ContextID, void* NewUserData);
+CMSAPI void*            CMSEXPORT cmsGetContextUserData(cmsContext ContextID);
+
+// Plug-In registering  --------------------------------------------------------------------------------------------------
+
+CMSAPI cmsBool           CMSEXPORT cmsPlugin(void* Plugin);
+CMSAPI cmsBool           CMSEXPORT cmsPluginTHR(cmsContext ContextID, void* Plugin);
+CMSAPI void              CMSEXPORT cmsUnregisterPlugins(void);
+CMSAPI void              CMSEXPORT cmsUnregisterPluginsTHR(cmsContext ContextID);
+
+// Error logging ----------------------------------------------------------------------------------------------------------
+
+// There is no error handling at all. When a function fails, it returns a proper value.
+// For example, all create functions return NULL on failure. Others may return FALSE.
+// It may be interesting, for the developer, to know why the function is failing.
+// for that reason, lcms2 does offer a logging function. This function will get
+// an ENGLISH string with some clues on what is going wrong. You can show this
+// info to the end user if you wish, or just create some sort of log on disk.
+// The logging function should NOT terminate the program, as this obviously can leave
+// unfreed resources. It is the programmer's responsibility to check each function
+// return code to make sure it didn't fail.
+
+#define cmsERROR_UNDEFINED                    0
+#define cmsERROR_FILE                         1
+#define cmsERROR_RANGE                        2
+#define cmsERROR_INTERNAL                     3
+#define cmsERROR_NULL                         4
+#define cmsERROR_READ                         5
+#define cmsERROR_SEEK                         6
+#define cmsERROR_WRITE                        7
+#define cmsERROR_UNKNOWN_EXTENSION            8
+#define cmsERROR_COLORSPACE_CHECK             9
+#define cmsERROR_ALREADY_DEFINED              10
+#define cmsERROR_BAD_SIGNATURE                11
+#define cmsERROR_CORRUPTION_DETECTED          12
+#define cmsERROR_NOT_SUITABLE                 13
+
+// Error logger is called with the ContextID when a message is raised. This gives the
+// chance to know which thread is responsible for the warning and any environment associated
+// with it. Non-multithreading applications may safely ignore this parameter.
+// Note that under certain special circumstances, ContextID may be NULL.
+typedef void  (* cmsLogErrorHandlerFunction)(cmsContext ContextID, cmsUInt32Number ErrorCode, const char *Text);
+
+// Allows user to set any specific logger
+CMSAPI void              CMSEXPORT cmsSetLogErrorHandler(cmsLogErrorHandlerFunction Fn);
+CMSAPI void              CMSEXPORT cmsSetLogErrorHandlerTHR(cmsContext ContextID, cmsLogErrorHandlerFunction Fn);
+
+// Conversions --------------------------------------------------------------------------------------------------------------
+
+// Returns pointers to constant structs
+CMSAPI const cmsCIEXYZ*  CMSEXPORT cmsD50_XYZ(void);
+CMSAPI const cmsCIExyY*  CMSEXPORT cmsD50_xyY(void);
+
+// Colorimetric space conversions
+CMSAPI void              CMSEXPORT cmsXYZ2xyY(cmsCIExyY* Dest, const cmsCIEXYZ* Source);
+CMSAPI void              CMSEXPORT cmsxyY2XYZ(cmsCIEXYZ* Dest, const cmsCIExyY* Source);
+CMSAPI void              CMSEXPORT cmsXYZ2Lab(const cmsCIEXYZ* WhitePoint, cmsCIELab* Lab, const cmsCIEXYZ* xyz);
+CMSAPI void              CMSEXPORT cmsLab2XYZ(const cmsCIEXYZ* WhitePoint, cmsCIEXYZ* xyz, const cmsCIELab* Lab);
+CMSAPI void              CMSEXPORT cmsLab2LCh(cmsCIELCh*LCh, const cmsCIELab* Lab);
+CMSAPI void              CMSEXPORT cmsLCh2Lab(cmsCIELab* Lab, const cmsCIELCh* LCh);
+
+// Encoding /Decoding on PCS
+CMSAPI void              CMSEXPORT cmsLabEncoded2Float(cmsCIELab* Lab, const cmsUInt16Number wLab[3]);
+CMSAPI void              CMSEXPORT cmsLabEncoded2FloatV2(cmsCIELab* Lab, const cmsUInt16Number wLab[3]);
+CMSAPI void              CMSEXPORT cmsFloat2LabEncoded(cmsUInt16Number wLab[3], const cmsCIELab* Lab);
+CMSAPI void              CMSEXPORT cmsFloat2LabEncodedV2(cmsUInt16Number wLab[3], const cmsCIELab* Lab);
+CMSAPI void              CMSEXPORT cmsXYZEncoded2Float(cmsCIEXYZ* fxyz, const cmsUInt16Number XYZ[3]);
+CMSAPI void              CMSEXPORT cmsFloat2XYZEncoded(cmsUInt16Number XYZ[3], const cmsCIEXYZ* fXYZ);
+
+// DeltaE metrics
+CMSAPI cmsFloat64Number  CMSEXPORT cmsDeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2);
+CMSAPI cmsFloat64Number  CMSEXPORT cmsCIE94DeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2);
+CMSAPI cmsFloat64Number  CMSEXPORT cmsBFDdeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2);
+CMSAPI cmsFloat64Number  CMSEXPORT cmsCMCdeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2, cmsFloat64Number l, cmsFloat64Number c);
+CMSAPI cmsFloat64Number  CMSEXPORT cmsCIE2000DeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2, cmsFloat64Number Kl, cmsFloat64Number Kc, cmsFloat64Number Kh);
+
+// Temperature <-> Chromaticity (Black body)
+CMSAPI cmsBool           CMSEXPORT cmsWhitePointFromTemp(cmsCIExyY* WhitePoint, cmsFloat64Number  TempK);
+CMSAPI cmsBool           CMSEXPORT cmsTempFromWhitePoint(cmsFloat64Number* TempK, const cmsCIExyY* WhitePoint);
+
+// Chromatic adaptation
+CMSAPI cmsBool           CMSEXPORT cmsAdaptToIlluminant(cmsCIEXYZ* Result, const cmsCIEXYZ* SourceWhitePt,
+                                                                           const cmsCIEXYZ* Illuminant,
+                                                                           const cmsCIEXYZ* Value);
+
+// CIECAM02 ---------------------------------------------------------------------------------------------------
+
+// Viewing conditions. Please note those are CAM model viewing conditions, and not the ICC tag viewing
+// conditions, which I'm naming cmsICCViewingConditions to make differences evident. Unfortunately, the tag
+// cannot deal with surround La, Yb and D value so is basically useless to store CAM02 viewing conditions.
+
+
+#define AVG_SURROUND       1
+#define DIM_SURROUND       2
+#define DARK_SURROUND      3
+#define CUTSHEET_SURROUND  4
+
+#define D_CALCULATE        (-1)
+
+typedef struct {                      // Input to cmsCIECAM02Init (CAM model viewing conditions, not the ICC tag struct)
+    cmsCIEXYZ        whitePoint;
+    cmsFloat64Number Yb;
+    cmsFloat64Number La;
+    cmsUInt32Number  surround;         // presumably one of AVG_SURROUND .. CUTSHEET_SURROUND defined above -- TODO confirm
+    cmsFloat64Number D_value;          // presumably D_CALCULATE (-1) asks the model to compute D -- TODO confirm
+
+    } cmsViewingConditions;
+
+CMSAPI cmsHANDLE         CMSEXPORT cmsCIECAM02Init(cmsContext ContextID, const cmsViewingConditions* pVC);
+CMSAPI void              CMSEXPORT cmsCIECAM02Done(cmsHANDLE hModel);
+CMSAPI void              CMSEXPORT cmsCIECAM02Forward(cmsHANDLE hModel, const cmsCIEXYZ* pIn, cmsJCh* pOut);
+CMSAPI void              CMSEXPORT cmsCIECAM02Reverse(cmsHANDLE hModel, const cmsJCh* pIn,    cmsCIEXYZ* pOut);
+
+
+// Tone curves -----------------------------------------------------------------------------------------
+
+// This describes a curve segment. For a table of supported types, see the manual. User can increase the number of
+// available types by using a proper plug-in. Parametric segments allow 10 parameters at most
+
+typedef struct {
+    cmsFloat32Number   x0, x1;           // Domain; for x0 < x <= x1
+    cmsInt32Number     Type;             // Parametric type, Type == 0 means sampled segment. Negative values are reserved
+    cmsFloat64Number   Params[10];       // Parameters if Type != 0
+    cmsUInt32Number    nGridPoints;      // Number of grid points if Type == 0
+    cmsFloat32Number*  SampledPoints;    // Points to an array of floats if Type == 0
+
+} cmsCurveSegment;
+
+// The internal representation is none of your business.
+typedef struct _cms_curve_struct cmsToneCurve;
+
+CMSAPI cmsToneCurve*     CMSEXPORT cmsBuildSegmentedToneCurve(cmsContext ContextID, cmsUInt32Number nSegments, const cmsCurveSegment Segments[]);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsBuildParametricToneCurve(cmsContext ContextID, cmsInt32Number Type, const cmsFloat64Number Params[]);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsBuildGamma(cmsContext ContextID, cmsFloat64Number Gamma);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsBuildTabulatedToneCurve16(cmsContext ContextID, cmsUInt32Number nEntries, const cmsUInt16Number values[]);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsBuildTabulatedToneCurveFloat(cmsContext ContextID, cmsUInt32Number nEntries, const cmsFloat32Number values[]);
+CMSAPI void              CMSEXPORT cmsFreeToneCurve(cmsToneCurve* Curve);
+CMSAPI void              CMSEXPORT cmsFreeToneCurveTriple(cmsToneCurve* Curve[3]);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsDupToneCurve(const cmsToneCurve* Src);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsReverseToneCurve(const cmsToneCurve* InGamma);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsReverseToneCurveEx(cmsUInt32Number nResultSamples, const cmsToneCurve* InGamma);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsJoinToneCurve(cmsContext ContextID, const cmsToneCurve* X,  const cmsToneCurve* Y, cmsUInt32Number nPoints);
+CMSAPI cmsBool           CMSEXPORT cmsSmoothToneCurve(cmsToneCurve* Tab, cmsFloat64Number lambda);
+CMSAPI cmsFloat32Number  CMSEXPORT cmsEvalToneCurveFloat(const cmsToneCurve* Curve, cmsFloat32Number v);
+CMSAPI cmsUInt16Number   CMSEXPORT cmsEvalToneCurve16(const cmsToneCurve* Curve, cmsUInt16Number v);
+CMSAPI cmsBool           CMSEXPORT cmsIsToneCurveMultisegment(const cmsToneCurve* InGamma);
+CMSAPI cmsBool           CMSEXPORT cmsIsToneCurveLinear(const cmsToneCurve* Curve);
+CMSAPI cmsBool           CMSEXPORT cmsIsToneCurveMonotonic(const cmsToneCurve* t);
+CMSAPI cmsBool           CMSEXPORT cmsIsToneCurveDescending(const cmsToneCurve* t);
+CMSAPI cmsInt32Number    CMSEXPORT cmsGetToneCurveParametricType(const cmsToneCurve* t);
+CMSAPI cmsFloat64Number  CMSEXPORT cmsEstimateGamma(const cmsToneCurve* t, cmsFloat64Number Precision);
+
+// Tone curve tabular estimation
+CMSAPI cmsUInt32Number         CMSEXPORT cmsGetToneCurveEstimatedTableEntries(const cmsToneCurve* t);
+CMSAPI const cmsUInt16Number*  CMSEXPORT cmsGetToneCurveEstimatedTable(const cmsToneCurve* t);
+
+
+// Implements pipelines of multi-processing elements -------------------------------------------------------------
+
+// Nothing to see here, move along
+typedef struct _cmsPipeline_struct cmsPipeline;
+typedef struct _cmsStage_struct cmsStage;
+
+// Those are hi-level pipelines
+CMSAPI cmsPipeline*      CMSEXPORT cmsPipelineAlloc(cmsContext ContextID, cmsUInt32Number InputChannels, cmsUInt32Number OutputChannels);
+CMSAPI void              CMSEXPORT cmsPipelineFree(cmsPipeline* lut);
+CMSAPI cmsPipeline*      CMSEXPORT cmsPipelineDup(const cmsPipeline* Orig);
+
+CMSAPI cmsContext        CMSEXPORT cmsGetPipelineContextID(const cmsPipeline* lut);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsPipelineInputChannels(const cmsPipeline* lut);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsPipelineOutputChannels(const cmsPipeline* lut);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsPipelineStageCount(const cmsPipeline* lut);
+CMSAPI cmsStage*         CMSEXPORT cmsPipelineGetPtrToFirstStage(const cmsPipeline* lut);
+CMSAPI cmsStage*         CMSEXPORT cmsPipelineGetPtrToLastStage(const cmsPipeline* lut);
+
+CMSAPI void              CMSEXPORT cmsPipelineEval16(const cmsUInt16Number In[], cmsUInt16Number Out[], const cmsPipeline* lut);
+CMSAPI void              CMSEXPORT cmsPipelineEvalFloat(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsPipeline* lut);
+CMSAPI cmsBool           CMSEXPORT cmsPipelineEvalReverseFloat(cmsFloat32Number Target[], cmsFloat32Number Result[], cmsFloat32Number Hint[], const cmsPipeline* lut);
+CMSAPI cmsBool           CMSEXPORT cmsPipelineCat(cmsPipeline* l1, const cmsPipeline* l2);
+CMSAPI cmsBool           CMSEXPORT cmsPipelineSetSaveAs8bitsFlag(cmsPipeline* lut, cmsBool On);
+
+// Where to place/locate the stages in the pipeline chain
+typedef enum { cmsAT_BEGIN, cmsAT_END } cmsStageLoc;
+
+CMSAPI cmsBool           CMSEXPORT cmsPipelineInsertStage(cmsPipeline* lut, cmsStageLoc loc, cmsStage* mpe);
+CMSAPI void              CMSEXPORT cmsPipelineUnlinkStage(cmsPipeline* lut, cmsStageLoc loc, cmsStage** mpe);
+
+// This function is quite useful to analyze the structure of a Pipeline and retrieve the Stage elements
+// that make up the Pipeline. It should be called with the Pipeline, the number of expected elements and
+// then a list of expected types followed by a list of double pointers to Stage elements. If
+// the function finds a match with the current pipeline, it fills the pointers and returns TRUE;
+// if not, it returns FALSE without touching anything.
+CMSAPI cmsBool           CMSEXPORT cmsPipelineCheckAndRetreiveStages(const cmsPipeline* Lut, cmsUInt32Number n, ...);
+
+// Matrix has double precision and CLUT has only float precision. That is because an ICC profile can encode
+// matrices with far more precision than CLUTs
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocIdentity(cmsContext ContextID, cmsUInt32Number nChannels);
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocToneCurves(cmsContext ContextID, cmsUInt32Number nChannels, cmsToneCurve* const Curves[]);
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocMatrix(cmsContext ContextID, cmsUInt32Number Rows, cmsUInt32Number Cols, const cmsFloat64Number* Matrix, const cmsFloat64Number* Offset);
+
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocCLut16bit(cmsContext ContextID, cmsUInt32Number nGridPoints, cmsUInt32Number inputChan, cmsUInt32Number outputChan, const cmsUInt16Number* Table);
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocCLutFloat(cmsContext ContextID, cmsUInt32Number nGridPoints, cmsUInt32Number inputChan, cmsUInt32Number outputChan, const cmsFloat32Number* Table);
+
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocCLut16bitGranular(cmsContext ContextID, const cmsUInt32Number clutPoints[], cmsUInt32Number inputChan, cmsUInt32Number outputChan, const cmsUInt16Number* Table);
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocCLutFloatGranular(cmsContext ContextID, const cmsUInt32Number clutPoints[], cmsUInt32Number inputChan, cmsUInt32Number outputChan, const cmsFloat32Number* Table);
+
+CMSAPI cmsStage*         CMSEXPORT cmsStageDup(cmsStage* mpe);
+CMSAPI void              CMSEXPORT cmsStageFree(cmsStage* mpe);
+CMSAPI cmsStage*         CMSEXPORT cmsStageNext(const cmsStage* mpe);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsStageInputChannels(const cmsStage* mpe);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsStageOutputChannels(const cmsStage* mpe);
+CMSAPI cmsStageSignature CMSEXPORT cmsStageType(const cmsStage* mpe);
+CMSAPI void*             CMSEXPORT cmsStageData(const cmsStage* mpe);
+
+// Sampling
+typedef cmsInt32Number (* cmsSAMPLER16)   (CMSREGISTER const cmsUInt16Number In[],   // read-only 16-bit input values handed to the callback
+                                           CMSREGISTER cmsUInt16Number Out[],         // 16-bit output values; writable by the callback
+                                           CMSREGISTER void * Cargo);                 // opaque user pointer; presumably the Cargo passed to cmsStageSampleCLut16bit / cmsSliceSpace16 -- confirm. NOTE(review): meaning of the return value is not visible here
+
+typedef cmsInt32Number (* cmsSAMPLERFLOAT)(CMSREGISTER const cmsFloat32Number In[],  // read-only float input values handed to the callback
+                                           CMSREGISTER cmsFloat32Number Out[],        // float output values; writable by the callback
+                                           CMSREGISTER void * Cargo);                 // opaque user pointer; presumably the Cargo passed to cmsStageSampleCLutFloat / cmsSliceSpaceFloat -- confirm. NOTE(review): meaning of the return value is not visible here
+
+// Use this flag to prevent changes being written to destination
+#define SAMPLER_INSPECT     0x01000000
+
+// For CLUT only
+CMSAPI cmsBool           CMSEXPORT cmsStageSampleCLut16bit(cmsStage* mpe,    cmsSAMPLER16 Sampler, void* Cargo, cmsUInt32Number dwFlags);
+CMSAPI cmsBool           CMSEXPORT cmsStageSampleCLutFloat(cmsStage* mpe, cmsSAMPLERFLOAT Sampler, void* Cargo, cmsUInt32Number dwFlags);
+
+// Slicers
+CMSAPI cmsBool           CMSEXPORT cmsSliceSpace16(cmsUInt32Number nInputs, const cmsUInt32Number clutPoints[],
+                                                   cmsSAMPLER16 Sampler, void * Cargo);
+
+CMSAPI cmsBool           CMSEXPORT cmsSliceSpaceFloat(cmsUInt32Number nInputs, const cmsUInt32Number clutPoints[],
+                                                   cmsSAMPLERFLOAT Sampler, void * Cargo);
+
+// Multilocalized Unicode management ---------------------------------------------------------------------------------------
+
+typedef struct _cms_MLU_struct cmsMLU;
+
+#define  cmsNoLanguage "\0\0"
+#define  cmsNoCountry  "\0\0"
+
+CMSAPI cmsMLU*           CMSEXPORT cmsMLUalloc(cmsContext ContextID, cmsUInt32Number nItems);
+CMSAPI void              CMSEXPORT cmsMLUfree(cmsMLU* mlu);
+CMSAPI cmsMLU*           CMSEXPORT cmsMLUdup(const cmsMLU* mlu);
+
+CMSAPI cmsBool           CMSEXPORT cmsMLUsetASCII(cmsMLU* mlu,
+                                                  const char LanguageCode[3], const char CountryCode[3],
+                                                  const char* ASCIIString);
+CMSAPI cmsBool           CMSEXPORT cmsMLUsetWide(cmsMLU* mlu,
+                                                  const char LanguageCode[3], const char CountryCode[3],
+                                                  const wchar_t* WideString);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsMLUgetASCII(const cmsMLU* mlu,
+                                                  const char LanguageCode[3], const char CountryCode[3],
+                                                  char* Buffer,    cmsUInt32Number BufferSize);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsMLUgetWide(const cmsMLU* mlu,
+                                                 const char LanguageCode[3], const char CountryCode[3],
+                                                 wchar_t* Buffer, cmsUInt32Number BufferSize);
+
+CMSAPI cmsBool           CMSEXPORT cmsMLUgetTranslation(const cmsMLU* mlu,
+                                                         const char LanguageCode[3], const char CountryCode[3],
+                                                         char ObtainedLanguage[3], char ObtainedCountry[3]);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsMLUtranslationsCount(const cmsMLU* mlu);
+
+CMSAPI cmsBool           CMSEXPORT cmsMLUtranslationsCodes(const cmsMLU* mlu,
+                                                             cmsUInt32Number idx,
+                                                             char LanguageCode[3],
+                                                             char CountryCode[3]);
+ 
+// Undercolorremoval & black generation -------------------------------------------------------------------------------------
+
+typedef struct {
+        cmsToneCurve* Ucr;     // tone curve; presumably the under color removal curve -- confirm
+        cmsToneCurve* Bg;      // tone curve; presumably the black generation curve -- confirm
+        cmsMLU*       Desc;    // multilocalized text; presumably a description of the UCR/BG method -- confirm
+
+} cmsUcrBg;
+
+// Screening ----------------------------------------------------------------------------------------------------------------
+
+#define cmsPRINTER_DEFAULT_SCREENS     0x0001
+#define cmsFREQUENCE_UNITS_LINES_CM    0x0000
+#define cmsFREQUENCE_UNITS_LINES_INCH  0x0002
+
+#define cmsSPOT_UNKNOWN         0
+#define cmsSPOT_PRINTER_DEFAULT 1
+#define cmsSPOT_ROUND           2
+#define cmsSPOT_DIAMOND         3
+#define cmsSPOT_ELLIPSE         4
+#define cmsSPOT_LINE            5
+#define cmsSPOT_SQUARE          6
+#define cmsSPOT_CROSS           7
+
+typedef struct {
+    cmsFloat64Number  Frequency;      // screen frequency; units presumably selected by the cmsFREQUENCE_UNITS_* flags -- confirm
+    cmsFloat64Number  ScreenAngle;    // screen angle; NOTE(review): units are not visible here -- confirm
+    cmsUInt32Number   SpotShape;      // presumably one of the cmsSPOT_* values -- confirm
+
+} cmsScreeningChannel;
+
+typedef struct {
+    cmsUInt32Number Flag;                            // presumably a combination of cmsPRINTER_DEFAULT_SCREENS and cmsFREQUENCE_UNITS_* bits -- confirm
+    cmsUInt32Number nChannels;                       // presumably the number of entries of Channels in use -- confirm
+    cmsScreeningChannel Channels[cmsMAXCHANNELS];    // fixed-capacity array of per-channel screening parameters
+
+} cmsScreening;
+
+
+// Named color -----------------------------------------------------------------------------------------------------------------
+
+typedef struct _cms_NAMEDCOLORLIST_struct cmsNAMEDCOLORLIST;
+
+CMSAPI cmsNAMEDCOLORLIST* CMSEXPORT cmsAllocNamedColorList(cmsContext ContextID,
+                                                           cmsUInt32Number n,
+                                                           cmsUInt32Number ColorantCount,
+                                                           const char* Prefix, const char* Suffix);
+
+CMSAPI void               CMSEXPORT cmsFreeNamedColorList(cmsNAMEDCOLORLIST* v);
+CMSAPI cmsNAMEDCOLORLIST* CMSEXPORT cmsDupNamedColorList(const cmsNAMEDCOLORLIST* v);
+CMSAPI cmsBool            CMSEXPORT cmsAppendNamedColor(cmsNAMEDCOLORLIST* v, const char* Name,
+                                                            cmsUInt16Number PCS[3],
+                                                            cmsUInt16Number Colorant[cmsMAXCHANNELS]);
+
+CMSAPI cmsUInt32Number    CMSEXPORT cmsNamedColorCount(const cmsNAMEDCOLORLIST* v);
+CMSAPI cmsInt32Number     CMSEXPORT cmsNamedColorIndex(const cmsNAMEDCOLORLIST* v, const char* Name);
+
+CMSAPI cmsBool            CMSEXPORT cmsNamedColorInfo(const cmsNAMEDCOLORLIST* NamedColorList, cmsUInt32Number nColor,
+                                                      char* Name,
+                                                      char* Prefix,
+                                                      char* Suffix,
+                                                      cmsUInt16Number* PCS,
+                                                      cmsUInt16Number* Colorant);
+
+// Retrieve named color list from transform
+CMSAPI cmsNAMEDCOLORLIST* CMSEXPORT cmsGetNamedColorList(cmsHTRANSFORM xform);
+
+// Profile sequence -----------------------------------------------------------------------------------------------------
+
+// Profile sequence descriptor. Some fields come from profile sequence descriptor tag, others
+// come from Profile Sequence Identifier Tag
+typedef struct {
+
+    cmsSignature           deviceMfg;       // device manufacturer signature
+    cmsSignature           deviceModel;     // device model signature
+    cmsUInt64Number        attributes;      // 64-bit attribute value; NOTE(review): bit meanings are not visible here
+    cmsTechnologySignature technology;      // technology signature
+    cmsProfileID           ProfileID;       // profile ID of the described profile
+    cmsMLU*                Manufacturer;    // multilocalized manufacturer text
+    cmsMLU*                Model;           // multilocalized model text
+    cmsMLU*                Description;     // multilocalized description text
+
+} cmsPSEQDESC;
+
+typedef struct {
+
+    cmsUInt32Number n;            // presumably the number of descriptors reachable through seq -- confirm
+    cmsContext      ContextID;    // context handle stored with the sequence
+    cmsPSEQDESC*    seq;          // pointer to the profile sequence descriptor(s)
+
+} cmsSEQ;
+
+CMSAPI cmsSEQ*           CMSEXPORT cmsAllocProfileSequenceDescription(cmsContext ContextID, cmsUInt32Number n);
+CMSAPI cmsSEQ*           CMSEXPORT cmsDupProfileSequenceDescription(const cmsSEQ* pseq);
+CMSAPI void              CMSEXPORT cmsFreeProfileSequenceDescription(cmsSEQ* pseq);
+
+// Dictionaries --------------------------------------------------------------------------------------------------------
+
+typedef struct _cmsDICTentry_struct {
+
+    struct _cmsDICTentry_struct* Next;    // link to another entry of the same type; presumably what cmsDictNextEntry follows -- confirm
+
+    cmsMLU *DisplayName;                  // multilocalized display form of the name (cf. DisplayName argument of cmsDictAddEntry)
+    cmsMLU *DisplayValue;                 // multilocalized display form of the value (cf. DisplayValue argument of cmsDictAddEntry)
+    wchar_t* Name;                        // entry name as a wide string
+    wchar_t* Value;                       // entry value as a wide string
+
+} cmsDICTentry;
+
+CMSAPI cmsHANDLE           CMSEXPORT cmsDictAlloc(cmsContext ContextID);
+CMSAPI void                CMSEXPORT cmsDictFree(cmsHANDLE hDict);
+CMSAPI cmsHANDLE           CMSEXPORT cmsDictDup(cmsHANDLE hDict);
+
+CMSAPI cmsBool             CMSEXPORT cmsDictAddEntry(cmsHANDLE hDict, const wchar_t* Name, const wchar_t* Value, const cmsMLU *DisplayName, const cmsMLU *DisplayValue);
+CMSAPI const cmsDICTentry* CMSEXPORT cmsDictGetEntryList(cmsHANDLE hDict);
+CMSAPI const cmsDICTentry* CMSEXPORT cmsDictNextEntry(const cmsDICTentry* e);
+
+// Access to Profile data ----------------------------------------------------------------------------------------------
+CMSAPI cmsHPROFILE       CMSEXPORT cmsCreateProfilePlaceholder(cmsContext ContextID);
+
+CMSAPI cmsContext        CMSEXPORT cmsGetProfileContextID(cmsHPROFILE hProfile);
+CMSAPI cmsInt32Number    CMSEXPORT cmsGetTagCount(cmsHPROFILE hProfile);
+CMSAPI cmsTagSignature   CMSEXPORT cmsGetTagSignature(cmsHPROFILE hProfile, cmsUInt32Number n);
+CMSAPI cmsBool           CMSEXPORT cmsIsTag(cmsHPROFILE hProfile, cmsTagSignature sig);
+
+// Read and write pre-formatted data
+CMSAPI void*             CMSEXPORT cmsReadTag(cmsHPROFILE hProfile, cmsTagSignature sig);
+CMSAPI cmsBool           CMSEXPORT cmsWriteTag(cmsHPROFILE hProfile, cmsTagSignature sig, const void* data);
+CMSAPI cmsBool           CMSEXPORT cmsLinkTag(cmsHPROFILE hProfile, cmsTagSignature sig, cmsTagSignature dest);
+CMSAPI cmsTagSignature   CMSEXPORT cmsTagLinkedTo(cmsHPROFILE hProfile, cmsTagSignature sig);
+
+// Read and write raw data
+CMSAPI cmsUInt32Number   CMSEXPORT cmsReadRawTag(cmsHPROFILE hProfile, cmsTagSignature sig, void* Buffer, cmsUInt32Number BufferSize);
+CMSAPI cmsBool           CMSEXPORT cmsWriteRawTag(cmsHPROFILE hProfile, cmsTagSignature sig, const void* data, cmsUInt32Number Size);
+
+// Access header data
+#define cmsEmbeddedProfileFalse    0x00000000
+#define cmsEmbeddedProfileTrue     0x00000001
+#define cmsUseAnywhere             0x00000000
+#define cmsUseWithEmbeddedDataOnly 0x00000002
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetHeaderFlags(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsGetHeaderAttributes(cmsHPROFILE hProfile, cmsUInt64Number* Flags);
+CMSAPI void              CMSEXPORT cmsGetHeaderProfileID(cmsHPROFILE hProfile, cmsUInt8Number* ProfileID);
+CMSAPI cmsBool           CMSEXPORT cmsGetHeaderCreationDateTime(cmsHPROFILE hProfile, struct tm *Dest);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetHeaderRenderingIntent(cmsHPROFILE hProfile);
+
+CMSAPI void              CMSEXPORT cmsSetHeaderFlags(cmsHPROFILE hProfile, cmsUInt32Number Flags);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetHeaderManufacturer(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsSetHeaderManufacturer(cmsHPROFILE hProfile, cmsUInt32Number manufacturer);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetHeaderCreator(cmsHPROFILE hProfile);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetHeaderModel(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsSetHeaderModel(cmsHPROFILE hProfile, cmsUInt32Number model);
+CMSAPI void              CMSEXPORT cmsSetHeaderAttributes(cmsHPROFILE hProfile, cmsUInt64Number Flags);
+CMSAPI void              CMSEXPORT cmsSetHeaderProfileID(cmsHPROFILE hProfile, cmsUInt8Number* ProfileID);
+CMSAPI void              CMSEXPORT cmsSetHeaderRenderingIntent(cmsHPROFILE hProfile, cmsUInt32Number RenderingIntent);
+
+CMSAPI cmsColorSpaceSignature
+                         CMSEXPORT cmsGetPCS(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsSetPCS(cmsHPROFILE hProfile, cmsColorSpaceSignature pcs);
+CMSAPI cmsColorSpaceSignature
+                         CMSEXPORT cmsGetColorSpace(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsSetColorSpace(cmsHPROFILE hProfile, cmsColorSpaceSignature sig);
+CMSAPI cmsProfileClassSignature
+                         CMSEXPORT cmsGetDeviceClass(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsSetDeviceClass(cmsHPROFILE hProfile, cmsProfileClassSignature sig);
+CMSAPI void              CMSEXPORT cmsSetProfileVersion(cmsHPROFILE hProfile, cmsFloat64Number Version);
+CMSAPI cmsFloat64Number  CMSEXPORT cmsGetProfileVersion(cmsHPROFILE hProfile);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetEncodedICCversion(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsSetEncodedICCversion(cmsHPROFILE hProfile, cmsUInt32Number Version);
+
+// How profiles may be used
+#define LCMS_USED_AS_INPUT      0
+#define LCMS_USED_AS_OUTPUT     1
+#define LCMS_USED_AS_PROOF      2
+
+CMSAPI cmsBool           CMSEXPORT cmsIsIntentSupported(cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number UsedDirection);
+CMSAPI cmsBool           CMSEXPORT cmsIsMatrixShaper(cmsHPROFILE hProfile);
+CMSAPI cmsBool           CMSEXPORT cmsIsCLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number UsedDirection);
+
+// Translate from/to our notation to ICC
+CMSAPI cmsColorSpaceSignature   CMSEXPORT _cmsICCcolorSpace(int OurNotation);
+CMSAPI int                      CMSEXPORT _cmsLCMScolorSpace(cmsColorSpaceSignature ProfileSpace);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsChannelsOf(cmsColorSpaceSignature ColorSpace);
+
+// Build a suitable formatter for the colorspace of this profile. nBytes=1 means 8 bits, nBytes=2 means 16 bits. 
+CMSAPI cmsUInt32Number   CMSEXPORT cmsFormatterForColorspaceOfProfile(cmsHPROFILE hProfile, cmsUInt32Number nBytes, cmsBool lIsFloat);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsFormatterForPCSOfProfile(cmsHPROFILE hProfile, cmsUInt32Number nBytes, cmsBool lIsFloat);
+
+
+// Localized info
+typedef enum {
+             cmsInfoDescription  = 0,    // selects the description text
+             cmsInfoManufacturer = 1,    // selects the manufacturer text
+             cmsInfoModel        = 2,    // selects the model text
+             cmsInfoCopyright    = 3     // selects the copyright text
+} cmsInfoType;                           // passed as the Info argument of cmsGetProfileInfo / cmsGetProfileInfoASCII
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetProfileInfo(cmsHPROFILE hProfile, cmsInfoType Info,
+                                                            const char LanguageCode[3], const char CountryCode[3],
+                                                            wchar_t* Buffer, cmsUInt32Number BufferSize);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetProfileInfoASCII(cmsHPROFILE hProfile, cmsInfoType Info,
+                                                            const char LanguageCode[3], const char CountryCode[3],
+                                                            char* Buffer, cmsUInt32Number BufferSize);
+
+// IO handlers ----------------------------------------------------------------------------------------------------------
+
+typedef struct _cms_io_handler cmsIOHANDLER;
+
+CMSAPI cmsIOHANDLER*     CMSEXPORT cmsOpenIOhandlerFromFile(cmsContext ContextID, const char* FileName, const char* AccessMode);
+CMSAPI cmsIOHANDLER*     CMSEXPORT cmsOpenIOhandlerFromStream(cmsContext ContextID, FILE* Stream);
+CMSAPI cmsIOHANDLER*     CMSEXPORT cmsOpenIOhandlerFromMem(cmsContext ContextID, void *Buffer, cmsUInt32Number size, const char* AccessMode);
+CMSAPI cmsIOHANDLER*     CMSEXPORT cmsOpenIOhandlerFromNULL(cmsContext ContextID);
+CMSAPI cmsIOHANDLER*     CMSEXPORT cmsGetProfileIOhandler(cmsHPROFILE hProfile);
+CMSAPI cmsBool           CMSEXPORT cmsCloseIOhandler(cmsIOHANDLER* io);
+
+// MD5 message digest --------------------------------------------------------------------------------------------------
+
+CMSAPI cmsBool           CMSEXPORT cmsMD5computeID(cmsHPROFILE hProfile);
+
+// Profile high level functions ------------------------------------------------------------------------------------------
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromFile(const char *ICCProfile, const char *sAccess);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromFileTHR(cmsContext ContextID, const char *ICCProfile, const char *sAccess);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromStream(FILE* ICCProfile, const char* sAccess);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromStreamTHR(cmsContext ContextID, FILE* ICCProfile, const char* sAccess);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromMem(const void * MemPtr, cmsUInt32Number dwSize);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromMemTHR(cmsContext ContextID, const void * MemPtr, cmsUInt32Number dwSize);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromIOhandlerTHR(cmsContext ContextID, cmsIOHANDLER* io);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromIOhandler2THR(cmsContext ContextID, cmsIOHANDLER* io, cmsBool write);
+CMSAPI cmsBool          CMSEXPORT cmsCloseProfile(cmsHPROFILE hProfile);
+
+CMSAPI cmsBool          CMSEXPORT cmsSaveProfileToFile(cmsHPROFILE hProfile, const char* FileName);
+CMSAPI cmsBool          CMSEXPORT cmsSaveProfileToStream(cmsHPROFILE hProfile, FILE* Stream);
+CMSAPI cmsBool          CMSEXPORT cmsSaveProfileToMem(cmsHPROFILE hProfile, void *MemPtr, cmsUInt32Number* BytesNeeded);
+CMSAPI cmsUInt32Number  CMSEXPORT cmsSaveProfileToIOhandler(cmsHPROFILE hProfile, cmsIOHANDLER* io);
+
+// Predefined virtual profiles ------------------------------------------------------------------------------------------
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateRGBProfileTHR(cmsContext ContextID,
+                                                   const cmsCIExyY* WhitePoint,
+                                                   const cmsCIExyYTRIPLE* Primaries,
+                                                   cmsToneCurve* const TransferFunction[3]);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateRGBProfile(const cmsCIExyY* WhitePoint,
+                                                   const cmsCIExyYTRIPLE* Primaries,
+                                                   cmsToneCurve* const TransferFunction[3]);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateGrayProfileTHR(cmsContext ContextID,
+                                                    const cmsCIExyY* WhitePoint,
+                                                    const cmsToneCurve* TransferFunction);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateGrayProfile(const cmsCIExyY* WhitePoint,
+                                                    const cmsToneCurve* TransferFunction);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateLinearizationDeviceLinkTHR(cmsContext ContextID,
+                                                                cmsColorSpaceSignature ColorSpace,
+                                                                cmsToneCurve* const TransferFunctions[]);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateLinearizationDeviceLink(cmsColorSpaceSignature ColorSpace,
+                                                                cmsToneCurve* const TransferFunctions[]);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateInkLimitingDeviceLinkTHR(cmsContext ContextID,
+                                                              cmsColorSpaceSignature ColorSpace, cmsFloat64Number Limit);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateInkLimitingDeviceLink(cmsColorSpaceSignature ColorSpace, cmsFloat64Number Limit);
+
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateLab2ProfileTHR(cmsContext ContextID, const cmsCIExyY* WhitePoint);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateLab2Profile(const cmsCIExyY* WhitePoint);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateLab4ProfileTHR(cmsContext ContextID, const cmsCIExyY* WhitePoint);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateLab4Profile(const cmsCIExyY* WhitePoint);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateXYZProfileTHR(cmsContext ContextID);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateXYZProfile(void);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreate_sRGBProfileTHR(cmsContext ContextID);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreate_sRGBProfile(void);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateBCHSWabstractProfileTHR(cmsContext ContextID,
+                                                             cmsUInt32Number nLUTPoints,
+                                                             cmsFloat64Number Bright,
+                                                             cmsFloat64Number Contrast,
+                                                             cmsFloat64Number Hue,
+                                                             cmsFloat64Number Saturation,
+                                                             cmsUInt32Number TempSrc,
+                                                             cmsUInt32Number TempDest);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateBCHSWabstractProfile(cmsUInt32Number nLUTPoints,
+                                                             cmsFloat64Number Bright,
+                                                             cmsFloat64Number Contrast,
+                                                             cmsFloat64Number Hue,
+                                                             cmsFloat64Number Saturation,
+                                                             cmsUInt32Number TempSrc,
+                                                             cmsUInt32Number TempDest);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateNULLProfileTHR(cmsContext ContextID);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateNULLProfile(void);
+
+// Converts a transform to a devicelink profile
+CMSAPI cmsHPROFILE      CMSEXPORT cmsTransform2DeviceLink(cmsHTRANSFORM hTransform, cmsFloat64Number Version, cmsUInt32Number dwFlags);
+
+// Intents ----------------------------------------------------------------------------------------------
+
+// ICC Intents
+#define INTENT_PERCEPTUAL                              0
+#define INTENT_RELATIVE_COLORIMETRIC                   1
+#define INTENT_SATURATION                              2
+#define INTENT_ABSOLUTE_COLORIMETRIC                   3
+
+// Non-ICC intents
+#define INTENT_PRESERVE_K_ONLY_PERCEPTUAL             10
+#define INTENT_PRESERVE_K_ONLY_RELATIVE_COLORIMETRIC  11
+#define INTENT_PRESERVE_K_ONLY_SATURATION             12
+#define INTENT_PRESERVE_K_PLANE_PERCEPTUAL            13
+#define INTENT_PRESERVE_K_PLANE_RELATIVE_COLORIMETRIC 14
+#define INTENT_PRESERVE_K_PLANE_SATURATION            15
+
+// Call with NULL as parameters to get the intent count
+CMSAPI cmsUInt32Number  CMSEXPORT cmsGetSupportedIntents(cmsUInt32Number nMax, cmsUInt32Number* Codes, char** Descriptions);
+CMSAPI cmsUInt32Number  CMSEXPORT cmsGetSupportedIntentsTHR(cmsContext ContextID, cmsUInt32Number nMax, cmsUInt32Number* Codes, char** Descriptions);
+
+// Flags
+
+#define cmsFLAGS_NOCACHE                  0x0040    // Inhibit 1-pixel cache
+#define cmsFLAGS_NOOPTIMIZE               0x0100    // Inhibit optimizations
+#define cmsFLAGS_NULLTRANSFORM            0x0200    // Don't transform anyway
+
+// Proofing flags
+#define cmsFLAGS_GAMUTCHECK               0x1000    // Out of Gamut alarm
+#define cmsFLAGS_SOFTPROOFING             0x4000    // Do softproofing
+
+// Misc
+#define cmsFLAGS_BLACKPOINTCOMPENSATION   0x2000
+#define cmsFLAGS_NOWHITEONWHITEFIXUP      0x0004    // Don't fix scum dot
+#define cmsFLAGS_HIGHRESPRECALC           0x0400    // Use more memory to give better accuracy
+#define cmsFLAGS_LOWRESPRECALC            0x0800    // Use less memory to minimize resources
+
+// For devicelink creation
+#define cmsFLAGS_8BITS_DEVICELINK         0x0008   // Create 8 bits devicelinks
+#define cmsFLAGS_GUESSDEVICECLASS         0x0020   // Guess device class (for transform2devicelink)
+#define cmsFLAGS_KEEP_SEQUENCE            0x0080   // Keep profile sequence for devicelink creation
+
+// Specific to particular optimizations
+#define cmsFLAGS_FORCE_CLUT               0x0002    // Force CLUT optimization
+#define cmsFLAGS_CLUT_POST_LINEARIZATION  0x0001    // create postlinearization tables if possible
+#define cmsFLAGS_CLUT_PRE_LINEARIZATION   0x0010    // create prelinearization tables if possible
+
+// Specific to unbounded mode
+#define cmsFLAGS_NONEGATIVES              0x8000    // Prevent negative numbers in floating point transforms
+
+// Copy alpha channels when transforming           
+#define cmsFLAGS_COPY_ALPHA               0x04000000 // Alpha channels are copied on cmsDoTransform()
+
+// Fine-tune control over number of gridpoints
+#define cmsFLAGS_GRIDPOINTS(n)           (((n) & 0xFF) << 16)
+
+// CRD special
+#define cmsFLAGS_NODEFAULTRESOURCEDEF     0x01000000
+
+// Transforms ---------------------------------------------------------------------------------------------------
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateTransformTHR(cmsContext ContextID,
+                                                  cmsHPROFILE Input,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsHPROFILE Output,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number dwFlags);
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateTransform(cmsHPROFILE Input,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsHPROFILE Output,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number dwFlags);
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateProofingTransformTHR(cmsContext ContextID,
+                                                  cmsHPROFILE Input,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsHPROFILE Output,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsHPROFILE Proofing,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number ProofingIntent,
+                                                  cmsUInt32Number dwFlags);
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateProofingTransform(cmsHPROFILE Input,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsHPROFILE Output,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsHPROFILE Proofing,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number ProofingIntent,
+                                                  cmsUInt32Number dwFlags);
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateMultiprofileTransformTHR(cmsContext ContextID,
+                                                  cmsHPROFILE hProfiles[],
+                                                  cmsUInt32Number nProfiles,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number dwFlags);
+
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateMultiprofileTransform(cmsHPROFILE hProfiles[],
+                                                  cmsUInt32Number nProfiles,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number dwFlags);
+
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateExtendedTransform(cmsContext ContextID,
+                                                   cmsUInt32Number nProfiles, cmsHPROFILE hProfiles[],
+                                                   cmsBool  BPC[],
+                                                   cmsUInt32Number Intents[],
+                                                   cmsFloat64Number AdaptationStates[],
+                                                   cmsHPROFILE hGamutProfile,
+                                                   cmsUInt32Number nGamutPCSposition,
+                                                   cmsUInt32Number InputFormat,
+                                                   cmsUInt32Number OutputFormat,
+                                                   cmsUInt32Number dwFlags);
+
+CMSAPI void             CMSEXPORT cmsDeleteTransform(cmsHTRANSFORM hTransform);
+
+CMSAPI void             CMSEXPORT cmsDoTransform(cmsHTRANSFORM Transform,
+                                                 const void * InputBuffer,
+                                                 void * OutputBuffer,
+                                                 cmsUInt32Number Size);
+
+CMSAPI void             CMSEXPORT cmsDoTransformStride(cmsHTRANSFORM Transform,   // Deprecated
+                                                 const void * InputBuffer,
+                                                 void * OutputBuffer,
+                                                 cmsUInt32Number Size,
+                                                 cmsUInt32Number Stride);
+
+CMSAPI void             CMSEXPORT cmsDoTransformLineStride(cmsHTRANSFORM  Transform,
+                                                 const void* InputBuffer,
+                                                 void* OutputBuffer,
+                                                 cmsUInt32Number PixelsPerLine,
+                                                 cmsUInt32Number LineCount,
+                                                 cmsUInt32Number BytesPerLineIn,
+                                                 cmsUInt32Number BytesPerLineOut,
+                                                 cmsUInt32Number BytesPerPlaneIn,
+                                                 cmsUInt32Number BytesPerPlaneOut);
+
+
+CMSAPI void             CMSEXPORT cmsSetAlarmCodes(const cmsUInt16Number NewAlarm[cmsMAXCHANNELS]);
+CMSAPI void             CMSEXPORT cmsGetAlarmCodes(cmsUInt16Number NewAlarm[cmsMAXCHANNELS]);
+
+
+CMSAPI void             CMSEXPORT cmsSetAlarmCodesTHR(cmsContext ContextID, 
+                                                          const cmsUInt16Number AlarmCodes[cmsMAXCHANNELS]);
+CMSAPI void             CMSEXPORT cmsGetAlarmCodesTHR(cmsContext ContextID, 
+                                                          cmsUInt16Number AlarmCodes[cmsMAXCHANNELS]);
+
+
+
+// Adaptation state for absolute colorimetric intent
+CMSAPI cmsFloat64Number CMSEXPORT cmsSetAdaptationState(cmsFloat64Number d);
+CMSAPI cmsFloat64Number CMSEXPORT cmsSetAdaptationStateTHR(cmsContext ContextID, cmsFloat64Number d);
+
+
+
+// Grab the ContextID from an open transform. Returns NULL if a NULL transform is passed
+CMSAPI cmsContext       CMSEXPORT cmsGetTransformContextID(cmsHTRANSFORM hTransform);
+
+// Grab the input/output formats
+CMSAPI cmsUInt32Number CMSEXPORT cmsGetTransformInputFormat(cmsHTRANSFORM hTransform);
+CMSAPI cmsUInt32Number CMSEXPORT cmsGetTransformOutputFormat(cmsHTRANSFORM hTransform);
+
+// For backwards compatibility
+CMSAPI cmsBool          CMSEXPORT cmsChangeBuffersFormat(cmsHTRANSFORM hTransform,
+                                                         cmsUInt32Number InputFormat,
+                                                         cmsUInt32Number OutputFormat);
+
+
+
+// PostScript ColorRenderingDictionary and ColorSpaceArray ----------------------------------------------------
+
+typedef enum { cmsPS_RESOURCE_CSA, cmsPS_RESOURCE_CRD } cmsPSResourceType;
+
+// lcms2 unified method to access postscript color resources
+CMSAPI cmsUInt32Number  CMSEXPORT cmsGetPostScriptColorResource(cmsContext ContextID,
+                                                                cmsPSResourceType Type,
+                                                                cmsHPROFILE hProfile,
+                                                                cmsUInt32Number Intent,
+                                                                cmsUInt32Number dwFlags,
+                                                                cmsIOHANDLER* io);
+
+CMSAPI cmsUInt32Number  CMSEXPORT cmsGetPostScriptCSA(cmsContext ContextID, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags, void* Buffer, cmsUInt32Number dwBufferLen);
+CMSAPI cmsUInt32Number  CMSEXPORT cmsGetPostScriptCRD(cmsContext ContextID, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags, void* Buffer, cmsUInt32Number dwBufferLen);
+
+
+// IT8.7 / CGATS.17-200x handling -----------------------------------------------------------------------------
+
+CMSAPI cmsHANDLE        CMSEXPORT cmsIT8Alloc(cmsContext ContextID);
+CMSAPI void             CMSEXPORT cmsIT8Free(cmsHANDLE hIT8);
+
+// Tables
+CMSAPI cmsUInt32Number  CMSEXPORT cmsIT8TableCount(cmsHANDLE hIT8);
+CMSAPI cmsInt32Number   CMSEXPORT cmsIT8SetTable(cmsHANDLE hIT8, cmsUInt32Number nTable);
+
+// Persistence
+CMSAPI cmsHANDLE        CMSEXPORT cmsIT8LoadFromFile(cmsContext ContextID, const char* cFileName);
+CMSAPI cmsHANDLE        CMSEXPORT cmsIT8LoadFromMem(cmsContext ContextID, const void *Ptr, cmsUInt32Number len);
+// CMSAPI cmsHANDLE        CMSEXPORT cmsIT8LoadFromIOhandler(cmsContext ContextID, cmsIOHANDLER* io);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SaveToFile(cmsHANDLE hIT8, const char* cFileName);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SaveToMem(cmsHANDLE hIT8, void *MemPtr, cmsUInt32Number* BytesNeeded);
+
+// Properties
+CMSAPI const char*      CMSEXPORT cmsIT8GetSheetType(cmsHANDLE hIT8);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetSheetType(cmsHANDLE hIT8, const char* Type);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetComment(cmsHANDLE hIT8, const char* cComment);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetPropertyStr(cmsHANDLE hIT8, const char* cProp, const char *Str);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetPropertyDbl(cmsHANDLE hIT8, const char* cProp, cmsFloat64Number Val);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetPropertyHex(cmsHANDLE hIT8, const char* cProp, cmsUInt32Number Val);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetPropertyMulti(cmsHANDLE hIT8, const char* Key, const char* SubKey, const char *Buffer);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetPropertyUncooked(cmsHANDLE hIT8, const char* Key, const char* Buffer);
+
+
+CMSAPI const char*      CMSEXPORT cmsIT8GetProperty(cmsHANDLE hIT8, const char* cProp);
+CMSAPI cmsFloat64Number CMSEXPORT cmsIT8GetPropertyDbl(cmsHANDLE hIT8, const char* cProp);
+CMSAPI const char*      CMSEXPORT cmsIT8GetPropertyMulti(cmsHANDLE hIT8, const char* Key, const char *SubKey);
+CMSAPI cmsUInt32Number  CMSEXPORT cmsIT8EnumProperties(cmsHANDLE hIT8, char ***PropertyNames);
+CMSAPI cmsUInt32Number  CMSEXPORT cmsIT8EnumPropertyMulti(cmsHANDLE hIT8, const char* cProp, const char ***SubpropertyNames);
+
+// Datasets
+CMSAPI const char*      CMSEXPORT cmsIT8GetDataRowCol(cmsHANDLE hIT8, int row, int col);
+CMSAPI cmsFloat64Number CMSEXPORT cmsIT8GetDataRowColDbl(cmsHANDLE hIT8, int row, int col);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetDataRowCol(cmsHANDLE hIT8, int row, int col,
+                                                const char* Val);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetDataRowColDbl(cmsHANDLE hIT8, int row, int col,
+                                                cmsFloat64Number Val);
+
+CMSAPI const char*      CMSEXPORT cmsIT8GetData(cmsHANDLE hIT8, const char* cPatch, const char* cSample);
+
+
+CMSAPI cmsFloat64Number CMSEXPORT cmsIT8GetDataDbl(cmsHANDLE hIT8, const char* cPatch, const char* cSample);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetData(cmsHANDLE hIT8, const char* cPatch,
+                                                const char* cSample,
+                                                const char *Val);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetDataDbl(cmsHANDLE hIT8, const char* cPatch,
+                                                const char* cSample,
+                                                cmsFloat64Number Val);
+
+CMSAPI int              CMSEXPORT cmsIT8FindDataFormat(cmsHANDLE hIT8, const char* cSample);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetDataFormat(cmsHANDLE hIT8, int n, const char *Sample);
+CMSAPI int              CMSEXPORT cmsIT8EnumDataFormat(cmsHANDLE hIT8, char ***SampleNames);
+
+CMSAPI const char*      CMSEXPORT cmsIT8GetPatchName(cmsHANDLE hIT8, int nPatch, char* buffer);
+CMSAPI int              CMSEXPORT cmsIT8GetPatchByName(cmsHANDLE hIT8, const char *cPatch);
+
+// The LABEL extension
+CMSAPI int              CMSEXPORT cmsIT8SetTableByLabel(cmsHANDLE hIT8, const char* cSet, const char* cField, const char* ExpectedType);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetIndexColumn(cmsHANDLE hIT8, const char* cSample);
+
+// Formatter for double
+CMSAPI void             CMSEXPORT cmsIT8DefineDblFormat(cmsHANDLE hIT8, const char* Formatter);
+
+// Gamut boundary description routines ------------------------------------------------------------------------------
+
+CMSAPI cmsHANDLE        CMSEXPORT cmsGBDAlloc(cmsContext ContextID);
+CMSAPI void             CMSEXPORT cmsGBDFree(cmsHANDLE hGBD);
+CMSAPI cmsBool          CMSEXPORT cmsGDBAddPoint(cmsHANDLE hGBD, const cmsCIELab* Lab);    // NOTE: name is spelled "GDB", unlike cmsGBDAlloc/cmsGBDFree
+CMSAPI cmsBool          CMSEXPORT cmsGDBCompute(cmsHANDLE  hGDB, cmsUInt32Number dwFlags); // NOTE: "GDB" spelling here too
+CMSAPI cmsBool          CMSEXPORT cmsGDBCheckPoint(cmsHANDLE hGBD, const cmsCIELab* Lab);  // NOTE: "GDB" spelling here too
+
+// Feature detection  ----------------------------------------------------------------------------------------------
+
+// Estimate the black point
+CMSAPI cmsBool          CMSEXPORT cmsDetectBlackPoint(cmsCIEXYZ* BlackPoint, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags);
+CMSAPI cmsBool          CMSEXPORT cmsDetectDestinationBlackPoint(cmsCIEXYZ* BlackPoint, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags);
+
+// Estimate total area coverage
+CMSAPI cmsFloat64Number CMSEXPORT cmsDetectTAC(cmsHPROFILE hProfile);
+
+
+// Poor man's gamut mapping
+CMSAPI cmsBool          CMSEXPORT cmsDesaturateLab(cmsCIELab* Lab,
+                                                   double amax, double amin,
+                                                   double bmax, double bmin);
+
+#ifndef CMS_USE_CPP_API
+#   ifdef __cplusplus
+    }
+#   endif
+#endif
+
+#define _lcms2_H
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/lcms/include/lcms2_plugin.h b/third-party/libjxl/libjxl/third_party/lcms/include/lcms2_plugin.h
new file mode 100644
index 0000000000..0d32191c42
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/include/lcms2_plugin.h
@@ -0,0 +1,671 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+// This is the plug-in header file. Normal LittleCMS clients should not use it.
+// It is provided for plug-in writers that may want to access the support
+// functions to do low level operations. All plug-in related structures
+// are defined here. Including this file also includes the standard API.
+
+#ifndef _lcms_plugin_H
+
+// Deal with Microsoft's attempt at deprecating C standard runtime functions
+#ifdef _MSC_VER
+#    if (_MSC_VER >= 1400)
+#      ifndef _CRT_SECURE_NO_DEPRECATE
+#        define _CRT_SECURE_NO_DEPRECATE
+#      endif
+#      ifndef _CRT_SECURE_NO_WARNINGS
+#        define _CRT_SECURE_NO_WARNINGS
+#      endif
+#    endif
+#endif
+
+#ifndef _lcms2_H
+#include "lcms2.h"
+#endif
+
+// We need some standard C functions.
+#include <stdlib.h>
+#include <math.h>
+#include <stdarg.h>
+#include <memory.h>
+#include <string.h>
+
+
+#ifndef CMS_USE_CPP_API
+#   ifdef __cplusplus
+extern "C" {
+#   endif
+#endif
+
+// Vector & Matrix operations -----------------------------------------------------------------------
+
+// Axis of the matrix/array. No specific meaning at all.
+#define VX      0
+#define VY      1
+#define VZ      2
+
+// Vectors
+typedef struct {
+    cmsFloat64Number n[3];
+
+    } cmsVEC3;
+
+// 3x3 Matrix
+typedef struct {
+    cmsVEC3 v[3];
+
+    } cmsMAT3;
+
+CMSAPI void               CMSEXPORT _cmsVEC3init(cmsVEC3* r, cmsFloat64Number x, cmsFloat64Number y, cmsFloat64Number z);
+CMSAPI void               CMSEXPORT _cmsVEC3minus(cmsVEC3* r, const cmsVEC3* a, const cmsVEC3* b);
+CMSAPI void               CMSEXPORT _cmsVEC3cross(cmsVEC3* r, const cmsVEC3* u, const cmsVEC3* v);
+CMSAPI cmsFloat64Number   CMSEXPORT _cmsVEC3dot(const cmsVEC3* u, const cmsVEC3* v);
+CMSAPI cmsFloat64Number   CMSEXPORT _cmsVEC3length(const cmsVEC3* a);
+CMSAPI cmsFloat64Number   CMSEXPORT _cmsVEC3distance(const cmsVEC3* a, const cmsVEC3* b);
+
+CMSAPI void               CMSEXPORT _cmsMAT3identity(cmsMAT3* a);
+CMSAPI cmsBool            CMSEXPORT _cmsMAT3isIdentity(const cmsMAT3* a);
+CMSAPI void               CMSEXPORT _cmsMAT3per(cmsMAT3* r, const cmsMAT3* a, const cmsMAT3* b);
+CMSAPI cmsBool            CMSEXPORT _cmsMAT3inverse(const cmsMAT3* a, cmsMAT3* b);
+CMSAPI cmsBool            CMSEXPORT _cmsMAT3solve(cmsVEC3* x, cmsMAT3* a, cmsVEC3* b);
+CMSAPI void               CMSEXPORT _cmsMAT3eval(cmsVEC3* r, const cmsMAT3* a, const cmsVEC3* v);
+
+
+// MD5 low level  -------------------------------------------------------------------------------------
+
+CMSAPI cmsHANDLE          CMSEXPORT cmsMD5alloc(cmsContext ContextID);
+CMSAPI void               CMSEXPORT cmsMD5add(cmsHANDLE Handle, const cmsUInt8Number* buf, cmsUInt32Number len);
+CMSAPI void               CMSEXPORT cmsMD5finish(cmsProfileID* ProfileID, cmsHANDLE Handle);
+
+// Error logging  -------------------------------------------------------------------------------------
+
+CMSAPI void               CMSEXPORT  cmsSignalError(cmsContext ContextID, cmsUInt32Number ErrorCode, const char *ErrorText, ...);
+
+// Memory management ----------------------------------------------------------------------------------
+
+CMSAPI void*              CMSEXPORT _cmsMalloc(cmsContext ContextID, cmsUInt32Number size);
+CMSAPI void*              CMSEXPORT _cmsMallocZero(cmsContext ContextID, cmsUInt32Number size);
+CMSAPI void*              CMSEXPORT _cmsCalloc(cmsContext ContextID, cmsUInt32Number num, cmsUInt32Number size);
+CMSAPI void*              CMSEXPORT _cmsRealloc(cmsContext ContextID, void* Ptr, cmsUInt32Number NewSize);
+CMSAPI void               CMSEXPORT _cmsFree(cmsContext ContextID, void* Ptr);
+CMSAPI void*              CMSEXPORT _cmsDupMem(cmsContext ContextID, const void* Org, cmsUInt32Number size);
+
+// I/O handler ----------------------------------------------------------------------------------
+
+struct _cms_io_handler {
+
+    void* stream;   // Associated stream, which is implemented differently depending on media.
+
+    cmsContext        ContextID;
+    cmsUInt32Number   UsedSpace;
+    cmsUInt32Number   ReportedSize;
+    char              PhysicalFile[cmsMAX_PATH];
+
+    cmsUInt32Number   (* Read)(struct _cms_io_handler* iohandler, void *Buffer,
+                                                                  cmsUInt32Number size,
+                                                                  cmsUInt32Number count);
+    cmsBool           (* Seek)(struct _cms_io_handler* iohandler, cmsUInt32Number offset);
+    cmsBool           (* Close)(struct _cms_io_handler* iohandler);
+    cmsUInt32Number   (* Tell)(struct _cms_io_handler* iohandler);
+    cmsBool           (* Write)(struct _cms_io_handler* iohandler, cmsUInt32Number size,
+                                                                   const void* Buffer);
+};
+
+// Endianness adjust functions
+CMSAPI cmsUInt16Number   CMSEXPORT  _cmsAdjustEndianess16(cmsUInt16Number Word);
+CMSAPI cmsUInt32Number   CMSEXPORT  _cmsAdjustEndianess32(cmsUInt32Number Value);
+CMSAPI void              CMSEXPORT  _cmsAdjustEndianess64(cmsUInt64Number* Result, cmsUInt64Number* QWord);
+
+// Helper IO functions
+CMSAPI cmsBool           CMSEXPORT  _cmsReadUInt8Number(cmsIOHANDLER* io,  cmsUInt8Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsReadUInt16Number(cmsIOHANDLER* io, cmsUInt16Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsReadUInt32Number(cmsIOHANDLER* io, cmsUInt32Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsReadFloat32Number(cmsIOHANDLER* io, cmsFloat32Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsReadUInt64Number(cmsIOHANDLER* io, cmsUInt64Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsRead15Fixed16Number(cmsIOHANDLER* io, cmsFloat64Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsReadXYZNumber(cmsIOHANDLER* io, cmsCIEXYZ* XYZ);
+CMSAPI cmsBool           CMSEXPORT  _cmsReadUInt16Array(cmsIOHANDLER* io, cmsUInt32Number n, cmsUInt16Number* Array);
+
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteUInt8Number(cmsIOHANDLER* io, cmsUInt8Number n);
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteUInt16Number(cmsIOHANDLER* io, cmsUInt16Number n);
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteUInt32Number(cmsIOHANDLER* io, cmsUInt32Number n);
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteFloat32Number(cmsIOHANDLER* io, cmsFloat32Number n);
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteUInt64Number(cmsIOHANDLER* io, cmsUInt64Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsWrite15Fixed16Number(cmsIOHANDLER* io, cmsFloat64Number n);
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteXYZNumber(cmsIOHANDLER* io, const cmsCIEXYZ* XYZ);
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteUInt16Array(cmsIOHANDLER* io, cmsUInt32Number n, const cmsUInt16Number* Array);
+
+// ICC base tag
+typedef struct {
+    cmsTagTypeSignature  sig;
+    cmsInt8Number        reserved[4];
+
+} _cmsTagBase;
+
+// Type base helper functions
+CMSAPI cmsTagTypeSignature  CMSEXPORT _cmsReadTypeBase(cmsIOHANDLER* io);
+CMSAPI cmsBool              CMSEXPORT _cmsWriteTypeBase(cmsIOHANDLER* io, cmsTagTypeSignature sig);
+
+// Alignment functions
+CMSAPI cmsBool             CMSEXPORT _cmsReadAlignment(cmsIOHANDLER* io);
+CMSAPI cmsBool             CMSEXPORT _cmsWriteAlignment(cmsIOHANDLER* io);
+
+// To deal with text streams. 2K at most
+CMSAPI cmsBool             CMSEXPORT _cmsIOPrintf(cmsIOHANDLER* io, const char* frm, ...);
+
+// Fixed point helper functions
+CMSAPI cmsFloat64Number    CMSEXPORT _cms8Fixed8toDouble(cmsUInt16Number fixed8);
+CMSAPI cmsUInt16Number     CMSEXPORT _cmsDoubleTo8Fixed8(cmsFloat64Number val);
+
+CMSAPI cmsFloat64Number    CMSEXPORT _cms15Fixed16toDouble(cmsS15Fixed16Number fix32);
+CMSAPI cmsS15Fixed16Number CMSEXPORT _cmsDoubleTo15Fixed16(cmsFloat64Number v);
+
+// Date/time helper functions
+CMSAPI void                CMSEXPORT _cmsEncodeDateTimeNumber(cmsDateTimeNumber *Dest, const struct tm *Source);
+CMSAPI void                CMSEXPORT _cmsDecodeDateTimeNumber(const cmsDateTimeNumber *Source, struct tm *Dest);
+
+//----------------------------------------------------------------------------------------------------------
+
+// Shared callbacks for user data
+typedef void     (* _cmsFreeUserDataFn)(cmsContext ContextID, void* Data);
+typedef void*    (* _cmsDupUserDataFn)(cmsContext ContextID, const void* Data);
+
+//----------------------------------------------------------------------------------------------------------
+
+// Plug-in foundation
+#define cmsPluginMagicNumber                 0x61637070     // 'acpp'
+
+#define cmsPluginMemHandlerSig               0x6D656D48     // 'memH'
+#define cmsPluginInterpolationSig            0x696E7048     // 'inpH'
+#define cmsPluginParametricCurveSig          0x70617248     // 'parH'
+#define cmsPluginFormattersSig               0x66726D48     // 'frmH'
+#define cmsPluginTagTypeSig                  0x74797048     // 'typH'
+#define cmsPluginTagSig                      0x74616748     // 'tagH'
+#define cmsPluginRenderingIntentSig          0x696E7448     // 'intH'
+#define cmsPluginMultiProcessElementSig      0x6D706548     // 'mpeH'
+#define cmsPluginOptimizationSig             0x6F707448     // 'optH'
+#define cmsPluginTransformSig                0x7A666D48     // 'xfmH' by name; the bytes spell 'zfmH' (0x7A is 'z')
+#define cmsPluginMutexSig                    0x6D747A48     // 'mtxH' by name; the bytes spell 'mtzH' (0x7A is 'z')
+
+typedef struct _cmsPluginBaseStruct {
+
+        cmsUInt32Number                Magic;               // 'acpp' signature
+        cmsUInt32Number                ExpectedVersion;     // Expected version of LittleCMS
+        cmsUInt32Number                Type;                // Type of plug-in
+        struct _cmsPluginBaseStruct*   Next;                // For multiple plugin definition. NULL for end of list.
+
+} cmsPluginBase;
+
+// Maximum number of types in a plugin array
+#define MAX_TYPES_IN_LCMS_PLUGIN    20
+
+//----------------------------------------------------------------------------------------------------------
+
+// Memory handler. Each new plug-in type replaces current behaviour
+
+typedef void* (* _cmsMallocFnPtrType)(cmsContext ContextID, cmsUInt32Number size); 
+typedef void  (* _cmsFreeFnPtrType)(cmsContext ContextID, void *Ptr);
+typedef void* (* _cmsReallocFnPtrType)(cmsContext ContextID, void* Ptr, cmsUInt32Number NewSize);
+
+typedef void* (* _cmsMalloZerocFnPtrType)(cmsContext ContextID, cmsUInt32Number size); 
+typedef void* (* _cmsCallocFnPtrType)(cmsContext ContextID, cmsUInt32Number num, cmsUInt32Number size);
+typedef void* (* _cmsDupFnPtrType)(cmsContext ContextID, const void* Org, cmsUInt32Number size);
+
+typedef struct {
+
+        cmsPluginBase base;
+
+        // Required
+        _cmsMallocFnPtrType  MallocPtr;
+        _cmsFreeFnPtrType    FreePtr;
+        _cmsReallocFnPtrType ReallocPtr;
+
+        // Optional
+       _cmsMalloZerocFnPtrType MallocZeroPtr;
+       _cmsCallocFnPtrType     CallocPtr;
+       _cmsDupFnPtrType        DupPtr;
+
+} cmsPluginMemHandler;
+
+
+// ------------------------------------------------------------------------------------------------------------------
+
+// Interpolation. 16 bits and floating point versions.
+struct _cms_interp_struc;
+
+// Interpolation callbacks
+
+// 16 bits forward interpolation. This function performs precision-limited linear interpolation
+// and is supposed to be quite fast. Implementation may be tetrahedral or trilinear, and plug-ins may
+// choose to implement any other interpolation algorithm.
+typedef void (* _cmsInterpFn16)(CMSREGISTER const cmsUInt16Number Input[],
+                                CMSREGISTER cmsUInt16Number Output[],
+                                CMSREGISTER const struct _cms_interp_struc* p);
+
+// Floating point forward interpolation. Full precision interpolation using floats. This is not a
+// time critical function. Implementation may be tetrahedral or trilinear, and plug-ins may
+// choose to implement any other interpolation algorithm.
+typedef void (* _cmsInterpFnFloat)(cmsFloat32Number const Input[],
+                                   cmsFloat32Number Output[],
+                                   const struct _cms_interp_struc* p);
+
+
+
+// This type holds a pointer to an interpolator that can be either 16 bits or float
+typedef union {
+    _cmsInterpFn16       Lerp16;            // Forward interpolation in 16 bits
+    _cmsInterpFnFloat    LerpFloat;         // Forward interpolation in floating point
+} cmsInterpFunction;
+
+// Flags for interpolator selection
+#define CMS_LERP_FLAGS_16BITS             0x0000        // The default
+#define CMS_LERP_FLAGS_FLOAT              0x0001        // Requires different implementation
+#define CMS_LERP_FLAGS_TRILINEAR          0x0100        // Hint only
+
+
+#define MAX_INPUT_DIMENSIONS 8
+
+typedef struct _cms_interp_struc {  // Used on all interpolations. Supplied by lcms2 when calling the interpolation function
+
+    cmsContext ContextID;     // The calling thread
+
+    cmsUInt32Number dwFlags;  // Keep original flags
+    cmsUInt32Number nInputs;  // != 1 only in 3D interpolation
+    cmsUInt32Number nOutputs; // != 1 only in 3D interpolation
+
+    cmsUInt32Number nSamples[MAX_INPUT_DIMENSIONS];  // Valid on all kinds of tables
+    cmsUInt32Number Domain[MAX_INPUT_DIMENSIONS];    // Domain = nSamples - 1
+
+    cmsUInt32Number opta[MAX_INPUT_DIMENSIONS];     // Optimization for 3D CLUT. This is the number of nodes premultiplied for each
+                                                    // dimension. For example, with 7 nodes: 7, 7^2, 7^3, 7^4, etc. With non-regular
+                                                    // sampling, this may vary according to the number of nodes for each dimension.
+
+    const void *Table;                // Points to the actual interpolation table
+    cmsInterpFunction Interpolation;  // Points to the function to do the interpolation
+
+ } cmsInterpParams;
+
+// Interpolators factory
+typedef cmsInterpFunction (* cmsInterpFnFactory)(cmsUInt32Number nInputChannels, cmsUInt32Number nOutputChannels, cmsUInt32Number dwFlags);
+
+// The plug-in
+typedef struct {
+    cmsPluginBase base;
+
+    // Points to a user-supplied function which implements the factory
+    cmsInterpFnFactory InterpolatorsFactory;
+
+} cmsPluginInterpolation;
+
+//----------------------------------------------------------------------------------------------------------
+
+// Parametric curves. A negative type means same function but analytically inverted. Max. number of params is 10
+
+// Evaluator callback for user-supplied parametric curves. May implement more than one type
+typedef  cmsFloat64Number (* cmsParametricCurveEvaluator)(cmsInt32Number Type, const cmsFloat64Number Params[10], cmsFloat64Number R);
+
+// Plug-in may implement an arbitrary number of parametric curves
+typedef struct {
+    cmsPluginBase base;
+
+    cmsUInt32Number nFunctions;                                     // Number of supported functions
+    cmsUInt32Number FunctionTypes[MAX_TYPES_IN_LCMS_PLUGIN];        // The identification types
+    cmsUInt32Number ParameterCount[MAX_TYPES_IN_LCMS_PLUGIN];       // Number of parameters for each function
+
+    cmsParametricCurveEvaluator    Evaluator;                       // The evaluator
+
+} cmsPluginParametricCurves;
+//----------------------------------------------------------------------------------------------------------
+
+// Formatters. This plug-in adds new handlers, replacing them if they already exist. Formatters dealing with
+// cmsFloat32Number (bps = 4) or double (bps = 0) types are requested via FormatterFloat callback. Others come across
+// Formatter16 callback
+
+struct _cmstransform_struct;
+
+typedef cmsUInt8Number* (* cmsFormatter16)(CMSREGISTER struct _cmstransform_struct* CMMcargo,
+                                           CMSREGISTER cmsUInt16Number Values[],
+                                           CMSREGISTER cmsUInt8Number* Buffer,
+                                           CMSREGISTER cmsUInt32Number Stride);
+
+typedef cmsUInt8Number* (* cmsFormatterFloat)(struct _cmstransform_struct* CMMcargo,
+                                              cmsFloat32Number Values[],
+                                              cmsUInt8Number*  Buffer,
+                                              cmsUInt32Number  Stride);
+
+// This type holds a pointer to a formatter that can be either 16 bits or cmsFloat32Number
+typedef union {
+    cmsFormatter16    Fmt16;
+    cmsFormatterFloat FmtFloat;
+
+} cmsFormatter;
+
+#define CMS_PACK_FLAGS_16BITS       0x0000
+#define CMS_PACK_FLAGS_FLOAT        0x0001
+
+typedef enum { cmsFormatterInput=0, cmsFormatterOutput=1 } cmsFormatterDirection;
+
+typedef cmsFormatter (* cmsFormatterFactory)(cmsUInt32Number Type,           // Specific type, i.e. TYPE_RGB_8
+                                             cmsFormatterDirection Dir,
+                                             cmsUInt32Number dwFlags);      // precision
+
+// Plug-in may implement an arbitrary number of formatters
+typedef struct {
+    cmsPluginBase          base;
+    cmsFormatterFactory    FormattersFactory;
+
+} cmsPluginFormatters;
+
+//----------------------------------------------------------------------------------------------------------
+
+// Tag type handler. Each type is free to return anything it wants, and it is up to the caller to
+// know in advance what is the type contained in the tag.
+typedef struct _cms_typehandler_struct {
+
+        cmsTagTypeSignature Signature;     // The signature of the type
+
+        // Allocates and reads items
+        void *   (* ReadPtr)(struct _cms_typehandler_struct* self,
+                             cmsIOHANDLER*      io,
+                             cmsUInt32Number*   nItems,
+                             cmsUInt32Number    SizeOfTag);
+
+        // Writes n Items
+        cmsBool  (* WritePtr)(struct _cms_typehandler_struct* self,
+                              cmsIOHANDLER*     io,
+                              void*             Ptr,
+                              cmsUInt32Number   nItems);
+
+        // Duplicate an item or array of items
+        void*   (* DupPtr)(struct _cms_typehandler_struct* self,
+                           const void *Ptr,
+                           cmsUInt32Number n);
+
+        // Free all resources
+        void    (* FreePtr)(struct _cms_typehandler_struct* self,
+                            void *Ptr);
+
+        // Additional parameters used by the calling thread
+        cmsContext       ContextID;
+        cmsUInt32Number  ICCVersion;
+
+} cmsTagTypeHandler;
+
+// Each plug-in implements a single type
+typedef struct {
+        cmsPluginBase      base;
+        cmsTagTypeHandler  Handler;
+
+} cmsPluginTagType;
+
+//----------------------------------------------------------------------------------------------------------
+
+// This is the tag plugin, which identifies tags. For writing, a pointer to function is provided.
+// This function should return the desired type for this tag, given the version of profile
+// and the data being serialized.
+typedef struct {
+
+    cmsUInt32Number     ElemCount;          // If this tag needs an array, how many elements it should keep
+
+    // For reading.
+    cmsUInt32Number     nSupportedTypes;    // Number of types this tag can come in (MAX_TYPES_IN_LCMS_PLUGIN maximum)
+    cmsTagTypeSignature SupportedTypes[MAX_TYPES_IN_LCMS_PLUGIN];
+
+    // For writing
+    cmsTagTypeSignature (* DecideType)(cmsFloat64Number ICCVersion, const void *Data);
+
+} cmsTagDescriptor;
+
+// Plug-in implements a single tag
+typedef struct {
+    cmsPluginBase    base;
+
+    cmsTagSignature  Signature;
+    cmsTagDescriptor Descriptor;
+
+} cmsPluginTag;
+
+//----------------------------------------------------------------------------------------------------------
+
+// Custom intents. This function should join all profiles specified in the array in
+// a single LUT. Any custom intent in the chain redirects to custom function. If more than
+// one custom intent is found, the one located first is invoked. Usually users should use only one
+// custom intent, so mixing custom intents in same multiprofile transform is not supported.
+
+typedef cmsPipeline* (* cmsIntentFn)( cmsContext       ContextID,
+                                      cmsUInt32Number  nProfiles,
+                                      cmsUInt32Number  Intents[],
+                                      cmsHPROFILE      hProfiles[],
+                                      cmsBool          BPC[],
+                                      cmsFloat64Number AdaptationStates[],
+                                      cmsUInt32Number  dwFlags);
+
+
+// Each plug-in defines a single intent number.
+typedef struct {
+    cmsPluginBase     base;
+    cmsUInt32Number   Intent;
+    cmsIntentFn       Link;
+    char              Description[256];
+
+} cmsPluginRenderingIntent;
+
+
+// The default ICC intents (perceptual, saturation, rel.col and abs.col)
+CMSAPI cmsPipeline*  CMSEXPORT _cmsDefaultICCintents(cmsContext       ContextID,
+                                                     cmsUInt32Number  nProfiles,
+                                                     cmsUInt32Number  Intents[],
+                                                     cmsHPROFILE      hProfiles[],
+                                                     cmsBool          BPC[],
+                                                     cmsFloat64Number AdaptationStates[],
+                                                     cmsUInt32Number  dwFlags);
+
+
+//----------------------------------------------------------------------------------------------------------
+
+// Pipelines, Multi Process Elements.
+
+typedef void (* _cmsStageEvalFn)     (const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage* mpe);
+typedef void*(* _cmsStageDupElemFn)  (cmsStage* mpe);
+typedef void (* _cmsStageFreeElemFn) (cmsStage* mpe);
+
+
+// Allocates a generic MPE (multi process element) and returns it as a cmsStage*.
+CMSAPI cmsStage* CMSEXPORT _cmsStageAllocPlaceholder(cmsContext ContextID,
+                                cmsStageSignature     Type,               // Stage signature
+                                cmsUInt32Number       InputChannels,      // Number of input channels
+                                cmsUInt32Number       OutputChannels,     // Number of output channels
+                                _cmsStageEvalFn       EvalPtr,            // Points to fn that evaluates the element (always in floating point)
+                                _cmsStageDupElemFn    DupElemPtr,         // Points to a fn that duplicates the stage
+                                _cmsStageFreeElemFn   FreePtr,            // Points to a fn that sets the element free
+                                void*                 Data);              // A generic pointer to whatever memory needed by the element
+typedef struct {
+      cmsPluginBase     base;       // Common plug-in header
+      cmsTagTypeHandler Handler;    // Tag-type handler for the element -- NOTE(review): presumably its read/write callbacks; confirm
+
+}  cmsPluginMultiProcessElement;
+
+
+// Data kept in "Element" member of cmsStage
+
+// Curves: element data of a tone-curve stage
+typedef struct {
+    cmsUInt32Number nCurves;      // NOTE(review): presumably the number of entries in TheCurves -- confirm
+    cmsToneCurve**  TheCurves;    // Array of pointers to cmsToneCurve
+
+} _cmsStageToneCurvesData;
+
+// Matrix: element data of a matrix stage
+typedef struct {
+    cmsFloat64Number*  Double;          // Matrix coefficients, in floating point
+    cmsFloat64Number*  Offset;          // The offset terms
+
+} _cmsStageMatrixData;
+
+// CLUT: element data of a lookup-table stage
+typedef struct {
+
+    union {                       // Only one of the two representations is in use at any time
+        cmsUInt16Number*  T;      // Points to the 16-bit table
+        cmsFloat32Number* TFloat; // Points to the cmsFloat32Number table
+
+    } Tab;
+
+    cmsInterpParams* Params;          // Interpolation parameters
+    cmsUInt32Number  nEntries;        // NOTE(review): presumably the number of table entries -- confirm
+    cmsBool          HasFloatValues;  // NOTE(review): presumably TRUE when Tab.TFloat is the member in use -- confirm
+
+} _cmsStageCLutData;
+
+
+//----------------------------------------------------------------------------------------------------------
+// Optimization. Using this plug-in, additional optimization strategies may be implemented.
+// The function should return TRUE if any optimization is done on the LUT; this terminates
+// the optimization search. It should return FALSE if it is unable to optimize and wants to
+// give the rest of the optimizers a chance.
+
+typedef void     (* _cmsOPTeval16Fn)(CMSREGISTER const cmsUInt16Number In[],    // 16-bit input values (read-only)
+                                     CMSREGISTER cmsUInt16Number Out[],         // 16-bit output values
+                                     CMSREGISTER const void* Data);             // Opaque data -- NOTE(review): presumably the PrivateData given to _cmsPipelineSetOptimizationParameters; confirm
+
+
+typedef cmsBool  (* _cmsOPToptimizeFn)(cmsPipeline** Lut,                       // The LUT to optimize, passed by address; return TRUE if it was optimized, FALSE otherwise
+                                       cmsUInt32Number  Intent,                 // Intent number
+                                       cmsUInt32Number* InputFormat,            // Input format, passed by address
+                                       cmsUInt32Number* OutputFormat,           // Output format, passed by address
+                                       cmsUInt32Number* dwFlags);               // Flags, passed by address
+
+// This function may be used to set the optional evaluator and a block of private data. If private data is being used, an optional
+// duplicator and free functions should also be specified in order to duplicate the LUT construct. Use NULL to inhibit such functionality.
+
+CMSAPI void CMSEXPORT _cmsPipelineSetOptimizationParameters(cmsPipeline* Lut,         // LUT that receives the evaluator and private data
+                                               _cmsOPTeval16Fn Eval16,                 // Optional evaluator
+                                               void* PrivateData,                      // Block of private data
+                                               _cmsFreeUserDataFn FreePrivateDataFn,   // Free function for the private data; NULL to inhibit
+                                               _cmsDupUserDataFn DupPrivateDataFn);    // Duplicator for the private data; NULL to inhibit
+
+typedef struct {
+      cmsPluginBase     base;      // Common plug-in header
+
+      // Optimization entry point, of type _cmsOPToptimizeFn
+      _cmsOPToptimizeFn  OptimizePtr;
+
+}  cmsPluginOptimization;
+
+//----------------------------------------------------------------------------------------------------------
+// Full xform
+
+typedef struct {
+       cmsUInt32Number BytesPerLineIn;      // NOTE(review): presumably bytes from one input line to the next -- confirm
+       cmsUInt32Number BytesPerLineOut;     // NOTE(review): same, for the output buffer -- confirm
+       cmsUInt32Number BytesPerPlaneIn;     // NOTE(review): presumably bytes from one input plane to the next (planar formats) -- confirm
+       cmsUInt32Number BytesPerPlaneOut;    // NOTE(review): same, for the output buffer -- confirm
+
+} cmsStride;    // Passed by const pointer as the last argument of _cmsTransform2Fn
+
+typedef void     (* _cmsTransformFn)(struct _cmstransform_struct *CMMcargo,   // Legacy function, handles just ONE scanline.
+                                     const void* InputBuffer,
+                                     void* OutputBuffer,
+                                     cmsUInt32Number Size,
+                                     cmsUInt32Number Stride);                 // Stride in bytes to the next plane in planar formats
+
+
+typedef void     (*_cmsTransform2Fn)(struct _cmstransform_struct *CMMcargo,   // Non-legacy variant: takes a line count and a cmsStride instead of a single scanline
+                                     const void* InputBuffer,
+                                     void* OutputBuffer,
+                                     cmsUInt32Number PixelsPerLine,     // Pixels in each line
+                                     cmsUInt32Number LineCount,          // Number of lines
+                                     const cmsStride* Stride);  // Line/plane strides for the input and output buffers
+
+typedef cmsBool  (* _cmsTransformFactory)(_cmsTransformFn* xform,               // Legacy factory; NOTE(review): every argument is passed by address, presumably so the factory can fill or adjust it -- confirm
+                                         void** UserData,                       // User data, later reachable through _cmsGetTransformUserData (assumed)
+                                         _cmsFreeUserDataFn* FreePrivateDataFn, // Free function for UserData
+                                         cmsPipeline** Lut,                     // Pipeline of the transform
+                                         cmsUInt32Number* InputFormat,          // Input format
+                                         cmsUInt32Number* OutputFormat,         // Output format
+                                         cmsUInt32Number* dwFlags);             // Flags
+
+typedef cmsBool  (* _cmsTransform2Factory)(_cmsTransform2Fn* xform,             // Same parameter list as _cmsTransformFactory, but for a _cmsTransform2Fn
+                                         void** UserData,
+                                         _cmsFreeUserDataFn* FreePrivateDataFn,
+                                         cmsPipeline** Lut,
+                                         cmsUInt32Number* InputFormat,
+                                         cmsUInt32Number* OutputFormat,
+                                         cmsUInt32Number* dwFlags);
+
+
+// Set and retrieve the transform's user data, as specified by the factory
+CMSAPI void   CMSEXPORT _cmsSetTransformUserData(struct _cmstransform_struct *CMMcargo, void* ptr, _cmsFreeUserDataFn FreePrivateDataFn);
+CMSAPI void * CMSEXPORT _cmsGetTransformUserData(struct _cmstransform_struct *CMMcargo);
+
+
+// Retrieve the transform's input and output formatters (16-bit and float variants)
+CMSAPI void   CMSEXPORT _cmsGetTransformFormatters16   (struct _cmstransform_struct *CMMcargo, cmsFormatter16* FromInput, cmsFormatter16* ToOutput);
+CMSAPI void   CMSEXPORT _cmsGetTransformFormattersFloat(struct _cmstransform_struct *CMMcargo, cmsFormatterFloat* FromInput, cmsFormatterFloat* ToOutput);
+
+typedef struct {
+      cmsPluginBase     base;      // Common plug-in header
+
+      // Transform entry point: exactly one of the two factory flavours
+      union {
+             _cmsTransformFactory        legacy_xform;   // Factory producing a legacy _cmsTransformFn
+             _cmsTransform2Factory       xform;          // Factory producing a _cmsTransform2Fn
+      } factories;
+
+}  cmsPluginTransform;
+
+//----------------------------------------------------------------------------------------------------------
+// Mutex 
+
+typedef void*    (* _cmsCreateMutexFnPtrType)(cmsContext ContextID);              // Creates a mutex; returns it as an opaque void*
+typedef void     (* _cmsDestroyMutexFnPtrType)(cmsContext ContextID, void* mtx);  // Destroys the mutex mtx
+typedef cmsBool  (* _cmsLockMutexFnPtrType)(cmsContext ContextID, void* mtx);     // Locks mtx -- NOTE(review): presumably returns TRUE on success; confirm
+typedef void     (* _cmsUnlockMutexFnPtrType)(cmsContext ContextID, void* mtx);   // Unlocks mtx
+
+typedef struct {
+      cmsPluginBase     base;      // Common plug-in header
+
+     _cmsCreateMutexFnPtrType  CreateMutexPtr;    // Callback that creates a mutex
+     _cmsDestroyMutexFnPtrType DestroyMutexPtr;   // Callback that destroys a mutex
+     _cmsLockMutexFnPtrType    LockMutexPtr;      // Callback that locks a mutex
+     _cmsUnlockMutexFnPtrType  UnlockMutexPtr;    // Callback that unlocks a mutex
+
+}  cmsPluginMutex;
+
+CMSAPI void*   CMSEXPORT _cmsCreateMutex(cmsContext ContextID);               // Same signature as _cmsCreateMutexFnPtrType
+CMSAPI void    CMSEXPORT _cmsDestroyMutex(cmsContext ContextID, void* mtx);   // Same signature as _cmsDestroyMutexFnPtrType
+CMSAPI cmsBool CMSEXPORT _cmsLockMutex(cmsContext ContextID, void* mtx);      // Same signature as _cmsLockMutexFnPtrType
+CMSAPI void    CMSEXPORT _cmsUnlockMutex(cmsContext ContextID, void* mtx);    // Same signature as _cmsUnlockMutexFnPtrType
+
+
+#ifndef CMS_USE_CPP_API      // Skipped when CMS_USE_CPP_API is defined
+#   ifdef __cplusplus
+    }                        // NOTE(review): presumably closes an extern "C" block opened earlier in the header (outside this chunk) -- confirm
+#   endif
+#endif
+
+#define _lcms_plugin_H       // NOTE(review): looks like the include-guard macro, defined here at the end of the header -- confirm against the opening #ifndef
+#endif                       // NOTE(review): presumably ends the include guard opened at the top of the header
diff --git a/third-party/libjxl/libjxl/third_party/lcms/install-sh b/third-party/libjxl/libjxl/third_party/lcms/install-sh
new file mode 100755
index 0000000000..0b0fdcbba6
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/install-sh
@@ -0,0 +1,501 @@
+#!/bin/sh
+# install - install a program, script, or datafile
+
+scriptversion=2013-12-25.23; # UTC
+
+# This originates from X11R5 (mit/util/scripts/install.sh), which was
+# later released in X11R6 (xc/config/util/install.sh) with the
+# following copyright and license.
+#
+# Copyright (C) 1994 X Consortium
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
+# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# Except as contained in this notice, the name of the X Consortium shall not
+# be used in advertising or otherwise to promote the sale, use or other deal-
+# ings in this Software without prior written authorization from the X Consor-
+# tium.
+#
+#
+# FSF changes to this file are in the public domain.
+#
+# Calling this script install-sh is preferred over install.sh, to prevent
+# 'make' implicit rules from creating a file called install from it
+# when there is no Makefile.
+#
+# This script is compatible with the BSD install script, but was written
+# from scratch.
+
+tab='	'
+nl='
+'
+IFS=" $tab$nl"
+
+# Set DOITPROG to "echo" to test this script.
+
+doit=${DOITPROG-} # Command prefix for the install steps; empty unless DOITPROG is set
+doit_exec=${doit:-exec} # Same prefix, falling back to 'exec' when $doit is empty
+
+# Put in absolute file names if you don't have them in your path;
+# or use environment vars.
+
+chgrpprog=${CHGRPPROG-chgrp} # Each *prog default below can be overridden through its upper-case environment variable
+chmodprog=${CHMODPROG-chmod}
+chownprog=${CHOWNPROG-chown}
+cmpprog=${CMPPROG-cmp}
+cpprog=${CPPROG-cp}
+mkdirprog=${MKDIRPROG-mkdir}
+mvprog=${MVPROG-mv}
+rmprog=${RMPROG-rm}
+stripprog=${STRIPPROG-strip}
+
+posix_mkdir= # Empty = not probed yet; the mkdir -p probe later sets it to 'false' or ':'
+
+# Desired mode of installed file.
+mode=0755
+
+chgrpcmd= # Filled in by -g
+chmodcmd=$chmodprog
+chowncmd= # Filled in by -o
+mvcmd=$mvprog
+rmcmd="$rmprog -f"
+stripcmd= # Filled in by -s
+
+src=
+dst=
+dir_arg= # Set to 'true' by -d
+dst_arg= # Destination: given by -t, or taken from the last argument
+
+copy_on_change=false # Set to 'true' by -C
+is_target_a_directory=possibly # Becomes 'always' with -t and 'never' with -T
+
+usage="\
+Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
+   or: $0 [OPTION]... SRCFILES... DIRECTORY
+   or: $0 [OPTION]... -t DIRECTORY SRCFILES...
+   or: $0 [OPTION]... -d DIRECTORIES...
+
+In the 1st form, copy SRCFILE to DSTFILE.
+In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
+In the 4th, create DIRECTORIES.
+
+Options:
+     --help     display this help and exit.
+     --version  display version info and exit.
+
+  -c            (ignored)
+  -C            install only if different (preserve the last data modification time)
+  -d            create directories instead of installing files.
+  -g GROUP      $chgrpprog installed files to GROUP.
+  -m MODE       $chmodprog installed files to MODE.
+  -o USER       $chownprog installed files to USER.
+  -s            $stripprog installed files.
+  -t DIRECTORY  install into DIRECTORY.
+  -T            report an error if DSTFILE is a directory.
+
+Environment variables override the default commands:
+  CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
+  RMPROG STRIPPROG
+"
+
+while test $# -ne 0; do # Consume leading options; stops at '--' or the first non-option argument
+  case $1 in
+    -c) ;; # Accepted but ignored
+
+    -C) copy_on_change=true;;
+
+    -d) dir_arg=true;;
+
+    -g) chgrpcmd="$chgrpprog $2"
+        shift;; # Extra shift drops the option's value
+
+    --help) echo "$usage"; exit $?;;
+
+    -m) mode=$2
+        case $mode in
+          *' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*) # Reject whitespace and glob characters, since $mode is later used unquoted
+            echo "$0: invalid mode: $mode" >&2
+            exit 1;;
+        esac
+        shift;;
+
+    -o) chowncmd="$chownprog $2"
+        shift;;
+
+    -s) stripcmd=$stripprog;;
+
+    -t)
+        is_target_a_directory=always
+        dst_arg=$2
+        # Protect names problematic for 'test' and other utilities.
+        case $dst_arg in
+          -* | [=\(\)!]) dst_arg=./$dst_arg;;
+        esac
+        shift;;
+
+    -T) is_target_a_directory=never;;
+
+    --version) echo "$0 $scriptversion"; exit $?;;
+
+    --) shift
+        break;; # Everything after '--' is an operand
+
+    -*) echo "$0: invalid option: $1" >&2
+        exit 1;;
+
+    *)  break;; # First operand: stop option parsing
+  esac
+  shift
+done
+
+# We allow the use of options -d and -T together, by making -d
+# take the precedence; this is for compatibility with GNU install.
+
+if test -n "$dir_arg"; then # -d was given...
+  if test -n "$dst_arg"; then # ...together with -t DIRECTORY: refuse the combination
+    echo "$0: target directory not allowed when installing a directory." >&2
+    exit 1
+  fi
+fi
+
+if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
+  # When -d is used, all remaining arguments are directories to create.
+  # When -t is used, the destination is already specified.
+  # Otherwise, the last argument is the destination.  Remove it from $@.
+  for arg # Iterates over the positional parameters as they were when the loop started
+  do
+    if test -n "$dst_arg"; then
+      # $@ is not empty: it contains at least $arg.
+      set fnord "$@" "$dst_arg" # Re-append the previous candidate to the end of $@
+      shift # fnord
+    fi
+    shift # arg
+    dst_arg=$arg # Current candidate; after the last iteration this is the final argument
+    # Protect names problematic for 'test' and other utilities.
+    case $dst_arg in
+      -* | [=\(\)!]) dst_arg=./$dst_arg;;
+    esac
+  done
+fi
+
+if test $# -eq 0; then # Nothing left to install or create
+  if test -z "$dir_arg"; then # Without -d that is an error
+    echo "$0: no input file specified." >&2
+    exit 1
+  fi
+  # It's OK to call 'install-sh -d' without argument.
+  # This can happen when creating conditional directories.
+  exit 0
+fi
+
+if test -z "$dir_arg"; then
+  if test $# -gt 1 || test "$is_target_a_directory" = always; then # Several sources, or -t was used
+    if test ! -d "$dst_arg"; then # ...so the destination has to be an existing directory
+      echo "$0: $dst_arg: Is not a directory." >&2
+      exit 1
+    fi
+  fi
+fi
+
+if test -z "$dir_arg"; then # Only needed when files are copied, i.e. not for -d
+  do_exit='(exit $ret); exit $ret'
+  trap "ret=129; $do_exit" 1 # Signal 1: exit with status 128+1
+  trap "ret=130; $do_exit" 2 # Signal 2: exit with status 128+2
+  trap "ret=141; $do_exit" 13 # Signal 13: exit with status 128+13
+  trap "ret=143; $do_exit" 15 # Signal 15: exit with status 128+15
+
+  # Set umask so as not to create temps with too-generous modes.
+  # However, 'strip' requires both read and write access to temps.
+  case $mode in
+    # Optimize common cases.
+    *644) cp_umask=133;;
+    *755) cp_umask=22;;
+
+    *[0-7]) # Mode ends in an octal digit: compute the umask with expr
+      if test -z "$stripcmd"; then
+        u_plus_rw=
+      else
+        u_plus_rw='% 200' # With -s, reduce the result modulo 200
+      fi
+      cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
+    *) # Any other mode is passed through as the umask value
+      if test -z "$stripcmd"; then
+        u_plus_rw=
+      else
+        u_plus_rw=,u+rw # With -s, append ',u+rw'
+      fi
+      cp_umask=$mode$u_plus_rw;;
+  esac
+fi
+
+for src
+do
+  # Protect names problematic for 'test' and other utilities.
+  case $src in
+    -* | [=\(\)!]) src=./$src;; # Prefix with ./ so the name cannot be taken for an option or a 'test' operator
+  esac
+
+  if test -n "$dir_arg"; then # -d: the operand itself is the directory to create
+    dst=$src
+    dstdir=$dst
+    test -d "$dstdir"
+    dstdir_status=$? # 0 when the directory already exists
+  else
+
+    # Waiting for this to be detected by the "$cpprog $src $dsttmp" command
+    # might cause directories to be created, which would be especially bad
+    # if $src (and thus $dsttmp) contains '*'.
+    if test ! -f "$src" && test ! -d "$src"; then
+      echo "$0: $src does not exist." >&2
+      exit 1
+    fi
+
+    if test -z "$dst_arg"; then
+      echo "$0: no destination specified." >&2
+      exit 1
+    fi
+    dst=$dst_arg
+
+    # If destination is a directory, append the input filename; won't work
+    # if double slashes aren't ignored.
+    if test -d "$dst"; then
+      if test "$is_target_a_directory" = never; then # -T forbids a directory destination
+        echo "$0: $dst_arg: Is a directory" >&2
+        exit 1
+      fi
+      dstdir=$dst
+      dst=$dstdir/`basename "$src"`
+      dstdir_status=0 # Destination directory is known to exist
+    else
+      dstdir=`dirname "$dst"`
+      test -d "$dstdir"
+      dstdir_status=$? # Non-zero means the parent directory has to be created
+    fi
+  fi
+
+  obsolete_mkdir_used=false
+
+  if test $dstdir_status != 0; then
+    case $posix_mkdir in
+      '') # Not probed yet; the branch sets posix_mkdir, so it is not entered again
+        # Create intermediate dirs using mode 755 as modified by the umask.
+        # This is like FreeBSD 'install' as of 1997-10-28.
+        umask=`umask` # A shell variable (not the builtin) holding the current umask
+        case $stripcmd.$umask in
+          # Optimize common cases.
+          *[2367][2367]) mkdir_umask=$umask;;
+          .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
+
+          *[0-7])
+            mkdir_umask=`expr $umask + 22 \
+              - $umask % 100 % 40 + $umask % 20 \
+              - $umask % 10 % 4 + $umask % 2
+            `;;
+          *) mkdir_umask=$umask,go-w;; # Remaining umask values get ',go-w' appended
+        esac
+
+        # With -d, create the new directory with the user-specified mode.
+        # Otherwise, rely on $mkdir_umask.
+        if test -n "$dir_arg"; then
+          mkdir_mode=-m$mode
+        else
+          mkdir_mode=
+        fi
+
+        posix_mkdir=false # Pessimistic default; set to ':' only if the probe below succeeds
+        case $umask in
+          *[123567][0-7][0-7])
+            # POSIX mkdir -p sets u+wx bits regardless of umask, which
+            # is incompatible with FreeBSD 'install' when (umask & 300) != 0.
+            ;;
+          *)
+            tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ # Scratch directory used only for the probe
+            trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0
+
+            if (umask $mkdir_umask &&
+                exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
+            then
+              if test -z "$dir_arg" || {
+                   # Check for POSIX incompatibilities with -m.
+                   # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
+                   # other-writable bit of parent directory when it shouldn't.
+                   # FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
+                   ls_ld_tmpdir=`ls -ld "$tmpdir"`
+                   case $ls_ld_tmpdir in
+                     d????-?r-*) different_mode=700;;
+                     d????-?--*) different_mode=755;;
+                     *) false;;
+                   esac &&
+                   $mkdirprog -m$different_mode -p -- "$tmpdir" && {
+                     ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
+                     test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" # The existing directory's listing must be unchanged
+                   }
+                 }
+              then posix_mkdir=: # Probe passed: 'mkdir -p' will be used directly
+              fi
+              rmdir "$tmpdir/d" "$tmpdir"
+            else
+              # Remove any dirs left behind by ancient mkdir implementations.
+              rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
+            fi
+            trap '' 0;; # Probe finished: disarm the cleanup trap set above
+        esac;;
+    esac
+
+    if
+      $posix_mkdir && (
+        umask $mkdir_umask &&
+        $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
+      )
+    then :
+    else
+
+      # The umask is ridiculous, or mkdir does not conform to POSIX,
+      # or it failed possibly due to a race condition.  Create the
+      # directory the slow way, step by step, checking for races as we go.
+
+      case $dstdir in
+        /*) prefix='/';;
+        [-=\(\)!]*) prefix='./';;
+        *)  prefix='';;
+      esac
+
+      oIFS=$IFS
+      IFS=/
+      set -f
+      set fnord $dstdir
+      shift
+      set +f
+      IFS=$oIFS
+
+      prefixes=
+
+      for d # Walk the '/'-separated components of $dstdir, extending $prefix one component at a time
+      do
+        test X"$d" = X && continue # Skip empty components
+
+        prefix=$prefix$d
+        if test -d "$prefix"; then
+          prefixes= # This level exists, so nothing collected so far needs creating
+        else
+          if $posix_mkdir; then
+            (umask $mkdir_umask && # Run the umask builtin; 'umask=...' would only assign a variable and leave the caller's umask in force
+             $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
+            # Don't fail if two instances are running concurrently.
+            test -d "$prefix" || exit 1
+          else
+            case $prefix in
+              *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; # Escape single quotes for the later eval
+              *) qprefix=$prefix;;
+            esac
+            prefixes="$prefixes '$qprefix'" # Collect missing levels; they are created in one mkdir call after the loop
+          fi
+        fi
+        prefix=$prefix/
+      done
+
+      if test -n "$prefixes"; then # Missing path components were collected by the loop above
+        # Don't fail if two instances are running concurrently.
+        (umask $mkdir_umask &&
+         eval "\$doit_exec \$mkdirprog $prefixes") ||
+          test -d "$dstdir" || exit 1
+        obsolete_mkdir_used=true # Read by the -d branch below when deciding whether to run chmod
+      fi
+    fi
+  fi
+
+  if test -n "$dir_arg"; then # -d: the directory exists by now; only ownership and mode remain
+    { test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
+    { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
+    { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
+      test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
+  else
+
+    # Make a couple of temp file names in the proper directory.
+    dsttmp=$dstdir/_inst.$$_
+    rmtmp=$dstdir/_rm.$$_
+
+    # Trap to clean up those temp files at exit.
+    trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
+
+    # Copy the file name to the temp name.
+    (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
+
+    # and set any options; do chmod last to preserve setuid bits.
+    #
+    # If any of these fail, we abort the whole thing.  If we want to
+    # ignore errors from any of these, just make sure not to ignore
+    # errors from the above "$doit $cpprog $src $dsttmp" command.
+    #
+    { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
+    { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
+    { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
+    { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
+
+    # If -C, don't bother to copy if it wouldn't change the file.
+    if $copy_on_change &&
+       old=`LC_ALL=C ls -dlL "$dst"     2>/dev/null` &&
+       new=`LC_ALL=C ls -dlL "$dsttmp"  2>/dev/null` &&
+       set -f && # Disable globbing while the ls output is split into words
+       set X $old && old=:$2:$4:$5:$6 && # Keep words 1, 3, 4 and 5 of the ls line (X is a placeholder for $1)
+       set X $new && new=:$2:$4:$5:$6 &&
+       set +f &&
+       test "$old" = "$new" &&
+       $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
+    then
+      rm -f "$dsttmp" # Destination is already identical: drop the temp copy
+    else
+      # Rename the file to the real destination.
+      $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
+
+      # The rename failed, perhaps because mv can't rename something else
+      # to itself, or perhaps because mv is so ancient that it does not
+      # support -f.
+      {
+        # Now remove or move aside any old file at destination location.
+        # We try this two ways since rm can't unlink itself on some
+        # systems and the destination file might be busy for other
+        # reasons.  In this case, the final cleanup might fail but the new
+        # file should still install successfully.
+        {
+          test ! -f "$dst" ||
+          $doit $rmcmd -f "$dst" 2>/dev/null ||
+          { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
+            { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
+          } ||
+          { echo "$0: cannot unlink or rename $dst" >&2
+            (exit 1); exit 1
+          }
+        } &&
+
+        # Now rename the file to the real destination.
+        $doit $mvcmd "$dsttmp" "$dst"
+      }
+    fi || exit 1
+
+    trap '' 0 # This file is done: disarm the temp-file cleanup trap
+  fi
+done
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/third-party/libjxl/libjxl/third_party/lcms/lcms2.pc.in b/third-party/libjxl/libjxl/third_party/lcms/lcms2.pc.in
new file mode 100644
index 0000000000..5b4213c36a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/lcms2.pc.in
@@ -0,0 +1,11 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: @PACKAGE@
+Description: LCMS Color Management Library
+Version: @VERSION@
+Libs: -L${libdir} -llcms2
+Libs.private: @LIB_MATH@ @LIB_THREAD@
+Cflags: -I${includedir}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/ltmain.sh b/third-party/libjxl/libjxl/third_party/lcms/ltmain.sh
new file mode 100644
index 0000000000..1b425a27f6
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/ltmain.sh
@@ -0,0 +1,9656 @@
+
+# libtool (GNU libtool) 2.4.2
+# Written by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
+
+# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006,
+# 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+# This is free software; see the source for copying conditions.  There is NO
+# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+# GNU Libtool is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# As a special exception to the GNU General Public License,
+# if you distribute this file as part of a program or library that
+# is built using GNU Libtool, you may include this file under the
+# same distribution terms that you use for the rest of that program.
+#
+# GNU Libtool is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Libtool; see the file COPYING.  If not, a copy
+# can be downloaded from http://www.gnu.org/licenses/gpl.html,
+# or obtained by writing to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# Usage: $progname [OPTION]... [MODE-ARG]...
+#
+# Provide generalized library-building support services.
+#
+#       --config             show all configuration variables
+#       --debug              enable verbose shell tracing
+#   -n, --dry-run            display commands without modifying any files
+#       --features           display basic configuration information and exit
+#       --mode=MODE          use operation mode MODE
+#       --preserve-dup-deps  don't remove duplicate dependency libraries
+#       --quiet, --silent    don't print informational messages
+#       --no-quiet, --no-silent
+#                            print informational messages (default)
+#       --no-warn            don't display warning messages
+#       --tag=TAG            use configuration variables from tag TAG
+#   -v, --verbose            print more informational messages than default
+#       --no-verbose         don't print the extra informational messages
+#       --version            print version information
+#   -h, --help, --help-all   print short, long, or detailed help message
+#
+# MODE must be one of the following:
+#
+#         clean              remove files from the build directory
+#         compile            compile a source file into a libtool object
+#         execute            automatically set library path, then run a program
+#         finish             complete the installation of libtool libraries
+#         install            install libraries or executables
+#         link               create a library or an executable
+#         uninstall          remove libraries from an installed directory
+#
+# MODE-ARGS vary depending on the MODE.  When passed as first option,
+# `--mode=MODE' may be abbreviated as `MODE' or a unique abbreviation of that.
+# Try `$progname --help --mode=MODE' for a more detailed description of MODE.
+#
+# When reporting a bug, please describe a test case to reproduce it and
+# include the following information:
+#
+#         host-triplet:	$host
+#         shell:		$SHELL
+#         compiler:		$LTCC
+#         compiler flags:		$LTCFLAGS
+#         linker:		$LD (gnu? $with_gnu_ld)
+#         $progname:	(GNU libtool) 2.4.2
+#         automake:	$automake_version
+#         autoconf:	$autoconf_version
+#
+# Report bugs to <bug-libtool@gnu.org>.
+# GNU libtool home page: <http://www.gnu.org/software/libtool/>.
+# General help using GNU software: <http://www.gnu.org/gethelp/>.
+
+PROGRAM=libtool
+PACKAGE=libtool
+VERSION=2.4.2
+TIMESTAMP=""
+package_revision=1.3337
+
+# Be Bourne compatible
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then # Running under zsh and 'emulate sh' works
+  emulate sh
+  NULLCMD=:
+  # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac # Enable posix mode when 'set -o' lists such an option
+fi
+BIN_SH=xpg4; export BIN_SH # for Tru64
+DUALCASE=1; export DUALCASE # for MKS sh
+
+# A function that is used when there is no print builtin or printf.
+func_fallback_echo ()
+{ # Writes its first argument followed by a newline, using cat and a here-document
+  eval 'cat <<_LTECHO_EOF
+$1
+_LTECHO_EOF'
+} # The here-document is wrapped in eval, so it is only parsed when the function runs
+
+# NLS nuisances: We save the old values to restore during execute mode.
+lt_user_locale=
+lt_safe_locale=
+for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES
+do
+  eval "if test \"\${$lt_var+set}\" = set; then
+          save_$lt_var=\$$lt_var
+          $lt_var=C
+	  export $lt_var
+	  lt_user_locale=\"$lt_var=\\\$save_\$lt_var; \$lt_user_locale\"
+	  lt_safe_locale=\"$lt_var=C; \$lt_safe_locale\"
+	fi"
+done
+LC_ALL=C
+LANGUAGE=C
+export LANGUAGE LC_ALL
+
+$lt_unset CDPATH
+
+
+# Work around backward compatibility issue on IRIX 6.5. On IRIX 6.4+, sh
+# is ksh but when the shell is invoked as "sh" and the current value of
+# the _XPG environment variable is not equal to 1 (one), the special
+# positional parameter $0, within a function call, is the name of the
+# function.
+progpath="$0"
+
+
+
+: ${CP="cp -f"} # ': ${VAR=default}' assigns the default only when VAR is unset
+test "${ECHO+set}" = set || ECHO=${as_echo-'printf %s\n'} # Use $as_echo if set, otherwise printf
+: ${MAKE="make"}
+: ${MKDIR="mkdir"}
+: ${MV="mv -f"}
+: ${RM="rm -f"}
+: ${SHELL="${CONFIG_SHELL-/bin/sh}"}
+: ${Xsed="$SED -e 1s/^X//"} # sed command that strips one leading 'X' from the first line; $SED is not set in this chunk
+
+# Global variables:
+EXIT_SUCCESS=0
+EXIT_FAILURE=1
+EXIT_MISMATCH=63  # $? = 63 is used to indicate version mismatch to missing.
+EXIT_SKIP=77	  # $? = 77 is used to indicate a skipped test to automake.
+
+exit_status=$EXIT_SUCCESS
+
+# Make sure IFS has a sensible default
+lt_nl='
+'
+IFS=" 	$lt_nl"
+
+dirname="s,/[^/]*$,," # sed script: delete the last '/' and everything after it
+basename="s,^.*/,," # sed script: delete everything up to and including the last '/'
+
+# func_dirname file append nondir_replacement
+# Compute the dirname of FILE.  If nonempty, add APPEND to the result,
+# otherwise set result to NONDIR_REPLACEMENT.
+func_dirname ()
+{
+    func_dirname_result=`$ECHO "${1}" | $SED "$dirname"` # Strip the last '/' and what follows it
+    if test "X$func_dirname_result" = "X${1}"; then # Unchanged, i.e. the sed script found no '/' to strip
+      func_dirname_result="${3}" # ...so return NONDIR_REPLACEMENT
+    else
+      func_dirname_result="$func_dirname_result${2}" # Otherwise return the directory part with APPEND added
+    fi
+} # func_dirname may be replaced by extended shell implementation
+
+
+# func_basename file
+func_basename ()
+{
+    func_basename_result=`$ECHO "${1}" | $SED "$basename"` # Strip everything up to and including the last '/'
+} # func_basename may be replaced by extended shell implementation
+
+
+# func_dirname_and_basename file append nondir_replacement
+# perform func_basename and func_dirname in a single function
+# call:
+#   dirname:  Compute the dirname of FILE.  If nonempty,
+#             add APPEND to the result, otherwise set result
+#             to NONDIR_REPLACEMENT.
+#             value returned in "$func_dirname_result"
+#   basename: Compute filename of FILE.
+#             value returned in "$func_basename_result"
+# Implementation must be kept synchronized with func_dirname
+# and func_basename. For efficiency, we do not delegate to
+# those functions but instead duplicate the functionality here.
+func_dirname_and_basename ()
+{
+    # Extract subdirectory from the argument.
+    func_dirname_result=`$ECHO "${1}" | $SED -e "$dirname"`
+    if test "X$func_dirname_result" = "X${1}"; then # Unchanged, i.e. the sed script found no '/' to strip
+      func_dirname_result="${3}" # ...so return NONDIR_REPLACEMENT
+    else
+      func_dirname_result="$func_dirname_result${2}" # Otherwise return the directory part with APPEND added
+    fi
+    func_basename_result=`$ECHO "${1}" | $SED -e "$basename"` # The part after the last '/'
+} # func_dirname_and_basename may be replaced by extended shell implementation
+
+
+# func_stripname prefix suffix name
+# strip PREFIX and SUFFIX off of NAME.
+# PREFIX and SUFFIX must not contain globbing or regex special
+# characters, hashes, percent signs, but SUFFIX may contain a leading
+# dot (in which case that matches only a dot).
+# func_strip_suffix prefix name
+func_stripname ()
+{
+    case ${2} in
+      .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;; # SUFFIX starts with a dot: escape it so it matches only a literal dot
+      *)  func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;; # Any other SUFFIX is used in the sed pattern as-is
+    esac
+} # func_stripname may be replaced by extended shell implementation
+
+
+# These SED scripts presuppose an absolute path with a trailing slash.
+pathcar='s,^/\([^/]*\).*$,\1,'
+pathcdr='s,^/[^/]*,,'
+removedotparts=':dotsl
+		s@/\./@/@g
+		t dotsl
+		s,/\.$,/,'
+collapseslashes='s@/\{1,\}@/@g'
+finalslash='s,/*$,/,'
+
+# func_normal_abspath PATH
+# Remove doubled-up and trailing slashes, "." path components,
+# and cancel out any ".." path components in PATH after making
+# it an absolute path.
+#             value returned in "$func_normal_abspath_result"
+func_normal_abspath ()
+{
+  # Start from root dir and reassemble the path.
+  func_normal_abspath_result=
+  func_normal_abspath_tpath=$1
+  func_normal_abspath_altnamespace=  # becomes '/' below for a path with exactly two leading slashes
+  case $func_normal_abspath_tpath in
+    "")
+      # Empty path, that just means $cwd.
+      func_stripname '' '/' "`pwd`"
+      func_normal_abspath_result=$func_stripname_result
+      return
+    ;;
+    # The next three entries are used to spot a run of precisely
+    # two leading slashes without using negated character classes;
+    # we take advantage of case's first-match behaviour.
+    ///*)
+      # Unusual form of absolute path, do nothing.
+    ;;
+    //*)
+      # Not necessarily an ordinary path; POSIX reserves leading '//'
+      # and for example Cygwin uses it to access remote file shares
+      # over CIFS/SMB, so we conserve a leading double slash if found.
+      func_normal_abspath_altnamespace=/
+    ;;
+    /*)
+      # Absolute path, do nothing.
+    ;;
+    *)
+      # Relative path, prepend $cwd.
+      func_normal_abspath_tpath=`pwd`/$func_normal_abspath_tpath
+    ;;
+  esac
+  # Cancel out all the simple stuff to save iterations.  We also want
+  # the path to end with a slash for ease of parsing, so make sure
+  # there is one (and only one) here.
+  func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \
+        -e "$removedotparts" -e "$collapseslashes" -e "$finalslash"`
+  while :; do  # each pass consumes the leading component of tpath
+    # Processed it all yet?
+    if test "$func_normal_abspath_tpath" = / ; then
+      # If we ascended to the root using ".." the result may be empty now.
+      if test -z "$func_normal_abspath_result" ; then
+        func_normal_abspath_result=/
+      fi
+      break
+    fi
+    func_normal_abspath_tcomponent=`$ECHO "$func_normal_abspath_tpath" | $SED \
+        -e "$pathcar"`
+    func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \
+        -e "$pathcdr"`
+    # Figure out what to do with it
+    case $func_normal_abspath_tcomponent in
+      "")
+        # Trailing empty path component, ignore it.
+      ;;
+      ..)
+        # Parent dir; strip last assembled component from result.
+        func_dirname "$func_normal_abspath_result"
+        func_normal_abspath_result=$func_dirname_result
+      ;;
+      *)
+        # Actual path component, append it.
+        func_normal_abspath_result=$func_normal_abspath_result/$func_normal_abspath_tcomponent
+      ;;
+    esac
+  done
+  # Restore leading double-slash if one was found on entry.
+  func_normal_abspath_result=$func_normal_abspath_altnamespace$func_normal_abspath_result
+}
+
+# func_relative_path SRCDIR DSTDIR
+# generates a relative path from SRCDIR to DSTDIR, with a trailing
+# slash if non-empty, suitable for immediately appending a filename
+# without needing to append a separator.
+#             value returned in "$func_relative_path_result"
+func_relative_path ()
+{
+  func_relative_path_result=
+  func_normal_abspath "$1"
+  func_relative_path_tlibdir=$func_normal_abspath_result
+  func_normal_abspath "$2"
+  func_relative_path_tbindir=$func_normal_abspath_result
+
+  # Ascend the tree starting from libdir
+  while :; do
+    # check if we have found a prefix of bindir (compared literally)
+    case $func_relative_path_tbindir in
+      "$func_relative_path_tlibdir")
+        # found an exact match
+        func_relative_path_tcancelled=
+        break
+        ;;
+      "$func_relative_path_tlibdir"*)
+        # found a matching prefix (NOTE: textual, /a/b also matches /a/bc)
+        func_stripname "$func_relative_path_tlibdir" '' "$func_relative_path_tbindir"
+        func_relative_path_tcancelled=$func_stripname_result
+        if test -z "$func_relative_path_result"; then
+          func_relative_path_result=.
+        fi
+        break
+        ;;
+      *)
+        # no match yet: go up one level (quoted, the path may hold blanks)
+        func_dirname "$func_relative_path_tlibdir"
+        func_relative_path_tlibdir=${func_dirname_result}
+        func_relative_path_result=../$func_relative_path_result
+        if test "x$func_relative_path_tlibdir" = x ; then
+          # Reached the root; the whole of bindir has to be appended.
+          func_relative_path_tcancelled=$func_relative_path_tbindir
+          break
+        fi
+        ;;
+    esac
+  done
+
+  # Now calculate path; take care to avoid doubling-up slashes.
+  func_stripname '' '/' "$func_relative_path_result"
+  func_relative_path_result=$func_stripname_result
+  func_stripname '/' '/' "$func_relative_path_tcancelled"
+  if test "x$func_stripname_result" != x ; then
+    func_relative_path_result=${func_relative_path_result}/${func_stripname_result}
+  fi
+
+  # Normalisation. If bindir is libdir, return empty string,
+  # else relative path ending with a slash; either way, target
+  # file name can be directly appended.
+  if test -n "$func_relative_path_result"; then
+    func_stripname './' '' "$func_relative_path_result/"
+    func_relative_path_result=$func_stripname_result
+  fi
+}
+
+# The name of this program:
+func_dirname_and_basename "$progpath"
+progname=$func_basename_result
+
+# Make sure we have an absolute path for reexecution:
+case $progpath in
+  [\\/]*|[A-Za-z]:\\*) ;;  # already absolute: leading slash/backslash or drive letter
+  *[\\/]*)
+     progdir=$func_dirname_result
+     progdir=`cd "$progdir" && pwd`
+     progpath="$progdir/$progname"
+     ;;
+  *)
+     save_IFS="$IFS"  # bare command name: search the PATH entries for it
+     IFS=${PATH_SEPARATOR-:}
+     for progdir in $PATH; do
+       IFS="$save_IFS"
+       test -x "$progdir/$progname" && break
+     done
+     IFS="$save_IFS"
+     test -n "$progdir" || progdir=`pwd`  # NOTE(review): after a failed search progdir still holds the last PATH entry
+     progpath="$progdir/$progname"
+     ;;
+esac
+
+# Sed substitution that helps us do robust quoting.  It backslashifies
+# metacharacters that are still active within double-quoted strings.
+Xsed="${SED}"' -e 1s/^X//'
+sed_quote_subst='s/\([`"$\\]\)/\\\1/g'
+
+# Same as above, but do not quote variable references.
+double_quote_subst='s/\(["`\\]\)/\\\1/g'
+
+# Sed substitution that turns a string into a regex that matches the
+# string literally.
+sed_make_literal_regex='s,[].[^$\\*\/],\\&,g'
+
+# Sed substitution that converts a w32 file name or path
+# which contains forward slashes, into one that contains
+# (escaped) backslashes.  A very naive implementation.
+lt_sed_naive_backslashify='s|\\\\*|\\|g;s|/|\\|g;s|\\|\\\\|g'
+
+# Re-`\' parameter expansions in output of double_quote_subst that were
+# `\'-ed in input to the same.  If an odd number of `\' preceded a '$'
+# in input to double_quote_subst, that '$' was protected from expansion.
+# Since each input `\' is now two `\'s, look for any number of runs of
+# four `\'s followed by two `\'s and then a '$'.  `\' that '$'.
+bs='\\'
+bs2='\\\\'
+bs4='\\\\\\\\'
+dollar='\$'
+sed_double_backslash="\
+  s/$bs4/&\\
+/g
+  s/^$bs2$dollar/$bs&/
+  s/\\([^$bs]\\)$bs2$dollar/\\1$bs2$bs$dollar/g
+  s/\n//g"
+
+# Standard options:
+opt_dry_run=false
+opt_help=false
+opt_quiet=false
+opt_verbose=false
+opt_warning=:
+
+# func_echo arg...
+# Echo program name prefixed message, along with the current mode
+# name if it has been set yet.
+func_echo ()
+{
+    $ECHO "$progname: ${opt_mode+$opt_mode: }$*"  # the "mode: " part appears only when opt_mode is set
+}
+
+# func_verbose arg...
+# Echo program name prefixed message in verbose mode only.
+func_verbose ()
+{
+    $opt_verbose && func_echo ${1+"$@"}  # opt_verbose is run as a command (':' or 'false')
+
+    # A bug in bash halts the script if the last line of a function
+    # fails when set -e is in force, so we need another command to
+    # work around that:
+    :
+}
+
+# func_echo_all arg...
+# Invoke $ECHO with all args joined into one word ("$*"; a space with the default IFS).
+func_echo_all ()
+{
+    $ECHO "$*"
+}
+
+# func_error arg...
+# Echo program name prefixed message to standard error.
+func_error ()
+{
+    $ECHO "$progname: ${opt_mode+$opt_mode: }"${1+"$@"} 1>&2  # ${1+"$@"} expands to nothing when no args were given
+}
+
+# func_warning arg...
+# Echo program name prefixed warning message to standard error.
+func_warning ()
+{
+    $opt_warning && $ECHO "$progname: ${opt_mode+$opt_mode: }warning: "${1+"$@"} 1>&2
+
+    # bash bug again (same set -e workaround as in func_verbose):
+    :
+}
+
+# func_fatal_error arg...
+# Echo program name prefixed message to standard error, and exit.
+func_fatal_error ()
+{
+    func_error ${1+"$@"}
+    exit $EXIT_FAILURE  # EXIT_FAILURE is not set in this part of the script; presumably defined earlier
+}
+
+# func_fatal_help arg...
+# Echo program name prefixed message to standard error, followed by
+# a help hint, and exit.
+func_fatal_help ()
+{
+    func_error ${1+"$@"}
+    func_fatal_error "$help"  # prints the hint held in $help, then exits
+}
+help="Try \`$progname --help' for more information."  ## default
+
+
+# func_grep expression filename
+# Check whether EXPRESSION matches any line of FILENAME, without output.
+func_grep ()
+{
+    $GREP "$1" "$2" >/dev/null 2>&1  # only the exit status is of interest
+}
+
+
+# func_mkdir_p directory-path
+# Make sure the entire path to DIRECTORY-PATH is available.
+func_mkdir_p ()
+{
+    my_directory_path="$1"
+    my_dir_list=
+
+    if test -n "$my_directory_path" && test "$opt_dry_run" != ":"; then  # nothing to do for an empty path or in dry-run mode
+
+      # Protect directory names starting with `-'
+      case $my_directory_path in
+        -*) my_directory_path="./$my_directory_path" ;;
+      esac
+
+      # While some portion of DIR does not yet exist...
+      while test ! -d "$my_directory_path"; do
+        # ...make a list in topmost first order.  Use a colon delimited
+	# list in case some portion of path contains whitespace.
+        my_dir_list="$my_directory_path:$my_dir_list"
+
+        # If the last portion added has no slash in it, the list is done
+        case $my_directory_path in */*) ;; *) break ;; esac
+
+        # ...otherwise throw away the child directory and loop
+        my_directory_path=`$ECHO "$my_directory_path" | $SED -e "$dirname"`
+      done
+      my_dir_list=`$ECHO "$my_dir_list" | $SED 's,:*$,,'`
+
+      save_mkdir_p_IFS="$IFS"; IFS=':'
+      for my_dir in $my_dir_list; do
+	IFS="$save_mkdir_p_IFS"
+        # mkdir can fail with a `File exists' error if two processes
+        # try to create one of the directories concurrently.  Don't
+        # stop in that case!
+        $MKDIR "$my_dir" 2>/dev/null || :
+      done
+      IFS="$save_mkdir_p_IFS"
+
+      # Bail out if we (or some other process) failed to create a directory.
+      test -d "$my_directory_path" || \
+        func_fatal_error "Failed to create \`$1'"
+    fi
+}
+
+
+# func_mktempdir [string]
+# Make a temporary directory that won't clash with other running
+# libtool processes, and avoids race conditions if possible.  If
+# given, STRING is the basename for that directory.
+func_mktempdir ()
+{
+    my_template="${TMPDIR-/tmp}/${1-$progname}"
+
+    if test "$opt_dry_run" = ":"; then
+      # Return a directory name, but don't create it in dry-run mode
+      my_tmpdir="${my_template}-$$"
+    else
+
+      # If mktemp works, use that first and foremost
+      my_tmpdir=`mktemp -d "${my_template}-XXXXXXXX" 2>/dev/null`
+
+      if test ! -d "$my_tmpdir"; then
+        # Failing that, at least try and use $RANDOM to avoid a race
+        my_tmpdir="${my_template}-${RANDOM-0}$$"
+
+        save_mktempdir_umask=`umask`
+        umask 0077  # keep the new directory private to the current user
+        $MKDIR "$my_tmpdir"
+        umask $save_mktempdir_umask
+      fi
+
+      # If we're not in dry-run mode, bomb out on failure
+      test -d "$my_tmpdir" || \
+        func_fatal_error "cannot create temporary directory \`$my_tmpdir'"
+    fi
+
+    $ECHO "$my_tmpdir"  # the directory name is reported on standard output
+}
+
+
+# func_quote_for_eval arg
+# Aesthetically quote ARG to be evaled later.
+# This function returns two values: FUNC_QUOTE_FOR_EVAL_RESULT
+# is double-quoted, suitable for a subsequent eval, whereas
+# FUNC_QUOTE_FOR_EVAL_UNQUOTED_RESULT has merely all characters
+# which are still active within double quotes backslashified.
+func_quote_for_eval ()
+{
+    case $1 in
+      *[\\\`\"\$]*)
+	func_quote_for_eval_unquoted_result=`$ECHO "$1" | $SED "$sed_quote_subst"` ;;
+      *)
+        func_quote_for_eval_unquoted_result="$1" ;;
+    esac
+
+    case $func_quote_for_eval_unquoted_result in
+      # Double-quote args containing shell metacharacters to delay
+      # word splitting, command substitution and variable
+      # expansion for a subsequent eval.
+      # Many Bourne shells cannot handle close brackets correctly
+      # in scan sets, so we specify it separately.
+      *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \	]*|*]*|"")
+        func_quote_for_eval_result="\"$func_quote_for_eval_unquoted_result\""
+        ;;
+      *)
+        func_quote_for_eval_result="$func_quote_for_eval_unquoted_result"
+    esac
+}
+
+
+# func_quote_for_expand arg
+# Like func_quote_for_eval, but leave variable references unquoted
+# so they still expand; result in $func_quote_for_expand_result.
+func_quote_for_expand ()
+{
+    case $1 in
+      *[\\\`\"]*)
+	my_arg=`$ECHO "$1" | $SED \
+	    -e "$double_quote_subst" -e "$sed_double_backslash"` ;;
+      *)
+        my_arg="$1" ;;
+    esac
+
+    case $my_arg in
+      # Double-quote args containing shell metacharacters to delay
+      # word splitting and command substitution for a subsequent eval.
+      # Many Bourne shells cannot handle close brackets correctly
+      # in scan sets, so we specify it separately.
+      *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \	]*|*]*|"")
+        my_arg="\"$my_arg\""
+        ;;
+    esac
+
+    func_quote_for_expand_result="$my_arg"
+}
+
+
+# func_show_eval cmd [fail_exp]
+# Unless opt_silent is true, output CMD.  Then, unless opt_dry_run is
+# true, evaluate CMD.  If the evaluation of CMD fails, evaluate
+# FAIL_EXP (default ':') with $? set to CMD's exit status.
+func_show_eval ()
+{
+    my_cmd="$1"
+    my_fail_exp="${2-:}"
+
+    ${opt_silent-false} || {
+      func_quote_for_expand "$my_cmd"
+      eval "func_echo $func_quote_for_expand_result"
+    }
+
+    if ${opt_dry_run-false}; then :; else
+      eval "$my_cmd"
+      my_status=$?
+      if test "$my_status" -eq 0; then :; else
+	eval "(exit $my_status); $my_fail_exp"
+      fi
+    fi
+}
+
+
+# func_show_eval_locale cmd [fail_exp]
+# Same as func_show_eval, except that CMD is evaluated after
+# $lt_user_locale, and $lt_safe_locale is evaluated again once CMD
+# has finished (and before FAIL_EXP runs).
+func_show_eval_locale ()
+{
+    my_cmd="$1"
+    my_fail_exp="${2-:}"
+
+    ${opt_silent-false} || {
+      func_quote_for_expand "$my_cmd"
+      eval "func_echo $func_quote_for_expand_result"
+    }
+
+    if ${opt_dry_run-false}; then :; else
+      eval "$lt_user_locale
+	    $my_cmd"
+      my_status=$?
+      eval "$lt_safe_locale"
+      if test "$my_status" -eq 0; then :; else
+	eval "(exit $my_status); $my_fail_exp"
+      fi
+    fi
+}
+
+# func_tr_sh
+# Derive a valid shell variable name from $1 and leave it in
+# $func_tr_sh_result.  Every character outside a-zA-Z0-9_ becomes
+# '_', and a name that starts with a digit gets a '_' put in
+# front of it.
+func_tr_sh ()
+{
+  # Start from the argument; a name that is already valid stays as is.
+  func_tr_sh_result=$1
+  case $func_tr_sh_result in
+  [0-9]* | *[!a-zA-Z0-9_]*)
+    # Only names that need fixing go through sed.
+    func_tr_sh_result=`$ECHO "$func_tr_sh_result" | $SED 's/^\([0-9]\)/_\1/; s/[^a-zA-Z0-9_]/_/g'`
+    ;;
+  esac
+}
+
+
+# func_version
+# Echo version message (taken from the comment header of $progpath) to standard output and exit.
+func_version ()
+{
+    $opt_debug
+
+    $SED -n '/(C)/!b go
+	:more
+	/\./!{
+	  N
+	  s/\n# / /
+	  b more
+	}
+	:go
+	/^# '$PROGRAM' (GNU /,/# warranty; / {
+        s/^# //
+	s/^# *$//
+        s/\((C)\)[ 0-9,-]*\( [1-9][0-9]*\)/\1\2/
+        p
+     }' < "$progpath"
+     exit $?
+}
+
+# func_usage
+# Echo short help message (the "# Usage:" comment block of $progpath) to standard output and exit.
+func_usage ()
+{
+    $opt_debug
+
+    $SED -n '/^# Usage:/,/^#  *.*--help/ {
+        s/^# //
+	s/^# *$//
+	s/\$progname/'$progname'/
+	p
+    }' < "$progpath"
+    echo
+    $ECHO "run \`$progname --help | more' for full usage"
+    exit $?  # this is the status of the $ECHO just above, not of sed
+}
+
+# func_help [NOEXIT]
+# Echo long help message to standard output and exit with sed's
+# status, unless a non-empty argument (e.g. 'noexit') is passed.
+func_help ()
+{
+    $opt_debug
+
+    $SED -n '/^# Usage:/,/# Report bugs to/ {
+	:print
+        s/^# //
+	s/^# *$//
+	s*\$progname*'$progname'*
+	s*\$host*'"$host"'*
+	s*\$SHELL*'"$SHELL"'*
+	s*\$LTCC*'"$LTCC"'*
+	s*\$LTCFLAGS*'"$LTCFLAGS"'*
+	s*\$LD*'"$LD"'*
+	s/\$with_gnu_ld/'"$with_gnu_ld"'/
+	s/\$automake_version/'"`(${AUTOMAKE-automake} --version) 2>/dev/null |$SED 1q`"'/
+	s/\$autoconf_version/'"`(${AUTOCONF-autoconf} --version) 2>/dev/null |$SED 1q`"'/
+	p
+	d
+     }
+     /^# .* home page:/b print
+     /^# General help using/b print
+     ' < "$progpath"
+    ret=$?
+    if test -z "$1"; then
+      exit $ret
+    fi
+}
+
+# func_missing_arg argname
+# Echo program name prefixed message to standard error and set global
+# exit_cmd.
+func_missing_arg ()
+{
+    $opt_debug
+
+    func_error "missing argument for $1."
+    exit_cmd=exit  # acted on later by "$exit_cmd $EXIT_FAILURE" once option parsing is done
+}
+
+
+# func_split_short_opt shortopt
+# Set func_split_short_opt_name and func_split_short_opt_arg shell
+# variables after splitting SHORTOPT after the 2nd character.
+func_split_short_opt ()
+{
+    my_sed_short_opt='1s/^\(..\).*$/\1/;q'  # keep the first two characters
+    my_sed_short_rest='1s/^..\(.*\)$/\1/;q'  # keep everything after them
+
+    func_split_short_opt_name=`$ECHO "$1" | $SED "$my_sed_short_opt"`
+    func_split_short_opt_arg=`$ECHO "$1" | $SED "$my_sed_short_rest"`
+} # func_split_short_opt may be replaced by extended shell implementation
+
+
+# func_split_long_opt longopt
+# Set func_split_long_opt_name and func_split_long_opt_arg shell
+# variables after splitting LONGOPT at the first `=' sign.
+func_split_long_opt ()
+{
+    my_sed_long_opt='1s/^\(--[^=]*\)=.*/\1/;q'  # the "--name" part before the first '='
+    my_sed_long_arg='1s/^--[^=]*=//'  # everything after the first '='
+
+    func_split_long_opt_name=`$ECHO "$1" | $SED "$my_sed_long_opt"`
+    func_split_long_opt_arg=`$ECHO "$1" | $SED "$my_sed_long_arg"`
+} # func_split_long_opt may be replaced by extended shell implementation
+
+exit_cmd=:  # no-op by default; func_missing_arg and the --mode check set it to 'exit'
+
+
+
+
+
+magic="%%%MAGIC variable%%%"
+magic_exe="%%%MAGIC EXE variable%%%"
+
+# Global variables.
+nonopt=
+preserve_args=
+lo2o="s/\\.lo\$/.${objext}/"  # sed script: trailing .lo -> .$objext
+o2lo="s/\\.${objext}\$/.lo/"  # sed script: trailing .$objext -> .lo
+extracted_archives=
+extracted_serial=0
+
+# If this variable is set in any of the actions, the command in it
+# will be execed at the end.  This prevents here-documents from being
+# left over by shells.
+exec_cmd=
+
+# func_append var value
+# Append VALUE to the end of shell variable VAR.
+func_append ()
+{
+    eval "${1}=\$${1}\${2}"  # eval sees: VAR=$VAR${2}, so VALUE itself is not re-parsed
+} # func_append may be replaced by extended shell implementation
+
+# func_append_quoted var value
+# Quote VALUE (via func_quote_for_eval) and append to the end of shell
+# variable VAR, separated by a space.
+func_append_quoted ()
+{
+    func_quote_for_eval "${2}"
+    eval "${1}=\$${1}\\ \$func_quote_for_eval_result"
+} # func_append_quoted may be replaced by extended shell implementation
+
+
+# func_arith arithmetic-term...
+func_arith ()
+{
+    func_arith_result=`expr "${@}"`  # the terms are handed to expr; its output is the result
+} # func_arith may be replaced by extended shell implementation
+
+
+# func_len string
+# STRING may not start with a hyphen.
+func_len ()
+{
+    func_len_result=`expr "${1}" : ".*" 2>/dev/null || echo $max_cmd_len`  # falls back to $max_cmd_len when expr fails
+} # func_len may be replaced by extended shell implementation
+
+
+# func_lo2o object
+func_lo2o ()
+{
+    func_lo2o_result=`$ECHO "${1}" | $SED "$lo2o"`  # applies the $lo2o script: trailing .lo -> .$objext
+} # func_lo2o may be replaced by extended shell implementation
+
+
+# func_xform libobj-or-source
+func_xform ()
+{
+    func_xform_result=`$ECHO "${1}" | $SED 's/\.[^.]*$/.lo/'`  # replaces the last extension with .lo
+} # func_xform may be replaced by extended shell implementation
+
+
+# func_fatal_configuration arg...
+# Echo program name prefixed message to standard error, followed by
+# a configuration failure hint, and exit.
+func_fatal_configuration ()
+{
+    func_error ${1+"$@"}
+    func_error "See the $PACKAGE documentation for more information."
+    func_fatal_error "Fatal configuration error."  # this call is the one that exits
+}
+
+
+# func_config
+# Display the configuration for all the tags in this script, then exit.
+func_config ()
+{
+    re_begincf='^# ### BEGIN LIBTOOL'
+    re_endcf='^# ### END LIBTOOL'
+
+    # Default configuration.
+    $SED "1,/$re_begincf CONFIG/d;/$re_endcf CONFIG/,\$d" < "$progpath"
+
+    # Now print the configurations for the tags.
+    for tagname in $taglist; do
+      $SED -n "/$re_begincf TAG CONFIG: $tagname\$/,/$re_endcf TAG CONFIG: $tagname\$/p" < "$progpath"
+    done
+
+    exit $?
+}
+
+# func_features
+# Display the features supported by this script.
+func_features ()
+{
+    echo "host: $host"
+    if test "$build_libtool_libs" = yes; then
+      echo "enable shared libraries"
+    else
+      echo "disable shared libraries"
+    fi
+    if test "$build_old_libs" = yes; then
+      echo "enable static libraries"
+    else
+      echo "disable static libraries"
+    fi
+
+    exit $?
+}
+
+# func_enable_tag tagname
+# Exit with an error if TAGNAME contains invalid characters.  Otherwise,
+# if this script holds a configuration block for TAGNAME, add it to the
+# global $taglist and evaluate that block; an unknown tag is only reported.
+func_enable_tag ()
+{
+  # Global variable:
+  tagname="$1"
+
+  re_begincf="^# ### BEGIN LIBTOOL TAG CONFIG: $tagname\$"
+  re_endcf="^# ### END LIBTOOL TAG CONFIG: $tagname\$"
+  sed_extractcf="/$re_begincf/,/$re_endcf/p"
+
+  # Validate tagname.
+  case $tagname in
+    *[!-_A-Za-z0-9,/]*)
+      func_fatal_error "invalid tag name: $tagname"
+      ;;
+  esac
+
+  # Don't test for the "default" C tag, as we know it's
+  # there but not specially marked.
+  case $tagname in
+    CC) ;;
+    *)
+      if $GREP "$re_begincf" "$progpath" >/dev/null 2>&1; then
+	taglist="$taglist $tagname"
+
+	# Evaluate the configuration.  Be careful to quote the path
+	# and the sed script, to avoid splitting on whitespace, but
+	# also don't use non-portable quotes within backquotes within
+	# quotes we have to do it in 2 steps:
+	extractedcf=`$SED -n -e "$sed_extractcf" < "$progpath"`
+	eval "$extractedcf"
+      else
+	func_error "ignoring unknown tag $tagname"
+      fi
+      ;;
+  esac
+}
+
+# func_check_version_match
+# Ensure that the m4 macros and this libtool script come from the same
+# release of libtool; print a diagnostic and exit if they do not.
+func_check_version_match ()
+{
+  if test "$package_revision" != "$macro_revision"; then
+    if test "$VERSION" != "$macro_version"; then
+      if test -z "$macro_version"; then
+        cat >&2 <<_LT_EOF
+$progname: Version mismatch error.  This is $PACKAGE $VERSION, but the
+$progname: definition of this LT_INIT comes from an older release.
+$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION
+$progname: and run autoconf again.
+_LT_EOF
+      else
+        cat >&2 <<_LT_EOF
+$progname: Version mismatch error.  This is $PACKAGE $VERSION, but the
+$progname: definition of this LT_INIT comes from $PACKAGE $macro_version.
+$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION
+$progname: and run autoconf again.
+_LT_EOF
+      fi
+    else
+      cat >&2 <<_LT_EOF
+$progname: Version mismatch error.  This is $PACKAGE $VERSION, revision $package_revision,
+$progname: but the definition of this LT_INIT comes from revision $macro_revision.
+$progname: You should recreate aclocal.m4 with macros from revision $package_revision
+$progname: of $PACKAGE $VERSION and run autoconf again.
+_LT_EOF
+    fi
+
+    exit $EXIT_MISMATCH
+  fi
+}
+
+
+# Shorthand for --mode=foo, only valid as the first argument
+case $1 in  # each arm swaps the (possibly abbreviated) word for "--mode NAME"
+clean|clea|cle|cl)
+  shift; set dummy --mode clean ${1+"$@"}; shift
+  ;;
+compile|compil|compi|comp|com|co|c)
+  shift; set dummy --mode compile ${1+"$@"}; shift
+  ;;
+execute|execut|execu|exec|exe|ex|e)
+  shift; set dummy --mode execute ${1+"$@"}; shift
+  ;;
+finish|finis|fini|fin|fi|f)
+  shift; set dummy --mode finish ${1+"$@"}; shift
+  ;;
+install|instal|insta|inst|ins|in|i)
+  shift; set dummy --mode install ${1+"$@"}; shift
+  ;;
+link|lin|li|l)
+  shift; set dummy --mode link ${1+"$@"}; shift
+  ;;
+uninstall|uninstal|uninsta|uninst|unins|unin|uni|un|u)
+  shift; set dummy --mode uninstall ${1+"$@"}; shift
+  ;;
+esac
+
+
+
+# Option defaults:
+opt_debug=:
+opt_dry_run=false
+opt_config=false
+opt_preserve_dup_deps=false
+opt_features=false
+opt_finish=false
+opt_help=false
+opt_help_all=false
+opt_silent=:  # NOTE(review): dead store, reset to false three lines below
+opt_warning=:
+opt_verbose=:  # NOTE(review): dead store, reset to false two lines below
+opt_silent=false
+opt_verbose=false
+
+
+# Parse options once, thoroughly.  This comes as soon as possible in the
+# script to make things like `--version' happen as quickly as we can.
+{
+  # this just eases exit handling
+  while test $# -gt 0; do
+    opt="$1"
+    shift
+    case $opt in
+      --debug|-x)	opt_debug='set -x'
+			func_echo "enabling shell trace mode"
+			$opt_debug
+			;;
+      --dry-run|--dryrun|-n)
+			opt_dry_run=:
+			;;
+      --config)
+			opt_config=:
+func_config
+			;;
+      --dlopen|-dlopen)
+			optarg="$1"
+			opt_dlopen="${opt_dlopen+$opt_dlopen
+}$optarg"
+			shift
+			;;
+      --preserve-dup-deps)
+			opt_preserve_dup_deps=:
+			;;
+      --features)
+			opt_features=:
+func_features
+			;;
+      --finish)
+			opt_finish=:
+set dummy --mode finish ${1+"$@"}; shift
+			;;
+      --help)
+			opt_help=:
+			;;
+      --help-all)
+			opt_help_all=:
+opt_help=': help-all'
+			;;
+      --mode)
+			test $# = 0 && func_missing_arg $opt && break
+			optarg="$1"
+			opt_mode="$optarg"
+case $optarg in
+  # Valid mode arguments:
+  clean|compile|execute|finish|install|link|relink|uninstall) ;;
+
+  # Catch anything else as an error
+  *) func_error "invalid argument for $opt"
+     exit_cmd=exit
+     break
+     ;;
+esac
+			shift
+			;;
+      --no-silent|--no-quiet)
+			opt_silent=false
+func_append preserve_args " $opt"
+			;;
+      --no-warning|--no-warn)
+			opt_warning=false
+func_append preserve_args " $opt"
+			;;
+      --no-verbose)
+			opt_verbose=false
+func_append preserve_args " $opt"
+			;;
+      --silent|--quiet)
+			opt_silent=:
+func_append preserve_args " $opt"
+        opt_verbose=false
+			;;
+      --verbose|-v)
+			opt_verbose=:
+func_append preserve_args " $opt"
+opt_silent=false
+			;;
+      --tag)
+			test $# = 0 && func_missing_arg $opt && break
+			optarg="$1"
+			opt_tag="$optarg"
+func_append preserve_args " $opt $optarg"
+func_enable_tag "$optarg"
+			shift
+			;;
+
+      -\?|-h)		func_usage				;;
+      --help)		func_help				;;  # NOTE(review): unreachable, the earlier --help arm matches first
+      --version)	func_version				;;
+
+      # Separate optargs to long options:
+      --*=*)
+			func_split_long_opt "$opt"
+			set dummy "$func_split_long_opt_name" "$func_split_long_opt_arg" ${1+"$@"}
+			shift
+			;;
+
+      # Separate non-argument short options:
+      -\?*|-h*|-n*|-v*)
+			func_split_short_opt "$opt"
+			set dummy "$func_split_short_opt_name" "-$func_split_short_opt_arg" ${1+"$@"}
+			shift
+			;;
+
+      --)		break					;;
+      -*)		func_fatal_help "unrecognized option \`$opt'" ;;
+      *)		set dummy "$opt" ${1+"$@"};	shift; break  ;;
+    esac
+  done
+
+  # Validate options:
+
+  # save first non-option argument
+  if test "$#" -gt 0; then
+    nonopt="$opt"
+    shift
+  fi
+
+  # preserve --debug
+  test "$opt_debug" = : || func_append preserve_args " --debug"
+
+  case $host in
+    *cygwin* | *mingw* | *pw32* | *cegcc*)
+      # don't eliminate duplications in $postdeps and $predeps
+      opt_duplicate_compiler_generated_deps=:
+      ;;
+    *)
+      opt_duplicate_compiler_generated_deps=$opt_preserve_dup_deps
+      ;;
+  esac
+
+  $opt_help || {
+    # Sanity checks first:
+    func_check_version_match
+
+    if test "$build_libtool_libs" != yes && test "$build_old_libs" != yes; then
+      func_fatal_configuration "not configured to build any kind of library"
+    fi
+
+    # Expand $shrext_cmds once and keep the result in std_shrext.
+    eval std_shrext=\"$shrext_cmds\"
+
+    # Only execute mode is allowed to have -dlopen flags.
+    if test -n "$opt_dlopen" && test "$opt_mode" != execute; then
+      func_error "unrecognized option \`-dlopen'"
+      $ECHO "$help" 1>&2
+      exit $EXIT_FAILURE
+    fi
+
+    # Change the help message to a mode-specific one.
+    generic_help="$help"
+    help="Try \`$progname --help --mode=$opt_mode' for more information."
+  }
+
+
+  # Exit now if option parsing set exit_cmd to 'exit'; otherwise a no-op.
+  $exit_cmd $EXIT_FAILURE
+}
+
+
+
+
+## ----------- ##
+##    Main.    ##
+## ----------- ##
+
+# func_lalib_p file
+# True iff FILE is a libtool `.la' library or `.lo' object file.
+# Only a basic sanity check: it looks for the "# Generated by" marker in
+# the first four lines, so it will hardly flush out determined imposters.
+func_lalib_p ()
+{
+    test -f "$1" &&
+      $SED -e 4q "$1" 2>/dev/null \
+        | $GREP "^# Generated by .*$PACKAGE" > /dev/null 2>&1
+}
+
+# func_lalib_unsafe_p file
+# True iff FILE is a libtool `.la' library or `.lo' object file.
+# This function implements the same check as func_lalib_p without
+# resorting to external programs.  To this end, it redirects stdin and
+# closes it afterwards, without saving the original file descriptor.
+# As a safety measure, use it only where a negative result would be
+# fatal anyway.  Works if `file' does not exist.
+func_lalib_unsafe_p ()
+{
+    lalib_p=no
+    if test -f "$1" && test -r "$1" && exec 5<&0 <"$1"; then  # fd 5 keeps the old stdin while FILE is read
+	for lalib_p_l in 1 2 3 4
+	do
+	    read lalib_p_line
+	    case "$lalib_p_line" in
+		\#\ Generated\ by\ *$PACKAGE* ) lalib_p=yes; break;;
+	    esac
+	done
+	exec 0<&5 5<&-  # put stdin back from fd 5, then close fd 5
+    fi
+    test "$lalib_p" = yes
+}
+
+# func_ltwrapper_script_p file
+# True iff FILE is a libtool wrapper script
+# This function is only a basic sanity check; it will hardly flush out
+# determined imposters.
+func_ltwrapper_script_p ()
+{
+    func_lalib_p "$1"  # same "Generated by" marker test as for .la/.lo files
+}
+
+# func_ltwrapper_executable_p file
+# True iff FILE is a libtool wrapper executable
+# This function is only a basic sanity check; it will hardly flush out
+# determined imposters.
+func_ltwrapper_executable_p ()
+{
+    func_ltwrapper_exec_suffix=
+    case $1 in
+    *.exe) ;;
+    *) func_ltwrapper_exec_suffix=.exe ;;
+    esac
+    $GREP "$magic_exe" "$1$func_ltwrapper_exec_suffix" >/dev/null 2>&1
+}
+
+# func_ltwrapper_scriptname file
+# Assumes file is an ltwrapper_executable
+# uses $file to determine the appropriate filename for a
+# temporary ltwrapper_script; result in $func_ltwrapper_scriptname_result.
+func_ltwrapper_scriptname ()
+{
+    func_dirname_and_basename "$1" "" "."
+    func_stripname '' '.exe' "$func_basename_result"
+    func_ltwrapper_scriptname_result="$func_dirname_result/$objdir/${func_stripname_result}_ltshwrapper"
+}
+
+# func_ltwrapper_p file
+# True iff FILE is a libtool wrapper script or wrapper executable
+# This function is only a basic sanity check; it will hardly flush out
+# determined imposters.
+func_ltwrapper_p ()
+{
+    func_ltwrapper_script_p "$1" || func_ltwrapper_executable_p "$1"  # the script check is tried first
+}
+
+
+# func_execute_cmds commands fail_cmd
+# Execute tilde-delimited COMMANDS.
+# If FAIL_CMD is given, eval that upon failure.
+# FAIL_CMD may read-access the current command in variable CMD!
+func_execute_cmds ()
+{
+    $opt_debug
+    save_ifs=$IFS; IFS='~'
+    for cmd in $1; do
+      IFS=$save_ifs
+      eval cmd=\"$cmd\"  # expand variable references embedded in the command text
+      func_show_eval "$cmd" "${2-:}"
+    done
+    IFS=$save_ifs
+}
+
+
+# func_source file
+# Source FILE, adding directory component if necessary.
+# Note that it is not necessary on cygwin/mingw to append a dot to
+# FILE even if both FILE and FILE.exe exist: automatic-append-.exe
+# behavior happens only for exec(3), not for open(2)!  Also, sourcing
+# `FILE.' does not work on cygwin managed mounts.
+func_source ()
+{
+    $opt_debug
+    case $1 in
+    */* | *\\*)	. "$1" ;;
+    *)		. "./$1" ;;  # prefix ./ so that `.' does not look FILE up in PATH
+    esac
+}
+
+
+# func_resolve_sysroot PATH
+# Replace a leading = in PATH with a sysroot.  Store the result into
+# func_resolve_sysroot_result
+func_resolve_sysroot ()
+{
+  func_resolve_sysroot_result=$1
+  case $func_resolve_sysroot_result in
+  =*)
+    func_stripname '=' '' "$func_resolve_sysroot_result"
+    func_resolve_sysroot_result=$lt_sysroot$func_stripname_result
+    ;;
+  esac
+}
+
+# func_replace_sysroot PATH
+# If PATH begins with the sysroot, replace it with = and
+# store the result into func_replace_sysroot_result.
+func_replace_sysroot ()
+{
+  case "$lt_sysroot:$1" in
+  ?*:"$lt_sysroot"*)  # sysroot is non-empty and PATH starts with it
+    func_stripname "$lt_sysroot" '' "$1"
+    func_replace_sysroot_result="=$func_stripname_result"
+    ;;
+  *)
+    # Including no sysroot.
+    func_replace_sysroot_result=$1
+    ;;
+  esac
+}
+
+# func_infer_tag arg
+# Infer tagged configuration to use if any are available and
+# if one wasn't chosen via the "--tag" command line option.
+# Only attempt this if the compiler in the base compile
+# command doesn't match the default compiler.
+# arg is usually of the form 'gcc ...'
+func_infer_tag ()
+{
+    $opt_debug
+    # Only try to infer a tag when tagged configurations exist and no tag
+    # has been selected yet.
+    if test -n "$available_tags" && test -z "$tagname"; then
+      # Build a quoted copy of $CC, one word at a time.
+      CC_quoted=
+      for arg in $CC; do
+	func_append_quoted CC_quoted "$arg"
+      done
+      # Also keep the output of func_echo_all for both spellings, so the
+      # command line can be compared against any of the four forms.
+      CC_expanded=`func_echo_all $CC`
+      CC_quoted_expanded=`func_echo_all $CC_quoted`
+      case $@ in
+      # Blanks in the command may have been stripped by the calling shell,
+      # but not from the CC environment variable when configure was run.
+      " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \
+      " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) ;;
+      # Blanks at the start of $base_compile will cause this to fail
+      # if we don't check for them as well.
+      *)
+	# The command does not start with the default compiler: try every
+	# available tag in turn.
+	for z in $available_tags; do
+	  # Only consider tags that have a configuration section in this script.
+	  if $GREP "^# ### BEGIN LIBTOOL TAG CONFIG: $z$" < "$progpath" > /dev/null; then
+	    # Evaluate the configuration.
+	    eval "`${SED} -n -e '/^# ### BEGIN LIBTOOL TAG CONFIG: '$z'$/,/^# ### END LIBTOOL TAG CONFIG: '$z'$/p' < $progpath`"
+	    # Recompute the comparison strings from the $CC value that is in
+	    # effect after evaluating the tag's section.
+	    CC_quoted=
+	    for arg in $CC; do
+	      # Double-quote args containing other shell metacharacters.
+	      func_append_quoted CC_quoted "$arg"
+	    done
+	    CC_expanded=`func_echo_all $CC`
+	    CC_quoted_expanded=`func_echo_all $CC_quoted`
+	    # Note the trailing blank added to the command here, unlike in
+	    # the first comparison above.
+	    case "$@ " in
+	    " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \
+	    " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*)
+	      # The compiler in the base compile command matches
+	      # the one in the tagged configuration.
+	      # Assume this is the tagged configuration we want.
+	      tagname=$z
+	      break
+	      ;;
+	    esac
+	  fi
+	done
+	# If $tagname still isn't set, then no tagged configuration
+	# was found and let the user know that the "--tag" command
+	# line option must be used.
+	if test -z "$tagname"; then
+	  func_echo "unable to infer tagged configuration"
+	  func_fatal_error "specify a tag with \`--tag'"
+#	else
+#	  func_verbose "using $tagname tagged configuration"
+	fi
+	;;
+      esac
+    fi
+}
+
+
+
+# func_write_libtool_object output_name pic_name nonpic_name
+# Create a libtool object file (analogous to a ".la" file),
+# but don't create it if we're doing a dry run.
+func_write_libtool_object ()
+{
+    # $1: name of the libtool object file to write.
+    write_libobj=${1}
+    # $2: PIC object name.  It is recorded wrapped in single quotes, or as
+    # the bare word `none' when libtool libraries are not being built.
+    if test "$build_libtool_libs" = yes; then
+      write_lobj=\'${2}\'
+    else
+      write_lobj=none
+    fi
+
+    # $3: non-PIC object name, recorded the same way, or `none' when old
+    # (static) libraries are not being built.
+    if test "$build_old_libs" = yes; then
+      write_oldobj=\'${3}\'
+    else
+      write_oldobj=none
+    fi
+
+    # Nothing is written during a dry run.
+    $opt_dry_run || {
+      # The text goes to a temporary "T" file first and is renamed below.
+      # The redirection target is quoted, like the $MV operands, so that an
+      # output name containing blanks cannot break the redirection.
+      cat >"${write_libobj}T" <<EOF
+# $write_libobj - a libtool object file
+# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION
+#
+# Please DO NOT delete this file!
+# It is necessary for linking the library.
+
+# Name of the PIC object.
+pic_object=$write_lobj
+
+# Name of the non-PIC object
+non_pic_object=$write_oldobj
+
+EOF
+      $MV "${write_libobj}T" "${write_libobj}"
+    }
+}
+
+
+##################################################
+# FILE NAME AND PATH CONVERSION HELPER FUNCTIONS #
+##################################################
+
+# func_convert_core_file_wine_to_w32 ARG
+# Helper function used by file name conversion functions when $build is *nix,
+# and $host is mingw, cygwin, or some other w32 environment. Relies on a
+# correctly configured wine environment available, with the winepath program
+# in $build's $PATH.
+#
+# ARG is the $build file name to be converted to w32 format.
+# Result is available in $func_convert_core_file_wine_to_w32_result, and will
+# be empty on error (or when ARG is empty)
+func_convert_core_file_wine_to_w32 ()
+{
+  $opt_debug
+  # Start from ARG itself; this value is only kept when ARG is empty,
+  # because both branches below overwrite it.
+  func_convert_core_file_wine_to_w32_result="$1"
+  if test -n "$1"; then
+    # Unfortunately, winepath does not exit with a non-zero error code, so we
+    # are forced to check the contents of stdout. On the other hand, if the
+    # command is not found, the shell will set an exit code of 127 and print
+    # *an error message* to stdout. So we must check for both error code of
+    # zero AND non-empty stdout, which explains the odd construction:
+    func_convert_core_file_wine_to_w32_tmp=`winepath -w "$1" 2>/dev/null`
+    if test "$?" -eq 0 && test -n "${func_convert_core_file_wine_to_w32_tmp}"; then
+      # Post-process winepath's output with the sed script held in
+      # $lt_sed_naive_backslashify (defined elsewhere in this script).
+      func_convert_core_file_wine_to_w32_result=`$ECHO "$func_convert_core_file_wine_to_w32_tmp" |
+        $SED -e "$lt_sed_naive_backslashify"`
+    else
+      # Conversion failed: report an empty result.
+      func_convert_core_file_wine_to_w32_result=
+    fi
+  fi
+}
+# end: func_convert_core_file_wine_to_w32
+
+
+# func_convert_core_path_wine_to_w32 ARG
+# Helper function used by path conversion functions when $build is *nix, and
+# $host is mingw, cygwin, or some other w32 environment. Relies on a correctly
+# configured wine environment available, with the winepath program in $build's
+# $PATH. Assumes ARG has no leading or trailing path separator characters.
+#
+# ARG is path to be converted from $build format to win32.
+# Result is available in $func_convert_core_path_wine_to_w32_result.
+# Unconvertible file (directory) names in ARG are skipped; if no directory names
+# are convertible, then the result may be empty.
+func_convert_core_path_wine_to_w32 ()
+{
+  $opt_debug
+  # unfortunately, winepath doesn't convert paths, only file names
+  func_convert_core_path_wine_to_w32_result=""
+  if test -n "$1"; then
+    # Split ARG on ':' and convert each component separately.
+    oldIFS=$IFS
+    IFS=:
+    for func_convert_core_path_wine_to_w32_f in $1; do
+      # Restore IFS before calling the per-file converter.
+      IFS=$oldIFS
+      func_convert_core_file_wine_to_w32 "$func_convert_core_path_wine_to_w32_f"
+      # Components whose conversion came back empty are skipped.
+      if test -n "$func_convert_core_file_wine_to_w32_result" ; then
+        if test -z "$func_convert_core_path_wine_to_w32_result"; then
+          # First converted component: no separator needed yet.
+          func_convert_core_path_wine_to_w32_result="$func_convert_core_file_wine_to_w32_result"
+        else
+          # Later components are joined with ';'.
+          func_append func_convert_core_path_wine_to_w32_result ";$func_convert_core_file_wine_to_w32_result"
+        fi
+      fi
+    done
+    # Restore IFS even when the loop body never ran.
+    IFS=$oldIFS
+  fi
+}
+# end: func_convert_core_path_wine_to_w32
+
+
+# func_cygpath ARGS...
+# Wrapper around calling the cygpath program via LT_CYGPATH. This is used
+# when (1) $build is *nix and Cygwin is hosted via a wine environment; or (2)
+# $build is MSYS and $host is Cygwin, or (3) $build is Cygwin. In case (1) or
+# (2), returns the Cygwin file name or path in func_cygpath_result (input
+# file name or path is assumed to be in w32 format, as previously converted
+# from $build's *nix or MSYS format). In case (3), returns the w32 file name
+# or path in func_cygpath_result (input file name or path is assumed to be in
+# Cygwin format). Returns an empty string on error.
+#
+# ARGS are passed to cygpath, with the last one being the file name or path to
+# be converted.
+#
+# Specify the absolute *nix (or w32) name to cygpath in the LT_CYGPATH
+# environment variable; do not put it in $PATH.
+func_cygpath ()
+{
+  $opt_debug
+  # Refuse to run unless LT_CYGPATH is set and names an existing regular file.
+  if test -z "$LT_CYGPATH" || test ! -f "$LT_CYGPATH"; then
+    func_cygpath_result=
+    func_error "LT_CYGPATH is empty or specifies non-existent file: \`$LT_CYGPATH'"
+  else
+    # Capture the tool's output (stderr discarded); a non-zero exit status
+    # leaves the result empty.
+    func_cygpath_result=`$LT_CYGPATH "$@" 2>/dev/null` || func_cygpath_result=
+  fi
+}
+#end: func_cygpath
+
+
+# func_convert_core_msys_to_w32 ARG
+# Convert file name or path ARG from MSYS format to w32 format.  Return
+# result in func_convert_core_msys_to_w32_result.
+func_convert_core_msys_to_w32 ()
+{
+  $opt_debug
+  # awkward: cmd appends spaces to result
+  # The echo is run through `cmd //c' in a subshell with stderr discarded.
+  # The first sed expression drops trailing spaces; the second applies the
+  # script held in $lt_sed_naive_backslashify (defined elsewhere).
+  func_convert_core_msys_to_w32_result=`( cmd //c echo "$1" ) 2>/dev/null |
+    $SED -e 's/[ ]*$//' -e "$lt_sed_naive_backslashify"`
+}
+#end: func_convert_core_msys_to_w32
+
+
+# func_convert_file_check ARG1 ARG2
+# Verify that ARG1 (a file name in $build format) was converted to $host
+# format in ARG2. Otherwise, emit an error message, but continue (resetting
+# func_to_host_file_result to ARG1).
+func_convert_file_check ()
+{
+  $opt_debug
+  # Nothing to complain about when a converted name exists (ARG2), or when
+  # there was no input name (ARG1) in the first place.
+  if test -n "$2" || test -z "$1"; then
+    return 0
+  fi
+  func_error "Could not determine host file name corresponding to"
+  func_error "  \`$1'"
+  func_error "Continuing, but uninstalled executables may not work."
+  # Fallback: reuse the unconverted name.
+  func_to_host_file_result=$1
+}
+# end func_convert_file_check
+
+
+# func_convert_path_check FROM_PATHSEP TO_PATHSEP FROM_PATH TO_PATH
+# Verify that FROM_PATH (a path in $build format) was converted to $host
+# format in TO_PATH. Otherwise, emit an error message, but continue, resetting
+# func_to_host_path_result to a simplistic fallback value (see below).
+func_convert_path_check ()
+{
+  $opt_debug
+  # A non-empty TO_PATH, or an empty FROM_PATH, means there is nothing to
+  # repair.
+  if test -n "$4" || test -z "$3"; then
+    return 0
+  fi
+  func_error "Could not determine the host path corresponding to"
+  func_error "  \`$3'"
+  func_error "Continuing, but uninstalled executables may not work."
+  # Fallback.  This is a deliberately simplistic "conversion" and
+  # should not be "improved".  See libtool.info.
+  if test "x$1" = "x$2"; then
+    # Same separator on both sides: FROM_PATH is used as is.
+    func_to_host_path_result=$3
+  else
+    # Different separators: swap every FROM_PATHSEP for TO_PATHSEP.
+    lt_replace_pathsep_chars="s|$1|$2|g"
+    func_to_host_path_result=`echo "$3" |
+      $SED -e "$lt_replace_pathsep_chars"`
+  fi
+}
+# end func_convert_path_check
+
+
+# func_convert_path_front_back_pathsep FRONTPAT BACKPAT REPL ORIG
+# Modifies func_to_host_path_result by prepending REPL if ORIG matches FRONTPAT
+# and appending REPL if ORIG matches BACKPAT.
+func_convert_path_front_back_pathsep ()
+{
+  $opt_debug
+  # FRONTPAT ($1) is expanded unquoted, so it acts as a shell pattern that
+  # ORIG ($4) is matched against; on a match REPL ($3) goes on the front.
+  case $4 in
+  $1 ) func_to_host_path_result="$3$func_to_host_path_result"
+    ;;
+  esac
+  # Same test with BACKPAT ($2); on a match REPL is appended instead.
+  case $4 in
+  $2 ) func_append func_to_host_path_result "$3"
+    ;;
+  esac
+}
+# end func_convert_path_front_back_pathsep
+
+
+##################################################
+# $build to $host FILE NAME CONVERSION FUNCTIONS #
+##################################################
+# invoked via `$to_host_file_cmd ARG'
+#
+# In each case, ARG is the path to be converted from $build to $host format.
+# Result will be available in $func_to_host_file_result.
+
+
+# func_to_host_file ARG
+# Converts the file name ARG from $build format to $host format. Return result
+# in func_to_host_file_result.
+func_to_host_file ()
+{
+  $opt_debug
+  # Dispatch to whichever conversion function $to_host_file_cmd names; that
+  # function is the one that sets func_to_host_file_result.
+  $to_host_file_cmd "$1"
+}
+# end func_to_host_file
+
+
+# func_to_tool_file ARG LAZY
+# converts the file name ARG from $build format to toolchain format. Return
+# result in func_to_tool_file_result.  If the conversion in use is listed
+# in (the comma separated) LAZY, no conversion takes place.
+func_to_tool_file ()
+{
+  $opt_debug
+  # LAZY ($2) is wrapped in commas so that each listed name can be matched
+  # as ",name,".
+  case ,$2, in
+    *,"$to_tool_file_cmd",*)
+      # The conversion in use is listed in LAZY: pass ARG through untouched.
+      func_to_tool_file_result=$1
+      ;;
+    *)
+      # Run the conversion named by $to_tool_file_cmd, then copy what it
+      # left in func_to_host_file_result to the tool-file result.
+      $to_tool_file_cmd "$1"
+      func_to_tool_file_result=$func_to_host_file_result
+      ;;
+  esac
+}
+# end func_to_tool_file
+
+
+# func_convert_file_noop ARG
+# Copy ARG to func_to_host_file_result.
+func_convert_file_noop ()
+{
+  # No conversion is performed: ARG is handed back unchanged.
+  func_to_host_file_result=$1
+}
+# end func_convert_file_noop
+
+
+# func_convert_file_msys_to_w32 ARG
+# Convert file name ARG from (mingw) MSYS to (mingw) w32 format; automatic
+# conversion to w32 is not available inside the cwrapper.  Returns result in
+# func_to_host_file_result.
+func_convert_file_msys_to_w32 ()
+{
+  $opt_debug
+  if test -z "$1"; then
+    # An empty name converts to itself.
+    func_to_host_file_result=$1
+  else
+    # Let the core MSYS helper do the work and adopt its result.
+    func_convert_core_msys_to_w32 "$1"
+    func_to_host_file_result=$func_convert_core_msys_to_w32_result
+  fi
+  # Report a failed conversion and fall back to ARG if needed.
+  func_convert_file_check "$1" "$func_to_host_file_result"
+}
+# end func_convert_file_msys_to_w32
+
+
+# func_convert_file_cygwin_to_w32 ARG
+# Convert file name ARG from Cygwin to w32 format.  Returns result in
+# func_to_host_file_result.
+func_convert_file_cygwin_to_w32 ()
+{
+  $opt_debug
+  if test -z "$1"; then
+    # An empty name converts to itself.
+    func_to_host_file_result=$1
+  else
+    # because $build is cygwin, we call "the" cygpath in $PATH; no need to use
+    # LT_CYGPATH in this case.
+    func_to_host_file_result=`cygpath -m "$1"`
+  fi
+  # Report a failed conversion and fall back to ARG if needed.
+  func_convert_file_check "$1" "$func_to_host_file_result"
+}
+# end func_convert_file_cygwin_to_w32
+
+
+# func_convert_file_nix_to_w32 ARG
+# Convert file name ARG from *nix to w32 format.  Requires a wine environment
+# and a working winepath. Returns result in func_to_host_file_result.
+func_convert_file_nix_to_w32 ()
+{
+  $opt_debug
+  if test -z "$1"; then
+    # An empty name converts to itself.
+    func_to_host_file_result=$1
+  else
+    # Let the winepath-based core helper do the work and adopt its result.
+    func_convert_core_file_wine_to_w32 "$1"
+    func_to_host_file_result=$func_convert_core_file_wine_to_w32_result
+  fi
+  # Report a failed conversion and fall back to ARG if needed.
+  func_convert_file_check "$1" "$func_to_host_file_result"
+}
+# end func_convert_file_nix_to_w32
+
+
+# func_convert_file_msys_to_cygwin ARG
+# Convert file name ARG from MSYS to Cygwin format.  Requires LT_CYGPATH set.
+# Returns result in func_to_host_file_result.
+func_convert_file_msys_to_cygwin ()
+{
+  $opt_debug
+  if test -z "$1"; then
+    # An empty name converts to itself.
+    func_to_host_file_result=$1
+  else
+    # Two hops: MSYS to w32 first, then w32 to Cygwin via func_cygpath -u.
+    func_convert_core_msys_to_w32 "$1"
+    func_cygpath -u "$func_convert_core_msys_to_w32_result"
+    func_to_host_file_result=$func_cygpath_result
+  fi
+  # Report a failed conversion and fall back to ARG if needed.
+  func_convert_file_check "$1" "$func_to_host_file_result"
+}
+# end func_convert_file_msys_to_cygwin
+
+
+# func_convert_file_nix_to_cygwin ARG
+# Convert file name ARG from *nix to Cygwin format.  Requires Cygwin installed
+# in a wine environment, working winepath, and LT_CYGPATH set.  Returns result
+# in func_to_host_file_result.
+func_convert_file_nix_to_cygwin ()
+{
+  $opt_debug
+  if test -z "$1"; then
+    # An empty name converts to itself.
+    func_to_host_file_result=$1
+  else
+    # convert from *nix to w32, then use cygpath to convert from w32 to cygwin.
+    func_convert_core_file_wine_to_w32 "$1"
+    func_cygpath -u "$func_convert_core_file_wine_to_w32_result"
+    func_to_host_file_result=$func_cygpath_result
+  fi
+  # Report a failed conversion and fall back to ARG if needed.
+  func_convert_file_check "$1" "$func_to_host_file_result"
+}
+# end func_convert_file_nix_to_cygwin
+
+
+#############################################
+# $build to $host PATH CONVERSION FUNCTIONS #
+#############################################
+# invoked via `$to_host_path_cmd ARG'
+#
+# In each case, ARG is the path to be converted from $build to $host format.
+# The result will be available in $func_to_host_path_result.
+#
+# Path separators are also converted from $build format to $host format.  If
+# ARG begins or ends with a path separator character, it is preserved (but
+# converted to $host format) on output.
+#
+# All path conversion functions are named using the following convention:
+#   file name conversion function    : func_convert_file_X_to_Y ()
+#   path conversion function         : func_convert_path_X_to_Y ()
+# where, for any given $build/$host combination the 'X_to_Y' value is the
+# same.  If conversion functions are added for new $build/$host combinations,
+# the two new functions must follow this pattern, or func_init_to_host_path_cmd
+# will break.
+
+
+# func_init_to_host_path_cmd
+# Ensures that function "pointer" variable $to_host_path_cmd is set to the
+# appropriate value, based on the value of $to_host_file_cmd.
+to_host_path_cmd=
+func_init_to_host_path_cmd ()
+{
+  $opt_debug
+  # The name is derived only while it is still empty; later calls keep it.
+  case $to_host_path_cmd in
+  '')
+    # Swap the "func_convert_file_" prefix of the file converter's name for
+    # "func_convert_path_".
+    func_stripname 'func_convert_file_' '' "$to_host_file_cmd"
+    to_host_path_cmd=func_convert_path_$func_stripname_result
+    ;;
+  esac
+}
+
+
+# func_to_host_path ARG
+# Converts the path ARG from $build format to $host format. Return result
+# in func_to_host_path_result.
+func_to_host_path ()
+{
+  $opt_debug
+  # Make sure $to_host_path_cmd has been derived, then dispatch to the
+  # conversion function it names.
+  func_init_to_host_path_cmd
+  $to_host_path_cmd "$1"
+}
+# end func_to_host_path
+
+
+# func_convert_path_noop ARG
+# Copy ARG to func_to_host_path_result.
+func_convert_path_noop ()
+{
+  # No conversion is performed: ARG is handed back unchanged.
+  func_to_host_path_result=$1
+}
+# end func_convert_path_noop
+
+
+# func_convert_path_msys_to_w32 ARG
+# Convert path ARG from (mingw) MSYS to (mingw) w32 format; automatic
+# conversion to w32 is not available inside the cwrapper.  Returns result in
+# func_to_host_path_result.
+func_convert_path_msys_to_w32 ()
+{
+  $opt_debug
+  func_to_host_path_result=$1
+  # An empty path needs no further work.
+  test -n "$1" || return 0
+
+  # Remove leading and trailing path separator characters from ARG.  MSYS
+  # behavior is inconsistent here; cygpath turns them into '.;' and ';.';
+  # and winepath ignores them completely.
+  func_stripname : : "$1"
+  func_to_host_path_tmp1=$func_stripname_result
+  func_convert_core_msys_to_w32 "$func_to_host_path_tmp1"
+  func_to_host_path_result=$func_convert_core_msys_to_w32_result
+  # Check the conversion (':' is the $build separator, ';' the $host one),
+  # then re-attach a leading and/or trailing separator ARG may have had.
+  func_convert_path_check : ";" "$func_to_host_path_tmp1" "$func_to_host_path_result"
+  func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
+}
+# end func_convert_path_msys_to_w32
+
+
+# func_convert_path_cygwin_to_w32 ARG
+# Convert path ARG from Cygwin to w32 format.  Returns result in
+# func_to_host_path_result.
+func_convert_path_cygwin_to_w32 ()
+{
+  $opt_debug
+  func_to_host_path_result=$1
+  # An empty path needs no further work.
+  test -n "$1" || return 0
+
+  # See func_convert_path_msys_to_w32:
+  func_stripname : : "$1"
+  func_to_host_path_tmp1=$func_stripname_result
+  func_to_host_path_result=`cygpath -m -p "$func_to_host_path_tmp1"`
+  # Check the conversion (':' is the $build separator, ';' the $host one),
+  # then re-attach a leading and/or trailing separator ARG may have had.
+  func_convert_path_check : ";" "$func_to_host_path_tmp1" "$func_to_host_path_result"
+  func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
+}
+# end func_convert_path_cygwin_to_w32
+
+
+# func_convert_path_nix_to_w32 ARG
+# Convert path ARG from *nix to w32 format.  Requires a wine environment and
+# a working winepath.  Returns result in func_to_host_path_result.
+func_convert_path_nix_to_w32 ()
+{
+  $opt_debug
+  func_to_host_path_result=$1
+  # An empty path needs no further work.
+  test -n "$1" || return 0
+
+  # See func_convert_path_msys_to_w32:
+  func_stripname : : "$1"
+  func_to_host_path_tmp1=$func_stripname_result
+  func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1"
+  func_to_host_path_result=$func_convert_core_path_wine_to_w32_result
+  # Check the conversion (':' is the $build separator, ';' the $host one),
+  # then re-attach a leading and/or trailing separator ARG may have had.
+  func_convert_path_check : ";" "$func_to_host_path_tmp1" "$func_to_host_path_result"
+  func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
+}
+# end func_convert_path_nix_to_w32
+
+
+# func_convert_path_msys_to_cygwin ARG
+# Convert path ARG from MSYS to Cygwin format.  Requires LT_CYGPATH set.
+# Returns result in func_to_host_path_result.
+func_convert_path_msys_to_cygwin ()
+{
+  $opt_debug
+  func_to_host_path_result=$1
+  # An empty path needs no further work.
+  test -n "$1" || return 0
+
+  # See func_convert_path_msys_to_w32:
+  func_stripname : : "$1"
+  func_to_host_path_tmp1=$func_stripname_result
+  # Two hops: MSYS to w32 first, then w32 to Cygwin via func_cygpath -u -p.
+  func_convert_core_msys_to_w32 "$func_to_host_path_tmp1"
+  func_cygpath -u -p "$func_convert_core_msys_to_w32_result"
+  func_to_host_path_result=$func_cygpath_result
+  # Check the conversion (both sides use ':' as separator), then re-attach
+  # a leading and/or trailing separator ARG may have had.
+  func_convert_path_check : : "$func_to_host_path_tmp1" "$func_to_host_path_result"
+  func_convert_path_front_back_pathsep ":*" "*:" : "$1"
+}
+# end func_convert_path_msys_to_cygwin
+
+
+# func_convert_path_nix_to_cygwin ARG
+# Convert path ARG from *nix to Cygwin format.  Requires Cygwin installed in
+# a wine environment, working winepath, and LT_CYGPATH set.  Returns result in
+# func_to_host_path_result.
+func_convert_path_nix_to_cygwin ()
+{
+  $opt_debug
+  func_to_host_path_result=$1
+  # An empty path needs no further work.
+  test -n "$1" || return 0
+
+  # Remove leading and trailing path separator characters from
+  # ARG. msys behavior is inconsistent here, cygpath turns them
+  # into '.;' and ';.', and winepath ignores them completely.
+  func_stripname : : "$1"
+  func_to_host_path_tmp1=$func_stripname_result
+  # Two hops: *nix to w32 via winepath, then w32 to Cygwin via func_cygpath.
+  func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1"
+  func_cygpath -u -p "$func_convert_core_path_wine_to_w32_result"
+  func_to_host_path_result=$func_cygpath_result
+  # Check the conversion (both sides use ':' as separator), then re-attach
+  # a leading and/or trailing separator ARG may have had.
+  func_convert_path_check : : "$func_to_host_path_tmp1" "$func_to_host_path_result"
+  func_convert_path_front_back_pathsep ":*" "*:" : "$1"
+}
+# end func_convert_path_nix_to_cygwin
+
+
+# func_mode_compile arg...
+func_mode_compile ()
+{
+    # Implements compile mode.  The arguments are the compile command, its
+    # options and the source file.  Libtool's own options (-o, -shared,
+    # -static, -prefer-pic, -prefer-non-pic, -no-suppress, -Xcompiler, -Wc,
+    # and the -pie family) are filtered out of the command here.  Depending on
+    # $build_libtool_libs and $build_old_libs up to two compilations are run,
+    # and a libtool object file naming the results is written.  The function
+    # does not return: it ends in `exit'.
+    $opt_debug
+    # Get the compilation command and the source file.
+    base_compile=
+    srcfile="$nonopt"  #  always keep a non-empty value in "srcfile"
+    suppress_opt=yes
+    suppress_output=
+    arg_mode=normal
+    libobj=
+    later=
+    pie_flag=
+
+    # arg_mode is a small state machine: `normal' inspects the argument,
+    # `target' takes it as the -o operand, `arg' takes it as the operand
+    # of -Xcompiler.
+    for arg
+    do
+      case $arg_mode in
+      arg  )
+	# do not "continue".  Instead, add this to base_compile
+	lastarg="$arg"
+	arg_mode=normal
+	;;
+
+      target )
+	libobj="$arg"
+	arg_mode=normal
+	continue
+	;;
+
+      normal )
+	# Accept any command-line options.
+	case $arg in
+	-o)
+	  test -n "$libobj" && \
+	    func_fatal_error "you cannot specify \`-o' more than once"
+	  arg_mode=target
+	  continue
+	  ;;
+
+	-pie | -fpie | -fPIE)
+          func_append pie_flag " $arg"
+	  continue
+	  ;;
+
+	-shared | -static | -prefer-pic | -prefer-non-pic)
+	  # Remembered now, acted upon after the tag has been inferred.
+	  func_append later " $arg"
+	  continue
+	  ;;
+
+	-no-suppress)
+	  suppress_opt=no
+	  continue
+	  ;;
+
+	-Xcompiler)
+	  arg_mode=arg  #  the next one goes into the "base_compile" arg list
+	  continue      #  The current "srcfile" will either be retained or
+	  ;;            #  replaced later.  I would guess that would be a bug.
+
+	-Wc,*)
+	  # Split the comma-separated flags and quote each one separately.
+	  func_stripname '-Wc,' '' "$arg"
+	  args=$func_stripname_result
+	  lastarg=
+	  save_ifs="$IFS"; IFS=','
+	  for arg in $args; do
+	    IFS="$save_ifs"
+	    func_append_quoted lastarg "$arg"
+	  done
+	  IFS="$save_ifs"
+	  func_stripname ' ' '' "$lastarg"
+	  lastarg=$func_stripname_result
+
+	  # Add the arguments to base_compile.
+	  func_append base_compile " $lastarg"
+	  continue
+	  ;;
+
+	*)
+	  # Accept the current argument as the source file.
+	  # The previous "srcfile" becomes the current argument.
+	  #
+	  lastarg="$srcfile"
+	  srcfile="$arg"
+	  ;;
+	esac  #  case $arg
+	;;
+      esac    #  case $arg_mode
+
+      # Aesthetically quote the previous argument.
+      func_append_quoted base_compile "$lastarg"
+    done # for arg
+
+    # A state other than `normal' here means the last option was still
+    # waiting for its operand.
+    case $arg_mode in
+    arg)
+      func_fatal_error "you must specify an argument for -Xcompiler"
+      ;;
+    target)
+      func_fatal_error "you must specify a target with \`-o'"
+      ;;
+    *)
+      # Get the name of the library object.
+      test -z "$libobj" && {
+	func_basename "$srcfile"
+	libobj="$func_basename_result"
+      }
+      ;;
+    esac
+
+    # Recognize several different file suffixes.
+    # If the user specifies -o file.o, it is replaced with file.lo
+    case $libobj in
+    *.[cCFSifmso] | \
+    *.ada | *.adb | *.ads | *.asm | \
+    *.c++ | *.cc | *.ii | *.class | *.cpp | *.cxx | \
+    *.[fF][09]? | *.for | *.java | *.go | *.obj | *.sx | *.cu | *.cup)
+      func_xform "$libobj"
+      libobj=$func_xform_result
+      ;;
+    esac
+
+    # From here on the library object must carry the .lo suffix; $obj is
+    # the name func_lo2o derives from it.
+    case $libobj in
+    *.lo) func_lo2o "$libobj"; obj=$func_lo2o_result ;;
+    *)
+      func_fatal_error "cannot determine name of library object from \`$libobj'"
+      ;;
+    esac
+
+    func_infer_tag $base_compile
+
+    # Apply the options that were set aside during argument parsing.
+    for arg in $later; do
+      case $arg in
+      -shared)
+	test "$build_libtool_libs" != yes && \
+	  func_fatal_configuration "can not build a shared library"
+	build_old_libs=no
+	continue
+	;;
+
+      -static)
+	build_libtool_libs=no
+	build_old_libs=yes
+	continue
+	;;
+
+      -prefer-pic)
+	pic_mode=yes
+	continue
+	;;
+
+      -prefer-non-pic)
+	pic_mode=no
+	continue
+	;;
+      esac
+    done
+
+    # Warn when the object name is changed by quoting and contains one of
+    # the characters listed in the $GREP pattern.
+    func_quote_for_eval "$libobj"
+    test "X$libobj" != "X$func_quote_for_eval_result" \
+      && $ECHO "X$libobj" | $GREP '[]~#^*{};<>?"'"'"'	 &()|`$[]' \
+      && func_warning "libobj name \`$libobj' may not contain shell special characters."
+    # $lobj is the object's name inside $objdir, next to $obj.
+    func_dirname_and_basename "$obj" "/" ""
+    objname="$func_basename_result"
+    xdir="$func_dirname_result"
+    lobj=${xdir}$objdir/$objname
+
+    test -z "$base_compile" && \
+      func_fatal_help "you must specify a compilation command"
+
+    # Delete any leftover library objects.
+    if test "$build_old_libs" = yes; then
+      removelist="$obj $lobj $libobj ${libobj}T"
+    else
+      removelist="$lobj $libobj ${libobj}T"
+    fi
+
+    # On Cygwin there's no "real" PIC flag so we must build both object types
+    case $host_os in
+    cygwin* | mingw* | pw32* | os2* | cegcc*)
+      pic_mode=default
+      ;;
+    esac
+    if test "$pic_mode" = no && test "$deplibs_check_method" != pass_all; then
+      # non-PIC code in shared libraries is not supported
+      pic_mode=default
+    fi
+
+    # Calculate the filename of the output object if compiler does
+    # not support -o with -c
+    if test "$compiler_c_o" = no; then
+      output_obj=`$ECHO "$srcfile" | $SED 's%^.*/%%; s%\.[^.]*$%%'`.${objext}
+      lockfile="$output_obj.lock"
+    else
+      output_obj=
+      need_locks=no
+      lockfile=
+    fi
+
+    # Lock this critical section if it is needed
+    # We use this script file to make the link, it avoids creating a new file
+    if test "$need_locks" = yes; then
+      until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do
+	func_echo "Waiting for $lockfile to be removed"
+	sleep 2
+      done
+    elif test "$need_locks" = warn; then
+      if test -f "$lockfile"; then
+	$ECHO "\
+*** ERROR, $lockfile exists and contains:
+`cat $lockfile 2>/dev/null`
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support \`-c' and \`-o' together.  If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) in this platform, or get a better
+compiler."
+
+	$opt_dry_run || $RM $removelist
+	exit $EXIT_FAILURE
+      fi
+      func_append removelist " $output_obj"
+      # Record the source file name in the lock file; it is compared again
+      # after each compilation below.
+      $ECHO "$srcfile" > "$lockfile"
+    fi
+
+    $opt_dry_run || $RM $removelist
+    func_append removelist " $lockfile"
+    # Clean up on signals 1, 2 and 15.
+    trap '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' 1 2 15
+
+    func_to_tool_file "$srcfile" func_convert_file_msys_to_w32
+    srcfile=$func_to_tool_file_result
+    func_quote_for_eval "$srcfile"
+    qsrcfile=$func_quote_for_eval_result
+
+    # Only build a PIC object if we are building libtool libraries.
+    if test "$build_libtool_libs" = yes; then
+      # Without this assignment, base_compile gets emptied.
+      fbsd_hideous_sh_bug=$base_compile
+
+      if test "$pic_mode" != no; then
+	command="$base_compile $qsrcfile $pic_flag"
+      else
+	# Don't build PIC code
+	command="$base_compile $qsrcfile"
+      fi
+
+      func_mkdir_p "$xdir$objdir"
+
+      if test -z "$output_obj"; then
+	# Place PIC objects in $objdir
+	func_append command " -o $lobj"
+      fi
+
+      func_show_eval_locale "$command"	\
+          'test -n "$output_obj" && $RM $removelist; exit $EXIT_FAILURE'
+
+      if test "$need_locks" = warn &&
+	 test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then
+	$ECHO "\
+*** ERROR, $lockfile contains:
+`cat $lockfile 2>/dev/null`
+
+but it should contain:
+$srcfile
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support \`-c' and \`-o' together.  If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) in this platform, or get a better
+compiler."
+
+	$opt_dry_run || $RM $removelist
+	exit $EXIT_FAILURE
+      fi
+
+      # Just move the object if needed, then go on to compile the next one
+      if test -n "$output_obj" && test "X$output_obj" != "X$lobj"; then
+	func_show_eval '$MV "$output_obj" "$lobj"' \
+	  'error=$?; $opt_dry_run || $RM $removelist; exit $error'
+      fi
+
+      # Allow error messages only from the first compilation.
+      if test "$suppress_opt" = yes; then
+	suppress_output=' >/dev/null 2>&1'
+      fi
+    fi
+
+    # Only build a position-dependent object if we build old libraries.
+    if test "$build_old_libs" = yes; then
+      if test "$pic_mode" != yes; then
+	# Don't build PIC code
+	command="$base_compile $qsrcfile$pie_flag"
+      else
+	command="$base_compile $qsrcfile $pic_flag"
+      fi
+      if test "$compiler_c_o" = yes; then
+	func_append command " -o $obj"
+      fi
+
+      # Suppress compiler output if we already did a PIC compilation.
+      func_append command "$suppress_output"
+      func_show_eval_locale "$command" \
+        '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE'
+
+      if test "$need_locks" = warn &&
+	 test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then
+	$ECHO "\
+*** ERROR, $lockfile contains:
+`cat $lockfile 2>/dev/null`
+
+but it should contain:
+$srcfile
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support \`-c' and \`-o' together.  If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) in this platform, or get a better
+compiler."
+
+	$opt_dry_run || $RM $removelist
+	exit $EXIT_FAILURE
+      fi
+
+      # Just move the object if needed
+      if test -n "$output_obj" && test "X$output_obj" != "X$obj"; then
+	func_show_eval '$MV "$output_obj" "$obj"' \
+	  'error=$?; $opt_dry_run || $RM $removelist; exit $error'
+      fi
+    fi
+
+    $opt_dry_run || {
+      # Record the names of both objects in the libtool object file.
+      func_write_libtool_object "$libobj" "$objdir/$objname" "$objname"
+
+      # Unlock the critical section if it was locked
+      if test "$need_locks" != no; then
+	removelist=$lockfile
+        $RM "$lockfile"
+      fi
+    }
+
+    exit $EXIT_SUCCESS
+}
+
+# Unless help was requested, run compile mode when it is the selected mode.
+# ${1+"$@"} forwards the script's arguments only when at least one is set.
+$opt_help || {
+  test "$opt_mode" = compile && func_mode_compile ${1+"$@"}
+}
+
+func_mode_help ()
+{
+    # We need to display help for each of the modes.
+    case $opt_mode in
+      "")
+        # Generic help is extracted from the usage comments
+        # at the start of this file.
+        func_help
+        ;;
+
+      clean)
+        $ECHO \
+"Usage: $progname [OPTION]... --mode=clean RM [RM-OPTION]... FILE...
+
+Remove files from the build directory.
+
+RM is the name of the program to use to delete files associated with each FILE
+(typically \`/bin/rm').  RM-OPTIONS are options (such as \`-f') to be passed
+to RM.
+
+If FILE is a libtool library, object or program, all the files associated
+with it are deleted. Otherwise, only FILE itself is deleted using RM."
+        ;;
+
+      compile)
+      $ECHO \
+"Usage: $progname [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE
+
+Compile a source file into a libtool library object.
+
+This mode accepts the following additional options:
+
+  -o OUTPUT-FILE    set the output file name to OUTPUT-FILE
+  -no-suppress      do not suppress compiler output for multiple passes
+  -prefer-pic       try to build PIC objects only
+  -prefer-non-pic   try to build non-PIC objects only
+  -shared           do not build a \`.o' file suitable for static linking
+  -static           only build a \`.o' file suitable for static linking
+  -Wc,FLAG          pass FLAG directly to the compiler
+
+COMPILE-COMMAND is a command to be used in creating a \`standard' object file
+from the given SOURCEFILE.
+
+The output file name is determined by removing the directory component from
+SOURCEFILE, then substituting the C source code suffix \`.c' with the
+library object suffix, \`.lo'."
+        ;;
+
+      execute)
+        $ECHO \
+"Usage: $progname [OPTION]... --mode=execute COMMAND [ARGS]...
+
+Automatically set library path, then run a program.
+
+This mode accepts the following additional options:
+
+  -dlopen FILE      add the directory containing FILE to the library path
+
+This mode sets the library path environment variable according to \`-dlopen'
+flags.
+
+If any of the ARGS are libtool executable wrappers, then they are translated
+into their corresponding uninstalled binary, and any of their required library
+directories are added to the library path.
+
+Then, COMMAND is executed, with ARGS as arguments."
+        ;;
+
+      finish)
+        $ECHO \
+"Usage: $progname [OPTION]... --mode=finish [LIBDIR]...
+
+Complete the installation of libtool libraries.
+
+Each LIBDIR is a directory that contains libtool libraries.
+
+The commands that this mode executes may require superuser privileges.  Use
+the \`--dry-run' option if you just want to see what would be executed."
+        ;;
+
+      install)
+        $ECHO \
+"Usage: $progname [OPTION]... --mode=install INSTALL-COMMAND...
+
+Install executables or libraries.
+
+INSTALL-COMMAND is the installation command.  The first component should be
+either the \`install' or \`cp' program.
+
+The following components of INSTALL-COMMAND are treated specially:
+
+  -inst-prefix-dir PREFIX-DIR  Use PREFIX-DIR as a staging area for installation
+
+The rest of the components are interpreted as arguments to that command (only
+BSD-compatible install options are recognized)."
+        ;;
+
+      link)
+        $ECHO \
+"Usage: $progname [OPTION]... --mode=link LINK-COMMAND...
+
+Link object files or libraries together to form another library, or to
+create an executable program.
+
+LINK-COMMAND is a command using the C compiler that you would use to create
+a program from several object files.
+
+The following components of LINK-COMMAND are treated specially:
+
+  -all-static       do not do any dynamic linking at all
+  -avoid-version    do not add a version suffix if possible
+  -bindir BINDIR    specify path to binaries directory (for systems where
+                    libraries must be found in the PATH setting at runtime)
+  -dlopen FILE      \`-dlpreopen' FILE if it cannot be dlopened at runtime
+  -dlpreopen FILE   link in FILE and add its symbols to lt_preloaded_symbols
+  -export-dynamic   allow symbols from OUTPUT-FILE to be resolved with dlsym(3)
+  -export-symbols SYMFILE
+                    try to export only the symbols listed in SYMFILE
+  -export-symbols-regex REGEX
+                    try to export only the symbols matching REGEX
+  -LLIBDIR          search LIBDIR for required installed libraries
+  -lNAME            OUTPUT-FILE requires the installed library libNAME
+  -module           build a library that can dlopened
+  -no-fast-install  disable the fast-install mode
+  -no-install       link a not-installable executable
+  -no-undefined     declare that a library does not refer to external symbols
+  -o OUTPUT-FILE    create OUTPUT-FILE from the specified objects
+  -objectlist FILE  Use a list of object files found in FILE to specify objects
+  -precious-files-regex REGEX
+                    don't remove output files matching REGEX
+  -release RELEASE  specify package release information
+  -rpath LIBDIR     the created library will eventually be installed in LIBDIR
+  -R[ ]LIBDIR       add LIBDIR to the runtime path of programs and libraries
+  -shared           only do dynamic linking of libtool libraries
+  -shrext SUFFIX    override the standard shared library file extension
+  -static           do not do any dynamic linking of uninstalled libtool libraries
+  -static-libtool-libs
+                    do not do any dynamic linking of libtool libraries
+  -version-info CURRENT[:REVISION[:AGE]]
+                    specify library version info [each variable defaults to 0]
+  -weak LIBNAME     declare that the target provides the LIBNAME interface
+  -Wc,FLAG
+  -Xcompiler FLAG   pass linker-specific FLAG directly to the compiler
+  -Wl,FLAG
+  -Xlinker FLAG     pass linker-specific FLAG directly to the linker
+  -XCClinker FLAG   pass link-specific FLAG to the compiler driver (CC)
+
+All other options (arguments beginning with \`-') are ignored.
+
+Every other argument is treated as a filename.  Files ending in \`.la' are
+treated as uninstalled libtool libraries, other files are standard or library
+object files.
+
+If the OUTPUT-FILE ends in \`.la', then a libtool library is created,
+only library objects (\`.lo' files) may be specified, and \`-rpath' is
+required, except when creating a convenience library.
+
+If OUTPUT-FILE ends in \`.a' or \`.lib', then a standard library is created
+using \`ar' and \`ranlib', or on Windows using \`lib'.
+
+If OUTPUT-FILE ends in \`.lo' or \`.${objext}', then a reloadable object file
+is created, otherwise an executable program is created."
+        ;;
+
+      uninstall)
+        $ECHO \
+"Usage: $progname [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE...
+
+Remove libraries from an installation directory.
+
+RM is the name of the program to use to delete files associated with each FILE
+(typically \`/bin/rm').  RM-OPTIONS are options (such as \`-f') to be passed
+to RM.
+
+If FILE is a libtool library, all the files associated with it are deleted.
+Otherwise, only FILE itself is deleted using RM."
+        ;;
+
+      *)
+        func_fatal_help "invalid operation mode \`$opt_mode'"
+        ;;
+    esac
+
+    echo
+    $ECHO "Try \`$progname --help' for more information about other modes."
+}
+
+# With any --mode arg collected: if $opt_help is `:' print that mode's help, else print a digest of all modes; then exit.
+if $opt_help; then
+  if test "$opt_help" = :; then
+    func_mode_help
+  else
+    {
+      func_help noexit
+      for opt_mode in compile link execute install finish uninstall clean; do
+	func_mode_help
+      done
+    } | $SED -n '1p; 2,$s/^Usage:/  or: /p'
+    {
+      func_help noexit
+      for opt_mode in compile link execute install finish uninstall clean; do
+	echo
+	func_mode_help
+      done
+    } |
+    $SED '1d
+      /^When reporting/,/^Report/{
+	H
+	d
+      }
+      $x
+      /information about other modes/d
+      /more detailed .*MODE/d
+      s/^Usage:.*--mode=\([^ ]*\) .*/Description of \1 mode:/'
+  fi
+  exit $?
+fi
+
+
+# func_mode_execute arg...  -- add -dlopen directories to $shlibpath_var, map wrapper ARGs to real binaries, set exec_cmd
+func_mode_execute ()
+{
+    $opt_debug
+    # The command name comes from $nonopt; the positional args are its arguments.
+    cmd="$nonopt"
+    test -z "$cmd" && \
+      func_fatal_help "you must specify a COMMAND"
+
+    # Handle -dlopen flags immediately.
+    for file in $opt_dlopen; do
+      test -f "$file" \
+	|| func_fatal_help "\`$file' is not a file"
+
+      dir=
+      case $file in
+      *.la)
+	func_resolve_sysroot "$file"
+	file=$func_resolve_sysroot_result
+
+	# Check that this really is a libtool archive; report the path actually tested.
+	func_lalib_unsafe_p "$file" \
+	  || func_fatal_help "\`$file' is not a valid libtool archive"
+
+	# Read the libtool library.
+	dlname=
+	library_names=
+	func_source "$file"
+
+	# Skip this library if it cannot be dlopened.
+	if test -z "$dlname"; then
+	  # Warn if it was a shared library.
+	  test -n "$library_names" && \
+	    func_warning "\`$file' was not linked with \`-export-dynamic'"
+	  continue
+	fi
+
+	func_dirname "$file" "" "."
+	dir="$func_dirname_result"
+
+	if test -f "$dir/$objdir/$dlname"; then
+	  func_append dir "/$objdir"
+	else
+	  if test ! -f "$dir/$dlname"; then
+	    func_fatal_error "cannot find \`$dlname' in \`$dir' or \`$dir/$objdir'"
+	  fi
+	fi
+	;;
+
+      *.lo)
+	# Just add the directory containing the .lo file.
+	func_dirname "$file" "" "."
+	dir="$func_dirname_result"
+	;;
+
+      *)
+	func_warning "\`-dlopen' is ignored for non-libtool libraries and objects"
+	continue
+	;;
+      esac
+
+      # Get the absolute pathname (keep $dir unchanged if the cd fails).
+      absdir=`cd "$dir" && pwd`
+      test -n "$absdir" && dir="$absdir"
+
+      # Now prepend the directory to the variable named by shlibpath_var.
+      if eval "test -z \"\$$shlibpath_var\""; then
+	eval "$shlibpath_var=\"\$dir\""
+      else
+	eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\""
+      fi
+    done
+
+    # This variable tells wrapper scripts just to set shlibpath_var
+    # rather than running their programs.
+    libtool_execute_magic="$magic"
+
+    # Check if any of the arguments is a wrapper script.
+    args=
+    for file
+    do
+      case $file in
+      -* | *.la | *.lo ) ;;
+      *)
+	# Do a test to see if this is really a libtool program.
+	if func_ltwrapper_script_p "$file"; then
+	  func_source "$file"
+	  # Transform arg to wrapped name.
+	  file="$progdir/$program"
+	elif func_ltwrapper_executable_p "$file"; then
+	  func_ltwrapper_scriptname "$file"
+	  func_source "$func_ltwrapper_scriptname_result"
+	  # Transform arg to wrapped name.
+	  file="$progdir/$program"
+	fi
+	;;
+      esac
+      # Quote arguments (to preserve shell metacharacters).
+      func_append_quoted args "$file"
+    done
+
+    if test "X$opt_dry_run" = Xfalse; then
+      if test -n "$shlibpath_var"; then
+	# Export the shlibpath_var.
+	eval "export $shlibpath_var"
+      fi
+
+      # Restore saved environment variables
+      for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES
+      do
+	eval "if test \"\${save_$lt_var+set}\" = set; then
+                $lt_var=\$save_$lt_var; export $lt_var
+	      else
+		$lt_unset $lt_var
+	      fi"
+      done
+
+      # Record the command in exec_cmd; this function does not run it itself.
+      exec_cmd="\$cmd$args"
+    else
+      # Display what would be done.
+      if test -n "$shlibpath_var"; then
+	eval "\$ECHO \"\$shlibpath_var=\$$shlibpath_var\""
+	echo "export $shlibpath_var"
+      fi
+      $ECHO "$cmd$args"
+      exit $EXIT_SUCCESS
+    fi
+}
+
+test "$opt_mode" = execute && func_mode_execute ${1+"$@"}
+
+
+# func_mode_finish arg...  -- clean sysroot references out of the given .la files, run finish commands for each LIBDIR, print advice
+func_mode_finish ()
+{
+    $opt_debug
+    libs=
+    libdirs=
+    admincmds=
+
+    for opt in "$nonopt" ${1+"$@"}
+    do
+      if test -d "$opt"; then
+	func_append libdirs " $opt"
+
+      elif test -f "$opt"; then
+	if func_lalib_unsafe_p "$opt"; then
+	  func_append libs " $opt"
+	else
+	  func_warning "\`$opt' is not a valid libtool archive"
+	fi
+
+      else
+	func_fatal_error "invalid argument \`$opt'"
+      fi
+    done
+
+    if test -n "$libs"; then
+      if test -n "$lt_sysroot"; then
+        sysroot_regex=`$ECHO "$lt_sysroot" | $SED "$sed_make_literal_regex"`
+        sysroot_cmd="s/\([ ']\)$sysroot_regex/\1/g;"
+      else
+        sysroot_cmd=
+      fi
+
+      # Remove sysroot references and \`=' prefixes from each archive, via a temporary copy.
+      if $opt_dry_run; then
+        for lib in $libs; do
+          echo "removing references to $lt_sysroot and \`=' prefixes from $lib"
+        done
+      else
+        tmpdir=`func_mktempdir`
+        for lib in $libs; do
+	  $SED -e "${sysroot_cmd} s/\([ ']-[LR]\)=/\1/g; s/\([ ']\)=/\1/g" "$lib" \
+	    > "$tmpdir/tmp-la"
+	  mv -f "$tmpdir/tmp-la" "$lib"
+	done
+        ${RM}r "$tmpdir"
+      fi
+    fi
+
+    if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then
+      for libdir in $libdirs; do
+	if test -n "$finish_cmds"; then
+	  # Do each command in the finish commands.
+	  func_execute_cmds "$finish_cmds" 'admincmds="$admincmds
+'"$cmd"'"'
+	fi
+	if test -n "$finish_eval"; then
+	  # Do the single finish_eval; if it fails, remember it for the admin hint below.
+	  eval cmds=\"$finish_eval\"
+	  $opt_dry_run || eval "$cmds" || func_append admincmds "
+       $cmds"
+	fi
+      done
+    fi
+
+    # Exit here if they wanted silent mode.
+    $opt_silent && exit $EXIT_SUCCESS
+
+    if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then
+      echo "----------------------------------------------------------------------"
+      echo "Libraries have been installed in:"
+      for libdir in $libdirs; do
+	$ECHO "   $libdir"
+      done
+      echo
+      echo "If you ever happen to want to link against installed libraries"
+      echo "in a given directory, LIBDIR, you must either use libtool, and"
+      echo "specify the full pathname of the library, or use the \`-LLIBDIR'"
+      echo "flag during linking and do at least one of the following:"
+      if test -n "$shlibpath_var"; then
+	echo "   - add LIBDIR to the \`$shlibpath_var' environment variable"
+	echo "     during execution"
+      fi
+      if test -n "$runpath_var"; then
+	echo "   - add LIBDIR to the \`$runpath_var' environment variable"
+	echo "     during linking"
+      fi
+      if test -n "$hardcode_libdir_flag_spec"; then
+	libdir=LIBDIR
+	eval flag=\"$hardcode_libdir_flag_spec\"
+
+	$ECHO "   - use the \`$flag' linker flag"
+      fi
+      if test -n "$admincmds"; then
+	$ECHO "   - have your system administrator run these commands:$admincmds"
+      fi
+      if test -f /etc/ld.so.conf; then
+	echo "   - have your system administrator add LIBDIR to \`/etc/ld.so.conf'"
+      fi
+      echo
+
+      echo "See any operating system documentation about shared libraries for"
+      case $host in
+	solaris2.[6789]|solaris2.1[0-9])
+	  echo "more information, such as the ld(1), crle(1) and ld.so(8) manual"
+	  echo "pages."
+	  ;;
+	*)
+	  echo "more information, such as the ld(1) and ld.so(8) manual pages."
+	  ;;
+      esac
+      echo "----------------------------------------------------------------------"
+    fi
+    exit $EXIT_SUCCESS
+}
+
+test "$opt_mode" = finish && func_mode_finish ${1+"$@"}
+
+
+# func_mode_install arg...  -- parse an install command line, then install each listed .la, .lo, static library or program
+func_mode_install ()
+{
+    $opt_debug
+    # There may be an optional sh(1) argument at the beginning of
+    # install_prog (especially on Windows NT).
+    if test "$nonopt" = "$SHELL" || test "$nonopt" = /bin/sh ||
+       # Allow the use of GNU shtool's install command.
+       case $nonopt in *shtool*) :;; *) false;; esac; then
+      # Aesthetically quote it.
+      func_quote_for_eval "$nonopt"
+      install_prog="$func_quote_for_eval_result "
+      arg=$1
+      shift
+    else
+      install_prog=
+      arg=$nonopt
+    fi
+
+    # The real first argument should be the name of the installation program.
+    # Aesthetically quote it.
+    func_quote_for_eval "$arg"
+    func_append install_prog "$func_quote_for_eval_result"
+    install_shared_prog=$install_prog
+    case " $install_prog " in
+      *[\\\ /]cp\ *) install_cp=: ;;
+      *) install_cp=false ;;
+    esac
+
+    # We need to accept at least all the BSD install flags.
+    dest=
+    files=
+    opts=
+    prev=
+    install_type=
+    isdir=no
+    stripme=
+    no_mode=:
+    for arg
+    do
+      arg2=
+      if test -n "$dest"; then
+	func_append files " $dest"
+	dest=$arg
+	continue
+      fi
+
+      case $arg in
+      -d) isdir=yes ;;
+      -f)
+	if $install_cp; then :; else
+	  prev=$arg
+	fi
+	;;
+      -g | -m | -o)
+	prev=$arg
+	;;
+      -s)
+	stripme=" -s"
+	continue
+	;;
+      -*)
+	;;
+      *)
+	# If the previous option needed an argument, this word is it (not a file or destination).
+	if test -n "$prev"; then
+	  if test "x$prev" = x-m && test -n "$install_override_mode"; then
+	    arg2=$install_override_mode
+	    no_mode=false
+	  fi
+	  prev=
+	else
+	  dest=$arg
+	  continue
+	fi
+	;;
+      esac
+
+      # Aesthetically quote the argument.
+      func_quote_for_eval "$arg"
+      func_append install_prog " $func_quote_for_eval_result"
+      if test -n "$arg2"; then
+	func_quote_for_eval "$arg2"
+      fi
+      func_append install_shared_prog " $func_quote_for_eval_result"
+    done
+
+    test -z "$install_prog" && \
+      func_fatal_help "you must specify an install program"
+
+    test -n "$prev" && \
+      func_fatal_help "the \`$prev' option requires an argument"
+
+    if test -n "$install_override_mode" && $no_mode; then
+      if $install_cp; then :; else
+	func_quote_for_eval "$install_override_mode"
+	func_append install_shared_prog " -m $func_quote_for_eval_result"
+      fi
+    fi
+
+    if test -z "$files"; then
+      if test -z "$dest"; then
+	func_fatal_help "no file or destination specified"
+      else
+	func_fatal_help "you must specify a destination"
+      fi
+    fi
+
+    # Strip any trailing slash from the destination.
+    func_stripname '' '/' "$dest"
+    dest=$func_stripname_result
+
+    # Check to see that the destination is a directory.
+    test -d "$dest" && isdir=yes
+    if test "$isdir" = yes; then
+      destdir="$dest"
+      destname=
+    else
+      func_dirname_and_basename "$dest" "" "."
+      destdir="$func_dirname_result"
+      destname="$func_basename_result"
+
+      # Not a directory, so check to see that there is only one file specified.
+      set dummy $files; shift
+      test "$#" -gt 1 && \
+	func_fatal_help "\`$dest' is not a directory"
+    fi
+    case $destdir in
+    [\\/]* | [A-Za-z]:[\\/]*) ;;
+    *)
+      for file in $files; do
+	case $file in
+	*.lo) ;;
+	*)
+	  func_fatal_help "\`$destdir' must be an absolute directory name"
+	  ;;
+	esac
+      done
+      ;;
+    esac
+
+    # This variable tells wrapper scripts just to set variables rather
+    # than running their programs.
+    libtool_install_magic="$magic"
+
+    staticlibs=
+    future_libdirs=
+    current_libdirs=
+    for file in $files; do
+
+      # Do each installation.
+      case $file in
+      *.$libext)
+	# Do the static libraries later.
+	func_append staticlibs " $file"
+	;;
+
+      *.la)
+	func_resolve_sysroot "$file"
+	file=$func_resolve_sysroot_result
+
+	# Check to see that this really is a libtool archive.
+	func_lalib_unsafe_p "$file" \
+	  || func_fatal_help "\`$file' is not a valid libtool archive"
+
+	library_names=
+	old_library=
+	relink_command=
+	func_source "$file"
+
+	# Add the libdir to current_libdirs if it is the destination.
+	if test "X$destdir" = "X$libdir"; then
+	  case "$current_libdirs " in
+	  *" $libdir "*) ;;
+	  *) func_append current_libdirs " $libdir" ;;
+	  esac
+	else
+	  # Note the libdir as a future libdir.
+	  case "$future_libdirs " in
+	  *" $libdir "*) ;;
+	  *) func_append future_libdirs " $libdir" ;;
+	  esac
+	fi
+
+	func_dirname "$file" "/" ""
+	dir="$func_dirname_result"
+	func_append dir "$objdir"
+
+	if test -n "$relink_command"; then
+	  # Determine the prefix the user has applied to our future dir.
+	  inst_prefix_dir=`$ECHO "$destdir" | $SED -e "s%$libdir\$%%"`
+
+	  # Don't allow the user to place us outside of our expected
+	  # location b/c this prevents finding dependent libraries that
+	  # are installed to the same prefix.
+	  # At present, this check doesn't affect windows .dll's that
+	  # are installed into $libdir/../bin (currently, that works fine)
+	  # but it's something to keep an eye on.
+	  test "$inst_prefix_dir" = "$destdir" && \
+	    func_fatal_error "error: cannot install \`$file' to a directory not ending in $libdir"
+
+	  if test -n "$inst_prefix_dir"; then
+	    # Stick the inst_prefix_dir data into the link command.
+	    relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%-inst-prefix-dir $inst_prefix_dir%"`
+	  else
+	    relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%%"`
+	  fi
+
+	  func_warning "relinking \`$file'"
+	  func_show_eval "$relink_command" \
+	    'func_fatal_error "error: relink \`$file'\'' with the above command before installing it"'
+	fi
+
+	# Split the shared library names: the first is the real file, the rest become symlinks.
+	set dummy $library_names; shift
+	if test -n "$1"; then
+	  realname="$1"
+	  shift
+
+	  srcname="$realname"
+	  test -n "$relink_command" && srcname="$realname"T
+
+	  # Install the shared library and build the symlinks.
+	  func_show_eval "$install_shared_prog $dir/$srcname $destdir/$realname" \
+	      'exit $?'
+	  tstripme="$stripme"
+	  case $host_os in
+	  cygwin* | mingw* | pw32* | cegcc*)
+	    case $realname in
+	    *.dll.a)
+	      tstripme=""
+	      ;;
+	    esac
+	    ;;
+	  esac
+	  if test -n "$tstripme" && test -n "$striplib"; then
+	    func_show_eval "$striplib $destdir/$realname" 'exit $?'
+	  fi
+
+	  if test "$#" -gt 0; then
+	    # Delete the old symlinks, and create new ones.
+	    # Try `ln -sf' first, because the `ln' binary might depend on
+	    # the symlink we replace!  Solaris /bin/ln does not understand -f,
+	    # so we also need to try rm && ln -s.
+	    for linkname
+	    do
+	      test "$linkname" != "$realname" \
+		&& func_show_eval "(cd $destdir && { $LN_S -f $realname $linkname || { $RM $linkname && $LN_S $realname $linkname; }; })"
+	    done
+	  fi
+
+	  # Do each command in the postinstall commands.
+	  lib="$destdir/$realname"
+	  func_execute_cmds "$postinstall_cmds" 'exit $?'
+	fi
+
+	# Install the pseudo-library for information purposes.
+	func_basename "$file"
+	name="$func_basename_result"
+	instname="$dir/$name"i
+	func_show_eval "$install_prog $instname $destdir/$name" 'exit $?'
+
+	# Maybe install the static library, too.
+	test -n "$old_library" && func_append staticlibs " $dir/$old_library"
+	;;
+
+      *.lo)
+	# Install (i.e. copy) a libtool object.
+
+	# Figure out destination file name, if it wasn't already specified.
+	if test -n "$destname"; then
+	  destfile="$destdir/$destname"
+	else
+	  func_basename "$file"
+	  destfile="$func_basename_result"
+	  destfile="$destdir/$destfile"
+	fi
+
+	# Deduce the name of the destination old-style object file.
+	case $destfile in
+	*.lo)
+	  func_lo2o "$destfile"
+	  staticdest=$func_lo2o_result
+	  ;;
+	*.$objext)
+	  staticdest="$destfile"
+	  destfile=
+	  ;;
+	*)
+	  func_fatal_help "cannot copy a libtool object to \`$destfile'"
+	  ;;
+	esac
+
+	# Install the libtool object if requested.
+	test -n "$destfile" && \
+	  func_show_eval "$install_prog $file $destfile" 'exit $?'
+
+	# Install the old object if enabled.
+	if test "$build_old_libs" = yes; then
+	  # Deduce the name of the old-style object file.
+	  func_lo2o "$file"
+	  staticobj=$func_lo2o_result
+	  func_show_eval "$install_prog \$staticobj \$staticdest" 'exit $?'
+	fi
+	exit $EXIT_SUCCESS
+	;;
+
+      *)
+	# Figure out destination file name, if it wasn't already specified.
+	if test -n "$destname"; then
+	  destfile="$destdir/$destname"
+	else
+	  func_basename "$file"
+	  destfile="$func_basename_result"
+	  destfile="$destdir/$destfile"
+	fi
+
+	# If the file is missing, and there is a .exe on the end, strip it
+	# because it is most likely a libtool script we actually want to
+	# install.
+	stripped_ext=""
+	case $file in
+	  *.exe)
+	    if test ! -f "$file"; then
+	      func_stripname '' '.exe' "$file"
+	      file=$func_stripname_result
+	      stripped_ext=".exe"
+	    fi
+	    ;;
+	esac
+
+	# Do a test to see if this is really a libtool program.
+	case $host in
+	*cygwin* | *mingw*)
+	    if func_ltwrapper_executable_p "$file"; then
+	      func_ltwrapper_scriptname "$file"
+	      wrapper=$func_ltwrapper_scriptname_result
+	    else
+	      func_stripname '' '.exe' "$file"
+	      wrapper=$func_stripname_result
+	    fi
+	    ;;
+	*)
+	    wrapper=$file
+	    ;;
+	esac
+	if func_ltwrapper_script_p "$wrapper"; then
+	  notinst_deplibs=
+	  relink_command=
+
+	  func_source "$wrapper"
+
+	  # Check the variables that should have been set.
+	  test -z "$generated_by_libtool_version" && \
+	    func_fatal_error "invalid libtool wrapper script \`$wrapper'"
+
+	  finalize=yes
+	  for lib in $notinst_deplibs; do
+	    # Check to see that each library is installed.
+	    libdir=
+	    if test -f "$lib"; then
+	      func_source "$lib"
+	    fi
+	    libfile="$libdir/"`$ECHO "$lib" | $SED 's%^.*/%%g'` ### testsuite: skip nested quoting test
+	    if test -n "$libdir" && test ! -f "$libfile"; then
+	      func_warning "\`$lib' has not been installed in \`$libdir'"
+	      finalize=no
+	    fi
+	  done
+
+	  relink_command=
+	  func_source "$wrapper"
+
+	  outputname=
+	  if test "$fast_install" = no && test -n "$relink_command"; then
+	    $opt_dry_run || {
+	      if test "$finalize" = yes; then
+	        tmpdir=`func_mktempdir`
+		func_basename "$file$stripped_ext"
+		file="$func_basename_result"
+	        outputname="$tmpdir/$file"
+	        # Replace the output file specification.
+	        relink_command=`$ECHO "$relink_command" | $SED 's%@OUTPUT@%'"$outputname"'%g'`
+
+	        $opt_silent || {
+	          func_quote_for_expand "$relink_command"
+		  eval "func_echo $func_quote_for_expand_result"
+	        }
+	        if eval "$relink_command"; then :
+	          else
+		  func_error "error: relink \`$file' with the above command before installing it"
+		  $opt_dry_run || ${RM}r "$tmpdir"
+		  continue
+	        fi
+	        file="$outputname"
+	      else
+	        func_warning "cannot relink \`$file'"
+	      fi
+	    }
+	  else
+	    # Install the binary that we compiled earlier.
+	    file=`$ECHO "$file$stripped_ext" | $SED "s%\([^/]*\)$%$objdir/\1%"`
+	  fi
+	fi
+
+	# remove .exe since cygwin /usr/bin/install will append another
+	# one anyway
+	case $install_prog,$host in
+	*/usr/bin/install*,*cygwin*)
+	  case $file:$destfile in
+	  *.exe:*.exe)
+	    # this is ok
+	    ;;
+	  *.exe:*)
+	    destfile=$destfile.exe
+	    ;;
+	  *:*.exe)
+	    func_stripname '' '.exe' "$destfile"
+	    destfile=$func_stripname_result
+	    ;;
+	  esac
+	  ;;
+	esac
+	func_show_eval "$install_prog\$stripme \$file \$destfile" 'exit $?'
+	$opt_dry_run || if test -n "$outputname"; then
+	  ${RM}r "$tmpdir"
+	fi
+	;;
+      esac
+    done
+
+    for file in $staticlibs; do
+      func_basename "$file"
+      name="$func_basename_result"
+
+      # Set up the ranlib parameters.
+      oldlib="$destdir/$name"
+      func_to_tool_file "$oldlib" func_convert_file_msys_to_w32
+      tool_oldlib=$func_to_tool_file_result
+
+      func_show_eval "$install_prog \$file \$oldlib" 'exit $?'
+
+      if test -n "$stripme" && test -n "$old_striplib"; then
+	func_show_eval "$old_striplib $tool_oldlib" 'exit $?'
+      fi
+
+      # Do each command in the postinstall commands.
+      func_execute_cmds "$old_postinstall_cmds" 'exit $?'
+    done
+
+    test -n "$future_libdirs" && \
+      func_warning "remember to run \`$progname --finish$future_libdirs'"
+
+    if test -n "$current_libdirs"; then
+      # On a dry run, pass -n through to the --finish invocation recorded in exec_cmd.
+      $opt_dry_run && current_libdirs=" -n$current_libdirs"
+      exec_cmd='$SHELL $progpath $preserve_args --finish$current_libdirs'
+    else
+      exit $EXIT_SUCCESS
+    fi
+}
+
+test "$opt_mode" = install && func_mode_install ${1+"$@"}
+
+
+# func_generate_dlsyms outputname originator pic_p
+# Extract symbols from dlprefiles and create ${outputname}S.o with
+# a dlpreopen symbol table.
+func_generate_dlsyms ()
+{
+    $opt_debug
+    my_outputname="$1"
+    my_originator="$2"
+    my_pic_p="${3-no}"
+    my_prefix=`$ECHO "$my_originator" | sed 's%[^a-zA-Z0-9]%_%g'`
+    my_dlsyms=
+
+    if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
+      if test -n "$NM" && test -n "$global_symbol_pipe"; then
+	my_dlsyms="${my_outputname}S.c"
+      else
+	func_error "not configured to extract global symbols from dlpreopened files"
+      fi
+    fi
+
+    if test -n "$my_dlsyms"; then
+      case $my_dlsyms in
+      "") ;;
+      *.c)
+	# Discover the nlist of each of the dlfiles.
+	nlist="$output_objdir/${my_outputname}.nm"
+
+	func_show_eval "$RM $nlist ${nlist}S ${nlist}T"
+
+	# Parse the name list into a source file.
+	func_verbose "creating $output_objdir/$my_dlsyms"
+
+	$opt_dry_run || $ECHO > "$output_objdir/$my_dlsyms" "\
+/* $my_dlsyms - symbol resolution table for \`$my_outputname' dlsym emulation. */
+/* Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION */
+
+#ifdef __cplusplus
+extern \"C\" {
+#endif
+
+#if defined(__GNUC__) && (((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4)) || (__GNUC__ > 4))
+#pragma GCC diagnostic ignored \"-Wstrict-prototypes\"
+#endif
+
+/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests.  */
+#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE)
+/* DATA imports from DLLs on WIN32 con't be const, because runtime
+   relocations are performed -- see ld's documentation on pseudo-relocs.  */
+# define LT_DLSYM_CONST
+#elif defined(__osf__)
+/* This system does not cope well with relocations in const data.  */
+# define LT_DLSYM_CONST
+#else
+# define LT_DLSYM_CONST const
+#endif
+
+/* External symbol declarations for the compiler. */\
+"
+
+	if test "$dlself" = yes; then
+	  func_verbose "generating symbol list for \`$output'"
+
+	  $opt_dry_run || echo ': @PROGRAM@ ' > "$nlist"
+
+	  # Add our own program objects to the symbol list.
+	  progfiles=`$ECHO "$objs$old_deplibs" | $SP2NL | $SED "$lo2o" | $NL2SP`
+	  for progfile in $progfiles; do
+	    func_to_tool_file "$progfile" func_convert_file_msys_to_w32
+	    func_verbose "extracting global C symbols from \`$func_to_tool_file_result'"
+	    $opt_dry_run || eval "$NM $func_to_tool_file_result | $global_symbol_pipe >> '$nlist'"
+	  done
+
+	  if test -n "$exclude_expsyms"; then
+	    $opt_dry_run || {
+	      eval '$EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T'
+	      eval '$MV "$nlist"T "$nlist"'
+	    }
+	  fi
+
+	  if test -n "$export_symbols_regex"; then
+	    $opt_dry_run || {
+	      eval '$EGREP -e "$export_symbols_regex" "$nlist" > "$nlist"T'
+	      eval '$MV "$nlist"T "$nlist"'
+	    }
+	  fi
+
+	  # Prepare the list of exported symbols
+	  if test -z "$export_symbols"; then
+	    export_symbols="$output_objdir/$outputname.exp"
+	    $opt_dry_run || {
+	      $RM $export_symbols
+	      eval "${SED} -n -e '/^: @PROGRAM@ $/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"'
+	      case $host in
+	      *cygwin* | *mingw* | *cegcc* )
+                eval "echo EXPORTS "'> "$output_objdir/$outputname.def"'
+                eval 'cat "$export_symbols" >> "$output_objdir/$outputname.def"'
+	        ;;
+	      esac
+	    }
+	  else
+	    $opt_dry_run || {
+	      eval "${SED} -e 's/\([].[*^$]\)/\\\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$outputname.exp"'
+	      eval '$GREP -f "$output_objdir/$outputname.exp" < "$nlist" > "$nlist"T'
+	      eval '$MV "$nlist"T "$nlist"'
+	      case $host in
+	        *cygwin* | *mingw* | *cegcc* )
+	          eval "echo EXPORTS "'> "$output_objdir/$outputname.def"'
+	          eval 'cat "$nlist" >> "$output_objdir/$outputname.def"'
+	          ;;
+	      esac
+	    }
+	  fi
+	fi
+
+	for dlprefile in $dlprefiles; do
+	  func_verbose "extracting global C symbols from \`$dlprefile'"
+	  func_basename "$dlprefile"
+	  name="$func_basename_result"
+          case $host in
+	    *cygwin* | *mingw* | *cegcc* )
+	      # if an import library, we need to obtain dlname
+	      if func_win32_import_lib_p "$dlprefile"; then
+	        func_tr_sh "$dlprefile"
+	        eval "curr_lafile=\$libfile_$func_tr_sh_result"
+	        dlprefile_dlbasename=""
+	        if test -n "$curr_lafile" && func_lalib_p "$curr_lafile"; then
+	          # Use subshell, to avoid clobbering current variable values
+	          dlprefile_dlname=`source "$curr_lafile" && echo "$dlname"`
+	          if test -n "$dlprefile_dlname" ; then
+	            func_basename "$dlprefile_dlname"
+	            dlprefile_dlbasename="$func_basename_result"
+	          else
+	            # no lafile. user explicitly requested -dlpreopen <import library>.
+	            $sharedlib_from_linklib_cmd "$dlprefile"
+	            dlprefile_dlbasename=$sharedlib_from_linklib_result
+	          fi
+	        fi
+	        $opt_dry_run || {
+	          if test -n "$dlprefile_dlbasename" ; then
+	            eval '$ECHO ": $dlprefile_dlbasename" >> "$nlist"'
+	          else
+	            func_warning "Could not compute DLL name from $name"
+	            eval '$ECHO ": $name " >> "$nlist"'
+	          fi
+	          func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32
+	          eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe |
+	            $SED -e '/I __imp/d' -e 's/I __nm_/D /;s/_nm__//' >> '$nlist'"
+	        }
+	      else # not an import lib
+	        $opt_dry_run || {
+	          eval '$ECHO ": $name " >> "$nlist"'
+	          func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32
+	          eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'"
+	        }
+	      fi
+	    ;;
+	    *)
+	      $opt_dry_run || {
+	        eval '$ECHO ": $name " >> "$nlist"'
+	        func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32
+	        eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'"
+	      }
+	    ;;
+          esac
+	done
+
+	$opt_dry_run || {
+	  # Make sure we have at least an empty file.
+	  test -f "$nlist" || : > "$nlist"
+
+	  if test -n "$exclude_expsyms"; then
+	    $EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T
+	    $MV "$nlist"T "$nlist"
+	  fi
+
+	  # Try sorting and uniquifying the output.
+	  if $GREP -v "^: " < "$nlist" |
+	      if sort -k 3 </dev/null >/dev/null 2>&1; then
+		sort -k 3
+	      else
+		sort +2
+	      fi |
+	      uniq > "$nlist"S; then
+	    :
+	  else
+	    $GREP -v "^: " < "$nlist" > "$nlist"S
+	  fi
+
+	  if test -f "$nlist"S; then
+	    eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$my_dlsyms"'
+	  else
+	    echo '/* NONE */' >> "$output_objdir/$my_dlsyms"
+	  fi
+
+	  echo >> "$output_objdir/$my_dlsyms" "\
+
+/* The mapping between symbol names and symbols.  */
+typedef struct {
+  const char *name;
+  void *address;
+} lt_dlsymlist;
+extern LT_DLSYM_CONST lt_dlsymlist
+lt_${my_prefix}_LTX_preloaded_symbols[];
+LT_DLSYM_CONST lt_dlsymlist
+lt_${my_prefix}_LTX_preloaded_symbols[] =
+{\
+  { \"$my_originator\", (void *) 0 },"
+
+	  case $need_lib_prefix in
+	  no)
+	    eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$my_dlsyms"
+	    ;;
+	  *)
+	    eval "$global_symbol_to_c_name_address_lib_prefix" < "$nlist" >> "$output_objdir/$my_dlsyms"
+	    ;;
+	  esac
+	  echo >> "$output_objdir/$my_dlsyms" "\
+  {0, (void *) 0}
+};
+
+/* This works around a problem in FreeBSD linker */
+#ifdef FREEBSD_WORKAROUND
+static const void *lt_preloaded_setup() {
+  return lt_${my_prefix}_LTX_preloaded_symbols;
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif\
+"
+	} # !$opt_dry_run
+
+	pic_flag_for_symtable=
+	case "$compile_command " in
+	*" -static "*) ;;
+	*)
+	  case $host in
+	  # compiling the symbol table file with pic_flag works around
+	  # a FreeBSD bug that causes programs to crash when -lm is
+	  # linked before any other PIC object.  But we must not use
+	  # pic_flag when linking with -static.  The problem exists in
+	  # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1.
+	  *-*-freebsd2.*|*-*-freebsd3.0*|*-*-freebsdelf3.0*)
+	    pic_flag_for_symtable=" $pic_flag -DFREEBSD_WORKAROUND" ;;
+	  *-*-hpux*)
+	    pic_flag_for_symtable=" $pic_flag"  ;;
+	  *)
+	    if test "X$my_pic_p" != Xno; then
+	      pic_flag_for_symtable=" $pic_flag"
+	    fi
+	    ;;
+	  esac
+	  ;;
+	esac
+	symtab_cflags=
+	for arg in $LTCFLAGS; do
+	  case $arg in
+	  -pie | -fpie | -fPIE) ;;
+	  *) func_append symtab_cflags " $arg" ;;
+	  esac
+	done
+
+	# Now compile the dynamic symbol file.
+	func_show_eval '(cd $output_objdir && $LTCC$symtab_cflags -c$no_builtin_flag$pic_flag_for_symtable "$my_dlsyms")' 'exit $?'
+
+	# Clean up the generated files.
+	func_show_eval '$RM "$output_objdir/$my_dlsyms" "$nlist" "${nlist}S" "${nlist}T"'
+
+	# Transform the symbol file into the correct name.
+	symfileobj="$output_objdir/${my_outputname}S.$objext"
+	case $host in
+	*cygwin* | *mingw* | *cegcc* )
+	  if test -f "$output_objdir/$my_outputname.def"; then
+	    compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"`
+	    finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"`
+	  else
+	    compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"`
+	    finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"`
+	  fi
+	  ;;
+	*)
+	  compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"`
+	  finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"`
+	  ;;
+	esac
+	;;
+      *)
+	func_fatal_error "unknown suffix for \`$my_dlsyms'"
+	;;
+      esac
+    else
+      # We keep going just in case the user didn't refer to
+      # lt_preloaded_symbols.  The linker will fail if global_symbol_pipe
+      # really was required.
+
+      # Nullify the symbol file.
+      compile_command=`$ECHO "$compile_command" | $SED "s% @SYMFILE@%%"`
+      finalize_command=`$ECHO "$finalize_command" | $SED "s% @SYMFILE@%%"`
+    fi
+}
+
+# func_win32_libid arg
+# return the library type of file 'arg'
+#
+# Need a lot of goo to handle *both* DLLs and import libs
+# Has to be a shell function in order to 'eat' the argument
+# that is supplied when $file_magic_command is called.
+# Despite the name, also deal with 64 bit binaries.
+func_win32_libid ()
+{
+  # Classify the file named by $1 and print one of:
+  #   "x86 archive import", "x86 archive static", "x86 DLL", "unknown".
+  # The classification is also left in $win32_libid_type.
+  $opt_debug
+  # Default answer when none of the patterns below recognizes the file.
+  win32_libid_type="unknown"
+  # Quote "$1" so that a path containing whitespace or glob characters
+  # reaches file(1) as one unmodified argument.
+  win32_fileres=`file -L "$1" 2>/dev/null`
+  case $win32_fileres in
+  *ar\ archive\ import\ library*) # definitely import
+    win32_libid_type="x86 archive import"
+    ;;
+  *ar\ archive*) # could be an import, or static
+    # Keep the egrep pattern in sync with the one in _LT_CHECK_MAGIC_METHOD.
+    # The path is handed to eval as the literal text "$1" so that it is
+    # expanded exactly once, inside double quotes, matching the way the
+    # $NM invocation below passes its (converted) path.
+    if eval $OBJDUMP -f \"\$1\" | $SED -e '10q' 2>/dev/null |
+       $EGREP 'file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' >/dev/null; then
+      func_to_tool_file "$1" func_convert_file_msys_to_w32
+      # Scan at most the first 100 lines of nm output for an " I " symbol
+      # entry; print "import" and stop at the first hit.
+      win32_nmres=`eval $NM -f posix -A \"$func_to_tool_file_result\" |
+	$SED -n -e '
+	    1,100{
+		/ I /{
+		    s,.*,import,
+		    p
+		    q
+		}
+	    }'`
+      case $win32_nmres in
+      import*)  win32_libid_type="x86 archive import";;
+      *)        win32_libid_type="x86 archive static";;
+      esac
+    fi
+    ;;
+  *DLL*)
+    win32_libid_type="x86 DLL"
+    ;;
+  *executable*) # but shell scripts are "executable" too...
+    # Only treat it as a DLL when file(1) also reports a PE image.
+    case $win32_fileres in
+    *MS\ Windows\ PE\ Intel*)
+      win32_libid_type="x86 DLL"
+      ;;
+    esac
+    ;;
+  esac
+  $ECHO "$win32_libid_type"
+}
+
+# func_cygming_dll_for_implib ARG
+#
+# Platform-specific function to extract the
+# name of the DLL associated with the specified
+# import library ARG.
+# Invoked by eval'ing the libtool variable
+#    $sharedlib_from_linklib_cmd
+# Result is available in the variable
+#    $sharedlib_from_linklib_result
+func_cygming_dll_for_implib ()
+{
+  $opt_debug
+  # Let dlltool identify the DLL behind import library $1; whatever it
+  # prints becomes the result.
+  sharedlib_from_linklib_result=`
+    $DLLTOOL --identify-strict --identify "$1"`
+}
+
+# func_cygming_dll_for_implib_fallback_core SECTION_NAME LIBNAMEs
+#
+# This is the core of a fallback implementation of a
+# platform-specific function to extract the name of the
+# DLL associated with the specified import library LIBNAME.
+#
+# SECTION_NAME is either .idata$6 or .idata$7, depending
+# on the platform and compiler that created the implib.
+#
+# Echoes the name of the DLL associated with the
+# specified import library.
+func_cygming_dll_for_implib_fallback_core ()
+{
+  # $1 is the section name to dump, $2 the import library to inspect.
+  # The DLL name found (if any) is written to stdout.
+  $opt_debug
+  # Escape the section name with $sed_make_literal_regex so it can be
+  # embedded in the sed address below.
+  match_literal=`$ECHO "$1" | $SED "$sed_make_literal_regex"`
+  # Stage 1: dump the section from the archive with objdump, mark the start
+  # of each section dump, and strip the first 43 columns of every remaining
+  # line (NOTE(review): presumably the address and hex columns, leaving only
+  # the text column -- confirm against objdump -s output).
+  $OBJDUMP -s --section "$1" "$2" 2>/dev/null |
+    $SED '/^Contents of section '"$match_literal"':/{
+      # Place marker at beginning of archive member dllname section
+      s/.*/====MARK====/
+      p
+      d
+    }
+    # These lines can sometimes be longer than 43 characters, but
+    # are always uninteresting
+    /:[	 ]*file format pe[i]\{,1\}-/d
+    /^In archive [^:]*:/d
+    # Ensure marker is printed
+    /^====MARK====/p
+    # Remove all lines with less than 43 characters
+    /^.\{43\}/!d
+    # From remaining lines, remove first 43 characters
+    s/^.\{43\}//' |
+    # Stage 2: glue the lines that follow each marker into one line per
+    # section dump, drop the marker and trailing dots/whitespace, and print
+    # only non-empty results.
+    $SED -n '
+      # Join marker and all lines until next marker into a single line
+      /^====MARK====/ b para
+      H
+      $ b para
+      b
+      :para
+      x
+      s/\n//g
+      # Remove the marker
+      s/^====MARK====//
+      # Remove trailing dots and whitespace
+      s/[\. \t]*$//
+      # Print
+      /./p' |
+    # we now have a list, one entry per line, of the stringified
+    # contents of the appropriate section of all members of the
+    # archive which possess that section. Heuristic: eliminate
+    # all those which have a first or second character that is
+    # a '.' (that is, objdump's representation of an unprintable
+    # character.) This should work for all archives with less than
+    # 0x302f exports -- but will fail for DLLs whose name actually
+    # begins with a literal '.' or a single character followed by
+    # a '.'.
+    #
+    # Of those that remain, print the first one.
+    $SED -e '/^\./d;/^.\./d;q'
+}
+
+# func_cygming_gnu_implib_p ARG
+# This predicate returns with zero status (TRUE) if
+# ARG is a GNU/binutils-style import library. Returns
+# with nonzero status (FALSE) otherwise.
+func_cygming_gnu_implib_p ()
+{
+  $opt_debug
+  func_to_tool_file "$1" func_convert_file_msys_to_w32
+  # Collect every symbol line that looks like a binutils import-library
+  # marker (_head_*_a / _head_*_dll style, or *_iname).
+  func_cygming_gnu_implib_tmp=`
+    $NM "$func_to_tool_file_result" |
+      eval "$global_symbol_pipe" |
+      $EGREP ' (_head_[A-Za-z0-9_]+_[ad]l*|[A-Za-z0-9_]+_[ad]l*_iname)$'`
+  # TRUE exactly when at least one marker symbol was found.
+  if test -z "$func_cygming_gnu_implib_tmp"; then
+    return 1
+  fi
+  return 0
+}
+
+# func_cygming_ms_implib_p ARG
+# This predicate returns with zero status (TRUE) if
+# ARG is an MS-style import library. Returns
+# with nonzero status (FALSE) otherwise.
+func_cygming_ms_implib_p ()
+{
+  $opt_debug
+  func_to_tool_file "$1" func_convert_file_msys_to_w32
+  # Collect every symbol line mentioning _NULL_IMPORT_DESCRIPTOR.
+  func_cygming_ms_implib_tmp=`
+    $NM "$func_to_tool_file_result" |
+      eval "$global_symbol_pipe" |
+      $GREP '_NULL_IMPORT_DESCRIPTOR'`
+  # TRUE exactly when at least one such symbol was found.
+  if test -z "$func_cygming_ms_implib_tmp"; then
+    return 1
+  fi
+  return 0
+}
+
+# func_cygming_dll_for_implib_fallback ARG
+# Platform-specific function to extract the
+# name of the DLL associated with the specified
+# import library ARG.
+#
+# This fallback implementation is for use when $DLLTOOL
+# does not support the --identify-strict option.
+# Invoked by eval'ing the libtool variable
+#    $sharedlib_from_linklib_cmd
+# Result is available in the variable
+#    $sharedlib_from_linklib_result
+func_cygming_dll_for_implib_fallback ()
+{
+  $opt_debug
+  # Assume the library flavour is unknown (empty result) until one of the
+  # two predicates below recognizes it.
+  sharedlib_from_linklib_result=""
+  if func_cygming_gnu_implib_p "$1" ; then
+    # binutils import library: the DLL name is looked up in .idata$7
+    sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$7' "$1"`
+  elif func_cygming_ms_implib_p "$1" ; then
+    # ms-generated import library: the DLL name is looked up in .idata$6
+    sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$6' "$1"`
+  fi
+}
+
+
+# func_extract_an_archive dir oldlib
+func_extract_an_archive ()
+{
+    $opt_debug
+    # First argument: directory to extract into; second: the archive.
+    f_ex_an_ar_dir="$1"
+    shift
+    f_ex_an_ar_oldlib="$1"
+
+    # When extraction locking is enabled, spin until a hard link to
+    # $progpath can be created as the lock file (skipped on dry runs).
+    case $lock_old_archive_extraction in
+    yes)
+      lockfile=$f_ex_an_ar_oldlib.lock
+      until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null
+      do
+        func_echo "Waiting for $lockfile to be removed"
+        sleep 2
+      done
+      ;;
+    esac
+
+    # Unpack the archive inside the target directory; on failure remove
+    # the lock file and exit with the extraction status.
+    func_show_eval "(cd \$f_ex_an_ar_dir && $AR x \"\$f_ex_an_ar_oldlib\")" \
+		   'stat=$?; rm -f "$lockfile"; exit $stat'
+
+    # Release the lock again.
+    case $lock_old_archive_extraction in
+    yes) $opt_dry_run || rm -f "$lockfile" ;;
+    esac
+
+    # The member list must already be duplicate-free once sorted;
+    # otherwise identically named objects collided during extraction.
+    ($AR t "$f_ex_an_ar_oldlib" | sort | sort -uc >/dev/null 2>&1) ||
+      func_fatal_error "object name conflicts in archive: $f_ex_an_ar_dir/$f_ex_an_ar_oldlib"
+}
+
+
+# func_extract_archives gentop oldlib ...
+func_extract_archives ()
+{
+    # Extract every archive given after the first argument into its own
+    # subdirectory of the first argument (gentop), and leave the list of
+    # extracted object files in $func_extract_archives_result.
+    $opt_debug
+    my_gentop="$1"; shift
+    my_oldlibs=${1+"$@"}
+    my_oldobjs=""
+    my_xlib=""
+    my_xabs=""
+    my_xdir=""
+
+    for my_xlib in $my_oldlibs; do
+      # Extract the objects.
+      # Turn a relative archive name into an absolute one, because the
+      # extraction below changes directory.
+      case $my_xlib in
+	[\\/]* | [A-Za-z]:[\\/]*) my_xabs="$my_xlib" ;;
+	*) my_xabs=`pwd`"/$my_xlib" ;;
+      esac
+      func_basename "$my_xlib"
+      my_xlib="$func_basename_result"
+      my_xlib_u=$my_xlib
+      # Pick a directory name not yet recorded in $extracted_archives: on a
+      # clash, bump $extracted_serial and prefix the basename with
+      # "lt<serial>-", then check again.
+      while :; do
+        case " $extracted_archives " in
+	*" $my_xlib_u "*)
+	  func_arith $extracted_serial + 1
+	  extracted_serial=$func_arith_result
+	  my_xlib_u=lt$extracted_serial-$my_xlib ;;
+	*) break ;;
+	esac
+      done
+      extracted_archives="$extracted_archives $my_xlib_u"
+      my_xdir="$my_gentop/$my_xlib_u"
+
+      func_mkdir_p "$my_xdir"
+
+      case $host in
+      *-darwin*)
+	func_verbose "Extracting $my_xabs"
+	# Do not bother doing anything if just a dry run
+	$opt_dry_run || {
+	  darwin_orig_dir=`pwd`
+	  cd $my_xdir || exit $?
+	  darwin_archive=$my_xabs
+	  darwin_curdir=`pwd`
+	  darwin_base_archive=`basename "$darwin_archive"`
+	  # Non-empty only when lipo reports an "Architectures" line for the
+	  # archive; that selects the per-architecture path below.
+	  darwin_arches=`$LIPO -info "$darwin_archive" 2>/dev/null | $GREP Architectures 2>/dev/null || true`
+	  if test -n "$darwin_arches"; then
+	    darwin_arches=`$ECHO "$darwin_arches" | $SED -e 's/.*are://'`
+	    darwin_arch=
+	    func_verbose "$darwin_base_archive has multiple architectures $darwin_arches"
+	    # Thin the archive once per architecture into a scratch directory
+	    # named after the shell's PID and extract each thin copy there.
+	    for darwin_arch in  $darwin_arches ; do
+	      func_mkdir_p "unfat-$$/${darwin_base_archive}-${darwin_arch}"
+	      $LIPO -thin $darwin_arch -output "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}" "${darwin_archive}"
+	      cd "unfat-$$/${darwin_base_archive}-${darwin_arch}"
+	      func_extract_an_archive "`pwd`" "${darwin_base_archive}"
+	      cd "$darwin_curdir"
+	      $RM "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}"
+	    done # $darwin_arches
+            ## Okay now we've a bunch of thin objects, gotta fatten them up :)
+	    # For each distinct object basename, combine all per-architecture
+	    # copies into one file in the current directory.
+	    darwin_filelist=`find unfat-$$ -type f -name \*.o -print -o -name \*.lo -print | $SED -e "$basename" | sort -u`
+	    darwin_file=
+	    darwin_files=
+	    for darwin_file in $darwin_filelist; do
+	      darwin_files=`find unfat-$$ -name $darwin_file -print | sort | $NL2SP`
+	      $LIPO -create -output "$darwin_file" $darwin_files
+	    done # $darwin_filelist
+	    $RM -rf unfat-$$
+	    cd "$darwin_orig_dir"
+	  else
+	    # No "Architectures" line: extract the archive directly.
+	    cd $darwin_orig_dir
+	    func_extract_an_archive "$my_xdir" "$my_xabs"
+	  fi # $darwin_arches
+	} # !$opt_dry_run
+	;;
+      *)
+        func_extract_an_archive "$my_xdir" "$my_xabs"
+	;;
+      esac
+      # Append every *.$objext and *.lo file found under the extraction
+      # directory, sorted and joined via $NL2SP.
+      my_oldobjs="$my_oldobjs "`find $my_xdir -name \*.$objext -print -o -name \*.lo -print | sort | $NL2SP`
+    done
+
+    func_extract_archives_result="$my_oldobjs"
+}
+
+
+# func_emit_wrapper [arg=no]
+#
+# Emit a libtool wrapper script on stdout.
+# Don't directly open a file because we may want to
+# incorporate the script contents within a cygwin/mingw
+# wrapper executable.  Must ONLY be called from within
+# func_mode_link because it depends on a number of variables
+# set therein.
+#
+# ARG is the value that the WRAPPER_SCRIPT_BELONGS_IN_OBJDIR
+# variable will take.  If 'yes', then the emitted script
+# will assume that the directory in which it is stored is
+# the $objdir directory.  This is a cygwin/mingw-specific
+# behavior.
+func_emit_wrapper ()
+{
+	# Optional first argument, defaulting to "no"; it is written into the
+	# emitted script as the value of WRAPPER_SCRIPT_BELONGS_IN_OBJDIR.
+	func_emit_wrapper_arg1=${1-no}
+
+	# Everything below prints the wrapper script piece by piece.  Inside
+	# the double-quoted strings, unescaped $variables are expanded now
+	# (at generation time) while \$variables are left for the wrapper to
+	# expand when it runs.
+	$ECHO "\
+#! $SHELL
+
+# $output - temporary wrapper script for $objdir/$outputname
+# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION
+#
+# The $output program cannot be directly executed until all the libtool
+# libraries that it depends on are installed.
+#
+# This wrapper script should never be moved out of the build directory.
+# If it is, it will not operate correctly.
+
+# Sed substitution that helps us do robust quoting.  It backslashifies
+# metacharacters that are still active within double-quoted strings.
+sed_quote_subst='$sed_quote_subst'
+
+# Be Bourne compatible
+if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then
+  emulate sh
+  NULLCMD=:
+  # Zsh 3.x and 4.x performs word splitting on \${1+\"\$@\"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '\${1+\"\$@\"}'='\"\$@\"'
+  setopt NO_GLOB_SUBST
+else
+  case \`(set -o) 2>/dev/null\` in *posix*) set -o posix;; esac
+fi
+BIN_SH=xpg4; export BIN_SH # for Tru64
+DUALCASE=1; export DUALCASE # for MKS sh
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+relink_command=\"$relink_command\"
+
+# This environment variable determines our operation mode.
+if test \"\$libtool_install_magic\" = \"$magic\"; then
+  # install mode needs the following variables:
+  generated_by_libtool_version='$macro_version'
+  notinst_deplibs='$notinst_deplibs'
+else
+  # When we are sourced in execute mode, \$file and \$ECHO are already set.
+  if test \"\$libtool_execute_magic\" != \"$magic\"; then
+    file=\"\$0\""
+
+    # Quote the current value of $ECHO with $sed_quote_subst so it can be
+    # embedded inside double quotes in the emitted ECHO= assignment.
+    qECHO=`$ECHO "$ECHO" | $SED "$sed_quote_subst"`
+    $ECHO "\
+
+# A function that is used when there is no print builtin or printf.
+func_fallback_echo ()
+{
+  eval 'cat <<_LTECHO_EOF
+\$1
+_LTECHO_EOF'
+}
+    ECHO=\"$qECHO\"
+  fi
+
+# Very basic option parsing. These options are (a) specific to
+# the libtool wrapper, (b) are identical between the wrapper
+# /script/ and the wrapper /executable/ which is used only on
+# windows platforms, and (c) all begin with the string "--lt-"
+# (application programs are unlikely to have options which match
+# this pattern).
+#
+# There are only two supported options: --lt-debug and
+# --lt-dump-script. There is, deliberately, no --lt-help.
+#
+# The first argument to this parsing function should be the
+# script's $0 value, followed by "$@".
+lt_option_debug=
+func_parse_lt_options ()
+{
+  lt_script_arg0=\$0
+  shift
+  for lt_opt
+  do
+    case \"\$lt_opt\" in
+    --lt-debug) lt_option_debug=1 ;;
+    --lt-dump-script)
+        lt_dump_D=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%/[^/]*$%%'\`
+        test \"X\$lt_dump_D\" = \"X\$lt_script_arg0\" && lt_dump_D=.
+        lt_dump_F=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%^.*/%%'\`
+        cat \"\$lt_dump_D/\$lt_dump_F\"
+        exit 0
+      ;;
+    --lt-*)
+        \$ECHO \"Unrecognized --lt- option: '\$lt_opt'\" 1>&2
+        exit 1
+      ;;
+    esac
+  done
+
+  # Print the debug banner immediately:
+  if test -n \"\$lt_option_debug\"; then
+    echo \"${outputname}:${output}:\${LINENO}: libtool wrapper (GNU $PACKAGE$TIMESTAMP) $VERSION\" 1>&2
+  fi
+}
+
+# Used when --lt-debug. Prints its arguments to stdout
+# (redirection is the responsibility of the caller)
+func_lt_dump_args ()
+{
+  lt_dump_args_N=1;
+  for lt_arg
+  do
+    \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[\$lt_dump_args_N]: \$lt_arg\"
+    lt_dump_args_N=\`expr \$lt_dump_args_N + 1\`
+  done
+}
+
+# Core function for launching the target application
+func_exec_program_core ()
+{
+"
+  # The body of the emitted func_exec_program_core differs only in the
+  # separator placed between \$progdir and \$program, chosen by $host.
+  case $host in
+  # Backslashes separate directories on plain windows
+  *-*-mingw | *-*-os2* | *-cegcc*)
+    $ECHO "\
+      if test -n \"\$lt_option_debug\"; then
+        \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[0]: \$progdir\\\\\$program\" 1>&2
+        func_lt_dump_args \${1+\"\$@\"} 1>&2
+      fi
+      exec \"\$progdir\\\\\$program\" \${1+\"\$@\"}
+"
+    ;;
+
+  *)
+    $ECHO "\
+      if test -n \"\$lt_option_debug\"; then
+        \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[0]: \$progdir/\$program\" 1>&2
+        func_lt_dump_args \${1+\"\$@\"} 1>&2
+      fi
+      exec \"\$progdir/\$program\" \${1+\"\$@\"}
+"
+    ;;
+  esac
+  # Emit the tail of func_exec_program_core (reached only if exec fails),
+  # func_exec_program, and the code that locates the wrapper's directory.
+  $ECHO "\
+      \$ECHO \"\$0: cannot exec \$program \$*\" 1>&2
+      exit 1
+}
+
+# A function to encapsulate launching the target application
+# Strips options in the --lt-* namespace from \$@ and
+# launches target application with the remaining arguments.
+func_exec_program ()
+{
+  case \" \$* \" in
+  *\\ --lt-*)
+    for lt_wr_arg
+    do
+      case \$lt_wr_arg in
+      --lt-*) ;;
+      *) set x \"\$@\" \"\$lt_wr_arg\"; shift;;
+      esac
+      shift
+    done ;;
+  esac
+  func_exec_program_core \${1+\"\$@\"}
+}
+
+  # Parse options
+  func_parse_lt_options \"\$0\" \${1+\"\$@\"}
+
+  # Find the directory that this script lives in.
+  thisdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*$%%'\`
+  test \"x\$thisdir\" = \"x\$file\" && thisdir=.
+
+  # Follow symbolic links until we get to the real thisdir.
+  file=\`ls -ld \"\$file\" | $SED -n 's/.*-> //p'\`
+  while test -n \"\$file\"; do
+    destdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*\$%%'\`
+
+    # If there was a directory component, then change thisdir.
+    if test \"x\$destdir\" != \"x\$file\"; then
+      case \"\$destdir\" in
+      [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;;
+      *) thisdir=\"\$thisdir/\$destdir\" ;;
+      esac
+    fi
+
+    file=\`\$ECHO \"\$file\" | $SED 's%^.*/%%'\`
+    file=\`ls -ld \"\$thisdir/\$file\" | $SED -n 's/.*-> //p'\`
+  done
+
+  # Usually 'no', except on cygwin/mingw when embedded into
+  # the cwrapper.
+  WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=$func_emit_wrapper_arg1
+  if test \"\$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR\" = \"yes\"; then
+    # special case for '.'
+    if test \"\$thisdir\" = \".\"; then
+      thisdir=\`pwd\`
+    fi
+    # remove .libs from thisdir
+    case \"\$thisdir\" in
+    *[\\\\/]$objdir ) thisdir=\`\$ECHO \"\$thisdir\" | $SED 's%[\\\\/][^\\\\/]*$%%'\` ;;
+    $objdir )   thisdir=. ;;
+    esac
+  fi
+
+  # Try to get the absolute directory name.
+  absdir=\`cd \"\$thisdir\" && pwd\`
+  test -n \"\$absdir\" && thisdir=\"\$absdir\"
+"
+
+	# With fast_install, the wrapper runs "lt-$outputname$exeext" from
+	# $objdir and contains code to relink it when it is missing or is not
+	# the newest candidate; otherwise it runs "$outputname" from $objdir.
+	if test "$fast_install" = yes; then
+	  $ECHO "\
+  program=lt-'$outputname'$exeext
+  progdir=\"\$thisdir/$objdir\"
+
+  if test ! -f \"\$progdir/\$program\" ||
+     { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | ${SED} 1q\`; \\
+       test \"X\$file\" != \"X\$progdir/\$program\"; }; then
+
+    file=\"\$\$-\$program\"
+
+    if test ! -d \"\$progdir\"; then
+      $MKDIR \"\$progdir\"
+    else
+      $RM \"\$progdir/\$file\"
+    fi"
+
+	  $ECHO "\
+
+    # relink executable if necessary
+    if test -n \"\$relink_command\"; then
+      if relink_command_output=\`eval \$relink_command 2>&1\`; then :
+      else
+	$ECHO \"\$relink_command_output\" >&2
+	$RM \"\$progdir/\$file\"
+	exit 1
+      fi
+    fi
+
+    $MV \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null ||
+    { $RM \"\$progdir/\$program\";
+      $MV \"\$progdir/\$file\" \"\$progdir/\$program\"; }
+    $RM \"\$progdir/\$file\"
+  fi"
+	else
+	  $ECHO "\
+  program='$outputname'
+  progdir=\"\$thisdir/$objdir\"
+"
+	fi
+
+	# Open the emitted "does the program exist?" test; its else/fi are
+	# emitted by the final $ECHO below.
+	$ECHO "\
+
+  if test -f \"\$progdir/\$program\"; then"
+
+	# Fix the DLL searchpath if we need to.  Do this before prepending
+	# to shlibpath, because on Windows, both are PATH and uninstalled
+	# libraries must come first.
+	if test -n "$dllsearchpath"; then
+	  $ECHO "\
+    # Add the dll search path components to the executable PATH
+    PATH=$dllsearchpath:\$PATH
+"
+	fi
+
+	# Export our shlibpath_var if we have one.
+	if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then
+	  $ECHO "\
+    # Add our own library path to $shlibpath_var
+    $shlibpath_var=\"$temp_rpath\$$shlibpath_var\"
+
+    # Some systems cannot cope with colon-terminated $shlibpath_var
+    # The second colon is a workaround for a bug in BeOS R4 sed
+    $shlibpath_var=\`\$ECHO \"\$$shlibpath_var\" | $SED 's/::*\$//'\`
+
+    export $shlibpath_var
+"
+	fi
+
+	# Emit the launch of the real program and the error path taken when
+	# it does not exist, closing the conditionals opened earlier.
+	$ECHO "\
+    if test \"\$libtool_execute_magic\" != \"$magic\"; then
+      # Run the actual program with our arguments.
+      func_exec_program \${1+\"\$@\"}
+    fi
+  else
+    # The program doesn't exist.
+    \$ECHO \"\$0: error: \\\`\$progdir/\$program' does not exist\" 1>&2
+    \$ECHO \"This script is just a wrapper for \$program.\" 1>&2
+    \$ECHO \"See the $PACKAGE documentation for more information.\" 1>&2
+    exit 1
+  fi
+fi\
+"
+}
+
+
+# func_emit_cwrapperexe_src
+# emit the source code for a wrapper executable on stdout
+# Must ONLY be called from within func_mode_link because
+# it depends on a number of variables set therein.
+func_emit_cwrapperexe_src ()
+{
+	cat <<EOF
+
+/* $cwrappersource - temporary wrapper executable for $objdir/$outputname
+   Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION
+
+   The $output program cannot be directly executed until all the libtool
+   libraries that it depends on are installed.
+
+   This wrapper executable should never be moved out of the build directory.
+   If it is, it will not operate correctly.
+*/
+EOF
+	    cat <<"EOF"
+#ifdef _MSC_VER
+# define _CRT_SECURE_NO_DEPRECATE 1
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef _MSC_VER
+# include <direct.h>
+# include <process.h>
+# include <io.h>
+#else
+# include <unistd.h>
+# include <stdint.h>
+# ifdef __CYGWIN__
+#  include <io.h>
+# endif
+#endif
+#include <malloc.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+/* declarations of non-ANSI functions */
+#if defined(__MINGW32__)
+# ifdef __STRICT_ANSI__
+int _putenv (const char *);
+# endif
+#elif defined(__CYGWIN__)
+# ifdef __STRICT_ANSI__
+char *realpath (const char *, char *);
+int putenv (char *);
+int setenv (const char *, const char *, int);
+# endif
+/* #elif defined (other platforms) ... */
+#endif
+
+/* portability defines, excluding path handling macros */
+#if defined(_MSC_VER)
+# define setmode _setmode
+# define stat    _stat
+# define chmod   _chmod
+# define getcwd  _getcwd
+# define putenv  _putenv
+# define S_IXUSR _S_IEXEC
+# ifndef _INTPTR_T_DEFINED
+#  define _INTPTR_T_DEFINED
+#  define intptr_t int
+# endif
+#elif defined(__MINGW32__)
+# define setmode _setmode
+# define stat    _stat
+# define chmod   _chmod
+# define getcwd  _getcwd
+# define putenv  _putenv
+#elif defined(__CYGWIN__)
+# define HAVE_SETENV
+# define FOPEN_WB "wb"
+/* #elif defined (other platforms) ... */
+#endif
+
+#if defined(PATH_MAX)
+# define LT_PATHMAX PATH_MAX
+#elif defined(MAXPATHLEN)
+# define LT_PATHMAX MAXPATHLEN
+#else
+# define LT_PATHMAX 1024
+#endif
+
+#ifndef S_IXOTH
+# define S_IXOTH 0
+#endif
+#ifndef S_IXGRP
+# define S_IXGRP 0
+#endif
+
+/* path handling portability macros */
+#ifndef DIR_SEPARATOR
+# define DIR_SEPARATOR '/'
+# define PATH_SEPARATOR ':'
+#endif
+
+#if defined (_WIN32) || defined (__MSDOS__) || defined (__DJGPP__) || \
+  defined (__OS2__)
+# define HAVE_DOS_BASED_FILE_SYSTEM
+# define FOPEN_WB "wb"
+# ifndef DIR_SEPARATOR_2
+#  define DIR_SEPARATOR_2 '\\'
+# endif
+# ifndef PATH_SEPARATOR_2
+#  define PATH_SEPARATOR_2 ';'
+# endif
+#endif
+
+#ifndef DIR_SEPARATOR_2
+# define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR)
+#else /* DIR_SEPARATOR_2 */
+# define IS_DIR_SEPARATOR(ch) \
+	(((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2))
+#endif /* DIR_SEPARATOR_2 */
+
+#ifndef PATH_SEPARATOR_2
+# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR)
+#else /* PATH_SEPARATOR_2 */
+# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR_2)
+#endif /* PATH_SEPARATOR_2 */
+
+#ifndef FOPEN_WB
+# define FOPEN_WB "w"
+#endif
+#ifndef _O_BINARY
+# define _O_BINARY 0
+#endif
+
+#define XMALLOC(type, num)      ((type *) xmalloc ((num) * sizeof(type)))
+#define XFREE(stale) do { \
+  if (stale) { free ((void *) stale); stale = 0; } \
+} while (0)
+
+#if defined(LT_DEBUGWRAPPER)
+static int lt_debug = 1;
+#else
+static int lt_debug = 0;
+#endif
+
+const char *program_name = "libtool-wrapper"; /* in case xstrdup fails */
+
+void *xmalloc (size_t num);
+char *xstrdup (const char *string);
+const char *base_name (const char *name);
+char *find_executable (const char *wrapper);
+char *chase_symlinks (const char *pathspec);
+int make_executable (const char *path);
+int check_executable (const char *path);
+char *strendzap (char *str, const char *pat);
+void lt_debugprintf (const char *file, int line, const char *fmt, ...);
+void lt_fatal (const char *file, int line, const char *message, ...);
+static const char *nonnull (const char *s);
+static const char *nonempty (const char *s);
+void lt_setenv (const char *name, const char *value);
+char *lt_extend_str (const char *orig_value, const char *add, int to_end);
+void lt_update_exe_path (const char *name, const char *value);
+void lt_update_lib_path (const char *name, const char *value);
+char **prepare_spawn (char **argv);
+void lt_dump_script (FILE *f);
+EOF
+
+	    cat <<EOF
+volatile const char * MAGIC_EXE = "$magic_exe";
+const char * LIB_PATH_VARNAME = "$shlibpath_var";
+EOF
+
+	    if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then
+              func_to_host_path "$temp_rpath"
+	      cat <<EOF
+const char * LIB_PATH_VALUE   = "$func_to_host_path_result";
+EOF
+	    else
+	      cat <<"EOF"
+const char * LIB_PATH_VALUE   = "";
+EOF
+	    fi
+
+	    if test -n "$dllsearchpath"; then
+              func_to_host_path "$dllsearchpath:"
+	      cat <<EOF
+const char * EXE_PATH_VARNAME = "PATH";
+const char * EXE_PATH_VALUE   = "$func_to_host_path_result";
+EOF
+	    else
+	      cat <<"EOF"
+const char * EXE_PATH_VARNAME = "";
+const char * EXE_PATH_VALUE   = "";
+EOF
+	    fi
+
+	    if test "$fast_install" = yes; then
+	      cat <<EOF
+const char * TARGET_PROGRAM_NAME = "lt-$outputname"; /* hopefully, no .exe */
+EOF
+	    else
+	      cat <<EOF
+const char * TARGET_PROGRAM_NAME = "$outputname"; /* hopefully, no .exe */
+EOF
+	    fi
+
+
+	    cat <<"EOF"
+
+#define LTWRAPPER_OPTION_PREFIX         "--lt-"
+
+static const char *ltwrapper_option_prefix = LTWRAPPER_OPTION_PREFIX;
+static const char *dumpscript_opt       = LTWRAPPER_OPTION_PREFIX "dump-script";
+static const char *debug_opt            = LTWRAPPER_OPTION_PREFIX "debug";
+
+int
+main (int argc, char *argv[])
+{
+  char **newargz;
+  int  newargc;
+  char *tmp_pathspec;
+  char *actual_cwrapper_path;
+  char *actual_cwrapper_name;
+  char *target_name;
+  char *lt_argv_zero;
+  intptr_t rval = 127;
+
+  int i;
+
+  program_name = (char *) xstrdup (base_name (argv[0]));
+  newargz = XMALLOC (char *, argc + 1);
+
+  /* very simple arg parsing; don't want to rely on getopt
+   * also, copy all non cwrapper options to newargz, except
+   * argz[0], which is handled differently
+   */
+  newargc=0;
+  for (i = 1; i < argc; i++)
+    {
+      if (strcmp (argv[i], dumpscript_opt) == 0)
+	{
+EOF
+	    case "$host" in
+	      *mingw* | *cygwin* )
+		# make stdout use "unix" line endings
+		echo "          setmode(1,_O_BINARY);"
+		;;
+	      esac
+
+	    cat <<"EOF"
+	  lt_dump_script (stdout);
+	  return 0;
+	}
+      if (strcmp (argv[i], debug_opt) == 0)
+	{
+          lt_debug = 1;
+          continue;
+	}
+      if (strcmp (argv[i], ltwrapper_option_prefix) == 0)
+        {
+          /* however, if there is an option in the LTWRAPPER_OPTION_PREFIX
+             namespace, but it is not one of the ones we know about and
+             have already dealt with, above (including dump-script), then
+             report an error. Otherwise, targets might begin to believe
+             they are allowed to use options in the LTWRAPPER_OPTION_PREFIX
+             namespace. The first time any user complains about this, we'll
+             need to make LTWRAPPER_OPTION_PREFIX a configure-time option
+             or a configure.ac-settable value.
+           */
+          lt_fatal (__FILE__, __LINE__,
+		    "unrecognized %s option: '%s'",
+                    ltwrapper_option_prefix, argv[i]);
+        }
+      /* otherwise ... */
+      newargz[++newargc] = xstrdup (argv[i]);
+    }
+  newargz[++newargc] = NULL;
+
+EOF
+	    cat <<EOF
+  /* The GNU banner must be the first non-error debug message */
+  lt_debugprintf (__FILE__, __LINE__, "libtool wrapper (GNU $PACKAGE$TIMESTAMP) $VERSION\n");
+EOF
+	    cat <<"EOF"
+  lt_debugprintf (__FILE__, __LINE__, "(main) argv[0]: %s\n", argv[0]);
+  lt_debugprintf (__FILE__, __LINE__, "(main) program_name: %s\n", program_name);
+
+  tmp_pathspec = find_executable (argv[0]);
+  if (tmp_pathspec == NULL)
+    lt_fatal (__FILE__, __LINE__, "couldn't find %s", argv[0]);
+  lt_debugprintf (__FILE__, __LINE__,
+                  "(main) found exe (before symlink chase) at: %s\n",
+		  tmp_pathspec);
+
+  actual_cwrapper_path = chase_symlinks (tmp_pathspec);
+  lt_debugprintf (__FILE__, __LINE__,
+                  "(main) found exe (after symlink chase) at: %s\n",
+		  actual_cwrapper_path);
+  XFREE (tmp_pathspec);
+
+  actual_cwrapper_name = xstrdup (base_name (actual_cwrapper_path));
+  strendzap (actual_cwrapper_path, actual_cwrapper_name);
+
+  /* wrapper name transforms */
+  strendzap (actual_cwrapper_name, ".exe");
+  tmp_pathspec = lt_extend_str (actual_cwrapper_name, ".exe", 1);
+  XFREE (actual_cwrapper_name);
+  actual_cwrapper_name = tmp_pathspec;
+  tmp_pathspec = 0;
+
+  /* target_name transforms -- use actual target program name; might have lt- prefix */
+  target_name = xstrdup (base_name (TARGET_PROGRAM_NAME));
+  strendzap (target_name, ".exe");
+  tmp_pathspec = lt_extend_str (target_name, ".exe", 1);
+  XFREE (target_name);
+  target_name = tmp_pathspec;
+  tmp_pathspec = 0;
+
+  lt_debugprintf (__FILE__, __LINE__,
+		  "(main) libtool target name: %s\n",
+		  target_name);
+EOF
+
+	    cat <<EOF
+  newargz[0] =
+    XMALLOC (char, (strlen (actual_cwrapper_path) +
+		    strlen ("$objdir") + 1 + strlen (actual_cwrapper_name) + 1));
+  strcpy (newargz[0], actual_cwrapper_path);
+  strcat (newargz[0], "$objdir");
+  strcat (newargz[0], "/");
+EOF
+
+	    cat <<"EOF"
+  /* stop here, and copy so we don't have to do this twice */
+  tmp_pathspec = xstrdup (newargz[0]);
+
+  /* do NOT want the lt- prefix here, so use actual_cwrapper_name */
+  strcat (newargz[0], actual_cwrapper_name);
+
+  /* DO want the lt- prefix here if it exists, so use target_name */
+  lt_argv_zero = lt_extend_str (tmp_pathspec, target_name, 1);
+  XFREE (tmp_pathspec);
+  tmp_pathspec = NULL;
+EOF
+
+	    case $host_os in
+	      mingw*)
+	    cat <<"EOF"
+  {
+    char* p;
+    while ((p = strchr (newargz[0], '\\')) != NULL)
+      {
+	*p = '/';
+      }
+    while ((p = strchr (lt_argv_zero, '\\')) != NULL)
+      {
+	*p = '/';
+      }
+  }
+EOF
+	    ;;
+	    esac
+
+	    cat <<"EOF"
+  XFREE (target_name);
+  XFREE (actual_cwrapper_path);
+  XFREE (actual_cwrapper_name);
+
+  lt_setenv ("BIN_SH", "xpg4"); /* for Tru64 */
+  lt_setenv ("DUALCASE", "1");  /* for MSK sh */
+  /* Update the DLL searchpath.  EXE_PATH_VALUE ($dllsearchpath) must
+     be prepended before (that is, appear after) LIB_PATH_VALUE ($temp_rpath)
+     because on Windows, both *_VARNAMEs are PATH but uninstalled
+     libraries must come first. */
+  lt_update_exe_path (EXE_PATH_VARNAME, EXE_PATH_VALUE);
+  lt_update_lib_path (LIB_PATH_VARNAME, LIB_PATH_VALUE);
+
+  lt_debugprintf (__FILE__, __LINE__, "(main) lt_argv_zero: %s\n",
+		  nonnull (lt_argv_zero));
+  for (i = 0; i < newargc; i++)
+    {
+      lt_debugprintf (__FILE__, __LINE__, "(main) newargz[%d]: %s\n",
+		      i, nonnull (newargz[i]));
+    }
+
+EOF
+
+	    case $host_os in
+	      mingw*)
+		cat <<"EOF"
+  /* execv doesn't actually work on mingw as expected on unix */
+  newargz = prepare_spawn (newargz);
+  rval = _spawnv (_P_WAIT, lt_argv_zero, (const char * const *) newargz);
+  if (rval == -1)
+    {
+      /* failed to start process */
+      lt_debugprintf (__FILE__, __LINE__,
+		      "(main) failed to launch target \"%s\": %s\n",
+		      lt_argv_zero, nonnull (strerror (errno)));
+      return 127;
+    }
+  return rval;
+EOF
+		;;
+	      *)
+		cat <<"EOF"
+  execv (lt_argv_zero, newargz);
+  return rval; /* =127, but avoids unused variable warning */
+EOF
+		;;
+	    esac
+
+	    cat <<"EOF"
+}
+
+/* Allocate NUM bytes; a failed malloc is reported through lt_fatal.  */
+void *
+xmalloc (size_t num)
+{
+  void *block = (void *) malloc (num);
+  if (block == NULL)
+    lt_fatal (__FILE__, __LINE__, "memory exhausted");
+  return block;
+}
+/* Return an xmalloc-ed copy of STRING, or NULL when STRING is NULL.  */
+char *
+xstrdup (const char *string)
+{
+  return string ? strcpy ((char *) xmalloc (strlen (string) + 1),
+			  string) : NULL;
+}
+
+/* Return a pointer into NAME just past its last directory separator.  */
+const char *
+base_name (const char *name)
+{
+  const char *cursor, *last_component;
+#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
+  /* A leading drive specifier such as "C:" is not part of the name.  */
+  if (isalpha ((unsigned char) name[0]) && name[1] == ':')
+    name += 2;
+#endif
+  last_component = name;
+  for (cursor = name; *cursor != '\0'; cursor++)
+    if (IS_DIR_SEPARATOR (*cursor))
+      last_component = cursor + 1;
+  return last_component;
+}
+
+/* Return 1 when PATH can be stat-ed and has an execute bit set, else 0.  */
+int
+check_executable (const char *path)
+{
+  struct stat info;
+
+  lt_debugprintf (__FILE__, __LINE__, "(check_executable): %s\n",
+                  nonempty (path));
+  if (path == NULL || *path == '\0')
+    return 0;
+  /* A failed stat counts as "not executable".  */
+  if (stat (path, &info) < 0)
+    return 0;
+  /* Any of the user, group or other execute bits is enough.  */
+  return (info.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) ? 1 : 0;
+}
+
+/* Add the user, group and other execute bits to PATH.  Returns the chmod
+   result, or 0 when PATH is NULL, empty, or cannot be stat-ed.  */
+int
+make_executable (const char *path)
+{
+  struct stat info;
+
+  lt_debugprintf (__FILE__, __LINE__, "(make_executable): %s\n",
+                  nonempty (path));
+  if (path == NULL || *path == '\0')
+    return 0;
+  /* Nothing to change when the file cannot be examined.  */
+  if (stat (path, &info) < 0)
+    return 0;
+
+  return chmod (path, info.st_mode | S_IXOTH | S_IXGRP | S_IXUSR);
+}
+
+/* Searches for the full path of WRAPPER: an absolute name is tried as
+   given, a name without '/' is looked up in each PATH element, and the
+   current directory is tried last.  Returns a newly allocated path that
+   passed check_executable, or NULL.  Does not chase symlinks.  */
+char *
+find_executable (const char *wrapper)
+{
+  int has_slash = 0;
+  const char *p;
+  const char *p_next;
+  /* scratch buffer for getcwd (automatic storage, not static) */
+  char tmp[LT_PATHMAX + 1];
+  int tmp_len;
+  char *concat_name;
+
+  lt_debugprintf (__FILE__, __LINE__, "(find_executable): %s\n",
+                  nonempty (wrapper));
+
+  if ((wrapper == NULL) || (*wrapper == '\0'))
+    return NULL;
+
+  /* Absolute path? */
+#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
+  if (isalpha ((unsigned char) wrapper[0]) && wrapper[1] == ':')
+    {
+      concat_name = xstrdup (wrapper);
+      if (check_executable (concat_name))
+	return concat_name;
+      XFREE (concat_name);
+    }
+  else
+    {
+#endif
+      if (IS_DIR_SEPARATOR (wrapper[0]))
+	{
+	  concat_name = xstrdup (wrapper);
+	  if (check_executable (concat_name))
+	    return concat_name;
+	  XFREE (concat_name);
+	}
+#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
+    }
+#endif
+
+  for (p = wrapper; *p; p++)
+    if (*p == '/')  /* tests '/' only, not IS_DIR_SEPARATOR */
+      {
+	has_slash = 1;
+	break;
+      }
+  if (!has_slash)
+    {
+      /* no slashes; search PATH */
+      const char *path = getenv ("PATH");
+      if (path != NULL)
+	{
+	  for (p = path; *p; p = p_next)
+	    {
+	      const char *q;
+	      size_t p_len;
+	      for (q = p; *q; q++)
+		if (IS_PATH_SEPARATOR (*q))
+		  break;
+	      p_len = q - p;
+	      p_next = (*q == '\0' ? q : q + 1);
+	      if (p_len == 0)
+		{
+		  /* empty PATH element: use the current directory */
+		  if (getcwd (tmp, LT_PATHMAX) == NULL)
+		    lt_fatal (__FILE__, __LINE__, "getcwd failed: %s",
+                              nonnull (strerror (errno)));
+		  tmp_len = strlen (tmp);
+		  concat_name =
+		    XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1);
+		  memcpy (concat_name, tmp, tmp_len);
+		  concat_name[tmp_len] = '/';
+		  strcpy (concat_name + tmp_len + 1, wrapper);
+		}
+	      else
+		{
+		  concat_name =
+		    XMALLOC (char, p_len + 1 + strlen (wrapper) + 1);
+		  memcpy (concat_name, p, p_len);
+		  concat_name[p_len] = '/';
+		  strcpy (concat_name + p_len + 1, wrapper);
+		}
+	      if (check_executable (concat_name))
+		return concat_name;
+	      XFREE (concat_name);
+	    }
+	}
+      /* not found in PATH; assume curdir */
+    }
+  /* Relative path, or not found in PATH: prepend cwd */
+  if (getcwd (tmp, LT_PATHMAX) == NULL)
+    lt_fatal (__FILE__, __LINE__, "getcwd failed: %s",
+              nonnull (strerror (errno)));
+  tmp_len = strlen (tmp);
+  concat_name = XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1);
+  memcpy (concat_name, tmp, tmp_len);
+  concat_name[tmp_len] = '/';
+  strcpy (concat_name + tmp_len + 1, wrapper);
+
+  if (check_executable (concat_name))
+    return concat_name;
+  XFREE (concat_name);
+  return NULL;
+}
+/* Return a copy of PATHSPEC, realpath-resolved when it or a parent directory is a symlink.  */
+char *
+chase_symlinks (const char *pathspec)
+{
+#ifndef S_ISLNK
+  return xstrdup (pathspec);
+#else
+  char buf[LT_PATHMAX];
+  struct stat s;
+  char *tmp_pathspec = xstrdup (pathspec);
+  char *p;
+  int has_symlinks = 0;
+  while (strlen (tmp_pathspec) && !has_symlinks)
+    {
+      lt_debugprintf (__FILE__, __LINE__,
+		      "checking path component for symlinks: %s\n",
+		      tmp_pathspec);
+      if (lstat (tmp_pathspec, &s) == 0)
+	{
+	  if (S_ISLNK (s.st_mode) != 0)
+	    {
+	      has_symlinks = 1;
+	      break;
+	    }
+
+	  /* search backwards for last DIR_SEPARATOR */
+	  p = tmp_pathspec + strlen (tmp_pathspec) - 1;
+	  while ((p > tmp_pathspec) && (!IS_DIR_SEPARATOR (*p)))
+	    p--;
+	  if ((p == tmp_pathspec) && (!IS_DIR_SEPARATOR (*p)))
+	    {
+	      /* no more DIR_SEPARATORS left */
+	      break;
+	    }
+	  *p = '\0';  /* cut off the last component; the parent is checked next */
+	}
+      else
+	{
+	  lt_fatal (__FILE__, __LINE__,
+		    "error accessing file \"%s\": %s",
+		    tmp_pathspec, nonnull (strerror (errno)));
+	}
+    }
+  XFREE (tmp_pathspec);
+
+  if (!has_symlinks)
+    {
+      return xstrdup (pathspec);
+    }
+
+  tmp_pathspec = realpath (pathspec, buf);  /* result is copied below */
+  if (tmp_pathspec == 0)
+    {
+      lt_fatal (__FILE__, __LINE__,
+		"could not follow symlinks for %s", pathspec);
+    }
+  return xstrdup (tmp_pathspec);
+#endif
+}
+
+/* Truncate STR in place when it ends with PAT.  Returns the position in
+   STR where PAT would start, or STR itself when PAT is longer than STR.  */
+char *
+strendzap (char *str, const char *pat)
+{
+  size_t str_len, pat_len;
+  char *tail;
+
+  assert (str != NULL);
+  assert (pat != NULL);
+  str_len = strlen (str);
+  pat_len = strlen (pat);
+  if (pat_len > str_len)
+    return str;
+  tail = str + (str_len - pat_len);
+  if (strcmp (tail, pat) == 0)
+    *tail = '\0';
+  return tail;
+}
+
+/* Debug trace to stderr, emitted only when lt_debug is set.  */
+void
+lt_debugprintf (const char *file, int line, const char *fmt, ...)
+{
+  va_list ap;
+  if (!lt_debug)
+    return;
+  (void) fprintf (stderr, "%s:%s:%d: ", program_name, file, line);
+  va_start (ap, fmt);
+  (void) vfprintf (stderr, fmt, ap);
+  va_end (ap);
+}
+
+/* Print "PROGRAM:FILE:LINE: MODE: MESSAGE." and exit unless EXIT_STATUS < 0.  */
+static void
+lt_error_core (int exit_status, const char *file, int line,
+	       const char *mode, const char *message, va_list ap)
+{
+  fprintf (stderr, "%s:%s:%d: %s: ", program_name, file, line, mode);
+  vfprintf (stderr, message, ap);
+  fprintf (stderr, ".\n");
+  if (exit_status < 0)
+    return;
+  exit (exit_status);
+}
+/* Report a "FATAL" message through lt_error_core, passing EXIT_FAILURE as the exit status.  */
+void
+lt_fatal (const char *file, int line, const char *message, ...)
+{
+  va_list ap;
+  va_start (ap, message);
+  lt_error_core (EXIT_FAILURE, file, line, "FATAL", message, ap);
+  va_end (ap);
+}
+/* Return S, or the literal "(null)" when S is NULL.  */
+static const char *
+nonnull (const char *s)
+{
+  return s ? s : "(null)";
+}
+/* Return "(empty)" for an empty string; otherwise behave like nonnull.  */
+static const char *
+nonempty (const char *s)
+{
+  return (s && !*s) ? "(empty)" : nonnull (s);
+}
+
+/* Set environment variable NAME to VALUE, overwriting any previous value.  */
+void
+lt_setenv (const char *name, const char *value)
+{
+  lt_debugprintf (__FILE__, __LINE__,
+		  "(lt_setenv) setting '%s' to '%s'\n",
+                  nonnull (name), nonnull (value));
+  {
+#ifdef HAVE_SETENV
+    /* setenv keeps its own copy of VALUE; a private duplicate would leak.  */
+    setenv (name, value, 1);
+#else
+    size_t len = strlen (name) + 1 + strlen (value) + 1;
+    char *str = XMALLOC (char, len);
+    sprintf (str, "%s=%s", name, value);
+    if (putenv (str) != EXIT_SUCCESS) /* on success the environment keeps STR */
+      {
+        XFREE (str);
+      }
+#endif
+  }
+}
+
+/* Return a newly allocated string joining ORIG_VALUE and ADD: ADD goes
+   after ORIG_VALUE when TO_END is nonzero, before it otherwise.  A NULL
+   or empty ORIG_VALUE yields a plain copy of ADD.  */
+char *
+lt_extend_str (const char *orig_value, const char *add, int to_end)
+{
+  const char *head;
+  const char *tail;
+  int orig_value_len, add_len;
+  char *joined;
+
+  if (orig_value == NULL || *orig_value == '\0')
+    return xstrdup (add);
+
+  orig_value_len = strlen (orig_value);
+  add_len = strlen (add);
+  joined = XMALLOC (char, add_len + orig_value_len + 1);
+
+  /* Pick the order once, then concatenate along a single code path.  */
+  head = to_end ? orig_value : add;
+  tail = to_end ? add : orig_value;
+  strcpy (joined, head);
+  strcpy (joined + (to_end ? orig_value_len : add_len), tail);
+
+  return joined;
+}
+
+/* Prepend VALUE to environment variable NAME, trimming trailing path
+   separators from the result.  No-op if NAME or VALUE is NULL or empty.  */
+void
+lt_update_exe_path (const char *name, const char *value)
+{
+  char *joined;
+  int end;
+  lt_debugprintf (__FILE__, __LINE__,
+		  "(lt_update_exe_path) modifying '%s' by prepending '%s'\n",
+                  nonnull (name), nonnull (value));
+  if (!name || !*name || !value || !*value)
+    return;
+
+  joined = lt_extend_str (getenv (name), value, 0);
+  /* some systems cannot cope with a ':'-terminated path */
+  for (end = strlen (joined); end > 0 && IS_PATH_SEPARATOR (joined[end - 1]); end--)
+    joined[end - 1] = '\0';
+  lt_setenv (name, joined);
+  XFREE (joined);
+}
+
+/* Prepend VALUE to environment variable NAME.  No-op if either is NULL or empty.  */
+void
+lt_update_lib_path (const char *name, const char *value)
+{
+  char *joined;
+  lt_debugprintf (__FILE__, __LINE__,
+		  "(lt_update_lib_path) modifying '%s' by prepending '%s'\n",
+                  nonnull (name), nonnull (value));
+  if (!name || !*name || !value || !*value)
+    return;
+  joined = lt_extend_str (getenv (name), value, 0);
+  lt_setenv (name, joined);
+  XFREE (joined);
+}
+
+EOF
+	    case $host_os in
+	      mingw*)
+		cat <<"EOF"
+
+/* Prepares an argument vector before calling spawn().
+   Note that spawn() does not by itself call the command interpreter
+     (getenv ("COMSPEC") != NULL ? getenv ("COMSPEC") :
+      ({ OSVERSIONINFO v; v.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
+         GetVersionEx(&v);
+         v.dwPlatformId == VER_PLATFORM_WIN32_NT;
+      }) ? "cmd.exe" : "command.com").
+   Instead it simply concatenates the arguments, separated by ' ', and calls
+   CreateProcess().  We must quote the arguments since Win32 CreateProcess()
+   interprets characters like ' ', '\t', '\\', '"' (but not '<' and '>') in a
+   special way:
+   - Space and tab are interpreted as delimiters. They are not treated as
+     delimiters if they are surrounded by double quotes: "...".
+   - Unescaped double quotes are removed from the input. Their only effect is
+     that within double quotes, space and tab are treated like normal
+     characters.
+   - Backslashes not followed by double quotes are not special.
+   - But 2*n+1 backslashes followed by a double quote become
+     n backslashes followed by a double quote (n >= 0):
+       \" -> "
+       \\\" -> \"
+       \\\\\" -> \\"
+ */
+#define SHELL_SPECIAL_CHARS "\"\\ \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
+#define SHELL_SPACE_CHARS " \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
+char **
+prepare_spawn (char **argv)
+{
+  size_t argc;
+  char **new_argv;
+  size_t i;
+
+  /* Count number of arguments.  */
+  for (argc = 0; argv[argc] != NULL; argc++)
+    ;
+
+  /* Allocate new argument vector.  */
+  new_argv = XMALLOC (char *, argc + 1);
+
+  /* Put quoted arguments into the new argument vector.  */
+  for (i = 0; i < argc; i++)
+    {
+      const char *string = argv[i];
+
+      if (string[0] == '\0')
+	new_argv[i] = xstrdup ("\"\"");  /* empty argument becomes a pair of quotes */
+      else if (strpbrk (string, SHELL_SPECIAL_CHARS) != NULL)
+	{
+	  int quote_around = (strpbrk (string, SHELL_SPACE_CHARS) != NULL);
+	  size_t length;
+	  unsigned int backslashes;
+	  const char *s;
+	  char *quoted_string;
+	  char *p;
+
+	  length = 0;  /* first pass: compute the size of the quoted form */
+	  backslashes = 0;
+	  if (quote_around)
+	    length++;
+	  for (s = string; *s != '\0'; s++)
+	    {
+	      char c = *s;
+	      if (c == '"')
+		length += backslashes + 1;
+	      length++;
+	      if (c == '\\')
+		backslashes++;
+	      else
+		backslashes = 0;
+	    }
+	  if (quote_around)
+	    length += backslashes + 1;
+
+	  quoted_string = XMALLOC (char, length + 1);
+
+	  p = quoted_string;  /* second pass: emit, mirroring the counting above */
+	  backslashes = 0;
+	  if (quote_around)
+	    *p++ = '"';
+	  for (s = string; *s != '\0'; s++)
+	    {
+	      char c = *s;
+	      if (c == '"')
+		{
+		  unsigned int j;
+		  for (j = backslashes + 1; j > 0; j--)
+		    *p++ = '\\';
+		}
+	      *p++ = c;
+	      if (c == '\\')
+		backslashes++;
+	      else
+		backslashes = 0;
+	    }
+	  if (quote_around)
+	    {
+	      unsigned int j;
+	      for (j = backslashes; j > 0; j--)
+		*p++ = '\\';
+	      *p++ = '"';
+	    }
+	  *p = '\0';
+
+	  new_argv[i] = quoted_string;
+	}
+      else
+	new_argv[i] = (char *) string;  /* nothing to quote: reuse the caller's string */
+    }
+  new_argv[argc] = NULL;
+
+  return new_argv;
+}
+EOF
+		;;
+	    esac
+
+            cat <<"EOF"
+void lt_dump_script (FILE* f)
+{
+EOF
+	    func_emit_wrapper yes |
+	      $SED -n -e '
+s/^\(.\{79\}\)\(..*\)/\1\
+\2/
+h
+s/\([\\"]\)/\\\1/g
+s/$/\\n/
+s/\([^\n]*\).*/  fputs ("\1", f);/p
+g
+D'
+            cat <<"EOF"
+}
+EOF
+}
+# end: func_emit_cwrapperexe_src
+
+# func_win32_import_lib_p ARG
+# True if the first 10 lines printed by $file_magic_cmd for ARG contain "import".
+func_win32_import_lib_p ()
+{
+    $opt_debug
+    case `eval $file_magic_cmd \"\$1\" 2>/dev/null | $SED -e 10q` in
+    *import*) : ;;
+    *) false ;;
+    esac
+}
+
+# func_mode_link arg...
+func_mode_link ()
+{
+    $opt_debug
+    case $host in
+    *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*)
+      # It is impossible to link a dll without this setting, and
+      # we shouldn't force the makefile maintainer to figure out
+      # which system we are compiling for in order to pass an extra
+      # flag for every libtool invocation.
+      # allow_undefined=no
+
+      # FIXME: Unfortunately, there are problems with the above when trying
+      # to make a dll which has undefined symbols, in which case not
+      # even a static library is built.  For now, we need to specify
+      # -no-undefined on the libtool link line when we can be certain
+      # that all symbols are satisfied, otherwise we get a static library.
+      allow_undefined=yes
+      ;;
+    *)
+      allow_undefined=yes
+      ;;
+    esac
+    libtool_args=$nonopt
+    base_compile="$nonopt $@"
+    compile_command=$nonopt
+    finalize_command=$nonopt
+
+    compile_rpath=
+    finalize_rpath=
+    compile_shlibpath=
+    finalize_shlibpath=
+    convenience=
+    old_convenience=
+    deplibs=
+    old_deplibs=
+    compiler_flags=
+    linker_flags=
+    dllsearchpath=
+    lib_search_path=`pwd`
+    inst_prefix_dir=
+    new_inherited_linker_flags=
+
+    avoid_version=no
+    bindir=
+    dlfiles=
+    dlprefiles=
+    dlself=no
+    export_dynamic=no
+    export_symbols=
+    export_symbols_regex=
+    generated=
+    libobjs=
+    ltlibs=
+    module=no
+    no_install=no
+    objs=
+    non_pic_objects=
+    precious_files_regex=
+    prefer_static_libs=no
+    preload=no
+    prev=
+    prevarg=
+    release=
+    rpath=
+    xrpath=
+    perm_rpath=
+    temp_rpath=
+    thread_safe=no
+    vinfo=
+    vinfo_number=no
+    weak_libs=
+    single_module="${wl}-single_module"
+    func_infer_tag $base_compile
+
+    # We need to know -static, to get the right output filenames.
+    for arg
+    do
+      case $arg in
+      -shared)
+	test "$build_libtool_libs" != yes && \
+	  func_fatal_configuration "can not build a shared library"
+	build_old_libs=no
+	break
+	;;
+      -all-static | -static | -static-libtool-libs)
+	case $arg in
+	-all-static)
+	  if test "$build_libtool_libs" = yes && test -z "$link_static_flag"; then
+	    func_warning "complete static linking is impossible in this configuration"
+	  fi
+	  if test -n "$link_static_flag"; then
+	    dlopen_self=$dlopen_self_static
+	  fi
+	  prefer_static_libs=yes
+	  ;;
+	-static)
+	  if test -z "$pic_flag" && test -n "$link_static_flag"; then
+	    dlopen_self=$dlopen_self_static
+	  fi
+	  prefer_static_libs=built
+	  ;;
+	-static-libtool-libs)
+	  if test -z "$pic_flag" && test -n "$link_static_flag"; then
+	    dlopen_self=$dlopen_self_static
+	  fi
+	  prefer_static_libs=yes
+	  ;;
+	esac
+	build_libtool_libs=no
+	build_old_libs=yes
+	break
+	;;
+      esac
+    done
+
+    # See if our shared archives depend on static archives.
+    test -n "$old_archive_from_new_cmds" && build_old_libs=yes
+
+    # Go through the arguments, transforming them on the way.
+    while test "$#" -gt 0; do
+      arg="$1"
+      shift
+      func_quote_for_eval "$arg"
+      qarg=$func_quote_for_eval_unquoted_result
+      func_append libtool_args " $func_quote_for_eval_result"
+
+      # If the previous option needs an argument, assign it.
+      if test -n "$prev"; then
+	case $prev in
+	output)
+	  func_append compile_command " @OUTPUT@"
+	  func_append finalize_command " @OUTPUT@"
+	  ;;
+	esac
+
+	case $prev in
+	bindir)
+	  bindir="$arg"
+	  prev=
+	  continue
+	  ;;
+	dlfiles|dlprefiles)
+	  if test "$preload" = no; then
+	    # Add the symbol object into the linking commands.
+	    func_append compile_command " @SYMFILE@"
+	    func_append finalize_command " @SYMFILE@"
+	    preload=yes
+	  fi
+	  case $arg in
+	  *.la | *.lo) ;;  # We handle these cases below.
+	  force)
+	    if test "$dlself" = no; then
+	      dlself=needless
+	      export_dynamic=yes
+	    fi
+	    prev=
+	    continue
+	    ;;
+	  self)
+	    if test "$prev" = dlprefiles; then
+	      dlself=yes
+	    elif test "$prev" = dlfiles && test "$dlopen_self" != yes; then
+	      dlself=yes
+	    else
+	      dlself=needless
+	      export_dynamic=yes
+	    fi
+	    prev=
+	    continue
+	    ;;
+	  *)
+	    if test "$prev" = dlfiles; then
+	      func_append dlfiles " $arg"
+	    else
+	      func_append dlprefiles " $arg"
+	    fi
+	    prev=
+	    continue
+	    ;;
+	  esac
+	  ;;
+	expsyms)
+	  export_symbols="$arg"
+	  test -f "$arg" \
+	    || func_fatal_error "symbol file \`$arg' does not exist"
+	  prev=
+	  continue
+	  ;;
+	expsyms_regex)
+	  export_symbols_regex="$arg"
+	  prev=
+	  continue
+	  ;;
+	framework)
+	  case $host in
+	    *-*-darwin*)
+	      case "$deplibs " in
+		*" $qarg.ltframework "*) ;;
+		*) func_append deplibs " $qarg.ltframework" # this is fixed later
+		   ;;
+	      esac
+	      ;;
+	  esac
+	  prev=
+	  continue
+	  ;;
+	inst_prefix)
+	  inst_prefix_dir="$arg"
+	  prev=
+	  continue
+	  ;;
+	objectlist)
+	  if test -f "$arg"; then
+	    save_arg=$arg
+	    moreargs=
+	    for fil in `cat "$save_arg"`
+	    do
+#	      func_append moreargs " $fil"
+	      arg=$fil
+	      # A libtool-controlled object.
+
+	      # Check to see that this really is a libtool object.
+	      if func_lalib_unsafe_p "$arg"; then
+		pic_object=
+		non_pic_object=
+
+		# Read the .lo file
+		func_source "$arg"
+
+		if test -z "$pic_object" ||
+		   test -z "$non_pic_object" ||
+		   test "$pic_object" = none &&
+		   test "$non_pic_object" = none; then
+		  func_fatal_error "cannot find name of object for \`$arg'"
+		fi
+
+		# Extract subdirectory from the argument.
+		func_dirname "$arg" "/" ""
+		xdir="$func_dirname_result"
+
+		if test "$pic_object" != none; then
+		  # Prepend the subdirectory the object is found in.
+		  pic_object="$xdir$pic_object"
+
+		  if test "$prev" = dlfiles; then
+		    if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then
+		      func_append dlfiles " $pic_object"
+		      prev=
+		      continue
+		    else
+		      # If libtool objects are unsupported, then we need to preload.
+		      prev=dlprefiles
+		    fi
+		  fi
+
+		  # CHECK ME:  I think I busted this.  -Ossama
+		  if test "$prev" = dlprefiles; then
+		    # Preload the old-style object.
+		    func_append dlprefiles " $pic_object"
+		    prev=
+		  fi
+
+		  # A PIC object.
+		  func_append libobjs " $pic_object"
+		  arg="$pic_object"
+		fi
+
+		# Non-PIC object.
+		if test "$non_pic_object" != none; then
+		  # Prepend the subdirectory the object is found in.
+		  non_pic_object="$xdir$non_pic_object"
+
+		  # A standard non-PIC object
+		  func_append non_pic_objects " $non_pic_object"
+		  if test -z "$pic_object" || test "$pic_object" = none ; then
+		    arg="$non_pic_object"
+		  fi
+		else
+		  # If the PIC object exists, use it instead.
+		  # $xdir was prepended to $pic_object above.
+		  non_pic_object="$pic_object"
+		  func_append non_pic_objects " $non_pic_object"
+		fi
+	      else
+		# Only an error if not doing a dry-run.
+		if $opt_dry_run; then
+		  # Extract subdirectory from the argument.
+		  func_dirname "$arg" "/" ""
+		  xdir="$func_dirname_result"
+
+		  func_lo2o "$arg"
+		  pic_object=$xdir$objdir/$func_lo2o_result
+		  non_pic_object=$xdir$func_lo2o_result
+		  func_append libobjs " $pic_object"
+		  func_append non_pic_objects " $non_pic_object"
+	        else
+		  func_fatal_error "\`$arg' is not a valid libtool object"
+		fi
+	      fi
+	    done
+	  else
+	    func_fatal_error "link input file \`$arg' does not exist"
+	  fi
+	  arg=$save_arg
+	  prev=
+	  continue
+	  ;;
+	precious_regex)
+	  precious_files_regex="$arg"
+	  prev=
+	  continue
+	  ;;
+	release)
+	  release="-$arg"
+	  prev=
+	  continue
+	  ;;
+	rpath | xrpath)
+	  # We need an absolute path.
+	  case $arg in
+	  [\\/]* | [A-Za-z]:[\\/]*) ;;
+	  *)
+	    func_fatal_error "only absolute run-paths are allowed"
+	    ;;
+	  esac
+	  if test "$prev" = rpath; then
+	    case "$rpath " in
+	    *" $arg "*) ;;
+	    *) func_append rpath " $arg" ;;
+	    esac
+	  else
+	    case "$xrpath " in
+	    *" $arg "*) ;;
+	    *) func_append xrpath " $arg" ;;
+	    esac
+	  fi
+	  prev=
+	  continue
+	  ;;
+	shrext)
+	  shrext_cmds="$arg"
+	  prev=
+	  continue
+	  ;;
+	weak)
+	  func_append weak_libs " $arg"
+	  prev=
+	  continue
+	  ;;
+	xcclinker)
+	  func_append linker_flags " $qarg"
+	  func_append compiler_flags " $qarg"
+	  prev=
+	  func_append compile_command " $qarg"
+	  func_append finalize_command " $qarg"
+	  continue
+	  ;;
+	xcompiler)
+	  func_append compiler_flags " $qarg"
+	  prev=
+	  func_append compile_command " $qarg"
+	  func_append finalize_command " $qarg"
+	  continue
+	  ;;
+	xlinker)
+	  func_append linker_flags " $qarg"
+	  func_append compiler_flags " $wl$qarg"
+	  prev=
+	  func_append compile_command " $wl$qarg"
+	  func_append finalize_command " $wl$qarg"
+	  continue
+	  ;;
+	*)
+	  eval "$prev=\"\$arg\""
+	  prev=
+	  continue
+	  ;;
+	esac
+      fi # test -n "$prev"
+
+      prevarg="$arg"
+
+      case $arg in
+      -all-static)
+	if test -n "$link_static_flag"; then
+	  # See comment for -static flag below, for more details.
+	  func_append compile_command " $link_static_flag"
+	  func_append finalize_command " $link_static_flag"
+	fi
+	continue
+	;;
+
+      -allow-undefined)
+	# FIXME: remove this flag sometime in the future.
+	func_fatal_error "\`-allow-undefined' must not be used because it is the default"
+	;;
+
+      -avoid-version)
+	avoid_version=yes
+	continue
+	;;
+
+      -bindir)
+	prev=bindir
+	continue
+	;;
+
+      -dlopen)
+	prev=dlfiles
+	continue
+	;;
+
+      -dlpreopen)
+	prev=dlprefiles
+	continue
+	;;
+
+      -export-dynamic)
+	export_dynamic=yes
+	continue
+	;;
+
+      -export-symbols | -export-symbols-regex)
+	if test -n "$export_symbols" || test -n "$export_symbols_regex"; then
+	  func_fatal_error "more than one -exported-symbols argument is not allowed"
+	fi
+	if test "X$arg" = "X-export-symbols"; then
+	  prev=expsyms
+	else
+	  prev=expsyms_regex
+	fi
+	continue
+	;;
+
+      -framework)
+	prev=framework
+	continue
+	;;
+
+      -inst-prefix-dir)
+	prev=inst_prefix
+	continue
+	;;
+
+      # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:*
+      # so, if we see these flags be careful not to treat them like -L
+      -L[A-Z][A-Z]*:*)
+	case $with_gcc/$host in
+	no/*-*-irix* | /*-*-irix*)
+	  func_append compile_command " $arg"
+	  func_append finalize_command " $arg"
+	  ;;
+	esac
+	continue
+	;;
+
+      -L*)
+	func_stripname "-L" '' "$arg"
+	if test -z "$func_stripname_result"; then
+	  if test "$#" -gt 0; then
+	    func_fatal_error "require no space between \`-L' and \`$1'"
+	  else
+	    func_fatal_error "need path for \`-L' option"
+	  fi
+	fi
+	func_resolve_sysroot "$func_stripname_result"
+	dir=$func_resolve_sysroot_result
+	# We need an absolute path.
+	case $dir in
+	[\\/]* | [A-Za-z]:[\\/]*) ;;
+	*)
+	  absdir=`cd "$dir" && pwd`
+	  test -z "$absdir" && \
+	    func_fatal_error "cannot determine absolute directory name of \`$dir'"
+	  dir="$absdir"
+	  ;;
+	esac
+	case "$deplibs " in
+	*" -L$dir "* | *" $arg "*)
+	  # Will only happen for absolute or sysroot arguments
+	  ;;
+	*)
+	  # Preserve sysroot, but never include relative directories
+	  case $dir in
+	    [\\/]* | [A-Za-z]:[\\/]* | =*) func_append deplibs " $arg" ;;
+	    *) func_append deplibs " -L$dir" ;;
+	  esac
+	  func_append lib_search_path " $dir"
+	  ;;
+	esac
+	case $host in
+	*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*)
+	  testbindir=`$ECHO "$dir" | $SED 's*/lib$*/bin*'`
+	  case :$dllsearchpath: in
+	  *":$dir:"*) ;;
+	  ::) dllsearchpath=$dir;;
+	  *) func_append dllsearchpath ":$dir";;
+	  esac
+	  case :$dllsearchpath: in
+	  *":$testbindir:"*) ;;
+	  ::) dllsearchpath=$testbindir;;
+	  *) func_append dllsearchpath ":$testbindir";;
+	  esac
+	  ;;
+	esac
+	continue
+	;;
+
+      -l*)
+	if test "X$arg" = "X-lc" || test "X$arg" = "X-lm"; then
+	  case $host in
+	  *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-beos* | *-cegcc* | *-*-haiku*)
+	    # These systems don't actually have a C or math library (as such)
+	    continue
+	    ;;
+	  *-*-os2*)
+	    # These systems don't actually have a C library (as such)
+	    test "X$arg" = "X-lc" && continue
+	    ;;
+	  *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*)
+	    # Do not include libc due to us having libc/libc_r.
+	    test "X$arg" = "X-lc" && continue
+	    ;;
+	  *-*-rhapsody* | *-*-darwin1.[012])
+	    # Rhapsody C and math libraries are in the System framework
+	    func_append deplibs " System.ltframework"
+	    continue
+	    ;;
+	  *-*-sco3.2v5* | *-*-sco5v6*)
+	    # Causes problems with __ctype
+	    test "X$arg" = "X-lc" && continue
+	    ;;
+	  *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*)
+	    # Compiler inserts libc in the correct place for threads to work
+	    test "X$arg" = "X-lc" && continue
+	    ;;
+	  esac
+	elif test "X$arg" = "X-lc_r"; then
+	 case $host in
+	 *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*)
+	   # Do not include libc_r directly, use -pthread flag.
+	   continue
+	   ;;
+	 esac
+	fi
+	func_append deplibs " $arg"
+	continue
+	;;
+
+      -module)
+	module=yes
+	continue
+	;;
+
+      # Tru64 UNIX uses -model [arg] to determine the layout of C++
+      # classes, name mangling, and exception handling.
+      # Darwin uses the -arch flag to determine output architecture.
+      -model|-arch|-isysroot|--sysroot)
+	func_append compiler_flags " $arg"
+	func_append compile_command " $arg"
+	func_append finalize_command " $arg"
+	prev=xcompiler
+	continue
+	;;
+
+      -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \
+      |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*)
+	func_append compiler_flags " $arg"
+	func_append compile_command " $arg"
+	func_append finalize_command " $arg"
+	case "$new_inherited_linker_flags " in
+	    *" $arg "*) ;;
+	    * ) func_append new_inherited_linker_flags " $arg" ;;
+	esac
+	continue
+	;;
+
+      -multi_module)
+	single_module="${wl}-multi_module"
+	continue
+	;;
+
+      -no-fast-install)
+	fast_install=no
+	continue
+	;;
+
+      -no-install)
+	case $host in
+	*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-darwin* | *-cegcc*)
+	  # The PATH hackery in wrapper scripts is required on Windows
+	  # and Darwin in order for the loader to find any dlls it needs.
+	  func_warning "\`-no-install' is ignored for $host"
+	  func_warning "assuming \`-no-fast-install' instead"
+	  fast_install=no
+	  ;;
+	*) no_install=yes ;;
+	esac
+	continue
+	;;
+
+      -no-undefined)
+	allow_undefined=no
+	continue
+	;;
+
+      -objectlist)
+	prev=objectlist
+	continue
+	;;
+
+      -o) prev=output ;;
+
+      -precious-files-regex)
+	prev=precious_regex
+	continue
+	;;
+
+      -release)
+	prev=release
+	continue
+	;;
+
+      -rpath)
+	prev=rpath
+	continue
+	;;
+
+      -R)
+	prev=xrpath
+	continue
+	;;
+
+      -R*)
+	func_stripname '-R' '' "$arg"
+	dir=$func_stripname_result
+	# We need an absolute path.
+	case $dir in
+	[\\/]* | [A-Za-z]:[\\/]*) ;;
+	=*)
+	  func_stripname '=' '' "$dir"
+	  dir=$lt_sysroot$func_stripname_result
+	  ;;
+	*)
+	  func_fatal_error "only absolute run-paths are allowed"
+	  ;;
+	esac
+	case "$xrpath " in
+	*" $dir "*) ;;
+	*) func_append xrpath " $dir" ;;
+	esac
+	continue
+	;;
+
+      -shared)
+	# The effects of -shared are defined in a previous loop.
+	continue
+	;;
+
+      -shrext)
+	prev=shrext
+	continue
+	;;
+
+      -static | -static-libtool-libs)
+	# The effects of -static are defined in a previous loop.
+	# We used to do the same as -all-static on platforms that
+	# didn't have a PIC flag, but the assumption that the effects
+	# would be equivalent was wrong.  It would break on at least
+	# Digital Unix and AIX.
+	continue
+	;;
+
+      -thread-safe)
+	thread_safe=yes
+	continue
+	;;
+
+      -version-info)
+	prev=vinfo
+	continue
+	;;
+
+      -version-number)
+	prev=vinfo
+	vinfo_number=yes
+	continue
+	;;
+
+      -weak)
+        prev=weak
+	continue
+	;;
+
+      -Wc,*)
+	func_stripname '-Wc,' '' "$arg"
+	args=$func_stripname_result
+	arg=
+	save_ifs="$IFS"; IFS=','
+	for flag in $args; do
+	  IFS="$save_ifs"
+          func_quote_for_eval "$flag"
+	  func_append arg " $func_quote_for_eval_result"
+	  func_append compiler_flags " $func_quote_for_eval_result"
+	done
+	IFS="$save_ifs"
+	func_stripname ' ' '' "$arg"
+	arg=$func_stripname_result
+	;;
+
+      -Wl,*)
+	func_stripname '-Wl,' '' "$arg"
+	args=$func_stripname_result
+	arg=
+	save_ifs="$IFS"; IFS=','
+	for flag in $args; do
+	  IFS="$save_ifs"
+          func_quote_for_eval "$flag"
+	  func_append arg " $wl$func_quote_for_eval_result"
+	  func_append compiler_flags " $wl$func_quote_for_eval_result"
+	  func_append linker_flags " $func_quote_for_eval_result"
+	done
+	IFS="$save_ifs"
+	func_stripname ' ' '' "$arg"
+	arg=$func_stripname_result
+	;;
+
+      -Xcompiler)
+	prev=xcompiler
+	continue
+	;;
+
+      -Xlinker)
+	prev=xlinker
+	continue
+	;;
+
+      -XCClinker)
+	prev=xcclinker
+	continue
+	;;
+
+      # -msg_* for osf cc
+      -msg_*)
+	func_quote_for_eval "$arg"
+	arg="$func_quote_for_eval_result"
+	;;
+
+      # Flags to be passed through unchanged, with rationale:
+      # -64, -mips[0-9]      enable 64-bit mode for the SGI compiler
+      # -r[0-9][0-9]*        specify processor for the SGI compiler
+      # -xarch=*, -xtarget=* enable 64-bit mode for the Sun compiler
+      # +DA*, +DD*           enable 64-bit mode for the HP compiler
+      # -q*                  compiler args for the IBM compiler
+      # -m*, -t[45]*, -txscale* architecture-specific flags for GCC
+      # -F/path              path to uninstalled frameworks, gcc on darwin
+      # -p, -pg, --coverage, -fprofile-*  profiling flags for GCC
+      # @file                GCC response files
+      # -tp=*                Portland pgcc target processor selection
+      # --sysroot=*          for sysroot support
+      # -O*, -flto*, -fwhopr*, -fuse-linker-plugin GCC link-time optimization
+      -64|-mips[0-9]|-r[0-9][0-9]*|-xarch=*|-xtarget=*|+DA*|+DD*|-q*|-m*| \
+      -t[45]*|-txscale*|-p|-pg|--coverage|-fprofile-*|-F*|@*|-tp=*|--sysroot=*| \
+      -O*|-flto*|-fwhopr*|-fuse-linker-plugin|-fstack-protector*|-fgnu-tm| \
+      -shared-libgcc|-static-libgcc|-static-libgfortran|-static-libstdc++)
+        func_quote_for_eval "$arg"
+	arg="$func_quote_for_eval_result"
+        func_append compile_command " $arg"
+        func_append finalize_command " $arg"
+        func_append compiler_flags " $arg"
+        continue
+        ;;
+
+      # Some other compiler flag.
+      -* | +*)
+        func_quote_for_eval "$arg"
+	arg="$func_quote_for_eval_result"
+	;;
+
+      *.$objext)
+	# A standard object.
+	func_append objs " $arg"
+	;;
+
+      *.lo)
+	# A libtool-controlled object.
+
+	# Check to see that this really is a libtool object.
+	if func_lalib_unsafe_p "$arg"; then
+	  pic_object=
+	  non_pic_object=
+
+	  # Read the .lo file
+	  func_source "$arg"
+
+	  if test -z "$pic_object" ||
+	     test -z "$non_pic_object" ||
+	     test "$pic_object" = none &&
+	     test "$non_pic_object" = none; then
+	    func_fatal_error "cannot find name of object for \`$arg'"
+	  fi
+
+	  # Extract subdirectory from the argument.
+	  func_dirname "$arg" "/" ""
+	  xdir="$func_dirname_result"
+
+	  if test "$pic_object" != none; then
+	    # Prepend the subdirectory the object is found in.
+	    pic_object="$xdir$pic_object"
+
+	    if test "$prev" = dlfiles; then
+	      if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then
+		func_append dlfiles " $pic_object"
+		prev=
+		continue
+	      else
+		# If libtool objects are unsupported, then we need to preload.
+		prev=dlprefiles
+	      fi
+	    fi
+
+	    # CHECK ME:  I think I busted this.  -Ossama
+	    if test "$prev" = dlprefiles; then
+	      # Preload the old-style object.
+	      func_append dlprefiles " $pic_object"
+	      prev=
+	    fi
+
+	    # A PIC object.
+	    func_append libobjs " $pic_object"
+	    arg="$pic_object"
+	  fi
+
+	  # Non-PIC object.
+	  if test "$non_pic_object" != none; then
+	    # Prepend the subdirectory the object is found in.
+	    non_pic_object="$xdir$non_pic_object"
+
+	    # A standard non-PIC object
+	    func_append non_pic_objects " $non_pic_object"
+	    if test -z "$pic_object" || test "$pic_object" = none ; then
+	      arg="$non_pic_object"
+	    fi
+	  else
+	    # If the PIC object exists, use it instead.
+	    # $xdir was prepended to $pic_object above.
+	    non_pic_object="$pic_object"
+	    func_append non_pic_objects " $non_pic_object"
+	  fi
+	else
+	  # Only an error if not doing a dry-run.
+	  if $opt_dry_run; then
+	    # Extract subdirectory from the argument.
+	    func_dirname "$arg" "/" ""
+	    xdir="$func_dirname_result"
+
+	    func_lo2o "$arg"
+	    pic_object=$xdir$objdir/$func_lo2o_result
+	    non_pic_object=$xdir$func_lo2o_result
+	    func_append libobjs " $pic_object"
+	    func_append non_pic_objects " $non_pic_object"
+	  else
+	    func_fatal_error "\`$arg' is not a valid libtool object"
+	  fi
+	fi
+	;;
+
+      *.$libext)
+	# An archive.
+	func_append deplibs " $arg"
+	func_append old_deplibs " $arg"
+	continue
+	;;
+
+      *.la)
+	# A libtool-controlled library.
+
+	func_resolve_sysroot "$arg"
+	if test "$prev" = dlfiles; then
+	  # This library was specified with -dlopen.
+	  func_append dlfiles " $func_resolve_sysroot_result"
+	  prev=
+	elif test "$prev" = dlprefiles; then
+	  # The library was specified with -dlpreopen.
+	  func_append dlprefiles " $func_resolve_sysroot_result"
+	  prev=
+	else
+	  func_append deplibs " $func_resolve_sysroot_result"
+	fi
+	continue
+	;;
+
+      # Some other compiler argument.
+      *)
+	# Unknown arguments in both finalize_command and compile_command need
+	# to be aesthetically quoted because they are evaled later.
+	func_quote_for_eval "$arg"
+	arg="$func_quote_for_eval_result"
+	;;
+      esac # arg
+
+      # Now actually substitute the argument into the commands.
+      if test -n "$arg"; then
+	func_append compile_command " $arg"
+	func_append finalize_command " $arg"
+      fi
+    done # argument parsing loop
+
+    test -n "$prev" && \
+      func_fatal_help "the \`$prevarg' option requires an argument"
+
+    if test "$export_dynamic" = yes && test -n "$export_dynamic_flag_spec"; then
+      eval arg=\"$export_dynamic_flag_spec\"
+      func_append compile_command " $arg"
+      func_append finalize_command " $arg"
+    fi
+
+    oldlibs=
+    # calculate the name of the file, without its directory
+    func_basename "$output"
+    outputname="$func_basename_result"
+    libobjs_save="$libobjs"
+
+    if test -n "$shlibpath_var"; then
+      # get the directories listed in $shlibpath_var
+      eval shlib_search_path=\`\$ECHO \"\${$shlibpath_var}\" \| \$SED \'s/:/ /g\'\`
+    else
+      shlib_search_path=
+    fi
+    eval sys_lib_search_path=\"$sys_lib_search_path_spec\"
+    eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\"
+
+    func_dirname "$output" "/" ""
+    output_objdir="$func_dirname_result$objdir"
+    func_to_tool_file "$output_objdir/"
+    tool_output_objdir=$func_to_tool_file_result
+    # Create the object directory.
+    func_mkdir_p "$output_objdir"
+
+    # Determine the type of output
+    case $output in
+    "")
+      func_fatal_help "you must specify an output file"
+      ;;
+    *.$libext) linkmode=oldlib ;;
+    *.lo | *.$objext) linkmode=obj ;;
+    *.la) linkmode=lib ;;
+    *) linkmode=prog ;; # Anything else should be a program.
+    esac
+
+    specialdeplibs=
+
+    libs=
+    # Find all interdependent deplibs by searching for libraries
+    # that are linked more than once (e.g. -la -lb -la)
+    for deplib in $deplibs; do
+      if $opt_preserve_dup_deps ; then
+	case "$libs " in
+	*" $deplib "*) func_append specialdeplibs " $deplib" ;;
+	esac
+      fi
+      func_append libs " $deplib"
+    done
+
+    if test "$linkmode" = lib; then
+      libs="$predeps $libs $compiler_lib_search_path $postdeps"
+
+      # Compute libraries that are listed more than once in $predeps
+      # $postdeps and mark them as special (i.e., whose duplicates are
+      # not to be eliminated).
+      pre_post_deps=
+      if $opt_duplicate_compiler_generated_deps; then
+	for pre_post_dep in $predeps $postdeps; do
+	  case "$pre_post_deps " in
+	  *" $pre_post_dep "*) func_append specialdeplibs " $pre_post_dep" ;; # repeated entry: mark only this dep
+	  esac
+	  func_append pre_post_deps " $pre_post_dep"
+	done
+      fi
+      pre_post_deps=
+    fi
+
+    deplibs=
+    newdependency_libs=
+    newlib_search_path=
+    need_relink=no # whether we're linking any uninstalled libtool libraries
+    notinst_deplibs= # not-installed libtool libraries
+    notinst_path= # paths that contain not-installed libtool libraries
+
+    case $linkmode in
+    lib)
+	passes="conv dlpreopen link"
+	for file in $dlfiles $dlprefiles; do
+	  case $file in
+	  *.la) ;;
+	  *)
+	    func_fatal_help "libraries can \`-dlopen' only libtool libraries: $file"
+	    ;;
+	  esac
+	done
+	;;
+    prog)
+	compile_deplibs=
+	finalize_deplibs=
+	alldeplibs=no
+	newdlfiles=
+	newdlprefiles=
+	passes="conv scan dlopen dlpreopen link"
+	;;
+    *)  passes="conv"
+	;;
+    esac
+
+    for pass in $passes; do
+      # The preopen pass in lib mode reverses $deplibs; put it back here
+      # so that -L comes before libs that need it for instance...
+      if test "$linkmode,$pass" = "lib,link"; then
+	## FIXME: Find the place where the list is rebuilt in the wrong
+	##        order, and fix it there properly
+        tmp_deplibs=
+	for deplib in $deplibs; do
+	  tmp_deplibs="$deplib $tmp_deplibs"
+	done
+	deplibs="$tmp_deplibs"
+      fi
+
+      if test "$linkmode,$pass" = "lib,link" ||
+	 test "$linkmode,$pass" = "prog,scan"; then
+	libs="$deplibs"
+	deplibs=
+      fi
+      if test "$linkmode" = prog; then
+	case $pass in
+	dlopen) libs="$dlfiles" ;;
+	dlpreopen) libs="$dlprefiles" ;;
+	link) libs="$deplibs %DEPLIBS% $dependency_libs" ;;
+	esac
+      fi
+      if test "$linkmode,$pass" = "lib,dlpreopen"; then
+	# Collect and forward deplibs of preopened libtool libs
+	for lib in $dlprefiles; do
+	  # Ignore non-libtool-libs
+	  dependency_libs=
+	  func_resolve_sysroot "$lib"
+	  case $lib in
+	  *.la)	func_source "$func_resolve_sysroot_result" ;;
+	  esac
+
+	  # Collect preopened libtool deplibs, except any this library
+	  # has declared as weak libs
+	  for deplib in $dependency_libs; do
+	    func_basename "$deplib"
+            deplib_base=$func_basename_result
+	    case " $weak_libs " in
+	    *" $deplib_base "*) ;;
+	    *) func_append deplibs " $deplib" ;;
+	    esac
+	  done
+	done
+	libs="$dlprefiles"
+      fi
+      if test "$pass" = dlopen; then
+	# Collect dlpreopened libraries
+	save_deplibs="$deplibs"
+	deplibs=
+      fi
+
+      for deplib in $libs; do
+	lib=
+	found=no
+	case $deplib in
+	-mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \
+        |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*)
+	  if test "$linkmode,$pass" = "prog,link"; then
+	    compile_deplibs="$deplib $compile_deplibs"
+	    finalize_deplibs="$deplib $finalize_deplibs"
+	  else
+	    func_append compiler_flags " $deplib"
+	    if test "$linkmode" = lib ; then
+		case "$new_inherited_linker_flags " in
+		    *" $deplib "*) ;;
+		    * ) func_append new_inherited_linker_flags " $deplib" ;;
+		esac
+	    fi
+	  fi
+	  continue
+	  ;;
+	-l*)
+	  if test "$linkmode" != lib && test "$linkmode" != prog; then
+	    func_warning "\`-l' is ignored for archives/objects"
+	    continue
+	  fi
+	  func_stripname '-l' '' "$deplib"
+	  name=$func_stripname_result
+	  if test "$linkmode" = lib; then
+	    searchdirs="$newlib_search_path $lib_search_path $compiler_lib_search_dirs $sys_lib_search_path $shlib_search_path"
+	  else
+	    searchdirs="$newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path"
+	  fi
+	  for searchdir in $searchdirs; do
+	    for search_ext in .la $std_shrext .so .a; do
+	      # Search the libtool library
+	      lib="$searchdir/lib${name}${search_ext}"
+	      if test -f "$lib"; then
+		if test "$search_ext" = ".la"; then
+		  found=yes
+		else
+		  found=no
+		fi
+		break 2
+	      fi
+	    done
+	  done
+	  if test "$found" != yes; then
+	    # deplib doesn't seem to be a libtool library
+	    if test "$linkmode,$pass" = "prog,link"; then
+	      compile_deplibs="$deplib $compile_deplibs"
+	      finalize_deplibs="$deplib $finalize_deplibs"
+	    else
+	      deplibs="$deplib $deplibs"
+	      test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs"
+	    fi
+	    continue
+	  else # deplib is a libtool library
+	    # If $allow_libtool_libs_with_static_runtimes && $deplib is a stdlib,
+	    # We need to do some special things here, and not later.
+	    if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
+	      case " $predeps $postdeps " in
+	      *" $deplib "*)
+		if func_lalib_p "$lib"; then
+		  library_names=
+		  old_library=
+		  func_source "$lib"
+		  for l in $old_library $library_names; do
+		    ll="$l"
+		  done
+		  if test "X$ll" = "X$old_library" ; then # only static version available
+		    found=no
+		    func_dirname "$lib" "" "."
+		    ladir="$func_dirname_result"
+		    lib=$ladir/$old_library
+		    if test "$linkmode,$pass" = "prog,link"; then
+		      compile_deplibs="$deplib $compile_deplibs"
+		      finalize_deplibs="$deplib $finalize_deplibs"
+		    else
+		      deplibs="$deplib $deplibs"
+		      test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs"
+		    fi
+		    continue
+		  fi
+		fi
+		;;
+	      *) ;;
+	      esac
+	    fi
+	  fi
+	  ;; # -l
+	*.ltframework)
+	  if test "$linkmode,$pass" = "prog,link"; then
+	    compile_deplibs="$deplib $compile_deplibs"
+	    finalize_deplibs="$deplib $finalize_deplibs"
+	  else
+	    deplibs="$deplib $deplibs"
+	    if test "$linkmode" = lib ; then
+		case "$new_inherited_linker_flags " in
+		    *" $deplib "*) ;;
+		    * ) func_append new_inherited_linker_flags " $deplib" ;;
+		esac
+	    fi
+	  fi
+	  continue
+	  ;;
+	-L*)
+	  case $linkmode in
+	  lib)
+	    deplibs="$deplib $deplibs"
+	    test "$pass" = conv && continue
+	    newdependency_libs="$deplib $newdependency_libs"
+	    func_stripname '-L' '' "$deplib"
+	    func_resolve_sysroot "$func_stripname_result"
+	    func_append newlib_search_path " $func_resolve_sysroot_result"
+	    ;;
+	  prog)
+	    if test "$pass" = conv; then
+	      deplibs="$deplib $deplibs"
+	      continue
+	    fi
+	    if test "$pass" = scan; then
+	      deplibs="$deplib $deplibs"
+	    else
+	      compile_deplibs="$deplib $compile_deplibs"
+	      finalize_deplibs="$deplib $finalize_deplibs"
+	    fi
+	    func_stripname '-L' '' "$deplib"
+	    func_resolve_sysroot "$func_stripname_result"
+	    func_append newlib_search_path " $func_resolve_sysroot_result"
+	    ;;
+	  *)
+	    func_warning "\`-L' is ignored for archives/objects"
+	    ;;
+	  esac # linkmode
+	  continue
+	  ;; # -L
+	-R*)
+	  if test "$pass" = link; then
+	    func_stripname '-R' '' "$deplib"
+	    func_resolve_sysroot "$func_stripname_result"
+	    dir=$func_resolve_sysroot_result
+	    # Make sure the xrpath contains only unique directories.
+	    case "$xrpath " in
+	    *" $dir "*) ;;
+	    *) func_append xrpath " $dir" ;;
+	    esac
+	  fi
+	  deplibs="$deplib $deplibs"
+	  continue
+	  ;;
+	*.la)
+	  func_resolve_sysroot "$deplib"
+	  lib=$func_resolve_sysroot_result
+	  ;;
+	*.$libext)
+	  if test "$pass" = conv; then
+	    deplibs="$deplib $deplibs"
+	    continue
+	  fi
+	  case $linkmode in
+	  lib)
+	    # Linking convenience modules into shared libraries is allowed,
+	    # but linking other static libraries is non-portable.
+	    case " $dlpreconveniencelibs " in
+	    *" $deplib "*) ;;
+	    *)
+	      valid_a_lib=no
+	      case $deplibs_check_method in
+		match_pattern*)
+		  set dummy $deplibs_check_method; shift
+		  match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"`
+		  if eval "\$ECHO \"$deplib\"" 2>/dev/null | $SED 10q \
+		    | $EGREP "$match_pattern_regex" > /dev/null; then
+		    valid_a_lib=yes
+		  fi
+		;;
+		pass_all)
+		  valid_a_lib=yes
+		;;
+	      esac
+	      if test "$valid_a_lib" != yes; then
+		echo
+		$ECHO "*** Warning: Trying to link with static lib archive $deplib."
+		echo "*** I have the capability to make that library automatically link in when"
+		echo "*** you link to this library.  But I can only do this if you have a"
+		echo "*** shared version of the library, which you do not appear to have"
+		echo "*** because the file extensions .$libext of this argument makes me believe"
+		echo "*** that it is just a static archive that I should not use here."
+	      else
+		echo
+		$ECHO "*** Warning: Linking the shared library $output against the"
+		$ECHO "*** static library $deplib is not portable!"
+		deplibs="$deplib $deplibs"
+	      fi
+	      ;;
+	    esac
+	    continue
+	    ;;
+	  prog)
+	    if test "$pass" != link; then
+	      deplibs="$deplib $deplibs"
+	    else
+	      compile_deplibs="$deplib $compile_deplibs"
+	      finalize_deplibs="$deplib $finalize_deplibs"
+	    fi
+	    continue
+	    ;;
+	  esac # linkmode
+	  ;; # *.$libext
+	*.lo | *.$objext)
+	  if test "$pass" = conv; then
+	    deplibs="$deplib $deplibs"
+	  elif test "$linkmode" = prog; then
+	    if test "$pass" = dlpreopen || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then
+	      # If there is no dlopen support or we're linking statically,
+	      # we need to preload.
+	      func_append newdlprefiles " $deplib"
+	      compile_deplibs="$deplib $compile_deplibs"
+	      finalize_deplibs="$deplib $finalize_deplibs"
+	    else
+	      func_append newdlfiles " $deplib"
+	    fi
+	  fi
+	  continue
+	  ;;
+	%DEPLIBS%)
+	  alldeplibs=yes
+	  continue
+	  ;;
+	esac # case $deplib
+
+	if test "$found" = yes || test -f "$lib"; then :
+	else
+	  func_fatal_error "cannot find the library \`$lib' or unhandled argument \`$deplib'"
+	fi
+
+	# Check to see that this really is a libtool archive.
+	func_lalib_unsafe_p "$lib" \
+	  || func_fatal_error "\`$lib' is not a valid libtool archive"
+
+	func_dirname "$lib" "" "."
+	ladir="$func_dirname_result"
+
+	dlname=
+	dlopen=
+	dlpreopen=
+	libdir=
+	library_names=
+	old_library=
+	inherited_linker_flags=
+	# If the library was installed with an old release of libtool,
+	# it will not redefine variables installed, or shouldnotlink
+	installed=yes
+	shouldnotlink=no
+	avoidtemprpath=
+
+
+	# Read the .la file
+	func_source "$lib"
+
+	# Convert "-framework foo" to "foo.ltframework"
+	if test -n "$inherited_linker_flags"; then
+	  tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g'`
+	  for tmp_inherited_linker_flag in $tmp_inherited_linker_flags; do
+	    case " $new_inherited_linker_flags " in
+	      *" $tmp_inherited_linker_flag "*) ;;
+	      *) func_append new_inherited_linker_flags " $tmp_inherited_linker_flag";;
+	    esac
+	  done
+	fi
+	dependency_libs=`$ECHO " $dependency_libs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
+	if test "$linkmode,$pass" = "lib,link" ||
+	   test "$linkmode,$pass" = "prog,scan" ||
+	   { test "$linkmode" != prog && test "$linkmode" != lib; }; then
+	  test -n "$dlopen" && func_append dlfiles " $dlopen"
+	  test -n "$dlpreopen" && func_append dlprefiles " $dlpreopen"
+	fi
+
+	if test "$pass" = conv; then
+	  # Only check for convenience libraries
+	  deplibs="$lib $deplibs"
+	  if test -z "$libdir"; then
+	    if test -z "$old_library"; then
+	      func_fatal_error "cannot find name of link library for \`$lib'"
+	    fi
+	    # It is a libtool convenience library, so add in its objects.
+	    func_append convenience " $ladir/$objdir/$old_library"
+	    func_append old_convenience " $ladir/$objdir/$old_library"
+	  elif test "$linkmode" != prog && test "$linkmode" != lib; then
+	    func_fatal_error "\`$lib' is not a convenience library"
+	  fi
+	  tmp_libs=
+	  for deplib in $dependency_libs; do
+	    deplibs="$deplib $deplibs"
+	    if $opt_preserve_dup_deps ; then
+	      case "$tmp_libs " in
+	      *" $deplib "*) func_append specialdeplibs " $deplib" ;;
+	      esac
+	    fi
+	    func_append tmp_libs " $deplib"
+	  done
+	  continue
+	fi # $pass = conv
+
+
+	# Get the name of the library we link against.
+	linklib=
+	if test -n "$old_library" &&
+	   { test "$prefer_static_libs" = yes ||
+	     test "$prefer_static_libs,$installed" = "built,no"; }; then
+	  linklib=$old_library
+	else
+	  for l in $old_library $library_names; do
+	    linklib="$l"
+	  done
+	fi
+	if test -z "$linklib"; then
+	  func_fatal_error "cannot find name of link library for \`$lib'"
+	fi
+
+	# This library was specified with -dlopen.
+	if test "$pass" = dlopen; then
+	  if test -z "$libdir"; then
+	    func_fatal_error "cannot -dlopen a convenience library: \`$lib'"
+	  fi
+	  if test -z "$dlname" ||
+	     test "$dlopen_support" != yes ||
+	     test "$build_libtool_libs" = no; then
+	    # If there is no dlname, no dlopen support or we're linking
+	    # statically, we need to preload.  We also need to preload any
+	    # dependent libraries so libltdl's deplib preloader doesn't
+	    # bomb out in the load deplibs phase.
+	    func_append dlprefiles " $lib $dependency_libs"
+	  else
+	    func_append newdlfiles " $lib"
+	  fi
+	  continue
+	fi # $pass = dlopen
+
+	# We need an absolute path.
+	case $ladir in
+	[\\/]* | [A-Za-z]:[\\/]*) abs_ladir="$ladir" ;;
+	*)
+	  abs_ladir=`cd "$ladir" && pwd`
+	  if test -z "$abs_ladir"; then
+	    func_warning "cannot determine absolute directory name of \`$ladir'"
+	    func_warning "passing it literally to the linker, although it might fail"
+	    abs_ladir="$ladir"
+	  fi
+	  ;;
+	esac
+	func_basename "$lib"
+	laname="$func_basename_result"
+
+	# Find the relevant object directory and library name.
+	if test "X$installed" = Xyes; then
+	  if test ! -f "$lt_sysroot$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then
+	    func_warning "library \`$lib' was moved."
+	    dir="$ladir"
+	    absdir="$abs_ladir"
+	    libdir="$abs_ladir"
+	  else
+	    dir="$lt_sysroot$libdir"
+	    absdir="$lt_sysroot$libdir"
+	  fi
+	  test "X$hardcode_automatic" = Xyes && avoidtemprpath=yes
+	else
+	  if test ! -f "$ladir/$objdir/$linklib" && test -f "$abs_ladir/$linklib"; then
+	    dir="$ladir"
+	    absdir="$abs_ladir"
+	    # Remove this search path later
+	    func_append notinst_path " $abs_ladir"
+	  else
+	    dir="$ladir/$objdir"
+	    absdir="$abs_ladir/$objdir"
+	    # Remove this search path later
+	    func_append notinst_path " $abs_ladir"
+	  fi
+	fi # $installed = yes
+	func_stripname 'lib' '.la' "$laname"
+	name=$func_stripname_result
+
+	# This library was specified with -dlpreopen.
+	if test "$pass" = dlpreopen; then
+	  if test -z "$libdir" && test "$linkmode" = prog; then
+	    func_fatal_error "only libraries may -dlpreopen a convenience library: \`$lib'"
+	  fi
+	  case "$host" in
+	    # special handling for platforms with PE-DLLs.
+	    *cygwin* | *mingw* | *cegcc* )
+	      # Linker will automatically link against shared library if both
+	      # static and shared are present.  Therefore, ensure we extract
+	      # symbols from the import library if a shared library is present
+	      # (otherwise, the dlopen module name will be incorrect).  We do
+	      # this by putting the import library name into $newdlprefiles.
+	      # We recover the dlopen module name by 'saving' the la file
+	      # name in a special purpose variable, and (later) extracting the
+	      # dlname from the la file.
+	      if test -n "$dlname"; then
+	        func_tr_sh "$dir/$linklib"
+	        eval "libfile_$func_tr_sh_result=\$abs_ladir/\$laname"
+	        func_append newdlprefiles " $dir/$linklib"
+	      else
+	        func_append newdlprefiles " $dir/$old_library"
+	        # Keep a list of preopened convenience libraries to check
+	        # that they are being used correctly in the link pass.
+	        test -z "$libdir" && \
+	          func_append dlpreconveniencelibs " $dir/$old_library"
+	      fi
+	    ;;
+	    * )
+	      # Prefer using a static library (so that no silly _DYNAMIC symbols
+	      # are required to link).
+	      if test -n "$old_library"; then
+	        func_append newdlprefiles " $dir/$old_library"
+	        # Keep a list of preopened convenience libraries to check
+	        # that they are being used correctly in the link pass.
+	        test -z "$libdir" && \
+	          func_append dlpreconveniencelibs " $dir/$old_library"
+	      # Otherwise, use the dlname, so that lt_dlopen finds it.
+	      elif test -n "$dlname"; then
+	        func_append newdlprefiles " $dir/$dlname"
+	      else
+	        func_append newdlprefiles " $dir/$linklib"
+	      fi
+	    ;;
+	  esac
+	fi # $pass = dlpreopen
+
+	if test -z "$libdir"; then
+	  # Link the convenience library
+	  if test "$linkmode" = lib; then
+	    deplibs="$dir/$old_library $deplibs"
+	  elif test "$linkmode,$pass" = "prog,link"; then
+	    compile_deplibs="$dir/$old_library $compile_deplibs"
+	    finalize_deplibs="$dir/$old_library $finalize_deplibs"
+	  else
+	    deplibs="$lib $deplibs" # used for prog,scan pass
+	  fi
+	  continue
+	fi
+
+
+	if test "$linkmode" = prog && test "$pass" != link; then
+	  func_append newlib_search_path " $ladir"
+	  deplibs="$lib $deplibs"
+
+	  linkalldeplibs=no
+	  if test "$link_all_deplibs" != no || test -z "$library_names" ||
+	     test "$build_libtool_libs" = no; then
+	    linkalldeplibs=yes
+	  fi
+
+	  tmp_libs=
+	  for deplib in $dependency_libs; do
+	    case $deplib in
+	    -L*) func_stripname '-L' '' "$deplib"
+	         func_resolve_sysroot "$func_stripname_result"
+	         func_append newlib_search_path " $func_resolve_sysroot_result"
+		 ;;
+	    esac
+	    # Need to link against all dependency_libs?
+	    if test "$linkalldeplibs" = yes; then
+	      deplibs="$deplib $deplibs"
+	    else
+	      # Need to hardcode shared library paths
+	      # or/and link against static libraries
+	      newdependency_libs="$deplib $newdependency_libs"
+	    fi
+	    if $opt_preserve_dup_deps ; then
+	      case "$tmp_libs " in
+	      *" $deplib "*) func_append specialdeplibs " $deplib" ;;
+	      esac
+	    fi
+	    func_append tmp_libs " $deplib"
+	  done # for deplib
+	  continue
+	fi # $linkmode = prog...
+
+	if test "$linkmode,$pass" = "prog,link"; then
+	  if test -n "$library_names" &&
+	     { { test "$prefer_static_libs" = no ||
+	         test "$prefer_static_libs,$installed" = "built,yes"; } ||
+	       test -z "$old_library"; }; then
+	    # We need to hardcode the library path
+	    if test -n "$shlibpath_var" && test -z "$avoidtemprpath" ; then
+	      # Make sure the rpath contains only unique directories.
+	      case "$temp_rpath:" in
+	      *"$absdir:"*) ;;
+	      *) func_append temp_rpath "$absdir:" ;;
+	      esac
+	    fi
+
+	    # Hardcode the library path.
+	    # Skip directories that are in the system default run-time
+	    # search path.
+	    case " $sys_lib_dlsearch_path " in
+	    *" $absdir "*) ;;
+	    *)
+	      case "$compile_rpath " in
+	      *" $absdir "*) ;;
+	      *) func_append compile_rpath " $absdir" ;;
+	      esac
+	      ;;
+	    esac
+	    case " $sys_lib_dlsearch_path " in
+	    *" $libdir "*) ;;
+	    *)
+	      case "$finalize_rpath " in
+	      *" $libdir "*) ;;
+	      *) func_append finalize_rpath " $libdir" ;;
+	      esac
+	      ;;
+	    esac
+	  fi # $linkmode,$pass = prog,link...
+
+	  if test "$alldeplibs" = yes &&
+	     { test "$deplibs_check_method" = pass_all ||
+	       { test "$build_libtool_libs" = yes &&
+		 test -n "$library_names"; }; }; then
+	    # We only need to search for static libraries
+	    continue
+	  fi
+	fi
+
+	link_static=no # Whether the deplib will be linked statically
+	use_static_libs=$prefer_static_libs
+	if test "$use_static_libs" = built && test "$installed" = yes; then
+	  use_static_libs=no
+	fi
+	if test -n "$library_names" &&
+	   { test "$use_static_libs" = no || test -z "$old_library"; }; then
+	  case $host in
+	  *cygwin* | *mingw* | *cegcc*)
+	      # No point in relinking DLLs because paths are not encoded
+	      func_append notinst_deplibs " $lib"
+	      need_relink=no
+	    ;;
+	  *)
+	    if test "$installed" = no; then
+	      func_append notinst_deplibs " $lib"
+	      need_relink=yes
+	    fi
+	    ;;
+	  esac
+	  # This is a shared library
+
+	  # Warn about portability, can't link against -module's on some
+	  # systems (darwin).  Don't bleat about dlopened modules though!
+	  dlopenmodule=""
+	  for dlpremoduletest in $dlprefiles; do
+	    if test "X$dlpremoduletest" = "X$lib"; then
+	      dlopenmodule="$dlpremoduletest"
+	      break
+	    fi
+	  done
+	  if test -z "$dlopenmodule" && test "$shouldnotlink" = yes && test "$pass" = link; then
+	    echo
+	    if test "$linkmode" = prog; then
+	      $ECHO "*** Warning: Linking the executable $output against the loadable module"
+	    else
+	      $ECHO "*** Warning: Linking the shared library $output against the loadable module"
+	    fi
+	    $ECHO "*** $linklib is not portable!"
+	  fi
+	  if test "$linkmode" = lib &&
+	     test "$hardcode_into_libs" = yes; then
+	    # Hardcode the library path.
+	    # Skip directories that are in the system default run-time
+	    # search path.
+	    case " $sys_lib_dlsearch_path " in
+	    *" $absdir "*) ;;
+	    *)
+	      case "$compile_rpath " in
+	      *" $absdir "*) ;;
+	      *) func_append compile_rpath " $absdir" ;;
+	      esac
+	      ;;
+	    esac
+	    case " $sys_lib_dlsearch_path " in
+	    *" $libdir "*) ;;
+	    *)
+	      case "$finalize_rpath " in
+	      *" $libdir "*) ;;
+	      *) func_append finalize_rpath " $libdir" ;;
+	      esac
+	      ;;
+	    esac
+	  fi
+
+	  if test -n "$old_archive_from_expsyms_cmds"; then
+	    # figure out the soname
+	    set dummy $library_names
+	    shift
+	    realname="$1"
+	    shift
+	    libname=`eval "\\$ECHO \"$libname_spec\""`
+	    # use dlname if we got it. it's perfectly good, no?
+	    if test -n "$dlname"; then
+	      soname="$dlname"
+	    elif test -n "$soname_spec"; then
+	      # bleh windows
+	      case $host in
+	      *cygwin* | *mingw* | *cegcc*)	# match anywhere: the OS is the last field of the host triplet
+	        func_arith $current - $age
+		major=$func_arith_result
+		versuffix="-$major"
+		;;
+	      esac
+	      eval soname=\"$soname_spec\"
+	    else
+	      soname="$realname"
+	    fi
+
+	    # Make a new name for the extract_expsyms_cmds to use
+	    soroot="$soname"
+	    func_basename "$soroot"
+	    soname="$func_basename_result"
+	    func_stripname 'lib' '.dll' "$soname"
+	    newlib=libimp-$func_stripname_result.a
+
+	    # If the library has no export list, then create one now
+	    if test -f "$output_objdir/$soname-def"; then :
+	    else
+	      func_verbose "extracting exported symbol list from \`$soname'"
+	      func_execute_cmds "$extract_expsyms_cmds" 'exit $?'
+	    fi
+
+	    # Create $newlib
+	    if test -f "$output_objdir/$newlib"; then :; else
+	      func_verbose "generating import library for \`$soname'"
+	      func_execute_cmds "$old_archive_from_expsyms_cmds" 'exit $?'
+	    fi
+	    # make sure the library variables are pointing to the new library
+	    dir=$output_objdir
+	    linklib=$newlib
+	  fi # test -n "$old_archive_from_expsyms_cmds"
+
+	  if test "$linkmode" = prog || test "$opt_mode" != relink; then
+	    add_shlibpath=
+	    add_dir=
+	    add=
+	    lib_linked=yes
+	    case $hardcode_action in
+	    immediate | unsupported)
+	      if test "$hardcode_direct" = no; then
+		add="$dir/$linklib"
+		case $host in
+		  *-*-sco3.2v5.0.[024]*) add_dir="-L$dir" ;;
+		  *-*-sysv4*uw2*) add_dir="-L$dir" ;;
+		  *-*-sysv5OpenUNIX* | *-*-sysv5UnixWare7.[01].[10]* | \
+		    *-*-unixware7*) add_dir="-L$dir" ;;
+		  *-*-darwin* )
+		    # if the lib is a (non-dlopened) module then we can not
+		    # link against it, someone is ignoring the earlier warnings
+		    if /usr/bin/file -L $add 2> /dev/null |
+			 $GREP ": [^:]* bundle" >/dev/null ; then
+		      if test "X$dlopenmodule" != "X$lib"; then
+			$ECHO "*** Warning: lib $linklib is a module, not a shared library"
+			if test -z "$old_library" ; then
+			  echo
+			  echo "*** And there doesn't seem to be a static archive available"
+			  echo "*** The link will probably fail, sorry"
+			else
+			  add="$dir/$old_library"
+			fi
+		      elif test -n "$old_library"; then
+			add="$dir/$old_library"
+		      fi
+		    fi
+		esac
+	      elif test "$hardcode_minus_L" = no; then
+		case $host in
+		*-*-sunos*) add_shlibpath="$dir" ;;
+		esac
+		add_dir="-L$dir"
+		add="-l$name"
+	      elif test "$hardcode_shlibpath_var" = no; then
+		add_shlibpath="$dir"
+		add="-l$name"
+	      else
+		lib_linked=no
+	      fi
+	      ;;
+	    relink)
+	      if test "$hardcode_direct" = yes &&
+	         test "$hardcode_direct_absolute" = no; then
+		add="$dir/$linklib"
+	      elif test "$hardcode_minus_L" = yes; then
+		add_dir="-L$absdir"
+		# Try looking first in the location we're being installed to.
+		if test -n "$inst_prefix_dir"; then
+		  case $libdir in
+		    [\\/]*)
+		      func_append add_dir " -L$inst_prefix_dir$libdir"
+		      ;;
+		  esac
+		fi
+		add="-l$name"
+	      elif test "$hardcode_shlibpath_var" = yes; then
+		add_shlibpath="$dir"
+		add="-l$name"
+	      else
+		lib_linked=no
+	      fi
+	      ;;
+	    *) lib_linked=no ;;
+	    esac
+
+	    if test "$lib_linked" != yes; then
+	      func_fatal_configuration "unsupported hardcode properties"
+	    fi
+
+	    if test -n "$add_shlibpath"; then
+	      case :$compile_shlibpath: in
+	      *":$add_shlibpath:"*) ;;
+	      *) func_append compile_shlibpath "$add_shlibpath:" ;;
+	      esac
+	    fi
+	    if test "$linkmode" = prog; then
+	      test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs"
+	      test -n "$add" && compile_deplibs="$add $compile_deplibs"
+	    else
+	      test -n "$add_dir" && deplibs="$add_dir $deplibs"
+	      test -n "$add" && deplibs="$add $deplibs"
+	      if test "$hardcode_direct" != yes &&
+		 test "$hardcode_minus_L" != yes &&
+		 test "$hardcode_shlibpath_var" = yes; then
+		case :$finalize_shlibpath: in
+		*":$libdir:"*) ;;
+		*) func_append finalize_shlibpath "$libdir:" ;;
+		esac
+	      fi
+	    fi
+	  fi
+
+	  if test "$linkmode" = prog || test "$opt_mode" = relink; then
+	    add_shlibpath=
+	    add_dir=
+	    add=
+	    # Finalize command for both is simple: just hardcode it.
+	    if test "$hardcode_direct" = yes &&
+	       test "$hardcode_direct_absolute" = no; then
+	      add="$libdir/$linklib"
+	    elif test "$hardcode_minus_L" = yes; then
+	      add_dir="-L$libdir"
+	      add="-l$name"
+	    elif test "$hardcode_shlibpath_var" = yes; then
+	      case :$finalize_shlibpath: in
+	      *":$libdir:"*) ;;
+	      *) func_append finalize_shlibpath "$libdir:" ;;
+	      esac
+	      add="-l$name"
+	    elif test "$hardcode_automatic" = yes; then
+	      if test -n "$inst_prefix_dir" &&
+		 test -f "$inst_prefix_dir$libdir/$linklib" ; then
+		add="$inst_prefix_dir$libdir/$linklib"
+	      else
+		add="$libdir/$linklib"
+	      fi
+	    else
+	      # We cannot seem to hardcode it, guess we'll fake it.
+	      add_dir="-L$libdir"
+	      # Try looking first in the location we're being installed to.
+	      if test -n "$inst_prefix_dir"; then
+		case $libdir in
+		  [\\/]*)
+		    func_append add_dir " -L$inst_prefix_dir$libdir"
+		    ;;
+		esac
+	      fi
+	      add="-l$name"
+	    fi
+
+	    if test "$linkmode" = prog; then
+	      test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs"
+	      test -n "$add" && finalize_deplibs="$add $finalize_deplibs"
+	    else
+	      test -n "$add_dir" && deplibs="$add_dir $deplibs"
+	      test -n "$add" && deplibs="$add $deplibs"
+	    fi
+	  fi
+	elif test "$linkmode" = prog; then
+	  # Here we assume that one of hardcode_direct or hardcode_minus_L
+	  # is not unsupported.  This is valid on all known static and
+	  # shared platforms.
+	  if test "$hardcode_direct" != unsupported; then
+	    test -n "$old_library" && linklib="$old_library"
+	    compile_deplibs="$dir/$linklib $compile_deplibs"
+	    finalize_deplibs="$dir/$linklib $finalize_deplibs"
+	  else
+	    compile_deplibs="-l$name -L$dir $compile_deplibs"
+	    finalize_deplibs="-l$name -L$dir $finalize_deplibs"
+	  fi
+	elif test "$build_libtool_libs" = yes; then
+	  # Not a shared library
+	  if test "$deplibs_check_method" != pass_all; then
+	    # We're trying to link a shared library against a static one
+	    # but the system doesn't support it.
+
+	    # Just print a warning and add the library to dependency_libs so
+	    # that the program can be linked against the static library.
+	    echo
+	    $ECHO "*** Warning: This system can not link to static lib archive $lib."
+	    echo "*** I have the capability to make that library automatically link in when"
+	    echo "*** you link to this library.  But I can only do this if you have a"
+	    echo "*** shared version of the library, which you do not appear to have."
+	    if test "$module" = yes; then
+	      echo "*** But as you try to build a module library, libtool will still create "
+	      echo "*** a static module, that should work as long as the dlopening application"
+	      echo "*** is linked with the -dlopen flag to resolve symbols at runtime."
+	      if test -z "$global_symbol_pipe"; then
+		echo
+		echo "*** However, this would only work if libtool was able to extract symbol"
+		echo "*** lists from a program, using \`nm' or equivalent, but libtool could"
+		echo "*** not find such a program.  So, this module is probably useless."
+		echo "*** \`nm' from GNU binutils and a full rebuild may help."
+	      fi
+	      if test "$build_old_libs" = no; then
+		build_libtool_libs=module
+		build_old_libs=yes
+	      else
+		build_libtool_libs=no
+	      fi
+	    fi
+	  else
+	    deplibs="$dir/$old_library $deplibs"
+	    link_static=yes
+	  fi
+	fi # link shared/static library?
+
+	if test "$linkmode" = lib; then
+	  if test -n "$dependency_libs" &&
+	     { test "$hardcode_into_libs" != yes ||
+	       test "$build_old_libs" = yes ||
+	       test "$link_static" = yes; }; then
+	    # Extract -R from dependency_libs
+	    temp_deplibs=
+	    for libdir in $dependency_libs; do
+	      case $libdir in
+	      -R*) func_stripname '-R' '' "$libdir"
+	           temp_xrpath=$func_stripname_result
+		   case " $xrpath " in
+		   *" $temp_xrpath "*) ;;
+		   *) func_append xrpath " $temp_xrpath";;
+		   esac;;
+	      *) func_append temp_deplibs " $libdir";;
+	      esac
+	    done
+	    dependency_libs="$temp_deplibs"
+	  fi
+
+	  func_append newlib_search_path " $absdir"
+	  # Link against this library
+	  test "$link_static" = no && newdependency_libs="$abs_ladir/$laname $newdependency_libs"
+	  # ... and its dependency_libs
+	  tmp_libs=
+	  for deplib in $dependency_libs; do
+	    newdependency_libs="$deplib $newdependency_libs"
+	    case $deplib in
+              -L*) func_stripname '-L' '' "$deplib"
+                   func_resolve_sysroot "$func_stripname_result";;
+              *) func_resolve_sysroot "$deplib" ;;
+            esac
+	    if $opt_preserve_dup_deps ; then
+	      case "$tmp_libs " in
+	      *" $func_resolve_sysroot_result "*)
+                func_append specialdeplibs " $func_resolve_sysroot_result" ;;
+	      esac
+	    fi
+	    func_append tmp_libs " $func_resolve_sysroot_result"
+	  done
+
+	  if test "$link_all_deplibs" != no; then
+	    # Add the search paths of all dependency libraries
+	    for deplib in $dependency_libs; do
+	      path=
+	      case $deplib in
+	      -L*) path="$deplib" ;;
+	      *.la)
+	        func_resolve_sysroot "$deplib"
+	        deplib=$func_resolve_sysroot_result
+	        func_dirname "$deplib" "" "."
+		dir=$func_dirname_result
+		# We need an absolute path.
+		case $dir in
+		[\\/]* | [A-Za-z]:[\\/]*) absdir="$dir" ;;
+		*)
+		  absdir=`cd "$dir" && pwd`
+		  if test -z "$absdir"; then
+		    func_warning "cannot determine absolute directory name of \`$dir'"
+		    absdir="$dir"
+		  fi
+		  ;;
+		esac
+		if $GREP "^installed=no" $deplib > /dev/null; then
+		case $host in
+		*-*-darwin*)
+		  depdepl=
+		  eval deplibrary_names=`${SED} -n -e 's/^library_names=\(.*\)$/\1/p' $deplib`
+		  if test -n "$deplibrary_names" ; then
+		    for tmp in $deplibrary_names ; do
+		      depdepl=$tmp
+		    done
+		    if test -f "$absdir/$objdir/$depdepl" ; then
+		      depdepl="$absdir/$objdir/$depdepl"
+		      darwin_install_name=`${OTOOL} -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'`
+                      if test -z "$darwin_install_name"; then
+                          darwin_install_name=`${OTOOL64} -L $depdepl  | awk '{if (NR == 2) {print $1;exit}}'`
+                      fi
+		      func_append compiler_flags " ${wl}-dylib_file ${wl}${darwin_install_name}:${depdepl}"
+		      func_append linker_flags " -dylib_file ${darwin_install_name}:${depdepl}"
+		      path=
+		    fi
+		  fi
+		  ;;
+		*)
+		  path="-L$absdir/$objdir"
+		  ;;
+		esac
+		else
+		  eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib`
+		  test -z "$libdir" && \
+		    func_fatal_error "\`$deplib' is not a valid libtool archive"
+		  test "$absdir" != "$libdir" && \
+		    func_warning "\`$deplib' seems to be moved"
+
+		  path="-L$absdir"
+		fi
+		;;
+	      esac
+	      case " $deplibs " in
+	      *" $path "*) ;;
+	      *) deplibs="$path $deplibs" ;;
+	      esac
+	    done
+	  fi # link_all_deplibs != no
+	fi # linkmode = lib
+      done # for deplib in $libs
+      if test "$pass" = link; then
+	if test "$linkmode" = "prog"; then
+	  compile_deplibs="$new_inherited_linker_flags $compile_deplibs"
+	  finalize_deplibs="$new_inherited_linker_flags $finalize_deplibs"
+	else
+	  compiler_flags="$compiler_flags "`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
+	fi
+      fi
+      dependency_libs="$newdependency_libs"
+      if test "$pass" = dlpreopen; then
+	# Link the dlpreopened libraries before other libraries
+	for deplib in $save_deplibs; do
+	  deplibs="$deplib $deplibs"
+	done
+      fi
+      if test "$pass" != dlopen; then
+	if test "$pass" != conv; then
+	  # Make sure lib_search_path contains only unique directories.
+	  lib_search_path=
+	  for dir in $newlib_search_path; do
+	    case "$lib_search_path " in
+	    *" $dir "*) ;;
+	    *) func_append lib_search_path " $dir" ;;
+	    esac
+	  done
+	  newlib_search_path=
+	fi
+
+	if test "$linkmode,$pass" != "prog,link"; then
+	  vars="deplibs"
+	else
+	  vars="compile_deplibs finalize_deplibs"
+	fi
+	for var in $vars dependency_libs; do
+	  # Add libraries to $var in reverse order
+	  eval tmp_libs=\"\$$var\"
+	  new_libs=
+	  for deplib in $tmp_libs; do
+	    # FIXME: Pedantically, this is the right thing to do, so
+	    #        that some nasty dependency loop isn't accidentally
+	    #        broken:
+	    #new_libs="$deplib $new_libs"
+	    # Pragmatically, this seems to cause very few problems in
+	    # practice:
+	    case $deplib in
+	    -L*) new_libs="$deplib $new_libs" ;;
+	    -R*) ;;
+	    *)
+	      # And here is the reason: when a library appears more
+	      # than once as an explicit dependence of a library, or
+	      # is implicitly linked in more than once by the
+	      # compiler, it is considered special, and multiple
+	      # occurrences thereof are not removed.  Compare this
+	      # with having the same library being listed as a
+	      # dependency of multiple other libraries: in this case,
+	      # we know (pedantically, we assume) the library does not
+	      # need to be listed more than once, so we keep only the
+	      # last copy.  This is not always right, but it is rare
+	      # enough that we require users that really mean to play
+	      # such unportable linking tricks to link the library
+	      # using -Wl,-lname, so that libtool does not consider it
+	      # for duplicate removal.
+	      case " $specialdeplibs " in
+	      *" $deplib "*) new_libs="$deplib $new_libs" ;;
+	      *)
+		case " $new_libs " in
+		*" $deplib "*) ;;
+		*) new_libs="$deplib $new_libs" ;;
+		esac
+		;;
+	      esac
+	      ;;
+	    esac
+	  done
+	  tmp_libs=
+	  for deplib in $new_libs; do
+	    case $deplib in
+	    -L*)
+	      case " $tmp_libs " in
+	      *" $deplib "*) ;;
+	      *) func_append tmp_libs " $deplib" ;;
+	      esac
+	      ;;
+	    *) func_append tmp_libs " $deplib" ;;
+	    esac
+	  done
+	  eval $var=\"$tmp_libs\"
+	done # for var
+      fi
+      # Last step: remove runtime libs from dependency_libs
+      # (they stay in deplibs)
+      tmp_libs=
+      for i in $dependency_libs ; do
+	case " $predeps $postdeps $compiler_lib_search_path " in
+	*" $i "*)
+	  i=""
+	  ;;
+	esac
+	if test -n "$i" ; then
+	  func_append tmp_libs " $i"
+	fi
+      done
+      dependency_libs=$tmp_libs
+    done # for pass
+    if test "$linkmode" = prog; then
+      dlfiles="$newdlfiles"
+    fi
+    if test "$linkmode" = prog || test "$linkmode" = lib; then
+      dlprefiles="$newdlprefiles"
+    fi
+
+    case $linkmode in
+    oldlib)
+      if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
+	func_warning "\`-dlopen' is ignored for archives"
+      fi
+
+      case " $deplibs" in
+      *\ -l* | *\ -L*)
+	func_warning "\`-l' and \`-L' are ignored for archives" ;;
+      esac
+
+      test -n "$rpath" && \
+	func_warning "\`-rpath' is ignored for archives"
+
+      test -n "$xrpath" && \
+	func_warning "\`-R' is ignored for archives"
+
+      test -n "$vinfo" && \
+	func_warning "\`-version-info/-version-number' is ignored for archives"
+
+      test -n "$release" && \
+	func_warning "\`-release' is ignored for archives"
+
+      test -n "$export_symbols$export_symbols_regex" && \
+	func_warning "\`-export-symbols' is ignored for archives"
+
+      # Now set the variables for building old libraries.
+      build_libtool_libs=no
+      oldlibs="$output"
+      func_append objs "$old_deplibs"
+      ;;
+
+    lib)
+      # Make sure we only generate libraries of the form `libNAME.la'.
+      case $outputname in
+      lib*)
+	func_stripname 'lib' '.la' "$outputname"
+	name=$func_stripname_result
+	eval shared_ext=\"$shrext_cmds\"
+	eval libname=\"$libname_spec\"
+	;;
+      *)
+	test "$module" = no && \
+	  func_fatal_help "libtool library \`$output' must begin with \`lib'"
+
+	if test "$need_lib_prefix" != no; then
+	  # Add the "lib" prefix for modules if required
+	  func_stripname '' '.la' "$outputname"
+	  name=$func_stripname_result
+	  eval shared_ext=\"$shrext_cmds\"
+	  eval libname=\"$libname_spec\"
+	else
+	  func_stripname '' '.la' "$outputname"
+	  libname=$func_stripname_result
+	fi
+	;;
+      esac
+
+      if test -n "$objs"; then
+	if test "$deplibs_check_method" != pass_all; then
+	  func_fatal_error "cannot build libtool library \`$output' from non-libtool objects on this host:$objs"
+	else
+	  echo
+	  $ECHO "*** Warning: Linking the shared library $output against the non-libtool"
+	  $ECHO "*** objects $objs is not portable!"
+	  func_append libobjs " $objs"
+	fi
+      fi
+
+      test "$dlself" != no && \
+	func_warning "\`-dlopen self' is ignored for libtool libraries"
+
+      set dummy $rpath
+      shift
+      test "$#" -gt 1 && \
+	func_warning "ignoring multiple \`-rpath's for a libtool library"
+
+      install_libdir="$1"
+
+      oldlibs=
+      if test -z "$rpath"; then
+	if test "$build_libtool_libs" = yes; then
+	  # Building a libtool convenience library.
+	  # Some compilers have problems with a `.al' extension so
+	  # convenience libraries should have the same extension an
+	  # archive normally would.
+	  oldlibs="$output_objdir/$libname.$libext $oldlibs"
+	  build_libtool_libs=convenience
+	  build_old_libs=yes
+	fi
+
+	test -n "$vinfo" && \
+	  func_warning "\`-version-info/-version-number' is ignored for convenience libraries"
+
+	test -n "$release" && \
+	  func_warning "\`-release' is ignored for convenience libraries"
+      else
+
+	# Parse the version information argument.
+	save_ifs="$IFS"; IFS=':'
+	set dummy $vinfo 0 0 0
+	shift
+	IFS="$save_ifs"
+
+	test -n "$7" && \
+	  func_fatal_help "too many parameters to \`-version-info'"
+
+	# convert absolute version numbers to libtool ages
+	# this retains compatibility with .la files and attempts
+	# to make the code below a bit more comprehensible
+
+	case $vinfo_number in
+	yes)
+	  number_major="$1"
+	  number_minor="$2"
+	  number_revision="$3"
+	  #
+	  # There are really only two kinds -- those that
+	  # use the current revision as the major version
+	  # and those that subtract age and use age as
+	  # a minor version.  But, then there is irix
+	  # which has an extra 1 added just for fun
+	  #
+	  case $version_type in
+	  # correct linux to gnu/linux during the next big refactor
+	  darwin|linux|osf|windows|none)
+	    func_arith $number_major + $number_minor
+	    current=$func_arith_result
+	    age="$number_minor"
+	    revision="$number_revision"
+	    ;;
+	  freebsd-aout|freebsd-elf|qnx|sunos)
+	    current="$number_major"
+	    revision="$number_minor"
+	    age="0"
+	    ;;
+	  irix|nonstopux)
+	    func_arith $number_major + $number_minor
+	    current=$func_arith_result
+	    age="$number_minor"
+	    revision="$number_minor"
+	    lt_irix_increment=no
+	    ;;
+	  esac
+	  ;;
+	no)
+	  current="$1"
+	  revision="$2"
+	  age="$3"
+	  ;;
+	esac
+
+	# Check that each of the things are valid numbers.
+	case $current in
+	0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;;
+	*)
+	  func_error "CURRENT \`$current' must be a nonnegative integer"
+	  func_fatal_error "\`$vinfo' is not valid version information"
+	  ;;
+	esac
+
+	case $revision in
+	0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;;
+	*)
+	  func_error "REVISION \`$revision' must be a nonnegative integer"
+	  func_fatal_error "\`$vinfo' is not valid version information"
+	  ;;
+	esac
+
+	case $age in
+	0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;;
+	*)
+	  func_error "AGE \`$age' must be a nonnegative integer"
+	  func_fatal_error "\`$vinfo' is not valid version information"
+	  ;;
+	esac
+
+	if test "$age" -gt "$current"; then
+	  func_error "AGE \`$age' is greater than the current interface number \`$current'"
+	  func_fatal_error "\`$vinfo' is not valid version information"
+	fi
+
+	# Calculate the version variables.
+	major=
+	versuffix=
+	verstring=
+	case $version_type in
+	none) ;;
+
+	darwin)
+	  # Like Linux, but with the current version available in
+	  # verstring for coding it into the library header
+	  func_arith $current - $age
+	  major=.$func_arith_result
+	  versuffix="$major.$age.$revision"
+	  # Darwin ld doesn't like 0 for these options...
+	  func_arith $current + 1
+	  minor_current=$func_arith_result
+	  xlcverstring="${wl}-compatibility_version ${wl}$minor_current ${wl}-current_version ${wl}$minor_current.$revision"
+	  verstring="-compatibility_version $minor_current -current_version $minor_current.$revision"
+	  ;;
+
+	freebsd-aout)
+	  major=".$current"
+	  versuffix=".$current.$revision";
+	  ;;
+
+	freebsd-elf)
+	  major=".$current"
+	  versuffix=".$current"
+	  ;;
+
+	irix | nonstopux)
+	  if test "X$lt_irix_increment" = "Xno"; then
+	    func_arith $current - $age
+	  else
+	    func_arith $current - $age + 1
+	  fi
+	  major=$func_arith_result
+
+	  case $version_type in
+	    nonstopux) verstring_prefix=nonstopux ;;
+	    *)         verstring_prefix=sgi ;;
+	  esac
+	  verstring="$verstring_prefix$major.$revision"
+
+	  # Add in all the interfaces that we are compatible with.
+	  loop=$revision
+	  while test "$loop" -ne 0; do
+	    func_arith $revision - $loop
+	    iface=$func_arith_result
+	    func_arith $loop - 1
+	    loop=$func_arith_result
+	    verstring="$verstring_prefix$major.$iface:$verstring"
+	  done
+
+	  # Before this point, $major must not contain `.'.
+	  major=.$major
+	  versuffix="$major.$revision"
+	  ;;
+
+	linux) # correct to gnu/linux during the next big refactor
+	  func_arith $current - $age
+	  major=.$func_arith_result
+	  versuffix="$major.$age.$revision"
+	  ;;
+
+	osf)
+	  func_arith $current - $age
+	  major=.$func_arith_result
+	  versuffix=".$current.$age.$revision"
+	  verstring="$current.$age.$revision"
+
+	  # Add in all the interfaces that we are compatible with.
+	  loop=$age
+	  while test "$loop" -ne 0; do
+	    func_arith $current - $loop
+	    iface=$func_arith_result
+	    func_arith $loop - 1
+	    loop=$func_arith_result
+	    verstring="$verstring:${iface}.0"
+	  done
+
+	  # Make executables depend on our current version.
+	  func_append verstring ":${current}.0"
+	  ;;
+
+	qnx)
+	  major=".$current"
+	  versuffix=".$current"
+	  ;;
+
+	sunos)
+	  major=".$current"
+	  versuffix=".$current.$revision"
+	  ;;
+
+	windows)
+	  # Use '-' rather than '.', since we only want one
+	  # extension on DOS 8.3 filesystems.
+	  func_arith $current - $age
+	  major=$func_arith_result
+	  versuffix="-$major"
+	  ;;
+
+	*)
+	  func_fatal_configuration "unknown library version type \`$version_type'"
+	  ;;
+	esac
+
+	# Clear the version info if we defaulted, and they specified a release.
+	if test -z "$vinfo" && test -n "$release"; then
+	  major=
+	  case $version_type in
+	  darwin)
+	    # we can't check for "0.0" in archive_cmds due to quoting
+	    # problems, so we reset it completely
+	    verstring=
+	    ;;
+	  *)
+	    verstring="0.0"
+	    ;;
+	  esac
+	  if test "$need_version" = no; then
+	    versuffix=
+	  else
+	    versuffix=".0.0"
+	  fi
+	fi
+
+	# Remove version info from name if versioning should be avoided
+	if test "$avoid_version" = yes && test "$need_version" = no; then
+	  major=
+	  versuffix=
+	  verstring=""
+	fi
+
+	# Check to see if the archive will have undefined symbols.
+	if test "$allow_undefined" = yes; then
+	  if test "$allow_undefined_flag" = unsupported; then
+	    func_warning "undefined symbols not allowed in $host shared libraries"
+	    build_libtool_libs=no
+	    build_old_libs=yes
+	  fi
+	else
+	  # Don't allow undefined symbols.
+	  allow_undefined_flag="$no_undefined_flag"
+	fi
+
+      fi
+
+      func_generate_dlsyms "$libname" "$libname" "yes"
+      func_append libobjs " $symfileobj"
+      test "X$libobjs" = "X " && libobjs=
+
+      if test "$opt_mode" != relink; then
+	# Remove our outputs, but don't remove object files since they
+	# may have been created when compiling PIC objects.
+	removelist=
+	tempremovelist=`$ECHO "$output_objdir/*"`
+	for p in $tempremovelist; do
+	  case $p in
+	    *.$objext | *.gcno)
+	       ;;
+	    $output_objdir/$outputname | $output_objdir/$libname.* | $output_objdir/${libname}${release}.*)
+	       if test "X$precious_files_regex" != "X"; then
+		 if $ECHO "$p" | $EGREP -e "$precious_files_regex" >/dev/null 2>&1
+		 then
+		   continue
+		 fi
+	       fi
+	       func_append removelist " $p"
+	       ;;
+	    *) ;;
+	  esac
+	done
+	test -n "$removelist" && \
+	  func_show_eval "${RM}r \$removelist"
+      fi
+
+      # Now set the variables for building old libraries.
+      if test "$build_old_libs" = yes && test "$build_libtool_libs" != convenience ; then
+	func_append oldlibs " $output_objdir/$libname.$libext"
+
+	# Transform .lo files to .o files.
+	oldobjs="$objs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.${libext}$/d; $lo2o" | $NL2SP`
+      fi
+
+      # Eliminate all temporary directories.
+      #for path in $notinst_path; do
+      #	lib_search_path=`$ECHO "$lib_search_path " | $SED "s% $path % %g"`
+      #	deplibs=`$ECHO "$deplibs " | $SED "s% -L$path % %g"`
+      #	dependency_libs=`$ECHO "$dependency_libs " | $SED "s% -L$path % %g"`
+      #done
+
+      if test -n "$xrpath"; then
+	# If the user specified any rpath flags, then add them.
+	temp_xrpath=
+	for libdir in $xrpath; do
+	  func_replace_sysroot "$libdir"
+	  func_append temp_xrpath " -R$func_replace_sysroot_result"
+	  case "$finalize_rpath " in
+	  *" $libdir "*) ;;
+	  *) func_append finalize_rpath " $libdir" ;;
+	  esac
+	done
+	if test "$hardcode_into_libs" != yes || test "$build_old_libs" = yes; then
+	  dependency_libs="$temp_xrpath $dependency_libs"
+	fi
+      fi
+
+      # Make sure dlfiles contains only unique files that won't be dlpreopened
+      old_dlfiles="$dlfiles"
+      dlfiles=
+      for lib in $old_dlfiles; do
+	case " $dlprefiles $dlfiles " in
+	*" $lib "*) ;;
+	*) func_append dlfiles " $lib" ;;
+	esac
+      done
+
+      # Make sure dlprefiles contains only unique files
+      old_dlprefiles="$dlprefiles"
+      dlprefiles=
+      for lib in $old_dlprefiles; do
+	case "$dlprefiles " in
+	*" $lib "*) ;;
+	*) func_append dlprefiles " $lib" ;;
+	esac
+      done
+
+      if test "$build_libtool_libs" = yes; then
+	if test -n "$rpath"; then
+	  case $host in
+	  *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos* | *-cegcc* | *-*-haiku*)
+	    # these systems don't actually have a c library (as such)!
+	    ;;
+	  *-*-rhapsody* | *-*-darwin1.[012])
+	    # Rhapsody C library is in the System framework
+	    func_append deplibs " System.ltframework"
+	    ;;
+	  *-*-netbsd*)
+	    # Don't link with libc until the a.out ld.so is fixed.
+	    ;;
+	  *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*)
+	    # Do not include libc due to us having libc/libc_r.
+	    ;;
+	  *-*-sco3.2v5* | *-*-sco5v6*)
+	    # Causes problems with __ctype
+	    ;;
+	  *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*)
+	    # Compiler inserts libc in the correct place for threads to work
+	    ;;
+	  *)
+	    # Add libc to deplibs on all other systems if necessary.
+	    if test "$build_libtool_need_lc" = "yes"; then
+	      func_append deplibs " -lc"
+	    fi
+	    ;;
+	  esac
+	fi
+
+	# Transform deplibs into only deplibs that can be linked in shared.
+	name_save=$name
+	libname_save=$libname
+	release_save=$release
+	versuffix_save=$versuffix
+	major_save=$major
+	# I'm not sure if I'm treating the release correctly.  I think
+	# release should show up in the -l (ie -lgmp5) so we don't want to
+	# add it in twice.  Is that correct?
+	release=""
+	versuffix=""
+	major=""
+	newdeplibs=
+	droppeddeps=no
+	case $deplibs_check_method in
+	pass_all)
+	  # Don't check for shared/static.  Everything works.
+	  # This might be a little naive.  We might want to check
+	  # whether the library exists or not.  But this is on
+	  # osf3 & osf4 and I'm not really sure... Just
+	  # implementing what was already the behavior.
+	  newdeplibs=$deplibs
+	  ;;
+	test_compile)
+	  # This code stresses the "libraries are programs" paradigm to its
+	  # limits. Maybe even breaks it.  We compile a program, linking it
+	  # against the deplibs as a proxy for the library.  Then we can check
+	  # whether they linked in statically or dynamically with ldd.
+	  $opt_dry_run || $RM conftest.c
+	  cat > conftest.c <<EOF
+	  int main() { return 0; }
+EOF
+	  $opt_dry_run || $RM conftest
+	  if $LTCC $LTCFLAGS -o conftest conftest.c $deplibs; then
+	    ldd_output=`ldd conftest`
+	    for i in $deplibs; do
+	      case $i in
+	      -l*)
+		func_stripname -l '' "$i"
+		name=$func_stripname_result
+		if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
+		  case " $predeps $postdeps " in
+		  *" $i "*)
+		    func_append newdeplibs " $i"
+		    i=""
+		    ;;
+		  esac
+		fi
+		if test -n "$i" ; then
+		  libname=`eval "\\$ECHO \"$libname_spec\""`
+		  deplib_matches=`eval "\\$ECHO \"$library_names_spec\""`
+		  set dummy $deplib_matches; shift
+		  deplib_match=$1
+		  if test `expr "$ldd_output" : ".*$deplib_match"` -ne 0 ; then
+		    func_append newdeplibs " $i"
+		  else
+		    droppeddeps=yes
+		    echo
+		    $ECHO "*** Warning: dynamic linker does not accept needed library $i."
+		    echo "*** I have the capability to make that library automatically link in when"
+		    echo "*** you link to this library.  But I can only do this if you have a"
+		    echo "*** shared version of the library, which I believe you do not have"
+		    echo "*** because a test_compile did reveal that the linker did not use it for"
+		    echo "*** its dynamic dependency list that programs get resolved with at runtime."
+		  fi
+		fi
+		;;
+	      *)
+		func_append newdeplibs " $i"
+		;;
+	      esac
+	    done
+	  else
+	    # Error occurred in the first compile.  Let's try to salvage
+	    # the situation: Compile a separate program for each library.
+	    for i in $deplibs; do
+	      case $i in
+	      -l*)
+		func_stripname -l '' "$i"
+		name=$func_stripname_result
+		$opt_dry_run || $RM conftest
+		if $LTCC $LTCFLAGS -o conftest conftest.c $i; then
+		  ldd_output=`ldd conftest`
+		  if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
+		    case " $predeps $postdeps " in
+		    *" $i "*)
+		      func_append newdeplibs " $i"
+		      i=""
+		      ;;
+		    esac
+		  fi
+		  if test -n "$i" ; then
+		    libname=`eval "\\$ECHO \"$libname_spec\""`
+		    deplib_matches=`eval "\\$ECHO \"$library_names_spec\""`
+		    set dummy $deplib_matches; shift
+		    deplib_match=$1
+		    if test `expr "$ldd_output" : ".*$deplib_match"` -ne 0 ; then
+		      func_append newdeplibs " $i"
+		    else
+		      droppeddeps=yes
+		      echo
+		      $ECHO "*** Warning: dynamic linker does not accept needed library $i."
+		      echo "*** I have the capability to make that library automatically link in when"
+		      echo "*** you link to this library.  But I can only do this if you have a"
+		      echo "*** shared version of the library, which you do not appear to have"
+		      echo "*** because a test_compile did reveal that the linker did not use this one"
+		      echo "*** as a dynamic dependency that programs can get resolved with at runtime."
+		    fi
+		  fi
+		else
+		  droppeddeps=yes
+		  echo
+		  $ECHO "*** Warning!  Library $i is needed by this library but I was not able to"
+		  echo "*** make it link in!  You will probably need to install it or some"
+		  echo "*** library that it depends on before this library will be fully"
+		  echo "*** functional.  Installing it before continuing would be even better."
+		fi
+		;;
+	      *)
+		func_append newdeplibs " $i"
+		;;
+	      esac
+	    done
+	  fi
+	  ;;
+	file_magic*)
+	  set dummy $deplibs_check_method; shift
+	  file_magic_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"`
+	  for a_deplib in $deplibs; do
+	    case $a_deplib in
+	    -l*)
+	      func_stripname -l '' "$a_deplib"
+	      name=$func_stripname_result
+	      if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
+		case " $predeps $postdeps " in
+		*" $a_deplib "*)
+		  func_append newdeplibs " $a_deplib"
+		  a_deplib=""
+		  ;;
+		esac
+	      fi
+	      if test -n "$a_deplib" ; then
+		libname=`eval "\\$ECHO \"$libname_spec\""`
+		if test -n "$file_magic_glob"; then
+		  libnameglob=`func_echo_all "$libname" | $SED -e $file_magic_glob`
+		else
+		  libnameglob=$libname
+		fi
+		test "$want_nocaseglob" = yes && nocaseglob=`shopt -p nocaseglob`
+		for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do
+		  if test "$want_nocaseglob" = yes; then
+		    shopt -s nocaseglob
+		    potential_libs=`ls $i/$libnameglob[.-]* 2>/dev/null`
+		    $nocaseglob
+		  else
+		    potential_libs=`ls $i/$libnameglob[.-]* 2>/dev/null`
+		  fi
+		  for potent_lib in $potential_libs; do
+		      # Follow soft links.
+		      if ls -lLd "$potent_lib" 2>/dev/null |
+			 $GREP " -> " >/dev/null; then
+			continue
+		      fi
+		      # The statement above tries to avoid entering an
+		      # endless loop below, in case of cyclic links.
+		      # We might still enter an endless loop, since a link
+		      # loop can be closed while we follow links,
+		      # but so what?
+		      potlib="$potent_lib"
+		      while test -h "$potlib" 2>/dev/null; do
+			potliblink=`ls -ld $potlib | ${SED} 's/.* -> //'`
+			case $potliblink in
+			[\\/]* | [A-Za-z]:[\\/]*) potlib="$potliblink";;
+			*) potlib=`$ECHO "$potlib" | $SED 's,[^/]*$,,'`"$potliblink";;
+			esac
+		      done
+		      if eval $file_magic_cmd \"\$potlib\" 2>/dev/null |
+			 $SED -e 10q |
+			 $EGREP "$file_magic_regex" > /dev/null; then
+			func_append newdeplibs " $a_deplib"
+			a_deplib=""
+			break 2
+		      fi
+		  done
+		done
+	      fi
+	      if test -n "$a_deplib" ; then
+		droppeddeps=yes
+		echo
+		$ECHO "*** Warning: linker path does not have real file for library $a_deplib."
+		echo "*** I have the capability to make that library automatically link in when"
+		echo "*** you link to this library.  But I can only do this if you have a"
+		echo "*** shared version of the library, which you do not appear to have"
+		echo "*** because I did check the linker path looking for a file starting"
+		if test -z "$potlib" ; then
+		  $ECHO "*** with $libname but no candidates were found. (...for file magic test)"
+		else
+		  $ECHO "*** with $libname and none of the candidates passed a file format test"
+		  $ECHO "*** using a file magic. Last file checked: $potlib"
+		fi
+	      fi
+	      ;;
+	    *)
+	      # Add a -L argument.
+	      func_append newdeplibs " $a_deplib"
+	      ;;
+	    esac
+	  done # Gone through all deplibs.
+	  ;;
+	match_pattern*)
+	  set dummy $deplibs_check_method; shift
+	  match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"`
+	  for a_deplib in $deplibs; do
+	    case $a_deplib in
+	    -l*)
+	      func_stripname -l '' "$a_deplib"
+	      name=$func_stripname_result
+	      if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
+		case " $predeps $postdeps " in
+		*" $a_deplib "*)
+		  func_append newdeplibs " $a_deplib"
+		  a_deplib=""
+		  ;;
+		esac
+	      fi
+	      if test -n "$a_deplib" ; then
+		libname=`eval "\\$ECHO \"$libname_spec\""`
+		for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do
+		  potential_libs=`ls $i/$libname[.-]* 2>/dev/null`
+		  for potent_lib in $potential_libs; do
+		    potlib="$potent_lib" # see symlink-check above in file_magic test
+		    if eval "\$ECHO \"$potent_lib\"" 2>/dev/null | $SED 10q | \
+		       $EGREP "$match_pattern_regex" > /dev/null; then
+		      func_append newdeplibs " $a_deplib"
+		      a_deplib=""
+		      break 2
+		    fi
+		  done
+		done
+	      fi
+	      if test -n "$a_deplib" ; then
+		droppeddeps=yes
+		echo
+		$ECHO "*** Warning: linker path does not have real file for library $a_deplib."
+		echo "*** I have the capability to make that library automatically link in when"
+		echo "*** you link to this library.  But I can only do this if you have a"
+		echo "*** shared version of the library, which you do not appear to have"
+		echo "*** because I did check the linker path looking for a file starting"
+		if test -z "$potlib" ; then
+		  $ECHO "*** with $libname but no candidates were found. (...for regex pattern test)"
+		else
+		  $ECHO "*** with $libname and none of the candidates passed a file format test"
+		  $ECHO "*** using a regex pattern. Last file checked: $potlib"
+		fi
+	      fi
+	      ;;
+	    *)
+	      # Add a -L argument.
+	      func_append newdeplibs " $a_deplib"
+	      ;;
+	    esac
+	  done # Gone through all deplibs.
+	  ;;
+	none | unknown | *)
+	  newdeplibs=""
+	  tmp_deplibs=`$ECHO " $deplibs" | $SED 's/ -lc$//; s/ -[LR][^ ]*//g'`
+	  if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
+	    for i in $predeps $postdeps ; do
+	      # can't use Xsed below, because $i might contain '/'
+	      tmp_deplibs=`$ECHO " $tmp_deplibs" | $SED "s,$i,,"`
+	    done
+	  fi
+	  case $tmp_deplibs in
+	  *[!\	\ ]*)
+	    echo
+	    if test "X$deplibs_check_method" = "Xnone"; then
+	      echo "*** Warning: inter-library dependencies are not supported in this platform."
+	    else
+	      echo "*** Warning: inter-library dependencies are not known to be supported."
+	    fi
+	    echo "*** All declared inter-library dependencies are being dropped."
+	    droppeddeps=yes
+	    ;;
+	  esac
+	  ;;
+	esac
+	versuffix=$versuffix_save
+	major=$major_save
+	release=$release_save
+	libname=$libname_save
+	name=$name_save
+
+	case $host in
+	*-*-rhapsody* | *-*-darwin1.[012])
+	  # On Rhapsody replace the C library with the System framework
+	  newdeplibs=`$ECHO " $newdeplibs" | $SED 's/ -lc / System.ltframework /'`
+	  ;;
+	esac
+
+	if test "$droppeddeps" = yes; then
+	  if test "$module" = yes; then
+	    echo
+	    echo "*** Warning: libtool could not satisfy all declared inter-library"
+	    $ECHO "*** dependencies of module $libname.  Therefore, libtool will create"
+	    echo "*** a static module, that should work as long as the dlopening"
+	    echo "*** application is linked with the -dlopen flag."
+	    if test -z "$global_symbol_pipe"; then
+	      echo
+	      echo "*** However, this would only work if libtool was able to extract symbol"
+	      echo "*** lists from a program, using \`nm' or equivalent, but libtool could"
+	      echo "*** not find such a program.  So, this module is probably useless."
+	      echo "*** \`nm' from GNU binutils and a full rebuild may help."
+	    fi
+	    if test "$build_old_libs" = no; then
+	      oldlibs="$output_objdir/$libname.$libext"
+	      build_libtool_libs=module
+	      build_old_libs=yes
+	    else
+	      build_libtool_libs=no
+	    fi
+	  else
+	    echo "*** The inter-library dependencies that have been dropped here will be"
+	    echo "*** automatically added whenever a program is linked with this library"
+	    echo "*** or is declared to -dlopen it."
+
+	    if test "$allow_undefined" = no; then
+	      echo
+	      echo "*** Since this library must not contain undefined symbols,"
+	      echo "*** because either the platform does not support them or"
+	      echo "*** it was explicitly requested with -no-undefined,"
+	      echo "*** libtool will only create a static version of it."
+	      if test "$build_old_libs" = no; then
+		oldlibs="$output_objdir/$libname.$libext"
+		build_libtool_libs=module
+		build_old_libs=yes
+	      else
+		build_libtool_libs=no
+	      fi
+	    fi
+	  fi
+	fi
+	# Done checking deplibs!
+	deplibs=$newdeplibs
+      fi
+      # Time to change all our "foo.ltframework" stuff back to "-framework foo"
+      case $host in
+	*-*-darwin*)
+	  newdeplibs=`$ECHO " $newdeplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
+	  new_inherited_linker_flags=`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
+	  deplibs=`$ECHO " $deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
+	  ;;
+      esac
+
+      # move library search paths that coincide with paths to not yet
+      # installed libraries to the beginning of the library search list
+      new_libs=
+      for path in $notinst_path; do
+	case " $new_libs " in
+	*" -L$path/$objdir "*) ;;
+	*)
+	  case " $deplibs " in
+	  *" -L$path/$objdir "*)
+	    func_append new_libs " -L$path/$objdir" ;;
+	  esac
+	  ;;
+	esac
+      done
+      for deplib in $deplibs; do
+	case $deplib in
+	-L*)
+	  case " $new_libs " in
+	  *" $deplib "*) ;;
+	  *) func_append new_libs " $deplib" ;;
+	  esac
+	  ;;
+	*) func_append new_libs " $deplib" ;;
+	esac
+      done
+      deplibs="$new_libs"
+
+      # All the library-specific variables (install_libdir is set above).
+      library_names=
+      old_library=
+      dlname=
+
+      # Test again, we may have decided not to build it any more
+      if test "$build_libtool_libs" = yes; then
+	# Remove ${wl} instances when linking with ld.
+	# FIXME: should test the right _cmds variable.
+	case $archive_cmds in
+	  *\$LD\ *) wl= ;;
+        esac
+	if test "$hardcode_into_libs" = yes; then
+	  # Hardcode the library paths
+	  hardcode_libdirs=
+	  dep_rpath=
+	  rpath="$finalize_rpath"
+	  test "$opt_mode" != relink && rpath="$compile_rpath$rpath"
+	  for libdir in $rpath; do
+	    if test -n "$hardcode_libdir_flag_spec"; then
+	      if test -n "$hardcode_libdir_separator"; then
+		func_replace_sysroot "$libdir"
+		libdir=$func_replace_sysroot_result
+		if test -z "$hardcode_libdirs"; then
+		  hardcode_libdirs="$libdir"
+		else
+		  # Just accumulate the unique libdirs.
+		  case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in
+		  *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
+		    ;;
+		  *)
+		    func_append hardcode_libdirs "$hardcode_libdir_separator$libdir"
+		    ;;
+		  esac
+		fi
+	      else
+		eval flag=\"$hardcode_libdir_flag_spec\"
+		func_append dep_rpath " $flag"
+	      fi
+	    elif test -n "$runpath_var"; then
+	      case "$perm_rpath " in
+	      *" $libdir "*) ;;
+	      *) func_append perm_rpath " $libdir" ;;
+	      esac
+	    fi
+	  done
+	  # Substitute the hardcoded libdirs into the rpath.
+	  if test -n "$hardcode_libdir_separator" &&
+	     test -n "$hardcode_libdirs"; then
+	    libdir="$hardcode_libdirs"
+	    eval "dep_rpath=\"$hardcode_libdir_flag_spec\""
+	  fi
+	  if test -n "$runpath_var" && test -n "$perm_rpath"; then
+	    # We should set the runpath_var.
+	    rpath=
+	    for dir in $perm_rpath; do
+	      func_append rpath "$dir:"
+	    done
+	    eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var"
+	  fi
+	  test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs"
+	fi
+
+	shlibpath="$finalize_shlibpath"
+	test "$opt_mode" != relink && shlibpath="$compile_shlibpath$shlibpath"
+	if test -n "$shlibpath"; then
+	  eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var"
+	fi
+
+	# Get the real and link names of the library.
+	eval shared_ext=\"$shrext_cmds\"
+	eval library_names=\"$library_names_spec\"
+	set dummy $library_names
+	shift
+	realname="$1"
+	shift
+
+	if test -n "$soname_spec"; then
+	  eval soname=\"$soname_spec\"
+	else
+	  soname="$realname"
+	fi
+	if test -z "$dlname"; then
+	  dlname=$soname
+	fi
+
+	lib="$output_objdir/$realname"
+	linknames=
+	for link
+	do
+	  func_append linknames " $link"
+	done
+
+	# Use standard objects if they are pic
+	test -z "$pic_flag" && libobjs=`$ECHO "$libobjs" | $SP2NL | $SED "$lo2o" | $NL2SP`
+	test "X$libobjs" = "X " && libobjs=
+
+	delfiles=
+	if test -n "$export_symbols" && test -n "$include_expsyms"; then
+	  $opt_dry_run || cp "$export_symbols" "$output_objdir/$libname.uexp"
+	  export_symbols="$output_objdir/$libname.uexp"
+	  func_append delfiles " $export_symbols"
+	fi
+
+	orig_export_symbols=
+	case $host_os in
+	cygwin* | mingw* | cegcc*)
+	  if test -n "$export_symbols" && test -z "$export_symbols_regex"; then
+	    # exporting using user supplied symfile
+	    if test "x`$SED 1q $export_symbols`" != xEXPORTS; then
+	      # and it's NOT already a .def file. Must figure out
+	      # which of the given symbols are data symbols and tag
+	      # them as such. So, trigger use of export_symbols_cmds.
+	      # export_symbols gets reassigned inside the "prepare
+	      # the list of exported symbols" if statement, so the
+	      # include_expsyms logic still works.
+	      orig_export_symbols="$export_symbols"
+	      export_symbols=
+	      always_export_symbols=yes
+	    fi
+	  fi
+	  ;;
+	esac
+
+	# Prepare the list of exported symbols
+	if test -z "$export_symbols"; then
+	  if test "$always_export_symbols" = yes || test -n "$export_symbols_regex"; then
+	    func_verbose "generating symbol list for \`$libname.la'"
+	    export_symbols="$output_objdir/$libname.exp"
+	    $opt_dry_run || $RM $export_symbols
+	    cmds=$export_symbols_cmds
+	    save_ifs="$IFS"; IFS='~'
+	    for cmd1 in $cmds; do
+	      IFS="$save_ifs"
+	      # Take the normal branch if the nm_file_list_spec branch
+	      # doesn't work or if tool conversion is not needed.
+	      case $nm_file_list_spec~$to_tool_file_cmd in
+		*~func_convert_file_noop | *~func_convert_file_msys_to_w32 | ~*)
+		  try_normal_branch=yes
+		  eval cmd=\"$cmd1\"
+		  func_len " $cmd"
+		  len=$func_len_result
+		  ;;
+		*)
+		  try_normal_branch=no
+		  ;;
+	      esac
+	      if test "$try_normal_branch" = yes \
+		 && { test "$len" -lt "$max_cmd_len" \
+		      || test "$max_cmd_len" -le -1; }
+	      then
+		func_show_eval "$cmd" 'exit $?'
+		skipped_export=false
+	      elif test -n "$nm_file_list_spec"; then
+		func_basename "$output"
+		output_la=$func_basename_result
+		save_libobjs=$libobjs
+		save_output=$output
+		output=${output_objdir}/${output_la}.nm
+		func_to_tool_file "$output"
+		libobjs=$nm_file_list_spec$func_to_tool_file_result
+		func_append delfiles " $output"
+		func_verbose "creating $NM input file list: $output"
+		for obj in $save_libobjs; do
+		  func_to_tool_file "$obj"
+		  $ECHO "$func_to_tool_file_result"
+		done > "$output"
+		eval cmd=\"$cmd1\"
+		func_show_eval "$cmd" 'exit $?'
+		output=$save_output
+		libobjs=$save_libobjs
+		skipped_export=false
+	      else
+		# The command line is too long to execute in one step.
+		func_verbose "using reloadable object file for export list..."
+		skipped_export=:
+		# Break out early, otherwise skipped_export may be
+		# set to false by a later but shorter cmd.
+		break
+	      fi
+	    done
+	    IFS="$save_ifs"
+	    if test -n "$export_symbols_regex" && test "X$skipped_export" != "X:"; then
+	      func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"'
+	      func_show_eval '$MV "${export_symbols}T" "$export_symbols"'
+	    fi
+	  fi
+	fi
+
+	if test -n "$export_symbols" && test -n "$include_expsyms"; then
+	  tmp_export_symbols="$export_symbols"
+	  test -n "$orig_export_symbols" && tmp_export_symbols="$orig_export_symbols"
+	  $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"'
+	fi
+
+	if test "X$skipped_export" != "X:" && test -n "$orig_export_symbols"; then
+	  # The given exports_symbols file has to be filtered, so filter it.
+	  func_verbose "filter symbol list for \`$libname.la' to tag DATA exports"
+	  # FIXME: $output_objdir/$libname.filter potentially contains lots of
+	  # 's' commands which not all seds can handle. GNU sed should be fine
+	  # though. Also, the filter scales superlinearly with the number of
+	  # global variables. join(1) would be nice here, but unfortunately
+	  # isn't a blessed tool.
+	  $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter
+	  func_append delfiles " $export_symbols $output_objdir/$libname.filter"
+	  export_symbols=$output_objdir/$libname.def
+	  $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols
+	fi
+
+	tmp_deplibs=
+	for test_deplib in $deplibs; do
+	  case " $convenience " in
+	  *" $test_deplib "*) ;;
+	  *)
+	    func_append tmp_deplibs " $test_deplib"
+	    ;;
+	  esac
+	done
+	deplibs="$tmp_deplibs"
+
+	if test -n "$convenience"; then
+	  if test -n "$whole_archive_flag_spec" &&
+	    test "$compiler_needs_object" = yes &&
+	    test -z "$libobjs"; then
+	    # extract the archives, so we have objects to list.
+	    # TODO: could optimize this to just extract one archive.
+	    whole_archive_flag_spec=
+	  fi
+	  if test -n "$whole_archive_flag_spec"; then
+	    save_libobjs=$libobjs
+	    eval libobjs=\"\$libobjs $whole_archive_flag_spec\"
+	    test "X$libobjs" = "X " && libobjs=
+	  else
+	    gentop="$output_objdir/${outputname}x"
+	    func_append generated " $gentop"
+
+	    func_extract_archives $gentop $convenience
+	    func_append libobjs " $func_extract_archives_result"
+	    test "X$libobjs" = "X " && libobjs=
+	  fi
+	fi
+
+	if test "$thread_safe" = yes && test -n "$thread_safe_flag_spec"; then
+	  eval flag=\"$thread_safe_flag_spec\"
+	  func_append linker_flags " $flag"
+	fi
+
+	# Make a backup of the uninstalled library when relinking
+	if test "$opt_mode" = relink; then
+	  $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}U && $MV $realname ${realname}U)' || exit $?
+	fi
+
+	# Do each of the archive commands.
+	if test "$module" = yes && test -n "$module_cmds" ; then
+	  if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then
+	    eval test_cmds=\"$module_expsym_cmds\"
+	    cmds=$module_expsym_cmds
+	  else
+	    eval test_cmds=\"$module_cmds\"
+	    cmds=$module_cmds
+	  fi
+	else
+	  if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then
+	    eval test_cmds=\"$archive_expsym_cmds\"
+	    cmds=$archive_expsym_cmds
+	  else
+	    eval test_cmds=\"$archive_cmds\"
+	    cmds=$archive_cmds
+	  fi
+	fi
+
+	if test "X$skipped_export" != "X:" &&
+	   func_len " $test_cmds" &&
+	   len=$func_len_result &&
+	   test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then
+	  :
+	else
+	  # The command line is too long to link in one step, link piecewise
+	  # or, if using GNU ld and skipped_export is not :, use a linker
+	  # script.
+
+	  # Save the value of $output and $libobjs because we want to
+	  # use them later.  If we have whole_archive_flag_spec, we
+	  # want to use save_libobjs as it was before
+	  # whole_archive_flag_spec was expanded, because we can't
+	  # assume the linker understands whole_archive_flag_spec.
+	  # This may have to be revisited, in case too many
+	  # convenience libraries get linked in and end up exceeding
+	  # the spec.
+	  if test -z "$convenience" || test -z "$whole_archive_flag_spec"; then
+	    save_libobjs=$libobjs
+	  fi
+	  save_output=$output
+	  func_basename "$output"
+	  output_la=$func_basename_result
+
+	  # Clear the reloadable object creation command queue and
+	  # initialize k to one.
+	  test_cmds=
+	  concat_cmds=
+	  objlist=
+	  last_robj=
+	  k=1
+
+	  if test -n "$save_libobjs" && test "X$skipped_export" != "X:" && test "$with_gnu_ld" = yes; then
+	    output=${output_objdir}/${output_la}.lnkscript
+	    func_verbose "creating GNU ld script: $output"
+	    echo 'INPUT (' > $output
+	    for obj in $save_libobjs
+	    do
+	      func_to_tool_file "$obj"
+	      $ECHO "$func_to_tool_file_result" >> $output
+	    done
+	    echo ')' >> $output
+	    func_append delfiles " $output"
+	    func_to_tool_file "$output"
+	    output=$func_to_tool_file_result
+	  elif test -n "$save_libobjs" && test "X$skipped_export" != "X:" && test "X$file_list_spec" != X; then
+	    output=${output_objdir}/${output_la}.lnk
+	    func_verbose "creating linker input file list: $output"
+	    : > $output
+	    set x $save_libobjs
+	    shift
+	    firstobj=
+	    if test "$compiler_needs_object" = yes; then
+	      firstobj="$1 "
+	      shift
+	    fi
+	    for obj
+	    do
+	      func_to_tool_file "$obj"
+	      $ECHO "$func_to_tool_file_result" >> $output
+	    done
+	    func_append delfiles " $output"
+	    func_to_tool_file "$output"
+	    output=$firstobj\"$file_list_spec$func_to_tool_file_result\"
+	  else
+	    if test -n "$save_libobjs"; then
+	      func_verbose "creating reloadable object files..."
+	      output=$output_objdir/$output_la-${k}.$objext
+	      eval test_cmds=\"$reload_cmds\"
+	      func_len " $test_cmds"
+	      len0=$func_len_result
+	      len=$len0
+
+	      # Loop over the list of objects to be linked.
+	      for obj in $save_libobjs
+	      do
+		func_len " $obj"
+		func_arith $len + $func_len_result
+		len=$func_arith_result
+		if test "X$objlist" = X ||
+		   test "$len" -lt "$max_cmd_len"; then
+		  func_append objlist " $obj"
+		else
+		  # The command $test_cmds is almost too long, add a
+		  # command to the queue.
+		  if test "$k" -eq 1 ; then
+		    # The first file doesn't have a previous command to add.
+		    reload_objs=$objlist
+		    eval concat_cmds=\"$reload_cmds\"
+		  else
+		    # All subsequent reloadable object files will link in
+		    # the last one created.
+		    reload_objs="$objlist $last_robj"
+		    eval concat_cmds=\"\$concat_cmds~$reload_cmds~\$RM $last_robj\"
+		  fi
+		  last_robj=$output_objdir/$output_la-${k}.$objext
+		  func_arith $k + 1
+		  k=$func_arith_result
+		  output=$output_objdir/$output_la-${k}.$objext
+		  objlist=" $obj"
+		  func_len " $last_robj"
+		  func_arith $len0 + $func_len_result
+		  len=$func_arith_result
+		fi
+	      done
+	      # Handle the remaining objects by creating one last
+	      # reloadable object file.  All subsequent reloadable object
+	      # files will link in the last one created.
+	      test -z "$concat_cmds" || concat_cmds=$concat_cmds~
+	      reload_objs="$objlist $last_robj"
+	      eval concat_cmds=\"\${concat_cmds}$reload_cmds\"
+	      if test -n "$last_robj"; then
+	        eval concat_cmds=\"\${concat_cmds}~\$RM $last_robj\"
+	      fi
+	      func_append delfiles " $output"
+
+	    else
+	      output=
+	    fi
+
+	    if ${skipped_export-false}; then
+	      func_verbose "generating symbol list for \`$libname.la'"
+	      export_symbols="$output_objdir/$libname.exp"
+	      $opt_dry_run || $RM $export_symbols
+	      libobjs=$output
+	      # Append the command to create the export file.
+	      test -z "$concat_cmds" || concat_cmds=$concat_cmds~
+	      eval concat_cmds=\"\$concat_cmds$export_symbols_cmds\"
+	      if test -n "$last_robj"; then
+		eval concat_cmds=\"\$concat_cmds~\$RM $last_robj\"
+	      fi
+	    fi
+
+	    test -n "$save_libobjs" &&
+	      func_verbose "creating a temporary reloadable object file: $output"
+
+	    # Loop through the commands generated above and execute them.
+	    save_ifs="$IFS"; IFS='~'
+	    for cmd in $concat_cmds; do
+	      IFS="$save_ifs"
+	      $opt_silent || {
+		  func_quote_for_expand "$cmd"
+		  eval "func_echo $func_quote_for_expand_result"
+	      }
+	      $opt_dry_run || eval "$cmd" || {
+		lt_exit=$?
+
+		# Restore the uninstalled library and exit
+		if test "$opt_mode" = relink; then
+		  ( cd "$output_objdir" && \
+		    $RM "${realname}T" && \
+		    $MV "${realname}U" "$realname" )
+		fi
+
+		exit $lt_exit
+	      }
+	    done
+	    IFS="$save_ifs"
+
+	    if test -n "$export_symbols_regex" && ${skipped_export-false}; then
+	      func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"'
+	      func_show_eval '$MV "${export_symbols}T" "$export_symbols"'
+	    fi
+	  fi
+
+          if ${skipped_export-false}; then
+	    if test -n "$export_symbols" && test -n "$include_expsyms"; then
+	      tmp_export_symbols="$export_symbols"
+	      test -n "$orig_export_symbols" && tmp_export_symbols="$orig_export_symbols"
+	      $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"'
+	    fi
+
+	    if test -n "$orig_export_symbols"; then
+	      # The given exports_symbols file has to be filtered, so filter it.
+	      func_verbose "filter symbol list for \`$libname.la' to tag DATA exports"
+	      # FIXME: $output_objdir/$libname.filter potentially contains lots of
+	      # 's' commands which not all seds can handle. GNU sed should be fine
+	      # though. Also, the filter scales superlinearly with the number of
+	      # global variables. join(1) would be nice here, but unfortunately
+	      # isn't a blessed tool.
+	      $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter
+	      func_append delfiles " $export_symbols $output_objdir/$libname.filter"
+	      export_symbols=$output_objdir/$libname.def
+	      $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols
+	    fi
+	  fi
+
+	  libobjs=$output
+	  # Restore the value of output.
+	  output=$save_output
+
+	  if test -n "$convenience" && test -n "$whole_archive_flag_spec"; then
+	    eval libobjs=\"\$libobjs $whole_archive_flag_spec\"
+	    test "X$libobjs" = "X " && libobjs=
+	  fi
+	  # Expand the library linking commands again to reset the
+	  # value of $libobjs for piecewise linking.
+
+	  # Do each of the archive commands.
+	  if test "$module" = yes && test -n "$module_cmds" ; then
+	    if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then
+	      cmds=$module_expsym_cmds
+	    else
+	      cmds=$module_cmds
+	    fi
+	  else
+	    if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then
+	      cmds=$archive_expsym_cmds
+	    else
+	      cmds=$archive_cmds
+	    fi
+	  fi
+	fi
+
+	if test -n "$delfiles"; then
+	  # Append the command to remove temporary files to $cmds.
+	  eval cmds=\"\$cmds~\$RM $delfiles\"
+	fi
+
+	# Add any objects from preloaded convenience libraries
+	if test -n "$dlprefiles"; then
+	  gentop="$output_objdir/${outputname}x"
+	  func_append generated " $gentop"
+
+	  func_extract_archives $gentop $dlprefiles
+	  func_append libobjs " $func_extract_archives_result"
+	  test "X$libobjs" = "X " && libobjs=
+	fi
+
+	save_ifs="$IFS"; IFS='~'
+	for cmd in $cmds; do
+	  IFS="$save_ifs"
+	  eval cmd=\"$cmd\"
+	  $opt_silent || {
+	    func_quote_for_expand "$cmd"
+	    eval "func_echo $func_quote_for_expand_result"
+	  }
+	  $opt_dry_run || eval "$cmd" || {
+	    lt_exit=$?
+
+	    # Restore the uninstalled library and exit
+	    if test "$opt_mode" = relink; then
+	      ( cd "$output_objdir" && \
+	        $RM "${realname}T" && \
+		$MV "${realname}U" "$realname" )
+	    fi
+
+	    exit $lt_exit
+	  }
+	done
+	IFS="$save_ifs"
+
+	# Restore the uninstalled library and exit
+	if test "$opt_mode" = relink; then
+	  $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}T && $MV $realname ${realname}T && $MV ${realname}U $realname)' || exit $?
+
+	  if test -n "$convenience"; then
+	    if test -z "$whole_archive_flag_spec"; then
+	      func_show_eval '${RM}r "$gentop"'
+	    fi
+	  fi
+
+	  exit $EXIT_SUCCESS
+	fi
+
+	# Create links to the real library.
+	for linkname in $linknames; do
+	  if test "$realname" != "$linkname"; then
+	    func_show_eval '(cd "$output_objdir" && $RM "$linkname" && $LN_S "$realname" "$linkname")' 'exit $?'
+	  fi
+	done
+
+	# If -module or -export-dynamic was specified, set the dlname.
+	if test "$module" = yes || test "$export_dynamic" = yes; then
+	  # On all known operating systems, these are identical.
+	  dlname="$soname"
+	fi
+      fi
+      ;;
+
+    obj)
+      if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
+	func_warning "\`-dlopen' is ignored for objects"
+      fi
+
+      case " $deplibs" in
+      *\ -l* | *\ -L*)
+	func_warning "\`-l' and \`-L' are ignored for objects" ;;
+      esac
+
+      test -n "$rpath" && \
+	func_warning "\`-rpath' is ignored for objects"
+
+      test -n "$xrpath" && \
+	func_warning "\`-R' is ignored for objects"
+
+      test -n "$vinfo" && \
+	func_warning "\`-version-info' is ignored for objects"
+
+      test -n "$release" && \
+	func_warning "\`-release' is ignored for objects"
+
+      case $output in
+      *.lo)
+	test -n "$objs$old_deplibs" && \
+	  func_fatal_error "cannot build library object \`$output' from non-libtool objects"
+
+	libobj=$output
+	func_lo2o "$libobj"
+	obj=$func_lo2o_result
+	;;
+      *)
+	libobj=
+	obj="$output"
+	;;
+      esac
+
+      # Delete the old objects.
+      $opt_dry_run || $RM $obj $libobj
+
+      # Objects from convenience libraries.  This assumes
+      # single-version convenience libraries.  Whenever we create
+      # different ones for PIC/non-PIC, we'll have to duplicate
+      # the extraction.
+      reload_conv_objs=
+      gentop=
+      # reload_cmds runs $LD directly, so let us get rid of
+      # -Wl from whole_archive_flag_spec and hope we can get by with
+      # turning comma into space..
+      wl=
+
+      if test -n "$convenience"; then
+	if test -n "$whole_archive_flag_spec"; then
+	  eval tmp_whole_archive_flags=\"$whole_archive_flag_spec\"
+	  reload_conv_objs=$reload_objs\ `$ECHO "$tmp_whole_archive_flags" | $SED 's|,| |g'`
+	else
+	  gentop="$output_objdir/${obj}x"
+	  func_append generated " $gentop"
+
+	  func_extract_archives $gentop $convenience
+	  reload_conv_objs="$reload_objs $func_extract_archives_result"
+	fi
+      fi
+
+      # If we're not building shared, we need to use non_pic_objs
+      test "$build_libtool_libs" != yes && libobjs="$non_pic_objects"
+
+      # Create the old-style object.
+      reload_objs="$objs$old_deplibs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.${libext}$/d; /\.lib$/d; $lo2o" | $NL2SP`" $reload_conv_objs" ### testsuite: skip nested quoting test
+
+      output="$obj"
+      func_execute_cmds "$reload_cmds" 'exit $?'
+
+      # Exit if we aren't doing a library object file.
+      if test -z "$libobj"; then
+	if test -n "$gentop"; then
+	  func_show_eval '${RM}r "$gentop"'
+	fi
+
+	exit $EXIT_SUCCESS
+      fi
+
+      if test "$build_libtool_libs" != yes; then
+	if test -n "$gentop"; then
+	  func_show_eval '${RM}r "$gentop"'
+	fi
+
+	# Create an invalid libtool object if no PIC, so that we don't
+	# accidentally link it into a program.
+	# $show "echo timestamp > $libobj"
+	# $opt_dry_run || eval "echo timestamp > $libobj" || exit $?
+	exit $EXIT_SUCCESS
+      fi
+
+      if test -n "$pic_flag" || test "$pic_mode" != default; then
+	# Only do commands if we really have different PIC objects.
+	reload_objs="$libobjs $reload_conv_objs"
+	output="$libobj"
+	func_execute_cmds "$reload_cmds" 'exit $?'
+      fi
+
+      if test -n "$gentop"; then
+	func_show_eval '${RM}r "$gentop"'
+      fi
+
+      exit $EXIT_SUCCESS
+      ;;
+
+    prog)
+      case $host in
+	*cygwin*) func_stripname '' '.exe' "$output"
+	          output=$func_stripname_result.exe;;
+      esac
+      test -n "$vinfo" && \
+	func_warning "\`-version-info' is ignored for programs"
+
+      test -n "$release" && \
+	func_warning "\`-release' is ignored for programs"
+
+      test "$preload" = yes \
+        && test "$dlopen_support" = unknown \
+	&& test "$dlopen_self" = unknown \
+	&& test "$dlopen_self_static" = unknown && \
+	  func_warning "\`LT_INIT([dlopen])' not used. Assuming no dlopen support."
+
+      case $host in
+      *-*-rhapsody* | *-*-darwin1.[012])
+	# On Rhapsody replace the C library with the System framework
+	compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's/ -lc / System.ltframework /'`
+	finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's/ -lc / System.ltframework /'`
+	;;
+      esac
+
+      case $host in
+      *-*-darwin*)
+	# Don't allow lazy linking, it breaks C++ global constructors
+	# But is supposedly fixed on 10.4 or later (yay!).
+	if test "$tagname" = CXX ; then
+	  case ${MACOSX_DEPLOYMENT_TARGET-10.0} in
+	    10.[0123])
+	      func_append compile_command " ${wl}-bind_at_load"
+	      func_append finalize_command " ${wl}-bind_at_load"
+	    ;;
+	  esac
+	fi
+	# Time to change all our "foo.ltframework" stuff back to "-framework foo"
+	compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
+	finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
+	;;
+      esac
+
+
+      # move library search paths that coincide with paths to not yet
+      # installed libraries to the beginning of the library search list
+      new_libs=
+      for path in $notinst_path; do
+	case " $new_libs " in
+	*" -L$path/$objdir "*) ;;
+	*)
+	  case " $compile_deplibs " in
+	  *" -L$path/$objdir "*)
+	    func_append new_libs " -L$path/$objdir" ;;
+	  esac
+	  ;;
+	esac
+      done
+      for deplib in $compile_deplibs; do
+	case $deplib in
+	-L*)
+	  case " $new_libs " in
+	  *" $deplib "*) ;;
+	  *) func_append new_libs " $deplib" ;;
+	  esac
+	  ;;
+	*) func_append new_libs " $deplib" ;;
+	esac
+      done
+      compile_deplibs="$new_libs"
+
+
+      func_append compile_command " $compile_deplibs"
+      func_append finalize_command " $finalize_deplibs"
+
+      if test -n "$rpath$xrpath"; then
+	# If the user specified any rpath flags, then add them.
+	for libdir in $rpath $xrpath; do
+	  # This is the magic to use -rpath.
+	  case "$finalize_rpath " in
+	  *" $libdir "*) ;;
+	  *) func_append finalize_rpath " $libdir" ;;
+	  esac
+	done
+      fi
+
+      # Now hardcode the library paths
+      rpath=
+      hardcode_libdirs=
+      for libdir in $compile_rpath $finalize_rpath; do
+	if test -n "$hardcode_libdir_flag_spec"; then
+	  if test -n "$hardcode_libdir_separator"; then
+	    if test -z "$hardcode_libdirs"; then
+	      hardcode_libdirs="$libdir"
+	    else
+	      # Just accumulate the unique libdirs.
+	      case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in
+	      *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
+		;;
+	      *)
+		func_append hardcode_libdirs "$hardcode_libdir_separator$libdir"
+		;;
+	      esac
+	    fi
+	  else
+	    eval flag=\"$hardcode_libdir_flag_spec\"
+	    func_append rpath " $flag"
+	  fi
+	elif test -n "$runpath_var"; then
+	  case "$perm_rpath " in
+	  *" $libdir "*) ;;
+	  *) func_append perm_rpath " $libdir" ;;
+	  esac
+	fi
+	case $host in
+	*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*)
+	  testbindir=`${ECHO} "$libdir" | ${SED} -e 's*/lib$*/bin*'`
+	  case :$dllsearchpath: in
+	  *":$libdir:"*) ;;
+	  ::) dllsearchpath=$libdir;;
+	  *) func_append dllsearchpath ":$libdir";;
+	  esac
+	  case :$dllsearchpath: in
+	  *":$testbindir:"*) ;;
+	  ::) dllsearchpath=$testbindir;;
+	  *) func_append dllsearchpath ":$testbindir";;
+	  esac
+	  ;;
+	esac
+      done
+      # Substitute the hardcoded libdirs into the rpath.
+      if test -n "$hardcode_libdir_separator" &&
+	 test -n "$hardcode_libdirs"; then
+	libdir="$hardcode_libdirs"
+	eval rpath=\" $hardcode_libdir_flag_spec\"
+      fi
+      compile_rpath="$rpath"
+
+      rpath=
+      hardcode_libdirs=
+      for libdir in $finalize_rpath; do
+	if test -n "$hardcode_libdir_flag_spec"; then
+	  if test -n "$hardcode_libdir_separator"; then
+	    if test -z "$hardcode_libdirs"; then
+	      hardcode_libdirs="$libdir"
+	    else
+	      # Just accumulate the unique libdirs.
+	      case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in
+	      *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
+		;;
+	      *)
+		func_append hardcode_libdirs "$hardcode_libdir_separator$libdir"
+		;;
+	      esac
+	    fi
+	  else
+	    eval flag=\"$hardcode_libdir_flag_spec\"
+	    func_append rpath " $flag"
+	  fi
+	elif test -n "$runpath_var"; then
+	  case "$finalize_perm_rpath " in
+	  *" $libdir "*) ;;
+	  *) func_append finalize_perm_rpath " $libdir" ;;
+	  esac
+	fi
+      done
+      # Substitute the hardcoded libdirs into the rpath.
+      if test -n "$hardcode_libdir_separator" &&
+	 test -n "$hardcode_libdirs"; then
+	libdir="$hardcode_libdirs"
+	eval rpath=\" $hardcode_libdir_flag_spec\"
+      fi
+      finalize_rpath="$rpath"
+
+      if test -n "$libobjs" && test "$build_old_libs" = yes; then
+	# Transform all the library objects into standard objects.
+	compile_command=`$ECHO "$compile_command" | $SP2NL | $SED "$lo2o" | $NL2SP`
+	finalize_command=`$ECHO "$finalize_command" | $SP2NL | $SED "$lo2o" | $NL2SP`
+      fi
+
+      func_generate_dlsyms "$outputname" "@PROGRAM@" "no"
+
+      # template prelinking step
+      if test -n "$prelink_cmds"; then
+	func_execute_cmds "$prelink_cmds" 'exit $?'
+      fi
+
+      wrappers_required=yes
+      case $host in
+      *cegcc* | *mingw32ce*)
+        # Disable wrappers for cegcc and mingw32ce hosts, we are cross compiling anyway.
+        wrappers_required=no
+        ;;
+      *cygwin* | *mingw* )
+        if test "$build_libtool_libs" != yes; then
+          wrappers_required=no
+        fi
+        ;;
+      *)
+        if test "$need_relink" = no || test "$build_libtool_libs" != yes; then
+          wrappers_required=no
+        fi
+        ;;
+      esac
+      if test "$wrappers_required" = no; then
+	# Replace the output file specification.
+	compile_command=`$ECHO "$compile_command" | $SED 's%@OUTPUT@%'"$output"'%g'`
+	link_command="$compile_command$compile_rpath"
+
+	# We have no uninstalled library dependencies, so finalize right now.
+	exit_status=0
+	func_show_eval "$link_command" 'exit_status=$?'
+
+	if test -n "$postlink_cmds"; then
+	  func_to_tool_file "$output"
+	  postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'`
+	  func_execute_cmds "$postlink_cmds" 'exit $?'
+	fi
+
+	# Delete the generated files.
+	if test -f "$output_objdir/${outputname}S.${objext}"; then
+	  func_show_eval '$RM "$output_objdir/${outputname}S.${objext}"'
+	fi
+
+	exit $exit_status
+      fi
+
+      if test -n "$compile_shlibpath$finalize_shlibpath"; then
+	compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command"
+      fi
+      if test -n "$finalize_shlibpath"; then
+	finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command"
+      fi
+
+      compile_var=
+      finalize_var=
+      if test -n "$runpath_var"; then
+	if test -n "$perm_rpath"; then
+	  # We should set the runpath_var.
+	  rpath=
+	  for dir in $perm_rpath; do
+	    func_append rpath "$dir:"
+	  done
+	  compile_var="$runpath_var=\"$rpath\$$runpath_var\" "
+	fi
+	if test -n "$finalize_perm_rpath"; then
+	  # We should set the runpath_var.
+	  rpath=
+	  for dir in $finalize_perm_rpath; do
+	    func_append rpath "$dir:"
+	  done
+	  finalize_var="$runpath_var=\"$rpath\$$runpath_var\" "
+	fi
+      fi
+
+      if test "$no_install" = yes; then
+	# We don't need to create a wrapper script.
+	link_command="$compile_var$compile_command$compile_rpath"
+	# Replace the output file specification.
+	link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output"'%g'`
+	# Delete the old output file.
+	$opt_dry_run || $RM $output
+	# Link the executable and exit
+	func_show_eval "$link_command" 'exit $?'
+
+	if test -n "$postlink_cmds"; then
+	  func_to_tool_file "$output"
+	  postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'`
+	  func_execute_cmds "$postlink_cmds" 'exit $?'
+	fi
+
+	exit $EXIT_SUCCESS
+      fi
+
+      if test "$hardcode_action" = relink; then
+	# Fast installation is not supported
+	link_command="$compile_var$compile_command$compile_rpath"
+	relink_command="$finalize_var$finalize_command$finalize_rpath"
+
+	func_warning "this platform does not like uninstalled shared libraries"
+	func_warning "\`$output' will be relinked during installation"
+      else
+	if test "$fast_install" != no; then
+	  link_command="$finalize_var$compile_command$finalize_rpath"
+	  if test "$fast_install" = yes; then
+	    relink_command=`$ECHO "$compile_var$compile_command$compile_rpath" | $SED 's%@OUTPUT@%\$progdir/\$file%g'`
+	  else
+	    # fast_install is set to needless
+	    relink_command=
+	  fi
+	else
+	  link_command="$compile_var$compile_command$compile_rpath"
+	  relink_command="$finalize_var$finalize_command$finalize_rpath"
+	fi
+      fi
+
+      # Replace the output file specification.
+      link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'`
+
+      # Delete the old output files.
+      $opt_dry_run || $RM $output $output_objdir/$outputname $output_objdir/lt-$outputname
+
+      func_show_eval "$link_command" 'exit $?'
+
+      if test -n "$postlink_cmds"; then
+	func_to_tool_file "$output_objdir/$outputname"
+	postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'`
+	func_execute_cmds "$postlink_cmds" 'exit $?'
+      fi
+
+      # Now create the wrapper script.
+      func_verbose "creating $output"
+
+      # Quote the relink command for shipping.
+      if test -n "$relink_command"; then
+	# Preserve any variables that may affect compiler behavior
+	for var in $variables_saved_for_relink; do
+	  if eval test -z \"\${$var+set}\"; then
+	    relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command"
+	  elif eval var_value=\$$var; test -z "$var_value"; then
+	    relink_command="$var=; export $var; $relink_command"
+	  else
+	    func_quote_for_eval "$var_value"
+	    relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command"
+	  fi
+	done
+	relink_command="(cd `pwd`; $relink_command)"
+	relink_command=`$ECHO "$relink_command" | $SED "$sed_quote_subst"`
+      fi
+
+      # Only actually do things if not in dry run mode.
+      $opt_dry_run || {
+	# win32 will think the script is a binary if it has
+	# a .exe suffix, so we strip it off here.
+	case $output in
+	  *.exe) func_stripname '' '.exe' "$output"
+	         output=$func_stripname_result ;;
+	esac
+	# test for cygwin because mv fails w/o .exe extensions
+	case $host in
+	  *cygwin*)
+	    exeext=.exe
+	    func_stripname '' '.exe' "$outputname"
+	    outputname=$func_stripname_result ;;
+	  *) exeext= ;;
+	esac
+	case $host in
+	  *cygwin* | *mingw* )
+	    func_dirname_and_basename "$output" "" "."
+	    output_name=$func_basename_result
+	    output_path=$func_dirname_result
+	    cwrappersource="$output_path/$objdir/lt-$output_name.c"
+	    cwrapper="$output_path/$output_name.exe"
+	    $RM $cwrappersource $cwrapper
+	    trap "$RM $cwrappersource $cwrapper; exit $EXIT_FAILURE" 1 2 15
+
+	    func_emit_cwrapperexe_src > $cwrappersource
+
+	    # The wrapper executable is built using the $host compiler,
+	    # because it contains $host paths and files. If cross-
+	    # compiling, it, like the target executable, must be
+	    # executed on the $host or under an emulation environment.
+	    $opt_dry_run || {
+	      $LTCC $LTCFLAGS -o $cwrapper $cwrappersource
+	      $STRIP $cwrapper
+	    }
+
+	    # Now, create the wrapper script for func_source use:
+	    func_ltwrapper_scriptname $cwrapper
+	    $RM $func_ltwrapper_scriptname_result
+	    trap "$RM $func_ltwrapper_scriptname_result; exit $EXIT_FAILURE" 1 2 15
+	    $opt_dry_run || {
+	      # note: this script will not be executed, so do not chmod.
+	      if test "x$build" = "x$host" ; then
+		$cwrapper --lt-dump-script > $func_ltwrapper_scriptname_result
+	      else
+		func_emit_wrapper no > $func_ltwrapper_scriptname_result
+	      fi
+	    }
+	  ;;
+	  * )
+	    $RM $output
+	    trap "$RM $output; exit $EXIT_FAILURE" 1 2 15
+
+	    func_emit_wrapper no > $output
+	    chmod +x $output
+	  ;;
+	esac
+      }
+      exit $EXIT_SUCCESS
+      ;;
+    esac
+
+    # See if we need to build an old-fashioned archive.
+    for oldlib in $oldlibs; do
+
+      if test "$build_libtool_libs" = convenience; then
+	oldobjs="$libobjs_save $symfileobj"
+	addlibs="$convenience"
+	build_libtool_libs=no
+      else
+	if test "$build_libtool_libs" = module; then
+	  oldobjs="$libobjs_save"
+	  build_libtool_libs=no
+	else
+	  oldobjs="$old_deplibs $non_pic_objects"
+	  if test "$preload" = yes && test -f "$symfileobj"; then
+	    func_append oldobjs " $symfileobj"
+	  fi
+	fi
+	addlibs="$old_convenience"
+      fi
+
+      if test -n "$addlibs"; then
+	gentop="$output_objdir/${outputname}x"
+	func_append generated " $gentop"
+
+	func_extract_archives $gentop $addlibs
+	func_append oldobjs " $func_extract_archives_result"
+      fi
+
+      # Do each command in the archive commands.
+      if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then
+	cmds=$old_archive_from_new_cmds
+      else
+
+	# Add any objects from preloaded convenience libraries
+	if test -n "$dlprefiles"; then
+	  gentop="$output_objdir/${outputname}x"
+	  func_append generated " $gentop"
+
+	  func_extract_archives $gentop $dlprefiles
+	  func_append oldobjs " $func_extract_archives_result"
+	fi
+
+	# POSIX demands no paths to be encoded in archives.  We have
+	# to avoid creating archives with duplicate basenames if we
+	# might have to extract them afterwards, e.g., when creating a
+	# static archive out of a convenience library, or when linking
+	# the entirety of a libtool archive into another (currently
+	# not supported by libtool).
+	if (for obj in $oldobjs
+	    do
+	      func_basename "$obj"
+	      $ECHO "$func_basename_result"
+	    done | sort | sort -uc >/dev/null 2>&1); then
+	  :
+	else
+	  echo "copying selected object files to avoid basename conflicts..."
+	  gentop="$output_objdir/${outputname}x"
+	  func_append generated " $gentop"
+	  func_mkdir_p "$gentop"
+	  save_oldobjs=$oldobjs
+	  oldobjs=
+	  counter=1
+	  for obj in $save_oldobjs
+	  do
+	    func_basename "$obj"
+	    objbase="$func_basename_result"
+	    case " $oldobjs " in
+	    " ") oldobjs=$obj ;;
+	    *[\ /]"$objbase "*)
+	      while :; do
+		# Make sure we don't pick an alternate name that also
+		# overlaps.
+		newobj=lt$counter-$objbase
+		func_arith $counter + 1
+		counter=$func_arith_result
+		case " $oldobjs " in
+		*[\ /]"$newobj "*) ;;
+		*) if test ! -f "$gentop/$newobj"; then break; fi ;;
+		esac
+	      done
+	      func_show_eval "ln $obj $gentop/$newobj || cp $obj $gentop/$newobj"
+	      func_append oldobjs " $gentop/$newobj"
+	      ;;
+	    *) func_append oldobjs " $obj" ;;
+	    esac
+	  done
+	fi
+	func_to_tool_file "$oldlib" func_convert_file_msys_to_w32
+	tool_oldlib=$func_to_tool_file_result
+	eval cmds=\"$old_archive_cmds\"
+
+	func_len " $cmds"
+	len=$func_len_result
+	if test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then
+	  cmds=$old_archive_cmds
+	elif test -n "$archiver_list_spec"; then
+	  func_verbose "using command file archive linking..."
+	  for obj in $oldobjs
+	  do
+	    func_to_tool_file "$obj"
+	    $ECHO "$func_to_tool_file_result"
+	  done > $output_objdir/$libname.libcmd
+	  func_to_tool_file "$output_objdir/$libname.libcmd"
+	  oldobjs=" $archiver_list_spec$func_to_tool_file_result"
+	  cmds=$old_archive_cmds
+	else
+	  # the command line is too long to link in one step, link in parts
+	  func_verbose "using piecewise archive linking..."
+	  save_RANLIB=$RANLIB
+	  RANLIB=:
+	  objlist=
+	  concat_cmds=
+	  save_oldobjs=$oldobjs
+	  oldobjs=
+	  # Is there a better way of finding the last object in the list?
+	  for obj in $save_oldobjs
+	  do
+	    last_oldobj=$obj
+	  done
+	  eval test_cmds=\"$old_archive_cmds\"
+	  func_len " $test_cmds"
+	  len0=$func_len_result
+	  len=$len0
+	  for obj in $save_oldobjs
+	  do
+	    func_len " $obj"
+	    func_arith $len + $func_len_result
+	    len=$func_arith_result
+	    func_append objlist " $obj"
+	    if test "$len" -lt "$max_cmd_len"; then
+	      :
+	    else
+	      # the above command should be used before it gets too long
+	      oldobjs=$objlist
+	      if test "$obj" = "$last_oldobj" ; then
+		RANLIB=$save_RANLIB
+	      fi
+	      test -z "$concat_cmds" || concat_cmds=$concat_cmds~
+	      eval concat_cmds=\"\${concat_cmds}$old_archive_cmds\"
+	      objlist=
+	      len=$len0
+	    fi
+	  done
+	  RANLIB=$save_RANLIB
+	  oldobjs=$objlist
+	  if test "X$oldobjs" = "X" ; then
+	    eval cmds=\"\$concat_cmds\"
+	  else
+	    eval cmds=\"\$concat_cmds~\$old_archive_cmds\"
+	  fi
+	fi
+      fi
+      func_execute_cmds "$cmds" 'exit $?'
+    done
+
+    test -n "$generated" && \
+      func_show_eval "${RM}r$generated"
+
+    # Now create the libtool archive.
+    case $output in
+    *.la)
+      old_library=
+      test "$build_old_libs" = yes && old_library="$libname.$libext"
+      func_verbose "creating $output"
+
+      # Preserve any variables that may affect compiler behavior
+      for var in $variables_saved_for_relink; do
+	if eval test -z \"\${$var+set}\"; then
+	  relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command"
+	elif eval var_value=\$$var; test -z "$var_value"; then
+	  relink_command="$var=; export $var; $relink_command"
+	else
+	  func_quote_for_eval "$var_value"
+	  relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command"
+	fi
+      done
+      # Quote the link command for shipping.
+      relink_command="(cd `pwd`; $SHELL $progpath $preserve_args --mode=relink $libtool_args @inst_prefix_dir@)"
+      relink_command=`$ECHO "$relink_command" | $SED "$sed_quote_subst"`
+      if test "$hardcode_automatic" = yes ; then
+	relink_command=
+      fi
+
+      # Only create the output if not a dry run.
+      $opt_dry_run || {
+	for installed in no yes; do
+	  if test "$installed" = yes; then
+	    if test -z "$install_libdir"; then
+	      break
+	    fi
+	    output="$output_objdir/$outputname"i
+	    # Replace all uninstalled libtool libraries with the installed ones
+	    newdependency_libs=
+	    for deplib in $dependency_libs; do
+	      case $deplib in
+	      *.la)
+		func_basename "$deplib"
+		name="$func_basename_result"
+		func_resolve_sysroot "$deplib"
+		eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $func_resolve_sysroot_result`
+		test -z "$libdir" && \
+		  func_fatal_error "\`$deplib' is not a valid libtool archive"
+		func_append newdependency_libs " ${lt_sysroot:+=}$libdir/$name"
+		;;
+	      -L*)
+		func_stripname -L '' "$deplib"
+		func_replace_sysroot "$func_stripname_result"
+		func_append newdependency_libs " -L$func_replace_sysroot_result"
+		;;
+	      -R*)
+		func_stripname -R '' "$deplib"
+		func_replace_sysroot "$func_stripname_result"
+		func_append newdependency_libs " -R$func_replace_sysroot_result"
+		;;
+	      *) func_append newdependency_libs " $deplib" ;;
+	      esac
+	    done
+	    dependency_libs="$newdependency_libs"
+	    newdlfiles=
+
+	    for lib in $dlfiles; do
+	      case $lib in
+	      *.la)
+	        func_basename "$lib"
+		name="$func_basename_result"
+		eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib`
+		test -z "$libdir" && \
+		  func_fatal_error "\`$lib' is not a valid libtool archive"
+		func_append newdlfiles " ${lt_sysroot:+=}$libdir/$name"
+		;;
+	      *) func_append newdlfiles " $lib" ;;
+	      esac
+	    done
+	    dlfiles="$newdlfiles"
+	    newdlprefiles=
+	    for lib in $dlprefiles; do
+	      case $lib in
+	      *.la)
+		# Only pass preopened files to the pseudo-archive (for
+		# eventual linking with the app. that links it) if we
+		# didn't already link the preopened objects directly into
+		# the library:
+		func_basename "$lib"
+		name="$func_basename_result"
+		eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib`
+		test -z "$libdir" && \
+		  func_fatal_error "\`$lib' is not a valid libtool archive"
+		func_append newdlprefiles " ${lt_sysroot:+=}$libdir/$name"
+		;;
+	      esac
+	    done
+	    dlprefiles="$newdlprefiles"
+	  else
+	    newdlfiles=
+	    for lib in $dlfiles; do
+	      case $lib in
+		[\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;;
+		*) abs=`pwd`"/$lib" ;;
+	      esac
+	      func_append newdlfiles " $abs"
+	    done
+	    dlfiles="$newdlfiles"
+	    newdlprefiles=
+	    for lib in $dlprefiles; do
+	      case $lib in
+		[\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;;
+		*) abs=`pwd`"/$lib" ;;
+	      esac
+	      func_append newdlprefiles " $abs"
+	    done
+	    dlprefiles="$newdlprefiles"
+	  fi
+	  $RM $output
+	  # place dlname in correct position for cygwin
+	  # In fact, it would be nice if we could use this code for all target
+	  # systems that can't hard-code library paths into their executables
+	  # and that have no shared library path variable independent of PATH,
+	  # but it turns out we can't easily determine that from inspecting
+	  # libtool variables, so we have to hard-code the OSs to which it
+	  # applies here; at the moment, that means platforms that use the PE
+	  # object format with DLL files.  See the long comment at the top of
+	  # tests/bindir.at for full details.
+	  tdlname=$dlname
+	  case $host,$output,$installed,$module,$dlname in
+	    *cygwin*,*lai,yes,no,*.dll | *mingw*,*lai,yes,no,*.dll | *cegcc*,*lai,yes,no,*.dll)
+	      # If a -bindir argument was supplied, place the dll there.
+	      if test "x$bindir" != x ;
+	      then
+		func_relative_path "$install_libdir" "$bindir"
+		tdlname=$func_relative_path_result$dlname
+	      else
+		# Otherwise fall back on heuristic.
+		tdlname=../bin/$dlname
+	      fi
+	      ;;
+	  esac
+	  $ECHO > $output "\
+# $outputname - a libtool library file
+# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION
+#
+# Please DO NOT delete this file!
+# It is necessary for linking the library.
+
+# The name that we can dlopen(3).
+dlname='$tdlname'
+
+# Names of this library.
+library_names='$library_names'
+
+# The name of the static archive.
+old_library='$old_library'
+
+# Linker flags that can not go in dependency_libs.
+inherited_linker_flags='$new_inherited_linker_flags'
+
+# Libraries that this one depends upon.
+dependency_libs='$dependency_libs'
+
+# Names of additional weak libraries provided by this library
+weak_library_names='$weak_libs'
+
+# Version information for $libname.
+current=$current
+age=$age
+revision=$revision
+
+# Is this an already installed library?
+installed=$installed
+
+# Should we warn about portability when linking against -modules?
+shouldnotlink=$module
+
+# Files to dlopen/dlpreopen
+dlopen='$dlfiles'
+dlpreopen='$dlprefiles'
+
+# Directory that this library needs to be installed in:
+libdir='$install_libdir'"
+	  if test "$installed" = no && test "$need_relink" = yes; then
+	    $ECHO >> $output "\
+relink_command=\"$relink_command\""
+	  fi
+	done
+      }
+
+      # Do a symbolic link so that the libtool archive can be found in
+      # LD_LIBRARY_PATH before the program is installed.
+      func_show_eval '( cd "$output_objdir" && $RM "$outputname" && $LN_S "../$outputname" "$outputname" )' 'exit $?'
+      ;;
+    esac
+    exit $EXIT_SUCCESS
+}
+
+{ test "$opt_mode" = link || test "$opt_mode" = relink; } &&
+    func_mode_link ${1+"$@"}
+
+
+# func_mode_uninstall arg...
+func_mode_uninstall ()
+{
+    $opt_debug
+    RM="$nonopt"
+    files=
+    rmforce=
+    exit_status=0
+
+    # This variable tells wrapper scripts just to set variables rather
+    # than running their programs.
+    libtool_install_magic="$magic"
+
+    for arg
+    do
+      case $arg in
+      -f) func_append RM " $arg"; rmforce=yes ;;
+      -*) func_append RM " $arg" ;;
+      *) func_append files " $arg" ;;
+      esac
+    done
+
+    test -z "$RM" && \
+      func_fatal_help "you must specify an RM program"
+
+    rmdirs=
+
+    for file in $files; do
+      func_dirname "$file" "" "."
+      dir="$func_dirname_result"
+      if test "X$dir" = X.; then
+	odir="$objdir"
+      else
+	odir="$dir/$objdir"
+      fi
+      func_basename "$file"
+      name="$func_basename_result"
+      test "$opt_mode" = uninstall && odir="$dir"
+
+      # Remember odir for removal later, being careful to avoid duplicates
+      if test "$opt_mode" = clean; then
+	case " $rmdirs " in
+	  *" $odir "*) ;;
+	  *) func_append rmdirs " $odir" ;;
+	esac
+      fi
+
+      # Don't error if the file doesn't exist and rm -f was used.
+      if { test -L "$file"; } >/dev/null 2>&1 ||
+	 { test -h "$file"; } >/dev/null 2>&1 ||
+	 test -f "$file"; then
+	:
+      elif test -d "$file"; then
+	exit_status=1
+	continue
+      elif test "$rmforce" = yes; then
+	continue
+      fi
+
+      rmfiles="$file"
+
+      case $name in
+      *.la)
+	# Possibly a libtool archive, so verify it.
+	if func_lalib_p "$file"; then
+	  func_source $dir/$name
+
+	  # Delete the libtool libraries and symlinks.
+	  for n in $library_names; do
+	    func_append rmfiles " $odir/$n"
+	  done
+	  test -n "$old_library" && func_append rmfiles " $odir/$old_library"
+
+	  case "$opt_mode" in
+	  clean)
+	    case " $library_names " in
+	    *" $dlname "*) ;;
+	    *) test -n "$dlname" && func_append rmfiles " $odir/$dlname" ;;
+	    esac
+	    test -n "$libdir" && func_append rmfiles " $odir/$name $odir/${name}i"
+	    ;;
+	  uninstall)
+	    if test -n "$library_names"; then
+	      # Do each command in the postuninstall commands.
+	      func_execute_cmds "$postuninstall_cmds" 'test "$rmforce" = yes || exit_status=1'
+	    fi
+
+	    if test -n "$old_library"; then
+	      # Do each command in the old_postuninstall commands.
+	      func_execute_cmds "$old_postuninstall_cmds" 'test "$rmforce" = yes || exit_status=1'
+	    fi
+	    # FIXME: should reinstall the best remaining shared library.
+	    ;;
+	  esac
+	fi
+	;;
+
+      *.lo)
+	# Possibly a libtool object, so verify it.
+	if func_lalib_p "$file"; then
+
+	  # Read the .lo file
+	  func_source $dir/$name
+
+	  # Add PIC object to the list of files to remove.
+	  if test -n "$pic_object" &&
+	     test "$pic_object" != none; then
+	    func_append rmfiles " $dir/$pic_object"
+	  fi
+
+	  # Add non-PIC object to the list of files to remove.
+	  if test -n "$non_pic_object" &&
+	     test "$non_pic_object" != none; then
+	    func_append rmfiles " $dir/$non_pic_object"
+	  fi
+	fi
+	;;
+
+      *)
+	if test "$opt_mode" = clean ; then
+	  noexename=$name
+	  case $file in
+	  *.exe)
+	    func_stripname '' '.exe' "$file"
+	    file=$func_stripname_result
+	    func_stripname '' '.exe' "$name"
+	    noexename=$func_stripname_result
+	    # $file with .exe has already been added to rmfiles,
+	    # add $file without .exe
+	    func_append rmfiles " $file"
+	    ;;
+	  esac
+	  # Do a test to see if this is a libtool program.
+	  if func_ltwrapper_p "$file"; then
+	    if func_ltwrapper_executable_p "$file"; then
+	      func_ltwrapper_scriptname "$file"
+	      relink_command=
+	      func_source $func_ltwrapper_scriptname_result
+	      func_append rmfiles " $func_ltwrapper_scriptname_result"
+	    else
+	      relink_command=
+	      func_source $dir/$noexename
+	    fi
+
+	    # note $name still contains .exe if it was in $file originally
+	    # as does the version of $file that was added into $rmfiles
+	    func_append rmfiles " $odir/$name $odir/${name}S.${objext}"
+	    if test "$fast_install" = yes && test -n "$relink_command"; then
+	      func_append rmfiles " $odir/lt-$name"
+	    fi
+	    if test "X$noexename" != "X$name" ; then
+	      func_append rmfiles " $odir/lt-${noexename}.c"
+	    fi
+	  fi
+	fi
+	;;
+      esac
+      func_show_eval "$RM $rmfiles" 'exit_status=1'
+    done
+
+    # Try to remove the ${objdir}s in the directories where we deleted files
+    for dir in $rmdirs; do
+      if test -d "$dir"; then
+	func_show_eval "rmdir $dir >/dev/null 2>&1"
+      fi
+    done
+
+    exit $exit_status
+}
+
+{ test "$opt_mode" = uninstall || test "$opt_mode" = clean; } &&
+    func_mode_uninstall ${1+"$@"}
+
+test -z "$opt_mode" && {
+  help="$generic_help"
+  func_fatal_help "you must specify a MODE"
+}
+
+test -z "$exec_cmd" && \
+  func_fatal_help "invalid operation mode \`$opt_mode'"
+
+if test -n "$exec_cmd"; then
+  eval exec "$exec_cmd"
+  exit $EXIT_FAILURE
+fi
+
+exit $exit_status
+
+
+# The TAGs below are defined such that we never get into a situation
+# in which we disable both kinds of libraries.  Given conflicting
+# choices, we go for a static library, that is the most portable,
+# since we can't tell whether shared libraries were disabled because
+# the user asked for that or because the platform doesn't support
+# them.  This is particularly important on AIX, because we don't
+# support having both static and shared libraries enabled at the same
+# time on that platform, so we default to a shared-only configuration.
+# If a disable-shared tag is given, we'll fall back to a static-only
+# configuration.  But we'll never go from static-only to shared-only.
+
+# ### BEGIN LIBTOOL TAG CONFIG: disable-shared
+build_libtool_libs=no
+build_old_libs=yes
+# ### END LIBTOOL TAG CONFIG: disable-shared
+
+# ### BEGIN LIBTOOL TAG CONFIG: disable-static
+build_old_libs=`case $build_libtool_libs in yes) echo no;; *) echo yes;; esac`
+# ### END LIBTOOL TAG CONFIG: disable-static
+
+# Local Variables:
+# mode:shell-script
+# sh-indentation:2
+# End:
+# vi:sw=2
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/m4/acx_pthread.m4 b/third-party/libjxl/libjxl/third_party/lcms/m4/acx_pthread.m4
new file mode 100755
index 0000000000..cdfed55f44
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/m4/acx_pthread.m4
@@ -0,0 +1,305 @@
+##### http://autoconf-archive.cryp.to/acx_pthread.html
+#
+# SYNOPSIS
+#
+#   ACX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
+#
+# DESCRIPTION
+#
+#   This macro figures out how to build C programs using POSIX threads.
+#   It sets the PTHREAD_LIBS output variable to the threads library and
+#   linker flags, and the PTHREAD_CFLAGS output variable to any special
+#   C compiler flags that are needed. (The user can also force certain
+#   compiler flags/libs to be tested by setting these environment
+#   variables.)
+#
+#   Also sets PTHREAD_CC to any special C compiler that is needed for
+#   multi-threaded programs (defaults to the value of CC otherwise).
+#   (This is necessary on AIX to use the special cc_r compiler alias.)
+#
+#   NOTE: You are assumed to not only compile your program with these
+#   flags, but also link it with them as well. e.g. you should link
+#   with $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS
+#   $LIBS
+#
+#   If you are only building threads programs, you may wish to use
+#   these variables in your default LIBS, CFLAGS, and CC:
+#
+#          LIBS="$PTHREAD_LIBS $LIBS"
+#          CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+#          CC="$PTHREAD_CC"
+#
+#   In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute
+#   constant has a nonstandard name, defines PTHREAD_CREATE_JOINABLE to
+#   that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX).
+#
+#   ACTION-IF-FOUND is a list of shell commands to run if a threads
+#   library is found, and ACTION-IF-NOT-FOUND is a list of commands to
+#   run if it is not found. If ACTION-IF-FOUND is not specified, the
+#   default action will define HAVE_PTHREAD.
+#
+#   Please let the authors know if this macro fails on any platform, or
+#   if you have any other suggestions or comments. This macro was based
+#   on work by SGJ on autoconf scripts for FFTW (http://www.fftw.org/)
+#   (with help from M. Frigo), as well as ac_pthread and hb_pthread
+#   macros posted by Alejandro Forero Cuervo to the autoconf macro
+#   repository. We are also grateful for the helpful feedback of
+#   numerous users.
+#
+# LAST MODIFICATION
+#
+#   2006-05-29
+#
+# COPYLEFT
+#
+#   Copyright (c) 2006 Steven G. Johnson <stevenj@alum.mit.edu>
+#
+#   This program is free software; you can redistribute it and/or
+#   modify it under the terms of the GNU General Public License as
+#   published by the Free Software Foundation; either version 2 of the
+#   License, or (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+#   General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the Free Software
+#   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+#   02111-1307, USA.
+#
+#   As a special exception, the respective Autoconf Macro's copyright
+#   owner gives unlimited permission to copy, distribute and modify the
+#   configure scripts that are the output of Autoconf when processing
+#   the Macro. You need not follow the terms of the GNU General Public
+#   License when using or distributing such scripts, even though
+#   portions of the text of the Macro appear in them. The GNU General
+#   Public License (GPL) does govern all other use of the material that
+#   constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the
+#   Autoconf Macro released by the Autoconf Macro Archive. When you
+#   make and distribute a modified version of the Autoconf Macro, you
+#   may extend this special exception to the GPL to apply to your
+#   modified version as well.
+
+AC_DEFUN([ACX_PTHREAD], [
+AC_REQUIRE([AC_CANONICAL_HOST])
+AC_LANG_SAVE
+AC_LANG_C
+acx_pthread_ok=no
+
+# We used to check for pthread.h first, but this fails if pthread.h
+# requires special compiler flags (e.g. on Tru64 or Sequent).
+# It gets checked for in the link test anyway.
+
+# First of all, check if the user has set either of the PTHREAD_LIBS or
+# PTHREAD_CFLAGS environment variables, and if threads linking works
+# using them:
+if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then
+        save_CFLAGS="$CFLAGS"
+        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+        save_LIBS="$LIBS"
+        LIBS="$PTHREAD_LIBS $LIBS"
+        AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS])
+        AC_TRY_LINK_FUNC(pthread_join, acx_pthread_ok=yes)
+        AC_MSG_RESULT($acx_pthread_ok)
+        if test x"$acx_pthread_ok" = xno; then
+                PTHREAD_LIBS=""
+                PTHREAD_CFLAGS=""
+        fi
+        LIBS="$save_LIBS"
+        CFLAGS="$save_CFLAGS"
+fi
+
+# We must check for the threads library under a number of different
+# names; the ordering is very important because some systems
+# (e.g. DEC) have both -lpthread and -lpthreads, where one of the
+# libraries is broken (non-POSIX).
+
+# Create a list of thread flags to try.  Items starting with a "-" are
+# C compiler flags, and other items are library names, except for "none"
+# which indicates that we try without any flags at all, and "pthread-config"
+# which is a program returning the flags for the Pth emulation library.
+
+acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt lpthread pthread-config"
+
+# The ordering *is* (sometimes) important.  Some notes on the
+# individual items follow:
+
+# pthreads: AIX (must check this before -lpthread)
+# none: in case threads are in libc; should be tried before -Kthread and
+#       other compiler flags to prevent continual compiler warnings
+# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h)
+# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able)
+# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread)
+# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads)
+# -pthreads: Solaris/gcc
+# -mthreads: Mingw32/gcc, Lynx/gcc
+# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it
+#      doesn't hurt to check since this sometimes defines pthreads too;
+#      also defines -D_REENTRANT)
+#      ... -mt is also the pthreads flag for HP/aCC
+# pthread: Linux, etcetera
+# --thread-safe: KAI C++
+# pthread-config: use pthread-config program (for GNU Pth library)
+
+case "${host_cpu}-${host_os}" in
+        *solaris*)
+
+        # On Solaris (at least, for some versions), libc contains stubbed
+        # (non-functional) versions of the pthreads routines, so link-based
+        # tests will erroneously succeed.  (We need to link with -pthreads/-mt/
+        # -lpthread.)  (The stubs are missing pthread_cleanup_push, or rather
+        # a function called by this macro, so we could check for that, but
+        # who knows whether they'll stub that too in a future libc.)  So,
+        # we'll just look for -pthreads and -lpthread first:
+
+        acx_pthread_flags="-pthreads pthread -mt -pthread $acx_pthread_flags"
+        ;;
+
+        # The HP-UX compiler just warns about options it does not understand
+        # but it needs -mt.
+        *-hpux*)
+        acx_pthread_flags="-mt $acx_pthread_flags"
+        ;;
+esac
+
+if test x"$acx_pthread_ok" = xno; then
+for flag in $acx_pthread_flags; do
+
+        case $flag in
+                none)
+                AC_MSG_CHECKING([whether pthreads work without any flags])
+                ;;
+
+                -*)
+                AC_MSG_CHECKING([whether pthreads work with $flag])
+                PTHREAD_CFLAGS="$flag"
+                ;;
+
+		pthread-config)
+		AC_CHECK_PROG(acx_pthread_config, pthread-config, yes, no)
+		if test x"$acx_pthread_config" = xno; then continue; fi
+		PTHREAD_CFLAGS="`pthread-config --cflags`"
+		PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`"
+		;;
+
+                *)
+                AC_MSG_CHECKING([for the pthreads library -l$flag])
+                PTHREAD_LIBS="-l$flag"
+                ;;
+        esac
+
+        save_LIBS="$LIBS"
+        save_CFLAGS="$CFLAGS"
+        LIBS="$PTHREAD_LIBS $LIBS"
+        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+
+        # Check for various functions.  We must include pthread.h,
+        # since some functions may be macros.  (On the Sequent, we
+        # need a special flag -Kthread to make this header compile.)
+        # We check for pthread_join because it is in -lpthread on IRIX
+        # while pthread_create is in libc.  We check for pthread_attr_init
+        # due to DEC craziness with -lpthreads.  We check for
+        # pthread_cleanup_push because it is one of the few pthread
+        # functions on Solaris that doesn't have a non-functional libc stub.
+        # We try pthread_create on general principles.
+        AC_TRY_LINK([#include <pthread.h>],
+                    [pthread_t th; pthread_join(th, 0);
+                     pthread_attr_init(0); pthread_cleanup_push(0, 0);
+                     pthread_create(0,0,0,0); pthread_cleanup_pop(0); ],
+                    [acx_pthread_ok=yes])
+
+        LIBS="$save_LIBS"
+        CFLAGS="$save_CFLAGS"
+
+        AC_MSG_RESULT($acx_pthread_ok)
+        if test "x$acx_pthread_ok" = xyes; then
+                break;
+        fi
+
+        PTHREAD_LIBS=""
+        PTHREAD_CFLAGS=""
+done
+fi
+
+# Various other checks:
+if test "x$acx_pthread_ok" = xyes; then
+        save_LIBS="$LIBS"
+        LIBS="$PTHREAD_LIBS $LIBS"
+        save_CFLAGS="$CFLAGS"
+        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+
+        # Detect AIX lossage: JOINABLE attribute is called UNDETACHED.
+	AC_MSG_CHECKING([for joinable pthread attribute])
+	attr_name=unknown
+	for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do
+	    AC_TRY_LINK([#include <pthread.h>], [int attr=$attr; return attr;],
+                        [attr_name=$attr; break])
+	done
+        AC_MSG_RESULT($attr_name)
+        if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then
+            AC_DEFINE_UNQUOTED(PTHREAD_CREATE_JOINABLE, $attr_name,
+                               [Define to necessary symbol if this constant
+                                uses a non-standard name on your system.])
+        fi
+
+        AC_MSG_CHECKING([if more special flags are required for pthreads])
+        flag=no
+        case "${host_cpu}-${host_os}" in
+            *-aix* | *-freebsd* | *-darwin*) flag="-D_THREAD_SAFE";;
+            *solaris* | *-osf* | *-hpux*) flag="-D_REENTRANT";;
+        esac
+        AC_MSG_RESULT(${flag})
+        if test "x$flag" != xno; then
+            PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS"
+        fi
+
+        LIBS="$save_LIBS"
+        CFLAGS="$save_CFLAGS"
+
+        # More AIX lossage: must compile with xlc_r or cc_r
+        case "${host_os}" in
+          aix* )
+            if test x"$GCC" != xyes; then
+              case "$CC" in
+                *xlc )
+                  AC_CHECK_PROG(PTHREAD_CC, xlc_r, xlc_r, ${CC}) ;;
+                *cc )
+                  AC_CHECK_PROG(PTHREAD_CC, cc_r, cc_r, ${CC}) ;;
+              esac
+            fi
+            case "$CXX" in
+              *xlC )
+                AC_CHECK_PROG(PTHREAD_CXX, xlC_r, xlC_r, ${CXX}) ;;
+            esac
+            ;;
+        esac
+fi
+
+if test "${PTHREAD_CC}x" = "x"
+then
+  PTHREAD_CC="$CC"
+fi
+if test "${PTHREAD_CXX}x" = "x"
+then
+  PTHREAD_CXX="$CXX"
+fi
+
+AC_SUBST(PTHREAD_LIBS)
+AC_SUBST(PTHREAD_CFLAGS)
+AC_SUBST(PTHREAD_CC)
+AC_SUBST(PTHREAD_CXX)
+
+# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
+if test x"$acx_pthread_ok" = xyes; then
+        ifelse([$1],,AC_DEFINE(HAVE_PTHREAD,1,[Define if you have POSIX threads libraries and header files.]),[$1])
+        :
+else
+        acx_pthread_ok=no
+        $2
+fi
+AC_LANG_RESTORE
+])dnl ACX_PTHREAD
diff --git a/third-party/libjxl/libjxl/third_party/lcms/m4/ax_append_compile_flags.m4 b/third-party/libjxl/libjxl/third_party/lcms/m4/ax_append_compile_flags.m4
new file mode 100644
index 0000000000..2bb27ef2b1
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/m4/ax_append_compile_flags.m4
@@ -0,0 +1,67 @@
+# ===========================================================================
+#  http://www.gnu.org/software/autoconf-archive/ax_append_compile_flags.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_APPEND_COMPILE_FLAGS([FLAG1 FLAG2 ...], [FLAGS-VARIABLE], [EXTRA-FLAGS], [INPUT])
+#
+# DESCRIPTION
+#
+#   For every FLAG1, FLAG2 it is checked whether the compiler works with the
+#   flag.  If it does, the flag is added to FLAGS-VARIABLE.
+#
+#   If FLAGS-VARIABLE is not specified, the current language's flags (e.g.
+#   CFLAGS) is used.  During the check the flag is always added to the
+#   current language's flags.
+#
+#   If EXTRA-FLAGS is defined, it is added to the current language's default
+#   flags (e.g. CFLAGS) when the check is done.  The check is thus made with
+#   the flags: "CFLAGS EXTRA-FLAGS FLAG".  This can for example be used to
+#   force the compiler to issue an error when a bad flag is given.
+#
+#   INPUT gives an alternative input source to AC_COMPILE_IFELSE.
+#
+#   NOTE: This macro depends on AX_APPEND_FLAG and
+#   AX_CHECK_COMPILE_FLAG. Please keep this macro in sync with
+#   AX_APPEND_LINK_FLAGS.
+#
+# LICENSE
+#
+#   Copyright (c) 2011 Maarten Bosmans <mkbosmans@gmail.com>
+#
+#   This program is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation, either version 3 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 5
+
+AC_DEFUN([AX_APPEND_COMPILE_FLAGS],
+[AX_REQUIRE_DEFINED([AX_CHECK_COMPILE_FLAG])
+AX_REQUIRE_DEFINED([AX_APPEND_FLAG])
+for flag in $1; do
+  AX_CHECK_COMPILE_FLAG([$flag], [AX_APPEND_FLAG([$flag], [$2])], [], [$3], [$4])
+done
+])dnl AX_APPEND_COMPILE_FLAGS
diff --git a/third-party/libjxl/libjxl/third_party/lcms/m4/ax_append_flag.m4 b/third-party/libjxl/libjxl/third_party/lcms/m4/ax_append_flag.m4
new file mode 100644
index 0000000000..08f2e07ec6
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/m4/ax_append_flag.m4
@@ -0,0 +1,71 @@
+# ===========================================================================
+#      http://www.gnu.org/software/autoconf-archive/ax_append_flag.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_APPEND_FLAG(FLAG, [FLAGS-VARIABLE])
+#
+# DESCRIPTION
+#
+#   FLAG is appended to the FLAGS-VARIABLE shell variable, with a space
+#   added in between.
+#
+#   If FLAGS-VARIABLE is not specified, the current language's flags (e.g.
+#   CFLAGS) is used.  FLAGS-VARIABLE is not changed if it already contains
+#   FLAG.  If FLAGS-VARIABLE is unset in the shell, it is set to exactly
+#   FLAG.
+#
+#   NOTE: Implementation based on AX_CFLAGS_GCC_OPTION.
+#
+# LICENSE
+#
+#   Copyright (c) 2008 Guido U. Draheim <guidod@gmx.de>
+#   Copyright (c) 2011 Maarten Bosmans <mkbosmans@gmail.com>
+#
+#   This program is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation, either version 3 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 6
+
+AC_DEFUN([AX_APPEND_FLAG],
+[dnl Append FLAG to FLAGS-VARIABLE unless it is already present; set the variable outright when it is unset.
+AC_PREREQ(2.64)dnl Autoconf 2.64 or newer is required for _AC_LANG_PREFIX and AS_VAR_SET_IF
+AS_VAR_PUSHDEF([FLAGS], [m4_default($2,_AC_LANG_PREFIX[FLAGS])])
+AS_VAR_SET_IF(FLAGS,[
+  AS_CASE([" AS_VAR_GET(FLAGS) "],
+    [*" $1 "*], [AC_RUN_LOG([: FLAGS already contains $1])],
+    [
+     AS_VAR_APPEND(FLAGS,[" $1"])
+     AC_RUN_LOG([: FLAGS="$FLAGS"])
+    ])
+  ],
+  [
+  AS_VAR_SET(FLAGS,[$1])
+  AC_RUN_LOG([: FLAGS="$FLAGS"])
+  ])
+AS_VAR_POPDEF([FLAGS])dnl release the FLAGS alias pushed at the top of this macro
+])dnl AX_APPEND_FLAG
diff --git a/third-party/libjxl/libjxl/third_party/lcms/m4/ax_check_compile_flag.m4 b/third-party/libjxl/libjxl/third_party/lcms/m4/ax_check_compile_flag.m4
new file mode 100644
index 0000000000..ca3639715e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/m4/ax_check_compile_flag.m4
@@ -0,0 +1,74 @@
+# ===========================================================================
+#   http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT])
+#
+# DESCRIPTION
+#
+#   Check whether the given FLAG works with the current language's compiler
+#   or gives an error.  (Warnings, however, are ignored)
+#
+#   ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
+#   success/failure.
+#
+#   If EXTRA-FLAGS is defined, it is added to the current language's default
+#   flags (e.g. CFLAGS) when the check is done.  The check is thus made with
+#   the flags: "CFLAGS EXTRA-FLAGS FLAG".  This can for example be used to
+#   force the compiler to issue an error when a bad flag is given.
+#
+#   INPUT gives an alternative input source to AC_COMPILE_IFELSE.
+#
+#   NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this
+#   macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG.
+#
+# LICENSE
+#
+#   Copyright (c) 2008 Guido U. Draheim <guidod@gmx.de>
+#   Copyright (c) 2011 Maarten Bosmans <mkbosmans@gmail.com>
+#
+#   This program is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation, either version 3 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 4
+
+AC_DEFUN([AX_CHECK_COMPILE_FLAG],
+[AC_PREREQ(2.64)dnl Autoconf 2.64 or newer is required for _AC_LANG_PREFIX and AS_VAR_IF
+AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl the cache variable name encodes the language, EXTRA-FLAGS and FLAG
+AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
+  ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
+  _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
+  AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])],
+    [AS_VAR_SET(CACHEVAR,[yes])],
+    [AS_VAR_SET(CACHEVAR,[no])])
+  _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])
+AS_VAR_IF(CACHEVAR,yes,
+  [m4_default([$2], :)],
+  [m4_default([$3], :)])
+AS_VAR_POPDEF([CACHEVAR])dnl release the CACHEVAR alias pushed at the top of this macro
+])dnl AX_CHECK_COMPILE_FLAG
diff --git a/third-party/libjxl/libjxl/third_party/lcms/m4/ax_gcc_func_attribute.m4 b/third-party/libjxl/libjxl/third_party/lcms/m4/ax_gcc_func_attribute.m4
new file mode 100644
index 0000000000..1edceba26f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/m4/ax_gcc_func_attribute.m4
@@ -0,0 +1,221 @@
+# ===========================================================================
+#   http://www.gnu.org/software/autoconf-archive/ax_gcc_func_attribute.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_GCC_FUNC_ATTRIBUTE(ATTRIBUTE)
+#
+# DESCRIPTION
+#
+#   This macro checks if the compiler supports one of GCC's function
+#   attributes; many other compilers also provide function attributes with
+#   the same syntax. Compiler warnings are used to detect supported
+#   attributes, as unsupported ones are ignored by default, so quieting
+#   warnings when using this macro will yield false positives.
+#
+#   The ATTRIBUTE parameter holds the name of the attribute to be checked.
+#
+#   If ATTRIBUTE is supported define HAVE_FUNC_ATTRIBUTE_<ATTRIBUTE>.
+#
+#   The macro caches its result in the ax_cv_have_func_attribute_<attribute>
+#   variable.
+#
+#   The macro currently supports the following function attributes:
+#
+#    alias
+#    aligned
+#    alloc_size
+#    always_inline
+#    artificial
+#    cold
+#    const
+#    constructor
+#    constructor_priority for constructor attribute with priority
+#    deprecated
+#    destructor
+#    dllexport
+#    dllimport
+#    error
+#    externally_visible
+#    flatten
+#    format
+#    format_arg
+#    gnu_inline
+#    hot
+#    ifunc
+#    leaf
+#    malloc
+#    noclone
+#    noinline
+#    nonnull
+#    noreturn
+#    nothrow
+#    optimize
+#    pure
+#    unused
+#    used
+#    visibility
+#    warning
+#    warn_unused_result
+#    weak
+#    weakref
+#
+#   Unsupported function attributes will be tested with a prototype returning
+#   an int and not accepting any arguments and the result of the check might
+#   be wrong or meaningless so use with care.
+#
+# LICENSE
+#
+#   Copyright (c) 2013 Gabriele Svelto <gabriele.svelto@gmail.com>
+#
+#   Copying and distribution of this file, with or without modification, are
+#   permitted in any medium without royalty provided the copyright notice
+#   and this notice are preserved.  This file is offered as-is, without any
+#   warranty.
+
+#serial 3
+
+AC_DEFUN([AX_GCC_FUNC_ATTRIBUTE], [
+    AS_VAR_PUSHDEF([ac_var], [ax_cv_have_func_attribute_$1])
+
+    AC_CACHE_CHECK([for __attribute__(($1))], [ac_var], [
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([
+            m4_case([$1],
+                [alias], [
+                    int foo( void ) { return 0; }
+                    int bar( void ) __attribute__(($1("foo")));
+                ],
+                [aligned], [
+                    int foo( void ) __attribute__(($1(32)));
+                ],
+                [alloc_size], [
+                    void *foo(int a) __attribute__(($1(1)));
+                ],
+                [always_inline], [
+                    inline __attribute__(($1)) int foo( void ) { return 0; }
+                ],
+                [artificial], [
+                    inline __attribute__(($1)) int foo( void ) { return 0; }
+                ],
+                [cold], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [const], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [constructor_priority], [
+                    int foo( void ) __attribute__((__constructor__(65535/2)));
+                ],
+                [constructor], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [deprecated], [
+                    int foo( void ) __attribute__(($1("")));
+                ],
+                [destructor], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [dllexport], [
+                    __attribute__(($1)) int foo( void ) { return 0; }
+                ],
+                [dllimport], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [error], [
+                    int foo( void ) __attribute__(($1("")));
+                ],
+                [externally_visible], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [flatten], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [format], [
+                    int foo(const char *p, ...) __attribute__(($1(printf, 1, 2)));
+                ],
+                [format_arg], [
+                    char *foo(const char *p) __attribute__(($1(1)));
+                ],
+                [gnu_inline], [
+                    inline __attribute__(($1)) int foo( void ) { return 0; }
+                ],
+                [hot], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [ifunc], [
+                    int my_foo( void ) { return 0; }
+                    static int (*resolve_foo(void))(void) { return my_foo; }
+                    int foo( void ) __attribute__(($1("resolve_foo")));
+                ],
+                [leaf], [
+                    __attribute__(($1)) int foo( void ) { return 0; }
+                ],
+                [malloc], [
+                    void *foo( void ) __attribute__(($1));
+                ],
+                [noclone], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [noinline], [
+                    __attribute__(($1)) int foo( void ) { return 0; }
+                ],
+                [nonnull], [
+                    int foo(char *p) __attribute__(($1(1)));
+                ],
+                [noreturn], [
+                    void foo( void ) __attribute__(($1));
+                ],
+                [nothrow], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [optimize], [
+                    __attribute__(($1(3))) int foo( void ) { return 0; }
+                ],
+                [pure], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [unused], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [used], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [visibility], [
+                    int foo_def( void ) __attribute__(($1("default")));
+                    int foo_hid( void ) __attribute__(($1("hidden")));
+                ],
+                [warning], [
+                    int foo( void ) __attribute__(($1("")));
+                ],
+                [warn_unused_result], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [weak], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [weakref], [
+                    static int foo( void ) { return 0; }
+                    static int bar( void ) __attribute__(($1("foo")));
+                ],
+                [
+                 m4_warn([syntax], [Unsupported attribute $1, the test may fail])
+                 int foo( void ) __attribute__(($1));
+                ]
+            )], [])
+            ],
+            dnl GCC doesn't exit with an error if an unknown attribute is
+            dnl provided but only outputs a warning, so accept the attribute
+            dnl only if no warnings were issued.
+            [AS_IF([test -s conftest.err],
+                [AS_VAR_SET([ac_var], [no])],
+                [AS_VAR_SET([ac_var], [yes])])],
+            [AS_VAR_SET([ac_var], [no])])
+    ])
+
+    AS_IF([test yes = AS_VAR_GET([ac_var])],
+        [AC_DEFINE_UNQUOTED(AS_TR_CPP(HAVE_FUNC_ATTRIBUTE_$1), 1,
+            [Define to 1 if the system has the `$1' function attribute])], [])
+
+    AS_VAR_POPDEF([ac_var])
+])
diff --git a/third-party/libjxl/libjxl/third_party/lcms/m4/ax_require_defined.m4 b/third-party/libjxl/libjxl/third_party/lcms/m4/ax_require_defined.m4
new file mode 100644
index 0000000000..cae11112d9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/m4/ax_require_defined.m4
@@ -0,0 +1,37 @@
+# ===========================================================================
+#    http://www.gnu.org/software/autoconf-archive/ax_require_defined.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_REQUIRE_DEFINED(MACRO)
+#
+# DESCRIPTION
+#
+#   AX_REQUIRE_DEFINED is a simple helper for making sure other macros have
+#   been defined and thus are available for use.  This avoids random issues
+#   where a macro isn't expanded and the configure script instead emits a
+#   non-fatal error such as:
+#
+#     ./configure: line 1673: AX_CFLAGS_WARN_ALL: command not found
+#
+#   It's like AC_REQUIRE except it doesn't expand the required macro.
+#
+#   Here's an example:
+#
+#     AX_REQUIRE_DEFINED([AX_CHECK_LINK_FLAG])
+#
+# LICENSE
+#
+#   Copyright (c) 2014 Mike Frysinger <vapier@gentoo.org>
+#
+#   Copying and distribution of this file, with or without modification, are
+#   permitted in any medium without royalty provided the copyright notice
+#   and this notice are preserved. This file is offered as-is, without any
+#   warranty.
+
+#serial 1
+
+AC_DEFUN([AX_REQUIRE_DEFINED], [dnl Abort with a fatal m4 error when the macro named by the argument is not defined.
+  m4_ifndef([$1], [m4_fatal([macro ]$1[ is not defined; is a m4 file missing?])])
+])dnl AX_REQUIRE_DEFINED
diff --git a/third-party/libjxl/libjxl/third_party/lcms/m4/libtool.m4 b/third-party/libjxl/libjxl/third_party/lcms/m4/libtool.m4
new file mode 100644
index 0000000000..ae1a3df9bb
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/m4/libtool.m4
@@ -0,0 +1,7982 @@
+# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*-
+#
+#   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
+#                 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+#                 Foundation, Inc.
+#   Written by Gordon Matzigkeit, 1996
+#
+# This file is free software; the Free Software Foundation gives
+# unlimited permission to copy and/or distribute it, with or without
+# modifications, as long as this notice is preserved.
+
+m4_define([_LT_COPYING], [dnl
+#   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
+#                 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+#                 Foundation, Inc.
+#   Written by Gordon Matzigkeit, 1996
+#
+#   This file is part of GNU Libtool.
+#
+# GNU Libtool is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# As a special exception to the GNU General Public License,
+# if you distribute this file as part of a program or library that
+# is built using GNU Libtool, you may include this file under the
+# same distribution terms that you use for the rest of that program.
+#
+# GNU Libtool is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Libtool; see the file COPYING.  If not, a copy
+# can be downloaded from http://www.gnu.org/licenses/gpl.html, or
+# obtained by writing to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+])
+
+# serial 57 LT_INIT
+
+
+# LT_PREREQ(VERSION)
+# ------------------
+# Complain and exit if this libtool version is less than VERSION.
+m4_defun([LT_PREREQ],
+[m4_if(m4_version_compare(m4_defn([LT_PACKAGE_VERSION]), [$1]), -1,
+       [m4_default([$3],
+		   [m4_fatal([Libtool version $1 or higher is required],
+		             63)])],
+       [$2])])
+
+
+# _LT_CHECK_BUILDDIR
+# ------------------
+# Warn if the absolute build directory name contains a space or a tab.
+m4_defun([_LT_CHECK_BUILDDIR],
+[case `pwd` in
+  *\ * | *\	*)
+    AC_MSG_WARN([Libtool does not cope well with whitespace in `pwd`]) ;;
+esac
+])
+
+
+# LT_INIT([OPTIONS])
+# ------------------
+AC_DEFUN([LT_INIT],
+[AC_PREREQ([2.58])dnl We use AC_INCLUDES_DEFAULT
+AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl
+AC_BEFORE([$0], [LT_LANG])dnl
+AC_BEFORE([$0], [LT_OUTPUT])dnl
+AC_BEFORE([$0], [LTDL_INIT])dnl
+m4_require([_LT_CHECK_BUILDDIR])dnl
+
+dnl Autoconf doesn't catch unexpanded LT_ macros by default:
+m4_pattern_forbid([^_?LT_[A-Z_]+$])dnl
+m4_pattern_allow([^(_LT_EOF|LT_DLGLOBAL|LT_DLLAZY_OR_NOW|LT_MULTI_MODULE)$])dnl
+dnl aclocal doesn't pull ltoptions.m4, ltsugar.m4, or ltversion.m4
+dnl unless we require an AC_DEFUNed macro:
+AC_REQUIRE([LTOPTIONS_VERSION])dnl
+AC_REQUIRE([LTSUGAR_VERSION])dnl
+AC_REQUIRE([LTVERSION_VERSION])dnl
+AC_REQUIRE([LTOBSOLETE_VERSION])dnl
+m4_require([_LT_PROG_LTMAIN])dnl
+
+_LT_SHELL_INIT([SHELL=${CONFIG_SHELL-/bin/sh}])
+
+dnl Parse OPTIONS
+_LT_SET_OPTIONS([$0], [$1])
+
+# This can be used to rebuild libtool when needed
+LIBTOOL_DEPS="$ltmain"
+
+# Always use our own libtool.
+LIBTOOL='$(SHELL) $(top_builddir)/libtool'
+AC_SUBST(LIBTOOL)dnl
+
+_LT_SETUP
+
+# Only expand once:
+m4_define([LT_INIT])
+])# LT_INIT
+
+# Old names:
+AU_ALIAS([AC_PROG_LIBTOOL], [LT_INIT])
+AU_ALIAS([AM_PROG_LIBTOOL], [LT_INIT])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_PROG_LIBTOOL], [])
+dnl AC_DEFUN([AM_PROG_LIBTOOL], [])
+
+
+# _LT_CC_BASENAME(CC)
+# -------------------
+# Calculate cc_basename.  Skip known compiler wrappers and cross-prefix.
+m4_defun([_LT_CC_BASENAME],
+[for cc_temp in $1""; do
+  case $cc_temp in
+    compile | *[[\\/]]compile | ccache | *[[\\/]]ccache ) ;;
+    distcc | *[[\\/]]distcc | purify | *[[\\/]]purify ) ;;
+    \-*) ;;
+    *) break;;
+  esac
+done
+cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"`
+])
+
+
+# _LT_FILEUTILS_DEFAULTS
+# ----------------------
+# It is okay to use these file commands and assume they have been set
+# sensibly after `m4_require([_LT_FILEUTILS_DEFAULTS])'.
+m4_defun([_LT_FILEUTILS_DEFAULTS],
+[: ${CP="cp -f"}
+: ${MV="mv -f"}
+: ${RM="rm -f"}
+])# _LT_FILEUTILS_DEFAULTS
+
+
+# _LT_SETUP
+# ---------
+m4_defun([_LT_SETUP],
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_REQUIRE([AC_CANONICAL_BUILD])dnl
+AC_REQUIRE([_LT_PREPARE_SED_QUOTE_VARS])dnl
+AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl
+
+_LT_DECL([], [PATH_SEPARATOR], [1], [The PATH separator for the build system])dnl
+dnl
+_LT_DECL([], [host_alias], [0], [The host system])dnl
+_LT_DECL([], [host], [0])dnl
+_LT_DECL([], [host_os], [0])dnl
+dnl
+_LT_DECL([], [build_alias], [0], [The build system])dnl
+_LT_DECL([], [build], [0])dnl
+_LT_DECL([], [build_os], [0])dnl
+dnl
+AC_REQUIRE([AC_PROG_CC])dnl
+AC_REQUIRE([LT_PATH_LD])dnl
+AC_REQUIRE([LT_PATH_NM])dnl
+dnl
+AC_REQUIRE([AC_PROG_LN_S])dnl
+test -z "$LN_S" && LN_S="ln -s"
+_LT_DECL([], [LN_S], [1], [Whether we need soft or hard links])dnl
+dnl
+AC_REQUIRE([LT_CMD_MAX_LEN])dnl
+_LT_DECL([objext], [ac_objext], [0], [Object file suffix (normally "o")])dnl
+_LT_DECL([], [exeext], [0], [Executable file suffix (normally "")])dnl
+dnl
+m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+m4_require([_LT_CHECK_SHELL_FEATURES])dnl
+m4_require([_LT_PATH_CONVERSION_FUNCTIONS])dnl
+m4_require([_LT_CMD_RELOAD])dnl
+m4_require([_LT_CHECK_MAGIC_METHOD])dnl
+m4_require([_LT_CHECK_SHAREDLIB_FROM_LINKLIB])dnl
+m4_require([_LT_CMD_OLD_ARCHIVE])dnl
+m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl
+m4_require([_LT_WITH_SYSROOT])dnl
+
+_LT_CONFIG_LIBTOOL_INIT([
+# See if we are running on zsh, and set the options which allow our
+# commands through without removal of \ escapes INIT.
+if test -n "\${ZSH_VERSION+set}" ; then
+   setopt NO_GLOB_SUBST
+fi
+])
+if test -n "${ZSH_VERSION+set}" ; then
+   setopt NO_GLOB_SUBST
+fi
+
+_LT_CHECK_OBJDIR
+
+m4_require([_LT_TAG_COMPILER])dnl
+
+case $host_os in
+aix3*)
+  # AIX sometimes has problems with the GCC collect2 program.  For some
+  # reason, if we set the COLLECT_NAMES environment variable, the problems
+  # vanish in a puff of smoke.
+  if test "X${COLLECT_NAMES+set}" != Xset; then
+    COLLECT_NAMES=
+    export COLLECT_NAMES
+  fi
+  ;;
+esac
+
+# Global variables:
+ofile=libtool
+can_build_shared=yes
+
+# All known linkers require a `.a' archive for static linking (except MSVC,
+# which needs '.lib').
+libext=a
+
+with_gnu_ld="$lt_cv_prog_gnu_ld"
+
+old_CC="$CC"
+old_CFLAGS="$CFLAGS"
+
+# Set sane defaults for various variables
+test -z "$CC" && CC=cc
+test -z "$LTCC" && LTCC=$CC
+test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS
+test -z "$LD" && LD=ld
+test -z "$ac_objext" && ac_objext=o
+
+_LT_CC_BASENAME([$compiler])
+
+# Only perform the check for file, if the check method requires it
+test -z "$MAGIC_CMD" && MAGIC_CMD=file
+case $deplibs_check_method in
+file_magic*)
+  if test "$file_magic_cmd" = '$MAGIC_CMD'; then
+    _LT_PATH_MAGIC
+  fi
+  ;;
+esac
+
+# Use C for the default configuration in the libtool script
+LT_SUPPORTED_TAG([CC])
+_LT_LANG_C_CONFIG
+_LT_LANG_DEFAULT_CONFIG
+_LT_CONFIG_COMMANDS
+])# _LT_SETUP
+
+
+# _LT_PREPARE_SED_QUOTE_VARS
+# --------------------------
+# Define a few sed substitutions that help us do robust quoting.
+m4_defun([_LT_PREPARE_SED_QUOTE_VARS],
+[# Backslashify metacharacters that are still active within
+# double-quoted strings.
+sed_quote_subst='s/\([["`$\\]]\)/\\\1/g'
+
+# Same as above, but do not quote variable references.
+double_quote_subst='s/\([["`\\]]\)/\\\1/g'
+
+# Sed substitution to delay expansion of an escaped shell variable in a
+# double_quote_subst'ed string.
+delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g'
+
+# Sed substitution to delay expansion of an escaped single quote.
+delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g'
+
+# Sed substitution to avoid accidental globbing in evaled expressions
+no_glob_subst='s/\*/\\\*/g'
+])
+
+# _LT_PROG_LTMAIN
+# ---------------
+# Note that this code is called both from `configure', and `config.status'
+# now that we use AC_CONFIG_COMMANDS to generate libtool.  Notably,
+# `config.status' has no value for ac_aux_dir unless we are using Automake,
+# so we pass a copy along to make sure it has a sensible value anyway.
+m4_defun([_LT_PROG_LTMAIN],
+[m4_ifdef([AC_REQUIRE_AUX_FILE], [AC_REQUIRE_AUX_FILE([ltmain.sh])])dnl
+_LT_CONFIG_LIBTOOL_INIT([ac_aux_dir='$ac_aux_dir'])
+ltmain="$ac_aux_dir/ltmain.sh"
+])# _LT_PROG_LTMAIN
+
+
+## ------------------------------------- ##
+## Accumulate code for creating libtool. ##
+## ------------------------------------- ##
+
+# So that we can recreate a full libtool script including additional
+# tags, we accumulate the chunks of code to send to AC_CONFIG_COMMANDS
+# in macros and then make a single call at the end using the `libtool'
+# label.
+
+
+# _LT_CONFIG_LIBTOOL_INIT([INIT-COMMANDS])
+# ----------------------------------------
+# Register INIT-COMMANDS to be passed to AC_CONFIG_COMMANDS later.
+m4_define([_LT_CONFIG_LIBTOOL_INIT],
+[m4_ifval([$1],
+          [m4_append([_LT_OUTPUT_LIBTOOL_INIT],
+                     [$1
+])])])
+
+# Initialize.
+m4_define([_LT_OUTPUT_LIBTOOL_INIT])
+
+
+# _LT_CONFIG_LIBTOOL([COMMANDS])
+# ------------------------------
+# Register COMMANDS to be passed to AC_CONFIG_COMMANDS later.
+m4_define([_LT_CONFIG_LIBTOOL],
+[m4_ifval([$1],
+          [m4_append([_LT_OUTPUT_LIBTOOL_COMMANDS],
+                     [$1
+])])])
+
+# Initialize.
+m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS])
+
+
+# _LT_CONFIG_SAVE_COMMANDS([COMMANDS], [INIT_COMMANDS])
+# -----------------------------------------------------
+m4_defun([_LT_CONFIG_SAVE_COMMANDS],
+[_LT_CONFIG_LIBTOOL([$1])
+_LT_CONFIG_LIBTOOL_INIT([$2])
+])
+
+
+# _LT_FORMAT_COMMENT([COMMENT])
+# -----------------------------
+# Add leading comment marks to the start of each line, and a trailing
+# full-stop to the whole comment if one is not present already.
+m4_define([_LT_FORMAT_COMMENT],
+[m4_ifval([$1], [
+m4_bpatsubst([m4_bpatsubst([$1], [^ *], [# ])],
+              [['`$\]], [\\\&])]m4_bmatch([$1], [[!?.]$], [], [.])
+)])
+
+
+
+## ------------------------ ##
+## FIXME: Eliminate VARNAME ##
+## ------------------------ ##
+
+
+# _LT_DECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION], [IS-TAGGED?])
+# -------------------------------------------------------------------
+# CONFIGNAME is the name given to the value in the libtool script.
+# VARNAME is the (base) name used in the configure script.
+# VALUE may be 0, 1 or 2 for a computed quote escaped value based on
+# VARNAME.  Any other value will be used directly.
+m4_define([_LT_DECL],
+[lt_if_append_uniq([lt_decl_varnames], [$2], [, ],
+    [lt_dict_add_subkey([lt_decl_dict], [$2], [libtool_name],
+	[m4_ifval([$1], [$1], [$2])])
+    lt_dict_add_subkey([lt_decl_dict], [$2], [value], [$3])
+    m4_ifval([$4],
+	[lt_dict_add_subkey([lt_decl_dict], [$2], [description], [$4])])
+    lt_dict_add_subkey([lt_decl_dict], [$2],
+	[tagged?], [m4_ifval([$5], [yes], [no])])])
+])
+
+
+# _LT_TAGDECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION])
+# --------------------------------------------------------
+m4_define([_LT_TAGDECL], [_LT_DECL([$1], [$2], [$3], [$4], [yes])])
+
+
+# lt_decl_tag_varnames([SEPARATOR], [VARNAME1...])
+# ------------------------------------------------
+m4_define([lt_decl_tag_varnames],
+[_lt_decl_filter([tagged?], [yes], $@)])
+
+
+# _lt_decl_filter(SUBKEY, VALUE, [SEPARATOR], [VARNAME1..])
+# ---------------------------------------------------------
+m4_define([_lt_decl_filter],
+[m4_case([$#],
+  [0], [m4_fatal([$0: too few arguments: $#])],
+  [1], [m4_fatal([$0: too few arguments: $#: $1])],
+  [2], [lt_dict_filter([lt_decl_dict], [$1], [$2], [], lt_decl_varnames)],
+  [3], [lt_dict_filter([lt_decl_dict], [$1], [$2], [$3], lt_decl_varnames)],
+  [lt_dict_filter([lt_decl_dict], $@)])[]dnl
+])
+
+
+# lt_decl_quote_varnames([SEPARATOR], [VARNAME1...])
+# --------------------------------------------------
+m4_define([lt_decl_quote_varnames],
+[_lt_decl_filter([value], [1], $@)])
+
+
+# lt_decl_dquote_varnames([SEPARATOR], [VARNAME1...])
+# ---------------------------------------------------
+m4_define([lt_decl_dquote_varnames],
+[_lt_decl_filter([value], [2], $@)])
+
+
+# lt_decl_varnames_tagged([SEPARATOR], [VARNAME1...])
+# ---------------------------------------------------
+m4_define([lt_decl_varnames_tagged],
+[m4_assert([$# <= 2])dnl
+_$0(m4_quote(m4_default([$1], [[, ]])),
+    m4_ifval([$2], [[$2]], [m4_dquote(lt_decl_tag_varnames)]),
+    m4_split(m4_normalize(m4_quote(_LT_TAGS)), [ ]))])
+m4_define([_lt_decl_varnames_tagged],
+[m4_ifval([$3], [lt_combine([$1], [$2], [_], $3)])])
+
+
+# lt_decl_all_varnames([SEPARATOR], [VARNAME1...])
+# ------------------------------------------------
+m4_define([lt_decl_all_varnames],
+[_$0(m4_quote(m4_default([$1], [[, ]])),
+     m4_if([$2], [],
+	   m4_quote(lt_decl_varnames),
+	m4_quote(m4_shift($@))))[]dnl
+])
+m4_define([_lt_decl_all_varnames],
+[lt_join($@, lt_decl_varnames_tagged([$1],
+			lt_decl_tag_varnames([[, ]], m4_shift($@))))dnl
+])
+
+
+# _LT_CONFIG_STATUS_DECLARE([VARNAME])
+# ------------------------------------
+# Quote a variable value, and forward it to `config.status' so that its
+# declaration there will have the same value as in `configure'.  VARNAME
+# must have a single quote delimited value for this to work.
+m4_define([_LT_CONFIG_STATUS_DECLARE],
+[$1='`$ECHO "$][$1" | $SED "$delay_single_quote_subst"`'])
+
+
+# _LT_CONFIG_STATUS_DECLARATIONS
+# ------------------------------
+# We delimit libtool config variables with single quotes, so when
+# we write them to config.status, we have to be sure to quote all
+# embedded single quotes properly.  In configure, this macro expands
+# each variable declared with _LT_DECL (and _LT_TAGDECL) into:
+#
+#    <var>='`$ECHO "$<var>" | $SED "$delay_single_quote_subst"`'
+m4_defun([_LT_CONFIG_STATUS_DECLARATIONS],
+[m4_foreach([_lt_var], m4_quote(lt_decl_all_varnames),
+    [m4_n([_LT_CONFIG_STATUS_DECLARE(_lt_var)])])])
+
+
+# _LT_LIBTOOL_TAGS
+# ----------------
+# Output comment and list of tags supported by the script
+m4_defun([_LT_LIBTOOL_TAGS],
+[_LT_FORMAT_COMMENT([The names of the tagged configurations supported by this script])dnl
+available_tags="_LT_TAGS"dnl
+])
+
+
+# _LT_LIBTOOL_DECLARE(VARNAME, [TAG])
+# -----------------------------------
+# Extract the dictionary values for VARNAME (optionally with TAG) and
+# expand to a commented shell variable setting:
+#
+#    # Some comment about what VAR is for.
+#    visible_name=$lt_internal_name
+m4_define([_LT_LIBTOOL_DECLARE],
+[_LT_FORMAT_COMMENT(m4_quote(lt_dict_fetch([lt_decl_dict], [$1],
+					   [description])))[]dnl
+m4_pushdef([_libtool_name],
+    m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [libtool_name])))[]dnl
+m4_case(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [value])),
+    [0], [_libtool_name=[$]$1],
+    [1], [_libtool_name=$lt_[]$1],
+    [2], [_libtool_name=$lt_[]$1],
+    [_libtool_name=lt_dict_fetch([lt_decl_dict], [$1], [value])])[]dnl
+m4_ifval([$2], [_$2])[]m4_popdef([_libtool_name])[]dnl
+])
+
+
+# _LT_LIBTOOL_CONFIG_VARS
+# -----------------------
+# Produce commented declarations of non-tagged libtool config variables
+# suitable for insertion in the LIBTOOL CONFIG section of the `libtool'
+# script.  Tagged libtool config variables (even for the LIBTOOL CONFIG
+# section) are produced by _LT_LIBTOOL_TAG_VARS.
+m4_defun([_LT_LIBTOOL_CONFIG_VARS],
+[m4_foreach([_lt_var],
+    m4_quote(_lt_decl_filter([tagged?], [no], [], lt_decl_varnames)),
+    [m4_n([_LT_LIBTOOL_DECLARE(_lt_var)])])])
+
+
+# _LT_LIBTOOL_TAG_VARS(TAG)
+# -------------------------
+m4_define([_LT_LIBTOOL_TAG_VARS],
+[m4_foreach([_lt_var], m4_quote(lt_decl_tag_varnames),
+    [m4_n([_LT_LIBTOOL_DECLARE(_lt_var, [$1])])])])
+
+
+# _LT_TAGVAR(VARNAME, [TAGNAME])
+# ------------------------------
+m4_define([_LT_TAGVAR], [m4_ifval([$2], [$1_$2], [$1])])
+
+
+# _LT_CONFIG_COMMANDS
+# -------------------
+# Send accumulated output to $CONFIG_STATUS.  Thanks to the lists of
+# variables for single and double quote escaping we saved from calls
+# to _LT_DECL, we can put quote escaped variables declarations
+# into `config.status', and then the shell code to quote escape them in
+# for loops in `config.status'.  Finally, any additional code accumulated
+# from calls to _LT_CONFIG_LIBTOOL_INIT is expanded.
+m4_defun([_LT_CONFIG_COMMANDS],
+[AC_PROVIDE_IFELSE([LT_OUTPUT],
+	dnl If the libtool generation code has been placed in $CONFIG_LT,
+	dnl instead of duplicating it all over again into config.status,
+	dnl then we will have config.status run $CONFIG_LT later, so it
+	dnl needs to know what name is stored there:
+        [AC_CONFIG_COMMANDS([libtool],
+            [$SHELL $CONFIG_LT || AS_EXIT(1)], [CONFIG_LT='$CONFIG_LT'])],
+    dnl If the libtool generation code is destined for config.status,
+    dnl expand the accumulated commands and init code now:
+    [AC_CONFIG_COMMANDS([libtool],
+        [_LT_OUTPUT_LIBTOOL_COMMANDS], [_LT_OUTPUT_LIBTOOL_COMMANDS_INIT])])
+])#_LT_CONFIG_COMMANDS
+
+
+# Initialize.
+m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS_INIT],
+[
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+sed_quote_subst='$sed_quote_subst'
+double_quote_subst='$double_quote_subst'
+delay_variable_subst='$delay_variable_subst'
+_LT_CONFIG_STATUS_DECLARATIONS
+LTCC='$LTCC'
+LTCFLAGS='$LTCFLAGS'
+compiler='$compiler_DEFAULT'
+
+# A function that is used when there is no print builtin or printf.
+func_fallback_echo ()
+{
+  eval 'cat <<_LTECHO_EOF
+\$[]1
+_LTECHO_EOF'
+}
+
+# Quote evaled strings.
+for var in lt_decl_all_varnames([[ \
+]], lt_decl_quote_varnames); do
+    case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in
+    *[[\\\\\\\`\\"\\\$]]*)
+      eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\""
+      ;;
+    *)
+      eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\""
+      ;;
+    esac
+done
+
+# Double-quote double-evaled strings.
+for var in lt_decl_all_varnames([[ \
+]], lt_decl_dquote_varnames); do
+    case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in
+    *[[\\\\\\\`\\"\\\$]]*)
+      eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\""
+      ;;
+    *)
+      eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\""
+      ;;
+    esac
+done
+
+_LT_OUTPUT_LIBTOOL_INIT
+])
+
+# _LT_GENERATED_FILE_INIT(FILE, [COMMENT])
+# ----------------------------------------
+# Generate a child script FILE with all initialization necessary to
+# reuse the environment learned by the parent script, and make the
+# file executable.  If COMMENT is supplied, it is inserted after the
+# `#!' sequence but before initialization text begins.  After this
+# macro, additional text can be appended to FILE to form the body of
+# the child script.  The macro ends with non-zero status if the
+# file could not be fully written (such as if the disk is full).
+m4_ifdef([AS_INIT_GENERATED],
+[m4_defun([_LT_GENERATED_FILE_INIT],[AS_INIT_GENERATED($@)])],
+[m4_defun([_LT_GENERATED_FILE_INIT],
+[m4_require([AS_PREPARE])]dnl
+[m4_pushdef([AS_MESSAGE_LOG_FD])]dnl
+[lt_write_fail=0
+cat >$1 <<_ASEOF || lt_write_fail=1
+#! $SHELL
+# Generated by $as_me.
+$2
+SHELL=\${CONFIG_SHELL-$SHELL}
+export SHELL
+_ASEOF
+cat >>$1 <<\_ASEOF || lt_write_fail=1
+AS_SHELL_SANITIZE
+_AS_PREPARE
+exec AS_MESSAGE_FD>&1
+_ASEOF
+test $lt_write_fail = 0 && chmod +x $1[]dnl
+m4_popdef([AS_MESSAGE_LOG_FD])])])# _LT_GENERATED_FILE_INIT
+
+# LT_OUTPUT
+# ---------
+# This macro allows early generation of the libtool script (before
+# AC_OUTPUT is called), in case it is used in configure for compilation
+# tests.
+AC_DEFUN([LT_OUTPUT],
+[: ${CONFIG_LT=./config.lt}
+AC_MSG_NOTICE([creating $CONFIG_LT])
+_LT_GENERATED_FILE_INIT(["$CONFIG_LT"],
+[# Run this file to recreate a libtool stub with the current configuration.])
+
+cat >>"$CONFIG_LT" <<\_LTEOF
+lt_cl_silent=false
+exec AS_MESSAGE_LOG_FD>>config.log
+{
+  echo
+  AS_BOX([Running $as_me.])
+} >&AS_MESSAGE_LOG_FD
+
+lt_cl_help="\
+\`$as_me' creates a local libtool stub from the current configuration,
+for use in further configure time tests before the real libtool is
+generated.
+
+Usage: $[0] [[OPTIONS]]
+
+  -h, --help      print this help, then exit
+  -V, --version   print version number, then exit
+  -q, --quiet     do not print progress messages
+  -d, --debug     don't remove temporary files
+
+Report bugs to <bug-libtool@gnu.org>."
+
+lt_cl_version="\
+m4_ifset([AC_PACKAGE_NAME], [AC_PACKAGE_NAME ])config.lt[]dnl
+m4_ifset([AC_PACKAGE_VERSION], [ AC_PACKAGE_VERSION])
+configured by $[0], generated by m4_PACKAGE_STRING.
+
+Copyright (C) 2011 Free Software Foundation, Inc.
+This config.lt script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it."
+
+while test $[#] != 0
+do
+  case $[1] in
+    --version | --v* | -V )
+      echo "$lt_cl_version"; exit 0 ;;
+    --help | --h* | -h )
+      echo "$lt_cl_help"; exit 0 ;;
+    --debug | --d* | -d )
+      debug=: ;;
+    --quiet | --q* | --silent | --s* | -q )
+      lt_cl_silent=: ;;
+
+    -*) AC_MSG_ERROR([unrecognized option: $[1]
+Try \`$[0] --help' for more information.]) ;;
+
+    *) AC_MSG_ERROR([unrecognized argument: $[1]
+Try \`$[0] --help' for more information.]) ;;
+  esac
+  shift
+done
+
+if $lt_cl_silent; then
+  exec AS_MESSAGE_FD>/dev/null
+fi
+_LTEOF
+
+cat >>"$CONFIG_LT" <<_LTEOF
+_LT_OUTPUT_LIBTOOL_COMMANDS_INIT
+_LTEOF
+
+cat >>"$CONFIG_LT" <<\_LTEOF
+AC_MSG_NOTICE([creating $ofile])
+_LT_OUTPUT_LIBTOOL_COMMANDS
+AS_EXIT(0)
+_LTEOF
+chmod +x "$CONFIG_LT"
+
+# configure is writing to config.log, but config.lt does its own redirection,
+# appending to config.log, which fails on DOS, as config.log is still kept
+# open by configure.  Here we exec the FD to /dev/null, effectively closing
+# config.log, so it can be properly (re)opened and appended to by config.lt.
+lt_cl_success=:
+test "$silent" = yes &&
+  lt_config_lt_args="$lt_config_lt_args --quiet"
+exec AS_MESSAGE_LOG_FD>/dev/null
+$SHELL "$CONFIG_LT" $lt_config_lt_args || lt_cl_success=false
+exec AS_MESSAGE_LOG_FD>>config.log
+$lt_cl_success || AS_EXIT(1)
+])# LT_OUTPUT
+
+
+# _LT_CONFIG(TAG)
+# ---------------
+# If TAG is the built-in tag, create an initial libtool script with a
+# default configuration from the untagged config vars.  Otherwise add code
+# to config.status for appending the configuration named by TAG from the
+# matching tagged config vars.
+m4_defun([_LT_CONFIG],
+[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+_LT_CONFIG_SAVE_COMMANDS([
+  m4_define([_LT_TAG], m4_if([$1], [], [C], [$1]))dnl
+  m4_if(_LT_TAG, [C], [
+    # See if we are running on zsh, and set the options which allow our
+    # commands through without removal of \ escapes.
+    if test -n "${ZSH_VERSION+set}" ; then
+      setopt NO_GLOB_SUBST
+    fi
+
+    cfgfile="${ofile}T"
+    trap "$RM \"$cfgfile\"; exit 1" 1 2 15
+    $RM "$cfgfile"
+
+    cat <<_LT_EOF >> "$cfgfile"
+#! $SHELL
+
+# `$ECHO "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services.
+# Generated automatically by $as_me ($PACKAGE$TIMESTAMP) $VERSION
+# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
+# NOTE: Changes made to this file will be lost: look at ltmain.sh.
+#
+_LT_COPYING
+_LT_LIBTOOL_TAGS
+
+# ### BEGIN LIBTOOL CONFIG
+_LT_LIBTOOL_CONFIG_VARS
+_LT_LIBTOOL_TAG_VARS
+# ### END LIBTOOL CONFIG
+
+_LT_EOF
+
+  case $host_os in
+  aix3*)
+    cat <<\_LT_EOF >> "$cfgfile"
+# AIX sometimes has problems with the GCC collect2 program.  For some
+# reason, if we set the COLLECT_NAMES environment variable, the problems
+# vanish in a puff of smoke.
+if test "X${COLLECT_NAMES+set}" != Xset; then
+  COLLECT_NAMES=
+  export COLLECT_NAMES
+fi
+_LT_EOF
+    ;;
+  esac
+
+  _LT_PROG_LTMAIN
+
+  # We use sed instead of cat because bash on DJGPP gets confused if
+  # if finds mixed CR/LF and LF-only lines.  Since sed operates in
+  # text mode, it properly converts lines to CR/LF.  This bash problem
+  # is reportedly fixed, but why not run on old versions too?
+  sed '$q' "$ltmain" >> "$cfgfile" \
+     || (rm -f "$cfgfile"; exit 1)
+
+  _LT_PROG_REPLACE_SHELLFNS
+
+   mv -f "$cfgfile" "$ofile" ||
+    (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile")
+  chmod +x "$ofile"
+],
+[cat <<_LT_EOF >> "$ofile"
+
+dnl Unfortunately we have to use $1 here, since _LT_TAG is not expanded
+dnl in a comment (ie after a #).
+# ### BEGIN LIBTOOL TAG CONFIG: $1
+_LT_LIBTOOL_TAG_VARS(_LT_TAG)
+# ### END LIBTOOL TAG CONFIG: $1
+_LT_EOF
+])dnl /m4_if
+],
+[m4_if([$1], [], [
+    PACKAGE='$PACKAGE'
+    VERSION='$VERSION'
+    TIMESTAMP='$TIMESTAMP'
+    RM='$RM'
+    ofile='$ofile'], [])
+])dnl /_LT_CONFIG_SAVE_COMMANDS
+])# _LT_CONFIG
+
+
+# LT_SUPPORTED_TAG(TAG)
+# ---------------------
+# Trace this macro to discover what tags are supported by the libtool
+# --tag option, using:
+#    autoconf --trace 'LT_SUPPORTED_TAG:$1'
+AC_DEFUN([LT_SUPPORTED_TAG], [])
+
+
+# C support is built-in for now
+m4_define([_LT_LANG_C_enabled], [])
+m4_define([_LT_TAGS], [])
+
+
+# LT_LANG(LANG)
+# -------------
+# Enable libtool support for the given language if not already enabled.
+AC_DEFUN([LT_LANG],
+[AC_BEFORE([$0], [LT_OUTPUT])dnl
+m4_case([$1],
+  [C],			[_LT_LANG(C)],
+  [C++],		[_LT_LANG(CXX)],
+  [Go],			[_LT_LANG(GO)],
+  [Java],		[_LT_LANG(GCJ)],
+  [Fortran 77],		[_LT_LANG(F77)],
+  [Fortran],		[_LT_LANG(FC)],
+  [Windows Resource],	[_LT_LANG(RC)],
+  [m4_ifdef([_LT_LANG_]$1[_CONFIG],
+    [_LT_LANG($1)],
+    [m4_fatal([$0: unsupported language: "$1"])])])dnl
+])# LT_LANG
+
+
+# _LT_LANG(LANGNAME)
+# ------------------
+m4_defun([_LT_LANG],
+[m4_ifdef([_LT_LANG_]$1[_enabled], [],
+  [LT_SUPPORTED_TAG([$1])dnl
+  m4_append([_LT_TAGS], [$1 ])dnl
+  m4_define([_LT_LANG_]$1[_enabled], [])dnl
+  _LT_LANG_$1_CONFIG($1)])dnl
+])# _LT_LANG
+
+
+m4_ifndef([AC_PROG_GO], [
+############################################################
+# NOTE: This macro has been submitted for inclusion into   #
+#  GNU Autoconf as AC_PROG_GO.  When it is available in    #
+#  a released version of Autoconf we should remove this    #
+#  macro and use it instead.                               #
+############################################################
+m4_defun([AC_PROG_GO],
+[AC_LANG_PUSH(Go)dnl
+AC_ARG_VAR([GOC],     [Go compiler command])dnl
+AC_ARG_VAR([GOFLAGS], [Go compiler flags])dnl
+_AC_ARG_VAR_LDFLAGS()dnl
+AC_CHECK_TOOL(GOC, gccgo)
+if test -z "$GOC"; then
+  if test -n "$ac_tool_prefix"; then
+    AC_CHECK_PROG(GOC, [${ac_tool_prefix}gccgo], [${ac_tool_prefix}gccgo])
+  fi
+fi
+if test -z "$GOC"; then
+  AC_CHECK_PROG(GOC, gccgo, gccgo, false)
+fi
+])#m4_defun
+])#m4_ifndef
+
+
+# _LT_LANG_DEFAULT_CONFIG
+# -----------------------
+m4_defun([_LT_LANG_DEFAULT_CONFIG],
+[AC_PROVIDE_IFELSE([AC_PROG_CXX],
+  [LT_LANG(CXX)],
+  [m4_define([AC_PROG_CXX], defn([AC_PROG_CXX])[LT_LANG(CXX)])])
+
+AC_PROVIDE_IFELSE([AC_PROG_F77],
+  [LT_LANG(F77)],
+  [m4_define([AC_PROG_F77], defn([AC_PROG_F77])[LT_LANG(F77)])])
+
+AC_PROVIDE_IFELSE([AC_PROG_FC],
+  [LT_LANG(FC)],
+  [m4_define([AC_PROG_FC], defn([AC_PROG_FC])[LT_LANG(FC)])])
+
+dnl The call to [A][M_PROG_GCJ] is quoted like that to stop aclocal
+dnl pulling things in needlessly.
+AC_PROVIDE_IFELSE([AC_PROG_GCJ],
+  [LT_LANG(GCJ)],
+  [AC_PROVIDE_IFELSE([A][M_PROG_GCJ],
+    [LT_LANG(GCJ)],
+    [AC_PROVIDE_IFELSE([LT_PROG_GCJ],
+      [LT_LANG(GCJ)],
+      [m4_ifdef([AC_PROG_GCJ],
+	[m4_define([AC_PROG_GCJ], defn([AC_PROG_GCJ])[LT_LANG(GCJ)])])
+       m4_ifdef([A][M_PROG_GCJ],
+	[m4_define([A][M_PROG_GCJ], defn([A][M_PROG_GCJ])[LT_LANG(GCJ)])])
+       m4_ifdef([LT_PROG_GCJ],
+	[m4_define([LT_PROG_GCJ], defn([LT_PROG_GCJ])[LT_LANG(GCJ)])])])])])
+
+AC_PROVIDE_IFELSE([AC_PROG_GO],
+  [LT_LANG(GO)],
+  [m4_define([AC_PROG_GO], defn([AC_PROG_GO])[LT_LANG(GO)])])
+
+AC_PROVIDE_IFELSE([LT_PROG_RC],
+  [LT_LANG(RC)],
+  [m4_define([LT_PROG_RC], defn([LT_PROG_RC])[LT_LANG(RC)])])
+])# _LT_LANG_DEFAULT_CONFIG
+
+# Obsolete macros:
+AU_DEFUN([AC_LIBTOOL_CXX], [LT_LANG(C++)])
+AU_DEFUN([AC_LIBTOOL_F77], [LT_LANG(Fortran 77)])
+AU_DEFUN([AC_LIBTOOL_FC], [LT_LANG(Fortran)])
+AU_DEFUN([AC_LIBTOOL_GCJ], [LT_LANG(Java)])
+AU_DEFUN([AC_LIBTOOL_RC], [LT_LANG(Windows Resource)])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_LIBTOOL_CXX], [])
+dnl AC_DEFUN([AC_LIBTOOL_F77], [])
+dnl AC_DEFUN([AC_LIBTOOL_FC], [])
+dnl AC_DEFUN([AC_LIBTOOL_GCJ], [])
+dnl AC_DEFUN([AC_LIBTOOL_RC], [])
+
+
+# _LT_TAG_COMPILER
+# ----------------
+m4_defun([_LT_TAG_COMPILER],
+[AC_REQUIRE([AC_PROG_CC])dnl
+
+_LT_DECL([LTCC], [CC], [1], [A C compiler])dnl
+_LT_DECL([LTCFLAGS], [CFLAGS], [1], [LTCC compiler flags])dnl
+_LT_TAGDECL([CC], [compiler], [1], [A language specific compiler])dnl
+_LT_TAGDECL([with_gcc], [GCC], [0], [Is the compiler the GNU compiler?])dnl
+
+# If no C compiler was specified, use CC.
+LTCC=${LTCC-"$CC"}
+
+# If no C compiler flags were specified, use CFLAGS.
+LTCFLAGS=${LTCFLAGS-"$CFLAGS"}
+
+# Allow CC to be a program name with arguments.
+compiler=$CC
+])# _LT_TAG_COMPILER
+
+
+# _LT_COMPILER_BOILERPLATE
+# ------------------------
+# Check for compiler boilerplate output or warnings with
+# the simple compiler test code.
+m4_defun([_LT_COMPILER_BOILERPLATE],
+[m4_require([_LT_DECL_SED])dnl
+ac_outfile=conftest.$ac_objext
+echo "$lt_simple_compile_test_code" >conftest.$ac_ext
+eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
+_lt_compiler_boilerplate=`cat conftest.err`
+$RM conftest*
+])# _LT_COMPILER_BOILERPLATE
+
+
+# _LT_LINKER_BOILERPLATE
+# ----------------------
+# Check for linker boilerplate output or warnings with
+# the simple link test code.
+m4_defun([_LT_LINKER_BOILERPLATE],
+[m4_require([_LT_DECL_SED])dnl
+ac_outfile=conftest.$ac_objext
+echo "$lt_simple_link_test_code" >conftest.$ac_ext
+eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
+_lt_linker_boilerplate=`cat conftest.err`
+$RM -r conftest*
+])# _LT_LINKER_BOILERPLATE
+
+# _LT_REQUIRED_DARWIN_CHECKS
+# -------------------------
+# On rhapsody and darwin hosts, locate the Mach-O helper tools and probe
+# the linker for the -single_module, -exported_symbols_list and
+# -force_load flags.  The results are recorded in the shell variables
+# _lt_dar_allow_undefined, _lt_dar_single_mod, _lt_dar_export_syms and
+# _lt_dsymutil, which _LT_DARWIN_LINKER_FEATURES reads.  Nothing is done
+# on other hosts.
+m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[
+  case $host_os in
+    rhapsody* | darwin*)
+    AC_CHECK_TOOL([DSYMUTIL], [dsymutil], [:])
+    AC_CHECK_TOOL([NMEDIT], [nmedit], [:])
+    AC_CHECK_TOOL([LIPO], [lipo], [:])
+    AC_CHECK_TOOL([OTOOL], [otool], [:])
+    AC_CHECK_TOOL([OTOOL64], [otool64], [:])
+    _LT_DECL([], [DSYMUTIL], [1],
+      [Tool to manipulate archived DWARF debug symbol files on Mac OS X])
+    _LT_DECL([], [NMEDIT], [1],
+      [Tool to change global to local symbols on Mac OS X])
+    _LT_DECL([], [LIPO], [1],
+      [Tool to manipulate fat objects and archives on Mac OS X])
+    _LT_DECL([], [OTOOL], [1],
+      [ldd/readelf like tool for Mach-O binaries on Mac OS X])
+    _LT_DECL([], [OTOOL64], [1],
+      [ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4])
+
+    AC_CACHE_CHECK([for -single_module linker flag],[lt_cv_apple_cc_single_mod],
+      [lt_cv_apple_cc_single_mod=no
+      if test -z "${LT_MULTI_MODULE}"; then
+	# By default we will add the -single_module flag. You can override
+	# by either setting the environment variable LT_MULTI_MODULE
+	# non-empty at configure time, or by adding -multi_module to the
+	# link flags.
+	rm -rf libconftest.dylib*
+	echo "int foo(void){return 1;}" > conftest.c
+	echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
+-dynamiclib -Wl,-single_module conftest.c" >&AS_MESSAGE_LOG_FD
+	$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
+	  -dynamiclib -Wl,-single_module conftest.c 2>conftest.err
+        _lt_result=$?
+	# If there is a non-empty error log, and "single_module"
+	# appears in it, assume the flag caused a linker warning
+        if test -s conftest.err && $GREP single_module conftest.err; then
+	  cat conftest.err >&AS_MESSAGE_LOG_FD
+	# Otherwise, if the output was created with a 0 exit code from
+	# the compiler, it worked.
+	elif test -f libconftest.dylib && test $_lt_result -eq 0; then
+	  lt_cv_apple_cc_single_mod=yes
+	else
+	  cat conftest.err >&AS_MESSAGE_LOG_FD
+	fi
+	rm -rf libconftest.dylib*
+	rm -f conftest.*
+      fi])
+
+    AC_CACHE_CHECK([for -exported_symbols_list linker flag],
+      [lt_cv_ld_exported_symbols_list],
+      [lt_cv_ld_exported_symbols_list=no
+      save_LDFLAGS=$LDFLAGS
+      echo "_main" > conftest.sym
+      LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym"
+      AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])],
+	[lt_cv_ld_exported_symbols_list=yes],
+	[lt_cv_ld_exported_symbols_list=no])
+	LDFLAGS="$save_LDFLAGS"
+    ])
+
+    AC_CACHE_CHECK([for -force_load linker flag],[lt_cv_ld_force_load],
+      [lt_cv_ld_force_load=no
+      cat > conftest.c << _LT_EOF
+int forced_loaded() { return 2;}
+_LT_EOF
+      echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&AS_MESSAGE_LOG_FD
+      $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&AS_MESSAGE_LOG_FD
+      echo "$AR cru libconftest.a conftest.o" >&AS_MESSAGE_LOG_FD
+      $AR cru libconftest.a conftest.o 2>&AS_MESSAGE_LOG_FD
+      echo "$RANLIB libconftest.a" >&AS_MESSAGE_LOG_FD
+      $RANLIB libconftest.a 2>&AS_MESSAGE_LOG_FD
+      cat > conftest.c << _LT_EOF
+int main() { return 0;}
+_LT_EOF
+      echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&AS_MESSAGE_LOG_FD
+      $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err
+      _lt_result=$?
+      if test -s conftest.err && $GREP force_load conftest.err; then
+	cat conftest.err >&AS_MESSAGE_LOG_FD
+      elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then
+	lt_cv_ld_force_load=yes
+      else
+	cat conftest.err >&AS_MESSAGE_LOG_FD
+      fi
+        rm -f conftest.err libconftest.a conftest conftest.c
+        rm -rf conftest.dSYM
+    ])
+    case $host_os in
+    rhapsody* | darwin1.[[012]])
+      _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;;
+    darwin1.*)
+      _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;;
+    darwin*) # darwin 5.x on
+      # if running on 10.5 or later, the deployment target defaults
+      # to the OS version, if on x86, and 10.4, the deployment
+      # target defaults to 10.4. Don't you love it?
+      # The word matched below is the deployment target, a comma, then the
+      # host triplet.  The flat_namespace pattern requires a comma or a dot
+      # right after the minor digit so that 10.10 and later are not taken
+      # for 10.1.  The first pattern also accepts darwin 20 and later, and
+      # the final catch-all covers deployment targets of 11 and later so
+      # that the flag is never left unset.
+      case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in
+	10.0,*86*-darwin8*|10.0,*-darwin[[912]]*)
+	  _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;;
+	10.[[012]][[,.]]*)
+	  _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;;
+	*)
+	  _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;;
+      esac
+    ;;
+  esac
+    if test "$lt_cv_apple_cc_single_mod" = "yes"; then
+      _lt_dar_single_mod='$single_module'
+    fi
+    if test "$lt_cv_ld_exported_symbols_list" = "yes"; then
+      _lt_dar_export_syms=' ${wl}-exported_symbols_list,$output_objdir/${libname}-symbols.expsym'
+    else
+      _lt_dar_export_syms='~$NMEDIT -s $output_objdir/${libname}-symbols.expsym ${lib}'
+    fi
+    if test "$DSYMUTIL" != ":" && test "$lt_cv_ld_force_load" = "no"; then
+      _lt_dsymutil='~$DSYMUTIL $lib || :'
+    else
+      _lt_dsymutil=
+    fi
+    ;;
+  esac
+])
+
+
+# _LT_DARWIN_LINKER_FEATURES([TAG])
+# ---------------------------------
+# Checks for linker and compiler features on darwin
+# Sets the per-tag archive and module link commands from the results of
+# _LT_REQUIRED_DARWIN_CHECKS.  Shared libraries are enabled when the
+# compiler basename matches ifort* or when $GCC is yes; otherwise
+# ld_shlibs is set to no for the tag.  For the CXX tag the archive
+# commands are replaced by a two step link through a master object when
+# -single_module is not supported.
+m4_defun([_LT_DARWIN_LINKER_FEATURES],
+[
+  m4_require([_LT_REQUIRED_DARWIN_CHECKS])
+  _LT_TAGVAR(archive_cmds_need_lc, $1)=no
+  _LT_TAGVAR(hardcode_direct, $1)=no
+  _LT_TAGVAR(hardcode_automatic, $1)=yes
+  _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
+  if test "$lt_cv_ld_force_load" = "yes"; then
+    _LT_TAGVAR(whole_archive_flag_spec, $1)='`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
+    m4_case([$1], [F77], [_LT_TAGVAR(compiler_needs_object, $1)=yes],
+                  [FC],  [_LT_TAGVAR(compiler_needs_object, $1)=yes])
+  else
+    _LT_TAGVAR(whole_archive_flag_spec, $1)=''
+  fi
+  _LT_TAGVAR(link_all_deplibs, $1)=yes
+  _LT_TAGVAR(allow_undefined_flag, $1)="$_lt_dar_allow_undefined"
+  case $cc_basename in
+     ifort*) _lt_dar_can_shared=yes ;;
+     *) _lt_dar_can_shared=$GCC ;;
+  esac
+  if test "$_lt_dar_can_shared" = "yes"; then
+    output_verbose_link_cmd=func_echo_all
+    _LT_TAGVAR(archive_cmds, $1)="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}"
+    _LT_TAGVAR(module_cmds, $1)="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}"
+    _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}"
+    _LT_TAGVAR(module_expsym_cmds, $1)="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}"
+    m4_if([$1], [CXX],
+[   if test "$lt_cv_apple_cc_single_mod" != "yes"; then
+      _LT_TAGVAR(archive_cmds, $1)="\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dsymutil}"
+      _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dar_export_syms}${_lt_dsymutil}"
+    fi
+],[])
+  else
+  _LT_TAGVAR(ld_shlibs, $1)=no
+  fi
+])
+
+# _LT_SYS_MODULE_PATH_AIX([TAGNAME])
+# ----------------------------------
+# Links a minimal program and checks the executable
+# for the system default hardcoded library path. In most cases,
+# this is /usr/lib:/lib, but when the MPI compilers are used
+# the location of the communication and MPI libs are included too.
+# If we don't find anything, use the default library path according
+# to the aix ld manual.
+# Store the results from the different compilers for each TAGNAME.
+# Allow to override them for all tags through lt_cv_aix_libpath.
+# The path is read from the Import File Strings section printed by
+# dump -H, with dump -HX64 tried when the first attempt yields nothing.
+# The result is left in the shell variable aix_libpath.
+m4_defun([_LT_SYS_MODULE_PATH_AIX],
+[m4_require([_LT_DECL_SED])dnl
+if test "${lt_cv_aix_libpath+set}" = set; then
+  aix_libpath=$lt_cv_aix_libpath
+else
+  AC_CACHE_VAL([_LT_TAGVAR([lt_cv_aix_libpath_], [$1])],
+  [AC_LINK_IFELSE([AC_LANG_PROGRAM],[
+  lt_aix_libpath_sed='[
+      /Import File Strings/,/^$/ {
+	  /^0/ {
+	      s/^0  *\([^ ]*\) *$/\1/
+	      p
+	  }
+      }]'
+  _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  # Check for a 64-bit object if we didn't find anything.
+  if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then
+    _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  fi],[])
+  if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then
+    _LT_TAGVAR([lt_cv_aix_libpath_], [$1])="/usr/lib:/lib"
+  fi
+  ])
+  aix_libpath=$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])
+fi
+])# _LT_SYS_MODULE_PATH_AIX
+
+
+# _LT_SHELL_INIT(ARG)
+# -------------------
+# Emit ARG, followed by a newline, into the M4SH-INIT diversion.
+m4_define([_LT_SHELL_INIT],
+[m4_divert_text([M4SH-INIT], [$1
+])])# _LT_SHELL_INIT
+
+
+
+# _LT_PROG_ECHO_BACKSLASH
+# -----------------------
+# Find how we can fake an echo command that does not interpret backslash.
+# In particular, with Autoconf 2.60 or later we add some code to the start
+# of the generated configure script which will find a shell with a builtin
+# printf (which we can use as an echo command).
+# The candidates are tried in this order: print -r, printf, and finally a
+# shell function built on a here-document.  The choice is stored in ECHO,
+# and func_echo_all is defined to call it with all arguments joined.
+m4_defun([_LT_PROG_ECHO_BACKSLASH],
+[ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO
+ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO
+
+AC_MSG_CHECKING([how to print strings])
+# Test print first, because it will be a builtin if present.
+if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \
+   test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then
+  ECHO='print -r --'
+elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then
+  ECHO='printf %s\n'
+else
+  # Use this function as a fallback that always works.
+  func_fallback_echo ()
+  {
+    eval 'cat <<_LTECHO_EOF
+$[]1
+_LTECHO_EOF'
+  }
+  ECHO='func_fallback_echo'
+fi
+
+# func_echo_all arg...
+# Invoke $ECHO with all args, space-separated.
+func_echo_all ()
+{
+    $ECHO "$*" 
+}
+
+# Report which of the three mechanisms was selected.
+case "$ECHO" in
+  printf*) AC_MSG_RESULT([printf]) ;;
+  print*) AC_MSG_RESULT([print -r]) ;;
+  *) AC_MSG_RESULT([cat]) ;;
+esac
+
+m4_ifdef([_AS_DETECT_SUGGESTED],
+[_AS_DETECT_SUGGESTED([
+  test -n "${ZSH_VERSION+set}${BASH_VERSION+set}" || (
+    ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+    ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO
+    ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO
+    PATH=/empty FPATH=/empty; export PATH FPATH
+    test "X`printf %s $ECHO`" = "X$ECHO" \
+      || test "X`print -r -- $ECHO`" = "X$ECHO" )])])
+
+_LT_DECL([], [SHELL], [1], [Shell to use when invoking shell scripts])
+_LT_DECL([], [ECHO], [1], [An echo program that protects backslashes])
+])# _LT_PROG_ECHO_BACKSLASH
+
+
+# _LT_WITH_SYSROOT
+# ----------------
+# Handle the --with-sysroot option and set the shell variable lt_sysroot.
+# A value of yes asks the compiler via --print-sysroot when GCC is yes;
+# an absolute path is used after quoting it with sed_quote_subst; no or
+# an empty value leaves lt_sysroot empty; any other value is a configure
+# error.  The option defaults to no when it is not given.
+AC_DEFUN([_LT_WITH_SYSROOT],
+[AC_MSG_CHECKING([for sysroot])
+AC_ARG_WITH([sysroot],
+[  --with-sysroot[=DIR] Search for dependent libraries within DIR
+                        (or the compiler's sysroot if not specified).],
+[], [with_sysroot=no])
+
+dnl lt_sysroot will always be passed unquoted.  We quote it here
+dnl in case the user passed a directory name.
+lt_sysroot=
+case ${with_sysroot} in #(
+ yes)
+   if test "$GCC" = yes; then
+     lt_sysroot=`$CC --print-sysroot 2>/dev/null`
+   fi
+   ;; #(
+ /*)
+   lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"`
+   ;; #(
+ no|'')
+   ;; #(
+ *)
+   AC_MSG_RESULT([${with_sysroot}])
+   AC_MSG_ERROR([The sysroot must be an absolute path.])
+   ;;
+esac
+
+ AC_MSG_RESULT([${lt_sysroot:-no}])
+_LT_DECL([], [lt_sysroot], [0], [The root where to search for ]dnl
+[dependent libraries, and in which our libraries should be installed.])])
+
+# _LT_ENABLE_LOCK
+# ---------------
+# Add the --disable-libtool-lock option and set need_locks from it.
+# For selected hosts a small test file is also compiled and inspected
+# with /usr/bin/file to find the ABI in use, and a matching emulation or
+# width flag is appended to LD.  On ia64 HP-UX the result goes into
+# HPUX_IA64_MODE instead, and on SCO OpenServer 5 the -belf flag is kept
+# in CFLAGS only when a link with it succeeds.
+m4_defun([_LT_ENABLE_LOCK],
+[AC_ARG_ENABLE([libtool-lock],
+  [AS_HELP_STRING([--disable-libtool-lock],
+    [avoid locking (might break parallel builds)])])
+test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes
+
+# Some flags need to be propagated to the compiler or linker for good
+# libtool support.
+case $host in
+ia64-*-hpux*)
+  # Find out which ABI we are using.
+  echo 'int i;' > conftest.$ac_ext
+  if AC_TRY_EVAL(ac_compile); then
+    case `/usr/bin/file conftest.$ac_objext` in
+      *ELF-32*)
+	HPUX_IA64_MODE="32"
+	;;
+      *ELF-64*)
+	HPUX_IA64_MODE="64"
+	;;
+    esac
+  fi
+  rm -rf conftest*
+  ;;
+*-*-irix6*)
+  # Find out which ABI we are using.
+  echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext
+  if AC_TRY_EVAL(ac_compile); then
+    if test "$lt_cv_prog_gnu_ld" = yes; then
+      case `/usr/bin/file conftest.$ac_objext` in
+	*32-bit*)
+	  LD="${LD-ld} -melf32bsmip"
+	  ;;
+	*N32*)
+	  LD="${LD-ld} -melf32bmipn32"
+	  ;;
+	*64-bit*)
+	  LD="${LD-ld} -melf64bmip"
+	;;
+      esac
+    else
+      case `/usr/bin/file conftest.$ac_objext` in
+	*32-bit*)
+	  LD="${LD-ld} -32"
+	  ;;
+	*N32*)
+	  LD="${LD-ld} -n32"
+	  ;;
+	*64-bit*)
+	  LD="${LD-ld} -64"
+	  ;;
+      esac
+    fi
+  fi
+  rm -rf conftest*
+  ;;
+
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
+s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
+  # Find out which ABI we are using.
+  echo 'int i;' > conftest.$ac_ext
+  if AC_TRY_EVAL(ac_compile); then
+    case `/usr/bin/file conftest.o` in
+      *32-bit*)
+	case $host in
+	  x86_64-*kfreebsd*-gnu)
+	    LD="${LD-ld} -m elf_i386_fbsd"
+	    ;;
+	  x86_64-*linux*)
+	    LD="${LD-ld} -m elf_i386"
+	    ;;
+	  ppc64-*linux*|powerpc64-*linux*)
+	    LD="${LD-ld} -m elf32ppclinux"
+	    ;;
+	  s390x-*linux*)
+	    LD="${LD-ld} -m elf_s390"
+	    ;;
+	  sparc64-*linux*)
+	    LD="${LD-ld} -m elf32_sparc"
+	    ;;
+	esac
+	;;
+      *64-bit*)
+	case $host in
+	  x86_64-*kfreebsd*-gnu)
+	    LD="${LD-ld} -m elf_x86_64_fbsd"
+	    ;;
+	  x86_64-*linux*)
+	    LD="${LD-ld} -m elf_x86_64"
+	    ;;
+	  ppc*-*linux*|powerpc*-*linux*)
+	    LD="${LD-ld} -m elf64ppc"
+	    ;;
+	  s390*-*linux*|s390*-*tpf*)
+	    LD="${LD-ld} -m elf64_s390"
+	    ;;
+	  sparc*-*linux*)
+	    LD="${LD-ld} -m elf64_sparc"
+	    ;;
+	esac
+	;;
+    esac
+  fi
+  rm -rf conftest*
+  ;;
+
+*-*-sco3.2v5*)
+  # On SCO OpenServer 5, we need -belf to get full-featured binaries.
+  SAVE_CFLAGS="$CFLAGS"
+  CFLAGS="$CFLAGS -belf"
+  AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf,
+    [AC_LANG_PUSH(C)
+     AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no])
+     AC_LANG_POP])
+  if test x"$lt_cv_cc_needs_belf" != x"yes"; then
+    # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf
+    CFLAGS="$SAVE_CFLAGS"
+  fi
+  ;;
+*-*solaris*)
+  # Find out which ABI we are using.
+  echo 'int i;' > conftest.$ac_ext
+  if AC_TRY_EVAL(ac_compile); then
+    case `/usr/bin/file conftest.o` in
+    *64-bit*)
+      case $lt_cv_prog_gnu_ld in
+      yes*)
+        case $host in
+        i?86-*-solaris*)
+          LD="${LD-ld} -m elf_x86_64"
+          ;;
+        sparc*-*-solaris*)
+          LD="${LD-ld} -m elf64_sparc"
+          ;;
+        esac
+        # GNU ld 2.21 introduced _sol2 emulations.  Use them if available.
+        if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then
+          LD="${LD-ld}_sol2"
+        fi
+        ;;
+      *)
+	if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then
+	  LD="${LD-ld} -64"
+	fi
+	;;
+      esac
+      ;;
+    esac
+  fi
+  rm -rf conftest*
+  ;;
+esac
+
+need_locks="$enable_libtool_lock"
+])# _LT_ENABLE_LOCK
+
+
+# _LT_PROG_AR
+# -----------
+# Locate the archiver, default AR to ar and AR_FLAGS to cru when they are
+# unset, and check whether the archiver accepts an @FILE argument that
+# lists its input files.  The result is stored in archiver_list_spec,
+# which is @ when supported and empty otherwise.
+m4_defun([_LT_PROG_AR],
+[AC_CHECK_TOOLS(AR, [ar], false)
+: ${AR=ar}
+: ${AR_FLAGS=cru}
+_LT_DECL([], [AR], [1], [The archiver])
+_LT_DECL([], [AR_FLAGS], [1], [Flags to create an archive])
+
+AC_CACHE_CHECK([for archiver @FILE support], [lt_cv_ar_at_file],
+  [lt_cv_ar_at_file=no
+   AC_COMPILE_IFELSE([AC_LANG_PROGRAM],
+     [echo conftest.$ac_objext > conftest.lst
+      lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&AS_MESSAGE_LOG_FD'
+      AC_TRY_EVAL([lt_ar_try])
+      if test "$ac_status" -eq 0; then
+	# Ensure the archiver fails upon bogus file names.
+	rm -f conftest.$ac_objext libconftest.a
+	AC_TRY_EVAL([lt_ar_try])
+	if test "$ac_status" -ne 0; then
+          lt_cv_ar_at_file=@
+        fi
+      fi
+      rm -f conftest.* libconftest.a
+     ])
+  ])
+
+if test "x$lt_cv_ar_at_file" = xno; then
+  archiver_list_spec=
+else
+  archiver_list_spec=$lt_cv_ar_at_file
+fi
+_LT_DECL([], [archiver_list_spec], [1],
+  [How to feed a file listing to the archiver])
+])# _LT_PROG_AR
+
+
+# _LT_CMD_OLD_ARCHIVE
+# -------------------
+# Locate STRIP and RANLIB and build the command strings used to create
+# and install old-style static archives.  When RANLIB is non-empty it is
+# appended to both the archive and the post-install commands, with the -t
+# flag on OpenBSD for the post-install step.  Archive extraction locking
+# is turned on for darwin hosts only.
+m4_defun([_LT_CMD_OLD_ARCHIVE],
+[_LT_PROG_AR
+
+AC_CHECK_TOOL(STRIP, strip, :)
+test -z "$STRIP" && STRIP=:
+_LT_DECL([], [STRIP], [1], [A symbol stripping program])
+
+AC_CHECK_TOOL(RANLIB, ranlib, :)
+test -z "$RANLIB" && RANLIB=:
+_LT_DECL([], [RANLIB], [1],
+    [Commands used to install an old-style archive])
+
+# Determine commands to create old-style static archives.
+old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs'
+old_postinstall_cmds='chmod 644 $oldlib'
+old_postuninstall_cmds=
+
+if test -n "$RANLIB"; then
+  case $host_os in
+  openbsd*)
+    old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib"
+    ;;
+  *)
+    old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib"
+    ;;
+  esac
+  old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib"
+fi
+
+case $host_os in
+  darwin*)
+    lock_old_archive_extraction=yes ;;
+  *)
+    lock_old_archive_extraction=no ;;
+esac
+_LT_DECL([], [old_postinstall_cmds], [2])
+_LT_DECL([], [old_postuninstall_cmds], [2])
+_LT_TAGDECL([], [old_archive_cmds], [2],
+    [Commands used to build an old-style archive])
+_LT_DECL([], [lock_old_archive_extraction], [0],
+    [Whether to use a lock for old archive extraction])
+])# _LT_CMD_OLD_ARCHIVE
+
+
+# _LT_COMPILER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS,
+#		[OUTPUT-FILE], [ACTION-SUCCESS], [ACTION-FAILURE])
+# ----------------------------------------------------------------
+# Check whether the given compiler option works
+# The cache variable VARIABLE-NAME is set to yes when the compile
+# succeeds, the output file is non-empty, and the filtered standard error
+# is either empty or identical to the recorded compiler boilerplate.
+# OUTPUT-FILE defaults to conftest.$ac_objext.  ACTION-SUCCESS or
+# ACTION-FAILURE is then run according to the cached result.
+AC_DEFUN([_LT_COMPILER_OPTION],
+[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+m4_require([_LT_DECL_SED])dnl
+AC_CACHE_CHECK([$1], [$2],
+  [$2=no
+   m4_if([$4], , [ac_outfile=conftest.$ac_objext], [ac_outfile=$4])
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+   lt_compiler_flag="$3"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   # The option is referenced via a variable to avoid confusing sed.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD)
+   (eval "$lt_compile" 2>conftest.err)
+   ac_status=$?
+   cat conftest.err >&AS_MESSAGE_LOG_FD
+   echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
+   if (exit $ac_status) && test -s "$ac_outfile"; then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings other than the usual output.
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp
+     $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+     if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then
+       $2=yes
+     fi
+   fi
+   $RM conftest*
+])
+
+if test x"[$]$2" = xyes; then
+    m4_if([$5], , :, [$5])
+else
+    m4_if([$6], , :, [$6])
+fi
+])# _LT_COMPILER_OPTION
+
+# Old name:
+AU_ALIAS([AC_LIBTOOL_COMPILER_OPTION], [_LT_COMPILER_OPTION])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_LIBTOOL_COMPILER_OPTION], [])
+
+
+# _LT_LINKER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS,
+#                  [ACTION-SUCCESS], [ACTION-FAILURE])
+# ----------------------------------------------------
+# Check whether the given linker option works
+# FLAGS are appended to LDFLAGS for the duration of the test link and
+# LDFLAGS is restored afterwards.  The cache variable VARIABLE-NAME is
+# set to yes when the link succeeds, produces a non-empty executable, and
+# standard error is either empty or, after filtering, identical to the
+# recorded linker boilerplate.
+AC_DEFUN([_LT_LINKER_OPTION],
+[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+m4_require([_LT_DECL_SED])dnl
+AC_CACHE_CHECK([$1], [$2],
+  [$2=no
+   save_LDFLAGS="$LDFLAGS"
+   LDFLAGS="$LDFLAGS $3"
+   echo "$lt_simple_link_test_code" > conftest.$ac_ext
+   if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
+     # The linker can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     if test -s conftest.err; then
+       # Append any errors to the config.log.
+       cat conftest.err 1>&AS_MESSAGE_LOG_FD
+       $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp
+       $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+       if diff conftest.exp conftest.er2 >/dev/null; then
+         $2=yes
+       fi
+     else
+       $2=yes
+     fi
+   fi
+   $RM -r conftest*
+   LDFLAGS="$save_LDFLAGS"
+])
+
+if test x"[$]$2" = xyes; then
+    m4_if([$4], , :, [$4])
+else
+    m4_if([$5], , :, [$5])
+fi
+])# _LT_LINKER_OPTION
+
+# Old name:
+AU_ALIAS([AC_LIBTOOL_LINKER_OPTION], [_LT_LINKER_OPTION])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_LIBTOOL_LINKER_OPTION], [])
+
+
+# LT_CMD_MAX_LEN
+#---------------
+# Determine the maximum usable command line length and store it in the
+# cache variable lt_cv_sys_max_cmd_len and in max_cmd_len.  Known build
+# systems get a fixed or queried value; everywhere else getconf ARG_MAX
+# is tried first, and as a last resort a test string is doubled until
+# echoing it back fails.  A value of -1 means no limit.
+AC_DEFUN([LT_CMD_MAX_LEN],
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+# find the maximum length of command line arguments
+AC_MSG_CHECKING([the maximum length of command line arguments])
+AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl
+  i=0
+  teststring="ABCD"
+
+  case $build_os in
+  msdosdjgpp*)
+    # On DJGPP, this test can blow up pretty badly due to problems in libc
+    # (any single argument exceeding 2000 bytes causes a buffer overrun
+    # during glob expansion).  Even if it were fixed, the result of this
+    # check would be larger than it should be.
+    lt_cv_sys_max_cmd_len=12288;    # 12K is about right
+    ;;
+
+  gnu*)
+    # Under GNU Hurd, this test is not required because there is
+    # no limit to the length of command line arguments.
+    # Libtool will interpret -1 as no limit whatsoever
+    lt_cv_sys_max_cmd_len=-1;
+    ;;
+
+  cygwin* | mingw* | cegcc*)
+    # On Win9x/ME, this test blows up -- it succeeds, but takes
+    # about 5 minutes as the teststring grows exponentially.
+    # Worse, since 9x/ME are not pre-emptively multitasking,
+    # you end up with a "frozen" computer, even though with patience
+    # the test eventually succeeds (with a max line length of 256k).
+    # Instead, let's just punt: use the minimum linelength reported by
+    # all of the supported platforms: 8192 (on NT/2K/XP).
+    lt_cv_sys_max_cmd_len=8192;
+    ;;
+
+  mint*)
+    # On MiNT this can take a long time and run out of memory.
+    lt_cv_sys_max_cmd_len=8192;
+    ;;
+
+  amigaos*)
+    # On AmigaOS with pdksh, this test takes hours, literally.
+    # So we just punt and use a minimum line length of 8192.
+    lt_cv_sys_max_cmd_len=8192;
+    ;;
+
+  netbsd* | freebsd* | openbsd* | darwin* | dragonfly*)
+    # This has been around since 386BSD, at least.  Likely further.
+    if test -x /sbin/sysctl; then
+      lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax`
+    elif test -x /usr/sbin/sysctl; then
+      lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax`
+    else
+      lt_cv_sys_max_cmd_len=65536	# usable default for all BSDs
+    fi
+    # And add a safety zone
+    lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
+    lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
+    ;;
+
+  interix*)
+    # We know the value 262144 and hardcode it with a safety zone (like BSD)
+    lt_cv_sys_max_cmd_len=196608
+    ;;
+
+  os2*)
+    # The test takes a long time on OS/2.
+    lt_cv_sys_max_cmd_len=8192
+    ;;
+
+  osf*)
+    # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure
+    # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not
+    # nice to cause kernel panics so lets avoid the loop below.
+    # First set a reasonable default.
+    lt_cv_sys_max_cmd_len=16384
+    #
+    if test -x /sbin/sysconfig; then
+      case `/sbin/sysconfig -q proc exec_disable_arg_limit` in
+        *1*) lt_cv_sys_max_cmd_len=-1 ;;
+      esac
+    fi
+    ;;
+  sco3.2v5*)
+    lt_cv_sys_max_cmd_len=102400
+    ;;
+  sysv5* | sco5v6* | sysv4.2uw2*)
+    kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null`
+    if test -n "$kargmax"; then
+      lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[[	 ]]//'`
+    else
+      lt_cv_sys_max_cmd_len=32768
+    fi
+    ;;
+  *)
+    lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null`
+    if test -n "$lt_cv_sys_max_cmd_len"; then
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
+    else
+      # Make teststring a little bigger before we do anything with it.
+      # a 1K string should be a reasonable start.
+      for i in 1 2 3 4 5 6 7 8 ; do
+        teststring=$teststring$teststring
+      done
+      SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}}
+      # If test is not a shell built-in, we'll probably end up computing a
+      # maximum length that is only half of the actual maximum length, but
+      # we can't tell.
+      while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \
+	         = "X$teststring$teststring"; } >/dev/null 2>&1 &&
+	      test $i != 17 # 1/2 MB should be enough
+      do
+        i=`expr $i + 1`
+        teststring=$teststring$teststring
+      done
+      # Only check the string length outside the loop.
+      lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1`
+      teststring=
+      # Add a significant safety factor because C++ compilers can tack on
+      # massive amounts of additional arguments before passing them to the
+      # linker.  It appears as though 1/2 is a usable value.
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2`
+    fi
+    ;;
+  esac
+])
+# The value must be quoted here.  With an empty unquoted operand the test
+# degenerates to a one-argument form that always succeeds, so the none
+# branch could never be reached.
+if test -n "$lt_cv_sys_max_cmd_len"; then
+  AC_MSG_RESULT($lt_cv_sys_max_cmd_len)
+else
+  AC_MSG_RESULT(none)
+fi
+max_cmd_len=$lt_cv_sys_max_cmd_len
+_LT_DECL([], [max_cmd_len], [0],
+    [What is the maximum length of a command?])
+])# LT_CMD_MAX_LEN
+
+# Old name:
+AU_ALIAS([AC_LIBTOOL_SYS_MAX_CMD_LEN], [LT_CMD_MAX_LEN])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_LIBTOOL_SYS_MAX_CMD_LEN], [])
+
+
+# _LT_HEADER_DLFCN
+# ----------------
+# Check for the dlfcn.h header, using the default set of includes.
+m4_defun([_LT_HEADER_DLFCN],
+[AC_CHECK_HEADERS([dlfcn.h], [], [], [AC_INCLUDES_DEFAULT])dnl
+])# _LT_HEADER_DLFCN
+
+
+# _LT_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE,
+#                      ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING)
+# ----------------------------------------------------------------
+# Build and run a program that calls dlopen on itself and looks up its
+# own function fnord, first by that name and then with a leading
+# underscore.  The exit status of the program selects the action: 1 runs
+# ACTION-IF-TRUE, 2 runs ACTION-IF-TRUE-W-USCORE, and anything else, or a
+# failed link, runs ACTION-IF-FALSE.  When cross compiling nothing is
+# built and ACTION-IF-CROSS-COMPILING is run instead.
+m4_defun([_LT_TRY_DLOPEN_SELF],
+[m4_require([_LT_HEADER_DLFCN])dnl
+if test "$cross_compiling" = yes; then :
+  [$4]
+else
+  lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+  lt_status=$lt_dlunknown
+  cat > conftest.$ac_ext <<_LT_EOF
+[#line $LINENO "configure"
+#include "confdefs.h"
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef RTLD_GLOBAL
+#  define LT_DLGLOBAL		RTLD_GLOBAL
+#else
+#  ifdef DL_GLOBAL
+#    define LT_DLGLOBAL		DL_GLOBAL
+#  else
+#    define LT_DLGLOBAL		0
+#  endif
+#endif
+
+/* We may have to define LT_DLLAZY_OR_NOW in the command line if we
+   find out it does not work in some platform. */
+#ifndef LT_DLLAZY_OR_NOW
+#  ifdef RTLD_LAZY
+#    define LT_DLLAZY_OR_NOW		RTLD_LAZY
+#  else
+#    ifdef DL_LAZY
+#      define LT_DLLAZY_OR_NOW		DL_LAZY
+#    else
+#      ifdef RTLD_NOW
+#        define LT_DLLAZY_OR_NOW	RTLD_NOW
+#      else
+#        ifdef DL_NOW
+#          define LT_DLLAZY_OR_NOW	DL_NOW
+#        else
+#          define LT_DLLAZY_OR_NOW	0
+#        endif
+#      endif
+#    endif
+#  endif
+#endif
+
+/* When -fvisbility=hidden is used, assume the code has been annotated
+   correspondingly for the symbols needed.  */
+#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3))
+int fnord () __attribute__((visibility("default")));
+#endif
+
+int fnord () { return 42; }
+int main ()
+{
+  void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW);
+  int status = $lt_dlunknown;
+
+  if (self)
+    {
+      if (dlsym (self,"fnord"))       status = $lt_dlno_uscore;
+      else
+        {
+	  if (dlsym( self,"_fnord"))  status = $lt_dlneed_uscore;
+          else puts (dlerror ());
+	}
+      /* dlclose (self); */
+    }
+  else
+    puts (dlerror ());
+
+  return status;
+}]
+_LT_EOF
+  if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext} 2>/dev/null; then
+    (./conftest; exit; ) >&AS_MESSAGE_LOG_FD 2>/dev/null
+    lt_status=$?
+    case x$lt_status in
+      x$lt_dlno_uscore) $1 ;;
+      x$lt_dlneed_uscore) $2 ;;
+      x$lt_dlunknown|x*) $3 ;;
+    esac
+  else :
+    # compilation failed
+    $3
+  fi
+fi
+rm -fr conftest*
+])# _LT_TRY_DLOPEN_SELF
+
+
+# LT_SYS_DLOPEN_SELF
+# ------------------
+# Determine which runtime loading interface is available and whether a
+# program, and a statically linked program, can dlopen itself.  When
+# enable_dlopen is not yes all three results are set to unknown without
+# running any check.  Otherwise the interface is chosen per host, with a
+# chain of function and library probes on hosts not listed explicitly,
+# and the self-dlopen tests are run only when that interface is dlopen.
+# The results are declared as enable_dlopen, enable_dlopen_self and
+# enable_dlopen_self_static.
+AC_DEFUN([LT_SYS_DLOPEN_SELF],
+[m4_require([_LT_HEADER_DLFCN])dnl
+if test "x$enable_dlopen" != xyes; then
+  enable_dlopen=unknown
+  enable_dlopen_self=unknown
+  enable_dlopen_self_static=unknown
+else
+  lt_cv_dlopen=no
+  lt_cv_dlopen_libs=
+
+  case $host_os in
+  beos*)
+    lt_cv_dlopen="load_add_on"
+    lt_cv_dlopen_libs=
+    lt_cv_dlopen_self=yes
+    ;;
+
+  mingw* | pw32* | cegcc*)
+    lt_cv_dlopen="LoadLibrary"
+    lt_cv_dlopen_libs=
+    ;;
+
+  cygwin*)
+    lt_cv_dlopen="dlopen"
+    lt_cv_dlopen_libs=
+    ;;
+
+  darwin*)
+  # if libdl is installed we need to link against it
+    AC_CHECK_LIB([dl], [dlopen],
+		[lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],[
+    lt_cv_dlopen="dyld"
+    lt_cv_dlopen_libs=
+    lt_cv_dlopen_self=yes
+    ])
+    ;;
+
+  *)
+    AC_CHECK_FUNC([shl_load],
+	  [lt_cv_dlopen="shl_load"],
+      [AC_CHECK_LIB([dld], [shl_load],
+	    [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"],
+	[AC_CHECK_FUNC([dlopen],
+	      [lt_cv_dlopen="dlopen"],
+	  [AC_CHECK_LIB([dl], [dlopen],
+		[lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],
+	    [AC_CHECK_LIB([svld], [dlopen],
+		  [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"],
+	      [AC_CHECK_LIB([dld], [dld_link],
+		    [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"])
+	      ])
+	    ])
+	  ])
+	])
+      ])
+    ;;
+  esac
+
+  if test "x$lt_cv_dlopen" != xno; then
+    enable_dlopen=yes
+  else
+    enable_dlopen=no
+  fi
+
+  case $lt_cv_dlopen in
+  dlopen)
+    save_CPPFLAGS="$CPPFLAGS"
+    test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H"
+
+    save_LDFLAGS="$LDFLAGS"
+    wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\"
+
+    save_LIBS="$LIBS"
+    LIBS="$lt_cv_dlopen_libs $LIBS"
+
+    AC_CACHE_CHECK([whether a program can dlopen itself],
+	  lt_cv_dlopen_self, [dnl
+	  _LT_TRY_DLOPEN_SELF(
+	    lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes,
+	    lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross)
+    ])
+
+    if test "x$lt_cv_dlopen_self" = xyes; then
+      wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\"
+      AC_CACHE_CHECK([whether a statically linked program can dlopen itself],
+	  lt_cv_dlopen_self_static, [dnl
+	  _LT_TRY_DLOPEN_SELF(
+	    lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes,
+	    lt_cv_dlopen_self_static=no,  lt_cv_dlopen_self_static=cross)
+      ])
+    fi
+
+    CPPFLAGS="$save_CPPFLAGS"
+    LDFLAGS="$save_LDFLAGS"
+    LIBS="$save_LIBS"
+    ;;
+  esac
+
+  case $lt_cv_dlopen_self in
+  yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;;
+  *) enable_dlopen_self=unknown ;;
+  esac
+
+  case $lt_cv_dlopen_self_static in
+  yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;;
+  *) enable_dlopen_self_static=unknown ;;
+  esac
+fi
+_LT_DECL([dlopen_support], [enable_dlopen], [0],
+	 [Whether dlopen is supported])
+_LT_DECL([dlopen_self], [enable_dlopen_self], [0],
+	 [Whether dlopen of programs is supported])
+_LT_DECL([dlopen_self_static], [enable_dlopen_self_static], [0],
+	 [Whether dlopen of statically linked programs is supported])
+])# LT_SYS_DLOPEN_SELF
+
+# Old name:
+AU_ALIAS([AC_LIBTOOL_DLOPEN_SELF], [LT_SYS_DLOPEN_SELF])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF], [])
+
+
+# _LT_COMPILER_C_O([TAGNAME])
+# ---------------------------
+# Check whether the compiler accepts -c and -o together; the result is cached.
+# Unlike AC_PROG_CC_C_O, the compiler is not hard coded: $ac_compile is used.
+m4_defun([_LT_COMPILER_C_O],
+[m4_require([_LT_DECL_SED])dnl
+m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+m4_require([_LT_TAG_COMPILER])dnl
+AC_CACHE_CHECK([if $compiler supports -c -o file.$ac_objext],
+  [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)],
+  [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=no
+   $RM -r conftest 2>/dev/null
+   mkdir conftest
+   cd conftest
+   mkdir out
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+   lt_compiler_flag="-o out/conftest2.$ac_objext"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD)
+   (eval "$lt_compile" 2>out/conftest.err)
+   ac_status=$?
+   cat out/conftest.err >&AS_MESSAGE_LOG_FD
+   echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
+   if (exit $ac_status) && test -s out/conftest2.$ac_objext
+   then
+     # A compiler may merely warn about an unrecognized option and ignore it,
+     # so say yes only if stderr is empty or equals the compiler boilerplate.
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
+     $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
+     if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
+       _LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes
+     fi
+   fi
+   chmod u+w . 2>&AS_MESSAGE_LOG_FD
+   $RM conftest*
+   # SGI C++ compiler will create directory out/ii_files/ for
+   # template instantiation
+   test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
+   $RM out/* && rmdir out
+   cd ..
+   $RM -r conftest
+   $RM conftest*
+])
+_LT_TAGDECL([compiler_c_o], [lt_cv_prog_compiler_c_o], [1],
+	[Does compiler simultaneously support -c and -o options?])
+])# _LT_COMPILER_C_O
+
+
+# _LT_COMPILER_FILE_LOCKS([TAGNAME])
+# ----------------------------------
+# When -c -o is unsupported and locking is not disabled, test hard-link locks.
+m4_defun([_LT_COMPILER_FILE_LOCKS],
+[m4_require([_LT_ENABLE_LOCK])dnl
+m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+_LT_COMPILER_C_O([$1])
+
+hard_links="nottested"
+if test "$_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)" = no && test "$need_locks" != no; then
+  # do not overwrite the value of need_locks provided by the user
+  AC_MSG_CHECKING([if we can lock with hard links])
+  hard_links=yes
+  $RM conftest*
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  touch conftest.a
+  ln conftest.a conftest.b 2>&5 || hard_links=no
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  AC_MSG_RESULT([$hard_links])
+  if test "$hard_links" = no; then
+    AC_MSG_WARN([`$CC' does not support `-c -o', so `make -j' may be unsafe])
+    need_locks=warn
+  fi
+else
+  need_locks=no
+fi
+_LT_DECL([], [need_locks], [1], [Must we lock files when doing compilation?])
+])# _LT_COMPILER_FILE_LOCKS
+
+
+# _LT_CHECK_OBJDIR
+# ----------------
+m4_defun([_LT_CHECK_OBJDIR],
+[AC_CACHE_CHECK([for objdir], [lt_cv_objdir],
+[rm -f .libs 2>/dev/null
+mkdir .libs 2>/dev/null
+if test -d .libs; then
+  lt_cv_objdir=.libs
+else
+  # .libs could not be created: MS-DOS does not allow a leading dot in filenames.
+  lt_cv_objdir=_libs
+fi
+rmdir .libs 2>/dev/null])
+objdir=$lt_cv_objdir
+_LT_DECL([], [objdir], [0],
+         [The name of the directory that contains temporary libtool files])dnl
+m4_pattern_allow([LT_OBJDIR])dnl
+AC_DEFINE_UNQUOTED(LT_OBJDIR, "$lt_cv_objdir/",
+  [Define to the sub-directory in which libtool stores uninstalled libraries.])
+])# _LT_CHECK_OBJDIR
+
+
+# _LT_LINKER_HARDCODE_LIBPATH([TAGNAME])
+# --------------------------------------
+# Set hardcode_action (relink, immediate or unsupported) and enable_fast_install.
+m4_defun([_LT_LINKER_HARDCODE_LIBPATH],
+[AC_MSG_CHECKING([how to hardcode library paths into programs])
+_LT_TAGVAR(hardcode_action, $1)=
+if test -n "$_LT_TAGVAR(hardcode_libdir_flag_spec, $1)" ||
+   test -n "$_LT_TAGVAR(runpath_var, $1)" ||
+   test "X$_LT_TAGVAR(hardcode_automatic, $1)" = "Xyes" ; then
+
+  # We can hardcode non-existent directories.
+  if test "$_LT_TAGVAR(hardcode_direct, $1)" != no &&
+     # If the only mechanism to avoid hardcoding is shlibpath_var, we
+     # have to relink, otherwise we might link with an installed library
+     # when we should be linking with a yet-to-be-installed one
+     ## test "$_LT_TAGVAR(hardcode_shlibpath_var, $1)" != no &&
+     test "$_LT_TAGVAR(hardcode_minus_L, $1)" != no; then
+    # Linking always hardcodes the temporary library directory.
+    _LT_TAGVAR(hardcode_action, $1)=relink
+  else
+    # We can link without hardcoding, and we can hardcode nonexisting dirs.
+    _LT_TAGVAR(hardcode_action, $1)=immediate
+  fi
+else
+  # We cannot hardcode anything, or else we can only hardcode existing
+  # directories.
+  _LT_TAGVAR(hardcode_action, $1)=unsupported
+fi
+AC_MSG_RESULT([$_LT_TAGVAR(hardcode_action, $1)])
+
+if test "$_LT_TAGVAR(hardcode_action, $1)" = relink ||
+   test "$_LT_TAGVAR(inherit_rpath, $1)" = yes; then
+  # Fast installation is not supported
+  enable_fast_install=no
+elif test "$shlibpath_overrides_runpath" = yes ||
+     test "$enable_shared" = no; then
+  # Fast installation is not necessary
+  enable_fast_install=needless
+fi
+_LT_TAGDECL([], [hardcode_action], [0],
+    [How to hardcode a shared library path into an executable])
+])# _LT_LINKER_HARDCODE_LIBPATH
+
+
+# _LT_CMD_STRIPLIB
+# ----------------
+m4_defun([_LT_CMD_STRIPLIB],
+[m4_require([_LT_DECL_EGREP])
+striplib=
+old_striplib=
+AC_MSG_CHECKING([whether stripping libraries is possible])
+if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then
+  test -z "$old_striplib" && old_striplib="$STRIP --strip-debug"
+  test -z "$striplib" && striplib="$STRIP --strip-unneeded"
+  AC_MSG_RESULT([yes])
+else
+# Not GNU strip.  FIXME - insert some real tests, host_os isn't really good enough
+  case $host_os in
+  darwin*)
+    if test -n "$STRIP" ; then
+      striplib="$STRIP -x"
+      old_striplib="$STRIP -S"
+      AC_MSG_RESULT([yes])
+    else
+      AC_MSG_RESULT([no])
+    fi
+    ;;
+  *)
+    AC_MSG_RESULT([no])
+    ;;
+  esac
+fi
+_LT_DECL([], [old_striplib], [1], [Commands to strip libraries])
+_LT_DECL([], [striplib], [1])
+])# _LT_CMD_STRIPLIB
+
+
+# _LT_SYS_DYNAMIC_LINKER([TAG])
+# -----------------------------
+# PORTME Fill in your ld.so characteristics
+m4_defun([_LT_SYS_DYNAMIC_LINKER],
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+m4_require([_LT_DECL_EGREP])dnl
+m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+m4_require([_LT_DECL_OBJDUMP])dnl
+m4_require([_LT_DECL_SED])dnl
+m4_require([_LT_CHECK_SHELL_FEATURES])dnl
+AC_MSG_CHECKING([dynamic linker characteristics])
+m4_if([$1],
+	[], [
+if test "$GCC" = yes; then
+  case $host_os in
+    darwin*) lt_awk_arg="/^libraries:/,/LR/" ;;
+    *) lt_awk_arg="/^libraries:/" ;;
+  esac
+  case $host_os in
+    mingw* | cegcc*) lt_sed_strip_eq="s,=\([[A-Za-z]]:\),\1,g" ;;
+    *) lt_sed_strip_eq="s,=/,/,g" ;;
+  esac
+  lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq`
+  case $lt_search_path_spec in
+  *\;*)
+    # if the path contains ";" then we assume it to be the separator
+    # otherwise default to the standard path separator (i.e. ":") - it is
+    # assumed that no part of a normal pathname contains ";" but that should
+    # okay in the real world where ";" in dirpaths is itself problematic.
+    lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'`
+    ;;
+  *)
+    lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"`
+    ;;
+  esac
+  # Ok, now we have the path, separated by spaces, we can step through it
+  # and add multilib dir if necessary.
+  lt_tmp_lt_search_path_spec=
+  lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null`
+  for lt_sys_path in $lt_search_path_spec; do
+    if test -d "$lt_sys_path/$lt_multi_os_dir"; then
+      lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir"
+    else
+      test -d "$lt_sys_path" && \
+	lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path"
+    fi
+  done
+  lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk '
+BEGIN {RS=" "; FS="/|\n";} {
+  lt_foo="";
+  lt_count=0;
+  for (lt_i = NF; lt_i > 0; lt_i--) {
+    if ($lt_i != "" && $lt_i != ".") {
+      if ($lt_i == "..") {
+        lt_count++;
+      } else {
+        if (lt_count == 0) {
+          lt_foo="/" $lt_i lt_foo;
+        } else {
+          lt_count--;
+        }
+      }
+    }
+  }
+  if (lt_foo != "") { lt_freq[[lt_foo]]++; }
+  if (lt_freq[[lt_foo]] == 1) { print lt_foo; }
+}'`
+  # AWK program above erroneously prepends '/' to C:/dos/paths
+  # for these hosts.
+  case $host_os in
+    mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\
+      $SED 's,/\([[A-Za-z]]:\),\1,g'` ;;
+  esac
+  sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP`
+else
+  sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib"
+fi])
+library_names_spec=
+libname_spec='lib$name'
+soname_spec=
+shrext_cmds=".so"
+postinstall_cmds=
+postuninstall_cmds=
+finish_cmds=
+finish_eval=
+shlibpath_var=
+shlibpath_overrides_runpath=unknown
+version_type=none
+dynamic_linker="$host_os ld.so"
+sys_lib_dlsearch_path_spec="/lib /usr/lib"
+need_lib_prefix=unknown
+hardcode_into_libs=no
+
+# when you set need_version to no, make sure it does not cause -set_version
+# flags to be left without arguments
+need_version=unknown
+
+case $host_os in
+aix3*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
+  shlibpath_var=LIBPATH
+
+  # AIX 3 has no versioning support, so we append a major version to the name.
+  soname_spec='${libname}${release}${shared_ext}$major'
+  ;;
+
+aix[[4-9]]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  hardcode_into_libs=yes
+  if test "$host_cpu" = ia64; then
+    # AIX 5 supports IA64
+    library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}'
+    shlibpath_var=LD_LIBRARY_PATH
+  else
+    # With GCC up to 2.95.x, collect2 would create an import file
+    # for dependence libraries.  The import file would start with
+    # the line `#! .'.  This would cause the generated library to
+    # depend on `.', always an invalid library.  This was fixed in
+    # development snapshots of GCC prior to 3.0.
+    case $host_os in
+      aix4 | aix4.[[01]] | aix4.[[01]].*)
+      if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
+	   echo ' yes '
+	   echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then
+	:
+      else
+	can_build_shared=no
+      fi
+      ;;
+    esac
+    # AIX (on Power*) has no versioning support, so currently we can not hardcode correct
+    # soname into executable. Probably we can add versioning support to
+    # collect2, so additional links can be useful in future.
+    if test "$aix_use_runtimelinking" = yes; then
+      # If using run time linking (on AIX 4.2 or later) use lib<name>.so
+      # instead of lib<name>.a to let people know that these are not
+      # typical AIX shared libraries.
+      library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    else
+      # We preserve .a as extension for shared libraries through AIX4.2
+      # and later when we are not doing run time linking.
+      library_names_spec='${libname}${release}.a $libname.a'
+      soname_spec='${libname}${release}${shared_ext}$major'
+    fi
+    shlibpath_var=LIBPATH
+  fi
+  ;;
+
+amigaos*)
+  case $host_cpu in
+  powerpc)
+    # Since July 2007 AmigaOS4 officially supports .so libraries.
+    # When compiling the executable, add -use-dynld -Lsobjs: to the compileline.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    ;;
+  m68k)
+    library_names_spec='$libname.ixlibrary $libname.a'
+    # Create ${libname}_ixlibrary.a entries in /sys/libs.
+    finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done'
+    ;;
+  esac
+  ;;
+
+beos*)
+  library_names_spec='${libname}${shared_ext}'
+  dynamic_linker="$host_os ld.so"
+  shlibpath_var=LIBRARY_PATH
+  ;;
+
+bsdi[[45]]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib"
+  sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib"
+  # the default ld.so.conf also contains /usr/contrib/lib and
+  # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow
+  # libtool to hard-code these into programs
+  ;;
+
+cygwin* | mingw* | pw32* | cegcc*)
+  version_type=windows
+  shrext_cmds=".dll"
+  need_version=no
+  need_lib_prefix=no
+
+  case $GCC,$cc_basename in
+  yes,*)
+    # gcc
+    library_names_spec='$libname.dll.a'
+    # DLL is installed to $(libdir)/../bin by postinstall_cmds
+    postinstall_cmds='base_file=`basename \${file}`~
+      dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
+      dldir=$destdir/`dirname \$dlpath`~
+      test -d \$dldir || mkdir -p \$dldir~
+      $install_prog $dir/$dlname \$dldir/$dlname~
+      chmod a+x \$dldir/$dlname~
+      if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then
+        eval '\''$striplib \$dldir/$dlname'\'' || exit \$?;
+      fi'
+    postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
+      dlpath=$dir/\$dldll~
+       $RM \$dlpath'
+    shlibpath_overrides_runpath=yes
+
+    case $host_os in
+    cygwin*)
+      # Cygwin DLLs use 'cyg' prefix rather than 'lib'
+      soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}'
+m4_if([$1], [],[
+      sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"])
+      ;;
+    mingw* | cegcc*)
+      # MinGW DLLs use traditional 'lib' prefix
+      soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}'
+      ;;
+    pw32*)
+      # pw32 DLLs use 'pw' prefix rather than 'lib'
+      library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}'
+      ;;
+    esac
+    dynamic_linker='Win32 ld.exe'
+    ;;
+
+  *,cl*)
+    # Native MSVC
+    libname_spec='$name'
+    soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}'
+    library_names_spec='${libname}.dll.lib'
+
+    case $build_os in
+    mingw*)
+      sys_lib_search_path_spec=
+      lt_save_ifs=$IFS
+      IFS=';'
+      for lt_path in $LIB
+      do
+        IFS=$lt_save_ifs
+        # Let DOS variable expansion print the short 8.3 style file name.
+        lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"`
+        sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path"
+      done
+      IFS=$lt_save_ifs
+      # Convert to MSYS style.
+      sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([[a-zA-Z]]\\):| /\\1|g' -e 's|^ ||'`
+      ;;
+    cygwin*)
+      # Convert to unix form, then to dos form, then back to unix form
+      # but this time dos style (no spaces!) so that the unix form looks
+      # like /cygdrive/c/PROGRA~1:/cygdr...
+      sys_lib_search_path_spec=`cygpath --path --unix "$LIB"`
+      sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null`
+      sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
+      ;;
+    *)
+      sys_lib_search_path_spec="$LIB"
+      if $ECHO "$sys_lib_search_path_spec" | [$GREP ';[c-zC-Z]:/' >/dev/null]; then
+        # It is most probably a Windows format PATH.
+        sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
+      else
+        sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
+      fi
+      # FIXME: find the short name or the path components, as spaces are
+      # common. (e.g. "Program Files" -> "PROGRA~1")
+      ;;
+    esac
+
+    # DLL is installed to $(libdir)/../bin by postinstall_cmds
+    postinstall_cmds='base_file=`basename \${file}`~
+      dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
+      dldir=$destdir/`dirname \$dlpath`~
+      test -d \$dldir || mkdir -p \$dldir~
+      $install_prog $dir/$dlname \$dldir/$dlname'
+    postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
+      dlpath=$dir/\$dldll~
+       $RM \$dlpath'
+    shlibpath_overrides_runpath=yes
+    dynamic_linker='Win32 link.exe'
+    ;;
+
+  *)
+    # Assume MSVC wrapper
+    library_names_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext} $libname.lib'
+    dynamic_linker='Win32 ld.exe'
+    ;;
+  esac
+  # FIXME: first we should search . and the directory the executable is in
+  shlibpath_var=PATH
+  ;;
+
+darwin* | rhapsody*)
+  dynamic_linker="$host_os dyld"
+  version_type=darwin
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext'
+  soname_spec='${libname}${release}${major}$shared_ext'
+  shlibpath_overrides_runpath=yes
+  shlibpath_var=DYLD_LIBRARY_PATH
+  shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`'
+m4_if([$1], [],[
+  sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"])
+  sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib'
+  ;;
+
+dgux*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+freebsd* | dragonfly*)
+  # DragonFly does not have aout.  When/if they implement a new
+  # versioning mechanism, adjust this.
+  if test -x /usr/bin/objformat; then
+    objformat=`/usr/bin/objformat`
+  else
+    case $host_os in
+    freebsd[[23]].*) objformat=aout ;;
+    *) objformat=elf ;;
+    esac
+  fi
+  version_type=freebsd-$objformat
+  case $version_type in
+    freebsd-elf*)
+      library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
+      need_version=no
+      need_lib_prefix=no
+      ;;
+    freebsd-*)
+      library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix'
+      need_version=yes
+      ;;
+  esac
+  shlibpath_var=LD_LIBRARY_PATH
+  case $host_os in
+  freebsd2.*)
+    shlibpath_overrides_runpath=yes
+    ;;
+  freebsd3.[[01]]* | freebsdelf3.[[01]]*)
+    shlibpath_overrides_runpath=yes
+    hardcode_into_libs=yes
+    ;;
+  freebsd3.[[2-9]]* | freebsdelf3.[[2-9]]* | \
+  freebsd4.[[0-5]] | freebsdelf4.[[0-5]] | freebsd4.1.1 | freebsdelf4.1.1)
+    shlibpath_overrides_runpath=no
+    hardcode_into_libs=yes
+    ;;
+  *) # from 4.6 on, and DragonFly
+    shlibpath_overrides_runpath=yes
+    hardcode_into_libs=yes
+    ;;
+  esac
+  ;;
+
+gnu*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+haiku*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  dynamic_linker="$host_os runtime_loader"
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib'
+  hardcode_into_libs=yes
+  ;;
+
+hpux9* | hpux10* | hpux11*)
+  # Give a soname corresponding to the major version so that dld.sl refuses to
+  # link against other versions.
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  case $host_cpu in
+  ia64*)
+    shrext_cmds='.so'
+    hardcode_into_libs=yes
+    dynamic_linker="$host_os dld.so"
+    shlibpath_var=LD_LIBRARY_PATH
+    shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    if test "X$HPUX_IA64_MODE" = X32; then
+      sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib"
+    else
+      sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64"
+    fi
+    sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
+    ;;
+  hppa*64*)
+    shrext_cmds='.sl'
+    hardcode_into_libs=yes
+    dynamic_linker="$host_os dld.sl"
+    shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH
+    shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64"
+    sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
+    ;;
+  *)
+    shrext_cmds='.sl'
+    dynamic_linker="$host_os dld.sl"
+    shlibpath_var=SHLIB_PATH
+    shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    ;;
+  esac
+  # HP-UX runs *really* slowly unless shared libraries are mode 555, ...
+  postinstall_cmds='chmod 555 $lib'
+  # or fails outright, so override atomically:
+  install_override_mode=555
+  ;;
+
+interix[[3-9]]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+irix5* | irix6* | nonstopux*)
+  case $host_os in
+    nonstopux*) version_type=nonstopux ;;
+    *)
+	if test "$lt_cv_prog_gnu_ld" = yes; then
+		version_type=linux # correct to gnu/linux during the next big refactor
+	else
+		version_type=irix
+	fi ;;
+  esac
+  need_lib_prefix=no
+  need_version=no
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}'
+  case $host_os in
+  irix5* | nonstopux*)
+    libsuff= shlibsuff=
+    ;;
+  *)
+    case $LD in # libtool.m4 will add one of these switches to LD
+    *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ")
+      libsuff= shlibsuff= libmagic=32-bit;;
+    *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ")
+      libsuff=32 shlibsuff=N32 libmagic=N32;;
+    *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ")
+      libsuff=64 shlibsuff=64 libmagic=64-bit;;
+    *) libsuff= shlibsuff= libmagic=never-match;;
+    esac
+    ;;
+  esac
+  shlibpath_var=LD_LIBRARY${shlibsuff}_PATH
+  shlibpath_overrides_runpath=no
+  sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}"
+  sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}"
+  hardcode_into_libs=yes
+  ;;
+
+# No shared lib support for Linux oldld, aout, or coff.
+linux*oldld* | linux*aout* | linux*coff*)
+  dynamic_linker=no
+  ;;
+
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+
+  # Some binutils ld are patched to set DT_RUNPATH
+  AC_CACHE_VAL([lt_cv_shlibpath_overrides_runpath],
+    [lt_cv_shlibpath_overrides_runpath=no
+    save_LDFLAGS=$LDFLAGS
+    save_libdir=$libdir
+    eval "libdir=/foo; wl=\"$_LT_TAGVAR(lt_prog_compiler_wl, $1)\"; \
+	 LDFLAGS=\"\$LDFLAGS $_LT_TAGVAR(hardcode_libdir_flag_spec, $1)\""
+    AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])],
+      [AS_IF([ ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null],
+	 [lt_cv_shlibpath_overrides_runpath=yes])])
+    LDFLAGS=$save_LDFLAGS
+    libdir=$save_libdir
+    ])
+  shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath
+
+  # This implies no fast_install, which is unacceptable.
+  # Some rework will be needed to allow for fast_install
+  # before this can be enabled.
+  hardcode_into_libs=yes
+
+  # Append ld.so.conf contents to the search path
+  if test -f /etc/ld.so.conf; then
+    lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[	 ]*hwcap[	 ]/d;s/[:,	]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '`
+    sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra"
+  fi
+
+  # We used to test for /lib/ld.so.1 and disable shared libraries on
+  # powerpc, because MkLinux only supported shared libraries with the
+  # GNU dynamic linker.  Since this was broken with cross compilers,
+  # most powerpc-linux boxes support dynamic linking these days and
+  # people can always --disable-shared, the test was removed, and we
+  # assume the GNU/Linux dynamic linker is in use.
+  dynamic_linker='GNU/Linux ld.so'
+  ;;
+
+netbsd*)
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+    finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+    dynamic_linker='NetBSD (a.out) ld.so'
+  else
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    dynamic_linker='NetBSD ld.elf_so'
+  fi
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  ;;
+
+newsos6)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  ;;
+
+*nto* | *qnx*)
+  version_type=qnx
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  dynamic_linker='ldqnx.so'
+  ;;
+
+openbsd*)
+  version_type=sunos
+  sys_lib_dlsearch_path_spec="/usr/lib"
+  need_lib_prefix=no
+  # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs.
+  case $host_os in
+    openbsd3.3 | openbsd3.3.*)	need_version=yes ;;
+    *)				need_version=no  ;;
+  esac
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+    case $host_os in
+      openbsd2.[[89]] | openbsd2.[[89]].*)
+	shlibpath_overrides_runpath=no
+	;;
+      *)
+	shlibpath_overrides_runpath=yes
+	;;
+      esac
+  else
+    shlibpath_overrides_runpath=yes
+  fi
+  ;;
+
+os2*)
+  libname_spec='$name'
+  shrext_cmds=".dll"
+  need_lib_prefix=no
+  library_names_spec='$libname${shared_ext} $libname.a'
+  dynamic_linker='OS/2 ld.exe'
+  shlibpath_var=LIBPATH
+  ;;
+
+osf3* | osf4* | osf5*)
+  version_type=osf
+  need_lib_prefix=no
+  need_version=no
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib"
+  sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
+  ;;
+
+rdos*)
+  dynamic_linker=no
+  ;;
+
+solaris*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  # ldd complains unless libraries are executable
+  postinstall_cmds='chmod +x $lib'
+  ;;
+
+sunos4*)
+  version_type=sunos
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+  finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  if test "$with_gnu_ld" = yes; then
+    need_lib_prefix=no
+  fi
+  need_version=yes
+  ;;
+
+sysv4 | sysv4.3*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  case $host_vendor in
+    sni)
+      shlibpath_overrides_runpath=no
+      need_lib_prefix=no
+      runpath_var=LD_RUN_PATH
+      ;;
+    siemens)
+      need_lib_prefix=no
+      ;;
+    motorola)
+      need_lib_prefix=no
+      need_version=no
+      shlibpath_overrides_runpath=no
+      sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib'
+      ;;
+  esac
+  ;;
+
+sysv4*MP*)
+  if test -d /usr/nec ;then
+    version_type=linux # correct to gnu/linux during the next big refactor
+    library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
+    soname_spec='$libname${shared_ext}.$major'
+    shlibpath_var=LD_LIBRARY_PATH
+  fi
+  ;;
+
+sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
+  version_type=freebsd-elf
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  if test "$with_gnu_ld" = yes; then
+    sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib'
+  else
+    sys_lib_search_path_spec='/usr/ccs/lib /usr/lib'
+    case $host_os in
+      sco3.2v5*)
+        sys_lib_search_path_spec="$sys_lib_search_path_spec /lib"
+	;;
+    esac
+  fi
+  sys_lib_dlsearch_path_spec='/usr/lib'
+  ;;
+
+tpf*)
+  # TPF is a cross-target only.  Preferred cross-host = GNU/Linux.
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+uts4*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+*)
+  dynamic_linker=no
+  ;;
+esac
+AC_MSG_RESULT([$dynamic_linker])
+test "$dynamic_linker" = no && can_build_shared=no
+
+variables_saved_for_relink="PATH $shlibpath_var $runpath_var"
+if test "$GCC" = yes; then
+  variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH"
+fi
+
+if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then
+  sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec"
+fi
+if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then
+  sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec"
+fi
+
+_LT_DECL([], [variables_saved_for_relink], [1],
+    [Variables whose values should be saved in libtool wrapper scripts and
+    restored at link time])
+_LT_DECL([], [need_lib_prefix], [0],
+    [Do we need the "lib" prefix for modules?])
+_LT_DECL([], [need_version], [0], [Do we need a version for libraries?])
+_LT_DECL([], [version_type], [0], [Library versioning type])
+_LT_DECL([], [runpath_var], [0],  [Shared library runtime path variable])
+_LT_DECL([], [shlibpath_var], [0],[Shared library path variable])
+_LT_DECL([], [shlibpath_overrides_runpath], [0],
+    [Is shlibpath searched before the hard-coded library search path?])
+_LT_DECL([], [libname_spec], [1], [Format of library name prefix])
+_LT_DECL([], [library_names_spec], [1],
+    [[List of archive names.  First name is the real one, the rest are links.
+    The last name is the one that the linker finds with -lNAME]])
+_LT_DECL([], [soname_spec], [1],
+    [[The coded name of the library, if different from the real name]])
+_LT_DECL([], [install_override_mode], [1],
+    [Permission mode override for installation of shared libraries])
+_LT_DECL([], [postinstall_cmds], [2],
+    [Command to use after installation of a shared archive])
+_LT_DECL([], [postuninstall_cmds], [2],
+    [Command to use after uninstallation of a shared archive])
+_LT_DECL([], [finish_cmds], [2],
+    [Commands used to finish a libtool library installation in a directory])
+_LT_DECL([], [finish_eval], [1],
+    [[As "finish_cmds", except a single script fragment to be evaled but
+    not shown]])
+_LT_DECL([], [hardcode_into_libs], [0],
+    [Whether we should hardcode library paths into libraries])
+_LT_DECL([], [sys_lib_search_path_spec], [2],
+    [Compile-time system search path for libraries])
+_LT_DECL([], [sys_lib_dlsearch_path_spec], [2],
+    [Run-time system search path for libraries])
+])# _LT_SYS_DYNAMIC_LINKER
+
+
+# _LT_PATH_TOOL_PREFIX(TOOL, [PATH])
+# ----------------------------------
+# Find a `file'-like program named TOOL that can recognize shared libraries.
+# The directories listed in PATH (default: $PATH) are searched in order and
+# the first regular file named TOOL is cached in lt_cv_path_MAGIC_CMD.  When
+# $file_magic_test_file is set and $deplibs_check_method is a `file_magic'
+# method, the candidate is run on that test file and a warning is printed if
+# its output does not match the expected regex.  A MAGIC_CMD that is already
+# an absolute path (Unix or DOS style) is used as-is instead of searching.
+# On return MAGIC_CMD holds the cached result, which may be empty.
+AC_DEFUN([_LT_PATH_TOOL_PREFIX],
+[m4_require([_LT_DECL_EGREP])dnl
+AC_MSG_CHECKING([for $1])
+AC_CACHE_VAL(lt_cv_path_MAGIC_CMD,
+[case $MAGIC_CMD in
+dnl Doubled brackets reach the shell as one bracket expression followed by
+dnl a star, so any absolute path matches, the same way LT_PATH_LD tests it.
+[[\\/]]* | ?:[[\\/]]*)
+  lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path.
+  ;;
+*)
+  lt_save_MAGIC_CMD="$MAGIC_CMD"
+  lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+dnl $ac_dummy forces splitting on constant user-supplied paths.
+dnl POSIX.2 word splitting is done only on the output of word expansions,
+dnl not every word.  This closes a longstanding sh security hole.
+  ac_dummy="m4_if([$2], , $PATH, [$2])"
+  for ac_dir in $ac_dummy; do
+    IFS="$lt_save_ifs"
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/$1; then
+      lt_cv_path_MAGIC_CMD="$ac_dir/$1"
+      if test -n "$file_magic_test_file"; then
+	case $deplibs_check_method in
+	"file_magic "*)
+	  file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"`
+	  MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+	  if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+	    $EGREP "$file_magic_regex" > /dev/null; then
+	    :
+	  else
+	    cat <<_LT_EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such.  This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem.  Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+_LT_EOF
+	  fi ;;
+	esac
+      fi
+      break
+    fi
+  done
+  IFS="$lt_save_ifs"
+  MAGIC_CMD="$lt_save_MAGIC_CMD"
+  ;;
+esac])
+MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+if test -n "$MAGIC_CMD"; then
+  AC_MSG_RESULT($MAGIC_CMD)
+else
+  AC_MSG_RESULT(no)
+fi
+_LT_DECL([], [MAGIC_CMD], [0],
+	 [Used to examine libraries when file_magic_cmd begins with "file"])dnl
+])# _LT_PATH_TOOL_PREFIX
+
+# Old name:
+AU_ALIAS([AC_PATH_TOOL_PREFIX], [_LT_PATH_TOOL_PREFIX])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_PATH_TOOL_PREFIX], [])
+
+
+# _LT_PATH_MAGIC
+# --------------
+# Find a `file' program which can recognize a shared library.
+# The tool-prefixed name is looked up first, in /usr/bin and then in $PATH.
+# If that finds nothing and a tool prefix is in use, the unprefixed `file'
+# is looked up the same way; with no tool prefix, MAGIC_CMD is set to `:'.
+m4_defun([_LT_PATH_MAGIC],
+[_LT_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin$PATH_SEPARATOR$PATH)
+if test -z "$lt_cv_path_MAGIC_CMD"; then
+  if test -n "$ac_tool_prefix"; then
+    _LT_PATH_TOOL_PREFIX(file, /usr/bin$PATH_SEPARATOR$PATH)
+  else
+    MAGIC_CMD=:
+  fi
+fi
+])# _LT_PATH_MAGIC
+
+
+# LT_PATH_LD
+# ----------
+# Find the pathname to the GNU or non-GNU linker.
+# Adds the --with-gnu-ld configure option.  With GCC, the linker name is
+# taken from `$CC -print-prog-name=ld'; an absolute result is canonicalized
+# and used for LD unless LD is already set.  Otherwise $PATH is searched for
+# the program, preferring a GNU or a non-GNU ld according to $with_gnu_ld.
+# The result is cached in lt_cv_path_LD, configure stops with an error when
+# no linker is found, and LD is substituted into the output files.
+AC_DEFUN([LT_PATH_LD],
+[AC_REQUIRE([AC_PROG_CC])dnl
+AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_REQUIRE([AC_CANONICAL_BUILD])dnl
+m4_require([_LT_DECL_SED])dnl
+m4_require([_LT_DECL_EGREP])dnl
+m4_require([_LT_PROG_ECHO_BACKSLASH])dnl
+
+AC_ARG_WITH([gnu-ld],
+    [AS_HELP_STRING([--with-gnu-ld],
+	[assume the C compiler uses GNU ld @<:@default=no@:>@])],
+    [test "$withval" = no || with_gnu_ld=yes],
+    [with_gnu_ld=no])dnl
+
+ac_prog=ld
+if test "$GCC" = yes; then
+  # Check if gcc -print-prog-name=ld gives a path.
+  AC_MSG_CHECKING([for ld used by $CC])
+  case $host in
+  *-*-mingw*)
+    # gcc leaves a trailing carriage return which upsets mingw
+    ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;;
+  *)
+    ac_prog=`($CC -print-prog-name=ld) 2>&5` ;;
+  esac
+  case $ac_prog in
+    # Accept absolute paths.
+    [[\\/]]* | ?:[[\\/]]*)
+      re_direlt='/[[^/]][[^/]]*/\.\./'
+      # Canonicalize the pathname of ld
+      ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'`
+      while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do
+	ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"`
+      done
+      test -z "$LD" && LD="$ac_prog"
+      ;;
+  "")
+    # If it fails, then pretend we aren't using GCC.
+    ac_prog=ld
+    ;;
+  *)
+    # If it is relative, then search for the first ld in PATH.
+    with_gnu_ld=unknown
+    ;;
+  esac
+elif test "$with_gnu_ld" = yes; then
+  AC_MSG_CHECKING([for GNU ld])
+else
+  AC_MSG_CHECKING([for non-GNU ld])
+fi
+AC_CACHE_VAL(lt_cv_path_LD,
+[if test -z "$LD"; then
+  lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+  for ac_dir in $PATH; do
+    IFS="$lt_save_ifs"
+    test -z "$ac_dir" && ac_dir=.
+    if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
+      lt_cv_path_LD="$ac_dir/$ac_prog"
+      # Check to see if the program is GNU ld.  I'd rather use --version,
+      # but apparently some variants of GNU ld only accept -v.
+      # Break only if it was the GNU/non-GNU ld that we prefer.
+      case `"$lt_cv_path_LD" -v 2>&1 </dev/null` in
+      *GNU* | *'with BFD'*)
+	test "$with_gnu_ld" != no && break
+	;;
+      *)
+	test "$with_gnu_ld" != yes && break
+	;;
+      esac
+    fi
+  done
+  IFS="$lt_save_ifs"
+else
+  lt_cv_path_LD="$LD" # Let the user override the test with a path.
+fi])
+LD="$lt_cv_path_LD"
+if test -n "$LD"; then
+  AC_MSG_RESULT($LD)
+else
+  AC_MSG_RESULT(no)
+fi
+test -z "$LD" && AC_MSG_ERROR([no acceptable ld found in \$PATH])
+_LT_PATH_LD_GNU
+AC_SUBST([LD])
+
+_LT_TAGDECL([], [LD], [1], [The linker used to build libraries])
+])# LT_PATH_LD
+
+# Old names:
+AU_ALIAS([AM_PROG_LD], [LT_PATH_LD])
+AU_ALIAS([AC_PROG_LD], [LT_PATH_LD])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AM_PROG_LD], [])
+dnl AC_DEFUN([AC_PROG_LD], [])
+
+
+# _LT_PATH_LD_GNU
+# ---------------
+# Check whether $LD is GNU ld by inspecting the output of `$LD -v'.  The
+# answer is cached in lt_cv_prog_gnu_ld and then copied to with_gnu_ld.
+m4_defun([_LT_PATH_LD_GNU],
+[AC_CACHE_CHECK([if the linker ($LD) is GNU ld], lt_cv_prog_gnu_ld,
+[# I'd rather use --version here, but apparently some GNU lds only accept -v.
+case `$LD -v 2>&1 </dev/null` in
+*GNU* | *'with BFD'*)
+  lt_cv_prog_gnu_ld=yes
+  ;;
+*)
+  lt_cv_prog_gnu_ld=no
+  ;;
+esac])
+with_gnu_ld=$lt_cv_prog_gnu_ld
+])# _LT_PATH_LD_GNU
+
+
+# _LT_CMD_RELOAD
+# --------------
+# Find the reload flag for the linker.
+#   -- PORTME Some linkers may need a different reload flag.
+# The flag defaults to -r (cached in lt_cv_ld_reload_flag) and is given a
+# leading space when it is non-empty.  reload_cmds is set to `false' for
+# non-GCC compilers on cygwin, mingw, pw32 and cegcc hosts, and goes through
+# the compiler driver for GCC on darwin.
+m4_defun([_LT_CMD_RELOAD],
+[AC_CACHE_CHECK([for $LD option to reload object files],
+  lt_cv_ld_reload_flag,
+  [lt_cv_ld_reload_flag='-r'])
+reload_flag=$lt_cv_ld_reload_flag
+case $reload_flag in
+"" | " "*) ;;
+*) reload_flag=" $reload_flag" ;;
+esac
+reload_cmds='$LD$reload_flag -o $output$reload_objs'
+case $host_os in
+  cygwin* | mingw* | pw32* | cegcc*)
+    if test "$GCC" != yes; then
+      reload_cmds=false
+    fi
+    ;;
+  darwin*)
+    if test "$GCC" = yes; then
+      reload_cmds='$LTCC $LTCFLAGS -nostdlib ${wl}-r -o $output$reload_objs'
+    else
+      reload_cmds='$LD$reload_flag -o $output$reload_objs'
+    fi
+    ;;
+esac
+_LT_TAGDECL([], [reload_flag], [1], [How to create reloadable object files])dnl
+_LT_TAGDECL([], [reload_cmds], [2])dnl
+])# _LT_CMD_RELOAD
+
+
+# _LT_CHECK_MAGIC_METHOD
+# ----------------------
+# Decide how to check for library dependencies.
+#  -- PORTME fill in with the dynamic library characteristics
+# Selects, per $host_os, the value of deplibs_check_method (cached in
+# lt_cv_deplibs_check_method) together with the matching file_magic_cmd and,
+# where one is needed, a test file.  Host systems without a case below keep
+# the default `unknown'.  For native mingw and pw32 builds it also sets
+# want_nocaseglob or file_magic_glob so that file lookups can ignore case.
+m4_defun([_LT_CHECK_MAGIC_METHOD],
+[m4_require([_LT_DECL_EGREP])
+m4_require([_LT_DECL_OBJDUMP])
+AC_CACHE_CHECK([how to recognize dependent libraries],
+lt_cv_deplibs_check_method,
+[lt_cv_file_magic_cmd='$MAGIC_CMD'
+lt_cv_file_magic_test_file=
+lt_cv_deplibs_check_method='unknown'
+# Need to set the preceding variable on all platforms that support
+# interlibrary dependencies.
+# 'none' -- dependencies not supported.
+# `unknown' -- same as none, but documents that we really don't know.
+# 'pass_all' -- all dependencies passed with no checks.
+# 'test_compile' -- check by making test program.
+# 'file_magic [[regex]]' -- check by looking for files in library path
+# which responds to the $file_magic_cmd with a given extended regex.
+# If you have `file' or equivalent on your system and you're not sure
+# whether `pass_all' will *always* work, you probably want this one.
+
+case $host_os in
+aix[[4-9]]*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+beos*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+bsdi[[45]]*)
+  lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib)'
+  lt_cv_file_magic_cmd='/usr/bin/file -L'
+  lt_cv_file_magic_test_file=/shlib/libc.so
+  ;;
+
+cygwin*)
+  # func_win32_libid is a shell function defined in ltmain.sh
+  lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL'
+  lt_cv_file_magic_cmd='func_win32_libid'
+  ;;
+
+mingw* | pw32*)
+  # Base MSYS/MinGW do not provide the 'file' command needed by
+  # func_win32_libid shell function, so use a weaker test based on 'objdump',
+  # unless we find 'file', for example because we are cross-compiling.
+  # func_win32_libid assumes BSD nm, so disallow it if using MS dumpbin.
+  if ( test "$lt_cv_nm_interface" = "BSD nm" && file / ) >/dev/null 2>&1; then
+    lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL'
+    lt_cv_file_magic_cmd='func_win32_libid'
+  else
+    # Keep this pattern in sync with the one in func_win32_libid.
+    lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)'
+    lt_cv_file_magic_cmd='$OBJDUMP -f'
+  fi
+  ;;
+
+cegcc*)
+  # use the weaker test based on 'objdump'. See mingw*.
+  lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?'
+  lt_cv_file_magic_cmd='$OBJDUMP -f'
+  ;;
+
+darwin* | rhapsody*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+freebsd* | dragonfly*)
+  if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
+    case $host_cpu in
+    i*86 )
+      # Not sure whether the presence of OpenBSD here was a mistake.
+      # Let's accept both of them until this is cleared up.
+      lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library'
+      lt_cv_file_magic_cmd=/usr/bin/file
+      lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*`
+      ;;
+    esac
+  else
+    lt_cv_deplibs_check_method=pass_all
+  fi
+  ;;
+
+gnu*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+haiku*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+hpux10.20* | hpux11*)
+  lt_cv_file_magic_cmd=/usr/bin/file
+  case $host_cpu in
+  ia64*)
+    lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64'
+    lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so
+    ;;
+  hppa*64*)
+    [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]']
+    lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl
+    ;;
+  *)
+    lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]]\.[[0-9]]) shared library'
+    lt_cv_file_magic_test_file=/usr/lib/libc.sl
+    ;;
+  esac
+  ;;
+
+interix[[3-9]]*)
+  # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here
+  lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|\.a)$'
+  ;;
+
+irix5* | irix6* | nonstopux*)
+  case $LD in
+  *-32|*"-32 ") libmagic=32-bit;;
+  *-n32|*"-n32 ") libmagic=N32;;
+  *-64|*"-64 ") libmagic=64-bit;;
+  *) libmagic=never-match;;
+  esac
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+netbsd*)
+  if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
+    lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$'
+  else
+    lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|_pic\.a)$'
+  fi
+  ;;
+
+newos6*)
+  lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)'
+  lt_cv_file_magic_cmd=/usr/bin/file
+  lt_cv_file_magic_test_file=/usr/lib/libnls.so
+  ;;
+
+*nto* | *qnx*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+openbsd*)
+  if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+    lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|\.so|_pic\.a)$'
+  else
+    lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$'
+  fi
+  ;;
+
+osf3* | osf4* | osf5*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+rdos*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+solaris*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+sysv4 | sysv4.3*)
+  case $host_vendor in
+  motorola)
+    lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]'
+    lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*`
+    ;;
+  ncr)
+    lt_cv_deplibs_check_method=pass_all
+    ;;
+  sequent)
+    lt_cv_file_magic_cmd='/bin/file'
+    lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )'
+    ;;
+  sni)
+    lt_cv_file_magic_cmd='/bin/file'
+    lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib"
+    lt_cv_file_magic_test_file=/lib/libc.so
+    ;;
+  siemens)
+    lt_cv_deplibs_check_method=pass_all
+    ;;
+  pc)
+    lt_cv_deplibs_check_method=pass_all
+    ;;
+  esac
+  ;;
+
+tpf*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+esac
+])
+
+file_magic_glob=
+want_nocaseglob=no
+if test "$build" = "$host"; then
+  case $host_os in
+  mingw* | pw32*)
+    if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then
+      want_nocaseglob=yes
+    else
+      file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[[\1]]\/[[\1]]\/g;/g"`
+    fi
+    ;;
+  esac
+fi
+
+file_magic_cmd=$lt_cv_file_magic_cmd
+deplibs_check_method=$lt_cv_deplibs_check_method
+test -z "$deplibs_check_method" && deplibs_check_method=unknown
+
+_LT_DECL([], [deplibs_check_method], [1],
+    [Method to check whether dependent libraries are shared objects])
+_LT_DECL([], [file_magic_cmd], [1],
+    [Command to use when deplibs_check_method = "file_magic"])
+_LT_DECL([], [file_magic_glob], [1],
+    [How to find potential files when deplibs_check_method = "file_magic"])
+_LT_DECL([], [want_nocaseglob], [1],
+    [Find potential files using nocaseglob when deplibs_check_method = "file_magic"])
+])# _LT_CHECK_MAGIC_METHOD
+
+
+# LT_PATH_NM
+# ----------
+# Find the pathname to a BSD- or MS-compatible name lister.
+# A user-supplied $NM is used as-is.  Otherwise each candidate nm found in
+# $PATH and a few fixed directories is probed with -B and then with -p; the
+# first one accepting either flag wins, else the first nm found is kept.
+# When no nm is found at all, dumpbin is tried instead.  The interface of
+# the chosen lister ("BSD nm" or "MS dumpbin") is then cached in
+# lt_cv_nm_interface.
+AC_DEFUN([LT_PATH_NM],
+[AC_REQUIRE([AC_PROG_CC])dnl
+AC_CACHE_CHECK([for BSD- or MS-compatible name lister (nm)], lt_cv_path_NM,
+[if test -n "$NM"; then
+  # Let the user override the test.
+  lt_cv_path_NM="$NM"
+else
+  lt_nm_to_check="${ac_tool_prefix}nm"
+  if test -n "$ac_tool_prefix" && test "$build" = "$host"; then
+    lt_nm_to_check="$lt_nm_to_check nm"
+  fi
+  for lt_tmp_nm in $lt_nm_to_check; do
+    lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+    for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do
+      IFS="$lt_save_ifs"
+      test -z "$ac_dir" && ac_dir=.
+      tmp_nm="$ac_dir/$lt_tmp_nm"
+      if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then
+	# Check to see if the nm accepts a BSD-compat flag.
+	# Adding the `sed 1q' prevents false positives on HP-UX, which says:
+	#   nm: unknown option "B" ignored
+	# Tru64's nm complains that /dev/null is an invalid object file
+	case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in
+	*/dev/null* | *'Invalid file or object type'*)
+	  lt_cv_path_NM="$tmp_nm -B"
+	  break
+	  ;;
+	*)
+	  case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in
+	  */dev/null*)
+	    lt_cv_path_NM="$tmp_nm -p"
+	    break
+	    ;;
+	  *)
+	    lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but
+	    continue # so that we can try to find one that supports BSD flags
+	    ;;
+	  esac
+	  ;;
+	esac
+      fi
+    done
+    IFS="$lt_save_ifs"
+  done
+  : ${lt_cv_path_NM=no}
+fi])
+if test "$lt_cv_path_NM" != "no"; then
+  NM="$lt_cv_path_NM"
+else
+  # Didn't find any BSD compatible name lister, look for dumpbin.
+  if test -n "$DUMPBIN"; then :
+    # Let the user override the test.
+  else
+    AC_CHECK_TOOLS(DUMPBIN, [dumpbin "link -dump"], :)
+    case `$DUMPBIN -symbols /dev/null 2>&1 | sed '1q'` in
+    *COFF*)
+      DUMPBIN="$DUMPBIN -symbols"
+      ;;
+    *)
+      DUMPBIN=:
+      ;;
+    esac
+  fi
+  AC_SUBST([DUMPBIN])
+  if test "$DUMPBIN" != ":"; then
+    NM="$DUMPBIN"
+  fi
+fi
+test -z "$NM" && NM=nm
+AC_SUBST([NM])
+_LT_DECL([], [NM], [1], [A BSD- or MS-compatible name lister])dnl
+
+AC_CACHE_CHECK([the name lister ($NM) interface], [lt_cv_nm_interface],
+  [lt_cv_nm_interface="BSD nm"
+  echo "int some_variable = 0;" > conftest.$ac_ext
+  (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&AS_MESSAGE_LOG_FD)
+  (eval "$ac_compile" 2>conftest.err)
+  cat conftest.err >&AS_MESSAGE_LOG_FD
+  (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&AS_MESSAGE_LOG_FD)
+  (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out)
+  cat conftest.err >&AS_MESSAGE_LOG_FD
+  (eval echo "\"\$as_me:$LINENO: output\"" >&AS_MESSAGE_LOG_FD)
+  cat conftest.out >&AS_MESSAGE_LOG_FD
+  if $GREP 'External.*some_variable' conftest.out > /dev/null; then
+    lt_cv_nm_interface="MS dumpbin"
+  fi
+  rm -f conftest*])
+])# LT_PATH_NM
+
+# Old names:
+AU_ALIAS([AM_PROG_NM], [LT_PATH_NM])
+AU_ALIAS([AC_PROG_NM], [LT_PATH_NM])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AM_PROG_NM], [])
+dnl AC_DEFUN([AC_PROG_NM], [])
+
+# _LT_CHECK_SHAREDLIB_FROM_LINKLIB
+# --------------------------------
+# Decide how to determine the name of the shared library
+# associated with a specific link library.
+#  -- PORTME fill in with the dynamic library characteristics
+# On cygwin, mingw, pw32 and cegcc one of two ltmain.sh shell functions is
+# selected, depending on whether $DLLTOOL advertises --identify-strict.
+# Elsewhere the link library is assumed to be the shared library itself and
+# $ECHO is used.  The choice is cached in lt_cv_sharedlib_from_linklib_cmd.
+m4_defun([_LT_CHECK_SHAREDLIB_FROM_LINKLIB],
+[m4_require([_LT_DECL_EGREP])
+m4_require([_LT_DECL_OBJDUMP])
+m4_require([_LT_DECL_DLLTOOL])
+AC_CACHE_CHECK([how to associate runtime and link libraries],
+lt_cv_sharedlib_from_linklib_cmd,
+[lt_cv_sharedlib_from_linklib_cmd='unknown'
+
+case $host_os in
+cygwin* | mingw* | pw32* | cegcc*)
+  # two different shell functions defined in ltmain.sh
+  # decide which to use based on capabilities of $DLLTOOL
+  case `$DLLTOOL --help 2>&1` in
+  *--identify-strict*)
+    lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib
+    ;;
+  *)
+    lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback
+    ;;
+  esac
+  ;;
+*)
+  # fallback: assume linklib IS sharedlib
+  lt_cv_sharedlib_from_linklib_cmd="$ECHO"
+  ;;
+esac
+])
+sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd
+test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO
+
+_LT_DECL([], [sharedlib_from_linklib_cmd], [1],
+    [Command to associate shared and link libraries])
+])# _LT_CHECK_SHAREDLIB_FROM_LINKLIB
+
+
+# _LT_PATH_MANIFEST_TOOL
+# ----------------------
+# Locate the manifest tool (mt).  The candidate is accepted only when
+# running it with `-?' prints the words `Manifest Tool' on standard output;
+# otherwise MANIFEST_TOOL is set to `:'.
+# NOTE(review): the cache variable is spelled lt_cv_path_mainfest_tool; the
+# misspelling is left alone here because renaming it changes the cache key.
+m4_defun([_LT_PATH_MANIFEST_TOOL],
+[AC_CHECK_TOOL(MANIFEST_TOOL, mt, :)
+test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt
+AC_CACHE_CHECK([if $MANIFEST_TOOL is a manifest tool], [lt_cv_path_mainfest_tool],
+  [lt_cv_path_mainfest_tool=no
+  echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&AS_MESSAGE_LOG_FD
+  $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out
+  cat conftest.err >&AS_MESSAGE_LOG_FD
+  if $GREP 'Manifest Tool' conftest.out > /dev/null; then
+    lt_cv_path_mainfest_tool=yes
+  fi
+  rm -f conftest*])
+if test "x$lt_cv_path_mainfest_tool" != xyes; then
+  MANIFEST_TOOL=:
+fi
+_LT_DECL([], [MANIFEST_TOOL], [1], [Manifest tool])dnl
+])# _LT_PATH_MANIFEST_TOOL
+
+
+# LT_LIB_M
+# --------
+# Check for the math library and substitute the result as LIBM.
+# LIBM stays empty on the hosts listed as not having or not needing libm,
+# may receive -lmw and -lm on *-ncr-sysv4.3*, and is set to -lm elsewhere
+# when `cos' is found in libm.
+AC_DEFUN([LT_LIB_M],
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+LIBM=
+case $host in
+*-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-pw32* | *-*-darwin*)
+  # These systems don't have libm, or don't need it
+  ;;
+*-ncr-sysv4.3*)
+  AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM="-lmw")
+  AC_CHECK_LIB(m, cos, LIBM="$LIBM -lm")
+  ;;
+*)
+  AC_CHECK_LIB(m, cos, LIBM="-lm")
+  ;;
+esac
+AC_SUBST([LIBM])
+])# LT_LIB_M
+
+# Old name:
+AU_ALIAS([AC_CHECK_LIBM], [LT_LIB_M])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_CHECK_LIBM], [])
+
+
+# _LT_COMPILER_NO_RTTI([TAGNAME])
+# -------------------------------
+# Set the per-tag lt_prog_compiler_no_builtin_flag variable.  It is empty
+# unless GCC is in use; with GCC it starts as -fno-builtin (passed through
+# -Xcompiler for nvcc) and gets -fno-rtti -fno-exceptions appended when the
+# compiler accepts those options.
+m4_defun([_LT_COMPILER_NO_RTTI],
+[m4_require([_LT_TAG_COMPILER])dnl
+
+_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=
+
+if test "$GCC" = yes; then
+  case $cc_basename in
+  nvcc*)
+    _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -Xcompiler -fno-builtin' ;;
+  *)
+    _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' ;;
+  esac
+
+  _LT_COMPILER_OPTION([if $compiler supports -fno-rtti -fno-exceptions],
+    lt_cv_prog_compiler_rtti_exceptions,
+    [-fno-rtti -fno-exceptions], [],
+    [_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)="$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) -fno-rtti -fno-exceptions"])
+fi
+_LT_TAGDECL([no_builtin_flag], [lt_prog_compiler_no_builtin_flag], [1],
+	[Compiler flag to turn off builtin functions])
+])# _LT_COMPILER_NO_RTTI
+
+
+# _LT_CMD_GLOBAL_SYMBOLS
+# ----------------------
+# Work out the command pipeline that turns $NM output into lines holding a
+# symbol code, the raw symbol name and the C symbol name.  The pipeline is
+# tried first without and then with a leading underscore prefix; each
+# attempt is validated by compiling a small test program, extracting its
+# symbols, generating a preloaded-symbol table from them and linking the two
+# objects together.  The macro also sets the sed commands that convert such
+# lines into C declarations and name/address pairs, and detects @FILE
+# response-file support in $NM.
+m4_defun([_LT_CMD_GLOBAL_SYMBOLS],
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_REQUIRE([AC_PROG_CC])dnl
+AC_REQUIRE([AC_PROG_AWK])dnl
+AC_REQUIRE([LT_PATH_NM])dnl
+AC_REQUIRE([LT_PATH_LD])dnl
+m4_require([_LT_DECL_SED])dnl
+m4_require([_LT_DECL_EGREP])dnl
+m4_require([_LT_TAG_COMPILER])dnl
+
+# Check for command to grab the raw symbol name followed by C symbol from nm.
+AC_MSG_CHECKING([command to parse $NM output from $compiler object])
+AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe],
+[
+# These are sane defaults that work on at least a few old systems.
+# [They come from Ultrix.  What could be older than Ultrix?!! ;)]
+
+# Character class describing NM global symbol codes.
+symcode='[[BCDEGRST]]'
+
+# Regexp to match symbols that can be accessed directly from C.
+sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)'
+
+# Define system-specific variables.
+case $host_os in
+aix*)
+  symcode='[[BCDT]]'
+  ;;
+cygwin* | mingw* | pw32* | cegcc*)
+  symcode='[[ABCDGISTW]]'
+  ;;
+hpux*)
+  if test "$host_cpu" = ia64; then
+    symcode='[[ABCDEGRST]]'
+  fi
+  ;;
+irix* | nonstopux*)
+  symcode='[[BCDEGRST]]'
+  ;;
+osf*)
+  symcode='[[BCDEGQRST]]'
+  ;;
+solaris*)
+  symcode='[[BDRT]]'
+  ;;
+sco3.2v5*)
+  symcode='[[DT]]'
+  ;;
+sysv4.2uw2*)
+  symcode='[[DT]]'
+  ;;
+sysv5* | sco5v6* | unixware* | OpenUNIX*)
+  symcode='[[ABDT]]'
+  ;;
+sysv4)
+  symcode='[[DFNSTU]]'
+  ;;
+esac
+
+# If we're using GNU nm, then use its standard symbol codes.
+case `$NM -V 2>&1` in
+*GNU* | *'with BFD'*)
+  symcode='[[ABCDGIRSTW]]' ;;
+esac
+
+# Transform an extracted symbol line into a proper C declaration.
+# Some systems (esp. on ia64) link data and code symbols differently,
+# so use this general approach.
+lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
+
+# Transform an extracted symbol line into symbol name and symbol address
+lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\)[[ ]]*$/  {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/  {\"\2\", (void *) \&\2},/p'"
+lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n -e 's/^: \([[^ ]]*\)[[ ]]*$/  {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \(lib[[^ ]]*\)$/  {\"\2\", (void *) \&\2},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/  {\"lib\2\", (void *) \&\2},/p'"
+
+# Handle CRLF in mingw tool chain
+opt_cr=
+case $build_os in
+mingw*)
+  opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp
+  ;;
+esac
+
+# Try without a prefix underscore, then with it.
+for ac_symprfx in "" "_"; do
+
+  # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol.
+  symxfrm="\\1 $ac_symprfx\\2 \\2"
+
+  # Write the raw and C identifiers.
+  if test "$lt_cv_nm_interface" = "MS dumpbin"; then
+    # Fake it for dumpbin and say T for any non-static function
+    # and D for any global variable.
+    # Also find C++ and __fastcall symbols from MSVC++,
+    # which start with @ or ?.
+    lt_cv_sys_global_symbol_pipe="$AWK ['"\
+"     {last_section=section; section=\$ 3};"\
+"     /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\
+"     /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\
+"     \$ 0!~/External *\|/{next};"\
+"     / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\
+"     {if(hide[section]) next};"\
+"     {f=0}; \$ 0~/\(\).*\|/{f=1}; {printf f ? \"T \" : \"D \"};"\
+"     {split(\$ 0, a, /\||\r/); split(a[2], s)};"\
+"     s[1]~/^[@?]/{print s[1], s[1]; next};"\
+"     s[1]~prfx {split(s[1],t,\"@\"); print t[1], substr(t[1],length(prfx))}"\
+"     ' prfx=^$ac_symprfx]"
+  else
+    lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[	 ]]\($symcode$symcode*\)[[	 ]][[	 ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'"
+  fi
+  lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'"
+
+  # Check to see that the pipe works correctly.
+  pipe_works=no
+
+  rm -f conftest*
+  cat > conftest.$ac_ext <<_LT_EOF
+#ifdef __cplusplus
+extern "C" {
+#endif
+char nm_test_var;
+void nm_test_func(void);
+void nm_test_func(void){}
+#ifdef __cplusplus
+}
+#endif
+int main(){nm_test_var='a';nm_test_func();return(0);}
+_LT_EOF
+
+  if AC_TRY_EVAL(ac_compile); then
+    # Now try to grab the symbols.
+    nlist=conftest.nm
+    if AC_TRY_EVAL(NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) && test -s "$nlist"; then
+      # Try sorting and uniquifying the output.
+      if sort "$nlist" | uniq > "$nlist"T; then
+	mv -f "$nlist"T "$nlist"
+      else
+	rm -f "$nlist"T
+      fi
+
+      # Make sure that we snagged all the symbols we need.
+      if $GREP ' nm_test_var$' "$nlist" >/dev/null; then
+	if $GREP ' nm_test_func$' "$nlist" >/dev/null; then
+	  cat <<_LT_EOF > conftest.$ac_ext
+/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests.  */
+#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE)
+/* DATA imports from DLLs on WIN32 con't be const, because runtime
+   relocations are performed -- see ld's documentation on pseudo-relocs.  */
+# define LT@&t@_DLSYM_CONST
+#elif defined(__osf__)
+/* This system does not cope well with relocations in const data.  */
+# define LT@&t@_DLSYM_CONST
+#else
+# define LT@&t@_DLSYM_CONST const
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+_LT_EOF
+	  # Now generate the symbol file.
+	  eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext'
+
+	  cat <<_LT_EOF >> conftest.$ac_ext
+
+/* The mapping between symbol names and symbols.  */
+LT@&t@_DLSYM_CONST struct {
+  const char *name;
+  void       *address;
+}
+lt__PROGRAM__LTX_preloaded_symbols[[]] =
+{
+  { "@PROGRAM@", (void *) 0 },
+_LT_EOF
+	  $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/  {\"\2\", (void *) \&\2},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext
+	  cat <<\_LT_EOF >> conftest.$ac_ext
+  {0, (void *) 0}
+};
+
+/* This works around a problem in FreeBSD linker */
+#ifdef FREEBSD_WORKAROUND
+static const void *lt_preloaded_setup() {
+  return lt__PROGRAM__LTX_preloaded_symbols;
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+_LT_EOF
+	  # Now try linking the two files.
+	  mv conftest.$ac_objext conftstm.$ac_objext
+	  lt_globsym_save_LIBS=$LIBS
+	  lt_globsym_save_CFLAGS=$CFLAGS
+	  LIBS="conftstm.$ac_objext"
+	  CFLAGS="$CFLAGS$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)"
+	  if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext}; then
+	    pipe_works=yes
+	  fi
+	  LIBS=$lt_globsym_save_LIBS
+	  CFLAGS=$lt_globsym_save_CFLAGS
+	else
+	  echo "cannot find nm_test_func in $nlist" >&AS_MESSAGE_LOG_FD
+	fi
+      else
+	echo "cannot find nm_test_var in $nlist" >&AS_MESSAGE_LOG_FD
+      fi
+    else
+      echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD
+    fi
+  else
+    echo "$progname: failed program was:" >&AS_MESSAGE_LOG_FD
+    cat conftest.$ac_ext >&5
+  fi
+  rm -rf conftest* conftst*
+
+  # Do not use the global_symbol_pipe unless it works.
+  if test "$pipe_works" = yes; then
+    break
+  else
+    lt_cv_sys_global_symbol_pipe=
+  fi
+done
+])
+if test -z "$lt_cv_sys_global_symbol_pipe"; then
+  lt_cv_sys_global_symbol_to_cdecl=
+fi
+if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then
+  AC_MSG_RESULT(failed)
+else
+  AC_MSG_RESULT(ok)
+fi
+
+# Response file support.
+if test "$lt_cv_nm_interface" = "MS dumpbin"; then
+  nm_file_list_spec='@'
+elif $NM --help 2>/dev/null | grep '[[@]]FILE' >/dev/null; then
+  nm_file_list_spec='@'
+fi
+
+_LT_DECL([global_symbol_pipe], [lt_cv_sys_global_symbol_pipe], [1],
+    [Take the output of nm and produce a listing of raw symbols and C names])
+_LT_DECL([global_symbol_to_cdecl], [lt_cv_sys_global_symbol_to_cdecl], [1],
+    [Transform the output of nm in a proper C declaration])
+_LT_DECL([global_symbol_to_c_name_address],
+    [lt_cv_sys_global_symbol_to_c_name_address], [1],
+    [Transform the output of nm in a C name address pair])
+_LT_DECL([global_symbol_to_c_name_address_lib_prefix],
+    [lt_cv_sys_global_symbol_to_c_name_address_lib_prefix], [1],
+    [Transform the output of nm in a C name address pair when lib prefix is needed])
+_LT_DECL([], [nm_file_list_spec], [1],
+    [Specify filename containing input files for $NM])
+]) # _LT_CMD_GLOBAL_SYMBOLS
+
+
+# _LT_COMPILER_PIC([TAGNAME])
+# ---------------------------
+m4_defun([_LT_COMPILER_PIC],
+[m4_require([_LT_TAG_COMPILER])dnl
+_LT_TAGVAR(lt_prog_compiler_wl, $1)=
+_LT_TAGVAR(lt_prog_compiler_pic, $1)=
+_LT_TAGVAR(lt_prog_compiler_static, $1)=
+# Pick the wl, pic and static flags for this tag from the compiler and host OS.  The checks further down verify them.
+m4_if([$1], [CXX], [
+  # C++-specific cases for the pic, static and wl flags.
+  if test "$GXX" = yes; then
+    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+    _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+
+    case $host_os in
+    aix*)
+      # All AIX code is PIC.
+      if test "$host_cpu" = ia64; then
+	# AIX 5 now supports the IA64 processor.
+	_LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # See the comment about AmigaOS4 .so support.
+            _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+        ;;
+      m68k)
+            # FIXME: we need at least 68020 code to build shared libraries, but
+            # adding the `-m68020' flag to GCC prevents building anything better,
+            # like `-m68040'.
+            _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4'
+        ;;
+      esac
+      ;;
+
+    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
+      # PIC is the default for these OSes.
+      ;;
+    mingw* | cygwin* | os2* | pw32* | cegcc*)
+      # This hack is so that the source file can tell whether it is being
+      # built for inclusion in a dll (and should export symbols for example).
+      # Although the Cygwin gcc ignores -fPIC, this is still needed for
+      # old-style (--disable-auto-import) libraries.
+      m4_if([$1], [GCJ], [],
+	[_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'])
+      ;;
+    darwin* | rhapsody*)
+      # PIC is the default on this platform
+      # Common symbols not allowed in MH_DYLIB files
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common'
+      ;;
+    *djgpp*)
+      # DJGPP does not support shared libraries at all
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)=
+      ;;
+    haiku*)
+      # PIC is the default for Haiku.
+      # The "-static" flag exists, but is broken.
+      _LT_TAGVAR(lt_prog_compiler_static, $1)=
+      ;;
+    interix[[3-9]]*)
+      # Interix 3.x gcc -fpic/-fPIC options generate broken code.
+      # Instead, we relocate shared libraries at runtime.
+      ;;
+    sysv4*MP*)
+      if test -d /usr/nec; then
+	_LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic
+      fi
+      ;;
+    hpux*)
+      # PIC is the default for 64-bit PA HP-UX, but not for 32-bit
+      # PA HP-UX.  On IA64 HP-UX, PIC is the default but the pic flag
+      # sets the default TLS model and affects inlining.
+      case $host_cpu in
+      hppa*64*)
+	;;
+      *)
+	_LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+	;;
+      esac
+      ;;
+    *qnx* | *nto*)
+      # QNX uses GNU C++, but the -shared option must be given too, otherwise
+      # it will dump core.
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared'
+      ;;
+    *)
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+      ;;
+    esac
+  else
+    case $host_os in
+      aix[[4-9]]*)
+	# All AIX code is PIC.
+	if test "$host_cpu" = ia64; then
+	  # AIX 5 now supports the IA64 processor.
+	  _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+	else
+	  _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp'
+	fi
+	;;
+      chorus*)
+	case $cc_basename in
+	cxch68*)
+	  # Green Hills C++ Compiler
+	  # _LT_TAGVAR(lt_prog_compiler_static, $1)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a"
+	  ;;
+	esac
+	;;
+      mingw* | cygwin* | os2* | pw32* | cegcc*)
+	# This hack is so that the source file can tell whether it is being
+	# built for inclusion in a dll (and should export symbols for example).
+	m4_if([$1], [GCJ], [],
+	  [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'])
+	;;
+      dgux*)
+	case $cc_basename in
+	  ec++*)
+	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+	    ;;
+	  ghcx*)
+	    # Green Hills C++ Compiler
+	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
+	    ;;
+	  *)
+	    ;;
+	esac
+	;;
+      freebsd* | dragonfly*)
+	# FreeBSD uses GNU C++
+	;;
+      hpux9* | hpux10* | hpux11*)
+	case $cc_basename in
+	  CC*)
+	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+	    _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive'
+	    if test "$host_cpu" != ia64; then
+	      _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z'
+	    fi
+	    ;;
+	  aCC*)
+	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+	    _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive'
+	    case $host_cpu in
+	    hppa*64*|ia64*)
+	      # +Z is the default
+	      ;;
+	    *)
+	      _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z'
+	      ;;
+	    esac
+	    ;;
+	  *)
+	    ;;
+	esac
+	;;
+      interix*)
+	# This is c89, which is MS Visual C++ (no shared libs)
+	# Does anyone want to do a port?
+	;;
+      irix5* | irix6* | nonstopux*)
+	case $cc_basename in
+	  CC*)
+	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
+	    # CC pic flag -KPIC is the default.
+	    ;;
+	  *)
+	    ;;
+	esac
+	;;
+      linux* | k*bsd*-gnu | kopensolaris*-gnu)
+	case $cc_basename in
+	  KCC*)
+	    # KAI C++ Compiler
+	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,'
+	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+	    ;;
+	  ecpc* )
+	    # Old Intel C++ for x86_64, which still supported -KPIC.
+	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+	    ;;
+	  icpc* )
+	    # Intel C++, used to be incompatible with GCC.
+	    # ICC 10 doesn't accept -KPIC any more.
+	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+	    ;;
+	  pgCC* | pgcpp*)
+	    # Portland Group C++ compiler
+	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
+	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+	    ;;
+	  cxx*)
+	    # Compaq C++
+	    # Make sure the PIC flag is empty.  It appears that all Alpha
+	    # Linux and Compaq Tru64 Unix objects are PIC.
+	    _LT_TAGVAR(lt_prog_compiler_pic, $1)=
+	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
+	    ;;
+	  xlc* | xlC* | bgxl[[cC]]* | mpixl[[cC]]*)
+	    # IBM XL 8.0, 9.0 on PPC and BlueGene
+	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic'
+	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink'
+	    ;;
+	  *)
+	    case `$CC -V 2>&1 | sed 5q` in
+	    *Sun\ C*)
+	      # Sun C++ 5.9
+	      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+	      _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+	      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
+	      ;;
+	    esac
+	    ;;
+	esac
+	;;
+      lynxos*)
+	;;
+      m88k*)
+	;;
+      mvs*)
+	case $cc_basename in
+	  cxx*)
+	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-W c,exportall'
+	    ;;
+	  *)
+	    ;;
+	esac
+	;;
+      netbsd*)
+	;;
+      *qnx* | *nto*)
+        # QNX uses GNU C++, but the -shared option must be given too, otherwise
+        # it will dump core.
+        _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared'
+        ;;
+      osf3* | osf4* | osf5*)
+	case $cc_basename in
+	  KCC*)
+	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,'
+	    ;;
+	  RCC*)
+	    # Rational C++ 2.4.1
+	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
+	    ;;
+	  cxx*)
+	    # Digital/Compaq C++
+	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+	    # Make sure the PIC flag is empty.  It appears that all Alpha
+	    # Linux and Compaq Tru64 Unix objects are PIC.
+	    _LT_TAGVAR(lt_prog_compiler_pic, $1)=
+	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
+	    ;;
+	  *)
+	    ;;
+	esac
+	;;
+      psos*)
+	;;
+      solaris*)
+	case $cc_basename in
+	  CC* | sunCC*)
+	    # Sun C++ 4.2, 5.x and Centerline C++
+	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
+	    ;;
+	  gcx*)
+	    # Green Hills C++ Compiler
+	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC'
+	    ;;
+	  *)
+	    ;;
+	esac
+	;;
+      sunos4*)
+	case $cc_basename in
+	  CC*)
+	    # Sun C++ 4.x
+	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
+	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+	    ;;
+	  lcc*)
+	    # Lucid
+	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
+	    ;;
+	  *)
+	    ;;
+	esac
+	;;
+      sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
+	case $cc_basename in
+	  CC*)
+	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+	    ;;
+	esac
+	;;
+      tandem*)
+	case $cc_basename in
+	  NCC*)
+	    # NonStop-UX NCC 3.20
+	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+	    ;;
+	  *)
+	    ;;
+	esac
+	;;
+      vxworks*)
+	;;
+      *)
+	_LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
+	;;
+    esac
+  fi
+],
+[
+  if test "$GCC" = yes; then
+    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+    _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+
+    case $host_os in
+      aix*)
+      # All AIX code is PIC.
+      if test "$host_cpu" = ia64; then
+	# AIX 5 now supports the IA64 processor.
+	_LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # See the comment about AmigaOS4 .so support.
+            _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+        ;;
+      m68k)
+            # FIXME: we need at least 68020 code to build shared libraries, but
+            # adding the `-m68020' flag to GCC prevents building anything better,
+            # like `-m68040'.
+            _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4'
+        ;;
+      esac
+      ;;
+
+    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
+      # PIC is the default for these OSes.
+      ;;
+
+    mingw* | cygwin* | pw32* | os2* | cegcc*)
+      # This hack is so that the source file can tell whether it is being
+      # built for inclusion in a dll (and should export symbols for example).
+      # Although the Cygwin gcc ignores -fPIC, this is still needed for
+      # old-style (--disable-auto-import) libraries.
+      m4_if([$1], [GCJ], [],
+	[_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'])
+      ;;
+
+    darwin* | rhapsody*)
+      # PIC is the default on this platform
+      # Common symbols not allowed in MH_DYLIB files
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common'
+      ;;
+
+    haiku*)
+      # PIC is the default for Haiku.
+      # The "-static" flag exists, but is broken.
+      _LT_TAGVAR(lt_prog_compiler_static, $1)=
+      ;;
+
+    hpux*)
+      # PIC is the default for 64-bit PA HP-UX, but not for 32-bit
+      # PA HP-UX.  On IA64 HP-UX, PIC is the default but the pic flag
+      # sets the default TLS model and affects inlining.
+      case $host_cpu in
+      hppa*64*)
+	# PIC is the default
+	;;
+      *)
+	_LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+	;;
+      esac
+      ;;
+
+    interix[[3-9]]*)
+      # Interix 3.x gcc -fpic/-fPIC options generate broken code.
+      # Instead, we relocate shared libraries at runtime.
+      ;;
+
+    msdosdjgpp*)
+      # Just because we use GCC doesn't mean we suddenly get shared libraries
+      # on systems that don't support them.
+      _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
+      enable_shared=no
+      ;;
+
+    *nto* | *qnx*)
+      # QNX uses GNU C++, but the -shared option must be given too, otherwise
+      # it will dump core.
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared'
+      ;;
+
+    sysv4*MP*)
+      if test -d /usr/nec; then
+	_LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic
+      fi
+      ;;
+
+    *)
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+      ;;
+    esac
+
+    case $cc_basename in
+    nvcc*) # Cuda Compiler Driver 2.2
+      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Xlinker '
+      if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then
+        _LT_TAGVAR(lt_prog_compiler_pic, $1)="-Xcompiler $_LT_TAGVAR(lt_prog_compiler_pic, $1)"
+      fi
+      ;;
+    esac
+  else
+    # PORTME Check for flag to pass linker flags through the system compiler.
+    case $host_os in
+    aix*)
+      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+      if test "$host_cpu" = ia64; then
+	# AIX 5 now supports the IA64 processor.
+	_LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+      else
+	_LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp'
+      fi
+      ;;
+
+    mingw* | cygwin* | pw32* | os2* | cegcc*)
+      # This hack is so that the source file can tell whether it is being
+      # built for inclusion in a dll (and should export symbols for example).
+      m4_if([$1], [GCJ], [],
+	[_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'])
+      ;;
+
+    hpux9* | hpux10* | hpux11*)
+      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+      # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but
+      # not for PA HP-UX.
+      case $host_cpu in
+      hppa*64*|ia64*)
+	# +Z is the default
+	;;
+      *)
+	_LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z'
+	;;
+      esac
+      # Is there a better lt_prog_compiler_static that works with the bundled CC?
+      _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive'
+      ;;
+
+    irix5* | irix6* | nonstopux*)
+      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+      # PIC (with -KPIC) is the default.
+      _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
+      ;;
+
+    linux* | k*bsd*-gnu | kopensolaris*-gnu)
+      case $cc_basename in
+      # Old Intel compiler for x86_64, which still supported -KPIC.
+      ecc*)
+	_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+	_LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+	_LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+        ;;
+      # icc used to be incompatible with GCC.
+      # ICC 10 doesn't accept -KPIC any more.
+      icc* | ifort*)
+	_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+	_LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+	_LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+        ;;
+      # Lahey Fortran 8.1.
+      lf95*)
+	_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+	_LT_TAGVAR(lt_prog_compiler_pic, $1)='--shared'
+	_LT_TAGVAR(lt_prog_compiler_static, $1)='--static'
+	;;
+      nagfor*)
+	# NAG Fortran compiler
+	_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,'
+	_LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC'
+	_LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+	;;
+      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*)
+        # Portland Group compilers (*not* the Pentium gcc compiler,
+	# which looks to be a dead project)
+	_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+	_LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
+	_LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+        ;;
+      ccc*)
+        _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+        # All Alpha code is PIC.
+        _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
+        ;;
+      xl* | bgxl* | bgf* | mpixl*)
+	# IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene
+	_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+	_LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic'
+	_LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink'
+	;;
+      *)
+	case `$CC -V 2>&1 | sed 5q` in
+	*Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [[1-7]].* | *Sun*Fortran*\ 8.[[0-3]]*)
+	  # Sun Fortran 8.3 passes all unrecognized flags to the linker
+	  _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+	  _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+	  _LT_TAGVAR(lt_prog_compiler_wl, $1)=''
+	  ;;
+	*Sun\ F* | *Sun*Fortran*)
+	  _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+	  _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+	  _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
+	  ;;
+	*Sun\ C*)
+	  # Sun C 5.9
+	  _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+	  _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+	  _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+	  ;;
+        *Intel*\ [[CF]]*Compiler*)
+	  _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+	  _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+	  _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+	  ;;
+	*Portland\ Group*)
+	  _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+	  _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
+	  _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+	  ;;
+	esac
+	;;
+      esac
+      ;;
+
+    newsos6)
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+      _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+      ;;
+
+    *nto* | *qnx*)
+      # QNX uses GNU C++, but the -shared option must be given too, otherwise
+      # it will dump core.
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared'
+      ;;
+
+    osf3* | osf4* | osf5*)
+      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+      # All OSF/1 code is PIC.
+      _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
+      ;;
+
+    rdos*)
+      _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
+      ;;
+
+    solaris*)
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+      _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+      case $cc_basename in
+      f77* | f90* | f95* | sunf77* | sunf90* | sunf95*)
+	_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ';;
+      *)
+	_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,';;
+      esac
+      ;;
+
+    sunos4*)
+      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC'
+      _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+      ;;
+
+    sysv4 | sysv4.2uw2* | sysv4.3*)
+      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+      _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+      ;;
+
+    sysv4*MP*)
+      if test -d /usr/nec ;then
+	_LT_TAGVAR(lt_prog_compiler_pic, $1)='-Kconform_pic'
+	_LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+      fi
+      ;;
+
+    sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
+      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+      _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+      ;;
+
+    unicos*)
+      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+      _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
+      ;;
+
+    uts4*)
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
+      _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+      ;;
+
+    *)
+      _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
+      ;;
+    esac
+  fi
+])
+case $host_os in
+  # For platforms which do not support PIC, -DPIC is meaningless:
+  *djgpp*)
+    _LT_TAGVAR(lt_prog_compiler_pic, $1)=
+    ;;
+  *)
+    _LT_TAGVAR(lt_prog_compiler_pic, $1)="$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])"
+    ;;
+esac
+# Cache the selected PIC flag and read it back from the cache variable.
+AC_CACHE_CHECK([for $compiler option to produce PIC],
+  [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)],
+  [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_prog_compiler_pic, $1)])
+_LT_TAGVAR(lt_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)
+
+#
+# Check to make sure the PIC flag actually works.
+#
+if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then
+  _LT_COMPILER_OPTION([if $compiler PIC flag $_LT_TAGVAR(lt_prog_compiler_pic, $1) works],
+    [_LT_TAGVAR(lt_cv_prog_compiler_pic_works, $1)],
+    [$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])], [],
+    [case $_LT_TAGVAR(lt_prog_compiler_pic, $1) in
+     "" | " "*) ;;
+     *) _LT_TAGVAR(lt_prog_compiler_pic, $1)=" $_LT_TAGVAR(lt_prog_compiler_pic, $1)" ;;
+     esac],
+    [_LT_TAGVAR(lt_prog_compiler_pic, $1)=
+     _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no])
+fi
+_LT_TAGDECL([pic_flag], [lt_prog_compiler_pic], [1],
+	[Additional compiler flags for building library objects])
+
+_LT_TAGDECL([wl], [lt_prog_compiler_wl], [1],
+	[How to pass a linker flag through the compiler])
+#
+# Check to make sure the static flag actually works.
+#
+wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) eval lt_tmp_static_flag=\"$_LT_TAGVAR(lt_prog_compiler_static, $1)\"
+_LT_LINKER_OPTION([if $compiler static flag $lt_tmp_static_flag works],
+  _LT_TAGVAR(lt_cv_prog_compiler_static_works, $1),
+  $lt_tmp_static_flag,
+  [],
+  [_LT_TAGVAR(lt_prog_compiler_static, $1)=])
+_LT_TAGDECL([link_static_flag], [lt_prog_compiler_static], [1],
+	[Compiler flag to prevent dynamic linking])
+])# _LT_COMPILER_PIC
+
+
+# _LT_LINKER_SHLIBS([TAGNAME])
+# ----------------------------
+# See if the linker supports building shared libraries.
+m4_defun([_LT_LINKER_SHLIBS],
+[AC_REQUIRE([LT_PATH_LD])dnl
+AC_REQUIRE([LT_PATH_NM])dnl
+m4_require([_LT_PATH_MANIFEST_TOOL])dnl
+m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+m4_require([_LT_DECL_EGREP])dnl
+m4_require([_LT_DECL_SED])dnl
+m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl
+m4_require([_LT_TAG_COMPILER])dnl
+AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries])
+m4_if([$1], [CXX], [
+  _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
+  _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*']
+  case $host_os in
+  aix[[4-9]]*)
+    # If we're using GNU nm, then we don't want the "-C" option.
+    # -C means demangle to AIX nm, but means don't demangle with GNU nm
+    # Also, AIX nm treats weak defined symbols like other global defined
+    # symbols, whereas GNU nm marks them as "W".
+    if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
+      _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+    else
+      _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+    fi
+    ;;
+  pw32*)
+    _LT_TAGVAR(export_symbols_cmds, $1)="$ltdll_cmds"
+    ;;
+  cygwin* | mingw* | cegcc*)
+    case $cc_basename in
+    cl*)
+      _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
+      ;;
+    *)
+      _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols'
+      _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname']
+      ;;
+    esac
+    ;;
+  *)
+    _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
+    ;;
+  esac
+], [
+  runpath_var=
+  _LT_TAGVAR(allow_undefined_flag, $1)=
+  _LT_TAGVAR(always_export_symbols, $1)=no
+  _LT_TAGVAR(archive_cmds, $1)=
+  _LT_TAGVAR(archive_expsym_cmds, $1)=
+  _LT_TAGVAR(compiler_needs_object, $1)=no
+  _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no
+  _LT_TAGVAR(export_dynamic_flag_spec, $1)=
+  _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
+  _LT_TAGVAR(hardcode_automatic, $1)=no
+  _LT_TAGVAR(hardcode_direct, $1)=no
+  _LT_TAGVAR(hardcode_direct_absolute, $1)=no
+  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
+  _LT_TAGVAR(hardcode_libdir_separator, $1)=
+  _LT_TAGVAR(hardcode_minus_L, $1)=no
+  _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
+  _LT_TAGVAR(inherit_rpath, $1)=no
+  _LT_TAGVAR(link_all_deplibs, $1)=unknown
+  _LT_TAGVAR(module_cmds, $1)=
+  _LT_TAGVAR(module_expsym_cmds, $1)=
+  _LT_TAGVAR(old_archive_from_new_cmds, $1)=
+  _LT_TAGVAR(old_archive_from_expsyms_cmds, $1)=
+  _LT_TAGVAR(thread_safe_flag_spec, $1)=
+  _LT_TAGVAR(whole_archive_flag_spec, $1)=
+  # include_expsyms should be a list of space-separated symbols to be *always*
+  # included in the symbol list
+  _LT_TAGVAR(include_expsyms, $1)=
+  # exclude_expsyms can be an extended regexp of symbols to exclude
+  # it will be wrapped by ` (' and `)$', so one must not match beginning or
+  # end of line.  Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc',
+  # as well as any symbol that contains `d'.
+  _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*']
+  # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out
+  # platforms (ab)use it in PIC code, but their linkers get confused if
+  # the symbol is explicitly referenced.  Since portable code cannot
+  # rely on this symbol name, it's probably fine to never include it in
+  # preloaded symbol tables.
+  # Exclude shared library initialization/finalization symbols.
+dnl Note also adjust exclude_expsyms for C++ above.
+  extract_expsyms_cmds=
+
+  case $host_os in
+  cygwin* | mingw* | pw32* | cegcc*)
+    # FIXME: the MSVC++ port hasn't been tested in a loooong time
+    # When not using gcc, we currently assume that we are using
+    # Microsoft Visual C++.
+    if test "$GCC" != yes; then
+      with_gnu_ld=no
+    fi
+    ;;
+  interix*)
+    # we just hope/assume this is gcc and not c89 (= MSVC++)
+    with_gnu_ld=yes
+    ;;
+  openbsd*)
+    with_gnu_ld=no
+    ;;
+  esac
+
+  _LT_TAGVAR(ld_shlibs, $1)=yes
+
+  # On some targets, GNU ld is compatible enough with the native linker
+  # that we're better off using the native interface for both.
+  lt_use_gnu_ld_interface=no
+  if test "$with_gnu_ld" = yes; then
+    case $host_os in
+      aix*)
+	# The AIX port of GNU ld has always aspired to compatibility
+	# with the native linker.  However, as the warning in the GNU ld
+	# block says, versions before 2.19.5* couldn't really create working
+	# shared libraries, regardless of the interface used.
+	case `$LD -v 2>&1` in
+	  *\ \(GNU\ Binutils\)\ 2.19.5*) ;;
+	  *\ \(GNU\ Binutils\)\ 2.[[2-9]]*) ;;
+	  *\ \(GNU\ Binutils\)\ [[3-9]]*) ;;
+	  *)
+	    lt_use_gnu_ld_interface=yes
+	    ;;
+	esac
+	;;
+      *)
+	lt_use_gnu_ld_interface=yes
+	;;
+    esac
+  fi
+
+  if test "$lt_use_gnu_ld_interface" = yes; then
+    # If archive_cmds runs LD, not CC, wlarc should be empty
+    wlarc='${wl}'
+
+    # Set some defaults for GNU ld with shared library support. These
+    # are reset later if shared libraries are not supported. Putting them
+    # here allows them to be overridden if necessary.
+    runpath_var=LD_RUN_PATH
+    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+    _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
+    # Ancient GNU ld didn't support --whole-archive et al.
+    if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then
+      _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+    else
+      _LT_TAGVAR(whole_archive_flag_spec, $1)=
+    fi
+    supports_anon_versioning=no
+    case `$LD -v 2>&1` in
+      *GNU\ gold*) supports_anon_versioning=yes ;;
+      *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11
+      *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ...
+      *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ...
+      *\ 2.11.*) ;; # other 2.11 versions
+      *) supports_anon_versioning=yes ;;
+    esac
+
+    # See if GNU ld supports shared libraries.
+    case $host_os in
+    aix[[3-9]]*)
+      # On AIX/PPC, the GNU linker is very broken
+      if test "$host_cpu" != ia64; then
+	_LT_TAGVAR(ld_shlibs, $1)=no
+	cat <<_LT_EOF 1>&2
+
+*** Warning: the GNU linker, at least up to release 2.19, is reported
+*** to be unable to reliably create shared libraries on AIX.
+*** Therefore, libtool is disabling shared libraries support.  If you
+*** really care for shared libraries, you may want to install binutils
+*** 2.20 or above, or modify your PATH so that a non-GNU linker is found.
+*** You will then need to restart the configuration process.
+
+_LT_EOF
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # see comment about AmigaOS4 .so support
+            _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+            _LT_TAGVAR(archive_expsym_cmds, $1)=''
+        ;;
+      m68k)
+            _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
+            _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+            _LT_TAGVAR(hardcode_minus_L, $1)=yes
+        ;;
+      esac
+      ;;
+
+    beos*)
+      if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+	_LT_TAGVAR(allow_undefined_flag, $1)=unsupported
+	# Joseph Beckenbach <jrb3@best.com> says some releases of gcc
+	# support --undefined.  This deserves some investigation.  FIXME
+	_LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+      else
+	_LT_TAGVAR(ld_shlibs, $1)=no
+      fi
+      ;;
+
+    cygwin* | mingw* | pw32* | cegcc*)
+      # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless,
+      # as there is no search path for DLLs.
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+      _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-all-symbols'
+      _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
+      _LT_TAGVAR(always_export_symbols, $1)=no
+      _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
+      _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols'
+      _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname']
+
+      if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then
+        _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+	# If the export-symbols file already is a .def file (1st line
+	# is EXPORTS), use it as is; otherwise, prepend...
+	_LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+	  cp $export_symbols $output_objdir/$soname.def;
+	else
+	  echo EXPORTS > $output_objdir/$soname.def;
+	  cat $export_symbols >> $output_objdir/$soname.def;
+	fi~
+	$CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+      else
+	_LT_TAGVAR(ld_shlibs, $1)=no
+      fi
+      ;;
+
+    haiku*)
+      _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+      _LT_TAGVAR(link_all_deplibs, $1)=yes
+      ;;
+
+    interix[[3-9]]*)
+      _LT_TAGVAR(hardcode_direct, $1)=no
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+      _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+      # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc.
+      # Instead, shared libraries are loaded at an image base (0x10000000 by
+      # default) and relocated if they conflict, which is a slow, very memory-
+      # consuming and fragmenting process.  To avoid this, we pick a random,
+      # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link
+      # time.  Moving up from 0x10000000 also allows more sbrk(2) space.
+      _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+      _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+      ;;
+
+    gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu)
+      tmp_diet=no
+      if test "$host_os" = linux-dietlibc; then
+	case $cc_basename in
+	  diet\ *) tmp_diet=yes;;	# linux-dietlibc with static linking (!diet-dyn)
+	esac
+      fi
+      if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \
+	 && test "$tmp_diet" = no
+      then
+	tmp_addflag=' $pic_flag'
+	tmp_sharedflag='-shared'
+	case $cc_basename,$host_cpu in
+        pgcc*)				# Portland Group C compiler
+	  _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+	  tmp_addflag=' $pic_flag'
+	  ;;
+	pgf77* | pgf90* | pgf95* | pgfortran*)
+					# Portland Group f77 and f90 compilers
+	  _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+	  tmp_addflag=' $pic_flag -Mnomain' ;;
+	ecc*,ia64* | icc*,ia64*)	# Intel C compiler on ia64
+	  tmp_addflag=' -i_dynamic' ;;
+	efc*,ia64* | ifort*,ia64*)	# Intel Fortran compiler on ia64
+	  tmp_addflag=' -i_dynamic -nofor_main' ;;
+	ifc* | ifort*)			# Intel Fortran compiler
+	  tmp_addflag=' -nofor_main' ;;
+	lf95*)				# Lahey Fortran 8.1
+	  _LT_TAGVAR(whole_archive_flag_spec, $1)=
+	  tmp_sharedflag='--shared' ;;
+	xl[[cC]]* | bgxl[[cC]]* | mpixl[[cC]]*) # IBM XL C 8.0 on PPC (deal with xlf below)
+	  tmp_sharedflag='-qmkshrobj'
+	  tmp_addflag= ;;
+	nvcc*)	# Cuda Compiler Driver 2.2
+	  _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+	  _LT_TAGVAR(compiler_needs_object, $1)=yes
+	  ;;
+	esac
+	case `$CC -V 2>&1 | sed 5q` in
+	*Sun\ C*)			# Sun C 5.9
+	  _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+	  _LT_TAGVAR(compiler_needs_object, $1)=yes
+	  tmp_sharedflag='-G' ;;
+	*Sun\ F*)			# Sun Fortran 8.3
+	  tmp_sharedflag='-G' ;;
+	esac
+	_LT_TAGVAR(archive_cmds, $1)='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+
+        if test "x$supports_anon_versioning" = xyes; then
+          _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~
+	    cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
+	    echo "local: *; };" >> $output_objdir/$libname.ver~
+	    $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
+        fi
+
+	case $cc_basename in
+	xlf* | bgf* | bgxlf* | mpixlf*)
+	  # IBM XL Fortran 10.1 on PPC cannot create shared libs itself
+	  _LT_TAGVAR(whole_archive_flag_spec, $1)='--whole-archive$convenience --no-whole-archive'
+	  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+	  _LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib'
+	  if test "x$supports_anon_versioning" = xyes; then
+	    _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~
+	      cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
+	      echo "local: *; };" >> $output_objdir/$libname.ver~
+	      $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib'
+	  fi
+	  ;;
+	esac
+      else
+        _LT_TAGVAR(ld_shlibs, $1)=no
+      fi
+      ;;
+
+    netbsd*)
+      if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+	_LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib'
+	wlarc=
+      else
+	_LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+	_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+      fi
+      ;;
+
+    solaris*)
+      if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then
+	_LT_TAGVAR(ld_shlibs, $1)=no
+	cat <<_LT_EOF 1>&2
+
+*** Warning: The releases 2.8.* of the GNU linker cannot reliably
+*** create shared libraries on Solaris systems.  Therefore, libtool
+*** is disabling shared libraries support.  We urge you to upgrade GNU
+*** binutils to release 2.9.1 or newer.  Another option is to modify
+*** your PATH or compiler configuration so that the native linker is
+*** used, and then restart.
+
+_LT_EOF
+      elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+	_LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+	_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+      else
+	_LT_TAGVAR(ld_shlibs, $1)=no
+      fi
+      ;;
+
+    sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*)
+      case `$LD -v 2>&1` in
+        *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.1[[0-5]].*)
+	_LT_TAGVAR(ld_shlibs, $1)=no
+	cat <<_LT_EOF 1>&2
+
+*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not
+*** reliably create shared libraries on SCO systems.  Therefore, libtool
+*** is disabling shared libraries support.  We urge you to upgrade GNU
+*** binutils to release 2.16.91.0.3 or newer.  Another option is to modify
+*** your PATH or compiler configuration so that the native linker is
+*** used, and then restart.
+
+_LT_EOF
+	;;
+	*)
+	  # For security reasons, it is highly recommended that you always
+	  # use absolute paths for naming shared libraries, and exclude the
+	  # DT_RUNPATH tag from executables and libraries.  But doing so
+	  # requires that you compile everything twice, which is a pain.
+	  if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+	    _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+	    _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+	  else
+	    _LT_TAGVAR(ld_shlibs, $1)=no
+	  fi
+	;;
+      esac
+      ;;
+
+    sunos4*)
+      _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+      wlarc=
+      _LT_TAGVAR(hardcode_direct, $1)=yes
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    *)
+      if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+	_LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+	_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+      else
+	_LT_TAGVAR(ld_shlibs, $1)=no
+      fi
+      ;;
+    esac
+
+    if test "$_LT_TAGVAR(ld_shlibs, $1)" = no; then
+      runpath_var=
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
+      _LT_TAGVAR(export_dynamic_flag_spec, $1)=
+      _LT_TAGVAR(whole_archive_flag_spec, $1)=
+    fi
+  else
+    # PORTME fill in a description of your system's linker (not GNU ld)
+    case $host_os in
+    aix3*)
+      _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
+      _LT_TAGVAR(always_export_symbols, $1)=yes
+      _LT_TAGVAR(archive_expsym_cmds, $1)='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname'
+      # Note: this linker hardcodes the directories in LIBPATH if there
+      # are no directories specified by -L.
+      _LT_TAGVAR(hardcode_minus_L, $1)=yes
+      if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then
+	# Neither direct hardcoding nor static linking is supported with a
+	# broken collect2.
+	_LT_TAGVAR(hardcode_direct, $1)=unsupported
+      fi
+      ;;
+
+    aix[[4-9]]*)
+      if test "$host_cpu" = ia64; then
+	# On IA64, the linker does run time linking by default, so we don't
+	# have to do anything special.
+	aix_use_runtimelinking=no
+	exp_sym_flag='-Bexport'
+	no_entry_flag=""
+      else
+	# If we're using GNU nm, then we don't want the "-C" option.
+	# -C means demangle to AIX nm, but means don't demangle with GNU nm
+	# Also, AIX nm treats weak defined symbols like other global
+	# defined symbols, whereas GNU nm marks them as "W".
+	if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
+	  _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+	else
+	  _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+	fi
+	aix_use_runtimelinking=no
+
+	# Test if we are trying to use run time linking or normal
+	# AIX style linking. If -brtl is somewhere in LDFLAGS, we
+	# need to do runtime linking.
+	case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*)
+	  for ld_flag in $LDFLAGS; do
+	  if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then
+	    aix_use_runtimelinking=yes
+	    break
+	  fi
+	  done
+	  ;;
+	esac
+
+	exp_sym_flag='-bexport'
+	no_entry_flag='-bnoentry'
+      fi
+
+      # When large executables or shared objects are built, AIX ld can
+      # have problems creating the table of contents.  If linking a library
+      # or program results in "error TOC overflow" add -mminimal-toc to
+      # CXXFLAGS/CFLAGS for g++/gcc.  In the cases where that is not
+      # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
+
+      _LT_TAGVAR(archive_cmds, $1)=''
+      _LT_TAGVAR(hardcode_direct, $1)=yes
+      _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
+      _LT_TAGVAR(hardcode_libdir_separator, $1)=':'
+      _LT_TAGVAR(link_all_deplibs, $1)=yes
+      _LT_TAGVAR(file_list_spec, $1)='${wl}-f,'
+
+      if test "$GCC" = yes; then
+	case $host_os in aix4.[[012]]|aix4.[[012]].*)
+	# We only want to do this on AIX 4.2 and lower, the check
+	# below for broken collect2 doesn't work under 4.3+
+	  collect2name=`${CC} -print-prog-name=collect2`
+	  if test -f "$collect2name" &&
+	   strings "$collect2name" | $GREP resolve_lib_name >/dev/null
+	  then
+	  # We have reworked collect2
+	  :
+	  else
+	  # We have old collect2
+	  _LT_TAGVAR(hardcode_direct, $1)=unsupported
+	  # It fails to find uninstalled libraries when the uninstalled
+	  # path is not listed in the libpath.  Setting hardcode_minus_L
+	  # to yes (with hardcode_direct unsupported) forces relinking
+	  _LT_TAGVAR(hardcode_minus_L, $1)=yes
+	  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+	  _LT_TAGVAR(hardcode_libdir_separator, $1)=
+	  fi
+	  ;;
+	esac
+	shared_flag='-shared'
+	if test "$aix_use_runtimelinking" = yes; then
+	  shared_flag="$shared_flag "'${wl}-G'
+	fi
+      else
+	# not using gcc
+	if test "$host_cpu" = ia64; then
+	# VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release
+	# chokes on -Wl,-G. The following line is correct:
+	  shared_flag='-G'
+	else
+	  if test "$aix_use_runtimelinking" = yes; then
+	    shared_flag='${wl}-G'
+	  else
+	    shared_flag='${wl}-bM:SRE'
+	  fi
+	fi
+      fi
+
+      _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-bexpall'
+      # It seems that -bexpall does not export symbols beginning with
+      # underscore (_), so it is better to generate a list of symbols to export.
+      _LT_TAGVAR(always_export_symbols, $1)=yes
+      if test "$aix_use_runtimelinking" = yes; then
+	# Warning - without using the other runtime loading flags (-brtl),
+	# -berok will link without error, but may produce a broken library.
+	_LT_TAGVAR(allow_undefined_flag, $1)='-berok'
+        # Determine the default libpath from the value encoded in an
+        # empty executable.
+        _LT_SYS_MODULE_PATH_AIX([$1])
+        _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath"
+        _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag"
+      else
+	if test "$host_cpu" = ia64; then
+	  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib'
+	  _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs"
+	  _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols"
+	else
+	 # Determine the default libpath from the value encoded in an
+	 # empty executable.
+	 _LT_SYS_MODULE_PATH_AIX([$1])
+	 _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath"
+	  # Warning - without using the other run time loading flags,
+	  # -berok will link without error, but may produce a broken library.
+	  _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok'
+	  _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok'
+	  if test "$with_gnu_ld" = yes; then
+	    # We only use this code for GNU lds that support --whole-archive.
+	    _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
+	  else
+	    # Exported symbols can be pulled into shared objects from archives
+	    _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience'
+	  fi
+	  _LT_TAGVAR(archive_cmds_need_lc, $1)=yes
+	  # This is similar to how AIX traditionally builds its shared libraries.
+	  _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname'
+	fi
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # see comment about AmigaOS4 .so support
+            _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+            _LT_TAGVAR(archive_expsym_cmds, $1)=''
+        ;;
+      m68k)
+            _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
+            _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+            _LT_TAGVAR(hardcode_minus_L, $1)=yes
+        ;;
+      esac
+      ;;
+
+    bsdi[[45]]*)
+      _LT_TAGVAR(export_dynamic_flag_spec, $1)=-rdynamic
+      ;;
+
+    cygwin* | mingw* | pw32* | cegcc*)
+      # When not using gcc, we currently assume that we are using
+      # Microsoft Visual C++.
+      # hardcode_libdir_flag_spec is actually meaningless, as there is
+      # no search path for DLLs.
+      case $cc_basename in
+      cl*)
+	# Native MSVC
+	_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' '
+	_LT_TAGVAR(allow_undefined_flag, $1)=unsupported
+	_LT_TAGVAR(always_export_symbols, $1)=yes
+	_LT_TAGVAR(file_list_spec, $1)='@'
+	# Tell ltmain to make .lib files, not .a files.
+	libext=lib
+	# Tell ltmain to make .dll files, not .so files.
+	shrext_cmds=".dll"
+	# FIXME: Setting linknames here is a bad hack.
+	_LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames='
+	_LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+	    sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp;
+	  else
+	    sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp;
+	  fi~
+	  $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~
+	  linknames='
+	# The linker will not automatically build a static lib if we build a DLL.
+	# _LT_TAGVAR(old_archive_from_new_cmds, $1)='true'
+	_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
+	_LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
+	_LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1,DATA/'\'' | $SED -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols'
+	# Don't use ranlib
+	_LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib'
+	_LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~
+	  lt_tool_outputfile="@TOOL_OUTPUT@"~
+	  case $lt_outputfile in
+	    *.exe|*.EXE) ;;
+	    *)
+	      lt_outputfile="$lt_outputfile.exe"
+	      lt_tool_outputfile="$lt_tool_outputfile.exe"
+	      ;;
+	  esac~
+	  if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then
+	    $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1;
+	    $RM "$lt_outputfile.manifest";
+	  fi'
+	;;
+      *)
+	# Assume MSVC wrapper
+	_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' '
+	_LT_TAGVAR(allow_undefined_flag, $1)=unsupported
+	# Tell ltmain to make .lib files, not .a files.
+	libext=lib
+	# Tell ltmain to make .dll files, not .so files.
+	shrext_cmds=".dll"
+	# FIXME: Setting linknames here is a bad hack.
+	_LT_TAGVAR(archive_cmds, $1)='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames='
+	# The linker will automatically build a .lib file if we build a DLL.
+	_LT_TAGVAR(old_archive_from_new_cmds, $1)='true'
+	# FIXME: Should let the user specify the lib program.
+	_LT_TAGVAR(old_archive_cmds, $1)='lib -OUT:$oldlib$oldobjs$old_deplibs'
+	_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
+	;;
+      esac
+      ;;
+
+    darwin* | rhapsody*)
+      _LT_DARWIN_LINKER_FEATURES($1)
+      ;;
+
+    dgux*)
+      _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
+    # support.  Future versions do this automatically, but an explicit c++rt0.o
+    # does not break anything, and helps significantly (at the cost of a little
+    # extra space).
+    freebsd2.2*)
+      _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o'
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+      _LT_TAGVAR(hardcode_direct, $1)=yes
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    # Unfortunately, older versions of FreeBSD 2 do not have this feature.
+    freebsd2.*)
+      _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+      _LT_TAGVAR(hardcode_direct, $1)=yes
+      _LT_TAGVAR(hardcode_minus_L, $1)=yes
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    # FreeBSD 3 and greater uses gcc -shared to do shared libraries.
+    freebsd* | dragonfly*)
+      _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+      _LT_TAGVAR(hardcode_direct, $1)=yes
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    hpux9*)
+      if test "$GCC" = yes; then
+	_LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+      else
+	_LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+      fi
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
+      _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+      _LT_TAGVAR(hardcode_direct, $1)=yes
+
+      # hardcode_minus_L: Not really in the search PATH,
+      # but as the default location of the library.
+      _LT_TAGVAR(hardcode_minus_L, $1)=yes
+      _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+      ;;
+
+    hpux10*)
+      if test "$GCC" = yes && test "$with_gnu_ld" = no; then
+	_LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
+      else
+	_LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'
+      fi
+      if test "$with_gnu_ld" = no; then
+	_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
+	_LT_TAGVAR(hardcode_libdir_separator, $1)=:
+	_LT_TAGVAR(hardcode_direct, $1)=yes
+	_LT_TAGVAR(hardcode_direct_absolute, $1)=yes
+	_LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+	# hardcode_minus_L: Not really in the search PATH,
+	# but as the default location of the library.
+	_LT_TAGVAR(hardcode_minus_L, $1)=yes
+      fi
+      ;;
+
+    hpux11*)
+      if test "$GCC" = yes && test "$with_gnu_ld" = no; then
+	case $host_cpu in
+	hppa*64*)
+	  _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+	  ;;
+	ia64*)
+	  _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags'
+	  ;;
+	*)
+	  _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
+	  ;;
+	esac
+      else
+	case $host_cpu in
+	hppa*64*)
+	  _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+	  ;;
+	ia64*)
+	  _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags'
+	  ;;
+	*)
+	m4_if($1, [], [
+	  # Older versions of the 11.00 compiler do not understand -b yet
+	  # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does)
+	  _LT_LINKER_OPTION([if $CC understands -b],
+	    _LT_TAGVAR(lt_cv_prog_compiler__b, $1), [-b],
+	    [_LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'],
+	    [_LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'])],
+	  [_LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'])
+	  ;;
+	esac
+      fi
+      if test "$with_gnu_ld" = no; then
+	_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
+	_LT_TAGVAR(hardcode_libdir_separator, $1)=:
+
+	case $host_cpu in
+	hppa*64*|ia64*)
+	  _LT_TAGVAR(hardcode_direct, $1)=no
+	  _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+	  ;;
+	*)
+	  _LT_TAGVAR(hardcode_direct, $1)=yes
+	  _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
+	  _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+
+	  # hardcode_minus_L: Not really in the search PATH,
+	  # but as the default location of the library.
+	  _LT_TAGVAR(hardcode_minus_L, $1)=yes
+	  ;;
+	esac
+      fi
+      ;;
+
+    irix5* | irix6* | nonstopux*)
+      if test "$GCC" = yes; then
+	_LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+	# Try to use the -exported_symbol ld option, if it does not
+	# work, assume that -exports_file does not work either and
+	# implicitly export all symbols.
+	# This should be the same for all languages, so no per-tag cache variable.
+	AC_CACHE_CHECK([whether the $host_os linker accepts -exported_symbol],
+	  [lt_cv_irix_exported_symbol],
+	  [save_LDFLAGS="$LDFLAGS"
+	   LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null"
+	   AC_LINK_IFELSE(
+	     [AC_LANG_SOURCE(
+	        [AC_LANG_CASE([C], [[int foo (void) { return 0; }]],
+			      [C++], [[int foo (void) { return 0; }]],
+			      [Fortran 77], [[
+      subroutine foo
+      end]],
+			      [Fortran], [[
+      subroutine foo
+      end]])])],
+	      [lt_cv_irix_exported_symbol=yes],
+	      [lt_cv_irix_exported_symbol=no])
+           LDFLAGS="$save_LDFLAGS"])
+	if test "$lt_cv_irix_exported_symbol" = yes; then
+          _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib'
+	fi
+      else
+	_LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+	_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -exports_file $export_symbols -o $lib'
+      fi
+      _LT_TAGVAR(archive_cmds_need_lc, $1)='no'
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+      _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+      _LT_TAGVAR(inherit_rpath, $1)=yes
+      _LT_TAGVAR(link_all_deplibs, $1)=yes
+      ;;
+
+    netbsd*)
+      if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+	_LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'  # a.out
+      else
+	_LT_TAGVAR(archive_cmds, $1)='$LD -shared -o $lib $libobjs $deplibs $linker_flags'      # ELF
+      fi
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+      _LT_TAGVAR(hardcode_direct, $1)=yes
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    newsos6)
+      _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      _LT_TAGVAR(hardcode_direct, $1)=yes
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+      _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    *nto* | *qnx*)
+      ;;
+
+    openbsd*)
+      if test -f /usr/libexec/ld.so; then
+	_LT_TAGVAR(hardcode_direct, $1)=yes
+	_LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+	_LT_TAGVAR(hardcode_direct_absolute, $1)=yes
+	if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+	  _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+	  _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols'
+	  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+	  _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+	else
+	  case $host_os in
+	   openbsd[[01]].* | openbsd2.[[0-7]] | openbsd2.[[0-7]].*)
+	     _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+	     _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+	     ;;
+	   *)
+	     _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+	     _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+	     ;;
+	  esac
+	fi
+      else
+	_LT_TAGVAR(ld_shlibs, $1)=no
+      fi
+      ;;
+
+    os2*)
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+      _LT_TAGVAR(hardcode_minus_L, $1)=yes
+      _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
+      _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def'
+      _LT_TAGVAR(old_archive_from_new_cmds, $1)='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def'
+      ;;
+
+    osf3*)
+      if test "$GCC" = yes; then
+	_LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
+	_LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+      else
+	_LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*'
+	_LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+      fi
+      _LT_TAGVAR(archive_cmds_need_lc, $1)='no'
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+      _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+      ;;
+
+    osf4* | osf5*)	# as osf3* with the addition of -msym flag
+      if test "$GCC" = yes; then
+	_LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
+	_LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $pic_flag $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+	_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+      else
+	_LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*'
+	_LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+	_LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~
+	$CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~$RM $lib.exp'
+
+	# Both c and cxx compiler support -rpath directly
+	_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir'
+      fi
+      _LT_TAGVAR(archive_cmds_need_lc, $1)='no'
+      _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+      ;;
+
+    solaris*)
+      _LT_TAGVAR(no_undefined_flag, $1)=' -z defs'
+      if test "$GCC" = yes; then
+	wlarc='${wl}'
+	_LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+	_LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+	  $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp'
+      else
+	case `$CC -V 2>&1` in
+	*"Compilers 5.0"*)
+	  wlarc=''
+	  _LT_TAGVAR(archive_cmds, $1)='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags'
+	  _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+	  $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp'
+	  ;;
+	*)
+	  wlarc='${wl}'
+	  _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $compiler_flags'
+	  _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+	  $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp'
+	  ;;
+	esac
+      fi
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      case $host_os in
+      solaris2.[[0-5]] | solaris2.[[0-5]].*) ;;
+      *)
+	# The compiler driver will combine and reorder linker options,
+	# but understands `-z linker_flag'.  GCC discards it without `$wl',
+	# but is careful enough not to reorder.
+	# Supported since Solaris 2.6 (maybe 2.5.1?)
+	if test "$GCC" = yes; then
+	  _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract'
+	else
+	  _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract'
+	fi
+	;;
+      esac
+      _LT_TAGVAR(link_all_deplibs, $1)=yes
+      ;;
+
+    sunos4*)
+      if test "x$host_vendor" = xsequent; then
+	# Use $CC to link under sequent, because it throws in some extra .o
+	# files that make .init and .fini sections work.
+	_LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags'
+      else
+	_LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags'
+      fi
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+      _LT_TAGVAR(hardcode_direct, $1)=yes
+      _LT_TAGVAR(hardcode_minus_L, $1)=yes
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    sysv4)
+      case $host_vendor in
+	sni)
+	  _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+	  _LT_TAGVAR(hardcode_direct, $1)=yes # is this really true???
+	;;
+	siemens)
+	  ## LD is ld it makes a PLAMLIB
+	  ## CC just makes a GrossModule.
+	  _LT_TAGVAR(archive_cmds, $1)='$LD -G -o $lib $libobjs $deplibs $linker_flags'
+	  _LT_TAGVAR(reload_cmds, $1)='$CC -r -o $output$reload_objs'
+	  _LT_TAGVAR(hardcode_direct, $1)=no
+        ;;
+	motorola)
+	  _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+	  _LT_TAGVAR(hardcode_direct, $1)=no #Motorola manual says yes, but my tests say they lie
+	;;
+      esac
+      runpath_var='LD_RUN_PATH'
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    sysv4.3*)
+      _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      _LT_TAGVAR(export_dynamic_flag_spec, $1)='-Bexport'
+      ;;
+
+    sysv4*MP*)
+      if test -d /usr/nec; then
+	_LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+	_LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+	runpath_var=LD_RUN_PATH
+	hardcode_runpath_var=yes
+	_LT_TAGVAR(ld_shlibs, $1)=yes
+      fi
+      ;;
+
+    sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*)
+      _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text'
+      _LT_TAGVAR(archive_cmds_need_lc, $1)=no
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      runpath_var='LD_RUN_PATH'
+
+      if test "$GCC" = yes; then
+	_LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      else
+	_LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      fi
+      ;;
+
+    sysv5* | sco3.2v5* | sco5v6*)
+      # Note: We can NOT use -z defs as we might desire, because we do not
+      # link with -lc, and that would cause any symbols used from libc to
+      # always be unresolved, which means just about no library would
+      # ever link correctly.  If we're not using GNU ld we use -z text
+      # though, which does catch some bad symbols but isn't as heavy-handed
+      # as -z defs.
+      _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text'
+      _LT_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs'
+      _LT_TAGVAR(archive_cmds_need_lc, $1)=no
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R,$libdir'
+      _LT_TAGVAR(hardcode_libdir_separator, $1)=':'
+      _LT_TAGVAR(link_all_deplibs, $1)=yes
+      _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport'
+      runpath_var='LD_RUN_PATH'
+
+      if test "$GCC" = yes; then
+	_LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      else
+	_LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      fi
+      ;;
+
+    uts4*)
+      _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    *)
+      _LT_TAGVAR(ld_shlibs, $1)=no
+      ;;
+    esac
+
+    if test x$host_vendor = xsni; then
+      case $host in
+      sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+	_LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Blargedynsym'
+	;;
+      esac
+    fi
+  fi
+])
+AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)])
+test "$_LT_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no
+
+_LT_TAGVAR(with_gnu_ld, $1)=$with_gnu_ld
+
+_LT_DECL([], [libext], [0], [Old archive suffix (normally "a")])dnl
+_LT_DECL([], [shrext_cmds], [1], [Shared library suffix (normally ".so")])dnl
+_LT_DECL([], [extract_expsyms_cmds], [2],
+    [The commands to extract the exported symbol list from a shared archive])
+
+#
+# Do we need to explicitly link libc?
+#
+case "x$_LT_TAGVAR(archive_cmds_need_lc, $1)" in
+x|xyes)
+  # Assume -lc should be added
+  _LT_TAGVAR(archive_cmds_need_lc, $1)=yes
+
+  if test "$enable_shared" = yes && test "$GCC" = yes; then
+    case $_LT_TAGVAR(archive_cmds, $1) in
+    *'~'*)
+      # FIXME: we may have to deal with multi-command sequences.
+      ;;
+    '$CC '*)
+      # Test whether the compiler implicitly links with -lc since on some
+      # systems, -lgcc has to come before -lc. If gcc already passes -lc
+      # to ld, don't add -lc before -lgcc.
+      AC_CACHE_CHECK([whether -lc should be explicitly linked in],
+	[lt_cv_]_LT_TAGVAR(archive_cmds_need_lc, $1),
+	[$RM conftest*
+	echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+	if AC_TRY_EVAL(ac_compile) 2>conftest.err; then
+	  soname=conftest
+	  lib=conftest
+	  libobjs=conftest.$ac_objext
+	  deplibs=
+	  wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1)
+	  pic_flag=$_LT_TAGVAR(lt_prog_compiler_pic, $1)
+	  compiler_flags=-v
+	  linker_flags=-v
+	  verstring=
+	  output_objdir=.
+	  libname=conftest
+	  lt_save_allow_undefined_flag=$_LT_TAGVAR(allow_undefined_flag, $1)
+	  _LT_TAGVAR(allow_undefined_flag, $1)=
+	  if AC_TRY_EVAL(_LT_TAGVAR(archive_cmds, $1) 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1)
+	  then
+	    lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=no
+	  else
+	    lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=yes
+	  fi
+	  _LT_TAGVAR(allow_undefined_flag, $1)=$lt_save_allow_undefined_flag
+	else
+	  cat conftest.err 1>&5
+	fi
+	$RM conftest*
+	])
+      _LT_TAGVAR(archive_cmds_need_lc, $1)=$lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)
+      ;;
+    esac
+  fi
+  ;;
+esac
+
+_LT_TAGDECL([build_libtool_need_lc], [archive_cmds_need_lc], [0],
+    [Whether or not to add -lc for building shared libraries])
+_LT_TAGDECL([allow_libtool_libs_with_static_runtimes],
+    [enable_shared_with_static_runtimes], [0],
+    [Whether or not to disallow shared libs when runtime libs are static])
+_LT_TAGDECL([], [export_dynamic_flag_spec], [1],
+    [Compiler flag to allow reflexive dlopens])
+_LT_TAGDECL([], [whole_archive_flag_spec], [1],
+    [Compiler flag to generate shared objects directly from archives])
+_LT_TAGDECL([], [compiler_needs_object], [1],
+    [Whether the compiler copes with passing no objects directly])
+_LT_TAGDECL([], [old_archive_from_new_cmds], [2],
+    [Create an old-style archive from a shared archive])
+_LT_TAGDECL([], [old_archive_from_expsyms_cmds], [2],
+    [Create a temporary old-style archive to link instead of a shared archive])
+_LT_TAGDECL([], [archive_cmds], [2], [Commands used to build a shared archive])
+_LT_TAGDECL([], [archive_expsym_cmds], [2])
+_LT_TAGDECL([], [module_cmds], [2],
+    [Commands used to build a loadable module if different from building
+    a shared archive.])
+_LT_TAGDECL([], [module_expsym_cmds], [2])
+_LT_TAGDECL([], [with_gnu_ld], [1],
+    [Whether we are building with GNU ld or not])
+_LT_TAGDECL([], [allow_undefined_flag], [1],
+    [Flag that allows shared libraries with undefined symbols to be built])
+_LT_TAGDECL([], [no_undefined_flag], [1],
+    [Flag that enforces no undefined symbols])
+_LT_TAGDECL([], [hardcode_libdir_flag_spec], [1],
+    [Flag to hardcode $libdir into a binary during linking.
+    This must work even if $libdir does not exist])
+_LT_TAGDECL([], [hardcode_libdir_separator], [1],
+    [Whether we need a single "-rpath" flag with a separated argument])
+_LT_TAGDECL([], [hardcode_direct], [0],
+    [Set to "yes" if using DIR/libNAME${shared_ext} during linking hardcodes
+    DIR into the resulting binary])
+_LT_TAGDECL([], [hardcode_direct_absolute], [0],
+    [Set to "yes" if using DIR/libNAME${shared_ext} during linking hardcodes
+    DIR into the resulting binary and the resulting library dependency is
+    "absolute", i.e impossible to change by setting ${shlibpath_var} if the
+    library is relocated])
+_LT_TAGDECL([], [hardcode_minus_L], [0],
+    [Set to "yes" if using the -LDIR flag during linking hardcodes DIR
+    into the resulting binary])
+_LT_TAGDECL([], [hardcode_shlibpath_var], [0],
+    [Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR
+    into the resulting binary])
+_LT_TAGDECL([], [hardcode_automatic], [0],
+    [Set to "yes" if building a shared library automatically hardcodes DIR
+    into the library and all subsequent libraries and executables linked
+    against it])
+_LT_TAGDECL([], [inherit_rpath], [0],
+    [Set to yes if linker adds runtime paths of dependent libraries
+    to runtime path list])
+_LT_TAGDECL([], [link_all_deplibs], [0],
+    [Whether libtool must link a program against all its dependency libraries])
+_LT_TAGDECL([], [always_export_symbols], [0],
+    [Set to "yes" if exported symbols are required])
+_LT_TAGDECL([], [export_symbols_cmds], [2],
+    [The commands to list exported symbols])
+_LT_TAGDECL([], [exclude_expsyms], [1],
+    [Symbols that should not be listed in the preloaded symbols])
+_LT_TAGDECL([], [include_expsyms], [1],
+    [Symbols that must always be exported])
+_LT_TAGDECL([], [prelink_cmds], [2],
+    [Commands necessary for linking programs (against libraries) with templates])
+_LT_TAGDECL([], [postlink_cmds], [2],
+    [Commands necessary for finishing linking programs])
+_LT_TAGDECL([], [file_list_spec], [1],
+    [Specify filename containing input files])
+dnl FIXME: Not yet implemented
+dnl _LT_TAGDECL([], [thread_safe_flag_spec], [1],
+dnl    [Compiler flag to generate thread safe objects])
+])# _LT_LINKER_SHLIBS
+
+
+# _LT_LANG_C_CONFIG([TAG])
+# ------------------------
+# Define the configuration variables for a C compiler, report whether shared
+# and static libraries will be built, and pass the result to _LT_CONFIG, which
+# uses these variables to write the compiler configuration to `libtool'.
m4_defun([_LT_LANG_C_CONFIG],
+[m4_require([_LT_DECL_EGREP])dnl
+lt_save_CC="$CC"
+AC_LANG_PUSH(C)
+
+# Source file extension for C test sources.
+ac_ext=c
+
+# Object file extension for compiled C test sources.
+objext=o
+_LT_TAGVAR(objext, $1)=$objext
+
+# Minimal C source used by the simple compile tests.
+lt_simple_compile_test_code="int some_variable = 0;"
+
+# Minimal C program used by the simple link tests.
+lt_simple_link_test_code='int main(){return(0);}'
+
+_LT_TAG_COMPILER
+# Save the default compiler, since it gets overwritten when the other
+# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP.
+compiler_DEFAULT=$CC
+
+# Save warnings/boilerplate of the simple test code.
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+## CAVEAT EMPTOR:
+## There is no encapsulation within the following macros, do not change
+## the running order or otherwise move them around unless you know exactly
+## what you are doing...
+if test -n "$compiler"; then
+  _LT_COMPILER_NO_RTTI($1)
+  _LT_COMPILER_PIC($1)
+  _LT_COMPILER_C_O($1)
+  _LT_COMPILER_FILE_LOCKS($1)
+  _LT_LINKER_SHLIBS($1)
+  _LT_SYS_DYNAMIC_LINKER($1)
+  _LT_LINKER_HARDCODE_LIBPATH($1)
+  LT_SYS_DLOPEN_SELF
+  _LT_CMD_STRIPLIB
+
+  # Report which library types will actually be built
+  AC_MSG_CHECKING([if libtool supports shared libraries])
+  AC_MSG_RESULT([$can_build_shared])
+
+  AC_MSG_CHECKING([whether to build shared libraries])
+  test "$can_build_shared" = "no" && enable_shared=no
+
+  # On AIX, shared libraries and static libraries use the same namespace, and
+  # are all built from PIC, so enabling shared may turn the static build off.
+  case $host_os in
+  aix3*)
+    test "$enable_shared" = yes && enable_static=no
+    if test -n "$RANLIB"; then
+      archive_cmds="$archive_cmds~\$RANLIB \$lib"
+      postinstall_cmds='$RANLIB $lib'
+    fi
+    ;;
+
+  aix[[4-9]]*)
+    if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
+      test "$enable_shared" = yes && enable_static=no
+    fi
+    ;;
+  esac
+  AC_MSG_RESULT([$enable_shared])
+
+  AC_MSG_CHECKING([whether to build static libraries])
+  # Make sure at least one of enable_shared or enable_static is yes.
+  test "$enable_shared" = yes || enable_static=yes
+  AC_MSG_RESULT([$enable_static])
+
+  _LT_CONFIG($1)
+fi
+AC_LANG_POP
+CC="$lt_save_CC"
+])# _LT_LANG_C_CONFIG
+
+
+# _LT_LANG_CXX_CONFIG([TAG])
+# --------------------------
+# Ensure that the configuration variables for a C++ compiler are suitably
+# defined.  These variables are subsequently used by _LT_CONFIG to write
+# the compiler configuration to `libtool'.
+m4_defun([_LT_LANG_CXX_CONFIG],
+[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+m4_require([_LT_DECL_EGREP])dnl
+m4_require([_LT_PATH_MANIFEST_TOOL])dnl
+if test -n "$CXX" && ( test "X$CXX" != "Xno" &&
+    ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) ||
+    (test "X$CXX" != "Xg++"))) ; then
+  AC_PROG_CXXCPP
+else
+  _lt_caught_CXX_error=yes
+fi
+
+AC_LANG_PUSH(C++)
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no
+_LT_TAGVAR(allow_undefined_flag, $1)=
+_LT_TAGVAR(always_export_symbols, $1)=no
+_LT_TAGVAR(archive_expsym_cmds, $1)=
+_LT_TAGVAR(compiler_needs_object, $1)=no
+_LT_TAGVAR(export_dynamic_flag_spec, $1)=
+_LT_TAGVAR(hardcode_direct, $1)=no
+_LT_TAGVAR(hardcode_direct_absolute, $1)=no
+_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
+_LT_TAGVAR(hardcode_libdir_separator, $1)=
+_LT_TAGVAR(hardcode_minus_L, $1)=no
+_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
+_LT_TAGVAR(hardcode_automatic, $1)=no
+_LT_TAGVAR(inherit_rpath, $1)=no
+_LT_TAGVAR(module_cmds, $1)=
+_LT_TAGVAR(module_expsym_cmds, $1)=
+_LT_TAGVAR(link_all_deplibs, $1)=unknown
+_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
+_LT_TAGVAR(reload_flag, $1)=$reload_flag
+_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
+_LT_TAGVAR(no_undefined_flag, $1)=
+_LT_TAGVAR(whole_archive_flag_spec, $1)=
+_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no
+
+# Source file extension for C++ test sources.
+ac_ext=cpp
+
+# Object file extension for compiled C++ test sources.
+objext=o
+_LT_TAGVAR(objext, $1)=$objext
+
+# No sense in running all these tests if we already determined that
+# the CXX compiler isn't working.  Some variables (like enable_shared)
+# are currently assumed to apply to all compilers on this platform,
+# and will be corrupted by setting them based on a non-working compiler.
+if test "$_lt_caught_CXX_error" != yes; then
+  # Code to be used in simple compile tests
+  lt_simple_compile_test_code="int some_variable = 0;"
+
+  # Code to be used in simple link tests
+  lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }'
+
+  # ltmain only uses $CC for tagged configurations so make sure $CC is set.
+  _LT_TAG_COMPILER
+
+  # save warnings/boilerplate of simple test code
+  _LT_COMPILER_BOILERPLATE
+  _LT_LINKER_BOILERPLATE
+
+  # Allow CC to be a program name with arguments.
+  lt_save_CC=$CC
+  lt_save_CFLAGS=$CFLAGS
+  lt_save_LD=$LD
+  lt_save_GCC=$GCC
+  GCC=$GXX
+  lt_save_with_gnu_ld=$with_gnu_ld
+  lt_save_path_LD=$lt_cv_path_LD
+  if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then
+    lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx
+  else
+    $as_unset lt_cv_prog_gnu_ld
+  fi
+  if test -n "${lt_cv_path_LDCXX+set}"; then
+    lt_cv_path_LD=$lt_cv_path_LDCXX
+  else
+    $as_unset lt_cv_path_LD
+  fi
+  test -z "${LDCXX+set}" || LD=$LDCXX
+  CC=${CXX-"c++"}
+  CFLAGS=$CXXFLAGS
+  compiler=$CC
+  _LT_TAGVAR(compiler, $1)=$CC
+  _LT_CC_BASENAME([$compiler])
+
+  if test -n "$compiler"; then
+    # We don't want -fno-exceptions when compiling C++ code, so set the
+    # no_builtin_flag separately
+    if test "$GXX" = yes; then
+      _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin'
+    else
+      _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=
+    fi
+
+    if test "$GXX" = yes; then
+      # Set up default GNU C++ configuration
+
+      LT_PATH_LD
+
+      # Check if GNU C++ uses GNU ld as the underlying linker, since the
+      # archiving commands below assume that GNU ld is being used.
+      if test "$with_gnu_ld" = yes; then
+        _LT_TAGVAR(archive_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
+        _LT_TAGVAR(archive_expsym_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+
+        _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+        _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
+
+        # If archive_cmds runs LD, not CC, wlarc should be empty
+        # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to
+        #     investigate it a little bit more. (MM)
+        wlarc='${wl}'
+
+        # ancient GNU ld didn't support --whole-archive et al.
+        if eval "`$CC -print-prog-name=ld` --help 2>&1" |
+	  $GREP 'no-whole-archive' > /dev/null; then
+          _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+        else
+          _LT_TAGVAR(whole_archive_flag_spec, $1)=
+        fi
+      else
+        with_gnu_ld=no
+        wlarc=
+
+        # A generic and very simple default shared library creation
+        # command for GNU C++ for the case where it uses the native
+        # linker, instead of GNU ld.  If possible, this setting should
+        # overridden to take advantage of the native linker features on
+        # the platform it is being used on.
+        _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib'
+      fi
+
+      # Commands to make compiler produce verbose output that lists
+      # what "hidden" libraries, object files and flags are used when
+      # linking a shared library.
+      output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
+
+    else
+      GXX=no
+      with_gnu_ld=no
+      wlarc=
+    fi
+
+    # PORTME: fill in a description of your system's C++ link characteristics
+    AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries])
+    _LT_TAGVAR(ld_shlibs, $1)=yes
+    case $host_os in
+      aix3*)
+        # FIXME: insert proper C++ library support
+        _LT_TAGVAR(ld_shlibs, $1)=no
+        ;;
+      aix[[4-9]]*)
+        if test "$host_cpu" = ia64; then
+          # On IA64, the linker does run time linking by default, so we don't
+          # have to do anything special.
+          aix_use_runtimelinking=no
+          exp_sym_flag='-Bexport'
+          no_entry_flag=""
+        else
+          aix_use_runtimelinking=no
+
+          # Test if we are trying to use run time linking or normal
+          # AIX style linking. If -brtl is somewhere in LDFLAGS, we
+          # need to do runtime linking.
+          case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*)
+	    for ld_flag in $LDFLAGS; do
+	      case $ld_flag in
+	      *-brtl*)
+	        aix_use_runtimelinking=yes
+	        break
+	        ;;
+	      esac
+	    done
+	    ;;
+          esac
+
+          exp_sym_flag='-bexport'
+          no_entry_flag='-bnoentry'
+        fi
+
+        # When large executables or shared objects are built, AIX ld can
+        # have problems creating the table of contents.  If linking a library
+        # or program results in "error TOC overflow" add -mminimal-toc to
+        # CXXFLAGS/CFLAGS for g++/gcc.  In the cases where that is not
+        # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
+
+        _LT_TAGVAR(archive_cmds, $1)=''
+        _LT_TAGVAR(hardcode_direct, $1)=yes
+        _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
+        _LT_TAGVAR(hardcode_libdir_separator, $1)=':'
+        _LT_TAGVAR(link_all_deplibs, $1)=yes
+        _LT_TAGVAR(file_list_spec, $1)='${wl}-f,'
+
+        if test "$GXX" = yes; then
+          case $host_os in aix4.[[012]]|aix4.[[012]].*)
+          # We only want to do this on AIX 4.2 and lower, the check
+          # below for broken collect2 doesn't work under 4.3+
+	  collect2name=`${CC} -print-prog-name=collect2`
+	  if test -f "$collect2name" &&
+	     strings "$collect2name" | $GREP resolve_lib_name >/dev/null
+	  then
+	    # We have reworked collect2
+	    :
+	  else
+	    # We have old collect2
+	    _LT_TAGVAR(hardcode_direct, $1)=unsupported
+	    # It fails to find uninstalled libraries when the uninstalled
+	    # path is not listed in the libpath.  Setting hardcode_minus_L
+	    # to unsupported forces relinking
+	    _LT_TAGVAR(hardcode_minus_L, $1)=yes
+	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+	    _LT_TAGVAR(hardcode_libdir_separator, $1)=
+	  fi
+          esac
+          shared_flag='-shared'
+	  if test "$aix_use_runtimelinking" = yes; then
+	    shared_flag="$shared_flag "'${wl}-G'
+	  fi
+        else
+          # not using gcc
+          if test "$host_cpu" = ia64; then
+	  # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release
+	  # chokes on -Wl,-G. The following line is correct:
+	  shared_flag='-G'
+          else
+	    if test "$aix_use_runtimelinking" = yes; then
+	      shared_flag='${wl}-G'
+	    else
+	      shared_flag='${wl}-bM:SRE'
+	    fi
+          fi
+        fi
+
+        _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-bexpall'
+        # It seems that -bexpall does not export symbols beginning with
+        # underscore (_), so it is better to generate a list of symbols to
+	# export.
+        _LT_TAGVAR(always_export_symbols, $1)=yes
+        if test "$aix_use_runtimelinking" = yes; then
+          # Warning - without using the other runtime loading flags (-brtl),
+          # -berok will link without error, but may produce a broken library.
+          _LT_TAGVAR(allow_undefined_flag, $1)='-berok'
+          # Determine the default libpath from the value encoded in an empty
+          # executable.
+          _LT_SYS_MODULE_PATH_AIX([$1])
+          _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath"
+
+          _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag"
+        else
+          if test "$host_cpu" = ia64; then
+	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib'
+	    _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs"
+	    _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols"
+          else
+	    # Determine the default libpath from the value encoded in an
+	    # empty executable.
+	    _LT_SYS_MODULE_PATH_AIX([$1])
+	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath"
+	    # Warning - without using the other run time loading flags,
+	    # -berok will link without error, but may produce a broken library.
+	    _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok'
+	    _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok'
+	    if test "$with_gnu_ld" = yes; then
+	      # We only use this code for GNU lds that support --whole-archive.
+	      _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
+	    else
+	      # Exported symbols can be pulled into shared objects from archives
+	      _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience'
+	    fi
+	    _LT_TAGVAR(archive_cmds_need_lc, $1)=yes
+	    # This is similar to how AIX traditionally builds its shared
+	    # libraries.
+	    _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname'
+          fi
+        fi
+        ;;
+
+      beos*)
+	if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+	  _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
+	  # Joseph Beckenbach <jrb3@best.com> says some releases of gcc
+	  # support --undefined.  This deserves some investigation.  FIXME
+	  _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+	else
+	  _LT_TAGVAR(ld_shlibs, $1)=no
+	fi
+	;;
+
+      chorus*)
+        case $cc_basename in
+          *)
+	  # FIXME: insert proper C++ library support
+	  _LT_TAGVAR(ld_shlibs, $1)=no
+	  ;;
+        esac
+        ;;
+
+      cygwin* | mingw* | pw32* | cegcc*)
+	case $GXX,$cc_basename in
+	,cl* | no,cl*)
+	  # Native MSVC
+	  # hardcode_libdir_flag_spec is actually meaningless, as there is
+	  # no search path for DLLs.
+	  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' '
+	  _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
+	  _LT_TAGVAR(always_export_symbols, $1)=yes
+	  _LT_TAGVAR(file_list_spec, $1)='@'
+	  # Tell ltmain to make .lib files, not .a files.
+	  libext=lib
+	  # Tell ltmain to make .dll files, not .so files.
+	  shrext_cmds=".dll"
+	  # FIXME: Setting linknames here is a bad hack.
+	  _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames='
+	  _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+	      $SED -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp;
+	    else
+	      $SED -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp;
+	    fi~
+	    $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~
+	    linknames='
+	  # The linker will not automatically build a static lib if we build a DLL.
+	  # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true'
+	  _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
+	  # Don't use ranlib
+	  _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib'
+	  _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~
+	    lt_tool_outputfile="@TOOL_OUTPUT@"~
+	    case $lt_outputfile in
+	      *.exe|*.EXE) ;;
+	      *)
+		lt_outputfile="$lt_outputfile.exe"
+		lt_tool_outputfile="$lt_tool_outputfile.exe"
+		;;
+	    esac~
+	    func_to_tool_file "$lt_outputfile"~
+	    if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then
+	      $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1;
+	      $RM "$lt_outputfile.manifest";
+	    fi'
+	  ;;
+	*)
+	  # g++
+	  # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless,
+	  # as there is no search path for DLLs.
+	  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+	  _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-all-symbols'
+	  _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
+	  _LT_TAGVAR(always_export_symbols, $1)=no
+	  _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
+
+	  if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then
+	    _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+	    # If the export-symbols file already is a .def file (1st line
+	    # is EXPORTS), use it as is; otherwise, prepend...
+	    _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+	      cp $export_symbols $output_objdir/$soname.def;
+	    else
+	      echo EXPORTS > $output_objdir/$soname.def;
+	      cat $export_symbols >> $output_objdir/$soname.def;
+	    fi~
+	    $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+	  else
+	    _LT_TAGVAR(ld_shlibs, $1)=no
+	  fi
+	  ;;
+	esac
+	;;
+      darwin* | rhapsody*)
+        _LT_DARWIN_LINKER_FEATURES($1)
+	;;
+
+      dgux*)
+        case $cc_basename in
+          ec++*)
+	    # FIXME: insert proper C++ library support
+	    _LT_TAGVAR(ld_shlibs, $1)=no
+	    ;;
+          ghcx*)
+	    # Green Hills C++ Compiler
+	    # FIXME: insert proper C++ library support
+	    _LT_TAGVAR(ld_shlibs, $1)=no
+	    ;;
+          *)
+	    # FIXME: insert proper C++ library support
+	    _LT_TAGVAR(ld_shlibs, $1)=no
+	    ;;
+        esac
+        ;;
+
+      freebsd2.*)
+        # C++ shared libraries are reported to be fairly broken before
+	# the switch to ELF
+        _LT_TAGVAR(ld_shlibs, $1)=no
+        ;;
+
+      freebsd-elf*)
+        _LT_TAGVAR(archive_cmds_need_lc, $1)=no
+        ;;
+
+      freebsd* | dragonfly*)
+        # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF
+        # conventions
+        _LT_TAGVAR(ld_shlibs, $1)=yes
+        ;;
+
+      gnu*)
+        ;;
+
+      haiku*)
+        _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+        _LT_TAGVAR(link_all_deplibs, $1)=yes
+        ;;
+
+      hpux9*)
+        _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
+        _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+        _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+        _LT_TAGVAR(hardcode_direct, $1)=yes
+        _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH,
+				             # but as the default
+				             # location of the library.
+
+        case $cc_basename in
+          CC*)
+            # FIXME: insert proper C++ library support
+            _LT_TAGVAR(ld_shlibs, $1)=no
+            ;;
+          aCC*)
+            _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -b ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+            # Commands to make compiler produce verbose output that lists
+            # what "hidden" libraries, object files and flags are used when
+            # linking a shared library.
+            #
+            # There doesn't appear to be a way to prevent this compiler from
+            # explicitly linking system object files so we need to strip them
+            # from the output so that they don't get included in the library
+            # dependencies.
+            output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
+            ;;
+          *)
+            if test "$GXX" = yes; then
+              _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+            else
+              # FIXME: insert proper C++ library support
+              _LT_TAGVAR(ld_shlibs, $1)=no
+            fi
+            ;;
+        esac
+        ;;
+
+      hpux10*|hpux11*)
+        if test $with_gnu_ld = no; then
+	  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
+	  _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+
+          case $host_cpu in
+            hppa*64*|ia64*)
+              ;;
+            *)
+	      _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+              ;;
+          esac
+        fi
+        case $host_cpu in
+          hppa*64*|ia64*)
+            _LT_TAGVAR(hardcode_direct, $1)=no
+            _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+            ;;
+          *)
+            _LT_TAGVAR(hardcode_direct, $1)=yes
+            _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
+            _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH,
+					         # but as the default
+					         # location of the library.
+            ;;
+        esac
+
+        case $cc_basename in
+          CC*)
+	    # FIXME: insert proper C++ library support
+	    _LT_TAGVAR(ld_shlibs, $1)=no
+	    ;;
+          aCC*)
+	    case $host_cpu in
+	      hppa*64*)
+	        _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+	        ;;
+	      ia64*)
+	        _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+	        ;;
+	      *)
+	        _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+	        ;;
+	    esac
+	    # Commands to make compiler produce verbose output that lists
+	    # what "hidden" libraries, object files and flags are used when
+	    # linking a shared library.
+	    #
+	    # There doesn't appear to be a way to prevent this compiler from
+	    # explicitly linking system object files so we need to strip them
+	    # from the output so that they don't get included in the library
+	    # dependencies.
+	    output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
+	    ;;
+          *)
+	    if test "$GXX" = yes; then
+	      if test $with_gnu_ld = no; then
+	        case $host_cpu in
+	          hppa*64*)
+	            _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+	            ;;
+	          ia64*)
+	            _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+	            ;;
+	          *)
+	            _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+	            ;;
+	        esac
+	      fi
+	    else
+	      # FIXME: insert proper C++ library support
+	      _LT_TAGVAR(ld_shlibs, $1)=no
+	    fi
+	    ;;
+        esac
+        ;;
+
+      interix[[3-9]]*)
+	_LT_TAGVAR(hardcode_direct, $1)=no
+	_LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+	_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+	_LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+	# Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc.
+	# Instead, shared libraries are loaded at an image base (0x10000000 by
+	# default) and relocated if they conflict, which is a slow, very
+	# memory-consuming and fragmenting process.  To avoid this, we pick a random,
+	# 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link
+	# time.  Moving up from 0x10000000 also allows more sbrk(2) space.
+	_LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+	_LT_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+	;;
+      irix5* | irix6*)
+        case $cc_basename in
+          CC*)
+	    # SGI C++
+	    _LT_TAGVAR(archive_cmds, $1)='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+
+	    # Archives containing C++ object files must be created using
+	    # "CC -ar", where "CC" is the IRIX C++ compiler.  This is
+	    # necessary to make sure instantiated templates are included
+	    # in the archive.
+	    _LT_TAGVAR(old_archive_cmds, $1)='$CC -ar -WR,-u -o $oldlib $oldobjs'
+	    ;;
+          *)
+	    if test "$GXX" = yes; then
+	      if test "$with_gnu_ld" = no; then
+	        _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+	      else
+	        _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` -o $lib'
+	      fi
+	    fi
+	    _LT_TAGVAR(link_all_deplibs, $1)=yes
+	    ;;
+        esac
+        _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+        _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+        _LT_TAGVAR(inherit_rpath, $1)=yes
+        ;;
+
+      linux* | k*bsd*-gnu | kopensolaris*-gnu)
+        case $cc_basename in
+          KCC*)
+	    # Kuck and Associates, Inc. (KAI) C++ Compiler
+
+	    # KCC will only create a shared library if the output file
+	    # ends with ".so" (or ".sl" for HP-UX), so rename the library
+	    # to its proper name (with version) after linking.
+	    _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib'
+	    _LT_TAGVAR(archive_expsym_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib ${wl}-retain-symbols-file,$export_symbols; mv \$templib $lib'
+	    # Commands to make compiler produce verbose output that lists
+	    # what "hidden" libraries, object files and flags are used when
+	    # linking a shared library.
+	    #
+	    # There doesn't appear to be a way to prevent this compiler from
+	    # explicitly linking system object files so we need to strip them
+	    # from the output so that they don't get included in the library
+	    # dependencies.
+	    output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
+
+	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+	    _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
+
+	    # Archives containing C++ object files must be created using
+	    # "CC -Bstatic", where "CC" is the KAI C++ compiler.
+	    _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs'
+	    ;;
+	  icpc* | ecpc* )
+	    # Intel C++
+	    with_gnu_ld=yes
+	    # version 8.0 and above of icpc choke on multiply defined symbols
+	    # if we add $predep_objects and $postdep_objects, however 7.1 and
+	    # earlier do not add the objects themselves.
+	    case `$CC -V 2>&1` in
+	      *"Version 7."*)
+	        _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
+		_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+		;;
+	      *)  # Version 8.0 or newer
+	        tmp_idyn=
+	        case $host_cpu in
+		  ia64*) tmp_idyn=' -i_dynamic';;
+		esac
+	        _LT_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+		_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+		;;
+	    esac
+	    _LT_TAGVAR(archive_cmds_need_lc, $1)=no
+	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+	    _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
+	    _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
+	    ;;
+          pgCC* | pgcpp*)
+            # Portland Group C++ compiler
+	    case `$CC -V` in
+	    *pgCC\ [[1-5]].* | *pgcpp\ [[1-5]].*)
+	      _LT_TAGVAR(prelink_cmds, $1)='tpldir=Template.dir~
+		rm -rf $tpldir~
+		$CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~
+		compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"'
+	      _LT_TAGVAR(old_archive_cmds, $1)='tpldir=Template.dir~
+		rm -rf $tpldir~
+		$CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~
+		$AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~
+		$RANLIB $oldlib'
+	      _LT_TAGVAR(archive_cmds, $1)='tpldir=Template.dir~
+		rm -rf $tpldir~
+		$CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~
+		$CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib'
+	      _LT_TAGVAR(archive_expsym_cmds, $1)='tpldir=Template.dir~
+		rm -rf $tpldir~
+		$CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~
+		$CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib'
+	      ;;
+	    *) # Version 6 and above use weak symbols
+	      _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib'
+	      _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib'
+	      ;;
+	    esac
+
+	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath ${wl}$libdir'
+	    _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
+	    _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+            ;;
+	  cxx*)
+	    # Compaq C++
+	    _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
+	    _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname  -o $lib ${wl}-retain-symbols-file $wl$export_symbols'
+
+	    runpath_var=LD_RUN_PATH
+	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir'
+	    _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+
+	    # Commands to make compiler produce verbose output that lists
+	    # what "hidden" libraries, object files and flags are used when
+	    # linking a shared library.
+	    #
+	    # There doesn't appear to be a way to prevent this compiler from
+	    # explicitly linking system object files so we need to strip them
+	    # from the output so that they don't get included in the library
+	    # dependencies.
+	    output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed'
+	    ;;
+	  xl* | mpixl* | bgxl*)
+	    # IBM XL 8.0 on PPC, with GNU ld
+	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+	    _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
+	    _LT_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+	    if test "x$supports_anon_versioning" = xyes; then
+	      _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~
+		cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
+		echo "local: *; };" >> $output_objdir/$libname.ver~
+		$CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
+	    fi
+	    ;;
+	  *)
+	    case `$CC -V 2>&1 | sed 5q` in
+	    *Sun\ C*)
+	      # Sun C++ 5.9
+	      _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs'
+	      _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+	      _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file ${wl}$export_symbols'
+	      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+	      _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+	      _LT_TAGVAR(compiler_needs_object, $1)=yes
+
+	      # Not sure whether something based on
+	      # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1
+	      # would be better.
+	      output_verbose_link_cmd='func_echo_all'
+
+	      # Archives containing C++ object files must be created using
+	      # "CC -xar", where "CC" is the Sun C++ compiler.  This is
+	      # necessary to make sure instantiated templates are included
+	      # in the archive.
+	      _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs'
+	      ;;
+	    esac
+	    ;;
+	esac
+	;;
+
+      lynxos*)
+        # FIXME: insert proper C++ library support
+	_LT_TAGVAR(ld_shlibs, $1)=no
+	;;
+
+      m88k*)
+        # FIXME: insert proper C++ library support
+        _LT_TAGVAR(ld_shlibs, $1)=no
+	;;
+
+      mvs*)
+        case $cc_basename in
+          cxx*)
+	    # FIXME: insert proper C++ library support
+	    _LT_TAGVAR(ld_shlibs, $1)=no
+	    ;;
+	  *)
+	    # FIXME: insert proper C++ library support
+	    _LT_TAGVAR(ld_shlibs, $1)=no
+	    ;;
+	esac
+	;;
+
+      netbsd*)
+        if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+	  _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable  -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags'
+	  wlarc=
+	  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+	  _LT_TAGVAR(hardcode_direct, $1)=yes
+	  _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+	fi
+	# Work around some broken pre-1.5 toolchains
+	output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"'
+	;;
+
+      *nto* | *qnx*)
+        _LT_TAGVAR(ld_shlibs, $1)=yes
+	;;
+
+      openbsd2*)
+        # C++ shared libraries are fairly broken
+	_LT_TAGVAR(ld_shlibs, $1)=no
+	;;
+
+      openbsd*)
+	if test -f /usr/libexec/ld.so; then
+	  _LT_TAGVAR(hardcode_direct, $1)=yes
+	  _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+	  _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
+	  _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib'
+	  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+	  if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+	    _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib'
+	    _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+	    _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+	  fi
+	  output_verbose_link_cmd=func_echo_all
+	else
+	  _LT_TAGVAR(ld_shlibs, $1)=no
+	fi
+	;;
+
+      osf3* | osf4* | osf5*)
+        case $cc_basename in
+          KCC*)
+	    # Kuck and Associates, Inc. (KAI) C++ Compiler
+
+	    # KCC will only create a shared library if the output file
+	    # ends with ".so" (or ".sl" for HP-UX), so rename the library
+	    # to its proper name (with version) after linking.
+	    _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib'
+
+	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+	    _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+
+	    # Archives containing C++ object files must be created using
+	    # the KAI C++ compiler.
+	    case $host in
+	      osf3*) _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;;
+	      *) _LT_TAGVAR(old_archive_cmds, $1)='$CC -o $oldlib $oldobjs' ;;
+	    esac
+	    ;;
+          RCC*)
+	    # Rational C++ 2.4.1
+	    # FIXME: insert proper C++ library support
+	    _LT_TAGVAR(ld_shlibs, $1)=no
+	    ;;
+          cxx*)
+	    case $host in
+	      osf3*)
+	        _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
+	        _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $soname `test -n "$verstring" && func_echo_all "${wl}-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+	        _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+		;;
+	      *)
+	        _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*'
+	        _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+	        _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~
+	          echo "-hidden">> $lib.exp~
+	          $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname ${wl}-input ${wl}$lib.exp  `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~
+	          $RM $lib.exp'
+	        _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir'
+		;;
+	    esac
+
+	    _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+
+	    # Commands to make compiler produce verbose output that lists
+	    # what "hidden" libraries, object files and flags are used when
+	    # linking a shared library.
+	    #
+	    # There doesn't appear to be a way to prevent this compiler from
+	    # explicitly linking system object files so we need to strip them
+	    # from the output so that they don't get included in the library
+	    # dependencies.
+	    output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
+	    ;;
+	  *)
+	    if test "$GXX" = yes && test "$with_gnu_ld" = no; then
+	      _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
+	      case $host in
+	        osf3*)
+	          _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+		  ;;
+	        *)
+	          _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+		  ;;
+	      esac
+
+	      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+	      _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+
+	      # Commands to make compiler produce verbose output that lists
+	      # what "hidden" libraries, object files and flags are used when
+	      # linking a shared library.
+	      output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
+
+	    else
+	      # FIXME: insert proper C++ library support
+	      _LT_TAGVAR(ld_shlibs, $1)=no
+	    fi
+	    ;;
+        esac
+        ;;
+
+      psos*)
+        # FIXME: insert proper C++ library support
+        _LT_TAGVAR(ld_shlibs, $1)=no
+        ;;
+
+      sunos4*)
+        case $cc_basename in
+          CC*)
+	    # Sun C++ 4.x
+	    # FIXME: insert proper C++ library support
+	    _LT_TAGVAR(ld_shlibs, $1)=no
+	    ;;
+          lcc*)
+	    # Lucid
+	    # FIXME: insert proper C++ library support
+	    _LT_TAGVAR(ld_shlibs, $1)=no
+	    ;;
+          *)
+	    # FIXME: insert proper C++ library support
+	    _LT_TAGVAR(ld_shlibs, $1)=no
+	    ;;
+        esac
+        ;;
+
+      solaris*)
+        case $cc_basename in
+          CC* | sunCC*)
+	    # Sun C++ 4.2, 5.x and Centerline C++
+            _LT_TAGVAR(archive_cmds_need_lc,$1)=yes
+	    _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs'
+	    _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag}  -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+	    _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+	      $CC -G${allow_undefined_flag} ${wl}-M ${wl}$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
+
+	    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+	    _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+	    case $host_os in
+	      solaris2.[[0-5]] | solaris2.[[0-5]].*) ;;
+	      *)
+		# The compiler driver will combine and reorder linker options,
+		# but understands `-z linker_flag'.
+	        # Supported since Solaris 2.6 (maybe 2.5.1?)
+		_LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract'
+	        ;;
+	    esac
+	    _LT_TAGVAR(link_all_deplibs, $1)=yes
+
+	    output_verbose_link_cmd='func_echo_all'
+
+	    # Archives containing C++ object files must be created using
+	    # "CC -xar", where "CC" is the Sun C++ compiler.  This is
+	    # necessary to make sure instantiated templates are included
+	    # in the archive.
+	    _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs'
+	    ;;
+          gcx*)
+	    # Green Hills C++ Compiler
+	    _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
+
+	    # The C++ compiler must be used to create the archive.
+	    _LT_TAGVAR(old_archive_cmds, $1)='$CC $LDFLAGS -archive -o $oldlib $oldobjs'
+	    ;;
+          *)
+	    # GNU C++ compiler with Solaris linker
+	    if test "$GXX" = yes && test "$with_gnu_ld" = no; then
+	      _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-z ${wl}defs'
+	      if $CC --version | $GREP -v '^2\.7' > /dev/null; then
+	        _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
+	        _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+		  $CC -shared $pic_flag -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
+
+	        # Commands to make compiler produce verbose output that lists
+	        # what "hidden" libraries, object files and flags are used when
+	        # linking a shared library.
+	        output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
+	      else
+	        # g++ 2.7 appears to require `-G' NOT `-shared' on this
+	        # platform.
+	        _LT_TAGVAR(archive_cmds, $1)='$CC -G -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
+	        _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+		  $CC -G -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
+
+	        # Commands to make compiler produce verbose output that lists
+	        # what "hidden" libraries, object files and flags are used when
+	        # linking a shared library.
+	        output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
+	      fi
+
+	      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $wl$libdir'
+	      case $host_os in
+		solaris2.[[0-5]] | solaris2.[[0-5]].*) ;;
+		*)
+		  _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract'
+		  ;;
+	      esac
+	    fi
+	    ;;
+        esac
+        ;;
+
+    sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*)
+      _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text'
+      _LT_TAGVAR(archive_cmds_need_lc, $1)=no
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      runpath_var='LD_RUN_PATH'
+
+      case $cc_basename in
+        CC*)
+	  _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	  _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	  ;;
+	*)
+	  _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	  _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	  ;;
+      esac
+      ;;
+
+      sysv5* | sco3.2v5* | sco5v6*)
+	# Note: We can NOT use -z defs as we might desire, because we do not
+	# link with -lc, and that would cause any symbols used from libc to
+	# always be unresolved, which means just about no library would
+	# ever link correctly.  If we're not using GNU ld we use -z text
+	# though, which does catch some bad symbols but isn't as heavy-handed
+	# as -z defs.
+	_LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text'
+	_LT_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs'
+	_LT_TAGVAR(archive_cmds_need_lc, $1)=no
+	_LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+	_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R,$libdir'
+	_LT_TAGVAR(hardcode_libdir_separator, $1)=':'
+	_LT_TAGVAR(link_all_deplibs, $1)=yes
+	_LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport'
+	runpath_var='LD_RUN_PATH'
+
+	case $cc_basename in
+          CC*)
+	    _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	    _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	    _LT_TAGVAR(old_archive_cmds, $1)='$CC -Tprelink_objects $oldobjs~
+	      '"$_LT_TAGVAR(old_archive_cmds, $1)"
+	    _LT_TAGVAR(reload_cmds, $1)='$CC -Tprelink_objects $reload_objs~
+	      '"$_LT_TAGVAR(reload_cmds, $1)"
+	    ;;
+	  *)
+	    _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	    _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+	    ;;
+	esac
+      ;;
+
+      tandem*)
+        case $cc_basename in
+          NCC*)
+	    # NonStop-UX NCC 3.20
+	    # FIXME: insert proper C++ library support
+	    _LT_TAGVAR(ld_shlibs, $1)=no
+	    ;;
+          *)
+	    # FIXME: insert proper C++ library support
+	    _LT_TAGVAR(ld_shlibs, $1)=no
+	    ;;
+        esac
+        ;;
+
+      vxworks*)
+        # FIXME: insert proper C++ library support
+        _LT_TAGVAR(ld_shlibs, $1)=no
+        ;;
+
+      *)
+        # FIXME: insert proper C++ library support
+        _LT_TAGVAR(ld_shlibs, $1)=no
+        ;;
+    esac
+
+    AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)])
+    test "$_LT_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no
+
+    _LT_TAGVAR(GCC, $1)="$GXX"
+    _LT_TAGVAR(LD, $1)="$LD"
+
+    ## CAVEAT EMPTOR:
+    ## There is no encapsulation within the following macros, do not change
+    ## the running order or otherwise move them around unless you know exactly
+    ## what you are doing...
+    _LT_SYS_HIDDEN_LIBDEPS($1)
+    _LT_COMPILER_PIC($1)
+    _LT_COMPILER_C_O($1)
+    _LT_COMPILER_FILE_LOCKS($1)
+    _LT_LINKER_SHLIBS($1)
+    _LT_SYS_DYNAMIC_LINKER($1)
+    _LT_LINKER_HARDCODE_LIBPATH($1)
+
+    _LT_CONFIG($1)
+  fi # test -n "$compiler"
+
+  CC=$lt_save_CC
+  CFLAGS=$lt_save_CFLAGS
+  LDCXX=$LD
+  LD=$lt_save_LD
+  GCC=$lt_save_GCC
+  with_gnu_ld=$lt_save_with_gnu_ld
+  lt_cv_path_LDCXX=$lt_cv_path_LD
+  lt_cv_path_LD=$lt_save_path_LD
+  lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld
+  lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld
+fi # test "$_lt_caught_CXX_error" != yes
+
+AC_LANG_POP
+])# _LT_LANG_CXX_CONFIG
+
+
+# _LT_FUNC_STRIPNAME_CNF
+# ----------------------
+# Defines the shell function: func_stripname_cnf PREFIX SUFFIX NAME
+# It strips PREFIX and SUFFIX off of NAME and stores the outcome in
+# the shell variable func_stripname_result.  PREFIX and SUFFIX must not
+# contain globbing or regex special characters, hashes or percent signs;
+# the one exception is a leading dot in SUFFIX, which is escaped so that
+# it matches only a literal dot.
+#
+# Same as the (non-XSI) func_stripname, except that this one can be used
+# by m4 code that is executed by configure rather than by the libtool script.
+m4_defun([_LT_FUNC_STRIPNAME_CNF],[dnl
+AC_REQUIRE([_LT_DECL_SED])
+AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])
+func_stripname_cnf ()
+{
+  case ${2} in
+  .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;;
+  *)  func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;;
+  esac
+} # func_stripname_cnf
+])# _LT_FUNC_STRIPNAME_CNF
+
+# _LT_SYS_HIDDEN_LIBDEPS([TAGNAME])
+# ---------------------------------
+# Figure out "hidden" library dependencies by compiling a small test
+# program and parsing the output of $output_verbose_link_cmd.  Sets the
+# TAGNAME variants of predep_objects, postdep_objects, postdeps,
+# compiler_lib_search_path and compiler_lib_search_dirs.
+m4_defun([_LT_SYS_HIDDEN_LIBDEPS],
+[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+AC_REQUIRE([_LT_FUNC_STRIPNAME_CNF])dnl
+# Dependencies to place before and after the object being linked:
+_LT_TAGVAR(predep_objects, $1)=
+_LT_TAGVAR(postdep_objects, $1)=
+_LT_TAGVAR(predeps, $1)=
+_LT_TAGVAR(postdeps, $1)=
+_LT_TAGVAR(compiler_lib_search_path, $1)=
+
+dnl we can't use the lt_simple_compile_test_code here,
+dnl because it contains code intended for an executable,
+dnl not a library.  It's possible we should let each
+dnl tag define a new lt_????_link_test_code variable,
+dnl but it's only used here...
+m4_if([$1], [], [cat > conftest.$ac_ext <<_LT_EOF
+int a;
+void foo (void) { a = 0; }
+_LT_EOF
+], [$1], [CXX], [cat > conftest.$ac_ext <<_LT_EOF
+class Foo
+{
+public:
+  Foo (void) { a = 0; }
+private:
+  int a;
+};
+_LT_EOF
+], [$1], [F77], [cat > conftest.$ac_ext <<_LT_EOF
+      subroutine foo
+      implicit none
+      integer*4 a
+      a=0
+      return
+      end
+_LT_EOF
+], [$1], [FC], [cat > conftest.$ac_ext <<_LT_EOF
+      subroutine foo
+      implicit none
+      integer a
+      a=0
+      return
+      end
+_LT_EOF
+], [$1], [GCJ], [cat > conftest.$ac_ext <<_LT_EOF
+public class foo {
+  private int a;
+  public void bar (void) {
+    a = 0;
+  }
+};
+_LT_EOF
+], [$1], [GO], [cat > conftest.$ac_ext <<_LT_EOF
+package foo
+func foo() {
+}
+_LT_EOF
+])
+
+_lt_libdeps_save_CFLAGS=$CFLAGS
+case "$CC $CFLAGS " in #(
+*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;;
+*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;;
+*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;;
+esac
+
+dnl Parse the compiler output and extract the necessary
+dnl objects, libraries and library flags.
+if AC_TRY_EVAL(ac_compile); then
+  # Walk each word of the verbose link output; prev carries an option
+  # (-L, -R or -l) whose argument is the word currently held in p.
+
+  # Sentinel used to keep track of whether or not we are before
+  # the conftest object file.
+  pre_test_object_deps_done=no
+
+  for p in `eval "$output_verbose_link_cmd"`; do
+    case ${prev}${p} in
+
+    -L* | -R* | -l*)
+       # Some compilers place space between "-{L,R}" and the path.
+       # Remove the space.
+       if test "x$p" = "x-L" ||
+          test "x$p" = "x-R"; then
+	 prev=$p
+	 continue
+       fi
+
+       # Expand the sysroot to ease extracting the directories later.
+       if test -z "$prev"; then
+         case $p in
+         -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;;
+         -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;;
+         -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;;
+         esac
+       fi
+       case $p in
+       =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;;
+       esac
+       if test "$pre_test_object_deps_done" = no; then
+	 case ${prev} in
+	 -L | -R)
+	   # Internal compiler library paths should come after those
+	   # provided by the user.  The postdeps already come after the
+	   # user supplied libs so there is no need to process them.
+	   if test -z "$_LT_TAGVAR(compiler_lib_search_path, $1)"; then
+	     _LT_TAGVAR(compiler_lib_search_path, $1)="${prev}${p}"
+	   else
+	     _LT_TAGVAR(compiler_lib_search_path, $1)="${_LT_TAGVAR(compiler_lib_search_path, $1)} ${prev}${p}"
+	   fi
+	   ;;
+	 # The "-l" case would never come before the object being
+	 # linked, so don't bother handling this case.
+	 esac
+       else
+	 if test -z "$_LT_TAGVAR(postdeps, $1)"; then
+	   _LT_TAGVAR(postdeps, $1)="${prev}${p}"
+	 else
+	   _LT_TAGVAR(postdeps, $1)="${_LT_TAGVAR(postdeps, $1)} ${prev}${p}"
+	 fi
+       fi
+       prev=
+       ;;
+
+    *.lto.$objext) ;; # Ignore GCC LTO objects
+    *.$objext)
+       # This assumes that the test object file only shows up
+       # once in the compiler output.
+       if test "$p" = "conftest.$objext"; then
+	 pre_test_object_deps_done=yes
+	 continue
+       fi
+
+       if test "$pre_test_object_deps_done" = no; then
+	 if test -z "$_LT_TAGVAR(predep_objects, $1)"; then
+	   _LT_TAGVAR(predep_objects, $1)="$p"
+	 else
+	   _LT_TAGVAR(predep_objects, $1)="$_LT_TAGVAR(predep_objects, $1) $p"
+	 fi
+       else
+	 if test -z "$_LT_TAGVAR(postdep_objects, $1)"; then
+	   _LT_TAGVAR(postdep_objects, $1)="$p"
+	 else
+	   _LT_TAGVAR(postdep_objects, $1)="$_LT_TAGVAR(postdep_objects, $1) $p"
+	 fi
+       fi
+       ;;
+
+    *) ;; # Ignore the rest.
+
+    esac
+  done
+
+  # Clean up.
+  rm -f a.out a.exe
+else
+  echo "libtool.m4: error: problem compiling $1 test program"
+fi
+
+$RM -f conftest.$objext
+CFLAGS=$_lt_libdeps_save_CFLAGS
+
+# PORTME: override above test on systems where it is broken
+m4_if([$1], [CXX],
+[case $host_os in
+interix[[3-9]]*)
+  # Interix 3.5 installs completely hosed .la files for C++, so rather than
+  # hack all around it, let's just trust "g++" to DTRT.
+  _LT_TAGVAR(predep_objects,$1)=
+  _LT_TAGVAR(postdep_objects,$1)=
+  _LT_TAGVAR(postdeps,$1)=
+  ;;
+
+linux*)
+  case `$CC -V 2>&1 | sed 5q` in
+  *Sun\ C*)
+    # Sun C++ 5.9
+
+    # The more standards-conforming stlport4 library is
+    # incompatible with the Cstd library. Avoid specifying
+    # it if it's in CXXFLAGS. Ignore libCrun as
+    # -library=stlport4 depends on it.
+    case " $CXX $CXXFLAGS " in
+    *" -library=stlport4 "*)
+      solaris_use_stlport4=yes
+      ;;
+    esac
+
+    if test "$solaris_use_stlport4" != yes; then
+      _LT_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun'
+    fi
+    ;;
+  esac
+  ;;
+
+solaris*)
+  case $cc_basename in
+  CC* | sunCC*)
+    # The more standards-conforming stlport4 library is
+    # incompatible with the Cstd library. Avoid specifying
+    # it if it's in CXXFLAGS. Ignore libCrun as
+    # -library=stlport4 depends on it.
+    case " $CXX $CXXFLAGS " in
+    *" -library=stlport4 "*)
+      solaris_use_stlport4=yes
+      ;;
+    esac
+
+    # Adding this requires a known-good setup of shared libraries for
+    # Sun compiler versions before 5.6, else PIC objects from an old
+    # archive will be linked into the output, leading to subtle bugs.
+    if test "$solaris_use_stlport4" != yes; then
+      _LT_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun'
+    fi
+    ;;
+  esac
+  ;;
+esac
+])
+
+case " $_LT_TAGVAR(postdeps, $1) " in
+*" -lc "*) _LT_TAGVAR(archive_cmds_need_lc, $1)=no ;;
+esac
+ _LT_TAGVAR(compiler_lib_search_dirs, $1)=
+if test -n "${_LT_TAGVAR(compiler_lib_search_path, $1)}"; then
+ _LT_TAGVAR(compiler_lib_search_dirs, $1)=`echo " ${_LT_TAGVAR(compiler_lib_search_path, $1)}" | ${SED} -e 's! -L! !g' -e 's!^ !!'`
+fi
+_LT_TAGDECL([], [compiler_lib_search_dirs], [1],
+    [The directories searched by this compiler when creating a shared library])
+_LT_TAGDECL([], [predep_objects], [1],
+    [Dependencies to place before and after the objects being linked to
+    create a shared library])
+_LT_TAGDECL([], [postdep_objects], [1])
+_LT_TAGDECL([], [predeps], [1])
+_LT_TAGDECL([], [postdeps], [1])
+_LT_TAGDECL([], [compiler_lib_search_path], [1],
+    [The library search path used internally by the compiler when linking
+    a shared library])
+])# _LT_SYS_HIDDEN_LIBDEPS
+
+
+# _LT_LANG_F77_CONFIG([TAG])
+# --------------------------
+# Ensure that the configuration variables for a Fortran 77 compiler are
+# suitably defined; _LT_CONFIG then writes them to the libtool script.
+# The compiler tests are skipped when F77 is empty or set to no.
+m4_defun([_LT_LANG_F77_CONFIG],
+[AC_LANG_PUSH(Fortran 77)
+if test -z "$F77" || test "X$F77" = "Xno"; then
+  _lt_disable_F77=yes
+fi
+
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no
+_LT_TAGVAR(allow_undefined_flag, $1)=
+_LT_TAGVAR(always_export_symbols, $1)=no
+_LT_TAGVAR(archive_expsym_cmds, $1)=
+_LT_TAGVAR(export_dynamic_flag_spec, $1)=
+_LT_TAGVAR(hardcode_direct, $1)=no
+_LT_TAGVAR(hardcode_direct_absolute, $1)=no
+_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
+_LT_TAGVAR(hardcode_libdir_separator, $1)=
+_LT_TAGVAR(hardcode_minus_L, $1)=no
+_LT_TAGVAR(hardcode_automatic, $1)=no
+_LT_TAGVAR(inherit_rpath, $1)=no
+_LT_TAGVAR(module_cmds, $1)=
+_LT_TAGVAR(module_expsym_cmds, $1)=
+_LT_TAGVAR(link_all_deplibs, $1)=unknown
+_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
+_LT_TAGVAR(reload_flag, $1)=$reload_flag
+_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
+_LT_TAGVAR(no_undefined_flag, $1)=
+_LT_TAGVAR(whole_archive_flag_spec, $1)=
+_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no
+
+# Source file extension for f77 test sources.
+ac_ext=f
+
+# Object file extension for compiled f77 test sources.
+objext=o
+_LT_TAGVAR(objext, $1)=$objext
+
+# No sense in running all these tests if we already determined that
+# the F77 compiler isn't working.  Some variables (like enable_shared)
+# are currently assumed to apply to all compilers on this platform,
+# and will be corrupted by setting them based on a non-working compiler.
+if test "$_lt_disable_F77" != yes; then
+  # Code to be used in simple compile tests
+  lt_simple_compile_test_code="\
+      subroutine t
+      return
+      end
+"
+
+  # Code to be used in simple link tests
+  lt_simple_link_test_code="\
+      program t
+      end
+"
+
+  # ltmain only uses $CC for tagged configurations so make sure $CC is set.
+  _LT_TAG_COMPILER
+
+  # save warnings/boilerplate of simple test code
+  _LT_COMPILER_BOILERPLATE
+  _LT_LINKER_BOILERPLATE
+
+  # Save CC (which may be a program name with arguments), GCC and CFLAGS.
+  lt_save_CC="$CC"
+  lt_save_GCC=$GCC
+  lt_save_CFLAGS=$CFLAGS
+  CC=${F77-"f77"}
+  CFLAGS=$FFLAGS
+  compiler=$CC
+  _LT_TAGVAR(compiler, $1)=$CC
+  _LT_CC_BASENAME([$compiler])
+  GCC=$G77
+  if test -n "$compiler"; then
+    AC_MSG_CHECKING([if libtool supports shared libraries])
+    AC_MSG_RESULT([$can_build_shared])
+
+    AC_MSG_CHECKING([whether to build shared libraries])
+    test "$can_build_shared" = "no" && enable_shared=no
+
+    # On AIX, shared libraries and static libraries use the same namespace, and
+    # are all built from PIC.
+    case $host_os in
+      aix3*)
+        test "$enable_shared" = yes && enable_static=no
+        if test -n "$RANLIB"; then
+          archive_cmds="$archive_cmds~\$RANLIB \$lib"
+          postinstall_cmds='$RANLIB $lib'
+        fi
+        ;;
+      aix[[4-9]]*)
+	if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
+	  test "$enable_shared" = yes && enable_static=no
+	fi
+        ;;
+    esac
+    AC_MSG_RESULT([$enable_shared])
+
+    AC_MSG_CHECKING([whether to build static libraries])
+    # Make sure either enable_shared or enable_static is yes.
+    test "$enable_shared" = yes || enable_static=yes
+    AC_MSG_RESULT([$enable_static])
+
+    _LT_TAGVAR(GCC, $1)="$G77"
+    _LT_TAGVAR(LD, $1)="$LD"
+
+    ## CAVEAT EMPTOR:
+    ## There is no encapsulation within the following macros, do not change
+    ## the running order or otherwise move them around unless you know exactly
+    ## what you are doing...
+    _LT_COMPILER_PIC($1)
+    _LT_COMPILER_C_O($1)
+    _LT_COMPILER_FILE_LOCKS($1)
+    _LT_LINKER_SHLIBS($1)
+    _LT_SYS_DYNAMIC_LINKER($1)
+    _LT_LINKER_HARDCODE_LIBPATH($1)
+
+    _LT_CONFIG($1)
+  fi # test -n "$compiler"
+
+  GCC=$lt_save_GCC
+  CC="$lt_save_CC"
+  CFLAGS="$lt_save_CFLAGS"
+fi # test "$_lt_disable_F77" != yes
+
+AC_LANG_POP
+])# _LT_LANG_F77_CONFIG
+
+
+# _LT_LANG_FC_CONFIG([TAG])
+# -------------------------
+# Ensure that the configuration variables for a Fortran compiler are
+# suitably defined; _LT_CONFIG then writes them to the libtool script.
+# The compiler tests are skipped when FC is empty or set to no.
+m4_defun([_LT_LANG_FC_CONFIG],
+[AC_LANG_PUSH(Fortran)
+
+if test -z "$FC" || test "X$FC" = "Xno"; then
+  _lt_disable_FC=yes
+fi
+
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no
+_LT_TAGVAR(allow_undefined_flag, $1)=
+_LT_TAGVAR(always_export_symbols, $1)=no
+_LT_TAGVAR(archive_expsym_cmds, $1)=
+_LT_TAGVAR(export_dynamic_flag_spec, $1)=
+_LT_TAGVAR(hardcode_direct, $1)=no
+_LT_TAGVAR(hardcode_direct_absolute, $1)=no
+_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
+_LT_TAGVAR(hardcode_libdir_separator, $1)=
+_LT_TAGVAR(hardcode_minus_L, $1)=no
+_LT_TAGVAR(hardcode_automatic, $1)=no
+_LT_TAGVAR(inherit_rpath, $1)=no
+_LT_TAGVAR(module_cmds, $1)=
+_LT_TAGVAR(module_expsym_cmds, $1)=
+_LT_TAGVAR(link_all_deplibs, $1)=unknown
+_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
+_LT_TAGVAR(reload_flag, $1)=$reload_flag
+_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
+_LT_TAGVAR(no_undefined_flag, $1)=
+_LT_TAGVAR(whole_archive_flag_spec, $1)=
+_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no
+
+# Source file extension for fc test sources (f unless ac_fc_srcext is set).
+ac_ext=${ac_fc_srcext-f}
+
+# Object file extension for compiled fc test sources.
+objext=o
+_LT_TAGVAR(objext, $1)=$objext
+
+# No sense in running all these tests if we already determined that
+# the FC compiler isn't working.  Some variables (like enable_shared)
+# are currently assumed to apply to all compilers on this platform,
+# and will be corrupted by setting them based on a non-working compiler.
+if test "$_lt_disable_FC" != yes; then
+  # Code to be used in simple compile tests
+  lt_simple_compile_test_code="\
+      subroutine t
+      return
+      end
+"
+
+  # Code to be used in simple link tests
+  lt_simple_link_test_code="\
+      program t
+      end
+"
+
+  # ltmain only uses $CC for tagged configurations so make sure $CC is set.
+  _LT_TAG_COMPILER
+
+  # save warnings/boilerplate of simple test code
+  _LT_COMPILER_BOILERPLATE
+  _LT_LINKER_BOILERPLATE
+
+  # Save CC (which may be a program name with arguments), GCC and CFLAGS.
+  lt_save_CC="$CC"
+  lt_save_GCC=$GCC
+  lt_save_CFLAGS=$CFLAGS
+  CC=${FC-"f95"}
+  CFLAGS=$FCFLAGS
+  compiler=$CC
+  GCC=$ac_cv_fc_compiler_gnu
+
+  _LT_TAGVAR(compiler, $1)=$CC
+  _LT_CC_BASENAME([$compiler])
+
+  if test -n "$compiler"; then
+    AC_MSG_CHECKING([if libtool supports shared libraries])
+    AC_MSG_RESULT([$can_build_shared])
+
+    AC_MSG_CHECKING([whether to build shared libraries])
+    test "$can_build_shared" = "no" && enable_shared=no
+
+    # On AIX, shared libraries and static libraries use the same namespace, and
+    # are all built from PIC.
+    case $host_os in
+      aix3*)
+        test "$enable_shared" = yes && enable_static=no
+        if test -n "$RANLIB"; then
+          archive_cmds="$archive_cmds~\$RANLIB \$lib"
+          postinstall_cmds='$RANLIB $lib'
+        fi
+        ;;
+      aix[[4-9]]*)
+	if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
+	  test "$enable_shared" = yes && enable_static=no
+	fi
+        ;;
+    esac
+    AC_MSG_RESULT([$enable_shared])
+
+    AC_MSG_CHECKING([whether to build static libraries])
+    # Make sure either enable_shared or enable_static is yes.
+    test "$enable_shared" = yes || enable_static=yes
+    AC_MSG_RESULT([$enable_static])
+
+    _LT_TAGVAR(GCC, $1)="$ac_cv_fc_compiler_gnu"
+    _LT_TAGVAR(LD, $1)="$LD"
+
+    ## CAVEAT EMPTOR:
+    ## There is no encapsulation within the following macros, do not change
+    ## the running order or otherwise move them around unless you know exactly
+    ## what you are doing...
+    _LT_SYS_HIDDEN_LIBDEPS($1)
+    _LT_COMPILER_PIC($1)
+    _LT_COMPILER_C_O($1)
+    _LT_COMPILER_FILE_LOCKS($1)
+    _LT_LINKER_SHLIBS($1)
+    _LT_SYS_DYNAMIC_LINKER($1)
+    _LT_LINKER_HARDCODE_LIBPATH($1)
+
+    _LT_CONFIG($1)
+  fi # test -n "$compiler"
+
+  GCC=$lt_save_GCC
+  CC=$lt_save_CC
+  CFLAGS=$lt_save_CFLAGS
+fi # test "$_lt_disable_FC" != yes
+
+AC_LANG_POP
+])# _LT_LANG_FC_CONFIG
+
+
+# _LT_LANG_GCJ_CONFIG([TAG])
+# --------------------------
+# Ensure that the configuration variables for the GNU Java Compiler
+# are suitably defined.  These variables are subsequently used by _LT_CONFIG
+# to write the compiler configuration to `libtool'.
+m4_defun([_LT_LANG_GCJ_CONFIG],
+[AC_REQUIRE([LT_PROG_GCJ])dnl
+AC_LANG_SAVE
+
+# Source file extension for Java test sources.
+ac_ext=java
+
+# Object file extension for compiled Java test sources.
+objext=o
+_LT_TAGVAR(objext, $1)=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code="class foo {}"
+
+# Code to be used in simple link tests
+lt_simple_link_test_code='public class conftest { public static void main(String[[]] argv) {}; }'
+
+# ltmain only uses $CC for tagged configurations so make sure $CC is set.
+_LT_TAG_COMPILER
+
+# save warnings/boilerplate of simple test code
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+# Save CC, CFLAGS and GCC, then point them at gcj (GCJ overrides the name).
+lt_save_CC=$CC
+lt_save_CFLAGS=$CFLAGS
+lt_save_GCC=$GCC
+GCC=yes
+CC=${GCJ-"gcj"}
+CFLAGS=$GCJFLAGS
+compiler=$CC
+_LT_TAGVAR(compiler, $1)=$CC
+_LT_TAGVAR(LD, $1)="$LD"
+_LT_CC_BASENAME([$compiler])
+
+# GCJ postdates the GCC versions that did not implicitly link libc in.
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no
+
+_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
+_LT_TAGVAR(reload_flag, $1)=$reload_flag
+_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
+
+## CAVEAT EMPTOR:
+## There is no encapsulation within the following macros, do not change
+## the running order or otherwise move them around unless you know exactly
+## what you are doing...
+if test -n "$compiler"; then
+  _LT_COMPILER_NO_RTTI($1)
+  _LT_COMPILER_PIC($1)
+  _LT_COMPILER_C_O($1)
+  _LT_COMPILER_FILE_LOCKS($1)
+  _LT_LINKER_SHLIBS($1)
+  _LT_LINKER_HARDCODE_LIBPATH($1)
+
+  _LT_CONFIG($1)
+fi
+
+AC_LANG_RESTORE
+
+GCC=$lt_save_GCC
+CC=$lt_save_CC
+CFLAGS=$lt_save_CFLAGS
+])# _LT_LANG_GCJ_CONFIG
+
+
+# _LT_LANG_GO_CONFIG([TAG])
+# -------------------------
+# Ensure that the configuration variables for the GNU Go compiler
+# are suitably defined.  These variables are subsequently used by _LT_CONFIG
+# to write the compiler configuration to `libtool'.
+m4_defun([_LT_LANG_GO_CONFIG],
+[AC_REQUIRE([LT_PROG_GO])dnl
+AC_LANG_SAVE
+
+# Source file extension for Go test sources.
+ac_ext=go
+
+# Object file extension for compiled Go test sources.
+objext=o
+_LT_TAGVAR(objext, $1)=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code="package main; func main() { }"
+
+# Code to be used in simple link tests
+lt_simple_link_test_code='package main; func main() { }'
+
+# ltmain only uses $CC for tagged configurations so make sure $CC is set.
+_LT_TAG_COMPILER
+
+# save warnings/boilerplate of simple test code
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+# Save CC, CFLAGS and GCC, then point them at gccgo (GOC overrides the name).
+lt_save_CC=$CC
+lt_save_CFLAGS=$CFLAGS
+lt_save_GCC=$GCC
+GCC=yes
+CC=${GOC-"gccgo"}
+CFLAGS=$GOFLAGS
+compiler=$CC
+_LT_TAGVAR(compiler, $1)=$CC
+_LT_TAGVAR(LD, $1)="$LD"
+_LT_CC_BASENAME([$compiler])
+
+# Go postdates the GCC versions that did not implicitly link libc in.
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no
+
+_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
+_LT_TAGVAR(reload_flag, $1)=$reload_flag
+_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
+
+## CAVEAT EMPTOR:
+## There is no encapsulation within the following macros, do not change
+## the running order or otherwise move them around unless you know exactly
+## what you are doing...
+if test -n "$compiler"; then
+  _LT_COMPILER_NO_RTTI($1)
+  _LT_COMPILER_PIC($1)
+  _LT_COMPILER_C_O($1)
+  _LT_COMPILER_FILE_LOCKS($1)
+  _LT_LINKER_SHLIBS($1)
+  _LT_LINKER_HARDCODE_LIBPATH($1)
+
+  _LT_CONFIG($1)
+fi
+
+AC_LANG_RESTORE
+
+GCC=$lt_save_GCC
+CC=$lt_save_CC
+CFLAGS=$lt_save_CFLAGS
+])# _LT_LANG_GO_CONFIG
+
+
+# _LT_LANG_RC_CONFIG([TAG])
+# -------------------------
+# Ensure that the configuration variables for the Windows resource compiler
+# are suitably defined.  These variables are subsequently used by _LT_CONFIG
+# to write the compiler configuration to `libtool'.
+m4_defun([_LT_LANG_RC_CONFIG],
+[AC_REQUIRE([LT_PROG_RC])dnl
+AC_LANG_SAVE
+
+# Source file extension for RC test sources.
+ac_ext=rc
+
+# Object file extension for compiled RC test sources.
+objext=o
+_LT_TAGVAR(objext, $1)=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }'
+
+# Code to be used in simple link tests (same snippet as the compile test)
+lt_simple_link_test_code="$lt_simple_compile_test_code"
+
+# ltmain only uses $CC for tagged configurations so make sure $CC is set.
+_LT_TAG_COMPILER
+
+# save warnings/boilerplate of simple test code
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+# Save CC, CFLAGS and GCC, then point CC at windres (RC overrides the name).
+lt_save_CC="$CC"
+lt_save_CFLAGS=$CFLAGS
+lt_save_GCC=$GCC
+GCC=
+CC=${RC-"windres"}
+CFLAGS=
+compiler=$CC
+_LT_TAGVAR(compiler, $1)=$CC
+_LT_CC_BASENAME([$compiler])
+_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes
+
+if test -n "$compiler"; then
+  :
+  _LT_CONFIG($1)
+fi
+
+GCC=$lt_save_GCC
+AC_LANG_RESTORE
+CC=$lt_save_CC
+CFLAGS=$lt_save_CFLAGS
+])# _LT_LANG_RC_CONFIG
+
+# LT_PROG_GCJ
+# -----------
+# Find gcj: reuse AC_PROG_GCJ or the Automake macro if defined, else probe.
+AC_DEFUN([LT_PROG_GCJ],
+[m4_ifdef([AC_PROG_GCJ], [AC_PROG_GCJ],
+  [m4_ifdef([A][M_PROG_GCJ], [A][M_PROG_GCJ],
+    [AC_CHECK_TOOL(GCJ, gcj,)
+      test "x${GCJFLAGS+set}" = xset || GCJFLAGS="-g -O2"
+      AC_SUBST(GCJFLAGS)])])[]dnl
+])
+
+# Old name:
+AU_ALIAS([LT_AC_PROG_GCJ], [LT_PROG_GCJ])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([LT_AC_PROG_GCJ], [])
+
+# LT_PROG_GO
+# ----------
+# Probe for the gccgo tool and record the outcome in GOC.
+AC_DEFUN([LT_PROG_GO],
+[AC_CHECK_TOOL([GOC], [gccgo])
+])
+
+# LT_PROG_RC
+# ----------
+# Probe for the windres tool and record the outcome in RC.
+AC_DEFUN([LT_PROG_RC],
+[AC_CHECK_TOOL([RC], [windres])
+])
+
+# Old name:
+AU_ALIAS([LT_AC_PROG_RC], [LT_PROG_RC])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([LT_AC_PROG_RC], [])
+
+
+# _LT_DECL_EGREP
+# --------------
+# Require the Autoconf EGREP and FGREP checks; if GREP is still empty
+# afterwards, fall back to the grep found first in the user's PATH.
+m4_defun([_LT_DECL_EGREP],
+[AC_REQUIRE([AC_PROG_EGREP])dnl
+AC_REQUIRE([AC_PROG_FGREP])dnl
+test -z "$GREP" && GREP=grep
+_LT_DECL([], [GREP], [1], [A grep program that handles long lines])
+_LT_DECL([], [EGREP], [1], [An ERE matcher])
+_LT_DECL([], [FGREP], [1], [A literal string matcher])
+dnl Non-bleeding-edge autoconf doesn't subst GREP, so do it here too
+AC_SUBST([GREP])
+])
+
+
+# _LT_DECL_OBJDUMP
+# ----------------
+# Ensure the OBJDUMP variable is set: probe for objdump as a tool and
+# fall back to the plain name objdump if the variable ends up empty.
+m4_defun([_LT_DECL_OBJDUMP],
+[AC_CHECK_TOOL([OBJDUMP], [objdump], [false])
+test -n "$OBJDUMP" || OBJDUMP=objdump
+_LT_DECL([], [OBJDUMP], [1], [An object symbol dumper])
+AC_SUBST([OBJDUMP])
+])
+
+# _LT_DECL_DLLTOOL
+# ----------------
+# Make sure DLLTOOL is set, defaulting to the plain name dlltool.
+m4_defun([_LT_DECL_DLLTOOL],
+[AC_CHECK_TOOL([DLLTOOL], [dlltool], [false])
+test -n "$DLLTOOL" || DLLTOOL=dlltool
+_LT_DECL([], [DLLTOOL], [1], [DLL creation program])
+AC_SUBST([DLLTOOL])
+])
+
+# _LT_DECL_SED
+# ------------
+# Check for a fully-functional sed program that truncates as few characters
+# as possible (GNU sed preferred); also set Xsed, which drops a leading X.
+m4_defun([_LT_DECL_SED],
+[AC_PROG_SED
+test -z "$SED" && SED=sed
+Xsed="$SED -e 1s/^X//"
+_LT_DECL([], [SED], [1], [A sed program that does not truncate output])
+_LT_DECL([], [Xsed], ["\$SED -e 1s/^X//"],
+    [Sed that helps us avoid accidentally triggering echo(1) options like -n])
+])# _LT_DECL_SED
+
+m4_ifndef([AC_PROG_SED], [
+############################################################
+# NOTE: This macro has been submitted for inclusion into   #
+#  GNU Autoconf as AC_PROG_SED.  When it is available in   #
+#  a released version of Autoconf we should remove this    #
+#  macro and use it instead.                               #
+############################################################
+
+m4_defun([AC_PROG_SED],
+[AC_MSG_CHECKING([for a sed that does not truncate output])
+AC_CACHE_VAL(lt_cv_path_SED,
+[# Collect every executable sed and gsed found on the user's PATH.
+# Each candidate in that list is then tested for output truncation.
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  for lt_ac_prog in sed gsed; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      if $as_executable_p "$as_dir/$lt_ac_prog$ac_exec_ext"; then
+        lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext"
+      fi
+    done
+  done
+done
+IFS=$as_save_IFS
+lt_ac_max=0
+lt_ac_count=0
+# Add /usr/xpg4/bin/sed as it is typically found on Solaris
+# along with /bin/sed that truncates output.
+for lt_ac_sed in $lt_ac_sed_list /usr/xpg4/bin/sed; do
+  test ! -f "$lt_ac_sed" && continue
+  cat /dev/null > conftest.in
+  lt_ac_count=0
+  echo $ECHO_N "0123456789$ECHO_C" >conftest.in
+  # Check for GNU sed and select it if it is found.
+  if "$lt_ac_sed" --version 2>&1 < /dev/null | grep 'GNU' > /dev/null; then
+    lt_cv_path_SED=$lt_ac_sed
+    break
+  fi
+  while true; do
+    cat conftest.in conftest.in >conftest.tmp
+    mv conftest.tmp conftest.in
+    cp conftest.in conftest.nl
+    echo >>conftest.nl
+    "$lt_ac_sed" -e 's/a$//' < conftest.nl >conftest.out || break
+    cmp -s conftest.out conftest.nl || break
+    # Enough input: stop once the count passes 10 (input doubles each pass)
+    test $lt_ac_count -gt 10 && break
+    lt_ac_count=`expr $lt_ac_count + 1`
+    if test $lt_ac_count -gt $lt_ac_max; then
+      lt_ac_max=$lt_ac_count
+      lt_cv_path_SED=$lt_ac_sed
+    fi
+  done
+done
+])
+SED=$lt_cv_path_SED
+AC_SUBST([SED])
+AC_MSG_RESULT([$SED])
+])#AC_PROG_SED
+])#m4_ifndef
+
+# Old name:
+AU_ALIAS([LT_AC_PROG_SED], [AC_PROG_SED])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([LT_AC_PROG_SED], [])
+
+
+# _LT_CHECK_SHELL_FEATURES
+# ------------------------
+# Probe the shell for XSI parameter expansions and arithmetic, the +=
+# append operator and a working unset; also pick ASCII or EBCDIC tr sets.
+m4_defun([_LT_CHECK_SHELL_FEATURES],
+[AC_MSG_CHECKING([whether the shell understands some XSI constructs])
+# XSI check: prefix and suffix stripping, arithmetic and string length
+xsi_shell=no
+( _lt_dummy="a/b/c"
+  test "${_lt_dummy##*/},${_lt_dummy%/*},${_lt_dummy#??}"${_lt_dummy%"$_lt_dummy"}, \
+      = c,a/b,b/c, \
+    && eval 'test $(( 1 + 1 )) -eq 2 \
+    && test "${#_lt_dummy}" -eq 5' ) >/dev/null 2>&1 \
+  && xsi_shell=yes
+AC_MSG_RESULT([$xsi_shell])
+_LT_CONFIG_LIBTOOL_INIT([xsi_shell='$xsi_shell'])
+
+AC_MSG_CHECKING([whether the shell understands "+="])
+lt_shell_append=no
+( foo=bar; set foo baz; eval "$[1]+=\$[2]" && test "$foo" = barbaz ) \
+    >/dev/null 2>&1 \
+  && lt_shell_append=yes
+AC_MSG_RESULT([$lt_shell_append])
+_LT_CONFIG_LIBTOOL_INIT([lt_shell_append='$lt_shell_append'])
+
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
+  lt_unset=unset
+else
+  lt_unset=false
+fi
+_LT_DECL([], [lt_unset], [0], [whether the shell understands "unset"])dnl
+
+# Octal 101 is A in ASCII; any other result is treated as EBCDIC
+case `echo X|tr X '\101'` in
+ A) # ASCII based system
+    # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr
+  lt_SP2NL='tr \040 \012'
+  lt_NL2SP='tr \015\012 \040\040'
+  ;;
+ *) # EBCDIC based system
+  lt_SP2NL='tr \100 \n'
+  lt_NL2SP='tr \r\n \100\100'
+  ;;
+esac
+_LT_DECL([SP2NL], [lt_SP2NL], [1], [turn spaces into newlines])dnl
+_LT_DECL([NL2SP], [lt_NL2SP], [1], [turn newlines into spaces])dnl
+])# _LT_CHECK_SHELL_FEATURES
+
+
+# _LT_PROG_FUNCTION_REPLACE (FUNCNAME, REPLACEMENT-BODY)
+# ------------------------------------------------------
+# In `$cfgfile', look for function FUNCNAME delimited by `^FUNCNAME ()$' and
+# `^} # FUNCNAME ', and replace its body with REPLACEMENT-BODY.
+m4_defun([_LT_PROG_FUNCTION_REPLACE],
+[dnl {
+sed -e '/^$1 ()$/,/^} # $1 /c\
+$1 ()\
+{\
+m4_bpatsubsts([$2], [$], [\\], [^\([	 ]\)], [\\\1])
+} # Extended-shell $1 implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+])
+
+
+# _LT_PROG_REPLACE_SHELLFNS
+# -------------------------
+# Replace existing portable implementations of several shell functions with
+# equivalent extended shell implementations where those features are available.
+m4_defun([_LT_PROG_REPLACE_SHELLFNS],
+[if test x"$xsi_shell" = xyes; then
+  _LT_PROG_FUNCTION_REPLACE([func_dirname], [dnl
+    case ${1} in
+      */*) func_dirname_result="${1%/*}${2}" ;;
+      *  ) func_dirname_result="${3}" ;;
+    esac])
+
+  _LT_PROG_FUNCTION_REPLACE([func_basename], [dnl
+    func_basename_result="${1##*/}"])
+
+  _LT_PROG_FUNCTION_REPLACE([func_dirname_and_basename], [dnl
+    case ${1} in
+      */*) func_dirname_result="${1%/*}${2}" ;;
+      *  ) func_dirname_result="${3}" ;;
+    esac
+    func_basename_result="${1##*/}"])
+
+  _LT_PROG_FUNCTION_REPLACE([func_stripname], [dnl
+    # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are
+    # positional parameters, so assign one to ordinary parameter first.
+    func_stripname_result=${3}
+    func_stripname_result=${func_stripname_result#"${1}"}
+    func_stripname_result=${func_stripname_result%"${2}"}])
+
+  _LT_PROG_FUNCTION_REPLACE([func_split_long_opt], [dnl
+    func_split_long_opt_name=${1%%=*}
+    func_split_long_opt_arg=${1#*=}])
+
+  _LT_PROG_FUNCTION_REPLACE([func_split_short_opt], [dnl
+    func_split_short_opt_arg=${1#??}
+    func_split_short_opt_name=${1%"$func_split_short_opt_arg"}])
+
+  _LT_PROG_FUNCTION_REPLACE([func_lo2o], [dnl
+    case ${1} in
+      *.lo) func_lo2o_result=${1%.lo}.${objext} ;;
+      *)    func_lo2o_result=${1} ;;
+    esac])
+
+  _LT_PROG_FUNCTION_REPLACE([func_xform], [    func_xform_result=${1%.*}.lo])
+
+  _LT_PROG_FUNCTION_REPLACE([func_arith], [    func_arith_result=$(( $[*] ))])
+
+  _LT_PROG_FUNCTION_REPLACE([func_len], [    func_len_result=${#1}])
+fi
+
+if test x"$lt_shell_append" = xyes; then
+  _LT_PROG_FUNCTION_REPLACE([func_append], [    eval "${1}+=\\${2}"])
+
+  _LT_PROG_FUNCTION_REPLACE([func_append_quoted], [dnl
+    func_quote_for_eval "${2}"
+dnl m4 expansion turns \\\\ into \\, and then the shell eval turns that into \
+    eval "${1}+=\\\\ \\$func_quote_for_eval_result"])
+
+  # Save a `func_append' function call where possible by direct use of '+='
+  sed -e 's%func_append \([[a-zA-Z_]]\{1,\}\) "%\1+="%g' $cfgfile > $cfgfile.tmp \
+    && mv -f "$cfgfile.tmp" "$cfgfile" \
+      || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+  test 0 -eq $? || _lt_function_replace_fail=:
+else
+  # Save a `func_append' function call even when '+=' is not available
+  sed -e 's%func_append \([[a-zA-Z_]]\{1,\}\) "%\1="$\1%g' $cfgfile > $cfgfile.tmp \
+    && mv -f "$cfgfile.tmp" "$cfgfile" \
+      || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+  test 0 -eq $? || _lt_function_replace_fail=:
+fi
+
+if test x"$_lt_function_replace_fail" = x":"; then
+  AC_MSG_WARN([Unable to substitute extended shell functions in $ofile])
+fi
+])
+
+# _LT_PATH_CONVERSION_FUNCTIONS
+# -----------------------------
+# Determine which file name conversion functions should be used by
+# func_to_host_file (and, implicitly, by func_to_host_path).  These are needed
+# for certain cross-compile configurations and native mingw.
+m4_defun([_LT_PATH_CONVERSION_FUNCTIONS],
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_REQUIRE([AC_CANONICAL_BUILD])dnl
+AC_MSG_CHECKING([how to convert $build file names to $host format])
+AC_CACHE_VAL(lt_cv_to_host_file_cmd,
+[case $host in
+  *-*-mingw* )
+    case $build in
+      *-*-mingw* ) # actually msys
+        lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32
+        ;;
+      *-*-cygwin* )
+        lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32
+        ;;
+      * ) # otherwise, assume *nix
+        lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32
+        ;;
+    esac
+    ;;
+  *-*-cygwin* )
+    case $build in
+      *-*-mingw* ) # actually msys
+        lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin
+        ;;
+      *-*-cygwin* )
+        lt_cv_to_host_file_cmd=func_convert_file_noop
+        ;;
+      * ) # otherwise, assume *nix
+        lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin
+        ;;
+    esac
+    ;;
+  * ) # unhandled hosts (and "normal" native builds)
+    lt_cv_to_host_file_cmd=func_convert_file_noop
+    ;;
+esac
+])
+to_host_file_cmd=$lt_cv_to_host_file_cmd
+AC_MSG_RESULT([$lt_cv_to_host_file_cmd])
+_LT_DECL([to_host_file_cmd], [lt_cv_to_host_file_cmd],
+         [0], [convert $build file names to $host format])dnl
+
+AC_MSG_CHECKING([how to convert $build file names to toolchain format])
+AC_CACHE_VAL(lt_cv_to_tool_file_cmd,
+[#assume ordinary cross tools, or native build.
+lt_cv_to_tool_file_cmd=func_convert_file_noop
+case $host in
+  *-*-mingw* )
+    case $build in
+      *-*-mingw* ) # actually msys
+        lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32
+        ;;
+    esac
+    ;;
+esac
+])
+to_tool_file_cmd=$lt_cv_to_tool_file_cmd
+AC_MSG_RESULT([$lt_cv_to_tool_file_cmd])
+_LT_DECL([to_tool_file_cmd], [lt_cv_to_tool_file_cmd],
+         [0], [convert $build files to toolchain format])dnl
+])# _LT_PATH_CONVERSION_FUNCTIONS
diff --git a/third-party/libjxl/libjxl/third_party/lcms/m4/ltoptions.m4 b/third-party/libjxl/libjxl/third_party/lcms/m4/ltoptions.m4
new file mode 100644
index 0000000000..5d9acd8e23
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/m4/ltoptions.m4
@@ -0,0 +1,384 @@
+# Helper functions for option handling.                    -*- Autoconf -*-
+#
+#   Copyright (C) 2004, 2005, 2007, 2008, 2009 Free Software Foundation,
+#   Inc.
+#   Written by Gary V. Vaughan, 2004
+#
+# This file is free software; the Free Software Foundation gives
+# unlimited permission to copy and/or distribute it, with or without
+# modifications, as long as this notice is preserved.
+
+# serial 7 ltoptions.m4
+
+# This is to help aclocal find these macros, as it can't see m4_define.
+AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])])
+
+
+# _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME)
+# ------------------------------------------
+m4_define([_LT_MANGLE_OPTION],
+[[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])])
+
+
+# _LT_SET_OPTION(MACRO-NAME, OPTION-NAME)
+# ---------------------------------------
+# Record OPTION-NAME as set for macro MACRO-NAME, then dispatch to the
+# matching handler if one is defined; options without a handler only
+# trigger an m4 warning about the unknown option.
+m4_define([_LT_SET_OPTION],
+[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl
+m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]),
+        _LT_MANGLE_DEFUN([$1], [$2]),
+    [m4_warning([Unknown $1 option `$2'])])[]dnl
+])
+
+
+# _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET])
+# ------------------------------------------------------------
+# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
+m4_define([_LT_IF_OPTION],
+[m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])])
+
+
+# _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET)
+# -------------------------------------------------------
+# Execute IF-NOT-SET unless at least one of the options in OPTION-LIST
+# for MACRO-NAME is set.
+m4_define([_LT_UNLESS_OPTIONS],
+[m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
+	    [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option),
+		      [m4_define([$0_found])])])[]dnl
+m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3
+])[]dnl
+])
+
+
+# _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST)
+# ----------------------------------------
+# OPTION-LIST is a space-separated list of Libtool options associated
+# with MACRO-NAME.  If any OPTION has a matching handler declared with
+# LT_OPTION_DEFINE, dispatch to that macro; otherwise warn about the
+# unknown option.
+m4_defun([_LT_SET_OPTIONS],
+[# Set options
+m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
+    [_LT_SET_OPTION([$1], _LT_Option)])
+
+m4_if([$1],[LT_INIT],[
+  dnl
+  dnl Simply set some default values (i.e off) if boolean options were not
+  dnl specified:
+  _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no
+  ])
+  _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no
+  ])
+  dnl
+  dnl If no reference was made to various pairs of opposing options, then
+  dnl we run the default mode handler for the pair.  For example, if neither
+  dnl `shared' nor `disable-shared' was passed, we enable building of shared
+  dnl archives by default:
+  _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED])
+  _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC])
+  _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC])
+  _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install],
+  		   [_LT_ENABLE_FAST_INSTALL])
+  ])
+])# _LT_SET_OPTIONS
+
+
+## --------------------------------- ##
+## Macros to handle LT_INIT options. ##
+## --------------------------------- ##
+
+# _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME)
+# -----------------------------------------
+m4_define([_LT_MANGLE_DEFUN],
+[[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])])
+
+
+# LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE)
+# -----------------------------------------------
+m4_define([LT_OPTION_DEFINE],
+[m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl
+])# LT_OPTION_DEFINE
+
+
+# dlopen
+# ------
+LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes
+])
+
+AU_DEFUN([AC_LIBTOOL_DLOPEN],
+[_LT_SET_OPTION([LT_INIT], [dlopen])
+AC_DIAGNOSE([obsolete],
+[$0: Remove this warning and the call to _LT_SET_OPTION when you
+put the `dlopen' option into LT_INIT's first parameter.])
+])
+
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], [])
+
+
+# win32-dll
+# ---------
+# Declare package support for building win32 dll's.
+LT_OPTION_DEFINE([LT_INIT], [win32-dll],
+[enable_win32_dll=yes
+
+case $host in
+*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*)
+  AC_CHECK_TOOL(AS, as, false)
+  AC_CHECK_TOOL(DLLTOOL, dlltool, false)
+  AC_CHECK_TOOL(OBJDUMP, objdump, false)
+  ;;
+esac
+
+test -z "$AS" && AS=as
+_LT_DECL([], [AS],      [1], [Assembler program])dnl
+
+test -z "$DLLTOOL" && DLLTOOL=dlltool
+_LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl
+
+test -z "$OBJDUMP" && OBJDUMP=objdump
+_LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl
+])# win32-dll
+
+AU_DEFUN([AC_LIBTOOL_WIN32_DLL],
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+_LT_SET_OPTION([LT_INIT], [win32-dll])
+AC_DIAGNOSE([obsolete],
+[$0: Remove this warning and the call to _LT_SET_OPTION when you
+put the `win32-dll' option into LT_INIT's first parameter.])
+])
+
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], [])
+
+
+# _LT_ENABLE_SHARED([DEFAULT])
+# ----------------------------
+# implement the --enable-shared flag, and support the `shared' and
+# `disable-shared' LT_INIT options.
+# DEFAULT is either `yes' or `no'.  If omitted, it defaults to `yes'.
+m4_define([_LT_ENABLE_SHARED],
+[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl
+AC_ARG_ENABLE([shared],
+    [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@],
+	[build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])],
+    [p=${PACKAGE-default}
+    case $enableval in
+    yes) enable_shared=yes ;;
+    no) enable_shared=no ;;
+    *)
+      enable_shared=no
+      # Look at the argument we got.  We use all the common list separators.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for pkg in $enableval; do
+	IFS="$lt_save_ifs"
+	if test "X$pkg" = "X$p"; then
+	  enable_shared=yes
+	fi
+      done
+      IFS="$lt_save_ifs"
+      ;;
+    esac],
+    [enable_shared=]_LT_ENABLE_SHARED_DEFAULT)
+
+    _LT_DECL([build_libtool_libs], [enable_shared], [0],
+	[Whether or not to build shared libraries])
+])# _LT_ENABLE_SHARED
+
+LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])])
+LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])])
+
+# Old names:
+AC_DEFUN([AC_ENABLE_SHARED],
+[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared])
+])
+
+AC_DEFUN([AC_DISABLE_SHARED],
+[_LT_SET_OPTION([LT_INIT], [disable-shared])
+])
+
+AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)])
+AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)])
+
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AM_ENABLE_SHARED], [])
+dnl AC_DEFUN([AM_DISABLE_SHARED], [])
+
+
+
+# _LT_ENABLE_STATIC([DEFAULT])
+# ----------------------------
+# implement the --enable-static flag, and support the `static' and
+# `disable-static' LT_INIT options.
+# DEFAULT is either `yes' or `no'.  If omitted, it defaults to `yes'.
+m4_define([_LT_ENABLE_STATIC],
+[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl
+AC_ARG_ENABLE([static],
+    [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@],
+	[build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])],
+    [p=${PACKAGE-default}
+    case $enableval in
+    yes) enable_static=yes ;;
+    no) enable_static=no ;;
+    *)
+     enable_static=no
+      # Look at the argument we got.  We use all the common list separators.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for pkg in $enableval; do
+	IFS="$lt_save_ifs"
+	if test "X$pkg" = "X$p"; then
+	  enable_static=yes
+	fi
+      done
+      IFS="$lt_save_ifs"
+      ;;
+    esac],
+    [enable_static=]_LT_ENABLE_STATIC_DEFAULT)
+
+    _LT_DECL([build_old_libs], [enable_static], [0],
+	[Whether or not to build static libraries])
+])# _LT_ENABLE_STATIC
+
+LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])])
+LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])])
+
+# Old names:
+AC_DEFUN([AC_ENABLE_STATIC],
+[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static])
+])
+
+AC_DEFUN([AC_DISABLE_STATIC],
+[_LT_SET_OPTION([LT_INIT], [disable-static])
+])
+
+AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)])
+AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)])
+
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AM_ENABLE_STATIC], [])
+dnl AC_DEFUN([AM_DISABLE_STATIC], [])
+
+
+
+# _LT_ENABLE_FAST_INSTALL([DEFAULT])
+# ----------------------------------
+# implement the --enable-fast-install flag, and support the `fast-install'
+# and `disable-fast-install' LT_INIT options.
+# DEFAULT is either `yes' or `no'.  If omitted, it defaults to `yes'.
+m4_define([_LT_ENABLE_FAST_INSTALL],
+[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl
+AC_ARG_ENABLE([fast-install],
+    [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@],
+    [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])],
+    [p=${PACKAGE-default}
+    case $enableval in
+    yes) enable_fast_install=yes ;;
+    no) enable_fast_install=no ;;
+    *)
+      enable_fast_install=no
+      # Look at the argument we got.  We use all the common list separators.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for pkg in $enableval; do
+	IFS="$lt_save_ifs"
+	if test "X$pkg" = "X$p"; then
+	  enable_fast_install=yes
+	fi
+      done
+      IFS="$lt_save_ifs"
+      ;;
+    esac],
+    [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT)
+
+_LT_DECL([fast_install], [enable_fast_install], [0],
+	 [Whether or not to optimize for fast installation])dnl
+])# _LT_ENABLE_FAST_INSTALL
+
+LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])])
+LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])])
+
+# Old names:
+AU_DEFUN([AC_ENABLE_FAST_INSTALL],
+[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install])
+AC_DIAGNOSE([obsolete],
+[$0: Remove this warning and the call to _LT_SET_OPTION when you put
+the `fast-install' option into LT_INIT's first parameter.])
+])
+
+AU_DEFUN([AC_DISABLE_FAST_INSTALL],
+[_LT_SET_OPTION([LT_INIT], [disable-fast-install])
+AC_DIAGNOSE([obsolete],
+[$0: Remove this warning and the call to _LT_SET_OPTION when you put
+the `disable-fast-install' option into LT_INIT's first parameter.])
+])
+
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], [])
+dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], [])
+
+
+# _LT_WITH_PIC([MODE])
+# --------------------
+# implement the --with-pic flag, and support the `pic-only' and `no-pic'
+# LT_INIT options.
+# MODE is either `yes' or `no'.  If omitted, it defaults to `default' (use both).
+m4_define([_LT_WITH_PIC],
+[AC_ARG_WITH([pic],
+    [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@],
+	[try to use only PIC/non-PIC objects @<:@default=use both@:>@])],
+    [lt_p=${PACKAGE-default}
+    case $withval in
+    yes|no) pic_mode=$withval ;;
+    *)
+      pic_mode=default
+      # Look at the argument we got.  We use all the common list separators.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for lt_pkg in $withval; do
+	IFS="$lt_save_ifs"
+	if test "X$lt_pkg" = "X$lt_p"; then
+	  pic_mode=yes
+	fi
+      done
+      IFS="$lt_save_ifs"
+      ;;
+    esac],
+    [pic_mode=default])
+
+test -z "$pic_mode" && pic_mode=m4_default([$1], [default])
+
+_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl
+])# _LT_WITH_PIC
+
+LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])])
+LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])])
+
+# Old name:
+AU_DEFUN([AC_LIBTOOL_PICMODE],
+[_LT_SET_OPTION([LT_INIT], [pic-only])
+AC_DIAGNOSE([obsolete],
+[$0: Remove this warning and the call to _LT_SET_OPTION when you
+put the `pic-only' option into LT_INIT's first parameter.])
+])
+
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_LIBTOOL_PICMODE], [])
+
+## ----------------- ##
+## LTDL_INIT Options ##
+## ----------------- ##
+
+m4_define([_LTDL_MODE], [])
+LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive],
+		 [m4_define([_LTDL_MODE], [nonrecursive])])
+LT_OPTION_DEFINE([LTDL_INIT], [recursive],
+		 [m4_define([_LTDL_MODE], [recursive])])
+LT_OPTION_DEFINE([LTDL_INIT], [subproject],
+		 [m4_define([_LTDL_MODE], [subproject])])
+
+m4_define([_LTDL_TYPE], [])
+LT_OPTION_DEFINE([LTDL_INIT], [installable],
+		 [m4_define([_LTDL_TYPE], [installable])])
+LT_OPTION_DEFINE([LTDL_INIT], [convenience],
+		 [m4_define([_LTDL_TYPE], [convenience])])
diff --git a/third-party/libjxl/libjxl/third_party/lcms/m4/ltsugar.m4 b/third-party/libjxl/libjxl/third_party/lcms/m4/ltsugar.m4
new file mode 100644
index 0000000000..9000a057d3
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/m4/ltsugar.m4
@@ -0,0 +1,123 @@
+# ltsugar.m4 -- libtool m4 base layer.                         -*-Autoconf-*-
+#
+# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+# Written by Gary V. Vaughan, 2004
+#
+# This file is free software; the Free Software Foundation gives
+# unlimited permission to copy and/or distribute it, with or without
+# modifications, as long as this notice is preserved.
+
+# serial 6 ltsugar.m4
+
+# This is to help aclocal find these macros, as it can't see m4_define.
+AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])])
+
+
+# lt_join(SEP, ARG1, [ARG2...])
+# -----------------------------
+# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their
+# associated separator.
+# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier
+# versions in m4sugar had bugs.
+m4_define([lt_join],
+[m4_if([$#], [1], [],
+       [$#], [2], [[$2]],
+       [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])])
+m4_define([_lt_join],
+[m4_if([$#$2], [2], [],
+       [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])])
+
+
+# lt_car(LIST)
+# lt_cdr(LIST)
+# ------------
+# Manipulate m4 lists.
+# These macros are necessary as long as we still need to support
+# Autoconf-2.59 which quotes differently.
+m4_define([lt_car], [[$1]])
+m4_define([lt_cdr],
+[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])],
+       [$#], 1, [],
+       [m4_dquote(m4_shift($@))])])
+m4_define([lt_unquote], $1)
+
+
+# lt_append(MACRO-NAME, STRING, [SEPARATOR])
+# ------------------------------------------
+# Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'.
+# Note that neither SEPARATOR nor STRING are expanded; they are appended
+# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked).
+# No SEPARATOR is output if MACRO-NAME was previously undefined (different
+# than defined and empty).
+#
+# This macro is needed until we can rely on Autoconf 2.62, since earlier
+# versions of m4sugar mistakenly expanded SEPARATOR but not STRING.
+m4_define([lt_append],
+[m4_define([$1],
+	   m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])])
+
+
+
+# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...])
+# ----------------------------------------------------------
+# Produce a SEP delimited list of all paired combinations of elements of
+# PREFIX-LIST with SUFFIX1 through SUFFIXn.  Each element of the list
+# has the form PREFIXmINFIXSUFFIXn.
+# Needed until we can rely on m4_combine added in Autoconf 2.62.
+m4_define([lt_combine],
+[m4_if(m4_eval([$# > 3]), [1],
+       [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl
+[[m4_foreach([_Lt_prefix], [$2],
+	     [m4_foreach([_Lt_suffix],
+		]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[,
+	[_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])])
+
+
+# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ])
+# -----------------------------------------------------------------------
+# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited
+# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ.
+m4_define([lt_if_append_uniq],
+[m4_ifdef([$1],
+	  [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1],
+		 [lt_append([$1], [$2], [$3])$4],
+		 [$5])],
+	  [lt_append([$1], [$2], [$3])$4])])
+
+
+# lt_dict_add(DICT, KEY, VALUE)
+# -----------------------------
+m4_define([lt_dict_add],
+[m4_define([$1($2)], [$3])])
+
+
+# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE)
+# --------------------------------------------
+m4_define([lt_dict_add_subkey],
+[m4_define([$1($2:$3)], [$4])])
+
+
+# lt_dict_fetch(DICT, KEY, [SUBKEY])
+# ----------------------------------
+m4_define([lt_dict_fetch],
+[m4_ifval([$3],
+	m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]),
+    m4_ifdef([$1($2)], [m4_defn([$1($2)])]))])
+
+
+# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE])
+# -----------------------------------------------------------------
+m4_define([lt_if_dict_fetch],
+[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4],
+	[$5],
+    [$6])])
+
+
+# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...])
+# --------------------------------------------------------------
+m4_define([lt_dict_filter],
+[m4_if([$5], [], [],
+  [lt_join(m4_quote(m4_default([$4], [[, ]])),
+           lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]),
+		      [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl
+])
diff --git a/third-party/libjxl/libjxl/third_party/lcms/m4/ltversion.m4 b/third-party/libjxl/libjxl/third_party/lcms/m4/ltversion.m4
new file mode 100644
index 0000000000..07a8602d48
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/m4/ltversion.m4
@@ -0,0 +1,23 @@
+# ltversion.m4 -- version numbers			-*- Autoconf -*-
+#
+#   Copyright (C) 2004 Free Software Foundation, Inc.
+#   Written by Scott James Remnant, 2004
+#
+# This file is free software; the Free Software Foundation gives
+# unlimited permission to copy and/or distribute it, with or without
+# modifications, as long as this notice is preserved.
+
+# @configure_input@
+
+# serial 3337 ltversion.m4
+# This file is part of GNU Libtool
+
+m4_define([LT_PACKAGE_VERSION], [2.4.2])
+m4_define([LT_PACKAGE_REVISION], [1.3337])
+
+AC_DEFUN([LTVERSION_VERSION],
+[macro_version='2.4.2'
+macro_revision='1.3337'
+_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
+_LT_DECL(, macro_revision, 0)
+])
diff --git a/third-party/libjxl/libjxl/third_party/lcms/m4/lt~obsolete.m4 b/third-party/libjxl/libjxl/third_party/lcms/m4/lt~obsolete.m4
new file mode 100644
index 0000000000..c573da90c5
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/m4/lt~obsolete.m4
@@ -0,0 +1,98 @@
+# lt~obsolete.m4 -- aclocal satisfying obsolete definitions.    -*-Autoconf-*-
+#
+#   Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc.
+#   Written by Scott James Remnant, 2004.
+#
+# This file is free software; the Free Software Foundation gives
+# unlimited permission to copy and/or distribute it, with or without
+# modifications, as long as this notice is preserved.
+
+# serial 5 lt~obsolete.m4
+
+# These exist entirely to fool aclocal when bootstrapping libtool.
+#
+# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN)
+# which have later been changed to m4_define as they aren't part of the
+# exported API, or moved to Autoconf or Automake where they belong.
+#
+# The trouble is, aclocal is a bit thick.  It'll see the old AC_DEFUN
+# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us
+# using a macro with the same name in our local m4/libtool.m4 it'll
+# pull the old libtool.m4 in (it doesn't see our shiny new m4_define
+# and doesn't know about Autoconf macros at all.)
+#
+# So we provide this file, which has a silly filename so it's always
+# included after everything else.  This provides aclocal with the
+# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything
+# because those macros already exist, or will be overwritten later.
+# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. 
+#
+# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here.
+# Yes, that means every name once taken will need to remain here until
+# we give up compatibility with versions before 1.7, at which point
+# we need to keep only those names which we still refer to.
+
+# This is to help aclocal find these macros, as it can't see m4_define.
+AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])])
+
+m4_ifndef([AC_LIBTOOL_LINKER_OPTION],	[AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])])
+m4_ifndef([AC_PROG_EGREP],		[AC_DEFUN([AC_PROG_EGREP])])
+m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH],	[AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])])
+m4_ifndef([_LT_AC_SHELL_INIT],		[AC_DEFUN([_LT_AC_SHELL_INIT])])
+m4_ifndef([_LT_AC_SYS_LIBPATH_AIX],	[AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])])
+m4_ifndef([_LT_PROG_LTMAIN],		[AC_DEFUN([_LT_PROG_LTMAIN])])
+m4_ifndef([_LT_AC_TAGVAR],		[AC_DEFUN([_LT_AC_TAGVAR])])
+m4_ifndef([AC_LTDL_ENABLE_INSTALL],	[AC_DEFUN([AC_LTDL_ENABLE_INSTALL])])
+m4_ifndef([AC_LTDL_PREOPEN],		[AC_DEFUN([AC_LTDL_PREOPEN])])
+m4_ifndef([_LT_AC_SYS_COMPILER],	[AC_DEFUN([_LT_AC_SYS_COMPILER])])
+m4_ifndef([_LT_AC_LOCK],		[AC_DEFUN([_LT_AC_LOCK])])
+m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE],	[AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])])
+m4_ifndef([_LT_AC_TRY_DLOPEN_SELF],	[AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])])
+m4_ifndef([AC_LIBTOOL_PROG_CC_C_O],	[AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])])
+m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])])
+m4_ifndef([AC_LIBTOOL_OBJDIR],		[AC_DEFUN([AC_LIBTOOL_OBJDIR])])
+m4_ifndef([AC_LTDL_OBJDIR],		[AC_DEFUN([AC_LTDL_OBJDIR])])
+m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])])
+m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP],	[AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])])
+m4_ifndef([AC_PATH_MAGIC],		[AC_DEFUN([AC_PATH_MAGIC])])
+m4_ifndef([AC_PROG_LD_GNU],		[AC_DEFUN([AC_PROG_LD_GNU])])
+m4_ifndef([AC_PROG_LD_RELOAD_FLAG],	[AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])])
+m4_ifndef([AC_DEPLIBS_CHECK_METHOD],	[AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])])
+m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])])
+m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])])
+m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])])
+m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS],	[AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])])
+m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP],	[AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])])
+m4_ifndef([LT_AC_PROG_EGREP],		[AC_DEFUN([LT_AC_PROG_EGREP])])
+m4_ifndef([LT_AC_PROG_SED],		[AC_DEFUN([LT_AC_PROG_SED])])
+m4_ifndef([_LT_CC_BASENAME],		[AC_DEFUN([_LT_CC_BASENAME])])
+m4_ifndef([_LT_COMPILER_BOILERPLATE],	[AC_DEFUN([_LT_COMPILER_BOILERPLATE])])
+m4_ifndef([_LT_LINKER_BOILERPLATE],	[AC_DEFUN([_LT_LINKER_BOILERPLATE])])
+m4_ifndef([_AC_PROG_LIBTOOL],		[AC_DEFUN([_AC_PROG_LIBTOOL])])
+m4_ifndef([AC_LIBTOOL_SETUP],		[AC_DEFUN([AC_LIBTOOL_SETUP])])
+m4_ifndef([_LT_AC_CHECK_DLFCN],		[AC_DEFUN([_LT_AC_CHECK_DLFCN])])
+m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER],	[AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])])
+m4_ifndef([_LT_AC_TAGCONFIG],		[AC_DEFUN([_LT_AC_TAGCONFIG])])
+m4_ifndef([AC_DISABLE_FAST_INSTALL],	[AC_DEFUN([AC_DISABLE_FAST_INSTALL])])
+m4_ifndef([_LT_AC_LANG_CXX],		[AC_DEFUN([_LT_AC_LANG_CXX])])
+m4_ifndef([_LT_AC_LANG_F77],		[AC_DEFUN([_LT_AC_LANG_F77])])
+m4_ifndef([_LT_AC_LANG_GCJ],		[AC_DEFUN([_LT_AC_LANG_GCJ])])
+m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG],	[AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])])
+m4_ifndef([_LT_AC_LANG_C_CONFIG],	[AC_DEFUN([_LT_AC_LANG_C_CONFIG])])
+m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG],	[AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])])
+m4_ifndef([_LT_AC_LANG_CXX_CONFIG],	[AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])])
+m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG],	[AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])])
+m4_ifndef([_LT_AC_LANG_F77_CONFIG],	[AC_DEFUN([_LT_AC_LANG_F77_CONFIG])])
+m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG],	[AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])])
+m4_ifndef([_LT_AC_LANG_GCJ_CONFIG],	[AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])])
+m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG],	[AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])])
+m4_ifndef([_LT_AC_LANG_RC_CONFIG],	[AC_DEFUN([_LT_AC_LANG_RC_CONFIG])])
+m4_ifndef([AC_LIBTOOL_CONFIG],		[AC_DEFUN([AC_LIBTOOL_CONFIG])])
+m4_ifndef([_LT_AC_FILE_LTDLL_C],	[AC_DEFUN([_LT_AC_FILE_LTDLL_C])])
+m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS],	[AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])])
+m4_ifndef([_LT_AC_PROG_CXXCPP],		[AC_DEFUN([_LT_AC_PROG_CXXCPP])])
+m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS],	[AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])])
+m4_ifndef([_LT_PROG_ECHO_BACKSLASH],	[AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])])
+m4_ifndef([_LT_PROG_F77],		[AC_DEFUN([_LT_PROG_F77])])
+m4_ifndef([_LT_PROG_FC],		[AC_DEFUN([_LT_PROG_FC])])
+m4_ifndef([_LT_PROG_CXX],		[AC_DEFUN([_LT_PROG_CXX])])
diff --git a/third-party/libjxl/libjxl/third_party/lcms/missing b/third-party/libjxl/libjxl/third_party/lcms/missing
new file mode 100755
index 0000000000..f62bbae306
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/missing
@@ -0,0 +1,215 @@
+#! /bin/sh
+# Common wrapper for a few potentially missing GNU programs.
+
+scriptversion=2013-10-28.13; # UTC
+
+# Copyright (C) 1996-2014 Free Software Foundation, Inc.
+# Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+if test $# -eq 0; then
+  echo 1>&2 "Try '$0 --help' for more information"
+  exit 1
+fi
+# Options meant for this wrapper are handled below; any other first argument is the PROGRAM to run.
+case $1 in
+
+  --is-lightweight)
+    # Used by our autoconf macros to check whether the available missing
+    # script is modern enough.
+    exit 0
+    ;;
+
+  --run)
+    # Back-compat with the calling convention used by older automake.
+    shift
+    ;;
+  # -h and each listed prefix of --help print the usage text and exit with echo's status.
+  -h|--h|--he|--hel|--help)
+    echo "\
+$0 [OPTION]... PROGRAM [ARGUMENT]...
+
+Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due
+to PROGRAM being missing or too old.
+
+Options:
+  -h, --help      display this help and exit
+  -v, --version   output version information and exit
+
+Supported PROGRAM values:
+  aclocal   autoconf  autoheader   autom4te  automake  makeinfo
+  bison     yacc      flex         lex       help2man
+
+Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and
+'g' are ignored when checking the name.
+
+Send bug reports to <bug-automake@gnu.org>."
+    exit $?
+    ;;
+  # Likewise for -v and each listed prefix of --version.
+  -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
+    echo "missing $scriptversion (GNU Automake)"
+    exit $?
+    ;;
+  # Any other dash-prefixed first argument is rejected as an unknown option.
+  -*)
+    echo 1>&2 "$0: unknown '$1' option"
+    echo 1>&2 "Try '$0 --help' for more information"
+    exit 1
+    ;;
+
+esac
+
+# Run the given program with its arguments, remember its exit status.
+"$@"; st=$?
+
+# If it succeeded, we are done.
+test $st -eq 0 && exit 0
+
+# Also exit now if it failed (or wasn't found) and '--version' or '--help'
+# was its first argument; such an option is most likely passed to detect
+# whether the program is present and works.
+case $2 in --version|--help) exit $st;; esac
+
+# Exit code 63 means version mismatch.  This often happens when the user
+# tries to use an ancient version of a tool on a file that requires a
+# minimum version.  $msg is later inserted into the warning text.
+if test $st -eq 63; then
+  msg="probably too old"
+elif test $st -eq 127; then
+  # Program was missing.
+  msg="missing on your system"
+else
+  # Program was found and executed, but failed.  Give up.
+  exit $st
+fi
+
+perl_URL=http://www.perl.org/
+flex_URL=http://flex.sourceforge.net/
+gnu_software_URL=http://www.gnu.org/software
+# program_details NAME: print the package that provides NAME and its prerequisites; other names print nothing.
+program_details ()
+{
+  case $1 in
+    aclocal|automake)
+      echo "The '$1' program is part of the GNU Automake package:"
+      echo "<$gnu_software_URL/automake>"
+      echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:"
+      echo "<$gnu_software_URL/autoconf>"
+      echo "<$gnu_software_URL/m4/>"
+      echo "<$perl_URL>"
+      ;;
+    autoconf|autom4te|autoheader)
+      echo "The '$1' program is part of the GNU Autoconf package:"
+      echo "<$gnu_software_URL/autoconf/>"
+      echo "It also requires GNU m4 and Perl in order to run:"
+      echo "<$gnu_software_URL/m4/>"
+      echo "<$perl_URL>"
+      ;;
+  esac
+}
+# give_advice PROGRAM: print "'PROGRAM' is $msg." followed by hints on when PROGRAM is needed and where to get it.
+give_advice ()
+{
+  # Normalize the name: strip one leading 'gnu-', 'gnu' or 'g' (sed 't' stops after the first hit).
+  normalized_program=`echo "$1" | sed '
+    s/^gnu-//; t
+    s/^gnu//; t
+    s/^g//; t'`
+  # The message uses the name as given, not the normalized one.
+  printf '%s\n' "'$1' is $msg."
+  # Wording shared by the autoheader, automake and aclocal arms below.
+  configure_deps="'configure.ac' or m4 files included by 'configure.ac'"
+  case $normalized_program in
+    autoconf*)
+      echo "You should only need it if you modified 'configure.ac',"
+      echo "or m4 files included by it."
+      program_details 'autoconf'
+      ;;
+    autoheader*)
+      echo "You should only need it if you modified 'acconfig.h' or"
+      echo "$configure_deps."
+      program_details 'autoheader'
+      ;;
+    automake*)
+      echo "You should only need it if you modified 'Makefile.am' or"
+      echo "$configure_deps."
+      program_details 'automake'
+      ;;
+    aclocal*)
+      echo "You should only need it if you modified 'acinclude.m4' or"
+      echo "$configure_deps."
+      program_details 'aclocal'
+      ;;
+   autom4te*)
+      echo "You might have modified some maintainer files that require"
+      echo "the 'autom4te' program to be rebuilt."
+      program_details 'autom4te'
+      ;;
+    bison*|yacc*)
+      echo "You should only need it if you modified a '.y' file."
+      echo "You may want to install the GNU Bison package:"
+      echo "<$gnu_software_URL/bison/>"
+      ;;
+    lex*|flex*)
+      echo "You should only need it if you modified a '.l' file."
+      echo "You may want to install the Fast Lexical Analyzer package:"
+      echo "<$flex_URL>"
+      ;;
+    help2man*)
+      echo "You should only need it if you modified a dependency" \
+           "of a man page."
+      echo "You may want to install the GNU Help2man package:"
+      echo "<$gnu_software_URL/help2man/>"
+    ;;
+    makeinfo*)
+      echo "You should only need it if you modified a '.texi' file, or"
+      echo "any other file indirectly affecting the aspect of the manual."
+      echo "You might want to install the Texinfo package:"
+      echo "<$gnu_software_URL/texinfo/>"
+      echo "The spurious makeinfo call might also be the consequence of"
+      echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might"
+      echo "want to install GNU make:"
+      echo "<$gnu_software_URL/make/>"
+      ;;
+    *)
+      echo "You might have modified some files without having the proper"
+      echo "tools for further handling them.  Check the 'README' file, it"
+      echo "often tells you about the needed prerequisites for installing"
+      echo "this package.  You may also peek at any GNU archive site, in"
+      echo "case some other package contains this missing '$1' program."
+      ;;
+  esac
+}
+# Emit the advice on stderr: sed prefixes line 1 with 'WARNING: ' and indents the remaining lines.
+give_advice "$1" | sed -e '1s/^/WARNING: /' \
+                       -e '2,$s/^/         /' >&2
+
+# Propagate the correct exit status (expected to be 127 for a program
+# not found, 63 for a program that failed due to version mismatch).
+exit $st
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/Makefile.am b/third-party/libjxl/libjxl/third_party/lcms/src/Makefile.am
new file mode 100644
index 0000000000..227d79d823
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/Makefile.am
@@ -0,0 +1,31 @@
+#
+# Makefile for building lcms 2 library
+#
+
+# Require automake 1.7 or newer; 'foreign' means don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign
+
+# CFLAGS = -pedantic -Wall -std=c99 -O3
+# Pin the header directory to <prefix>/include.
+includedir = ${prefix}/include
+
+# Shared libraries built in this directory
+lib_LTLIBRARIES = liblcms2.la
+# libtool version-info triple; the @...@ placeholders are filled in by configure.
+LIBRARY_CURRENT    = @LIBRARY_CURRENT@
+LIBRARY_REVISION   = @LIBRARY_REVISION@
+LIBRARY_AGE        = @LIBRARY_AGE@
+# Header search path: build tree first, then source tree.
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include
+# Link flags: no undefined symbols allowed, plus the shared-library version built from the triple above.
+liblcms2_la_LDFLAGS = -no-undefined \
+  -version-info $(LIBRARY_CURRENT):$(LIBRARY_REVISION):$(LIBRARY_AGE)
+# Extra libraries to link into liblcms2.
+liblcms2_la_LIBADD = $(LCMS_LIB_DEPLIBS)
+# Library sources; the internal header lcms2_internal.h is listed alongside the .c files.
+liblcms2_la_SOURCES = \
+  cmscnvrt.c cmserr.c cmsgamma.c cmsgmt.c cmsintrp.c cmsio0.c cmsio1.c cmslut.c \
+  cmsplugin.c cmssm.c cmsmd5.c cmsmtrx.c cmspack.c cmspcs.c cmswtpnt.c cmsxform.c \
+  cmssamp.c cmsnamed.c cmscam02.c cmsvirt.c cmstypes.c cmscgats.c cmsps2.c cmsopt.c \
+  cmshalf.c cmsalpha.c lcms2_internal.h
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/Makefile.in b/third-party/libjxl/libjxl/third_party/lcms/src/Makefile.in
new file mode 100644
index 0000000000..1a606e44b4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/Makefile.in
@@ -0,0 +1,724 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Makefile for building lcms 2 library
+#
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = src
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
+	$(top_srcdir)/m4/ax_append_compile_flags.m4 \
+	$(top_srcdir)/m4/ax_append_flag.m4 \
+	$(top_srcdir)/m4/ax_check_compile_flag.m4 \
+	$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+	$(top_srcdir)/m4/ax_require_defined.m4 \
+	$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+	$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+	$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
+am__installdirs = "$(DESTDIR)$(libdir)"
+LTLIBRARIES = $(lib_LTLIBRARIES)
+am__DEPENDENCIES_1 =
+liblcms2_la_DEPENDENCIES = $(am__DEPENDENCIES_1)
+am_liblcms2_la_OBJECTS = cmscnvrt.lo cmserr.lo cmsgamma.lo cmsgmt.lo \
+	cmsintrp.lo cmsio0.lo cmsio1.lo cmslut.lo cmsplugin.lo \
+	cmssm.lo cmsmd5.lo cmsmtrx.lo cmspack.lo cmspcs.lo cmswtpnt.lo \
+	cmsxform.lo cmssamp.lo cmsnamed.lo cmscam02.lo cmsvirt.lo \
+	cmstypes.lo cmscgats.lo cmsps2.lo cmsopt.lo cmshalf.lo \
+	cmsalpha.lo
+liblcms2_la_OBJECTS = $(am_liblcms2_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 = 
+liblcms2_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(liblcms2_la_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(liblcms2_la_SOURCES)
+DIST_SOURCES = $(liblcms2_la_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JPEGICC_DEPLIBS = @JPEGICC_DEPLIBS@
+LCMS_LIB_DEPLIBS = @LCMS_LIB_DEPLIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBRARY_AGE = @LIBRARY_AGE@
+LIBRARY_CURRENT = @LIBRARY_CURRENT@
+LIBRARY_REVISION = @LIBRARY_REVISION@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBTOOL_DEPS = @LIBTOOL_DEPS@
+LIB_JPEG = @LIB_JPEG@
+LIB_MATH = @LIB_MATH@
+LIB_THREAD = @LIB_THREAD@
+LIB_TIFF = @LIB_TIFF@
+LIB_ZLIB = @LIB_ZLIB@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_CXX = @PTHREAD_CXX@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TIFFICC_DEPLIBS = @TIFFICC_DEPLIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+
+# CFLAGS = -pedantic -Wall -std=c99 -O3
+includedir = ${prefix}/include
+infodir = @infodir@
+inline = @inline@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign
+
+# Shared libraries built in this directory
+lib_LTLIBRARIES = liblcms2.la
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include
+liblcms2_la_LDFLAGS = -no-undefined \
+  -version-info $(LIBRARY_CURRENT):$(LIBRARY_REVISION):$(LIBRARY_AGE)
+
+liblcms2_la_LIBADD = $(LCMS_LIB_DEPLIBS)
+liblcms2_la_SOURCES = \
+  cmscnvrt.c cmserr.c cmsgamma.c cmsgmt.c cmsintrp.c cmsio0.c cmsio1.c cmslut.c \
+  cmsplugin.c cmssm.c cmsmd5.c cmsmtrx.c cmspack.c cmspcs.c cmswtpnt.c cmsxform.c \
+  cmssamp.c cmsnamed.c cmscam02.c cmsvirt.c cmstypes.c cmscgats.c cmsps2.c cmsopt.c \
+  cmshalf.c cmsalpha.c lcms2_internal.h
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign src/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+install-libLTLIBRARIES: $(lib_LTLIBRARIES)
+	@$(NORMAL_INSTALL)
+	@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
+	list2=; for p in $$list; do \
+	  if test -f $$p; then \
+	    list2="$$list2 $$p"; \
+	  else :; fi; \
+	done; \
+	test -z "$$list2" || { \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \
+	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
+	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
+	}
+
+uninstall-libLTLIBRARIES:
+	@$(NORMAL_UNINSTALL)
+	@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
+	for p in $$list; do \
+	  $(am__strip_dir) \
+	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \
+	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \
+	done
+
+clean-libLTLIBRARIES:
+	-test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
+	@list='$(lib_LTLIBRARIES)'; \
+	locs=`for p in $$list; do echo $$p; done | \
+	      sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+	      sort -u`; \
+	test -z "$$locs" || { \
+	  echo rm -f $${locs}; \
+	  rm -f $${locs}; \
+	}
+
+liblcms2.la: $(liblcms2_la_OBJECTS) $(liblcms2_la_DEPENDENCIES) $(EXTRA_liblcms2_la_DEPENDENCIES) 
+	$(AM_V_CCLD)$(liblcms2_la_LINK) -rpath $(libdir) $(liblcms2_la_OBJECTS) $(liblcms2_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsalpha.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmscam02.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmscgats.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmscnvrt.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmserr.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsgamma.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsgmt.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmshalf.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsintrp.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsio0.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsio1.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmslut.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsmd5.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsmtrx.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsnamed.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsopt.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmspack.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmspcs.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsplugin.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsps2.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmssamp.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmssm.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmstypes.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsvirt.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmswtpnt.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsxform.Plo@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\
+@am__fastdepCC_TRUE@	$(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+	for dir in "$(DESTDIR)$(libdir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \
+	mostlyclean-am
+
+distclean: distclean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-libLTLIBRARIES
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-libLTLIBRARIES
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+	clean-libLTLIBRARIES clean-libtool cscopelist-am ctags \
+	ctags-am distclean distclean-compile distclean-generic \
+	distclean-libtool distclean-tags distdir dvi dvi-am html \
+	html-am info info-am install install-am install-data \
+	install-data-am install-dvi install-dvi-am install-exec \
+	install-exec-am install-html install-html-am install-info \
+	install-info-am install-libLTLIBRARIES install-man install-pdf \
+	install-pdf-am install-ps install-ps-am install-strip \
+	installcheck installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags tags-am uninstall uninstall-am uninstall-libLTLIBRARIES
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmsalpha.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmsalpha.c
new file mode 100644
index 0000000000..ae9f3d9a11
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmsalpha.c
@@ -0,0 +1,635 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Alpha copy ------------------------------------------------------------------------------------------------------------------
+
+// This macro returns the given 16-bit word with its two bytes swapped
+#define CHANGE_ENDIAN(w)    (cmsUInt16Number) ((cmsUInt16Number) ((w)<<8)|((w)>>8))
+
+
+// Convert a double to a byte with saturation: 0.5 is added first, a result at or
+// below 0 yields 0, a result at or above 255 yields 255, and anything in between
+// is passed through _cmsQuickFloorWord and narrowed to 8 bits.
+cmsINLINE cmsUInt8Number _cmsQuickSaturateByte(cmsFloat64Number d)
+{
+       const cmsFloat64Number biased = d + 0.5;
+
+       if (biased <= 0)
+              return 0;
+
+       if (biased >= 255.0)
+              return 255;
+
+       return (cmsUInt8Number) _cmsQuickFloorWord(biased);
+}
+
+
+// Size in bytes of one channel sample of the given format
+static
+cmsUInt32Number trueBytesSize(cmsUInt32Number Format)
+{
+    const cmsUInt32Number declared = T_BYTES(Format);
+
+    // A zero T_BYTES field stands for double; every other value is already the size
+    return (declared != 0) ? declared : (cmsUInt32Number) sizeof(double);
+}
+
+
+// Several format converters
+
+typedef void(*cmsFormatterAlphaFn)(void* dst, const void* src);
+
+
+// From 8
+
+// 8 -> 8: plain one-byte copy
+static
+void copy8(void* dst, const void* src)
+{
+    memmove(dst, src, 1);
+}
+
+// 8 -> 16, widened through FROM_8_TO_16
+static
+void from8to16(void* dst, const void* src)
+{
+    const cmsUInt8Number value = *(const cmsUInt8Number*)src;
+
+    *(cmsUInt16Number*)dst = FROM_8_TO_16(value);
+}
+
+// 8 -> 16 with the two bytes of the result swapped
+static
+void from8to16SE(void* dst, const void* src)
+{
+    const cmsUInt8Number value = *(const cmsUInt8Number*)src;
+    const cmsUInt16Number wide = FROM_8_TO_16(value);
+
+    *(cmsUInt16Number*)dst = CHANGE_ENDIAN(wide);
+}
+
+// 8 -> float: the byte divided by 255
+static
+void from8toFLT(void* dst, const void* src)
+{
+    const cmsUInt8Number value = *(const cmsUInt8Number*)src;
+
+    *(cmsFloat32Number*)dst = value / 255.0f;
+}
+
+// 8 -> double: the byte divided by 255
+static
+void from8toDBL(void* dst, const void* src)
+{
+    const cmsUInt8Number value = *(const cmsUInt8Number*)src;
+
+    *(cmsFloat64Number*)dst = value / 255.0;
+}
+
+// 8 -> half. Does nothing when CMS_NO_HALF_SUPPORT is defined.
+static
+void from8toHLF(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+    const cmsUInt8Number value = *(const cmsUInt8Number*)src;
+    const cmsFloat32Number scaled = value / 255.0f;
+
+    *(cmsUInt16Number*)dst = _cmsFloat2Half(scaled);
+#else
+    cmsUNUSED_PARAMETER(dst);
+    cmsUNUSED_PARAMETER(src);
+#endif
+}
+
+// From 16
+
+// 16 -> 8, narrowed through FROM_16_TO_8
+static
+void from16to8(void* dst, const void* src)
+{
+    const cmsUInt16Number value = *(const cmsUInt16Number*)src;
+
+    *(cmsUInt8Number*)dst = FROM_16_TO_8(value);
+}
+
+// Byte-swapped 16 -> 8: the bytes are swapped before narrowing
+static
+void from16SEto8(void* dst, const void* src)
+{
+    const cmsUInt16Number raw = *(const cmsUInt16Number*)src;
+    const cmsUInt16Number swapped = CHANGE_ENDIAN(raw);
+
+    *(cmsUInt8Number*)dst = FROM_16_TO_8(swapped);
+}
+
+// Two-byte copy, used wherever source and destination share the same 16-bit layout
+static
+void copy16(void* dst, const void* src)
+{
+    memmove(dst, src, 2);
+}
+
+// 16 -> 16 with the two bytes swapped (used in both directions between 16 and 16SE)
+static
+void from16to16(void* dst, const void* src)
+{
+    const cmsUInt16Number raw = *(const cmsUInt16Number*)src;
+
+    *(cmsUInt16Number*)dst = CHANGE_ENDIAN(raw);
+}
+
+// 16 -> float: the word divided by 65535.
+// File-local like every other converter in this table; it is only reached through FormattersAlpha.
+static
+void from16toFLT(void* dst, const void* src)
+{
+       *(cmsFloat32Number*)dst = (*(cmsUInt16Number*)src) / 65535.0f;
+}
+
+// Byte-swapped 16 -> float: the bytes are swapped, then the word is divided by 65535
+static
+void from16SEtoFLT(void* dst, const void* src)
+{
+    *(cmsFloat32Number*)dst = (CHANGE_ENDIAN(*(cmsUInt16Number*)src)) / 65535.0f;
+}
+
+// 16 -> double: the word divided by 65535.
+// The division is carried out in double precision; dividing by the float constant
+// 65535.0f would round the quotient to float before it is widened to double.
+static
+void from16toDBL(void* dst, const void* src)
+{
+       *(cmsFloat64Number*)dst = (cmsFloat64Number) (*(cmsUInt16Number*)src) / 65535.0;
+}
+
+// Byte-swapped 16 -> double, also divided in double precision
+static
+void from16SEtoDBL(void* dst, const void* src)
+{
+    *(cmsFloat64Number*)dst = (cmsFloat64Number) (CHANGE_ENDIAN(*(cmsUInt16Number*)src)) / 65535.0;
+}
+
+// 16 -> half. Does nothing when CMS_NO_HALF_SUPPORT is defined.
+static
+void from16toHLF(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+    const cmsUInt16Number raw = *(const cmsUInt16Number*)src;
+    const cmsFloat32Number scaled = raw / 65535.0f;
+
+    *(cmsUInt16Number*)dst = _cmsFloat2Half(scaled);
+#else
+    cmsUNUSED_PARAMETER(dst);
+    cmsUNUSED_PARAMETER(src);
+#endif
+}
+
+// Byte-swapped 16 -> half. Does nothing when CMS_NO_HALF_SUPPORT is defined.
+static
+void from16SEtoHLF(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+    const cmsUInt16Number raw = *(const cmsUInt16Number*)src;
+    const cmsFloat32Number scaled = (CHANGE_ENDIAN(raw)) / 65535.0f;
+
+    *(cmsUInt16Number*)dst = _cmsFloat2Half(scaled);
+#else
+    cmsUNUSED_PARAMETER(dst);
+    cmsUNUSED_PARAMETER(src);
+#endif
+}
+// From Float
+
+// float -> 8: multiplied by 255 and saturated by _cmsQuickSaturateByte
+static
+void fromFLTto8(void* dst, const void* src)
+{
+    const cmsFloat32Number value = *(const cmsFloat32Number*)src;
+
+    *(cmsUInt8Number*)dst = _cmsQuickSaturateByte(value * 255.0f);
+}
+
+// float -> 16: multiplied by 65535 and saturated by _cmsQuickSaturateWord
+static
+void fromFLTto16(void* dst, const void* src)
+{
+    const cmsFloat32Number value = *(const cmsFloat32Number*)src;
+
+    *(cmsUInt16Number*)dst = _cmsQuickSaturateWord(value * 65535.0f);
+}
+
+// float -> 16 with the two bytes of the result swapped
+static
+void fromFLTto16SE(void* dst, const void* src)
+{
+    const cmsFloat32Number value = *(const cmsFloat32Number*)src;
+    const cmsUInt16Number word = _cmsQuickSaturateWord(value * 65535.0f);
+
+    *(cmsUInt16Number*)dst = CHANGE_ENDIAN(word);
+}
+
+// float -> float: copy of sizeof(cmsFloat32Number) bytes
+static
+void copy32(void* dst, const void* src)
+{
+    memmove(dst, src, sizeof(cmsFloat32Number));
+}
+
+// float -> double: plain widening
+static
+void fromFLTtoDBL(void* dst, const void* src)
+{
+    const cmsFloat32Number value = *(const cmsFloat32Number*)src;
+
+    *(cmsFloat64Number*)dst = (cmsFloat64Number) value;
+}
+
+// float -> half. Does nothing when CMS_NO_HALF_SUPPORT is defined.
+static
+void fromFLTtoHLF(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+    const cmsFloat32Number value = *(const cmsFloat32Number*)src;
+
+    *(cmsUInt16Number*)dst = _cmsFloat2Half(value);
+#else
+    cmsUNUSED_PARAMETER(dst);
+    cmsUNUSED_PARAMETER(src);
+#endif
+}
+
+
+// From HALF
+// Every converter in this group does nothing when CMS_NO_HALF_SUPPORT is defined.
+
+// half -> 8
+static
+void fromHLFto8(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+    const cmsFloat32Number value = _cmsHalf2Float(*(const cmsUInt16Number*)src);
+
+    *(cmsUInt8Number*)dst = _cmsQuickSaturateByte(value * 255.0f);
+#else
+    cmsUNUSED_PARAMETER(dst);
+    cmsUNUSED_PARAMETER(src);
+#endif
+}
+
+// half -> 16
+static
+void fromHLFto16(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+    const cmsFloat32Number value = _cmsHalf2Float(*(const cmsUInt16Number*)src);
+
+    *(cmsUInt16Number*)dst = _cmsQuickSaturateWord(value * 65535.0f);
+#else
+    cmsUNUSED_PARAMETER(dst);
+    cmsUNUSED_PARAMETER(src);
+#endif
+}
+
+// half -> 16 with the two bytes of the result swapped
+static
+void fromHLFto16SE(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+    const cmsFloat32Number value = _cmsHalf2Float(*(const cmsUInt16Number*)src);
+    const cmsUInt16Number word = _cmsQuickSaturateWord(value * 65535.0f);
+
+    *(cmsUInt16Number*)dst = CHANGE_ENDIAN(word);
+#else
+    cmsUNUSED_PARAMETER(dst);
+    cmsUNUSED_PARAMETER(src);
+#endif
+}
+
+// half -> float
+static
+void fromHLFtoFLT(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+    const cmsUInt16Number half = *(const cmsUInt16Number*)src;
+
+    *(cmsFloat32Number*)dst = _cmsHalf2Float(half);
+#else
+    cmsUNUSED_PARAMETER(dst);
+    cmsUNUSED_PARAMETER(src);
+#endif
+}
+
+// half -> double
+static
+void fromHLFtoDBL(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+    const cmsUInt16Number half = *(const cmsUInt16Number*)src;
+
+    *(cmsFloat64Number*)dst = (cmsFloat64Number) _cmsHalf2Float(half);
+#else
+    cmsUNUSED_PARAMETER(dst);
+    cmsUNUSED_PARAMETER(src);
+#endif
+}
+
+// From double
+
+// double -> 8: multiplied by 255 and saturated by _cmsQuickSaturateByte
+static
+void fromDBLto8(void* dst, const void* src)
+{
+    const cmsFloat64Number value = *(const cmsFloat64Number*)src;
+
+    *(cmsUInt8Number*)dst = _cmsQuickSaturateByte(value * 255.0);
+}
+
+// double -> 16: multiplied by 65535 and saturated by _cmsQuickSaturateWord
+static
+void fromDBLto16(void* dst, const void* src)
+{
+    const cmsFloat64Number value = *(const cmsFloat64Number*)src;
+
+    *(cmsUInt16Number*)dst = _cmsQuickSaturateWord(value * 65535.0f);
+}
+
+// double -> 16 with the two bytes of the result swapped
+static
+void fromDBLto16SE(void* dst, const void* src)
+{
+    const cmsFloat64Number value = *(const cmsFloat64Number*)src;
+    const cmsUInt16Number word = _cmsQuickSaturateWord(value * 65535.0f);
+
+    *(cmsUInt16Number*)dst = CHANGE_ENDIAN(word);
+}
+
+// double -> float: plain narrowing
+static
+void fromDBLtoFLT(void* dst, const void* src)
+{
+    const cmsFloat64Number value = *(const cmsFloat64Number*)src;
+
+    *(cmsFloat32Number*)dst = (cmsFloat32Number) value;
+}
+
+// double -> half. Does nothing when CMS_NO_HALF_SUPPORT is defined.
+static
+void fromDBLtoHLF(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+    const cmsFloat32Number narrowed = (cmsFloat32Number) *(const cmsFloat64Number*)src;
+
+    *(cmsUInt16Number*)dst = _cmsFloat2Half(narrowed);
+#else
+    cmsUNUSED_PARAMETER(dst);
+    cmsUNUSED_PARAMETER(src);
+#endif
+}
+
+// double -> double: copy of sizeof(cmsFloat64Number) bytes
+static
+void copy64(void* dst, const void* src)
+{
+    memmove(dst, src, sizeof(cmsFloat64Number));
+}
+
+
+// Returns the position (row or column) of a format in the FormattersAlpha table:
+//   0 = 8 bits, 1 = 16 bits, 2 = 16 bits byte-swapped, 3 = half, 4 = float, 5 = double.
+// Returns -1 when the channel width is not one the table knows about.
+static
+int FormatterPos(cmsUInt32Number frm)
+{
+    cmsUInt32Number  b = T_BYTES(frm);
+
+    if (b == 0 && T_FLOAT(frm))
+        return 5; // DBL
+#ifndef CMS_NO_HALF_SUPPORT
+    if (b == 2 && T_FLOAT(frm))
+        return 3; // HLF
+#endif
+    if (b == 4 && T_FLOAT(frm))
+        return 4; // FLT
+    if (b == 2 && !T_FLOAT(frm)) {
+
+        // The endianness flag has to be looked at here: testing it after the
+        // plain 16-bit case can never select the byte-swapped slot.
+        if (T_ENDIAN16(frm))
+            return 2; // 16SE
+        else
+            return 1; // 16
+    }
+    if (b == 1 && !T_FLOAT(frm))
+        return 0; // 8
+    return -1; // not recognized
+}
+
+// Obtains the function that converts one alpha sample from the input format to the
+// output format. Rows of the table are indexed by the source position and columns
+// by the destination position, both as reported by FormatterPos.
+// Signals cmsERROR_UNKNOWN_EXTENSION on the given context and returns NULL when
+// either format is not recognized; callers must check for NULL before calling.
+static
+cmsFormatterAlphaFn _cmsGetFormatterAlpha(cmsContext id, cmsUInt32Number in, cmsUInt32Number out)
+{
+static cmsFormatterAlphaFn FormattersAlpha[6][6] = {
+
+       /* from 8 */  { copy8,       from8to16,   from8to16SE,   from8toHLF,   from8toFLT,    from8toDBL    },
+       /* from 16*/  { from16to8,   copy16,      from16to16,    from16toHLF,  from16toFLT,   from16toDBL   },
+       /* from 16SE*/{ from16SEto8, from16to16,  copy16,        from16SEtoHLF,from16SEtoFLT, from16SEtoDBL },
+       /* from HLF*/ { fromHLFto8,  fromHLFto16, fromHLFto16SE, copy16,       fromHLFtoFLT,  fromHLFtoDBL  },
+       /* from FLT*/ { fromFLTto8,  fromFLTto16, fromFLTto16SE, fromFLTtoHLF, copy32,        fromFLTtoDBL  },
+       /* from DBL*/ { fromDBLto8,  fromDBLto16, fromDBLto16SE, fromDBLtoHLF, fromDBLtoFLT,  copy64 }};
+
+        int in_n  = FormatterPos(in);
+        int out_n = FormatterPos(out);
+
+        // Valid positions are 0..5; index 5 is the double row/column of the table
+        if (in_n < 0 || out_n < 0 || in_n > 5 || out_n > 5) {
+
+               cmsSignalError(id, cmsERROR_UNKNOWN_EXTENSION, "Unrecognized alpha channel width");
+               return NULL;
+        }
+
+        return FormattersAlpha[in_n][out_n];
+}
+
+
+
+// Computes, for a chunky (interleaved) format, the byte offset of each extra channel
+// inside a pixel (ComponentStartingOrder) and the distance in bytes from one pixel
+// to the next (ComponentPointerIncrements). Only the first T_EXTRA(Format) entries
+// of each output array are written.
+// Returns FALSE, leaving both output arrays untouched, when the total channel count
+// fails the sanity check; returns TRUE otherwise.
+static
+cmsBool ComputeIncrementsForChunky(cmsUInt32Number Format,
+                                   cmsUInt32Number ComponentStartingOrder[],
+                                   cmsUInt32Number ComponentPointerIncrements[])
+{
+       cmsUInt32Number channels[cmsMAXCHANNELS];
+       cmsUInt32Number extra = T_EXTRA(Format);
+       cmsUInt32Number nchannels = T_CHANNELS(Format);
+       cmsUInt32Number total_chans = nchannels + extra;
+       cmsUInt32Number i;
+       cmsUInt32Number channelSize = trueBytesSize(Format);
+       cmsUInt32Number pixelSize = channelSize * total_chans;
+
+       // Sanity check: report the failure so the caller never reads unwritten outputs
+       if (total_chans <= 0 || total_chans >= cmsMAXCHANNELS)
+              return FALSE;
+
+       memset(channels, 0, sizeof(channels));
+
+       // Separation is independent of starting point and only depends on channel size
+       for (i = 0; i < extra; i++)
+              ComponentPointerIncrements[i] = pixelSize;
+
+       // Handle do swap
+       for (i = 0; i < total_chans; i++)
+       {
+              if (T_DOSWAP(Format)) {
+                     channels[i] = total_chans - i - 1;
+              }
+              else {
+                     channels[i] = i;
+              }
+       }
+
+       // Handle swap first (ROL of positions), example CMYK -> KCMY | 0123 -> 3012
+       if (T_SWAPFIRST(Format) && total_chans > 1) {
+
+              cmsUInt32Number tmp = channels[0];
+              for (i = 0; i < total_chans-1; i++)
+                     channels[i] = channels[i + 1];
+
+              channels[total_chans - 1] = tmp;
+       }
+
+       // Handle size: turn channel positions into byte offsets
+       if (channelSize > 1)
+              for (i = 0; i < total_chans; i++) {
+                     channels[i] *= channelSize;
+              }
+
+       // The extra channels are the ones that follow the color channels
+       for (i = 0; i < extra; i++)
+              ComponentStartingOrder[i] = channels[i + nchannels];
+
+       return TRUE;
+}
+
+
+
+// Computes the same two tables for a planar format: the offset of each extra
+// channel is its plane position multiplied by BytesPerPlane, and the distance from
+// one sample to the next inside a plane is the channel size.
+// Returns FALSE, leaving both output arrays untouched, when the total channel count
+// fails the sanity check; returns TRUE otherwise.
+static
+cmsBool ComputeIncrementsForPlanar(cmsUInt32Number Format,
+                                   cmsUInt32Number BytesPerPlane,
+                                   cmsUInt32Number ComponentStartingOrder[],
+                                   cmsUInt32Number ComponentPointerIncrements[])
+{
+       cmsUInt32Number channels[cmsMAXCHANNELS];
+       cmsUInt32Number extra = T_EXTRA(Format);
+       cmsUInt32Number nchannels = T_CHANNELS(Format);
+       cmsUInt32Number total_chans = nchannels + extra;
+       cmsUInt32Number i;
+       cmsUInt32Number channelSize = trueBytesSize(Format);
+
+       // Sanity check: report the failure so the caller never reads unwritten outputs
+       if (total_chans <= 0 || total_chans >= cmsMAXCHANNELS)
+              return FALSE;
+
+       memset(channels, 0, sizeof(channels));
+
+       // Separation is independent of starting point and only depends on channel size
+       for (i = 0; i < extra; i++)
+              ComponentPointerIncrements[i] = channelSize;
+
+       // Handle do swap
+       for (i = 0; i < total_chans; i++)
+       {
+              if (T_DOSWAP(Format)) {
+                     channels[i] = total_chans - i - 1;
+              }
+              else {
+                     channels[i] = i;
+              }
+       }
+
+       // Handle swap first (ROL of positions), example CMYK -> KCMY | 0123 -> 3012
+       if (T_SWAPFIRST(Format) && total_chans > 0) {
+
+              cmsUInt32Number tmp = channels[0];
+              for (i = 0; i < total_chans - 1; i++)
+                     channels[i] = channels[i + 1];
+
+              channels[total_chans - 1] = tmp;
+       }
+
+       // Handle size: turn plane positions into byte offsets
+       for (i = 0; i < total_chans; i++) {
+              channels[i] *= BytesPerPlane;
+       }
+
+       // The extra channels are the ones that follow the color channels
+       for (i = 0; i < extra; i++)
+              ComponentStartingOrder[i] = channels[i + nchannels];
+
+       return TRUE;
+}
+
+
+
+// Dispatcher for chunky and planar formats. Forwards the result of the selected
+// helper: FALSE means the output arrays were not filled in.
+static
+cmsBool ComputeComponentIncrements(cmsUInt32Number Format,
+                                   cmsUInt32Number BytesPerPlane,
+                                   cmsUInt32Number ComponentStartingOrder[],
+                                   cmsUInt32Number ComponentPointerIncrements[])
+{
+       if (T_PLANAR(Format)) {
+
+              return ComputeIncrementsForPlanar(Format,  BytesPerPlane, ComponentStartingOrder, ComponentPointerIncrements);
+       }
+       else {
+              return ComputeIncrementsForChunky(Format,  ComponentStartingOrder, ComponentPointerIncrements);
+       }
+
+}
+
+
+
+// Handles extra channels copying alpha if requested by the flags.
+//
+//   p             - transform; its dwOriginalFlags, InputFormat, OutputFormat and ContextID are read
+//   in / out      - source and destination pixel buffers
+//   PixelsPerLine - number of pixels copied per line
+//   LineCount     - number of lines
+//   Stride        - bytes per line and bytes per plane of both buffers
+//
+// Returns without touching the output when copying is not requested, when the
+// operation is in place with identical formats, when the number of extra channels
+// differs or is zero, or when either format cannot be handled.
+void _cmsHandleExtraChannels(_cmsTRANSFORM* p, const void* in,
+                                               void* out,
+                                               cmsUInt32Number PixelsPerLine,
+                                               cmsUInt32Number LineCount,
+                                               const cmsStride* Stride)
+{
+    cmsUInt32Number i, j, k;
+    cmsUInt32Number nExtra;
+    cmsUInt32Number SourceStartingOrder[cmsMAXCHANNELS];
+    cmsUInt32Number SourceIncrements[cmsMAXCHANNELS];
+    cmsUInt32Number DestStartingOrder[cmsMAXCHANNELS];
+    cmsUInt32Number DestIncrements[cmsMAXCHANNELS];
+
+    cmsFormatterAlphaFn copyValueFn;
+
+    // Make sure we need some copy
+    if (!(p->dwOriginalFlags & cmsFLAGS_COPY_ALPHA))
+        return;
+
+    // Exit early if in-place color-management is occurring - no need to copy extra channels to themselves.
+    if (p->InputFormat == p->OutputFormat && in == out)
+        return;
+
+    // Make sure we have same number of alpha channels. If not, just return as this should be checked at transform creation time.
+    nExtra = T_EXTRA(p->InputFormat);
+    if (nExtra != T_EXTRA(p->OutputFormat))
+        return;
+
+    // Anything to do?
+    if (nExtra == 0)
+        return;
+
+    // Compute the increments. On failure the tables are not filled in, so they must not be used.
+    if (!ComputeComponentIncrements(p->InputFormat, Stride->BytesPerPlaneIn, SourceStartingOrder, SourceIncrements))
+        return;
+    if (!ComputeComponentIncrements(p->OutputFormat, Stride->BytesPerPlaneOut, DestStartingOrder, DestIncrements))
+        return;
+
+    // Check for conversions 8, 16, half, float, dbl
+    copyValueFn = _cmsGetFormatterAlpha(p->ContextID, p->InputFormat, p->OutputFormat);
+
+    // Unrecognized width: the error has already been signalled, there is nothing to call
+    if (copyValueFn == NULL)
+        return;
+
+    if (nExtra == 1) { // Optimized routine for copying a single extra channel quickly
+
+        cmsUInt8Number* SourcePtr;
+        cmsUInt8Number* DestPtr;
+
+        cmsUInt32Number SourceStrideIncrement = 0;
+        cmsUInt32Number DestStrideIncrement = 0;
+
+        // The loop itself
+        for (i = 0; i < LineCount; i++) {
+
+            // Prepare pointers for the loop
+            SourcePtr = (cmsUInt8Number*)in + SourceStartingOrder[0] + SourceStrideIncrement;
+            DestPtr = (cmsUInt8Number*)out + DestStartingOrder[0] + DestStrideIncrement;
+
+            for (j = 0; j < PixelsPerLine; j++) {
+
+                copyValueFn(DestPtr, SourcePtr);
+
+                SourcePtr += SourceIncrements[0];
+                DestPtr += DestIncrements[0];
+            }
+
+            SourceStrideIncrement += Stride->BytesPerLineIn;
+            DestStrideIncrement += Stride->BytesPerLineOut;
+        }
+
+    }
+    else { // General case with more than one extra channel
+
+        cmsUInt8Number* SourcePtr[cmsMAXCHANNELS];
+        cmsUInt8Number* DestPtr[cmsMAXCHANNELS];
+
+        cmsUInt32Number SourceStrideIncrements[cmsMAXCHANNELS];
+        cmsUInt32Number DestStrideIncrements[cmsMAXCHANNELS];
+
+        memset(SourceStrideIncrements, 0, sizeof(SourceStrideIncrements));
+        memset(DestStrideIncrements, 0, sizeof(DestStrideIncrements));
+
+        // The loop itself
+        for (i = 0; i < LineCount; i++) {
+
+            // Prepare pointers for the loop
+            for (j = 0; j < nExtra; j++) {
+
+                SourcePtr[j] = (cmsUInt8Number*)in + SourceStartingOrder[j] + SourceStrideIncrements[j];
+                DestPtr[j] = (cmsUInt8Number*)out + DestStartingOrder[j] + DestStrideIncrements[j];
+            }
+
+            for (j = 0; j < PixelsPerLine; j++) {
+
+                for (k = 0; k < nExtra; k++) {
+
+                    copyValueFn(DestPtr[k], SourcePtr[k]);
+
+                    SourcePtr[k] += SourceIncrements[k];
+                    DestPtr[k] += DestIncrements[k];
+                }
+            }
+
+            for (j = 0; j < nExtra; j++) {
+
+                SourceStrideIncrements[j] += Stride->BytesPerLineIn;
+                DestStrideIncrements[j] += Stride->BytesPerLineOut;
+            }
+        }
+    }
+}
+
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmscam02.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmscam02.c
new file mode 100644
index 0000000000..9cc49fbf20
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmscam02.c
@@ -0,0 +1,486 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// CIECAM 02 appearance model. Many thanks to Jordi Vilar for the debugging.
+
+// ---------- Implementation --------------------------------------------
+
+// Working state of one color as it moves through the pipeline stages of this file.
+// Each stage takes the struct by value, fills in its own members and returns it.
+typedef struct  {
+
+    cmsFloat64Number XYZ[3];    // Input of the forward pipeline, written by CAT02toXYZ in the reverse one
+    cmsFloat64Number RGB[3];    // Written by XYZtoCAT02 (forward) and InverseChromaticAdaptation (reverse)
+    cmsFloat64Number RGBc[3];   // Written by ChromaticAdaptation (forward) and HPEtoCAT02 (reverse)
+    cmsFloat64Number RGBp[3];   // Written by CAT02toHPE (forward) and InverseNonlinearity (reverse)
+    cmsFloat64Number RGBpa[3];  // Written by NonlinearCompression (forward) and InverseCorrelates (reverse)
+    // A is set by NonlinearCompression and InverseCorrelates; h, H, J, Q, C, M and s by
+    // ComputeCorrelates; a and b by InverseCorrelates. The members e and t are never
+    // assigned in this file (the functions use locals of the same name instead).
+    cmsFloat64Number a, b, h, e, H, A, J, Q, s, t, C, M;
+    // The three pairs below are not referenced anywhere else in this file
+    cmsFloat64Number abC[2];
+    cmsFloat64Number abs[2];
+    cmsFloat64Number abM[2];
+
+} CAM02COLOR;
+
+// Model state behind the cmsHANDLE returned by cmsCIECAM02Init
+typedef struct  {
+
+    CAM02COLOR adoptedWhite;            // White point from the viewing conditions, run through the forward stages up to NonlinearCompression
+    cmsFloat64Number LA, Yb;            // Copied from the viewing conditions (La, Yb)
+    cmsFloat64Number F, c, Nc;          // Chosen by cmsCIECAM02Init according to the surround
+    cmsUInt32Number surround;           // Copied from the viewing conditions
+    cmsFloat64Number n, Nbb, Ncb, z, FL, D;  // Derived in cmsCIECAM02Init; D is taken from D_value unless that is D_CALCULATE
+
+    cmsContext ContextID;               // Context used to allocate and free this struct
+
+} cmsCIECAM02;
+
+
+// n: Yb divided by the Y component of the adopted white
+static
+cmsFloat64Number compute_n(cmsCIECAM02* pMod)
+{
+    const cmsFloat64Number whiteY = pMod->adoptedWhite.XYZ[1];
+
+    return pMod->Yb / whiteY;
+}
+
+// z: 1.48 plus the square root of n (n must already be set)
+static
+cmsFloat64Number compute_z(cmsCIECAM02* pMod)
+{
+    const cmsFloat64Number rootN = pow(pMod->n, 0.5);
+
+    return 1.48 + rootN;
+}
+
+// Nbb: 0.725 * (1/n)^0.2 (n must already be set)
+static
+cmsFloat64Number computeNbb(cmsCIECAM02* pMod)
+{
+    const cmsFloat64Number invN = 1.0 / pMod->n;
+
+    return 0.725 * pow(invN, 0.2);
+}
+
+// FL: computed from LA alone
+static
+cmsFloat64Number computeFL(cmsCIECAM02* pMod)
+{
+    const cmsFloat64Number la5 = 5.0 * pMod->LA;
+    const cmsFloat64Number k   = 1.0 / (la5 + 1.0);
+    const cmsFloat64Number k4  = pow(k, 4.0);
+
+    return 0.2 * k4 * la5 + 0.1 *
+        pow(1.0 - k4, 2.0) *
+        pow(la5, (1.0 / 3.0));
+}
+
+// D: computed from F and LA; used by cmsCIECAM02Init when D_value is D_CALCULATE
+static
+cmsFloat64Number computeD(cmsCIECAM02* pMod)
+{
+    const cmsFloat64Number decay = exp(((-pMod->LA - 42) / 92.0));
+
+    return pMod->F - (1.0 / 3.6) * decay;
+}
+
+
+// Fills clr.RGB with the 3x3 linear transform of clr.XYZ given by the constants below
+static
+CAM02COLOR XYZtoCAT02(CAM02COLOR clr)
+{
+    const cmsFloat64Number X = clr.XYZ[0];
+    const cmsFloat64Number Y = clr.XYZ[1];
+    const cmsFloat64Number Z = clr.XYZ[2];
+
+    clr.RGB[0] = (X *  0.7328) + (Y *  0.4296) + (Z * -0.1624);
+    clr.RGB[1] = (X * -0.7036) + (Y *  1.6975) + (Z *  0.0061);
+    clr.RGB[2] = (X *  0.0030) + (Y *  0.0136) + (Z *  0.9834);
+
+    return clr;
+}
+
+// Fills clr.RGBc by scaling each clr.RGB component with a gain built from the
+// adopted white (its Y and its own RGB component) and the adaptation degree D.
+static
+CAM02COLOR ChromaticAdaptation(CAM02COLOR clr, cmsCIECAM02* pMod)
+{
+    const cmsFloat64Number whiteY = pMod->adoptedWhite.XYZ[1];
+    const cmsFloat64Number D = pMod->D;
+    cmsUInt32Number ch;
+
+    for (ch = 0; ch < 3; ch++) {
+
+        const cmsFloat64Number gain =
+            (whiteY * (D / pMod->adoptedWhite.RGB[ch])) + (1.0 - D);
+
+        clr.RGBc[ch] = gain * clr.RGB[ch];
+    }
+
+    return clr;
+}
+
+
+// Fills clr.RGBp with a 3x3 linear transform of clr.RGBc. The first two rows of the
+// matrix are written out as sums of products of constants; the last row is literal.
+static
+CAM02COLOR CAT02toHPE(CAM02COLOR clr)
+{
+    const cmsFloat64Number M[9] = {
+        (( 0.38971 *  1.096124) + (0.68898 * 0.454369) + (-0.07868 * -0.009628)),
+        (( 0.38971 * -0.278869) + (0.68898 * 0.473533) + (-0.07868 * -0.005698)),
+        (( 0.38971 *  0.182745) + (0.68898 * 0.072098) + (-0.07868 *  1.015326)),
+        ((-0.22981 *  1.096124) + (1.18340 * 0.454369) + ( 0.04641 * -0.009628)),
+        ((-0.22981 * -0.278869) + (1.18340 * 0.473533) + ( 0.04641 * -0.005698)),
+        ((-0.22981 *  0.182745) + (1.18340 * 0.072098) + ( 0.04641 *  1.015326)),
+        (-0.009628),
+        (-0.005698),
+        ( 1.015326)
+    };
+    const cmsFloat64Number r = clr.RGBc[0];
+    const cmsFloat64Number g = clr.RGBc[1];
+    const cmsFloat64Number b = clr.RGBc[2];
+
+    clr.RGBp[0] = (r * M[0]) + (g * M[1]) + (b * M[2]);
+    clr.RGBp[1] = (r * M[3]) + (g * M[4]) + (b * M[5]);
+    clr.RGBp[2] = (r * M[6]) + (g * M[7]) + (b * M[8]);
+
+    return clr;
+}
+
+// Fills clr.RGBpa from clr.RGBp and then derives clr.A from the three results.
+// Negative inputs are compressed on their magnitude and the sign is put back.
+static
+CAM02COLOR NonlinearCompression(CAM02COLOR clr, cmsCIECAM02* pMod)
+{
+    cmsUInt32Number ch;
+
+    for (ch = 0; ch < 3; ch++) {
+
+        const cmsFloat64Number v = clr.RGBp[ch];
+        cmsFloat64Number powered;
+
+        if (v < 0) {
+            powered = pow((-1.0 * pMod->FL * v / 100.0), 0.42);
+            clr.RGBpa[ch] = (-1.0 * 400.0 * powered) / (powered + 27.13) + 0.1;
+            continue;
+        }
+
+        powered = pow((pMod->FL * v / 100.0), 0.42);
+        clr.RGBpa[ch] = (400.0 * powered) / (powered + 27.13) + 0.1;
+    }
+
+    clr.A = (((2.0 * clr.RGBpa[0]) + clr.RGBpa[1] +
+        (clr.RGBpa[2] / 20.0)) - 0.305) * pMod->Nbb;
+
+    return clr;
+}
+
+// Last forward stage: derives h, H, J, Q, C, M and s from clr.RGBpa and clr.A.
+// Reads Nc, Ncb, c, z, n, FL and adoptedWhite.A from the model.
+// The opponent values a and b are kept in locals only; clr.a and clr.b are not written here.
+// NOTE(review): none of the divisions below (by the RGBpa sum in t, by adoptedWhite.A
+// in J, by clr.Q in s) is guarded against a zero divisor.
+static
+CAM02COLOR ComputeCorrelates(CAM02COLOR clr, cmsCIECAM02* pMod)
+{
+    cmsFloat64Number a, b, temp, e, t, r2d, d2r;
+
+    a = clr.RGBpa[0] - (12.0 * clr.RGBpa[1] / 11.0) + (clr.RGBpa[2] / 11.0);
+    b = (clr.RGBpa[0] + clr.RGBpa[1] - (2.0 * clr.RGBpa[2])) / 9.0;
+
+    // Hue angle in degrees from atan(b/a), shifted by quadrant so that it is never negative
+    r2d = (180.0 / 3.141592654);
+    if (a == 0) {
+        if (b == 0)     clr.h = 0;
+        else if (b > 0) clr.h = 90;
+        else            clr.h = 270;
+    }
+    else if (a > 0) {
+        temp = b / a;
+        if (b > 0)       clr.h = (r2d * atan(temp));
+        else if (b == 0) clr.h = 0;
+        else             clr.h = (r2d * atan(temp)) + 360;
+    }
+    else {
+        temp = b / a;
+        clr.h = (r2d * atan(temp)) + 180;
+    }
+
+    // e depends on the hue angle converted back to radians, offset by 2
+    d2r = (3.141592654 / 180.0);
+    e = ((12500.0 / 13.0) * pMod->Nc * pMod->Ncb) *
+        (cos((clr.h * d2r + 2.0)) + 3.8);
+
+    // H: piecewise interpolation between the breakpoints 20.14, 90, 164.25 and 237.53 degrees;
+    // the first and last branches both produce values offset by 300
+    if (clr.h < 20.14) {
+        temp = ((clr.h + 122.47)/1.2) + ((20.14 - clr.h)/0.8);
+        clr.H = 300 + (100*((clr.h + 122.47)/1.2)) / temp;
+    }
+    else if (clr.h < 90.0) {
+        temp = ((clr.h - 20.14)/0.8) + ((90.00 - clr.h)/0.7);
+        clr.H = (100*((clr.h - 20.14)/0.8)) / temp;
+    }
+    else if (clr.h < 164.25) {
+        temp = ((clr.h - 90.00)/0.7) + ((164.25 - clr.h)/1.0);
+        clr.H = 100 + ((100*((clr.h - 90.00)/0.7)) / temp);
+    }
+    else if (clr.h < 237.53) {
+        temp = ((clr.h - 164.25)/1.0) + ((237.53 - clr.h)/1.2);
+        clr.H = 200 + ((100*((clr.h - 164.25)/1.0)) / temp);
+    }
+    else {
+        temp = ((clr.h - 237.53)/1.2) + ((360 - clr.h + 20.14)/0.8);
+        clr.H = 300 + ((100*((clr.h - 237.53)/1.2)) / temp);
+    }
+
+    // J from the ratio of clr.A to the adopted white's A
+    clr.J = 100.0 * pow((clr.A / pMod->adoptedWhite.A),
+        (pMod->c * pMod->z));
+
+    // Q from J
+    clr.Q = (4.0 / pMod->c) * pow((clr.J / 100.0), 0.5) *
+        (pMod->adoptedWhite.A + 4.0) * pow(pMod->FL, 0.25);
+
+    // t: e times the length of (a, b), divided by a weighted sum of RGBpa
+    t = (e * pow(((a * a) + (b * b)), 0.5)) /
+        (clr.RGBpa[0] + clr.RGBpa[1] +
+        ((21.0 / 20.0) * clr.RGBpa[2]));
+
+    // C from t and J
+    clr.C = pow(t, 0.9) * pow((clr.J / 100.0), 0.5) *
+        pow((1.64 - pow(0.29, pMod->n)), 0.73);
+
+    // M from C, s from M and Q
+    clr.M = clr.C * pow(pMod->FL, 0.25);
+    clr.s = 100.0 * pow((clr.M / clr.Q), 0.5);
+
+    return clr;
+}
+
+
+// First reverse stage: rebuilds clr.A, clr.a, clr.b and clr.RGBpa from clr.J, clr.C
+// and clr.h (h in degrees). Reads Nc, Ncb, c, z, n, Nbb and adoptedWhite.A from the model.
+// NOTE(review): t is divided by a term containing pow(J/100, 0.5) and p1 is e / t,
+// with no guard for J == 0 or t == 0 — confirm callers never pass such values.
+static
+CAM02COLOR InverseCorrelates(CAM02COLOR clr, cmsCIECAM02* pMod)
+{
+
+    cmsFloat64Number t, e, p1, p2, p3, p4, p5, hr, d2r;
+    d2r = 3.141592654 / 180.0;
+
+    // Inverts the expression that ComputeCorrelates uses to obtain C from t
+    t = pow( (clr.C / (pow((clr.J / 100.0), 0.5) *
+        (pow((1.64 - pow(0.29, pMod->n)), 0.73)))),
+        (1.0 / 0.9) );
+    // Same expression for e as in ComputeCorrelates
+    e = ((12500.0 / 13.0) * pMod->Nc * pMod->Ncb) *
+        (cos((clr.h * d2r + 2.0)) + 3.8);
+
+    // Inverts the expression that ComputeCorrelates uses to obtain J from A
+    clr.A = pMod->adoptedWhite.A * pow(
+           (clr.J / 100.0),
+           (1.0 / (pMod->c * pMod->z)));
+
+    p1 = e / t;
+    p2 = (clr.A / pMod->Nbb) + 0.305;
+    p3 = 21.0 / 20.0;
+
+    hr = clr.h * d2r;
+
+    // Solve for whichever of a / b goes with the larger of |sin| and |cos|, so the
+    // division below is by the larger magnitude; the other one follows from the ratio
+    if (fabs(sin(hr)) >= fabs(cos(hr))) {
+        p4 = p1 / sin(hr);
+        clr.b = (p2 * (2.0 + p3) * (460.0 / 1403.0)) /
+            (p4 + (2.0 + p3) * (220.0 / 1403.0) *
+            (cos(hr) / sin(hr)) - (27.0 / 1403.0) +
+            p3 * (6300.0 / 1403.0));
+        clr.a = clr.b * (cos(hr) / sin(hr));
+    }
+    else {
+        p5 = p1 / cos(hr);
+        clr.a = (p2 * (2.0 + p3) * (460.0 / 1403.0)) /
+            (p5 + (2.0 + p3) * (220.0 / 1403.0) -
+            ((27.0 / 1403.0) - p3 * (6300.0 / 1403.0)) *
+            (sin(hr) / cos(hr)));
+        clr.b = clr.a * (sin(hr) / cos(hr));
+    }
+
+    // Rebuild the three RGBpa values as linear combinations of p2, a and b
+    clr.RGBpa[0] = ((460.0 / 1403.0) * p2) +
+              ((451.0 / 1403.0) * clr.a) +
+              ((288.0 / 1403.0) * clr.b);
+    clr.RGBpa[1] = ((460.0 / 1403.0) * p2) -
+              ((891.0 / 1403.0) * clr.a) -
+              ((261.0 / 1403.0) * clr.b);
+    clr.RGBpa[2] = ((460.0 / 1403.0) * p2) -
+              ((220.0 / 1403.0) * clr.a) -
+              ((6300.0 / 1403.0) * clr.b);
+
+    return clr;
+}
+
+// Fills clr.RGBp from clr.RGBpa: the 0.1 offset is removed, the magnitude is
+// expanded with exponent 1/0.42 and the original sign is restored.
+static
+CAM02COLOR InverseNonlinearity(CAM02COLOR clr, cmsCIECAM02* pMod)
+{
+    cmsUInt32Number ch;
+
+    for (ch = 0; ch < 3; ch++) {
+
+        const cmsFloat64Number shifted   = clr.RGBpa[ch] - 0.1;
+        const cmsFloat64Number magnitude = fabs(shifted);
+        cmsFloat64Number sign = 1;
+
+        if (shifted < 0)
+            sign = -1;
+
+        clr.RGBp[ch] = sign * (100.0 / pMod->FL) *
+            pow(((27.13 * magnitude) /
+            (400.0 - magnitude)),
+            (1.0 / 0.42));
+    }
+
+    return clr;
+}
+
+// Fills clr.RGBc with a 3x3 linear transform of clr.RGBp. The matrix entries are
+// written out as sums of products of constants.
+static
+CAM02COLOR HPEtoCAT02(CAM02COLOR clr)
+{
+    const cmsFloat64Number M[9] = {
+        (( 0.7328 *  1.910197) + (0.4296 * 0.370950)),
+        (( 0.7328 * -1.112124) + (0.4296 * 0.629054)),
+        (( 0.7328 *  0.201908) + (0.4296 * 0.000008) - 0.1624),
+        ((-0.7036 *  1.910197) + (1.6975 * 0.370950)),
+        ((-0.7036 * -1.112124) + (1.6975 * 0.629054)),
+        ((-0.7036 *  0.201908) + (1.6975 * 0.000008) + 0.0061),
+        (( 0.0030 *  1.910197) + (0.0136 * 0.370950)),
+        (( 0.0030 * -1.112124) + (0.0136 * 0.629054)),
+        (( 0.0030 *  0.201908) + (0.0136 * 0.000008) + 0.9834)
+    };
+    const cmsFloat64Number r = clr.RGBp[0];
+    const cmsFloat64Number g = clr.RGBp[1];
+    const cmsFloat64Number b = clr.RGBp[2];
+
+    clr.RGBc[0] = (r * M[0]) + (g * M[1]) + (b * M[2]);
+    clr.RGBc[1] = (r * M[3]) + (g * M[4]) + (b * M[5]);
+    clr.RGBc[2] = (r * M[6]) + (g * M[7]) + (b * M[8]);
+
+    return clr;
+}
+
+
+// Fills clr.RGB by dividing each clr.RGBc component by a gain built from the
+// adopted white (its Y and its own RGB component) and the adaptation degree D.
+static
+CAM02COLOR InverseChromaticAdaptation(CAM02COLOR clr,  cmsCIECAM02* pMod)
+{
+    const cmsFloat64Number whiteY = pMod->adoptedWhite.XYZ[1];
+    const cmsFloat64Number D = pMod->D;
+    cmsUInt32Number ch;
+
+    for (ch = 0; ch < 3; ch++) {
+
+        const cmsFloat64Number gain =
+            (whiteY * D / pMod->adoptedWhite.RGB[ch]) + 1.0 - D;
+
+        clr.RGB[ch] = clr.RGBc[ch] / gain;
+    }
+
+    return clr;
+}
+
+
+// Fills clr.XYZ with the 3x3 linear transform of clr.RGB given by the constants below
+static
+CAM02COLOR CAT02toXYZ(CAM02COLOR clr)
+{
+    const cmsFloat64Number r = clr.RGB[0];
+    const cmsFloat64Number g = clr.RGB[1];
+    const cmsFloat64Number b = clr.RGB[2];
+
+    clr.XYZ[0] = (r *  1.096124) + (g * -0.278869) + (b *  0.182745);
+    clr.XYZ[1] = (r *  0.454369) + (g *  0.473533) + (b *  0.072098);
+    clr.XYZ[2] = (r * -0.009628) + (g * -0.005698) + (b *  1.015326);
+
+    return clr;
+}
+
+
+// Creates a model from the given viewing conditions.
+// Copies the white point, La, Yb, D_value and surround, picks F / c / Nc from the
+// surround, derives n, z, Nbb, FL, (optionally) D and Ncb, and finally runs the
+// adopted white through the forward stages up to NonlinearCompression.
+// Returns the model as an opaque handle, or NULL when the allocation fails.
+cmsHANDLE  CMSEXPORT cmsCIECAM02Init(cmsContext ContextID, const cmsViewingConditions* pVC)
+{
+    cmsCIECAM02* model;
+
+    _cmsAssert(pVC != NULL);
+
+    model = (cmsCIECAM02*) _cmsMallocZero(ContextID, sizeof(cmsCIECAM02));
+    if (model == NULL)
+        return NULL;
+
+    model->ContextID = ContextID;
+
+    // Plain copies from the viewing conditions
+    model->adoptedWhite.XYZ[0] = pVC->whitePoint.X;
+    model->adoptedWhite.XYZ[1] = pVC->whitePoint.Y;
+    model->adoptedWhite.XYZ[2] = pVC->whitePoint.Z;
+
+    model->LA       = pVC->La;
+    model->Yb       = pVC->Yb;
+    model->D        = pVC->D_value;
+    model->surround = pVC->surround;
+
+    // Surround-dependent constants; any unlisted surround is treated as average
+    switch (model->surround) {
+
+    case CUTSHEET_SURROUND:
+        model->F  = 0.8;
+        model->c  = 0.41;
+        model->Nc = 0.8;
+        break;
+
+    case DARK_SURROUND:
+        model->F  = 0.8;
+        model->c  = 0.525;
+        model->Nc = 0.8;
+        break;
+
+    case DIM_SURROUND:
+        model->F  = 0.9;
+        model->c  = 0.59;
+        model->Nc = 0.95;
+        break;
+
+    default:
+        // Average surround
+        model->F  = 1.0;
+        model->c  = 0.69;
+        model->Nc = 1.0;
+        break;
+    }
+
+    // Order matters: z and Nbb read n
+    model->n   = compute_n(model);
+    model->z   = compute_z(model);
+    model->Nbb = computeNbb(model);
+    model->FL  = computeFL(model);
+
+    if (model->D == D_CALCULATE)
+        model->D = computeD(model);
+
+    model->Ncb = model->Nbb;
+
+    // Pre-compute the adopted white through the forward stages
+    model->adoptedWhite = XYZtoCAT02(model->adoptedWhite);
+    model->adoptedWhite = ChromaticAdaptation(model->adoptedWhite, model);
+    model->adoptedWhite = CAT02toHPE(model->adoptedWhite);
+    model->adoptedWhite = NonlinearCompression(model->adoptedWhite, model);
+
+    return (cmsHANDLE) model;
+}
+
+// Releases a model created by cmsCIECAM02Init. A NULL handle is ignored.
+void CMSEXPORT cmsCIECAM02Done(cmsHANDLE hModel)
+{
+    cmsCIECAM02* model = (cmsCIECAM02*) hModel;
+
+    if (model == NULL)
+        return;
+
+    _cmsFree(model->ContextID, model);
+}
+
+
+// Forward transform: runs the XYZ color in pIn through every forward stage of the
+// model and stores the resulting J, C and h in pOut. All three pointers are asserted non-NULL.
+void CMSEXPORT cmsCIECAM02Forward(cmsHANDLE hModel, const cmsCIEXYZ* pIn, cmsJCh* pOut)
+{
+    cmsCIECAM02* model = (cmsCIECAM02*) hModel;
+    CAM02COLOR work;
+
+    _cmsAssert(model != NULL);
+    _cmsAssert(pIn != NULL);
+    _cmsAssert(pOut != NULL);
+
+    // Start from an all-zero working color holding only the input XYZ
+    memset(&work, 0, sizeof(work));
+    work.XYZ[0] = pIn->X;
+    work.XYZ[1] = pIn->Y;
+    work.XYZ[2] = pIn->Z;
+
+    work = XYZtoCAT02(work);
+    work = ChromaticAdaptation(work, model);
+    work = CAT02toHPE(work);
+    work = NonlinearCompression(work, model);
+    work = ComputeCorrelates(work, model);
+
+    pOut->J = work.J;
+    pOut->C = work.C;
+    pOut->h = work.h;
+}
+
+// Reverse transform: runs the J, C, h values in pIn through every inverse stage of
+// the model and stores the resulting XYZ in pOut. All three pointers are asserted non-NULL.
+void CMSEXPORT cmsCIECAM02Reverse(cmsHANDLE hModel, const cmsJCh* pIn, cmsCIEXYZ* pOut)
+{
+    cmsCIECAM02* model = (cmsCIECAM02*) hModel;
+    CAM02COLOR work;
+
+    _cmsAssert(model != NULL);
+    _cmsAssert(pIn != NULL);
+    _cmsAssert(pOut != NULL);
+
+    // Start from an all-zero working color holding only the input J, C and h
+    memset(&work, 0, sizeof(work));
+    work.J = pIn->J;
+    work.C = pIn->C;
+    work.h = pIn->h;
+
+    work = InverseCorrelates(work, model);
+    work = InverseNonlinearity(work, model);
+    work = HPEtoCAT02(work);
+    work = InverseChromaticAdaptation(work, model);
+    work = CAT02toXYZ(work);
+
+    pOut->X = work.XYZ[0];
+    pOut->Y = work.XYZ[1];
+    pOut->Z = work.XYZ[2];
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmscgats.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmscgats.c
new file mode 100644
index 0000000000..bd263573b1
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmscgats.c
@@ -0,0 +1,2795 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2018 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// IT8.7 / CGATS.17-200x handling -----------------------------------------------------------------------------
+
+
+#define MAXID        128     // Size of the identifier buffers (cmsIT8.id and cmsIT8.DoubleFormatter)
+#define MAXSTR      1024     // Size of the string buffers (cmsIT8.str and TABLE.SheetType)
+#define MAXTABLES    255     // Number of TABLE slots in cmsIT8.Tab
+#define MAXINCLUDE    20     // Number of slots in cmsIT8.FileStack, which caps include nesting
+// printf-style format for doubles; presumably the initial cmsIT8.DoubleFormatter -- TODO confirm
+#define DEFAULT_DBL_FORMAT  "%.10g" // Double formatting
+// Directory separator used by isabsolutepath() and BuildAbsolutePath()
+#ifdef CMS_IS_WINDOWS_
+#    include <io.h>
+#    define DIR_CHAR    '\\'
+#else
+#    define DIR_CHAR    '/'
+#endif
+
+
+// Symbols: the token kinds produced by InSymbol() and kept in cmsIT8.sy
+typedef enum {
+
+        SUNDEFINED, // What BinSrchKey() returns when an identifier is not a keyword
+        SINUM,      // Integer; value is left in cmsIT8.inum
+        SDNUM,      // Real; value is left in cmsIT8.dnum
+        SIDENT,     // Identifier; text is left in cmsIT8.id
+        SSTRING,    // string; text is left in cmsIT8.str
+        SCOMMENT,   // comment; InSymbol() keeps reading while the symbol is this one
+        SEOLN,      // End of line
+        SEOF,       // End of stream
+        SSYNERROR,  // Syntax error found on stream; set by SynError()
+
+        // Keywords: the symbols that TabKeys[] maps keyword text to
+
+        SBEGIN_DATA,
+        SBEGIN_DATA_FORMAT,
+        SEND_DATA,
+        SEND_DATA_FORMAT,
+        SKEYWORD,
+        SDATA_FORMAT_ID,
+        SINCLUDE
+
+    } SYMBOL;
+
+
+// How to write the value of a property; stored per entry in KEYVALUE.WriteAs and PROPERTY.as
+typedef enum {
+        // NOTE(review): the save routines are outside this view; the meanings below are read off the names
+        WRITE_UNCOOKED,     // presumably written as is (used for the NUMBER_OF_* properties)
+        WRITE_STRINGIFY,    // presumably written as a quoted string
+        WRITE_HEXADECIMAL,  // presumably written as a hexadecimal number
+        WRITE_BINARY,       // presumably written as a binary number
+        WRITE_PAIR          // presumably a name/value pair (used for WEIGHTING_FUNCTION and COMPUTATIONAL_PARAMETER)
+
+    } WRITEMODE;
+
+// Linked list of variable names; list heads are TABLE.HeaderList, cmsIT8.ValidKeywords and cmsIT8.ValidSampleID
+typedef struct _KeyVal {
+
+        struct _KeyVal*  Next;          // Next entry of the list
+        char*            Keyword;       // Name of variable
+        struct _KeyVal*  NextSubkey;    // If key is a dictionary, points to the next item
+        char*            Subkey;        // If key is a dictionary, points to the subkey name
+        char*            Value;         // Points to value
+        WRITEMODE        WriteAs;       // How to write the value
+
+   } KEYVALUE;
+
+
+// Linked list of memory chunks (Memory sink); cmsIT8.MemorySink points at its head
+typedef struct _OwnedMem {
+
+        struct _OwnedMem* Next;         // Next record of the list
+        void *            Ptr;          // Points to the chunk this record stands for
+
+   } OWNEDMEM;
+
+// Suballocator state, embedded in cmsIT8 as the Allocator member
+typedef struct _SubAllocator {
+         // NOTE(review): the allocation code is outside this view; the meanings below are read off the names
+         cmsUInt8Number* Block;      // presumably the block currently being carved up
+         cmsUInt32Number BlockSize;  // presumably the size of Block in bytes
+         cmsUInt32Number Used;       // presumably the bytes of Block already handed out
+
+    } SUBALLOCATOR;
+
+// Table. Each individual table can hold properties and rows & cols; cmsIT8.Tab holds MAXTABLES of them
+typedef struct _Table {
+
+        char SheetType[MAXSTR];               // The first row of the IT8 (the type)
+
+        int            nSamples, nPatches;    // Number of columns (samples) and number of rows (patches)
+        int            SampleID;              // Position of the ID column -- presumably SAMPLE_ID, TODO confirm
+
+        KEYVALUE*      HeaderList;            // The properties, as a linked list
+        // NOTE(review): how the two arrays below are sized and indexed is decided by code outside this view
+        char**         DataFormat;            // The binary stream descriptor
+        char**         Data;                  // The binary stream
+
+    } TABLE;
+
+// File stream being parsed; cmsIT8.FileStack holds one pointer to these per include level
+typedef struct _FileContext {
+        char           FileName[cmsMAX_PATH];    // File name if being read from file; SynError() prints it in every message
+        FILE*          Stream;                   // File stream, or NULL if held in memory (NextCh() then reads cmsIT8.Source)
+    } FILECTX;
+
+// This struct holds all information about an open IT8 handler.
+typedef struct {
+
+        // Tables
+        cmsUInt32Number  TablesCount;                     // How many tables in this stream
+        cmsUInt32Number  nTable;                          // Index of the current table; GetTable() expects it to be below TablesCount
+
+        TABLE Tab[MAXTABLES];                             // The tables themselves
+
+        // Memory management
+        OWNEDMEM*      MemorySink;            // The storage backend
+        SUBALLOCATOR   Allocator;             // String suballocator -- just to keep it fast
+
+        // Parser state machine
+        SYMBOL             sy;                // Current symbol, set by InSymbol() (and to SSYNERROR by SynError())
+        int                ch;                // Current character, set by NextCh()
+
+        cmsInt32Number     inum;              // integer value of the last SINUM
+        cmsFloat64Number   dnum;              // real value of the last SDNUM
+
+        char           id[MAXID];             // identifier text of the last SIDENT
+        char           str[MAXSTR];           // string text of the last SSTRING
+
+        // Allowed keywords & datasets. They have visibility on whole stream
+        KEYVALUE*      ValidKeywords;
+        KEYVALUE*      ValidSampleID;
+
+        char*          Source;                // Points to loc. being parsed (memory input; advanced by NextCh())
+        cmsInt32Number lineno;                // line counter for error reporting; InSymbol() increments it at each line end
+
+        FILECTX*       FileStack[MAXINCLUDE]; // Stack of files being parsed
+        cmsInt32Number IncludeSP;             // Include Stack Pointer: index of the FileStack entry currently in use
+
+        char*          MemoryBlock;           // The stream if held in memory
+
+        char           DoubleFormatter[MAXID];// Printf-like 'cmsFloat64Number' formatter, passed to snprintf() as the format
+
+        cmsContext    ContextID;              // The threading context; SynError() passes it to cmsSignalError()
+
+   } cmsIT8;
+
+
+// The stream for save operations: it has a FILE member and a set of memory-buffer members
+typedef struct {
+
+        FILE* stream;   // For save-to-file behaviour
+        // NOTE(review): the writer is outside this view; the meanings marked "presumably" are read off the names
+        cmsUInt8Number* Base;       // presumably the start of the memory buffer
+        cmsUInt8Number* Ptr;        // For save-to-mem behaviour
+        cmsUInt32Number Used;       // presumably the number of bytes written so far
+        cmsUInt32Number Max;        // presumably the capacity of the memory buffer
+
+    } SAVESTREAM;
+
+
+// ------------------------------------------------------ cmsIT8 parsing routines
+
+
+// A keyword: one row of the TabKeys[] lookup table
+typedef struct {
+
+        const char *id;     // Keyword text; BinSrchKey() compares it with cmsstrcasecmp()
+        SYMBOL sy;          // Symbol BinSrchKey() returns when the identifier matches id
+
+   } KEYWORD;
+
+// The keyword->symbol translation table. Sorting is required: BinSrchKey() binary-searches it with cmsstrcasecmp().
+static const KEYWORD TabKeys[] = {
+        // Both spellings of the include directive map to the same symbol
+        {"$INCLUDE",               SINCLUDE},   // This is an extension!
+        {".INCLUDE",               SINCLUDE},   // This is an extension!
+
+        {"BEGIN_DATA",             SBEGIN_DATA },
+        {"BEGIN_DATA_FORMAT",      SBEGIN_DATA_FORMAT },
+        {"DATA_FORMAT_IDENTIFIER", SDATA_FORMAT_ID},
+        {"END_DATA",               SEND_DATA},
+        {"END_DATA_FORMAT",        SEND_DATA_FORMAT},
+        {"KEYWORD",                SKEYWORD}
+        };
+// Number of rows in TabKeys[]
+#define NUMKEYS (sizeof(TabKeys)/sizeof(KEYWORD))
+
+// Predefined properties
+
+// A property: one row of the PredefinedProperties[] table
+typedef struct {
+        const char *id;    // The identifier
+        WRITEMODE as;      // How it is supposed to be written
+    } PROPERTY;
+
+static PROPERTY PredefinedProperties[] = {
+        // Each row pairs a property name with the WRITEMODE it is meant to be written with
+        {"NUMBER_OF_FIELDS", WRITE_UNCOOKED},    // Required - NUMBER OF FIELDS
+        {"NUMBER_OF_SETS",   WRITE_UNCOOKED},    // Required - NUMBER OF SETS
+        {"ORIGINATOR",       WRITE_STRINGIFY},   // Required - Identifies the specific system, organization or individual that created the data file.
+        {"FILE_DESCRIPTOR",  WRITE_STRINGIFY},   // Required - Describes the purpose or contents of the data file.
+        {"CREATED",          WRITE_STRINGIFY},   // Required - Indicates date of creation of the data file.
+        {"DESCRIPTOR",       WRITE_STRINGIFY},   // Required - Describes the purpose or contents of the data file.
+        {"DIFFUSE_GEOMETRY", WRITE_STRINGIFY},   // The diffuse geometry used. Allowed values are "sphere" or "opal".
+        {"MANUFACTURER",     WRITE_STRINGIFY},
+        {"MANUFACTURE",      WRITE_STRINGIFY},   // Some broken Fuji targets do store this value
+        {"PROD_DATE",        WRITE_STRINGIFY},   // Identifies year and month of production of the target in the form yyyy:mm.
+        {"SERIAL",           WRITE_STRINGIFY},   // Uniquely identifies individual physical target.
+
+        {"MATERIAL",         WRITE_STRINGIFY},    // Identifies the material on which the target was produced using a code
+                                                  // uniquely identifying the material. This is intended to be used for IT8.7
+                                                  // physical targets only (i.e. IT8.7/1 and IT8.7/2).
+
+        {"INSTRUMENTATION",  WRITE_STRINGIFY},    // Used to report the specific instrumentation used (manufacturer and
+                                                  // model number) to generate the data reported. This data will often
+                                                  // provide more information about the particular data collected than an
+                                                  // extensive list of specific details. This is particularly important for
+                                                  // spectral data or data derived from spectrophotometry.
+
+        {"MEASUREMENT_SOURCE", WRITE_STRINGIFY},  // Illumination used for spectral measurements. This data helps provide
+                                                  // a guide to the potential for issues of paper fluorescence, etc.
+
+        {"PRINT_CONDITIONS", WRITE_STRINGIFY},     // Used to define the characteristics of the printed sheet being reported.
+                                                   // Where standard conditions have been defined (e.g., SWOP at nominal)
+                                                   // named conditions may suffice. Otherwise, detailed information is
+                                                   // needed.
+
+        {"SAMPLE_BACKING",   WRITE_STRINGIFY},     // Identifies the backing material used behind the sample during
+                                                   // measurement. Allowed values are "black", "white", or "na".
+
+        {"CHISQ_DOF",        WRITE_STRINGIFY},     // Degrees of freedom associated with the Chi squared statistic
+                                                   // The properties below are new in recent specs:
+
+        {"MEASUREMENT_GEOMETRY", WRITE_STRINGIFY}, // The type of measurement, either reflection or transmission, should be indicated
+                                                   // along with details of the geometry and the aperture size and shape. For example,
+                                                   // for transmission measurements it is important to identify 0/diffuse, diffuse/0,
+                                                   // opal or integrating sphere, etc. For reflection it is important to identify 0/45,
+                                                   // 45/0, sphere (specular included or excluded), etc.
+
+       {"FILTER",            WRITE_STRINGIFY},     // Identifies the use of physical filter(s) during measurement. Typically used to
+                                                   // denote the use of filters such as none, D65, Red, Green or Blue.
+
+       {"POLARIZATION",      WRITE_STRINGIFY},     // Identifies the use of a physical polarization filter during measurement. Allowed
+                                                   // values are "yes", "white", "none" or "na".
+
+       {"WEIGHTING_FUNCTION", WRITE_PAIR},         // Indicates such functions as: the CIE standard observer functions used in the
+                                                   // calculation of various data parameters (2 degree and 10 degree), CIE standard
+                                                   // illuminant functions used in the calculation of various data parameters (e.g., D50,
+                                                   // D65, etc.), density status response, etc. If used there shall be at least one
+                                                   // name-value pair following the WEIGHTING_FUNCTION tag/keyword. The first attribute
+                                                   // in the set shall be "name" and shall identify the particular parameter used.
+                                                   // The second shall be "value" and shall provide the value associated with that name.
+                                                   // For ASCII data, a string containing the Name and Value attribute pairs shall follow
+                                                   // the weighting function keyword. A semi-colon separates attribute pairs from each
+                                                   // other and within the attribute the name and value are separated by a comma.
+
+       {"COMPUTATIONAL_PARAMETER", WRITE_PAIR},    // Parameter that is used in computing a value from measured data. Name is the name
+                                                   // of the calculation, parameter is the name of the parameter used in the calculation
+                                                   // and value is the value of the parameter.
+
+       {"TARGET_TYPE",        WRITE_STRINGIFY},    // The type of target being measured, e.g. IT8.7/1, IT8.7/3, user defined, etc.
+
+       {"COLORANT",           WRITE_STRINGIFY},    // Identifies the colorant(s) used in creating the target.
+
+       {"TABLE_DESCRIPTOR",   WRITE_STRINGIFY},    // Describes the purpose or contents of a data table.
+
+       {"TABLE_NAME",         WRITE_STRINGIFY}     // Provides a short name for a data table.
+};
+// Number of rows in PredefinedProperties[]
+#define NUMPREDEFINEDPROPS (sizeof(PredefinedProperties)/sizeof(PROPERTY))
+
+
+// Predefined sample types on dataset: the names of the data columns known in advance
+static const char* PredefinedSampleID[] = {
+        "SAMPLE_ID",      // Identifies sample that data represents
+        "STRING",         // Identifies label, or other non-machine readable value.
+                          // Value must begin and end with a " symbol
+
+        "CMYK_C",         // Cyan component of CMYK data expressed as a percentage
+        "CMYK_M",         // Magenta component of CMYK data expressed as a percentage
+        "CMYK_Y",         // Yellow component of CMYK data expressed as a percentage
+        "CMYK_K",         // Black component of CMYK data expressed as a percentage
+        "D_RED",          // Red filter density
+        "D_GREEN",        // Green filter density
+        "D_BLUE",         // Blue filter density
+        "D_VIS",          // Visual filter density
+        "D_MAJOR_FILTER", // Major filter density
+        "RGB_R",          // Red component of RGB data
+        "RGB_G",          // Green component of RGB data
+        "RGB_B",          // Blue component of RGB data
+        "SPECTRAL_NM",    // Wavelength of measurement expressed in nanometers
+        "SPECTRAL_PCT",   // Percentage reflectance/transmittance
+        "SPECTRAL_DEC",   // Reflectance/transmittance
+        "XYZ_X",          // X component of tristimulus data
+        "XYZ_Y",          // Y component of tristimulus data
+        "XYZ_Z",          // Z component of tristimulus data
+        "XYY_X",          // x component of chromaticity data
+        "XYY_Y",          // y component of chromaticity data
+        "XYY_CAPY",       // Y component of tristimulus data
+        "LAB_L",          // L* component of Lab data
+        "LAB_A",          // a* component of Lab data
+        "LAB_B",          // b* component of Lab data
+        "LAB_C",          // C*ab component of Lab data
+        "LAB_H",          // hab component of Lab data
+        "LAB_DE",         // CIE dE
+        "LAB_DE_94",      // CIE dE using CIE 94
+        "LAB_DE_CMC",     // dE using CMC
+        "LAB_DE_2000",    // CIE dE using CIE DE 2000
+        "MEAN_DE",        // Mean Delta E (LAB_DE) of samples compared to batch average
+                          // (Used for data files for ANSI IT8.7/1 and IT8.7/2 targets)
+        "STDEV_X",        // Standard deviation of X (tristimulus data)
+        "STDEV_Y",        // Standard deviation of Y (tristimulus data)
+        "STDEV_Z",        // Standard deviation of Z (tristimulus data)
+        "STDEV_L",        // Standard deviation of L*
+        "STDEV_A",        // Standard deviation of a*
+        "STDEV_B",        // Standard deviation of b*
+        "STDEV_DE",       // Standard deviation of CIE dE
+        "CHI_SQD_PAR"};   // The average of the standard deviations of L*, a* and b*. It is
+                          // used to derive an estimate of the chi-squared parameter which is
+                          // recommended as the predictor of the variability of dE
+// Number of entries in PredefinedSampleID[]
+#define NUMPREDEFINEDSAMPLEID (sizeof(PredefinedSampleID)/sizeof(char *))
+
+// Forward declaration: InSymbol() below calls AllocChunk() before a definition of it has been seen
+static void* AllocChunk(cmsIT8* it8, cmsUInt32Number size);
+
+// Checks whether c is a separator inside a line: a blank or a horizontal tab
+static
+cmsBool isseparator(int c)
+{
+    switch (c) { case ' ': case '\t': return TRUE; default: return FALSE; }
+}
+
+// Checks whether c is a valid identifier char: codes 33..126 except '#' and the two quote characters
+static cmsBool ismiddle(int c)
+{
+   if (c <= 32 || c >= 127) return FALSE;      // this range test already excludes blank and tab
+   return !(c == '#' || c == '\"' || c == '\'');
+}
+
+// Checks whether c is a valid identifier middle char: alphanumeric, or accepted by ismiddle().
+static cmsBool isidchar(int c)
+{
+   // isalnum() is only defined for unsigned char values and EOF; a plain char read from memory can be negative
+   return (c >= 0 && c <= 0xFF && isalnum(c)) || ismiddle(c);
+}
+
+// Checks whether c is a valid identifier first char: accepted by ismiddle() and not a decimal digit.
+static
+cmsBool isfirstidchar(int c)
+{
+     return ismiddle(c) && !isdigit(c);   // ismiddle() goes first: it bounds c to 33..126, so isdigit() never sees a negative value
+}
+
+// Guess whether the supplied path looks like an absolute path.
+// Returns TRUE when path starts with DIR_CHAR or, on Windows builds, with a letter
+// followed by ':'. Returns FALSE otherwise, including for NULL and the empty string.
+static
+cmsBool isabsolutepath(const char *path)
+{
+    // NULL and "" are never absolute
+    if (path == NULL || path[0] == 0)
+        return FALSE;
+
+    // Leading directory separator
+    if (path[0] == DIR_CHAR)
+        return TRUE;
+
+#ifdef  CMS_IS_WINDOWS_
+    // Drive-letter form "X:". path[1] may be read because path[0] is known to be non-zero.
+    // The cast keeps isalpha() defined when plain char is signed and the byte is 0x80 or above.
+    if (isalpha((unsigned char) path[0]) && path[1] == ':')
+        return TRUE;
+#endif
+
+    return FALSE;
+}
+
+
+// Makes a file path based on a given reference path: an absolute relPath is used as is, otherwise it replaces
+// whatever follows the last DIR_CHAR of basePath. Returns FALSE when basePath has no DIR_CHAR or when the result
+// (terminator included) does not fit in MaxLen bytes. It doesn't check if the path exists or even if it's legal.
+static
+cmsBool BuildAbsolutePath(const char *relPath, const char *basePath, char *buffer, cmsUInt32Number MaxLen)
+{
+    char *tail;
+    size_t relLen, dirLen;
+
+    if (relPath == NULL || basePath == NULL || buffer == NULL || MaxLen == 0) return FALSE;
+    relLen = strlen(relPath);
+    // Already absolute? Then it only has to fit; a clipped path would name a different file.
+    if (isabsolutepath(relPath)) {
+        if (relLen >= MaxLen) return FALSE;
+        memcpy(buffer, relPath, relLen + 1);
+        return TRUE;
+    }
+
+    // No, keep basePath up to and including its last separator
+    strncpy(buffer, basePath, MaxLen);
+    buffer[MaxLen-1] = 0;
+    tail = strrchr(buffer, DIR_CHAR);
+    if (tail == NULL) return FALSE;    // Is not absolute and has no separators??
+
+    // relPath and its terminator must fit after the separator. A plain strncpy() of the remaining
+    // size could write one byte past MaxLen and leave the result without a terminator.
+    dirLen = (size_t) (tail - buffer) + 1;
+    if (relLen >= (size_t) MaxLen - dirLen) return FALSE;
+    memcpy(tail + 1, relPath, relLen + 1);
+    return TRUE;
+}
+
+
+// Make sure no exploit is being even tried: str is about to serve as a printf-style format,
+// so any text holding a '%' is swapped for a fixed placeholder; other text comes back unchanged.
+static const char* NoMeta(const char* str)
+{
+    const char* percent = strchr(str, '%');
+
+    return (percent == NULL) ? str
+                             : "**** CORRUPTED FORMAT STRING ***";
+}
+
+// Syntax error
+static
+cmsBool SynError(cmsIT8* it8, const char *Txt, ...)
+{
+    char Buffer[256], ErrMsg[1024];
+    va_list args;
+
+    va_start(args, Txt);
+    vsnprintf(Buffer, 255, Txt, args);
+    Buffer[255] = 0;
+    va_end(args);
+
+    snprintf(ErrMsg, 1023, "%s: Line %d, %s", it8->FileStack[it8 ->IncludeSP]->FileName, it8->lineno, Buffer);
+    ErrMsg[1023] = 0;
+    it8->sy = SSYNERROR;
+    cmsSignalError(it8 ->ContextID, cmsERROR_CORRUPTION_DETECTED, "%s", ErrMsg);
+    return FALSE;
+}
+
+// Check if current symbol is same as specified. Otherwise Err is reported through SynError()
+// (after NoMeta() has vetted it, since it is passed as the format string) and FALSE is returned.
+static cmsBool Check(cmsIT8* it8, SYMBOL sy, const char* Err)
+{
+        if (it8 -> sy == sy) return TRUE;
+
+        return SynError(it8, NoMeta(Err));
+}
+
+// Read Next character from stream into it8->ch: from the current file when one is open, otherwise from memory.
+// The end of an included file pops the include stack and yields ' '; the end of the outermost file yields 0.
+static void NextCh(cmsIT8* it8)
+{
+    FILECTX* file = it8->FileStack[it8->IncludeSP];
+    if (file->Stream == NULL) {
+        // Convert through unsigned char so bytes >= 0x80 arrive as 128..255, exactly as fgetc() gives them. A signed
+        // plain char would give negatives, invalid for the <ctype.h> calls on it8->ch, and 0xFF would read as the -1 end marker.
+        it8->ch = (unsigned char) *it8->Source;
+        if (it8->ch) it8->Source++;     // the terminating 0 is never stepped over
+        return;
+    }
+
+    it8->ch = fgetc(file->Stream);
+    if (!feof(file->Stream)) return;
+
+    if (it8->IncludeSP > 0) {
+        fclose(file->Stream);           // Included file exhausted: go back to the file that included it
+        it8->IncludeSP--;
+        it8->ch = ' ';                  // Whitespace to be ignored
+    }
+    else
+        it8->ch = 0;                    // EOF
+}
+
+
+// Try to see if current identifier is a keyword: binary search of TabKeys[] using cmsstrcasecmp().
+// Returns the symbol of the matching row, or SUNDEFINED when id matches none.
+static SYMBOL BinSrchKey(const char *id)
+{
+    int lo = 0;
+    int hi = (int) NUMKEYS - 1;         // both bounds are inclusive
+
+    while (lo <= hi)
+    {
+        const int mid = (lo + hi) / 2;
+        const int cmp = cmsstrcasecmp(id, TabKeys[mid].id);
+
+        if (cmp == 0) return TabKeys[mid].sy;
+        if (cmp < 0) hi = mid - 1;
+        else lo = mid + 1;
+    }
+
+    return SUNDEFINED;
+}
+
+
+// 10 ^n, computed with pow(); n may be negative
+static cmsFloat64Number xpow10(int n)
+{
+    const cmsFloat64Number exponent = (cmsFloat64Number) n;
+    return pow(10, exponent);
+}
+
+
+//  Reads a Real number, tries to follow from integer number: inum is the integer part already read.
+//  Consumes any further integer digits, an optional ".digits" fraction and an optional exponent
+//  ("E" or "e", optional sign, digits). The result is left in it8->dnum; it8->ch ends up holding
+//  the first character that is not part of the number.
+static
+void ReadReal(cmsIT8* it8, cmsInt32Number inum)
+{
+    it8->dnum = (cmsFloat64Number)inum;
+
+    // Digits that still belong to the integer part
+    for (; isdigit(it8->ch); NextCh(it8)) {
+
+        it8->dnum = (cmsFloat64Number)it8->dnum * 10.0 + (cmsFloat64Number)(it8->ch - '0');
+    }
+
+    if (it8->ch == '.') {        // Decimal point
+
+        cmsFloat64Number fraction = 0.0;   // fractional digits, accumulated as a whole number
+        int nDigits = 0;                   // how many fractional digits were read
+
+        NextCh(it8);               // Eats dec. point
+
+        for (; isdigit(it8->ch); NextCh(it8)) {
+
+            fraction = fraction * 10.0 + (cmsFloat64Number)(it8->ch - '0');
+            nDigits++;
+        }
+
+        // Scale the accumulated digits back below the decimal point
+        it8->dnum = it8->dnum + (fraction / xpow10(nDigits));
+    }
+
+    // Exponent, example 34.00E+20
+    if (toupper(it8->ch) == 'E') {
+
+        cmsInt32Number exponent = 0;
+        cmsInt32Number expSign = 1;
+
+        NextCh(it8);
+
+        // Optional sign; '+' is consumed and changes nothing
+        switch (it8->ch) {
+
+        case '-': expSign = -1; NextCh(it8); break;
+        case '+': NextCh(it8); break;
+        default:  break;
+        }
+
+        for (; isdigit(it8->ch); NextCh(it8)) {
+
+            cmsInt32Number digit = (it8->ch - '0');
+
+            // A digit that would take the exponent to 2147483647 or beyond is read but dropped
+            if ((cmsFloat64Number)exponent * 10.0 + (cmsFloat64Number)digit < (cmsFloat64Number)+2147483647.0)
+                exponent = exponent * 10 + digit;
+        }
+
+        // Apply the signed power of ten
+        exponent = expSign * exponent;
+
+        it8->dnum = it8->dnum * xpow10(exponent);
+    }
+}
+
+// Parses a float number: optional sign, digits, optional ".digits" and optional exponent
+// ("E" or "e", optional sign, digits). Parsing stops at the first character that does not
+// fit and there is no error report, so text without digits gives 0. NULL gives 0.0.
+// This can not call directly atof because it uses locale dependent
+// parsing, while CCMX files always use . as decimal separator
+static
+cmsFloat64Number ParseFloatNumber(const char *Buffer)
+{
+    // The <ctype.h> functions need an unsigned char value (or EOF). Reading the text through an
+    // unsigned char pointer stops bytes >= 0x80 from reaching isdigit()/toupper() as negative ints.
+    const unsigned char* p = (const unsigned char*) Buffer;
+    cmsFloat64Number dnum = 0.0;
+    int sign = 1;
+
+    // keep safe
+    if (Buffer == NULL) return 0.0;
+
+    if (*p == '-' || *p == '+') {
+        sign = (*p == '-') ? -1 : 1;
+        p++;
+    }
+
+    // Integer part. isdigit() is false for the terminator, so every loop below stops there.
+    while (isdigit(*p)) {
+        dnum = dnum * 10.0 + (*p - '0');
+        p++;
+    }
+
+    if (*p == '.') {
+
+        cmsFloat64Number frac = 0.0;      // fraction
+        int prec = 0;                     // precision
+
+        p++;                              // skip the decimal point
+
+        while (isdigit(*p)) {
+            frac = frac * 10.0 + (*p - '0');
+            prec++;
+            p++;
+        }
+
+        dnum = dnum + (frac / xpow10(prec));
+    }
+
+    // Exponent, example 34.00E+20
+    if (toupper(*p) == 'E') {
+
+        int e;
+        int sgn;
+
+        p++;
+        sgn = 1;
+
+        if (*p == '-') {
+            sgn = -1;
+            p++;
+        }
+        else
+            if (*p == '+') {
+                sgn = +1;
+                p++;
+            }
+
+        e = 0;
+        while (isdigit(*p)) {
+            cmsInt32Number digit = (*p - '0');
+
+            // A digit that would take the exponent to 2147483647 or beyond is read but dropped
+            if ((cmsFloat64Number)e * 10.0 + digit < (cmsFloat64Number)+2147483647.0)
+                e = e * 10 + digit;
+
+            p++;
+        }
+
+        e = sgn*e;
+        dnum = dnum * xpow10(e);
+    }
+
+    return sign * dnum;
+}
+
+
+// Reads next symbol into it8->sy; its value, if any, goes to it8->inum, dnum, id or str. Comments are skipped.
+static
+void InSymbol(cmsIT8* it8)
+{
+    CMSREGISTER char *idptr;
+    CMSREGISTER int k;
+    SYMBOL key;
+    int sng;
+    // One symbol per pass; the loop repeats only while that symbol is a comment
+    do {
+
+        while (isseparator(it8->ch))
+            NextCh(it8);
+
+        if (isfirstidchar(it8->ch)) {          // Identifier
+
+            k = 0;
+            idptr = it8->id;
+
+            do {
+                // Characters that do not fit in id[] are consumed but not stored
+                if (++k < MAXID) *idptr++ = (char) it8->ch;
+
+                NextCh(it8);
+
+            } while (isidchar(it8->ch));
+
+            *idptr = '\0';
+
+            // Keywords from TabKeys[] get their own symbol, anything else is an identifier
+            key = BinSrchKey(it8->id);
+            if (key == SUNDEFINED) it8->sy = SIDENT;
+            else it8->sy = key;
+
+        }
+        else                         // Is a number?
+            if (isdigit(it8->ch) || it8->ch == '.' || it8->ch == '-' || it8->ch == '+')
+            {
+                int sign = 1;
+
+                if (it8->ch == '-') {
+                    sign = -1;
+                    NextCh(it8);
+                }
+
+                it8->inum = 0;
+                it8->sy   = SINUM;
+
+                if (it8->ch == '0') {          // 0xnnnn (Hexa) or 0bnnnn (Binary)
+
+                    NextCh(it8);
+                    if (toupper(it8->ch) == 'X') {
+
+                        int j;
+
+                        NextCh(it8);
+                        while (isxdigit(it8->ch))
+                        {
+                            it8->ch = toupper(it8->ch);
+                            if (it8->ch >= 'A' && it8->ch <= 'F')  j = it8->ch -'A'+10;
+                            else j = it8->ch - '0';
+
+                            if ((cmsFloat64Number) it8->inum * 16.0 + (cmsFloat64Number) j > (cmsFloat64Number)+2147483647.0)
+                            {
+                                SynError(it8, "Invalid hexadecimal number");
+                                return;
+                            }
+
+                            it8->inum = it8->inum * 16 + j;
+                            NextCh(it8);
+                        }
+                        return;
+                    }
+
+                    if (toupper(it8->ch) == 'B') {  // Binary
+
+                        int j;
+
+                        NextCh(it8);
+                        while (it8->ch == '0' || it8->ch == '1')
+                        {
+                            j = it8->ch - '0';
+
+                            if ((cmsFloat64Number) it8->inum * 2.0 + j > (cmsFloat64Number)+2147483647.0)
+                            {
+                                SynError(it8, "Invalid binary number");
+                                return;
+                            }
+
+                            it8->inum = it8->inum * 2 + j;
+                            NextCh(it8);
+                        }
+                        return;
+                    }
+                }
+
+                // Decimal digits; a value too large for 32 bits is handed over to ReadReal()
+                while (isdigit(it8->ch)) {
+
+                    cmsInt32Number digit = (it8->ch - '0');
+
+                    if ((cmsFloat64Number) it8->inum * 10.0 + (cmsFloat64Number) digit > (cmsFloat64Number) +2147483647.0) {
+                        ReadReal(it8, it8->inum);
+                        it8->sy = SDNUM;
+                        it8->dnum *= sign;
+                        return;
+                    }
+
+                    it8->inum = it8->inum * 10 + digit;
+                    NextCh(it8);
+                }
+
+                if (it8->ch == '.') {
+
+                    ReadReal(it8, it8->inum);
+                    it8->sy = SDNUM;
+                    it8->dnum *= sign;
+                    return;
+                }
+
+                it8 -> inum *= sign;
+
+                // Special case. Numbers followed by letters are taken as identifiers
+
+                if (isidchar(it8 ->ch)) {
+
+                    if (it8 ->sy == SINUM) {
+
+                        snprintf(it8->id, 127, "%d", it8->inum);
+                    }
+                    else {
+
+                        snprintf(it8->id, 127, it8 ->DoubleFormatter, it8->dnum);
+                    }
+
+                    k = (int) strlen(it8 ->id);
+                    idptr = it8 ->id + k;
+                    do {
+
+                        if (++k < MAXID) *idptr++ = (char) it8->ch;
+
+                        NextCh(it8);
+
+                    } while (isidchar(it8->ch));
+
+                    *idptr = '\0';
+                    it8->sy = SIDENT;
+                }
+                return;
+
+            }
+            else
+                switch ((int) it8->ch) {
+
+        // EOF marker -- ignore it
+        case '\x1a':
+            NextCh(it8);
+            break;
+
+        // Eof stream markers
+        case 0:
+        case -1:
+            it8->sy = SEOF;
+            break;
+
+
+        // Next line
+        case '\r':
+            NextCh(it8);
+            if (it8 ->ch == '\n') 
+                NextCh(it8);
+            it8->sy = SEOLN;
+            it8->lineno++;
+            break;
+
+        case '\n':
+            NextCh(it8);
+            it8->sy = SEOLN;
+            it8->lineno++;
+            break;
+
+        // Comment
+        case '#':
+            NextCh(it8);
+            while (it8->ch && it8->ch != '\n' && it8->ch != '\r')
+                NextCh(it8);
+
+            it8->sy = SCOMMENT;
+            break;
+
+        // String.
+        case '\'':
+        case '\"':
+            idptr = it8->str;
+            sng = it8->ch;
+            k = 0;
+            NextCh(it8);
+            // Copy up to the matching quote; a line break or a full buffer ends the string early
+            while (k < (MAXSTR-1) && it8->ch != sng) {
+
+                if (it8->ch == '\n'|| it8->ch == '\r') k = MAXSTR+1;
+                else {
+                    *idptr++ = (char) it8->ch;
+                    NextCh(it8);
+                    k++;
+                }
+            }
+
+            it8->sy = SSTRING;
+            *idptr = '\0';
+            NextCh(it8);
+            break;
+
+
+        default:
+            SynError(it8, "Unrecognized character: 0x%x", it8 ->ch);
+            return;
+            }
+
+    } while (it8->sy == SCOMMENT);
+
+    // Handle the include special token
+
+    if (it8 -> sy == SINCLUDE) {
+
+                FILECTX* FileNest;
+
+                if(it8 -> IncludeSP >= (MAXINCLUDE-1)) {
+
+                    SynError(it8, "Too many recursion levels");
+                    return;
+                }
+
+                InSymbol(it8);
+                if (!Check(it8, SSTRING, "Filename expected")) return;
+                // Reuse the FILECTX of the next include level if there is one, otherwise allocate it
+                FileNest = it8 -> FileStack[it8 -> IncludeSP + 1];
+                if(FileNest == NULL) {
+                    FileNest = it8 ->FileStack[it8 -> IncludeSP + 1] = (FILECTX*)AllocChunk(it8, sizeof(FILECTX));
+                    if (FileNest == NULL) {    // a failed allocation used to be dereferenced just below
+                        SynError(it8, "Out of memory");
+                        return;
+                    }
+                }
+                if (BuildAbsolutePath(it8->str,
+                                      it8->FileStack[it8->IncludeSP]->FileName,
+                                      FileNest->FileName, cmsMAX_PATH-1) == FALSE) {
+                    SynError(it8, "File path too long");
+                    return;
+                }
+
+                FileNest->Stream = fopen(FileNest->FileName, "rt");
+                if (FileNest->Stream == NULL) {
+
+                        SynError(it8, "File %s not found", FileNest->FileName);
+                        return;
+                }
+                it8->IncludeSP++;
+                // Start the included file with a blank and read its first symbol
+                it8 ->ch = ' ';
+                InSymbol(it8);
+    }
+
+}
+
+// Requires the current symbol to be an end-of-line separator and then
+// consumes it together with any further consecutive separators.
+// Returns FALSE when Check() fails, TRUE otherwise.
+static
+cmsBool CheckEOLN(cmsIT8* it8)
+{
+    cmsBool isSeparator = Check(it8, SEOLN, "Expected separator");
+
+    if (!isSeparator)
+        return FALSE;
+
+    // Swallow the whole run of separators
+    for (; it8->sy == SEOLN; InSymbol(it8))
+        ;
+
+    return TRUE;
+}
+
+// Advances past the current symbol when it equals `sy`. The end-of-file
+// symbol is never skipped and any other symbol is left untouched.
+
+static
+void Skip(cmsIT8* it8, SYMBOL sy)
+{
+    if (it8->sy == SEOF)
+        return;
+
+    if (it8->sy != sy)
+        return;
+
+    InSymbol(it8);
+}
+
+
+// Consumes every consecutive end-of-line symbol, if there is any
+static
+void SkipEOLN(cmsIT8* it8)
+{
+    for (;;) {
+
+        if (it8->sy != SEOLN)
+            break;
+
+        InSymbol(it8);
+    }
+}
+
+
+/* Renders the current symbol as text into Buffer.
+ *
+ *   it8        - parser state; it8->sy selects which value is rendered.
+ *   Buffer     - destination for the text.
+ *   max        - passed to strncpy/snprintf as the size limit AND used as the
+ *                index of the final terminator, so Buffer must hold at least
+ *                max+1 bytes. The callers in this file pass the array size
+ *                minus one.
+ *   ErrorTitle - message handed to SynError when the current symbol is none
+ *                of the kinds handled below.
+ *
+ * Returns TRUE on success, otherwise whatever SynError returns.
+ */
+static
+cmsBool GetVal(cmsIT8* it8, char* Buffer, cmsUInt32Number max, const char* ErrorTitle)
+{
+    switch (it8->sy) {
+
+    case SEOLN:   // Empty value
+                  Buffer[0]=0;
+                  break;
+    case SIDENT:  strncpy(Buffer, it8->id, max);
+                  Buffer[max-1]=0;   // strncpy does not terminate when the source fills the limit
+                  break;
+    case SINUM:   snprintf(Buffer, max, "%d", it8 -> inum); break;
+    case SDNUM:   snprintf(Buffer, max, it8->DoubleFormatter, it8 -> dnum); break;   // format string comes from the container
+    case SSTRING: strncpy(Buffer, it8->str, max);
+                  Buffer[max-1] = 0;
+                  break;
+
+
+    default:
+         return SynError(it8, "%s", ErrorTitle);
+    }
+
+    Buffer[max] = 0;   // index max, i.e. one past the size given to strncpy/snprintf above
+    return TRUE;
+}
+
+// ---------------------------------------------------------- Table
+
+// Returns the table selected by it8->nTable. When that index is not below
+// it8->TablesCount an error is reported through SynError and the first
+// table is returned instead.
+static
+TABLE* GetTable(cmsIT8* it8)
+{
+    if (it8->nTable < it8->TablesCount)
+        return &it8->Tab[it8->nTable];
+
+    SynError(it8, "Table %d out of sequence", it8->nTable);
+    return &it8->Tab[0];
+}
+
+// ---------------------------------------------------------- Memory management
+
+
+// Releases a container: every entry of the MemorySink list (payload and
+// node), the optional MemoryBlock, and finally the container itself.
+// A NULL handle is ignored.
+void CMSEXPORT cmsIT8Free(cmsHANDLE hIT8)
+{
+    cmsIT8* container = (cmsIT8*) hIT8;
+    OWNEDMEM* node;
+
+    if (container == NULL)
+        return;
+
+    node = container->MemorySink;
+    while (node != NULL) {
+
+        // Grab the link before the node itself goes away
+        OWNEDMEM* following = node->Next;
+
+        if (node->Ptr != NULL)
+            _cmsFree(container->ContextID, node->Ptr);
+
+        _cmsFree(container->ContextID, node);
+        node = following;
+    }
+
+    if (container->MemoryBlock != NULL)
+        _cmsFree(container->ContextID, container->MemoryBlock);
+
+    _cmsFree(container->ContextID, container);
+}
+
+
+// Obtains `size` bytes from _cmsMallocZero and records the block at the
+// head of the container's MemorySink list, which cmsIT8Free walks to
+// release everything. Returns NULL when either the block or its list node
+// cannot be allocated; in the latter case the block is freed again.
+static
+void* AllocBigBlock(cmsIT8* it8, cmsUInt32Number size)
+{
+    OWNEDMEM* node;
+    void* block = _cmsMallocZero(it8->ContextID, size);
+
+    if (block == NULL)
+        return NULL;
+
+    node = (OWNEDMEM*) _cmsMallocZero(it8->ContextID, sizeof(OWNEDMEM));
+    if (node == NULL) {
+
+        _cmsFree(it8->ContextID, block);
+        return NULL;
+    }
+
+    // Push the new node in front of the list
+    node->Ptr       = block;
+    node->Next      = it8->MemorySink;
+    it8->MemorySink = node;
+
+    return block;
+}
+
+
+// Suballocator. Carves `size` bytes (rounded up by _cmsALIGNMEM) out of the
+// current big block, starting a new, larger block when the current one
+// cannot hold the request. Chunks are never released one by one; they go
+// away with their owning block in cmsIT8Free.
+// Returns NULL when no block is available, i.e. a new block was needed and
+// could not be allocated.
+static
+void* AllocChunk(cmsIT8* it8, cmsUInt32Number size)
+{
+    cmsUInt32Number Free = it8 ->Allocator.BlockSize - it8 ->Allocator.Used;
+    cmsUInt8Number* ptr;
+
+    size = _cmsALIGNMEM(size);
+
+    if (size > Free) {
+
+        // Grow geometrically, starting at 20K
+        if (it8 -> Allocator.BlockSize == 0)
+
+                it8 -> Allocator.BlockSize = 20*1024;
+        else
+                it8 ->Allocator.BlockSize *= 2;
+
+        // A single request larger than the new block size gets its own block
+        if (it8 ->Allocator.BlockSize < size)
+                it8 ->Allocator.BlockSize = size;
+
+        it8 ->Allocator.Used = 0;
+        it8 ->Allocator.Block = (cmsUInt8Number*)  AllocBigBlock(it8, it8 ->Allocator.BlockSize);
+    }
+
+    if (it8 ->Allocator.Block == NULL) {
+
+        // Out of memory. Forget the block size so the next request tries a
+        // fresh allocation, and never hand out an offset from a NULL base.
+        it8 ->Allocator.BlockSize = 0;
+        it8 ->Allocator.Used = 0;
+        return NULL;
+    }
+
+    ptr = it8 ->Allocator.Block + it8 ->Allocator.Used;
+    it8 ->Allocator.Used += size;
+
+    return (void*) ptr;
+
+}
+
+
+// Duplicates `str`, terminator included, into chunk memory owned by the
+// container. Returns NULL when the chunk cannot be allocated.
+static
+char *AllocString(cmsIT8* it8, const char* str)
+{
+    cmsUInt32Number Size = (cmsUInt32Number) strlen(str)+1;
+    char *ptr;
+
+
+    ptr = (char *) AllocChunk(it8, Size);
+
+    // Copy the terminator too instead of counting on the chunk being pre-zeroed
+    if (ptr) memmove(ptr, str, Size);
+
+    return ptr;
+}
+
+/* Searches a KEYVALUE list for Key and, optionally, Subkey.
+ *
+ *   p       - head of the list; may be NULL.
+ *   Key     - keyword to look for, compared through cmsstrcasecmp. A Key
+ *             whose first character is '#' never matches anything.
+ *   Subkey  - when non-NULL, the NextSubkey chain starting at the matching
+ *             entry is searched for this subkey as well.
+ *   LastPtr - optional out-parameter. On success it holds the matching
+ *             entry. On failure it holds the last entry recorded during the
+ *             walk (NULL for an empty list); AddToList uses that entry as
+ *             the place to link a new one.
+ *
+ * Returns TRUE when the requested entry exists, FALSE otherwise.
+ */
+
+static
+cmsBool IsAvailableOnList(KEYVALUE* p, const char* Key, const char* Subkey, KEYVALUE** LastPtr)
+{
+    if (LastPtr) *LastPtr = p;   // covers the empty-list case
+
+    for (;  p != NULL; p = p->Next) {
+
+        if (LastPtr) *LastPtr = p;
+
+        if (*Key != '#') { // Comments are ignored
+
+            if (cmsstrcasecmp(Key, p->Keyword) == 0)
+                break;
+        }
+    }
+
+    if (p == NULL)
+        return FALSE;
+
+    if (Subkey == 0)
+        return TRUE;
+
+    for (; p != NULL; p = p->NextSubkey) {
+
+        if (p ->Subkey == NULL) continue;   // entry without subkey: skipped, LastPtr untouched
+
+        if (LastPtr) *LastPtr = p;
+
+        if (cmsstrcasecmp(Subkey, p->Subkey) == 0)
+            return TRUE;
+    }
+
+    return FALSE;
+}
+
+
+
+/* Adds a property to a KEYVALUE list, or updates it when it already exists.
+ *
+ *   it8     - container that owns the memory of the entries.
+ *   Head    - address of the list head; written when the list was empty.
+ *   Key     - keyword of the property.
+ *   Subkey  - optional subkey; NULL for plain properties.
+ *   xValue  - value to store, copied with AllocString; NULL stores no value.
+ *   WriteAs - write mode recorded on the entry.
+ *
+ * When IsAvailableOnList finds the Key/Subkey pair, that entry is reused and
+ * only its WriteAs and Value are replaced. Otherwise a new entry is allocated
+ * and linked at the end of the list and, when it carries a subkey, also onto
+ * the NextSubkey link of the entry reported by IsAvailableOnList.
+ *
+ * Returns the affected entry, or NULL when the container entry cannot be
+ * allocated.
+ */
+static
+KEYVALUE* AddToList(cmsIT8* it8, KEYVALUE** Head, const char *Key, const char *Subkey, const char* xValue, WRITEMODE WriteAs)
+{
+    KEYVALUE* p;
+    KEYVALUE* last;
+
+
+    // Check if property is already in list
+
+    if (IsAvailableOnList(*Head, Key, Subkey, &p)) {
+
+        // This may work for editing properties
+
+        //     return SynError(it8, "duplicate key <%s>", Key);
+    }
+    else {
+
+        last = p;   // last entry recorded by the failed search, NULL for an empty list
+
+        // Allocate the container
+        p = (KEYVALUE*) AllocChunk(it8, sizeof(KEYVALUE));
+        if (p == NULL)
+        {
+            SynError(it8, "AddToList: out of memory");
+            return NULL;
+        }
+
+        // Store name and value
+        p->Keyword = AllocString(it8, Key);   // NOTE(review): AllocString results are not checked for NULL here
+        p->Subkey = (Subkey == NULL) ? NULL : AllocString(it8, Subkey);
+
+        // Keep the container in our list
+        if (*Head == NULL) {
+            *Head = p;
+        }
+        else
+        {
+            if (Subkey != NULL && last != NULL) {
+
+                last->NextSubkey = p;
+
+                // If Subkey is not null, then last is the last property with the same key,
+                // but not necessarily is the last property in the list, so we need to move
+                // to the actual list end
+                while (last->Next != NULL)
+                         last = last->Next;
+            }
+
+            if (last != NULL) last->Next = p;
+        }
+
+        p->Next    = NULL;
+        p->NextSubkey = NULL;
+    }
+
+    p->WriteAs = WriteAs;
+
+    if (xValue != NULL) {
+
+        p->Value   = AllocString(it8, xValue);   // a previous value, if any, stays in chunk memory
+    }
+    else {
+        p->Value   = NULL;
+    }
+
+    return p;
+}
+
+// Registers Key on the container's ValidKeywords list with write mode `as`.
+// Returns the list entry, or NULL when AddToList fails.
+static
+KEYVALUE* AddAvailableProperty(cmsIT8* it8, const char* Key, WRITEMODE as)
+{
+    KEYVALUE** keywords = &it8->ValidKeywords;
+
+    return AddToList(it8, keywords, Key, NULL, NULL, as);
+}
+
+
+// Registers Key on the container's ValidSampleID list (write mode
+// WRITE_UNCOOKED). Returns the list entry, or NULL when AddToList fails.
+static
+KEYVALUE* AddAvailableSampleID(cmsIT8* it8, const char* Key)
+{
+    KEYVALUE** sampleIds = &it8->ValidSampleID;
+
+    return AddToList(it8, sampleIds, Key, NULL, NULL, WRITE_UNCOOKED);
+}
+
+
+// Initializes the next free slot of it8->Tab as an empty table and bumps
+// TablesCount. When every slot is already taken an error is reported through
+// SynError and the container is left unchanged, so Tab is never overrun.
+// NOTE(review): assumes it8->Tab is declared with MAXTABLES entries; confirm
+// against the cmsIT8 definition.
+static
+void AllocTable(cmsIT8* it8)
+{
+    TABLE* t;
+
+    if (it8 ->TablesCount >= MAXTABLES) {
+
+        SynError(it8, "Too many tables");
+        return;
+    }
+
+    t = it8 ->Tab + it8 ->TablesCount;
+
+    t->HeaderList = NULL;
+    t->DataFormat = NULL;
+    t->Data       = NULL;
+
+    it8 ->TablesCount++;
+}
+
+
+// Makes table `nTable` the current one. Asking for the index right after the
+// last existing table calls AllocTable first; any larger index is reported
+// as an error. Returns the selected index, or -1 on error.
+cmsInt32Number CMSEXPORT cmsIT8SetTable(cmsHANDLE  IT8, cmsUInt32Number nTable)
+{
+    cmsIT8* it8 = (cmsIT8*) IT8;
+
+    if (nTable > it8->TablesCount) {
+
+        SynError(it8, "Table %d is out of sequence", nTable);
+        return -1;
+    }
+
+    if (nTable == it8->TablesCount)
+        AllocTable(it8);
+
+    it8->nTable = nTable;
+
+    return (cmsInt32Number) nTable;
+}
+
+
+
+// Init an empty container: one empty table, an empty allocator, the default
+// number format, sheet type "CGATS.17" and the predefined keywords and
+// sample identifiers.
+// Returns the new handle, or NULL when the container or its first file
+// context cannot be allocated.
+cmsHANDLE  CMSEXPORT cmsIT8Alloc(cmsContext ContextID)
+{
+    cmsIT8* it8;
+    cmsUInt32Number i;
+
+    it8 = (cmsIT8*) _cmsMallocZero(ContextID, sizeof(cmsIT8));
+    if (it8 == NULL) return NULL;
+
+    AllocTable(it8);
+
+    it8->MemoryBlock = NULL;
+    it8->MemorySink  = NULL;
+
+    it8 ->nTable = 0;
+
+    it8->ContextID = ContextID;
+    it8->Allocator.Used = 0;
+    it8->Allocator.Block = NULL;
+    it8->Allocator.BlockSize = 0;
+
+    it8->ValidKeywords = NULL;
+    it8->ValidSampleID = NULL;
+
+    it8 -> sy = SUNDEFINED;
+    it8 -> ch = ' ';
+    it8 -> Source = NULL;
+    it8 -> inum = 0;
+    it8 -> dnum = 0.0;
+
+    it8->FileStack[0] = (FILECTX*)AllocChunk(it8, sizeof(FILECTX));
+    if (it8->FileStack[0] == NULL) {
+
+        // The parser dereferences FileStack[IncludeSP]; a container without
+        // its first file context is unusable, so give up cleanly.
+        cmsIT8Free((cmsHANDLE) it8);
+        return NULL;
+    }
+
+    it8->IncludeSP   = 0;
+    it8 -> lineno = 1;
+
+    strcpy(it8->DoubleFormatter, DEFAULT_DBL_FORMAT);
+    cmsIT8SetSheetType((cmsHANDLE) it8, "CGATS.17");
+
+    // Initialize predefined properties & data
+
+    for (i=0; i < NUMPREDEFINEDPROPS; i++)
+            AddAvailableProperty(it8, PredefinedProperties[i].id, PredefinedProperties[i].as);
+
+    for (i=0; i < NUMPREDEFINEDSAMPLEID; i++)
+            AddAvailableSampleID(it8, PredefinedSampleID[i]);
+
+
+   return (cmsHANDLE) it8;
+}
+
+
+// Returns the sheet type string stored in the current table
+const char* CMSEXPORT cmsIT8GetSheetType(cmsHANDLE hIT8)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    TABLE* current = GetTable(it8);
+
+    return current->SheetType;
+}
+
+// Stores `Type` as the sheet type of the current table, truncated to
+// MAXSTR-1 characters. Always returns TRUE.
+cmsBool CMSEXPORT cmsIT8SetSheetType(cmsHANDLE hIT8, const char* Type)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    TABLE* current = GetTable(it8);
+
+    strncpy(current->SheetType, Type, MAXSTR-1);
+    current->SheetType[MAXSTR-1] = 0;
+
+    return TRUE;
+}
+
+// Adds a comment entry (keyword "# ") to the header of the current table.
+// NULL or empty text is rejected. Returns TRUE when the entry was stored.
+cmsBool CMSEXPORT cmsIT8SetComment(cmsHANDLE hIT8, const char* Val)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* entry;
+
+    if (Val == NULL || *Val == 0)
+        return FALSE;
+
+    entry = AddToList(it8, &GetTable(it8)->HeaderList, "# ", NULL, Val, WRITE_UNCOOKED);
+
+    return entry != NULL;
+}
+
+// Sets a string property on the header of the current table; it is recorded
+// with write mode WRITE_STRINGIFY. NULL or empty values are rejected.
+// Returns TRUE when the property was stored.
+cmsBool CMSEXPORT cmsIT8SetPropertyStr(cmsHANDLE hIT8, const char* Key, const char *Val)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* entry;
+
+    if (Val == NULL || *Val == 0)
+        return FALSE;
+
+    entry = AddToList(it8, &GetTable(it8)->HeaderList, Key, NULL, Val, WRITE_STRINGIFY);
+
+    return entry != NULL;
+}
+
+// Sets a numeric property on the header of the current table. The number is
+// rendered with the container's DoubleFormatter and stored uncooked.
+// Returns TRUE when the property was stored.
+cmsBool CMSEXPORT cmsIT8SetPropertyDbl(cmsHANDLE hIT8, const char* cProp, cmsFloat64Number Val)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    char Text[1024];
+    KEYVALUE* entry;
+
+    snprintf(Text, 1023, it8->DoubleFormatter, Val);
+
+    entry = AddToList(it8, &GetTable(it8)->HeaderList, cProp, NULL, Text, WRITE_UNCOOKED);
+
+    return entry != NULL;
+}
+
+// Sets an integer property on the header of the current table. The value is
+// kept as decimal text and tagged WRITE_HEXADECIMAL.
+// Returns TRUE when the property was stored.
+cmsBool CMSEXPORT cmsIT8SetPropertyHex(cmsHANDLE hIT8, const char* cProp, cmsUInt32Number Val)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    char Text[1024];
+    KEYVALUE* entry;
+
+    snprintf(Text, 1023, "%u", Val);
+
+    entry = AddToList(it8, &GetTable(it8)->HeaderList, cProp, NULL, Text, WRITE_HEXADECIMAL);
+
+    return entry != NULL;
+}
+
+// Sets a property on the header of the current table, storing Buffer as is
+// (write mode WRITE_UNCOOKED). Returns TRUE when the property was stored.
+cmsBool CMSEXPORT cmsIT8SetPropertyUncooked(cmsHANDLE hIT8, const char* Key, const char* Buffer)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* entry = AddToList(it8, &GetTable(it8)->HeaderList, Key, NULL, Buffer, WRITE_UNCOOKED);
+
+    return entry != NULL;
+}
+
+// Sets a Key/SubKey property on the header of the current table (write mode
+// WRITE_PAIR). Returns TRUE when the property was stored.
+cmsBool CMSEXPORT cmsIT8SetPropertyMulti(cmsHANDLE hIT8, const char* Key, const char* SubKey, const char *Buffer)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* entry = AddToList(it8, &GetTable(it8)->HeaderList, Key, SubKey, Buffer, WRITE_PAIR);
+
+    return entry != NULL;
+}
+
+// Looks Key up in the header of the current table and returns the stored
+// value, or NULL when the header has no such property.
+const char* CMSEXPORT cmsIT8GetProperty(cmsHANDLE hIT8, const char* Key)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* found;
+
+    if (!IsAvailableOnList(GetTable(it8)->HeaderList, Key, NULL, &found))
+        return NULL;
+
+    return found->Value;
+}
+
+
+// Returns the property cProp of the current table converted with
+// ParseFloatNumber, or 0.0 when the property does not exist.
+cmsFloat64Number CMSEXPORT cmsIT8GetPropertyDbl(cmsHANDLE hIT8, const char* cProp)
+{
+    const char *text = cmsIT8GetProperty(hIT8, cProp);
+
+    return (text == NULL) ? 0.0 : ParseFloatNumber(text);
+}
+
+// Looks the Key/SubKey pair up in the header of the current table and
+// returns the stored value, or NULL when there is no such entry.
+const char* CMSEXPORT cmsIT8GetPropertyMulti(cmsHANDLE hIT8, const char* Key, const char *SubKey)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* found;
+
+    if (!IsAvailableOnList(GetTable(it8)->HeaderList, Key, SubKey, &found))
+        return NULL;
+
+    return found->Value;
+}
+
+// ----------------------------------------------------------------- Datasets
+
+
+// Allocates the field-name array of the current table unless it already
+// exists. The field count is taken from NUMBER_OF_FIELDS; when that is
+// missing or not positive an error is reported and 10 fields are assumed.
+// One spare slot is allocated on top of the field count.
+static
+void AllocateDataFormat(cmsIT8* it8)
+{
+    TABLE* table = GetTable(it8);
+
+    if (table->DataFormat != NULL)
+        return;     // Already allocated
+
+    table->nSamples = (int) cmsIT8GetPropertyDbl(it8, "NUMBER_OF_FIELDS");
+
+    if (table->nSamples <= 0) {
+
+        SynError(it8, "AllocateDataFormat: Unknown NUMBER_OF_FIELDS");
+        table->nSamples = 10;
+    }
+
+    table->DataFormat = (char**) AllocChunk(it8, ((cmsUInt32Number) table->nSamples + 1) * sizeof(char *));
+
+    if (table->DataFormat == NULL)
+        SynError(it8, "AllocateDataFormat: Unable to allocate dataFormat array");
+}
+
+// Returns the name stored for field `n` of the current table, or NULL when
+// the table has no field-name array. `n` is not range-checked here.
+static
+const char *GetDataFormat(cmsIT8* it8, int n)
+{
+    TABLE* table = GetTable(it8);
+
+    return (table->DataFormat != NULL) ? table->DataFormat[n] : NULL;
+}
+
+// Stores `label` as the name of field `n` of the current table, allocating
+// the field-name array on first use.
+// Returns FALSE when n is negative, when n is beyond NUMBER_OF_FIELDS, or
+// when the label cannot be copied; TRUE otherwise (including the case where
+// the array could not be allocated, which AllocateDataFormat reports).
+static
+cmsBool SetDataFormat(cmsIT8* it8, int n, const char *label)
+{
+    TABLE* t = GetTable(it8);
+
+    if (!t->DataFormat)
+        AllocateDataFormat(it8);
+
+    // The index comes straight from the public API; a negative one would
+    // write before the start of the array
+    if (n < 0) {
+        SynError(it8, "Field index %d out of range", n);
+        return FALSE;
+    }
+
+    // n == nSamples is still inside the array, which has one spare slot
+    if (n > t -> nSamples) {
+        SynError(it8, "More than NUMBER_OF_FIELDS fields.");
+        return FALSE;
+    }
+
+    if (t->DataFormat) {
+        t->DataFormat[n] = AllocString(it8, label);
+        if (t->DataFormat[n] == NULL) return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+// Public entry point: names field `n` of the current table `Sample`.
+// Returns whatever SetDataFormat returns.
+cmsBool CMSEXPORT cmsIT8SetDataFormat(cmsHANDLE  h, int n, const char *Sample)
+{
+    return SetDataFormat((cmsIT8*) h, n, Sample);
+}
+
+// Allocates the data array of the current table unless it already exists.
+// The dimensions come from the NUMBER_OF_FIELDS and NUMBER_OF_SETS
+// properties; a missing property counts as 0 instead of being handed to
+// atoi as a NULL pointer. Dimensions outside 0..0x7ffe are rejected.
+// On any failure an error is reported and t->Data stays NULL.
+static
+void AllocateDataSet(cmsIT8* it8)
+{
+    TABLE* t = GetTable(it8);
+    const char* Fields;
+    const char* Sets;
+
+    if (t -> Data) return;    // Already allocated
+
+    Fields = cmsIT8GetProperty(it8, "NUMBER_OF_FIELDS");
+    Sets   = cmsIT8GetProperty(it8, "NUMBER_OF_SETS");
+
+    t-> nSamples   = (Fields != NULL) ? atoi(Fields) : 0;
+    t-> nPatches   = (Sets   != NULL) ? atoi(Sets)   : 0;
+
+    if (t -> nSamples < 0 || t->nSamples > 0x7ffe || t->nPatches < 0 || t->nPatches > 0x7ffe)
+    {
+        SynError(it8, "AllocateDataSet: too much data");
+    }
+    else {
+        // One spare row and one spare column on top of the declared size
+        t->Data = (char**)AllocChunk(it8, ((cmsUInt32Number)t->nSamples + 1) * ((cmsUInt32Number)t->nPatches + 1) * sizeof(char*));
+        if (t->Data == NULL) {
+
+            SynError(it8, "AllocateDataSet: Unable to allocate data array");
+        }
+    }
+
+}
+
+// Returns the text stored for field `nField` of patch `nSet` in the current
+// table. Returns NULL when either index is negative or not below the table
+// dimensions, or when the table has no data array.
+static
+char* GetData(cmsIT8* it8, int nSet, int nField)
+{
+    TABLE* t = GetTable(it8);
+    int nSamples    = t -> nSamples;
+    int nPatches    = t -> nPatches;
+
+    // Negative indices would read before the start of the array
+    if (nSet < 0 || nField < 0)
+        return NULL;
+
+    if (nSet >= nPatches || nField >= nSamples)
+        return NULL;
+
+    if (!t->Data) return NULL;
+    return t->Data [nSet * nSamples + nField];
+}
+
+// Stores a copy of Val as field `nField` of patch `nSet` in the current
+// table, allocating the data array on first use.
+// Returns FALSE when the array cannot be allocated or an index is out of
+// range (reported through SynError), TRUE otherwise.
+static
+cmsBool SetData(cmsIT8* it8, int nSet, int nField, const char *Val)
+{
+    TABLE* table = GetTable(it8);
+    int slot;
+
+    if (table->Data == NULL) {
+
+        AllocateDataSet(it8);
+
+        if (table->Data == NULL)
+            return FALSE;
+    }
+
+    if (nSet < 0 || nSet > table->nPatches)
+        return SynError(it8, "Patch %d out of range, there are %d patches", nSet, table->nPatches);
+
+    if (nField < 0 || nField > table->nSamples)
+        return SynError(it8, "Sample %d out of range, there are %d samples", nField, table->nSamples);
+
+    slot = nSet * table->nSamples + nField;
+    table->Data[slot] = AllocString(it8, Val);
+
+    return TRUE;
+}
+
+
+// --------------------------------------------------------------- File I/O
+
+
+// Sends a string to the save stream. A NULL string is written as a single
+// blank. The Used counter always advances by the string length; the bytes
+// go to the file when one is attached, else to the memory block when one is
+// attached, else nowhere (counting only).
+static
+void WriteStr(SAVESTREAM* f, const char *str)
+{
+    const char* text = (str != NULL) ? str : " ";
+    cmsUInt32Number count = (cmsUInt32Number) strlen(text);
+
+    f->Used += count;
+
+    if (f->stream != NULL) {
+
+        // File target
+        if (fwrite(text, 1, count, f->stream) != count)
+            cmsSignalError(0, cmsERROR_WRITE, "Write to file error in CGATS parser");
+
+        return;
+    }
+
+    if (f->Base == NULL)
+        return;     // Just counting the bytes
+
+    if (f->Used > f->Max) {
+
+        cmsSignalError(0, cmsERROR_WRITE, "Write to memory overflows in CGATS parser");
+        return;
+    }
+
+    memmove(f->Ptr, text, count);
+    f->Ptr += count;
+}
+
+
+// printf-style front end for WriteStr. The text is formatted into a 4096
+// byte buffer, so longer output is truncated.
+
+static
+void Writef(SAVESTREAM* f, const char* frm, ...)
+{
+    char Text[4096];
+    va_list ap;
+
+    va_start(ap, frm);
+    vsnprintf(Text, 4095, frm, ap);
+    va_end(ap);
+
+    Text[4095] = 0;
+    WriteStr(f, Text);
+}
+
+// Writes full header of the current table: the sheet type line followed by
+// one line per header entry. Comment entries (keyword starting with '#') are
+// written as a "#"-framed block; every other entry is written as its keyword
+// plus the value formatted according to the entry's write mode. Keywords
+// that are not yet registered are added to ValidKeywords on the way.
+// An unknown write mode is reported through SynError and stops the output.
+static
+void WriteHeader(cmsIT8* it8, SAVESTREAM* fp)
+{
+    KEYVALUE* p;
+    TABLE* t = GetTable(it8);
+
+    // Writes the type
+    WriteStr(fp, t->SheetType);
+    WriteStr(fp, "\n");
+
+    for (p = t->HeaderList; (p != NULL); p = p->Next)
+    {
+        if (*p ->Keyword == '#') {
+
+            const char* Pt;
+
+            WriteStr(fp, "#\n# ");
+
+            // An entry stored without text has nothing to copy
+            for (Pt = p ->Value; Pt != NULL && *Pt; Pt++) {
+
+
+                Writef(fp, "%c", *Pt);
+
+                // Continuation lines of the comment get their own marker
+                if (*Pt == '\n') {
+                    WriteStr(fp, "# ");
+                }
+            }
+
+            WriteStr(fp, "\n#\n");
+            continue;
+        }
+
+
+        if (!IsAvailableOnList(it8-> ValidKeywords, p->Keyword, NULL, NULL)) {
+
+#ifdef CMS_STRICT_CGATS
+            WriteStr(fp, "KEYWORD\t\"");
+            WriteStr(fp, p->Keyword);
+            WriteStr(fp, "\"\n");
+#endif
+
+            AddAvailableProperty(it8, p->Keyword, WRITE_UNCOOKED);
+        }
+
+        WriteStr(fp, p->Keyword);
+        if (p->Value) {
+
+            switch (p ->WriteAs) {
+
+            case WRITE_UNCOOKED:
+                    Writef(fp, "\t%s", p ->Value);
+                    break;
+
+            case WRITE_STRINGIFY:
+                    Writef(fp, "\t\"%s\"", p->Value );
+                    break;
+
+            case WRITE_HEXADECIMAL:
+                    // The value is stored as unsigned decimal text; atoi
+                    // would overflow above INT_MAX
+                    Writef(fp, "\t0x%X", (unsigned int) strtoul(p ->Value, NULL, 10));
+                    break;
+
+            case WRITE_BINARY:
+                    {
+                        // printf has no binary conversion, so the digits are
+                        // built by hand, most significant bit first.
+                        // NOTE(review): the "0b" prefix is assumed to be what
+                        // the reader accepts for binary numbers; confirm
+                        // against InSymbol.
+                        char Digits[33];
+                        unsigned long v = strtoul(p ->Value, NULL, 10) & 0xFFFFFFFFUL;
+                        int pos = 32;
+
+                        Digits[pos] = 0;
+                        do {
+                            Digits[--pos] = (char) ('0' + (int) (v & 1UL));
+                            v >>= 1;
+                        } while (v != 0);
+
+                        Writef(fp, "\t0b%s", Digits + pos);
+                    }
+                    break;
+
+            case WRITE_PAIR:
+                    Writef(fp, "\t\"%s,%s\"", p->Subkey, p->Value);
+                    break;
+
+            default: SynError(it8, "Unknown write mode %d", p ->WriteAs);
+                     return;
+            }
+        }
+
+        WriteStr (fp, "\n");
+    }
+
+}
+
+
+// Writes the data format section of the current table: the field names
+// between BEGIN_DATA_FORMAT and END_DATA_FORMAT, separated by tabs.
+// Nothing is written when the table has no field-name array. The names are
+// left out when NUMBER_OF_FIELDS is missing or claims more fields than the
+// table holds.
+static
+void WriteDataFormat(SAVESTREAM* fp, cmsIT8* it8)
+{
+    int i, nSamples;
+    const char* Fields;
+    TABLE* t = GetTable(it8);
+
+    if (!t -> DataFormat) return;
+
+       WriteStr(fp, "BEGIN_DATA_FORMAT\n");
+       WriteStr(fp, " ");
+
+       // A missing property counts as 0 instead of reaching atoi as NULL
+       Fields = cmsIT8GetProperty(it8, "NUMBER_OF_FIELDS");
+       nSamples = (Fields != NULL) ? atoi(Fields) : 0;
+
+       // The property may have been changed after the array was sized; never
+       // read more names than the table holds
+       if (nSamples <= t->nSamples) {
+
+           for (i = 0; i < nSamples; i++) {
+
+                  WriteStr(fp, t->DataFormat[i]);
+                  WriteStr(fp, ((i == (nSamples-1)) ? "\n" : "\t"));
+           }
+       }
+
+       WriteStr (fp, "END_DATA_FORMAT\n");
+}
+
+
+// Writes data array of the current table between BEGIN_DATA and END_DATA,
+// one patch per line with tab-separated fields. Empty cells are written as
+// "" and values containing a blank are enclosed in quotes.
+// Nothing is written when the table has no data array.
+static
+void WriteData(SAVESTREAM* fp, cmsIT8* it8)
+{
+       int  i, j;
+       const char* Sets;
+       TABLE* t = GetTable(it8);
+
+       if (!t->Data) return;
+
+       WriteStr (fp, "BEGIN_DATA\n");
+
+       // A missing property counts as 0 instead of reaching atoi as NULL.
+       // NOTE(review): the patch count is re-read from the property here and
+       // is not checked against the size the array was allocated with.
+       Sets = cmsIT8GetProperty(it8, "NUMBER_OF_SETS");
+       t->nPatches = (Sets != NULL) ? atoi(Sets) : 0;
+
+       for (i = 0; i < t-> nPatches; i++) {
+
+              WriteStr(fp, " ");
+
+              for (j = 0; j < t->nSamples; j++) {
+
+                     char *ptr = t->Data[i*t->nSamples+j];
+
+                     if (ptr == NULL) WriteStr(fp, "\"\"");
+                     else {
+                         // If value contains whitespace, enclose within quote
+
+                         if (strchr(ptr, ' ') != NULL) {
+
+                             WriteStr(fp, "\"");
+                             WriteStr(fp, ptr);
+                             WriteStr(fp, "\"");
+                         }
+                         else
+                            WriteStr(fp, ptr);
+                     }
+
+                     WriteStr(fp, ((j == (t->nSamples-1)) ? "\n" : "\t"));
+              }
+       }
+       WriteStr (fp, "END_DATA\n");
+}
+
+
+
+// Writes every table of the container (header, data format, data) to the
+// text file cFileName. The current table is changed while doing so.
+// Returns FALSE when the file cannot be opened or closed, TRUE otherwise.
+cmsBool CMSEXPORT cmsIT8SaveToFile(cmsHANDLE hIT8, const char* cFileName)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    SAVESTREAM out;
+    cmsUInt32Number table = 0;
+
+    memset(&out, 0, sizeof(out));
+
+    out.stream = fopen(cFileName, "wt");
+    if (out.stream == NULL)
+        return FALSE;
+
+    while (table < it8->TablesCount) {
+
+        cmsIT8SetTable(hIT8, table);
+        WriteHeader(it8, &out);
+        WriteDataFormat(&out, it8);
+        WriteData(&out, it8);
+
+        table++;
+    }
+
+    return (fclose(out.stream) != 0) ? FALSE : TRUE;
+}
+
+
+// Saves to memory. With MemPtr == NULL nothing is stored and *BytesNeeded
+// receives the number of bytes required, terminator included. With a buffer,
+// *BytesNeeded gives its size on entry and receives the required size on
+// return; text that does not fit is dropped by WriteStr.
+// Returns FALSE when BytesNeeded is NULL, TRUE otherwise.
+cmsBool CMSEXPORT cmsIT8SaveToMem(cmsHANDLE hIT8, void *MemPtr, cmsUInt32Number* BytesNeeded)
+{
+    SAVESTREAM sd;
+    cmsUInt32Number i;
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+
+    if (BytesNeeded == NULL) return FALSE;
+
+    memset(&sd, 0, sizeof(sd));
+
+    sd.stream = NULL;
+    sd.Base   = (cmsUInt8Number*)  MemPtr;
+    sd.Ptr    = sd.Base;
+
+    sd.Used = 0;
+
+    if (sd.Base)
+        sd.Max  = *BytesNeeded;     // Write to memory?
+    else
+        sd.Max  = 0;                // Just counting the needed bytes
+
+    for (i=0; i < it8 ->TablesCount; i++) {
+
+        cmsIT8SetTable(hIT8, i);
+        WriteHeader(it8, &sd);
+        WriteDataFormat(&sd, it8);
+        WriteData(&sd, it8);
+    }
+
+    sd.Used++;  // The \0 at the very end
+
+    // WriteStr lets the text fill the buffer completely, so the terminator
+    // is only stored while the write position is still inside the buffer
+    if (sd.Base && (cmsUInt32Number) (sd.Ptr - sd.Base) < sd.Max)
+        *sd.Ptr = 0;
+
+    *BytesNeeded = sd.Used;
+
+    return TRUE;
+}
+
+
+// -------------------------------------------------------------- Higher level parsing
+/* Parses a data format section, which runs from BEGIN_DATA_FORMAT up to
+ * END_DATA_FORMAT. Each identifier found is stored through SetDataFormat as
+ * the name of the next field of the current table.
+ *
+ * Returns FALSE when something other than an identifier shows up where a
+ * field name is expected, or when SetDataFormat fails. A field count that
+ * differs from t->nSamples is reported through SynError, but the function
+ * still returns TRUE in that case.
+ */
+static
+cmsBool DataFormatSection(cmsIT8* it8)
+{
+    int iField = 0;
+    TABLE* t = GetTable(it8);
+
+    InSymbol(it8);   // Eats "BEGIN_DATA_FORMAT"
+    CheckEOLN(it8);   // result ignored
+
+    while (it8->sy != SEND_DATA_FORMAT &&
+        it8->sy != SEOLN &&
+        it8->sy != SEOF &&
+        it8->sy != SSYNERROR)  {
+
+            if (it8->sy != SIDENT) {
+
+                return SynError(it8, "Sample type expected");
+            }
+
+            if (!SetDataFormat(it8, iField, it8->id)) return FALSE;
+            iField++;
+
+            InSymbol(it8);
+            SkipEOLN(it8);   // field names may be spread over several lines
+       }
+
+       SkipEOLN(it8);
+       Skip(it8, SEND_DATA_FORMAT);
+       SkipEOLN(it8);
+
+       if (iField != t ->nSamples) {
+           SynError(it8, "Count mismatch. NUMBER_OF_FIELDS was %d, found %d\n", t ->nSamples, iField);
+
+
+       }
+
+       return TRUE;
+}
+
+
+/* Parses a data section, which runs from BEGIN_DATA up to END_DATA or end of
+ * file. Values are stored through SetData field by field; after t->nSamples
+ * values the parser moves on to the next patch.
+ *
+ * Returns FALSE when a value cannot be read (GetVal), cannot be stored
+ * (SetData), or when the number of patches found differs from t->nPatches.
+ */
+static
+cmsBool DataSection (cmsIT8* it8)
+{
+    int  iField = 0;
+    int  iSet   = 0;
+    char Buffer[256];
+    TABLE* t = GetTable(it8);
+
+    InSymbol(it8);   // Eats "BEGIN_DATA"
+    CheckEOLN(it8);   // result ignored
+
+    if (!t->Data)
+        AllocateDataSet(it8);   // on failure t->Data stays NULL and SetData below returns FALSE
+
+    while (it8->sy != SEND_DATA && it8->sy != SEOF)
+    {
+        if (iField >= t -> nSamples) {   // row complete, go to next patch
+            iField = 0;
+            iSet++;
+
+        }
+
+        if (it8->sy != SEND_DATA && it8->sy != SEOF) {
+
+            if (!GetVal(it8, Buffer, 255, "Sample data expected"))   // 255 leaves room for the terminator GetVal writes at index max
+                return FALSE;
+
+            if (!SetData(it8, iSet, iField, Buffer))
+                return FALSE;
+
+            iField++;
+
+            InSymbol(it8);
+            SkipEOLN(it8);
+        }
+    }
+
+    SkipEOLN(it8);
+    Skip(it8, SEND_DATA);
+    SkipEOLN(it8);
+
+    // Check for data completion.
+
+    if ((iSet+1) != t -> nPatches)
+        return SynError(it8, "Count mismatch. NUMBER_OF_SETS was %d, found %d\n", t ->nPatches, iSet+1);
+
+    return TRUE;
+}
+
+
+
+/* Parses header lines until BEGIN_DATA_FORMAT, BEGIN_DATA, end of file or a
+ * syntax error symbol is reached. Three kinds of lines are handled:
+ *
+ *   KEYWORD <name>          registers <name> as a valid keyword.
+ *   DATA_FORMAT_ID <name>   registers <name> as a valid sample identifier
+ *                           (symbol SDATA_FORMAT_ID).
+ *   <ident> <value>         stores a property in the header of the current
+ *                           table. Unless CMS_STRICT_CGATS is defined, an
+ *                           unknown identifier is registered on the fly.
+ *                           For keywords registered as WRITE_PAIR the value
+ *                           must be a string holding "subkey, value" pairs
+ *                           separated by ';'; each pair is stored separately.
+ *
+ * Returns FALSE on any error, TRUE otherwise.
+ */
+static
+cmsBool HeaderSection(cmsIT8* it8)
+{
+    char VarName[MAXID];
+    char Buffer[MAXSTR];
+    KEYVALUE* Key;
+
+        while (it8->sy != SEOF &&
+               it8->sy != SSYNERROR &&
+               it8->sy != SBEGIN_DATA_FORMAT &&
+               it8->sy != SBEGIN_DATA) {
+
+
+        switch (it8 -> sy) {
+
+        case SKEYWORD:
+                InSymbol(it8);
+                if (!GetVal(it8, Buffer, MAXSTR-1, "Keyword expected")) return FALSE;
+                if (!AddAvailableProperty(it8, Buffer, WRITE_UNCOOKED)) return FALSE;
+                InSymbol(it8);
+                break;
+
+
+        case SDATA_FORMAT_ID:
+                InSymbol(it8);
+                if (!GetVal(it8, Buffer, MAXSTR-1, "Keyword expected")) return FALSE;
+                if (!AddAvailableSampleID(it8, Buffer)) return FALSE;
+                InSymbol(it8);
+                break;
+
+
+        case SIDENT:
+            strncpy(VarName, it8->id, MAXID - 1);   // keep the name: the next InSymbol may overwrite it8->id
+            VarName[MAXID - 1] = 0;
+
+            if (!IsAvailableOnList(it8->ValidKeywords, VarName, NULL, &Key)) {
+
+#ifdef CMS_STRICT_CGATS
+                return SynError(it8, "Undefined keyword '%s'", VarName);
+#else
+                Key = AddAvailableProperty(it8, VarName, WRITE_UNCOOKED);
+                if (Key == NULL) return FALSE;
+#endif
+            }
+
+            InSymbol(it8);
+            if (!GetVal(it8, Buffer, MAXSTR - 1, "Property data expected")) return FALSE;
+
+            if (Key->WriteAs != WRITE_PAIR) {
+                AddToList(it8, &GetTable(it8)->HeaderList, VarName, NULL, Buffer,
+                    (it8->sy == SSTRING) ? WRITE_STRINGIFY : WRITE_UNCOOKED);   // result not checked
+            }
+            else {
+                const char *Subkey;
+                char *Nextkey;
+                if (it8->sy != SSTRING)
+                    return SynError(it8, "Invalid value '%s' for property '%s'.", Buffer, VarName);
+
+                // chop the string as a list of "subkey, value" pairs, using ';' as a separator
+                for (Subkey = Buffer; Subkey != NULL; Subkey = Nextkey)
+                {
+                    char *Value, *temp;
+
+                    //  identify token pair boundary
+                    Nextkey = (char*)strchr(Subkey, ';');
+                    if (Nextkey)
+                        *Nextkey++ = '\0';
+
+                    // for each pair, split the subkey and the value
+                    Value = (char*)strrchr(Subkey, ',');   // last comma of the pair
+                    if (Value == NULL)
+                        return SynError(it8, "Invalid value for property '%s'.", VarName);
+
+                    // gobble the spaces before the comma, and the comma itself
+                    temp = Value++;
+                    do *temp-- = '\0'; while (temp >= Subkey && *temp == ' ');
+
+                    // gobble any space at the right
+                    temp = Value + strlen(Value) - 1;
+                    while (*temp == ' ') *temp-- = '\0';   // stops at the latest on the '\0' that replaced the comma
+
+                    // trim the strings from the left
+                    Subkey += strspn(Subkey, " ");
+                    Value += strspn(Value, " ");
+
+                    if (Subkey[0] == 0 || Value[0] == 0)
+                        return SynError(it8, "Invalid value for property '%s'.", VarName);
+                    AddToList(it8, &GetTable(it8)->HeaderList, VarName, Subkey, Value, WRITE_PAIR);   // result not checked
+                }
+            }
+
+            InSymbol(it8);
+            break;
+
+
+        case SEOLN: break;
+
+        default:
+                return SynError(it8, "expected keyword or identifier");
+        }
+
+    SkipEOLN(it8);
+    }
+
+    return TRUE;
+
+}
+
+
+// Reads the sheet type, i.e. the text on the first line of the stream up to
+// the first tab, line break or end of input, into SheetTypePtr. Leading
+// separators are skipped. At most MAXSTR-1 characters are stored, followed
+// by the terminator; the rest of an overlong line is consumed and dropped.
+// The destination must hold MAXSTR bytes, which is the size
+// cmsIT8SetSheetType assumes for the same field.
+static
+void ReadType(cmsIT8* it8, char* SheetTypePtr)
+{
+    cmsInt32Number cnt = 0;
+
+    // First line is a very special case.
+
+    while (isseparator(it8->ch))
+            NextCh(it8);
+
+    while (it8->ch != '\r' && it8 ->ch != '\n' && it8->ch != '\t' && it8 -> ch != 0) {
+
+        // Keep the last byte free for the terminator
+        if (cnt++ < MAXSTR-1)
+            *SheetTypePtr++= (char) it8 ->ch;
+        NextCh(it8);
+    }
+
+    *SheetTypePtr = 0;
+}
+
+/* Top level parser: reads the whole stream into the container.
+ *
+ *   it8     - container, already attached to its input.
+ *   nosheet - when zero, the first line is read as the sheet type and a type
+ *             line is also looked for in front of each further table; when
+ *             non-zero both steps are skipped.
+ *
+ * Every BEGIN_DATA section that is not followed by end of file starts a new
+ * table, which becomes the current one.
+ *
+ * Returns FALSE when a section parser fails or the scan ends on a syntax
+ * error symbol, TRUE otherwise.
+ */
+static
+cmsBool ParseIT8(cmsIT8* it8, cmsBool nosheet)
+{
+    char* SheetTypePtr = it8 ->Tab[0].SheetType;
+
+    if (nosheet == 0) {
+        ReadType(it8, SheetTypePtr);
+    }
+
+    InSymbol(it8);
+
+    SkipEOLN(it8);
+
+    while (it8-> sy != SEOF &&
+           it8-> sy != SSYNERROR) {
+
+            switch (it8 -> sy) {
+
+            case SBEGIN_DATA_FORMAT:
+                    if (!DataFormatSection(it8)) return FALSE;
+                    break;
+
+            case SBEGIN_DATA:
+
+                    if (!DataSection(it8)) return FALSE;
+
+                    if (it8 -> sy != SEOF) {
+
+                            AllocTable(it8);   // more input follows: open the next table
+                            it8 ->nTable = it8 ->TablesCount - 1;
+
+                            // Read sheet type if present. We only support identifier and string.
+                            // <ident> <eoln> is a type string
+                            // anything else, is not a type string
+                            if (nosheet == 0) {
+
+                                if (it8 ->sy == SIDENT) {
+
+                                    // May be a type sheet or may be a prop value statement. We cannot use insymbol in
+                                    // this special case...
+                                     while (isseparator(it8->ch))
+                                         NextCh(it8);
+
+                                     // If a newline is found, then this is a type string
+                                    if (it8 ->ch == '\n' || it8->ch == '\r') {
+
+                                         cmsIT8SetSheetType(it8, it8 ->id);
+                                         InSymbol(it8);
+                                    }
+                                    else
+                                    {
+                                        // It is not. Just continue
+                                        cmsIT8SetSheetType(it8, "");
+                                    }
+                                }
+                                else
+                                    // Validate quoted strings
+                                    if (it8 ->sy == SSTRING) {
+                                        cmsIT8SetSheetType(it8, it8 ->str);
+                                        InSymbol(it8);
+                                    }
+                           }
+
+                    }
+                    break;
+
+            case SEOLN:
+                    SkipEOLN(it8);
+                    break;
+
+            default:
+                    if (!HeaderSection(it8)) return FALSE;   // anything else is treated as header content
+           }
+
+    }
+
+    return (it8 -> sy != SSYNERROR);
+}
+
+
+
+/* Init useful pointers. Walks the fields of every table and
+ *
+ *   - records the index of the SAMPLE_ID field in t->SampleID and cuts the
+ *     stored sample identifiers down to at most 255 characters;
+ *   - for fields named LABEL or starting with '$', looks every cell up as a
+ *     header keyword in all tables and, when found, replaces the cell with
+ *     "<label> <table index> <property value>".
+ *
+ * The current table index is changed while iterating and restored at the
+ * end. NOTE(review): the early return on "Undefined DATA_FORMAT" leaves
+ * without restoring it.
+ */
+
+static
+void CookPointers(cmsIT8* it8)
+{
+    int idField, i;
+    char* Fld;
+    cmsUInt32Number j;
+    cmsUInt32Number nOldTable = it8 ->nTable;
+
+    for (j=0; j < it8 ->TablesCount; j++) {
+
+    TABLE* t = it8 ->Tab + j;
+
+    t -> SampleID = 0;
+    it8 ->nTable = j;   // GetData/SetData below act on the current table
+
+    for (idField = 0; idField < t -> nSamples; idField++)
+    {
+        if (t ->DataFormat == NULL){
+            SynError(it8, "Undefined DATA_FORMAT");
+            return;
+        }
+
+        Fld = t->DataFormat[idField];
+        if (!Fld) continue;
+
+
+        if (cmsstrcasecmp(Fld, "SAMPLE_ID") == 0) {
+
+            t -> SampleID = idField;
+
+            for (i=0; i < t -> nPatches; i++) {
+
+                char *Data = GetData(it8, i, idField);
+                if (Data) {
+                    char Buffer[256];
+
+                    strncpy(Buffer, Data, 255);
+                    Buffer[255] = 0;
+
+                    if (strlen(Buffer) <= strlen(Data))   // Buffer is a prefix of Data, so it never gets longer
+                        strcpy(Data, Buffer);
+                    else
+                        SetData(it8, i, idField, Buffer);
+
+                }
+            }
+
+        }
+
+        // "LABEL" is an extension. It keeps references to forward tables
+
+        if ((cmsstrcasecmp(Fld, "LABEL") == 0) || Fld[0] == '$' ) {
+
+                    // Search for table references...
+                    for (i=0; i < t -> nPatches; i++) {
+
+                            char *Label = GetData(it8, i, idField);
+
+                            if (Label) {
+
+                                cmsUInt32Number k;
+
+                                // This is the label, search for a table containing
+                                // this property
+
+                                for (k=0; k < it8 ->TablesCount; k++) {
+
+                                    TABLE* Table = it8 ->Tab + k;
+                                    KEYVALUE* p;
+
+                                    if (IsAvailableOnList(Table->HeaderList, Label, NULL, &p)) {
+
+                                        // Available, keep type and table
+                                        char Buffer[256];
+
+                                        char *Type  = p ->Value;   // NOTE(review): may be NULL for a property stored without value
+                                        int  nTable = (int) k;
+
+                                        snprintf(Buffer, 255, "%s %d %s", Label, nTable, Type );
+
+                                        SetData(it8, i, idField, Buffer);
+                                    }
+                                }
+
+
+                            }
+
+                    }
+
+
+        }
+
+    }
+    }
+
+    it8 ->nTable = nOldTable;
+}
+
+// Tries to infer whether a memory block looks like a CGATS/IT8 file by scanning
+// its first line (at most the first 132 bytes, starting at offset 1).
+// Returns 0 when the block is shorter than 10 bytes, contains a byte outside
+// 32..127 (other than tab, CR, LF), has no line end in the scanned range, ends
+// the line inside an open quote, or has more than two words on the line.
+// Otherwise returns the number of words found on the first line (1 or 2).
+static
+int IsMyBlock(const cmsUInt8Number* Buffer, cmsUInt32Number n)
+{
+    int nWords = 1, pendingGap = 0, inQuotes = 0;
+    cmsUInt32Number pos, limit;
+
+    if (n < 10) return 0;   // Too small
+
+    limit = (n > 132) ? 132 : n;
+
+    for (pos = 1; pos < limit; pos++) {
+
+        cmsUInt8Number ch = Buffer[pos];
+
+        if (ch == '\n' || ch == '\r') {
+
+            // End of first line: decide now
+            if (inQuotes == 1 || nWords > 2) return 0;
+            return nWords;
+        }
+        else if (ch == '\t' || ch == ' ') {
+
+            // Blanks outside quotes separate words; runs of blanks count once
+            if (!inQuotes && !pendingGap)
+                pendingGap = 1;
+        }
+        else if (ch == '\"') {
+
+            inQuotes = !inQuotes;
+        }
+        else {
+
+            if (ch < 32 || ch > 127) return 0;
+
+            // First character after a gap starts a new word
+            nWords += pendingGap;
+            pendingGap = 0;
+        }
+    }
+
+    return 0;
+}
+
+
+// Reads up to the first 132 bytes of FileName and passes them to IsMyBlock.
+// Returns FALSE when the file cannot be opened (an error is signalled) or
+// closed; otherwise returns whatever IsMyBlock returns for the bytes read.
+static
+cmsBool IsMyFile(const char* FileName)
+{
+   cmsUInt8Number Head[133];     // 132 bytes of data plus a terminating zero
+   cmsUInt32Number nRead;
+   FILE *Stream = fopen(FileName, "rt");
+
+   if (Stream == NULL) {
+       cmsSignalError(0, cmsERROR_FILE, "File '%s' not found", FileName);
+       return FALSE;
+   }
+
+   nRead = (cmsUInt32Number) fread(Head, 1, 132, Stream);
+
+   if (fclose(Stream) != 0)
+       return FALSE;
+
+   Head[nRead] = '\0';
+
+   return IsMyBlock(Head, nRead);
+}
+
+// ---------------------------------------------------------- Exported routines
+
+
+// Creates an IT8 object from a text block held in memory.
+//   ContextID: context used for allocation.
+//   Ptr, len:  the text and its length in bytes; both must be non-NULL / non-zero.
+// Returns the new handle, or NULL when the block does not look like CGATS
+// (IsMyBlock returns 0), when allocation fails, or when parsing fails.
+// The temporary copy of the text is released before returning.
+cmsHANDLE  CMSEXPORT cmsIT8LoadFromMem(cmsContext ContextID, const void *Ptr, cmsUInt32Number len)
+{
+    cmsHANDLE hIT8;
+    cmsIT8*  it8;
+    int type;
+
+    _cmsAssert(Ptr != NULL);
+    _cmsAssert(len != 0);
+
+    type = IsMyBlock((const cmsUInt8Number*)Ptr, len);
+    if (type == 0) return NULL;
+
+    hIT8 = cmsIT8Alloc(ContextID);
+    if (!hIT8) return NULL;
+
+    it8 = (cmsIT8*) hIT8;
+    it8 ->MemoryBlock = (char*) _cmsMalloc(ContextID, len + 1);
+    if (it8->MemoryBlock == NULL)
+    {
+        cmsIT8Free(hIT8);
+        return NULL;    // pointer-returning function: report failure as NULL, as cmsIT8LoadFromFile does
+    }
+
+    // Work on a zero-terminated private copy of the caller's text
+    strncpy(it8 ->MemoryBlock, (const char*) Ptr, len);
+    it8 ->MemoryBlock[len] = 0;
+
+    // No file name is associated with an in-memory source
+    strncpy(it8->FileStack[0]->FileName, "", cmsMAX_PATH-1);
+    it8-> Source = it8 -> MemoryBlock;
+
+    if (!ParseIT8(it8, type-1)) {
+
+        cmsIT8Free(hIT8);
+        return NULL;
+    }
+
+    CookPointers(it8);
+    it8 ->nTable = 0;
+
+    _cmsFree(ContextID, it8->MemoryBlock);
+    it8 -> MemoryBlock = NULL;
+
+    return hIT8;
+
+
+}
+
+
+// Creates an IT8 object by parsing the file cFileName.
+// Returns the new handle, or NULL when the file does not pass IsMyFile, the
+// object cannot be allocated, the file cannot be opened or closed, or parsing
+// fails. On every failure after allocation the object is freed.
+cmsHANDLE  CMSEXPORT cmsIT8LoadFromFile(cmsContext ContextID, const char* cFileName)
+{
+    cmsHANDLE Handle;
+    cmsIT8* Obj;
+    int nWords;
+
+    _cmsAssert(cFileName != NULL);
+
+    nWords = IsMyFile(cFileName);
+    if (nWords == 0) return NULL;
+
+    Handle = cmsIT8Alloc(ContextID);
+    if (Handle == NULL) return NULL;
+
+    Obj = (cmsIT8*) Handle;
+
+    Obj->FileStack[0]->Stream = fopen(cFileName, "rt");
+    if (Obj->FileStack[0]->Stream == NULL) {
+        cmsIT8Free(Handle);
+        return NULL;
+    }
+
+    // Remember the file name, always zero-terminated
+    strncpy(Obj->FileStack[0]->FileName, cFileName, cmsMAX_PATH-1);
+    Obj->FileStack[0]->FileName[cmsMAX_PATH-1] = 0;
+
+    if (!ParseIT8(Obj, nWords - 1)) {
+        fclose(Obj->FileStack[0]->Stream);
+        cmsIT8Free(Handle);
+        return NULL;
+    }
+
+    CookPointers(Obj);
+    Obj->nTable = 0;
+
+    if (fclose(Obj->FileStack[0]->Stream) != 0) {
+        cmsIT8Free(Handle);
+        return NULL;
+    }
+
+    return Handle;
+}
+
+// Returns the nSamples count of the table returned by GetTable(). When
+// SampleNames is not NULL it also receives that table's DataFormat array
+// (the object's own storage, not a copy).
+int CMSEXPORT cmsIT8EnumDataFormat(cmsHANDLE hIT8, char ***SampleNames)
+{
+    TABLE* Current;
+
+    _cmsAssert(hIT8 != NULL);
+
+    Current = GetTable((cmsIT8*) hIT8);
+
+    if (SampleNames != NULL) {
+        *SampleNames = Current->DataFormat;
+    }
+
+    return Current->nSamples;
+}
+
+
+// Enumerates the header keywords of the table returned by GetTable().
+// On return *PropertyNames points to an array (allocated with AllocChunk)
+// holding one Keyword pointer per header entry; the return value is the number
+// of entries. If the array cannot be allocated, *PropertyNames is set to NULL
+// and 0 is returned instead of writing through a NULL pointer.
+cmsUInt32Number CMSEXPORT cmsIT8EnumProperties(cmsHANDLE hIT8, char ***PropertyNames)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* p;
+    cmsUInt32Number n;
+    char **Props;
+    TABLE* t;
+
+    _cmsAssert(hIT8 != NULL);
+
+    t = GetTable(it8);
+
+    // Pass#1 - count properties
+
+    n = 0;
+    for (p = t -> HeaderList;  p != NULL; p = p->Next) {
+        n++;
+    }
+
+
+    Props = (char **) AllocChunk(it8, sizeof(char *) * n);
+    if (Props == NULL) {
+
+        // Allocation failed: report an empty enumeration
+        *PropertyNames = NULL;
+        return 0;
+    }
+
+    // Pass#2 - Fill pointers
+    n = 0;
+    for (p = t -> HeaderList;  p != NULL; p = p->Next) {
+        Props[n++] = p -> Keyword;
+    }
+
+    *PropertyNames = Props;
+    return n;
+}
+
+// Enumerates the subkeys stored under header property cProp in the table
+// returned by GetTable().
+// On return *SubpropertyNames points to an array (allocated with AllocChunk)
+// with one Subkey pointer per entry of the NextSubkey chain that has a
+// non-NULL Subkey; the return value is the number of such entries.
+// Returns 0 with *SubpropertyNames set to NULL when cProp is not found or the
+// array cannot be allocated.
+cmsUInt32Number CMSEXPORT cmsIT8EnumPropertyMulti(cmsHANDLE hIT8, const char* cProp, const char ***SubpropertyNames)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE *p, *tmp;
+    cmsUInt32Number n;
+    const char **Props;
+    TABLE* t;
+
+    _cmsAssert(hIT8 != NULL);
+
+
+    t = GetTable(it8);
+
+    if(!IsAvailableOnList(t->HeaderList, cProp, NULL, &p)) {
+        *SubpropertyNames = 0;
+        return 0;
+    }
+
+    // Pass#1 - count properties
+
+    n = 0;
+    for (tmp = p;  tmp != NULL; tmp = tmp->NextSubkey) {
+        if(tmp->Subkey != NULL)
+            n++;
+    }
+
+
+    Props = (const char **) AllocChunk(it8, sizeof(char *) * n);
+    if (Props == NULL) {
+
+        // Allocation failed: report an empty enumeration
+        *SubpropertyNames = NULL;
+        return 0;
+    }
+
+    // Pass#2 - Fill pointers. Each slot takes the subkey of the entry being
+    // visited (tmp), not of the head of the chain (p).
+    n = 0;
+    for (tmp = p;  tmp != NULL; tmp = tmp->NextSubkey) {
+        if(tmp->Subkey != NULL)
+            Props[n++] = tmp ->Subkey;
+    }
+
+    *SubpropertyNames = Props;
+    return n;
+}
+
+// Returns the index of the first row whose cell in the table's SampleID column
+// compares equal to cPatch under cmsstrcasecmp, or -1 when no row matches.
+// Rows whose cell is NULL are skipped.
+static
+int LocatePatch(cmsIT8* it8, const char* cPatch)
+{
+    TABLE* Current = GetTable(it8);
+    int row = 0;
+
+    while (row < Current->nPatches) {
+
+        const char* cell = GetData(it8, row, Current->SampleID);
+
+        if (cell != NULL && cmsstrcasecmp(cell, cPatch) == 0)
+            return row;
+
+        row++;
+    }
+
+    return -1;
+}
+
+
+// Returns the index of the first row whose cell in the table's SampleID column
+// is NULL, or -1 when every row already has one.
+static
+int LocateEmptyPatch(cmsIT8* it8)
+{
+    TABLE* Current = GetTable(it8);
+    int row = 0;
+
+    while (row < Current->nPatches) {
+
+        if (GetData(it8, row, Current->SampleID) == NULL)
+            return row;
+
+        row++;
+    }
+
+    return -1;
+}
+
+// Returns the index of the first data-format field that compares equal to
+// cSample under cmsstrcasecmp, or -1 when there is none. NULL fields are skipped.
+static
+int LocateSample(cmsIT8* it8, const char* cSample)
+{
+    TABLE* Current = GetTable(it8);
+    int col = 0;
+
+    while (col < Current->nSamples) {
+
+        const char* name = GetDataFormat(it8, col);
+
+        if (name != NULL && cmsstrcasecmp(name, cSample) == 0)
+            return col;
+
+        col++;
+    }
+
+    return -1;
+}
+
+
+// Public wrapper over LocateSample: index of the data-format field matching
+// cSample, or -1 when not found.
+int CMSEXPORT cmsIT8FindDataFormat(cmsHANDLE hIT8, const char* cSample)
+{
+    _cmsAssert(hIT8 != NULL);
+
+    return LocateSample((cmsIT8*) hIT8, cSample);
+}
+
+
+
+// Public wrapper over GetData: returns the text stored at (row, col) as
+// reported by GetData.
+const char* CMSEXPORT cmsIT8GetDataRowCol(cmsHANDLE hIT8, int row, int col)
+{
+    _cmsAssert(hIT8 != NULL);
+
+    return GetData((cmsIT8*) hIT8, row, col);
+}
+
+
+// Returns the cell at (row, col) converted with ParseFloatNumber, or 0.0 when
+// cmsIT8GetDataRowCol yields NULL for that cell.
+cmsFloat64Number CMSEXPORT cmsIT8GetDataRowColDbl(cmsHANDLE hIT8, int row, int col)
+{
+    const char* Text = cmsIT8GetDataRowCol(hIT8, row, col);
+
+    return (Text != NULL) ? ParseFloatNumber(Text) : 0.0;
+}
+
+
+// Public wrapper over SetData: stores Val at (row, col) and returns SetData's result.
+cmsBool CMSEXPORT cmsIT8SetDataRowCol(cmsHANDLE hIT8, int row, int col, const char* Val)
+{
+    _cmsAssert(hIT8 != NULL);
+
+    return SetData((cmsIT8*) hIT8, row, col, Val);
+}
+
+
+// Formats Val with the object's DoubleFormatter (at most 254 characters plus
+// terminator) and stores the text at (row, col) through SetData.
+cmsBool CMSEXPORT cmsIT8SetDataRowColDbl(cmsHANDLE hIT8, int row, int col, cmsFloat64Number Val)
+{
+    char Text[256];
+    cmsIT8* Obj = (cmsIT8*) hIT8;
+
+    _cmsAssert(hIT8 != NULL);
+
+    snprintf(Text, 255, Obj->DoubleFormatter, Val);
+
+    return SetData(Obj, row, col, Text);
+}
+
+
+
+// Looks a cell up by names: cSample selects the column (LocateSample) and
+// cPatch the row (LocatePatch). Returns NULL when either lookup fails,
+// otherwise whatever GetData returns for that cell.
+const char* CMSEXPORT cmsIT8GetData(cmsHANDLE hIT8, const char* cPatch, const char* cSample)
+{
+    cmsIT8* Obj = (cmsIT8*) hIT8;
+    int col, row;
+
+    _cmsAssert(hIT8 != NULL);
+
+    col = LocateSample(Obj, cSample);
+    if (col < 0) return NULL;
+
+    row = LocatePatch(Obj, cPatch);
+    if (row < 0) return NULL;
+
+    return GetData(Obj, row, col);
+}
+
+
+// Returns the cell selected by (cPatch, cSample) converted with
+// ParseFloatNumber. cmsIT8GetData returns NULL when the patch or sample is not
+// found; in that case 0.0 is returned, matching cmsIT8GetDataRowColDbl, rather
+// than handing a NULL pointer to the parser.
+cmsFloat64Number CMSEXPORT cmsIT8GetDataDbl(cmsHANDLE  it8, const char* cPatch, const char* cSample)
+{
+    const char* Buffer;
+
+    Buffer = cmsIT8GetData(it8, cPatch, cSample);
+
+    if (Buffer == NULL) return 0.0;
+
+    return ParseFloatNumber(Buffer);
+}
+
+
+
+// Stores Val in the cell selected by patch name and sample name.
+// Returns FALSE when cSample is not a known field or (for fields other than
+// SAMPLE_ID) when cPatch is not found. When the table has no patches yet, the
+// data format and data set are allocated first. Writing to the SAMPLE_ID
+// field fills the first row whose index cell is still empty; if none is left a
+// syntax error is raised through SynError and its result is returned.
+cmsBool CMSEXPORT cmsIT8SetData(cmsHANDLE hIT8, const char* cPatch, const char* cSample, const char *Val)
+{
+    cmsIT8* Obj = (cmsIT8*) hIT8;
+    TABLE* Current;
+    int col, row;
+
+    _cmsAssert(hIT8 != NULL);
+
+    Current = GetTable(Obj);
+
+    col = LocateSample(Obj, cSample);
+    if (col < 0)
+        return FALSE;
+
+    if (Current->nPatches == 0) {
+
+        AllocateDataFormat(Obj);
+        AllocateDataSet(Obj);
+        CookPointers(Obj);
+    }
+
+    if (cmsstrcasecmp(cSample, "SAMPLE_ID") != 0) {
+
+        // Ordinary field: the patch must already exist
+        row = LocatePatch(Obj, cPatch);
+        if (row < 0)
+            return FALSE;
+    }
+    else {
+
+        // Naming a new patch: take the first free row
+        row = LocateEmptyPatch(Obj);
+        if (row < 0)
+            return SynError(Obj, "Couldn't add more patches '%s'\n", cPatch);
+
+        col = Current->SampleID;
+    }
+
+    return SetData(Obj, row, col, Val);
+}
+
+
+// Formats Val with the object's DoubleFormatter and stores the resulting text
+// through cmsIT8SetData for the cell selected by (cPatch, cSample).
+cmsBool CMSEXPORT cmsIT8SetDataDbl(cmsHANDLE hIT8, const char* cPatch,
+                                   const char* cSample,
+                                   cmsFloat64Number Val)
+{
+    char Text[256];
+    cmsIT8* Obj = (cmsIT8*) hIT8;
+
+    _cmsAssert(hIT8 != NULL);
+
+    snprintf(Text, 255, Obj->DoubleFormatter, Val);
+
+    return cmsIT8SetData(hIT8, cPatch, cSample, Text);
+}
+
+// Returns the text in the SampleID column of row nPatch, or NULL when GetData
+// has none. With a NULL buffer the stored string itself is returned; otherwise
+// up to MAXSTR-1 characters are copied into buffer (which must therefore hold
+// at least MAXSTR bytes), zero-terminated, and buffer is returned.
+const char* CMSEXPORT cmsIT8GetPatchName(cmsHANDLE hIT8, int nPatch, char* buffer)
+{
+    cmsIT8* Obj = (cmsIT8*) hIT8;
+    TABLE* Current;
+    char* Name;
+
+    _cmsAssert(hIT8 != NULL);
+
+    Current = GetTable(Obj);
+    Name = GetData(Obj, nPatch, Current->SampleID);
+
+    if (Name == NULL) return NULL;
+
+    if (buffer != NULL) {
+        strncpy(buffer, Name, MAXSTR-1);
+        buffer[MAXSTR-1] = 0;
+        return buffer;
+    }
+
+    return Name;
+}
+
+// Public wrapper over LocatePatch: row index of the patch named cPatch, or -1.
+int CMSEXPORT cmsIT8GetPatchByName(cmsHANDLE hIT8, const char *cPatch)
+{
+    cmsIT8* Obj;
+
+    _cmsAssert(hIT8 != NULL);
+
+    Obj = (cmsIT8*) hIT8;
+    return LocatePatch(Obj, cPatch);
+}
+
+// Returns the TablesCount field of the IT8 object.
+cmsUInt32Number CMSEXPORT cmsIT8TableCount(cmsHANDLE hIT8)
+{
+    _cmsAssert(hIT8 != NULL);
+
+    return ((cmsIT8*) hIT8)->TablesCount;
+}
+
+// Handles the "LABEL" extension. The cell selected by (cSet, cField) is
+// expected to hold "<label> <table number> <type>"; cField defaults to "LABEL"
+// when NULL or empty. Returns -1 when the cell is missing, does not parse into
+// those three items, or (when ExpectedType is a non-empty string) its type
+// differs from ExpectedType under cmsstrcasecmp. Otherwise returns the result
+// of cmsIT8SetTable for the parsed table number.
+int CMSEXPORT cmsIT8SetTableByLabel(cmsHANDLE hIT8, const char* cSet, const char* cField, const char* ExpectedType)
+{
+    char Label[256], Type[256];
+    cmsUInt32Number nTable;
+    const char* Cell;
+
+    _cmsAssert(hIT8 != NULL);
+
+    if (cField == NULL || *cField == 0)
+        cField = "LABEL";
+
+    Cell = cmsIT8GetData(hIT8, cSet, cField);
+    if (Cell == NULL) return -1;
+
+    if (sscanf(Cell, "%255s %u %255s", Label, &nTable, Type) != 3)
+        return -1;
+
+    // An empty ExpectedType means "any type"
+    if (ExpectedType != NULL && *ExpectedType != 0 &&
+        cmsstrcasecmp(Type, ExpectedType) != 0)
+        return -1;
+
+    return cmsIT8SetTable(hIT8, nTable);
+}
+
+
+// Makes the field named cSample the SampleID (index) column of the table
+// selected by it8->nTable. Returns FALSE when LocateSample reports -1.
+cmsBool CMSEXPORT cmsIT8SetIndexColumn(cmsHANDLE hIT8, const char* cSample)
+{
+    cmsIT8* Obj = (cmsIT8*) hIT8;
+    int col;
+
+    _cmsAssert(hIT8 != NULL);
+
+    col = LocateSample(Obj, cSample);
+
+    if (col != -1) {
+        Obj->Tab[Obj->nTable].SampleID = col;
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+
+// Sets the printf-style format used when doubles are written as text.
+// A NULL Formatter selects DEFAULT_DBL_FORMAT. The stored string is always
+// zero-terminated within the DoubleFormatter buffer.
+void CMSEXPORT cmsIT8DefineDblFormat(cmsHANDLE hIT8, const char* Formatter)
+{
+    cmsIT8* Obj = (cmsIT8*) hIT8;
+
+    _cmsAssert(hIT8 != NULL);
+
+    if (Formatter != NULL) {
+        strncpy(Obj->DoubleFormatter, Formatter, sizeof(Obj->DoubleFormatter));
+    }
+    else {
+        strcpy(Obj->DoubleFormatter, DEFAULT_DBL_FORMAT);
+    }
+
+    Obj->DoubleFormatter[sizeof(Obj->DoubleFormatter)-1] = 0;
+}
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmscnvrt.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmscnvrt.c
new file mode 100644
index 0000000000..706c450212
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmscnvrt.c
@@ -0,0 +1,1162 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// Link several profiles to obtain a single LUT modelling the whole color transform. Intents, Black point
+// compensation and Adaptation parameters may vary across profiles. BPC and Adaptation refers to the PCS
+// after the profile. I.e, BPC[0] refers to connexion between profile(0) and profile(1)
+cmsPipeline* _cmsLinkProfiles(cmsContext     ContextID,
+                              cmsUInt32Number nProfiles,
+                              cmsUInt32Number Intents[],
+                              cmsHPROFILE     hProfiles[],
+                              cmsBool         BPC[],
+                              cmsFloat64Number AdaptationStates[],
+                              cmsUInt32Number dwFlags);
+
+//---------------------------------------------------------------------------------
+
+// This is the default routine for ICC-style intents. A user may decide to override it by using a plugin.
+// Supported intents are perceptual, relative colorimetric, saturation and ICC-absolute colorimetric
+static
+cmsPipeline* DefaultICCintents(cmsContext     ContextID,
+                               cmsUInt32Number nProfiles,
+                               cmsUInt32Number Intents[],
+                               cmsHPROFILE     hProfiles[],
+                               cmsBool         BPC[],
+                               cmsFloat64Number AdaptationStates[],
+                               cmsUInt32Number dwFlags);
+
+//---------------------------------------------------------------------------------
+
+// This is the entry for black-preserving K-only intents, which are non-ICC. The last profile has to be an output profile
+// to do the trick (no devicelinks allowed at that position)
+static
+cmsPipeline*  BlackPreservingKOnlyIntents(cmsContext     ContextID,
+                                          cmsUInt32Number nProfiles,
+                                          cmsUInt32Number Intents[],
+                                          cmsHPROFILE     hProfiles[],
+                                          cmsBool         BPC[],
+                                          cmsFloat64Number AdaptationStates[],
+                                          cmsUInt32Number dwFlags);
+
+//---------------------------------------------------------------------------------
+
+// This is the entry for black-plane preserving intents, which are non-ICC. Again, the last profile has to be an output profile
+// to do the trick (no devicelinks allowed at that position)
+static
+cmsPipeline*  BlackPreservingKPlaneIntents(cmsContext     ContextID,
+                                           cmsUInt32Number nProfiles,
+                                           cmsUInt32Number Intents[],
+                                           cmsHPROFILE     hProfiles[],
+                                           cmsBool         BPC[],
+                                           cmsFloat64Number AdaptationStates[],
+                                           cmsUInt32Number dwFlags);
+
+//---------------------------------------------------------------------------------
+
+
+// One node of a singly linked list of rendering-intent implementations.
+// The built-in table below and the per-context plug-in list both use it.
+typedef struct _cms_intents_list {
+
+    cmsUInt32Number Intent;             // Intent number this entry answers to
+    char            Description[256];   // Human-readable name of the intent
+    cmsIntentFn     Link;               // Function that builds the pipeline for this intent
+    struct _cms_intents_list*  Next;    // Next entry, or NULL at the end of the list
+
+} cmsIntentsList;
+
+
+// Built-in intents. The array doubles as a linked list: each entry's Next
+// points at the following array element and the last entry ends the list
+// with NULL, so it can be walked exactly like the plug-in list.
+static cmsIntentsList DefaultIntents[] = {
+
+    { INTENT_PERCEPTUAL,                            "Perceptual",                                   DefaultICCintents,            &DefaultIntents[1] },
+    { INTENT_RELATIVE_COLORIMETRIC,                 "Relative colorimetric",                        DefaultICCintents,            &DefaultIntents[2] },
+    { INTENT_SATURATION,                            "Saturation",                                   DefaultICCintents,            &DefaultIntents[3] },
+    { INTENT_ABSOLUTE_COLORIMETRIC,                 "Absolute colorimetric",                        DefaultICCintents,            &DefaultIntents[4] },
+    { INTENT_PRESERVE_K_ONLY_PERCEPTUAL,            "Perceptual preserving black ink",              BlackPreservingKOnlyIntents,  &DefaultIntents[5] },
+    { INTENT_PRESERVE_K_ONLY_RELATIVE_COLORIMETRIC, "Relative colorimetric preserving black ink",   BlackPreservingKOnlyIntents,  &DefaultIntents[6] },
+    { INTENT_PRESERVE_K_ONLY_SATURATION,            "Saturation preserving black ink",              BlackPreservingKOnlyIntents,  &DefaultIntents[7] },
+    { INTENT_PRESERVE_K_PLANE_PERCEPTUAL,           "Perceptual preserving black plane",            BlackPreservingKPlaneIntents, &DefaultIntents[8] },
+    { INTENT_PRESERVE_K_PLANE_RELATIVE_COLORIMETRIC,"Relative colorimetric preserving black plane", BlackPreservingKPlaneIntents, &DefaultIntents[9] },
+    { INTENT_PRESERVE_K_PLANE_SATURATION,           "Saturation preserving black plane",            BlackPreservingKPlaneIntents, NULL }
+};
+
+
+// Global intents plug-in chunk, initialised with NULL (no plug-in intents).
+// NOTE(review): presumably used for the global (NULL) context - confirm against the context code.
+_cmsIntentsPluginChunkType _cmsIntentsPluginChunk = { NULL };
+
+// Copies the plug-in intents list of src into ctx, preserving node order.
+// Every node and the chunk head are duplicated from ctx's memory pool. If a
+// node cannot be duplicated the function returns at once, leaving
+// ctx->chunks[IntentPlugin] unassigned by this call.
+static
+void DupPluginIntentsList(struct _cmsContext_struct* ctx,
+                                               const struct _cmsContext_struct* src)
+{
+    _cmsIntentsPluginChunkType  Copy = { NULL };
+    _cmsIntentsPluginChunkType* Source = (_cmsIntentsPluginChunkType*) src->chunks[IntentPlugin];
+    cmsIntentsList** Link = &Copy.Intents;    // Where the next duplicated node gets attached
+    cmsIntentsList*  Node;
+
+    for (Node = Source->Intents; Node != NULL; Node = Node->Next) {
+
+        cmsIntentsList* Clone = (cmsIntentsList*) _cmsSubAllocDup(ctx->MemPool, Node, sizeof(cmsIntentsList));
+
+        if (Clone == NULL)
+            return;
+
+        // Append at the tail so the copy keeps the original order
+        Clone->Next = NULL;
+        *Link = Clone;
+        Link = &Clone->Next;
+    }
+
+    ctx->chunks[IntentPlugin] = _cmsSubAllocDup(ctx->MemPool, &Copy, sizeof(_cmsIntentsPluginChunkType));
+}
+
+// Sets up the intents plug-in chunk of ctx. Without a source context the chunk
+// is a duplicate of an empty (NULL-initialised) chunk; with one, the source's
+// plug-in list is copied node by node.
+void  _cmsAllocIntentsPluginChunk(struct _cmsContext_struct* ctx,
+                                         const struct _cmsContext_struct* src)
+{
+    if (src == NULL) {
+
+        static _cmsIntentsPluginChunkType IntentsPluginChunkType = { NULL };
+
+        ctx->chunks[IntentPlugin] = _cmsSubAllocDup(ctx->MemPool, &IntentsPluginChunkType, sizeof(_cmsIntentsPluginChunkType));
+        return;
+    }
+
+    // Copy all linked list
+    DupPluginIntentsList(ctx, src);
+}
+
+
+// Finds the implementation of an intent. The context's plug-in list is searched
+// first, then the built-in table, so a plug-in entry wins over a built-in one
+// with the same number. Returns NULL when neither list has the intent.
+static
+cmsIntentsList* SearchIntent(cmsContext ContextID, cmsUInt32Number Intent)
+{
+    _cmsIntentsPluginChunkType* Chunk = ( _cmsIntentsPluginChunkType*) _cmsContextGetClientChunk(ContextID, IntentPlugin);
+    cmsIntentsList* Node;
+
+    Node = Chunk->Intents;
+    while (Node != NULL) {
+        if (Node->Intent == Intent) return Node;
+        Node = Node->Next;
+    }
+
+    Node = DefaultIntents;
+    while (Node != NULL) {
+        if (Node->Intent == Intent) return Node;
+        Node = Node->Next;
+    }
+
+    return NULL;
+}
+
+// Black point compensation. Implemented as a per-channel linear scaling in XYZ.
+// Black points should come relative to the white point. Fills the 3x3 matrix m
+// with a diagonal scaling and the vector off with the matching offsets.
+// NOTE(review): a black point channel equal to D50 makes tx/ty/tz zero and the
+// divisions below are not guarded.
+static
+void ComputeBlackPointCompensation(const cmsCIEXYZ* BlackPointIn,
+                                   const cmsCIEXYZ* BlackPointOut,
+                                   cmsMAT3* m, cmsVEC3* off)
+{
+  cmsFloat64Number ax, ay, az, bx, by, bz, tx, ty, tz;
+
+   // Now we need to compute a matrix m plus an offset off such that
+   // [m]*bpin + off = bpout
+   // [m]*D50  + off = D50
+   //
+   // This is a linear scaling in the form ax+b, where
+   // a = (bpout - D50) / (bpin - D50)
+   // b = - D50* (bpout - bpin) / (bpin - D50)
+
+   // Common denominator (bpin - D50), one per channel
+   tx = BlackPointIn->X - cmsD50_XYZ()->X;
+   ty = BlackPointIn->Y - cmsD50_XYZ()->Y;
+   tz = BlackPointIn->Z - cmsD50_XYZ()->Z;
+
+   // Slopes
+   ax = (BlackPointOut->X - cmsD50_XYZ()->X) / tx;
+   ay = (BlackPointOut->Y - cmsD50_XYZ()->Y) / ty;
+   az = (BlackPointOut->Z - cmsD50_XYZ()->Z) / tz;
+
+   // Offsets
+   bx = - cmsD50_XYZ()-> X * (BlackPointOut->X - BlackPointIn->X) / tx;
+   by = - cmsD50_XYZ()-> Y * (BlackPointOut->Y - BlackPointIn->Y) / ty;
+   bz = - cmsD50_XYZ()-> Z * (BlackPointOut->Z - BlackPointIn->Z) / tz;
+
+   // Diagonal matrix with the slopes, vector with the offsets
+   _cmsVEC3init(&m ->v[0], ax, 0,  0);
+   _cmsVEC3init(&m ->v[1], 0, ay,  0);
+   _cmsVEC3init(&m ->v[2], 0,  0,  az);
+   _cmsVEC3init(off, bx, by, bz);
+
+}
+
+
+// Approximates a blackbody illuminant based on CHAD information: D50 is pushed
+// through the inverse of Chad and the resulting white point is converted to a
+// temperature with cmsTempFromWhitePoint.
+// Returns that temperature, or -1.0 on failure (Chad is not invertible or the
+// white point has no temperature). Callers detect failure by testing for a
+// negative result, so every failure path must return a negative value.
+static
+cmsFloat64Number CHAD2Temp(const cmsMAT3* Chad)
+{
+    // Convert D50 across inverse CHAD to get the absolute white point
+    cmsVEC3 d, s;
+    cmsCIEXYZ Dest;
+    cmsCIExyY DestChromaticity;
+    cmsFloat64Number TempK;
+    cmsMAT3 m1, m2;
+
+    m1 = *Chad;
+    // Singular matrix: report failure as -1.0 (0.0 would pass for a valid temperature)
+    if (!_cmsMAT3inverse(&m1, &m2)) return -1.0;
+
+    s.n[VX] = cmsD50_XYZ() -> X;
+    s.n[VY] = cmsD50_XYZ() -> Y;
+    s.n[VZ] = cmsD50_XYZ() -> Z;
+
+    _cmsMAT3eval(&d, &m2, &s);
+
+    Dest.X = d.n[VX];
+    Dest.Y = d.n[VY];
+    Dest.Z = d.n[VZ];
+
+    cmsXYZ2xyY(&DestChromaticity, &Dest);
+
+    if (!cmsTempFromWhitePoint(&TempK, &DestChromaticity))
+        return -1.0;
+
+    return TempK;
+}
+
+// Computes a CHAD for a given temperature: the temperature is turned into a
+// white point (xyY, then XYZ) and _cmsAdaptationMatrix is asked for the matrix
+// between that white and D50. Results of the helper calls are not checked.
+static
+void Temp2CHAD(cmsMAT3* Chad, cmsFloat64Number Temp)
+{
+    cmsCIExyY WhiteChromaticity;
+    cmsCIEXYZ WhiteXYZ;
+
+    cmsWhitePointFromTemp(&WhiteChromaticity, Temp);
+    cmsxyY2XYZ(&WhiteXYZ, &WhiteChromaticity);
+
+    _cmsAdaptationMatrix(Chad, NULL, &WhiteXYZ, cmsD50_XYZ());
+}
+
+// Join scalings to obtain relative input to absolute and then to relative output.
+// Result is stored in the 3x3 matrix m.
+//   AdaptationState == 1.0: m is the diagonal ratio WhitePointIn / WhitePointOut.
+//   AdaptationState == 0.0: the scaling is combined with the output CHAD and the
+//                           inverse of the input CHAD.
+//   anything else:          the scaling is combined with the inverse input CHAD
+//                           and a CHAD built for a temperature interpolated
+//                           between the two profiles.
+// Returns FALSE when a matrix cannot be inverted or a temperature cannot be
+// derived from a CHAD, TRUE otherwise.
+static
+cmsBool  ComputeAbsoluteIntent(cmsFloat64Number AdaptationState,
+                               const cmsCIEXYZ* WhitePointIn,
+                               const cmsMAT3* ChromaticAdaptationMatrixIn,
+                               const cmsCIEXYZ* WhitePointOut,
+                               const cmsMAT3* ChromaticAdaptationMatrixOut,
+                               cmsMAT3* m)
+{
+    cmsMAT3 Scale, m1, m2, m3, m4;
+
+    // TODO: Follow Marc Mahy's recommendation to check if CHAD is same by using M1*M2 == M2*M1. If so, do nothing.
+    // TODO: Add support for ArgyllArts tag
+
+    // Adaptation state
+    if (AdaptationState == 1.0) {
+
+        // Observer is fully adapted. Keep chromatic adaptation.
+        // That is the standard V4 behaviour
+        _cmsVEC3init(&m->v[0], WhitePointIn->X / WhitePointOut->X, 0, 0);
+        _cmsVEC3init(&m->v[1], 0, WhitePointIn->Y / WhitePointOut->Y, 0);
+        _cmsVEC3init(&m->v[2], 0, 0, WhitePointIn->Z / WhitePointOut->Z);
+
+    }
+    else  {
+
+        // Incomplete adaptation. This is an advanced feature.
+        _cmsVEC3init(&Scale.v[0], WhitePointIn->X / WhitePointOut->X, 0, 0);
+        _cmsVEC3init(&Scale.v[1], 0,  WhitePointIn->Y / WhitePointOut->Y, 0);
+        _cmsVEC3init(&Scale.v[2], 0, 0,  WhitePointIn->Z / WhitePointOut->Z);
+
+
+        if (AdaptationState == 0.0) {
+        
+            m1 = *ChromaticAdaptationMatrixOut;
+            _cmsMAT3per(&m2, &m1, &Scale);
+            // m2 holds CHAD from output white to D50 times abs. col. scaling
+
+            // Observer is not adapted, undo the chromatic adaptation
+            // NOTE(review): this product is stored in m and then overwritten by
+            // the _cmsMAT3per(m, &m2, &m4) call below - confirm it is intentional.
+            _cmsMAT3per(m, &m2, ChromaticAdaptationMatrixOut);
+
+            m3 = *ChromaticAdaptationMatrixIn;
+            if (!_cmsMAT3inverse(&m3, &m4)) return FALSE;
+            _cmsMAT3per(m, &m2, &m4);
+
+        } else {
+
+            cmsMAT3 MixedCHAD;
+            cmsFloat64Number TempSrc, TempDest, Temp;
+
+            m1 = *ChromaticAdaptationMatrixIn;
+            if (!_cmsMAT3inverse(&m1, &m2)) return FALSE;
+            _cmsMAT3per(&m3, &m2, &Scale);
+            // m3 holds CHAD from input white to D50 times abs. col. scaling
+
+            TempSrc  = CHAD2Temp(ChromaticAdaptationMatrixIn);
+            TempDest = CHAD2Temp(ChromaticAdaptationMatrixOut);
+
+            if (TempSrc < 0.0 || TempDest < 0.0) return FALSE; // Something went wrong
+
+            // Same whites and (nearly) same temperature: nothing to do
+            if (_cmsMAT3isIdentity(&Scale) && fabs(TempSrc - TempDest) < 0.01) {
+
+                _cmsMAT3identity(m);
+                return TRUE;
+            }
+
+            // Linear blend of both temperatures, weighted by the adaptation state
+            Temp = (1.0 - AdaptationState) * TempDest + AdaptationState * TempSrc;
+
+            // Get a CHAD from whatever output temperature to D50. This replaces output CHAD
+            Temp2CHAD(&MixedCHAD, Temp);
+
+            _cmsMAT3per(m, &m3, &MixedCHAD);
+        }
+
+    }
+    return TRUE;
+
+}
+
+// Tells whether the matrix/offset pair is close enough to "identity matrix,
+// zero offset" that applying it can be skipped.
+//   m == NULL and off == NULL: empty layer, returns TRUE.
+//   m == NULL and off != NULL: internal error, returns FALSE.
+//   m != NULL and off == NULL: the offset is taken as zero and only the matrix
+//                              is compared (previously this dereferenced NULL).
+// Otherwise returns TRUE when the summed absolute deviation of the nine matrix
+// cells from identity plus the three offset components is below 0.002.
+static
+cmsBool IsEmptyLayer(cmsMAT3* m, cmsVEC3* off)
+{
+    cmsFloat64Number diff = 0;
+    cmsMAT3 Ident;
+    int i;
+
+    if (m == NULL && off == NULL) return TRUE;  // NULL is allowed as an empty layer
+    if (m == NULL && off != NULL) return FALSE; // This is an internal error
+
+    _cmsMAT3identity(&Ident);
+
+    // Both matrices are walked as flat arrays of 9 doubles
+    for (i=0; i < 3*3; i++)
+        diff += fabs(((cmsFloat64Number*)m)[i] - ((cmsFloat64Number*)&Ident)[i]);
+
+    // A missing offset contributes nothing to the deviation
+    if (off != NULL) {
+
+        for (i=0; i < 3; i++)
+            diff += fabs(((cmsFloat64Number*)off)[i]);
+    }
+
+    return (diff < 0.002);
+}
+
+
+// Computes the XYZ conversion layer (matrix m plus offset off) that joins
+// profile i-1 to profile i. m/off start as identity/zero, which is detected
+// later on as "nothing to apply".
+//   Absolute colorimetric: m comes from ComputeAbsoluteIntent using the media
+//                          white points and CHADs of both profiles.
+//   Any other intent:      when BPC is set and the detected black points
+//                          differ, m/off come from ComputeBlackPointCompensation.
+// Returns FALSE only when ComputeAbsoluteIntent fails.
+static
+cmsBool ComputeConversion(cmsUInt32Number i,
+                          cmsHPROFILE hProfiles[],
+                          cmsUInt32Number Intent,
+                          cmsBool BPC,
+                          cmsFloat64Number AdaptationState,
+                          cmsMAT3* m, cmsVEC3* off)
+{
+    int axis;
+
+    _cmsMAT3identity(m);
+    _cmsVEC3init(off, 0, 0, 0);
+
+    if (Intent == INTENT_ABSOLUTE_COLORIMETRIC) {
+
+        cmsCIEXYZ SrcWhite, DstWhite;
+        cmsMAT3 SrcCHAD, DstCHAD;
+
+        _cmsReadMediaWhitePoint(&SrcWhite, hProfiles[i-1]);
+        _cmsReadCHAD(&SrcCHAD, hProfiles[i-1]);
+
+        _cmsReadMediaWhitePoint(&DstWhite, hProfiles[i]);
+        _cmsReadCHAD(&DstCHAD, hProfiles[i]);
+
+        if (!ComputeAbsoluteIntent(AdaptationState, &SrcWhite, &SrcCHAD, &DstWhite, &DstCHAD, m))
+            return FALSE;
+    }
+    else if (BPC) {
+
+        // Rest of intents may apply BPC.
+        cmsCIEXYZ SrcBlack, DstBlack;
+
+        cmsDetectBlackPoint(&SrcBlack, hProfiles[i-1], Intent, 0);
+        cmsDetectDestinationBlackPoint(&DstBlack, hProfiles[i], Intent, 0);
+
+        // Identical black points need no compensation
+        if (!(SrcBlack.X == DstBlack.X &&
+              SrcBlack.Y == DstBlack.Y &&
+              SrcBlack.Z == DstBlack.Z))
+            ComputeBlackPointCompensation(&SrcBlack, &DstBlack, m, off);
+    }
+
+    // The offset has to be adjusted for the encoding. XYZ is encoded normalized
+    // to 0..1.0 by dividing by MAX_ENCODEABLE_XYZ, and this stage goes from
+    // encoded XYZ to encoded XYZ:
+    //   y  = Mx + Off,  x = x'c,  y = y'c
+    //   y' = (Mx'c + Off) / c = Mx' + (Off / c)
+    // so only the offset needs rescaling.
+    for (axis = 0; axis < 3; axis++)
+        off->n[axis] /= MAX_ENCODEABLE_XYZ;
+
+    return TRUE;
+}
+
+
+// Add a conversion stage if needed. If a matrix/offset m is given, it applies to XYZ space.
+// Stages are appended at the end of Result according to the (InPCS, OutPCS) pair:
+//   XYZ -> XYZ: matrix (only when m/off is not an empty layer)
+//   XYZ -> Lab: optional matrix, then XYZ2Lab
+//   Lab -> XYZ: Lab2XYZ, then optional matrix
+//   Lab -> Lab: Lab2XYZ, matrix, XYZ2Lab - all three only when m/off is not empty
+//   other:      nothing is added; both spaces must be equal
+// Returns FALSE on a colorspace mismatch or when a stage cannot be inserted.
+static
+cmsBool AddConversion(cmsPipeline* Result, cmsColorSpaceSignature InPCS, cmsColorSpaceSignature OutPCS, cmsMAT3* m, cmsVEC3* off)
+{
+    // cmsStageAllocMatrix takes flat arrays of doubles
+    cmsFloat64Number* m_as_dbl = (cmsFloat64Number*) m;
+    cmsFloat64Number* off_as_dbl = (cmsFloat64Number*) off;
+
+    // Handle PCS mismatches. A specialized stage is added to the LUT in such case
+    switch (InPCS) {
+
+    case cmsSigXYZData: // Input profile operates in XYZ
+
+        switch (OutPCS) {
+
+        case cmsSigXYZData:  // XYZ -> XYZ
+            if (!IsEmptyLayer(m, off) &&
+                !cmsPipelineInsertStage(Result, cmsAT_END, cmsStageAllocMatrix(Result ->ContextID, 3, 3, m_as_dbl, off_as_dbl)))
+                return FALSE;
+            break;
+
+        case cmsSigLabData:  // XYZ -> Lab
+            if (!IsEmptyLayer(m, off) &&
+                !cmsPipelineInsertStage(Result, cmsAT_END, cmsStageAllocMatrix(Result ->ContextID, 3, 3, m_as_dbl, off_as_dbl)))
+                return FALSE;
+            if (!cmsPipelineInsertStage(Result, cmsAT_END, _cmsStageAllocXYZ2Lab(Result ->ContextID)))
+                return FALSE;
+            break;
+
+        default:
+            return FALSE;   // Colorspace mismatch
+        }
+        break;
+
+    case cmsSigLabData: // Input profile operates in Lab
+
+        switch (OutPCS) {
+
+        case cmsSigXYZData:  // Lab -> XYZ
+
+            if (!cmsPipelineInsertStage(Result, cmsAT_END, _cmsStageAllocLab2XYZ(Result ->ContextID)))
+                return FALSE;
+            if (!IsEmptyLayer(m, off) &&
+                !cmsPipelineInsertStage(Result, cmsAT_END, cmsStageAllocMatrix(Result ->ContextID, 3, 3, m_as_dbl, off_as_dbl)))
+                return FALSE;
+            break;
+
+        case cmsSigLabData:  // Lab -> Lab
+
+            // The matrix works in XYZ, so a round trip through XYZ is needed
+            if (!IsEmptyLayer(m, off)) {
+                if (!cmsPipelineInsertStage(Result, cmsAT_END, _cmsStageAllocLab2XYZ(Result ->ContextID)) ||
+                    !cmsPipelineInsertStage(Result, cmsAT_END, cmsStageAllocMatrix(Result ->ContextID, 3, 3, m_as_dbl, off_as_dbl)) ||
+                    !cmsPipelineInsertStage(Result, cmsAT_END, _cmsStageAllocXYZ2Lab(Result ->ContextID)))
+                    return FALSE;
+            }
+            break;
+
+        default:
+            return FALSE;  // Mismatch
+        }
+        break;
+
+        // On colorspaces other than PCS, check for same space
+    default:
+        if (InPCS != OutPCS) return FALSE;
+        break;
+    }
+
+    return TRUE;
+}
+
+
+// Is a given space compatible with another? Identical spaces always are. In addition,
+// generic 4-color (MCH4) and CMYK substitute each other, and XYZ and Lab are
+// interchangeable because each can be computed from the other.
+static
+cmsBool ColorSpaceIsCompatible(cmsColorSpaceSignature a, cmsColorSpaceSignature b)
+{
+    if (a == b) return TRUE;
+
+    switch (a) {
+
+    // MCH4 substitution of CMYK, both ways
+    case cmsSig4colorData: return (b == cmsSigCmykData)   ? TRUE : FALSE;
+    case cmsSigCmykData:   return (b == cmsSig4colorData) ? TRUE : FALSE;
+
+    // The two PCS encodings
+    case cmsSigXYZData:    return (b == cmsSigLabData)    ? TRUE : FALSE;
+    case cmsSigLabData:    return (b == cmsSigXYZData)    ? TRUE : FALSE;
+
+    default:
+        return FALSE;
+    }
+}
+
+
+// Default handler for ICC-style intents. Walks the profile chain and concatenates,
+// into one pipeline, the LUT read from every profile plus whatever PCS conversion
+// is needed to connect it to the previous one.
+//
+//   ContextID          context used for allocations and error reporting
+//   nProfiles          number of valid entries in the arrays below
+//   TheIntents[]       rendering intent for each profile
+//   hProfiles[]        the profiles, in chain order
+//   BPC[]              per-profile flag handed to ComputeConversion
+//   AdaptationStates[] per-profile value handed to ComputeConversion
+//   dwFlags            only cmsFLAGS_NONEGATIVES is inspected here
+//
+// Returns the resulting pipeline, or NULL on any failure (nothing is leaked then).
+static
+cmsPipeline* DefaultICCintents(cmsContext       ContextID,
+                               cmsUInt32Number  nProfiles,
+                               cmsUInt32Number  TheIntents[],
+                               cmsHPROFILE      hProfiles[],
+                               cmsBool          BPC[],
+                               cmsFloat64Number AdaptationStates[],
+                               cmsUInt32Number  dwFlags)
+{
+    cmsPipeline* Lut = NULL;
+    cmsPipeline* Result;
+    cmsHPROFILE hProfile;
+    cmsMAT3 m;
+    cmsVEC3 off;
+    cmsColorSpaceSignature ColorSpaceIn, ColorSpaceOut = cmsSigLabData, CurrentColorSpace;
+    cmsProfileClassSignature ClassSig;
+    cmsUInt32Number  i, Intent;
+
+    // For safety
+    if (nProfiles == 0) return NULL;
+
+    // Allocate an empty LUT for holding the result. 0 as channel count means 'undefined'
+    Result = cmsPipelineAlloc(ContextID, 0, 0);
+    if (Result == NULL) return NULL;
+
+    // The chain starts on the colorspace of the first profile
+    CurrentColorSpace = cmsGetColorSpace(hProfiles[0]);
+
+    for (i=0; i < nProfiles; i++) {
+
+        cmsBool  lIsDeviceLink, lIsInput;
+
+        hProfile      = hProfiles[i];
+        ClassSig      = cmsGetDeviceClass(hProfile);
+
+        // Abstract profiles are handled along with devicelinks
+        lIsDeviceLink = (ClassSig == cmsSigLinkClass || ClassSig == cmsSigAbstractClass );
+
+        // First profile is used as input unless devicelink or abstract
+        if ((i == 0) && !lIsDeviceLink) {
+            lIsInput = TRUE;
+        }
+        else {
+            // Else use profile in the input direction if current space is not PCS
+            lIsInput = (CurrentColorSpace != cmsSigXYZData) &&
+                       (CurrentColorSpace != cmsSigLabData);
+        }
+
+        Intent        = TheIntents[i];
+
+        // Which spaces does this profile connect, given the direction it is used in?
+        if (lIsInput || lIsDeviceLink) {
+
+            ColorSpaceIn    = cmsGetColorSpace(hProfile);
+            ColorSpaceOut   = cmsGetPCS(hProfile);
+        }
+        else {
+
+            ColorSpaceIn    = cmsGetPCS(hProfile);
+            ColorSpaceOut   = cmsGetColorSpace(hProfile);
+        }
+
+        if (!ColorSpaceIsCompatible(ColorSpaceIn, CurrentColorSpace)) {
+
+            cmsSignalError(ContextID, cmsERROR_COLORSPACE_CHECK, "ColorSpace mismatch");
+            goto Error;
+        }
+
+        // If devicelink is found, then no custom intent is allowed and we can
+        // read the LUT to be applied. Settings don't apply here.
+        if (lIsDeviceLink || ((ClassSig == cmsSigNamedColorClass) && (nProfiles == 1))) {
+
+            // Get the involved LUT from the profile
+            Lut = _cmsReadDevicelinkLUT(hProfile, Intent);
+            if (Lut == NULL) goto Error;
+
+            // What about abstract profiles? When not first in chain, they get a computed
+            // conversion; anything else handled here uses the identity with zero offset
+            if (ClassSig == cmsSigAbstractClass && i > 0) {
+                if (!ComputeConversion(i, hProfiles, Intent, BPC[i], AdaptationStates[i], &m, &off)) goto Error;
+            }
+            else {
+                _cmsMAT3identity(&m);
+                _cmsVEC3init(&off, 0, 0, 0);
+            }
+
+            if (!AddConversion(Result, CurrentColorSpace, ColorSpaceIn, &m, &off)) goto Error;
+
+        }
+        else {
+
+            if (lIsInput) {
+                // Input direction means non-pcs connection, so proceed like devicelinks
+                Lut = _cmsReadInputLUT(hProfile, Intent);
+                if (Lut == NULL) goto Error;
+            }
+            else {
+
+                // Output direction means PCS connection. Intent may apply here
+                Lut = _cmsReadOutputLUT(hProfile, Intent);
+                if (Lut == NULL) goto Error;
+
+                if (!ComputeConversion(i, hProfiles, Intent, BPC[i], AdaptationStates[i], &m, &off)) goto Error;
+                if (!AddConversion(Result, CurrentColorSpace, ColorSpaceIn, &m, &off)) goto Error;
+
+            }
+        }
+
+        // Concatenate to the output LUT
+        if (!cmsPipelineCat(Result, Lut))
+            goto Error;
+
+        // The per-profile LUT is released here; NULL keeps the Error path from freeing it again
+        cmsPipelineFree(Lut);
+        Lut = NULL;
+
+        // Update current space
+        CurrentColorSpace = ColorSpaceOut;
+    }
+
+    // Check for non-negatives clip
+    if (dwFlags & cmsFLAGS_NONEGATIVES) {
+
+        // The clipping stage is only appended for gray, RGB or CMYK output
+        if (ColorSpaceOut == cmsSigGrayData ||
+            ColorSpaceOut == cmsSigRgbData ||
+            ColorSpaceOut == cmsSigCmykData) {
+
+            cmsStage* clip = _cmsStageClipNegatives(Result->ContextID, cmsChannelsOf(ColorSpaceOut));
+            if (clip == NULL) goto Error;
+
+            if (!cmsPipelineInsertStage(Result, cmsAT_END, clip))
+                goto Error;
+        }
+
+    }
+
+    return Result;
+
+Error:
+
+    // Lut is non-NULL only when the failure happened between reading and releasing it
+    if (Lut != NULL) cmsPipelineFree(Lut);
+    if (Result != NULL) cmsPipelineFree(Result);
+    return NULL;
+}
+
+
+// Wrapper for DLL calling convention: hands every argument, untouched, to the static
+// DefaultICCintents handler and returns whatever pipeline (or NULL) it produces.
+cmsPipeline*  CMSEXPORT _cmsDefaultICCintents(cmsContext ContextID, cmsUInt32Number nProfiles,
+                                              cmsUInt32Number TheIntents[], cmsHPROFILE hProfiles[],
+                                              cmsBool BPC[], cmsFloat64Number AdaptationStates[],
+                                              cmsUInt32Number dwFlags)
+{
+    cmsPipeline* Linked;
+
+    Linked = DefaultICCintents(ContextID, nProfiles, TheIntents, hProfiles, BPC, AdaptationStates, dwFlags);
+    return Linked;
+}
+
+// Black preserving intents ---------------------------------------------------------------------------------------------
+
+// Maps each black-preserving intent (K-only or K-plane flavour) onto the ICC intent
+// it is built upon. Any other value is returned as it came.
+static
+cmsUInt32Number TranslateNonICCIntents(cmsUInt32Number Intent)
+{
+    if (Intent == INTENT_PRESERVE_K_ONLY_PERCEPTUAL ||
+        Intent == INTENT_PRESERVE_K_PLANE_PERCEPTUAL)
+        return INTENT_PERCEPTUAL;
+
+    if (Intent == INTENT_PRESERVE_K_ONLY_RELATIVE_COLORIMETRIC ||
+        Intent == INTENT_PRESERVE_K_PLANE_RELATIVE_COLORIMETRIC)
+        return INTENT_RELATIVE_COLORIMETRIC;
+
+    if (Intent == INTENT_PRESERVE_K_ONLY_SATURATION ||
+        Intent == INTENT_PRESERVE_K_PLANE_SATURATION)
+        return INTENT_SATURATION;
+
+    // Not a black-preserving intent
+    return Intent;
+}
+
+// Cargo handed to BlackPreservingGrayOnlySampler, the sampler for Black-only preserving CMYK->CMYK transforms
+
+typedef struct {
+    cmsPipeline*    cmyk2cmyk;      // The original transform; evaluated for every node that has some C, M or Y
+    cmsToneCurve*   KTone;          // Black-to-black tone curve; gives the output K for nodes with C = M = Y = 0
+
+} GrayOnlyParams;
+
+
+// CLUT sampler that preserves black only when black is the sole ink in use: nodes with
+// C = M = Y = 0 come out as pure K, with K taken across the tone curve. All remaining
+// nodes go through the normal CMYK->CMYK pipeline. Cargo is a GrayOnlyParams*.
+static
+int BlackPreservingGrayOnlySampler(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[], CMSREGISTER void* Cargo)
+{
+    GrayOnlyParams* Params = (GrayOnlyParams*) Cargo;
+    cmsBool BlackOnly = (In[0] == 0 && In[1] == 0 && In[2] == 0);
+
+    if (!BlackOnly) {
+
+        // Some colored ink is present, keep the normal transform
+        Params ->cmyk2cmyk ->Eval16Fn(In, Out, Params ->cmyk2cmyk ->Data);
+        return TRUE;
+    }
+
+    // Going across black only, so keep black only.
+    // TAC does not apply because it is black ink!
+    Out[0] = 0;
+    Out[1] = 0;
+    Out[2] = 0;
+    Out[3] = cmsEvalToneCurve16(Params ->KTone, In[3]);
+
+    return TRUE;
+}
+
+// Entry point for the black-preserving K-only intents, which are non-ICC. The intents
+// are first translated to their ICC counterparts. Unless the chain both starts and ends
+// on CMYK, the work is simply delegated to DefaultICCintents. Otherwise a 4->4 CLUT is
+// sampled with BlackPreservingGrayOnlySampler and returned as a one-stage pipeline.
+// Returns NULL on a bad profile count or on any failure.
+static
+cmsPipeline*  BlackPreservingKOnlyIntents(cmsContext     ContextID,
+                                          cmsUInt32Number nProfiles,
+                                          cmsUInt32Number TheIntents[],
+                                          cmsHPROFILE     hProfiles[],
+                                          cmsBool         BPC[],
+                                          cmsFloat64Number AdaptationStates[],
+                                          cmsUInt32Number dwFlags)
+{
+    GrayOnlyParams  Params;
+    cmsPipeline*    Lut;
+    cmsStage*       Grid;
+    cmsUInt32Number Translated[256];
+    cmsUInt32Number n, nPoints;
+    cmsBool         Done = FALSE;
+
+    // Sanity check
+    if (nProfiles == 0 || nProfiles > 255) return NULL;
+
+    // Black-preserving intents become plain ICC ones
+    n = 0;
+    while (n < nProfiles) {
+        Translated[n] = TranslateNonICCIntents(TheIntents[n]);
+        n++;
+    }
+
+    // Both ends of the chain have to be CMYK, otherwise there is no K to preserve
+    if (!(cmsGetColorSpace(hProfiles[0]) == cmsSigCmykData &&
+          cmsGetColorSpace(hProfiles[nProfiles-1]) == cmsSigCmykData))
+        return DefaultICCintents(ContextID, nProfiles, Translated, hProfiles, BPC, AdaptationStates, dwFlags);
+
+    memset(&Params, 0, sizeof(Params));
+
+    // The LUT that will hold the result
+    Lut = cmsPipelineAlloc(ContextID, 4, 4);
+    if (Lut == NULL) return NULL;
+
+    do {
+
+        // The normal ICC transform
+        Params.cmyk2cmyk = DefaultICCintents(ContextID, nProfiles, Translated, hProfiles,
+                                             BPC, AdaptationStates, dwFlags);
+        if (Params.cmyk2cmyk == NULL) break;
+
+        // The black-to-black tone curve
+        Params.KTone = _cmsBuildKToneCurve(ContextID, 4096, nProfiles, Translated, hProfiles,
+                                           BPC, AdaptationStates, dwFlags);
+        if (Params.KTone == NULL) break;
+
+        // Grid size to be used
+        nPoints = _cmsReasonableGridpointsByColorspace(cmsSigCmykData, dwFlags);
+
+        // A 16 bits CLUT, which is the one and only MPE in this LUT
+        Grid = cmsStageAllocCLut16bit(ContextID, nPoints, 4, 4, NULL);
+        if (Grid == NULL) break;
+
+        if (!cmsPipelineInsertStage(Lut, cmsAT_BEGIN, Grid)) break;
+
+        // Sample it. We cannot afford pre/post linearization this time.
+        if (!cmsStageSampleCLut16bit(Grid, BlackPreservingGrayOnlySampler, (void*) &Params, 0)) break;
+
+        Done = TRUE;
+
+    } while (0);
+
+    // The xform and the tone curve are no longer needed, whatever the outcome was
+    if (Params.cmyk2cmyk != NULL) cmsPipelineFree(Params.cmyk2cmyk);
+    if (Params.KTone != NULL)  cmsFreeToneCurve(Params.KTone);
+
+    if (!Done) {
+        cmsPipelineFree(Lut);
+        return NULL;
+    }
+
+    return Lut;
+}
+
+// K Plane-preserving CMYK to CMYK ------------------------------------------------------------------------------------
+
+typedef struct {
+    // Cargo for BlackPreservingSampler; filled in by BlackPreservingKPlaneIntents
+    cmsPipeline*     cmyk2cmyk;     // The original transform
+    cmsHTRANSFORM    hProofOutput;  // Output CMYK to Lab (last profile)
+    cmsHTRANSFORM    cmyk2Lab;      // The input chain
+    cmsToneCurve*    KTone;         // Black-to-black tone curve
+    cmsPipeline*     LabK2cmyk;     // The output profile
+    cmsFloat64Number MaxError;      // Largest cmsDeltaE recorded by the sampler so far
+
+    cmsHTRANSFORM    hRoundTrip;    // NOTE(review): not referenced by the functions visible around here
+    cmsFloat64Number MaxTAC;        // Total area coverage limit: cmsDetectTAC() of the last profile / 100.0
+
+
+} PreserveKPlaneParams;
+
+
+// CLUT sampler for K-plane preservation. The CLUT will be stored at 16 bits, but calculations are performed at cmsFloat32Number precision
+static
+int BlackPreservingSampler(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[], CMSREGISTER void* Cargo)
+{   // In[] and Out[] carry 4 channels each; Cargo is a PreserveKPlaneParams*. Every path returns TRUE.
+    int i;
+    cmsFloat32Number Inf[4], Outf[4];
+    cmsFloat32Number LabK[4];
+    cmsFloat64Number SumCMY, SumCMYK, Error, Ratio;
+    cmsCIELab ColorimetricLab, BlackPreservingLab;
+    PreserveKPlaneParams* bp = (PreserveKPlaneParams*) Cargo;
+
+    // Convert from 16 bits to floating point
+    for (i=0; i < 4; i++)
+        Inf[i] = (cmsFloat32Number) (In[i] / 65535.0);
+
+    // Get the K across Tone curve
+    LabK[3] = cmsEvalToneCurveFloat(bp ->KTone, Inf[3]);
+
+    // If going across black only, keep black only
+    if (In[0] == 0 && In[1] == 0 && In[2] == 0) {
+
+        Out[0] = Out[1] = Out[2] = 0;
+        Out[3] = _cmsQuickSaturateWord(LabK[3] * 65535.0);
+        return TRUE;
+    }
+
+    // Try the original transform,
+    cmsPipelineEvalFloat( Inf, Outf, bp ->cmyk2cmyk);
+
+    // Store a copy of the floating point result into 16-bit
+    for (i=0; i < 4; i++)
+            Out[i] = _cmsQuickSaturateWord(Outf[i] * 65535.0);
+
+    // Maybe K is already ok (mostly on K=0)
+    if ( fabs(Outf[3] - LabK[3]) < (3.0 / 65535.0) ) {
+        return TRUE;
+    }
+
+    // K differ, measure and keep Lab measurement for further usage
+    // this is done in relative colorimetric intent
+    cmsDoTransform(bp->hProofOutput, Out, &ColorimetricLab, 1);
+
+    // Is not black only and the transform doesn't keep black.
+    // Obtain the Lab of output CMYK. After that we have Lab + K
+    cmsDoTransform(bp ->cmyk2Lab, Outf, LabK, 1);
+
+    // Obtain the corresponding CMY using reverse interpolation
+    // (K is fixed in LabK[3])
+    if (!cmsPipelineEvalReverseFloat(LabK, Outf, Outf, bp ->LabK2cmyk)) {
+
+        // Cannot find a suitable value, so use colorimetric xform
+        // which is already stored in Out[]
+        return TRUE;
+    }
+
+    // Make sure to pass through K (which now is fixed)
+    Outf[3] = LabK[3];
+
+    // Apply TAC if needed
+    SumCMY   = Outf[0]  + Outf[1] + Outf[2];
+    SumCMYK  = SumCMY + Outf[3];
+
+    if (SumCMYK > bp ->MaxTAC) {
+        // Over the limit: only C, M and Y get scaled down below, K is left as it is
+        Ratio = 1 - ((SumCMYK - bp->MaxTAC) / SumCMY);
+        if (Ratio < 0)
+            Ratio = 0;
+    }
+    else
+       Ratio = 1.0;
+
+    Out[0] = _cmsQuickSaturateWord(Outf[0] * Ratio * 65535.0);     // C
+    Out[1] = _cmsQuickSaturateWord(Outf[1] * Ratio * 65535.0);     // M
+    Out[2] = _cmsQuickSaturateWord(Outf[2] * Ratio * 65535.0);     // Y
+    Out[3] = _cmsQuickSaturateWord(Outf[3] * 65535.0);             // K, not affected by Ratio
+
+    // Estimate the error (this goes 16 bits to Lab DBL)
+    cmsDoTransform(bp->hProofOutput, Out, &BlackPreservingLab, 1);
+    Error = cmsDeltaE(&ColorimetricLab, &BlackPreservingLab);
+    if (Error > bp -> MaxError)
+        bp->MaxError = Error;
+
+    return TRUE;
+}
+
+// This is the entry for the black-plane preserving intents, which are non-ICC.
+//
+// The intents are translated to their ICC counterparts. If the chain does not start
+// on CMYK, or the last profile is neither CMYK nor of output class, the job is
+// delegated to DefaultICCintents. Otherwise a 4->4 CLUT is sampled by means of
+// BlackPreservingSampler and returned as a one-stage pipeline.
+//
+// Returns NULL on a bad profile count or if the result pipeline cannot be allocated.
+// NOTE(review): a failure in any later step jumps to Cleanup and still returns the
+// allocated pipeline, holding either no stage or an unsampled CLUT. That is the
+// original behaviour and it is kept here -- confirm callers are fine with it.
+static
+cmsPipeline* BlackPreservingKPlaneIntents(cmsContext     ContextID,
+                                          cmsUInt32Number nProfiles,
+                                          cmsUInt32Number TheIntents[],
+                                          cmsHPROFILE     hProfiles[],
+                                          cmsBool         BPC[],
+                                          cmsFloat64Number AdaptationStates[],
+                                          cmsUInt32Number dwFlags)
+{
+    PreserveKPlaneParams bp;
+    cmsPipeline*    Result = NULL;
+    cmsUInt32Number ICCIntents[256];
+    cmsStage*         CLUT;
+    cmsUInt32Number i, nGridPoints;
+    cmsHPROFILE hLab = NULL;     // Closed in Cleanup, so no exit path can leak it
+
+    // Sanity check
+    if (nProfiles < 1 || nProfiles > 255) return NULL;
+
+    // Translate black-preserving intents to ICC ones
+    for (i=0; i < nProfiles; i++)
+        ICCIntents[i] = TranslateNonICCIntents(TheIntents[i]);
+
+    // Check for non-cmyk profiles
+    if (cmsGetColorSpace(hProfiles[0]) != cmsSigCmykData ||
+        !(cmsGetColorSpace(hProfiles[nProfiles-1]) == cmsSigCmykData ||
+        cmsGetDeviceClass(hProfiles[nProfiles-1]) == cmsSigOutputClass))
+           return  DefaultICCintents(ContextID, nProfiles, ICCIntents, hProfiles, BPC, AdaptationStates, dwFlags);
+
+    // Allocate an empty LUT for holding the result
+    Result = cmsPipelineAlloc(ContextID, 4, 4);
+    if (Result == NULL) return NULL;
+
+    // Zeroed, so Cleanup can tell which members were actually created
+    memset(&bp, 0, sizeof(bp));
+
+    // We need the input LUT of the last profile, assuming this one is responsible of
+    // black generation. This LUT will be searched in inverse order.
+    bp.LabK2cmyk = _cmsReadInputLUT(hProfiles[nProfiles-1], INTENT_RELATIVE_COLORIMETRIC);
+    if (bp.LabK2cmyk == NULL) goto Cleanup;
+
+    // Get total area coverage (in 0..1 domain)
+    bp.MaxTAC = cmsDetectTAC(hProfiles[nProfiles-1]) / 100.0;
+    if (bp.MaxTAC <= 0) goto Cleanup;
+
+    // Create a LUT holding normal ICC transform
+    bp.cmyk2cmyk = DefaultICCintents(ContextID,
+                                         nProfiles,
+                                         ICCIntents,
+                                         hProfiles,
+                                         BPC,
+                                         AdaptationStates,
+                                         dwFlags);
+    if (bp.cmyk2cmyk == NULL) goto Cleanup;
+
+    // Now the tone curve
+    bp.KTone = _cmsBuildKToneCurve(ContextID, 4096, nProfiles,
+                                   ICCIntents,
+                                   hProfiles,
+                                   BPC,
+                                   AdaptationStates,
+                                   dwFlags);
+    if (bp.KTone == NULL) goto Cleanup;
+
+    // To measure the output, Last profile to Lab. Do not hand a missing Lab profile
+    // over to the transform creation.
+    hLab = cmsCreateLab4ProfileTHR(ContextID, NULL);
+    if (hLab == NULL) goto Cleanup;
+
+    bp.hProofOutput = cmsCreateTransformTHR(ContextID, hProfiles[nProfiles-1],
+                                         CHANNELS_SH(4)|BYTES_SH(2), hLab, TYPE_Lab_DBL,
+                                         INTENT_RELATIVE_COLORIMETRIC,
+                                         cmsFLAGS_NOCACHE|cmsFLAGS_NOOPTIMIZE);
+    if ( bp.hProofOutput == NULL) goto Cleanup;
+
+    // Same as anterior, but lab in the 0..1 range
+    bp.cmyk2Lab = cmsCreateTransformTHR(ContextID, hProfiles[nProfiles-1],
+                                         FLOAT_SH(1)|CHANNELS_SH(4)|BYTES_SH(4), hLab,
+                                         FLOAT_SH(1)|CHANNELS_SH(3)|BYTES_SH(4),
+                                         INTENT_RELATIVE_COLORIMETRIC,
+                                         cmsFLAGS_NOCACHE|cmsFLAGS_NOOPTIMIZE);
+    if (bp.cmyk2Lab == NULL) goto Cleanup;
+
+    // Error estimation (for debug only)
+    bp.MaxError = 0;
+
+    // How many gridpoints are we going to use?
+    nGridPoints = _cmsReasonableGridpointsByColorspace(cmsSigCmykData, dwFlags);
+
+    CLUT = cmsStageAllocCLut16bit(ContextID, nGridPoints, 4, 4, NULL);
+    if (CLUT == NULL) goto Cleanup;
+
+    if (!cmsPipelineInsertStage(Result, cmsAT_BEGIN, CLUT))
+        goto Cleanup;
+
+    cmsStageSampleCLut16bit(CLUT, BlackPreservingSampler, (void*) &bp, 0);
+
+Cleanup:
+
+    // Common exit for success and failure: release every helper object that got created
+    if (bp.cmyk2cmyk) cmsPipelineFree(bp.cmyk2cmyk);
+    if (bp.cmyk2Lab) cmsDeleteTransform(bp.cmyk2Lab);
+    if (bp.hProofOutput) cmsDeleteTransform(bp.hProofOutput);
+    if (hLab) cmsCloseProfile(hLab);
+
+    if (bp.KTone) cmsFreeToneCurve(bp.KTone);
+    if (bp.LabK2cmyk) cmsPipelineFree(bp.LabK2cmyk);
+
+    return Result;
+}
+
+// Link routines ------------------------------------------------------------------------------------------------------
+
+// Chains several profiles into a single LUT. This function only validates the
+// parameters, adjusts the black point compensation flags and then calls the handler
+// registered for the first intent in the chain. The handler may be user-defined, and
+// it is up to the handler to deal with the remaining intents. Up to 255 profiles at
+// a time are supported. Returns NULL on a bad profile count or an unsupported intent.
+cmsPipeline* _cmsLinkProfiles(cmsContext     ContextID,
+                              cmsUInt32Number nProfiles,
+                              cmsUInt32Number TheIntents[],
+                              cmsHPROFILE     hProfiles[],
+                              cmsBool         BPC[],
+                              cmsFloat64Number AdaptationStates[],
+                              cmsUInt32Number dwFlags)
+{
+    cmsUInt32Number k;
+    cmsIntentsList* Handler;
+
+    // Make sure a reasonable number of profiles is provided
+    if (nProfiles == 0 || nProfiles > 255) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Couldn't link '%d' profiles", nProfiles);
+        return NULL;
+    }
+
+    // Check if black point is really needed or allowed. Note that following Adobe's
+    // document: BPC does not apply to devicelink profiles, nor to abs colorimetric,
+    // and applies always on V4 perceptual and saturation.
+    for (k = 0; k < nProfiles; k++) {
+
+        switch (TheIntents[k]) {
+
+        case INTENT_ABSOLUTE_COLORIMETRIC:
+            BPC[k] = FALSE;
+            break;
+
+        case INTENT_PERCEPTUAL:
+        case INTENT_SATURATION:
+            // Force BPC for V4 profiles in perceptual and saturation
+            if (cmsGetEncodedICCversion(hProfiles[k]) >= 0x4000000)
+                BPC[k] = TRUE;
+            break;
+
+        default:
+            break;
+        }
+    }
+
+    // Search for a handler. The first intent in the chain defines the handler. That would
+    // prevent using multiple custom intents in a multiintent chain, but the behaviour of
+    // this case would present some issues if the custom intent tries to do things like
+    // preserve primaries. This solution is not perfect, but works well on most cases.
+    Handler = SearchIntent(ContextID, TheIntents[0]);
+
+    if (Handler != NULL) {
+        // Call the handler
+        return Handler ->Link(ContextID, nProfiles, TheIntents, hProfiles, BPC, AdaptationStates, dwFlags);
+    }
+
+    cmsSignalError(ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported intent '%d'", TheIntents[0]);
+    return NULL;
+}
+
+// -------------------------------------------------------------------------------------------------
+
+// Get information about available intents. nMax is the maximum space for the supplied
+// "Codes" and "Descriptions" arrays; either of them may be NULL if not wanted. The
+// function returns the total number of intents (built-in ones plus those registered
+// by plug-ins in the given context), which may be greater than nMax, although the
+// arrays are not populated beyond that level.
+//
+// The built-in intents are listed first, so they keep the positions they had before,
+// and the plug-in intents follow. The counter is no longer restarted between the two
+// lists: that made one list overwrite the other and left the other out of the total.
+cmsUInt32Number CMSEXPORT cmsGetSupportedIntentsTHR(cmsContext ContextID, cmsUInt32Number nMax, cmsUInt32Number* Codes, char** Descriptions)
+{
+    _cmsIntentsPluginChunkType* ctx = ( _cmsIntentsPluginChunkType*) _cmsContextGetClientChunk(ContextID, IntentPlugin);
+    cmsIntentsList* Lists[2];
+    cmsIntentsList* pt;
+    cmsUInt32Number nIntents = 0;
+    int i;
+
+    Lists[0] = DefaultIntents;   // Built-in intents
+    Lists[1] = ctx ->Intents;    // Intents added by plug-ins
+
+    for (i = 0; i < 2; i++) {
+
+        for (pt = Lists[i]; pt != NULL; pt = pt -> Next)
+        {
+            // Store only while there is room, but keep on counting
+            if (nIntents < nMax) {
+                if (Codes != NULL)
+                    Codes[nIntents] = pt ->Intent;
+
+                if (Descriptions != NULL)
+                    Descriptions[nIntents] = pt ->Description;
+            }
+
+            nIntents++;
+        }
+    }
+
+    return nIntents;
+}
+
+// Same as cmsGetSupportedIntentsTHR, called with a NULL context
+cmsUInt32Number CMSEXPORT cmsGetSupportedIntents(cmsUInt32Number nMax, cmsUInt32Number* Codes, char** Descriptions)
+{
+    cmsUInt32Number Total;
+
+    Total = cmsGetSupportedIntentsTHR(NULL, nMax, Codes, Descriptions);
+    return Total;
+}
+
+// The plug-in registration for rendering intents. A NULL Data resets the list of
+// custom intents of the context. Otherwise a new entry is allocated by means of
+// _cmsPluginMalloc, filled from the plug-in and placed at the head of that list.
+// Returns FALSE only if the entry cannot be allocated.
+cmsBool  _cmsRegisterRenderingIntentPlugin(cmsContext id, cmsPluginBase* Data)
+{
+    _cmsIntentsPluginChunkType* Chunk = ( _cmsIntentsPluginChunkType*) _cmsContextGetClientChunk(id, IntentPlugin);
+    cmsPluginRenderingIntent* Src = (cmsPluginRenderingIntent*) Data;
+    cmsIntentsList* Node;
+
+    if (Data == NULL) {
+
+        // Custom intents are reset
+        Chunk ->Intents = NULL;
+        return TRUE;
+    }
+
+    Node = (cmsIntentsList*) _cmsPluginMalloc(id, sizeof(cmsIntentsList));
+    if (Node == NULL)
+        return FALSE;
+
+    Node ->Intent = Src ->Intent;
+    Node ->Link   = Src ->Link;
+
+    // Bounded copy, the last byte is always the terminator
+    strncpy(Node ->Description, Src ->Description, sizeof(Node ->Description)-1);
+    Node ->Description[sizeof(Node ->Description)-1] = 0;
+
+    // The new entry becomes the head of the list
+    Node ->Next     = Chunk ->Intents;
+    Chunk ->Intents = Node;
+
+    return TRUE;
+}
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmserr.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmserr.c
new file mode 100644
index 0000000000..bb386eaaf2
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmserr.c
@@ -0,0 +1,663 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+
+#include "lcms2_internal.h"
+
+
+// Returns the LCMS_VERSION this code was compiled with, so applications can detect a
+// mismatch between the lcms version of the header and the one of the shared object.
+int CMSEXPORT cmsGetEncodedCMMversion(void)
+{
+    const int Version = LCMS_VERSION;
+
+    return Version;
+}
+
+// I am so tired about incompatibilities on those functions that here are some replacements
+// that hopefully would be fully portable.
+
+// Compares two strings ignoring case. Returns 0 if both are equal up to and including
+// the terminator, otherwise the difference between the upper-cased values of the first
+// pair of characters that do not match.
+int CMSEXPORT cmsstrcasecmp(const char* s1, const char* s2)
+{
+    CMSREGISTER const unsigned char *a = (const unsigned char *)s1;
+    CMSREGISTER const unsigned char *b = (const unsigned char *)s2;
+
+    for (;; a++, b++) {
+
+        int ca = toupper(*a);
+        int cb = toupper(*b);
+
+        if (ca != cb)
+            return ca - cb;
+
+        // Same character on both sides: done if that was the end
+        if (*a == '\0')
+            return 0;
+    }
+}
+
+// Returns the length of an open file, in bytes, leaving the file position where it
+// was. The type is long int because C99 specifies ftell in such way (7.19.9.2).
+// Returns -1L if the current position cannot be obtained, if seeking to the end
+// fails, or if the original position cannot be restored afterwards; in this last
+// case the stream would otherwise be silently left at the end of the file.
+long int CMSEXPORT cmsfilelength(FILE* f)
+{
+    long int p , n;
+
+    p = ftell(f); // register current file position
+    if (p == -1L)
+        return -1L;
+
+    if (fseek(f, 0, SEEK_END) != 0) {
+        return -1L;
+    }
+
+    // The offset of the end of file is the length (may itself be -1L on error)
+    n = ftell(f);
+
+    // Restore file position; the caller relies on it, so a failure here is an error
+    if (fseek(f, p, SEEK_SET) != 0) {
+        return -1L;
+    }
+
+    return n;
+}
+
+
+// Memory handling ------------------------------------------------------------------
+//
+// This is the interface to low-level memory management routines. By default a simple
+// wrapping to malloc/free/realloc is provided, although there is a limit on the max
+// amount of memory that can be reclaimed. This is mostly as a safety feature to prevent 
+// bogus or evil code to allocate huge blocks that otherwise lcms would never need.
+
+#define MAX_MEMORY_FOR_ALLOC  ((cmsUInt32Number)(1024U*1024U*512U))
+
+// User may override this behaviour by using a memory plug-in, which basically replaces
+// the default memory management functions. In this case, no check is performed and it
+// is up to the plug-in writer to stay on the safe side. There are only three functions
+// required to be implemented: malloc, realloc and free, although the user may want to
+// replace the optional mallocZero, calloc and dup as well.
+
+cmsBool   _cmsRegisterMemHandlerPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// *********************************************************************************
+
+// Default memory allocation function: a plain malloc() behind a very coarse check on
+// the amount requested, which is there just to prevent exploits. Requests above
+// MAX_MEMORY_FOR_ALLOC are refused with NULL.
+static
+void* _cmsMallocDefaultFn(cmsContext ContextID, cmsUInt32Number size)
+{
+    cmsUNUSED_PARAMETER(ContextID);
+
+    if (size <= MAX_MEMORY_FOR_ALLOC)
+        return (void*) malloc(size);
+
+    return NULL;  // Never allow over maximum
+}
+
+// Generic allocate & zero: gets the block by means of _cmsMalloc and clears it.
+// Returns NULL if the allocation fails.
+static
+void* _cmsMallocZeroDefaultFn(cmsContext ContextID, cmsUInt32Number size)
+{
+    void* Block = _cmsMalloc(ContextID, size);
+
+    if (Block != NULL)
+        memset(Block, 0, size);
+
+    return Block;
+}
+
+
+// The default free function. The only check performed is against NULL pointers.
+static
+void _cmsFreeDefaultFn(cmsContext ContextID, void *Ptr)
+{
+    cmsUNUSED_PARAMETER(ContextID);
+
+    // free(NULL) is defined a no-op by C99, so this check could be dropped,
+    // but it is kept just in case...
+    if (Ptr == NULL) return;
+
+    free(Ptr);
+}
+
+// The default realloc function: realloc() behind the same coarse size check used to
+// prevent exploits. If Ptr is NULL, realloc behaves the same way as malloc and
+// allocates a new block of size bytes.
+static
+void* _cmsReallocDefaultFn(cmsContext ContextID, void* Ptr, cmsUInt32Number size)
+{
+    cmsUNUSED_PARAMETER(ContextID);
+
+    if (size <= MAX_MEMORY_FOR_ALLOC)
+        return realloc(Ptr, size);
+
+    return NULL;  // Never realloc over 512Mb
+}
+
+
+// The default calloc function. Allocates an array of num elements, each one of size
+// bytes, with all memory initialized to zero. Returns NULL on an empty request, when
+// num * size would overflow, or when the total goes over MAX_MEMORY_FOR_ALLOC.
+static
+void* _cmsCallocDefaultFn(cmsContext ContextID, cmsUInt32Number num, cmsUInt32Number size)
+{
+    cmsUInt32Number Bytes = num * size;
+
+    // The order matters: a zero total (which covers size == 0) is rejected before
+    // size is used as divisor in the overflow check.
+    if (Bytes == 0 ||                        // Preserve calloc behaviour
+        num >= UINT_MAX / size ||            // Safe check for overflow
+        Bytes < num || Bytes < size ||       // Check for overflow
+        Bytes > MAX_MEMORY_FOR_ALLOC)        // Never alloc over 512Mb
+        return NULL;
+
+    return _cmsMallocZero(ContextID, Bytes);
+}
+
+// Generic block duplication: allocates size bytes by means of _cmsMalloc and copies
+// Org into them. With a NULL Org the new block is returned without being filled.
+// Returns NULL for sizes over MAX_MEMORY_FOR_ALLOC or if the allocation fails.
+static
+void* _cmsDupDefaultFn(cmsContext ContextID, const void* Org, cmsUInt32Number size)
+{
+    void* Copy;
+
+    // Never dup over 512Mb
+    if (size > MAX_MEMORY_FOR_ALLOC)
+        return NULL;
+
+    Copy = _cmsMalloc(ContextID, size);
+    if (Copy == NULL || Org == NULL)
+        return Copy;
+
+    memmove(Copy, Org, size);
+    return Copy;
+}
+
+
+// Pointers to memory manager functions in Context0: the six default allocators defined above
+_cmsMemPluginChunkType _cmsMemPluginChunk = { _cmsMallocDefaultFn, _cmsMallocZeroDefaultFn, _cmsFreeDefaultFn, 
+                                              _cmsReallocDefaultFn, _cmsCallocDefaultFn,    _cmsDupDefaultFn
+                                            };  // _cmsInstallAllocFunctions copies this table when given no plug-in
+
+
+// Sets up the memory manager chunk of ctx. Given a source context, its chunk is
+// duplicated into the memory pool of ctx; with a NULL src the chunk is reset to
+// point to the default allocators kept inside ctx itself.
+void _cmsAllocMemPluginChunk(struct _cmsContext_struct* ctx, const struct _cmsContext_struct* src)
+{
+    void* Chunk;
+
+    _cmsAssert(ctx != NULL);
+
+    if (src == NULL) {
+
+        // To reset it, we use the default allocators, which cannot be overridden
+        Chunk = &ctx ->DefaultMemoryManager;
+    }
+    else {
+
+        // Duplicate
+        Chunk = _cmsSubAllocDup(ctx ->MemPool, src ->chunks[MemPlugin], sizeof(_cmsMemPluginChunkType));
+    }
+
+    ctx ->chunks[MemPlugin] = Chunk;
+}
+
+// Auxiliary to fill the memory management functions in ptr. With a NULL Plugin the
+// context 0 defaults are copied. Otherwise malloc, free and realloc are taken from
+// the plug-in, and each of mallocZero, calloc and dup is taken from the plug-in only
+// if supplied there, reverting to the default implementation when it is not.
+void _cmsInstallAllocFunctions(cmsPluginMemHandler* Plugin, _cmsMemPluginChunkType* ptr)
+{
+    if (Plugin == NULL) {
+
+        memcpy(ptr, &_cmsMemPluginChunk, sizeof(_cmsMemPluginChunk));
+        return;
+    }
+
+    // The three functions a plug-in always has to implement
+    ptr ->MallocPtr  = Plugin ->MallocPtr;
+    ptr ->FreePtr    = Plugin ->FreePtr;
+    ptr ->ReallocPtr = Plugin ->ReallocPtr;
+
+    // The optional ones
+    ptr ->MallocZeroPtr = (Plugin ->MallocZeroPtr != NULL) ? Plugin ->MallocZeroPtr : _cmsMallocZeroDefaultFn;
+    ptr ->CallocPtr     = (Plugin ->CallocPtr != NULL)     ? Plugin ->CallocPtr     : _cmsCallocDefaultFn;
+    ptr ->DupPtr        = (Plugin ->DupPtr != NULL)        ? Plugin ->DupPtr        : _cmsDupDefaultFn;
+}
+
+
+// Plug-in replacement entry: installs the allocators supplied by a memory handler
+// plug-in into the given context, or restores the defaults when Data is NULL.
+cmsBool  _cmsRegisterMemHandlerPlugin(cmsContext ContextID, cmsPluginBase *Data)
+{
+    cmsPluginMemHandler* handler = (cmsPluginMemHandler*) Data;
+    _cmsMemPluginChunkType* chunk;
+
+    // NULL forces a reset to defaults. In this special case, the defaults are stored in
+    // the context structure itself. The remaining plug-ins do NOT keep any copy there,
+    // but this one is special because the context internal data has to be allocated
+    // by using those very functions.
+    if (Data == NULL) {
+
+        struct _cmsContext_struct* ctx = (struct _cmsContext_struct*) ContextID;
+
+        // Return to the default allocators. Nothing to do for a NULL context.
+        if (ContextID != NULL)
+            ctx->chunks[MemPlugin] = (void*) &ctx->DefaultMemoryManager;
+        return TRUE;
+    }
+
+    // These three callbacks are mandatory
+    if (handler->MallocPtr == NULL) return FALSE;
+    if (handler->FreePtr == NULL) return FALSE;
+    if (handler->ReallocPtr == NULL) return FALSE;
+
+    // Set replacement functions
+    chunk = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+    if (chunk == NULL) return FALSE;
+
+    _cmsInstallAllocFunctions(handler, chunk);
+    return TRUE;
+}
+
+// Allocates size bytes through the MallocPtr callback of the context's memory plug-in chunk
+void* CMSEXPORT _cmsMalloc(cmsContext ContextID, cmsUInt32Number size)
+{
+    _cmsMemPluginChunkType* mem = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+    return mem->MallocPtr(ContextID, size);
+}
+
+// Allocates size bytes through the MallocZeroPtr callback of the context's memory plug-in chunk
+void* CMSEXPORT _cmsMallocZero(cmsContext ContextID, cmsUInt32Number size)
+{
+    _cmsMemPluginChunkType* mem = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+    return mem->MallocZeroPtr(ContextID, size);
+}
+
+// Allocates num elements of size bytes through the CallocPtr callback of the memory plug-in chunk
+void* CMSEXPORT _cmsCalloc(cmsContext ContextID, cmsUInt32Number num, cmsUInt32Number size)
+{
+    _cmsMemPluginChunkType* mem = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+    return mem->CallocPtr(ContextID, num, size);
+}
+
+// Resizes Ptr to size bytes through the ReallocPtr callback of the context's memory plug-in chunk
+void* CMSEXPORT _cmsRealloc(cmsContext ContextID, void* Ptr, cmsUInt32Number size)
+{
+    _cmsMemPluginChunkType* mem = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+    return mem->ReallocPtr(ContextID, Ptr, size);
+}
+
+// Releases Ptr through the FreePtr callback of the context's memory plug-in. NULL is ignored.
+void CMSEXPORT _cmsFree(cmsContext ContextID, void* Ptr)
+{
+    _cmsMemPluginChunkType* mem;
+    if (Ptr == NULL) return;
+    mem = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+    mem->FreePtr(ContextID, Ptr);
+}
+
+// Duplicates size bytes of Org through the DupPtr callback of the context's memory plug-in chunk
+void* CMSEXPORT _cmsDupMem(cmsContext ContextID, const void* Org, cmsUInt32Number size)
+{
+    _cmsMemPluginChunkType* mem = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+    return mem->DupPtr(ContextID, Org, size);
+}
+
+// ********************************************************************************************
+
+// Sub allocation takes care of many pointers of small size. The memory allocated in
+// this way has to be freed at once. Next function allocates a single chunk for the linked
+// list. This method is preferred over realloc due to the big impact on throughput realloc
+// may have if memory is being swapped to disk (although that may not be true on all platforms).
+static
+_cmsSubAllocator_chunk* _cmsCreateSubAllocChunk(cmsContext ContextID, cmsUInt32Number Initial)
+{
+    _cmsSubAllocator_chunk* node;
+    cmsUInt32Number nBytes = Initial;
+
+    // 20K by default
+    if (nBytes == 0) nBytes = 20*1024;
+
+    // Create the container
+    node = (_cmsSubAllocator_chunk*) _cmsMallocZero(ContextID, sizeof(_cmsSubAllocator_chunk));
+    if (node == NULL) return NULL;
+
+    // Attach the storage block
+    node->Block = (cmsUInt8Number*) _cmsMalloc(ContextID, nBytes);
+    if (node->Block != NULL) {
+
+        // Initialize values: an empty block that is not linked to anything yet
+        node->BlockSize = nBytes;
+        node->Used      = 0;
+        node->next      = NULL;
+        return node;
+    }
+
+    // Something went wrong, release the container as well
+    _cmsFree(ContextID, node);
+    return NULL;
+}
+
+// The sub allocator is nothing but a pointer to the first chunk in the list. The ID of
+// the context it was created for is also kept in this structure.
+_cmsSubAllocator* _cmsCreateSubAlloc(cmsContext ContextID, cmsUInt32Number Initial)
+{
+    _cmsSubAllocator* pool;
+
+    // Create the container
+    pool = (_cmsSubAllocator*) _cmsMallocZero(ContextID, sizeof(_cmsSubAllocator));
+    if (pool == NULL) return NULL;
+
+    pool->ContextID = ContextID;
+    // First chunk of the list
+    pool->h         = _cmsCreateSubAllocChunk(ContextID, Initial);
+
+    // Without a first chunk the pool is useless, so the container is released again
+    if (pool->h != NULL) return pool;
+
+    _cmsFree(ContextID, pool);
+    return NULL;
+}
+
+
+// Releases every chunk of the linked list and then the sub allocator itself
+void _cmsSubAllocDestroy(_cmsSubAllocator* sub)
+{
+    _cmsSubAllocator_chunk* node = sub->h;
+
+    while (node != NULL) {
+        // Pick the successor before the node goes away
+        _cmsSubAllocator_chunk* following = node->next;
+        if (node->Block != NULL) _cmsFree(sub->ContextID, node->Block);
+        _cmsFree(sub->ContextID, node);
+        node = following;
+    }
+    // Free the header
+    _cmsFree(sub->ContextID, sub);
+}
+
+
+// Hands out a small memory block from the pool, or NULL if a new chunk cannot be created
+void*  _cmsSubAlloc(_cmsSubAllocator* sub, cmsUInt32Number size)
+{
+    cmsUInt32Number room = sub->h->BlockSize - sub->h->Used;
+    cmsUInt8Number* block;
+
+    size = _cmsALIGNMEM(size);
+
+    // If there is no room, allocate a new chunk of double memory size,
+    // or as big as the request when that is larger.
+    if (size > room) {
+
+        _cmsSubAllocator_chunk* fresh;
+        cmsUInt32Number wanted = sub->h->BlockSize * 2;
+
+        if (wanted < size) wanted = size;
+
+        fresh = _cmsCreateSubAllocChunk(sub->ContextID, wanted);
+        if (fresh == NULL) return NULL;
+
+        // The new chunk becomes the head of the list
+        fresh->next = sub->h;
+        sub->h      = fresh;
+    }
+
+    // Carve the block out of the head chunk
+    block = sub->h->Block + sub->h->Used;
+    sub->h->Used += size;
+
+    return (void*) block;
+}
+
+// Duplicate in pool: copies size bytes of ptr into memory taken from the sub allocator
+void* _cmsSubAllocDup(_cmsSubAllocator* s, const void *ptr, cmsUInt32Number size)
+{
+    void* copy;
+
+    // Dup of null pointer is also NULL
+    if (ptr == NULL) return NULL;
+
+    copy = _cmsSubAlloc(s, size);
+
+    // ptr is known to be valid at this point, only the allocation may have failed
+    if (copy == NULL)
+        return NULL;
+
+    memcpy(copy, ptr, size);
+    return copy;
+}
+
+
+
+// Error logging ******************************************************************
+
+// There is no error handling at all. When a function fails, it returns a proper value.
+// For example, all create functions return NULL on failure. Others return FALSE.
+// It may be interesting, for the developer, to know why the function is failing.
+// For that reason, lcms2 offers a logging function. This function receives
+// an ENGLISH string with some clues on what is going wrong. You can show this
+// info to the end user, or just create some sort of log.
+// The logging function should NOT terminate the program, as this obviously can leak
+// resources. It is the programmer's responsibility to check each function return code
+// to make sure it didn't fail.
+
+// Error messages are limited to MAX_ERROR_MESSAGE_LEN
+
+#define MAX_ERROR_MESSAGE_LEN   1024
+
+// ---------------------------------------------------------------------------------------------------------
+
+// This is our default log error
+static void DefaultLogErrorHandlerFunction(cmsContext ContextID, cmsUInt32Number ErrorCode, const char *Text);
+
+// Context0 storage, which is global
+_cmsLogErrorChunkType _cmsLogErrorChunk = { DefaultLogErrorHandlerFunction };
+
+// Allocates and inits the error logger container for a given context. If src is NULL, the value is
+// initialized to the default. Otherwise, it is duplicated from src. The interface is standard
+// across all context clients.
+void _cmsAllocLogErrorChunk(struct _cmsContext_struct* ctx,
+                            const struct _cmsContext_struct* src)
+{
+    static _cmsLogErrorChunkType LogErrorChunk = { DefaultLogErrorHandlerFunction };
+    void* from = &LogErrorChunk;
+
+    // A source context overrides the default template
+    if (src != NULL)
+        from = src->chunks[Logger];
+
+    // The copy is taken from the memory pool of the new context
+    ctx->chunks[Logger] = _cmsSubAllocDup(ctx->MemPool, from,
+                                          sizeof(_cmsLogErrorChunkType));
+}
+
+// The default error logger does nothing: all three arguments are deliberately ignored.
+static
+void DefaultLogErrorHandlerFunction(cmsContext ContextID, cmsUInt32Number ErrorCode, const char *Text)
+{
+    cmsUNUSED_PARAMETER(Text);
+    cmsUNUSED_PARAMETER(ErrorCode);
+    cmsUNUSED_PARAMETER(ContextID);
+
+    // fprintf(stderr, "[lcms]: %s\n", Text);
+    // fflush(stderr);
+}
+
+// Changes the error logger of a context. A NULL Fn restores the default logger.
+void CMSEXPORT cmsSetLogErrorHandlerTHR(cmsContext ContextID, cmsLogErrorHandlerFunction Fn)
+{
+    _cmsLogErrorChunkType* logger = (_cmsLogErrorChunkType*) _cmsContextGetClientChunk(ContextID, Logger);
+
+    // Nothing can be changed without a logger chunk
+    if (logger == NULL) return;
+
+    if (Fn != NULL)
+        logger->LogErrorHandler = Fn;
+    else
+        logger->LogErrorHandler = DefaultLogErrorHandlerFunction;
+}
+
+// Change log error, legacy: forwards Fn to cmsSetLogErrorHandlerTHR with a NULL context
+void CMSEXPORT cmsSetLogErrorHandler(cmsLogErrorHandlerFunction Fn)
+{
+    cmsSetLogErrorHandlerTHR(NULL, Fn);    
+}
+
+// Log an error. ErrorText is a printf-like format holding an english description of error.
+void CMSEXPORT cmsSignalError(cmsContext ContextID, cmsUInt32Number ErrorCode, const char *ErrorText, ...)
+{
+    va_list args;
+    char Buffer[MAX_ERROR_MESSAGE_LEN];
+    _cmsLogErrorChunkType* lhg;
+
+    va_start(args, ErrorText);
+    vsnprintf(Buffer, MAX_ERROR_MESSAGE_LEN-1, ErrorText, args);
+    va_end(args);
+
+    // Make sure of terminator, in case the formatting routine left none on truncation
+    Buffer[MAX_ERROR_MESSAGE_LEN-1] = 0;
+    // Check for the context, if specified go there. If not, go for the global. The chunk
+    // pointer is tested as well, mirroring what cmsSetLogErrorHandlerTHR does.
+    lhg = (_cmsLogErrorChunkType*) _cmsContextGetClientChunk(ContextID, Logger);
+    if (lhg != NULL && lhg ->LogErrorHandler)
+        lhg ->LogErrorHandler(ContextID, ErrorCode, Buffer);
+}
+
+// Utility function to print signatures. String receives the four bytes of the
+// signature followed by a zero terminator, so it has to hold 5 chars.
+void _cmsTagSignature2String(char String[5], cmsTagSignature sig)
+{
+    cmsUInt32Number bigEndian;
+
+    // Convert to big endian
+    bigEndian = _cmsAdjustEndianess32((cmsUInt32Number) sig);
+
+    // Move chars
+    memmove(String, &bigEndian, 4);
+    // Make sure of terminator
+    String[4] = 0;
+}
+
+//--------------------------------------------------------------------------------------------------
+
+// Default mutex factory: allocates a mutex and initializes it. Returns NULL when out of memory.
+static
+void* defMtxCreate(cmsContext id)
+{
+    _cmsMutex* ptr_mutex = (_cmsMutex*) _cmsMalloc(id, sizeof(_cmsMutex));
+    if (ptr_mutex != NULL) { _cmsInitMutexPrimitive(ptr_mutex); }   // never init a failed allocation
+    return (void*) ptr_mutex;
+}
+// Default mutex destructor: destroys the primitive, then frees the memory it occupied
+static
+void defMtxDestroy(cmsContext id, void* mtx)
+{
+    _cmsDestroyMutexPrimitive((_cmsMutex *) mtx); 
+    _cmsFree(id, mtx);
+}
+// Default lock callback: reports TRUE when the locking primitive returns 0
+static
+cmsBool defMtxLock(cmsContext id, void* mtx)
+{
+    cmsUNUSED_PARAMETER(id);
+    return _cmsLockPrimitive((_cmsMutex *) mtx) == 0;     
+}
+// Default unlock callback: hands the mutex to the unlocking primitive
+static
+void defMtxUnlock(cmsContext id, void* mtx)
+{
+    cmsUNUSED_PARAMETER(id);
+    _cmsUnlockPrimitive((_cmsMutex *) mtx); 
+}
+
+
+
+// Pointers to the default mutex functions in Context0
+_cmsMutexPluginChunkType _cmsMutexPluginChunk = { defMtxCreate, defMtxDestroy, defMtxLock, defMtxUnlock };
+
+// Allocate and init mutex container: the chunk is duplicated from src, or from the
+// default mutex functions if src is NULL.
+void _cmsAllocMutexPluginChunk(struct _cmsContext_struct* ctx,
+                                        const struct _cmsContext_struct* src)
+{
+    static _cmsMutexPluginChunkType MutexChunk = {defMtxCreate, defMtxDestroy, defMtxLock, defMtxUnlock };
+    void* from = &MutexChunk;
+
+    // A source context overrides the default template
+    if (src != NULL)
+        from = src->chunks[MutexPlugin];
+
+    // The copy is taken from the memory pool of the new context
+    ctx->chunks[MutexPlugin] = _cmsSubAllocDup(ctx->MemPool, from,
+                                               sizeof(_cmsMutexPluginChunkType));
+}
+
+// Registers a mutex plug-in: installs its four callbacks, or clears them all when Data is NULL
+cmsBool  _cmsRegisterMutexPlugin(cmsContext ContextID, cmsPluginBase* Data)
+{
+    cmsPluginMutex* mutexPlugin = (cmsPluginMutex*) Data;
+    _cmsMutexPluginChunkType* chunk = ( _cmsMutexPluginChunkType*) _cmsContextGetClientChunk(ContextID, MutexPlugin);
+
+    if (Data == NULL) {
+
+        // No lock routines
+        chunk->CreateMutexPtr  = NULL;
+        chunk->DestroyMutexPtr = NULL;
+        chunk->LockMutexPtr    = NULL;
+        chunk->UnlockMutexPtr  = NULL;
+        return TRUE;
+    }
+
+    // The plug-in is rejected unless it provides all four callbacks
+    if (mutexPlugin->CreateMutexPtr == NULL)  return FALSE;
+    if (mutexPlugin->DestroyMutexPtr == NULL) return FALSE;
+    if (mutexPlugin->LockMutexPtr == NULL)    return FALSE;
+    if (mutexPlugin->UnlockMutexPtr == NULL)  return FALSE;
+
+    chunk->CreateMutexPtr  = mutexPlugin->CreateMutexPtr;
+    chunk->DestroyMutexPtr = mutexPlugin->DestroyMutexPtr;
+    chunk->LockMutexPtr    = mutexPlugin->LockMutexPtr;
+    chunk->UnlockMutexPtr  = mutexPlugin->UnlockMutexPtr;
+    // All is ok
+    return TRUE;
+}
+
+// Generic mutex functions. Creates a mutex through the plug-in; NULL when no creator is installed.
+void* CMSEXPORT _cmsCreateMutex(cmsContext ContextID)
+{
+    _cmsMutexPluginChunkType* chunk = (_cmsMutexPluginChunkType*) _cmsContextGetClientChunk(ContextID, MutexPlugin);
+
+    if (chunk->CreateMutexPtr != NULL)
+        return chunk->CreateMutexPtr(ContextID);
+    return NULL;
+}
+
+void CMSEXPORT _cmsDestroyMutex(cmsContext ContextID, void* mtx)
+{
+    _cmsMutexPluginChunkType* chunk = (_cmsMutexPluginChunkType*) _cmsContextGetClientChunk(ContextID, MutexPlugin);
+
+    // Destroys mtx through the plug-in; with no destructor installed this is a no-op
+    if (chunk->DestroyMutexPtr == NULL) return;
+
+    chunk->DestroyMutexPtr(ContextID, mtx);
+}
+
+cmsBool CMSEXPORT _cmsLockMutex(cmsContext ContextID, void* mtx)
+{
+    _cmsMutexPluginChunkType* chunk = (_cmsMutexPluginChunkType*) _cmsContextGetClientChunk(ContextID, MutexPlugin);
+    // Locks mtx through the plug-in; with no lock routine installed the result is always TRUE
+    if (chunk->LockMutexPtr != NULL)
+        return chunk->LockMutexPtr(ContextID, mtx);
+    return TRUE;
+}
+
+void CMSEXPORT _cmsUnlockMutex(cmsContext ContextID, void* mtx)
+{
+    _cmsMutexPluginChunkType* chunk = (_cmsMutexPluginChunkType*) _cmsContextGetClientChunk(ContextID, MutexPlugin);
+
+    // Unlocks mtx through the plug-in; with no unlock routine installed this is a no-op
+    if (chunk->UnlockMutexPtr == NULL) return;
+
+    chunk->UnlockMutexPtr(ContextID, mtx);
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmsgamma.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmsgamma.c
new file mode 100644
index 0000000000..8bd212c6e6
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmsgamma.c
@@ -0,0 +1,1433 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2013 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+#include "lcms2_internal.h"
+
+// Tone curves are powerful constructs that can contain curves specified in diverse ways.
+// The curve is stored in segments, where each segment can be sampled or specified by parameters.
+// A 16-bit simplification of the *whole* curve is kept for optimization purposes. For float operation,
+// each segment is evaluated separately. Plug-ins may be used to define new parametric schemes;
+// each plug-in may define up to MAX_TYPES_IN_LCMS_PLUGIN function types. For defining a function,
+// the plug-in should provide the type id, how many parameters each type has, and a pointer to
+// a procedure that evaluates the function. In the case of reverse evaluation, the evaluator will
+// be called with the type id as a negative value, and a sampled version of the reversed curve
+// will be built.
+
+// ----------------------------------------------------------------- Implementation
+// Maximum number of nodes
+#define MAX_NODES_IN_CURVE   4097
+#define MINUS_INF            (-1E22F)
+#define PLUS_INF             (+1E22F)
+
+// One node of the list of supported parametric curves: a set of curve types sharing one evaluator
+typedef struct _cmsParametricCurvesCollection_st {
+
+    cmsUInt32Number nFunctions;                                     // Number of supported functions in this chunk
+    cmsInt32Number  FunctionTypes[MAX_TYPES_IN_LCMS_PLUGIN];        // The identification types
+    cmsUInt32Number ParameterCount[MAX_TYPES_IN_LCMS_PLUGIN];       // Number of parameters for each function
+
+    cmsParametricCurveEvaluator Evaluator;                          // The evaluator used for every type in this node
+
+    struct _cmsParametricCurvesCollection_st* Next; // Next in list, NULL at the end
+
+} _cmsParametricCurvesCollection;
+
+// This is the default (built-in) evaluator
+static cmsFloat64Number DefaultEvalParametricFn(cmsInt32Number Type, const cmsFloat64Number Params[], cmsFloat64Number R);
+
+// The built-in list
+static _cmsParametricCurvesCollection DefaultCurves = {
+    9,                                  // # of curve types
+    { 1, 2, 3, 4, 5, 6, 7, 8, 108 },    // Parametric curve ID
+    { 1, 3, 4, 5, 7, 4, 5, 5, 1 },      // Parameters by type
+    DefaultEvalParametricFn,            // Evaluator
+    NULL                                // Next in chain
+};
+
+// Duplicates the zone of memory used by the plug-in in the new context
+static
+void DupPluginCurvesList(struct _cmsContext_struct* ctx,
+                                               const struct _cmsContext_struct* src)
+{
+    _cmsCurvesPluginChunkType newHead = { NULL };
+    _cmsParametricCurvesCollection* node;
+    _cmsParametricCurvesCollection* tail = NULL;
+    _cmsCurvesPluginChunkType* srcHead = (_cmsCurvesPluginChunkType*) src->chunks[CurvesPlugin];
+
+    _cmsAssert(srcHead != NULL);
+
+    // Walk the source list copying every node into the pool of the new context
+    node = srcHead->ParametricCurves;
+    while (node != NULL) {
+
+        _cmsParametricCurvesCollection* copy =
+            (_cmsParametricCurvesCollection*) _cmsSubAllocDup(ctx->MemPool, node, sizeof(_cmsParametricCurvesCollection));
+
+        // Out of memory: give up, leaving the chunk of the new context untouched
+        if (copy == NULL) return;
+
+        // Append at the tail so the copy keeps the order of the original list
+        copy->Next = NULL;
+        if (tail == NULL)
+            newHead.ParametricCurves = copy;
+        else
+            tail->Next = copy;
+
+        tail = copy;
+        node = node->Next;
+    }
+
+    ctx->chunks[CurvesPlugin] = _cmsSubAllocDup(ctx->MemPool, &newHead, sizeof(_cmsCurvesPluginChunkType));
+}
+
+// Sets up the curves plug-in chunk of a context. When copying, the allocator has to follow the chain.
+void _cmsAllocCurvesPluginChunk(struct _cmsContext_struct* ctx,
+                                const struct _cmsContext_struct* src)
+{
+    static _cmsCurvesPluginChunkType CurvesPluginChunk = { NULL };
+
+    _cmsAssert(ctx != NULL);
+
+    if (src == NULL) {
+        // No source context: start with an empty list of plug-in curves
+        ctx->chunks[CurvesPlugin] = _cmsSubAllocDup(ctx->MemPool, &CurvesPluginChunk, sizeof(_cmsCurvesPluginChunkType));
+        return;
+    }
+    // Copy all linked list
+    DupPluginCurvesList(ctx, src);
+}
+
+
+// The linked list head
+_cmsCurvesPluginChunkType _cmsCurvesPluginChunk = { NULL };
+
+// Installs new parametric curves from a plug-in. A NULL Data empties the list of the context.
+cmsBool _cmsRegisterParametricCurvesPlugin(cmsContext ContextID, cmsPluginBase* Data)
+{
+    _cmsCurvesPluginChunkType* chunk = ( _cmsCurvesPluginChunkType*) _cmsContextGetClientChunk(ContextID, CurvesPlugin);
+    cmsPluginParametricCurves* curves = (cmsPluginParametricCurves*) Data;
+    _cmsParametricCurvesCollection* node;
+    cmsUInt32Number count;
+
+    if (Data == NULL) {
+        chunk->ParametricCurves = NULL;
+        return TRUE;
+    }
+
+    node = (_cmsParametricCurvesCollection*) _cmsPluginMalloc(ContextID, sizeof(_cmsParametricCurvesCollection));
+    if (node == NULL) return FALSE;
+
+    // Make sure no mem overwrites: the arrays hold MAX_TYPES_IN_LCMS_PLUGIN entries at most
+    count = curves->nFunctions;
+    if (count > MAX_TYPES_IN_LCMS_PLUGIN) count = MAX_TYPES_IN_LCMS_PLUGIN;
+
+    // Copy the parameters
+    node->Evaluator  = curves->Evaluator;
+    node->nFunctions = count;
+
+    // Copy the data
+    memmove(node->FunctionTypes,  curves->FunctionTypes,  count * sizeof(cmsUInt32Number));
+    memmove(node->ParameterCount, curves->ParameterCount, count * sizeof(cmsUInt32Number));
+
+    // Keep linked list: the new collection goes to the front
+    node->Next = chunk->ParametricCurves;
+    chunk->ParametricCurves = node;
+
+    // All is ok
+    return TRUE;
+}
+
+
+// Search in type list, return position or -1 if not found. The sign of Type is ignored.
+static
+int IsInSet(int Type, _cmsParametricCurvesCollection* c)
+{
+    int pos = 0;
+    while (pos < (int) c->nFunctions) {
+        if (c->FunctionTypes[pos] == abs(Type)) return pos;
+        pos++;
+    }
+    return -1;
+}
+
+
+// Search for the collection which contains a specific type. Plug-in curves registered in the
+// context take precedence over the built-in ones. On success, the position of the type inside
+// the collection is stored in *index (when not NULL). Returns NULL for unknown types.
+static
+_cmsParametricCurvesCollection *GetParametricCurveByType(cmsContext ContextID, int Type, int* index)
+{
+    _cmsCurvesPluginChunkType* chunk = ( _cmsCurvesPluginChunkType*) _cmsContextGetClientChunk(ContextID, CurvesPlugin);
+    _cmsParametricCurvesCollection* lists[2];
+    _cmsParametricCurvesCollection* c;
+    int pass;
+
+    // The curves of the context are searched first.
+    // If none found, revert for defaults.
+    lists[0] = chunk->ParametricCurves;
+    lists[1] = &DefaultCurves;
+
+    for (pass = 0; pass < 2; pass++) {
+
+        for (c = lists[pass]; c != NULL; c = c->Next) {
+
+            int Position = IsInSet(Type, c);
+
+            if (Position == -1) continue;
+
+            if (index != NULL)
+                *index = Position;
+            return c;
+        }
+    }
+
+    return NULL;
+}
+
+// Low level allocate, which takes care of memory details. nEntries may be zero, and in this case
+// no optimization curve is computed. nSegments may also be zero in the inverse case, where only the
+// optimization curve is given. Both being zero simultaneously is an error. Returns NULL on failure.
+static
+cmsToneCurve* AllocateToneCurveStruct(cmsContext ContextID, cmsUInt32Number nEntries,
+                                      cmsUInt32Number nSegments, const cmsCurveSegment* Segments,
+                                      const cmsUInt16Number* Values)
+{
+    cmsToneCurve* p;
+    cmsUInt32Number i;
+
+    // We allow huge tables, which are then restricted for smoothing operations
+    if (nEntries > 65530) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Couldn't create tone curve of more than 65530 entries");
+        return NULL;
+    }
+
+    if (nEntries == 0 && nSegments == 0) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Couldn't create tone curve with zero segments and no table");
+        return NULL;
+    }
+
+    // Allocate all required pointers, etc. The struct starts zeroed, so unset pointers stay NULL.
+    p = (cmsToneCurve*) _cmsMallocZero(ContextID, sizeof(cmsToneCurve));
+    if (!p) return NULL;
+
+    // In this case, there are no segments
+    if (nSegments == 0) {
+        p ->Segments = NULL;
+        p ->Evals = NULL;
+    }
+    else {
+        p ->Segments = (cmsCurveSegment*) _cmsCalloc(ContextID, nSegments, sizeof(cmsCurveSegment));
+        if (p ->Segments == NULL) goto Error;
+
+        p ->Evals    = (cmsParametricCurveEvaluator*) _cmsCalloc(ContextID, nSegments, sizeof(cmsParametricCurveEvaluator));
+        if (p ->Evals == NULL) goto Error;
+    }
+
+    p -> nSegments = nSegments;
+
+    // This 16-bit table contains a limited precision representation of the whole curve and is kept for
+    // increasing xput on certain operations.
+    if (nEntries == 0) {
+        p ->Table16 = NULL;
+    }
+    else {
+       p ->Table16 = (cmsUInt16Number*)  _cmsCalloc(ContextID, nEntries, sizeof(cmsUInt16Number));
+       if (p ->Table16 == NULL) goto Error;
+    }
+
+    p -> nEntries  = nEntries;
+
+    // Initialize members if requested
+    if (Values != NULL && (nEntries > 0)) {
+        for (i=0; i < nEntries; i++)
+            p ->Table16[i] = Values[i];
+    }
+
+    // Initialize the segments stuff. The evaluator for each segment is located and a pointer to it
+    // is placed in advance to maximize performance.
+    if (Segments != NULL && (nSegments > 0)) {
+        _cmsParametricCurvesCollection *c;
+
+        p ->SegInterp = (cmsInterpParams**) _cmsCalloc(ContextID, nSegments, sizeof(cmsInterpParams*));
+        if (p ->SegInterp == NULL) goto Error;
+
+        for (i=0; i < nSegments; i++) {
+            // Type 0 is a special marker for table-based curves
+            if (Segments[i].Type == 0)
+                p ->SegInterp[i] = _cmsComputeInterpParams(ContextID, Segments[i].nGridPoints, 1, 1, NULL, CMS_LERP_FLAGS_FLOAT);
+            memmove(&p ->Segments[i], &Segments[i], sizeof(cmsCurveSegment));
+
+            // The sampled points are duplicated, so the curve never shares them with the caller
+            if (Segments[i].Type == 0 && Segments[i].SampledPoints != NULL)
+                p ->Segments[i].SampledPoints = (cmsFloat32Number*) _cmsDupMem(ContextID, Segments[i].SampledPoints, sizeof(cmsFloat32Number) * Segments[i].nGridPoints);
+            else
+                p ->Segments[i].SampledPoints = NULL;
+            c = GetParametricCurveByType(ContextID, Segments[i].Type, NULL);
+            if (c != NULL)
+                    p ->Evals[i] = c ->Evaluator;
+        }
+    }
+
+    p ->InterpParams = _cmsComputeInterpParams(ContextID, p ->nEntries, 1, 1, p->Table16, CMS_LERP_FLAGS_16BITS);
+    if (p->InterpParams != NULL) return p;
+
+Error:
+    // Release everything built so far, including the per-segment tables and interpolators
+    for (i=0; i < nSegments; i++) {
+        if (p ->Segments && p ->Segments[i].SampledPoints) _cmsFree(ContextID, p ->Segments[i].SampledPoints);
+        if (p ->SegInterp && p ->SegInterp[i]) _cmsFreeInterpParams(p ->SegInterp[i]);
+    }
+    if (p -> SegInterp) _cmsFree(ContextID, p -> SegInterp);
+    if (p -> Segments) _cmsFree(ContextID, p ->Segments);
+    if (p -> Evals) _cmsFree(ContextID, p -> Evals);
+    if (p ->Table16) _cmsFree(ContextID, p ->Table16);
+    _cmsFree(ContextID, p);
+    return NULL;
+}
+
+
+// Parametric Fn using floating point
+static
+cmsFloat64Number DefaultEvalParametricFn(cmsInt32Number Type, const cmsFloat64Number Params[], cmsFloat64Number R)
+{
+    cmsFloat64Number e, Val, disc;
+
+    switch (Type) {
+
+   // X = Y ^ Gamma
+    case 1:
+        if (R < 0) {
+
+            if (fabs(Params[0] - 1.0) < MATRIX_DET_TOLERANCE)
+                Val = R;
+            else
+                Val = 0;
+        }
+        else
+            Val = pow(R, Params[0]);
+        break;
+
+    // Type 1 Reversed: X = Y ^1/gamma
+    case -1:
+        if (R < 0) {
+
+            if (fabs(Params[0] - 1.0) < MATRIX_DET_TOLERANCE)
+                Val = R;
+            else
+                Val = 0;
+        }
+        else
+        {
+            if (fabs(Params[0]) < MATRIX_DET_TOLERANCE)
+                Val = PLUS_INF;
+            else
+                Val = pow(R, 1 / Params[0]);
+        }
+        break;
+
+    // CIE 122-1966
+    // Y = (aX + b)^Gamma  | X >= -b/a
+    // Y = 0               | else
+    case 2:
+    {
+
+        if (fabs(Params[1]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            disc = -Params[2] / Params[1];
+
+            if (R >= disc) {
+
+                e = Params[1] * R + Params[2];
+
+                if (e > 0)
+                    Val = pow(e, Params[0]);
+                else
+                    Val = 0;
+            }
+            else
+                Val = 0;
+        }
+    }
+    break;
+
+     // Type 2 Reversed
+     // X = (Y ^1/g  - b) / a
+     case -2:
+     {
+         if (fabs(Params[0]) < MATRIX_DET_TOLERANCE ||
+             fabs(Params[1]) < MATRIX_DET_TOLERANCE)
+         {
+             Val = 0;
+         }
+         else
+         {
+             if (R < 0)
+                 Val = 0;
+             else
+                 Val = (pow(R, 1.0 / Params[0]) - Params[2]) / Params[1];
+
+             if (Val < 0)
+                 Val = 0;
+         }
+     }         
+     break;
+
+
+    // IEC 61966-3
+    // Y = (aX + b)^Gamma | X <= -b/a
+    // Y = c              | else
+    case 3:
+    {
+        if (fabs(Params[1]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            disc = -Params[2] / Params[1];
+            if (disc < 0)
+                disc = 0;
+
+            if (R >= disc) {
+
+                e = Params[1] * R + Params[2];
+
+                if (e > 0)
+                    Val = pow(e, Params[0]) + Params[3];
+                else
+                    Val = 0;
+            }
+            else
+                Val = Params[3];
+        }
+    }
+    break;
+
+
+    // Type 3 reversed
+    // X=((Y-c)^1/g - b)/a      | (Y>=c)
+    // X=-b/a                   | (Y<c)
+    case -3:
+    {
+        if (fabs(Params[1]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            if (R >= Params[3]) {
+
+                e = R - Params[3];
+
+                if (e > 0)
+                    Val = (pow(e, 1 / Params[0]) - Params[2]) / Params[1];
+                else
+                    Val = 0;
+            }
+            else {
+                Val = -Params[2] / Params[1];
+            }
+        }
+    }
+    break;
+
+
+    // IEC 61966-2.1 (sRGB)
+    // Y = (aX + b)^Gamma | X >= d
+    // Y = cX             | X < d
+    case 4:
+        if (R >= Params[4]) {
+
+            e = Params[1]*R + Params[2];
+
+            if (e > 0)
+                Val = pow(e, Params[0]);
+            else
+                Val = 0;
+        }
+        else
+            Val = R * Params[3];
+        break;
+
+    // Type 4 reversed
+    // X=((Y^1/g-b)/a)    | Y >= (ad+b)^g
+    // X=Y/c              | Y< (ad+b)^g
+    case -4:
+    {
+        if (fabs(Params[0]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[1]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[3]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            e = Params[1] * Params[4] + Params[2];
+            if (e < 0)
+                disc = 0;
+            else
+                disc = pow(e, Params[0]);
+
+            if (R >= disc) {
+
+                Val = (pow(R, 1.0 / Params[0]) - Params[2]) / Params[1];
+            }
+            else {
+                Val = R / Params[3];
+            }
+        }
+    }
+    break;
+
+
+    // Y = (aX + b)^Gamma + e | X >= d
+    // Y = cX + f             | X < d
+    case 5:
+        if (R >= Params[4]) {
+
+            e = Params[1]*R + Params[2];
+
+            if (e > 0)
+                Val = pow(e, Params[0]) + Params[5];
+            else
+                Val = Params[5];
+        }
+        else
+            Val = R*Params[3] + Params[6];
+        break;
+
+
+    // Reversed type 5
+    // X=((Y-e)1/g-b)/a   | Y >=(ad+b)^g+e), cd+f
+    // X=(Y-f)/c          | else
+    case -5:
+    {
+        if (fabs(Params[1]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[3]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            disc = Params[3] * Params[4] + Params[6];
+            if (R >= disc) {
+
+                e = R - Params[5];
+                if (e < 0)
+                    Val = 0;
+                else
+                    Val = (pow(e, 1.0 / Params[0]) - Params[2]) / Params[1];
+            }
+            else {
+                Val = (R - Params[6]) / Params[3];
+            }
+        }
+    }
+    break;
+
+
+    // Types 6,7,8 comes from segmented curves as described in ICCSpecRevision_02_11_06_Float.pdf
+    // Type 6 is basically identical to type 5 without d
+
+    // Y = (a * X + b) ^ Gamma + c
+    case 6:
+        e = Params[1]*R + Params[2];
+
+        if (e < 0)
+            Val = Params[3];
+        else
+            Val = pow(e, Params[0]) + Params[3];
+        break;
+
+    // ((Y - c) ^1/Gamma - b) / a
+    case -6:
+    {
+        if (fabs(Params[1]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            e = R - Params[3];
+            if (e < 0)
+                Val = 0;
+            else
+                Val = (pow(e, 1.0 / Params[0]) - Params[2]) / Params[1];
+        }
+    }
+    break;
+
+
+    // Y = a * log (b * X^Gamma + c) + d
+    case 7:
+
+       e = Params[2] * pow(R, Params[0]) + Params[3];
+       if (e <= 0)
+           Val = Params[4];
+       else
+           Val = Params[1]*log10(e) + Params[4];
+       break;
+
+    // (Y - d) / a = log(b * X ^Gamma + c)
+    // pow(10, (Y-d) / a) = b * X ^Gamma + c
+    // pow((pow(10, (Y-d) / a) - c) / b, 1/g) = X
+    case -7:
+    {
+        if (fabs(Params[0]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[1]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[2]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            Val = pow((pow(10.0, (R - Params[4]) / Params[1]) - Params[3]) / Params[2], 1.0 / Params[0]);
+        }
+    }
+    break;
+
+
+   //Y = a * b^(c*X+d) + e
+   case 8:
+       Val = (Params[0] * pow(Params[1], Params[2] * R + Params[3]) + Params[4]);
+       break;
+
+
+   // Y = (log((y-e) / a) / log(b) - d ) / c
+   // a=0, b=1, c=2, d=3, e=4,
+   case -8:
+
+       disc = R - Params[4];
+       if (disc < 0) Val = 0;
+       else
+       {
+           if (fabs(Params[0]) < MATRIX_DET_TOLERANCE ||
+               fabs(Params[2]) < MATRIX_DET_TOLERANCE)
+           {
+               Val = 0;
+           }
+           else
+           {
+               Val = (log(disc / Params[0]) / log(Params[1]) - Params[3]) / Params[2];
+           }
+       }
+       break;
+
+   // S-Shaped: (1 - (1-x)^1/g)^1/g
+   case 108:
+       if (fabs(Params[0]) < MATRIX_DET_TOLERANCE)
+           Val = 0;
+       else
+           Val = pow(1.0 - pow(1 - R, 1/Params[0]), 1/Params[0]);
+      break;
+
+    // y = (1 - (1-x)^1/g)^1/g
+    // y^g = (1 - (1-x)^1/g)
+    // 1 - y^g = (1-x)^1/g
+    // (1 - y^g)^g = 1 - x
+    // 1 - (1 - y^g)^g
+    case -108:
+        Val = 1 - pow(1 - pow(R, Params[0]), Params[0]);
+        break;
+
+    default:
+        // Unsupported parametric curve. Should never reach here
+        return 0;
+    }
+
+    return Val;
+}
+
+// Evaluate a segmented function for a single value. Segments are scanned from last to first;
+// MINUS_INF is returned when no segment's (x0, x1] domain contains R.
+static
+cmsFloat64Number EvalSegmentedFn(const cmsToneCurve *g, cmsFloat64Number R)
+{
+    int i;
+    cmsFloat32Number Out32;
+    cmsFloat64Number Out;
+
+    for (i = (int) g->nSegments - 1; i >= 0; --i) {
+
+        // Check for domain: open at x0, closed at x1
+        if ((R > g->Segments[i].x0) && (R <= g->Segments[i].x1)) {
+
+            // Type == 0 means segment is sampled: interpolate on its table
+            if (g->Segments[i].Type == 0) {
+                // Position of R inside the segment, as a fraction of the segment width
+                cmsFloat32Number R1 = (cmsFloat32Number)(R - g->Segments[i].x0) / (g->Segments[i].x1 - g->Segments[i].x0);
+
+                // Setup the table (TODO: clean that)
+                g->SegInterp[i]->Table = g->Segments[i].SampledPoints;
+
+                g->SegInterp[i]->Interpolation.LerpFloat(&R1, &Out32, g->SegInterp[i]);
+                Out = (cmsFloat64Number) Out32;
+
+            }
+            else {
+                Out = g->Evals[i](g->Segments[i].Type, g->Segments[i].Params, R);
+            }
+
+            // isinf() is nonzero for BOTH infinities, so the sign must be tested
+            // explicitly. The former isinf(-Out) branch was unreachable and a result
+            // of -infinity was being reported as PLUS_INF.
+            if (isinf(Out)) {
+                if (Out < 0) return MINUS_INF;
+                return PLUS_INF;
+            }
+
+            return Out;
+        }
+    }
+
+    return MINUS_INF;
+}
+
+// Access to estimated low-res table: returns the number of entries (nEntries) of the curve's table
+cmsUInt32Number CMSEXPORT cmsGetToneCurveEstimatedTableEntries(const cmsToneCurve* t)
+{
+    _cmsAssert(t != NULL);
+    return t ->nEntries;
+}
+
+const cmsUInt16Number* CMSEXPORT cmsGetToneCurveEstimatedTable(const cmsToneCurve* t)
+{
+    _cmsAssert(t != NULL);
+    return t ->Table16;    // The curve's 16-bit table; the pointer is handed out as-is, not copied
+}
+
+
+// Create a gamma curve from a 16-bit table only: no segments are handed over, so the floating point description stays empty.
+cmsToneCurve* CMSEXPORT cmsBuildTabulatedToneCurve16(cmsContext ContextID, cmsUInt32Number nEntries, const cmsUInt16Number Values[])
+{
+    const cmsUInt32Number NoSegments = 0;
+    return AllocateToneCurveStruct(ContextID, nEntries, NoSegments, NULL, Values);
+}
+
+static
+cmsUInt32Number EntriesByGamma(cmsFloat64Number Gamma)
+{
+    // A gamma within 0.001 of 1.0 gets a 2-entry table; any other gamma gets 4096 entries
+    return (fabs(Gamma - 1.0) < 0.001) ? 2 : 4096;
+}
+
+
+// Create a segmented gamma from nSegments segments, then fill its 16 bit table
+cmsToneCurve* CMSEXPORT cmsBuildSegmentedToneCurve(cmsContext ContextID,
+                                                   cmsUInt32Number nSegments, const cmsCurveSegment Segments[])
+{
+    cmsToneCurve* Curve;
+    cmsUInt32Number Node;
+    cmsUInt32Number nNodes;
+
+    _cmsAssert(Segments != NULL);
+
+    // Optimization for identity curves: a lone type 1 segment lets EntriesByGamma()
+    // choose the table size from its first parameter. Everything else gets 4096 nodes.
+    if (nSegments == 1 && Segments[0].Type == 1)
+        nNodes = EntriesByGamma(Segments[0].Params[0]);
+    else
+        nNodes = 4096;
+
+    Curve = AllocateToneCurveStruct(ContextID, nNodes, nSegments, Segments, NULL);
+    if (Curve == NULL)
+        return NULL;
+
+    // Once we have the floating point version, approximate it by a 16 bit table for
+    // performance reasons. This table would normally not be used except on 8/16 bits transforms.
+    for (Node = 0; Node < nNodes; Node++) {
+        // Sample the curve at evenly spaced positions across 0..1
+        cmsFloat64Number Pos    = (cmsFloat64Number) Node / (nNodes-1);
+        cmsFloat64Number Sample = EvalSegmentedFn(Curve, Pos);
+
+        // Round and saturate
+        Curve ->Table16[Node] = _cmsQuickSaturateWord(Sample * 65535.0);
+    }
+
+    return Curve;
+}
+
+// Use a segmented curve to store the floating point table. Returns NULL on empty input.
+cmsToneCurve* CMSEXPORT cmsBuildTabulatedToneCurveFloat(cmsContext ContextID, cmsUInt32Number nEntries, const cmsFloat32Number values[])
+{
+    cmsCurveSegment Seg[3];
+
+    // Nothing to tabulate: values[0] and values[nEntries-1] below would be read out of bounds
+    if (nEntries == 0 || values == NULL) return NULL;
+
+    // A segmented tone curve should have function segments in the first and last positions.
+    // Up to 0: type 6 with a = b = 0 evaluates to the constant c, here values[0]
+    Seg[0].x0 = MINUS_INF;
+    Seg[0].x1 = 0;
+    Seg[0].Type = 6;
+
+    Seg[0].Params[0] = 1;
+    Seg[0].Params[1] = 0;
+    Seg[0].Params[2] = 0;
+    Seg[0].Params[3] = values[0];
+    Seg[0].Params[4] = 0;
+
+    // From zero to 1: sampled segment (type 0) over the caller's table
+    Seg[1].x0 = 0;
+    Seg[1].x1 = 1.0;
+    Seg[1].Type = 0;
+    Seg[1].nGridPoints = nEntries;
+    Seg[1].SampledPoints = (cmsFloat32Number*) values;
+
+    // Final segment is constant = last sample
+    Seg[2].x0 = 1.0;
+    Seg[2].x1 = PLUS_INF;
+    Seg[2].Type = 6;
+    Seg[2].Params[0] = 1;
+    Seg[2].Params[1] = 0;
+    Seg[2].Params[2] = 0;
+    Seg[2].Params[3] = values[nEntries-1];
+    Seg[2].Params[4] = 0;
+
+    return cmsBuildSegmentedToneCurve(ContextID, 3, Seg);
+}
+
+// Parametric curves
+//
+// Parameters go as: Curve, a, b, c, d, e, f
+// Type is the ICC type +1; if type is negative, then the curve is analytically inverted.
+// Returns NULL, after signalling an error, when GetParametricCurveByType() finds no match for Type.
+cmsToneCurve* CMSEXPORT cmsBuildParametricToneCurve(cmsContext ContextID, cmsInt32Number Type, const cmsFloat64Number Params[])
+{
+    cmsCurveSegment Segment;
+    int Index = 0;
+    cmsUInt32Number nBytes;
+    _cmsParametricCurvesCollection* Collection = GetParametricCurveByType(ContextID, Type, &Index);
+
+    _cmsAssert(Params != NULL);
+
+    if (Collection == NULL) {
+        cmsSignalError(ContextID, cmsERROR_UNKNOWN_EXTENSION, "Invalid parametric curve type %d", Type);
+        return NULL;
+    }
+
+    // A single segment covering MINUS_INF..PLUS_INF; parameters that are not copied stay zero
+    memset(&Segment, 0, sizeof(Segment));
+    Segment.x0   = MINUS_INF;
+    Segment.x1   = PLUS_INF;
+    Segment.Type = Type;
+    // Copy only as many parameters as the collection declares for this type
+    nBytes = Collection->ParameterCount[Index] * sizeof(cmsFloat64Number);
+    memmove(Segment.Params, Params, nBytes);
+
+    return cmsBuildSegmentedToneCurve(ContextID, 1, &Segment);
+}
+
+
+
+// Build a gamma table based on gamma constant: a parametric curve of type 1 with Gamma passed as its parameter
+cmsToneCurve* CMSEXPORT cmsBuildGamma(cmsContext ContextID, cmsFloat64Number Gamma)
+{
+    return cmsBuildParametricToneCurve(ContextID, 1, &Gamma);
+}
+
+
+// Free all memory taken by the gamma curve. A NULL curve is accepted and ignored.
+void CMSEXPORT cmsFreeToneCurve(cmsToneCurve* Curve)
+{
+    cmsContext ContextID;
+    cmsUInt32Number Seg;
+
+    if (Curve == NULL)
+        return;
+
+    // Fetch the context first: it is read from InterpParams, which is released right below
+    ContextID = Curve ->InterpParams->ContextID;
+    _cmsFreeInterpParams(Curve ->InterpParams);
+
+    if (Curve ->Table16 != NULL)
+        _cmsFree(ContextID, Curve ->Table16);
+
+    if (Curve ->Segments != NULL) {
+        // Per-segment data: sampled points and interpolation parameters of each segment
+        for (Seg = 0; Seg < Curve ->nSegments; Seg++) {
+
+            if (Curve ->Segments[Seg].SampledPoints != NULL)
+                _cmsFree(ContextID, Curve ->Segments[Seg].SampledPoints);
+
+            if (Curve ->SegInterp[Seg] != NULL)
+                _cmsFreeInterpParams(Curve ->SegInterp[Seg]);
+        }
+
+        // Then the two arrays themselves
+        _cmsFree(ContextID, Curve ->Segments);
+        _cmsFree(ContextID, Curve ->SegInterp);
+    }
+
+    if (Curve ->Evals != NULL)
+        _cmsFree(ContextID, Curve ->Evals);
+
+    _cmsFree(ContextID, Curve);
+}
+
+// Utility function, free 3 gamma tables and reset the three slots to NULL
+void CMSEXPORT cmsFreeToneCurveTriple(cmsToneCurve* Curve[3])
+{
+    int i;
+
+    _cmsAssert(Curve != NULL);
+
+    for (i = 0; i < 3; i++) {
+        if (Curve[i] != NULL) cmsFreeToneCurve(Curve[i]);
+        Curve[i] = NULL;
+    }
+}
+
+
+// Duplicate a gamma table: a new curve is allocated in In's context from In's entries, segments and 16 bit table
+cmsToneCurve* CMSEXPORT cmsDupToneCurve(const cmsToneCurve* In)
+{
+    if (In == NULL)
+        return NULL;
+    return AllocateToneCurveStruct(In ->InterpParams ->ContextID, In ->nEntries, In ->nSegments, In ->Segments, In ->Table16);
+}
+
+// Joins two curves for X and Y. Curves should be monotonic.
+// We want to get
+//
+//      y = Y^-1(X(t))
+//
+cmsToneCurve* CMSEXPORT cmsJoinToneCurve(cmsContext ContextID,
+                                      const cmsToneCurve* X,
+                                      const cmsToneCurve* Y, cmsUInt32Number nResultingPoints)
+{
+    cmsToneCurve* Joined = NULL;
+    cmsToneCurve* InvY;
+    cmsFloat32Number* Samples;
+    cmsUInt32Number n;
+
+    _cmsAssert(X != NULL);
+    _cmsAssert(Y != NULL);
+
+    // Y^-1, built by cmsReverseToneCurveEx() with nResultingPoints result samples
+    InvY = cmsReverseToneCurveEx(nResultingPoints, Y);
+    if (InvY == NULL)
+        return NULL;
+
+    Samples = (cmsFloat32Number*) _cmsCalloc(ContextID, nResultingPoints, sizeof(cmsFloat32Number));
+    if (Samples != NULL) {
+
+        // Sample the composition at evenly spaced t
+        for (n = 0; n < nResultingPoints; n++) {
+
+            cmsFloat32Number t = (cmsFloat32Number) n / (nResultingPoints-1);
+            cmsFloat32Number x = cmsEvalToneCurveFloat(X,  t);
+
+            Samples[n] = cmsEvalToneCurveFloat(InvY, x);
+        }
+
+        // Build the output curve from the samples, then drop the scratch buffer
+        Joined = cmsBuildTabulatedToneCurveFloat(ContextID, nResultingPoints, Samples);
+        _cmsFree(ContextID, Samples);
+    }
+
+    cmsFreeToneCurve(InvY);
+
+    return Joined;
+}
+
+
+
+// Get the surrounding nodes. This is tricky on non-monotonic tables.
+//
+// Returns the index i of an interval whose end values LutTable[i] and LutTable[i+1]
+// (in either order) enclose In, or -1 if there is none or the table has a single point.
+// p ->Domain[0] is used as the index of the last node of LutTable.
+static
+int GetInterval(cmsFloat64Number In, const cmsUInt16Number LutTable[], const struct _cms_interp_struc* p)
+{
+    int Node;
+    int Step;
+    int Visited;
+    int nIntervals;
+    int Lo, Hi;
+
+    // A 1 point table is not allowed
+    if (p ->Domain[0] < 1) return -1;
+
+    nIntervals = (int) p ->Domain[0];
+
+    // A table that is overall ascending is searched from its last interval backwards,
+    // any other table (overall descending or flat) from its first interval forwards.
+    if (LutTable[0] < LutTable[p ->Domain[0]]) {
+        Node = nIntervals - 1;
+        Step = -1;
+    }
+    else {
+        Node = 0;
+        Step = 1;
+    }
+
+    for (Visited = 0; Visited < nIntervals; Visited++, Node += Step) {
+
+        // Order both ends, so intervals running against the overall direction work too
+        if (LutTable[Node] <= LutTable[Node+1]) {
+            Lo = LutTable[Node];
+            Hi = LutTable[Node+1];
+        }
+        else {
+            Lo = LutTable[Node+1];
+            Hi = LutTable[Node];
+        }
+
+        if (In >= Lo && In <= Hi) return Node;
+    }
+
+    return -1;
+}
+
+// Reverse a gamma table. nResultSamples is the size of the table built when the curve has to be inverted numerically.
+cmsToneCurve* CMSEXPORT cmsReverseToneCurveEx(cmsUInt32Number nResultSamples, const cmsToneCurve* InCurve)
+{
+    cmsToneCurve *out;
+    cmsFloat64Number a = 0, b = 0, y, x1, y1, x2, y2;
+    int i, j;
+    int Ascending;
+
+    _cmsAssert(InCurve != NULL);
+
+    // Try to reverse it analytically whatever possible: a single-segment parametric curve of a
+    // known type is rebuilt with the negated type, which requests the analytically inverted curve.
+    if (InCurve ->nSegments == 1 && InCurve ->Segments[0].Type > 0 && 
+        /* InCurve -> Segments[0].Type <= 5 */ 
+        GetParametricCurveByType(InCurve ->InterpParams->ContextID, InCurve ->Segments[0].Type, NULL) != NULL) {
+
+        return cmsBuildParametricToneCurve(InCurve ->InterpParams->ContextID,
+                                       -(InCurve -> Segments[0].Type),
+                                       InCurve -> Segments[0].Params);
+    }
+
+    // Nope, reverse the table.
+    out = cmsBuildTabulatedToneCurve16(InCurve ->InterpParams->ContextID, nResultSamples, NULL);
+    if (out == NULL)
+        return NULL;
+
+    // We want to know if this is an ascending or descending table
+    Ascending = !cmsIsToneCurveDescending(InCurve);
+
+    // Iterate across Y axis
+    for (i=0; i < (int) nResultSamples; i++) {
+
+        y = (cmsFloat64Number) i * 65535.0 / (nResultSamples - 1);
+
+        // Find interval in which y is within.
+        j = GetInterval(y, InCurve->Table16, InCurve->InterpParams);
+        if (j >= 0) {
+            // x1, x2 are the curve's table values at nodes j and j+1; y1, y2 are the positions
+            // of those nodes scaled to 0..65535. The roles are swapped because we are inverting.
+            // Get limits of interval
+            x1 = InCurve ->Table16[j];
+            x2 = InCurve ->Table16[j+1];
+
+            y1 = (cmsFloat64Number) (j * 65535.0) / (InCurve ->nEntries - 1);
+            y2 = (cmsFloat64Number) ((j+1) * 65535.0 ) / (InCurve ->nEntries - 1);
+
+            // If collapsed, then use any
+            if (x1 == x2) {
+
+                out ->Table16[i] = _cmsQuickSaturateWord(Ascending ? y2 : y1);
+                continue;
+
+            } else {
+
+                // Interpolate
+                a = (y2 - y1) / (x2 - x1);
+                b = y2 - a * x2;
+            }
+        }
+        // When no interval was found (j < 0), a and b keep their values from an earlier iteration
+        out ->Table16[i] = _cmsQuickSaturateWord(a* y + b);
+    }
+
+
+    return out;
+}
+
+// Reverse a gamma table, asking cmsReverseToneCurveEx() for 4096 result samples
+cmsToneCurve* CMSEXPORT cmsReverseToneCurve(const cmsToneCurve* InGamma)
+{
+    const cmsUInt32Number nDefaultSamples = 4096;
+    _cmsAssert(InGamma != NULL);
+    return cmsReverseToneCurveEx(nDefaultSamples, InGamma);
+}
+
+// From: Eilers, P.H.C. (1994) Smoothing and interpolation with finite
+// differences. in: Graphic Gems IV, Heckbert, P.S. (ed.), Academic press.
+//
+// Smoothing and interpolation with second differences.
+//
+//   Input:  weights (w), data (y): vector from 1 to m.
+//   Input:  smoothing parameter (lambda), length (m).
+//   Output: smoothed vector (z): vector from 1 to m.
+//   Returns FALSE if m < 3 or if the work buffers cannot be allocated.
+static
+cmsBool smooth2(cmsContext ContextID, cmsFloat32Number w[], cmsFloat32Number y[], 
+                cmsFloat32Number z[], cmsFloat32Number lambda, int m)
+{
+    int i, i1, i2;
+    cmsFloat32Number *c, *d, *e;
+    cmsBool st;
+
+    // Work vectors for the recurrences below, indexed from 1 like w, y and z
+    c = (cmsFloat32Number*) _cmsCalloc(ContextID, MAX_NODES_IN_CURVE, sizeof(cmsFloat32Number));
+    d = (cmsFloat32Number*) _cmsCalloc(ContextID, MAX_NODES_IN_CURVE, sizeof(cmsFloat32Number));
+    e = (cmsFloat32Number*) _cmsCalloc(ContextID, MAX_NODES_IN_CURVE, sizeof(cmsFloat32Number));
+    // Nodes 1, 2 and m-3 are indexed below: with fewer than 3 points that goes out of bounds
+    if (m >= 3 && c != NULL && d != NULL && e != NULL) {
+
+
+    d[1] = w[1] + lambda;
+    c[1] = -2 * lambda / d[1];
+    e[1] = lambda /d[1];
+    z[1] = w[1] * y[1];
+    d[2] = w[2] + 5 * lambda - d[1] * c[1] *  c[1];
+    c[2] = (-4 * lambda - d[1] * c[1] * e[1]) / d[2];
+    e[2] = lambda / d[2];
+    z[2] = w[2] * y[2] - c[1] * z[1];
+
+    for (i = 3; i < m - 1; i++) {
+        i1 = i - 1; i2 = i - 2;
+        d[i]= w[i] + 6 * lambda - c[i1] * c[i1] * d[i1] - e[i2] * e[i2] * d[i2];
+        c[i] = (-4 * lambda -d[i1] * c[i1] * e[i1])/ d[i];
+        e[i] = lambda / d[i];
+        z[i] = w[i] * y[i] - c[i1] * z[i1] - e[i2] * z[i2];
+    }
+
+    i1 = m - 2; i2 = m - 3;
+
+    d[m - 1] = w[m - 1] + 5 * lambda -c[i1] * c[i1] * d[i1] - e[i2] * e[i2] * d[i2];
+    c[m - 1] = (-2 * lambda - d[i1] * c[i1] * e[i1]) / d[m - 1];
+    z[m - 1] = w[m - 1] * y[m - 1] - c[i1] * z[i1] - e[i2] * z[i2];
+    i1 = m - 1; i2 = m - 2;
+
+    d[m] = w[m] + lambda - c[i1] * c[i1] * d[i1] - e[i2] * e[i2] * d[i2];
+    z[m] = (w[m] * y[m] - c[i1] * z[i1] - e[i2] * z[i2]) / d[m];
+    z[m - 1] = z[m - 1] / d[m - 1] - c[m - 1] * z[m];
+
+    for (i = m - 2; 1<= i; i--)
+        z[i] = z[i] / d[i] - c[i] * z[i + 1] - e[i] * z[i + 2];
+
+      st = TRUE;
+    }
+    else st = FALSE;
+
+    if (c != NULL) _cmsFree(ContextID, c);
+    if (d != NULL) _cmsFree(ContextID, d);
+    if (e != NULL) _cmsFree(ContextID, e);
+
+    return st;
+}
+
+// Smooths a curve sampled at regular intervals.
+//
+// The 16 bit table of Tab is passed through smooth2() using the given lambda and is
+// overwritten only when the smoothed values pass the checks below. Returns TRUE if the
+// table was smoothed or the curve is already linear, FALSE otherwise.
+//
+cmsBool  CMSEXPORT cmsSmoothToneCurve(cmsToneCurve* Tab, cmsFloat64Number lambda)
+{
+    cmsBool SuccessStatus = TRUE;
+    cmsFloat32Number *Weights, *Data, *Smoothed;
+    cmsUInt32Number k, nNodes, nZeros, nPoles;
+    cmsContext ContextID;
+
+    // Can't signal an error here since the ContextID is not known at this point
+    if (Tab == NULL || Tab->InterpParams == NULL)
+        return FALSE;
+
+    ContextID = Tab->InterpParams->ContextID;
+
+    // Only non-linear curves need smoothing
+    if (cmsIsToneCurveLinear(Tab))
+        return TRUE;
+
+    // smooth2() works on buffers of MAX_NODES_IN_CURVE items, so larger tables are refused
+    nNodes = Tab->nEntries;
+    if (nNodes >= MAX_NODES_IN_CURVE)
+    {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "cmsSmoothToneCurve: Too many points.");
+        return FALSE;
+    }
+
+    // Allocate one more item than needed: smooth2() indexes its vectors from 1
+    Weights  = (cmsFloat32Number *)_cmsCalloc(ContextID, nNodes + 1, sizeof(cmsFloat32Number));
+    Data     = (cmsFloat32Number *)_cmsCalloc(ContextID, nNodes + 1, sizeof(cmsFloat32Number));
+    Smoothed = (cmsFloat32Number *)_cmsCalloc(ContextID, nNodes + 1, sizeof(cmsFloat32Number));
+
+    if (Weights == NULL || Data == NULL || Smoothed == NULL)
+    {
+        // One or more buffers could not be allocated
+        cmsSignalError(ContextID, cmsERROR_RANGE, "cmsSmoothToneCurve: Could not allocate memory.");
+        SuccessStatus = FALSE;
+        goto Done;
+    }
+
+    memset(Weights, 0, (nNodes + 1) * sizeof(cmsFloat32Number));
+    memset(Data, 0, (nNodes + 1) * sizeof(cmsFloat32Number));
+    memset(Smoothed, 0, (nNodes + 1) * sizeof(cmsFloat32Number));
+
+    // Load the table at positions 1..nNodes, every node with the same weight
+    for (k = 0; k < nNodes; k++)
+    {
+        Data[k + 1]    = (cmsFloat32Number)Tab->Table16[k];
+        Weights[k + 1] = 1.0;
+    }
+
+    if (!smooth2(ContextID, Weights, Data, Smoothed, (cmsFloat32Number)lambda, (int)nNodes))
+    {
+        // Could not smooth
+        cmsSignalError(ContextID, cmsERROR_RANGE, "cmsSmoothToneCurve: Function smooth2 failed.");
+        SuccessStatus = FALSE;
+        goto Done;
+    }
+
+    // Do some reality - checking...
+    nZeros = nPoles = 0;
+    for (k = nNodes; k > 1; --k)
+    {
+        if (Smoothed[k] == 0.) nZeros++;
+        if (Smoothed[k] >= 65535.) nPoles++;
+
+        if (Smoothed[k] < Smoothed[k - 1])
+        {
+            cmsSignalError(ContextID, cmsERROR_RANGE, "cmsSmoothToneCurve: Non-Monotonic.");
+            SuccessStatus = FALSE;
+            goto Done;
+        }
+    }
+
+    if (nZeros > (nNodes / 3))
+    {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "cmsSmoothToneCurve: Degenerated, mostly zeros.");
+        SuccessStatus = FALSE;
+        goto Done;
+    }
+
+    if (nPoles > (nNodes / 3))
+    {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "cmsSmoothToneCurve: Degenerated, mostly poles.");
+        SuccessStatus = FALSE;
+        goto Done;
+    }
+
+    // Seems ok: write the smoothed values back
+    for (k = 0; k < nNodes; k++)
+    {
+        // Clamp to cmsUInt16Number
+        Tab->Table16[k] = _cmsQuickSaturateWord(Smoothed[k + 1]);
+    }
+
+Done:
+    // Buffers that could not be allocated are NULL and are skipped
+    if (Smoothed != NULL) _cmsFree(ContextID, Smoothed);
+    if (Data != NULL) _cmsFree(ContextID, Data);
+    if (Weights != NULL) _cmsFree(ContextID, Weights);
+
+    return SuccessStatus;
+}
+
+// Is a table linear? Do not use parametric since we cannot guarantee some weird parameters resulting
+// in a linear table. This way assures it is linear in 12 bits, which should be enough in most cases.
+cmsBool CMSEXPORT cmsIsToneCurveLinear(const cmsToneCurve* Curve)
+{
+    int Node;
+
+    _cmsAssert(Curve != NULL);
+
+    for (Node = 0; Node < (int) Curve ->nEntries; Node++) {
+        // Compare the stored value against the one _cmsQuantizeVal() gives for this node
+        int Expected = (int) _cmsQuantizeVal(Node, Curve ->nEntries);
+        int Actual   = (int) Curve->Table16[Node];
+
+        if (abs(Actual - Expected) > 0x0f) return FALSE;
+    }
+
+    return TRUE;
+}
+
+// Same, but for monotonicity.
+//
+// The 16 bit table is walked in the direction in which its values are expected not to
+// rise; a rise of more than 2 units between neighbours makes the curve non-monotonic.
+cmsBool  CMSEXPORT cmsIsToneCurveMonotonic(const cmsToneCurve* t)
+{
+    cmsUInt32Number nEntries;
+    int Pos;
+    int Step;
+    int Left;
+    int Prev, Cur;
+
+    _cmsAssert(t != NULL);
+
+    // Degenerated curves are monotonic? Ok, let's pass them
+    nEntries = t ->nEntries;
+    if (nEntries < 2) return TRUE;
+
+    // Curve direction decides where the walk starts:
+    // descending curves are scanned from the first entry forwards,
+    // all others from the last entry backwards.
+    if (cmsIsToneCurveDescending(t)) {
+        Pos  = 0;
+        Step = 1;
+    }
+    else {
+        Pos  = (int) nEntries - 1;
+        Step = -1;
+    }
+
+    Prev = t ->Table16[Pos];
+
+    for (Left = (int) nEntries - 1; Left > 0; --Left) {
+
+        Pos += Step;
+        Cur  = t ->Table16[Pos];
+
+        // We allow some ripple
+        if (Cur - Prev > 2) return FALSE;
+
+        Prev = Cur;
+    }
+
+    return TRUE;
+}
+
+// Same, but for descending tables: only the first and the last table entries are compared
+cmsBool  CMSEXPORT cmsIsToneCurveDescending(const cmsToneCurve* t)
+{
+    _cmsAssert(t != NULL);
+    if (t ->Table16[0] > t ->Table16[t ->nEntries-1]) return TRUE;
+    return FALSE;
+}
+
+
+// Another info fn: is our gamma table multisegment? TRUE when it has more than one segment
+cmsBool  CMSEXPORT cmsIsToneCurveMultisegment(const cmsToneCurve* t)
+{
+    _cmsAssert(t != NULL);
+    if (t ->nSegments > 1) return TRUE;
+    return FALSE;
+}
+
+cmsInt32Number  CMSEXPORT cmsGetToneCurveParametricType(const cmsToneCurve* t)
+{
+    _cmsAssert(t != NULL);
+
+    // Only single-segment curves report their type; anything else yields 0
+    return (t ->nSegments == 1) ? t ->Segments[0].Type : 0;
+}
+
+// We need accuracy this time
+cmsFloat32Number CMSEXPORT cmsEvalToneCurveFloat(const cmsToneCurve* Curve, cmsFloat32Number v)
+{
+    cmsUInt16Number In16, Out16;
+
+    _cmsAssert(Curve != NULL);
+
+    // Curves that carry segments are evaluated in floating point
+    if (Curve ->nSegments != 0)
+        return (cmsFloat32Number) EvalSegmentedFn(Curve, v);
+
+    // No segments: this is a limited-precision tone curve with only the 16 bits table,
+    // so quantize the input, use the 16 bit evaluator and scale the result back
+    In16  = (cmsUInt16Number) _cmsQuickSaturateWord(v * 65535.0);
+    Out16 = cmsEvalToneCurve16(Curve, In16);
+
+    return (cmsFloat32Number) (Out16 / 65535.0);
+}
+
+// We need xput over here: a single call into the interpolation routine held by InterpParams
+cmsUInt16Number CMSEXPORT cmsEvalToneCurve16(const cmsToneCurve* Curve, cmsUInt16Number v)
+{
+    const struct _cms_interp_struc* Interp;
+    cmsUInt16Number Result;
+    _cmsAssert(Curve != NULL);
+    Interp = Curve ->InterpParams;
+    Interp ->Interpolation.Lerp16(&v, &Result, Interp);
+    return Result;
+}
+
+
+// Least squares fitting.
+// A mathematical procedure for finding the best-fitting curve to a given set of points by
+// minimizing the sum of the squares of the offsets ("the residuals") of the points from the curve.
+// The sum of the squares of the offsets is used instead of the offset absolute values because
+// this allows the residuals to be treated as a continuous differentiable quantity.
+//
+// y = f(x) = x ^ g
+//
+// R  = (yi - (xi^g))
+// R2 = (yi - (xi^g))2
+// SUM R2 = SUM (yi - (xi^g))2
+//
+// dR2/dg = -2 SUM x^g log(x)(y - x^g)
+// solving for dR2/dg = 0
+//
+// g = 1/n * SUM(log(y) / log(x))
+// Returns the mean gamma, or -1.0 if fewer than two samples are usable or their deviation exceeds Precision.
+cmsFloat64Number CMSEXPORT cmsEstimateGamma(const cmsToneCurve* t, cmsFloat64Number Precision)
+{
+    cmsFloat64Number gamma, sum, sum2;
+    cmsFloat64Number n, x, y, Std;
+    cmsUInt32Number i;
+
+    _cmsAssert(t != NULL);
+
+    sum = sum2 = n = 0;
+
+    // Excluding endpoints
+    for (i=1; i < (MAX_NODES_IN_CURVE-1); i++) {
+
+        x = (cmsFloat64Number) i / (MAX_NODES_IN_CURVE-1);
+        y = (cmsFloat64Number) cmsEvalToneCurveFloat(t, (cmsFloat32Number) x);
+
+        // Avoid 7% on lower part to prevent
+        // artifacts due to linear ramps
+
+        if (y > 0. && y < 1. && x > 0.07) {
+            // log(y) / log(x) is the exponent g that solves y = x^g for this sample
+            gamma = log(y) / log(x);
+            sum  += gamma;
+            sum2 += gamma * gamma;
+            n++;
+        }
+    }
+
+    // We need enough valid samples: n*(n-1) below is zero for n <= 1, and sum/n is 0/0 for n == 0
+    if (n <= 1) return -1.0;
+
+    // Take a look on SD to see if gamma isn't exponential at all
+    Std = sqrt((n * sum2 - sum * sum) / (n*(n-1)));
+    if (Std > Precision) return -1.0;
+    return (sum / n);   // The mean
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmsgmt.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmsgmt.c
new file mode 100644
index 0000000000..d710271c9b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmsgmt.c
@@ -0,0 +1,590 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// Auxiliary: builds a transform over the given chain of profiles with a Lab identity profile
+// appended as the final step. The temporary Lab profile is closed before returning; the
+// caller's profiles are kept open. Returns NULL on failure or if nProfiles exceeds 254.
+cmsHTRANSFORM _cmsChain2Lab(cmsContext            ContextID,
+                            cmsUInt32Number        nProfiles,
+                            cmsUInt32Number        InputFormat,
+                            cmsUInt32Number        OutputFormat,
+                            const cmsUInt32Number  Intents[],
+                            const cmsHPROFILE      hProfiles[],
+                            const cmsBool          BPC[],
+                            const cmsFloat64Number AdaptationStates[],
+                            cmsUInt32Number        dwFlags)
+{
+    cmsHPROFILE      Chain[256];
+    cmsBool          ChainBPC[256];
+    cmsUInt32Number  ChainIntents[256];
+    cmsFloat64Number ChainAdaptation[256];
+    cmsHPROFILE      hLabProfile;
+    cmsHTRANSFORM    hResult;
+    cmsUInt32Number  k;
+
+    // The local arrays hold 256 entries and one slot is needed for the appended
+    // Lab profile, so no dynamic memory is required as long as the chain is short enough.
+    if (nProfiles > 254)
+        return NULL;
+
+    // Lab is the output space of the chain
+    hLabProfile = cmsCreateLab4ProfileTHR(ContextID, NULL);
+    if (hLabProfile == NULL)
+        return NULL;
+
+    // Copy the caller's parameters into the local arrays
+    for (k = 0; k < nProfiles; k++) {
+        Chain[k]           = hProfiles[k];
+        ChainBPC[k]        = BPC[k];
+        ChainIntents[k]    = Intents[k];
+        ChainAdaptation[k] = AdaptationStates[k];
+    }
+
+    // The Lab identity goes in the slot right after the last caller profile
+    Chain[k]           = hLabProfile;
+    ChainBPC[k]        = 0;
+    ChainIntents[k]    = INTENT_RELATIVE_COLORIMETRIC;
+    ChainAdaptation[k] = 1.0;
+
+    // Build the transform over the extended chain
+    hResult = cmsCreateExtendedTransform(ContextID, k + 1,
+                                         Chain, ChainBPC, ChainIntents, ChainAdaptation,
+                                         NULL, 0,
+                                         InputFormat, OutputFormat,
+                                         dwFlags);
+
+    // Close the temporary Lab profile; only the transform is handed back
+    cmsCloseProfile(hLabProfile);
+
+    return hResult;
+}
+
+
+// Samples the K -> L* relationship of a CMYK profile chain: K is swept from 0 to 100 with
+// C = M = Y = 0 and each sample stores 1 - L*/100. dwFlags may carry black point compensation.
+static
+cmsToneCurve* ComputeKToLstar(cmsContext            ContextID,
+                               cmsUInt32Number       nPoints,
+                               cmsUInt32Number       nProfiles,
+                               const cmsUInt32Number Intents[],
+                               const cmsHPROFILE     hProfiles[],
+                               const cmsBool         BPC[],
+                               const cmsFloat64Number AdaptationStates[],
+                               cmsUInt32Number dwFlags)
+{
+    cmsToneCurve*     Curve = NULL;
+    cmsFloat32Number* Samples;
+    cmsFloat32Number  Ink[4];
+    cmsHTRANSFORM     hToLab;
+    cmsCIELab         Measured;
+    cmsUInt32Number   k;
+
+    // CMYK float in, Lab double out
+    hToLab = _cmsChain2Lab(ContextID, nProfiles, TYPE_CMYK_FLT, TYPE_Lab_DBL,
+                           Intents, hProfiles, BPC, AdaptationStates, dwFlags);
+    if (hToLab == NULL)
+        return NULL;
+
+    Samples = (cmsFloat32Number*) _cmsCalloc(ContextID, nPoints, sizeof(cmsFloat32Number));
+    if (Samples != NULL) {
+        // Only the black channel varies; the chromatic inks stay at zero
+        Ink[0] = Ink[1] = Ink[2] = 0;
+
+        for (k = 0; k < nPoints; k++) {
+            Ink[3] = (cmsFloat32Number) ((k * 100.0) / (nPoints-1));
+            cmsDoTransform(hToLab, Ink, &Measured, 1);
+
+            // Negate K for easier operation
+            Samples[k] = (cmsFloat32Number) (1.0 - Measured.L / 100.0);
+        }
+
+        Curve = cmsBuildTabulatedToneCurveFloat(ContextID, nPoints, Samples);
+        _cmsFree(ContextID, Samples);
+    }
+
+    cmsDeleteTransform(hToLab);
+    return Curve;
+}
+
+
+// Compute Black tone curve on a CMYK -> CMYK transform. This is done by
+// using the proof direction on both profiles to find K->L* relationship
+// then joining both curves. dwFlags may include black point compensation.
+// Returns NULL for an empty or non CMYK -> CMYK chain, or if no monotonic curve can be built.
+cmsToneCurve* _cmsBuildKToneCurve(cmsContext        ContextID,
+                                   cmsUInt32Number   nPoints,
+                                   cmsUInt32Number   nProfiles,
+                                   const cmsUInt32Number Intents[],
+                                   const cmsHPROFILE hProfiles[],
+                                   const cmsBool     BPC[],
+                                   const cmsFloat64Number AdaptationStates[],
+                                   cmsUInt32Number   dwFlags)
+{
+    cmsToneCurve *in, *out, *KTone;
+
+    if (nProfiles == 0) return NULL;   // Empty chain: nProfiles-1 below would wrap around
+
+    // Make sure CMYK -> CMYK
+    if (cmsGetColorSpace(hProfiles[0]) != cmsSigCmykData ||
+        cmsGetColorSpace(hProfiles[nProfiles-1])!= cmsSigCmykData) return NULL;
+    // Make sure last is an output profile
+    if (cmsGetDeviceClass(hProfiles[nProfiles - 1]) != cmsSigOutputClass) return NULL;
+
+    // Create individual curves. BPC works also as each K to L* is
+    // computed as a BPC to zero black point in case of L*
+    in  = ComputeKToLstar(ContextID, nPoints, nProfiles - 1, Intents, hProfiles, BPC, AdaptationStates, dwFlags);
+    if (in == NULL) return NULL;
+
+    out = ComputeKToLstar(ContextID, nPoints, 1,
+                            Intents + (nProfiles - 1),
+                            &hProfiles [nProfiles - 1],
+                            BPC + (nProfiles - 1),
+                            AdaptationStates + (nProfiles - 1),
+                            dwFlags);
+    if (out == NULL) {
+        cmsFreeToneCurve(in);
+        return NULL;
+    }
+
+    // Build the relationship. This effectively limits the maximum accuracy to 16 bits, but
+    // since this is used on black-preserving LUTs, we are not losing  accuracy in any case
+    KTone = cmsJoinToneCurve(ContextID, in, out, nPoints);
+
+    // Get rid of components
+    cmsFreeToneCurve(in); cmsFreeToneCurve(out);
+
+    // Something went wrong...
+    if (KTone == NULL) return NULL;
+    // Make sure it is monotonic
+    if (!cmsIsToneCurveMonotonic(KTone)) {
+        cmsFreeToneCurve(KTone);
+        return NULL;
+    }
+
+    return KTone;
+}
+
+
+// Gamut LUT Creation -----------------------------------------------------------------------------------------
+
+// Used by gamut & softproofing
+
+typedef struct {
+    // Cargo handed to GamutSampler: the three transforms plus the dE tolerance
+    cmsHTRANSFORM hInput;               // From whatever input color space. 16 bits to Lab double
+    cmsHTRANSFORM hForward, hReverse;   // Transforms going from Lab to colorant and back
+    cmsFloat64Number Thereshold;        // dE threshold above which a color is considered out of gamut
+
+    } GAMUTCHAIN;
+
+// Gamut sampler. Estimates whether a color is out of gamut by taking it through the gamut
+// profile and back (Lab -> colorant -> Lab) twice, then comparing the dE of each pass against
+// the threshold carried in the cargo. Out[0] receives 0 for colors taken as in gamut, otherwise
+// a rounded excess over the threshold. ERR_THERESHOLD is the threshold for non matrix-shapers.
+
+#define ERR_THERESHOLD      5
+
+
+static
+int GamutSampler(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[], CMSREGISTER void* Cargo)
+{
+    GAMUTCHAIN* Chain = (GAMUTCHAIN*) Cargo;
+    const cmsFloat64Number Limit = Chain->Thereshold;
+    cmsCIELab LabSource, LabPass1, LabPass1Copy, LabPass2;
+    cmsUInt16Number Device1[cmsMAXCHANNELS], Device2[cmsMAXCHANNELS];
+    cmsFloat64Number dE1, dE2, Ratio;
+
+    // Bring the input color to Lab
+    cmsDoTransform(Chain->hInput, In, &LabSource, 1);
+
+    // First pass: PCS to colorant, which always yields in-gamut
+    // values, and then the inverse from colorant back to PCS.
+    cmsDoTransform(Chain->hForward, &LabSource, Device1, 1);
+    cmsDoTransform(Chain->hReverse, Device1, &LabPass1, 1);
+
+    // Second pass: the same round trip again, but this time
+    // taking the outcome of the first pass as input.
+    LabPass1Copy = LabPass1;
+    cmsDoTransform(Chain->hForward, &LabPass1, Device2, 1);
+    cmsDoTransform(Chain->hReverse, Device2, &LabPass2, 1);
+
+    // Error of the direct value
+    dE1 = cmsDeltaE(&LabSource, &LabPass1);
+
+    // Error of the already converted value
+    dE2 = cmsDeltaE(&LabPass1Copy, &LabPass2);
+
+
+    // Classify by which of the two errors stay below the threshold
+    if (dE1 < Limit && dE2 < Limit) {
+
+        // Both errors are small: value is likely to be in gamut
+        Out[0] = 0;
+    }
+    else if (dE1 < Limit && dE2 > Limit) {
+
+        // dE1 is small and dE2 is big: undefined, assume in gamut
+        Out[0] = 0;
+    }
+    else if (dE1 > Limit && dE2 < Limit) {
+
+        // dE1 is big and dE2 is small: clearly out of gamut
+        Out[0] = (cmsUInt16Number) _cmsQuickFloor((dE1 - Limit) + .5);
+    }
+    else {
+
+        // Every remaining combination ends up here; typically dE1 is big and
+        // dE2 is also big, which could be due to perceptual mapping, so the
+        // decision is taken on the error ratio instead.
+        if (dE2 == 0.0)
+            Ratio = dE1;
+        else
+            Ratio = dE1 / dE2;
+
+        if (Ratio > Limit)
+            Out[0] = (cmsUInt16Number) _cmsQuickFloor((Ratio - Limit) + .5);
+        else
+            Out[0] = 0;
+    }
+
+
+    return TRUE;
+}
+
+// Computes a gamut LUT going back and forth across pcs -> relativ. colorimetric intent -> pcs
+// on the hGamut profile. The dE obtained is then annotated on a single-channel CLUT, which is
+// filled by GamutSampler. According to the original notes, values truly out of gamut are
+// clipped to dE = 0xFFFE, and values changed are supposed to be handled by any gamut
+// remapping, so they are out of gamut as well.
+//
+// **WARNING: This algorithm assumes that gamut remapping algorithms do NOT move in-gamut colors.
+// Of course, many perceptual and saturation intents do not work that way, but relativ. ones should.
+//
+// The first nGamutPCSposition entries of hProfiles, BPC, Intents and AdaptationStates are the
+// chain leading to the PCS. Returns the pipeline, or NULL on any failure.
+
+cmsPipeline* _cmsCreateGamutCheckPipeline(cmsContext ContextID,
+                                          cmsHPROFILE hProfiles[],
+                                          cmsBool  BPC[],
+                                          cmsUInt32Number Intents[],
+                                          cmsFloat64Number AdaptationStates[],
+                                          cmsUInt32Number nGamutPCSposition,
+                                          cmsHPROFILE hGamut)
+{
+    cmsHPROFILE      ChainProfiles[256];
+    cmsBool          ChainBPC[256];
+    cmsUInt32Number  ChainIntents[256];
+    cmsFloat64Number ChainAdaptation[256];
+
+    GAMUTCHAIN Cargo;
+    cmsHPROFILE hLabProfile;
+    cmsPipeline* Result = NULL;
+    cmsStage* Grid;
+    cmsColorSpaceSignature GamutSpace;
+    cmsUInt32Number nDeviceChans, nNodes, DeviceFormat;
+    cmsUInt32Number k;
+
+    // Start with no transforms, so the cleanup below can tell which ones got created
+    memset(&Cargo, 0, sizeof(GAMUTCHAIN));
+
+    // The Lab profile is stored at index nGamutPCSposition of the 256-entry local arrays
+    if (nGamutPCSposition <= 0 || nGamutPCSposition > 255) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Wrong position of PCS. 1..255 expected, %d found.", nGamutPCSposition);
+        return NULL;
+    }
+
+    // Lab profile acting as PCS for the three transforms
+    hLabProfile = cmsCreateLab4ProfileTHR(ContextID, NULL);
+    if (hLabProfile == NULL)
+        return NULL;
+
+    // The figure of merit. On matrix-shaper profiles, should be almost zero as
+    // the conversion is pretty exact. On LUT based profiles, different resolutions
+    // of input and output CLUT may result in differences.
+    Cargo.Thereshold = cmsIsMatrixShaper(hGamut) ? 1.0 : ERR_THERESHOLD;
+
+
+    // Copy the part of the chain that leads to the PCS
+    for (k = 0; k < nGamutPCSposition; k++) {
+        ChainProfiles[k]   = hProfiles[k];
+        ChainBPC[k]        = BPC[k];
+        ChainIntents[k]    = Intents[k];
+        ChainAdaptation[k] = AdaptationStates[k];
+    }
+
+    // ... and end it with the Lab identity (k equals nGamutPCSposition here)
+    ChainProfiles[k]   = hLabProfile;
+    ChainBPC[k]        = 0;
+    ChainIntents[k]    = INTENT_RELATIVE_COLORIMETRIC;
+    ChainAdaptation[k] = 1.0;
+
+    // Device side: 2 bytes per channel, channel count taken from the color space of hGamut
+    GamutSpace   = cmsGetColorSpace(hGamut);
+    nDeviceChans = cmsChannelsOf(GamutSpace);
+    nNodes       = _cmsReasonableGridpointsByColorspace(GamutSpace, cmsFLAGS_HIGHRESPRECALC);
+    DeviceFormat = (CHANNELS_SH(nDeviceChans)|BYTES_SH(2));
+
+    // 16 bits to Lab double
+    Cargo.hInput = cmsCreateExtendedTransform(ContextID,
+        k + 1,
+        ChainProfiles,
+        ChainBPC,
+        ChainIntents,
+        ChainAdaptation,
+        NULL, 0,
+        DeviceFormat, TYPE_Lab_DBL,
+        cmsFLAGS_NOCACHE);
+
+    // The forward step. Lab double to device
+    Cargo.hForward = cmsCreateTransformTHR(ContextID,
+        hLabProfile, TYPE_Lab_DBL,
+        hGamut, DeviceFormat,
+        INTENT_RELATIVE_COLORIMETRIC,
+        cmsFLAGS_NOCACHE);
+
+    // The backwards step. Device to Lab double
+    Cargo.hReverse = cmsCreateTransformTHR(ContextID,
+        hGamut, DeviceFormat,
+        hLabProfile, TYPE_Lab_DBL,
+        INTENT_RELATIVE_COLORIMETRIC,
+        cmsFLAGS_NOCACHE);
+
+
+    // All ok?
+    if (Cargo.hInput && Cargo.hForward && Cargo.hReverse) {
+
+        // Go on, try to compute gamut LUT from PCS. This consists of a single channel containing
+        // dE when doing a transform back and forth on the colorimetric intent.
+        Result = cmsPipelineAlloc(ContextID, 3, 1);
+    }
+
+    // The pipeline allocation itself may have failed as well
+    if (Result != NULL) {
+
+        Grid = cmsStageAllocCLut16bit(ContextID, nNodes, nDeviceChans, 1, NULL);
+
+        if (cmsPipelineInsertStage(Result, cmsAT_BEGIN, Grid)) {
+            // Fill every node with the value reported by the sampler
+            cmsStageSampleCLut16bit(Grid, GamutSampler, (void*) &Cargo, 0);
+        }
+        else {
+            // The stage could not be inserted: give up on the pipeline
+            cmsPipelineFree(Result);
+            Result = NULL;
+        }
+    }
+
+    // Release everything that was created along the way
+    if (Cargo.hInput)   cmsDeleteTransform(Cargo.hInput);
+    if (Cargo.hForward) cmsDeleteTransform(Cargo.hForward);
+    if (Cargo.hReverse) cmsDeleteTransform(Cargo.hReverse);
+    cmsCloseProfile(hLabProfile);
+
+    // And return computed hull
+    return Result;
+}
+
+// Total Area Coverage estimation ----------------------------------------------------------------
+
+typedef struct {
+    cmsUInt32Number  nOutputChans;              // Channels summed per sample by EstimateTAC
+    cmsHTRANSFORM    hRoundTrip;                // Transform evaluated for every sampled node
+    cmsFloat32Number MaxTAC;                    // Largest ink sum found so far
+    cmsFloat32Number MaxInput[cmsMAXCHANNELS];  // Input values recorded when MaxTAC was last raised
+
+} cmsTACestimator;
+
+
+// This callback just accounts the maximum ink dropped in the given node. It does not populate any
+// memory, as the destination table is NULL. Its only purpose is to know the global maximum.
+// In[] carries the 3 Lab channels sliced by cmsDetectTAC; Cargo is the cmsTACestimator to update.
+static
+int EstimateTAC(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[], CMSREGISTER void * Cargo)
+{
+    cmsTACestimator* bp = (cmsTACestimator*) Cargo;
+    cmsFloat32Number RoundTrip[cmsMAXCHANNELS];
+    cmsUInt32Number i;
+    cmsFloat32Number Sum;
+
+    cmsUNUSED_PARAMETER(Out);
+
+    // Evaluate the xform
+    cmsDoTransform(bp->hRoundTrip, In, RoundTrip, 1);
+
+    // Add all amounts of ink
+    for (Sum=0, i=0; i < bp ->nOutputChans; i++)
+            Sum += RoundTrip[i];
+
+    // If above maximum, keep track of input values
+    if (Sum > bp ->MaxTAC) {
+            bp ->MaxTAC = Sum;
+
+            // The input is Lab, so only 3 values are valid in In[] no matter how many output
+            // channels the profile has. Copying nOutputChans entries would read past them.
+            for (i=0; i < 3; i++)
+                bp ->MaxInput[i] = In[i];
+    }
+
+    return TRUE;
+}
+
+
+// Estimates the Total Area Coverage (maximum sum of inks, in %) of an output profile by
+// sampling Lab space through the profile's perceptual intent. Returns 0 on any failure.
+cmsFloat64Number CMSEXPORT cmsDetectTAC(cmsHPROFILE hProfile)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsUInt32Number Nodes[MAX_INPUT_DIMENSIONS];
+    cmsUInt32Number DeviceFormat;
+    cmsTACestimator Estimator;
+    cmsHPROFILE hLabProfile;
+
+    // TAC only works on output profiles
+    if (cmsGetDeviceClass(hProfile) != cmsSigOutputClass) return 0;
+
+    // Create a fake formatter for result
+    DeviceFormat = cmsFormatterForColorspaceOfProfile(hProfile, 4, TRUE);
+
+    Estimator.MaxTAC       = 0;    // Initial TAC is 0
+    Estimator.nOutputChans = T_CHANNELS(DeviceFormat);
+
+    // For safety
+    if (Estimator.nOutputChans >= cmsMAXCHANNELS) return 0;
+
+    hLabProfile = cmsCreateLab4ProfileTHR(ContextID, NULL);
+    if (hLabProfile == NULL) return 0;
+
+    // Lab -> device on the perceptual intent of the output profile, used for TAC estimation
+    Estimator.hRoundTrip = cmsCreateTransformTHR(ContextID, hLabProfile, TYPE_Lab_16,
+                                                 hProfile, DeviceFormat, INTENT_PERCEPTUAL,
+                                                 cmsFLAGS_NOOPTIMIZE|cmsFLAGS_NOCACHE);
+
+    // The Lab profile is closed right away, whether or not the transform was created
+    cmsCloseProfile(hLabProfile);
+    if (Estimator.hRoundTrip == NULL) return 0;
+
+    // For L* we only need black and white. For C* we need many points
+    Nodes[0] = 6;
+    Nodes[1] = 74;
+    Nodes[2] = 74;
+
+    if (!cmsSliceSpace16(3, Nodes, EstimateTAC, &Estimator))
+        Estimator.MaxTAC = 0;
+
+    cmsDeleteTransform(Estimator.hRoundTrip);
+
+    // Results in %
+    return Estimator.MaxTAC;
+}
+
+
+// Carefully clamps a CIELab value to the prism given by [amin, amax] x [bmin, bmax], with L*
+// limited to 100. Colors outside the a, b limits are moved onto one face, picked by hue zone,
+// keeping their b/a slope. Returns FALSE if L* was negative (the color is then zeroed) or the
+// hue angle is invalid, TRUE otherwise.
+
+cmsBool CMSEXPORT cmsDesaturateLab(cmsCIELab* Lab,
+                                   double amax, double amin,
+                                   double bmax, double bmin)
+{
+    cmsCIELCh Polar;
+    double Hue, Ratio;
+
+    // Whole Luma surface to zero
+    if (Lab->L < 0) {
+
+        Lab->L = Lab->a = Lab->b = 0.0;
+        return FALSE;
+    }
+
+    // Clamp white, DISCARD HIGHLIGHTS. This is done
+    // in such way because icc spec doesn't allow the
+    // use of L>100 as a highlight means.
+    if (Lab->L > 100)
+        Lab->L = 100;
+
+    // Nothing else to do when the color sits inside the gamut prism, on a, b faces
+    if (!(Lab->a < amin || Lab->a > amax ||
+          Lab->b < bmin || Lab->b > bmax))
+        return TRUE;
+
+    // From here on the color falls outside a, b limits and has to be clipped
+
+    // Is hue exactly 90?
+    if (Lab->a == 0.0) {
+
+        // atan will not work, so clamp here
+        Lab->b = Lab->b < 0 ? bmin : bmax;
+        return TRUE;
+    }
+
+    // Transport to LCh space to learn the hue, then do the clipping
+    cmsLab2LCh(&Polar, Lab);
+
+    // Ratio is the slope b/a of the color, kept while moving it onto a face
+    Hue   = Polar.h;
+    Ratio = Lab->b / Lab->a;
+
+    // There are 4 zones
+
+    if ((Hue >= 0. && Hue < 45.) ||
+        (Hue >= 315 && Hue <= 360.)) {
+
+        // clip by amax
+        Lab->a = amax;
+        Lab->b = amax * Ratio;
+    }
+    else if (Hue >= 45. && Hue < 135.) {
+
+        // clip by bmax
+        Lab->b = bmax;
+        Lab->a = bmax / Ratio;
+    }
+    else if (Hue >= 135. && Hue < 225.) {
+
+        // clip by amin
+        Lab->a = amin;
+        Lab->b = amin * Ratio;
+    }
+    else if (Hue >= 225. && Hue < 315.) {
+
+        // clip by bmin
+        Lab->b = bmin;
+        Lab->a = bmin / Ratio;
+    }
+    else {
+
+        // The hue matched none of the four zones
+        cmsSignalError(0, cmsERROR_RANGE, "Invalid angle");
+        return FALSE;
+    }
+
+    // The color now lies on one of the clipping faces
+    return TRUE;
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmshalf.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmshalf.c
new file mode 100644
index 0000000000..66c2701134
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmshalf.c
@@ -0,0 +1,535 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+//
+#include "lcms2_internal.h"
+
+#ifndef CMS_NO_HALF_SUPPORT 
+
+// This code is inspired by the paper "Fast Half Float Conversions"
+// by Jeroen van der Zijp
+
+static cmsUInt32Number Mantissa[2048] = {   // 2048 32-bit patterns; presumably the half->float mantissa lookup of the paper cited above (reader code is outside this excerpt -- confirm)
+// Layout: entries 0..1023 rise from 0x00000000 to 0x387fc000; entries 1024..2047 restart at 0x38000000 and step by 0x2000 up to 0x387fe000.
+0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34a00000,
+0x34c00000, 0x34e00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000,
+0x35400000, 0x35500000, 0x35600000, 0x35700000, 0x35800000, 0x35880000,
+0x35900000, 0x35980000, 0x35a00000, 0x35a80000, 0x35b00000, 0x35b80000,
+0x35c00000, 0x35c80000, 0x35d00000, 0x35d80000, 0x35e00000, 0x35e80000,
+0x35f00000, 0x35f80000, 0x36000000, 0x36040000, 0x36080000, 0x360c0000,
+0x36100000, 0x36140000, 0x36180000, 0x361c0000, 0x36200000, 0x36240000,
+0x36280000, 0x362c0000, 0x36300000, 0x36340000, 0x36380000, 0x363c0000,
+0x36400000, 0x36440000, 0x36480000, 0x364c0000, 0x36500000, 0x36540000,
+0x36580000, 0x365c0000, 0x36600000, 0x36640000, 0x36680000, 0x366c0000,
+0x36700000, 0x36740000, 0x36780000, 0x367c0000, 0x36800000, 0x36820000,
+0x36840000, 0x36860000, 0x36880000, 0x368a0000, 0x368c0000, 0x368e0000,
+0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369a0000,
+0x369c0000, 0x369e0000, 0x36a00000, 0x36a20000, 0x36a40000, 0x36a60000,
+0x36a80000, 0x36aa0000, 0x36ac0000, 0x36ae0000, 0x36b00000, 0x36b20000,
+0x36b40000, 0x36b60000, 0x36b80000, 0x36ba0000, 0x36bc0000, 0x36be0000,
+0x36c00000, 0x36c20000, 0x36c40000, 0x36c60000, 0x36c80000, 0x36ca0000,
+0x36cc0000, 0x36ce0000, 0x36d00000, 0x36d20000, 0x36d40000, 0x36d60000,
+0x36d80000, 0x36da0000, 0x36dc0000, 0x36de0000, 0x36e00000, 0x36e20000,
+0x36e40000, 0x36e60000, 0x36e80000, 0x36ea0000, 0x36ec0000, 0x36ee0000,
+0x36f00000, 0x36f20000, 0x36f40000, 0x36f60000, 0x36f80000, 0x36fa0000,
+0x36fc0000, 0x36fe0000, 0x37000000, 0x37010000, 0x37020000, 0x37030000,
+0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000,
+0x370a0000, 0x370b0000, 0x370c0000, 0x370d0000, 0x370e0000, 0x370f0000,
+0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000,
+0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371a0000, 0x371b0000,
+0x371c0000, 0x371d0000, 0x371e0000, 0x371f0000, 0x37200000, 0x37210000,
+0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000,
+0x37280000, 0x37290000, 0x372a0000, 0x372b0000, 0x372c0000, 0x372d0000,
+0x372e0000, 0x372f0000, 0x37300000, 0x37310000, 0x37320000, 0x37330000,
+0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000,
+0x373a0000, 0x373b0000, 0x373c0000, 0x373d0000, 0x373e0000, 0x373f0000,
+0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000,
+0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374a0000, 0x374b0000,
+0x374c0000, 0x374d0000, 0x374e0000, 0x374f0000, 0x37500000, 0x37510000,
+0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000,
+0x37580000, 0x37590000, 0x375a0000, 0x375b0000, 0x375c0000, 0x375d0000,
+0x375e0000, 0x375f0000, 0x37600000, 0x37610000, 0x37620000, 0x37630000,
+0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000,
+0x376a0000, 0x376b0000, 0x376c0000, 0x376d0000, 0x376e0000, 0x376f0000,
+0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000,
+0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377a0000, 0x377b0000,
+0x377c0000, 0x377d0000, 0x377e0000, 0x377f0000, 0x37800000, 0x37808000,
+0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000,
+0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000,
+0x37870000, 0x37878000, 0x37880000, 0x37888000, 0x37890000, 0x37898000,
+0x378a0000, 0x378a8000, 0x378b0000, 0x378b8000, 0x378c0000, 0x378c8000,
+0x378d0000, 0x378d8000, 0x378e0000, 0x378e8000, 0x378f0000, 0x378f8000,
+0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000,
+0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000,
+0x37960000, 0x37968000, 0x37970000, 0x37978000, 0x37980000, 0x37988000,
+0x37990000, 0x37998000, 0x379a0000, 0x379a8000, 0x379b0000, 0x379b8000,
+0x379c0000, 0x379c8000, 0x379d0000, 0x379d8000, 0x379e0000, 0x379e8000,
+0x379f0000, 0x379f8000, 0x37a00000, 0x37a08000, 0x37a10000, 0x37a18000,
+0x37a20000, 0x37a28000, 0x37a30000, 0x37a38000, 0x37a40000, 0x37a48000,
+0x37a50000, 0x37a58000, 0x37a60000, 0x37a68000, 0x37a70000, 0x37a78000,
+0x37a80000, 0x37a88000, 0x37a90000, 0x37a98000, 0x37aa0000, 0x37aa8000,
+0x37ab0000, 0x37ab8000, 0x37ac0000, 0x37ac8000, 0x37ad0000, 0x37ad8000,
+0x37ae0000, 0x37ae8000, 0x37af0000, 0x37af8000, 0x37b00000, 0x37b08000,
+0x37b10000, 0x37b18000, 0x37b20000, 0x37b28000, 0x37b30000, 0x37b38000,
+0x37b40000, 0x37b48000, 0x37b50000, 0x37b58000, 0x37b60000, 0x37b68000,
+0x37b70000, 0x37b78000, 0x37b80000, 0x37b88000, 0x37b90000, 0x37b98000,
+0x37ba0000, 0x37ba8000, 0x37bb0000, 0x37bb8000, 0x37bc0000, 0x37bc8000,
+0x37bd0000, 0x37bd8000, 0x37be0000, 0x37be8000, 0x37bf0000, 0x37bf8000,
+0x37c00000, 0x37c08000, 0x37c10000, 0x37c18000, 0x37c20000, 0x37c28000,
+0x37c30000, 0x37c38000, 0x37c40000, 0x37c48000, 0x37c50000, 0x37c58000,
+0x37c60000, 0x37c68000, 0x37c70000, 0x37c78000, 0x37c80000, 0x37c88000,
+0x37c90000, 0x37c98000, 0x37ca0000, 0x37ca8000, 0x37cb0000, 0x37cb8000,
+0x37cc0000, 0x37cc8000, 0x37cd0000, 0x37cd8000, 0x37ce0000, 0x37ce8000,
+0x37cf0000, 0x37cf8000, 0x37d00000, 0x37d08000, 0x37d10000, 0x37d18000,
+0x37d20000, 0x37d28000, 0x37d30000, 0x37d38000, 0x37d40000, 0x37d48000,
+0x37d50000, 0x37d58000, 0x37d60000, 0x37d68000, 0x37d70000, 0x37d78000,
+0x37d80000, 0x37d88000, 0x37d90000, 0x37d98000, 0x37da0000, 0x37da8000,
+0x37db0000, 0x37db8000, 0x37dc0000, 0x37dc8000, 0x37dd0000, 0x37dd8000,
+0x37de0000, 0x37de8000, 0x37df0000, 0x37df8000, 0x37e00000, 0x37e08000,
+0x37e10000, 0x37e18000, 0x37e20000, 0x37e28000, 0x37e30000, 0x37e38000,
+0x37e40000, 0x37e48000, 0x37e50000, 0x37e58000, 0x37e60000, 0x37e68000,
+0x37e70000, 0x37e78000, 0x37e80000, 0x37e88000, 0x37e90000, 0x37e98000,
+0x37ea0000, 0x37ea8000, 0x37eb0000, 0x37eb8000, 0x37ec0000, 0x37ec8000,
+0x37ed0000, 0x37ed8000, 0x37ee0000, 0x37ee8000, 0x37ef0000, 0x37ef8000,
+0x37f00000, 0x37f08000, 0x37f10000, 0x37f18000, 0x37f20000, 0x37f28000,
+0x37f30000, 0x37f38000, 0x37f40000, 0x37f48000, 0x37f50000, 0x37f58000,
+0x37f60000, 0x37f68000, 0x37f70000, 0x37f78000, 0x37f80000, 0x37f88000,
+0x37f90000, 0x37f98000, 0x37fa0000, 0x37fa8000, 0x37fb0000, 0x37fb8000,
+0x37fc0000, 0x37fc8000, 0x37fd0000, 0x37fd8000, 0x37fe0000, 0x37fe8000,
+0x37ff0000, 0x37ff8000, 0x38000000, 0x38004000, 0x38008000, 0x3800c000,
+0x38010000, 0x38014000, 0x38018000, 0x3801c000, 0x38020000, 0x38024000,
+0x38028000, 0x3802c000, 0x38030000, 0x38034000, 0x38038000, 0x3803c000,
+0x38040000, 0x38044000, 0x38048000, 0x3804c000, 0x38050000, 0x38054000,
+0x38058000, 0x3805c000, 0x38060000, 0x38064000, 0x38068000, 0x3806c000,
+0x38070000, 0x38074000, 0x38078000, 0x3807c000, 0x38080000, 0x38084000,
+0x38088000, 0x3808c000, 0x38090000, 0x38094000, 0x38098000, 0x3809c000,
+0x380a0000, 0x380a4000, 0x380a8000, 0x380ac000, 0x380b0000, 0x380b4000,
+0x380b8000, 0x380bc000, 0x380c0000, 0x380c4000, 0x380c8000, 0x380cc000,
+0x380d0000, 0x380d4000, 0x380d8000, 0x380dc000, 0x380e0000, 0x380e4000,
+0x380e8000, 0x380ec000, 0x380f0000, 0x380f4000, 0x380f8000, 0x380fc000,
+0x38100000, 0x38104000, 0x38108000, 0x3810c000, 0x38110000, 0x38114000,
+0x38118000, 0x3811c000, 0x38120000, 0x38124000, 0x38128000, 0x3812c000,
+0x38130000, 0x38134000, 0x38138000, 0x3813c000, 0x38140000, 0x38144000,
+0x38148000, 0x3814c000, 0x38150000, 0x38154000, 0x38158000, 0x3815c000,
+0x38160000, 0x38164000, 0x38168000, 0x3816c000, 0x38170000, 0x38174000,
+0x38178000, 0x3817c000, 0x38180000, 0x38184000, 0x38188000, 0x3818c000,
+0x38190000, 0x38194000, 0x38198000, 0x3819c000, 0x381a0000, 0x381a4000,
+0x381a8000, 0x381ac000, 0x381b0000, 0x381b4000, 0x381b8000, 0x381bc000,
+0x381c0000, 0x381c4000, 0x381c8000, 0x381cc000, 0x381d0000, 0x381d4000,
+0x381d8000, 0x381dc000, 0x381e0000, 0x381e4000, 0x381e8000, 0x381ec000,
+0x381f0000, 0x381f4000, 0x381f8000, 0x381fc000, 0x38200000, 0x38204000,
+0x38208000, 0x3820c000, 0x38210000, 0x38214000, 0x38218000, 0x3821c000,
+0x38220000, 0x38224000, 0x38228000, 0x3822c000, 0x38230000, 0x38234000,
+0x38238000, 0x3823c000, 0x38240000, 0x38244000, 0x38248000, 0x3824c000,
+0x38250000, 0x38254000, 0x38258000, 0x3825c000, 0x38260000, 0x38264000,
+0x38268000, 0x3826c000, 0x38270000, 0x38274000, 0x38278000, 0x3827c000,
+0x38280000, 0x38284000, 0x38288000, 0x3828c000, 0x38290000, 0x38294000,
+0x38298000, 0x3829c000, 0x382a0000, 0x382a4000, 0x382a8000, 0x382ac000,
+0x382b0000, 0x382b4000, 0x382b8000, 0x382bc000, 0x382c0000, 0x382c4000,
+0x382c8000, 0x382cc000, 0x382d0000, 0x382d4000, 0x382d8000, 0x382dc000,
+0x382e0000, 0x382e4000, 0x382e8000, 0x382ec000, 0x382f0000, 0x382f4000,
+0x382f8000, 0x382fc000, 0x38300000, 0x38304000, 0x38308000, 0x3830c000,
+0x38310000, 0x38314000, 0x38318000, 0x3831c000, 0x38320000, 0x38324000,
+0x38328000, 0x3832c000, 0x38330000, 0x38334000, 0x38338000, 0x3833c000,
+0x38340000, 0x38344000, 0x38348000, 0x3834c000, 0x38350000, 0x38354000,
+0x38358000, 0x3835c000, 0x38360000, 0x38364000, 0x38368000, 0x3836c000,
+0x38370000, 0x38374000, 0x38378000, 0x3837c000, 0x38380000, 0x38384000,
+0x38388000, 0x3838c000, 0x38390000, 0x38394000, 0x38398000, 0x3839c000,
+0x383a0000, 0x383a4000, 0x383a8000, 0x383ac000, 0x383b0000, 0x383b4000,
+0x383b8000, 0x383bc000, 0x383c0000, 0x383c4000, 0x383c8000, 0x383cc000,
+0x383d0000, 0x383d4000, 0x383d8000, 0x383dc000, 0x383e0000, 0x383e4000,
+0x383e8000, 0x383ec000, 0x383f0000, 0x383f4000, 0x383f8000, 0x383fc000,
+0x38400000, 0x38404000, 0x38408000, 0x3840c000, 0x38410000, 0x38414000,
+0x38418000, 0x3841c000, 0x38420000, 0x38424000, 0x38428000, 0x3842c000,
+0x38430000, 0x38434000, 0x38438000, 0x3843c000, 0x38440000, 0x38444000,
+0x38448000, 0x3844c000, 0x38450000, 0x38454000, 0x38458000, 0x3845c000,
+0x38460000, 0x38464000, 0x38468000, 0x3846c000, 0x38470000, 0x38474000,
+0x38478000, 0x3847c000, 0x38480000, 0x38484000, 0x38488000, 0x3848c000,
+0x38490000, 0x38494000, 0x38498000, 0x3849c000, 0x384a0000, 0x384a4000,
+0x384a8000, 0x384ac000, 0x384b0000, 0x384b4000, 0x384b8000, 0x384bc000,
+0x384c0000, 0x384c4000, 0x384c8000, 0x384cc000, 0x384d0000, 0x384d4000,
+0x384d8000, 0x384dc000, 0x384e0000, 0x384e4000, 0x384e8000, 0x384ec000,
+0x384f0000, 0x384f4000, 0x384f8000, 0x384fc000, 0x38500000, 0x38504000,
+0x38508000, 0x3850c000, 0x38510000, 0x38514000, 0x38518000, 0x3851c000,
+0x38520000, 0x38524000, 0x38528000, 0x3852c000, 0x38530000, 0x38534000,
+0x38538000, 0x3853c000, 0x38540000, 0x38544000, 0x38548000, 0x3854c000,
+0x38550000, 0x38554000, 0x38558000, 0x3855c000, 0x38560000, 0x38564000,
+0x38568000, 0x3856c000, 0x38570000, 0x38574000, 0x38578000, 0x3857c000,
+0x38580000, 0x38584000, 0x38588000, 0x3858c000, 0x38590000, 0x38594000,
+0x38598000, 0x3859c000, 0x385a0000, 0x385a4000, 0x385a8000, 0x385ac000,
+0x385b0000, 0x385b4000, 0x385b8000, 0x385bc000, 0x385c0000, 0x385c4000,
+0x385c8000, 0x385cc000, 0x385d0000, 0x385d4000, 0x385d8000, 0x385dc000,
+0x385e0000, 0x385e4000, 0x385e8000, 0x385ec000, 0x385f0000, 0x385f4000,
+0x385f8000, 0x385fc000, 0x38600000, 0x38604000, 0x38608000, 0x3860c000,
+0x38610000, 0x38614000, 0x38618000, 0x3861c000, 0x38620000, 0x38624000,
+0x38628000, 0x3862c000, 0x38630000, 0x38634000, 0x38638000, 0x3863c000,
+0x38640000, 0x38644000, 0x38648000, 0x3864c000, 0x38650000, 0x38654000,
+0x38658000, 0x3865c000, 0x38660000, 0x38664000, 0x38668000, 0x3866c000,
+0x38670000, 0x38674000, 0x38678000, 0x3867c000, 0x38680000, 0x38684000,
+0x38688000, 0x3868c000, 0x38690000, 0x38694000, 0x38698000, 0x3869c000,
+0x386a0000, 0x386a4000, 0x386a8000, 0x386ac000, 0x386b0000, 0x386b4000,
+0x386b8000, 0x386bc000, 0x386c0000, 0x386c4000, 0x386c8000, 0x386cc000,
+0x386d0000, 0x386d4000, 0x386d8000, 0x386dc000, 0x386e0000, 0x386e4000,
+0x386e8000, 0x386ec000, 0x386f0000, 0x386f4000, 0x386f8000, 0x386fc000,
+0x38700000, 0x38704000, 0x38708000, 0x3870c000, 0x38710000, 0x38714000,
+0x38718000, 0x3871c000, 0x38720000, 0x38724000, 0x38728000, 0x3872c000,
+0x38730000, 0x38734000, 0x38738000, 0x3873c000, 0x38740000, 0x38744000,
+0x38748000, 0x3874c000, 0x38750000, 0x38754000, 0x38758000, 0x3875c000,
+0x38760000, 0x38764000, 0x38768000, 0x3876c000, 0x38770000, 0x38774000,
+0x38778000, 0x3877c000, 0x38780000, 0x38784000, 0x38788000, 0x3878c000,
+0x38790000, 0x38794000, 0x38798000, 0x3879c000, 0x387a0000, 0x387a4000,
+0x387a8000, 0x387ac000, 0x387b0000, 0x387b4000, 0x387b8000, 0x387bc000,
+0x387c0000, 0x387c4000, 0x387c8000, 0x387cc000, 0x387d0000, 0x387d4000,
+0x387d8000, 0x387dc000, 0x387e0000, 0x387e4000, 0x387e8000, 0x387ec000,
+0x387f0000, 0x387f4000, 0x387f8000, 0x387fc000, 0x38000000, 0x38002000,   // entry 1023 = 0x387fc000; entry 1024 = 0x38000000 begins the second half
+0x38004000, 0x38006000, 0x38008000, 0x3800a000, 0x3800c000, 0x3800e000,
+0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801a000,
+0x3801c000, 0x3801e000, 0x38020000, 0x38022000, 0x38024000, 0x38026000,
+0x38028000, 0x3802a000, 0x3802c000, 0x3802e000, 0x38030000, 0x38032000,
+0x38034000, 0x38036000, 0x38038000, 0x3803a000, 0x3803c000, 0x3803e000,
+0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804a000,
+0x3804c000, 0x3804e000, 0x38050000, 0x38052000, 0x38054000, 0x38056000,
+0x38058000, 0x3805a000, 0x3805c000, 0x3805e000, 0x38060000, 0x38062000,
+0x38064000, 0x38066000, 0x38068000, 0x3806a000, 0x3806c000, 0x3806e000,
+0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807a000,
+0x3807c000, 0x3807e000, 0x38080000, 0x38082000, 0x38084000, 0x38086000,
+0x38088000, 0x3808a000, 0x3808c000, 0x3808e000, 0x38090000, 0x38092000,
+0x38094000, 0x38096000, 0x38098000, 0x3809a000, 0x3809c000, 0x3809e000,
+0x380a0000, 0x380a2000, 0x380a4000, 0x380a6000, 0x380a8000, 0x380aa000,
+0x380ac000, 0x380ae000, 0x380b0000, 0x380b2000, 0x380b4000, 0x380b6000,
+0x380b8000, 0x380ba000, 0x380bc000, 0x380be000, 0x380c0000, 0x380c2000,
+0x380c4000, 0x380c6000, 0x380c8000, 0x380ca000, 0x380cc000, 0x380ce000,
+0x380d0000, 0x380d2000, 0x380d4000, 0x380d6000, 0x380d8000, 0x380da000,
+0x380dc000, 0x380de000, 0x380e0000, 0x380e2000, 0x380e4000, 0x380e6000,
+0x380e8000, 0x380ea000, 0x380ec000, 0x380ee000, 0x380f0000, 0x380f2000,
+0x380f4000, 0x380f6000, 0x380f8000, 0x380fa000, 0x380fc000, 0x380fe000,
+0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810a000,
+0x3810c000, 0x3810e000, 0x38110000, 0x38112000, 0x38114000, 0x38116000,
+0x38118000, 0x3811a000, 0x3811c000, 0x3811e000, 0x38120000, 0x38122000,
+0x38124000, 0x38126000, 0x38128000, 0x3812a000, 0x3812c000, 0x3812e000,
+0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813a000,
+0x3813c000, 0x3813e000, 0x38140000, 0x38142000, 0x38144000, 0x38146000,
+0x38148000, 0x3814a000, 0x3814c000, 0x3814e000, 0x38150000, 0x38152000,
+0x38154000, 0x38156000, 0x38158000, 0x3815a000, 0x3815c000, 0x3815e000,
+0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816a000,
+0x3816c000, 0x3816e000, 0x38170000, 0x38172000, 0x38174000, 0x38176000,
+0x38178000, 0x3817a000, 0x3817c000, 0x3817e000, 0x38180000, 0x38182000,
+0x38184000, 0x38186000, 0x38188000, 0x3818a000, 0x3818c000, 0x3818e000,
+0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819a000,
+0x3819c000, 0x3819e000, 0x381a0000, 0x381a2000, 0x381a4000, 0x381a6000,
+0x381a8000, 0x381aa000, 0x381ac000, 0x381ae000, 0x381b0000, 0x381b2000,
+0x381b4000, 0x381b6000, 0x381b8000, 0x381ba000, 0x381bc000, 0x381be000,
+0x381c0000, 0x381c2000, 0x381c4000, 0x381c6000, 0x381c8000, 0x381ca000,
+0x381cc000, 0x381ce000, 0x381d0000, 0x381d2000, 0x381d4000, 0x381d6000,
+0x381d8000, 0x381da000, 0x381dc000, 0x381de000, 0x381e0000, 0x381e2000,
+0x381e4000, 0x381e6000, 0x381e8000, 0x381ea000, 0x381ec000, 0x381ee000,
+0x381f0000, 0x381f2000, 0x381f4000, 0x381f6000, 0x381f8000, 0x381fa000,
+0x381fc000, 0x381fe000, 0x38200000, 0x38202000, 0x38204000, 0x38206000,
+0x38208000, 0x3820a000, 0x3820c000, 0x3820e000, 0x38210000, 0x38212000,
+0x38214000, 0x38216000, 0x38218000, 0x3821a000, 0x3821c000, 0x3821e000,
+0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822a000,
+0x3822c000, 0x3822e000, 0x38230000, 0x38232000, 0x38234000, 0x38236000,
+0x38238000, 0x3823a000, 0x3823c000, 0x3823e000, 0x38240000, 0x38242000,
+0x38244000, 0x38246000, 0x38248000, 0x3824a000, 0x3824c000, 0x3824e000,
+0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825a000,
+0x3825c000, 0x3825e000, 0x38260000, 0x38262000, 0x38264000, 0x38266000,
+0x38268000, 0x3826a000, 0x3826c000, 0x3826e000, 0x38270000, 0x38272000,
+0x38274000, 0x38276000, 0x38278000, 0x3827a000, 0x3827c000, 0x3827e000,
+0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828a000,
+0x3828c000, 0x3828e000, 0x38290000, 0x38292000, 0x38294000, 0x38296000,
+0x38298000, 0x3829a000, 0x3829c000, 0x3829e000, 0x382a0000, 0x382a2000,
+0x382a4000, 0x382a6000, 0x382a8000, 0x382aa000, 0x382ac000, 0x382ae000,
+0x382b0000, 0x382b2000, 0x382b4000, 0x382b6000, 0x382b8000, 0x382ba000,
+0x382bc000, 0x382be000, 0x382c0000, 0x382c2000, 0x382c4000, 0x382c6000,
+0x382c8000, 0x382ca000, 0x382cc000, 0x382ce000, 0x382d0000, 0x382d2000,
+0x382d4000, 0x382d6000, 0x382d8000, 0x382da000, 0x382dc000, 0x382de000,
+0x382e0000, 0x382e2000, 0x382e4000, 0x382e6000, 0x382e8000, 0x382ea000,
+0x382ec000, 0x382ee000, 0x382f0000, 0x382f2000, 0x382f4000, 0x382f6000,
+0x382f8000, 0x382fa000, 0x382fc000, 0x382fe000, 0x38300000, 0x38302000,
+0x38304000, 0x38306000, 0x38308000, 0x3830a000, 0x3830c000, 0x3830e000,
+0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831a000,
+0x3831c000, 0x3831e000, 0x38320000, 0x38322000, 0x38324000, 0x38326000,
+0x38328000, 0x3832a000, 0x3832c000, 0x3832e000, 0x38330000, 0x38332000,
+0x38334000, 0x38336000, 0x38338000, 0x3833a000, 0x3833c000, 0x3833e000,
+0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834a000,
+0x3834c000, 0x3834e000, 0x38350000, 0x38352000, 0x38354000, 0x38356000,
+0x38358000, 0x3835a000, 0x3835c000, 0x3835e000, 0x38360000, 0x38362000,
+0x38364000, 0x38366000, 0x38368000, 0x3836a000, 0x3836c000, 0x3836e000,
+0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837a000,
+0x3837c000, 0x3837e000, 0x38380000, 0x38382000, 0x38384000, 0x38386000,
+0x38388000, 0x3838a000, 0x3838c000, 0x3838e000, 0x38390000, 0x38392000,
+0x38394000, 0x38396000, 0x38398000, 0x3839a000, 0x3839c000, 0x3839e000,
+0x383a0000, 0x383a2000, 0x383a4000, 0x383a6000, 0x383a8000, 0x383aa000,
+0x383ac000, 0x383ae000, 0x383b0000, 0x383b2000, 0x383b4000, 0x383b6000,
+0x383b8000, 0x383ba000, 0x383bc000, 0x383be000, 0x383c0000, 0x383c2000,
+0x383c4000, 0x383c6000, 0x383c8000, 0x383ca000, 0x383cc000, 0x383ce000,
+0x383d0000, 0x383d2000, 0x383d4000, 0x383d6000, 0x383d8000, 0x383da000,
+0x383dc000, 0x383de000, 0x383e0000, 0x383e2000, 0x383e4000, 0x383e6000,
+0x383e8000, 0x383ea000, 0x383ec000, 0x383ee000, 0x383f0000, 0x383f2000,
+0x383f4000, 0x383f6000, 0x383f8000, 0x383fa000, 0x383fc000, 0x383fe000,
+0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840a000,
+0x3840c000, 0x3840e000, 0x38410000, 0x38412000, 0x38414000, 0x38416000,
+0x38418000, 0x3841a000, 0x3841c000, 0x3841e000, 0x38420000, 0x38422000,
+0x38424000, 0x38426000, 0x38428000, 0x3842a000, 0x3842c000, 0x3842e000,
+0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843a000,
+0x3843c000, 0x3843e000, 0x38440000, 0x38442000, 0x38444000, 0x38446000,
+0x38448000, 0x3844a000, 0x3844c000, 0x3844e000, 0x38450000, 0x38452000,
+0x38454000, 0x38456000, 0x38458000, 0x3845a000, 0x3845c000, 0x3845e000,
+0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846a000,
+0x3846c000, 0x3846e000, 0x38470000, 0x38472000, 0x38474000, 0x38476000,
+0x38478000, 0x3847a000, 0x3847c000, 0x3847e000, 0x38480000, 0x38482000,
+0x38484000, 0x38486000, 0x38488000, 0x3848a000, 0x3848c000, 0x3848e000,
+0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849a000,
+0x3849c000, 0x3849e000, 0x384a0000, 0x384a2000, 0x384a4000, 0x384a6000,
+0x384a8000, 0x384aa000, 0x384ac000, 0x384ae000, 0x384b0000, 0x384b2000,
+0x384b4000, 0x384b6000, 0x384b8000, 0x384ba000, 0x384bc000, 0x384be000,
+0x384c0000, 0x384c2000, 0x384c4000, 0x384c6000, 0x384c8000, 0x384ca000,
+0x384cc000, 0x384ce000, 0x384d0000, 0x384d2000, 0x384d4000, 0x384d6000,
+0x384d8000, 0x384da000, 0x384dc000, 0x384de000, 0x384e0000, 0x384e2000,
+0x384e4000, 0x384e6000, 0x384e8000, 0x384ea000, 0x384ec000, 0x384ee000,
+0x384f0000, 0x384f2000, 0x384f4000, 0x384f6000, 0x384f8000, 0x384fa000,
+0x384fc000, 0x384fe000, 0x38500000, 0x38502000, 0x38504000, 0x38506000,
+0x38508000, 0x3850a000, 0x3850c000, 0x3850e000, 0x38510000, 0x38512000,
+0x38514000, 0x38516000, 0x38518000, 0x3851a000, 0x3851c000, 0x3851e000,
+0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852a000,
+0x3852c000, 0x3852e000, 0x38530000, 0x38532000, 0x38534000, 0x38536000,
+0x38538000, 0x3853a000, 0x3853c000, 0x3853e000, 0x38540000, 0x38542000,
+0x38544000, 0x38546000, 0x38548000, 0x3854a000, 0x3854c000, 0x3854e000,
+0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855a000,
+0x3855c000, 0x3855e000, 0x38560000, 0x38562000, 0x38564000, 0x38566000,
+0x38568000, 0x3856a000, 0x3856c000, 0x3856e000, 0x38570000, 0x38572000,
+0x38574000, 0x38576000, 0x38578000, 0x3857a000, 0x3857c000, 0x3857e000,
+0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858a000,
+0x3858c000, 0x3858e000, 0x38590000, 0x38592000, 0x38594000, 0x38596000,
+0x38598000, 0x3859a000, 0x3859c000, 0x3859e000, 0x385a0000, 0x385a2000,
+0x385a4000, 0x385a6000, 0x385a8000, 0x385aa000, 0x385ac000, 0x385ae000,
+0x385b0000, 0x385b2000, 0x385b4000, 0x385b6000, 0x385b8000, 0x385ba000,
+0x385bc000, 0x385be000, 0x385c0000, 0x385c2000, 0x385c4000, 0x385c6000,
+0x385c8000, 0x385ca000, 0x385cc000, 0x385ce000, 0x385d0000, 0x385d2000,
+0x385d4000, 0x385d6000, 0x385d8000, 0x385da000, 0x385dc000, 0x385de000,
+0x385e0000, 0x385e2000, 0x385e4000, 0x385e6000, 0x385e8000, 0x385ea000,
+0x385ec000, 0x385ee000, 0x385f0000, 0x385f2000, 0x385f4000, 0x385f6000,
+0x385f8000, 0x385fa000, 0x385fc000, 0x385fe000, 0x38600000, 0x38602000,
+0x38604000, 0x38606000, 0x38608000, 0x3860a000, 0x3860c000, 0x3860e000,
+0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861a000,
+0x3861c000, 0x3861e000, 0x38620000, 0x38622000, 0x38624000, 0x38626000,
+0x38628000, 0x3862a000, 0x3862c000, 0x3862e000, 0x38630000, 0x38632000,
+0x38634000, 0x38636000, 0x38638000, 0x3863a000, 0x3863c000, 0x3863e000,
+0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864a000,
+0x3864c000, 0x3864e000, 0x38650000, 0x38652000, 0x38654000, 0x38656000,
+0x38658000, 0x3865a000, 0x3865c000, 0x3865e000, 0x38660000, 0x38662000,
+0x38664000, 0x38666000, 0x38668000, 0x3866a000, 0x3866c000, 0x3866e000,
+0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867a000,
+0x3867c000, 0x3867e000, 0x38680000, 0x38682000, 0x38684000, 0x38686000,
+0x38688000, 0x3868a000, 0x3868c000, 0x3868e000, 0x38690000, 0x38692000,
+0x38694000, 0x38696000, 0x38698000, 0x3869a000, 0x3869c000, 0x3869e000,
+0x386a0000, 0x386a2000, 0x386a4000, 0x386a6000, 0x386a8000, 0x386aa000,
+0x386ac000, 0x386ae000, 0x386b0000, 0x386b2000, 0x386b4000, 0x386b6000,
+0x386b8000, 0x386ba000, 0x386bc000, 0x386be000, 0x386c0000, 0x386c2000,
+0x386c4000, 0x386c6000, 0x386c8000, 0x386ca000, 0x386cc000, 0x386ce000,
+0x386d0000, 0x386d2000, 0x386d4000, 0x386d6000, 0x386d8000, 0x386da000,
+0x386dc000, 0x386de000, 0x386e0000, 0x386e2000, 0x386e4000, 0x386e6000,
+0x386e8000, 0x386ea000, 0x386ec000, 0x386ee000, 0x386f0000, 0x386f2000,
+0x386f4000, 0x386f6000, 0x386f8000, 0x386fa000, 0x386fc000, 0x386fe000,
+0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870a000,
+0x3870c000, 0x3870e000, 0x38710000, 0x38712000, 0x38714000, 0x38716000,
+0x38718000, 0x3871a000, 0x3871c000, 0x3871e000, 0x38720000, 0x38722000,
+0x38724000, 0x38726000, 0x38728000, 0x3872a000, 0x3872c000, 0x3872e000,
+0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873a000,
+0x3873c000, 0x3873e000, 0x38740000, 0x38742000, 0x38744000, 0x38746000,
+0x38748000, 0x3874a000, 0x3874c000, 0x3874e000, 0x38750000, 0x38752000,
+0x38754000, 0x38756000, 0x38758000, 0x3875a000, 0x3875c000, 0x3875e000,
+0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876a000,
+0x3876c000, 0x3876e000, 0x38770000, 0x38772000, 0x38774000, 0x38776000,
+0x38778000, 0x3877a000, 0x3877c000, 0x3877e000, 0x38780000, 0x38782000,
+0x38784000, 0x38786000, 0x38788000, 0x3878a000, 0x3878c000, 0x3878e000,
+0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879a000,
+0x3879c000, 0x3879e000, 0x387a0000, 0x387a2000, 0x387a4000, 0x387a6000,
+0x387a8000, 0x387aa000, 0x387ac000, 0x387ae000, 0x387b0000, 0x387b2000,
+0x387b4000, 0x387b6000, 0x387b8000, 0x387ba000, 0x387bc000, 0x387be000,
+0x387c0000, 0x387c2000, 0x387c4000, 0x387c6000, 0x387c8000, 0x387ca000,
+0x387cc000, 0x387ce000, 0x387d0000, 0x387d2000, 0x387d4000, 0x387d6000,
+0x387d8000, 0x387da000, 0x387dc000, 0x387de000, 0x387e0000, 0x387e2000,
+0x387e4000, 0x387e6000, 0x387e8000, 0x387ea000, 0x387ec000, 0x387ee000,
+0x387f0000, 0x387f2000, 0x387f4000, 0x387f6000, 0x387f8000, 0x387fa000,
+0x387fc000, 0x387fe000
+};
+
+static cmsUInt16Number Offset[64] = {   // 64 entries: 0x0400 (1024, half the length of Mantissa[2048]) everywhere except entries 0 and 32; presumably a per-exponent offset into Mantissa -- confirm against the lookup code
+0x0000, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,   // entry 0 is 0x0000
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0000, 0x0400, 0x0400, 0x0400,   // entry 32 is 0x0000
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400
+};
+
+static cmsUInt32Number Exponent[64] = {   // 64 32-bit patterns; entries 32..63 repeat entries 0..31 with bit 31 set; presumably the exponent/sign contribution added to a Mantissa entry -- confirm against the lookup code
+0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000,   // entries 0..30 step by 0x00800000 (entry i == i << 23)
+0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000,
+0x06000000, 0x06800000, 0x07000000, 0x07800000, 0x08000000, 0x08800000,
+0x09000000, 0x09800000, 0x0a000000, 0x0a800000, 0x0b000000, 0x0b800000,
+0x0c000000, 0x0c800000, 0x0d000000, 0x0d800000, 0x0e000000, 0x0e800000,
+0x0f000000, 0x47800000, 0x80000000, 0x80800000, 0x81000000, 0x81800000,   // entry 31 breaks the progression (0x47800000); entry 32 starts the bit-31-set half
+0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000,
+0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000,
+0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8a000000, 0x8a800000,
+0x8b000000, 0x8b800000, 0x8c000000, 0x8c800000, 0x8d000000, 0x8d800000,
+0x8e000000, 0x8e800000, 0x8f000000, 0xc7800000   // entry 63 mirrors entry 31 with bit 31 set
+};
+
+static cmsUInt16Number Base[512] = {   // 512 16-bit patterns; entries 256..511 repeat entries 0..255 with bit 15 (0x8000) set; presumably the float->half base value per sign+exponent index -- confirm against the conversion code
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,   // entries 0..102 are all 0x0000
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040,   // entries 103..112 double each step: 0x0001 .. 0x0200
+0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00,   // from entry 113 (0x0400) the step is a constant 0x0400
+0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400,
+0x4800, 0x4c00, 0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00,
+0x7000, 0x7400, 0x7800, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,   // entries 143..255 saturate at 0x7c00
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x8000, 0x8000, 0x8000, 0x8000,   // entry 256 begins the second half (same shape, bit 15 set)
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001,
+0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400,
+0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00,
+0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400,
+0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, 0xf000, 0xf400, 0xf800, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00
+};
+
+// Right-shift amounts used by _cmsFloat2Half: the table is indexed by bits 23..31 of the
+// 32-bit float pattern and the selected entry is applied to the low 23 bits of that pattern.
+static cmsUInt8Number  Shift[512] = {
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17,
+0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d,
+0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13,
+0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x0d
+};
+
+// Expands a 16-bit half-float bit pattern into a cmsFloat32Number by table lookup.
+//   h        the half-float bit pattern
+//   returns  the value whose 32-bit pattern is assembled from the Mantissa, Offset and Exponent tables
+cmsFloat32Number CMSEXPORT _cmsHalf2Float(cmsUInt16Number h)
+{
+    // The union lets the assembled integer pattern be read back as a float
+    union {
+        cmsFloat32Number flt;
+        cmsUInt32Number  num;
+    } bits;
+
+    const int row    = h >> 10;                      // upper 6 bits select the table row
+    const int manIdx = (h & 0x3ff) + Offset[row];    // lower 10 bits, displaced by the row offset
+
+    bits.num = Mantissa[manIdx] + Exponent[row];
+    return bits.flt;
+}
+
+// Packs a cmsFloat32Number into a 16-bit half-float bit pattern using the Base and Shift tables.
+//   flt      the value to convert
+//   returns  Base[] entry plus the shifted low 23 bits, truncated to 16 bits
+cmsUInt16Number CMSEXPORT _cmsFloat2Half(cmsFloat32Number flt)
+{
+    // The union exposes the bit pattern of the float as an integer
+    union {
+        cmsFloat32Number flt;
+        cmsUInt32Number  num;
+    } bits;
+
+    cmsUInt32Number raw, row, low23;
+
+    bits.flt = flt;
+    raw   = bits.num;
+    row   = (raw >> 23) & 0x1ff;     // bits 23..31 select the table entry
+    low23 = raw & 0x007fffff;
+
+    return (cmsUInt16Number) ((cmsUInt32Number) Base[ row ] + (low23 >> Shift[ row ]));
+}
+
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmsintrp.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmsintrp.c
new file mode 100644
index 0000000000..31c5aa46ef
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmsintrp.c
@@ -0,0 +1,1521 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// This module incorporates several interpolation routines, for 1 to 8 channels on input and
+// up to 65535 channels on output. The user may change those by using the interpolation plug-in
+
+// Some people may want to compile as C++ with all warnings on, in this case make compiler silent
+#ifdef _MSC_VER
+#    if (_MSC_VER >= 1400)
+#       pragma warning( disable : 4365 )
+#    endif
+#endif
+
+// Interpolation routines by default
+static cmsInterpFunction DefaultInterpolatorsFactory(cmsUInt32Number nInputChannels, cmsUInt32Number nOutputChannels, cmsUInt32Number dwFlags);
+
+// This is the default factory
+_cmsInterpPluginChunkType _cmsInterpPluginChunk = { NULL };
+
+// The interpolation plug-in memory chunk allocator/dup
+// Creates the interpolation plug-in chunk of 'ctx' as a copy allocated from ctx's memory pool.
+// The copy is taken from the chunk of 'src' when 'src' is given, otherwise from a
+// static default whose factory pointer is NULL.
+void _cmsAllocInterpPluginChunk(struct _cmsContext_struct* ctx, const struct _cmsContext_struct* src)
+{
+    // Default contents, used when there is no context to duplicate
+    static _cmsInterpPluginChunkType InterpPluginChunk = { NULL };
+    void* from;
+
+    _cmsAssert(ctx != NULL);
+
+    from = (src != NULL) ? src ->chunks[InterpPlugin] : (void*) &InterpPluginChunk;
+
+    _cmsAssert(from != NULL);
+    ctx ->chunks[InterpPlugin] = _cmsSubAllocDup(ctx ->MemPool, from, sizeof(_cmsInterpPluginChunkType));
+}
+
+
+// Main plug-in entry
+// Installs (or, when Data is NULL, removes) the interpolators factory of a context.
+// Always returns TRUE.
+cmsBool  _cmsRegisterInterpPlugin(cmsContext ContextID, cmsPluginBase* Data)
+{
+    _cmsInterpPluginChunkType* chunk = (_cmsInterpPluginChunkType*) _cmsContextGetClientChunk(ContextID, InterpPlugin);
+
+    if (Data != NULL) {
+
+        // Set replacement functions
+        chunk ->Interpolators = ((cmsPluginInterpolation*) Data) ->InterpolatorsFactory;
+    }
+    else {
+
+        // No plug-in: forget any factory registered before
+        chunk ->Interpolators = NULL;
+    }
+
+    return TRUE;
+}
+
+
+// Set the interpolation method
+// Selects the interpolation routine for 'p', trying the context's plug-in factory first
+// and the built-in factory second. Returns FALSE when neither provides a routine.
+cmsBool _cmsSetInterpolationRoutine(cmsContext ContextID, cmsInterpParams* p)
+{
+    _cmsInterpPluginChunkType* chunk = (_cmsInterpPluginChunkType*) _cmsContextGetClientChunk(ContextID, InterpPlugin);
+
+    // Start from "no routine"; only one member of the union is ever checked
+    p ->Interpolation.Lerp16 = NULL;
+
+    // Ask the plug-in factory, if there is one
+    if (chunk ->Interpolators != NULL)
+        p ->Interpolation = chunk ->Interpolators(p ->nInputs, p ->nOutputs, p ->dwFlags);
+
+    // Nothing so far (no plug-in, or the plug-in does not support this case): use the default factory
+    if (p ->Interpolation.Lerp16 == NULL)
+        p ->Interpolation = DefaultInterpolatorsFactory(p ->nInputs, p ->nOutputs, p ->dwFlags);
+
+    return (p ->Interpolation.Lerp16 != NULL) ? TRUE : FALSE;
+}
+
+
+// This function precalculates as many parameters as possible to speed up the interpolation.
+// Builds the interpolation parameters for a table with a per-direction number of nodes.
+//   nSamples   number of nodes on each input direction (InputChan entries are read)
+//   InputChan  number of input channels, at most MAX_INPUT_DIMENSIONS
+//   OutputChan number of output channels
+//   Table      the table to interpolate; only the pointer is stored
+//   dwFlags    flags forwarded to the interpolators factory
+// Returns NULL on too many inputs, on allocation failure or when no interpolation routine is available.
+cmsInterpParams* _cmsComputeInterpParamsEx(cmsContext ContextID,
+                                           const cmsUInt32Number nSamples[],
+                                           cmsUInt32Number InputChan, cmsUInt32Number OutputChan,
+                                           const void *Table,
+                                           cmsUInt32Number dwFlags)
+{
+    cmsInterpParams* params;
+    cmsUInt32Number dim;
+
+    // Reject more inputs than the fixed-size arrays can hold
+    if (InputChan > MAX_INPUT_DIMENSIONS) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Too many input channels (%d channels, max=%d)", InputChan, MAX_INPUT_DIMENSIONS);
+        return NULL;
+    }
+
+    params = (cmsInterpParams*) _cmsMallocZero(ContextID, sizeof(cmsInterpParams));
+    if (params == NULL) return NULL;
+
+    // Remember what we were given
+    params ->dwFlags   = dwFlags;
+    params ->nInputs   = InputChan;
+    params ->nOutputs  = OutputChan;
+    params ->Table     = Table;
+    params ->ContextID = ContextID;
+
+    // Nodes per direction, and the domain, which is the number of nodes minus one
+    for (dim = 0; dim < InputChan; dim++) {
+
+        params ->nSamples[dim] = nSamples[dim];
+        params ->Domain[dim]   = nSamples[dim] - 1;
+    }
+
+    // Strides used to index the grid: opta[0] spans one node, each further one spans a whole direction
+    params ->opta[0] = params ->nOutputs;
+    for (dim = 1; dim < InputChan; dim++)
+        params ->opta[dim] = params ->opta[dim - 1] * nSamples[InputChan - dim];
+
+    if (_cmsSetInterpolationRoutine(ContextID, params))
+        return params;
+
+    // No routine for this combination: report and undo the allocation
+    cmsSignalError(ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported interpolation (%d->%d channels)", InputChan, OutputChan);
+    _cmsFree(ContextID, params);
+    return NULL;
+}
+
+
+// This one is a wrapper around the previous function, assuming all directions have the same number of nodes
+// Same as _cmsComputeInterpParamsEx, with 'nSamples' nodes on every input direction.
+cmsInterpParams* CMSEXPORT _cmsComputeInterpParams(cmsContext ContextID, cmsUInt32Number nSamples,
+                                                   cmsUInt32Number InputChan, cmsUInt32Number OutputChan, const void* Table, cmsUInt32Number dwFlags)
+{
+    cmsUInt32Number Samples[MAX_INPUT_DIMENSIONS];
+    int dim = 0;
+
+    // Replicate the single node count over all possible directions
+    while (dim < MAX_INPUT_DIMENSIONS) {
+        Samples[dim] = nSamples;
+        dim++;
+    }
+
+    return _cmsComputeInterpParamsEx(ContextID, Samples, InputChan, OutputChan, Table, dwFlags);
+}
+
+
+// Free all associated memory
+// Releases an interpolation parameters object using the context stored in it. NULL is accepted and ignored.
+void CMSEXPORT _cmsFreeInterpParams(cmsInterpParams* p)
+{
+    if (p == NULL) return;
+
+    _cmsFree(p ->ContextID, p);
+}
+
+
+// Inline fixed point interpolation
+// Fixed point linear interpolation between 'l' and 'h' with weight 'a'.
+// The arithmetic is carried out on unsigned 32-bit values and the result is truncated to 16 bits.
+cmsINLINE CMS_NO_SANITIZE cmsUInt16Number LinearInterp(cmsS15Fixed16Number a, cmsS15Fixed16Number l, cmsS15Fixed16Number h)
+{
+    const cmsUInt32Number span   = (cmsUInt32Number) (h - l);
+    const cmsUInt32Number scaled = span * a + 0x8000;       // 0x8000 rounds the >> 16 below
+
+    return (cmsUInt16Number) ((scaled >> 16) + l);
+}
+
+
+//  Linear interpolation (Fixed-point optimized)
+// Interpolates a 1D table of 16-bit nodes at a 16-bit input position.
+//   Value[0]   input, 0..0xffff spanning the whole table
+//   Output[0]  receives the interpolated node value
+//   p          interpolation parameters; Table and Domain[0] are read
+static
+void LinLerp1D(CMSREGISTER const cmsUInt16Number Value[],
+               CMSREGISTER cmsUInt16Number Output[],
+               CMSREGISTER const cmsInterpParams* p)
+{
+    cmsUInt16Number y1, y0;
+    int cell0, rest;
+    int val3;
+    const cmsUInt16Number* LutTable = (cmsUInt16Number*) p ->Table;
+
+    // Last value, or a table holding a single node: there is no upper neighbour to
+    // interpolate with, and reading LutTable[cell0 + 1] would fall outside the table.
+    if (Value[0] == 0xffff || p ->Domain[0] == 0) {
+
+        Output[0] = LutTable[p -> Domain[0]];
+    }
+    else
+    {
+        val3 = p->Domain[0] * Value[0];
+        val3 = _cmsToFixedDomain(val3);    // To fixed 15.16
+
+        cell0 = FIXED_TO_INT(val3);             // Cell is 16 MSB bits
+        rest = FIXED_REST_TO_INT(val3);        // Rest is 16 LSB bits
+
+        y0 = LutTable[cell0];
+        y1 = LutTable[cell0 + 1];
+
+        Output[0] = LinearInterp(rest, y0, y1);
+    }
+}
+
+// To prevent out of bounds indexing
+// Clips 'v' to 0..1. NaN and anything below 1.0e-9 become 0, anything above 1 becomes 1.
+cmsINLINE cmsFloat32Number fclamp(cmsFloat32Number v) 
+{
+    if (isnan(v) || v < 1.0e-9f) return 0.0f;
+    if (v > 1.0f) return 1.0f;
+
+    return v;
+}
+
+// Floating-point version of 1D interpolation
+// Interpolates a 1D table of floating point nodes.
+//   Value[0]   input, clipped to 0..1 by fclamp
+//   Output[0]  receives the interpolated node value
+//   p          interpolation parameters; Table and Domain[0] are read
+static
+void LinLerp1Dfloat(const cmsFloat32Number Value[],
+                    cmsFloat32Number Output[],
+                    const cmsInterpParams* p)
+{
+    const cmsFloat32Number* LutTable = (cmsFloat32Number*) p ->Table;
+    cmsFloat32Number pos, frac, lo, hi;
+    int lower, upper;
+
+    pos = fclamp(Value[0]);
+
+    // The end of the domain maps straight to the last node
+    if (pos == 1.0) {
+        Output[0] = LutTable[p -> Domain[0]];
+        return;
+    }
+
+    // Position expressed in nodes
+    pos *= p->Domain[0];
+
+    lower = (int)floor(pos);
+    upper = (int)ceil(pos);
+
+    // Fractional distance from the lower node
+    frac = pos - lower;
+
+    lo = LutTable[lower];
+    hi = LutTable[upper];
+
+    Output[0] = lo + (hi - lo) * frac;
+}
+
+
+
+// Eval gray LUT having only one input channel
+// Evaluates a 1-input table of 16-bit nodes, producing p16->nOutputs values.
+//   Input[0]  input, 0..0xffff spanning the whole table
+//   Output    receives one interpolated value per output channel
+//   p16       interpolation parameters; Table, Domain[0], opta[0] and nOutputs are read
+static CMS_NO_SANITIZE
+void Eval1Input(CMSREGISTER const cmsUInt16Number Input[],
+                CMSREGISTER cmsUInt16Number Output[],
+                CMSREGISTER const cmsInterpParams* p16)
+{
+    cmsS15Fixed16Number fk;
+    cmsS15Fixed16Number k0, k1, rk, K0, K1;
+    int v;
+    cmsUInt32Number OutChan;
+    const cmsUInt16Number* LutTable = (cmsUInt16Number*) p16 -> Table;
+
+    v = Input[0] * p16 -> Domain[0];
+    fk = _cmsToFixedDomain(v);
+
+    k0 = FIXED_TO_INT(fk);
+    rk = (cmsUInt16Number) FIXED_REST_TO_INT(fk);
+
+    // The upper node is the next one, except when there is no next one: at the end of
+    // the domain, or when the table holds a single node (Domain[0] == 0). Stepping in
+    // those cases would index past the table.
+    k1 = k0 + ((Input[0] != 0xFFFFU && p16 -> Domain[0] != 0) ? 1 : 0);
+
+    // Nodes are opta[0] entries apart
+    K0 = p16 -> opta[0] * k0;
+    K1 = p16 -> opta[0] * k1;
+
+    for (OutChan=0; OutChan < p16->nOutputs; OutChan++) {
+
+        Output[OutChan] = LinearInterp(rk, LutTable[K0+OutChan], LutTable[K1+OutChan]);
+    }
+}
+
+
+
+// Eval gray LUT having only one input channel
+// Evaluates a 1-input table of floating point nodes, producing p->nOutputs values.
+//   Value[0]  input, clipped to 0..1 by fclamp
+//   Output    receives one interpolated value per output channel
+//   p         interpolation parameters; Table, Domain[0], opta[0] and nOutputs are read
+static
+void Eval1InputFloat(const cmsFloat32Number Value[],
+                     cmsFloat32Number Output[],
+                     const cmsInterpParams* p)
+{
+    cmsFloat32Number y1, y0;
+    cmsFloat32Number val2, rest;
+    int cell0, cell1;
+    cmsUInt32Number OutChan;
+    const cmsFloat32Number* LutTable = (cmsFloat32Number*) p ->Table;
+
+    val2 = fclamp(Value[0]);
+
+    // Last value, or a table holding a single node: copy that node as it is.
+    // Nodes are opta[0] entries apart and hold one entry per output channel, so the
+    // last node starts at Domain[0] * opta[0], not at Domain[0].
+    if (val2 == 1.0 || p->Domain[0] == 0) {
+
+        cmsUInt32Number start = p->Domain[0] * p->opta[0];
+
+        for (OutChan = 0; OutChan < p->nOutputs; OutChan++) {
+            Output[OutChan] = LutTable[start + OutChan];
+        }
+    }
+    else
+    {
+        // Position expressed in nodes
+        val2 *= p->Domain[0];
+
+        cell0 = (int)floor(val2);
+        cell1 = (int)ceil(val2);
+
+        // Fractional distance from the lower node
+        rest = val2 - cell0;
+
+        // From node numbers to table offsets
+        cell0 *= p->opta[0];
+        cell1 *= p->opta[0];
+
+        for (OutChan = 0; OutChan < p->nOutputs; OutChan++) {
+
+            y0 = LutTable[cell0 + OutChan];
+            y1 = LutTable[cell1 + OutChan];
+
+            Output[OutChan] = y0 + (y1 - y0) * rest;
+        }
+    }
+}
+
+// Bilinear interpolation - cmsFloat32Number version
+// Bilinear interpolation on a 2-input table of floating point nodes.
+//   Input   two coordinates, each clipped to 0..1 by fclamp
+//   Output  receives p->nOutputs interpolated values
+//   p       interpolation parameters; Table, Domain[0..1], opta[0..1] and nOutputs are read
+static
+void BilinearInterpFloat(const cmsFloat32Number Input[],
+                         cmsFloat32Number Output[],
+                         const cmsInterpParams* p)
+
+{
+#   define LERP(a,l,h)    (cmsFloat32Number) ((l)+(((h)-(l))*(a)))
+#   define DENS(i,j)      (LutTable[(i)+(j)+OutChan])
+
+    const cmsFloat32Number* LutTable = (cmsFloat32Number*) p ->Table;
+    const int TotalOut = p -> nOutputs;
+    int OutChan;
+
+    // Clipped inputs, reused for the end-of-domain test below
+    const cmsFloat32Number inX = fclamp(Input[0]);
+    const cmsFloat32Number inY = fclamp(Input[1]);
+
+    // Positions expressed in nodes
+    const cmsFloat32Number px = inX * p->Domain[0];
+    const cmsFloat32Number py = inY * p->Domain[1];
+
+    // Lower node and fractional part on each direction
+    const int x0 = (int) _cmsQuickFloor(px);
+    const cmsFloat32Number fx = px - (cmsFloat32Number) x0;
+    const int y0 = (int) _cmsQuickFloor(py);
+    const cmsFloat32Number fy = py - (cmsFloat32Number) y0;
+
+    // Table offsets of the lower and upper nodes; at the end of the domain both are the same node
+    const int X0 = p -> opta[1] * x0;
+    const int X1 = X0 + (inX >= 1.0 ? 0 : p->opta[1]);
+
+    const int Y0 = p -> opta[0] * y0;
+    const int Y1 = Y0 + (inY >= 1.0 ? 0 : p->opta[0]);
+
+    for (OutChan = 0; OutChan < TotalOut; OutChan++) {
+
+        // Along x on both rows, then along y between the two results
+        const cmsFloat32Number alongX0 = LERP(fx, DENS(X0, Y0), DENS(X1, Y0));
+        const cmsFloat32Number alongX1 = LERP(fx, DENS(X0, Y1), DENS(X1, Y1));
+
+        Output[OutChan] = LERP(fy, alongX0, alongX1);
+    }
+
+
+#   undef LERP
+#   undef DENS
+}
+
+// Bilinear interpolation (16 bits) - optimized version
+// Bilinear interpolation on a 2-input table of 16-bit nodes, in fixed point arithmetic.
+//   Input   two 16-bit coordinates
+//   Output  receives p->nOutputs interpolated values
+//   p       interpolation parameters; Table, Domain[0..1], opta[0..1] and nOutputs are read
+static CMS_NO_SANITIZE
+void BilinearInterp16(CMSREGISTER const cmsUInt16Number Input[],
+                      CMSREGISTER cmsUInt16Number Output[],
+                      CMSREGISTER const cmsInterpParams* p)
+
+{
+// DENS reads the node at the given offsets for the current output channel
+#define DENS(i,j) (LutTable[(i)+(j)+OutChan])
+// LERP arguments are not parenthesized: pass plain variables only
+#define LERP(a,l,h)     (cmsUInt16Number) (l + ROUND_FIXED_TO_INT(((h-l)*a)))
+
+           const cmsUInt16Number* LutTable = (cmsUInt16Number*) p ->Table;
+           int        OutChan, TotalOut;
+           cmsS15Fixed16Number    fx, fy;
+  CMSREGISTER int        rx, ry;
+           int        x0, y0;
+  CMSREGISTER int        X0, X1, Y0, Y1;
+           int        d00, d01, d10, d11,
+                      dx0, dx1,
+                      dxy;
+
+    TotalOut   = p -> nOutputs;
+
+    // Split each coordinate into the lower node (x0, y0) and the remainder (rx, ry)
+    fx = _cmsToFixedDomain((int) Input[0] * p -> Domain[0]);
+    x0  = FIXED_TO_INT(fx);
+    rx  = FIXED_REST_TO_INT(fx);    // Rest in 0..1.0 domain
+
+
+    fy = _cmsToFixedDomain((int) Input[1] * p -> Domain[1]);
+    y0  = FIXED_TO_INT(fy);
+    ry  = FIXED_REST_TO_INT(fy);
+
+
+    // Table offsets of the lower and upper nodes; no step is added when the input is 0xFFFF
+    X0 = p -> opta[1] * x0;
+    X1 = X0 + (Input[0] == 0xFFFFU ? 0 : p->opta[1]);
+
+    Y0 = p -> opta[0] * y0;
+    Y1 = Y0 + (Input[1] == 0xFFFFU ? 0 : p->opta[0]);
+
+    for (OutChan = 0; OutChan < TotalOut; OutChan++) {
+
+        // The four corners of the cell
+        d00 = DENS(X0, Y0);
+        d01 = DENS(X0, Y1);
+        d10 = DENS(X1, Y0);
+        d11 = DENS(X1, Y1);
+
+        // Along x on both rows...
+        dx0 = LERP(rx, d00, d10);
+        dx1 = LERP(rx, d01, d11);
+
+        // ...then along y between the two results
+        dxy = LERP(ry, dx0, dx1);
+
+        Output[OutChan] = (cmsUInt16Number) dxy;
+    }
+
+
+#   undef LERP
+#   undef DENS
+}
+
+
+// Trilinear interpolation - cmsFloat32Number version
+// Trilinear interpolation on a 3-input table of floating point nodes.
+//   Input   three coordinates, each clipped to 0..1 by fclamp
+//   Output  receives p->nOutputs interpolated values
+//   p       interpolation parameters; Table, Domain[0..2], opta[0..2] and nOutputs are read
+static
+void TrilinearInterpFloat(const cmsFloat32Number Input[],
+                          cmsFloat32Number Output[],
+                          const cmsInterpParams* p)
+
+{
+#   define LERP(a,l,h)      (cmsFloat32Number) ((l)+(((h)-(l))*(a)))
+#   define DENS(i,j,k)      (LutTable[(i)+(j)+(k)+OutChan])
+
+    const cmsFloat32Number* LutTable = (cmsFloat32Number*) p ->Table;
+    const int TotalOut = p -> nOutputs;
+    int OutChan;
+
+    // Clipped inputs, reused for the end-of-domain test below
+    const cmsFloat32Number inX = fclamp(Input[0]);
+    const cmsFloat32Number inY = fclamp(Input[1]);
+    const cmsFloat32Number inZ = fclamp(Input[2]);
+
+    // Positions expressed in nodes
+    const cmsFloat32Number px = inX * p->Domain[0];
+    const cmsFloat32Number py = inY * p->Domain[1];
+    const cmsFloat32Number pz = inZ * p->Domain[2];
+
+    // Lower node and fractional part on each direction (full floor functionality is needed here)
+    const int x0 = (int) floor(px);
+    const cmsFloat32Number fx = px - (cmsFloat32Number) x0;
+    const int y0 = (int) floor(py);
+    const cmsFloat32Number fy = py - (cmsFloat32Number) y0;
+    const int z0 = (int) floor(pz);
+    const cmsFloat32Number fz = pz - (cmsFloat32Number) z0;
+
+    // Table offsets of the lower and upper nodes; at the end of the domain both are the same node
+    const int X0 = p -> opta[2] * x0;
+    const int X1 = X0 + (inX >= 1.0 ? 0 : p->opta[2]);
+
+    const int Y0 = p -> opta[1] * y0;
+    const int Y1 = Y0 + (inY >= 1.0 ? 0 : p->opta[1]);
+
+    const int Z0 = p -> opta[0] * z0;
+    const int Z1 = Z0 + (inZ >= 1.0 ? 0 : p->opta[0]);
+
+    for (OutChan = 0; OutChan < TotalOut; OutChan++) {
+
+        // Along x on the four edges of the cell
+        const cmsFloat32Number edge00 = LERP(fx, DENS(X0, Y0, Z0), DENS(X1, Y0, Z0));
+        const cmsFloat32Number edge01 = LERP(fx, DENS(X0, Y0, Z1), DENS(X1, Y0, Z1));
+        const cmsFloat32Number edge10 = LERP(fx, DENS(X0, Y1, Z0), DENS(X1, Y1, Z0));
+        const cmsFloat32Number edge11 = LERP(fx, DENS(X0, Y1, Z1), DENS(X1, Y1, Z1));
+
+        // Along y on the two faces
+        const cmsFloat32Number face0 = LERP(fy, edge00, edge10);
+        const cmsFloat32Number face1 = LERP(fy, edge01, edge11);
+
+        // And finally along z
+        Output[OutChan] = LERP(fz, face0, face1);
+    }
+
+
+#   undef LERP
+#   undef DENS
+}
+
+// Trilinear interpolation (16 bits) - optimized version
+// Trilinear interpolation on a 3-input table of 16-bit nodes, in fixed point arithmetic.
+//   Input   three 16-bit coordinates
+//   Output  receives p->nOutputs interpolated values
+//   p       interpolation parameters; Table, Domain[0..2], opta[0..2] and nOutputs are read
+static CMS_NO_SANITIZE
+void TrilinearInterp16(CMSREGISTER const cmsUInt16Number Input[],
+                       CMSREGISTER cmsUInt16Number Output[],
+                       CMSREGISTER const cmsInterpParams* p)
+
+{
+// DENS reads the node at the given offsets for the current output channel
+#define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
+// LERP arguments are not parenthesized: pass plain variables only
+#define LERP(a,l,h)     (cmsUInt16Number) (l + ROUND_FIXED_TO_INT(((h-l)*a)))
+
+           const cmsUInt16Number* LutTable = (cmsUInt16Number*) p ->Table;
+           int        OutChan, TotalOut;
+           cmsS15Fixed16Number    fx, fy, fz;
+  CMSREGISTER int        rx, ry, rz;
+           int        x0, y0, z0;
+  CMSREGISTER int        X0, X1, Y0, Y1, Z0, Z1;
+           int        d000, d001, d010, d011,
+                      d100, d101, d110, d111,
+                      dx00, dx01, dx10, dx11,
+                      dxy0, dxy1, dxyz;
+
+    TotalOut   = p -> nOutputs;
+
+    // Split each coordinate into the lower node (x0, y0, z0) and the remainder (rx, ry, rz)
+    fx = _cmsToFixedDomain((int) Input[0] * p -> Domain[0]);
+    x0  = FIXED_TO_INT(fx);
+    rx  = FIXED_REST_TO_INT(fx);    // Rest in 0..1.0 domain
+
+
+    fy = _cmsToFixedDomain((int) Input[1] * p -> Domain[1]);
+    y0  = FIXED_TO_INT(fy);
+    ry  = FIXED_REST_TO_INT(fy);
+
+    fz = _cmsToFixedDomain((int) Input[2] * p -> Domain[2]);
+    z0 = FIXED_TO_INT(fz);
+    rz = FIXED_REST_TO_INT(fz);
+
+
+    // Table offsets of the lower and upper nodes; no step is added when the input is 0xFFFF
+    X0 = p -> opta[2] * x0;
+    X1 = X0 + (Input[0] == 0xFFFFU ? 0 : p->opta[2]);
+
+    Y0 = p -> opta[1] * y0;
+    Y1 = Y0 + (Input[1] == 0xFFFFU ? 0 : p->opta[1]);
+
+    Z0 = p -> opta[0] * z0;
+    Z1 = Z0 + (Input[2] == 0xFFFFU ? 0 : p->opta[0]);
+
+    for (OutChan = 0; OutChan < TotalOut; OutChan++) {
+
+        // The eight corners of the cell
+        d000 = DENS(X0, Y0, Z0);
+        d001 = DENS(X0, Y0, Z1);
+        d010 = DENS(X0, Y1, Z0);
+        d011 = DENS(X0, Y1, Z1);
+
+        d100 = DENS(X1, Y0, Z0);
+        d101 = DENS(X1, Y0, Z1);
+        d110 = DENS(X1, Y1, Z0);
+        d111 = DENS(X1, Y1, Z1);
+
+
+        // Along x on the four edges
+        dx00 = LERP(rx, d000, d100);
+        dx01 = LERP(rx, d001, d101);
+        dx10 = LERP(rx, d010, d110);
+        dx11 = LERP(rx, d011, d111);
+
+        // Along y on the two faces
+        dxy0 = LERP(ry, dx00, dx10);
+        dxy1 = LERP(ry, dx01, dx11);
+
+        // And finally along z
+        dxyz = LERP(rz, dxy0, dxy1);
+
+        Output[OutChan] = (cmsUInt16Number) dxyz;
+    }
+
+
+#   undef LERP
+#   undef DENS
+}
+
+
+// Tetrahedral interpolation, using Sakamoto algorithm.
+// DENS reads the node at the given offsets for the current output channel
+#define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
+// Tetrahedral interpolation on a 3-input table of floating point nodes.
+//   Input   three coordinates, each clipped to 0..1 by fclamp
+//   Output  receives p->nOutputs interpolated values
+//   p       interpolation parameters; Table, Domain[0..2], opta[0..2] and nOutputs are read
+static
+void TetrahedralInterpFloat(const cmsFloat32Number Input[],
+                            cmsFloat32Number Output[],
+                            const cmsInterpParams* p)
+{
+    const cmsFloat32Number* LutTable = (cmsFloat32Number*) p -> Table;
+    cmsFloat32Number     px, py, pz;
+    int        x0, y0, z0,
+               X0, Y0, Z0, X1, Y1, Z1;
+    cmsFloat32Number     rx, ry, rz;
+    cmsFloat32Number     c0, c1=0, c2=0, c3=0;
+    int                  OutChan, TotalOut;
+
+    TotalOut   = p -> nOutputs;
+
+    // We need some clipping here
+    px = fclamp(Input[0]) * p->Domain[0];
+    py = fclamp(Input[1]) * p->Domain[1];
+    pz = fclamp(Input[2]) * p->Domain[2];
+
+    // Lower node (x0, y0, z0) and fractional part (rx, ry, rz) on each direction
+    x0 = (int) floor(px); rx = (px - (cmsFloat32Number) x0);  // We need full floor functionality here
+    y0 = (int) floor(py); ry = (py - (cmsFloat32Number) y0);
+    z0 = (int) floor(pz); rz = (pz - (cmsFloat32Number) z0);
+
+
+    // Table offsets of the lower and upper nodes; at the end of the domain both are the same node
+    X0 = p -> opta[2] * x0;
+    X1 = X0 + (fclamp(Input[0]) >= 1.0 ? 0 : p->opta[2]);
+
+    Y0 = p -> opta[1] * y0;
+    Y1 = Y0 + (fclamp(Input[1]) >= 1.0 ? 0 : p->opta[1]);
+
+    Z0 = p -> opta[0] * z0;
+    Z1 = Z0 + (fclamp(Input[2]) >= 1.0 ? 0 : p->opta[0]);
+
+    for (OutChan=0; OutChan < TotalOut; OutChan++) {
+
+       // These are the 6 Tetrahedral
+       // The ordering of rx, ry, rz selects one of them; c1, c2, c3 are the node
+       // differences that get weighted by rx, ry, rz respectively.
+
+        c0 = DENS(X0, Y0, Z0);
+
+        if (rx >= ry && ry >= rz) {
+
+            c1 = DENS(X1, Y0, Z0) - c0;
+            c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0);
+            c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+
+        }
+        else
+            if (rx >= rz && rz >= ry) {
+
+                c1 = DENS(X1, Y0, Z0) - c0;
+                c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+                c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0);
+
+            }
+            else
+                if (rz >= rx && rx >= ry) {
+
+                    c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1);
+                    c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+                    c3 = DENS(X0, Y0, Z1) - c0;
+
+                }
+                else
+                    if (ry >= rx && rx >= rz) {
+
+                        c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0);
+                        c2 = DENS(X0, Y1, Z0) - c0;
+                        c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+
+                    }
+                    else
+                        if (ry >= rz && rz >= rx) {
+
+                            c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+                            c2 = DENS(X0, Y1, Z0) - c0;
+                            c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0);
+
+                        }
+                        else
+                            if (rz >= ry && ry >= rx) {
+
+                                c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+                                c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1);
+                                c3 = DENS(X0, Y0, Z1) - c0;
+
+                            }
+                            else  {
+                                // No ordering matched: the output is just the lower corner
+                                c1 = c2 = c3 = 0;
+                            }
+
+       Output[OutChan] = c0 + c1 * rx + c2 * ry + c3 * rz;
+       }
+
+}
+
+#undef DENS
+
+
+
+
+// Tetrahedral interpolation on a 3-input table of 16-bit nodes, in fixed point arithmetic.
+//   Input   three 16-bit coordinates
+//   Output  receives p->nOutputs interpolated values
+//   p       interpolation parameters; Table, Domain[0..2], opta[0..2] and nOutputs are read
+static CMS_NO_SANITIZE
+void TetrahedralInterp16(CMSREGISTER const cmsUInt16Number Input[],
+                         CMSREGISTER cmsUInt16Number Output[],
+                         CMSREGISTER const cmsInterpParams* p)
+{
+    const cmsUInt16Number* LutTable = (cmsUInt16Number*) p -> Table;
+    cmsS15Fixed16Number fx, fy, fz;
+    cmsS15Fixed16Number rx, ry, rz;
+    int x0, y0, z0;
+    cmsS15Fixed16Number c0, c1, c2, c3, Rest;
+    cmsS15Fixed16Number X0, X1, Y0, Y1, Z0, Z1;
+    cmsUInt32Number TotalOut = p -> nOutputs;
+
+    // Split each coordinate into the lower node (x0, y0, z0) and the remainder (rx, ry, rz)
+    fx = _cmsToFixedDomain((int) Input[0] * p -> Domain[0]);
+    fy = _cmsToFixedDomain((int) Input[1] * p -> Domain[1]);
+    fz = _cmsToFixedDomain((int) Input[2] * p -> Domain[2]);
+
+    x0 = FIXED_TO_INT(fx);
+    y0 = FIXED_TO_INT(fy);
+    z0 = FIXED_TO_INT(fz);
+
+    rx = FIXED_REST_TO_INT(fx);
+    ry = FIXED_REST_TO_INT(fy);
+    rz = FIXED_REST_TO_INT(fz);
+
+    // X0, Y0, Z0 are table offsets of the lower node. X1, Y1, Z1 hold only the step to
+    // the next node on each direction (0 when the input is 0xFFFF), not absolute offsets.
+    X0 = p -> opta[2] * x0;
+    X1 = (Input[0] == 0xFFFFU ? 0 : p->opta[2]);
+
+    Y0 = p -> opta[1] * y0;
+    Y1 = (Input[1] == 0xFFFFU ? 0 : p->opta[1]);
+
+    Z0 = p -> opta[0] * z0;
+    Z1 = (Input[2] == 0xFFFFU ? 0 : p->opta[0]);
+
+    // Rebase the table on the lower corner of the cell, so the steps index relative to it
+    LutTable = &LutTable[X0+Y0+Z0];
+
+    // Output should be computed as x = ROUND_FIXED_TO_INT(_cmsToFixedDomain(Rest))
+    // which expands as: x = (Rest + ((Rest+0x7fff)/0xFFFF) + 0x8000)>>16
+    // This can be replaced by: t = Rest+0x8001, x = (t + (t>>16))>>16
+    // at the cost of being off by one at 7fff and 17ffe.
+
+    // Each branch below handles one ordering of rx, ry, rz. The steps are summed in that
+    // order so X1, Y1, Z1 become offsets of the nodes visited, and c1, c2, c3 end up as
+    // the node differences weighted by rx, ry, rz. LutTable advances one output channel
+    // per iteration.
+    if (rx >= ry) {
+        if (ry >= rz) {
+            // rx >= ry >= rz: step x, then y, then z
+            Y1 += X1;
+            Z1 += Y1;
+            for (; TotalOut; TotalOut--) {
+                c1 = LutTable[X1];
+                c2 = LutTable[Y1];
+                c3 = LutTable[Z1];
+                c0 = *LutTable++;
+                c3 -= c2;
+                c2 -= c1;
+                c1 -= c0;
+                Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+                *Output++ = (cmsUInt16Number) c0 + ((Rest + (Rest>>16))>>16);
+            }
+        } else if (rz >= rx) {
+            // rz >= rx >= ry: step z, then x, then y
+            X1 += Z1;
+            Y1 += X1;
+            for (; TotalOut; TotalOut--) {
+                c1 = LutTable[X1];
+                c2 = LutTable[Y1];
+                c3 = LutTable[Z1];
+                c0 = *LutTable++;
+                c2 -= c1;
+                c1 -= c3;
+                c3 -= c0;
+                Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+                *Output++ = (cmsUInt16Number) c0 + ((Rest + (Rest>>16))>>16);
+            }
+        } else {
+            // rx > rz > ry: step x, then z, then y
+            Z1 += X1;
+            Y1 += Z1;
+            for (; TotalOut; TotalOut--) {
+                c1 = LutTable[X1];
+                c2 = LutTable[Y1];
+                c3 = LutTable[Z1];
+                c0 = *LutTable++;
+                c2 -= c3;
+                c3 -= c1;
+                c1 -= c0;
+                Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+                *Output++ = (cmsUInt16Number) c0 + ((Rest + (Rest>>16))>>16);
+            }
+        }
+    } else {
+        if (rx >= rz) {
+            // ry > rx >= rz: step y, then x, then z
+            X1 += Y1;
+            Z1 += X1;
+            for (; TotalOut; TotalOut--) {
+                c1 = LutTable[X1];
+                c2 = LutTable[Y1];
+                c3 = LutTable[Z1];
+                c0 = *LutTable++;
+                c3 -= c1;
+                c1 -= c2;
+                c2 -= c0;
+                Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+                *Output++ = (cmsUInt16Number) c0 + ((Rest + (Rest>>16))>>16);
+            }
+        } else if (ry >= rz) {
+            // ry >= rz > rx: step y, then z, then x
+            Z1 += Y1;
+            X1 += Z1;
+            for (; TotalOut; TotalOut--) {
+                c1 = LutTable[X1];
+                c2 = LutTable[Y1];
+                c3 = LutTable[Z1];
+                c0 = *LutTable++;
+                c1 -= c3;
+                c3 -= c2;
+                c2 -= c0;
+                Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+                *Output++ = (cmsUInt16Number) c0 + ((Rest + (Rest>>16))>>16);
+            }
+        } else {
+            // rz > ry > rx: step z, then y, then x
+            Y1 += Z1;
+            X1 += Y1;
+            for (; TotalOut; TotalOut--) {
+                c1 = LutTable[X1];
+                c2 = LutTable[Y1];
+                c3 = LutTable[Z1];
+                c0 = *LutTable++;
+                c1 -= c2;
+                c2 -= c3;
+                c3 -= c0;
+                Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+                *Output++ = (cmsUInt16Number) c0 + ((Rest + (Rest>>16))>>16);
+            }
+        }
+    }
+}
+
+
+// DENS reads the table entry of the output channel currently being processed
+// (OutChan) at the node whose three partial offsets are i, j and k, counted
+// from wherever LutTable points at that moment.
+#define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
+// Evaluates a 4-input, 16-bit table.
+//   Input  - four 16-bit values; Input[0] selects along the axis stepped by
+//            opta[3], Input[1..3] along the axes stepped by opta[2], opta[1]
+//            and opta[0] respectively.
+//   Output - receives p16->nOutputs 16-bit results.
+//   p16    - interpolation parameters (Domain, opta, Table, nOutputs are read).
+// Two 3-D evaluations on Input[1..3] are performed, one with the table based at
+// offset K0 and one at K1, and the two results are combined per channel with
+// LinearInterp() using the fractional part of Input[0].
+static CMS_NO_SANITIZE
+void Eval4Inputs(CMSREGISTER const cmsUInt16Number Input[],
+                     CMSREGISTER cmsUInt16Number Output[],
+                     CMSREGISTER const cmsInterpParams* p16)
+{
+    const cmsUInt16Number* LutTable;
+    cmsS15Fixed16Number fk;
+    cmsS15Fixed16Number k0, rk;
+    int K0, K1;
+    cmsS15Fixed16Number    fx, fy, fz;
+    cmsS15Fixed16Number    rx, ry, rz;
+    int                    x0, y0, z0;
+    cmsS15Fixed16Number    X0, X1, Y0, Y1, Z0, Z1;
+    cmsUInt32Number i;
+    cmsS15Fixed16Number    c0, c1, c2, c3, Rest;
+    cmsUInt32Number        OutChan;
+    cmsUInt16Number        Tmp1[MAX_STAGE_CHANNELS], Tmp2[MAX_STAGE_CHANNELS];
+
+
+    // Scale every input by its Domain entry and convert to fixed point
+    fk  = _cmsToFixedDomain((int) Input[0] * p16 -> Domain[0]);
+    fx  = _cmsToFixedDomain((int) Input[1] * p16 -> Domain[1]);
+    fy  = _cmsToFixedDomain((int) Input[2] * p16 -> Domain[2]);
+    fz  = _cmsToFixedDomain((int) Input[3] * p16 -> Domain[3]);
+
+    // Integer parts: index of the lower node on each axis
+    k0  = FIXED_TO_INT(fk);
+    x0  = FIXED_TO_INT(fx);
+    y0  = FIXED_TO_INT(fy);
+    z0  = FIXED_TO_INT(fz);
+
+    // Fractional parts: position between the lower and upper node
+    rk  = FIXED_REST_TO_INT(fk);
+    rx  = FIXED_REST_TO_INT(fx);
+    ry  = FIXED_REST_TO_INT(fy);
+    rz  = FIXED_REST_TO_INT(fz);
+
+    // Table offsets of the lower (x0) and upper (x1) node on each axis. When an
+    // input is 0xFFFF nothing is added, so the upper offset equals the lower one.
+    K0 = p16 -> opta[3] * k0;
+    K1 = K0 + (Input[0] == 0xFFFFU ? 0 : p16->opta[3]);
+
+    X0 = p16 -> opta[2] * x0;
+    X1 = X0 + (Input[1] == 0xFFFFU ? 0 : p16->opta[2]);
+
+    Y0 = p16 -> opta[1] * y0;
+    Y1 = Y0 + (Input[2] == 0xFFFFU ? 0 : p16->opta[1]);
+
+    Z0 = p16 -> opta[0] * z0;
+    Z1 = Z0 + (Input[3] == 0xFFFFU ? 0 : p16->opta[0]);
+
+    // First pass: table based at the lower node of the Input[0] axis
+    LutTable = (cmsUInt16Number*) p16 -> Table;
+    LutTable += K0;
+
+    for (OutChan=0; OutChan < p16 -> nOutputs; OutChan++) {
+
+        c0 = DENS(X0, Y0, Z0);
+
+        // The branch taken depends on the ordering of rx, ry and rz; each one
+        // picks the three node differences that are weighted below.
+        if (rx >= ry && ry >= rz) {
+
+            c1 = DENS(X1, Y0, Z0) - c0;
+            c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0);
+            c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+
+        }
+        else
+            if (rx >= rz && rz >= ry) {
+
+                c1 = DENS(X1, Y0, Z0) - c0;
+                c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+                c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0);
+
+            }
+            else
+                if (rz >= rx && rx >= ry) {
+
+                    c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1);
+                    c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+                    c3 = DENS(X0, Y0, Z1) - c0;
+
+                }
+                else
+                    if (ry >= rx && rx >= rz) {
+
+                        c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0);
+                        c2 = DENS(X0, Y1, Z0) - c0;
+                        c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+
+                    }
+                    else
+                        if (ry >= rz && rz >= rx) {
+
+                            c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+                            c2 = DENS(X0, Y1, Z0) - c0;
+                            c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0);
+
+                        }
+                        else
+                            if (rz >= ry && ry >= rx) {
+
+                                c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+                                c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1);
+                                c3 = DENS(X0, Y0, Z1) - c0;
+
+                            }
+                            else {
+                                // Fallback when none of the orderings matched
+                                c1 = c2 = c3 = 0;
+                            }
+
+        // Weight the three differences by the fractional parts and add to c0
+        Rest = c1 * rx + c2 * ry + c3 * rz;
+
+        Tmp1[OutChan] = (cmsUInt16Number)(c0 + ROUND_FIXED_TO_INT(_cmsToFixedDomain(Rest)));
+    }
+
+
+    // Second pass: same computation with the table based at the upper node of
+    // the Input[0] axis
+    LutTable = (cmsUInt16Number*) p16 -> Table;
+    LutTable += K1;
+
+    for (OutChan=0; OutChan < p16 -> nOutputs; OutChan++) {
+
+        c0 = DENS(X0, Y0, Z0);
+
+        if (rx >= ry && ry >= rz) {
+
+            c1 = DENS(X1, Y0, Z0) - c0;
+            c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0);
+            c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+
+        }
+        else
+            if (rx >= rz && rz >= ry) {
+
+                c1 = DENS(X1, Y0, Z0) - c0;
+                c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+                c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0);
+
+            }
+            else
+                if (rz >= rx && rx >= ry) {
+
+                    c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1);
+                    c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+                    c3 = DENS(X0, Y0, Z1) - c0;
+
+                }
+                else
+                    if (ry >= rx && rx >= rz) {
+
+                        c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0);
+                        c2 = DENS(X0, Y1, Z0) - c0;
+                        c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+
+                    }
+                    else
+                        if (ry >= rz && rz >= rx) {
+
+                            c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+                            c2 = DENS(X0, Y1, Z0) - c0;
+                            c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0);
+
+                        }
+                        else
+                            if (rz >= ry && ry >= rx) {
+
+                                c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+                                c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1);
+                                c3 = DENS(X0, Y0, Z1) - c0;
+
+                            }
+                            else  {
+                                c1 = c2 = c3 = 0;
+                            }
+
+        Rest = c1 * rx + c2 * ry + c3 * rz;
+
+        Tmp2[OutChan] = (cmsUInt16Number) (c0 + ROUND_FIXED_TO_INT(_cmsToFixedDomain(Rest)));
+    }
+
+
+
+    // Blend both passes along the Input[0] axis using its fractional part
+    for (i=0; i < p16 -> nOutputs; i++) {
+        Output[i] = LinearInterp(rk, Tmp1[i], Tmp2[i]);
+    }
+}
+#undef DENS
+
+
+// For more than 3 inputs (i.e., CMYK)
+// evaluate two 3-dimensional interpolations and then linearly interpolate between them.
+
+
+// Four-input floating point evaluation. Input[0] picks two neighbouring
+// slices of the table (stepped by opta[3]); each slice is evaluated on
+// Input[1..3] with TetrahedralInterpFloat and the two results are blended
+// linearly by the fractional position of Input[0].
+static
+void Eval4InputsFloat(const cmsFloat32Number Input[],
+                      cmsFloat32Number Output[],
+                      const cmsInterpParams* p)
+{
+       const cmsFloat32Number* Base = (cmsFloat32Number*) p -> Table;
+       cmsFloat32Number Pos   = fclamp(Input[0]) * p->Domain[0];
+       int              Cell  = _cmsQuickFloor(Pos);
+       cmsFloat32Number Frac  = Pos - (cmsFloat32Number) Cell;
+       int              OffLo = p -> opta[3] * Cell;
+       // At the top of the range the upper slice is the lower slice itself
+       int              OffHi = OffLo + (fclamp(Input[0]) >= 1.0 ? 0 : p->opta[3]);
+       cmsFloat32Number Lower[MAX_STAGE_CHANNELS], Upper[MAX_STAGE_CHANNELS];
+       cmsInterpParams  Sub   = *p;
+       cmsUInt32Number  n;
+
+       // The 3-D evaluator reads Domain[0..2], so shift Domain[1..3] down
+       memmove(&Sub.Domain[0], &p ->Domain[1], 3*sizeof(cmsUInt32Number));
+
+       Sub.Table = Base + OffLo;
+       TetrahedralInterpFloat(Input + 1, Lower, &Sub);
+
+       Sub.Table = Base + OffHi;
+       TetrahedralInterpFloat(Input + 1, Upper, &Sub);
+
+       for (n = 0; n < p -> nOutputs; n++)
+              Output[n] = Lower[n] + (Upper[n] - Lower[n]) * Frac;
+}
+
+
+// Five-input 16-bit evaluation. Input[0] picks two neighbouring slices of the
+// table (stepped by opta[4]); each slice is evaluated on Input[1..4] with
+// Eval4Inputs and the results are blended with LinearInterp.
+static CMS_NO_SANITIZE
+void Eval5Inputs(CMSREGISTER const cmsUInt16Number Input[],
+                 CMSREGISTER cmsUInt16Number Output[],
+                 CMSREGISTER const cmsInterpParams* p16)
+{
+       const cmsUInt16Number* Base = (cmsUInt16Number*) p16 -> Table;
+       cmsS15Fixed16Number Pos  = _cmsToFixedDomain((cmsS15Fixed16Number) Input[0] * p16 -> Domain[0]);
+       cmsS15Fixed16Number Cell = FIXED_TO_INT(Pos);
+       cmsS15Fixed16Number Frac = FIXED_REST_TO_INT(Pos);
+       int OffLo = p16 -> opta[4] * Cell;
+       // For Input[0] == 0xFFFF the upper slice is the lower slice itself
+       int OffHi = p16 -> opta[4] * (Cell + (Input[0] != 0xFFFFU ? 1 : 0));
+       cmsUInt16Number Lower[MAX_STAGE_CHANNELS], Upper[MAX_STAGE_CHANNELS];
+       cmsInterpParams Sub = *p16;
+       cmsUInt32Number n;
+
+       // The 4-D evaluator reads Domain[0..3], so shift Domain[1..4] down
+       memmove(&Sub.Domain[0], &p16 ->Domain[1], 4*sizeof(cmsUInt32Number));
+
+       Sub.Table = Base + OffLo;
+       Eval4Inputs(Input + 1, Lower, &Sub);
+
+       Sub.Table = Base + OffHi;
+       Eval4Inputs(Input + 1, Upper, &Sub);
+
+       for (n = 0; n < p16 -> nOutputs; n++)
+              Output[n] = LinearInterp(Frac, Lower[n], Upper[n]);
+}
+
+
+// Five-input floating point evaluation. Input[0] picks two neighbouring
+// slices of the table (stepped by opta[4]); each slice is evaluated on
+// Input[1..4] with Eval4InputsFloat and the results are blended linearly.
+static
+void Eval5InputsFloat(const cmsFloat32Number Input[],
+                      cmsFloat32Number Output[],
+                      const cmsInterpParams* p)
+{
+       const cmsFloat32Number* Base = (cmsFloat32Number*) p -> Table;
+       cmsFloat32Number Pos   = fclamp(Input[0]) * p->Domain[0];
+       int              Cell  = _cmsQuickFloor(Pos);
+       cmsFloat32Number Frac  = Pos - (cmsFloat32Number) Cell;
+       int              OffLo = p -> opta[4] * Cell;
+       // At the top of the range the upper slice is the lower slice itself
+       int              OffHi = OffLo + (fclamp(Input[0]) >= 1.0 ? 0 : p->opta[4]);
+       cmsFloat32Number Lower[MAX_STAGE_CHANNELS], Upper[MAX_STAGE_CHANNELS];
+       cmsInterpParams  Sub   = *p;
+       cmsUInt32Number  n;
+
+       // The 4-D evaluator reads Domain[0..3], so shift Domain[1..4] down
+       memmove(&Sub.Domain[0], &p ->Domain[1], 4*sizeof(cmsUInt32Number));
+
+       Sub.Table = Base + OffLo;
+       Eval4InputsFloat(Input + 1, Lower, &Sub);
+
+       Sub.Table = Base + OffHi;
+       Eval4InputsFloat(Input + 1, Upper, &Sub);
+
+       for (n = 0; n < p -> nOutputs; n++)
+              Output[n] = Lower[n] + (Upper[n] - Lower[n]) * Frac;
+}
+
+
+
+// Six-input 16-bit evaluation. Input[0] picks two neighbouring slices of the
+// table (stepped by opta[5]); each slice is evaluated on Input[1..5] with
+// Eval5Inputs and the results are blended with LinearInterp.
+static CMS_NO_SANITIZE
+void Eval6Inputs(CMSREGISTER const cmsUInt16Number Input[],
+                 CMSREGISTER cmsUInt16Number Output[],
+                 CMSREGISTER const cmsInterpParams* p16)
+{
+       const cmsUInt16Number* Base = (cmsUInt16Number*) p16 -> Table;
+       cmsS15Fixed16Number Pos  = _cmsToFixedDomain((cmsS15Fixed16Number) Input[0] * p16 -> Domain[0]);
+       cmsS15Fixed16Number Cell = FIXED_TO_INT(Pos);
+       cmsS15Fixed16Number Frac = FIXED_REST_TO_INT(Pos);
+       int OffLo = p16 -> opta[5] * Cell;
+       // For Input[0] == 0xFFFF the upper slice is the lower slice itself
+       int OffHi = p16 -> opta[5] * (Cell + (Input[0] != 0xFFFFU ? 1 : 0));
+       cmsUInt16Number Lower[MAX_STAGE_CHANNELS], Upper[MAX_STAGE_CHANNELS];
+       cmsInterpParams Sub = *p16;
+       cmsUInt32Number n;
+
+       // The 5-D evaluator reads Domain[0..4], so shift Domain[1..5] down
+       memmove(&Sub.Domain[0], &p16 ->Domain[1], 5*sizeof(cmsUInt32Number));
+
+       Sub.Table = Base + OffLo;
+       Eval5Inputs(Input + 1, Lower, &Sub);
+
+       Sub.Table = Base + OffHi;
+       Eval5Inputs(Input + 1, Upper, &Sub);
+
+       for (n = 0; n < p16 -> nOutputs; n++)
+              Output[n] = LinearInterp(Frac, Lower[n], Upper[n]);
+}
+
+
+// Six-input floating point evaluation. Input[0] picks two neighbouring slices
+// of the table (stepped by opta[5]); each slice is evaluated on Input[1..5]
+// with Eval5InputsFloat and the results are blended linearly.
+static
+void Eval6InputsFloat(const cmsFloat32Number Input[],
+                      cmsFloat32Number Output[],
+                      const cmsInterpParams* p)
+{
+       const cmsFloat32Number* Base = (cmsFloat32Number*) p -> Table;
+       cmsFloat32Number Pos   = fclamp(Input[0]) * p->Domain[0];
+       int              Cell  = _cmsQuickFloor(Pos);
+       cmsFloat32Number Frac  = Pos - (cmsFloat32Number) Cell;
+       int              OffLo = p -> opta[5] * Cell;
+       // At the top of the range the upper slice is the lower slice itself
+       int              OffHi = OffLo + (fclamp(Input[0]) >= 1.0 ? 0 : p->opta[5]);
+       cmsFloat32Number Lower[MAX_STAGE_CHANNELS], Upper[MAX_STAGE_CHANNELS];
+       cmsInterpParams  Sub   = *p;
+       cmsUInt32Number  n;
+
+       // The 5-D evaluator reads Domain[0..4], so shift Domain[1..5] down
+       memmove(&Sub.Domain[0], &p ->Domain[1], 5*sizeof(cmsUInt32Number));
+
+       Sub.Table = Base + OffLo;
+       Eval5InputsFloat(Input + 1, Lower, &Sub);
+
+       Sub.Table = Base + OffHi;
+       Eval5InputsFloat(Input + 1, Upper, &Sub);
+
+       for (n = 0; n < p -> nOutputs; n++)
+              Output[n] = Lower[n] + (Upper[n] - Lower[n]) * Frac;
+}
+
+
+// Seven-input 16-bit evaluation. Input[0] picks two neighbouring slices of the
+// table (stepped by opta[6]); each slice is evaluated on Input[1..6] with
+// Eval6Inputs and the results are blended with LinearInterp.
+static CMS_NO_SANITIZE
+void Eval7Inputs(CMSREGISTER const cmsUInt16Number Input[],
+                 CMSREGISTER cmsUInt16Number Output[],
+                 CMSREGISTER const cmsInterpParams* p16)
+{
+       const cmsUInt16Number* Base = (cmsUInt16Number*) p16 -> Table;
+       cmsS15Fixed16Number Pos  = _cmsToFixedDomain((cmsS15Fixed16Number) Input[0] * p16 -> Domain[0]);
+       cmsS15Fixed16Number Cell = FIXED_TO_INT(Pos);
+       cmsS15Fixed16Number Frac = FIXED_REST_TO_INT(Pos);
+       int OffLo = p16 -> opta[6] * Cell;
+       // For Input[0] == 0xFFFF the upper slice is the lower slice itself
+       int OffHi = p16 -> opta[6] * (Cell + (Input[0] != 0xFFFFU ? 1 : 0));
+       cmsUInt16Number Lower[MAX_STAGE_CHANNELS], Upper[MAX_STAGE_CHANNELS];
+       cmsInterpParams Sub = *p16;
+       cmsUInt32Number n;
+
+       // The 6-D evaluator reads Domain[0..5], so shift Domain[1..6] down
+       memmove(&Sub.Domain[0], &p16 ->Domain[1], 6*sizeof(cmsUInt32Number));
+
+       Sub.Table = Base + OffLo;
+       Eval6Inputs(Input + 1, Lower, &Sub);
+
+       Sub.Table = Base + OffHi;
+       Eval6Inputs(Input + 1, Upper, &Sub);
+
+       for (n = 0; n < p16 -> nOutputs; n++)
+              Output[n] = LinearInterp(Frac, Lower[n], Upper[n]);
+}
+
+
+// Seven-input floating point evaluation. Input[0] picks two neighbouring
+// slices of the table (stepped by opta[6]); each slice is evaluated on
+// Input[1..6] with Eval6InputsFloat and the results are blended linearly.
+static
+void Eval7InputsFloat(const cmsFloat32Number Input[],
+                      cmsFloat32Number Output[],
+                      const cmsInterpParams* p)
+{
+       const cmsFloat32Number* Base = (cmsFloat32Number*) p -> Table;
+       cmsFloat32Number Pos   = fclamp(Input[0]) * p->Domain[0];
+       int              Cell  = _cmsQuickFloor(Pos);
+       cmsFloat32Number Frac  = Pos - (cmsFloat32Number) Cell;
+       int              OffLo = p -> opta[6] * Cell;
+       // At the top of the range the upper slice is the lower slice itself
+       int              OffHi = OffLo + (fclamp(Input[0]) >= 1.0 ? 0 : p->opta[6]);
+       cmsFloat32Number Lower[MAX_STAGE_CHANNELS], Upper[MAX_STAGE_CHANNELS];
+       cmsInterpParams  Sub   = *p;
+       cmsUInt32Number  n;
+
+       // The 6-D evaluator reads Domain[0..5], so shift Domain[1..6] down
+       memmove(&Sub.Domain[0], &p ->Domain[1], 6*sizeof(cmsUInt32Number));
+
+       Sub.Table = Base + OffLo;
+       Eval6InputsFloat(Input + 1, Lower, &Sub);
+
+       Sub.Table = Base + OffHi;
+       Eval6InputsFloat(Input + 1, Upper, &Sub);
+
+       for (n = 0; n < p -> nOutputs; n++)
+              Output[n] = Lower[n] + (Upper[n] - Lower[n]) * Frac;
+}
+
+// Eight-input 16-bit evaluation. Input[0] picks two neighbouring slices of the
+// table (stepped by opta[7]); each slice is evaluated on Input[1..7] with
+// Eval7Inputs and the results are blended with LinearInterp.
+static CMS_NO_SANITIZE
+void Eval8Inputs(CMSREGISTER const cmsUInt16Number Input[],
+                 CMSREGISTER cmsUInt16Number Output[],
+                 CMSREGISTER const cmsInterpParams* p16)
+{
+       const cmsUInt16Number* Base = (cmsUInt16Number*) p16 -> Table;
+       cmsS15Fixed16Number Pos  = _cmsToFixedDomain((cmsS15Fixed16Number) Input[0] * p16 -> Domain[0]);
+       cmsS15Fixed16Number Cell = FIXED_TO_INT(Pos);
+       cmsS15Fixed16Number Frac = FIXED_REST_TO_INT(Pos);
+       int OffLo = p16 -> opta[7] * Cell;
+       // For Input[0] == 0xFFFF the upper slice is the lower slice itself
+       int OffHi = p16 -> opta[7] * (Cell + (Input[0] != 0xFFFFU ? 1 : 0));
+       cmsUInt16Number Lower[MAX_STAGE_CHANNELS], Upper[MAX_STAGE_CHANNELS];
+       cmsInterpParams Sub = *p16;
+       cmsUInt32Number n;
+
+       // The 7-D evaluator reads Domain[0..6], so shift Domain[1..7] down
+       memmove(&Sub.Domain[0], &p16 ->Domain[1], 7*sizeof(cmsUInt32Number));
+
+       Sub.Table = Base + OffLo;
+       Eval7Inputs(Input + 1, Lower, &Sub);
+
+       Sub.Table = Base + OffHi;
+       Eval7Inputs(Input + 1, Upper, &Sub);
+
+       for (n = 0; n < p16 -> nOutputs; n++)
+              Output[n] = LinearInterp(Frac, Lower[n], Upper[n]);
+}
+
+
+
+// Eight-input floating point evaluation. Input[0] picks two neighbouring
+// slices of the table (stepped by opta[7]); each slice is evaluated on
+// Input[1..7] with Eval7InputsFloat and the results are blended linearly.
+static
+void Eval8InputsFloat(const cmsFloat32Number Input[],
+                      cmsFloat32Number Output[],
+                      const cmsInterpParams* p)
+{
+       const cmsFloat32Number* Base = (cmsFloat32Number*) p -> Table;
+       cmsFloat32Number Pos   = fclamp(Input[0]) * p->Domain[0];
+       int              Cell  = _cmsQuickFloor(Pos);
+       cmsFloat32Number Frac  = Pos - (cmsFloat32Number) Cell;
+       int              OffLo = p -> opta[7] * Cell;
+       // At the top of the range the upper slice is the lower slice itself
+       int              OffHi = OffLo + (fclamp(Input[0]) >= 1.0 ? 0 : p->opta[7]);
+       cmsFloat32Number Lower[MAX_STAGE_CHANNELS], Upper[MAX_STAGE_CHANNELS];
+       cmsInterpParams  Sub   = *p;
+       cmsUInt32Number  n;
+
+       // The 7-D evaluator reads Domain[0..6], so shift Domain[1..7] down
+       memmove(&Sub.Domain[0], &p ->Domain[1], 7*sizeof(cmsUInt32Number));
+
+       Sub.Table = Base + OffLo;
+       Eval7InputsFloat(Input + 1, Lower, &Sub);
+
+       Sub.Table = Base + OffHi;
+       Eval7InputsFloat(Input + 1, Upper, &Sub);
+
+       for (n = 0; n < p -> nOutputs; n++)
+              Output[n] = Lower[n] + (Upper[n] - Lower[n]) * Frac;
+}
+
+// The default factory: picks the interpolation routine for a table with the
+// given number of input and output channels. dwFlags selects the floating
+// point variant (CMS_LERP_FLAGS_FLOAT) and, for 3 inputs, trilinear instead of
+// tetrahedral (CMS_LERP_FLAGS_TRILINEAR). The result is left zeroed when no
+// routine applies.
+static
+cmsInterpFunction DefaultInterpolatorsFactory(cmsUInt32Number nInputChannels, cmsUInt32Number nOutputChannels, cmsUInt32Number dwFlags)
+{
+    cmsInterpFunction Chosen;
+    cmsBool  UseFloat     = (dwFlags & CMS_LERP_FLAGS_FLOAT);
+    cmsBool  UseTrilinear = (dwFlags & CMS_LERP_FLAGS_TRILINEAR);
+
+    memset(&Chosen, 0, sizeof(Chosen));
+
+    // Safety check: the 4..8 input evaluators keep their intermediate results
+    // in arrays of MAX_STAGE_CHANNELS entries
+    if (nInputChannels >= 4 && nOutputChannels >= MAX_STAGE_CHANNELS)
+        return Chosen;
+
+    switch (nInputChannels) {
+
+    case 1: // Gray LUT / linear; a single output has a dedicated routine
+        if (!UseFloat)
+            Chosen.Lerp16 = (nOutputChannels == 1) ? LinLerp1D : Eval1Input;
+        else
+            Chosen.LerpFloat = (nOutputChannels == 1) ? LinLerp1Dfloat : Eval1InputFloat;
+        break;
+
+    case 2: // Duotone
+        if (!UseFloat)
+            Chosen.Lerp16 = BilinearInterp16;
+        else
+            Chosen.LerpFloat = BilinearInterpFloat;
+        break;
+
+    case 3: // RGB et al
+        if (!UseFloat)
+            Chosen.Lerp16 = UseTrilinear ? TrilinearInterp16 : TetrahedralInterp16;
+        else
+            Chosen.LerpFloat = UseTrilinear ? TrilinearInterpFloat : TetrahedralInterpFloat;
+        break;
+
+    case 4: // CMYK lut
+        if (!UseFloat)
+            Chosen.Lerp16 = Eval4Inputs;
+        else
+            Chosen.LerpFloat = Eval4InputsFloat;
+        break;
+
+    case 5: // 5 inks
+        if (!UseFloat)
+            Chosen.Lerp16 = Eval5Inputs;
+        else
+            Chosen.LerpFloat = Eval5InputsFloat;
+        break;
+
+    case 6: // 6 inks
+        if (!UseFloat)
+            Chosen.Lerp16 = Eval6Inputs;
+        else
+            Chosen.LerpFloat = Eval6InputsFloat;
+        break;
+
+    case 7: // 7 inks
+        if (!UseFloat)
+            Chosen.Lerp16 = Eval7Inputs;
+        else
+            Chosen.LerpFloat = Eval7InputsFloat;
+        break;
+
+    case 8: // 8 inks
+        if (!UseFloat)
+            Chosen.Lerp16 = Eval8Inputs;
+        else
+            Chosen.LerpFloat = Eval8InputsFloat;
+        break;
+
+    default: // Unsupported number of inputs
+        Chosen.Lerp16 = NULL;
+    }
+
+    return Chosen;
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmsio0.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmsio0.c
new file mode 100644
index 0000000000..ffebfa36b3
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmsio0.c
@@ -0,0 +1,1946 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Generic I/O, tag dictionary management, profile struct
+
+// IOhandlers are abstractions used by littleCMS to read from any file, stream,
+// memory block or other storage. Each IOhandler provides implementations of the read,
+// write, seek and tell functions. LittleCMS code performs all its I/O through those objects.
+// This way, it is easier to add support for new storage media.
+
+// NULL stream, for taking care of used space -------------------------------------
+
+// The NULL IOhandler basically does nothing but keep track of how many bytes have been
+// written. This is handy when creating profiles, where the file size is needed in the
+// header. The whole profile is first serialized through the NULL IOhandler, and a second pass
+// then writes the bytes to the pertinent IOhandler.
+
+// State of the NULL stream: only a position counter, no data is stored.
+typedef struct {
+    cmsUInt32Number Pointer;         // Points to current location
+} FILENULL;
+
+// Read on the NULL stream: nothing is copied into Buffer, the position simply
+// advances by size * count bytes. Always reports count elements as read.
+static
+cmsUInt32Number NULLRead(cmsIOHANDLER* iohandler, void *Buffer, cmsUInt32Number size, cmsUInt32Number count)
+{
+    FILENULL* Counter = (FILENULL*) iohandler ->stream;
+
+    cmsUNUSED_PARAMETER(Buffer);
+
+    Counter ->Pointer += size * count;
+    return count;
+}
+
+// Seek on the NULL stream: the position becomes 'offset'. Never fails.
+static
+cmsBool  NULLSeek(cmsIOHANDLER* iohandler, cmsUInt32Number offset)
+{
+    ((FILENULL*) iohandler ->stream) ->Pointer = offset;
+
+    return TRUE;
+}
+
+// Tell on the NULL stream: returns the current position counter.
+static
+cmsUInt32Number NULLTell(cmsIOHANDLER* iohandler)
+{
+    const FILENULL* Counter = (FILENULL*) iohandler ->stream;
+
+    return Counter ->Pointer;
+}
+
+// Write on the NULL stream: the data in Ptr is discarded; the position advances
+// by 'size' and UsedSpace is raised to the furthest position reached so far.
+static
+cmsBool  NULLWrite(cmsIOHANDLER* iohandler, cmsUInt32Number size, const void *Ptr)
+{
+    FILENULL* Counter = (FILENULL*) iohandler ->stream;
+
+    cmsUNUSED_PARAMETER(Ptr);
+
+    Counter ->Pointer += size;
+
+    // UsedSpace is a high-water mark
+    if (iohandler->UsedSpace < Counter ->Pointer)
+        iohandler->UsedSpace = Counter ->Pointer;
+
+    return TRUE;
+}
+
+// Close for the NULL stream: releases the position counter and then the
+// handler itself. The handler must not be used afterwards.
+static
+cmsBool  NULLClose(cmsIOHANDLER* iohandler)
+{
+    cmsContext Ctx = iohandler ->ContextID;
+
+    _cmsFree(Ctx, (FILENULL*) iohandler ->stream);
+    _cmsFree(Ctx, iohandler);
+
+    return TRUE;
+}
+
+// The NULL IOhandler creator. Allocates the handler and its FILENULL position
+// counter in the given context and wires up the NULL* callbacks.
+// Returns NULL if either allocation fails.
+cmsIOHANDLER*  CMSEXPORT cmsOpenIOhandlerFromNULL(cmsContext ContextID)
+{
+    struct _cms_io_handler* Handler;
+    FILENULL* Counter;
+
+    Handler = (struct _cms_io_handler*) _cmsMallocZero(ContextID, sizeof(struct _cms_io_handler));
+    if (Handler == NULL) return NULL;
+
+    Counter = (FILENULL*) _cmsMallocZero(ContextID, sizeof(FILENULL));
+    if (Counter == NULL) {
+
+        // Do not leave the half-built handler behind
+        _cmsFree(ContextID, Handler);
+        return NULL;
+    }
+
+    Counter ->Pointer = 0;
+
+    Handler ->ContextID       = ContextID;
+    Handler ->stream          = (void*) Counter;
+    Handler ->UsedSpace       = 0;
+    Handler ->ReportedSize    = 0;
+    Handler ->PhysicalFile[0] = 0;
+
+    Handler ->Read    = NULLRead;
+    Handler ->Seek    = NULLSeek;
+    Handler ->Close   = NULLClose;
+    Handler ->Tell    = NULLTell;
+    Handler ->Write   = NULLWrite;
+
+    return Handler;
+}
+
+
+// Memory-based stream --------------------------------------------------------------
+
+// These functions implement an iohandler which takes a block of memory as its storage medium.
+
+// State of a memory-based stream: the block being read or written, its size
+// and the current position inside it.
+typedef struct {
+    cmsUInt8Number* Block;    // Points to allocated memory
+    cmsUInt32Number Size;     // Size of allocated memory
+    cmsUInt32Number Pointer;  // Points to current location
+    int FreeBlockOnClose;     // When nonzero, MemoryClose frees Block
+
+} FILEMEM;
+
+// Reads 'count' elements of 'size' bytes each from the memory block into Buffer
+// and advances the position. Returns 'count' on success. If the request does
+// not fit in what remains of the block, an error is signalled, nothing is
+// copied and 0 is returned.
+static
+cmsUInt32Number MemoryRead(struct _cms_io_handler* iohandler, void *Buffer, cmsUInt32Number size, cmsUInt32Number count)
+{
+    FILEMEM* ResData = (FILEMEM*) iohandler ->stream;
+    cmsUInt8Number* Ptr;
+    cmsUInt32Number len = size * count;
+    cmsUInt32Number Available;
+
+    // Bytes left between the current position and the end of the block.
+    // Computed by subtraction so that it cannot wrap around.
+    Available = (ResData -> Pointer < ResData -> Size) ? (ResData -> Size - ResData -> Pointer) : 0;
+
+    // Reject the read when size * count wrapped around 32 bits, or when the
+    // request is larger than what is left. (Adding Pointer + len could itself
+    // wrap and let an oversized read slip through.)
+    if ((count != 0 && len / count != size) || len > Available) {
+
+        cmsSignalError(iohandler ->ContextID, cmsERROR_READ, "Read from memory error. Got %d bytes, block should be of %d bytes", Available, count * size);
+        return 0;
+    }
+
+    Ptr  = ResData -> Block;
+    Ptr += ResData -> Pointer;
+    memmove(Buffer, Ptr, len);
+    ResData -> Pointer += len;
+
+    return count;
+}
+
+// Sets the position to 'offset', counted from the start of the block.
+// Offsets beyond the block size are rejected with an error and FALSE.
+static
+cmsBool  MemorySeek(struct _cms_io_handler* iohandler, cmsUInt32Number offset)
+{
+    FILEMEM* Mem = (FILEMEM*) iohandler ->stream;
+
+    if (offset <= Mem ->Size) {
+
+        Mem ->Pointer = offset;
+        return TRUE;
+    }
+
+    cmsSignalError(iohandler ->ContextID, cmsERROR_SEEK,  "Too few data; probably corrupted profile");
+    return FALSE;
+}
+
+// Tell for memory: returns the current position, or 0 when the handler has no
+// stream attached.
+static
+cmsUInt32Number MemoryTell(struct _cms_io_handler* iohandler)
+{
+    const FILEMEM* Mem = (FILEMEM*) iohandler ->stream;
+
+    return (Mem != NULL) ? Mem ->Pointer : 0;
+}
+
+
+// Writes 'size' bytes from Ptr at the current position of the memory block and
+// advances the position; UsedSpace tracks the furthest position written.
+// Data that does not fit in the block is silently clipped. Returns FALSE only
+// when the handler has no stream attached.
+static
+cmsBool MemoryWrite(struct _cms_io_handler* iohandler, cmsUInt32Number size, const void *Ptr)
+{
+    FILEMEM* ResData = (FILEMEM*) iohandler ->stream;
+    cmsUInt32Number Room;
+
+    if (ResData == NULL) return FALSE; // Housekeeping
+
+    // Space left in the block. Computed by subtraction: adding Pointer + size
+    // could wrap around 32 bits, skip the clipping and overrun the block.
+    Room = (ResData ->Pointer < ResData ->Size) ? (ResData ->Size - ResData ->Pointer) : 0;
+
+    // Check for available space. Clip.
+    if (size > Room) {
+        size = Room;
+    }
+
+    if (size == 0) return TRUE;     // Write zero bytes is ok, but does nothing
+
+    memmove(ResData ->Block + ResData ->Pointer, Ptr, size);
+    ResData ->Pointer += size;
+
+    if (ResData ->Pointer > iohandler->UsedSpace)
+        iohandler->UsedSpace = ResData ->Pointer;
+
+    return TRUE;
+}
+
+
+// Close for memory streams: frees the block when the stream owns it
+// (FreeBlockOnClose), then the stream state and the handler itself.
+static
+cmsBool  MemoryClose(struct _cms_io_handler* iohandler)
+{
+    FILEMEM* Mem = (FILEMEM*) iohandler ->stream;
+    cmsContext Ctx = iohandler ->ContextID;
+
+    if (Mem ->FreeBlockOnClose && Mem ->Block != NULL)
+        _cmsFree(Ctx, Mem ->Block);
+
+    _cmsFree(Ctx, Mem);
+    _cmsFree(Ctx, iohandler);
+
+    return TRUE;
+}
+
+// Creates an iohandler for a memory block.
+//   AccessMode "r": the handler reads from a private copy of Buffer (size bytes),
+//                   so the caller may free Buffer right after this call. The copy
+//                   is released when the handler is closed.
+//   AccessMode "w": Buffer points to the beginning of the memory block to be
+//                   written; the handler does not take ownership of it.
+// Returns NULL on allocation failure, NULL Buffer in read mode, or unknown mode.
+cmsIOHANDLER* CMSEXPORT cmsOpenIOhandlerFromMem(cmsContext ContextID, void *Buffer, cmsUInt32Number size, const char* AccessMode)
+{
+    cmsIOHANDLER* iohandler = NULL;
+    FILEMEM* fm = NULL;
+
+    _cmsAssert(AccessMode != NULL);
+
+    iohandler = (cmsIOHANDLER*) _cmsMallocZero(ContextID, sizeof(cmsIOHANDLER));
+    if (iohandler == NULL) return NULL;
+
+    switch (*AccessMode) {
+
+    case 'r':
+        fm = (FILEMEM*) _cmsMallocZero(ContextID, sizeof(FILEMEM));
+        if (fm == NULL) goto Error;
+
+        if (Buffer == NULL) {
+            cmsSignalError(ContextID, cmsERROR_READ, "Couldn't read profile from NULL pointer");
+            goto Error;
+        }
+
+        fm ->Block = (cmsUInt8Number*) _cmsMalloc(ContextID, size);
+        if (fm ->Block == NULL) {
+
+            _cmsFree(ContextID, fm);
+            _cmsFree(ContextID, iohandler);
+            // %ld consumes a long, so the 32-bit size is converted explicitly
+            cmsSignalError(ContextID, cmsERROR_READ, "Couldn't allocate %ld bytes for profile", (long) size);
+            return NULL;
+        }
+
+
+        memmove(fm->Block, Buffer, size);
+        fm ->FreeBlockOnClose = TRUE;
+        fm ->Size    = size;
+        fm ->Pointer = 0;
+        iohandler -> ReportedSize = size;
+        break;
+
+    case 'w':
+        fm = (FILEMEM*) _cmsMallocZero(ContextID, sizeof(FILEMEM));
+        if (fm == NULL) goto Error;
+
+        fm ->Block = (cmsUInt8Number*) Buffer;
+        fm ->FreeBlockOnClose = FALSE;
+        fm ->Size    = size;
+        fm ->Pointer = 0;
+        iohandler -> ReportedSize = 0;
+        break;
+
+    default:
+        cmsSignalError(ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown access mode '%c'", *AccessMode);
+        // The handler was already allocated above; release it instead of leaking it
+        goto Error;
+    }
+
+    iohandler ->ContextID = ContextID;
+    iohandler ->stream  = (void*) fm;
+    iohandler ->UsedSpace = 0;
+    iohandler ->PhysicalFile[0] = 0;
+
+    iohandler ->Read    = MemoryRead;
+    iohandler ->Seek    = MemorySeek;
+    iohandler ->Close   = MemoryClose;
+    iohandler ->Tell    = MemoryTell;
+    iohandler ->Write   = MemoryWrite;
+
+    return iohandler;
+
+Error:
+    if (fm) _cmsFree(ContextID, fm);
+    if (iohandler) _cmsFree(ContextID, iohandler);
+    return NULL;
+}
+
+// File-based stream -------------------------------------------------------
+
+// Reads 'count' elements of 'size' bytes each from the underlying FILE.
+// Returns the number of elements read, or 0 (after signalling an error) when
+// fewer than 'count' elements could be read.
+static
+cmsUInt32Number FileRead(cmsIOHANDLER* iohandler, void *Buffer, cmsUInt32Number size, cmsUInt32Number count)
+{
+    FILE* In = (FILE*) iohandler->stream;
+    cmsUInt32Number Got = (cmsUInt32Number) fread(Buffer, size, count, In);
+
+    if (Got == count)
+        return Got;
+
+    cmsSignalError(iohandler ->ContextID, cmsERROR_FILE, "Read error. Got %d bytes, block should be of %d bytes", Got * size, count * size);
+    return 0;
+}
+
+// Moves the file position to 'offset' bytes from the start of the file.
+// Signals an error and returns FALSE when fseek fails.
+static
+cmsBool  FileSeek(cmsIOHANDLER* iohandler, cmsUInt32Number offset)
+{
+    FILE* File = (FILE*) iohandler ->stream;
+
+    if (fseek(File, (long) offset, SEEK_SET) == 0)
+        return TRUE;
+
+    cmsSignalError(iohandler ->ContextID, cmsERROR_FILE, "Seek error; probably corrupted file");
+    return FALSE;
+}
+
+// Returns file pointer position or 0 on error, which is also a valid position.
+static
+cmsUInt32Number FileTell(cmsIOHANDLER* iohandler)
+{
+    long t = ftell((FILE*)iohandler ->stream);
+    if (t == -1L) {
+        cmsSignalError(iohandler->ContextID, cmsERROR_FILE, "Tell error; probably corrupted file");
+        return 0;
+    }
+
+    return (cmsUInt32Number)t;
+}
+
+// Writes 'size' bytes from Buffer to the underlying FILE and adds 'size' to
+// UsedSpace (the counter is updated before the write is attempted).
+// Returns TRUE on success, FALSE on error. Writing 0 bytes succeeds and does nothing.
+static
+cmsBool  FileWrite(cmsIOHANDLER* iohandler, cmsUInt32Number size, const void* Buffer)
+{
+    FILE* Out = (FILE*) iohandler->stream;
+
+    if (size != 0) {
+
+        iohandler->UsedSpace += size;
+
+        if (fwrite(Buffer, size, 1, Out) != 1)
+            return FALSE;
+    }
+
+    return TRUE;
+}
+
+// Closes the underlying FILE and releases the handler.
+// Returns TRUE when fclose succeeded, FALSE otherwise. The handler is freed in
+// both cases: the stream cannot be used again after fclose, whatever its
+// result, so keeping the handler alive on failure would only leak it.
+static
+cmsBool  FileClose(cmsIOHANDLER* iohandler)
+{
+    cmsBool Closed = (fclose((FILE*) iohandler ->stream) == 0);
+
+    _cmsFree(iohandler ->ContextID, iohandler);
+    return Closed;
+}
+
+// Creates an iohandler for a file on disk.
+//   AccessMode "r": opens FileName for binary reading; ReportedSize is the file length.
+//   AccessMode "w": creates FileName for binary writing; ReportedSize is 0.
+// Returns NULL (after signalling an error where applicable) when the handler
+// cannot be allocated, the file cannot be opened, its length cannot be obtained
+// or the access mode is unknown.
+cmsIOHANDLER* CMSEXPORT cmsOpenIOhandlerFromFile(cmsContext ContextID, const char* FileName, const char* AccessMode)
+{
+    cmsIOHANDLER* Handler;
+    FILE* Stream = NULL;
+    cmsInt32Number Length;
+
+    _cmsAssert(FileName != NULL);
+    _cmsAssert(AccessMode != NULL);
+
+    Handler = (cmsIOHANDLER*) _cmsMallocZero(ContextID, sizeof(cmsIOHANDLER));
+    if (Handler == NULL) return NULL;
+
+    if (*AccessMode == 'r') {
+
+        Stream = fopen(FileName, "rb");
+        if (Stream == NULL) {
+            _cmsFree(ContextID, Handler);
+            cmsSignalError(ContextID, cmsERROR_FILE, "File '%s' not found", FileName);
+            return NULL;
+        }
+
+        Length = cmsfilelength(Stream);
+        if (Length < 0) {
+            fclose(Stream);
+            _cmsFree(ContextID, Handler);
+            cmsSignalError(ContextID, cmsERROR_FILE, "Cannot get size of file '%s'", FileName);
+            return NULL;
+        }
+
+        Handler -> ReportedSize = (cmsUInt32Number) Length;
+    }
+    else if (*AccessMode == 'w') {
+
+        Stream = fopen(FileName, "wb");
+        if (Stream == NULL) {
+            _cmsFree(ContextID, Handler);
+            cmsSignalError(ContextID, cmsERROR_FILE, "Couldn't create '%s'", FileName);
+            return NULL;
+        }
+
+        Handler -> ReportedSize = 0;
+    }
+    else {
+
+        _cmsFree(ContextID, Handler);
+        cmsSignalError(ContextID, cmsERROR_FILE, "Unknown access mode '%c'", *AccessMode);
+        return NULL;
+    }
+
+    Handler ->ContextID = ContextID;
+    Handler ->stream    = (void*) Stream;
+    Handler ->UsedSpace = 0;
+
+    // Remember the file name; copy at most size-1 chars and always terminate
+    strncpy(Handler -> PhysicalFile, FileName, sizeof(Handler -> PhysicalFile)-1);
+    Handler -> PhysicalFile[sizeof(Handler -> PhysicalFile)-1] = 0;
+
+    Handler ->Read    = FileRead;
+    Handler ->Seek    = FileSeek;
+    Handler ->Close   = FileClose;
+    Handler ->Tell    = FileTell;
+    Handler ->Write   = FileWrite;
+
+    return Handler;
+}
+
+// Builds an I/O handler around an already opened FILE stream.
+// The stream length is queried first; a negative length signals cmsERROR_FILE and returns
+// NULL. The handler has an empty PhysicalFile name and uses the FILE-based callbacks.
+cmsIOHANDLER* CMSEXPORT cmsOpenIOhandlerFromStream(cmsContext ContextID, FILE* Stream)
+{
+    cmsIOHANDLER* handler;
+    cmsInt32Number length = cmsfilelength(Stream);
+
+    if (length < 0) {
+        cmsSignalError(ContextID, cmsERROR_FILE, "Cannot get size of stream");
+        return NULL;
+    }
+
+    handler = (cmsIOHANDLER*) _cmsMallocZero(ContextID, sizeof(cmsIOHANDLER));
+    if (handler == NULL) return NULL;
+
+    handler->stream          = (void*) Stream;
+    handler->ContextID       = ContextID;
+    handler->ReportedSize    = (cmsUInt32Number) length;
+    handler->UsedSpace       = 0;
+    handler->PhysicalFile[0] = 0;
+
+    handler->Read  = FileRead;
+    handler->Seek  = FileSeek;
+    handler->Close = FileClose;
+    handler->Tell  = FileTell;
+    handler->Write = FileWrite;
+
+    return handler;
+}
+
+
+
+// Closes an open IO handler by delegating to its own Close callback and
+// returns whatever that callback reports.
+cmsBool CMSEXPORT cmsCloseIOhandler(cmsIOHANDLER* io)
+{
+    cmsBool closed = io->Close(io);
+
+    return closed;
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// Returns the IO handler attached to a profile, or NULL when the profile handle is NULL.
+cmsIOHANDLER* CMSEXPORT cmsGetProfileIOhandler(cmsHPROFILE hProfile)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    return (Profile != NULL) ? Profile->IOhandler : NULL;
+}
+
+// Creates an empty, zero-initialised profile structure holding all required parameters.
+// The placeholder has no tags, version 0x02100000 and the current UTC time as creation date.
+// Returns NULL if the allocation fails.
+cmsHPROFILE CMSEXPORT cmsCreateProfilePlaceholder(cmsContext ContextID)
+{
+    time_t now = time(NULL);
+    struct tm* utc;
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) _cmsMallocZero(ContextID, sizeof(_cmsICCPROFILE));
+    if (Icc == NULL) return NULL;
+
+    Icc ->ContextID = ContextID;
+
+    // Set it to empty
+    Icc -> TagCount   = 0;
+
+    // Set default version
+    Icc ->Version =  0x02100000;
+
+    // Set creation date/time. gmtime() may return NULL when the time cannot be converted;
+    // in that case the creation date keeps the zeroed value from _cmsMallocZero instead of
+    // copying from a NULL pointer.
+    utc = gmtime(&now);
+    if (utc != NULL)
+        memmove(&Icc ->Created, utc, sizeof(Icc ->Created));
+
+    // Create a mutex if the user provided proper plugin. NULL otherwise
+    Icc ->UsrMutex = _cmsCreateMutex(ContextID);
+
+    // Return the handle
+    return (cmsHPROFILE) Icc;
+}
+
+// Returns the context a profile was created in, or NULL when the profile handle is NULL.
+cmsContext CMSEXPORT cmsGetProfileContextID(cmsHPROFILE hProfile)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    return (Profile != NULL) ? Profile->ContextID : NULL;
+}
+
+
+// Returns the number of entries in the profile's tag directory, or -1 for a NULL profile.
+cmsInt32Number CMSEXPORT cmsGetTagCount(cmsHPROFILE hProfile)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    return (Profile == NULL) ? -1 : (cmsInt32Number) Profile->TagCount;
+}
+
+// Returns the signature of the n-th tag in the directory (0-based).
+// Returns (cmsTagSignature) 0 when n does not address an existing entry.
+cmsTagSignature CMSEXPORT cmsGetTagSignature(cmsHPROFILE hProfile, cmsUInt32Number n)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+
+    // Valid positions are 0 .. TagCount-1; n == TagCount is already one past the last entry
+    if (n >= Icc->TagCount) return (cmsTagSignature) 0;  // Mark as not available
+    if (n >= MAX_TABLE_TAG) return (cmsTagSignature) 0; // As double check
+
+    return Icc ->TagNames[n];
+}
+
+
+// Linear scan of the tag directory. Returns the position of the first entry whose
+// signature equals 'sig', or -1 when there is none. Links are not followed here.
+static
+int SearchOneTag(_cmsICCPROFILE* Profile, cmsTagSignature sig)
+{
+    int count = (int) Profile->TagCount;
+    int pos = 0;
+
+    while (pos < count) {
+
+        if (Profile->TagNames[pos] == sig)
+            return pos;
+
+        pos++;
+    }
+
+    return -1;
+}
+
+// Looks a tag up in the tag dictionary. Returns its position, or -1 if the tag is not found.
+// With lFollowLinks set, a linked entry is resolved repeatedly until an entry without
+// a link is reached, and that final position is returned.
+int _cmsSearchTag(_cmsICCPROFILE* Icc, cmsTagSignature sig, cmsBool lFollowLinks)
+{
+    for (;;) {
+
+        cmsTagSignature Target;
+        int pos = SearchOneTag(Icc, sig);
+
+        // Not in the directory at all
+        if (pos < 0)
+            return -1;
+
+        // Caller wants the raw entry, linked or not
+        if (!lFollowLinks)
+            return pos;
+
+        // A zero link means this entry holds the data itself
+        Target = Icc->TagLinked[pos];
+        if (Target == (cmsTagSignature) 0)
+            return pos;
+
+        // Otherwise continue the search with the tag it points to
+        sig = Target;
+    }
+}
+
+// Deletes a tag entry: releases the in-memory contents held at position i, if any.
+// Raw tags are plain memory blocks released with _cmsFree; cooked tags are released through
+// the FreePtr callback of their type handler. Whenever the contents are released the slot
+// is reset to NULL so the stale pointer cannot be freed a second time later on.
+// NOTE(review): a cooked tag with no type handler is left untouched here, as before.
+
+static
+void _cmsDeleteTagByPos(_cmsICCPROFILE* Icc, int i)
+{
+    _cmsAssert(Icc != NULL);
+    _cmsAssert(i >= 0);
+
+
+    if (Icc -> TagPtrs[i] != NULL) {
+
+        // Free previous version
+        if (Icc ->TagSaveAsRaw[i]) {
+            _cmsFree(Icc ->ContextID, Icc ->TagPtrs[i]);
+            Icc ->TagPtrs[i] = NULL;    // Do not leave a dangling pointer behind
+        }
+        else {
+            cmsTagTypeHandler* TypeHandler = Icc ->TagTypeHandlers[i];
+
+            if (TypeHandler != NULL) {
+
+                // Work on a copy so the shared handler is not modified
+                cmsTagTypeHandler LocalTypeHandler = *TypeHandler;
+                LocalTypeHandler.ContextID = Icc ->ContextID;              // As an additional parameter
+                LocalTypeHandler.ICCVersion = Icc ->Version;
+                LocalTypeHandler.FreePtr(&LocalTypeHandler, Icc -> TagPtrs[i]);
+                Icc ->TagPtrs[i] = NULL;
+            }
+        }
+
+    }
+}
+
+
+// Reserves a directory slot for tag 'sig' and stores its position in *NewPos.
+// An existing entry with that signature is reused after its contents are deleted;
+// otherwise a slot is appended. Returns FALSE (signalling cmsERROR_RANGE) when the
+// directory already holds MAX_TABLE_TAG entries.
+static
+cmsBool _cmsNewTag(_cmsICCPROFILE* Icc, cmsTagSignature sig, int* NewPos)
+{
+    int pos = _cmsSearchTag(Icc, sig, FALSE);
+
+    if (pos >= 0) {
+
+        // Tag is already there: drop the old contents and recycle the slot
+        _cmsDeleteTagByPos(Icc, pos);
+        *NewPos = pos;
+        return TRUE;
+    }
+
+    // Not present, so a fresh slot is needed
+    if (Icc->TagCount >= MAX_TABLE_TAG) {
+        cmsSignalError(Icc ->ContextID, cmsERROR_RANGE, "Too many tags (%d)", MAX_TABLE_TAG);
+        return FALSE;
+    }
+
+    *NewPos = (int) Icc->TagCount;
+    Icc->TagCount++;
+
+    return TRUE;
+}
+
+
+// Tells whether the profile directory contains the given tag (links are not followed).
+cmsBool CMSEXPORT cmsIsTag(cmsHPROFILE hProfile, cmsTagSignature sig)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) (void*) hProfile;
+    int pos = _cmsSearchTag(Profile, sig, FALSE);
+
+    return pos >= 0;
+}
+
+// Forces the profile version field to be per spec.
+// Works on the big endian bytes as stored in the profile, i.e. it must be
+// called before the value is converted to platform endianness.
+//   Byte 0: BCD major version, clamped to 9.
+//   Byte 1: two BCD digits, one per nibble, each clamped to 9.
+//   Bytes 2 and 3: reserved, forced to 0.
+static 
+cmsUInt32Number _validatedVersion(cmsUInt32Number DWord)
+{
+    cmsUInt8Number* bytes = (cmsUInt8Number*) &DWord;
+    cmsUInt8Number major  = bytes[0];
+    cmsUInt8Number hiNibble = (cmsUInt8Number) (bytes[1] & 0xf0);
+    cmsUInt8Number loNibble = (cmsUInt8Number) (bytes[1] & 0x0f);
+
+    if (major > 0x09) major = (cmsUInt8Number) 0x09;
+    if (hiNibble > 0x90U) hiNibble = 0x90U;
+    if (loNibble > 0x09U) loNibble = 0x09U;
+
+    bytes[0] = major;
+    bytes[1] = (cmsUInt8Number)(hiNibble | loNibble);
+    bytes[2] = (cmsUInt8Number)0;
+    bytes[3] = (cmsUInt8Number)0;
+
+    return DWord;
+}
+
+// Read profile header and validate it.
+// Reads the fixed-size ICC header through Icc->IOhandler, checks the magic number, copies the
+// header fields into Icc (converted with the _cmsAdjustEndianess* helpers) and then loads the
+// tag directory. Returns FALSE on a read error, a bad signature, or a tag count above
+// MAX_TABLE_TAG. Directory entries that fail the bounds check are dropped silently, so
+// Icc->TagCount may end up smaller than the count stored in the file.
+cmsBool _cmsReadHeader(_cmsICCPROFILE* Icc)
+{
+    cmsTagEntry Tag;
+    cmsICCHeader Header;
+    cmsUInt32Number i, j;
+    cmsUInt32Number HeaderSize;
+    cmsIOHANDLER* io = Icc ->IOhandler;
+    cmsUInt32Number TagCount;
+
+
+    // Read the header
+    if (io -> Read(io, &Header, sizeof(cmsICCHeader), 1) != 1) {
+        return FALSE;
+    }
+
+    // Validate file as an ICC profile
+    if (_cmsAdjustEndianess32(Header.magic) != cmsMagicNumber) {
+        cmsSignalError(Icc ->ContextID, cmsERROR_BAD_SIGNATURE, "not an ICC profile, invalid signature");
+        return FALSE;
+    }
+
+    // Adjust endianness of the used parameters
+    Icc -> DeviceClass     = (cmsProfileClassSignature) _cmsAdjustEndianess32(Header.deviceClass);
+    Icc -> ColorSpace      = (cmsColorSpaceSignature)   _cmsAdjustEndianess32(Header.colorSpace);
+    Icc -> PCS             = (cmsColorSpaceSignature)   _cmsAdjustEndianess32(Header.pcs);
+   
+    Icc -> RenderingIntent = _cmsAdjustEndianess32(Header.renderingIntent);
+    Icc -> flags           = _cmsAdjustEndianess32(Header.flags);
+    Icc -> manufacturer    = _cmsAdjustEndianess32(Header.manufacturer);
+    Icc -> model           = _cmsAdjustEndianess32(Header.model);
+    Icc -> creator         = _cmsAdjustEndianess32(Header.creator);
+    
+    _cmsAdjustEndianess64(&Icc -> attributes, &Header.attributes);
+    // The version is sanitized on the raw (file order) bytes first, then converted
+    Icc -> Version         = _cmsAdjustEndianess32(_validatedVersion(Header.version));
+
+    // Get size as reported in header
+    HeaderSize = _cmsAdjustEndianess32(Header.size);
+
+    // Clamp the declared size to the size reported by the IO handler, so the
+    // bounds check on tags below never accepts data beyond the actual source
+    if (HeaderSize >= Icc ->IOhandler ->ReportedSize)
+            HeaderSize = Icc ->IOhandler ->ReportedSize;
+
+
+    // Get creation date/time
+    _cmsDecodeDateTimeNumber(&Header.date, &Icc ->Created);
+
+    // The profile ID is copied as 16 raw bytes (no endianness conversion)
+    memmove(Icc ->ProfileID.ID32, Header.profileID.ID32, 16);
+
+
+    // Read the number of entries in the tag directory and reject oversized tables
+    if (!_cmsReadUInt32Number(io, &TagCount)) return FALSE;
+    if (TagCount > MAX_TABLE_TAG) {
+
+        cmsSignalError(Icc ->ContextID, cmsERROR_RANGE, "Too many tags (%d)", TagCount);
+        return FALSE;
+    }
+
+
+    // Read tag directory. Icc->TagCount counts only the entries that are accepted
+    Icc -> TagCount = 0;
+    for (i=0; i < TagCount; i++) {
+
+        if (!_cmsReadUInt32Number(io, (cmsUInt32Number *) &Tag.sig)) return FALSE;
+        if (!_cmsReadUInt32Number(io, &Tag.offset)) return FALSE;
+        if (!_cmsReadUInt32Number(io, &Tag.size)) return FALSE;
+
+        // Perform some sanity check. Offset + size should fall inside file.
+        // The second condition catches wrap-around of the 32-bit sum.
+        // Offending entries are skipped, not treated as a fatal error.
+        if (Tag.offset + Tag.size > HeaderSize ||
+            Tag.offset + Tag.size < Tag.offset)
+                  continue;
+
+        Icc -> TagNames[Icc ->TagCount]   = Tag.sig;
+        Icc -> TagOffsets[Icc ->TagCount] = Tag.offset;
+        Icc -> TagSizes[Icc ->TagCount]   = Tag.size;
+
+       // Search for links: an entry with the same offset and size as an earlier
+       // accepted entry is recorded as linked to it. The loop does not stop at
+       // the first hit, so the last matching earlier entry wins.
+        for (j=0; j < Icc ->TagCount; j++) {
+
+            if ((Icc ->TagOffsets[j] == Tag.offset) &&
+                (Icc ->TagSizes[j]   == Tag.size)) {
+
+                Icc ->TagLinked[Icc ->TagCount] = Icc ->TagNames[j];
+            }
+
+        }
+
+        Icc ->TagCount++;
+    }
+
+    return TRUE;
+}
+
+// Saves profile header.
+// Fills a cmsICCHeader from the fields of Icc (each value passed through the
+// _cmsAdjustEndianess* helpers), writes it through Icc->IOhandler and then writes the tag
+// directory: the number of entries followed by one (signature, offset, size) record per entry.
+// UsedSpace is stored as the profile size field. Returns FALSE as soon as any write fails.
+cmsBool _cmsWriteHeader(_cmsICCPROFILE* Icc, cmsUInt32Number UsedSpace)
+{
+    cmsICCHeader Header;
+    cmsUInt32Number i;
+    cmsTagEntry Tag;
+    cmsUInt32Number Count;
+
+    Header.size        = _cmsAdjustEndianess32(UsedSpace);
+    Header.cmmId       = _cmsAdjustEndianess32(lcmsSignature);
+    Header.version     = _cmsAdjustEndianess32(Icc ->Version);
+
+    Header.deviceClass = (cmsProfileClassSignature) _cmsAdjustEndianess32(Icc -> DeviceClass);
+    Header.colorSpace  = (cmsColorSpaceSignature) _cmsAdjustEndianess32(Icc -> ColorSpace);
+    Header.pcs         = (cmsColorSpaceSignature) _cmsAdjustEndianess32(Icc -> PCS);
+
+    //   NOTE: in v4 Timestamp must be in UTC rather than in local time
+    _cmsEncodeDateTimeNumber(&Header.date, &Icc ->Created);
+
+    Header.magic       = _cmsAdjustEndianess32(cmsMagicNumber);
+
+    // Platform signature is chosen at compile time
+#ifdef CMS_IS_WINDOWS_
+    Header.platform    = (cmsPlatformSignature) _cmsAdjustEndianess32(cmsSigMicrosoft);
+#else
+    Header.platform    = (cmsPlatformSignature) _cmsAdjustEndianess32(cmsSigMacintosh);
+#endif
+
+    Header.flags        = _cmsAdjustEndianess32(Icc -> flags);
+    Header.manufacturer = _cmsAdjustEndianess32(Icc -> manufacturer);
+    Header.model        = _cmsAdjustEndianess32(Icc -> model);
+
+    _cmsAdjustEndianess64(&Header.attributes, &Icc -> attributes);
+
+    // Rendering intent in the header (for embedded profiles)
+    Header.renderingIntent = _cmsAdjustEndianess32(Icc -> RenderingIntent);
+
+    // Illuminant is always D50
+    Header.illuminant.X = (cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) _cmsDoubleTo15Fixed16(cmsD50_XYZ()->X));
+    Header.illuminant.Y = (cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) _cmsDoubleTo15Fixed16(cmsD50_XYZ()->Y));
+    Header.illuminant.Z = (cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) _cmsDoubleTo15Fixed16(cmsD50_XYZ()->Z));
+
+    // Created by LittleCMS (that's me!)
+    Header.creator      = _cmsAdjustEndianess32(lcmsSignature);
+
+    // Reserved area is written as zeros
+    memset(&Header.reserved, 0, sizeof(Header.reserved));
+
+    // Set profile ID. Endianness is always big endian
+    memmove(&Header.profileID, &Icc ->ProfileID, 16);
+
+    // Dump the header
+    if (!Icc -> IOhandler->Write(Icc->IOhandler, sizeof(cmsICCHeader), &Header)) return FALSE;
+
+    // Saves Tag directory
+
+    // Get true count: entries whose name is 0 are placeholders and are not written
+    Count = 0;
+    for (i=0;  i < Icc -> TagCount; i++) {
+        if (Icc ->TagNames[i] != (cmsTagSignature) 0)
+            Count++;
+    }
+
+    // Store number of tags
+    if (!_cmsWriteUInt32Number(Icc ->IOhandler, Count)) return FALSE;
+
+    // One directory record per real tag, in directory order
+    for (i=0; i < Icc -> TagCount; i++) {
+
+        if (Icc ->TagNames[i] == (cmsTagSignature) 0) continue;   // It is just a placeholder
+
+        Tag.sig    = (cmsTagSignature) _cmsAdjustEndianess32((cmsUInt32Number) Icc -> TagNames[i]);
+        Tag.offset = _cmsAdjustEndianess32((cmsUInt32Number) Icc -> TagOffsets[i]);
+        Tag.size   = _cmsAdjustEndianess32((cmsUInt32Number) Icc -> TagSizes[i]);
+
+        if (!Icc ->IOhandler -> Write(Icc-> IOhandler, sizeof(cmsTagEntry), &Tag)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+// ----------------------------------------------------------------------- Set/Get several struct members
+
+
+// Returns the rendering intent stored in the profile header.
+cmsUInt32Number CMSEXPORT cmsGetHeaderRenderingIntent(cmsHPROFILE hProfile)
+{
+    const _cmsICCPROFILE* Profile = (const _cmsICCPROFILE*) hProfile;
+
+    return Profile->RenderingIntent;
+}
+
+// Stores a rendering intent in the profile header.
+void CMSEXPORT cmsSetHeaderRenderingIntent(cmsHPROFILE hProfile, cmsUInt32Number RenderingIntent)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    Profile->RenderingIntent = RenderingIntent;
+}
+
+// Returns the flags field of the profile header.
+cmsUInt32Number CMSEXPORT cmsGetHeaderFlags(cmsHPROFILE hProfile)
+{
+    const _cmsICCPROFILE* Profile = (const _cmsICCPROFILE*) hProfile;
+
+    return (cmsUInt32Number) Profile->flags;
+}
+
+// Stores the flags field of the profile header.
+void CMSEXPORT cmsSetHeaderFlags(cmsHPROFILE hProfile, cmsUInt32Number Flags)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    Profile->flags = (cmsUInt32Number) Flags;
+}
+
+// Returns the manufacturer field of the profile header.
+cmsUInt32Number CMSEXPORT cmsGetHeaderManufacturer(cmsHPROFILE hProfile)
+{
+    const _cmsICCPROFILE* Profile = (const _cmsICCPROFILE*) hProfile;
+
+    return Profile->manufacturer;
+}
+
+// Stores the manufacturer field of the profile header.
+void CMSEXPORT cmsSetHeaderManufacturer(cmsHPROFILE hProfile, cmsUInt32Number manufacturer)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    Profile->manufacturer = manufacturer;
+}
+
+// Returns the creator field of the profile header.
+cmsUInt32Number CMSEXPORT cmsGetHeaderCreator(cmsHPROFILE hProfile)
+{
+    const _cmsICCPROFILE* Profile = (const _cmsICCPROFILE*) hProfile;
+
+    return Profile->creator;
+}
+
+// Returns the model field of the profile header.
+cmsUInt32Number CMSEXPORT cmsGetHeaderModel(cmsHPROFILE hProfile)
+{
+    const _cmsICCPROFILE* Profile = (const _cmsICCPROFILE*) hProfile;
+
+    return Profile->model;
+}
+
+// Stores the model field of the profile header.
+void CMSEXPORT cmsSetHeaderModel(cmsHPROFILE hProfile, cmsUInt32Number model)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    Profile->model = model;
+}
+
+// Copies the header attributes of the profile into *Flags.
+void CMSEXPORT cmsGetHeaderAttributes(cmsHPROFILE hProfile, cmsUInt64Number* Flags)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+    const void* src = &Profile->attributes;
+
+    memmove(Flags, src, sizeof(cmsUInt64Number));
+}
+
+// Overwrites the header attributes of the profile with Flags.
+void CMSEXPORT cmsSetHeaderAttributes(cmsHPROFILE hProfile, cmsUInt64Number Flags)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+    void* dst = &Profile->attributes;
+
+    memmove(dst, &Flags, sizeof(cmsUInt64Number));
+}
+
+// Copies the 16 bytes of the profile ID into the caller supplied buffer.
+void CMSEXPORT cmsGetHeaderProfileID(cmsHPROFILE hProfile, cmsUInt8Number* ProfileID)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+    const cmsUInt8Number* src = Profile->ProfileID.ID8;
+
+    memmove(ProfileID, src, 16);
+}
+
+// Overwrites the profile ID with the 16 bytes found at ProfileID.
+void CMSEXPORT cmsSetHeaderProfileID(cmsHPROFILE hProfile, cmsUInt8Number* ProfileID)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+    void* dst = &Profile->ProfileID;
+
+    memmove(dst, ProfileID, 16);
+}
+
+// Copies the creation date/time of the profile into *Dest. Always returns TRUE.
+cmsBool  CMSEXPORT cmsGetHeaderCreationDateTime(cmsHPROFILE hProfile, struct tm *Dest)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+    const void* src = &Profile->Created;
+
+    memmove(Dest, src, sizeof(struct tm));
+
+    return TRUE;
+}
+
+// Returns the profile connection space (PCS) signature of the profile.
+cmsColorSpaceSignature CMSEXPORT cmsGetPCS(cmsHPROFILE hProfile)
+{
+    const _cmsICCPROFILE* Profile = (const _cmsICCPROFILE*) hProfile;
+
+    return Profile->PCS;
+}
+
+// Stores the profile connection space (PCS) signature of the profile.
+void CMSEXPORT cmsSetPCS(cmsHPROFILE hProfile, cmsColorSpaceSignature pcs)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    Profile->PCS = pcs;
+}
+
+// Returns the color space signature of the profile.
+cmsColorSpaceSignature CMSEXPORT cmsGetColorSpace(cmsHPROFILE hProfile)
+{
+    const _cmsICCPROFILE* Profile = (const _cmsICCPROFILE*) hProfile;
+
+    return Profile->ColorSpace;
+}
+
+// Stores the color space signature of the profile.
+void CMSEXPORT cmsSetColorSpace(cmsHPROFILE hProfile, cmsColorSpaceSignature sig)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    Profile->ColorSpace = sig;
+}
+
+// Returns the device class signature of the profile.
+cmsProfileClassSignature CMSEXPORT cmsGetDeviceClass(cmsHPROFILE hProfile)
+{
+    const _cmsICCPROFILE* Profile = (const _cmsICCPROFILE*) hProfile;
+
+    return Profile->DeviceClass;
+}
+
+// Stores the device class signature of the profile.
+void CMSEXPORT cmsSetDeviceClass(cmsHPROFILE hProfile, cmsProfileClassSignature sig)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    Profile->DeviceClass = sig;
+}
+
+// Returns the profile version exactly as it is encoded in the Version field.
+cmsUInt32Number CMSEXPORT cmsGetEncodedICCversion(cmsHPROFILE hProfile)
+{
+    const _cmsICCPROFILE* Profile = (const _cmsICCPROFILE*) hProfile;
+
+    return Profile->Version;
+}
+
+// Stores an already encoded version value into the Version field, unmodified.
+void CMSEXPORT cmsSetEncodedICCversion(cmsHPROFILE hProfile, cmsUInt32Number Version)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    Profile->Version = Version;
+}
+
+// Re-reads the digits of a number in another base: 'in' is split into its BaseIn digits
+// and those same digits are reassembled as a BaseOut number. For example 42 with
+// BaseIn=10 and BaseOut=16 yields 0x42. At most 100 digits are processed.
+static
+cmsUInt32Number BaseToBase(cmsUInt32Number in, int BaseIn, int BaseOut)
+{
+    char Digits[100];
+    int count = 0;
+    int pos;
+    cmsUInt32Number result = 0;
+
+    // Peel off the digits, least significant first
+    while (in > 0 && count < 100) {
+
+        Digits[count] = (char) (in % BaseIn);
+        in /= BaseIn;
+        count++;
+    }
+
+    // Rebuild from the most significant digit down
+    for (pos = count; pos > 0; pos--) {
+        result = result * BaseOut + Digits[pos - 1];
+    }
+
+    return result;
+}
+
+// Sets the profile version from a floating point number such as 4.2.
+// The value is scaled by 100 and rounded, its decimal digits are re-read as
+// hexadecimal digits, and the result is placed in the upper 16 bits of Version.
+void  CMSEXPORT cmsSetProfileVersion(cmsHPROFILE hProfile, cmsFloat64Number Version)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+    cmsUInt32Number Hundredths = (cmsUInt32Number) floor(Version * 100.0 + 0.5);
+    cmsUInt32Number Encoded = BaseToBase(Hundredths, 10, 16);
+
+    // 4.2 -> 0x4200000
+    Profile->Version = Encoded << 16;
+}
+
+// Returns the profile version as a floating point number: the upper 16 bits of
+// Version have their hexadecimal digits re-read as decimal digits, divided by 100.
+cmsFloat64Number CMSEXPORT cmsGetProfileVersion(cmsHPROFILE hProfile)
+{
+    const _cmsICCPROFILE* Profile = (const _cmsICCPROFILE*) hProfile;
+    cmsUInt32Number Encoded = Profile->Version >> 16;
+    cmsUInt32Number Hundredths = BaseToBase(Encoded, 16, 10);
+
+    return Hundredths / 100.0;
+}
+// --------------------------------------------------------------------------------------------------------------
+
+
+// Creates a profile that reads from the given IO handler.
+// The handler is attached to a fresh placeholder and the header is read from it.
+// If the header cannot be read the profile is closed (via cmsCloseProfile) and NULL is returned.
+cmsHPROFILE CMSEXPORT cmsOpenProfileFromIOhandlerTHR(cmsContext ContextID, cmsIOHANDLER* io)
+{
+    cmsHPROFILE hNew = cmsCreateProfilePlaceholder(ContextID);
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hNew;
+
+    if (hNew == NULL) return NULL;
+
+    Profile->IOhandler = io;
+
+    if (_cmsReadHeader(Profile))
+        return hNew;
+
+    cmsCloseProfile(hNew);
+    return NULL;
+}
+
+// Creates a profile on top of the given IO handler, for reading or for writing.
+// With 'write' set the placeholder is only flagged as being written and returned as is.
+// Otherwise the header is read; on failure the profile is closed and NULL is returned.
+cmsHPROFILE CMSEXPORT cmsOpenProfileFromIOhandler2THR(cmsContext ContextID, cmsIOHANDLER* io, cmsBool write)
+{
+    cmsHPROFILE hNew = cmsCreateProfilePlaceholder(ContextID);
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hNew;
+
+    if (hNew == NULL) return NULL;
+
+    Profile->IOhandler = io;
+
+    if (write) {
+        Profile->IsWrite = TRUE;
+        return hNew;
+    }
+
+    if (_cmsReadHeader(Profile))
+        return hNew;
+
+    cmsCloseProfile(hNew);
+    return NULL;
+}
+
+
+// Creates a profile backed by a disk file.
+// An access mode starting with 'w' or 'W' flags the profile as being written and returns it
+// without reading anything; any other mode reads the header. If the IO handler cannot be
+// opened or the header cannot be read, the profile is closed and NULL is returned.
+cmsHPROFILE CMSEXPORT cmsOpenProfileFromFileTHR(cmsContext ContextID, const char *lpFileName, const char *sAccess)
+{
+    cmsHPROFILE hNew = cmsCreateProfilePlaceholder(ContextID);
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hNew;
+
+    if (hNew == NULL) return NULL;
+
+    Profile->IOhandler = cmsOpenIOhandlerFromFile(ContextID, lpFileName, sAccess);
+
+    if (Profile->IOhandler != NULL) {
+
+        if (*sAccess == 'W' || *sAccess == 'w') {
+            Profile->IsWrite = TRUE;
+            return hNew;
+        }
+
+        if (_cmsReadHeader(Profile))
+            return hNew;
+    }
+
+    cmsCloseProfile(hNew);
+    return NULL;
+}
+
+
+// Same as cmsOpenProfileFromFileTHR, passing NULL as the context.
+cmsHPROFILE CMSEXPORT cmsOpenProfileFromFile(const char *ICCProfile, const char *sAccess)
+{
+    cmsHPROFILE hProfile = cmsOpenProfileFromFileTHR(NULL, ICCProfile, sAccess);
+
+    return hProfile;
+}
+
+
+// Creates a profile backed by an already opened FILE stream.
+// An access mode starting with 'w' flags the profile as being written and returns it
+// without reading anything; any other mode reads the header. If the IO handler cannot be
+// created or the header cannot be read, the profile is closed and NULL is returned.
+cmsHPROFILE  CMSEXPORT cmsOpenProfileFromStreamTHR(cmsContext ContextID, FILE* ICCProfile, const char *sAccess)
+{
+    cmsHPROFILE hNew = cmsCreateProfilePlaceholder(ContextID);
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hNew;
+
+    if (hNew == NULL) return NULL;
+
+    Profile->IOhandler = cmsOpenIOhandlerFromStream(ContextID, ICCProfile);
+
+    if (Profile->IOhandler != NULL) {
+
+        if (*sAccess == 'w') {
+            Profile->IsWrite = TRUE;
+            return hNew;
+        }
+
+        if (_cmsReadHeader(Profile))
+            return hNew;
+    }
+
+    cmsCloseProfile(hNew);
+    return NULL;
+}
+
+// Same as cmsOpenProfileFromStreamTHR, passing NULL as the context.
+cmsHPROFILE  CMSEXPORT cmsOpenProfileFromStream(FILE* ICCProfile, const char *sAccess)
+{
+    cmsHPROFILE hProfile = cmsOpenProfileFromStreamTHR(NULL, ICCProfile, sAccess);
+
+    return hProfile;
+}
+
+
+// Opens a profile held in a memory block of dwSize bytes, for reading.
+// If the memory IO handler cannot be created or the header cannot be read,
+// the profile is closed and NULL is returned.
+cmsHPROFILE CMSEXPORT cmsOpenProfileFromMemTHR(cmsContext ContextID, const void* MemPtr, cmsUInt32Number dwSize)
+{
+    cmsHPROFILE hNew = cmsCreateProfilePlaceholder(ContextID);
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hNew;
+
+    if (hNew == NULL) return NULL;
+
+    // The const qualifier is cast away only because the memory IO handler uses one
+    // entry point for both reading and writing. Don't abuse this feature!
+    Profile->IOhandler = cmsOpenIOhandlerFromMem(ContextID, (void*) MemPtr, dwSize, "r");
+
+    if (Profile->IOhandler != NULL && _cmsReadHeader(Profile))
+        return hNew;
+
+    cmsCloseProfile(hNew);
+    return NULL;
+}
+
+// Same as cmsOpenProfileFromMemTHR, passing NULL as the context.
+cmsHPROFILE CMSEXPORT cmsOpenProfileFromMem(const void* MemPtr, cmsUInt32Number dwSize)
+{
+    cmsHPROFILE hProfile = cmsOpenProfileFromMemTHR(NULL, MemPtr, dwSize);
+
+    return hProfile;
+}
+
+
+
+// Dump tag contents. If the profile is being modified, untouched tags are copied from FileOrig.
+// For every real, non-linked directory entry this records the current write position as the
+// tag offset, writes the tag through Icc->IOhandler, stores the number of bytes written as
+// the tag size and pads the output to a 32 bit boundary.
+//   - Tags with no in-memory contents are copied byte by byte from FileOrig's IO handler.
+//   - Raw tags are written verbatim using the size already stored in TagSizes.
+//   - Cooked tags are written as a type base followed by the type handler's WritePtr output.
+// Returns FALSE on any I/O or allocation failure, TRUE otherwise.
+static
+cmsBool SaveTags(_cmsICCPROFILE* Icc, _cmsICCPROFILE* FileOrig)
+{
+    cmsUInt8Number* Data;
+    cmsUInt32Number i;
+    cmsUInt32Number Begin;
+    cmsIOHANDLER* io = Icc ->IOhandler;
+    cmsTagDescriptor* TagDescriptor;
+    cmsTagTypeSignature TypeBase;
+    cmsTagTypeSignature Type;
+    cmsTagTypeHandler* TypeHandler;
+    cmsFloat64Number   Version = cmsGetProfileVersion((cmsHPROFILE) Icc);
+    cmsTagTypeHandler LocalTypeHandler;
+
+    for (i=0; i < Icc -> TagCount; i++) {
+
+        // Placeholders have no contents
+        if (Icc ->TagNames[i] == (cmsTagSignature) 0) continue;
+
+        // Linked tags are not written
+        if (Icc ->TagLinked[i] != (cmsTagSignature) 0) continue;
+
+        Icc -> TagOffsets[i] = Begin = io ->UsedSpace;
+
+        Data = (cmsUInt8Number*)  Icc -> TagPtrs[i];
+
+        if (!Data) {
+
+            // Reach here if we are copying a tag from a disk-based ICC profile which has not been modified by user.
+            // In this case a blind copy of the block data is performed.
+            // Without a source IO handler there is nothing to copy from, so the tag is skipped.
+            if (FileOrig != NULL && FileOrig ->IOhandler != NULL && Icc -> TagOffsets[i]) {
+
+                cmsUInt32Number TagSize   = FileOrig -> TagSizes[i];
+                cmsUInt32Number TagOffset = FileOrig -> TagOffsets[i];
+                cmsBool Copied;
+                void* Mem;
+
+                if (!FileOrig ->IOhandler->Seek(FileOrig ->IOhandler, TagOffset)) return FALSE;
+
+                Mem = _cmsMalloc(Icc ->ContextID, TagSize);
+                if (Mem == NULL) return FALSE;
+
+                // The write is attempted only if the read succeeded. The temporary
+                // buffer is released on every path, including the failing ones.
+                Copied = (FileOrig ->IOhandler->Read(FileOrig->IOhandler, Mem, TagSize, 1) == 1) &&
+                         io ->Write(io, TagSize, Mem);
+                _cmsFree(Icc ->ContextID, Mem);
+                if (!Copied) return FALSE;
+
+                Icc -> TagSizes[i] = (io ->UsedSpace - Begin);
+
+
+                // Align to 32 bit boundary.
+                if (! _cmsWriteAlignment(io))
+                    return FALSE;
+            }
+
+            continue;
+        }
+
+
+        // Should this tag be saved as RAW? If so, tagsizes should be specified in advance (no further cooking is done)
+        if (Icc ->TagSaveAsRaw[i]) {
+
+            if (io -> Write(io, Icc ->TagSizes[i], Data) != 1) return FALSE;
+        }
+        else {
+
+            // Search for support on this tag
+            TagDescriptor = _cmsGetTagDescriptor(Icc-> ContextID, Icc -> TagNames[i]);
+            if (TagDescriptor == NULL) continue;                        // Unsupported, ignore it
+
+            // Let the descriptor pick the type when it can, else use its first supported type
+            if (TagDescriptor ->DecideType != NULL) {
+
+                Type = TagDescriptor ->DecideType(Version, Data);
+            }
+            else {
+
+                Type = TagDescriptor ->SupportedTypes[0];
+            }
+
+            TypeHandler =  _cmsGetTagTypeHandler(Icc->ContextID, Type);
+
+            if (TypeHandler == NULL) {
+                cmsSignalError(Icc ->ContextID, cmsERROR_INTERNAL, "(Internal) no handler for tag %x", Icc -> TagNames[i]);
+                continue;
+            }
+
+            TypeBase = TypeHandler ->Signature;
+            if (!_cmsWriteTypeBase(io, TypeBase))
+                return FALSE;
+
+            // Work on a copy so the shared handler is not modified
+            LocalTypeHandler = *TypeHandler;
+            LocalTypeHandler.ContextID  = Icc ->ContextID;
+            LocalTypeHandler.ICCVersion = Icc ->Version;
+            if (!LocalTypeHandler.WritePtr(&LocalTypeHandler, io, Data, TagDescriptor ->ElemCount)) {
+
+                char String[5];
+
+                _cmsTagSignature2String(String, (cmsTagSignature) TypeBase);
+                cmsSignalError(Icc ->ContextID, cmsERROR_WRITE, "Couldn't write type '%s'", String);
+                return FALSE;
+            }
+        }
+
+
+        Icc -> TagSizes[i] = (io ->UsedSpace - Begin);
+
+        // Align to 32 bit boundary.
+        if (! _cmsWriteAlignment(io))
+            return FALSE;
+    }
+
+
+    return TRUE;
+}
+
+
+// Gives every linked tag the offset and size of the tag it points to.
+// Entries whose link target is not in the directory are left untouched.
+// Always returns TRUE.
+static
+cmsBool SetLinks( _cmsICCPROFILE* Icc)
+{
+    cmsUInt32Number i;
+
+    for (i=0; i < Icc -> TagCount; i++) {
+
+        cmsTagSignature Target = Icc->TagLinked[i];
+        int src;
+
+        // Not a linked entry
+        if (Target == (cmsTagSignature) 0) continue;
+
+        src = _cmsSearchTag(Icc, Target, FALSE);
+        if (src < 0) continue;
+
+        Icc->TagOffsets[i] = Icc->TagOffsets[src];
+        Icc->TagSizes[i]   = Icc->TagSizes[src];
+    }
+
+    return TRUE;
+}
+
+// Low-level save to IOHANDLER. It returns the number of bytes used to
+// store the profile, or zero on error. io may be NULL and in this case
+// no data is written--only sizes are calculated.
+// The save runs in two passes under the profile mutex: pass 1 writes header and tags to a
+// NULL IO handler to learn offsets and the total size; pass 2 (only when io is given) repeats
+// the write against io using those values. The profile structure is snapshotted before and
+// restored afterwards, so the offsets and sizes computed here do not persist in *hProfile.
+cmsUInt32Number CMSEXPORT cmsSaveProfileToIOhandler(cmsHPROFILE hProfile, cmsIOHANDLER* io)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    _cmsICCPROFILE Keep;
+    cmsIOHANDLER* PrevIO = NULL;
+    cmsUInt32Number UsedSpace;
+    cmsContext ContextID;
+
+    _cmsAssert(hProfile != NULL);
+    
+    if (!_cmsLockMutex(Icc->ContextID, Icc->UsrMutex)) return 0;
+    // Snapshot of the whole profile, restored on every exit path below.
+    // It also serves as the "original" profile for tags that are copied blindly.
+    memmove(&Keep, Icc, sizeof(_cmsICCPROFILE));
+
+    ContextID = cmsGetProfileContextID(hProfile);
+    // Temporarily swap in a NULL IO handler for the size-calculation pass
+    PrevIO = Icc ->IOhandler = cmsOpenIOhandlerFromNULL(ContextID);
+    if (PrevIO == NULL) {
+        _cmsUnlockMutex(Icc->ContextID, Icc->UsrMutex);
+        return 0;
+    }
+
+    // Pass #1 does compute offsets
+
+    if (!_cmsWriteHeader(Icc, 0)) goto Error;
+    if (!SaveTags(Icc, &Keep)) goto Error;
+
+    UsedSpace = PrevIO ->UsedSpace;
+
+    // Pass #2 does save to iohandler
+
+    if (io != NULL) {
+
+        Icc ->IOhandler = io;
+        // Linked tags take the offsets/sizes computed in pass #1 before the directory is written
+        if (!SetLinks(Icc)) goto Error;
+        if (!_cmsWriteHeader(Icc, UsedSpace)) goto Error;
+        if (!SaveTags(Icc, &Keep)) goto Error;
+    }
+
+    // Restore the profile as it was on entry (including its original IO handler)
+    memmove(Icc, &Keep, sizeof(_cmsICCPROFILE));
+    if (!cmsCloseIOhandler(PrevIO)) 
+        UsedSpace = 0; // As a error marker
+
+    _cmsUnlockMutex(Icc->ContextID, Icc->UsrMutex);
+
+    return UsedSpace;
+
+
+Error:
+    // Release the temporary NULL handler, restore the profile and report failure.
+    // The caller-supplied io is not closed here.
+    cmsCloseIOhandler(PrevIO);
+    memmove(Icc, &Keep, sizeof(_cmsICCPROFILE));
+    _cmsUnlockMutex(Icc->ContextID, Icc->UsrMutex);
+
+    return 0;
+}
+
+
+// Low-level save to disk. Writes the profile to FileName through a file IO handler.
+// Returns TRUE only if both the save and the close of the handler succeed;
+// otherwise the (possibly partial) output file is removed.
+cmsBool  CMSEXPORT cmsSaveProfileToFile(cmsHPROFILE hProfile, const char* FileName)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsIOHANDLER* handler = cmsOpenIOhandlerFromFile(ContextID, FileName, "w");
+    cmsBool saved, closed, ok;
+
+    if (handler == NULL) return FALSE;
+
+    saved  = (cmsSaveProfileToIOhandler(hProfile, handler) != 0);
+    closed = cmsCloseIOhandler(handler);
+    ok     = saved & closed;
+
+    // remove() is C99 per 7.19.4.1. Its return value has to be IGNORED in this case
+    if (ok == FALSE)
+        remove(FileName);
+
+    return ok;
+}
+
+// Saves the profile to an already opened FILE stream through a stream IO handler.
+// Returns TRUE only if both the save and the close of the handler succeed.
+cmsBool CMSEXPORT cmsSaveProfileToStream(cmsHPROFILE hProfile, FILE* Stream)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsIOHANDLER* handler = cmsOpenIOhandlerFromStream(ContextID, Stream);
+    cmsBool saved, closed;
+
+    if (handler == NULL) return FALSE;
+
+    saved  = (cmsSaveProfileToIOhandler(hProfile, handler) != 0);
+    closed = cmsCloseIOhandler(handler);
+
+    return saved & closed;
+}
+
+
+// Saves the profile to a memory block.
+// With MemPtr == NULL nothing is written: *BytesNeeded receives the size the profile would
+// take and the result tells whether that size is non-zero.
+// Otherwise *BytesNeeded is taken as the size of the block at MemPtr, and the result is TRUE
+// only if both the save and the close of the memory handler succeed.
+cmsBool CMSEXPORT cmsSaveProfileToMem(cmsHPROFILE hProfile, void *MemPtr, cmsUInt32Number* BytesNeeded)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsIOHANDLER* handler;
+    cmsBool saved, closed;
+
+    _cmsAssert(BytesNeeded != NULL);
+
+    // Size query only
+    if (MemPtr == NULL) {
+
+        *BytesNeeded = cmsSaveProfileToIOhandler(hProfile, NULL);
+        return (*BytesNeeded != 0);
+    }
+
+    // Real write into the caller's block
+    handler = cmsOpenIOhandlerFromMem(ContextID, MemPtr, *BytesNeeded, "w");
+    if (handler == NULL) return FALSE;
+
+    saved  = (cmsSaveProfileToIOhandler(hProfile, handler) != 0);
+    closed = cmsCloseIOhandler(handler);
+
+    return saved & closed;
+}
+
+
+
+// Closes a profile freeing any involved resources.
+// A profile opened in write mode is first saved to the file named by its IO handler.
+// Then all in-memory tag contents are released, the IO handler is closed, the mutex is
+// destroyed and the profile structure itself is freed.
+// Returns FALSE for a NULL handle or if the save or the IO handler close failed; the
+// resources are released in either case.
+cmsBool  CMSEXPORT cmsCloseProfile(cmsHPROFILE hProfile)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    cmsBool  rc = TRUE;
+    cmsUInt32Number i;
+
+    if (!Icc) return FALSE;
+
+    // Was open in write mode?
+    if (Icc ->IsWrite) {
+
+        Icc ->IsWrite = FALSE;      // Assure no further writing
+        // The destination is the file name recorded in the IO handler
+        rc &= cmsSaveProfileToFile(hProfile, Icc ->IOhandler->PhysicalFile);
+    }
+
+    // Release the in-memory contents of every tag
+    for (i=0; i < Icc -> TagCount; i++) {
+
+        if (Icc -> TagPtrs[i]) {
+
+            cmsTagTypeHandler* TypeHandler = Icc ->TagTypeHandlers[i];
+
+            if (TypeHandler != NULL) {
+                // Work on a copy so the shared handler is not modified
+                cmsTagTypeHandler LocalTypeHandler = *TypeHandler;
+
+                LocalTypeHandler.ContextID = Icc ->ContextID;              // As an additional parameters
+                LocalTypeHandler.ICCVersion = Icc ->Version;
+                LocalTypeHandler.FreePtr(&LocalTypeHandler, Icc -> TagPtrs[i]);
+            }
+            else
+                // No type handler: the contents are treated as a plain memory block
+                _cmsFree(Icc ->ContextID, Icc ->TagPtrs[i]);
+        }
+    }
+
+    if (Icc ->IOhandler != NULL) {
+        rc &= cmsCloseIOhandler(Icc->IOhandler);
+    }
+
+    _cmsDestroyMutex(Icc->ContextID, Icc->UsrMutex);
+
+    _cmsFree(Icc ->ContextID, Icc);   // Free placeholder memory
+
+    return rc;
+}
+
+
+// -------------------------------------------------------------------------------------------------------------------
+
+
+// Returns TRUE when Type appears among the type signatures that the tag descriptor lists as supported
+static
+cmsBool IsTypeSupported(cmsTagDescriptor* TagDescriptor, cmsTagTypeSignature Type)
+{
+    // Never scan more than MAX_TYPES_IN_LCMS_PLUGIN entries, whatever the descriptor claims
+    cmsUInt32Number Limit = TagDescriptor ->nSupportedTypes;
+    cmsUInt32Number Pos = 0;
+
+    if (Limit > MAX_TYPES_IN_LCMS_PLUGIN) Limit = MAX_TYPES_IN_LCMS_PLUGIN;
+
+    while (Pos < Limit) {
+        if (TagDescriptor ->SupportedTypes[Pos++] == Type) return TRUE;
+    }
+
+    return FALSE;
+}
+
+
+// That's the main read function. Returns the cooked object for tag 'sig' (cached in the profile, which frees it on close) or NULL on any failure.
+void* CMSEXPORT cmsReadTag(cmsHPROFILE hProfile, cmsTagSignature sig)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    cmsIOHANDLER* io = Icc ->IOhandler;
+    cmsTagTypeHandler* TypeHandler;
+    cmsTagTypeHandler LocalTypeHandler;
+    cmsTagDescriptor*  TagDescriptor;
+    cmsTagTypeSignature BaseType;
+    cmsUInt32Number Offset, TagSize;
+    cmsUInt32Number ElemCount;
+    int n;
+
+    if (!_cmsLockMutex(Icc->ContextID, Icc ->UsrMutex)) return NULL;
+
+    n = _cmsSearchTag(Icc, sig, TRUE);
+    if (n < 0) goto Error;               // Not found, return NULL
+
+
+    // If the element is already in memory, return the pointer (after re-validating its type)
+    if (Icc -> TagPtrs[n]) {
+
+        if (Icc->TagTypeHandlers[n] == NULL) goto Error;
+
+        // Sanity check
+        BaseType = Icc->TagTypeHandlers[n]->Signature;
+        if (BaseType == 0) goto Error;
+
+        TagDescriptor = _cmsGetTagDescriptor(Icc->ContextID, sig);
+        if (TagDescriptor == NULL) goto Error;
+
+        if (!IsTypeSupported(TagDescriptor, BaseType)) goto Error;
+
+        if (Icc ->TagSaveAsRaw[n]) goto Error;  // We don't support read raw tags as cooked
+
+        _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+        return Icc -> TagPtrs[n];
+    }
+
+    // We need to read it. Get the offset and size to the file
+    Offset    = Icc -> TagOffsets[n];
+    TagSize   = Icc -> TagSizes[n];
+
+    if (TagSize < 8) goto Error;         // Too small to hold the 8 bytes consumed by the type base below
+
+    // Seek to its location
+    if (!io -> Seek(io, Offset))
+        goto Error;
+
+    // Search for support on this tag
+    TagDescriptor = _cmsGetTagDescriptor(Icc-> ContextID, sig);
+    if (TagDescriptor == NULL) {
+
+        char String[5];
+
+        _cmsTagSignature2String(String, sig);
+
+        // An unknown element was found.
+        cmsSignalError(Icc ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown tag type '%s' found.", String);
+        goto Error;     // Unsupported.
+    }
+
+    // if supported, get type and check if in list
+    BaseType = _cmsReadTypeBase(io);
+    if (BaseType == 0) goto Error;
+
+    if (!IsTypeSupported(TagDescriptor, BaseType)) goto Error;
+    // From here on TagSize only counts what follows the type base
+    TagSize  -= 8;       // Already read by the type base logic
+
+    // Get type handler
+    TypeHandler = _cmsGetTagTypeHandler(Icc ->ContextID, BaseType);
+    if (TypeHandler == NULL) goto Error;
+    LocalTypeHandler = *TypeHandler;
+
+
+    // Read the tag
+    Icc -> TagTypeHandlers[n] = TypeHandler;
+    // The per-profile fields are set on the local copy only, the shared handler is left untouched
+    LocalTypeHandler.ContextID = Icc ->ContextID;
+    LocalTypeHandler.ICCVersion = Icc ->Version;
+    Icc -> TagPtrs[n] = LocalTypeHandler.ReadPtr(&LocalTypeHandler, io, &ElemCount, TagSize);
+
+    // The tag type is supported, but something wrong happened and we cannot read the tag.
+    // let know the user about this (although it is just a warning)
+    if (Icc -> TagPtrs[n] == NULL) {
+
+        char String[5];
+
+        _cmsTagSignature2String(String, sig);
+        cmsSignalError(Icc ->ContextID, cmsERROR_CORRUPTION_DETECTED, "Corrupted tag '%s'", String);
+        goto Error;
+    }
+
+    // This is a weird error that may be a symptom of something more serious, the number of
+    // stored item is actually less than the number of required elements.
+    if (ElemCount < TagDescriptor ->ElemCount) {
+        // NOTE(review): TagPtrs[n] stays set on this path, so a later call looks able to return it from the cache branch above - confirm
+        char String[5];
+
+        _cmsTagSignature2String(String, sig);
+        cmsSignalError(Icc ->ContextID, cmsERROR_CORRUPTION_DETECTED, "'%s' Inconsistent number of items: expected %d, got %d",
+            String, TagDescriptor ->ElemCount, ElemCount);
+        goto Error;
+    }
+
+
+    // Return the data
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return Icc -> TagPtrs[n];
+
+
+    // Return error and unlock the data
+Error:
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return NULL;
+}
+
+
+// Returns the type signature a tag was actually read or written with, or 0 when that is not known
+cmsTagTypeSignature _cmsGetTagTrueType(cmsHPROFILE hProfile, cmsTagSignature sig)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    cmsTagTypeHandler* TypeHandler;
+    int n;
+
+    // Search for given tag in ICC profile directory
+    n = _cmsSearchTag(Icc, sig, TRUE);
+    if (n < 0) return (cmsTagTypeSignature) 0;                // Not found, return 0
+
+    // The handler is filled in by cmsReadTag()/cmsWriteTag(); guard against entries where it is still NULL
+    TypeHandler =  Icc -> TagTypeHandlers[n];
+    return (TypeHandler != NULL) ? TypeHandler ->Signature : (cmsTagTypeSignature) 0;
+}
+
+
+// Write a single tag. This just keeps track of the tag into a list of "to be written". If the tag is already
+// in that list, the previous version is deleted. Passing data == NULL deletes the tag instead. Returns TRUE on success.
+cmsBool CMSEXPORT cmsWriteTag(cmsHPROFILE hProfile, cmsTagSignature sig, const void* data)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    cmsTagTypeHandler* TypeHandler = NULL;
+    cmsTagTypeHandler LocalTypeHandler;
+    cmsTagDescriptor* TagDescriptor = NULL;
+    cmsTagTypeSignature Type;
+    int i;
+    cmsFloat64Number Version;
+    char TypeString[5], SigString[5];
+
+    if (!_cmsLockMutex(Icc->ContextID, Icc ->UsrMutex)) return FALSE;
+
+    // To delete tags.
+    if (data == NULL) {
+
+         // Delete the tag
+         i = _cmsSearchTag(Icc, sig, FALSE);
+         if (i >= 0) {
+
+             // Use zero as a mark of deleted
+             _cmsDeleteTagByPos(Icc, i);
+             Icc ->TagNames[i] = (cmsTagSignature) 0;
+             _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+             return TRUE;
+         }
+         // Didn't find the tag, so deleting it is reported as a failure
+        goto Error;
+    }
+
+    if (!_cmsNewTag(Icc, sig, &i)) goto Error;
+
+    // This is not raw
+    Icc ->TagSaveAsRaw[i] = FALSE;
+
+    // This is not a link
+    Icc ->TagLinked[i] = (cmsTagSignature) 0;
+
+    // Get information about the TAG.
+    TagDescriptor = _cmsGetTagDescriptor(Icc-> ContextID, sig);
+    if (TagDescriptor == NULL){
+         cmsSignalError(Icc ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported tag '%x'", sig);
+        goto Error;
+    }
+
+
+    // Now we need to know which type to use. It depends on the version.
+    Version = cmsGetProfileVersion(hProfile);
+
+    if (TagDescriptor ->DecideType != NULL) {
+
+        // Let the tag descriptor decide the type based on the version and on
+        // the data. This is useful for example on parametric curves, where
+        // curves specified by a table cannot be saved as parametric and need
+        // to be cast to single v2-curves, even on v4 profiles.
+
+        Type = TagDescriptor ->DecideType(Version, data);
+    }
+    else {
+        // No decision callback: take the first type listed by the descriptor
+        Type = TagDescriptor ->SupportedTypes[0];
+    }
+
+    // Does the tag support this type?
+    if (!IsTypeSupported(TagDescriptor, Type)) {
+
+        _cmsTagSignature2String(TypeString, (cmsTagSignature) Type);
+        _cmsTagSignature2String(SigString,  sig);
+
+        cmsSignalError(Icc ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported type '%s' for tag '%s'", TypeString, SigString);
+        goto Error;
+    }
+
+    // Do we have a handler for this type?
+    TypeHandler =  _cmsGetTagTypeHandler(Icc->ContextID, Type);
+    if (TypeHandler == NULL) {
+
+        _cmsTagSignature2String(TypeString, (cmsTagSignature) Type);
+        _cmsTagSignature2String(SigString,  sig);
+
+        cmsSignalError(Icc ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported type '%s' for tag '%s'", TypeString, SigString);
+        goto Error;           // Should never happen
+    }
+
+
+    // Fill fields on icc structure
+    Icc ->TagTypeHandlers[i]  = TypeHandler;
+    Icc ->TagNames[i]         = sig;
+    Icc ->TagSizes[i]         = 0;
+    Icc ->TagOffsets[i]       = 0;
+    // The slot stores what the handler's DupPtr returns for 'data', not the caller's pointer itself
+    LocalTypeHandler = *TypeHandler;
+    LocalTypeHandler.ContextID  = Icc ->ContextID;
+    LocalTypeHandler.ICCVersion = Icc ->Version;
+    Icc ->TagPtrs[i]            = LocalTypeHandler.DupPtr(&LocalTypeHandler, data, TagDescriptor ->ElemCount);
+
+    if (Icc ->TagPtrs[i] == NULL)  {
+
+        _cmsTagSignature2String(TypeString, (cmsTagSignature) Type);
+        _cmsTagSignature2String(SigString,  sig);
+        cmsSignalError(Icc ->ContextID, cmsERROR_CORRUPTION_DETECTED, "Malformed struct in type '%s' for tag '%s'", TypeString, SigString);
+
+        goto Error;
+    }
+
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return TRUE;
+
+Error:
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return FALSE;
+
+}
+
+// Read and write raw data. The only way those functions can work and stay consistent with normal read and write
+// is to do an additional step of serialization. That means readRaw issues a normal read and then converts the obtained
+// data to raw bytes by using the "write" serialization logic, and vice-versa. This may end in situations where the
+// raw data written does not exactly correspond with the raw data proposed to cmsWriteRawTag, but this approach allows
+// to write a tag as raw data and then read it as handled.
+// Returns the number of bytes placed in 'data' (or the tag size when 'data' is NULL); 0 means error.
+cmsUInt32Number CMSEXPORT cmsReadRawTag(cmsHPROFILE hProfile, cmsTagSignature sig, void* data, cmsUInt32Number BufferSize)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    void *Object;
+    int i;
+    cmsIOHANDLER* MemIO;
+    cmsTagTypeHandler* TypeHandler = NULL;
+    cmsTagTypeHandler LocalTypeHandler;
+    cmsTagDescriptor* TagDescriptor = NULL;
+    cmsUInt32Number rc;
+    cmsUInt32Number Offset, TagSize;
+
+    if (!_cmsLockMutex(Icc->ContextID, Icc ->UsrMutex)) return 0;
+
+    // Search for given tag in ICC profile directory
+    i = _cmsSearchTag(Icc, sig, TRUE);
+    if (i < 0) goto Error;                 // Not found
+
+    // It is already read?
+    if (Icc -> TagPtrs[i] == NULL) {
+
+        // No yet, get original position
+        Offset   = Icc ->TagOffsets[i];
+        TagSize  = Icc ->TagSizes[i];
+
+        // read the data directly, don't keep copy
+        if (data != NULL) {
+
+            if (BufferSize < TagSize)
+                TagSize = BufferSize;      // Never copy more than the caller's buffer can take
+
+            if (!Icc ->IOhandler ->Seek(Icc ->IOhandler, Offset)) goto Error;
+            if (!Icc ->IOhandler ->Read(Icc ->IOhandler, data, 1, TagSize)) goto Error;
+
+            _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+            return TagSize;
+        }
+
+        _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+        return Icc ->TagSizes[i];
+    }
+
+    // The data has been already read, or written. But wait!, maybe the user chose to save as
+    // raw data. In this case, return the raw data directly
+    if (Icc ->TagSaveAsRaw[i]) {
+
+        if (data != NULL)  {
+
+            TagSize  = Icc ->TagSizes[i];
+            if (BufferSize < TagSize)
+                TagSize = BufferSize;
+
+            memmove(data, Icc ->TagPtrs[i], TagSize);
+
+            _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+            return TagSize;
+        }
+
+        _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+        return Icc ->TagSizes[i];
+    }
+
+    // Already read, or previously set by cmsWriteTag(). We need to serialize that
+    // data to raw in order to maintain consistency.
+    // cmsReadTag() takes the same mutex, so it is released around the call and taken again afterwards
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    Object = cmsReadTag(hProfile, sig);
+    if (!_cmsLockMutex(Icc->ContextID, Icc ->UsrMutex)) return 0;
+
+    if (Object == NULL) goto Error;
+
+    // Now we need to serialize to a memory block: just use a memory iohandler
+
+    if (data == NULL) {
+        MemIO = cmsOpenIOhandlerFromNULL(cmsGetProfileContextID(hProfile));
+    } else{
+        MemIO = cmsOpenIOhandlerFromMem(cmsGetProfileContextID(hProfile), data, BufferSize, "w");
+    }
+    if (MemIO == NULL) goto Error;
+
+    // Obtain type handling for the tag
+    TypeHandler = Icc ->TagTypeHandlers[i];
+    TagDescriptor = _cmsGetTagDescriptor(Icc-> ContextID, sig);
+    // Either lookup failing must still release MemIO: the Error label only unlocks the mutex
+    if (TagDescriptor == NULL || TypeHandler == NULL) {
+        cmsCloseIOhandler(MemIO);
+        goto Error;
+    }
+
+
+    // Serialize
+    LocalTypeHandler = *TypeHandler;
+    LocalTypeHandler.ContextID  = Icc ->ContextID;
+    LocalTypeHandler.ICCVersion = Icc ->Version;
+
+    if (!_cmsWriteTypeBase(MemIO, TypeHandler ->Signature)) {
+        cmsCloseIOhandler(MemIO);
+        goto Error;
+    }
+
+    if (!LocalTypeHandler.WritePtr(&LocalTypeHandler, MemIO, Object, TagDescriptor ->ElemCount)) {
+        cmsCloseIOhandler(MemIO);
+        goto Error;
+    }
+
+    // Get Size and close
+    rc = MemIO ->Tell(MemIO);
+    cmsCloseIOhandler(MemIO);      // Ignore return code this time
+
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return rc;
+
+Error:
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return 0;
+}
+
+// Similar to the previous one. This function writes any data directly to the ICC profile, without
+// checking anything. As a rule, mixing raw with cooked doesn't work, so writing a tag as raw and then reading
+// it as cooked without serializing results in an error. If that is what you want, you will need to dump
+// the profile to memory or disk and then reopen it. Returns TRUE on success, FALSE otherwise.
+cmsBool CMSEXPORT cmsWriteRawTag(cmsHPROFILE hProfile, cmsTagSignature sig, const void* data, cmsUInt32Number Size)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    int i;
+
+    if (!_cmsLockMutex(Icc->ContextID, Icc ->UsrMutex)) return FALSE;
+
+    if (!_cmsNewTag(Icc, sig, &i)) {
+        _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+         return FALSE;
+    }
+
+    // Mark the tag as being written as RAW
+    Icc ->TagSaveAsRaw[i] = TRUE;
+    Icc ->TagNames[i]     = sig;
+    Icc ->TagLinked[i]    = (cmsTagSignature) 0;
+
+    Icc ->TagPtrs[i]  = _cmsDupMem(Icc ->ContextID, data, Size);     // Keep a copy of the block
+    Icc ->TagSizes[i] = Size;
+
+    if (Icc->TagPtrs[i] == NULL) {                   // Copy failed: roll back while the lock is still held
+        Icc->TagNames[i] = (cmsTagSignature) 0;      // Zero is the mark of a deleted tag
+        _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+        return FALSE;
+    }
+
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return TRUE;
+}
+
+// Records tag 'sig' as linked to tag 'dest', so that several tag entries collapse to the same block in the profile
+cmsBool CMSEXPORT cmsLinkTag(cmsHPROFILE hProfile, cmsTagSignature sig, cmsTagSignature dest)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    cmsBool Created;
+    int Slot;
+
+    if (!_cmsLockMutex(Icc->ContextID, Icc ->UsrMutex)) return FALSE;
+
+    Created = _cmsNewTag(Icc, sig, &Slot);
+    if (Created) {
+
+        // A link holds no data of its own: only its name and its target are kept
+        Icc ->TagPtrs[Slot]      = NULL;
+        Icc ->TagSizes[Slot]     = 0;
+        Icc ->TagOffsets[Slot]   = 0;
+
+        Icc ->TagNames[Slot]     = sig;
+        Icc ->TagLinked[Slot]    = dest;
+        Icc ->TagSaveAsRaw[Slot] = FALSE;
+    }
+
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return Created ? TRUE : FALSE;
+}
+
+
+// Reports the tag that 'sig' is linked to (two tags sharing the same resource); 0 when 'sig' is not in the directory
+cmsTagSignature  CMSEXPORT cmsTagLinkedTo(cmsHPROFILE hProfile, cmsTagSignature sig)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+
+    // Look the tag up in the ICC profile directory
+    int Pos = _cmsSearchTag(Icc, sig, FALSE);
+
+    if (Pos >= 0) return Icc -> TagLinked[Pos];
+
+    return (cmsTagSignature) 0;              // Not found, return 0
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmsio1.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmsio1.c
new file mode 100644
index 0000000000..ae7ebe0eff
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmsio1.c
@@ -0,0 +1,1029 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Read tags using low-level functions, provides necessary glue code to adapt versions, etc.
+
+// LUT tags. Each table has one entry per rendering intent, in the order given by the trailing comments
+static const cmsTagSignature Device2PCS16[]   =  {cmsSigAToB0Tag,     // Perceptual
+                                                  cmsSigAToB1Tag,     // Relative colorimetric
+                                                  cmsSigAToB2Tag,     // Saturation
+                                                  cmsSigAToB1Tag };   // Absolute colorimetric (same tag as relative colorimetric)
+// Floating point counterpart; here absolute colorimetric has a tag of its own
+static const cmsTagSignature Device2PCSFloat[] = {cmsSigDToB0Tag,     // Perceptual
+                                                  cmsSigDToB1Tag,     // Relative colorimetric
+                                                  cmsSigDToB2Tag,     // Saturation
+                                                  cmsSigDToB3Tag };   // Absolute colorimetric
+
+static const cmsTagSignature PCS2Device16[]    = {cmsSigBToA0Tag,     // Perceptual
+                                                  cmsSigBToA1Tag,     // Relative colorimetric
+                                                  cmsSigBToA2Tag,     // Saturation
+                                                  cmsSigBToA1Tag };   // Absolute colorimetric (same tag as relative colorimetric)
+
+static const cmsTagSignature PCS2DeviceFloat[] = {cmsSigBToD0Tag,     // Perceptual
+                                                  cmsSigBToD1Tag,     // Relative colorimetric
+                                                  cmsSigBToD2Tag,     // Saturation
+                                                  cmsSigBToD3Tag };   // Absolute colorimetric
+
+
+// Factors to convert from 1.15 fixed point to 0..1.0 range and vice-versa
+#define InpAdj   (1.0/MAX_ENCODEABLE_XYZ)     // (65536.0/(65535.0*2.0))
+#define OutpAdj  (MAX_ENCODEABLE_XYZ)         // ((2.0*65535.0)/65536.0)
+
+// Several resources for gray conversions. Each is a 3-element matrix used as a 3x1 or 1x3 stage.
+static const cmsFloat64Number GrayInputMatrix[] = { (InpAdj*cmsD50X),  (InpAdj*cmsD50Y),  (InpAdj*cmsD50Z) };
+static const cmsFloat64Number OneToThreeInputMatrix[] = { 1, 1, 1 };
+static const cmsFloat64Number PickYMatrix[] = { 0, (OutpAdj*cmsD50Y), 0 };
+static const cmsFloat64Number PickLstarMatrix[] = { 1, 0, 0 };
+
+// Get the media white point, working around some issues found in certain old profiles.
+// Always returns TRUE: D50 is substituted when the tag is absent or has to be ignored.
+cmsBool  _cmsReadMediaWhitePoint(cmsCIEXYZ* Dest, cmsHPROFILE hProfile)
+{
+    const cmsCIEXYZ* White;
+
+    _cmsAssert(Dest != NULL);
+
+    White = (const cmsCIEXYZ*) cmsReadTag(hProfile, cmsSigMediaWhitePointTag);
+
+    if (White == NULL) {
+
+        // The profile carries no white point at all: take D50
+        White = cmsD50_XYZ();
+    }
+    else if (cmsGetEncodedICCversion(hProfile) < 0x4000000 &&
+             cmsGetDeviceClass(hProfile) == cmsSigDisplayClass) {
+
+        // V2 display profiles should give D50, whatever the tag says
+        White = cmsD50_XYZ();
+    }
+
+    // In every other case the tag is used as found. Only the source of the
+    // value differs between the branches above; the copy and the result of
+    // the call are the same.
+    *Dest = *White;
+    return TRUE;
+}
+
+
+// Chromatic adaptation matrix. Fix some issues as well.
+// Dest always gets a value: the tag when present, otherwise identity or, for V2 display
+// profiles that carry a media white point, whatever _cmsAdaptationMatrix() computes.
+cmsBool  _cmsReadCHAD(cmsMAT3* Dest, cmsHPROFILE hProfile)
+{
+    cmsMAT3* Chad;
+    cmsCIEXYZ* MediaWhite;
+
+    _cmsAssert(Dest != NULL);
+
+    // The tag, when present, is taken as it is
+    Chad = (cmsMAT3*) cmsReadTag(hProfile, cmsSigChromaticAdaptationTag);
+    if (Chad != NULL) {
+        *Dest = *Chad;
+        return TRUE;
+    }
+
+    // No CHAD available, default it to identity
+    _cmsMAT3identity(Dest);
+
+    // Only display profiles older than V4 get anything else, since they should give D50
+    if (cmsGetEncodedICCversion(hProfile) >= 0x4000000) return TRUE;
+    if (cmsGetDeviceClass(hProfile) != cmsSigDisplayClass) return TRUE;
+
+    MediaWhite = (cmsCIEXYZ*) cmsReadTag(hProfile, cmsSigMediaWhitePointTag);
+    if (MediaWhite == NULL) {
+
+        // No media white point to work with, so identity it is
+        _cmsMAT3identity(Dest);
+        return TRUE;
+    }
+
+    // The matrix is built from the media white point and D50; the outcome of that
+    // computation is also the outcome of this function
+    return _cmsAdaptationMatrix(Dest, NULL, MediaWhite, cmsD50_XYZ());
+}
+
+
+// Auxiliary: reads the red, green and blue colorant tags into a MAT3, where v[0], v[1] and v[2] receive the
+// X, Y and Z components respectively. Used by any function that needs a matrix-shaper. FALSE if a tag is missing.
+static
+cmsBool ReadICCMatrixRGB2XYZ(cmsMAT3* r, cmsHPROFILE hProfile)
+{
+    cmsCIEXYZ* Colorant[3];
+
+    _cmsAssert(r != NULL);
+
+    Colorant[0] = (cmsCIEXYZ *) cmsReadTag(hProfile, cmsSigRedColorantTag);
+    Colorant[1] = (cmsCIEXYZ *) cmsReadTag(hProfile, cmsSigGreenColorantTag);
+    Colorant[2] = (cmsCIEXYZ *) cmsReadTag(hProfile, cmsSigBlueColorantTag);
+
+    if (!Colorant[0] || !Colorant[1] || !Colorant[2]) return FALSE;
+
+    _cmsVEC3init(&r -> v[0], Colorant[0] -> X, Colorant[1] -> X, Colorant[2] -> X);
+    _cmsVEC3init(&r -> v[1], Colorant[0] -> Y, Colorant[1] -> Y, Colorant[2] -> Y);
+    _cmsVEC3init(&r -> v[2], Colorant[0] -> Z, Colorant[1] -> Z, Colorant[2] -> Z);
+
+    return TRUE;
+}
+
+
+// Gray input pipeline. Returns a new 1 -> 3 pipeline built from the GrayTRC tag, or NULL on failure.
+static
+cmsPipeline* BuildGrayInputMatrixPipeline(cmsHPROFILE hProfile)
+{
+    cmsToneCurve *GrayTRC;
+    cmsPipeline* Lut;
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+
+    GrayTRC = (cmsToneCurve *) cmsReadTag(hProfile, cmsSigGrayTRCTag);
+    if (GrayTRC == NULL) return NULL;
+
+    Lut = cmsPipelineAlloc(ContextID, 1, 3);
+    if (Lut == NULL)
+        return NULL;                     // Nothing of ours to release yet
+
+    if (cmsGetPCS(hProfile) == cmsSigLabData) {
+
+        // In this case we implement the profile as an identity matrix plus 3 tone curves
+        cmsUInt16Number Zero[2] = { 0x8080, 0x8080 };
+        cmsToneCurve* EmptyTab;
+        cmsToneCurve* LabCurves[3];
+
+        EmptyTab = cmsBuildTabulatedToneCurve16(ContextID, 2, Zero);
+
+        if (EmptyTab == NULL)
+            goto Error;
+
+        LabCurves[0] = GrayTRC;
+        LabCurves[1] = EmptyTab;
+        LabCurves[2] = EmptyTab;
+
+        if (!cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocMatrix(ContextID, 3,  1, OneToThreeInputMatrix, NULL)) ||
+            !cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocToneCurves(ContextID, 3, LabCurves))) {
+                cmsFreeToneCurve(EmptyTab);
+                goto Error;
+        }
+
+        cmsFreeToneCurve(EmptyTab);      // EmptyTab is ours; it is released once the stage has been built
+
+    }
+    else  {
+
+        if (!cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocToneCurves(ContextID, 1, &GrayTRC)) ||
+            !cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocMatrix(ContextID, 3,  1, GrayInputMatrix, NULL)))
+            goto Error;
+    }
+
+    return Lut;
+
+Error:
+    // GrayTRC is the pointer cmsReadTag() handed out and still belongs to the profile: it must not be freed here
+    cmsPipelineFree(Lut);
+    return NULL;
+}
+
+// RGB matrix-shaper for input: the three TRC curves followed by the colorant matrix
+static
+cmsPipeline* BuildRGBInputMatrixShaper(cmsHPROFILE hProfile)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsToneCurve *Curves[3];
+    cmsPipeline* Result;
+    cmsMAT3 Colorants;
+    int Row, Col;
+
+    if (!ReadICCMatrixRGB2XYZ(&Colorants, hProfile)) return NULL;
+
+    // XYZ PCS is encoded in 1.15 format while the matrix output comes in 0..0xffff range.
+    // Scaling by (0x10000/0xffff) gives a 1.16 range and a further >> 1 gives 1.15, so the
+    // total factor applied to every coefficient is (65536.0)/(65535.0*2)
+    for (Row=0; Row < 3; Row++) {
+        for (Col=0; Col < 3; Col++) {
+            Colorants.v[Row].n[Col] *= InpAdj;
+        }
+    }
+
+    // All three curves are read before any of them is checked
+    Curves[0] = (cmsToneCurve *) cmsReadTag(hProfile, cmsSigRedTRCTag);
+    Curves[1] = (cmsToneCurve *) cmsReadTag(hProfile, cmsSigGreenTRCTag);
+    Curves[2] = (cmsToneCurve *) cmsReadTag(hProfile, cmsSigBlueTRCTag);
+
+    if (Curves[0] == NULL || Curves[1] == NULL || Curves[2] == NULL)
+        return NULL;
+
+    // An allocation failure is reported as NULL, with nothing to clean up
+    Result = cmsPipelineAlloc(ContextID, 3, 3);
+    if (Result == NULL) return NULL;
+
+    if (!cmsPipelineInsertStage(Result, cmsAT_END, cmsStageAllocToneCurves(ContextID, 3, Curves)))
+        goto Error;
+
+    if (!cmsPipelineInsertStage(Result, cmsAT_END, cmsStageAllocMatrix(ContextID, 3, 3, (cmsFloat64Number*) &Colorants, NULL)))
+        goto Error;
+
+    // Note that it is certainly possible a single profile would have a LUT based
+    // tag for output working in lab and a matrix-shaper for the fallback cases.
+    // This is not allowed by the spec, but this code is tolerant to those cases
+    if (cmsGetPCS(hProfile) == cmsSigLabData &&
+        !cmsPipelineInsertStage(Result, cmsAT_END, _cmsStageAllocXYZ2Lab(ContextID)))
+        goto Error;
+
+    return Result;
+
+Error:
+    cmsPipelineFree(Result);
+    return NULL;
+}
+
+
+
+// Read the DToAX tag, adjusting the encoding of Lab or XYZ if needed.
+// Returns a new pipeline (a copy of the tag contents), or NULL on failure.
+static
+cmsPipeline* _cmsReadFloatInputTag(cmsHPROFILE hProfile, cmsTagSignature tagFloat)
+{
+    cmsContext ContextID            = cmsGetProfileContextID(hProfile);
+    cmsPipeline* Copy               = cmsPipelineDup((cmsPipeline*) cmsReadTag(hProfile, tagFloat));
+    cmsColorSpaceSignature InSpace  = cmsGetColorSpace(hProfile);
+    cmsColorSpaceSignature OutSpace = cmsGetPCS(hProfile);
+    cmsBool Ok = TRUE;
+
+    if (Copy == NULL) return NULL;
+
+    // Input and output of the transform are in lcms 0..1 encoding. If XYZ or Lab spaces are used,
+    // these need to be normalized into the appropriate ranges (Lab = 100,0,0, XYZ=1.0,1.0,1.0).
+
+    // Device side: the normalization stage goes at the beginning of the pipeline
+    if (InSpace == cmsSigLabData) {
+        Ok = cmsPipelineInsertStage(Copy, cmsAT_BEGIN, _cmsStageNormalizeToLabFloat(ContextID));
+    }
+    else if (InSpace == cmsSigXYZData) {
+        Ok = cmsPipelineInsertStage(Copy, cmsAT_BEGIN, _cmsStageNormalizeToXyzFloat(ContextID));
+    }
+
+    // PCS side: the stage goes at the end, and is only attempted if nothing failed so far
+    if (Ok) {
+
+        if (OutSpace == cmsSigLabData) {
+            Ok = cmsPipelineInsertStage(Copy, cmsAT_END, _cmsStageNormalizeFromLabFloat(ContextID));
+        }
+        else if (OutSpace == cmsSigXYZData) {
+            Ok = cmsPipelineInsertStage(Copy, cmsAT_END, _cmsStageNormalizeFromXyzFloat(ContextID));
+        }
+    }
+
+    if (Ok) return Copy;
+
+    // A stage could not be inserted: drop the partially built copy
+    cmsPipelineFree(Copy);
+    return NULL;
+}
+
+
+// Read and create a BRAND NEW MPE LUT from a given profile. All stuff dependent of version, etc
+// is adjusted here in order to create a LUT that takes care of all those details. The caller owns the result.
+// We add intent = 0xffffffff as a way to read matrix shaper always, no matter of other LUT
+cmsPipeline* CMSEXPORT _cmsReadInputLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent)
+{
+    cmsTagTypeSignature OriginalType;
+    cmsTagSignature tag16;
+    cmsTagSignature tagFloat;
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+
+    // On named color, take the appropriate tag
+    if (cmsGetDeviceClass(hProfile) == cmsSigNamedColorClass) {
+
+        cmsPipeline* Lut;
+        cmsNAMEDCOLORLIST* nc = (cmsNAMEDCOLORLIST*) cmsReadTag(hProfile, cmsSigNamedColor2Tag);
+
+        if (nc == NULL) return NULL;
+
+        Lut = cmsPipelineAlloc(ContextID, 0, 0);
+        if (Lut == NULL) {
+            // nc came from cmsReadTag() and still belongs to the profile, so it must not be freed here
+            return NULL;
+        }
+
+        if (!cmsPipelineInsertStage(Lut, cmsAT_BEGIN, _cmsStageAllocNamedColor(nc, TRUE)) ||
+            !cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageAllocLabV2ToV4(ContextID))) {
+            cmsPipelineFree(Lut);
+            return NULL;
+        }
+        return Lut;
+    }
+
+    // This is an attempt to reuse this function to retrieve the matrix-shaper as pipeline no
+    // matter other LUT are present and have precedence. Intent = 0xffffffff can be used for that.
+    if (Intent <= INTENT_ABSOLUTE_COLORIMETRIC) {
+
+        tag16 = Device2PCS16[Intent];
+        tagFloat = Device2PCSFloat[Intent];
+
+        if (cmsIsTag(hProfile, tagFloat)) {  // Float tag takes precedence
+
+            // Floating point LUT are always V4, but the encoding range is no
+            // longer 0..1.0, so we need to add an stage depending on the color space
+            return _cmsReadFloatInputTag(hProfile, tagFloat);
+        }
+
+        // Revert to perceptual if no tag is found
+        if (!cmsIsTag(hProfile, tag16)) {
+            tag16 = Device2PCS16[0];
+        }
+
+        if (cmsIsTag(hProfile, tag16)) { // Is there any LUT-Based table?
+
+            // Check profile version and LUT type. Do the necessary adjustments if needed
+
+            // First read the tag
+            cmsPipeline* Lut = (cmsPipeline*) cmsReadTag(hProfile, tag16);
+            if (Lut == NULL) return NULL;
+
+            // After reading it, we have now info about the original type
+            OriginalType =  _cmsGetTagTrueType(hProfile, tag16);
+
+            // The profile owns the Lut, so we need to copy it
+            Lut = cmsPipelineDup(Lut);
+            if (Lut == NULL) return NULL;          // Copy failed, nothing to adjust or to free
+            // We need to adjust data only for Lab16 on output
+            if (OriginalType != cmsSigLut16Type || cmsGetPCS(hProfile) != cmsSigLabData)
+                return Lut;
+
+            // If the input is Lab, add also a conversion at the begin
+            if (cmsGetColorSpace(hProfile) == cmsSigLabData &&
+                !cmsPipelineInsertStage(Lut, cmsAT_BEGIN, _cmsStageAllocLabV4ToV2(ContextID)))
+                goto Error;
+
+            // Add a matrix for conversion V2 to V4 Lab PCS
+            if (!cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageAllocLabV2ToV4(ContextID)))
+                goto Error;
+
+            return Lut;
+Error:
+            cmsPipelineFree(Lut);
+            return NULL;
+        }
+    }
+
+    // Lut was not found, try to create a matrix-shaper
+
+    // Check if this is a grayscale profile.
+    if (cmsGetColorSpace(hProfile) == cmsSigGrayData) {
+
+        // if so, build appropriate conversion tables.
+        // The tables are the PCS illuminant, scaled across GrayTRC
+        return BuildGrayInputMatrixPipeline(hProfile);
+    }
+
+    // Not gray, create a normal matrix-shaper
+    return BuildRGBInputMatrixShaper(hProfile);
+}
+
+// ---------------------------------------------------------------------------------------------------------------
+
+// Gray output pipeline.
+// XYZ -> Gray or Lab -> Gray. Only the GrayTRC is known, so some assumptions are needed: the gray component
+// is given by Y on XYZ PCS and by L* on Lab PCS, in both cases across the inverse TRC curve.
+// The complete pipeline is therefore Matrix[3:1] -> Tone curve on XYZ as well as on Lab.
+
+static
+cmsPipeline* BuildGrayOutputPipeline(cmsHPROFILE hProfile)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsToneCurve *Forward, *Inverse;
+    const cmsFloat64Number* Picker;
+    cmsPipeline* Result;
+    cmsBool Built;
+
+    // The forward curve is the one stored in the profile; it is never released here
+    Forward = (cmsToneCurve *) cmsReadTag(hProfile, cmsSigGrayTRCTag);
+    if (Forward == NULL) return NULL;
+
+    // The reversed curve is a new object that this function has to release on every path
+    Inverse = cmsReverseToneCurve(Forward);
+    if (Inverse == NULL) return NULL;
+
+    Result = cmsPipelineAlloc(ContextID, 3, 1);
+    if (Result == NULL) {
+        cmsFreeToneCurve(Inverse);
+        return NULL;
+    }
+
+    // The 3:1 matrix keeps L* when the PCS is Lab and Y otherwise
+    Picker = (cmsGetPCS(hProfile) == cmsSigLabData) ? PickLstarMatrix : PickYMatrix;
+
+    // The curve stage is only attempted once the matrix stage is in place
+    Built = cmsPipelineInsertStage(Result, cmsAT_END, cmsStageAllocMatrix(ContextID, 1,  3, Picker, NULL)) &&
+            cmsPipelineInsertStage(Result, cmsAT_END, cmsStageAllocToneCurves(ContextID, 1, &Inverse));
+
+    // Our inverse curve goes away on success and on failure alike
+    cmsFreeToneCurve(Inverse);
+
+    if (!Built) {
+        cmsPipelineFree(Result);
+        return NULL;
+    }
+
+    return Result;
+}
+
+
+// Output matrix-shaper: inverted colorant matrix followed by the reversed TRC curves. NULL on failure.
+static
+cmsPipeline* BuildRGBOutputMatrixShaper(cmsHPROFILE hProfile)
+{
+    cmsPipeline* Lut;
+    cmsToneCurve *Shapes[3], *InvShapes[3];
+    cmsMAT3 Mat, Inv;
+    int i, j;
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+
+    if (!ReadICCMatrixRGB2XYZ(&Mat, hProfile))
+        return NULL;
+
+    if (!_cmsMAT3inverse(&Mat, &Inv))
+        return NULL;
+
+    // XYZ PCS in encoded in 1.15 format, and the matrix input should come in 0..0xffff range, so
+    // we need to adjust the input by a << 1 to obtain a 1.16 fixed and then by a factor of
+    // (0xffff/0x10000) to put data in 0..0xffff range. Total factor is (2.0*65535.0)/65536.0;
+    for (i=0; i < 3; i++)
+        for (j=0; j < 3; j++)
+            Inv.v[i].n[j] *= OutpAdj;
+
+    // The TRC tags are read from the profile; this function never frees them
+    Shapes[0] = (cmsToneCurve *) cmsReadTag(hProfile, cmsSigRedTRCTag);
+    Shapes[1] = (cmsToneCurve *) cmsReadTag(hProfile, cmsSigGreenTRCTag);
+    Shapes[2] = (cmsToneCurve *) cmsReadTag(hProfile, cmsSigBlueTRCTag);
+    if (!Shapes[0] || !Shapes[1] || !Shapes[2])
+        return NULL;
+
+    InvShapes[0] = cmsReverseToneCurve(Shapes[0]);
+    InvShapes[1] = cmsReverseToneCurve(Shapes[1]);
+    InvShapes[2] = cmsReverseToneCurve(Shapes[2]);
+    // Some of the reversals may have succeeded: release those before giving up
+    if (!InvShapes[0] || !InvShapes[1] || !InvShapes[2]) {
+        for (i=0; i < 3; i++)
+            if (InvShapes[i] != NULL) cmsFreeToneCurve(InvShapes[i]);
+        return NULL;
+    }
+
+    Lut = cmsPipelineAlloc(ContextID, 3, 3);
+    if (Lut != NULL) {
+        // Note that it is certainly possible a single profile would have a LUT based
+        // tag for output working in lab and a matrix-shaper for the fallback cases.
+        // This is not allowed by the spec, but this code is tolerant to those cases
+        if (cmsGetPCS(hProfile) == cmsSigLabData) {
+            if (!cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageAllocLab2XYZ(ContextID)))
+                goto Error;
+        }
+
+        if (!cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocMatrix(ContextID, 3, 3, (cmsFloat64Number*) &Inv, NULL)) ||
+            !cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocToneCurves(ContextID, 3, InvShapes)))
+            goto Error;
+    }
+
+    cmsFreeToneCurveTriple(InvShapes);
+    return Lut;
+Error:
+    cmsFreeToneCurveTriple(InvShapes);
+    cmsPipelineFree(Lut);
+    return NULL;
+}
+
+
+// Walks every stage of the pipeline and switches each CLUT stage to trilinear interpolation
+static
+void ChangeInterpolationToTrilinear(cmsPipeline* Lut)
+{
+    cmsStage* Cur = cmsPipelineGetPtrToFirstStage(Lut);
+
+    while (Cur != NULL) {
+
+        // Stages of any other type are left alone
+        if (cmsStageType(Cur) == cmsSigCLutElemType) {
+            _cmsStageCLutData* Table = (_cmsStageCLutData*) Cur ->Data;
+
+            // Raise the trilinear flag, then refresh the interpolation routine for these params
+            Table ->Params->dwFlags |= CMS_LERP_FLAGS_TRILINEAR;
+            _cmsSetInterpolationRoutine(Lut->ContextID, Table ->Params);
+        }
+        Cur = cmsStageNext(Cur);
+    }
+}
+
+
+// Duplicate the float output tag and add the Lab/XYZ normalization stages its two ends need
+static
+cmsPipeline* _cmsReadFloatOutputTag(cmsHPROFILE hProfile, cmsTagSignature tagFloat)
+{
+    cmsContext ContextID             = cmsGetProfileContextID(hProfile);
+    cmsPipeline* Lut                 = cmsPipelineDup((cmsPipeline*) cmsReadTag(hProfile, tagFloat));
+    cmsColorSpaceSignature PCS       = cmsGetPCS(hProfile);
+    cmsColorSpaceSignature dataSpace = cmsGetColorSpace(hProfile);
+
+    if (Lut == NULL) return NULL;
+    // Input side: the formatter already moved Lab/XYZ to 0..1.0, so undo that for the float tag
+    switch (PCS) {
+    case cmsSigLabData:
+        if (!cmsPipelineInsertStage(Lut, cmsAT_BEGIN, _cmsStageNormalizeToLabFloat(ContextID)))
+            goto Error;
+        break;
+    case cmsSigXYZData:
+        if (!cmsPipelineInsertStage(Lut, cmsAT_BEGIN, _cmsStageNormalizeToXyzFloat(ContextID)))
+            goto Error;
+        break;
+    default:
+        break;
+    }
+
+    // Output side: when the device space is Lab or XYZ, normalisation is needed at the end too
+    switch (dataSpace) {
+    case cmsSigLabData:
+        if (!cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageNormalizeFromLabFloat(ContextID)))
+            goto Error;
+        break;
+    case cmsSigXYZData:
+        if (!cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageNormalizeFromXyzFloat(ContextID)))
+            goto Error;
+        break;
+    default:
+        break;
+    }
+
+    return Lut;
+Error:
+    cmsPipelineFree(Lut);
+    return NULL;
+}
+
+// Create an output (PCS to device) MPE LUT from a given profile and intent. Version mismatches are handled here
+cmsPipeline* CMSEXPORT _cmsReadOutputLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent)
+{
+    cmsTagTypeSignature OriginalType;
+    cmsTagSignature tag16;
+    cmsTagSignature tagFloat;
+    cmsContext ContextID  = cmsGetProfileContextID(hProfile);
+
+    // Lookup order: float tag, 16-bit LUT tag (perceptual as fallback), then gray or RGB matrix-shaper
+    if (Intent <= INTENT_ABSOLUTE_COLORIMETRIC) {
+
+        tag16 = PCS2Device16[Intent];
+        tagFloat = PCS2DeviceFloat[Intent];
+
+        if (cmsIsTag(hProfile, tagFloat)) {  // Float tag takes precedence
+
+            // Floating point LUT are always V4
+            return _cmsReadFloatOutputTag(hProfile, tagFloat);
+        }
+
+        // Revert to perceptual if no tag is found
+        if (!cmsIsTag(hProfile, tag16)) {
+            tag16 = PCS2Device16[0];
+        }
+
+        if (cmsIsTag(hProfile, tag16)) { // Is there any LUT-Based table?
+
+            // Check profile version and LUT type. Do the necessary adjustments if needed
+
+            // First read the tag
+            cmsPipeline* Lut = (cmsPipeline*) cmsReadTag(hProfile, tag16);
+            if (Lut == NULL) return NULL;
+
+            // After reading it, we have info about the original type
+            OriginalType =  _cmsGetTagTrueType(hProfile, tag16);
+
+            // The profile owns the Lut, so we need to copy it; from here on Lut is ours to free
+            Lut = cmsPipelineDup(Lut);
+            if (Lut == NULL) return NULL;
+
+            // Now comes a controversial step: for 3D LUTs that use Lab as the indexing
+            // space, trilinear interpolation should be used
+            if (cmsGetPCS(hProfile) == cmsSigLabData)
+                ChangeInterpolationToTrilinear(Lut);
+
+            // We need to adjust data only for Lab and Lut16 type
+            if (OriginalType != cmsSigLut16Type || cmsGetPCS(hProfile) != cmsSigLabData)
+                return Lut;
+
+            // Add a matrix for conversion V4 to V2 Lab PCS
+            if (!cmsPipelineInsertStage(Lut, cmsAT_BEGIN, _cmsStageAllocLabV4ToV2(ContextID)))
+                goto Error;
+
+            // If the output is Lab, add also a conversion at the end
+            if (cmsGetColorSpace(hProfile) == cmsSigLabData)
+                if (!cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageAllocLabV2ToV4(ContextID)))
+                    goto Error;
+
+            return Lut;
+Error:
+            cmsPipelineFree(Lut);
+            return NULL;
+        }
+    }
+
+    // Lut not found (or intent beyond the tag tables), try to create a matrix-shaper
+
+    // Check if this is a grayscale profile.
+    if (cmsGetColorSpace(hProfile) == cmsSigGrayData) {
+
+        // if so, build appropriate conversion tables.
+        // The tables are the PCS illuminant, scaled across GrayTRC
+        return BuildGrayOutputPipeline(hProfile);
+    }
+
+    // Not gray, create a normal matrix-shaper
+    return BuildRGBOutputMatrixShaper(hProfile);
+}
+
+// ---------------------------------------------------------------------------------------------------------------
+
+// Duplicate the float devicelink tag and add the Lab/XYZ normalization stages its two ends need
+static
+cmsPipeline* _cmsReadFloatDevicelinkTag(cmsHPROFILE hProfile, cmsTagSignature tagFloat)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsPipeline* Lut = cmsPipelineDup((cmsPipeline*)cmsReadTag(hProfile, tagFloat));
+    cmsColorSpaceSignature PCS = cmsGetPCS(hProfile);
+    cmsColorSpaceSignature spc = cmsGetColorSpace(hProfile);
+
+    if (Lut == NULL) return NULL;
+    // Entry side follows the profile color space
+    switch (spc) {
+    case cmsSigLabData:
+        if (!cmsPipelineInsertStage(Lut, cmsAT_BEGIN, _cmsStageNormalizeToLabFloat(ContextID)))
+            goto Error;
+        break;
+    case cmsSigXYZData:
+        if (!cmsPipelineInsertStage(Lut, cmsAT_BEGIN, _cmsStageNormalizeToXyzFloat(ContextID)))
+            goto Error;
+        break;
+    default: break;
+    }
+
+    // Exit side follows the PCS
+    switch (PCS) {
+    case cmsSigLabData:
+        if (!cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageNormalizeFromLabFloat(ContextID)))
+            goto Error;
+        break;
+    case cmsSigXYZData:
+        if (!cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageNormalizeFromXyzFloat(ContextID)))
+            goto Error;
+        break;
+    default: break;
+    }
+    return Lut;
+Error:
+    cmsPipelineFree(Lut);
+    return NULL;
+}
+
+// Reads the device-to-PCS pipeline of devicelink, abstract and named color profiles. A matrix-shaper
+// cannot be obtained on those device classes, so NULL is returned when no suitable tag is found.
+cmsPipeline* CMSEXPORT _cmsReadDevicelinkLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent)
+{
+    cmsPipeline* Lut;
+    cmsTagTypeSignature OriginalType;
+    cmsTagSignature tag16;
+    cmsTagSignature tagFloat;
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+
+    // The tag tables below are only indexed by the four ICC intents
+    if (Intent > INTENT_ABSOLUTE_COLORIMETRIC)
+        return NULL;
+
+    tag16 = Device2PCS16[Intent];
+    tagFloat = Device2PCSFloat[Intent];
+
+    // On named color, take the appropriate tag
+    if (cmsGetDeviceClass(hProfile) == cmsSigNamedColorClass) {
+
+        cmsNAMEDCOLORLIST* nc = (cmsNAMEDCOLORLIST*)cmsReadTag(hProfile, cmsSigNamedColor2Tag);
+
+        if (nc == NULL) return NULL;
+
+        Lut = cmsPipelineAlloc(ContextID, 0, 0);
+        if (Lut == NULL)
+            goto Error;
+
+        if (!cmsPipelineInsertStage(Lut, cmsAT_BEGIN, _cmsStageAllocNamedColor(nc, FALSE)))
+            goto Error;
+
+        if (cmsGetColorSpace(hProfile) == cmsSigLabData)
+            if (!cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageAllocLabV2ToV4(ContextID)))
+                goto Error;
+
+        return Lut;
+    Error:
+        // nc was handed out by cmsReadTag and still belongs to the profile: release the pipeline only
+        cmsPipelineFree(Lut);
+        return NULL;
+    }
+
+    // Not a named color profile: look for the floating point tag of this intent first
+    if (cmsIsTag(hProfile, tagFloat)) {  // Float tag takes precedence
+
+        // Floating point LUT are always V4
+        return _cmsReadFloatDevicelinkTag(hProfile, tagFloat);
+    }
+
+    tagFloat = Device2PCSFloat[0];
+    if (cmsIsTag(hProfile, tagFloat)) {
+        // Fallback to the first float tag; it is duplicated as is, without normalization stages
+        return cmsPipelineDup((cmsPipeline*)cmsReadTag(hProfile, tagFloat));
+    }
+
+    if (!cmsIsTag(hProfile, tag16)) {  // Is there any LUT-Based table?
+
+        tag16 = Device2PCS16[0];
+        if (!cmsIsTag(hProfile, tag16)) return NULL;
+    }
+
+    // Check profile version and LUT type. Do the necessary adjustments if needed
+
+    // Read the tag
+    Lut = (cmsPipeline*)cmsReadTag(hProfile, tag16);
+    if (Lut == NULL) return NULL;
+
+    // The profile owns the Lut, so we need to copy it
+    Lut = cmsPipelineDup(Lut);
+    if (Lut == NULL) return NULL;
+
+    // Now comes a controversial step: for 3D LUTs that use Lab as the indexing
+    // space, trilinear interpolation should be used
+    if (cmsGetPCS(hProfile) == cmsSigLabData)
+        ChangeInterpolationToTrilinear(Lut);
+
+    // After reading it, we have info about the original type
+    OriginalType = _cmsGetTagTrueType(hProfile, tag16);
+
+    // We need to adjust data for Lab16 on output
+    if (OriginalType != cmsSigLut16Type) return Lut;
+
+    // Here it is possible to get Lab on both sides
+
+    if (cmsGetColorSpace(hProfile) == cmsSigLabData) {
+        if (!cmsPipelineInsertStage(Lut, cmsAT_BEGIN, _cmsStageAllocLabV4ToV2(ContextID)))
+            goto Error2;
+    }
+
+    if (cmsGetPCS(hProfile) == cmsSigLabData) {
+        if (!cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageAllocLabV2ToV4(ContextID)))
+            goto Error2;
+    }
+
+    return Lut;
+
+Error2:
+    cmsPipelineFree(Lut);
+    return NULL;
+}
+
+// ---------------------------------------------------------------------------------------------------------------
+
+// Returns TRUE if the profile carries the tags a matrix-shaper is built from
+cmsBool  CMSEXPORT cmsIsMatrixShaper(cmsHPROFILE hProfile)
+{
+    static const cmsTagSignature RgbTags[6] = {
+        cmsSigRedColorantTag, cmsSigGreenColorantTag, cmsSigBlueColorantTag,
+        cmsSigRedTRCTag,      cmsSigGreenTRCTag,      cmsSigBlueTRCTag
+    };
+    cmsColorSpaceSignature Space = cmsGetColorSpace(hProfile);
+    cmsUInt32Number k;
+
+    // Gray profiles only need the gray tone curve
+    if (Space == cmsSigGrayData)
+        return cmsIsTag(hProfile, cmsSigGrayTRCTag);
+
+    // Nothing but gray and RGB can be a matrix-shaper
+    if (Space != cmsSigRgbData)
+        return FALSE;
+
+    // RGB needs all three colorants and all three tone curves; stop at the first one missing
+    for (k = 0; k < 6; k++)
+        if (!cmsIsTag(hProfile, RgbTags[k])) return FALSE;
+    return TRUE;
+}
+
+// Returns TRUE if the intent is implemented as CLUT when the profile is used in UsedDirection
+cmsBool  CMSEXPORT cmsIsCLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number UsedDirection)
+{
+    const cmsTagSignature* TagTable;
+
+    // For devicelinks, the supported intent is that one stated in the header
+    if (cmsGetDeviceClass(hProfile) == cmsSigLinkClass) {
+            return (cmsGetHeaderRenderingIntent(hProfile) == Intent);
+    }
+
+    switch (UsedDirection) {
+       case LCMS_USED_AS_INPUT: TagTable = Device2PCS16; break;
+       case LCMS_USED_AS_OUTPUT:TagTable = PCS2Device16; break;
+       // For proofing, we need rel. colorimetric in output. Let's do some recursion
+       case LCMS_USED_AS_PROOF:
+           return cmsIsIntentSupported(hProfile, Intent, LCMS_USED_AS_INPUT) &&
+                  cmsIsIntentSupported(hProfile, INTENT_RELATIVE_COLORIMETRIC, LCMS_USED_AS_OUTPUT);
+       default:
+           cmsSignalError(cmsGetProfileContextID(hProfile), cmsERROR_RANGE, "Unexpected direction (%d)", UsedDirection);
+           return FALSE;
+    }
+
+    // The tag tables are only indexed up to INTENT_ABSOLUTE_COLORIMETRIC; never read past them
+    if (Intent > INTENT_ABSOLUTE_COLORIMETRIC)
+        return FALSE;
+
+    return cmsIsTag(hProfile, TagTable[Intent]);
+}
+
+
+// Tells whether the profile can handle the intent in the given direction
+cmsBool  CMSEXPORT cmsIsIntentSupported(cmsHPROFILE hProfile,
+                                        cmsUInt32Number Intent, cmsUInt32Number UsedDirection)
+{
+    // A CLUT for the intent settles the question. Otherwise any matrix-shaper counts as support.
+    // This is a bit odd: V2 matrix-shapers cannot deal with non-zero black points, so relative
+    // colorimetric is not fully supported by them, yet many profiles claim it. The answer stays
+    // "yes" even though accuracy would be less than optimal in the rel.col and v2 case.
+    cmsBool HasTable = cmsIsCLUT(hProfile, Intent, UsedDirection);
+
+    if (!HasTable)
+        return cmsIsMatrixShaper(hProfile);
+    return TRUE;
+}
+
+
+// ---------------------------------------------------------------------------------------------------------------
+
+// Merge the profile sequence description tag and the profile sequence id tag, whichever are
+// present, into a single newly created sequence. Returns NULL when neither tag can be read.
+cmsSEQ* _cmsReadProfileSequence(cmsHPROFILE hProfile)
+{
+    cmsSEQ* Desc = (cmsSEQ*) cmsReadTag(hProfile, cmsSigProfileSequenceDescTag);
+    cmsSEQ* Ids  = (cmsSEQ*) cmsReadTag(hProfile, cmsSigProfileSequenceIdTag);
+    cmsSEQ* Merged;
+    cmsUInt32Number k;
+
+    // With at most one tag available there is nothing to merge
+    if (Desc == NULL)
+        return (Ids == NULL) ? NULL : cmsDupProfileSequenceDescription(Ids);
+
+    if (Ids == NULL)
+        return cmsDupProfileSequenceDescription(Desc);
+
+    // The description tag is the base of the result in every remaining case
+    Merged = cmsDupProfileSequenceDescription(Desc);
+
+    // Entries can only be paired up when both tags hold the same number of them
+    if (Merged == NULL || Desc ->n != Ids ->n)
+        return Merged;
+
+    // Overlay the ID and the description coming from the profile sequence id tag
+    for (k = 0; k < Desc ->n; k++) {
+
+        cmsPSEQDESC* Dst = &Merged ->seq[k];
+        cmsPSEQDESC* Src = &Ids ->seq[k];
+
+        memmove(&Dst ->ProfileID, &Src ->ProfileID, sizeof(cmsProfileID));
+        Dst ->Description = cmsMLUdup(Src ->Description);
+    }
+
+    return Merged;
+}
+
+// Store the sequence in the description tag and, for encoded version 0x4000000 or above, in the id tag too
+cmsBool _cmsWriteProfileSequence(cmsHPROFILE hProfile, const cmsSEQ* seq)
+{
+    cmsBool IsV4;
+
+    if (!cmsWriteTag(hProfile, cmsSigProfileSequenceDescTag, seq)) return FALSE;
+
+    // The version is only looked at once the first tag has been written
+    IsV4 = (cmsGetEncodedICCversion(hProfile) >= 0x4000000);
+    if (IsV4 && !cmsWriteTag(hProfile, cmsSigProfileSequenceIdTag, seq)) return FALSE;
+    return TRUE;
+}
+
+
+// Auxiliary, read a MLU tag and hand back a duplicate of it; NULL when the tag cannot be read
+static
+cmsMLU* GetMLUFromProfile(cmsHPROFILE h, cmsTagSignature sig)
+{
+    cmsMLU* Stored = (cmsMLU*) cmsReadTag(h, sig);
+
+    // Duplicate only what was actually found
+    return (Stored != NULL) ? cmsMLUdup(Stored) : NULL;
+}
+
+// Create a sequence description out of an array of profiles, one entry per profile
+cmsSEQ* _cmsCompileProfileSequence(cmsContext ContextID, cmsUInt32Number nProfiles, cmsHPROFILE hProfiles[])
+{
+    cmsSEQ* Result = cmsAllocProfileSequenceDescription(ContextID, nProfiles);
+    cmsUInt32Number k;
+
+    if (Result == NULL) return NULL;
+
+    for (k = 0; k < nProfiles; k++) {
+
+        cmsHPROFILE Profile = hProfiles[k];
+        cmsPSEQDESC* Entry  = &Result ->seq[k];
+        cmsTechnologySignature* Tech;
+
+        // Fields taken straight from the profile header
+        cmsGetHeaderAttributes(Profile, &Entry ->attributes);
+        cmsGetHeaderProfileID(Profile, Entry ->ProfileID.ID8);
+        Entry ->deviceMfg   = cmsGetHeaderManufacturer(Profile);
+        Entry ->deviceModel = cmsGetHeaderModel(Profile);
+
+        // The technology tag may be missing; a zero signature is stored in that case
+        Tech = (cmsTechnologySignature*) cmsReadTag(Profile, cmsSigTechnologyTag);
+        Entry ->technology = (Tech != NULL) ? *Tech : (cmsTechnologySignature) 0;
+
+        // Text tags are duplicated into the entry; each one is NULL when its tag cannot be read
+        Entry ->Manufacturer = GetMLUFromProfile(Profile,  cmsSigDeviceMfgDescTag);
+        Entry ->Model        = GetMLUFromProfile(Profile,  cmsSigDeviceModelDescTag);
+        Entry ->Description  = GetMLUFromProfile(Profile, cmsSigProfileDescriptionTag);
+
+    }
+
+    return Result;
+}
+
+// -------------------------------------------------------------------------------------------------------------------
+
+
+// Maps a cmsInfoType request to the MLU tag that stores it and reads that tag:
+//   cmsInfoDescription  -> cmsSigProfileDescriptionTag
+//   cmsInfoManufacturer -> cmsSigDeviceMfgDescTag
+//   cmsInfoModel        -> cmsSigDeviceModelDescTag
+//   cmsInfoCopyright    -> cmsSigCopyrightTag
+// Any other request yields NULL. The result is whatever cmsReadTag returns; no copy is made here.
+static
+const cmsMLU* GetInfo(cmsHPROFILE hProfile, cmsInfoType Info)
+{
+    cmsTagSignature Wanted;
+
+    if (Info == cmsInfoDescription) {
+        Wanted = cmsSigProfileDescriptionTag;
+    }
+    else if (Info == cmsInfoManufacturer) {
+        Wanted = cmsSigDeviceMfgDescTag;
+    }
+    else if (Info == cmsInfoModel) {
+        Wanted = cmsSigDeviceModelDescTag;
+    }
+    else if (Info == cmsInfoCopyright) {
+        Wanted = cmsSigCopyrightTag;
+    }
+    else {
+        return NULL;        // Unknown kind of information
+    }
+
+    return (cmsMLU*) cmsReadTag(hProfile, Wanted);
+}
+
+
+
+cmsUInt32Number CMSEXPORT cmsGetProfileInfo(cmsHPROFILE hProfile, cmsInfoType Info,
+                                            const char LanguageCode[3], const char CountryCode[3],
+                                            wchar_t* Buffer, cmsUInt32Number BufferSize)
+{
+    const cmsMLU* Text = GetInfo(hProfile, Info);
+
+    // No text for this request (unknown Info or unreadable tag) reports 0; else defer to the wide getter
+    return (Text == NULL) ? 0 : cmsMLUgetWide(Text, LanguageCode, CountryCode, Buffer, BufferSize);
+}
+
+
+cmsUInt32Number  CMSEXPORT cmsGetProfileInfoASCII(cmsHPROFILE hProfile, cmsInfoType Info,
+                                                          const char LanguageCode[3], const char CountryCode[3],
+                                                          char* Buffer, cmsUInt32Number BufferSize)
+{
+    const cmsMLU* Text = GetInfo(hProfile, Info);
+
+    // No text for this request (unknown Info or unreadable tag) reports 0; else defer to the ASCII getter
+    return (Text == NULL) ? 0 : cmsMLUgetASCII(Text, LanguageCode, CountryCode, Buffer, BufferSize);
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmslut.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmslut.c
new file mode 100644
index 0000000000..01906a12c3
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmslut.c
@@ -0,0 +1,1843 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// Allocates a stage shell and fills in its type, channel counts, callbacks and private data
+cmsStage* CMSEXPORT _cmsStageAllocPlaceholder(cmsContext ContextID,
+                                cmsStageSignature Type,
+                                cmsUInt32Number InputChannels,
+                                cmsUInt32Number OutputChannels,
+                                _cmsStageEvalFn     EvalPtr,
+                                _cmsStageDupElemFn  DupElemPtr,
+                                _cmsStageFreeElemFn FreePtr,
+                                void*             Data)
+{
+    cmsStage* Stage = (cmsStage*) _cmsMallocZero(ContextID, sizeof(cmsStage));
+
+    // When the allocation fails there is nothing to fill in and NULL goes back to the caller
+    if (Stage != NULL) {
+
+        Stage ->ContextID  = ContextID;
+        Stage ->Type       = Type;
+        Stage ->Implements = Type;   // By default, no clue on what is implementing
+
+        Stage ->InputChannels  = InputChannels;
+        Stage ->OutputChannels = OutputChannels;
+        Stage ->EvalPtr        = EvalPtr;
+        Stage ->DupElemPtr     = DupElemPtr;
+        Stage ->FreePtr        = FreePtr;
+        Stage ->Data           = Data;
+    }
+
+    return Stage;
+}
+
+
+// Identity evaluation: copies InputChannels floats from In to Out.
+// memmove is used, so the copy is well defined even when both buffers overlap.
+static
+void EvaluateIdentity(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
+{
+    memmove(Out, In, mpe ->InputChannels * sizeof(cmsFloat32Number));
+}
+
+
+cmsStage* CMSEXPORT cmsStageAllocIdentity(cmsContext ContextID, cmsUInt32Number nChannels)
+{
+    // An identity stage has as many outputs as inputs, carries no private data and
+    // therefore registers neither a duplication nor a free callback
+    cmsStage* Identity = _cmsStageAllocPlaceholder(ContextID, cmsSigIdentityElemType,
+                                                   nChannels, nChannels,
+                                                   EvaluateIdentity, NULL, NULL, NULL);
+
+    return Identity;
+}
+
+// Conversion functions. From floating point to 16 bits: scale by 65535, then _cmsQuickSaturateWord
+static
+void FromFloatTo16(const cmsFloat32Number In[], cmsUInt16Number Out[], cmsUInt32Number n)
+{
+    const cmsFloat32Number* Src = In;
+    cmsUInt16Number* Dst = Out;
+    cmsUInt32Number Left;
+
+    for (Left = n; Left > 0; Left--) *Dst++ = _cmsQuickSaturateWord(*Src++ * 65535.0);
+}
+
+// From 16 bits to floating point: each word is divided by 65535
+static
+void From16ToFloat(const cmsUInt16Number In[], cmsFloat32Number Out[], cmsUInt32Number n)
+{
+    const cmsUInt16Number* Src = In;
+    cmsFloat32Number* Dst = Out;
+    cmsUInt32Number Left;
+
+    for (Left = n; Left > 0; Left--) *Dst++ = (cmsFloat32Number) *Src++ / 65535.0F;
+}
+
+
+// Checks whether a pipeline is made of exactly the given sequence of stage types and, if so,
+// hands back pointers to its stages. Call it with the LUT, the number n of expected stages,
+// then n stage type signatures, then n pointers to stage pointers (read as void** here).
+// On a match every non-NULL pointer receives the corresponding stage and TRUE is returned.
+// Otherwise FALSE is returned and none of the pointers is written. Passing NULL for a
+// pointer simply skips storing that stage.
+cmsBool  CMSEXPORT cmsPipelineCheckAndRetreiveStages(const cmsPipeline* Lut, cmsUInt32Number n, ...)
+{
+    va_list args;
+    cmsUInt32Number i;
+    cmsStage* mpe;
+    cmsStageSignature Type;
+    void** ElemPtr;
+
+    // Make sure same number of elements
+    if (cmsPipelineStageCount(Lut) != n) return FALSE;
+
+    va_start(args, n);
+
+    // First pass: consume the n type signatures while walking the stage list
+    mpe = Lut ->Elements;
+    for (i=0; i < n; i++) {
+
+        // Get asked type. cmsStageSignature is promoted to int by compiler
+        Type  = (cmsStageSignature)va_arg(args, int);
+        if (mpe ->Type != Type) {
+
+            va_end(args);       // Mismatch. We are done.
+            return FALSE;
+        }
+        mpe = mpe ->Next;
+    }
+
+    // Found a combination, fill pointers if not NULL (second pass: consume the n output pointers)
+    mpe = Lut ->Elements;
+    for (i=0; i < n; i++) {
+
+        ElemPtr = va_arg(args, void**);
+        if (ElemPtr != NULL)
+            *ElemPtr = mpe;
+
+        mpe = mpe ->Next;
+    }
+
+    va_end(args);
+    return TRUE;
+}
+
+// Below there are implementations for several types of elements. Each type may be implemented by a
+// evaluation function, a duplication function, a function to free resources and a constructor.
+
+// *************************************************************************************************
+// Type cmsSigCurveSetElemType (curves)
+// *************************************************************************************************
+
+cmsToneCurve** _cmsStageGetPtrToCurveSet(const cmsStage* mpe)
+{
+    // Reinterprets the stage data as curve-set data without checking the stage type,
+    // then exposes the curve table itself (not a copy)
+    return ((_cmsStageToneCurvesData*) mpe ->Data) ->TheCurves;
+}
+
+static
+void EvaluateCurves(const cmsFloat32Number In[],
+                    cmsFloat32Number Out[],
+                    const cmsStage *mpe)
+{
+    _cmsStageToneCurvesData* Curves;
+    cmsUInt32Number ch;
+
+    _cmsAssert(mpe != NULL);
+
+    // A stage without curve data, or without a curve table, leaves Out untouched
+    Curves = (_cmsStageToneCurvesData*) mpe ->Data;
+    if (Curves == NULL || Curves ->TheCurves == NULL)
+        return;
+
+    // Channel k of the output is curve k applied to channel k of the input
+    for (ch = 0; ch < Curves ->nCurves; ch++)
+        Out[ch] = cmsEvalToneCurveFloat(Curves ->TheCurves[ch], In[ch]);
+}
+
+static
+void CurveSetElemTypeFree(cmsStage* mpe)
+{
+    _cmsStageToneCurvesData* Owned;
+    cmsToneCurve** Table;
+    cmsUInt32Number k;
+
+    _cmsAssert(mpe != NULL);
+
+    Owned = (_cmsStageToneCurvesData*) mpe ->Data;
+    if (Owned == NULL) return;
+    // Release every curve that is present; the loop does not run when the table is missing
+    Table = Owned ->TheCurves;
+    for (k = 0; Table != NULL && k < Owned ->nCurves; k++) {
+        if (Table[k] != NULL) cmsFreeToneCurve(Table[k]);
+    }
+    // Then the table (handed to _cmsFree even when NULL) and the holder itself
+    _cmsFree(mpe ->ContextID, Table);
+    _cmsFree(mpe ->ContextID, Owned);
+}
+
+
+static
+void* CurveSetDup(cmsStage* mpe)
+{
+    _cmsStageToneCurvesData* Src = (_cmsStageToneCurvesData*) mpe ->Data;
+    _cmsStageToneCurvesData* Copy;
+    cmsUInt32Number k;
+
+    Copy = (_cmsStageToneCurvesData*) _cmsMallocZero(mpe ->ContextID, sizeof(_cmsStageToneCurvesData));
+    if (Copy == NULL) return NULL;
+
+    Copy ->nCurves   = Src ->nCurves;
+    Copy ->TheCurves = (cmsToneCurve**) _cmsCalloc(mpe ->ContextID, Copy ->nCurves, sizeof(cmsToneCurve*));
+
+    if (Copy ->TheCurves != NULL) {
+
+        // Clone curve by curve, stopping at the first duplication that fails
+        for (k = 0; k < Copy ->nCurves; k++) {
+            Copy ->TheCurves[k] = cmsDupToneCurve(Src ->TheCurves[k]);
+            if (Copy ->TheCurves[k] == NULL) break;
+        }
+
+        // Every slot got its curve: the copy is complete
+        if (k == Copy ->nCurves)
+            return (void*) Copy;
+
+        // Partial copy: free whatever slots hold a curve
+        for (k = 0; k < Copy ->nCurves; k++) {
+            if (Copy ->TheCurves[k])
+                cmsFreeToneCurve(Copy ->TheCurves[k]);
+        }
+    }
+
+    // Failure: give back the table (possibly NULL) and the holder
+    _cmsFree(mpe ->ContextID, Copy ->TheCurves);
+    _cmsFree(mpe ->ContextID, Copy);
+    return NULL;
+}
+
+
+// Builds a curve-set stage holding nChannels tone curves.
+//   - Curves == NULL forces identity curves, built with cmsBuildGamma(ContextID, 1.0).
+//   - Otherwise each Curves[i] is duplicated; the caller's curves are not stored in the stage.
+// Returns NULL when any allocation or duplication fails.
+cmsStage* CMSEXPORT cmsStageAllocToneCurves(cmsContext ContextID, cmsUInt32Number nChannels, cmsToneCurve* const Curves[])
+{
+    cmsStage* Stage;
+    _cmsStageToneCurvesData* Set;
+    cmsUInt32Number ch;
+
+    Stage = _cmsStageAllocPlaceholder(ContextID, cmsSigCurveSetElemType, nChannels, nChannels,
+                                     EvaluateCurves, CurveSetDup, CurveSetElemTypeFree, NULL );
+    if (Stage == NULL)
+        return NULL;
+
+    Set = (_cmsStageToneCurvesData*) _cmsMallocZero(ContextID, sizeof(_cmsStageToneCurvesData));
+    if (Set == NULL)
+        goto Fail;
+
+    // The data is attached to the stage before the curves are created, so that the
+    // cmsStageFree call on the failure path also sees whatever was built so far
+    Stage ->Data  = (void*) Set;
+
+    Set ->nCurves   = nChannels;
+    Set ->TheCurves = (cmsToneCurve**) _cmsCalloc(ContextID, nChannels, sizeof(cmsToneCurve*));
+    if (Set ->TheCurves == NULL)
+        goto Fail;
+
+    for (ch = 0; ch < nChannels; ch++) {
+
+        Set ->TheCurves[ch] = (Curves == NULL) ? cmsBuildGamma(ContextID, 1.0)
+                                               : cmsDupToneCurve(Curves[ch]);
+
+        // Either source of the curve may fail
+        if (Set ->TheCurves[ch] == NULL)
+            goto Fail;
+    }
+
+    return Stage;
+
+Fail:
+    // Single exit for every failure after the stage shell exists
+    cmsStageFree(Stage);
+    return NULL;
+}
+
+
+// Create a set of identity curves and mark the stage as implementing an identity element
+cmsStage* CMSEXPORT _cmsStageAllocIdentityCurves(cmsContext ContextID, cmsUInt32Number nChannels)
+{
+    cmsStage* Stage = cmsStageAllocToneCurves(ContextID, nChannels, NULL);
+    // Passing NULL curves asks for identity curves; Type is kept and only Implements is changed
+    if (Stage != NULL)
+        Stage ->Implements = cmsSigIdentityElemType;
+    return Stage;
+}
+
+
+// *************************************************************************************************
+// Type cmsSigMatrixElemType (Matrices)
+// *************************************************************************************************
+
+
+// Special care is taken against precision loss: each output is accumulated in a cmsFloat64Number
+static
+void EvaluateMatrix(const cmsFloat32Number In[],
+                    cmsFloat32Number Out[],
+                    const cmsStage *mpe)
+{
+    _cmsStageMatrixData* Mat = (_cmsStageMatrixData*) mpe ->Data;
+    cmsUInt32Number row, col;
+    cmsFloat64Number Acc;
+
+    // Input is already in 0..1.0 notation, and so is the output
+    for (row = 0; row < mpe ->OutputChannels; row++) {
+
+        Acc = 0;
+
+        // Coefficients are laid out row after row, InputChannels values per output channel
+        for (col = 0; col < mpe->InputChannels; col++)
+            Acc += In[col] * Mat->Double[row*mpe->InputChannels + col];
+
+        // The offset vector is optional
+        if (Mat ->Offset != NULL)
+            Acc += Mat->Offset[row];
+
+        // Narrow back to single precision only once per output channel
+        Out[row] = (cmsFloat32Number) Acc;
+    }
+}
+
+
+// Duplicate a yet-existing matrix element. Returns NULL, leaving nothing allocated, if any copy fails
+static
+void* MatrixElemDup(cmsStage* mpe)
+{
+    _cmsStageMatrixData* Data = (_cmsStageMatrixData*) mpe ->Data;
+    cmsUInt32Number sz = mpe ->InputChannels * mpe ->OutputChannels;
+    _cmsStageMatrixData* NewElem = (_cmsStageMatrixData*) _cmsMallocZero(mpe ->ContextID, sizeof(_cmsStageMatrixData));
+
+    if (NewElem == NULL) return NULL;
+    NewElem ->Double = (cmsFloat64Number*) _cmsDupMem(mpe ->ContextID, Data ->Double, sz * sizeof(cmsFloat64Number)) ;
+    if (NewElem ->Double == NULL) goto Error;       // never hand out an element without coefficients
+    if (Data ->Offset) {
+        NewElem ->Offset = (cmsFloat64Number*) _cmsDupMem(mpe ->ContextID, Data ->Offset, mpe -> OutputChannels * sizeof(cmsFloat64Number)) ;
+        if (NewElem ->Offset == NULL) goto Error;   // the source had offsets, so the copy must have them
+    }
+    return (void*) NewElem;
+Error:
+    if (NewElem ->Double) _cmsFree(mpe ->ContextID, NewElem ->Double);
+    _cmsFree(mpe ->ContextID, NewElem);
+    return NULL;
+}
+
+
+static
+void MatrixElemTypeFree(cmsStage* mpe)
+{
+    _cmsStageMatrixData* Mat = (_cmsStageMatrixData*) mpe ->Data;
+
+    // A stage with no data attached has nothing to release
+    if (Mat != NULL) {
+        // Coefficients and offsets are released only when present
+        if (Mat ->Double != NULL) _cmsFree(mpe ->ContextID, Mat ->Double);
+        if (Mat ->Offset != NULL) _cmsFree(mpe ->ContextID, Mat ->Offset);
+
+        _cmsFree(mpe ->ContextID, Mat);
+    }
+}
+
+
+
+// Allocate a matrix stage computing Out = Matrix * In (+ Offset).
+//   Rows   - number of output channels
+//   Cols   - number of input channels
+//   Matrix - Rows * Cols coefficients stored row by row; copied into the stage
+//   Offset - optional array of Rows offsets, may be NULL; copied into the stage
+// Returns the new stage, or NULL on invalid dimensions or allocation failure.
+// Every failure after the placeholder is created releases the partial stage.
+cmsStage*  CMSEXPORT cmsStageAllocMatrix(cmsContext ContextID, cmsUInt32Number Rows, cmsUInt32Number Cols,
+                                     const cmsFloat64Number* Matrix, const cmsFloat64Number* Offset)
+{
+    cmsUInt32Number i, n;
+    _cmsStageMatrixData* NewElem;
+    cmsStage* NewMPE;
+
+    n = Rows * Cols;
+
+    // Check for overflow
+    if (n == 0) return NULL;
+    if (n >= UINT_MAX / Cols) return NULL;
+    if (n >= UINT_MAX / Rows) return NULL;
+    if (n < Rows || n < Cols) return NULL;
+
+    // Note the order: the stage takes Cols inputs and produces Rows outputs
+    NewMPE = _cmsStageAllocPlaceholder(ContextID, cmsSigMatrixElemType, Cols, Rows,
+                                     EvaluateMatrix, MatrixElemDup, MatrixElemTypeFree, NULL );
+    if (NewMPE == NULL) return NULL;
+
+    NewElem = (_cmsStageMatrixData*) _cmsMallocZero(ContextID, sizeof(_cmsStageMatrixData));
+    if (NewElem == NULL) goto Error;
+
+    // Attach the data right away, so cmsStageFree() releases it on any later failure
+    NewMPE ->Data  = (void*) NewElem;
+
+    NewElem ->Double = (cmsFloat64Number*) _cmsCalloc(ContextID, n, sizeof(cmsFloat64Number));
+    if (NewElem ->Double == NULL) goto Error;
+
+    for (i=0; i < n; i++) {
+        NewElem ->Double[i] = Matrix[i];
+    }
+
+    if (Offset != NULL) {
+
+        NewElem ->Offset = (cmsFloat64Number*) _cmsCalloc(ContextID, Rows, sizeof(cmsFloat64Number));
+        if (NewElem ->Offset == NULL) goto Error;
+
+        for (i=0; i < Rows; i++) {
+            NewElem ->Offset[i] = Offset[i];
+        }
+    }
+
+    return NewMPE;
+
+Error:
+    // Runs MatrixElemTypeFree on whatever is attached, then frees the stage itself
+    cmsStageFree(NewMPE);
+    return NULL;
+}
+
+
+// *************************************************************************************************
+// Type cmsSigCLutElemType
+// *************************************************************************************************
+
+
+// CLUT evaluator that works entirely in floating point: it hands the input
+// straight to the float interpolation routine stored in the stage parameters.
+static
+void EvaluateCLUTfloat(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
+{
+    _cmsStageCLutData* Clut = (_cmsStageCLutData*) mpe ->Data;
+
+    Clut ->Params ->Interpolation.LerpFloat(In, Out, Clut ->Params);
+}
+
+
+// CLUT evaluator for 16-bit tables: converts the float input to 16 bits, runs the
+// 16-bit interpolation routine, and converts the result back to floating point.
+static
+void EvaluateCLUTfloatIn16(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
+{
+    cmsUInt16Number In16[MAX_STAGE_CHANNELS], Out16[MAX_STAGE_CHANNELS];
+    _cmsStageCLutData* Clut = (_cmsStageCLutData*) mpe ->Data;
+
+    // Both scratch buffers hold MAX_STAGE_CHANNELS entries
+    _cmsAssert(mpe ->InputChannels  <= MAX_STAGE_CHANNELS);
+    _cmsAssert(mpe ->OutputChannels <= MAX_STAGE_CHANNELS);
+
+    FromFloatTo16(In, In16, mpe ->InputChannels);
+    Clut ->Params ->Interpolation.Lerp16(In16, Out16, Clut ->Params);
+    From16ToFloat(Out16, Out, mpe ->OutputChannels);
+}
+
+
+// Given a hypercube of b dimensions, with Dims[] holding the number of nodes on each
+// dimension, compute the total number of nodes.
+// Returns 0 on error: a zero-sized dimension, or a product too large for a
+// cmsUInt32Number.
+static
+cmsUInt32Number CubeSize(const cmsUInt32Number Dims[], cmsUInt32Number b)
+{
+    cmsUInt32Number rv, dim;
+
+    _cmsAssert(Dims != NULL);
+
+    for (rv = 1; b > 0; b--) {
+
+        dim = Dims[b-1];
+        if (dim == 0) return 0;  // Error
+
+        // Reject before multiplying. Testing only the product is not enough, because
+        // a wrapped product can be small: 3 * 1431655766 wraps to 2 and would pass.
+        if (rv > UINT_MAX / dim) return 0;
+
+        rv *= dim;
+
+        // Original, stricter limit; kept so sizes rejected before are still rejected
+        if (rv > UINT_MAX / dim) return 0;
+    }
+
+    return rv;
+}
+
+// Duplicate the private data of a CLUT stage: the node table (16-bit or float,
+// according to HasFloatValues) and a new set of interpolation parameters computed
+// for the copied table. Returns NULL on any failure, after releasing whatever had
+// already been allocated.
+static
+void* CLUTElemDup(cmsStage* mpe)
+{
+    _cmsStageCLutData* Src = (_cmsStageCLutData*) mpe ->Data;
+    _cmsStageCLutData* Copy;
+
+    Copy = (_cmsStageCLutData*) _cmsMallocZero(mpe ->ContextID, sizeof(_cmsStageCLutData));
+    if (Copy == NULL) return NULL;
+
+    Copy ->nEntries       = Src ->nEntries;
+    Copy ->HasFloatValues = Src ->HasFloatValues;
+
+    // Tab.T and Tab.TFloat are members of the same union, so this test covers both
+    if (Src ->Tab.T != NULL) {
+
+        if (Src ->HasFloatValues) {
+
+            Copy ->Tab.TFloat = (cmsFloat32Number*) _cmsDupMem(mpe ->ContextID, Src ->Tab.TFloat, Src ->nEntries * sizeof (cmsFloat32Number));
+            if (Copy ->Tab.TFloat == NULL) goto Error;
+        }
+        else {
+
+            Copy ->Tab.T = (cmsUInt16Number*) _cmsDupMem(mpe ->ContextID, Src ->Tab.T, Src ->nEntries * sizeof (cmsUInt16Number));
+            if (Copy ->Tab.T == NULL) goto Error;
+        }
+    }
+
+    // Same geometry and flags as the source, but bound to the copied table
+    Copy ->Params = _cmsComputeInterpParamsEx(mpe ->ContextID,
+                                              Src ->Params ->nSamples,
+                                              Src ->Params ->nInputs,
+                                              Src ->Params ->nOutputs,
+                                              Copy ->Tab.T,
+                                              Src ->Params ->dwFlags);
+    if (Copy ->Params == NULL) goto Error;
+
+    return (void*) Copy;
+
+ Error:
+    // Valid for either table flavour
+    if (Copy ->Tab.T != NULL)
+        _cmsFree(mpe ->ContextID, Copy ->Tab.T);
+
+    _cmsFree(mpe ->ContextID, Copy);
+    return NULL;
+}
+
+
+// Release the private data of a CLUT stage: the node table, the interpolation
+// parameters and the container. The stage object itself is not freed here.
+static
+void CLutElemTypeFree(cmsStage* mpe)
+{
+    _cmsStageCLutData* Clut = (_cmsStageCLutData*) mpe ->Data;
+
+    // A stage with no data attached needs no work
+    if (Clut != NULL) {
+
+        // Tab.T aliases Tab.TFloat, so this frees either kind of table
+        if (Clut ->Tab.T != NULL)
+            _cmsFree(mpe ->ContextID, Clut ->Tab.T);
+
+        _cmsFreeInterpParams(Clut ->Params);
+        _cmsFree(mpe ->ContextID, Clut);
+    }
+}
+
+
+// Allocates a 16-bit multidimensional CLUT stage, evaluated at 16-bit precision.
+// The table may have a different granularity on each dimension.
+//   clutPoints - grid points per input dimension (the first inputChan entries are used)
+//   inputChan  - number of input channels, at most MAX_INPUT_DIMENSIONS
+//   outputChan - number of output channels
+//   Table      - optional initial contents, outputChan * (total nodes) values; when
+//                NULL the table keeps whatever _cmsCalloc returned
+// Returns the new stage, or NULL on bad arguments, size overflow or allocation failure.
+cmsStage* CMSEXPORT cmsStageAllocCLut16bitGranular(cmsContext ContextID,
+                                         const cmsUInt32Number clutPoints[],
+                                         cmsUInt32Number inputChan,
+                                         cmsUInt32Number outputChan,
+                                         const cmsUInt16Number* Table)
+{
+    cmsUInt32Number i, n, nNodes;
+    _cmsStageCLutData* NewElem;
+    cmsStage* NewMPE;
+
+    _cmsAssert(clutPoints != NULL);
+
+    if (inputChan > MAX_INPUT_DIMENSIONS) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Too many input channels (%d channels, max=%d)", inputChan, MAX_INPUT_DIMENSIONS);
+        return NULL;
+    }
+
+    NewMPE = _cmsStageAllocPlaceholder(ContextID, cmsSigCLutElemType, inputChan, outputChan,
+                                     EvaluateCLUTfloatIn16, CLUTElemDup, CLutElemTypeFree, NULL );
+
+    if (NewMPE == NULL) return NULL;
+
+    NewElem = (_cmsStageCLutData*) _cmsMallocZero(ContextID, sizeof(_cmsStageCLutData));
+    if (NewElem == NULL) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    // From here on cmsStageFree() also releases NewElem through CLutElemTypeFree
+    NewMPE ->Data  = (void*) NewElem;
+    NewElem -> HasFloatValues = FALSE;
+
+    // Table size is outputChan * nodes. CubeSize() returns 0 on error; the product is
+    // checked too, so a wrapped n can never under-size the table allocated below.
+    nNodes = CubeSize(clutPoints, inputChan);
+    if (nNodes == 0 || outputChan == 0 || outputChan > UINT_MAX / nNodes) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    NewElem -> nEntries = n = outputChan * nNodes;
+
+    NewElem ->Tab.T  = (cmsUInt16Number*) _cmsCalloc(ContextID, n, sizeof(cmsUInt16Number));
+    if (NewElem ->Tab.T == NULL) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    if (Table != NULL) {
+        for (i=0; i < n; i++) {
+            NewElem ->Tab.T[i] = Table[i];
+        }
+    }
+
+    NewElem ->Params = _cmsComputeInterpParamsEx(ContextID, clutPoints, inputChan, outputChan, NewElem ->Tab.T, CMS_LERP_FLAGS_16BITS);
+    if (NewElem ->Params == NULL) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    return NewMPE;
+}
+
+// Allocates a 16-bit CLUT stage that uses the same number of grid points on every
+// input dimension. Thin wrapper over cmsStageAllocCLut16bitGranular.
+cmsStage* CMSEXPORT cmsStageAllocCLut16bit(cmsContext ContextID,
+                                    cmsUInt32Number nGridPoints,
+                                    cmsUInt32Number inputChan,
+                                    cmsUInt32Number outputChan,
+                                    const cmsUInt16Number* Table)
+{
+    cmsUInt32Number Dimensions[MAX_INPUT_DIMENSIONS];
+    int k = 0;
+
+    // Fill every slot, whatever inputChan is, with the common granularity
+    while (k < MAX_INPUT_DIMENSIONS) {
+        Dimensions[k] = nGridPoints;
+        k++;
+    }
+
+    return cmsStageAllocCLut16bitGranular(ContextID, Dimensions, inputChan, outputChan, Table);
+}
+
+
+// Allocates a floating point CLUT stage that uses the same number of grid points on
+// every input dimension. Thin wrapper over cmsStageAllocCLutFloatGranular.
+cmsStage* CMSEXPORT cmsStageAllocCLutFloat(cmsContext ContextID,
+                                       cmsUInt32Number nGridPoints,
+                                       cmsUInt32Number inputChan,
+                                       cmsUInt32Number outputChan,
+                                       const cmsFloat32Number* Table)
+{
+    cmsUInt32Number Dimensions[MAX_INPUT_DIMENSIONS];
+    int k = 0;
+
+    // Fill every slot, whatever inputChan is, with the common granularity
+    while (k < MAX_INPUT_DIMENSIONS) {
+        Dimensions[k] = nGridPoints;
+        k++;
+    }
+
+    return cmsStageAllocCLutFloatGranular(ContextID, Dimensions, inputChan, outputChan, Table);
+}
+
+
+
+// Allocates a floating point multidimensional CLUT stage. The table may have a
+// different granularity on each dimension.
+//   clutPoints - grid points per input dimension (the first inputChan entries are used)
+//   inputChan  - number of input channels, at most MAX_INPUT_DIMENSIONS
+//   outputChan - number of output channels
+//   Table      - optional initial contents, outputChan * (total nodes) values; when
+//                NULL the table keeps whatever _cmsCalloc returned
+// Returns the new stage, or NULL on bad arguments, size overflow or allocation failure.
+cmsStage* CMSEXPORT cmsStageAllocCLutFloatGranular(cmsContext ContextID, const cmsUInt32Number clutPoints[], cmsUInt32Number inputChan, cmsUInt32Number outputChan, const cmsFloat32Number* Table)
+{
+    cmsUInt32Number i, n, nNodes;
+    _cmsStageCLutData* NewElem;
+    cmsStage* NewMPE;
+
+    _cmsAssert(clutPoints != NULL);
+
+    if (inputChan > MAX_INPUT_DIMENSIONS) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Too many input channels (%d channels, max=%d)", inputChan, MAX_INPUT_DIMENSIONS);
+        return NULL;
+    }
+
+    NewMPE = _cmsStageAllocPlaceholder(ContextID, cmsSigCLutElemType, inputChan, outputChan,
+                                             EvaluateCLUTfloat, CLUTElemDup, CLutElemTypeFree, NULL);
+    if (NewMPE == NULL) return NULL;
+
+    NewElem = (_cmsStageCLutData*) _cmsMallocZero(ContextID, sizeof(_cmsStageCLutData));
+    if (NewElem == NULL) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    // From here on cmsStageFree() also releases NewElem through CLutElemTypeFree
+    NewMPE ->Data  = (void*) NewElem;
+    NewElem -> HasFloatValues = TRUE;
+
+    // Table size is outputChan * nodes. CubeSize() returns 0 on error; the product is
+    // checked too, so a wrapped n can never under-size the table allocated below.
+    nNodes = CubeSize(clutPoints, inputChan);
+    if (nNodes == 0 || outputChan == 0 || outputChan > UINT_MAX / nNodes) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    NewElem -> nEntries = n = outputChan * nNodes;
+
+    NewElem ->Tab.TFloat  = (cmsFloat32Number*) _cmsCalloc(ContextID, n, sizeof(cmsFloat32Number));
+    if (NewElem ->Tab.TFloat == NULL) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    if (Table != NULL) {
+        for (i=0; i < n; i++) {
+            NewElem ->Tab.TFloat[i] = Table[i];
+        }
+    }
+
+    NewElem ->Params = _cmsComputeInterpParamsEx(ContextID, clutPoints,  inputChan, outputChan, NewElem ->Tab.TFloat, CMS_LERP_FLAGS_FLOAT);
+    if (NewElem ->Params == NULL) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    return NewMPE;
+}
+
+
+// Sampler callback that copies input to output. Cargo points to an int holding the
+// number of channels to copy. Always reports success (returns 1).
+static
+int IdentitySampler(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[], CMSREGISTER void * Cargo)
+{
+    int nChan = *(int*) Cargo;
+    int k = 0;
+
+    while (k < nChan) {
+        Out[k] = In[k];
+        k++;
+    }
+
+    return 1;
+}
+
+// Creates a CLUT stage that just copies input to output: a 16-bit CLUT with two
+// nodes per dimension, filled through IdentitySampler and tagged as implementing
+// cmsSigIdentityElemType. Returns NULL if allocation or sampling fails.
+cmsStage* CMSEXPORT _cmsStageAllocIdentityCLut(cmsContext ContextID, cmsUInt32Number nChan)
+{
+    cmsUInt32Number Dimensions[MAX_INPUT_DIMENSIONS];
+    cmsStage* mpe ;
+    int k;
+
+    for (k = MAX_INPUT_DIMENSIONS - 1; k >= 0; k--)
+        Dimensions[k] = 2;
+
+    mpe = cmsStageAllocCLut16bitGranular(ContextID, Dimensions, nChan, nChan, NULL);
+    if (mpe == NULL) return NULL;
+
+    // The channel count travels to the sampler through the Cargo pointer
+    if (cmsStageSampleCLut16bit(mpe, IdentitySampler, &nChan, 0)) {
+
+        mpe ->Implements = cmsSigIdentityElemType;
+        return mpe;
+    }
+
+    cmsStageFree(mpe);
+    return NULL;
+}
+
+
+
+// Quantize a value 0 <= i < MaxSamples to the range 0..0xffff: i is scaled by
+// 65535 / (MaxSamples - 1) and the result saturated to a 16-bit word.
+cmsUInt16Number CMSEXPORT _cmsQuantizeVal(cmsFloat64Number i, cmsUInt32Number MaxSamples)
+{
+    const cmsFloat64Number Span = (cmsFloat64Number) (MaxSamples - 1);
+
+    return _cmsQuickSaturateWord((i * 65535.) / Span);
+}
+
+
+// Sweeps every node of a 16-bit CLUT stage and calls Sampler on each one.
+// For each node, In[] receives the node coordinates quantized to 0..0xffff and
+// Out[] the values currently stored for it (when the stage has a table). Unless
+// SAMPLER_INSPECT is set in dwFlags, whatever the callback leaves in Out[] is
+// written back to the table.
+// Returns TRUE if all went ok; FALSE on a missing stage or data, out-of-range
+// channel counts, an invalid grid, or as soon as Sampler returns 0.
+cmsBool CMSEXPORT cmsStageSampleCLut16bit(cmsStage* mpe, cmsSAMPLER16 Sampler, void * Cargo, cmsUInt32Number dwFlags)
+{
+    int node, ch, base, rest;
+    cmsUInt32Number nTotalPoints;
+    cmsUInt32Number nInputs, nOutputs;
+    cmsUInt32Number* nSamples;
+    cmsUInt16Number In[MAX_INPUT_DIMENSIONS+1], Out[MAX_STAGE_CHANNELS];
+    _cmsStageCLutData* clut;
+
+    if (mpe == NULL) return FALSE;
+
+    clut = (_cmsStageCLutData*) mpe->Data;
+    if (clut == NULL) return FALSE;
+
+    nSamples = clut->Params ->nSamples;
+    nInputs  = clut->Params ->nInputs;
+    nOutputs = clut->Params ->nOutputs;
+
+    // Channel counts are unsigned, so only zero can fail the lower bound
+    if (nInputs == 0 || nOutputs == 0) return FALSE;
+    if (nInputs > MAX_INPUT_DIMENSIONS || nOutputs >= MAX_STAGE_CHANNELS) return FALSE;
+
+    memset(In, 0, sizeof(In));
+    memset(Out, 0, sizeof(Out));
+
+    nTotalPoints = CubeSize(nSamples, nInputs);
+    if (nTotalPoints == 0) return FALSE;
+
+    // base is the offset of the current node's first output value in the table
+    for (node = 0, base = 0; node < (int) nTotalPoints; node++, base += nOutputs) {
+
+        // Split the linear node number into one grid position per input,
+        // with the last input varying fastest
+        rest = node;
+        for (ch = (int) nInputs - 1; ch >= 0; --ch) {
+
+            cmsUInt32Number  Colorant = rest % nSamples[ch];
+
+            rest /= nSamples[ch];
+
+            In[ch] = _cmsQuantizeVal(Colorant, nSamples[ch]);
+        }
+
+        if (clut ->Tab.T != NULL) {
+            for (ch = 0; ch < (int) nOutputs; ch++)
+                Out[ch] = clut->Tab.T[base + ch];
+        }
+
+        if (!Sampler(In, Out, Cargo))
+            return FALSE;
+
+        if (!(dwFlags & SAMPLER_INSPECT) && clut ->Tab.T != NULL) {
+            for (ch = 0; ch < (int) nOutputs; ch++)
+                clut->Tab.T[base + ch] = Out[ch];
+        }
+    }
+
+    return TRUE;
+}
+
+// Floating point counterpart of cmsStageSampleCLut16bit: sweeps every node of a
+// float CLUT stage and calls Sampler on each one.
+// For each node, In[] receives the node coordinates in 0..1.0 and Out[] the values
+// currently stored for it (when the stage has a table). Unless SAMPLER_INSPECT is
+// set in dwFlags, whatever the callback leaves in Out[] is written back.
+// Returns TRUE if all went ok; FALSE on a missing stage or data, out-of-range
+// channel counts, an invalid grid, or as soon as Sampler returns 0.
+cmsBool CMSEXPORT cmsStageSampleCLutFloat(cmsStage* mpe, cmsSAMPLERFLOAT Sampler, void * Cargo, cmsUInt32Number dwFlags)
+{
+    int i, t, index, rest;
+    cmsUInt32Number nTotalPoints;
+    cmsUInt32Number nInputs, nOutputs;
+    cmsUInt32Number* nSamples;
+    cmsFloat32Number In[MAX_INPUT_DIMENSIONS+1], Out[MAX_STAGE_CHANNELS];
+    _cmsStageCLutData* clut;
+
+    // Same guards as the 16-bit variant: never dereference a missing stage or data
+    if (mpe == NULL) return FALSE;
+
+    clut = (_cmsStageCLutData*) mpe->Data;
+
+    if (clut == NULL) return FALSE;
+
+    nSamples = clut->Params ->nSamples;
+    nInputs  = clut->Params ->nInputs;
+    nOutputs = clut->Params ->nOutputs;
+
+    if (nInputs <= 0) return FALSE;
+    if (nOutputs <= 0) return FALSE;
+    if (nInputs  > MAX_INPUT_DIMENSIONS) return FALSE;
+    if (nOutputs >= MAX_STAGE_CHANNELS) return FALSE;
+
+    // The sampler must never see indeterminate values, even without a table
+    memset(In, 0, sizeof(In));
+    memset(Out, 0, sizeof(Out));
+
+    nTotalPoints = CubeSize(nSamples, nInputs);
+    if (nTotalPoints == 0) return FALSE;
+
+    index = 0;
+    for (i = 0; i < (int)nTotalPoints; i++) {
+
+        // Split the linear node number into one grid position per input,
+        // with the last input varying fastest
+        rest = i;
+        for (t = (int) nInputs-1; t >=0; --t) {
+
+            cmsUInt32Number  Colorant = rest % nSamples[t];
+
+            rest /= nSamples[t];
+
+            In[t] =  (cmsFloat32Number) (_cmsQuantizeVal(Colorant, nSamples[t]) / 65535.0);
+        }
+
+        if (clut ->Tab.TFloat != NULL) {
+            for (t=0; t < (int) nOutputs; t++)
+                Out[t] = clut->Tab.TFloat[index + t];
+        }
+
+        if (!Sampler(In, Out, Cargo))
+            return FALSE;
+
+        if (!(dwFlags & SAMPLER_INSPECT)) {
+
+            if (clut ->Tab.TFloat != NULL) {
+                for (t=0; t < (int) nOutputs; t++)
+                    clut->Tab.TFloat[index + t] = Out[t];
+            }
+        }
+
+        index += nOutputs;
+    }
+
+    return TRUE;
+}
+
+
+
+// Sweeps the whole input space described by clutPoints[] and calls Sampler on every
+// node, passing the node coordinates quantized to 0..0xffff and a NULL output.
+// Returns TRUE if all went ok; FALSE when nInputs is too large, the grid is invalid,
+// or as soon as Sampler returns 0.
+cmsBool CMSEXPORT cmsSliceSpace16(cmsUInt32Number nInputs, const cmsUInt32Number clutPoints[],
+                                         cmsSAMPLER16 Sampler, void * Cargo)
+{
+    int node, ch, rest;
+    cmsUInt32Number nTotalPoints;
+    cmsUInt16Number In[cmsMAXCHANNELS];
+
+    if (nInputs >= cmsMAXCHANNELS) return FALSE;
+
+    nTotalPoints = CubeSize(clutPoints, nInputs);
+    if (nTotalPoints == 0) return FALSE;
+
+    for (node = 0; node < (int) nTotalPoints; node++) {
+
+        // Split the linear node number into one grid position per input,
+        // with the last input varying fastest
+        rest = node;
+        for (ch = (int) nInputs - 1; ch >= 0; --ch) {
+
+            cmsUInt32Number  Colorant = rest % clutPoints[ch];
+
+            rest /= clutPoints[ch];
+            In[ch] = _cmsQuantizeVal(Colorant, clutPoints[ch]);
+        }
+
+        if (!Sampler(In, NULL, Cargo))
+            return FALSE;
+    }
+
+    return TRUE;
+}
+
+// Floating point version of cmsSliceSpace16: sweeps the whole input space described
+// by clutPoints[] and calls Sampler on every node, passing the node coordinates in
+// 0..1.0 and a NULL output.
+// Returns TRUE if all went ok; FALSE when nInputs is too large, the grid is invalid,
+// or as soon as Sampler returns 0.
+cmsInt32Number CMSEXPORT cmsSliceSpaceFloat(cmsUInt32Number nInputs, const cmsUInt32Number clutPoints[],
+                                            cmsSAMPLERFLOAT Sampler, void * Cargo)
+{
+    int node, ch, rest;
+    cmsUInt32Number nTotalPoints;
+    cmsFloat32Number In[cmsMAXCHANNELS];
+
+    if (nInputs >= cmsMAXCHANNELS) return FALSE;
+
+    nTotalPoints = CubeSize(clutPoints, nInputs);
+    if (nTotalPoints == 0) return FALSE;
+
+    for (node = 0; node < (int) nTotalPoints; node++) {
+
+        // Split the linear node number into one grid position per input,
+        // with the last input varying fastest
+        rest = node;
+        for (ch = (int) nInputs - 1; ch >= 0; --ch) {
+
+            cmsUInt32Number  Colorant = rest % clutPoints[ch];
+
+            rest /= clutPoints[ch];
+            In[ch] =  (cmsFloat32Number) (_cmsQuantizeVal(Colorant, clutPoints[ch]) / 65535.0);
+        }
+
+        if (!Sampler(In, NULL, Cargo))
+            return FALSE;
+    }
+
+    return TRUE;
+}
+
+// ********************************************************************************
+// Type cmsSigLab2XYZElemType
+// ********************************************************************************
+
+
+// Stage evaluator converting Lab to XYZ, both encoded in the 0..1.0 domain.
+// The stage carries no private data, so mpe is not used.
+static
+void EvaluateLab2XYZ(const cmsFloat32Number In[],
+                     cmsFloat32Number Out[],
+                     const cmsStage *mpe)
+{
+    const cmsFloat64Number XYZadj = MAX_ENCODEABLE_XYZ;
+    cmsCIEXYZ XYZ;
+    cmsCIELab Lab;
+
+    // Decode following V4 rules: L* in 0..100, a* and b* shifted by 128
+    Lab.L = In[0] * 100.0;
+    Lab.a = In[1] * 255.0 - 128.0;
+    Lab.b = In[2] * 255.0 - 128.0;
+
+    cmsLab2XYZ(NULL, &XYZ, &Lab);
+
+    // Bring XYZ from 0..1.99997 back to 0..1.0. The 1.99997 comes from 0xffff
+    // encoded as 1.15 fixed point, that is 1 + (32767.0 / 32768.0)
+    Out[0] = (cmsFloat32Number) ((cmsFloat64Number) XYZ.X / XYZadj);
+    Out[1] = (cmsFloat32Number) ((cmsFloat64Number) XYZ.Y / XYZadj);
+    Out[2] = (cmsFloat32Number) ((cmsFloat64Number) XYZ.Z / XYZadj);
+    return;
+
+    cmsUNUSED_PARAMETER(mpe);
+}
+
+
+// Allocates a 3 -> 3 stage evaluated by EvaluateLab2XYZ. The stage holds no private
+// data, so no dup or free routines are registered.
+cmsStage* CMSEXPORT _cmsStageAllocLab2XYZ(cmsContext ContextID)
+{
+    cmsStage* mpe;
+
+    mpe = _cmsStageAllocPlaceholder(ContextID, cmsSigLab2XYZElemType, 3, 3, EvaluateLab2XYZ, NULL, NULL, NULL);
+    return mpe;
+}
+
+// ********************************************************************************
+
+// Curve-based Lab V2 -> V4 conversion.
+// In v2, L=100 is supposed to sit at 0xFF00. No reasonable number of grid points
+// matches that exactly, but a prelinearization table of 258 entries maps 0xFF00
+// exactly onto entry 257, which helps to avoid scum dot. The remaining entries
+// should be scaled by (255*257/256), and that is not exact.
+cmsStage* _cmsStageAllocLabV2ToV4curves(cmsContext ContextID)
+{
+    cmsStage* mpe;
+    cmsToneCurve* LabTable[3];
+    int ch, k;
+
+    for (ch = 0; ch < 3; ch++)
+        LabTable[ch] = cmsBuildTabulatedToneCurve16(ContextID, 258, NULL);
+
+    // All three curves are needed; release whatever was built otherwise
+    if (LabTable[0] == NULL || LabTable[1] == NULL || LabTable[2] == NULL) {
+        cmsFreeToneCurveTriple(LabTable);
+        return NULL;
+    }
+
+    for (ch = 0; ch < 3; ch++) {
+
+        cmsUInt16Number* Table = LabTable[ch] ->Table16;
+
+        // We need to map * (0xffff / 0xff00), that is the same as (257 / 256),
+        // so 258-entry tables do the trick: (i / 257) * (255 * 257) * (257 / 256)
+        for (k = 0; k < 257; k++)
+            Table[k] = (cmsUInt16Number) ((k * 0xffff + 0x80) >> 8);
+
+        Table[257] = 0xffff;
+    }
+
+    // The local curves are released right after the stage has been allocated
+    mpe = cmsStageAllocToneCurves(ContextID, 3, LabTable);
+    cmsFreeToneCurveTriple(LabTable);
+
+    if (mpe == NULL) return NULL;
+
+    mpe ->Implements = cmsSigLabV2toV4;
+    return mpe;
+}
+
+// ********************************************************************************
+
+// Matrix-based Lab V2 -> V4 conversion: more accurate than the curve-based variant,
+// but slower, and it cannot properly be saved in devicelink profiles.
+// Every channel is scaled by 65535/65280. Returns NULL if the stage cannot be allocated.
+cmsStage* CMSEXPORT _cmsStageAllocLabV2ToV4(cmsContext ContextID)
+{
+    static const cmsFloat64Number V2ToV4[] = {
+        65535.0/65280.0, 0, 0,
+        0, 65535.0/65280.0, 0,
+        0, 0, 65535.0/65280.0
+    };
+
+    cmsStage *mpe = cmsStageAllocMatrix(ContextID, 3, 3, V2ToV4, NULL);
+
+    if (mpe != NULL)
+        mpe ->Implements = cmsSigLabV2toV4;
+
+    return mpe;
+}
+
+
+// Matrix-based Lab V4 -> V2 conversion, the reverse of _cmsStageAllocLabV2ToV4.
+// Every channel is scaled by 65280/65535. Returns NULL if the stage cannot be allocated.
+cmsStage* CMSEXPORT _cmsStageAllocLabV4ToV2(cmsContext ContextID)
+{
+    static const cmsFloat64Number V4ToV2[] = {
+        65280.0/65535.0, 0, 0,
+        0, 65280.0/65535.0, 0,
+        0, 0, 65280.0/65535.0
+    };
+
+    cmsStage *mpe = cmsStageAllocMatrix(ContextID, 3, 3, V4ToV2, NULL);
+
+    if (mpe != NULL)
+        mpe ->Implements = cmsSigLabV4toV2;
+
+    return mpe;
+}
+
+
+// From Lab to float PCS. The MPE gives numbers in the normal Lab range, while the
+// formatters need the 0..1.0 range:
+//   L*  : 0...100    => 0...1.0  (L* / 100)
+//   ab* : -128..+127 => 0...1.0  ((ab* + 128) / 255)
+// Returns NULL if the stage cannot be allocated.
+cmsStage* _cmsStageNormalizeFromLabFloat(cmsContext ContextID)
+{
+    static const cmsFloat64Number Scale[] = {
+        1.0/100.0, 0, 0,
+        0, 1.0/255.0, 0,
+        0, 0, 1.0/255.0
+    };
+
+    static const cmsFloat64Number Shift[] = {
+        0,
+        128.0/255.0,
+        128.0/255.0
+    };
+
+    cmsStage *mpe = cmsStageAllocMatrix(ContextID, 3, 3, Scale, Shift);
+
+    if (mpe != NULL)
+        mpe ->Implements = cmsSigLab2FloatPCS;
+
+    return mpe;
+}
+
+// From XYZ to floating point PCS: every channel is scaled by 32768/65535.
+// Returns NULL if the stage cannot be allocated.
+cmsStage* _cmsStageNormalizeFromXyzFloat(cmsContext ContextID)
+{
+#define XYZ_TO_PCS (32768.0/65535.0)
+    static const cmsFloat64Number Scale[] = {
+        XYZ_TO_PCS, 0, 0,
+        0, XYZ_TO_PCS, 0,
+        0, 0, XYZ_TO_PCS
+    };
+#undef XYZ_TO_PCS
+
+    cmsStage *mpe =  cmsStageAllocMatrix(ContextID, 3, 3, Scale, NULL);
+
+    if (mpe != NULL)
+        mpe ->Implements = cmsSigXYZ2FloatPCS;
+
+    return mpe;
+}
+
+// From floating point PCS to Lab, the inverse of _cmsStageNormalizeFromLabFloat:
+//   L*  = v * 100
+//   ab* = v * 255 - 128
+// Returns NULL if the stage cannot be allocated.
+cmsStage* _cmsStageNormalizeToLabFloat(cmsContext ContextID)
+{
+    static const cmsFloat64Number Scale[] = {
+        100.0, 0, 0,
+        0, 255.0, 0,
+        0, 0, 255.0
+    };
+
+    static const cmsFloat64Number Shift[] = {
+        0,
+        -128.0,
+        -128.0
+    };
+
+    cmsStage *mpe =  cmsStageAllocMatrix(ContextID, 3, 3, Scale, Shift);
+
+    if (mpe != NULL)
+        mpe ->Implements = cmsSigFloatPCS2Lab;
+
+    return mpe;
+}
+
+// From floating point PCS to XYZ: every channel is scaled by 65535/32768, the
+// inverse of _cmsStageNormalizeFromXyzFloat.
+// Returns NULL if the stage cannot be allocated.
+cmsStage* _cmsStageNormalizeToXyzFloat(cmsContext ContextID)
+{
+#define PCS_TO_XYZ (65535.0/32768.0)
+
+    static const cmsFloat64Number Scale[] = {
+        PCS_TO_XYZ, 0, 0,
+        0, PCS_TO_XYZ, 0,
+        0, 0, PCS_TO_XYZ
+    };
+#undef PCS_TO_XYZ
+
+    cmsStage *mpe = cmsStageAllocMatrix(ContextID, 3, 3, Scale, NULL);
+
+    if (mpe != NULL)
+        mpe ->Implements = cmsSigFloatPCS2XYZ;
+
+    return mpe;
+}
+
+// Stage evaluator that replaces values smaller than zero with zero and passes
+// every other value through unchanged. Processes InputChannels values.
+static
+void Clipper(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
+{
+       cmsUInt32Number ch;
+
+       for (ch = 0; ch < mpe->InputChannels; ch++) {
+
+              cmsFloat32Number v = In[ch];
+
+              if (v < 0)
+                     Out[ch] = 0;
+              else
+                     Out[ch] = v;
+       }
+}
+
+// Allocates a stage with nChannels inputs and outputs, evaluated by Clipper.
+// The stage has no private data, so no dup or free routines are registered.
+cmsStage*  _cmsStageClipNegatives(cmsContext ContextID, cmsUInt32Number nChannels)
+{
+       cmsStage* mpe;
+
+       mpe = _cmsStageAllocPlaceholder(ContextID, cmsSigClipNegativesElemType,
+              nChannels, nChannels, Clipper, NULL, NULL, NULL);
+
+       return mpe;
+}
+
+// ********************************************************************************
+// Type cmsSigXYZ2LabElemType
+// ********************************************************************************
+
+// Stage evaluator converting XYZ to Lab, both encoded in the 0..1.0 domain.
+// The stage carries no private data, so mpe is not used.
+static
+void EvaluateXYZ2Lab(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
+{
+    const cmsFloat64Number XYZadj = MAX_ENCODEABLE_XYZ;
+    cmsCIEXYZ XYZ;
+    cmsCIELab Lab;
+
+    // Expand 0..1.0 to the encodeable XYZ range
+    XYZ.X = In[0] * XYZadj;
+    XYZ.Y = In[1] * XYZadj;
+    XYZ.Z = In[2] * XYZadj;
+
+    cmsXYZ2Lab(NULL, &Lab, &XYZ);
+
+    // Encode V4 Lab as 0..1.0: L* over 100, a* and b* shifted by 128 over 255
+    Out[0] = (cmsFloat32Number) (Lab.L / 100.0);
+    Out[1] = (cmsFloat32Number) ((Lab.a + 128.0) / 255.0);
+    Out[2] = (cmsFloat32Number) ((Lab.b + 128.0) / 255.0);
+    return;
+
+    cmsUNUSED_PARAMETER(mpe);
+}
+
+// Allocates a 3 -> 3 stage evaluated by EvaluateXYZ2Lab. The stage holds no private
+// data, so no dup or free routines are registered.
+cmsStage* CMSEXPORT _cmsStageAllocXYZ2Lab(cmsContext ContextID)
+{
+    cmsStage* mpe;
+
+    mpe = _cmsStageAllocPlaceholder(ContextID, cmsSigXYZ2LabElemType, 3, 3, EvaluateXYZ2Lab, NULL, NULL, NULL);
+    return mpe;
+}
+
+// ********************************************************************************
+
+// For v4, S-Shaped curves are placed in a/b axis to increase resolution near gray.
+// Builds a three-curve stage: a gamma 1.0 curve for L* and a parametric curve
+// (type 108, parameter 2.4) for each of a* and b*.
+// Returns the new stage, or NULL if a curve or the stage cannot be created.
+cmsStage* _cmsStageAllocLabPrelin(cmsContext ContextID)
+{
+    cmsStage* mpe;
+    cmsToneCurve* LabTable[3];
+    cmsFloat64Number Params[1] =  {2.4} ;
+
+    LabTable[0] = cmsBuildGamma(ContextID, 1.0);
+    LabTable[1] = cmsBuildParametricToneCurve(ContextID, 108, Params);
+    LabTable[2] = cmsBuildParametricToneCurve(ContextID, 108, Params);
+
+    // All three curves are needed; release whatever was built otherwise
+    if (LabTable[0] == NULL || LabTable[1] == NULL || LabTable[2] == NULL) {
+        cmsFreeToneCurveTriple(LabTable);
+        return NULL;
+    }
+
+    // Same ownership pattern as _cmsStageAllocLabV2ToV4curves: the local curves are
+    // released once the stage has been allocated, otherwise they leak on every call
+    mpe = cmsStageAllocToneCurves(ContextID, 3, LabTable);
+    cmsFreeToneCurveTriple(LabTable);
+
+    return mpe;
+}
+
+
+// Free a single MPE: run the stage's own free routine, if it has one, and then
+// release the stage object. mpe must not be NULL.
+void CMSEXPORT cmsStageFree(cmsStage* mpe)
+{
+    if (mpe ->FreePtr != NULL) {
+        mpe ->FreePtr(mpe);
+    }
+
+    _cmsFree(mpe ->ContextID, mpe);
+}
+
+
+// Returns the number of input channels of the stage. mpe must not be NULL.
+cmsUInt32Number  CMSEXPORT cmsStageInputChannels(const cmsStage* mpe)
+{
+    const cmsUInt32Number nChannels = mpe ->InputChannels;
+
+    return nChannels;
+}
+
+// Returns the number of output channels of the stage. mpe must not be NULL.
+cmsUInt32Number  CMSEXPORT cmsStageOutputChannels(const cmsStage* mpe)
+{
+    const cmsUInt32Number nChannels = mpe ->OutputChannels;
+
+    return nChannels;
+}
+
+// Returns the type signature stored in the stage. mpe must not be NULL.
+cmsStageSignature CMSEXPORT cmsStageType(const cmsStage* mpe)
+{
+    const cmsStageSignature Sig = mpe ->Type;
+
+    return Sig;
+}
+
+// Returns the private data pointer of the stage, which may be NULL.
+// mpe must not be NULL.
+void* CMSEXPORT cmsStageData(const cmsStage* mpe)
+{
+    void* Data = mpe ->Data;
+
+    return Data;
+}
+
+// Returns the stage linked after this one, or NULL when it is the last.
+// mpe must not be NULL.
+cmsStage*  CMSEXPORT cmsStageNext(const cmsStage* mpe)
+{
+    cmsStage* Following = mpe ->Next;
+
+    return Following;
+}
+
+
+// Duplicates an MPE. The copy gets the same type, channel counts, callbacks and
+// Implements signature; the private data is cloned through the stage's DupElemPtr
+// when there is one, and left NULL otherwise.
+// Returns NULL when mpe is NULL or any allocation fails.
+cmsStage* CMSEXPORT cmsStageDup(cmsStage* mpe)
+{
+    cmsStage* Copy;
+
+    if (mpe == NULL) return NULL;
+
+    Copy = _cmsStageAllocPlaceholder(mpe ->ContextID,
+                                     mpe ->Type,
+                                     mpe ->InputChannels,
+                                     mpe ->OutputChannels,
+                                     mpe ->EvalPtr,
+                                     mpe ->DupElemPtr,
+                                     mpe ->FreePtr,
+                                     NULL);
+    if (Copy == NULL) return NULL;
+
+    Copy ->Implements = mpe ->Implements;
+
+    // No duplicator registered: the copy carries no private data
+    if (mpe ->DupElemPtr == NULL) {
+
+        Copy ->Data = NULL;
+        return Copy;
+    }
+
+    Copy ->Data = mpe ->DupElemPtr(mpe);
+    if (Copy ->Data != NULL) return Copy;
+
+    // The duplicator failed, drop the half-built copy
+    cmsStageFree(Copy);
+    return NULL;
+}
+
+
+// ***********************************************************************************************************
+
+// Sets up the channel counts of a pipeline from its stages and checks that the
+// chain is consistent: every stage must take as many inputs as the previous stage
+// produces. A pipeline without stages is left untouched and reported as valid.
+// Returns FALSE when the first or last stage cannot be obtained or the chain is
+// inconsistent, TRUE otherwise.
+static
+cmsBool BlessLUT(cmsPipeline* lut)
+{
+    cmsStage* First;
+    cmsStage* Last;
+    cmsStage* prev;
+
+    // Channel counts can only be derived when there are elements
+    if (lut ->Elements == NULL) return TRUE;
+
+    First = cmsPipelineGetPtrToFirstStage(lut);
+    Last  = cmsPipelineGetPtrToLastStage(lut);
+
+    if (First == NULL || Last == NULL) return FALSE;
+
+    lut ->InputChannels  = First ->InputChannels;
+    lut ->OutputChannels = Last ->OutputChannels;
+
+    // Walk the chain comparing every stage against its successor
+    for (prev = First; prev ->Next != NULL; prev = prev ->Next) {
+
+        if (prev ->Next ->InputChannels != prev ->OutputChannels)
+            return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+// Default 16-bit evaluator of a pipeline. The input is converted to floating point,
+// pushed through every stage in order, and converted back to 16 bits. Two scratch
+// buffers are used alternately as the input and output of each stage.
+// D is the pipeline itself.
+static
+void _LUTeval16(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[],  CMSREGISTER const void* D)
+{
+    cmsPipeline* lut = (cmsPipeline*) D;
+    cmsStage *mpe;
+    cmsFloat32Number Storage[2][MAX_STAGE_CHANNELS];
+    int Cur = 0, Nxt;
+
+    From16ToFloat(In, Storage[Cur], lut ->InputChannels);
+
+    mpe = lut ->Elements;
+    while (mpe != NULL) {
+
+        // Read from the current buffer, write to the other one, then swap roles
+        Nxt = Cur ^ 1;
+        mpe ->EvalPtr(Storage[Cur], Storage[Nxt], mpe);
+        Cur = Nxt;
+
+        mpe = mpe ->Next;
+    }
+
+    FromFloatTo16(Storage[Cur], Out, lut ->OutputChannels);
+}
+
+
+
+// Default floating point evaluator of a pipeline. The input is copied to a scratch
+// buffer, pushed through every stage in order, and the final buffer copied to Out.
+// Two scratch buffers are used alternately as the input and output of each stage.
+// D is the pipeline itself.
+static
+void _LUTevalFloat(CMSREGISTER const cmsFloat32Number In[], CMSREGISTER cmsFloat32Number Out[], const void* D)
+{
+    cmsPipeline* lut = (cmsPipeline*) D;
+    cmsStage *mpe;
+    cmsFloat32Number Storage[2][MAX_STAGE_CHANNELS];
+    int Cur = 0, Nxt;
+
+    memmove(Storage[Cur], In, lut ->InputChannels  * sizeof(cmsFloat32Number));
+
+    mpe = lut ->Elements;
+    while (mpe != NULL) {
+
+        // Read from the current buffer, write to the other one, then swap roles
+        Nxt = Cur ^ 1;
+        mpe ->EvalPtr(Storage[Cur], Storage[Nxt], mpe);
+        Cur = Nxt;
+
+        mpe = mpe ->Next;
+    }
+
+    memmove(Out, Storage[Cur], lut ->OutputChannels * sizeof(cmsFloat32Number));
+}
+
+
+// LUT creation. Allocates an empty pipeline with the given channel counts, the
+// default 16-bit and float evaluators, and its own address as evaluator data.
+// Returns NULL when a channel count reaches cmsMAXCHANNELS, on allocation failure,
+// or if BlessLUT rejects the new pipeline.
+cmsPipeline* CMSEXPORT cmsPipelineAlloc(cmsContext ContextID, cmsUInt32Number InputChannels, cmsUInt32Number OutputChannels)
+{
+       cmsPipeline* Pipe;
+
+       // A value of zero in channels is allowed as placeholder
+       if (InputChannels >= cmsMAXCHANNELS) return NULL;
+       if (OutputChannels >= cmsMAXCHANNELS) return NULL;
+
+       Pipe = (cmsPipeline*) _cmsMallocZero(ContextID, sizeof(cmsPipeline));
+       if (Pipe == NULL) return NULL;
+
+       Pipe ->ContextID      = ContextID;
+       Pipe ->InputChannels  = InputChannels;
+       Pipe ->OutputChannels = OutputChannels;
+
+       Pipe ->Eval16Fn    = _LUTeval16;
+       Pipe ->EvalFloatFn = _LUTevalFloat;
+       Pipe ->DupDataFn   = NULL;
+       Pipe ->FreeDataFn  = NULL;
+       Pipe ->Data        = Pipe;
+
+       if (BlessLUT(Pipe))
+           return Pipe;
+
+       _cmsFree(ContextID, Pipe);
+       return NULL;
+}
+
+// Returns the context the pipeline was created in. lut must not be NULL (asserted).
+cmsContext CMSEXPORT cmsGetPipelineContextID(const cmsPipeline* lut)
+{
+    cmsContext ContextID;
+
+    _cmsAssert(lut != NULL);
+
+    ContextID = lut ->ContextID;
+    return ContextID;
+}
+
+// Returns the number of input channels of the pipeline. lut must not be NULL (asserted).
+cmsUInt32Number CMSEXPORT cmsPipelineInputChannels(const cmsPipeline* lut)
+{
+    cmsUInt32Number nChannels;
+
+    _cmsAssert(lut != NULL);
+
+    nChannels = lut ->InputChannels;
+    return nChannels;
+}
+
+// Returns the number of output channels of the pipeline. lut must not be NULL (asserted).
+cmsUInt32Number CMSEXPORT cmsPipelineOutputChannels(const cmsPipeline* lut)
+{
+    cmsUInt32Number nChannels;
+
+    _cmsAssert(lut != NULL);
+
+    nChannels = lut ->OutputChannels;
+    return nChannels;
+}
+
+// Frees a pipeline: every stage in the chain, then the private data through
+// FreeDataFn when one is registered, and finally the pipeline object itself.
+// A NULL pipeline is ignored.
+void CMSEXPORT cmsPipelineFree(cmsPipeline* lut)
+{
+    cmsStage *mpe, *Next;
+
+    if (lut == NULL) return;
+
+    mpe = lut ->Elements;
+    while (mpe != NULL) {
+
+        // Fetch the link first, the stage is gone after cmsStageFree
+        Next = mpe ->Next;
+        cmsStageFree(mpe);
+        mpe = Next;
+    }
+
+    if (lut ->FreeDataFn != NULL)
+        lut ->FreeDataFn(lut ->ContextID, lut ->Data);
+
+    _cmsFree(lut ->ContextID, lut);
+}
+
+
+// Evaluates the pipeline on 16-bit values by calling its Eval16Fn with the
+// pipeline's Data pointer. lut must not be NULL (asserted).
+void CMSEXPORT cmsPipelineEval16(const cmsUInt16Number In[], cmsUInt16Number Out[],  const cmsPipeline* lut)
+{
+    _cmsAssert(lut != NULL);
+
+    lut ->Eval16Fn(In, Out, lut ->Data);
+}
+
+
+// Evaluates the pipeline on floating point values by calling its EvalFloatFn.
+// The pipeline itself, not its Data pointer, is passed as the evaluator data.
+// lut must not be NULL (asserted).
+void CMSEXPORT cmsPipelineEvalFloat(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsPipeline* lut)
+{
+    _cmsAssert(lut != NULL);
+
+    lut ->EvalFloatFn(In, Out, lut);
+}
+
+
+
+// Duplicates a LUT: every stage is cloned with cmsStageDup and linked in the same
+// order, the evaluator and data callbacks are copied, and the private data is
+// cloned through DupDataFn when one is registered.
+// Returns the new pipeline, or NULL when lut is NULL, an allocation fails, or the
+// duplicated chain is rejected by BlessLUT. Nothing is leaked on failure.
+cmsPipeline* CMSEXPORT cmsPipelineDup(const cmsPipeline* lut)
+{
+    cmsPipeline* NewLUT;
+    cmsStage *NewMPE, *Anterior = NULL, *mpe;
+
+    if (lut == NULL) return NULL;
+
+    NewLUT = cmsPipelineAlloc(lut ->ContextID, lut ->InputChannels, lut ->OutputChannels);
+    if (NewLUT == NULL) return NULL;
+
+    for (mpe = lut ->Elements;
+         mpe != NULL;
+         mpe = mpe ->Next) {
+
+             NewMPE = cmsStageDup(mpe);
+
+             if (NewMPE == NULL) {
+                 cmsPipelineFree(NewLUT);
+                 return NULL;
+             }
+
+             // The first clone becomes the head, later ones hang off the previous clone
+             if (Anterior == NULL)
+                 NewLUT ->Elements = NewMPE;
+             else
+                 Anterior ->Next = NewMPE;
+
+             Anterior = NewMPE;
+    }
+
+    NewLUT ->Eval16Fn    = lut ->Eval16Fn;
+    NewLUT ->EvalFloatFn = lut ->EvalFloatFn;
+    NewLUT ->DupDataFn   = lut ->DupDataFn;
+    NewLUT ->FreeDataFn  = lut ->FreeDataFn;
+
+    if (NewLUT ->DupDataFn != NULL)
+        NewLUT ->Data = NewLUT ->DupDataFn(lut ->ContextID, lut->Data);
+
+    NewLUT ->SaveAs8Bits    = lut ->SaveAs8Bits;
+
+    if (!BlessLUT(NewLUT))
+    {
+        // Freeing only the pipeline struct would leak every cloned stage and any
+        // cloned private data; cmsPipelineFree releases the same things a caller
+        // would release for a successfully returned copy.
+        cmsPipelineFree(NewLUT);
+        return NULL;
+    }
+
+    return NewLUT;
+}
+
+
+// Links stage mpe into the pipeline at the beginning or at the end of the chain.
+// Returns FALSE on NULL arguments or unknown location; otherwise the BlessLUT() result.
+int CMSEXPORT cmsPipelineInsertStage(cmsPipeline* lut, cmsStageLoc loc, cmsStage* mpe)
+{
+    cmsStage* Anterior = NULL, *pt;
+
+    if (lut == NULL || mpe == NULL)
+        return FALSE;
+
+    switch (loc) {
+
+        case cmsAT_BEGIN:
+            mpe ->Next = lut ->Elements;
+            lut ->Elements = mpe;
+            break;
+
+        case cmsAT_END:
+            if (lut ->Elements == NULL)
+                lut ->Elements = mpe;
+            else {
+                for (pt = lut ->Elements;
+                     pt != NULL;
+                     pt = pt -> Next) Anterior = pt;
+                Anterior ->Next = mpe;
+            }
+            // The new stage is the tail in both branches: drop any stale Next link
+            mpe ->Next = NULL;
+            break;
+        default:;
+            return FALSE;
+    }
+
+    return BlessLUT(lut);
+}
+
+// Detaches the first or last stage of the pipeline. When mpe is not NULL the
+// detached stage is handed to the caller through *mpe; otherwise it is freed.
+// An empty pipeline or an unknown location yields *mpe = NULL.
+void CMSEXPORT cmsPipelineUnlinkStage(cmsPipeline* lut, cmsStageLoc loc, cmsStage** mpe)
+{
+    cmsStage *Anterior, *pt, *Last;
+    cmsStage *Unlinked = NULL;
+
+
+    // If empty LUT, there is nothing to remove
+    if (lut ->Elements == NULL) {
+        if (mpe) *mpe = NULL;
+        return;
+    }
+
+    // Pick the stage to detach according to the requested end of the chain
+    switch (loc) {
+
+        case cmsAT_BEGIN:
+            Unlinked = lut ->Elements;
+            lut ->Elements = Unlinked ->Next;
+            Unlinked ->Next = NULL;
+            break;
+
+        case cmsAT_END:
+            // Walk to the tail, remembering the stage just before it
+            Anterior = Last = NULL;
+            for (pt = lut ->Elements;
+                pt != NULL;
+                pt = pt -> Next) {
+                    Anterior = Last;
+                    Last = pt;
+            }
+
+            Unlinked = Last;  // Next already points to NULL
+
+            // Truncate the chain
+            if (Anterior)
+                Anterior ->Next = NULL;
+            else
+                lut ->Elements = NULL;
+            break;
+
+        default:;   // unknown location: nothing is detached, Unlinked stays NULL
+    }
+
+    if (mpe)
+        *mpe = Unlinked;
+    else if (Unlinked != NULL)
+        // Nothing was detached for an unknown location; never free a NULL stage
+        cmsStageFree(Unlinked);
+
+    // May fail, but we ignore it
+    BlessLUT(lut);
+}
+
+
+// Appends a duplicate of every stage of l2 to the end of l1; l2 is not modified.
+// Returns FALSE as soon as one stage cannot be inserted, otherwise the result
+// of re-validating l1 with BlessLUT().
+cmsBool  CMSEXPORT cmsPipelineCat(cmsPipeline* l1, const cmsPipeline* l2)
+{
+    cmsStage* Current;
+
+    // With no stages on either side, l1 simply takes over the channel counts of l2
+    if (l1->Elements == NULL && l2->Elements == NULL) {
+        l1->InputChannels  = l2->InputChannels;
+        l1->OutputChannels = l2->OutputChannels;
+    }
+
+    Current = l2->Elements;
+    while (Current != NULL) {
+        // l1 must own its stages, so each one is inserted as a duplicate
+        cmsStage* Copy = cmsStageDup(Current);
+        if (!cmsPipelineInsertStage(l1, cmsAT_END, Copy)) return FALSE;
+
+        Current = Current->Next;
+    }
+
+    return BlessLUT(l1);
+}
+
+
+cmsBool CMSEXPORT cmsPipelineSetSaveAs8bitsFlag(cmsPipeline* lut, cmsBool On)
+{
+    // Hand back the flag value that was in effect before this call
+    const cmsBool Previous = lut->SaveAs8Bits;
+    lut->SaveAs8Bits = On;
+    return Previous;
+}
+
+
+cmsStage* CMSEXPORT cmsPipelineGetPtrToFirstStage(const cmsPipeline* lut)
+{
+    return lut ->Elements;   // head of the stage chain; NULL for an empty pipeline
+}
+
+cmsStage* CMSEXPORT cmsPipelineGetPtrToLastStage(const cmsPipeline* lut)
+{
+    // Follow the Next links to the tail; an empty pipeline gives NULL
+    cmsStage* Tail = lut->Elements;
+    if (Tail != NULL)
+        while (Tail->Next != NULL) Tail = Tail->Next;
+
+    return Tail;
+}
+
+cmsUInt32Number CMSEXPORT cmsPipelineStageCount(const cmsPipeline* lut)
+{
+    // Number of stages reachable from lut->Elements through the Next links
+    cmsUInt32Number Count = 0;
+    const cmsStage* Stage;
+
+    for (Stage = lut->Elements; Stage != NULL; Stage = Stage->Next) Count++;
+
+    return Count;
+}
+
+// Installs a custom 16-bit evaluator on the pipeline together with the private data handed to it.
+// FreePrivateDataFn / DupPrivateDataFn release and clone that data; pass NULL to leave either one unset.
+void CMSEXPORT _cmsPipelineSetOptimizationParameters(cmsPipeline* Lut,
+                                        _cmsOPTeval16Fn Eval16,
+                                        void* PrivateData,
+                                        _cmsFreeUserDataFn FreePrivateDataFn,
+                                        _cmsDupUserDataFn  DupPrivateDataFn)
+{
+    // Only the 16-bit path is replaced here; EvalFloatFn is not touched
+    Lut->Data       = PrivateData;
+    Lut->Eval16Fn   = Eval16;
+    Lut->FreeDataFn = FreePrivateDataFn;
+    Lut->DupDataFn  = DupPrivateDataFn;
+}
+
+
+// ----------------------------------------------------------- Reverse interpolation
+// Here's how it goes. The derivative Df(x) of the function f is the linear
+// transformation that best approximates f near the point x. It can be represented
+// by a matrix A whose entries are the partial derivatives of the components of f
+// with respect to all the coordinates. This is known as the Jacobian.
+//
+// The best linear approximation to f is given by the matrix equation:
+//
+// y-y0 = A (x-x0)
+//
+// So, if x0 is a good "guess" for the zero of f, then solving for the zero of this
+// linear approximation will give a "better guess" for the zero of f. Thus let y=0,
+// and since y0=f(x0) one can solve the above equation for x. This leads to the
+// Newton's method formula:
+//
+// xn+1 = xn - A-1 f(xn)
+//
+// where xn+1 denotes the (n+1)-st guess, obtained from the n-th guess xn in the
+// fashion described above. Iterating this will give better and better approximations
+// if you have a "good enough" initial guess.
+
+
+#define JACOBIAN_EPSILON            0.001f
+#define INVERSION_MAX_ITERATIONS    30
+
+// Moves *Val one JACOBIAN_EPSILON step: upwards while that stays below 1.0,
+// downwards (reflecting on the boundary) otherwise.
+static
+void IncDelta(cmsFloat32Number *Val)
+{
+    const cmsBool RoomAbove = (*Val < (1.0 - JACOBIAN_EPSILON));
+
+    if (RoomAbove)
+        *Val += JACOBIAN_EPSILON;
+    else
+        *Val -= JACOBIAN_EPSILON;
+}
+
+
+
+// Length of the difference between vectors a and b, both holding n components
+static
+cmsFloat32Number EuclideanDistance(cmsFloat32Number a[], cmsFloat32Number b[], int n)
+{
+    cmsFloat32Number SquaredSum = 0;
+    int k = 0;
+
+    while (k < n) {
+        const cmsFloat32Number Delta = b[k] - a[k];
+        SquaredSum += Delta * Delta;
+        k++;
+    }
+    return sqrtf(SquaredSum);
+}
+
+
+// Evaluate a LUT in reverse direction with Newton's method. Only 3->3 and 4->3
+// LUTs are searched; FALSE is returned otherwise, or when the Jacobian is singular.
+//    x1 <- x - [J(x)]^-1 * f(x)
+//
+// lut: The LUT on where to do the search
+// Target: LabK, 3 values of Lab plus destination K which is fixed
+// Result: The obtained CMYK (best guess found; lut->InputChannels values are written)
+// Hint:   Location where begin the search (3 values), or NULL to start at 0.3 on each axis
+
+cmsBool CMSEXPORT cmsPipelineEvalReverseFloat(cmsFloat32Number Target[],
+                                              cmsFloat32Number Result[],
+                                              cmsFloat32Number Hint[],
+                                              const cmsPipeline* lut)
+{
+    cmsUInt32Number  i, j;
+    cmsFloat64Number  error, LastError = 1E20;
+    cmsFloat32Number  fx[4], x[4], xd[4], fxd[4], Delta;
+    cmsVEC3 tmp, tmp2;
+    cmsMAT3 Jacobian;
+
+    // Only 3->3 and 4->3 are supported
+    if (lut ->InputChannels != 3 && lut ->InputChannels != 4) return FALSE;
+    if (lut ->OutputChannels != 3) return FALSE;
+
+    // Take the hint as starting point if specified
+    if (Hint == NULL) {
+
+        // Begin at any point, we choose 1/3 of CMY axis
+        x[0] = x[1] = x[2] = 0.3f;
+    }
+    else {
+
+        // Only copy 3 channels from hint...
+        for (j=0; j < 3; j++)
+            x[j] = Hint[j];
+    }
+
+    // If Lut is 4-dimensions, then grab target[3], which is fixed
+    if (lut ->InputChannels == 4) {
+        x[3] = Target[3];
+    }
+    else x[3] = 0; // To keep lint happy
+
+
+    // Iterate
+    for (i = 0; i < INVERSION_MAX_ITERATIONS; i++) {
+
+        // Get beginning fx
+        cmsPipelineEvalFloat(x, fx, lut);
+
+        // Compute error
+        error = EuclideanDistance(fx, Target, 3);
+
+        // If not convergent, return last safe value
+        if (error >= LastError)
+            break;
+
+        // Keep latest values
+        LastError     = error;
+        for (j=0; j < lut ->InputChannels; j++)
+                Result[j] = x[j];
+
+        // Found an exact match?
+        if (error <= 0)
+            break;
+
+        // Obtain slope (the Jacobian)
+        for (j = 0; j < 3; j++) {
+
+            xd[0] = x[0];
+            xd[1] = x[1];
+            xd[2] = x[2];
+            xd[3] = x[3];  // Keep fixed channel
+            // Perturb one channel; IncDelta steps downwards near the upper bound, so keep the sign
+            IncDelta(&xd[j]);
+            Delta = (xd[j] < x[j]) ? -JACOBIAN_EPSILON : JACOBIAN_EPSILON;
+            cmsPipelineEvalFloat(xd, fxd, lut);
+
+            Jacobian.v[0].n[j] = ((fxd[0] - fx[0]) / Delta);
+            Jacobian.v[1].n[j] = ((fxd[1] - fx[1]) / Delta);
+            Jacobian.v[2].n[j] = ((fxd[2] - fx[2]) / Delta);
+        }
+
+        // Solve system
+        tmp2.n[0] = fx[0] - Target[0];
+        tmp2.n[1] = fx[1] - Target[1];
+        tmp2.n[2] = fx[2] - Target[2];
+
+        if (!_cmsMAT3solve(&tmp, &Jacobian, &tmp2))
+            return FALSE;
+
+        // Move our guess
+        x[0] -= (cmsFloat32Number) tmp.n[0];
+        x[1] -= (cmsFloat32Number) tmp.n[1];
+        x[2] -= (cmsFloat32Number) tmp.n[2];
+
+        // Some clipping....
+        for (j=0; j < 3; j++) {
+            if (x[j] < 0) x[j] = 0;
+            else
+                if (x[j] > 1.0) x[j] = 1.0;
+        }
+    }
+
+    return TRUE;
+}
+
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmsmd5.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmsmd5.c
new file mode 100644
index 0000000000..e07b77af32
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmsmd5.c
@@ -0,0 +1,313 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+
+
+#include "lcms2_internal.h"
+
+#ifdef CMS_USE_BIG_ENDIAN
+// Compiled only when CMS_USE_BIG_ENDIAN is defined: passes each of the `longs` 32-bit words of buf through _cmsAdjustEndianess32, in place.
+static
+void byteReverse(cmsUInt8Number * buf, cmsUInt32Number longs)
+{
+    do {
+        // Runs at least once: the count is only tested after the first word
+        cmsUInt32Number t = _cmsAdjustEndianess32(*(cmsUInt32Number *) buf);
+        *(cmsUInt32Number *) buf = t;
+        buf += sizeof(cmsUInt32Number);
+
+    } while (--longs);
+
+}
+// Without CMS_USE_BIG_ENDIAN the macro below turns byteReverse() calls into nothing
+#else
+#define byteReverse(buf, len)
+#endif
+
+
+typedef struct {
+    // MD5 working state; buf and bits are initialised by cmsMD5alloc
+    cmsUInt32Number buf[4];    // running digest words
+    cmsUInt32Number bits[2];   // message length in bits: [0] low word, [1] high word
+    cmsUInt8Number in[64];     // pending input block
+    cmsContext ContextID;      // context used to allocate and free this object
+
+} _cmsMD5;
+
+#define F1(x, y, z) (z ^ (x & (y ^ z)))
+#define F2(x, y, z) F1(z, x, y)
+#define F3(x, y, z) (x ^ y ^ z)
+#define F4(x, y, z) (y ^ (x | ~z))
+
+#define STEP(f, w, x, y, z, data, s) \
+    ( w += f(x, y, z) + data,  w = w<<s | w>>(32-s),  w += x )
+
+
+static
+void cmsMD5_Transform(cmsUInt32Number buf[4], cmsUInt32Number in[16])
+{
+    CMSREGISTER cmsUInt32Number a, b, c, d;
+    // Mixes the 16 words of `in` into the four digest words in buf, starting from their current values
+    a = buf[0];
+    b = buf[1];
+    c = buf[2];
+    d = buf[3];
+    // 16 steps with F1
+    STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
+    STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
+    STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
+    STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
+    STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
+    STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
+    STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
+    STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
+    STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
+    STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
+    STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
+    STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
+    STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
+    STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
+    STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
+    STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
+    // 16 steps with F2
+    STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
+    STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
+    STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
+    STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
+    STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
+    STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
+    STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
+    STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
+    STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
+    STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
+    STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
+    STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
+    STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
+    STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
+    STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
+    STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
+    // 16 steps with F3
+    STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
+    STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
+    STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
+    STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
+    STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
+    STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
+    STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
+    STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
+    STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
+    STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
+    STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
+    STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
+    STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
+    STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
+    STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
+    STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
+    // 16 steps with F4
+    STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
+    STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
+    STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
+    STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
+    STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
+    STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
+    STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
+    STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
+    STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
+    STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
+    STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
+    STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
+    STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
+    STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
+    STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
+    STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
+    // Add the result of this block into the digest words
+    buf[0] += a;
+    buf[1] += b;
+    buf[2] += c;
+    buf[3] += d;
+}
+
+
+// Allocates an MD5 object in the given context, loaded with the initial digest
+// words and a zero bit count. Returns NULL when the allocation fails.
+cmsHANDLE CMSEXPORT cmsMD5alloc(cmsContext ContextID)
+{
+    static const cmsUInt32Number Seed[4] = {
+        0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476
+    };
+    cmsUInt32Number k;
+    _cmsMD5* State = (_cmsMD5*) _cmsMallocZero(ContextID, sizeof(_cmsMD5));
+
+    if (State == NULL) return NULL;
+
+    State->ContextID = ContextID;
+    for (k = 0; k < 4; k++) State->buf[k] = Seed[k];
+    State->bits[0] = 0;
+    State->bits[1] = 0;
+
+    return (cmsHANDLE) State;
+}
+
+void CMSEXPORT cmsMD5add(cmsHANDLE Handle, const cmsUInt8Number* buf, cmsUInt32Number len)
+{
+    _cmsMD5* ctx = (_cmsMD5*) Handle;
+    cmsUInt32Number t;
+    // Feeds len bytes from buf into the digest; whole 64-byte blocks are hashed, the rest is kept in ctx->in
+    t = ctx->bits[0];
+    if ((ctx->bits[0] = t + (len << 3)) < t)
+        ctx->bits[1]++;
+    // The top 3 bits of len do not fit in (len << 3); they belong to the high word
+    ctx->bits[1] += len >> 29;
+    // Bytes already waiting in ctx->in (previous byte count modulo 64)
+    t = (t >> 3) & 0x3f;
+    // Complete the partially filled block first
+    if (t) {
+
+        cmsUInt8Number *p = (cmsUInt8Number *) ctx->in + t;
+
+        t = 64 - t;
+        if (len < t) {
+            memmove(p, buf, len);
+            return;
+        }
+
+        memmove(p, buf, t);
+        byteReverse(ctx->in, 16);
+
+        cmsMD5_Transform(ctx->buf, (cmsUInt32Number *) ctx->in);
+        buf += t;
+        len -= t;
+    }
+    // Hash every whole 64-byte block of the remaining input
+    while (len >= 64) {
+        memmove(ctx->in, buf, 64);
+        byteReverse(ctx->in, 16);
+        cmsMD5_Transform(ctx->buf, (cmsUInt32Number *) ctx->in);
+        buf += 64;
+        len -= 64;
+    }
+    // Keep the remaining bytes (fewer than 64) for the next call
+    memmove(ctx->in, buf, len);
+}
+
+// Pads the message, writes the 16-byte digest to ProfileID->ID8 and frees the MD5 object
+void CMSEXPORT cmsMD5finish(cmsProfileID* ProfileID,  cmsHANDLE Handle)
+{
+    _cmsMD5* ctx = (_cmsMD5*) Handle;
+    cmsUInt32Number count;
+    cmsUInt8Number *p;
+    // Number of bytes pending in ctx->in
+    count = (ctx->bits[0] >> 3) & 0x3F;
+    // Append the 0x80 padding marker right after them
+    p = ctx->in + count;
+    *p++ = 0x80;
+    // Bytes still free in the block after the marker
+    count = 64 - 1 - count;
+    // The 8-byte bit count must fit at the end; if it does not, flush this block and pad a fresh one
+    if (count < 8) {
+
+        memset(p, 0, count);
+        byteReverse(ctx->in, 16);
+        cmsMD5_Transform(ctx->buf, (cmsUInt32Number *) ctx->in);
+
+        memset(ctx->in, 0, 56);
+    } else {
+        memset(p, 0, count - 8);
+    }
+    byteReverse(ctx->in, 14);
+    // Store the bit count in the last two words of the block
+    ((cmsUInt32Number *) ctx->in)[14] = ctx->bits[0];
+    ((cmsUInt32Number *) ctx->in)[15] = ctx->bits[1];
+
+    cmsMD5_Transform(ctx->buf, (cmsUInt32Number *) ctx->in);
+    // Copy the digest out
+    byteReverse((cmsUInt8Number *) ctx->buf, 4);
+    memmove(ProfileID ->ID8, ctx->buf, 16);
+    // The handle must not be used after this call
+    _cmsFree(ctx ->ContextID, ctx);
+}
+
+
+
+// Computes the MD5 checksum of the profile behind hProfile and stores it as the profile ID.
+// In the header, rendering intent, attributes and ID are set to zero before computing
+// the checksum (per 6.1.13 in ICC spec) and restored afterwards. Returns TRUE on success.
+
+cmsBool CMSEXPORT cmsMD5computeID(cmsHPROFILE hProfile)
+{
+    cmsContext   ContextID;
+    cmsUInt32Number BytesNeeded;
+    cmsUInt8Number* Mem = NULL;
+    cmsHANDLE  MD5 = NULL;
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    _cmsICCPROFILE Keep;
+
+    _cmsAssert(hProfile != NULL);
+
+    ContextID = cmsGetProfileContextID(hProfile);
+
+    // Save a copy of the whole profile structure so the header fields can be restored
+    memmove(&Keep, Icc, sizeof(_cmsICCPROFILE));
+
+    // Set RI, attributes and ID
+    memset(&Icc ->attributes, 0, sizeof(Icc ->attributes));
+    Icc ->RenderingIntent = 0;
+    memset(&Icc ->ProfileID, 0, sizeof(Icc ->ProfileID));
+
+    // Compute needed storage
+    if (!cmsSaveProfileToMem(hProfile, NULL, &BytesNeeded)) goto Error;
+
+    // Allocate memory
+    Mem = (cmsUInt8Number*) _cmsMalloc(ContextID, BytesNeeded);
+    if (Mem == NULL) goto Error;
+
+    // Save to temporary storage
+    if (!cmsSaveProfileToMem(hProfile, Mem, &BytesNeeded)) goto Error;
+
+    // Create MD5 object
+    MD5 = cmsMD5alloc(ContextID);
+    if (MD5 == NULL) goto Error;
+
+    // Add all bytes
+    cmsMD5add(MD5, Mem, BytesNeeded);
+
+    // Temp storage is no longer needed
+    _cmsFree(ContextID, Mem);
+
+    // Restore header
+    memmove(Icc, &Keep, sizeof(_cmsICCPROFILE));
+
+    // And store the ID (cmsMD5finish also frees the MD5 object)
+    cmsMD5finish(&Icc ->ProfileID,  MD5);
+    return TRUE;
+
+Error:
+
+    // Free resources as something went wrong
+    // "MD5" cannot be other than NULL here, so no need to free it
+    if (Mem != NULL) _cmsFree(ContextID, Mem);
+    memmove(Icc, &Keep, sizeof(_cmsICCPROFILE));
+    return FALSE;
+}
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmsmtrx.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmsmtrx.c
new file mode 100644
index 0000000000..a83d39ddb6
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmsmtrx.c
@@ -0,0 +1,176 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+#define DSWAP(x, y)     {cmsFloat64Number tmp = (x); (x)=(y); (y)=tmp;}
+
+
+// Initiate a vector: stores x, y and z into the VX, VY and VZ components of r
+void CMSEXPORT _cmsVEC3init(cmsVEC3* r, cmsFloat64Number x, cmsFloat64Number y, cmsFloat64Number z)
+{
+    r -> n[VX] = x;
+    r -> n[VY] = y;
+    r -> n[VZ] = z;
+}
+
+// Vector subtraction: r = a - b, component by component
+void CMSEXPORT _cmsVEC3minus(cmsVEC3* r, const cmsVEC3* a, const cmsVEC3* b)
+{
+  r -> n[VX] = a -> n[VX] - b -> n[VX];
+  r -> n[VY] = a -> n[VY] - b -> n[VY];
+  r -> n[VZ] = a -> n[VZ] - b -> n[VZ];
+}
+
+// Vector cross product: r = u x v. r is written while u and v are still being read, so it must not alias either.
+void CMSEXPORT _cmsVEC3cross(cmsVEC3* r, const cmsVEC3* u, const cmsVEC3* v)
+{
+    r ->n[VX] = u->n[VY] * v->n[VZ] - v->n[VY] * u->n[VZ];
+    r ->n[VY] = u->n[VZ] * v->n[VX] - v->n[VZ] * u->n[VX];
+    r ->n[VZ] = u->n[VX] * v->n[VY] - v->n[VX] * u->n[VY];
+}
+
+// Vector dot product: returns the sum of the component-wise products of u and v
+cmsFloat64Number CMSEXPORT _cmsVEC3dot(const cmsVEC3* u, const cmsVEC3* v)
+{
+    return u->n[VX] * v->n[VX] + u->n[VY] * v->n[VY] + u->n[VZ] * v->n[VZ];
+}
+
+// Euclidean norm of a: the square root of the sum of its squared components
+cmsFloat64Number CMSEXPORT _cmsVEC3length(const cmsVEC3* a)
+{
+    const cmsFloat64Number x = a->n[VX], y = a->n[VY], z = a->n[VZ];
+
+    return sqrt(x*x + y*y + z*z);
+}
+
+// Euclidean distance between a and b: the length of the difference vector
+cmsFloat64Number CMSEXPORT _cmsVEC3distance(const cmsVEC3* a, const cmsVEC3* b)
+{
+    cmsVEC3 Difference;
+
+    _cmsVEC3minus(&Difference, a, b);
+
+    return _cmsVEC3length(&Difference);
+}
+
+
+
+// 3x3 Identity: fills the three row vectors of a with (1,0,0), (0,1,0) and (0,0,1)
+void CMSEXPORT _cmsMAT3identity(cmsMAT3* a)
+{
+    _cmsVEC3init(&a-> v[0], 1.0, 0.0, 0.0);
+    _cmsVEC3init(&a-> v[1], 0.0, 1.0, 0.0);
+    _cmsVEC3init(&a-> v[2], 0.0, 0.0, 1.0);
+}
+
+static
+cmsBool CloseEnough(cmsFloat64Number a, cmsFloat64Number b)
+{
+    return fabs(b - a) < (1.0 / 65535.0);   // TRUE when a and b differ by less than 1/65535
+}
+
+
+// TRUE when every element of a is within the CloseEnough() tolerance of the identity matrix
+cmsBool CMSEXPORT _cmsMAT3isIdentity(const cmsMAT3* a)
+{
+    cmsMAT3 Reference;
+    int Index;
+
+    _cmsMAT3identity(&Reference);
+
+    // Visit the nine elements in row-major order
+    for (Index = 0; Index < 9; Index++)
+        if (!CloseEnough(a->v[Index / 3].n[Index % 3], Reference.v[Index / 3].n[Index % 3])) return FALSE;
+    return TRUE;
+}
+
+
+// Multiply two matrices: r = a * b. The product is built in a local first, so r may alias a or b.
+void CMSEXPORT _cmsMAT3per(cmsMAT3* r, const cmsMAT3* a, const cmsMAT3* b)
+{
+#define ROWCOL(i, j) \
+    a->v[i].n[0]*b->v[0].n[j] + a->v[i].n[1]*b->v[1].n[j] + a->v[i].n[2]*b->v[2].n[j]
+    cmsMAT3 Product;
+    _cmsVEC3init(&Product.v[0], ROWCOL(0,0), ROWCOL(0,1), ROWCOL(0,2));
+    _cmsVEC3init(&Product.v[1], ROWCOL(1,0), ROWCOL(1,1), ROWCOL(1,2));
+    _cmsVEC3init(&Product.v[2], ROWCOL(2,0), ROWCOL(2,1), ROWCOL(2,2));
+    *r = Product;
+#undef ROWCOL //(i, j)
+}
+
+
+
+// Inverse of a matrix b = a^(-1). Returns FALSE, leaving b untouched, when |det| < MATRIX_DET_TOLERANCE
+cmsBool  CMSEXPORT _cmsMAT3inverse(const cmsMAT3* a, cmsMAT3* b)
+{
+   cmsFloat64Number det, c0, c1, c2;
+   // c0..c2: cofactors of the first row of a
+   c0 =  a -> v[1].n[1]*a -> v[2].n[2] - a -> v[1].n[2]*a -> v[2].n[1];
+   c1 = -a -> v[1].n[0]*a -> v[2].n[2] + a -> v[1].n[2]*a -> v[2].n[0];
+   c2 =  a -> v[1].n[0]*a -> v[2].n[1] - a -> v[1].n[1]*a -> v[2].n[0];
+   // Determinant by expansion along the first row
+   det = a -> v[0].n[0]*c0 + a -> v[0].n[1]*c1 + a -> v[0].n[2]*c2;
+
+   if (fabs(det) < MATRIX_DET_TOLERANCE) return FALSE;  // singular matrix; can't invert
+   // Each element of b is a 2x2 minor expression of a divided by the determinant
+   b -> v[0].n[0] = c0/det;
+   b -> v[0].n[1] = (a -> v[0].n[2]*a -> v[2].n[1] - a -> v[0].n[1]*a -> v[2].n[2])/det;
+   b -> v[0].n[2] = (a -> v[0].n[1]*a -> v[1].n[2] - a -> v[0].n[2]*a -> v[1].n[1])/det;
+   b -> v[1].n[0] = c1/det;
+   b -> v[1].n[1] = (a -> v[0].n[0]*a -> v[2].n[2] - a -> v[0].n[2]*a -> v[2].n[0])/det;
+   b -> v[1].n[2] = (a -> v[0].n[2]*a -> v[1].n[0] - a -> v[0].n[0]*a -> v[1].n[2])/det;
+   b -> v[2].n[0] = c2/det;
+   b -> v[2].n[1] = (a -> v[0].n[1]*a -> v[2].n[0] - a -> v[0].n[0]*a -> v[2].n[1])/det;
+   b -> v[2].n[2] = (a -> v[0].n[0]*a -> v[1].n[1] - a -> v[0].n[1]*a -> v[1].n[0])/det;
+
+   return TRUE;
+}
+
+
+// Solves A x = b for x by inverting A. Returns FALSE when A is singular.
+cmsBool  CMSEXPORT _cmsMAT3solve(cmsVEC3* x, cmsMAT3* a, cmsVEC3* b)
+{
+    cmsMAT3 Copy = *a;      // the inverse is computed from a private copy of A
+    cmsMAT3 Inverse;
+
+    if (_cmsMAT3inverse(&Copy, &Inverse)) {
+        _cmsMAT3eval(x, &Inverse, b);
+        return TRUE;
+    }
+    return FALSE;  // Singular matrix
+}
+
+// Evaluate a vector across a matrix: r = a * v. r is written while v is still being read, so r must not alias v.
+void CMSEXPORT _cmsMAT3eval(cmsVEC3* r, const cmsMAT3* a, const cmsVEC3* v)
+{
+    r->n[VX] = a->v[0].n[VX]*v->n[VX] + a->v[0].n[VY]*v->n[VY] + a->v[0].n[VZ]*v->n[VZ];
+    r->n[VY] = a->v[1].n[VX]*v->n[VX] + a->v[1].n[VY]*v->n[VY] + a->v[1].n[VZ]*v->n[VZ];
+    r->n[VZ] = a->v[2].n[VX]*v->n[VX] + a->v[2].n[VY]*v->n[VY] + a->v[2].n[VZ]*v->n[VZ];
+}
+
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmsnamed.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmsnamed.c
new file mode 100644
index 0000000000..773e4d2091
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmsnamed.c
@@ -0,0 +1,962 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Multilocalized unicode objects. That is an attempt to encapsulate i18n.
+
+
+// Allocates an empty multi localized unicode object
+// nItems is the initial capacity of the entry table (0 selects a default of 2). Returns NULL if out of memory.
+cmsMLU* CMSEXPORT cmsMLUalloc(cmsContext ContextID, cmsUInt32Number nItems)
+{
+    cmsMLU* NewMlu;
+
+    // nItems is an unsigned count, so "not positive" can only mean zero
+    if (nItems == 0) nItems = 2;
+
+    // Create the container. The pool fields are never set here, so this relies on _cmsMallocZero clearing them
+    NewMlu = (cmsMLU*) _cmsMallocZero(ContextID, sizeof(cmsMLU));
+    if (NewMlu == NULL) return NULL;
+
+    // Create entry array
+    NewMlu ->Entries = (_cmsMLUentry*) _cmsCalloc(ContextID, nItems, sizeof(_cmsMLUentry));
+    if (NewMlu ->Entries != NULL) {
+
+        NewMlu ->ContextID        = ContextID;
+        NewMlu ->AllocatedEntries = nItems;
+        NewMlu ->UsedEntries      = 0;
+        return NewMlu;
+    }
+
+    // No entry array: give the container back
+    _cmsFree(ContextID, NewMlu);
+    return NULL;
+}
+
+
+// Grows the string pool of a MLU: 256 bytes on first use, twice the current size afterwards.
+// Returns FALSE, leaving mlu unmodified, if mlu is NULL, the new size wraps around or reallocation fails.
+static
+cmsBool GrowMLUpool(cmsMLU* mlu)
+{
+    cmsUInt32Number NewSize;
+    void *Grown;
+
+    // Sanity check
+    if (mlu == NULL) return FALSE;
+
+    // Start at 256 bytes, then double on every call
+    NewSize = (mlu ->PoolSize == 0) ? 256 : mlu ->PoolSize * 2;
+
+    // Check for overflow: a doubled size smaller than the old one means the multiplication wrapped
+    if (NewSize < mlu ->PoolSize) return FALSE;
+
+    // Reallocate the pool
+    Grown = _cmsRealloc(mlu ->ContextID, mlu ->MemPool, NewSize);
+    if (Grown != NULL) {
+
+        mlu ->MemPool  = Grown;
+        mlu ->PoolSize = NewSize;
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+
+// Grows the entry table of a MLU. Each time this function is called, table size is multiplied times two.
+// Returns FALSE, leaving mlu unmodified, if mlu is NULL, the doubled count wraps around or reallocation fails.
+static
+cmsBool GrowMLUtable(cmsMLU* mlu)
+{
+    cmsUInt32Number Doubled;
+    _cmsMLUentry *Table;
+
+    // Sanity check
+    if (mlu == NULL) return FALSE;
+
+    // Check for overflow: halving the doubled count must give back the original
+    Doubled = mlu ->AllocatedEntries * 2;
+    if (Doubled / 2 != mlu ->AllocatedEntries) return FALSE;
+
+    // Reallocate the memory
+    Table = (_cmsMLUentry*)_cmsRealloc(mlu ->ContextID, mlu ->Entries, Doubled*sizeof(_cmsMLUentry));
+    if (Table == NULL) return FALSE;
+
+    mlu ->AllocatedEntries = Doubled;
+    mlu ->Entries          = Table;
+
+    return TRUE;
+}
+
+
+// Search for the entry matching both Language and Country. Returns its index, or -1 if absent or mlu is NULL.
+static
+int SearchMLUEntry(cmsMLU* mlu, cmsUInt16Number LanguageCode, cmsUInt16Number CountryCode)
+{
+    cmsUInt32Number k;
+    const _cmsMLUentry* Entry;
+
+    // Sanity check
+    if (mlu == NULL) return -1;
+
+    // Linear scan over the entries in use
+    for (k = 0; k < mlu ->UsedEntries; k++) {
+        Entry = &mlu ->Entries[k];
+        if (Entry ->Language != LanguageCode) continue;
+        if (Entry ->Country  == CountryCode) return (int) k;
+    }
+
+    return -1;   // Not found
+}
+
+// Add a block of characters to the intended MLU. Language and country are specified.
+// size is in bytes. Only one entry per language/country pair is allowed; FALSE on duplicates or lack of memory.
+static
+cmsBool AddMLUBlock(cmsMLU* mlu, cmsUInt32Number size, const wchar_t *Block,
+                     cmsUInt16Number LanguageCode, cmsUInt16Number CountryCode)
+{
+    cmsUInt32Number Start;
+    cmsUInt8Number* Pool;
+    _cmsMLUentry* Slot;
+
+    // Sanity check
+    if (mlu == NULL) return FALSE;
+
+    // Make room in the entry table first; note this happens even if the pair turns out to be a duplicate
+    if (mlu ->UsedEntries >= mlu ->AllocatedEntries && !GrowMLUtable(mlu)) return FALSE;
+
+    // Only one string per language/country pair is allowed
+    if (SearchMLUEntry(mlu, LanguageCode, CountryCode) >= 0) return FALSE;
+
+    // Keep growing the pool until the block fits in the free space
+    while ((mlu ->PoolSize - mlu ->PoolUsed) < size) {
+        if (!GrowMLUpool(mlu)) return FALSE;
+    }
+
+    // With size == 0 the loop above never runs, so the pool may still be unallocated here
+    Pool = (cmsUInt8Number*) mlu ->MemPool;
+    if (Pool == NULL) return FALSE;
+
+    // Copy the characters right after the part of the pool already in use
+    Start = mlu ->PoolUsed;
+    memmove(Pool + Start, Block, size);
+    mlu ->PoolUsed += size;
+
+    // Set the entry: where the string lives (byte offset, byte length) and which locale it belongs to
+    Slot = &mlu ->Entries[mlu ->UsedEntries];
+    Slot ->StrW     = Start;
+    Slot ->Len      = size;
+    Slot ->Country  = CountryCode;
+    Slot ->Language = LanguageCode;
+    mlu ->UsedEntries++;
+
+    return TRUE;
+}
+
+// Convert from a 3-char code to a cmsUInt16Number: first character in the high byte, second in the low byte.
+// It is done byte by byte because some compilers don't properly align beginning of strings.
+// A NULL code yields 0 instead of being dereferenced.
+static
+cmsUInt16Number strTo16(const char str[3])
+{
+    const cmsUInt8Number* ptr8 = (const cmsUInt8Number*)str;
+
+    if (ptr8 == NULL) return 0;   // For non-existent strings
+    return (cmsUInt16Number)(((cmsUInt16Number)ptr8[0] << 8) | ptr8[1]);
+}
+
+static
+void strFrom16(char str[3], cmsUInt16Number n)
+{
+    // Unpacks a 16-bit code: high byte first, low byte second, then a zero terminator
+    str[2] = (char)0;
+    str[1] = (char)n;
+    str[0] = (char)(n >> 8);
+}
+
+// Add an ASCII entry. Do not add any \0 termination (ICC1v43_2010-12.pdf page 61)
+// Each char is widened to wchar_t. FALSE on NULL arguments, allocation failure or a duplicated language/country.
+cmsBool CMSEXPORT cmsMLUsetASCII(cmsMLU* mlu, const char LanguageCode[3], const char CountryCode[3], const char* ASCIIString)
+{
+    cmsUInt32Number i, len;
+    wchar_t* WStr;
+    cmsBool  rc;
+    cmsUInt16Number Lang  = strTo16(LanguageCode);
+    cmsUInt16Number Cntry = strTo16(CountryCode);
+
+    if (mlu == NULL) return FALSE;
+    if (ASCIIString == NULL) return FALSE;   // same guard cmsMLUsetWide applies; strlen() must not see NULL
+
+    len  = (cmsUInt32Number) strlen(ASCIIString);
+    WStr = (wchar_t*) _cmsCalloc(mlu ->ContextID, len,  sizeof(wchar_t));
+    if (WStr == NULL) return FALSE;
+
+    for (i=0; i < len; i++)
+        WStr[i] = (wchar_t) ASCIIString[i];
+    rc = AddMLUBlock(mlu, len  * sizeof(wchar_t), WStr, Lang, Cntry);
+    _cmsFree(mlu ->ContextID, WStr);   // the temporary is no longer needed: AddMLUBlock copies into the pool
+    return rc;
+}
+
+// We don't need any wcs support library
+// Returns the number of wide characters that come before the terminating zero
+static
+cmsUInt32Number mywcslen(const wchar_t *s)
+{
+    const wchar_t *End;
+
+    for (End = s; *End != 0; End++)
+        ;   // just walk to the terminator
+
+    return (cmsUInt32Number)(End - s);
+}
+
+// Add a wide entry. Do not add any \0 terminator (ICC1v43_2010-12.pdf page 61)
+cmsBool  CMSEXPORT cmsMLUsetWide(cmsMLU* mlu, const char Language[3], const char Country[3], const wchar_t* WideString)
+{
+    cmsUInt16Number LangCode    = strTo16(Language);
+    cmsUInt16Number CountryCode = strTo16(Country);
+    cmsUInt32Number nBytes;
+
+    // Both the container and the string are required
+    if (mlu == NULL || WideString == NULL) return FALSE;
+
+    nBytes = (cmsUInt32Number) (mywcslen(WideString)) * sizeof(wchar_t);   // size in bytes, terminator excluded
+    return AddMLUBlock(mlu, nBytes, WideString, LangCode, CountryCode);
+}
+
+// Duplicates a MLU (entry table plus the used part of the string pool). Returns NULL on failure or for a NULL input.
+cmsMLU* CMSEXPORT cmsMLUdup(const cmsMLU* mlu)
+{
+    cmsMLU* NewMlu = NULL;
+
+    // Duplicating a NULL obtains a NULL
+    if (mlu == NULL) return NULL;
+
+    NewMlu = cmsMLUalloc(mlu ->ContextID, mlu ->UsedEntries);
+    if (NewMlu == NULL) return NULL;
+
+    // Should never happen: the copy was just allocated with room for UsedEntries entries
+    if (NewMlu ->AllocatedEntries < mlu ->UsedEntries)
+        goto Error;
+
+    // Sanitize: both entry tables must exist before copying between them
+    if (NewMlu ->Entries == NULL || mlu ->Entries == NULL)  goto Error;
+
+    memmove(NewMlu ->Entries, mlu ->Entries, mlu ->UsedEntries * sizeof(_cmsMLUentry));
+    NewMlu ->UsedEntries = mlu ->UsedEntries;
+
+    // The MLU may be empty, in which case no pool is allocated for the copy
+    if (mlu ->PoolUsed == 0) {
+        NewMlu ->MemPool = NULL;
+    }
+    else {
+        // It is not empty: the new pool is sized to exactly the bytes in use
+        NewMlu ->MemPool = _cmsMalloc(mlu ->ContextID, mlu ->PoolUsed);
+        if (NewMlu ->MemPool == NULL) goto Error;
+    }
+
+    NewMlu ->PoolSize = mlu ->PoolUsed;
+
+    if (NewMlu ->MemPool == NULL || mlu ->MemPool == NULL) goto Error;   // NOTE(review): also taken when PoolUsed == 0, so an empty MLU cannot be duplicated - confirm this is intended
+
+    memmove(NewMlu ->MemPool, mlu->MemPool, mlu ->PoolUsed);
+    NewMlu ->PoolUsed = mlu ->PoolUsed;
+
+    return NewMlu;
+
+Error:
+
+    if (NewMlu != NULL) cmsMLUfree(NewMlu);   // releases the container together with whatever was already attached to it
+    return NULL;
+}
+
+// Free any used memory: the entry table, the string pool and finally the container. NULL is accepted and ignored.
+void CMSEXPORT cmsMLUfree(cmsMLU* mlu)
+{
+    if (mlu == NULL) return;
+
+    if (mlu ->Entries != NULL) _cmsFree(mlu ->ContextID, mlu ->Entries);
+    if (mlu ->MemPool != NULL) _cmsFree(mlu ->ContextID, mlu ->MemPool);
+
+    // The container goes last because its ContextID is needed by the calls above
+    _cmsFree(mlu ->ContextID, mlu);
+}
+
+
+// The algorithm first searches for an exact match of country and language, if not found it uses
+// the Language. If none is found, first entry is used instead.
+// On return, *len (if given) is the string length in bytes and the Used* codes (if given) identify the entry picked.
+static
+const wchar_t* _cmsMLUgetWide(const cmsMLU* mlu,
+                              cmsUInt32Number *len,
+                              cmsUInt16Number LanguageCode, cmsUInt16Number CountryCode,
+                              cmsUInt16Number* UsedLanguageCode, cmsUInt16Number* UsedCountryCode)
+{
+    cmsUInt32Number k;
+    int Chosen = -1;
+    _cmsMLUentry* Entry;
+
+    if (mlu == NULL) return NULL;
+
+    // An object whose entry table has no capacity cannot hold anything
+    if (mlu -> AllocatedEntries <= 0) return NULL;
+
+    // Scan the entries in use
+    for (k = 0; k < mlu ->UsedEntries; k++) {
+
+        Entry = mlu ->Entries + k;
+        if (Entry -> Language != LanguageCode) continue;
+
+        // Found exact match, no need to look any further
+        if (Entry -> Country == CountryCode) {
+            Chosen = (int) k;
+            break;
+        }
+
+        // Same language, other country: only the first such entry is kept as fallback
+        if (Chosen == -1) Chosen = (int) k;
+    }
+
+    // No string found. Return First one
+    if (Chosen == -1)
+        Chosen = 0;
+
+    Entry = mlu ->Entries + Chosen;
+
+    // Report what was actually picked; every output is optional
+    if (UsedLanguageCode != NULL) *UsedLanguageCode = Entry ->Language;
+    if (UsedCountryCode  != NULL) *UsedCountryCode  = Entry ->Country;
+
+    if (len != NULL) *len = Entry ->Len;
+
+    // StrW is the byte offset of the string inside the pool
+    return (wchar_t*) ((cmsUInt8Number*) mlu ->MemPool + Entry ->StrW);
+}
+
+
+// Obtain an ASCII representation of the wide string. Setting buffer to NULL returns the len
+// The value returned counts the terminating zero. Output is clipped to BufferSize and always zero-terminated.
+cmsUInt32Number CMSEXPORT cmsMLUgetASCII(const cmsMLU* mlu,
+                                       const char LanguageCode[3], const char CountryCode[3],
+                                       char* Buffer, cmsUInt32Number BufferSize)
+{
+    const wchar_t *Src;
+    cmsUInt32Number nBytes = 0;
+    cmsUInt32Number nChars, k;
+
+    cmsUInt16Number Lang  = strTo16(LanguageCode);
+    cmsUInt16Number Cntry = strTo16(CountryCode);
+
+    // Sanitize
+    if (mlu == NULL) return 0;
+
+    // Get WideChar
+    Src = _cmsMLUgetWide(mlu, &nBytes, Lang, Cntry, NULL, NULL);
+    if (Src == NULL) return 0;
+
+    // The stored length is in bytes; one output char is produced per wide char
+    nChars = nBytes / sizeof(wchar_t);
+
+    // Maybe we want only to know the len?
+    if (Buffer == NULL) return nChars + 1; // Note the zero at the end
+
+    // No buffer size means no data
+    if (BufferSize == 0) return 0;
+
+    // Some clipping may be required: one byte is kept for the terminator
+    if (nChars + 1 > BufferSize)
+        nChars = BufferSize - 1;
+
+    // Process each character: the wide value is simply narrowed to a char,
+    // which also maps an embedded zero onto a zero
+    for (k = 0; k < nChars; k++) {
+        Buffer[k] = (char) Src[k];
+    }
+
+    // We put a termination "\0"
+    Buffer[nChars] = 0;
+
+    return nChars + 1;
+}
+
+// Obtain the wide string of the MLU that best matches the requested language and country.
+// Sizes are in bytes and include the zero terminator. A NULL Buffer returns the size needed; output is clipped to BufferSize.
+cmsUInt32Number CMSEXPORT cmsMLUgetWide(const cmsMLU* mlu,
+                                      const char LanguageCode[3], const char CountryCode[3],
+                                      wchar_t* Buffer, cmsUInt32Number BufferSize)
+{
+    const wchar_t *Wide;
+    cmsUInt32Number  StrLen = 0;
+
+    cmsUInt16Number Lang  = strTo16(LanguageCode);
+    cmsUInt16Number Cntry = strTo16(CountryCode);
+
+    // Sanitize
+    if (mlu == NULL) return 0;
+
+    Wide = _cmsMLUgetWide(mlu, &StrLen, Lang, Cntry, NULL, NULL);
+    if (Wide == NULL) return 0;
+
+    // Maybe we want only to know the len?
+    if (Buffer == NULL) return StrLen + sizeof(wchar_t);
+
+    // Need room for at least the terminator, else the subtraction below wraps around and memmove overruns Buffer
+    if (BufferSize < sizeof(wchar_t)) return 0;
+
+    // Some clipping may be required
+    if (BufferSize < StrLen + sizeof(wchar_t))
+        StrLen = BufferSize - sizeof(wchar_t);
+
+    memmove(Buffer, Wide, StrLen);
+    Buffer[StrLen / sizeof(wchar_t)] = 0;
+    return StrLen + sizeof(wchar_t);
+}
+
+
+// Get the language and country that a lookup for the requested pair actually resolves to.
+// Returns FALSE, without touching the outputs, when mlu is NULL or no string is available.
+CMSAPI cmsBool CMSEXPORT cmsMLUgetTranslation(const cmsMLU* mlu,
+                                              const char LanguageCode[3], const char CountryCode[3],
+                                              char ObtainedLanguage[3], char ObtainedCountry[3])
+{
+    cmsUInt16Number WantedLang    = strTo16(LanguageCode);
+    cmsUInt16Number WantedCountry = strTo16(CountryCode);
+    cmsUInt16Number FoundLang, FoundCountry;
+
+    // Sanitize
+    if (mlu == NULL) return FALSE;
+
+    // Only the codes matter here; the string itself and its length are not used
+    if (_cmsMLUgetWide(mlu, NULL, WantedLang, WantedCountry, &FoundLang, &FoundCountry) == NULL)
+        return FALSE;
+
+    // Get used language and code
+    strFrom16(ObtainedLanguage, FoundLang);
+    strFrom16(ObtainedCountry, FoundCountry);
+
+    return TRUE;
+}
+
+
+
+// Get the number of translations (entries in use) in the MLU object; a NULL object counts as zero
+cmsUInt32Number CMSEXPORT cmsMLUtranslationsCount(const cmsMLU* mlu)
+{
+    // Guard against a missing object before reading the counter
+    return (mlu != NULL) ? mlu->UsedEntries : 0;
+}
+
+// Get the language and country codes for a specific MLU index
+// Returns FALSE when mlu is NULL or idx is not below the number of entries in use.
+cmsBool CMSEXPORT cmsMLUtranslationsCodes(const cmsMLU* mlu,
+                                          cmsUInt32Number idx,
+                                          char LanguageCode[3],
+                                          char CountryCode[3])
+{
+    const _cmsMLUentry *Picked;
+
+    if (mlu == NULL || idx >= mlu->UsedEntries) return FALSE;
+
+    Picked = mlu->Entries + idx;
+
+    // Unpack each 16-bit code into two characters followed by a zero terminator
+    strFrom16(LanguageCode, Picked->Language);
+    strFrom16(CountryCode, Picked->Country);
+
+    return TRUE;
+}
+
+
+// Named color lists --------------------------------------------------------------------------------------------
+
+// Grow the list: 64 entries on first use, twice the current capacity afterwards. The array may move in memory.
+// Lists are capped at 100K entries: past that, the storage is released and the list is reset to empty.
+static
+cmsBool  GrowNamedColorList(cmsNAMEDCOLORLIST* v)
+{
+    cmsUInt32Number size;
+    _cmsNAMEDCOLOR * NewPtr;
+
+    if (v == NULL) return FALSE;
+
+    // 64 entries is the initial guess; after that, double
+    size = (v ->Allocated == 0) ? 64 : v ->Allocated * 2;
+
+    // Keep a maximum color lists can grow, 100K entries seems reasonable
+    if (size > 1024 * 100) {
+        _cmsFree(v->ContextID, (void*) v->List);
+        v->List = NULL;
+        // Keep the counters in step with the released storage, so nobody indexes the NULL array
+        v->nColors = v->Allocated = 0;
+        return FALSE;
+    }
+
+    NewPtr = (_cmsNAMEDCOLOR*) _cmsRealloc(v ->ContextID, v ->List, size * sizeof(_cmsNAMEDCOLOR));
+    if (NewPtr == NULL) return FALSE;
+
+    v ->List      = NewPtr;
+    v ->Allocated = size;
+    return TRUE;
+}
+
+// Allocate a list for n elements. Prefix and Suffix are copied, truncated to fit; NULL is taken as empty.
+// Returns NULL when memory runs out or GrowNamedColorList refuses to reach n entries.
+cmsNAMEDCOLORLIST* CMSEXPORT cmsAllocNamedColorList(cmsContext ContextID, cmsUInt32Number n, cmsUInt32Number ColorantCount, const char* Prefix, const char* Suffix)
+{
+    cmsNAMEDCOLORLIST* v = (cmsNAMEDCOLORLIST*) _cmsMallocZero(ContextID, sizeof(cmsNAMEDCOLORLIST));
+    if (v == NULL) return NULL;
+
+    v ->List      = NULL;
+    v ->nColors   = 0;
+    v ->ContextID  = ContextID;
+
+    while (v -> Allocated < n) {
+        if (!GrowNamedColorList(v)) {
+            // An earlier round may have succeeded, so release the color array too, not just v
+            cmsFreeNamedColorList(v);
+            return NULL;
+        }
+    }
+
+    // A skipped NULL string leaves the field as allocated, i.e. empty as long as _cmsMallocZero cleared it
+    if (Prefix != NULL) strncpy(v ->Prefix, Prefix, sizeof(v ->Prefix)-1);
+    if (Suffix != NULL) strncpy(v ->Suffix, Suffix, sizeof(v ->Suffix)-1);
+    v->Prefix[sizeof(v ->Prefix)-1] = v->Suffix[sizeof(v ->Suffix)-1] = 0;
+    v -> ColorantCount = ColorantCount;
+    return v;
+}
+
+void CMSEXPORT cmsFreeNamedColorList(cmsNAMEDCOLORLIST* v)   // Free a list; a NULL list is accepted and ignored
+{
+    if (v != NULL) {
+        if (v ->List != NULL) _cmsFree(v ->ContextID, v ->List);   // the color array first
+        _cmsFree(v ->ContextID, v);                                 // then the list header
+    }
+}
+
+// Deep copy of a named color list, to be released by the caller. NULL if v is NULL or memory runs out.
+cmsNAMEDCOLORLIST* CMSEXPORT cmsDupNamedColorList(const cmsNAMEDCOLORLIST* v)
+{
+    cmsNAMEDCOLORLIST* NewNC;
+
+    if (v == NULL) return NULL;
+    NewNC= cmsAllocNamedColorList(v ->ContextID, v -> nColors, v ->ColorantCount, v ->Prefix, v ->Suffix);
+    if (NewNC == NULL) return NULL;
+
+    // For really large tables we need this
+    while (NewNC ->Allocated < v ->Allocated){
+        if (!GrowNamedColorList(NewNC)) { cmsFreeNamedColorList(NewNC); return NULL; }   // don't leak the partial copy
+    }
+
+    memmove(NewNC ->Prefix, v ->Prefix, sizeof(v ->Prefix));
+    memmove(NewNC ->Suffix, v ->Suffix, sizeof(v ->Suffix));
+    NewNC ->ColorantCount = v ->ColorantCount;
+    if (v ->nColors > 0) memmove(NewNC->List, v ->List, v->nColors * sizeof(_cmsNAMEDCOLOR));   // empty lists may have no array at all
+    NewNC ->nColors = v ->nColors;
+    return NewNC;
+}
+
+
+// Append a color to a list. List pointer may change if reallocated
+// NULL PCS/Colorant store zeros, a NULL Name stores an empty name; names are truncated to cmsMAX_PATH-1 chars.
+cmsBool  CMSEXPORT cmsAppendNamedColor(cmsNAMEDCOLORLIST* NamedColorList,
+                                       const char* Name,
+                                       cmsUInt16Number PCS[3], cmsUInt16Number Colorant[cmsMAXCHANNELS])
+{
+    cmsUInt32Number k;
+    _cmsNAMEDCOLOR* Slot;
+
+    if (NamedColorList == NULL) return FALSE;
+
+    // Grow when the next color would not fit
+    if (NamedColorList ->nColors + 1 > NamedColorList ->Allocated &&
+        !GrowNamedColorList(NamedColorList)) return FALSE;
+    // Taken after growing, since the array may have moved
+    Slot = &NamedColorList ->List[NamedColorList ->nColors];
+
+    for (k = 0; k < NamedColorList ->ColorantCount; k++)
+        Slot ->DeviceColorant[k] = (Colorant != NULL) ? Colorant[k] : (cmsUInt16Number) 0;
+    for (k = 0; k < 3; k++)
+        Slot ->PCS[k] = (PCS != NULL) ? PCS[k] : (cmsUInt16Number) 0;
+
+    if (Name == NULL)
+        Slot ->Name[0] = 0;
+    else {
+        strncpy(Slot ->Name, Name, cmsMAX_PATH-1);
+        Slot ->Name[cmsMAX_PATH-1] = 0;
+    }
+
+    NamedColorList ->nColors++;
+    return TRUE;
+}
+
+// Returns number of elements currently stored in the list; a NULL list counts as empty
+cmsUInt32Number CMSEXPORT cmsNamedColorCount(const cmsNAMEDCOLORLIST* NamedColorList)
+{
+    // No list, no colors
+    return (NamedColorList != NULL) ? NamedColorList ->nColors : 0;
+}
+
+// Info about a given color. Every output pointer is optional; NULL ones are skipped.
+// Returns FALSE if the list is NULL or nColor is out of range. Output buffers are not bounds-checked.
+cmsBool  CMSEXPORT cmsNamedColorInfo(const cmsNAMEDCOLORLIST* NamedColorList, cmsUInt32Number nColor,
+                                     char* Name,
+                                     char* Prefix,
+                                     char* Suffix,
+                                     cmsUInt16Number* PCS,
+                                     cmsUInt16Number* Colorant)
+{
+    const _cmsNAMEDCOLOR* Color;
+
+    if (NamedColorList == NULL) return FALSE;
+    if (nColor >= cmsNamedColorCount(NamedColorList)) return FALSE;
+    Color = &NamedColorList ->List[nColor];
+
+    // strcpy instead of strncpy because many apps are using small buffers
+    if (Name != NULL)   strcpy(Name, Color ->Name);
+    if (Prefix != NULL) strcpy(Prefix, NamedColorList ->Prefix);
+    if (Suffix != NULL) strcpy(Suffix, NamedColorList ->Suffix);
+    if (PCS != NULL)
+        memmove(PCS, Color ->PCS, 3*sizeof(cmsUInt16Number));
+    if (Colorant != NULL)
+        memmove(Colorant, Color ->DeviceColorant, sizeof(cmsUInt16Number) * NamedColorList ->ColorantCount);
+
+    return TRUE;
+}
+
+// Search for a given color name (no prefix or suffix); names are compared with cmsstrcasecmp.
+// Returns the index of the first match, or -1 when nothing matches or the list is NULL.
+cmsInt32Number CMSEXPORT cmsNamedColorIndex(const cmsNAMEDCOLORLIST* NamedColorList, const char* Name)
+{
+    cmsUInt32Number k, Total;
+
+    if (NamedColorList == NULL) return -1;
+
+    Total = cmsNamedColorCount(NamedColorList);
+    for (k = 0; k < Total; k++) {
+        if (cmsstrcasecmp(Name,  NamedColorList->List[k].Name) != 0) continue;
+        return (cmsInt32Number) k;
+    }
+    return -1;
+}
+
+// MPE support -----------------------------------------------------------------------------------------------------------------
+
+static
+void FreeNamedColorList(cmsStage* mpe)
+{
+    // Free callback of the named color stage: the stage data is the named color list to release
+    cmsFreeNamedColorList((cmsNAMEDCOLORLIST*) mpe ->Data);
+}
+
+static
+void* DupNamedColorList(cmsStage* mpe)
+{
+    // Duplication callback of the named color stage: hands back a copy of the list held as stage data
+    return cmsDupNamedColorList((cmsNAMEDCOLORLIST*) mpe ->Data);
+}
+
+static
+void EvalNamedColorPCS(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
+{
+    // In[0] * 65535, saturated to a 16-bit word, is the color index; Out gets that color's PCS words divided by 65535
+    cmsNAMEDCOLORLIST* List = (cmsNAMEDCOLORLIST*) mpe ->Data;
+    cmsUInt16Number index = (cmsUInt16Number) _cmsQuickSaturateWord(In[0] * 65535.0);
+    cmsUInt32Number k;
+
+    if (index < List ->nColors) {
+        // Named color always uses Lab
+        for (k = 0; k < 3; k++)
+            Out[k] = (cmsFloat32Number) (List ->List[index].PCS[k] / 65535.0);
+        return;
+    }
+
+    cmsSignalError(List ->ContextID, cmsERROR_RANGE, "Color %d out of range", index);
+    Out[0] = Out[1] = Out[2] = 0.0f;
+}
+
+static
+void EvalNamedColor(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
+{
+    // In[0] * 65535, saturated to a 16-bit word, is the color index.
+    // Out receives one value per colorant of the list.
+    cmsNAMEDCOLORLIST* List = (cmsNAMEDCOLORLIST*) mpe ->Data;
+    cmsUInt16Number index = (cmsUInt16Number) _cmsQuickSaturateWord(In[0] * 65535.0);
+    cmsUInt32Number k;
+    cmsBool InRange = (index < List ->nColors);
+
+    if (!InRange)
+        cmsSignalError(List ->ContextID, cmsERROR_RANGE, "Color %d out of range", index);
+
+    // The stored device colorant word divided by 65535, or zero when the index is invalid
+    for (k = 0; k < List ->ColorantCount; k++) {
+        Out[k] = InRange ? (cmsFloat32Number) (List ->List[index].DeviceColorant[k] / 65535.0) : 0.0f;
+    }
+}
+
+
+// Named color lookup element: one input channel (the color index) and, as output, either the 3 PCS
+// values or the device colorants. The stage is given its own copy of the list.
+cmsStage* CMSEXPORT _cmsStageAllocNamedColor(cmsNAMEDCOLORLIST* NamedColorList, cmsBool UsePCS)
+{
+    cmsNAMEDCOLORLIST* Copy = cmsDupNamedColorList(NamedColorList);
+    cmsUInt32Number nOut    = UsePCS ? 3 : NamedColorList ->ColorantCount;
+
+    // Duplication and disposal of that copy are delegated to the two callbacks passed below
+    return _cmsStageAllocPlaceholder(NamedColorList ->ContextID, cmsSigNamedColorElemType, 1, nOut,
+                                     UsePCS ? EvalNamedColorPCS : EvalNamedColor,
+                                     DupNamedColorList, FreeNamedColorList, Copy);
+}
+
+
+// Retrieve the named color list from a transform. Should be first element in the LUT. The list is not copied.
+// Returns NULL if the transform or its pipeline is missing, or the first stage is not a named color stage.
+cmsNAMEDCOLORLIST* CMSEXPORT cmsGetNamedColorList(cmsHTRANSFORM xform)
+{
+    _cmsTRANSFORM* v = (_cmsTRANSFORM*) xform;
+    cmsStage* mpe  = (v != NULL && v ->Lut != NULL) ? v ->Lut->Elements : NULL;   // first stage, if there is one
+    if (mpe == NULL || mpe ->Type != cmsSigNamedColorElemType) return NULL;
+    return (cmsNAMEDCOLORLIST*) mpe ->Data;
+}
+
+
+// Profile sequence description routines -------------------------------------------------------------------------------------
+
+cmsSEQ* CMSEXPORT cmsAllocProfileSequenceDescription(cmsContext ContextID, cmsUInt32Number n)
+{
+    cmsSEQ* Seq;
+    cmsPSEQDESC* Items;
+    cmsUInt32Number k;
+
+    // Allocates a sequence of n profile descriptions with no texts attached. An empty sequence is refused,
+    // and so is anything above 255 profiles: an arbitrary limit that closes the door to exploits.
+    if (n == 0 || n > 255) return NULL;
+
+    Seq = (cmsSEQ*) _cmsMallocZero(ContextID, sizeof(cmsSEQ));
+    if (Seq == NULL) return NULL;
+
+    Items = (cmsPSEQDESC*) _cmsCalloc(ContextID, n, sizeof(cmsPSEQDESC));
+    if (Items == NULL) {
+        _cmsFree(ContextID, Seq);
+        return NULL;
+    }
+
+    // Explicit NULLs: the free routine releases only the texts that are not NULL
+    for (k = 0; k < n; k++) {
+        Items[k].Manufacturer = NULL;
+        Items[k].Model        = NULL;
+        Items[k].Description  = NULL;
+    }
+
+    Seq -> ContextID = ContextID;
+    Seq -> seq       = Items;
+    Seq -> n         = n;
+    return Seq;
+}
+
+void CMSEXPORT cmsFreeProfileSequenceDescription(cmsSEQ* pseq)
+{
+    cmsUInt32Number i;
+    // Releases a profile sequence: the texts of every description, the array, then the container
+    if (pseq == NULL) return;   // a NULL sequence is accepted and ignored
+    for (i=0; pseq ->seq != NULL && i < pseq ->n; i++) {   // skipped when the array is missing, so seq[i] is never read through NULL
+        if (pseq ->seq[i].Manufacturer != NULL) cmsMLUfree(pseq ->seq[i].Manufacturer);
+        if (pseq ->seq[i].Model != NULL) cmsMLUfree(pseq ->seq[i].Model);
+        if (pseq ->seq[i].Description != NULL) cmsMLUfree(pseq ->seq[i].Description);
+    }
+    if (pseq ->seq != NULL) _cmsFree(pseq ->ContextID, pseq ->seq);
+    _cmsFree(pseq -> ContextID, pseq);   // container last: its ContextID is used by the calls above
+}
+
+cmsSEQ* CMSEXPORT cmsDupProfileSequenceDescription(const cmsSEQ* pseq)
+{
+    cmsSEQ *NewSeq;
+    cmsUInt32Number i;
+
+    // Deep copy: scalar fields are copied, the three MLU texts of each description are duplicated.
+    // Duplicating NULL yields NULL, and so does running out of memory for the container or the array.
+    if (pseq == NULL)
+        return NULL;
+
+    NewSeq = (cmsSEQ*) _cmsMalloc(pseq -> ContextID, sizeof(cmsSEQ));
+    if (NewSeq == NULL) return NULL;
+
+    // Fill in every header field right away: _cmsMalloc (unlike _cmsMallocZero) is not assumed to clear memory
+    NewSeq -> ContextID = pseq ->ContextID;
+    NewSeq -> n        = pseq ->n;
+    NewSeq -> seq      = (cmsPSEQDESC*) _cmsCalloc(pseq ->ContextID, pseq ->n, sizeof(cmsPSEQDESC));
+    if (NewSeq ->seq == NULL) {
+        // Only the container exists at this point, so free it directly. The generic free
+        // routine would walk n entries of an array that was never allocated.
+        _cmsFree(pseq ->ContextID, NewSeq);
+        return NULL;
+    }
+
+    for (i=0; i < pseq->n; i++) {
+
+        memmove(&NewSeq ->seq[i].attributes, &pseq ->seq[i].attributes, sizeof(cmsUInt64Number));
+        memmove(&NewSeq ->seq[i].ProfileID, &pseq ->seq[i].ProfileID, sizeof(cmsProfileID));
+
+        NewSeq ->seq[i].deviceMfg   = pseq ->seq[i].deviceMfg;
+        NewSeq ->seq[i].deviceModel = pseq ->seq[i].deviceModel;
+        NewSeq ->seq[i].technology  = pseq ->seq[i].technology;
+
+        NewSeq ->seq[i].Manufacturer = cmsMLUdup(pseq ->seq[i].Manufacturer);
+        NewSeq ->seq[i].Model        = cmsMLUdup(pseq ->seq[i].Model);
+        NewSeq ->seq[i].Description  = cmsMLUdup(pseq ->seq[i].Description);
+    }
+
+    return NewSeq;
+}
+
+// Dictionaries --------------------------------------------------------------------------------------------------------
+
+// Dictionaries are just very simple linked lists
+
+
+typedef struct _cmsDICT_struct {   // Private representation behind the cmsHANDLE returned by cmsDictAlloc
+    cmsDICTentry* head;            // First entry of the linked list; NULL while the dictionary is empty
+    cmsContext ContextID;          // Context passed to the allocation and free calls made for this dictionary
+} _cmsDICT;
+
+
+// Allocate an empty dictionary (no entries yet). Returns NULL if memory cannot be obtained.
+cmsHANDLE CMSEXPORT cmsDictAlloc(cmsContext ContextID)
+{
+    _cmsDICT* NewDict;
+
+    NewDict = (_cmsDICT*) _cmsMallocZero(ContextID, sizeof(_cmsDICT));
+    if (NewDict != NULL)
+        NewDict ->ContextID = ContextID;   // head is not set here: it relies on _cmsMallocZero clearing it
+    return (cmsHANDLE) NewDict;
+}
+
+// Dispose resources: every entry together with its name, value and display texts, then the dictionary itself.
+// The handle must not be NULL (asserted).
+void CMSEXPORT cmsDictFree(cmsHANDLE hDict)
+{
+    _cmsDICT* dict = (_cmsDICT*) hDict;
+    cmsDICTentry *Node, *Following;
+
+    _cmsAssert(dict != NULL);
+
+    // Walk the list freeing all nodes
+    for (Node = dict ->head; Node != NULL; Node = Following) {
+
+        // Don't fall in the habitual trap... the link is read before the node is released
+        Following = Node ->Next;
+
+        if (Node ->DisplayName  != NULL) cmsMLUfree(Node ->DisplayName);
+        if (Node ->DisplayValue != NULL) cmsMLUfree(Node ->DisplayValue);
+        if (Node ->Name  != NULL) _cmsFree(dict ->ContextID, Node -> Name);
+        if (Node ->Value != NULL) _cmsFree(dict ->ContextID, Node -> Value);
+
+        _cmsFree(dict ->ContextID, Node);
+    }
+
+    // Finally the dictionary header
+    _cmsFree(dict ->ContextID, dict);
+}
+
+
+// Duplicate a wide char string, terminator included. NULL in, NULL out.
+static
+wchar_t* DupWcs(cmsContext ContextID, const wchar_t* ptr)
+{
+    return (ptr == NULL) ? NULL
+         : (wchar_t*) _cmsDupMem(ContextID, ptr, (mywcslen(ptr) + 1) * sizeof(wchar_t));
+}
+
+// Add a new entry to the linked list. Name, Value and both display texts are copied; the new entry becomes the head.
+// FALSE only if the entry itself cannot be allocated: the results of the individual copies are not checked.
+cmsBool CMSEXPORT cmsDictAddEntry(cmsHANDLE hDict, const wchar_t* Name, const wchar_t* Value, const cmsMLU *DisplayName, const cmsMLU *DisplayValue)
+{
+    _cmsDICT* dict = (_cmsDICT*) hDict;
+    cmsDICTentry *Node;
+
+    _cmsAssert(dict != NULL);
+    _cmsAssert(Name != NULL);
+
+    Node = (cmsDICTentry*) _cmsMallocZero(dict ->ContextID, sizeof(cmsDICTentry));
+    if (Node != NULL) {
+        Node ->DisplayName  = cmsMLUdup(DisplayName);
+        Node ->DisplayValue = cmsMLUdup(DisplayValue);
+        Node ->Name         = DupWcs(dict ->ContextID, Name);
+        Node ->Value        = DupWcs(dict ->ContextID, Value);
+        Node ->Next = dict ->head;   // push at the front of the list
+        dict ->head = Node;
+        return TRUE;
+    }
+    return FALSE;
+}
+
+
+// Duplicates an existing dictionary. Entries are re-added one by one with cmsDictAddEntry, which inserts
+// at the head, so the copy lists them in the reverse order of the original. Returns NULL on failure.
+cmsHANDLE CMSEXPORT cmsDictDup(cmsHANDLE hDict)
+{
+    _cmsDICT* Source = (_cmsDICT*) hDict;
+    cmsHANDLE hCopy;
+    const cmsDICTentry *Node;
+
+    _cmsAssert(Source != NULL);
+
+    hCopy = cmsDictAlloc(Source ->ContextID);
+    if (hCopy == NULL) return NULL;
+
+    // Walk the original list, copying every node into the new dictionary
+    for (Node = Source ->head; Node != NULL; Node = Node ->Next) {
+
+        if (cmsDictAddEntry(hCopy, Node ->Name, Node ->Value, Node ->DisplayName, Node ->DisplayValue))
+            continue;
+
+        // Could not add: throw away the partial copy
+        cmsDictFree(hCopy);
+        return NULL;
+    }
+
+    // Every entry made it across
+    return hCopy;
+}
+
+// Get a pointer to the linked list: the first entry, or NULL for an empty or NULL dictionary
+const cmsDICTentry* CMSEXPORT cmsDictGetEntryList(cmsHANDLE hDict)
+{
+    const _cmsDICT* dict = (const _cmsDICT*) hDict;
+
+    // The entries are not copied: the pointer refers to the dictionary's own list
+    return (dict != NULL) ? dict ->head : NULL;
+}
+
+// Helper for external languages: steps to the entry after e (NULL at the end of the list or for a NULL e)
+const cmsDICTentry* CMSEXPORT cmsDictNextEntry(const cmsDICTentry* e)
+{
+    // A NULL entry has no successor
+    return (e != NULL) ? e ->Next : NULL;
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmsopt.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmsopt.c
new file mode 100644
index 0000000000..5be87bba30
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmsopt.c
@@ -0,0 +1,1960 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+//----------------------------------------------------------------------------------
+
+// Optimization for 8 bits, Shaper-CLUT (3 inputs only)
+// Holds the tables that PrelinOpt8alloc() precomputes for each of the 256 possible 8-bit
+// values of every input channel. PrelinEval8() indexes them with the high byte of the
+// 16-bit input.
+typedef struct {
+
+    cmsContext ContextID;       // Context passed to PrelinOpt8alloc()
+
+    const cmsInterpParams* p;   // Tetrahedrical interpolation parameters. This is a not-owned pointer.
+
+    // Fractional part (FIXED_REST_TO_INT) of the fixed-point grid position, one table per channel
+    cmsUInt16Number rx[256], ry[256], rz[256];
+    // Integer grid node (FIXED_TO_INT) already multiplied by the channel's p->opta[] factor
+    cmsUInt32Number X0[256], Y0[256], Z0[256];  // Precomputed nodes and offsets for 8-bit input data
+
+
+} Prelin8Data;
+
+
+// Generic optimization for 16 bits Shaper-CLUT-Shaper (any inputs)
+// Built by PrelinOpt16alloc(), evaluated by PrelinEval16(), released by PrelinOpt16free()
+// and copied by Prelin16dup().
+typedef struct {
+
+    cmsContext ContextID;
+
+    // Number of channels
+    cmsUInt32Number nInputs;
+    cmsUInt32Number nOutputs;
+
+    // Per-input-channel curve evaluator and its parameters. When no input curves are given,
+    // PrelinOpt16alloc() stores Eval16nop1D / NULL here.
+    _cmsInterpFn16 EvalCurveIn16[MAX_INPUT_DIMENSIONS];       // The maximum number of input channels is known in advance
+    cmsInterpParams*  ParamsCurveIn16[MAX_INPUT_DIMENSIONS];
+
+    _cmsInterpFn16 EvalCLUT;            // The evaluator for 3D grid
+    const cmsInterpParams* CLUTparams;  // (not-owned pointer)
+
+
+    // The two arrays below have nOutputs elements each. The arrays themselves are allocated by
+    // PrelinOpt16alloc() and freed by PrelinOpt16free(); only the interpolation parameters the
+    // second array refers to belong to somebody else (the tone curves).
+    _cmsInterpFn16* EvalCurveOut16;       // Points to an array of curve evaluators in 16 bits
+    cmsInterpParams**  ParamsCurveOut16;  // Points to an array of references to interpolation params (the params are not owned)
+
+
+} Prelin16Data;
+
+
+// Optimization for matrix-shaper in 8 bits. Numbers are operated in n.14 signed, tables are stored in 1.14 fixed
+
+// Fixed point number with 14 fractional bits, stored in a 32-bit signed integer
+typedef cmsInt32Number cmsS1Fixed14Number;   // Note that this may hold more than 16 bits!
+
+// Converts a floating point value to 1.14 fixed point, rounding to nearest (1.0 maps to 16384)
+#define DOUBLE_TO_1FIXED14(x) ((cmsS1Fixed14Number) floor((x) * 16384.0 + 0.5))
+
+// Precomputed tables for the shaper -> matrix -> shaper chain
+typedef struct {
+
+    cmsContext ContextID;
+
+    // First set of shapers, indexed by the 8-bit input value
+    cmsS1Fixed14Number Shaper1R[256];  // from 0..255 to 1.14  (0.0...1.0)
+    cmsS1Fixed14Number Shaper1G[256];
+    cmsS1Fixed14Number Shaper1B[256];
+
+    cmsS1Fixed14Number Mat[3][3];     // n.14 to n.14 (needs a saturation after that)
+    cmsS1Fixed14Number Off[3];
+
+    // Second set of shapers, one entry per 1.14 value in 0..16384 inclusive (hence 16385 entries)
+    cmsUInt16Number Shaper2R[16385];    // 1.14 to 0..255
+    cmsUInt16Number Shaper2G[16385];
+    cmsUInt16Number Shaper2B[16385];
+
+} MatShaper8Data;
+
+// Curves, optimization is shared between 8 and 16 bits
+// Describes a set of sampled curves kept as plain 16-bit tables.
+typedef struct {
+
+    cmsContext ContextID;
+
+    cmsUInt32Number nCurves;      // Number of curves
+    cmsUInt32Number nElements;    // Elements in curves
+    cmsUInt16Number** Curves;     // Points to a dynamically  allocated array
+                                  // NOTE(review): presumably nCurves tables of nElements entries each - confirm against the allocator
+
+} Curves16Data;
+
+
+// Simple optimizations ----------------------------------------------------------------------------------------------------------
+
+
+// Unlinks the stage referenced by *head from its chain and destroys it.
+// "head" is the address of the pointer that currently links to the stage, so after the
+// call that pointer refers to whatever followed the removed stage. *head must not be NULL.
+static
+void _RemoveElement(cmsStage** head)
+{
+    cmsStage* Doomed = *head;
+
+    *head = Doomed ->Next;
+    cmsStageFree(Doomed);
+}
+
+// Removes every stage of the pipeline whose "Implements" signature equals UnaryOp.
+// The chain is walked through a pointer to the link that holds each stage, which lets
+// _RemoveElement() splice the stage out in place. Returns TRUE if anything was removed.
+static
+cmsBool _Remove1Op(cmsPipeline* Lut, cmsStageSignature UnaryOp)
+{
+    cmsStage** Link = &Lut ->Elements;
+    cmsBool Removed = FALSE;
+
+    while (*Link != NULL) {
+
+        if ((*Link) ->Implements != UnaryOp) {
+
+            // Not a match, move on to the next link
+            Link = &((*Link) -> Next);
+            continue;
+        }
+
+        // After removal the same link already refers to the following stage
+        _RemoveElement(Link);
+        Removed = TRUE;
+    }
+
+    return Removed;
+}
+
+// Removes every pair of adjacent stages where the first implements Op1 and the one right
+// after it implements Op2. Returns TRUE if at least one pair was removed.
+static
+cmsBool _Remove2Op(cmsPipeline* Lut, cmsStageSignature Op1, cmsStageSignature Op2)
+{
+    cmsStage** First = &Lut ->Elements;
+    cmsBool Removed = FALSE;
+
+    // A pair needs a stage and a successor
+    while (*First != NULL && (*First) ->Next != NULL) {
+
+        cmsStage** Second = &((*First) -> Next);
+
+        if ((*First) ->Implements == Op1 && (*Second) ->Implements == Op2) {
+
+            // Drop the second one first: its link lives inside the first stage
+            _RemoveElement(Second);
+            _RemoveElement(First);
+            Removed = TRUE;
+        }
+        else {
+            First = Second;
+        }
+    }
+
+    return Removed;
+}
+
+
+// TRUE when a and b differ by less than the 0.00001f tolerance
+static
+cmsBool CloseEnoughFloat(cmsFloat64Number a, cmsFloat64Number b)
+{
+       const cmsFloat64Number Delta = b - a;
+
+       return fabs(Delta) < 0.00001f;
+}
+
+// TRUE when each of the 9 coefficients of "a" is within the CloseEnoughFloat() tolerance
+// of the corresponding coefficient of the 3x3 identity matrix
+static
+cmsBool  isFloatMatrixIdentity(const cmsMAT3* a)
+{
+       cmsMAT3 Reference;
+       int Row, Col;
+
+       _cmsMAT3identity(&Reference);
+
+       for (Row = 0; Row < 3; Row++) {
+              for (Col = 0; Col < 3; Col++) {
+
+                     if (CloseEnoughFloat(a->v[Row].n[Col], Reference.v[Row].n[Col])) continue;
+
+                     // One coefficient is off, no need to look further
+                     return FALSE;
+              }
+       }
+
+       return TRUE;
+}
+// If two adjacent matrices are found, multiply them and replace both stages by a single
+// matrix stage holding the product. If the product is (close to) the identity, both stages
+// are simply removed. Only 3x3 matrices without offset are merged; the scan stops at the
+// first adjacent matrix pair that does not qualify.
+//
+// The replacement stage is allocated BEFORE the original stages are freed, so running out of
+// memory leaves the pipeline untouched instead of silently dropping the transform.
+//
+// Returns TRUE if at least one pair was merged or removed, even when the scan stopped early.
+static
+cmsBool _MultiplyMatrix(cmsPipeline* Lut)
+{
+       cmsStage** pt1;
+       cmsStage** pt2;
+       cmsStage*  chain;
+       cmsBool AnyOpt = FALSE;
+
+       pt1 = &Lut->Elements;
+
+       while (*pt1 != NULL) {
+
+              pt2 = &((*pt1)->Next);
+              if (*pt2 == NULL) return AnyOpt;
+
+              if ((*pt1)->Implements == cmsSigMatrixElemType && (*pt2)->Implements == cmsSigMatrixElemType) {
+
+                     // Get both matrices
+                     _cmsStageMatrixData* m1 = (_cmsStageMatrixData*) cmsStageData(*pt1);
+                     _cmsStageMatrixData* m2 = (_cmsStageMatrixData*) cmsStageData(*pt2);
+                     cmsStage* Multmat = NULL;
+                     cmsMAT3 res;
+
+                     // Input offset and output offset should be zero to use this optimization.
+                     // Report what has been done so far rather than a flat FALSE.
+                     if (m1->Offset != NULL || m2 ->Offset != NULL ||
+                            cmsStageInputChannels(*pt1) != 3 || cmsStageOutputChannels(*pt1) != 3 ||
+                            cmsStageInputChannels(*pt2) != 3 || cmsStageOutputChannels(*pt2) != 3)
+                            return AnyOpt;
+
+                     // Multiply both matrices to get the result
+                     _cmsMAT3per(&res, (cmsMAT3*)m2->Double, (cmsMAT3*)m1->Double);
+
+                     // Now what if the result is a plain identity? In that case no stage is needed
+                     if (!isFloatMatrixIdentity(&res)) {
+
+                            // We can not get rid of full matrix. Allocate it while the originals still exist.
+                            Multmat = cmsStageAllocMatrix(Lut->ContextID, 3, 3, (const cmsFloat64Number*) &res, NULL);
+                            if (Multmat == NULL) return AnyOpt;  // Out of memory: keep the pipeline as it is
+                     }
+
+                     // Get the next in chain after the matrices
+                     chain = (*pt2)->Next;
+
+                     // Remove both matrices
+                     _RemoveElement(pt2);
+                     _RemoveElement(pt1);
+
+                     if (Multmat != NULL) {
+
+                            // Recover the chain
+                            Multmat->Next = chain;
+                            *pt1 = Multmat;
+                     }
+
+                     AnyOpt = TRUE;
+              }
+              else
+                     pt1 = &((*pt1)->Next);
+       }
+
+       return AnyOpt;
+}
+
+
+// Preoptimize just gets rid of no-ops and of conversions that come in mutually cancelling
+// pairs, such as a v2 to v4 conversion followed by a v4 to v2 one. It also merges adjacent
+// matrices. The passes are repeated until a whole round changes nothing.
+// Returns TRUE if the pipeline was modified in any way.
+static
+cmsBool PreOptimize(cmsPipeline* Lut)
+{
+    // Adjacent pairs that cancel each other, tried in this order on every round
+    static const cmsStageSignature Pairs[][2] = {
+
+        { cmsSigXYZ2LabElemType, cmsSigLab2XYZElemType },   // XYZ2Lab followed by Lab2XYZ
+        { cmsSigLab2XYZElemType, cmsSigXYZ2LabElemType },   // Lab2XYZ followed by XYZ2Lab
+        { cmsSigLabV4toV2,       cmsSigLabV2toV4       },   // V4 to V2 followed by V2 to V4
+        { cmsSigLabV2toV4,       cmsSigLabV4toV2       },   // V2 to V4 followed by V4 to V2
+        { cmsSigLab2FloatPCS,    cmsSigFloatPCS2Lab    },   // Float PCS Lab conversions
+        { cmsSigXYZ2FloatPCS,    cmsSigFloatPCS2XYZ    }    // Float PCS XYZ conversions
+    };
+
+    cmsBool Changed = FALSE, Round;
+    cmsUInt32Number k;
+
+    do {
+
+        // Remove all identities
+        Round = _Remove1Op(Lut, cmsSigIdentityElemType);
+
+        // Remove the cancelling pairs
+        for (k = 0; k < sizeof(Pairs) / sizeof(Pairs[0]); k++) {
+
+            if (_Remove2Op(Lut, Pairs[k][0], Pairs[k][1])) Round = TRUE;
+        }
+
+        // Simplify matrix.
+        if (_MultiplyMatrix(Lut)) Round = TRUE;
+
+        if (Round) Changed = TRUE;
+
+    } while (Round);
+
+    return Changed;
+}
+
+// One-channel pass-through evaluator: copies the single input value to the output.
+// It is used in place of a curve evaluator when there is no curve; "p" is ignored.
+static
+void Eval16nop1D(CMSREGISTER const cmsUInt16Number Input[],
+                 CMSREGISTER cmsUInt16Number Output[],
+                 CMSREGISTER const struct _cms_interp_struc* p)
+{
+    cmsUNUSED_PARAMETER(p);
+
+    *Output = *Input;
+}
+
+// 16-bit evaluator for the curves -> CLUT -> curves chain described by a Prelin16Data block
+// (passed in D). Each input channel goes through its own input evaluator, the results are
+// fed as a whole to the CLUT evaluator, and each CLUT output goes through its own output
+// evaluator.
+static
+void PrelinEval16(CMSREGISTER const cmsUInt16Number Input[],
+                  CMSREGISTER cmsUInt16Number Output[],
+                  CMSREGISTER const void* D)
+{
+    const Prelin16Data* Opt = (const Prelin16Data*) D;
+    cmsUInt16Number  AfterIn[MAX_INPUT_DIMENSIONS];
+    cmsUInt16Number  AfterCLUT[cmsMAXCHANNELS];
+    cmsUInt32Number  Chan;
+
+    // Input curves, one channel at a time
+    for (Chan = 0; Chan < Opt ->nInputs; Chan++)
+        Opt ->EvalCurveIn16[Chan](Input + Chan, AfterIn + Chan, Opt ->ParamsCurveIn16[Chan]);
+
+    // The grid takes all channels at once
+    Opt ->EvalCLUT(AfterIn, AfterCLUT, Opt ->CLUTparams);
+
+    // Output curves, one channel at a time
+    for (Chan = 0; Chan < Opt ->nOutputs; Chan++)
+        Opt ->EvalCurveOut16[Chan](AfterCLUT + Chan, Output + Chan, Opt ->ParamsCurveOut16[Chan]);
+}
+
+
+// Releases a Prelin16Data block: first the two output-curve arrays, then the block itself.
+// ptr must refer to a valid Prelin16Data.
+static
+void PrelinOpt16free(cmsContext ContextID, void* ptr)
+{
+    Prelin16Data* Block = (Prelin16Data*) ptr;
+    _cmsInterpFn16*   OutEvaluators = Block ->EvalCurveOut16;
+    cmsInterpParams** OutParams     = Block ->ParamsCurveOut16;
+
+    _cmsFree(ContextID, OutEvaluators);
+    _cmsFree(ContextID, OutParams);
+
+    _cmsFree(ContextID, Block);
+}
+
+// Duplicates a Prelin16Data block together with its two output-curve arrays (nOutputs
+// elements each). The interpolation parameters the arrays refer to are shared, not copied.
+// Returns NULL if any of the three allocations fails; in that case nothing is leaked and
+// no half-built block (with NULL arrays that PrelinEval16 would dereference) is returned.
+static
+void* Prelin16dup(cmsContext ContextID, const void* ptr)
+{
+    Prelin16Data* p16 = (Prelin16Data*) ptr;
+    Prelin16Data* Duped = (Prelin16Data*) _cmsDupMem(ContextID, p16, sizeof(Prelin16Data));
+
+    if (Duped == NULL) return NULL;
+
+    // The plain copy above still points to the arrays of the original, replace them by copies
+    Duped->EvalCurveOut16 = (_cmsInterpFn16*) _cmsDupMem(ContextID, p16->EvalCurveOut16, p16->nOutputs * sizeof(_cmsInterpFn16));
+    Duped->ParamsCurveOut16 = (cmsInterpParams**)_cmsDupMem(ContextID, p16->ParamsCurveOut16, p16->nOutputs * sizeof(cmsInterpParams*));
+
+    if (Duped->EvalCurveOut16 == NULL || Duped->ParamsCurveOut16 == NULL) {
+
+        // Release whatever was obtained
+        if (Duped->EvalCurveOut16 != NULL) _cmsFree(ContextID, Duped->EvalCurveOut16);
+        if (Duped->ParamsCurveOut16 != NULL) _cmsFree(ContextID, Duped->ParamsCurveOut16);
+        _cmsFree(ContextID, Duped);
+        return NULL;
+    }
+
+    return Duped;
+}
+
+
+// Builds the Prelin16Data block used by PrelinEval16().
+//   ContextID  - context used for the allocations, also stored in the block
+//   ColorMap   - interpolation parameters of the CLUT (not owned, only referenced)
+//   nInputs/In - number of input channels and their curves; In may be NULL, in which case
+//                every input channel gets the pass-through evaluator Eval16nop1D
+//   nOutputs/Out - same for the output side
+// The curve parameters are referenced, not copied. Returns NULL if any allocation fails.
+static
+Prelin16Data* PrelinOpt16alloc(cmsContext ContextID,
+                               const cmsInterpParams* ColorMap,
+                               cmsUInt32Number nInputs, cmsToneCurve** In,
+                               cmsUInt32Number nOutputs, cmsToneCurve** Out )
+{
+    cmsUInt32Number i;
+    Prelin16Data* p16 = (Prelin16Data*)_cmsMallocZero(ContextID, sizeof(Prelin16Data));
+    if (p16 == NULL) return NULL;
+
+    p16 ->ContextID = ContextID;
+    p16 ->nInputs = nInputs;
+    p16 ->nOutputs = nOutputs;
+
+
+    for (i=0; i < nInputs; i++) {
+
+        if (In == NULL) {
+            p16 -> ParamsCurveIn16[i] = NULL;
+            p16 -> EvalCurveIn16[i] = Eval16nop1D;
+
+        }
+        else {
+            p16 -> ParamsCurveIn16[i] = In[i] ->InterpParams;
+            p16 -> EvalCurveIn16[i] = p16 ->ParamsCurveIn16[i]->Interpolation.Lerp16;
+        }
+    }
+
+    p16 ->CLUTparams = ColorMap;
+    p16 ->EvalCLUT   = ColorMap ->Interpolation.Lerp16;
+
+
+    // The output side is sized at run time. Both arrays are written right below, so a failed
+    // allocation has to be caught here.
+    p16 -> EvalCurveOut16 = (_cmsInterpFn16*) _cmsCalloc(ContextID, nOutputs, sizeof(_cmsInterpFn16));
+    if (p16 ->EvalCurveOut16 == NULL) {
+
+        _cmsFree(ContextID, p16);
+        return NULL;
+    }
+
+    p16 -> ParamsCurveOut16 = (cmsInterpParams**) _cmsCalloc(ContextID, nOutputs, sizeof(cmsInterpParams* ));
+    if (p16 ->ParamsCurveOut16 == NULL) {
+
+        _cmsFree(ContextID, p16 ->EvalCurveOut16);
+        _cmsFree(ContextID, p16);
+        return NULL;
+    }
+
+    for (i=0; i < nOutputs; i++) {
+
+        if (Out == NULL) {
+            p16 ->ParamsCurveOut16[i] = NULL;
+            p16 -> EvalCurveOut16[i] = Eval16nop1D;
+        }
+        else {
+
+            p16 ->ParamsCurveOut16[i] = Out[i] ->InterpParams;
+            p16 -> EvalCurveOut16[i] = p16 ->ParamsCurveOut16[i]->Interpolation.Lerp16;
+        }
+    }
+
+    return p16;
+}
+
+
+
+// Resampling ---------------------------------------------------------------------------------
+
+#define PRELINEARIZATION_POINTS 4096
+
+// Sampler implemented by another LUT (passed in Cargo). This is a clean way to precalculate
+// the devicelink 3D CLUT for almost any transform: the 16-bit node coordinates are scaled
+// to 0..1 floating point, pushed through the pipeline in floating point, and the result is
+// scaled back and saturated to 16 bits. Always returns TRUE.
+static
+cmsInt32Number XFormSampler16(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[], CMSREGISTER void* Cargo)
+{
+    cmsPipeline* Source = (cmsPipeline*) Cargo;
+    cmsFloat32Number NodeIn[cmsMAXCHANNELS], NodeOut[cmsMAXCHANNELS];
+    cmsUInt32Number Chan;
+
+    _cmsAssert(Source -> InputChannels < cmsMAXCHANNELS);
+    _cmsAssert(Source -> OutputChannels < cmsMAXCHANNELS);
+
+    // From 16 bit to floating point
+    Chan = 0;
+    while (Chan < Source ->InputChannels) {
+        NodeIn[Chan] = (cmsFloat32Number) (In[Chan] / 65535.0);
+        Chan++;
+    }
+
+    // Evaluate in floating point
+    cmsPipelineEvalFloat(NodeIn, NodeOut, Source);
+
+    // Back to 16 bits representation
+    Chan = 0;
+    while (Chan < Source ->OutputChannels) {
+        Out[Chan] = _cmsQuickSaturateWord(NodeOut[Chan] * 65535.0);
+        Chan++;
+    }
+
+    // Always succeed
+    return TRUE;
+}
+
+// Tells whether every curve of a curve-set stage is linear. One curve per output channel of
+// the stage is checked. Returns FALSE as well when the stage yields no curve set.
+static
+cmsBool AllCurvesAreLinear(cmsStage* mpe)
+{
+    cmsToneCurve** Set = _cmsStageGetPtrToCurveSet(mpe);
+    cmsUInt32Number Count, k;
+
+    if (Set == NULL) return FALSE;
+
+    Count = cmsStageOutputChannels(mpe);
+
+    k = 0;
+    while (k < Count) {
+
+        // A single non-linear curve settles the question
+        if (!cmsIsToneCurveLinear(Set[k])) return FALSE;
+        k++;
+    }
+
+    return TRUE;
+}
+
+// This function replaces a specific node placed in "At" by the "Value" numbers. Its purpose
+// is to fix scum dot on broken profiles/transforms. Works on 1, 3 and 4 channels
+//
+//   CLUT          - stage to patch, it has to be a CLUT stage
+//   At            - 16-bit input coordinates of the node, nChannelsIn values
+//   Value         - 16-bit values to store in the node, nChannelsOut values
+//
+// Returns FALSE if the stage is not a CLUT, if the number of input channels is not
+// supported or if "At" does not fall exactly on a grid node. The stage data is only
+// interpreted as CLUT data after the stage type has been verified.
+static
+cmsBool  PatchLUT(cmsStage* CLUT, cmsUInt16Number At[], cmsUInt16Number Value[],
+                  cmsUInt32Number nChannelsOut, cmsUInt32Number nChannelsIn)
+{
+    _cmsStageCLutData* Grid;
+    cmsInterpParams* p16;
+    cmsFloat64Number px, py, pz, pw;
+    int        x0, y0, z0, w0;
+    int        i, index;
+
+    if (CLUT -> Type != cmsSigCLutElemType) {
+        cmsSignalError(CLUT->ContextID, cmsERROR_INTERNAL, "(internal) Attempt to PatchLUT on non-lut stage");
+        return FALSE;
+    }
+
+    // Safe now: the stage is known to hold CLUT data
+    Grid = (_cmsStageCLutData*) CLUT ->Data;
+    p16  = Grid ->Params;
+
+    if (nChannelsIn == 4) {
+
+        // Position of the point in grid units, per channel
+        px = ((cmsFloat64Number) At[0] * (p16->Domain[0])) / 65535.0;
+        py = ((cmsFloat64Number) At[1] * (p16->Domain[1])) / 65535.0;
+        pz = ((cmsFloat64Number) At[2] * (p16->Domain[2])) / 65535.0;
+        pw = ((cmsFloat64Number) At[3] * (p16->Domain[3])) / 65535.0;
+
+        x0 = (int) floor(px);
+        y0 = (int) floor(py);
+        z0 = (int) floor(pz);
+        w0 = (int) floor(pw);
+
+        if (((px - x0) != 0) ||
+            ((py - y0) != 0) ||
+            ((pz - z0) != 0) ||
+            ((pw - w0) != 0)) return FALSE; // Not on exact node
+
+        // opta[] is used in reverse order with respect to the channels
+        index = (int) p16 -> opta[3] * x0 +
+                (int) p16 -> opta[2] * y0 +
+                (int) p16 -> opta[1] * z0 +
+                (int) p16 -> opta[0] * w0;
+    }
+    else
+        if (nChannelsIn == 3) {
+
+            px = ((cmsFloat64Number) At[0] * (p16->Domain[0])) / 65535.0;
+            py = ((cmsFloat64Number) At[1] * (p16->Domain[1])) / 65535.0;
+            pz = ((cmsFloat64Number) At[2] * (p16->Domain[2])) / 65535.0;
+
+            x0 = (int) floor(px);
+            y0 = (int) floor(py);
+            z0 = (int) floor(pz);
+
+            if (((px - x0) != 0) ||
+                ((py - y0) != 0) ||
+                ((pz - z0) != 0)) return FALSE;  // Not on exact node
+
+            index = (int) p16 -> opta[2] * x0 +
+                    (int) p16 -> opta[1] * y0 +
+                    (int) p16 -> opta[0] * z0;
+        }
+        else
+            if (nChannelsIn == 1) {
+
+                px = ((cmsFloat64Number) At[0] * (p16->Domain[0])) / 65535.0;
+
+                x0 = (int) floor(px);
+
+                if (((px - x0) != 0)) return FALSE; // Not on exact node
+
+                index = (int) p16 -> opta[0] * x0;
+            }
+            else {
+                cmsSignalError(CLUT->ContextID, cmsERROR_INTERNAL, "(internal) %d Channels are not supported on PatchLUT", nChannelsIn);
+                return FALSE;
+            }
+
+    // Overwrite the node with the requested values
+    for (i = 0; i < (int) nChannelsOut; i++)
+        Grid->Tab.T[index + i] = Value[i];
+
+    return TRUE;
+}
+
+// Auxiliary, tells whether the white fixup can be skipped. Channels are examined in order
+// and the first one that differs decides: a difference above 0xf000 yields TRUE (values are
+// so extremely different that the fixup should be avoided), any smaller difference yields
+// FALSE. If all n channels match, the whites are equal and the result is TRUE.
+static
+cmsBool WhitesAreEqual(cmsUInt32Number n, cmsUInt16Number White1[], cmsUInt16Number White2[] )
+{
+    cmsUInt32Number Chan;
+
+    for (Chan = 0; Chan < n; Chan++) {
+
+        const int Delta = (int) White1[Chan] - (int) White2[Chan];
+
+        if (abs(Delta) > 0xf000) return TRUE;
+        if (Delta != 0) return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+// Locate the node for the white point and fix it to pure white in order to avoid scum dot.
+// Returns FALSE when the fixup does not apply: unknown endpoints for a color space, channel
+// counts that do not match the pipeline, or a pipeline that is not some combination of
+// [curves] CLUT [curves]. Returns TRUE otherwise, including when the whites already match
+// and when the final patching of the node does not succeed.
+static
+cmsBool FixWhiteMisalignment(cmsPipeline* Lut, cmsColorSpaceSignature EntryColorSpace, cmsColorSpaceSignature ExitColorSpace)
+{
+    cmsUInt16Number *DeviceWhiteIn, *DeviceWhiteOut;
+    cmsUInt16Number  NodeAt[cmsMAXCHANNELS], NodeValue[cmsMAXCHANNELS], Measured[cmsMAXCHANNELS];
+    cmsUInt32Number  k, nOuts, nIns;
+    cmsStage *PreLin = NULL, *CLUT = NULL, *PostLin = NULL;
+
+    if (!_cmsEndPointsBySpace(EntryColorSpace, &DeviceWhiteIn, NULL, &nIns)) return FALSE;
+    if (!_cmsEndPointsBySpace(ExitColorSpace, &DeviceWhiteOut, NULL, &nOuts)) return FALSE;
+
+    // It needs to be fixed?
+    if (Lut ->InputChannels != nIns || Lut ->OutputChannels != nOuts) return FALSE;
+
+    cmsPipelineEval16(DeviceWhiteIn, Measured, Lut);
+
+    if (WhitesAreEqual(nOuts, DeviceWhiteOut, Measured)) return TRUE; // whites already match
+
+    // Check if the LUT comes as Prelin, CLUT or Postlin. We allow all combinations,
+    // tried from the most complete layout to the bare CLUT.
+    if (!cmsPipelineCheckAndRetreiveStages(Lut, 3, cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType, &PreLin, &CLUT, &PostLin) &&
+        !cmsPipelineCheckAndRetreiveStages(Lut, 2, cmsSigCurveSetElemType, cmsSigCLutElemType, &PreLin, &CLUT) &&
+        !cmsPipelineCheckAndRetreiveStages(Lut, 2, cmsSigCLutElemType, cmsSigCurveSetElemType, &CLUT, &PostLin) &&
+        !cmsPipelineCheckAndRetreiveStages(Lut, 1, cmsSigCLutElemType, &CLUT))
+        return FALSE;
+
+    // Where white enters the CLUT: after the pre-linearization curves, if any
+    if (PreLin != NULL) {
+
+        cmsToneCurve** InCurves = _cmsStageGetPtrToCurveSet(PreLin);
+
+        for (k = 0; k < nIns; k++)
+            NodeAt[k] = cmsEvalToneCurve16(InCurves[k], DeviceWhiteIn[k]);
+    }
+    else {
+        for (k = 0; k < nIns; k++)
+            NodeAt[k] = DeviceWhiteIn[k];
+    }
+
+    // What the CLUT has to deliver: plain white, unless post-linearization curves follow
+    for (k = 0; k < nOuts; k++)
+        NodeValue[k] = DeviceWhiteOut[k];
+
+    // If any post-linearization, we need to find how white is represented before the curve,
+    // so do a reverse interpolation. Channels whose curve cannot be reversed keep plain white.
+    if (PostLin != NULL) {
+
+        cmsToneCurve** OutCurves = _cmsStageGetPtrToCurveSet(PostLin);
+
+        for (k = 0; k < nOuts; k++) {
+
+            cmsToneCurve* Inverse = cmsReverseToneCurve(OutCurves[k]);
+            if (Inverse == NULL) continue;
+
+            NodeValue[k] = cmsEvalToneCurve16(Inverse, DeviceWhiteOut[k]);
+            cmsFreeToneCurve(Inverse);
+        }
+    }
+
+    // Ok, proceed with patching. May fail and we don't care if it fails
+    (void) PatchLUT(CLUT, NodeAt, NodeValue, nOuts, nIns);
+
+    return TRUE;
+}
+
+// -----------------------------------------------------------------------------------------------------------------------------------------------
+// This function creates a simple LUT from a complex one. The generated LUT has an optional set of
+// prelinearization curves, a CLUT of nGridPoints and optional postlinearization tables.
+// These curves have to exist in the original LUT in order to be used in the simplified output,
+// and the caller has to request them through the flags.
+// This function should be used on 16-bit LUTs only, as floating point loses precision when simplified.
+//
+//   Lut           - in/out: on success the original pipeline is freed and *Lut is replaced by the new one
+//   Intent        - absolute colorimetric disables the white-on-white fixup (and sets the flag in *dwFlags)
+//   InputFormat,
+//   OutputFormat  - read only here: used to reject float formats and to obtain the color spaces
+//   dwFlags       - in/out: optimization flags
+//
+// Returns TRUE if the pipeline was replaced. On FALSE the original pipeline is left as it was.
+// -----------------------------------------------------------------------------------------------------------------------------------------------
+
+static
+cmsBool OptimizeByResampling(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
+{
+    cmsPipeline* Src = NULL;
+    cmsPipeline* Dest = NULL;
+    cmsStage* mpe;
+    cmsStage* CLUT;
+    cmsStage *KeepPreLin = NULL, *KeepPostLin = NULL;
+    cmsUInt32Number nGridPoints;
+    cmsColorSpaceSignature ColorSpace, OutputColorSpace;
+    cmsStage *NewPreLin = NULL;
+    cmsStage *NewPostLin = NULL;
+    _cmsStageCLutData* DataCLUT;
+    cmsToneCurve** DataSetIn;
+    cmsToneCurve** DataSetOut;
+    Prelin16Data* p16;
+
+    // This is a lossy optimization! does not apply in floating-point cases
+    if (_cmsFormatterIsFloat(*InputFormat) || _cmsFormatterIsFloat(*OutputFormat)) return FALSE;
+
+    ColorSpace       = _cmsICCcolorSpace((int) T_COLORSPACE(*InputFormat));
+    OutputColorSpace = _cmsICCcolorSpace((int) T_COLORSPACE(*OutputFormat));
+
+    // Color space must be specified
+    if (ColorSpace == (cmsColorSpaceSignature)0 ||
+        OutputColorSpace == (cmsColorSpaceSignature)0) return FALSE;
+
+    nGridPoints      = _cmsReasonableGridpointsByColorspace(ColorSpace, *dwFlags);
+
+    // For empty LUTs, 2 points are enough
+    if (cmsPipelineStageCount(*Lut) == 0)
+        nGridPoints = 2;
+
+    Src = *Lut;
+
+    // Named color pipelines cannot be optimized either
+    for (mpe = cmsPipelineGetPtrToFirstStage(Src);
+        mpe != NULL;
+        mpe = cmsStageNext(mpe)) {
+            if (cmsStageType(mpe) == cmsSigNamedColorElemType) return FALSE;
+    }
+
+    // Allocate an empty LUT
+    Dest =  cmsPipelineAlloc(Src ->ContextID, Src ->InputChannels, Src ->OutputChannels);
+    if (!Dest) return FALSE;
+
+    // Prelinearization tables are kept only if requested by flags
+    if (*dwFlags & cmsFLAGS_CLUT_PRE_LINEARIZATION) {
+
+        // Get a pointer to the prelinearization element
+        cmsStage* PreLin = cmsPipelineGetPtrToFirstStage(Src);
+
+        // Check if suitable
+        if (PreLin && PreLin ->Type == cmsSigCurveSetElemType) {
+
+            // Maybe this is a linear tram, so we can avoid the whole stuff
+            if (!AllCurvesAreLinear(PreLin)) {
+
+                // All seems ok, proceed.
+                NewPreLin = cmsStageDup(PreLin);
+                if(!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, NewPreLin))
+                    goto Error;
+
+                // Remove prelinearization. Since we have duplicated the curve
+                // in destination LUT, the sampling should be applied after this stage.
+                cmsPipelineUnlinkStage(Src, cmsAT_BEGIN, &KeepPreLin);
+            }
+        }
+    }
+
+    // Allocate the CLUT
+    CLUT = cmsStageAllocCLut16bit(Src ->ContextID, nGridPoints, Src ->InputChannels, Src->OutputChannels, NULL);
+    if (CLUT == NULL) goto Error;
+
+    // Add the CLUT to the destination LUT
+    if (!cmsPipelineInsertStage(Dest, cmsAT_END, CLUT)) {
+        goto Error;
+    }
+
+    // Postlinearization tables are kept only if requested by flags
+    if (*dwFlags & cmsFLAGS_CLUT_POST_LINEARIZATION) {
+
+        // Get a pointer to the postlinearization if present
+        cmsStage* PostLin = cmsPipelineGetPtrToLastStage(Src);
+
+        // Check if suitable
+        if (PostLin && cmsStageType(PostLin) == cmsSigCurveSetElemType) {
+
+            // Maybe this is a linear tram, so we can avoid the whole stuff
+            if (!AllCurvesAreLinear(PostLin)) {
+
+                // All seems ok, proceed.
+                NewPostLin = cmsStageDup(PostLin);
+                if (!cmsPipelineInsertStage(Dest, cmsAT_END, NewPostLin))
+                    goto Error;
+
+                // In destination LUT, the sampling should be applied after this stage.
+                cmsPipelineUnlinkStage(Src, cmsAT_END, &KeepPostLin);
+            }
+        }
+    }
+
+    // Now its time to do the sampling. We have to ignore pre/post linearization
+    // The source LUT without pre/post curves is passed as parameter.
+    if (!cmsStageSampleCLut16bit(CLUT, XFormSampler16, (void*) Src, 0)) {
+Error:
+        // Ops, something went wrong, Restore stages
+        if (KeepPreLin != NULL) {
+            if (!cmsPipelineInsertStage(Src, cmsAT_BEGIN, KeepPreLin)) {
+                _cmsAssert(0); // This never happens
+            }
+        }
+        if (KeepPostLin != NULL) {
+            if (!cmsPipelineInsertStage(Src, cmsAT_END,   KeepPostLin)) {
+                _cmsAssert(0); // This never happens
+            }
+        }
+        cmsPipelineFree(Dest);
+        return FALSE;
+    }
+
+    // Done. The original pipeline and the stages unlinked from it are no longer needed.
+
+    if (KeepPreLin != NULL) cmsStageFree(KeepPreLin);
+    if (KeepPostLin != NULL) cmsStageFree(KeepPostLin);
+    cmsPipelineFree(Src);
+
+    DataCLUT = (_cmsStageCLutData*) CLUT ->Data;
+
+    if (NewPreLin == NULL) DataSetIn = NULL;
+    else DataSetIn = ((_cmsStageToneCurvesData*) NewPreLin ->Data) ->TheCurves;
+
+    if (NewPostLin == NULL) DataSetOut = NULL;
+    else  DataSetOut = ((_cmsStageToneCurvesData*) NewPostLin ->Data) ->TheCurves;
+
+
+    if (DataSetIn == NULL && DataSetOut == NULL) {
+
+        // Bare CLUT: the interpolation routine of the grid can evaluate the whole pipeline
+        _cmsPipelineSetOptimizationParameters(Dest, (_cmsOPTeval16Fn) DataCLUT->Params->Interpolation.Lerp16, DataCLUT->Params, NULL, NULL);
+    }
+    else {
+
+        p16 = PrelinOpt16alloc(Dest ->ContextID,
+            DataCLUT ->Params,
+            Dest ->InputChannels,
+            DataSetIn,
+            Dest ->OutputChannels,
+            DataSetOut);
+
+        // PrelinOpt16alloc returns NULL when out of memory, and PrelinEval16 / PrelinOpt16free
+        // dereference their data unconditionally. In that case install nothing: Dest then keeps
+        // the evaluator it was created with (cmsPipelineAlloc is expected to set up the generic
+        // stage-by-stage one), which gives the same results without the fast path.
+        if (p16 != NULL)
+            _cmsPipelineSetOptimizationParameters(Dest, PrelinEval16, (void*) p16, PrelinOpt16free, Prelin16dup);
+    }
+
+
+    // Don't fix white on absolute colorimetric
+    if (Intent == INTENT_ABSOLUTE_COLORIMETRIC)
+        *dwFlags |= cmsFLAGS_NOWHITEONWHITEFIXUP;
+
+    if (!(*dwFlags & cmsFLAGS_NOWHITEONWHITEFIXUP)) {
+
+        FixWhiteMisalignment(Dest, ColorSpace, OutputColorSpace);
+    }
+
+    *Lut = Dest;
+    return TRUE;
+}
+
+
+// -----------------------------------------------------------------------------------------------------------------------------------------------
+// Fixes the gamma balancing of transform. This is described in my paper "Prelinearization Stages on
+// Color-Management Application-Specific Integrated Circuits (ASICs)" presented at NIP24. It only works
+// for RGB transforms. See the paper for more details
+// -----------------------------------------------------------------------------------------------------------------------------------------------
+
+
+// Normalize endpoints by slope limiting max and min. This assures endpoints as well.
+// Descending curves are handled as well.
+//
+// The first and the last 2% of the 16-bit table of "g" are replaced by straight segments
+// that join the table value at the cutoff with the ideal endpoint (0 or 0xffff).
+// Tables so short that the 2% cutoff rounds to zero entries are left untouched, since the
+// slopes would otherwise be computed by dividing by zero.
+static
+void SlopeLimiting(cmsToneCurve* g)
+{
+    int BeginVal, EndVal;
+    int AtBegin = (int) floor((cmsFloat64Number) g ->nEntries * 0.02 + 0.5);   // Cutoff at 2%
+    int AtEnd   = (int) g ->nEntries - AtBegin - 1;                                  // And 98%
+    cmsFloat64Number Val, Slope, beta;
+    int i;
+
+    // Nothing to limit: the X interval used as divisor below would be zero
+    if (AtBegin <= 0) return;
+
+    if (cmsIsToneCurveDescending(g)) {
+        BeginVal = 0xffff; EndVal = 0;
+    }
+    else {
+        BeginVal = 0; EndVal = 0xffff;
+    }
+
+    // Compute slope and offset for begin of curve
+    Val   = g ->Table16[AtBegin];
+    Slope = (Val - BeginVal) / AtBegin;
+    beta  = Val - Slope * AtBegin;
+
+    for (i=0; i < AtBegin; i++)
+        g ->Table16[i] = _cmsQuickSaturateWord(i * Slope + beta);
+
+    // Compute slope and offset for the end
+    Val   = g ->Table16[AtEnd];
+    Slope = (EndVal - Val) / AtBegin;   // AtBegin holds the X interval, which is same in both cases
+    beta  = Val - Slope * AtEnd;
+
+    for (i = AtEnd; i < (int) g ->nEntries; i++)
+        g ->Table16[i] = _cmsQuickSaturateWord(i * Slope + beta);
+}
+
+
+// Precomputes tables for 8-bit on input devicelink.
+// For each of the 256 possible 8-bit values of every one of the 3 channels, the value is
+// widened to 16 bits, optionally passed through the channel's curve (G may be NULL for no
+// curves) and converted to a fixed-point grid position. The integer part, already multiplied
+// by the channel's opta[] factor, and the fractional part are then stored.
+// "p" is referenced, not copied. Returns NULL if the block cannot be allocated.
+static
+Prelin8Data* PrelinOpt8alloc(cmsContext ContextID, const cmsInterpParams* p, cmsToneCurve* G[3])
+{
+    Prelin8Data* Tables;
+    cmsUInt16Number Shaped[3];
+    cmsS15Fixed16Number fx, fy, fz;
+    int Level;
+
+    Tables = (Prelin8Data*)_cmsMallocZero(ContextID, sizeof(Prelin8Data));
+    if (Tables == NULL) return NULL;
+
+    Tables ->ContextID = ContextID;
+    Tables ->p = p;
+
+    // Since this only works for 8 bit input, values comes always as x * 257,
+    // we can safely take msb byte (x << 8 + x)
+
+    for (Level = 0; Level < 256; Level++) {
+
+        // Get 16-bit representation
+        const cmsUInt16Number Wide = FROM_8_TO_16(Level);
+
+        if (G == NULL) {
+            Shaped[0] = Shaped[1] = Shaped[2] = Wide;
+        }
+        else {
+            Shaped[0] = cmsEvalToneCurve16(G[0], Wide);
+            Shaped[1] = cmsEvalToneCurve16(G[1], Wide);
+            Shaped[2] = cmsEvalToneCurve16(G[2], Wide);
+        }
+
+        // Move to 0..1.0 in fixed domain
+        fx = _cmsToFixedDomain((int) (Shaped[0] * p -> Domain[0]));
+        fy = _cmsToFixedDomain((int) (Shaped[1] * p -> Domain[1]));
+        fz = _cmsToFixedDomain((int) (Shaped[2] * p -> Domain[2]));
+
+        // Store the precalculated table of nodes
+        Tables ->X0[Level] = (p->opta[2] * FIXED_TO_INT(fx));
+        Tables ->Y0[Level] = (p->opta[1] * FIXED_TO_INT(fy));
+        Tables ->Z0[Level] = (p->opta[0] * FIXED_TO_INT(fz));
+
+        // Store the precalculated table of offsets
+        Tables ->rx[Level] = (cmsUInt16Number) FIXED_REST_TO_INT(fx);
+        Tables ->ry[Level] = (cmsUInt16Number) FIXED_REST_TO_INT(fy);
+        Tables ->rz[Level] = (cmsUInt16Number) FIXED_REST_TO_INT(fz);
+    }
+
+    return Tables;
+}
+
+// Releases a Prelin8Data block. The block holds no allocations of its own, so a single
+// free is all that is needed.
+static
+void Prelin8free(cmsContext ContextID, void* ptr)
+{
+    Prelin8Data* Tables = (Prelin8Data*) ptr;
+
+    _cmsFree(ContextID, Tables);
+}
+
+// Duplicates a Prelin8Data block as a plain memory copy; the interpolation parameters it
+// refers to are shared with the original. Returns whatever _cmsDupMem() returns.
+static
+void* Prelin8dup(cmsContext ContextID, const void* ptr)
+{
+    const Prelin8Data* Original = (const Prelin8Data*) ptr;
+
+    return _cmsDupMem(ContextID, Original, sizeof(Prelin8Data));
+}
+
+
+
+// An optimized tetrahedral interpolation for 8-bit input.
+//
+// Only the high byte of the first three input words is used. Each byte indexes
+// the per-channel tables held in the Prelin8Data passed through D: X0/Y0/Z0 give
+// the table offset of the lower node on each axis and rx/ry/rz the fractional
+// rest. One 16-bit value is written to Output[] per output channel (p->nOutputs).
+//
+// DENS(i,j,k) reads the table entry of the current output channel (OutChan) at
+// the node whose per-axis offsets are i, j and k.
+#define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
+static CMS_NO_SANITIZE
+void PrelinEval8(CMSREGISTER const cmsUInt16Number Input[],
+                  CMSREGISTER cmsUInt16Number Output[],
+                  CMSREGISTER const void* D)
+{
+
+    cmsUInt8Number         r, g, b;
+    cmsS15Fixed16Number    rx, ry, rz;
+    cmsS15Fixed16Number    c0, c1, c2, c3, Rest;
+    int                    OutChan;
+    CMSREGISTER cmsS15Fixed16Number X0, X1, Y0, Y1, Z0, Z1;
+    Prelin8Data* p8 = (Prelin8Data*) D;
+    CMSREGISTER const cmsInterpParams* p = p8 ->p;
+    int                    TotalOut = (int) p -> nOutputs;
+    const cmsUInt16Number* LutTable = (const cmsUInt16Number*) p->Table;
+
+    // Reduce the 16-bit inputs to 8 bits by keeping the most significant byte
+    r = (cmsUInt8Number) (Input[0] >> 8);
+    g = (cmsUInt8Number) (Input[1] >> 8);
+    b = (cmsUInt8Number) (Input[2] >> 8);
+
+    // Precomputed offsets of the lower node on each axis
+    X0 = X1 = (cmsS15Fixed16Number) p8->X0[r];
+    Y0 = Y1 = (cmsS15Fixed16Number) p8->Y0[g];
+    Z0 = Z1 = (cmsS15Fixed16Number) p8->Z0[b];
+
+    // Precomputed fractional rests
+    rx = p8 ->rx[r];
+    ry = p8 ->ry[g];
+    rz = p8 ->rz[b];
+
+    // The upper node is one stride (opta[]) further, except when the rest is zero:
+    // in that case upper and lower node are the same, so no entry beyond the
+    // lower node is read on that axis.
+    X1 = X0 + (cmsS15Fixed16Number)((rx == 0) ? 0 :  p ->opta[2]);
+    Y1 = Y0 + (cmsS15Fixed16Number)((ry == 0) ? 0 :  p ->opta[1]);
+    Z1 = Z0 + (cmsS15Fixed16Number)((rz == 0) ? 0 :  p ->opta[0]);
+
+
+    // These are the 6 Tetrahedral
+    // The ordering of rx, ry, rz selects which of them is used; c1, c2, c3 are the
+    // table differences that get weighted by rx, ry, rz respectively.
+    for (OutChan=0; OutChan < TotalOut; OutChan++) {
+
+        c0 = DENS(X0, Y0, Z0);
+
+        if (rx >= ry && ry >= rz)
+        {
+            c1 = DENS(X1, Y0, Z0) - c0;
+            c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0);
+            c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+        }
+        else
+            if (rx >= rz && rz >= ry)
+            {
+                c1 = DENS(X1, Y0, Z0) - c0;
+                c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+                c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0);
+            }
+            else
+                if (rz >= rx && rx >= ry)
+                {
+                    c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1);
+                    c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+                    c3 = DENS(X0, Y0, Z1) - c0;
+                }
+                else
+                    if (ry >= rx && rx >= rz)
+                    {
+                        c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0);
+                        c2 = DENS(X0, Y1, Z0) - c0;
+                        c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+                    }
+                    else
+                        if (ry >= rz && rz >= rx)
+                        {
+                            c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+                            c2 = DENS(X0, Y1, Z0) - c0;
+                            c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0);
+                        }
+                        else
+                            if (rz >= ry && ry >= rx)
+                            {
+                                c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+                                c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1);
+                                c3 = DENS(X0, Y0, Z1) - c0;
+                            }
+                            else  {
+                                // Fallback when none of the orderings matched: no correction
+                                c1 = c2 = c3 = 0;
+                            }
+
+        // Weighted sum of the three differences, scaled back down by the two shifts.
+        // NOTE(review): the 0x8001 bias and the (Rest >> 16) term look like a rounded
+        // division by 0xFFFF -- confirm against the 16-bit interpolators.
+        Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+        Output[OutChan] = (cmsUInt16Number) (c0 + ((Rest + (Rest >> 16)) >> 16));
+
+    }
+}
+
+#undef DENS
+
+
+// Curves that contain wide empty areas are not optimizeable.
+// Counts the table entries that are exactly 0x0000 ("zeros") and exactly 0xffff
+// ("poles"). A table with one of each is accepted (linear tables look like that);
+// otherwise the curve is reported as degenerated when either count exceeds one
+// twentieth of the table size.
+static
+cmsBool IsDegenerated(const cmsToneCurve* g)
+{
+    const cmsUInt32Number total = g->nEntries;
+    const cmsUInt32Number limit = total / 20;
+    cmsUInt32Number idx, nZeros = 0, nPoles = 0;
+
+    for (idx = 0; idx < total; idx++) {
+
+        const cmsUInt16Number sample = g->Table16[idx];
+
+        if (sample == 0x0000)
+            nZeros++;
+        else if (sample == 0xffff)
+            nPoles++;
+    }
+
+    // For linear tables
+    if (nZeros == 1 && nPoles == 1)
+        return FALSE;
+
+    // Degenerated when there are many zeros or many poles
+    return (nZeros > limit || nPoles > limit) ? TRUE : FALSE;
+}
+
+// --------------------------------------------------------------------------------------------------------------
+// We need xput over here
+
+// Replaces a chunky RGB -> RGB pipeline by "prelinearization curves + resampled CLUT".
+//
+// The pipeline is probed with a gray ramp to obtain one curve per channel. The
+// reversed curves are prepended to a copy of the pipeline, that copy is sampled
+// into a 16-bit CLUT, and the direct curves are placed in front of the CLUT.
+// A specialised evaluator (PrelinEval8 or PrelinEval16) is then installed.
+//
+// Lut           in/out. On success the original pipeline is freed and *Lut points
+//               to the optimized one. On failure *Lut is left untouched.
+// Intent        absolute colorimetric disables the white-on-white fixup.
+// InputFormat   must be integer, chunky RGB. Non 8-bit input is accepted only
+//               when cmsFLAGS_CLUT_PRE_LINEARIZATION is set in *dwFlags.
+// OutputFormat  must be integer, chunky RGB.
+// dwFlags       may gain cmsFLAGS_NOWHITEONWHITEFIXUP.
+//
+// Returns TRUE when the pipeline has been replaced, FALSE otherwise. Every
+// failure path releases whatever had been allocated so far.
+static
+cmsBool OptimizeByComputingLinearization(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
+{
+    cmsPipeline* OriginalLut;
+    cmsUInt32Number nGridPoints;
+    cmsToneCurve *Trans[cmsMAXCHANNELS], *TransReverse[cmsMAXCHANNELS];
+    cmsUInt32Number t, i;
+    cmsFloat32Number v, In[cmsMAXCHANNELS], Out[cmsMAXCHANNELS];
+    cmsBool lIsSuitable, lIsLinear;
+    cmsPipeline* OptimizedLUT = NULL, *LutPlusCurves = NULL;
+    cmsStage* OptimizedCLUTmpe;
+    cmsColorSpaceSignature ColorSpace, OutputColorSpace;
+    cmsStage* OptimizedPrelinMpe;
+    cmsStage* mpe;
+    cmsToneCurve** OptimizedPrelinCurves;
+    _cmsStageCLutData* OptimizedPrelinCLUT;
+
+
+    // This is a lossy optimization! does not apply in floating-point cases
+    if (_cmsFormatterIsFloat(*InputFormat) || _cmsFormatterIsFloat(*OutputFormat)) return FALSE;
+
+    // Only on chunky RGB
+    if (T_COLORSPACE(*InputFormat)  != PT_RGB) return FALSE;
+    if (T_PLANAR(*InputFormat)) return FALSE;
+
+    if (T_COLORSPACE(*OutputFormat) != PT_RGB) return FALSE;
+    if (T_PLANAR(*OutputFormat)) return FALSE;
+
+    // On 16 bits, user has to specify the feature
+    if (!_cmsFormatterIs8bit(*InputFormat)) {
+        if (!(*dwFlags & cmsFLAGS_CLUT_PRE_LINEARIZATION)) return FALSE;
+    }
+
+    OriginalLut = *Lut;
+
+    // Named color pipelines cannot be optimized either
+    for (mpe = cmsPipelineGetPtrToFirstStage(OriginalLut);
+         mpe != NULL;
+         mpe = cmsStageNext(mpe)) {
+            if (cmsStageType(mpe) == cmsSigNamedColorElemType) return FALSE;
+    }
+
+    ColorSpace       = _cmsICCcolorSpace((int) T_COLORSPACE(*InputFormat));
+    OutputColorSpace = _cmsICCcolorSpace((int) T_COLORSPACE(*OutputFormat));
+
+    // Color space must be specified
+    if (ColorSpace == (cmsColorSpaceSignature)0 ||
+        OutputColorSpace == (cmsColorSpaceSignature)0) return FALSE;
+
+    nGridPoints      = _cmsReasonableGridpointsByColorspace(ColorSpace, *dwFlags);
+
+    // Empty gamma containers, so the Error path can tell what has been allocated
+    memset(Trans, 0, sizeof(Trans));
+    memset(TransReverse, 0, sizeof(TransReverse));
+
+    // If the last stage of the original lut are curves, and those curves are
+    // degenerated, it is likely the transform is squeezing and clipping
+    // the output from previous CLUT. We cannot optimize this case
+    {
+        cmsStage* last = cmsPipelineGetPtrToLastStage(OriginalLut);
+
+        // A pipeline without stages has nothing to linearize
+        if (last == NULL) goto Error;
+
+        if (cmsStageType(last) == cmsSigCurveSetElemType) {
+
+            _cmsStageToneCurvesData* Data = (_cmsStageToneCurvesData*)cmsStageData(last);
+            for (i = 0; i < Data->nCurves; i++) {
+                if (IsDegenerated(Data->TheCurves[i]))
+                    goto Error;
+            }
+        }
+    }
+
+    for (t = 0; t < OriginalLut ->InputChannels; t++) {
+        Trans[t] = cmsBuildTabulatedToneCurve16(OriginalLut ->ContextID, PRELINEARIZATION_POINTS, NULL);
+        if (Trans[t] == NULL) goto Error;
+    }
+
+    // Populate the curves
+    for (i=0; i < PRELINEARIZATION_POINTS; i++) {
+
+        v = (cmsFloat32Number) ((cmsFloat64Number) i / (PRELINEARIZATION_POINTS - 1));
+
+        // Feed input with a gray ramp
+        for (t=0; t < OriginalLut ->InputChannels; t++)
+            In[t] = v;
+
+        // Evaluate the gray value
+        cmsPipelineEvalFloat(In, Out, OriginalLut);
+
+        // Store result in curve
+        for (t=0; t < OriginalLut ->InputChannels; t++)
+            Trans[t] ->Table16[i] = _cmsQuickSaturateWord(Out[t] * 65535.0);
+    }
+
+    // Slope-limit the obtained curves
+    for (t = 0; t < OriginalLut ->InputChannels; t++)
+        SlopeLimiting(Trans[t]);
+
+    // Check for validity
+    lIsSuitable = TRUE;
+    lIsLinear   = TRUE;
+    for (t=0; (lIsSuitable && (t < OriginalLut ->InputChannels)); t++) {
+
+        // Exclude if already linear
+        if (!cmsIsToneCurveLinear(Trans[t]))
+            lIsLinear = FALSE;
+
+        // Exclude if non-monotonic
+        if (!cmsIsToneCurveMonotonic(Trans[t]))
+            lIsSuitable = FALSE;
+
+        if (IsDegenerated(Trans[t]))
+            lIsSuitable = FALSE;
+    }
+
+    // If it is not suitable, just quit
+    if (!lIsSuitable) goto Error;
+
+    // Invert curves if possible
+    for (t = 0; t < OriginalLut ->InputChannels; t++) {
+        TransReverse[t] = cmsReverseToneCurveEx(PRELINEARIZATION_POINTS, Trans[t]);
+        if (TransReverse[t] == NULL) goto Error;
+    }
+
+    // Now insert the reversed curves at the beginning of the transform
+    LutPlusCurves = cmsPipelineDup(OriginalLut);
+    if (LutPlusCurves == NULL) goto Error;
+
+    if (!cmsPipelineInsertStage(LutPlusCurves, cmsAT_BEGIN, cmsStageAllocToneCurves(OriginalLut ->ContextID, OriginalLut ->InputChannels, TransReverse)))
+        goto Error;
+
+    // Create the result LUT
+    OptimizedLUT = cmsPipelineAlloc(OriginalLut ->ContextID, OriginalLut ->InputChannels, OriginalLut ->OutputChannels);
+    if (OptimizedLUT == NULL) goto Error;
+
+    OptimizedPrelinMpe = cmsStageAllocToneCurves(OriginalLut ->ContextID, OriginalLut ->InputChannels, Trans);
+
+    // Create and insert the curves at the beginning
+    if (!cmsPipelineInsertStage(OptimizedLUT, cmsAT_BEGIN, OptimizedPrelinMpe))
+        goto Error;
+
+    // Allocate the CLUT for result
+    OptimizedCLUTmpe = cmsStageAllocCLut16bit(OriginalLut ->ContextID, nGridPoints, OriginalLut ->InputChannels, OriginalLut ->OutputChannels, NULL);
+
+    // Add the CLUT to the destination LUT
+    if (!cmsPipelineInsertStage(OptimizedLUT, cmsAT_END, OptimizedCLUTmpe))
+        goto Error;
+
+    // Resample the LUT
+    if (!cmsStageSampleCLut16bit(OptimizedCLUTmpe, XFormSampler16, (void*) LutPlusCurves, 0)) goto Error;
+
+    // Free the temporaries. The pointers are cleared so that the Error path, which
+    // is still reachable below, does not release them a second time.
+    for (t = 0; t < OriginalLut ->InputChannels; t++) {
+
+        if (Trans[t]) cmsFreeToneCurve(Trans[t]);
+        if (TransReverse[t]) cmsFreeToneCurve(TransReverse[t]);
+
+        Trans[t]        = NULL;
+        TransReverse[t] = NULL;
+    }
+
+    cmsPipelineFree(LutPlusCurves);
+    LutPlusCurves = NULL;
+
+
+    OptimizedPrelinCurves = _cmsStageGetPtrToCurveSet(OptimizedPrelinMpe);
+    OptimizedPrelinCLUT   = (_cmsStageCLutData*) OptimizedCLUTmpe ->Data;
+
+    // Set the evaluator if 8-bit
+    if (_cmsFormatterIs8bit(*InputFormat)) {
+
+        Prelin8Data* p8 = PrelinOpt8alloc(OptimizedLUT ->ContextID,
+                                                OptimizedPrelinCLUT ->Params,
+                                                OptimizedPrelinCurves);
+
+        // Do not leak the half-built pipeline when the tables cannot be allocated
+        if (p8 == NULL) goto Error;
+
+        _cmsPipelineSetOptimizationParameters(OptimizedLUT, PrelinEval8, (void*) p8, Prelin8free, Prelin8dup);
+
+    }
+    else
+    {
+        Prelin16Data* p16 = PrelinOpt16alloc(OptimizedLUT ->ContextID,
+            OptimizedPrelinCLUT ->Params,
+            3, OptimizedPrelinCurves, 3, NULL);
+
+        if (p16 == NULL) goto Error;
+
+        _cmsPipelineSetOptimizationParameters(OptimizedLUT, PrelinEval16, (void*) p16, PrelinOpt16free, Prelin16dup);
+
+    }
+
+    // Don't fix white on absolute colorimetric
+    if (Intent == INTENT_ABSOLUTE_COLORIMETRIC)
+        *dwFlags |= cmsFLAGS_NOWHITEONWHITEFIXUP;
+
+    if (!(*dwFlags & cmsFLAGS_NOWHITEONWHITEFIXUP)) {
+
+        // On failure the new pipeline is released in the Error path; the evaluator
+        // data was handed to it above together with its free callback.
+        if (!FixWhiteMisalignment(OptimizedLUT, ColorSpace, OutputColorSpace))
+            goto Error;
+    }
+
+    // And return the obtained LUT
+
+    cmsPipelineFree(OriginalLut);
+    *Lut = OptimizedLUT;
+    return TRUE;
+
+Error:
+
+    for (t = 0; t < OriginalLut ->InputChannels; t++) {
+
+        if (Trans[t]) cmsFreeToneCurve(Trans[t]);
+        if (TransReverse[t]) cmsFreeToneCurve(TransReverse[t]);
+    }
+
+    if (LutPlusCurves != NULL) cmsPipelineFree(LutPlusCurves);
+    if (OptimizedLUT != NULL) cmsPipelineFree(OptimizedLUT);
+
+    return FALSE;
+
+    cmsUNUSED_PARAMETER(Intent);
+    cmsUNUSED_PARAMETER(lIsLinear);
+}
+
+
+// Curves optimizer ------------------------------------------------------------------------------------------------------------------
+
+// Release callback for Curves16Data: frees every per-curve table, then the
+// array of table pointers, then the container itself.
+static
+void CurvesFree(cmsContext ContextID, void* ptr)
+{
+    Curves16Data* c16 = (Curves16Data*) ptr;
+    cmsUInt32Number curve;
+
+    for (curve = 0; curve < c16->nCurves; curve++)
+        _cmsFree(ContextID, c16->Curves[curve]);
+
+    _cmsFree(ContextID, c16->Curves);
+    _cmsFree(ContextID, c16);
+}
+
+// Duplication callback for Curves16Data: deep copy of the container, of the
+// array of table pointers and of every table.
+//
+// Returns the new block, or NULL when any allocation fails. In that case all the
+// memory obtained so far is released and the source block is left untouched.
+static
+void* CurvesDup(cmsContext ContextID, const void* ptr)
+{
+    Curves16Data* Data = (Curves16Data*)_cmsDupMem(ContextID, ptr, sizeof(Curves16Data));
+    cmsUInt32Number i, j;
+
+    if (Data == NULL) return NULL;
+
+    // After the flat copy, Data->Curves still points to the source array
+    Data->Curves = (cmsUInt16Number**) _cmsDupMem(ContextID, Data->Curves, Data->nCurves * sizeof(cmsUInt16Number*));
+    if (Data->Curves == NULL && Data->nCurves > 0) {
+
+        _cmsFree(ContextID, Data);
+        return NULL;
+    }
+
+    for (i=0; i < Data -> nCurves; i++) {
+
+        // Entry i still holds the source table at this point
+        cmsUInt16Number* Copy = (cmsUInt16Number*) _cmsDupMem(ContextID, Data->Curves[i], Data->nElements * sizeof(cmsUInt16Number));
+
+        if (Copy == NULL && Data->nElements > 0) {
+
+            // Only entries below i are our own copies; the remaining ones belong to the source
+            for (j=0; j < i; j++) {
+                _cmsFree(ContextID, Data->Curves[j]);
+            }
+            _cmsFree(ContextID, Data->Curves);
+            _cmsFree(ContextID, Data);
+            return NULL;
+        }
+
+        Data->Curves[i] = Copy;
+    }
+
+    return (void*) Data;
+}
+
+// Precomputes lookup tables for a set of tone curves.
+// Builds nCurves tables of nElements 16-bit words each. When nElements is 256 the
+// table index is expanded with FROM_8_TO_16 before evaluating the curve; otherwise
+// the index itself is used as the 16-bit input. Returns NULL, with everything
+// released, when any allocation fails.
+static
+Curves16Data* CurvesAlloc(cmsContext ContextID, cmsUInt32Number nCurves, cmsUInt32Number nElements, cmsToneCurve** G)
+{
+    Curves16Data* tables;
+    cmsUInt32Number curve = 0, k;
+    const cmsBool expand8bits = (nElements == 256U);
+
+    tables = (Curves16Data*)_cmsMallocZero(ContextID, sizeof(Curves16Data));
+    if (tables == NULL) return NULL;
+
+    tables->nCurves   = nCurves;
+    tables->nElements = nElements;
+
+    tables->Curves = (cmsUInt16Number**) _cmsCalloc(ContextID, nCurves, sizeof(cmsUInt16Number*));
+    if (tables->Curves == NULL) goto Error;
+
+    for (curve = 0; curve < nCurves; curve++) {
+
+        cmsUInt16Number* row = (cmsUInt16Number*) _cmsCalloc(ContextID, nElements, sizeof(cmsUInt16Number));
+
+        tables->Curves[curve] = row;
+        if (row == NULL) goto Error;
+
+        for (k = 0; k < nElements; k++) {
+
+            if (expand8bits)
+                row[k] = cmsEvalToneCurve16(G[curve], FROM_8_TO_16(k));
+            else
+                row[k] = cmsEvalToneCurve16(G[curve], (cmsUInt16Number) k);
+        }
+    }
+
+    return tables;
+
+Error:
+
+    // Rows below 'curve' are the ones that were successfully allocated
+    if (tables->Curves != NULL) {
+
+        for (k = 0; k < curve; k++)
+            _cmsFree(ContextID, tables->Curves[k]);
+
+        _cmsFree(ContextID, tables->Curves);
+    }
+
+    _cmsFree(ContextID, tables);
+    return NULL;
+}
+
+// Table-driven curve evaluation for 8-bit data: the high byte of each input
+// word selects the entry of the corresponding precomputed table.
+static
+void FastEvaluateCurves8(CMSREGISTER const cmsUInt16Number In[],
+                          CMSREGISTER cmsUInt16Number Out[],
+                          CMSREGISTER const void* D)
+{
+    Curves16Data* tables = (Curves16Data*) D;
+    cmsUInt32Number ch;
+
+    for (ch = 0; ch < tables->nCurves; ch++) {
+
+         const int slot = (In[ch] >> 8);
+
+         Out[ch] = tables->Curves[ch][slot];
+    }
+}
+
+
+// Table-driven curve evaluation for 16-bit data: the full input word selects
+// the entry of the corresponding precomputed table.
+static
+void FastEvaluateCurves16(CMSREGISTER const cmsUInt16Number In[],
+                          CMSREGISTER cmsUInt16Number Out[],
+                          CMSREGISTER const void* D)
+{
+    Curves16Data* tables = (Curves16Data*) D;
+    cmsUInt32Number ch;
+
+    for (ch = 0; ch < tables->nCurves; ch++) {
+
+         const cmsUInt16Number* row = tables->Curves[ch];
+
+         Out[ch] = row[In[ch]];
+    }
+}
+
+
+// Identity evaluator: copies as many words from In[] to Out[] as the pipeline
+// passed through D has input channels.
+static
+void FastIdentity16(CMSREGISTER const cmsUInt16Number In[],
+                    CMSREGISTER cmsUInt16Number Out[],
+                    CMSREGISTER const void* D)
+{
+    cmsPipeline* Lut = (cmsPipeline*) D;
+    cmsUInt32Number ch = 0;
+
+    while (ch < Lut->InputChannels) {
+         Out[ch] = In[ch];
+         ch++;
+    }
+}
+
+
+// If the target LUT holds only curves, the optimization procedure is to join all those
+// curves together. That only works on curves and does not work on matrices.
+//
+// Lut           in/out. On success the source pipeline is freed and *Lut points to
+//               the new one. On failure *Lut is left untouched.
+// Intent        not used.
+// InputFormat   8-bit input selects 256-entry tables, anything else 65536-entry ones.
+// OutputFormat  only checked for floating point.
+// dwFlags       gains cmsFLAGS_NOCACHE on success.
+//
+// Returns TRUE when the pipeline has been replaced, FALSE otherwise.
+static
+cmsBool OptimizeByJoiningCurves(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
+{
+    cmsToneCurve** GammaTables = NULL;
+    cmsFloat32Number InFloat[cmsMAXCHANNELS], OutFloat[cmsMAXCHANNELS];
+    cmsUInt32Number i, j;
+    cmsPipeline* Src = *Lut;
+    cmsPipeline* Dest = NULL;
+    cmsStage* mpe;
+    cmsStage* ObtainedCurves = NULL;
+
+
+    // This is a lossy optimization! does not apply in floating-point cases
+    if (_cmsFormatterIsFloat(*InputFormat) || _cmsFormatterIsFloat(*OutputFormat)) return FALSE;
+
+    //  Only curves in this LUT?
+    for (mpe = cmsPipelineGetPtrToFirstStage(Src);
+         mpe != NULL;
+         mpe = cmsStageNext(mpe)) {
+            if (cmsStageType(mpe) != cmsSigCurveSetElemType) return FALSE;
+    }
+
+    // Allocate an empty LUT
+    Dest =  cmsPipelineAlloc(Src ->ContextID, Src ->InputChannels, Src ->OutputChannels);
+    if (Dest == NULL) return FALSE;
+
+    // Create target curves
+    GammaTables = (cmsToneCurve**) _cmsCalloc(Src ->ContextID, Src ->InputChannels, sizeof(cmsToneCurve*));
+    if (GammaTables == NULL) goto Error;
+
+    for (i=0; i < Src ->InputChannels; i++) {
+        GammaTables[i] = cmsBuildTabulatedToneCurve16(Src ->ContextID, PRELINEARIZATION_POINTS, NULL);
+        if (GammaTables[i] == NULL) goto Error;
+    }
+
+    // Compute 16 bit result by using floating point
+    for (i=0; i < PRELINEARIZATION_POINTS; i++) {
+
+        // Same ramp value on every channel
+        for (j=0; j < Src ->InputChannels; j++)
+            InFloat[j] = (cmsFloat32Number) ((cmsFloat64Number) i / (PRELINEARIZATION_POINTS - 1));
+
+        cmsPipelineEvalFloat(InFloat, OutFloat, Src);
+
+        for (j=0; j < Src ->InputChannels; j++)
+            GammaTables[j] -> Table16[i] = _cmsQuickSaturateWord(OutFloat[j] * 65535.0);
+    }
+
+    ObtainedCurves = cmsStageAllocToneCurves(Src ->ContextID, Src ->InputChannels, GammaTables);
+    if (ObtainedCurves == NULL) goto Error;
+
+    // The tabulated curves are released here, before the stage is used; each
+    // entry is cleared so the Error path does not free it again.
+    for (i=0; i < Src ->InputChannels; i++) {
+        cmsFreeToneCurve(GammaTables[i]);
+        GammaTables[i] = NULL;
+    }
+
+    if (GammaTables != NULL) {
+        _cmsFree(Src->ContextID, GammaTables);
+        GammaTables = NULL;
+    }
+
+    // Maybe the curves are linear at the end
+    if (!AllCurvesAreLinear(ObtainedCurves)) {
+       _cmsStageToneCurvesData* Data;
+
+        if (!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, ObtainedCurves))
+            goto Error;
+        Data = (_cmsStageToneCurvesData*) cmsStageData(ObtainedCurves);
+        // The stage is now part of Dest; clear the local pointer so the Error path
+        // does not free it separately.
+        ObtainedCurves = NULL;
+
+        // If the curves are to be applied in 8 bits, we can save memory
+        if (_cmsFormatterIs8bit(*InputFormat)) {
+             Curves16Data* c16 = CurvesAlloc(Dest ->ContextID, Data ->nCurves, 256, Data ->TheCurves);
+
+             if (c16 == NULL) goto Error;
+             *dwFlags |= cmsFLAGS_NOCACHE;
+            _cmsPipelineSetOptimizationParameters(Dest, FastEvaluateCurves8, c16, CurvesFree, CurvesDup);
+
+        }
+        else {
+             Curves16Data* c16 = CurvesAlloc(Dest ->ContextID, Data ->nCurves, 65536, Data ->TheCurves);
+
+             if (c16 == NULL) goto Error;
+             *dwFlags |= cmsFLAGS_NOCACHE;
+            _cmsPipelineSetOptimizationParameters(Dest, FastEvaluateCurves16, c16, CurvesFree, CurvesDup);
+        }
+    }
+    else {
+
+        // LUT optimizes to nothing. Set the identity LUT
+        cmsStageFree(ObtainedCurves);
+        ObtainedCurves = NULL;
+
+        if (!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, cmsStageAllocIdentity(Dest ->ContextID, Src ->InputChannels)))
+            goto Error;
+
+        *dwFlags |= cmsFLAGS_NOCACHE;
+        // FastIdentity16 receives the pipeline itself as its data; no free or dup callbacks are set
+        _cmsPipelineSetOptimizationParameters(Dest, FastIdentity16, (void*) Dest, NULL, NULL);
+    }
+
+    // We are done.
+    cmsPipelineFree(Src);
+    *Lut = Dest;
+    return TRUE;
+
+Error:
+
+    // Release whatever is still referenced by the locals; Src is left as it was
+    if (ObtainedCurves != NULL) cmsStageFree(ObtainedCurves);
+    if (GammaTables != NULL) {
+        for (i=0; i < Src ->InputChannels; i++) {
+            if (GammaTables[i] != NULL) cmsFreeToneCurve(GammaTables[i]);
+        }
+
+        _cmsFree(Src ->ContextID, GammaTables);
+    }
+
+    if (Dest != NULL) cmsPipelineFree(Dest);
+    return FALSE;
+
+    cmsUNUSED_PARAMETER(Intent);
+    cmsUNUSED_PARAMETER(InputFormat);
+    cmsUNUSED_PARAMETER(OutputFormat);
+    cmsUNUSED_PARAMETER(dwFlags);
+}
+
+// -------------------------------------------------------------------------------------------------------------------------------------
+// LUT is Shaper - Matrix - Matrix - Shaper, which is very frequent when combining two matrix-shaper profiles
+
+
+// Release callback for the matrix-shaper tables; a NULL block is ignored.
+static
+void  FreeMatShaper(cmsContext ContextID, void* Data)
+{
+    if (Data == NULL)
+        return;
+
+    _cmsFree(ContextID, Data);
+}
+
+// Duplication callback for the matrix-shaper tables: a flat copy of one
+// MatShaper8Data. Returns whatever _cmsDupMem returns.
+static
+void* DupMatShaper(cmsContext ContextID, const void* Data)
+{
+    const cmsUInt32Number nBytes = sizeof(MatShaper8Data);
+
+    return _cmsDupMem(ContextID, Data, nBytes);
+}
+
+
+// A fast matrix-shaper evaluator for 8 bits. This is a bit tricky since I'm using 1.14 signed fixed point
+// to accomplish some performance. Actually it takes 256x3 16 bits tables and 16385 x 3 tables of 8 bits,
+// in total about 50K, and the performance boost is huge!
+//
+// In[0..2]  input words; only the low byte of each one is used (see below).
+// Out[0..2] receives the entries of the second shaper tables.
+// D         the MatShaper8Data holding both sets of shaper tables, the matrix and the offset.
+static
+void MatShaperEval16(CMSREGISTER const cmsUInt16Number In[],
+                     CMSREGISTER cmsUInt16Number Out[],
+                     CMSREGISTER const void* D)
+{
+    MatShaper8Data* p = (MatShaper8Data*) D;
+    cmsS1Fixed14Number l1, l2, l3, r, g, b;
+    cmsUInt32Number ri, gi, bi;
+
+    // In this case (and only in this case!) we can use this simplification since
+    // In[] is assured to come from a 8 bit number. (a << 8 | a)
+    ri = In[0] & 0xFFU;
+    gi = In[1] & 0xFFU;
+    bi = In[2] & 0xFFU;
+
+    // Across first shaper, which also converts to 1.14 fixed point
+    r = p->Shaper1R[ri];
+    g = p->Shaper1G[gi];
+    b = p->Shaper1B[bi];
+
+    // Evaluate the matrix in 1.14 fixed point
+    // 0x2000 is half of 1 << 14, added so that the shift rounds instead of truncating
+    l1 =  (p->Mat[0][0] * r + p->Mat[0][1] * g + p->Mat[0][2] * b + p->Off[0] + 0x2000) >> 14;
+    l2 =  (p->Mat[1][0] * r + p->Mat[1][1] * g + p->Mat[1][2] * b + p->Off[1] + 0x2000) >> 14;
+    l3 =  (p->Mat[2][0] * r + p->Mat[2][1] * g + p->Mat[2][2] * b + p->Off[2] + 0x2000) >> 14;
+
+    // Now we have to clip to 0..1.0 range
+    // 16384 is 1.0 in 1.14, and also the last index used on the second shaper tables
+    ri = (l1 < 0) ? 0 : ((l1 > 16384) ? 16384U : (cmsUInt32Number) l1);
+    gi = (l2 < 0) ? 0 : ((l2 > 16384) ? 16384U : (cmsUInt32Number) l2);
+    bi = (l3 < 0) ? 0 : ((l3 > 16384) ? 16384U : (cmsUInt32Number) l3);
+
+    // And across second shaper,
+    Out[0] = p->Shaper2R[ri];
+    Out[1] = p->Shaper2G[gi];
+    Out[2] = p->Shaper2B[bi];
+
+}
+
+// This table converts from 8 bits to 1.14 after applying the curve.
+// Entry i holds the curve evaluated at i / 255, converted with DOUBLE_TO_1FIXED14.
+// Results that are not below 131072.0 are stored as 0x7fffffff instead.
+static
+void FillFirstShaper(cmsS1Fixed14Number* Table, cmsToneCurve* Curve)
+{
+    int level;
+
+    for (level = 0; level < 256; level++) {
+
+        const cmsFloat32Number x = (cmsFloat32Number) (level / 255.0);
+        const cmsFloat32Number y = cmsEvalToneCurveFloat(Curve, x);
+
+        if (!(y < 131072.0)) {
+            Table[level] = 0x7fffffff;
+            continue;
+        }
+
+        Table[level] = DOUBLE_TO_1FIXED14(y);
+    }
+}
+
+// This table converts from 1.14 (being 0x4000 the last entry) to 8 bits after applying the curve.
+// Entry i holds the curve evaluated at i / 16384, clipped to 0..1.0 and scaled to 16 bits.
+static
+void FillSecondShaper(cmsUInt16Number* Table, cmsToneCurve* Curve, cmsBool Is8BitsOutput)
+{
+    int slot;
+
+    for (slot = 0; slot < 16385; slot++) {
+
+        cmsFloat32Number x = (cmsFloat32Number) (slot / 16384.0);
+        cmsFloat32Number y = cmsEvalToneCurveFloat(Curve, x);    // y comes 0..1.0
+        cmsUInt16Number  word;
+
+        if (y < 0)
+            y = 0;
+        else if (y > 1.0)
+            y = 1.0;
+
+        word = _cmsQuickSaturateWord(y * 65535.0);
+
+        if (Is8BitsOutput) {
+
+            // If 8 bits output, we can optimize further by computing the / 257 part.
+            // First the resulting byte is computed and then the byte times 257 is
+            // stored. This quantization allows to round very quick by doing a >> 8, but
+            // since the low byte is always equal to msb, a & 0xff works as well.
+            cmsUInt8Number msb = FROM_16_TO_8(word);
+
+            word = FROM_8_TO_16(msb);
+        }
+
+        Table[slot] = word;
+    }
+}
+
+// Compute the matrix-shaper structure and install MatShaperEval16 on Dest.
+// Curve1/Curve2 feed the first and second shaper tables, Mat and Off (which may
+// be NULL, meaning zero offset) are converted to 1.14 fixed point. When the
+// output is 8 bits the OPTIMIZED_SH(1) bit is added to *OutputFormat.
+// Returns FALSE only when the table block cannot be allocated.
+static
+cmsBool SetMatShaper(cmsPipeline* Dest, cmsToneCurve* Curve1[3], cmsMAT3* Mat, cmsVEC3* Off, cmsToneCurve* Curve2[3], cmsUInt32Number* OutputFormat)
+{
+    const cmsBool Out8 = _cmsFormatterIs8bit(*OutputFormat);
+    MatShaper8Data* shaper;
+    int row, col;
+
+    // Allocate a big chunk of memory to store precomputed tables
+    shaper = (MatShaper8Data*) _cmsMalloc(Dest ->ContextID, sizeof(MatShaper8Data));
+    if (shaper == NULL)
+        return FALSE;
+
+    shaper->ContextID = Dest->ContextID;
+
+    // Precompute tables
+    FillFirstShaper(shaper->Shaper1R, Curve1[0]);
+    FillFirstShaper(shaper->Shaper1G, Curve1[1]);
+    FillFirstShaper(shaper->Shaper1B, Curve1[2]);
+
+    FillSecondShaper(shaper->Shaper2R, Curve2[0], Out8);
+    FillSecondShaper(shaper->Shaper2G, Curve2[1], Out8);
+    FillSecondShaper(shaper->Shaper2B, Curve2[2], Out8);
+
+    // Convert matrix and offset to 1.14. Note that those values may take more than 16 bits
+    for (row = 0; row < 3; row++) {
+
+        for (col = 0; col < 3; col++)
+            shaper->Mat[row][col] = DOUBLE_TO_1FIXED14(Mat->v[row].n[col]);
+
+        if (Off != NULL)
+            shaper->Off[row] = DOUBLE_TO_1FIXED14(Off->n[row]);
+        else
+            shaper->Off[row] = 0;
+    }
+
+    // Mark as optimized for faster formatter
+    if (Out8)
+        *OutputFormat |= OPTIMIZED_SH(1);
+
+    // Fill function pointers
+    _cmsPipelineSetOptimizationParameters(Dest, MatShaperEval16, (void*) shaper, FreeMatShaper, DupMatShaper);
+    return TRUE;
+}
+
+//  8 bits on input allows matrix-shaper boot up to 25 Mpixels per second on RGB. That's fast!
+//
+// Recognizes the pipelines
+//
+//    shaper-matrix-matrix-shaper
+//    shaper-matrix-shaper
+//
+// and replaces them by shaper - (single 3x3 matrix) - shaper with a fixed point
+// evaluator. When the resulting matrix is the identity with no offset, the
+// matrix is dropped and the two shapers are handed to OptimizeByJoiningCurves.
+//
+// Lut           in/out. On success the source pipeline is freed and *Lut points to
+//               the new one. On failure *Lut is left untouched.
+// Intent        only forwarded to OptimizeByJoiningCurves.
+// InputFormat   must be 3 channels, 8 bits.
+// OutputFormat  must be 3 channels. May gain OPTIMIZED_SH(1), set by SetMatShaper.
+// dwFlags       gains cmsFLAGS_NOCACHE when the matrix-shaper evaluator is installed.
+//
+// Returns TRUE when the pipeline has been replaced, FALSE otherwise.
+static
+cmsBool OptimizeMatrixShaper(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
+{
+    cmsStage* Curve1, *Curve2;
+    cmsStage* Matrix1, *Matrix2;
+    cmsMAT3 res;
+    cmsBool IdentityMat;
+    cmsPipeline* Dest, *Src;
+    cmsFloat64Number* Offset;
+
+    // Only works on RGB to RGB
+    if (T_CHANNELS(*InputFormat) != 3 || T_CHANNELS(*OutputFormat) != 3) return FALSE;
+
+    // Only works on 8 bit input
+    if (!_cmsFormatterIs8bit(*InputFormat)) return FALSE;
+
+    // Seems suitable, proceed
+    Src = *Lut;
+
+    // Both constructs are possible (the first one because of abs. colorimetric).
+    // Additionally, in the first case, the input matrix offset should be zero.
+
+    IdentityMat = FALSE;
+    if (cmsPipelineCheckAndRetreiveStages(Src, 4,
+            cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType,
+            &Curve1, &Matrix1, &Matrix2, &Curve2)) {
+
+        // Get both matrices
+        _cmsStageMatrixData* Data1 = (_cmsStageMatrixData*)cmsStageData(Matrix1);
+        _cmsStageMatrixData* Data2 = (_cmsStageMatrixData*)cmsStageData(Matrix2);
+
+        // The coefficients are accessed below as cmsMAT3, so both stages have to be
+        // 3x3. Anything else would be read past the end of its coefficient array.
+        if (cmsStageInputChannels(Matrix1) != 3 || cmsStageOutputChannels(Matrix1) != 3 ||
+            cmsStageInputChannels(Matrix2) != 3 || cmsStageOutputChannels(Matrix2) != 3) return FALSE;
+
+        // Input offset should be zero
+        if (Data1->Offset != NULL) return FALSE;
+
+        // Multiply both matrices to get the result
+        _cmsMAT3per(&res, (cmsMAT3*)Data2->Double, (cmsMAT3*)Data1->Double);
+
+        // Only 2nd matrix has offset, or it is zero
+        Offset = Data2->Offset;
+
+        // Now the result is in res + Data2 -> Offset. Maybe is a plain identity?
+        if (_cmsMAT3isIdentity(&res) && Offset == NULL) {
+
+            // We can get rid of full matrix
+            IdentityMat = TRUE;
+        }
+    }
+    else if (cmsPipelineCheckAndRetreiveStages(Src, 3,
+                 cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType,
+                 &Curve1, &Matrix1, &Curve2)) {
+
+        _cmsStageMatrixData* Data = (_cmsStageMatrixData*)cmsStageData(Matrix1);
+
+        // Same restriction as above: sizeof(res) bytes are copied from the stage
+        if (cmsStageInputChannels(Matrix1) != 3 || cmsStageOutputChannels(Matrix1) != 3) return FALSE;
+
+        // Copy the matrix to our result
+        memcpy(&res, Data->Double, sizeof(res));
+
+        // Preserve the offset (may be NULL as a zero offset)
+        Offset = Data->Offset;
+
+        if (_cmsMAT3isIdentity(&res) && Offset == NULL) {
+
+            // We can get rid of full matrix
+            IdentityMat = TRUE;
+        }
+    }
+    else
+        return FALSE; // Not optimizeable this time
+
+    // Allocate an empty LUT
+    Dest =  cmsPipelineAlloc(Src ->ContextID, Src ->InputChannels, Src ->OutputChannels);
+    if (!Dest) return FALSE;
+
+    // Assemble the new LUT
+    if (!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, cmsStageDup(Curve1)))
+        goto Error;
+
+    if (!IdentityMat) {
+
+        if (!cmsPipelineInsertStage(Dest, cmsAT_END, cmsStageAllocMatrix(Dest->ContextID, 3, 3, (const cmsFloat64Number*)&res, Offset)))
+            goto Error;
+    }
+
+    if (!cmsPipelineInsertStage(Dest, cmsAT_END, cmsStageDup(Curve2)))
+        goto Error;
+
+    // If identity on matrix, we can further optimize the curves, so call the join curves routine
+    if (IdentityMat) {
+
+        // The result is deliberately not checked: when joining fails, Dest is left
+        // as the plain shaper-shaper pipeline assembled above, which is still valid.
+        OptimizeByJoiningCurves(&Dest, Intent, InputFormat, OutputFormat, dwFlags);
+    }
+    else {
+        _cmsStageToneCurvesData* mpeC1 = (_cmsStageToneCurvesData*) cmsStageData(Curve1);
+        _cmsStageToneCurvesData* mpeC2 = (_cmsStageToneCurvesData*) cmsStageData(Curve2);
+
+        // Setup the optimization routines. Without the tables there is no fast
+        // evaluator, so give up and let the remaining optimizers work on Src.
+        if (!SetMatShaper(Dest, mpeC1 ->TheCurves, &res, (cmsVEC3*) Offset, mpeC2->TheCurves, OutputFormat))
+            goto Error;
+
+        // In this particular optimization, cache does not help as it takes more time to deal with
+        // the cache than with the pixel handling
+        *dwFlags |= cmsFLAGS_NOCACHE;
+    }
+
+    cmsPipelineFree(Src);
+    *Lut = Dest;
+    return TRUE;
+Error:
+    // Leave Src unchanged
+    cmsPipelineFree(Dest);
+    return FALSE;
+}
+
+
+// -------------------------------------------------------------------------------------------------------------------------------------
+// Optimization plug-ins
+
+// List of optimizations: one node of a singly linked list of optimizer callbacks
+typedef struct _cmsOptimizationCollection_st {
+
+    _cmsOPToptimizeFn  OptimizePtr;   // Optimizer callback held by this node
+
+    struct _cmsOptimizationCollection_st *Next;   // Following node, NULL on the last one
+
+} _cmsOptimizationCollection;
+
+
+// The built-in list. We currently implement 4 types of optimizations. Joining of curves, matrix-shaper, linearization and resampling
+// The array is chained through Next in declaration order, which is the order
+// in which the optimizers are tried; the last entry terminates the list.
+static _cmsOptimizationCollection DefaultOptimization[] = {
+
+    { OptimizeByJoiningCurves,            &DefaultOptimization[1] },
+    { OptimizeMatrixShaper,               &DefaultOptimization[2] },
+    { OptimizeByComputingLinearization,   &DefaultOptimization[3] },
+    { OptimizeByResampling,               NULL }
+};
+
+// The linked list head. It starts with a NULL collection, that is, with no
+// plug-in optimizations registered.
+_cmsOptimizationPluginChunkType _cmsOptimizationPluginChunk = { NULL };
+
+
+// Duplicates the zone of memory used by the plug-in in the new context.
+// Every node of the source list is copied into the memory pool of ctx, keeping
+// the original order, and a new chunk pointing to the copied list is stored in
+// ctx. If a node cannot be copied the function returns at once, leaving the
+// chunk slot of ctx unassigned.
+static
+void DupPluginOptimizationList(struct _cmsContext_struct* ctx,
+                               const struct _cmsContext_struct* src)
+{
+    _cmsOptimizationPluginChunkType copyHead = { NULL };
+    _cmsOptimizationCollection** link = &copyHead.OptimizationCollection;
+    _cmsOptimizationCollection*  node;
+    _cmsOptimizationPluginChunkType* srcHead = (_cmsOptimizationPluginChunkType*) src->chunks[OptimizationPlugin];
+
+    _cmsAssert(ctx != NULL);
+    _cmsAssert(srcHead != NULL);
+
+    // Walk the list copying all nodes. 'link' always addresses the pointer that
+    // has to receive the next copy, which keeps the order of the source list.
+    for (node = srcHead->OptimizationCollection; node != NULL; node = node->Next) {
+
+        _cmsOptimizationCollection* clone = (_cmsOptimizationCollection*) _cmsSubAllocDup(ctx->MemPool, node, sizeof(_cmsOptimizationCollection));
+
+        if (clone == NULL)
+            return;
+
+        clone->Next = NULL;
+        *link = clone;
+        link = &clone->Next;
+    }
+
+    ctx->chunks[OptimizationPlugin] = _cmsSubAllocDup(ctx->MemPool, &copyHead, sizeof(_cmsOptimizationPluginChunkType));
+}
+
+// Sets up the optimization plug-in chunk of ctx: a copy of the list held by src
+// when a source context is given, otherwise a copy of an empty chunk.
+void  _cmsAllocOptimizationPluginChunk(struct _cmsContext_struct* ctx,
+                                         const struct _cmsContext_struct* src)
+{
+    static _cmsOptimizationPluginChunkType OptimizationPluginChunkType = { NULL };
+
+    if (src != NULL) {
+
+        // Copy all linked list
+        DupPluginOptimizationList(ctx, src);
+        return;
+    }
+
+    ctx->chunks[OptimizationPlugin] = _cmsSubAllocDup(ctx->MemPool, &OptimizationPluginChunkType, sizeof(_cmsOptimizationPluginChunkType));
+}
+
+
+// Register new ways to optimize.
+// A NULL Data empties the plug-in list of the context. Otherwise a node holding
+// the optimizer callback of the plug-in is placed at the head of the list.
+// Returns FALSE when the plug-in has no callback or the node cannot be allocated.
+cmsBool  _cmsRegisterOptimizationPlugin(cmsContext ContextID, cmsPluginBase* Data)
+{
+    cmsPluginOptimization* Plugin = (cmsPluginOptimization*) Data;
+    _cmsOptimizationPluginChunkType* chunk = ( _cmsOptimizationPluginChunkType*) _cmsContextGetClientChunk(ContextID, OptimizationPlugin);
+    _cmsOptimizationCollection* node;
+
+    if (Data == NULL) {
+
+        chunk->OptimizationCollection = NULL;
+        return TRUE;
+    }
+
+    // Optimizer callback is required
+    if (Plugin->OptimizePtr == NULL)
+        return FALSE;
+
+    node = (_cmsOptimizationCollection*) _cmsPluginMalloc(ContextID, sizeof(_cmsOptimizationCollection));
+    if (node == NULL)
+        return FALSE;
+
+    // Fill the node and push it in front of the current list
+    node->OptimizePtr = Plugin->OptimizePtr;
+    node->Next        = chunk->OptimizationCollection;
+
+    chunk->OptimizationCollection = node;
+
+    // All is ok
+    return TRUE;
+}
+
+// Tries the optimizers of a list in order. TRUE as soon as one of them succeeds.
+static
+cmsBool TryOptimizationChain(_cmsOptimizationCollection* Chain,
+                             cmsPipeline**    PtrLut,
+                             cmsUInt32Number  Intent,
+                             cmsUInt32Number* InputFormat,
+                             cmsUInt32Number* OutputFormat,
+                             cmsUInt32Number* dwFlags)
+{
+    for (; Chain != NULL; Chain = Chain->Next) {
+
+        if (Chain->OptimizePtr(PtrLut, Intent, InputFormat, OutputFormat, dwFlags))
+            return TRUE;
+    }
+
+    return FALSE;
+}
+
+// The entry point for LUT optimization.
+// With cmsFLAGS_FORCE_CLUT only resampling is attempted. Otherwise the pipeline
+// is pre-optimized; an empty pipeline gets the identity evaluator, and unless
+// cmsFLAGS_NOOPTIMIZE is set the plug-in optimizers and then the built-in ones
+// are tried in order. Returns TRUE when something was optimized.
+cmsBool _cmsOptimizePipeline(cmsContext ContextID,
+                             cmsPipeline**    PtrLut,
+                             cmsUInt32Number  Intent,
+                             cmsUInt32Number* InputFormat,
+                             cmsUInt32Number* OutputFormat,
+                             cmsUInt32Number* dwFlags)
+{
+    _cmsOptimizationPluginChunkType* ctx = ( _cmsOptimizationPluginChunkType*) _cmsContextGetClientChunk(ContextID, OptimizationPlugin);
+    cmsBool SimpleDone = FALSE;
+
+    // A CLUT is being asked, so force this specific optimization
+    if (*dwFlags & cmsFLAGS_FORCE_CLUT) {
+
+        PreOptimize(*PtrLut);
+        return OptimizeByResampling(PtrLut, Intent, InputFormat, OutputFormat, dwFlags);
+    }
+
+    // Try to get rid of identities and trivial conversions, if there is anything at all
+    if ((*PtrLut)->Elements != NULL)
+        SimpleDone = PreOptimize(*PtrLut);
+
+    // Nothing to optimize, or nothing left after the removal: this is an identity
+    if ((*PtrLut)->Elements == NULL) {
+        _cmsPipelineSetOptimizationParameters(*PtrLut, FastIdentity16, (void*) *PtrLut, NULL, NULL);
+        return TRUE;
+    }
+
+    // Do not optimize, keep all precision
+    if (*dwFlags & cmsFLAGS_NOOPTIMIZE)
+        return FALSE;
+
+    // Try plug-in optimizations first; if one schema succeeded, we are done
+    if (TryOptimizationChain(ctx->OptimizationCollection, PtrLut, Intent, InputFormat, OutputFormat, dwFlags))
+        return TRUE;
+
+    // Then the built-in optimizations
+    if (TryOptimizationChain(DefaultOptimization, PtrLut, Intent, InputFormat, OutputFormat, dwFlags))
+        return TRUE;
+
+    // Only simple optimizations succeeded
+    return SimpleDone;
+}
+
+
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmspack.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmspack.c
new file mode 100644
index 0000000000..db34969ef5
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmspack.c
@@ -0,0 +1,3433 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// This module handles all formats supported by lcms. There are two flavors, 16 bits and
+// floating point. Floating point is supported only in a subset, those formats holding
+// cmsFloat32Number (4 bytes per component) and double (marked as 0 bytes per component
+// as special case)
+
+// ---------------------------------------------------------------------------
+
+
+// This macro swaps the two bytes of a 16-bit word (endianness change)
+#define CHANGE_ENDIAN(w)    (cmsUInt16Number) ((cmsUInt16Number) ((w)<<8)|((w)>>8))
+
+// These macros handle reversing (negative)
+#define REVERSE_FLAVOR_8(x)     ((cmsUInt8Number) (0xff-(x)))
+#define REVERSE_FLAVOR_16(x)    ((cmsUInt16Number)(0xffff-(x)))
+
+// Lab V2 -> V4 rescale of a 16-bit value: * 0xffff / 0xff00 = (255 * 257) / (255 * 256) = 257 / 256
+cmsINLINE cmsUInt16Number FomLabV2ToLabV4(cmsUInt16Number x)
+{
+    const int widened = ((x << 8) | x) >> 8;  // bit-level stand-in for * 257 / 256
+
+    return (cmsUInt16Number) ((widened > 0xffff) ? 0xffff : widened);  // clamp to 16 bits
+}
+
+// Lab V4 -> V2 rescale of a 16-bit value: * 0xff00 / 0xffff = * 256 / 257 (0x80 is added before dividing)
+cmsINLINE cmsUInt16Number FomLabV4ToLabV2(cmsUInt16Number x)
+{
+    return (cmsUInt16Number) ((x * 256 + 0x80) / 257);
+}
+
+
+typedef struct {
+    cmsUInt32Number Type;   // NOTE(review): presumably the format bits an entry matches -- lookup code is outside this chunk
+    cmsUInt32Number Mask;   // NOTE(review): presumably the fields to ignore when matching (see the ANY* macros) -- confirm
+    cmsFormatter16  Frm;    // formatter routine (16-bit flavor) stored for this entry
+
+} cmsFormatters16;
+
+typedef struct {
+    cmsUInt32Number    Type;   // NOTE(review): presumably the format bits an entry matches -- lookup code is outside this chunk
+    cmsUInt32Number    Mask;   // NOTE(review): presumably the fields to ignore when matching (see the ANY* macros) -- confirm
+    cmsFormatterFloat  Frm;    // formatter routine (floating point flavor) stored for this entry
+
+} cmsFormattersFloat;
+
+// One wildcard value per format field. NOTE(review): presumably OR-ed into a formatter Mask so that field is ignored -- confirm
+#define ANYSPACE        COLORSPACE_SH(31)
+#define ANYCHANNELS     CHANNELS_SH(15)
+#define ANYEXTRA        EXTRA_SH(7)
+#define ANYPLANAR       PLANAR_SH(1)
+#define ANYENDIAN       ENDIAN16_SH(1)
+#define ANYSWAP         DOSWAP_SH(1)
+#define ANYSWAPFIRST    SWAPFIRST_SH(1)
+#define ANYFLAVOR       FLAVOR_SH(1)
+
+
+// Suppress warning about info never being used
+
+#ifdef _MSC_VER
+#pragma warning(disable : 4100)
+#endif
+
+// Unpacking routines (16 bits) ----------------------------------------------------------------------------------------
+
+
+// Generic chunky 8-bit unpacker driven entirely by the InputFormat flags (channel count,
+// swap, flavor, extra samples). Does almost everything but is slow.
+static
+cmsUInt8Number* UnrollChunkyBytes(CMSREGISTER _cmsTRANSFORM* info,
+                                  CMSREGISTER cmsUInt16Number wIn[],
+                                  CMSREGISTER cmsUInt8Number* accum,
+                                  CMSREGISTER cmsUInt32Number Stride)
+{
+    const cmsUInt32Number nChan       = T_CHANNELS(info -> InputFormat);
+    const cmsUInt32Number Swapped     = T_DOSWAP(info ->InputFormat);
+    const cmsUInt32Number Inverted    = T_FLAVOR(info ->InputFormat);
+    const cmsUInt32Number RotateFirst = T_SWAPFIRST(info -> InputFormat);
+    const cmsUInt32Number nExtra      = T_EXTRA(info -> InputFormat);
+    const cmsUInt32Number ExtraLeads  = Swapped ^ RotateFirst;
+    cmsUInt32Number k;
+
+    // Extra samples stored ahead of the colorants are stepped over first
+    if (ExtraLeads)
+        accum += nExtra;
+
+    for (k = 0; k < nChan; k++, accum++) {
+        cmsUInt16Number w = FROM_8_TO_16(*accum);
+
+        if (Inverted)
+            w = REVERSE_FLAVOR_16(w);
+
+        wIn[Swapped ? (nChan - k - 1) : k] = w;
+    }
+
+    // Otherwise they are stepped over after the colorants
+    if (!ExtraLeads)
+        accum += nExtra;
+
+    // SwapFirst with no extra sample: rotate wIn left by one, so the old wIn[0] ends up last
+    if (nExtra == 0 && RotateFirst) {
+        const cmsUInt16Number first = wIn[0];
+
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsUInt16Number));
+        wIn[nChan-1] = first;
+    }
+
+    return accum;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Planar 8-bit unpacker: consecutive samples of one pixel are Stride bytes apart.
+// Extra channels are just ignored because they come in the next planes.
+static
+cmsUInt8Number* UnrollPlanarBytes(CMSREGISTER _cmsTRANSFORM* info,
+                                  CMSREGISTER cmsUInt16Number wIn[],
+                                  CMSREGISTER cmsUInt8Number* accum,
+                                  CMSREGISTER cmsUInt32Number Stride)
+{
+    const cmsUInt32Number nChan    = T_CHANNELS(info -> InputFormat);
+    const cmsUInt32Number Swapped  = T_DOSWAP(info ->InputFormat);
+    const cmsUInt32Number Inverted = T_FLAVOR(info ->InputFormat);
+    cmsUInt8Number* plane = accum;
+    cmsUInt32Number k;
+
+    // When DoSwap ^ SwapFirst is set, T_EXTRA planes are stepped over before reading
+    if (Swapped ^ T_SWAPFIRST(info ->InputFormat))
+        plane += T_EXTRA(info -> InputFormat) * Stride;
+
+    for (k = 0; k < nChan; k++, plane += Stride) {
+        cmsUInt16Number w = FROM_8_TO_16(*plane);
+
+        if (Inverted)
+            w = REVERSE_FLAVOR_16(w);
+
+        wIn[Swapped ? (nChan - k - 1) : k] = w;
+    }
+
+    return accum + 1;
+}
+
+// Special cases, provided for performance
+// Four chunky bytes in natural order, each converted with FROM_8_TO_16
+static
+cmsUInt8Number* Unroll4Bytes(CMSREGISTER _cmsTRANSFORM* info,
+                             CMSREGISTER cmsUInt16Number wIn[],
+                             CMSREGISTER cmsUInt8Number* accum,
+                             CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[0] = FROM_8_TO_16(accum[0]); // C
+    wIn[1] = FROM_8_TO_16(accum[1]); // M
+    wIn[2] = FROM_8_TO_16(accum[2]); // Y
+    wIn[3] = FROM_8_TO_16(accum[3]); // K
+    return accum + 4;                // four bytes consumed
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Four chunky bytes in natural order; each one is inverted (0xff - x) before conversion
+static
+cmsUInt8Number* Unroll4BytesReverse(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wIn[],
+                                    CMSREGISTER cmsUInt8Number* accum,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[0] = FROM_8_TO_16(REVERSE_FLAVOR_8(accum[0])); // C
+    wIn[1] = FROM_8_TO_16(REVERSE_FLAVOR_8(accum[1])); // M
+    wIn[2] = FROM_8_TO_16(REVERSE_FLAVOR_8(accum[2])); // Y
+    wIn[3] = FROM_8_TO_16(REVERSE_FLAVOR_8(accum[3])); // K
+    return accum + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// K C M Y byte order: the first input byte goes to wIn[3], the other three follow in order
+static
+cmsUInt8Number* Unroll4BytesSwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                      CMSREGISTER cmsUInt16Number wIn[],
+                                      CMSREGISTER cmsUInt8Number* accum,
+                                      CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[3] = FROM_8_TO_16(accum[0]); // K
+    wIn[0] = FROM_8_TO_16(accum[1]); // C
+    wIn[1] = FROM_8_TO_16(accum[2]); // M
+    wIn[2] = FROM_8_TO_16(accum[3]); // Y
+    return accum + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// KYMC
+// Fully reversed channel order: input byte n is stored in wIn[3 - n]
+static
+cmsUInt8Number* Unroll4BytesSwap(CMSREGISTER _cmsTRANSFORM* info,
+                                 CMSREGISTER cmsUInt16Number wIn[],
+                                 CMSREGISTER cmsUInt8Number* accum,
+                                 CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[3] = FROM_8_TO_16(accum[0]);  // K
+    wIn[2] = FROM_8_TO_16(accum[1]);  // Y
+    wIn[1] = FROM_8_TO_16(accum[2]);  // M
+    wIn[0] = FROM_8_TO_16(accum[3]);  // C
+    return accum + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Input bytes land in wIn[2], wIn[1], wIn[0], wIn[3] -- i.e. Y M C K order when wIn holds C M Y K
+static
+cmsUInt8Number* Unroll4BytesSwapSwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                          CMSREGISTER cmsUInt16Number wIn[],
+                                          CMSREGISTER cmsUInt8Number* accum,
+                                          CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[2] = FROM_8_TO_16(accum[0]);  // Y
+    wIn[1] = FROM_8_TO_16(accum[1]);  // M
+    wIn[0] = FROM_8_TO_16(accum[2]);  // C
+    wIn[3] = FROM_8_TO_16(accum[3]);  // K
+    return accum + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Three chunky bytes in natural order, each converted with FROM_8_TO_16
+static
+cmsUInt8Number* Unroll3Bytes(CMSREGISTER _cmsTRANSFORM* info,
+                             CMSREGISTER cmsUInt16Number wIn[],
+                             CMSREGISTER cmsUInt8Number* accum,
+                             CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[0] = FROM_8_TO_16(accum[0]);     // R
+    wIn[1] = FROM_8_TO_16(accum[1]);     // G
+    wIn[2] = FROM_8_TO_16(accum[2]);     // B
+    return accum + 3;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// A B G R byte order: the leading byte is skipped, the other three are stored reversed
+static
+cmsUInt8Number* Unroll3BytesSkip1Swap(CMSREGISTER _cmsTRANSFORM* info,
+                                      CMSREGISTER cmsUInt16Number wIn[],
+                                      CMSREGISTER cmsUInt8Number* accum,
+                                      CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[2] = FROM_8_TO_16(accum[1]); // B
+    wIn[1] = FROM_8_TO_16(accum[2]); // G
+    wIn[0] = FROM_8_TO_16(accum[3]); // R
+
+    return accum + 4;                // accum[0] (A) is never read
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// B G R A byte order: three bytes stored reversed, then the trailing byte is skipped
+static
+cmsUInt8Number* Unroll3BytesSkip1SwapSwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                              CMSREGISTER cmsUInt16Number wIn[],
+                                              CMSREGISTER cmsUInt8Number* accum,
+                                              CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[2] = FROM_8_TO_16(accum[0]); // B
+    wIn[1] = FROM_8_TO_16(accum[1]); // G
+    wIn[0] = FROM_8_TO_16(accum[2]); // R
+
+    return accum + 4;                // accum[3] (A) is never read
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// A R G B byte order: the leading byte is skipped, the other three are stored in order
+static
+cmsUInt8Number* Unroll3BytesSkip1SwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                           CMSREGISTER cmsUInt16Number wIn[],
+                                           CMSREGISTER cmsUInt8Number* accum,
+                                           CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[0] = FROM_8_TO_16(accum[1]); // R
+    wIn[1] = FROM_8_TO_16(accum[2]); // G
+    wIn[2] = FROM_8_TO_16(accum[3]); // B
+
+    return accum + 4;                // accum[0] (A) is never read
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+
+// BGR: three chunky bytes stored in reverse channel order
+static
+cmsUInt8Number* Unroll3BytesSwap(CMSREGISTER _cmsTRANSFORM* info,
+                                 CMSREGISTER cmsUInt16Number wIn[],
+                                 CMSREGISTER cmsUInt8Number* accum,
+                                 CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[2] = FROM_8_TO_16(accum[0]);     // B
+    wIn[1] = FROM_8_TO_16(accum[1]);     // G
+    wIn[0] = FROM_8_TO_16(accum[2]);     // R
+
+    return accum + 3;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// 8-bit Lab V2 triplet: each byte goes through FROM_8_TO_16 and then FomLabV2ToLabV4
+static
+cmsUInt8Number* UnrollLabV2_8(CMSREGISTER _cmsTRANSFORM* info,
+                              CMSREGISTER cmsUInt16Number wIn[],
+                              CMSREGISTER cmsUInt8Number* accum,
+                              CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[0] = FomLabV2ToLabV4(FROM_8_TO_16(accum[0]));     // L
+    wIn[1] = FomLabV2ToLabV4(FROM_8_TO_16(accum[1]));     // a
+    wIn[2] = FomLabV2ToLabV4(FROM_8_TO_16(accum[2]));     // b
+    return accum + 3;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// 8-bit Lab V2 triplet preceded by one byte (A) that is skipped without being read
+static
+cmsUInt8Number* UnrollALabV2_8(CMSREGISTER _cmsTRANSFORM* info,
+                               CMSREGISTER cmsUInt16Number wIn[],
+                               CMSREGISTER cmsUInt8Number* accum,
+                               CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[0] = FomLabV2ToLabV4(FROM_8_TO_16(accum[1]));     // L
+    wIn[1] = FomLabV2ToLabV4(FROM_8_TO_16(accum[2]));     // a
+    wIn[2] = FomLabV2ToLabV4(FROM_8_TO_16(accum[3]));     // b
+
+    return accum + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// 16-bit Lab V2 triplet: three words at byte offsets 0, 2 and 4, each passed through FomLabV2ToLabV4
+static
+cmsUInt8Number* UnrollLabV2_16(CMSREGISTER _cmsTRANSFORM* info,
+                               CMSREGISTER cmsUInt16Number wIn[],
+                               CMSREGISTER cmsUInt8Number* accum,
+                               CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[0] = FomLabV2ToLabV4(*(cmsUInt16Number*) accum);           // L
+    wIn[1] = FomLabV2ToLabV4(*(cmsUInt16Number*) (accum + 2));     // a
+    wIn[2] = FomLabV2ToLabV4(*(cmsUInt16Number*) (accum + 4));     // b
+    return accum + 6;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// for duplex
+// Two chunky bytes in natural order, each converted with FROM_8_TO_16
+static
+cmsUInt8Number* Unroll2Bytes(CMSREGISTER _cmsTRANSFORM* info,
+                                     CMSREGISTER cmsUInt16Number wIn[],
+                                     CMSREGISTER cmsUInt8Number* accum,
+                                     CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[0] = FROM_8_TO_16(accum[0]);     // ch1
+    wIn[1] = FROM_8_TO_16(accum[1]);     // ch2
+    return accum + 2;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+
+
+
+// Monochrome duplicates L into RGB for null-transforms
+static
+cmsUInt8Number* Unroll1Byte(CMSREGISTER _cmsTRANSFORM* info,
+                            CMSREGISTER cmsUInt16Number wIn[],
+                            CMSREGISTER cmsUInt8Number* accum,
+                            CMSREGISTER cmsUInt32Number Stride)
+{
+    const cmsUInt16Number gray = FROM_8_TO_16(*accum);     // L
+    wIn[0] = wIn[1] = wIn[2] = gray;                       // same value in all three slots
+    return accum + 1;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+
+// One gray byte replicated into wIn[0..2]; the byte after it is skipped
+static
+cmsUInt8Number* Unroll1ByteSkip1(CMSREGISTER _cmsTRANSFORM* info,
+                                 CMSREGISTER cmsUInt16Number wIn[],
+                                 CMSREGISTER cmsUInt8Number* accum,
+                                 CMSREGISTER cmsUInt32Number Stride)
+{
+    const cmsUInt16Number gray = FROM_8_TO_16(*accum);     // L
+    wIn[0] = wIn[1] = wIn[2] = gray;
+    return accum + 2;                                      // sample + 1 skipped byte
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// One gray byte replicated into wIn[0..2]; the two bytes after it are skipped
+static
+cmsUInt8Number* Unroll1ByteSkip2(CMSREGISTER _cmsTRANSFORM* info,
+                                 CMSREGISTER cmsUInt16Number wIn[],
+                                 CMSREGISTER cmsUInt8Number* accum,
+                                 CMSREGISTER cmsUInt32Number Stride)
+{
+    const cmsUInt16Number gray = FROM_8_TO_16(*accum);     // L
+    wIn[0] = wIn[1] = wIn[2] = gray;
+    return accum + 3;                                      // sample + 2 skipped bytes
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// One gray byte, converted to 16 bits and then inverted (0xffff - x), replicated into wIn[0..2]
+static
+cmsUInt8Number* Unroll1ByteReversed(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wIn[],
+                                    CMSREGISTER cmsUInt8Number* accum,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[0] = wIn[1] = wIn[2] = REVERSE_FLAVOR_16(FROM_8_TO_16(accum[0]));     // L
+    return accum + 1;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+
+// Generic chunky 16-bit unpacker driven by the InputFormat flags (channel count, swap,
+// flavor, extra samples and the ENDIAN16 flag, which makes every word go through CHANGE_ENDIAN).
+static
+cmsUInt8Number* UnrollAnyWords(CMSREGISTER _cmsTRANSFORM* info,
+                               CMSREGISTER cmsUInt16Number wIn[],
+                               CMSREGISTER cmsUInt8Number* accum,
+                               CMSREGISTER cmsUInt32Number Stride)
+{
+    const cmsUInt32Number nChan       = T_CHANNELS(info -> InputFormat);
+    const cmsUInt32Number OtherEndian = T_ENDIAN16(info -> InputFormat);
+    const cmsUInt32Number Swapped     = T_DOSWAP(info ->InputFormat);
+    const cmsUInt32Number Inverted    = T_FLAVOR(info ->InputFormat);
+    const cmsUInt32Number RotateFirst = T_SWAPFIRST(info -> InputFormat);
+    const cmsUInt32Number nExtra      = T_EXTRA(info -> InputFormat);
+    const cmsUInt32Number ExtraLeads  = Swapped ^ RotateFirst;
+    cmsUInt32Number k;
+
+    // Extra samples stored ahead of the colorants are stepped over first
+    if (ExtraLeads)
+        accum += nExtra * sizeof(cmsUInt16Number);
+
+    for (k = 0; k < nChan; k++, accum += sizeof(cmsUInt16Number)) {
+        cmsUInt16Number w = *(cmsUInt16Number*) accum;
+
+        if (OtherEndian)
+            w = CHANGE_ENDIAN(w);
+        if (Inverted)
+            w = REVERSE_FLAVOR_16(w);
+
+        wIn[Swapped ? (nChan - k - 1) : k] = w;
+    }
+
+    // Otherwise they are stepped over after the colorants
+    if (!ExtraLeads)
+        accum += nExtra * sizeof(cmsUInt16Number);
+
+    // SwapFirst with no extra sample: rotate wIn left by one, so the old wIn[0] ends up last
+    if (nExtra == 0 && RotateFirst) {
+        const cmsUInt16Number first = wIn[0];
+
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsUInt16Number));
+        wIn[nChan-1] = first;
+    }
+
+    return accum;
+
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Planar 16-bit unpacker: consecutive samples of one pixel are Stride bytes apart.
+// NOTE(review): extra planes are skipped only when DOSWAP is set, whereas UnrollPlanarBytes
+// tests DoSwap ^ SwapFirst -- confirm that the difference is intended.
+static
+cmsUInt8Number* UnrollPlanarWords(CMSREGISTER _cmsTRANSFORM* info,
+                                  CMSREGISTER cmsUInt16Number wIn[],
+                                  CMSREGISTER cmsUInt8Number* accum,
+                                  CMSREGISTER cmsUInt32Number Stride)
+{
+    const cmsUInt32Number nChan       = T_CHANNELS(info -> InputFormat);
+    const cmsUInt32Number Swapped     = T_DOSWAP(info ->InputFormat);
+    const cmsUInt32Number Inverted    = T_FLAVOR(info ->InputFormat);
+    const cmsUInt32Number OtherEndian = T_ENDIAN16(info -> InputFormat);
+    cmsUInt8Number* plane = accum;
+    cmsUInt32Number k;
+
+    if (Swapped)
+        plane += T_EXTRA(info -> InputFormat) * Stride;
+
+    for (k = 0; k < nChan; k++, plane += Stride) {
+
+        cmsUInt16Number w = *(cmsUInt16Number*) plane;
+
+        if (OtherEndian)
+            w = CHANGE_ENDIAN(w);
+
+        wIn[Swapped ? (nChan - k - 1) : k] = Inverted ? REVERSE_FLAVOR_16(w) : w;
+    }
+
+    // The caller's position advances by one word, independently of the plane walk above
+    return accum + sizeof(cmsUInt16Number);
+}
+
+
+// Four chunky 16-bit words in natural order, copied as they are
+static
+cmsUInt8Number* Unroll4Words(CMSREGISTER _cmsTRANSFORM* info,
+                             CMSREGISTER cmsUInt16Number wIn[],
+                             CMSREGISTER cmsUInt8Number* accum,
+                             CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[0] = *(cmsUInt16Number*) accum;       // C
+    wIn[1] = *(cmsUInt16Number*) (accum + 2); // M
+    wIn[2] = *(cmsUInt16Number*) (accum + 4); // Y
+    wIn[3] = *(cmsUInt16Number*) (accum + 6); // K
+    return accum + 8;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Four chunky 16-bit words in natural order, each inverted (0xffff - x)
+static
+cmsUInt8Number* Unroll4WordsReverse(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wIn[],
+                                    CMSREGISTER cmsUInt8Number* accum,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[0] = REVERSE_FLAVOR_16(*(cmsUInt16Number*) accum);       // C
+    wIn[1] = REVERSE_FLAVOR_16(*(cmsUInt16Number*) (accum + 2)); // M
+    wIn[2] = REVERSE_FLAVOR_16(*(cmsUInt16Number*) (accum + 4)); // Y
+    wIn[3] = REVERSE_FLAVOR_16(*(cmsUInt16Number*) (accum + 6)); // K
+    return accum + 8;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// K C M Y word order: the first input word goes to wIn[3], the other three follow in order
+static
+cmsUInt8Number* Unroll4WordsSwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                      CMSREGISTER cmsUInt16Number wIn[],
+                                      CMSREGISTER cmsUInt8Number* accum,
+                                      CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[3] = *(cmsUInt16Number*) accum;       // K
+    wIn[0] = *(cmsUInt16Number*) (accum + 2); // C
+    wIn[1] = *(cmsUInt16Number*) (accum + 4); // M
+    wIn[2] = *(cmsUInt16Number*) (accum + 6); // Y
+    return accum + 8;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// KYMC
+// Fully reversed channel order: input word n is stored in wIn[3 - n]
+static
+cmsUInt8Number* Unroll4WordsSwap(CMSREGISTER _cmsTRANSFORM* info,
+                                 CMSREGISTER cmsUInt16Number wIn[],
+                                 CMSREGISTER cmsUInt8Number* accum,
+                                 CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[3] = *(cmsUInt16Number*) accum;       // K
+    wIn[2] = *(cmsUInt16Number*) (accum + 2); // Y
+    wIn[1] = *(cmsUInt16Number*) (accum + 4); // M
+    wIn[0] = *(cmsUInt16Number*) (accum + 6); // C
+    return accum + 8;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Input words land in wIn[2], wIn[1], wIn[0], wIn[3] -- i.e. Y M C K order when wIn holds C M Y K
+static
+cmsUInt8Number* Unroll4WordsSwapSwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                          CMSREGISTER cmsUInt16Number wIn[],
+                                          CMSREGISTER cmsUInt8Number* accum,
+                                          CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[2] = *(cmsUInt16Number*) accum;       // Y
+    wIn[1] = *(cmsUInt16Number*) (accum + 2); // M
+    wIn[0] = *(cmsUInt16Number*) (accum + 4); // C
+    wIn[3] = *(cmsUInt16Number*) (accum + 6); // K
+    return accum + 8;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Three chunky 16-bit words in natural order, copied as they are
+static
+cmsUInt8Number* Unroll3Words(CMSREGISTER _cmsTRANSFORM* info,
+                             CMSREGISTER cmsUInt16Number wIn[],
+                             CMSREGISTER cmsUInt8Number* accum,
+                             CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[0] = *(cmsUInt16Number*) accum;        // C R
+    wIn[1] = *(cmsUInt16Number*) (accum + 2);  // M G
+    wIn[2] = *(cmsUInt16Number*) (accum + 4);  // Y B
+    return accum + 6;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Three chunky 16-bit words stored in reverse channel order: input word n goes to wIn[2 - n]
+static
+cmsUInt8Number* Unroll3WordsSwap(CMSREGISTER _cmsTRANSFORM* info,
+                                 CMSREGISTER cmsUInt16Number wIn[],
+                                 CMSREGISTER cmsUInt8Number* accum,
+                                 CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[2] = *(cmsUInt16Number*) accum;        // Y B
+    wIn[1] = *(cmsUInt16Number*) (accum + 2);  // M G
+    wIn[0] = *(cmsUInt16Number*) (accum + 4);  // C R
+    return accum + 6;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// A B G R word order: the leading word is skipped, the other three are stored reversed
+static
+cmsUInt8Number* Unroll3WordsSkip1Swap(CMSREGISTER _cmsTRANSFORM* info,
+                                      CMSREGISTER cmsUInt16Number wIn[],
+                                      CMSREGISTER cmsUInt8Number* accum,
+                                      CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[2] = *(cmsUInt16Number*) (accum + 2); // B
+    wIn[1] = *(cmsUInt16Number*) (accum + 4); // G
+    wIn[0] = *(cmsUInt16Number*) (accum + 6); // R
+
+    return accum + 8;                         // the word at accum (A) is never read
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// A R G B word order: the leading word is skipped, the other three are stored in order
+static
+cmsUInt8Number* Unroll3WordsSkip1SwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                           CMSREGISTER cmsUInt16Number wIn[],
+                                           CMSREGISTER cmsUInt8Number* accum,
+                                           CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[0] = *(cmsUInt16Number*) (accum + 2); // R
+    wIn[1] = *(cmsUInt16Number*) (accum + 4); // G
+    wIn[2] = *(cmsUInt16Number*) (accum + 6); // B
+
+    return accum + 8;                         // the word at accum (A) is never read
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// One 16-bit gray word replicated into wIn[0..2]
+static
+cmsUInt8Number* Unroll1Word(CMSREGISTER _cmsTRANSFORM* info,
+                            CMSREGISTER cmsUInt16Number wIn[],
+                            CMSREGISTER cmsUInt8Number* accum,
+                            CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[2] = wIn[1] = wIn[0] = *(cmsUInt16Number*) accum;   // L
+    return accum + 2;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// One 16-bit gray word, inverted (0xffff - x) and replicated into wIn[0..2]
+static
+cmsUInt8Number* Unroll1WordReversed(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wIn[],
+                                    CMSREGISTER cmsUInt8Number* accum,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[2] = wIn[1] = wIn[0] = REVERSE_FLAVOR_16(*(cmsUInt16Number*) accum);
+    return accum + 2;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// One 16-bit gray word replicated into wIn[0..2]; the read position then moves on by
+// 8 bytes in total, i.e. the word itself plus 6 bytes that are never read.
+static
+cmsUInt8Number* Unroll1WordSkip3(CMSREGISTER _cmsTRANSFORM* info,
+                                 CMSREGISTER cmsUInt16Number wIn[],
+                                 CMSREGISTER cmsUInt8Number* accum,
+                                 CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[2] = wIn[1] = wIn[0] = *(cmsUInt16Number*) accum;
+
+    return accum + 8;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Two chunky 16-bit words in natural order, copied as they are
+static
+cmsUInt8Number* Unroll2Words(CMSREGISTER _cmsTRANSFORM* info,
+                                     CMSREGISTER cmsUInt16Number wIn[],
+                                     CMSREGISTER cmsUInt8Number* accum,
+                                     CMSREGISTER cmsUInt32Number Stride)
+{
+    wIn[0] = *(cmsUInt16Number*) accum;          // ch1
+    wIn[1] = *(cmsUInt16Number*) (accum + 2);    // ch2
+    return accum + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+
+// This is a conversion of Lab double to 16 bits
+//   info   - transform; InputFormat selects planar vs chunky and the extra-channel count
+//   wIn    - output words, filled in by cmsFloat2LabEncoded
+//   accum  - current read position in the input buffer
+//   Stride - byte distance between planes (planar input only)
+// Returns the read position for the next pixel.
+static
+cmsUInt8Number* UnrollLabDoubleTo16(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wIn[],
+                                    CMSREGISTER cmsUInt8Number* accum,
+                                    CMSREGISTER cmsUInt32Number  Stride)
+{
+    cmsCIELab Lab;
+
+    // Chunky input is handed to the encoder in place, viewed as a cmsCIELab
+    if (!T_PLANAR(info -> InputFormat)) {
+
+        cmsFloat2LabEncoded(wIn, (cmsCIELab*) accum);
+
+        return accum + sizeof(cmsCIELab) + T_EXTRA(info ->InputFormat) * sizeof(cmsFloat64Number);
+    }
+
+    // Planar input: gather L, a and b from three planes Stride bytes apart
+    Lab.L = *(cmsFloat64Number*) accum;
+    Lab.a = *(cmsFloat64Number*) (accum + Stride);
+    Lab.b = *(cmsFloat64Number*) (accum + Stride * 2);
+
+    cmsFloat2LabEncoded(wIn, &Lab);
+
+    // Advance by a single sample; T_EXTRA is not consulted on this path
+    return accum + sizeof(cmsFloat64Number);
+}
+
+
+// This is a conversion of Lab float to 16 bits
+//   info   - transform; InputFormat selects planar vs chunky and the extra-channel count
+//   wIn    - output words, filled in by cmsFloat2LabEncoded
+//   accum  - current read position in the input buffer
+//   Stride - byte distance between planes (planar input only)
+// Returns the read position for the next pixel.
+static
+cmsUInt8Number* UnrollLabFloatTo16(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wIn[],
+                                    CMSREGISTER cmsUInt8Number* accum,
+                                    CMSREGISTER cmsUInt32Number  Stride)
+{
+    const cmsFloat32Number* px = (const cmsFloat32Number*) accum;
+    cmsCIELab Lab;
+
+    if (T_PLANAR(info -> InputFormat)) {
+
+        // Planar input: L, a and b sit in three planes Stride bytes apart
+        Lab.L = *(cmsFloat32Number*) accum;
+        Lab.a = *(cmsFloat32Number*) (accum + Stride);
+        Lab.b = *(cmsFloat32Number*) (accum + Stride * 2);
+
+        cmsFloat2LabEncoded(wIn, &Lab);
+
+        return accum + sizeof(cmsFloat32Number);
+    }
+
+    // Chunky input: three consecutive floats are copied into a cmsCIELab
+    Lab.L = px[0];
+    Lab.a = px[1];
+    Lab.b = px[2];
+
+    cmsFloat2LabEncoded(wIn, &Lab);
+
+    // Step over the three colorants plus any extra samples
+    return accum + (3 + T_EXTRA(info ->InputFormat)) * sizeof(cmsFloat32Number);
+}
+
+// This is a conversion of XYZ double to 16 bits
+// The 16-bit encoding itself is done by cmsFloat2XYZEncoded.
+//   info   - transform; InputFormat selects planar vs chunky and the extra-channel count
+//   wIn    - output words, filled in by cmsFloat2XYZEncoded
+//   accum  - current read position in the input buffer
+//   Stride - byte distance between planes (planar input only)
+// Returns the read position for the next pixel.
+static
+cmsUInt8Number* UnrollXYZDoubleTo16(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wIn[],
+                                    CMSREGISTER cmsUInt8Number* accum,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsCIEXYZ XYZ;
+
+    // Chunky input is handed to the encoder in place, viewed as a cmsCIEXYZ
+    if (!T_PLANAR(info -> InputFormat)) {
+
+        cmsFloat2XYZEncoded(wIn, (cmsCIEXYZ*) accum);
+
+        // Step over the triplet plus any extra doubles
+        return accum + sizeof(cmsCIEXYZ) + T_EXTRA(info ->InputFormat) * sizeof(cmsFloat64Number);
+    }
+
+    // Planar input: gather X, Y and Z from three planes Stride bytes apart
+    XYZ.X = *(cmsFloat64Number*) accum;
+    XYZ.Y = *(cmsFloat64Number*) (accum + Stride);
+    XYZ.Z = *(cmsFloat64Number*) (accum + Stride * 2);
+
+    cmsFloat2XYZEncoded(wIn, &XYZ);
+
+    // Advance by a single sample; T_EXTRA is not consulted on this path
+    return accum + sizeof(cmsFloat64Number);
+}
+
+// This is a conversion of XYZ float to 16 bits
+// The 16-bit encoding itself is done by cmsFloat2XYZEncoded.
+//   info   - transform; InputFormat selects planar vs chunky and the extra-channel count
+//   wIn    - output words, filled in by cmsFloat2XYZEncoded
+//   accum  - current read position in the input buffer
+//   Stride - byte distance between planes (planar input only)
+// Returns the read position for the next pixel.
+static
+cmsUInt8Number* UnrollXYZFloatTo16(CMSREGISTER _cmsTRANSFORM* info,
+                                   CMSREGISTER cmsUInt16Number wIn[],
+                                   CMSREGISTER cmsUInt8Number* accum,
+                                   CMSREGISTER cmsUInt32Number Stride)
+{
+    const cmsFloat32Number* Pt = (const cmsFloat32Number*) accum;
+    cmsCIEXYZ XYZ;
+
+    if (T_PLANAR(info -> InputFormat)) {
+
+        // Planar input: X, Y and Z sit in three planes,
+        // each Stride bytes after the previous one
+        XYZ.X = *(cmsFloat32Number*) accum;
+        XYZ.Y = *(cmsFloat32Number*) (accum + Stride);
+        XYZ.Z = *(cmsFloat32Number*) (accum + Stride * 2);
+
+        cmsFloat2XYZEncoded(wIn, &XYZ);
+
+        // Advance by a single sample; T_EXTRA is not consulted on this path
+        return accum + sizeof(cmsFloat32Number);
+    }
+
+    // Chunky input: three consecutive floats are copied
+    // into a cmsCIEXYZ before encoding
+    XYZ.X = Pt[0];
+    XYZ.Y = Pt[1];
+    XYZ.Z = Pt[2];
+
+    cmsFloat2XYZEncoded(wIn, &XYZ);
+
+    // Step over the three colorants plus any extra samples
+    return accum + (3 + T_EXTRA(info ->InputFormat)) * sizeof(cmsFloat32Number);
+}
+
+// Check if space is marked as ink
+// Returns TRUE when the colorspace field of Type is CMY, CMYK or one of the
+// multichannel PT_MCH5 .. PT_MCH15 spaces, FALSE for anything else.
+cmsINLINE cmsBool IsInkSpace(cmsUInt32Number Type)
+{
+    cmsBool IsInk;
+
+    switch (T_COLORSPACE(Type)) {
+
+     case PT_CMY:   case PT_CMYK:
+     case PT_MCH5:  case PT_MCH6:  case PT_MCH7:  case PT_MCH8:
+     case PT_MCH9:  case PT_MCH10: case PT_MCH11: case PT_MCH12:
+     case PT_MCH13: case PT_MCH14: case PT_MCH15:
+         IsInk = TRUE;
+         break;
+
+     default:
+         IsInk = FALSE;
+         break;
+    }
+    return IsInk;
+}
+
+// Return the size in bytes of a given formatter
+// (the T_BYTES field of Format, except that double is stored there as 0 and is
+// reported as the size of a 64-bit value).
+static
+cmsUInt32Number PixelSize(cmsUInt32Number Format)
+{
+    const cmsUInt32Number nBytes = T_BYTES(Format);
+
+    if (nBytes != 0)
+        return nBytes;                   // already correct for all other formats
+
+    return sizeof(cmsUInt64Number);      // double: the T_BYTES field is zero
+}
+
+// Unpacks double samples (chunky or planar) to 16 bits.
+// Inks come in percentage (scale 655.35, so 100 maps to 0xFFFF); the remaining cases are
+// between 0..1.0 (scale 65535). Each scaled value goes through _cmsQuickSaturateWord.
+//   info   - transform; only InputFormat is consulted
+//   wIn    - receives nChan words, ordered according to DOSWAP / SWAPFIRST
+//   accum  - current read position in the input buffer
+//   Stride - plane distance in bytes (planar input); divided down to samples below
+// Returns the read position for the next pixel.
+static
+cmsUInt8Number* UnrollDoubleTo16(CMSREGISTER _cmsTRANSFORM* info,
+                                CMSREGISTER cmsUInt16Number wIn[],
+                                CMSREGISTER cmsUInt8Number* accum,
+                                CMSREGISTER cmsUInt32Number Stride)
+{
+    const cmsUInt32Number nChan      = T_CHANNELS(info -> InputFormat);
+    const cmsUInt32Number DoSwap     = T_DOSWAP(info ->InputFormat);
+    const cmsUInt32Number Reverse    = T_FLAVOR(info ->InputFormat);
+    const cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> InputFormat);
+    const cmsUInt32Number Extra      = T_EXTRA(info -> InputFormat);
+    const cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    const cmsUInt32Number Planar     = T_PLANAR(info -> InputFormat);
+    const cmsFloat64Number maximum   = IsInkSpace(info ->InputFormat) ? 655.35 : 65535.0;
+    const cmsFloat64Number* Samples  = (const cmsFloat64Number*) accum;
+    const cmsUInt32Number start      = ExtraFirst ? Extra : 0;  // leading extra samples are skipped
+    cmsUInt32Number i;
+
+    Stride /= PixelSize(info->InputFormat);
+
+    for (i=0; i < nChan; i++) {
+
+        const cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+        cmsFloat64Number v;
+        cmsUInt16Number  vi;
+
+        // Read at full double precision: narrowing to cmsFloat32Number first would drop
+        // mantissa bits and is undefined for magnitudes beyond the float range.
+        v  = Planar ? Samples[(i + start) * Stride] : Samples[i + start];
+        vi = _cmsQuickSaturateWord(v * maximum);
+
+        wIn[index] = Reverse ? REVERSE_FLAVOR_16(vi) : vi;
+    }
+
+    // SwapFirst with no extra sample: rotate wIn left by one, so the old wIn[0] ends up last
+    if (Extra == 0 && SwapFirst) {
+        cmsUInt16Number tmp = wIn[0];
+
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsUInt16Number));
+        wIn[nChan-1] = tmp;
+    }
+
+    // Planar: advance by a single sample; chunky: step over colorants and extras
+    if (Planar)
+        return accum + sizeof(cmsFloat64Number);
+
+    return accum + (nChan + Extra) * sizeof(cmsFloat64Number);
+}
+
+
+
+// Unrolls a pixel made of single-precision float samples into 16-bit words.
+// Same scaling rule as the double variant: 655.35 for ink spaces, 65535.0
+// otherwise, with saturation.
+static
+cmsUInt8Number* UnrollFloatTo16(CMSREGISTER _cmsTRANSFORM* info,
+                                CMSREGISTER cmsUInt16Number wIn[],
+                                CMSREGISTER cmsUInt8Number* accum,
+                                CMSREGISTER cmsUInt32Number Stride)
+{
+    const cmsUInt32Number   Format    = info -> InputFormat;
+    const cmsUInt32Number   nChan     = T_CHANNELS(Format);
+    const cmsUInt32Number   DoSwap    = T_DOSWAP(Format);
+    const cmsUInt32Number   Reverse   = T_FLAVOR(Format);
+    const cmsUInt32Number   SwapFirst = T_SWAPFIRST(Format);
+    const cmsUInt32Number   Extra     = T_EXTRA(Format);
+    const cmsUInt32Number   Planar    = T_PLANAR(Format);
+    const cmsFloat64Number  Scale     = IsInkSpace(Format) ? 655.35 : 65535.0;
+    const cmsFloat32Number* Samples   = (const cmsFloat32Number*) accum;
+    cmsUInt32Number First, Step, i;
+
+    // Extra channels sit in front of the color channels when exactly one of
+    // DOSWAP / SWAPFIRST is set
+    First = (DoSwap ^ SwapFirst) ? Extra : 0;
+
+    // Stride is converted from bytes to samples; it only matters for planar data
+    Stride /= PixelSize(Format);
+    Step = Planar ? Stride : 1;
+
+    for (i = 0; i < nChan; i++) {
+
+        cmsUInt32Number  Dest  = DoSwap ? (nChan - i - 1) : i;
+        cmsFloat32Number Value = Samples[(i + First) * Step];
+        cmsUInt16Number  Word  = _cmsQuickSaturateWord(Value * Scale);
+
+        wIn[Dest] = Reverse ? REVERSE_FLAVOR_16(Word) : Word;
+    }
+
+    // SWAPFIRST without extra channels: rotate the words one place to the left
+    if (Extra == 0 && SwapFirst) {
+
+        cmsUInt16Number Head = wIn[0];
+
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsUInt16Number));
+        wIn[nChan-1] = Head;
+    }
+
+    // Planar input advances one sample, chunky input a whole pixel
+    return accum + (Planar ? sizeof(cmsFloat32Number)
+                           : (nChan + Extra) * sizeof(cmsFloat32Number));
+}
+
+
+
+
+// Single-channel double input (0..1.0 range): the sample is scaled to 16 bits
+// with saturation and replicated into the first three words of wIn.
+static
+cmsUInt8Number* UnrollDouble1Chan(CMSREGISTER _cmsTRANSFORM* info,
+                                  CMSREGISTER cmsUInt16Number wIn[],
+                                  CMSREGISTER cmsUInt8Number* accum,
+                                  CMSREGISTER cmsUInt32Number Stride)
+{
+    const cmsFloat64Number* Gray = (const cmsFloat64Number*) accum;
+    cmsUInt16Number Word = _cmsQuickSaturateWord(Gray[0] * 65535.0);
+
+    wIn[2] = Word;
+    wIn[1] = Word;
+    wIn[0] = Word;
+
+    return accum + sizeof(cmsFloat64Number);
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+//-------------------------------------------------------------------------------------------------------------------
+
+// Generic unroller from cmsFloat32Number samples to the float pipeline.
+// Ink spaces are divided by 100, the rest is passed through unscaled;
+// a reversed flavor stores 1 - v.
+static
+cmsUInt8Number* UnrollFloatsToFloat(_cmsTRANSFORM* info,
+                                    cmsFloat32Number wIn[],
+                                    cmsUInt8Number* accum,
+                                    cmsUInt32Number Stride)
+{
+    const cmsUInt32Number   Format    = info -> InputFormat;
+    const cmsUInt32Number   nChan     = T_CHANNELS(Format);
+    const cmsUInt32Number   DoSwap    = T_DOSWAP(Format);
+    const cmsUInt32Number   Reverse   = T_FLAVOR(Format);
+    const cmsUInt32Number   SwapFirst = T_SWAPFIRST(Format);
+    const cmsUInt32Number   Extra     = T_EXTRA(Format);
+    const cmsUInt32Number   Planar    = T_PLANAR(Format);
+    const cmsFloat32Number  Range     = IsInkSpace(Format) ? 100.0F : 1.0F;
+    const cmsFloat32Number* Samples   = (const cmsFloat32Number*) accum;
+    cmsUInt32Number First, Step, i;
+
+    // Extra channels sit in front of the color channels when exactly one of
+    // DOSWAP / SWAPFIRST is set
+    First = (DoSwap ^ SwapFirst) ? Extra : 0;
+
+    // Stride is converted from bytes to samples; it only matters for planar data
+    Stride /= PixelSize(Format);
+    Step = Planar ? Stride : 1;
+
+    for (i = 0; i < nChan; i++) {
+
+        cmsUInt32Number  Dest  = DoSwap ? (nChan - i - 1) : i;
+        cmsFloat32Number Value = Samples[(i + First) * Step];
+
+        Value /= Range;
+
+        if (Reverse)
+            Value = 1 - Value;
+
+        wIn[Dest] = Value;
+    }
+
+    // SWAPFIRST without extra channels: rotate the values one place to the left
+    if (Extra == 0 && SwapFirst) {
+
+        cmsFloat32Number Head = wIn[0];
+
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsFloat32Number));
+        wIn[nChan-1] = Head;
+    }
+
+    // Planar input advances one sample, chunky input a whole pixel
+    return accum + (Planar ? sizeof(cmsFloat32Number)
+                           : (nChan + Extra) * sizeof(cmsFloat32Number));
+}
+
+// Generic unroller from double samples to the float pipeline. Ink spaces are
+// divided by 100, the rest is passed through unscaled; a reversed flavor
+// stores 1.0 - v. The division and reversal are done in double precision and
+// only the final value is narrowed to cmsFloat32Number.
+static
+cmsUInt8Number* UnrollDoublesToFloat(_cmsTRANSFORM* info,
+                                    cmsFloat32Number wIn[],
+                                    cmsUInt8Number* accum,
+                                    cmsUInt32Number Stride)
+{
+    const cmsUInt32Number   Format    = info -> InputFormat;
+    const cmsUInt32Number   nChan     = T_CHANNELS(Format);
+    const cmsUInt32Number   DoSwap    = T_DOSWAP(Format);
+    const cmsUInt32Number   Reverse   = T_FLAVOR(Format);
+    const cmsUInt32Number   SwapFirst = T_SWAPFIRST(Format);
+    const cmsUInt32Number   Extra     = T_EXTRA(Format);
+    const cmsUInt32Number   Planar    = T_PLANAR(Format);
+    const cmsFloat64Number  Range     = IsInkSpace(Format) ? 100.0 : 1.0;
+    const cmsFloat64Number* Samples   = (const cmsFloat64Number*) accum;
+    cmsUInt32Number First, Step, i;
+
+    // Extra channels sit in front of the color channels when exactly one of
+    // DOSWAP / SWAPFIRST is set
+    First = (DoSwap ^ SwapFirst) ? Extra : 0;
+
+    // Stride is converted from bytes to samples; it only matters for planar data
+    Stride /= PixelSize(Format);
+    Step = Planar ? Stride : 1;
+
+    for (i = 0; i < nChan; i++) {
+
+        cmsUInt32Number  Dest  = DoSwap ? (nChan - i - 1) : i;
+        cmsFloat64Number Value = Samples[(i + First) * Step];
+
+        Value /= Range;
+
+        if (Reverse)
+            Value = 1.0 - Value;
+
+        wIn[Dest] = (cmsFloat32Number) Value;
+    }
+
+    // SWAPFIRST without extra channels: rotate the values one place to the left
+    if (Extra == 0 && SwapFirst) {
+
+        cmsFloat32Number Head = wIn[0];
+
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsFloat32Number));
+        wIn[nChan-1] = Head;
+    }
+
+    // Planar input advances one sample, chunky input a whole pixel
+    return accum + (Planar ? sizeof(cmsFloat64Number)
+                           : (nChan + Extra) * sizeof(cmsFloat64Number));
+}
+
+
+
+// From Lab double to cmsFloat32Number. L is divided by 100 (0..100 -> 0..1);
+// a and b are offset by 128 and divided by 255 (-128..+127 -> 0..1).
+static
+cmsUInt8Number* UnrollLabDoubleToFloat(_cmsTRANSFORM* info,
+                                       cmsFloat32Number wIn[],
+                                       cmsUInt8Number* accum,
+                                       cmsUInt32Number Stride)
+{
+    cmsFloat64Number* Pt     = (cmsFloat64Number*) accum;
+    cmsUInt32Number   Planar = T_PLANAR(info -> InputFormat);
+    cmsUInt32Number   Step   = 1;    // distance between components, in samples
+
+    if (Planar)
+        Step = Stride / PixelSize(info->InputFormat);
+
+    wIn[0] = (cmsFloat32Number) (Pt[0] / 100.0);
+    wIn[1] = (cmsFloat32Number) ((Pt[Step] + 128) / 255.0);
+    wIn[2] = (cmsFloat32Number) ((Pt[Step*2] + 128) / 255.0);
+
+    // Planar input advances one sample, chunky input skips L, a, b and extras
+    if (Planar)
+        return accum + sizeof(cmsFloat64Number);
+
+    return accum + sizeof(cmsFloat64Number)*(3 + T_EXTRA(info ->InputFormat));
+}
+
+// From Lab float to cmsFloat32Number. L is divided by 100 (0..100 -> 0..1);
+// a and b are offset by 128 and divided by 255 (-128..+127 -> 0..1).
+static
+cmsUInt8Number* UnrollLabFloatToFloat(_cmsTRANSFORM* info,
+                                      cmsFloat32Number wIn[],
+                                      cmsUInt8Number* accum,
+                                      cmsUInt32Number Stride)
+{
+    cmsFloat32Number* Pt     = (cmsFloat32Number*) accum;
+    cmsUInt32Number   Planar = T_PLANAR(info -> InputFormat);
+    cmsUInt32Number   Step   = 1;    // distance between components, in samples
+
+    if (Planar)
+        Step = Stride / PixelSize(info->InputFormat);
+
+    wIn[0] = (cmsFloat32Number) (Pt[0] / 100.0);
+    wIn[1] = (cmsFloat32Number) ((Pt[Step] + 128) / 255.0);
+    wIn[2] = (cmsFloat32Number) ((Pt[Step*2] + 128) / 255.0);
+
+    // Planar input advances one sample, chunky input skips L, a, b and extras
+    if (Planar)
+        return accum + sizeof(cmsFloat32Number);
+
+    return accum + sizeof(cmsFloat32Number)*(3 + T_EXTRA(info ->InputFormat));
+}
+
+
+
+// From XYZ double to cmsFloat32Number: every component is divided by
+// MAX_ENCODEABLE_XYZ, the largest value of the 1.15 fixed point encoding.
+static
+cmsUInt8Number* UnrollXYZDoubleToFloat(_cmsTRANSFORM* info,
+                                       cmsFloat32Number wIn[],
+                                       cmsUInt8Number* accum,
+                                       cmsUInt32Number Stride)
+{
+    cmsFloat64Number* Pt     = (cmsFloat64Number*) accum;
+    cmsUInt32Number   Planar = T_PLANAR(info -> InputFormat);
+    cmsUInt32Number   Step   = 1;    // distance between components, in samples
+
+    if (Planar)
+        Step = Stride / PixelSize(info->InputFormat);
+
+    wIn[0] = (cmsFloat32Number) (Pt[0] / MAX_ENCODEABLE_XYZ);
+    wIn[1] = (cmsFloat32Number) (Pt[Step] / MAX_ENCODEABLE_XYZ);
+    wIn[2] = (cmsFloat32Number) (Pt[Step*2] / MAX_ENCODEABLE_XYZ);
+
+    // Planar input advances one sample, chunky input skips X, Y, Z and extras
+    if (Planar)
+        return accum + sizeof(cmsFloat64Number);
+
+    return accum + sizeof(cmsFloat64Number)*(3 + T_EXTRA(info ->InputFormat));
+}
+
+// From XYZ float to cmsFloat32Number: every component is divided by
+// MAX_ENCODEABLE_XYZ.
+static
+cmsUInt8Number* UnrollXYZFloatToFloat(_cmsTRANSFORM* info,
+                                      cmsFloat32Number wIn[],
+                                      cmsUInt8Number* accum,
+                                      cmsUInt32Number Stride)
+{
+    cmsFloat32Number* Pt     = (cmsFloat32Number*) accum;
+    cmsUInt32Number   Planar = T_PLANAR(info -> InputFormat);
+    cmsUInt32Number   Step   = 1;    // distance between components, in samples
+
+    if (Planar)
+        Step = Stride / PixelSize(info->InputFormat);
+
+    wIn[0] = (cmsFloat32Number) (Pt[0] / MAX_ENCODEABLE_XYZ);
+    wIn[1] = (cmsFloat32Number) (Pt[Step] / MAX_ENCODEABLE_XYZ);
+    wIn[2] = (cmsFloat32Number) (Pt[Step*2] / MAX_ENCODEABLE_XYZ);
+
+    // Planar input advances one sample, chunky input skips X, Y, Z and extras
+    if (Planar)
+        return accum + sizeof(cmsFloat32Number);
+
+    return accum + sizeof(cmsFloat32Number)*(3 + T_EXTRA(info ->InputFormat));
+}
+
+
+
+// Packing routines -----------------------------------------------------------------------------------------------------------
+
+
+// Generic chunky packer for bytes. Handles any channel count together with
+// the DOSWAP, SWAPFIRST, FLAVOR and EXTRA fields of the output format.
+
+static
+cmsUInt8Number* PackAnyBytes(CMSREGISTER _cmsTRANSFORM* info,
+                             CMSREGISTER cmsUInt16Number wOut[],
+                             CMSREGISTER cmsUInt8Number* output,
+                             CMSREGISTER cmsUInt32Number Stride)
+{
+    const cmsUInt32Number Format     = info -> OutputFormat;
+    const cmsUInt32Number nChan      = T_CHANNELS(Format);
+    const cmsUInt32Number DoSwap     = T_DOSWAP(Format);
+    const cmsUInt32Number Reverse    = T_FLAVOR(Format);
+    const cmsUInt32Number Extra      = T_EXTRA(Format);
+    const cmsUInt32Number SwapFirst  = T_SWAPFIRST(Format);
+    const cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsUInt8Number* const First      = output;
+    cmsUInt8Number Sample = 0;
+    cmsUInt32Number i;
+
+    // Leave room for the extra channels, which are skipped and never written
+    if (ExtraFirst)
+        output += Extra;
+
+    for (i = 0; i < nChan; i++) {
+
+        Sample = FROM_16_TO_8(wOut[DoSwap ? (nChan - i - 1) : i]);
+
+        if (Reverse)
+            Sample = REVERSE_FLAVOR_8(Sample);
+
+        *output++ = Sample;
+    }
+
+    if (!ExtraFirst)
+        output += Extra;
+
+    // SWAPFIRST without extra channels: rotate right so that the last
+    // channel written ends up first
+    if (Extra == 0 && SwapFirst) {
+
+        memmove(First + 1, First, nChan-1);
+        *First = Sample;
+    }
+
+    return output;
+
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+
+
+// Generic chunky packer for 16-bit words. Handles any channel count together
+// with the ENDIAN16, DOSWAP, SWAPFIRST, FLAVOR and EXTRA fields of the
+// output format.
+static
+cmsUInt8Number* PackAnyWords(CMSREGISTER _cmsTRANSFORM* info,
+                             CMSREGISTER cmsUInt16Number wOut[],
+                             CMSREGISTER cmsUInt8Number* output,
+                             CMSREGISTER cmsUInt32Number Stride)
+{
+    const cmsUInt32Number Format     = info -> OutputFormat;
+    const cmsUInt32Number nChan      = T_CHANNELS(Format);
+    const cmsUInt32Number SwapEndian = T_ENDIAN16(Format);
+    const cmsUInt32Number DoSwap     = T_DOSWAP(Format);
+    const cmsUInt32Number Reverse    = T_FLAVOR(Format);
+    const cmsUInt32Number Extra      = T_EXTRA(Format);
+    const cmsUInt32Number SwapFirst  = T_SWAPFIRST(Format);
+    const cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsUInt16Number* const First     = (cmsUInt16Number*) output;
+    cmsUInt16Number Word = 0;
+    cmsUInt32Number i;
+
+    // Leave room for the extra channels, which are skipped and never written
+    if (ExtraFirst)
+        output += Extra * sizeof(cmsUInt16Number);
+
+    for (i = 0; i < nChan; i++) {
+
+        Word = wOut[DoSwap ? (nChan - i - 1) : i];
+
+        if (SwapEndian)
+            Word = CHANGE_ENDIAN(Word);
+
+        if (Reverse)
+            Word = REVERSE_FLAVOR_16(Word);
+
+        *(cmsUInt16Number*) output = Word;
+        output += sizeof(cmsUInt16Number);
+    }
+
+    if (!ExtraFirst)
+        output += Extra * sizeof(cmsUInt16Number);
+
+    // SWAPFIRST without extra channels: rotate right so that the last
+    // channel written ends up first
+    if (Extra == 0 && SwapFirst) {
+
+        memmove(First + 1, First, (nChan-1)* sizeof(cmsUInt16Number));
+        *First = Word;
+    }
+
+    return output;
+
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+
+// Planar packer for bytes: consecutive channels are written Stride bytes
+// apart, and the returned pointer advances a single byte from the start.
+static
+cmsUInt8Number* PackPlanarBytes(CMSREGISTER _cmsTRANSFORM* info,
+                                CMSREGISTER cmsUInt16Number wOut[],
+                                CMSREGISTER cmsUInt8Number* output,
+                                CMSREGISTER cmsUInt32Number Stride)
+{
+    const cmsUInt32Number Format    = info -> OutputFormat;
+    const cmsUInt32Number nChan     = T_CHANNELS(Format);
+    const cmsUInt32Number DoSwap    = T_DOSWAP(Format);
+    const cmsUInt32Number SwapFirst = T_SWAPFIRST(Format);
+    const cmsUInt32Number Reverse   = T_FLAVOR(Format);
+    cmsUInt8Number* Plane = output;
+    cmsUInt32Number i;
+
+    // Extra planes come first when exactly one of DOSWAP / SWAPFIRST is set
+    if (DoSwap ^ SwapFirst)
+        Plane += T_EXTRA(Format) * Stride;
+
+    for (i = 0; i < nChan; i++) {
+
+        cmsUInt8Number Sample = FROM_16_TO_8(wOut[DoSwap ? (nChan - i - 1) : i]);
+
+        if (Reverse)
+            Sample = REVERSE_FLAVOR_8(Sample);
+
+        *Plane = Sample;
+        Plane += Stride;
+    }
+
+    return output + 1;
+}
+
+
+// Planar packer for 16-bit words: consecutive channels are written Stride
+// bytes apart, and the returned pointer advances one word from the start.
+// Honors ENDIAN16, FLAVOR and DOSWAP. The extra planes are skipped up front
+// when DoSwap ^ SwapFirst is set, the same rule PackPlanarBytes, PackAnyBytes
+// and PackAnyWords apply; testing DoSwap alone misplaced the channels for
+// formats carrying SWAPFIRST.
+static
+cmsUInt8Number* PackPlanarWords(CMSREGISTER _cmsTRANSFORM* info,
+                                CMSREGISTER cmsUInt16Number wOut[],
+                                CMSREGISTER cmsUInt8Number* output,
+                                CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan      = T_CHANNELS(info -> OutputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info ->OutputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info ->OutputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info ->OutputFormat);
+    cmsUInt32Number SwapEndian = T_ENDIAN16(info -> OutputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsUInt32Number i;
+    cmsUInt8Number* Init = output;
+    cmsUInt16Number v;
+
+    // Extra planes come first when exactly one of DOSWAP / SWAPFIRST is set
+    if (ExtraFirst) {
+        output += T_EXTRA(info -> OutputFormat) * Stride;
+    }
+
+    for (i=0; i < nChan; i++) {
+
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+
+        v = wOut[index];
+
+        if (SwapEndian)
+            v = CHANGE_ENDIAN(v);
+
+        if (Reverse)
+            v =  REVERSE_FLAVOR_16(v);
+
+        *(cmsUInt16Number*) output = v;
+        output += Stride;
+    }
+
+    return (Init + sizeof(cmsUInt16Number));
+}
+
+// CMYKcm (unrolled for speed): six channels reduced to bytes, in order.
+
+static
+cmsUInt8Number* Pack6Bytes(CMSREGISTER _cmsTRANSFORM* info,
+                           CMSREGISTER cmsUInt16Number wOut[],
+                           CMSREGISTER cmsUInt8Number* output,
+                           CMSREGISTER cmsUInt32Number Stride)
+{
+    output[0] = FROM_16_TO_8(wOut[0]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[2]);
+    output[3] = FROM_16_TO_8(wOut[3]);
+    output[4] = FROM_16_TO_8(wOut[4]);
+    output[5] = FROM_16_TO_8(wOut[5]);
+
+    return output + 6;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// KCMYcm: six channels reduced to bytes, written in reverse channel order.
+
+static
+cmsUInt8Number* Pack6BytesSwap(CMSREGISTER _cmsTRANSFORM* info,
+                               CMSREGISTER cmsUInt16Number wOut[],
+                               CMSREGISTER cmsUInt8Number* output,
+                               CMSREGISTER cmsUInt32Number Stride)
+{
+    output[0] = FROM_16_TO_8(wOut[5]);
+    output[1] = FROM_16_TO_8(wOut[4]);
+    output[2] = FROM_16_TO_8(wOut[3]);
+    output[3] = FROM_16_TO_8(wOut[2]);
+    output[4] = FROM_16_TO_8(wOut[1]);
+    output[5] = FROM_16_TO_8(wOut[0]);
+
+    return output + 6;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// CMYKcm: six 16-bit channels copied in order, two bytes apiece.
+static
+cmsUInt8Number* Pack6Words(CMSREGISTER _cmsTRANSFORM* info,
+                           CMSREGISTER cmsUInt16Number wOut[],
+                           CMSREGISTER cmsUInt8Number* output,
+                           CMSREGISTER cmsUInt32Number Stride)
+{
+    *(cmsUInt16Number*) (output + 0)  = wOut[0];
+    *(cmsUInt16Number*) (output + 2)  = wOut[1];
+    *(cmsUInt16Number*) (output + 4)  = wOut[2];
+    *(cmsUInt16Number*) (output + 6)  = wOut[3];
+    *(cmsUInt16Number*) (output + 8)  = wOut[4];
+    *(cmsUInt16Number*) (output + 10) = wOut[5];
+
+    return output + 12;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// KCMYcm: six 16-bit channels copied in reverse channel order.
+static
+cmsUInt8Number* Pack6WordsSwap(CMSREGISTER _cmsTRANSFORM* info,
+                               CMSREGISTER cmsUInt16Number wOut[],
+                               CMSREGISTER cmsUInt8Number* output,
+                               CMSREGISTER cmsUInt32Number Stride)
+{
+    *(cmsUInt16Number*) (output + 0)  = wOut[5];
+    *(cmsUInt16Number*) (output + 2)  = wOut[4];
+    *(cmsUInt16Number*) (output + 4)  = wOut[3];
+    *(cmsUInt16Number*) (output + 6)  = wOut[2];
+    *(cmsUInt16Number*) (output + 8)  = wOut[1];
+    *(cmsUInt16Number*) (output + 10) = wOut[0];
+
+    return output + 12;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+
+// Four channels reduced to bytes, in order.
+static
+cmsUInt8Number* Pack4Bytes(CMSREGISTER _cmsTRANSFORM* info,
+                           CMSREGISTER cmsUInt16Number wOut[],
+                           CMSREGISTER cmsUInt8Number* output,
+                           CMSREGISTER cmsUInt32Number Stride)
+{
+    output[0] = FROM_16_TO_8(wOut[0]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[2]);
+    output[3] = FROM_16_TO_8(wOut[3]);
+
+    return output + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Four channels reduced to bytes, in order, each with the flavor reversed.
+static
+cmsUInt8Number* Pack4BytesReverse(CMSREGISTER _cmsTRANSFORM* info,
+                                  CMSREGISTER cmsUInt16Number wOut[],
+                                  CMSREGISTER cmsUInt8Number* output,
+                                  CMSREGISTER cmsUInt32Number Stride)
+{
+    output[0] = REVERSE_FLAVOR_8(FROM_16_TO_8(wOut[0]));
+    output[1] = REVERSE_FLAVOR_8(FROM_16_TO_8(wOut[1]));
+    output[2] = REVERSE_FLAVOR_8(FROM_16_TO_8(wOut[2]));
+    output[3] = REVERSE_FLAVOR_8(FROM_16_TO_8(wOut[3]));
+
+    return output + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+
+// Four channels reduced to bytes with the last channel moved to the front:
+// 3, 0, 1, 2.
+static
+cmsUInt8Number* Pack4BytesSwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wOut[],
+                                    CMSREGISTER cmsUInt8Number* output,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    output[0] = FROM_16_TO_8(wOut[3]);
+    output[1] = FROM_16_TO_8(wOut[0]);
+    output[2] = FROM_16_TO_8(wOut[1]);
+    output[3] = FROM_16_TO_8(wOut[2]);
+
+    return output + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// ABGR: four channels reduced to bytes, written in reverse channel order.
+static
+cmsUInt8Number* Pack4BytesSwap(CMSREGISTER _cmsTRANSFORM* info,
+                               CMSREGISTER cmsUInt16Number wOut[],
+                               CMSREGISTER cmsUInt8Number* output,
+                               CMSREGISTER cmsUInt32Number Stride)
+{
+    output[0] = FROM_16_TO_8(wOut[3]);
+    output[1] = FROM_16_TO_8(wOut[2]);
+    output[2] = FROM_16_TO_8(wOut[1]);
+    output[3] = FROM_16_TO_8(wOut[0]);
+
+    return output + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Four channels reduced to bytes with both swaps applied: 2, 1, 0, 3.
+static
+cmsUInt8Number* Pack4BytesSwapSwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                        CMSREGISTER cmsUInt16Number wOut[],
+                                        CMSREGISTER cmsUInt8Number* output,
+                                        CMSREGISTER cmsUInt32Number Stride)
+{
+    output[0] = FROM_16_TO_8(wOut[2]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[0]);
+    output[3] = FROM_16_TO_8(wOut[3]);
+
+    return output + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Four 16-bit channels copied in order, two bytes apiece.
+static
+cmsUInt8Number* Pack4Words(CMSREGISTER _cmsTRANSFORM* info,
+                           CMSREGISTER cmsUInt16Number wOut[],
+                           CMSREGISTER cmsUInt8Number* output,
+                           CMSREGISTER cmsUInt32Number Stride)
+{
+    *(cmsUInt16Number*) (output + 0) = wOut[0];
+    *(cmsUInt16Number*) (output + 2) = wOut[1];
+    *(cmsUInt16Number*) (output + 4) = wOut[2];
+    *(cmsUInt16Number*) (output + 6) = wOut[3];
+
+    return output + 8;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Four 16-bit channels copied in order, each with the flavor reversed.
+static
+cmsUInt8Number* Pack4WordsReverse(CMSREGISTER _cmsTRANSFORM* info,
+                                  CMSREGISTER cmsUInt16Number wOut[],
+                                  CMSREGISTER cmsUInt8Number* output,
+                                  CMSREGISTER cmsUInt32Number Stride)
+{
+    *(cmsUInt16Number*) (output + 0) = REVERSE_FLAVOR_16(wOut[0]);
+    *(cmsUInt16Number*) (output + 2) = REVERSE_FLAVOR_16(wOut[1]);
+    *(cmsUInt16Number*) (output + 4) = REVERSE_FLAVOR_16(wOut[2]);
+    *(cmsUInt16Number*) (output + 6) = REVERSE_FLAVOR_16(wOut[3]);
+
+    return output + 8;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// ABGR: four 16-bit channels copied in reverse channel order.
+static
+cmsUInt8Number* Pack4WordsSwap(CMSREGISTER _cmsTRANSFORM* info,
+                               CMSREGISTER cmsUInt16Number wOut[],
+                               CMSREGISTER cmsUInt8Number* output,
+                               CMSREGISTER cmsUInt32Number Stride)
+{
+    *(cmsUInt16Number*) (output + 0) = wOut[3];
+    *(cmsUInt16Number*) (output + 2) = wOut[2];
+    *(cmsUInt16Number*) (output + 4) = wOut[1];
+    *(cmsUInt16Number*) (output + 6) = wOut[0];
+
+    return output + 8;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// CMYK: four 16-bit channels copied in order with their bytes exchanged.
+static
+cmsUInt8Number* Pack4WordsBigEndian(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wOut[],
+                                    CMSREGISTER cmsUInt8Number* output,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    *(cmsUInt16Number*) (output + 0) = CHANGE_ENDIAN(wOut[0]);
+    *(cmsUInt16Number*) (output + 2) = CHANGE_ENDIAN(wOut[1]);
+    *(cmsUInt16Number*) (output + 4) = CHANGE_ENDIAN(wOut[2]);
+    *(cmsUInt16Number*) (output + 6) = CHANGE_ENDIAN(wOut[3]);
+
+    return output + 8;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+
+// Three channels passed through FomLabV4ToLabV2 and then reduced to bytes.
+static
+cmsUInt8Number* PackLabV2_8(CMSREGISTER _cmsTRANSFORM* info,
+                            CMSREGISTER cmsUInt16Number wOut[],
+                            CMSREGISTER cmsUInt8Number* output,
+                            CMSREGISTER cmsUInt32Number Stride)
+{
+    output[0] = FROM_16_TO_8(FomLabV4ToLabV2(wOut[0]));
+    output[1] = FROM_16_TO_8(FomLabV4ToLabV2(wOut[1]));
+    output[2] = FROM_16_TO_8(FomLabV4ToLabV2(wOut[2]));
+
+    return output + 3;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Same as PackLabV2_8 but with one leading byte skipped (left untouched)
+// before the three converted channels.
+static
+cmsUInt8Number* PackALabV2_8(CMSREGISTER _cmsTRANSFORM* info,
+                             CMSREGISTER cmsUInt16Number wOut[],
+                             CMSREGISTER cmsUInt8Number* output,
+                             CMSREGISTER cmsUInt32Number Stride)
+{
+    output[1] = FROM_16_TO_8(FomLabV4ToLabV2(wOut[0]));
+    output[2] = FROM_16_TO_8(FomLabV4ToLabV2(wOut[1]));
+    output[3] = FROM_16_TO_8(FomLabV4ToLabV2(wOut[2]));
+
+    return output + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Three channels passed through FomLabV4ToLabV2 and stored as 16-bit words.
+static
+cmsUInt8Number* PackLabV2_16(CMSREGISTER _cmsTRANSFORM* info,
+                             CMSREGISTER cmsUInt16Number wOut[],
+                             CMSREGISTER cmsUInt8Number* output,
+                             CMSREGISTER cmsUInt32Number Stride)
+{
+    *(cmsUInt16Number*) (output + 0) = FomLabV4ToLabV2(wOut[0]);
+    *(cmsUInt16Number*) (output + 2) = FomLabV4ToLabV2(wOut[1]);
+    *(cmsUInt16Number*) (output + 4) = FomLabV4ToLabV2(wOut[2]);
+
+    return output + 6;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Three channels reduced to bytes, in order.
+static
+cmsUInt8Number* Pack3Bytes(CMSREGISTER _cmsTRANSFORM* info,
+                           CMSREGISTER cmsUInt16Number wOut[],
+                           CMSREGISTER cmsUInt8Number* output,
+                           CMSREGISTER cmsUInt32Number Stride)
+{
+    output[0] = FROM_16_TO_8(wOut[0]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[2]);
+
+    return output + 3;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Three channels in order, keeping only the low byte of each word instead
+// of going through FROM_16_TO_8.
+static
+cmsUInt8Number* Pack3BytesOptimized(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wOut[],
+                                    CMSREGISTER cmsUInt8Number* output,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    output[0] = (cmsUInt8Number) (wOut[0] & 0xFFU);
+    output[1] = (cmsUInt8Number) (wOut[1] & 0xFFU);
+    output[2] = (cmsUInt8Number) (wOut[2] & 0xFFU);
+
+    return output + 3;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Three channels reduced to bytes, written in reverse channel order.
+static
+cmsUInt8Number* Pack3BytesSwap(CMSREGISTER _cmsTRANSFORM* info,
+                               CMSREGISTER cmsUInt16Number wOut[],
+                               CMSREGISTER cmsUInt8Number* output,
+                               CMSREGISTER cmsUInt32Number Stride)
+{
+    output[0] = FROM_16_TO_8(wOut[2]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[0]);
+
+    return output + 3;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Three channels in reverse order, keeping only the low byte of each word.
+static
+cmsUInt8Number* Pack3BytesSwapOptimized(CMSREGISTER _cmsTRANSFORM* info,
+                                        CMSREGISTER cmsUInt16Number wOut[],
+                                        CMSREGISTER cmsUInt8Number* output,
+                                        CMSREGISTER cmsUInt32Number Stride)
+{
+    output[0] = (cmsUInt8Number) (wOut[2] & 0xFFU);
+    output[1] = (cmsUInt8Number) (wOut[1] & 0xFFU);
+    output[2] = (cmsUInt8Number) (wOut[0] & 0xFFU);
+
+    return output + 3;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+
+// Three 16-bit channels copied in order, two bytes apiece.
+static
+cmsUInt8Number* Pack3Words(CMSREGISTER _cmsTRANSFORM* info,
+                           CMSREGISTER cmsUInt16Number wOut[],
+                           CMSREGISTER cmsUInt8Number* output,
+                           CMSREGISTER cmsUInt32Number Stride)
+{
+    *(cmsUInt16Number*) (output + 0) = wOut[0];
+    *(cmsUInt16Number*) (output + 2) = wOut[1];
+    *(cmsUInt16Number*) (output + 4) = wOut[2];
+
+    return output + 6;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Three 16-bit channels copied in reverse channel order.
+static
+cmsUInt8Number* Pack3WordsSwap(CMSREGISTER _cmsTRANSFORM* info,
+                               CMSREGISTER cmsUInt16Number wOut[],
+                               CMSREGISTER cmsUInt8Number* output,
+                               CMSREGISTER cmsUInt32Number Stride)
+{
+    *(cmsUInt16Number*) (output + 0) = wOut[2];
+    *(cmsUInt16Number*) (output + 2) = wOut[1];
+    *(cmsUInt16Number*) (output + 4) = wOut[0];
+
+    return output + 6;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Three 16-bit channels copied in order with their bytes exchanged.
+static
+cmsUInt8Number* Pack3WordsBigEndian(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wOut[],
+                                    CMSREGISTER cmsUInt8Number* output,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    *(cmsUInt16Number*) (output + 0) = CHANGE_ENDIAN(wOut[0]);
+    *(cmsUInt16Number*) (output + 2) = CHANGE_ENDIAN(wOut[1]);
+    *(cmsUInt16Number*) (output + 4) = CHANGE_ENDIAN(wOut[2]);
+
+    return output + 6;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Three channels reduced to bytes, in order, followed by one skipped byte
+// that is left untouched.
+static
+cmsUInt8Number* Pack3BytesAndSkip1(CMSREGISTER _cmsTRANSFORM* info,
+                                   CMSREGISTER cmsUInt16Number wOut[],
+                                   CMSREGISTER cmsUInt8Number* output,
+                                   CMSREGISTER cmsUInt32Number Stride)
+{
+    output[0] = FROM_16_TO_8(wOut[0]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[2]);
+
+    return output + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Low byte of three channels, in order, followed by one skipped byte that
+// is left untouched.
+static
+cmsUInt8Number* Pack3BytesAndSkip1Optimized(CMSREGISTER _cmsTRANSFORM* info,
+                                            CMSREGISTER cmsUInt16Number wOut[],
+                                            CMSREGISTER cmsUInt8Number* output,
+                                            CMSREGISTER cmsUInt32Number Stride)
+{
+    output[0] = (cmsUInt8Number) (wOut[0] & 0xFFU);
+    output[1] = (cmsUInt8Number) (wOut[1] & 0xFFU);
+    output[2] = (cmsUInt8Number) (wOut[2] & 0xFFU);
+
+    return output + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+
+// One skipped byte (left untouched) followed by three channels reduced to
+// bytes, in order.
+static
+cmsUInt8Number* Pack3BytesAndSkip1SwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                            CMSREGISTER cmsUInt16Number wOut[],
+                                            CMSREGISTER cmsUInt8Number* output,
+                                            CMSREGISTER cmsUInt32Number Stride)
+{
+    output[1] = FROM_16_TO_8(wOut[0]);
+    output[2] = FROM_16_TO_8(wOut[1]);
+    output[3] = FROM_16_TO_8(wOut[2]);
+
+    return output + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// One skipped byte (left untouched) followed by the low byte of three
+// channels, in order.
+static
+cmsUInt8Number* Pack3BytesAndSkip1SwapFirstOptimized(CMSREGISTER _cmsTRANSFORM* info,
+                                                     CMSREGISTER cmsUInt16Number wOut[],
+                                                     CMSREGISTER cmsUInt8Number* output,
+                                                     CMSREGISTER cmsUInt32Number Stride)
+{
+    output[1] = (cmsUInt8Number) (wOut[0] & 0xFFU);
+    output[2] = (cmsUInt8Number) (wOut[1] & 0xFFU);
+    output[3] = (cmsUInt8Number) (wOut[2] & 0xFFU);
+
+    return output + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Skips the first byte (not written), then stores the three channels in
+// reverse order (wOut[2], wOut[1], wOut[0]) as 8-bit samples.
+static
+cmsUInt8Number* Pack3BytesAndSkip1Swap(CMSREGISTER _cmsTRANSFORM* info,
+                                       CMSREGISTER cmsUInt16Number wOut[],
+                                       CMSREGISTER cmsUInt8Number* output,
+                                       CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    // output[0] is the skipped byte: left untouched
+    output[1] = FROM_16_TO_8(wOut[2]);
+    output[2] = FROM_16_TO_8(wOut[1]);
+    output[3] = FROM_16_TO_8(wOut[0]);
+
+    return output + 4;
+}
+
+// Skips the first byte (not written), then stores the low byte of the three
+// channels in reverse order (wOut[2], wOut[1], wOut[0]).
+static
+cmsUInt8Number* Pack3BytesAndSkip1SwapOptimized(CMSREGISTER _cmsTRANSFORM* info,
+                                                CMSREGISTER cmsUInt16Number wOut[],
+                                                CMSREGISTER cmsUInt8Number* output,
+                                                CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    // output[0] is the skipped byte: left untouched
+    output[1] = (wOut[2] & 0xFFU);
+    output[2] = (wOut[1] & 0xFFU);
+    output[3] = (wOut[0] & 0xFFU);
+
+    return output + 4;
+}
+
+
+// Stores the three channels in reverse order (wOut[2], wOut[1], wOut[0]) as
+// 8-bit samples, then steps over one trailing byte without writing it.
+static
+cmsUInt8Number* Pack3BytesAndSkip1SwapSwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                                CMSREGISTER cmsUInt16Number wOut[],
+                                                CMSREGISTER cmsUInt8Number* output,
+                                                CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = FROM_16_TO_8(wOut[2]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[0]);
+
+    // output[3] is the skipped byte: left untouched
+    return output + 4;
+}
+
+// Stores the low byte of the three channels in reverse order (wOut[2],
+// wOut[1], wOut[0]), then steps over one trailing byte without writing it.
+static
+cmsUInt8Number* Pack3BytesAndSkip1SwapSwapFirstOptimized(CMSREGISTER _cmsTRANSFORM* info,
+                                                         CMSREGISTER cmsUInt16Number wOut[],
+                                                         CMSREGISTER cmsUInt8Number* output,
+                                                         CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = (wOut[2] & 0xFFU);
+    output[1] = (wOut[1] & 0xFFU);
+    output[2] = (wOut[0] & 0xFFU);
+
+    // output[3] is the skipped byte: left untouched
+    return output + 4;
+}
+
+// Stores wOut[0..2] as three native 16-bit words and then steps over one more
+// word without writing it. Returns the pointer advanced by eight bytes.
+static
+cmsUInt8Number* Pack3WordsAndSkip1(CMSREGISTER _cmsTRANSFORM* info,
+                                   CMSREGISTER cmsUInt16Number wOut[],
+                                   CMSREGISTER cmsUInt8Number* output,
+                                   CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    Words[0] = wOut[0];
+    Words[1] = wOut[1];
+    Words[2] = wOut[2];
+
+    // Words[3] is the skipped slot: left untouched
+    return output + 4 * sizeof(cmsUInt16Number);
+}
+
+// Skips the first 16-bit slot (not written), then stores the three channels
+// in reverse order (wOut[2], wOut[1], wOut[0]) as native 16-bit words.
+static
+cmsUInt8Number* Pack3WordsAndSkip1Swap(CMSREGISTER _cmsTRANSFORM* info,
+                                       CMSREGISTER cmsUInt16Number wOut[],
+                                       CMSREGISTER cmsUInt8Number* output,
+                                       CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    // Words[0] is the skipped slot: left untouched
+    Words[1] = wOut[2];
+    Words[2] = wOut[1];
+    Words[3] = wOut[0];
+
+    return output + 4 * sizeof(cmsUInt16Number);
+}
+
+
+// Skips the first 16-bit slot (not written), then stores wOut[0..2] in order
+// as native 16-bit words. Returns the pointer advanced by eight bytes.
+static
+cmsUInt8Number* Pack3WordsAndSkip1SwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                            CMSREGISTER cmsUInt16Number wOut[],
+                                            CMSREGISTER cmsUInt8Number* output,
+                                            CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    // Words[0] is the skipped slot: left untouched
+    Words[1] = wOut[0];
+    Words[2] = wOut[1];
+    Words[3] = wOut[2];
+
+    return output + 4 * sizeof(cmsUInt16Number);
+}
+
+
+// Stores the three channels in reverse order (wOut[2], wOut[1], wOut[0]) as
+// native 16-bit words, then steps over one trailing word without writing it.
+static
+cmsUInt8Number* Pack3WordsAndSkip1SwapSwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                                CMSREGISTER cmsUInt16Number wOut[],
+                                                CMSREGISTER cmsUInt8Number* output,
+                                                CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    Words[0] = wOut[2];
+    Words[1] = wOut[1];
+    Words[2] = wOut[0];
+
+    // Words[3] is the skipped slot: left untouched
+    return output + 4 * sizeof(cmsUInt16Number);
+}
+
+
+
+// Stores wOut[0] as a single 8-bit sample (FROM_16_TO_8) and returns the
+// pointer advanced by one byte.
+static
+cmsUInt8Number* Pack1Byte(CMSREGISTER _cmsTRANSFORM* info,
+                          CMSREGISTER cmsUInt16Number wOut[],
+                          CMSREGISTER cmsUInt8Number* output,
+                          CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = FROM_16_TO_8(wOut[0]);
+
+    return output + 1;
+}
+
+
+// Stores wOut[0] as a single 8-bit sample after applying REVERSE_FLAVOR_16 to
+// the 16-bit value. Returns the pointer advanced by one byte.
+static
+cmsUInt8Number* Pack1ByteReversed(CMSREGISTER _cmsTRANSFORM* info,
+                                  CMSREGISTER cmsUInt16Number wOut[],
+                                  CMSREGISTER cmsUInt8Number* output,
+                                  CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = FROM_16_TO_8(REVERSE_FLAVOR_16(wOut[0]));
+
+    return output + 1;
+}
+
+
+// Stores wOut[0] as one 8-bit sample and then steps over one more byte
+// without writing it. Returns the pointer advanced by two bytes.
+static
+cmsUInt8Number* Pack1ByteSkip1(CMSREGISTER _cmsTRANSFORM* info,
+                               CMSREGISTER cmsUInt16Number wOut[],
+                               CMSREGISTER cmsUInt8Number* output,
+                               CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = FROM_16_TO_8(wOut[0]);
+
+    // output[1] is the skipped byte: left untouched
+    return output + 2;
+}
+
+
+// Skips the first byte (not written), then stores wOut[0] as one 8-bit
+// sample. Returns the pointer advanced by two bytes.
+static
+cmsUInt8Number* Pack1ByteSkip1SwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                        CMSREGISTER cmsUInt16Number wOut[],
+                                        CMSREGISTER cmsUInt8Number* output,
+                                        CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    // output[0] is the skipped byte: left untouched
+    output[1] = FROM_16_TO_8(wOut[0]);
+
+    return output + 2;
+}
+
+// Stores wOut[0] as one native 16-bit word and returns the pointer advanced
+// by two bytes.
+static
+cmsUInt8Number* Pack1Word(CMSREGISTER _cmsTRANSFORM* info,
+                          CMSREGISTER cmsUInt16Number wOut[],
+                          CMSREGISTER cmsUInt8Number* output,
+                          CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    Words[0] = wOut[0];
+
+    return output + sizeof(cmsUInt16Number);
+}
+
+
+// Stores REVERSE_FLAVOR_16(wOut[0]) as one native 16-bit word and returns the
+// pointer advanced by two bytes.
+static
+cmsUInt8Number* Pack1WordReversed(CMSREGISTER _cmsTRANSFORM* info,
+                                  CMSREGISTER cmsUInt16Number wOut[],
+                                  CMSREGISTER cmsUInt8Number* output,
+                                  CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    Words[0] = REVERSE_FLAVOR_16(wOut[0]);
+
+    return output + sizeof(cmsUInt16Number);
+}
+
+// Stores CHANGE_ENDIAN(wOut[0]) as one 16-bit word and returns the pointer
+// advanced by two bytes.
+static
+cmsUInt8Number* Pack1WordBigEndian(CMSREGISTER _cmsTRANSFORM* info,
+                                   CMSREGISTER cmsUInt16Number wOut[],
+                                   CMSREGISTER cmsUInt8Number* output,
+                                   CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    Words[0] = CHANGE_ENDIAN(wOut[0]);
+
+    return output + sizeof(cmsUInt16Number);
+}
+
+
+// Stores wOut[0] as one native 16-bit word, then steps over a second word
+// without writing it. Returns the pointer advanced by four bytes.
+static
+cmsUInt8Number* Pack1WordSkip1(CMSREGISTER _cmsTRANSFORM* info,
+                               CMSREGISTER cmsUInt16Number wOut[],
+                               CMSREGISTER cmsUInt8Number* output,
+                               CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    Words[0] = wOut[0];
+
+    // Words[1] is the skipped slot: left untouched
+    return output + 2 * sizeof(cmsUInt16Number);
+}
+
+// Skips the first 16-bit slot (not written), then stores wOut[0] as one
+// native 16-bit word. Returns the pointer advanced by four bytes.
+static
+cmsUInt8Number* Pack1WordSkip1SwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                        CMSREGISTER cmsUInt16Number wOut[],
+                                        CMSREGISTER cmsUInt8Number* output,
+                                        CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    // Words[0] is the skipped slot: left untouched
+    Words[1] = wOut[0];
+
+    return output + 2 * sizeof(cmsUInt16Number);
+}
+
+
+// Unencoded float values -- don't try to optimize for speed
+// Converts the encoded 16-bit Lab triplet in wOut[] to unencoded doubles
+// (via cmsLabEncoded2Float) and stores it in the output buffer.
+//
+// Planar output: L, a and b are written Stride elements apart and the pointer
+// advances by one double. Chunky output: the cmsCIELab is decoded straight
+// into the buffer and the pointer also steps over any extra channels.
+static
+cmsUInt8Number* PackLabDoubleFrom16(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wOut[],
+                                    CMSREGISTER cmsUInt8Number* output,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+
+    if (T_PLANAR(info -> OutputFormat)) {
+
+        cmsCIELab  Lab;
+        cmsFloat64Number* Out = (cmsFloat64Number*) output;
+        cmsLabEncoded2Float(&Lab, wOut);
+
+        // Out[] is indexed in doubles, so scale Stride down by the sample size
+        // first, exactly as the sibling planar packers in this file do
+        // (presumably Stride arrives in bytes -- confirm against the caller).
+        Stride /= PixelSize(info->OutputFormat);
+
+        Out[0]        = Lab.L;
+        Out[Stride]   = Lab.a;
+        Out[Stride*2] = Lab.b;
+
+        return output + sizeof(cmsFloat64Number);
+    }
+    else {
+
+        cmsLabEncoded2Float((cmsCIELab*) output, wOut);
+        return output + (sizeof(cmsCIELab) + T_EXTRA(info ->OutputFormat) * sizeof(cmsFloat64Number));
+    }
+}
+
+
+// Converts the encoded 16-bit Lab triplet in wOut[] to unencoded values
+// (cmsLabEncoded2Float) and stores L, a, b as 32-bit floats.
+//
+// Planar output: components are written Stride / PixelSize() floats apart and
+// the pointer advances by one float. Chunky output: components are contiguous
+// and the pointer also steps over any extra channels.
+static
+cmsUInt8Number* PackLabFloatFrom16(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wOut[],
+                                    CMSREGISTER cmsUInt8Number* output,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsFloat32Number* Dest = (cmsFloat32Number*) output;
+    cmsCIELab Decoded;
+    cmsUInt32Number PlaneStep;
+
+    cmsLabEncoded2Float(&Decoded, wOut);
+
+    if (!T_PLANAR(info->OutputFormat)) {
+
+        Dest[0] = (cmsFloat32Number) Decoded.L;
+        Dest[1] = (cmsFloat32Number) Decoded.a;
+        Dest[2] = (cmsFloat32Number) Decoded.b;
+
+        return output + (3 + T_EXTRA(info->OutputFormat)) * sizeof(cmsFloat32Number);
+    }
+
+    // Distance between planes, expressed in samples rather than raw Stride units
+    PlaneStep = Stride / PixelSize(info->OutputFormat);
+
+    Dest[0]             = (cmsFloat32Number) Decoded.L;
+    Dest[PlaneStep]     = (cmsFloat32Number) Decoded.a;
+    Dest[PlaneStep * 2] = (cmsFloat32Number) Decoded.b;
+
+    return output + sizeof(cmsFloat32Number);
+}
+
+// Converts the encoded 16-bit XYZ triplet in wOut[] to unencoded doubles
+// (cmsXYZEncoded2Float) and stores it in the output buffer.
+//
+// Chunky output: the cmsCIEXYZ is decoded straight into the buffer and the
+// pointer also steps over any extra channels. Planar output: X, Y, Z are
+// written Stride / PixelSize() doubles apart and the pointer advances by one.
+static
+cmsUInt8Number* PackXYZDoubleFrom16(CMSREGISTER _cmsTRANSFORM* Info,
+                                    CMSREGISTER cmsUInt16Number wOut[],
+                                    CMSREGISTER cmsUInt8Number* output,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsFloat64Number* Planes;
+    cmsCIEXYZ Decoded;
+    cmsUInt32Number PlaneStep;
+
+    if (!T_PLANAR(Info->OutputFormat)) {
+
+        cmsXYZEncoded2Float((cmsCIEXYZ*) output, wOut);
+
+        return output + (sizeof(cmsCIEXYZ) + T_EXTRA(Info->OutputFormat) * sizeof(cmsFloat64Number));
+    }
+
+    Planes = (cmsFloat64Number*) output;
+    cmsXYZEncoded2Float(&Decoded, wOut);
+
+    // Distance between planes, expressed in samples rather than raw Stride units
+    PlaneStep = Stride / PixelSize(Info->OutputFormat);
+
+    Planes[0]             = Decoded.X;
+    Planes[PlaneStep]     = Decoded.Y;
+    Planes[PlaneStep * 2] = Decoded.Z;
+
+    return output + sizeof(cmsFloat64Number);
+}
+
+// Converts the encoded 16-bit XYZ triplet in wOut[] to unencoded values
+// (cmsXYZEncoded2Float) and stores X, Y, Z as 32-bit floats.
+//
+// Planar output: components are written Stride / PixelSize() floats apart and
+// the pointer advances by one float. Chunky output: components are contiguous
+// and the pointer also steps over any extra channels.
+static
+cmsUInt8Number* PackXYZFloatFrom16(CMSREGISTER _cmsTRANSFORM* Info,
+                                   CMSREGISTER cmsUInt16Number wOut[],
+                                   CMSREGISTER cmsUInt8Number* output,
+                                   CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsFloat32Number* Dest = (cmsFloat32Number*) output;
+    cmsCIEXYZ Decoded;
+    cmsUInt32Number PlaneStep;
+
+    cmsXYZEncoded2Float(&Decoded, wOut);
+
+    if (!T_PLANAR(Info->OutputFormat)) {
+
+        Dest[0] = (cmsFloat32Number) Decoded.X;
+        Dest[1] = (cmsFloat32Number) Decoded.Y;
+        Dest[2] = (cmsFloat32Number) Decoded.Z;
+
+        return output + (3 * sizeof(cmsFloat32Number) + T_EXTRA(Info->OutputFormat) * sizeof(cmsFloat32Number));
+    }
+
+    // Distance between planes, expressed in samples rather than raw Stride units
+    PlaneStep = Stride / PixelSize(Info->OutputFormat);
+
+    Dest[0]             = (cmsFloat32Number) Decoded.X;
+    Dest[PlaneStep]     = (cmsFloat32Number) Decoded.Y;
+    Dest[PlaneStep * 2] = (cmsFloat32Number) Decoded.Z;
+
+    return output + sizeof(cmsFloat32Number);
+}
+
+// Packs the 16-bit samples in wOut[] as doubles, driven by the bit fields of
+// info->OutputFormat (channel count, swap, swap-first, extra channels, flavor
+// and planar layout).
+//
+// Each sample is divided by 65535 (655.35 for ink spaces), i.e. it ends up in
+// 0..1 (0..100 for ink). Returns the pointer advanced by one double for
+// planar output, or by (channels + extra) doubles for chunky output.
+static
+cmsUInt8Number* PackDoubleFrom16(CMSREGISTER _cmsTRANSFORM* info,
+                                CMSREGISTER cmsUInt16Number wOut[],
+                                CMSREGISTER cmsUInt8Number* output,
+                                CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan      = T_CHANNELS(info -> OutputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info ->OutputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info ->OutputFormat);
+    cmsUInt32Number Extra      = T_EXTRA(info -> OutputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> OutputFormat);
+    cmsUInt32Number Planar     = T_PLANAR(info -> OutputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsFloat64Number maximum = IsInkSpace(info ->OutputFormat) ? 655.35 : 65535.0;
+    // Top of the *output* range. v is already divided by maximum when the
+    // flavor is applied, so reversing must be done against this value;
+    // subtracting from maximum would yield values near 65535.
+    cmsFloat64Number top = IsInkSpace(info ->OutputFormat) ? 100.0 : 1.0;
+    cmsFloat64Number v = 0;
+    cmsFloat64Number* swap1 = (cmsFloat64Number*) output;
+    cmsUInt32Number i, start = 0;
+
+    // Planar addressing below is in doubles, not in raw Stride units
+    Stride /= PixelSize(info->OutputFormat);
+
+    // Extra channels come first: leave room for them before the color samples
+    if (ExtraFirst)
+        start = Extra;
+
+    for (i=0; i < nChan; i++) {
+
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+
+        v = (cmsFloat64Number) wOut[index] / maximum;
+
+        if (Reverse)
+            v = top - v;
+
+        if (Planar)
+            ((cmsFloat64Number*) output)[(i + start)  * Stride]= v;
+        else
+            ((cmsFloat64Number*) output)[i + start] = v;
+    }
+
+
+    // No extra channels but swap-first requested: shift the written samples one
+    // slot to the right and put the last computed value in front.
+    if (Extra == 0 && SwapFirst) {
+
+         memmove(swap1 + 1, swap1, (nChan-1)* sizeof(cmsFloat64Number));
+        *swap1 = v;
+    }
+
+    if (T_PLANAR(info -> OutputFormat))
+        return output + sizeof(cmsFloat64Number);
+    else
+        return output + (nChan + Extra) * sizeof(cmsFloat64Number);
+
+}
+
+
+// Packs the 16-bit samples in wOut[] as 32-bit floats, driven by the bit
+// fields of info->OutputFormat (channel count, swap, swap-first, extra
+// channels, flavor and planar layout).
+//
+// Each sample is divided by 65535 (655.35 for ink spaces), i.e. it ends up in
+// 0..1 (0..100 for ink). Returns the pointer advanced by one float for planar
+// output, or by (channels + extra) floats for chunky output.
+static
+cmsUInt8Number* PackFloatFrom16(CMSREGISTER _cmsTRANSFORM* info,
+                                CMSREGISTER cmsUInt16Number wOut[],
+                                CMSREGISTER cmsUInt8Number* output,
+                                CMSREGISTER cmsUInt32Number Stride)
+{
+       cmsUInt32Number nChan      = T_CHANNELS(info->OutputFormat);
+       cmsUInt32Number DoSwap     = T_DOSWAP(info->OutputFormat);
+       cmsUInt32Number Reverse    = T_FLAVOR(info->OutputFormat);
+       cmsUInt32Number Extra      = T_EXTRA(info->OutputFormat);
+       cmsUInt32Number SwapFirst  = T_SWAPFIRST(info->OutputFormat);
+       cmsUInt32Number Planar     = T_PLANAR(info->OutputFormat);
+       cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+       cmsFloat64Number maximum = IsInkSpace(info->OutputFormat) ? 655.35 : 65535.0;
+       // Top of the *output* range. v is already divided by maximum when the
+       // flavor is applied, so reversing must be done against this value;
+       // subtracting from maximum would yield values near 65535.
+       cmsFloat64Number top = IsInkSpace(info->OutputFormat) ? 100.0 : 1.0;
+       cmsFloat64Number v = 0;
+       cmsFloat32Number* swap1 = (cmsFloat32Number*)output;
+       cmsUInt32Number i, start = 0;
+
+       // Planar addressing below is in floats, not in raw Stride units
+       Stride /= PixelSize(info->OutputFormat);
+
+       // Extra channels come first: leave room for them before the color samples
+       if (ExtraFirst)
+              start = Extra;
+
+       for (i = 0; i < nChan; i++) {
+
+              cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+
+              v = (cmsFloat64Number)wOut[index] / maximum;
+
+              if (Reverse)
+                     v = top - v;
+
+              if (Planar)
+                     ((cmsFloat32Number*)output)[(i + start) * Stride] = (cmsFloat32Number)v;
+              else
+                     ((cmsFloat32Number*)output)[i + start] = (cmsFloat32Number)v;
+       }
+
+
+       // No extra channels but swap-first requested: shift the written samples one
+       // slot to the right and put the last computed value in front.
+       if (Extra == 0 && SwapFirst) {
+
+              memmove(swap1 + 1, swap1, (nChan - 1)* sizeof(cmsFloat32Number));
+              *swap1 = (cmsFloat32Number)v;
+       }
+
+       if (T_PLANAR(info->OutputFormat))
+              return output + sizeof(cmsFloat32Number);
+       else
+              return output + (nChan + Extra) * sizeof(cmsFloat32Number);
+}
+
+
+
+// --------------------------------------------------------------------------------------------------------
+
+// Packs the float samples in wOut[] as 32-bit floats, driven by the bit fields
+// of info->OutputFormat. Each sample is multiplied by 1.0 (100.0 for ink
+// spaces); with the reversed flavor it is subtracted from that same maximum.
+// Returns the pointer advanced by one float for planar output, or by
+// (channels + extra) floats for chunky output.
+static
+cmsUInt8Number* PackFloatsFromFloat(_cmsTRANSFORM* info,
+                                    cmsFloat32Number wOut[],
+                                    cmsUInt8Number* output,
+                                    cmsUInt32Number Stride)
+{
+       const cmsUInt32Number Format = info->OutputFormat;
+       cmsUInt32Number nChan     = T_CHANNELS(Format);
+       cmsUInt32Number DoSwap    = T_DOSWAP(Format);
+       cmsUInt32Number Reverse   = T_FLAVOR(Format);
+       cmsUInt32Number Extra     = T_EXTRA(Format);
+       cmsUInt32Number SwapFirst = T_SWAPFIRST(Format);
+       cmsUInt32Number Planar    = T_PLANAR(Format);
+       cmsFloat64Number maximum  = IsInkSpace(Format) ? 100.0 : 1.0;
+       cmsFloat32Number* Dest    = (cmsFloat32Number*)output;
+       cmsFloat64Number Sample   = 0;
+       cmsUInt32Number i;
+       // Extra channels lead the pixel when exactly one of swap / swap-first is set
+       cmsUInt32Number First = (DoSwap ^ SwapFirst) ? Extra : 0;
+
+       // Planar addressing below is in floats, not in raw Stride units
+       Stride /= PixelSize(Format);
+
+       for (i = 0; i < nChan; i++) {
+
+              cmsUInt32Number Src = DoSwap ? (nChan - i - 1) : i;
+              cmsUInt32Number Dst = Planar ? (i + First) * Stride : (i + First);
+
+              Sample = wOut[Src] * maximum;
+
+              if (Reverse)
+                     Sample = maximum - Sample;
+
+              Dest[Dst] = (cmsFloat32Number)Sample;
+       }
+
+       // No extra channels but swap-first requested: shift the written samples one
+       // slot to the right and put the last computed value in front.
+       if (Extra == 0 && SwapFirst) {
+
+              memmove(Dest + 1, Dest, (nChan - 1)* sizeof(cmsFloat32Number));
+              Dest[0] = (cmsFloat32Number)Sample;
+       }
+
+       return Planar ? output + sizeof(cmsFloat32Number)
+                     : output + (nChan + Extra) * sizeof(cmsFloat32Number);
+}
+
+// Packs the float samples in wOut[] as doubles, driven by the bit fields of
+// info->OutputFormat. Each sample is multiplied by 1.0 (100.0 for ink spaces);
+// with the reversed flavor it is subtracted from that same maximum.
+// Returns the pointer advanced by one double for planar output, or by
+// (channels + extra) doubles for chunky output.
+static
+cmsUInt8Number* PackDoublesFromFloat(_cmsTRANSFORM* info,
+                                    cmsFloat32Number wOut[],
+                                    cmsUInt8Number* output,
+                                    cmsUInt32Number Stride)
+{
+       const cmsUInt32Number Format = info->OutputFormat;
+       cmsUInt32Number nChan     = T_CHANNELS(Format);
+       cmsUInt32Number DoSwap    = T_DOSWAP(Format);
+       cmsUInt32Number Reverse   = T_FLAVOR(Format);
+       cmsUInt32Number Extra     = T_EXTRA(Format);
+       cmsUInt32Number SwapFirst = T_SWAPFIRST(Format);
+       cmsUInt32Number Planar    = T_PLANAR(Format);
+       cmsFloat64Number maximum  = IsInkSpace(Format) ? 100.0 : 1.0;
+       cmsFloat64Number* Dest    = (cmsFloat64Number*)output;
+       cmsFloat64Number Sample   = 0;
+       cmsUInt32Number i;
+       // Extra channels lead the pixel when exactly one of swap / swap-first is set
+       cmsUInt32Number First = (DoSwap ^ SwapFirst) ? Extra : 0;
+
+       // Planar addressing below is in doubles, not in raw Stride units
+       Stride /= PixelSize(Format);
+
+       for (i = 0; i < nChan; i++) {
+
+              cmsUInt32Number Src = DoSwap ? (nChan - i - 1) : i;
+              cmsUInt32Number Dst = Planar ? (i + First) * Stride : (i + First);
+
+              Sample = wOut[Src] * maximum;
+
+              if (Reverse)
+                     Sample = maximum - Sample;
+
+              Dest[Dst] = Sample;
+       }
+
+       // No extra channels but swap-first requested: shift the written samples one
+       // slot to the right and put the last computed value in front.
+       if (Extra == 0 && SwapFirst) {
+
+              memmove(Dest + 1, Dest, (nChan - 1)* sizeof(cmsFloat64Number));
+              Dest[0] = Sample;
+       }
+
+       return Planar ? output + sizeof(cmsFloat64Number)
+                     : output + (nChan + Extra) * sizeof(cmsFloat64Number);
+}
+
+
+
+
+
+// Rescales the three float samples in wOut[] to Lab (L = w0 * 100,
+// a = w1 * 255 - 128, b = w2 * 255 - 128) and stores them as 32-bit floats.
+//
+// Planar output: components are written Stride / PixelSize() floats apart and
+// the pointer advances by one float. Chunky output: components are contiguous
+// and the pointer also steps over any extra channels.
+static
+cmsUInt8Number* PackLabFloatFromFloat(_cmsTRANSFORM* Info,
+                                      cmsFloat32Number wOut[],
+                                      cmsUInt8Number* output,
+                                      cmsUInt32Number Stride)
+{
+    cmsFloat32Number* Dest = (cmsFloat32Number*) output;
+    cmsUInt32Number IsPlanar = T_PLANAR(Info->OutputFormat);
+    // Distance between consecutive components, in floats
+    cmsUInt32Number Step = IsPlanar ? Stride / PixelSize(Info->OutputFormat) : 1;
+
+    Dest[0]        = (cmsFloat32Number) (wOut[0] * 100.0);
+    Dest[Step]     = (cmsFloat32Number) (wOut[1] * 255.0 - 128.0);
+    Dest[Step * 2] = (cmsFloat32Number) (wOut[2] * 255.0 - 128.0);
+
+    if (IsPlanar)
+        return output + sizeof(cmsFloat32Number);
+
+    return output + (sizeof(cmsFloat32Number)*3 + T_EXTRA(Info->OutputFormat) * sizeof(cmsFloat32Number));
+}
+
+
+// Rescales the three float samples in wOut[] to Lab (L = w0 * 100,
+// a = w1 * 255 - 128, b = w2 * 255 - 128) and stores them as doubles.
+//
+// Planar output: components are written Stride / PixelSize() doubles apart
+// and the pointer advances by one double. Chunky output: components are
+// contiguous and the pointer also steps over any extra channels.
+static
+cmsUInt8Number* PackLabDoubleFromFloat(_cmsTRANSFORM* Info,
+                                       cmsFloat32Number wOut[],
+                                       cmsUInt8Number* output,
+                                       cmsUInt32Number Stride)
+{
+    cmsFloat64Number* Dest = (cmsFloat64Number*) output;
+    cmsUInt32Number IsPlanar = T_PLANAR(Info->OutputFormat);
+    // Distance between consecutive components, in doubles
+    cmsUInt32Number Step = IsPlanar ? Stride / PixelSize(Info->OutputFormat) : 1;
+
+    Dest[0]        = (cmsFloat64Number) (wOut[0] * 100.0);
+    Dest[Step]     = (cmsFloat64Number) (wOut[1] * 255.0 - 128.0);
+    Dest[Step * 2] = (cmsFloat64Number) (wOut[2] * 255.0 - 128.0);
+
+    if (IsPlanar)
+        return output + sizeof(cmsFloat64Number);
+
+    return output + (sizeof(cmsFloat64Number)*3 + T_EXTRA(Info->OutputFormat) * sizeof(cmsFloat64Number));
+}
+
+
+// From 0..1 range to 0..MAX_ENCODEABLE_XYZ
+// Multiplies the three float samples in wOut[] by MAX_ENCODEABLE_XYZ and
+// stores them as 32-bit floats.
+//
+// Planar output: components are written Stride / PixelSize() floats apart and
+// the pointer advances by one float. Chunky output: components are contiguous
+// and the pointer also steps over any extra channels.
+static
+cmsUInt8Number* PackXYZFloatFromFloat(_cmsTRANSFORM* Info,
+                                      cmsFloat32Number wOut[],
+                                      cmsUInt8Number* output,
+                                      cmsUInt32Number Stride)
+{
+    cmsFloat32Number* Dest = (cmsFloat32Number*) output;
+    cmsUInt32Number IsPlanar = T_PLANAR(Info->OutputFormat);
+    // Distance between consecutive components, in floats
+    cmsUInt32Number Step = IsPlanar ? Stride / PixelSize(Info->OutputFormat) : 1;
+
+    Dest[0]        = (cmsFloat32Number) (wOut[0] * MAX_ENCODEABLE_XYZ);
+    Dest[Step]     = (cmsFloat32Number) (wOut[1] * MAX_ENCODEABLE_XYZ);
+    Dest[Step * 2] = (cmsFloat32Number) (wOut[2] * MAX_ENCODEABLE_XYZ);
+
+    if (IsPlanar)
+        return output + sizeof(cmsFloat32Number);
+
+    return output + (sizeof(cmsFloat32Number)*3 + T_EXTRA(Info->OutputFormat) * sizeof(cmsFloat32Number));
+}
+
+// Same, but convert to double
+// Multiplies the three float samples in wOut[] by MAX_ENCODEABLE_XYZ and
+// stores them as doubles.
+//
+// Planar output: components are written Stride / PixelSize() doubles apart
+// and the pointer advances by one double. Chunky output: components are
+// contiguous and the pointer also steps over any extra channels.
+static
+cmsUInt8Number* PackXYZDoubleFromFloat(_cmsTRANSFORM* Info,
+                                       cmsFloat32Number wOut[],
+                                       cmsUInt8Number* output,
+                                       cmsUInt32Number Stride)
+{
+    cmsFloat64Number* Dest = (cmsFloat64Number*) output;
+    cmsUInt32Number IsPlanar = T_PLANAR(Info->OutputFormat);
+    // Distance between consecutive components, in doubles
+    cmsUInt32Number Step = IsPlanar ? Stride / PixelSize(Info->OutputFormat) : 1;
+
+    Dest[0]        = (cmsFloat64Number) (wOut[0] * MAX_ENCODEABLE_XYZ);
+    Dest[Step]     = (cmsFloat64Number) (wOut[1] * MAX_ENCODEABLE_XYZ);
+    Dest[Step * 2] = (cmsFloat64Number) (wOut[2] * MAX_ENCODEABLE_XYZ);
+
+    if (IsPlanar)
+        return output + sizeof(cmsFloat64Number);
+
+    return output + (sizeof(cmsFloat64Number)*3 + T_EXTRA(Info->OutputFormat) * sizeof(cmsFloat64Number));
+}
+
+
+// ----------------------------------------------------------------------------------------------------------------
+
+#ifndef CMS_NO_HALF_SUPPORT 
+
+// Decodes a stream of half floats into wIn[], as described by the input format
+
+// Reads half-float samples from accum and stores them in wIn[] as 16-bit
+// values, driven by the bit fields of info->InputFormat (channel count, swap,
+// swap-first, extra channels, flavor and planar layout).
+//
+// Each sample is scaled by 65535 (655.35 for ink spaces) and saturated to a
+// word. Returns the pointer advanced by one half for planar input, or by
+// (channels + extra) halves for chunky input.
+static
+cmsUInt8Number* UnrollHalfTo16(CMSREGISTER _cmsTRANSFORM* info,
+                                CMSREGISTER cmsUInt16Number wIn[],
+                                CMSREGISTER cmsUInt8Number* accum,
+                                CMSREGISTER cmsUInt32Number Stride)
+{
+
+    cmsUInt32Number nChan      = T_CHANNELS(info -> InputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info ->InputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info ->InputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> InputFormat);
+    cmsUInt32Number Extra      = T_EXTRA(info -> InputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsUInt32Number Planar     = T_PLANAR(info -> InputFormat);
+    cmsFloat32Number v;
+    cmsUInt16Number vi;
+    cmsUInt32Number i, start = 0;
+    cmsFloat32Number maximum = IsInkSpace(info ->InputFormat) ? 655.35F : 65535.0F;
+
+
+    // accum is the *input* buffer, so the plane distance must be scaled by the
+    // input format's sample size, not the output format's.
+    Stride /= PixelSize(info->InputFormat);
+
+    // Extra channels come first: skip them before the color samples
+    if (ExtraFirst)
+            start = Extra;
+
+    for (i=0; i < nChan; i++) {
+
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+
+        if (Planar)
+            v = _cmsHalf2Float ( ((cmsUInt16Number*) accum)[(i + start) * Stride] );
+        else
+            v = _cmsHalf2Float ( ((cmsUInt16Number*) accum)[i + start] ) ;
+
+        vi = _cmsQuickSaturateWord(v * maximum);
+
+        // Reverse in the 16-bit domain. Subtracting the unscaled sample from
+        // maximum and then scaling again always saturated the result.
+        if (Reverse)
+            vi = REVERSE_FLAVOR_16(vi);
+
+        wIn[index] = vi;
+    }
+
+
+    // No extra channels but swap-first requested: rotate wIn[] left by one
+    if (Extra == 0 && SwapFirst) {
+        cmsUInt16Number tmp = wIn[0];
+
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsUInt16Number));
+        wIn[nChan-1] = tmp;
+    }
+
+    if (T_PLANAR(info -> InputFormat))
+        return accum + sizeof(cmsUInt16Number);
+    else
+        return accum + (nChan + Extra) * sizeof(cmsUInt16Number);
+}
+
+// Decodes a stream of half floats into wIn[], as described by the input format
+
+// Reads half-float samples from accum and stores them in wIn[] as floats,
+// driven by the bit fields of info->InputFormat (channel count, swap,
+// swap-first, extra channels, flavor and planar layout).
+//
+// Each sample is divided by 1.0 (100.0 for ink spaces); with the reversed
+// flavor the stored value is 1 - v. Returns the pointer advanced by one half
+// for planar input, or by (channels + extra) halves for chunky input.
+static
+cmsUInt8Number* UnrollHalfToFloat(_cmsTRANSFORM* info,
+                                    cmsFloat32Number wIn[],
+                                    cmsUInt8Number* accum,
+                                    cmsUInt32Number Stride)
+{
+
+    cmsUInt32Number nChan      = T_CHANNELS(info -> InputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info ->InputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info ->InputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> InputFormat);
+    cmsUInt32Number Extra      = T_EXTRA(info -> InputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsUInt32Number Planar     = T_PLANAR(info -> InputFormat);
+    cmsFloat32Number v;
+    cmsUInt32Number i, start = 0;
+    cmsFloat32Number maximum = IsInkSpace(info ->InputFormat) ? 100.0F : 1.0F;
+
+    // accum is the *input* buffer, so the plane distance must be scaled by the
+    // input format's sample size, not the output format's.
+    Stride /= PixelSize(info->InputFormat);
+
+    // Extra channels come first: skip them before the color samples
+    if (ExtraFirst)
+            start = Extra;
+
+    for (i=0; i < nChan; i++) {
+
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+
+        if (Planar)
+            v =  _cmsHalf2Float ( ((cmsUInt16Number*) accum)[(i + start) * Stride] );
+        else
+            v =  _cmsHalf2Float ( ((cmsUInt16Number*) accum)[i + start] ) ;
+
+        v /= maximum;
+
+        wIn[index] = Reverse ? 1 - v : v;
+    }
+
+
+    // No extra channels but swap-first requested: rotate wIn[] left by one
+    if (Extra == 0 && SwapFirst) {
+        cmsFloat32Number tmp = wIn[0];
+
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsFloat32Number));
+        wIn[nChan-1] = tmp;
+    }
+
+    if (T_PLANAR(info -> InputFormat))
+        return accum + sizeof(cmsUInt16Number);
+    else
+        return accum + (nChan + Extra) * sizeof(cmsUInt16Number);
+}
+
+
+// Packs the 16-bit samples in wOut[] as half floats, driven by the bit fields
+// of info->OutputFormat (channel count, swap, swap-first, extra channels,
+// flavor and planar layout).
+//
+// Each sample is divided by 65535 (655.35 for ink spaces), i.e. it ends up in
+// 0..1 (0..100 for ink), then converted with _cmsFloat2Half. Returns the
+// pointer advanced by one half for planar output, or by (channels + extra)
+// halves for chunky output.
+static
+cmsUInt8Number* PackHalfFrom16(CMSREGISTER _cmsTRANSFORM* info,
+                                CMSREGISTER cmsUInt16Number wOut[],
+                                CMSREGISTER cmsUInt8Number* output,
+                                CMSREGISTER cmsUInt32Number Stride)
+{
+       cmsUInt32Number nChan      = T_CHANNELS(info->OutputFormat);
+       cmsUInt32Number DoSwap     = T_DOSWAP(info->OutputFormat);
+       cmsUInt32Number Reverse    = T_FLAVOR(info->OutputFormat);
+       cmsUInt32Number Extra      = T_EXTRA(info->OutputFormat);
+       cmsUInt32Number SwapFirst  = T_SWAPFIRST(info->OutputFormat);
+       cmsUInt32Number Planar     = T_PLANAR(info->OutputFormat);
+       cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+       cmsFloat32Number maximum = IsInkSpace(info->OutputFormat) ? 655.35F : 65535.0F;
+       // Top of the *output* range. v is already divided by maximum when the
+       // flavor is applied, so reversing must be done against this value;
+       // subtracting from maximum would yield values near 65535.
+       cmsFloat32Number top = IsInkSpace(info->OutputFormat) ? 100.0F : 1.0F;
+       cmsFloat32Number v = 0;
+       cmsUInt16Number* swap1 = (cmsUInt16Number*)output;
+       cmsUInt32Number i, start = 0;
+
+       // Planar addressing below is in halves, not in raw Stride units
+       Stride /= PixelSize(info->OutputFormat);
+
+       // Extra channels come first: leave room for them before the color samples
+       if (ExtraFirst)
+              start = Extra;
+
+       for (i = 0; i < nChan; i++) {
+
+              cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+
+              v = (cmsFloat32Number)wOut[index] / maximum;
+
+              if (Reverse)
+                     v = top - v;
+
+              if (Planar)
+                     ((cmsUInt16Number*)output)[(i + start) * Stride] = _cmsFloat2Half(v);
+              else
+                     ((cmsUInt16Number*)output)[i + start] = _cmsFloat2Half(v);
+       }
+
+
+       // No extra channels but swap-first requested: shift the written samples one
+       // slot to the right and put the last computed value in front.
+       if (Extra == 0 && SwapFirst) {
+
+              memmove(swap1 + 1, swap1, (nChan - 1)* sizeof(cmsUInt16Number));
+              *swap1 = _cmsFloat2Half(v);
+       }
+
+       if (T_PLANAR(info->OutputFormat))
+              return output + sizeof(cmsUInt16Number);
+       else
+              return output + (nChan + Extra) * sizeof(cmsUInt16Number);
+}
+
+
+
+// Packs the float samples in wOut[] as half floats, driven by the bit fields
+// of info->OutputFormat. Each sample is multiplied by 1.0 (100.0 for ink
+// spaces); with the reversed flavor it is subtracted from that same maximum.
+// Returns the pointer advanced by one half for planar output, or by
+// (channels + extra) halves for chunky output.
+static
+cmsUInt8Number* PackHalfFromFloat(_cmsTRANSFORM* info,
+                                    cmsFloat32Number wOut[],
+                                    cmsUInt8Number* output,
+                                    cmsUInt32Number Stride)
+{
+       const cmsUInt32Number Format = info->OutputFormat;
+       cmsUInt32Number nChan     = T_CHANNELS(Format);
+       cmsUInt32Number DoSwap    = T_DOSWAP(Format);
+       cmsUInt32Number Reverse   = T_FLAVOR(Format);
+       cmsUInt32Number Extra     = T_EXTRA(Format);
+       cmsUInt32Number SwapFirst = T_SWAPFIRST(Format);
+       cmsUInt32Number Planar    = T_PLANAR(Format);
+       cmsFloat32Number maximum  = IsInkSpace(Format) ? 100.0F : 1.0F;
+       cmsUInt16Number* Dest     = (cmsUInt16Number*)output;
+       cmsFloat32Number Sample   = 0;
+       cmsUInt32Number i;
+       // Extra channels lead the pixel when exactly one of swap / swap-first is set
+       cmsUInt32Number First = (DoSwap ^ SwapFirst) ? Extra : 0;
+
+       // Planar addressing below is in halves, not in raw Stride units
+       Stride /= PixelSize(Format);
+
+       for (i = 0; i < nChan; i++) {
+
+              cmsUInt32Number Src = DoSwap ? (nChan - i - 1) : i;
+              cmsUInt32Number Dst = Planar ? (i + First) * Stride : (i + First);
+
+              Sample = wOut[Src] * maximum;
+
+              if (Reverse)
+                     Sample = maximum - Sample;
+
+              Dest[Dst] = _cmsFloat2Half(Sample);
+       }
+
+       // No extra channels but swap-first requested: shift the written samples one
+       // slot to the right and put the last computed value in front.
+       if (Extra == 0 && SwapFirst) {
+
+              memmove(Dest + 1, Dest, (nChan - 1)* sizeof(cmsUInt16Number));
+              Dest[0] = (cmsUInt16Number)_cmsFloat2Half(Sample);
+       }
+
+       return Planar ? output + sizeof(cmsUInt16Number)
+                     : output + (nChan + Extra)* sizeof(cmsUInt16Number);
+}
+
+#endif
+
+// ----------------------------------------------------------------------------------------------------------------
+
+// Stock 16-bit unpackers. _cmsGetStockInputFormatter returns the first matching row, so row order matters.
+static const cmsFormatters16 InputFormatters16[] = {
+    // A row matches when the format equals Type once the bits set in Mask have been ignored
+    //    Type                                          Mask                  Function
+    //  ----------------------------   ------------------------------------  ----------------------------
+    { TYPE_Lab_DBL,                                 ANYPLANAR|ANYEXTRA,   UnrollLabDoubleTo16},
+    { TYPE_XYZ_DBL,                                 ANYPLANAR|ANYEXTRA,   UnrollXYZDoubleTo16},
+    { TYPE_Lab_FLT,                                 ANYPLANAR|ANYEXTRA,   UnrollLabFloatTo16},
+    { TYPE_XYZ_FLT,                                 ANYPLANAR|ANYEXTRA,   UnrollXYZFloatTo16},
+    { TYPE_GRAY_DBL,                                                 0,   UnrollDouble1Chan},
+    { FLOAT_SH(1)|BYTES_SH(0), ANYCHANNELS|ANYPLANAR|ANYSWAPFIRST|ANYFLAVOR|
+                                             ANYSWAP|ANYEXTRA|ANYSPACE,   UnrollDoubleTo16},
+    { FLOAT_SH(1)|BYTES_SH(4), ANYCHANNELS|ANYPLANAR|ANYSWAPFIRST|ANYFLAVOR|
+                                             ANYSWAP|ANYEXTRA|ANYSPACE,   UnrollFloatTo16},
+#ifndef CMS_NO_HALF_SUPPORT 
+    { FLOAT_SH(1)|BYTES_SH(2), ANYCHANNELS|ANYPLANAR|ANYSWAPFIRST|ANYFLAVOR|
+                                            ANYEXTRA|ANYSWAP|ANYSPACE,   UnrollHalfTo16},
+#endif
+
+    { CHANNELS_SH(1)|BYTES_SH(1),                              ANYSPACE,  Unroll1Byte},
+    { CHANNELS_SH(1)|BYTES_SH(1)|EXTRA_SH(1),                  ANYSPACE,  Unroll1ByteSkip1},
+    { CHANNELS_SH(1)|BYTES_SH(1)|EXTRA_SH(2),                  ANYSPACE,  Unroll1ByteSkip2},
+    { CHANNELS_SH(1)|BYTES_SH(1)|FLAVOR_SH(1),                 ANYSPACE,  Unroll1ByteReversed},
+    { COLORSPACE_SH(PT_MCH2)|CHANNELS_SH(2)|BYTES_SH(1),              0,  Unroll2Bytes},
+
+    { TYPE_LabV2_8,                                                   0,  UnrollLabV2_8 },
+    { TYPE_ALabV2_8,                                                  0,  UnrollALabV2_8 },
+    { TYPE_LabV2_16,                                                  0,  UnrollLabV2_16 },
+
+    { CHANNELS_SH(3)|BYTES_SH(1),                              ANYSPACE,  Unroll3Bytes},
+    { CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1),                 ANYSPACE,  Unroll3BytesSwap},
+    { CHANNELS_SH(3)|EXTRA_SH(1)|BYTES_SH(1)|DOSWAP_SH(1),     ANYSPACE,  Unroll3BytesSkip1Swap},
+    { CHANNELS_SH(3)|EXTRA_SH(1)|BYTES_SH(1)|SWAPFIRST_SH(1),  ANYSPACE,  Unroll3BytesSkip1SwapFirst},
+
+    { CHANNELS_SH(3)|EXTRA_SH(1)|BYTES_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1),  
+                                                               ANYSPACE,  Unroll3BytesSkip1SwapSwapFirst},
+
+    { CHANNELS_SH(4)|BYTES_SH(1),                              ANYSPACE,  Unroll4Bytes},
+    { CHANNELS_SH(4)|BYTES_SH(1)|FLAVOR_SH(1),                 ANYSPACE,  Unroll4BytesReverse},
+    { CHANNELS_SH(4)|BYTES_SH(1)|SWAPFIRST_SH(1),              ANYSPACE,  Unroll4BytesSwapFirst},
+    { CHANNELS_SH(4)|BYTES_SH(1)|DOSWAP_SH(1),                 ANYSPACE,  Unroll4BytesSwap},
+    { CHANNELS_SH(4)|BYTES_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1), ANYSPACE,  Unroll4BytesSwapSwapFirst},
+
+    { BYTES_SH(1)|PLANAR_SH(1), ANYFLAVOR|ANYSWAPFIRST|
+                                   ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE, UnrollPlanarBytes},
+
+    { BYTES_SH(1),    ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|
+                                           ANYEXTRA|ANYCHANNELS|ANYSPACE, UnrollChunkyBytes},
+
+    { CHANNELS_SH(1)|BYTES_SH(2),                              ANYSPACE,  Unroll1Word},
+    { CHANNELS_SH(1)|BYTES_SH(2)|FLAVOR_SH(1),                 ANYSPACE,  Unroll1WordReversed},
+    { CHANNELS_SH(1)|BYTES_SH(2)|EXTRA_SH(3),                  ANYSPACE,  Unroll1WordSkip3},
+
+    { CHANNELS_SH(2)|BYTES_SH(2),                              ANYSPACE,  Unroll2Words},
+    { CHANNELS_SH(3)|BYTES_SH(2),                              ANYSPACE,  Unroll3Words},
+    { CHANNELS_SH(4)|BYTES_SH(2),                              ANYSPACE,  Unroll4Words},
+
+    { CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1),                 ANYSPACE,  Unroll3WordsSwap},
+    { CHANNELS_SH(3)|BYTES_SH(2)|EXTRA_SH(1)|SWAPFIRST_SH(1),  ANYSPACE,  Unroll3WordsSkip1SwapFirst},
+    { CHANNELS_SH(3)|BYTES_SH(2)|EXTRA_SH(1)|DOSWAP_SH(1),     ANYSPACE,  Unroll3WordsSkip1Swap},
+    { CHANNELS_SH(4)|BYTES_SH(2)|FLAVOR_SH(1),                 ANYSPACE,  Unroll4WordsReverse},
+    { CHANNELS_SH(4)|BYTES_SH(2)|SWAPFIRST_SH(1),              ANYSPACE,  Unroll4WordsSwapFirst},
+    { CHANNELS_SH(4)|BYTES_SH(2)|DOSWAP_SH(1),                 ANYSPACE,  Unroll4WordsSwap},
+    { CHANNELS_SH(4)|BYTES_SH(2)|DOSWAP_SH(1)|SWAPFIRST_SH(1), ANYSPACE,  Unroll4WordsSwapSwapFirst},
+
+
+    { BYTES_SH(2)|PLANAR_SH(1),  ANYFLAVOR|ANYSWAP|ANYENDIAN|ANYEXTRA|ANYCHANNELS|ANYSPACE,  UnrollPlanarWords},
+    { BYTES_SH(2),  ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYENDIAN|ANYEXTRA|ANYCHANNELS|ANYSPACE,  UnrollAnyWords},
+};
+
+
+// Stock floating-point unpackers, searched top to bottom by _cmsGetStockInputFormatter (first match wins).
+static const cmsFormattersFloat InputFormattersFloat[] = {
+
+    //    Type                                          Mask                  Function
+    //  ----------------------------   ------------------------------------  ----------------------------
+    {     TYPE_Lab_DBL,                                ANYPLANAR|ANYEXTRA,   UnrollLabDoubleToFloat},
+    {     TYPE_Lab_FLT,                                ANYPLANAR|ANYEXTRA,   UnrollLabFloatToFloat},
+
+    {     TYPE_XYZ_DBL,                                ANYPLANAR|ANYEXTRA,   UnrollXYZDoubleToFloat},
+    {     TYPE_XYZ_FLT,                                ANYPLANAR|ANYEXTRA,   UnrollXYZFloatToFloat},
+
+    {     FLOAT_SH(1)|BYTES_SH(4), ANYPLANAR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|
+                                                      ANYCHANNELS|ANYSPACE,  UnrollFloatsToFloat},
+
+    {     FLOAT_SH(1)|BYTES_SH(0), ANYPLANAR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|
+                                                        ANYCHANNELS|ANYSPACE,  UnrollDoublesToFloat},
+#ifndef CMS_NO_HALF_SUPPORT 
+    {     FLOAT_SH(1)|BYTES_SH(2), ANYPLANAR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|
+                                                        ANYCHANNELS|ANYSPACE,  UnrollHalfToFloat},
+#endif
+};
+
+
+// Finds the stock unpacker for dwInput; bits set in a row's Mask are left out of the comparison.
+static
+cmsFormatter _cmsGetStockInputFormatter(cmsUInt32Number dwInput, cmsUInt32Number dwFlags)
+{
+    cmsFormatter result;
+    cmsUInt32Number row;
+
+    // dwFlags picks the table: 16-bit unpackers or floating-point unpackers
+    if (dwFlags == CMS_PACK_FLAGS_16BITS) {
+
+        for (row = 0; row < sizeof(InputFormatters16) / sizeof(cmsFormatters16); row++) {
+
+            const cmsFormatters16* entry = &InputFormatters16[row];
+
+            // First matching row wins
+            if ((dwInput & ~entry->Mask) == entry->Type) {
+
+                result.Fmt16 = entry->Frm;
+                return result;
+            }
+        }
+    }
+    else if (dwFlags == CMS_PACK_FLAGS_FLOAT) {
+
+        for (row = 0; row < sizeof(InputFormattersFloat) / sizeof(cmsFormattersFloat); row++) {
+
+            const cmsFormattersFloat* entry = &InputFormattersFloat[row];
+
+            if ((dwInput & ~entry->Mask) == entry->Type) {
+
+                result.FmtFloat = entry->Frm;
+                return result;
+            }
+        }
+    }
+
+    // Unknown flags or no matching row: hand back a NULL formatter
+    result.Fmt16 = NULL;
+    return result;
+}
+
+static const cmsFormatters16 OutputFormatters16[] = {
+    //    Type                                          Mask                  Function
+    //  ----------------------------   ------------------------------------  ----------------------------
+    // Rows are tried top to bottom by _cmsGetStockOutputFormatter; the first match is returned
+    { TYPE_Lab_DBL,                                      ANYPLANAR|ANYEXTRA,  PackLabDoubleFrom16},
+    { TYPE_XYZ_DBL,                                      ANYPLANAR|ANYEXTRA,  PackXYZDoubleFrom16},
+
+    { TYPE_Lab_FLT,                                      ANYPLANAR|ANYEXTRA,  PackLabFloatFrom16},
+    { TYPE_XYZ_FLT,                                      ANYPLANAR|ANYEXTRA,  PackXYZFloatFrom16},
+    
+    { FLOAT_SH(1)|BYTES_SH(0),      ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|
+                                    ANYCHANNELS|ANYPLANAR|ANYEXTRA|ANYSPACE,  PackDoubleFrom16},
+    { FLOAT_SH(1)|BYTES_SH(4),      ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|
+                                    ANYCHANNELS|ANYPLANAR|ANYEXTRA|ANYSPACE,  PackFloatFrom16},
+#ifndef CMS_NO_HALF_SUPPORT 
+    { FLOAT_SH(1)|BYTES_SH(2),      ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|
+                                    ANYCHANNELS|ANYPLANAR|ANYEXTRA|ANYSPACE,  PackHalfFrom16},
+#endif
+
+    { CHANNELS_SH(1)|BYTES_SH(1),                                  ANYSPACE,  Pack1Byte},
+    { CHANNELS_SH(1)|BYTES_SH(1)|EXTRA_SH(1),                      ANYSPACE,  Pack1ByteSkip1},
+    { CHANNELS_SH(1)|BYTES_SH(1)|EXTRA_SH(1)|SWAPFIRST_SH(1),      ANYSPACE,  Pack1ByteSkip1SwapFirst},
+
+    { CHANNELS_SH(1)|BYTES_SH(1)|FLAVOR_SH(1),                     ANYSPACE,  Pack1ByteReversed},
+
+    { TYPE_LabV2_8,                                                       0,  PackLabV2_8 },
+    { TYPE_ALabV2_8,                                                      0,  PackALabV2_8 },
+    { TYPE_LabV2_16,                                                      0,  PackLabV2_16 },
+
+    { CHANNELS_SH(3)|BYTES_SH(1)|OPTIMIZED_SH(1),                  ANYSPACE,  Pack3BytesOptimized},
+    { CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|OPTIMIZED_SH(1),      ANYSPACE,  Pack3BytesAndSkip1Optimized},
+    { CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|SWAPFIRST_SH(1)|OPTIMIZED_SH(1),
+                                                                   ANYSPACE,  Pack3BytesAndSkip1SwapFirstOptimized},
+    { CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1)|OPTIMIZED_SH(1),
+                                                                   ANYSPACE,  Pack3BytesAndSkip1SwapSwapFirstOptimized},
+    { CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|EXTRA_SH(1)|OPTIMIZED_SH(1),
+                                                                   ANYSPACE,  Pack3BytesAndSkip1SwapOptimized},
+    { CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|OPTIMIZED_SH(1),     ANYSPACE,  Pack3BytesSwapOptimized},
+    // NOTE(review): _cmsGetStockOutputFormatter clears OPTIMIZED_SH(1) from the format before it
+    // searches, so the *Optimized rows above cannot match through that lookup -- confirm intent.
+
+    { CHANNELS_SH(3)|BYTES_SH(1),                                  ANYSPACE,  Pack3Bytes},
+    { CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1),                      ANYSPACE,  Pack3BytesAndSkip1},
+    { CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|SWAPFIRST_SH(1),      ANYSPACE,  Pack3BytesAndSkip1SwapFirst},
+    { CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1),
+                                                                   ANYSPACE,  Pack3BytesAndSkip1SwapSwapFirst},
+    { CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|EXTRA_SH(1),         ANYSPACE,  Pack3BytesAndSkip1Swap},
+    { CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1),                     ANYSPACE,  Pack3BytesSwap},
+    { CHANNELS_SH(6)|BYTES_SH(1),                                  ANYSPACE,  Pack6Bytes},
+    { CHANNELS_SH(6)|BYTES_SH(1)|DOSWAP_SH(1),                     ANYSPACE,  Pack6BytesSwap},
+    { CHANNELS_SH(4)|BYTES_SH(1),                                  ANYSPACE,  Pack4Bytes},
+    { CHANNELS_SH(4)|BYTES_SH(1)|FLAVOR_SH(1),                     ANYSPACE,  Pack4BytesReverse},
+    { CHANNELS_SH(4)|BYTES_SH(1)|SWAPFIRST_SH(1),                  ANYSPACE,  Pack4BytesSwapFirst},
+    { CHANNELS_SH(4)|BYTES_SH(1)|DOSWAP_SH(1),                     ANYSPACE,  Pack4BytesSwap},
+    { CHANNELS_SH(4)|BYTES_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1),     ANYSPACE,  Pack4BytesSwapSwapFirst},
+
+    { BYTES_SH(1),                 ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE, PackAnyBytes},
+    { BYTES_SH(1)|PLANAR_SH(1),    ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE, PackPlanarBytes},
+
+    { CHANNELS_SH(1)|BYTES_SH(2),                                  ANYSPACE,  Pack1Word},
+    { CHANNELS_SH(1)|BYTES_SH(2)|EXTRA_SH(1),                      ANYSPACE,  Pack1WordSkip1},
+    { CHANNELS_SH(1)|BYTES_SH(2)|EXTRA_SH(1)|SWAPFIRST_SH(1),      ANYSPACE,  Pack1WordSkip1SwapFirst},
+    { CHANNELS_SH(1)|BYTES_SH(2)|FLAVOR_SH(1),                     ANYSPACE,  Pack1WordReversed},
+    { CHANNELS_SH(1)|BYTES_SH(2)|ENDIAN16_SH(1),                   ANYSPACE,  Pack1WordBigEndian},
+    { CHANNELS_SH(3)|BYTES_SH(2),                                  ANYSPACE,  Pack3Words},
+    { CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1),                     ANYSPACE,  Pack3WordsSwap},
+    { CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1),                   ANYSPACE,  Pack3WordsBigEndian},
+    { CHANNELS_SH(3)|BYTES_SH(2)|EXTRA_SH(1),                      ANYSPACE,  Pack3WordsAndSkip1},
+    { CHANNELS_SH(3)|BYTES_SH(2)|EXTRA_SH(1)|DOSWAP_SH(1),         ANYSPACE,  Pack3WordsAndSkip1Swap},
+    { CHANNELS_SH(3)|BYTES_SH(2)|EXTRA_SH(1)|SWAPFIRST_SH(1),      ANYSPACE,  Pack3WordsAndSkip1SwapFirst},
+
+    { CHANNELS_SH(3)|BYTES_SH(2)|EXTRA_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1),
+                                                                   ANYSPACE,  Pack3WordsAndSkip1SwapSwapFirst},
+
+    { CHANNELS_SH(4)|BYTES_SH(2),                                  ANYSPACE,  Pack4Words},
+    { CHANNELS_SH(4)|BYTES_SH(2)|FLAVOR_SH(1),                     ANYSPACE,  Pack4WordsReverse},
+    { CHANNELS_SH(4)|BYTES_SH(2)|DOSWAP_SH(1),                     ANYSPACE,  Pack4WordsSwap},
+    { CHANNELS_SH(4)|BYTES_SH(2)|ENDIAN16_SH(1),                   ANYSPACE,  Pack4WordsBigEndian},
+
+    { CHANNELS_SH(6)|BYTES_SH(2),                                  ANYSPACE,  Pack6Words},
+    { CHANNELS_SH(6)|BYTES_SH(2)|DOSWAP_SH(1),                     ANYSPACE,  Pack6WordsSwap},
+
+    { BYTES_SH(2)|PLANAR_SH(1),     ANYFLAVOR|ANYENDIAN|ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE, PackPlanarWords},
+    { BYTES_SH(2),                  ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYENDIAN|ANYEXTRA|ANYCHANNELS|ANYSPACE, PackAnyWords}
+
+};
+
+// Stock floating-point packers, searched top to bottom by _cmsGetStockOutputFormatter (first match wins).
+static const cmsFormattersFloat OutputFormattersFloat[] = {
+    //    Type                                          Mask                                 Function
+    //  ----------------------------   ---------------------------------------------------  ----------------------------
+    {     TYPE_Lab_FLT,                                                ANYPLANAR|ANYEXTRA,   PackLabFloatFromFloat},
+    {     TYPE_XYZ_FLT,                                                ANYPLANAR|ANYEXTRA,   PackXYZFloatFromFloat},
+
+    {     TYPE_Lab_DBL,                                                ANYPLANAR|ANYEXTRA,   PackLabDoubleFromFloat},
+    {     TYPE_XYZ_DBL,                                                ANYPLANAR|ANYEXTRA,   PackXYZDoubleFromFloat},
+
+    {     FLOAT_SH(1)|BYTES_SH(4), ANYPLANAR|
+                             ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE,   PackFloatsFromFloat },
+    {     FLOAT_SH(1)|BYTES_SH(0), ANYPLANAR|
+                             ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE,   PackDoublesFromFloat },
+#ifndef CMS_NO_HALF_SUPPORT 
+    {     FLOAT_SH(1)|BYTES_SH(2),                                   
+                             ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE,   PackHalfFromFloat },
+#endif
+    // NOTE(review): unlike the float and double rows, the half row's mask has no ANYPLANAR -- confirm this is intended
+};
+
+
+// Finds the stock packer for dwInput; bits set in a row's Mask are left out of the comparison.
+static
+cmsFormatter _cmsGetStockOutputFormatter(cmsUInt32Number dwInput, cmsUInt32Number dwFlags)
+{
+    cmsFormatter result;
+    cmsUInt32Number row;
+
+    // The optimized bit is only a hint, so it never takes part in the lookup
+    dwInput &= ~OPTIMIZED_SH(1);
+
+    // dwFlags picks the table: 16-bit packers or floating-point packers
+    if (dwFlags == CMS_PACK_FLAGS_16BITS) {
+
+        for (row = 0; row < sizeof(OutputFormatters16) / sizeof(cmsFormatters16); row++) {
+
+            const cmsFormatters16* entry = &OutputFormatters16[row];
+
+            // First matching row wins
+            if ((dwInput & ~entry->Mask) == entry->Type) {
+
+                result.Fmt16 = entry->Frm;
+                return result;
+            }
+        }
+
+    }
+    else if (dwFlags == CMS_PACK_FLAGS_FLOAT) {
+
+        for (row = 0; row < sizeof(OutputFormattersFloat) / sizeof(cmsFormattersFloat); row++) {
+
+            const cmsFormattersFloat* entry = &OutputFormattersFloat[row];
+
+            // First matching row wins
+            if ((dwInput & ~entry->Mask) == entry->Type) {
+
+                result.FmtFloat = entry->Frm;
+                return result;
+            }
+        }
+
+    }
+
+    // Unknown flags or no matching row: hand back a NULL formatter
+    result.Fmt16 = NULL;
+    return result;
+}
+
+
+typedef struct _cms_formatters_factory_list {
+    // One list node: a formatter factory callback plus the link to the next node
+    cmsFormatterFactory Factory;
+    struct _cms_formatters_factory_list *Next;
+
+} cmsFormattersFactoryList;
+// File-scope formatters chunk starting with a NULL member; presumably the default (global context) chunk -- TODO confirm
+_cmsFormattersPluginChunkType _cmsFormattersPluginChunk = { NULL };
+
+
+// Copies the formatter factory list found in src into ctx's memory pool, keeping node order
+static
+void DupFormatterFactoryList(struct _cmsContext_struct* ctx,
+                                               const struct _cmsContext_struct* src)
+{
+   _cmsFormattersPluginChunkType* head = (_cmsFormattersPluginChunkType*) src->chunks[FormattersPlugin];
+   _cmsFormattersPluginChunkType newHead = { NULL };
+   cmsFormattersFactoryList*  last = NULL;      // most recently appended copy; NULL while the copy is empty
+   cmsFormattersFactoryList*  entry;
+
+   _cmsAssert(head != NULL);
+
+   entry = head->FactoryList;
+   while (entry != NULL) {
+
+       cmsFormattersFactoryList* copy =
+           (cmsFormattersFactoryList*) _cmsSubAllocDup(ctx->MemPool, entry, sizeof(cmsFormattersFactoryList));
+
+       // Allocation failure: give up, leaving ctx->chunks[FormattersPlugin] as it was
+       if (copy == NULL)
+           return;
+
+       // Append at the tail so the copy keeps the source order
+       copy->Next = NULL;
+       if (last == NULL)
+           newHead.FactoryList = copy;
+       else
+           last->Next = copy;
+
+       last  = copy;
+       entry = entry->Next;
+   }
+
+   ctx->chunks[FormattersPlugin] = _cmsSubAllocDup(ctx->MemPool, &newHead, sizeof(_cmsFormattersPluginChunkType));
+}
+
+// The formatters plug-in memory chunk allocator/dup
+void _cmsAllocFormattersPluginChunk(struct _cmsContext_struct* ctx,
+                                    const struct _cmsContext_struct* src)
+{
+    // Template used when there is no source context: an empty factory list
+    static _cmsFormattersPluginChunkType FormattersPluginChunk = { NULL };
+    _cmsAssert(ctx != NULL);
+
+    if (src == NULL) {
+        ctx->chunks[FormattersPlugin] = _cmsSubAllocDup(ctx->MemPool, &FormattersPluginChunk, sizeof(_cmsFormattersPluginChunkType));
+        return;
+    }
+
+    // A source context was given: duplicate its factory list
+    DupFormatterFactoryList(ctx, src);
+}
+
+
+
+// Formatters management: registers a plug-in's factory; a NULL plug-in empties the factory list
+cmsBool  _cmsRegisterFormattersPlugin(cmsContext ContextID, cmsPluginBase* Data)
+{
+    _cmsFormattersPluginChunkType* chunk = ( _cmsFormattersPluginChunkType*) _cmsContextGetClientChunk(ContextID, FormattersPlugin);
+    cmsFormattersFactoryList* node;
+
+    if (Data != NULL) {
+
+        node = (cmsFormattersFactoryList*) _cmsPluginMalloc(ContextID, sizeof(cmsFormattersFactoryList));
+        if (node == NULL)
+            return FALSE;
+
+        // Push at the head, so the most recently registered factory comes first in the list
+        node->Factory = ((cmsPluginFormatters*) Data)->FormattersFactory;
+        node->Next    = chunk->FactoryList;
+        chunk->FactoryList = node;
+    }
+    else {
+        // Reset to built-in defaults
+        chunk->FactoryList = NULL;
+    }
+
+    return TRUE;
+}
+
+cmsFormatter CMSEXPORT _cmsGetFormatter(cmsContext ContextID,
+                                        cmsUInt32Number Type,         // Specific type, i.e. TYPE_RGB_8
+                                        cmsFormatterDirection Dir,
+                                        cmsUInt32Number dwFlags)
+{
+    _cmsFormattersPluginChunkType* chunk = ( _cmsFormattersPluginChunkType*) _cmsContextGetClientChunk(ContextID, FormattersPlugin);
+    cmsFormattersFactoryList* node = chunk->FactoryList;
+
+    // Registered factories get the first chance, in list order; a non-NULL Fmt16 means the factory handled it
+    while (node != NULL) {
+        cmsFormatter candidate = node->Factory(Type, Dir, dwFlags);
+        if (candidate.Fmt16 != NULL) return candidate;
+        node = node->Next;
+    }
+
+    // No factory claimed the format: fall back to the stock tables for the requested direction
+    if (Dir != cmsFormatterInput)
+        return _cmsGetStockOutputFormatter(Type, dwFlags);
+    return _cmsGetStockInputFormatter(Type, dwFlags);
+}
+
+
+// Return whether the given formatter refers to float values
+cmsBool  _cmsFormatterIsFloat(cmsUInt32Number Type)
+{
+    if (T_FLOAT(Type)) return TRUE; else return FALSE;
+}
+
+// Return whether the given formatter refers to 8 bits,
+// i.e. whether T_BYTES() of the format equals 1
+cmsBool  _cmsFormatterIs8bit(cmsUInt32Number Type)
+{
+    const cmsBool OneByte = (T_BYTES(Type) == 1);
+    return OneByte;
+}
+
+// Build a suitable formatter for the colorspace of this profile
+cmsUInt32Number CMSEXPORT cmsFormatterForColorspaceOfProfile(cmsHPROFILE hProfile, cmsUInt32Number nBytes, cmsBool lIsFloat)
+{
+    const cmsColorSpaceSignature Space = cmsGetColorSpace(hProfile);
+    const cmsUInt32Number SpaceBits    = (cmsUInt32Number) _cmsLCMScolorSpace(Space);
+    const cmsUInt32Number Channels     = cmsChannelsOf(Space);
+    const cmsUInt32Number FloatBit     = lIsFloat ? 1U : 0;
+
+    // Only the descriptor bits are assembled here; nothing is looked up in the formatter tables
+    return FLOAT_SH(FloatBit) | COLORSPACE_SH(SpaceBits)
+         | BYTES_SH(nBytes) | CHANNELS_SH(Channels);
+}
+
+// Build a suitable formatter for the PCS of this profile (as reported by cmsGetPCS)
+cmsUInt32Number CMSEXPORT cmsFormatterForPCSOfProfile(cmsHPROFILE hProfile, cmsUInt32Number nBytes, cmsBool lIsFloat)
+{
+    const cmsColorSpaceSignature Pcs = cmsGetPCS(hProfile);
+    const cmsUInt32Number PcsBits    = (cmsUInt32Number) _cmsLCMScolorSpace(Pcs);
+    const cmsUInt32Number Channels   = cmsChannelsOf(Pcs);
+    const cmsUInt32Number FloatBit   = lIsFloat ? 1U : 0;
+
+    // Only the descriptor bits are assembled here; nothing is looked up in the formatter tables
+    return FLOAT_SH(FloatBit)
+         | COLORSPACE_SH(PcsBits)
+         | BYTES_SH(nBytes) | CHANNELS_SH(Channels);
+}
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmspcs.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmspcs.c
new file mode 100644
index 0000000000..ea70484d5b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmspcs.c
@@ -0,0 +1,940 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+//      inter PCS conversions XYZ <-> CIE L* a* b*
+/*
+
+
+       CIE 15:2004 CIELab is defined as:
+
+       L* = 116*f(Y/Yn) - 16                     0 <= L* <= 100
+       a* = 500*[f(X/Xn) - f(Y/Yn)]
+       b* = 200*[f(Y/Yn) - f(Z/Zn)]
+
+       and
+
+              f(t) = t^(1/3)                     1 >= t >  (24/116)^3
+                     (841/108)*t + (16/116)      0 <= t <= (24/116)^3
+
+
+       Reverse transform is:
+
+       X = Xn*[a* / 500 + (L* + 16) / 116] ^ 3   if (X/Xn) > (24/116)
+         = Xn*(a* / 500 + L* / 116) / 7.787      if (X/Xn) <= (24/116)
+
+
+
+       PCS in Lab2 is encoded as:
+
+              8 bit Lab PCS:
+
+                     L*      0..100 into a 0..ff byte.
+                     a*      t + 128 range is -128.0  +127.0
+                     b*
+
+             16 bit Lab PCS:
+
+                     L*     0..100  into a 0..ff00 word.
+                     a*     t + 128  range is  -128.0  +127.9961
+                     b*
+
+
+
+Interchange Space   Component     Actual Range        Encoded Range
+CIE XYZ             X             0 -> 1.99997        0x0000 -> 0xffff
+CIE XYZ             Y             0 -> 1.99997        0x0000 -> 0xffff
+CIE XYZ             Z             0 -> 1.99997        0x0000 -> 0xffff
+
+Version 2,3
+-----------
+
+CIELAB (16 bit)     L*            0 -> 100.0          0x0000 -> 0xff00
+CIELAB (16 bit)     a*            -128.0 -> +127.996  0x0000 -> 0x8000 -> 0xffff
+CIELAB (16 bit)     b*            -128.0 -> +127.996  0x0000 -> 0x8000 -> 0xffff
+
+
+Version 4
+---------
+
+CIELAB (16 bit)     L*            0 -> 100.0          0x0000 -> 0xffff
+CIELAB (16 bit)     a*            -128.0 -> +127      0x0000 -> 0x8080 -> 0xffff
+CIELAB (16 bit)     b*            -128.0 -> +127      0x0000 -> 0x8080 -> 0xffff
+
+*/
+
+// Conversions
+void CMSEXPORT cmsXYZ2xyY(cmsCIExyY* Dest, const cmsCIEXYZ* Source)
+{
+    // Reciprocal of X+Y+Z; a zero sum is not guarded against here
+    const cmsFloat64Number invSum = 1. / (Source->X + Source->Y + Source->Z);
+
+    // x and y are X and Y scaled by that reciprocal; Y is copied unchanged
+    Dest->x = (Source->X) * invSum;
+    Dest->y = (Source->Y) * invSum;
+    Dest->Y = Source->Y;
+}
+
+void CMSEXPORT cmsxyY2XYZ(cmsCIEXYZ* Dest, const cmsCIExyY* Source)
+{
+    Dest->X = Source->Y * (Source->x / Source->y);                      // X = (x / y) * Y
+    Dest->Y = Source->Y;                                                // Y is copied unchanged
+    Dest->Z = Source->Y * ((1 - Source->x - Source->y) / Source->y);    // Z = ((1 - x - y) / y) * Y
+}
+
+/*
+       The break point (24/116)^3 = (6/29)^3 is a very small amount of tristimulus 
+       primary (0.008856).  Generally, this only happens for 
+       nearly ideal blacks and for some orange / amber colors in transmission mode.  
+       For example, the Z value of the orange turn indicator lamp lens on an 
+       automobile will often be below this value.  But the Z does not 
+       contribute to the perceived color directly.
+*/
+
+// Helper f(t) applied by cmsXYZ2Lab to each white-point-normalized component
+static
+cmsFloat64Number f(cmsFloat64Number t)
+{
+    // At or below (24/116)^3 a linear segment is used instead of the cube root
+    const cmsFloat64Number Threshold = (24.0/116.0) * (24.0/116.0) * (24.0/116.0);
+
+    return (t <= Threshold) ? (841.0/108.0) * t + (16.0/116.0)
+                            : pow(t, 1.0/3.0);
+}
+
+// Inverse of f(): the linear branch undoes (841/108)*t + 16/116 and the
+// other branch cubes what f() took the cube root of
+static
+cmsFloat64Number f_1(cmsFloat64Number t)
+{
+    // 24/116 is the value f() takes at its own break point (24/116)^3
+    const cmsFloat64Number Threshold = (24.0/116.0);
+
+    return (t <= Threshold) ? (108.0/841.0) * (t - (16.0/116.0))
+                            : t * t * t;
+}
+
+
+// Standard XYZ to Lab. It can handle negative XYZ numbers in some cases
+void CMSEXPORT cmsXYZ2Lab(const cmsCIEXYZ* WhitePoint, cmsCIELab* Lab, const cmsCIEXYZ* xyz)
+{
+    const cmsCIEXYZ* White = (WhitePoint != NULL) ? WhitePoint : cmsD50_XYZ();   // D50 when no white point is given
+    cmsFloat64Number fX, fY, fZ;
+
+    // Normalize each component by the white point and run it through f()
+    fX = f(xyz->X / White->X);
+    fY = f(xyz->Y / White->Y);
+    fZ = f(xyz->Z / White->Z);
+
+    // L* from fY alone, a* and b* from the differences
+    Lab->L = 116.0*fY - 16.0;
+    Lab->a = 500.0*(fX - fY);
+    Lab->b = 200.0*(fY - fZ);
+}
+
+
+// Standard Lab to XYZ. It can return negative XYZ in some cases
+void CMSEXPORT cmsLab2XYZ(const cmsCIEXYZ* WhitePoint, cmsCIEXYZ* xyz,  const cmsCIELab* Lab)
+{
+    const cmsCIEXYZ* White = (WhitePoint != NULL) ? WhitePoint : cmsD50_XYZ();   // D50 when no white point is given
+    cmsFloat64Number fX, fY, fZ;
+
+    // Rebuild the three f() values from L*, a*, b* (0.002 = 1/500, 0.005 = 1/200)
+    fY = (Lab->L + 16.0) / 116.0;
+    fX = fY + 0.002 * Lab->a;
+    fZ = fY - 0.005 * Lab->b;
+
+    // Undo f() and scale back by the white point
+    xyz->X = f_1(fX) * White->X;
+    xyz->Y = f_1(fY) * White->Y;
+    xyz->Z = f_1(fZ) * White->Z;
+
+}
+
+static
+cmsFloat64Number L2float2(cmsUInt16Number v)
+{
+    return (cmsFloat64Number) v / 652.800;    // 652.8 = 0xFF00 / 100, so word 0xFF00 decodes to L* = 100 (V2)
+}
+
+// V2 decoding of the a* / b* components
+static
+cmsFloat64Number ab2float2(cmsUInt16Number v)
+{
+    return ((cmsFloat64Number) v / 256.0) - 128.0;    // word 0x8000 decodes to 0, word 0 to -128
+}
+
+static
+cmsUInt16Number L2Fix2(cmsFloat64Number L)
+{
+    return _cmsQuickSaturateWord(L *  652.8);    // same 652.8 scale L2float2 divides by (V2 encoding of L*)
+}
+
+static
+cmsUInt16Number ab2Fix2(cmsFloat64Number ab)
+{
+    return _cmsQuickSaturateWord((ab + 128.0) * 256.0);    // same offset and scale ab2float2 undoes (V2 encoding of a*, b*)
+}
+
+
+static
+cmsFloat64Number L2float4(cmsUInt16Number v)
+{
+    return (cmsFloat64Number) v / 655.35;    // 655.35 = 0xFFFF / 100, so word 0xFFFF decodes to L* = 100 (V4)
+}
+
+// V4 decoding of the a* / b* components
+static
+cmsFloat64Number ab2float4(cmsUInt16Number v)
+{
+    return ((cmsFloat64Number) v / 257.0) - 128.0;    // word 0x8080 (= 128 * 257) decodes to 0
+}
+
+
+void CMSEXPORT cmsLabEncoded2FloatV2(cmsCIELab* Lab, const cmsUInt16Number wLab[3])
+{
+    Lab->L = L2float2(wLab[0]);     // word 0 carries L*
+    Lab->a = ab2float2(wLab[1]);    // words 1 and 2 carry a* and b*, decoded with the V2 a/b helper
+    Lab->b = ab2float2(wLab[2]);
+}
+
+
+void CMSEXPORT cmsLabEncoded2Float(cmsCIELab* Lab, const cmsUInt16Number wLab[3])
+{
+    Lab->L = L2float4(wLab[0]);     // word 0 carries L*
+    Lab->a = ab2float4(wLab[1]);    // words 1 and 2 carry a* and b*, decoded with the V4 a/b helper
+    Lab->b = ab2float4(wLab[2]);
+}
+
+static
+cmsFloat64Number Clamp_L_doubleV2(cmsFloat64Number L)
+{
+    // Upper bound: the L* that word 0xFFFF stands for when 0xFF00 stands for 100
+    const cmsFloat64Number L_max = (cmsFloat64Number) (0xFFFF * 100.0) / 0xFF00;
+
+    if (L < 0) return 0;
+    if (L > L_max) return L_max;
+    return L;
+}
+
+
+static
+cmsFloat64Number Clamp_ab_doubleV2(cmsFloat64Number ab)
+{
+    // Lower bound first, then upper bound, both taken from the V2 encodable limits
+    ab = (ab < MIN_ENCODEABLE_ab2) ? MIN_ENCODEABLE_ab2 : ab;
+    ab = (ab > MAX_ENCODEABLE_ab2) ? MAX_ENCODEABLE_ab2 : ab;
+    return ab;
+}
+
+// Encodes a floating-point Lab value into three 16-bit words using the V2 helpers
+void CMSEXPORT cmsFloat2LabEncodedV2(cmsUInt16Number wLab[3], const cmsCIELab* fLab)
+{
+    // Clamp every component first, then quantize
+    const cmsFloat64Number L = Clamp_L_doubleV2(fLab->L);
+    const cmsFloat64Number a = Clamp_ab_doubleV2(fLab->a);
+    const cmsFloat64Number b = Clamp_ab_doubleV2(fLab->b);
+
+    wLab[0] = L2Fix2(L);
+    wLab[1] = ab2Fix2(a);
+    wLab[2] = ab2Fix2(b);
+}
+
+
+static
+cmsFloat64Number Clamp_L_doubleV4(cmsFloat64Number L)
+{
+    // L* is limited to 0..100 here
+    if (L < 0) return 0;
+    if (L > 100.0) return 100.0;
+    return L;
+}
+
+static
+cmsFloat64Number Clamp_ab_doubleV4(cmsFloat64Number ab)
+{
+    // Lower bound first, then upper bound, both taken from the V4 encodable limits
+    ab = (ab < MIN_ENCODEABLE_ab4) ? MIN_ENCODEABLE_ab4 : ab;
+    ab = (ab > MAX_ENCODEABLE_ab4) ? MAX_ENCODEABLE_ab4 : ab;
+    return ab;
+}
+
+static
+cmsUInt16Number L2Fix4(cmsFloat64Number L)
+{
+    return _cmsQuickSaturateWord(L *  655.35);    // same 655.35 scale L2float4 divides by (V4 encoding of L*)
+}
+
+static
+cmsUInt16Number ab2Fix4(cmsFloat64Number ab)
+{
+    return _cmsQuickSaturateWord((ab + 128.0) * 257.0);    // same offset and scale ab2float4 undoes (V4 encoding of a*, b*)
+}
+
+// Encodes a floating-point Lab value into three 16-bit words using the V4 helpers
+void CMSEXPORT cmsFloat2LabEncoded(cmsUInt16Number wLab[3], const cmsCIELab* fLab)
+{
+    // Clamp every component first, then quantize
+    const cmsFloat64Number L = Clamp_L_doubleV4(fLab->L);
+    const cmsFloat64Number a = Clamp_ab_doubleV4(fLab->a);
+    const cmsFloat64Number b = Clamp_ab_doubleV4(fLab->b);
+
+    wLab[0] = L2Fix4(L);
+    wLab[1] = ab2Fix4(a);
+    wLab[2] = ab2Fix4(b);
+}
+
+// Auxiliary: convert an angle given in degrees to radians
+static
+cmsFloat64Number RADIANS(cmsFloat64Number deg)
+{
+    return (deg * M_PI) / 180.;
+}
+
+
+// Auxiliary: atan2 but operating in degrees and returning 0 if a==b==0
+static
+cmsFloat64Number atan2deg(cmsFloat64Number a, cmsFloat64Number b)
+{
+    cmsFloat64Number deg;
+
+    // With both arguments zero atan2 is not called at all and the angle is taken as 0
+    if (a == 0 && b == 0)
+        deg = 0;
+    else
+        deg = atan2(a, b);
+
+    // Radians to degrees
+    deg *= (180. / M_PI);
+
+    // Wrap into 0..360
+    while (deg > 360.) deg -= 360.;
+    while (deg < 0)    deg += 360.;
+
+    return deg;
+}
+
+
+// Auxiliary: returns v squared
+static
+cmsFloat64Number Sqr(cmsFloat64Number v)
+{
+    return v *  v;
+}
+// To cylindrical coordinates (Lab -> LCh). No check is performed, then negative values are allowed
+void CMSEXPORT cmsLab2LCh(cmsCIELCh* LCh, const cmsCIELab* Lab)
+{
+    LCh->L = Lab->L;                                  // L is copied unchanged
+    LCh->C = pow(Sqr(Lab->a) + Sqr(Lab->b), 0.5);     // C: length of the (a, b) vector
+    LCh->h = atan2deg(Lab->b, Lab->a);                // h: angle of the (a, b) vector, in degrees
+}
+
+
+// From cylindrical coordinates (LCh -> Lab). No check is performed, then negative values are allowed
+void CMSEXPORT cmsLCh2Lab(cmsCIELab* Lab, const cmsCIELCh* LCh)
+{
+    // h is scaled by pi/180 (degrees to radians) before being handed to cos() and sin()
+    const cmsFloat64Number hRad = (LCh->h * M_PI) / 180.0;
+    Lab->L = LCh->L;
+    Lab->a = LCh->C * cos(hRad);
+    Lab->b = LCh->C * sin(hRad);
+}
+
+// In XYZ, all 3 components are encoded using 1.15 fixed point
+static
+cmsUInt16Number XYZ2Fix(cmsFloat64Number d)
+{
+    return _cmsQuickSaturateWord(d * 32768.0);    // 32768 = 2^15, i.e. 1.0 expressed with 15 fractional bits
+}
+
+// Clamps a floating-point XYZ triplet and encodes it into three 16-bit words.
+// The input structure is not modified; clamping works on a local copy.
+void CMSEXPORT cmsFloat2XYZEncoded(cmsUInt16Number XYZ[3], const cmsCIEXYZ* fXYZ)
+{
+    cmsCIEXYZ clamped;
+
+    clamped.X = fXYZ->X;
+    clamped.Y = fXYZ->Y;
+    clamped.Z = fXYZ->Z;
+
+    // A non-positive Y turns the whole triplet into zero
+    if (clamped.Y <= 0) {
+
+        clamped.X = 0;
+        clamped.Y = 0;
+        clamped.Z = 0;
+    }
+
+    // Each component is limited to 0..MAX_ENCODEABLE_XYZ, upper bound first
+    if (clamped.X > MAX_ENCODEABLE_XYZ)
+        clamped.X = MAX_ENCODEABLE_XYZ;
+    if (clamped.X < 0)
+        clamped.X = 0;
+
+    if (clamped.Y > MAX_ENCODEABLE_XYZ)
+        clamped.Y = MAX_ENCODEABLE_XYZ;
+    if (clamped.Y < 0)
+        clamped.Y = 0;
+
+    if (clamped.Z > MAX_ENCODEABLE_XYZ)
+        clamped.Z = MAX_ENCODEABLE_XYZ;
+    if (clamped.Z < 0)
+        clamped.Z = 0;
+
+    // Quantize each component with XYZ2Fix
+    XYZ[0] = XYZ2Fix(clamped.X);
+    XYZ[1] = XYZ2Fix(clamped.Y);
+    XYZ[2] = XYZ2Fix(clamped.Z);
+}
+
+
+// Decode a 1.15 fixed point word into a cmsFloat64Number.
+// The word is first widened to 15.16 (one bit to the left) and then
+// handed to the generic 15.16 decoder.
+static cmsFloat64Number XYZ2float(cmsUInt16Number v)
+{
+    // 1.15 -> 15.16
+    const cmsS15Fixed16Number widened = v << 1;
+
+    // 15.16 -> cmsFloat64Number
+    const cmsFloat64Number decoded = _cms15Fixed16toDouble(widened);
+    return decoded;
+}
+
+// Decodes three encoded XYZ words into floating point, one XYZ2float() call per component
+void CMSEXPORT cmsXYZEncoded2Float(cmsCIEXYZ* fXYZ, const cmsUInt16Number XYZ[3])
+{
+    fXYZ -> X = XYZ2float(XYZ[0]);
+    fXYZ -> Y = XYZ2float(XYZ[1]);
+    fXYZ -> Z = XYZ2float(XYZ[2]);
+}
+
+
+// Returns dE, the Euclidean distance between two Lab values
+cmsFloat64Number CMSEXPORT cmsDeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2)
+{
+    // Absolute per-component differences
+    const cmsFloat64Number diffL = fabs(Lab1->L - Lab2->L);
+    const cmsFloat64Number diffA = fabs(Lab1->a - Lab2->a);
+    const cmsFloat64Number diffB = fabs(Lab1->b - Lab2->b);
+    const cmsFloat64Number sumSq = Sqr(diffL) + Sqr(diffA) + Sqr(diffB);
+
+    return pow(sumSq, 0.5);
+}
+
+
+// Return the CIE94 Delta E between two Lab values
+cmsFloat64Number CMSEXPORT cmsCIE94DeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2)
+{
+    cmsCIELCh polar1, polar2;
+    cmsFloat64Number diffL, diffC, diffH, diffHsq, plainDE;
+    cmsFloat64Number meanChroma, weightC, weightH;
+
+    // Lightness difference comes straight from the Lab values
+    diffL = fabs(Lab1->L - Lab2->L);
+
+    // Chroma difference needs the polar (LCh) form of both colours
+    cmsLab2LCh(&polar1, Lab1);
+    cmsLab2LCh(&polar2, Lab2);
+    diffC = fabs(polar1.C - polar2.C);
+
+    // Hue difference is what remains of the plain dE once L and C are removed;
+    // a negative remainder is treated as zero
+    plainDE = cmsDeltaE(Lab1, Lab2);
+    diffHsq = Sqr(plainDE) - Sqr(diffL) - Sqr(diffC);
+    diffH   = (diffHsq < 0) ? 0 : pow(diffHsq, 0.5);
+
+    // Weights derived from the geometric mean of both chromas
+    meanChroma = sqrt(polar1.C * polar2.C);
+    weightC = 1.0 + (0.048 * meanChroma);
+    weightH = 1.0 + (0.014 * meanChroma);
+
+    return sqrt(Sqr(diffL)  + Sqr(diffC) / Sqr(weightC) + Sqr(diffH) / Sqr(weightH));
+}
+
+
+// Auxiliary: lightness term used by cmsBFDdeltaE, computed from the L component only
+static cmsFloat64Number ComputeLBFD(const cmsCIELab* Lab)
+{
+    const cmsFloat64Number L = Lab->L;
+    cmsFloat64Number yt;
+
+    if (L > 7.996969) {
+        const cmsFloat64Number ratio = (L + 16) / 116;
+        yt = (Sqr(ratio) * ratio) * 100;
+    }
+    else yt = 100 * (L / 903.3);
+    return (54.6 * (M_LOG10E * (log(yt + 1.5))) - 9.6);
+}
+
+
+
+// bfd - gets BFD(1:1) difference between Lab1, Lab2
+cmsFloat64Number CMSEXPORT cmsBFDdeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2)
+{
+    cmsFloat64Number lbfd1,lbfd2,AveC,Aveh,dE,deltaL,
+        deltaC,deltah,dc,t,g,dh,rh,rc,rt,bfd;
+    cmsCIELCh LCh1, LCh2;
+
+    // Lightness difference is taken on the values returned by ComputeLBFD()
+    lbfd1 = ComputeLBFD(Lab1);
+    lbfd2 = ComputeLBFD(Lab2);
+    deltaL = lbfd2 - lbfd1;
+
+    cmsLab2LCh(&LCh1, Lab1);
+    cmsLab2LCh(&LCh2, Lab2);
+
+    deltaC = LCh2.C - LCh1.C;
+    AveC = (LCh1.C+LCh2.C)/2;       // mean chroma
+    Aveh = (LCh1.h+LCh2.h)/2;       // plain arithmetic mean of the two hue angles (degrees)
+
+    dE = cmsDeltaE(Lab1, Lab2);
+
+    if (Sqr(dE)>(Sqr(Lab2->L-Lab1->L)+Sqr(deltaC)))
+        deltah = sqrt(Sqr(dE)-Sqr(Lab2->L-Lab1->L)-Sqr(deltaC));
+    else
+        deltah =0;                  // hue term dropped when the remainder is not positive
+
+    // Weighting terms; every (x)/(180/M_PI) below converts degrees to radians for cos()
+    dc   = 0.035 * AveC / (1 + 0.00365 * AveC)+0.521;
+    g    = sqrt(Sqr(Sqr(AveC))/(Sqr(Sqr(AveC))+14000));
+    t    = 0.627+(0.055*cos((Aveh-254)/(180/M_PI))-
+           0.040*cos((2*Aveh-136)/(180/M_PI))+
+           0.070*cos((3*Aveh-31)/(180/M_PI))+
+           0.049*cos((4*Aveh+114)/(180/M_PI))-
+           0.015*cos((5*Aveh-103)/(180/M_PI)));
+
+    dh    = dc*(g*t+1-g);
+    rh    = -0.260*cos((Aveh-308)/(180/M_PI))-
+           0.379*cos((2*Aveh-160)/(180/M_PI))-
+           0.636*cos((3*Aveh+254)/(180/M_PI))+
+           0.226*cos((4*Aveh+140)/(180/M_PI))-
+           0.194*cos((5*Aveh+280)/(180/M_PI));
+
+    rc = sqrt((AveC*AveC*AveC*AveC*AveC*AveC)/((AveC*AveC*AveC*AveC*AveC*AveC)+70000000));
+    rt = rh*rc;                     // cross term coefficient between chroma and hue
+
+    bfd = sqrt(Sqr(deltaL)+Sqr(deltaC/dc)+Sqr(deltah/dh)+(rt*(deltaC/dc)*(deltah/dh)));
+
+    return bfd;
+}
+
+
+//  cmc - CMC(l:c) difference between Lab1, Lab2
+cmsFloat64Number CMSEXPORT cmsCMCdeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2, cmsFloat64Number l, cmsFloat64Number c)
+{
+  cmsFloat64Number dE,dL,dC,dh,sl,sc,sh,t,f,cmc;
+  cmsCIELCh LCh1, LCh2;
+
+  if (Lab1 ->L == 0 && Lab2 ->L == 0) return 0;   // both L are zero: reported as no difference
+
+  cmsLab2LCh(&LCh1, Lab1);
+  cmsLab2LCh(&LCh2, Lab2);
+
+  // Signed lightness and chroma differences (second minus first)
+  dL = Lab2->L-Lab1->L;
+  dC = LCh2.C-LCh1.C;
+
+  dE = cmsDeltaE(Lab1, Lab2);
+
+  if (Sqr(dE)>(Sqr(dL)+Sqr(dC)))
+            dh = sqrt(Sqr(dE)-Sqr(dL)-Sqr(dC));
+  else
+            dh =0;
+
+  if ((LCh1.h > 164) && (LCh1.h < 345))
+      t = 0.56 + fabs(0.2 * cos(((LCh1.h + 168)/(180/M_PI))));
+  else
+      t = 0.36 + fabs(0.4 * cos(((LCh1.h + 35 )/(180/M_PI))));
+
+   sc  = 0.0638   * LCh1.C / (1 + 0.0131  * LCh1.C) + 0.638;   // weights use the first colour only
+   sl  = 0.040975 * Lab1->L /(1 + 0.01765 * Lab1->L);
+
+   if (Lab1->L<16)
+         sl = 0.511;                                           // fixed weight when L of the first colour is below 16
+
+   f   = sqrt((LCh1.C * LCh1.C * LCh1.C * LCh1.C)/((LCh1.C * LCh1.C * LCh1.C * LCh1.C)+1900));
+   sh  = sc*(t*f+1-f);
+   cmc = sqrt(Sqr(dL/(l*sl))+Sqr(dC/(c*sc))+Sqr(dh/sh));
+
+   return cmc;
+}
+
+// dE2000 The weightings KL, KC and KH can be modified to reflect the relative
+// importance of lightness, chroma and hue in different industrial applications
+cmsFloat64Number CMSEXPORT cmsCIE2000DeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2,
+                                  cmsFloat64Number Kl, cmsFloat64Number Kc, cmsFloat64Number Kh)
+{
+    cmsFloat64Number L1  = Lab1->L;
+    cmsFloat64Number a1  = Lab1->a;
+    cmsFloat64Number b1  = Lab1->b;
+    cmsFloat64Number C   = sqrt( Sqr(a1) + Sqr(b1) );
+
+    cmsFloat64Number Ls = Lab2 ->L;
+    cmsFloat64Number as = Lab2 ->a;
+    cmsFloat64Number bs = Lab2 ->b;
+    cmsFloat64Number Cs = sqrt( Sqr(as) + Sqr(bs) );
+
+    cmsFloat64Number G = 0.5 * ( 1 - sqrt(pow((C + Cs) / 2 , 7.0) / (pow((C + Cs) / 2, 7.0) + pow(25.0, 7.0) ) ));
+
+    cmsFloat64Number a_p = (1 + G ) * a1;       // a is rescaled by (1 + G), b is kept as is
+    cmsFloat64Number b_p = b1;
+    cmsFloat64Number C_p = sqrt( Sqr(a_p) + Sqr(b_p));
+    cmsFloat64Number h_p = atan2deg(b_p, a_p);
+
+
+    // Same adjustment for the second colour (names ending in "s")
+    cmsFloat64Number a_ps = (1 + G) * as;
+    cmsFloat64Number b_ps = bs;
+    cmsFloat64Number C_ps = sqrt(Sqr(a_ps) + Sqr(b_ps));
+    cmsFloat64Number h_ps = atan2deg(b_ps, a_ps);
+
+    cmsFloat64Number meanC_p =(C_p + C_ps) / 2;
+
+    cmsFloat64Number hps_plus_hp  = h_ps + h_p;
+    cmsFloat64Number hps_minus_hp = h_ps - h_p;
+
+    cmsFloat64Number meanh_p = fabs(hps_minus_hp) <= 180.000001 ? (hps_plus_hp)/2 :
+                            (hps_plus_hp) < 360 ? (hps_plus_hp + 360)/2 :
+                                                 (hps_plus_hp - 360)/2;
+
+    cmsFloat64Number delta_h = (hps_minus_hp) <= -180.000001 ?  (hps_minus_hp + 360) :
+                            (hps_minus_hp) > 180 ? (hps_minus_hp - 360) :
+                                                    (hps_minus_hp);
+    cmsFloat64Number delta_L = (Ls - L1);
+    cmsFloat64Number delta_C = (C_ps - C_p );
+
+    // delta_h is an angle in degrees; delta_H below is the hue difference derived from it
+    cmsFloat64Number delta_H =2 * sqrt(C_ps*C_p) * sin(RADIANS(delta_h) / 2);
+
+    cmsFloat64Number T = 1 - 0.17 * cos(RADIANS(meanh_p-30))
+                 + 0.24 * cos(RADIANS(2*meanh_p))
+                 + 0.32 * cos(RADIANS(3*meanh_p + 6))
+                 - 0.2  * cos(RADIANS(4*meanh_p - 63));
+
+    cmsFloat64Number Sl = 1 + (0.015 * Sqr((Ls + L1) /2- 50) )/ sqrt(20 + Sqr( (Ls+L1)/2 - 50) );
+
+    cmsFloat64Number Sc = 1 + 0.045 * (C_p + C_ps)/2;
+    cmsFloat64Number Sh = 1 + 0.015 * ((C_ps + C_p)/2) * T;
+
+    cmsFloat64Number delta_ro = 30 * exp( -Sqr(((meanh_p - 275 ) / 25)));
+
+    cmsFloat64Number Rc = 2 * sqrt(( pow(meanC_p, 7.0) )/( pow(meanC_p, 7.0) + pow(25.0, 7.0)));
+
+    cmsFloat64Number Rt = -sin(2 * RADIANS(delta_ro)) * Rc;     // coefficient of the chroma/hue cross term
+
+    cmsFloat64Number deltaE00 = sqrt( Sqr(delta_L /(Sl * Kl)) +
+                            Sqr(delta_C/(Sc * Kc))  +
+                            Sqr(delta_H/(Sh * Kh))  +
+                            Rt*(delta_C/(Sc * Kc)) * (delta_H / (Sh * Kh)));
+
+    return deltaE00;
+}
+
+// Returns the number of gridpoints to be used for a LUT table in the given
+// colorspace. The same number of gridpoints is assumed in all dimensions.
+// Flags may override the choice.
+cmsUInt32Number _cmsReasonableGridpointsByColorspace(cmsColorSpaceSignature Colorspace, cmsUInt32Number dwFlags)
+{
+    cmsUInt32Number channelCount;
+
+    // Bits 16..23 of the flags, when non-zero, carry an explicit grid size
+    if ((dwFlags & 0x00FF0000) != 0)
+        return (dwFlags >> 16) & 0xFF;
+
+    channelCount = cmsChannelsOf(Colorspace);
+
+    // HighResPrecalc is maximum resolution
+    if (dwFlags & cmsFLAGS_HIGHRESPRECALC) {
+
+        if (channelCount > 4) {
+            return 7;               // 7 for Hifi
+        }
+        else if (channelCount == 4) {
+            return 23;              // 23 for CMYK
+        }
+        else {
+            return 49;              // 49 for RGB and others
+        }
+    }
+
+    // LowResPrecalc is lower resolution
+    if (dwFlags & cmsFLAGS_LOWRESPRECALC) {
+
+        if (channelCount > 4) {
+            return 6;               // 6 for more than 4 channels
+        }
+        else if (channelCount == 1) {
+            return 33;              // For monochrome
+        }
+        else {
+            return 17;              // 17 for remaining
+        }
+    }
+
+    // Default values
+    if (channelCount > 4) return 7;     // 7 for Hifi
+
+    if (channelCount == 4) return 17;   // 17 for CMYK
+
+    return 33;                          // 33 for RGB
+}
+
+
+// Looks up the white and black end points, plus the number of outputs, of the
+// most common colorspaces. Any of the output pointers may be NULL. Returns
+// FALSE, leaving the outputs untouched, for spaces that are not listed.
+cmsBool  _cmsEndPointsBySpace(cmsColorSpaceSignature Space,
+                             cmsUInt16Number **White,
+                             cmsUInt16Number **Black,
+                             cmsUInt32Number *nOutputs)
+{
+       static cmsUInt16Number RGBblack[4]  = { 0, 0, 0 };
+       static cmsUInt16Number RGBwhite[4]  = { 0xffff, 0xffff, 0xffff };
+       static cmsUInt16Number CMYKblack[4] = { 0xffff, 0xffff, 0xffff, 0xffff };   // 400% of ink
+       static cmsUInt16Number CMYKwhite[4] = { 0, 0, 0, 0 };
+       static cmsUInt16Number LABblack[4]  = { 0, 0x8080, 0x8080 };               // V4 Lab encoding
+       static cmsUInt16Number LABwhite[4]  = { 0xFFFF, 0x8080, 0x8080 };
+       static cmsUInt16Number CMYblack[4]  = { 0xffff, 0xffff, 0xffff };
+       static cmsUInt16Number CMYwhite[4]  = { 0, 0, 0 };
+       static cmsUInt16Number Grayblack[4] = { 0 };
+       static cmsUInt16Number GrayWhite[4] = { 0xffff };
+
+       cmsUInt16Number* whitePoint;
+       cmsUInt16Number* blackPoint;
+       cmsUInt32Number  channels;
+
+       switch (Space) {
+
+       case cmsSigGrayData:
+            whitePoint = GrayWhite; blackPoint = Grayblack; channels = 1;
+            break;
+       case cmsSigRgbData:
+            whitePoint = RGBwhite;  blackPoint = RGBblack;  channels = 3;
+            break;
+       case cmsSigLabData:
+            whitePoint = LABwhite;  blackPoint = LABblack;  channels = 3;
+            break;
+       case cmsSigCmykData:
+            whitePoint = CMYKwhite; blackPoint = CMYKblack; channels = 4;
+            break;
+       case cmsSigCmyData:
+            whitePoint = CMYwhite;  blackPoint = CMYblack;  channels = 3;
+            break;
+       default:
+            return FALSE;
+       }
+
+       if (White)    *White = whitePoint;
+       if (Black)    *Black = blackPoint;
+       if (nOutputs) *nOutputs = channels;
+
+       return TRUE;
+}
+
+
+
+// Several utilities -------------------------------------------------------
+
+// Translate from our colorspace to ICC representation
+// Notations that are not listed yield 0 cast to cmsColorSpaceSignature.
+cmsColorSpaceSignature CMSEXPORT _cmsICCcolorSpace(int OurNotation)
+{
+       switch (OurNotation) {
+       // The bare values 1 and 2 share the results of PT_GRAY and PT_RGB
+       case 1:
+       case PT_GRAY: return cmsSigGrayData;
+
+       case 2:
+       case PT_RGB:  return cmsSigRgbData;
+
+       case PT_CMY:  return cmsSigCmyData;
+       case PT_CMYK: return cmsSigCmykData;
+       case PT_YCbCr:return cmsSigYCbCrData;
+       case PT_YUV:  return cmsSigLuvData;
+       case PT_XYZ:  return cmsSigXYZData;
+
+       case PT_LabV2:
+       case PT_Lab:  return cmsSigLabData;      // both Lab notations map to the same signature
+
+       case PT_YUVK: return cmsSigLuvKData;
+       case PT_HSV:  return cmsSigHsvData;
+       case PT_HLS:  return cmsSigHlsData;
+       case PT_Yxy:  return cmsSigYxyData;
+
+       case PT_MCH1: return cmsSigMCH1Data;
+       case PT_MCH2: return cmsSigMCH2Data;
+       case PT_MCH3: return cmsSigMCH3Data;
+       case PT_MCH4: return cmsSigMCH4Data;
+       case PT_MCH5: return cmsSigMCH5Data;
+       case PT_MCH6: return cmsSigMCH6Data;
+       case PT_MCH7: return cmsSigMCH7Data;
+       case PT_MCH8: return cmsSigMCH8Data;
+
+       case PT_MCH9:  return cmsSigMCH9Data;
+       case PT_MCH10: return cmsSigMCHAData;    // channels 10..15 use the letters A..F
+       case PT_MCH11: return cmsSigMCHBData;
+       case PT_MCH12: return cmsSigMCHCData;
+       case PT_MCH13: return cmsSigMCHDData;
+       case PT_MCH14: return cmsSigMCHEData;
+       case PT_MCH15: return cmsSigMCHFData;
+
+       default:  return (cmsColorSpaceSignature) 0;
+       }
+}
+
+// Translate from an ICC colorspace signature to our notation; signatures that are not listed yield 0
+int CMSEXPORT _cmsLCMScolorSpace(cmsColorSpaceSignature ProfileSpace)
+{
+    switch (ProfileSpace) {
+
+    case cmsSigGrayData: return  PT_GRAY;
+    case cmsSigRgbData:  return  PT_RGB;
+    case cmsSigCmyData:  return  PT_CMY;
+    case cmsSigCmykData: return  PT_CMYK;
+    case cmsSigYCbCrData:return  PT_YCbCr;
+    case cmsSigLuvData:  return  PT_YUV;
+    case cmsSigXYZData:  return  PT_XYZ;
+    case cmsSigLabData:  return  PT_Lab;
+    case cmsSigLuvKData: return  PT_YUVK;
+    case cmsSigHsvData:  return  PT_HSV;
+    case cmsSigHlsData:  return  PT_HLS;
+    case cmsSigYxyData:  return  PT_Yxy;
+
+    case cmsSig1colorData:
+    case cmsSigMCH1Data: return PT_MCH1;
+
+    case cmsSig2colorData:
+    case cmsSigMCH2Data: return PT_MCH2;
+
+    case cmsSig3colorData:
+    case cmsSigMCH3Data: return PT_MCH3;
+
+    case cmsSig4colorData:
+    case cmsSigMCH4Data: return PT_MCH4;
+
+    case cmsSig5colorData:
+    case cmsSigMCH5Data: return PT_MCH5;
+
+    case cmsSig6colorData:
+    case cmsSigMCH6Data: return PT_MCH6;
+
+    case cmsSig7colorData:
+    case cmsSigMCH7Data: return PT_MCH7;
+
+    case cmsSig8colorData:
+    case cmsSigMCH8Data: return PT_MCH8;
+
+    case cmsSig9colorData:
+    case cmsSigMCH9Data: return PT_MCH9;
+
+    case cmsSig10colorData:
+    case cmsSigMCHAData: return PT_MCH10;
+
+    case cmsSig11colorData:
+    case cmsSigMCHBData: return PT_MCH11;
+
+    case cmsSig12colorData:
+    case cmsSigMCHCData: return PT_MCH12;
+
+    case cmsSig13colorData:
+    case cmsSigMCHDData: return PT_MCH13;
+
+    case cmsSig14colorData:
+    case cmsSigMCHEData: return PT_MCH14;
+
+    case cmsSig15colorData:
+    case cmsSigMCHFData: return PT_MCH15;
+
+    default:  return 0;     // plain int: this function does not return a signature
+    }
+}
+
+// Returns the number of channels of a colorspace signature
+cmsUInt32Number CMSEXPORT cmsChannelsOf(cmsColorSpaceSignature ColorSpace)
+{
+    switch (ColorSpace) {
+
+    case cmsSigMCH1Data:
+    case cmsSig1colorData:
+    case cmsSigGrayData: return 1;
+
+    case cmsSigMCH2Data:
+    case cmsSig2colorData:  return 2;
+
+    case cmsSigXYZData:
+    case cmsSigLabData:
+    case cmsSigLuvData:
+    case cmsSigYCbCrData:
+    case cmsSigYxyData:
+    case cmsSigRgbData:
+    case cmsSigHsvData:
+    case cmsSigHlsData:
+    case cmsSigCmyData:
+    case cmsSigMCH3Data:
+    case cmsSig3colorData:  return 3;
+
+    case cmsSigLuvKData:
+    case cmsSigCmykData:
+    case cmsSigMCH4Data:
+    case cmsSig4colorData:  return 4;
+
+    case cmsSigMCH5Data:
+    case cmsSig5colorData:  return 5;
+
+    case cmsSigMCH6Data:
+    case cmsSig6colorData:  return 6;
+
+    case cmsSigMCH7Data:
+    case cmsSig7colorData:  return  7;
+
+    case cmsSigMCH8Data:
+    case cmsSig8colorData:  return  8;
+
+    case cmsSigMCH9Data:
+    case cmsSig9colorData:  return  9;
+
+    case cmsSigMCHAData:
+    case cmsSig10colorData: return 10;
+
+    case cmsSigMCHBData:
+    case cmsSig11colorData: return 11;
+
+    case cmsSigMCHCData:
+    case cmsSig12colorData: return 12;
+
+    case cmsSigMCHDData:
+    case cmsSig13colorData: return 13;
+
+    case cmsSigMCHEData:
+    case cmsSig14colorData: return 14;
+
+    case cmsSigMCHFData:
+    case cmsSig15colorData: return 15;
+
+    default: return 3;      // signatures that are not listed are reported as 3 channels
+    }
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmsplugin.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmsplugin.c
new file mode 100644
index 0000000000..d140dc0da5
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmsplugin.c
@@ -0,0 +1,992 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// ----------------------------------------------------------------------------------
+// Encoding & Decoding support functions
+// ----------------------------------------------------------------------------------
+
+//      Little-Endian to Big-Endian
+
+// Byte-swap a word value after being read / before being written from/to an ICC profile.
+// When CMS_USE_BIG_ENDIAN is defined the word is returned untouched.
+cmsUInt16Number CMSEXPORT  _cmsAdjustEndianess16(cmsUInt16Number Word)
+{
+#ifndef CMS_USE_BIG_ENDIAN
+
+    // Exchange the two bytes with shifts instead of going through a byte pointer
+    const cmsUInt32Number wide = Word;
+
+    Word = (cmsUInt16Number) (((wide << 8) | (wide >> 8)) & 0xFFFF);
+
+#endif
+
+    return Word;
+}
+
+
+// Transports to properly encoded values - note that ICC profiles use big endian notation.
+// When CMS_USE_BIG_ENDIAN is defined the value is returned untouched.
+
+// 1 2 3 4
+// 4 3 2 1
+
+cmsUInt32Number CMSEXPORT  _cmsAdjustEndianess32(cmsUInt32Number DWord)
+{
+#ifndef CMS_USE_BIG_ENDIAN
+
+    // Reverse the four bytes with masks and shifts
+    const cmsUInt32Number byte0 = (DWord & 0x000000FFU) << 24;
+    const cmsUInt32Number byte1 = (DWord & 0x0000FF00U) << 8;
+    const cmsUInt32Number byte2 = (DWord & 0x00FF0000U) >> 8;
+    const cmsUInt32Number byte3 = (DWord & 0xFF000000U) >> 24;
+
+    DWord = byte0 | byte1 | byte2 | byte3;
+
+#endif
+
+    return DWord;
+}
+
+// 1 2 3 4 5 6 7 8
+// 8 7 6 5 4 3 2 1
+//
+// Copies QWord into Result with its eight bytes in reverse order. When
+// CMS_USE_BIG_ENDIAN is defined the value is copied without any swapping.
+// Result must not be NULL (asserted).
+void CMSEXPORT  _cmsAdjustEndianess64(cmsUInt64Number* Result, cmsUInt64Number* QWord)
+{
+
+#ifndef CMS_USE_BIG_ENDIAN
+
+    cmsUInt8Number* src = (cmsUInt8Number*) QWord;
+    cmsUInt8Number* dst = (cmsUInt8Number*) Result;
+    int i;
+
+    _cmsAssert(Result != NULL);
+
+    // Same store order as writing dst[7], dst[6], ... dst[0] by hand
+    for (i = 0; i < 8; i++) {
+        dst[7 - i] = src[i];
+    }
+
+#else
+    _cmsAssert(Result != NULL);
+
+#  ifdef CMS_DONT_USE_INT64
+    (*Result)[0] = (*QWord)[0];
+    (*Result)[1] = (*QWord)[1];
+#  else
+    *Result = *QWord;
+#  endif
+#endif
+}
+
+// Auxiliary -- read 8, 16 and 32-bit numbers. A NULL destination means read and discard.
+cmsBool CMSEXPORT  _cmsReadUInt8Number(cmsIOHANDLER* io, cmsUInt8Number* n)
+{
+    cmsUInt8Number byteRead;
+
+    _cmsAssert(io != NULL);
+
+    if (io->Read(io, &byteRead, sizeof(cmsUInt8Number), 1) == 1) {
+        if (n != NULL) { *n = byteRead; }
+        return TRUE;
+    }
+    return FALSE;
+}
+
+cmsBool CMSEXPORT  _cmsReadUInt16Number(cmsIOHANDLER* io, cmsUInt16Number* n)
+{
+    cmsUInt16Number raw;   // value exactly as stored in the stream
+    _cmsAssert(io != NULL);
+
+    if (io->Read(io, &raw, sizeof(cmsUInt16Number), 1) == 1) {
+        // Endianness is adjusted only when the caller wants the value
+        if (n != NULL) { *n = _cmsAdjustEndianess16(raw); }
+        return TRUE;
+    }
+    return FALSE;
+}
+
+cmsBool CMSEXPORT  _cmsReadUInt16Array(cmsIOHANDLER* io, cmsUInt32Number n, cmsUInt16Number* Array)
+{
+    cmsUInt32Number idx;
+    cmsUInt16Number* slot;
+
+    _cmsAssert(io != NULL);
+
+    // Reads n words one at a time; with a NULL Array the words are
+    // still consumed from the stream but not stored anywhere.
+    for (idx = 0; idx < n; idx++) {
+
+        slot = (Array != NULL) ? (Array + idx) : NULL;
+
+        if (!_cmsReadUInt16Number(io, slot))
+            return FALSE;
+    }
+    return TRUE;
+}
+
+cmsBool CMSEXPORT  _cmsReadUInt32Number(cmsIOHANDLER* io, cmsUInt32Number* n)
+{
+    cmsUInt32Number raw;   // value exactly as stored in the stream
+    _cmsAssert(io != NULL);
+
+    if (io->Read(io, &raw, sizeof(cmsUInt32Number), 1) == 1) {
+        // Endianness is adjusted only when the caller wants the value
+        if (n != NULL) { *n = _cmsAdjustEndianess32(raw); }
+        return TRUE;
+    }
+    return FALSE;
+}
+
+cmsBool CMSEXPORT  _cmsReadFloat32Number(cmsIOHANDLER* io, cmsFloat32Number* n)
+{
+    cmsUInt32Number tmp;
+
+    _cmsAssert(io != NULL);
+
+    if (io->Read(io, &tmp, sizeof(cmsUInt32Number), 1) != 1)
+        return FALSE;
+
+    if (n != NULL) {
+
+        // Copy the swapped bits into the float with memcpy: reading them through a cast pointer breaks strict aliasing
+        tmp = _cmsAdjustEndianess32(tmp);
+        memcpy(n, &tmp, sizeof(cmsFloat32Number));
+
+        // Safeguard which covers against absurd values
+        if (*n > 1E+20 || *n < -1E+20) return FALSE;
+
+        #if defined(_MSC_VER) && _MSC_VER < 1800
+           return TRUE;
+        #elif defined (__BORLANDC__)
+           return TRUE;
+        #else
+           // fpclassify() required by C99 (only provided by MSVC >= 1800, VS2013 onwards); only zero and normal values pass
+           return ((fpclassify(*n) == FP_ZERO) || (fpclassify(*n) == FP_NORMAL));
+        #endif
+    }
+
+    return TRUE;
+}
+
+
+cmsBool CMSEXPORT   _cmsReadUInt64Number(cmsIOHANDLER* io, cmsUInt64Number* n)
+{
+    cmsUInt64Number raw;
+
+    _cmsAssert(io != NULL);
+
+    // Bail out when the value cannot be read in one piece
+    if (io->Read(io, &raw, sizeof(cmsUInt64Number), 1) != 1) return FALSE;
+
+    // A NULL destination just skips the value
+    if (n == NULL) return TRUE;
+
+    _cmsAdjustEndianess64(n, &raw);
+
+    return TRUE;
+}
+
+
+cmsBool CMSEXPORT  _cmsRead15Fixed16Number(cmsIOHANDLER* io, cmsFloat64Number* n)
+{
+    cmsUInt32Number raw;
+    cmsS15Fixed16Number fixedValue;
+
+    _cmsAssert(io != NULL);
+
+    if (io->Read(io, &raw, sizeof(cmsUInt32Number), 1) != 1) return FALSE;
+    if (n == NULL) return TRUE;
+
+    // Byte order first, then 15.16 fixed point to double
+    fixedValue = (cmsS15Fixed16Number) _cmsAdjustEndianess32(raw);
+    *n = _cms15Fixed16toDouble(fixedValue);
+    return TRUE;
+}
+
+
+cmsBool CMSEXPORT  _cmsReadXYZNumber(cmsIOHANDLER* io, cmsCIEXYZ* XYZ)
+{
+    cmsEncodedXYZNumber encoded;
+
+    _cmsAssert(io != NULL);
+
+    if (io->Read(io, &encoded, sizeof(cmsEncodedXYZNumber), 1) != 1)
+        return FALSE;
+
+    // With a NULL destination the triplet is read and dropped
+    if (XYZ == NULL) return TRUE;
+    XYZ->X = _cms15Fixed16toDouble((cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) encoded.X));
+    XYZ->Y = _cms15Fixed16toDouble((cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) encoded.Y));
+    XYZ->Z = _cms15Fixed16toDouble((cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) encoded.Z));
+    return TRUE;
+}
+
+cmsBool CMSEXPORT  _cmsWriteUInt8Number(cmsIOHANDLER* io, cmsUInt8Number n)
+{
+    cmsBool written;
+
+    _cmsAssert(io != NULL);
+
+    written = (io->Write(io, sizeof(cmsUInt8Number), &n) == 1);
+    return written ? TRUE : FALSE;
+}
+
+cmsBool CMSEXPORT  _cmsWriteUInt16Number(cmsIOHANDLER* io, cmsUInt16Number n)
+{
+    // The value is byte-swapped into a scratch word before it is written
+    cmsUInt16Number swapped;
+
+    _cmsAssert(io != NULL);
+
+    swapped = _cmsAdjustEndianess16(n);
+    if (io->Write(io, sizeof(cmsUInt16Number), &swapped) == 1)
+        return TRUE;
+    return FALSE;
+}
+
+cmsBool CMSEXPORT  _cmsWriteUInt16Array(cmsIOHANDLER* io, cmsUInt32Number n, const cmsUInt16Number* Array)
+{
+    cmsUInt32Number written = 0;
+
+    _cmsAssert(io != NULL);
+    _cmsAssert(Array != NULL);
+
+    // Stop at the first word that fails to be written
+    while (written < n) {
+        if (!_cmsWriteUInt16Number(io, Array[written++])) return FALSE;
+    }
+    return TRUE;
+}
+
+cmsBool CMSEXPORT  _cmsWriteUInt32Number(cmsIOHANDLER* io, cmsUInt32Number n)
+{
+    // The value is byte-swapped into a scratch word before it is written
+    cmsUInt32Number swapped;
+
+    _cmsAssert(io != NULL);
+
+    swapped = _cmsAdjustEndianess32(n);
+    if (io->Write(io, sizeof(cmsUInt32Number), &swapped) == 1)
+        return TRUE;
+    return FALSE;
+}
+
+
+cmsBool CMSEXPORT  _cmsWriteFloat32Number(cmsIOHANDLER* io, cmsFloat32Number n)
+{
+    cmsUInt32Number tmp;
+
+    _cmsAssert(io != NULL);
+
+    // Copy the float's bits with memcpy: reading them through a cast pointer breaks strict aliasing
+    memcpy(&tmp, &n, sizeof(tmp));
+    tmp = _cmsAdjustEndianess32(tmp);
+    if (io -> Write(io, sizeof(cmsUInt32Number), &tmp) != 1)
+            return FALSE;
+    return TRUE;
+}
+
+cmsBool CMSEXPORT  _cmsWriteUInt64Number(cmsIOHANDLER* io, cmsUInt64Number* n)
+{
+    // The value is byte-swapped into a scratch copy; *n itself is only read
+    cmsUInt64Number swapped;
+
+    _cmsAssert(io != NULL);
+
+    _cmsAdjustEndianess64(&swapped, n);
+    if (io->Write(io, sizeof(cmsUInt64Number), &swapped) == 1)
+        return TRUE;
+    return FALSE;
+}
+
+cmsBool CMSEXPORT  _cmsWrite15Fixed16Number(cmsIOHANDLER* io, cmsFloat64Number n)
+{
+    // double -> 15.16 fixed point -> byte-swapped word
+    cmsUInt32Number encoded;
+
+    _cmsAssert(io != NULL);
+
+    encoded = _cmsAdjustEndianess32((cmsUInt32Number) _cmsDoubleTo15Fixed16(n));
+    if (io->Write(io, sizeof(cmsUInt32Number), &encoded) == 1)
+        return TRUE;
+    return FALSE;
+}
+
+cmsBool CMSEXPORT  _cmsWriteXYZNumber(cmsIOHANDLER* io, const cmsCIEXYZ* XYZ)
+{
+    cmsEncodedXYZNumber encoded;
+
+    _cmsAssert(io != NULL);
+    _cmsAssert(XYZ != NULL);
+
+    // Each component: double -> 15.16 fixed point -> byte-swapped word
+    encoded.Z = (cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) _cmsDoubleTo15Fixed16(XYZ->Z));
+    encoded.Y = (cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) _cmsDoubleTo15Fixed16(XYZ->Y));
+    encoded.X = (cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) _cmsDoubleTo15Fixed16(XYZ->X));
+    return io->Write(io, sizeof(cmsEncodedXYZNumber), &encoded);
+}
+
+// from Fixed point 8.8 to double
+// The high byte is the integer part, the low byte the fraction in 1/256 steps
+cmsFloat64Number CMSEXPORT _cms8Fixed8toDouble(cmsUInt16Number fixed8)
+{
+       const cmsFloat64Number wholePart = (cmsFloat64Number) ((fixed8 >> 8) & 0xff);
+       const cmsFloat64Number fraction  = (cmsFloat64Number) (fixed8 & 0xff) / 256.0;
+
+       // Same sum as msb + lsb / 256
+       return wholePart + fraction;
+}
+
+cmsUInt16Number CMSEXPORT _cmsDoubleTo8Fixed8(cmsFloat64Number val)
+{
+    const cmsS15Fixed16Number asFixed1516 = _cmsDoubleTo15Fixed16(val);   // 15.16 intermediate
+    return (cmsUInt16Number) ((asFixed1516 >> 8) & 0xFFFF);               // drop the 8 lowest bits, keep 16
+}
+
+// from Fixed point 15.16 to double
+//
+// A 15.16 number is a signed 32-bit integer counting units of 1/65536, so one
+// floating point division decodes it. The conversion is exact: every 32-bit
+// integer fits in a double and the divisor is a power of two.
+//
+// abs() is deliberately not used to split sign and magnitude: its result is
+// undefined for the most negative input (0x80000000), which this form decodes
+// as -32768.0.
+//
+// Returns fix32 / 65536 as a cmsFloat64Number.
+cmsFloat64Number CMSEXPORT _cms15Fixed16toDouble(cmsS15Fixed16Number fix32)
+{
+    const cmsFloat64Number unitsPerOne = 65536.0;
+
+    return ((cmsFloat64Number) fix32) / unitsPerOne;
+}
+
+// from double to Fixed point 15.16: scale by 65536 and round via floor(x + 0.5)
+cmsS15Fixed16Number CMSEXPORT _cmsDoubleTo15Fixed16(cmsFloat64Number v)
+{
+    return (cmsS15Fixed16Number) floor(0.5 + 65536.0 * v);
+}
+
+// Date/Time functions
+
+void CMSEXPORT _cmsDecodeDateTimeNumber(const cmsDateTimeNumber *Source, struct tm *Dest)
+{
+    _cmsAssert(Dest != NULL);
+    _cmsAssert(Source != NULL);
+
+    // Stored fields are byte-swapped; month and year are rebased (-1 and -1900)
+    Dest->tm_year  = _cmsAdjustEndianess16(Source->year) - 1900;
+    Dest->tm_mon   = _cmsAdjustEndianess16(Source->month) - 1;
+    Dest->tm_mday  = _cmsAdjustEndianess16(Source->day);
+    Dest->tm_hour  = _cmsAdjustEndianess16(Source->hours);
+    Dest->tm_min   = _cmsAdjustEndianess16(Source->minutes);
+    Dest->tm_sec   = _cmsAdjustEndianess16(Source->seconds);
+    Dest->tm_isdst = 0;     // the remaining fields are not taken from Source
+    Dest->tm_yday  = -1;
+    Dest->tm_wday  = -1;
+}
+
+void CMSEXPORT _cmsEncodeDateTimeNumber(cmsDateTimeNumber *Dest, const struct tm *Source)
+{
+    _cmsAssert(Dest != NULL);
+    _cmsAssert(Source != NULL);
+    // Month and year are rebased (+1 and +1900) before every field is byte-swapped
+    Dest->year    = _cmsAdjustEndianess16((cmsUInt16Number) (Source->tm_year + 1900));
+    Dest->month   = _cmsAdjustEndianess16((cmsUInt16Number) (Source->tm_mon + 1));
+    Dest->day     = _cmsAdjustEndianess16((cmsUInt16Number) Source->tm_mday);
+    Dest->hours   = _cmsAdjustEndianess16((cmsUInt16Number) Source->tm_hour);
+    Dest->minutes = _cmsAdjustEndianess16((cmsUInt16Number) Source->tm_min);
+    Dest->seconds = _cmsAdjustEndianess16((cmsUInt16Number) Source->tm_sec);
+}
+
+// Reads a tag base header and returns its type signature (0 when the read fails)
+cmsTagTypeSignature CMSEXPORT _cmsReadTypeBase(cmsIOHANDLER* io)
+{
+    _cmsTagBase header;
+    cmsTagTypeSignature found = (cmsTagTypeSignature) 0;
+
+    _cmsAssert(io != NULL);
+
+    if (io->Read(io, &header, sizeof(_cmsTagBase), 1) == 1)
+        found = (cmsTagTypeSignature) _cmsAdjustEndianess32(header.sig);
+    return found;
+}
+
+// Writes a tag base header: the byte-swapped type signature plus a zeroed reserved field
+cmsBool  CMSEXPORT _cmsWriteTypeBase(cmsIOHANDLER* io, cmsTagTypeSignature sig)
+{
+    _cmsTagBase  header;
+
+    _cmsAssert(io != NULL);
+
+    memset(&header.reserved, 0, sizeof(header.reserved));
+    header.sig = (cmsTagTypeSignature) _cmsAdjustEndianess32(sig);
+    return io->Write(io, sizeof(_cmsTagBase), &header);
+}
+
+cmsBool CMSEXPORT _cmsReadAlignment(cmsIOHANDLER* io)
+{
+    cmsUInt8Number  padding[4];
+    cmsUInt32Number here, aligned, padCount;
+
+    _cmsAssert(io != NULL);
+
+    // Number of bytes between the current position and the next aligned one
+    here     = io->Tell(io);
+    aligned  = _cmsALIGNLONG(here);
+    padCount = aligned - here;
+
+    if (padCount == 0) return TRUE;     // already aligned, nothing to skip
+    if (padCount > 4)  return FALSE;    // would not fit in padding[]
+    return (io->Read(io, padding, padCount, 1) == 1);
+}
+
+cmsBool CMSEXPORT _cmsWriteAlignment(cmsIOHANDLER* io)
+{
+    cmsUInt8Number  zeros[4];
+    cmsUInt32Number here, aligned, padCount;
+
+    _cmsAssert(io != NULL);
+
+    // Number of bytes between the current position and the next aligned one
+    here     = io->Tell(io);
+    aligned  = _cmsALIGNLONG(here);
+    padCount = aligned - here;
+
+    if (padCount == 0) return TRUE;     // already aligned, nothing to write
+    if (padCount > 4)  return FALSE;    // would not fit in zeros[]
+    memset(zeros, 0, padCount);
+    return io->Write(io, padCount, zeros);
+}
+
+
+// To deal with text streams. 2K at most: output that does not fit in the
+// internal buffer is reported as a failure instead of being written truncated.
+cmsBool CMSEXPORT _cmsIOPrintf(cmsIOHANDLER* io, const char* frm, ...)
+{
+    va_list args;
+    int len;
+    cmsUInt8Number Buffer[2048];
+
+    _cmsAssert(io != NULL);
+    _cmsAssert(frm != NULL);
+
+    va_start(args, frm);
+    len = vsnprintf((char*) Buffer, sizeof(Buffer), frm, args);
+    va_end(args);
+
+    // A negative result is an output error. A result >= sizeof(Buffer) means
+    // the text was truncated: vsnprintf then returns the length the full text
+    // would have had, and writing that many bytes would read past the end of
+    // Buffer. Both cases are fatal errors for us.
+    if (len < 0 || len >= (int) sizeof(Buffer)) {
+        return FALSE;
+    }
+
+    return io ->Write(io, (cmsUInt32Number) len, Buffer);
+}
+
+
+// Plugin memory management -------------------------------------------------------------------------------------------------
+
+// Specialized malloc for plug-ins, that is freed upon exit.
+void* _cmsPluginMalloc(cmsContext ContextID, cmsUInt32Number size)
+{
+    struct _cmsContext_struct* ctx = _cmsGetContext(ContextID);
+
+    // Usual case: the context already owns a pool
+    if (ctx->MemPool != NULL)
+        return _cmsSubAlloc(ctx->MemPool, size);
+
+    // A missing pool is only created on demand when ContextID is NULL
+    if (ContextID != NULL) {
+        cmsSignalError(ContextID, cmsERROR_CORRUPTION_DETECTED, "NULL memory pool on context");
+        return NULL;
+    }
+
+    ctx->MemPool = _cmsCreateSubAlloc(0, 2*1024);
+    if (ctx->MemPool == NULL) return NULL;
+
+    return _cmsSubAlloc(ctx->MemPool, size);
+}
+
+
+// Main plug-in dispatcher: forwards the plug-in to cmsPluginTHR() with a NULL context
+cmsBool CMSEXPORT cmsPlugin(void* Plug_in)
+{
+    return cmsPluginTHR(NULL, Plug_in);
+}
+
+cmsBool CMSEXPORT cmsPluginTHR(cmsContext id, void* Plug_in)
+{
+    cmsPluginBase* Plugin;
+    // Walk the chain linked through ->Next; the first rejected entry aborts with FALSE
+    for (Plugin = (cmsPluginBase*) Plug_in;
+         Plugin != NULL;
+         Plugin = Plugin -> Next) {
+
+            if (Plugin -> Magic != cmsPluginMagicNumber) {
+                cmsSignalError(id, cmsERROR_UNKNOWN_EXTENSION, "Unrecognized plugin");
+                return FALSE;
+            }
+
+            if (Plugin ->ExpectedVersion > LCMS_VERSION) {   // plug-in asks for a newer library
+                cmsSignalError(id, cmsERROR_UNKNOWN_EXTENSION, "plugin needs Little CMS %d, current version is %d",
+                    Plugin ->ExpectedVersion, LCMS_VERSION);
+                return FALSE;
+            }
+
+            switch (Plugin -> Type) {   // hand the entry to the registrar for its type
+
+                case cmsPluginMemHandlerSig:
+                    if (!_cmsRegisterMemHandlerPlugin(id, Plugin)) return FALSE;
+                    break;
+
+                case cmsPluginInterpolationSig:
+                    if (!_cmsRegisterInterpPlugin(id, Plugin)) return FALSE;
+                    break;
+
+                case cmsPluginTagTypeSig:
+                    if (!_cmsRegisterTagTypePlugin(id, Plugin)) return FALSE;
+                    break;
+
+                case cmsPluginTagSig:
+                    if (!_cmsRegisterTagPlugin(id, Plugin)) return FALSE;
+                    break;
+
+                case cmsPluginFormattersSig:
+                    if (!_cmsRegisterFormattersPlugin(id, Plugin)) return FALSE;
+                    break;
+
+                case cmsPluginRenderingIntentSig:
+                    if (!_cmsRegisterRenderingIntentPlugin(id, Plugin)) return FALSE;
+                    break;
+
+                case cmsPluginParametricCurveSig:
+                    if (!_cmsRegisterParametricCurvesPlugin(id, Plugin)) return FALSE;
+                    break;
+
+                case cmsPluginMultiProcessElementSig:
+                    if (!_cmsRegisterMultiProcessElementPlugin(id, Plugin)) return FALSE;
+                    break;
+
+                case cmsPluginOptimizationSig:
+                    if (!_cmsRegisterOptimizationPlugin(id, Plugin)) return FALSE;
+                    break;
+
+                case cmsPluginTransformSig:
+                    if (!_cmsRegisterTransformPlugin(id, Plugin)) return FALSE;
+                    break;
+
+                case cmsPluginMutexSig:
+                    if (!_cmsRegisterMutexPlugin(id, Plugin)) return FALSE;
+                    break;
+
+                default:
+                    cmsSignalError(id, cmsERROR_UNKNOWN_EXTENSION, "Unrecognized plugin type '%X'", Plugin -> Type);
+                    return FALSE;
+            }
+    }
+
+    // Every entry in the chain was accepted (a NULL chain also succeeds)
+    return TRUE;
+}
+
+
+// Revert all plug-ins to default (forwards to cmsUnregisterPluginsTHR with a NULL context)
+void CMSEXPORT cmsUnregisterPlugins(void)
+{
+    cmsUnregisterPluginsTHR(NULL);
+}
+
+
+// The Global storage for system context. This is the one and only global variable
+// pointers structure. All global vars are referenced here. Used whenever ContextID is NULL.
+static struct _cmsContext_struct globalContext = {
+
+    NULL,                              // Not in the linked list
+    NULL,                              // No suballocator
+    {
+        NULL,                          //  UserPtr,            
+        &_cmsLogErrorChunk,            //  Logger,
+        &_cmsAlarmCodesChunk,          //  AlarmCodes,
+        &_cmsAdaptationStateChunk,     //  AdaptationState, 
+        &_cmsMemPluginChunk,           //  MemPlugin,
+        &_cmsInterpPluginChunk,        //  InterpPlugin,
+        &_cmsCurvesPluginChunk,        //  CurvesPlugin,
+        &_cmsFormattersPluginChunk,    //  FormattersPlugin,
+        &_cmsTagTypePluginChunk,       //  TagTypePlugin,
+        &_cmsTagPluginChunk,           //  TagPlugin,
+        &_cmsIntentsPluginChunk,       //  IntentPlugin,
+        &_cmsMPETypePluginChunk,       //  MPEPlugin,
+        &_cmsOptimizationPluginChunk,  //  OptimizationPlugin,
+        &_cmsTransformPluginChunk,     //  TransformPlugin,
+        &_cmsMutexPluginChunk          //  MutexPlugin
+    },
+    
+    { NULL, NULL, NULL, NULL, NULL, NULL } // The default memory allocator is not used for context 0
+};
+
+
+// The context pool (linked list head). Insertions and removals take the mutex below.
+static _cmsMutex _cmsContextPoolHeadMutex = CMS_MUTEX_INITIALIZER;
+static struct _cmsContext_struct* _cmsContextPoolHead = NULL;
+
+// Internal, get associated pointer, with guessing. Never returns NULL.
+// A NULL or unknown ContextID resolves to the global context.
+struct _cmsContext_struct* _cmsGetContext(cmsContext ContextID)
+{
+    struct _cmsContext_struct* wanted = (struct _cmsContext_struct*) ContextID;
+    struct _cmsContext_struct* node;
+
+    // On 0, use global settings
+    if (wanted == NULL)
+        return &globalContext;
+
+    // Scan the pool for a node living at this very address
+    node = _cmsContextPoolHead;
+    while (node != NULL) {
+
+        if (node == wanted)
+            return node;    // New-style context
+
+        node = node ->Next;
+    }
+
+    return &globalContext;
+}
+
+
+// Internal: get the memory area associated with each context client.
+// Returns the block assigned to the specific zone, falling back to the global one.
+void* _cmsContextGetClientChunk(cmsContext ContextID, _cmsMemoryClient mc)
+{
+    void* chunk;
+
+    if ((int) mc < 0 || mc >= MemoryClientMax) {
+
+           cmsSignalError(ContextID, cmsERROR_INTERNAL, "Bad context client -- possible corruption");
+
+           // This is catastrophic. Should never reach here
+           _cmsAssert(0);
+
+           // Reverts to global context
+           return globalContext.chunks[UserPtr];
+    }
+
+    // Slot as configured on the requested context (the global context
+    // itself when ContextID is NULL or unknown)
+    chunk = _cmsGetContext(ContextID) ->chunks[mc];
+
+    // A null slot means no special settings for that context, and this
+    // reverts to Context0 globals
+    if (chunk == NULL)
+        chunk = globalContext.chunks[mc];
+
+    return chunk;
+}
+
+
+// This function returns the given context to its default pristine state,
+// as if no plug-ins were declared. There is no way to unregister a single 
+// plug-in, as a single call to cmsPluginTHR() function may register 
+// many different plug-ins simultaneously, then there is no way to 
+// identify which plug-in to unregister.
+void CMSEXPORT cmsUnregisterPluginsTHR(cmsContext ContextID)
+{
+    _cmsRegisterMemHandlerPlugin(ContextID, NULL);   // NULL plug-in: presumably "reset to default" for each registrar (defined elsewhere)
+    _cmsRegisterInterpPlugin(ContextID, NULL);
+    _cmsRegisterTagTypePlugin(ContextID, NULL);
+    _cmsRegisterTagPlugin(ContextID, NULL);
+    _cmsRegisterFormattersPlugin(ContextID, NULL);
+    _cmsRegisterRenderingIntentPlugin(ContextID, NULL);
+    _cmsRegisterParametricCurvesPlugin(ContextID, NULL);
+    _cmsRegisterMultiProcessElementPlugin(ContextID, NULL);
+    _cmsRegisterOptimizationPlugin(ContextID, NULL);
+    _cmsRegisterTransformPlugin(ContextID, NULL);    
+    _cmsRegisterMutexPlugin(ContextID, NULL);
+}
+
+
+// Returns the memory manager plug-in, if any, from the Plug-in bundle.
+// Entries with a wrong magic number or asking for a newer lcms are skipped.
+static
+cmsPluginMemHandler* _cmsFindMemoryPlugin(void* PluginBundle)
+{
+    cmsPluginBase* entry = (cmsPluginBase*) PluginBundle;
+
+    while (entry != NULL) {
+
+        cmsBool usable = entry -> Magic == cmsPluginMagicNumber &&
+                         entry -> ExpectedVersion <= LCMS_VERSION;
+
+        // First usable memory handler wins
+        if (usable && entry -> Type == cmsPluginMemHandlerSig)
+            return (cmsPluginMemHandler*) entry;
+
+        entry = entry -> Next;
+    }
+
+    // Nope, revert to defaults
+    return NULL;
+}
+
+
+// Creates a new context with optional associated plug-ins. Caller may also specify an optional pointer to user-defined 
+// data that will be forwarded to plug-ins and logger. Returns NULL on any failure.
+cmsContext CMSEXPORT cmsCreateContext(void* Plugin, void* UserData)
+{
+    struct _cmsContext_struct* ctx;
+    struct _cmsContext_struct  fakeContext;   // stack-only stand-in, only the fields set below are valid
+        
+    // See the comments regarding locking in lcms2_internal.h
+    // for an explanation of why we need the following code.
+#ifdef CMS_IS_WINDOWS_
+#ifndef CMS_RELY_ON_WINDOWS_STATIC_MUTEX_INIT
+    {
+        static HANDLE _cmsWindowsInitMutex = NULL;
+        static volatile HANDLE* mutex = &_cmsWindowsInitMutex;
+
+        if (*mutex == NULL)
+        {
+            HANDLE p = CreateMutex(NULL, FALSE, NULL);
+            if (p && InterlockedCompareExchangePointer((void **)mutex, (void*)p, NULL) != NULL)
+                CloseHandle(p);   // another thread published its mutex first, drop ours
+        }
+        if (*mutex == NULL || WaitForSingleObject(*mutex, INFINITE) == WAIT_FAILED)
+            return NULL;
+        if (((void **)&_cmsContextPoolHeadMutex)[0] == NULL)
+            InitializeCriticalSection(&_cmsContextPoolHeadMutex);
+        if (*mutex == NULL || !ReleaseMutex(*mutex))
+            return NULL;
+    }
+#endif
+#endif
+
+    _cmsInstallAllocFunctions(_cmsFindMemoryPlugin(Plugin), &fakeContext.DefaultMemoryManager);
+    // Minimal setup so fakeContext can be handed to _cmsMalloc before the real context exists
+    fakeContext.chunks[UserPtr]     = UserData;
+    fakeContext.chunks[MemPlugin]   = &fakeContext.DefaultMemoryManager;
+
+    // Create the context structure.
+    ctx = (struct _cmsContext_struct*) _cmsMalloc(&fakeContext, sizeof(struct _cmsContext_struct));
+    if (ctx == NULL)   
+        return NULL;     // Something very wrong happened!
+
+    // Init the structure and the memory manager
+    memset(ctx, 0, sizeof(struct _cmsContext_struct));
+
+    // Keep memory manager
+    memcpy(&ctx->DefaultMemoryManager, &fakeContext.DefaultMemoryManager, sizeof(_cmsMemPluginChunk)); 
+   
+    // Maintain the linked list (with proper locking)
+    _cmsEnterCriticalSectionPrimitive(&_cmsContextPoolHeadMutex);
+       ctx ->Next = _cmsContextPoolHead;
+       _cmsContextPoolHead = ctx;
+    _cmsLeaveCriticalSectionPrimitive(&_cmsContextPoolHeadMutex);
+
+    ctx ->chunks[UserPtr]     = UserData;
+    ctx ->chunks[MemPlugin]   = &ctx->DefaultMemoryManager;
+   
+    // Now we can allocate the pool by using default memory manager
+    ctx ->MemPool = _cmsCreateSubAlloc(ctx, 22 * sizeof(void*));  // default size about 22 pointers
+    if (ctx ->MemPool == NULL) {
+
+         cmsDeleteContext(ctx);   // also unlinks ctx from the pool list
+        return NULL;
+    }
+    // Fill every chunk; a NULL source presumably selects the built-in defaults (helpers defined elsewhere)
+    _cmsAllocLogErrorChunk(ctx, NULL);
+    _cmsAllocAlarmCodesChunk(ctx, NULL);
+    _cmsAllocAdaptationStateChunk(ctx, NULL);
+    _cmsAllocMemPluginChunk(ctx, NULL);
+    _cmsAllocInterpPluginChunk(ctx, NULL);
+    _cmsAllocCurvesPluginChunk(ctx, NULL);
+    _cmsAllocFormattersPluginChunk(ctx, NULL);
+    _cmsAllocTagTypePluginChunk(ctx, NULL);
+    _cmsAllocMPETypePluginChunk(ctx, NULL);
+    _cmsAllocTagPluginChunk(ctx, NULL);
+    _cmsAllocIntentsPluginChunk(ctx, NULL);
+    _cmsAllocOptimizationPluginChunk(ctx, NULL);
+    _cmsAllocTransformPluginChunk(ctx, NULL);
+    _cmsAllocMutexPluginChunk(ctx, NULL);
+
+    // Setup the plug-ins
+    if (!cmsPluginTHR(ctx, Plugin)) {
+    
+        cmsDeleteContext(ctx);
+        return NULL;
+    }
+
+    return (cmsContext) ctx;  
+}
+
+// Duplicates a context with all associated plug-ins. Returns NULL on failure.
+// Caller may specify an optional pointer to user-defined data that will be
+// forwarded to plug-ins and logger; NULL keeps the user data of the source.
+cmsContext CMSEXPORT cmsDupContext(cmsContext ContextID, void* NewUserData)
+{
+    int i;
+    struct _cmsContext_struct* ctx;
+    const struct _cmsContext_struct* src = _cmsGetContext(ContextID);
+
+    void* userData = (NewUserData != NULL) ? NewUserData : src -> chunks[UserPtr];
+
+    ctx = (struct _cmsContext_struct*) _cmsMalloc(ContextID, sizeof(struct _cmsContext_struct));
+    if (ctx == NULL)   
+        return NULL;     // Something very wrong happened
+
+    // Zero the block so every chunk starts as NULL, then setup default memory allocators
+    memset(ctx, 0, sizeof(struct _cmsContext_struct));
+    memcpy(&ctx->DefaultMemoryManager, &src->DefaultMemoryManager, sizeof(ctx->DefaultMemoryManager));
+
+    // Maintain the linked list
+    _cmsEnterCriticalSectionPrimitive(&_cmsContextPoolHeadMutex);
+       ctx ->Next = _cmsContextPoolHead;
+       _cmsContextPoolHead = ctx;
+    _cmsLeaveCriticalSectionPrimitive(&_cmsContextPoolHeadMutex);
+
+    ctx ->chunks[UserPtr]    = userData;
+    ctx ->chunks[MemPlugin]  = &ctx->DefaultMemoryManager;
+
+    ctx ->MemPool = _cmsCreateSubAlloc(ctx, 22 * sizeof(void*));
+    if (ctx ->MemPool == NULL) {
+
+         cmsDeleteContext(ctx);
+        return NULL;
+    }
+
+    // Allocate all required chunks.
+    _cmsAllocLogErrorChunk(ctx, src);
+    _cmsAllocAlarmCodesChunk(ctx, src);
+    _cmsAllocAdaptationStateChunk(ctx, src);
+    _cmsAllocMemPluginChunk(ctx, src);
+    _cmsAllocInterpPluginChunk(ctx, src);
+    _cmsAllocCurvesPluginChunk(ctx, src);
+    _cmsAllocFormattersPluginChunk(ctx, src);
+    _cmsAllocTagTypePluginChunk(ctx, src);
+    _cmsAllocMPETypePluginChunk(ctx, src);
+    _cmsAllocTagPluginChunk(ctx, src);
+    _cmsAllocIntentsPluginChunk(ctx, src);
+    _cmsAllocOptimizationPluginChunk(ctx, src);
+    _cmsAllocTransformPluginChunk(ctx, src);
+    _cmsAllocMutexPluginChunk(ctx, src);
+
+    // Make sure no one failed: a chunk still NULL in the new context was never allocated
+    for (i=Logger; i < MemoryClientMax; i++) {
+
+        if (ctx ->chunks[i] == NULL) {
+            cmsDeleteContext((cmsContext) ctx);
+            return NULL;
+        }
+    }
+
+    return (cmsContext) ctx;
+}
+
+
+/*
+static
+struct _cmsContext_struct* FindPrev(struct _cmsContext_struct* id)
+{
+    struct _cmsContext_struct* prev;
+
+    // Search for previous
+    for (prev = _cmsContextPoolHead; 
+             prev != NULL;
+             prev = prev ->Next)
+    {
+        if (prev ->Next == id)
+            return prev;
+    }
+
+    return NULL;  // List is empty or only one element!
+}
+*/
+
+// Frees any resources associated with the given context, 
+// and destroys the context placeholder. A NULL ContextID is a no-op.
+// The ContextID can no longer be used in any THR operation.  
+void CMSEXPORT cmsDeleteContext(cmsContext ContextID)
+{
+    if (ContextID != NULL) {
+
+        struct _cmsContext_struct* ctx = (struct _cmsContext_struct*) ContextID;              
+        struct _cmsContext_struct  fakeContext;  
+        struct _cmsContext_struct* prev;
+        // Copy the allocator out of ctx: it is needed to free ctx itself at the end
+        memcpy(&fakeContext.DefaultMemoryManager, &ctx->DefaultMemoryManager, sizeof(ctx->DefaultMemoryManager));
+
+        fakeContext.chunks[UserPtr]     = ctx ->chunks[UserPtr];
+        fakeContext.chunks[MemPlugin]   = &fakeContext.DefaultMemoryManager;
+
+        // Get rid of plugins
+        cmsUnregisterPluginsTHR(ContextID); 
+
+        // Since all memory is allocated in the private pool, all what we need to do is destroy the pool
+        if (ctx -> MemPool != NULL)
+              _cmsSubAllocDestroy(ctx ->MemPool);
+        ctx -> MemPool = NULL;
+
+        // Maintain list
+        _cmsEnterCriticalSectionPrimitive(&_cmsContextPoolHeadMutex);
+        if (_cmsContextPoolHead == ctx) { 
+
+            _cmsContextPoolHead = ctx->Next;
+        }
+        else {
+
+            // Search for previous; if ctx is not in the list nothing is unlinked
+            for (prev = _cmsContextPoolHead; 
+                 prev != NULL;
+                 prev = prev ->Next)
+            {
+                if (prev -> Next == ctx) {
+                    prev -> Next = ctx ->Next;
+                    break;
+                }
+            }
+        }
+        _cmsLeaveCriticalSectionPrimitive(&_cmsContextPoolHeadMutex);
+
+        // free the memory block itself, through the allocator copied into fakeContext
+        _cmsFree(&fakeContext, ctx);
+    }
+}
+
+// Returns the user data (UserPtr chunk) of the given ContextID, or NULL if no user data was attached on context creation
+void* CMSEXPORT cmsGetContextUserData(cmsContext ContextID)
+{
+    return _cmsContextGetClientChunk(ContextID, UserPtr);
+}
+
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmsps2.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmsps2.c
new file mode 100644
index 0000000000..deab55d11f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmsps2.c
@@ -0,0 +1,1597 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// PostScript ColorRenderingDictionary and ColorSpaceArray
+
+
+#define MAXPSCOLS   60      // Columns on tables
+
+/*
+    Implementation
+    --------------
+
+  PostScript does use XYZ as its internal PCS. But since PostScript
+  interpolation tables are limited to 8 bits, I use Lab as a way to
+  improve the accuracy, favoring perceptual results. So, for the creation
+  of each CRD, CSA the profiles are converted to Lab via a device
+  link between  profile -> Lab or Lab -> profile. The PS code necessary to
+  convert Lab <-> XYZ is also included.
+
+
+
+  Color Space Arrays (CSA)
+  ==================================================================================
+
+  In order to obtain precision, the code chooses between three ways to implement
+  the device -> XYZ transform. The three cases are monochrome profiles (often
+  implemented as a set of curves), matrix-shaper profiles and Pipeline-based ones.
+
+  Monochrome
+  -----------
+
+  This is implemented as /CIEBasedA CSA. The prelinearization curve is
+  placed into the /DecodeA section, and the matrix equals D50. Since there are
+  no interpolation tables, I do the conversion directly to XYZ
+
+  NOTE: CLUT-based monochrome profiles are NOT supported. So, cmsFLAGS_MATRIXINPUT
+  flag is forced on such profiles.
+
+    [ /CIEBasedA
+      <<
+            /DecodeA { transfer function } bind
+            /MatrixA [D50]
+            /RangeLMN [ 0.0 cmsD50X 0.0 cmsD50Y 0.0 cmsD50Z ]
+            /WhitePoint [D50]
+            /BlackPoint [BP]
+            /RenderingIntent (intent)
+      >>
+    ]
+
+   On simpler profiles, the PCS is already XYZ, so no conversion is required.
+
+
+   Matrix-shaper based
+   -------------------
+
+   This is implemented both with /CIEBasedABC or /CIEBasedDEF depending on the
+   profile implementation. Since here there are no interpolation tables, I do
+   the conversion directly to XYZ
+
+
+
+    [ /CIEBasedABC
+            <<
+                /DecodeABC [ {transfer1} {transfer2} {transfer3} ]
+                /MatrixABC [Matrix]
+                /RangeLMN [ 0.0 cmsD50X 0.0 cmsD50Y 0.0 cmsD50Z ]
+                /DecodeLMN [ { / 2} dup dup ]
+                /WhitePoint [D50]
+                /BlackPoint [BP]
+                /RenderingIntent (intent)
+            >>
+    ]
+
+
+    CLUT based
+    ----------
+
+     Lab is used in such cases.
+
+    [ /CIEBasedDEF
+            <<
+            /DecodeDEF [ <prelinearization> ]
+            /Table [ p p p [<...>]]
+            /RangeABC [ 0 1 0 1 0 1]
+            /DecodeABC[ <postlinearization> ]
+            /RangeLMN [ -0.236 1.254 0 1 -0.635 1.640 ]
+               % -128/500 1+127/500 0 1  -127/200 1+128/200
+            /MatrixABC [ 1 1 1 1 0 0 0 0 -1]
+            /WhitePoint [D50]
+            /BlackPoint [BP]
+            /RenderingIntent (intent)
+    ]
+
+
+  Color Rendering Dictionaries (CRD)
+  ==================================
+  These are always implemented as CLUT, and always are using Lab. Since CRD are expected to
+  be used as resources, the code adds the definition as well.
+
+  <<
+    /ColorRenderingType 1
+    /WhitePoint [ D50 ]
+    /BlackPoint [BP]
+    /MatrixPQR [ Bradford ]
+    /RangePQR [-0.125 1.375 -0.125 1.375 -0.125 1.375 ]
+    /TransformPQR [
+    {4 index 3 get div 2 index 3 get mul exch pop exch pop exch pop exch pop } bind
+    {4 index 4 get div 2 index 4 get mul exch pop exch pop exch pop exch pop } bind
+    {4 index 5 get div 2 index 5 get mul exch pop exch pop exch pop exch pop } bind
+    ]
+    /MatrixABC <...>
+    /EncodeABC <...>
+    /RangeABC  <.. used for  XYZ -> Lab>
+    /EncodeLMN
+    /RenderTable [ p p p [<...>]]
+
+    /RenderingIntent (Perceptual)
+  >>
+  /Current exch /ColorRendering defineresource pop
+
+
+  The following stages are used to convert from XYZ to Lab
+  --------------------------------------------------------
+
+  Input is given at LMN stage on X, Y, Z
+
+  Encode LMN gives us f(X/Xn), f(Y/Yn), f(Z/Zn)
+
+  /EncodeLMN [
+
+    { 0.964200  div dup 0.008856 le {7.787 mul 16 116 div add}{1 3 div exp} ifelse } bind
+    { 1.000000  div dup 0.008856 le {7.787 mul 16 116 div add}{1 3 div exp} ifelse } bind
+    { 0.824900  div dup 0.008856 le {7.787 mul 16 116 div add}{1 3 div exp} ifelse } bind
+
+    ]
+
+
+  MatrixABC is used to compute f(Y/Yn), f(X/Xn) - f(Y/Yn), f(Y/Yn) - f(Z/Zn)
+
+  | 0  1  0|
+  | 1 -1  0|
+  | 0  1 -1|
+
+  /MatrixABC [ 0 1 0 1 -1 1 0 0 -1 ]
+
+ EncodeABC finally gives Lab values.
+
+  /EncodeABC [
+    { 116 mul  16 sub 100 div  } bind
+    { 500 mul 128 add 255 div  } bind
+    { 200 mul 128 add 255 div  } bind
+    ]
+
+  The following stages are used to convert Lab to XYZ
+  ----------------------------------------------------
+
+    /RangeABC [ 0 1 0 1 0 1]
+    /DecodeABC [ { 100 mul 16 add 116 div } bind
+                 { 255 mul 128 sub 500 div } bind
+                 { 255 mul 128 sub 200 div } bind
+               ]
+
+    /MatrixABC [ 1 1 1 1 0 0 0 0 -1]
+    /DecodeLMN [
+                {dup 6 29 div ge {dup dup mul mul} {4 29 div sub 108 841 div mul} ifelse 0.964200 mul} bind
+                {dup 6 29 div ge {dup dup mul mul} {4 29 div sub 108 841 div mul} ifelse } bind
+                {dup 6 29 div ge {dup dup mul mul} {4 29 div sub 108 841 div mul} ifelse 0.824900 mul} bind
+                ]
+
+
+*/
+
+/*
+
+ PostScript algorithms discussion.
+ =========================================================================================================
+
+  1D interpolation algorithm
+
+
+  1D interpolation (float)
+  ------------------------
+
+    val2 = Domain * Value;
+
+    cell0 = (int) floor(val2);
+    cell1 = (int) ceil(val2);
+
+    rest = val2 - cell0;
+
+    y0 = LutTable[cell0] ;
+    y1 = LutTable[cell1] ;
+
+    y = y0 + (y1 - y0) * rest;
+
+
+
+  PostScript code                   Stack
+  ================================================
+
+  {                                 % v
+    <check 0..1.0>
+    [array]                         % v tab
+    dup                             % v tab tab
+    length 1 sub                    % v tab dom
+
+    3 -1 roll                       % tab dom v
+
+    mul                             % tab val2
+    dup                             % tab val2 val2
+    dup                             % tab val2 val2 val2
+    floor cvi                       % tab val2 val2 cell0
+    exch                            % tab val2 cell0 val2
+    ceiling cvi                     % tab val2 cell0 cell1
+
+    3 index                         % tab val2 cell0 cell1 tab
+    exch                            % tab val2 cell0 tab cell1
+    get                             % tab val2 cell0 y1
+
+    4 -1 roll                       % val2 cell0 y1 tab
+    3 -1 roll                       % val2 y1 tab cell0
+    get                             % val2 y1 y0
+
+    dup                             % val2 y1 y0 y0
+    3 1 roll                        % val2 y0 y1 y0
+
+    sub                             % val2 y0 (y1-y0)
+    3 -1 roll                       % y0 (y1-y0) val2
+    dup                             % y0 (y1-y0) val2 val2
+    floor cvi                       % y0 (y1-y0) val2 floor(val2)
+    sub                             % y0 (y1-y0) rest
+    mul                             % y0 t1
+    add                             % y
+    65535 div                       % result
+
+  } bind
+
+
+*/
+
+
+// This struct holds the memory block currently being written
+typedef struct {
+    _cmsStageCLutData* Pipeline;
+    cmsIOHANDLER* m;                     // output stream
+
+    int FirstComponent;                  // presumably the last first-component value seen, to detect row changes -- confirm in the sampler
+    int SecondComponent;
+
+    const char* PreMaj;                  // NOTE(review): Pre/Post strings look like text emitted around major/minor rows -- confirm
+    const char* PostMaj;
+    const char* PreMin;
+    const char* PostMin;
+
+    int  FixWhite;    // Force mapping of pure white
+
+    cmsColorSpaceSignature  ColorSpace;  // ColorSpace of profile
+
+
+} cmsPsSamplerCargo;
+
+static int _cmsPSActualColumn = 0;       // running output column used by WriteByte, shared by every writer
+
+
+// Convert to byte: maps 0..0xFFFF onto 0..0xFF (65535 / 257 = 255), rounding to nearest
+static
+cmsUInt8Number Word2Byte(cmsUInt16Number w)
+{
+    return (cmsUInt8Number) floor((cmsFloat64Number) w / 257.0 + 0.5);
+}
+
+
+// Convert to byte (using ICC2 notation)
+/*
+static
+cmsUInt8Number L2Byte(cmsUInt16Number w)
+{
+    int ww = w + 0x0080;
+
+    if (ww > 0xFFFF) return 0xFF;
+
+    return (cmsUInt8Number) ((cmsUInt16Number) (ww >> 8) & 0xFF);
+}
+*/
+
+// Write a cooked byte: two lowercase hex digits, followed by a line break
+// once the running column counter exceeds MAXPSCOLS.
+static
+void WriteByte(cmsIOHANDLER* m, cmsUInt8Number b)
+{
+    _cmsIOPrintf(m, "%02x", b);
+    _cmsPSActualColumn += 2;
+
+    // Still room on the current line: nothing else to do
+    if (_cmsPSActualColumn <= MAXPSCOLS) return;
+
+    _cmsIOPrintf(m, "\n");
+    _cmsPSActualColumn = 0;
+}
+
+// ----------------------------------------------------------------- PostScript generation
+
+
+// Removes offending Carriage returns. Result is a static buffer (reused by the next call), 2047 chars at most
+static
+char* RemoveCR(const char* txt)
+{
+    static char Buffer[2048];
+    char* cursor = Buffer;
+
+    strncpy(Buffer, txt, 2047);
+    Buffer[2047] = 0;
+    while (*cursor != 0) {
+        if (*cursor == '\n' || *cursor == '\r') *cursor = ' ';
+        cursor++;
+    }
+    return Buffer;
+}
+
+static
+void EmitHeader(cmsIOHANDLER* m, const char* Title, cmsHPROFILE hProfile)
+{
+    time_t timer;
+    cmsMLU *Description, *Copyright;
+    char DescASCII[256], CopyrightASCII[256];
+    // Emits the PostScript comment header: title, profile description/copyright, creation time
+    time(&timer);
+
+    Description = (cmsMLU*) cmsReadTag(hProfile, cmsSigProfileDescriptionTag);
+    Copyright   = (cmsMLU*) cmsReadTag(hProfile, cmsSigCopyrightTag);
+    // Start from empty, always-terminated strings in case a tag is missing
+    DescASCII[0] = DescASCII[255] = 0;
+    CopyrightASCII[0] = CopyrightASCII[255] = 0;
+
+    if (Description != NULL) cmsMLUgetASCII(Description,  cmsNoLanguage, cmsNoCountry, DescASCII,       255);
+    if (Copyright != NULL)   cmsMLUgetASCII(Copyright,    cmsNoLanguage, cmsNoCountry, CopyrightASCII,  255);
+    // Each RemoveCR result is consumed before the next call reuses its static buffer
+    _cmsIOPrintf(m, "%%!PS-Adobe-3.0\n");
+    _cmsIOPrintf(m, "%%\n");
+    _cmsIOPrintf(m, "%% %s\n", Title);
+    _cmsIOPrintf(m, "%% Source: %s\n", RemoveCR(DescASCII));
+    _cmsIOPrintf(m, "%%         %s\n", RemoveCR(CopyrightASCII));
+    _cmsIOPrintf(m, "%% Created: %s", ctime(&timer)); // ctime appends a \n!!!
+    _cmsIOPrintf(m, "%%\n");
+    _cmsIOPrintf(m, "%%%%BeginResource\n");
+
+}
+
+
+// Emits White & Black point. White point is always D50, Black point is the device
+// Black point adapted to D50.
+
+static
+void EmitWhiteBlackD50(cmsIOHANDLER* m, cmsCIEXYZ* BlackPoint)
+{
+    // BlackPoint is written as given; the adaptation to D50 is presumably done by the caller
+    _cmsIOPrintf(m, "/BlackPoint [%f %f %f]\n", BlackPoint -> X,
+                                          BlackPoint -> Y,
+                                          BlackPoint -> Z);
+
+    _cmsIOPrintf(m, "/WhitePoint [%f %f %f]\n", cmsD50_XYZ()->X,
+                                          cmsD50_XYZ()->Y,
+                                          cmsD50_XYZ()->Z);
+}
+
+
+static
+void EmitRangeCheck(cmsIOHANDLER* m)
+{
+    _cmsIOPrintf(m, "dup 0.0 lt { pop 0.0 } if "
+                    "dup 1.0 gt { pop 1.0 } if ");
+    // The emitted PostScript clamps the value on top of the stack to 0.0 .. 1.0
+}
+
+// Does write the intent
+
+static
+void EmitIntent(cmsIOHANDLER* m, cmsUInt32Number RenderingIntent)
+{
+    const char *intent;
+
+    switch (RenderingIntent) {
+
+        case INTENT_PERCEPTUAL:            intent = "Perceptual"; break;
+        case INTENT_RELATIVE_COLORIMETRIC: intent = "RelativeColorimetric"; break;
+        case INTENT_ABSOLUTE_COLORIMETRIC: intent = "AbsoluteColorimetric"; break;
+        case INTENT_SATURATION:            intent = "Saturation"; break;
+
+        default: intent = "Undefined"; break;
+    }
+
+    _cmsIOPrintf(m, "/RenderingIntent (%s)\n", intent );
+}
+
+//
+//  Convert L* to Y
+//
+//      Y = Yn*[ (L* + 16) / 116] ^ 3   if (L*) >= 6 / 29
+//        = Yn*( L* / 116) / 7.787      if (L*) < 6 / 29
+//
+
+/*
+static
+void EmitL2Y(cmsIOHANDLER* m)
+{
+    _cmsIOPrintf(m,
+            "{ "
+                "100 mul 16 add 116 div "               // (L * 100 + 16) / 116
+                 "dup 6 29 div ge "                     // >= 6 / 29 ?
+                 "{ dup dup mul mul } "                 // yes, ^3 and done
+                 "{ 4 29 div sub 108 841 div mul } "    // no, slope limiting
+            "ifelse } bind ");
+}
+*/
+
+
+// Lab -> XYZ, see the discussion above
+// Writes the /RangeABC, /DecodeABC, /MatrixABC, /RangeLMN and /DecodeLMN entries, in that order
+static
+void EmitLab2XYZ(cmsIOHANDLER* m)
+{
+    _cmsIOPrintf(m, "/RangeABC [ 0 1 0 1 0 1]\n");
+    _cmsIOPrintf(m, "/DecodeABC [\n");
+    _cmsIOPrintf(m, "{100 mul  16 add 116 div } bind\n");
+    _cmsIOPrintf(m, "{255 mul 128 sub 500 div } bind\n");
+    _cmsIOPrintf(m, "{255 mul 128 sub 200 div } bind\n");
+    _cmsIOPrintf(m, "]\n");
+    _cmsIOPrintf(m, "/MatrixABC [ 1 1 1 1 0 0 0 0 -1]\n");
+    _cmsIOPrintf(m, "/RangeLMN [ -0.236 1.254 0 1 -0.635 1.640 ]\n");
+    _cmsIOPrintf(m, "/DecodeLMN [\n");
+    _cmsIOPrintf(m, "{dup 6 29 div ge {dup dup mul mul} {4 29 div sub 108 841 div mul} ifelse 0.964200 mul} bind\n");
+    _cmsIOPrintf(m, "{dup 6 29 div ge {dup dup mul mul} {4 29 div sub 108 841 div mul} ifelse } bind\n");
+    _cmsIOPrintf(m, "{dup 6 29 div ge {dup dup mul mul} {4 29 div sub 108 841 div mul} ifelse 0.824900 mul} bind\n");
+    _cmsIOPrintf(m, "]\n");
+}
+
+
+
+// Outputs a table of words. It does use 16 bits. Nothing is written for NULL, empty or linear curves.
+
+static
+void Emit1Gamma(cmsIOHANDLER* m, cmsToneCurve* Table)
+{
+    cmsUInt32Number i;
+    cmsFloat64Number gamma;
+
+    if (Table == NULL) return; // Error
+
+    if (Table ->nEntries <= 0) return;  // Empty table
+
+    // Suppress whole if identity
+    if (cmsIsToneCurveLinear(Table)) return;
+
+    // Check if is really an exponential. If so, emit "exp"
+    gamma = cmsEstimateGamma(Table, 0.001);   // presumably non-positive when the curve is not a pure power law -- confirm
+     if (gamma > 0) {
+            _cmsIOPrintf(m, "{ %g exp } bind ", gamma);
+            return;
+     }
+
+    _cmsIOPrintf(m, "{ ");
+
+    // Bounds check
+    EmitRangeCheck(m);
+
+    // Emit interpolation code (linear interpolation between the two nearest table entries)
+
+    // PostScript code                      Stack
+    // ===============                      ========================
+                                            // v
+    _cmsIOPrintf(m, " [");
+
+    for (i=0; i < Table->nEntries; i++) {
+        _cmsIOPrintf(m, "%d ", Table->Table16[i]);
+    }
+
+    _cmsIOPrintf(m, "] ");                        // v tab
+
+    _cmsIOPrintf(m, "dup ");                      // v tab tab
+    _cmsIOPrintf(m, "length 1 sub ");             // v tab dom
+    _cmsIOPrintf(m, "3 -1 roll ");                // tab dom v
+    _cmsIOPrintf(m, "mul ");                      // tab val2
+    _cmsIOPrintf(m, "dup ");                      // tab val2 val2
+    _cmsIOPrintf(m, "dup ");                      // tab val2 val2 val2
+    _cmsIOPrintf(m, "floor cvi ");                // tab val2 val2 cell0
+    _cmsIOPrintf(m, "exch ");                     // tab val2 cell0 val2
+    _cmsIOPrintf(m, "ceiling cvi ");              // tab val2 cell0 cell1
+    _cmsIOPrintf(m, "3 index ");                  // tab val2 cell0 cell1 tab
+    _cmsIOPrintf(m, "exch ");                     // tab val2 cell0 tab cell1
+    _cmsIOPrintf(m, "get ");                      // tab val2 cell0 y1
+    _cmsIOPrintf(m, "4 -1 roll ");                // val2 cell0 y1 tab
+    _cmsIOPrintf(m, "3 -1 roll ");                // val2 y1 tab cell0
+    _cmsIOPrintf(m, "get ");                      // val2 y1 y0
+    _cmsIOPrintf(m, "dup ");                      // val2 y1 y0 y0
+    _cmsIOPrintf(m, "3 1 roll ");                 // val2 y0 y1 y0
+    _cmsIOPrintf(m, "sub ");                      // val2 y0 (y1-y0)
+    _cmsIOPrintf(m, "3 -1 roll ");                // y0 (y1-y0) val2
+    _cmsIOPrintf(m, "dup ");                      // y0 (y1-y0) val2 val2
+    _cmsIOPrintf(m, "floor cvi ");                // y0 (y1-y0) val2 floor(val2)
+    _cmsIOPrintf(m, "sub ");                      // y0 (y1-y0) rest
+    _cmsIOPrintf(m, "mul ");                      // y0 t1
+    _cmsIOPrintf(m, "add ");                      // y
+    _cmsIOPrintf(m, "65535 div ");                // result
+
+    _cmsIOPrintf(m, " } bind ");
+}
+
+
+// Tells whether the first nEntries 16-bit words of two gamma tables match.
+// Both tables must hold at least nEntries words.
+
+static
+cmsBool GammaTableEquals(cmsUInt16Number* g1, cmsUInt16Number* g2, cmsUInt32Number nEntries)
+{
+    const size_t nBytes = nEntries * sizeof(cmsUInt16Number);
+
+    if (memcmp(g1, g2, nBytes) != 0)
+        return FALSE;
+
+    return TRUE;
+}
+
+
+// Writes a set of n gamma curves. A curve whose table repeats the previous
+// one is emitted as "dup" instead of being written out again. Stops at the
+// first NULL curve.
+
+static
+void EmitNGamma(cmsIOHANDLER* m, cmsUInt32Number n, cmsToneCurve* g[])
+{
+    cmsUInt32Number i;
+
+    for( i=0; i < n; i++ )
+    {
+        if (g[i] == NULL) return; // Error
+
+        // Tables of different length can never be equal. Checking the sizes
+        // first also keeps memcmp from reading g[i]->nEntries words out of a
+        // shorter previous table.
+        if (i > 0 &&
+            g[i-1]->nEntries == g[i]->nEntries &&
+            GammaTableEquals(g[i-1]->Table16, g[i]->Table16, g[i]->nEntries)) {
+
+            _cmsIOPrintf(m, "dup ");
+        }
+        else {
+            Emit1Gamma(m, g[i]);
+        }
+    }
+
+}
+
+
+
+
+
+// Following code dumps a LUT onto memory stream
+
+
+// This is the sampler. Intended to work in SAMPLER_INSPECT mode,
+// that is, the callback will be called for each knot with
+//
+//          In[]  The grid location coordinates, normalized to 0..ffff
+//          Out[] The Pipeline values, normalized to 0..ffff
+//
+//  Cargo is the cmsPsSamplerCargo set up by the caller.
+//
+//  NOTE(review): this callback returns 1 after writing a node and 0 only
+//  when the end points of the color space cannot be obtained, so 0 appears
+//  to be the value that stops the sampling -- confirm against
+//  cmsStageSampleCLut16bit.
+//
+//  Each row contains Pipeline values for all but first component. So, I
+//  detect row changing by keeping a copy of last value of first
+//  component. -1 is used to mark beginning of whole block.
+
+static
+int OutputValueSampler(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[], CMSREGISTER void* Cargo)
+{
+    cmsPsSamplerCargo* sc = (cmsPsSamplerCargo*) Cargo;
+    cmsUInt32Number i;
+
+
+    // Optional white fixup: nodes at maximum first coordinate whose second
+    // and third coordinates are close to the middle get the white end point
+    // of the color space instead of the sampled value.
+    if (sc -> FixWhite) {
+
+        if (In[0] == 0xFFFF) {  // Only in L* = 100, ab = [-8..8]
+
+            if ((In[1] >= 0x7800 && In[1] <= 0x8800) &&
+                (In[2] >= 0x7800 && In[2] <= 0x8800)) {
+
+                cmsUInt16Number* Black;
+                cmsUInt16Number* White;
+                cmsUInt32Number nOutputs;
+
+                if (!_cmsEndPointsBySpace(sc ->ColorSpace, &White, &Black, &nOutputs))
+                        return 0;
+
+                for (i=0; i < nOutputs; i++)
+                        Out[i] = White[i];
+            }
+
+
+        }
+    }
+
+
+    // Handle the parenthesis on rows
+
+    // First component changed: close the rows that are open (if any) and
+    // open a new major row.
+    if (In[0] != sc ->FirstComponent) {
+
+            if (sc ->FirstComponent != -1) {
+
+                    _cmsIOPrintf(sc ->m, sc ->PostMin);
+                    sc ->SecondComponent = -1;
+                    _cmsIOPrintf(sc ->m, sc ->PostMaj);
+            }
+
+            // Begin block
+            _cmsPSActualColumn = 0;
+
+            _cmsIOPrintf(sc ->m, sc ->PreMaj);
+            sc ->FirstComponent = In[0];
+    }
+
+
+      // Second component changed: close the open minor row (if any) and
+      // open a new one.
+      if (In[1] != sc ->SecondComponent) {
+
+            if (sc ->SecondComponent != -1) {
+
+                    _cmsIOPrintf(sc ->m, sc ->PostMin);
+            }
+
+            _cmsIOPrintf(sc ->m, sc ->PreMin);
+            sc ->SecondComponent = In[1];
+    }
+
+      // Dump table.
+
+      // One byte is written per output channel of the CLUT
+      for (i=0; i < sc -> Pipeline ->Params->nOutputs; i++) {
+
+          cmsUInt16Number wWordOut = Out[i];
+          cmsUInt8Number wByteOut;           // Value as byte
+
+
+          // We always deal with Lab4
+
+          wByteOut = Word2Byte(wWordOut);
+          WriteByte(sc -> m, wByteOut);
+      }
+
+      return 1;
+}
+
+// Dumps a CLUT stage to the stream: first the number of samples of every
+// input dimension, then all node values as written by OutputValueSampler.
+// PreMaj/PostMaj and PreMin/PostMin are the strings emitted around the
+// major and minor rows of the table. FixWhite and ColorSpace are handed
+// over to the sampler.
+
+static
+void WriteCLUT(cmsIOHANDLER* m, cmsStage* mpe, const char* PreMaj,
+                                             const char* PostMaj,
+                                             const char* PreMin,
+                                             const char* PostMin,
+                                             int FixWhite,
+                                             cmsColorSpaceSignature ColorSpace)
+{
+    _cmsStageCLutData* Clut = (_cmsStageCLutData *) mpe ->Data;
+    cmsPsSamplerCargo Cargo;
+    cmsUInt32Number dim;
+
+    // Sampler state. -1 means that no row has been opened yet.
+    Cargo.m               = m;
+    Cargo.Pipeline        = Clut;
+    Cargo.FirstComponent  = -1;
+    Cargo.SecondComponent = -1;
+    Cargo.PreMaj          = PreMaj;
+    Cargo.PostMaj         = PostMaj;
+    Cargo.PreMin          = PreMin;
+    Cargo.PostMin         = PostMin;
+    Cargo.FixWhite        = FixWhite;
+    Cargo.ColorSpace      = ColorSpace;
+
+    // Grid dimensions
+    _cmsIOPrintf(m, "[");
+
+    for (dim = 0; dim < Clut->Params->nInputs; dim++) {
+        _cmsIOPrintf(m, " %d ", Clut->Params->nSamples[dim]);
+    }
+
+    _cmsIOPrintf(m, " [\n");
+
+    // Node values
+    cmsStageSampleCLut16bit(mpe, OutputValueSampler, (void*) &Cargo, SAMPLER_INSPECT);
+
+    // Close the rows left open by the sampler, then the table itself
+    _cmsIOPrintf(m, PostMin);
+    _cmsIOPrintf(m, PostMaj);
+    _cmsIOPrintf(m, "] ");
+}
+
+
+// Writes a CIEBasedA color space array: the curve as /DecodeA, a fixed
+// /MatrixA and /RangeLMN, the white/black points and the perceptual intent.
+// Always returns 1.
+
+static
+int EmitCIEBasedA(cmsIOHANDLER* m, cmsToneCurve* Curve, cmsCIEXYZ* BlackPoint)
+{
+    // Array and dictionary opening
+    _cmsIOPrintf(m, "[ /CIEBasedA\n");
+    _cmsIOPrintf(m, "  <<\n");
+
+    // Decoding curve
+    _cmsIOPrintf(m, "/DecodeA ");
+    Emit1Gamma(m, Curve);
+    _cmsIOPrintf(m, " \n");
+
+    // Fixed matrix and range
+    _cmsIOPrintf(m, "/MatrixA [ 0.9642 1.0000 0.8249 ]\n");
+    _cmsIOPrintf(m, "/RangeLMN [ 0.0 0.9642 0.0 1.0000 0.0 0.8249 ]\n");
+
+    // White/black points and rendering intent
+    EmitWhiteBlackD50(m, BlackPoint);
+    EmitIntent(m, INTENT_PERCEPTUAL);
+
+    // Dictionary and array closing
+    _cmsIOPrintf(m, ">>\n");
+    _cmsIOPrintf(m, "]\n");
+
+    return 1;
+}
+
+
+// Writes a CIEBasedABC color space array: three curves as /DecodeABC, the
+// 3x3 matrix as /MatrixABC, a fixed /RangeLMN, the white/black points and
+// the perceptual intent. Always returns 1.
+
+static
+int EmitCIEBasedABC(cmsIOHANDLER* m, cmsFloat64Number* Matrix, cmsToneCurve** CurveSet, cmsCIEXYZ* BlackPoint)
+{
+    int col;
+
+    _cmsIOPrintf(m, "[ /CIEBasedABC\n");
+    _cmsIOPrintf(m, "<<\n");
+
+    // Decoding curves
+    _cmsIOPrintf(m, "/DecodeABC [ ");
+    EmitNGamma(m, 3, CurveSet);
+    _cmsIOPrintf(m, "]\n");
+
+    // The matrix is walked column by column: each group of three values
+    // takes element col from every row of the 3x3 array.
+    _cmsIOPrintf(m, "/MatrixABC [ " );
+
+    for (col = 0; col < 3; col++) {
+
+        const cmsFloat64Number Row0 = Matrix[col];
+        const cmsFloat64Number Row1 = Matrix[col + 3];
+        const cmsFloat64Number Row2 = Matrix[col + 6];
+
+        _cmsIOPrintf(m, "%.6f %.6f %.6f ", Row0, Row1, Row2);
+    }
+
+    _cmsIOPrintf(m, "]\n");
+
+    _cmsIOPrintf(m, "/RangeLMN [ 0.0 0.9642 0.0 1.0000 0.0 0.8249 ]\n");
+
+    // White/black points and rendering intent
+    EmitWhiteBlackD50(m, BlackPoint);
+    EmitIntent(m, INTENT_PERCEPTUAL);
+
+    _cmsIOPrintf(m, ">>\n");
+    _cmsIOPrintf(m, "]\n");
+
+    return 1;
+}
+
+
+// Writes a CIEBasedDEF (3 input channels) or CIEBasedDEFG (4 input channels)
+// color space array from a pipeline. An optional leading curve set is
+// written as /DecodeDEF and a CLUT stage as /Table.
+//
+// Returns 1 on success, 0 when the pipeline is empty or the number of input
+// channels is not 3 or 4. Nothing is written in the failing cases.
+
+static
+int EmitCIEBasedDEF(cmsIOHANDLER* m, cmsPipeline* Pipeline, cmsUInt32Number Intent, cmsCIEXYZ* BlackPoint)
+{
+    const char* PreMaj;
+    const char* PostMaj;
+    const char* PreMin, *PostMin;
+    cmsStage* mpe;
+
+    mpe = Pipeline ->Elements;
+
+    // An empty pipeline has no stage to query
+    if (mpe == NULL) return 0;
+
+    switch (cmsStageInputChannels(mpe)) {
+    case 3:
+
+            _cmsIOPrintf(m, "[ /CIEBasedDEF\n");
+            PreMaj ="<";
+            PostMaj= ">\n";
+            PreMin = PostMin = "";
+            break;
+    case 4:
+            _cmsIOPrintf(m, "[ /CIEBasedDEFG\n");
+            PreMaj = "[";
+            PostMaj = "]\n";
+            PreMin = "<";
+            PostMin = ">\n";
+            break;
+    default:
+            return 0;
+
+    }
+
+    _cmsIOPrintf(m, "<<\n");
+
+    if (cmsStageType(mpe) == cmsSigCurveSetElemType) {
+
+        _cmsIOPrintf(m, "/DecodeDEF [ ");
+        EmitNGamma(m, cmsStageOutputChannels(mpe), _cmsStageGetPtrToCurveSet(mpe));
+        _cmsIOPrintf(m, "]\n");
+
+        mpe = mpe ->Next;
+    }
+
+    // The curve set may have been the only stage, so mpe can be NULL here
+    if (mpe != NULL && cmsStageType(mpe) == cmsSigCLutElemType) {
+
+            _cmsIOPrintf(m, "/Table ");
+            WriteCLUT(m, mpe, PreMaj, PostMaj, PreMin, PostMin, FALSE, (cmsColorSpaceSignature) 0);
+            _cmsIOPrintf(m, "]\n");
+    }
+
+    EmitLab2XYZ(m);
+    EmitWhiteBlackD50(m, BlackPoint);
+    EmitIntent(m, Intent);
+
+    _cmsIOPrintf(m, "   >>\n");
+    _cmsIOPrintf(m, "]\n");
+
+    return 1;
+}
+
+// Builds a 256-entry tone curve from a gray profile: every 8-bit gray level
+// is transformed to XYZ and its Y component is stored as a 16-bit word.
+// The table is left as built when the transform cannot be created. The
+// returned curve (which may be NULL) belongs to the caller.
+
+static
+cmsToneCurve* ExtractGray2Y(cmsContext ContextID, cmsHPROFILE hProfile, cmsUInt32Number Intent)
+{
+    cmsToneCurve* Curve = cmsBuildTabulatedToneCurve16(ContextID, 256, NULL);
+    cmsHPROFILE hXYZ  = cmsCreateXYZProfile();
+    cmsHTRANSFORM xform = cmsCreateTransformTHR(ContextID, hProfile, TYPE_GRAY_8, hXYZ, TYPE_XYZ_DBL, Intent, cmsFLAGS_NOOPTIMIZE);
+    int level = 0;
+
+    if (Curve != NULL && xform != NULL) {
+
+        while (level < 256) {
+
+            cmsUInt8Number Gray = (cmsUInt8Number) level;
+            cmsCIEXYZ XYZ;
+
+            cmsDoTransform(xform, &Gray, &XYZ, 1);
+
+            Curve ->Table16[level] = _cmsQuickSaturateWord(XYZ.Y * 65535.0);
+            level++;
+        }
+    }
+
+    // Release whatever was created
+    if (xform != NULL) cmsDeleteTransform(xform);
+    if (hXYZ != NULL) cmsCloseProfile(hXYZ);
+
+    return Curve;
+}
+
+
+
+// Because PostScript has only 8 bits in /Table, we should use
+// a more perceptually uniform space... I do choose Lab.
+//
+// Writes a LUT-based CSA for the profile. One-channel profiles are written
+// as CIEBasedA, 3 and 4 channel profiles as CIEBasedDEF(G).
+// Returns 1 on success and 0 on error. The transform created here is
+// released on every path.
+
+static
+int WriteInputLUT(cmsIOHANDLER* m, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags)
+{
+    cmsHPROFILE hLab;
+    cmsHTRANSFORM xform;
+    cmsUInt32Number nChannels;
+    cmsUInt32Number InputFormat;
+    int rc = 0;
+    cmsHPROFILE Profiles[2];
+    cmsCIEXYZ BlackPointAdaptedToD50;
+
+    // Does create a device-link based transform.
+    // The DeviceLink is next dumped as working CSA.
+
+    InputFormat = cmsFormatterForColorspaceOfProfile(hProfile, 2, FALSE);
+    nChannels   = T_CHANNELS(InputFormat);
+
+
+    cmsDetectBlackPoint(&BlackPointAdaptedToD50, hProfile, Intent, 0);
+
+    // Adjust output to Lab4
+    hLab = cmsCreateLab4ProfileTHR(m ->ContextID, NULL);
+
+    Profiles[0] = hProfile;
+    Profiles[1] = hLab;
+
+    xform = cmsCreateMultiprofileTransform(Profiles, 2,  InputFormat, TYPE_Lab_DBL, Intent, 0);
+    cmsCloseProfile(hLab);
+
+    if (xform == NULL) {
+
+        cmsSignalError(m ->ContextID, cmsERROR_COLORSPACE_CHECK, "Cannot create transform Profile -> Lab");
+        return 0;
+    }
+
+    // Only 1, 3 and 4 channels are allowed. rc stays 0 on every failure so
+    // that all paths fall through to the common cleanup below.
+
+    switch (nChannels) {
+
+    case 1: {
+            cmsToneCurve* Gray2Y = ExtractGray2Y(m ->ContextID, hProfile, Intent);
+            EmitCIEBasedA(m, Gray2Y, &BlackPointAdaptedToD50);
+            cmsFreeToneCurve(Gray2Y);
+            rc = 1;
+            }
+            break;
+
+    case 3:
+    case 4: {
+            cmsUInt32Number OutFrm = TYPE_Lab_16;
+            cmsPipeline* DeviceLink;
+            _cmsTRANSFORM* v = (_cmsTRANSFORM*) xform;
+
+            DeviceLink = cmsPipelineDup(v ->Lut);
+            if (DeviceLink == NULL) break;
+
+            dwFlags |= cmsFLAGS_FORCE_CLUT;
+            _cmsOptimizePipeline(m->ContextID, &DeviceLink, Intent, &InputFormat, &OutFrm, &dwFlags);
+
+            rc = EmitCIEBasedDEF(m, DeviceLink, Intent, &BlackPointAdaptedToD50);
+            cmsPipelineFree(DeviceLink);
+            }
+            break;
+
+    default:
+
+        cmsSignalError(m ->ContextID, cmsERROR_COLORSPACE_CHECK, "Only 3, 4 channels supported for CSA. This profile has %d channels.", nChannels);
+        break;
+    }
+
+
+    // The transform was leaked on the error returns before
+    cmsDeleteTransform(xform);
+
+    return (rc != 0) ? 1 : 0;
+}
+
+// Returns the Double array of a matrix stage. The stage data is taken to
+// be a _cmsStageMatrixData; the stage type is not checked here.
+static
+cmsFloat64Number* GetPtrToMatrix(const cmsStage* mpe)
+{
+    return ((_cmsStageMatrixData*) mpe ->Data) ->Double;
+}
+
+
+// Writes a CSA from a matrix-shaper. Gray profiles are written as CIEBasedA
+// using the first shaper curve, RGB profiles as CIEBasedABC using the
+// matrix scaled by MAX_ENCODEABLE_XYZ. Any other color space signals an
+// error and returns 0.
+static
+int WriteInputMatrixShaper(cmsIOHANDLER* m, cmsHPROFILE hProfile, cmsStage* Matrix, cmsStage* Shaper)
+{
+    cmsColorSpaceSignature ColorSpace = cmsGetColorSpace(hProfile);
+    cmsCIEXYZ BlackPointAdaptedToD50;
+
+    cmsDetectBlackPoint(&BlackPointAdaptedToD50, hProfile, INTENT_RELATIVE_COLORIMETRIC, 0);
+
+    switch (ColorSpace) {
+
+    case cmsSigGrayData: {
+
+        cmsToneCurve** ShaperCurve = _cmsStageGetPtrToCurveSet(Shaper);
+
+        return EmitCIEBasedA(m, ShaperCurve[0], &BlackPointAdaptedToD50);
+    }
+
+    case cmsSigRgbData: {
+
+        cmsMAT3 Scaled;
+        int row, col;
+
+        // Work on a scaled copy, the stage matrix itself is left untouched
+        memmove(&Scaled, GetPtrToMatrix(Matrix), sizeof(Scaled));
+
+        for (row = 0; row < 3; row++) {
+            for (col = 0; col < 3; col++) {
+                Scaled.v[row].n[col] *= MAX_ENCODEABLE_XYZ;
+            }
+        }
+
+        return EmitCIEBasedABC(m, (cmsFloat64Number *)&Scaled,
+                               _cmsStageGetPtrToCurveSet(Shaper),
+                               &BlackPointAdaptedToD50);
+    }
+
+    default:
+        break;
+    }
+
+    cmsSignalError(m->ContextID, cmsERROR_COLORSPACE_CHECK, "Profile is not suitable for CSA. Unsupported colorspace.");
+    return 0;
+}
+
+
+
+// Creates a PostScript color list from a named profile data.
+// This is a HP extension, and it works in Lab instead of XYZ
+//
+// Returns 1 on success, 0 when the transform or the named color list cannot
+// be obtained. The Lab profile and the transform are released on every path.
+
+static
+int WriteNamedColorCSA(cmsIOHANDLER* m, cmsHPROFILE hNamedColor, cmsUInt32Number Intent)
+{
+    cmsHTRANSFORM xform;
+    cmsHPROFILE   hLab;
+    cmsUInt32Number i, nColors;
+    char ColorName[cmsMAX_PATH];
+    cmsNAMEDCOLORLIST* NamedColorList;
+    int rc = 0;
+
+    hLab  = cmsCreateLab4ProfileTHR(m ->ContextID, NULL);
+    xform = cmsCreateTransform(hNamedColor, TYPE_NAMED_COLOR_INDEX, hLab, TYPE_Lab_DBL, Intent, 0);
+    if (xform == NULL) goto Done;
+
+    NamedColorList = cmsGetNamedColorList(xform);
+    if (NamedColorList == NULL) goto Done;
+
+    _cmsIOPrintf(m, "<<\n");
+    _cmsIOPrintf(m, "(colorlistcomment) (%s)\n", "Named color CSA");
+    _cmsIOPrintf(m, "(Prefix) [ (Pantone ) (PANTONE ) ]\n");
+    _cmsIOPrintf(m, "(Suffix) [ ( CV) ( CVC) ( C) ]\n");
+
+    nColors   = cmsNamedColorCount(NamedColorList);
+
+
+    // One line per color: its name and its Lab value
+    for (i=0; i < nColors; i++) {
+
+        cmsUInt16Number In[1];
+        cmsCIELab Lab;
+
+        In[0] = (cmsUInt16Number) i;
+
+        // Colors whose info cannot be read are skipped
+        if (!cmsNamedColorInfo(NamedColorList, i, ColorName, NULL, NULL, NULL, NULL))
+                continue;
+
+        cmsDoTransform(xform, In, &Lab, 1);
+        _cmsIOPrintf(m, "  (%s) [ %.3f %.3f %.3f ]\n", ColorName, Lab.L, Lab.a, Lab.b);
+    }
+
+
+
+    _cmsIOPrintf(m, ">>\n");
+
+    rc = 1;
+
+Done:
+    // Common cleanup. The early returns used to leak both handles.
+    if (xform != NULL) cmsDeleteTransform(xform);
+    if (hLab != NULL) cmsCloseProfile(hLab);
+    return rc;
+}
+
+
+// Creates a Color Space Array for PostScript usage and writes it to mem.
+// Named color profiles get a color list. Other profiles must have an XYZ or
+// Lab PCS and are written as matrix-shaper when the input LUT is exactly
+// curves + matrix, or as a LUT otherwise.
+// Returns the number of bytes used in mem, or 0 on error.
+static
+cmsUInt32Number GenerateCSA(cmsContext ContextID,
+                            cmsHPROFILE hProfile,
+                            cmsUInt32Number Intent,
+                            cmsUInt32Number dwFlags,
+                            cmsIOHANDLER* mem)
+{
+    cmsUInt32Number dwBytesUsed = 0;
+    cmsPipeline* lut = NULL;
+    cmsStage* Matrix, *Shaper;
+    int Written;
+
+    if (cmsGetDeviceClass(hProfile) == cmsSigNamedColorClass) {
+
+        // Named color profile
+        Written = WriteNamedColorCSA(mem, hProfile, Intent);
+    }
+    else {
+
+        // Any profile class are allowed (including devicelink), but
+        // output (PCS) colorspace must be XYZ or Lab
+        cmsColorSpaceSignature ColorSpace = cmsGetPCS(hProfile);
+
+        if (ColorSpace != cmsSigXYZData &&
+            ColorSpace != cmsSigLabData) {
+
+                cmsSignalError(ContextID, cmsERROR_COLORSPACE_CHECK, "Invalid output color space");
+                return 0;
+        }
+
+        // Read the lut with all necessary conversion stages
+        lut = _cmsReadInputLUT(hProfile, Intent);
+        if (lut == NULL) return 0;
+
+        // Tone curves + matrix can be implemented without any LUT,
+        // everything else needs one
+        if (cmsPipelineCheckAndRetreiveStages(lut, 2, cmsSigCurveSetElemType, cmsSigMatrixElemType, &Shaper, &Matrix))
+            Written = WriteInputMatrixShaper(mem, hProfile, Matrix, Shaper);
+        else
+            Written = WriteInputLUT(mem, hProfile, Intent, dwFlags);
+    }
+
+    // The byte count is reported only on success
+    if (Written)
+        dwBytesUsed = mem ->UsedSpace;
+
+    // Get rid of LUT
+    if (lut != NULL) cmsPipelineFree(lut);
+
+    return dwBytesUsed;
+}
+
+// ------------------------------------------------------ Color Rendering Dictionary (CRD)
+
+
+
+/*
+
+  Black point compensation plus chromatic adaptation:
+
+  Step 1 - Chromatic adaptation
+  =============================
+
+          WPout
+    X = ------- PQR
+          Wpin
+
+  Step 2 - Black point compensation
+  =================================
+
+          (WPout - BPout)*X - WPout*(BPin - BPout)
+    out = ---------------------------------------
+                        WPout - BPin
+
+
+  Algorithm discussion
+  ====================
+
+  TransformPQR(WPin, BPin, WPout, BPout, PQR)
+
+  Wpin,etc= { Xws Yws Zws Pws Qws Rws }
+
+
+  Algorithm             Stack 0...n
+  ===========================================================
+                        PQR BPout WPout BPin WPin
+  4 index 3 get         WPin PQR BPout WPout BPin WPin
+  div                   (PQR/WPin) BPout WPout BPin WPin
+  2 index 3 get         WPout (PQR/WPin) BPout WPout BPin WPin
+  mult                  WPout*(PQR/WPin) BPout WPout BPin WPin
+
+  2 index 3 get         WPout WPout*(PQR/WPin) BPout WPout BPin WPin
+  2 index 3 get         BPout WPout WPout*(PQR/WPin) BPout WPout BPin WPin
+  sub                   (WPout-BPout) WPout*(PQR/WPin) BPout WPout BPin WPin
+  mult                  (WPout-BPout)* WPout*(PQR/WPin) BPout WPout BPin WPin
+
+  2 index 3 get         WPout (BPout-WPout)* WPout*(PQR/WPin) BPout WPout BPin WPin
+  4 index 3 get         BPin WPout (BPout-WPout)* WPout*(PQR/WPin) BPout WPout BPin WPin
+  3 index 3 get         BPout BPin WPout (BPout-WPout)* WPout*(PQR/WPin) BPout WPout BPin WPin
+
+  sub                   (BPin-BPout) WPout (BPout-WPout)* WPout*(PQR/WPin) BPout WPout BPin WPin
+  mult                  (BPin-BPout)*WPout (BPout-WPout)* WPout*(PQR/WPin) BPout WPout BPin WPin
+  sub                   (BPout-WPout)* WPout*(PQR/WPin)-(BPin-BPout)*WPout BPout WPout BPin WPin
+
+  3 index 3 get         BPin (BPout-WPout)* WPout*(PQR/WPin)-(BPin-BPout)*WPout BPout WPout BPin WPin
+  3 index 3 get         WPout BPin (BPout-WPout)* WPout*(PQR/WPin)-(BPin-BPout)*WPout BPout WPout BPin WPin
+  exch
+  sub                   (WPout-BPin) (BPout-WPout)* WPout*(PQR/WPin)-(BPin-BPout)*WPout BPout WPout BPin WPin
+  div
+
+  exch pop
+  exch pop
+  exch pop
+  exch pop
+
+*/
+
+
+// Writes the /MatrixPQR, /RangePQR and /TransformPQR entries of a CRD.
+// For absolute colorimetric an identity matrix is used and the transform
+// rescales by the media white point of the profile. Otherwise the Bradford
+// matrix is written, with or without black point compensation in the
+// transform depending on DoBPC.
+static
+void EmitPQRStage(cmsIOHANDLER* m, cmsHPROFILE hProfile, int DoBPC, int lIsAbsolute)
+{
+
+        if (lIsAbsolute) {
+
+            // For absolute colorimetric intent, encode back to relative
+            // and generate a relative Pipeline
+
+            // Relative encoding is obtained across XYZpcs*(D50/WhitePoint)
+
+            cmsCIEXYZ MediaWhite;
+
+            _cmsReadMediaWhitePoint(&MediaWhite, hProfile);
+
+            _cmsIOPrintf(m,"/MatrixPQR [1 0 0 0 1 0 0 0 1 ]\n");
+            _cmsIOPrintf(m,"/RangePQR [ -0.5 2 -0.5 2 -0.5 2 ]\n");
+
+            _cmsIOPrintf(m, "%% Absolute colorimetric -- encode to relative to maximize LUT usage\n"
+                      "/TransformPQR [\n"
+                      "{0.9642 mul %g div exch pop exch pop exch pop exch pop} bind\n"
+                      "{1.0000 mul %g div exch pop exch pop exch pop exch pop} bind\n"
+                      "{0.8249 mul %g div exch pop exch pop exch pop exch pop} bind\n]\n",
+                      MediaWhite.X, MediaWhite.Y, MediaWhite.Z);
+            return;
+        }
+
+        // Matrix and range are shared by both remaining variants
+        _cmsIOPrintf(m,"%% Bradford Cone Space\n"
+                 "/MatrixPQR [0.8951 -0.7502 0.0389 0.2664 1.7135 -0.0685 -0.1614 0.0367 1.0296 ] \n");
+
+        _cmsIOPrintf(m, "/RangePQR [ -0.5 2 -0.5 2 -0.5 2 ]\n");
+
+        if (DoBPC) {
+
+            // Chromatic adaptation plus black point compensation. The three
+            // procedures differ only in the array index they read (3, 4, 5).
+
+            _cmsIOPrintf(m, "%% VonKries-like transform in Bradford Cone Space plus BPC\n"
+                      "/TransformPQR [\n");
+
+            _cmsIOPrintf(m, "{4 index 3 get div 2 index 3 get mul "
+                    "2 index 3 get 2 index 3 get sub mul "
+                    "2 index 3 get 4 index 3 get 3 index 3 get sub mul sub "
+                    "3 index 3 get 3 index 3 get exch sub div "
+                    "exch pop exch pop exch pop exch pop } bind\n");
+
+            _cmsIOPrintf(m, "{4 index 4 get div 2 index 4 get mul "
+                    "2 index 4 get 2 index 4 get sub mul "
+                    "2 index 4 get 4 index 4 get 3 index 4 get sub mul sub "
+                    "3 index 4 get 3 index 4 get exch sub div "
+                    "exch pop exch pop exch pop exch pop } bind\n");
+
+            _cmsIOPrintf(m, "{4 index 5 get div 2 index 5 get mul "
+                    "2 index 5 get 2 index 5 get sub mul "
+                    "2 index 5 get 4 index 5 get 3 index 5 get sub mul sub "
+                    "3 index 5 get 3 index 5 get exch sub div "
+                    "exch pop exch pop exch pop exch pop } bind\n]\n");
+        }
+        else {
+
+            // Chromatic adaptation only
+
+            _cmsIOPrintf(m, "%% VonKries-like transform in Bradford Cone Space\n"
+                      "/TransformPQR [\n"
+                      "{exch pop exch 3 get mul exch pop exch 3 get div} bind\n"
+                      "{exch pop exch 4 get mul exch pop exch 4 get div} bind\n"
+                      "{exch pop exch 5 get mul exch pop exch 5 get div} bind\n]\n");
+        }
+}
+
+
+// Writes the fixed /RangeLMN, /EncodeLMN, /MatrixABC and /EncodeABC entries
+// of a CRD. The text does not depend on any profile.
+static
+void EmitXYZ2Lab(cmsIOHANDLER* m)
+{
+    // The lines are emitted one by one, in this order. None of them holds a
+    // format specifier.
+    static const char* const Lines[] = {
+        "/RangeLMN [ -0.635 2.0 0 2 -0.635 2.0 ]\n",
+        "/EncodeLMN [\n",
+        "{ 0.964200  div dup 0.008856 le {7.787 mul 16 116 div add}{1 3 div exp} ifelse } bind\n",
+        "{ 1.000000  div dup 0.008856 le {7.787 mul 16 116 div add}{1 3 div exp} ifelse } bind\n",
+        "{ 0.824900  div dup 0.008856 le {7.787 mul 16 116 div add}{1 3 div exp} ifelse } bind\n",
+        "]\n",
+        "/MatrixABC [ 0 1 0 1 -1 1 0 0 -1 ]\n",
+        "/EncodeABC [\n",
+        "{ 116 mul  16 sub 100 div  } bind\n",
+        "{ 500 mul 128 add 256 div  } bind\n",
+        "{ 200 mul 128 add 256 div  } bind\n",
+        "]\n"
+    };
+    const cmsUInt32Number nLines = sizeof(Lines) / sizeof(Lines[0]);
+    cmsUInt32Number k;
+
+    for (k = 0; k < nLines; k++) {
+        _cmsIOPrintf(m, Lines[k]);
+    }
+}
+
+// Due to impedance mismatch between XYZ and almost all RGB and CMYK spaces
+// I choose to dump LUTS in Lab instead of XYZ. There is still a lot of wasted
+// space on 3D CLUT, but since space seems not to be a problem here, 33 points
+// would give a reasonable accuracy. Note also that CRD tables must operate in
+// 8 bits.
+//
+// Writes a LUT-based Color Rendering Dictionary for the profile.
+// Returns 1 on success, 0 on error. Nothing is written when the device link
+// cannot be obtained or does not start with a CLUT stage, and the transform
+// and the device link are released on every path.
+
+static
+int WriteOutputLUT(cmsIOHANDLER* m, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags)
+{
+    cmsHPROFILE hLab;
+    cmsHTRANSFORM xform;
+    cmsUInt32Number i, nChannels;
+    cmsUInt32Number OutputFormat;
+    _cmsTRANSFORM* v;
+    cmsPipeline* DeviceLink;
+    cmsStage* FirstStage;
+    cmsHPROFILE Profiles[3];
+    cmsCIEXYZ BlackPointAdaptedToD50;
+    cmsBool lDoBPC = (cmsBool) (dwFlags & cmsFLAGS_BLACKPOINTCOMPENSATION);
+    cmsBool lFixWhite = (cmsBool) !(dwFlags & cmsFLAGS_NOWHITEONWHITEFIXUP);
+    cmsUInt32Number InFrm = TYPE_Lab_16;
+    cmsUInt32Number RelativeEncodingIntent;
+    cmsColorSpaceSignature ColorSpace;
+
+
+    hLab = cmsCreateLab4ProfileTHR(m ->ContextID, NULL);
+    if (hLab == NULL) return 0;
+
+    OutputFormat = cmsFormatterForColorspaceOfProfile(hProfile, 2, FALSE);
+    nChannels    = T_CHANNELS(OutputFormat);
+
+    ColorSpace = cmsGetColorSpace(hProfile);
+
+    // For absolute colorimetric, the LUT is encoded as relative in order to preserve precision.
+
+    RelativeEncodingIntent = Intent;
+    if (RelativeEncodingIntent == INTENT_ABSOLUTE_COLORIMETRIC)
+        RelativeEncodingIntent = INTENT_RELATIVE_COLORIMETRIC;
+
+
+    // Use V4 Lab always
+    Profiles[0] = hLab;
+    Profiles[1] = hProfile;
+
+    xform = cmsCreateMultiprofileTransformTHR(m ->ContextID,
+                                              Profiles, 2, TYPE_Lab_DBL,
+                                              OutputFormat, RelativeEncodingIntent, 0);
+    cmsCloseProfile(hLab);
+
+    if (xform == NULL) {
+
+        cmsSignalError(m ->ContextID, cmsERROR_COLORSPACE_CHECK, "Cannot create transform Lab -> Profile in CRD creation");
+        return 0;
+    }
+
+    // Get a copy of the internal devicelink
+    v = (_cmsTRANSFORM*) xform;
+    DeviceLink = cmsPipelineDup(v ->Lut);
+    if (DeviceLink == NULL) {
+
+        // The transform was leaked on this path before
+        cmsDeleteTransform(xform);
+        return 0;
+    }
+
+
+    // We need a CLUT
+    dwFlags |= cmsFLAGS_FORCE_CLUT;
+    _cmsOptimizePipeline(m->ContextID, &DeviceLink, RelativeEncodingIntent, &InFrm, &OutputFormat, &dwFlags);
+
+    // WriteCLUT reads the stage data as CLUT data without checking, so make
+    // sure the first stage really is a CLUT before writing anything.
+    FirstStage = cmsPipelineGetPtrToFirstStage(DeviceLink);
+    if (FirstStage == NULL || cmsStageType(FirstStage) != cmsSigCLutElemType) {
+
+        cmsSignalError(m ->ContextID, cmsERROR_INTERNAL, "Cannot build a CLUT for CRD creation");
+        cmsPipelineFree(DeviceLink);
+        cmsDeleteTransform(xform);
+        return 0;
+    }
+
+    _cmsIOPrintf(m, "<<\n");
+    _cmsIOPrintf(m, "/ColorRenderingType 1\n");
+
+
+    cmsDetectBlackPoint(&BlackPointAdaptedToD50, hProfile, Intent, 0);
+
+    // Emit headers, etc.
+    EmitWhiteBlackD50(m, &BlackPointAdaptedToD50);
+    EmitPQRStage(m, hProfile, lDoBPC, Intent == INTENT_ABSOLUTE_COLORIMETRIC);
+    EmitXYZ2Lab(m);
+
+
+    // FIXUP: map Lab (100, 0, 0) to perfect white, because the particular encoding for Lab
+    // does map a=b=0 not falling into any specific node. Since range a,b goes -128..127,
+    // zero is slightly moved towards right, so assure next node (in L=100 slice) is mapped to
+    // zero. This would sacrifice a bit of highlights, but failure to do so would cause
+    // scum dot. Ouch.
+
+    if (Intent == INTENT_ABSOLUTE_COLORIMETRIC)
+            lFixWhite = FALSE;
+
+    _cmsIOPrintf(m, "/RenderTable ");
+
+
+    WriteCLUT(m, FirstStage, "<", ">\n", "", "", lFixWhite, ColorSpace);
+
+    // One empty procedure per output channel
+    _cmsIOPrintf(m, " %d {} bind ", nChannels);
+
+    for (i=1; i < nChannels; i++)
+            _cmsIOPrintf(m, "dup ");
+
+    _cmsIOPrintf(m, "]\n");
+
+
+    EmitIntent(m, Intent);
+
+    _cmsIOPrintf(m, ">>\n");
+
+    if (!(dwFlags & cmsFLAGS_NODEFAULTRESOURCEDEF)) {
+
+        _cmsIOPrintf(m, "/Current exch /ColorRendering defineresource pop\n");
+    }
+
+    cmsPipelineFree(DeviceLink);
+    cmsDeleteTransform(xform);
+
+    return 1;
+}
+
+
+// Builds an ASCII string holding the colorant values scaled to 0..1.0 with
+// three decimals, separated by single spaces. At most cmsMAXCHANNELS values
+// are written. Colorant must be large enough for the whole list; its size
+// is not checked here.
+static
+void BuildColorantList(char *Colorant, cmsUInt32Number nColorant, cmsUInt16Number Out[])
+{
+    char Value[32];
+    cmsUInt32Number nValues = (nColorant > cmsMAXCHANNELS) ? cmsMAXCHANNELS : nColorant;
+    cmsUInt32Number k;
+
+    // Start from an empty string
+    Colorant[0] = 0;
+
+    for (k = 0; k < nValues; k++) {
+
+        snprintf(Value, 31, "%.3f", Out[k] / 65535.0);
+        Value[31] = 0;
+        strcat(Colorant, Value);
+
+        // Separator between values, none after the last one
+        if (k + 1 < nValues)
+            strcat(Colorant, " ");
+    }
+}
+
+
+// Creates a PostScript color list from a named profile data.
+// This is a HP extension.
+//
+// Returns 1 on success, 0 when the transform or the named color list cannot
+// be obtained. The transform is released on every path.
+
+static
+int WriteNamedColorCRD(cmsIOHANDLER* m, cmsHPROFILE hNamedColor, cmsUInt32Number Intent, cmsUInt32Number dwFlags)
+{
+    cmsHTRANSFORM xform;
+    cmsUInt32Number i, nColors, nColorant;
+    cmsUInt32Number OutputFormat;
+    char ColorName[cmsMAX_PATH];
+    char Colorant[128];
+    cmsNAMEDCOLORLIST* NamedColorList;
+
+
+    OutputFormat = cmsFormatterForColorspaceOfProfile(hNamedColor, 2, FALSE);
+    nColorant    = T_CHANNELS(OutputFormat);
+
+
+    xform = cmsCreateTransform(hNamedColor, TYPE_NAMED_COLOR_INDEX, NULL, OutputFormat, Intent, dwFlags);
+    if (xform == NULL) return 0;
+
+
+    NamedColorList = cmsGetNamedColorList(xform);
+    if (NamedColorList == NULL) {
+
+        // The transform was leaked on this path before
+        cmsDeleteTransform(xform);
+        return 0;
+    }
+
+    _cmsIOPrintf(m, "<<\n");
+    _cmsIOPrintf(m, "(colorlistcomment) (%s) \n", "Named profile");
+    _cmsIOPrintf(m, "(Prefix) [ (Pantone ) (PANTONE ) ]\n");
+    _cmsIOPrintf(m, "(Suffix) [ ( CV) ( CVC) ( C) ]\n");
+
+    nColors   = cmsNamedColorCount(NamedColorList);
+
+    // One line per color: its name and its colorant values
+    for (i=0; i < nColors; i++) {
+
+        cmsUInt16Number In[1];
+        cmsUInt16Number Out[cmsMAXCHANNELS];
+
+        In[0] = (cmsUInt16Number) i;
+
+        // Colors whose info cannot be read are skipped
+        if (!cmsNamedColorInfo(NamedColorList, i, ColorName, NULL, NULL, NULL, NULL))
+                continue;
+
+        cmsDoTransform(xform, In, Out, 1);
+        BuildColorantList(Colorant, nColorant, Out);
+        _cmsIOPrintf(m, "  (%s) [ %s ]\n", ColorName, Colorant);
+    }
+
+    _cmsIOPrintf(m, "   >>");
+
+    if (!(dwFlags & cmsFLAGS_NODEFAULTRESOURCEDEF)) {
+
+    _cmsIOPrintf(m, " /Current exch /HPSpotTable defineresource pop\n");
+    }
+
+    cmsDeleteTransform(xform);
+    return 1;
+}
+
+
+
+// Creates a Color Rendering Dictionary and writes it to mem.
+// Named color profiles get a color list, every other profile is written as
+// a LUT, no matter if it is implemented as matrix-shaper. Unless
+// cmsFLAGS_NODEFAULTRESOURCEDEF is set, the output is wrapped in a resource
+// header and trailer.
+// Returns the number of bytes used in mem, or 0 on error.
+
+static
+cmsUInt32Number  GenerateCRD(cmsContext ContextID,
+                             cmsHPROFILE hProfile,
+                             cmsUInt32Number Intent, cmsUInt32Number dwFlags,
+                             cmsIOHANDLER* mem)
+{
+    const cmsBool AsResource = !(dwFlags & cmsFLAGS_NODEFAULTRESOURCEDEF);
+    int Written;
+
+    cmsUNUSED_PARAMETER(ContextID);
+
+    if (AsResource) {
+
+        EmitHeader(mem, "Color Rendering Dictionary (CRD)", hProfile);
+    }
+
+    // Named color profiles have their own writer, the rest are LUT based
+    if (cmsGetDeviceClass(hProfile) == cmsSigNamedColorClass)
+        Written = WriteNamedColorCRD(mem, hProfile, Intent, dwFlags);
+    else
+        Written = WriteOutputLUT(mem, hProfile, Intent, dwFlags);
+
+    if (!Written) return 0;
+
+    if (AsResource) {
+
+        _cmsIOPrintf(mem, "%%%%EndResource\n");
+        _cmsIOPrintf(mem, "\n%% CRD End\n");
+    }
+
+    // Finally, return used byte count
+    return mem ->UsedSpace;
+}
+
+
+
+
+// Writes a PostScript color resource of the given type to io and returns
+// the value of the generator used. cmsPS_RESOURCE_CSA selects the CSA
+// generator; any other type, cmsPS_RESOURCE_CRD included, selects the CRD
+// generator.
+cmsUInt32Number CMSEXPORT cmsGetPostScriptColorResource(cmsContext ContextID,
+                                                               cmsPSResourceType Type,
+                                                               cmsHPROFILE hProfile,
+                                                               cmsUInt32Number Intent,
+                                                               cmsUInt32Number dwFlags,
+                                                               cmsIOHANDLER* io)
+{
+    if (Type == cmsPS_RESOURCE_CSA)
+        return GenerateCSA(ContextID, hProfile, Intent, dwFlags, io);
+
+    return GenerateCRD(ContextID, hProfile, Intent, dwFlags, io);
+}
+
+
+
+// Creates a Color Rendering Dictionary for the profile.
+// The output goes to Buffer (dwBufferLen bytes) through a memory I/O
+// handler; when Buffer is NULL a handler opened with
+// cmsOpenIOhandlerFromNULL is used instead.
+// Returns the byte count reported by the generator, or 0 when the handler
+// cannot be opened.
+cmsUInt32Number CMSEXPORT cmsGetPostScriptCRD(cmsContext ContextID,
+                              cmsHPROFILE hProfile,
+                              cmsUInt32Number Intent, cmsUInt32Number dwFlags,
+                              void* Buffer, cmsUInt32Number dwBufferLen)
+{
+    cmsUInt32Number dwBytesUsed;
+
+    // Set up the serialization engine
+    cmsIOHANDLER* mem = (Buffer == NULL)
+                            ? cmsOpenIOhandlerFromNULL(ContextID)
+                            : cmsOpenIOhandlerFromMem(ContextID, Buffer, dwBufferLen, "w");
+
+    if (mem == NULL) return 0;
+
+    dwBytesUsed = cmsGetPostScriptColorResource(ContextID, cmsPS_RESOURCE_CRD, hProfile, Intent, dwFlags, mem);
+
+    // Get rid of memory stream
+    cmsCloseIOhandler(mem);
+
+    return dwBytesUsed;
+}
+
+
+
+// Creates a Color Space Array for the profile.
+// The output goes to Buffer (dwBufferLen bytes) through a memory I/O
+// handler; when Buffer is NULL a handler opened with
+// cmsOpenIOhandlerFromNULL is used instead.
+// Returns the byte count reported by the generator, or 0 when the handler
+// cannot be opened.
+cmsUInt32Number CMSEXPORT cmsGetPostScriptCSA(cmsContext ContextID,
+                                              cmsHPROFILE hProfile,
+                                              cmsUInt32Number Intent,
+                                              cmsUInt32Number dwFlags,
+                                              void* Buffer,
+                                              cmsUInt32Number dwBufferLen)
+{
+    cmsUInt32Number dwBytesUsed;
+
+    // Set up the serialization engine
+    cmsIOHANDLER* mem = (Buffer == NULL)
+                            ? cmsOpenIOhandlerFromNULL(ContextID)
+                            : cmsOpenIOhandlerFromMem(ContextID, Buffer, dwBufferLen, "w");
+
+    if (mem == NULL) return 0;
+
+    dwBytesUsed = cmsGetPostScriptColorResource(ContextID, cmsPS_RESOURCE_CSA, hProfile, Intent, dwFlags, mem);
+
+    // Get rid of memory stream
+    cmsCloseIOhandler(mem);
+
+    return dwBytesUsed;
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmssamp.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmssamp.c
new file mode 100644
index 0000000000..1fc5f5d467
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmssamp.c
@@ -0,0 +1,547 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+#define cmsmin(a, b) (((a) < (b)) ? (a) : (b))
+#define cmsmax(a, b) (((a) > (b)) ? (a) : (b))
+
+// This file contains routines for resampling and LUT optimization, black point detection
+// and black preservation.
+
+// Black point detection -------------------------------------------------------------------------
+
+
+// PCS -> PCS round trip through hProfile; only slot 1 takes nIntent, the rest are relative colorimetric
+static
+cmsHTRANSFORM CreateRoundtripXForm(cmsHPROFILE hProfile, cmsUInt32Number nIntent)
+{
+    cmsContext ctx = cmsGetProfileContextID(hProfile);
+    cmsHPROFILE hLab = cmsCreateLab4ProfileTHR(ctx, NULL);
+    cmsBool bpc[4] = { FALSE, FALSE, FALSE, FALSE };
+    cmsFloat64Number adaptation[4] = { 1.0, 1.0, 1.0, 1.0 };
+    cmsHPROFILE chain[4];
+    cmsUInt32Number intents[4];
+    cmsHTRANSFORM hRound;
+
+    intents[0] = intents[2] = intents[3] = INTENT_RELATIVE_COLORIMETRIC;
+    intents[1] = nIntent;
+    chain[0] = chain[3] = hLab;
+    chain[1] = chain[2] = hProfile;
+    hRound = cmsCreateExtendedTransform(ctx, 4, chain, bpc, intents, adaptation, NULL, 0,
+                 TYPE_Lab_DBL, TYPE_Lab_DBL, cmsFLAGS_NOCACHE|cmsFLAGS_NOOPTIMIZE);
+    cmsCloseProfile(hLab);
+    return hRound;
+}
+
+// Use darker colorants to obtain black point. This works in the relative colorimetric intent and
+// assumes more ink results in darker colors. No ink limit is assumed. Returns FALSE (BlackPoint zeroed) on failure.
+static
+cmsBool  BlackPointAsDarkerColorant(cmsHPROFILE    hInput,
+                                    cmsUInt32Number Intent,
+                                    cmsCIEXYZ* BlackPoint,
+                                    cmsUInt32Number dwFlags)
+{
+    cmsUInt16Number *Black;
+    cmsHTRANSFORM xform;
+    cmsColorSpaceSignature Space;
+    cmsUInt32Number nChannels;
+    cmsUInt32Number dwFormat;
+    cmsHPROFILE hLab;
+    cmsCIELab  Lab;
+    cmsCIEXYZ  BlackXYZ;
+    cmsContext ContextID = cmsGetProfileContextID(hInput);
+
+    // If the profile does not support input direction, assume Black point 0
+    if (!cmsIsIntentSupported(hInput, Intent, LCMS_USED_AS_INPUT)) {
+
+        BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+        return FALSE;
+    }
+
+    // Build the format for the profile's colorspace. Called with (2, FALSE): presumably 2 bytes per sample, not float -- TODO confirm
+    dwFormat = cmsFormatterForColorspaceOfProfile(hInput, 2, FALSE);
+
+   // Try to get black by using black colorant
+    Space = cmsGetColorSpace(hInput);
+
+    // This function returns darker colorant in 16 bits for several spaces
+    if (!_cmsEndPointsBySpace(Space, NULL, &Black, &nChannels)) {
+
+        BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+        return FALSE;
+    }
+    // The colorant must have as many channels as the format built above
+    if (nChannels != T_CHANNELS(dwFormat)) {
+       BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+       return FALSE;
+    }
+
+    // Lab will be used as the output space, but lab2 will avoid recursion
+    hLab = cmsCreateLab2ProfileTHR(ContextID, NULL);
+    if (hLab == NULL) {
+       BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+       return FALSE;
+    }
+
+    // Create the transform
+    xform = cmsCreateTransformTHR(ContextID, hInput, dwFormat,
+                                hLab, TYPE_Lab_DBL, Intent, cmsFLAGS_NOOPTIMIZE|cmsFLAGS_NOCACHE);
+    cmsCloseProfile(hLab);    // closed whether or not the transform was created
+
+    if (xform == NULL) {
+
+        // Something went wrong. Get rid of open resources and return zero as black
+        BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+        return FALSE;
+    }
+
+    // Convert black to Lab
+    cmsDoTransform(xform, Black, &Lab, 1);
+
+    // Force it to be neutral, clip to max. L* of 50
+    Lab.a = Lab.b = 0;
+    if (Lab.L > 50) Lab.L = 50;
+
+    // Free the resources
+    cmsDeleteTransform(xform);
+
+    // Convert from Lab (which is now clipped) to XYZ.
+    cmsLab2XYZ(NULL, &BlackXYZ, &Lab);
+    // NOTE(review): BlackPoint is NULL-checked only here; the failure paths above dereference it unconditionally
+    if (BlackPoint != NULL)
+        *BlackPoint = BlackXYZ;
+
+    return TRUE;
+    // Placed after the return, so never executed; presumably only marks dwFlags as used
+    cmsUNUSED_PARAMETER(dwFlags);
+}
+
+// Black point of an output CMYK profile with any embedded ink-limiting
+// discounted. The perceptual intent is used in the input direction:
+// Lab (0, 0, 0) -> [Perceptual] Profile -> CMYK -> [Rel. colorimetric] Profile -> Lab
+static
+cmsBool BlackPointUsingPerceptualBlack(cmsCIEXYZ* BlackPoint, cmsHPROFILE hProfile)
+{
+    cmsCIELab labBlack = { 0, 0, 0 };
+    cmsCIELab labResult;
+    cmsCIEXYZ xyzResult;
+    cmsHTRANSFORM hXform;
+
+    // Without perceptual input support the black point is reported as zero,
+    // and that still counts as success
+    if (!cmsIsIntentSupported(hProfile, INTENT_PERCEPTUAL, LCMS_USED_AS_INPUT)) {
+        BlackPoint -> X = 0.0;
+        BlackPoint -> Y = 0.0;
+        BlackPoint -> Z = 0.0;
+        return TRUE;
+    }
+
+    hXform = CreateRoundtripXForm(hProfile, INTENT_PERCEPTUAL);
+    if (hXform == NULL) {
+        BlackPoint -> X = 0.0;
+        BlackPoint -> Y = 0.0;
+        BlackPoint -> Z = 0.0;
+        return FALSE;
+    }
+
+    // Push Lab black through the round trip, then release the transform
+    cmsDoTransform(hXform, &labBlack, &labResult, 1);
+    cmsDeleteTransform(hXform);
+
+    // Keep only L*, limited to 50, so the result is neutral
+    labResult.a = labResult.b = 0;
+    if (labResult.L > 50) labResult.L = 50;
+
+    cmsLab2XYZ(NULL, &xyzResult, &labResult);
+    if (BlackPoint != NULL) *BlackPoint = xyzResult;
+    return TRUE;
+}
+
+// This function shouldn't exist at all -- there are so many profiles with a
+// broken black point tag that we must somehow fix chromaticity to avoid a
+// huge tint when doing black point compensation. Using the tag is compiled in
+// only with CMS_USE_PROFILE_BLACK_POINT_TAG, because it is bogus on most
+// profiles. The detected BP is made neutral, keeping only its L component.
+// Returns FALSE (BlackPoint zeroed) for unsupported device classes or intents.
+cmsBool CMSEXPORT cmsDetectBlackPoint(cmsCIEXYZ* BlackPoint, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags)
+{
+    cmsProfileClassSignature devClass;
+
+    // Make sure the device class is adequate
+    devClass = cmsGetDeviceClass(hProfile);
+    if (devClass == cmsSigLinkClass ||
+        devClass == cmsSigAbstractClass ||
+        devClass == cmsSigNamedColorClass) {
+            BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+            return FALSE;
+    }
+
+    // Make sure intent is adequate
+    if (Intent != INTENT_PERCEPTUAL &&
+        Intent != INTENT_RELATIVE_COLORIMETRIC &&
+        Intent != INTENT_SATURATION) {
+            BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+            return FALSE;
+    }
+
+    // v4 + perceptual & saturation intents does have its own black point, and it is
+    // well specified enough to use it. Black point tag is deprecated in V4.
+    if ((cmsGetEncodedICCversion(hProfile) >= 0x4000000) &&
+        (Intent == INTENT_PERCEPTUAL || Intent == INTENT_SATURATION)) {
+
+            // Matrix shaper share MRC & perceptual intents
+            if (cmsIsMatrixShaper(hProfile))
+                return BlackPointAsDarkerColorant(hProfile, INTENT_RELATIVE_COLORIMETRIC, BlackPoint, 0);
+
+            // Get Perceptual black out of v4 profiles. That is fixed for perceptual & saturation intents
+            BlackPoint -> X = cmsPERCEPTUAL_BLACK_X;
+            BlackPoint -> Y = cmsPERCEPTUAL_BLACK_Y;
+            BlackPoint -> Z = cmsPERCEPTUAL_BLACK_Z;
+
+            return TRUE;
+    }
+
+    // The tag-based path below exists only when CMS_USE_PROFILE_BLACK_POINT_TAG is defined
+#ifdef CMS_USE_PROFILE_BLACK_POINT_TAG
+
+    // v2, v4 rel/abs colorimetric
+    if (cmsIsTag(hProfile, cmsSigMediaBlackPointTag) &&
+        Intent == INTENT_RELATIVE_COLORIMETRIC) {
+
+            cmsCIEXYZ *BlackPtr, BlackXYZ, UntrustedBlackPoint, TrustedBlackPoint, MediaWhite;
+            cmsCIELab Lab;
+
+            // If black point is specified, then use it,
+
+            BlackPtr = cmsReadTag(hProfile, cmsSigMediaBlackPointTag);
+            if (BlackPtr != NULL) {
+
+                BlackXYZ = *BlackPtr;
+                _cmsReadMediaWhitePoint(&MediaWhite, hProfile);
+
+                // Black point is absolute XYZ, so adapt to D50 to get PCS value
+                cmsAdaptToIlluminant(&UntrustedBlackPoint, &MediaWhite, cmsD50_XYZ(), &BlackXYZ);
+
+                // Force a=b=0 to get rid of any chroma
+                cmsXYZ2Lab(NULL, &Lab, &UntrustedBlackPoint);
+                Lab.a = Lab.b = 0;
+                if (Lab.L > 50) Lab.L = 50; // Clip to L* <= 50
+                cmsLab2XYZ(NULL, &TrustedBlackPoint, &Lab);
+
+                if (BlackPoint != NULL)
+                    *BlackPoint = TrustedBlackPoint;
+
+                return TRUE;
+            }
+    }
+#endif
+
+    // That is about v2 profiles.
+
+    // If output profile, discount ink-limiting and that's all
+    if (Intent == INTENT_RELATIVE_COLORIMETRIC &&
+        (cmsGetDeviceClass(hProfile) == cmsSigOutputClass) &&
+        (cmsGetColorSpace(hProfile)  == cmsSigCmykData))
+        return BlackPointUsingPerceptualBlack(BlackPoint, hProfile);
+
+    // Nope, compute BP using current intent.
+    return BlackPointAsDarkerColorant(hProfile, Intent, BlackPoint, dwFlags);
+}
+
+
+
+// ---------------------------------------------------------------------------------------------------------
+
+// Least Squares Fit of a Quadratic Curve to Data
+// http://www.personal.psu.edu/jhm/f90/lectures/lsq2.html
+// Fits y = a*x^2 + b*x + c to the n samples and returns a root clamped to [0, 50]; 0 when no usable root exists
+static
+cmsFloat64Number RootOfLeastSquaresFitQuadraticCurve(int n, cmsFloat64Number x[], cmsFloat64Number y[])
+{
+    double sum_x = 0, sum_x2 = 0, sum_x3 = 0, sum_x4 = 0;
+    double sum_y = 0, sum_yx = 0, sum_yx2 = 0;
+    double d, a, b, c;
+    int i;
+    cmsMAT3 m;
+    cmsVEC3 v, res;
+
+    if (n < 4) return 0;
+
+    for (i=0; i < n; i++) {
+
+        double xn = x[i];
+        double yn = y[i];
+
+        sum_x  += xn;
+        sum_x2 += xn*xn;
+        sum_x3 += xn*xn*xn;
+        sum_x4 += xn*xn*xn*xn;
+
+        sum_y += yn;
+        sum_yx += yn*xn;
+        sum_yx2 += yn*xn*xn;
+    }
+    // Normal equations of the fit, to be solved as m * (c, b, a) = v
+    _cmsVEC3init(&m.v[0], n,      sum_x,  sum_x2);
+    _cmsVEC3init(&m.v[1], sum_x,  sum_x2, sum_x3);
+    _cmsVEC3init(&m.v[2], sum_x2, sum_x3, sum_x4);
+
+    _cmsVEC3init(&v, sum_y, sum_yx, sum_yx2);
+
+    if (!_cmsMAT3solve(&res, &m, &v)) return 0;
+
+    // The solution vector is read back as (c, b, a)
+    a = res.n[2];
+    b = res.n[1];
+    c = res.n[0];
+
+    if (fabs(a) < 1.0E-10) {
+        // Linear fit b*x + c: a flat line has no root, otherwise clamp -c/b to [0, 50] like the quadratic case
+        if (fabs(b) < 1.0E-10) return 0;
+        return cmsmax(0, cmsmin(50, -c/b ));
+    }
+    else {
+
+         d = b*b - 4.0 * a * c;
+         if (d <= 0) {
+             return 0;
+         }
+         else {
+
+             double rt = (-b + sqrt(d)) / (2.0 * a);
+
+             return cmsmax(0, cmsmin(50, rt));
+         }
+   }
+}
+
+
+
+// Calculates the black point of a destination profile. Returns FALSE when it cannot be determined.
+// This algorithm comes from the Adobe paper disclosing its black point compensation method.
+cmsBool CMSEXPORT cmsDetectDestinationBlackPoint(cmsCIEXYZ* BlackPoint, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags)
+{
+    cmsColorSpaceSignature ColorSpace;
+    cmsHTRANSFORM hRoundTrip = NULL;
+    cmsCIELab InitialLab, destLab, Lab;
+    cmsFloat64Number inRamp[256], outRamp[256];
+    cmsFloat64Number MinL, MaxL;
+    cmsBool NearlyStraightMidrange = TRUE;  
+    cmsFloat64Number yRamp[256];
+    cmsFloat64Number x[256], y[256];
+    cmsFloat64Number lo, hi;
+    int n, l;
+    cmsProfileClassSignature devClass;
+
+    // Make sure the device class is adequate
+    devClass = cmsGetDeviceClass(hProfile);
+    if (devClass == cmsSigLinkClass ||
+        devClass == cmsSigAbstractClass ||
+        devClass == cmsSigNamedColorClass) {
+            BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+            return FALSE;
+    }
+
+    // Make sure intent is adequate
+    if (Intent != INTENT_PERCEPTUAL &&
+        Intent != INTENT_RELATIVE_COLORIMETRIC &&
+        Intent != INTENT_SATURATION) {
+            BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+            return FALSE;
+    }
+
+
+    // v4 + perceptual & saturation intents does have its own black point, and it is
+    // well specified enough to use it. Black point tag is deprecated in V4.
+    if ((cmsGetEncodedICCversion(hProfile) >= 0x4000000) &&
+        (Intent == INTENT_PERCEPTUAL || Intent == INTENT_SATURATION)) {
+
+            // Matrix shaper share MRC & perceptual intents
+            if (cmsIsMatrixShaper(hProfile))
+                return BlackPointAsDarkerColorant(hProfile, INTENT_RELATIVE_COLORIMETRIC, BlackPoint, 0);
+
+            // Get Perceptual black out of v4 profiles. That is fixed for perceptual & saturation intents
+            BlackPoint -> X = cmsPERCEPTUAL_BLACK_X;
+            BlackPoint -> Y = cmsPERCEPTUAL_BLACK_Y;
+            BlackPoint -> Z = cmsPERCEPTUAL_BLACK_Z;
+            return TRUE;
+    }
+
+
+    // Check if the profile is lut based and gray, rgb or cmyk (7.2 in Adobe's document)
+    ColorSpace = cmsGetColorSpace(hProfile);
+    if (!cmsIsCLUT(hProfile, Intent, LCMS_USED_AS_OUTPUT ) ||
+        (ColorSpace != cmsSigGrayData &&
+         ColorSpace != cmsSigRgbData  &&
+         ColorSpace != cmsSigCmykData)) {
+
+        // In this case, handle as input case
+        return cmsDetectBlackPoint(BlackPoint, hProfile, Intent, dwFlags);
+    }
+
+    // It is one of the valid cases!, use Adobe algorithm
+
+    
+    // Set a first guess, that should work on good profiles.
+    if (Intent == INTENT_RELATIVE_COLORIMETRIC) {
+
+        cmsCIEXYZ IniXYZ;
+
+        // calculate initial Lab as source black point
+        if (!cmsDetectBlackPoint(&IniXYZ, hProfile, Intent, dwFlags)) {
+            return FALSE;
+        }
+
+        // convert the XYZ to lab
+        cmsXYZ2Lab(NULL, &InitialLab, &IniXYZ);
+
+    } else {
+
+        // set the initial Lab to zero, that should be the black point for perceptual and saturation
+        InitialLab.L = 0;
+        InitialLab.a = 0;
+        InitialLab.b = 0;
+    }
+
+
+    // Step 2
+    // ======
+
+    // Create a roundtrip. Define a Transform BT for all x in L*a*b*
+    hRoundTrip = CreateRoundtripXForm(hProfile, Intent);
+    if (hRoundTrip == NULL)  return FALSE;
+
+    // Compute ramps
+    // L* is sampled from 0 to 100 in 256 steps; a and b come from InitialLab, clamped to [-50, 50]
+    for (l=0; l < 256; l++) {
+
+        Lab.L = (cmsFloat64Number) (l * 100.0) / 255.0;
+        Lab.a = cmsmin(50, cmsmax(-50, InitialLab.a));
+        Lab.b = cmsmin(50, cmsmax(-50, InitialLab.b));
+
+        cmsDoTransform(hRoundTrip, &Lab, &destLab, 1);
+
+        inRamp[l]  = Lab.L;
+        outRamp[l] = destLab.L;
+    }
+
+    // Make monotonic (running minimum taken from the top; index 0 is not adjusted by this loop)
+    for (l = 254; l > 0; --l) {
+        outRamp[l] = cmsmin(outRamp[l], outRamp[l+1]);
+    }
+
+    // Check: the output must be lighter at the end of the ramp than at its start
+    if (! (outRamp[0] < outRamp[255])) {
+
+        cmsDeleteTransform(hRoundTrip);
+        BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+        return FALSE;
+    }
+
+
+    // Test for mid range straight (only on relative colorimetric)
+    NearlyStraightMidrange = TRUE;
+    MinL = outRamp[0]; MaxL = outRamp[255];
+    if (Intent == INTENT_RELATIVE_COLORIMETRIC) {
+        // Above MinL + 20% of the output range, input and output L* must differ by less than 4
+        for (l=0; l < 256; l++) {
+
+            if (! ((inRamp[l] <= MinL + 0.2 * (MaxL - MinL) ) ||   
+                (fabs(inRamp[l] - outRamp[l]) < 4.0 )))
+                NearlyStraightMidrange = FALSE;
+        }
+
+        // If the mid range is straight (as determined above) then the
+        // DestinationBlackPoint shall be the same as initialLab.
+        // Otherwise, the DestinationBlackPoint shall be determined 
+        // using curve fitting.
+        if (NearlyStraightMidrange) {
+
+            cmsLab2XYZ(NULL, BlackPoint, &InitialLab);
+            cmsDeleteTransform(hRoundTrip);
+            return TRUE;
+        }
+    }
+
+ 
+    // curve fitting: The round-trip curve normally looks like a nearly constant section at the black point,
+    // with a corner and a nearly straight line to the white point. yRamp is outRamp rescaled to 0..1.
+    for (l=0; l < 256; l++) {
+    
+        yRamp[l] = (outRamp[l] - MinL) / (MaxL - MinL);
+    }
+
+    // find the black point using the least squares error quadratic curve fitting
+    if (Intent == INTENT_RELATIVE_COLORIMETRIC) {
+        lo = 0.1;
+        hi = 0.5;
+    }
+    else {
+
+        // Perceptual and saturation
+        lo = 0.03;
+        hi = 0.25;
+    }
+
+    // Capture shadow points for the fitting.
+    n = 0;
+    for (l=0; l < 256; l++) {
+    
+        cmsFloat64Number ff = yRamp[l];
+
+        if (ff >= lo && ff < hi) {
+            x[n] = inRamp[l];
+            y[n] = yRamp[l];
+            n++;
+        }    
+    }
+
+    
+    // No suitable points
+    if (n < 3 ) {
+        cmsDeleteTransform(hRoundTrip);
+        BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+        return FALSE;
+    }
+    // NOTE(review): the fit routine returns 0 when n < 4, so n == 3 passes the check above but yields L* = 0
+  
+    // fit and get the vertex of quadratic curve
+    Lab.L = RootOfLeastSquaresFitQuadraticCurve(n, x, y);
+
+    if (Lab.L < 0.0) { // clip to zero L* if the vertex is negative
+        Lab.L = 0;
+    }
+
+    Lab.a = InitialLab.a;
+    Lab.b = InitialLab.b;
+
+    cmsLab2XYZ(NULL, BlackPoint, &Lab);
+
+    cmsDeleteTransform(hRoundTrip);
+    return TRUE;
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmssm.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmssm.c
new file mode 100644
index 0000000000..a0fdbc86c5
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmssm.c
@@ -0,0 +1,736 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// ------------------------------------------------------------------------
+
+// Gamut boundary description by using Jan Morovic's Segment maxima method
+// Many thanks to Jan for allowing me to use his algorithm.
+
+// r = C*
+// alpha = Hab
+// theta = L*
+
+#define SECTORS 16      // number of divisions in alpha and theta
+
+// Spherical coordinates, as filled in by ToSpherical(): both angles are in degrees
+typedef struct {
+
+    cmsFloat64Number r;         // Distance to the origin
+    cmsFloat64Number alpha;     // Angle on the a/b plane
+    cmsFloat64Number theta;     // Angle measured against the L axis
+
+} cmsSpherical;
+
+typedef  enum {
+        GP_EMPTY,               // No point recorded for the sector
+        GP_SPECIFIED,           // Point supplied through cmsGDBAddPoint()
+        GP_MODELED              // Point produced by InterpolateMissingSector()
+
+    } GDBPointType;
+
+// One sector of the gamut boundary
+typedef struct {
+
+    GDBPointType Type;
+    cmsSpherical p;         // Keep also alpha & theta of maximum
+
+} cmsGDBPoint;
+
+// Gamut boundary descriptor: a SECTORS x SECTORS grid addressed as Gamut[theta][alpha]
+typedef struct {
+
+    cmsContext ContextID;   // Used when signalling errors and when freeing the descriptor
+    cmsGDBPoint Gamut[SECTORS][SECTORS];
+
+} cmsGDB;
+
+
+// A line using the parametric form
+// P = a + t*u
+typedef struct {
+
+    cmsVEC3 a;
+    cmsVEC3 u;
+
+} cmsLine;
+
+
+// A plane using the parametric form
+// Q = b + r*v + s*w
+typedef struct {
+
+    cmsVEC3 b;
+    cmsVEC3 v;
+    cmsVEC3 w;
+
+} cmsPlane;
+
+
+
+// --------------------------------------------------------------------------------------------
+
+// atan2() expressed in degrees and shifted so the result is never negative
+
+static
+cmsFloat64Number _cmsAtan2(cmsFloat64Number y, cmsFloat64Number x)
+{
+    cmsFloat64Number degrees;
+
+    // atan2(0, 0) is treated as angle zero
+    if (x == 0.0 && y == 0.0)
+        return 0;
+
+    degrees = (atan2(y, x) * 180.0) / M_PI;
+
+    // Add whole turns until the angle is non-negative
+    for (; degrees < 0; degrees += 360)
+        ;
+    return degrees;
+}
+
+// Cartesian (L, a, b) vector -> spherical (r, alpha, theta), angles in degrees
+static
+void ToSpherical(cmsSpherical* sp, const cmsVEC3* v)
+{
+    const cmsFloat64Number L = v ->n[VX];
+    const cmsFloat64Number a = v ->n[VY];
+    const cmsFloat64Number b = v ->n[VZ];
+
+    sp ->r = sqrt( L*L + a*a + b*b );
+
+    if (sp ->r != 0) {
+        // alpha is measured on the a/b plane, theta against the L axis
+        sp ->alpha = _cmsAtan2(a, b);
+        sp ->theta = _cmsAtan2(sqrt(a*a + b*b), L);
+    }
+    else {
+        // Null vector: both angles are defined as zero
+        sp ->alpha = 0;
+        sp ->theta = 0;
+    }
+}
+
+
+// Spherical -> cartesian conversion, the inverse of ToSpherical().
+//   sp: radius plus the alpha and theta angles, both in degrees
+//   v:  receives L in n[VX], a in n[VY] and b in n[VZ]
+// No range checking is done on the inputs.
+static
+void ToCartesian(cmsVEC3* v, const cmsSpherical* sp)
+{
+    // Angles are stored in degrees; sin() and cos() take radians
+    const cmsFloat64Number alphaRad = (M_PI * sp ->alpha) / 180.0;
+    const cmsFloat64Number thetaRad = (M_PI * sp ->theta) / 180.0;
+
+    // Each trigonometric term is evaluated once
+    const cmsFloat64Number sinAlpha = sin(alphaRad);
+    const cmsFloat64Number cosAlpha = cos(alphaRad);
+    const cmsFloat64Number sinTheta = sin(thetaRad);
+    const cmsFloat64Number cosTheta = cos(thetaRad);
+    const cmsFloat64Number radius   = sp ->r;
+
+    // L lies along the polar axis; a and b share the plane orthogonal to it
+    v ->n[VX] = radius * cosTheta;
+    v ->n[VY] = radius * sinTheta * sinAlpha;
+    v ->n[VZ] = radius * sinTheta * cosAlpha;
+}
+
+
+// Map the alpha (over 360) and theta (over 180) angles of a spherical coordinate
+// to sector indices; an index past the end saturates to the last sector.
+static
+void QuantizeToSector(const cmsSpherical* sp, int* alpha, int* theta)
+{
+    const int lastSector = SECTORS - 1;
+    int alphaIdx = (int) floor((sp->alpha * (SECTORS)) / 360.0);
+    int thetaIdx = (int) floor((sp->theta * (SECTORS)) / 180.0);
+
+    // Only the upper bound is clamped here
+    *alpha = (alphaIdx > lastSector) ? lastSector : alphaIdx;
+    *theta = (thetaIdx > lastSector) ? lastSector : thetaIdx;
+}
+
+
+// Parametric line through a (t = 0) and b (t = 1): origin a, direction b - a
+static
+void LineOf2Points(cmsLine* line, cmsVEC3* a, cmsVEC3* b)
+{
+    cmsVEC3* origin = &line ->a;
+    cmsVEC3* dir    = &line ->u;
+
+    _cmsVEC3init(origin, a ->n[VX], a ->n[VY], a ->n[VZ]);
+    _cmsVEC3init(dir, b ->n[VX] - a ->n[VX], b ->n[VY] - a ->n[VY], b ->n[VZ] - a ->n[VZ]);
+}
+
+
+// Evaluate the parametric line at t: p = a + t*u, component by component
+static void GetPointOfLine(cmsVEC3* p, const cmsLine* line, cmsFloat64Number t)
+{
+    const cmsFloat64Number *origin = line ->a.n, *dir = line ->u.n;
+    p ->n[VX] = origin[VX] + t * dir[VX];
+    p ->n[VY] = origin[VY] + t * dir[VY];
+    p ->n[VZ] = origin[VZ] + t * dir[VZ];
+}
+
+
+
+/*
+    Closest point in sector line1 to sector line2 (both are defined as 0 <=t <= 1)
+    http://softsurfer.com/Archive/algorithm_0106/algorithm_0106.htm
+
+    Copyright 2001, softSurfer (www.softsurfer.com)
+    This code may be freely used and modified for any purpose
+    providing that this copyright notice is included with it.
+    SoftSurfer makes no warranty for this code, and cannot be held
+    liable for any real or imagined damage resulting from its use.
+    Users of this code must verify correctness for their application.
+
+*/
+// Writes into r the point of line1 selected by the algorithm referenced above; always returns TRUE
+static
+cmsBool ClosestLineToLine(cmsVEC3* r, const cmsLine* line1, const cmsLine* line2)
+{
+    cmsFloat64Number a, b, c, d, e, D;
+    cmsFloat64Number sc, sN, sD;
+    //cmsFloat64Number tc; // left for future use
+    cmsFloat64Number tN, tD;
+    cmsVEC3 w0;
+    // w0 is built from the two line origins (presumably line1.a - line2.a -- TODO confirm operand order)
+    _cmsVEC3minus(&w0, &line1 ->a, &line2 ->a);
+
+    a  = _cmsVEC3dot(&line1 ->u, &line1 ->u);
+    b  = _cmsVEC3dot(&line1 ->u, &line2 ->u);
+    c  = _cmsVEC3dot(&line2 ->u, &line2 ->u);
+    d  = _cmsVEC3dot(&line1 ->u, &w0);
+    e  = _cmsVEC3dot(&line2 ->u, &w0);
+
+    D  = a*c - b * b;      // Denominator
+    sD = tD = D;           // default sD = D >= 0
+
+    if (D <  MATRIX_DET_TOLERANCE) {   // the lines are almost parallel
+
+        sN = 0.0;        // force using point P0 on segment S1
+        sD = 1.0;        // to prevent possible division by 0.0 later
+        tN = e;
+        tD = c;
+    }
+    else {                // get the closest points on the infinite lines
+
+        sN = (b*e - c*d);
+        tN = (a*e - b*d);
+
+        if (sN < 0.0) {       // sc < 0 => the s=0 edge is visible
+
+            sN = 0.0;
+            tN = e;
+            tD = c;
+        }
+        else if (sN > sD) {   // sc > 1 => the s=1 edge is visible
+            sN = sD;
+            tN = e + b;
+            tD = c;
+        }
+    }
+    // tN/tD are only used to decide how sN/sD must be clamped; tc itself is never computed
+    if (tN < 0.0) {           // tc < 0 => the t=0 edge is visible
+
+        tN = 0.0;
+        // recompute sc for this edge
+        if (-d < 0.0)
+            sN = 0.0;
+        else if (-d > a)
+            sN = sD;
+        else {
+            sN = -d;
+            sD = a;
+        }
+    }
+    else if (tN > tD) {      // tc > 1 => the t=1 edge is visible
+
+        tN = tD;
+
+        // recompute sc for this edge
+        if ((-d + b) < 0.0)
+            sN = 0;
+        else if ((-d + b) > a)
+            sN = sD;
+        else {
+            sN = (-d + b);
+            sD = a;
+        }
+    }
+    // finally do the division to get sc and tc
+    sc = (fabs(sN) < MATRIX_DET_TOLERANCE ? 0.0 : sN / sD);
+    //tc = (fabs(tN) < MATRIX_DET_TOLERANCE ? 0.0 : tN / tD); // left for future use.
+    // Only the point on line1 is reported
+    GetPointOfLine(r, line1, sc);
+    return TRUE;
+}
+
+
+
+// ------------------------------------------------------------------ Wrapper
+
+
+// Allocate a zero-initialized gamut boundary descriptor bound to ContextID
+cmsHANDLE  CMSEXPORT cmsGBDAlloc(cmsContext ContextID)
+{
+    cmsGDB* descriptor = (cmsGDB*) _cmsMallocZero(ContextID, sizeof(cmsGDB));
+
+    // Remember the owning context so the block can be released later
+    if (descriptor != NULL)
+        descriptor -> ContextID = ContextID;
+    return (cmsHANDLE) descriptor;
+}
+
+// Release a gamut boundary descriptor; a NULL handle is ignored
+void CMSEXPORT cmsGBDFree(cmsHANDLE hGBD)
+{
+    cmsGDB* descriptor = (cmsGDB*) hGBD;
+    if (descriptor == NULL) return;
+    _cmsFree(descriptor->ContextID, (void*) descriptor);
+}
+
+
+// Auxiliary to retrieve a pointer to the segmentr containing the Lab value
+static
+cmsGDBPoint* GetPoint(cmsGDB* gbd, const cmsCIELab* Lab, cmsSpherical* sp)
+{
+    cmsVEC3 v;
+    int alpha, theta;
+
+    // Housekeeping
+    _cmsAssert(gbd != NULL);
+    _cmsAssert(Lab != NULL);
+    _cmsAssert(sp != NULL);
+
+    // Center L* by subtracting half of its domain, that's 50
+    _cmsVEC3init(&v, Lab ->L - 50.0, Lab ->a, Lab ->b);
+
+    // Convert to spherical coordinates
+    ToSpherical(sp, &v);
+
+    if (sp ->r < 0 || sp ->alpha < 0 || sp->theta < 0) {
+         cmsSignalError(gbd ->ContextID, cmsERROR_RANGE, "spherical value out of range");
+         return NULL;
+    }
+
+    // On which sector it falls?
+    QuantizeToSector(sp, &alpha, &theta);
+
+    if (alpha < 0 || theta < 0 || alpha >= SECTORS || theta >= SECTORS) {
+         cmsSignalError(gbd ->ContextID, cmsERROR_RANGE, " quadrant out of range");
+         return NULL;
+    }
+
+    // Get pointer to the sector
+    return &gbd ->Gamut[theta][alpha];
+}
+
+// Add a point to the gamut descriptor.
+//   hGBD: gamut boundary descriptor handle (used as a cmsGDB*)
+//   Lab:  point to add, in Lab color space
+// Returns FALSE when no sector could be located for the point, else TRUE.
+// GBD is centered on a=b=0 and L*=50
+cmsBool CMSEXPORT cmsGDBAddPoint(cmsHANDLE hGBD, const cmsCIELab* Lab)
+{
+
+    cmsGDB* gbd = (cmsGDB*) hGBD;
+    cmsSpherical spherical;
+    cmsGDBPoint* sector;
+
+    // Locate the sector; this also yields the point in spherical form
+    sector = GetPoint(gbd, Lab, &spherical);
+    if (sector == NULL)
+        return FALSE;
+
+    // An occupied sector is only overwritten by a sample whose radius is
+    // strictly greater than the stored one.
+    // Empty sectors take the first sample unconditionally.
+    if (sector ->Type != GP_EMPTY) {
+
+        if (!(spherical.r > sector ->p.r))
+            return TRUE;
+    }
+
+    // Record the sample, keeping its alpha & theta along with the radius
+    sector ->Type = GP_SPECIFIED;
+    sector ->p    = spherical;
+
+    return TRUE;
+}
+
+// Check if a given point falls inside gamut.
+// Returns FALSE as well when no sector can be located for the point or the
+// sector holds no data.
+cmsBool CMSEXPORT cmsGDBCheckPoint(cmsHANDLE hGBD, const cmsCIELab* Lab)
+{
+    cmsGDB* gbd = (cmsGDB*) hGBD;
+    cmsSpherical spherical;
+    cmsGDBPoint* sector;
+
+    sector = GetPoint(gbd, Lab, &spherical);
+
+    // Rejected value, or a sector without any sample
+    if (sector == NULL || sector ->Type == GP_EMPTY)
+        return FALSE;
+
+    // In gamut when the radius does not exceed the one stored for the sector
+    return (spherical.r <= sector ->p.r);
+}
+
+// -----------------------------------------------------------------------------------------------------------------------
+
+// Find near sectors. The list of sectors found is returned on Close[].
+// The function returns the number of sectors as well.
+
+// 24   9  10  11  12
+// 23   8   1   2  13
+// 22   7   *   3  14
+// 21   6   5   4  15
+// 20  19  18  17  16
+//
+// Those are the relative movements
+// {-2,-2}, {-1, -2}, {0, -2}, {+1, -2}, {+2,  -2},
+// {-2,-1}, {-1, -1}, {0, -1}, {+1, -1}, {+2,  -1},
+// {-2, 0}, {-1,  0}, {0,  0}, {+1,  0}, {+2,   0},
+// {-2,+1}, {-1, +1}, {0, +1}, {+1,  +1}, {+2,  +1},
+// {-2,+2}, {-1, +2}, {0, +2}, {+1,  +2}, {+2,  +2}};
+
+
+static
+const struct _spiral {
+    // Relative movement: AdvX is added to the alpha index, AdvY to the theta index
+    int AdvX, AdvY;
+    // Entries follow the numbering of the diagram above: the 8 adjacent cells first, then the outer ring
+    } Spiral[] = { {0,  -1}, {+1, -1}, {+1,  0}, {+1, +1}, {0,  +1}, {-1, +1},
+                   {-1,  0}, {-1, -1}, {-1, -2}, {0,  -2}, {+1, -2}, {+2, -2},
+                   {+2, -1}, {+2,  0}, {+2, +1}, {+2, +2}, {+1, +2}, {0,  +2},
+                   {-1, +2}, {-2, +2}, {-2, +1}, {-2, 0},  {-2, -1}, {-2, -2} };
+// Number of entries in Spiral[]
+#define NSTEPS (sizeof(Spiral) / sizeof(struct _spiral))
+// Collects into Close[] the non-empty sectors found around (alpha, theta),
+// in spiral order, and returns how many were stored.
+static
+int FindNearSectors(cmsGDB* gbd, int alpha, int theta, cmsGDBPoint* Close[])
+{
+    const struct _spiral* step = Spiral;
+    const struct _spiral* const last = Spiral + NSTEPS;
+    int found = 0;
+
+    // Walk the spiral of relative movements
+    for (; step != last; step++) {
+
+        // Wrap both indices into 0..SECTORS-1: the remainder handles the
+        // upper end, the additions fix up a negative result
+        int col = (alpha + step ->AdvX) % SECTORS;
+        int row = (theta + step ->AdvY) % SECTORS;
+        cmsGDBPoint* candidate;
+
+        if (col < 0) col += SECTORS;
+        if (row < 0) row += SECTORS;
+
+        // Only sectors that already hold a point are of interest
+        candidate = &gbd ->Gamut[row][col];
+        if (candidate -> Type == GP_EMPTY)
+            continue;
+
+        Close[found] = candidate;
+        found++;
+    }
+
+    return found;
+}
+
+
+// Interpolate a missing sector from its non-empty neighbours. Sectors that already hold a point are left alone. Always returns TRUE.
+static
+cmsBool InterpolateMissingSector(cmsGDB* gbd, int alpha, int theta)
+{
+    cmsSpherical sp;
+    cmsVEC3 Lab;
+    cmsVEC3 Centre;
+    cmsLine ray;
+    int nCloseSectors;
+    cmsGDBPoint* Close[NSTEPS + 1];
+    cmsSpherical closel, templ;
+    cmsLine edge;
+    int k, m;
+
+    // Is that point already specified?
+    if (gbd ->Gamut[theta][alpha].Type != GP_EMPTY) return TRUE;
+
+    // Fill close points
+    nCloseSectors = FindNearSectors(gbd, alpha, theta, Close);
+
+
+    // Find a central point on the sector
+    sp.alpha = (cmsFloat64Number) ((alpha + 0.5) * 360.0) / (SECTORS);
+    sp.theta = (cmsFloat64Number) ((theta + 0.5) * 180.0) / (SECTORS);
+    sp.r     = 50.0;
+
+    // Convert to Cartesian
+    ToCartesian(&Lab, &sp);
+
+    // Create a ray line from centre to this point (the line starts at Lab and points towards Centre)
+    _cmsVEC3init(&Centre, 50.0, 0, 0);
+    LineOf2Points(&ray, &Lab, &Centre);
+
+    // For all close sectors; closel starts at radius 0 and keeps the best candidate found
+    closel.r = 0.0;
+    closel.alpha = 0;
+    closel.theta = 0;
+    // Every unordered pair (k, m) of close sectors defines one edge to test against the ray
+    for (k=0; k < nCloseSectors; k++) {
+
+        for(m = k+1; m < nCloseSectors; m++) {
+
+            cmsVEC3 temp, a1, a2;
+
+            // A line from sector to sector
+            ToCartesian(&a1, &Close[k]->p);
+            ToCartesian(&a2, &Close[m]->p);
+
+            LineOf2Points(&edge, &a1, &a2);
+
+            // Find a line
+            ClosestLineToLine(&temp, &ray, &edge);
+
+            // Convert to spherical
+            ToSpherical(&templ, &temp);
+
+            // Keep the candidate with the largest radius whose angles fall inside this sector
+            if ( templ.r > closel.r &&
+                 templ.theta >= (theta*180.0/SECTORS) &&
+                 templ.theta <= ((theta+1)*180.0/SECTORS) &&
+                 templ.alpha >= (alpha*360.0/SECTORS) &&
+                 templ.alpha <= ((alpha+1)*360.0/SECTORS)) {
+
+                closel = templ;
+            }
+        }
+    }
+    // If no candidate qualified, the sector is stored as modeled with radius 0
+    gbd ->Gamut[theta][alpha].p = closel;
+    gbd ->Gamut[theta][alpha].Type = GP_MODELED;
+
+    return TRUE;
+
+}
+
+
+// Fill in every sector that has no point yet. The theta=0 and theta=SECTORS-1
+// rows are completed first, then all rows from theta=1 upwards.
+cmsBool CMSEXPORT cmsGDBCompute(cmsHANDLE hGBD, cmsUInt32Number dwFlags)
+{
+    cmsGDB* gbd = (cmsGDB*) hGBD;
+    int row, col;
+
+    _cmsAssert(hGBD != NULL);
+
+    // First row (theta = 0)
+    for (col = 0; col < SECTORS; col++) {
+        if (!InterpolateMissingSector(gbd, col, 0))
+            return FALSE;
+    }
+
+    // Last row (theta = SECTORS-1)
+    for (col = 0; col < SECTORS; col++) {
+        if (!InterpolateMissingSector(gbd, col, SECTORS-1))
+            return FALSE;
+    }
+
+    // Rows from theta = 1 on; InterpolateMissingSector returns at once for non-empty sectors
+    for (row = 1; row < SECTORS; row++) {
+
+        for (col = 0; col < SECTORS; col++) {
+            if (!InterpolateMissingSector(gbd, col, row))
+                return FALSE;
+        }
+    }
+
+    // All sectors processed
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(dwFlags);
+}
+
+
+
+
+// --------------------------------------------------------------------------------------------------------
+
+// Great for debug, but not suitable for real use
+
+#if 0
+cmsBool cmsGBDdumpVRML(cmsHANDLE hGBD, const char* fname)
+{
+    FILE* fp;
+    int   i, j;
+    cmsGDB* gbd = (cmsGDB*) hGBD;
+    cmsGDBPoint* pt;
+
+    fp = fopen (fname, "wt");
+    if (fp == NULL)
+        return FALSE;
+
+    fprintf (fp, "#VRML V2.0 utf8\n");
+
+    // set the viewing orientation and distance
+    fprintf (fp, "DEF CamTest Group {\n");
+    fprintf (fp, "\tchildren [\n");
+    fprintf (fp, "\t\tDEF Cameras Group {\n");
+    fprintf (fp, "\t\t\tchildren [\n");
+    fprintf (fp, "\t\t\t\tDEF DefaultView Viewpoint {\n");
+    fprintf (fp, "\t\t\t\t\tposition 0 0 340\n");
+    fprintf (fp, "\t\t\t\t\torientation 0 0 1 0\n");
+    fprintf (fp, "\t\t\t\t\tdescription \"default view\"\n");
+    fprintf (fp, "\t\t\t\t}\n");
+    fprintf (fp, "\t\t\t]\n");
+    fprintf (fp, "\t\t},\n");
+    fprintf (fp, "\t]\n");
+    fprintf (fp, "}\n");
+
+    // Output the background stuff
+    fprintf (fp, "Background {\n");
+    fprintf (fp, "\tskyColor [\n");
+    fprintf (fp, "\t\t.5 .5 .5\n");
+    fprintf (fp, "\t]\n");
+    fprintf (fp, "}\n");
+
+    // Output the shape stuff
+    fprintf (fp, "Transform {\n");
+    fprintf (fp, "\tscale .3 .3 .3\n");
+    fprintf (fp, "\tchildren [\n");
+
+    // Draw the axes as a shape:
+    fprintf (fp, "\t\tShape {\n");
+    fprintf (fp, "\t\t\tappearance Appearance {\n");
+    fprintf (fp, "\t\t\t\tmaterial Material {\n");
+    fprintf (fp, "\t\t\t\t\tdiffuseColor 0 0.8 0\n");
+    fprintf (fp, "\t\t\t\t\temissiveColor 1.0 1.0 1.0\n");
+    fprintf (fp, "\t\t\t\t\tshininess 0.8\n");
+    fprintf (fp, "\t\t\t\t}\n");
+    fprintf (fp, "\t\t\t}\n");
+    fprintf (fp, "\t\t\tgeometry IndexedLineSet {\n");
+    fprintf (fp, "\t\t\t\tcoord Coordinate {\n");
+    fprintf (fp, "\t\t\t\t\tpoint [\n");
+    fprintf (fp, "\t\t\t\t\t0.0 0.0 0.0,\n");
+    fprintf (fp, "\t\t\t\t\t%f 0.0 0.0,\n",  255.0);
+    fprintf (fp, "\t\t\t\t\t0.0 %f 0.0,\n",  255.0);
+    fprintf (fp, "\t\t\t\t\t0.0 0.0 %f]\n",  255.0);
+    fprintf (fp, "\t\t\t\t}\n");
+    fprintf (fp, "\t\t\t\tcoordIndex [\n");
+    fprintf (fp, "\t\t\t\t\t0, 1, -1\n");
+    fprintf (fp, "\t\t\t\t\t0, 2, -1\n");
+    fprintf (fp, "\t\t\t\t\t0, 3, -1]\n");
+    fprintf (fp, "\t\t\t}\n");
+    fprintf (fp, "\t\t}\n");
+
+
+    fprintf (fp, "\t\tShape {\n");
+    fprintf (fp, "\t\t\tappearance Appearance {\n");
+    fprintf (fp, "\t\t\t\tmaterial Material {\n");
+    fprintf (fp, "\t\t\t\t\tdiffuseColor 0 0.8 0\n");
+    fprintf (fp, "\t\t\t\t\temissiveColor 1 1 1\n");
+    fprintf (fp, "\t\t\t\t\tshininess 0.8\n");
+    fprintf (fp, "\t\t\t\t}\n");
+    fprintf (fp, "\t\t\t}\n");
+    fprintf (fp, "\t\t\tgeometry PointSet {\n");
+
+    // fill in the points here
+    fprintf (fp, "\t\t\t\tcoord Coordinate {\n");
+    fprintf (fp, "\t\t\t\t\tpoint [\n");
+
+    // We need to transverse all gamut hull.
+    for (i=0; i < SECTORS; i++)
+        for (j=0; j < SECTORS; j++) {
+
+            cmsVEC3 v;
+
+            pt = &gbd ->Gamut[i][j];
+            ToCartesian(&v, &pt ->p);
+
+            fprintf (fp, "\t\t\t\t\t%g %g %g", v.n[0]+50, v.n[1], v.n[2]);
+
+            if ((j == SECTORS - 1) && (i == SECTORS - 1))
+                fprintf (fp, "]\n");
+            else
+                fprintf (fp, ",\n");
+
+        }
+
+        fprintf (fp, "\t\t\t\t}\n");
+
+
+
+    // fill in the face colors
+    fprintf (fp, "\t\t\t\tcolor Color {\n");
+    fprintf (fp, "\t\t\t\t\tcolor [\n");
+
+    for (i=0; i < SECTORS; i++)
+        for (j=0; j < SECTORS; j++) {
+
+           cmsVEC3 v;
+
+            pt = &gbd ->Gamut[i][j];
+
+
+            ToCartesian(&v, &pt ->p);
+
+
+        if (pt ->Type == GP_EMPTY)
+            fprintf (fp, "\t\t\t\t\t%g %g %g", 0.0, 0.0, 0.0);
+        else
+            if (pt ->Type == GP_MODELED)
+                fprintf (fp, "\t\t\t\t\t%g %g %g", 1.0, .5, .5);
+            else {
+                fprintf (fp, "\t\t\t\t\t%g %g %g", 1.0, 1.0, 1.0);
+
+            }
+
+        if ((j == SECTORS - 1) && (i == SECTORS - 1))
+                fprintf (fp, "]\n");
+            else
+                fprintf (fp, ",\n");
+    }
+    fprintf (fp, "\t\t\t}\n");
+
+
+    fprintf (fp, "\t\t\t}\n");
+    fprintf (fp, "\t\t}\n");
+    fprintf (fp, "\t]\n");
+    fprintf (fp, "}\n");
+
+    fclose (fp);
+
+    return TRUE;
+}
+#endif
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmstypes.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmstypes.c
new file mode 100644
index 0000000000..521bef085c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmstypes.c
@@ -0,0 +1,5633 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Tag Serialization  -----------------------------------------------------------------------------
+// This file implements every single tag and tag type as described in the ICC spec. Some types
+// have been deprecated, like ncl and Data. There is no implementation for those types as there
+// are no profiles holding them. The programmer can also extend this list by defining their own
+// types through the appropriate plug-in. There are three kinds of plug-ins for that: the first
+// allows new tags to be defined using any existing type, the second allows new types to be
+// defined, and the third is very specific: it extends the number of elements in the
+// multiprocessing elements special type.
+//--------------------------------------------------------------------------------------------------
+
+// Some broken types
+#define cmsCorbisBrokenXYZtype    ((cmsTagTypeSignature) 0x17A505B8)
+#define cmsMonacoBrokenCurveType  ((cmsTagTypeSignature) 0x9478ee00)
+
+// This is the linked list that keeps track of the defined types
+typedef struct _cmsTagTypeLinkedList_st {
+    // The handler is stored by value in the node
+    cmsTagTypeHandler Handler;
+    struct _cmsTagTypeLinkedList_st* Next;
+    // Next is NULL on the last node; list walks in this file stop at NULL
+} _cmsTagTypeLinkedList;
+
+// Some macros to define callbacks.
+#define READ_FN(x)  Type_##x##_Read
+#define WRITE_FN(x) Type_##x##_Write
+#define FREE_FN(x)  Type_##x##_Free
+#define DUP_FN(x)   Type_##x##_Dup
+
+// Helper macro to define a handler. Callbacks do have a fixed naming convention.
+#define TYPE_HANDLER(t, x)  { (t), READ_FN(x), WRITE_FN(x), DUP_FN(x), FREE_FN(x), NULL, 0 }
+
+// Helper macro to define a MPE handler. Callbacks do have a fixed naming convention
+#define TYPE_MPE_HANDLER(t, x)  { (t), READ_FN(x), WRITE_FN(x), GenericMPEdup, GenericMPEfree, NULL, 0 }
+
+// Large finite values used in place of -infinity / +infinity
+#define MINUS_INF   (-1E22F)
+#define PLUS_INF    (+1E22F)
+
+
+// Register a new type handler. Shared between normal types and MPE: 'pos' selects the
+// client chunk of the context that holds the list head. A NULL plug-in resets the list.
+static
+cmsBool RegisterTypesPlugin(cmsContext id, cmsPluginBase* Data, _cmsMemoryClient pos)
+{
+    _cmsTagTypePluginChunkType* chunk = ( _cmsTagTypePluginChunkType*) _cmsContextGetClientChunk(id, pos);
+    cmsPluginTagType* typePlugin = (cmsPluginTagType*) Data;
+    _cmsTagTypeLinkedList* node;
+
+    if (Data != NULL) {
+        // Registering happens in plug-in memory pool.
+        node = (_cmsTagTypeLinkedList*) _cmsPluginMalloc(id, sizeof(_cmsTagTypeLinkedList));
+        if (node == NULL) return FALSE;
+
+        // Push the new handler at the front of the list
+        node ->Handler = typePlugin ->Handler;
+        node ->Next    = chunk ->TagTypes;
+        chunk ->TagTypes = node;
+
+        return TRUE;
+    }
+
+    // NULL plug-in: forget the whole list. Nothing is freed here, as the
+    // pool is destroyed as a whole.
+    chunk ->TagTypes = NULL;
+    return TRUE;
+}
+
+// Return the handler registered for a type signature, or NULL if there is none. The
+// plug-in list is searched before the built-in defaults, so plug-ins take precedence.
+static
+cmsTagTypeHandler* GetHandler(cmsTagTypeSignature sig, _cmsTagTypeLinkedList* PluginLinkedList, _cmsTagTypeLinkedList* DefaultLinkedList)
+{
+    _cmsTagTypeLinkedList* heads[2];
+    _cmsTagTypeLinkedList* node;
+    int which;
+
+    // Search order: plug-in additions first, defaults second
+    heads[0] = PluginLinkedList;
+    heads[1] = DefaultLinkedList;
+
+    for (which = 0; which < 2; which++) {
+        node = heads[which];
+        while (node != NULL) {
+            if (sig == node -> Handler.Signature) return &node ->Handler;
+            node = node ->Next;
+        }
+    }
+
+    return NULL;
+}
+
+
+// Write n wchar_t values as 16-bit numbers; each element is cast to cmsUInt16Number.
+static
+cmsBool _cmsWriteWCharArray(cmsIOHANDLER* io, cmsUInt32Number n, const wchar_t* Array)
+{
+    cmsUInt32Number k = 0;
+
+    _cmsAssert(io != NULL);
+    _cmsAssert(!(Array == NULL && n > 0));
+
+    while (k < n) {
+        if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) Array[k])) return FALSE;
+        k++;
+    }
+    return TRUE;
+}
+
+// Read n 16-bit numbers into a wchar_t array. With Array == NULL the same number of
+// reads is still issued, each with a NULL destination.
+static
+cmsBool _cmsReadWCharArray(cmsIOHANDLER* io, cmsUInt32Number n, wchar_t* Array)
+{
+    cmsUInt16Number unit;
+    cmsUInt32Number k;
+
+    _cmsAssert(io != NULL);
+
+    if (Array == NULL) {
+        // Nowhere to store: just step over n values
+        for (k=0; k < n; k++)
+            if (!_cmsReadUInt16Number(io, NULL)) return FALSE;
+        return TRUE;
+    }
+
+    for (k=0; k < n; k++) {
+        if (!_cmsReadUInt16Number(io, &unit)) return FALSE;
+        Array[k] = (wchar_t) unit;
+    }
+    return TRUE;
+}
+
+// To deal with position tables
+typedef cmsBool (* PositionTableEntryFn)(struct _cms_typehandler_struct* self,
+                                             cmsIOHANDLER* io,
+                                             void* Cargo,
+                                             cmsUInt32Number n,
+                                             cmsUInt32Number SizeOfTag);
+
+// Read a position table as described in ICC spec 4.3: Count (offset, size) records come
+// first, then the data they refer to, so several entries may reuse the same data.
+// BaseOffset is added to every offset; ElementFn is called once per entry after seeking to it.
+static
+cmsBool ReadPositionTable(struct _cms_typehandler_struct* self,
+                              cmsIOHANDLER* io,
+                              cmsUInt32Number Count,
+                              cmsUInt32Number BaseOffset,
+                              void *Cargo,
+                              PositionTableEntryFn ElementFn)
+{
+    cmsUInt32Number *Offsets = NULL, *Sizes = NULL;
+    cmsUInt32Number here, k;
+    cmsBool rc = FALSE;
+
+    here = io->Tell(io);
+
+    // Each directory record takes two cmsUInt32Number items; reject a Count that cannot
+    // fit in what is left of the reported size before allocating anything.
+    if (((io->ReportedSize - here) / (2 * sizeof(cmsUInt32Number))) < Count)
+        return FALSE;
+
+    // One table for the offsets, one for the sizes
+    Offsets = (cmsUInt32Number *) _cmsCalloc(io ->ContextID, Count, sizeof(cmsUInt32Number));
+    if (Offsets == NULL) goto Done;
+
+    Sizes = (cmsUInt32Number *) _cmsCalloc(io ->ContextID, Count, sizeof(cmsUInt32Number));
+    if (Sizes == NULL) goto Done;
+
+    // Pass 1: load the directory, adding BaseOffset to each offset
+    for (k=0; k < Count; k++) {
+
+        if (!_cmsReadUInt32Number(io, &Offsets[k])) goto Done;
+        if (!_cmsReadUInt32Number(io, &Sizes[k])) goto Done;
+
+        Offsets[k] += BaseOffset;
+    }
+
+    // Pass 2: seek to each element and hand it to the reader callback
+    for (k=0; k < Count; k++) {
+
+        if (!io -> Seek(io, Offsets[k])) goto Done;
+        // The recorded size is passed on as the callback's SizeOfTag
+        if (!ElementFn(self, io, Cargo, k, Sizes[k])) goto Done;
+    }
+
+    // Every element was read
+    rc = TRUE;
+
+Done:
+    // Shared exit: release both tables whatever the outcome
+    if (Offsets != NULL) _cmsFree(io ->ContextID, Offsets);
+    if (Sizes != NULL) _cmsFree(io ->ContextID, Sizes);
+    return rc;
+}
+
+// Counterpart of ReadPositionTable for writing. A placeholder directory of Count
+// (offset, size) records is written first, then each element through ElementFn, and
+// finally the directory is rewritten in place with the real values.
+static
+cmsBool WritePositionTable(struct _cms_typehandler_struct* self,
+                               cmsIOHANDLER* io,
+                               cmsUInt32Number SizeOfTag,
+                               cmsUInt32Number Count,
+                               cmsUInt32Number BaseOffset,
+                               void *Cargo,
+                               PositionTableEntryFn ElementFn)
+{
+    cmsUInt32Number *Offsets = NULL, *Sizes = NULL;
+    cmsUInt32Number DirStart, DataEnd, ElemStart;
+    cmsUInt32Number k;
+    cmsBool rc = FALSE;
+
+    // Scratch tables holding what will go into the directory
+    Offsets = (cmsUInt32Number *) _cmsCalloc(io ->ContextID, Count, sizeof(cmsUInt32Number));
+    if (Offsets == NULL) goto Done;
+
+    Sizes = (cmsUInt32Number *) _cmsCalloc(io ->ContextID, Count, sizeof(cmsUInt32Number));
+    if (Sizes == NULL) goto Done;
+
+    // Remember where the directory starts so it can be patched afterwards
+    DirStart = io ->Tell(io);
+
+    // Reserve room with zeroed (offset, size) pairs
+    for (k=0; k < Count; k++) {
+
+        if (!_cmsWriteUInt32Number(io, 0)) goto Done;  // Offset
+        if (!_cmsWriteUInt32Number(io, 0)) goto Done;  // size
+    }
+
+    // Emit the elements, recording where each one starts and how long it is
+    for (k=0; k < Count; k++) {
+
+        ElemStart = io ->Tell(io);
+        Offsets[k] = ElemStart - BaseOffset;
+
+        // Callback to write...
+        if (!ElementFn(self, io, Cargo, k, SizeOfTag)) goto Done;
+
+        Sizes[k] = io ->Tell(io) - ElemStart;
+    }
+
+    // Go back and fill in the real directory
+    DataEnd = io ->Tell(io);
+    if (!io ->Seek(io, DirStart)) goto Done;
+
+    for (k=0; k <  Count; k++) {
+        if (!_cmsWriteUInt32Number(io, Offsets[k])) goto Done;
+        if (!_cmsWriteUInt32Number(io, Sizes[k])) goto Done;
+    }
+
+    // Return to the position reached after the last element
+    if (!io ->Seek(io, DataEnd)) goto Done;
+    rc = TRUE;
+
+Done:
+    if (Offsets != NULL) _cmsFree(io ->ContextID, Offsets);
+    if (Sizes != NULL) _cmsFree(io ->ContextID, Sizes);
+    return rc;
+}
+
+
+// ********************************************************************************
+// Type XYZ. Only one value is allowed
+// ********************************************************************************
+
+//The XYZType contains an array of three encoded values for the XYZ tristimulus
+//values. Tristimulus values must be non-negative. The signed encoding allows for
+//implementation optimizations by minimizing the number of fixed formats.
+
+
+// Read one XYZ value into a newly allocated cmsCIEXYZ; *nItems is 1 on success, 0 otherwise.
+static
+void *Type_XYZ_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsCIEXYZ* value;
+
+    *nItems = 0;
+    value = (cmsCIEXYZ*) _cmsMallocZero(self ->ContextID, sizeof(cmsCIEXYZ));
+    if (value == NULL) return NULL;
+
+    if (_cmsReadXYZNumber(io, value)) {
+        *nItems = 1;
+        return (void*) value;
+    }
+    // Read failed: give the buffer back
+    _cmsFree(self ->ContextID, value);
+    return NULL;
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+// Write the cmsCIEXYZ that Ptr points to; nItems and self are not used.
+static
+cmsBool  Type_XYZ_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    return _cmsWriteXYZNumber(io, (cmsCIEXYZ*) Ptr);
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Duplicate sizeof(cmsCIEXYZ) bytes from Ptr via _cmsDupMem; n is not used.
+static
+void* Type_XYZ_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    return _cmsDupMem(self ->ContextID, Ptr, sizeof(cmsCIEXYZ));
+    cmsUNUSED_PARAMETER(n);
+}
+// Release Ptr with _cmsFree on the handler's context.
+static
+void Type_XYZ_Free(struct _cms_typehandler_struct* self, void *Ptr)
+{
+    _cmsFree(self ->ContextID, Ptr);
+}
+
+
+// Always selects cmsSigXYZType; neither the ICC version nor the data is looked at.
+static
+cmsTagTypeSignature DecideXYZtype(cmsFloat64Number ICCVersion, const void *Data)
+{
+    return cmsSigXYZType;
+    cmsUNUSED_PARAMETER(ICCVersion);
+    cmsUNUSED_PARAMETER(Data);
+}
+
+
+// ********************************************************************************
+// Type chromaticity. Only one value is allowed
+// ********************************************************************************
+// The chromaticity tag type provides basic chromaticity data and type of
+// phosphors or colorants of a monitor to applications and utilities.
+
+// Read a chromaticity tag: a channel count, a 16-bit table value (read, not used), then
+// x,y pairs for Red, Green and Blue. Only 3-channel tags are accepted; every Y is set to 1.0.
+static
+void *Type_Chromaticity_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsCIExyYTRIPLE* triple;
+    cmsUInt16Number nChannels, tableValue;
+    cmsFloat64Number* coords[6];
+    int k;
+
+    *nItems = 0;
+    triple =  (cmsCIExyYTRIPLE*) _cmsMallocZero(self ->ContextID, sizeof(cmsCIExyYTRIPLE));
+    if (triple == NULL) return NULL;
+
+    if (!_cmsReadUInt16Number(io, &nChannels)) goto Error;
+
+    // Let's recover from a bug introduced in early versions of lcms1
+    if (nChannels == 0 && SizeOfTag == 32) {
+
+        if (!_cmsReadUInt16Number(io, NULL)) goto Error;
+        if (!_cmsReadUInt16Number(io, &nChannels)) goto Error;
+    }
+
+    if (nChannels != 3) goto Error;
+
+    // This field has to be consumed even though its value is not used
+    if (!_cmsReadUInt16Number(io, &tableValue)) goto Error;
+
+    // Stream order is Red.x, Red.y, Green.x, Green.y, Blue.x, Blue.y
+    coords[0] = &triple ->Red.x;    coords[1] = &triple ->Red.y;
+    coords[2] = &triple ->Green.x;  coords[3] = &triple ->Green.y;
+    coords[4] = &triple ->Blue.x;   coords[5] = &triple ->Blue.y;
+
+    for (k = 0; k < 6; k++) {
+        if (!_cmsRead15Fixed16Number(io, coords[k])) goto Error;
+    }
+
+    triple ->Red.Y = triple ->Green.Y = triple ->Blue.Y = 1.0;
+
+    *nItems = 1;
+    return (void*) triple;
+
+Error:
+    _cmsFree(self ->ContextID, (void*) triple);
+    return NULL;
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+// Write one (x, y) pair as two 15.16 fixed-point numbers; FALSE if either write fails.
+static
+cmsBool  SaveOneChromaticity(cmsFloat64Number x, cmsFloat64Number y, cmsIOHANDLER* io)
+{
+    // && short-circuits, so y is not written once x has failed
+    return _cmsWriteUInt32Number(io, (cmsUInt32Number) _cmsDoubleTo15Fixed16(x)) &&
+           _cmsWriteUInt32Number(io, (cmsUInt32Number) _cmsDoubleTo15Fixed16(y));
+}
+
+// Write a chromaticity tag: channel count 3, table value 0, then the x,y pairs of
+// Red, Green and Blue. The Y components are not written.
+static
+cmsBool  Type_Chromaticity_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsCIExyYTRIPLE* triple = (cmsCIExyYTRIPLE*) Ptr;
+    cmsBool ok;
+
+    ok = _cmsWriteUInt16Number(io, 3) &&        // nChannels
+         _cmsWriteUInt16Number(io, 0) &&        // Table
+         SaveOneChromaticity(triple -> Red.x,   triple -> Red.y, io) &&
+         SaveOneChromaticity(triple -> Green.x, triple -> Green.y, io) &&
+         SaveOneChromaticity(triple -> Blue.x,  triple -> Blue.y, io);
+    return ok;
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Duplicate sizeof(cmsCIExyYTRIPLE) bytes from Ptr via _cmsDupMem; n is not used.
+static
+void* Type_Chromaticity_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    return _cmsDupMem(self ->ContextID, Ptr, sizeof(cmsCIExyYTRIPLE));
+    cmsUNUSED_PARAMETER(n);
+}
+// Release Ptr with _cmsFree on the handler's context.
+static
+void Type_Chromaticity_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    _cmsFree(self ->ContextID, Ptr);
+}
+
+
+// ********************************************************************************
+// Type cmsSigColorantOrderType
+// ********************************************************************************
+
+// This is an optional tag which specifies the laydown order in which colorants will
+// be printed on an n-colorant device. The laydown order may be the same as the
+// channel generation order listed in the colorantTableTag or the channel order of a
+// colour space such as CMYK, in which case this tag is not needed. When this is not
+// the case (for example, ink-towers sometimes use the order KCMY), this tag may be
+// used to specify the laydown order of the colorants.
+
+
+// Read a colorant order tag: a 32-bit count followed by that many one-byte entries.
+// The result is always cmsMAXCHANNELS bytes long; slots past the count stay at 0xFF.
+static
+void *Type_ColorantOrderType_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt8Number* Order;
+    cmsUInt32Number nColorants;
+
+    *nItems = 0;
+    if (!_cmsReadUInt32Number(io, &nColorants)) return NULL;
+    if (nColorants > cmsMAXCHANNELS) return NULL;
+
+    Order = (cmsUInt8Number*) _cmsCalloc(self ->ContextID, cmsMAXCHANNELS, sizeof(cmsUInt8Number));
+    if (Order == NULL) return NULL;
+
+    // We use FF as end marker: pre-fill so that unread slots keep it
+    memset(Order, 0xFF, cmsMAXCHANNELS * sizeof(cmsUInt8Number));
+
+    if (io ->Read(io, Order, sizeof(cmsUInt8Number), nColorants) == nColorants) {
+        *nItems = 1;
+        return (void*) Order;
+    }
+
+    _cmsFree(self ->ContextID, (void*) Order);
+    return NULL;
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+// Write a colorant order tag: the number of entries that are not 0xFF, then that many
+// bytes taken from the start of the array.
+static
+cmsBool Type_ColorantOrderType_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsUInt8Number*  Order = (cmsUInt8Number*) Ptr;
+    cmsUInt32Number k, sz, used = 0;
+
+    // Every slot different from the 0xFF marker counts, wherever it sits
+    for (k = 0; k < cmsMAXCHANNELS; k++) {
+        if (Order[k] == 0xFF) continue;
+        used++;
+    }
+
+    if (!_cmsWriteUInt32Number(io, used)) return FALSE;
+    sz = used * sizeof(cmsUInt8Number);
+    if (!io -> Write(io, sz, Order)) return FALSE;
+    return TRUE;
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Duplicate the full cmsMAXCHANNELS-byte array via _cmsDupMem; n is not used.
+static
+void* Type_ColorantOrderType_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    return _cmsDupMem(self ->ContextID, Ptr, cmsMAXCHANNELS * sizeof(cmsUInt8Number));
+    cmsUNUSED_PARAMETER(n);
+}
+
+// Release Ptr with _cmsFree on the handler's context.
+static
+void Type_ColorantOrderType_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    _cmsFree(self ->ContextID, Ptr);
+}
+
+// ********************************************************************************
+// Type cmsSigS15Fixed16ArrayType
+// ********************************************************************************
+// This type represents an array of generic 4-byte/32-bit fixed point quantity.
+// The number of values is determined from the size of the tag.
+
+// Read an array of 15.16 fixed-point values into newly allocated doubles. The element
+// count is SizeOfTag / sizeof(cmsUInt32Number) and is reported through nItems.
+static
+void *Type_S15Fixed16_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsFloat64Number*  values;
+    cmsUInt32Number k, count;
+
+    *nItems = 0;
+    count = SizeOfTag / sizeof(cmsUInt32Number);
+    values = (cmsFloat64Number*) _cmsCalloc(self ->ContextID, count, sizeof(cmsFloat64Number));
+    if (values == NULL) return NULL;
+
+    for (k=0; k < count; k++) {
+        if (_cmsRead15Fixed16Number(io, &values[k])) continue;
+        // Read failed: drop the partially filled array
+        _cmsFree(self ->ContextID, values);
+        return NULL;
+    }
+
+    *nItems = count;
+    return (void*) values;
+}
+
+// Write nItems doubles as 15.16 fixed-point numbers, stopping at the first failed write.
+static
+cmsBool Type_S15Fixed16_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsFloat64Number* values = (cmsFloat64Number*) Ptr;
+    cmsUInt32Number k = 0;
+
+    while (k < nItems) {
+        if (!_cmsWrite15Fixed16Number(io, values[k])) return FALSE;
+        k++;
+    }
+
+    return TRUE;
+    cmsUNUSED_PARAMETER(self);
+}
+// Duplicate n cmsFloat64Number values from Ptr via _cmsDupMem.
+static
+void* Type_S15Fixed16_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    return _cmsDupMem(self ->ContextID, Ptr, n * sizeof(cmsFloat64Number));
+}
+
+// Release Ptr with _cmsFree on the handler's context.
+static
+void Type_S15Fixed16_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    _cmsFree(self ->ContextID, Ptr);
+}
+
+// ********************************************************************************
+// Type cmsSigU16Fixed16ArrayType
+// ********************************************************************************
+// This type represents an array of generic 4-byte/32-bit quantity.
+// The number of values is determined from the size of the tag.
+
+
+// Read an array of 32-bit numbers into newly allocated doubles, dividing each by
+// 65536. The element count is SizeOfTag / sizeof(cmsUInt32Number).
+static
+void *Type_U16Fixed16_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsFloat64Number*  values;
+    cmsUInt32Number raw, k, count;
+
+    *nItems = 0;
+    count = SizeOfTag / sizeof(cmsUInt32Number);
+    values = (cmsFloat64Number*) _cmsCalloc(self ->ContextID, count, sizeof(cmsFloat64Number));
+    if (values == NULL) return NULL;
+
+    for (k=0; k < count; k++) {
+        if (_cmsReadUInt32Number(io, &raw)) {
+            // Convert to cmsFloat64Number
+            values[k] =  (cmsFloat64Number) (raw / 65536.0);
+            continue;
+        }
+        _cmsFree(self ->ContextID, (void*) values);
+        return NULL;
+    }
+
+    *nItems = count;
+    return (void*) values;
+}
+
+// Write nItems doubles as 32-bit numbers holding floor(value*65536 + 0.5).
+static
+cmsBool Type_U16Fixed16_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsFloat64Number* values = (cmsFloat64Number*) Ptr;
+    cmsUInt32Number k, fixed;
+
+    for (k=0; k < nItems; k++) {
+
+        // Scale by 65536 and round to the nearest integer
+        fixed = (cmsUInt32Number) floor(values[k]*65536.0 + 0.5);
+        if (!_cmsWriteUInt32Number(io, fixed)) return FALSE;
+    }
+
+    return TRUE;
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Duplicate n cmsFloat64Number values from Ptr via _cmsDupMem.
+static
+void* Type_U16Fixed16_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    return _cmsDupMem(self ->ContextID, Ptr, n * sizeof(cmsFloat64Number));
+}
+// Release Ptr with _cmsFree on the handler's context.
+static
+void Type_U16Fixed16_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    _cmsFree(self ->ContextID, Ptr);
+}
+
+// ********************************************************************************
+// Type cmsSigSignatureType
+// ********************************************************************************
+//
+// The signatureType contains a four-byte sequence, Sequences of less than four
+// characters are padded at the end with spaces, 20h.
+// Typically this type is used for registered tags that can be displayed on many
+// development systems as a sequence of four characters.
+
+// Read one 32-bit signature into a newly allocated cmsSignature; NULL and *nItems == 0 on failure.
+static
+void *Type_Signature_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsSignature* SigPtr = (cmsSignature*) _cmsMalloc(self ->ContextID, sizeof(cmsSignature));
+    *nItems = 0;
+    if (SigPtr == NULL) return NULL;
+    if (_cmsReadUInt32Number(io, SigPtr)) { *nItems = 1; return SigPtr; }
+    // Failed read: release the buffer instead of leaking it
+    _cmsFree(self ->ContextID, SigPtr);
+    return NULL;
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+// Write the cmsSignature that Ptr points to as one 32-bit number; nItems and self are not used.
+static
+cmsBool  Type_Signature_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsSignature* SigPtr = (cmsSignature*) Ptr;
+
+    return _cmsWriteUInt32Number(io, *SigPtr);
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+// Duplicate n cmsSignature values from Ptr via _cmsDupMem.
+static
+void* Type_Signature_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    return _cmsDupMem(self ->ContextID, Ptr, n * sizeof(cmsSignature));
+}
+// Release Ptr with _cmsFree on the handler's context.
+static
+void Type_Signature_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    _cmsFree(self ->ContextID, Ptr);
+}
+
+
+// ********************************************************************************
+// Type cmsSigTextType
+// ********************************************************************************
+//
+// The textType is a simple text structure that contains a 7-bit ASCII text string.
+// The length of the string is obtained by subtracting 8 from the element size portion
+// of the tag itself. This string must be terminated with a 00h byte.
+
+// Read a text tag: SizeOfTag bytes of text, stored as the ASCII entry (no language, no
+// country) of a newly allocated cmsMLU. Returns NULL and leaves *nItems at 0 on any failure.
+static
+void *Type_Text_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    char* Text = NULL;
+    cmsMLU* mlu = NULL;
+
+    *nItems = 0;
+
+    // Create a container
+    mlu = cmsMLUalloc(self ->ContextID, 1);
+    if (mlu == NULL) return NULL;
+
+    // We need to store the "\0" at the end, so SizeOfTag + 1 must not wrap around
+    if (SizeOfTag == UINT_MAX) goto Error;
+
+    Text = (char*) _cmsMalloc(self ->ContextID, SizeOfTag + 1);
+    if (Text == NULL) goto Error;
+
+    if (io -> Read(io, Text, sizeof(char), SizeOfTag) != SizeOfTag) goto Error;
+
+    // Make sure text is properly ended
+    Text[SizeOfTag] = 0;
+
+    // Keep the result; an item is reported only once the string has been stored
+    if (!cmsMLUsetASCII(mlu, cmsNoLanguage, cmsNoCountry, Text)) goto Error;
+    *nItems = 1;
+
+    _cmsFree(self ->ContextID, Text);
+    return (void*) mlu;
+
+Error:
+    if (mlu != NULL) cmsMLUfree(mlu);
+    if (Text != NULL) _cmsFree(self ->ContextID, Text);
+
+    return NULL;
+}
+
+// Write a text tag from the ASCII entry (no language, no country) of the cmsMLU,
+// separator included. Fails if that entry reports a size of zero.
+static
+cmsBool Type_Text_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsMLU* mlu = (cmsMLU*) Ptr;
+    cmsUInt32Number nBytes;
+    cmsBool  written;
+    char* Buffer;
+
+    // Get the size of the string. Note there is an extra "\0" at the end
+    nBytes = cmsMLUgetASCII(mlu, cmsNoLanguage, cmsNoCountry, NULL, 0);
+    if (nBytes == 0) return FALSE;       // Cannot be zero!
+
+    Buffer = (char*) _cmsMalloc(self ->ContextID, nBytes);
+    if (Buffer == NULL) return FALSE;
+
+    // Second call, this time with a buffer to fill
+    cmsMLUgetASCII(mlu, cmsNoLanguage, cmsNoCountry, Buffer, nBytes);
+
+    // The scratch copy is released whether or not the write worked
+    written = io ->Write(io, nBytes, Buffer);
+    _cmsFree(self ->ContextID, Buffer);
+
+    return written;
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+// Duplicates a textType value by delegating to cmsMLUdup.
+static
+void* Type_Text_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsMLU* Original = (cmsMLU*) Ptr;
+
+    return (void*) cmsMLUdup(Original);
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+// Releases a textType value; the payload is a cmsMLU, so cmsMLUfree does the work.
+static
+void Type_Text_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsMLUfree((cmsMLU*) Ptr);
+    return;
+
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Chooses the on-disk type for text tags: multiLocalizedUnicodeType when the
+// ICC version is 4.0 or above, plain textType otherwise. Data is not inspected.
+static
+cmsTagTypeSignature DecideTextType(cmsFloat64Number ICCVersion, const void *Data)
+{
+    return (ICCVersion >= 4.0) ? cmsSigMultiLocalizedUnicodeType
+                               : cmsSigTextType;
+
+    cmsUNUSED_PARAMETER(Data);
+}
+
+
+// ********************************************************************************
+// Type cmsSigDataType
+// ********************************************************************************
+
+// Reads a dataType tag into a single heap block: a cmsICCData header whose
+// trailing data[] member is extended to hold the payload. The first 32-bit
+// word of the tag is the flag; the remaining SizeOfTag - 4 bytes are the data.
+//
+// Returns the cmsICCData block with *nItems == 1, or NULL with *nItems == 0.
+static
+void *Type_Data_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsICCData* Result;
+    cmsUInt32Number PayloadLen;
+
+    *nItems = 0;
+
+    // The flag word must fit in the tag
+    if (SizeOfTag < sizeof(cmsUInt32Number))
+        return NULL;
+
+    PayloadLen = SizeOfTag - sizeof(cmsUInt32Number);
+    if (PayloadLen > INT_MAX)
+        return NULL;
+
+    // sizeof(cmsICCData) already counts one byte of data[], hence the -1
+    Result = (cmsICCData*) _cmsMalloc(self ->ContextID, sizeof(cmsICCData) + PayloadLen - 1);
+    if (Result == NULL)
+        return NULL;
+
+    Result ->len = PayloadLen;
+
+    // Flag first, then the raw bytes; either failure discards the block
+    if (!_cmsReadUInt32Number(io, &Result ->flag) ||
+        io -> Read(io, Result ->data, sizeof(cmsUInt8Number), PayloadLen) != PayloadLen) {
+
+        _cmsFree(self ->ContextID, Result);
+        return NULL;
+    }
+
+    *nItems = 1;
+    return (void*) Result;
+}
+
+
+// Writes a dataType tag: the 32-bit flag followed by len bytes of payload.
+static
+cmsBool Type_Data_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+   cmsICCData* Payload = (cmsICCData*) Ptr;
+
+   if (_cmsWriteUInt32Number(io, Payload ->flag))
+       return io ->Write(io, Payload ->len, Payload ->data);
+
+   return FALSE;
+
+   cmsUNUSED_PARAMETER(nItems);
+   cmsUNUSED_PARAMETER(self);
+}
+
+
+// Duplicates a cmsICCData block, header plus len payload bytes, using the
+// same size formula as the reader (header + len - 1).
+static
+void* Type_Data_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    const cmsICCData* Src = (const cmsICCData*) Ptr;
+
+    return _cmsDupMem(self ->ContextID, Src, sizeof(cmsICCData) + Src ->len - 1);
+
+    cmsUNUSED_PARAMETER(n);
+}
+
+// Releases a cmsICCData block through the handler's context.
+static
+void Type_Data_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsContext Ctx = self ->ContextID;
+
+    _cmsFree(Ctx, Ptr);
+}
+
+// ********************************************************************************
+// Type cmsSigTextDescriptionType
+// ********************************************************************************
+
+// Reads a textDescriptionType tag into a newly allocated cmsMLU.
+//
+// Only the ASCII description is kept (stored under cmsNoLanguage /
+// cmsNoCountry). The Unicode and ScriptCode sections are read and discarded.
+// SizeOfTag is used as a running count of the bytes still expected.
+//
+// Once the ASCII part is stored the parser becomes tolerant: most later
+// problems jump to Done and still return the cmsMLU. The exception is a
+// failed read inside the 67-byte ScriptCode skip, which jumps to Error.
+//
+// Returns the cmsMLU with *nItems == 1, or NULL with *nItems == 0 on failure.
+static
+void *Type_Text_Description_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    char* Text = NULL;
+    cmsMLU* mlu = NULL;
+    cmsUInt32Number  AsciiCount;
+    cmsUInt32Number  i, UnicodeCode, UnicodeCount;
+    cmsUInt16Number  ScriptCodeCode, Dummy;
+    cmsUInt8Number   ScriptCodeCount;
+
+    *nItems = 0;
+
+    //  One dword should be there
+    if (SizeOfTag < sizeof(cmsUInt32Number)) return NULL;
+
+    // Read len of ASCII
+    if (!_cmsReadUInt32Number(io, &AsciiCount)) return NULL;
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+    // Check for size: the ASCII string must fit in what is left of the tag
+    if (SizeOfTag < AsciiCount) return NULL;
+
+    // All seems Ok, allocate the container
+    mlu = cmsMLUalloc(self ->ContextID, 1);
+    if (mlu == NULL) return NULL;
+
+    // Room for the ASCII string plus a forced terminator
+    Text = (char*) _cmsMalloc(self ->ContextID, AsciiCount + 1);
+    if (Text == NULL) goto Error;
+
+    // Read it
+    if (io ->Read(io, Text, sizeof(char), AsciiCount) != AsciiCount) goto Error;
+    SizeOfTag -= AsciiCount;
+
+    // Make sure there is a terminator
+    Text[AsciiCount] = 0;
+
+    // Set the MLU entry. From here we can be tolerant to wrong types
+    if (!cmsMLUsetASCII(mlu, cmsNoLanguage, cmsNoCountry, Text)) goto Error;
+    _cmsFree(self ->ContextID, (void*) Text);
+    Text = NULL;    // already freed; keeps the Error path from freeing it twice
+
+    // Skip Unicode code
+    if (SizeOfTag < 2* sizeof(cmsUInt32Number)) goto Done;
+    if (!_cmsReadUInt32Number(io, &UnicodeCode)) goto Done;
+    if (!_cmsReadUInt32Number(io, &UnicodeCount)) goto Done;
+    SizeOfTag -= 2* sizeof(cmsUInt32Number);
+
+    if (SizeOfTag < UnicodeCount*sizeof(cmsUInt16Number)) goto Done;
+
+    // Consume the Unicode characters one 16-bit unit at a time, discarding them
+    for (i=0; i < UnicodeCount; i++) {
+        if (!io ->Read(io, &Dummy, sizeof(cmsUInt16Number), 1)) goto Done;
+    }
+    SizeOfTag -= UnicodeCount*sizeof(cmsUInt16Number);
+
+    // Skip ScriptCode code if present. Some buggy profiles have less
+    // data than strictly required. We need to skip it as this type may come
+    // embedded in other types.
+
+    if (SizeOfTag >= sizeof(cmsUInt16Number) + sizeof(cmsUInt8Number) + 67) {
+
+        if (!_cmsReadUInt16Number(io, &ScriptCodeCode)) goto Done;
+        if (!_cmsReadUInt8Number(io,  &ScriptCodeCount)) goto Done;
+
+        // Skip rest of tag (the 67-byte ScriptCode description)
+        for (i=0; i < 67; i++) {
+            if (!io ->Read(io, &Dummy, sizeof(cmsUInt8Number), 1)) goto Error;
+        }
+    }
+
+Done:
+
+    *nItems = 1;
+    return mlu;
+
+Error:
+    if (Text) _cmsFree(self ->ContextID, (void*) Text);
+    if (mlu) cmsMLUfree(mlu);
+    return NULL;
+}
+
+
+// Writes a cmsMLU as a textDescriptionType tag.
+//
+// The ASCII and wide renditions for cmsNoLanguage / cmsNoCountry are fetched
+// and written as the ASCII and Unicode sections; the ScriptCode section is
+// written as zeros. This tag can come IN UNALIGNED SIZE. In order to prevent
+// issues, zero padding is appended so that the tag ends on an aligned size.
+//
+// Returns TRUE on success, FALSE on allocation or write failure.
+static
+cmsBool  Type_Text_Description_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsMLU* mlu = (cmsMLU*) Ptr;
+    char *Text = NULL;
+    wchar_t *Wide = NULL;
+    cmsUInt32Number len, len_text, len_tag_requirement, len_aligned;
+    cmsBool  rc = FALSE;
+    char Filler[68];
+
+    // Used below for writing zeroes
+    memset(Filler, 0, sizeof(Filler));
+
+    // Get the len of string
+    len = cmsMLUgetASCII(mlu, cmsNoLanguage, cmsNoCountry, NULL, 0);
+
+    // Specification ICC.1:2001-04 (v2.4.0): It has been found that textDescriptionType can contain misaligned data
+    //(see clause 4.1 for the definition of 'aligned'). Because the Unicode language
+    // code and Unicode count immediately follow the ASCII description, their
+    // alignment is not correct if the ASCII count is not a multiple of four. The
+    // ScriptCode code is misaligned when the ASCII count is odd. Profile reading and
+    // writing software must be written carefully in order to handle these alignment
+    // problems.
+    //
+    // The last sentence above suggests handling alignment issues in the
+    // parser. The provided example (Table 69 on Page 60) makes this clear.
+    // Padding only the ASCII count is not sufficient for an aligned tag
+    // size, with the same text size in ASCII and Unicode.
+
+    // Null strings (len is unsigned, so "no text" is exactly zero)
+    if (len == 0) {
+
+        Text = (char*)    _cmsDupMem(self ->ContextID, "", sizeof(char));
+        Wide = (wchar_t*) _cmsDupMem(self ->ContextID, L"", sizeof(wchar_t));
+
+        // Either duplicate may fail; bail out before strlen() touches a NULL.
+        // The Error path releases whichever one did succeed.
+        if (Text == NULL || Wide == NULL) goto Error;
+    }
+    else {
+        // Create independent buffers
+        Text = (char*) _cmsCalloc(self ->ContextID, len, sizeof(char));
+        if (Text == NULL) goto Error;
+
+        Wide = (wchar_t*) _cmsCalloc(self ->ContextID, len, sizeof(wchar_t));
+        if (Wide == NULL) goto Error;
+
+        // Get both representations.
+        cmsMLUgetASCII(mlu, cmsNoLanguage, cmsNoCountry,  Text, len * sizeof(char));
+        cmsMLUgetWide(mlu,  cmsNoLanguage, cmsNoCountry,  Wide, len * sizeof(wchar_t));
+    }
+
+    // Tell the real text len including the null terminator and padding
+    len_text = (cmsUInt32Number) strlen(Text) + 1;
+    // Compute the total tag size requirement
+    len_tag_requirement = (8+4+len_text+4+4+2*len_text+2+1+67);
+    len_aligned = _cmsALIGNLONG(len_tag_requirement);
+
+  // * cmsUInt32Number       count;          * Description length
+  // * cmsInt8Number         desc[count]     * NULL terminated ascii string
+  // * cmsUInt32Number       ucLangCode;     * UniCode language code
+  // * cmsUInt32Number       ucCount;        * UniCode description length
+  // * cmsInt16Number        ucDesc[ucCount];* The UniCode description
+  // * cmsUInt16Number       scCode;         * ScriptCode code
+  // * cmsUInt8Number        scCount;        * ScriptCode count
+  // * cmsInt8Number         scDesc[67];     * ScriptCode Description
+
+    if (!_cmsWriteUInt32Number(io, len_text)) goto Error;
+    if (!io ->Write(io, len_text, Text)) goto Error;
+
+    if (!_cmsWriteUInt32Number(io, 0)) goto Error;  // ucLanguageCode
+
+    if (!_cmsWriteUInt32Number(io, len_text)) goto Error;
+    // Note that in some compilers sizeof(cmsUInt16Number) != sizeof(wchar_t)
+    if (!_cmsWriteWCharArray(io, len_text, Wide)) goto Error;
+
+    // ScriptCode Code & count (unused)
+    if (!_cmsWriteUInt16Number(io, 0)) goto Error;
+    if (!_cmsWriteUInt8Number(io, 0)) goto Error;
+
+    if (!io ->Write(io, 67, Filler)) goto Error;
+
+    // Possibly add pad at the end of tag. Compared directly rather than via an
+    // unsigned subtraction, so a wrapped value can never request a pad larger
+    // than Filler.
+    if (len_aligned > len_tag_requirement)
+      if (!io ->Write(io, len_aligned - len_tag_requirement, Filler)) goto Error;
+
+    rc = TRUE;
+
+Error:
+    if (Text) _cmsFree(self ->ContextID, Text);
+    if (Wide) _cmsFree(self ->ContextID, Wide);
+
+    return rc;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+
+// Duplicates a textDescriptionType value by delegating to cmsMLUdup.
+static
+void* Type_Text_Description_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsMLU* Original = (cmsMLU*) Ptr;
+
+    return (void*) cmsMLUdup(Original);
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Releases a textDescriptionType value; the payload is a cmsMLU.
+static
+void Type_Text_Description_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsMLUfree((cmsMLU*) Ptr);
+    return;
+
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+// Chooses the on-disk type for description tags: multiLocalizedUnicodeType
+// when the ICC version is 4.0 or above, textDescriptionType otherwise.
+// Data is not inspected.
+static
+cmsTagTypeSignature DecideTextDescType(cmsFloat64Number ICCVersion, const void *Data)
+{
+    return (ICCVersion >= 4.0) ? cmsSigMultiLocalizedUnicodeType
+                               : cmsSigTextDescriptionType;
+
+    cmsUNUSED_PARAMETER(Data);
+}
+
+
+// ********************************************************************************
+// Type cmsSigCurveType
+// ********************************************************************************
+
+// Reads a curveType tag. The leading 32-bit count selects the encoding:
+//   0       -> linear: built as a parametric curve of type 1 with gamma 1.0
+//   1       -> a single 8.8 fixed-point gamma exponent follows
+//   other   -> a table of Count 16-bit entries follows (Count > 0x7FFF is rejected)
+//
+// Returns the new cmsToneCurve with *nItems == 1, or NULL with *nItems == 0
+// on any failure, including a failed curve construction.
+static
+void *Type_Curve_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt32Number Count;
+    cmsToneCurve* NewGamma;
+
+    *nItems = 0;
+    if (!_cmsReadUInt32Number(io, &Count)) return NULL;
+
+    switch (Count) {
+
+           case 0:   // Linear.
+               {
+                   cmsFloat64Number SingleGamma = 1.0;
+
+                   NewGamma = cmsBuildParametricToneCurve(self ->ContextID, 1, &SingleGamma);
+                   if (!NewGamma) return NULL;
+                   *nItems = 1;
+                   return NewGamma;
+               }
+
+           case 1:  // Specified as the exponent of gamma function
+               {
+                   cmsUInt16Number SingleGammaFixed;
+                   cmsFloat64Number SingleGamma;
+
+                   if (!_cmsReadUInt16Number(io, &SingleGammaFixed)) return NULL;
+                   SingleGamma = _cms8Fixed8toDouble(SingleGammaFixed);
+
+                   // Same contract as the linear case: only report an item
+                   // once the curve has actually been built
+                   NewGamma = cmsBuildParametricToneCurve(self ->ContextID, 1, &SingleGamma);
+                   if (!NewGamma) return NULL;
+                   *nItems = 1;
+                   return NewGamma;
+               }
+
+           default:  // Curve
+
+               if (Count > 0x7FFF)
+                   return NULL; // This is to prevent bad guys for doing bad things
+
+               NewGamma = cmsBuildTabulatedToneCurve16(self ->ContextID, Count, NULL);
+               if (!NewGamma) return NULL;
+
+               if (!_cmsReadUInt16Array(io, Count, NewGamma -> Table16)) {
+                   cmsFreeToneCurve(NewGamma);
+                   return NULL;
+               }
+
+               *nItems = 1;
+               return NewGamma;
+    }
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+
+// Writes a tone curve as curveType. A one-segment curve whose segment type
+// is 1 is stored compactly as count 1 plus an 8.8 fixed-point gamma; every
+// other curve is stored as its nEntries-long 16-bit table.
+static
+cmsBool  Type_Curve_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsToneCurve* Curve = (cmsToneCurve*) Ptr;
+    cmsUInt16Number GammaFixed;
+
+    if (Curve ->nSegments != 1 || Curve ->Segments[0].Type != 1) {
+
+        // Tabulated form: entry count followed by the table itself
+        if (!_cmsWriteUInt32Number(io, Curve ->nEntries)) return FALSE;
+        return _cmsWriteUInt16Array(io, Curve ->nEntries, Curve ->Table16);
+    }
+
+    // Single gamma, preserve number
+    GammaFixed = _cmsDoubleTo8Fixed8(Curve ->Segments[0].Params[0]);
+
+    if (_cmsWriteUInt32Number(io, 1) && _cmsWriteUInt16Number(io, GammaFixed))
+        return TRUE;
+
+    return FALSE;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+// Duplicates a curveType value by delegating to cmsDupToneCurve.
+static
+void* Type_Curve_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsToneCurve* Original = (cmsToneCurve*) Ptr;
+
+    return (void*) cmsDupToneCurve(Original);
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Releases a curveType value through cmsFreeToneCurve.
+static
+void Type_Curve_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsFreeToneCurve((cmsToneCurve*) Ptr);
+    return;
+
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+// ********************************************************************************
+// Type cmsSigParametricCurveType
+// ********************************************************************************
+
+
+// Decide which curve type to use on writing. parametricCurveType is chosen
+// only when all of these hold: ICC version is at least 4.0, the curve has
+// exactly one segment, and that segment's type lies in 0..5. Anything else
+// is written as curveType.
+static
+cmsTagTypeSignature DecideCurveType(cmsFloat64Number ICCVersion, const void *Data)
+{
+    cmsToneCurve* Curve = (cmsToneCurve*) Data;
+
+    // Evaluated left to right, so Segments[0] is only touched for 1-segment curves
+    if (ICCVersion < 4.0 ||
+        Curve ->nSegments != 1 ||            // Only 1-segment curves can be saved as parametric
+        Curve ->Segments[0].Type < 0 ||      // Only non-inverted curves
+        Curve ->Segments[0].Type > 5) {      // Only ICC parametric curves
+
+        return cmsSigCurveType;
+    }
+
+    return cmsSigParametricCurveType;
+}
+
+// Reads a parametricCurveType tag: a 16-bit function type (0..4), a 16-bit
+// reserved field, then as many s15Fixed16 parameters as ParamsByType gives
+// for that function type. The curve is built with lcms type number Type+1.
+//
+// Returns the new cmsToneCurve with *nItems == 1, or NULL with *nItems == 0
+// on read failure, unknown function type, or failed curve construction.
+static
+void *Type_ParametricCurve_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    static const int ParamsByType[] = { 1, 3, 4, 5, 7 };
+    cmsFloat64Number Params[10];
+    cmsUInt16Number Type;
+    int i, n;
+    cmsToneCurve* NewGamma;
+
+    // Like the other readers, report zero items on every failure path
+    *nItems = 0;
+
+    if (!_cmsReadUInt16Number(io, &Type)) return NULL;
+    if (!_cmsReadUInt16Number(io, NULL)) return NULL;   // Reserved
+
+    // Type indexes ParamsByType, so it must be validated before use
+    if (Type > 4) {
+
+        cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown parametric curve type '%d'", Type);
+        return NULL;
+    }
+
+    memset(Params, 0, sizeof(Params));
+    n = ParamsByType[Type];
+
+    for (i=0; i < n; i++) {
+
+        if (!_cmsRead15Fixed16Number(io, &Params[i])) return NULL;
+    }
+
+    NewGamma = cmsBuildParametricToneCurve(self ->ContextID, Type+1, Params);
+    if (NewGamma == NULL) return NULL;
+
+    *nItems = 1;
+    return NewGamma;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+
+// Writes a tone curve as parametricCurveType: the function type (segment
+// type minus 1), a reserved zero word, and the segment's parameters as
+// s15Fixed16 numbers. The number of parameters comes from ParamsByType.
+//
+// Only curves with exactly one segment whose type lies in 1..5 can be
+// written; anything else signals cmsERROR_UNKNOWN_EXTENSION and returns FALSE.
+static
+cmsBool  Type_ParametricCurve_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsToneCurve* Curve = (cmsToneCurve*) Ptr;
+    int i, nParams, typen;
+    static const int ParamsByType[] = { 0, 1, 3, 4, 5, 7 };
+
+    // Check the segment count before touching Segments[0]: the || short-circuits,
+    // so a curve without exactly one segment is rejected without dereferencing
+    // its segment array. This matches DecideCurveType, which only selects this
+    // type for 1-segment curves.
+    if (Curve ->nSegments != 1 || Curve -> Segments[0].Type < 1) {
+
+        cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Multisegment or Inverted parametric curves cannot be written");
+        return FALSE;
+    }
+
+    typen = Curve -> Segments[0].Type;
+
+    // typen indexes ParamsByType, so cap it at the last table entry
+    if (typen > 5) {
+        cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported parametric curve");
+        return FALSE;
+    }
+
+    nParams = ParamsByType[typen];
+
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) (Curve ->Segments[0].Type - 1))) return FALSE;
+    if (!_cmsWriteUInt16Number(io, 0)) return FALSE;        // Reserved
+
+    for (i=0; i < nParams; i++) {
+
+        if (!_cmsWrite15Fixed16Number(io, Curve -> Segments[0].Params[i])) return FALSE;
+    }
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+// Duplicates a parametricCurveType value by delegating to cmsDupToneCurve.
+static
+void* Type_ParametricCurve_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsToneCurve* Original = (cmsToneCurve*) Ptr;
+
+    return (void*) cmsDupToneCurve(Original);
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Releases a parametricCurveType value through cmsFreeToneCurve.
+static
+void Type_ParametricCurve_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsFreeToneCurve((cmsToneCurve*) Ptr);
+    return;
+
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+// ********************************************************************************
+// Type cmsSigDateTimeType
+// ********************************************************************************
+
+// A 12-byte value representation of the time and date, where the byte usage is assigned
+// as specified in table 1. The actual values are encoded as 16-bit unsigned integers
+// (uInt16Number - see 5.1.6).
+//
+// All the dateTimeNumber values in a profile shall be in Coordinated Universal Time
+// (UTC, also known as GMT or ZULU Time). Profile writers are required to convert local
+// time to UTC when setting these values. Programmes that display these values may show
+// the dateTimeNumber as UTC, show the equivalent local time (at current locale), or
+// display both UTC and local versions of the dateTimeNumber.
+
+// Reads a dateTimeType tag: one cmsDateTimeNumber is read from the stream
+// and decoded into a heap-allocated struct tm.
+//
+// Returns the struct tm with *nItems == 1, or NULL with *nItems == 0 when
+// the allocation or the read fails.
+static
+void *Type_DateTime_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsDateTimeNumber timestamp;
+    struct tm * NewDateTime;
+
+    *nItems = 0;
+    NewDateTime = (struct tm*) _cmsMalloc(self ->ContextID, sizeof(struct tm));
+    if (NewDateTime == NULL) return NULL;
+
+    if (io->Read(io, &timestamp, sizeof(cmsDateTimeNumber), 1) != 1) {
+
+        // The result buffer is already allocated; release it instead of leaking
+        _cmsFree(self ->ContextID, NewDateTime);
+        return NULL;
+    }
+
+    _cmsDecodeDateTimeNumber(&timestamp, NewDateTime);
+
+    *nItems = 1;
+    return NewDateTime;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+
+// Writes a struct tm as a dateTimeType tag: the value is encoded into a
+// cmsDateTimeNumber and that structure is written in one call.
+static
+cmsBool  Type_DateTime_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsDateTimeNumber Encoded;
+
+    _cmsEncodeDateTimeNumber(&Encoded, (struct tm*) Ptr);
+
+    return io ->Write(io, sizeof(cmsDateTimeNumber), &Encoded) ? TRUE : FALSE;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Duplicates a dateTimeType value: a flat copy of one struct tm.
+static
+void* Type_DateTime_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    void* Copy = _cmsDupMem(self ->ContextID, Ptr, sizeof(struct tm));
+
+    return Copy;
+
+    cmsUNUSED_PARAMETER(n);
+}
+
+// Releases the struct tm allocated by the dateTimeType reader or duplicator.
+static
+void Type_DateTime_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsContext Ctx = self ->ContextID;
+
+    _cmsFree(Ctx, Ptr);
+}
+
+
+
+// ********************************************************************************
+// Type icMeasurementType
+// ********************************************************************************
+
+/*
+The measurementType information refers only to the internal profile data and is
+meant to provide profile makers an alternative to the default measurement
+specifications.
+*/
+
+// Reads a measurementType tag into a heap copy of cmsICCMeasurementConditions.
+// Fields are read in stream order: Observer, Backing (XYZ), Geometry,
+// Flare (s15Fixed16) and IlluminantType.
+//
+// Returns the new structure with *nItems == 1, or NULL with *nItems == 0
+// when any read fails or the copy cannot be allocated.
+static
+void *Type_Measurement_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsICCMeasurementConditions mc;
+    void* Result;
+
+    // Like the other readers, report zero items on every failure path
+    *nItems = 0;
+
+    memset(&mc, 0, sizeof(mc));
+
+    if (!_cmsReadUInt32Number(io, &mc.Observer)) return NULL;
+    if (!_cmsReadXYZNumber(io,    &mc.Backing)) return NULL;
+    if (!_cmsReadUInt32Number(io, &mc.Geometry)) return NULL;
+    if (!_cmsRead15Fixed16Number(io, &mc.Flare)) return NULL;
+    if (!_cmsReadUInt32Number(io, &mc.IlluminantType)) return NULL;
+
+    // Only claim an item once the heap copy actually exists
+    Result = _cmsDupMem(self ->ContextID, &mc, sizeof(cmsICCMeasurementConditions));
+    if (Result == NULL) return NULL;
+
+    *nItems = 1;
+    return Result;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+
+// Writes a cmsICCMeasurementConditions as a measurementType tag, in the
+// order Observer, Backing, Geometry, Flare, IlluminantType. Writing stops
+// at the first field that fails.
+static
+cmsBool  Type_Measurement_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsICCMeasurementConditions* Cond = (cmsICCMeasurementConditions*) Ptr;
+
+    // && short-circuits, so later fields are not attempted after a failure
+    if (_cmsWriteUInt32Number(io, Cond ->Observer) &&
+        _cmsWriteXYZNumber(io, &Cond ->Backing) &&
+        _cmsWriteUInt32Number(io, Cond ->Geometry) &&
+        _cmsWrite15Fixed16Number(io, Cond ->Flare) &&
+        _cmsWriteUInt32Number(io, Cond ->IlluminantType)) {
+
+        return TRUE;
+    }
+
+    return FALSE;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Duplicates a measurementType value: a flat copy of one
+// cmsICCMeasurementConditions structure.
+static
+void* Type_Measurement_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+     void* Copy = _cmsDupMem(self ->ContextID, Ptr, sizeof(cmsICCMeasurementConditions));
+
+     return Copy;
+
+     cmsUNUSED_PARAMETER(n);
+}
+
+// Releases a measurementType value through the handler's context.
+static
+void Type_Measurement_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+   cmsContext Ctx = self ->ContextID;
+
+   _cmsFree(Ctx, Ptr);
+}
+
+
+// ********************************************************************************
+// Type cmsSigMultiLocalizedUnicodeType
+// ********************************************************************************
+//
+//   Do NOT trust SizeOfTag as there is an issue on the definition of profileSequenceDescTag. See the TechNote from
+//   Max Derhak and Rohit Patil about this: basically the size of the string table should be guessed and cannot be
+//   taken from the size of tag if this tag is embedded as part of bigger structures (profileSequenceDescTag, for instance)
+//
+
+// Reads a multiLocalizedUnicodeType tag into a new cmsMLU.
+//
+// Layout consumed: entry count, record length (must be 12), then Count
+// directory records (language, country, length, offset), then the string
+// pool. The pool size is not taken from SizeOfTag; it is derived from the
+// largest offset + length seen in the directory, and SizeOfTag is then
+// reused to hold that size.
+//
+// Returns the cmsMLU with *nItems == 1, or NULL with *nItems == 0 on
+// failure. All memory allocated here is released on failure.
+static
+void *Type_MLU_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsMLU* mlu;
+    cmsUInt32Number Count, RecLen, NumOfWchar;
+    cmsUInt32Number SizeOfHeader;
+    cmsUInt32Number  Len, Offset;
+    cmsUInt32Number  i;
+    wchar_t*         Block;
+    cmsUInt32Number  BeginOfThisString, EndOfThisString, LargestPosition;
+
+    *nItems = 0;
+    if (!_cmsReadUInt32Number(io, &Count)) return NULL;
+    if (!_cmsReadUInt32Number(io, &RecLen)) return NULL;
+
+    if (RecLen != 12) {
+
+        cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "multiLocalizedUnicodeType of len != 12 is not supported.");
+        return NULL;
+    }
+
+    mlu = cmsMLUalloc(self ->ContextID, Count);
+    if (mlu == NULL) return NULL;
+
+    mlu ->UsedEntries = Count;
+
+    SizeOfHeader = 12 * Count + sizeof(_cmsTagBase);
+    LargestPosition = 0;
+
+    for (i=0; i < Count; i++) {
+
+        if (!_cmsReadUInt16Number(io, &mlu ->Entries[i].Language)) goto Error;
+        if (!_cmsReadUInt16Number(io, &mlu ->Entries[i].Country))  goto Error;
+
+        // Now deal with Len and offset.
+        if (!_cmsReadUInt32Number(io, &Len)) goto Error;
+        if (!_cmsReadUInt32Number(io, &Offset)) goto Error;
+
+        // Check for overflow
+        if (Offset < (SizeOfHeader + 8)) goto Error;
+        if (((Offset + Len) < Len) || ((Offset + Len) > SizeOfTag + 8)) goto Error;
+
+        // True begin of the string
+        BeginOfThisString = Offset - SizeOfHeader - 8;
+
+        // Adjust to wchar_t elements
+        mlu ->Entries[i].Len = (Len * sizeof(wchar_t)) / sizeof(cmsUInt16Number);
+        mlu ->Entries[i].StrW = (BeginOfThisString * sizeof(wchar_t)) / sizeof(cmsUInt16Number);
+
+        // To guess maximum size, add offset + len
+        EndOfThisString = BeginOfThisString + Len;
+        if (EndOfThisString > LargestPosition)
+            LargestPosition = EndOfThisString;
+    }
+
+    // Now read the remaining of tag and fill all strings. Subtract the directory
+    SizeOfTag   = (LargestPosition * sizeof(wchar_t)) / sizeof(cmsUInt16Number);
+    if (SizeOfTag == 0)
+    {
+        Block = NULL;
+        NumOfWchar = 0;
+
+    }
+    else
+    {
+        Block = (wchar_t*) _cmsMalloc(self ->ContextID, SizeOfTag);
+        if (Block == NULL) goto Error;
+        NumOfWchar = SizeOfTag / sizeof(wchar_t);
+        if (!_cmsReadWCharArray(io, NumOfWchar, Block)) {
+
+            // Block is not yet owned by mlu, so the Error path would not
+            // release it; free it here to avoid a leak.
+            _cmsFree(self ->ContextID, Block);
+            goto Error;
+        }
+    }
+
+    // From here on the pool belongs to mlu and is released with it
+    mlu ->MemPool  = Block;
+    mlu ->PoolSize = SizeOfTag;
+    mlu ->PoolUsed = SizeOfTag;
+
+    *nItems = 1;
+    return (void*) mlu;
+
+Error:
+    if (mlu) cmsMLUfree(mlu);
+    return NULL;
+}
+
+// Writes a cmsMLU as a multiLocalizedUnicodeType tag: entry count, the fixed
+// record length 12, one directory record per used entry, then the string pool.
+// In-memory lengths and offsets are rescaled by
+// sizeof(cmsUInt16Number) / sizeof(wchar_t), and offsets are rebased past
+// the directory. A NULL Ptr is written as an empty placeholder (count 0).
+static
+cmsBool  Type_MLU_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsMLU* Table = (cmsMLU*) Ptr;
+    cmsUInt32Number DirectorySize;
+    cmsUInt32Number StrLen, StrOffset;
+    cmsUInt32Number Entry;
+
+    if (Ptr == NULL) {
+
+          // Empty placeholder
+          if (_cmsWriteUInt32Number(io, 0) && _cmsWriteUInt32Number(io, 12))
+              return TRUE;
+
+          return FALSE;
+    }
+
+    if (!_cmsWriteUInt32Number(io, Table ->UsedEntries)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, 12)) return FALSE;
+
+    DirectorySize = 12 * Table ->UsedEntries + sizeof(_cmsTagBase);
+
+    for (Entry = 0; Entry < Table ->UsedEntries; Entry++) {
+
+        StrLen    = Table ->Entries[Entry].Len;
+        StrOffset = Table ->Entries[Entry].StrW;
+
+        // Rescale from wchar_t-based sizes to 16-bit-unit-based sizes
+        StrLen    = (StrLen * sizeof(cmsUInt16Number)) / sizeof(wchar_t);
+        StrOffset = (StrOffset * sizeof(cmsUInt16Number)) / sizeof(wchar_t) + DirectorySize + 8;
+
+        if (!_cmsWriteUInt16Number(io, Table ->Entries[Entry].Language) ||
+            !_cmsWriteUInt16Number(io, Table ->Entries[Entry].Country)  ||
+            !_cmsWriteUInt32Number(io, StrLen) ||
+            !_cmsWriteUInt32Number(io, StrOffset)) {
+
+            return FALSE;
+        }
+    }
+
+    // String pool, written as PoolUsed / sizeof(wchar_t) characters
+    if (!_cmsWriteWCharArray(io, Table ->PoolUsed / sizeof(wchar_t), (wchar_t*)  Table ->MemPool)) return FALSE;
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+// Duplicates a multiLocalizedUnicodeType value by delegating to cmsMLUdup.
+static
+void* Type_MLU_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsMLU* Original = (cmsMLU*) Ptr;
+
+    return (void*) cmsMLUdup(Original);
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Releases a multiLocalizedUnicodeType value through cmsMLUfree.
+static
+void Type_MLU_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsMLU* Victim = (cmsMLU*) Ptr;
+
+    cmsMLUfree(Victim);
+    return;
+
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+// ********************************************************************************
+// Type cmsSigLut8Type
+// ********************************************************************************
+
+// Decide which LUT type to use on writing an AToB pipeline. For ICC versions
+// below 4.0 the pipeline's SaveAs8Bits flag selects Lut8 or Lut16; otherwise
+// lutAtoBType is used.
+static
+cmsTagTypeSignature DecideLUTtypeA2B(cmsFloat64Number ICCVersion, const void *Data)
+{
+    cmsPipeline* Lut = (cmsPipeline*) Data;
+
+    // Phrased as the negation of "< 4.0" so every input takes the same
+    // branch as a plain if/else on that comparison would
+    if (!(ICCVersion < 4.0))
+        return cmsSigLutAtoBType;
+
+    return Lut ->SaveAs8Bits ? cmsSigLut8Type : cmsSigLut16Type;
+}
+
+// Decide which LUT type to use on writing a BToA pipeline. For ICC versions
+// below 4.0 the pipeline's SaveAs8Bits flag selects Lut8 or Lut16; otherwise
+// lutBtoAType is used.
+static
+cmsTagTypeSignature DecideLUTtypeB2A(cmsFloat64Number ICCVersion, const void *Data)
+{
+    cmsPipeline* Lut = (cmsPipeline*) Data;
+
+    // Phrased as the negation of "< 4.0" so every input takes the same
+    // branch as a plain if/else on that comparison would
+    if (!(ICCVersion < 4.0))
+        return cmsSigLutBtoAType;
+
+    return Lut ->SaveAs8Bits ? cmsSigLut8Type : cmsSigLut16Type;
+}
+
+/*
+This structure represents a colour transform using tables of 8-bit precision.
+This type contains four processing elements: a 3 by 3 matrix (which shall be
+the identity matrix unless the input colour space is XYZ), a set of one dimensional
+input tables, a multidimensional lookup table, and a set of one dimensional output
+tables. Data is processed using these elements via the following sequence:
+(matrix) -> (1d input tables)  -> (multidimensional lookup table - CLUT) -> (1d output tables)
+
+Byte Position   Field Length (bytes)  Content Encoded as...
+8                  1          Number of Input Channels (i)    uInt8Number
+9                  1          Number of Output Channels (o)   uInt8Number
+10                 1          Number of CLUT grid points (identical for each side) (g) uInt8Number
+11                 1          Reserved for padding (fill with 00h)
+
+12..15             4          Encoded e00 parameter   s15Fixed16Number
+*/
+
+
+// Read 8 bit tables as gamma functions. For each of nChannels channels a
+// 256-byte table is read and widened with FROM_8_TO_16 into a 256-entry
+// tabulated tone curve; the set of curves is then appended to the pipeline
+// as a tone-curve stage. The local curves are freed on both the success and
+// the failure path.
+//
+// Returns FALSE when nChannels is 0 or above cmsMAXCHANNELS, or on any
+// allocation, read or stage-insertion failure.
+static
+cmsBool  Read8bitTables(cmsContext ContextID, cmsIOHANDLER* io, cmsPipeline* lut, cmsUInt32Number nChannels)
+{
+    cmsToneCurve* Curves[cmsMAXCHANNELS];
+    cmsUInt8Number* Raw = NULL;
+    cmsUInt32Number ch, k;
+
+    if (nChannels == 0 || nChannels > cmsMAXCHANNELS)
+        return FALSE;
+
+    // Start from all-NULL so the cleanup loop can tell what was allocated
+    memset(Curves, 0, sizeof(Curves));
+
+    Raw = (cmsUInt8Number*) _cmsMalloc(ContextID, 256);
+    if (Raw == NULL)
+        return FALSE;
+
+    // Allocate every curve up front, before touching the stream
+    for (ch = 0; ch < nChannels; ch++) {
+        Curves[ch] = cmsBuildTabulatedToneCurve16(ContextID, 256, NULL);
+        if (Curves[ch] == NULL) goto Error;
+    }
+
+    // One 256-byte table per channel, expanded to 16 bits
+    for (ch = 0; ch < nChannels; ch++) {
+
+        if (io ->Read(io, Raw, 256, 1) != 1) goto Error;
+
+        for (k = 0; k < 256; k++)
+            Curves[ch]->Table16[k] = (cmsUInt16Number) FROM_8_TO_16(Raw[k]);
+    }
+
+    _cmsFree(ContextID, Raw);
+    Raw = NULL;
+
+    if (!cmsPipelineInsertStage(lut, cmsAT_END, cmsStageAllocToneCurves(ContextID, nChannels, Curves)))
+        goto Error;
+
+    for (ch = 0; ch < nChannels; ch++)
+        cmsFreeToneCurve(Curves[ch]);
+
+    return TRUE;
+
+Error:
+    for (ch = 0; ch < nChannels; ch++) {
+        if (Curves[ch]) cmsFreeToneCurve(Curves[ch]);
+    }
+
+    if (Raw) _cmsFree(ContextID, Raw);
+    return FALSE;
+}
+
+
+// Writes n 8-bit tables of 256 bytes each from a tone-curve stage.
+//
+// Per curve: a 2-entry curve holding exactly { 0, 65535 } is written as the
+// byte ramp 0..255; a 256-entry curve is written entry by entry through
+// FROM_16_TO_8; any other size signals cmsERROR_RANGE and returns FALSE.
+// When Tables is NULL nothing is written and TRUE is returned.
+static
+cmsBool Write8bitTables(cmsContext ContextID, cmsIOHANDLER* io, cmsUInt32Number n, _cmsStageToneCurvesData* Tables)
+{
+    cmsUInt32Number ch;
+    int k;
+    cmsUInt8Number Byte;
+
+    // No stage data: there is nothing to emit for any channel
+    if (!Tables)
+        return TRUE;
+
+    for (ch = 0; ch < n; ch++) {
+
+        // Usual case of identity curves
+        if ((Tables ->TheCurves[ch]->nEntries == 2) &&
+            (Tables ->TheCurves[ch]->Table16[0] == 0) &&
+            (Tables ->TheCurves[ch]->Table16[1] == 65535)) {
+
+            for (k = 0; k < 256; k++) {
+                if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) k)) return FALSE;
+            }
+            continue;
+        }
+
+        if (Tables ->TheCurves[ch]->nEntries != 256) {
+            cmsSignalError(ContextID, cmsERROR_RANGE, "LUT8 needs 256 entries on prelinearization");
+            return FALSE;
+        }
+
+        for (k = 0; k < 256; k++) {
+
+            Byte = (cmsUInt8Number) FROM_16_TO_8(Tables ->TheCurves[ch]->Table16[k]);
+
+            if (!_cmsWriteUInt8Number(io, Byte)) return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+
+// Computes n * a^b in 32-bit unsigned arithmetic with overflow detection.
+// Returns 0 when a or n is 0, and (cmsUInt32Number) -1 when overflow is
+// detected.
+//
+// The power check runs after each multiplication and compares the running
+// product against UINT_MAX / a, so it is conservative: a product that would
+// overflow if multiplied by a once more is rejected as well.
+static
+cmsUInt32Number uipow(cmsUInt32Number n, cmsUInt32Number a, cmsUInt32Number b)
+{
+    cmsUInt32Number Power = 1;
+    cmsUInt32Number Total;
+
+    if (a == 0 || n == 0)
+        return 0;
+
+    while (b > 0) {
+
+        Power *= a;
+
+        // Check for overflow
+        if (Power > UINT_MAX / a)
+            return (cmsUInt32Number) -1;
+
+        b--;
+    }
+
+    Total = Power * n;
+
+    // Dividing back must recover the power, otherwise the product wrapped
+    if (Total / n != Power)
+        return (cmsUInt32Number) -1;
+
+    return Total;
+}
+
+
+// This creates an MPE LUT with matrix, pre tables, CLUT and post tables.
+// An 8-bit LUT may be scaled easily to the v4 PCS, but we also need to properly
+// adjust the PCS on BToAxx tags, and on AtoB if abstract. We need to fix input direction.
+
+static
+void *Type_LUT8_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt8Number InputChannels, OutputChannels, CLUTpoints;
+    cmsUInt8Number* Temp = NULL;
+    cmsPipeline* NewLUT = NULL;
+    cmsUInt32Number nTabSize, i;
+    cmsFloat64Number Matrix[3*3];
+
+    *nItems = 0;
+
+    if (!_cmsReadUInt8Number(io, &InputChannels)) goto Error;
+    if (!_cmsReadUInt8Number(io, &OutputChannels)) goto Error;
+    if (!_cmsReadUInt8Number(io, &CLUTpoints)) goto Error;
+
+     if (CLUTpoints == 1) goto Error; // Impossible value, 0 for no CLUT and then 2 at least
+
+    // Padding
+    if (!_cmsReadUInt8Number(io, NULL)) goto Error;
+
+    // Do some checking
+    if (InputChannels == 0 || InputChannels > cmsMAXCHANNELS)  goto Error;
+    if (OutputChannels == 0 || OutputChannels > cmsMAXCHANNELS) goto Error;
+
+   // Allocates an empty Pipeline
+    NewLUT = cmsPipelineAlloc(self ->ContextID, InputChannels, OutputChannels);
+    if (NewLUT == NULL) goto Error;
+
+    // Read the Matrix
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[0])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[1])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[2])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[3])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[4])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[5])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[6])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[7])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[8])) goto Error;
+
+
+    // Only operates if not identity...
+    if ((InputChannels == 3) && !_cmsMAT3isIdentity((cmsMAT3*) Matrix)) {
+
+        if (!cmsPipelineInsertStage(NewLUT, cmsAT_BEGIN, cmsStageAllocMatrix(self ->ContextID, 3, 3, Matrix, NULL)))
+            goto Error;
+    }
+
+    // Get input tables
+    if (!Read8bitTables(self ->ContextID, io,  NewLUT, InputChannels)) goto Error;
+
+    // Get 3D CLUT. Check the overflow....
+    nTabSize = uipow(OutputChannels, CLUTpoints, InputChannels);
+    if (nTabSize == (cmsUInt32Number) -1) goto Error;
+    if (nTabSize > 0) {
+
+        cmsUInt16Number *PtrW, *T;
+       
+        PtrW = T  = (cmsUInt16Number*) _cmsCalloc(self ->ContextID, nTabSize, sizeof(cmsUInt16Number));
+        if (T  == NULL) goto Error;
+
+        Temp = (cmsUInt8Number*) _cmsMalloc(self ->ContextID, nTabSize);
+        if (Temp == NULL) {
+            _cmsFree(self ->ContextID, T);
+            goto Error;
+        }
+
+        if (io ->Read(io, Temp, nTabSize, 1) != 1) {
+            _cmsFree(self ->ContextID, T);
+            _cmsFree(self ->ContextID, Temp);
+            goto Error;
+        }
+
+        for (i = 0; i < nTabSize; i++) {
+
+            *PtrW++ = FROM_8_TO_16(Temp[i]);
+        }
+        _cmsFree(self ->ContextID, Temp);
+        Temp = NULL;
+
+        if (!cmsPipelineInsertStage(NewLUT, cmsAT_END, cmsStageAllocCLut16bit(self ->ContextID, CLUTpoints, InputChannels, OutputChannels, T))) {
+            _cmsFree(self ->ContextID, T);
+            goto Error;
+        }
+        _cmsFree(self ->ContextID, T);
+    }
+
+
+    // Get output tables
+    if (!Read8bitTables(self ->ContextID, io,  NewLUT, OutputChannels)) goto Error;
+
+    *nItems = 1;
+    return NewLUT;
+
+Error:
+    if (NewLUT != NULL) cmsPipelineFree(NewLUT);
+    return NULL;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+// Writes a pipeline as a LUT8 tag. We only allow a specific MPE structure: Matrix plus
+// prelin, plus clut, plus post-lin, each part optional and in that order. An empty
+// pipeline or any other stage layout is reported as an error and FALSE is returned.
+static
+cmsBool  Type_LUT8_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsUInt32Number j, nTabSize;
+    cmsUInt8Number  val;
+    cmsPipeline* NewLUT = (cmsPipeline*) Ptr;
+    cmsStage* mpe;
+    _cmsStageToneCurvesData* PreMPE = NULL, *PostMPE = NULL;
+    _cmsStageMatrixData* MatMPE = NULL;
+    _cmsStageCLutData* clut = NULL;
+    cmsUInt32Number clutPoints;
+
+    // Disassemble the LUT into components.
+    mpe = NewLUT -> Elements;
+    // An empty pipeline has no stage to inspect: fail cleanly instead of dereferencing NULL
+    if (mpe == NULL) {
+        cmsSignalError(self ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "empty LUT8 is not supported");
+        return FALSE;
+    }
+
+    if (mpe ->Type == cmsSigMatrixElemType) {
+        MatMPE = (_cmsStageMatrixData*) mpe ->Data;
+        mpe = mpe -> Next;
+    }
+
+    if (mpe != NULL && mpe ->Type == cmsSigCurveSetElemType) {
+        PreMPE = (_cmsStageToneCurvesData*) mpe ->Data;
+        mpe = mpe -> Next;
+    }
+
+    if (mpe != NULL && mpe ->Type == cmsSigCLutElemType) {
+        clut  = (_cmsStageCLutData*) mpe -> Data;
+        mpe = mpe ->Next;
+    }
+
+    if (mpe != NULL && mpe ->Type == cmsSigCurveSetElemType) {
+        PostMPE = (_cmsStageToneCurvesData*) mpe ->Data;
+        mpe = mpe -> Next;
+    }
+
+    // That should be all
+    if (mpe != NULL) {
+        cmsSignalError(mpe->ContextID, cmsERROR_UNKNOWN_EXTENSION, "LUT is not suitable to be saved as LUT8");
+        return FALSE;
+    }
+
+    if (clut == NULL)
+        clutPoints = 0;
+    else
+        clutPoints    = clut->Params->nSamples[0];
+
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) NewLUT ->InputChannels)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) NewLUT ->OutputChannels)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) clutPoints)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, 0)) return FALSE; // Padding
+
+    if (MatMPE != NULL) {
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[0])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[1])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[2])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[3])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[4])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[5])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[6])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[7])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[8])) return FALSE;
+    }
+    else {
+        // No matrix stage: emit the identity matrix
+        if (!_cmsWrite15Fixed16Number(io, 1)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 0)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 0)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 0)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 1)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 0)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 0)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 0)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 1)) return FALSE;
+    }
+
+    // The prelinearization table
+    if (!Write8bitTables(self ->ContextID, io, NewLUT ->InputChannels, PreMPE)) return FALSE;
+
+    nTabSize = uipow(NewLUT->OutputChannels, clutPoints, NewLUT ->InputChannels);
+    if (nTabSize == (cmsUInt32Number) -1) return FALSE;
+    if (nTabSize > 0) {
+        // The 3D CLUT.
+        if (clut != NULL) {
+            for (j=0; j < nTabSize; j++) {
+                val = (cmsUInt8Number) FROM_16_TO_8(clut ->Tab.T[j]);
+                if (!_cmsWriteUInt8Number(io, val)) return FALSE;
+            }
+        }
+    }
+
+    // The postlinearization table
+    if (!Write8bitTables(self ->ContextID, io, NewLUT ->OutputChannels, PostMPE)) return FALSE;
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+
+// Duplicates a LUT8 tag value (a pipeline) and returns the copy made by cmsPipelineDup.
+static void* Type_LUT8_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsPipeline* Source = (cmsPipeline*) Ptr;
+    return (void*) cmsPipelineDup(Source);
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Releases a LUT8 tag value, which is held as a pipeline.
+static void Type_LUT8_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsPipeline* Lut = (cmsPipeline*) Ptr;
+    cmsPipelineFree(Lut);
+    return;
+    cmsUNUSED_PARAMETER(self);
+}
+
+// ********************************************************************************
+// Type cmsSigLut16Type
+// ********************************************************************************
+
+// Read 16 bit tables as gamma functions.
+// Reads nChannels tables of nEntries 16-bit values each and appends them to lut as a single
+// tone-curve stage. Returns TRUE on success (or when nEntries is 0), FALSE otherwise.
+static
+cmsBool  Read16bitTables(cmsContext ContextID, cmsIOHANDLER* io, cmsPipeline* lut,
+                                    cmsUInt32Number nChannels, cmsUInt32Number nEntries)
+{
+    cmsToneCurve* Curves[cmsMAXCHANNELS];
+    cmsUInt32Number ch;
+    cmsBool Ok = FALSE;
+
+    // Maybe an empty table? (this is a lcms extension)
+    if (nEntries == 0) return TRUE;
+
+    // Check for malicious profiles
+    if (nEntries < 2 || nChannels > cmsMAXCHANNELS) return FALSE;
+
+    // Start with every slot NULL so the cleanup loop can run over a partially filled array
+    memset(Curves, 0, sizeof(Curves));
+
+    for (ch = 0; ch < nChannels; ch++) {
+
+        // Allocate the curve, then fill its table straight from the stream
+        Curves[ch] = cmsBuildTabulatedToneCurve16(ContextID, nEntries, NULL);
+        if (Curves[ch] == NULL) goto Done;
+
+        if (!_cmsReadUInt16Array(io, nEntries, Curves[ch]->Table16)) goto Done;
+    }
+
+    // Add the table (which may certainly be an identity, but this is up to the optimizer,
+    // not the reading code)
+    if (cmsPipelineInsertStage(lut, cmsAT_END, cmsStageAllocToneCurves(ContextID, nChannels, Curves)))
+        Ok = TRUE;
+
+Done:
+    // The temporary curves are released on every path, successful or not
+    for (ch = 0; ch < nChannels; ch++) {
+
+        if (Curves[ch] != NULL) cmsFreeToneCurve(Curves[ch]);
+    }
+
+    return Ok;
+}
+
+// Writes every curve of the set as raw 16-bit table entries, one curve after another.
+// NOTE(review): the entry count is taken from the first curve and applied to all of
+// them, so curves are assumed to share the same length - confirm against callers.
+static
+cmsBool Write16bitTables(cmsContext ContextID, cmsIOHANDLER* io, _cmsStageToneCurvesData* Tables)
+{
+    cmsUInt32Number Curve, Entry;
+    cmsUInt32Number nEntries;
+
+    _cmsAssert(Tables != NULL);
+
+    nEntries = Tables->TheCurves[0]->nEntries;
+
+    for (Curve = 0; Curve < Tables ->nCurves; Curve++) {
+        for (Entry = 0; Entry < nEntries; Entry++) {
+            if (!_cmsWriteUInt16Number(io, Tables->TheCurves[Curve]->Table16[Entry]))
+                return FALSE;
+        }
+    }
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(ContextID);
+}
+
+// Reads a LUT16 tag into a new pipeline: optional 3x3 matrix (3-channel input only), input
+// tables, CLUT and output tables, appended in that order. Returns the pipeline with *nItems
+// set to 1, or NULL with *nItems left at 0 on any read, validation or allocation failure.
+static
+void *Type_LUT16_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt8Number InputChannels, OutputChannels, CLUTpoints;
+    cmsPipeline* NewLUT = NULL;
+    cmsUInt32Number nTabSize;
+    cmsFloat64Number Matrix[3*3];
+    cmsUInt16Number InputEntries, OutputEntries;
+
+    *nItems = 0;
+
+    if (!_cmsReadUInt8Number(io, &InputChannels)) return NULL;
+    if (!_cmsReadUInt8Number(io, &OutputChannels)) return NULL;
+    if (!_cmsReadUInt8Number(io, &CLUTpoints)) return NULL;   // 255 maximum
+
+    // Padding
+    if (!_cmsReadUInt8Number(io, NULL)) return NULL;
+
+    // Do some checking
+    if (InputChannels == 0 || InputChannels > cmsMAXCHANNELS)  goto Error;
+    if (OutputChannels == 0 || OutputChannels > cmsMAXCHANNELS) goto Error;
+
+    // Allocates an empty LUT
+    NewLUT = cmsPipelineAlloc(self ->ContextID, InputChannels, OutputChannels);
+    if (NewLUT == NULL) goto Error;
+
+    // Read the Matrix
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[0])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[1])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[2])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[3])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[4])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[5])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[6])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[7])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[8])) goto Error;
+
+    // Only operates on 3 channels
+    if ((InputChannels == 3) && !_cmsMAT3isIdentity((cmsMAT3*) Matrix)) {
+        if (!cmsPipelineInsertStage(NewLUT, cmsAT_END, cmsStageAllocMatrix(self ->ContextID, 3, 3, Matrix, NULL)))
+            goto Error;
+    }
+
+    if (!_cmsReadUInt16Number(io, &InputEntries)) goto Error;
+    if (!_cmsReadUInt16Number(io, &OutputEntries)) goto Error;
+
+    if (InputEntries > 0x7FFF || OutputEntries > 0x7FFF) goto Error;
+    if (CLUTpoints == 1) goto Error; // Impossible value, 0 for no CLUT and then 2 at least
+
+    // Get input tables
+    if (!Read16bitTables(self ->ContextID, io,  NewLUT, InputChannels, InputEntries)) goto Error;
+
+    // Get 3D CLUT
+    nTabSize = uipow(OutputChannels, CLUTpoints, InputChannels);
+    if (nTabSize == (cmsUInt32Number) -1) goto Error;
+    if (nTabSize > 0) {
+
+        cmsUInt16Number *T;
+
+        T  = (cmsUInt16Number*) _cmsCalloc(self ->ContextID, nTabSize, sizeof(cmsUInt16Number));
+        if (T  == NULL) goto Error;
+
+        if (!_cmsReadUInt16Array(io, nTabSize, T)) {
+            _cmsFree(self ->ContextID, T);
+            goto Error;
+        }
+
+        if (!cmsPipelineInsertStage(NewLUT, cmsAT_END, cmsStageAllocCLut16bit(self ->ContextID, CLUTpoints, InputChannels, OutputChannels, T))) {
+            _cmsFree(self ->ContextID, T);
+            goto Error;
+        }
+        _cmsFree(self ->ContextID, T); // NOTE(review): freed on success too - presumably the CLUT stage copies the table; confirm
+    }
+
+    // Get output tables
+    if (!Read16bitTables(self ->ContextID, io,  NewLUT, OutputChannels, OutputEntries)) goto Error;
+
+    *nItems = 1;
+    return NewLUT;
+
+Error:
+    if (NewLUT != NULL) cmsPipelineFree(NewLUT);
+    return NULL;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+// Writes a pipeline as a LUT16 tag. We only allow some specific MPE structures: Matrix plus
+// prelin, plus clut, plus post-lin. Some empty defaults (identity matrix, 2-entry linear
+// tables) are written for missing parts. Returns FALSE on an unsuitable LUT or write failure.
+static
+cmsBool  Type_LUT16_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsUInt32Number nTabSize;
+    cmsPipeline* NewLUT = (cmsPipeline*) Ptr;
+    cmsStage* mpe;
+    _cmsStageToneCurvesData* PreMPE = NULL, *PostMPE = NULL;
+    _cmsStageMatrixData* MatMPE = NULL;
+    _cmsStageCLutData* clut = NULL;
+    cmsUInt32Number i, InputChannels, OutputChannels, clutPoints;
+
+    // Disassemble the LUT into components.
+    mpe = NewLUT -> Elements;
+    if (mpe != NULL && mpe ->Type == cmsSigMatrixElemType) {
+
+        MatMPE = (_cmsStageMatrixData*) mpe ->Data;
+        mpe = mpe -> Next;
+    }
+
+
+    if (mpe != NULL && mpe ->Type == cmsSigCurveSetElemType) {
+        PreMPE = (_cmsStageToneCurvesData*) mpe ->Data;
+        mpe = mpe -> Next;
+    }
+
+    if (mpe != NULL && mpe ->Type == cmsSigCLutElemType) {
+        clut  = (_cmsStageCLutData*) mpe -> Data;
+        mpe = mpe ->Next;
+    }
+
+    if (mpe != NULL && mpe ->Type == cmsSigCurveSetElemType) {
+        PostMPE = (_cmsStageToneCurvesData*) mpe ->Data;
+        mpe = mpe -> Next;
+    }
+
+    // That should be all
+    if (mpe != NULL) {
+        cmsSignalError(mpe->ContextID, cmsERROR_UNKNOWN_EXTENSION, "LUT is not suitable to be saved as LUT16");
+        return FALSE;
+    }
+
+    InputChannels  = cmsPipelineInputChannels(NewLUT);
+    OutputChannels = cmsPipelineOutputChannels(NewLUT);
+
+    if (clut == NULL)
+        clutPoints = 0;
+    else
+        clutPoints    = clut->Params->nSamples[0]; // NOTE(review): only the first dimension is consulted; assumes a uniform grid - confirm
+
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) InputChannels)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) OutputChannels)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) clutPoints)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, 0)) return FALSE; // Padding
+
+
+    if (MatMPE != NULL) {
+
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[0])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[1])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[2])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[3])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[4])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[5])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[6])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[7])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[8])) return FALSE;
+    }
+    else {
+        // No matrix stage: write the identity matrix
+        if (!_cmsWrite15Fixed16Number(io, 1)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 0)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 0)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 0)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 1)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 0)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 0)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 0)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 1)) return FALSE;
+    }
+
+    // Entry counts of the input and output tables; 2 is written when the stage is absent
+    if (PreMPE != NULL) {
+        if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) PreMPE ->TheCurves[0]->nEntries)) return FALSE;
+    } else {
+            if (!_cmsWriteUInt16Number(io, 2)) return FALSE;
+    }
+
+    if (PostMPE != NULL) {
+        if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) PostMPE ->TheCurves[0]->nEntries)) return FALSE;
+    } else {
+        if (!_cmsWriteUInt16Number(io, 2)) return FALSE;
+
+    }
+
+    // The prelinearization table
+    // (a missing stage is replaced by a 2-entry 0..0xffff table per input channel)
+    if (PreMPE != NULL) {
+        if (!Write16bitTables(self ->ContextID, io, PreMPE)) return FALSE;
+    }
+    else {
+        for (i=0; i < InputChannels; i++) {
+
+            if (!_cmsWriteUInt16Number(io, 0)) return FALSE;
+            if (!_cmsWriteUInt16Number(io, 0xffff)) return FALSE;
+        }
+    }
+
+    nTabSize = uipow(OutputChannels, clutPoints, InputChannels);
+    if (nTabSize == (cmsUInt32Number) -1) return FALSE;
+    if (nTabSize > 0) {
+        // The 3D CLUT.
+        if (clut != NULL) {
+            if (!_cmsWriteUInt16Array(io, nTabSize, clut->Tab.T)) return FALSE;
+        }
+    }
+
+    // The postlinearization table
+    if (PostMPE != NULL) {
+        if (!Write16bitTables(self ->ContextID, io, PostMPE)) return FALSE;
+    }
+    else {
+        for (i=0; i < OutputChannels; i++) {
+
+            if (!_cmsWriteUInt16Number(io, 0)) return FALSE;
+            if (!_cmsWriteUInt16Number(io, 0xffff)) return FALSE;
+        }
+    }
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+// Duplicates a LUT16 tag value (a pipeline) and returns the copy made by cmsPipelineDup.
+static void* Type_LUT16_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsPipeline* Source = (cmsPipeline*) Ptr;
+    return (void*) cmsPipelineDup(Source);
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Releases a LUT16 tag value, which is held as a pipeline.
+static void Type_LUT16_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsPipeline* Lut = (cmsPipeline*) Ptr;
+    cmsPipelineFree(Lut);
+    return;
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+// ********************************************************************************
+// Type cmsSigLutAToBType
+// ********************************************************************************
+
+
+// V4 stuff. Read matrix for LutAtoB and LutBtoA.
+// Seeks to Offset and reads nine matrix coefficients followed by three offset terms, all
+// via _cmsRead15Fixed16Number. Returns a new 3x3 matrix stage, or NULL if the seek, any
+// read or the stage allocation fails.
+
+static
+cmsStage* ReadMatrix(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number Offset)
+{
+    cmsFloat64Number Coefficients[3*3];
+    cmsFloat64Number Offsets[3];
+    cmsUInt32Number k;
+
+    // Go to address
+    if (!io -> Seek(io, Offset)) return NULL;
+
+    // Nine matrix coefficients come first...
+    for (k = 0; k < 3*3; k++) {
+
+        if (!_cmsRead15Fixed16Number(io, &Coefficients[k])) return NULL;
+    }
+
+    // ...followed by the three offset terms
+    for (k = 0; k < 3; k++) {
+
+        if (!_cmsRead15Fixed16Number(io, &Offsets[k])) return NULL;
+    }
+
+    // Build the stage from what was read; a failed allocation simply
+    // propagates to the caller as NULL
+    return cmsStageAllocMatrix(self ->ContextID, 3, 3, Coefficients, Offsets);
+}
+
+
+
+
+//  V4 stuff. Read CLUT part for LutAtoB and LutBtoA. Seeks to Offset, reads the per-dimension
+//  grid points and the sample precision, then the samples. Returns the CLUT stage or NULL on failure.
+static
+cmsStage* ReadCLUT(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, 
+                   cmsUInt32Number Offset, cmsUInt32Number InputChannels, cmsUInt32Number OutputChannels)
+{
+    cmsUInt8Number  gridPoints8[cmsMAXCHANNELS]; // Number of grid points in each dimension.
+    cmsUInt32Number GridPoints[cmsMAXCHANNELS], i;
+    cmsUInt8Number  Precision;
+    cmsStage* CLUT;
+    _cmsStageCLutData* Data;
+
+    if (!io -> Seek(io, Offset)) return NULL;
+    if (io -> Read(io, gridPoints8, cmsMAXCHANNELS, 1) != 1) return NULL;
+
+    // All cmsMAXCHANNELS grid-point bytes are checked and widened, not just the first InputChannels
+    for (i=0; i < cmsMAXCHANNELS; i++) {
+
+        if (gridPoints8[i] == 1) return NULL; // Impossible value, 0 for no CLUT and then 2 at least
+        GridPoints[i] = gridPoints8[i];
+    }
+
+    if (!_cmsReadUInt8Number(io, &Precision)) return NULL;
+    // The next three bytes are read and discarded
+    if (!_cmsReadUInt8Number(io, NULL)) return NULL;
+    if (!_cmsReadUInt8Number(io, NULL)) return NULL;
+    if (!_cmsReadUInt8Number(io, NULL)) return NULL;
+
+    CLUT = cmsStageAllocCLut16bitGranular(self ->ContextID, GridPoints, InputChannels, OutputChannels, NULL);
+    if (CLUT == NULL) return NULL;
+    // The stage was allocated with a NULL table; its entries are filled in from the stream below
+    Data = (_cmsStageCLutData*) CLUT ->Data;
+
+    // Precision can be 1 or 2 bytes
+    if (Precision == 1) {
+
+        cmsUInt8Number  v;
+
+        for (i=0; i < Data ->nEntries; i++) {
+
+            if (io ->Read(io, &v, sizeof(cmsUInt8Number), 1) != 1) {
+                cmsStageFree(CLUT);
+                return NULL;
+            }
+            Data ->Tab.T[i] = FROM_8_TO_16(v);
+        }
+
+    }
+    else
+        if (Precision == 2) {
+
+            if (!_cmsReadUInt16Array(io, Data->nEntries, Data ->Tab.T)) {
+                cmsStageFree(CLUT);
+                return NULL;
+            }
+        }
+        else {
+            cmsStageFree(CLUT);
+            cmsSignalError(self ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown precision of '%d'", Precision);
+            return NULL;
+        }
+
+    return CLUT;
+}
+
+// Reads one curve embedded in a LUT: the type base read from io selects between the
+// curveType and parametricCurveType readers. Any other type signals an error and
+// yields NULL.
+static
+cmsToneCurve* ReadEmbeddedCurve(struct _cms_typehandler_struct* self, cmsIOHANDLER* io)
+{
+    cmsTagTypeSignature  CurveKind;
+    cmsUInt32Number nItems;
+    char KindName[5];
+
+    // The type base is consumed from the stream first
+    CurveKind = _cmsReadTypeBase(io);
+
+    if (CurveKind == cmsSigCurveType)
+        return (cmsToneCurve*) Type_Curve_Read(self, io, &nItems, 0);
+
+    if (CurveKind == cmsSigParametricCurveType)
+        return (cmsToneCurve*) Type_ParametricCurve_Read(self, io, &nItems, 0);
+
+    // Anything else cannot be handled here: report the offending signature
+    _cmsTagSignature2String(KindName, (cmsTagSignature) CurveKind);
+    cmsSignalError(self ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown curve type '%s'", KindName);
+
+    return NULL;
+}
+
+
+// Read a set of curves from specific offset: seeks to Offset, reads nCurves embedded curves
+// (each followed by alignment) and wraps them in a tone-curve stage. Returns NULL on failure.
+static
+cmsStage* ReadSetOfCurves(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number Offset, cmsUInt32Number nCurves)
+{
+    cmsToneCurve* Curves[cmsMAXCHANNELS];
+    cmsUInt32Number i;
+    cmsStage* Lin = NULL;
+
+    // This function returns a pointer, so early failures are reported as NULL (not FALSE)
+    if (nCurves > cmsMAXCHANNELS) return NULL;
+    if (!io -> Seek(io, Offset)) return NULL;
+
+    for (i=0; i < nCurves; i++)
+        Curves[i] = NULL;
+
+    for (i=0; i < nCurves; i++) {
+        Curves[i] = ReadEmbeddedCurve(self, io);
+        if (Curves[i] == NULL) goto Error;
+        if (!_cmsReadAlignment(io)) goto Error;
+    }
+
+    Lin = cmsStageAllocToneCurves(self ->ContextID, nCurves, Curves);
+
+Error:
+    // Reached on success too: the temporary curves are always released, skipping unread slots
+    for (i=0; i < nCurves; i++)
+        if (Curves[i] != NULL) cmsFreeToneCurve(Curves[i]);
+
+    return Lin;
+}
+
+
+// LutAtoB type
+
+// This structure represents a colour transform. The type contains up to five processing
+// elements which are stored in the AtoBTag tag in the following order: a set of one
+// dimensional curves, a 3 by 3 matrix with offset terms, a set of one dimensional curves,
+// a multidimensional lookup table, and a set of one dimensional output curves.
+// Data are processed using these elements via the following sequence:
+//
+//("A" curves) -> (multidimensional lookup table - CLUT) -> ("M" curves) -> (matrix) -> ("B" curves).
+//
+/*
+It is possible to use any or all of these processing elements. At least one processing element
+must be included. Only the following combinations are allowed:
+
+B
+M - Matrix - B
+A - CLUT - B
+A - CLUT - M - Matrix - B
+
+*/
+
+// Reads a lutAToBType tag into a new pipeline. The five offsets read from the tag are applied
+// on top of BaseOffset (position on entry minus sizeof(_cmsTagBase)); each non-zero offset adds
+// its element at the end, in the order A curves, CLUT, M curves, matrix, B curves. NULL on failure.
+static
+void* Type_LUTA2B_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt32Number      BaseOffset;
+    cmsUInt8Number       inputChan;      // Number of input channels
+    cmsUInt8Number       outputChan;     // Number of output channels
+    cmsUInt32Number      offsetB;        // Offset to first "B" curve
+    cmsUInt32Number      offsetMat;      // Offset to matrix
+    cmsUInt32Number      offsetM;        // Offset to first "M" curve
+    cmsUInt32Number      offsetC;        // Offset to CLUT
+    cmsUInt32Number      offsetA;        // Offset to first "A" curve
+    cmsPipeline* NewLUT = NULL;
+
+    BaseOffset = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    if (!_cmsReadUInt8Number(io, &inputChan)) return NULL;
+    if (!_cmsReadUInt8Number(io, &outputChan)) return NULL;
+
+    if (!_cmsReadUInt16Number(io, NULL)) return NULL; // 16-bit field read and discarded
+    if (!_cmsReadUInt32Number(io, &offsetB)) return NULL;
+    if (!_cmsReadUInt32Number(io, &offsetMat)) return NULL;
+    if (!_cmsReadUInt32Number(io, &offsetM)) return NULL;
+    if (!_cmsReadUInt32Number(io, &offsetC)) return NULL;
+    if (!_cmsReadUInt32Number(io, &offsetA)) return NULL;
+
+    if (inputChan == 0 || inputChan >= cmsMAXCHANNELS) return NULL; // NOTE(review): ">=" is stricter than the ">" used by the LUT8/LUT16 readers - confirm intent
+    if (outputChan == 0 || outputChan >= cmsMAXCHANNELS) return NULL;
+
+    // Allocates an empty LUT
+    NewLUT = cmsPipelineAlloc(self ->ContextID, inputChan, outputChan);
+    if (NewLUT == NULL) return NULL;
+
+    if (offsetA!= 0) {
+        if (!cmsPipelineInsertStage(NewLUT, cmsAT_END, ReadSetOfCurves(self, io, BaseOffset + offsetA, inputChan)))
+            goto Error;
+    }
+
+    if (offsetC != 0) {
+        if (!cmsPipelineInsertStage(NewLUT, cmsAT_END, ReadCLUT(self, io, BaseOffset + offsetC, inputChan, outputChan)))
+            goto Error;
+    }
+
+    if (offsetM != 0) {
+        if (!cmsPipelineInsertStage(NewLUT, cmsAT_END, ReadSetOfCurves(self, io, BaseOffset + offsetM, outputChan)))
+            goto Error;
+    }
+
+    if (offsetMat != 0) {
+        if (!cmsPipelineInsertStage(NewLUT, cmsAT_END, ReadMatrix(self, io, BaseOffset + offsetMat)))
+            goto Error;
+    }
+
+    if (offsetB != 0) {
+        if (!cmsPipelineInsertStage(NewLUT, cmsAT_END, ReadSetOfCurves(self, io, BaseOffset + offsetB, outputChan)))
+            goto Error;
+    }
+
+    *nItems = 1;
+    return NewLUT;
+Error:
+    cmsPipelineFree(NewLUT);
+    return NULL;
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+// Writes a matrix stage: the nine coefficients followed by three offset terms, each
+// through _cmsWrite15Fixed16Number. Zero offsets are written when the stage has none.
+// Returns FALSE as soon as a write fails.
+static
+cmsBool  WriteMatrix(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsStage* mpe)
+{
+    _cmsStageMatrixData* MatData = (_cmsStageMatrixData*) mpe -> Data;
+    cmsUInt32Number k;
+
+    // Write the Matrix
+    for (k = 0; k < 9; k++) {
+
+        if (!_cmsWrite15Fixed16Number(io, MatData -> Double[k])) return FALSE;
+    }
+
+    // Then the offsets
+    if (MatData ->Offset == NULL) {
+
+        // No offset array on the stage: emit zeros in its place
+        for (k = 0; k < 3; k++) {
+
+            if (!_cmsWrite15Fixed16Number(io, 0)) return FALSE;
+        }
+    }
+    else {
+        for (k = 0; k < 3; k++) {
+
+            if (!_cmsWrite15Fixed16Number(io, MatData -> Offset[k])) return FALSE;
+        }
+    }
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+// Writes every curve of a curve-set stage, each one prefixed by its type base and followed
+// by alignment padding. The number of curves is the stage's output channel count.
+// Type is the preferred encoding; table-based curves are always written as curveType.
+// Returns FALSE on the first failure.
+static
+cmsBool WriteSetOfCurves(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsTagTypeSignature Type, cmsStage* mpe)
+{
+    cmsUInt32Number Index, Count;
+    cmsToneCurve** Set;
+
+    Count = cmsStageOutputChannels(mpe);
+    Set   = _cmsStageGetPtrToCurveSet(mpe);
+
+    for (Index = 0; Index < Count; Index++) {
+
+        cmsToneCurve* Curve = Set[Index];
+        cmsTagTypeSignature Chosen = Type;
+
+        // If this is a table-based curve, use curve type even on V4
+        if ((Curve ->nSegments == 0) ||
+            ((Curve ->nSegments == 2) && (Curve ->Segments[1].Type == 0)) ||
+            (Curve ->Segments[0].Type < 0)) {
+
+            Chosen = cmsSigCurveType;
+        }
+
+        // The type base always goes out before the curve payload
+        if (!_cmsWriteTypeBase(io, Chosen)) return FALSE;
+
+        if (Chosen == cmsSigCurveType) {
+
+            if (!Type_Curve_Write(self, io, Curve, 1)) return FALSE;
+        }
+        else if (Chosen == cmsSigParametricCurveType) {
+
+            if (!Type_ParametricCurve_Write(self, io, Curve, 1)) return FALSE;
+        }
+        else {
+            // Type was neither of the two curve encodings handled above
+            char String[5];
+
+            _cmsTagSignature2String(String, (cmsTagSignature) Type);
+            cmsSignalError(self ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown curve type '%s'", String);
+            return FALSE;
+        }
+
+        // Alignment padding follows every curve
+        if (!_cmsWriteAlignment(io)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+// Writes a CLUT stage: cmsMAXCHANNELS grid-point bytes (unused dimensions are 0), the precision
+// byte plus three zero bytes, then the samples as 8 or 16 bit values and alignment padding.
+// Precision must be 1 or 2; floating point CLUTs cannot be saved. Returns FALSE on failure.
+static
+cmsBool WriteCLUT(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt8Number  Precision, cmsStage* mpe)
+{
+    cmsUInt8Number  gridPoints[cmsMAXCHANNELS]; // Number of grid points in each dimension.
+    cmsUInt32Number i;
+    _cmsStageCLutData* CLUT = ( _cmsStageCLutData*) mpe -> Data;
+
+    if (CLUT ->HasFloatValues) {
+         cmsSignalError(self ->ContextID, cmsERROR_NOT_SUITABLE, "Cannot save floating point data, CLUT are 8 or 16 bit only");
+         return FALSE;
+    }
+
+    memset(gridPoints, 0, sizeof(gridPoints));
+    for (i=0; i < (cmsUInt32Number) CLUT ->Params ->nInputs; i++)
+        gridPoints[i] = (cmsUInt8Number) CLUT ->Params ->nSamples[i];
+
+    if (!io -> Write(io, cmsMAXCHANNELS*sizeof(cmsUInt8Number), gridPoints)) return FALSE;
+
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) Precision)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, 0)) return FALSE;
+
+    // Precision can be 1 or 2 bytes
+    if (Precision == 1) {
+        for (i=0; i < CLUT->nEntries; i++) {
+            if (!_cmsWriteUInt8Number(io, FROM_16_TO_8(CLUT->Tab.T[i]))) return FALSE;
+        }
+    }
+    else
+        if (Precision == 2) {
+            if (!_cmsWriteUInt16Array(io, CLUT->nEntries, CLUT ->Tab.T)) return FALSE;
+        }
+        else {
+             cmsSignalError(self ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown precision of '%d'", Precision);
+            return FALSE;
+        }
+
+    if (!_cmsWriteAlignment(io)) return FALSE;
+
+    return TRUE;
+}
+
+
+
+
+// Serializes a pipeline as a LutAToB tag body.
+// Accepted stage layouts (pipeline order): B | M-Matrix-B | A-CLUT-B | A-CLUT-M-Matrix-B.
+// Returns TRUE on success, FALSE if the pipeline has another layout or any write/seek fails.
+static
+cmsBool Type_LUTA2B_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsPipeline* Lut = (cmsPipeline*) Ptr;
+    cmsStage *A = NULL, *CLUT = NULL, *M = NULL, *Matrix = NULL, *B = NULL;
+    cmsUInt32Number offsetA = 0, offsetC = 0, offsetM = 0, offsetMat = 0, offsetB = 0;
+    cmsUInt32Number nIn, nOut;
+    cmsUInt32Number TagStart, DirStart, EndOfData;
+    cmsUInt32Number k;
+
+    cmsUNUSED_PARAMETER(nItems);
+
+    // All directory offsets are measured from this base, which lies
+    // sizeof(_cmsTagBase) bytes before the current stream position
+    TagStart = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    // An empty pipeline skips the layout check and is written with an all-zero directory
+    if (Lut ->Elements != NULL) {
+
+        // Try each supported layout in turn; the first match fills in the stage pointers
+        cmsBool Recognized =
+            cmsPipelineCheckAndRetreiveStages(Lut, 1, cmsSigCurveSetElemType, &B) ||
+            cmsPipelineCheckAndRetreiveStages(Lut, 3, cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType, &M, &Matrix, &B) ||
+            cmsPipelineCheckAndRetreiveStages(Lut, 3, cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType, &A, &CLUT, &B) ||
+            cmsPipelineCheckAndRetreiveStages(Lut, 5, cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType,
+                cmsSigMatrixElemType, cmsSigCurveSetElemType, &A, &CLUT, &M, &Matrix, &B);
+
+        if (!Recognized) {
+            cmsSignalError(self->ContextID, cmsERROR_NOT_SUITABLE, "LUT is not suitable to be saved as LutAToB");
+            return FALSE;
+        }
+    }
+
+    nIn  = cmsPipelineInputChannels(Lut);
+    nOut = cmsPipelineOutputChannels(Lut);
+
+    // Channel counts, one byte each, followed by two bytes of zero
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) nIn)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) nOut)) return FALSE;
+    if (!_cmsWriteUInt16Number(io, 0)) return FALSE;
+
+    // Reserve the five directory slots; they are patched once the element positions are known
+    DirStart = io ->Tell(io);
+    for (k = 0; k < 5; k++) {
+        if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    }
+
+    // Elements are emitted in the order A, CLUT, M, Matrix, B
+    if (A != NULL) {
+        offsetA = io ->Tell(io) - TagStart;
+        if (!WriteSetOfCurves(self, io, cmsSigParametricCurveType, A)) return FALSE;
+    }
+
+    if (CLUT != NULL) {
+        offsetC = io ->Tell(io) - TagStart;
+        // Precision argument: 1 when the pipeline is flagged SaveAs8Bits, otherwise 2
+        if (!WriteCLUT(self, io, (Lut ->SaveAs8Bits ? 1U : 2U), CLUT)) return FALSE;
+    }
+
+    if (M != NULL) {
+        offsetM = io ->Tell(io) - TagStart;
+        if (!WriteSetOfCurves(self, io, cmsSigParametricCurveType, M)) return FALSE;
+    }
+
+    if (Matrix != NULL) {
+        offsetMat = io ->Tell(io) - TagStart;
+        if (!WriteMatrix(self, io, Matrix)) return FALSE;
+    }
+
+    if (B != NULL) {
+        offsetB = io ->Tell(io) - TagStart;
+        if (!WriteSetOfCurves(self, io, cmsSigParametricCurveType, B)) return FALSE;
+    }
+
+    EndOfData = io ->Tell(io);
+
+    // Go back and fill the directory; slot order is B, Matrix, M, CLUT, A
+    if (!io ->Seek(io, DirStart)) return FALSE;
+
+    if (!_cmsWriteUInt32Number(io, offsetB)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetMat)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetM)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetC)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetA)) return FALSE;
+
+    // Leave the stream positioned after the last element written
+    if (!io ->Seek(io, EndOfData)) return FALSE;
+
+    return TRUE;
+}
+
+
+// Duplicates a LutAToB payload by cloning the pipeline it points to.
+static
+void* Type_LUTA2B_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsPipeline* Clone;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    Clone = cmsPipelineDup((cmsPipeline*) Ptr);
+    return (void*) Clone;
+}
+
+// Releases a LutAToB payload, which is a cmsPipeline.
+static
+void Type_LUTA2B_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsUNUSED_PARAMETER(self);
+
+    cmsPipelineFree((cmsPipeline*) Ptr);
+}
+
+
+// LutBToA type
+
+// Parses a LutBToA tag body into a newly allocated pipeline.
+// Stages are appended in the order B, Matrix, M, CLUT, A; a zero directory offset
+// means the element is absent. Returns NULL on any read or construction failure.
+static
+void* Type_LUTB2A_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt8Number  nIn, nOut;                  // Channel counts stored in the tag
+    cmsUInt32Number TagStart;                   // Base that directory offsets are added to
+    cmsUInt32Number offsetB, offsetMat, offsetM, offsetC, offsetA;
+    cmsPipeline*    Pipe;
+    cmsStage*       Stage;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+
+    TagStart = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    if (!_cmsReadUInt8Number(io, &nIn)) return NULL;
+    if (!_cmsReadUInt8Number(io, &nOut)) return NULL;
+
+    // Reject channel counts of zero or of cmsMAXCHANNELS and above
+    if (nIn == 0 || nIn >= cmsMAXCHANNELS ||
+        nOut == 0 || nOut >= cmsMAXCHANNELS) return NULL;
+
+    // Two bytes of padding follow the channel counts
+    if (!_cmsReadUInt16Number(io, NULL)) return NULL;
+
+    // Directory: B, Matrix, M, CLUT, A
+    if (!_cmsReadUInt32Number(io, &offsetB) ||
+        !_cmsReadUInt32Number(io, &offsetMat) ||
+        !_cmsReadUInt32Number(io, &offsetM) ||
+        !_cmsReadUInt32Number(io, &offsetC) ||
+        !_cmsReadUInt32Number(io, &offsetA)) return NULL;
+
+    Pipe = cmsPipelineAlloc(self ->ContextID, nIn, nOut);
+    if (Pipe == NULL) return NULL;
+
+    if (offsetB != 0) {
+        Stage = ReadSetOfCurves(self, io, TagStart + offsetB, nIn);
+        if (!cmsPipelineInsertStage(Pipe, cmsAT_END, Stage)) goto Error;
+    }
+
+    if (offsetMat != 0) {
+        Stage = ReadMatrix(self, io, TagStart + offsetMat);
+        if (!cmsPipelineInsertStage(Pipe, cmsAT_END, Stage)) goto Error;
+    }
+
+    if (offsetM != 0) {
+        Stage = ReadSetOfCurves(self, io, TagStart + offsetM, nIn);
+        if (!cmsPipelineInsertStage(Pipe, cmsAT_END, Stage)) goto Error;
+    }
+
+    if (offsetC != 0) {
+        Stage = ReadCLUT(self, io, TagStart + offsetC, nIn, nOut);
+        if (!cmsPipelineInsertStage(Pipe, cmsAT_END, Stage)) goto Error;
+    }
+
+    if (offsetA != 0) {
+        Stage = ReadSetOfCurves(self, io, TagStart + offsetA, nOut);
+        if (!cmsPipelineInsertStage(Pipe, cmsAT_END, Stage)) goto Error;
+    }
+
+    *nItems = 1;
+    return Pipe;
+
+Error:
+    cmsPipelineFree(Pipe);
+    return NULL;
+}
+
+
+/*
+B
+B - Matrix - M
+B - CLUT - A
+B - Matrix - M - CLUT - A
+*/
+
+// Serializes a pipeline as a LutBToA tag body.
+// Accepted stage layouts (pipeline order): B | B-Matrix-M | B-CLUT-A | B-Matrix-M-CLUT-A.
+// Returns TRUE on success, FALSE if the pipeline has another layout or any write/seek fails.
+static
+cmsBool  Type_LUTB2A_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsPipeline* Lut = (cmsPipeline*) Ptr;
+    cmsStage *B = NULL, *Matrix = NULL, *M = NULL, *CLUT = NULL, *A = NULL;
+    cmsUInt32Number offsetB = 0, offsetMat = 0, offsetM = 0, offsetC = 0, offsetA = 0;
+    cmsUInt32Number nIn, nOut;
+    cmsUInt32Number TagStart, DirStart, EndOfData;
+    cmsUInt32Number slot;
+
+    cmsUNUSED_PARAMETER(nItems);
+
+    // Base that every directory offset is measured from
+    TagStart = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    // The layouts are probed in order; a later probe runs only when all earlier ones failed
+    if (!cmsPipelineCheckAndRetreiveStages(Lut, 1, cmsSigCurveSetElemType, &B) &&
+        !cmsPipelineCheckAndRetreiveStages(Lut, 3, cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType, &B, &Matrix, &M) &&
+        !cmsPipelineCheckAndRetreiveStages(Lut, 3, cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType, &B, &CLUT, &A) &&
+        !cmsPipelineCheckAndRetreiveStages(Lut, 5, cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType,
+            cmsSigCLutElemType, cmsSigCurveSetElemType, &B, &Matrix, &M, &CLUT, &A)) {
+
+        cmsSignalError(self->ContextID, cmsERROR_NOT_SUITABLE, "LUT is not suitable to be saved as LutBToA");
+        return FALSE;
+    }
+
+    nIn  = cmsPipelineInputChannels(Lut);
+    nOut = cmsPipelineOutputChannels(Lut);
+
+    // Channel counts, one byte each, followed by two bytes of zero
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) nIn)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) nOut)) return FALSE;
+    if (!_cmsWriteUInt16Number(io, 0)) return FALSE;
+
+    // Five zeroed directory slots, patched at the end
+    DirStart = io ->Tell(io);
+    for (slot = 0; slot < 5; slot++) {
+        if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    }
+
+    // Elements are emitted in the order A, CLUT, M, Matrix, B
+    if (A != NULL) {
+        offsetA = io ->Tell(io) - TagStart;
+        if (!WriteSetOfCurves(self, io, cmsSigParametricCurveType, A)) return FALSE;
+    }
+
+    if (CLUT != NULL) {
+        offsetC = io ->Tell(io) - TagStart;
+        // Precision argument: 1 when the pipeline is flagged SaveAs8Bits, otherwise 2
+        if (!WriteCLUT(self, io, (Lut ->SaveAs8Bits ? 1U : 2U), CLUT)) return FALSE;
+    }
+
+    if (M != NULL) {
+        offsetM = io ->Tell(io) - TagStart;
+        if (!WriteSetOfCurves(self, io, cmsSigParametricCurveType, M)) return FALSE;
+    }
+
+    if (Matrix != NULL) {
+        offsetMat = io ->Tell(io) - TagStart;
+        if (!WriteMatrix(self, io, Matrix)) return FALSE;
+    }
+
+    if (B != NULL) {
+        offsetB = io ->Tell(io) - TagStart;
+        if (!WriteSetOfCurves(self, io, cmsSigParametricCurveType, B)) return FALSE;
+    }
+
+    EndOfData = io ->Tell(io);
+
+    // Patch the directory; slot order is B, Matrix, M, CLUT, A
+    if (!io ->Seek(io, DirStart)) return FALSE;
+
+    if (!_cmsWriteUInt32Number(io, offsetB)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetMat)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetM)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetC)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetA)) return FALSE;
+
+    // Restore the stream position to the end of the written data
+    if (!io ->Seek(io, EndOfData)) return FALSE;
+
+    return TRUE;
+}
+
+
+
+// Duplicates a LutBToA payload by cloning the pipeline it points to.
+static
+void* Type_LUTB2A_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsPipeline* Clone;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    Clone = cmsPipelineDup((cmsPipeline*) Ptr);
+    return (void*) Clone;
+}
+
+// Releases a LutBToA payload, which is a cmsPipeline.
+static
+void Type_LUTB2A_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsUNUSED_PARAMETER(self);
+
+    cmsPipelineFree((cmsPipeline*) Ptr);
+}
+
+
+
+// ********************************************************************************
+// Type cmsSigColorantTableType
+// ********************************************************************************
+/*
+The purpose of this tag is to identify the colorants used in the profile by a
+unique name and set of XYZ or L*a*b* values to give the colorant an unambiguous
+value. The first colorant listed is the colorant of the first device channel of
+a lut tag. The second colorant listed is the colorant of the second device channel
+of a lut tag, and so on.
+*/
+
+// Reads a colorant table into a named color list.
+// The tag holds a 32-bit count followed, per colorant, by a 32-byte name and three
+// 16-bit PCS values. Returns the list and sets *nItems to 1, or returns NULL with
+// *nItems set to 0 on failure.
+static
+void *Type_ColorantTable_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt32Number i, Count;
+    cmsNAMEDCOLORLIST* List;
+    char Name[34];
+    cmsUInt16Number PCS[3];
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+
+    *nItems = 0;
+
+    if (!_cmsReadUInt32Number(io, &Count)) return NULL;
+
+    if (Count > cmsMAXCHANNELS) {
+        cmsSignalError(self->ContextID, cmsERROR_RANGE, "Too many colorants '%d'", Count);
+        return NULL;
+    }
+
+    List = cmsAllocNamedColorList(self ->ContextID, Count, 0, "", "");
+
+    // Without this check a failed allocation with Count == 0 fell through and
+    // reported one item while returning NULL
+    if (List == NULL) return NULL;
+
+    for (i=0; i < Count; i++) {
+
+        if (io ->Read(io, Name, 32, 1) != 1) goto Error;
+        Name[32] = 0;   // The 32-byte field is not guaranteed to be terminated
+
+        if (!_cmsReadUInt16Array(io, 3, PCS)) goto Error;
+
+        if (!cmsAppendNamedColor(List, Name, PCS, NULL)) goto Error;
+
+    }
+
+    *nItems = 1;
+    return List;
+
+Error:
+    *nItems = 0;
+    cmsFreeNamedColorList(List);
+    return NULL;
+}
+
+
+
+// Saves a colorant table. The named color list structure is reused for simplicity's sake.
+// Output is a 32-bit count followed, per colorant, by a 32-byte name field and three
+// 16-bit PCS values.
+static
+cmsBool  Type_ColorantTable_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsNAMEDCOLORLIST* Colorants = (cmsNAMEDCOLORLIST*) Ptr;
+    cmsUInt32Number    Total = cmsNamedColorCount(Colorants);
+    cmsUInt32Number    idx;
+    char               Name[cmsMAX_PATH];
+    cmsUInt16Number    PCS[3];
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    if (!_cmsWriteUInt32Number(io, Total)) return FALSE;
+
+    for (idx = 0; idx < Total; idx++) {
+
+        // Clear the buffer first so the unused part of the 32-byte field is zero
+        memset(Name, 0, sizeof(Name));
+
+        if (!cmsNamedColorInfo(Colorants, idx, Name, NULL, NULL, PCS, NULL)) return FALSE;
+        Name[32] = 0;
+
+        if (!io ->Write(io, 32, Name) ||
+            !_cmsWriteUInt16Array(io, 3, PCS)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+// Duplicates a colorant table, which is stored as a named color list.
+static
+void* Type_ColorantTable_Dup(struct _cms_typehandler_struct* self, const void* Ptr, cmsUInt32Number n)
+{
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    return (void*) cmsDupNamedColorList((cmsNAMEDCOLORLIST*) Ptr);
+}
+
+
+// Releases a colorant table, which is stored as a named color list.
+static
+void Type_ColorantTable_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsUNUSED_PARAMETER(self);
+
+    cmsFreeNamedColorList((cmsNAMEDCOLORLIST*) Ptr);
+}
+
+
+// ********************************************************************************
+// Type cmsSigNamedColor2Type
+// ********************************************************************************
+//
+//The namedColor2Type is a count value and array of structures that provide color
+//coordinates for 7-bit ASCII color names. For each named color, a PCS and optional
+//device representation of the color are given. Both representations are 16-bit values.
+//The device representation corresponds to the header's 'color space of data' field.
+//This representation should be consistent with the 'number of device components'
+//field in the namedColor2Type. If this field is 0, device coordinates are not provided.
+//The PCS representation corresponds to the header's PCS field. The PCS representation
+//is always provided. Color names are fixed-length, 32-byte fields including null
+//termination. In order to maintain maximum portability, it is strongly recommended
+//that special characters of the 7-bit ASCII set not be used.
+
+// Reads a namedColor2 tag into a named color list.
+// Layout consumed: vendor flag, color count, device coordinate count, 32-byte prefix,
+// 32-byte suffix, then per color a 32-byte root name, three PCS values and the
+// device coordinates. Returns NULL (with *nItems == 0) on failure.
+static
+void *Type_NamedColor_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt32Number      Flags;                      // Read and then discarded
+    cmsUInt32Number      nColors;                    // Count of named colors
+    cmsUInt32Number      nCoords;                    // Num of device coordinates
+    char                 Prefix[32];
+    char                 Suffix[32];
+    char                 Root[33];
+    cmsUInt16Number      PCS[3];
+    cmsUInt16Number      Device[cmsMAXCHANNELS];
+    cmsNAMEDCOLORLIST*   List;
+    cmsUInt32Number      idx;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+
+    *nItems = 0;
+
+    if (!_cmsReadUInt32Number(io, &Flags)) return NULL;
+    if (!_cmsReadUInt32Number(io, &nColors)) return NULL;
+    if (!_cmsReadUInt32Number(io, &nCoords)) return NULL;
+
+    if (io -> Read(io, Prefix, 32, 1) != 1) return NULL;
+    if (io -> Read(io, Suffix, 32, 1) != 1) return NULL;
+
+    // Force termination inside the 32-byte buffers
+    Suffix[31] = 0;
+    Prefix[31] = 0;
+
+    List = cmsAllocNamedColorList(self ->ContextID, nColors, nCoords, Prefix, Suffix);
+    if (List == NULL) {
+        cmsSignalError(self->ContextID, cmsERROR_RANGE, "Too many named colors '%d'", nColors);
+        return NULL;
+    }
+
+    // Device holds cmsMAXCHANNELS entries, so a larger count cannot be read into it
+    if (nCoords > cmsMAXCHANNELS) {
+        cmsSignalError(self->ContextID, cmsERROR_RANGE, "Too many device coordinates '%d'", nCoords);
+        goto Error;
+    }
+
+    for (idx = 0; idx < nColors; idx++) {
+
+        memset(Device, 0, sizeof(Device));
+
+        if (io -> Read(io, Root, 32, 1) != 1) goto Error;
+        Root[32] = 0;  // To prevent exploits
+
+        if (!_cmsReadUInt16Array(io, 3, PCS)) goto Error;
+        if (!_cmsReadUInt16Array(io, nCoords, Device)) goto Error;
+
+        if (!cmsAppendNamedColor(List, Root, PCS, Device)) goto Error;
+    }
+
+    *nItems = 1;
+    return (void*) List;
+
+Error:
+    cmsFreeNamedColorList(List);
+    return NULL;
+}
+
+
+// Saves a named color list into a named color profile
+// Writes a named color list as a namedColor2 tag body: a zero 32-bit word, the color
+// count, the colorant count, 32-byte prefix and suffix fields, then per color a
+// 32-byte root name, three PCS values and ColorantCount device values.
+// Returns TRUE on success, FALSE on any write or lookup failure.
+static
+cmsBool Type_NamedColor_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsNAMEDCOLORLIST* NamedColorList = (cmsNAMEDCOLORLIST*) Ptr;
+    char                prefix[33];     // Prefix for each color name
+    char                suffix[33];     // Suffix for each color name
+    cmsUInt32Number     i, nColors;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    nColors = cmsNamedColorCount(NamedColorList);
+
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, nColors)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, NamedColorList ->ColorantCount)) return FALSE;
+
+    // strncpy zero-fills the remainder of the 32 bytes when the source is shorter
+    strncpy(prefix, (const char*) NamedColorList->Prefix, 32);
+    strncpy(suffix, (const char*) NamedColorList->Suffix, 32);
+
+    suffix[32] = prefix[32] = 0;
+
+    if (!io ->Write(io, 32, prefix)) return FALSE;
+    if (!io ->Write(io, 32, suffix)) return FALSE;
+
+    for (i=0; i < nColors; i++) {
+
+       cmsUInt16Number PCS[3];
+       cmsUInt16Number Colorant[cmsMAXCHANNELS];
+       char Root[cmsMAX_PATH];
+
+        // A fixed 32 bytes of Root are written below. Zero the buffers first so the
+        // bytes after the name's terminator are zeros instead of leftover stack data.
+        memset(Root, 0, sizeof(Root));
+        memset(PCS, 0, sizeof(PCS));
+        memset(Colorant, 0, sizeof(Colorant));
+
+        if (!cmsNamedColorInfo(NamedColorList, i, Root, NULL, NULL, PCS, Colorant)) return FALSE;
+        Root[32] = 0;
+        if (!io ->Write(io, 32 , Root)) return FALSE;
+        if (!_cmsWriteUInt16Array(io, 3, PCS)) return FALSE;
+        if (!_cmsWriteUInt16Array(io, NamedColorList ->ColorantCount, Colorant)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+// Duplicates a named color list payload.
+static
+void* Type_NamedColor_Dup(struct _cms_typehandler_struct* self, const void* Ptr, cmsUInt32Number n)
+{
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    return (void*) cmsDupNamedColorList((cmsNAMEDCOLORLIST*) Ptr);
+}
+
+
+// Releases a named color list payload.
+static
+void Type_NamedColor_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsUNUSED_PARAMETER(self);
+
+    cmsFreeNamedColorList((cmsNAMEDCOLORLIST*) Ptr);
+}
+
+
+// ********************************************************************************
+// Type cmsSigProfileSequenceDescType
+// ********************************************************************************
+
+// This type is an array of structures, each of which contains information from the
+// header fields and tags from the original profiles which were combined to create
+// the final profile. The order of the structures is the order in which the profiles
+// were combined and includes a structure for the final profile. This provides a
+// description of the profile sequence from source to destination,
+// typically used with the DeviceLink profile.
+
+// Reads an embedded text element, whose type base selects one of three readers
+// (text, text description, multi-localized unicode). Any MLU already held in *mlu
+// is freed before it is replaced. Returns FALSE, leaving *mlu untouched, when the
+// type base is none of the three; otherwise returns whether the reader produced an MLU.
+static
+cmsBool ReadEmbeddedText(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsMLU** mlu, cmsUInt32Number SizeOfTag)
+{
+    cmsTagTypeSignature  BaseType = _cmsReadTypeBase(io);
+    cmsUInt32Number      nItems;
+    cmsMLU*              Parsed;
+
+    if (BaseType != cmsSigTextType &&
+        BaseType != cmsSigTextDescriptionType &&
+        BaseType != cmsSigMultiLocalizedUnicodeType) return FALSE;
+
+    // Drop whatever was stored before; it is replaced even if the read below fails
+    if (*mlu) cmsMLUfree(*mlu);
+
+    if (BaseType == cmsSigTextType) {
+        Parsed = (cmsMLU*) Type_Text_Read(self, io, &nItems, SizeOfTag);
+    }
+    else if (BaseType == cmsSigTextDescriptionType) {
+        Parsed = (cmsMLU*) Type_Text_Description_Read(self, io, &nItems, SizeOfTag);
+    }
+    else {
+        // TBD: Size is needed for MLU, and we have no idea on which is the available size
+        Parsed = (cmsMLU*) Type_MLU_Read(self, io, &nItems, SizeOfTag);
+    }
+
+    *mlu = Parsed;
+    return (*mlu != NULL);
+}
+
+
+// Reads a profile sequence description: a 32-bit count followed, per profile, by
+// manufacturer and model signatures, a 64-bit attribute field, a technology
+// signature and two embedded texts (manufacturer, model).
+// SizeOfTag is decremented after each fixed-size field; the read fails if it runs out.
+static
+void *Type_ProfileSequenceDesc_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsSEQ* Seq;
+    cmsUInt32Number idx, nProfiles;
+
+    *nItems = 0;
+
+    if (!_cmsReadUInt32Number(io, &nProfiles)) return NULL;
+    if (SizeOfTag < sizeof(cmsUInt32Number)) return NULL;
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+    Seq = cmsAllocProfileSequenceDescription(self ->ContextID, nProfiles);
+    if (Seq == NULL) return NULL;
+
+    Seq ->n = nProfiles;
+
+    for (idx = 0; idx < nProfiles; idx++) {
+
+        cmsPSEQDESC* Entry = Seq ->seq + idx;
+
+        if (!_cmsReadUInt32Number(io, &Entry ->deviceMfg) ||
+            SizeOfTag < sizeof(cmsUInt32Number)) goto Error;
+        SizeOfTag -= sizeof(cmsUInt32Number);
+
+        if (!_cmsReadUInt32Number(io, &Entry ->deviceModel) ||
+            SizeOfTag < sizeof(cmsUInt32Number)) goto Error;
+        SizeOfTag -= sizeof(cmsUInt32Number);
+
+        if (!_cmsReadUInt64Number(io, &Entry ->attributes) ||
+            SizeOfTag < sizeof(cmsUInt64Number)) goto Error;
+        SizeOfTag -= sizeof(cmsUInt64Number);
+
+        if (!_cmsReadUInt32Number(io, (cmsUInt32Number *)&Entry ->technology) ||
+            SizeOfTag < sizeof(cmsUInt32Number)) goto Error;
+        SizeOfTag -= sizeof(cmsUInt32Number);
+
+        // Both texts receive the same remaining size; it is not reduced between them
+        if (!ReadEmbeddedText(self, io, &Entry ->Manufacturer, SizeOfTag)) goto Error;
+        if (!ReadEmbeddedText(self, io, &Entry ->Model, SizeOfTag)) goto Error;
+    }
+
+    *nItems = 1;
+    return Seq;
+
+Error:
+    cmsFreeProfileSequenceDescription(Seq);
+    return NULL;
+}
+
+
+// Aux -- embeds a text element. It is written as a multi-localized unicode type when
+// the handler's ICCVersion is 0x4000000 or above, and as a text description type otherwise.
+static
+cmsBool  SaveDescription(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsMLU* Text)
+{
+    cmsBool UseMLU = (self ->ICCVersion >= 0x4000000);
+
+    if (UseMLU) {
+        if (!_cmsWriteTypeBase(io, cmsSigMultiLocalizedUnicodeType)) return FALSE;
+        return Type_MLU_Write(self, io, Text, 1);
+    }
+
+    if (!_cmsWriteTypeBase(io, cmsSigTextDescriptionType)) return FALSE;
+    return Type_Text_Description_Write(self, io, Text, 1);
+}
+
+
+// Writes a profile sequence description: the entry count, then per entry the
+// manufacturer and model signatures, attributes, technology and the two
+// embedded descriptions (manufacturer, model).
+static
+cmsBool  Type_ProfileSequenceDesc_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsSEQ* Seq = (cmsSEQ*) Ptr;
+    cmsUInt32Number idx;
+
+    cmsUNUSED_PARAMETER(nItems);
+
+    if (!_cmsWriteUInt32Number(io, Seq->n)) return FALSE;
+
+    for (idx = 0; idx < Seq ->n; idx++) {
+
+        cmsPSEQDESC* Entry = Seq ->seq + idx;
+
+        if (!_cmsWriteUInt32Number(io, Entry ->deviceMfg) ||
+            !_cmsWriteUInt32Number(io, Entry ->deviceModel) ||
+            !_cmsWriteUInt64Number(io, &Entry ->attributes) ||
+            !_cmsWriteUInt32Number(io, Entry ->technology)) return FALSE;
+
+        if (!SaveDescription(self, io, Entry ->Manufacturer)) return FALSE;
+        if (!SaveDescription(self, io, Entry ->Model)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+// Duplicates a profile sequence description payload.
+static
+void* Type_ProfileSequenceDesc_Dup(struct _cms_typehandler_struct* self, const void* Ptr, cmsUInt32Number n)
+{
+    cmsSEQ* Clone;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    Clone = cmsDupProfileSequenceDescription((cmsSEQ*) Ptr);
+    return (void*) Clone;
+}
+
+// Releases a profile sequence description payload.
+static
+void Type_ProfileSequenceDesc_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsUNUSED_PARAMETER(self);
+
+    cmsFreeProfileSequenceDescription((cmsSEQ*) Ptr);
+}
+
+
+// ********************************************************************************
+// Type cmsSigProfileSequenceIdType
+// ********************************************************************************
+/*
+In certain workflows using ICC Device Link Profiles, it is necessary to identify the
+original profiles that were combined to create the Device Link Profile.
+This type is an array of structures, each of which contains information for
+identification of a profile used in a sequence
+*/
+
+
+// Position-table callback: reads element n of a profile sequence id tag, i.e. a
+// 16-byte profile ID followed by an embedded description, into Cargo (a cmsSEQ).
+static
+cmsBool ReadSeqID(struct _cms_typehandler_struct* self,
+                                             cmsIOHANDLER* io,
+                                             void* Cargo,
+                                             cmsUInt32Number n,
+                                             cmsUInt32Number SizeOfTag)
+{
+    cmsSEQ* Seq = (cmsSEQ*) Cargo;
+    cmsPSEQDESC* Entry = Seq ->seq + n;
+
+    if (io -> Read(io, Entry ->ProfileID.ID8, 16, 1) != 1) return FALSE;
+
+    if (ReadEmbeddedText(self, io, &Entry ->Description, SizeOfTag))
+        return TRUE;
+
+    return FALSE;
+}
+
+
+
+// Reads a profile sequence id tag: a 32-bit element count followed by a position
+// table whose elements are parsed by ReadSeqID.
+// Returns the sequence and sets *nItems to 1, or NULL with *nItems == 0 on failure.
+static
+void *Type_ProfileSequenceId_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsSEQ* Seq;
+    cmsUInt32Number nEntries;
+    cmsUInt32Number TagStart;
+
+    *nItems = 0;
+
+    // Element offsets in the position table are relative to this base
+    TagStart = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    if (!_cmsReadUInt32Number(io, &nEntries)) return NULL;
+
+    // Account for the count just read; the value is not consulted again in this function
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+    Seq = cmsAllocProfileSequenceDescription(self ->ContextID, nEntries);
+    if (Seq == NULL) return NULL;
+
+    if (ReadPositionTable(self, io, nEntries, TagStart, Seq, ReadSeqID)) {
+        *nItems = 1;
+        return Seq;
+    }
+
+    cmsFreeProfileSequenceDescription(Seq);
+    return NULL;
+}
+
+
+// Position-table callback: writes element n of a profile sequence id tag, i.e. the
+// 16-byte profile ID followed by its description.
+static
+cmsBool WriteSeqID(struct _cms_typehandler_struct* self,
+                                             cmsIOHANDLER* io,
+                                             void* Cargo,
+                                             cmsUInt32Number n,
+                                             cmsUInt32Number SizeOfTag)
+{
+    cmsSEQ* Seq = (cmsSEQ*) Cargo;
+    cmsPSEQDESC* Entry = &Seq ->seq[n];
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+
+    if (!io ->Write(io, 16, Entry ->ProfileID.ID8)) return FALSE;
+
+    // The description is stored right after the ID
+    if (!SaveDescription(self, io, Entry ->Description)) return FALSE;
+
+    return TRUE;
+}
+
+// Writes a profile sequence id tag: the element count, then a position table whose
+// elements are produced by WriteSeqID.
+static
+cmsBool  Type_ProfileSequenceId_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsSEQ* Seq = (cmsSEQ*) Ptr;
+    cmsUInt32Number TagStart;
+
+    cmsUNUSED_PARAMETER(nItems);
+
+    // Base that the position table offsets are measured from
+    TagStart = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    // Table count
+    if (!_cmsWriteUInt32Number(io, Seq ->n)) return FALSE;
+
+    // Position table plus the elements themselves
+    return WritePositionTable(self, io, 0, Seq ->n, TagStart, Seq, WriteSeqID) ? TRUE : FALSE;
+}
+
+// Duplicates a profile sequence id payload, which is stored as a cmsSEQ.
+static
+void* Type_ProfileSequenceId_Dup(struct _cms_typehandler_struct* self, const void* Ptr, cmsUInt32Number n)
+{
+    cmsSEQ* Clone;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    Clone = cmsDupProfileSequenceDescription((cmsSEQ*) Ptr);
+    return (void*) Clone;
+}
+
+// Releases a profile sequence id payload, which is stored as a cmsSEQ.
+static
+void Type_ProfileSequenceId_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsUNUSED_PARAMETER(self);
+
+    cmsFreeProfileSequenceDescription((cmsSEQ*) Ptr);
+}
+
+
+// ********************************************************************************
+// Type cmsSigUcrBgType
+// ********************************************************************************
+/*
+This type contains curves representing the under color removal and black
+generation and a text string which is a general description of the method used
+for the ucr/bg.
+*/
+
+// Reads a ucrBg tag: a counted 16-bit UCR curve, a counted 16-bit BG curve, and a
+// description string that occupies whatever remains of the tag.
+// Returns a newly allocated cmsUcrBg and sets *nItems to 1. On any failure everything
+// allocated so far is released and NULL is returned with *nItems == 0.
+static
+void *Type_UcrBg_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUcrBg* n = (cmsUcrBg*) _cmsMallocZero(self ->ContextID, sizeof(cmsUcrBg));
+    cmsUInt32Number CountUcr, CountBg;
+    char* ASCIIString;
+
+    *nItems = 0;
+    if (n == NULL) return NULL;
+
+    // First curve is Under color removal
+    if (SizeOfTag < sizeof(cmsUInt32Number)) goto Error;
+    if (!_cmsReadUInt32Number(io, &CountUcr)) goto Error;
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+    // The table must fit in what is left of the tag. Dividing instead of multiplying
+    // keeps the comparison free of overflow.
+    if (CountUcr > SizeOfTag / sizeof(cmsUInt16Number)) goto Error;
+
+    n ->Ucr = cmsBuildTabulatedToneCurve16(self ->ContextID, CountUcr, NULL);
+    if (n ->Ucr == NULL) goto Error;
+
+    if (!_cmsReadUInt16Array(io, CountUcr, n ->Ucr->Table16)) goto Error;
+    SizeOfTag -= CountUcr * (cmsUInt32Number) sizeof(cmsUInt16Number);
+
+    // Second curve is Black generation
+    if (SizeOfTag < sizeof(cmsUInt32Number)) goto Error;
+    if (!_cmsReadUInt32Number(io, &CountBg)) goto Error;
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+    if (CountBg > SizeOfTag / sizeof(cmsUInt16Number)) goto Error;
+
+    n ->Bg = cmsBuildTabulatedToneCurve16(self ->ContextID, CountBg, NULL);
+    if (n ->Bg == NULL) goto Error;
+
+    if (!_cmsReadUInt16Array(io, CountBg, n ->Bg->Table16)) goto Error;
+    SizeOfTag -= CountBg * (cmsUInt32Number) sizeof(cmsUInt16Number);
+
+    // SizeOfTag + 1 is allocated below; make sure that sum cannot wrap
+    if (SizeOfTag == UINT_MAX) goto Error;
+
+    // Now comes the text. The length is specified by the tag size
+    n ->Desc = cmsMLUalloc(self ->ContextID, 1);
+    if (n ->Desc == NULL) goto Error;
+
+    ASCIIString = (char*) _cmsMalloc(self ->ContextID, SizeOfTag + 1);
+    if (ASCIIString == NULL) goto Error;
+
+    if (io ->Read(io, ASCIIString, sizeof(char), SizeOfTag) != SizeOfTag) {
+        _cmsFree(self ->ContextID, ASCIIString);
+        goto Error;
+    }
+
+    ASCIIString[SizeOfTag] = 0;
+    cmsMLUsetASCII(n ->Desc, cmsNoLanguage, cmsNoCountry, ASCIIString);
+    _cmsFree(self ->ContextID, ASCIIString);
+
+    *nItems = 1;
+    return (void*) n;
+
+Error:
+    // n was zero-initialised, so members that were never built are still NULL here
+    if (n ->Ucr) cmsFreeToneCurve(n ->Ucr);
+    if (n ->Bg) cmsFreeToneCurve(n ->Bg);
+    if (n ->Desc) cmsMLUfree(n ->Desc);
+    _cmsFree(self ->ContextID, n);
+    return NULL;
+}
+
+// Writes a ucrBg tag: the UCR curve (count + 16-bit table), the BG curve (count +
+// 16-bit table) and then the ASCII description with no length prefix.
+// Returns TRUE on success, FALSE on any write or allocation failure.
+static
+cmsBool  Type_UcrBg_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsUcrBg* Value = (cmsUcrBg*) Ptr;
+    cmsUInt32Number TextSize;
+    char* Text;
+    cmsBool Ok;
+
+    cmsUNUSED_PARAMETER(nItems);
+
+    // First curve is Under color removal
+    if (!_cmsWriteUInt32Number(io, Value ->Ucr ->nEntries)) return FALSE;
+    if (!_cmsWriteUInt16Array(io, Value ->Ucr ->nEntries, Value ->Ucr ->Table16)) return FALSE;
+
+    // Then black generation
+    if (!_cmsWriteUInt32Number(io, Value ->Bg ->nEntries)) return FALSE;
+    if (!_cmsWriteUInt16Array(io, Value ->Bg ->nEntries, Value ->Bg ->Table16)) return FALSE;
+
+    // Now comes the text. The length is specified by the tag size.
+    // The first call only asks how many bytes are needed.
+    TextSize = cmsMLUgetASCII(Value ->Desc, cmsNoLanguage, cmsNoCountry, NULL, 0);
+    Text     = (char*) _cmsMalloc(self ->ContextID, TextSize);
+
+    // A failed allocation must not reach io->Write with a NULL buffer
+    if (Text == NULL && TextSize != 0) return FALSE;
+
+    Ok = (cmsMLUgetASCII(Value ->Desc, cmsNoLanguage, cmsNoCountry, Text, TextSize) == TextSize) &&
+         io ->Write(io, TextSize, Text);
+
+    // Release the buffer on the failure paths as well as on success
+    if (Text != NULL) _cmsFree(self ->ContextID, Text);
+
+    return Ok ? TRUE : FALSE;
+}
+
+// Duplicates a cmsUcrBg: allocates a new container and copies both curves and the
+// description. Returns NULL only if the container itself cannot be allocated.
+static
+void* Type_UcrBg_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsUcrBg* Original = (cmsUcrBg*) Ptr;
+    cmsUcrBg* Copy;
+
+    cmsUNUSED_PARAMETER(n);
+
+    Copy = (cmsUcrBg*) _cmsMallocZero(self ->ContextID, sizeof(cmsUcrBg));
+    if (Copy != NULL) {
+
+        Copy ->Bg   = cmsDupToneCurve(Original ->Bg);
+        Copy ->Ucr  = cmsDupToneCurve(Original ->Ucr);
+        Copy ->Desc = cmsMLUdup(Original ->Desc);
+    }
+
+    return (void*) Copy;
+}
+
+// Releases a cmsUcrBg: frees whichever members are present, then the container.
+static
+void Type_UcrBg_Free(struct _cms_typehandler_struct* self, void *Ptr)
+{
+    cmsUcrBg* Value = (cmsUcrBg*) Ptr;
+
+    if (Value ->Ucr != NULL) {
+        cmsFreeToneCurve(Value ->Ucr);
+    }
+
+    if (Value ->Bg != NULL) {
+        cmsFreeToneCurve(Value ->Bg);
+    }
+
+    if (Value ->Desc != NULL) {
+        cmsMLUfree(Value ->Desc);
+    }
+
+    _cmsFree(self ->ContextID, Ptr);
+}
+
+// ********************************************************************************
+// Type cmsSigCrdInfoType
+// ********************************************************************************
+
+/*
+This type contains the PostScript product name to which this profile corresponds
+and the names of the companion CRDs. Recall that a single profile can generate
+multiple CRDs. It is implemented as an MLU whose language code is always "PS",
+while the country code varies for each element:
+
+                nm: PostScript product name
+                #0: Rendering intent 0 CRD name
+                #1: Rendering intent 1 CRD name
+                #2: Rendering intent 2 CRD name
+                #3: Rendering intent 3 CRD name
+*/
+
+
+
+// Auxiliary: reads a string stored as a 32-bit count followed by that many bytes and
+// stores it in mlu under language "PS" and the given Section as country code.
+// *SizeOfTag is the space still available; on success it is reduced by the bytes consumed.
+static
+cmsBool  ReadCountAndSting(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsMLU* mlu, cmsUInt32Number* SizeOfTag, const char* Section)
+{
+    cmsUInt32Number Length;
+    char* Buffer;
+
+    if (*SizeOfTag < sizeof(cmsUInt32Number)) return FALSE;
+
+    if (!_cmsReadUInt32Number(io, &Length)) return FALSE;
+
+    // Reject a count that would overflow once the count field is added, or that
+    // exceeds what is left of the tag
+    if (Length > UINT_MAX - sizeof(cmsUInt32Number) ||
+        *SizeOfTag < Length + sizeof(cmsUInt32Number)) return FALSE;
+
+    Buffer = (char*) _cmsMalloc(self ->ContextID, Length+1);
+    if (Buffer == NULL) return FALSE;
+
+    if (io ->Read(io, Buffer, sizeof(cmsUInt8Number), Length) == Length) {
+
+        Buffer[Length] = 0;
+
+        cmsMLUsetASCII(mlu, "PS", Section, Buffer);
+        _cmsFree(self ->ContextID, Buffer);
+
+        *SizeOfTag -= (Length + sizeof(cmsUInt32Number));
+        return TRUE;
+    }
+
+    _cmsFree(self ->ContextID, Buffer);
+    return FALSE;
+}
+
+// Auxiliary, write a string as count + string. The text is looked up in the MLU
+// under language "PS" and the country code given in Section.
+// Returns TRUE on success, FALSE if the buffer cannot be allocated, the string
+// cannot be retrieved or any write fails. The temporary buffer is released on
+// every path.
+static
+cmsBool  WriteCountAndSting(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsMLU* mlu, const char* Section)
+{
+    cmsUInt32Number TextSize;
+    char* Text;
+    cmsBool rc = FALSE;
+
+    // A first call without buffer is used to obtain the size to allocate
+    TextSize = cmsMLUgetASCII(mlu, "PS", Section, NULL, 0);
+    Text     = (char*) _cmsMalloc(self ->ContextID, TextSize);
+    if (Text == NULL) return FALSE;
+
+    if (!_cmsWriteUInt32Number(io, TextSize)) goto Done;
+
+    if (cmsMLUgetASCII(mlu, "PS", Section, Text, TextSize) == 0) goto Done;
+
+    if (!io ->Write(io, TextSize, Text)) goto Done;
+
+    rc = TRUE;
+
+Done:
+    // Single exit point so the buffer is never leaked on failure
+    _cmsFree(self ->ContextID, Text);
+    return rc;
+}
+
+// Reads a crdInfo tag into a newly allocated MLU holding the five strings
+// "nm", "#0", "#1", "#2" and "#3", in that order.
+// Returns the MLU and sets *nItems to 1 on success; returns NULL with
+// *nItems set to 0 on any failure, including failure to allocate the MLU.
+static
+void *Type_CrdInfo_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsMLU* mlu;
+
+    *nItems = 0;
+
+    // Without a container there is nowhere to store the strings
+    mlu = cmsMLUalloc(self ->ContextID, 5);
+    if (mlu == NULL) return NULL;
+
+    if (!ReadCountAndSting(self, io, mlu, &SizeOfTag, "nm")) goto Error;
+    if (!ReadCountAndSting(self, io, mlu, &SizeOfTag, "#0")) goto Error;
+    if (!ReadCountAndSting(self, io, mlu, &SizeOfTag, "#1")) goto Error;
+    if (!ReadCountAndSting(self, io, mlu, &SizeOfTag, "#2")) goto Error;
+    if (!ReadCountAndSting(self, io, mlu, &SizeOfTag, "#3")) goto Error;
+
+    *nItems = 1;
+    return (void*) mlu;
+
+Error:
+    cmsMLUfree(mlu);
+    return NULL;
+
+}
+
+// Writes the five crdInfo strings ("nm", "#0" .. "#3") in order, each one as
+// count + string. Stops and returns FALSE at the first string that fails.
+static
+cmsBool  Type_CrdInfo_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    static const char* const Sections[] = { "nm", "#0", "#1", "#2", "#3" };
+    cmsMLU* mlu = (cmsMLU*) Ptr;
+    cmsUInt32Number k;
+
+    cmsUNUSED_PARAMETER(nItems);
+
+    for (k = 0; k < sizeof(Sections) / sizeof(Sections[0]); k++) {
+
+        if (!WriteCountAndSting(self, io, mlu, Sections[k])) return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+// Duplicates a crdInfo object, which is held as an MLU.
+static
+void* Type_CrdInfo_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsMLU* Copy;
+
+    cmsUNUSED_PARAMETER(self);
+    cmsUNUSED_PARAMETER(n);
+
+    Copy = cmsMLUdup((cmsMLU*) Ptr);
+    return (void*) Copy;
+}
+
+// Releases a crdInfo object, which is held as an MLU.
+static
+void Type_CrdInfo_Free(struct _cms_typehandler_struct* self, void *Ptr)
+{
+    cmsMLU* mlu = (cmsMLU*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsMLUfree(mlu);
+}
+
+// ********************************************************************************
+// Type cmsSigScreeningType
+// ********************************************************************************
+//
+//The screeningType describes various screening parameters including screen
+//frequency, screening angle, and spot shape.
+
+// Reads a screening tag: a flag word, a channel count and then, per channel,
+// frequency, screen angle and spot shape. The channel count is clamped to
+// cmsMAXCHANNELS - 1. Returns a newly allocated cmsScreening and sets *nItems
+// to 1, or returns NULL on failure.
+static
+void *Type_Screening_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsScreening* Screening;
+    cmsUInt32Number ch;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+
+    Screening = (cmsScreening*) _cmsMallocZero(self ->ContextID, sizeof(cmsScreening));
+    if (Screening == NULL) return NULL;
+
+    *nItems = 0;
+
+    if (!_cmsReadUInt32Number(io, &Screening ->Flag) ||
+        !_cmsReadUInt32Number(io, &Screening ->nChannels)) goto Error;
+
+    // Never read more channels than fit
+    if (Screening ->nChannels > cmsMAXCHANNELS - 1) {
+        Screening ->nChannels = cmsMAXCHANNELS - 1;
+    }
+
+    for (ch = 0; ch < Screening ->nChannels; ch++) {
+
+        if (!_cmsRead15Fixed16Number(io, &Screening ->Channels[ch].Frequency) ||
+            !_cmsRead15Fixed16Number(io, &Screening ->Channels[ch].ScreenAngle) ||
+            !_cmsReadUInt32Number(io, &Screening ->Channels[ch].SpotShape)) goto Error;
+    }
+
+    *nItems = 1;
+    return (void*) Screening;
+
+Error:
+    _cmsFree(self ->ContextID, Screening);
+    return NULL;
+}
+
+
+// Writes a screening tag: flag word, channel count and then, per channel,
+// frequency, screen angle and spot shape. Returns FALSE on the first failed write.
+static
+cmsBool Type_Screening_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsScreening* Screening = (cmsScreening*) Ptr;
+    cmsUInt32Number ch;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    if (!_cmsWriteUInt32Number(io, Screening ->Flag) ||
+        !_cmsWriteUInt32Number(io, Screening ->nChannels)) {
+        return FALSE;
+    }
+
+    for (ch = 0; ch < Screening ->nChannels; ch++) {
+
+        if (!_cmsWrite15Fixed16Number(io, Screening ->Channels[ch].Frequency) ||
+            !_cmsWrite15Fixed16Number(io, Screening ->Channels[ch].ScreenAngle) ||
+            !_cmsWriteUInt32Number(io, Screening ->Channels[ch].SpotShape)) {
+            return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+
+// Duplicates a cmsScreening object as a plain memory copy.
+static
+void* Type_Screening_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    void* Copy;
+
+    cmsUNUSED_PARAMETER(n);
+
+    Copy = _cmsDupMem(self ->ContextID, Ptr, sizeof(cmsScreening));
+    return Copy;
+}
+
+
+// Releases a cmsScreening object.
+static
+void Type_Screening_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsScreening* Screening = (cmsScreening*) Ptr;
+
+    _cmsFree(self ->ContextID, Screening);
+}
+
+// ********************************************************************************
+// Type cmsSigViewingConditionsType
+// ********************************************************************************
+//
+//This type represents a set of viewing condition parameters including:
+//CIE 'absolute' illuminant white point tristimulus values and CIE 'absolute'
+//surround tristimulus values.
+
+// Reads a viewing conditions tag: illuminant XYZ, surround XYZ and illuminant
+// type, in that order. Returns a newly allocated cmsICCViewingConditions and
+// sets *nItems to 1, or returns NULL on failure.
+static
+void *Type_ViewingConditions_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsICCViewingConditions* Conditions;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+
+    Conditions = (cmsICCViewingConditions*) _cmsMallocZero(self ->ContextID, sizeof(cmsICCViewingConditions));
+    if (Conditions == NULL) return NULL;
+
+    *nItems = 0;
+
+    if (_cmsReadXYZNumber(io, &Conditions ->IlluminantXYZ) &&
+        _cmsReadXYZNumber(io, &Conditions ->SurroundXYZ) &&
+        _cmsReadUInt32Number(io, &Conditions ->IlluminantType)) {
+
+        *nItems = 1;
+        return (void*) Conditions;
+    }
+
+    // Any failed read discards the partially filled object
+    _cmsFree(self ->ContextID, Conditions);
+    return NULL;
+}
+
+
+// Writes a viewing conditions tag: illuminant XYZ, surround XYZ and illuminant
+// type, in that order. Returns FALSE on the first failed write.
+static
+cmsBool Type_ViewingConditions_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsICCViewingConditions* Conditions = (cmsICCViewingConditions*) Ptr;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    if (_cmsWriteXYZNumber(io, &Conditions ->IlluminantXYZ) &&
+        _cmsWriteXYZNumber(io, &Conditions ->SurroundXYZ) &&
+        _cmsWriteUInt32Number(io, Conditions ->IlluminantType)) {
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+
+// Duplicates a cmsICCViewingConditions object as a plain memory copy.
+static
+void* Type_ViewingConditions_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    void* Copy;
+
+    cmsUNUSED_PARAMETER(n);
+
+    Copy = _cmsDupMem(self ->ContextID, Ptr, sizeof(cmsICCViewingConditions));
+    return Copy;
+}
+
+
+// Releases a cmsICCViewingConditions object.
+static
+void Type_ViewingConditions_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsICCViewingConditions* Conditions = (cmsICCViewingConditions*) Ptr;
+
+    _cmsFree(self ->ContextID, Conditions);
+}
+
+
+// ********************************************************************************
+// Type cmsSigMultiProcessElementType
+// ********************************************************************************
+
+
+// Duplicates an MPE element, which is held as a pipeline stage.
+static
+void* GenericMPEdup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsStage* Copy;
+
+    cmsUNUSED_PARAMETER(self);
+    cmsUNUSED_PARAMETER(n);
+
+    Copy = cmsStageDup((cmsStage*) Ptr);
+    return (void*) Copy;
+}
+
+// Releases an MPE element, which is held as a pipeline stage.
+static
+void GenericMPEfree(struct _cms_typehandler_struct* self, void *Ptr)
+{
+    cmsStage* Stage = (cmsStage*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsStageFree(Stage);
+}
+
+// Each curve is stored in one or more curve segments, with break-points specified between curve segments.
+// The first curve segment always starts at -Infinity, and the last curve segment always ends at +Infinity. The
+// first and last curve segments shall be specified in terms of a formula, whereas the other segments shall be
+// specified either in terms of a formula, or by a sampled curve.
+
+
+// Read an embedded segmented curve
+// The stream holds: signature, a 32-bit field that is not kept, segment count (16 bits),
+// a 16-bit field that is not kept, nSegments-1 break-points and then the segments,
+// each one either formula-based or sampled.
+// Returns a newly built tone curve, or NULL on read error, unknown segment type,
+// allocation failure or a segment count of zero.
+static
+cmsToneCurve* ReadSegmentedCurve(struct _cms_typehandler_struct* self, cmsIOHANDLER* io)
+{
+    cmsCurveSegSignature ElementSig;
+    cmsUInt32Number i, j;
+    cmsUInt16Number nSegments;
+    cmsCurveSegment*  Segments;
+    cmsToneCurve* Curve;
+    cmsFloat32Number PrevBreak = MINUS_INF;    // - infinite
+
+    // Take signature and channels for each element.
+     if (!_cmsReadUInt32Number(io, (cmsUInt32Number*) &ElementSig)) return NULL;
+
+     // That should be a segmented curve
+     if (ElementSig != cmsSigSegmentedCurve) return NULL;
+
+     // The values read into NULL are consumed but not kept
+     if (!_cmsReadUInt32Number(io, NULL)) return NULL;
+     if (!_cmsReadUInt16Number(io, &nSegments)) return NULL;
+     if (!_cmsReadUInt16Number(io, NULL)) return NULL;
+
+     if (nSegments < 1) return NULL;
+     // Zero-initialized, so the cleanup loops below can test SampledPoints safely
+     Segments = (cmsCurveSegment*) _cmsCalloc(self ->ContextID, nSegments, sizeof(cmsCurveSegment));
+     if (Segments == NULL) return NULL;
+
+     // Read breakpoints
+     // Each break-point closes one segment and opens the next one
+     for (i=0; i < (cmsUInt32Number) nSegments - 1; i++) {
+
+         Segments[i].x0 = PrevBreak;
+         if (!_cmsReadFloat32Number(io, &Segments[i].x1)) goto Error;
+         PrevBreak = Segments[i].x1;
+     }
+
+     // The last segment extends up to +infinite
+     Segments[nSegments-1].x0 = PrevBreak;
+     Segments[nSegments-1].x1 = PLUS_INF;     // A big cmsFloat32Number number
+
+     // Read segments
+     for (i=0; i < nSegments; i++) {
+
+          if (!_cmsReadUInt32Number(io, (cmsUInt32Number*) &ElementSig)) goto Error;
+          if (!_cmsReadUInt32Number(io, NULL)) goto Error;
+
+           switch (ElementSig) {
+
+            case cmsSigFormulaCurveSeg: {
+
+                cmsUInt16Number Type;
+                // Number of parameters for formula types 0, 1 and 2
+                cmsUInt32Number ParamsByType[] = {4, 5, 5 };
+
+                if (!_cmsReadUInt16Number(io, &Type)) goto Error;
+                if (!_cmsReadUInt16Number(io, NULL)) goto Error;
+
+                // Stored type is offset by 6 in memory; the range check follows before Type is used as index
+                Segments[i].Type = Type + 6;
+                if (Type > 2) goto Error;
+
+                for (j=0; j < ParamsByType[Type]; j++) {
+
+                    cmsFloat32Number f;
+                    if (!_cmsReadFloat32Number(io, &f)) goto Error;
+                    Segments[i].Params[j] = f;
+                }
+                }
+                break;
+
+
+            case cmsSigSampledCurveSeg: {
+                cmsUInt32Number Count;
+
+                if (!_cmsReadUInt32Number(io, &Count)) goto Error;
+
+                // Count comes straight from the stream; a failed allocation aborts the read
+                Segments[i].nGridPoints = Count;
+                Segments[i].SampledPoints = (cmsFloat32Number*) _cmsCalloc(self ->ContextID, Count, sizeof(cmsFloat32Number));
+                if (Segments[i].SampledPoints == NULL) goto Error;
+
+                for (j=0; j < Count; j++) {
+                    if (!_cmsReadFloat32Number(io, &Segments[i].SampledPoints[j])) goto Error;
+                }
+                }
+                break;
+
+            default:
+                {
+                char String[5];
+
+                _cmsTagSignature2String(String, (cmsTagSignature) ElementSig);
+                cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown curve element type '%s' found.", String);
+                }
+                goto Error;
+
+         }
+     }
+
+     Curve = cmsBuildSegmentedToneCurve(self ->ContextID, nSegments, Segments);
+
+     // The temporary segment description is released whether or not the curve was built
+     for (i=0; i < nSegments; i++) {
+         if (Segments[i].SampledPoints) _cmsFree(self ->ContextID, Segments[i].SampledPoints);
+     }
+     _cmsFree(self ->ContextID, Segments);
+     return Curve;
+
+Error:
+     // Release whatever sampled tables were allocated so far, then the segment array
+     if (Segments) {
+         for (i=0; i < nSegments; i++) {
+             if (Segments[i].SampledPoints) _cmsFree(self ->ContextID, Segments[i].SampledPoints);
+         }
+         _cmsFree(self ->ContextID, Segments);
+     }
+     return NULL;
+}
+
+
+// Position-table callback: reads the n-th segmented curve and stores it in the
+// array of curves passed as Cargo. Returns FALSE if the curve cannot be read.
+static
+cmsBool ReadMPECurve(struct _cms_typehandler_struct* self,
+                     cmsIOHANDLER* io,
+                     void* Cargo,
+                     cmsUInt32Number n,
+                     cmsUInt32Number SizeOfTag)
+{
+    cmsToneCurve** Curves = (cmsToneCurve**) Cargo;
+    cmsToneCurve* NewCurve;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+
+    NewCurve = ReadSegmentedCurve(self, io);
+    Curves[n] = NewCurve;
+
+    return (NewCurve != NULL);
+}
+
+// Reads a curve-set element: channel counts (which must match), then one
+// segmented curve per channel located through a position table. Returns a
+// tone-curve stage and sets *nItems to 1, or returns NULL with *nItems at 0.
+static
+void *Type_MPEcurve_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsStage* Stage = NULL;
+    cmsUInt16Number nIn, nOut;
+    cmsUInt32Number k, Base;
+    cmsToneCurve** Curves;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+
+    *nItems = 0;
+
+    // Element offsets are relative to the start of the tag base
+    Base = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    if (!_cmsReadUInt16Number(io, &nIn)) return NULL;
+    if (!_cmsReadUInt16Number(io, &nOut)) return NULL;
+
+    if (nIn != nOut) return NULL;
+
+    Curves = (cmsToneCurve**) _cmsCalloc(self ->ContextID, nIn, sizeof(cmsToneCurve*));
+    if (Curves == NULL) return NULL;
+
+    if (ReadPositionTable(self, io, nIn, Base, Curves, ReadMPECurve)) {
+        Stage = cmsStageAllocToneCurves(self ->ContextID, nIn, Curves);
+    }
+
+    // The temporary curves are released whether or not the stage was built
+    for (k = 0; k < nIn; k++) {
+        if (Curves[k] != NULL) {
+            cmsFreeToneCurve(Curves[k]);
+        }
+    }
+    _cmsFree(self ->ContextID, Curves);
+
+    *nItems = (Stage != NULL) ? 1U : 0;
+    return Stage;
+}
+
+
+// Write a single segmented curve. Only structural checks are performed: the curve
+// must have between 1 and 65535 segments, and formula segments must be of a
+// supported type. The contents of the segments are not validated.
+// Returns TRUE on success, FALSE on an invalid curve or a failed write.
+static
+cmsBool WriteSegmentedCurve(cmsIOHANDLER* io, cmsToneCurve* g)
+{
+    cmsUInt32Number i, j;
+    cmsCurveSegment* Segments = g ->Segments;
+    cmsUInt32Number nSegments = g ->nSegments;
+
+    // A curve without segments cannot be written: nSegments - 1 below would wrap
+    // around on an unsigned zero and walk past the end of the array. The count is
+    // stored in 16 bits, so bigger values cannot be represented either.
+    if (Segments == NULL || nSegments < 1 || nSegments > 0xFFFF) goto Error;
+
+    if (!_cmsWriteUInt32Number(io, cmsSigSegmentedCurve)) goto Error;
+    if (!_cmsWriteUInt32Number(io, 0)) goto Error;
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) nSegments)) goto Error;
+    if (!_cmsWriteUInt16Number(io, 0)) goto Error;
+
+    // Write the break-points
+    for (i=0; i < nSegments - 1; i++) {
+        if (!_cmsWriteFloat32Number(io, Segments[i].x1)) goto Error;
+    }
+
+    // Write the segments
+    for (i=0; i < nSegments; i++) {
+
+        cmsCurveSegment* ActualSeg = Segments + i;
+
+        if (ActualSeg -> Type == 0) {
+
+            // This is a sampled curve
+            if (!_cmsWriteUInt32Number(io, (cmsUInt32Number) cmsSigSampledCurveSeg)) goto Error;
+            if (!_cmsWriteUInt32Number(io, 0)) goto Error;
+            if (!_cmsWriteUInt32Number(io, ActualSeg -> nGridPoints)) goto Error;
+
+            for (j=0; j < ActualSeg -> nGridPoints; j++) {
+                if (!_cmsWriteFloat32Number(io, ActualSeg -> SampledPoints[j])) goto Error;
+            }
+
+        }
+        else {
+            int Type;
+            // Number of parameters for formula types 0, 1 and 2
+            cmsUInt32Number ParamsByType[] = { 4, 5, 5 };
+
+            // This is a formula-based
+            if (!_cmsWriteUInt32Number(io, (cmsUInt32Number) cmsSigFormulaCurveSeg)) goto Error;
+            if (!_cmsWriteUInt32Number(io, 0)) goto Error;
+
+            // In-memory types 6, 7 and 8 map to stored types 0, 1 and 2; anything else is rejected
+            Type = ActualSeg ->Type - 6;
+            if (Type > 2 || Type < 0) goto Error;
+
+            if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) Type)) goto Error;
+            if (!_cmsWriteUInt16Number(io, 0)) goto Error;
+
+            for (j=0; j < ParamsByType[Type]; j++) {
+                if (!_cmsWriteFloat32Number(io, (cmsFloat32Number) ActualSeg ->Params[j])) goto Error;
+            }
+        }
+
+        // It seems there is no need to align. Code is here, and for safety commented out
+        // if (!_cmsWriteAlignment(io)) goto Error;
+    }
+
+    return TRUE;
+
+Error:
+    return FALSE;
+}
+
+
+// Position-table callback: writes the n-th curve of the tone-curve set passed as Cargo.
+static
+cmsBool WriteMPECurve(struct _cms_typehandler_struct* self,
+                      cmsIOHANDLER* io,
+                      void* Cargo,
+                      cmsUInt32Number n,
+                      cmsUInt32Number SizeOfTag)
+{
+    _cmsStageToneCurvesData* CurveSet = (_cmsStageToneCurvesData*) Cargo;
+    cmsToneCurve* Curve = CurveSet ->TheCurves[n];
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+    cmsUNUSED_PARAMETER(self);
+
+    return WriteSegmentedCurve(io, Curve);
+}
+
+// Write a curve-set element: the channel count twice (input and output are the
+// same for curves), followed by a position table with one segmented curve per channel.
+static
+cmsBool  Type_MPEcurve_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsStage* Stage = (cmsStage*) Ptr;
+    _cmsStageToneCurvesData* CurveSet = (_cmsStageToneCurvesData*) Stage ->Data;
+    cmsUInt32Number Base;
+
+    cmsUNUSED_PARAMETER(nItems);
+
+    // Element offsets are relative to the start of the tag base
+    Base = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) Stage ->InputChannels) ||
+        !_cmsWriteUInt16Number(io, (cmsUInt16Number) Stage ->InputChannels)) {
+        return FALSE;
+    }
+
+    if (WritePositionTable(self, io, 0, Stage ->InputChannels, Base, CurveSet, WriteMPECurve)) {
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+
+
+// The matrix is organized as an array of PxQ+Q elements, where P is the number of input channels to the
+// matrix, and Q is the number of output channels. The matrix elements are each float32Numbers. The array
+// is organized as follows:
+// array = [e11, e12, ..., e1P, e21, e22, ..., e2P, ..., eQ1, eQ2, ..., eQP, e1, e2, ..., eQ]
+
+// Reads a matrix element: input and output channel counts, then
+// InputChans * OutputChans coefficients followed by OutputChans offsets, all
+// stored as 32-bit floats. Returns a newly allocated matrix stage and sets
+// *nItems to 1; on any failure returns NULL with *nItems set to 0.
+static
+void *Type_MPEmatrix_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsStage* mpe = NULL;
+    cmsUInt16Number   InputChans, OutputChans;
+    cmsUInt32Number   nElems, i;
+    cmsFloat64Number* Matrix = NULL;
+    cmsFloat64Number* Offsets = NULL;
+    cmsFloat32Number  v;
+
+    // Report no items unless the stage is actually created
+    *nItems = 0;
+
+    if (!_cmsReadUInt16Number(io, &InputChans)) return NULL;
+    if (!_cmsReadUInt16Number(io, &OutputChans)) return NULL;
+
+
+    // Input and output chans may be ANY (up to 0xffff),
+    // but we choose to limit the number of channels for now
+    if (InputChans >= cmsMAXCHANNELS) return NULL;
+    if (OutputChans >= cmsMAXCHANNELS) return NULL;
+
+    nElems = (cmsUInt32Number) InputChans * OutputChans;
+
+    Matrix = (cmsFloat64Number*) _cmsCalloc(self ->ContextID, nElems, sizeof(cmsFloat64Number));
+    if (Matrix == NULL) goto Done;
+
+    Offsets = (cmsFloat64Number*) _cmsCalloc(self ->ContextID, OutputChans, sizeof(cmsFloat64Number));
+    if (Offsets == NULL) goto Done;
+
+    // Values are stored as 32-bit floats and widened on load
+    for (i=0; i < nElems; i++) {
+
+        if (!_cmsReadFloat32Number(io, &v)) goto Done;
+        Matrix[i] = v;
+    }
+
+    for (i=0; i < OutputChans; i++) {
+
+        if (!_cmsReadFloat32Number(io, &v)) goto Done;
+        Offsets[i] = v;
+    }
+
+    mpe = cmsStageAllocMatrix(self ->ContextID, OutputChans, InputChans, Matrix, Offsets);
+    if (mpe != NULL) *nItems = 1;
+
+Done:
+    // The temporary arrays are released on every path
+    if (Matrix != NULL) _cmsFree(self ->ContextID, Matrix);
+    if (Offsets != NULL) _cmsFree(self ->ContextID, Offsets);
+
+    return mpe;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+// Writes a matrix element: channel counts, then the coefficients and the
+// offsets as 32-bit floats. When the stage has no offsets, zeros are written.
+static
+cmsBool  Type_MPEmatrix_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsStage* Stage = (cmsStage*) Ptr;
+    _cmsStageMatrixData* Data = (_cmsStageMatrixData*) Stage ->Data;
+    cmsUInt32Number k, nCoefs;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) Stage ->InputChannels)) return FALSE;
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) Stage ->OutputChannels)) return FALSE;
+
+    nCoefs = Stage ->InputChannels * Stage ->OutputChannels;
+
+    for (k = 0; k < nCoefs; k++) {
+        cmsFloat32Number Coef = (cmsFloat32Number) Data ->Double[k];
+
+        if (!_cmsWriteFloat32Number(io, Coef)) return FALSE;
+    }
+
+    for (k = 0; k < Stage ->OutputChannels; k++) {
+        cmsFloat32Number Off;
+
+        if (Data ->Offset != NULL)
+            Off = (cmsFloat32Number) Data ->Offset[k];
+        else
+            Off = 0;
+
+        if (!_cmsWriteFloat32Number(io, Off)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+
+// Reads a floating point CLUT element: channel counts, 16 bytes of grid
+// dimensions and then the table itself as 32-bit floats.
+// Returns a newly allocated CLUT stage and sets *nItems to 1. On failure NULL is
+// returned; *nItems is set to 0 on the paths that go through Error, but is left
+// untouched when the channel counts cannot be read.
+static
+void *Type_MPEclut_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsStage* mpe = NULL;
+    cmsUInt16Number InputChans, OutputChans;
+    cmsUInt8Number Dimensions8[16];
+    cmsUInt32Number i, nMaxGrids, GridPoints[MAX_INPUT_DIMENSIONS];
+    _cmsStageCLutData* clut;
+
+    if (!_cmsReadUInt16Number(io, &InputChans)) return NULL;
+    if (!_cmsReadUInt16Number(io, &OutputChans)) return NULL;
+
+    if (InputChans == 0) goto Error;
+    if (OutputChans == 0) goto Error;
+
+    // All 16 dimension bytes are always read, whatever the channel count is
+    if (io ->Read(io, Dimensions8, sizeof(cmsUInt8Number), 16) != 16)
+        goto Error;
+
+    // Copy MAX_INPUT_DIMENSIONS at most. Expand to cmsUInt32Number
+    nMaxGrids = InputChans > MAX_INPUT_DIMENSIONS ? (cmsUInt32Number) MAX_INPUT_DIMENSIONS : InputChans;
+
+    for (i = 0; i < nMaxGrids; i++) {
+        if (Dimensions8[i] == 1) goto Error; // Impossible value, 0 for no CLUT and then 2 at least
+        GridPoints[i] = (cmsUInt32Number)Dimensions8[i];
+    }
+    
+    // Allocate the true CLUT
+    // NOTE(review): InputChans is passed unclamped; presumably the allocator rejects counts above MAX_INPUT_DIMENSIONS - confirm
+    mpe = cmsStageAllocCLutFloatGranular(self ->ContextID, GridPoints, InputChans, OutputChans, NULL);
+    if (mpe == NULL) goto Error;
+
+    // Read the table; values are stored as read, no range checking is done here
+    clut = (_cmsStageCLutData*) mpe ->Data;
+    for (i=0; i < clut ->nEntries; i++) {
+
+        if (!_cmsReadFloat32Number(io, &clut->Tab.TFloat[i])) goto Error;       
+    }
+
+    *nItems = 1;
+    return mpe;
+
+Error:
+    // mpe is still NULL when the failure happened before the allocation
+    *nItems = 0;
+    if (mpe != NULL) cmsStageFree(mpe);
+    return NULL;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+// Write a CLUT in floating point: channel counts, 16 bytes of grid dimensions
+// and then every table entry as a 32-bit float. Fails on stages with too many
+// input channels or without floating point values.
+static
+cmsBool  Type_MPEclut_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsUInt8Number GridDims[16];  // 16 because the spec says 16 and not max number of channels
+    cmsUInt32Number k;
+    cmsStage* Stage = (cmsStage*) Ptr;
+    _cmsStageCLutData* Data = (_cmsStageCLutData*) Stage ->Data;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    // Check for maximum number of channels supported by lcms
+    if (Stage ->InputChannels > MAX_INPUT_DIMENSIONS) return FALSE;
+
+    // Only floats are supported in MPE
+    if (!Data ->HasFloatValues) return FALSE;
+
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) Stage ->InputChannels)) return FALSE;
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) Stage ->OutputChannels)) return FALSE;
+
+    // Unused dimension slots stay at zero
+    memset(GridDims, 0, sizeof(GridDims));
+
+    for (k = 0; k < Stage ->InputChannels; k++) {
+        GridDims[k] = (cmsUInt8Number) Data ->Params ->nSamples[k];
+    }
+
+    if (!io ->Write(io, 16, GridDims)) return FALSE;
+
+    for (k = 0; k < Data ->nEntries; k++) {
+        if (!_cmsWriteFloat32Number(io, Data ->Tab.TFloat[k])) return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+
+// This is the list of built-in MPE types
+// Entries are chained through the address of the next array element; the last
+// one ends the list with NULL. The first two entries have no handler functions
+// at all, so elements of those types are accepted but skipped when reading.
+static _cmsTagTypeLinkedList SupportedMPEtypes[] = {
+
+{{ (cmsTagTypeSignature) cmsSigBAcsElemType, NULL, NULL, NULL, NULL, NULL, 0 }, &SupportedMPEtypes[1] },   // Ignore those elements for now
+{{ (cmsTagTypeSignature) cmsSigEAcsElemType, NULL, NULL, NULL, NULL, NULL, 0 }, &SupportedMPEtypes[2] },   // (That's what the spec says)
+
+{TYPE_MPE_HANDLER((cmsTagTypeSignature) cmsSigCurveSetElemType,     MPEcurve),      &SupportedMPEtypes[3] },
+{TYPE_MPE_HANDLER((cmsTagTypeSignature) cmsSigMatrixElemType,       MPEmatrix),     &SupportedMPEtypes[4] },
+{TYPE_MPE_HANDLER((cmsTagTypeSignature) cmsSigCLutElemType,         MPEclut),        NULL },
+};
+
+// Plug-in chunk for MPE types, initialized with no entries
+// NOTE(review): presumably the default used when no MPE plug-in is registered - confirm
+_cmsTagTypePluginChunkType _cmsMPETypePluginChunk = { NULL };
+
+// Position-table callback: reads one MPE element and appends it to the pipeline
+// passed as Cargo. Elements whose handler has no read function are skipped.
+// Returns FALSE on read error, unknown element type or failure to insert the stage.
+static
+cmsBool ReadMPEElem(struct _cms_typehandler_struct* self,
+                    cmsIOHANDLER* io,
+                    void* Cargo,
+                    cmsUInt32Number n,
+                    cmsUInt32Number SizeOfTag)
+{
+    cmsStageSignature Sig;
+    cmsTagTypeHandler* Handler;
+    cmsUInt32Number nRead;
+    cmsStage* Stage;
+    cmsPipeline* Lut = (cmsPipeline*) Cargo;
+    _cmsTagTypePluginChunkType* Plugins = ( _cmsTagTypePluginChunkType*) _cmsContextGetClientChunk(self->ContextID, MPEPlugin);
+
+    cmsUNUSED_PARAMETER(n);
+
+    // Element signature, followed by the reserved placeholder
+    if (!_cmsReadUInt32Number(io, (cmsUInt32Number*) &Sig)) return FALSE;
+    if (!_cmsReadUInt32Number(io, NULL)) return FALSE;
+
+    // Plug-in handlers and built-in ones are both searched
+    Handler = GetHandler((cmsTagTypeSignature) Sig, Plugins ->TagTypes, SupportedMPEtypes);
+    if (Handler == NULL)  {
+
+        char SigText[5];
+
+        _cmsTagSignature2String(SigText, (cmsTagSignature) Sig);
+        cmsSignalError(self ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown MPE type '%s' found.", SigText);
+        return FALSE;
+    }
+
+    // No read method means the element is just ignored
+    // (valid for cmsSigBAcsElemType and cmsSigEAcsElemType)
+    if (Handler ->ReadPtr == NULL) return TRUE;
+
+    // This is a real element. No size is given
+    Stage = (cmsStage*) Handler ->ReadPtr(self, io, &nRead, SizeOfTag);
+
+    if (!cmsPipelineInsertStage(Lut, cmsAT_END, Stage))
+        return FALSE;
+
+    return TRUE;
+}
+
+
+// This is the main dispatcher for MPE: reads the channel counts and the element
+// count, then every element through a position table, and finally checks that
+// the resulting pipeline has the announced number of channels.
+static
+void *Type_MPE_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt16Number nIn, nOut;
+    cmsUInt32Number nElements;
+    cmsUInt32Number Base;
+    cmsPipeline* Lut;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+
+    // Element offsets are relative to the start of the tag base
+    Base = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    if (!_cmsReadUInt16Number(io, &nIn)) return NULL;
+    if (!_cmsReadUInt16Number(io, &nOut)) return NULL;
+
+    if (nIn == 0 || nIn >= cmsMAXCHANNELS) return NULL;
+    if (nOut == 0 || nOut >= cmsMAXCHANNELS) return NULL;
+
+    // Start from an empty pipeline
+    Lut = cmsPipelineAlloc(self ->ContextID, nIn, nOut);
+    if (Lut == NULL) return NULL;
+
+    if (_cmsReadUInt32Number(io, &nElements) &&
+        ReadPositionTable(self, io, nElements, Base, Lut, ReadMPEElem) &&
+        nIn == Lut ->InputChannels &&
+        nOut == Lut ->OutputChannels) {
+
+        *nItems = 1;
+        return Lut;
+    }
+
+    // Read failure or channel count mismatch
+    cmsPipelineFree(Lut);
+    *nItems = 0;
+    return NULL;
+}
+
+
+
+// This one is a little bit more complex, so we don't use position tables this time.
+// Layout written: input channels (16 bits), output channels (16 bits), element
+// count (32 bits), a directory of (offset, size) pairs and then the elements.
+// The directory is written first as zeros and filled in once the position and
+// size of every element is known.
+// Returns TRUE on success, FALSE on allocation failure, unknown or non-writable
+// element type, or any failed write/seek.
+static
+cmsBool Type_MPE_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsUInt32Number i, BaseOffset, DirectoryPos, CurrentPos;
+    cmsUInt32Number inputChan, outputChan;
+    cmsUInt32Number ElemCount;
+    cmsUInt32Number *ElementOffsets = NULL, *ElementSizes = NULL, Before;
+    cmsStageSignature ElementSig;
+    cmsPipeline* Lut = (cmsPipeline*) Ptr;
+    cmsStage* Elem = Lut ->Elements;
+    cmsTagTypeHandler* TypeHandler;
+    _cmsTagTypePluginChunkType* MPETypePluginChunk  = ( _cmsTagTypePluginChunkType*) _cmsContextGetClientChunk(self->ContextID, MPEPlugin);
+
+    BaseOffset = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    inputChan  = cmsPipelineInputChannels(Lut);
+    outputChan = cmsPipelineOutputChannels(Lut);
+    ElemCount  = cmsPipelineStageCount(Lut);
+
+    ElementOffsets = (cmsUInt32Number *) _cmsCalloc(self ->ContextID, ElemCount, sizeof(cmsUInt32Number));
+    if (ElementOffsets == NULL) goto Error;
+
+    ElementSizes = (cmsUInt32Number *) _cmsCalloc(self ->ContextID, ElemCount, sizeof(cmsUInt32Number));
+    if (ElementSizes == NULL) goto Error;
+
+    // Write the head. The element count is a full 32-bit field, so it is written
+    // untruncated and always matches the number of directory entries below.
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) inputChan)) goto Error;
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) outputChan)) goto Error;
+    if (!_cmsWriteUInt32Number(io, ElemCount)) goto Error;
+
+    DirectoryPos = io ->Tell(io);
+
+    // Write a fake directory to be filled later on
+    for (i=0; i < ElemCount; i++) {
+        if (!_cmsWriteUInt32Number(io, 0)) goto Error;  // Offset
+        if (!_cmsWriteUInt32Number(io, 0)) goto Error;  // size
+    }
+
+    // Write each single tag. Keep track of the size as well.
+    for (i=0; i < ElemCount; i++) {
+
+        ElementOffsets[i] = io ->Tell(io) - BaseOffset;
+
+        ElementSig = Elem ->Type;
+
+        TypeHandler = GetHandler((cmsTagTypeSignature) ElementSig, MPETypePluginChunk->TagTypes, SupportedMPEtypes);
+        if (TypeHandler == NULL)  {
+
+                char String[5];
+
+                _cmsTagSignature2String(String, (cmsTagSignature) ElementSig);
+
+                 // An unknown element was found.
+                 cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Found unknown MPE type '%s'", String);
+                 goto Error;
+        }
+
+        // Some handlers have no write function (the placeholder element types
+        // in the built-in list); such a stage cannot be serialized.
+        if (TypeHandler ->WritePtr == NULL) goto Error;
+
+        if (!_cmsWriteUInt32Number(io, ElementSig)) goto Error;
+        if (!_cmsWriteUInt32Number(io, 0)) goto Error;
+        Before = io ->Tell(io);
+        if (!TypeHandler ->WritePtr(self, io, Elem, 1)) goto Error;
+        if (!_cmsWriteAlignment(io)) goto Error;
+
+        // Size is measured after the signature and reserved words, alignment included
+        ElementSizes[i] = io ->Tell(io) - Before;
+
+        Elem = Elem ->Next;
+    }
+
+    // Write the directory
+    CurrentPos = io ->Tell(io);
+
+    if (!io ->Seek(io, DirectoryPos)) goto Error;
+
+    for (i=0; i < ElemCount; i++) {
+        if (!_cmsWriteUInt32Number(io, ElementOffsets[i])) goto Error;
+        if (!_cmsWriteUInt32Number(io, ElementSizes[i])) goto Error;
+    }
+
+    // Back to the end of the data, so subsequent writes continue from there
+    if (!io ->Seek(io, CurrentPos)) goto Error;
+
+    if (ElementOffsets != NULL) _cmsFree(self ->ContextID, ElementOffsets);
+    if (ElementSizes != NULL) _cmsFree(self ->ContextID, ElementSizes);
+    return TRUE;
+
+Error:
+    if (ElementOffsets != NULL) _cmsFree(self ->ContextID, ElementOffsets);
+    if (ElementSizes != NULL) _cmsFree(self ->ContextID, ElementSizes);
+    return FALSE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+
+// Duplicates an MPE tag, which is held as a pipeline.
+static
+void* Type_MPE_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsPipeline* Copy;
+
+    cmsUNUSED_PARAMETER(self);
+    cmsUNUSED_PARAMETER(n);
+
+    Copy = cmsPipelineDup((cmsPipeline*) Ptr);
+    return (void*) Copy;
+}
+
+// Releases an MPE tag, which is held as a pipeline.
+static
+void Type_MPE_Free(struct _cms_typehandler_struct* self, void *Ptr)
+{
+    cmsPipeline* Lut = (cmsPipeline*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsPipelineFree(Lut);
+}
+
+
+// ********************************************************************************
+// Type cmsSigVcgtType
+// ********************************************************************************
+
+
+// Values of the tag type field read by the vcgt reader:
+// 0 means gamma is stored as a table, 1 means it is stored as a formula
+#define cmsVideoCardGammaTableType    0
+#define cmsVideoCardGammaFormulaType  1
+
+// Used internally
+// Parameters of the formula flavor; the reader keeps one of these per colorant
+typedef struct {
+    double Gamma;
+    double Min;
+    double Max;
+} _cmsVCGTGAMMA;
+
+
+// Reads a video card gamma ('vcgt') tag. Two encodings are understood: a sampled table
+// (three channels, one or two bytes per entry) and a Gamma/Min/Max formula per channel.
+// Returns an array of three tone curves and sets *nItems to 1, or returns NULL with
+// *nItems left at 0 on any failure.
+static
+void *Type_vcgt_Read(struct _cms_typehandler_struct* self,
+                     cmsIOHANDLER* io,
+                     cmsUInt32Number* nItems,
+                     cmsUInt32Number SizeOfTag)
+{
+    cmsUInt32Number Flavor, Channel, Entry;
+    cmsToneCurve** Triple;
+
+    *nItems = 0;
+
+    // The leading field selects the encoding
+    if (!_cmsReadUInt32Number(io, &Flavor)) return NULL;
+
+    // Holder for the three per-channel curves
+    Triple = (cmsToneCurve**) _cmsCalloc(self ->ContextID, 3, sizeof(cmsToneCurve*));
+    if (Triple == NULL) return NULL;
+
+    if (Flavor == cmsVideoCardGammaTableType) {
+
+        // Gamma is stored as a table
+        cmsUInt16Number ChannelCount, EntryCount, EntryBytes;
+
+        // Only three channels are accepted (monochrome is not supported)
+        if (!_cmsReadUInt16Number(io, &ChannelCount)) goto Error;
+
+        if (ChannelCount != 3) {
+            cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported number of channels for VCGT '%d'", ChannelCount);
+            goto Error;
+        }
+
+        // Entries per channel, then bytes per entry
+        if (!_cmsReadUInt16Number(io, &EntryCount)) goto Error;
+        if (!_cmsReadUInt16Number(io, &EntryBytes)) goto Error;
+
+        // Adobe's quirk fixup for broken profiles: 256 one-byte entries in a
+        // tag of size 1576 are read as two-byte entries
+        if (EntryBytes == 1 && EntryCount == 256 && SizeOfTag == 1576)
+            EntryBytes = 2;
+
+        for (Channel = 0; Channel < 3; Channel++) {
+
+            Triple[Channel] = cmsBuildTabulatedToneCurve16(self ->ContextID, EntryCount, NULL);
+            if (Triple[Channel] == NULL) goto Error;
+
+            if (EntryBytes == 1) {
+
+                // One byte per entry, 0..255, widened to 16 bits
+                for (Entry = 0; Entry < EntryCount; Entry++) {
+
+                    cmsUInt8Number Sample;
+
+                    if (!_cmsReadUInt8Number(io, &Sample)) goto Error;
+                    Triple[Channel] ->Table16[Entry] = FROM_8_TO_16(Sample);
+                }
+            }
+            else if (EntryBytes == 2) {
+
+                // One word per entry, 0..65535
+                if (!_cmsReadUInt16Array(io, EntryCount, Triple[Channel]->Table16)) goto Error;
+            }
+            else {
+
+                // Any other depth is unsupported
+                cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported bit depth for VCGT '%d'", EntryBytes * 8);
+                goto Error;
+            }
+        }
+    }
+    else if (Flavor == cmsVideoCardGammaFormulaType) {
+
+        // Gamma is stored as a formula, one Gamma/Min/Max triplet per channel
+        for (Channel = 0; Channel < 3; Channel++) {
+
+            _cmsVCGTGAMMA Formula;
+            double Params[10];
+
+            if (!_cmsRead15Fixed16Number(io, &Formula.Gamma)) goto Error;
+            if (!_cmsRead15Fixed16Number(io, &Formula.Min)) goto Error;
+            if (!_cmsRead15Fixed16Number(io, &Formula.Max)) goto Error;
+
+            // Parametric curve type 5 is:
+            //   Y = (aX + b)^Gamma + e | X >= d
+            //   Y = cX + f             | X < d
+            //
+            // whereas the vcgt formula is:
+            //   Y = (Max - Min) * (X ^ Gamma) + Min
+            //
+            // so the translation is a = (Max - Min) ^ (1 / Gamma), e = Min
+            // and b = c = d = f = 0
+            Params[0] = Formula.Gamma;
+            Params[1] = pow((Formula.Max - Formula.Min), (1.0 / Formula.Gamma));
+            Params[2] = 0;
+            Params[3] = 0;
+            Params[4] = 0;
+            Params[5] = Formula.Min;
+            Params[6] = 0;
+
+            Triple[Channel] = cmsBuildParametricToneCurve(self ->ContextID, 5, Params);
+            if (Triple[Channel] == NULL) goto Error;
+        }
+    }
+    else {
+
+        // Neither table nor formula
+        cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported tag type for VCGT '%d'", Flavor);
+        goto Error;
+    }
+
+    *nItems = 1;
+    return (void*) Triple;
+
+// Release the curves built so far and the holder itself
+Error:
+    cmsFreeToneCurveTriple(Triple);
+    _cmsFree(self ->ContextID, Triple);
+    return NULL;
+}
+
+
+// Writes a 'vcgt' tag. Not every flavor is supported on output: when all three curves are
+// parametric of type 5 the formula encoding is written, otherwise each curve is sampled
+// into a table of 256 16-bit words.
+static
+cmsBool Type_vcgt_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsToneCurve** Triple = (cmsToneCurve**) Ptr;
+    cmsUInt32Number Channel, Step;
+    cmsBool AllFormula;
+
+    cmsUNUSED_PARAMETER(self);
+    cmsUNUSED_PARAMETER(nItems);
+
+    AllFormula = cmsGetToneCurveParametricType(Triple[0]) == 5 &&
+                 cmsGetToneCurveParametricType(Triple[1]) == 5 &&
+                 cmsGetToneCurveParametricType(Triple[2]) == 5;
+
+    if (!AllFormula) {
+
+        // Always store as a table of 256 words: header is type, channels,
+        // entries per channel, bytes per entry
+        if (!_cmsWriteUInt32Number(io, cmsVideoCardGammaTableType)) return FALSE;
+        if (!_cmsWriteUInt16Number(io, 3)) return FALSE;
+        if (!_cmsWriteUInt16Number(io, 256)) return FALSE;
+        if (!_cmsWriteUInt16Number(io, 2)) return FALSE;
+
+        for (Channel = 0; Channel < 3; Channel++) {
+
+            for (Step = 0; Step < 256; Step++) {
+
+                // Evaluate the curve at Step/255 and scale the result to a 16-bit word
+                cmsFloat32Number Value = cmsEvalToneCurveFloat(Triple[Channel], (cmsFloat32Number) (Step / 255.0));
+                cmsUInt16Number  Word  = _cmsQuickSaturateWord(Value * 65535.0);
+
+                if (!_cmsWriteUInt16Number(io, Word)) return FALSE;
+            }
+        }
+
+        return TRUE;
+    }
+
+    // Formula encoding: type field followed by Gamma, Min and Max for each channel
+    if (!_cmsWriteUInt32Number(io, cmsVideoCardGammaFormulaType)) return FALSE;
+
+    for (Channel = 0; Channel < 3; Channel++) {
+
+        _cmsVCGTGAMMA Formula;
+
+        // Recover the vcgt parameters from segment 0 of the type 5 curve
+        Formula.Gamma = Triple[Channel] ->Segments[0].Params[0];
+        Formula.Min   = Triple[Channel] ->Segments[0].Params[5];
+        Formula.Max   = pow(Triple[Channel] ->Segments[0].Params[1], Formula.Gamma) + Formula.Min;
+
+        if (!_cmsWrite15Fixed16Number(io, Formula.Gamma)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, Formula.Min)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, Formula.Max)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+// Duplicates the three tone curves held by a 'vcgt' tag.
+// Returns a newly allocated array of three curves, or NULL when the array or any one of
+// the curves cannot be duplicated; in that case everything already copied is released.
+static
+void* Type_vcgt_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsToneCurve** OldCurves =  (cmsToneCurve**) Ptr;
+    cmsToneCurve** NewCurves;
+    cmsUInt32Number i;
+
+    NewCurves = ( cmsToneCurve**) _cmsCalloc(self ->ContextID, 3, sizeof(cmsToneCurve*));
+    if (NewCurves == NULL) return NULL;
+
+    for (i=0; i < 3; i++) {
+
+        NewCurves[i] = cmsDupToneCurve(OldCurves[i]);
+
+        // A half-copied triple must not escape: Type_vcgt_Write reads all three curves.
+        // The cleanup below is the same sequence Type_vcgt_Read uses on its error path
+        // for a partially filled array.
+        if (NewCurves[i] == NULL) {
+
+            cmsFreeToneCurveTriple(NewCurves);
+            _cmsFree(self ->ContextID, NewCurves);
+            return NULL;
+        }
+    }
+
+    return (void*) NewCurves;
+
+    cmsUNUSED_PARAMETER(n);
+}
+
+
+// Releases a 'vcgt' tag: first the three curves, then the array that holds them.
+static
+void Type_vcgt_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsToneCurve** Triple = (cmsToneCurve**) Ptr;
+
+    cmsFreeToneCurveTriple(Triple);
+    _cmsFree(self ->ContextID, Ptr);
+}
+
+
+// ********************************************************************************
+// Type cmsSigDictType
+// ********************************************************************************
+
+// Single column of the table can point to wchar or MLUC elements. Holds arrays of data:
+// one offset and one size per dictionary record, plus the context used to allocate
+// (and later free) both arrays.
+typedef struct {
+    cmsContext ContextID;
+    cmsUInt32Number *Offsets;
+    cmsUInt32Number *Sizes;
+} _cmsDICelem;
+
+// The whole offset/size directory of a dictionary. Name and Value are always allocated;
+// DisplayName and DisplayValue are only allocated by AllocArray for record lengths above
+// 16 and 24 respectively, and are left zeroed otherwise.
+typedef struct {
+    _cmsDICelem Name, Value, DisplayName, DisplayValue;
+
+} _cmsDICarray;
+
+// Allocate an empty array element: Count offsets and Count sizes, owned by ContextID.
+// Returns TRUE on success. On failure returns FALSE and leaves e->Offsets NULL, so a
+// later FreeElem/FreeArray on the same element cannot release the block a second time.
+static
+cmsBool AllocElem(cmsContext ContextID, _cmsDICelem* e,  cmsUInt32Number Count)
+{
+    e->Offsets = (cmsUInt32Number *) _cmsCalloc(ContextID, Count, sizeof(cmsUInt32Number));
+    if (e->Offsets == NULL) return FALSE;
+
+    e->Sizes = (cmsUInt32Number *) _cmsCalloc(ContextID, Count, sizeof(cmsUInt32Number));
+    if (e->Sizes == NULL) {
+
+        _cmsFree(ContextID, e -> Offsets);
+
+        // AllocArray reacts to this failure by calling FreeArray, which frees every
+        // column whose Offsets pointer is non-NULL; do not leave a dangling pointer
+        e->Offsets = NULL;
+        return FALSE;
+    }
+
+    e ->ContextID = ContextID;
+    return TRUE;
+}
+
+// Free an array element: both columns are released with the context stored in the
+// element, and the pointers are cleared so the element can be freed again harmlessly
+static
+void FreeElem(_cmsDICelem* e)
+{
+    cmsContext Owner = e ->ContextID;
+
+    if (e ->Offsets != NULL) {
+        _cmsFree(Owner, e ->Offsets);
+        e ->Offsets = NULL;
+    }
+
+    if (e ->Sizes != NULL) {
+        _cmsFree(Owner, e ->Sizes);
+        e ->Sizes = NULL;
+    }
+}
+
+// Get rid of whole array: every column that has an Offsets block is released
+static
+void FreeArray( _cmsDICarray* a)
+{
+    _cmsDICelem* Columns[4];
+    int k;
+
+    Columns[0] = &a ->Name;
+    Columns[1] = &a ->Value;
+    Columns[2] = &a ->DisplayName;
+    Columns[3] = &a ->DisplayValue;
+
+    for (k = 0; k < 4; k++) {
+
+        if (Columns[k] ->Offsets != NULL)
+            FreeElem(Columns[k]);
+    }
+}
+
+
+// Allocate whole array. Name and Value columns are always created; DisplayName is added
+// for record lengths above 16 and DisplayValue for record lengths above 24.
+// On failure everything allocated so far is released and FALSE is returned.
+static
+cmsBool AllocArray(cmsContext ContextID, _cmsDICarray* a, cmsUInt32Number Count, cmsUInt32Number Length)
+{
+    cmsBool Ok;
+
+    // Start from empty columns
+    memset(a, 0, sizeof(_cmsDICarray));
+
+    Ok = AllocElem(ContextID, &a ->Name, Count) &&
+         AllocElem(ContextID, &a ->Value, Count);
+
+    // Optional columns, depending on record size
+    if (Ok && Length > 16)
+        Ok = AllocElem(ContextID, &a ->DisplayName, Count);
+
+    if (Ok && Length > 24)
+        Ok = AllocElem(ContextID, &a ->DisplayValue, Count);
+
+    if (!Ok)
+        FreeArray(a);
+
+    return Ok;
+}
+
+// Read one element: the offset/size pair of record i. Non-zero offsets are rebased
+// by BaseOffset.
+static
+cmsBool ReadOneElem(cmsIOHANDLER* io,  _cmsDICelem* e, cmsUInt32Number i, cmsUInt32Number BaseOffset)
+{
+    cmsUInt32Number* Offset = &e ->Offsets[i];
+    cmsUInt32Number* Size   = &e ->Sizes[i];
+
+    if (!_cmsReadUInt32Number(io, Offset)) return FALSE;
+    if (!_cmsReadUInt32Number(io, Size)) return FALSE;
+
+    // An offset of zero has special meaning and shall be preserved
+    if (*Offset != 0)
+        *Offset += BaseOffset;
+
+    return TRUE;
+}
+
+
+// Reads the offset/size directory: for each of the Count records, the Name and Value
+// pairs followed by the DisplayName pair (Length > 16) and DisplayValue pair (Length > 24).
+static
+cmsBool ReadOffsetArray(cmsIOHANDLER* io,  _cmsDICarray* a, cmsUInt32Number Count, cmsUInt32Number Length, cmsUInt32Number BaseOffset)
+{
+    cmsBool HasDisplayName  = (Length > 16);
+    cmsBool HasDisplayValue = (Length > 24);
+    cmsUInt32Number Row;
+
+    for (Row = 0; Row < Count; Row++) {
+
+        if (!ReadOneElem(io, &a ->Name, Row, BaseOffset)) return FALSE;
+        if (!ReadOneElem(io, &a ->Value, Row, BaseOffset)) return FALSE;
+
+        if (HasDisplayName && !ReadOneElem(io, &a ->DisplayName, Row, BaseOffset)) return FALSE;
+        if (HasDisplayValue && !ReadOneElem(io, &a ->DisplayValue, Row, BaseOffset)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+// Write one element: the offset of record i followed by its size
+static
+cmsBool WriteOneElem(cmsIOHANDLER* io,  _cmsDICelem* e, cmsUInt32Number i)
+{
+    cmsUInt32Number Offset = e ->Offsets[i];
+    cmsUInt32Number Size   = e ->Sizes[i];
+
+    if (_cmsWriteUInt32Number(io, Offset)) {
+
+        if (_cmsWriteUInt32Number(io, Size))
+            return TRUE;
+    }
+
+    return FALSE;
+}
+
+// Writes the offset/size directory in the same order ReadOffsetArray reads it: Name,
+// Value, then DisplayName (Length > 16) and DisplayValue (Length > 24) for each record.
+static
+cmsBool WriteOffsetArray(cmsIOHANDLER* io,  _cmsDICarray* a, cmsUInt32Number Count, cmsUInt32Number Length)
+{
+    cmsBool HasDisplayName  = (Length > 16);
+    cmsBool HasDisplayValue = (Length > 24);
+    cmsUInt32Number Row;
+
+    for (Row = 0; Row < Count; Row++) {
+
+        if (!WriteOneElem(io, &a ->Name, Row)) return FALSE;
+        if (!WriteOneElem(io, &a ->Value, Row)) return FALSE;
+
+        if (HasDisplayName && !WriteOneElem(io, &a ->DisplayName, Row)) return FALSE;
+        if (HasDisplayValue && !WriteOneElem(io, &a ->DisplayValue, Row)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+// Reads the string of record i into a newly allocated, zero-terminated wchar_t buffer
+// stored in *wcstr. The stored size is taken as a count of 16-bit units.
+// An offset of zero denotes an undefined string: *wcstr is NULL and TRUE is returned.
+// Returns FALSE on seek, size, allocation or read failure; *wcstr is NULL in that case,
+// so the caller never ends up holding a pointer to released memory.
+static
+cmsBool ReadOneWChar(cmsIOHANDLER* io,  _cmsDICelem* e, cmsUInt32Number i, wchar_t ** wcstr)
+{
+    cmsUInt32Number nChars;
+
+    *wcstr = NULL;
+
+    // Special case for undefined strings (see ICC Votable
+    // Proposal Submission, Dictionary Type and Metadata TAG Definition)
+    if (e -> Offsets[i] == 0)
+        return TRUE;
+
+    if (!io -> Seek(io, e -> Offsets[i])) return FALSE;
+
+    nChars = e ->Sizes[i] / sizeof(cmsUInt16Number);
+
+    // The size comes straight from the profile. Refuse counts for which the buffer
+    // size, terminator included, does not fit in 32 bits: a wrapped or truncated size
+    // would yield a buffer smaller than the nChars characters written into it below.
+    // NOTE(review): assumes _cmsMallocZero takes a cmsUInt32Number size, as declared
+    // in lcms2_plugin.h -- confirm against the vendored header.
+    if (nChars >= (cmsUInt32Number) (0xFFFFFFFFU / sizeof(wchar_t))) return FALSE;
+
+    *wcstr = (wchar_t*) _cmsMallocZero(e ->ContextID, (cmsUInt32Number) ((nChars + 1) * sizeof(wchar_t)));
+    if (*wcstr == NULL) return FALSE;
+
+    if (!_cmsReadWCharArray(io, nChars, *wcstr)) {
+
+        _cmsFree(e ->ContextID, *wcstr);
+        *wcstr = NULL;
+        return FALSE;
+    }
+
+    // End of string marker
+    (*wcstr)[nChars] = 0;
+    return TRUE;
+}
+
+// Number of wide characters in s before the terminating zero
+static
+cmsUInt32Number mywcslen(const wchar_t *s)
+{
+    size_t Len = 0;
+
+    while (s[Len] != 0)
+        Len++;
+
+    return (cmsUInt32Number) Len;
+}
+
+// Writes one wide string at the current position and records where it went: the offset
+// relative to BaseOffset and the number of bytes actually written. A NULL string writes
+// nothing and is recorded with offset and size both zero.
+static
+cmsBool WriteOneWChar(cmsIOHANDLER* io,  _cmsDICelem* e, cmsUInt32Number i, const wchar_t * wcstr, cmsUInt32Number BaseOffset)
+{
+    cmsUInt32Number Start = io ->Tell(io);
+    cmsUInt32Number Chars;
+
+    if (wcstr == NULL) {
+
+        e ->Offsets[i] = 0;
+        e ->Sizes[i] = 0;
+        return TRUE;
+    }
+
+    e ->Offsets[i] = Start - BaseOffset;
+
+    Chars = mywcslen(wcstr);
+    if (!_cmsWriteWCharArray(io,  Chars, wcstr)) return FALSE;
+
+    e ->Sizes[i] = io ->Tell(io) - Start;
+    return TRUE;
+}
+
+// Reads the multi-localized string of record i into *mlu. A zero offset or a zero size
+// yields a NULL MLU and still counts as success.
+static
+cmsBool ReadOneMLUC(struct _cms_typehandler_struct* self, cmsIOHANDLER* io,  _cmsDICelem* e, cmsUInt32Number i, cmsMLU** mlu)
+{
+    cmsUInt32Number Ignored = 0;
+
+    if (e ->Offsets[i] != 0 && e ->Sizes[i] != 0) {
+
+        if (!io ->Seek(io, e ->Offsets[i])) return FALSE;
+
+        *mlu = (cmsMLU*) Type_MLU_Read(self, io, &Ignored, e ->Sizes[i]);
+        return *mlu != NULL;
+    }
+
+    // A way to get null MLUCs
+    *mlu = NULL;
+    return TRUE;
+}
+
+// Writes one multi-localized string at the current position and records its offset
+// (relative to BaseOffset) and its size in bytes for record i.
+static
+cmsBool WriteOneMLUC(struct _cms_typehandler_struct* self, cmsIOHANDLER* io,  _cmsDICelem* e, cmsUInt32Number i, const cmsMLU* mlu, cmsUInt32Number BaseOffset)
+{
+    cmsUInt32Number Start;
+
+    // Special case for undefined strings (see ICC Votable
+    // Proposal Submission, Dictionary Type and Metadata TAG Definition)
+    if (mlu == NULL) {
+
+        e ->Offsets[i] = 0;
+        e ->Sizes[i] = 0;
+        return TRUE;
+    }
+
+    Start = io ->Tell(io);
+    e ->Offsets[i] = Start - BaseOffset;
+
+    if (!Type_MLU_Write(self, io, (void*) mlu, 1)) return FALSE;
+
+    // Size is whatever the MLU writer advanced the stream by
+    e ->Sizes[i] = io ->Tell(io) - Start;
+    return TRUE;
+}
+
+
+// Reads a dictionary tag. The tag holds a record count, a record length (16, 24 or 32),
+// a directory of offset/size pairs and then the strings themselves.
+// Returns a dictionary handle and sets *nItems to 1, or NULL on any failure. Nothing
+// read for a partially processed record is leaked on the failure path.
+static
+void *Type_Dictionary_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsHANDLE hDict;
+    cmsUInt32Number i, Count, Length;
+    cmsUInt32Number BaseOffset;
+    _cmsDICarray a;
+    wchar_t *NameWCS = NULL, *ValueWCS = NULL;
+    cmsMLU *DisplayNameMLU = NULL, *DisplayValueMLU=NULL;
+    cmsBool rc;
+
+    *nItems = 0;
+
+    // Get actual position as a basis for element offsets
+    BaseOffset = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    // Get name-value record count
+    if (!_cmsReadUInt32Number(io, &Count)) return NULL;
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+    // Get rec length
+    if (!_cmsReadUInt32Number(io, &Length)) return NULL;
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+    // Check for valid lengths
+    if (Length != 16 && Length != 24 && Length != 32) {
+         cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown record length in dictionary '%d'", Length);
+         return NULL;
+    }
+
+    // Creates an empty dictionary
+    hDict = cmsDictAlloc(self -> ContextID);
+    if (hDict == NULL) return NULL;
+
+    // On depending on record size, create column arrays
+    // (AllocArray zeroes 'a' first, so the Error path may always call FreeArray)
+    if (!AllocArray(self -> ContextID, &a, Count, Length)) goto Error;
+
+    // Read column arrays
+    if (!ReadOffsetArray(io, &a, Count, Length, BaseOffset)) goto Error;
+
+    // Seek to each element and read it
+    for (i=0; i < Count; i++) {
+
+        // Every record starts owning nothing, so the Error path can tell exactly
+        // which objects of the current record still have to be released
+        NameWCS = ValueWCS = NULL;
+        DisplayNameMLU = DisplayValueMLU = NULL;
+
+        if (!ReadOneWChar(io, &a.Name, i, &NameWCS)) {
+
+            // On failure the helper has already released anything it allocated
+            NameWCS = NULL;
+            goto Error;
+        }
+
+        if (!ReadOneWChar(io, &a.Value, i, &ValueWCS)) {
+
+            ValueWCS = NULL;
+            goto Error;
+        }
+
+        // A failed ReadOneMLUC leaves its output NULL (or untouched, which is NULL here)
+        if (Length > 16) {
+            if (!ReadOneMLUC(self, io, &a.DisplayName, i, &DisplayNameMLU)) goto Error;
+        }
+
+        if (Length > 24) {
+            if (!ReadOneMLUC(self, io, &a.DisplayValue, i, &DisplayValueMLU)) goto Error;
+        }
+
+        if (NameWCS == NULL || ValueWCS == NULL) {
+
+            cmsSignalError(self->ContextID, cmsERROR_CORRUPTION_DETECTED, "Bad dictionary Name/Value");
+            rc = FALSE;
+        }
+        else {
+
+            rc = cmsDictAddEntry(hDict, NameWCS, ValueWCS, DisplayNameMLU, DisplayValueMLU);
+        }
+
+        // The temporaries of this record are released here whether or not it was added
+        if (NameWCS != NULL) _cmsFree(self ->ContextID, NameWCS);
+        if (ValueWCS != NULL) _cmsFree(self ->ContextID, ValueWCS);
+        if (DisplayNameMLU != NULL) cmsMLUfree(DisplayNameMLU);
+        if (DisplayValueMLU != NULL) cmsMLUfree(DisplayValueMLU);
+
+        NameWCS = ValueWCS = NULL;
+        DisplayNameMLU = DisplayValueMLU = NULL;
+
+        if (!rc) goto Error;
+    }
+
+    FreeArray(&a);
+    *nItems = 1;
+    return (void*) hDict;
+
+Error:
+    // Release whatever the interrupted record had already read
+    if (NameWCS != NULL) _cmsFree(self ->ContextID, NameWCS);
+    if (ValueWCS != NULL) _cmsFree(self ->ContextID, ValueWCS);
+    if (DisplayNameMLU != NULL) cmsMLUfree(DisplayNameMLU);
+    if (DisplayValueMLU != NULL) cmsMLUfree(DisplayValueMLU);
+
+    FreeArray(&a);
+    cmsDictFree(hDict);
+    return NULL;
+}
+
+
+// Writes a dictionary tag: record count, record length, a directory of offset/size pairs
+// and then the strings. The directory is first written as a placeholder and rewritten
+// once the position and size of every string are known.
+// Returns TRUE on success, FALSE on a NULL dictionary or any I/O or allocation failure.
+static
+cmsBool Type_Dictionary_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsHANDLE hDict = (cmsHANDLE) Ptr;
+    const cmsDICTentry* p;
+    cmsBool AnyName, AnyValue;
+    cmsUInt32Number i, Count, Length;
+    cmsUInt32Number DirectoryPos, CurrentPos, BaseOffset;
+    _cmsDICarray a;
+
+    if (hDict == NULL) return FALSE;
+
+    BaseOffset = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    // Let's inspect the dictionary
+    Count = 0; AnyName = FALSE; AnyValue = FALSE;
+    for (p = cmsDictGetEntryList(hDict); p != NULL; p = cmsDictNextEntry(p)) {
+
+        if (p ->DisplayName != NULL) AnyName = TRUE;
+        if (p ->DisplayValue != NULL) AnyValue = TRUE;
+        Count++;
+    }
+
+    // Columns are positional: a 24-byte record carries DisplayName and a 32-byte record
+    // carries DisplayName plus DisplayValue (this is how AllocArray and the reader
+    // interpret Length). Display values therefore always need the 32-byte layout, even
+    // when no entry has a display name; absent names are stored as offset 0 / size 0.
+    Length = 16;
+    if (AnyName)  Length = 24;
+    if (AnyValue) Length = 32;
+
+    if (!_cmsWriteUInt32Number(io, Count)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, Length)) return FALSE;
+
+    // Keep starting position of offsets table
+    DirectoryPos = io ->Tell(io);
+
+    // Allocate offsets array
+    if (!AllocArray(self ->ContextID, &a, Count, Length)) goto Error;
+
+    // Write a fake directory to be filled later on
+    if (!WriteOffsetArray(io, &a, Count, Length)) goto Error;
+
+    // Write each element. Keep track of the size as well.
+    p = cmsDictGetEntryList(hDict);
+    for (i=0; i < Count; i++) {
+
+        if (!WriteOneWChar(io, &a.Name, i,  p ->Name, BaseOffset)) goto Error;
+        if (!WriteOneWChar(io, &a.Value, i, p ->Value, BaseOffset)) goto Error;
+
+        // Entries without a display name/value keep the zeroed offset and size
+        if (p ->DisplayName != NULL) {
+            if (!WriteOneMLUC(self, io, &a.DisplayName, i, p ->DisplayName, BaseOffset)) goto Error;
+        }
+
+        if (p ->DisplayValue != NULL) {
+            if (!WriteOneMLUC(self, io, &a.DisplayValue, i, p ->DisplayValue, BaseOffset)) goto Error;
+        }
+
+        p = cmsDictNextEntry(p);
+    }
+
+    // Write the directory, now with the real offsets and sizes
+    CurrentPos = io ->Tell(io);
+    if (!io ->Seek(io, DirectoryPos)) goto Error;
+
+    if (!WriteOffsetArray(io, &a, Count, Length)) goto Error;
+
+    // Leave the stream positioned after the last string
+    if (!io ->Seek(io, CurrentPos)) goto Error;
+
+    FreeArray(&a);
+    return TRUE;
+
+Error:
+    FreeArray(&a);
+    return FALSE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+
+// Duplicates a dictionary tag by duplicating the dictionary handle it holds.
+static
+void* Type_Dictionary_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsHANDLE Copy;
+
+    // Neither the handler nor the item count take part in the copy
+    cmsUNUSED_PARAMETER(self);
+    cmsUNUSED_PARAMETER(n);
+
+    Copy = cmsDictDup((cmsHANDLE) Ptr);
+    return (void*) Copy;
+}
+
+
+// Releases a dictionary tag by freeing the dictionary handle it holds.
+static
+void Type_Dictionary_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsHANDLE hDict = (cmsHANDLE) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+    cmsDictFree(hDict);
+}
+
+
+// ********************************************************************************
+// Type support main routines
+// ********************************************************************************
+
+
+// This is the list of built-in types. The array doubles as a singly linked list: each
+// entry points to the next array slot and the last entry ends the list with NULL.
+// Some handlers are registered under more than one signature (XYZ also serves
+// cmsCorbisBrokenXYZtype, Curve also serves cmsMonacoBrokenCurveType).
+static const _cmsTagTypeLinkedList SupportedTagTypes[] = {
+
+{TYPE_HANDLER(cmsSigChromaticityType,          Chromaticity),       (_cmsTagTypeLinkedList*) &SupportedTagTypes[1] },
+{TYPE_HANDLER(cmsSigColorantOrderType,         ColorantOrderType),  (_cmsTagTypeLinkedList*) &SupportedTagTypes[2] },
+{TYPE_HANDLER(cmsSigS15Fixed16ArrayType,       S15Fixed16),         (_cmsTagTypeLinkedList*) &SupportedTagTypes[3] },
+{TYPE_HANDLER(cmsSigU16Fixed16ArrayType,       U16Fixed16),         (_cmsTagTypeLinkedList*) &SupportedTagTypes[4] },
+{TYPE_HANDLER(cmsSigTextType,                  Text),               (_cmsTagTypeLinkedList*) &SupportedTagTypes[5] },
+{TYPE_HANDLER(cmsSigTextDescriptionType,       Text_Description),   (_cmsTagTypeLinkedList*) &SupportedTagTypes[6] },
+{TYPE_HANDLER(cmsSigCurveType,                 Curve),              (_cmsTagTypeLinkedList*) &SupportedTagTypes[7] },
+{TYPE_HANDLER(cmsSigParametricCurveType,       ParametricCurve),    (_cmsTagTypeLinkedList*) &SupportedTagTypes[8] },
+{TYPE_HANDLER(cmsSigDateTimeType,              DateTime),           (_cmsTagTypeLinkedList*) &SupportedTagTypes[9] },
+{TYPE_HANDLER(cmsSigLut8Type,                  LUT8),               (_cmsTagTypeLinkedList*) &SupportedTagTypes[10] },
+{TYPE_HANDLER(cmsSigLut16Type,                 LUT16),              (_cmsTagTypeLinkedList*) &SupportedTagTypes[11] },
+{TYPE_HANDLER(cmsSigColorantTableType,         ColorantTable),      (_cmsTagTypeLinkedList*) &SupportedTagTypes[12] },
+{TYPE_HANDLER(cmsSigNamedColor2Type,           NamedColor),         (_cmsTagTypeLinkedList*) &SupportedTagTypes[13] },
+{TYPE_HANDLER(cmsSigMultiLocalizedUnicodeType, MLU),                (_cmsTagTypeLinkedList*) &SupportedTagTypes[14] },
+{TYPE_HANDLER(cmsSigProfileSequenceDescType,   ProfileSequenceDesc),(_cmsTagTypeLinkedList*) &SupportedTagTypes[15] },
+{TYPE_HANDLER(cmsSigSignatureType,             Signature),          (_cmsTagTypeLinkedList*) &SupportedTagTypes[16] },
+{TYPE_HANDLER(cmsSigMeasurementType,           Measurement),        (_cmsTagTypeLinkedList*) &SupportedTagTypes[17] },
+{TYPE_HANDLER(cmsSigDataType,                  Data),               (_cmsTagTypeLinkedList*) &SupportedTagTypes[18] },
+{TYPE_HANDLER(cmsSigLutAtoBType,               LUTA2B),             (_cmsTagTypeLinkedList*) &SupportedTagTypes[19] },
+{TYPE_HANDLER(cmsSigLutBtoAType,               LUTB2A),             (_cmsTagTypeLinkedList*) &SupportedTagTypes[20] },
+{TYPE_HANDLER(cmsSigUcrBgType,                 UcrBg),              (_cmsTagTypeLinkedList*) &SupportedTagTypes[21] },
+{TYPE_HANDLER(cmsSigCrdInfoType,               CrdInfo),            (_cmsTagTypeLinkedList*) &SupportedTagTypes[22] },
+{TYPE_HANDLER(cmsSigMultiProcessElementType,   MPE),                (_cmsTagTypeLinkedList*) &SupportedTagTypes[23] },
+{TYPE_HANDLER(cmsSigScreeningType,             Screening),          (_cmsTagTypeLinkedList*) &SupportedTagTypes[24] },
+{TYPE_HANDLER(cmsSigViewingConditionsType,     ViewingConditions),  (_cmsTagTypeLinkedList*) &SupportedTagTypes[25] },
+{TYPE_HANDLER(cmsSigXYZType,                   XYZ),                (_cmsTagTypeLinkedList*) &SupportedTagTypes[26] },
+{TYPE_HANDLER(cmsCorbisBrokenXYZtype,          XYZ),                (_cmsTagTypeLinkedList*) &SupportedTagTypes[27] },
+{TYPE_HANDLER(cmsMonacoBrokenCurveType,        Curve),              (_cmsTagTypeLinkedList*) &SupportedTagTypes[28] },
+{TYPE_HANDLER(cmsSigProfileSequenceIdType,     ProfileSequenceId),  (_cmsTagTypeLinkedList*) &SupportedTagTypes[29] },
+{TYPE_HANDLER(cmsSigDictType,                  Dictionary),         (_cmsTagTypeLinkedList*) &SupportedTagTypes[30] },
+{TYPE_HANDLER(cmsSigVcgtType,                  vcgt),                NULL }
+};
+
+
+// Global tag-type plug-in chunk, initialized with a NULL first member (an empty list).
+// NOTE(review): presumably the chunk used when no context-specific one exists -- confirm.
+_cmsTagTypePluginChunkType _cmsTagTypePluginChunk = { NULL };
+
+
+
+// Duplicates the zone of memory used by the plug-in in the new context: every node of the
+// source list at chunk 'loc' is copied into ctx's memory pool, keeping the original order,
+// and a new chunk header pointing at the copy is stored in ctx->chunks[loc].
+// If a node cannot be copied the function returns without storing a new chunk.
+static
+void DupTagTypeList(struct _cmsContext_struct* ctx, 
+                    const struct _cmsContext_struct* src, 
+                    int loc)
+{
+    _cmsTagTypePluginChunkType Copy = { NULL };
+    _cmsTagTypePluginChunkType* Source = (_cmsTagTypePluginChunkType*) src->chunks[loc];
+    _cmsTagTypeLinkedList* Node;
+    _cmsTagTypeLinkedList* Last = NULL;
+
+    Node = Source->TagTypes;
+    while (Node != NULL) {
+
+        _cmsTagTypeLinkedList* Clone = ( _cmsTagTypeLinkedList *) _cmsSubAllocDup(ctx ->MemPool, Node, sizeof(_cmsTagTypeLinkedList));
+
+        if (Clone == NULL)
+            return;
+
+        // Append at the tail so the copy keeps the order of the source list
+        Clone -> Next = NULL;
+        if (Last == NULL)
+            Copy.TagTypes = Clone;
+        else
+            Last -> Next = Clone;
+
+        Last = Clone;
+        Node = Node ->Next;
+    }
+
+    ctx ->chunks[loc] = _cmsSubAllocDup(ctx->MemPool, &Copy, sizeof(_cmsTagTypePluginChunkType));
+}
+
+
+// Sets up the tag-type plug-in chunk of a context: a copy of src's list when a source
+// context is given, otherwise a copy of an empty chunk.
+void _cmsAllocTagTypePluginChunk(struct _cmsContext_struct* ctx, 
+                                 const struct _cmsContext_struct* src)
+{
+    static _cmsTagTypePluginChunkType EmptyChunk = { NULL };
+
+    if (src != NULL) {
+
+        // Duplicate the LIST
+        DupTagTypeList(ctx, src, TagTypePlugin);
+        return;
+    }
+
+    ctx ->chunks[TagTypePlugin] = _cmsSubAllocDup(ctx ->MemPool, &EmptyChunk, sizeof(_cmsTagTypePluginChunkType));
+}
+
+// Sets up the multi-process-element plug-in chunk of a context: a copy of src's list when
+// a source context is given, otherwise a copy of an empty chunk.
+void _cmsAllocMPETypePluginChunk(struct _cmsContext_struct* ctx, 
+                               const struct _cmsContext_struct* src)
+{
+    static _cmsTagTypePluginChunkType EmptyChunk = { NULL };
+
+    if (src != NULL) {
+
+        // Duplicate the LIST
+        DupTagTypeList(ctx, src, MPEPlugin);
+        return;
+    }
+
+    ctx ->chunks[MPEPlugin] = _cmsSubAllocDup(ctx ->MemPool, &EmptyChunk, sizeof(_cmsTagTypePluginChunkType));
+}
+
+
+// Both kinds of plug-ins share the same structure, so registration is delegated to
+// RegisterTypesPlugin with the chunk the plug-in belongs to: here, tag types.
+cmsBool  _cmsRegisterTagTypePlugin(cmsContext id, cmsPluginBase* Data)
+{
+    cmsBool Registered = RegisterTypesPlugin(id, Data, TagTypePlugin);
+
+    return Registered;
+}
+
+// Registers a multi-process-element plug-in through RegisterTypesPlugin, targeting the
+// MPE chunk.
+cmsBool  _cmsRegisterMultiProcessElementPlugin(cmsContext id, cmsPluginBase* Data)
+{
+    cmsBool Registered = RegisterTypesPlugin(id, Data, MPEPlugin);
+
+    return Registered;
+}
+
+
+// Wrapper for tag types: looks up the handler for 'sig' through GetHandler, passing the
+// context's plug-in list together with the built-in SupportedTagTypes list.
+cmsTagTypeHandler* _cmsGetTagTypeHandler(cmsContext ContextID, cmsTagTypeSignature sig)
+{
+    _cmsTagTypePluginChunkType* Chunk;
+    _cmsTagTypeLinkedList* BuiltIn = (_cmsTagTypeLinkedList*) SupportedTagTypes;
+
+    Chunk = ( _cmsTagTypePluginChunkType*) _cmsContextGetClientChunk(ContextID, TagTypePlugin);
+
+    return GetHandler(sig, Chunk->TagTypes, BuiltIn);
+}
+
+// ********************************************************************************
+// Tag support main routines
+// ********************************************************************************
+
+// Node of the list of known tags: a tag signature, the descriptor that goes with it,
+// and the link to the next node (NULL on the last one)
+typedef struct _cmsTagLinkedList_st {
+
+            cmsTagSignature Signature;
+            cmsTagDescriptor Descriptor;
+            struct _cmsTagLinkedList_st* Next;
+
+} _cmsTagLinkedList;
+
+// This is the list of built-in tags. The data of this list can be modified by plug-ins.
+// The array doubles as a singly linked list: each entry points to the next array slot and
+// the last entry ends the list with NULL.
+// Each descriptor is { count, number of type signatures listed, { type signatures },
+// Decide* callback or NULL }.
+// NOTE(review): the leading count looks like the number of elements stored in the tag
+// (9 for the S15Fixed16 arrays, 1 elsewhere), and the callback presumably picks which of
+// the listed types is used on output -- confirm against the cmsTagDescriptor declaration.
+static _cmsTagLinkedList SupportedTags[] = {
+
+    { cmsSigAToB0Tag,               { 1, 3,  { cmsSigLut16Type,  cmsSigLutAtoBType, cmsSigLut8Type}, DecideLUTtypeA2B}, &SupportedTags[1]},
+    { cmsSigAToB1Tag,               { 1, 3,  { cmsSigLut16Type,  cmsSigLutAtoBType, cmsSigLut8Type}, DecideLUTtypeA2B}, &SupportedTags[2]},
+    { cmsSigAToB2Tag,               { 1, 3,  { cmsSigLut16Type,  cmsSigLutAtoBType, cmsSigLut8Type}, DecideLUTtypeA2B}, &SupportedTags[3]},
+    { cmsSigBToA0Tag,               { 1, 3,  { cmsSigLut16Type,  cmsSigLutBtoAType, cmsSigLut8Type}, DecideLUTtypeB2A}, &SupportedTags[4]},
+    { cmsSigBToA1Tag,               { 1, 3,  { cmsSigLut16Type,  cmsSigLutBtoAType, cmsSigLut8Type}, DecideLUTtypeB2A}, &SupportedTags[5]},
+    { cmsSigBToA2Tag,               { 1, 3,  { cmsSigLut16Type,  cmsSigLutBtoAType, cmsSigLut8Type}, DecideLUTtypeB2A}, &SupportedTags[6]},
+
+    // Allow corbis  and its broken XYZ type
+    { cmsSigRedColorantTag,         { 1, 2, { cmsSigXYZType, cmsCorbisBrokenXYZtype }, DecideXYZtype}, &SupportedTags[7]},
+    { cmsSigGreenColorantTag,       { 1, 2, { cmsSigXYZType, cmsCorbisBrokenXYZtype }, DecideXYZtype}, &SupportedTags[8]},
+    { cmsSigBlueColorantTag,        { 1, 2, { cmsSigXYZType, cmsCorbisBrokenXYZtype }, DecideXYZtype}, &SupportedTags[9]},
+
+    { cmsSigRedTRCTag,              { 1, 3, { cmsSigCurveType, cmsSigParametricCurveType, cmsMonacoBrokenCurveType }, DecideCurveType}, &SupportedTags[10]},
+    { cmsSigGreenTRCTag,            { 1, 3, { cmsSigCurveType, cmsSigParametricCurveType, cmsMonacoBrokenCurveType }, DecideCurveType}, &SupportedTags[11]},
+    { cmsSigBlueTRCTag,             { 1, 3, { cmsSigCurveType, cmsSigParametricCurveType, cmsMonacoBrokenCurveType }, DecideCurveType}, &SupportedTags[12]},
+
+    { cmsSigCalibrationDateTimeTag, { 1, 1, { cmsSigDateTimeType }, NULL}, &SupportedTags[13]},
+    { cmsSigCharTargetTag,          { 1, 1, { cmsSigTextType },     NULL}, &SupportedTags[14]},
+
+    { cmsSigChromaticAdaptationTag, { 9, 1, { cmsSigS15Fixed16ArrayType }, NULL}, &SupportedTags[15]},
+    { cmsSigChromaticityTag,        { 1, 1, { cmsSigChromaticityType    }, NULL}, &SupportedTags[16]},
+    { cmsSigColorantOrderTag,       { 1, 1, { cmsSigColorantOrderType   }, NULL}, &SupportedTags[17]},
+    { cmsSigColorantTableTag,       { 1, 1, { cmsSigColorantTableType   }, NULL}, &SupportedTags[18]},
+    { cmsSigColorantTableOutTag,    { 1, 1, { cmsSigColorantTableType   }, NULL}, &SupportedTags[19]},
+
+    { cmsSigCopyrightTag,           { 1, 3, { cmsSigTextType,  cmsSigMultiLocalizedUnicodeType, cmsSigTextDescriptionType}, DecideTextType}, &SupportedTags[20]},
+    { cmsSigDateTimeTag,            { 1, 1, { cmsSigDateTimeType }, NULL}, &SupportedTags[21]},
+
+    { cmsSigDeviceMfgDescTag,       { 1, 3, { cmsSigTextDescriptionType, cmsSigMultiLocalizedUnicodeType, cmsSigTextType}, DecideTextDescType}, &SupportedTags[22]},
+    { cmsSigDeviceModelDescTag,     { 1, 3, { cmsSigTextDescriptionType, cmsSigMultiLocalizedUnicodeType, cmsSigTextType}, DecideTextDescType}, &SupportedTags[23]},
+
+    { cmsSigGamutTag,               { 1, 3, { cmsSigLut16Type, cmsSigLutBtoAType, cmsSigLut8Type }, DecideLUTtypeB2A}, &SupportedTags[24]},
+
+    { cmsSigGrayTRCTag,             { 1, 2, { cmsSigCurveType, cmsSigParametricCurveType }, DecideCurveType}, &SupportedTags[25]},
+    { cmsSigLuminanceTag,           { 1, 1, { cmsSigXYZType }, NULL}, &SupportedTags[26]},
+
+    { cmsSigMediaBlackPointTag,     { 1, 2, { cmsSigXYZType, cmsCorbisBrokenXYZtype }, NULL}, &SupportedTags[27]},
+    { cmsSigMediaWhitePointTag,     { 1, 2, { cmsSigXYZType, cmsCorbisBrokenXYZtype }, NULL}, &SupportedTags[28]},
+
+    { cmsSigNamedColor2Tag,         { 1, 1, { cmsSigNamedColor2Type }, NULL}, &SupportedTags[29]},
+
+    { cmsSigPreview0Tag,            { 1, 3,  { cmsSigLut16Type, cmsSigLutBtoAType, cmsSigLut8Type }, DecideLUTtypeB2A}, &SupportedTags[30]},
+    { cmsSigPreview1Tag,            { 1, 3,  { cmsSigLut16Type, cmsSigLutBtoAType, cmsSigLut8Type }, DecideLUTtypeB2A}, &SupportedTags[31]},
+    { cmsSigPreview2Tag,            { 1, 3,  { cmsSigLut16Type, cmsSigLutBtoAType, cmsSigLut8Type }, DecideLUTtypeB2A}, &SupportedTags[32]},
+
+    { cmsSigProfileDescriptionTag,  { 1, 3, { cmsSigTextDescriptionType, cmsSigMultiLocalizedUnicodeType, cmsSigTextType}, DecideTextDescType}, &SupportedTags[33]},
+    { cmsSigProfileSequenceDescTag, { 1, 1, { cmsSigProfileSequenceDescType }, NULL},  &SupportedTags[34]},
+    { cmsSigTechnologyTag,          { 1, 1, { cmsSigSignatureType }, NULL},  &SupportedTags[35]},
+
+    { cmsSigColorimetricIntentImageStateTag,   { 1, 1, { cmsSigSignatureType }, NULL}, &SupportedTags[36]},
+    { cmsSigPerceptualRenderingIntentGamutTag, { 1, 1, { cmsSigSignatureType }, NULL}, &SupportedTags[37]},
+    { cmsSigSaturationRenderingIntentGamutTag, { 1, 1, { cmsSigSignatureType }, NULL}, &SupportedTags[38]},
+
+    { cmsSigMeasurementTag,         { 1, 1, { cmsSigMeasurementType }, NULL}, &SupportedTags[39]},
+
+    { cmsSigPs2CRD0Tag,             { 1, 1, { cmsSigDataType }, NULL}, &SupportedTags[40]},
+    { cmsSigPs2CRD1Tag,             { 1, 1, { cmsSigDataType }, NULL}, &SupportedTags[41]},
+    { cmsSigPs2CRD2Tag,             { 1, 1, { cmsSigDataType }, NULL}, &SupportedTags[42]},
+    { cmsSigPs2CRD3Tag,             { 1, 1, { cmsSigDataType }, NULL}, &SupportedTags[43]},
+    { cmsSigPs2CSATag,              { 1, 1, { cmsSigDataType }, NULL}, &SupportedTags[44]},
+    { cmsSigPs2RenderingIntentTag,  { 1, 1, { cmsSigDataType }, NULL}, &SupportedTags[45]},
+
+    { cmsSigViewingCondDescTag,     { 1, 3, { cmsSigTextDescriptionType, cmsSigMultiLocalizedUnicodeType, cmsSigTextType}, DecideTextDescType}, &SupportedTags[46]},
+
+    { cmsSigUcrBgTag,               { 1, 1, { cmsSigUcrBgType}, NULL},    &SupportedTags[47]},
+    { cmsSigCrdInfoTag,             { 1, 1, { cmsSigCrdInfoType}, NULL},  &SupportedTags[48]},
+
+    { cmsSigDToB0Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[49]},
+    { cmsSigDToB1Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[50]},
+    { cmsSigDToB2Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[51]},
+    { cmsSigDToB3Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[52]},
+    { cmsSigBToD0Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[53]},
+    { cmsSigBToD1Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[54]},
+    { cmsSigBToD2Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[55]},
+    { cmsSigBToD3Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[56]},
+
+    { cmsSigScreeningDescTag,       { 1, 1, { cmsSigTextDescriptionType },    NULL}, &SupportedTags[57]},
+    { cmsSigViewingConditionsTag,   { 1, 1, { cmsSigViewingConditionsType },  NULL}, &SupportedTags[58]},
+
+    { cmsSigScreeningTag,           { 1, 1, { cmsSigScreeningType},          NULL }, &SupportedTags[59]},
+    { cmsSigVcgtTag,                { 1, 1, { cmsSigVcgtType},               NULL }, &SupportedTags[60]},
+    { cmsSigMetaTag,                { 1, 1, { cmsSigDictType},               NULL }, &SupportedTags[61]},
+    { cmsSigProfileSequenceIdTag,   { 1, 1, { cmsSigProfileSequenceIdType},  NULL }, &SupportedTags[62]},
+
+    { cmsSigProfileDescriptionMLTag,{ 1, 1, { cmsSigMultiLocalizedUnicodeType}, NULL}, &SupportedTags[63]},
+    { cmsSigArgyllArtsTag,          { 9, 1, { cmsSigS15Fixed16ArrayType},    NULL}, NULL}
+
+};
+
+/*
+    Not supported                 Why
+    =======================       =========================================
+    cmsSigOutputResponseTag   ==> WARNING, POSSIBLE PATENT ON THIS SUBJECT!
+    cmsSigNamedColorTag       ==> Deprecated
+    cmsSigDataTag             ==> Ancient, unused
+    cmsSigDeviceSettingsTag   ==> Deprecated, useless
+*/
+
+
+_cmsTagPluginChunkType _cmsTagPluginChunk = { NULL };   // Zero-initialised tag plug-in chunk: its tag list starts out empty
+
+
+// Copies the tag plug-in list held by `src` into memory sub-allocated from the
+// pool of `ctx`, keeping the nodes in their original order, and installs the
+// copy as the TagPlugin chunk of `ctx`. If a node cannot be duplicated the
+// function returns early and ctx->chunks[TagPlugin] is left unassigned.
+static
+void DupTagList(struct _cmsContext_struct* ctx, 
+                    const struct _cmsContext_struct* src)
+{
+   _cmsTagPluginChunkType  Copy = { NULL };
+   _cmsTagPluginChunkType* Source = (_cmsTagPluginChunkType*) src->chunks[TagPlugin];
+   _cmsTagLinkedList*      Node = Source->Tag;
+   _cmsTagLinkedList**     Link = &Copy.Tag;    // Slot that receives the next duplicated node
+
+   while (Node != NULL) {
+
+       _cmsTagLinkedList* Dup = (_cmsTagLinkedList*) _cmsSubAllocDup(ctx->MemPool, Node, sizeof(_cmsTagLinkedList));
+
+       if (Dup == NULL)
+           return;
+
+       // Appending through the tail slot preserves the order of the source list
+       Dup->Next = NULL;
+       *Link = Dup;
+       Link  = &Dup->Next;
+
+       Node = Node->Next;
+   }
+
+   ctx->chunks[TagPlugin] = _cmsSubAllocDup(ctx->MemPool, &Copy, sizeof(_cmsTagPluginChunkType));
+}
+
+// Sets up the TagPlugin chunk of `ctx`: a duplicate of the list owned by `src`
+// when a source context is given, otherwise a copy of an empty chunk.
+void _cmsAllocTagPluginChunk(struct _cmsContext_struct* ctx, 
+                                 const struct _cmsContext_struct* src)
+{
+    static _cmsTagPluginChunkType TagPluginChunk = { NULL };
+
+    if (src == NULL) {
+
+        // No source context: start from an empty tag list
+        ctx->chunks[TagPlugin] = _cmsSubAllocDup(ctx->MemPool, &TagPluginChunk, sizeof(_cmsTagPluginChunkType));
+        return;
+    }
+
+    DupTagList(ctx, src);
+}
+
+// Registers a tag plug-in on context `id`. Passing Data == NULL resets the
+// list of plug-in tags. Otherwise a new node carrying the plug-in signature
+// and descriptor is placed at the head of the list.
+// Returns FALSE only when the node cannot be allocated.
+cmsBool  _cmsRegisterTagPlugin(cmsContext id, cmsPluginBase* Data)
+{
+    _cmsTagPluginChunkType* Chunk = (_cmsTagPluginChunkType*) _cmsContextGetClientChunk(id, TagPlugin);
+    cmsPluginTag* TagPlugIn;
+    _cmsTagLinkedList* Node;
+
+    if (Data == NULL) {
+
+        Chunk->Tag = NULL;
+        return TRUE;
+    }
+
+    TagPlugIn = (cmsPluginTag*) Data;
+
+    Node = (_cmsTagLinkedList*) _cmsPluginMalloc(id, sizeof(_cmsTagLinkedList));
+    if (Node == NULL)
+        return FALSE;
+
+    Node->Signature  = TagPlugIn->Signature;
+    Node->Descriptor = TagPlugIn->Descriptor;
+
+    // Push on the front of the list
+    Node->Next = Chunk->Tag;
+    Chunk->Tag = Node;
+
+    return TRUE;
+}
+
+// Looks up the descriptor for tag `sig`. Tags registered through plug-ins on
+// the given context are searched first, then the built-in SupportedTags list.
+// Returns NULL when the signature is found in neither list.
+cmsTagDescriptor* _cmsGetTagDescriptor(cmsContext ContextID, cmsTagSignature sig)
+{
+    _cmsTagPluginChunkType* Chunk = (_cmsTagPluginChunkType*) _cmsContextGetClientChunk(ContextID, TagPlugin);
+    _cmsTagLinkedList* Heads[2];
+    _cmsTagLinkedList* Node;
+    int i;
+
+    Heads[0] = Chunk->Tag;        // plug-in tags take precedence
+    Heads[1] = SupportedTags;     // built-in tags
+
+    for (i = 0; i < 2; i++) {
+
+        Node = Heads[i];
+        while (Node != NULL) {
+
+            if (Node->Signature == sig)
+                return &Node->Descriptor;
+
+            Node = Node->Next;
+        }
+    }
+
+    return NULL;
+}
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmsvirt.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmsvirt.c
new file mode 100644
index 0000000000..b431478eab
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmsvirt.c
@@ -0,0 +1,1216 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Virtual (built-in) profiles
+// -----------------------------------------------------------------------------------
+
+// Writes the profile description tag (using `Description`) and a fixed
+// copyright tag to hProfile. Both temporary MLU objects are released before
+// returning. Returns TRUE only if every step succeeded.
+static
+cmsBool SetTextTags(cmsHPROFILE hProfile, const wchar_t* Description)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsMLU* DescText = cmsMLUalloc(ContextID, 1);
+    cmsMLU* CopyText = cmsMLUalloc(ContextID, 1);
+    cmsBool Ok = FALSE;
+
+    if (DescText != NULL && CopyText != NULL) {
+
+        // The chain short-circuits, so nothing further is attempted after the first failure
+        if (cmsMLUsetWide(DescText, "en", "US", Description) &&
+            cmsMLUsetWide(CopyText, "en", "US", L"No copyright, use freely") &&
+            cmsWriteTag(hProfile, cmsSigProfileDescriptionTag, DescText) &&
+            cmsWriteTag(hProfile, cmsSigCopyrightTag, CopyText)) {
+
+            Ok = TRUE;
+        }
+    }
+
+    if (DescText != NULL)
+        cmsMLUfree(DescText);
+
+    if (CopyText != NULL)
+        cmsMLUfree(CopyText);
+
+    return Ok;
+}
+
+
+// Writes a one-entry profile sequence description to hProfile. The entry has
+// zeroed manufacturer/model signatures, attributes and technology, and the
+// texts "Little CMS" as manufacturer and `Model` as model.
+// Returns TRUE if the sequence was written.
+static
+cmsBool  SetSeqDescTag(cmsHPROFILE hProfile, const char* Model)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsSEQ* Sequence = cmsAllocProfileSequenceDescription(ContextID, 1);
+    cmsBool Written;
+
+    if (Sequence == NULL)
+        return FALSE;
+
+    Sequence->seq[0].deviceMfg   = (cmsSignature) 0;
+    Sequence->seq[0].deviceModel = (cmsSignature) 0;
+
+#ifdef CMS_DONT_USE_INT64
+    Sequence->seq[0].attributes[0] = 0;
+    Sequence->seq[0].attributes[1] = 0;
+#else
+    Sequence->seq[0].attributes = 0;
+#endif
+
+    Sequence->seq[0].technology = (cmsTechnologySignature) 0;
+
+    cmsMLUsetASCII(Sequence->seq[0].Manufacturer, cmsNoLanguage, cmsNoCountry, "Little CMS");
+    cmsMLUsetASCII(Sequence->seq[0].Model,        cmsNoLanguage, cmsNoCountry, Model);
+
+    Written = _cmsWriteProfileSequence(hProfile, Sequence) ? TRUE : FALSE;
+
+    // The local sequence is released whether or not the write succeeded
+    cmsFreeProfileSequenceDescription(Sequence);
+
+    return Written;
+}
+
+
+
+// Builds an RGB display profile (version 4.3, XYZ PCS) from a white point, a
+// set of primaries and three transfer functions. WhitePoint, Primaries and
+// TransferFunction may each be NULL, in which case the tags that depend on
+// them are not written.
+// Returns the new profile, or NULL on failure (the partially built profile is closed).
+cmsHPROFILE CMSEXPORT cmsCreateRGBProfileTHR(cmsContext ContextID,
+                                          const cmsCIExyY* WhitePoint,
+                                          const cmsCIExyYTRIPLE* Primaries,
+                                          cmsToneCurve* const TransferFunction[3])
+{
+    cmsHPROFILE hICC = cmsCreateProfilePlaceholder(ContextID);
+
+    if (hICC == NULL)                   // can't allocate
+        return NULL;
+
+    cmsSetProfileVersion(hICC, 4.3);
+
+    cmsSetDeviceClass(hICC, cmsSigDisplayClass);
+    cmsSetColorSpace(hICC,  cmsSigRgbData);
+    cmsSetPCS(hICC,         cmsSigXYZData);
+
+    cmsSetHeaderRenderingIntent(hICC, INTENT_PERCEPTUAL);
+
+    // Tags written, in this order and depending on which arguments are given:
+    //
+    //   description and copyright        (always)
+    //   media white point (D50)          (WhitePoint)
+    //   chromatic adaptation             (WhitePoint)
+    //   red, blue, green colorants       (WhitePoint and Primaries)
+    //   red, green, blue TRC             (TransferFunction)
+    //   chromaticity                     (Primaries)
+
+    if (!SetTextTags(hICC, L"RGB built-in")) goto Error;
+
+    if (WhitePoint != NULL) {
+
+        cmsCIEXYZ WhitePointXYZ;
+        cmsMAT3 CHAD;
+
+        if (!cmsWriteTag(hICC, cmsSigMediaWhitePointTag, cmsD50_XYZ())) goto Error;
+
+        cmsxyY2XYZ(&WhitePointXYZ, WhitePoint);
+        _cmsAdaptationMatrix(&CHAD, NULL, &WhitePointXYZ, cmsD50_XYZ());
+
+        // This is a V4 tag, but many CMMs read and understand it whatever the version
+        if (!cmsWriteTag(hICC, cmsSigChromaticAdaptationTag, (void*) &CHAD)) goto Error;
+
+        if (Primaries != NULL) {
+
+            cmsCIExyY MaxWhite;
+            cmsMAT3 M;
+            cmsCIEXYZTRIPLE Colorants;
+
+            MaxWhite.x = WhitePoint->x;
+            MaxWhite.y = WhitePoint->y;
+            MaxWhite.Y = 1.0;
+
+            if (!_cmsBuildRGB2XYZtransferMatrix(&M, &MaxWhite, Primaries)) goto Error;
+
+            // Each colorant is one column of the RGB -> XYZ matrix
+            Colorants.Red.X   = M.v[0].n[0];
+            Colorants.Red.Y   = M.v[1].n[0];
+            Colorants.Red.Z   = M.v[2].n[0];
+
+            Colorants.Green.X = M.v[0].n[1];
+            Colorants.Green.Y = M.v[1].n[1];
+            Colorants.Green.Z = M.v[2].n[1];
+
+            Colorants.Blue.X  = M.v[0].n[2];
+            Colorants.Blue.Y  = M.v[1].n[2];
+            Colorants.Blue.Z  = M.v[2].n[2];
+
+            if (!cmsWriteTag(hICC, cmsSigRedColorantTag,   (void*) &Colorants.Red))   goto Error;
+            if (!cmsWriteTag(hICC, cmsSigBlueColorantTag,  (void*) &Colorants.Blue))  goto Error;
+            if (!cmsWriteTag(hICC, cmsSigGreenColorantTag, (void*) &Colorants.Green)) goto Error;
+        }
+    }
+
+    if (TransferFunction != NULL) {
+
+        cmsBool GreenOk, BlueOk;
+
+        if (!cmsWriteTag(hICC, cmsSigRedTRCTag, (void*) TransferFunction[0])) goto Error;
+
+        // To save space, a channel that shares the red curve is linked to the red tag
+        GreenOk = (TransferFunction[1] == TransferFunction[0])
+                      ? cmsLinkTag(hICC, cmsSigGreenTRCTag, cmsSigRedTRCTag)
+                      : cmsWriteTag(hICC, cmsSigGreenTRCTag, (void*) TransferFunction[1]);
+        if (!GreenOk) goto Error;
+
+        BlueOk = (TransferFunction[2] == TransferFunction[0])
+                      ? cmsLinkTag(hICC, cmsSigBlueTRCTag, cmsSigRedTRCTag)
+                      : cmsWriteTag(hICC, cmsSigBlueTRCTag, (void*) TransferFunction[2]);
+        if (!BlueOk) goto Error;
+    }
+
+    if (Primaries != NULL) {
+
+        if (!cmsWriteTag(hICC, cmsSigChromaticityTag, (void*) Primaries)) goto Error;
+    }
+
+    return hICC;
+
+Error:
+    cmsCloseProfile(hICC);
+    return NULL;
+}
+
+// Convenience form of cmsCreateRGBProfileTHR that passes NULL as the context.
+cmsHPROFILE CMSEXPORT cmsCreateRGBProfile(const cmsCIExyY* WhitePoint,
+                                          const cmsCIExyYTRIPLE* Primaries,
+                                          cmsToneCurve* const TransferFunction[3])
+{
+    cmsHPROFILE hProfile = cmsCreateRGBProfileTHR(NULL, WhitePoint, Primaries, TransferFunction);
+
+    return hProfile;
+}
+
+
+
+// Builds a gray display profile (version 4.3, XYZ PCS) from a white point and
+// a transfer function. Either argument may be NULL, in which case the
+// corresponding tag is not written.
+// Returns the new profile, or NULL on failure.
+cmsHPROFILE CMSEXPORT cmsCreateGrayProfileTHR(cmsContext ContextID,
+                                           const cmsCIExyY* WhitePoint,
+                                           const cmsToneCurve* TransferFunction)
+{
+    cmsHPROFILE hICC = cmsCreateProfilePlaceholder(ContextID);
+
+    if (hICC == NULL)                   // can't allocate
+        return NULL;
+
+    cmsSetProfileVersion(hICC, 4.3);
+
+    cmsSetDeviceClass(hICC, cmsSigDisplayClass);
+    cmsSetColorSpace(hICC,  cmsSigGrayData);
+    cmsSetPCS(hICC,         cmsSigXYZData);
+    cmsSetHeaderRenderingIntent(hICC, INTENT_PERCEPTUAL);
+
+    // Tags written: description and copyright, media white point, gray TRC
+
+    if (!SetTextTags(hICC, L"gray built-in")) goto Error;
+
+    if (WhitePoint != NULL) {
+
+        cmsCIEXYZ WhiteXYZ;
+
+        cmsxyY2XYZ(&WhiteXYZ, WhitePoint);
+        if (!cmsWriteTag(hICC, cmsSigMediaWhitePointTag, (void*) &WhiteXYZ)) goto Error;
+    }
+
+    if (TransferFunction != NULL) {
+
+        if (!cmsWriteTag(hICC, cmsSigGrayTRCTag, (void*) TransferFunction)) goto Error;
+    }
+
+    return hICC;
+
+Error:
+    cmsCloseProfile(hICC);
+    return NULL;
+}
+
+
+
+// Convenience form of cmsCreateGrayProfileTHR that passes NULL as the context.
+cmsHPROFILE CMSEXPORT cmsCreateGrayProfile(const cmsCIExyY* WhitePoint,
+                                                    const cmsToneCurve* TransferFunction)
+{
+    cmsHPROFILE hProfile = cmsCreateGrayProfileTHR(NULL, WhitePoint, TransferFunction);
+
+    return hProfile;
+}
+
+// Creates a device link profile whose device space and PCS are both
+// `ColorSpace`. Its AToB0 pipeline holds a single tone-curve stage with one
+// transfer function per channel of that color space.
+// Returns the new profile, or NULL on failure.
+
+cmsHPROFILE CMSEXPORT cmsCreateLinearizationDeviceLinkTHR(cmsContext ContextID,
+                                                          cmsColorSpaceSignature ColorSpace,
+                                                          cmsToneCurve* const TransferFunctions[])
+{
+    cmsHPROFILE hICC;
+    cmsPipeline* Lin;
+    cmsStage* Curves;
+    cmsUInt32Number nChan;
+
+    hICC = cmsCreateProfilePlaceholder(ContextID);
+    if (hICC == NULL)
+        return NULL;
+
+    cmsSetProfileVersion(hICC, 4.3);
+
+    cmsSetDeviceClass(hICC, cmsSigLinkClass);
+    cmsSetColorSpace(hICC,  ColorSpace);
+    cmsSetPCS(hICC,         ColorSpace);
+
+    cmsSetHeaderRenderingIntent(hICC, INTENT_PERCEPTUAL);
+
+    // One curve per channel of the color space
+    nChan = cmsChannelsOf(ColorSpace);
+
+    // The pipeline contains the prelinearization step only
+    Lin = cmsPipelineAlloc(ContextID, nChan, nChan);
+    if (Lin == NULL) goto Error;
+
+    Curves = cmsStageAllocToneCurves(ContextID, nChan, TransferFunctions);
+    if (!cmsPipelineInsertStage(Lin, cmsAT_BEGIN, Curves))
+        goto Error;
+
+    // Create tags
+    if (!SetTextTags(hICC, L"Linearization built-in")) goto Error;
+    if (!cmsWriteTag(hICC, cmsSigAToB0Tag, (void*) Lin)) goto Error;
+    if (!SetSeqDescTag(hICC, "Linearization built-in")) goto Error;
+
+    // The tag has been written, so the local pipeline can be released
+    cmsPipelineFree(Lin);
+
+    return hICC;
+
+Error:
+    cmsPipelineFree(Lin);
+    cmsCloseProfile(hICC);
+
+    return NULL;
+}
+
+// Convenience form of cmsCreateLinearizationDeviceLinkTHR that passes NULL as the context.
+cmsHPROFILE CMSEXPORT cmsCreateLinearizationDeviceLink(cmsColorSpaceSignature ColorSpace,
+                                                                 cmsToneCurve* const TransferFunctions[])
+{
+    cmsHPROFILE hProfile = cmsCreateLinearizationDeviceLinkTHR(NULL, ColorSpace, TransferFunctions);
+
+    return hProfile;
+}
+
+// Ink-limiting sampler
+//
+//  Total = C + M + Y + K
+//
+//  When Total exceeds the ink limit, C, M and Y are scaled by
+//
+//        Ratio = 1 - (Total - InkLimit) / (C + M + Y)     (clamped at 0)
+//
+//  otherwise Ratio is 1 and they pass through unchanged.
+//  K is never modified.
+//
+//  Cargo points to the limit as a cmsFloat64Number; it is multiplied by
+//  655.35 before being compared against the 16-bit channel values.
+
+static
+int InkLimitingSampler(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[], CMSREGISTER void* Cargo)
+{
+    cmsFloat64Number Limit = *(cmsFloat64Number *) Cargo * 655.35;
+    cmsFloat64Number CMY   = In[0] + In[1] + In[2];
+    cmsFloat64Number Total = CMY + In[3];
+    cmsFloat64Number Scale = 1;
+    int i;
+
+    if (Total > Limit) {
+
+        Scale = 1 - ((Total - Limit) / CMY);
+        if (Scale < 0)
+            Scale = 0;
+    }
+
+    // C, M and Y are scaled
+    for (i = 0; i < 3; i++)
+        Out[i] = _cmsQuickSaturateWord(In[i] * Scale);
+
+    // K is left untouched
+    Out[3] = In[3];
+
+    return TRUE;
+}
+
+// Creates a CMYK device link profile that applies ink limiting. Only
+// cmsSigCmykData is accepted as ColorSpace. A Limit outside 0..400 is
+// reported through cmsSignalError and then clamped to that range.
+// Returns the new profile, or NULL on failure.
+
+cmsHPROFILE CMSEXPORT cmsCreateInkLimitingDeviceLinkTHR(cmsContext ContextID,
+                                                     cmsColorSpaceSignature ColorSpace,
+                                                     cmsFloat64Number Limit)
+{
+    cmsHPROFILE hICC;
+    cmsPipeline* Link;
+    cmsStage* Grid;
+    cmsUInt32Number nChan;
+
+    if (ColorSpace != cmsSigCmykData) {
+        cmsSignalError(ContextID, cmsERROR_COLORSPACE_CHECK, "InkLimiting: Only CMYK currently supported");
+        return NULL;
+    }
+
+    // Out-of-range limits are reported, then clamped
+    if (Limit < 0.0) {
+
+        cmsSignalError(ContextID, cmsERROR_RANGE, "InkLimiting: Limit should be between 0..400");
+        Limit = 0;
+    }
+    else if (Limit > 400) {
+
+        cmsSignalError(ContextID, cmsERROR_RANGE, "InkLimiting: Limit should be between 0..400");
+        Limit = 400;
+    }
+
+    hICC = cmsCreateProfilePlaceholder(ContextID);
+    if (hICC == NULL)                   // can't allocate
+        return NULL;
+
+    cmsSetProfileVersion(hICC, 4.3);
+
+    cmsSetDeviceClass(hICC, cmsSigLinkClass);
+    cmsSetColorSpace(hICC,  ColorSpace);
+    cmsSetPCS(hICC,         ColorSpace);
+
+    cmsSetHeaderRenderingIntent(hICC, INTENT_PERCEPTUAL);
+
+    // The pipeline is: identity curves, a 17-point CLUT, identity curves
+    Link = cmsPipelineAlloc(ContextID, 4, 4);
+    if (Link == NULL) goto Error;
+
+    nChan = cmsChannelsOf(ColorSpace);
+
+    Grid = cmsStageAllocCLut16bit(ContextID, 17, nChan, nChan, NULL);
+    if (Grid == NULL) goto Error;
+
+    // Fill the grid nodes through the ink-limiting sampler
+    if (!cmsStageSampleCLut16bit(Grid, InkLimitingSampler, (void*) &Limit, 0)) goto Error;
+
+    if (!cmsPipelineInsertStage(Link, cmsAT_BEGIN, _cmsStageAllocIdentityCurves(ContextID, nChan))) goto Error;
+    if (!cmsPipelineInsertStage(Link, cmsAT_END, Grid)) goto Error;
+    if (!cmsPipelineInsertStage(Link, cmsAT_END, _cmsStageAllocIdentityCurves(ContextID, nChan))) goto Error;
+
+    // Create tags
+    if (!SetTextTags(hICC, L"ink-limiting built-in")) goto Error;
+
+    if (!cmsWriteTag(hICC, cmsSigAToB0Tag, (void*) Link)) goto Error;
+    if (!SetSeqDescTag(hICC, "ink-limiting built-in")) goto Error;
+
+    // The tag has been written, so the local pipeline can be released
+    cmsPipelineFree(Link);
+
+    return hICC;
+
+Error:
+    if (Link != NULL)
+        cmsPipelineFree(Link);
+
+    cmsCloseProfile(hICC);
+
+    return NULL;
+}
+
+// Convenience form of cmsCreateInkLimitingDeviceLinkTHR that passes NULL as the context.
+cmsHPROFILE CMSEXPORT cmsCreateInkLimitingDeviceLink(cmsColorSpaceSignature ColorSpace, cmsFloat64Number Limit)
+{
+    cmsHPROFILE hProfile = cmsCreateInkLimitingDeviceLinkTHR(NULL, ColorSpace, Limit);
+
+    return hProfile;
+}
+
+
+// Creates a fake Lab identity profile, version 2.1.
+//
+// The profile starts out as an RGB profile built from WhitePoint (D50 when
+// WhitePoint is NULL) and is then turned into an abstract Lab -> Lab profile
+// whose AToB0 tag is a 3-channel pipeline holding an identity CLUT.
+//
+// Returns the new profile, or NULL on failure. Every failure after the
+// profile has been created goes through the Error label, so the profile and
+// the pipeline are always released.
+cmsHPROFILE CMSEXPORT cmsCreateLab2ProfileTHR(cmsContext ContextID, const cmsCIExyY* WhitePoint)
+{
+    cmsHPROFILE hProfile;
+    cmsPipeline* LUT = NULL;
+
+    hProfile = cmsCreateRGBProfileTHR(ContextID, WhitePoint == NULL ? cmsD50_xyY() : WhitePoint, NULL, NULL);
+    if (hProfile == NULL) return NULL;
+
+    cmsSetProfileVersion(hProfile, 2.1);
+
+    cmsSetDeviceClass(hProfile, cmsSigAbstractClass);
+    cmsSetColorSpace(hProfile,  cmsSigLabData);
+    cmsSetPCS(hProfile,         cmsSigLabData);
+
+    // A plain `return NULL` here would leak hProfile; use the common cleanup path
+    if (!SetTextTags(hProfile, L"Lab identity built-in")) goto Error;
+
+    // An identity LUT is all we need
+    LUT = cmsPipelineAlloc(ContextID, 3, 3);
+    if (LUT == NULL) goto Error;
+
+    if (!cmsPipelineInsertStage(LUT, cmsAT_BEGIN, _cmsStageAllocIdentityCLut(ContextID, 3)))
+        goto Error;
+
+    if (!cmsWriteTag(hProfile, cmsSigAToB0Tag, LUT)) goto Error;
+
+    // The tag has been written, so the local pipeline can be released
+    cmsPipelineFree(LUT);
+
+    return hProfile;
+
+Error:
+
+    if (LUT != NULL)
+        cmsPipelineFree(LUT);
+
+    if (hProfile != NULL)
+        cmsCloseProfile(hProfile);
+
+    return NULL;
+}
+
+
+// Convenience form of cmsCreateLab2ProfileTHR that passes NULL as the context.
+cmsHPROFILE CMSEXPORT cmsCreateLab2Profile(const cmsCIExyY* WhitePoint)
+{
+    cmsHPROFILE hProfile = cmsCreateLab2ProfileTHR(NULL, WhitePoint);
+
+    return hProfile;
+}
+
+
+// Creates a fake Lab identity profile, version 4.3. The profile starts out as
+// an RGB profile built from WhitePoint (D50 when WhitePoint is NULL) and is
+// then turned into an abstract Lab -> Lab profile whose AToB0 tag is a
+// 3-channel pipeline of identity curves.
+// Returns the new profile, or NULL on failure.
+cmsHPROFILE CMSEXPORT cmsCreateLab4ProfileTHR(cmsContext ContextID, const cmsCIExyY* WhitePoint)
+{
+    const cmsCIExyY* White = WhitePoint;
+    cmsPipeline* Identity = NULL;
+    cmsHPROFILE hProfile;
+
+    if (White == NULL)
+        White = cmsD50_xyY();
+
+    hProfile = cmsCreateRGBProfileTHR(ContextID, White, NULL, NULL);
+    if (hProfile == NULL)
+        return NULL;
+
+    cmsSetProfileVersion(hProfile, 4.3);
+
+    cmsSetDeviceClass(hProfile, cmsSigAbstractClass);
+    cmsSetColorSpace(hProfile,  cmsSigLabData);
+    cmsSetPCS(hProfile,         cmsSigLabData);
+
+    // The steps short-circuit: text tags, empty pipeline, identity curves, AToB0 tag
+    if (SetTextTags(hProfile, L"Lab identity built-in") &&
+        (Identity = cmsPipelineAlloc(ContextID, 3, 3)) != NULL &&
+        cmsPipelineInsertStage(Identity, cmsAT_BEGIN, _cmsStageAllocIdentityCurves(ContextID, 3)) &&
+        cmsWriteTag(hProfile, cmsSigAToB0Tag, Identity)) {
+
+        cmsPipelineFree(Identity);
+        return hProfile;
+    }
+
+    // Something failed: release whatever was built so far
+    if (Identity != NULL)
+        cmsPipelineFree(Identity);
+
+    cmsCloseProfile(hProfile);
+
+    return NULL;
+}
+
+// Convenience form of cmsCreateLab4ProfileTHR that passes NULL as the context.
+cmsHPROFILE CMSEXPORT cmsCreateLab4Profile(const cmsCIExyY* WhitePoint)
+{
+    cmsHPROFILE hProfile = cmsCreateLab4ProfileTHR(NULL, WhitePoint);
+
+    return hProfile;
+}
+
+
+// Creates a fake XYZ identity profile, version 4.3. The profile starts out as
+// an RGB profile built from the D50 white point and is then turned into an
+// abstract XYZ -> XYZ profile whose AToB0 tag is a 3-channel pipeline of
+// identity curves.
+// Returns the new profile, or NULL on failure.
+cmsHPROFILE CMSEXPORT cmsCreateXYZProfileTHR(cmsContext ContextID)
+{
+    cmsPipeline* Identity = NULL;
+    cmsHPROFILE hProfile = cmsCreateRGBProfileTHR(ContextID, cmsD50_xyY(), NULL, NULL);
+
+    if (hProfile == NULL)
+        return NULL;
+
+    cmsSetProfileVersion(hProfile, 4.3);
+
+    cmsSetDeviceClass(hProfile, cmsSigAbstractClass);
+    cmsSetColorSpace(hProfile,  cmsSigXYZData);
+    cmsSetPCS(hProfile,         cmsSigXYZData);
+
+    // The steps short-circuit: text tags, empty pipeline, identity curves, AToB0 tag
+    if (SetTextTags(hProfile, L"XYZ identity built-in") &&
+        (Identity = cmsPipelineAlloc(ContextID, 3, 3)) != NULL &&
+        cmsPipelineInsertStage(Identity, cmsAT_BEGIN, _cmsStageAllocIdentityCurves(ContextID, 3)) &&
+        cmsWriteTag(hProfile, cmsSigAToB0Tag, Identity)) {
+
+        cmsPipelineFree(Identity);
+        return hProfile;
+    }
+
+    // Something failed: release whatever was built so far
+    if (Identity != NULL)
+        cmsPipelineFree(Identity);
+
+    cmsCloseProfile(hProfile);
+
+    return NULL;
+}
+
+
+// Convenience form of cmsCreateXYZProfileTHR that passes NULL as the context.
+cmsHPROFILE CMSEXPORT cmsCreateXYZProfile(void)
+{
+    cmsHPROFILE hProfile = cmsCreateXYZProfileTHR(NULL);
+
+    return hProfile;
+}
+
+
+// sRGB curves are defined, for each of R, G and B, by:
+//
+//    below 0.04045:              linear = encoded / 12.92
+//    at 0.04045 and above:       linear = ((encoded + 0.055) / 1.055) ^ 2.4
+//
+// The five constants below carry those values to a type 4 parametric curve.
+
+static
+cmsToneCurve* Build_sRGBGamma(cmsContext ContextID)
+{
+    cmsFloat64Number Parameters[5] = {
+        2.4,                // exponent
+        1. / 1.055,         // scale inside the power segment
+        0.055 / 1.055,      // offset inside the power segment
+        1. / 12.92,         // slope of the linear segment
+        0.04045             // threshold between the two segments
+    };
+
+    return cmsBuildParametricToneCurve(ContextID, 4, Parameters);
+}
+
+// Creates the virtual sRGB profile: D65 white point, Rec709 primaries and the
+// sRGB transfer curve shared by the three channels.
+// Returns the new profile, or NULL on failure.
+cmsHPROFILE CMSEXPORT cmsCreate_sRGBProfileTHR(cmsContext ContextID)
+{
+    cmsCIExyY       D65 = { 0.3127, 0.3290, 1.0 };
+    cmsCIExyYTRIPLE Rec709Primaries = {
+                                {0.6400, 0.3300, 1.0},
+                                {0.3000, 0.6000, 1.0},
+                                {0.1500, 0.0600, 1.0}
+                                };
+    cmsToneCurve* Shared = Build_sRGBGamma(ContextID);
+    cmsToneCurve* Curves[3];
+    cmsHPROFILE   hsRGB;
+
+    if (Shared == NULL)
+        return NULL;
+
+    // The same curve object is used for R, G and B
+    Curves[0] = Shared;
+    Curves[1] = Shared;
+    Curves[2] = Shared;
+
+    hsRGB = cmsCreateRGBProfileTHR(ContextID, &D65, &Rec709Primaries, Curves);
+
+    // The curve is released whether or not the profile could be built
+    cmsFreeToneCurve(Shared);
+
+    if (hsRGB == NULL)
+        return NULL;
+
+    // Replace the generic description with the sRGB one
+    if (SetTextTags(hsRGB, L"sRGB built-in"))
+        return hsRGB;
+
+    cmsCloseProfile(hsRGB);
+    return NULL;
+}
+
+// Convenience form of cmsCreate_sRGBProfileTHR that passes NULL as the context.
+cmsHPROFILE CMSEXPORT cmsCreate_sRGBProfile(void)
+{
+    cmsHPROFILE hProfile = cmsCreate_sRGBProfileTHR(NULL);
+
+    return hProfile;
+}
+
+
+
+typedef struct {                                // Adjustments handed to bchswSampler through its Cargo pointer
+                cmsFloat64Number Brightness;    // Added to L after the contrast scaling
+                cmsFloat64Number Contrast;      // Multiplies L
+                cmsFloat64Number Hue;           // Added to h
+                cmsFloat64Number Saturation;    // Added to C
+                cmsBool          lAdjustWP;     // When TRUE, Lab is re-referenced from WPsrc to WPdest
+                cmsCIEXYZ WPsrc, WPdest;        // White points used for the Lab -> XYZ -> Lab step
+
+} BCHSWADJUSTS, *LPBCHSWADJUSTS;
+
+
+// CLUT sampler: decodes the input Lab value, applies the brightness,
+// contrast, hue and saturation adjustments found in Cargo (a BCHSWADJUSTS)
+// in LCh, optionally moves the white point, and encodes the result to Out.
+static
+int bchswSampler(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[], CMSREGISTER void* Cargo)
+{
+    LPBCHSWADJUSTS Adjust = (LPBCHSWADJUSTS) Cargo;
+    cmsCIELab Lab;
+    cmsCIELCh Source, Target;
+
+    // Encoded Lab -> float Lab -> LCh
+    cmsLabEncoded2Float(&Lab, In);
+    cmsLab2LCh(&Source, &Lab);
+
+    // Adjust in LCh
+    Target.L = Source.L * Adjust->Contrast + Adjust->Brightness;
+    Target.C = Source.C + Adjust->Saturation;
+    Target.h = Source.h + Adjust->Hue;
+
+    // Back to Lab
+    cmsLCh2Lab(&Lab, &Target);
+
+    // Move the white point by a round trip through XYZ
+    if (Adjust->lAdjustWP) {
+
+        cmsCIEXYZ XYZ;
+
+        cmsLab2XYZ(&Adjust->WPsrc, &XYZ, &Lab);
+        cmsXYZ2Lab(&Adjust->WPdest, &Lab, &XYZ);
+    }
+
+    // Back to encoded
+    cmsFloat2LabEncoded(Out, &Lab);
+
+    return TRUE;
+}
+
+
+// Creates an abstract profile operating in Lab space for brightness,
+// contrast, hue, saturation and white point displacement.
+//
+//   nLUTPoints       number of grid points of the CLUT on each dimension
+//   Bright           added to L (after contrast)
+//   Contrast         multiplies L
+//   Hue              added to h
+//   Saturation       added to C
+//   TempSrc/TempDest passed to cmsWhitePointFromTemp; when they differ, the
+//                    white point is moved from the source to the destination
+//
+// Returns the new profile, or NULL on failure. Once the pipeline exists,
+// every failure goes through the Error label so that both the pipeline and
+// the profile are released.
+
+cmsHPROFILE CMSEXPORT cmsCreateBCHSWabstractProfileTHR(cmsContext ContextID,
+                                                       cmsUInt32Number nLUTPoints,
+                                                       cmsFloat64Number Bright,
+                                                       cmsFloat64Number Contrast,
+                                                       cmsFloat64Number Hue,
+                                                       cmsFloat64Number Saturation,
+                                                       cmsUInt32Number TempSrc,
+                                                       cmsUInt32Number TempDest)
+{
+    cmsHPROFILE hICC;
+    cmsPipeline* Pipeline;
+    BCHSWADJUSTS bchsw;
+    cmsCIExyY WhitePnt;
+    cmsStage* CLUT;
+    cmsUInt32Number Dimensions[MAX_INPUT_DIMENSIONS];
+    cmsUInt32Number i;
+
+    bchsw.Brightness = Bright;
+    bchsw.Contrast   = Contrast;
+    bchsw.Hue        = Hue;
+    bchsw.Saturation = Saturation;
+    if (TempSrc == TempDest) {
+
+           // Same temperature on both sides: no white point displacement
+           bchsw.lAdjustWP = FALSE;
+    }
+    else {
+           bchsw.lAdjustWP = TRUE;
+           cmsWhitePointFromTemp(&WhitePnt, TempSrc);
+           cmsxyY2XYZ(&bchsw.WPsrc, &WhitePnt);
+           cmsWhitePointFromTemp(&WhitePnt, TempDest);
+           cmsxyY2XYZ(&bchsw.WPdest, &WhitePnt);
+
+    }
+
+    hICC = cmsCreateProfilePlaceholder(ContextID);
+    if (!hICC)                          // can't allocate
+        return NULL;
+
+    cmsSetDeviceClass(hICC,      cmsSigAbstractClass);
+    cmsSetColorSpace(hICC,       cmsSigLabData);
+    cmsSetPCS(hICC,              cmsSigLabData);
+
+    cmsSetHeaderRenderingIntent(hICC,  INTENT_PERCEPTUAL);
+
+    // Creates a Pipeline with 3D grid only
+    Pipeline = cmsPipelineAlloc(ContextID, 3, 3);
+    if (Pipeline == NULL) {
+        cmsCloseProfile(hICC);
+        return NULL;
+    }
+
+    for (i=0; i < MAX_INPUT_DIMENSIONS; i++) Dimensions[i] = nLUTPoints;
+    CLUT = cmsStageAllocCLut16bitGranular(ContextID, Dimensions, 3, 3, NULL);
+    if (CLUT == NULL) goto Error;
+
+
+    if (!cmsStageSampleCLut16bit(CLUT, bchswSampler, (void*) &bchsw, 0)) {
+
+        // Shouldn't reach here
+        goto Error;
+    }
+
+    if (!cmsPipelineInsertStage(Pipeline, cmsAT_END, CLUT)) {
+        goto Error;
+    }
+
+    // Create tags. A plain `return NULL` here would leak both the pipeline and
+    // the profile, and an unchecked write could hand back a profile without
+    // its AToB0 tag, so every step uses the common cleanup path.
+    if (!SetTextTags(hICC, L"BCHS built-in")) goto Error;
+
+    if (!cmsWriteTag(hICC, cmsSigMediaWhitePointTag, (void*) cmsD50_XYZ())) goto Error;
+
+    if (!cmsWriteTag(hICC, cmsSigAToB0Tag, (void*) Pipeline)) goto Error;
+
+    // The tag has been written, so the local pipeline can be released
+    cmsPipelineFree(Pipeline);
+
+    // Ok, done
+    return hICC;
+
+Error:
+    cmsPipelineFree(Pipeline);
+    cmsCloseProfile(hICC);
+    return NULL;
+}
+
+
+// Convenience form of cmsCreateBCHSWabstractProfileTHR that passes NULL as the context.
+CMSAPI cmsHPROFILE   CMSEXPORT cmsCreateBCHSWabstractProfile(cmsUInt32Number nLUTPoints,
+                                                             cmsFloat64Number Bright,
+                                                             cmsFloat64Number Contrast,
+                                                             cmsFloat64Number Hue,
+                                                             cmsFloat64Number Saturation,
+                                                             cmsUInt32Number TempSrc,
+                                                             cmsUInt32Number TempDest)
+{
+    cmsHPROFILE hProfile = cmsCreateBCHSWabstractProfileTHR(NULL, nLUTPoints, Bright, Contrast,
+                                                            Hue, Saturation, TempSrc, TempDest);
+
+    return hProfile;
+}
+
+
+// Creates a fake NULL profile: a gray output profile with Lab PCS whose
+// BToA0 pipeline (3 inputs, 1 output) is built from all-zero tone curves, so
+// the single output channel is always 0.
+// It is useful only for gamut checking tricks.
+// Returns the new profile, or NULL on failure.
+cmsHPROFILE CMSEXPORT cmsCreateNULLProfileTHR(cmsContext ContextID)
+{
+    const cmsFloat64Number PickLstarMatrix[] = { 1, 0, 0 };
+    cmsUInt16Number Zero[2] = { 0, 0 };
+    cmsToneCurve* ZeroCurves[3];
+    cmsToneCurve* Flat;
+    cmsPipeline* LUT = NULL;
+    cmsStage* PostLin;
+    cmsStage* OutLin;
+    cmsHPROFILE hProfile;
+
+    hProfile = cmsCreateProfilePlaceholder(ContextID);
+    if (hProfile == NULL)               // can't allocate
+        return NULL;
+
+    cmsSetProfileVersion(hProfile, 4.3);
+
+    if (!SetTextTags(hProfile, L"NULL profile built-in")) goto Error;
+
+    cmsSetDeviceClass(hProfile, cmsSigOutputClass);
+    cmsSetColorSpace(hProfile,  cmsSigGrayData);
+    cmsSetPCS(hProfile,         cmsSigLabData);
+
+    // Create a valid ICC 4 structure
+    LUT = cmsPipelineAlloc(ContextID, 3, 1);
+    if (LUT == NULL) goto Error;
+
+    // A single two-entry, all-zero curve is shared by every channel
+    Flat = cmsBuildTabulatedToneCurve16(ContextID, 2, Zero);
+    ZeroCurves[0] = Flat;
+    ZeroCurves[1] = Flat;
+    ZeroCurves[2] = Flat;
+
+    PostLin = cmsStageAllocToneCurves(ContextID, 3, ZeroCurves);
+    OutLin  = cmsStageAllocToneCurves(ContextID, 1, ZeroCurves);
+    cmsFreeToneCurve(Flat);
+
+    // 3 curves -> 3x1 matrix picking the first channel -> 1 curve.
+    // The chain short-circuits, so the matrix stage is only allocated
+    // after the first insertion succeeded.
+    if (!cmsPipelineInsertStage(LUT, cmsAT_END, PostLin) ||
+        !cmsPipelineInsertStage(LUT, cmsAT_END, cmsStageAllocMatrix(ContextID, 1, 3, PickLstarMatrix, NULL)) ||
+        !cmsPipelineInsertStage(LUT, cmsAT_END, OutLin))
+        goto Error;
+
+    if (!cmsWriteTag(hProfile, cmsSigBToA0Tag, (void*) LUT)) goto Error;
+    if (!cmsWriteTag(hProfile, cmsSigMediaWhitePointTag, cmsD50_XYZ())) goto Error;
+
+    cmsPipelineFree(LUT);
+    return hProfile;
+
+Error:
+
+    if (LUT != NULL)
+        cmsPipelineFree(LUT);
+
+    cmsCloseProfile(hProfile);
+
+    return NULL;
+}
+
+// Convenience form of cmsCreateNULLProfileTHR that passes NULL as the context.
+cmsHPROFILE CMSEXPORT cmsCreateNULLProfile(void)
+{
+    cmsHPROFILE hProfile = cmsCreateNULLProfileTHR(NULL);
+
+    return hProfile;
+}
+
+
+// Returns non-zero when ColorSpace is one of the two connection spaces, XYZ or Lab.
+static
+int IsPCS(cmsColorSpaceSignature ColorSpace)
+{
+    switch (ColorSpace) {
+
+    case cmsSigXYZData:
+    case cmsSigLabData:
+        return 1;
+
+    default:
+        return 0;
+    }
+}
+
+
+// Fills in the device class, color space and PCS of the profile header.
+// With cmsFLAGS_GUESSDEVICECLASS the class is picked from which side is a PCS (XYZ or Lab);
+// without the flag, or when neither side is a PCS, the profile is marked as a device link.
+static
+void FixColorSpaces(cmsHPROFILE hProfile,
+                              cmsColorSpaceSignature ColorSpace,
+                              cmsColorSpaceSignature PCS,
+                              cmsUInt32Number dwFlags)
+{
+    // Default header: a device link that keeps both spaces as given
+    cmsProfileClassSignature Class   = cmsSigLinkClass;
+    cmsColorSpaceSignature DataSpace = ColorSpace;
+    cmsColorSpaceSignature ConnSpace = PCS;
+
+    if (dwFlags & cmsFLAGS_GUESSDEVICECLASS) {
+
+        const int InIsPCS  = IsPCS(ColorSpace);
+        const int OutIsPCS = IsPCS(PCS);
+
+        if (InIsPCS && OutIsPCS) {
+            Class = cmsSigAbstractClass;
+        }
+        else if (InIsPCS) {
+            // Only the entry side is a PCS: output profile, so the two spaces trade places
+            Class     = cmsSigOutputClass;
+            DataSpace = PCS;
+            ConnSpace = ColorSpace;
+        }
+        else if (OutIsPCS) {
+            Class = cmsSigInputClass;
+        }
+    }
+
+    cmsSetDeviceClass(hProfile, Class);
+    cmsSetColorSpace(hProfile,  DataSpace);
+    cmsSetPCS(hProfile,         ConnSpace);
+}
+
+
+
+// This function creates a named color profile dumping all the contents of transform to a single profile
+// In this way, LittleCMS may be used to "group" several named color databases into a single profile.
+// It has, however, several minor limitations. PCS is always Lab, which is not very critical since this
+// is the normal PCS for named color profiles. Returns the new profile, or NULL on failure.
+static
+cmsHPROFILE CreateNamedColorDevicelink(cmsHTRANSFORM xform)
+{
+    _cmsTRANSFORM* v = (_cmsTRANSFORM*) xform;
+    cmsHPROFILE hICC = NULL;
+    cmsUInt32Number i, nColors;
+    cmsNAMEDCOLORLIST *nc2 = NULL, *Original = NULL;
+
+    // Create an empty placeholder
+    hICC = cmsCreateProfilePlaceholder(v->ContextID);
+    if (hICC == NULL) return NULL;
+
+    // Critical information
+    cmsSetDeviceClass(hICC, cmsSigNamedColorClass);
+    cmsSetColorSpace(hICC, v ->ExitColorSpace);
+    cmsSetPCS(hICC, cmsSigLabData);
+
+    // Tag profile with information
+    if (!SetTextTags(hICC, L"Named color devicelink")) goto Error;
+
+    Original = cmsGetNamedColorList(xform);
+    if (Original == NULL) goto Error;
+    // Work on a private copy (nc2); it is released on every exit path below
+    nColors = cmsNamedColorCount(Original);
+    nc2     = cmsDupNamedColorList(Original);
+    if (nc2 == NULL) goto Error;
+
+    // Colorant count now depends on the output space
+    nc2 ->ColorantCount = cmsPipelineOutputChannels(v ->Lut);
+
+    // Make sure we have proper formatters
+    cmsChangeBuffersFormat(xform, TYPE_NAMED_COLOR_INDEX,
+        FLOAT_SH(0) | COLORSPACE_SH(_cmsLCMScolorSpace(v ->ExitColorSpace))
+        | BYTES_SH(2) | CHANNELS_SH(cmsChannelsOf(v ->ExitColorSpace)));
+    // NOTE(review): &i is a 32-bit index; confirm it matches the sample width of TYPE_NAMED_COLOR_INDEX on big-endian targets
+    // Apply the transform to colorants.
+    for (i=0; i < nColors; i++) {
+        cmsDoTransform(xform, &i, nc2 ->List[i].DeviceColorant, 1);
+    }
+
+    if (!cmsWriteTag(hICC, cmsSigNamedColor2Tag, (void*) nc2)) goto Error;
+    cmsFreeNamedColorList(nc2);
+    return hICC;
+
+Error:
+    if (nc2 != NULL) cmsFreeNamedColorList(nc2);    // the copy was leaked here when the tag write failed
+    if (hICC != NULL) cmsCloseProfile(hICC);
+    return NULL;
+}
+
+
+// This structure holds information about which stage (MPE) sequences can be stored on a profile based on the version
+// Each row of the table below is one storable layout; FindCombination() returns the first row that fits.
+typedef struct {
+    cmsBool              IsV4;             // Is a V4 tag?
+    cmsTagSignature      RequiredTag;      // Set to 0 for both types
+    cmsTagTypeSignature  LutType;          // The LUT type
+    int                  nTypes;           // Number of types (up to 5)
+    cmsStageSignature    MpeTypes[5];      // 5 is the maximum number
+
+} cmsAllowedLUT;
+
+#define cmsSig0 ((cmsTagSignature) 0) 
+
+static const cmsAllowedLUT AllowedLUTTypes[] = {
+    // IsV4, RequiredTag (cmsSig0 = no particular tag), LutType, nTypes, { MpeTypes in pipeline order }
+    { FALSE, cmsSig0,        cmsSigLut16Type, 4, { cmsSigMatrixElemType, cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType } },
+    { FALSE, cmsSig0,        cmsSigLut16Type, 3, { cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType } },
+    { FALSE, cmsSig0,        cmsSigLut16Type, 2, { cmsSigCurveSetElemType, cmsSigCLutElemType } },
+    { TRUE,  cmsSig0,        cmsSigLutAtoBType, 1, { cmsSigCurveSetElemType } },
+    { TRUE , cmsSigAToB0Tag, cmsSigLutAtoBType,  3,  { cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType } },
+    { TRUE , cmsSigAToB0Tag, cmsSigLutAtoBType,  3,  { cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType   } },
+    { TRUE , cmsSigAToB0Tag, cmsSigLutAtoBType,  5,  { cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType }},
+    { TRUE , cmsSigBToA0Tag, cmsSigLutBtoAType,  1,  { cmsSigCurveSetElemType }},
+    { TRUE , cmsSigBToA0Tag, cmsSigLutBtoAType,  3,  { cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType }},
+    { TRUE , cmsSigBToA0Tag, cmsSigLutBtoAType,  3,  { cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType }},
+    { TRUE , cmsSigBToA0Tag, cmsSigLutBtoAType,  5,  { cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType }}
+};
+// Number of rows in AllowedLUTTypes
+#define SIZE_OF_ALLOWED_LUT (sizeof(AllowedLUTTypes)/sizeof(cmsAllowedLUT))
+
+// Check a single entry: TRUE only if the pipeline stages match the entry's types one by one, with the same count
+static
+cmsBool CheckOne(const cmsAllowedLUT* Tab, const cmsPipeline* Lut)
+{
+    cmsStage* mpe;
+    int n;
+
+    for (n=0, mpe = Lut ->Elements; mpe != NULL; mpe = mpe ->Next, n++) {
+        // '>=' (not '>') so MpeTypes[n] is never read at n == nTypes, one past the end when nTypes is 5
+        if (n >= Tab ->nTypes) return FALSE;
+        if (cmsStageType(mpe) != Tab ->MpeTypes[n]) return FALSE;
+    }
+
+    return (n == Tab ->nTypes);
+}
+
+// Finds the first entry of AllowedLUTTypes that suits the profile version, the destination tag and
+// the stage layout of the pipeline. Returns NULL when no entry does.
+static
+const cmsAllowedLUT* FindCombination(const cmsPipeline* Lut, cmsBool IsV4, cmsTagSignature DestinationTag)
+{
+    const cmsAllowedLUT* Entry = AllowedLUTTypes;
+    const cmsAllowedLUT* const End = AllowedLUTTypes + SIZE_OF_ALLOWED_LUT;
+
+    for (; Entry != End; Entry++) {
+        // Same version family, and the entry either accepts any tag (0) or names this one
+        cmsBool SameVersion = (IsV4 == Entry ->IsV4);
+        cmsBool TagAccepted = (Entry ->RequiredTag == 0) || (Entry ->RequiredTag == DestinationTag);
+
+        if (SameVersion && TagAccepted && CheckOne(Entry, Lut)) return Entry;
+    }
+
+    return NULL;
+}
+// Does convert a transform into a profile holding an equivalent pipeline. A named color transform is turned
+// into a named color profile instead. Version is the ICC version to write; dwFlags may carry
+// cmsFLAGS_GUESSDEVICECLASS, cmsFLAGS_FORCE_CLUT and cmsFLAGS_8BITS_DEVICELINK. Returns NULL on failure.
+cmsHPROFILE CMSEXPORT cmsTransform2DeviceLink(cmsHTRANSFORM hTransform, cmsFloat64Number Version, cmsUInt32Number dwFlags)
+{
+    cmsHPROFILE hProfile = NULL;
+    cmsUInt32Number FrmIn, FrmOut, ChansIn, ChansOut;
+    int ColorSpaceBitsIn, ColorSpaceBitsOut;
+    _cmsTRANSFORM* xform = (_cmsTRANSFORM*) hTransform;
+    cmsPipeline* LUT = NULL;
+    cmsStage* mpe;
+    cmsContext ContextID = cmsGetTransformContextID(hTransform);
+    const cmsAllowedLUT* AllowedLUT;
+    cmsTagSignature DestinationTag;
+    cmsProfileClassSignature deviceClass; 
+
+    _cmsAssert(hTransform != NULL);
+
+    // Get the first mpe to check for named color
+    mpe = cmsPipelineGetPtrToFirstStage(xform ->Lut);
+
+    // Check if is a named color transform
+    if (mpe != NULL) {
+
+        if (cmsStageType(mpe) == cmsSigNamedColorElemType) {
+            return CreateNamedColorDevicelink(hTransform);
+        }
+    }
+
+    // First thing to do is to get a copy of the transformation
+    LUT = cmsPipelineDup(xform ->Lut);
+    if (LUT == NULL) return NULL;
+
+    // Time to fix the Lab2/Lab4 issue.
+    if ((xform ->EntryColorSpace == cmsSigLabData) && (Version < 4.0)) {
+
+        if (!cmsPipelineInsertStage(LUT, cmsAT_BEGIN, _cmsStageAllocLabV2ToV4curves(ContextID)))
+            goto Error;
+    }
+
+    // On the output side too
+    if ((xform ->ExitColorSpace) == cmsSigLabData && (Version < 4.0)) {
+
+        if (!cmsPipelineInsertStage(LUT, cmsAT_END, _cmsStageAllocLabV4ToV2(ContextID)))
+            goto Error;
+    }
+
+
+    hProfile = cmsCreateProfilePlaceholder(ContextID);
+    if (!hProfile) goto Error;                    // can't allocate
+
+    cmsSetProfileVersion(hProfile, Version);
+
+    FixColorSpaces(hProfile, xform -> EntryColorSpace, xform -> ExitColorSpace, dwFlags);
+
+    // Optimize the LUT and precalculate a devicelink
+
+    ChansIn  = cmsChannelsOf(xform -> EntryColorSpace);
+    ChansOut = cmsChannelsOf(xform -> ExitColorSpace);
+
+    ColorSpaceBitsIn  = _cmsLCMScolorSpace(xform -> EntryColorSpace);
+    ColorSpaceBitsOut = _cmsLCMScolorSpace(xform -> ExitColorSpace);
+    // Buffer formats for both ends, 2 bytes per channel
+    FrmIn  = COLORSPACE_SH(ColorSpaceBitsIn) | CHANNELS_SH(ChansIn)|BYTES_SH(2);
+    FrmOut = COLORSPACE_SH(ColorSpaceBitsOut) | CHANNELS_SH(ChansOut)|BYTES_SH(2);
+    // FixColorSpaces() may have chosen a class other than link; the class decides the destination tag
+    deviceClass = cmsGetDeviceClass(hProfile);
+
+     if (deviceClass == cmsSigOutputClass)
+         DestinationTag = cmsSigBToA0Tag;
+     else
+         DestinationTag = cmsSigAToB0Tag;
+
+    // Check if the profile/version can store the result
+    if (dwFlags & cmsFLAGS_FORCE_CLUT)
+        AllowedLUT = NULL;
+    else
+        AllowedLUT = FindCombination(LUT, Version >= 4.0, DestinationTag);
+
+    if (AllowedLUT == NULL) {
+
+        // Try to optimize
+        _cmsOptimizePipeline(ContextID, &LUT, xform ->RenderingIntent, &FrmIn, &FrmOut, &dwFlags);
+        AllowedLUT = FindCombination(LUT, Version >= 4.0, DestinationTag);
+
+    }
+
+    // If no way, then force CLUT that for sure can be written
+    if (AllowedLUT == NULL) {
+
+        cmsStage* FirstStage;
+        cmsStage* LastStage;
+
+        dwFlags |= cmsFLAGS_FORCE_CLUT;
+        _cmsOptimizePipeline(ContextID, &LUT, xform ->RenderingIntent, &FrmIn, &FrmOut, &dwFlags);
+
+        // Put identity curves if needed
+        FirstStage = cmsPipelineGetPtrToFirstStage(LUT);
+        if (FirstStage != NULL && FirstStage ->Type != cmsSigCurveSetElemType)
+             if (!cmsPipelineInsertStage(LUT, cmsAT_BEGIN, _cmsStageAllocIdentityCurves(ContextID, ChansIn)))
+                 goto Error;
+
+        LastStage = cmsPipelineGetPtrToLastStage(LUT);
+        if (LastStage != NULL && LastStage ->Type != cmsSigCurveSetElemType)
+             if (!cmsPipelineInsertStage(LUT, cmsAT_END,   _cmsStageAllocIdentityCurves(ContextID, ChansOut)))
+                 goto Error;
+
+        AllowedLUT = FindCombination(LUT, Version >= 4.0, DestinationTag);
+    }
+
+    // Something is wrong...
+    if (AllowedLUT == NULL) {
+        goto Error;
+    }
+
+
+    if (dwFlags & cmsFLAGS_8BITS_DEVICELINK)
+                     cmsPipelineSetSaveAs8bitsFlag(LUT, TRUE);
+
+    // Tag profile with information
+    if (!SetTextTags(hProfile, L"devicelink")) goto Error;
+
+    // Store result
+    if (!cmsWriteTag(hProfile, DestinationTag, LUT)) goto Error;
+
+
+    if (xform -> InputColorant != NULL) {
+           if (!cmsWriteTag(hProfile, cmsSigColorantTableTag, xform->InputColorant)) goto Error;
+    }
+
+    if (xform -> OutputColorant != NULL) {
+           if (!cmsWriteTag(hProfile, cmsSigColorantTableOutTag, xform->OutputColorant)) goto Error;
+    }
+    // The profile sequence is only written for device links
+    if ((deviceClass == cmsSigLinkClass) && (xform ->Sequence != NULL)) {
+        if (!_cmsWriteProfileSequence(hProfile, xform ->Sequence)) goto Error;
+    }
+
+    // Set the white point
+    if (deviceClass == cmsSigInputClass) {
+        if (!cmsWriteTag(hProfile, cmsSigMediaWhitePointTag, &xform ->EntryWhitePoint)) goto Error;
+    }
+    else {
+         if (!cmsWriteTag(hProfile, cmsSigMediaWhitePointTag, &xform ->ExitWhitePoint)) goto Error;
+    }
+
+  
+    // Per 7.2.15 in spec 4.3
+    cmsSetHeaderRenderingIntent(hProfile, xform ->RenderingIntent);
+    // The working copy of the pipeline is released on success as well as on failure
+    cmsPipelineFree(LUT);
+    return hProfile;
+
+Error:
+    if (LUT != NULL) cmsPipelineFree(LUT);
+    if (hProfile != NULL) cmsCloseProfile(hProfile);   // still NULL when failing before the placeholder exists
+    return NULL;
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmswtpnt.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmswtpnt.c
new file mode 100644
index 0000000000..fab0da2a8c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmswtpnt.c
@@ -0,0 +1,350 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// D50 white point as XYZ. The result points to function-static storage.
+const cmsCIEXYZ* CMSEXPORT cmsD50_XYZ(void)
+{
+    // Every call hands back the address of the same static object
+    static cmsCIEXYZ WhiteD50 = { cmsD50X, cmsD50Y, cmsD50Z };
+    return &WhiteD50;
+}
+// D50 white point as xyY, converted from cmsD50_XYZ() on every call into function-static storage.
+const cmsCIExyY* CMSEXPORT cmsD50_xyY(void)
+{
+    static cmsCIExyY WhiteD50;
+    const cmsCIEXYZ* SourceXYZ = cmsD50_XYZ();
+
+    cmsXYZ2xyY(&WhiteD50, SourceXYZ);
+    return &WhiteD50;
+}
+
+// Obtains WhitePoint from Temperature.
+// TempK is a correlated color temperature in kelvin; the result is written as xyY with Y = 1.0.
+// Only 4000K..25000K is accepted: otherwise an error is signalled, FALSE is returned
+// and *WhitePoint is left untouched.
+cmsBool  CMSEXPORT cmsWhitePointFromTemp(cmsCIExyY* WhitePoint, cmsFloat64Number TempK)
+{
+    const cmsFloat64Number Kelvin  = TempK;
+    const cmsFloat64Number Kelvin2 = Kelvin*Kelvin;      // Square
+    const cmsFloat64Number Kelvin3 = Kelvin2*Kelvin;     // Cube
+    cmsFloat64Number ChromaX, ChromaY;
+    // cmsFloat64Number M1, M2;
+
+    _cmsAssert(WhitePoint != NULL);
+
+    // Anything outside 4000K..25000K is rejected (a NaN fails this test as well)
+    if (!(Kelvin >= 4000. && Kelvin <= 25000.0)) {
+        cmsSignalError(0, cmsERROR_RANGE, "cmsWhitePointFromTemp: invalid temp");
+        return FALSE;
+    }
+
+    // x is a cubic in 1/T, with one set of coefficients per temperature band
+    if (Kelvin <= 7000.) {
+        // For correlated color temperature (T) between 4000K and 7000K:
+        ChromaX = -4.6070*(1E9/Kelvin3) + 2.9678*(1E6/Kelvin2) + 0.09911*(1E3/Kelvin) + 0.244063;
+    }
+    else {
+        // or for correlated color temperature (T) between 7000K and 25000K:
+        ChromaX = -2.0064*(1E9/Kelvin3) + 1.9018*(1E6/Kelvin2) + 0.24748*(1E3/Kelvin) + 0.237040;
+    }
+
+    // Obtain y(x)
+    ChromaY = -3.000*(ChromaX*ChromaX) + 2.870*ChromaX - 0.275;
+
+    // wave factors (not used, but here for future extensions)
+
+    // M1 = (-1.3515 - 1.7703*x + 5.9114 *y)/(0.0241 + 0.2562*x - 0.7341*y);
+    // M2 = (0.0300 - 31.4424*x + 30.0717*y)/(0.0241 + 0.2562*x - 0.7341*y);
+
+    // Luminance is fixed at 1.0
+    WhitePoint -> x = ChromaX;
+    WhitePoint -> y = ChromaY;
+    WhitePoint -> Y = 1.0;
+
+    return TRUE;
+}
+
+
+// Isotemperature lines used by cmsTempFromWhitePoint() (Robertson's method), one row per line.
+typedef struct {
+
+    cmsFloat64Number mirek;  // temp (in microreciprocal kelvin)
+    cmsFloat64Number ut;     // u coord of intersection w/ blackbody locus
+    cmsFloat64Number vt;     // v coord of intersection w/ blackbody locus
+    cmsFloat64Number tt;     // slope of the isotemperature line
+
+    } ISOTEMPERATURE;
+// Rows are sorted by increasing mirek; the lookup interpolates between two consecutive rows.
+static const ISOTEMPERATURE isotempdata[] = {
+//  {Mirek, Ut,       Vt,      Tt      }
+    {0,     0.18006,  0.26352,  -0.24341},
+    {10,    0.18066,  0.26589,  -0.25479},
+    {20,    0.18133,  0.26846,  -0.26876},
+    {30,    0.18208,  0.27119,  -0.28539},
+    {40,    0.18293,  0.27407,  -0.30470},
+    {50,    0.18388,  0.27709,  -0.32675},
+    {60,    0.18494,  0.28021,  -0.35156},
+    {70,    0.18611,  0.28342,  -0.37915},
+    {80,    0.18740,  0.28668,  -0.40955},
+    {90,    0.18880,  0.28997,  -0.44278},
+    {100,   0.19032,  0.29326,  -0.47888},
+    {125,   0.19462,  0.30141,  -0.58204},
+    {150,   0.19962,  0.30921,  -0.70471},
+    {175,   0.20525,  0.31647,  -0.84901},
+    {200,   0.21142,  0.32312,  -1.0182 },
+    {225,   0.21807,  0.32909,  -1.2168 },
+    {250,   0.22511,  0.33439,  -1.4512 },
+    {275,   0.23247,  0.33904,  -1.7298 },
+    {300,   0.24010,  0.34308,  -2.0637 },
+    {325,   0.24792,  0.34655,  -2.4681 },  // was 0.24702, a known misprint that breaks the steady growth of the ut steps
+    {350,   0.25591,  0.34951,  -2.9641 },
+    {375,   0.26400,  0.35200,  -3.5814 },
+    {400,   0.27218,  0.35407,  -4.3633 },
+    {425,   0.28039,  0.35577,  -5.3762 },
+    {450,   0.28863,  0.35714,  -6.7262 },
+    {475,   0.29685,  0.35823,  -8.5955 },
+    {500,   0.30505,  0.35907,  -11.324 },
+    {525,   0.31320,  0.35968,  -15.628 },
+    {550,   0.32129,  0.36011,  -23.325 },
+    {575,   0.32931,  0.36038,  -40.770 },
+    {600,   0.33724,  0.36051,  -116.45  }
+};
+// Number of rows; parenthesized so the macro stays a single operand wherever it is expanded
+#define NISO (sizeof(isotempdata)/sizeof(ISOTEMPERATURE))
+
+
+// Robertson's method: writes the estimated temperature (kelvin) to *TempK and returns TRUE, or returns FALSE if no table interval fits
+cmsBool  CMSEXPORT cmsTempFromWhitePoint(cmsFloat64Number* TempK, const cmsCIExyY* WhitePoint)
+{
+    cmsUInt32Number j;
+    cmsFloat64Number us,vs;
+    cmsFloat64Number uj,vj,tj,di,dj,mi,mj;
+    cmsFloat64Number xs, ys;
+
+    _cmsAssert(WhitePoint != NULL);
+    _cmsAssert(TempK != NULL);
+
+    di = mi = 0;
+    xs = WhitePoint -> x;
+    ys = WhitePoint -> y;
+
+    // convert (x,y) to CIE 1960 (u,v)
+
+    us = (2*xs) / (-xs + 6*ys + 1.5);
+    vs = (3*ys) / (-xs + 6*ys + 1.5);
+
+    // Walk the table until the signed distance to the isotemperature line changes sign
+    for (j=0; j < NISO; j++) {
+
+        uj = isotempdata[j].ut;
+        vj = isotempdata[j].vt;
+        tj = isotempdata[j].tt;
+        mj = isotempdata[j].mirek;
+        // Signed distance from (us,vs) to the line through (uj,vj) with slope tj
+        dj = ((vs - vj) - tj * (us - uj)) / sqrt(1.0 + tj * tj);
+
+        if ((j != 0) && (di/dj < 0.0)) {
+
+            // Found a match: interpolate in mirek between rows j-1 and j, then convert to kelvin
+            *TempK = 1000000.0 / (mi + (di / (di - dj)) * (mj - mi));
+            return TRUE;
+        }
+        // Remember this row as the previous one for the next iteration
+        di = dj;
+        mi = mj;
+    }
+
+    // Not found: *TempK is not written
+    return FALSE;
+}
+
+
+// Compute chromatic adaptation matrix using Chad as cone matrix.
+// The matrix taking SourceWhitePoint to DestWhitePoint is stored in Conversion.
+// Returns FALSE, without writing Conversion, when Chad cannot be inverted.
+
+static
+cmsBool ComputeChromaticAdaptation(cmsMAT3* Conversion,
+                                const cmsCIEXYZ* SourceWhitePoint,
+                                const cmsCIEXYZ* DestWhitePoint,
+                                const cmsMAT3* Chad)
+{
+    cmsMAT3 ChadCopy = *Chad;
+    cmsMAT3 ChadInverse, Gain, GainTimesChad;
+    cmsVEC3 SourceXYZ, SourceCone;
+    cmsVEC3 DestXYZ, DestCone;
+
+    // The inverse is taken from a private copy of the cone matrix
+    if (!_cmsMAT3inverse(&ChadCopy, &ChadInverse)) return FALSE;
+
+    // Both white points as plain vectors...
+    _cmsVEC3init(&SourceXYZ, SourceWhitePoint -> X,
+                             SourceWhitePoint -> Y,
+                             SourceWhitePoint -> Z);
+
+    _cmsVEC3init(&DestXYZ,   DestWhitePoint -> X,
+                             DestWhitePoint -> Y,
+                             DestWhitePoint -> Z);
+
+    // ...and after going through the cone matrix
+    _cmsMAT3eval(&SourceCone, Chad, &SourceXYZ);
+    _cmsMAT3eval(&DestCone,   Chad, &DestXYZ);
+
+    // Build matrix: a diagonal holding the destination/source ratio of each cone channel
+    _cmsVEC3init(&Gain.v[0], DestCone.n[0]/SourceCone.n[0],    0.0,  0.0);
+    _cmsVEC3init(&Gain.v[1], 0.0,   DestCone.n[1]/SourceCone.n[1],   0.0);
+    _cmsVEC3init(&Gain.v[2], 0.0,   0.0,   DestCone.n[2]/SourceCone.n[2]);
+
+    // Normalize
+    _cmsMAT3per(&GainTimesChad, &Gain, Chad);
+    _cmsMAT3per(Conversion, &ChadInverse, &GainTimesChad);
+
+    return TRUE;
+}
+
+// Returns the final chromatic adaptation from illuminant FromIll to illuminant ToIll in r.
+// The cone matrix can be specified in ConeMatrix. If NULL, Bradford is assumed.
+cmsBool  _cmsAdaptationMatrix(cmsMAT3* r, const cmsMAT3* ConeMatrix, const cmsCIEXYZ* FromIll, const cmsCIEXYZ* ToIll)
+{
+    // Fallback cone matrix (Bradford), used only when the caller passes none
+    cmsMAT3 Bradford = {{
+        {{  0.8951,  0.2664, -0.1614 }},
+        {{ -0.7502,  1.7135,  0.0367 }},
+        {{  0.0389, -0.0685,  1.0296 }}
+    }};
+    const cmsMAT3* Cone = (ConeMatrix != NULL) ? ConeMatrix : &Bradford;
+
+    // The outcome is whatever ComputeChromaticAdaptation() reports
+    return ComputeChromaticAdaptation(r, FromIll, ToIll, Cone);
+}
+
+// Adapts the matrix in r, in place, from the given white point (xyY) to D50, passing NULL as cone
+// matrix. Returns FALSE, leaving r untouched, when no adaptation matrix can be built.
+static
+cmsBool _cmsAdaptMatrixToD50(cmsMAT3* r, const cmsCIExyY* SourceWhitePt)
+{
+    cmsCIEXYZ SourceXYZ;
+    cmsMAT3 ToD50, Unadapted;
+    cmsBool Built;
+
+    cmsxyY2XYZ(&SourceXYZ, SourceWhitePt);
+    Built = _cmsAdaptationMatrix(&ToD50, NULL, &SourceXYZ, cmsD50_XYZ());
+    if (!Built) return FALSE;
+
+    Unadapted = *r;                       // r is both an input and the output of the product below
+    _cmsMAT3per(r, &ToD50, &Unadapted);
+    return TRUE;
+}
+
+// Build a White point, primary chromas transfer matrix from RGB to CIE XYZ
+// This is just an approximation, I am not handling all the non-linear
+// aspects of the RGB to XYZ process, and assuming that the gamma correction
+// has transitive property in the transformation chain.
+//
+// the algorithm:
+//
+//            - First I build the absolute conversion matrix using
+//              primaries in XYZ. This matrix is next inverted
+//            - Then I eval the source white point across this matrix
+//              obtaining the coefficients of the transformation
+//            - Then, I apply these coefficients to the original matrix
+//            - Last, the matrix is adapted to D50
+// Returns FALSE when the primaries matrix cannot be inverted or the adaptation fails.
+cmsBool _cmsBuildRGB2XYZtransferMatrix(cmsMAT3* r, const cmsCIExyY* WhitePt, const cmsCIExyYTRIPLE* Primrs)
+{
+    cmsVEC3 WhiteXYZ, Scale;
+    cmsMAT3 Chromas, ChromasInverse;
+
+    // Chromaticities of the white point and of the three primaries
+    const cmsFloat64Number WhiteX = WhitePt -> x;
+    const cmsFloat64Number WhiteY = WhitePt -> y;
+    const cmsFloat64Number RedX   = Primrs -> Red.x;
+    const cmsFloat64Number RedY   = Primrs -> Red.y;
+    const cmsFloat64Number GreenX = Primrs -> Green.x;
+    const cmsFloat64Number GreenY = Primrs -> Green.y;
+    const cmsFloat64Number BlueX  = Primrs -> Blue.x;
+    const cmsFloat64Number BlueY  = Primrs -> Blue.y;
+
+    // Build Primaries matrix: v[0] holds the x of each primary, v[1] the y, v[2] the remainder 1-x-y
+    _cmsVEC3init(&Chromas.v[0], RedX,          GreenX,            BlueX);
+    _cmsVEC3init(&Chromas.v[1], RedY,          GreenY,            BlueY);
+    _cmsVEC3init(&Chromas.v[2], (1-RedX-RedY), (1-GreenX-GreenY), (1-BlueX-BlueY));
+
+
+    // Result = Primaries ^ (-1) inverse matrix; give up when it cannot be computed
+    if (!_cmsMAT3inverse(&Chromas, &ChromasInverse))
+        return FALSE;
+
+
+    // White point with its y scaled to 1.0
+    _cmsVEC3init(&WhiteXYZ, WhiteX/WhiteY, 1.0, (1.0-WhiteX-WhiteY)/WhiteY);
+
+    // Across inverse primaries ...
+    _cmsMAT3eval(&Scale, &ChromasInverse, &WhiteXYZ);
+
+    // Give us the Coefs, then I build transformation matrix:
+    // each coefficient weights the x, y and 1-x-y of its own primary
+    _cmsVEC3init(&r -> v[0], Scale.n[VX]*RedX,            Scale.n[VY]*GreenX,              Scale.n[VZ]*BlueX);
+    _cmsVEC3init(&r -> v[1], Scale.n[VX]*RedY,            Scale.n[VY]*GreenY,              Scale.n[VZ]*BlueY);
+    _cmsVEC3init(&r -> v[2], Scale.n[VX]*(1.0-RedX-RedY), Scale.n[VY]*(1.0-GreenX-GreenY), Scale.n[VZ]*(1.0-BlueX-BlueY));
+
+    // The result is finally adapted to D50
+    return _cmsAdaptMatrixToD50(r, WhitePt);
+
+}
+
+
+// Adapts a color to a given illuminant. Original color (Value) is expected to have
+// a SourceWhitePt white point; the adapted color is stored in Result.
+cmsBool CMSEXPORT cmsAdaptToIlluminant(cmsCIEXYZ* Result,
+                                       const cmsCIEXYZ* SourceWhitePt,
+                                       const cmsCIEXYZ* Illuminant,
+                                       const cmsCIEXYZ* Value)
+{
+    cmsMAT3 Adapt;
+    cmsVEC3 Source, Adapted;
+
+    _cmsAssert(Result != NULL);
+    _cmsAssert(SourceWhitePt != NULL);
+    _cmsAssert(Illuminant != NULL);
+    _cmsAssert(Value != NULL);
+
+    // NULL selects the default cone matrix; Result is not written when no matrix can be built
+    if (!_cmsAdaptationMatrix(&Adapt, NULL, SourceWhitePt, Illuminant)) return FALSE;
+
+    _cmsVEC3init(&Source, Value -> X, Value -> Y, Value -> Z);
+    _cmsMAT3eval(&Adapted, &Adapt, &Source);
+
+    Result -> X = Adapted.n[0];
+    Result -> Y = Adapted.n[1];
+    Result -> Z = Adapted.n[2];
+    return TRUE;
+}
+
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/cmsxform.c b/third-party/libjxl/libjxl/third_party/lcms/src/cmsxform.c
new file mode 100644
index 0000000000..d8c69648f8
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/cmsxform.c
@@ -0,0 +1,1339 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Transformations stuff
+// -----------------------------------------------------------------------
+// Initial value of the observer adaptation state, used by both initializers in this file
+#define DEFAULT_OBSERVER_ADAPTATION_STATE 1.0
+
+// The Context0 observer adaptation state.
+_cmsAdaptationStateChunkType _cmsAdaptationStateChunk = { DEFAULT_OBSERVER_ADAPTATION_STATE };
+
+// Init and duplicate observer adaptation state: the new context receives a copy of the chunk
+// held by src, or of the built-in defaults when src is NULL.
+void _cmsAllocAdaptationStateChunk(struct _cmsContext_struct* ctx, 
+                                   const struct _cmsContext_struct* src)
+{
+    // Defaults for a context that is not cloned from another one
+    static _cmsAdaptationStateChunkType Defaults = { DEFAULT_OBSERVER_ADAPTATION_STATE };
+    void* Template;
+
+    // Pick what to copy from
+    Template = (src != NULL) ? src ->chunks[AdaptationStateContext]
+                             : (void*) &Defaults;
+
+    // Duplicate it through the memory pool of the new context
+    ctx ->chunks[AdaptationStateContext] = _cmsSubAllocDup(ctx ->MemPool, Template, sizeof(_cmsAdaptationStateChunkType));
+}
+
+
+// Sets adaptation state for absolute colorimetric intent in the given context.  Adaptation state applies on all 
+// but cmsCreateExtendedTransformTHR().  Little CMS can handle incomplete adaptation states.
+// Returns the state that was in force before the call; a negative d only queries it.
+cmsFloat64Number CMSEXPORT cmsSetAdaptationStateTHR(cmsContext ContextID, cmsFloat64Number d)
+{
+    _cmsAdaptationStateChunkType* State;
+    cmsFloat64Number Previous;
+
+    State = (_cmsAdaptationStateChunkType*) _cmsContextGetClientChunk(ContextID, AdaptationStateContext);
+    // Remember the value on entry: it is what gets returned
+    Previous = State ->AdaptationState;
+
+    // Store d only when it is positive or zero; anything else leaves the state as it is
+    if (d >= 0.0) {
+        State ->AdaptationState = d;
+    }
+
+    return Previous;
+}
+
+// The adaptation state may be defaulted by this function (NULL context). If you don't like it, use the extended transform routine
+cmsFloat64Number CMSEXPORT cmsSetAdaptationState(cmsFloat64Number d)
+{
+    cmsContext NoContext = NULL;
+    return cmsSetAdaptationStateTHR(NoContext, d);
+}
+
+// -----------------------------------------------------------------------
+
+// Alarm codes for 16-bit transformations: because of the fixed range of the containers, there are
+// no values left to mark out of gamut. 
+// Defaults: 0x7F00 on the first three channels, 0 on all the others
+#define DEFAULT_ALARM_CODES_VALUE {0x7F00, 0x7F00, 0x7F00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+
+_cmsAlarmCodesChunkType _cmsAlarmCodesChunk = { DEFAULT_ALARM_CODES_VALUE };
+
+// Sets the codes used to mark out-of-gamut on Proofing transforms for a given context. Values are meant to be 
+// encoded in 16 bits. The whole AlarmCodes array of the context is overwritten from AlarmCodesP.
+void CMSEXPORT cmsSetAlarmCodesTHR(cmsContext ContextID, const cmsUInt16Number AlarmCodesP[cmsMAXCHANNELS])
+{
+    _cmsAlarmCodesChunkType* Chunk = (_cmsAlarmCodesChunkType*) _cmsContextGetClientChunk(ContextID, AlarmCodesContext);
+    const size_t nBytes = sizeof(Chunk->AlarmCodes);
+
+    _cmsAssert(Chunk != NULL); // Can't happen
+    memcpy(Chunk->AlarmCodes, AlarmCodesP, nBytes);
+}
+
+// Gets the current codes used to mark out-of-gamut on Proofing transforms for the given context.
+// Values are meant to be encoded in 16 bits. The whole AlarmCodes array of the context is copied to AlarmCodesP.
+void CMSEXPORT cmsGetAlarmCodesTHR(cmsContext ContextID, cmsUInt16Number AlarmCodesP[cmsMAXCHANNELS])
+{
+    _cmsAlarmCodesChunkType* Chunk = (_cmsAlarmCodesChunkType*) _cmsContextGetClientChunk(ContextID, AlarmCodesContext);
+    const size_t nBytes = sizeof(Chunk->AlarmCodes);
+
+    _cmsAssert(Chunk != NULL); // Can't happen
+    memcpy(AlarmCodesP, Chunk->AlarmCodes, nBytes);
+}
+// Sets the alarm codes through cmsSetAlarmCodesTHR() with a NULL context. NewAlarm must not be NULL.
+void CMSEXPORT cmsSetAlarmCodes(const cmsUInt16Number NewAlarm[cmsMAXCHANNELS])
+{
+    cmsContext NoContext = NULL;
+    _cmsAssert(NewAlarm != NULL);
+    cmsSetAlarmCodesTHR(NoContext, NewAlarm);
+}
+// Reads the alarm codes through cmsGetAlarmCodesTHR() with a NULL context. OldAlarm must not be NULL.
+void CMSEXPORT cmsGetAlarmCodes(cmsUInt16Number OldAlarm[cmsMAXCHANNELS])
+{
+    cmsContext NoContext = NULL;
+    _cmsAssert(OldAlarm != NULL);
+    cmsGetAlarmCodesTHR(NoContext, OldAlarm);
+}
+
+// Init and duplicate alarm codes: the new context receives a copy of the chunk held by src,
+// or of the built-in defaults when src is NULL.
+void _cmsAllocAlarmCodesChunk(struct _cmsContext_struct* ctx, 
+                              const struct _cmsContext_struct* src)
+{
+    // Defaults for a context that is not cloned from another one
+    static _cmsAlarmCodesChunkType Defaults = { DEFAULT_ALARM_CODES_VALUE };
+    void* Template;
+
+    // Pick what to copy from
+    Template = (src != NULL) ? src ->chunks[AlarmCodesContext]
+                             : (void*) &Defaults;
+
+    // Duplicate it through the memory pool of the new context
+    ctx ->chunks[AlarmCodesContext] = _cmsSubAllocDup(ctx ->MemPool, Template, sizeof(_cmsAlarmCodesChunkType));
+}
+
+// -----------------------------------------------------------------------
+
+// Get rid of transform resources: every owned piece is released if present, then the transform itself.
+void CMSEXPORT cmsDeleteTransform(cmsHTRANSFORM hTransform)
+{
+    _cmsTRANSFORM* p = (_cmsTRANSFORM*) hTransform;
+
+    _cmsAssert(p != NULL);
+
+    if (p -> GamutCheck)
+        cmsPipelineFree(p -> GamutCheck);
+
+    if (p -> Lut)
+        cmsPipelineFree(p -> Lut);
+
+    if (p ->InputColorant)
+        cmsFreeNamedColorList(p ->InputColorant);
+
+    if (p -> OutputColorant)
+        cmsFreeNamedColorList(p ->OutputColorant);
+
+    if (p ->Sequence)
+        cmsFreeProfileSequenceDescription(p ->Sequence);
+    // How UserData gets set is not visible here; guard the callback so a NULL pointer is never called
+    if (p ->UserData && p ->FreeUserData != NULL)
+        p ->FreeUserData(p ->ContextID, p ->UserData);
+
+    _cmsFree(p ->ContextID, (void *) p);
+}
+
+// Apply transform to Size pixels, handled as a single line. Size is also passed as both plane strides.
+void CMSEXPORT cmsDoTransform(cmsHTRANSFORM  Transform,
+                              const void* InputBuffer,
+                              void* OutputBuffer,
+                              cmsUInt32Number Size)
+
+{
+    _cmsTRANSFORM* Xf = (_cmsTRANSFORM*) Transform;
+    cmsStride Layout;
+
+    Layout.BytesPerPlaneIn  = Size;
+    Layout.BytesPerPlaneOut = Size;
+    Layout.BytesPerLineIn   = 0;  // Not used
+    Layout.BytesPerLineOut  = 0;
+
+    Xf -> xform(Xf, InputBuffer, OutputBuffer, Size, 1, &Layout);
+}
+
+
+// This is a legacy stride for planar: Stride is used as the plane stride of both buffers
+void CMSEXPORT cmsDoTransformStride(cmsHTRANSFORM  Transform,
+                              const void* InputBuffer,
+                              void* OutputBuffer,
+                              cmsUInt32Number Size, cmsUInt32Number Stride)
+
+{
+    _cmsTRANSFORM* Xf = (_cmsTRANSFORM*) Transform;
+    cmsStride Layout;
+
+    Layout.BytesPerPlaneIn  = Stride;
+    Layout.BytesPerPlaneOut = Stride;
+    Layout.BytesPerLineIn   = 0;
+    Layout.BytesPerLineOut  = 0;
+
+    Xf -> xform(Xf, InputBuffer, OutputBuffer, Size, 1, &Layout);   // one line of Size pixels
+}
+
+// This is the "fast" function for plugins: line count and all four strides are given by the caller
+void CMSEXPORT cmsDoTransformLineStride(cmsHTRANSFORM  Transform,
+                              const void* InputBuffer,
+                              void* OutputBuffer,
+                              cmsUInt32Number PixelsPerLine,
+                              cmsUInt32Number LineCount,
+                              cmsUInt32Number BytesPerLineIn,
+                              cmsUInt32Number BytesPerLineOut,
+                              cmsUInt32Number BytesPerPlaneIn,
+                              cmsUInt32Number BytesPerPlaneOut)
+
+{
+    _cmsTRANSFORM* Xf = (_cmsTRANSFORM*) Transform;
+    cmsStride Layout;
+
+    Layout.BytesPerPlaneOut = BytesPerPlaneOut;
+    Layout.BytesPerPlaneIn  = BytesPerPlaneIn;
+    Layout.BytesPerLineOut  = BytesPerLineOut;
+    Layout.BytesPerLineIn   = BytesPerLineIn;
+
+    Xf->xform(Xf, InputBuffer, OutputBuffer, PixelsPerLine, LineCount, &Layout);
+}
+
+
+
+// Transform routines ----------------------------------------------------------------------------------------------------------
+
+// Float xform converts floats. Since there are no performance issues, one routine does all job, including gamut check.
+// Note that because extended range, we can use a -1.0 value for out of gamut in this case.
+//
+// For every pixel of every line: unpack with FromInputFloat, optionally evaluate the gamut
+// check pipeline, evaluate the color pipeline (or fill with -1.0 when out of gamut) and pack
+// with ToOutputFloat. Line starts are located with Stride->BytesPerLineIn/Out, and the plane
+// strides are handed to the formatters.
+static
+void FloatXFORM(_cmsTRANSFORM* p,
+                const void* in,
+                void* out, 
+                cmsUInt32Number PixelsPerLine,
+                cmsUInt32Number LineCount,
+                const cmsStride* Stride)
+{
+    cmsUInt8Number* accum;
+    cmsUInt8Number* output;
+    cmsFloat32Number fIn[cmsMAXCHANNELS], fOut[cmsMAXCHANNELS];
+    cmsFloat32Number OutOfGamut;
+    cmsUInt32Number i, j, c, strideIn, strideOut;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    strideIn = 0;
+    strideOut = 0;
+
+    // Each buffer is cleared using its own size
+    memset(fIn, 0, sizeof(fIn));
+    memset(fOut, 0, sizeof(fOut));
+
+    for (i = 0; i < LineCount; i++) {
+
+        // Start of the current line in both buffers
+        accum = (cmsUInt8Number*)in + strideIn;
+        output = (cmsUInt8Number*)out + strideOut;
+
+        for (j = 0; j < PixelsPerLine; j++) {
+
+            accum = p->FromInputFloat(p, fIn, accum, Stride->BytesPerPlaneIn);
+
+            // Any gamut check to do?
+            if (p->GamutCheck != NULL) {
+
+                // Evaluate gamut marker.
+                cmsPipelineEvalFloat(fIn, &OutOfGamut, p->GamutCheck);
+
+                // Is current color out of gamut?
+                if (OutOfGamut > 0.0) {
+
+                    // Certainly, out of gamut: mark every channel with -1.0
+                    for (c = 0; c < cmsMAXCHANNELS; c++)
+                        fOut[c] = -1.0;
+
+                }
+                else {
+                    // No, proceed normally
+                    cmsPipelineEvalFloat(fIn, fOut, p->Lut);
+                }
+            }
+            else {
+
+                // No gamut check at all
+                cmsPipelineEvalFloat(fIn, fOut, p->Lut);
+            }
+
+
+            output = p->ToOutputFloat(p, fOut, output, Stride->BytesPerPlaneOut);
+        }
+
+        // Advance to the next line
+        strideIn += Stride->BytesPerLineIn;
+        strideOut += Stride->BytesPerLineOut;
+    }
+
+}
+
+
+// Null transformation in floating point: only the formatters are applied, the values
+// unpacked from the input are packed straight to the output.
+static
+void NullFloatXFORM(_cmsTRANSFORM* p,
+                    const void* in,
+                    void* out, 
+                    cmsUInt32Number PixelsPerLine,
+                    cmsUInt32Number LineCount,
+                    const cmsStride* Stride)
+
+{
+    cmsFloat32Number Values[cmsMAXCHANNELS];
+    cmsUInt32Number Line, Pixel;
+    cmsUInt32Number OffsetIn = 0, OffsetOut = 0;
+    cmsUInt8Number* Src;
+    cmsUInt8Number* Dst;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    memset(Values, 0, sizeof(Values));
+
+    for (Line = 0; Line < LineCount; Line++) {
+
+        // Start of the current line in both buffers
+        Src = (cmsUInt8Number*) in + OffsetIn;
+        Dst = (cmsUInt8Number*) out + OffsetOut;
+
+        for (Pixel = 0; Pixel < PixelsPerLine; Pixel++) {
+
+            Src = p->FromInputFloat(p, Values, Src, Stride->BytesPerPlaneIn);
+            Dst = p->ToOutputFloat(p, Values, Dst, Stride->BytesPerPlaneOut);
+        }
+
+        OffsetIn  += Stride->BytesPerLineIn;
+        OffsetOut += Stride->BytesPerLineOut;
+    }
+}
+
+// 16 bit precision -----------------------------------------------------------------------------------------------------------
+
+// Null transformation in 16 bits: no pipeline and no cache, the unpacked values
+// are packed straight to the output by the formatters.
+static
+void NullXFORM(_cmsTRANSFORM* p,
+               const void* in,
+               void* out,
+               cmsUInt32Number PixelsPerLine,
+               cmsUInt32Number LineCount,
+               const cmsStride* Stride)
+{
+    cmsUInt16Number Values[cmsMAXCHANNELS];
+    cmsUInt32Number Line, Pixel;
+    cmsUInt32Number OffsetIn = 0, OffsetOut = 0;
+    cmsUInt8Number* Src;
+    cmsUInt8Number* Dst;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    memset(Values, 0, sizeof(Values));
+
+    for (Line = 0; Line < LineCount; Line++) {
+
+        // Start of the current line in both buffers
+        Src = (cmsUInt8Number*)in + OffsetIn;
+        Dst = (cmsUInt8Number*)out + OffsetOut;
+
+        for (Pixel = 0; Pixel < PixelsPerLine; Pixel++) {
+
+            Src = p->FromInput(p, Values, Src, Stride->BytesPerPlaneIn);
+            Dst = p->ToOutput(p, Values, Dst, Stride->BytesPerPlaneOut);
+        }
+
+        OffsetIn  += Stride->BytesPerLineIn;
+        OffsetOut += Stride->BytesPerLineOut;
+    }
+
+}
+
+
+// 16 bits worker with neither gamut check nor cache: every pixel is unpacked,
+// evaluated through the pipeline and packed again.
+static
+void PrecalculatedXFORM(_cmsTRANSFORM* p,
+                        const void* in,
+                        void* out, 
+                        cmsUInt32Number PixelsPerLine,
+                        cmsUInt32Number LineCount,
+                        const cmsStride* Stride)
+{
+    CMSREGISTER cmsUInt8Number* Src;
+    CMSREGISTER cmsUInt8Number* Dst;
+    cmsUInt16Number Encoded[cmsMAXCHANNELS], Result[cmsMAXCHANNELS];
+    cmsUInt32Number Line, Pixel;
+    cmsUInt32Number OffsetIn = 0, OffsetOut = 0;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    memset(Encoded, 0, sizeof(Encoded));
+    memset(Result, 0, sizeof(Result));
+
+    for (Line = 0; Line < LineCount; Line++) {
+
+        // Start of the current line in both buffers
+        Src = (cmsUInt8Number*)in + OffsetIn;
+        Dst = (cmsUInt8Number*)out + OffsetOut;
+
+        for (Pixel = 0; Pixel < PixelsPerLine; Pixel++) {
+
+            Src = p->FromInput(p, Encoded, Src, Stride->BytesPerPlaneIn);
+            p->Lut->Eval16Fn(Encoded, Result, p->Lut->Data);
+            Dst = p->ToOutput(p, Result, Dst, Stride->BytesPerPlaneOut);
+        }
+
+        OffsetIn  += Stride->BytesPerLineIn;
+        OffsetOut += Stride->BytesPerLineOut;
+    }
+
+}
+
+
+// Auxiliary: transforms one 16 bits pixel honoring the precalculated gamut check. In-gamut
+// colors go through the pipeline; out-of-gamut colors are replaced by the alarm codes of the
+// context. The retrieval of the context may be a little bit slow, but this function is not critical.
+static
+void TransformOnePixelWithGamutCheck(_cmsTRANSFORM* p,
+                                     const cmsUInt16Number wIn[],
+                                     cmsUInt16Number wOut[])
+{
+    cmsUInt16Number GamutMarker;
+    cmsUInt16Number Channel;
+    _cmsAlarmCodesChunkType* Alarm;
+
+    p->GamutCheck->Eval16Fn(wIn, &GamutMarker, p->GamutCheck->Data);
+
+    // Zero marker means the color is inside the gamut: proceed normally
+    if (GamutMarker == 0) {
+
+        p->Lut->Eval16Fn(wIn, wOut, p->Lut->Data);
+        return;
+    }
+
+    // Out of gamut, output the alarm color instead
+    Alarm = (_cmsAlarmCodesChunkType*) _cmsContextGetClientChunk(p->ContextID, AlarmCodesContext);
+
+    for (Channel = 0; Channel < p->Lut->OutputChannels; Channel++) {
+
+        wOut[Channel] = Alarm->AlarmCodes[Channel];
+    }
+}
+
+// 16 bits worker with gamut check and without cache: every pixel is unpacked, passed
+// through TransformOnePixelWithGamutCheck and packed again.
+static
+void PrecalculatedXFORMGamutCheck(_cmsTRANSFORM* p,
+                                  const void* in,
+                                  void* out, 
+                                  cmsUInt32Number PixelsPerLine,
+                                  cmsUInt32Number LineCount,
+                                  const cmsStride* Stride)
+{
+    cmsUInt16Number Encoded[cmsMAXCHANNELS], Result[cmsMAXCHANNELS];
+    cmsUInt32Number Line, Pixel;
+    cmsUInt32Number OffsetIn = 0, OffsetOut = 0;
+    cmsUInt8Number* Src;
+    cmsUInt8Number* Dst;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    memset(Encoded, 0, sizeof(Encoded));
+    memset(Result, 0, sizeof(Result));
+
+    for (Line = 0; Line < LineCount; Line++) {
+
+        // Start of the current line in both buffers
+        Src = (cmsUInt8Number*)in + OffsetIn;
+        Dst = (cmsUInt8Number*)out + OffsetOut;
+
+        for (Pixel = 0; Pixel < PixelsPerLine; Pixel++) {
+
+            Src = p->FromInput(p, Encoded, Src, Stride->BytesPerPlaneIn);
+            TransformOnePixelWithGamutCheck(p, Encoded, Result);
+            Dst = p->ToOutput(p, Result, Dst, Stride->BytesPerPlaneOut);
+        }
+
+        OffsetIn  += Stride->BytesPerLineIn;
+        OffsetOut += Stride->BytesPerLineOut;
+    }
+}
+
+
+// 16 bits worker with a one-entry cache and no gamut check. The pipeline is evaluated only
+// when the unpacked input differs from the last evaluated one.
+static
+void CachedXFORM(_cmsTRANSFORM* p,
+                 const void* in,
+                 void* out,
+                 cmsUInt32Number PixelsPerLine,
+                 cmsUInt32Number LineCount,
+                 const cmsStride* Stride)
+{
+    _cmsCACHE Last;
+    cmsUInt16Number Encoded[cmsMAXCHANNELS], Result[cmsMAXCHANNELS];
+    cmsUInt32Number Line, Pixel;
+    cmsUInt32Number OffsetIn = 0, OffsetOut = 0;
+    cmsUInt8Number* Src;
+    cmsUInt8Number* Dst;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    // Buffers are zeroed so the whole array can be compared with memcmp
+    memset(Encoded, 0, sizeof(Encoded));
+    memset(Result, 0, sizeof(Result));
+
+    // Work on a local copy of the cache stored in the transform
+    memcpy(&Last, &p->Cache, sizeof(Last));
+
+    for (Line = 0; Line < LineCount; Line++) {
+
+        // Start of the current line in both buffers
+        Src = (cmsUInt8Number*)in + OffsetIn;
+        Dst = (cmsUInt8Number*)out + OffsetOut;
+
+        for (Pixel = 0; Pixel < PixelsPerLine; Pixel++) {
+
+            Src = p->FromInput(p, Encoded, Src, Stride->BytesPerPlaneIn);
+
+            if (memcmp(Encoded, Last.CacheIn, sizeof(Last.CacheIn)) != 0) {
+
+                // Cache miss: evaluate and remember this input/output pair
+                p->Lut->Eval16Fn(Encoded, Result, p->Lut->Data);
+
+                memcpy(Last.CacheIn, Encoded, sizeof(Last.CacheIn));
+                memcpy(Last.CacheOut, Result, sizeof(Last.CacheOut));
+            }
+            else {
+
+                // Cache hit: reuse the stored output
+                memcpy(Result, Last.CacheOut, sizeof(Last.CacheOut));
+            }
+
+            Dst = p->ToOutput(p, Result, Dst, Stride->BytesPerPlaneOut);
+        }
+
+        OffsetIn  += Stride->BytesPerLineIn;
+        OffsetOut += Stride->BytesPerLineOut;
+    }
+}
+
+// All those nice features together: 16 bits worker with a one-entry cache and gamut check.
+// On a cache miss the pixel goes through TransformOnePixelWithGamutCheck.
+static
+void CachedXFORMGamutCheck(_cmsTRANSFORM* p,
+                           const void* in,
+                           void* out, 
+                           cmsUInt32Number PixelsPerLine,
+                           cmsUInt32Number LineCount,
+                           const cmsStride* Stride)
+{
+    _cmsCACHE Last;
+    cmsUInt16Number Encoded[cmsMAXCHANNELS], Result[cmsMAXCHANNELS];
+    cmsUInt32Number Line, Pixel;
+    cmsUInt32Number OffsetIn = 0, OffsetOut = 0;
+    cmsUInt8Number* Src;
+    cmsUInt8Number* Dst;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    // Buffers are zeroed so the whole array can be compared with memcmp
+    memset(Encoded, 0, sizeof(Encoded));
+    memset(Result, 0, sizeof(Result));
+
+    // Work on a local copy of the cache stored in the transform
+    memcpy(&Last, &p->Cache, sizeof(Last));
+
+    for (Line = 0; Line < LineCount; Line++) {
+
+        // Start of the current line in both buffers
+        Src = (cmsUInt8Number*)in + OffsetIn;
+        Dst = (cmsUInt8Number*)out + OffsetOut;
+
+        for (Pixel = 0; Pixel < PixelsPerLine; Pixel++) {
+
+            Src = p->FromInput(p, Encoded, Src, Stride->BytesPerPlaneIn);
+
+            if (memcmp(Encoded, Last.CacheIn, sizeof(Last.CacheIn)) != 0) {
+
+                // Cache miss: evaluate (with gamut check) and remember the pair
+                TransformOnePixelWithGamutCheck(p, Encoded, Result);
+
+                memcpy(Last.CacheIn, Encoded, sizeof(Last.CacheIn));
+                memcpy(Last.CacheOut, Result, sizeof(Last.CacheOut));
+            }
+            else {
+
+                // Cache hit: reuse the stored output
+                memcpy(Result, Last.CacheOut, sizeof(Last.CacheOut));
+            }
+
+            Dst = p->ToOutput(p, Result, Dst, Stride->BytesPerPlaneOut);
+        }
+
+        OffsetIn  += Stride->BytesPerLineIn;
+        OffsetOut += Stride->BytesPerLineOut;
+    }
+}
+
+// Transform plug-ins ----------------------------------------------------------------------------------------------------
+
+// Node of the singly linked list of user-defined (plug-in) transform factories
+typedef struct _cmsTransformCollection_st {
+
+    _cmsTransform2Factory  Factory;    // Callback offered each new transform (see AllocEmptyTransform)
+    cmsBool                OldXform;   // Factory returns xform function in the old style, so an adaptor is needed
+
+    struct _cmsTransformCollection_st *Next;   // Next node of the list, NULL on the last one
+
+} _cmsTransformCollection;
+
+// The linked list head: global transform plug-in chunk, initialized with an empty (NULL) list
+_cmsTransformPluginChunkType _cmsTransformPluginChunk = { NULL };
+
+
+// Duplicates the transform plug-in list of context src into the memory pool of context ctx,
+// keeping the original order of the nodes. If a node cannot be allocated the function returns
+// at once, without storing anything in ctx->chunks[TransformPlugin].
+static
+void DupPluginTransformList(struct _cmsContext_struct* ctx, 
+                                               const struct _cmsContext_struct* src)
+{
+    _cmsTransformPluginChunkType Copy = { NULL };
+    _cmsTransformPluginChunkType* Source = (_cmsTransformPluginChunkType*) src->chunks[TransformPlugin];
+    _cmsTransformCollection* Node;
+    _cmsTransformCollection* Previous = NULL;
+
+    for (Node = Source->TransformCollection; Node != NULL; Node = Node->Next) {
+
+        _cmsTransformCollection* Clone =
+            (_cmsTransformCollection*) _cmsSubAllocDup(ctx->MemPool, Node, sizeof(_cmsTransformCollection));
+
+        if (Clone == NULL)
+            return;
+
+        // The clone is always the new tail of the copied list
+        Clone->Next = NULL;
+
+        if (Previous != NULL)
+            Previous->Next = Clone;
+
+        // The very first clone becomes the head of the copy
+        if (Copy.TransformCollection == NULL)
+            Copy.TransformCollection = Clone;
+
+        Previous = Clone;
+    }
+
+    ctx->chunks[TransformPlugin] = _cmsSubAllocDup(ctx->MemPool, &Copy, sizeof(_cmsTransformPluginChunkType));
+}
+
+// Sets up the transform plug-in chunk of context ctx: a copy of the list held by src when
+// src is given, otherwise a duplicate of an empty chunk.
+void _cmsAllocTransformPluginChunk(struct _cmsContext_struct* ctx, 
+                                        const struct _cmsContext_struct* src)
+{
+    static _cmsTransformPluginChunkType TransformPluginChunkType = { NULL };
+
+    if (src == NULL) {
+
+        // No source context, start with an empty list
+        ctx->chunks[TransformPlugin] = _cmsSubAllocDup(ctx->MemPool, &TransformPluginChunkType, sizeof(_cmsTransformPluginChunkType));
+        return;
+    }
+
+    // Copy all linked list
+    DupPluginTransformList(ctx, src);
+}
+
+// Adaptor for old versions of plug-in: the old-style worker stored in OldXform is called
+// once per line, receiving the pixel count and the input plane stride.
+static
+void _cmsTransform2toTransformAdaptor(struct _cmstransform_struct *CMMcargo,
+                                      const void* InputBuffer,
+                                      void* OutputBuffer,
+                                      cmsUInt32Number PixelsPerLine,
+                                      cmsUInt32Number LineCount,
+                                      const cmsStride* Stride)
+{
+    cmsUInt32Number Line;
+    cmsUInt32Number OffsetIn = 0, OffsetOut = 0;
+
+    _cmsHandleExtraChannels(CMMcargo, InputBuffer, OutputBuffer, PixelsPerLine, LineCount, Stride);
+
+    for (Line = 0; Line < LineCount; Line++) {
+
+        // Start of the current line in both buffers
+        void* Src = (cmsUInt8Number*)InputBuffer + OffsetIn;
+        void* Dst = (cmsUInt8Number*)OutputBuffer + OffsetOut;
+
+        CMMcargo->OldXform(CMMcargo, Src, Dst, PixelsPerLine, Stride->BytesPerPlaneIn);
+
+        OffsetIn  += Stride->BytesPerLineIn;
+        OffsetOut += Stride->BytesPerLineOut;
+    }
+}
+
+
+
+// Register new ways to transform. A NULL Data resets the list of the context. Otherwise a
+// new node holding the plug-in factory is pushed at the head of the list.
+// Returns FALSE when the plug-in has no factory or the node cannot be allocated.
+cmsBool  _cmsRegisterTransformPlugin(cmsContext ContextID, cmsPluginBase* Data)
+{
+    _cmsTransformPluginChunkType* Chunk = ( _cmsTransformPluginChunkType*) _cmsContextGetClientChunk(ContextID,TransformPlugin);
+    cmsPluginTransform* Plugin = (cmsPluginTransform*) Data;
+    _cmsTransformCollection* Node;
+
+    if (Data == NULL) {
+
+        // Drop the chain. Memory is safely freed at exit
+        Chunk->TransformCollection = NULL;
+        return TRUE;
+    }
+
+    // Factory callback is required
+    if (Plugin->factories.xform == NULL)
+        return FALSE;
+
+    Node = (_cmsTransformCollection*) _cmsPluginMalloc(ContextID, sizeof(_cmsTransformCollection));
+    if (Node == NULL)
+        return FALSE;
+
+    // Full xform plug-ins previous to 2.8 need an adapter
+    Node->OldXform = (Plugin->base.ExpectedVersion < 2080) ? TRUE : FALSE;
+
+    // Copy the parameters
+    Node->Factory = Plugin->factories.xform;
+
+    // Push on the head of the linked list
+    Node->Next = Chunk->TransformCollection;
+    Chunk->TransformCollection = Node;
+
+    // All is ok
+    return TRUE;
+}
+
+
+// Stores in the transform the private data pointer of the plug-in together with the function in charge of freeing it
+void CMSEXPORT _cmsSetTransformUserData(struct _cmstransform_struct *CMMcargo, void* ptr, _cmsFreeUserDataFn FreePrivateDataFn)
+{
+    _cmsAssert(CMMcargo != NULL);
+
+    CMMcargo->FreeUserData = FreePrivateDataFn;
+    CMMcargo->UserData     = ptr;
+}
+
+// Returns the private data pointer previously stored in the transform by the plug-in
+void * CMSEXPORT _cmsGetTransformUserData(struct _cmstransform_struct *CMMcargo)
+{
+    void* PrivateData;
+
+    _cmsAssert(CMMcargo != NULL);
+
+    PrivateData = CMMcargo->UserData;
+    return PrivateData;
+}
+
+// Returns the current 16 bits formatters of the transform. Either output pointer may be NULL, in which case it is skipped.
+void CMSEXPORT _cmsGetTransformFormatters16(struct _cmstransform_struct *CMMcargo, cmsFormatter16* FromInput, cmsFormatter16* ToOutput)
+{
+    _cmsAssert(CMMcargo != NULL);
+
+    if (FromInput != NULL) {
+        *FromInput = CMMcargo->FromInput;
+    }
+
+    if (ToOutput != NULL) {
+        *ToOutput = CMMcargo->ToOutput;
+    }
+}
+
+// Returns the current floating point formatters of the transform. Either output pointer may be NULL, in which case it is skipped.
+void CMSEXPORT _cmsGetTransformFormattersFloat(struct _cmstransform_struct *CMMcargo, cmsFormatterFloat* FromInput, cmsFormatterFloat* ToOutput)
+{
+    _cmsAssert(CMMcargo != NULL);
+
+    if (FromInput != NULL) {
+        *FromInput = CMMcargo->FromInputFloat;
+    }
+
+    if (ToOutput != NULL) {
+        *ToOutput = CMMcargo->ToOutputFloat;
+    }
+}
+
+
+// Allocate transform struct and set it to defaults.
+//
+// When a pipeline is given, every registered transform plug-in is asked first; the first factory
+// accepting the job takes full control of the transform. If no plug-in accepts, the pipeline is
+// handed to _cmsOptimizePipeline and one of the built-in workers is selected according to the
+// formats and flags.
+//
+// InputFormat, OutputFormat and dwFlags are in/out: plug-ins and the optimizer may modify them,
+// and cmsFLAGS_CAN_CHANGE_FORMATTER may be added to dwFlags here.
+//
+// Returns NULL on allocation failure (the pipeline is freed in that case) or when no formatter
+// exists for the requested formats (an error is signalled and the transform is deleted).
+static
+_cmsTRANSFORM* AllocEmptyTransform(cmsContext ContextID, cmsPipeline* lut,
+                                               cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
+{
+     _cmsTransformPluginChunkType* ctx = ( _cmsTransformPluginChunkType*) _cmsContextGetClientChunk(ContextID, TransformPlugin);
+     _cmsTransformCollection* Plugin;
+
+       // Allocate needed memory
+       _cmsTRANSFORM* p = (_cmsTRANSFORM*)_cmsMallocZero(ContextID, sizeof(_cmsTRANSFORM));
+       if (!p) {
+              cmsPipelineFree(lut);
+              return NULL;
+       }
+
+       // Store the proposed pipeline
+       p->Lut = lut;
+
+       // Let's see if any plug-in want to do the transform by itself
+       if (p->Lut != NULL) {
+
+              for (Plugin = ctx->TransformCollection;
+                     Plugin != NULL;
+                     Plugin = Plugin->Next) {
+
+                     if (Plugin->Factory(&p->xform, &p->UserData, &p->FreeUserData, &p->Lut, InputFormat, OutputFormat, dwFlags)) {
+
+                            // Last plugin in the declaration order takes control. We just keep
+                            // the original parameters as a logging. 
+                            // Note that cmsFLAGS_CAN_CHANGE_FORMATTER is not set, so by default 
+                            // an optimized transform is not reusable. The plug-in can, however, change
+                            // the flags and make it suitable.
+
+                            p->ContextID = ContextID;
+                            p->InputFormat = *InputFormat;
+                            p->OutputFormat = *OutputFormat;
+                            p->dwOriginalFlags = *dwFlags;
+
+                            // Fill the formatters just in case the optimized routine is interested.
+                            // No error is thrown if the formatter doesn't exist. It is up to the optimization 
+                            // factory to decide what to do in those cases.
+                            p->FromInput = _cmsGetFormatter(ContextID, *InputFormat, cmsFormatterInput, CMS_PACK_FLAGS_16BITS).Fmt16;
+                            p->ToOutput = _cmsGetFormatter(ContextID, *OutputFormat, cmsFormatterOutput, CMS_PACK_FLAGS_16BITS).Fmt16;
+                            p->FromInputFloat = _cmsGetFormatter(ContextID, *InputFormat, cmsFormatterInput, CMS_PACK_FLAGS_FLOAT).FmtFloat;
+                            p->ToOutputFloat = _cmsGetFormatter(ContextID, *OutputFormat, cmsFormatterOutput, CMS_PACK_FLAGS_FLOAT).FmtFloat;
+
+                            // Old-style plug-in: keep its worker aside and route calls through the adaptor.
+                            // (The function pointer cast raises a warning that can be ignored)
+                            if (Plugin->OldXform) {
+                                   p->OldXform = (_cmsTransformFn)(void*) p->xform;
+                                   p->xform = _cmsTransform2toTransformAdaptor;
+                            }
+                             
+                            return p;
+                     }
+              }
+
+              // Not suitable for the transform plug-in, let's check  the pipeline plug-in
+              _cmsOptimizePipeline(ContextID, &p->Lut, Intent, InputFormat, OutputFormat, dwFlags);
+       }
+
+    // Check whatever this is a true floating point transform
+    if (_cmsFormatterIsFloat(*InputFormat) && _cmsFormatterIsFloat(*OutputFormat)) {
+
+        // Get formatter function always return a valid union, but the contents of this union may be NULL.
+        p ->FromInputFloat = _cmsGetFormatter(ContextID, *InputFormat,  cmsFormatterInput, CMS_PACK_FLAGS_FLOAT).FmtFloat;
+        p ->ToOutputFloat  = _cmsGetFormatter(ContextID, *OutputFormat, cmsFormatterOutput, CMS_PACK_FLAGS_FLOAT).FmtFloat;
+        *dwFlags |= cmsFLAGS_CAN_CHANGE_FORMATTER;
+
+        if (p ->FromInputFloat == NULL || p ->ToOutputFloat == NULL) {
+
+            cmsSignalError(ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported raster format");
+            cmsDeleteTransform(p);
+            return NULL;
+        }
+
+        if (*dwFlags & cmsFLAGS_NULLTRANSFORM) {
+
+            p ->xform = NullFloatXFORM;
+        }
+        else {
+            // Float transforms don't use cache, always are non-NULL
+            p ->xform = FloatXFORM;
+        }
+
+    }
+    else {
+
+        if (*InputFormat == 0 && *OutputFormat == 0) {
+            p ->FromInput = p ->ToOutput = NULL;
+            *dwFlags |= cmsFLAGS_CAN_CHANGE_FORMATTER;
+        }
+        else {
+
+            cmsUInt32Number BytesPerPixelInput;
+
+            p ->FromInput = _cmsGetFormatter(ContextID, *InputFormat,  cmsFormatterInput, CMS_PACK_FLAGS_16BITS).Fmt16;
+            p ->ToOutput  = _cmsGetFormatter(ContextID, *OutputFormat, cmsFormatterOutput, CMS_PACK_FLAGS_16BITS).Fmt16;
+
+            if (p ->FromInput == NULL || p ->ToOutput == NULL) {
+
+                cmsSignalError(ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported raster format");
+                cmsDeleteTransform(p);
+                return NULL;
+            }
+
+            // Use the format passed by the caller: p->InputFormat is not assigned until the end of
+            // this function, so at this point it still holds the zero left by _cmsMallocZero.
+            BytesPerPixelInput = T_BYTES(*InputFormat);
+            if (BytesPerPixelInput == 0 || BytesPerPixelInput >= 2)
+                   *dwFlags |= cmsFLAGS_CAN_CHANGE_FORMATTER;
+
+        }
+
+        if (*dwFlags & cmsFLAGS_NULLTRANSFORM) {
+
+            p ->xform = NullXFORM;
+        }
+        else {
+            if (*dwFlags & cmsFLAGS_NOCACHE) {
+
+                if (*dwFlags & cmsFLAGS_GAMUTCHECK)
+                    p ->xform = PrecalculatedXFORMGamutCheck;  // Gamut check, no cache
+                else
+                    p ->xform = PrecalculatedXFORM;  // No cache, no gamut check
+            }
+            else {
+
+                if (*dwFlags & cmsFLAGS_GAMUTCHECK)
+                    p ->xform = CachedXFORMGamutCheck;    // Gamut check, cache
+                else
+                    p ->xform = CachedXFORM;  // No gamut check, cache
+
+            }
+        }
+    }
+
+    // Keep the (possibly modified) parameters in the transform
+    p ->InputFormat     = *InputFormat;
+    p ->OutputFormat    = *OutputFormat;
+    p ->dwOriginalFlags = *dwFlags;
+    p ->ContextID       = ContextID;
+    p ->UserData        = NULL;
+    return p;
+}
+
+// Walks the profile chain and reports the color space entering the first profile (Input) and
+// the one leaving the last profile (Output). Returns FALSE if there are no profiles or any
+// of them is NULL.
+static
+cmsBool GetXFormColorSpaces(cmsUInt32Number nProfiles, cmsHPROFILE hProfiles[], cmsColorSpaceSignature* Input, cmsColorSpaceSignature* Output)
+{
+    cmsColorSpaceSignature SpaceIn, SpaceOut;
+    cmsColorSpaceSignature Current;
+    cmsUInt32Number i;
+
+    if (nProfiles == 0 || hProfiles[0] == NULL)
+        return FALSE;
+
+    Current = cmsGetColorSpace(hProfiles[0]);
+    *Input  = Current;
+
+    for (i = 0; i < nProfiles; i++) {
+
+        cmsHPROFILE hProfile = hProfiles[i];
+        cmsProfileClassSignature cls;
+
+        // The profile is used in input direction unless the running space is XYZ or Lab
+        int lIsInput = (Current != cmsSigXYZData) &&
+                       (Current != cmsSigLabData);
+
+        if (hProfile == NULL)
+            return FALSE;
+
+        cls = cmsGetDeviceClass(hProfile);
+
+        if (cls == cmsSigNamedColorClass) {
+
+            SpaceIn = cmsSig1colorData;
+
+            if (nProfiles > 1)
+                SpaceOut = cmsGetPCS(hProfile);
+            else
+                SpaceOut = cmsGetColorSpace(hProfile);
+        }
+        else if (lIsInput || (cls == cmsSigLinkClass)) {
+
+            SpaceIn  = cmsGetColorSpace(hProfile);
+            SpaceOut = cmsGetPCS(hProfile);
+        }
+        else {
+
+            SpaceIn  = cmsGetPCS(hProfile);
+            SpaceOut = cmsGetColorSpace(hProfile);
+        }
+
+        // The first profile decides the entry space of the whole chain
+        if (i == 0)
+            *Input = SpaceIn;
+
+        Current = SpaceOut;
+    }
+
+    *Output = Current;
+
+    return TRUE;
+}
+
+// Check colorspace: tells whether the color space encoded in the format dwFormat agrees with
+// the signature Check. PT_ANY always agrees, and Lab and LabV2 are accepted one for another.
+static
+cmsBool  IsProperColorSpace(cmsColorSpaceSignature Check, cmsUInt32Number dwFormat)
+{
+    int FormatSpace  = (int) T_COLORSPACE(dwFormat);
+    int ProfileSpace = _cmsLCMScolorSpace(Check);
+
+    if (FormatSpace == PT_ANY || FormatSpace == ProfileSpace)
+        return TRUE;
+
+    if ((FormatSpace == PT_LabV2 && ProfileSpace == PT_Lab) ||
+        (FormatSpace == PT_Lab   && ProfileSpace == PT_LabV2))
+        return TRUE;
+
+    return FALSE;
+}
+
+// ----------------------------------------------------------------------------------------------------------------
+
+// Jun-21-2000: Some profiles (those that come with W2K) come
+// with the media white (media black?) x 100. As a sanity check, the
+// three components are divided by 10 while all of them are above 2.
+
+static
+void NormalizeXYZ(cmsCIEXYZ* Dest)
+{
+    for (;;) {
+
+        // Stop as soon as the three components are not all above 2
+        if (!(Dest->X > 2. && Dest->Y > 2. && Dest->Z > 2.))
+            break;
+
+        Dest->X /= 10.;
+        Dest->Y /= 10.;
+        Dest->Z /= 10.;
+    }
+}
+
+// Sets wtPt to the D50 constants when src is NULL, otherwise to a normalized copy of src.
+static
+void SetWhitePoint(cmsCIEXYZ* wtPt, const cmsCIEXYZ* src)
+{
+    if (src != NULL) {
+
+        wtPt->X = src->X;
+        wtPt->Y = src->Y;
+        wtPt->Z = src->Z;
+
+        NormalizeXYZ(wtPt);
+        return;
+    }
+
+    // No white point given, default to D50
+    wtPt->X = cmsD50X;
+    wtPt->Y = cmsD50Y;
+    wtPt->Z = cmsD50Z;
+}
+
+// New to lcms 2.0 -- have all parameters available.
+//
+// Builds a transform from a chain of nProfiles profiles, with per-profile black point
+// compensation (BPC), intent (Intents) and adaptation state (AdaptationStates). hGamutProfile and
+// nGamutPCSposition are only used when cmsFLAGS_GAMUTCHECK is set. With cmsFLAGS_NULLTRANSFORM
+// a transform without pipeline is returned and the profile arrays are not read.
+//
+// Returns NULL, after signalling an error, when a profile is missing, the formats do not agree
+// with the entry/exit color spaces, the profiles cannot be linked, the channel counts do not
+// match or the gamut check pipeline cannot be created.
+cmsHTRANSFORM CMSEXPORT cmsCreateExtendedTransform(cmsContext ContextID,
+                                                   cmsUInt32Number nProfiles, cmsHPROFILE hProfiles[],
+                                                   cmsBool  BPC[],
+                                                   cmsUInt32Number Intents[],
+                                                   cmsFloat64Number AdaptationStates[],
+                                                   cmsHPROFILE hGamutProfile,
+                                                   cmsUInt32Number nGamutPCSposition,
+                                                   cmsUInt32Number InputFormat,
+                                                   cmsUInt32Number OutputFormat,
+                                                   cmsUInt32Number dwFlags)
+{
+    _cmsTRANSFORM* xform;    
+    cmsColorSpaceSignature EntryColorSpace;
+    cmsColorSpaceSignature ExitColorSpace;
+    cmsPipeline* Lut;
+    cmsUInt32Number LastIntent;
+
+    // If it is a fake transform
+    if (dwFlags & cmsFLAGS_NULLTRANSFORM)
+    {
+        return AllocEmptyTransform(ContextID, NULL, INTENT_PERCEPTUAL, &InputFormat, &OutputFormat, &dwFlags);
+    }
+
+    // If gamut check is requested, make sure we have a gamut profile
+    if (dwFlags & cmsFLAGS_GAMUTCHECK) {
+        if (hGamutProfile == NULL) dwFlags &= ~cmsFLAGS_GAMUTCHECK;
+    }
+
+    // On floating point transforms, inhibit cache
+    if (_cmsFormatterIsFloat(InputFormat) || _cmsFormatterIsFloat(OutputFormat))
+        dwFlags |= cmsFLAGS_NOCACHE;
+
+    // Mark entry/exit spaces
+    if (!GetXFormColorSpaces(nProfiles, hProfiles, &EntryColorSpace, &ExitColorSpace)) {
+        cmsSignalError(ContextID, cmsERROR_NULL, "NULL input profiles on transform");
+        return NULL;
+    }
+
+    // GetXFormColorSpaces fails when nProfiles is zero, so only from here on it is safe
+    // to index the last element of the per-profile arrays.
+    LastIntent = Intents[nProfiles-1];
+
+    // Check if proper colorspaces
+    if (!IsProperColorSpace(EntryColorSpace, InputFormat)) {
+        cmsSignalError(ContextID, cmsERROR_COLORSPACE_CHECK, "Wrong input color space on transform");
+        return NULL;
+    }
+
+    if (!IsProperColorSpace(ExitColorSpace, OutputFormat)) {
+        cmsSignalError(ContextID, cmsERROR_COLORSPACE_CHECK, "Wrong output color space on transform");
+        return NULL;
+    }
+
+    // Create a pipeline with all transformations
+    Lut = _cmsLinkProfiles(ContextID, nProfiles, Intents, hProfiles, BPC, AdaptationStates, dwFlags);
+    if (Lut == NULL) {
+        cmsSignalError(ContextID, cmsERROR_NOT_SUITABLE, "Couldn't link the profiles");
+        return NULL;
+    }
+
+    // Check channel count
+    if ((cmsChannelsOf(EntryColorSpace) != cmsPipelineInputChannels(Lut)) ||
+        (cmsChannelsOf(ExitColorSpace)  != cmsPipelineOutputChannels(Lut))) {
+        cmsPipelineFree(Lut);
+        cmsSignalError(ContextID, cmsERROR_NOT_SUITABLE, "Channel count doesn't match. Profile is corrupted");
+        return NULL;
+    }
+
+
+    // All seems ok
+    xform = AllocEmptyTransform(ContextID, Lut, LastIntent, &InputFormat, &OutputFormat, &dwFlags);
+    if (xform == NULL) {
+        return NULL;
+    }
+
+    // Keep values
+    xform ->EntryColorSpace = EntryColorSpace;
+    xform ->ExitColorSpace  = ExitColorSpace;
+    xform ->RenderingIntent = LastIntent;
+
+    // Take white points
+    SetWhitePoint(&xform->EntryWhitePoint, (cmsCIEXYZ*) cmsReadTag(hProfiles[0], cmsSigMediaWhitePointTag));
+    SetWhitePoint(&xform->ExitWhitePoint,  (cmsCIEXYZ*) cmsReadTag(hProfiles[nProfiles-1], cmsSigMediaWhitePointTag));
+   
+
+    // Create a gamut check LUT if requested
+    if (hGamutProfile != NULL && (dwFlags & cmsFLAGS_GAMUTCHECK)) {
+
+        xform ->GamutCheck  = _cmsCreateGamutCheckPipeline(ContextID, hProfiles,
+                                                        BPC, Intents,
+                                                        AdaptationStates,
+                                                        nGamutPCSposition,
+                                                        hGamutProfile);
+
+        // The gamut check workers use this pipeline without testing it for NULL,
+        // so a transform lacking it must not be handed to the caller.
+        if (xform ->GamutCheck == NULL) {
+            cmsDeleteTransform(xform);
+            cmsSignalError(ContextID, cmsERROR_NOT_SUITABLE, "Couldn't create the gamut check pipeline");
+            return NULL;
+        }
+    }
+
+
+    // Try to read input and output colorant table
+    if (cmsIsTag(hProfiles[0], cmsSigColorantTableTag)) {
+
+        // Input table can only come in this way.
+        xform ->InputColorant = cmsDupNamedColorList((cmsNAMEDCOLORLIST*) cmsReadTag(hProfiles[0], cmsSigColorantTableTag));
+    }
+
+    // Output is a little bit more complex.
+    if (cmsGetDeviceClass(hProfiles[nProfiles-1]) == cmsSigLinkClass) {
+
+        // This tag may exist only on devicelink profiles.
+        if (cmsIsTag(hProfiles[nProfiles-1], cmsSigColorantTableOutTag)) {
+
+            // It may be NULL if error
+            xform ->OutputColorant = cmsDupNamedColorList((cmsNAMEDCOLORLIST*) cmsReadTag(hProfiles[nProfiles-1], cmsSigColorantTableOutTag));
+        }
+
+    } else {
+
+        if (cmsIsTag(hProfiles[nProfiles-1], cmsSigColorantTableTag)) {
+
+            xform -> OutputColorant = cmsDupNamedColorList((cmsNAMEDCOLORLIST*) cmsReadTag(hProfiles[nProfiles-1], cmsSigColorantTableTag));
+        }
+    }
+
+    // Store the sequence of profiles
+    if (dwFlags & cmsFLAGS_KEEP_SEQUENCE) {
+        xform ->Sequence = _cmsCompileProfileSequence(ContextID, nProfiles, hProfiles);
+    }
+    else
+        xform ->Sequence = NULL;
+
+    // If this is a cached transform, init first value, which is zero (16 bits only)
+    if (!(dwFlags & cmsFLAGS_NOCACHE)) {
+
+        memset(&xform ->Cache.CacheIn, 0, sizeof(xform ->Cache.CacheIn));
+
+        if (xform ->GamutCheck != NULL) {
+            TransformOnePixelWithGamutCheck(xform, xform ->Cache.CacheIn, xform->Cache.CacheOut);
+        }
+        else {
+
+            xform ->Lut ->Eval16Fn(xform ->Cache.CacheIn, xform->Cache.CacheOut, xform -> Lut->Data);
+        }
+
+    }
+
+    return (cmsHTRANSFORM) xform;
+}
+
+// Multiprofile transforms: Gamut check is not available here, as it is unclear from which profile the gamut comes.
+// Every profile gets the same Intent and BPC flag; NULL is returned (after cmsERROR_RANGE) unless 1 <= nProfiles <= 255.
+cmsHTRANSFORM CMSEXPORT cmsCreateMultiprofileTransformTHR(cmsContext ContextID,
+                                                       cmsHPROFILE hProfiles[],
+                                                       cmsUInt32Number nProfiles,
+                                                       cmsUInt32Number InputFormat,
+                                                       cmsUInt32Number OutputFormat,
+                                                       cmsUInt32Number Intent,
+                                                       cmsUInt32Number dwFlags)
+{
+    cmsUInt32Number i;
+    cmsBool BPC[256];
+    cmsUInt32Number Intents[256];
+    cmsFloat64Number AdaptationStates[256];
+    // The scratch arrays above bound the chain length. nProfiles is unsigned, so it is reported with %lu, not %d.
+    if (nProfiles == 0 || nProfiles > 255) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Wrong number of profiles. 1..255 expected, %lu found.", (unsigned long) nProfiles);
+        return NULL;
+    }
+
+    for (i=0; i < nProfiles; i++) {
+        BPC[i] = dwFlags & cmsFLAGS_BLACKPOINTCOMPENSATION ? TRUE : FALSE;
+        Intents[i] = Intent;
+        AdaptationStates[i] = cmsSetAdaptationStateTHR(ContextID, -1); // NOTE(review): -1 presumably only reads the current state - confirm
+    }
+
+    return cmsCreateExtendedTransform(ContextID, nProfiles, hProfiles, BPC, Intents, AdaptationStates, NULL, 0, InputFormat, OutputFormat, dwFlags);
+}
+
+
+
+cmsHTRANSFORM CMSEXPORT cmsCreateMultiprofileTransform(cmsHPROFILE hProfiles[],
+                                                  cmsUInt32Number nProfiles,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number dwFlags)
+{
+    // The count is validated here, before hProfiles[0] is read below. nProfiles is unsigned, hence %lu instead of %d.
+    if (nProfiles == 0 || nProfiles > 255) {
+        cmsSignalError(NULL, cmsERROR_RANGE, "Wrong number of profiles. 1..255 expected, %lu found.", (unsigned long) nProfiles);
+        return NULL;
+    }
+    // Context-free variant: the context of the first profile is used for the whole transform.
+    return cmsCreateMultiprofileTransformTHR(cmsGetProfileContextID(hProfiles[0]),
+                                                  hProfiles,
+                                                  nProfiles,
+                                                  InputFormat,
+                                                  OutputFormat,
+                                                  Intent,
+                                                  dwFlags);
+}
+
+cmsHTRANSFORM CMSEXPORT cmsCreateTransformTHR(cmsContext ContextID,
+                                              cmsHPROFILE Input,
+                                              cmsUInt32Number InputFormat,
+                                              cmsHPROFILE Output,
+                                              cmsUInt32Number OutputFormat,
+                                              cmsUInt32Number Intent,
+                                              cmsUInt32Number dwFlags)
+{
+    // Two-profile wrapper over cmsCreateMultiprofileTransformTHR; a NULL Output shrinks the chain to Input alone.
+    cmsHPROFILE Chain[2];
+    cmsUInt32Number nChain = (Output == NULL) ? 1U : 2U;
+
+    Chain[0] = Input;
+    Chain[1] = Output;
+    return cmsCreateMultiprofileTransformTHR(ContextID, Chain, nChain, InputFormat, OutputFormat, Intent, dwFlags);
+}
+
+CMSAPI cmsHTRANSFORM CMSEXPORT cmsCreateTransform(cmsHPROFILE Input,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsHPROFILE Output,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number dwFlags)
+{
+    cmsContext ContextID = cmsGetProfileContextID(Input);   // context-free variant: reuse the context of the Input profile
+    return cmsCreateTransformTHR(ContextID, Input, InputFormat, Output, OutputFormat, Intent, dwFlags);
+}
+
+cmsHTRANSFORM CMSEXPORT cmsCreateProofingTransformTHR(cmsContext ContextID,
+                                                   cmsHPROFILE InputProfile,
+                                                   cmsUInt32Number InputFormat,
+                                                   cmsHPROFILE OutputProfile,
+                                                   cmsUInt32Number OutputFormat,
+                                                   cmsHPROFILE ProofingProfile,
+                                                   cmsUInt32Number nIntent,
+                                                   cmsUInt32Number ProofingIntent,
+                                                   cmsUInt32Number dwFlags)
+{
+    // Builds the four-entry chain Input, Proofing, Proofing, Output (one row per entry below) for soft proofing
+    // and/or gamut checking. When neither flag is set this is an ordinary Input -> Output transform.
+    cmsHPROFILE Chain[4];
+    cmsUInt32Number ChainIntents[4];
+    cmsBool  ChainBPC[4];
+    cmsFloat64Number ChainAdaptation[4];
+    cmsBool  UseBPC = (dwFlags & cmsFLAGS_BLACKPOINTCOMPENSATION) ? TRUE : FALSE;
+    cmsFloat64Number State = cmsSetAdaptationStateTHR(ContextID, -1);
+
+    Chain[0] = InputProfile;    ChainIntents[0] = nIntent;                      ChainBPC[0] = UseBPC;  ChainAdaptation[0] = State;
+    Chain[1] = ProofingProfile; ChainIntents[1] = nIntent;                      ChainBPC[1] = UseBPC;  ChainAdaptation[1] = State;
+    Chain[2] = ProofingProfile; ChainIntents[2] = INTENT_RELATIVE_COLORIMETRIC; ChainBPC[2] = 0;       ChainAdaptation[2] = State;
+    Chain[3] = OutputProfile;   ChainIntents[3] = ProofingIntent;               ChainBPC[3] = 0;       ChainAdaptation[3] = State;
+
+    if ((dwFlags & (cmsFLAGS_SOFTPROOFING|cmsFLAGS_GAMUTCHECK)) == 0)
+        return cmsCreateTransformTHR(ContextID, InputProfile, InputFormat, OutputProfile, OutputFormat, nIntent, dwFlags);
+
+    return cmsCreateExtendedTransform(ContextID, 4, Chain, ChainBPC, ChainIntents, ChainAdaptation,
+                                        ProofingProfile, 1, InputFormat, OutputFormat, dwFlags);
+}
+
+
+cmsHTRANSFORM CMSEXPORT cmsCreateProofingTransform(cmsHPROFILE InputProfile,
+                                                   cmsUInt32Number InputFormat,
+                                                   cmsHPROFILE OutputProfile,
+                                                   cmsUInt32Number OutputFormat,
+                                                   cmsHPROFILE ProofingProfile,
+                                                   cmsUInt32Number nIntent,
+                                                   cmsUInt32Number ProofingIntent,
+                                                   cmsUInt32Number dwFlags)
+{
+    // Context-free variant of cmsCreateProofingTransformTHR: the context is taken from
+    // InputProfile and every other argument is forwarded unchanged.
+    cmsContext ContextID = cmsGetProfileContextID(InputProfile);
+
+    return cmsCreateProofingTransformTHR(ContextID,
+                                         InputProfile, InputFormat,
+                                         OutputProfile, OutputFormat,
+                                         ProofingProfile, nIntent, ProofingIntent,
+                                         dwFlags);
+}
+
+
+// Report the ContextID stored in an open transform. A NULL transform yields a NULL context.
+cmsContext CMSEXPORT cmsGetTransformContextID(cmsHTRANSFORM hTransform)
+{
+    const _cmsTRANSFORM* Transform = (const _cmsTRANSFORM*) hTransform;
+
+    // The handle is just the internal transform structure behind an opaque pointer
+    return (Transform != NULL) ? Transform->ContextID : NULL;
+}
+
+// Grab the input/output formats (0 is returned for a NULL transform)
+cmsUInt32Number CMSEXPORT cmsGetTransformInputFormat(cmsHTRANSFORM hTransform)
+{
+    const _cmsTRANSFORM* Transform = (const _cmsTRANSFORM*) hTransform;
+
+    if (Transform != NULL) return Transform->InputFormat;
+    return 0;
+}
+
+cmsUInt32Number CMSEXPORT cmsGetTransformOutputFormat(cmsHTRANSFORM hTransform)
+{
+    // Output-side twin of cmsGetTransformInputFormat: 0 for a NULL handle, else the stored OutputFormat
+    const _cmsTRANSFORM* Transform = (const _cmsTRANSFORM*) hTransform;
+
+    return (Transform == NULL) ? 0 : Transform->OutputFormat;
+}
+
+// For backwards compatibility: swaps the buffer formats of an existing transform. Returns TRUE on success, FALSE (transform unchanged) otherwise.
+cmsBool CMSEXPORT cmsChangeBuffersFormat(cmsHTRANSFORM hTransform,
+                                         cmsUInt32Number InputFormat,
+                                         cmsUInt32Number OutputFormat)
+{
+    _cmsTRANSFORM* xform = (_cmsTRANSFORM*) hTransform;
+    cmsFormatter16 FromInput, ToOutput;
+
+    // A NULL handle is rejected up front, like the cmsGetTransform* accessors do, instead of being dereferenced
+    if (xform == NULL) return FALSE;
+
+    // We only can afford to change formatters if previous transform is at least 16 bits
+    if (!(xform ->dwOriginalFlags & cmsFLAGS_CAN_CHANGE_FORMATTER)) {
+        cmsSignalError(xform ->ContextID, cmsERROR_NOT_SUITABLE, "cmsChangeBuffersFormat works only on transforms created originally with at least 16 bits of precision");
+        return FALSE;
+    }
+
+    // Both 16-bit formatters are looked up before anything is stored, so a failed lookup leaves xform as it was
+    FromInput = _cmsGetFormatter(xform->ContextID, InputFormat,  cmsFormatterInput, CMS_PACK_FLAGS_16BITS).Fmt16;
+    ToOutput  = _cmsGetFormatter(xform->ContextID, OutputFormat, cmsFormatterOutput, CMS_PACK_FLAGS_16BITS).Fmt16;
+    if (FromInput == NULL || ToOutput == NULL) {
+        cmsSignalError(xform -> ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported raster format");
+        return FALSE;
+    }
+
+    xform ->InputFormat  = InputFormat;
+    xform ->OutputFormat = OutputFormat;
+    xform ->FromInput    = FromInput;
+    xform ->ToOutput     = ToOutput;
+    return TRUE;
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/lcms2.def b/third-party/libjxl/libjxl/third_party/lcms/src/lcms2.def
new file mode 100644
index 0000000000..c5e8603d29
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/lcms2.def
@@ -0,0 +1,343 @@
+LIBRARY     LCMS2.DLL
+
+EXPORTS
+
+_cms15Fixed16toDouble                    =   _cms15Fixed16toDouble
+_cms8Fixed8toDouble                      =   _cms8Fixed8toDouble
+cmsAdaptToIlluminant                     =    cmsAdaptToIlluminant
+_cmsAdjustEndianess16                    =   _cmsAdjustEndianess16
+_cmsAdjustEndianess32                    =   _cmsAdjustEndianess32
+_cmsAdjustEndianess64                    =   _cmsAdjustEndianess64
+cmsAllocNamedColorList                   =   cmsAllocNamedColorList
+cmsAllocProfileSequenceDescription       =   cmsAllocProfileSequenceDescription
+cmsAppendNamedColor                      =   cmsAppendNamedColor
+cmsBFDdeltaE                             =   cmsBFDdeltaE
+cmsBuildGamma                            =   cmsBuildGamma
+cmsBuildParametricToneCurve              =   cmsBuildParametricToneCurve
+cmsBuildSegmentedToneCurve               =   cmsBuildSegmentedToneCurve
+cmsBuildTabulatedToneCurve16             =   cmsBuildTabulatedToneCurve16
+cmsBuildTabulatedToneCurveFloat          =   cmsBuildTabulatedToneCurveFloat
+_cmsCalloc                               =   _cmsCalloc
+cmsChannelsOf                            =    cmsChannelsOf
+cmsCIE2000DeltaE                         =    cmsCIE2000DeltaE
+cmsCIE94DeltaE                           =    cmsCIE94DeltaE
+cmsCIECAM02Done                          =    cmsCIECAM02Done
+cmsCIECAM02Forward                       =    cmsCIECAM02Forward
+cmsCIECAM02Init                          =    cmsCIECAM02Init
+cmsCIECAM02Reverse                       =    cmsCIECAM02Reverse
+cmsCloseIOhandler                        =    cmsCloseIOhandler
+cmsCloseProfile                          =    cmsCloseProfile
+cmsCMCdeltaE                             =    cmsCMCdeltaE
+cmsCreate_sRGBProfile                    =    cmsCreate_sRGBProfile
+cmsCreate_sRGBProfileTHR                 =    cmsCreate_sRGBProfileTHR
+cmsCreateBCHSWabstractProfile            =    cmsCreateBCHSWabstractProfile
+cmsCreateBCHSWabstractProfileTHR         =    cmsCreateBCHSWabstractProfileTHR
+cmsCreateExtendedTransform               =    cmsCreateExtendedTransform
+cmsCreateGrayProfile                     =    cmsCreateGrayProfile
+cmsCreateGrayProfileTHR                  =    cmsCreateGrayProfileTHR
+cmsCreateInkLimitingDeviceLink           =    cmsCreateInkLimitingDeviceLink
+cmsCreateInkLimitingDeviceLinkTHR        =    cmsCreateInkLimitingDeviceLinkTHR
+cmsCreateLab2Profile                     =    cmsCreateLab2Profile
+cmsCreateLab2ProfileTHR                  =    cmsCreateLab2ProfileTHR
+cmsCreateLab4Profile                     =    cmsCreateLab4Profile
+cmsCreateLab4ProfileTHR                  =    cmsCreateLab4ProfileTHR
+cmsCreateLinearizationDeviceLink         =    cmsCreateLinearizationDeviceLink
+cmsCreateLinearizationDeviceLinkTHR      =    cmsCreateLinearizationDeviceLinkTHR
+cmsCreateMultiprofileTransform           =    cmsCreateMultiprofileTransform
+cmsCreateMultiprofileTransformTHR        =    cmsCreateMultiprofileTransformTHR
+cmsCreateNULLProfile                     =    cmsCreateNULLProfile
+cmsCreateNULLProfileTHR                  =    cmsCreateNULLProfileTHR
+cmsCreateProfilePlaceholder              =    cmsCreateProfilePlaceholder
+cmsCreateProofingTransform               =    cmsCreateProofingTransform
+cmsCreateProofingTransformTHR            =    cmsCreateProofingTransformTHR
+cmsCreateRGBProfile                      =    cmsCreateRGBProfile
+cmsCreateRGBProfileTHR                   =    cmsCreateRGBProfileTHR
+cmsCreateTransform                       =    cmsCreateTransform
+cmsCreateTransformTHR                    =    cmsCreateTransformTHR
+cmsCreateXYZProfile                      =    cmsCreateXYZProfile
+cmsCreateXYZProfileTHR                   =    cmsCreateXYZProfileTHR
+cmsD50_xyY                               =    cmsD50_xyY
+cmsD50_XYZ                               =    cmsD50_XYZ
+_cmsDecodeDateTimeNumber                 =    _cmsDecodeDateTimeNumber
+_cmsDefaultICCintents                    =    _cmsDefaultICCintents
+cmsDeleteTransform                       =    cmsDeleteTransform
+cmsDeltaE                                =    cmsDeltaE
+cmsDetectBlackPoint                      =    cmsDetectBlackPoint
+cmsDetectDestinationBlackPoint           =    cmsDetectDestinationBlackPoint
+cmsDetectTAC                             =    cmsDetectTAC
+cmsDesaturateLab                         =    cmsDesaturateLab
+cmsDoTransform                           =    cmsDoTransform
+cmsDoTransformStride                     =    cmsDoTransformStride
+_cmsDoubleTo15Fixed16                    =    _cmsDoubleTo15Fixed16
+_cmsDoubleTo8Fixed8                      =    _cmsDoubleTo8Fixed8
+_cmsDupMem                               =    _cmsDupMem
+cmsDupNamedColorList                     =    cmsDupNamedColorList
+cmsDupProfileSequenceDescription         =    cmsDupProfileSequenceDescription
+cmsDupToneCurve                          =    cmsDupToneCurve
+_cmsEncodeDateTimeNumber                 =    _cmsEncodeDateTimeNumber
+cmsEstimateGamma                         =    cmsEstimateGamma
+cmsGetToneCurveEstimatedTableEntries     =    cmsGetToneCurveEstimatedTableEntries
+cmsGetToneCurveEstimatedTable            =    cmsGetToneCurveEstimatedTable
+cmsEvalToneCurve16                       =    cmsEvalToneCurve16
+cmsEvalToneCurveFloat                    =    cmsEvalToneCurveFloat
+cmsfilelength                            =    cmsfilelength
+cmsFloat2LabEncoded                      =    cmsFloat2LabEncoded
+cmsFloat2LabEncodedV2                    =    cmsFloat2LabEncodedV2
+cmsFloat2XYZEncoded                      =    cmsFloat2XYZEncoded
+cmsFormatterForColorspaceOfProfile       =    cmsFormatterForColorspaceOfProfile
+cmsFormatterForPCSOfProfile              =    cmsFormatterForPCSOfProfile
+_cmsFree                                 =    _cmsFree
+cmsFreeNamedColorList                    =    cmsFreeNamedColorList
+cmsFreeProfileSequenceDescription        =    cmsFreeProfileSequenceDescription
+cmsFreeToneCurve                         =    cmsFreeToneCurve
+cmsFreeToneCurveTriple                   =    cmsFreeToneCurveTriple
+cmsGBDAlloc                              =    cmsGBDAlloc
+cmsGBDFree                               =    cmsGBDFree
+cmsGDBAddPoint                           =    cmsGDBAddPoint
+cmsGDBCheckPoint                         =    cmsGDBCheckPoint
+cmsGDBCompute                            =    cmsGDBCompute
+cmsGetAlarmCodes                         =    cmsGetAlarmCodes
+cmsGetColorSpace                         =    cmsGetColorSpace
+cmsGetDeviceClass                        =    cmsGetDeviceClass
+cmsGetEncodedICCversion                  =    cmsGetEncodedICCversion
+cmsGetHeaderAttributes                   =    cmsGetHeaderAttributes
+cmsGetHeaderCreationDateTime             =    cmsGetHeaderCreationDateTime
+cmsGetHeaderFlags                        =    cmsGetHeaderFlags
+cmsGetHeaderManufacturer                 =    cmsGetHeaderManufacturer
+cmsGetHeaderModel                        =    cmsGetHeaderModel
+cmsGetHeaderProfileID                    =    cmsGetHeaderProfileID
+cmsGetHeaderRenderingIntent              =    cmsGetHeaderRenderingIntent
+cmsGetNamedColorList                     =    cmsGetNamedColorList
+cmsGetPCS                                =    cmsGetPCS
+cmsGetPostScriptColorResource            =    cmsGetPostScriptColorResource
+cmsGetPostScriptCRD                      =    cmsGetPostScriptCRD
+cmsGetPostScriptCSA                      =    cmsGetPostScriptCSA
+cmsGetProfileInfo                        =    cmsGetProfileInfo
+cmsGetProfileInfoASCII                   =    cmsGetProfileInfoASCII
+cmsGetProfileContextID                   =    cmsGetProfileContextID
+cmsGetProfileVersion                     =    cmsGetProfileVersion
+cmsGetSupportedIntents                   =    cmsGetSupportedIntents
+cmsGetTagCount                           =    cmsGetTagCount
+cmsGetTagSignature                       =    cmsGetTagSignature
+cmsGetTransformContextID                 =    cmsGetTransformContextID
+_cmsICCcolorSpace                        =    _cmsICCcolorSpace
+_cmsIOPrintf                             =    _cmsIOPrintf
+cmsIsCLUT                                =    cmsIsCLUT
+cmsIsIntentSupported                     =    cmsIsIntentSupported
+cmsIsMatrixShaper                        =    cmsIsMatrixShaper
+cmsIsTag                                 =    cmsIsTag
+cmsIsToneCurveDescending                 =    cmsIsToneCurveDescending
+cmsIsToneCurveLinear                     =    cmsIsToneCurveLinear
+cmsIsToneCurveMonotonic                  =    cmsIsToneCurveMonotonic
+cmsIsToneCurveMultisegment               =    cmsIsToneCurveMultisegment
+cmsGetToneCurveParametricType            =    cmsGetToneCurveParametricType
+cmsIT8Alloc                              =    cmsIT8Alloc
+cmsIT8DefineDblFormat                    =    cmsIT8DefineDblFormat
+cmsIT8EnumDataFormat                     =    cmsIT8EnumDataFormat
+cmsIT8EnumProperties                     =    cmsIT8EnumProperties
+cmsIT8EnumPropertyMulti                  =    cmsIT8EnumPropertyMulti
+cmsIT8Free                               =    cmsIT8Free
+cmsIT8GetData                            =    cmsIT8GetData
+cmsIT8GetDataDbl                         =    cmsIT8GetDataDbl
+cmsIT8FindDataFormat                     =    cmsIT8FindDataFormat
+cmsIT8GetDataRowCol                      =    cmsIT8GetDataRowCol
+cmsIT8GetDataRowColDbl                   =    cmsIT8GetDataRowColDbl
+cmsIT8GetPatchName                       =    cmsIT8GetPatchName
+cmsIT8GetPatchByName                     =    cmsIT8GetPatchByName
+cmsIT8GetProperty                        =    cmsIT8GetProperty
+cmsIT8GetPropertyDbl                     =    cmsIT8GetPropertyDbl
+cmsIT8GetPropertyMulti                   =    cmsIT8GetPropertyMulti
+cmsIT8GetSheetType                       =    cmsIT8GetSheetType
+cmsIT8LoadFromFile                       =    cmsIT8LoadFromFile
+cmsIT8LoadFromMem                        =    cmsIT8LoadFromMem
+cmsIT8SaveToFile                         =    cmsIT8SaveToFile
+cmsIT8SaveToMem                          =    cmsIT8SaveToMem
+cmsIT8SetComment                         =    cmsIT8SetComment
+cmsIT8SetData                            =    cmsIT8SetData
+cmsIT8SetDataDbl                         =    cmsIT8SetDataDbl
+cmsIT8SetDataFormat                      =    cmsIT8SetDataFormat
+cmsIT8SetDataRowCol                      =    cmsIT8SetDataRowCol
+cmsIT8SetDataRowColDbl                   =    cmsIT8SetDataRowColDbl
+cmsIT8SetPropertyDbl                     =    cmsIT8SetPropertyDbl
+cmsIT8SetPropertyHex                     =    cmsIT8SetPropertyHex
+cmsIT8SetPropertyStr                     =    cmsIT8SetPropertyStr
+cmsIT8SetPropertyMulti                   =    cmsIT8SetPropertyMulti
+cmsIT8SetPropertyUncooked                =    cmsIT8SetPropertyUncooked
+cmsIT8SetSheetType                       =    cmsIT8SetSheetType
+cmsIT8SetTable                           =    cmsIT8SetTable
+cmsIT8SetTableByLabel                    =    cmsIT8SetTableByLabel
+cmsIT8SetIndexColumn                     =    cmsIT8SetIndexColumn
+cmsIT8TableCount                         =    cmsIT8TableCount
+cmsJoinToneCurve                         =    cmsJoinToneCurve
+cmsLab2LCh                               =    cmsLab2LCh
+cmsLab2XYZ                               =    cmsLab2XYZ
+cmsLabEncoded2Float                      =    cmsLabEncoded2Float
+cmsLabEncoded2FloatV2                    =    cmsLabEncoded2FloatV2
+cmsLCh2Lab                               =    cmsLCh2Lab
+_cmsLCMScolorSpace                       =    _cmsLCMScolorSpace
+cmsLinkTag                               =    cmsLinkTag
+cmsTagLinkedTo                           =    cmsTagLinkedTo
+cmsPipelineAlloc                         =    cmsPipelineAlloc
+cmsPipelineCat                           =    cmsPipelineCat
+cmsPipelineCheckAndRetreiveStages        =    cmsPipelineCheckAndRetreiveStages
+cmsPipelineDup                           =    cmsPipelineDup
+cmsPipelineStageCount                    =    cmsPipelineStageCount
+cmsPipelineEval16                        =    cmsPipelineEval16
+cmsPipelineEvalFloat                     =    cmsPipelineEvalFloat
+cmsPipelineEvalReverseFloat              =    cmsPipelineEvalReverseFloat
+cmsPipelineFree                          =    cmsPipelineFree
+cmsPipelineGetPtrToFirstStage            =    cmsPipelineGetPtrToFirstStage
+cmsPipelineGetPtrToLastStage             =    cmsPipelineGetPtrToLastStage
+cmsPipelineInputChannels                 =    cmsPipelineInputChannels
+cmsPipelineInsertStage                   =    cmsPipelineInsertStage
+cmsPipelineOutputChannels                =    cmsPipelineOutputChannels
+cmsPipelineSetSaveAs8bitsFlag            =    cmsPipelineSetSaveAs8bitsFlag
+_cmsPipelineSetOptimizationParameters    =    _cmsPipelineSetOptimizationParameters
+cmsPipelineUnlinkStage                   =    cmsPipelineUnlinkStage
+_cmsMalloc                               =    _cmsMalloc
+_cmsMallocZero                           =    _cmsMallocZero
+_cmsMAT3eval                             =    _cmsMAT3eval
+_cmsMAT3identity                         =    _cmsMAT3identity
+_cmsMAT3inverse                          =    _cmsMAT3inverse
+_cmsMAT3isIdentity                       =    _cmsMAT3isIdentity
+_cmsMAT3per                              =    _cmsMAT3per
+_cmsMAT3solve                            =    _cmsMAT3solve
+cmsMD5computeID                          =    cmsMD5computeID
+cmsMLUalloc                              =    cmsMLUalloc
+cmsMLUdup                                =    cmsMLUdup
+cmsMLUfree                               =    cmsMLUfree
+cmsMLUgetASCII                           =    cmsMLUgetASCII
+cmsMLUgetTranslation                     =    cmsMLUgetTranslation
+cmsMLUgetWide                            =    cmsMLUgetWide
+cmsMLUsetASCII                           =    cmsMLUsetASCII
+cmsMLUsetWide                            =    cmsMLUsetWide
+cmsStageAllocCLut16bit                   =    cmsStageAllocCLut16bit
+cmsStageAllocCLut16bitGranular           =    cmsStageAllocCLut16bitGranular
+cmsStageAllocCLutFloat                   =    cmsStageAllocCLutFloat
+cmsStageAllocCLutFloatGranular           =    cmsStageAllocCLutFloatGranular
+cmsStageAllocToneCurves                  =    cmsStageAllocToneCurves
+cmsStageAllocIdentity                    =    cmsStageAllocIdentity
+cmsStageAllocMatrix                      =    cmsStageAllocMatrix
+_cmsStageAllocPlaceholder                =    _cmsStageAllocPlaceholder
+cmsStageDup                              =    cmsStageDup
+cmsStageFree                             =    cmsStageFree
+cmsStageNext                             =    cmsStageNext
+cmsStageInputChannels                    =    cmsStageInputChannels
+cmsStageOutputChannels                   =    cmsStageOutputChannels
+cmsStageSampleCLut16bit                  =    cmsStageSampleCLut16bit
+cmsStageSampleCLutFloat                  =    cmsStageSampleCLutFloat
+cmsStageType                             =    cmsStageType
+cmsStageData                             =    cmsStageData
+cmsNamedColorCount                       =    cmsNamedColorCount
+cmsNamedColorIndex                       =    cmsNamedColorIndex
+cmsNamedColorInfo                        =    cmsNamedColorInfo
+cmsOpenIOhandlerFromFile                 =    cmsOpenIOhandlerFromFile
+cmsOpenIOhandlerFromMem                  =    cmsOpenIOhandlerFromMem
+cmsOpenIOhandlerFromNULL                 =    cmsOpenIOhandlerFromNULL
+cmsOpenIOhandlerFromStream               =    cmsOpenIOhandlerFromStream
+cmsOpenProfileFromFile                   =    cmsOpenProfileFromFile
+cmsOpenProfileFromFileTHR                =    cmsOpenProfileFromFileTHR
+cmsOpenProfileFromIOhandlerTHR           =    cmsOpenProfileFromIOhandlerTHR
+cmsOpenProfileFromMem                    =    cmsOpenProfileFromMem
+cmsOpenProfileFromMemTHR                 =    cmsOpenProfileFromMemTHR
+cmsOpenProfileFromStream                 =    cmsOpenProfileFromStream
+cmsOpenProfileFromStreamTHR              =    cmsOpenProfileFromStreamTHR
+cmsPlugin                                =    cmsPlugin
+_cmsRead15Fixed16Number                  =    _cmsRead15Fixed16Number
+_cmsReadAlignment                        =    _cmsReadAlignment
+_cmsReadFloat32Number                    =    _cmsReadFloat32Number
+cmsReadRawTag                            =    cmsReadRawTag
+cmsReadTag                               =    cmsReadTag
+_cmsReadTypeBase                         =    _cmsReadTypeBase
+_cmsReadUInt16Array                      =    _cmsReadUInt16Array
+_cmsReadUInt16Number                     =    _cmsReadUInt16Number
+_cmsReadUInt32Number                     =    _cmsReadUInt32Number
+_cmsReadUInt64Number                     =    _cmsReadUInt64Number
+_cmsReadUInt8Number                      =    _cmsReadUInt8Number
+_cmsReadXYZNumber                        =    _cmsReadXYZNumber
+_cmsRealloc                              =    _cmsRealloc
+cmsReverseToneCurve                      =    cmsReverseToneCurve
+cmsReverseToneCurveEx                    =    cmsReverseToneCurveEx
+cmsSaveProfileToFile                     =    cmsSaveProfileToFile
+cmsSaveProfileToIOhandler                =    cmsSaveProfileToIOhandler
+cmsSaveProfileToMem                      =    cmsSaveProfileToMem
+cmsSaveProfileToStream                   =    cmsSaveProfileToStream
+cmsSetAdaptationState                    =    cmsSetAdaptationState
+cmsSetAlarmCodes                         =    cmsSetAlarmCodes
+cmsSetColorSpace                         =    cmsSetColorSpace
+cmsSetDeviceClass                        =    cmsSetDeviceClass
+cmsSetEncodedICCversion                  =    cmsSetEncodedICCversion
+cmsSetHeaderAttributes                   =    cmsSetHeaderAttributes
+cmsSetHeaderFlags                        =    cmsSetHeaderFlags
+cmsSetHeaderManufacturer                 =    cmsSetHeaderManufacturer
+cmsSetHeaderModel                        =    cmsSetHeaderModel
+cmsSetHeaderProfileID                    =    cmsSetHeaderProfileID
+cmsSetHeaderRenderingIntent              =    cmsSetHeaderRenderingIntent
+cmsSetLogErrorHandler                    =    cmsSetLogErrorHandler
+cmsSetPCS                                =    cmsSetPCS
+cmsSetProfileVersion                     =    cmsSetProfileVersion
+cmsSignalError                           =    cmsSignalError
+cmsSmoothToneCurve                       =    cmsSmoothToneCurve
+cmsstrcasecmp                            =    cmsstrcasecmp
+cmsTempFromWhitePoint                    =    cmsTempFromWhitePoint
+cmsTransform2DeviceLink                  =    cmsTransform2DeviceLink
+cmsUnregisterPlugins                     =    cmsUnregisterPlugins
+_cmsVEC3cross                            =    _cmsVEC3cross
+_cmsVEC3distance                         =    _cmsVEC3distance
+_cmsVEC3dot                              =    _cmsVEC3dot
+_cmsVEC3init                             =    _cmsVEC3init
+_cmsVEC3length                           =    _cmsVEC3length
+_cmsVEC3minus                            =    _cmsVEC3minus
+cmsWhitePointFromTemp                    =    cmsWhitePointFromTemp
+_cmsWrite15Fixed16Number                 =    _cmsWrite15Fixed16Number
+_cmsWriteAlignment                       =    _cmsWriteAlignment
+_cmsWriteFloat32Number                   =    _cmsWriteFloat32Number
+cmsWriteRawTag                           =    cmsWriteRawTag
+cmsWriteTag                              =    cmsWriteTag
+_cmsWriteTypeBase                        =    _cmsWriteTypeBase
+_cmsWriteUInt16Array                     =    _cmsWriteUInt16Array
+_cmsWriteUInt16Number                    =    _cmsWriteUInt16Number
+_cmsWriteUInt32Number                    =    _cmsWriteUInt32Number
+_cmsWriteUInt64Number                    =    _cmsWriteUInt64Number
+_cmsWriteUInt8Number                     =    _cmsWriteUInt8Number
+_cmsWriteXYZNumber                       =    _cmsWriteXYZNumber
+cmsxyY2XYZ                               =   cmsxyY2XYZ
+cmsXYZ2Lab                               =   cmsXYZ2Lab
+cmsXYZ2xyY                               =   cmsXYZ2xyY
+cmsXYZEncoded2Float                      =   cmsXYZEncoded2Float
+cmsSliceSpace16                          =   cmsSliceSpace16
+cmsSliceSpaceFloat                       =   cmsSliceSpaceFloat
+cmsChangeBuffersFormat                   =   cmsChangeBuffersFormat
+cmsDictAlloc                             =   cmsDictAlloc
+cmsDictFree                              =   cmsDictFree
+cmsDictDup                               =   cmsDictDup
+cmsDictAddEntry                          =   cmsDictAddEntry
+cmsDictGetEntryList                      =   cmsDictGetEntryList
+cmsDictNextEntry                         =   cmsDictNextEntry
+_cmsGetTransformUserData                 =   _cmsGetTransformUserData
+_cmsSetTransformUserData                 =   _cmsSetTransformUserData
+_cmsGetTransformFormatters16             =   _cmsGetTransformFormatters16
+_cmsGetTransformFormattersFloat          =   _cmsGetTransformFormattersFloat
+cmsGetHeaderCreator                      =   cmsGetHeaderCreator
+cmsPluginTHR                             =   cmsPluginTHR
+cmsGetPipelineContextID                  =   cmsGetPipelineContextID
+cmsGetTransformInputFormat               =   cmsGetTransformInputFormat
+cmsGetTransformOutputFormat              =   cmsGetTransformOutputFormat
+cmsCreateContext                         =   cmsCreateContext            
+cmsDupContext                            =   cmsDupContext               
+cmsDeleteContext                         =   cmsDeleteContext              
+cmsGetContextUserData                    =   cmsGetContextUserData       
+cmsUnregisterPluginsTHR                  =   cmsUnregisterPluginsTHR 
+cmsSetAlarmCodesTHR                      =   cmsSetAlarmCodesTHR     
+cmsGetAlarmCodesTHR                      =   cmsGetAlarmCodesTHR
+cmsSetAdaptationStateTHR                 =   cmsSetAdaptationStateTHR
+cmsSetLogErrorHandlerTHR                 =   cmsSetLogErrorHandlerTHR
+cmsGetSupportedIntentsTHR                =   cmsGetSupportedIntentsTHR
+cmsMLUtranslationsCount                  =   cmsMLUtranslationsCount
+cmsMLUtranslationsCodes                  =   cmsMLUtranslationsCodes
+_cmsCreateMutex                          =   _cmsCreateMutex 
+_cmsDestroyMutex                         =   _cmsDestroyMutex
+_cmsLockMutex                            =   _cmsLockMutex   
+_cmsUnlockMutex                          =   _cmsUnlockMutex 
+cmsGetProfileIOhandler                   =   cmsGetProfileIOhandler
+cmsGetEncodedCMMversion                  =   cmsGetEncodedCMMversion
diff --git a/third-party/libjxl/libjxl/third_party/lcms/src/lcms2_internal.h b/third-party/libjxl/libjxl/third_party/lcms/src/lcms2_internal.h
new file mode 100644
index 0000000000..ceab205685
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/src/lcms2_internal.h
@@ -0,0 +1,1118 @@
+
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#ifndef _lcms_internal_H
+
+// Include plug-in foundation
+#ifndef _lcms_plugin_H
+#   include "lcms2_plugin.h"
+#endif
+
+// ctype is part of C99 as per 7.1.2
+#include <ctype.h>
+
+// assert macro is part of C99 as per 7.2
+#include <assert.h>
+
+// Some needed constants
+#ifndef M_PI
+#       define M_PI        3.14159265358979323846
+#endif
+
+#ifndef M_LOG10E
+#       define M_LOG10E    0.434294481903251827651
+#endif
+
+// BorlandC 5.5, VC2003 are broken on that
+#if defined(__BORLANDC__) || (_MSC_VER < 1400) // 1400 == VC++ 8.0
+#define sinf(x) (float)sin((float)x)
+#define sqrtf(x) (float)sqrt((float)x)
+#endif
+
+
+// Alignment of ICC file format uses 4 bytes (cmsUInt32Number): rounds x up to the next multiple of sizeof(cmsUInt32Number)
+#define _cmsALIGNLONG(x) (((x)+(sizeof(cmsUInt32Number)-1)) & ~(sizeof(cmsUInt32Number)-1))
+
+// Alignment to memory pointer
+
+// (Ultra)SPARC with gcc requires ptr alignment of 8 bytes
+// even though sizeof(void *) is only four: for greatest flexibility
+// allow the build to specify ptr alignment.
+#ifndef CMS_PTR_ALIGNMENT
+# define CMS_PTR_ALIGNMENT sizeof(void *)   // default used when the build does not supply its own value
+#endif
+
+#define _cmsALIGNMEM(x)  (((x)+(CMS_PTR_ALIGNMENT - 1)) & ~(CMS_PTR_ALIGNMENT - 1))   // rounds x up to a multiple of CMS_PTR_ALIGNMENT; the mask is only correct for power-of-two alignments
+
+// Maximum encodable values in floating point (presumably the ab2/ab4 suffixes refer to the ICC v2 and v4 Lab encodings -- confirm)
+#define MAX_ENCODEABLE_XYZ  (1.0 + 32767.0/32768.0)   // ~1.99997
+#define MIN_ENCODEABLE_ab2  (-128.0)
+#define MAX_ENCODEABLE_ab2  ((65535.0/256.0) - 128.0)   // ~127.996
+#define MIN_ENCODEABLE_ab4  (-128.0)
+#define MAX_ENCODEABLE_ab4  (127.0)
+
+// Maximum of channels for internal pipeline evaluation
+#define MAX_STAGE_CHANNELS  128
+
+// Unused parameter warning suppression. The argument is parenthesized so that any expression can be passed safely.
+#define cmsUNUSED_PARAMETER(x) ((void)(x))
+
+// The specification for "inline" is section 6.7.4 of the C99 standard (ISO/IEC 9899:1999).
+// Unfortunately, Visual C++ does not conform to it, so MSVC and Borland get the vendor keyword instead.
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+#   define cmsINLINE __inline
+#else
+#   define cmsINLINE static inline
+#endif
+
+// Allow signed overflow, we know this is harmless in this particular context.
+#if defined(__clang__)
+#   define CMS_NO_SANITIZE __attribute__((no_sanitize("signed-integer-overflow")))   // attribute for functions whose overflow is intentional
+#else
+#   define CMS_NO_SANITIZE    // expands to nothing on every other compiler
+#endif
+
+// Other replacement functions (MSVC only). NOTE(review): _snprintf/_vsnprintf do not NUL-terminate on truncation -- confirm callers cope
+#ifdef _MSC_VER
+# ifndef snprintf
+#       define snprintf  _snprintf
+# endif
+# ifndef vsnprintf
+#       define vsnprintf  _vsnprintf
+# endif
+
+/// Properly define some macros to accommodate
+/// older MSVC versions (_MSC_VER <= 1700).
+# if _MSC_VER <= 1700
+        #include <float.h>   // declares _isnan and _finite
+        #define isnan _isnan
+        #define isinf(x) (!_finite((x)))   // NOTE(review): "not finite" is also true for NaN, unlike C99 isinf -- confirm this is acceptable
+# endif
+
+#endif
+
+// A fast way to convert from/to 16 <-> 8 bits
+#define FROM_8_TO_16(rgb) (cmsUInt16Number) ((((cmsUInt16Number) (rgb)) << 8)|(rgb))   // copies the byte into both halves (0xAB -> 0xABAB); rgb is evaluated twice
+#define FROM_16_TO_8(rgb) (cmsUInt8Number) ((((cmsUInt32Number)(rgb) * 65281U + 8388608U) >> 24) & 0xFFU)   // multiply-and-shift; 8388608 (2^23) is the rounding term; 0xFFFF -> 0xFF
+
+// Code analysis is broken on asserts, so MSVC >= 1500 also gets __analysis_assume(). NOTE(review): that variant is a brace block, so "_cmsAssert(x);" cannot be the unbraced body of an if that has an else
+#ifdef _MSC_VER
+#    if (_MSC_VER >= 1500)
+#            define _cmsAssert(a)  { assert((a)); __analysis_assume((a)); }
+#     else
+#            define _cmsAssert(a)   assert((a))
+#     endif
+#else
+#      define _cmsAssert(a)   assert((a))
+#endif
+
+//---------------------------------------------------------------------------------
+
+// Determinants lower than this are assumed to be zero (used on matrix inversion)
+#define MATRIX_DET_TOLERANCE    0.0001
+
+//---------------------------------------------------------------------------------
+
+// Fixed point helpers: the low 16 bits of x hold the fraction
+#define FIXED_TO_INT(x)         ((x)>>16)   // drops the 16 fraction bits
+#define FIXED_REST_TO_INT(x)    ((x)&0xFFFFU)   // keeps only the 16 fraction bits
+#define ROUND_FIXED_TO_INT(x)   (((x)+0x8000)>>16)   // adds one half (0x8000) before dropping the fraction
+
+cmsINLINE cmsS15Fixed16Number _cmsToFixedDomain(int a)                   { const int Carry = (a + 0x7fff) / 0xffff; return a + Carry; }   // e.g. 0 -> 0, 0xffff -> 0x10000
+cmsINLINE int                 _cmsFromFixedDomain(cmsS15Fixed16Number a) { const cmsS15Fixed16Number Carry = (a + 0x7fff) >> 16; return a - Carry; }   // e.g. 0 -> 0, 0x10000 -> 0xffff
+
+// -----------------------------------------------------------------------------------------------------------
+
+// Fast floor using the "magic number" trick. Thanks to Sree Kotay and Stuart Nixon.
+// It only works for inputs in the range -32767..+32767, because the low mantissa
+// word is read back as a 15.16 fixed point value. The type pun goes through a
+// union to avoid pointer aliasing overoptimization.
+cmsINLINE int _cmsQuickFloor(cmsFloat64Number val)
+{
+#ifdef CMS_DONT_USE_FAST_FLOOR
+    return (int) floor(val);    // plain library floor, selected at build time
+#else
+    union {
+        cmsFloat64Number AsDouble;
+        int Words[2];
+    } Pun;
+    const cmsFloat64Number Magic = 68719476736.0 * 1.5;  // 2^36 * 1.5, (52-16=36) uses limited precision to floor
+
+#ifdef CMS_USE_BIG_ENDIAN
+    const int LowWord = 1;      // big-endian build: the word to read is the second one
+#else
+    const int LowWord = 0;      // otherwise it is the first one
+#endif
+    Pun.AsDouble = val + Magic;
+    return Pun.Words[LowWord] >> 16;
+#endif
+}
+
+// Fast floor restricted to 0..65535.0: the input is shifted by -32767 before calling _cmsQuickFloor and shifted back afterwards
+cmsINLINE cmsUInt16Number _cmsQuickFloorWord(cmsFloat64Number d)
+{
+    return (cmsUInt16Number) _cmsQuickFloor(d - 32767.0) + 32767U;   // the cast binds to the call result, not to the sum; the sum is narrowed by the return type
+}
+
+// Round to nearest word, saturating at 0 and 0xffff
+cmsINLINE cmsUInt16Number _cmsQuickSaturateWord(cmsFloat64Number d)
+{
+    const cmsFloat64Number Rounded = d + 0.5;   // bias so that the floor below rounds to nearest
+
+    if (Rounded <= 0) return 0;
+    else if (Rounded >= 65535.0) return 0xffff;
+    else return _cmsQuickFloorWord(Rounded);
+}
+
+// Test bed entry points: declarations tagged CMSCHECKPOINT receive the CMSAPI decoration (presumably so the test bed can link to them) ---
+#define CMSCHECKPOINT CMSAPI
+
+// Pthread support --------------------------------------------------------------------
+#ifndef CMS_NO_PTHREADS
+
+// This is the threading support. Unfortunately, it has to be platform-dependent because 
+// windows does not support pthreads. 
+#ifdef CMS_IS_WINDOWS_
+
+#define WIN32_LEAN_AND_MEAN 1
+#include <windows.h>
+
+
+// The locking scheme in LCMS requires a single 'top level' mutex
+// to work. This is actually implemented on Windows as a
+// CriticalSection, because they are lighter weight. With
+// pthreads, this is statically inited. Unfortunately, windows
+// can't officially statically init critical sections.
+//
+// We can work around this in 2 ways.
+//
+// 1) We can use a proper mutex purely to protect the init
+// of the CriticalSection. This in turn requires us to protect
+// the Mutex creation, which we can do using the snappily
+// named InterlockedCompareExchangePointer API (present on
+// windows XP and above).
+//
+// 2) In cases where we want to work on pre-Windows XP, we
+// can use an even more horrible hack described below.
+//
+// So why wouldn't we always use 2)? Because not calling
+// the init function for a critical section means it fails
+// testing with ApplicationVerifier (and presumably similar
+// tools).
+//
+// We therefore default to 1, and people who want to be able
+// to run on pre-Windows XP boxes can build with:
+//     CMS_RELY_ON_WINDOWS_STATIC_MUTEX_INIT
+// defined. This is automatically set for builds using
+// versions of MSVC that don't have this API available.
+//
+// From: http://locklessinc.com/articles/pthreads_on_windows/
+// The pthreads API has an initialization macro that has no correspondence to anything in 
+// the windows API. By investigating the internal definition of the critical section type, 
+// one may work out how to initialize one without calling InitializeCriticalSection(). 
+// The trick here is that InitializeCriticalSection() is not allowed to fail. It tries 
+// to allocate a critical section debug object, but if no memory is available, it sets 
+// the pointer to a specific value. (One would expect that value to be NULL, but it is 
+// actually (void *)-1 for some reason.) Thus we can use this special value for that 
+// pointer, and the critical section code will work.
+
+// The other important part of the critical section type to initialize is the number 
+// of waiters. This controls whether or not the mutex is locked. Fortunately, this 
+// part of the critical section is unlikely to change. Apparently, many programs 
+// already test critical sections to see if they are locked using this value, so 
+// Microsoft felt that it was necessary to keep it set at -1 for an unlocked critical
+// section, even when they changed the underlying algorithm to be more scalable. 
+// The final parts of the critical section object are unimportant, and can be set 
+// to zero for their defaults. This yields an initialization macro:
+
+typedef CRITICAL_SECTION _cmsMutex;
+
+#ifdef _MSC_VER
+#    if (_MSC_VER >= 1800)
+#          pragma warning(disable : 26135)
+#    endif
+#endif
+
+#ifndef CMS_RELY_ON_WINDOWS_STATIC_MUTEX_INIT
+// If we are building with a version of MSVC smaller
+// than 1400 (i.e. before VS2005) then we don't have
+// the InterlockedCompareExchangePointer API, so use
+// the old version.
+#    ifdef _MSC_VER
+#       if _MSC_VER < 1400
+#          define CMS_RELY_ON_WINDOWS_STATIC_MUTEX_INIT
+#       endif
+#    endif
+#endif
+
+#ifdef CMS_RELY_ON_WINDOWS_STATIC_MUTEX_INIT
+#      define CMS_MUTEX_INITIALIZER {(PRTL_CRITICAL_SECTION_DEBUG) -1,-1,0,0,0,0}   // the static-init hack described above: debug pointer -1, second field -1 (unlocked)
+#else
+#      define CMS_MUTEX_INITIALIZER {(PRTL_CRITICAL_SECTION_DEBUG)NULL,-1,0,0,0,0}   // NULL debug pointer: presumably marks a section that still needs InitializeCriticalSection -- confirm in the mutex code
+#endif
+
+cmsINLINE int _cmsLockPrimitive(_cmsMutex *m)
+{
+    // EnterCriticalSection yields no status here, so 0 is always reported
+    return (EnterCriticalSection(m), 0);
+}
+
+cmsINLINE int _cmsUnlockPrimitive(_cmsMutex *m)
+{
+    // LeaveCriticalSection yields no status here, so 0 is always reported
+    return (LeaveCriticalSection(m), 0);
+}
+	
+cmsINLINE int _cmsInitMutexPrimitive(_cmsMutex *m)
+{
+    // InitializeCriticalSection yields no status here, so 0 is always reported
+    return (InitializeCriticalSection(m), 0);
+}
+
+cmsINLINE int _cmsDestroyMutexPrimitive(_cmsMutex *m)
+{
+    // DeleteCriticalSection yields no status here, so 0 is always reported
+    return (DeleteCriticalSection(m), 0);
+}
+
+cmsINLINE int _cmsEnterCriticalSectionPrimitive(_cmsMutex *m)
+{
+    // Same operation as _cmsLockPrimitive: enter the section, report 0
+    return _cmsLockPrimitive(m);
+}
+
+cmsINLINE int _cmsLeaveCriticalSectionPrimitive(_cmsMutex *m)
+{
+    // Same operation as _cmsUnlockPrimitive: leave the section, report 0
+    return _cmsUnlockPrimitive(m);
+}
+
+#else
+
+// Rest of the wide world
+#include <pthread.h>
+
+#define CMS_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+typedef pthread_mutex_t _cmsMutex;
+
+
+cmsINLINE int _cmsLockPrimitive(_cmsMutex *m)   // pthread flavour: _cmsMutex is a pthread_mutex_t
+{
+	return pthread_mutex_lock(m);	// the pthread status code is passed straight back
+}
+
+cmsINLINE int _cmsUnlockPrimitive(_cmsMutex *m)   // pthread flavour: _cmsMutex is a pthread_mutex_t
+{
+	return pthread_mutex_unlock(m);	// the pthread status code is passed straight back
+}
+	
+cmsINLINE int _cmsInitMutexPrimitive(_cmsMutex *m)   // pthread flavour: _cmsMutex is a pthread_mutex_t
+{
+	return pthread_mutex_init(m, NULL);	// NULL attribute pointer = default mutex attributes
+}
+
+cmsINLINE int _cmsDestroyMutexPrimitive(_cmsMutex *m)   // pthread flavour: _cmsMutex is a pthread_mutex_t
+{
+	return pthread_mutex_destroy(m);	// the pthread status code is passed straight back
+}
+
+cmsINLINE int _cmsEnterCriticalSectionPrimitive(_cmsMutex *m)
+{
+    return _cmsLockPrimitive(m);    // same pthread_mutex_lock call; its status is passed through
+}
+
+cmsINLINE int _cmsLeaveCriticalSectionPrimitive(_cmsMutex *m)
+{
+    return _cmsUnlockPrimitive(m);  // same pthread_mutex_unlock call; its status is passed through
+}
+
+#endif
+#else
+
+#define CMS_MUTEX_INITIALIZER 0
+typedef int _cmsMutex;
+
+
+cmsINLINE int _cmsLockPrimitive(_cmsMutex *m)
+{
+    // CMS_NO_PTHREADS build: nothing to lock, always report 0
+    return (cmsUNUSED_PARAMETER(m), 0);
+}
+
+cmsINLINE int _cmsUnlockPrimitive(_cmsMutex *m)
+{
+    // CMS_NO_PTHREADS build: nothing to unlock, always report 0
+    return (cmsUNUSED_PARAMETER(m), 0);
+}
+	
+cmsINLINE int _cmsInitMutexPrimitive(_cmsMutex *m)
+{
+    // CMS_NO_PTHREADS build: nothing to initialize, always report 0
+    return (cmsUNUSED_PARAMETER(m), 0);
+}
+
+cmsINLINE int _cmsDestroyMutexPrimitive(_cmsMutex *m)
+{
+    // CMS_NO_PTHREADS build: nothing to destroy, always report 0
+    return (cmsUNUSED_PARAMETER(m), 0);
+}
+
+cmsINLINE int _cmsEnterCriticalSectionPrimitive(_cmsMutex *m)
+{
+    // CMS_NO_PTHREADS build: same no-op as _cmsLockPrimitive, always 0
+    return _cmsLockPrimitive(m);
+}
+
+cmsINLINE int _cmsLeaveCriticalSectionPrimitive(_cmsMutex *m)
+{
+    // CMS_NO_PTHREADS build: same no-op as _cmsUnlockPrimitive, always 0
+    return _cmsUnlockPrimitive(m);
+}
+#endif
+
+// Plug-In registration ---------------------------------------------------------------
+
+// Specialized function for plug-in memory management. No pairing free() since whole pool is freed at once.
+void* _cmsPluginMalloc(cmsContext ContextID, cmsUInt32Number size);
+
+// Memory management
+cmsBool   _cmsRegisterMemHandlerPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Interpolation
+cmsBool  _cmsRegisterInterpPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Parametric curves
+cmsBool  _cmsRegisterParametricCurvesPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Formatters management
+cmsBool  _cmsRegisterFormattersPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Tag type management
+cmsBool  _cmsRegisterTagTypePlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Tag management
+cmsBool  _cmsRegisterTagPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Intent management
+cmsBool  _cmsRegisterRenderingIntentPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Multi Process elements
+cmsBool  _cmsRegisterMultiProcessElementPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Optimization
+cmsBool  _cmsRegisterOptimizationPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Transform
+cmsBool  _cmsRegisterTransformPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Mutex
+cmsBool _cmsRegisterMutexPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// ---------------------------------------------------------------------------------------------------------
+
+// Suballocators. One chunk of suballocator storage; chunks are chained into a singly linked list through 'next'.
+typedef struct _cmsSubAllocator_chunk_st {
+
+    cmsUInt8Number* Block;       // presumably the start of this chunk's storage -- confirm in the allocator code
+    cmsUInt32Number BlockSize;   // presumably the size of Block in bytes
+    cmsUInt32Number Used;        // presumably how much of Block has been handed out so far
+
+    struct _cmsSubAllocator_chunk_st* next;
+
+} _cmsSubAllocator_chunk;
+
+
+typedef struct {   // A suballocator: a context plus its chain of chunks
+
+    cmsContext ContextID;          // presumably the context passed to _cmsCreateSubAlloc
+    _cmsSubAllocator_chunk* h;     // presumably the head of the chunk list
+
+} _cmsSubAllocator;
+
+
+_cmsSubAllocator* _cmsCreateSubAlloc(cmsContext ContextID, cmsUInt32Number Initial);
+void              _cmsSubAllocDestroy(_cmsSubAllocator* s);
+void*             _cmsSubAlloc(_cmsSubAllocator* s, cmsUInt32Number size);
+void*             _cmsSubAllocDup(_cmsSubAllocator* s, const void *ptr, cmsUInt32Number size);
+
+// ----------------------------------------------------------------------------------
+
+// The context clients. Each enumerator names one slot of the chunks[] array in struct _cmsContext_struct.
+typedef enum {
+
+    UserPtr,            // User-defined pointer
+    Logger,             // Error logger (not a plug-in)
+    AlarmCodesContext,  // Alarm codes (not a plug-in)
+    AdaptationStateContext, // Adaptation state (not a plug-in)
+    MemPlugin,
+    InterpPlugin,
+    CurvesPlugin,
+    FormattersPlugin,
+    TagTypePlugin,
+    TagPlugin,
+    IntentPlugin,
+    MPEPlugin,          // Multi process elements
+    OptimizationPlugin,
+    TransformPlugin,
+    MutexPlugin,
+
+    // Last in list: also the number of elements of chunks[]
+    MemoryClientMax
+
+} _cmsMemoryClient;
+
+
+// Container for memory management plug-in: one function pointer per allocator operation.
+typedef struct {
+
+    _cmsMallocFnPtrType     MallocPtr;    
+    _cmsMalloZerocFnPtrType MallocZeroPtr;   // (sic) the type name is used with this spelling; do not "correct" it here
+    _cmsFreeFnPtrType       FreePtr;
+    _cmsReallocFnPtrType    ReallocPtr;
+    _cmsCallocFnPtrType     CallocPtr;
+    _cmsDupFnPtrType        DupPtr;
+
+} _cmsMemPluginChunkType;
+
+// Copy memory management function pointers from plug-in to chunk, taking care of missing routines
+void  _cmsInstallAllocFunctions(cmsPluginMemHandler* Plugin, _cmsMemPluginChunkType* ptr);
+
+// Internal structure for context
+struct _cmsContext_struct {
+    
+    struct _cmsContext_struct* Next;  // Points to next context in the new style
+    _cmsSubAllocator* MemPool;        // The memory pool that stores context data
+    
+    void* chunks[MemoryClientMax];    // Array of pointers to client chunks, indexed by _cmsMemoryClient. Memory itself is held in the suballocator.
+                                      // If an entry is NULL, then it reverts to global Context0
+
+    _cmsMemPluginChunkType DefaultMemoryManager;  // The allocators used for creating the context itself. Cannot be overridden
+};
+
+// Returns a pointer to a valid context structure, including the global one if id is zero. 
+// Verifies the magic number.
+struct _cmsContext_struct* _cmsGetContext(cmsContext ContextID);
+
+// Returns the block assigned to the specific zone. 
+void*     _cmsContextGetClientChunk(cmsContext id, _cmsMemoryClient mc);
+
+
+// Chunks of context memory by plug-in client -------------------------------------------------------
+
+// These structures encapsulate all variables needed by the several context clients (mostly plug-ins)
+
+// Container for error logger -- not a plug-in. Holds the one handler callback of a context.
+typedef struct {
+
+    cmsLogErrorHandlerFunction LogErrorHandler;  // Set to NULL for Context0 fallback
+
+} _cmsLogErrorChunkType;
+
+// The global Context0 storage for error logger
+extern  _cmsLogErrorChunkType  _cmsLogErrorChunk;
+
+// Allocate and init error logger container. 
+void _cmsAllocLogErrorChunk(struct _cmsContext_struct* ctx, 
+                            const struct _cmsContext_struct* src);
+
+// Container for alarm codes -- not a plug-in
+typedef struct {
+   
+    cmsUInt16Number AlarmCodes[cmsMAXCHANNELS];   // one 16-bit code per channel, cmsMAXCHANNELS entries
+
+} _cmsAlarmCodesChunkType;
+
+// The global Context0 storage for alarm codes
+extern  _cmsAlarmCodesChunkType _cmsAlarmCodesChunk;
+
+// Allocate and init alarm codes container. 
+void _cmsAllocAlarmCodesChunk(struct _cmsContext_struct* ctx, 
+                            const struct _cmsContext_struct* src);
+
+// Container for adaptation state -- not a plug-in
+typedef struct {
+    
+    cmsFloat64Number  AdaptationState;   // presumably the value managed through cmsSetAdaptationStateTHR -- confirm
+
+} _cmsAdaptationStateChunkType;
+
+// The global Context0 storage for adaptation state
+extern  _cmsAdaptationStateChunkType    _cmsAdaptationStateChunk;
+
+// Allocate and init adaptation state container.
+void _cmsAllocAdaptationStateChunk(struct _cmsContext_struct* ctx, 
+                                   const struct _cmsContext_struct* src);
+
+
+// The global Context0 storage for memory management
+extern  _cmsMemPluginChunkType _cmsMemPluginChunk;
+
+// Allocate and init memory management container.
+void _cmsAllocMemPluginChunk(struct _cmsContext_struct* ctx, 
+                             const struct _cmsContext_struct* src);
+
+// Container for interpolation plug-in
+typedef struct {
+
+    cmsInterpFnFactory Interpolators;   // presumably installed by _cmsRegisterInterpPlugin -- confirm
+
+} _cmsInterpPluginChunkType;
+
+// The global Context0 storage for interpolation plug-in
+extern  _cmsInterpPluginChunkType _cmsInterpPluginChunk;
+
+// Allocate and init interpolation container.
+void _cmsAllocInterpPluginChunk(struct _cmsContext_struct* ctx, 
+                                const struct _cmsContext_struct* src);
+
+// Container for parametric curves plug-in
+typedef struct {
+
+    struct _cmsParametricCurvesCollection_st* ParametricCurves;   // collection type is only referenced here; its definition lives elsewhere
+
+} _cmsCurvesPluginChunkType;
+
+// The global Context0 storage for tone curves plug-in
+extern  _cmsCurvesPluginChunkType _cmsCurvesPluginChunk;
+
+// Allocate and init parametric curves container.
+void _cmsAllocCurvesPluginChunk(struct _cmsContext_struct* ctx, 
+                                                      const struct _cmsContext_struct* src);
+
+// Container for formatters plug-in
+typedef struct {
+
+    struct _cms_formatters_factory_list* FactoryList;   // list type is only referenced here; its definition lives elsewhere
+
+} _cmsFormattersPluginChunkType;
+
+// The global Context0 storage for formatters plug-in
+extern  _cmsFormattersPluginChunkType _cmsFormattersPluginChunk;
+
+// Allocate and init formatters container.
+void _cmsAllocFormattersPluginChunk(struct _cmsContext_struct* ctx, 
+                                                       const struct _cmsContext_struct* src);
+
+// This chunk type is shared by the TagType plug-in and the MPE plug-in (two separate global instances are declared below)
+typedef struct {
+
+    struct _cmsTagTypeLinkedList_st* TagTypes;   // linked list going by the type name; its definition lives elsewhere
+
+} _cmsTagTypePluginChunkType;
+
+
+// The global Context0 storage for tag types plug-in
+extern  _cmsTagTypePluginChunkType      _cmsTagTypePluginChunk;
+
+
+// The global Context0 storage for multi process elements plug-in
+extern  _cmsTagTypePluginChunkType      _cmsMPETypePluginChunk;
+
+// Allocate and init Tag types container.
+void _cmsAllocTagTypePluginChunk(struct _cmsContext_struct* ctx, 
+                                                        const struct _cmsContext_struct* src);
+// Allocate and init MPE container.
+void _cmsAllocMPETypePluginChunk(struct _cmsContext_struct* ctx, 
+                                                        const struct _cmsContext_struct* src);
+// Container for tag plug-in
+typedef struct {
+   
+    struct _cmsTagLinkedList_st* Tag;   // linked list going by the type name; its definition lives elsewhere
+
+} _cmsTagPluginChunkType;
+
+
+// The global Context0 storage for tag plug-in
+extern  _cmsTagPluginChunkType _cmsTagPluginChunk;
+
+// Allocate and init Tag container.
+void _cmsAllocTagPluginChunk(struct _cmsContext_struct* ctx, 
+                                                      const struct _cmsContext_struct* src); 
+
+// Container for intents plug-in
+typedef struct {
+
+    struct _cms_intents_list* Intents;   // list type is only referenced here; its definition lives elsewhere
+
+} _cmsIntentsPluginChunkType;
+
+
+// The global Context0 storage for intents plug-in
+extern  _cmsIntentsPluginChunkType _cmsIntentsPluginChunk;
+
+// Allocate and init intents container.
+void _cmsAllocIntentsPluginChunk(struct _cmsContext_struct* ctx, 
+                                                        const struct _cmsContext_struct* src); 
+
+// Container for optimization plug-in
+typedef struct {
+
+    struct _cmsOptimizationCollection_st* OptimizationCollection;   // collection type is only referenced here; its definition lives elsewhere
+
+} _cmsOptimizationPluginChunkType;
+
+
+// The global Context0 storage for optimizers plug-in
+extern  _cmsOptimizationPluginChunkType _cmsOptimizationPluginChunk;
+
+// Allocate and init optimizers container.
+void _cmsAllocOptimizationPluginChunk(struct _cmsContext_struct* ctx, 
+                                         const struct _cmsContext_struct* src);
+
+// Container for transform plug-in
+typedef struct {
+
+    struct _cmsTransformCollection_st* TransformCollection;   // collection type is only referenced here; its definition lives elsewhere
+
+} _cmsTransformPluginChunkType;
+
+// The global Context0 storage for full-transform replacement plug-in
+extern  _cmsTransformPluginChunkType _cmsTransformPluginChunk;
+
+// Allocate and init transform container.
+void _cmsAllocTransformPluginChunk(struct _cmsContext_struct* ctx, 
+                                        const struct _cmsContext_struct* src);
+
+// Container for mutex plug-in: one function pointer per mutex operation
+typedef struct {
+
+    _cmsCreateMutexFnPtrType  CreateMutexPtr;    // creates a mutex
+    _cmsDestroyMutexFnPtrType DestroyMutexPtr;   // destroys a mutex
+    _cmsLockMutexFnPtrType    LockMutexPtr;      // locks a mutex
+    _cmsUnlockMutexFnPtrType  UnlockMutexPtr;    // unlocks a mutex
+
+} _cmsMutexPluginChunkType;
+
+// The global Context0 storage for mutex plug-in
+extern  _cmsMutexPluginChunkType _cmsMutexPluginChunk;
+
+// Allocate and init mutex container.
+void _cmsAllocMutexPluginChunk(struct _cmsContext_struct* ctx, 
+                                        const struct _cmsContext_struct* src);
+
+// ----------------------------------------------------------------------------------
+// MLU internal representation: one directory entry describing a single string
+typedef struct {
+
+    cmsUInt16Number Language;   // 16-bit language code of this entry
+    cmsUInt16Number Country;    // 16-bit country code of this entry
+
+    cmsUInt32Number StrW;       // Offset to current unicode string (presumably into the owning MLU's MemPool -- confirm)
+    cmsUInt32Number Len;        // Length in bytes
+
+} _cmsMLUentry;
+
+struct _cms_MLU_struct {   // A directory of _cmsMLUentry records plus the pool that stores their strings
+
+    cmsContext ContextID;
+
+    // The directory
+    cmsUInt32Number  AllocatedEntries;
+    cmsUInt32Number  UsedEntries;
+    _cmsMLUentry* Entries;     // Array of entry records (not pointers); each one refers to its string through StrW/Len
+
+    // The Pool
+    cmsUInt32Number PoolSize;  // The maximum allocated size
+    cmsUInt32Number PoolUsed;  // The used size
+    void*  MemPool;            // Pointer to the beginning of the memory pool
+};
+
+// Named color list internal representation: one named color
+typedef struct {
+
+    char Name[cmsMAX_PATH];                           // color name, buffer of cmsMAX_PATH chars
+    cmsUInt16Number PCS[3];                           // three 16-bit PCS components
+    cmsUInt16Number DeviceColorant[cmsMAXCHANNELS];   // 16-bit device values; presumably only the list's ColorantCount entries are meaningful
+
+} _cmsNAMEDCOLOR;
+
+struct _cms_NAMEDCOLORLIST_struct {   // The list that owns the _cmsNAMEDCOLOR entries
+
+    cmsUInt32Number nColors;         // presumably the number of entries of List in use
+    cmsUInt32Number Allocated;       // presumably the capacity of List
+    cmsUInt32Number ColorantCount;
+
+    char Prefix[33];      // Prefix and suffix are defined to be 32 characters at most
+    char Suffix[33];      // 33 = 32 characters plus one, presumably for the terminator
+
+    _cmsNAMEDCOLOR* List;
+
+    cmsContext ContextID;
+};
+
+
+// ----------------------------------------------------------------------------------
+
+// This is the internal struct holding profile details.
+
+// Maximum supported tags in a profile; sizes every per-tag array of _cmsICCPROFILE
+#define MAX_TABLE_TAG       100
+
+typedef struct _cms_iccprofile_struct {
+
+    // I/O handler
+    cmsIOHANDLER*            IOhandler;
+
+    // The context this profile belongs to (a cmsContext, not an OS thread id)
+    cmsContext               ContextID;
+
+    // Creation time
+    struct tm                Created;
+
+    // Only most important items found in ICC profiles
+    cmsUInt32Number          Version;
+    cmsProfileClassSignature DeviceClass;
+    cmsColorSpaceSignature   ColorSpace;
+    cmsColorSpaceSignature   PCS;
+    cmsUInt32Number          RenderingIntent;
+
+    cmsUInt32Number          flags;
+    cmsUInt32Number          manufacturer, model;
+    cmsUInt64Number          attributes;
+    cmsUInt32Number          creator;
+
+    cmsProfileID             ProfileID;
+
+    // Dictionary: parallel arrays of MAX_TABLE_TAG slots; presumably the first TagCount slots are in use -- confirm
+    cmsUInt32Number          TagCount;
+    cmsTagSignature          TagNames[MAX_TABLE_TAG];            // Signature of each tag
+    cmsTagSignature          TagLinked[MAX_TABLE_TAG];           // The tag to which this one is linked (0=none)
+    cmsUInt32Number          TagSizes[MAX_TABLE_TAG];            // Size on disk
+    cmsUInt32Number          TagOffsets[MAX_TABLE_TAG];          // Presumably the offset on disk -- confirm
+    cmsBool                  TagSaveAsRaw[MAX_TABLE_TAG];        // True to write uncooked
+    void *                   TagPtrs[MAX_TABLE_TAG];             // In-memory tag data; ownership is not visible from this header
+    cmsTagTypeHandler*       TagTypeHandlers[MAX_TABLE_TAG];     // Same structure may be serialized on different types
+                                                                 // depending on profile version, so we keep track of the
+                                                                 // type handler for each tag in the list.
+    // Special
+    cmsBool                  IsWrite;
+
+    // Keep a mutex for cmsReadTag -- Note that this only works if the user includes a mutex plugin
+    void *                   UsrMutex;
+
+} _cmsICCPROFILE;
+
+// IO helpers for profiles
+cmsBool              _cmsReadHeader(_cmsICCPROFILE* Icc);
+cmsBool              _cmsWriteHeader(_cmsICCPROFILE* Icc, cmsUInt32Number UsedSpace);
+int                  _cmsSearchTag(_cmsICCPROFILE* Icc, cmsTagSignature sig, cmsBool lFollowLinks);
+
+// Tag types
+cmsTagTypeHandler*   _cmsGetTagTypeHandler(cmsContext ContextID, cmsTagTypeSignature sig);
+cmsTagTypeSignature  _cmsGetTagTrueType(cmsHPROFILE hProfile, cmsTagSignature sig);
+cmsTagDescriptor*    _cmsGetTagDescriptor(cmsContext ContextID, cmsTagSignature sig);
+
+// Error logging ---------------------------------------------------------------------------------------------------------
+
+void                 _cmsTagSignature2String(char String[5], cmsTagSignature sig);
+
+// Interpolation ---------------------------------------------------------------------------------------------------------
+
+CMSCHECKPOINT cmsInterpParams* CMSEXPORT _cmsComputeInterpParams(cmsContext ContextID, cmsUInt32Number nSamples, cmsUInt32Number InputChan, cmsUInt32Number OutputChan, const void* Table, cmsUInt32Number dwFlags);
+cmsInterpParams*                         _cmsComputeInterpParamsEx(cmsContext ContextID, const cmsUInt32Number nSamples[], cmsUInt32Number InputChan, cmsUInt32Number OutputChan, const void* Table, cmsUInt32Number dwFlags);
+CMSCHECKPOINT void             CMSEXPORT _cmsFreeInterpParams(cmsInterpParams* p);
+cmsBool                                  _cmsSetInterpolationRoutine(cmsContext ContextID, cmsInterpParams* p);
+
+// Curves ----------------------------------------------------------------------------------------------------------------
+
+// This struct holds information about a segment, plus a pointer to the function that implements the evaluation.
+// In the case of table-based, Eval pointer is set to NULL
+
+// The gamma function (tone curve) main structure
+struct _cms_curve_struct {
+
+    cmsInterpParams*  InterpParams;  // Private optimizations for interpolation
+
+    cmsUInt32Number   nSegments;     // Number of segments in the curve. Zero for 16-bit table-based curves
+    cmsCurveSegment*  Segments;      // The segments
+    cmsInterpParams** SegInterp;     // Array of private optimizations for interpolation in table-based segments
+
+    cmsParametricCurveEvaluator* Evals;  // Evaluators (one per segment)
+
+    // 16 bit Table-based representation follows
+    cmsUInt32Number    nEntries;      // Number of table elements
+    cmsUInt16Number*   Table16;       // The table itself (nEntries 16-bit values)
+};
+
+
+//  Pipelines & Stages ---------------------------------------------------------------------------------------------
+
+// A single stage; stages are chained into a list through Next
+struct _cmsStage_struct {
+
+    cmsContext          ContextID;
+
+    cmsStageSignature   Type;           // Identifies the stage
+    cmsStageSignature   Implements;     // Identifies the *function* of the stage (for optimizations)
+
+    cmsUInt32Number     InputChannels;  // Input channels -- for optimization purposes
+    cmsUInt32Number     OutputChannels; // Output channels -- for optimization purposes
+
+    _cmsStageEvalFn     EvalPtr;        // Points to fn that evaluates the stage (always in floating point)
+    _cmsStageDupElemFn  DupElemPtr;     // Points to a fn that duplicates the *data* of the stage
+    _cmsStageFreeElemFn FreePtr;        // Points to a fn that frees the *data* of the stage
+
+    // A generic pointer to whatever memory is needed by the stage
+    void*               Data;
+
+    // Maintains linked list (used internally)
+    struct _cmsStage_struct* Next;
+};
+
+
+// Special Stages (cannot be saved)
+CMSCHECKPOINT cmsStage*  CMSEXPORT _cmsStageAllocLab2XYZ(cmsContext ContextID);
+CMSCHECKPOINT cmsStage*  CMSEXPORT _cmsStageAllocXYZ2Lab(cmsContext ContextID);
+cmsStage*                          _cmsStageAllocLabPrelin(cmsContext ContextID);
+CMSCHECKPOINT cmsStage*  CMSEXPORT _cmsStageAllocLabV2ToV4(cmsContext ContextID);
+cmsStage*                          _cmsStageAllocLabV2ToV4curves(cmsContext ContextID);
+CMSCHECKPOINT cmsStage*  CMSEXPORT _cmsStageAllocLabV4ToV2(cmsContext ContextID);
+CMSCHECKPOINT cmsStage*  CMSEXPORT _cmsStageAllocNamedColor(cmsNAMEDCOLORLIST* NamedColorList, cmsBool UsePCS);
+CMSCHECKPOINT cmsStage*  CMSEXPORT _cmsStageAllocIdentityCurves(cmsContext ContextID, cmsUInt32Number nChannels);
+CMSCHECKPOINT cmsStage*  CMSEXPORT _cmsStageAllocIdentityCLut(cmsContext ContextID, cmsUInt32Number nChan);
+cmsStage*                          _cmsStageNormalizeFromLabFloat(cmsContext ContextID);
+cmsStage*                          _cmsStageNormalizeFromXyzFloat(cmsContext ContextID);
+cmsStage*                          _cmsStageNormalizeToLabFloat(cmsContext ContextID);
+cmsStage*                          _cmsStageNormalizeToXyzFloat(cmsContext ContextID);
+cmsStage*                          _cmsStageClipNegatives(cmsContext ContextID, cmsUInt32Number nChannels);
+
+
+// For curve set only
+cmsToneCurve**     _cmsStageGetPtrToCurveSet(const cmsStage* mpe);
+
+
+// Pipeline Evaluator (in floating point); stored in the EvalFloatFn member of struct _cmsPipeline_struct
+typedef void (* _cmsPipelineEvalFloatFn)(const cmsFloat32Number In[],   // input values (read only)
+                                         cmsFloat32Number Out[],        // receives the output values
+                                         const void* Data);             // opaque, read-only evaluator data
+
+struct _cmsPipeline_struct {   // A chain of stages plus the evaluators used to run it
+
+    cmsStage* Elements;                                // Points to elements chain
+    cmsUInt32Number InputChannels, OutputChannels;
+
+    // Data & evaluators
+    void *Data;                            // presumably what is handed to the evaluators as their Data argument -- confirm
+
+   _cmsOPTeval16Fn         Eval16Fn;       // presumably the 16-bit counterpart of EvalFloatFn -- confirm
+   _cmsPipelineEvalFloatFn EvalFloatFn;    // floating point evaluator: (In[], Out[], Data)
+   _cmsFreeUserDataFn      FreeDataFn;     // presumably releases Data -- confirm
+   _cmsDupUserDataFn       DupDataFn;      // presumably duplicates Data -- confirm
+
+    cmsContext ContextID;            // Environment
+
+    cmsBool  SaveAs8Bits;            // Implementation-specific: save as 8 bits if possible
+};
+
+// LUT reading & creation -------------------------------------------------------------------------------------------
+
+// Read tags using low-level functions and provide the glue code needed to adapt versions, etc. All of these return a brand new copy
+// of the LUT, since the profile keeps ownership of the original. The caller should free the allocated resources.
+
+CMSCHECKPOINT cmsPipeline* CMSEXPORT _cmsReadInputLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent);
+CMSCHECKPOINT cmsPipeline* CMSEXPORT _cmsReadOutputLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent);
+CMSCHECKPOINT cmsPipeline* CMSEXPORT _cmsReadDevicelinkLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent);
+
+// Special values
+cmsBool           _cmsReadMediaWhitePoint(cmsCIEXYZ* Dest, cmsHPROFILE hProfile);
+cmsBool           _cmsReadCHAD(cmsMAT3* Dest, cmsHPROFILE hProfile);
+
+// Profile linker --------------------------------------------------------------------------------------------------
+
+cmsPipeline* _cmsLinkProfiles(cmsContext         ContextID,
+                              cmsUInt32Number    nProfiles,
+                              cmsUInt32Number    TheIntents[],
+                              cmsHPROFILE        hProfiles[],
+                              cmsBool            BPC[],
+                              cmsFloat64Number   AdaptationStates[],
+                              cmsUInt32Number    dwFlags);
+
+// Sequence --------------------------------------------------------------------------------------------------------
+
+cmsSEQ* _cmsReadProfileSequence(cmsHPROFILE hProfile);
+cmsBool _cmsWriteProfileSequence(cmsHPROFILE hProfile, const cmsSEQ* seq);
+cmsSEQ* _cmsCompileProfileSequence(cmsContext ContextID, cmsUInt32Number nProfiles, cmsHPROFILE hProfiles[]);
+
+
+// LUT optimization ------------------------------------------------------------------------------------------------
+
+CMSCHECKPOINT cmsUInt16Number  CMSEXPORT _cmsQuantizeVal(cmsFloat64Number i, cmsUInt32Number MaxSamples);
+
+cmsUInt32Number  _cmsReasonableGridpointsByColorspace(cmsColorSpaceSignature Colorspace, cmsUInt32Number dwFlags);
+
+cmsBool          _cmsEndPointsBySpace(cmsColorSpaceSignature Space,
+                                      cmsUInt16Number **White,
+                                      cmsUInt16Number **Black,
+                                      cmsUInt32Number *nOutputs);
+
+cmsBool          _cmsOptimizePipeline(cmsContext ContextID,
+                                      cmsPipeline**    Lut,
+                                      cmsUInt32Number  Intent,
+                                      cmsUInt32Number* InputFormat,
+                                      cmsUInt32Number* OutputFormat,
+                                      cmsUInt32Number* dwFlags );
+
+
+// Hi level LUT building ----------------------------------------------------------------------------------------------
+
+cmsPipeline*     _cmsCreateGamutCheckPipeline(cmsContext ContextID,
+                                              cmsHPROFILE hProfiles[],
+                                              cmsBool  BPC[],
+                                              cmsUInt32Number Intents[],
+                                              cmsFloat64Number AdaptationStates[],
+                                              cmsUInt32Number nGamutPCSposition,
+                                              cmsHPROFILE hGamut);
+
+
+// Formatters ------------------------------------------------------------------------------------------------------------
+
+#define cmsFLAGS_CAN_CHANGE_FORMATTER     0x02000000   // Allow change buffer format
+
+cmsBool         _cmsFormatterIsFloat(cmsUInt32Number Type);
+cmsBool         _cmsFormatterIs8bit(cmsUInt32Number Type);
+
+CMSCHECKPOINT cmsFormatter CMSEXPORT _cmsGetFormatter(cmsContext ContextID,
+                                                      cmsUInt32Number Type,          // Specific type, i.e. TYPE_RGB_8
+                                                      cmsFormatterDirection Dir,
+                                                      cmsUInt32Number dwFlags);
+
+
+#ifndef CMS_NO_HALF_SUPPORT 
+
+// Half float
+CMSCHECKPOINT cmsFloat32Number CMSEXPORT _cmsHalf2Float(cmsUInt16Number h);
+CMSCHECKPOINT cmsUInt16Number  CMSEXPORT _cmsFloat2Half(cmsFloat32Number flt);
+
+#endif
+
+// Transform logic ------------------------------------------------------------------------------------------------------
+
+struct _cmstransform_struct;
+
+typedef struct {
+
+    // 1-pixel cache (16 bits only), cmsMAXCHANNELS entries per side; presumably CacheOut is the result for CacheIn -- confirm
+    cmsUInt16Number CacheIn[cmsMAXCHANNELS];
+    cmsUInt16Number CacheOut[cmsMAXCHANNELS];
+
+} _cmsCACHE;
+
+
+
+// Transformation: internal state of a color transform (formats, formatters, pipelines, profile info and plug-in data)
+typedef struct _cmstransform_struct {
+
+    cmsUInt32Number InputFormat, OutputFormat; // Keep formats for further reference
+
+    // Points to transform code
+    _cmsTransform2Fn xform;
+
+    // Formatters; they cannot be embedded into the LUT because of the cache
+    cmsFormatter16 FromInput;
+    cmsFormatter16 ToOutput;
+
+    cmsFormatterFloat FromInputFloat;
+    cmsFormatterFloat ToOutputFloat;
+
+    // 1-pixel cache seed for zero as input (16 bits, read only)
+    _cmsCACHE Cache;
+
+    // A Pipeline holding the full (optimized) transform
+    cmsPipeline* Lut;
+
+    // A Pipeline holding the gamut check. It goes from the input space to bilevel
+    cmsPipeline* GamutCheck;
+
+    // Colorant tables
+    cmsNAMEDCOLORLIST* InputColorant;       // Input Colorant table
+    cmsNAMEDCOLORLIST* OutputColorant;      // Colorant table (for n chans > CMYK)
+
+    // Informational only
+    cmsColorSpaceSignature EntryColorSpace;
+    cmsColorSpaceSignature ExitColorSpace;
+
+    // White points (informative only)
+    cmsCIEXYZ EntryWhitePoint;
+    cmsCIEXYZ ExitWhitePoint;
+
+    // Profiles used to create the transform
+    cmsSEQ* Sequence;
+
+    cmsUInt32Number  dwOriginalFlags;   // NOTE(review): presumably the flags given at creation -- confirm
+    cmsFloat64Number AdaptationState;
+
+    // The intent of this transform. That is usually the last intent in the profile chain, but may differ
+    cmsUInt32Number RenderingIntent;
+
+    // An id that uniquely identifies the running context. May be null.
+    cmsContext ContextID;
+
+    // A user-defined pointer that can be used to store data for transform plug-ins
+    void* UserData;
+    _cmsFreeUserDataFn FreeUserData;    // NOTE(review): presumably called to release UserData -- confirm
+
+    // A way to provide backwards compatibility with full xform plugins
+    _cmsTransformFn OldXform;
+
+} _cmsTRANSFORM;
+
+// Copies extra channels from input to output if the original flags in the transform structure
+// instruct it to do so. This function is called by all standard transform functions.
+void _cmsHandleExtraChannels(_cmsTRANSFORM* p, const void* in,
+                             void* out, 
+                             cmsUInt32Number PixelsPerLine,
+                             cmsUInt32Number LineCount,
+                             const cmsStride* Stride);
+
+// -----------------------------------------------------------------------------------------------------------------------
+
+cmsHTRANSFORM _cmsChain2Lab(cmsContext             ContextID,
+                            cmsUInt32Number        nProfiles,
+                            cmsUInt32Number        InputFormat,
+                            cmsUInt32Number        OutputFormat,
+                            const cmsUInt32Number  Intents[],
+                            const cmsHPROFILE      hProfiles[],
+                            const cmsBool          BPC[],
+                            const cmsFloat64Number AdaptationStates[],
+                            cmsUInt32Number        dwFlags);
+
+
+cmsToneCurve* _cmsBuildKToneCurve(cmsContext       ContextID,
+                            cmsUInt32Number        nPoints,
+                            cmsUInt32Number        nProfiles,
+                            const cmsUInt32Number  Intents[],
+                            const cmsHPROFILE      hProfiles[],
+                            const cmsBool          BPC[],
+                            const cmsFloat64Number AdaptationStates[],
+                            cmsUInt32Number        dwFlags);
+
+cmsBool   _cmsAdaptationMatrix(cmsMAT3* r, const cmsMAT3* ConeMatrix, const cmsCIEXYZ* FromIll, const cmsCIEXYZ* ToIll);
+
+cmsBool   _cmsBuildRGB2XYZtransferMatrix(cmsMAT3* r, const cmsCIExyY* WhitePoint, const cmsCIExyYTRIPLE* Primaries);
+
+
+#define _lcms_internal_H
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/lcms/testbed/Makefile.am b/third-party/libjxl/libjxl/third_party/lcms/testbed/Makefile.am
new file mode 100644
index 0000000000..50e3439845
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/testbed/Makefile.am
@@ -0,0 +1,34 @@
+#
+# Makefile for building testcms
+#
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign
+
+AM_CPPFLAGS    =  -I$(top_builddir)/include -I$(top_srcdir)/include -I$(top_srcdir)/src
+
+check_PROGRAMS = testcms
+
+# CFLAGS = --pedantic -Wall -std=c99 -O2
+
+# The testsuite binary is statically linked. This is necessary as it uses some
+# of the internal functions that are not necessarily exported by the shared
+# library.
+testcms_LDADD = $(top_builddir)/src/liblcms2.la 
+testcms_LDFLAGS = -static @LDFLAGS@
+testcms_SOURCES = testcms2.c testplugin.c zoo_icc.c testcms2.h
+
+EXTRA_DIST = test1.icc bad.icc toosmall.icc test2.icc \
+             test3.icc test4.icc \
+             test5.icc ibm-t61.icc 
+
+check:
+	if [ $(top_srcdir) != $(top_builddir) ]; then \
+		cp $(top_srcdir)/testbed/*.ic? $(top_builddir)/testbed; \
+	fi
+	LD_LIBRARY_PATH=$(top_builddir)/src/.libs ./testcms
+	if [ $(top_srcdir) != $(top_builddir) ]; then \
+		rm -f $(top_builddir)/testbed/*.ic?; \
+	fi
+
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/testbed/Makefile.in b/third-party/libjxl/libjxl/third_party/lcms/testbed/Makefile.in
new file mode 100644
index 0000000000..efc8c2b85f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/testbed/Makefile.in
@@ -0,0 +1,647 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Makefile for building testcms
+#
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+check_PROGRAMS = testcms$(EXEEXT)
+subdir = testbed
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
+	$(top_srcdir)/m4/ax_append_compile_flags.m4 \
+	$(top_srcdir)/m4/ax_append_flag.m4 \
+	$(top_srcdir)/m4/ax_check_compile_flag.m4 \
+	$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+	$(top_srcdir)/m4/ax_require_defined.m4 \
+	$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+	$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+	$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am_testcms_OBJECTS = testcms2.$(OBJEXT) testplugin.$(OBJEXT) \
+	zoo_icc.$(OBJEXT)
+testcms_OBJECTS = $(am_testcms_OBJECTS)
+testcms_DEPENDENCIES = $(top_builddir)/src/liblcms2.la
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 = 
+testcms_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(testcms_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(testcms_SOURCES)
+DIST_SOURCES = $(testcms_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JPEGICC_DEPLIBS = @JPEGICC_DEPLIBS@
+LCMS_LIB_DEPLIBS = @LCMS_LIB_DEPLIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBRARY_AGE = @LIBRARY_AGE@
+LIBRARY_CURRENT = @LIBRARY_CURRENT@
+LIBRARY_REVISION = @LIBRARY_REVISION@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBTOOL_DEPS = @LIBTOOL_DEPS@
+LIB_JPEG = @LIB_JPEG@
+LIB_MATH = @LIB_MATH@
+LIB_THREAD = @LIB_THREAD@
+LIB_TIFF = @LIB_TIFF@
+LIB_ZLIB = @LIB_ZLIB@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_CXX = @PTHREAD_CXX@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TIFFICC_DEPLIBS = @TIFFICC_DEPLIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+inline = @inline@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include -I$(top_srcdir)/src
+
+# CFLAGS = --pedantic -Wall -std=c99 -O2
+
+# The testsuite binary is statically linked. This is necessary as it uses some
+# of the internal functions that are not necessarily exported by the shared
+# library.
+testcms_LDADD = $(top_builddir)/src/liblcms2.la 
+testcms_LDFLAGS = -static @LDFLAGS@
+testcms_SOURCES = testcms2.c testplugin.c zoo_icc.c testcms2.h
+EXTRA_DIST = test1.icc bad.icc toosmall.icc test2.icc \
+             test3.icc test4.icc \
+             test5.icc ibm-t61.icc 
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign testbed/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign testbed/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-checkPROGRAMS:
+	@list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \
+	echo " rm -f" $$list; \
+	rm -f $$list || exit $$?; \
+	test -n "$(EXEEXT)" || exit 0; \
+	list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+	echo " rm -f" $$list; \
+	rm -f $$list
+
+testcms$(EXEEXT): $(testcms_OBJECTS) $(testcms_DEPENDENCIES) $(EXTRA_testcms_DEPENDENCIES) 
+	@rm -f testcms$(EXEEXT)
+	$(AM_V_CCLD)$(testcms_LINK) $(testcms_OBJECTS) $(testcms_LDADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/testcms2.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/testplugin.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/zoo_icc.Po@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\
+@am__fastdepCC_TRUE@	$(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+	$(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-checkPROGRAMS clean-generic clean-libtool \
+	mostlyclean-am
+
+distclean: distclean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: check-am install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean \
+	clean-checkPROGRAMS clean-generic clean-libtool cscopelist-am \
+	ctags ctags-am distclean distclean-compile distclean-generic \
+	distclean-libtool distclean-tags distdir dvi dvi-am html \
+	html-am info info-am install install-am install-data \
+	install-data-am install-dvi install-dvi-am install-exec \
+	install-exec-am install-html install-html-am install-info \
+	install-info-am install-man install-pdf install-pdf-am \
+	install-ps install-ps-am install-strip installcheck \
+	installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags tags-am uninstall uninstall-am
+
+.PRECIOUS: Makefile
+
+
+check:
+	if [ $(top_srcdir) != $(top_builddir) ]; then \
+		cp $(top_srcdir)/testbed/*.ic? $(top_builddir)/testbed; \
+	fi
+	LD_LIBRARY_PATH=$(top_builddir)/src/.libs ./testcms
+	if [ $(top_srcdir) != $(top_builddir) ]; then \
+		rm -f $(top_builddir)/testbed/*.ic?; \
+	fi
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/third-party/libjxl/libjxl/third_party/lcms/testbed/bad.icc b/third-party/libjxl/libjxl/third_party/lcms/testbed/bad.icc
new file mode 100644
index 0000000000..ddfa62291f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/testbed/bad.icc
@@ -0,0 +1,21 @@
+SHELL = /bin/sh
+
+CFLAGS = -g -O4 -fomit-frame-pointer -Wall -I../include
+
+testcms.o: testcms.c
+
+testcms: testcms.o ../src/liblcms.a
+	$(CC) $(CFLAGS) testcms.o ../src/liblcms.a -o $@ -lm
+
+all: testcms test
+
+test: testcms
+	./testcms
+
+install:
+	# Nothing to install
+	
+clean:
+	-rm testcms.o testcms testcms.exe
+
+	
diff --git a/third-party/libjxl/libjxl/third_party/lcms/testbed/bad_mpe.icc b/third-party/libjxl/libjxl/third_party/lcms/testbed/bad_mpe.icc
new file mode 100644
index 0000000000..5a86c6c879
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/testbed/bad_mpe.icc differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/testbed/crayons.icc b/third-party/libjxl/libjxl/third_party/lcms/testbed/crayons.icc
new file mode 100644
index 0000000000..d04ad0d2e6
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/testbed/crayons.icc differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/testbed/ibm-t61.icc b/third-party/libjxl/libjxl/third_party/lcms/testbed/ibm-t61.icc
new file mode 100755
index 0000000000..53b3505c66
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/testbed/ibm-t61.icc differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/testbed/new.icc b/third-party/libjxl/libjxl/third_party/lcms/testbed/new.icc
new file mode 100644
index 0000000000..53b3505c66
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/testbed/new.icc differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/testbed/test1.icc b/third-party/libjxl/libjxl/third_party/lcms/testbed/test1.icc
new file mode 100755
index 0000000000..d0245c813c
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/testbed/test1.icc differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/testbed/test2.icc b/third-party/libjxl/libjxl/third_party/lcms/testbed/test2.icc
new file mode 100755
index 0000000000..73f1b5aa69
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/testbed/test2.icc differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/testbed/test3.icc b/third-party/libjxl/libjxl/third_party/lcms/testbed/test3.icc
new file mode 100755
index 0000000000..d0e79301d7
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/testbed/test3.icc differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/testbed/test4.icc b/third-party/libjxl/libjxl/third_party/lcms/testbed/test4.icc
new file mode 100755
index 0000000000..2270061be6
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/testbed/test4.icc differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/testbed/test5.icc b/third-party/libjxl/libjxl/third_party/lcms/testbed/test5.icc
new file mode 100755
index 0000000000..34583ab1cd
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/testbed/test5.icc differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/testbed/testcms2.c b/third-party/libjxl/libjxl/third_party/lcms/testbed/testcms2.c
new file mode 100644
index 0000000000..7bf17ad735
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/testbed/testcms2.c
@@ -0,0 +1,9133 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+
+#include "testcms2.h"
+
+// On Visual Studio, use debug CRT
+#ifdef _MSC_VER
+#     include "crtdbg.h"
+#     include <io.h>
+#endif
+
+// A single check. Returns 1 if success, 0 if failed
+typedef cmsInt32Number (*TestFn)(void);
+
+// A parametric Tone curve test function
+typedef cmsFloat32Number (* dblfnptr)(cmsFloat32Number x, const cmsFloat64Number Params[]);
+
+// Some globals to keep track of error
+#define TEXT_ERROR_BUFFER_SIZE  4096
+
+static char ReasonToFailBuffer[TEXT_ERROR_BUFFER_SIZE];
+static char SubTestBuffer[TEXT_ERROR_BUFFER_SIZE];
+static cmsInt32Number TotalTests = 0, TotalFail = 0;
+static cmsBool TrappedError;
+static cmsInt32Number SimultaneousErrors;
+
+
+#define cmsmin(a, b) (((a) < (b)) ? (a) : (b))
+
+// Die, a fatal unexpected error is detected!
+void Die(const char* Reason, ...)
+{
+    va_list args;
+    va_start(args, Reason);
+    vsprintf(ReasonToFailBuffer, Reason, args);
+    va_end(args);
+    printf("\n%s\n", ReasonToFailBuffer);
+    fflush(stdout);
+    exit(1);
+}
+
+// Memory management replacement -----------------------------------------------------------------------------
+
+
+// This is just a simple plug-in for malloc, free and realloc to keep track of memory allocated,
+// maximum requested as a single block and maximum allocated at a given time. Results are printed at the end
+static cmsUInt32Number SingleHit, MaxAllocated=0, TotalMemory=0;
+
+// I'm hiding the size before the block. This is a well-known technique and probably the blocks coming from
+// malloc are built in a way similar to that, but I do on my own to be portable.
+typedef struct {
+    cmsUInt32Number KeepSize;       // Payload size as requested from DebugMalloc (header not included)
+    cmsContext      WhoAllocated;   // Context given to DebugMalloc; DebugFree compares against it
+    cmsUInt32Number DontCheck;      // Nonzero makes DebugFree skip that comparison
+
+    union {
+        cmsUInt64Number HiSparc;
+
+        // '_cmsMemoryBlock' block is prepended by the
+        // allocator for any requested size. Thus, union holds
+        // "widest" type to guarantee proper '_cmsMemoryBlock'
+        // alignment for any requested size.
+
+    } alignment;
+
+
+} _cmsMemoryBlock;
+
+#define SIZE_OF_MEM_HEADER (sizeof(_cmsMemoryBlock))
+
+// This is a fake thread descriptor used to check thread integrity.
+// Basically it returns a different threadID each time it is called.
+// Then the memory management replacement functions does check if each
+// free() is being called with same ContextID used on malloc()
+static cmsContext DbgThread(void)
+{
+    // The counter persists across calls; results cycle through 1..0xfef, then 0, and repeat
+    static cmsUInt32Number n = 1;
+    cmsUInt32Number FakeId = n++ % 0xff0;
+    return (cmsContext) (void*) FakeId;
+}
+
+// The allocate routine: returns 'size' usable bytes preceded by a hidden _cmsMemoryBlock header,
+// or NULL if the underlying malloc fails. A zero-byte request is treated as fatal (Die).
+static void* DebugMalloc(cmsContext ContextID, cmsUInt32Number size)
+{
+    _cmsMemoryBlock* blk;
+
+    if (size == 0) {
+       Die("malloc requested with zero bytes");
+    }
+
+    // SingleHit tracks the largest single request, whether or not it succeeds
+    if (size > SingleHit)
+        SingleHit = size;
+
+    blk = (_cmsMemoryBlock*) malloc(size + SIZE_OF_MEM_HEADER);
+    if (blk == NULL) return NULL;
+
+    // Account only for blocks that really exist, so a failed malloc is not reported as a leak
+    TotalMemory += size;
+    if (TotalMemory > MaxAllocated)
+        MaxAllocated = TotalMemory;
+
+    blk ->KeepSize = size;
+    blk ->WhoAllocated = ContextID;
+    blk ->DontCheck = 0;
+    return (void*) ((cmsUInt8Number*) blk + SIZE_OF_MEM_HEADER);
+}
+
+
+// The free routine: rejects NULL, updates the running total, checks the owner and releases the block.
+static void DebugFree(cmsContext ContextID, void *Ptr)
+{
+    _cmsMemoryBlock* Header;
+    cmsBool OwnerMismatch;
+
+    if (Ptr == NULL)
+        Die("NULL free (which is a no-op in C, but may be an clue of something going wrong)");
+
+    // The bookkeeping header sits immediately before the user pointer
+    Header = (_cmsMemoryBlock*) (((cmsUInt8Number*) Ptr) - SIZE_OF_MEM_HEADER);
+    TotalMemory -= Header ->KeepSize;
+
+    OwnerMismatch = (Header ->WhoAllocated != ContextID);
+    if (OwnerMismatch && !Header ->DontCheck)
+        Die("Trying to free memory allocated by a different thread");
+
+    free(Header);
+}
+
+
+// Reallocate: just a malloc, a copy and a free in this case. Returns NULL and leaves
+// 'Ptr' untouched when the new block cannot be allocated, as standard realloc() does.
+static void * DebugRealloc(cmsContext ContextID, void* Ptr, cmsUInt32Number NewSize)
+{
+    _cmsMemoryBlock* blk;
+    void*  NewPtr;
+    cmsUInt32Number max_sz;
+
+    NewPtr = DebugMalloc(ContextID, NewSize);
+    if (NewPtr == NULL) return NULL;      // never memmove() into a failed allocation
+    if (Ptr == NULL) return NewPtr;       // nothing to copy: behaves like a plain malloc
+
+    blk = (_cmsMemoryBlock*) (((cmsUInt8Number*) Ptr) - SIZE_OF_MEM_HEADER);
+    max_sz = blk -> KeepSize > NewSize ? NewSize : blk ->KeepSize;  // copy the smaller of old/new size
+    memmove(NewPtr, Ptr, max_sz);
+    DebugFree(ContextID, Ptr);
+    return NewPtr;
+}
+
+// Report the allocator counters gathered so far on stdout
+static void DebugMemPrintTotals(void)
+{
+    fputs("[Memory statistics]\n", stdout);
+    printf("Allocated = %u MaxAlloc = %u Single block hit = %u\n",
+           TotalMemory, MaxAllocated, SingleHit);
+}
+
+
+// Flag the block behind 'Ptr' so that DebugFree skips its allocating-context comparison
+void DebugMemDontCheckThis(void *Ptr)
+{
+    cmsUInt8Number* Raw = (cmsUInt8Number*) Ptr;
+    ((_cmsMemoryBlock*) (Raw - SIZE_OF_MEM_HEADER)) ->DontCheck = 1;
+}
+
+
+// Memory string: "<n> Mb", "<n> Kb" or "<n> bytes", held in a static buffer reused by every call
+static const char* MemStr(cmsUInt32Number size)
+{
+    static char Buffer[1024];
+    const char* Format = "%g bytes";
+    cmsFloat64Number Divisor = 1.0;
+
+    if (size > 1024*1024) {
+        Format  = "%g Mb";
+        Divisor = 1024.0*1024.0;
+    }
+    else if (size > 1024) {
+        Format  = "%g Kb";
+        Divisor = 1024.0;
+    }
+    sprintf(Buffer, Format, (cmsFloat64Number) size / Divisor);
+    return Buffer;
+}
+
+
+// Print the verdict of a check: reports memory still allocated, otherwise "Ok." when 'ok' is set
+void TestMemoryLeaks(cmsBool ok)
+{
+    if (TotalMemory > 0) {
+        printf("Ok, but %s are left!\n", MemStr(TotalMemory));
+    }
+    else if (ok) printf("Ok.\n");
+}
+
+// Memory-handler plug-in that routes allocations through DebugMalloc / DebugFree / DebugRealloc (other slots NULL)
+static cmsPluginMemHandler DebugMemHandler = {{ cmsPluginMagicNumber, 2060, cmsPluginMemHandlerSig, NULL },
+                                               DebugMalloc, DebugFree, DebugRealloc, NULL, NULL, NULL };
+
+// Returns a pointer to the memhandler plugin (DebugMemHandler), typed as void*
+void* PluginMemHandler(void)
+{
+    return (void*) &DebugMemHandler;
+}
+
+// Creates a context backed by DebugMemHandler ('usr' is forwarded to cmsCreateContext) and dies
+// on failure. The context block is exempted from DebugFree's allocating-context comparison.
+cmsContext WatchDogContext(void* usr)
+{
+    cmsContext NewContext = cmsCreateContext(&DebugMemHandler, usr);
+
+    if (NewContext == NULL) {
+        Die("Unable to create memory managed context");
+    }
+    DebugMemDontCheckThis(NewContext);
+    return NewContext;
+}
+
+
+
+static
+void FatalErrorQuit(cmsContext ContextID, cmsUInt32Number ErrorCode, const char *Text)
+{
+    Die(Text);
+
+    cmsUNUSED_PARAMETER(ContextID);
+    cmsUNUSED_PARAMETER(ErrorCode);
+}
+
+// Installs FatalErrorQuit as the error-log handler via cmsSetLogErrorHandler
+void ResetFatalError(void)
+{
+    cmsSetLogErrorHandler(FatalErrorQuit);
+}
+
+
+// Print a single progress dot and flush stdout
+void Dot(void)
+{
+    printf("."); fflush(stdout);
+}
+// Print 'str' as-is and flush stdout
+void Say(const char* str)
+{
+    printf("%s", str); fflush(stdout);
+}
+
+
+// Keep track of the reason to fail: formats the message into ReasonToFailBuffer,
+// truncating it if needed (the former unbounded vsprintf could overrun the buffer).
+void Fail(const char* frm, ...)
+{
+    va_list args;
+    va_start(args, frm);
+    vsnprintf(ReasonToFailBuffer, sizeof(ReasonToFailBuffer), frm, args);
+    va_end(args);
+}
+
+// Keep track of subtest: prints a progress dot, then formats the subtest name into
+// SubTestBuffer, truncating it if needed (the former vsprintf had no bound).
+void SubTest(const char* frm, ...)
+{
+    va_list args;
+
+    Dot();
+    va_start(args, frm);
+    vsnprintf(SubTestBuffer, sizeof(SubTestBuffer), frm, args);
+    va_end(args);
+}
+
+// The check framework
+static
+void Check(const char* Title, TestFn Fn)
+{
+    printf("Checking %s ...", Title);
+    fflush(stdout);
+
+    ReasonToFailBuffer[0] = 0;
+    SubTestBuffer[0] = 0;
+    TrappedError = FALSE;
+    SimultaneousErrors = 0;
+    TotalTests++;
+
+    if (Fn() && !TrappedError) {
+
+        // It is a good place to check memory
+        TestMemoryLeaks(TRUE);
+
+    }
+    else {
+        printf("FAIL!\n");
+
+        if (SubTestBuffer[0])
+            printf("%s: [%s]\n\t%s\n", Title, SubTestBuffer, ReasonToFailBuffer);
+        else
+            printf("%s:\n\t%s\n", Title, ReasonToFailBuffer);
+
+        if (SimultaneousErrors > 1)
+               printf("\tMore than one (%d) errors were reported\n", SimultaneousErrors);
+
+        TotalFail++;
+    }
+    fflush(stdout);
+}
+
+// Dump a tone curve, for easy diagnostic: writes one (SAMPLE_ID, VALUE) row per 16-bit
+// table entry to 'FileName' through the IT8 API. Does nothing if the IT8 object cannot be allocated.
+void DumpToneCurve(cmsToneCurve* gamma, const char* FileName)
+{
+    cmsHANDLE hIT8;
+    cmsUInt32Number i;
+
+    hIT8 = cmsIT8Alloc(gamma ->InterpParams->ContextID);
+    if (hIT8 == NULL) return;   // the IT8 calls below all expect a valid handle
+
+    cmsIT8SetPropertyDbl(hIT8, "NUMBER_OF_FIELDS", 2);
+    cmsIT8SetPropertyDbl(hIT8, "NUMBER_OF_SETS", gamma ->nEntries);
+    cmsIT8SetDataFormat(hIT8, 0, "SAMPLE_ID");
+    cmsIT8SetDataFormat(hIT8, 1, "VALUE");
+
+    for (i=0; i < gamma ->nEntries; i++) {
+        char Val[30];
+
+        sprintf(Val, "%u", i);
+        cmsIT8SetDataRowCol(hIT8, i, 0, Val);
+        sprintf(Val, "0x%x", gamma ->Table16[i]);
+        cmsIT8SetDataRowCol(hIT8, i, 1, Val);
+    }
+    cmsIT8SaveToFile(hIT8, FileName);
+    cmsIT8Free(hIT8);
+}
+
+// -------------------------------------------------------------------------------------------------
+
+
+// Used to perform several checks.
+// The space used is a clone of a well-known commercial
+// color space which I will name "Above RGB"
+static cmsHPROFILE Create_AboveRGB(void)
+{
+    cmsToneCurve* Curve[3];
+    cmsHPROFILE hProfile;
+    cmsCIExyY D65;
+    cmsCIExyYTRIPLE Primaries = {{0.64, 0.33, 1 },
+                                 {0.21, 0.71, 1 },
+                                 {0.15, 0.06, 1 }};
+
+    // The three channels share one gamma curve; bail out (as the gray builders do) if it cannot be built
+    Curve[0] = Curve[1] = Curve[2] = cmsBuildGamma(DbgThread(), 2.19921875);
+    if (Curve[0] == NULL) return NULL;
+
+    cmsWhitePointFromTemp(&D65, 6504);
+    hProfile = cmsCreateRGBProfileTHR(DbgThread(), &D65, &Primaries, Curve);
+    cmsFreeToneCurve(Curve[0]);
+    return hProfile;
+}
+
+// A gamma-2.2 gray space using cmsD50_xyY() as white point; NULL if the tone curve cannot be built
+static cmsHPROFILE Create_Gray22(void)
+{
+    cmsHPROFILE GrayProfile = NULL;
+    cmsToneCurve* Gamma22 = cmsBuildGamma(DbgThread(), 2.2);
+
+    if (Gamma22 != NULL) {
+        GrayProfile = cmsCreateGrayProfileTHR(DbgThread(), cmsD50_xyY(), Gamma22);
+        // The curve is released right after the profile call, whatever that call returned
+        cmsFreeToneCurve(Gamma22);
+    }
+    return GrayProfile;
+}
+
+// A gamma-3.0 gray space using cmsD50_xyY() as white point; NULL if the tone curve cannot be built
+static cmsHPROFILE Create_Gray30(void)
+{
+    cmsHPROFILE GrayProfile = NULL;
+    cmsToneCurve* Gamma30 = cmsBuildGamma(DbgThread(), 3.0);
+
+    if (Gamma30 != NULL) {
+        GrayProfile = cmsCreateGrayProfileTHR(DbgThread(), cmsD50_xyY(), Gamma30);
+        // The curve is released right after the profile call, whatever that call returned
+        cmsFreeToneCurve(Gamma30);
+    }
+    return GrayProfile;
+}
+
+
+// A linear (gamma 1.0) gray space whose PCS is switched to Lab. Returns NULL on failure.
+static cmsHPROFILE Create_GrayLab(void)
+{
+    cmsHPROFILE hProfile;
+    cmsToneCurve* Curve = cmsBuildGamma(DbgThread(), 1.0);
+    if (Curve == NULL) return NULL;
+
+    hProfile = cmsCreateGrayProfileTHR(DbgThread(), cmsD50_xyY(), Curve);
+    cmsFreeToneCurve(Curve);
+    if (hProfile == NULL) return NULL;   // do not hand a NULL profile to cmsSetPCS
+    cmsSetPCS(hProfile, cmsSigLabData);
+    return hProfile;
+}
+
+// A CMYK devicelink that adds gamma 3.0 to each channel. Returns NULL on failure.
+static cmsHPROFILE Create_CMYK_DeviceLink(void)
+{
+    cmsHPROFILE hProfile;
+    cmsToneCurve* Tab[4];
+    cmsToneCurve* Curve = cmsBuildGamma(DbgThread(), 3.0);
+    if (Curve == NULL) return NULL;
+
+    // The same curve is used for all four channels
+    Tab[0] = Curve;
+    Tab[1] = Curve;
+    Tab[2] = Curve;
+    Tab[3] = Curve;
+
+    hProfile = cmsCreateLinearizationDeviceLinkTHR(DbgThread(), cmsSigCmykData, Tab);
+
+    // Release the curve on every path: the early return on a NULL profile used to leak it.
+    // hProfile is NULL here when creation failed, and that NULL is what gets returned.
+    cmsFreeToneCurve(Curve);
+    return hProfile;
+}
+
+
+// Create a fake CMYK profile, without any other requirement than being coarse CMYK.
+// DON'T USE THIS PROFILE FOR ANYTHING, IT IS USELESS BUT FOR TESTING PURPOSES.
+typedef struct {
+
+    cmsHTRANSFORM hLab2sRGB;    // Lab (16 bits) -> RGB (doubles), used by ForwardSampler
+    cmsHTRANSFORM sRGB2Lab;     // RGB (doubles) -> Lab (16 bits), used by ReverseSampler
+    cmsHTRANSFORM hIlimit;      // Ink-limiting devicelink applied by ForwardSampler to its CMYK
+
+} FakeCMYKParams;
+
+// Clamp v to the range 0..1
+static cmsFloat64Number Clip(cmsFloat64Number v)
+{
+    cmsFloat64Number Clamped = v;
+    if (v < 0)      Clamped = 0;
+    else if (v > 1) Clamped = 1;
+    return Clamped;
+}
+
+// CLUT sampler for the PCS->device direction: Lab (16 bits) in, ink-limited CMYK (16 bits) out.
+static cmsInt32Number ForwardSampler(register const cmsUInt16Number In[], cmsUInt16Number Out[], void* Cargo)
+{
+    FakeCMYKParams* Params = (FakeCMYKParams*) Cargo;
+    cmsFloat64Number rgb[3], cmyk[4];
+    cmsFloat64Number SmallerOfCM;
+
+    cmsDoTransform(Params ->hLab2sRGB, In, rgb, 1);
+
+    // Naive complement: each ink is one minus the matching RGB channel
+    cmyk[0] = 1 - rgb[0];
+    cmyk[1] = 1 - rgb[1];
+    cmyk[2] = 1 - rgb[2];
+
+    // NONSENSE WARNING!: I'm doing this just because this is a test
+    // profile that may have ink limit up to 400%. There is no UCR here
+    // so the profile is basically useless for anything but testing.
+
+    // Black is the smallest of the three inks
+    if (cmyk[0] < cmyk[1])
+        SmallerOfCM = cmyk[0];
+    else
+        SmallerOfCM = cmyk[1];
+    cmyk[3] = cmsmin(SmallerOfCM, cmyk[2]);
+
+    cmsDoTransform(Params ->hIlimit, cmyk, Out, 1);
+    return 1;
+}
+
+
+// CLUT sampler for the device->PCS direction: CMYK (16 bits) in, Lab (16 bits) out.
+// The three inks are complemented, scaled by the black channel, and sent through sRGB2Lab.
+static cmsInt32Number ReverseSampler(register const cmsUInt16Number In[], register cmsUInt16Number Out[], register void* Cargo)
+{
+    FakeCMYKParams* Params = (FakeCMYKParams*) Cargo;
+    cmsFloat64Number Ink[3], rgb[3];
+    cmsFloat64Number Black = In[3] / 65535.0;
+    cmsInt32Number i;
+
+    // Normalize the three colored inks to 0..1
+    for (i = 0; i < 3; i++)
+        Ink[i] = In[i] / 65535.0;
+
+    for (i = 0; i < 3; i++) {
+
+        if (Black == 0) {
+            // No black at all: plain complement
+            rgb[i] = Clip(1 - Ink[i]);
+        }
+        else if (Black == 1) {
+            // Full black wins regardless of the other inks
+            rgb[i] = 0;
+        }
+        else {
+            // Otherwise black scales the complement down
+            rgb[i] = Clip((1 - Ink[i]) * (1 - Black));
+        }
+    }
+
+    cmsDoTransform(Params ->sRGB2Lab, rgb, Out, 1);
+    return 1;
+}
+
+
+
+// Builds a coarse CMYK output profile (Lab PCS) for testing. Returns NULL on failure.
+// All intermediate objects are released on every path; the early exits used to leak them.
+static cmsHPROFILE CreateFakeCMYK(cmsFloat64Number InkLimit, cmsBool lUseAboveRGB)
+{
+    cmsHPROFILE hICC = NULL;
+    cmsPipeline* AToB0 = NULL, *BToA0 = NULL;
+    cmsStage* CLUT = NULL;      // non-NULL only while no pipeline owns the stage yet
+    cmsContext ContextID;
+    FakeCMYKParams p;
+    cmsHPROFILE hLab, hsRGB, hLimit;
+    cmsUInt32Number cmykfrm;
+    cmsBool Ok = FALSE;
+
+    if (lUseAboveRGB)
+        hsRGB = Create_AboveRGB();
+    else
+       hsRGB  = cmsCreate_sRGBProfile();
+    hLab   = cmsCreateLab4Profile(NULL);
+    hLimit = cmsCreateInkLimitingDeviceLink(cmsSigCmykData, InkLimit);
+    cmykfrm = FLOAT_SH(1) | BYTES_SH(0)|CHANNELS_SH(4);
+    p.hLab2sRGB = cmsCreateTransform(hLab,  TYPE_Lab_16,  hsRGB, TYPE_RGB_DBL, INTENT_PERCEPTUAL, cmsFLAGS_NOOPTIMIZE|cmsFLAGS_NOCACHE);
+    p.sRGB2Lab  = cmsCreateTransform(hsRGB, TYPE_RGB_DBL, hLab,  TYPE_Lab_16,  INTENT_PERCEPTUAL, cmsFLAGS_NOOPTIMIZE|cmsFLAGS_NOCACHE);
+    p.hIlimit   = cmsCreateTransform(hLimit, cmykfrm, NULL, TYPE_CMYK_16, INTENT_PERCEPTUAL, cmsFLAGS_NOOPTIMIZE|cmsFLAGS_NOCACHE);
+    cmsCloseProfile(hLab); cmsCloseProfile(hsRGB); cmsCloseProfile(hLimit);
+    // Both samplers hand these transforms to cmsDoTransform, so none of them may be missing
+    if (p.hLab2sRGB == NULL || p.sRGB2Lab == NULL || p.hIlimit == NULL) goto Done;
+
+    ContextID = DbgThread();
+    hICC = cmsCreateProfilePlaceholder(ContextID);
+    if (!hICC) goto Done;
+
+    cmsSetProfileVersion(hICC, 4.3);
+    cmsSetDeviceClass(hICC, cmsSigOutputClass);
+    cmsSetColorSpace(hICC,  cmsSigCmykData);
+    cmsSetPCS(hICC,         cmsSigLabData);
+
+    // PCS -> device table, sampled through ForwardSampler
+    BToA0 = cmsPipelineAlloc(ContextID, 3, 4);
+    if (BToA0 == NULL) goto Done;
+    CLUT = cmsStageAllocCLut16bit(ContextID, 17, 3, 4, NULL);
+    if (CLUT == NULL) goto Done;
+    if (!cmsStageSampleCLut16bit(CLUT, ForwardSampler, &p, 0)) goto Done;
+    cmsPipelineInsertStage(BToA0, cmsAT_BEGIN, _cmsStageAllocIdentityCurves(ContextID, 3));
+    cmsPipelineInsertStage(BToA0, cmsAT_END, CLUT);
+    CLUT = NULL;                // now owned by BToA0
+    cmsPipelineInsertStage(BToA0, cmsAT_END, _cmsStageAllocIdentityCurves(ContextID, 4));
+    if (!cmsWriteTag(hICC, cmsSigBToA0Tag, (void*) BToA0)) goto Done;
+
+    // Device -> PCS table, sampled through ReverseSampler
+    AToB0 = cmsPipelineAlloc(ContextID, 4, 3);
+    if (AToB0 == NULL) goto Done;
+    CLUT = cmsStageAllocCLut16bit(ContextID, 17, 4, 3, NULL);
+    if (CLUT == NULL) goto Done;
+    if (!cmsStageSampleCLut16bit(CLUT, ReverseSampler, &p, 0)) goto Done;
+    cmsPipelineInsertStage(AToB0, cmsAT_BEGIN, _cmsStageAllocIdentityCurves(ContextID, 4));
+    cmsPipelineInsertStage(AToB0, cmsAT_END, CLUT);
+    CLUT = NULL;                // now owned by AToB0
+    cmsPipelineInsertStage(AToB0, cmsAT_END, _cmsStageAllocIdentityCurves(ContextID, 3));
+    if (!cmsWriteTag(hICC, cmsSigAToB0Tag, (void*) AToB0)) goto Done;
+    cmsLinkTag(hICC, cmsSigAToB1Tag, cmsSigAToB0Tag);
+    cmsLinkTag(hICC, cmsSigAToB2Tag, cmsSigAToB0Tag);
+    cmsLinkTag(hICC, cmsSigBToA1Tag, cmsSigBToA0Tag);
+    cmsLinkTag(hICC, cmsSigBToA2Tag, cmsSigBToA0Tag);
+    Ok = TRUE;
+Done:
+    if (CLUT != NULL) cmsStageFree(CLUT);
+    if (BToA0 != NULL) cmsPipelineFree(BToA0);
+    if (AToB0 != NULL) cmsPipelineFree(AToB0);
+    if (p.hLab2sRGB != NULL) cmsDeleteTransform(p.hLab2sRGB);
+    if (p.sRGB2Lab != NULL) cmsDeleteTransform(p.sRGB2Lab);
+    if (p.hIlimit != NULL) cmsDeleteTransform(p.hIlimit);
+    if (!Ok && hICC != NULL) { cmsCloseProfile(hICC); hICC = NULL; }
+    return hICC;
+}
+
+
+// Creates several profiles for later use ----------------------------------------------------------------------------------------------------
+
+// Save 'h' to 'FileName' and check it reopens. Closes 'h' on every path; returns 1 on success.
+static cmsInt32Number OneVirtual(cmsHPROFILE h, const char* SubTestTxt, const char* FileName)
+{
+    cmsBool Saved;
+    SubTest("%s", SubTestTxt);      // the label is data, never a format string
+    if (h == NULL) return 0;
+
+    Saved = cmsSaveProfileToFile(h, FileName);
+    cmsCloseProfile(h);             // previously leaked when the save failed
+    if (!Saved) return 0;
+    h = cmsOpenProfileFromFile(FileName, "r");
+    if (h == NULL) return 0;
+    cmsCloseProfile(h);
+    return 1;
+}
+
+
+
+// This test checks the ability of lcms2 to save its built-ins as valid profiles.
+// It does not check the functionality of such profiles
+static cmsInt32Number CreateTestProfiles(void)
+{
+    cmsHPROFILE Profile;
+
+    Profile = cmsCreate_sRGBProfileTHR(DbgThread());
+    if (!OneVirtual(Profile, "sRGB profile", "sRGBlcms2.icc"))
+        return 0;
+
+    // The "Above RGB" clone
+    Profile = Create_AboveRGB();
+    if (!OneVirtual(Profile, "aRGB profile", "aRGBlcms2.icc"))
+        return 0;
+
+    // Gray, gamma 2.2
+    Profile = Create_Gray22();
+    if (!OneVirtual(Profile, "Gray profile", "graylcms2.icc"))
+        return 0;
+
+    // Gray, gamma 3.0
+    Profile = Create_Gray30();
+    if (!OneVirtual(Profile, "Gray 3.0 profile", "gray3lcms2.icc"))
+        return 0;
+
+    // Gray with Lab PCS
+    Profile = Create_GrayLab();
+    if (!OneVirtual(Profile, "Gray Lab profile", "glablcms2.icc"))
+        return 0;
+
+    // CMYK linearization devicelink
+    Profile = Create_CMYK_DeviceLink();
+    if (!OneVirtual(Profile, "Linearization profile", "linlcms2.icc"))
+        return 0;
+
+    // Ink-limiting devicelink; a NULL profile fails here, before the subtest is announced
+    Profile = cmsCreateInkLimitingDeviceLinkTHR(DbgThread(), cmsSigCmykData, 150);
+    if (Profile == NULL) return 0;
+    if (!OneVirtual(Profile, "Ink-limiting profile", "limitlcms2.icc"))
+        return 0;
+
+    // Lab v2 identity
+    Profile = cmsCreateLab2ProfileTHR(DbgThread(), NULL);
+    if (!OneVirtual(Profile, "Lab 2 identity profile", "labv2lcms2.icc"))
+        return 0;
+
+    // Lab v4 identity
+    Profile = cmsCreateLab4ProfileTHR(DbgThread(), NULL);
+    if (!OneVirtual(Profile, "Lab 4 identity profile", "labv4lcms2.icc"))
+        return 0;
+
+    // XYZ identity
+    Profile = cmsCreateXYZProfileTHR(DbgThread());
+    if (!OneVirtual(Profile, "XYZ identity profile", "xyzlcms2.icc"))
+        return 0;
+
+    // NULL profile
+    Profile = cmsCreateNULLProfileTHR(DbgThread());
+    if (!OneVirtual(Profile, "NULL profile", "nullcms2.icc"))
+        return 0;
+
+    // BCHSW abstract profile
+    Profile = cmsCreateBCHSWabstractProfileTHR(DbgThread(), 17, 0, 0, 0, 0, 5000, 6000);
+    if (!OneVirtual(Profile, "BCHS profile", "bchslcms2.icc"))
+        return 0;
+
+    // Fake CMYK built on sRGB, ink limit 300
+    Profile = CreateFakeCMYK(300, FALSE);
+    if (!OneVirtual(Profile, "Fake CMYK profile", "lcms2cmyk.icc"))
+        return 0;
+
+    Profile = cmsCreateBCHSWabstractProfileTHR(DbgThread(), 17, 0, 1.2, 0, 3, 5000, 5000);
+    if (!OneVirtual(Profile, "Brightness", "brightness.icc"))
+        return 0;
+    return 1;
+}
+
+// Delete every profile file the tests may have left behind, in the listed order.
+// The result of remove() is not checked, so a file that does not exist
+// causes no failure here.
+static void RemoveTestProfiles(void)
+{
+    static const char* const Leftovers[] = {
+        "sRGBlcms2.icc",  "aRGBlcms2.icc",
+        "graylcms2.icc",  "gray3lcms2.icc",
+        "linlcms2.icc",   "limitlcms2.icc",
+        "labv2lcms2.icc", "labv4lcms2.icc",
+        "xyzlcms2.icc",   "nullcms2.icc",
+        "bchslcms2.icc",  "lcms2cmyk.icc",
+        "glablcms2.icc",  "lcms2link.icc",
+        "lcms2link2.icc", "brightness.icc"
+    };
+    size_t i;
+
+    for (i = 0; i < sizeof(Leftovers) / sizeof(Leftovers[0]); i++)
+        remove(Leftovers[i]);
+}
+
+// -------------------------------------------------------------------------------------------------
+
+// Check the size of basic types. If this test fails, nothing is going to work anyway
+static cmsInt32Number CheckBaseTypes(void)
+{
+    cmsBool SizesOk;
+    // Ignore warnings about conditional expression
+#ifdef _MSC_VER
+#pragma warning(disable: 4127)
+#endif
+
+    SizesOk = (sizeof(cmsUInt8Number)      == 1) &&
+              (sizeof(cmsInt8Number)       == 1) &&
+              (sizeof(cmsUInt16Number)     == 2) &&
+              (sizeof(cmsInt16Number)      == 2) &&
+              (sizeof(cmsUInt32Number)     == 4) &&
+              (sizeof(cmsInt32Number)      == 4) &&
+              (sizeof(cmsUInt64Number)     == 8) &&
+              (sizeof(cmsInt64Number)      == 8) &&
+              (sizeof(cmsFloat32Number)    == 4) &&
+              (sizeof(cmsFloat64Number)    == 8) &&
+              (sizeof(cmsSignature)        == 4) &&
+              (sizeof(cmsU8Fixed8Number)   == 2) &&
+              (sizeof(cmsS15Fixed16Number) == 4) &&
+              (sizeof(cmsU16Fixed16Number) == 4);
+
+    return SizesOk ? 1 : 0;
+}
+
+// -------------------------------------------------------------------------------------------------
+
+
+// Are we little or big endian?  From Harbison&Steele. Dies if the detected byte order
+// disagrees with the CMS_USE_BIG_ENDIAN compile-time toggle.
+static cmsInt32Number CheckEndianness(void)
+{
+    cmsInt32Number HostIsBig, ToggleIsBig;
+    union {
+        long l;
+        char c[sizeof (long)];
+    } Probe;
+
+    // On a big-endian host the value 1 lands in the last byte of the long
+    Probe.l = 1;
+    HostIsBig = (Probe.c[sizeof (long) - 1] == 1);
+
+#ifdef CMS_USE_BIG_ENDIAN
+    ToggleIsBig = 1;
+#else
+    ToggleIsBig = 0;
+#endif
+
+    if (HostIsBig == ToggleIsBig)
+        return 1;
+
+    Die("\nOOOPPSS! You have CMS_USE_BIG_ENDIAN toggle misconfigured!\n\n"
+        "Please, edit lcms2.h and %s the CMS_USE_BIG_ENDIAN toggle.\n", HostIsBig? "uncomment" : "comment");
+    return 0;
+}
+
+// Check quick floor on four probes (two positive, two negative).
+// Evaluation stops at the first mismatch, which is fatal.
+static cmsInt32Number CheckQuickFloor(void)
+{
+    cmsBool AllMatch = (_cmsQuickFloor(1.234) == 1) &&
+                       (_cmsQuickFloor(32767.234) == 32767) &&
+                       (_cmsQuickFloor(-1.234) == -2) &&
+                       (_cmsQuickFloor(-32767.1) == -32768);
+
+    if (AllMatch)
+        return 1;
+
+    // Reaching this point means the fast floor does not behave as expected on this machine
+    Die("\nOOOPPSS! _cmsQuickFloor() does not work as expected in your machine!\n\n"
+        "Please, edit lcms2.h and uncomment the CMS_DONT_USE_FAST_FLOOR toggle.\n");
+    return 0;
+}
+
+// Quick floor restricted to word: every i in 0..65534, offset by 0.1234, must floor back to i
+static cmsInt32Number CheckQuickFloorWord(void)
+{
+    cmsUInt32Number Expected = 0;
+
+    while (Expected < 65535) {
+        cmsFloat64Number Probe = (cmsFloat64Number) Expected + 0.1234;
+
+        if (_cmsQuickFloorWord(Probe) == Expected) {
+            Expected++;
+            continue;
+        }
+        Die("\nOOOPPSS! _cmsQuickFloorWord() does not work as expected in your machine!\n\n"
+            "Please, edit lcms2.h and uncomment the CMS_DONT_USE_FAST_FLOOR toggle.\n");
+        return 0;
+    }
+    return 1;
+}
+
+// -------------------------------------------------------------------------------------------------
+
+// Precision stuff.
+
+// On 15.16 fixed point, this is the maximum we can obtain. Remember ICC profiles have storage limits on this number
+#define FIXED_PRECISION_15_16 (1.0 / 65535.0)
+
+// On 8.8 fixed point, that is the max we can obtain.
+#define FIXED_PRECISION_8_8 (1.0 / 255.0)
+
+// On cmsFloat32Number type, this is the precision we expect
+#define FLOAT_PRECISSION      (0.00001)
+
+static cmsFloat64Number MaxErr;
+static cmsFloat64Number AllowedErr = FIXED_PRECISION_15_16;
+
+// Returns FALSE (and records the reason via Fail) only when |in - out| exceeds 'max'.
+// The largest difference seen so far is tracked in MaxErr as a side effect.
+cmsBool IsGoodVal(const char *title, cmsFloat64Number in, cmsFloat64Number out, cmsFloat64Number max)
+{
+    cmsFloat64Number Delta = fabs(in - out);
+    cmsBool TooFar = (Delta > max);
+
+    if (Delta > MaxErr)
+        MaxErr = Delta;
+    if (TooFar)
+        Fail("(%s): Must be %f, But is %f ", title, in, out);
+
+    return TooFar ? FALSE : TRUE;
+}
+
+// IsGoodVal with the tolerance fixed to FIXED_PRECISION_15_16
+cmsBool  IsGoodFixed15_16(const char *title, cmsFloat64Number in, cmsFloat64Number out)
+{
+    return IsGoodVal(title, in, out, FIXED_PRECISION_15_16);
+}
+
+// IsGoodVal with the tolerance fixed to FIXED_PRECISION_8_8
+cmsBool  IsGoodFixed8_8(const char *title, cmsFloat64Number in, cmsFloat64Number out)
+{
+    return IsGoodVal(title, in, out, FIXED_PRECISION_8_8);
+}
+
+// Exact comparison of two 16-bit words: TRUE when they are equal,
+// otherwise the failure reason is recorded via Fail and FALSE is returned
+cmsBool  IsGoodWord(const char *title, cmsUInt16Number in, cmsUInt16Number out)
+{
+    if (in == out)
+        return TRUE;
+
+    Fail("(%s): Must be %x, But is %x ", title, in, out);
+    return FALSE;
+}
+
+// Compare two 16-bit words allowing an absolute difference of up to 'maxErr';
+// a larger difference records the failure reason via Fail and returns FALSE
+cmsBool  IsGoodWordPrec(const char *title, cmsUInt16Number in, cmsUInt16Number out, cmsUInt16Number maxErr)
+{
+    int Distance = abs(in - out);
+
+    if (Distance <= maxErr) return TRUE;
+    Fail("(%s): Must be %x, But is %x ", title, in, out);
+    return FALSE;
+}
+
+// Fixed point ----------------------------------------------------------------------------------------------
+
+static
+cmsInt32Number TestSingleFixed15_16(cmsFloat64Number d)
+{
+    cmsS15Fixed16Number f = _cmsDoubleTo15Fixed16(d);
+    cmsFloat64Number RoundTrip = _cms15Fixed16toDouble(f);
+    cmsFloat64Number Error     = fabs(d - RoundTrip);
+
+    return ( Error <= FIXED_PRECISION_15_16);
+}
+
+// 15.16 round-trip check over a fixed list of positive and negative samples; stops at the first failure
+static cmsInt32Number CheckFixedPoint15_16(void)
+{
+    static const cmsFloat64Number Samples[] = {
+        1.0, 2.0, 1.23456, 0.99999, 0.1234567890123456789099999,
+        -1.0, -2.0, -1.23456, -1.1234567890123456789099999,
+        +32767.1234567890123456789099999, -32767.1234567890123456789099999
+    };
+    size_t i;
+
+    for (i = 0; i < sizeof(Samples) / sizeof(Samples[0]); i++) {
+        if (!TestSingleFixed15_16(Samples[i])) return 0;
+    }
+
+    return 1;
+}
+
+static
+cmsInt32Number TestSingleFixed8_8(cmsFloat64Number d)
+{
+    cmsS15Fixed16Number f = _cmsDoubleTo8Fixed8(d);
+    cmsFloat64Number RoundTrip = _cms8Fixed8toDouble((cmsUInt16Number) f);
+    cmsFloat64Number Error     = fabs(d - RoundTrip);
+
+    return ( Error <= FIXED_PRECISION_8_8);
+}
+
+// 8.8 round-trip check over a fixed list of samples; stops at the first failure
+static cmsInt32Number CheckFixedPoint8_8(void)
+{
+    static const cmsFloat64Number Samples[] = {
+        1.0, 2.0, 1.23456, 0.99999, 0.1234567890123456789099999, +255.1234567890123456789099999
+    };
+    size_t i;
+
+    for (i = 0; i < sizeof(Samples) / sizeof(Samples[0]); i++)
+        if (!TestSingleFixed8_8(Samples[i])) return 0;
+    return 1;
+}
+
+// D50 constant --------------------------------------------------------------------------------------------
+
+// Round-trips D50 through 15.16 fixed point twice: first from the cmsD50X/Y/Z constants,
+// then from a second set of coordinates given as literals. A pass fails when the
+// Euclidean distance between the original triplet and its round-tripped version
+// exceeds 1E-5.
+static
+cmsInt32Number CheckD50Roundtrip(void)
+{
+    cmsFloat64Number Candidates[2][3];
+    cmsInt32Number Pass;
+    cmsInt32Number Axis;
+
+    // Pass 0: the cmsD50X/Y/Z constants
+    Candidates[0][0] = cmsD50X;
+    Candidates[0][1] = cmsD50Y;
+    Candidates[0][2] = cmsD50Z;
+
+    // Pass 1: literal coordinates
+    Candidates[1][0] = 0.96420288;
+    Candidates[1][1] = 1.0;
+    Candidates[1][2] = 0.82490540;
+
+    // Both passes share the same procedure and the same tolerance
+    for (Pass = 0; Pass < 2; Pass++) {
+
+        double SumOfSquares = 0;
+        double euc;
+
+        // Accumulate the squared round-trip error of X, Y and Z, in that order
+        for (Axis = 0; Axis < 3; Axis++) {
+
+            cmsFloat64Number Original = Candidates[Pass][Axis];
+            cmsS15Fixed16Number Encoded = _cmsDoubleTo15Fixed16(Original);
+            cmsFloat64Number Decoded = _cms15Fixed16toDouble(Encoded);
+            double Delta = fabs(Original - Decoded);
+
+            SumOfSquares += Delta * Delta;
+        }
+
+        euc = sqrt(SumOfSquares);
+
+        // The first failing pass ends the test
+        if (euc > 1E-5) {
+
+            Fail("D50 roundtrip |err| > (%f) ", euc);
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+// Linear interpolation -----------------------------------------------------------------------------------------------
+
+// Since prime factors of 65535 (FFFF) are,
+//
+//            0xFFFF = 3 * 5 * 17 * 257
+//
+// I test tables of 2, 4, 6, and 18 points, that will be exact.
+
+// Fill Tab[0..n-1] with a linear 0..65535 ramp (rounded to nearest), ascending or descending
+static void BuildTable(cmsInt32Number n, cmsUInt16Number Tab[], cmsBool  Descending)
+{
+    cmsInt32Number Node;
+
+    for (Node = 0; Node < n; Node++) {
+        cmsInt32Number Slot = Descending ? (n - Node - 1) : Node;
+        cmsFloat64Number Level = (cmsFloat64Number) ((cmsFloat64Number) 65535.0 * Node ) / (n-1);
+        Tab[Slot] = (cmsUInt16Number) floor(Level + 0.5);
+    }
+}
+
+// A single function that does check 1D interpolation
+// nNodesToCheck = number of nodes to check
+// Down = Create decreasing tables
+// max_err = max allowed error
+// Returns 1 if every 16-bit input interpolates back to itself within max_err, 0 otherwise
+static
+cmsInt32Number Check1D(cmsInt32Number nNodesToCheck, cmsBool  Down, cmsInt32Number max_err)
+{
+    cmsUInt32Number i;
+    cmsUInt16Number in, out;
+    cmsInterpParams* p;
+    cmsUInt16Number* Tab;
+
+    Tab = (cmsUInt16Number*) malloc(sizeof(cmsUInt16Number)* nNodesToCheck);
+    if (Tab == NULL) return 0;
+
+    p = _cmsComputeInterpParams(DbgThread(), nNodesToCheck, 1, 1, Tab, CMS_LERP_FLAGS_16BITS);
+    if (p == NULL) {
+        free(Tab);      // the table used to leak on this path
+        return 0;
+    }
+
+    BuildTable(nNodesToCheck, Tab, Down);
+
+    for (i=0; i <= 0xffff; i++) {
+        in = (cmsUInt16Number) i;
+        out = 0;
+
+        p ->Interpolation.Lerp16(&in, &out, p);
+
+        if (Down) out = 0xffff - out;
+
+        if (abs(out - in) > max_err) {
+            Fail("(%dp): Must be %x, But is %x : ", nNodesToCheck, in, out);
+            _cmsFreeInterpParams(p);
+            free(Tab);
+            return 0;
+        }
+    }
+
+    _cmsFreeInterpParams(p);
+    free(Tab);
+    return 1;
+}
+
+
+static
+cmsInt32Number Check1DLERP2(void)
+{
+    return Check1D(2, FALSE, 0);
+}
+
+
+static
+cmsInt32Number Check1DLERP3(void)
+{
+    return Check1D(3, FALSE, 1);
+}
+
+
+static
+cmsInt32Number Check1DLERP4(void)
+{
+    return Check1D(4, FALSE, 0);
+}
+
+static
+cmsInt32Number Check1DLERP6(void)
+{
+    return Check1D(6, FALSE, 0);
+}
+
+static
+cmsInt32Number Check1DLERP18(void)
+{
+    return Check1D(18, FALSE, 0);
+}
+
+
+static
+cmsInt32Number Check1DLERP2Down(void)
+{
+    return Check1D(2, TRUE, 0);
+}
+
+
+static
+cmsInt32Number Check1DLERP3Down(void)
+{
+    return Check1D(3, TRUE, 1);
+}
+
+static
+cmsInt32Number Check1DLERP6Down(void)
+{
+    return Check1D(6, TRUE, 0);
+}
+
+static
+cmsInt32Number Check1DLERP18Down(void)
+{
+    return Check1D(18, TRUE, 0);
+}
+
+static
+cmsInt32Number ExhaustiveCheck1DLERP(void)
+{
+    cmsUInt32Number j;
+
+    printf("\n");
+    for (j=10; j <= 4096; j++) {
+
+        if ((j % 10) == 0) printf("%u    \r", j);
+
+        if (!Check1D(j, FALSE, 1)) return 0;
+    }
+
+    printf("\rResult is ");
+    return 1;
+}
+
+static
+cmsInt32Number ExhaustiveCheck1DLERPDown(void)
+{
+    cmsUInt32Number j;
+
+    printf("\n");
+    for (j=10; j <= 4096; j++) {
+
+        if ((j % 10) == 0) printf("%u    \r", j);
+
+        if (!Check1D(j, TRUE, 1)) return 0;
+    }
+
+
+    printf("\rResult is ");
+    return 1;
+}
+
+
+
+// 3D interpolation -------------------------------------------------------------------------------------------------
+
+// Float interpolation (default flags) on a 2-node RGB table mapping (R,G,B) to (R, G/2, B/4), gray inputs only
+static cmsInt32Number Check3DinterpolationFloatTetrahedral(void)
+{
+    cmsInterpParams* p;
+    cmsInt32Number i;
+    cmsFloat32Number In[3], Out[3];
+    cmsFloat32Number FloatTable[] = { //R     G    B
+
+        0,    0,   0,     // B=0,G=0,R=0
+        0,    0,  .25,    // B=1,G=0,R=0
+
+        0,   .5,    0,    // B=0,G=1,R=0
+        0,   .5,  .25,    // B=1,G=1,R=0
+
+        1,    0,    0,    // B=0,G=0,R=1
+        1,    0,  .25,    // B=1,G=0,R=1
+
+        1,    .5,   0,    // B=0,G=1,R=1
+        1,    .5,  .25    // B=1,G=1,R=1
+
+    };
+
+    p = _cmsComputeInterpParams(DbgThread(), 2, 3, 3, FloatTable, CMS_LERP_FLAGS_FLOAT);
+    if (p == NULL) return 0;    // the loop below dereferences p
+
+    MaxErr = 0.0;
+     for (i=0; i < 0xffff; i++) {
+
+       In[0] = In[1] = In[2] = (cmsFloat32Number) ( (cmsFloat32Number) i / 65535.0F);
+
+        p ->Interpolation.LerpFloat(In, Out, p);
+       // Expected value goes first: on failure IsGoodVal prints it after "Must be"
+       if (!IsGoodFixed15_16("Channel 1", In[0], Out[0])) goto Error;
+       if (!IsGoodFixed15_16("Channel 2", (cmsFloat32Number) In[1] / 2.F, Out[1])) goto Error;
+       if (!IsGoodFixed15_16("Channel 3", (cmsFloat32Number) In[2] / 4.F, Out[2])) goto Error;
+     }
+
+    if (MaxErr > 0) printf("|Err|<%lf ", MaxErr);
+    _cmsFreeInterpParams(p);
+    return 1;
+
+Error:
+    _cmsFreeInterpParams(p);
+    return 0;
+}
+
+// Walks the gray diagonal (R=G=B) of a 2x2x2 float table in 0xffff steps and compares each
+// interpolated channel with the value the table implies: R, G/2 and B/4. Interpolation is
+// requested with CMS_LERP_FLAGS_FLOAT|CMS_LERP_FLAGS_TRILINEAR.
+// Returns 1 on success, 0 on the first mismatch or if the interpolation setup fails.
+static
+cmsInt32Number Check3DinterpolationFloatTrilinear(void)
+{
+    cmsInterpParams* p;
+    cmsInt32Number i;
+    cmsFloat32Number In[3], Out[3];
+    cmsFloat32Number FloatTable[] = { //R     G    B
+
+        0,    0,   0,     // B=0,G=0,R=0
+        0,    0,  .25,    // B=1,G=0,R=0
+
+        0,   .5,    0,    // B=0,G=1,R=0
+        0,   .5,  .25,    // B=1,G=1,R=0
+
+        1,    0,    0,    // B=0,G=0,R=1
+        1,    0,  .25,    // B=1,G=0,R=1
+
+        1,    .5,   0,    // B=0,G=1,R=1
+        1,    .5,  .25    // B=1,G=1,R=1
+
+    };
+
+    p = _cmsComputeInterpParams(DbgThread(), 2, 3, 3, FloatTable, CMS_LERP_FLAGS_FLOAT|CMS_LERP_FLAGS_TRILINEAR);
+
+    // The parameter block is dereferenced below, so a failed setup must stop the test here
+    if (p == NULL) return 0;
+
+    // MaxErr is shared state; it is presumably updated by the IsGood* helpers -- confirm
+    MaxErr = 0.0;
+    for (i=0; i < 0xffff; i++) {
+
+        In[0] = In[1] = In[2] = (cmsFloat32Number) ( (cmsFloat32Number) i / 65535.0F);
+
+        p ->Interpolation.LerpFloat(In, Out, p);
+
+        if (!IsGoodFixed15_16("Channel 1", Out[0], In[0])) goto Error;
+        if (!IsGoodFixed15_16("Channel 2", Out[1], (cmsFloat32Number) In[1] / 2.F)) goto Error;
+        if (!IsGoodFixed15_16("Channel 3", Out[2], (cmsFloat32Number) In[2] / 4.F)) goto Error;
+    }
+
+    if (MaxErr > 0) printf("|Err|<%lf ", MaxErr);
+    _cmsFreeInterpParams(p);
+    return 1;
+
+Error:
+    _cmsFreeInterpParams(p);
+    return 0;
+}
+
+// Walks the gray diagonal (R=G=B) of a 2x2x2 16-bit table whose corners reproduce their own
+// coordinates, and requires every interpolated channel to pass IsGoodWord against the input.
+// Interpolation is requested with CMS_LERP_FLAGS_16BITS (the tetrahedral path per the function name).
+// Returns 1 on success, 0 on the first mismatch or if the interpolation setup fails.
+static
+cmsInt32Number Check3DinterpolationTetrahedral16(void)
+{
+    cmsInterpParams* p;
+    cmsInt32Number i;
+    cmsUInt16Number In[3], Out[3];
+    cmsUInt16Number Table[] = {
+
+        0,    0,   0,
+        0,    0,   0xffff,
+
+        0,    0xffff,    0,
+        0,    0xffff,    0xffff,
+
+        0xffff,    0,    0,
+        0xffff,    0,    0xffff,
+
+        0xffff,    0xffff,   0,
+        0xffff,    0xffff,   0xffff
+    };
+
+    p = _cmsComputeInterpParams(DbgThread(), 2, 3, 3, Table, CMS_LERP_FLAGS_16BITS);
+
+    // The parameter block is dereferenced below, so a failed setup must stop the test here
+    if (p == NULL) return 0;
+
+    MaxErr = 0.0;
+    for (i=0; i < 0xffff; i++) {
+
+        In[0] = In[1] = In[2] = (cmsUInt16Number) i;
+
+        p ->Interpolation.Lerp16(In, Out, p);
+
+        if (!IsGoodWord("Channel 1", Out[0], In[0])) goto Error;
+        if (!IsGoodWord("Channel 2", Out[1], In[1])) goto Error;
+        if (!IsGoodWord("Channel 3", Out[2], In[2])) goto Error;
+    }
+
+    if (MaxErr > 0) printf("|Err|<%lf ", MaxErr);
+    _cmsFreeInterpParams(p);
+    return 1;
+
+Error:
+    _cmsFreeInterpParams(p);
+    return 0;
+}
+
+// Walks the gray diagonal (R=G=B) of a 2x2x2 16-bit table whose corners reproduce their own
+// coordinates, and requires every interpolated channel to pass IsGoodWord against the input.
+// Interpolation is requested with CMS_LERP_FLAGS_TRILINEAR.
+// Returns 1 on success, 0 on the first mismatch or if the interpolation setup fails.
+static
+cmsInt32Number Check3DinterpolationTrilinear16(void)
+{
+    cmsInterpParams* p;
+    cmsInt32Number i;
+    cmsUInt16Number In[3], Out[3];
+    cmsUInt16Number Table[] = {
+
+        0,    0,   0,
+        0,    0,   0xffff,
+
+        0,    0xffff,    0,
+        0,    0xffff,    0xffff,
+
+        0xffff,    0,    0,
+        0xffff,    0,    0xffff,
+
+        0xffff,    0xffff,   0,
+        0xffff,    0xffff,   0xffff
+    };
+
+    p = _cmsComputeInterpParams(DbgThread(), 2, 3, 3, Table, CMS_LERP_FLAGS_TRILINEAR);
+
+    // The parameter block is dereferenced below, so a failed setup must stop the test here
+    if (p == NULL) return 0;
+
+    MaxErr = 0.0;
+    for (i=0; i < 0xffff; i++) {
+
+        In[0] = In[1] = In[2] = (cmsUInt16Number) i;
+
+        p ->Interpolation.Lerp16(In, Out, p);
+
+        if (!IsGoodWord("Channel 1", Out[0], In[0])) goto Error;
+        if (!IsGoodWord("Channel 2", Out[1], In[1])) goto Error;
+        if (!IsGoodWord("Channel 3", Out[2], In[2])) goto Error;
+    }
+
+    if (MaxErr > 0) printf("|Err|<%lf ", MaxErr);
+    _cmsFreeInterpParams(p);
+    return 1;
+
+Error:
+    _cmsFreeInterpParams(p);
+    return 0;
+}
+
+
+// Sweeps r, g, b over 0..0xfe (scaled by 1/255) through a 2x2x2 float table and compares each
+// interpolated channel with the value the table implies: R, G/2 and B/4. Only
+// CMS_LERP_FLAGS_FLOAT is requested (no trilinear flag; the tetrahedral path per the function name).
+// Returns 1 on success, 0 on the first mismatch or if the interpolation setup fails.
+static
+cmsInt32Number ExaustiveCheck3DinterpolationFloatTetrahedral(void)
+{
+    cmsInterpParams* p;
+    cmsInt32Number r, g, b;
+    cmsFloat32Number In[3], Out[3];
+    cmsFloat32Number FloatTable[] = { //R     G    B
+
+        0,    0,   0,     // B=0,G=0,R=0
+        0,    0,  .25,    // B=1,G=0,R=0
+
+        0,   .5,    0,    // B=0,G=1,R=0
+        0,   .5,  .25,    // B=1,G=1,R=0
+
+        1,    0,    0,    // B=0,G=0,R=1
+        1,    0,  .25,    // B=1,G=0,R=1
+
+        1,    .5,   0,    // B=0,G=1,R=1
+        1,    .5,  .25    // B=1,G=1,R=1
+
+    };
+
+    p = _cmsComputeInterpParams(DbgThread(), 2, 3, 3, FloatTable, CMS_LERP_FLAGS_FLOAT);
+
+    // The parameter block is dereferenced below, so a failed setup must stop the test here
+    if (p == NULL) return 0;
+
+    MaxErr = 0.0;
+    for (r=0; r < 0xff; r++)
+        for (g=0; g < 0xff; g++)
+            for (b=0; b < 0xff; b++)
+            {
+
+                In[0] = (cmsFloat32Number) r / 255.0F;
+                In[1] = (cmsFloat32Number) g / 255.0F;
+                In[2] = (cmsFloat32Number) b / 255.0F;
+
+
+                p ->Interpolation.LerpFloat(In, Out, p);
+
+                if (!IsGoodFixed15_16("Channel 1", Out[0], In[0])) goto Error;
+                if (!IsGoodFixed15_16("Channel 2", Out[1], (cmsFloat32Number) In[1] / 2.F)) goto Error;
+                if (!IsGoodFixed15_16("Channel 3", Out[2], (cmsFloat32Number) In[2] / 4.F)) goto Error;
+            }
+
+    if (MaxErr > 0) printf("|Err|<%lf ", MaxErr);
+    _cmsFreeInterpParams(p);
+    return 1;
+
+Error:
+    _cmsFreeInterpParams(p);
+    return 0;
+}
+
+// Sweeps r, g, b over 0..0xfe (scaled by 1/255) through a 2x2x2 float table and compares each
+// interpolated channel with the value the table implies: R, G/2 and B/4. Interpolation is
+// requested with CMS_LERP_FLAGS_FLOAT|CMS_LERP_FLAGS_TRILINEAR.
+// Returns 1 on success, 0 on the first mismatch or if the interpolation setup fails.
+static
+cmsInt32Number ExaustiveCheck3DinterpolationFloatTrilinear(void)
+{
+    cmsInterpParams* p;
+    cmsInt32Number r, g, b;
+    cmsFloat32Number In[3], Out[3];
+    cmsFloat32Number FloatTable[] = { //R     G    B
+
+        0,    0,   0,     // B=0,G=0,R=0
+        0,    0,  .25,    // B=1,G=0,R=0
+
+        0,   .5,    0,    // B=0,G=1,R=0
+        0,   .5,  .25,    // B=1,G=1,R=0
+
+        1,    0,    0,    // B=0,G=0,R=1
+        1,    0,  .25,    // B=1,G=0,R=1
+
+        1,    .5,   0,    // B=0,G=1,R=1
+        1,    .5,  .25    // B=1,G=1,R=1
+
+    };
+
+    p = _cmsComputeInterpParams(DbgThread(), 2, 3, 3, FloatTable, CMS_LERP_FLAGS_FLOAT|CMS_LERP_FLAGS_TRILINEAR);
+
+    // The parameter block is dereferenced below, so a failed setup must stop the test here
+    if (p == NULL) return 0;
+
+    MaxErr = 0.0;
+    for (r=0; r < 0xff; r++)
+        for (g=0; g < 0xff; g++)
+            for (b=0; b < 0xff; b++)
+            {
+
+                In[0] = (cmsFloat32Number) r / 255.0F;
+                In[1] = (cmsFloat32Number) g / 255.0F;
+                In[2] = (cmsFloat32Number) b / 255.0F;
+
+
+                p ->Interpolation.LerpFloat(In, Out, p);
+
+                if (!IsGoodFixed15_16("Channel 1", Out[0], In[0])) goto Error;
+                if (!IsGoodFixed15_16("Channel 2", Out[1], (cmsFloat32Number) In[1] / 2.F)) goto Error;
+                if (!IsGoodFixed15_16("Channel 3", Out[2], (cmsFloat32Number) In[2] / 4.F)) goto Error;
+            }
+
+    if (MaxErr > 0) printf("|Err|<%lf ", MaxErr);
+    _cmsFreeInterpParams(p);
+    return 1;
+
+Error:
+    _cmsFreeInterpParams(p);
+    return 0;
+
+}
+
+// Sweeps r, g, b over 0..0xfe through a 2x2x2 16-bit table whose corners reproduce their own
+// coordinates, requiring every interpolated channel to pass IsGoodWord against its input.
+// Interpolation is requested with CMS_LERP_FLAGS_16BITS (the tetrahedral path per the function name).
+// NOTE(review): the inputs are used unscaled, so only words 0..0xfe of the 16-bit range are probed.
+// Returns 1 on success, 0 on the first mismatch or if the interpolation setup fails.
+static
+cmsInt32Number ExhaustiveCheck3DinterpolationTetrahedral16(void)
+{
+    cmsInterpParams* p;
+    cmsInt32Number r, g, b;
+    cmsUInt16Number In[3], Out[3];
+    cmsUInt16Number Table[] = {
+
+        0,    0,   0,
+        0,    0,   0xffff,
+
+        0,    0xffff,    0,
+        0,    0xffff,    0xffff,
+
+        0xffff,    0,    0,
+        0xffff,    0,    0xffff,
+
+        0xffff,    0xffff,   0,
+        0xffff,    0xffff,   0xffff
+    };
+
+    p = _cmsComputeInterpParams(DbgThread(), 2, 3, 3, Table, CMS_LERP_FLAGS_16BITS);
+
+    // The parameter block is dereferenced below, so a failed setup must stop the test here
+    if (p == NULL) return 0;
+
+    for (r=0; r < 0xff; r++)
+        for (g=0; g < 0xff; g++)
+            for (b=0; b < 0xff; b++)
+            {
+                In[0] = (cmsUInt16Number) r ;
+                In[1] = (cmsUInt16Number) g ;
+                In[2] = (cmsUInt16Number) b ;
+
+
+                p ->Interpolation.Lerp16(In, Out, p);
+
+                if (!IsGoodWord("Channel 1", Out[0], In[0])) goto Error;
+                if (!IsGoodWord("Channel 2", Out[1], In[1])) goto Error;
+                if (!IsGoodWord("Channel 3", Out[2], In[2])) goto Error;
+            }
+
+    _cmsFreeInterpParams(p);
+    return 1;
+
+Error:
+    _cmsFreeInterpParams(p);
+    return 0;
+}
+
+// Sweeps r, g, b over 0..0xfe through a 2x2x2 16-bit table whose corners reproduce their own
+// coordinates, requiring every interpolated channel to pass IsGoodWord against its input.
+// Interpolation is requested with CMS_LERP_FLAGS_TRILINEAR.
+// NOTE(review): the inputs are used unscaled, so only words 0..0xfe of the 16-bit range are probed.
+// Returns 1 on success, 0 on the first mismatch or if the interpolation setup fails.
+static
+cmsInt32Number ExhaustiveCheck3DinterpolationTrilinear16(void)
+{
+    cmsInterpParams* p;
+    cmsInt32Number r, g, b;
+    cmsUInt16Number In[3], Out[3];
+    cmsUInt16Number Table[] = {
+
+        0,    0,   0,
+        0,    0,   0xffff,
+
+        0,    0xffff,    0,
+        0,    0xffff,    0xffff,
+
+        0xffff,    0,    0,
+        0xffff,    0,    0xffff,
+
+        0xffff,    0xffff,   0,
+        0xffff,    0xffff,   0xffff
+    };
+
+    p = _cmsComputeInterpParams(DbgThread(), 2, 3, 3, Table, CMS_LERP_FLAGS_TRILINEAR);
+
+    // The parameter block is dereferenced below, so a failed setup must stop the test here
+    if (p == NULL) return 0;
+
+    for (r=0; r < 0xff; r++)
+        for (g=0; g < 0xff; g++)
+            for (b=0; b < 0xff; b++)
+            {
+                In[0] = (cmsUInt16Number) r ;
+                In[1] = (cmsUInt16Number) g ;
+                In[2] = (cmsUInt16Number) b ;
+
+
+                p ->Interpolation.Lerp16(In, Out, p);
+
+                if (!IsGoodWord("Channel 1", Out[0], In[0])) goto Error;
+                if (!IsGoodWord("Channel 2", Out[1], In[1])) goto Error;
+                if (!IsGoodWord("Channel 3", Out[2], In[2])) goto Error;
+            }
+
+
+    _cmsFreeInterpParams(p);
+    return 1;
+
+Error:
+    _cmsFreeInterpParams(p);
+    return 0;
+}
+
+// Check reverse interpolation on LUTs. This is right now exclusively used by the K preservation algorithm.
+//
+// Builds a 3->3 pipeline around a 2x2x2 16-bit CLUT whose corners reproduce their own coordinates,
+// then (1) reverse-evaluates the all-zero target without a hint and expects an exact zero result,
+// and (2) reverse-evaluates (in, 0, 0) for in = 0.00 .. 1.00, feeding each result back as the next
+// hint and tracking the worst error on the first channel.
+// Returns nonzero when that worst error is within FLOAT_PRECISSION, 0 on failure.
+static
+cmsInt32Number CheckReverseInterpolation3x3(void)
+{
+    cmsPipeline* Lut;
+    cmsStage* clut;
+    cmsFloat32Number Target[4], Result[4], Hint[4];
+    cmsFloat32Number err, max;
+    cmsInt32Number i;
+    cmsUInt16Number Table[] = {
+
+        0,    0,   0,                 // 0 0 0
+        0,    0,   0xffff,            // 0 0 1
+
+        0,    0xffff,    0,           // 0 1 0
+        0,    0xffff,    0xffff,      // 0 1 1
+
+        0xffff,    0,    0,           // 1 0 0
+        0xffff,    0,    0xffff,      // 1 0 1
+
+        0xffff,    0xffff,   0,       // 1 1 0
+        0xffff,    0xffff,   0xffff,  // 1 1 1
+    };
+
+    // Both allocations can fail; stop before handing NULL to the pipeline routines
+    Lut = cmsPipelineAlloc(DbgThread(), 3, 3);
+    if (Lut == NULL) return 0;
+
+    clut = cmsStageAllocCLut16bit(DbgThread(), 2, 3, 3, Table);
+    if (clut == NULL) goto Error;
+    cmsPipelineInsertStage(Lut, cmsAT_BEGIN, clut);
+
+    Target[0] = 0; Target[1] = 0; Target[2] = 0;
+    Hint[0] = 0; Hint[1] = 0; Hint[2] = 0;
+
+    // First probe deliberately passes no hint; Hint is only consumed by the loop below
+    cmsPipelineEvalReverseFloat(Target, Result, NULL, Lut);
+    if (Result[0] != 0 || Result[1] != 0 || Result[2] != 0){
+
+        Fail("Reverse interpolation didn't find zero");
+        goto Error;
+    }
+
+    // Traverse the identity along the first channel
+    max = 0;
+    for (i=0; i <= 100; i++) {
+
+        cmsFloat32Number in = i / 100.0F;
+
+        Target[0] = in; Target[1] = 0; Target[2] = 0;
+        cmsPipelineEvalReverseFloat(Target, Result, Hint, Lut);
+
+        err = fabsf(in - Result[0]);
+        if (err > max) max = err;
+
+        // The solution just found seeds the next search
+        memcpy(Hint, Result, sizeof(Hint));
+    }
+
+    cmsPipelineFree(Lut);
+    return (max <= FLOAT_PRECISSION);
+
+Error:
+    cmsPipelineFree(Lut);
+    return 0;
+}
+
+
+// Reverse-interpolation check on a 4->3 pipeline whose 2x2x2x2 CLUT copies the first three
+// inputs to the output and ignores the fourth. Three sub-tests run in order:
+//   "4->3 feasibility": forward evaluation returns the first three inputs,
+//   "4->3 zero":        reversing the zero target with a misleading hint yields exactly zero,
+//   "4->3 find CMY":    reversing (in, 0, 0) for in = 0.00 .. 1.00 recovers `in` on channel 0.
+// Returns nonzero when the worst error of the last sub-test is within FLOAT_PRECISSION, 0 on failure.
+static
+cmsInt32Number CheckReverseInterpolation4x3(void)
+{
+    cmsPipeline* Lut;
+    cmsStage* clut;
+    cmsFloat32Number Target[4], Result[4], Hint[4];
+    cmsFloat32Number err, max;
+    cmsInt32Number i;
+
+    // 4 -> 3, output gets 3 first channels copied
+    cmsUInt16Number Table[] = {
+
+        0,         0,         0,          //  0 0 0 0   = ( 0, 0, 0)
+        0,         0,         0,          //  0 0 0 1   = ( 0, 0, 0)
+
+        0,         0,         0xffff,     //  0 0 1 0   = ( 0, 0, 1)
+        0,         0,         0xffff,     //  0 0 1 1   = ( 0, 0, 1)
+
+        0,         0xffff,    0,          //  0 1 0 0   = ( 0, 1, 0)
+        0,         0xffff,    0,          //  0 1 0 1   = ( 0, 1, 0)
+
+        0,         0xffff,    0xffff,     //  0 1 1 0    = ( 0, 1, 1)
+        0,         0xffff,    0xffff,     //  0 1 1 1    = ( 0, 1, 1)
+
+        0xffff,    0,         0,          //  1 0 0 0    = ( 1, 0, 0)
+        0xffff,    0,         0,          //  1 0 0 1    = ( 1, 0, 0)
+
+        0xffff,    0,         0xffff,     //  1 0 1 0    = ( 1, 0, 1)
+        0xffff,    0,         0xffff,     //  1 0 1 1    = ( 1, 0, 1)
+
+        0xffff,    0xffff,    0,          //  1 1 0 0    = ( 1, 1, 0)
+        0xffff,    0xffff,    0,          //  1 1 0 1    = ( 1, 1, 0)
+
+        0xffff,    0xffff,    0xffff,     //  1 1 1 0    = ( 1, 1, 1)
+        0xffff,    0xffff,    0xffff,     //  1 1 1 1    = ( 1, 1, 1)
+    };
+
+    // Both allocations can fail; stop before handing NULL to the pipeline routines
+    Lut = cmsPipelineAlloc(DbgThread(), 4, 3);
+    if (Lut == NULL) return 0;
+
+    clut = cmsStageAllocCLut16bit(DbgThread(), 2, 4, 3, Table);
+    if (clut == NULL) goto Error;
+    cmsPipelineInsertStage(Lut, cmsAT_BEGIN, clut);
+
+    // Check if the LUT is behaving as expected
+    SubTest("4->3 feasibility");
+    for (i=0; i <= 100; i++) {
+
+        Target[0] = i / 100.0F;
+        Target[1] = Target[0];
+        Target[2] = 0;
+        Target[3] = 12;
+
+        cmsPipelineEvalFloat(Target, Result, Lut);
+
+        if (!IsGoodFixed15_16("0", Target[0], Result[0])) goto Error;
+        if (!IsGoodFixed15_16("1", Target[1], Result[1])) goto Error;
+        if (!IsGoodFixed15_16("2", Target[2], Result[2])) goto Error;
+    }
+
+    SubTest("4->3 zero");
+    Target[0] = 0;
+    Target[1] = 0;
+    Target[2] = 0;
+
+    // This one holds the fixed K
+    Target[3] = 0;
+
+    // This is our hint (which is a big lie in this case)
+    Hint[0] = 0.1F; Hint[1] = 0.1F; Hint[2] = 0.1F;
+
+    cmsPipelineEvalReverseFloat(Target, Result, Hint, Lut);
+
+    if (Result[0] != 0 || Result[1] != 0 || Result[2] != 0 || Result[3] != 0){
+
+        Fail("Reverse interpolation didn't find zero");
+        goto Error;
+    }
+
+    SubTest("4->3 find CMY");
+    max = 0;
+    for (i=0; i <= 100; i++) {
+
+        cmsFloat32Number in = i / 100.0F;
+
+        Target[0] = in; Target[1] = 0; Target[2] = 0;
+        cmsPipelineEvalReverseFloat(Target, Result, Hint, Lut);
+
+        err = fabsf(in - Result[0]);
+        if (err > max) max = err;
+
+        // The solution just found seeds the next search
+        memcpy(Hint, Result, sizeof(Hint));
+    }
+
+    cmsPipelineFree(Lut);
+    return (max <= FLOAT_PRECISSION);
+
+Error:
+    cmsPipelineFree(Lut);
+    return 0;
+}
+
+
+
+// Check all interpolation.
+
+// First synthetic test function: plain sum of the eight inputs divided by m, truncated to 16 bits.
+static
+cmsUInt16Number Fn8D1(cmsUInt16Number a1, cmsUInt16Number a2, cmsUInt16Number a3, cmsUInt16Number a4,
+                      cmsUInt16Number a5, cmsUInt16Number a6, cmsUInt16Number a7, cmsUInt16Number a8,
+                      cmsUInt32Number m)
+{
+    cmsUInt32Number total = a1;
+
+    total += a2;
+    total += a3;
+    total += a4;
+    total += a5;
+    total += a6;
+    total += a7;
+    total += a8;
+
+    return (cmsUInt16Number) (total / m);
+}
+
+
+// Second synthetic test function: weighted sum (a2 and a3 count three times) divided by m + 4,
+// truncated to 16 bits.
+static
+cmsUInt16Number Fn8D2(cmsUInt16Number a1, cmsUInt16Number a2, cmsUInt16Number a3, cmsUInt16Number a4,
+                      cmsUInt16Number a5, cmsUInt16Number a6, cmsUInt16Number a7, cmsUInt16Number a8,
+                      cmsUInt32Number m)
+{
+    cmsUInt32Number total = a1;
+
+    total += 3u * a2;
+    total += 3u * a3;
+    total += a4;
+    total += a5;
+    total += a6;
+    total += a7;
+    total += a8;
+
+    return (cmsUInt16Number) (total / (m + 4));
+}
+
+
+// Third synthetic test function: weighted sum (3*a1 + 2*a2 + 3*a3 + the rest) divided by m + 5,
+// truncated to 16 bits.
+static
+cmsUInt16Number Fn8D3(cmsUInt16Number a1, cmsUInt16Number a2, cmsUInt16Number a3, cmsUInt16Number a4,
+                      cmsUInt16Number a5, cmsUInt16Number a6, cmsUInt16Number a7, cmsUInt16Number a8,
+                      cmsUInt32Number m)
+{
+    cmsUInt32Number total = 3u * a1;
+
+    total += 2u * a2;
+    total += 3u * a3;
+    total += a4;
+    total += a5;
+    total += a6;
+    total += a7;
+    total += a8;
+
+    return (cmsUInt16Number) (total / (m + 5));
+}
+
+
+
+
+// CLUT sampler for 3 inputs: writes Fn8D1/Fn8D2/Fn8D3 of In[0..2] (remaining slots passed as 0,
+// divisor base 3) to Out[0..2]. Cargo is ignored. Always returns 1.
+static
+cmsInt32Number Sampler3D(register const cmsUInt16Number In[],
+               register cmsUInt16Number Out[],
+               register void * Cargo)
+{
+    // Mark Cargo as intentionally unused; kept ahead of the return so the statement is reachable
+    cmsUNUSED_PARAMETER(Cargo);
+
+    Out[0] = Fn8D1(In[0], In[1], In[2], 0, 0, 0, 0, 0, 3);
+    Out[1] = Fn8D2(In[0], In[1], In[2], 0, 0, 0, 0, 0, 3);
+    Out[2] = Fn8D3(In[0], In[1], In[2], 0, 0, 0, 0, 0, 3);
+
+    return 1;
+}
+
+// CLUT sampler for 4 inputs: writes Fn8D1/Fn8D2/Fn8D3 of In[0..3] (remaining slots passed as 0,
+// divisor base 4) to Out[0..2]. Cargo is ignored. Always returns 1.
+static
+cmsInt32Number Sampler4D(register const cmsUInt16Number In[],
+               register cmsUInt16Number Out[],
+               register void * Cargo)
+{
+    // Mark Cargo as intentionally unused; kept ahead of the return so the statement is reachable
+    cmsUNUSED_PARAMETER(Cargo);
+
+    Out[0] = Fn8D1(In[0], In[1], In[2], In[3], 0, 0, 0, 0, 4);
+    Out[1] = Fn8D2(In[0], In[1], In[2], In[3], 0, 0, 0, 0, 4);
+    Out[2] = Fn8D3(In[0], In[1], In[2], In[3], 0, 0, 0, 0, 4);
+
+    return 1;
+}
+
+// CLUT sampler for 5 inputs: writes Fn8D1/Fn8D2/Fn8D3 of In[0..4] (remaining slots passed as 0,
+// divisor base 5) to Out[0..2]. Cargo is ignored. Always returns 1.
+static
+cmsInt32Number Sampler5D(register const cmsUInt16Number In[],
+               register cmsUInt16Number Out[],
+               register void * Cargo)
+{
+    // Mark Cargo as intentionally unused; kept ahead of the return so the statement is reachable
+    cmsUNUSED_PARAMETER(Cargo);
+
+    Out[0] = Fn8D1(In[0], In[1], In[2], In[3], In[4], 0, 0, 0, 5);
+    Out[1] = Fn8D2(In[0], In[1], In[2], In[3], In[4], 0, 0, 0, 5);
+    Out[2] = Fn8D3(In[0], In[1], In[2], In[3], In[4], 0, 0, 0, 5);
+
+    return 1;
+}
+
+// CLUT sampler for 6 inputs: writes Fn8D1/Fn8D2/Fn8D3 of In[0..5] (remaining slots passed as 0,
+// divisor base 6) to Out[0..2]. Cargo is ignored. Always returns 1.
+static
+cmsInt32Number Sampler6D(register const cmsUInt16Number In[],
+               register cmsUInt16Number Out[],
+               register void * Cargo)
+{
+    // Mark Cargo as intentionally unused; kept ahead of the return so the statement is reachable
+    cmsUNUSED_PARAMETER(Cargo);
+
+    Out[0] = Fn8D1(In[0], In[1], In[2], In[3], In[4], In[5], 0, 0, 6);
+    Out[1] = Fn8D2(In[0], In[1], In[2], In[3], In[4], In[5], 0, 0, 6);
+    Out[2] = Fn8D3(In[0], In[1], In[2], In[3], In[4], In[5], 0, 0, 6);
+
+    return 1;
+}
+
+// CLUT sampler for 7 inputs: writes Fn8D1/Fn8D2/Fn8D3 of In[0..6] (last slot passed as 0,
+// divisor base 7) to Out[0..2]. Cargo is ignored. Always returns 1.
+static
+cmsInt32Number Sampler7D(register const cmsUInt16Number In[],
+               register cmsUInt16Number Out[],
+               register void * Cargo)
+{
+    // Mark Cargo as intentionally unused; kept ahead of the return so the statement is reachable
+    cmsUNUSED_PARAMETER(Cargo);
+
+    Out[0] = Fn8D1(In[0], In[1], In[2], In[3], In[4], In[5], In[6], 0, 7);
+    Out[1] = Fn8D2(In[0], In[1], In[2], In[3], In[4], In[5], In[6], 0, 7);
+    Out[2] = Fn8D3(In[0], In[1], In[2], In[3], In[4], In[5], In[6], 0, 7);
+
+    return 1;
+}
+
+// CLUT sampler for 8 inputs: writes Fn8D1/Fn8D2/Fn8D3 of In[0..7] (divisor base 8) to Out[0..2].
+// Cargo is ignored. Always returns 1.
+static
+cmsInt32Number Sampler8D(register const cmsUInt16Number In[],
+               register cmsUInt16Number Out[],
+               register void * Cargo)
+{
+    // Mark Cargo as intentionally unused; kept ahead of the return so the statement is reachable
+    cmsUNUSED_PARAMETER(Cargo);
+
+    Out[0] = Fn8D1(In[0], In[1], In[2], In[3], In[4], In[5], In[6], In[7], 8);
+    Out[1] = Fn8D2(In[0], In[1], In[2], In[3], In[4], In[5], In[6], In[7], 8);
+    Out[2] = Fn8D3(In[0], In[1], In[2], In[3], In[4], In[5], In[6], In[7], 8);
+
+    return 1;
+}
+
+// Evaluates one 3-channel input through the pipeline and directly through Sampler3D, then
+// compares the two results channel by channel via IsGoodWordPrec (last argument 2).
+// Returns TRUE only when all three channels pass.
+static
+cmsBool CheckOne3D(cmsPipeline* lut, cmsUInt16Number a1, cmsUInt16Number a2, cmsUInt16Number a3)
+{
+    cmsUInt16Number In[3], Interpolated[3], Exact[3];
+
+    In[0] = a1;
+    In[1] = a2;
+    In[2] = a3;
+
+    cmsPipelineEval16(In, Interpolated, lut);   // value read back through the LUT
+    Sampler3D(In, Exact, NULL);                 // value computed directly
+
+    // Short-circuits at the first failing channel, so later channels are not examined
+    return IsGoodWordPrec("Channel 1", Interpolated[0], Exact[0], 2) &&
+           IsGoodWordPrec("Channel 2", Interpolated[1], Exact[1], 2) &&
+           IsGoodWordPrec("Channel 3", Interpolated[2], Exact[2], 2);
+}
+
+// Evaluates one 4-channel input through the pipeline and directly through Sampler4D, then
+// compares the two results channel by channel via IsGoodWordPrec (last argument 2).
+// Returns TRUE only when all three output channels pass.
+static
+cmsBool CheckOne4D(cmsPipeline* lut, cmsUInt16Number a1, cmsUInt16Number a2, cmsUInt16Number a3, cmsUInt16Number a4)
+{
+    cmsUInt16Number In[4], Interpolated[3], Exact[3];
+
+    In[0] = a1;
+    In[1] = a2;
+    In[2] = a3;
+    In[3] = a4;
+
+    cmsPipelineEval16(In, Interpolated, lut);   // value read back through the LUT
+    Sampler4D(In, Exact, NULL);                 // value computed directly
+
+    // Short-circuits at the first failing channel, so later channels are not examined
+    return IsGoodWordPrec("Channel 1", Interpolated[0], Exact[0], 2) &&
+           IsGoodWordPrec("Channel 2", Interpolated[1], Exact[1], 2) &&
+           IsGoodWordPrec("Channel 3", Interpolated[2], Exact[2], 2);
+}
+
+// Evaluates one 5-channel input through the pipeline and directly through Sampler5D, then
+// compares the two results channel by channel via IsGoodWordPrec (last argument 2).
+// Returns TRUE only when all three output channels pass.
+static
+cmsBool CheckOne5D(cmsPipeline* lut, cmsUInt16Number a1, cmsUInt16Number a2,
+                                     cmsUInt16Number a3, cmsUInt16Number a4, cmsUInt16Number a5)
+{
+    cmsUInt16Number In[5], Interpolated[3], Exact[3];
+
+    In[0] = a1;
+    In[1] = a2;
+    In[2] = a3;
+    In[3] = a4;
+    In[4] = a5;
+
+    cmsPipelineEval16(In, Interpolated, lut);   // value read back through the LUT
+    Sampler5D(In, Exact, NULL);                 // value computed directly
+
+    // Short-circuits at the first failing channel, so later channels are not examined
+    return IsGoodWordPrec("Channel 1", Interpolated[0], Exact[0], 2) &&
+           IsGoodWordPrec("Channel 2", Interpolated[1], Exact[1], 2) &&
+           IsGoodWordPrec("Channel 3", Interpolated[2], Exact[2], 2);
+}
+
+// Evaluates one 6-channel input through the pipeline and directly through Sampler6D, then
+// compares the two results channel by channel via IsGoodWordPrec (last argument 2).
+// Returns TRUE only when all three output channels pass.
+static
+cmsBool CheckOne6D(cmsPipeline* lut, cmsUInt16Number a1, cmsUInt16Number a2,
+                                     cmsUInt16Number a3, cmsUInt16Number a4,
+                                     cmsUInt16Number a5, cmsUInt16Number a6)
+{
+    cmsUInt16Number In[6], Interpolated[3], Exact[3];
+
+    In[0] = a1;
+    In[1] = a2;
+    In[2] = a3;
+    In[3] = a4;
+    In[4] = a5;
+    In[5] = a6;
+
+    cmsPipelineEval16(In, Interpolated, lut);   // value read back through the LUT
+    Sampler6D(In, Exact, NULL);                 // value computed directly
+
+    // Short-circuits at the first failing channel, so later channels are not examined
+    return IsGoodWordPrec("Channel 1", Interpolated[0], Exact[0], 2) &&
+           IsGoodWordPrec("Channel 2", Interpolated[1], Exact[1], 2) &&
+           IsGoodWordPrec("Channel 3", Interpolated[2], Exact[2], 2);
+}
+
+
+// Evaluates one 7-channel input through the pipeline and directly through Sampler7D, then
+// compares the two results channel by channel via IsGoodWordPrec (last argument 2).
+// Returns TRUE only when all three output channels pass.
+static
+cmsBool CheckOne7D(cmsPipeline* lut, cmsUInt16Number a1, cmsUInt16Number a2,
+                                     cmsUInt16Number a3, cmsUInt16Number a4,
+                                     cmsUInt16Number a5, cmsUInt16Number a6,
+                                     cmsUInt16Number a7)
+{
+    cmsUInt16Number In[7], Interpolated[3], Exact[3];
+
+    In[0] = a1;
+    In[1] = a2;
+    In[2] = a3;
+    In[3] = a4;
+    In[4] = a5;
+    In[5] = a6;
+    In[6] = a7;
+
+    cmsPipelineEval16(In, Interpolated, lut);   // value read back through the LUT
+    Sampler7D(In, Exact, NULL);                 // value computed directly
+
+    // Short-circuits at the first failing channel, so later channels are not examined
+    return IsGoodWordPrec("Channel 1", Interpolated[0], Exact[0], 2) &&
+           IsGoodWordPrec("Channel 2", Interpolated[1], Exact[1], 2) &&
+           IsGoodWordPrec("Channel 3", Interpolated[2], Exact[2], 2);
+}
+
+
+// Evaluates one 8-channel input through the pipeline and directly through Sampler8D, then
+// compares the two results channel by channel via IsGoodWordPrec (last argument 2).
+// Returns TRUE only when all three output channels pass.
+static
+cmsBool CheckOne8D(cmsPipeline* lut, cmsUInt16Number a1, cmsUInt16Number a2,
+                                     cmsUInt16Number a3, cmsUInt16Number a4,
+                                     cmsUInt16Number a5, cmsUInt16Number a6,
+                                     cmsUInt16Number a7, cmsUInt16Number a8)
+{
+    cmsUInt16Number In[8], Interpolated[3], Exact[3];
+
+    In[0] = a1;
+    In[1] = a2;
+    In[2] = a3;
+    In[3] = a4;
+    In[4] = a5;
+    In[5] = a6;
+    In[6] = a7;
+    In[7] = a8;
+
+    cmsPipelineEval16(In, Interpolated, lut);   // value read back through the LUT
+    Sampler8D(In, Exact, NULL);                 // value computed directly
+
+    // Short-circuits at the first failing channel, so later channels are not examined
+    return IsGoodWordPrec("Channel 1", Interpolated[0], Exact[0], 2) &&
+           IsGoodWordPrec("Channel 2", Interpolated[1], Exact[1], 2) &&
+           IsGoodWordPrec("Channel 3", Interpolated[2], Exact[2], 2);
+}
+
+
+// Builds a 3->3 pipeline with a single 16-bit CLUT stage (allocated with 9 as its size argument)
+// filled through cmsStageSampleCLut16bit with Sampler3D, then verifies a fixed set of probe
+// colors with CheckOne3D. Returns 1 when every probe passes, 0 otherwise.
+// The pipeline is released on every path, including failures.
+static
+cmsInt32Number Check3Dinterp(void)
+{
+    // Both extremes first, then assorted interior points
+    static const cmsUInt16Number Probes[][3] = {
+        { 0x0000, 0x0000, 0x0000 },
+        { 0xffff, 0xffff, 0xffff },
+        { 0x8080, 0x8080, 0x8080 },
+        { 0x0000, 0xFE00, 0x80FF },
+        { 0x1111, 0x2222, 0x3333 },
+        { 0x0000, 0x0012, 0x0013 },
+        { 0x3141, 0x1415, 0x1592 },
+        { 0xFF00, 0xFF01, 0xFF12 }
+    };
+    cmsPipeline* lut;
+    cmsStage* mpe;
+    cmsUInt32Number i;
+    cmsInt32Number rc = 1;
+
+    lut = cmsPipelineAlloc(DbgThread(), 3, 3);
+    mpe = cmsStageAllocCLut16bit(DbgThread(), 9, 3, 3, NULL);
+    cmsStageSampleCLut16bit(mpe, Sampler3D, NULL, 0);
+    cmsPipelineInsertStage(lut, cmsAT_BEGIN, mpe);
+
+    // Check accuracy, stopping at the first probe that fails
+    for (i=0; i < sizeof(Probes) / sizeof(Probes[0]); i++) {
+
+        if (!CheckOne3D(lut, Probes[i][0], Probes[i][1], Probes[i][2])) {
+            rc = 0;
+            break;
+        }
+    }
+
+    cmsPipelineFree(lut);
+
+    return rc;
+}
+
+// Builds a 3->3 pipeline with a single 16-bit CLUT stage allocated from the per-input
+// Dimensions { 7, 8, 9 } and filled through cmsStageSampleCLut16bit with Sampler3D, then
+// verifies a fixed set of probe colors with CheckOne3D. Returns 1 when every probe passes,
+// 0 otherwise. The pipeline is released on every path, including failures.
+static
+cmsInt32Number Check3DinterpGranular(void)
+{
+    // Both extremes first, then assorted interior points
+    static const cmsUInt16Number Probes[][3] = {
+        { 0x0000, 0x0000, 0x0000 },
+        { 0xffff, 0xffff, 0xffff },
+        { 0x8080, 0x8080, 0x8080 },
+        { 0x0000, 0xFE00, 0x80FF },
+        { 0x1111, 0x2222, 0x3333 },
+        { 0x0000, 0x0012, 0x0013 },
+        { 0x3141, 0x1415, 0x1592 },
+        { 0xFF00, 0xFF01, 0xFF12 }
+    };
+    cmsPipeline* lut;
+    cmsStage* mpe;
+    cmsUInt32Number Dimensions[] = { 7, 8, 9 };
+    cmsUInt32Number i;
+    cmsInt32Number rc = 1;
+
+    lut = cmsPipelineAlloc(DbgThread(), 3, 3);
+    mpe = cmsStageAllocCLut16bitGranular(DbgThread(), Dimensions, 3, 3, NULL);
+    cmsStageSampleCLut16bit(mpe, Sampler3D, NULL, 0);
+    cmsPipelineInsertStage(lut, cmsAT_BEGIN, mpe);
+
+    // Check accuracy, stopping at the first probe that fails
+    for (i=0; i < sizeof(Probes) / sizeof(Probes[0]); i++) {
+
+        if (!CheckOne3D(lut, Probes[i][0], Probes[i][1], Probes[i][2])) {
+            rc = 0;
+            break;
+        }
+    }
+
+    cmsPipelineFree(lut);
+
+    return rc;
+}
+
+
+// Builds a 4->3 pipeline with a single 16-bit CLUT stage (allocated with 9 as its size argument)
+// filled through cmsStageSampleCLut16bit with Sampler4D, then verifies a fixed set of probe
+// colors with CheckOne4D. Returns 1 when every probe passes, 0 otherwise.
+// The pipeline is released on every path, including failures.
+static
+cmsInt32Number Check4Dinterp(void)
+{
+    // Both extremes first, then assorted interior points
+    static const cmsUInt16Number Probes[][4] = {
+        { 0x0000, 0x0000, 0x0000, 0x0000 },
+        { 0xffff, 0xffff, 0xffff, 0xffff },
+        { 0x8080, 0x8080, 0x8080, 0x8080 },
+        { 0x0000, 0xFE00, 0x80FF, 0x8888 },
+        { 0x1111, 0x2222, 0x3333, 0x4444 },
+        { 0x0000, 0x0012, 0x0013, 0x0014 },
+        { 0x3141, 0x1415, 0x1592, 0x9261 },
+        { 0xFF00, 0xFF01, 0xFF12, 0xFF13 }
+    };
+    cmsPipeline* lut;
+    cmsStage* mpe;
+    cmsUInt32Number i;
+    cmsInt32Number rc = 1;
+
+    lut = cmsPipelineAlloc(DbgThread(), 4, 3);
+    mpe = cmsStageAllocCLut16bit(DbgThread(), 9, 4, 3, NULL);
+    cmsStageSampleCLut16bit(mpe, Sampler4D, NULL, 0);
+    cmsPipelineInsertStage(lut, cmsAT_BEGIN, mpe);
+
+    // Check accuracy, stopping at the first probe that fails
+    for (i=0; i < sizeof(Probes) / sizeof(Probes[0]); i++) {
+
+        if (!CheckOne4D(lut, Probes[i][0], Probes[i][1], Probes[i][2], Probes[i][3])) {
+            rc = 0;
+            break;
+        }
+    }
+
+    cmsPipelineFree(lut);
+
+    return rc;
+}
+
+
+
+// Builds a 4->3 pipeline with a single 16-bit CLUT stage allocated from the per-input
+// Dimensions { 9, 8, 7, 6 } and filled through cmsStageSampleCLut16bit with Sampler4D, then
+// verifies a fixed set of probe colors with CheckOne4D. Returns 1 when every probe passes,
+// 0 otherwise. The pipeline is released on every path, including failures.
+static
+cmsInt32Number Check4DinterpGranular(void)
+{
+    // Both extremes first, then assorted interior points
+    static const cmsUInt16Number Probes[][4] = {
+        { 0x0000, 0x0000, 0x0000, 0x0000 },
+        { 0xffff, 0xffff, 0xffff, 0xffff },
+        { 0x8080, 0x8080, 0x8080, 0x8080 },
+        { 0x0000, 0xFE00, 0x80FF, 0x8888 },
+        { 0x1111, 0x2222, 0x3333, 0x4444 },
+        { 0x0000, 0x0012, 0x0013, 0x0014 },
+        { 0x3141, 0x1415, 0x1592, 0x9261 },
+        { 0xFF00, 0xFF01, 0xFF12, 0xFF13 }
+    };
+    cmsPipeline* lut;
+    cmsStage* mpe;
+    cmsUInt32Number Dimensions[] = { 9, 8, 7, 6 };
+    cmsUInt32Number i;
+    cmsInt32Number rc = 1;
+
+    lut = cmsPipelineAlloc(DbgThread(), 4, 3);
+    mpe = cmsStageAllocCLut16bitGranular(DbgThread(), Dimensions, 4, 3, NULL);
+    cmsStageSampleCLut16bit(mpe, Sampler4D, NULL, 0);
+    cmsPipelineInsertStage(lut, cmsAT_BEGIN, mpe);
+
+    // Check accuracy, stopping at the first probe that fails
+    for (i=0; i < sizeof(Probes) / sizeof(Probes[0]); i++) {
+
+        if (!CheckOne4D(lut, Probes[i][0], Probes[i][1], Probes[i][2], Probes[i][3])) {
+            rc = 0;
+            break;
+        }
+    }
+
+    cmsPipelineFree(lut);
+
+    return rc;
+}
+
+
+// Builds a 5->3 pipeline with a single 16-bit CLUT stage allocated from the per-input
+// Dimensions { 3, 2, 2, 2, 2 } and filled through cmsStageSampleCLut16bit with Sampler5D, then
+// verifies a fixed set of probe colors with CheckOne5D. Returns 1 when every probe passes,
+// 0 otherwise. The pipeline is released on every path, including failures.
+static
+cmsInt32Number Check5DinterpGranular(void)
+{
+    // Both extremes first, then assorted interior points
+    static const cmsUInt16Number Probes[][5] = {
+        { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 },
+        { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff },
+        { 0x8080, 0x8080, 0x8080, 0x8080, 0x1234 },
+        { 0x0000, 0xFE00, 0x80FF, 0x8888, 0x8078 },
+        { 0x1111, 0x2222, 0x3333, 0x4444, 0x1455 },
+        { 0x0000, 0x0012, 0x0013, 0x0014, 0x2333 },
+        { 0x3141, 0x1415, 0x1592, 0x9261, 0x4567 },
+        { 0xFF00, 0xFF01, 0xFF12, 0xFF13, 0xF344 }
+    };
+    cmsPipeline* lut;
+    cmsStage* mpe;
+    cmsUInt32Number Dimensions[] = { 3, 2, 2, 2, 2 };
+    cmsUInt32Number i;
+    cmsInt32Number rc = 1;
+
+    lut = cmsPipelineAlloc(DbgThread(), 5, 3);
+    mpe = cmsStageAllocCLut16bitGranular(DbgThread(), Dimensions, 5, 3, NULL);
+    cmsStageSampleCLut16bit(mpe, Sampler5D, NULL, 0);
+    cmsPipelineInsertStage(lut, cmsAT_BEGIN, mpe);
+
+    // Check accuracy, stopping at the first probe that fails
+    for (i=0; i < sizeof(Probes) / sizeof(Probes[0]); i++) {
+
+        const cmsUInt16Number* v = Probes[i];
+
+        if (!CheckOne5D(lut, v[0], v[1], v[2], v[3], v[4])) {
+            rc = 0;
+            break;
+        }
+    }
+
+    cmsPipelineFree(lut);
+
+    return rc;
+}
+
+// Builds a 6->3 pipeline with a single 16-bit CLUT stage allocated from the per-input
+// Dimensions { 4, 3, 3, 2, 2, 2 } and filled through cmsStageSampleCLut16bit with Sampler6D,
+// then verifies a fixed set of probe colors with CheckOne6D. Returns 1 when every probe passes,
+// 0 otherwise. The pipeline is released on every path, including failures.
+static
+cmsInt32Number Check6DinterpGranular(void)
+{
+    // Both extremes first, then assorted interior points
+    static const cmsUInt16Number Probes[][6] = {
+        { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 },
+        { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff },
+        { 0x8080, 0x8080, 0x8080, 0x8080, 0x1234, 0x1122 },
+        { 0x0000, 0xFE00, 0x80FF, 0x8888, 0x8078, 0x2233 },
+        { 0x1111, 0x2222, 0x3333, 0x4444, 0x1455, 0x3344 },
+        { 0x0000, 0x0012, 0x0013, 0x0014, 0x2333, 0x4455 },
+        { 0x3141, 0x1415, 0x1592, 0x9261, 0x4567, 0x5566 },
+        { 0xFF00, 0xFF01, 0xFF12, 0xFF13, 0xF344, 0x6677 }
+    };
+    cmsPipeline* lut;
+    cmsStage* mpe;
+    cmsUInt32Number Dimensions[] = { 4, 3, 3, 2, 2, 2 };
+    cmsUInt32Number i;
+    cmsInt32Number rc = 1;
+
+    lut = cmsPipelineAlloc(DbgThread(), 6, 3);
+    mpe = cmsStageAllocCLut16bitGranular(DbgThread(), Dimensions, 6, 3, NULL);
+    cmsStageSampleCLut16bit(mpe, Sampler6D, NULL, 0);
+    cmsPipelineInsertStage(lut, cmsAT_BEGIN, mpe);
+
+    // Check accuracy, stopping at the first probe that fails
+    for (i=0; i < sizeof(Probes) / sizeof(Probes[0]); i++) {
+
+        const cmsUInt16Number* v = Probes[i];
+
+        if (!CheckOne6D(lut, v[0], v[1], v[2], v[3], v[4], v[5])) {
+            rc = 0;
+            break;
+        }
+    }
+
+    cmsPipelineFree(lut);
+
+    return rc;
+}
+
+// Builds a 7->3 pipeline with a single 16-bit CLUT stage allocated from the per-input
+// Dimensions { 4, 3, 3, 2, 2, 2, 2 } and filled through cmsStageSampleCLut16bit with Sampler7D,
+// then verifies a fixed set of probe colors with CheckOne7D. Returns 1 when every probe passes,
+// 0 otherwise. The pipeline is released on every path, including failures.
+static
+cmsInt32Number Check7DinterpGranular(void)
+{
+    // Both extremes first, then assorted interior points
+    static const cmsUInt16Number Probes[][7] = {
+        { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 },
+        { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff },
+        { 0x8080, 0x8080, 0x8080, 0x8080, 0x1234, 0x1122, 0x0056 },
+        { 0x0000, 0xFE00, 0x80FF, 0x8888, 0x8078, 0x2233, 0x0088 },
+        { 0x1111, 0x2222, 0x3333, 0x4444, 0x1455, 0x3344, 0x1987 },
+        { 0x0000, 0x0012, 0x0013, 0x0014, 0x2333, 0x4455, 0x9988 },
+        { 0x3141, 0x1415, 0x1592, 0x9261, 0x4567, 0x5566, 0xfe56 },
+        { 0xFF00, 0xFF01, 0xFF12, 0xFF13, 0xF344, 0x6677, 0xbabe }
+    };
+    cmsPipeline* lut;
+    cmsStage* mpe;
+    cmsUInt32Number Dimensions[] = { 4, 3, 3, 2, 2, 2, 2 };
+    cmsUInt32Number i;
+    cmsInt32Number rc = 1;
+
+    lut = cmsPipelineAlloc(DbgThread(), 7, 3);
+    mpe = cmsStageAllocCLut16bitGranular(DbgThread(), Dimensions, 7, 3, NULL);
+    cmsStageSampleCLut16bit(mpe, Sampler7D, NULL, 0);
+    cmsPipelineInsertStage(lut, cmsAT_BEGIN, mpe);
+
+    // Check accuracy, stopping at the first probe that fails
+    for (i=0; i < sizeof(Probes) / sizeof(Probes[0]); i++) {
+
+        const cmsUInt16Number* v = Probes[i];
+
+        if (!CheckOne7D(lut, v[0], v[1], v[2], v[3], v[4], v[5], v[6])) {
+            rc = 0;
+            break;
+        }
+    }
+
+    cmsPipelineFree(lut);
+
+    return rc;
+}
+
+
+// Builds an 8->3 pipeline with a single 16-bit CLUT stage allocated from the per-input
+// Dimensions { 4, 3, 3, 2, 2, 2, 2, 2 } and filled through cmsStageSampleCLut16bit with
+// Sampler8D, then verifies a fixed set of probe colors with CheckOne8D. Returns 1 when every
+// probe passes, 0 otherwise. The pipeline is released on every path, including failures.
+static
+cmsInt32Number Check8DinterpGranular(void)
+{
+    // Both extremes first, then assorted interior points
+    static const cmsUInt16Number Probes[][8] = {
+        { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 },
+        { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff },
+        { 0x8080, 0x8080, 0x8080, 0x8080, 0x1234, 0x1122, 0x0056, 0x0011 },
+        { 0x0000, 0xFE00, 0x80FF, 0x8888, 0x8078, 0x2233, 0x0088, 0x2020 },
+        { 0x1111, 0x2222, 0x3333, 0x4444, 0x1455, 0x3344, 0x1987, 0x4532 },
+        { 0x0000, 0x0012, 0x0013, 0x0014, 0x2333, 0x4455, 0x9988, 0x1200 },
+        { 0x3141, 0x1415, 0x1592, 0x9261, 0x4567, 0x5566, 0xfe56, 0x6666 },
+        { 0xFF00, 0xFF01, 0xFF12, 0xFF13, 0xF344, 0x6677, 0xbabe, 0xface }
+    };
+    cmsPipeline* lut;
+    cmsStage* mpe;
+    cmsUInt32Number Dimensions[] = { 4, 3, 3, 2, 2, 2, 2, 2 };
+    cmsUInt32Number i;
+    cmsInt32Number rc = 1;
+
+    lut = cmsPipelineAlloc(DbgThread(), 8, 3);
+    mpe = cmsStageAllocCLut16bitGranular(DbgThread(), Dimensions, 8, 3, NULL);
+    cmsStageSampleCLut16bit(mpe, Sampler8D, NULL, 0);
+    cmsPipelineInsertStage(lut, cmsAT_BEGIN, mpe);
+
+    // Check accuracy, stopping at the first probe that fails
+    for (i=0; i < sizeof(Probes) / sizeof(Probes[0]); i++) {
+
+        const cmsUInt16Number* v = Probes[i];
+
+        if (!CheckOne8D(lut, v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7])) {
+            rc = 0;
+            break;
+        }
+    }
+
+    cmsPipelineFree(lut);
+
+    return rc;
+}
+
+// Colorimetric conversions -------------------------------------------------------------------------------------------------
+
+// Round-trips a grid of Lab colors (L 0..100 step 10, a and b -128..128 step 8) through LCh
+// and back; passes when the largest cmsDeltaE between original and result is below 1E-12.
+static
+cmsInt32Number CheckLab2LCh(void)
+{
+    cmsInt32Number Ls, as, bs;
+    cmsFloat64Number dE;
+    cmsFloat64Number worst = 0;
+    cmsCIELab Src, Back;
+    cmsCIELCh Polar;
+
+    for (Ls = 0; Ls <= 100; Ls += 10)
+        for (as = -128; as <= 128; as += 8)
+            for (bs = -128; bs <= 128; bs += 8) {
+
+                Src.L = Ls;
+                Src.a = as;
+                Src.b = bs;
+
+                // Forward and inverse conversion
+                cmsLab2LCh(&Polar, &Src);
+                cmsLCh2Lab(&Back, &Polar);
+
+                dE = cmsDeltaE(&Src, &Back);
+                if (dE > worst) worst = dE;
+            }
+
+    return worst < 1E-12;
+}
+
+// Round-trips a grid of Lab colors (L 0..100 step 10, a and b -128..128 step 8) through XYZ
+// and back, passing NULL as the white point argument; passes when the largest cmsDeltaE between
+// original and result is below 1E-12.
+static
+cmsInt32Number CheckLab2XYZ(void)
+{
+    cmsInt32Number Ls, as, bs;
+    cmsFloat64Number dE;
+    cmsFloat64Number worst = 0;
+    cmsCIELab Src, Back;
+    cmsCIEXYZ Tristimulus;
+
+    for (Ls = 0; Ls <= 100; Ls += 10)
+        for (as = -128; as <= 128; as += 8)
+            for (bs = -128; bs <= 128; bs += 8) {
+
+                Src.L = Ls;
+                Src.a = as;
+                Src.b = bs;
+
+                // Forward and inverse conversion
+                cmsLab2XYZ(NULL, &Tristimulus, &Src);
+                cmsXYZ2Lab(NULL, &Back, &Tristimulus);
+
+                dE = cmsDeltaE(&Src, &Back);
+                if (dE > worst) worst = dE;
+            }
+
+    return worst < 1E-12;
+}
+
+// Round trip Lab -> XYZ -> xyY -> XYZ -> Lab over a coarse grid of the Lab
+// space. The worst deltaE found has to stay below 1E-12.
+static
+cmsInt32Number CheckLab2xyY(void)
+{
+    cmsInt32Number L, A, B;
+    cmsFloat64Number Worst = 0;
+
+    for (L = 0; L <= 100; L += 10)
+        for (A = -128; A <= 128; A += 8)
+            for (B = -128; B <= 128; B += 8) {
+
+                cmsCIELab Src, Back;
+                cmsCIEXYZ Tristimulus;
+                cmsCIExyY Chromaticity;
+                cmsFloat64Number dE;
+
+                Src.L = L;
+                Src.a = A;
+                Src.b = B;
+
+                // Forward leg
+                cmsLab2XYZ(NULL, &Tristimulus, &Src);
+                cmsXYZ2xyY(&Chromaticity, &Tristimulus);
+
+                // And back again, reusing the same XYZ storage
+                cmsxyY2XYZ(&Tristimulus, &Chromaticity);
+                cmsXYZ2Lab(NULL, &Back, &Tristimulus);
+
+                dE = cmsDeltaE(&Src, &Back);
+                Worst = (dE > Worst) ? dE : Worst;
+            }
+
+    return Worst < 1E-12;
+}
+
+
+// Round trip on the V2 Lab 16-bit encoding: every value in 0..65534 is
+// decoded to floating point Lab, re-encoded, and has to come back unchanged
+// on all three channels.
+static
+cmsInt32Number CheckLabV2encoding(void)
+{
+    cmsInt32Number Mismatches = 0;
+    cmsInt32Number Value, Chan;
+
+    for (Value = 0; Value < 65535; Value++) {
+
+        cmsUInt16Number Encoded[3], Recoded[3];
+        cmsCIELab Lab;
+
+        for (Chan = 0; Chan < 3; Chan++)
+            Encoded[Chan] = (cmsUInt16Number) Value;
+
+        cmsLabEncoded2FloatV2(&Lab, Encoded);
+        cmsFloat2LabEncodedV2(Recoded, &Lab);
+
+        for (Chan = 0; Chan < 3; Chan++)
+            if (Recoded[Chan] != Value) Mismatches++;
+    }
+
+    return Mismatches == 0;
+}
+
+// Round trip on the V4 Lab 16-bit encoding: every value in 0..65534 is
+// decoded to floating point Lab, re-encoded, and has to come back unchanged
+// on all three channels.
+static
+cmsInt32Number CheckLabV4encoding(void)
+{
+    cmsInt32Number Mismatches = 0;
+    cmsInt32Number Value, Chan;
+
+    for (Value = 0; Value < 65535; Value++) {
+
+        cmsUInt16Number Encoded[3], Recoded[3];
+        cmsCIELab Lab;
+
+        for (Chan = 0; Chan < 3; Chan++)
+            Encoded[Chan] = (cmsUInt16Number) Value;
+
+        cmsLabEncoded2Float(&Lab, Encoded);
+        cmsFloat2LabEncoded(Recoded, &Lab);
+
+        for (Chan = 0; Chan < 3; Chan++)
+            if (Recoded[Chan] != Value) Mismatches++;
+    }
+
+    return Mismatches == 0;
+}
+
+
+// BlackBody -----------------------------------------------------------------------------------------------------
+
+// Black body round trip: temperature -> white point -> temperature for each
+// integer temperature in 4000..24999. Fails at once when the inverse cannot
+// be computed; otherwise the largest deviation must be under 100 degrees.
+static
+cmsInt32Number CheckTemp2CHRM(void)
+{
+    cmsInt32Number TempK;
+    cmsFloat64Number Worst = 0;
+
+    for (TempK = 4000; TempK < 25000; TempK++) {
+
+        cmsCIExyY WhitePoint;
+        cmsFloat64Number Recovered, Delta;
+
+        cmsWhitePointFromTemp(&WhitePoint, TempK);
+        if (!cmsTempFromWhitePoint(&Recovered, &WhitePoint))
+            return 0;
+
+        Delta = fabs(Recovered - TempK);
+        if (Delta > Worst)
+            Worst = Delta;
+    }
+
+    // 100 degree is the actual resolution
+    return (Worst < 100);
+}
+
+
+
+// Tone curves -----------------------------------------------------------------------------------------------------
+
+// Estimates the gamma of curve c and compares it with the expected value g.
+// Returns 0 when they differ by more than 0.001, 1 otherwise.
+static
+cmsInt32Number CheckGammaEstimation(cmsToneCurve* c, cmsFloat64Number g)
+{
+    cmsFloat64Number Estimated = cmsEstimateGamma(c, 0.001);
+    cmsFloat64Number Deviation;
+
+    SubTest("Gamma estimation");
+
+    Deviation = fabs(Estimated - g);
+    return (Deviation > 0.001) ? 0 : 1;
+}
+
+// Builds a gamma 1.0 curve and verifies that its 16-bit evaluation is the
+// identity for every input in 0..0xfffe, then that the estimated gamma is
+// 1.0. Returns 1 on success, 0 on failure. The curve is released on every
+// exit path.
+static
+cmsInt32Number CheckGammaCreation16(void)
+{
+    cmsToneCurve* LinGamma = cmsBuildGamma(DbgThread(), 1.0);
+    cmsInt32Number i, rc;
+    cmsUInt16Number in, out;
+
+    for (i=0; i < 0xffff; i++) {
+
+        in = (cmsUInt16Number) i;
+        out = cmsEvalToneCurve16(LinGamma, in);
+        if (in != out) {
+            Fail("(lin gamma): Must be %x, But is %x : ", in, out);
+            cmsFreeToneCurve(LinGamma);
+            return 0;
+        }
+    }
+
+    // Keep the verdict in a local so the curve is freed even when the
+    // estimation check fails (it used to leak on that path).
+    rc = CheckGammaEstimation(LinGamma, 1.0) ? 1 : 0;
+
+    cmsFreeToneCurve(LinGamma);
+    return rc;
+
+}
+
+// Builds a gamma 1.0 curve and verifies that its floating point evaluation
+// stays within 1/65535 of the input for i/65535, i in 0..0xfffe, then that
+// the estimated gamma is 1.0. Returns 1 on success, 0 on failure. The curve
+// is released on every exit path.
+static
+cmsInt32Number CheckGammaCreationFlt(void)
+{
+    cmsToneCurve* LinGamma = cmsBuildGamma(DbgThread(), 1.0);
+    cmsInt32Number i, rc;
+    cmsFloat32Number in, out;
+
+    for (i=0; i < 0xffff; i++) {
+
+        in = (cmsFloat32Number) (i / 65535.0);
+        out = cmsEvalToneCurveFloat(LinGamma, in);
+        if (fabs(in - out) > (1/65535.0)) {
+            Fail("(lin gamma): Must be %f, But is %f : ", in, out);
+            cmsFreeToneCurve(LinGamma);
+            return 0;
+        }
+    }
+
+    // Keep the verdict in a local so the curve is freed even when the
+    // estimation check fails (it used to leak on that path).
+    rc = CheckGammaEstimation(LinGamma, 1.0) ? 1 : 0;
+
+    cmsFreeToneCurve(LinGamma);
+    return rc;
+}
+
+// Check curves built from a single power function against pow().
+// The largest absolute error is stored in the file-level MaxErr and, when
+// non-zero, printed scaled to 0..ffff counts. The return value depends only
+// on the gamma estimation check: 1 when the estimated gamma matches g,
+// 0 otherwise. The curve is released on every exit path.
+static
+cmsInt32Number CheckGammaFloat(cmsFloat64Number g)
+{
+    cmsToneCurve* Curve = cmsBuildGamma(DbgThread(), g);
+    cmsInt32Number i, rc;
+    cmsFloat32Number in, out;
+    cmsFloat64Number val, Err;
+
+    MaxErr = 0.0;
+    for (i=0; i < 0xffff; i++) {
+
+        in = (cmsFloat32Number) (i / 65535.0);
+        out = cmsEvalToneCurveFloat(Curve, in);
+        val = pow((cmsFloat64Number) in, g);
+
+        Err = fabs( val - out);
+        if (Err > MaxErr) MaxErr = Err;
+    }
+
+    if (MaxErr > 0) printf("|Err|<%lf ", MaxErr * 65535.0);
+
+    // Keep the verdict in a local so the curve is freed even when the
+    // estimation check fails (it used to leak on that path).
+    rc = CheckGammaEstimation(Curve, g) ? 1 : 0;
+
+    cmsFreeToneCurve(Curve);
+    return rc;
+}
+
+// Power-function curve check for gamma 1.8 (see CheckGammaFloat).
+static cmsInt32Number CheckGamma18(void)
+{
+    return CheckGammaFloat(1.8);
+}
+
+// Power-function curve check for gamma 2.2 (see CheckGammaFloat).
+static cmsInt32Number CheckGamma22(void)
+{
+    return CheckGammaFloat(2.2);
+}
+
+// Power-function curve check for gamma 3.0 (see CheckGammaFloat).
+static cmsInt32Number CheckGamma30(void)
+{
+    return CheckGammaFloat(3.0);
+}
+
+
+// Check table-based gamma functions.
+// A 1025-entry floating point table is filled with x^g, turned into a
+// tabulated curve, and evaluated at i/65535 for i in 0..0xffff against pow().
+// The largest absolute error is stored in the file-level MaxErr and, when
+// non-zero, printed scaled to 0..ffff counts. The return value depends only
+// on the gamma estimation check. The curve is released on every exit path.
+static
+cmsInt32Number CheckGammaFloatTable(cmsFloat64Number g)
+{
+    cmsFloat32Number Values[1025];
+    cmsToneCurve* Curve;
+    cmsInt32Number i, rc;
+    cmsFloat32Number in, out;
+    cmsFloat64Number val, Err;
+
+    for (i=0; i <= 1024; i++) {
+
+        in = (cmsFloat32Number) (i / 1024.0);
+        Values[i] = powf(in, (float) g);
+    }
+
+    Curve = cmsBuildTabulatedToneCurveFloat(DbgThread(), 1025, Values);
+
+    MaxErr = 0.0;
+    for (i=0; i <= 0xffff; i++) {
+
+        in = (cmsFloat32Number) (i / 65535.0);
+        out = cmsEvalToneCurveFloat(Curve, in);
+        val = pow(in, g);
+
+        Err = fabs(val - out);
+        if (Err > MaxErr) MaxErr = Err;
+    }
+
+    if (MaxErr > 0) printf("|Err|<%lf ", MaxErr * 65535.0);
+
+    // Keep the verdict in a local so the curve is freed even when the
+    // estimation check fails (it used to leak on that path).
+    rc = CheckGammaEstimation(Curve, g) ? 1 : 0;
+
+    cmsFreeToneCurve(Curve);
+    return rc;
+}
+
+
+// Float-table curve check for gamma 1.8 (see CheckGammaFloatTable).
+static cmsInt32Number CheckGamma18Table(void)
+{
+    return CheckGammaFloatTable(1.8);
+}
+
+// Float-table curve check for gamma 2.2 (see CheckGammaFloatTable).
+static cmsInt32Number CheckGamma22Table(void)
+{
+    return CheckGammaFloatTable(2.2);
+}
+
+// Float-table curve check for gamma 3.0 (see CheckGammaFloatTable).
+static cmsInt32Number CheckGamma30Table(void)
+{
+    return CheckGammaFloatTable(3.0);
+}
+
+// Create a curve from a 16-bit table (which is a pure gamma function) and
+// check it against the pow function.
+// The 1025 table entries hold x^g rounded to 0..65535. The curve is evaluated
+// at i/65535 for i in 0..0xffff; the largest absolute error is stored in the
+// file-level MaxErr and, when non-zero, printed scaled to 0..ffff counts.
+// The return value depends only on the gamma estimation check. The curve is
+// released on every exit path.
+static
+cmsInt32Number CheckGammaWordTable(cmsFloat64Number g)
+{
+    cmsUInt16Number Values[1025];
+    cmsToneCurve* Curve;
+    cmsInt32Number i, rc;
+    cmsFloat32Number in, out;
+    cmsFloat64Number val, Err;
+
+    for (i=0; i <= 1024; i++) {
+
+        in = (cmsFloat32Number) (i / 1024.0);
+        Values[i] = (cmsUInt16Number) floor(pow(in, g) * 65535.0 + 0.5);
+    }
+
+    Curve = cmsBuildTabulatedToneCurve16(DbgThread(), 1025, Values);
+
+    MaxErr = 0.0;
+    for (i=0; i <= 0xffff; i++) {
+
+        in = (cmsFloat32Number) (i / 65535.0);
+        out = cmsEvalToneCurveFloat(Curve, in);
+        val = pow(in, g);
+
+        Err = fabs(val - out);
+        if (Err > MaxErr) MaxErr = Err;
+    }
+
+    if (MaxErr > 0) printf("|Err|<%lf ", MaxErr * 65535.0);
+
+    // Keep the verdict in a local so the curve is freed even when the
+    // estimation check fails (it used to leak on that path).
+    rc = CheckGammaEstimation(Curve, g) ? 1 : 0;
+
+    cmsFreeToneCurve(Curve);
+    return rc;
+}
+
+// 16-bit table curve check for gamma 1.8 (see CheckGammaWordTable).
+static cmsInt32Number CheckGamma18TableWord(void)
+{
+    return CheckGammaWordTable(1.8);
+}
+
+// 16-bit table curve check for gamma 2.2 (see CheckGammaWordTable).
+static cmsInt32Number CheckGamma22TableWord(void)
+{
+    return CheckGammaWordTable(2.2);
+}
+
+// 16-bit table curve check for gamma 3.0 (see CheckGammaWordTable).
+static cmsInt32Number CheckGamma30TableWord(void)
+{
+    return CheckGammaWordTable(3.0);
+}
+
+
+// Curve joining test. Two identical gamma 3.0 curves are joined with
+// cmsJoinToneCurve using 256 points, and the result is expected to be
+// reported as linear by cmsIsToneCurveLinear.
+// NOTE(review): presumably the join composes the first curve with the
+// inverse of the second, which is why two equal curves cancel out - confirm
+// against the cmsJoinToneCurve documentation.
+// Returns the linearity verdict; a failure is also reported through Fail().
+static
+cmsInt32Number CheckJointCurves(void)
+{
+    cmsToneCurve *Forward, *Reverse, *Result;
+    cmsBool  rc;
+
+    Forward = cmsBuildGamma(DbgThread(), 3.0);
+    Reverse = cmsBuildGamma(DbgThread(), 3.0);
+
+    Result = cmsJoinToneCurve(DbgThread(), Forward, Reverse, 256);
+
+    // The inputs are no longer needed once the joined curve exists
+    cmsFreeToneCurve(Forward); cmsFreeToneCurve(Reverse);
+
+    rc = cmsIsToneCurveLinear(Result);
+    cmsFreeToneCurve(Result);
+
+    if (!rc)
+        Fail("Joining same curve twice does not result in a linear ramp");
+
+    return rc;
+}
+
+
+// Create a gamma curve by cheating the table: a tabulated 16-bit curve is
+// allocated without initial values and its Table16 is then filled directly
+// with a linear ramp.
+//   nEntries - number of table entries
+//   Dir      - non-zero for an ascending ramp, zero for a descending one
+//              (0xFFFF - v)
+// Returns the new curve, owned by the caller, or NULL when the allocation
+// fails.
+static
+cmsToneCurve* GammaTableLinear(cmsInt32Number nEntries, cmsBool Dir)
+{
+    cmsInt32Number i;
+    cmsToneCurve* g = cmsBuildTabulatedToneCurve16(DbgThread(), nEntries, NULL);
+
+    // Do not write through a NULL curve when the allocation failed
+    if (g == NULL) return NULL;
+
+    for (i=0; i < nEntries; i++) {
+
+        cmsInt32Number v = _cmsQuantizeVal(i, nEntries);
+
+        if (Dir)
+            g->Table16[i] = (cmsUInt16Number) v;
+        else
+            g->Table16[i] = (cmsUInt16Number) (0xFFFF - v);
+    }
+
+    return g;
+}
+
+
+// Joining test on a descending curve. A gamma 2.2 curve is turned into a
+// descending, table-based curve by inverting its first 4096 Table16 entries
+// and clearing the type of its first segment. The reverse of that curve is
+// then joined with itself (256 points) and the result must be linear.
+// NOTE(review): the loop assumes Table16 holds at least 4096 entries for a
+// curve coming from cmsBuildGamma - confirm against the library.
+static
+cmsInt32Number CheckJointCurvesDescending(void)
+{
+    cmsToneCurve *Forward, *Reverse, *Result;
+    cmsInt32Number i, rc;
+
+     Forward = cmsBuildGamma(DbgThread(), 2.2);
+
+    // Fake the curve to be table-based
+
+    for (i=0; i < 4096; i++)
+        Forward ->Table16[i] = 0xffff - Forward->Table16[i];
+    Forward ->Segments[0].Type = 0;
+
+    Reverse = cmsReverseToneCurve(Forward);
+
+    // Reverse is used for both operands of the join
+    Result = cmsJoinToneCurve(DbgThread(), Reverse, Reverse, 256);
+
+    cmsFreeToneCurve(Forward);
+    cmsFreeToneCurve(Reverse);
+
+    rc = cmsIsToneCurveLinear(Result);
+    cmsFreeToneCurve(Result);
+
+    return rc;
+}
+
+
+// Evaluates curve c at the 16-bit input Point and tells whether the output
+// is within one count of Value (absolute difference below 2).
+static
+cmsInt32Number CheckFToneCurvePoint(cmsToneCurve* c, cmsUInt16Number Point, cmsInt32Number Value)
+{
+    cmsInt32Number Got = cmsEvalToneCurve16(c, Point);
+    cmsInt32Number Delta = abs(Value - Got);
+
+    return (Delta < 2);
+}
+
+// Reverses a degenerated 16-entry table (flat at 0 for the first five
+// entries, flat at 0xffff for the last six) and checks a few points of the
+// reversed curve with a tolerance of one count.
+// Returns 1 when every point matches, 0 otherwise. Both curves are released
+// on every exit path.
+static
+cmsInt32Number CheckReverseDegenerated(void)
+{
+    cmsToneCurve* p, *g;
+    cmsInt32Number rc;
+    cmsUInt16Number Tab[16];
+
+    Tab[0] = 0;
+    Tab[1] = 0;
+    Tab[2] = 0;
+    Tab[3] = 0;
+    Tab[4] = 0;
+    Tab[5] = 0x5555;
+    Tab[6] = 0x6666;
+    Tab[7] = 0x7777;
+    Tab[8] = 0x8888;
+    Tab[9] = 0x9999;
+    Tab[10]= 0xffff;
+    Tab[11]= 0xffff;
+    Tab[12]= 0xffff;
+    Tab[13]= 0xffff;
+    Tab[14]= 0xffff;
+    Tab[15]= 0xffff;
+
+    p = cmsBuildTabulatedToneCurve16(DbgThread(), 16, Tab);
+    g = cmsReverseToneCurve(p);
+
+    // Short-circuit evaluation keeps the original order and stops at the
+    // first failing point, without returning before the curves are freed:
+    //   two points in the well-behaved middle part,
+    //   the first point for zero,
+    //   and the last point.
+    rc = CheckFToneCurvePoint(g, 0x5555, 0x5555) &&
+         CheckFToneCurvePoint(g, 0x7777, 0x7777) &&
+         CheckFToneCurvePoint(g, 0x0000, 0x4444) &&
+         CheckFToneCurvePoint(g, 0xFFFF, 0xFFFF);
+
+    cmsFreeToneCurve(p);
+    cmsFreeToneCurve(g);
+
+    return rc;
+}
+
+
+// Build a parametric sRGB-like curve (parametric type 4).
+// The caller owns the returned curve.
+static
+cmsToneCurve* Build_sRGBGamma(void)
+{
+    cmsFloat64Number sRGBParams[5] = {
+        2.4,
+        1. / 1.055,
+        0.055 / 1.055,
+        1. / 12.92,
+        0.04045             // d
+    };
+
+    return cmsBuildParametricToneCurve(DbgThread(), 4, sRGBParams);
+}
+
+
+
+// Join two gamma tables in floating point format. Result should be a straight line
+// when g2 undoes g1. The combination g2(g1(x)) is sampled at 256 points and
+// returned as a new 16-bit tabulated curve owned by the caller.
+static
+cmsToneCurve* CombineGammaFloat(cmsToneCurve* g1, cmsToneCurve* g2)
+{
+    cmsUInt16Number Samples[256];
+    cmsInt32Number n;
+
+    for (n = 0; n < 256; n++) {
+
+        cmsFloat32Number x      = (cmsFloat32Number) n / 255.0F;
+        cmsFloat32Number Middle = cmsEvalToneCurveFloat(g1, x);
+        cmsFloat32Number y      = cmsEvalToneCurveFloat(g2, Middle);
+
+        // Round to the nearest 16-bit count
+        Samples[n] = (cmsUInt16Number) floor(y * 65535.0 + 0.5);
+    }
+
+    return  cmsBuildTabulatedToneCurve16(DbgThread(), 256, Samples);
+}
+
+// Same as the floating point combination, but using quantized 16-bit values:
+// g2(g1(x)) is sampled at 256 quantized inputs and returned as a new
+// tabulated curve owned by the caller.
+static
+cmsToneCurve* CombineGamma16(cmsToneCurve* g1, cmsToneCurve* g2)
+{
+    cmsUInt16Number Samples[256];
+    cmsInt32Number n;
+
+    for (n = 0; n < 256; n++) {
+
+        cmsUInt16Number Input  = _cmsQuantizeVal(n, 256);
+        cmsUInt16Number Middle = cmsEvalToneCurve16(g1, Input);
+
+        Samples[n] = cmsEvalToneCurve16(g2, Middle);
+    }
+
+    return  cmsBuildTabulatedToneCurve16(DbgThread(), 256, Samples);
+}
+
+// Combines the sRGB-like parametric curve with its reverse in floating
+// point (CombineGammaFloat) and expects the result to be reported as linear.
+static
+cmsInt32Number CheckJointFloatCurves_sRGB(void)
+{
+    cmsToneCurve *Forward, *Reverse, *Result;
+    cmsBool  rc;
+
+    Forward = Build_sRGBGamma();
+    Reverse = cmsReverseToneCurve(Forward);
+    Result = CombineGammaFloat(Forward, Reverse);
+    // Only the combined table is needed from here on
+    cmsFreeToneCurve(Forward); cmsFreeToneCurve(Reverse);
+
+    rc = cmsIsToneCurveLinear(Result);
+    cmsFreeToneCurve(Result);
+
+    return rc;
+}
+
+// Combines the sRGB-like parametric curve with its reverse using 16-bit
+// evaluation (CombineGamma16) and expects the result to be reported as linear.
+static
+cmsInt32Number CheckJoint16Curves_sRGB(void)
+{
+    cmsToneCurve *Forward, *Reverse, *Result;
+    cmsBool  rc;
+
+    Forward = Build_sRGBGamma();
+    Reverse = cmsReverseToneCurve(Forward);
+    Result = CombineGamma16(Forward, Reverse);
+    // Only the combined table is needed from here on
+    cmsFreeToneCurve(Forward); cmsFreeToneCurve(Reverse);
+
+    rc = cmsIsToneCurveLinear(Result);
+    cmsFreeToneCurve(Result);
+
+    return rc;
+}
+
+// Joining test on the S-shaped parametric curve (type 108) with g = 3.2.
+// The reference function used for type 108 in this file is
+// f(x) = (1 - (1-x)^(1/g))^(1/g).
+// The curve is joined with itself over 4096 points and the result must be
+// reported as linear.
+
+static
+cmsInt32Number CheckJointCurvesSShaped(void)
+{
+    cmsFloat64Number p = 3.2;
+    cmsToneCurve *Forward, *Reverse, *Result;
+    cmsInt32Number rc;
+
+    Forward = cmsBuildParametricToneCurve(DbgThread(), 108, &p);
+    // NOTE(review): Reverse is built and freed but never passed to the join;
+    // this only exercises cmsReverseToneCurve on the S-shaped curve.
+    Reverse = cmsReverseToneCurve(Forward);
+    Result = cmsJoinToneCurve(DbgThread(), Forward, Forward, 4096);
+
+    cmsFreeToneCurve(Forward);
+    cmsFreeToneCurve(Reverse);
+
+    rc = cmsIsToneCurveLinear(Result);
+    cmsFreeToneCurve(Result);
+    return rc;
+}
+
+
+// --------------------------------------------------------------------------------------------------------
+
+// Implementation of some tone curve functions
+// Reference for parametric type 1: Y = X ^ Gamma, with Gamma in Params[0].
+static
+cmsFloat32Number Gamma(cmsFloat32Number x, const cmsFloat64Number Params[])
+{
+    return (cmsFloat32Number) pow(x, Params[0]);
+}
+
+// Reference for parametric type 2:
+//   Y = (a*X + b)^Gamma   | X >= -b/a and a*X + b > 0
+//   Y = 0                 | else
+// Params[0] = Gamma, Params[1] = a, Params[2] = b.
+static
+cmsFloat32Number CIE122(cmsFloat32Number x, const cmsFloat64Number Params[])
+{
+    cmsFloat64Number Result = 0;
+
+    if (x >= -Params[2] / Params[1]) {
+
+        cmsFloat64Number Base = Params[1]*x + Params[2];
+
+        if (Base > 0)
+            Result = pow(Base, Params[0]);
+    }
+
+    return (cmsFloat32Number) Result;
+}
+
+// Reference for parametric type 3:
+//   Y = (a*X + b)^Gamma + c   | X >= -b/a and a*X + b > 0
+//   Y = 0                     | X >= -b/a and a*X + b <= 0
+//   Y = c                     | else
+// Params[0] = Gamma, Params[1] = a, Params[2] = b, Params[3] = c.
+static
+cmsFloat32Number IEC61966_3(cmsFloat32Number x, const cmsFloat64Number Params[])
+{
+    cmsFloat64Number Base;
+
+    // Below the breakpoint the curve is the constant c
+    if (!(x >= -Params[2] / Params[1]))
+        return (cmsFloat32Number) Params[3];
+
+    Base = Params[1]*x + Params[2];
+
+    if (Base > 0)
+        return (cmsFloat32Number) (pow(Base, Params[0]) + Params[3]);
+
+    return (cmsFloat32Number) 0.0;
+}
+
+// Reference for parametric type 4 (sRGB-like):
+//   Y = (a*X + b)^Gamma   | X >= d and a*X + b > 0
+//   Y = 0                 | X >= d and a*X + b <= 0
+//   Y = c*X               | X < d
+// Params[0] = Gamma, Params[1] = a, Params[2] = b, Params[3] = c,
+// Params[4] = d.
+static
+cmsFloat32Number IEC61966_21(cmsFloat32Number x, const cmsFloat64Number Params[])
+{
+    cmsFloat64Number Base;
+
+    // Linear segment below the breakpoint
+    if (!(x >= Params[4]))
+        return (cmsFloat32Number) (x * Params[3]);
+
+    Base = Params[1]*x + Params[2];
+
+    if (Base > 0)
+        return (cmsFloat32Number) pow(Base, Params[0]);
+
+    return (cmsFloat32Number) 0.0;
+}
+
+// Reference for parametric type 5:
+//   Y = (aX + b)^Gamma + e | X >= d and aX + b > 0
+//   Y = 0                  | X >= d and aX + b <= 0
+//   Y = cX + f             | else
+// Params[0] = Gamma, Params[1] = a, Params[2] = b, Params[3] = c,
+// Params[4] = d, Params[5] = e, Params[6] = f.
+static
+cmsFloat32Number param_5(cmsFloat32Number x, const cmsFloat64Number Params[])
+{
+    cmsFloat64Number Base;
+
+    // Linear segment below the breakpoint
+    if (!(x >= Params[4]))
+        return (cmsFloat32Number) (x*Params[3] + Params[6]);
+
+    Base = Params[1]*x + Params[2];
+
+    if (Base > 0)
+        return (cmsFloat32Number) (pow(Base, Params[0]) + Params[5]);
+
+    return (cmsFloat32Number) 0.0;
+}
+
+// Reference for parametric type 6:
+//   Y = (aX + b)^Gamma + c | aX + b > 0
+//   Y = 0                  | else
+// Params[0] = Gamma, Params[1] = a, Params[2] = b, Params[3] = c.
+static
+cmsFloat32Number param_6(cmsFloat32Number x, const cmsFloat64Number Params[])
+{
+    cmsFloat64Number Base = Params[1]*x + Params[2];
+    cmsFloat64Number Result = 0;
+
+    if (Base > 0)
+        Result = pow(Base, Params[0]) + Params[3];
+
+    return (cmsFloat32Number) Result;
+}
+
+// Reference for parametric type 7: Y = a * log10(b * X^Gamma + c) + d
+// Params[0] = Gamma, Params[1] = a, Params[2] = b, Params[3] = c,
+// Params[4] = d.
+static
+cmsFloat32Number param_7(cmsFloat32Number x, const cmsFloat64Number Params[])
+{
+    cmsFloat64Number Val;
+
+
+    Val = Params[1]*log10(Params[2] * pow(x, Params[0]) + Params[3]) + Params[4];
+
+    return (cmsFloat32Number) Val;
+}
+
+
+// Reference for parametric type 8: Y = a * b^(c*X + d) + e
+// Params[0] = a, Params[1] = b, Params[2] = c, Params[3] = d, Params[4] = e.
+static
+cmsFloat32Number param_8(cmsFloat32Number x, const cmsFloat64Number Params[])
+{
+    cmsFloat64Number Val;
+
+    Val = (Params[0] * pow(Params[1], Params[2] * x + Params[3]) + Params[4]);
+
+    return (cmsFloat32Number) Val;
+}
+
+
+// Reference for the S-shaped parametric type 108:
+// Y = (1 - (1 - X)^(1/g))^(1/g), with g in Params[0].
+static
+cmsFloat32Number sigmoidal(cmsFloat32Number x, const cmsFloat64Number Params[])
+{
+    cmsFloat64Number Val;
+
+    Val = pow(1.0 - pow(1 - x, 1/Params[0]), 1/Params[0]);
+
+    return (cmsFloat32Number) Val;
+}
+
+
+// Checks one parametric curve type against a reference implementation.
+//   Name   - label used when reporting mismatches
+//   fn     - reference function computing the expected output
+//   Type   - parametric curve type; the inverse is built with -Type
+//   Params - parameters shared by the curve and the reference function
+// For x = i/1000, i in 0..1000, the curve output must match fn(x), and
+// fn applied to the inverse curve of that output must match fn(x) as well,
+// both within FIXED_PRECISION_15_16.
+// Returns TRUE on success, FALSE on any mismatch or when a curve cannot be
+// built. Both curves are released on every exit path.
+static
+cmsBool CheckSingleParametric(const char* Name, dblfnptr fn, cmsInt32Number Type, const cmsFloat64Number Params[])
+{
+    cmsInt32Number i;
+    cmsToneCurve* tc;
+    cmsToneCurve* tc_1;
+    char InverseText[256];
+
+    tc = cmsBuildParametricToneCurve(DbgThread(), Type, Params);
+    tc_1 = cmsBuildParametricToneCurve(DbgThread(), -Type, Params);
+
+    // Evaluating a curve that failed to build would dereference NULL
+    if (tc == NULL || tc_1 == NULL) {
+        Fail("Unable to build parametric curve %s", Name);
+        goto Error;
+    }
+
+    // The label does not depend on the sample, so format it only once
+    sprintf(InverseText, "Inverse %s", Name);
+
+    for (i=0; i <= 1000; i++) {
+
+        cmsFloat32Number x = (cmsFloat32Number) i / 1000;
+        cmsFloat32Number y_fn, y_param, x_param, y_param2;
+
+        y_fn = fn(x, Params);
+        y_param = cmsEvalToneCurveFloat(tc, x);
+        x_param = cmsEvalToneCurveFloat(tc_1, y_param);
+
+        y_param2 = fn(x_param, Params);
+
+        if (!IsGoodVal(Name, y_fn, y_param, FIXED_PRECISION_15_16))
+            goto Error;
+
+        if (!IsGoodVal(InverseText, y_fn, y_param2, FIXED_PRECISION_15_16))
+            goto Error;
+    }
+
+    cmsFreeToneCurve(tc);
+    cmsFreeToneCurve(tc_1);
+    return TRUE;
+
+Error:
+    if (tc != NULL) cmsFreeToneCurve(tc);
+    if (tc_1 != NULL) cmsFreeToneCurve(tc_1);
+    return FALSE;
+}
+
+// Check against some known values.
+// Every supported parametric type (1..8 and 108) is compared against its
+// reference function via CheckSingleParametric; the first failure aborts
+// the test with 0, otherwise 1 is returned.
+// Params is reused from one case to the next; each case overwrites only the
+// entries its curve type reads.
+static
+cmsInt32Number CheckParametricToneCurves(void)
+{
+    cmsFloat64Number Params[10];
+
+     // 1) Y = X ^ Gamma
+
+     Params[0] = 2.2;
+
+     if (!CheckSingleParametric("Gamma", Gamma, 1, Params)) return 0;
+
+     // 2) CIE 122-1966
+     // Y = (aX + b)^Gamma  | X >= -b/a
+     // Y = 0               | else
+
+     Params[0] = 2.2;
+     Params[1] = 1.5;
+     Params[2] = -0.5;
+
+     if (!CheckSingleParametric("CIE122-1966", CIE122, 2, Params)) return 0;
+
+     // 3) IEC 61966-3
+     // Y = (aX + b)^Gamma + c | X >= -b/a
+     // Y = c                  | else
+
+     Params[0] = 2.2;
+     Params[1] = 1.5;
+     Params[2] = -0.5;
+     Params[3] = 0.3;
+
+
+     if (!CheckSingleParametric("IEC 61966-3", IEC61966_3, 3, Params)) return 0;
+
+     // 4) IEC 61966-2.1 (sRGB)
+     // Y = (aX + b)^Gamma | X >= d
+     // Y = cX             | X < d
+
+     Params[0] = 2.4;
+     Params[1] = 1. / 1.055;
+     Params[2] = 0.055 / 1.055;
+     Params[3] = 1. / 12.92;
+     Params[4] = 0.04045;
+
+     if (!CheckSingleParametric("IEC 61966-2.1", IEC61966_21, 4, Params)) return 0;
+
+
+     // 5) Y = (aX + b)^Gamma + e | X >= d
+     // Y = cX + f             | else
+
+     Params[0] = 2.2;
+     Params[1] = 0.7;
+     Params[2] = 0.2;
+     Params[3] = 0.3;
+     Params[4] = 0.1;
+     Params[5] = 0.5;
+     Params[6] = 0.2;
+
+     if (!CheckSingleParametric("param_5", param_5, 5, Params)) return 0;
+
+     // 6) Y = (aX + b) ^ Gamma + c
+
+     Params[0] = 2.2;
+     Params[1] = 0.7;
+     Params[2] = 0.2;
+     Params[3] = 0.3;
+
+     if (!CheckSingleParametric("param_6", param_6, 6, Params)) return 0;
+
+     // 7) Y = a * log (b * X^Gamma + c) + d
+
+     Params[0] = 2.2;
+     Params[1] = 0.9;
+     Params[2] = 0.9;
+     Params[3] = 0.02;
+     Params[4] = 0.1;
+
+     if (!CheckSingleParametric("param_7", param_7, 7, Params)) return 0;
+
+     // 8) Y = a * b ^ (c*X+d) + e
+
+     Params[0] = 0.9;
+     Params[1] = 0.9;
+     Params[2] = 1.02;
+     Params[3] = 0.1;
+     Params[4] = 0.2;
+
+     if (!CheckSingleParametric("param_8", param_8, 8, Params)) return 0;
+
+     // 108: S-Shaped: (1 - (1-x)^1/g)^1/g
+
+     Params[0] = 1.9;
+     if (!CheckSingleParametric("sigmoidal", sigmoidal, 108, Params)) return 0;
+
+     // All OK
+
+     return 1;
+}
+
+// LUT checks ------------------------------------------------------------------------------
+
+// Allocates an empty 1 -> 1 pipeline, duplicates it, and checks that both
+// the original and the copy report zero stages.
+// NOTE(review): neither allocation result is checked for NULL before use.
+static
+cmsInt32Number CheckLUTcreation(void)
+{
+    cmsPipeline* lut;
+    cmsPipeline* lut2;
+    cmsInt32Number n1, n2;
+
+    lut = cmsPipelineAlloc(DbgThread(), 1, 1);
+    n1 = cmsPipelineStageCount(lut);
+    lut2 = cmsPipelineDup(lut);
+    n2 = cmsPipelineStageCount(lut2);
+
+    cmsPipelineFree(lut);
+    cmsPipelineFree(lut2);
+
+    return (n1 == 0) && (n2 == 0);
+}
+
+// Create a MPE for an identity matrix and append it to the end of lut.
+// The stage is allocated as 3 x 3 with a NULL offset.
+// NOTE(review): the array carries a fourth, all-zero row; with 3 rows and
+// 3 columns presumably only the first nine entries are consumed - confirm.
+static
+void AddIdentityMatrix(cmsPipeline* lut)
+{
+    const cmsFloat64Number Identity[] = { 1, 0, 0,
+                          0, 1, 0,
+                          0, 0, 1,
+                          0, 0, 0 };
+
+    cmsPipelineInsertStage(lut, cmsAT_END, cmsStageAllocMatrix(DbgThread(), 3, 3, Identity, NULL));
+}
+
+// Create a MPE for an identity cmsFloat32Number CLUT and append it to the
+// end of lut. The table lists the eight corners of the unit cube, three
+// output values per node (presumably 2 grid points on each of 3 inputs).
+static
+void AddIdentityCLUTfloat(cmsPipeline* lut)
+{
+    const cmsFloat32Number  Table[] = {
+
+        0,    0,    0,
+        0,    0,    1.0,
+
+        0,    1.0,    0,
+        0,    1.0,    1.0,
+
+        1.0,    0,    0,
+        1.0,    0,    1.0,
+
+        1.0,    1.0,    0,
+        1.0,    1.0,    1.0
+    };
+
+    cmsPipelineInsertStage(lut, cmsAT_END, cmsStageAllocCLutFloat(DbgThread(), 2, 3, 3, Table));
+}
+
+// Create a MPE for an identity 16-bit (cmsUInt16Number) CLUT and append it
+// to the end of lut. The table lists the eight corners of the cube with
+// 0xffff as full scale, three output values per node (presumably 2 grid
+// points on each of 3 inputs).
+static
+void AddIdentityCLUT16(cmsPipeline* lut)
+{
+    const cmsUInt16Number Table[] = {
+
+        0,    0,    0,
+        0,    0,    0xffff,
+
+        0,    0xffff,    0,
+        0,    0xffff,    0xffff,
+
+        0xffff,    0,    0,
+        0xffff,    0,    0xffff,
+
+        0xffff,    0xffff,    0,
+        0xffff,    0xffff,    0xffff
+    };
+
+
+    cmsPipelineInsertStage(lut, cmsAT_END, cmsStageAllocCLut16bit(DbgThread(), 2, 3, 3, Table));
+}
+
+
+// Append a tone-curve stage made of three curves, all of gamma Curve, to
+// the end of lut. A Curve of 1.0 gives identity curves.
+
+static
+void Add3GammaCurves(cmsPipeline* lut, cmsFloat64Number Curve)
+{
+    cmsToneCurve* id = cmsBuildGamma(DbgThread(), Curve);
+    cmsToneCurve* id3[3];
+
+    // The same curve object is used for the three channels
+    id3[0] = id;
+    id3[1] = id;
+    id3[2] = id;
+
+    cmsPipelineInsertStage(lut, cmsAT_END, cmsStageAllocToneCurves(DbgThread(), 3, id3));
+
+    // NOTE(review): freeing here assumes the stage keeps its own copy of the
+    // curves - confirm against cmsStageAllocToneCurves.
+    cmsFreeToneCurve(id);
+}
+
+
+// Checks that lut behaves as an identity in floating point: for each level
+// in 0..65534 the gray input level/65535 is evaluated and every output
+// channel, rounded back to 16 bits, must equal the level.
+// Returns 1 when no channel ever differs.
+static
+cmsInt32Number CheckFloatLUT(cmsPipeline* lut)
+{
+    cmsInt32Number Errors = 0;
+    cmsInt32Number Level, Chan;
+
+    for (Level = 0; Level < 65535; Level++) {
+
+        cmsFloat32Number In[3], Out[3];
+
+        In[0] = In[1] = In[2] = (cmsFloat32Number) Level / 65535.0F;
+        cmsPipelineEvalFloat(In, Out, lut);
+
+        for (Chan = 0; Chan < 3; Chan++) {
+
+            cmsInt32Number Rounded = (cmsInt32Number) floor(Out[Chan]*65535.0 + 0.5);
+
+            if (Rounded != Level)
+                Errors++;
+        }
+    }
+
+    return (Errors == 0);
+}
+
+
+// Checks that lut behaves as an identity on 16 bits: for each level in
+// 0..65534 the gray input is evaluated and every output channel must equal
+// the level. Returns 1 when no channel ever differs.
+static
+cmsInt32Number Check16LUT(cmsPipeline* lut)
+{
+    cmsInt32Number Errors = 0;
+    cmsInt32Number Level, Chan;
+
+    for (Level = 0; Level < 65535; Level++) {
+
+        cmsUInt16Number In[3], Out[3];
+
+        In[0] = In[1] = In[2] = (cmsUInt16Number) Level;
+        cmsPipelineEval16(In, Out, lut);
+
+        for (Chan = 0; Chan < 3; Chan++) {
+
+            if ((cmsInt32Number) Out[Chan] != Level)
+                Errors++;
+        }
+    }
+
+    return (Errors == 0);
+}
+
+
+// Check the shape of a LUT: it has to take 3 channels, give 3 channels and
+// hold exactly ExpectedStages stages.
+static
+cmsInt32Number CheckStagesLUT(cmsPipeline* lut, cmsInt32Number ExpectedStages)
+{
+    cmsInt32Number Inputs  = cmsPipelineInputChannels(lut);
+    cmsInt32Number Outputs = cmsPipelineOutputChannels(lut);
+    cmsInt32Number Stages  = cmsPipelineStageCount(lut);
+
+    if (Inputs != 3) return 0;
+    if (Outputs != 3) return 0;
+
+    return (Stages == ExpectedStages);
+}
+
+
+// Runs the stage-count, 16-bit and floating point identity checks on lut,
+// in that order and stopping at the first failure, then frees the pipeline.
+// The caller must not use lut afterwards.
+static
+cmsInt32Number CheckFullLUT(cmsPipeline* lut, cmsInt32Number ExpectedStages)
+{
+    cmsInt32Number rc = CheckStagesLUT(lut, ExpectedStages) && Check16LUT(lut) && CheckFloatLUT(lut);
+
+    cmsPipelineFree(lut);
+    return rc;
+}
+
+
+// One stage: identity matrix. CheckFullLUT verifies and frees the pipeline.
+static
+cmsInt32Number Check1StageLUT(void)
+{
+    cmsPipeline* lut = cmsPipelineAlloc(DbgThread(), 3, 3);
+
+    AddIdentityMatrix(lut);
+    return CheckFullLUT(lut, 1);
+}
+
+
+
+// Two stages: identity matrix + float identity CLUT.
+// CheckFullLUT verifies and frees the pipeline.
+static
+cmsInt32Number Check2StageLUT(void)
+{
+    cmsPipeline* lut = cmsPipelineAlloc(DbgThread(), 3, 3);
+
+    AddIdentityMatrix(lut);
+    AddIdentityCLUTfloat(lut);
+
+    return CheckFullLUT(lut, 2);
+}
+
+// Two stages: identity matrix + 16-bit identity CLUT.
+// CheckFullLUT verifies and frees the pipeline.
+static
+cmsInt32Number Check2Stage16LUT(void)
+{
+    cmsPipeline* lut = cmsPipelineAlloc(DbgThread(), 3, 3);
+
+    AddIdentityMatrix(lut);
+    AddIdentityCLUT16(lut);
+
+    return CheckFullLUT(lut, 2);
+}
+
+
+
+// Three stages: identity matrix + float identity CLUT + gamma 1.0 curves.
+// CheckFullLUT verifies and frees the pipeline.
+static
+cmsInt32Number Check3StageLUT(void)
+{
+    cmsPipeline* lut = cmsPipelineAlloc(DbgThread(), 3, 3);
+
+    AddIdentityMatrix(lut);
+    AddIdentityCLUTfloat(lut);
+    Add3GammaCurves(lut, 1.0);
+
+    return CheckFullLUT(lut, 3);
+}
+
+// Three stages: identity matrix + 16-bit identity CLUT + gamma 1.0 curves.
+// CheckFullLUT verifies and frees the pipeline.
+static
+cmsInt32Number Check3Stage16LUT(void)
+{
+    cmsPipeline* lut = cmsPipelineAlloc(DbgThread(), 3, 3);
+
+    AddIdentityMatrix(lut);
+    AddIdentityCLUT16(lut);
+    Add3GammaCurves(lut, 1.0);
+
+    return CheckFullLUT(lut, 3);
+}
+
+
+
+// Four stages: matrix + float CLUT + gamma 1.0 curves + matrix, all
+// identities. CheckFullLUT verifies and frees the pipeline.
+static
+cmsInt32Number Check4StageLUT(void)
+{
+    cmsPipeline* lut = cmsPipelineAlloc(DbgThread(), 3, 3);
+
+    AddIdentityMatrix(lut);
+    AddIdentityCLUTfloat(lut);
+    Add3GammaCurves(lut, 1.0);
+    AddIdentityMatrix(lut);
+
+    return CheckFullLUT(lut, 4);
+}
+
+// Four stages: matrix + 16-bit CLUT + gamma 1.0 curves + matrix, all
+// identities. CheckFullLUT verifies and frees the pipeline.
+static
+cmsInt32Number Check4Stage16LUT(void)
+{
+    cmsPipeline* lut = cmsPipelineAlloc(DbgThread(), 3, 3);
+
+    AddIdentityMatrix(lut);
+    AddIdentityCLUT16(lut);
+    Add3GammaCurves(lut, 1.0);
+    AddIdentityMatrix(lut);
+
+    return CheckFullLUT(lut, 4);
+}
+
+// Five stages: matrix + float CLUT + curves + matrix + curves, all
+// identities. CheckFullLUT verifies and frees the pipeline.
+static
+cmsInt32Number Check5StageLUT(void)
+{
+    cmsPipeline* lut = cmsPipelineAlloc(DbgThread(), 3, 3);
+
+    AddIdentityMatrix(lut);
+    AddIdentityCLUTfloat(lut);
+    Add3GammaCurves(lut, 1.0);
+    AddIdentityMatrix(lut);
+    Add3GammaCurves(lut, 1.0);
+
+    return CheckFullLUT(lut, 5);
+}
+
+
+// Five stages: matrix + 16-bit CLUT + curves + matrix + curves, all
+// identities. CheckFullLUT verifies and frees the pipeline.
+static
+cmsInt32Number Check5Stage16LUT(void)
+{
+    cmsPipeline* lut = cmsPipelineAlloc(DbgThread(), 3, 3);
+
+    AddIdentityMatrix(lut);
+    AddIdentityCLUT16(lut);
+    Add3GammaCurves(lut, 1.0);
+    AddIdentityMatrix(lut);
+    Add3GammaCurves(lut, 1.0);
+
+    return CheckFullLUT(lut, 5);
+}
+
+// Six stages: matrix + curves + float CLUT + curves + matrix + curves, all
+// identities. CheckFullLUT verifies and frees the pipeline.
+static
+cmsInt32Number Check6StageLUT(void)
+{
+    cmsPipeline* lut = cmsPipelineAlloc(DbgThread(), 3, 3);
+
+    AddIdentityMatrix(lut);
+    Add3GammaCurves(lut, 1.0);
+    AddIdentityCLUTfloat(lut);
+    Add3GammaCurves(lut, 1.0);
+    AddIdentityMatrix(lut);
+    Add3GammaCurves(lut, 1.0);
+
+    return CheckFullLUT(lut, 6);
+}
+
+// Six stages: matrix + curves + 16-bit CLUT + curves + matrix + curves, all
+// identities. CheckFullLUT verifies and frees the pipeline.
+static
+cmsInt32Number Check6Stage16LUT(void)
+{
+    cmsPipeline* lut = cmsPipelineAlloc(DbgThread(), 3, 3);
+
+    AddIdentityMatrix(lut);
+    Add3GammaCurves(lut, 1.0);
+    AddIdentityCLUT16(lut);
+    Add3GammaCurves(lut, 1.0);
+    AddIdentityMatrix(lut);
+    Add3GammaCurves(lut, 1.0);
+
+    return CheckFullLUT(lut, 6);
+}
+
+
+// Pipeline Lab -> XYZ -> Lab: has to be an identity in floating point and
+// hold exactly two stages. Only the float check is run here; the 16-bit
+// check is not.
+static
+cmsInt32Number CheckLab2LabLUT(void)
+{
+    cmsPipeline* lut = cmsPipelineAlloc(DbgThread(), 3, 3);
+    cmsInt32Number rc;
+
+    cmsPipelineInsertStage(lut, cmsAT_END, _cmsStageAllocLab2XYZ(DbgThread()));
+    cmsPipelineInsertStage(lut, cmsAT_END, _cmsStageAllocXYZ2Lab(DbgThread()));
+
+    rc = CheckFloatLUT(lut) && CheckStagesLUT(lut, 2);
+
+    cmsPipelineFree(lut);
+
+    return rc;
+}
+
+
+// Pipeline XYZ -> Lab -> XYZ: has to be an identity in floating point and
+// hold exactly two stages. Only the float check is run here; the 16-bit
+// check is not.
+static
+cmsInt32Number CheckXYZ2XYZLUT(void)
+{
+    cmsPipeline* lut = cmsPipelineAlloc(DbgThread(), 3, 3);
+    cmsInt32Number rc;
+
+    cmsPipelineInsertStage(lut, cmsAT_END, _cmsStageAllocXYZ2Lab(DbgThread()));
+    cmsPipelineInsertStage(lut, cmsAT_END, _cmsStageAllocLab2XYZ(DbgThread()));
+
+    rc = CheckFloatLUT(lut) && CheckStagesLUT(lut, 2);
+
+    cmsPipelineFree(lut);
+
+    return rc;
+}
+
+
+
+// Pipeline Lab -> XYZ -> identity matrix -> Lab: has to be an identity in
+// floating point and hold exactly three stages.
+static
+cmsInt32Number CheckLab2LabMatLUT(void)
+{
+    cmsPipeline* lut = cmsPipelineAlloc(DbgThread(), 3, 3);
+    cmsInt32Number rc;
+
+    cmsPipelineInsertStage(lut, cmsAT_END, _cmsStageAllocLab2XYZ(DbgThread()));
+    AddIdentityMatrix(lut);
+    cmsPipelineInsertStage(lut, cmsAT_END, _cmsStageAllocXYZ2Lab(DbgThread()));
+
+    rc = CheckFloatLUT(lut) && CheckStagesLUT(lut, 3);
+
+    cmsPipelineFree(lut);
+
+    return rc;
+}
+
+// Builds a named color list of 256 entries whose PCS and colorant values
+// equal the entry index, wraps it in a pipeline stage, and checks that
+// evaluating index j through the pipeline gives j on the three outputs.
+// Returns 1 on success, 0 on any failure. The pipeline is released on every
+// exit path.
+static
+cmsInt32Number CheckNamedColorLUT(void)
+{
+    cmsPipeline* lut = cmsPipelineAlloc(DbgThread(), 3, 3);
+    cmsNAMEDCOLORLIST* nc;
+    cmsInt32Number i,j, rc = 1, n2;
+    cmsUInt16Number PCS[3];
+    cmsUInt16Number Colorant[cmsMAXCHANNELS];
+    char Name[255];
+    cmsUInt16Number Inw[3], Outw[3];
+
+    if (lut == NULL) return 0;
+
+    nc = cmsAllocNamedColorList(DbgThread(), 256, 3, "pre", "post");
+    if (nc == NULL) {
+
+        // Do not leak the pipeline when the list cannot be allocated
+        cmsPipelineFree(lut);
+        return 0;
+    }
+
+    for (i=0; i < 256; i++) {
+
+        PCS[0] = PCS[1] = PCS[2] = (cmsUInt16Number) i;
+        Colorant[0] = Colorant[1] = Colorant[2] = Colorant[3] = (cmsUInt16Number) i;
+
+        sprintf(Name, "#%d", i);
+        if (!cmsAppendNamedColor(nc, Name, PCS, Colorant)) { rc = 0; break; }
+    }
+
+    cmsPipelineInsertStage(lut, cmsAT_END, _cmsStageAllocNamedColor(nc, FALSE));
+
+    cmsFreeNamedColorList(nc);
+    if (rc == 0) {
+
+        // Do not leak the pipeline when a color could not be appended
+        cmsPipelineFree(lut);
+        return 0;
+    }
+
+    n2=0;
+
+    // Only the first channel carries the color index; the remaining inputs
+    // are zeroed so the pipeline never reads uninitialized values.
+    Inw[1] = Inw[2] = 0;
+
+    for (j=0; j < 256; j++) {
+
+        Inw[0] = (cmsUInt16Number) j;
+
+        cmsPipelineEval16(Inw, Outw, lut);
+        for (i=0; i < 3; i++) {
+
+            if (Outw[i] != j) {
+                n2++;
+            }
+        }
+
+    }
+
+    cmsPipelineFree(lut);
+    return (n2 == 0);
+}
+
+
+
+// --------------------------------------------------------------------------------------------
+
+// A lightweight test of multilocalized unicode structures.
+// Three parts:
+//   1) set four wide-string translations and read them back as ASCII,
+//   2) fill a MLU with 4096 entries, duplicate it, free the original and
+//      verify the copy,
+//   3) write the copy as the profile description tag of "mlucheck.icc",
+//      read the profile back and verify the entries again.
+// Returns 1 on success, 0 on failure. The temporary profile file is removed
+// before returning.
+
+static
+cmsInt32Number CheckMLU(void)
+{
+    cmsMLU* mlu, *mlu2, *mlu3;
+    char Buffer[256], Buffer2[256];
+    cmsInt32Number rc = 1;
+    cmsInt32Number i;
+    cmsHPROFILE h= NULL;
+
+    // Allocate a MLU structure, no preferred size
+    mlu = cmsMLUalloc(DbgThread(), 0);
+
+    // Add some localizations
+    cmsMLUsetWide(mlu, "en", "US", L"Hello, world");
+    cmsMLUsetWide(mlu, "es", "ES", L"Hola, mundo");
+    cmsMLUsetWide(mlu, "fr", "FR", L"Bonjour, le monde");
+    cmsMLUsetWide(mlu, "ca", "CA", L"Hola, mon");
+
+
+    // Check the returned string for each language
+
+    cmsMLUgetASCII(mlu, "en", "US", Buffer, 256);
+    if (strcmp(Buffer, "Hello, world") != 0) rc = 0;
+
+
+    cmsMLUgetASCII(mlu, "es", "ES", Buffer, 256);
+    if (strcmp(Buffer, "Hola, mundo") != 0) rc = 0;
+
+
+    cmsMLUgetASCII(mlu, "fr", "FR", Buffer, 256);
+    if (strcmp(Buffer, "Bonjour, le monde") != 0) rc = 0;
+
+
+    cmsMLUgetASCII(mlu, "ca", "CA", Buffer, 256);
+    if (strcmp(Buffer, "Hola, mon") != 0) rc = 0;
+
+    // Buffer holds the last string read, not necessarily the failing one
+    if (rc == 0)
+        Fail("Unexpected string '%s'", Buffer);
+
+    // So far, so good.
+    cmsMLUfree(mlu);
+
+    // Now for performance, allocate an empty struct
+    mlu = cmsMLUalloc(DbgThread(), 0);
+
+    // Fill it with several thousands of different languages
+    for (i=0; i < 4096; i++) {
+
+        char Lang[3];
+
+        // Two-character synthetic code derived from the index; the same
+        // string is used as both language and country
+        Lang[0] = (char) (i % 255);
+        Lang[1] = (char) (i / 255);
+        Lang[2] = 0;
+
+        sprintf(Buffer, "String #%i", i);
+        cmsMLUsetASCII(mlu, Lang, Lang, Buffer);
+    }
+
+    // Duplicate it
+    mlu2 = cmsMLUdup(mlu);
+
+    // Get rid of original
+    cmsMLUfree(mlu);
+
+    // Check all is still in place
+    for (i=0; i < 4096; i++) {
+
+        char Lang[3];
+
+        Lang[0] = (char)(i % 255);
+        Lang[1] = (char)(i / 255);
+        Lang[2] = 0;
+
+        cmsMLUgetASCII(mlu2, Lang, Lang, Buffer2, 256);
+        sprintf(Buffer, "String #%i", i);
+
+        if (strcmp(Buffer, Buffer2) != 0) { rc = 0; break; }
+    }
+
+    if (rc == 0)
+        Fail("Unexpected string '%s'", Buffer2);
+
+    // Check profile IO
+
+    h = cmsOpenProfileFromFileTHR(DbgThread(), "mlucheck.icc", "w");
+
+    // Without a profile there is nothing to write to; bail out instead of
+    // handing a NULL handle to the library
+    if (h == NULL) {
+        Fail("Unable to create mlucheck.icc");
+        cmsMLUfree(mlu2);
+        rc = 0;
+        goto Error;
+    }
+
+    cmsSetProfileVersion(h, 4.3);
+
+    cmsWriteTag(h, cmsSigProfileDescriptionTag, mlu2);
+    cmsCloseProfile(h);
+    cmsMLUfree(mlu2);
+
+
+    h = cmsOpenProfileFromFileTHR(DbgThread(), "mlucheck.icc", "r");
+
+    // Same guard for the read-back pass
+    if (h == NULL) {
+        Fail("Unable to open mlucheck.icc");
+        rc = 0;
+        goto Error;
+    }
+
+    mlu3 = (cmsMLU *) cmsReadTag(h, cmsSigProfileDescriptionTag);
+    if (mlu3 == NULL) { Fail("Profile didn't get the MLU\n"); rc = 0; goto Error; }
+
+    // Check all is still in place
+    for (i=0; i < 4096; i++) {
+
+        char Lang[3];
+
+        Lang[0] = (char) (i % 255);
+        Lang[1] = (char) (i / 255);
+        Lang[2] = 0;
+
+        cmsMLUgetASCII(mlu3, Lang, Lang, Buffer2, 256);
+        sprintf(Buffer, "String #%i", i);
+
+        if (strcmp(Buffer, Buffer2) != 0) { rc = 0; break; }
+    }
+
+    if (rc == 0) Fail("Unexpected string '%s'", Buffer2);
+
+Error:
+
+    // mlu3 is not freed here; only the profile handle is closed
+    if (h != NULL) cmsCloseProfile(h);
+    remove("mlucheck.icc");
+
+    return rc;
+}
+// A lightweight test of named color structures: builds a 4096-entry list, checks it
+// in memory, writes it to "namedcol.icc", reads it back and checks it again.
+// Returns 1 on success and 0 on failure; every exit after allocation goes through Error.
+static
+cmsInt32Number CheckNamedColorList(void)
+{
+    cmsNAMEDCOLORLIST* nc = NULL, *nc2;
+    cmsInt32Number i, j, rc=1;
+    char Name[cmsMAX_PATH];
+    cmsUInt16Number PCS[3];
+    cmsUInt16Number Colorant[cmsMAXCHANNELS];
+    char CheckName[cmsMAX_PATH];
+    cmsUInt16Number CheckPCS[3];
+    cmsUInt16Number CheckColorant[cmsMAXCHANNELS];
+    cmsHPROFILE h = NULL;   // NULL until a profile is open, so Error can tell
+
+    nc = cmsAllocNamedColorList(DbgThread(), 0, 4, "prefix", "suffix");
+    if (nc == NULL) return 0;
+
+    for (i=0; i < 4096; i++) {
+
+        PCS[0] = PCS[1] = PCS[2] = (cmsUInt16Number) i;
+        Colorant[0] = Colorant[1] = Colorant[2] = Colorant[3] = (cmsUInt16Number) (4096 - i);
+
+        sprintf(Name, "#%d", i);
+        if (!cmsAppendNamedColor(nc, Name, PCS, Colorant)) { rc = 0; break; }
+    }
+
+    for (i=0; i < 4096; i++) {
+
+        CheckPCS[0] = CheckPCS[1] = CheckPCS[2] = (cmsUInt16Number) i;
+        CheckColorant[0] = CheckColorant[1] = CheckColorant[2] = CheckColorant[3] = (cmsUInt16Number) (4096 - i);
+
+        sprintf(CheckName, "#%d", i);
+        if (!cmsNamedColorInfo(nc, i, Name, NULL, NULL, PCS, Colorant)) { rc = 0; goto Error; }
+
+        for (j=0; j < 3; j++) {
+            if (CheckPCS[j] != PCS[j]) { rc = 0; Fail("Invalid PCS"); goto Error; }
+        }
+
+        for (j=0; j < 4; j++) {
+            if (CheckColorant[j] != Colorant[j]) { rc = 0; Fail("Invalid Colorant"); goto Error; }
+        }
+
+        if (strcmp(Name, CheckName) != 0) { rc = 0; Fail("Invalid Name"); goto Error; }
+    }
+
+    h = cmsOpenProfileFromFileTHR(DbgThread(), "namedcol.icc", "w");
+    if (h == NULL) { rc = 0; goto Error; }
+    if (!cmsWriteTag(h, cmsSigNamedColor2Tag, nc)) { rc = 0; goto Error; }
+    cmsCloseProfile(h);
+    cmsFreeNamedColorList(nc);
+    nc = NULL;
+
+    h = cmsOpenProfileFromFileTHR(DbgThread(), "namedcol.icc", "r");
+    if (h == NULL) { rc = 0; Fail("Cannot reopen namedcol.icc"); goto Error; }
+
+    // nc2 is not freed in this function; NOTE(review): presumably owned by the profile
+    nc2 = (cmsNAMEDCOLORLIST *) cmsReadTag(h, cmsSigNamedColor2Tag);
+    if (nc2 == NULL) { rc = 0; Fail("Named color tag not found"); goto Error; }
+
+    if (cmsNamedColorCount(nc2) != 4096) { rc = 0; Fail("Invalid count"); goto Error; }
+
+    i = cmsNamedColorIndex(nc2, "#123");
+    if (i != 123) { rc = 0; Fail("Invalid index"); goto Error; }
+
+    for (i=0; i < 4096; i++) {
+
+        CheckPCS[0] = CheckPCS[1] = CheckPCS[2] = (cmsUInt16Number) i;
+        CheckColorant[0] = CheckColorant[1] = CheckColorant[2] = CheckColorant[3] = (cmsUInt16Number) (4096 - i);
+
+        sprintf(CheckName, "#%d", i);
+        if (!cmsNamedColorInfo(nc2, i, Name, NULL, NULL, PCS, Colorant)) { rc = 0; goto Error; }
+
+        for (j=0; j < 3; j++) {
+            if (CheckPCS[j] != PCS[j]) { rc = 0; Fail("Invalid PCS"); goto Error; }
+        }
+
+        for (j=0; j < 4; j++) {
+            if (CheckColorant[j] != Colorant[j]) { rc = 0; Fail("Invalid Colorant"); goto Error; }
+        }
+
+        if (strcmp(Name, CheckName) != 0) { rc = 0; Fail("Invalid Name"); goto Error; }
+    }
+
+Error:
+    // Shared cleanup, reached on success as well as on failure
+    if (h != NULL) cmsCloseProfile(h);
+    remove("namedcol.icc");
+    if (nc != NULL) cmsFreeNamedColorList(nc);
+    return rc;
+}
+
+
+
+// ----------------------------------------------------------------------------------------------------------
+
+// Formatters
+// Failure flag for the formatter checks: reset by the CheckFormatters* drivers, set on first failure
+static cmsBool  FormatterFailed;
+// Checks the 16-bit formatters of one pixel format: values pushed through the
+// output formatter must come back unchanged from the input formatter.
+// Sets FormatterFailed (and reports through Fail) on the first problem found.
+static
+void CheckSingleFormatter16(cmsContext id, cmsUInt32Number Type, const char* Text)
+{
+    cmsUInt16Number Values[cmsMAXCHANNELS];
+    cmsUInt8Number Buffer[1024];
+    cmsFormatter f, b;
+    cmsInt32Number ch, base, nChannels, shift;
+    _cmsTRANSFORM info;
+
+    // An earlier format already failed: nothing more to do
+    if (FormatterFailed) return;
+
+    memset(&info, 0, sizeof(info));
+    info.InputFormat  = Type;
+    info.OutputFormat = Type;
+
+    // Fetch both directions for this format
+    f = _cmsGetFormatter(id, Type,  cmsFormatterInput, CMS_PACK_FLAGS_16BITS);
+    b = _cmsGetFormatter(id, Type,  cmsFormatterOutput, CMS_PACK_FLAGS_16BITS);
+
+    if (f.Fmt16 == NULL || b.Fmt16 == NULL) {
+
+        Fail("no formatter for %s", Text);
+        FormatterFailed = TRUE;
+
+        // Repeated lookup, handy as a breakpoint target when debugging
+        f = _cmsGetFormatter(id, Type,  cmsFormatterInput, CMS_PACK_FLAGS_16BITS);
+        b = _cmsGetFormatter(id, Type,  cmsFormatterOutput, CMS_PACK_FLAGS_16BITS);
+        return;
+    }
+
+    nChannels = T_CHANNELS(Type);
+
+    // One-byte formats get the test value shifted into the high byte before the
+    // round trip and shifted back down afterwards; wider formats are left alone
+    shift = (T_BYTES(Type) == 1) ? 8 : 0;
+
+    for (base = 0; base < 5; base++) {
+
+        for (ch = 0; ch < nChannels; ch++) {
+            Values[ch] = (cmsUInt16Number) ((ch + base) << shift);
+        }
+
+        // Out to Buffer, wipe Values, then back in from Buffer
+        b.Fmt16(&info, Values, Buffer, 2);
+        memset(Values, 0, sizeof(Values));
+        f.Fmt16(&info, Values, Buffer, 2);
+
+        for (ch = 0; ch < nChannels; ch++) {
+
+            Values[ch] >>= shift;
+            if (Values[ch] == ch + base) continue;
+
+            Fail("%s failed", Text);
+            FormatterFailed = TRUE;
+
+            // Redo the failing conversion, handy for stepping through in a debugger
+            for (ch = 0; ch < nChannels; ch++) {
+                Values[ch] = (cmsUInt16Number) ((ch + base) << shift);
+            }
+
+            b.Fmt16(&info, Values, Buffer, 1);
+            f.Fmt16(&info, Values, Buffer, 1);
+            return;
+        }
+    }
+}
+// C(a): runs CheckSingleFormatter16 on format a with context 0 and the format's spelled-out name
+#define C(a) CheckSingleFormatter16(0, a, #a)
+
+
+// Check all formatters: runs the 16-bit check on every format listed; returns 1 if none failed, else 0
+static
+cmsInt32Number CheckFormatters16(void)
+{
+    FormatterFailed = FALSE;
+
+   C( TYPE_GRAY_8            );
+   C( TYPE_GRAY_8_REV        );
+   C( TYPE_GRAY_16           );
+   C( TYPE_GRAY_16_REV       );
+   C( TYPE_GRAY_16_SE        );
+   C( TYPE_GRAYA_8           );
+   C( TYPE_GRAYA_16          );
+   C( TYPE_GRAYA_16_SE       );
+   C( TYPE_GRAYA_8_PLANAR    );
+   C( TYPE_GRAYA_16_PLANAR   );
+   C( TYPE_RGB_8             );
+   C( TYPE_RGB_8_PLANAR      );
+   C( TYPE_BGR_8             );
+   C( TYPE_BGR_8_PLANAR      );
+   C( TYPE_RGB_16            );
+   C( TYPE_RGB_16_PLANAR     );
+   C( TYPE_RGB_16_SE         );
+   C( TYPE_BGR_16            );
+   C( TYPE_BGR_16_PLANAR     );
+   C( TYPE_BGR_16_SE         );
+   C( TYPE_RGBA_8            );
+   C( TYPE_RGBA_8_PLANAR     );
+   C( TYPE_RGBA_16           );
+   C( TYPE_RGBA_16_PLANAR    );
+   C( TYPE_RGBA_16_SE        );
+   C( TYPE_ARGB_8            );
+   C( TYPE_ARGB_8_PLANAR     );
+   C( TYPE_ARGB_16           );
+   C( TYPE_ABGR_8            );
+   C( TYPE_ABGR_8_PLANAR     );
+   C( TYPE_ABGR_16           );
+   C( TYPE_ABGR_16_PLANAR    );
+   C( TYPE_ABGR_16_SE        );
+   C( TYPE_BGRA_8            );
+   C( TYPE_BGRA_8_PLANAR     );
+   C( TYPE_BGRA_16           );
+   C( TYPE_BGRA_16_SE        );
+   C( TYPE_CMY_8             );
+   C( TYPE_CMY_8_PLANAR      );
+   C( TYPE_CMY_16            );
+   C( TYPE_CMY_16_PLANAR     );
+   C( TYPE_CMY_16_SE         );
+   C( TYPE_CMYK_8            );
+   C( TYPE_CMYKA_8           );
+   C( TYPE_CMYK_8_REV        );
+   C( TYPE_YUVK_8            );
+   C( TYPE_CMYK_8_PLANAR     );
+   C( TYPE_CMYK_16           );
+   C( TYPE_CMYK_16_REV       );
+   C( TYPE_YUVK_16           );
+   C( TYPE_CMYK_16_PLANAR    );
+   C( TYPE_CMYK_16_SE        );
+   C( TYPE_KYMC_8            );
+   C( TYPE_KYMC_16           );
+   C( TYPE_KYMC_16_SE        );
+   C( TYPE_KCMY_8            );
+   C( TYPE_KCMY_8_REV        );
+   C( TYPE_KCMY_16           );
+   C( TYPE_KCMY_16_REV       );
+   C( TYPE_KCMY_16_SE        );
+   C( TYPE_CMYK5_8           );
+   C( TYPE_CMYK5_16          );
+   C( TYPE_CMYK5_16_SE       );
+   C( TYPE_KYMC5_8           );
+   C( TYPE_KYMC5_16          );
+   C( TYPE_KYMC5_16_SE       );
+   C( TYPE_CMYK6_8          );
+   C( TYPE_CMYK6_8_PLANAR   );
+   C( TYPE_CMYK6_16         );
+   C( TYPE_CMYK6_16_PLANAR  );
+   C( TYPE_CMYK6_16_SE      );
+   C( TYPE_CMYK7_8           );
+   C( TYPE_CMYK7_16          );
+   C( TYPE_CMYK7_16_SE       );
+   C( TYPE_KYMC7_8           );
+   C( TYPE_KYMC7_16          );
+   C( TYPE_KYMC7_16_SE       );
+   C( TYPE_CMYK8_8           );
+   C( TYPE_CMYK8_16          );
+   C( TYPE_CMYK8_16_SE       );
+   C( TYPE_KYMC8_8           );
+   C( TYPE_KYMC8_16          );
+   C( TYPE_KYMC8_16_SE       );
+   C( TYPE_CMYK9_8           );
+   C( TYPE_CMYK9_16          );
+   C( TYPE_CMYK9_16_SE       );
+   C( TYPE_KYMC9_8           );
+   C( TYPE_KYMC9_16          );
+   C( TYPE_KYMC9_16_SE       );
+   C( TYPE_CMYK10_8          );
+   C( TYPE_CMYK10_16         );
+   C( TYPE_CMYK10_16_SE      );
+   C( TYPE_KYMC10_8          );
+   C( TYPE_KYMC10_16         );
+   C( TYPE_KYMC10_16_SE      );
+   C( TYPE_CMYK11_8          );
+   C( TYPE_CMYK11_16         );
+   C( TYPE_CMYK11_16_SE      );
+   C( TYPE_KYMC11_8          );
+   C( TYPE_KYMC11_16         );
+   C( TYPE_KYMC11_16_SE      );
+   C( TYPE_CMYK12_8          );
+   C( TYPE_CMYK12_16         );
+   C( TYPE_CMYK12_16_SE      );
+   C( TYPE_KYMC12_8          );
+   C( TYPE_KYMC12_16         );
+   C( TYPE_KYMC12_16_SE      );
+   C( TYPE_XYZ_16            );
+   C( TYPE_Lab_8             );
+   C( TYPE_ALab_8            );
+   C( TYPE_Lab_16            );
+   C( TYPE_Yxy_16            );
+   C( TYPE_YCbCr_8           );
+   C( TYPE_YCbCr_8_PLANAR    );
+   C( TYPE_YCbCr_16          );
+   C( TYPE_YCbCr_16_PLANAR   );
+   C( TYPE_YCbCr_16_SE       );
+   C( TYPE_YUV_8             );
+   C( TYPE_YUV_8_PLANAR      );
+   C( TYPE_YUV_16            );
+   C( TYPE_YUV_16_PLANAR     );
+   C( TYPE_YUV_16_SE         );
+   C( TYPE_HLS_8             );
+   C( TYPE_HLS_8_PLANAR      );
+   C( TYPE_HLS_16            );
+   C( TYPE_HLS_16_PLANAR     );
+   C( TYPE_HLS_16_SE         );
+   C( TYPE_HSV_8             );
+   C( TYPE_HSV_8_PLANAR      );
+   C( TYPE_HSV_16            );
+   C( TYPE_HSV_16_PLANAR     );
+   C( TYPE_HSV_16_SE         );
+
+   C( TYPE_XYZ_FLT  );
+   C( TYPE_Lab_FLT  );
+   C( TYPE_GRAY_FLT );
+   C( TYPE_RGB_FLT  );
+   C( TYPE_BGR_FLT  );
+   C( TYPE_CMYK_FLT );
+   C( TYPE_LabA_FLT );
+   C( TYPE_RGBA_FLT );
+   C( TYPE_ARGB_FLT );
+   C( TYPE_BGRA_FLT );
+   C( TYPE_ABGR_FLT );
+
+
+   C( TYPE_XYZ_DBL  );
+   C( TYPE_Lab_DBL  );
+   C( TYPE_GRAY_DBL );
+   C( TYPE_RGB_DBL  );
+   C( TYPE_BGR_DBL  );
+   C( TYPE_CMYK_DBL );
+
+   C( TYPE_LabV2_8  );
+   C( TYPE_ALabV2_8 );
+   C( TYPE_LabV2_16 );
+
+#ifndef CMS_NO_HALF_SUPPORT 
+
+   C( TYPE_GRAY_HALF_FLT );
+   C( TYPE_RGB_HALF_FLT  );
+   C( TYPE_CMYK_HALF_FLT );
+   C( TYPE_RGBA_HALF_FLT );
+
+   C( TYPE_RGBA_HALF_FLT );   // NOTE(review): same format as the last entry of the group above
+   C( TYPE_ARGB_HALF_FLT );
+   C( TYPE_BGR_HALF_FLT  );
+   C( TYPE_BGRA_HALF_FLT );
+   C( TYPE_ABGR_HALF_FLT );
+
+#endif
+
+   return FormatterFailed == 0 ? 1 : 0;   // 1 only if no C(...) above set the flag
+}
+#undef C
+// Checks the floating-point formatters of one pixel format: values pushed through
+// the output formatter must come back (difference not above 1e-9) from the input one.
+// Sets FormatterFailed (and reports through Fail) on the first problem found.
+static
+void CheckSingleFormatterFloat(cmsUInt32Number Type, const char* Text)
+{
+    cmsFloat32Number Values[cmsMAXCHANNELS];
+    cmsUInt8Number Buffer[1024];
+    cmsFormatter f, b;
+    cmsInt32Number ch, base, nChannels;
+    cmsFloat64Number delta;
+    _cmsTRANSFORM info;
+
+    // An earlier format already failed: nothing more to do
+    if (FormatterFailed) return;
+
+    memset(&info, 0, sizeof(info));
+    info.InputFormat  = Type;
+    info.OutputFormat = Type;
+
+    // Fetch both directions for this format
+    f = _cmsGetFormatter(0, Type,  cmsFormatterInput, CMS_PACK_FLAGS_FLOAT);
+    b = _cmsGetFormatter(0, Type,  cmsFormatterOutput, CMS_PACK_FLAGS_FLOAT);
+
+    if (f.FmtFloat == NULL || b.FmtFloat == NULL) {
+
+        Fail("no formatter for %s", Text);
+        FormatterFailed = TRUE;
+
+        // Repeated lookup, handy as a breakpoint target when debugging
+        f = _cmsGetFormatter(0, Type,  cmsFormatterInput, CMS_PACK_FLAGS_FLOAT);
+        b = _cmsGetFormatter(0, Type,  cmsFormatterOutput, CMS_PACK_FLAGS_FLOAT);
+        return;
+    }
+
+    nChannels = T_CHANNELS(Type);
+
+    for (base = 0; base < 5; base++) {
+
+        for (ch = 0; ch < nChannels; ch++)
+            Values[ch] = (cmsFloat32Number) (ch + base);
+
+        b.FmtFloat(&info, Values, Buffer, 1);
+        memset(Values, 0, sizeof(Values));
+        f.FmtFloat(&info, Values, Buffer, 1);
+
+        for (ch = 0; ch < nChannels; ch++) {
+            delta = fabs(Values[ch] - (ch + base));
+            if (!(delta > 0.000000001)) continue;   // written this way so a NaN delta is not a failure
+
+            Fail("%s failed", Text);
+            FormatterFailed = TRUE;
+
+            // Redo the failing conversion, handy for stepping through in a debugger
+            for (ch = 0; ch < nChannels; ch++)
+                Values[ch] = (cmsFloat32Number) (ch + base);
+
+            b.FmtFloat(&info, Values, Buffer, 1);
+            f.FmtFloat(&info, Values, Buffer, 1);
+            return;
+        }
+    }
+}
+// C(a): runs CheckSingleFormatterFloat on format a, passing the format's spelled-out name as Text
+#define C(a) CheckSingleFormatterFloat(a, #a)
+
+static
+cmsInt32Number CheckFormattersFloat(void)
+{
+    FormatterFailed = FALSE;
+
+    C( TYPE_XYZ_FLT  );
+    C( TYPE_Lab_FLT  );
+    C( TYPE_GRAY_FLT );
+    C( TYPE_RGB_FLT  );
+    C( TYPE_BGR_FLT  );
+    C( TYPE_CMYK_FLT );
+
+    C( TYPE_LabA_FLT );
+    C( TYPE_RGBA_FLT );
+
+    C( TYPE_ARGB_FLT );
+    C( TYPE_BGRA_FLT );
+    C( TYPE_ABGR_FLT );
+
+    C( TYPE_XYZ_DBL  );
+    C( TYPE_Lab_DBL  );
+    C( TYPE_GRAY_DBL );
+    C( TYPE_RGB_DBL  );
+    C( TYPE_BGR_DBL  );
+    C( TYPE_CMYK_DBL );
+    C( TYPE_XYZ_FLT );   // NOTE(review): already checked as the first entry of this list
+
+#ifndef CMS_NO_HALF_SUPPORT 
+   C( TYPE_GRAY_HALF_FLT );
+   C( TYPE_RGB_HALF_FLT  );
+   C( TYPE_CMYK_HALF_FLT );
+   C( TYPE_RGBA_HALF_FLT );
+
+   C( TYPE_RGBA_HALF_FLT );   // NOTE(review): same format as the last entry of the group above
+   C( TYPE_ARGB_HALF_FLT );
+   C( TYPE_BGR_HALF_FLT  );
+   C( TYPE_BGRA_HALF_FLT );
+   C( TYPE_ABGR_HALF_FLT );
+#endif
+
+
+
+
+   return FormatterFailed == 0 ? 1 : 0;   // 1 only if no C(...) above set the flag
+}
+#undef C
+
+#ifndef CMS_NO_HALF_SUPPORT 
+// Check half float: every 16-bit pattern below 0xffff that decodes to a non-NaN float
+// must encode back to the same pattern. Returns 1 on success, 0 on the first mismatch.
+#define my_isfinite(x) ((x) != (x))   // misnamed is-NaN test, kept in case other code uses it
+#define my_isnan(x)    ((x) != (x))   // true only for NaN, which never equals itself
+static
+cmsInt32Number CheckFormattersHalf(void)
+{
+    int i, j;
+
+    for (i=0; i < 0xffff; i++) {
+
+        cmsFloat32Number f = _cmsHalf2Float((cmsUInt16Number) i);
+
+        // NaN results are skipped; infinities still take part in the round trip
+        if (my_isnan(f)) continue;
+
+        j = _cmsFloat2Half(f);
+
+        if (i != j) {
+            Fail("%d != %d in Half float support!\n", i, j);
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+#endif
+// Sends one 16-bit RGB triplet through xform and checks each output channel with IsGoodWord
+static
+cmsInt32Number CheckOneRGB(cmsHTRANSFORM xform, cmsUInt16Number R, cmsUInt16Number G, cmsUInt16Number B, cmsUInt16Number Ro, cmsUInt16Number Go, cmsUInt16Number Bo)
+{
+    cmsUInt16Number In[3];
+    cmsUInt16Number Result[3];
+
+    In[0] = R; In[1] = G; In[2] = B;
+
+    cmsDoTransform(xform, In, Result, 1);
+
+    // Channels are checked in R, G, B order, stopping at the first bad one
+    if (!IsGoodWord("R", Ro , Result[0])) return 0;
+    if (!IsGoodWord("G", Go , Result[1])) return 0;
+    if (!IsGoodWord("B", Bo , Result[2])) return 0;
+    return 1;
+}
+// Sends one triplet of doubles through xform and checks each output channel with
+// IsGoodVal, passing 0.01 as its last argument (presumably the allowed error).
+static
+cmsInt32Number CheckOneRGB_double(cmsHTRANSFORM xform, cmsFloat64Number R, cmsFloat64Number G, cmsFloat64Number B, cmsFloat64Number Ro, cmsFloat64Number Go, cmsFloat64Number Bo)
+{
+    cmsFloat64Number In[3];
+    cmsFloat64Number Result[3];
+
+    In[0] = R; In[1] = G; In[2] = B;
+
+    cmsDoTransform(xform, In, Result, 1);
+
+    if (!IsGoodVal("R", Ro , Result[0], 0.01)) return 0;
+    if (!IsGoodVal("G", Go , Result[1], 0.01)) return 0;
+    if (!IsGoodVal("B", Bo , Result[2], 0.01)) return 0;
+
+    return 1;
+}
+// Checks cmsChangeBuffersFormat on an sRGB -> sRGB transform: known values are verified
+// with the initial formats, after switching the input to BGR, and after switching to doubles.
+static
+cmsInt32Number CheckChangeBufferFormat(void)
+{
+    cmsHPROFILE hsRGB = cmsCreate_sRGBProfile();
+    cmsHTRANSFORM xform;
+    cmsInt32Number rc = 0;
+
+    if (hsRGB == NULL) return 0;
+
+    xform = cmsCreateTransform(hsRGB, TYPE_RGB_16, hsRGB, TYPE_RGB_16, INTENT_PERCEPTUAL, 0);
+    cmsCloseProfile(hsRGB);
+    if (xform == NULL) return 0;
+
+    if (!CheckOneRGB(xform, 0, 0, 0, 0, 0, 0)) goto Done;
+    if (!CheckOneRGB(xform, 120, 0, 0, 120, 0, 0)) goto Done;
+    if (!CheckOneRGB(xform, 0, 222, 255, 0, 222, 255)) goto Done;
+
+    // Input becomes BGR while output stays RGB, so the expected triplets are reversed
+    if (!cmsChangeBuffersFormat(xform, TYPE_BGR_16, TYPE_RGB_16)) goto Done;
+
+    if (!CheckOneRGB(xform, 0, 0, 123, 123, 0, 0)) goto Done;
+    if (!CheckOneRGB(xform, 154, 234, 0, 0, 234, 154)) goto Done;
+
+    if (!cmsChangeBuffersFormat(xform, TYPE_RGB_DBL, TYPE_RGB_DBL)) goto Done;
+    if (!CheckOneRGB_double(xform, 0.20, 0, 0, 0.20, 0, 0)) goto Done;
+    if (!CheckOneRGB_double(xform, 0, 0.9, 1, 0, 0.9, 1)) goto Done;
+    rc = 1;
+Done:
+    cmsDeleteTransform(xform);   // released on failure too, not only on success
+    return rc;
+}
+
+
+// Write tag testbed ----------------------------------------------------------------------------------------
+// Tag round-trip for XYZ: pass 1 writes (1.0, 1.1, 1.2) into tag, pass 2 reads it back
+static
+cmsInt32Number CheckXYZ(cmsInt32Number Pass, cmsHPROFILE hProfile, cmsTagSignature tag)
+{
+    cmsCIEXYZ Written;
+    cmsCIEXYZ* Stored;
+
+    if (Pass == 1) {
+
+        Written.X = 1.0;
+        Written.Y = 1.1;
+        Written.Z = 1.2;
+        return cmsWriteTag(hProfile, tag, &Written);
+    }
+
+    if (Pass != 2) return 0;
+
+    Stored = (cmsCIEXYZ *) cmsReadTag(hProfile, tag);
+    if (Stored == NULL) return 0;
+
+    if (!IsGoodFixed15_16("X", 1.0, Stored->X)) return 0;
+    if (!IsGoodFixed15_16("Y", 1.1, Stored->Y)) return 0;
+    if (!IsGoodFixed15_16("Z", 1.2, Stored->Z)) return 0;
+    return 1;
+}
+// Tag round-trip for tone curves: pass 1 stores a gamma 1.0 curve, pass 2 returns
+// what cmsIsToneCurveLinear says about the curve read back.
+static
+cmsInt32Number CheckGamma(cmsInt32Number Pass, cmsHPROFILE hProfile, cmsTagSignature tag)
+{
+    cmsToneCurve* Curve;
+    cmsInt32Number Written;
+
+    if (Pass == 1) {
+
+        Curve = cmsBuildGamma(DbgThread(), 1.0);
+        Written = cmsWriteTag(hProfile, tag, Curve);
+
+        // The local curve is released right after the write
+        cmsFreeToneCurve(Curve);
+        return Written;
+    }
+
+    if (Pass == 2) {
+
+        Curve = (cmsToneCurve *) cmsReadTag(hProfile, tag);
+        return (Curve == NULL) ? 0 : cmsIsToneCurveLinear(Curve);
+    }
+
+    return 0;
+}
+// Tag round-trip for a single-entry text tag: pass 1 stores "Test test" under
+// cmsNoLanguage/cmsNoCountry, pass 2 reads it back as ASCII and compares.
+static
+cmsInt32Number CheckTextSingle(cmsInt32Number Pass, cmsHPROFILE hProfile, cmsTagSignature tag)
+{
+    cmsMLU* Text;
+    cmsInt32Number Written;
+    char Readback[256];
+
+    if (Pass == 1) {
+
+        Text = cmsMLUalloc(DbgThread(), 0);
+        cmsMLUsetASCII(Text, cmsNoLanguage, cmsNoCountry, "Test test");
+        Written = cmsWriteTag(hProfile, tag, Text);
+        cmsMLUfree(Text);
+        return Written;
+    }
+
+    if (Pass == 2) {
+
+        Text = (cmsMLU *) cmsReadTag(hProfile, tag);
+        if (Text == NULL) return 0;
+
+        cmsMLUgetASCII(Text, cmsNoLanguage, cmsNoCountry, Readback, 256);
+        return (strcmp(Readback, "Test test") == 0) ? TRUE : FALSE;
+    }
+
+    return 0;
+}
+// Tag round-trip for a multi-entry text tag: pass 1 stores five strings under different
+// language/country codes, pass 2 reads each one back as ASCII and compares it.
+static
+cmsInt32Number CheckText(cmsInt32Number Pass, cmsHPROFILE hProfile, cmsTagSignature tag)
+{
+    // Language, country and text of the four coded entries, in the order they are written
+    static const char* const Localized[4][3] = {
+        { "en", "US", "1 1 1 1"   },
+        { "es", "ES", "2 2 2 2"   },
+        { "ct", "ES", "3 3 3 3"   },
+        { "en", "GB", "444444444" }
+    };
+    cmsMLU* Text;
+    cmsInt32Number k, Written;
+    char Readback[256];
+
+    if (Pass == 1) {
+        Text = cmsMLUalloc(DbgThread(), 0);
+        cmsMLUsetASCII(Text, cmsNoLanguage, cmsNoCountry, "Test test");
+        for (k = 0; k < 4; k++)
+            cmsMLUsetASCII(Text, Localized[k][0], Localized[k][1], Localized[k][2]);
+
+        Written = cmsWriteTag(hProfile, tag, Text);
+        cmsMLUfree(Text);
+        return Written;
+    }
+
+    if (Pass != 2) return 0;
+
+    Text = (cmsMLU *) cmsReadTag(hProfile, tag);
+    if (Text == NULL) return 0;
+
+    cmsMLUgetASCII(Text, cmsNoLanguage, cmsNoCountry, Readback, 256);
+    if (strcmp(Readback, "Test test") != 0) return FALSE;
+
+    for (k = 0; k < 4; k++) {
+        cmsMLUgetASCII(Text, Localized[k][0], Localized[k][1], Readback, 256);
+        if (strcmp(Readback, Localized[k][2]) != 0) return FALSE;
+    }
+
+    return TRUE;
+}
+// Tag round-trip for raw data: pass 1 writes a fixed cmsICCData sample, pass 2 checks
+// that data[0] is '?', flag is 0 and len is 1 in what is read back.
+static
+cmsInt32Number CheckData(cmsInt32Number Pass,  cmsHPROFILE hProfile, cmsTagSignature tag)
+{
+    cmsICCData Sample = { 1, 0, { '?' }};
+    cmsICCData* Stored;
+
+    if (Pass == 1)
+        return cmsWriteTag(hProfile, tag, &Sample);
+
+    if (Pass != 2)
+        return 0;
+
+    Stored = (cmsICCData *) cmsReadTag(hProfile, tag);
+    if (Stored == NULL) return 0;
+
+    // Checked in this order: data, then flag, then len
+    if (Stored->data[0] != '?') return 0;
+    if (Stored->flag != 0) return 0;
+    if (Stored->len != 1) return 0;
+
+    return 1;
+}
+// Tag round-trip for a signature value: pass 1 stores
+// cmsSigPerceptualReferenceMediumGamut, pass 2 expects to read the same value back.
+static
+cmsInt32Number CheckSignature(cmsInt32Number Pass,  cmsHPROFILE hProfile, cmsTagSignature tag)
+{
+    cmsTagSignature Value;
+    cmsTagSignature* Stored;
+
+    if (Pass == 1) {
+        Value = (cmsTagSignature) cmsSigPerceptualReferenceMediumGamut;
+        return cmsWriteTag(hProfile, tag, &Value);
+    }
+
+    if (Pass == 2) {
+        Stored = (cmsTagSignature *) cmsReadTag(hProfile, tag);
+        if (Stored == NULL) return 0;
+
+        return *Stored == cmsSigPerceptualReferenceMediumGamut;
+    }
+
+    return 0;
+}
+// Tag round-trip for date/time: pass 1 writes year 2009, tm_mon 5, day 4, 01:02:03;
+// pass 2 reads the tag back and compares the same six fields.
+static
+cmsInt32Number CheckDateTime(cmsInt32Number Pass,  cmsHPROFILE hProfile, cmsTagSignature tag)
+{
+    struct tm *Pt, Holder;
+
+    switch (Pass) {
+
+        case 1:
+            // Zeroed first so the tm fields this test never sets are not passed on uninitialized
+            memset(&Holder, 0, sizeof(Holder));
+            Holder.tm_hour = 1;
+            Holder.tm_min = 2;
+            Holder.tm_sec = 3;
+            Holder.tm_mday = 4;
+            Holder.tm_mon = 5;
+            Holder.tm_year = 2009 - 1900;
+            return cmsWriteTag(hProfile, tag, &Holder);
+
+        case 2:
+            Pt = (struct tm *) cmsReadTag(hProfile, tag);
+            if (Pt == NULL) return 0;
+
+            return (Pt ->tm_hour == 1 &&
+                Pt ->tm_min == 2 &&
+                Pt ->tm_sec == 3 &&
+                Pt ->tm_mday == 4 &&
+                Pt ->tm_mon == 5 &&
+                Pt ->tm_year == 2009 - 1900);
+
+        default:
+            return 0;
+    }
+}
+// Tag round-trip for named color lists: pass 1 writes max_check colors named "#<i>",
+// pass 2 reads them back and compares name and PCS (plus colorants if colorant_check).
+static
+cmsInt32Number CheckNamedColor(cmsInt32Number Pass,  cmsHPROFILE hProfile, cmsTagSignature tag, cmsInt32Number max_check, cmsBool  colorant_check)
+{
+    cmsNAMEDCOLORLIST* nc;
+    cmsInt32Number i, j, rc;
+    char Name[255];
+    cmsUInt16Number PCS[3];
+    cmsUInt16Number Colorant[cmsMAXCHANNELS];
+    char CheckName[255];
+    cmsUInt16Number CheckPCS[3];
+    cmsUInt16Number CheckColorant[cmsMAXCHANNELS];
+
+    switch (Pass) {
+
+    case 1:
+
+        nc = cmsAllocNamedColorList(DbgThread(), 0, 4, "prefix", "suffix");
+        if (nc == NULL) return 0;
+
+        for (i=0; i < max_check; i++) {
+            PCS[0] = PCS[1] = PCS[2] = (cmsUInt16Number) i;
+            Colorant[0] = Colorant[1] = Colorant[2] = Colorant[3] = (cmsUInt16Number) (max_check - i);
+
+            sprintf(Name, "#%d", i);
+            if (!cmsAppendNamedColor(nc, Name, PCS, Colorant)) {
+                Fail("Couldn't append named color");
+                cmsFreeNamedColorList(nc);   // do not leak the partially built list
+                return 0;
+            }
+        }
+
+        rc = cmsWriteTag(hProfile, tag, nc);
+        cmsFreeNamedColorList(nc);
+        return rc;
+
+    case 2:
+
+        nc = (cmsNAMEDCOLORLIST *) cmsReadTag(hProfile, tag);
+        if (nc == NULL) return 0;
+
+        for (i=0; i < max_check; i++) {
+
+            CheckPCS[0] = CheckPCS[1] = CheckPCS[2] = (cmsUInt16Number) i;
+            CheckColorant[0] = CheckColorant[1] = CheckColorant[2] = CheckColorant[3] = (cmsUInt16Number) (max_check - i);
+
+            sprintf(CheckName, "#%d", i);
+            if (!cmsNamedColorInfo(nc, i, Name, NULL, NULL, PCS, Colorant)) { Fail("Invalid string"); return 0; }
+
+            for (j=0; j < 3; j++) {
+                if (CheckPCS[j] != PCS[j]) {  Fail("Invalid PCS"); return 0; }
+            }
+
+            // This is only used on named color list
+            if (colorant_check) {
+                for (j=0; j < 4; j++) {
+                    if (CheckColorant[j] != Colorant[j]) { Fail("Invalid Colorant"); return 0; }
+                }
+            }
+
+            if (strcmp(Name, CheckName) != 0) { Fail("Invalid Name");  return 0; }
+        }
+        return 1;
+
+    default: return 0;
+    }
+}
+// Tag round-trip for 16-bit LUTs: pass 1 writes a 3 -> 3 pipeline built from
+// identity curves, an identity CLUT and identity curves again; pass 2 hands
+// whatever is read back to Check16LUT. Any other Pass value yields 0.
+static
+cmsInt32Number CheckLUT(cmsInt32Number Pass,  cmsHPROFILE hProfile, cmsTagSignature tag)
+{
+    cmsPipeline* Pipe;
+    cmsInt32Number Written;
+
+    if (Pass == 1) {
+
+        Pipe = cmsPipelineAlloc(DbgThread(), 3, 3);
+        if (Pipe == NULL) return 0;
+
+        // Identity stages: curves at the beginning, then CLUT and curves appended
+        cmsPipelineInsertStage(Pipe, cmsAT_BEGIN, _cmsStageAllocIdentityCurves(DbgThread(), 3));
+        cmsPipelineInsertStage(Pipe, cmsAT_END, _cmsStageAllocIdentityCLut(DbgThread(), 3));
+        cmsPipelineInsertStage(Pipe, cmsAT_END, _cmsStageAllocIdentityCurves(DbgThread(), 3));
+
+        Written = cmsWriteTag(hProfile, tag, Pipe);
+        cmsPipelineFree(Pipe);
+        return Written;
+    }
+
+    if (Pass == 2) {
+
+        Pipe = (cmsPipeline *) cmsReadTag(hProfile, tag);
+        if (Pipe == NULL) return 0;
+
+        // The verdict on the stored pipeline comes from Check16LUT
+        return Check16LUT(Pipe);
+    }
+
+    // Unknown pass
+    return 0;
+}
+// Tag round-trip for an array of nine doubles: pass 1 writes the known values,
+// pass 2 compares each stored value with its reference through IsGoodFixed15_16.
+// Returns 1 on success and 0 otherwise, including for unknown Pass values.
+static
+cmsInt32Number CheckCHAD(cmsInt32Number Pass,  cmsHPROFILE hProfile, cmsTagSignature tag)
+{
+    cmsFloat64Number Expected[] = { 0, .1, .2, .3, .4, .5, .6, .7, .8 };
+    cmsFloat64Number* Stored;
+    cmsInt32Number k;
+
+    if (Pass == 1)
+        return cmsWriteTag(hProfile, tag, Expected);
+
+    if (Pass != 2)
+        return 0;
+
+    Stored = (cmsFloat64Number *) cmsReadTag(hProfile, tag);
+    if (Stored == NULL) return 0;
+
+    // Stop at the first element that does not match
+    k = 0;
+    while (k < 9) {
+        if (!IsGoodFixed15_16("CHAD", Stored[k], Expected[k])) return 0;
+        k++;
+    }
+
+    return 1;
+}
+// Tag round-trip for chromaticities: pass 1 writes a fixed xyY triple, pass 2
+// compares the x and y members of the red, green and blue entries read back.
+// The Y members are written but not compared.
+static
+cmsInt32Number CheckChromaticity(cmsInt32Number Pass,  cmsHPROFILE hProfile, cmsTagSignature tag)
+{
+    cmsCIExyYTRIPLE Ref = { {0, .1, 1 }, { .3, .4, 1 }, { .6, .7, 1 }};
+    cmsCIExyYTRIPLE* Stored;
+
+    if (Pass == 1) {
+        return cmsWriteTag(hProfile, tag, &Ref);
+    }
+
+    if (Pass != 2) {
+        return 0;
+    }
+
+    Stored = (cmsCIExyYTRIPLE *) cmsReadTag(hProfile, tag);
+    if (Stored == NULL) return 0;
+
+    // Stops at the first mismatch, going through red, green, blue in that order
+    return IsGoodFixed15_16("xyY", Stored->Red.x,   Ref.Red.x)   &&
+           IsGoodFixed15_16("xyY", Stored->Red.y,   Ref.Red.y)   &&
+           IsGoodFixed15_16("xyY", Stored->Green.x, Ref.Green.x) &&
+           IsGoodFixed15_16("xyY", Stored->Green.y, Ref.Green.y) &&
+           IsGoodFixed15_16("xyY", Stored->Blue.x,  Ref.Blue.x)  &&
+           IsGoodFixed15_16("xyY", Stored->Blue.y,  Ref.Blue.y);
+}
+// Tag round-trip for colorant order: pass 1 writes the values cmsMAXCHANNELS-1 down
+// to 0 (one byte per channel), pass 2 expects the same bytes back.
+static
+cmsInt32Number CheckColorantOrder(cmsInt32Number Pass,  cmsHPROFILE hProfile, cmsTagSignature tag)
+{
+    cmsUInt8Number Order[cmsMAXCHANNELS];
+    cmsUInt8Number* Stored;
+    cmsInt32Number k;
+
+    if (Pass == 1) {
+
+        for (k = cmsMAXCHANNELS - 1; k >= 0; k--)
+            Order[cmsMAXCHANNELS - 1 - k] = (cmsUInt8Number) k;
+
+        return cmsWriteTag(hProfile, tag, Order);
+    }
+
+    if (Pass != 2) return 0;
+
+    Stored = (cmsUInt8Number *) cmsReadTag(hProfile, tag);
+    if (Stored == NULL) return 0;
+
+    for (k = 0; k < cmsMAXCHANNELS; k++) {
+        if (Stored[k] != ( cmsMAXCHANNELS - 1 - k )) return 0;
+    }
+
+    return 1;
+}
+// Tag round-trip for measurement conditions: pass 1 writes a fixed set of values,
+// pass 2 reads them back and checks every field that was written.
+static
+cmsInt32Number CheckMeasurement(cmsInt32Number Pass,  cmsHPROFILE hProfile, cmsTagSignature tag)
+{
+    cmsICCMeasurementConditions Cond;
+    cmsICCMeasurementConditions* Stored;
+
+    if (Pass == 1) {
+
+        Cond.Observer = 1;
+        Cond.Backing.X = 0.1;
+        Cond.Backing.Y = 0.2;
+        Cond.Backing.Z = 0.3;
+        Cond.Geometry = 1;
+        Cond.Flare = 1.0;
+        Cond.IlluminantType = cmsILLUMINANT_TYPE_D50;
+
+        return cmsWriteTag(hProfile, tag, &Cond);
+    }
+
+    if (Pass != 2) return 0;
+
+    Stored = (cmsICCMeasurementConditions *) cmsReadTag(hProfile, tag);
+    if (Stored == NULL) return 0;
+
+    // Values compared through IsGoodFixed15_16 first, then the exact comparisons
+    if (!IsGoodFixed15_16("Backing", Stored->Backing.X, 0.1)) return 0;
+    if (!IsGoodFixed15_16("Backing", Stored->Backing.Y, 0.2)) return 0;
+    if (!IsGoodFixed15_16("Backing", Stored->Backing.Z, 0.3)) return 0;
+    if (!IsGoodFixed15_16("Flare",   Stored->Flare, 1.0)) return 0;
+
+    if (Stored->Geometry != 1) return 0;
+    if (Stored->IlluminantType != cmsILLUMINANT_TYPE_D50) return 0;
+    if (Stored->Observer != 1) return 0;
+    return 1;
+}
+// Tag round-trip for UCR/BG: pass 1 writes two gamma curves plus a description,
+// pass 2 reads the tag back and checks the description text only.
+static
+cmsInt32Number CheckUcrBg(cmsInt32Number Pass,  cmsHPROFILE hProfile, cmsTagSignature tag)
+{
+    cmsUcrBg Sample;
+    cmsUcrBg* Stored;
+    cmsInt32Number Written;
+    char Text[256];
+
+    if (Pass == 1) {
+
+        Sample.Ucr = cmsBuildGamma(DbgThread(), 2.4);
+        Sample.Bg  = cmsBuildGamma(DbgThread(), -2.2);
+        Sample.Desc = cmsMLUalloc(DbgThread(), 1);
+        cmsMLUsetASCII(Sample.Desc,  cmsNoLanguage, cmsNoCountry, "test UCR/BG");
+
+        Written = cmsWriteTag(hProfile, tag, &Sample);
+
+        // Local pieces are released in reverse order of creation
+        cmsMLUfree(Sample.Desc);
+        cmsFreeToneCurve(Sample.Bg);
+        cmsFreeToneCurve(Sample.Ucr);
+        return Written;
+    }
+
+    if (Pass != 2) return 0;
+
+    Stored = (cmsUcrBg *) cmsReadTag(hProfile, tag);
+    if (Stored == NULL) return 0;
+
+    cmsMLUgetASCII(Stored->Desc, cmsNoLanguage, cmsNoCountry, Text, 256);
+    if (strcmp(Text, "test UCR/BG") != 0) return 0;
+    return 1;
+}
+// Tag round-trip for CRD info: pass 1 stores five wide strings under language code
+// "PS" with country codes "nm" and "#0".."#3", pass 2 reads each one back as ASCII
+// and compares it with the text that was stored.
+static
+cmsInt32Number CheckCRDinfo(cmsInt32Number Pass,  cmsHPROFILE hProfile, cmsTagSignature tag)
+{
+    // Parallel tables: country code, wide text written, ASCII text expected back
+    static const char* const Keys[5] = { "nm", "#0", "#1", "#2", "#3" };
+
+    static const wchar_t* const Written[5] = {
+        L"test postscript",
+        L"perceptual",
+        L"relative_colorimetric",
+        L"saturation",
+        L"absolute_colorimetric"
+    };
+
+    static const char* const Expected[5] = {
+        "test postscript",
+        "perceptual",
+        "relative_colorimetric",
+        "saturation",
+        "absolute_colorimetric"
+    };
+
+    cmsMLU* mlu;
+    char Buffer[256];
+    cmsInt32Number k, rc;
+
+    if (Pass == 1) {
+
+        mlu = cmsMLUalloc(DbgThread(), 5);
+
+        for (k = 0; k < 5; k++)
+            cmsMLUsetWide(mlu,  "PS", Keys[k], Written[k]);
+
+        rc = cmsWriteTag(hProfile, tag, mlu);
+        cmsMLUfree(mlu);
+        return rc;
+    }
+
+    if (Pass != 2) return 0;
+
+    mlu = (cmsMLU*) cmsReadTag(hProfile, tag);
+    if (mlu == NULL) return 0;
+
+    for (k = 0; k < 5; k++) {
+
+        cmsMLUgetASCII(mlu, "PS", Keys[k], Buffer, 256);
+        if (strcmp(Buffer, Expected[k]) != 0) return 0;
+    }
+
+    return 1;
+}
+// Builds a three-segment tone curve: a type 6 segment from -1E22 to 0, a two-point
+// sampled segment (type 0) from 0 to 1, and another type 6 segment from 1 to 1E22.
+static
+cmsToneCurve *CreateSegmentedCurve(void)
+{
+    cmsCurveSegment Seg[3];
+    cmsFloat32Number Sampled[2] = { 0, 1};
+    cmsInt32Number k;
+
+    // The two outer segments share type and parameters and differ only in range
+    for (k = 0; k < 3; k += 2) {
+        Seg[k].Type = 6;
+        Seg[k].Params[0] = 1;
+        Seg[k].Params[1] = 0;
+        Seg[k].Params[2] = 0;
+        Seg[k].Params[3] = 0;
+    }
+
+    Seg[0].x0 = -1E22F;
+    Seg[0].x1 = 0;
+
+    // Middle segment: two sampled grid points
+    Seg[1].Type = 0;
+    Seg[1].nGridPoints = 2;
+    Seg[1].SampledPoints = Sampled;
+    Seg[1].x0 = 0;
+    Seg[1].x1 = 1;
+
+    Seg[2].x0 = 1;
+    Seg[2].x1 = 1E22F;
+    return cmsBuildSegmentedToneCurve(DbgThread(), 3, Seg);
+}
+// Tag round-trip for a float pipeline: pass 1 writes a pipeline made of Lab V2->V4
+// and V4->V2 stages, whatever AddIdentityCLUTfloat adds, and segmented tone curves;
+// pass 2 hands what is read back to CheckFloatLUT.
+static
+cmsInt32Number CheckMPE(cmsInt32Number Pass,  cmsHPROFILE hProfile, cmsTagSignature tag)
+{
+    cmsPipeline* Pipe;
+    cmsToneCurve* Shared;
+    cmsToneCurve* Curves[3];
+    cmsInt32Number Written;
+
+    if (Pass == 1) {
+
+        Pipe = cmsPipelineAlloc(DbgThread(), 3, 3);
+        cmsPipelineInsertStage(Pipe, cmsAT_BEGIN, _cmsStageAllocLabV2ToV4(DbgThread()));
+        cmsPipelineInsertStage(Pipe, cmsAT_END, _cmsStageAllocLabV4ToV2(DbgThread()));
+        AddIdentityCLUTfloat(Pipe);
+
+        // One curve object serves all three channels, so it is freed exactly once
+        Shared = CreateSegmentedCurve();
+        Curves[0] = Curves[1] = Curves[2] = Shared;
+        cmsPipelineInsertStage(Pipe, cmsAT_END, cmsStageAllocToneCurves(DbgThread(), 3, Curves));
+        cmsFreeToneCurve(Shared);
+
+        Written = cmsWriteTag(hProfile, tag, Pipe);
+        cmsPipelineFree(Pipe);
+        return Written;
+    }
+
+    if (Pass != 2) return 0;
+
+    Pipe = (cmsPipeline *) cmsReadTag(hProfile, tag);
+    if (Pipe == NULL) return 0;
+
+    return CheckFloatLUT(Pipe);
+}
+
+
+// Two-pass check of a screening tag.
+// Pass 1 writes a one-channel screening record (frequency 2.0, angle 3.0,
+// elliptical spot) and returns the result of cmsWriteTag(). Pass 2 reads the
+// tag back and returns 1 only if every field matches. Other passes return 0.
+static
+cmsInt32Number CheckScreening(cmsInt32Number Pass,  cmsHPROFILE hProfile, cmsTagSignature tag)
+{
+    cmsScreening Record;
+    cmsScreening* Stored;
+
+    if (Pass == 1) {
+
+        Record.Flag = 0;
+        Record.nChannels = 1;
+        Record.Channels[0].Frequency = 2.0;
+        Record.Channels[0].ScreenAngle = 3.0;
+        Record.Channels[0].SpotShape = cmsSPOT_ELLIPSE;
+
+        return cmsWriteTag(hProfile, tag, &Record);
+    }
+
+    if (Pass != 2)
+        return 0;
+
+    Stored = (cmsScreening *) cmsReadTag(hProfile, tag);
+    if (Stored == NULL) return 0;
+
+    if (Stored ->nChannels != 1 || Stored ->Flag != 0) return 0;
+
+    // Frequency and angle are compared through the fixed 15.16 helper
+    if (!IsGoodFixed15_16("Freq", Stored ->Channels[0].Frequency, 2.0)) return 0;
+    if (!IsGoodFixed15_16("Angle", Stored ->Channels[0].ScreenAngle, 3.0)) return 0;
+
+    return (Stored ->Channels[0].SpotShape == cmsSPOT_ELLIPSE) ? 1 : 0;
+}
+
+
+// Verifies that `mlu` holds the test strings for index `n`:
+// "Hello, world <n>" for en/US and "Hola, mundo <n>" for es/ES.
+// Returns TRUE when both translations match, FALSE otherwise.
+static
+cmsBool CheckOneStr(cmsMLU* mlu, cmsInt32Number n)
+{
+    char Buffer[256], Buffer2[256];
+
+    // Start from an empty string: if the lookup writes nothing (for instance
+    // when the MLU is missing) the comparison below must fail cleanly instead
+    // of reading indeterminate stack contents.
+    Buffer[0] = 0;
+    cmsMLUgetASCII(mlu, "en", "US", Buffer, 255);
+    sprintf(Buffer2, "Hello, world %d", n);
+    if (strcmp(Buffer, Buffer2) != 0) return FALSE;
+
+    Buffer[0] = 0;
+    cmsMLUgetASCII(mlu, "es", "ES", Buffer, 255);
+    sprintf(Buffer2, "Hola, mundo %d", n);
+    if (strcmp(Buffer, Buffer2) != 0) return FALSE;
+
+    return TRUE;
+}
+
+
+// Allocates a fresh MLU, stores it in *mlu and fills it with an en/US
+// string (s1) and an es/ES string (s2).
+static
+void SetOneStr(cmsMLU** mlu, wchar_t* s1, wchar_t* s2)
+{
+    cmsMLU* Fresh = cmsMLUalloc(DbgThread(), 0);
+
+    *mlu = Fresh;
+
+    cmsMLUsetWide(Fresh, "en", "US", s1);
+    cmsMLUsetWide(Fresh, "es", "ES", s2);
+}
+
+
+// Two-pass check of the profile sequence description tag.
+// Pass 1 builds a three-entry sequence (manufacturer/model strings plus
+// attribute flags) and writes it; pass 2 reads it back and verifies every
+// field. Returns 1 on success and 0 on any failure or unknown pass.
+static
+cmsInt32Number CheckProfileSequenceTag(cmsInt32Number Pass,  cmsHPROFILE hProfile)
+{
+    cmsSEQ* s;
+    cmsInt32Number i;
+    cmsInt32Number rc;
+
+    switch (Pass) {
+
+    case 1:
+
+        s = cmsAllocProfileSequenceDescription(DbgThread(), 3);
+        if (s == NULL) return 0;
+
+        SetOneStr(&s -> seq[0].Manufacturer, L"Hello, world 0", L"Hola, mundo 0");
+        SetOneStr(&s -> seq[0].Model, L"Hello, world 0", L"Hola, mundo 0");
+        SetOneStr(&s -> seq[1].Manufacturer, L"Hello, world 1", L"Hola, mundo 1");
+        SetOneStr(&s -> seq[1].Model, L"Hello, world 1", L"Hola, mundo 1");
+        SetOneStr(&s -> seq[2].Manufacturer, L"Hello, world 2", L"Hola, mundo 2");
+        SetOneStr(&s -> seq[2].Model, L"Hello, world 2", L"Hola, mundo 2");
+
+
+#ifdef CMS_DONT_USE_INT64
+        s ->seq[0].attributes[0] = cmsTransparency|cmsMatte;
+        s ->seq[0].attributes[1] = 0;
+#else
+        s ->seq[0].attributes = cmsTransparency|cmsMatte;
+#endif
+
+#ifdef CMS_DONT_USE_INT64
+        s ->seq[1].attributes[0] = cmsReflective|cmsMatte;
+        s ->seq[1].attributes[1] = 0;
+#else
+        s ->seq[1].attributes = cmsReflective|cmsMatte;
+#endif
+
+#ifdef CMS_DONT_USE_INT64
+        s ->seq[2].attributes[0] = cmsTransparency|cmsGlossy;
+        s ->seq[2].attributes[1] = 0;
+#else
+        s ->seq[2].attributes = cmsTransparency|cmsGlossy;
+#endif
+
+        // The local description is released whether or not the write worked,
+        // so a failed write no longer leaks it.
+        rc = cmsWriteTag(hProfile, cmsSigProfileSequenceDescTag, s) ? 1 : 0;
+        cmsFreeProfileSequenceDescription(s);
+        return rc;
+
+    case 2:
+
+        s = (cmsSEQ *) cmsReadTag(hProfile, cmsSigProfileSequenceDescTag);
+        if (s == NULL) return 0;
+
+        if (s ->n != 3) return 0;
+
+#ifdef CMS_DONT_USE_INT64
+        if (s ->seq[0].attributes[0] != (cmsTransparency|cmsMatte)) return 0;
+        if (s ->seq[0].attributes[1] != 0) return 0;
+#else
+        if (s ->seq[0].attributes != (cmsTransparency|cmsMatte)) return 0;
+#endif
+
+#ifdef CMS_DONT_USE_INT64
+        if (s ->seq[1].attributes[0] != (cmsReflective|cmsMatte)) return 0;
+        if (s ->seq[1].attributes[1] != 0) return 0;
+#else
+        if (s ->seq[1].attributes != (cmsReflective|cmsMatte)) return 0;
+#endif
+
+#ifdef CMS_DONT_USE_INT64
+        if (s ->seq[2].attributes[0] != (cmsTransparency|cmsGlossy)) return 0;
+        if (s ->seq[2].attributes[1] != 0) return 0;
+#else
+        if (s ->seq[2].attributes != (cmsTransparency|cmsGlossy)) return 0;
+#endif
+
+        // Check the localized strings of every entry
+        for (i=0; i < 3; i++) {
+
+            if (!CheckOneStr(s -> seq[i].Manufacturer, i)) return 0;
+            if (!CheckOneStr(s -> seq[i].Model, i)) return 0;
+        }
+        return 1;
+
+    default:
+        return 0;
+    }
+}
+
+
+// Two-pass check of the profile sequence identifier tag.
+// Pass 1 writes three entries, each with a 16-byte profile ID and a localized
+// description; pass 2 reads them back and verifies IDs and strings.
+// Returns 1 on success and 0 on any failure or unknown pass.
+static
+cmsInt32Number CheckProfileSequenceIDTag(cmsInt32Number Pass,  cmsHPROFILE hProfile)
+{
+    cmsSEQ* s;
+    cmsInt32Number i;
+    cmsInt32Number rc;
+
+    switch (Pass) {
+
+    case 1:
+
+        s = cmsAllocProfileSequenceDescription(DbgThread(), 3);
+        if (s == NULL) return 0;
+
+        // Exactly 16 bytes are copied, so the literals' terminators are not stored
+        memcpy(s ->seq[0].ProfileID.ID8, "0123456789ABCDEF", 16);
+        memcpy(s ->seq[1].ProfileID.ID8, "1111111111111111", 16);
+        memcpy(s ->seq[2].ProfileID.ID8, "2222222222222222", 16);
+
+
+        SetOneStr(&s -> seq[0].Description, L"Hello, world 0", L"Hola, mundo 0");
+        SetOneStr(&s -> seq[1].Description, L"Hello, world 1", L"Hola, mundo 1");
+        SetOneStr(&s -> seq[2].Description, L"Hello, world 2", L"Hola, mundo 2");
+
+        // Release the local description on both outcomes so a failed write
+        // does not leak it.
+        rc = cmsWriteTag(hProfile, cmsSigProfileSequenceIdTag, s) ? 1 : 0;
+        cmsFreeProfileSequenceDescription(s);
+        return rc;
+
+    case 2:
+
+        s = (cmsSEQ *) cmsReadTag(hProfile, cmsSigProfileSequenceIdTag);
+        if (s == NULL) return 0;
+
+        if (s ->n != 3) return 0;
+
+        if (memcmp(s ->seq[0].ProfileID.ID8, "0123456789ABCDEF", 16) != 0) return 0;
+        if (memcmp(s ->seq[1].ProfileID.ID8, "1111111111111111", 16) != 0) return 0;
+        if (memcmp(s ->seq[2].ProfileID.ID8, "2222222222222222", 16) != 0) return 0;
+
+        for (i=0; i < 3; i++) {
+
+            if (!CheckOneStr(s -> seq[i].Description, i)) return 0;
+        }
+
+        return 1;
+
+    default:
+        return 0;
+    }
+}
+
+
+// Two-pass check of the viewing conditions tag.
+// Pass 1 writes illuminant type 1 with illuminant XYZ (0.1, 0.2, 0.3) and
+// surround XYZ (0.4, 0.5, 0.6). Pass 2 reads the tag back and checks each
+// value with a 0.001 tolerance. Returns 1 on success, 0 otherwise.
+static
+cmsInt32Number CheckICCViewingConditions(cmsInt32Number Pass,  cmsHPROFILE hProfile)
+{
+    cmsICCViewingConditions  Conditions;
+    cmsICCViewingConditions* Stored;
+
+    if (Pass == 1) {
+
+        Conditions.IlluminantType = 1;
+
+        Conditions.IlluminantXYZ.X = 0.1;
+        Conditions.IlluminantXYZ.Y = 0.2;
+        Conditions.IlluminantXYZ.Z = 0.3;
+
+        Conditions.SurroundXYZ.X = 0.4;
+        Conditions.SurroundXYZ.Y = 0.5;
+        Conditions.SurroundXYZ.Z = 0.6;
+
+        return cmsWriteTag(hProfile, cmsSigViewingConditionsTag, &Conditions) ? 1 : 0;
+    }
+
+    if (Pass != 2)
+        return 0;
+
+    Stored = (cmsICCViewingConditions *) cmsReadTag(hProfile, cmsSigViewingConditionsTag);
+    if (Stored == NULL) return 0;
+
+    if (Stored ->IlluminantType != 1) return 0;
+
+    if (!IsGoodVal("IlluminantXYZ.X", Stored ->IlluminantXYZ.X, 0.1, 0.001)) return 0;
+    if (!IsGoodVal("IlluminantXYZ.Y", Stored ->IlluminantXYZ.Y, 0.2, 0.001)) return 0;
+    if (!IsGoodVal("IlluminantXYZ.Z", Stored ->IlluminantXYZ.Z, 0.3, 0.001)) return 0;
+
+    if (!IsGoodVal("SurroundXYZ.X", Stored ->SurroundXYZ.X, 0.4, 0.001)) return 0;
+    if (!IsGoodVal("SurroundXYZ.Y", Stored ->SurroundXYZ.Y, 0.5, 0.001)) return 0;
+    if (!IsGoodVal("SurroundXYZ.Z", Stored ->SurroundXYZ.Z, 0.6, 0.001)) return 0;
+
+    return 1;
+}
+
+
+// Two-pass check of the VCGT tag.
+// Pass 1 writes three gamma curves (1.1, 2.2, 3.4); pass 2 reads them back
+// and checks the estimated gamma of each with a 0.001 tolerance.
+// Returns 1 on success, 0 on failure or unknown pass.
+static
+cmsInt32Number CheckVCGT(cmsInt32Number Pass,  cmsHPROFILE hProfile)
+{
+    cmsToneCurve* Curves[3];
+    cmsToneCurve** PtrCurve;
+    cmsInt32Number rc;
+
+     switch (Pass) {
+
+        case 1:
+            Curves[0] = cmsBuildGamma(DbgThread(), 1.1);
+            Curves[1] = cmsBuildGamma(DbgThread(), 2.2);
+            Curves[2] = cmsBuildGamma(DbgThread(), 3.4);
+
+            // Free the local curves on both outcomes; previously a failed
+            // write returned early and leaked all three.
+            rc = cmsWriteTag(hProfile, cmsSigVcgtTag, Curves) ? 1 : 0;
+            cmsFreeToneCurveTriple(Curves);
+            return rc;
+
+
+        case 2:
+
+             PtrCurve = (cmsToneCurve **) cmsReadTag(hProfile, cmsSigVcgtTag);
+             if (PtrCurve == NULL) return 0;
+             if (!IsGoodVal("VCGT R", cmsEstimateGamma(PtrCurve[0], 0.01), 1.1, 0.001)) return 0;
+             if (!IsGoodVal("VCGT G", cmsEstimateGamma(PtrCurve[1], 0.01), 2.2, 0.001)) return 0;
+             if (!IsGoodVal("VCGT B", cmsEstimateGamma(PtrCurve[2], 0.01), 3.4, 0.001)) return 0;
+             return 1;
+
+        default:;
+    }
+
+    return 0;
+}
+
+
+// Only one of the two following may be used, as they share the same tag
+// Two-pass check of the meta (dictionary) tag using name/value pairs only.
+// Pass 1 writes four entries, including one with no value and one with an
+// empty value. Pass 2 reads them back; the entry list is expected in reverse
+// insertion order. Returns 1 on success, 0 on failure or unknown pass.
+static
+cmsInt32Number CheckDictionary16(cmsInt32Number Pass,  cmsHPROFILE hProfile)
+{
+      cmsHANDLE hDict;
+      const cmsDICTentry* e;
+      cmsInt32Number rc;
+
+      switch (Pass) {
+
+        case 1:
+            hDict = cmsDictAlloc(DbgThread());
+            if (hDict == NULL) return 0;
+
+            cmsDictAddEntry(hDict, L"Name0",  NULL, NULL, NULL);
+            cmsDictAddEntry(hDict, L"Name1",  L"", NULL, NULL);
+            cmsDictAddEntry(hDict, L"Name",  L"String", NULL, NULL);
+            cmsDictAddEntry(hDict, L"Name2", L"12",    NULL, NULL);
+
+            // Free the dictionary on both outcomes so a failed write does not leak it
+            rc = cmsWriteTag(hProfile, cmsSigMetaTag, hDict) ? 1 : 0;
+            cmsDictFree(hDict);
+            return rc;
+
+
+        case 2:
+
+             hDict = cmsReadTag(hProfile, cmsSigMetaTag);
+             if (hDict == NULL) return 0;
+
+             // Every entry pointer is checked before use: a short or empty
+             // list must fail the test, not crash it.
+             e = cmsDictGetEntryList(hDict);
+             if (e == NULL || e ->Name == NULL || e ->Value == NULL) return 0;
+             if (memcmp(e ->Name, L"Name2", sizeof(wchar_t) * 5) != 0) return 0;
+             if (memcmp(e ->Value, L"12",  sizeof(wchar_t) * 2) != 0) return 0;
+
+             e = cmsDictNextEntry(e);
+             if (e == NULL || e ->Name == NULL || e ->Value == NULL) return 0;
+             if (memcmp(e ->Name, L"Name", sizeof(wchar_t) * 4) != 0) return 0;
+             if (memcmp(e ->Value, L"String",  sizeof(wchar_t) * 5) != 0) return 0;
+
+             e = cmsDictNextEntry(e);
+             if (e == NULL || e ->Name == NULL) return 0;
+             if (memcmp(e ->Name, L"Name1", sizeof(wchar_t) *5) != 0) return 0;
+             if (e ->Value == NULL) return 0;
+             if (*e->Value != 0) return 0;
+
+             e = cmsDictNextEntry(e);
+             if (e == NULL || e ->Name == NULL) return 0;
+             if (memcmp(e ->Name, L"Name0", sizeof(wchar_t) * 5) != 0) return 0;
+             if (e ->Value != NULL) return 0;
+             return 1;
+
+
+        default:;
+    }
+
+    return 0;
+}
+
+
+
+// Two-pass check of the meta (dictionary) tag with a localized display name.
+// Pass 1 writes two entries, one of which carries a four-language display
+// name. Pass 2 reads them back (the list is expected in reverse insertion
+// order) and verifies names, values and every translation.
+// Returns 1 on success, 0 on failure or unknown pass.
+static
+cmsInt32Number CheckDictionary24(cmsInt32Number Pass,  cmsHPROFILE hProfile)
+{
+    // Language, country and expected ASCII text of each translation
+    static const char* const Expected[4][3] = {
+        { "en", "US", "Hello, world" },
+        { "es", "ES", "Hola, mundo" },
+        { "fr", "FR", "Bonjour, le monde" },
+        { "ca", "CA", "Hola, mon" }
+    };
+
+    cmsHANDLE hDict;
+    const cmsDICTentry* e;
+    cmsMLU* DisplayName;
+    char Buffer[256];
+    cmsInt32Number rc = 1;
+    cmsInt32Number i;
+
+    switch (Pass) {
+
+    case 1:
+        hDict = cmsDictAlloc(DbgThread());
+        if (hDict == NULL) return 0;
+
+        DisplayName = cmsMLUalloc(DbgThread(), 0);
+
+        cmsMLUsetWide(DisplayName, "en", "US", L"Hello, world");
+        cmsMLUsetWide(DisplayName, "es", "ES", L"Hola, mundo");
+        cmsMLUsetWide(DisplayName, "fr", "FR", L"Bonjour, le monde");
+        cmsMLUsetWide(DisplayName, "ca", "CA", L"Hola, mon");
+
+        cmsDictAddEntry(hDict, L"Name",  L"String", DisplayName, NULL);
+        cmsMLUfree(DisplayName);
+
+        cmsDictAddEntry(hDict, L"Name2", L"12",    NULL, NULL);
+
+        // Free the dictionary on both outcomes so a failed write does not leak it
+        rc = cmsWriteTag(hProfile, cmsSigMetaTag, hDict) ? 1 : 0;
+        cmsDictFree(hDict);
+
+        return rc;
+
+
+    case 2:
+
+        hDict = cmsReadTag(hProfile, cmsSigMetaTag);
+        if (hDict == NULL) return 0;
+
+        // Entry pointers are validated before use so a truncated list fails
+        // the test instead of dereferencing NULL.
+        e = cmsDictGetEntryList(hDict);
+        if (e == NULL || e ->Name == NULL || e ->Value == NULL) return 0;
+        if (memcmp(e ->Name, L"Name2", sizeof(wchar_t) * 5) != 0) return 0;
+        if (memcmp(e ->Value, L"12",  sizeof(wchar_t) * 2) != 0) return 0;
+
+        e = cmsDictNextEntry(e);
+        if (e == NULL || e ->Name == NULL || e ->Value == NULL) return 0;
+        if (memcmp(e ->Name, L"Name", sizeof(wchar_t) * 4) != 0) return 0;
+        if (memcmp(e ->Value, L"String",  sizeof(wchar_t) * 5) != 0) return 0;
+
+        for (i = 0; i < 4; i++) {
+
+            // Empty the buffer first in case the lookup writes nothing
+            Buffer[0] = 0;
+            cmsMLUgetASCII(e->DisplayName, Expected[i][0], Expected[i][1], Buffer, 256);
+
+            // A mismatch now fails the check (it used to be reported and then
+            // ignored), and the message shows the string that was wrong.
+            if (strcmp(Buffer, Expected[i][2]) != 0) {
+                Fail("Unexpected string '%s'", Buffer);
+                return 0;
+            }
+        }
+        return 1;
+
+    default:;
+    }
+
+    return 0;
+}
+
+// Two-pass check of raw tag access on the private signature 0x31323334.
+// Pass 1 writes the 7 bytes "data123" and returns the result of
+// cmsWriteRawTag(). Pass 2 reads 7 bytes back and returns 1 if they match.
+// Any other pass returns 0.
+static
+cmsInt32Number CheckRAWtags(cmsInt32Number Pass,  cmsHPROFILE hProfile)
+{
+    char Payload[7];
+
+    if (Pass == 1)
+        return cmsWriteRawTag(hProfile, (cmsTagSignature) 0x31323334, "data123", 7);
+
+    if (Pass == 2) {
+
+        if (!cmsReadRawTag(hProfile, (cmsTagSignature) 0x31323334, Payload, 7)) return 0;
+
+        // The payload carries no terminator, so the comparison is bounded
+        return (strncmp(Payload, "data123", 7) == 0) ? 1 : 0;
+    }
+
+    return 0;
+}
+
+
+// This is a very big test that checks every single tag.
+// A placeholder profile is filled in pass 1, saved to "alltags.icc",
+// reopened from disk, and then every tag is read back and verified in pass 2.
+// Returns 1 when everything round-trips, 0 otherwise. The temporary file is
+// removed on both paths.
+static
+cmsInt32Number CheckProfileCreation(void)
+{
+    cmsHPROFILE h;
+    cmsInt32Number Pass;
+
+    h = cmsCreateProfilePlaceholder(DbgThread());
+    if (h == NULL) return 0;
+
+    cmsSetProfileVersion(h, 4.3);
+    if (cmsGetTagCount(h) != 0) { Fail("Empty profile with nonzero number of tags"); goto Error; }
+    if (cmsIsTag(h, cmsSigAToB0Tag)) { Fail("Found a tag in an empty profile"); goto Error; }
+
+    cmsSetColorSpace(h, cmsSigRgbData);
+    if (cmsGetColorSpace(h) !=  cmsSigRgbData) { Fail("Unable to set colorspace"); goto Error; }
+
+    cmsSetPCS(h, cmsSigLabData);
+    if (cmsGetPCS(h) !=  cmsSigLabData) { Fail("Unable to set PCS"); goto Error; }
+
+    cmsSetDeviceClass(h, cmsSigDisplayClass);
+    if (cmsGetDeviceClass(h) != cmsSigDisplayClass) { Fail("Unable to set deviceclass"); goto Error; }
+
+    cmsSetHeaderRenderingIntent(h, INTENT_SATURATION);
+    if (cmsGetHeaderRenderingIntent(h) != INTENT_SATURATION) { Fail("Unable to set rendering intent"); goto Error; }
+
+    for (Pass = 1; Pass <= 2; Pass++) {
+
+        SubTest("Tags holding XYZ");
+
+        if (!CheckXYZ(Pass, h, cmsSigBlueColorantTag)) goto Error;
+        if (!CheckXYZ(Pass, h, cmsSigGreenColorantTag)) goto Error;
+        if (!CheckXYZ(Pass, h, cmsSigRedColorantTag)) goto Error;
+        if (!CheckXYZ(Pass, h, cmsSigMediaBlackPointTag)) goto Error;
+        if (!CheckXYZ(Pass, h, cmsSigMediaWhitePointTag)) goto Error;
+        if (!CheckXYZ(Pass, h, cmsSigLuminanceTag)) goto Error;
+
+        SubTest("Tags holding curves");
+
+        if (!CheckGamma(Pass, h, cmsSigBlueTRCTag)) goto Error;
+        if (!CheckGamma(Pass, h, cmsSigGrayTRCTag)) goto Error;
+        if (!CheckGamma(Pass, h, cmsSigGreenTRCTag)) goto Error;
+        if (!CheckGamma(Pass, h, cmsSigRedTRCTag)) goto Error;
+
+        SubTest("Tags holding text");
+
+        if (!CheckTextSingle(Pass, h, cmsSigCharTargetTag)) goto Error;
+        if (!CheckTextSingle(Pass, h, cmsSigScreeningDescTag)) goto Error;
+
+        if (!CheckText(Pass, h, cmsSigCopyrightTag)) goto Error;
+        if (!CheckText(Pass, h, cmsSigProfileDescriptionTag)) goto Error;
+        if (!CheckText(Pass, h, cmsSigDeviceMfgDescTag)) goto Error;
+        if (!CheckText(Pass, h, cmsSigDeviceModelDescTag)) goto Error;
+        if (!CheckText(Pass, h, cmsSigViewingCondDescTag)) goto Error;
+
+        SubTest("Tags holding cmsICCData");
+
+        if (!CheckData(Pass, h, cmsSigPs2CRD0Tag)) goto Error;
+        if (!CheckData(Pass, h, cmsSigPs2CRD1Tag)) goto Error;
+        if (!CheckData(Pass, h, cmsSigPs2CRD2Tag)) goto Error;
+        if (!CheckData(Pass, h, cmsSigPs2CRD3Tag)) goto Error;
+        if (!CheckData(Pass, h, cmsSigPs2CSATag)) goto Error;
+        if (!CheckData(Pass, h, cmsSigPs2RenderingIntentTag)) goto Error;
+
+        SubTest("Tags holding signatures");
+
+        if (!CheckSignature(Pass, h, cmsSigColorimetricIntentImageStateTag)) goto Error;
+        if (!CheckSignature(Pass, h, cmsSigPerceptualRenderingIntentGamutTag)) goto Error;
+        if (!CheckSignature(Pass, h, cmsSigSaturationRenderingIntentGamutTag)) goto Error;
+        if (!CheckSignature(Pass, h, cmsSigTechnologyTag)) goto Error;
+
+        SubTest("Tags holding date_time");
+
+        if (!CheckDateTime(Pass, h, cmsSigCalibrationDateTimeTag)) goto Error;
+        if (!CheckDateTime(Pass, h, cmsSigDateTimeTag)) goto Error;
+
+        SubTest("Tags holding named color lists");
+
+        if (!CheckNamedColor(Pass, h, cmsSigColorantTableTag, 15, FALSE)) goto Error;
+        if (!CheckNamedColor(Pass, h, cmsSigColorantTableOutTag, 15, FALSE)) goto Error;
+        if (!CheckNamedColor(Pass, h, cmsSigNamedColor2Tag, 4096, TRUE)) goto Error;
+
+        SubTest("Tags holding LUTs");
+
+        if (!CheckLUT(Pass, h, cmsSigAToB0Tag)) goto Error;
+        if (!CheckLUT(Pass, h, cmsSigAToB1Tag)) goto Error;
+        if (!CheckLUT(Pass, h, cmsSigAToB2Tag)) goto Error;
+        if (!CheckLUT(Pass, h, cmsSigBToA0Tag)) goto Error;
+        if (!CheckLUT(Pass, h, cmsSigBToA1Tag)) goto Error;
+        if (!CheckLUT(Pass, h, cmsSigBToA2Tag)) goto Error;
+        if (!CheckLUT(Pass, h, cmsSigPreview0Tag)) goto Error;
+        if (!CheckLUT(Pass, h, cmsSigPreview1Tag)) goto Error;
+        if (!CheckLUT(Pass, h, cmsSigPreview2Tag)) goto Error;
+        if (!CheckLUT(Pass, h, cmsSigGamutTag)) goto Error;
+
+        SubTest("Tags holding CHAD");
+        if (!CheckCHAD(Pass, h, cmsSigChromaticAdaptationTag)) goto Error;
+
+        SubTest("Tags holding Chromaticity");
+        if (!CheckChromaticity(Pass, h, cmsSigChromaticityTag)) goto Error;
+
+        SubTest("Tags holding colorant order");
+        if (!CheckColorantOrder(Pass, h, cmsSigColorantOrderTag)) goto Error;
+
+        SubTest("Tags holding measurement");
+        if (!CheckMeasurement(Pass, h, cmsSigMeasurementTag)) goto Error;
+
+        SubTest("Tags holding CRD info");
+        if (!CheckCRDinfo(Pass, h, cmsSigCrdInfoTag)) goto Error;
+
+        SubTest("Tags holding UCR/BG");
+        if (!CheckUcrBg(Pass, h, cmsSigUcrBgTag)) goto Error;
+
+        SubTest("Tags holding MPE");
+        if (!CheckMPE(Pass, h, cmsSigDToB0Tag)) goto Error;
+        if (!CheckMPE(Pass, h, cmsSigDToB1Tag)) goto Error;
+        if (!CheckMPE(Pass, h, cmsSigDToB2Tag)) goto Error;
+        if (!CheckMPE(Pass, h, cmsSigDToB3Tag)) goto Error;
+        if (!CheckMPE(Pass, h, cmsSigBToD0Tag)) goto Error;
+        if (!CheckMPE(Pass, h, cmsSigBToD1Tag)) goto Error;
+        if (!CheckMPE(Pass, h, cmsSigBToD2Tag)) goto Error;
+        if (!CheckMPE(Pass, h, cmsSigBToD3Tag)) goto Error;
+
+        SubTest("Tags using screening");
+        if (!CheckScreening(Pass, h, cmsSigScreeningTag)) goto Error;
+
+        SubTest("Tags holding profile sequence description");
+        if (!CheckProfileSequenceTag(Pass, h)) goto Error;
+        if (!CheckProfileSequenceIDTag(Pass, h)) goto Error;
+
+        SubTest("Tags holding ICC viewing conditions");
+        if (!CheckICCViewingConditions(Pass, h)) goto Error;
+
+        SubTest("VCGT tags");
+        if (!CheckVCGT(Pass, h)) goto Error;
+
+        SubTest("RAW tags");
+        if (!CheckRAWtags(Pass, h)) goto Error;
+
+        SubTest("Dictionary meta tags");
+        // if (!CheckDictionary16(Pass, h)) goto Error;
+        if (!CheckDictionary24(Pass, h)) goto Error;
+
+        if (Pass == 1) {
+
+            // Persist the profile and reopen it from disk so that pass 2
+            // checks what was actually serialized. Both steps are verified:
+            // continuing with a NULL handle would crash in the first reader.
+            cmsBool Saved = cmsSaveProfileToFile(h, "alltags.icc");
+
+            cmsCloseProfile(h);
+            h = NULL;
+
+            if (!Saved) { Fail("Unable to save alltags.icc"); goto Error; }
+
+            h = cmsOpenProfileFromFileTHR(DbgThread(), "alltags.icc", "r");
+            if (h == NULL) { Fail("Unable to reopen alltags.icc"); goto Error; }
+        }
+
+    }
+
+    /*
+    Not implemented (by design):
+
+    cmsSigDataTag                           = 0x64617461,  // 'data'  -- Unused
+    cmsSigDeviceSettingsTag                 = 0x64657673,  // 'devs'  -- Unused
+    cmsSigNamedColorTag                     = 0x6E636f6C,  // 'ncol'  -- Don't use this one, deprecated by ICC
+    cmsSigOutputResponseTag                 = 0x72657370,  // 'resp'  -- Possible patent on this
+    */
+
+    cmsCloseProfile(h);
+    remove("alltags.icc");
+    return 1;
+
+Error:
+    // h is NULL when the save or the reopen failed
+    if (h != NULL) cmsCloseProfile(h);
+    remove("alltags.icc");
+    return 0;
+}
+
+
+// Thanks to Christopher James Halse Rogers for the bugfixing and providing this test
+//
+// Writes a placeholder profile with each of several version numbers to
+// "versions.icc", reopens it and checks that the version read back agrees
+// with the one written to within 0.005. Returns 1 on success, 0 otherwise.
+static
+cmsInt32Number CheckVersionHeaderWriting(void)
+{
+    cmsHPROFILE h;
+    cmsFloat64Number got;
+    int index, count;
+    float test_versions[] = {
+      2.3f,
+      4.08f,
+      4.09f,
+      4.3f
+    };
+
+    // Computed once as a signed value to avoid a signed/unsigned comparison
+    count = (int) (sizeof(test_versions)/sizeof(test_versions[0]));
+
+    for (index = 0; index < count; index++) {
+
+      h = cmsCreateProfilePlaceholder(DbgThread());
+      if (h == NULL) return 0;
+
+      cmsSetProfileVersion(h, test_versions[index]);
+
+      cmsSaveProfileToFile(h, "versions.icc");
+      cmsCloseProfile(h);
+
+      h = cmsOpenProfileFromFileTHR(DbgThread(), "versions.icc", "r");
+      if (h == NULL) {
+        Fail("Unable to reopen versions.icc");
+        remove("versions.icc");
+        return 0;
+      }
+
+      // Read the version, then release the profile and the temporary file
+      // before judging the result, so the failure path leaks neither.
+      got = cmsGetProfileVersion(h);
+      cmsCloseProfile(h);
+      remove("versions.icc");
+
+      // Only the first 3 digits are significant
+      if (fabs(got - test_versions[index]) > 0.005) {
+        Fail("Version failed to round-trip: wrote %.2f, read %.2f",
+             test_versions[index], got);
+        return 0;
+      }
+    }
+    return 1;
+}
+
+
+// Test on Richard Hughes "crayons.icc"
+//
+// Opens the profile, reads its description tag and checks that the en/GB and
+// en/US translations are "Crayon Colours" and "Crayon Colors" respectively.
+// Returns TRUE on success; FALSE if the profile or tag is missing or a string
+// differs. The profile is closed on every path once it has been opened.
+static
+cmsInt32Number CheckMultilocalizedProfile(void)
+{
+    cmsHPROFILE hProfile;
+    cmsMLU *Pt;
+    char Buffer[256];
+    cmsInt32Number rc = FALSE;
+
+    hProfile = cmsOpenProfileFromFile("crayons.icc", "r");
+    if (hProfile == NULL) return FALSE;
+
+    Pt = (cmsMLU *) cmsReadTag(hProfile, cmsSigProfileDescriptionTag);
+    if (Pt != NULL) {
+
+        // Empty the buffer before each lookup in case nothing is written
+        Buffer[0] = 0;
+        cmsMLUgetASCII(Pt, "en", "GB", Buffer, 256);
+
+        if (strcmp(Buffer, "Crayon Colours") == 0) {
+
+            Buffer[0] = 0;
+            cmsMLUgetASCII(Pt, "en", "US", Buffer, 256);
+
+            if (strcmp(Buffer, "Crayon Colors") == 0) rc = TRUE;
+        }
+    }
+
+    cmsCloseProfile(hProfile);
+
+    return rc;
+}
+
+
+// Error reporting  -------------------------------------------------------------------------------------------------------
+
+
+static
+void ErrorReportingFunction(cmsContext ContextID, cmsUInt32Number ErrorCode, const char *Text)
+{
+    TrappedError = TRUE;
+    SimultaneousErrors++;
+    strncpy(ReasonToFailBuffer, Text, TEXT_ERROR_BUFFER_SIZE-1);
+
+    cmsUNUSED_PARAMETER(ContextID);
+    cmsUNUSED_PARAMETER(ErrorCode);
+}
+
+
+// Tries to open a series of invalid profiles (missing or malformed files, a
+// bad access mode, bogus memory blocks). Every attempt must return NULL, and
+// SimultaneousErrors must equal 9 afterwards. Returns 1 if so, 0 otherwise.
+static
+cmsInt32Number CheckBadProfiles(void)
+{
+    // { file name, access mode } pairs, tried in this order
+    static const char* const BadFiles[][2] = {
+        { "IDoNotExist.icc",        "r" },
+        { "IAmIllFormed*.icc",      "r" },
+        { "",                       "r" },   // No profile name given
+        { "..",                     "r" },
+        { "IHaveBadAccessMode.icc", "@" },
+        { "bad.icc",                "r" },
+        { "toosmall.icc",           "r" }
+    };
+
+    cmsHPROFILE h;
+    cmsUInt32Number i;
+
+    for (i = 0; i < sizeof(BadFiles) / sizeof(BadFiles[0]); i++) {
+
+        h = cmsOpenProfileFromFileTHR(DbgThread(), BadFiles[i][0], BadFiles[i][1]);
+        if (h != NULL) {
+            cmsCloseProfile(h);
+            return 0;
+        }
+    }
+
+    // NULL memory block
+    h = cmsOpenProfileFromMemTHR(DbgThread(), NULL, 3);
+    if (h != NULL) {
+        cmsCloseProfile(h);
+        return 0;
+    }
+
+    // Memory block far too small to be a profile
+    h = cmsOpenProfileFromMemTHR(DbgThread(), "123", 3);
+    if (h != NULL) {
+        cmsCloseProfile(h);
+        return 0;
+    }
+
+    return (SimultaneousErrors == 9) ? 1 : 0;
+}
+
+
+// Runs CheckBadProfiles() with ErrorReportingFunction installed as the log
+// handler, then restores FatalErrorQuit and clears TrappedError.
+// Returns the result of CheckBadProfiles().
+static
+cmsInt32Number CheckErrReportingOnBadProfiles(void)
+{
+    cmsInt32Number Outcome;
+
+    cmsSetLogErrorHandler(ErrorReportingFunction);
+    Outcome = CheckBadProfiles();
+    cmsSetLogErrorHandler(FatalErrorQuit);
+
+    // The errors above were expected, so forget them
+    TrappedError = FALSE;
+
+    return Outcome;
+}
+
+
+// Requests a series of transforms that must all be rejected: NULL profiles,
+// an unknown intent, formats that do not match the profile colour space, and
+// a mismatch against "test1.icc". Returns 1 if every request yields NULL,
+// 0 as soon as one unexpectedly succeeds. All profiles are closed on every path.
+static
+cmsInt32Number CheckBadTransforms(void)
+{
+    cmsHPROFILE h1 = cmsCreate_sRGBProfile();
+    cmsHPROFILE hp1, hp2;
+    cmsHTRANSFORM x1;
+
+    x1 = cmsCreateTransform(NULL, 0, NULL, 0, 0, 0);
+    if (x1 != NULL) goto UnexpectedSuccess;
+
+    x1 = cmsCreateTransform(h1, TYPE_RGB_8, h1, TYPE_RGB_8, 12345, 0);
+    if (x1 != NULL) goto UnexpectedSuccess;
+
+    x1 = cmsCreateTransform(h1, TYPE_CMYK_8, h1, TYPE_RGB_8, 0, 0);
+    if (x1 != NULL) goto UnexpectedSuccess;
+
+    x1 = cmsCreateTransform(h1, TYPE_RGB_8, h1, TYPE_CMYK_8, 1, 0);
+    if (x1 != NULL) goto UnexpectedSuccess;
+
+    // sRGB does its output as XYZ!
+    x1 = cmsCreateTransform(h1, TYPE_RGB_8, NULL, TYPE_Lab_8, 1, 0);
+    if (x1 != NULL) goto UnexpectedSuccess;
+
+    cmsCloseProfile(h1);
+
+    hp1 = cmsOpenProfileFromFile("test1.icc", "r");
+    hp2 = cmsCreate_sRGBProfile();
+
+    x1 = cmsCreateTransform(hp1, TYPE_BGR_8, hp2, TYPE_BGR_8, INTENT_PERCEPTUAL, 0);
+
+    cmsCloseProfile(hp1); cmsCloseProfile(hp2);
+    if (x1 != NULL) {
+        cmsDeleteTransform(x1);
+        return 0;
+    }
+
+    return 1;
+
+UnexpectedSuccess:
+    // Only reached while h1 is still open: release the transform and the
+    // profile, which the early returns used to leak.
+    cmsDeleteTransform(x1);
+    cmsCloseProfile(h1);
+    return 0;
+}
+
+// Runs CheckBadTransforms() with ErrorReportingFunction installed as the log
+// handler, then restores FatalErrorQuit and clears TrappedError.
+// Returns the result of CheckBadTransforms().
+static
+cmsInt32Number CheckErrReportingOnBadTransforms(void)
+{
+    cmsInt32Number Outcome;
+
+    cmsSetLogErrorHandler(ErrorReportingFunction);
+    Outcome = CheckBadTransforms();
+    cmsSetLogErrorHandler(FatalErrorQuit);
+
+    // The errors above were expected, so forget them
+    TrappedError = FALSE;
+
+    return Outcome;
+}
+
+
+
+
+// ---------------------------------------------------------------------------------------------------------
+
+// Checks that an 8-bit transform is (nearly) the identity.
+// Each level 0..0xFE is sent through xform on all channels; the first nChan
+// outputs may differ from the input by at most 2. Returns 1 if so, else 0.
+static
+cmsInt32Number Check8linearXFORM(cmsHTRANSFORM xform, cmsInt32Number nChan)
+{
+    cmsUInt8Number Src[cmsMAXCHANNELS], Dst[cmsMAXCHANNELS];
+    cmsInt32Number Level, Chan;
+    cmsInt32Number Worst = 0;
+
+    for (Level = 0; Level < 0xFF; Level++) {
+
+        // Same level on every channel
+        memset(Src, Level, sizeof(Src));
+        cmsDoTransform(xform, Src, Dst, 1);
+
+        for (Chan = 0; Chan < nChan; Chan++) {
+
+            cmsInt32Number Delta = abs(Dst[Chan] - Level);
+
+            if (Delta > Worst) Worst = Delta;
+        }
+    }
+
+    // Up to 2 levels of difference are tolerated on 8 bits
+    if (Worst > 2) {
+
+        Fail("Differences too big (%x)", Worst);
+        return 0;
+    }
+
+    return 1;
+}
+
+// Checks that two 8-bit transforms agree.
+// Each level 0..0xFE is sent through both transforms on all channels; the
+// first nChan outputs may differ by at most 2. Returns 1 if so, else 0.
+static
+cmsInt32Number Compare8bitXFORM(cmsHTRANSFORM xform1, cmsHTRANSFORM xform2, cmsInt32Number nChan)
+{
+    cmsUInt8Number Src[cmsMAXCHANNELS];
+    cmsUInt8Number DstA[cmsMAXCHANNELS], DstB[cmsMAXCHANNELS];
+    cmsInt32Number Level, Chan;
+    cmsInt32Number Worst = 0;
+
+    for (Level = 0; Level < 0xFF; Level++) {
+
+        // Same level on every channel
+        memset(Src, Level, sizeof(Src));
+
+        cmsDoTransform(xform1, Src, DstA, 1);
+        cmsDoTransform(xform2, Src, DstB, 1);
+
+        for (Chan = 0; Chan < nChan; Chan++) {
+
+            cmsInt32Number Delta = abs(DstB[Chan] - DstA[Chan]);
+
+            if (Delta > Worst) Worst = Delta;
+        }
+    }
+
+    // Up to 2 levels of difference are tolerated on 8 bits
+    if (Worst > 2) {
+
+        Fail("Differences too big (%x)", Worst);
+        return 0;
+    }
+
+    return 1;
+}
+
+
+// Checks that a 16-bit transform is (nearly) the identity.
+// Each level 0..0xFFFE is sent through xform on the first nChan channels;
+// the test stops with 0 as soon as the largest difference seen so far
+// exceeds 0x200. Returns 1 if the whole sweep stays within that bound.
+static
+cmsInt32Number Check16linearXFORM(cmsHTRANSFORM xform, cmsInt32Number nChan)
+{
+    cmsUInt16Number Src[cmsMAXCHANNELS], Dst[cmsMAXCHANNELS];
+    cmsInt32Number Level, Chan;
+    cmsInt32Number Worst = 0;
+
+    for (Level = 0; Level < 0xFFFF; Level++) {
+
+        for (Chan = 0; Chan < nChan; Chan++)
+            Src[Chan] = (cmsUInt16Number) Level;
+
+        cmsDoTransform(xform, Src, Dst, 1);
+
+        for (Chan = 0; Chan < nChan; Chan++) {
+
+            cmsInt32Number Delta = abs(Dst[Chan] - Level);
+
+            if (Delta > Worst) Worst = Delta;
+        }
+
+        // Evaluated after every level so the sweep stops at the first excess
+        if (Worst > 0x200) {
+
+            Fail("Differences too big (%x)", Worst);
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+// Checks that two 16-bit transforms agree.
+// Each level 0..0xFFFE is sent through both transforms on the first nChan
+// channels; after the full sweep the largest difference must not exceed
+// 0x200. Returns 1 if so, else 0.
+static
+cmsInt32Number Compare16bitXFORM(cmsHTRANSFORM xform1, cmsHTRANSFORM xform2, cmsInt32Number nChan)
+{
+    cmsUInt16Number Src[cmsMAXCHANNELS];
+    cmsUInt16Number DstA[cmsMAXCHANNELS], DstB[cmsMAXCHANNELS];
+    cmsInt32Number Level, Chan;
+    cmsInt32Number Worst = 0;
+
+    for (Level = 0; Level < 0xFFFF; Level++) {
+
+        for (Chan = 0; Chan < nChan; Chan++)
+            Src[Chan] = (cmsUInt16Number) Level;
+
+        cmsDoTransform(xform1, Src, DstA, 1);
+        cmsDoTransform(xform2, Src, DstB, 1);
+
+        for (Chan = 0; Chan < nChan; Chan++) {
+
+            cmsInt32Number Delta = abs(DstB[Chan] - DstA[Chan]);
+
+            if (Delta > Worst) Worst = Delta;
+        }
+    }
+
+    // Differences up to 0x200 are tolerated on 16 bits
+    if (Worst > 0x200) {
+
+        Fail("Differences too big (%x)", Worst);
+        return 0;
+    }
+
+    return 1;
+}
+
+
+// Checks that a float transform is the identity.
+// The levels j/65535 for j in 0..0xFFFE are sent through xform on the first
+// nChan channels, and each output is compared with its input through
+// IsGoodFixed15_16(). Returns 0 at the first mismatch, 1 otherwise.
+static
+cmsInt32Number CheckFloatlinearXFORM(cmsHTRANSFORM xform, cmsInt32Number nChan)
+{
+    cmsFloat32Number Src[cmsMAXCHANNELS], Dst[cmsMAXCHANNELS];
+    cmsInt32Number Step, Chan;
+
+    for (Step = 0; Step < 0xFFFF; Step++) {
+
+        const cmsFloat32Number Level = (cmsFloat32Number) (Step / 65535.0);
+
+        for (Chan = 0; Chan < nChan; Chan++)
+            Src[Chan] = Level;
+
+        cmsDoTransform(xform, Src, Dst, 1);
+
+        for (Chan = 0; Chan < nChan; Chan++) {
+
+            if (!IsGoodFixed15_16("linear xform cmsFloat32Number", Dst[Chan], Level))
+                return 0;
+        }
+    }
+
+    return 1;
+}
+
+
+// Checks that two float transforms agree.
+// The levels j/65535 for j in 0..0xFFFE are sent through both transforms on
+// the first nChan channels, and the outputs are compared channel by channel
+// through IsGoodFixed15_16(). Returns 0 at the first mismatch, 1 otherwise.
+static
+cmsInt32Number CompareFloatXFORM(cmsHTRANSFORM xform1, cmsHTRANSFORM xform2, cmsInt32Number nChan)
+{
+    cmsFloat32Number Src[cmsMAXCHANNELS];
+    cmsFloat32Number DstA[cmsMAXCHANNELS], DstB[cmsMAXCHANNELS];
+    cmsInt32Number Step, Chan;
+
+    for (Step = 0; Step < 0xFFFF; Step++) {
+
+        const cmsFloat32Number Level = (cmsFloat32Number) (Step / 65535.0);
+
+        for (Chan = 0; Chan < nChan; Chan++)
+            Src[Chan] = Level;
+
+        cmsDoTransform(xform1, Src, DstA, 1);
+        cmsDoTransform(xform2, Src, DstB, 1);
+
+        for (Chan = 0; Chan < nChan; Chan++) {
+
+            if (!IsGoodFixed15_16("linear xform cmsFloat32Number", DstA[Chan], DstB[Chan]))
+                return 0;
+        }
+    }
+
+    return 1;
+}
+
+
+// Curves only transforms ----------------------------------------------------------------------------------------
+
+// Exercises transforms built only from tone curves, using three gray
+// linearization device links with gammas 2.2, 1/2.2 and 4.84.
+// First, 2.2 followed by 1/2.2 must behave as the identity in float, 8 and
+// 16 bits. Then 2.2 applied twice must match 4.84 applied once, again in
+// float, 8 and 16 bits. Returns 1 when every sub-test passes, 0 otherwise.
+static
+cmsInt32Number CheckCurvesOnlyTransforms(void)
+{
+    cmsToneCurve* Gamma22    = cmsBuildGamma(DbgThread(), 2.2);
+    cmsToneCurve* GammaInv22 = cmsBuildGamma(DbgThread(), 1/2.2);
+    cmsToneCurve* Gamma484   = cmsBuildGamma(DbgThread(), 4.84);
+
+    cmsHPROFILE Link22    = cmsCreateLinearizationDeviceLinkTHR(DbgThread(), cmsSigGrayData, &Gamma22);
+    cmsHPROFILE LinkInv22 = cmsCreateLinearizationDeviceLinkTHR(DbgThread(), cmsSigGrayData, &GammaInv22);
+    cmsHPROFILE Link484   = cmsCreateLinearizationDeviceLinkTHR(DbgThread(), cmsSigGrayData, &Gamma484);
+
+    cmsHTRANSFORM First, Second;
+    cmsInt32Number Status = 1;
+
+    // --- 2.2 followed by 1/2.2: expected to be the identity ---
+
+    SubTest("Gray float optimizeable transform");
+    First = cmsCreateTransform(Link22, TYPE_GRAY_FLT, LinkInv22, TYPE_GRAY_FLT, INTENT_PERCEPTUAL, 0);
+    Status = Status & CheckFloatlinearXFORM(First, 1);
+    cmsDeleteTransform(First);
+    if (!Status) goto Cleanup;
+
+    SubTest("Gray 8 optimizeable transform");
+    First = cmsCreateTransform(Link22, TYPE_GRAY_8, LinkInv22, TYPE_GRAY_8, INTENT_PERCEPTUAL, 0);
+    Status = Status & Check8linearXFORM(First, 1);
+    cmsDeleteTransform(First);
+    if (!Status) goto Cleanup;
+
+    SubTest("Gray 16 optimizeable transform");
+    First = cmsCreateTransform(Link22, TYPE_GRAY_16, LinkInv22, TYPE_GRAY_16, INTENT_PERCEPTUAL, 0);
+    Status = Status & Check16linearXFORM(First, 1);
+    cmsDeleteTransform(First);
+    if (!Status) goto Cleanup;
+
+    // --- 2.2 applied twice versus 4.84 applied once ---
+
+    SubTest("Gray float non-optimizeable transform");
+    First  = cmsCreateTransform(Link22, TYPE_GRAY_FLT, Link22, TYPE_GRAY_FLT, INTENT_PERCEPTUAL, 0);
+    Second = cmsCreateTransform(Link484, TYPE_GRAY_FLT, NULL, TYPE_GRAY_FLT, INTENT_PERCEPTUAL, 0);
+    Status = Status & CompareFloatXFORM(First, Second, 1);
+    cmsDeleteTransform(First);
+    cmsDeleteTransform(Second);
+    if (!Status) goto Cleanup;
+
+    SubTest("Gray 8 non-optimizeable transform");
+    First  = cmsCreateTransform(Link22, TYPE_GRAY_8, Link22, TYPE_GRAY_8, INTENT_PERCEPTUAL, 0);
+    Second = cmsCreateTransform(Link484, TYPE_GRAY_8, NULL, TYPE_GRAY_8, INTENT_PERCEPTUAL, 0);
+    Status = Status & Compare8bitXFORM(First, Second, 1);
+    cmsDeleteTransform(First);
+    cmsDeleteTransform(Second);
+    if (!Status) goto Cleanup;
+
+    SubTest("Gray 16 non-optimizeable transform");
+    First  = cmsCreateTransform(Link22, TYPE_GRAY_16, Link22, TYPE_GRAY_16, INTENT_PERCEPTUAL, 0);
+    Second = cmsCreateTransform(Link484, TYPE_GRAY_16, NULL, TYPE_GRAY_16, INTENT_PERCEPTUAL, 0);
+    Status = Status & Compare16bitXFORM(First, Second, 1);
+    cmsDeleteTransform(First);
+    cmsDeleteTransform(Second);
+
+Cleanup:
+
+    cmsCloseProfile(Link22);
+    cmsCloseProfile(LinkInv22);
+    cmsCloseProfile(Link484);
+
+    cmsFreeToneCurve(Gamma22);
+    cmsFreeToneCurve(GammaInv22);
+    cmsFreeToneCurve(Gamma484);
+
+    return Status;
+}
+
+
+
+// Lab to Lab trivial transforms ----------------------------------------------------------------------------------------
+
+static cmsFloat64Number MaxDE;
+
+// Pushes one Lab colour through xform and measures how far the output drifts
+// from the input. The worst deltaE seen so far is kept in the file-level MaxDE;
+// returns 0 (after reporting through Fail) once that maximum exceeds 0.003,
+// and 1 otherwise.
+static
+cmsInt32Number CheckOneLab(cmsHTRANSFORM xform, cmsFloat64Number L, cmsFloat64Number a, cmsFloat64Number b)
+{
+    cmsCIELab Src, Dst;
+    cmsFloat64Number Delta;
+
+    Src.L = L;
+    Src.a = a;
+    Src.b = b;
+
+    cmsDoTransform(xform, &Src, &Dst, 1);
+    Delta = cmsDeltaE(&Src, &Dst);
+
+    // Keep the running maximum up to date
+    if (Delta > MaxDE)
+        MaxDE = Delta;
+
+    // Still within tolerance
+    if (!(MaxDE > 0.003))
+        return 1;
+
+    Fail("dE=%f Lab1=(%f, %f, %f)\n\tLab2=(%f %f %f)", MaxDE, Src.L, Src.a, Src.b, Dst.L, Dst.a, Dst.b);
+
+    // The transform is evaluated once more on the failing sample, as the
+    // original code did (apparently a hook for stepping in with a debugger).
+    cmsDoTransform(xform, &Src, &Dst, 1);
+    return 0;
+}
+
+// Check several Lab values, slicing the 16-bit encoding range at non-exact
+// values. Precision should be 16 bits. Roughly 50x50x50 checks.
+// Returns 1 if every sample passes CheckOneLab(), 0 at the first failure.
+static
+cmsInt32Number CheckSeveralLab(cmsHTRANSFORM xform)
+{
+    cmsInt32Number EncL, EncA, EncB;
+
+    // CheckOneLab() accumulates the worst error in MaxDE, so start from zero
+    MaxDE = 0;
+
+    for (EncL = 0; EncL < 65536; EncL += 1311) {
+
+        const cmsFloat64Number LabL = (EncL * 100.0) / 65535.0;
+
+        for (EncA = 0; EncA < 65536; EncA += 1232) {
+
+            const cmsFloat64Number Laba = (EncA / 257.0) - 128;
+
+            for (EncB = 0; EncB < 65536; EncB += 1111) {
+
+                const cmsFloat64Number Labb = (EncB / 257.0) - 128;
+
+                if (CheckOneLab(xform, LabL, Laba, Labb) == 0)
+                    return 0;
+            }
+        }
+    }
+
+    return 1;
+}
+
+
+// Builds a double-precision Lab -> Lab transform between the two given
+// profiles and runs the Lab sweep over it. Both profile handles are closed
+// here, so the caller hands over ownership. txt is the sub-test label.
+static
+cmsInt32Number OneTrivialLab(cmsHPROFILE hLab1, cmsHPROFILE hLab2, const char* txt)
+{
+    cmsInt32Number Result;
+    cmsHTRANSFORM hLabToLab;
+
+    SubTest(txt);
+
+    hLabToLab = cmsCreateTransformTHR(DbgThread(), hLab1, TYPE_Lab_DBL, hLab2, TYPE_Lab_DBL, INTENT_RELATIVE_COLORIMETRIC, 0);
+
+    // Profiles are released right after the transform has been built
+    cmsCloseProfile(hLab1);
+    cmsCloseProfile(hLab2);
+
+    Result = CheckSeveralLab(hLabToLab);
+    cmsDeleteTransform(hLabToLab);
+
+    return Result;
+}
+
+
+// Runs the trivial Lab -> Lab check for every pairing of Lab v4 and Lab v2
+// profiles. Stops at the first failing pairing; returns 1 only if all four pass.
+static
+cmsInt32Number CheckFloatLabTransforms(void)
+{
+    if (!OneTrivialLab(cmsCreateLab4ProfileTHR(DbgThread(), NULL), cmsCreateLab4ProfileTHR(DbgThread(), NULL),  "Lab4/Lab4"))
+        return 0;
+
+    if (!OneTrivialLab(cmsCreateLab2ProfileTHR(DbgThread(), NULL), cmsCreateLab2ProfileTHR(DbgThread(), NULL),  "Lab2/Lab2"))
+        return 0;
+
+    if (!OneTrivialLab(cmsCreateLab4ProfileTHR(DbgThread(), NULL), cmsCreateLab2ProfileTHR(DbgThread(), NULL),  "Lab4/Lab2"))
+        return 0;
+
+    if (!OneTrivialLab(cmsCreateLab2ProfileTHR(DbgThread(), NULL), cmsCreateLab4ProfileTHR(DbgThread(), NULL),  "Lab2/Lab4"))
+        return 0;
+
+    return 1;
+}
+
+
+// Checks the 16-bit Lab encodings against double-precision Lab, in both
+// directions, using Lab white (L=100, a=0, b=0) as the probe:
+//   - TYPE_Lab_16   white is encoded as FFFF/8080/8080
+//   - TYPE_LabV2_16 white is encoded as FF00/8000/8000
+// Returns 1 if all four conversions match, 0 otherwise. Each transform is
+// deleted before its result is examined, so no handle leaks on a failure.
+static
+cmsInt32Number CheckEncodedLabTransforms(void)
+{
+    cmsHTRANSFORM xform;
+    cmsUInt16Number In[3];
+    cmsCIELab Lab;
+    cmsCIELab White = { 100, 0, 0 };
+    cmsHPROFILE hLab1 = cmsCreateLab4ProfileTHR(DbgThread(), NULL);
+    cmsHPROFILE hLab2 = cmsCreateLab4ProfileTHR(DbgThread(), NULL);
+
+
+    // Lab 16 bits -> Lab double
+    xform = cmsCreateTransformTHR(DbgThread(), hLab1, TYPE_Lab_16, hLab2, TYPE_Lab_DBL, INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsCloseProfile(hLab1); cmsCloseProfile(hLab2);
+
+    In[0] = 0xFFFF;
+    In[1] = 0x8080;
+    In[2] = 0x8080;
+
+    cmsDoTransform(xform, In, &Lab, 1);
+    cmsDeleteTransform(xform);
+
+    if (cmsDeltaE(&Lab, &White) > 0.0001) return 0;
+
+    // Lab V2 16 bits -> Lab double
+    hLab1 = cmsCreateLab2ProfileTHR(DbgThread(), NULL);
+    hLab2 = cmsCreateLab4ProfileTHR(DbgThread(), NULL);
+
+    xform = cmsCreateTransformTHR(DbgThread(), hLab1, TYPE_LabV2_16, hLab2, TYPE_Lab_DBL, INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsCloseProfile(hLab1); cmsCloseProfile(hLab2);
+
+
+    In[0] = 0xFF00;
+    In[1] = 0x8000;
+    In[2] = 0x8000;
+
+    cmsDoTransform(xform, In, &Lab, 1);
+    cmsDeleteTransform(xform);
+
+    if (cmsDeltaE(&Lab, &White) > 0.0001) return 0;
+
+    // Lab double -> Lab V2 16 bits
+    hLab2 = cmsCreateLab2ProfileTHR(DbgThread(), NULL);
+    hLab1 = cmsCreateLab4ProfileTHR(DbgThread(), NULL);
+
+    xform = cmsCreateTransformTHR(DbgThread(), hLab1, TYPE_Lab_DBL, hLab2, TYPE_LabV2_16, INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsCloseProfile(hLab1); cmsCloseProfile(hLab2);
+
+    Lab.L = 100;
+    Lab.a = 0;
+    Lab.b = 0;
+
+    cmsDoTransform(xform, &Lab, In, 1);
+    cmsDeleteTransform(xform);
+
+    if (In[0] != 0xFF00 ||
+        In[1] != 0x8000 ||
+        In[2] != 0x8000) return 0;
+
+    // Lab double -> Lab 16 bits
+    hLab1 = cmsCreateLab4ProfileTHR(DbgThread(), NULL);
+    hLab2 = cmsCreateLab4ProfileTHR(DbgThread(), NULL);
+
+    xform = cmsCreateTransformTHR(DbgThread(), hLab1, TYPE_Lab_DBL, hLab2, TYPE_Lab_16, INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsCloseProfile(hLab1); cmsCloseProfile(hLab2);
+
+    Lab.L = 100;
+    Lab.a = 0;
+    Lab.b = 0;
+
+    cmsDoTransform(xform, &Lab, In, 1);
+    cmsDeleteTransform(xform);
+
+    if (In[0] != 0xFFFF ||
+        In[1] != 0x8080 ||
+        In[2] != 0x8080) return 0;
+
+    return 1;
+}
+
+// Saves a Lab -> Lab 8-bit transform as two device-link files (requested
+// versions 3.4 and 4.3), reloads them, and runs the Lab sweep over the reloaded
+// profiles: each one on its own, then combined in both orders.
+// Returns 1 if every sweep passes. The two temporary .icc files are removed
+// before returning, on success and on failure alike.
+static
+cmsInt32Number CheckStoredIdentities(void)
+{
+    cmsHPROFILE hLab, hLink, h4, h2;
+    cmsHTRANSFORM xform;
+    cmsInt32Number rc = 1;
+
+    hLab  = cmsCreateLab4ProfileTHR(DbgThread(), NULL);
+    xform = cmsCreateTransformTHR(DbgThread(), hLab, TYPE_Lab_8, hLab, TYPE_Lab_8, 0, 0);
+
+    // Same transform written out twice, once per requested profile version
+    hLink = cmsTransform2DeviceLink(xform, 3.4, 0);
+    cmsSaveProfileToFile(hLink, "abstractv2.icc");
+    cmsCloseProfile(hLink);
+
+    hLink = cmsTransform2DeviceLink(xform, 4.3, 0);
+    cmsSaveProfileToFile(hLink, "abstractv4.icc");
+    cmsCloseProfile(hLink);
+
+    cmsDeleteTransform(xform);
+    cmsCloseProfile(hLab);
+
+    h4 = cmsOpenProfileFromFileTHR(DbgThread(), "abstractv4.icc", "r");
+
+    xform = cmsCreateTransformTHR(DbgThread(), h4, TYPE_Lab_DBL, h4, TYPE_Lab_DBL, INTENT_RELATIVE_COLORIMETRIC, 0);
+
+    SubTest("V4");
+    rc &= CheckSeveralLab(xform);
+
+    cmsDeleteTransform(xform);
+    cmsCloseProfile(h4);
+    if (!rc) goto Error;
+
+
+    SubTest("V2");
+    h2 = cmsOpenProfileFromFileTHR(DbgThread(), "abstractv2.icc", "r");
+
+    xform = cmsCreateTransformTHR(DbgThread(), h2, TYPE_Lab_DBL, h2, TYPE_Lab_DBL, INTENT_RELATIVE_COLORIMETRIC, 0);
+    rc &= CheckSeveralLab(xform);
+    cmsDeleteTransform(xform);
+    cmsCloseProfile(h2);
+    if (!rc) goto Error;
+
+
+    // NOTE(review): the label says "V2 -> V4" but the transform below is built
+    // with h4 as input and h2 as output (and the next sub-test is the mirror
+    // image). Both orders are covered either way; confirm which label is meant.
+    SubTest("V2 -> V4");
+    h2 = cmsOpenProfileFromFileTHR(DbgThread(), "abstractv2.icc", "r");
+    h4 = cmsOpenProfileFromFileTHR(DbgThread(), "abstractv4.icc", "r");
+
+    xform = cmsCreateTransformTHR(DbgThread(), h4, TYPE_Lab_DBL, h2, TYPE_Lab_DBL, INTENT_RELATIVE_COLORIMETRIC, 0);
+    rc &= CheckSeveralLab(xform);
+    cmsDeleteTransform(xform);
+    cmsCloseProfile(h2);
+    cmsCloseProfile(h4);
+
+    // Unlike the earlier sub-tests there is no early exit here, so this last
+    // sub-test runs even when the previous one failed.
+    SubTest("V4 -> V2");
+    h2 = cmsOpenProfileFromFileTHR(DbgThread(), "abstractv2.icc", "r");
+    h4 = cmsOpenProfileFromFileTHR(DbgThread(), "abstractv4.icc", "r");
+
+    xform = cmsCreateTransformTHR(DbgThread(), h2, TYPE_Lab_DBL, h4, TYPE_Lab_DBL, INTENT_RELATIVE_COLORIMETRIC, 0);
+    rc &= CheckSeveralLab(xform);
+    cmsDeleteTransform(xform);
+    cmsCloseProfile(h2);
+    cmsCloseProfile(h4);
+
+    // Common exit: delete the temporary profiles written above.
+Error:
+    remove("abstractv2.icc");
+    remove("abstractv4.icc");
+    return rc;
+
+}
+
+
+
+// Transforms a matrix profile to itself and tests floating point accuracy.
+// Done twice: with the profile from Create_AboveRGB() and with sRGB.
+// Both runs always execute; the result is 1 only if both pass.
+static
+cmsInt32Number CheckMatrixShaperXFORMFloat(void)
+{
+    cmsHPROFILE hProfile;
+    cmsHTRANSFORM hSelf;
+    cmsInt32Number AboveOk, sRGBOk;
+
+    // "Above" RGB onto itself
+    hProfile = Create_AboveRGB();
+    hSelf = cmsCreateTransformTHR(DbgThread(), hProfile, TYPE_RGB_FLT, hProfile, TYPE_RGB_FLT,  INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsCloseProfile(hProfile);
+
+    AboveOk = CheckFloatlinearXFORM(hSelf, 3);
+    cmsDeleteTransform(hSelf);
+
+    // sRGB onto itself
+    hProfile = cmsCreate_sRGBProfileTHR(DbgThread());
+    hSelf = cmsCreateTransformTHR(DbgThread(), hProfile, TYPE_RGB_FLT, hProfile, TYPE_RGB_FLT,  INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsCloseProfile(hProfile);
+
+    sRGBOk = CheckFloatlinearXFORM(hSelf, 3);
+    cmsDeleteTransform(hSelf);
+
+    if (!AboveOk)
+        return 0;
+
+    return sRGBOk != 0;
+}
+
+// Transforms a matrix profile to itself and tests 16 bits accuracy.
+// Done twice: with the profile from Create_AboveRGB() and with sRGB.
+// Both runs always execute; the result is 1 only if both pass.
+static
+cmsInt32Number CheckMatrixShaperXFORM16(void)
+{
+    cmsHPROFILE hProfile;
+    cmsHTRANSFORM hSelf;
+    cmsInt32Number AboveOk, sRGBOk;
+
+    // "Above" RGB onto itself
+    hProfile = Create_AboveRGB();
+    hSelf = cmsCreateTransformTHR(DbgThread(), hProfile, TYPE_RGB_16, hProfile, TYPE_RGB_16,  INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsCloseProfile(hProfile);
+
+    AboveOk = Check16linearXFORM(hSelf, 3);
+    cmsDeleteTransform(hSelf);
+
+    // sRGB onto itself
+    hProfile = cmsCreate_sRGBProfileTHR(DbgThread());
+    hSelf = cmsCreateTransformTHR(DbgThread(), hProfile, TYPE_RGB_16, hProfile, TYPE_RGB_16,  INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsCloseProfile(hProfile);
+
+    sRGBOk = Check16linearXFORM(hSelf, 3);
+    cmsDeleteTransform(hSelf);
+
+    if (!AboveOk)
+        return 0;
+
+    return sRGBOk != 0;
+}
+
+
+// Transforms a matrix profile to itself and tests 8 bits accuracy.
+// Done twice: with the profile from Create_AboveRGB() and with sRGB.
+// Both runs always execute; the result is 1 only if both pass.
+static
+cmsInt32Number CheckMatrixShaperXFORM8(void)
+{
+    cmsHPROFILE hProfile;
+    cmsHTRANSFORM hSelf;
+    cmsInt32Number AboveOk, sRGBOk;
+
+    // "Above" RGB onto itself
+    hProfile = Create_AboveRGB();
+    hSelf = cmsCreateTransformTHR(DbgThread(), hProfile, TYPE_RGB_8, hProfile, TYPE_RGB_8,  INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsCloseProfile(hProfile);
+
+    AboveOk = Check8linearXFORM(hSelf, 3);
+    cmsDeleteTransform(hSelf);
+
+    // sRGB onto itself
+    hProfile = cmsCreate_sRGBProfileTHR(DbgThread());
+    hSelf = cmsCreateTransformTHR(DbgThread(), hProfile, TYPE_RGB_8, hProfile, TYPE_RGB_8,  INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsCloseProfile(hProfile);
+
+    sRGBOk = Check8linearXFORM(hSelf, 3);
+    cmsDeleteTransform(hSelf);
+
+    if (!AboveOk)
+        return 0;
+
+    return sRGBOk != 0;
+}
+
+
+// TODO: Check LUT based to LUT based transforms for CMYK
+
+
+
+
+
+
+// -----------------------------------------------------------------------------------------------------------------
+
+
+// Check one known value: an RGB triplet given on the 0..255 scale is scaled to
+// 0..1 floats, sent through xform, and the three double outputs are compared
+// against X, Y, Z with tolerance err. The outputs are reported under the names
+// "X", "Y" and "Z" whatever the output space of xform is.
+// Returns 1 when all three components are good, 0 at the first bad one.
+static
+cmsInt32Number CheckOneRGB_f(cmsHTRANSFORM xform, cmsInt32Number R, cmsInt32Number G, cmsInt32Number B, cmsFloat64Number X, cmsFloat64Number Y, cmsFloat64Number Z, cmsFloat64Number err)
+{
+    cmsFloat32Number Pixel[3];
+    cmsFloat64Number Result[3];
+
+    Pixel[0] = (cmsFloat32Number) (R / 255.0);
+    Pixel[1] = (cmsFloat32Number) (G / 255.0);
+    Pixel[2] = (cmsFloat32Number) (B / 255.0);
+
+    cmsDoTransform(xform, Pixel, Result, 1);
+
+    if (!IsGoodVal("X", X , Result[0], err)) return 0;
+    if (!IsGoodVal("Y", Y , Result[1], err)) return 0;
+    if (!IsGoodVal("Z", Z , Result[2], err)) return 0;
+
+    return 1;
+}
+
+// Checks sRGB float input against known XYZ and Lab values.
+// Two transforms are built from the sRGB profile, one to XYZ doubles and one to
+// Lab doubles, and a handful of reference colours are pushed through each.
+// Returns non-zero only if every reference colour is within tolerance.
+static
+cmsInt32Number Chack_sRGB_Float(void)
+{
+    cmsHPROFILE hsRGB, hXYZ, hLab;
+    cmsHTRANSFORM xform1, xform2;
+    cmsInt32Number rc;
+
+
+    hsRGB = cmsCreate_sRGBProfileTHR(DbgThread());
+    hXYZ  = cmsCreateXYZProfileTHR(DbgThread());
+    hLab  = cmsCreateLab4ProfileTHR(DbgThread(), NULL);
+
+    xform1 =  cmsCreateTransformTHR(DbgThread(), hsRGB, TYPE_RGB_FLT, hXYZ, TYPE_XYZ_DBL,
+                                INTENT_RELATIVE_COLORIMETRIC, 0);
+
+    xform2 =  cmsCreateTransformTHR(DbgThread(), hsRGB, TYPE_RGB_FLT, hLab, TYPE_Lab_DBL,
+                                INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsCloseProfile(hsRGB);
+    cmsCloseProfile(hXYZ);
+    cmsCloseProfile(hLab);
+
+    // Reset the file-level error tracker (presumably updated by IsGoodVal;
+    // it is defined outside this function -- confirm).
+    MaxErr = 0;
+
+    // Xform 1 goes from sRGB to XYZ. Inputs are written on the 0..255 scale and
+    // converted to 0..1 floats by CheckOneRGB_f; 0.0001 error max.
+    rc  = CheckOneRGB_f(xform1, 1, 1, 1,        0.0002927, 0.0003035,  0.000250,  0.0001);
+    rc  &= CheckOneRGB_f(xform1, 127, 127, 127, 0.2046329, 0.212230,   0.175069,  0.0001);
+    rc  &= CheckOneRGB_f(xform1, 12, 13, 15,    0.0038364, 0.0039928,  0.003853,  0.0001);
+    rc  &= CheckOneRGB_f(xform1, 128, 0, 0,     0.0941240, 0.0480256,  0.003005,  0.0001);
+    rc  &= CheckOneRGB_f(xform1, 190, 25, 210,  0.3204592, 0.1605926,  0.468213,  0.0001);
+
+    // Xform 2 goes from sRGB to Lab, we allow 0.01 error max. The expected
+    // L, a, b values are passed in the X, Y, Z parameter slots.
+    rc  &= CheckOneRGB_f(xform2, 1, 1, 1,       0.2741748, 0, 0,                   0.01);
+    rc  &= CheckOneRGB_f(xform2, 127, 127, 127, 53.192776, 0, 0,                   0.01);
+    rc  &= CheckOneRGB_f(xform2, 190, 25, 210,  47.052136, 74.565610, -56.883274,  0.01);
+    rc  &= CheckOneRGB_f(xform2, 128, 0, 0,     26.164701, 48.478171, 39.4384713,  0.01);
+
+    cmsDeleteTransform(xform1);
+    cmsDeleteTransform(xform2);
+    return rc;
+}
+
+
+// ---------------------------------------------------
+
+// Computes the XYZ coordinates of the pure red, green and blue colorants of
+// hProfile under the given rendering intent, by transforming the three unit
+// RGB vectors to XYZ with caching and optimization turned off.
+// Writes the three XYZ values into *result and returns TRUE; returns FALSE if
+// the XYZ profile or the transform cannot be created.
+static
+cmsBool GetProfileRGBPrimaries(cmsHPROFILE hProfile,
+                                cmsCIEXYZTRIPLE *result,
+                                cmsUInt32Number intent)
+{
+    cmsFloat64Number Colorants[3][3] = {
+        {1., 0., 0.},
+        {0., 1., 0.},
+        {0., 0., 1.}
+    };
+    cmsHPROFILE hOutXYZ;
+    cmsHTRANSFORM hRGB2XYZ;
+
+    hOutXYZ = cmsCreateXYZProfile();
+    if (hOutXYZ == NULL)
+        return FALSE;
+
+    hRGB2XYZ = cmsCreateTransform(hProfile, TYPE_RGB_DBL, hOutXYZ, TYPE_XYZ_DBL,
+                                  intent, cmsFLAGS_NOCACHE | cmsFLAGS_NOOPTIMIZE);
+
+    // The XYZ profile is released whether or not the transform was created
+    cmsCloseProfile(hOutXYZ);
+
+    if (hRGB2XYZ == NULL)
+        return FALSE;
+
+    // Three pixels in, three XYZ values out
+    cmsDoTransform(hRGB2XYZ, Colorants, result, 3);
+    cmsDeleteTransform(hRGB2XYZ);
+
+    return TRUE;
+}
+
+
+// Verifies that the built-in sRGB profile yields the expected chromaticities
+// for its red, green and blue primaries under the absolute colorimetric intent.
+// Returns TRUE (non-zero) on success, 0 / FALSE on any failure.
+static
+int CheckRGBPrimaries(void)
+{
+    cmsHPROFILE hsRGB;
+    cmsCIEXYZTRIPLE tripXYZ;
+    cmsCIExyYTRIPLE tripxyY;
+    cmsBool result;
+
+    // NOTE(review): this sets the adaptation state globally and does not
+    // restore it afterwards -- confirm later tests do not depend on it.
+    cmsSetAdaptationState(0);
+    hsRGB = cmsCreate_sRGBProfileTHR(DbgThread());
+    if (!hsRGB) return 0;
+
+    result = GetProfileRGBPrimaries(hsRGB, &tripXYZ,
+        INTENT_ABSOLUTE_COLORIMETRIC);
+
+    cmsCloseProfile(hsRGB);
+    if (!result) return 0;
+
+    // Convert each primary from XYZ to xyY so chromaticities can be compared
+    cmsXYZ2xyY(&tripxyY.Red, &tripXYZ.Red);
+    cmsXYZ2xyY(&tripxyY.Green, &tripXYZ.Green);
+    cmsXYZ2xyY(&tripxyY.Blue, &tripXYZ.Blue);
+
+    /* values were taken from
+    http://en.wikipedia.org/wiki/RGB_color_spaces#Specifications */
+
+    if (!IsGoodFixed15_16("xRed", tripxyY.Red.x, 0.64) ||
+        !IsGoodFixed15_16("yRed", tripxyY.Red.y, 0.33) ||
+        !IsGoodFixed15_16("xGreen", tripxyY.Green.x, 0.30) ||
+        !IsGoodFixed15_16("yGreen", tripxyY.Green.y, 0.60) ||
+        !IsGoodFixed15_16("xBlue", tripxyY.Blue.x, 0.15) ||
+        !IsGoodFixed15_16("yBlue", tripxyY.Blue.y, 0.06)) {
+            Fail("One or more primaries are wrong.");
+            return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+// -----------------------------------------------------------------------------------------------------------------
+
+// This function will check CMYK -> CMYK transforms between the two profile
+// files given by name (the local handles are called hSWOP and hFOGRA;
+// presumably the test files are SWOP and FOGRA29 profiles -- confirm).
+//
+// For K = 0..100 with C/M/Y fixed at 10/20/30, the colour is converted between
+// the two profiles using Intent, and the Lab lightness of the source colour is
+// compared with the Lab lightness of the converted colour. The same sweep is
+// then repeated in the opposite direction. Returns non-zero when the largest
+// lightness difference found is below 3.0.
+
+static
+cmsInt32Number CheckCMYK(cmsInt32Number Intent, const char *Profile1, const char* Profile2)
+{
+    cmsHPROFILE hSWOP  = cmsOpenProfileFromFileTHR(DbgThread(), Profile1, "r");
+    cmsHPROFILE hFOGRA = cmsOpenProfileFromFileTHR(DbgThread(), Profile2, "r");
+    cmsHTRANSFORM xform, swop_lab, fogra_lab;
+    cmsFloat32Number CMYK1[4], CMYK2[4];
+    cmsCIELab Lab1, Lab2;
+    cmsHPROFILE hLab;
+    cmsFloat64Number DeltaL, Max;
+    cmsInt32Number i;
+
+    hLab = cmsCreateLab4ProfileTHR(DbgThread(), NULL);
+
+    // Profile1 -> Profile2 conversion under test
+    xform = cmsCreateTransformTHR(DbgThread(), hSWOP, TYPE_CMYK_FLT, hFOGRA, TYPE_CMYK_FLT, Intent, 0);
+
+    // Helper transforms used to measure each CMYK value in Lab
+    swop_lab = cmsCreateTransformTHR(DbgThread(), hSWOP,   TYPE_CMYK_FLT, hLab, TYPE_Lab_DBL, Intent, 0);
+    fogra_lab = cmsCreateTransformTHR(DbgThread(), hFOGRA, TYPE_CMYK_FLT, hLab, TYPE_Lab_DBL, Intent, 0);
+
+    Max = 0;
+    for (i=0; i <= 100; i++) {
+
+        CMYK1[0] = 10;
+        CMYK1[1] = 20;
+        CMYK1[2] = 30;
+        CMYK1[3] = (cmsFloat32Number) i;
+
+        cmsDoTransform(swop_lab, CMYK1, &Lab1, 1);
+        cmsDoTransform(xform, CMYK1, CMYK2, 1);
+        cmsDoTransform(fogra_lab, CMYK2, &Lab2, 1);
+
+        // Only lightness is compared
+        DeltaL = fabs(Lab1.L - Lab2.L);
+
+        if (DeltaL > Max) Max = DeltaL;
+    }
+
+
+    cmsDeleteTransform(xform);
+
+
+    // Same sweep in the opposite direction; Max carries over from the first pass
+    xform = cmsCreateTransformTHR(DbgThread(),  hFOGRA, TYPE_CMYK_FLT, hSWOP, TYPE_CMYK_FLT, Intent, 0);
+
+    for (i=0; i <= 100; i++) {
+        CMYK1[0] = 10;
+        CMYK1[1] = 20;
+        CMYK1[2] = 30;
+        CMYK1[3] = (cmsFloat32Number) i;
+
+        cmsDoTransform(fogra_lab, CMYK1, &Lab1, 1);
+        cmsDoTransform(xform, CMYK1, CMYK2, 1);
+        cmsDoTransform(swop_lab, CMYK2, &Lab2, 1);
+
+        DeltaL = fabs(Lab1.L - Lab2.L);
+
+        if (DeltaL > Max) Max = DeltaL;
+    }
+
+
+    cmsCloseProfile(hSWOP);
+    cmsCloseProfile(hFOGRA);
+    cmsCloseProfile(hLab);
+
+    cmsDeleteTransform(xform);
+    cmsDeleteTransform(swop_lab);
+    cmsDeleteTransform(fogra_lab);
+
+    return Max < 3.0;
+}
+
+// CMYK check with the same profile file on both sides, relative colorimetric.
+static
+cmsInt32Number CheckCMYKRoundtrip(void)
+{
+    const char* Profile = "test1.icc";
+
+    return CheckCMYK(INTENT_RELATIVE_COLORIMETRIC, Profile, Profile);
+}
+
+
+// CMYK check between the two test profile files using the perceptual intent.
+static
+cmsInt32Number CheckCMYKPerceptual(void)
+{
+    const char* First  = "test1.icc";
+    const char* Second = "test2.icc";
+
+    return CheckCMYK(INTENT_PERCEPTUAL, First, Second);
+}
+
+
+
+// CMYK check between the two test profile files using the relative
+// colorimetric intent.
+static
+cmsInt32Number CheckCMYKRelCol(void)
+{
+    const char* First  = "test1.icc";
+    const char* Second = "test2.icc";
+
+    return CheckCMYK(INTENT_RELATIVE_COLORIMETRIC, First, Second);
+}
+
+
+
+// Checks the K-only black-preserving perceptual intent between test1.icc and
+// test2.icc. A pure-black ramp (C=M=Y=0, K=0..100) is converted in both
+// directions, and the lightness of each source colour is compared with the
+// lightness of its converted counterpart.
+// Returns non-zero when the largest lightness difference is below 3.0.
+static
+cmsInt32Number CheckKOnlyBlackPreserving(void)
+{
+    cmsHPROFILE hSWOP  = cmsOpenProfileFromFileTHR(DbgThread(), "test1.icc", "r");
+    cmsHPROFILE hFOGRA = cmsOpenProfileFromFileTHR(DbgThread(), "test2.icc", "r");
+    cmsHTRANSFORM xform, swop_lab, fogra_lab;
+    cmsFloat32Number CMYK1[4], CMYK2[4];
+    cmsCIELab Lab1, Lab2;
+    cmsHPROFILE hLab;
+    cmsFloat64Number DeltaL, Max;
+    cmsInt32Number i;
+
+    hLab = cmsCreateLab4ProfileTHR(DbgThread(), NULL);
+
+    // Conversion under test
+    xform = cmsCreateTransformTHR(DbgThread(), hSWOP, TYPE_CMYK_FLT, hFOGRA, TYPE_CMYK_FLT, INTENT_PRESERVE_K_ONLY_PERCEPTUAL, 0);
+
+    // Measurement transforms: each profile's CMYK to Lab, plain perceptual
+    swop_lab = cmsCreateTransformTHR(DbgThread(), hSWOP,   TYPE_CMYK_FLT, hLab, TYPE_Lab_DBL, INTENT_PERCEPTUAL, 0);
+    fogra_lab = cmsCreateTransformTHR(DbgThread(), hFOGRA, TYPE_CMYK_FLT, hLab, TYPE_Lab_DBL, INTENT_PERCEPTUAL, 0);
+
+    Max = 0;
+
+    for (i=0; i <= 100; i++) {
+        CMYK1[0] = 0;
+        CMYK1[1] = 0;
+        CMYK1[2] = 0;
+        CMYK1[3] = (cmsFloat32Number) i;
+
+        // SWOP CMYK to Lab1
+        cmsDoTransform(swop_lab, CMYK1, &Lab1, 1);
+
+        // SWOP To FOGRA using black preservation
+        cmsDoTransform(xform, CMYK1, CMYK2, 1);
+
+        // Obtained FOGRA CMYK to Lab2
+        cmsDoTransform(fogra_lab, CMYK2, &Lab2, 1);
+
+        // We care only on L*
+        DeltaL = fabs(Lab1.L - Lab2.L);
+
+        if (DeltaL > Max) Max = DeltaL;
+    }
+
+
+    cmsDeleteTransform(xform);
+
+    // dL should be below 3.0 (checked once, at the end, over both directions)
+
+
+    // Same, but FOGRA to SWOP
+    xform = cmsCreateTransformTHR(DbgThread(), hFOGRA, TYPE_CMYK_FLT, hSWOP, TYPE_CMYK_FLT, INTENT_PRESERVE_K_ONLY_PERCEPTUAL, 0);
+
+    for (i=0; i <= 100; i++) {
+        CMYK1[0] = 0;
+        CMYK1[1] = 0;
+        CMYK1[2] = 0;
+        CMYK1[3] = (cmsFloat32Number) i;
+
+        cmsDoTransform(fogra_lab, CMYK1, &Lab1, 1);
+        cmsDoTransform(xform, CMYK1, CMYK2, 1);
+        cmsDoTransform(swop_lab, CMYK2, &Lab2, 1);
+
+        DeltaL = fabs(Lab1.L - Lab2.L);
+
+        if (DeltaL > Max) Max = DeltaL;
+    }
+
+
+    cmsCloseProfile(hSWOP);
+    cmsCloseProfile(hFOGRA);
+    cmsCloseProfile(hLab);
+
+    cmsDeleteTransform(xform);
+    cmsDeleteTransform(swop_lab);
+    cmsDeleteTransform(fogra_lab);
+
+    return Max < 3.0;
+}
+
+// Checks K-plane black preservation between test1.icc and test2.icc by
+// converting K ramps in both directions and measuring the full deltaE between
+// the source colour and the converted colour, both expressed in Lab.
+// Returns non-zero when the largest deltaE over both passes is below 30.0.
+static
+cmsInt32Number CheckKPlaneBlackPreserving(void)
+{
+    cmsHPROFILE hSWOP  = cmsOpenProfileFromFileTHR(DbgThread(), "test1.icc", "r");
+    cmsHPROFILE hFOGRA = cmsOpenProfileFromFileTHR(DbgThread(), "test2.icc", "r");
+    cmsHTRANSFORM xform, swop_lab, fogra_lab;
+    cmsFloat32Number CMYK1[4], CMYK2[4];
+    cmsCIELab Lab1, Lab2;
+    cmsHPROFILE hLab;
+    cmsFloat64Number DeltaE, Max;
+    cmsInt32Number i;
+
+    hLab = cmsCreateLab4ProfileTHR(DbgThread(), NULL);
+
+    // NOTE(review): this first pass uses the plain perceptual intent, while the
+    // second pass uses INTENT_PRESERVE_K_PLANE_PERCEPTUAL -- confirm intended.
+    xform = cmsCreateTransformTHR(DbgThread(), hSWOP, TYPE_CMYK_FLT, hFOGRA, TYPE_CMYK_FLT, INTENT_PERCEPTUAL, 0);
+
+    // Measurement transforms: each profile's CMYK to Lab
+    swop_lab = cmsCreateTransformTHR(DbgThread(), hSWOP,  TYPE_CMYK_FLT, hLab, TYPE_Lab_DBL, INTENT_PERCEPTUAL, 0);
+    fogra_lab = cmsCreateTransformTHR(DbgThread(), hFOGRA, TYPE_CMYK_FLT, hLab, TYPE_Lab_DBL, INTENT_PERCEPTUAL, 0);
+
+    Max = 0;
+
+    // First pass: pure-black ramp (C=M=Y=0)
+    for (i=0; i <= 100; i++) {
+        CMYK1[0] = 0;
+        CMYK1[1] = 0;
+        CMYK1[2] = 0;
+        CMYK1[3] = (cmsFloat32Number) i;
+
+        cmsDoTransform(swop_lab, CMYK1, &Lab1, 1);
+        cmsDoTransform(xform, CMYK1, CMYK2, 1);
+        cmsDoTransform(fogra_lab, CMYK2, &Lab2, 1);
+
+        DeltaE = cmsDeltaE(&Lab1, &Lab2);
+
+        if (DeltaE > Max) Max = DeltaE;
+    }
+
+
+    cmsDeleteTransform(xform);
+
+    xform = cmsCreateTransformTHR(DbgThread(),  hFOGRA, TYPE_CMYK_FLT, hSWOP, TYPE_CMYK_FLT, INTENT_PRESERVE_K_PLANE_PERCEPTUAL, 0);
+
+    // Second pass, opposite direction: K ramp with C/M/Y fixed at 30/20/10
+    for (i=0; i <= 100; i++) {
+        CMYK1[0] = 30;
+        CMYK1[1] = 20;
+        CMYK1[2] = 10;
+        CMYK1[3] = (cmsFloat32Number) i;
+
+        cmsDoTransform(fogra_lab, CMYK1, &Lab1, 1);
+        cmsDoTransform(xform, CMYK1, CMYK2, 1);
+        cmsDoTransform(swop_lab, CMYK2, &Lab2, 1);
+
+        DeltaE = cmsDeltaE(&Lab1, &Lab2);
+
+        if (DeltaE > Max) Max = DeltaE;
+    }
+
+    cmsDeleteTransform(xform);
+
+
+
+    cmsCloseProfile(hSWOP);
+    cmsCloseProfile(hFOGRA);
+    cmsCloseProfile(hLab);
+
+
+    cmsDeleteTransform(swop_lab);
+    cmsDeleteTransform(fogra_lab);
+
+    return Max < 30.0;
+}
+
+
+// ------------------------------------------------------------------------------------------------------
+
+
+// Soft-proofing transform where input, output and proofing profile are all the
+// profile from Create_AboveRGB(); verified on float buffers with the linear
+// transform checker.
+static
+cmsInt32Number CheckProofingXFORMFloat(void)
+{
+    cmsInt32Number Result;
+    cmsHTRANSFORM hProof;
+    cmsHPROFILE hProfile = Create_AboveRGB();
+
+    hProof = cmsCreateProofingTransformTHR(DbgThread(),
+                                           hProfile, TYPE_RGB_FLT,
+                                           hProfile, TYPE_RGB_FLT,
+                                           hProfile,
+                                           INTENT_RELATIVE_COLORIMETRIC,
+                                           INTENT_RELATIVE_COLORIMETRIC,
+                                           cmsFLAGS_SOFTPROOFING);
+    cmsCloseProfile(hProfile);
+
+    Result = CheckFloatlinearXFORM(hProof, 3);
+    cmsDeleteTransform(hProof);
+
+    return Result;
+}
+
+// Soft-proofing transform where input, output and proofing profile are all the
+// profile from Create_AboveRGB(); verified on 16-bit buffers with the linear
+// transform checker. The transform is created with the cache disabled.
+static
+cmsInt32Number CheckProofingXFORM16(void)
+{
+    cmsInt32Number Result;
+    cmsHTRANSFORM hProof;
+    cmsHPROFILE hProfile = Create_AboveRGB();
+
+    hProof = cmsCreateProofingTransformTHR(DbgThread(),
+                                           hProfile, TYPE_RGB_16,
+                                           hProfile, TYPE_RGB_16,
+                                           hProfile,
+                                           INTENT_RELATIVE_COLORIMETRIC,
+                                           INTENT_RELATIVE_COLORIMETRIC,
+                                           cmsFLAGS_SOFTPROOFING|cmsFLAGS_NOCACHE);
+    cmsCloseProfile(hProfile);
+
+    Result = Check16linearXFORM(hProof, 3);
+    cmsDeleteTransform(hProof);
+
+    return Result;
+}
+
+
+// Exercises the gamut-check flag of proofing transforms.
+//   1. Float: input, output and proofing profile are all the profile from
+//      Create_AboveRGB(), so the transform must pass the linear checker.
+//   2. 16 bits: same input/output profile, but sRGB as the proofing profile.
+// Alarm codes are set to recognisable values first.
+// Returns non-zero on success, 0 on failure. A profile that was created is
+// always closed, even when the other one could not be created.
+static
+cmsInt32Number CheckGamutCheck(void)
+{
+    cmsHPROFILE hSRGB, hAbove;
+    cmsHTRANSFORM xform;
+    cmsInt32Number rc;
+    cmsUInt16Number Alarm[16] = { 0xDEAD, 0xBABE, 0xFACE };
+
+    // Set alarm codes to fancy values so we could check the out of gamut condition
+    cmsSetAlarmCodes(Alarm);
+
+    // Create the profiles
+    hSRGB  = cmsCreate_sRGBProfileTHR(DbgThread());
+    hAbove = Create_AboveRGB();
+
+    if (hSRGB == NULL || hAbove == NULL) {
+
+        // Release whichever profile did get created
+        if (hSRGB != NULL) cmsCloseProfile(hSRGB);
+        if (hAbove != NULL) cmsCloseProfile(hAbove);
+        return 0;  // Failed
+    }
+
+    SubTest("Gamut check on floating point");
+
+    // Create a gamut checker in the same space. No value should be out of gamut
+    xform = cmsCreateProofingTransformTHR(DbgThread(), hAbove, TYPE_RGB_FLT, hAbove, TYPE_RGB_FLT, hAbove,
+                            INTENT_RELATIVE_COLORIMETRIC, INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_GAMUTCHECK);
+
+
+    if (!CheckFloatlinearXFORM(xform, 3)) {
+        cmsCloseProfile(hSRGB);
+        cmsCloseProfile(hAbove);
+        cmsDeleteTransform(xform);
+        Fail("Gamut check on same profile failed");
+        return 0;
+    }
+
+    cmsDeleteTransform(xform);
+
+    SubTest("Gamut check on 16 bits");
+
+    xform = cmsCreateProofingTransformTHR(DbgThread(), hAbove, TYPE_RGB_16, hAbove, TYPE_RGB_16, hSRGB,
+                            INTENT_RELATIVE_COLORIMETRIC, INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_GAMUTCHECK);
+
+    cmsCloseProfile(hSRGB);
+    cmsCloseProfile(hAbove);
+
+    rc = Check16linearXFORM(xform, 3);
+
+    cmsDeleteTransform(xform);
+
+    return rc;
+}
+
+
+
+// -------------------------------------------------------------------------------------------------------------------
+
+// Runs destination black point detection on several profile files.
+// The detected values are never compared against anything and the function
+// always returns 1, so this only verifies that the calls complete.
+// NOTE(review): the profile handles are not checked for NULL before use.
+static
+cmsInt32Number CheckBlackPoint(void)
+{
+    cmsHPROFILE hProfile;
+    cmsCIEXYZ Black;
+    cmsCIELab Lab;
+
+    hProfile  = cmsOpenProfileFromFileTHR(DbgThread(), "test5.icc", "r");
+    cmsDetectDestinationBlackPoint(&Black, hProfile, INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsCloseProfile(hProfile);
+
+
+    // From here on the detected black is also converted to Lab; the Lab value
+    // is not used afterwards.
+    hProfile = cmsOpenProfileFromFileTHR(DbgThread(), "test1.icc", "r");
+    cmsDetectDestinationBlackPoint(&Black, hProfile, INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsXYZ2Lab(NULL, &Lab, &Black);
+    cmsCloseProfile(hProfile);
+
+    hProfile = cmsOpenProfileFromFileTHR(DbgThread(), "lcms2cmyk.icc", "r");
+    cmsDetectDestinationBlackPoint(&Black, hProfile, INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsXYZ2Lab(NULL, &Lab, &Black);
+    cmsCloseProfile(hProfile);
+
+    hProfile = cmsOpenProfileFromFileTHR(DbgThread(), "test2.icc", "r");
+    cmsDetectDestinationBlackPoint(&Black, hProfile, INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsXYZ2Lab(NULL, &Lab, &Black);
+    cmsCloseProfile(hProfile);
+
+    // test1.icc once more, this time with the perceptual intent
+    hProfile = cmsOpenProfileFromFileTHR(DbgThread(), "test1.icc", "r");
+    cmsDetectDestinationBlackPoint(&Black, hProfile, INTENT_PERCEPTUAL, 0);
+    cmsXYZ2Lab(NULL, &Lab, &Black);
+    cmsCloseProfile(hProfile);
+
+    return 1;
+}
+
+
+// Builds a fake CMYK profile with the given ink limit, saves it to a temporary
+// file, reloads it and runs total-area-coverage detection on the reloaded
+// profile. Returns 1 when the detected value is within 5 of InkLimit.
+static
+cmsInt32Number CheckOneTAC(cmsFloat64Number InkLimit)
+{
+    const char* TmpFile = "lcmstac.icc";
+    cmsHPROFILE hProfile;
+    cmsFloat64Number Detected;
+
+    // Write the synthetic profile out...
+    hProfile = CreateFakeCMYK(InkLimit, TRUE);
+    cmsSaveProfileToFile(hProfile, TmpFile);
+    cmsCloseProfile(hProfile);
+
+    // ...and measure the copy read back from disk
+    hProfile = cmsOpenProfileFromFile(TmpFile, "r");
+    Detected = cmsDetectTAC(hProfile);
+    cmsCloseProfile(hProfile);
+
+    remove(TmpFile);
+
+    return !(fabs(Detected - InkLimit) > 5);
+}
+
+
+// Runs the ink-limit detection check for a series of limits, stopping at the
+// first one that fails. Returns 1 only if all of them pass.
+static
+cmsInt32Number CheckTAC(void)
+{
+    static const cmsFloat64Number InkLimits[] = { 180, 220, 286, 310, 330 };
+    cmsUInt32Number i;
+
+    for (i = 0; i < sizeof(InkLimits) / sizeof(InkLimits[0]); i++) {
+
+        if (!CheckOneTAC(InkLimits[i]))
+            return 0;
+    }
+
+    return 1;
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+
+#define NPOINTS_IT8 10  // (17*17*17*17)
+
+// Round-trips a small CGATS/IT8 sheet through the file "TEST.IT8":
+// build it in memory, save, load, save again, load again, then check property
+// and data access on the reloaded sheet (plain, exponent and negative numbers).
+// Returns 1 on success and 0 on the first failed step.
+//
+// All exits after the sheet exists go through the Error label, so the IT8
+// handle is freed and the temporary file removed on failure as well.
+static
+cmsInt32Number CheckCGATS(void)
+{
+    cmsHANDLE  it8;
+    cmsInt32Number i;
+    cmsInt32Number rc = 0;
+
+    SubTest("IT8 creation");
+    it8 = cmsIT8Alloc(DbgThread());
+    if (it8 == NULL) return 0;
+
+    cmsIT8SetSheetType(it8, "LCMS/TESTING");
+    cmsIT8SetPropertyStr(it8, "ORIGINATOR",   "1 2 3 4");
+    cmsIT8SetPropertyUncooked(it8, "DESCRIPTOR",   "1234");
+    cmsIT8SetPropertyStr(it8, "MANUFACTURER", "3");
+    cmsIT8SetPropertyDbl(it8, "CREATED",      4);
+    cmsIT8SetPropertyDbl(it8, "SERIAL",       5);
+    cmsIT8SetPropertyHex(it8, "MATERIAL",     0x123);
+
+    cmsIT8SetPropertyDbl(it8, "NUMBER_OF_SETS", NPOINTS_IT8);
+    cmsIT8SetPropertyDbl(it8, "NUMBER_OF_FIELDS", 4);
+
+    cmsIT8SetDataFormat(it8, 0, "SAMPLE_ID");
+    cmsIT8SetDataFormat(it8, 1, "RGB_R");
+    cmsIT8SetDataFormat(it8, 2, "RGB_G");
+    cmsIT8SetDataFormat(it8, 3, "RGB_B");
+
+    SubTest("Table creation");
+    for (i=0; i < NPOINTS_IT8; i++) {
+
+          char Patch[20];
+
+          // Patch "P<i>" carries the value i in all three channels
+          sprintf(Patch, "P%d", i);
+
+          cmsIT8SetDataRowCol(it8, i, 0, Patch);
+          cmsIT8SetDataRowColDbl(it8, i, 1, i);
+          cmsIT8SetDataRowColDbl(it8, i, 2, i);
+          cmsIT8SetDataRowColDbl(it8, i, 3, i);
+    }
+
+    SubTest("Save to file");
+    cmsIT8SaveToFile(it8, "TEST.IT8");
+    cmsIT8Free(it8);
+
+    SubTest("Load from file");
+    it8 = cmsIT8LoadFromFile(DbgThread(), "TEST.IT8");
+    if (it8 == NULL) goto Error;
+
+    SubTest("Save again file");
+    cmsIT8SaveToFile(it8, "TEST.IT8");
+    cmsIT8Free(it8);
+
+
+    SubTest("Load from file (II)");
+    it8 = cmsIT8LoadFromFile(DbgThread(), "TEST.IT8");
+    if (it8 == NULL) goto Error;
+
+
+    SubTest("Change prop value");
+    if (cmsIT8GetPropertyDbl(it8, "DESCRIPTOR") != 1234) goto Error;
+
+    cmsIT8SetPropertyDbl(it8, "DESCRIPTOR", 5678);
+    if (cmsIT8GetPropertyDbl(it8, "DESCRIPTOR") != 5678) goto Error;
+
+    SubTest("Positive numbers");
+    if (cmsIT8GetDataDbl(it8, "P3", "RGB_G") != 3) goto Error;
+
+    // The tolerance checks use the absolute difference, so a value that comes
+    // back too small is rejected just like one that comes back too large.
+    SubTest("Positive exponent numbers");
+    cmsIT8SetPropertyDbl(it8, "DBL_PROP", 123E+12);
+    if (fabs(cmsIT8GetPropertyDbl(it8, "DBL_PROP") - 123E+12) > 1 ) goto Error;
+
+    SubTest("Negative exponent numbers");
+    cmsIT8SetPropertyDbl(it8, "DBL_PROP_NEG", 123E-45);
+    if (fabs(cmsIT8GetPropertyDbl(it8, "DBL_PROP_NEG") - 123E-45) > 1E-45 ) goto Error;
+
+    SubTest("Negative numbers");
+    cmsIT8SetPropertyDbl(it8, "DBL_NEG_VAL", -123);
+    if ((cmsIT8GetPropertyDbl(it8, "DBL_NEG_VAL")) != -123 ) goto Error;
+
+    rc = 1;
+
+    // Common exit for success and failure
+Error:
+    if (it8 != NULL)
+        cmsIT8Free(it8);
+
+    remove("TEST.IT8");
+    return rc;
+
+}
+
+
+// Feeds a short buffer of junk bytes to the in-memory IT8 loader. The load is
+// allowed to succeed or fail; the function returns 1 either way, so the check
+// is only that the call returns.
+static
+cmsInt32Number CheckCGATS2(void)
+{
+    const cmsUInt8Number junk[] = { 0x0, 0xd, 0xd, 0xa, 0x20, 0xd, 0x20, 0x20, 0x20, 0x3a, 0x31, 0x3d, 0x3d, 0x3d, 0x3d };
+    cmsHANDLE hIT8 = cmsIT8LoadFromMem(0, (const void*)junk, sizeof(junk));
+
+    // Free the sheet in case the junk was accepted
+    if (hIT8 != NULL) {
+        cmsIT8Free(hIT8);
+    }
+
+    return 1;
+}
+
+
+// Feeds the in-memory IT8 loader a sheet containing a number with a huge
+// exponent (1.e2147483648). The load is allowed to succeed or fail; the
+// function returns 1 either way, so the check is only that the call returns.
+static
+cmsInt32Number CheckCGATS_Overflow(void)
+{
+    const cmsUInt8Number junk[] = { "@\nA 1.e2147483648\n" };
+    cmsHANDLE hIT8 = cmsIT8LoadFromMem(0, (const void*)junk, sizeof(junk));
+
+    // Free the sheet in case the input was accepted
+    if (hIT8 != NULL) {
+        cmsIT8Free(hIT8);
+    }
+
+    return 1;
+}
+
+// Create CSA/CRD
+
+// Generates the PostScript CSA for a profile and, if FileName is given, writes
+// it to that file and then removes the file again.
+//   cInProf  - profile file name, or NULL to use a built-in Lab v4 profile
+//   FileName - output file name, or NULL to skip the file step
+// Nothing is returned. Failures (profile cannot be opened, empty CSA, no
+// memory, file cannot be created) end the function early after releasing
+// whatever was acquired.
+static
+void GenerateCSA(const char* cInProf, const char* FileName)
+{
+    cmsHPROFILE hProfile;
+    cmsUInt32Number n;
+    char* Buffer;
+    cmsContext BuffThread = DbgThread();
+    FILE* o;
+
+
+    if (cInProf == NULL)
+        hProfile = cmsCreateLab4Profile(NULL);
+    else
+        hProfile = cmsOpenProfileFromFile(cInProf, "r");
+
+    if (hProfile == NULL) return;
+
+    // First call with a NULL buffer only asks for the required size
+    n = cmsGetPostScriptCSA(DbgThread(), hProfile, 0, 0, NULL, 0);
+    if (n == 0) {
+        cmsCloseProfile(hProfile);
+        return;
+    }
+
+    Buffer = (char*) _cmsMalloc(BuffThread, n + 1);
+    if (Buffer == NULL) {
+        cmsCloseProfile(hProfile);
+        return;
+    }
+
+    cmsGetPostScriptCSA(DbgThread(), hProfile, 0, 0, Buffer, n);
+    Buffer[n] = 0;
+
+    if (FileName != NULL) {
+        o = fopen(FileName, "wb");
+
+        // Skip the write if the file could not be created
+        if (o != NULL) {
+            fwrite(Buffer, n, 1, o);
+            fclose(o);
+        }
+    }
+
+    _cmsFree(BuffThread, Buffer);
+    cmsCloseProfile(hProfile);
+    if (FileName != NULL)
+        remove(FileName);
+}
+
+
+// Generates the PostScript CRD for a profile and, if FileName is given, writes
+// it to that file and then removes the file again.
+//   cOutProf - profile file name, or NULL to use a built-in Lab v4 profile
+//   FileName - output file name, or NULL to skip the file step
+// Nothing is returned. Failures (profile cannot be opened, empty CRD, no
+// memory, file cannot be created) end the function early after releasing
+// whatever was acquired.
+static
+void GenerateCRD(const char* cOutProf, const char* FileName)
+{
+    cmsHPROFILE hProfile;
+    cmsUInt32Number n;
+    char* Buffer;
+    cmsUInt32Number dwFlags = 0;
+    cmsContext BuffThread = DbgThread();
+
+
+    if (cOutProf == NULL)
+        hProfile = cmsCreateLab4Profile(NULL);
+    else
+        hProfile = cmsOpenProfileFromFile(cOutProf, "r");
+
+    if (hProfile == NULL) return;
+
+    // First call with a NULL buffer only asks for the required size
+    n = cmsGetPostScriptCRD(DbgThread(), hProfile, 0, dwFlags, NULL, 0);
+    if (n == 0) {
+        cmsCloseProfile(hProfile);
+        return;
+    }
+
+    Buffer = (char*) _cmsMalloc(BuffThread, n + 1);
+    if (Buffer == NULL) {
+        cmsCloseProfile(hProfile);
+        return;
+    }
+
+    cmsGetPostScriptCRD(DbgThread(), hProfile, 0, dwFlags, Buffer, n);
+    Buffer[n] = 0;
+
+    if (FileName != NULL) {
+        FILE* o = fopen(FileName, "wb");
+
+        // Skip the write if the file could not be created
+        if (o != NULL) {
+            fwrite(Buffer, n, 1, o);
+            fclose(o);
+        }
+    }
+
+    _cmsFree(BuffThread, Buffer);
+    cmsCloseProfile(hProfile);
+    if (FileName != NULL)
+        remove(FileName);
+}
+
+// Generates PostScript CSA and CRD output for a fixed list of profiles (a NULL
+// profile name selects the built-in Lab profile inside the generators).
+// All CSAs are produced first, then all CRDs. Always returns 1: the generators
+// report nothing back, so this only checks that they run to completion.
+static
+cmsInt32Number CheckPostScript(void)
+{
+    typedef struct {
+        const char* Profile;
+        const char* Output;
+    } PSJob;
+
+    static const PSJob CSAJobs[] = {
+        { "test5.icc",     "sRGB_CSA.ps"   },
+        { "aRGBlcms2.icc", "aRGB_CSA.ps"   },
+        { "test4.icc",     "sRGBV4_CSA.ps" },
+        { "test1.icc",     "SWOP_CSA.ps"   },
+        { NULL,            "Lab_CSA.ps"    },
+        { "graylcms2.icc", "gray_CSA.ps"   }
+    };
+
+    static const PSJob CRDJobs[] = {
+        { "test5.icc",     "sRGB_CRD.ps"   },
+        { "aRGBlcms2.icc", "aRGB_CRD.ps"   },
+        { NULL,            "Lab_CRD.ps"    },
+        { "test1.icc",     "SWOP_CRD.ps"   },
+        { "test4.icc",     "sRGBV4_CRD.ps" },
+        { "graylcms2.icc", "gray_CRD.ps"   }
+    };
+
+    cmsUInt32Number i;
+
+    for (i = 0; i < sizeof(CSAJobs) / sizeof(CSAJobs[0]); i++)
+        GenerateCSA(CSAJobs[i].Profile, CSAJobs[i].Output);
+
+    for (i = 0; i < sizeof(CRDJobs) / sizeof(CRDJobs[0]); i++)
+        GenerateCRD(CRDJobs[i].Profile, CRDJobs[i].Output);
+
+    return 1;
+}
+
+
+// Sends one 8-bit gray sample through xform (which must produce a cmsCIELab)
+// and verifies the result is neutral (a and b near zero) with lightness L.
+static
+cmsInt32Number CheckGray(cmsHTRANSFORM xform, cmsUInt8Number g, double L)
+{
+    cmsCIELab Result;
+
+    cmsDoTransform(xform, &g, &Result, 1);
+
+    // The chroma axes are tested first; the first failure stops the check
+    if (!IsGoodVal("a axis on gray", 0, Result.a, 0.001) ||
+        !IsGoodVal("b axis on gray", 0, Result.b, 0.001))
+        return 0;
+
+    return IsGoodVal("Gray value", L, Result.L, 0.01);
+}
+
+// Checks a gray profile used as input: 8-bit gray levels are converted to Lab
+// and compared against the expected lightness values. Returns 1 on success,
+// 0 on any failure; all handles are released on every path.
+static
+cmsInt32Number CheckInputGray(void)
+{
+    cmsHPROFILE hGray = Create_Gray22();
+    cmsHPROFILE hLab  = cmsCreateLab4Profile(NULL);
+    cmsHTRANSFORM xform;
+    cmsInt32Number rc;
+
+    if (hGray == NULL || hLab == NULL) {
+
+        // Release whichever profile was actually created
+        if (hGray != NULL) cmsCloseProfile(hGray);
+        if (hLab != NULL) cmsCloseProfile(hLab);
+        return 0;
+    }
+
+    xform = cmsCreateTransform(hGray, TYPE_GRAY_8, hLab, TYPE_Lab_DBL, INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsCloseProfile(hGray); cmsCloseProfile(hLab);
+
+    if (xform == NULL) return 0;
+
+    // Short-circuit keeps the original "stop at first failure" behaviour
+    rc = CheckGray(xform, 0, 0) &&
+         CheckGray(xform, 125, 52.768) &&
+         CheckGray(xform, 200, 81.069) &&
+         CheckGray(xform, 255, 100.0);
+
+    // Delete the transform whether or not the checks passed
+    cmsDeleteTransform(xform);
+    return rc;
+}
+
+// Same as the gray input check, but using the gray profile built by
+// Create_GrayLab(). Returns 1 on success, 0 on any failure; all handles are
+// released on every path.
+static
+cmsInt32Number CheckLabInputGray(void)
+{
+    cmsHPROFILE hGray = Create_GrayLab();
+    cmsHPROFILE hLab  = cmsCreateLab4Profile(NULL);
+    cmsHTRANSFORM xform;
+    cmsInt32Number rc;
+
+    if (hGray == NULL || hLab == NULL) {
+
+        // Release whichever profile was actually created
+        if (hGray != NULL) cmsCloseProfile(hGray);
+        if (hLab != NULL) cmsCloseProfile(hLab);
+        return 0;
+    }
+
+    xform = cmsCreateTransform(hGray, TYPE_GRAY_8, hLab, TYPE_Lab_DBL, INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsCloseProfile(hGray); cmsCloseProfile(hLab);
+
+    if (xform == NULL) return 0;
+
+    // Short-circuit keeps the original "stop at first failure" behaviour
+    rc = CheckGray(xform, 0, 0) &&
+         CheckGray(xform, 125, 49.019) &&
+         CheckGray(xform, 200, 78.431) &&
+         CheckGray(xform, 255, 100.0);
+
+    // Delete the transform whether or not the checks passed
+    cmsDeleteTransform(xform);
+    return rc;
+}
+
+
+// Sends the neutral Lab colour (L, 0, 0) through xform (which must produce an
+// 8-bit gray sample) and verifies the resulting gray level equals g.
+static
+cmsInt32Number CheckOutGray(cmsHTRANSFORM xform, double L, cmsUInt8Number g)
+{
+    cmsCIELab Neutral;
+    cmsUInt8Number Produced;
+
+    Neutral.L = L;
+    Neutral.a = 0;
+    Neutral.b = 0;
+
+    cmsDoTransform(xform, &Neutral, &Produced, 1);
+
+    return IsGoodVal("Gray value", g, (double) Produced, 0.01);
+}
+
+// Checks a gray profile used as output: neutral Lab values are converted to
+// 8-bit gray and compared against the expected levels. Returns 1 on success,
+// 0 on any failure; all handles are released on every path.
+static
+cmsInt32Number CheckOutputGray(void)
+{
+    cmsHPROFILE hGray = Create_Gray22();
+    cmsHPROFILE hLab  = cmsCreateLab4Profile(NULL);
+    cmsHTRANSFORM xform;
+    cmsInt32Number rc;
+
+    if (hGray == NULL || hLab == NULL) {
+
+        // Release whichever profile was actually created
+        if (hGray != NULL) cmsCloseProfile(hGray);
+        if (hLab != NULL) cmsCloseProfile(hLab);
+        return 0;
+    }
+
+    xform = cmsCreateTransform( hLab, TYPE_Lab_DBL, hGray, TYPE_GRAY_8, INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsCloseProfile(hGray); cmsCloseProfile(hLab);
+
+    if (xform == NULL) return 0;
+
+    // End points first, then two intermediate values; stop at first failure
+    rc = CheckOutGray(xform, 0, 0) &&
+         CheckOutGray(xform, 100, 255) &&
+         CheckOutGray(xform, 20, 52) &&
+         CheckOutGray(xform, 50, 118);
+
+    // Delete the transform whether or not the checks passed
+    cmsDeleteTransform(xform);
+    return rc;
+}
+
+
+// Same as the gray output check, but using the gray profile built by
+// Create_GrayLab(): every integer L in [0, 100) must map to round(L * 255 / 100).
+// Returns 1 on success, 0 on any failure; all handles are released on every path.
+static
+cmsInt32Number CheckLabOutputGray(void)
+{
+    cmsHPROFILE hGray = Create_GrayLab();
+    cmsHPROFILE hLab  = cmsCreateLab4Profile(NULL);
+    cmsHTRANSFORM xform;
+    cmsInt32Number i;
+    cmsInt32Number rc;
+
+    if (hGray == NULL || hLab == NULL) {
+
+        // Release whichever profile was actually created
+        if (hGray != NULL) cmsCloseProfile(hGray);
+        if (hLab != NULL) cmsCloseProfile(hLab);
+        return 0;
+    }
+
+    xform = cmsCreateTransform( hLab, TYPE_Lab_DBL, hGray, TYPE_GRAY_8, INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsCloseProfile(hGray); cmsCloseProfile(hLab);
+
+    if (xform == NULL) return 0;
+
+    rc = CheckOutGray(xform, 0, 0) && CheckOutGray(xform, 100, 255);
+
+    // The loop stops at the first failing level
+    for (i=0; rc && i < 100; i++) {
+
+        cmsUInt8Number g;
+
+        g = (cmsUInt8Number) floor(i * 255.0 / 100.0 + 0.5);
+
+        rc = CheckOutGray(xform, i, g);
+    }
+
+    // Delete the transform whether or not the checks passed
+    cmsDeleteTransform(xform);
+    return rc;
+}
+
+
+// Writes a two-point tabulated tone curve as the gray TRC tag of a v4.3
+// profile opened for writing. Returns 1 when the tag is accepted, 0 otherwise.
+// The curve, the profile and the scratch file are released on every path.
+static
+cmsInt32Number CheckV4gamma(void)
+{
+    cmsHPROFILE h;
+    cmsUInt16Number Lin[] = {0, 0xffff};
+    cmsToneCurve*g = cmsBuildTabulatedToneCurve16(DbgThread(), 2, Lin);
+    cmsInt32Number rc;
+
+    if (g == NULL) return 0;
+
+    h = cmsOpenProfileFromFileTHR(DbgThread(), "v4gamma.icc", "w");
+    if (h == NULL) {
+        cmsFreeToneCurve(g);        // the curve was already built
+        return 0;
+    }
+
+    cmsSetProfileVersion(h, 4.3);
+
+    rc = cmsWriteTag(h, cmsSigGrayTRCTag, g) ? 1 : 0;
+
+    // Common cleanup, including the scratch file, regardless of the outcome
+    cmsCloseProfile(h);
+    cmsFreeToneCurve(g);
+    remove("v4gamma.icc");
+    return rc;
+}
+
+// cmsBool cmsGBDdumpVRML(cmsHANDLE hGBD, const char* fname);
+
+// Gamut descriptor routines
+// Exercises the gamut boundary descriptor in three stages: a dense Lab grid,
+// the sRGB gamut sampled through a transform, and a ring of constant chroma.
+// Returns 1 on success, 0 on any failure; the descriptor handle and the
+// transform are released on every path.
+static
+cmsInt32Number CheckGBD(void)
+{
+    cmsCIELab Lab;
+    cmsHANDLE  h;
+    cmsInt32Number L, a, b;
+    cmsUInt32Number r1, g1, b1;
+    cmsHPROFILE hLab, hsRGB;
+    cmsHTRANSFORM xform;
+
+    h = cmsGBDAlloc(DbgThread());
+    if (h == NULL) return 0;
+
+    // Fill all Lab gamut as valid
+    SubTest("Filling RAW gamut");
+
+    for (L=0; L <= 100; L += 10)
+        for (a = -128; a <= 128; a += 5)
+            for (b = -128; b <= 128; b += 5) {
+
+                Lab.L = L;
+                Lab.a = a;
+                Lab.b = b;
+                if (!cmsGDBAddPoint(h, &Lab)) {
+                    cmsGBDFree(h);
+                    return 0;
+                }
+            }
+
+    // Complete boundaries
+    SubTest("computing Lab gamut");
+    if (!cmsGDBCompute(h, 0)) {
+        cmsGBDFree(h);
+        return 0;
+    }
+
+
+    // All points should be inside gamut
+    SubTest("checking Lab gamut");
+    for (L=10; L <= 90; L += 25)
+        for (a = -120; a <= 120; a += 25)
+            for (b = -120; b <= 120; b += 25) {
+
+                Lab.L = L;
+                Lab.a = a;
+                Lab.b = b;
+                if (!cmsGDBCheckPoint(h, &Lab)) {
+                    cmsGBDFree(h);
+                    return 0;
+                }
+            }
+    cmsGBDFree(h);
+
+
+    // Now for sRGB
+    SubTest("checking sRGB gamut");
+    h = cmsGBDAlloc(DbgThread());
+    hsRGB = cmsCreate_sRGBProfile();
+    hLab  = cmsCreateLab4Profile(NULL);
+
+    xform = cmsCreateTransform(hsRGB, TYPE_RGB_8, hLab, TYPE_Lab_DBL, INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_NOCACHE);
+    cmsCloseProfile(hsRGB); cmsCloseProfile(hLab);
+
+    // Both the descriptor and the transform are needed from here on
+    if (h == NULL || xform == NULL) {
+        if (xform != NULL) cmsDeleteTransform(xform);
+        if (h != NULL) cmsGBDFree(h);
+        return 0;
+    }
+
+
+    for (r1=0; r1 < 256; r1 += 5) {
+        for (g1=0; g1 < 256; g1 += 5)
+            for (b1=0; b1 < 256; b1 += 5) {
+
+
+                cmsUInt8Number rgb[3];
+
+                rgb[0] = (cmsUInt8Number) r1;
+                rgb[1] = (cmsUInt8Number) g1;
+                rgb[2] = (cmsUInt8Number) b1;
+
+                cmsDoTransform(xform, rgb, &Lab, 1);
+
+                // if (fabs(Lab.b) < 20 && Lab.a > 0) continue;
+
+                if (!cmsGDBAddPoint(h, &Lab)) {
+                    cmsDeleteTransform(xform);
+                    cmsGBDFree(h);
+                    return 0;
+                }
+
+
+            }
+    }
+
+
+    if (!cmsGDBCompute(h, 0)) {
+        cmsDeleteTransform(xform);
+        cmsGBDFree(h);
+        return 0;
+    }
+    // cmsGBDdumpVRML(h, "c:\\colormaps\\lab.wrl");
+
+    for (r1=10; r1 < 200; r1 += 10) {
+        for (g1=10; g1 < 200; g1 += 10)
+            for (b1=10; b1 < 200; b1 += 10) {
+
+
+                cmsUInt8Number rgb[3];
+
+                rgb[0] = (cmsUInt8Number) r1;
+                rgb[1] = (cmsUInt8Number) g1;
+                rgb[2] = (cmsUInt8Number) b1;
+
+                cmsDoTransform(xform, rgb, &Lab, 1);
+                if (!cmsGDBCheckPoint(h, &Lab)) {
+
+                    cmsDeleteTransform(xform);
+                    cmsGBDFree(h);
+                    return 0;
+                }
+            }
+    }
+
+
+    cmsDeleteTransform(xform);
+    cmsGBDFree(h);
+
+    SubTest("checking LCh chroma ring");
+    h = cmsGBDAlloc(DbgThread());
+    if (h == NULL) return 0;
+
+
+    for (r1=0; r1 < 360; r1++) {
+
+        cmsCIELCh LCh;
+
+        LCh.L = 70;
+        LCh.C = 60;
+        LCh.h = r1;
+
+        cmsLCh2Lab(&Lab, &LCh);
+        if (!cmsGDBAddPoint(h, &Lab)) {
+            cmsGBDFree(h);
+            return 0;
+        }
+    }
+
+
+    if (!cmsGDBCompute(h, 0)) {
+        cmsGBDFree(h);
+        return 0;
+    }
+
+    cmsGBDFree(h);
+
+    return 1;
+}
+
+
+// Computes the MD5 profile ID of the same on-disk profile twice per open, over
+// two separate opens, and verifies the IDs obtained from both opens match.
+// Returns non-zero on success. A profile that cannot be opened, or an ID that
+// cannot be computed, is reported as a failure instead of comparing
+// uninitialized IDs.
+static
+int CheckMD5(void)
+{
+    cmsHPROFILE pProfile;
+    cmsProfileID ProfileID1, ProfileID2, ProfileID3, ProfileID4;
+    int Computed = 1;
+
+    // Keep the later memcmp well defined even if an ID is never filled in
+    memset(&ProfileID1, 0, sizeof(ProfileID1));
+    memset(&ProfileID2, 0, sizeof(ProfileID2));
+    memset(&ProfileID3, 0, sizeof(ProfileID3));
+    memset(&ProfileID4, 0, sizeof(ProfileID4));
+
+    pProfile = cmsOpenProfileFromFile("sRGBlcms2.icc", "r");
+    if (pProfile == NULL) return 0;
+
+    if (cmsMD5computeID(pProfile)) cmsGetHeaderProfileID(pProfile, ProfileID1.ID8);
+    else Computed = 0;
+    if (cmsMD5computeID(pProfile)) cmsGetHeaderProfileID(pProfile,ProfileID2.ID8);
+    else Computed = 0;
+
+    cmsCloseProfile(pProfile);
+
+
+    pProfile = cmsOpenProfileFromFile("sRGBlcms2.icc", "r");
+    if (pProfile == NULL) return 0;
+
+    if (cmsMD5computeID(pProfile)) cmsGetHeaderProfileID(pProfile, ProfileID3.ID8);
+    else Computed = 0;
+    if (cmsMD5computeID(pProfile)) cmsGetHeaderProfileID(pProfile,ProfileID4.ID8);
+    else Computed = 0;
+
+    cmsCloseProfile(pProfile);
+
+    return (Computed &&
+            (memcmp(ProfileID1.ID8, ProfileID3.ID8, sizeof(ProfileID1)) == 0) &&
+            (memcmp(ProfileID2.ID8, ProfileID4.ID8, sizeof(ProfileID2)) == 0));
+}
+
+
+
+// Checks linked tags: builds an ink-limiting devicelink, links AToB1 to AToB0,
+// saves and reloads it, round-trips the pipeline stages, verifies the link is
+// still reported, then rewrites AToB0 and saves again. Returns 1 on success,
+// 0 on failure; the profile and the duplicated pipeline are released on every
+// path. The two .icc files are left on disk, as before.
+static
+int CheckLinking(void)
+{
+    cmsHPROFILE h;
+    cmsPipeline * pipeline;
+    cmsStage *stageBegin, *stageEnd;
+    int rc;
+
+    // Create a CLUT based profile
+    h = cmsCreateInkLimitingDeviceLinkTHR(DbgThread(), cmsSigCmykData, 150);
+    if (h == NULL) return 0;
+
+    // link a second tag
+    cmsLinkTag(h, cmsSigAToB1Tag, cmsSigAToB0Tag);
+
+    // Save the linked devicelink
+    if (!cmsSaveProfileToFile(h, "lcms2link.icc")) {
+        cmsCloseProfile(h);
+        return 0;
+    }
+    cmsCloseProfile(h);
+
+    // Now open the profile and read the pipeline
+    h = cmsOpenProfileFromFile("lcms2link.icc", "r");
+    if (h == NULL) return 0;
+
+    pipeline = (cmsPipeline*) cmsReadTag(h, cmsSigAToB1Tag);
+    if (pipeline == NULL)
+    {
+        cmsCloseProfile(h);
+        return 0;
+    }
+
+    // Work on a private copy; the pointer from cmsReadTag is not freed here
+    pipeline = cmsPipelineDup(pipeline);
+    if (pipeline == NULL)
+    {
+        cmsCloseProfile(h);
+        return 0;
+    }
+
+    // extract stage from pipe line
+    cmsPipelineUnlinkStage(pipeline, cmsAT_BEGIN, &stageBegin);
+    cmsPipelineUnlinkStage(pipeline, cmsAT_END,   &stageEnd);
+    cmsPipelineInsertStage(pipeline, cmsAT_END,    stageEnd);
+    cmsPipelineInsertStage(pipeline, cmsAT_BEGIN,  stageBegin);
+
+    if (cmsTagLinkedTo(h, cmsSigAToB1Tag) != cmsSigAToB0Tag) {
+        cmsPipelineFree(pipeline);
+        cmsCloseProfile(h);
+        return 0;
+    }
+
+    cmsWriteTag(h, cmsSigAToB0Tag, pipeline);
+    cmsPipelineFree(pipeline);
+
+    rc = cmsSaveProfileToFile(h, "lcms2link2.icc") ? 1 : 0;
+    cmsCloseProfile(h);
+
+    return rc;
+}
+
+//  TestMPE
+//
+//  Created by Paul Miller on 30/08/2016.
+//
+// Builds a v4.3 colour-space-class profile with XYZ as PCS whose DToB1 and
+// BToD1 tags are both a single 3x3 identity matrix stage with zero offset.
+// The caller receives the profile handle and is expected to close it.
+static
+cmsHPROFILE IdentityMatrixProfile( cmsColorSpaceSignature dataSpace)
+{
+    cmsContext ContextID = 0;
+    cmsVEC3 Offset = {{0,0,0}};
+    cmsMAT3 Ident;
+    cmsPipeline* ToPCS;
+    cmsPipeline* FromPCS;
+    cmsHPROFILE hProfile;
+
+    hProfile = cmsCreateProfilePlaceholder(ContextID);
+
+    // Header fields
+    cmsSetProfileVersion(hProfile, 4.3);
+    cmsSetDeviceClass(hProfile, cmsSigColorSpaceClass);
+    cmsSetColorSpace(hProfile, dataSpace);
+    cmsSetPCS(hProfile, cmsSigXYZData);
+    cmsSetHeaderRenderingIntent(hProfile, INTENT_RELATIVE_COLORIMETRIC);
+
+    cmsWriteTag(hProfile, cmsSigMediaWhitePointTag, cmsD50_XYZ());
+
+    _cmsMAT3identity(&Ident);
+
+    // Device -> PCS direction. The local pipeline is freed right after the
+    // tag has been written.
+    ToPCS = cmsPipelineAlloc(ContextID, 3, 3);
+    cmsPipelineInsertStage(ToPCS, cmsAT_END,
+        cmsStageAllocMatrix(ContextID, 3, 3, (cmsFloat64Number*)&Ident, (cmsFloat64Number*)&Offset));
+    cmsWriteTag(hProfile, cmsSigDToB1Tag, ToPCS);
+    cmsPipelineFree(ToPCS);
+
+    // PCS -> device direction, built the same way
+    FromPCS = cmsPipelineAlloc(ContextID, 3, 3);
+    cmsPipelineInsertStage(FromPCS, cmsAT_END,
+        cmsStageAllocMatrix(ContextID, 3, 3, (cmsFloat64Number*)&Ident, (cmsFloat64Number*)&Offset));
+    cmsWriteTag(hProfile, cmsSigBToD1Tag, FromPCS);
+    cmsPipelineFree(FromPCS);
+
+    return hProfile;
+}
+
+// Checks floating point transforms between identity matrix profiles and the
+// built-in XYZ profile: every case must reproduce its input unchanged.
+// Returns 1 on success, 0 on the first failing case; the XYZ profile is
+// released on every path.
+static
+cmsInt32Number CheckFloatXYZ(void)
+{
+    cmsHPROFILE input;
+    cmsHPROFILE xyzProfile = cmsCreateXYZProfile();
+    cmsHTRANSFORM xform;
+    cmsFloat32Number in[4];
+    cmsFloat32Number out[4];
+    cmsInt32Number rc = 0;
+
+    in[0] = 1.0;
+    in[1] = 1.0;
+    in[2] = 1.0;
+    in[3] = 0.5;
+
+    // RGB to XYZ
+    input = IdentityMatrixProfile( cmsSigRgbData);
+
+    xform = cmsCreateTransform( input, TYPE_RGB_FLT, xyzProfile, TYPE_XYZ_FLT, INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsCloseProfile(input);
+
+    cmsDoTransform( xform, in, out, 1);
+    cmsDeleteTransform( xform);
+
+    if (!IsGoodVal("Float RGB->XYZ", in[0], out[0], FLOAT_PRECISSION) ||
+        !IsGoodVal("Float RGB->XYZ", in[1], out[1], FLOAT_PRECISSION) ||
+        !IsGoodVal("Float RGB->XYZ", in[2], out[2], FLOAT_PRECISSION))
+           goto Done;
+
+
+    // XYZ to XYZ
+    input = IdentityMatrixProfile( cmsSigXYZData);
+
+    xform = cmsCreateTransform( input, TYPE_XYZ_FLT, xyzProfile, TYPE_XYZ_FLT, INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsCloseProfile(input);
+
+    cmsDoTransform( xform, in, out, 1);
+
+
+    cmsDeleteTransform( xform);
+
+    if (!IsGoodVal("Float XYZ->XYZ", in[0], out[0], FLOAT_PRECISSION) ||
+        !IsGoodVal("Float XYZ->XYZ", in[1], out[1], FLOAT_PRECISSION) ||
+        !IsGoodVal("Float XYZ->XYZ", in[2], out[2], FLOAT_PRECISSION))
+           goto Done;
+
+
+    // XYZ plus alpha to XYZ plus alpha, with the extra channel copied across
+    input = IdentityMatrixProfile( cmsSigXYZData);
+
+#   define TYPE_XYZA_FLT          (FLOAT_SH(1)|COLORSPACE_SH(PT_XYZ)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(4))
+
+    xform = cmsCreateTransform( input, TYPE_XYZA_FLT, xyzProfile, TYPE_XYZA_FLT, INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_COPY_ALPHA);
+    cmsCloseProfile(input);
+
+    cmsDoTransform( xform, in, out, 1);
+
+
+    cmsDeleteTransform( xform);
+
+    if (!IsGoodVal("Float XYZA->XYZA", in[0], out[0], FLOAT_PRECISSION) ||
+        !IsGoodVal("Float XYZA->XYZA", in[1], out[1], FLOAT_PRECISSION) ||
+        !IsGoodVal("Float XYZA->XYZA", in[2], out[2], FLOAT_PRECISSION) ||
+        !IsGoodVal("Float XYZA->XYZA", in[3], out[3], FLOAT_PRECISSION))
+           goto Done;
+
+
+    // XYZ to RGB
+    input = IdentityMatrixProfile( cmsSigRgbData);
+
+    xform = cmsCreateTransform( xyzProfile, TYPE_XYZ_FLT, input, TYPE_RGB_FLT, INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsCloseProfile(input);
+
+    cmsDoTransform( xform, in, out, 1);
+
+    cmsDeleteTransform( xform);
+
+    if (!IsGoodVal("Float XYZ->RGB", in[0], out[0], FLOAT_PRECISSION) ||
+        !IsGoodVal("Float XYZ->RGB", in[1], out[1], FLOAT_PRECISSION) ||
+        !IsGoodVal("Float XYZ->RGB", in[2], out[2], FLOAT_PRECISSION))
+           goto Done;
+
+
+    // Now the optimizer should remove a stage
+
+    // RGB to RGB, same profile on both sides
+    input = IdentityMatrixProfile( cmsSigRgbData);
+
+    xform = cmsCreateTransform( input, TYPE_RGB_FLT, input, TYPE_RGB_FLT, INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsCloseProfile(input);
+
+    cmsDoTransform( xform, in, out, 1);
+
+    cmsDeleteTransform( xform);
+
+    if (!IsGoodVal("Float RGB->RGB", in[0], out[0], FLOAT_PRECISSION) ||
+        !IsGoodVal("Float RGB->RGB", in[1], out[1], FLOAT_PRECISSION) ||
+        !IsGoodVal("Float RGB->RGB", in[2], out[2], FLOAT_PRECISSION))
+           goto Done;
+
+    rc = 1;
+
+Done:
+    // Single exit so the XYZ profile is closed on failures as well
+    cmsCloseProfile(xyzProfile);
+
+    return rc;
+}
+
+
+/*
+Bug reported
+
+        1)
+        sRGB built-in V4.3 -> Lab identity built-in V4.3
+        Flags: "cmsFLAGS_NOCACHE", "cmsFLAGS_NOOPTIMIZE"
+        Input format: TYPE_RGBA_FLT
+        Output format: TYPE_LabA_FLT
+
+        2) and back
+        Lab identity built-in V4.3 -> sRGB built-in V4.3 
+        Flags: "cmsFLAGS_NOCACHE", "cmsFLAGS_NOOPTIMIZE"
+        Input format: TYPE_LabA_FLT
+        Output format: TYPE_RGBA_FLT
+
+*/
+// Round-trips 101 neutral RGBA float samples through sRGB -> Lab -> sRGB,
+// with cache and optimization disabled, and verifies R, G and B come back
+// unchanged. Returns 1 on success, 0 on the first mismatch; transforms and
+// profiles are released on every path.
+static
+cmsInt32Number ChecksRGB2LabFLT(void)
+{
+    cmsHPROFILE hSRGB = cmsCreate_sRGBProfile();
+    cmsHPROFILE hLab  = cmsCreateLab4Profile(NULL);
+
+    cmsHTRANSFORM xform1 = cmsCreateTransform(hSRGB, TYPE_RGBA_FLT, hLab, TYPE_LabA_FLT, 0, cmsFLAGS_NOCACHE|cmsFLAGS_NOOPTIMIZE);
+    cmsHTRANSFORM xform2 = cmsCreateTransform(hLab, TYPE_LabA_FLT, hSRGB, TYPE_RGBA_FLT, 0, cmsFLAGS_NOCACHE|cmsFLAGS_NOOPTIMIZE);
+
+    cmsFloat32Number RGBA1[4], RGBA2[4], LabA[4];
+    cmsInt32Number rc = 1;
+    int i;
+
+
+    // The loop stops at the first failing sample
+    for (i = 0; rc && i <= 100; i++)
+    {
+        RGBA1[0] = i / 100.0F;
+        RGBA1[1] = i / 100.0F;
+        RGBA1[2] = i / 100.0F;
+        RGBA1[3] = 0;
+
+        cmsDoTransform(xform1, RGBA1, LabA,  1);
+        cmsDoTransform(xform2, LabA, RGBA2, 1);
+
+        if (!IsGoodVal("Float RGB->RGB", RGBA1[0], RGBA2[0], FLOAT_PRECISSION) ||
+            !IsGoodVal("Float RGB->RGB", RGBA1[1], RGBA2[1], FLOAT_PRECISSION) ||
+            !IsGoodVal("Float RGB->RGB", RGBA1[2], RGBA2[2], FLOAT_PRECISSION))
+            rc = 0;
+    }
+
+
+    // Shared cleanup for both the passing and the failing outcome
+    cmsDeleteTransform(xform1);
+    cmsDeleteTransform(xform2);
+    cmsCloseProfile(hSRGB);
+    cmsCloseProfile(hLab);
+
+    return rc;
+}
+
+/*
+ * parametric curve for Rec709
+ */
+static
+double Rec709(double L)
+{
+    // Linear segment below the 0.018 breakpoint
+    if (L < 0.018)
+        return 4.5 * L;
+
+    // Power segment: 1.099 * L^0.45 - 0.099
+    return 1.099 * pow(L, 0.45) - 0.099;
+}
+
+
+// Builds a type 5 parametric tone curve with the Rec709 coefficients and
+// compares it, at 8-bit resolution, against the closed-form Rec709() above.
+// Returns 1 when all 256 levels agree, 0 otherwise (including when the curve
+// cannot be built).
+static
+cmsInt32Number CheckParametricRec709(void)
+{
+    cmsFloat64Number params[7];
+    cmsToneCurve* t;
+    int i;
+
+    params[0] = 0.45; /* y */
+    params[1] = pow(1.099, 1.0 / 0.45); /* a */
+    params[2] = 0.0; /* b */
+    params[3] = 4.5; /* c */
+    params[4] = 0.018; /* d */
+    params[5] = -0.099; /* e */
+    params[6] = 0.0; /* f */
+
+    t = cmsBuildParametricToneCurve (NULL, 5, params);
+
+    // Do not evaluate a curve that failed to build
+    if (t == NULL) return 0;
+
+    for (i=0; i < 256; i++)
+    {
+        cmsFloat32Number n = (cmsFloat32Number) i / 255.0F;
+        cmsUInt16Number f1 = (cmsUInt16Number) floor(255.0 * cmsEvalToneCurveFloat(t, n) + 0.5);
+        cmsUInt16Number f2 = (cmsUInt16Number) floor(255.0*Rec709((double) i / 255.0) + 0.5);
+
+        if (f1 != f2)
+        {
+            cmsFreeToneCurve(t);
+            return 0;
+        }
+    }
+
+    cmsFreeToneCurve(t);
+    return 1;
+}
+
+
+#define kNumPoints  10
+
+typedef cmsFloat32Number(*Function)(cmsFloat32Number x);
+
+// Reference function y = 0.1 + 0.9 * x, evaluated in double precision and
+// then narrowed to float.
+static cmsFloat32Number StraightLine( cmsFloat32Number x)
+{
+    const double y = 0.1 + 0.9 * x;
+
+    return (cmsFloat32Number) y;
+}
+
+// Evaluates curve at kNumPoints*3 evenly spaced positions in [0, 1] and
+// compares each result with fn. Every position is evaluated even after a
+// mismatch; the return value is 1 only if all of them agreed.
+static cmsInt32Number TestCurve( const char* label, cmsToneCurve* curve, Function fn)
+{
+    const int nSamples = kNumPoints*3;
+    cmsInt32Number AllGood = 1;
+    int k = 0;
+
+    while (k < nSamples) {
+
+        cmsFloat32Number pos      = (cmsFloat32Number)k / (nSamples - 1);
+        cmsFloat32Number wanted   = fn(pos);
+        cmsFloat32Number obtained = cmsEvalToneCurveFloat( curve, pos);
+
+        if (!IsGoodVal(label, wanted, obtained, FLOAT_PRECISSION))
+            AllGood = 0;
+
+        k++;
+    }
+
+    return AllGood;
+}
+
+// Builds a float tabulated tone curve from kNumPoints samples of
+// StraightLine() and checks that the curve reproduces that function.
+static
+cmsInt32Number CheckFloatSamples(void)
+{
+    cmsFloat32Number Table[kNumPoints];
+    cmsToneCurve *Tabulated;
+    cmsInt32Number Result;
+    int k;
+
+    // Sample the reference function on an even grid over [0, 1]
+    for (k = 0; k < kNumPoints; k++)
+        Table[k] = StraightLine((cmsFloat32Number)k / (kNumPoints-1));
+
+    Tabulated = cmsBuildTabulatedToneCurveFloat(NULL, kNumPoints, Table);
+    Result = TestCurve( "Float Samples", Tabulated, StraightLine);
+    cmsFreeToneCurve(Tabulated);
+
+    return Result;
+}
+
+// Builds a three-segment tone curve (parametric, sampled, parametric) that is
+// meant to describe StraightLine() over its whole domain, and checks the
+// resulting curve against that function.
+static
+cmsInt32Number CheckFloatSegments(void)
+{
+    cmsFloat32Number Samples[ kNumPoints];
+    cmsCurveSegment Segments[3];
+    cmsToneCurve *Segmented;
+    cmsInt32Number Result;
+    int k;
+
+    // Samples of the reference line across the middle segment, 0.1 to 0.9
+    for (k = 0; k < kNumPoints; k++) {
+        cmsFloat32Number x = (cmsFloat32Number) (0.1 + ((cmsFloat32Number)k / (kNumPoints-1)) * (0.9 - 0.1));
+        Samples[k] = StraightLine(x);
+    }
+
+    // Lower segment: everything below 0.1, parametric type 6,
+    // Y = (a * X + b) ^ Gamma + c
+    Segments[0].x0 = -1e22f;            // stands in for -infinity
+    Segments[0].x1 = 0.1f;
+    Segments[0].Type = 6;
+    Segments[0].Params[0] = 1.0f;       // gamma
+    Segments[0].Params[1] = 0.9f;       // a
+    Segments[0].Params[2] = 0.0f;       // b
+    Segments[0].Params[3] = 0.1f;       // c
+    Segments[0].Params[4] = 0.0f;
+
+    // Middle segment: 0.1 to 0.9, type 0 with a table of sampled points
+    Segments[1].x0 = 0.1f;
+    Segments[1].x1 = 0.9f;
+    Segments[1].Type = 0;
+    Segments[1].nGridPoints = kNumPoints;
+    Segments[1].SampledPoints = Samples;
+
+    // Upper segment: everything above 0.9, same parametric form as the lower one
+    Segments[2].x0 = 0.9f;
+    Segments[2].x1 = 1e22f;             // stands in for +infinity
+    Segments[2].Type = 6;
+    Segments[2].Params[0] = 1.0f;
+    Segments[2].Params[1] = 0.9f;
+    Segments[2].Params[2] = 0.0f;
+    Segments[2].Params[3] = 0.1f;
+    Segments[2].Params[4] = 0.0f;
+
+    Segmented = cmsBuildSegmentedToneCurve(0, 3, Segments);
+
+    Result = TestCurve( "Float Segmented Curve", Segmented, StraightLine);
+
+    cmsFreeToneCurve( Segmented);
+
+    return Result;
+}
+
+
+// Checks cmsReadRawTag on an on-disk profile and on an in-memory profile.
+// Each tag is read twice: once into a 4-byte buffer, and once with a NULL
+// buffer, where the expected return is the tag's full size.
+static
+cmsInt32Number CheckReadRAW(void)
+{
+    char Head[4];
+    cmsInt32Number nPartial, nWhole;
+    cmsHPROFILE hProfile;
+
+    SubTest("RAW read on on-disk");
+    hProfile = cmsOpenProfileFromFile("test1.icc", "r");
+    if (hProfile == NULL) return 0;
+
+    nPartial = cmsReadRawTag(hProfile, cmsSigGamutTag, Head, 4);
+    nWhole   = cmsReadRawTag(hProfile, cmsSigGamutTag, NULL, 0);
+    cmsCloseProfile(hProfile);
+
+    // 37009 is the value this test expects for test1.icc's gamut tag
+    if (nPartial != 4 || nWhole != 37009) return 0;
+
+    SubTest("RAW read on in-memory created profiles");
+    hProfile = cmsCreate_sRGBProfile();
+
+    nPartial = cmsReadRawTag(hProfile, cmsSigGreenColorantTag, Head, 4);
+    nWhole   = cmsReadRawTag(hProfile, cmsSigGreenColorantTag, NULL, 0);
+    cmsCloseProfile(hProfile);
+
+    // 20 is the value this test expects for the green colorant tag
+    if (nPartial != 4 || nWhole != 20) return 0;
+
+    return 1;
+}
+
+
+// Regression check for the 'meta' (dictionary) tag: reads the tag, serializes
+// the profile to memory, writes that blob to "new.icc" and verifies the tag
+// can still be read from the new file. Returns 1 on success, 0 on failure;
+// the profile and the memory blob are released on every path.
+static
+cmsInt32Number CheckMeta(void)
+{
+	char *data;
+	cmsHANDLE dict;
+	cmsHPROFILE p;
+	cmsUInt32Number clen;
+	FILE *fp;
+	int rc;
+
+	/* open file */
+	p = cmsOpenProfileFromFile("ibm-t61.icc", "r");
+	if (p == NULL) return 0;
+
+	/* read dictionary, but don't do anything with the value */
+	/* (the original bug report only reproduced when this read happened) */
+	dict = cmsReadTag(p, cmsSigMetaTag);
+	if (dict == NULL) {
+		cmsCloseProfile(p);
+		return 0;
+	}
+
+	/* serialize profile to memory: first call only asks for the size */
+	rc = cmsSaveProfileToMem(p, NULL, &clen);
+	if (!rc) {
+		cmsCloseProfile(p);
+		return 0;
+	}
+
+	data = (char*) malloc(clen);
+	if (data == NULL) {
+		cmsCloseProfile(p);
+		return 0;
+	}
+
+	rc = cmsSaveProfileToMem(p, data, &clen);
+	if (!rc) {
+		free(data);
+		cmsCloseProfile(p);
+		return 0;
+	}
+
+	/* write the memory blob to a file */
+	/* NOTE: the reported crash did not happen if cmsSaveProfileToFile() was used */
+	fp = fopen("new.icc", "wb");
+	if (fp == NULL) {
+		free(data);
+		cmsCloseProfile(p);
+		return 0;
+	}
+	fwrite(data, 1, clen, fp);
+	fclose(fp);
+	free(data);
+
+	cmsCloseProfile(p);
+
+	/* open newly created file and read metadata */
+	p = cmsOpenProfileFromFile("new.icc", "r");
+	if (p == NULL) return 0;
+
+	/* the reported failure was "Bad dictionary Name/Value" / "Corrupted tag 'meta'" here */
+	dict = cmsReadTag(p, cmsSigMetaTag);
+	rc = (dict != NULL);
+
+	cmsCloseProfile(p);
+	return rc ? 1 : 0;
+}
+
+
+// Bug on applying null transforms on floating point buffers
+// A null transform on float gray data must hand back its input unchanged.
+static
+cmsInt32Number CheckFloatNULLxform(void)
+{
+    cmsFloat32Number Src[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+    cmsFloat32Number Dst[10];
+    cmsHTRANSFORM hNull;
+    int k;
+
+    hNull = cmsCreateTransform(NULL, TYPE_GRAY_FLT, NULL, TYPE_GRAY_FLT, INTENT_PERCEPTUAL, cmsFLAGS_NULLTRANSFORM);
+    if (hNull == NULL) {
+        Fail("Unable to create float null transform");
+        return 0;
+    }
+
+    cmsDoTransform(hNull, Src, Dst, 10);
+    cmsDeleteTransform(hNull);
+
+    // Compare sample by sample, stopping at the first difference
+    for (k = 0; k < 10; k++)
+        if (!IsGoodVal("float nullxform", Src[k], Dst[k], 0.001))
+            return 0;
+
+    return 1;
+}
+
+// Regression check: writes a tag, removes it by writing NULL to the same
+// signature, then closes the profile (the close used to crash). Returns 1 on
+// success, 0 on failure; the MLU and the profile are released on every path.
+static
+cmsInt32Number CheckRemoveTag(void)
+{
+    cmsHPROFILE p;
+    cmsMLU *mlu;
+    int ret;
+
+    p = cmsCreate_sRGBProfileTHR(NULL);
+    if (p == NULL) return 0;
+
+    /* set value */
+    mlu = cmsMLUalloc (NULL, 1);
+    if (mlu == NULL) {
+        cmsCloseProfile(p);
+        return 0;
+    }
+
+    ret = cmsMLUsetASCII (mlu, "en", "US", "bar");
+    if (ret)
+        ret = cmsWriteTag (p, cmsSigDeviceMfgDescTag, mlu);
+
+    /* our copy of the MLU is released whether or not the write worked */
+    cmsMLUfree (mlu);
+
+    /* remove the tag  */
+    if (ret)
+        ret = cmsWriteTag (p, cmsSigDeviceMfgDescTag, NULL);
+
+    /* closing after the removal is the step that used to crash */
+    cmsCloseProfile(p);
+    return ret ? 1 : 0;
+}
+
+
+// Transforms one RGB8 pixel in place from sRGB to the ibm-t61 profile and
+// compares the result with the expected triplet. Returns non-zero on success;
+// profiles are released on every path.
+static
+cmsInt32Number CheckMatrixSimplify(void)
+{
+
+       cmsHPROFILE pIn;
+       cmsHPROFILE pOut;
+       cmsHTRANSFORM t;
+       unsigned char buf[3] = { 127, 32, 64 };
+
+
+       pIn = cmsCreate_sRGBProfile();
+       pOut = cmsOpenProfileFromFile("ibm-t61.icc", "r");
+       if (pIn == NULL || pOut == NULL) {
+
+              // Release whichever profile was actually obtained
+              if (pIn != NULL) cmsCloseProfile(pIn);
+              if (pOut != NULL) cmsCloseProfile(pOut);
+              return 0;
+       }
+
+       t = cmsCreateTransform(pIn, TYPE_RGB_8, pOut, TYPE_RGB_8, INTENT_PERCEPTUAL, 0);
+       if (t == NULL) {
+              cmsCloseProfile(pIn);
+              cmsCloseProfile(pOut);
+              return 0;
+       }
+
+       // Source and destination are the same buffer on purpose
+       cmsDoTransformStride(t, buf, buf, 1, 1);
+       cmsDeleteTransform(t);
+       cmsCloseProfile(pIn);
+       cmsCloseProfile(pOut);
+
+
+       return buf[0] == 144 && buf[1] == 0 && buf[2] == 69;
+}
+
+
+
+// Checks cmsDoTransformLineStride on padded multi-line buffers in four cases:
+// RGB8, RGBA8, RGBA16 and RGBA8 -> RGBA16. Each case expects the output buffer
+// to match a reference buffer byte for byte, padding included. Returns 1 on
+// success, 0 on failure; profiles are released on every path.
+static
+cmsInt32Number CheckTransformLineStride(void)
+{
+
+       cmsHPROFILE pIn;
+       cmsHPROFILE pOut;
+       cmsHTRANSFORM t;
+       cmsInt32Number rc = 0;
+
+       // Our buffer is formed by 4 RGB8 lines, each line is 2 pixels wide plus a padding of one byte
+
+       cmsUInt8Number buf1[]= { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0,
+                                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0,
+                                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0,
+                                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, };
+
+       // Our buffer2 is formed by 4 RGBA lines, each line is 2 pixels wide plus a padding of one byte
+
+       cmsUInt8Number buf2[] = { 0xff, 0xff, 0xff, 1, 0xff, 0xff, 0xff, 1, 0,
+                                 0xff, 0xff, 0xff, 1, 0xff, 0xff, 0xff, 1, 0,
+                                 0xff, 0xff, 0xff, 1, 0xff, 0xff, 0xff, 1, 0,
+                                 0xff, 0xff, 0xff, 1, 0xff, 0xff, 0xff, 1, 0};
+
+       // Our buffer3 is formed by 4 RGBA16 lines, each line is 2 pixels wide plus a padding of two bytes
+
+       cmsUInt16Number buf3[] = { 0xffff, 0xffff, 0xffff, 0x0101, 0xffff, 0xffff, 0xffff, 0x0101, 0,
+                                  0xffff, 0xffff, 0xffff, 0x0101, 0xffff, 0xffff, 0xffff, 0x0101, 0,
+                                  0xffff, 0xffff, 0xffff, 0x0101, 0xffff, 0xffff, 0xffff, 0x0101, 0,
+                                  0xffff, 0xffff, 0xffff, 0x0101, 0xffff, 0xffff, 0xffff, 0x0101, 0 };
+
+       cmsUInt8Number out[1024];
+
+
+       memset(out, 0, sizeof(out));
+       pIn = cmsCreate_sRGBProfile();
+       pOut = cmsOpenProfileFromFile("ibm-t61.icc", "r");
+       if (pIn == NULL || pOut == NULL) {
+
+              // Release whichever profile was actually obtained
+              if (pIn != NULL) cmsCloseProfile(pIn);
+              if (pOut != NULL) cmsCloseProfile(pOut);
+              return 0;
+       }
+
+       // RGB8: 2 pixels per line, 4 lines, 7 bytes per line on both sides
+       t = cmsCreateTransform(pIn, TYPE_RGB_8, pOut, TYPE_RGB_8, INTENT_PERCEPTUAL, cmsFLAGS_COPY_ALPHA);
+
+       cmsDoTransformLineStride(t, buf1, out, 2, 4, 7, 7, 0, 0);
+       cmsDeleteTransform(t);
+
+       if (memcmp(out, buf1, sizeof(buf1)) != 0) {
+              Fail("Failed transform line stride on RGB8");
+              goto Done;
+       }
+
+       memset(out, 0, sizeof(out));
+
+       // RGBA8: 9 bytes per line on both sides
+       t = cmsCreateTransform(pIn, TYPE_RGBA_8, pOut, TYPE_RGBA_8, INTENT_PERCEPTUAL, cmsFLAGS_COPY_ALPHA);
+
+       cmsDoTransformLineStride(t, buf2, out, 2, 4, 9, 9, 0, 0);
+
+       cmsDeleteTransform(t);
+
+
+       if (memcmp(out, buf2, sizeof(buf2)) != 0) {
+              Fail("Failed transform line stride on RGBA8");
+              goto Done;
+       }
+
+       memset(out, 0, sizeof(out));
+
+       // RGBA16: 18 bytes per line on both sides
+       t = cmsCreateTransform(pIn, TYPE_RGBA_16, pOut, TYPE_RGBA_16, INTENT_PERCEPTUAL, cmsFLAGS_COPY_ALPHA);
+
+       cmsDoTransformLineStride(t, buf3, out, 2, 4, 18, 18, 0, 0);
+
+       cmsDeleteTransform(t);
+
+       if (memcmp(out, buf3, sizeof(buf3)) != 0) {
+              Fail("Failed transform line stride on RGBA16");
+              goto Done;
+       }
+
+
+       memset(out, 0, sizeof(out));
+
+
+       // From 8 to 16: 9 bytes per input line, 18 bytes per output line
+       t = cmsCreateTransform(pIn, TYPE_RGBA_8, pOut, TYPE_RGBA_16, INTENT_PERCEPTUAL, cmsFLAGS_COPY_ALPHA);
+
+       cmsDoTransformLineStride(t, buf2, out, 2, 4, 9, 18, 0, 0);
+
+       cmsDeleteTransform(t);
+
+       if (memcmp(out, buf3, sizeof(buf3)) != 0) {
+              Fail("Failed transform line stride on RGBA16");
+              goto Done;
+       }
+
+       rc = 1;
+
+Done:
+       // Single exit: both profiles are closed whatever the outcome
+       cmsCloseProfile(pIn);
+       cmsCloseProfile(pOut);
+
+       return rc;
+}
+
+
+// Creates and immediately destroys an 8-bit planar RGB transform between
+// sRGB and the profile built by Create_AboveRGB(). No result is inspected,
+// so the check always returns 1.
+static
+int CheckPlanar8opt(void)
+{
+    cmsHPROFILE hAbove = Create_AboveRGB();
+    cmsHPROFILE hsRGB  = cmsCreate_sRGBProfile();
+    cmsHTRANSFORM hPlanar;
+
+    hPlanar = cmsCreateTransform(hsRGB, TYPE_RGB_8_PLANAR,
+                                 hAbove, TYPE_RGB_8_PLANAR,
+                                 INTENT_PERCEPTUAL, 0);
+
+    cmsDeleteTransform(hPlanar);
+    cmsCloseProfile(hAbove);
+    cmsCloseProfile(hsRGB);
+
+    return 1;
+}
+
+/**
+* Bug reported & fixed. Thanks to Kornel Lesinski for spotting this.
+*/
+// Converts one RGBA8 pixel to RGBA16 in TYPE_RGBA_16_SE layout and compares
+// the three colour channels with fixed expected words. The alpha word is not
+// examined.
+static
+int CheckSE(void)
+{
+    cmsHPROFILE hSrc = Create_AboveRGB();
+    cmsHPROFILE hDst = cmsCreate_sRGBProfile();
+
+    cmsHTRANSFORM hXform = cmsCreateTransform(hSrc, TYPE_RGBA_8, hDst, TYPE_RGBA_16_SE, INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_COPY_ALPHA);
+
+    cmsUInt8Number Pixel[4] = { 40, 41, 41, 0xfa };
+    cmsUInt16Number Words[4];
+
+    cmsDoTransform(hXform, Pixel, Words, 1);
+
+    cmsCloseProfile(hSrc);
+    cmsCloseProfile(hDst);
+    cmsDeleteTransform(hXform);
+
+    return (Words[0] == 0xf622 && Words[1] == 0x7f24 && Words[2] == 0x7f24) ? 1 : 0;
+}
+
+/**
+* Bug reported.
+*/
+// Opens a deliberately malformed profile ("bad_mpe.icc") and tries to build a
+// transform from it. The check passes only when an error is trapped AND no
+// transform is produced. Any transform that does get created is deleted
+// before returning.
+static
+int CheckForgedMPE(void)
+{
+    cmsUInt32Number i;
+    cmsHPROFILE srcProfile;
+    cmsHPROFILE dstProfile;
+    cmsColorSpaceSignature srcCS;
+    cmsUInt32Number nSrcComponents;
+    cmsUInt32Number srcFormat;
+    cmsUInt32Number intent = 0;
+    cmsUInt32Number flags = 0;
+    cmsHTRANSFORM hTransform;
+    cmsUInt8Number output[4];
+
+    srcProfile = cmsOpenProfileFromFile("bad_mpe.icc", "r");
+    if (!srcProfile)
+        return 0;
+
+    dstProfile = cmsCreate_sRGBProfile();
+    if (!dstProfile) {
+        cmsCloseProfile(srcProfile);
+        return 0;
+    }
+
+    srcCS = cmsGetColorSpace(srcProfile);
+    nSrcComponents = cmsChannelsOf(srcCS);
+
+    if (srcCS == cmsSigLabData) {
+        srcFormat =
+            COLORSPACE_SH(PT_Lab) | CHANNELS_SH(nSrcComponents) | BYTES_SH(0);
+    }
+    else {
+        srcFormat =
+            COLORSPACE_SH(PT_ANY) | CHANNELS_SH(nSrcComponents) | BYTES_SH(1);
+    }
+
+    // Trap errors only while the transform is being created
+    cmsSetLogErrorHandler(ErrorReportingFunction);
+
+    hTransform = cmsCreateTransform(srcProfile, srcFormat, dstProfile,
+        TYPE_BGR_8, intent, flags);
+    cmsCloseProfile(srcProfile);
+    cmsCloseProfile(dstProfile);
+
+    cmsSetLogErrorHandler(FatalErrorQuit);
+
+    // Should report error
+    if (!TrappedError) {
+
+        // A transform built without any error must still be released
+        if (hTransform) cmsDeleteTransform(hTransform);
+        return 0;
+    }
+
+    TrappedError = FALSE;
+
+    // Transform should NOT be created
+    if (!hTransform) return 1;
+
+    // Never should reach here
+    if (T_BYTES(srcFormat) == 0) {  // 0 means double
+        double input[128];
+        for (i = 0; i < nSrcComponents; i++)
+            input[i] = 0.5f;
+        cmsDoTransform(hTransform, input, output, 1);
+    }
+    else {
+        cmsUInt8Number input[128];
+        for (i = 0; i < nSrcComponents; i++)
+            input[i] = 128;
+        cmsDoTransform(hTransform, input, output, 1);
+    }
+    cmsDeleteTransform(hTransform);
+
+    return 0;
+}
+
+/**
+* What the self test is trying to do is creating a proofing transform
+* with gamut check, so we can getting the coverage of one profile of
+* another, i.e. to approximate the gamut intersection. e.g.
+* Thanks to Richard Hughes for providing the test
+*/
+// Builds a soft-proofing transform with gamut check from sRGB to a NULL
+// profile, proofed against the Create_AboveRGB() profile. The check only
+// verifies that the transform can be created.
+static
+int CheckProofingIntersection(void)
+{
+    cmsHPROFILE hsRGB, hAbove, hNull;
+    cmsHTRANSFORM hProof;
+
+    hsRGB  = cmsCreate_sRGBProfile();
+    hAbove = Create_AboveRGB();
+    hNull  = cmsCreateNULLProfileTHR(DbgThread());
+
+    hProof = cmsCreateProofingTransformTHR(DbgThread(),
+                                           hsRGB, TYPE_RGB_FLT,
+                                           hNull, TYPE_GRAY_FLT,
+                                           hAbove,
+                                           INTENT_ABSOLUTE_COLORIMETRIC,
+                                           INTENT_ABSOLUTE_COLORIMETRIC,
+                                           cmsFLAGS_GAMUTCHECK | cmsFLAGS_SOFTPROOFING);
+
+    // The profiles are closed as soon as the transform has been requested
+    cmsCloseProfile(hsRGB);
+    cmsCloseProfile(hAbove);
+    cmsCloseProfile(hNull);
+
+    if (hProof != NULL) {
+        cmsDeleteTransform(hProof);
+        return 1;
+    }
+
+    // Failed
+    return 0;
+}
+
+// --------------------------------------------------------------------------------------------------
+// P E R F O R M A N C E   C H E C K S
+// --------------------------------------------------------------------------------------------------
+
+
+typedef struct {cmsUInt8Number r, g, b, a;}    Scanline_rgba8;	// 4 x 8-bit channels; buffer element of SpeedTest8bitsCMYK
+typedef struct {cmsUInt16Number r, g, b, a;}   Scanline_rgba16; // 4 x 16-bit channels; buffer element of SpeedTest16bitsCMYK
+typedef struct {cmsFloat32Number r, g, b, a;}  Scanline_rgba32; // 4 x float channels; buffer element of SpeedTest32bits and SpeedTest32bitsCMYK
+typedef struct {cmsUInt8Number r, g, b;}       Scanline_rgb8;   // 3 x 8-bit channels; buffer element of SpeedTest8bits
+typedef struct {cmsUInt16Number r, g, b;}      Scanline_rgb16;  // 3 x 16-bit channels; buffer element of SpeedTest16bits
+typedef struct {cmsFloat32Number r, g, b;}     Scanline_rgb32;  // 3 x float channels
+
+
+// Prints Txt left-justified in a column at least 45 characters wide, followed by ": ", then flushes stdout.
+static void TitlePerformance(const char* Txt)
+{
+    fprintf(stdout, "%-45s: ", Txt); fflush(stdout);
+}
+
+// Prints the throughput of a timed run: Bytes is the buffer size, SizeOfPixel the bytes per pixel, diff the elapsed clock() ticks.
+static void PrintPerformance(cmsUInt32Number Bytes, cmsUInt32Number SizeOfPixel, cmsFloat64Number diff)
+{
+    // A run shorter than one clock() tick gives diff == 0; count it as one tick so the division below stays finite.
+    cmsFloat64Number seconds  = ((diff > 0.0) ? diff : 1.0) / CLOCKS_PER_SEC;
+    cmsFloat64Number mpix_sec = Bytes / (1024.0*1024.0*seconds*SizeOfPixel);
+    printf("%#4.3g MPixel/sec.\n", mpix_sec);
+    fflush(stdout);
+}
+
+
+// Times one in-place float RGBA -> RGBA transform between the two profiles and prints the throughput under Title.
+static void SpeedTest32bits(const char * Title, cmsHPROFILE hlcmsProfileIn, cmsHPROFILE hlcmsProfileOut, cmsInt32Number Intent)
+{
+    cmsInt32Number r, g, b, j;
+    clock_t atime;
+    cmsFloat64Number diff;
+    cmsHTRANSFORM hlcmsxform;
+    Scanline_rgba32 *In;
+    cmsUInt32Number Mb;
+    cmsUInt32Number Interval = 4; // Power of 2 number to increment r,g,b values by in the loops to keep the test duration practically short
+    cmsUInt32Number NumPixels;
+
+    if (hlcmsProfileIn == NULL || hlcmsProfileOut == NULL)
+        Die("Unable to open profiles");
+
+    hlcmsxform  = cmsCreateTransformTHR(DbgThread(), hlcmsProfileIn, TYPE_RGBA_FLT,
+        hlcmsProfileOut, TYPE_RGBA_FLT, Intent, cmsFLAGS_NOCACHE);
+    cmsCloseProfile(hlcmsProfileIn);     // both profile handles are closed here, before the transform runs
+    cmsCloseProfile(hlcmsProfileOut);
+
+    NumPixels = 256 / Interval * 256 / Interval * 256 / Interval;
+    Mb = NumPixels * sizeof(Scanline_rgba32);   // buffer size in bytes
+
+    In = (Scanline_rgba32 *) malloc(Mb);
+    if (In == NULL)
+        Die("Unable to allocate memory");   // a failed malloc used to be dereferenced below; Die() is assumed not to return
+
+    // Fill the buffer with the RGB cube sampled every Interval steps; alpha is the mean of r, g and b
+    j = 0;
+    for (r=0; r < 256; r += Interval)
+        for (g=0; g < 256; g += Interval)
+            for (b=0; b < 256; b += Interval) {
+
+                In[j].r = r / 256.0f;
+                In[j].g = g / 256.0f;
+                In[j].b = b / 256.0f;
+                In[j].a = (In[j].r + In[j].g + In[j].b) / 3;
+
+                j++;
+            }
+
+    TitlePerformance(Title);
+
+    // Only the transform call itself is timed
+    atime = clock();
+    cmsDoTransform(hlcmsxform, In, In, NumPixels);
+    diff = clock() - atime;
+
+    free(In);
+    PrintPerformance(Mb, sizeof(Scanline_rgba32), diff);
+    cmsDeleteTransform(hlcmsxform);
+}
+
+
+// Times one in-place 16-bit RGB -> RGB transform over all 256*256*256 RGB triplets and prints the throughput under Title.
+static void SpeedTest16bits(const char * Title, cmsHPROFILE hlcmsProfileIn, cmsHPROFILE hlcmsProfileOut, cmsInt32Number Intent)
+{
+    cmsInt32Number r, g, b, j;
+    clock_t atime;
+    cmsFloat64Number diff;
+    cmsHTRANSFORM hlcmsxform;
+    Scanline_rgb16 *In;
+    cmsUInt32Number Mb;
+
+    if (hlcmsProfileIn == NULL || hlcmsProfileOut == NULL)
+        Die("Unable to open profiles");
+
+    hlcmsxform  = cmsCreateTransformTHR(DbgThread(), hlcmsProfileIn, TYPE_RGB_16,
+        hlcmsProfileOut, TYPE_RGB_16, Intent, cmsFLAGS_NOCACHE);
+    cmsCloseProfile(hlcmsProfileIn);     // both profile handles are closed here, before the transform runs
+    cmsCloseProfile(hlcmsProfileOut);
+
+    Mb = 256*256*256 * sizeof(Scanline_rgb16);   // buffer size in bytes
+
+    In = (Scanline_rgb16*) malloc(Mb);
+    if (In == NULL)
+        Die("Unable to allocate memory");   // a failed malloc used to be dereferenced below; Die() is assumed not to return
+
+    // Each 8-bit index is widened to 16 bits by repeating the byte in both halves
+    j = 0;
+    for (r=0; r < 256; r++)
+        for (g=0; g < 256; g++)
+            for (b=0; b < 256; b++) {
+
+                In[j].r = (cmsUInt16Number) ((r << 8) | r);
+                In[j].g = (cmsUInt16Number) ((g << 8) | g);
+                In[j].b = (cmsUInt16Number) ((b << 8) | b);
+
+                j++;
+            }
+
+    TitlePerformance(Title);
+
+    // Only the transform call itself is timed
+    atime = clock();
+    cmsDoTransform(hlcmsxform, In, In, 256*256*256);
+    diff = clock() - atime;
+
+    free(In);
+    PrintPerformance(Mb, sizeof(Scanline_rgb16), diff);
+    cmsDeleteTransform(hlcmsxform);
+}
+
+
+// Times one in-place float CMYK -> CMYK transform between the two profiles and prints the throughput under Title.
+// The rendering intent is always INTENT_PERCEPTUAL; the four channels are stored in the r, g, b, a fields.
+static void SpeedTest32bitsCMYK(const char * Title, cmsHPROFILE hlcmsProfileIn, cmsHPROFILE hlcmsProfileOut)
+{
+    cmsInt32Number r, g, b, j;
+    clock_t atime;
+    cmsFloat64Number diff;
+    cmsHTRANSFORM hlcmsxform;
+    Scanline_rgba32 *In;
+    cmsUInt32Number Mb;
+    cmsUInt32Number Interval = 4; // Power of 2 number to increment r,g,b values by in the loops to keep the test duration practically short
+    cmsUInt32Number NumPixels;
+
+    if (hlcmsProfileIn == NULL || hlcmsProfileOut == NULL)
+        Die("Unable to open profiles");
+
+    hlcmsxform  = cmsCreateTransformTHR(DbgThread(), hlcmsProfileIn, TYPE_CMYK_FLT,
+        hlcmsProfileOut, TYPE_CMYK_FLT, INTENT_PERCEPTUAL, cmsFLAGS_NOCACHE);
+    cmsCloseProfile(hlcmsProfileIn);     // both profile handles are closed here, before the transform runs
+    cmsCloseProfile(hlcmsProfileOut);
+
+    NumPixels = 256 / Interval * 256 / Interval * 256 / Interval;
+    Mb = NumPixels * sizeof(Scanline_rgba32);   // buffer size in bytes
+
+    In = (Scanline_rgba32 *) malloc(Mb);
+    if (In == NULL)
+        Die("Unable to allocate memory");   // a failed malloc used to be dereferenced below; Die() is assumed not to return
+
+    // Fill the buffer with a cube sampled every Interval steps; the fourth channel is the mean of the other three
+    j = 0;
+    for (r=0; r < 256; r += Interval)
+        for (g=0; g < 256; g += Interval)
+            for (b=0; b < 256; b += Interval) {
+
+                In[j].r = r / 256.0f;
+                In[j].g = g / 256.0f;
+                In[j].b = b / 256.0f;
+                In[j].a = (In[j].r + In[j].g + In[j].b) / 3;
+
+                j++;
+            }
+
+    TitlePerformance(Title);
+
+    // Only the transform call itself is timed
+    atime = clock();
+    cmsDoTransform(hlcmsxform, In, In, NumPixels);
+    diff = clock() - atime;
+
+    free(In);
+    PrintPerformance(Mb, sizeof(Scanline_rgba32), diff);
+
+    cmsDeleteTransform(hlcmsxform);
+}
+
+
+// Times one in-place 16-bit CMYK -> CMYK transform over 256*256*256 pixels and prints the throughput under Title.
+// The rendering intent is always INTENT_PERCEPTUAL; the four channels are stored in the r, g, b, a fields.
+static void SpeedTest16bitsCMYK(const char * Title, cmsHPROFILE hlcmsProfileIn, cmsHPROFILE hlcmsProfileOut)
+{
+    cmsInt32Number r, g, b, j;
+    clock_t atime;
+    cmsFloat64Number diff;
+    cmsHTRANSFORM hlcmsxform;
+    Scanline_rgba16 *In;
+    cmsUInt32Number Mb;
+
+    if (hlcmsProfileIn == NULL || hlcmsProfileOut == NULL)
+        Die("Unable to open profiles");
+
+    hlcmsxform  = cmsCreateTransformTHR(DbgThread(), hlcmsProfileIn, TYPE_CMYK_16,
+        hlcmsProfileOut, TYPE_CMYK_16, INTENT_PERCEPTUAL,  cmsFLAGS_NOCACHE);
+    cmsCloseProfile(hlcmsProfileIn);     // both profile handles are closed here, before the transform runs
+    cmsCloseProfile(hlcmsProfileOut);
+
+    Mb = 256*256*256*sizeof(Scanline_rgba16);   // buffer size in bytes
+
+    In = (Scanline_rgba16*) malloc(Mb);
+    if (In == NULL)
+        Die("Unable to allocate memory");   // a failed malloc used to be dereferenced below; Die() is assumed not to return
+
+    // The first three channels repeat the 8-bit index in both bytes; the fourth channel is always 0
+    j = 0;
+    for (r=0; r < 256; r++)
+        for (g=0; g < 256; g++)
+            for (b=0; b < 256; b++) {
+
+                In[j].r = (cmsUInt16Number) ((r << 8) | r);
+                In[j].g = (cmsUInt16Number) ((g << 8) | g);
+                In[j].b = (cmsUInt16Number) ((b << 8) | b);
+                In[j].a = 0;
+
+                j++;
+            }
+
+    TitlePerformance(Title);
+
+    // Only the transform call itself is timed
+    atime = clock();
+    cmsDoTransform(hlcmsxform, In, In, 256*256*256);
+    diff = clock() - atime;
+
+    free(In);
+    PrintPerformance(Mb, sizeof(Scanline_rgba16), diff);
+
+    cmsDeleteTransform(hlcmsxform);
+}
+
+
+// Times one in-place 8-bit RGB -> RGB transform over all 256*256*256 RGB triplets and prints the throughput under Title.
+static void SpeedTest8bits(const char * Title, cmsHPROFILE hlcmsProfileIn, cmsHPROFILE hlcmsProfileOut, cmsInt32Number Intent)
+{
+    cmsInt32Number r, g, b, j;
+    clock_t atime;
+    cmsFloat64Number diff;
+    cmsHTRANSFORM hlcmsxform;
+    Scanline_rgb8 *In;
+    cmsUInt32Number Mb;
+
+    if (hlcmsProfileIn == NULL || hlcmsProfileOut == NULL)
+        Die("Unable to open profiles");
+
+    hlcmsxform  = cmsCreateTransformTHR(DbgThread(), hlcmsProfileIn, TYPE_RGB_8,
+                            hlcmsProfileOut, TYPE_RGB_8, Intent, cmsFLAGS_NOCACHE);
+    cmsCloseProfile(hlcmsProfileIn);     // both profile handles are closed here, before the transform runs
+    cmsCloseProfile(hlcmsProfileOut);
+
+    Mb = 256*256*256*sizeof(Scanline_rgb8);   // buffer size in bytes
+
+    In = (Scanline_rgb8*) malloc(Mb);
+    if (In == NULL)
+        Die("Unable to allocate memory");   // a failed malloc used to be dereferenced below; Die() is assumed not to return
+
+    // One pixel per (r, g, b) combination
+    j = 0;
+    for (r=0; r < 256; r++)
+        for (g=0; g < 256; g++)
+            for (b=0; b < 256; b++) {
+
+                In[j].r = (cmsUInt8Number) r;
+                In[j].g = (cmsUInt8Number) g;
+                In[j].b = (cmsUInt8Number) b;
+
+                j++;
+            }
+
+    TitlePerformance(Title);
+
+    // Only the transform call itself is timed
+    atime = clock();
+    cmsDoTransform(hlcmsxform, In, In, 256*256*256);
+    diff = clock() - atime;
+
+    free(In);
+    PrintPerformance(Mb, sizeof(Scanline_rgb8), diff);
+
+    cmsDeleteTransform(hlcmsxform);
+}
+
+
+// Times one in-place 8-bit CMYK -> CMYK transform over 256*256*256 pixels and prints the throughput under Title.
+// The rendering intent is always INTENT_PERCEPTUAL; the four channels are stored in the r, g, b, a fields.
+static void SpeedTest8bitsCMYK(const char * Title, cmsHPROFILE hlcmsProfileIn, cmsHPROFILE hlcmsProfileOut)
+{
+    cmsInt32Number r, g, b, j;
+    clock_t atime;
+    cmsFloat64Number diff;
+    cmsHTRANSFORM hlcmsxform;
+    Scanline_rgba8 *In;
+    cmsUInt32Number Mb;
+
+    if (hlcmsProfileIn == NULL || hlcmsProfileOut == NULL)
+        Die("Unable to open profiles");
+
+    hlcmsxform  = cmsCreateTransformTHR(DbgThread(), hlcmsProfileIn, TYPE_CMYK_8,
+                        hlcmsProfileOut, TYPE_CMYK_8, INTENT_PERCEPTUAL, cmsFLAGS_NOCACHE);
+    cmsCloseProfile(hlcmsProfileIn);     // both profile handles are closed here, before the transform runs
+    cmsCloseProfile(hlcmsProfileOut);
+
+    Mb = 256*256*256*sizeof(Scanline_rgba8);   // buffer size in bytes
+
+    In = (Scanline_rgba8*) malloc(Mb);
+    if (In == NULL)
+        Die("Unable to allocate memory");   // a failed malloc used to be dereferenced below; Die() is assumed not to return
+
+    // The first three channels sweep every 8-bit combination; the fourth channel is always 0
+    j = 0;
+    for (r=0; r < 256; r++)
+        for (g=0; g < 256; g++)
+            for (b=0; b < 256; b++) {
+
+                In[j].r = (cmsUInt8Number) r;
+                In[j].g = (cmsUInt8Number) g;
+                In[j].b = (cmsUInt8Number) b;
+                In[j].a = (cmsUInt8Number) 0;
+
+                j++;
+            }
+
+    TitlePerformance(Title);
+
+    // Only the transform call itself is timed
+    atime = clock();
+    cmsDoTransform(hlcmsxform, In, In, 256*256*256);
+    diff = clock() - atime;
+
+    free(In);
+    PrintPerformance(Mb, sizeof(Scanline_rgba8), diff);
+
+    cmsDeleteTransform(hlcmsxform);
+}
+
+
+// Times one in-place float gray -> gray transform between the two profiles and prints the throughput under Title.
+static void SpeedTest32bitsGray(const char * Title, cmsHPROFILE hlcmsProfileIn, cmsHPROFILE hlcmsProfileOut, cmsInt32Number Intent)
+{
+    cmsInt32Number r, g, b, j;
+    clock_t atime;
+    cmsFloat64Number diff;
+    cmsHTRANSFORM hlcmsxform;
+    cmsFloat32Number *In;
+    cmsUInt32Number Mb;
+    cmsUInt32Number Interval = 4; // Power of 2 number to increment r,g,b values by in the loops to keep the test duration practically short
+    cmsUInt32Number NumPixels;
+
+    if (hlcmsProfileIn == NULL || hlcmsProfileOut == NULL)
+        Die("Unable to open profiles");
+
+    hlcmsxform  = cmsCreateTransformTHR(DbgThread(), hlcmsProfileIn,
+        TYPE_GRAY_FLT, hlcmsProfileOut, TYPE_GRAY_FLT, Intent, cmsFLAGS_NOCACHE);
+    cmsCloseProfile(hlcmsProfileIn);     // both profile handles are closed here, before the transform runs
+    cmsCloseProfile(hlcmsProfileOut);
+
+    NumPixels = 256 / Interval * 256 / Interval * 256 / Interval;
+    Mb = NumPixels * sizeof(cmsFloat32Number);   // buffer size in bytes
+
+    In = (cmsFloat32Number*) malloc(Mb);
+    if (In == NULL)
+        Die("Unable to allocate memory");   // a failed malloc used to be dereferenced below; Die() is assumed not to return
+
+    // One gray sample per subsampled (r, g, b) index triple: the index sum scaled by 1/768
+    j = 0;
+    for (r = 0; r < 256; r += Interval)
+        for (g = 0; g < 256; g += Interval)
+            for (b = 0; b < 256; b += Interval) {
+                In[j] = ((r + g + b) / 768.0f);
+                j++;
+            }
+
+    TitlePerformance(Title);
+
+    // Only the transform call itself is timed
+    atime = clock();
+    cmsDoTransform(hlcmsxform, In, In, NumPixels);
+    diff = clock() - atime;
+
+    free(In);
+    PrintPerformance(Mb, sizeof(cmsFloat32Number), diff);
+    cmsDeleteTransform(hlcmsxform);
+}
+
+
+// Times one in-place 16-bit gray -> gray transform over 256*256*256 samples and prints the throughput under Title.
+static void SpeedTest16bitsGray(const char * Title, cmsHPROFILE hlcmsProfileIn, cmsHPROFILE hlcmsProfileOut, cmsInt32Number Intent)
+{
+    cmsInt32Number r, g, b, j;
+    clock_t atime;
+    cmsFloat64Number diff;
+    cmsHTRANSFORM hlcmsxform;
+    cmsUInt16Number *In;
+    cmsUInt32Number Mb;
+
+    if (hlcmsProfileIn == NULL || hlcmsProfileOut == NULL)
+        Die("Unable to open profiles");
+
+    hlcmsxform  = cmsCreateTransformTHR(DbgThread(), hlcmsProfileIn,
+        TYPE_GRAY_16, hlcmsProfileOut, TYPE_GRAY_16, Intent, cmsFLAGS_NOCACHE);
+    cmsCloseProfile(hlcmsProfileIn);     // both profile handles are closed here, before the transform runs
+    cmsCloseProfile(hlcmsProfileOut);
+    Mb = 256*256*256 * sizeof(cmsUInt16Number);   // buffer size in bytes
+
+    In = (cmsUInt16Number *) malloc(Mb);
+    if (In == NULL)
+        Die("Unable to allocate memory");   // a failed malloc used to be dereferenced below; Die() is assumed not to return
+
+    // One gray sample per (r, g, b) index triple: the integer mean of the three indices
+    j = 0;
+    for (r=0; r < 256; r++)
+        for (g=0; g < 256; g++)
+            for (b=0; b < 256; b++) {
+                In[j] = (cmsUInt16Number) ((r + g + b) / 3);
+                j++;
+            }
+
+    TitlePerformance(Title);
+
+    // Only the transform call itself is timed
+    atime = clock();
+    cmsDoTransform(hlcmsxform, In, In, 256*256*256);
+    diff = clock() - atime;
+
+    free(In);
+    PrintPerformance(Mb, sizeof(cmsUInt16Number), diff);
+    cmsDeleteTransform(hlcmsxform);
+}
+
+
+// Times one in-place 8-bit gray -> gray transform over 256*256*256 samples and prints the throughput under Title.
+static void SpeedTest8bitsGray(const char * Title, cmsHPROFILE hlcmsProfileIn, cmsHPROFILE hlcmsProfileOut, cmsInt32Number Intent)
+{
+    cmsInt32Number r, g, b, j;
+    clock_t atime;
+    cmsFloat64Number diff;
+    cmsHTRANSFORM hlcmsxform;
+    cmsUInt8Number *In;
+    cmsUInt32Number Mb;
+
+    if (hlcmsProfileIn == NULL || hlcmsProfileOut == NULL)
+        Die("Unable to open profiles");
+
+    hlcmsxform  = cmsCreateTransformTHR(DbgThread(), hlcmsProfileIn,
+        TYPE_GRAY_8, hlcmsProfileOut, TYPE_GRAY_8, Intent, cmsFLAGS_NOCACHE);
+    cmsCloseProfile(hlcmsProfileIn);     // both profile handles are closed here, before the transform runs
+    cmsCloseProfile(hlcmsProfileOut);
+
+    Mb = 256*256*256;   // buffer size in bytes, one byte per sample
+
+    In = (cmsUInt8Number*) malloc(Mb);
+    if (In == NULL)
+        Die("Unable to allocate memory");   // a failed malloc used to be dereferenced below; Die() is assumed not to return
+
+    // Every sample takes the value of the outer index r; g and b only repeat it
+    j = 0;
+    for (r=0; r < 256; r++)
+        for (g=0; g < 256; g++)
+            for (b=0; b < 256; b++) {
+                In[j] = (cmsUInt8Number) r;
+                j++;
+            }
+
+    TitlePerformance(Title);
+
+    // Only the transform call itself is timed
+    atime = clock();
+    cmsDoTransform(hlcmsxform, In, In, 256*256*256);
+    diff = clock() - atime;
+
+    free(In);
+    PrintPerformance(Mb, sizeof(cmsUInt8Number), diff);
+    cmsDeleteTransform(hlcmsxform);
+}
+
+
+// Builds an RGB linearization device link whose three channels all use one shared gamma 1.1 tone curve.
+static cmsHPROFILE CreateCurves(void)
+{
+    cmsToneCurve* Channels[3];
+    cmsToneCurve* Shared;
+    cmsHPROFILE DeviceLink;
+    int c;
+
+    Shared = cmsBuildGamma(DbgThread(), 1.1);
+    for (c = 0; c < 3; c++) Channels[c] = Shared;
+    DeviceLink = cmsCreateLinearizationDeviceLink(cmsSigRgbData, Channels);
+    cmsFreeToneCurve(Shared);   // the curve is freed as soon as the device link has been created
+    return DeviceLink;
+}
+
+
+static
+void SpeedTest(void)
+{
+    printf("\n\nP E R F O R M A N C E   T E S T S\n");
+    printf(    "=================================\n\n");
+    fflush(stdout);
+    // Each call below receives two freshly opened profile handles, which the callee closes; first group: test5.icc -> test3.icc
+    SpeedTest8bits("8 bits on CLUT profiles",
+        cmsOpenProfileFromFile("test5.icc", "r"),
+        cmsOpenProfileFromFile("test3.icc", "r"),
+        INTENT_PERCEPTUAL);
+
+    SpeedTest16bits("16 bits on CLUT profiles",
+        cmsOpenProfileFromFile("test5.icc", "r"),
+        cmsOpenProfileFromFile("test3.icc", "r"), INTENT_PERCEPTUAL);
+
+    SpeedTest32bits("32 bits on CLUT profiles",
+        cmsOpenProfileFromFile("test5.icc", "r"),
+        cmsOpenProfileFromFile("test3.icc", "r"), INTENT_PERCEPTUAL);
+
+    printf("\n");
+
+    // RGB, test5.icc -> aRGBlcms2.icc, perceptual intent
+
+    SpeedTest8bits("8 bits on Matrix-Shaper profiles",
+        cmsOpenProfileFromFile("test5.icc", "r"),
+        cmsOpenProfileFromFile("aRGBlcms2.icc", "r"),
+        INTENT_PERCEPTUAL);
+
+    SpeedTest16bits("16 bits on Matrix-Shaper profiles",
+       cmsOpenProfileFromFile("test5.icc", "r"),
+        cmsOpenProfileFromFile("aRGBlcms2.icc", "r"),
+        INTENT_PERCEPTUAL);
+
+    SpeedTest32bits("32 bits on Matrix-Shaper profiles",
+       cmsOpenProfileFromFile("test5.icc", "r"),
+        cmsOpenProfileFromFile("aRGBlcms2.icc", "r"),
+        INTENT_PERCEPTUAL);
+
+    printf("\n");
+
+    // RGB, the same profile file on both sides (test5.icc for 8 bits, aRGBlcms2.icc for 16 and 32 bits)
+
+    SpeedTest8bits("8 bits on SAME Matrix-Shaper profiles",
+        cmsOpenProfileFromFile("test5.icc", "r"),
+        cmsOpenProfileFromFile("test5.icc", "r"),
+        INTENT_PERCEPTUAL);
+
+    SpeedTest16bits("16 bits on SAME Matrix-Shaper profiles",
+        cmsOpenProfileFromFile("aRGBlcms2.icc", "r"),
+        cmsOpenProfileFromFile("aRGBlcms2.icc", "r"),
+        INTENT_PERCEPTUAL);
+
+    SpeedTest32bits("32 bits on SAME Matrix-Shaper profiles",
+        cmsOpenProfileFromFile("aRGBlcms2.icc", "r"),
+        cmsOpenProfileFromFile("aRGBlcms2.icc", "r"),
+        INTENT_PERCEPTUAL);
+
+    printf("\n");
+
+    // RGB, test5.icc -> aRGBlcms2.icc again, this time with the absolute colorimetric intent
+
+    SpeedTest8bits("8 bits on Matrix-Shaper profiles (AbsCol)",
+       cmsOpenProfileFromFile("test5.icc", "r"),
+       cmsOpenProfileFromFile("aRGBlcms2.icc", "r"),
+        INTENT_ABSOLUTE_COLORIMETRIC);
+
+    SpeedTest16bits("16 bits on Matrix-Shaper profiles (AbsCol)",
+       cmsOpenProfileFromFile("test5.icc", "r"),
+       cmsOpenProfileFromFile("aRGBlcms2.icc", "r"),
+        INTENT_ABSOLUTE_COLORIMETRIC);
+
+    SpeedTest32bits("32 bits on Matrix-Shaper profiles (AbsCol)",
+       cmsOpenProfileFromFile("test5.icc", "r"),
+       cmsOpenProfileFromFile("aRGBlcms2.icc", "r"),
+        INTENT_ABSOLUTE_COLORIMETRIC);
+
+    printf("\n");
+
+    // RGB, in-memory device links built by CreateCurves() on both sides
+
+    SpeedTest8bits("8 bits on curves",
+        CreateCurves(),
+        CreateCurves(),
+        INTENT_PERCEPTUAL);
+
+    SpeedTest16bits("16 bits on curves",
+        CreateCurves(),
+        CreateCurves(),
+        INTENT_PERCEPTUAL);
+
+    SpeedTest32bits("32 bits on curves",
+        CreateCurves(),
+        CreateCurves(),
+        INTENT_PERCEPTUAL);
+
+    printf("\n");
+
+    // CMYK, test1.icc -> test2.icc (the CMYK helpers take no intent argument and use INTENT_PERCEPTUAL)
+
+    SpeedTest8bitsCMYK("8 bits on CMYK profiles",
+        cmsOpenProfileFromFile("test1.icc", "r"),
+        cmsOpenProfileFromFile("test2.icc", "r"));
+
+    SpeedTest16bitsCMYK("16 bits on CMYK profiles",
+        cmsOpenProfileFromFile("test1.icc", "r"),
+        cmsOpenProfileFromFile("test2.icc", "r"));
+
+    SpeedTest32bitsCMYK("32 bits on CMYK profiles",
+        cmsOpenProfileFromFile("test1.icc", "r"),
+        cmsOpenProfileFromFile("test2.icc", "r"));
+
+    printf("\n");
+
+    // Gray, gray3lcms2.icc -> graylcms2.icc, relative colorimetric intent
+
+    SpeedTest8bitsGray("8 bits on gray-to gray",
+        cmsOpenProfileFromFile("gray3lcms2.icc", "r"),
+        cmsOpenProfileFromFile("graylcms2.icc", "r"), INTENT_RELATIVE_COLORIMETRIC);
+
+    SpeedTest16bitsGray("16 bits on gray-to gray",
+        cmsOpenProfileFromFile("gray3lcms2.icc", "r"),
+        cmsOpenProfileFromFile("graylcms2.icc", "r"), INTENT_RELATIVE_COLORIMETRIC);
+
+    SpeedTest32bitsGray("32 bits on gray-to gray",
+        cmsOpenProfileFromFile("gray3lcms2.icc", "r"),
+        cmsOpenProfileFromFile("graylcms2.icc", "r"), INTENT_RELATIVE_COLORIMETRIC);
+
+    printf("\n");
+
+    // Gray, graylcms2.icc -> glablcms2.icc, relative colorimetric intent
+
+    SpeedTest8bitsGray("8 bits on gray-to-lab gray",
+        cmsOpenProfileFromFile("graylcms2.icc", "r"),
+        cmsOpenProfileFromFile("glablcms2.icc", "r"), INTENT_RELATIVE_COLORIMETRIC);
+
+    SpeedTest16bitsGray("16 bits on gray-to-lab gray",
+        cmsOpenProfileFromFile("graylcms2.icc", "r"),
+        cmsOpenProfileFromFile("glablcms2.icc", "r"), INTENT_RELATIVE_COLORIMETRIC);
+
+    SpeedTest32bitsGray("32 bits on gray-to-lab gray",
+        cmsOpenProfileFromFile("graylcms2.icc", "r"),
+        cmsOpenProfileFromFile("glablcms2.icc", "r"), INTENT_RELATIVE_COLORIMETRIC);
+
+    printf("\n");
+
+    // Gray, graylcms2.icc on both sides, perceptual intent
+
+    SpeedTest8bitsGray("8 bits on SAME gray-to-gray",
+        cmsOpenProfileFromFile("graylcms2.icc", "r"),
+        cmsOpenProfileFromFile("graylcms2.icc", "r"), INTENT_PERCEPTUAL);
+
+    SpeedTest16bitsGray("16 bits on SAME gray-to-gray",
+        cmsOpenProfileFromFile("graylcms2.icc", "r"),
+        cmsOpenProfileFromFile("graylcms2.icc", "r"), INTENT_PERCEPTUAL);
+
+    SpeedTest32bitsGray("32 bits on SAME gray-to-gray",
+        cmsOpenProfileFromFile("graylcms2.icc", "r"),
+        cmsOpenProfileFromFile("graylcms2.icc", "r"), INTENT_PERCEPTUAL);
+
+    printf("\n");
+}
+
+
+// -----------------------------------------------------------------------------------------------------
+
+
+// Prints the numeric code and the description of every rendering intent returned by cmsGetSupportedIntents
+static void PrintSupportedIntents(void)
+{
+    cmsUInt32Number IntentCodes[200];
+    char* IntentNames[200];
+    cmsUInt32Number Found, k;
+
+    // Room for 200 entries is offered to the library
+    Found = cmsGetSupportedIntents(200, IntentCodes, IntentNames);
+
+    printf("Supported intents:\n");
+    for (k = 0; k < Found; k++)
+        printf("\t%u - %s\n", IntentCodes[k], IntentNames[k]);
+
+    printf("\n");
+}
+
+
+
+// ---------------------------------------------------------------------------------------
+
+#ifdef LCMS_FAST_EXTENSIONS
+    void* cmsFast8Bitextensions(void);
+#endif
+
+int main(int argc, char* argv[])
+{
+    cmsInt32Number Exhaustive = 0;
+    cmsInt32Number DoSpeedTests = 1;
+    cmsInt32Number DoCheckTests = 1;
+    cmsInt32Number DoPluginTests = 1;
+    cmsInt32Number DoZooTests = 0;
+    // Exhaustive is switched on by "--exhaustive"; the Do* flags are fixed here and select which test groups run
+#ifdef _MSC_VER
+    _CrtSetDbgFlag ( _CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF );
+#endif
+
+
+    // First of all, check that the header and the linked library report the same version
+   if (cmsGetEncodedCMMversion() != LCMS_VERSION) {
+          Die("Oops, you are mixing header and shared lib!\nHeader version reports to be '%d' and shared lib '%d'\n", LCMS_VERSION, cmsGetEncodedCMMversion());
+   }
+
+    printf("LittleCMS %2.2f test bed %s %s\n\n", LCMS_VERSION / 1000.0, __DATE__, __TIME__);
+
+    if ((argc == 2) && strcmp(argv[1], "--exhaustive") == 0) {
+
+        Exhaustive = 1;
+        printf("Running exhaustive tests (will take a while...)\n\n");
+    }
+
+#ifdef LCMS_FAST_EXTENSIONS
+   printf("Installing fast 8 bit extension ...");   
+   cmsPlugin(cmsFast8Bitextensions());
+   printf("done.\n");
+#endif
+
+
+    printf("Installing debug memory plug-in ... ");
+    cmsPlugin(&DebugMemHandler);
+    printf("done.\n");
+
+    printf("Installing error logger ... ");
+    cmsSetLogErrorHandler(FatalErrorQuit);
+    printf("done.\n");
+    
+    PrintSupportedIntents();
+    
+    Check("Base types", CheckBaseTypes);
+    Check("endianness", CheckEndianness);
+    Check("quick floor", CheckQuickFloor);
+    Check("quick floor word", CheckQuickFloorWord);
+    Check("Fixed point 15.16 representation", CheckFixedPoint15_16);
+    Check("Fixed point 8.8 representation", CheckFixedPoint8_8);
+    Check("D50 roundtrip", CheckD50Roundtrip);
+
+    // Create utility profiles (needed by both the check tests and the speed tests)
+    if (DoCheckTests || DoSpeedTests)
+        Check("Creation of test profiles", CreateTestProfiles);
+
+    if (DoCheckTests) {
+   
+    // Forward 1D interpolation
+    Check("1D interpolation in 2pt tables", Check1DLERP2);
+    Check("1D interpolation in 3pt tables", Check1DLERP3);
+    Check("1D interpolation in 4pt tables", Check1DLERP4);
+    Check("1D interpolation in 6pt tables", Check1DLERP6);
+    Check("1D interpolation in 18pt tables", Check1DLERP18);
+    Check("1D interpolation in descending 2pt tables", Check1DLERP2Down);
+    Check("1D interpolation in descending 3pt tables", Check1DLERP3Down);
+    Check("1D interpolation in descending 6pt tables", Check1DLERP6Down);
+    Check("1D interpolation in descending 18pt tables", Check1DLERP18Down);
+
+    if (Exhaustive) {
+
+        Check("1D interpolation in n tables", ExhaustiveCheck1DLERP);
+        Check("1D interpolation in descending tables", ExhaustiveCheck1DLERPDown);
+    }
+
+    // Forward 3D interpolation
+    Check("3D interpolation Tetrahedral (float) ", Check3DinterpolationFloatTetrahedral);
+    Check("3D interpolation Trilinear (float) ", Check3DinterpolationFloatTrilinear);
+    Check("3D interpolation Tetrahedral (16) ", Check3DinterpolationTetrahedral16);
+    Check("3D interpolation Trilinear (16) ", Check3DinterpolationTrilinear16);
+
+    if (Exhaustive) {
+
+        Check("Exhaustive 3D interpolation Tetrahedral (float) ", ExaustiveCheck3DinterpolationFloatTetrahedral);
+        Check("Exhaustive 3D interpolation Trilinear  (float) ", ExaustiveCheck3DinterpolationFloatTrilinear);
+        Check("Exhaustive 3D interpolation Tetrahedral (16) ", ExhaustiveCheck3DinterpolationTetrahedral16);
+        Check("Exhaustive 3D interpolation Trilinear (16) ", ExhaustiveCheck3DinterpolationTrilinear16);
+    }
+
+    Check("Reverse interpolation 3 -> 3", CheckReverseInterpolation3x3);
+    Check("Reverse interpolation 4 -> 3", CheckReverseInterpolation4x3);
+
+
+    // High dimensionality interpolation
+
+    Check("3D interpolation", Check3Dinterp);
+    Check("3D interpolation with granularity", Check3DinterpGranular);
+    Check("4D interpolation", Check4Dinterp);
+    Check("4D interpolation with granularity", Check4DinterpGranular);
+    Check("5D interpolation with granularity", Check5DinterpGranular);
+    Check("6D interpolation with granularity", Check6DinterpGranular);
+    Check("7D interpolation with granularity", Check7DinterpGranular);
+    Check("8D interpolation with granularity", Check8DinterpGranular);
+
+    // Encoding of colorspaces
+    Check("Lab to LCh and back (float only) ", CheckLab2LCh);
+    Check("Lab to XYZ and back (float only) ", CheckLab2XYZ);
+    Check("Lab to xyY and back (float only) ", CheckLab2xyY);
+    Check("Lab V2 encoding", CheckLabV2encoding);
+    Check("Lab V4 encoding", CheckLabV4encoding);
+
+    // BlackBody
+    Check("Blackbody radiator", CheckTemp2CHRM);
+
+    // Tone curves
+    Check("Linear gamma curves (16 bits)", CheckGammaCreation16);
+    Check("Linear gamma curves (float)", CheckGammaCreationFlt);
+
+    Check("Curve 1.8 (float)", CheckGamma18);
+    Check("Curve 2.2 (float)", CheckGamma22);
+    Check("Curve 3.0 (float)", CheckGamma30);
+
+    Check("Curve 1.8 (table)", CheckGamma18Table);
+    Check("Curve 2.2 (table)", CheckGamma22Table);
+    Check("Curve 3.0 (table)", CheckGamma30Table);
+
+    Check("Curve 1.8 (word table)", CheckGamma18TableWord);
+    Check("Curve 2.2 (word table)", CheckGamma22TableWord);
+    Check("Curve 3.0 (word table)", CheckGamma30TableWord);
+
+    Check("Parametric curves", CheckParametricToneCurves);
+
+    Check("Join curves", CheckJointCurves);
+    Check("Join curves descending", CheckJointCurvesDescending);
+    Check("Join curves degenerated", CheckReverseDegenerated);
+    Check("Join curves sRGB (Float)", CheckJointFloatCurves_sRGB);
+    Check("Join curves sRGB (16 bits)", CheckJoint16Curves_sRGB);
+    Check("Join curves sigmoidal", CheckJointCurvesSShaped);
+
+    // LUT basics
+    Check("LUT creation & dup", CheckLUTcreation);
+    Check("1 Stage LUT ", Check1StageLUT);
+    Check("2 Stage LUT ", Check2StageLUT);
+    Check("2 Stage LUT (16 bits)", Check2Stage16LUT);
+    Check("3 Stage LUT ", Check3StageLUT);
+    Check("3 Stage LUT (16 bits)", Check3Stage16LUT);
+    Check("4 Stage LUT ", Check4StageLUT);
+    Check("4 Stage LUT (16 bits)", Check4Stage16LUT);
+    Check("5 Stage LUT ", Check5StageLUT);
+    Check("5 Stage LUT (16 bits) ", Check5Stage16LUT);
+    Check("6 Stage LUT ", Check6StageLUT);
+    Check("6 Stage LUT (16 bits) ", Check6Stage16LUT);
+
+    // LUT operation
+    Check("Lab to Lab LUT (float only) ", CheckLab2LabLUT);
+    Check("XYZ to XYZ LUT (float only) ", CheckXYZ2XYZLUT);
+    Check("Lab to Lab MAT LUT (float only) ", CheckLab2LabMatLUT);
+    Check("Named Color LUT", CheckNamedColorLUT);
+    Check("Usual formatters", CheckFormatters16);
+    Check("Floating point formatters", CheckFormattersFloat);
+
+#ifndef CMS_NO_HALF_SUPPORT 
+    Check("HALF formatters", CheckFormattersHalf);
+#endif
+    // ChangeBuffersFormat
+    Check("ChangeBuffersFormat", CheckChangeBufferFormat);
+
+    // MLU
+    Check("Multilocalized Unicode", CheckMLU);
+
+    // Named color
+    Check("Named color lists", CheckNamedColorList);
+
+    // Profile I/O (this one is huge!)
+    Check("Profile creation", CheckProfileCreation);
+    Check("Header version", CheckVersionHeaderWriting);
+    Check("Multilocalized profile", CheckMultilocalizedProfile);
+
+    // Error reporting
+    Check("Error reporting on bad profiles", CheckErrReportingOnBadProfiles);
+    Check("Error reporting on bad transforms", CheckErrReportingOnBadTransforms);
+
+    // Transforms
+    Check("Curves only transforms", CheckCurvesOnlyTransforms);
+    Check("Float Lab->Lab transforms", CheckFloatLabTransforms);
+    Check("Encoded Lab->Lab transforms", CheckEncodedLabTransforms);
+    Check("Stored identities", CheckStoredIdentities);
+
+    Check("Matrix-shaper transform (float)",   CheckMatrixShaperXFORMFloat);
+    Check("Matrix-shaper transform (16 bits)", CheckMatrixShaperXFORM16);
+    Check("Matrix-shaper transform (8 bits)",  CheckMatrixShaperXFORM8);
+
+    Check("Primaries of sRGB", CheckRGBPrimaries);
+
+    // Known values
+    Check("Known values across matrix-shaper", Chack_sRGB_Float);
+    Check("Gray input profile", CheckInputGray);
+    Check("Gray Lab input profile", CheckLabInputGray);
+    Check("Gray output profile", CheckOutputGray);
+    Check("Gray Lab output profile", CheckLabOutputGray);
+
+    Check("Matrix-shaper proofing transform (float)",   CheckProofingXFORMFloat);
+    Check("Matrix-shaper proofing transform (16 bits)",  CheckProofingXFORM16);
+
+    Check("Gamut check", CheckGamutCheck);
+
+    Check("CMYK roundtrip on perceptual transform",   CheckCMYKRoundtrip);
+
+    Check("CMYK perceptual transform",   CheckCMYKPerceptual);
+    // Check("CMYK rel.col. transform",   CheckCMYKRelCol);
+
+    Check("Black ink only preservation", CheckKOnlyBlackPreserving);
+    Check("Black plane preservation", CheckKPlaneBlackPreserving);
+
+
+    Check("Deciding curve types", CheckV4gamma);
+
+    Check("Black point detection", CheckBlackPoint);
+    Check("TAC detection", CheckTAC);
+
+    Check("CGATS parser", CheckCGATS);
+    Check("CGATS parser on junk", CheckCGATS2);
+    Check("CGATS parser on overflow", CheckCGATS_Overflow);
+    Check("PostScript generator", CheckPostScript);
+    Check("Segment maxima GBD", CheckGBD);
+    Check("MD5 digest", CheckMD5);
+    Check("Linking", CheckLinking);
+    Check("floating point tags on XYZ", CheckFloatXYZ);
+    Check("RGB->Lab->RGB with alpha on FLT", ChecksRGB2LabFLT);
+    Check("Parametric curve on Rec709", CheckParametricRec709);
+    Check("Floating Point sampled curve with non-zero start", CheckFloatSamples);
+    Check("Floating Point segmented curve with short sampled segment", CheckFloatSegments);
+    Check("Read RAW portions", CheckReadRAW);
+    Check("Check MetaTag", CheckMeta);
+    Check("Null transform on floats", CheckFloatNULLxform);
+    Check("Set free a tag", CheckRemoveTag);
+    Check("Matrix simplification", CheckMatrixSimplify);
+    Check("Planar 8 optimization", CheckPlanar8opt);
+    Check("Swap endian feature", CheckSE);
+    Check("Transform line stride RGB", CheckTransformLineStride);
+    Check("Forged MPE profile", CheckForgedMPE);
+    Check("Proofing intersection", CheckProofingIntersection);
+    }
+
+    if (DoPluginTests)
+    {
+
+        Check("Context memory handling", CheckAllocContext);
+        Check("Simple context functionality", CheckSimpleContext);
+        Check("Alarm codes context", CheckAlarmColorsContext);
+        Check("Adaptation state context", CheckAdaptationStateContext);
+        Check("1D interpolation plugin", CheckInterp1DPlugin); 
+        Check("3D interpolation plugin", CheckInterp3DPlugin); 
+        Check("Parametric curve plugin", CheckParametricCurvePlugin);        
+        Check("Formatters plugin",       CheckFormattersPlugin);        
+        Check("Tag type plugin",         CheckTagTypePlugin);
+        Check("MPE type plugin",         CheckMPEPlugin);       
+        Check("Optimization plugin",     CheckOptimizationPlugin); 
+        Check("Rendering intent plugin", CheckIntentPlugin);
+        Check("Full transform plugin",   CheckTransformPlugin);
+        Check("Mutex plugin",            CheckMutexPlugin);
+       
+    }
+
+
+    if (DoSpeedTests)
+        SpeedTest();
+
+
+#ifdef CMS_IS_WINDOWS_
+    if (DoZooTests) 
+         CheckProfileZOO();
+#endif
+
+    DebugMemPrintTotals();
+
+    cmsUnregisterPlugins();
+
+    // Cleanup, under the same condition that guarded the creation of the test profiles above
+    if (DoCheckTests || DoSpeedTests)
+        RemoveTestProfiles();
+    // TotalFail becomes the exit status of the test bed
+   return TotalFail;
+}
+
+
+
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/testbed/testcms2.h b/third-party/libjxl/libjxl/third_party/lcms/testbed/testcms2.h
new file mode 100755
index 0000000000..a9cf86bc64
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/testbed/testcms2.h
@@ -0,0 +1,82 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2014 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#ifndef TESTCMS2_H
+#define TESTCMS2_H
+
+#ifdef _MSC_VER
+#    define _CRT_SECURE_NO_WARNINGS 1
+#     include "crtdbg.h"
+#     include <io.h>
+#endif
+
+#include "lcms2_internal.h"
+
+#define cmsmin(a, b) (((a) < (b)) ? (a) : (b))
+
+// Used to mark special pointers
+void DebugMemDontCheckThis(void *Ptr);
+
+
+cmsBool IsGoodVal(const char *title, cmsFloat64Number in, cmsFloat64Number out, cmsFloat64Number max);
+cmsBool IsGoodFixed15_16(const char *title, cmsFloat64Number in, cmsFloat64Number out);
+cmsBool IsGoodFixed8_8(const char *title, cmsFloat64Number in, cmsFloat64Number out);
+cmsBool IsGoodWord(const char *title, cmsUInt16Number in, cmsUInt16Number out);
+cmsBool IsGoodWordPrec(const char *title, cmsUInt16Number in, cmsUInt16Number out, cmsUInt16Number maxErr);
+
+void* PluginMemHandler(void);
+cmsContext WatchDogContext(void* usr);
+
+void ResetFatalError(void);
+void Die(const char* Reason, ...);
+void Dot(void);
+void Fail(const char* frm, ...);
+void SubTest(const char* frm, ...);
+void TestMemoryLeaks(cmsBool ok);
+void Say(const char* str);
+
+// Plug-in tests
+cmsInt32Number CheckSimpleContext(void);
+cmsInt32Number CheckAllocContext(void);
+cmsInt32Number CheckAlarmColorsContext(void);
+cmsInt32Number CheckAdaptationStateContext(void);
+cmsInt32Number CheckInterp1DPlugin(void);
+cmsInt32Number CheckInterp3DPlugin(void);
+cmsInt32Number CheckParametricCurvePlugin(void);
+cmsInt32Number CheckFormattersPlugin(void);
+cmsInt32Number CheckTagTypePlugin(void);
+cmsInt32Number CheckMPEPlugin(void);
+cmsInt32Number CheckOptimizationPlugin(void);
+cmsInt32Number CheckIntentPlugin(void);
+cmsInt32Number CheckTransformPlugin(void);
+cmsInt32Number CheckMutexPlugin(void);
+
+
+// Zoo
+void CheckProfileZOO(void);
+
+#endif
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/testbed/testplugin.c b/third-party/libjxl/libjxl/third_party/lcms/testbed/testplugin.c
new file mode 100755
index 0000000000..942a8d5b1b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/testbed/testplugin.c
@@ -0,0 +1,1476 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "testcms2.h"
+
+// --------------------------------------------------------------------------------------------------
+// Auxiliary, duplicate a context and mark the block as non-debug because in this case the allocator
+// and deallocator have different context owners
+// --------------------------------------------------------------------------------------------------
+
+// Clone 'src' (handing 'Data' to cmsDupContext) and flag the clone so the
+// debug memory checker leaves it alone.
+static cmsContext DupContext(cmsContext src, void* Data)
+{
+    cmsContext clone;
+    clone = cmsDupContext(src, Data);
+    DebugMemDontCheckThis(clone);
+    return clone;
+}
+
+// --------------------------------------------------------------------------------------------------
+// Simple context functions
+// --------------------------------------------------------------------------------------------------
+
+// Allocation order: mixes contexts created with and without PluginMemHandler(). Always returns 1.
+cmsInt32Number CheckAllocContext(void)
+{
+     cmsContext c1, c2, c3, c4;
+
+     // Create and immediately delete one context of each kind
+     c1 = cmsCreateContext(NULL, NULL);                 // This creates a context by using the normal malloc
+     DebugMemDontCheckThis(c1);
+     cmsDeleteContext(c1);
+
+     c2 = cmsCreateContext(PluginMemHandler(), NULL);   // This creates a context by using the debug malloc
+     DebugMemDontCheckThis(c2);
+     cmsDeleteContext(c2);
+
+     c1 = cmsCreateContext(NULL, NULL);
+     DebugMemDontCheckThis(c1);
+
+     c2 = cmsCreateContext(PluginMemHandler(), NULL);
+     DebugMemDontCheckThis(c2);
+
+     cmsPluginTHR(c1, PluginMemHandler()); // Now the context has custom allocators
+
+     c3 = DupContext(c1, NULL);
+     c4 = DupContext(c2, NULL);
+
+     // All four contexts are alive here; they are released in creation order.
+     // The per-line notes state which allocator is expected to do the freeing.
+     cmsDeleteContext(c1);  // Should be deleted by using normal malloc
+     cmsDeleteContext(c2);  // Should be deleted by using debug malloc
+     cmsDeleteContext(c3);  // Should be deleted by using normal malloc
+     cmsDeleteContext(c4);  // Should be deleted by using debug malloc
+
+     return 1;
+}
+
+// Test the very basic context capabilities: create/delete, duplication and user data. Returns 1 if ok, else 0.
+cmsInt32Number CheckSimpleContext(void)
+{
+    int a = 1;
+    int b = 32;
+    cmsInt32Number rc = 0;
+
+    cmsContext c1, c2, c3;
+
+    // This function creates a context with a special
+    // memory manager that checks allocations
+    c1 = WatchDogContext(&a);
+    cmsDeleteContext(c1);
+
+    c1 = WatchDogContext(&a);
+
+    // Let's check duplication
+    c2 = DupContext(c1, NULL);
+    c3 = DupContext(c2, NULL);
+
+    // User data should have been propagated: c3 must still see 'a' (== 1)
+    rc = (*(int*) cmsGetContextUserData(c3)) == 1 ;
+
+    // Free resources
+    cmsDeleteContext(c1);
+    cmsDeleteContext(c2);
+    cmsDeleteContext(c3);
+
+    if (!rc) {
+        Fail("Creation of user data failed");
+        return 0;
+    }
+
+    // Back to create 3 levels of inheritance, this time replacing the user data on the last copy
+    c1 = cmsCreateContext(NULL, &a);
+    DebugMemDontCheckThis(c1);
+
+    c2 = DupContext(c1, NULL);
+    c3 = DupContext(c2, &b);
+
+    rc = (*(int*) cmsGetContextUserData(c3)) == 32 ;
+
+    cmsDeleteContext(c1);
+    cmsDeleteContext(c2);
+    cmsDeleteContext(c3);
+
+    if (!rc) {
+        Fail("Modification of user data failed");
+        return 0;
+    }
+
+    // All seems ok
+    return rc;
+}
+
+
+
+
+// --------------------------------------------------------------------------------------------------
+//Alarm color functions
+// --------------------------------------------------------------------------------------------------
+
+// This function tests that alarm codes set on a context reach its duplicates. Returns 1 if ok, else 0.
+cmsInt32Number CheckAlarmColorsContext(void)
+{
+    cmsInt32Number rc = 0;
+    const cmsUInt16Number codes[] = {0x0000, 0x1111, 0x2222, 0x3333, 0x4444, 0x5555, 0x6666, 0x7777, 0x8888, 0x9999, 0xaaaa, 0xbbbb, 0xcccc, 0xdddd, 0xeeee, 0xffff};
+    cmsUInt16Number out[16];
+    cmsContext c1, c2, c3;
+    int i;
+
+    c1 = WatchDogContext(NULL);
+    // Set the codes on the first context only, then duplicate twice
+    cmsSetAlarmCodesTHR(c1, codes);
+    c2 = DupContext(c1, NULL);
+    c3 = DupContext(c2, NULL);
+    // Read them back from the last copy
+    cmsGetAlarmCodesTHR(c3, out);
+
+    rc = 1;
+    for (i=0; i < 16; i++) {
+        if (out[i] != codes[i]) {
+            Fail("Bad alarm code %x != %x", out[i], codes[i]);
+            rc = 0;
+            break;
+        }
+    }
+
+    cmsDeleteContext(c1);
+    cmsDeleteContext(c2);
+    cmsDeleteContext(c3);
+
+    return rc;
+}
+
+
+// --------------------------------------------------------------------------------------------------
+//Adaptation state functions
+// --------------------------------------------------------------------------------------------------
+
+// Similar to the previous, but for adaptation state. Returns the IsGoodVal result, or 0 if the global state moved.
+cmsInt32Number CheckAdaptationStateContext(void)
+{
+    cmsInt32Number rc = 0;
+    cmsContext c1, c2, c3;
+    cmsFloat64Number old1, old2;
+    // NOTE(review): -1 is presumably "query only, do not modify" -- confirm in the lcms2 API docs
+    old1 =  cmsSetAdaptationStateTHR(NULL, -1);
+
+    c1 = WatchDogContext(NULL);
+
+    cmsSetAdaptationStateTHR(c1, 0.7);
+
+    c2 = DupContext(c1, NULL);
+    c3 = DupContext(c2, NULL);
+    // The last copy must report the 0.7 that was set on c1
+    rc = IsGoodVal("Adaptation state", cmsSetAdaptationStateTHR(c3, -1), 0.7, 0.001);
+
+    cmsDeleteContext(c1);
+    cmsDeleteContext(c2);
+    cmsDeleteContext(c3);
+    // The value seen through the NULL context must be the same as before the test
+    old2 =  cmsSetAdaptationStateTHR(NULL, -1);
+
+    if (old1 != old2) {
+        Fail("Adaptation state has changed");
+        return 0;
+    }
+
+    return rc;
+}
+
+// --------------------------------------------------------------------------------------------------
+// Interpolation plugin check: A fake 1D and 3D interpolation will be used to test the functionality. 
+// --------------------------------------------------------------------------------------------------
+
+// This fake interpolation always takes the closest lower node in the interpolation table for 1D
+static
+void Fake1Dfloat(const cmsFloat32Number Value[],
+                    cmsFloat32Number Output[],
+                    const cmsInterpParams* p)
+{
+       cmsFloat32Number val2;
+       int cell;
+       const cmsFloat32Number* LutTable = (const cmsFloat32Number*) p ->Table;
+
+       // Clip upper values: anything at or above 1.0 maps to the node at index Domain[0]
+       if (Value[0] >= 1.0) {
+           Output[0] = LutTable[p -> Domain[0]];
+           return;
+       }
+       // Scale the input by Domain[0] and truncate down to a node index; no blending between nodes
+       val2 = p -> Domain[0] * Value[0];
+       cell = (int) floor(val2);
+       Output[0] =  LutTable[cell] ;
+}
+
+// Fake 3D interpolation: emits the three inputs inverted and in reverse channel order
+static
+void Fake3D16(register const cmsUInt16Number Input[],
+              register cmsUInt16Number Output[],
+              register const struct _cms_interp_struc* p)
+{
+       int ch;
+
+       for (ch = 0; ch < 3; ch++) Output[ch] = 0xFFFF - Input[2 - ch];
+}
+
+// Interpolation factory for the sample plug-in. Returns Fake1Dfloat for 1->1
+// float lookups, Fake3D16 for 3->3 16-bit lookups, and an all-zero
+// cmsInterpFunction (the non-supported mark) for anything else.
+cmsInterpFunction my_Interpolators_Factory(cmsUInt32Number nInputChannels,
+                                           cmsUInt32Number nOutputChannels,
+                                           cmsUInt32Number dwFlags)
+{
+    cmsInterpFunction Chosen;
+    cmsBool WantsFloat = (dwFlags & CMS_LERP_FLAGS_FLOAT);
+
+    // Start from zero, which is the non-supported mark
+    memset(&Chosen, 0, sizeof(Chosen));
+
+    if (WantsFloat) {
+
+        // Floating point: only the 1D to 1D case is handled
+        if (nInputChannels == 1 && nOutputChannels == 1) Chosen.LerpFloat = Fake1Dfloat;
+    }
+    else {
+
+        // 16 bits: only the 3D to 3D case is handled
+        if (nInputChannels == 3 && nOutputChannels == 3) Chosen.Lerp16 = Fake3D16;
+    }
+
+    return Chosen;
+}
+
+// Interpolation plug-in descriptor: routes interpolator selection to my_Interpolators_Factory
+static
+cmsPluginInterpolation InterpPluginSample = {
+    // Plug-in header. NOTE(review): 2060 is presumably the expected lcms2 version and NULL the next-plugin link -- confirm in lcms2_plugin.h
+    { cmsPluginMagicNumber, 2060, cmsPluginInterpolationSig, NULL },
+    my_Interpolators_Factory
+};
+
+
+// Checks the 1D interpolation plug-in: through a context that carries it, curve evaluation must
+// return the closest lower node; in the global context it must interpolate. Returns 1 if ok, else 0.
+cmsInt32Number CheckInterp1DPlugin(void)
+{
+    cmsToneCurve* Sampled1D = NULL;
+    cmsContext ctx = NULL;
+    cmsContext cpy = NULL;
+    const cmsFloat32Number tab[] = { 0.0f, 0.10f, 0.20f, 0.30f, 0.40f, 0.50f, 0.60f, 0.70f, 0.80f, 0.90f, 1.00f };  // A straight line
+
+    // 1st level context
+    ctx = WatchDogContext(NULL);
+    if (ctx == NULL) {
+        Fail("Cannot create context");
+        goto Error;
+    }
+
+    cmsPluginTHR(ctx, &InterpPluginSample);
+    cpy = DupContext(ctx, NULL);
+    if (cpy == NULL) {
+        Fail("Cannot create context (2)");
+        goto Error;
+    }
+
+    Sampled1D = cmsBuildTabulatedToneCurveFloat(cpy, 11, tab);
+    if (Sampled1D == NULL) {
+        Fail("Cannot create tone curve (1)");
+        goto Error;
+    }
+
+    // Do some interpolations with the plugin
+    if (!IsGoodVal("0.10", cmsEvalToneCurveFloat(Sampled1D, 0.10f), 0.10, 0.01)) goto Error;
+    if (!IsGoodVal("0.13", cmsEvalToneCurveFloat(Sampled1D, 0.13f), 0.10, 0.01)) goto Error;
+    if (!IsGoodVal("0.55", cmsEvalToneCurveFloat(Sampled1D, 0.55f), 0.50, 0.01)) goto Error;
+    if (!IsGoodVal("0.9999", cmsEvalToneCurveFloat(Sampled1D, 0.9999f), 0.90, 0.01)) goto Error;
+
+    // Clear every handle as it is released, so a later jump to Error cannot free it twice
+    cmsFreeToneCurve(Sampled1D); Sampled1D = NULL;
+    cmsDeleteContext(ctx);       ctx = NULL;
+    cmsDeleteContext(cpy);       cpy = NULL;
+
+    // Now in global context
+    Sampled1D = cmsBuildTabulatedToneCurveFloat(NULL, 11, tab);
+    if (Sampled1D == NULL) {
+        Fail("Cannot create tone curve (2)");
+        goto Error;
+    }
+
+    // Now without the plug-in
+    if (!IsGoodVal("0.10", cmsEvalToneCurveFloat(Sampled1D, 0.10f), 0.10, 0.001)) goto Error;
+    if (!IsGoodVal("0.13", cmsEvalToneCurveFloat(Sampled1D, 0.13f), 0.13, 0.001)) goto Error;
+    if (!IsGoodVal("0.55", cmsEvalToneCurveFloat(Sampled1D, 0.55f), 0.55, 0.001)) goto Error;
+    if (!IsGoodVal("0.9999", cmsEvalToneCurveFloat(Sampled1D, 0.9999f), 0.9999, 0.001)) goto Error;
+
+    cmsFreeToneCurve(Sampled1D);
+    return 1;
+
+Error:
+    // Release the curve before the contexts it may have been built on; each context exactly once
+    if (Sampled1D != NULL) cmsFreeToneCurve(Sampled1D);
+    if (ctx != NULL) cmsDeleteContext(ctx);
+    if (cpy != NULL) cmsDeleteContext(cpy);
+    return 0;
+}
+
+// Checks the 3D interpolation plug-in: a pipeline built on a context that carries the plug-in must
+// return the inverted, channel-swapped values of Fake3D16; built on the global context, the same
+// identity CLUT must return its input. Returns 1 if ok, else 0.
+cmsInt32Number CheckInterp3DPlugin(void)
+{
+    cmsPipeline* p = NULL;
+    cmsStage* clut;
+    cmsContext ctx = NULL;
+    cmsUInt16Number In[3], Out[3];
+    cmsUInt16Number identity[] = {
+
+       0,       0,       0,
+       0,       0,       0xffff,
+       0,       0xffff,  0,
+       0,       0xffff,  0xffff,
+       0xffff,  0,       0,
+       0xffff,  0,       0xffff,
+       0xffff,  0xffff,  0,
+       0xffff,  0xffff,  0xffff
+    };
+
+    ctx = WatchDogContext(NULL);
+    if (ctx == NULL) {
+        Fail("Cannot create context");
+        return 0;
+    }
+
+    cmsPluginTHR(ctx, &InterpPluginSample);
+
+    p =  cmsPipelineAlloc(ctx, 3, 3);
+    clut = cmsStageAllocCLut16bit(ctx, 2, 3, 3, identity);
+    cmsPipelineInsertStage(p, cmsAT_BEGIN, clut);
+
+    // Do some interpolations with the plugin
+
+    In[0] = 0; In[1] = 0; In[2] = 0;
+    cmsPipelineEval16(In, Out, p);
+
+    if (!IsGoodWord("0", Out[0], 0xFFFF - 0)) goto Error;
+    if (!IsGoodWord("1", Out[1], 0xFFFF - 0)) goto Error;
+    if (!IsGoodWord("2", Out[2], 0xFFFF - 0)) goto Error;
+
+    In[0] = 0x1234; In[1] = 0x5678; In[2] = 0x9ABC;
+    cmsPipelineEval16(In, Out, p);
+
+    if (!IsGoodWord("0", 0xFFFF - 0x9ABC, Out[0])) goto Error;
+    if (!IsGoodWord("1", 0xFFFF - 0x5678, Out[1])) goto Error;
+    if (!IsGoodWord("2", 0xFFFF - 0x1234, Out[2])) goto Error;
+
+    // Release the pipeline before the context it was built on, and clear both
+    // handles so the Error path only touches what is still alive
+    cmsPipelineFree(p);    p = NULL;
+    cmsDeleteContext(ctx); ctx = NULL;
+
+    // Now without the plug-in
+
+    p =  cmsPipelineAlloc(NULL, 3, 3);
+    clut = cmsStageAllocCLut16bit(NULL, 2, 3, 3, identity);
+    cmsPipelineInsertStage(p, cmsAT_BEGIN, clut);
+
+    In[0] = 0; In[1] = 0; In[2] = 0;
+    cmsPipelineEval16(In, Out, p);
+
+    if (!IsGoodWord("0", 0, Out[0])) goto Error;
+    if (!IsGoodWord("1", 0, Out[1])) goto Error;
+    if (!IsGoodWord("2", 0, Out[2])) goto Error;
+
+    In[0] = 0x1234; In[1] = 0x5678; In[2] = 0x9ABC;
+    cmsPipelineEval16(In, Out, p);
+
+    if (!IsGoodWord("0", 0x1234, Out[0])) goto Error;
+    if (!IsGoodWord("1", 0x5678, Out[1])) goto Error;
+    if (!IsGoodWord("2", 0x9ABC, Out[2])) goto Error;
+
+    cmsPipelineFree(p);
+    return 1;
+
+Error:
+    if (p != NULL) cmsPipelineFree(p);
+    if (ctx != NULL) cmsDeleteContext(ctx);   // still alive only when the plug-in phase failed
+    return 0;
+}
+
+// --------------------------------------------------------------------------------------------------
+// Parametric curve plugin check: sin(x)/cos(x) function will be used to test the functionality. 
+// --------------------------------------------------------------------------------------------------
+
+#define TYPE_SIN  1000
+#define TYPE_COS  1010
+#define TYPE_TAN  1020
+#define TYPE_709  709
+
+// Evaluator for the sample sine/cosine curves. A positive Type selects the
+// forward function, the negated Type its inverse; Params[0] is the scale
+// factor. Any other Type yields -1.0.
+static cmsFloat64Number my_fns(cmsInt32Number Type,
+                        const cmsFloat64Number Params[],
+                        cmsFloat64Number R)
+{
+
+    // Sine and its inverse
+    if (Type == TYPE_SIN) {
+        return Params[0]* sin(R * M_PI);
+    }
+
+    if (Type == -TYPE_SIN) {
+        return asin(R) / (M_PI * Params[0]);
+    }
+
+    // Cosine and its inverse
+    if (Type == TYPE_COS) {
+        return Params[0]* cos(R * M_PI);
+    }
+
+    if (Type == -TYPE_COS) {
+        return acos(R) / (M_PI * Params[0]);
+    }
+
+    // Not one of ours
+    return -1.0;
+}
+
+// Evaluator for the sample tangent curve: TYPE_TAN is the forward function,
+// -TYPE_TAN its inverse, Params[0] the scale factor. Any other Type yields -1.0.
+static
+cmsFloat64Number my_fns2(cmsInt32Number Type,
+                        const cmsFloat64Number Params[],
+                        cmsFloat64Number R)
+{
+    cmsFloat64Number Result = -1.0;
+
+    if (Type == TYPE_TAN) {
+
+        Result = Params[0]* tan(R * M_PI);
+    }
+    else
+    if (Type == -TYPE_TAN) {
+
+        Result = atan(R) / (M_PI * Params[0]);
+    }
+
+    return Result;
+}
+
+
+// Rec709 curve evaluator: Type 709 and -709 are the two directions of the same
+// curve; each has a linear segment and a power segment driven by Params[0..4].
+// Any other Type evaluates to 0.
+static double Rec709Math(int Type, const double Params[], double R)
+{
+    if (Type == 709) {
+
+        // Linear segment up to Params[3]*Params[4], power segment above it
+        if (R <= (Params[3]*Params[4])) return R / Params[3];
+        return pow(((R - Params[2])/Params[1]), Params[0]);
+    }
+
+    if (Type == -709) {
+
+        if (R <= Params[4]) return R * Params[3];
+        return Params[1] * pow(R, (1/Params[0])) + Params[2];
+    }
+
+    return 0;
+}
+
+
+// Add nonstandard TRC curves -> Rec709 (parametric curve plug-in descriptor)
+
+cmsPluginParametricCurves Rec709Plugin = {
+
+    { cmsPluginMagicNumber, 2060, cmsPluginParametricCurveSig, NULL },
+    // One function, of type TYPE_709, taking 5 parameters, evaluated by Rec709Math
+    1, {TYPE_709}, {5}, Rec709Math
+
+};
+
+
+static
+cmsPluginParametricCurves CurvePluginSample = {
+    { cmsPluginMagicNumber, 2060, cmsPluginParametricCurveSig, NULL },
+    // Sample plug-in #1: registers the sine and cosine curve types handled by my_fns
+    2,                       // nFunctions
+    { TYPE_SIN, TYPE_COS },  // Function Types
+    { 1, 1 },                // ParameterCount
+    my_fns                   // Evaluator
+};
+
+static
+cmsPluginParametricCurves CurvePluginSample2 = {
+    { cmsPluginMagicNumber, 2060, cmsPluginParametricCurveSig, NULL },
+    // Sample plug-in #2: registers the tangent curve type handled by my_fns2
+    1,                       // nFunctions
+    { TYPE_TAN},             // Function Types
+    { 1 },                   // ParameterCount
+    my_fns2                  // Evaluator
+};
+
+// --------------------------------------------------------------------------------------------------
+// In this test, the DupContext function will be checked as well: every plug-in is installed at a
+// different duplication level and the curves are built on the middle copy.
+// Returns 1 if ok, else 0.
+// --------------------------------------------------------------------------------------------------
+cmsInt32Number CheckParametricCurvePlugin(void)
+{
+    cmsContext ctx = NULL;
+    cmsContext cpy = NULL;
+    cmsContext cpy2 = NULL;
+    cmsToneCurve* sinus = NULL;
+    cmsToneCurve* cosinus = NULL;
+    cmsToneCurve* tangent = NULL;
+    cmsToneCurve* reverse_sinus = NULL;
+    cmsToneCurve* reverse_cosinus = NULL;
+    cmsFloat64Number scale = 1.0;
+
+
+    ctx = WatchDogContext(NULL);
+    cmsPluginTHR(ctx, &CurvePluginSample);      // sin, cos
+
+    cpy = DupContext(ctx, NULL);
+    cmsPluginTHR(cpy, &CurvePluginSample2);     // + tan
+
+    cpy2 =  DupContext(cpy, NULL);
+    cmsPluginTHR(cpy2, &Rec709Plugin);          // + Rec709, installed but not evaluated here
+
+    // All curves are built on the middle copy
+    sinus = cmsBuildParametricToneCurve(cpy, TYPE_SIN, &scale);
+    cosinus = cmsBuildParametricToneCurve(cpy, TYPE_COS, &scale);
+    tangent = cmsBuildParametricToneCurve(cpy, TYPE_TAN, &scale);
+    reverse_sinus = cmsReverseToneCurve(sinus);
+    reverse_cosinus = cmsReverseToneCurve(cosinus);
+
+    // Forward evaluations
+    if (!IsGoodVal("0.10", cmsEvalToneCurveFloat(sinus, 0.10f), sin(0.10 * M_PI) , 0.001)) goto Error;
+    if (!IsGoodVal("0.60", cmsEvalToneCurveFloat(sinus, 0.60f), sin(0.60* M_PI), 0.001)) goto Error;
+    if (!IsGoodVal("0.90", cmsEvalToneCurveFloat(sinus, 0.90f), sin(0.90* M_PI), 0.001)) goto Error;
+
+    if (!IsGoodVal("0.10", cmsEvalToneCurveFloat(cosinus, 0.10f), cos(0.10* M_PI), 0.001)) goto Error;
+    if (!IsGoodVal("0.60", cmsEvalToneCurveFloat(cosinus, 0.60f), cos(0.60* M_PI), 0.001)) goto Error;
+    if (!IsGoodVal("0.90", cmsEvalToneCurveFloat(cosinus, 0.90f), cos(0.90* M_PI), 0.001)) goto Error;
+
+    if (!IsGoodVal("0.10", cmsEvalToneCurveFloat(tangent, 0.10f), tan(0.10* M_PI), 0.001)) goto Error;
+    if (!IsGoodVal("0.60", cmsEvalToneCurveFloat(tangent, 0.60f), tan(0.60* M_PI), 0.001)) goto Error;
+    if (!IsGoodVal("0.90", cmsEvalToneCurveFloat(tangent, 0.90f), tan(0.90* M_PI), 0.001)) goto Error;
+
+    // Reversed curves
+    if (!IsGoodVal("0.10", cmsEvalToneCurveFloat(reverse_sinus, 0.10f), asin(0.10)/M_PI, 0.001)) goto Error;
+    if (!IsGoodVal("0.60", cmsEvalToneCurveFloat(reverse_sinus, 0.60f), asin(0.60)/M_PI, 0.001)) goto Error;
+    if (!IsGoodVal("0.90", cmsEvalToneCurveFloat(reverse_sinus, 0.90f), asin(0.90)/M_PI, 0.001)) goto Error;
+
+    if (!IsGoodVal("0.10", cmsEvalToneCurveFloat(reverse_cosinus, 0.10f), acos(0.10)/M_PI, 0.001)) goto Error;
+    if (!IsGoodVal("0.60", cmsEvalToneCurveFloat(reverse_cosinus, 0.60f), acos(0.60)/M_PI, 0.001)) goto Error;
+    if (!IsGoodVal("0.90", cmsEvalToneCurveFloat(reverse_cosinus, 0.90f), acos(0.90)/M_PI, 0.001)) goto Error;
+
+    cmsFreeToneCurve(sinus);
+    cmsFreeToneCurve(cosinus);
+    cmsFreeToneCurve(tangent);
+    cmsFreeToneCurve(reverse_sinus);
+    cmsFreeToneCurve(reverse_cosinus);
+
+    cmsDeleteContext(ctx);
+    cmsDeleteContext(cpy);
+    cmsDeleteContext(cpy2);
+
+    return 1;
+
+Error:
+    // Curves first, then the contexts; 'tangent' is released here like the other four
+    cmsFreeToneCurve(sinus);
+    cmsFreeToneCurve(reverse_sinus);
+    cmsFreeToneCurve(cosinus);
+    cmsFreeToneCurve(reverse_cosinus);
+    cmsFreeToneCurve(tangent);
+
+    if (ctx != NULL) cmsDeleteContext(ctx);
+    if (cpy != NULL) cmsDeleteContext(cpy);
+    if (cpy2 != NULL) cmsDeleteContext(cpy2);
+    return 0;
+}
+
+// --------------------------------------------------------------------------------------------------
+// formatters plugin check: 5-6-5 RGB format
+// --------------------------------------------------------------------------------------------------
+
+// We define this special type as 0 bytes not float, and set the upper bit 
+
+#define TYPE_RGB_565  (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(0) | (1 << 23))
+
+cmsUInt8Number* my_Unroll565(register struct _cmstransform_struct* nfo,
+                            register cmsUInt16Number wIn[],
+                            register cmsUInt8Number* accum,
+                            register cmsUInt32Number Stride)
+{
+    cmsUInt16Number pixel = *(cmsUInt16Number*) accum;  // Take whole pixel
+    // Expand each field to 0..65535 with rounding: bits 0-4 (r), bits 5-10 (g), bits 11-15 (b)
+    double r = floor(((double) (pixel & 31) * 65535.0) / 31.0 + 0.5);
+    double g = floor((((pixel >> 5) & 63) * 65535.0) / 63.0 + 0.5);
+    double b = floor((((pixel >> 11) & 31) * 65535.0) / 31.0 + 0.5);
+    // The low field lands in wIn[2] and the high field in wIn[0]
+    wIn[2] = (cmsUInt16Number) r;
+    wIn[1] = (cmsUInt16Number) g;
+    wIn[0] = (cmsUInt16Number) b;
+    // Return the read pointer advanced past the 2-byte pixel
+    return accum + 2;
+}
+
+cmsUInt8Number* my_Pack565(register _cmsTRANSFORM* info,
+                           register cmsUInt16Number wOut[],
+                           register cmsUInt8Number* output,
+                           register cmsUInt32Number Stride)
+{
+    // Packs three 16-bit channels into one 16-bit 5-6-5 pixel (the reverse of my_Unroll565)
+    register cmsUInt16Number pixel;
+    int r, g, b;
+    // Scale each channel down to 5, 6 and 5 bits with rounding
+    r = (int) floor(( wOut[2] * 31) / 65535.0 + 0.5);
+    g = (int) floor(( wOut[1] * 63) / 65535.0 + 0.5);
+    b = (int) floor(( wOut[0] * 31) / 65535.0 + 0.5);
+
+    // r goes to bits 0-4, g to bits 5-10, b to bits 11-15
+    pixel = (r & 31)  | (( g & 63) << 5) | ((b & 31) << 11);
+
+    // Store the pixel and return the write pointer advanced by its 2 bytes
+    *(cmsUInt16Number*) output = pixel;
+    return output + 2;
+}
+
+
+// Formatter factory #1: supplies the 16-bit 5-6-5 unroller, for the input direction only
+cmsFormatter my_FormatterFactory(cmsUInt32Number Type,
+                                  cmsFormatterDirection Dir,
+                                  cmsUInt32Number dwFlags)
+{
+    cmsFormatter Formatter = { NULL };
+
+    // Anything but non-float 5-6-5 input keeps the empty formatter
+    if (Type != TYPE_RGB_565 || Dir != cmsFormatterInput) return Formatter;
+    if (dwFlags & CMS_PACK_FLAGS_FLOAT) return Formatter;
+    Formatter.Fmt16 = my_Unroll565;
+    return Formatter;
+}
+
+
+// Formatter factory #2: supplies the 16-bit 5-6-5 packer, for the output direction only
+cmsFormatter my_FormatterFactory2(cmsUInt32Number Type,
+                                  cmsFormatterDirection Dir,
+                                  cmsUInt32Number dwFlags)
+{
+    cmsFormatter Formatter = { NULL };
+
+    // Anything but non-float 5-6-5 output keeps the empty formatter
+    if (Type != TYPE_RGB_565 || Dir != cmsFormatterOutput) return Formatter;
+    if (dwFlags & CMS_PACK_FLAGS_FLOAT) return Formatter;
+    Formatter.Fmt16 = my_Pack565;
+    return Formatter;
+}
+
+static
+cmsPluginFormatters FormattersPluginSample = { {cmsPluginMagicNumber,
+                                2060,
+                                cmsPluginFormattersSig,
+                                NULL},
+                                my_FormatterFactory };   // input side: 5-6-5 unroller
+
+// Second formatters plug-in: same header, but its factory only answers for the
+// output direction, so the two halves can be installed on different contexts.
+static
+cmsPluginFormatters FormattersPluginSample2 = { {cmsPluginMagicNumber,
+                                2060,
+                                cmsPluginFormattersSig,
+                                NULL},
+                                my_FormatterFactory2 };  // output side: 5-6-5 packer
+
+
+// Runs four 5-6-5 pixels through a null transform (unroller and packer come from different context levels); they must come back unchanged
+cmsInt32Number CheckFormattersPlugin(void)
+{
+    cmsContext ctx = WatchDogContext(NULL);
+    cmsContext cpy;
+    cmsContext cpy2;
+    cmsHTRANSFORM xform;
+    cmsUInt16Number stream[]= { 0xffffU, 0x1234U, 0x0000U, 0x33ddU };
+    cmsUInt16Number result[4];
+    cmsInt32Number rc = 1;
+    int i;
+
+    cmsPluginTHR(ctx, &FormattersPluginSample);     // input side (unroller)
+    cpy = DupContext(ctx, NULL);
+    cmsPluginTHR(cpy, &FormattersPluginSample2);    // output side (packer)
+    cpy2 = DupContext(cpy, NULL);
+
+    xform = cmsCreateTransformTHR(cpy2, NULL, TYPE_RGB_565, NULL, TYPE_RGB_565, INTENT_PERCEPTUAL, cmsFLAGS_NULLTRANSFORM);
+    if (xform == NULL) {
+        Fail("Cannot create 5-6-5 transform");   // report it instead of handing NULL to cmsDoTransform
+        rc = 0;
+    }
+    else {
+        cmsDoTransform(xform, stream, result, 4);
+        cmsDeleteTransform(xform);
+        for (i=0; i < 4; i++)
+            if (stream[i] != result[i]) rc = 0;
+    }
+    cmsDeleteContext(ctx);
+    cmsDeleteContext(cpy);
+    cmsDeleteContext(cpy2);
+    return rc;
+}
+
+// --------------------------------------------------------------------------------------------------
+// TagTypePlugin plugin check
+// --------------------------------------------------------------------------------------------------
+
+#define SigIntType      ((cmsTagTypeSignature)  0x74747448)   //   'tttH'
+#define SigInt          ((cmsTagSignature)  0x74747448)       //   'tttH'
+
+// Tag type reader: loads one 32-bit integer into a block allocated from the handler's context
+static void *Type_int_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io,
+                           cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt32Number* Ptr = (cmsUInt32Number*) _cmsMalloc(self ->ContextID, sizeof(cmsUInt32Number));
+    if (Ptr != NULL && !_cmsReadUInt32Number(io, Ptr)) {
+        _cmsFree(self ->ContextID, Ptr);    // read failed: release the block instead of leaking it
+        Ptr = NULL;
+    }
+    if (Ptr != NULL) *nItems = 1;           // *nItems is only written on success
+    return Ptr;
+}
+
+// Tag type writer: emits the single 32-bit integer that Ptr points to
+static cmsBool Type_int_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io,
+                              void* Ptr, cmsUInt32Number nItems)
+{
+    const cmsUInt32Number* Value = (const cmsUInt32Number*) Ptr;
+    return _cmsWriteUInt32Number(io, *Value);
+}
+
+// Tag type duplicator: copies n 32-bit integers using the handler's context
+static void* Type_int_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsContext Owner = self ->ContextID;
+    return _cmsDupMem(Owner, Ptr, n * sizeof(cmsUInt32Number));
+}
+
+// Tag type destructor: hands the block back through the handler's context
+void Type_int_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    _cmsFree(self ->ContextID, Ptr);
+}
+
+
+static cmsPluginTag HiddenTagPluginSample = {
+    // Declares the private tag SigInt and lists SigIntType as its supported type
+    { cmsPluginMagicNumber, 2060, cmsPluginTagSig, NULL},
+    SigInt,  {  1, 1, { SigIntType }, NULL }
+};
+// Tag type plug-in; its header's last field points at HiddenTagPluginSample, so installing this one brings both in
+static cmsPluginTagType TagTypePluginSample = {
+    // NOTE(review): the last header field is presumably the next-plugin link -- confirm in lcms2_plugin.h
+     { cmsPluginMagicNumber, 2060, cmsPluginTagTypeSig,  (cmsPluginBase*) &HiddenTagPluginSample},
+     { SigIntType, Type_int_Read, Type_int_Write, Type_int_Dup, Type_int_Free, NULL }
+};
+
+
+// Checks the tag type plug-in: a private tag written through a context holding the plug-in
+// must be unreadable from the global context and readable again from the plug-in context.
+// Returns nonzero on success, 0 on failure.
+cmsInt32Number CheckTagTypePlugin(void)
+{
+    cmsContext ctx = NULL;
+    cmsContext cpy = NULL;
+    cmsContext cpy2 = NULL;
+    cmsHPROFILE h = NULL;
+    cmsUInt32Number myTag = 1234;
+    cmsUInt32Number rc = 0;
+    char* data = NULL;
+    cmsUInt32Number *ptr = NULL;
+    cmsUInt32Number clen = 0;
+
+    ctx = WatchDogContext(NULL);
+    cmsPluginTHR(ctx, &TagTypePluginSample);
+
+    cpy = DupContext(ctx, NULL);
+    cpy2 = DupContext(cpy, NULL);
+
+    // Only the innermost copy is kept. The handles are cleared so the Error
+    // path below does not delete these two contexts a second time.
+    cmsDeleteContext(ctx); ctx = NULL;
+    cmsDeleteContext(cpy); cpy = NULL;
+
+    h = cmsCreateProfilePlaceholder(cpy2);
+    if (h == NULL) {
+        Fail("Create placeholder failed");
+        goto Error;
+    }
+
+    if (!cmsWriteTag(h, SigInt, &myTag)) {
+        Fail("Plug-in failed");
+        goto Error;
+    }
+
+    // First call, with a NULL buffer, only fetches the needed size
+    rc = cmsSaveProfileToMem(h, NULL, &clen);
+    if (!rc) {
+        Fail("Fetch mem size failed");
+        goto Error;
+    }
+
+    data = (char*) malloc(clen);
+    if (data == NULL) {
+        Fail("malloc failed ?!?");
+        goto Error;
+    }
+
+    rc = cmsSaveProfileToMem(h, data, &clen);
+    if (!rc) {
+        Fail("Save to mem failed");
+        goto Error;
+    }
+
+    cmsCloseProfile(h);
+
+    // Global context: the profile opens, but the private tag must not be readable
+    cmsSetLogErrorHandler(NULL);
+    h = cmsOpenProfileFromMem(data, clen);
+    if (h == NULL) {
+        Fail("Open profile failed");
+        goto Error;
+    }
+
+    ptr = (cmsUInt32Number*) cmsReadTag(h, SigInt);
+    if (ptr != NULL) {
+        Fail("read tag/context switching failed");
+        goto Error;
+    }
+
+    cmsCloseProfile(h);
+    ResetFatalError();
+
+    // Plug-in context: now the tag has to come back
+    h = cmsOpenProfileFromMemTHR(cpy2, data, clen);
+    if (h == NULL) {
+        Fail("Open profile from mem failed");
+        goto Error;
+    }
+
+    // Get rid of data
+    free(data); data = NULL;
+
+    ptr = (cmsUInt32Number*) cmsReadTag(h, SigInt);
+    if (ptr == NULL) {
+        Fail("Read tag/conext switching failed (2)");
+        goto Error;     // through Error, so the open profile and cpy2 are released
+    }
+
+    rc = (*ptr == 1234);
+
+    cmsCloseProfile(h);
+    cmsDeleteContext(cpy2);
+
+    return rc;
+
+Error:
+    if (h != NULL) cmsCloseProfile(h);
+    if (ctx != NULL) cmsDeleteContext(ctx);
+    if (cpy != NULL) cmsDeleteContext(cpy);
+    if (cpy2 != NULL) cmsDeleteContext(cpy2);
+    if (data) free(data);
+    return 0;
+}
+
+// --------------------------------------------------------------------------------------------------
+// MPE plugin check:
+// --------------------------------------------------------------------------------------------------
+#define SigNegateType ((cmsStageSignature)0x6E202020)
+
+// MPE evaluator: each of the three channels becomes 1 - input
+static
+void EvaluateNegate(const cmsFloat32Number In[],
+                     cmsFloat32Number Out[],
+                     const cmsStage *mpe)
+{
+    int ch;
+    for (ch = 0; ch < 3; ch++) Out[ch] = 1.0f - In[ch];
+}
+
+// Allocates a placeholder stage of type SigNegateType whose evaluator is EvaluateNegate
+static cmsStage* StageAllocNegate(cmsContext ContextID)
+{
+    cmsStage* Stage;
+    Stage = _cmsStageAllocPlaceholder(ContextID, SigNegateType, 3, 3, EvaluateNegate, NULL, NULL, NULL);
+    return Stage;
+}
+
+static
+void *Type_negate_Read(struct _cms_typehandler_struct* self,
+                       cmsIOHANDLER* io,
+                       cmsUInt32Number* nItems,
+                       cmsUInt32Number SizeOfTag)
+{
+    cmsUInt16Number channelCount;
+    // The stored element is one 16-bit channel count, and only the value 3 is accepted
+    if (!_cmsReadUInt16Number(io, &channelCount) || channelCount != 3) return NULL;
+
+    *nItems = 1;
+    return StageAllocNegate(self->ContextID);
+}
+
+static
+cmsBool Type_negate_Write(struct _cms_typehandler_struct* self,
+                          cmsIOHANDLER* io,
+                          void* Ptr, cmsUInt32Number nItems)
+{
+    // Only the channel count (always 3) is written; Ptr and nItems are not consulted
+    cmsBool written = _cmsWriteUInt16Number(io, 3) ? TRUE : FALSE;
+    return written;
+}
+
+static
+cmsPluginMultiProcessElement MPEPluginSample = {
+    // Plug-in header
+    {cmsPluginMagicNumber, 2060, cmsPluginMultiProcessElementSig, NULL}, 
+    // Handler for the negate element: signature, reader and writer
+    { (cmsTagTypeSignature) SigNegateType, Type_negate_Read, Type_negate_Write, NULL, NULL, NULL }
+};
+// Checks the multi-process-element plug-in: a pipeline using the negate stage is written to a
+// profile, which must be unreadable without the plug-in and readable on a context that has it.
+cmsInt32Number CheckMPEPlugin(void)
+{
+    cmsContext ctx = NULL;
+    cmsContext cpy = NULL;
+    cmsContext cpy2 = NULL;
+    cmsHPROFILE h = NULL;
+    cmsPipeline* pipe = NULL;       // locally built pipeline; NULL again once it has been freed
+    cmsUInt32Number rc = 0;
+    char* data = NULL;
+    cmsUInt32Number clen = 0;
+    cmsFloat32Number In[3], Out[3];
+    cmsPipeline* readPipe;          // result of cmsReadTag; never freed by this function
+
+    ctx = WatchDogContext(NULL);
+    cmsPluginTHR(ctx, &MPEPluginSample);
+
+    cpy =  DupContext(ctx, NULL);    
+    cpy2 = DupContext(cpy, NULL);
+    // Only cpy2 is used from here on; clear the deleted handles so Error cannot delete them twice
+    cmsDeleteContext(ctx); ctx = NULL;
+    cmsDeleteContext(cpy); cpy = NULL;
+    
+    h = cmsCreateProfilePlaceholder(cpy2);
+    if (h == NULL) {
+        Fail("Create placeholder failed");
+        goto Error;
+    }
+    
+    pipe = cmsPipelineAlloc(cpy2, 3, 3);
+    if (pipe == NULL || !cmsPipelineInsertStage(pipe, cmsAT_BEGIN, StageAllocNegate(cpy2))) {
+        Fail("Pipeline setup failed"); goto Error;
+    }
+    In[0] = 0.3f; In[1] = 0.2f; In[2] = 0.9f;
+    cmsPipelineEvalFloat(In, Out, pipe);
+
+    rc = (IsGoodVal("0", Out[0], 1.0-In[0], 0.001) && 
+           IsGoodVal("1", Out[1], 1.0-In[1], 0.001) && 
+           IsGoodVal("2", Out[2], 1.0-In[2], 0.001));
+
+    if (!rc) {
+        Fail("Pipeline failed");
+        goto Error;    
+    }
+
+    if (!cmsWriteTag(h, cmsSigDToB3Tag, pipe)) {
+        Fail("Plug-in failed");
+        goto Error;
+    }
+
+    // This cleans the stage as well
+    cmsPipelineFree(pipe); pipe = NULL;
+
+    rc = cmsSaveProfileToMem(h, NULL, &clen);
+    if (!rc) {
+        Fail("Fetch mem size failed");
+        goto Error;        
+    }
+
+
+    data = (char*) malloc(clen);
+    if (data == NULL) {
+        Fail("malloc failed ?!?");
+        goto Error;
+    }
+
+
+    rc = cmsSaveProfileToMem(h, data, &clen);
+    if (!rc) {
+        Fail("Save to mem failed");
+        goto Error;
+    }
+
+    cmsCloseProfile(h);
+
+    // Reopen without the plug-in context: the custom stage must not be readable
+    cmsSetLogErrorHandler(NULL);
+    h = cmsOpenProfileFromMem(data, clen);    
+    if (h == NULL) {
+        Fail("Open profile failed");
+        goto Error;
+    } 
+
+    readPipe = (cmsPipeline*) cmsReadTag(h, cmsSigDToB3Tag);
+    if (readPipe != NULL) {
+
+        // Unsupported stage, should fail
+        Fail("read tag/context switching failed");
+        goto Error;
+    }
+
+    cmsCloseProfile(h);
+
+    ResetFatalError();
+
+    h = cmsOpenProfileFromMemTHR(cpy2, data, clen);    
+    if (h == NULL) {
+        Fail("Open profile from mem failed");
+        goto Error;
+    }
+
+    // Get rid of data
+    free(data); data = NULL;
+
+    readPipe = (cmsPipeline*) cmsReadTag(h, cmsSigDToB3Tag);
+    if (readPipe == NULL) {        
+        Fail("Read tag/conext switching failed (2)");
+        goto Error;     // release the profile and context instead of returning directly
+    }
+   
+    // Evaluate for negation
+    In[0] = 0.3f; In[1] = 0.2f; In[2] = 0.9f;
+    cmsPipelineEvalFloat(In, Out, readPipe);
+
+     rc = (IsGoodVal("0", Out[0], 1.0-In[0], 0.001) && 
+           IsGoodVal("1", Out[1], 1.0-In[1], 0.001) && 
+           IsGoodVal("2", Out[2], 1.0-In[2], 0.001));
+        
+    cmsCloseProfile(h);
+
+    cmsDeleteContext(cpy2);
+
+    return rc;
+
+Error:
+    if (pipe != NULL) cmsPipelineFree(pipe);
+    if (h != NULL) cmsCloseProfile(h);
+    if (ctx != NULL) cmsDeleteContext(ctx);
+    if (cpy != NULL) cmsDeleteContext(cpy);
+    if (cpy2 != NULL) cmsDeleteContext(cpy2);
+    if (data) free(data);
+
+    return 0;
+}
+
+
+// --------------------------------------------------------------------------------------------------
+// Optimization plugin check:
+// --------------------------------------------------------------------------------------------------
+
+static
+void FastEvaluateCurves(register const cmsUInt16Number In[],
+                                     register cmsUInt16Number Out[],
+                                     register const void* Data)
+{
+    Out[0] = In[0]; // single-channel pass-through; Data is not used
+}
+
+static
+cmsBool MyOptimize(cmsPipeline** Lut, 
+                   cmsUInt32Number  Intent, 
+                   cmsUInt32Number* InputFormat, 
+                   cmsUInt32Number* OutputFormat, 
+                   cmsUInt32Number* dwFlags)
+{
+    cmsStage* stage = cmsPipelineGetPtrToFirstStage(*Lut);
+
+    // Decline unless every stage is a one-curve set whose estimated gamma does not exceed 1.0
+    while (stage != NULL) {
+
+        _cmsStageToneCurvesData* curves;
+
+        if (cmsStageType(stage) != cmsSigCurveSetElemType) return FALSE;
+
+        curves = (_cmsStageToneCurvesData*) cmsStageData(stage);
+        if (curves->nCurves != 1) return FALSE;
+        if (cmsEstimateGamma(curves->TheCurves[0], 0.1) > 1.0) return FALSE;
+
+        stage = cmsStageNext(stage);
+    }
+
+    // Accepted: set the no-cache flag and install the pass-through evaluator
+    *dwFlags |= cmsFLAGS_NOCACHE;
+    _cmsPipelineSetOptimizationParameters(*Lut, FastEvaluateCurves, NULL, NULL, NULL);
+
+    return TRUE;
+}
+
+cmsPluginOptimization OptimizationPluginSample = {
+    // Plug-in header, followed by the optimization callback
+    {cmsPluginMagicNumber, 2060, cmsPluginOptimizationSig, NULL}, 
+    MyOptimize
+};
+
+// Registers the optimization plug-in, runs a gray 8-bit transform on a twice-duplicated context, and returns 1 only if output equals input.
+cmsInt32Number CheckOptimizationPlugin(void)
+{
+    cmsContext ctx = WatchDogContext(NULL);
+    cmsContext cpy;
+    cmsContext cpy2;
+    cmsHTRANSFORM xform;
+    cmsUInt8Number In[]= { 10, 20, 30, 40 };
+    cmsUInt8Number Out[4];
+    cmsToneCurve* Linear[1];
+    cmsHPROFILE h;
+    int i;
+    // The plug-in is registered on the original context only
+    cmsPluginTHR(ctx, &OptimizationPluginSample);
+
+    cpy = DupContext(ctx, NULL);
+    cpy2 = DupContext(cpy, NULL);
+    // Gamma 1.0 gray device link, created on the second copy
+    Linear[0] = cmsBuildGamma(cpy2, 1.0);
+    h = cmsCreateLinearizationDeviceLinkTHR(cpy2, cmsSigGrayData, Linear);
+    cmsFreeToneCurve(Linear[0]);
+
+    xform = cmsCreateTransformTHR(cpy2, h, TYPE_GRAY_8, h, TYPE_GRAY_8, INTENT_PERCEPTUAL, 0);
+    cmsCloseProfile(h);
+
+    cmsDoTransform(xform, In, Out, 4);
+
+    cmsDeleteTransform(xform);
+    cmsDeleteContext(ctx);
+    cmsDeleteContext(cpy);
+    cmsDeleteContext(cpy2);
+    // Pass only if every sample came through unchanged
+    for (i=0; i < 4; i++)
+        if (In[i] != Out[i]) return 0;
+
+    return 1;
+}
+
+
+// --------------------------------------------------------------------------------------------------
+// Check the intent plug-in
+// --------------------------------------------------------------------------------------------------
+
+/*
+   This example creates a new rendering intent, at intent number 300, that is identical to perceptual 
+   intent for all color spaces but gray to gray transforms, in this case it bypasses the data. 
+   Note that it has to clear all occurrences of intent 300 in the intents array to avoid 
+   infinite recursion.
+*/
+
+#define INTENT_DECEPTIVE   300
+
+static
+cmsPipeline*  MyNewIntent(cmsContext      ContextID, 
+                          cmsUInt32Number nProfiles,
+                          cmsUInt32Number TheIntents[], 
+                          cmsHPROFILE     hProfiles[], 
+                          cmsBool         BPC[],
+                          cmsFloat64Number AdaptationStates[],
+                          cmsUInt32Number dwFlags)
+{
+    cmsPipeline*    Result;
+    cmsUInt32Number ICCIntents[256];
+    cmsUInt32Number i;
+    // An empty chain has no first/last profile, and a longer one would overflow ICCIntents
+    if (nProfiles == 0 || nProfiles > sizeof(ICCIntents) / sizeof(ICCIntents[0])) return NULL;
+    // Replace the custom intent everywhere so the default handler cannot recurse into this one
+    for (i=0; i < nProfiles; i++)
+        ICCIntents[i] = (TheIntents[i] == INTENT_DECEPTIVE) ? INTENT_PERCEPTUAL : TheIntents[i];
+ if (cmsGetColorSpace(hProfiles[0]) != cmsSigGrayData ||
+     cmsGetColorSpace(hProfiles[nProfiles-1]) != cmsSigGrayData) 
+           return _cmsDefaultICCintents(ContextID, nProfiles, 
+                                   ICCIntents, hProfiles, 
+                                   BPC, AdaptationStates, 
+                                   dwFlags);
+
+    Result = cmsPipelineAlloc(ContextID, 1, 1);
+    if (Result == NULL) return NULL;
+    // Gray to gray: bypass the data with a single identity stage
+    if (!cmsPipelineInsertStage(Result, cmsAT_BEGIN, cmsStageAllocIdentity(ContextID, 1))) {
+        cmsPipelineFree(Result); return NULL;
+    }
+    return Result;
+}
+
+static cmsPluginRenderingIntent IntentPluginSample = {
+    // Plug-in header
+    {cmsPluginMagicNumber, 2060, cmsPluginRenderingIntentSig, NULL},
+    // Intent number, handler function and description string
+    INTENT_DECEPTIVE, MyNewIntent,  "bypass gray to gray rendering intent" 
+};
+// Registers the intent plug-in and checks that a gray-to-gray transform using INTENT_DECEPTIVE leaves the data unchanged.
+cmsInt32Number CheckIntentPlugin(void)
+{
+    cmsContext ctx = WatchDogContext(NULL);
+    cmsContext cpy;
+    cmsContext cpy2;
+    cmsHTRANSFORM xform;
+    cmsHPROFILE h1, h2;
+    cmsToneCurve* Linear1;
+    cmsToneCurve* Linear2;
+    cmsUInt8Number In[]= { 10, 20, 30, 40 };
+    cmsUInt8Number Out[4];
+    int i;
+    // The plug-in is registered on the original context only
+    cmsPluginTHR(ctx, &IntentPluginSample);
+
+    cpy  = DupContext(ctx, NULL);    
+    cpy2 = DupContext(cpy, NULL);
+    // Two gray device links with gamma 3.0 and 0.1 respectively
+    Linear1 = cmsBuildGamma(cpy2, 3.0);
+    Linear2 = cmsBuildGamma(cpy2, 0.1);
+    h1 = cmsCreateLinearizationDeviceLinkTHR(cpy2, cmsSigGrayData, &Linear1);
+    h2 = cmsCreateLinearizationDeviceLinkTHR(cpy2, cmsSigGrayData, &Linear2);
+
+    cmsFreeToneCurve(Linear1);
+    cmsFreeToneCurve(Linear2);
+
+    xform = cmsCreateTransformTHR(cpy2, h1, TYPE_GRAY_8, h2, TYPE_GRAY_8, INTENT_DECEPTIVE, 0);
+    cmsCloseProfile(h1); cmsCloseProfile(h2);
+
+    cmsDoTransform(xform, In, Out, 4);
+
+    cmsDeleteTransform(xform);
+    cmsDeleteContext(ctx);
+    cmsDeleteContext(cpy);
+    cmsDeleteContext(cpy2);
+    // Pass only if every sample came through unchanged
+    for (i=0; i < 4; i++)
+        if (Out[i] != In[i]) return 0;
+
+    return 1;    
+}
+
+
+// --------------------------------------------------------------------------------------------------
+// Check the full transform plug-in
+// --------------------------------------------------------------------------------------------------
+
+// Sample transform function that ignores its input and writes 0x42 to the first Size output bytes
+static
+void TrancendentalTransform(struct _cmstransform_struct * CMM,
+                              const void* InputBuffer,
+                              void* OutputBuffer,
+                              cmsUInt32Number Size,
+                              cmsUInt32Number Stride)
+{
+    cmsUInt8Number* out = (cmsUInt8Number*) OutputBuffer;
+    cmsUInt8Number* const end = out + Size;
+
+    // One byte is stored per unit of Size; TransformFactory installs this only for TYPE_GRAY_8 output
+    while (out != end)
+        *out++ = 0x42;
+
+}
+
+
+cmsBool  TransformFactory(_cmsTransformFn* xformPtr,
+                          void** UserData,
+                           _cmsFreeUserDataFn* FreePrivateDataFn,
+                           cmsPipeline** Lut,
+                           cmsUInt32Number* InputFormat,
+                           cmsUInt32Number* OutputFormat,
+                           cmsUInt32Number* dwFlags)
+
+{
+    // Any output format other than 8-bit gray is declined
+    if (*OutputFormat != TYPE_GRAY_8)
+        return FALSE;
+
+    // *Lut holds the pipeline to be applied; the constant transform does not use it
+    *xformPtr = TrancendentalTransform;
+
+    return TRUE;
+}
+
+
+// The plug-in entry point: plug-in header plus the factory that selects the transform function
+static cmsPluginTransform FullTransformPluginSample = {
+                           
+     { cmsPluginMagicNumber, 2060, cmsPluginTransformSig, NULL}, 
+
+     TransformFactory                          
+};
+// Registers the full-transform plug-in and checks that a gray 8-bit transform outputs 0x42 for every sample.
+cmsInt32Number CheckTransformPlugin(void)
+{
+    cmsContext ctx = WatchDogContext(NULL);
+    cmsContext cpy;
+    cmsContext cpy2;
+    cmsHTRANSFORM xform;
+    cmsUInt8Number In[]= { 10, 20, 30, 40 };
+    cmsUInt8Number Out[4];
+    cmsToneCurve* Linear;
+    cmsHPROFILE h;
+    int i;
+
+    // The plug-in is registered on the original context only
+    cmsPluginTHR(ctx, &FullTransformPluginSample);
+
+    cpy  = DupContext(ctx, NULL);
+    cpy2 = DupContext(cpy, NULL);
+    // Gamma 1.0 gray device link, created on the second copy
+    Linear = cmsBuildGamma(cpy2, 1.0);
+    h = cmsCreateLinearizationDeviceLinkTHR(cpy2, cmsSigGrayData, &Linear);
+    cmsFreeToneCurve(Linear);
+
+    xform = cmsCreateTransformTHR(cpy2, h, TYPE_GRAY_8, h, TYPE_GRAY_8, INTENT_PERCEPTUAL, 0);
+    cmsCloseProfile(h);
+
+    cmsDoTransform(xform, In, Out, 4);
+
+
+    cmsDeleteTransform(xform);
+    cmsDeleteContext(ctx);
+    cmsDeleteContext(cpy);
+    cmsDeleteContext(cpy2);
+    // Pass only if the plug-in transform produced its constant for every sample
+    for (i=0; i < 4; i++)
+        if (Out[i] != 0x42) return 0;
+
+    return 1;
+}
+
+
+// --------------------------------------------------------------------------------------------------
+// Check the mutex plug-in
+// --------------------------------------------------------------------------------------------------
+
+typedef struct {
+    int nlocks; // incremented by MyMtxLock, decremented by MyMtxUnlock, checked by MyMtxDestroy
+} MyMtx;
+
+// Mutex plug-in "create" callback: allocates a lock counter, or returns NULL if allocation fails
+static
+void* MyMtxCreate(cmsContext id)
+{
+   MyMtx* mtx = (MyMtx*) _cmsMalloc(id, sizeof(MyMtx));
+   if (mtx != NULL) mtx ->nlocks = 0;   // do not write through a failed allocation
+   return mtx;
+}
+
+static
+void MyMtxDestroy(cmsContext id, void* mtx)
+{
+    const MyMtx* counter = (const MyMtx*) mtx;
+    int balanced = (counter->nlocks == 0);
+
+    // Every lock must have been matched by an unlock before the mutex is freed
+    if (!balanced)
+        Die("Locks != 0 when setting free a mutex");
+    _cmsFree(id, mtx);
+}
+
+static
+cmsBool MyMtxLock(cmsContext id, void* mtx)
+{
+    // Nothing is actually locked: only the depth counter checked by MyMtxDestroy is updated
+    MyMtx* counter = (MyMtx*) mtx;
+    counter->nlocks += 1;
+    return TRUE;
+}
+
+static
+void MyMtxUnlock(cmsContext id, void* mtx)
+{
+    // Undo one MyMtxLock by decrementing the depth counter
+    MyMtx* counter = (MyMtx*) mtx;
+    counter->nlocks -= 1;
+}
+
+
+static cmsPluginMutex MutexPluginSample = {
+     // Plug-in header
+     { cmsPluginMagicNumber, 2060, cmsPluginMutexSig, NULL}, 
+     // Counting callbacks: create, destroy, lock, unlock
+     MyMtxCreate,  MyMtxDestroy,  MyMtxLock,  MyMtxUnlock                       
+};
+
+// Registers the counting mutex plug-in and checks that a gray 8-bit transform still returns its input unchanged.
+cmsInt32Number CheckMutexPlugin(void)
+{
+    cmsContext ctx = WatchDogContext(NULL);
+    cmsContext cpy;
+    cmsContext cpy2;
+    cmsHTRANSFORM xform;
+    cmsUInt8Number In[]= { 10, 20, 30, 40 };
+    cmsUInt8Number Out[4];
+    cmsToneCurve* Linear;
+    cmsHPROFILE h;
+    int i;
+
+    // The plug-in is registered on the original context only
+    cmsPluginTHR(ctx, &MutexPluginSample);
+
+    cpy  = DupContext(ctx, NULL);
+    cpy2 = DupContext(cpy, NULL);
+    // Gamma 1.0 gray device link, created on the second copy
+    Linear = cmsBuildGamma(cpy2, 1.0);
+    h = cmsCreateLinearizationDeviceLinkTHR(cpy2, cmsSigGrayData, &Linear);
+    cmsFreeToneCurve(Linear);
+
+    xform = cmsCreateTransformTHR(cpy2, h, TYPE_GRAY_8, h, TYPE_GRAY_8, INTENT_PERCEPTUAL, 0);
+    cmsCloseProfile(h);
+
+    cmsDoTransform(xform, In, Out, 4);
+
+
+    cmsDeleteTransform(xform);
+    cmsDeleteContext(ctx);
+    cmsDeleteContext(cpy);
+    cmsDeleteContext(cpy2);
+    // Pass only if every sample came through unchanged
+    for (i=0; i < 4; i++)
+        if (Out[i] != In[i]) return 0;
+
+    return 1;
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/testbed/testthread.cpp b/third-party/libjxl/libjxl/third_party/lcms/testbed/testthread.cpp
new file mode 100644
index 0000000000..b932761202
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/testbed/testthread.cpp
@@ -0,0 +1,120 @@
+
+#include <windows.h>
+#include "lcms2_plugin.h"
+
+static cmsContext ctx;                   // context the worker threads pass to cmsCreateTransformTHR
+static cmsHPROFILE prof_cmyk, prof_rgb;  // profiles opened in WinMain and read by every worker
+static volatile int rc = 0;              // set to 1 by a worker on a mismatch; returned by WinMain
+
+
+static
+void* MyMtxCreate(cmsContext id)
+{
+   return (void*) CreateMutex( NULL, FALSE, NULL);   // the Win32 mutex handle is returned as the opaque mutex pointer
+}
+
+static
+void MyMtxDestroy(cmsContext id, void* mtx)
+{
+    CloseHandle((HANDLE) mtx); // mtx is cast back to a Win32 handle and closed
+}
+
+static
+cmsBool MyMtxLock(cmsContext id, void* mtx)
+{
+    WaitForSingleObject((HANDLE) mtx, INFINITE); // waits without timeout; the wait result is not checked
+    return TRUE;
+}
+
+static
+void MyMtxUnlock(cmsContext id, void* mtx)
+{
+    ReleaseMutex((HANDLE) mtx); // releases the handle acquired in MyMtxLock; result is not checked
+}
+
+
+static cmsPluginMutex MutexPluginSample = {
+     // Plug-in header
+     { cmsPluginMagicNumber, 2060, cmsPluginMutexSig, NULL}, 
+     // NOTE(review): WinMain creates its context with a NULL plug-in, so this sample is never registered in this file
+     MyMtxCreate,  MyMtxDestroy,  MyMtxLock,  MyMtxUnlock                       
+};
+
+// Worker thread: repeatedly converts 1000 fixed RGB pixels to CMYK and flags any unexpected result in rc
+static DWORD WINAPI one_thread(LPVOID lpParameter)
+{
+    int i, j;
+    cmsUInt8Number rgb[3*1000];
+    cmsUInt8Number cmyk[4*1000];
+
+    Sleep(rand() % 500 );
+    cmsHTRANSFORM xform = cmsCreateTransformTHR(ctx, prof_rgb, TYPE_RGB_8, prof_cmyk, TYPE_CMYK_8, 0, 0);
+    if (xform == NULL) { OutputDebugString(L"ERROR\n"); rc = 1; return 1; }  // nothing to run without a transform
+    for (i=0; i < 100000; i++) {
+
+        for (j=0; j < 1000; j++) 
+        {
+            rgb[j * 3    ] = 189;
+            rgb[j * 3 + 1] = 100;
+            rgb[j * 3 + 2] = 75;
+        }
+        cmsDoTransform(xform, rgb, cmyk, 1000);
+        for (j=0; j < 1000; j++) 
+        {
+            if (cmyk[j * 4 ] != 37 ||
+                cmyk[j * 4 + 1 ] != 188 ||
+                cmyk[j * 4 + 2 ] != 195 ||
+                cmyk[j * 4 + 3 ] != 7) 
+            {
+                OutputDebugString(L"ERROR\n"); 
+                rc = 1;
+            }
+
+        }
+
+    }
+        
+    cmsDeleteTransform(xform);
+
+    return 0;
+}
+// Entry point: opens the two profiles, runs NWORKERS worker threads to completion and returns rc (0 when no worker reported a mismatch)
+int WINAPI WinMain(HINSTANCE hInstance,HINSTANCE hPrevInstance,LPSTR lpCmdLine,int nCmdShow)
+{
+    int i;
+    // The file-scope ctx is assigned below (no local shadow) so the workers share the profiles' context
+
+    OutputDebugString(L"Test in progress...\n"); 
+
+    ctx = cmsCreateContext(NULL, 0);
+
+    prof_cmyk = cmsOpenProfileFromFileTHR(ctx, "USWebCoatedSWOP.icc", "r");
+    prof_rgb = cmsOpenProfileFromFileTHR(ctx, "AdobeRGB1998.icc","r");
+   
+
+#define NWORKERS 10
+
+    HANDLE workers[NWORKERS];
+
+
+    for (i=0; i<NWORKERS; ++i)
+    {
+        DWORD threadid;
+
+        workers[i] = CreateThread(NULL,0,one_thread,NULL,0,&threadid);
+    }
+
+    WaitForMultipleObjects(NWORKERS,workers,TRUE,INFINITE);
+
+    for ( i=0;i<NWORKERS;++i)
+        CloseHandle(workers[i]);
+
+    // All workers have finished, so the shared profiles and context can be released
+    cmsCloseProfile(prof_rgb);
+    cmsCloseProfile(prof_cmyk);
+    cmsDeleteContext(ctx);
+
+    OutputDebugString(L"Test Done\n"); 
+
+    return rc;
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/testbed/toosmall.icc b/third-party/libjxl/libjxl/third_party/lcms/testbed/toosmall.icc
new file mode 100755
index 0000000000..15e5e49784
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/testbed/toosmall.icc differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/testbed/zoo_icc.c b/third-party/libjxl/libjxl/third_party/lcms/testbed/zoo_icc.c
new file mode 100755
index 0000000000..f68861c2c8
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/testbed/zoo_icc.c
@@ -0,0 +1,310 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+
+#include "testcms2.h"
+
+
+// ZOO checks ------------------------------------------------------------------------------------------------------------
+
+
+#ifdef CMS_IS_WINDOWS_
+
+static char ZOOfolder[cmsMAX_PATH] = "c:\\colormaps\\";              // folder the specimens are read from
+static char ZOOwrite[cmsMAX_PATH]  = "c:\\colormaps\\write\\";       // destination used by CheckSingleSpecimen
+static char ZOORawWrite[cmsMAX_PATH]  = "c:\\colormaps\\rawwrite\\"; // destination used by CheckRAWSpecimen
+
+
+// Read every tag of the profile, stopping at the first one that cannot be read
+static
+void ReadAllTags(cmsHPROFILE h)
+{
+    cmsInt32Number idx;
+    const cmsInt32Number total = cmsGetTagCount(h);
+
+    for (idx = 0; idx < total; idx++) {
+        cmsTagSignature tag = cmsGetTagSignature(h, idx);
+
+        // An unreadable tag ends the scan early
+        if (cmsReadTag(h, tag) == NULL) return;
+    }
+}
+
+
+// Query every tag of the profile through the raw interface (NULL buffer, size 0)
+static
+void ReadAllRAWTags(cmsHPROFILE h)
+{
+    cmsInt32Number idx;
+    const cmsInt32Number total = cmsGetTagCount(h);
+
+    for (idx = 0; idx < total; idx++) {
+
+        cmsTagSignature tag = cmsGetTagSignature(h, idx);
+
+        // The returned length is not needed; the call is made only to exercise the raw read
+        (void) cmsReadRawTag(h, tag, NULL, 0);
+    }
+}
+
+// Prints one "en"/"US" info string of the profile followed by a newline; prints nothing if it is empty
+static
+void PrintInfo(cmsHPROFILE h, cmsInfoType Info)
+{
+    wchar_t* text;
+    cmsInt32Number len;
+    cmsContext id = 0;
+
+    len = cmsGetProfileInfo(h, Info, "en", "US", NULL, 0);
+    if (len == 0) return;
+
+    text = _cmsMalloc(id, len);
+    if (text == NULL) return;   // allocation failed: nothing can be printed
+    cmsGetProfileInfo(h, Info, "en", "US", text, len);
+    wprintf(L"%ls\n", text);    // %ls is the portable conversion for a wchar_t string
+    _cmsFree(id, text);
+}
+
+
+static
+void PrintAllInfos(cmsHPROFILE h)
+{
+     static const cmsInfoType fields[] = { cmsInfoDescription, cmsInfoManufacturer,
+                                           cmsInfoModel, cmsInfoCopyright };
+     int k;
+     for (k = 0; k < 4; k++) PrintInfo(h, fields[k]);   // printed in the order listed above
+     printf("\n\n");
+}
+
+static
+void ReadAllLUTS(cmsHPROFILE h)
+{
+    cmsPipeline* a;
+    cmsCIEXYZ Black;
+    // Input LUT for each of the four intents; any pipeline returned is freed at once
+    a = _cmsReadInputLUT(h, INTENT_PERCEPTUAL);
+    if (a) cmsPipelineFree(a);
+
+    a = _cmsReadInputLUT(h, INTENT_RELATIVE_COLORIMETRIC);
+    if (a) cmsPipelineFree(a);
+
+    a = _cmsReadInputLUT(h, INTENT_SATURATION);
+    if (a) cmsPipelineFree(a);
+
+    a = _cmsReadInputLUT(h, INTENT_ABSOLUTE_COLORIMETRIC);
+    if (a) cmsPipelineFree(a);
+
+    // Output LUT for each of the four intents
+    a = _cmsReadOutputLUT(h, INTENT_PERCEPTUAL);
+    if (a) cmsPipelineFree(a);
+
+    a = _cmsReadOutputLUT(h, INTENT_RELATIVE_COLORIMETRIC);
+    if (a) cmsPipelineFree(a);
+
+    a = _cmsReadOutputLUT(h, INTENT_SATURATION);
+    if (a) cmsPipelineFree(a);
+
+    a = _cmsReadOutputLUT(h, INTENT_ABSOLUTE_COLORIMETRIC);
+    if (a) cmsPipelineFree(a);
+
+    // Device-link LUT for each of the four intents
+    a = _cmsReadDevicelinkLUT(h, INTENT_PERCEPTUAL);
+    if (a) cmsPipelineFree(a);
+
+    a = _cmsReadDevicelinkLUT(h, INTENT_RELATIVE_COLORIMETRIC);
+    if (a) cmsPipelineFree(a);
+
+    a = _cmsReadDevicelinkLUT(h, INTENT_SATURATION);
+    if (a) cmsPipelineFree(a);
+
+    a = _cmsReadDevicelinkLUT(h, INTENT_ABSOLUTE_COLORIMETRIC);
+    if (a) cmsPipelineFree(a);
+
+    // Black point detection per intent and TAC detection; the results are discarded
+    cmsDetectDestinationBlackPoint(&Black, h, INTENT_PERCEPTUAL, 0);
+    cmsDetectDestinationBlackPoint(&Black, h, INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsDetectDestinationBlackPoint(&Black, h, INTENT_SATURATION, 0);
+    cmsDetectDestinationBlackPoint(&Black, h, INTENT_ABSOLUTE_COLORIMETRIC, 0);
+    cmsDetectTAC(h);
+}
+
+// Check one specimen in the ZOO: read it, save a copy under ZOOwrite and read the copy back.
+// Returns 1 on success, 0 if a path does not fit its buffer or a profile cannot be opened.
+static
+cmsInt32Number CheckSingleSpecimen(const char* Profile)
+{
+    char BuffSrc[256];
+    char BuffDst[256];
+    cmsHPROFILE h;
+    int srcLen, dstLen;
+    srcLen = snprintf(BuffSrc, sizeof(BuffSrc), "%s%s", ZOOfolder, Profile);
+    dstLen = snprintf(BuffDst, sizeof(BuffDst), "%s%s", ZOOwrite,  Profile);
+    if (srcLen < 0 || srcLen >= (int) sizeof(BuffSrc) || dstLen < 0 || dstLen >= (int) sizeof(BuffDst)) return 0;
+    h = cmsOpenProfileFromFile(BuffSrc, "r");
+    if (h == NULL) return 0;
+
+    printf("%s\n", Profile);
+
+    PrintAllInfos(h);  
+    ReadAllTags(h);    
+    ReadAllLUTS(h);
+ // ReadAllRAWTags(h);
+
+
+    cmsSaveProfileToFile(h, BuffDst);
+    cmsCloseProfile(h);
+
+    h = cmsOpenProfileFromFile(BuffDst, "r");
+    if (h == NULL) return 0;
+    ReadAllTags(h);
+
+
+    cmsCloseProfile(h);
+
+    return 1;
+}
+// Same round trip as CheckSingleSpecimen, with raw tag reads added and ZOORawWrite as destination
+static
+cmsInt32Number CheckRAWSpecimen(const char* Profile)
+{
+    char BuffSrc[256];
+    char BuffDst[256];
+    cmsHPROFILE h;
+    int srcLen, dstLen;
+    srcLen = snprintf(BuffSrc, sizeof(BuffSrc), "%s%s", ZOOfolder, Profile);
+    dstLen = snprintf(BuffDst, sizeof(BuffDst), "%s%s", ZOORawWrite,  Profile);
+    if (srcLen < 0 || srcLen >= (int) sizeof(BuffSrc) || dstLen < 0 || dstLen >= (int) sizeof(BuffDst)) return 0;
+    h = cmsOpenProfileFromFile(BuffSrc, "r");
+    if (h == NULL) return 0;
+
+    ReadAllTags(h);
+    ReadAllRAWTags(h);
+    cmsSaveProfileToFile(h, BuffDst);
+    cmsCloseProfile(h);
+
+    h = cmsOpenProfileFromFile(BuffDst, "r");
+    if (h == NULL) return 0;
+    ReadAllTags(h);
+    cmsCloseProfile(h);
+
+    return 1;
+}
+
+
+static int input = 0,    // per-device-class tallies, incremented by count_stats
+           disp = 0,
+           output = 0,
+           link = 0,
+           abst = 0,
+           color = 0, 
+           named = 0;
+
+static int rgb = 0,      // per-color-space tallies, incremented by count_stats
+           cmyk = 0,
+           gray = 0,
+           other = 0;
+
+
+// Opens one specimen, tallies its device class and color space, and runs black point detection on it
+static
+int count_stats(const char* Profile)
+{
+    char BuffSrc[256];
+    cmsHPROFILE h;
+    cmsCIEXYZ Black;
+    int len;
+    len = snprintf(BuffSrc, sizeof(BuffSrc), "%s%s", ZOOfolder, Profile);
+    if (len < 0 || len >= (int) sizeof(BuffSrc)) return 0;   // path does not fit the buffer
+    h = cmsOpenProfileFromFile(BuffSrc, "r");
+    if (h == NULL) return 0;
+
+  
+    switch (cmsGetDeviceClass(h)) { 
+
+    case cmsSigInputClass        : input++; break;
+    case cmsSigDisplayClass      : disp++; break;
+    case cmsSigOutputClass       : output++; break;
+    case cmsSigLinkClass         : link++;  break;
+    case cmsSigAbstractClass     : abst++; break;
+    case cmsSigColorSpaceClass   : color++; break;
+    case cmsSigNamedColorClass   : named ++; break;
+    }
+
+
+    switch (cmsGetColorSpace(h)) {
+
+    case cmsSigRgbData: rgb++; break;
+    case cmsSigCmykData: cmyk++; break;
+    case cmsSigGrayData: gray++; break;
+    default: other++;
+    }
+
+    cmsDetectDestinationBlackPoint(&Black, h, INTENT_PERCEPTUAL, 0);
+    cmsDetectDestinationBlackPoint(&Black, h, INTENT_RELATIVE_COLORIMETRIC, 0);
+    cmsDetectDestinationBlackPoint(&Black, h, INTENT_SATURATION, 0);
+   
+    cmsCloseProfile(h);
+   
+    return 1;
+}
+
+
+// Runs the specimen checks and the statistics pass on every entry found under c:\colormaps
+void CheckProfileZOO(void)
+{
+
+    struct _finddata_t c_file;
+    intptr_t hFile;
+
+    cmsSetLogErrorHandler(NULL);
+    // NOTE(review): the search pattern is hard-coded here rather than built from ZOOfolder
+    if ( (hFile = _findfirst("c:\\colormaps\\*.*", &c_file)) == -1L )
+        printf("No files in current directory");
+    else
+    {
+        do
+        {
+            if (strcmp(c_file.name, ".") != 0 &&
+                strcmp(c_file.name, "..") != 0) {
+                    // Return values of the three checks are not inspected
+                    CheckSingleSpecimen( c_file.name);
+                    CheckRAWSpecimen( c_file.name);
+
+                    count_stats(c_file.name);
+
+                    TestMemoryLeaks(FALSE);
+
+            }
+
+        } while ( _findnext(hFile, &c_file) == 0 );
+
+        _findclose(hFile);
+    }
+
+     ResetFatalError();
+}
+
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/common/utils.h b/third-party/libjxl/libjxl/third_party/lcms/utils/common/utils.h
new file mode 100644
index 0000000000..6cd459df95
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/common/utils.h
@@ -0,0 +1,103 @@
+
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+
+#ifndef _lcms_utils_h
+
+// Deal with Microsoft's attempt at deprecating C standard runtime functions
+#ifdef _MSC_VER
+#    if (_MSC_VER >= 1400)
+#      ifndef _CRT_SECURE_NO_DEPRECATE
+#        define _CRT_SECURE_NO_DEPRECATE
+#      endif
+#      ifndef _CRT_SECURE_NO_WARNINGS
+#        define _CRT_SECURE_NO_WARNINGS
+#      endif
+#    endif
+#endif
+
+#include "lcms2.h"
+
+#include <string.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <math.h>
+#include <wchar.h>
+
+// Avoid unused-parameter warnings
+
+#define UTILS_UNUSED_PARAMETER(x) ((void)x)
+
+// Init the utility functions
+
+void InitUtils(const char* PName);
+
+// Fatal Error (print the message and exit(1))---------------------------------------------
+
+extern int Verbose;
+
+void FatalError(const char *frm, ...);
+
+// xgetopt() interface -------------------------------------------------------------
+
+extern int   xoptind;
+extern char *xoptarg;
+extern int   xopterr;
+extern char  SW;
+
+int xgetopt(int argc, char *argv[], char *optionS);
+
+// The stock profile utility -------------------------------------------------------
+
+cmsHPROFILE OpenStockProfile(cmsContext ContextID, const char* File);
+
+// The print info utility ----------------------------------------------------------
+
+void PrintProfileInformation(cmsHPROFILE h);
+
+// ---------------------------------------------------------------------------------
+
+void PrintRenderingIntents(void);
+void PrintBuiltins(void);
+
+// ---------------------------------------------------------------------------------
+
+cmsBool SaveMemoryBlock(const cmsUInt8Number* Buffer, cmsUInt32Number dwLen, const char* Filename);
+
+// ---------------------------------------------------------------------------------
+
+// Return a pixel type depending on the number of channels
+int PixelTypeFromChanCount(int ColorChannels);
+
+// ------------------------------------------------------------------------------
+
+// Return number of channels of pixel type
+int ChanCountFromPixelType(int ColorChannels);
+
+#define _lcms_utils_h
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/common/vprf.c b/third-party/libjxl/libjxl/third_party/lcms/utils/common/vprf.c
new file mode 100644
index 0000000000..5ddcfe4274
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/common/vprf.c
@@ -0,0 +1,336 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "utils.h"
+
+
+// Verbosity level shared by the utilities. MyErrorLogHandler reports library
+// errors only while it is >= 0, and PrintProfileInformation additionally
+// dumps the colorant tables when it is > 2.
+int Verbose = 0;
+
+// Program name used as the prefix of diagnostics; filled in by InitUtils().
+static char ProgramName[256] = "";
+
+void FatalError(const char *frm, ...)
+{
+    va_list args;
+
+    va_start(args, frm);
+    fprintf(stderr, "[%s fatal error]: ", ProgramName);
+    vfprintf(stderr, frm, args);
+    fprintf(stderr, "\n");
+    va_end(args);
+
+    exit(1);
+}
+
+// Show errors to the end user (unless quiet option)
+static
+void MyErrorLogHandler(cmsContext ContextID, cmsUInt32Number ErrorCode, const char *Text)
+{
+    if (Verbose >= 0)
+        fprintf(stderr, "[%s]: %s\n", ProgramName, Text);
+
+    UTILS_UNUSED_PARAMETER(ErrorCode);
+    UTILS_UNUSED_PARAMETER(ContextID);
+}
+
+
+void InitUtils(const char* PName)
+{
+      strncpy(ProgramName, PName, sizeof(ProgramName));
+      ProgramName[sizeof(ProgramName)-1] = 0;
+
+      cmsSetLogErrorHandler(MyErrorLogHandler);
+}
+
+
+// Virtual profiles are handled here.
+//
+// Opens a profile by name, resolving the built-in ("virtual") names first:
+//   File == NULL  -> sRGB
+//   "*Lab2", "*Lab4", "*Lab", "*LabD65", "*XYZ", "*Gray22", "*Gray30",
+//   "*srgb", "*null", "*Lin2222" (matched with cmsstrcasecmp)
+// Any other string is treated as a file name and opened for reading.
+//
+// Every profile, including the "*Lin2222" device link and its tone curve, is
+// created in ContextID. Returns the handle produced by the corresponding
+// library call.
+cmsHPROFILE OpenStockProfile(cmsContext ContextID, const char* File)
+{
+    if (!File)
+        return cmsCreate_sRGBProfileTHR(ContextID);
+
+    if (cmsstrcasecmp(File, "*Lab2") == 0)
+        return cmsCreateLab2ProfileTHR(ContextID, NULL);
+
+    if (cmsstrcasecmp(File, "*Lab4") == 0)
+        return cmsCreateLab4ProfileTHR(ContextID, NULL);
+
+    if (cmsstrcasecmp(File, "*Lab") == 0)
+        return cmsCreateLab4ProfileTHR(ContextID, NULL);
+
+    if (cmsstrcasecmp(File, "*LabD65") == 0) {
+
+        cmsCIExyY D65xyY;
+
+        cmsWhitePointFromTemp( &D65xyY, 6504);
+        return cmsCreateLab4ProfileTHR(ContextID, &D65xyY);
+    }
+
+    if (cmsstrcasecmp(File, "*XYZ") == 0)
+        return cmsCreateXYZProfileTHR(ContextID);
+
+    if (cmsstrcasecmp(File, "*Gray22") == 0) {
+
+        cmsToneCurve* Curve = cmsBuildGamma(ContextID, 2.2);
+        cmsHPROFILE hProfile = cmsCreateGrayProfileTHR(ContextID, cmsD50_xyY(), Curve);
+        cmsFreeToneCurve(Curve);
+        return hProfile;
+    }
+
+    if (cmsstrcasecmp(File, "*Gray30") == 0) {
+
+        cmsToneCurve* Curve = cmsBuildGamma(ContextID, 3.0);
+        cmsHPROFILE hProfile = cmsCreateGrayProfileTHR(ContextID, cmsD50_xyY(), Curve);
+        cmsFreeToneCurve(Curve);
+        return hProfile;
+    }
+
+    if (cmsstrcasecmp(File, "*srgb") == 0)
+        return cmsCreate_sRGBProfileTHR(ContextID);
+
+    if (cmsstrcasecmp(File, "*null") == 0)
+        return cmsCreateNULLProfileTHR(ContextID);
+
+    if (cmsstrcasecmp(File, "*Lin2222") == 0) {
+
+        // Use the caller's context here as well, so this branch behaves like
+        // all the others instead of silently falling back to context 0.
+        cmsToneCurve*  Gamma = cmsBuildGamma(ContextID, 2.2);
+        cmsToneCurve*  Gamma4[4];
+        cmsHPROFILE hProfile;
+
+        // The same curve is shared by the four CMYK channels.
+        Gamma4[0] = Gamma4[1] = Gamma4[2] = Gamma4[3] = Gamma;
+        hProfile = cmsCreateLinearizationDeviceLinkTHR(ContextID, cmsSigCmykData, Gamma4);
+        cmsFreeToneCurve(Gamma);
+        return hProfile;
+    }
+
+    return cmsOpenProfileFromFileTHR(ContextID, File, "r");
+}
+
+// Help on available built-ins: writes the list of built-in profile names and
+// their descriptions to stderr.
+void PrintBuiltins(void)
+{
+     static const char Help[] =
+                     "\nBuilt-in profiles:\n\n"
+                     "\t*Lab2  -- D50-based v2 CIEL*a*b\n"
+                     "\t*Lab4  -- D50-based v4 CIEL*a*b\n"
+                     "\t*Lab   -- D50-based v4 CIEL*a*b\n"
+                     "\t*XYZ   -- CIE XYZ (PCS)\n"
+                     "\t*sRGB  -- sRGB color space\n"
+                     "\t*Gray22 - Monochrome of Gamma 2.2\n"
+                     "\t*Gray30 - Monochrome of Gamma 3.0\n"
+                     "\t*null   - Monochrome black for all input\n"
+                     "\t*Lin2222- CMYK linearization of gamma 2.2 on each channel\n";
+
+     // The text contains no conversion specifiers, so it is emitted verbatim.
+     fputs(Help, stderr);
+}
+
+
+// Auxiliary for printing information on profile: prints one "en"/"US" ASCII
+// info string of profile h to stdout followed by a newline. Nothing is
+// printed when the reported length is 0, the allocation fails, or the
+// retrieved string is empty.
+static
+void PrintInfo(cmsHPROFILE h, cmsInfoType Info)
+{
+    char* Buffer;
+    int Size;
+
+    // The value returned for a NULL buffer is used as the allocation size.
+    Size = cmsGetProfileInfoASCII(h, Info, "en", "US", NULL, 0);
+    if (Size == 0) return;
+
+    Buffer = (char*) malloc(Size * sizeof(char));
+    if (Buffer != NULL) {
+
+        cmsGetProfileInfoASCII(h, Info, "en", "US", Buffer, Size);
+
+        if (Buffer[0] != '\0')
+            printf("%s\n", Buffer);
+
+        free(Buffer);
+    }
+}
+
+
+
+// Displays the colorant table stored under tag Sig, if the profile has one.
+// Prints "Title:" and then one tab-indented colorant name per line on stdout,
+// or "(Unavailable)" when the tag is present but cannot be read.
+static
+void PrintColorantTable(cmsHPROFILE hInput, cmsTagSignature Sig, const char* Title)
+{
+    cmsNAMEDCOLORLIST* Colorants;
+    int Index, Count;
+
+    // Nothing at all is printed for profiles lacking the tag.
+    if (!cmsIsTag(hInput, Sig))
+        return;
+
+    printf("%s:\n", Title);
+
+    Colorants = (cmsNAMEDCOLORLIST*) cmsReadTag(hInput, Sig);
+    if (Colorants == NULL) {
+        printf("(Unavailable)\n");
+        return;
+    }
+
+    Count = cmsNamedColorCount(Colorants);
+    for (Index = 0; Index < Count; Index++) {
+
+        char Name[cmsMAX_PATH];
+
+        // Only the name is requested; the remaining outputs are not needed.
+        cmsNamedColorInfo(Colorants, Index, Name, NULL, NULL, NULL, NULL);
+        printf("\t%s\n", Name);
+    }
+
+    printf("\n");
+}
+
+
+// Print a summary of profile hInput to stdout: description, manufacturer,
+// model and copyright, plus both colorant tables when Verbose > 2, followed
+// by a blank line. A NULL handle only produces a complaint on stderr.
+void PrintProfileInformation(cmsHPROFILE hInput)
+{
+    static const cmsInfoType Fields[] = {
+        cmsInfoDescription,
+        cmsInfoManufacturer,
+        cmsInfoModel,
+        cmsInfoCopyright
+    };
+    size_t k;
+
+    if (hInput == NULL) {
+        fprintf(stderr, "*Wrong or corrupted profile*\n");
+        return;
+    }
+
+    for (k = 0; k < sizeof(Fields) / sizeof(Fields[0]); k++)
+        PrintInfo(hInput, Fields[k]);
+
+    if (Verbose > 2) {
+
+        PrintColorantTable(hInput, cmsSigColorantTableTag,    "Input colorant table");
+        PrintColorantTable(hInput, cmsSigColorantTableOutTag, "Input colorant out table");
+    }
+
+    printf("\n");
+}
+
+// -----------------------------------------------------------------------------
+
+
+// List on stderr the rendering intents reported by cmsGetSupportedIntents(),
+// one "<code> - <description>" line each, under a heading that shows the
+// current switch character SW followed by "t<n>".
+void PrintRenderingIntents(void)
+{
+    enum { MaxIntents = 200 };
+
+    cmsUInt32Number IntentIds[MaxIntents];
+    char* IntentTexts[MaxIntents];
+    cmsUInt32Number Count, k;
+
+    fprintf(stderr, "%ct<n> rendering intent:\n\n", SW);
+
+    Count = cmsGetSupportedIntents(MaxIntents, IntentIds, IntentTexts);
+
+    for (k = 0; k < Count; k++)
+        fprintf(stderr, "\t%u - %s\n", IntentIds[k], IntentTexts[k]);
+
+    fputc('\n', stderr);
+}
+
+
+
+// ------------------------------------------------------------------------------
+
+// Write dwLen bytes from Buffer to a newly created binary file Filename.
+//
+// Returns TRUE on success. Any failure (create, write, or close/flush) is
+// reported through FatalError(); the FALSE returns after those calls exist
+// only for the case where FatalError() comes back.
+cmsBool SaveMemoryBlock(const cmsUInt8Number* Buffer, cmsUInt32Number dwLen, const char* Filename)
+{
+    FILE* out = fopen(Filename, "wb");
+    if (out == NULL) {
+        FatalError("Cannot create '%s'", Filename);
+        return FALSE;
+    }
+
+    if (fwrite(Buffer, 1, dwLen, out) != dwLen) {
+        // Release the stream before reporting, so the handle is not leaked.
+        fclose(out);
+        // dwLen is a 32-bit unsigned value: convert explicitly so that the
+        // argument matches the conversion specifier on every platform.
+        FatalError("Cannot write %lu bytes to %s", (unsigned long) dwLen, Filename);
+        return FALSE;
+    }
+
+    // fclose() flushes buffered data, so a failure here means data loss.
+    if (fclose(out) != 0) {
+        FatalError("Error flushing file '%s'", Filename);
+        return FALSE;
+    }
+
+    return TRUE;
+}
+
+// ------------------------------------------------------------------------------
+
+// Return a pixel type depending on the number of channels.
+// 1 maps to PT_GRAY, 4 to PT_CMYK, and every other count from 2 to 15 to the
+// matching PT_MCHn. Any other count is a fatal error (-1 is returned only if
+// FatalError() comes back).
+int PixelTypeFromChanCount(int ColorChannels)
+{
+    // Indexed by channel count; slot 0 is never used.
+    static const int PixelTypes[] = {
+        -1,
+        PT_GRAY,  PT_MCH2,  PT_MCH3,  PT_CMYK,  PT_MCH5,
+        PT_MCH6,  PT_MCH7,  PT_MCH8,  PT_MCH9,  PT_MCH10,
+        PT_MCH11, PT_MCH12, PT_MCH13, PT_MCH14, PT_MCH15
+    };
+
+    if (ColorChannels >= 1 && ColorChannels <= 15)
+        return PixelTypes[ColorChannels];
+
+    FatalError("What a weird separation of %d channels?!?!", ColorChannels);
+    return -1;
+}
+
+
+// ------------------------------------------------------------------------------
+
+// Return number of channels of pixel type.
+// Despite its name, the ColorChannels parameter carries a PT_* pixel type
+// code. Unknown codes are a fatal error (-1 is returned only if FatalError()
+// comes back).
+int ChanCountFromPixelType(int ColorChannels)
+{
+    switch (ColorChannels) {
+
+      case PT_GRAY: return 1;
+
+      case PT_RGB:
+      case PT_CMY:
+      case PT_Lab:
+      case PT_YUV:
+      case PT_YCbCr: return 3;
+
+      case PT_CMYK: return 4 ;
+      case PT_MCH2: return 2 ;
+      case PT_MCH3: return 3 ;
+      case PT_MCH4: return 4 ;
+      case PT_MCH5: return 5 ;
+      case PT_MCH6: return 6 ;
+      case PT_MCH7: return 7 ;
+      case PT_MCH8: return 8 ;
+      case PT_MCH9: return 9 ;
+      case PT_MCH10: return 10;
+      case PT_MCH11: return 11;
+      case PT_MCH12: return 12;
+      // 13-channel data has 13 channels; this previously returned 12.
+      case PT_MCH13: return 13;
+      case PT_MCH14: return 14;
+      case PT_MCH15: return 15;
+
+      default:
+
+          FatalError("Unsupported color space of %d channels", ColorChannels);
+          return -1;
+    }
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/common/xgetopt.c b/third-party/libjxl/libjxl/third_party/lcms/utils/common/xgetopt.c
new file mode 100644
index 0000000000..7f3dc548a7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/common/xgetopt.c
@@ -0,0 +1,75 @@
+/*
+    getopt.c
+
+*/
+
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+
+/* Parser state shared with callers (declared extern in utils.h). */
+int     xoptind = 1;    /* index of which argument is next  */
+char   *xoptarg;        /* pointer to argument of current option */
+int     xopterr = 0;    /* nonzero: xgetopt() reports errors through perror() */
+
+/* Position inside the argv element being scanned; NULL when the next call
+   must start with a new element. */
+static  char   *letP = NULL;    /* remember next option char's location */
+char    SW = '-';				/* DOS switch character, either '-' or '/' */
+
+/*
+  Parse the command line options, System V style.
+
+  Standard option syntax is:
+
+    option ::= SW [optLetter]* [argLetter space* argument]
+
+  optionS lists the accepted letters; a letter followed by ':' takes an
+  argument, which is either the rest of the current argv element or the
+  whole next element.
+
+  Returns:
+    the option letter  on success (xoptarg points to its argument, or is
+                       NULL for letters that take none),
+    EOF                when argv is exhausted, the next element does not start
+                       with SW, or the element is SW alone or SW SW (those
+                       two are consumed),
+    '?'                for an unknown letter or a missing argument; errno is
+                       set to EINVAL and, if xopterr is nonzero, a message is
+                       printed with perror().
+
+  xoptind is advanced past every element that has been fully consumed.
+*/
+
+int xgetopt(int argc, char *argv[], char *optionS)
+{
+    unsigned char ch;
+    char *optP;
+
+    /* A zeroed switch character falls back to the DOS style '/'. */
+    if (SW == 0) {
+        SW = '/';
+    }
+
+    if (argc > xoptind) {
+        /* letP == NULL: begin scanning a fresh argv element. */
+        if (letP == NULL) {
+            /* Not an option: stop WITHOUT consuming the element. */
+            if ((letP = argv[xoptind]) == NULL ||
+                *(letP++) != SW)  goto gopEOF;
+            /* Doubled switch character: consume it and stop. */
+            if (*letP == SW) {
+                xoptind++;  goto gopEOF;
+            }
+        }
+        /* Switch character with nothing after it: consume it and stop. */
+        if (0 == (ch = *(letP++))) {
+            xoptind++;  goto gopEOF;
+        }
+        /* ':' is the argument marker in optionS, never a valid letter. */
+        if (':' == ch  ||  (optP = strchr(optionS, ch)) == NULL)
+            goto gopError;
+        if (':' == *(++optP)) {
+            /* Letter takes an argument. */
+            xoptind++;
+            if (0 == *letP) {
+                /* Nothing attached: the argument is the next element. */
+                if (argc <= xoptind)  goto  gopError;
+                letP = argv[xoptind++];
+            }
+            xoptarg = letP;
+            letP = NULL;
+        } else {
+            /* Plain letter: move to the next element once this one ends. */
+            if (0 == *letP) {
+                xoptind++;
+                letP = NULL;
+            }
+            xoptarg = NULL;
+        }
+        return ch;
+    }
+gopEOF:
+    xoptarg = letP = NULL;
+    return EOF;
+
+gopError:
+    xoptarg = NULL;
+    errno  = EINVAL;
+    if (xopterr)
+        perror ("get command line option");
+    return ('?');
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/delphi/delphidemo.dpr b/third-party/libjxl/libjxl/third_party/lcms/utils/delphi/delphidemo.dpr
new file mode 100644
index 0000000000..9180c04fea
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/delphi/delphidemo.dpr
@@ -0,0 +1,13 @@
+// Entry point of the Delphi demo application. Its only form is Form1,
+// implemented in demo1.pas.
+program delphidemo;
+
+uses
+  Forms,
+  demo1 in 'demo1.pas' {Form1};
+
+// Link the compiled resource file that shares the project's name.
+{$R *.RES}
+
+begin
+  Application.Initialize;
+  // Create the form instance and store it in the global Form1 variable.
+  Application.CreateForm(TForm1, Form1);
+  Application.Run;
+end.
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/delphi/delphidemo.dproj b/third-party/libjxl/libjxl/third_party/lcms/utils/delphi/delphidemo.dproj
new file mode 100644
index 0000000000..25b97d8282
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/delphi/delphidemo.dproj
@@ -0,0 +1,114 @@
+﻿	<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+		<PropertyGroup>
+			<ProjectGuid>{E3F889E8-CB8A-49AE-8173-4DDA022466BE}</ProjectGuid>
+			<MainSource>delphidemo.dpr</MainSource>
+			<Config Condition="'$(Config)'==''">Debug</Config>
+			<DCC_DCCCompiler>DCC32</DCC_DCCCompiler>
+			<ProjectVersion>12.0</ProjectVersion>
+		</PropertyGroup>
+		<PropertyGroup Condition="'$(Config)'=='Base' or '$(Base)'!=''">
+			<Base>true</Base>
+		</PropertyGroup>
+		<PropertyGroup Condition="'$(Config)'=='Release' or '$(Cfg_1)'!=''">
+			<Cfg_1>true</Cfg_1>
+			<CfgParent>Base</CfgParent>
+			<Base>true</Base>
+		</PropertyGroup>
+		<PropertyGroup Condition="'$(Config)'=='Debug' or '$(Cfg_2)'!=''">
+			<Cfg_2>true</Cfg_2>
+			<CfgParent>Base</CfgParent>
+			<Base>true</Base>
+		</PropertyGroup>
+		<PropertyGroup Condition="'$(Base)'!=''">
+			<DCC_UsePackage>vcl;rtl;vclx;vclimg;vclactnband;dbrtl;vcldb;vcldbx;bdertl;vcltouch;xmlrtl;dsnap;dsnapcon;TeeUI;TeeDB;Tee;vclib;ibxpress;adortl;IndyCore;IndySystem;IndyProtocols;inet;intrawebdb_100_140;Intraweb_100_140;VclSmp;vclie;websnap;webdsnap;inetdb;inetdbbde;inetdbxpress;soaprtl;vclribbon;dbexpress;DbxCommonDriver;DataSnapIndy10ServerTransport;DataSnapProviderClient;DbxClientDriver;DataSnapServer;DBXInterBaseDriver;DBXMySQLDriver;dbxcds;DBXFirebirdDriver;DBXSybaseASEDriver;DBXSybaseASADriver;DBXOracleDriver;DBXMSSQLDriver;DBXInformixDriver;DBXDb2Driver;Rave77VCL</DCC_UsePackage>
+			<DCC_ImageBase>00400000</DCC_ImageBase>
+			<DCC_SymbolReferenceInfo>1</DCC_SymbolReferenceInfo>
+			<DCC_DependencyCheckOutputName>delphidemo.exe</DCC_DependencyCheckOutputName>
+			<DCC_UnitAlias>WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE;WinTypes=Windows;WinProcs=Windows;$(DCC_UnitAlias)</DCC_UnitAlias>
+			<DCC_Platform>x86</DCC_Platform>
+			<DCC_N>true</DCC_N>
+			<DCC_S>false</DCC_S>
+			<DCC_K>false</DCC_K>
+			<DCC_E>false</DCC_E>
+			<DCC_F>false</DCC_F>
+		</PropertyGroup>
+		<PropertyGroup Condition="'$(Cfg_1)'!=''">
+			<DCC_LocalDebugSymbols>false</DCC_LocalDebugSymbols>
+			<DCC_Define>RELEASE;$(DCC_Define)</DCC_Define>
+			<DCC_SymbolReferenceInfo>0</DCC_SymbolReferenceInfo>
+			<DCC_DebugInformation>false</DCC_DebugInformation>
+		</PropertyGroup>
+		<PropertyGroup Condition="'$(Cfg_2)'!=''">
+			<DCC_Define>DEBUG;$(DCC_Define)</DCC_Define>
+		</PropertyGroup>
+		<ItemGroup>
+			<DelphiCompile Include="delphidemo.dpr">
+				<MainSource>MainSource</MainSource>
+			</DelphiCompile>
+			<DCCReference Include="demo1.pas">
+				<Form>Form1</Form>
+			</DCCReference>
+			<BuildConfiguration Include="Base">
+				<Key>Base</Key>
+			</BuildConfiguration>
+			<BuildConfiguration Include="Debug">
+				<Key>Cfg_2</Key>
+				<CfgParent>Base</CfgParent>
+			</BuildConfiguration>
+			<BuildConfiguration Include="Release">
+				<Key>Cfg_1</Key>
+				<CfgParent>Base</CfgParent>
+			</BuildConfiguration>
+		</ItemGroup>
+		<Import Project="$(BDS)\Bin\CodeGear.Delphi.Targets" Condition="Exists('$(BDS)\Bin\CodeGear.Delphi.Targets')"/>
+		<ProjectExtensions>
+			<Borland.Personality>Delphi.Personality.12</Borland.Personality>
+			<Borland.ProjectType>VCLApplication</Borland.ProjectType>
+			<BorlandProject>
+				<Delphi.Personality>
+					<Source>
+						<Source Name="MainSource">delphidemo.dpr</Source>
+					</Source>
+					<Parameters>
+						<Parameters Name="UseLauncher">False</Parameters>
+						<Parameters Name="DebugCWD">d:\lcms-1.13\delphi</Parameters>
+						<Parameters Name="LoadAllSymbols">True</Parameters>
+						<Parameters Name="LoadUnspecifiedSymbols">False</Parameters>
+					</Parameters>
+					<VersionInfo>
+						<VersionInfo Name="IncludeVerInfo">False</VersionInfo>
+						<VersionInfo Name="AutoIncBuild">False</VersionInfo>
+						<VersionInfo Name="MajorVer">1</VersionInfo>
+						<VersionInfo Name="MinorVer">0</VersionInfo>
+						<VersionInfo Name="Release">0</VersionInfo>
+						<VersionInfo Name="Build">0</VersionInfo>
+						<VersionInfo Name="Debug">False</VersionInfo>
+						<VersionInfo Name="PreRelease">False</VersionInfo>
+						<VersionInfo Name="Special">False</VersionInfo>
+						<VersionInfo Name="Private">False</VersionInfo>
+						<VersionInfo Name="DLL">False</VersionInfo>
+						<VersionInfo Name="Locale">3082</VersionInfo>
+						<VersionInfo Name="CodePage">1252</VersionInfo>
+					</VersionInfo>
+					<VersionInfoKeys>
+						<VersionInfoKeys Name="CompanyName"/>
+						<VersionInfoKeys Name="FileDescription"/>
+						<VersionInfoKeys Name="FileVersion">1.0.0.0</VersionInfoKeys>
+						<VersionInfoKeys Name="InternalName"/>
+						<VersionInfoKeys Name="LegalCopyright"/>
+						<VersionInfoKeys Name="LegalTrademarks"/>
+						<VersionInfoKeys Name="OriginalFilename"/>
+						<VersionInfoKeys Name="ProductName"/>
+						<VersionInfoKeys Name="ProductVersion">1.0.0.0</VersionInfoKeys>
+						<VersionInfoKeys Name="Comments"/>
+					</VersionInfoKeys>
+					<Excluded_Packages>
+						<Excluded_Packages Name="$(BDS)\bin\dcloffice2k140.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
+						<Excluded_Packages Name="$(BDS)\bin\dclofficexp140.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
+					</Excluded_Packages>
+				</Delphi.Personality>
+				<ModelSupport>False</ModelSupport>
+			</BorlandProject>
+			<ProjectFileVersion>12</ProjectFileVersion>
+		</ProjectExtensions>
+	</Project>
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/delphi/delphidemo.res b/third-party/libjxl/libjxl/third_party/lcms/utils/delphi/delphidemo.res
new file mode 100755
index 0000000000..ca4824f8cb
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/utils/delphi/delphidemo.res differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/delphi/demo1.dfm b/third-party/libjxl/libjxl/third_party/lcms/utils/delphi/demo1.dfm
new file mode 100755
index 0000000000..c7722534bb
Binary files /dev/null and b/third-party/libjxl/libjxl/third_party/lcms/utils/delphi/demo1.dfm differ
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/delphi/demo1.pas b/third-party/libjxl/libjxl/third_party/lcms/utils/delphi/demo1.pas
new file mode 100644
index 0000000000..8b69c98e04
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/delphi/demo1.pas
@@ -0,0 +1,322 @@
+unit demo1;
+
+interface
+
+uses
+  Windows, SysUtils, Classes, Graphics, Controls, Forms, Dialogs,
+  ExtCtrls, StdCtrls, ExtDlgs, lcms2dll, ComCtrls;
+
+type
+  // Main form of the demo: loads a picture, lets the user choose an input
+  // and an output profile plus transform options, and shows the converted
+  // picture next to the original.
+  TForm1 = class(TForm)
+
+    // Image1 holds the source picture, Image2 receives the transformed rows.
+    Image1: TImage;
+    Image2: TImage;
+    Panel1: TPanel;
+    Splitter1: TSplitter;
+    // Loads a picture into both images.
+    Button2: TButton;
+    // Profile choosers, populated by FillCombo.
+    ComboBoxInput: TComboBox;
+    ComboBoxOutput: TComboBox;
+    Label1: TLabel;
+    Label2: TLabel;
+    // Adds cmsFLAGS_BLACKPOINTCOMPENSATION when checked.
+    WBCompensation: TCheckBox;
+    // Adds cmsFLAGS_NULLTRANSFORM when checked.
+    NoTransform: TCheckBox;
+    // Selects the optimization / precalculation flag (see ComputeFlags).
+    RadioGroup1: TRadioGroup;
+    OpenPictureDialog1: TOpenPictureDialog;
+    // Builds the transform and applies it to the picture.
+    Button1: TButton;
+    // Shows row progress while the transform runs.
+    ProgressBar1: TProgressBar;
+    // Rendering intent chooser, filled from cmsGetSupportedIntents.
+    ComboBoxIntent: TComboBox;
+    Label3: TLabel;
+    // Browse for an input (Button3) or output (Button4) profile file.
+    Button3: TButton;
+    Button4: TButton;
+    OpenDialog1: TOpenDialog;
+    // Label4 displays the adaptation state chosen with ScrollBar1 (0..100).
+    Label4: TLabel;
+    ScrollBar1: TScrollBar;
+
+    procedure Button2Click(Sender: TObject);
+    procedure Button1Click(Sender: TObject);
+    procedure Button3Click(Sender: TObject);
+    procedure Button4Click(Sender: TObject);
+    procedure ComboBoxIntentChange(Sender: TObject);
+    procedure ScrollBar1Change(Sender: TObject);
+  private
+    { Private declarations }
+    // Translates the state of the option controls into transform flags.
+    function ComputeFlags: DWORD;
+
+  public
+    // Installs the error logger and fills the profile and intent combo boxes.
+    constructor Create(Owner: TComponent); Override;
+    { Public declarations }
+  end;
+
+var
+  Form1: TForm1;
+
+implementation
+
+{$R *.DFM}
+
+// Bit flags telling InSignatures which profile device classes to accept.
+CONST
+  IS_INPUT = $1;
+  IS_DISPLAY = $2;
+  IS_COLORSPACE = $4;
+  IS_OUTPUT = $8;
+  IS_ABSTRACT = $10;
+
+// Intent codes reported by cmsGetSupportedIntents. Filled in TForm1.Create
+// and looked up by the ComboBoxIntent item position in Button1Click.
+VAR
+   IntentCodes: array [0 .. 20] of cmsUInt32Number;
+
+// Returns TRUE when Signature is one of the device classes enabled by the
+// IS_* bits set in dwFlags, FALSE otherwise.
+FUNCTION InSignatures(Signature: cmsProfileClassSignature;  dwFlags: DWORD): Boolean;
+
+  // TRUE when the Mask bit is set in dwFlags and Signature equals Wanted.
+  function Accepts(Mask: DWORD; Wanted: cmsProfileClassSignature): Boolean;
+  begin
+    Accepts := ((dwFlags AND Mask) <> 0) AND (Signature = Wanted)
+  end;
+
+BEGIN
+  InSignatures := Accepts(IS_DISPLAY, cmsSigDisplayClass)
+               OR Accepts(IS_OUTPUT, cmsSigOutputClass)
+               OR Accepts(IS_INPUT, cmsSigInputClass)
+               OR Accepts(IS_COLORSPACE, cmsSigColorSpaceClass)
+               OR Accepts(IS_ABSTRACT, cmsSigAbstractClass)
+END;
+
+// Fills Combo with the descriptions of the RGB profiles found in
+// <system directory>\SPOOL\DRIVERS\COLOR\ (files matching *.ic?) whose
+// device class is enabled in Signatures (IS_* bits).
+//
+// The matching file names are kept in a TStringList whose reference is stored
+// in Combo.Tag, in the same order as the combo items; SelectedFile reads it
+// back. That list is intentionally not freed here.
+PROCEDURE FillCombo(var Combo: TComboBox; Signatures: DWORD);
+var
+  Files, Descriptions: TStringList;
+  Found: Integer;
+  SearchRec: TSearchRec;
+  Path, Profile: String;
+  Dir: ARRAY [0 .. 1024] OF Char;
+  hProfile: cmsHPROFILE;
+  Descrip: array [0 .. 256] of Char;
+begin
+  Files := TStringList.Create;
+  Descriptions := TStringList.Create;
+  try
+    GetSystemDirectory(Dir, 1023);
+    Path := String(Dir) + '\SPOOL\DRIVERS\COLOR\';
+    Found := FindFirst(Path + '*.ic?', faAnyFile, SearchRec);
+    while Found = 0 do
+    begin
+      Profile := Path + SearchRec.Name;
+      hProfile := cmsOpenProfileFromFile(PAnsiChar(AnsiString(Profile)), 'r');
+      if (hProfile <> NIL) THEN
+      begin
+
+        if ((cmsGetColorSpace(hProfile) = cmsSigRgbData) AND InSignatures
+            (cmsGetDeviceClass(hProfile), Signatures)) then
+        begin
+          // Start from an empty string so that a failed lookup cannot leave
+          // stale or uninitialized text in the list.
+          Descrip[0] := #0;
+          cmsGetProfileInfo(hProfile, cmsInfoDescription, 'EN', 'us', Descrip,
+            256);
+          Descriptions.Add(Descrip);
+          Files.Add(Profile);
+        end;
+        cmsCloseProfile(hProfile);
+      end;
+
+      Found := FindNext(SearchRec);
+
+    end;
+    FindClose(SearchRec);
+    // Assigning to Items copies the strings into the combo's own list ...
+    Combo.Items := Descriptions;
+  finally
+    // ... so the temporary list must be released here to avoid a leak.
+    Descriptions.Free;
+  end;
+  Combo.Tag := Integer(Files);
+end;
+
+// A rather simple logger: shows the error text in a task-modal warning
+// message box. Note the "cdecl" calling convention. ContextID and ErrorCode
+// are not used.
+PROCEDURE ErrorLogger(ContextID: cmsContext; ErrorCode: cmsUInt32Number;
+  Text: PAnsiChar); Cdecl;
+var
+  Msg: WideString;
+begin
+  // Widen the ANSI text for the message box call.
+  Msg := WideString(Text);
+  MessageBox(0, PWideChar(Msg), 'Something is going wrong...',
+    MB_OK OR MB_ICONWARNING or MB_TASKMODAL);
+end;
+
+// Builds the form: installs the error logger, sets the adaptation-state
+// slider range to 0..100, fills both profile combo boxes and lists the
+// supported rendering intents.
+constructor TForm1.Create(Owner: TComponent);
+var
+  Names: array [0 .. 20] of PAnsiChar;
+  IntentList: TStrings;
+  Idx, Total: Integer;
+begin
+  inherited Create(Owner);
+
+  // Set the logger before any profile is opened
+  cmsSetLogErrorHandler(ErrorLogger);
+
+  ScrollBar1.Min := 0;
+  ScrollBar1.Max := 100;
+
+  // Input side: input, colorspace and display classes; output side: every class
+  FillCombo(ComboBoxInput, IS_INPUT OR IS_COLORSPACE OR IS_DISPLAY);
+  FillCombo(ComboBoxOutput, $FFFF  );
+
+  // Get the supported intents; the codes are kept in the unit-level table
+  Total := cmsGetSupportedIntents(20, @IntentCodes, @Names);
+
+  IntentList := ComboBoxIntent.Items;
+  IntentList.BeginUpdate;
+  IntentList.Clear;
+  for Idx := 0 to Pred(Total) do
+    IntentList.Add(String(Names[Idx]));
+
+  ComboBoxIntent.ItemIndex := 0;
+  IntentList.EndUpdate;
+end;
+
+
+
+// Mirrors the slider position (a percentage) in the Label4 caption.
+procedure TForm1.ScrollBar1Change(Sender: TObject);
+begin
+  Label4.Caption := 'Adaptation state ' + IntToStr(ScrollBar1.Position)
+    + '% (Abs. col only)';
+end;
+
+// Lets the user pick a picture and loads it into both images, forcing a
+// 24-bit pixel format on each bitmap.
+procedure TForm1.Button2Click(Sender: TObject);
+var
+  PictureFile: String;
+begin
+  if not OpenPictureDialog1.Execute then
+    Exit;
+
+  PictureFile := OpenPictureDialog1.FileName;
+
+  Image1.Picture.LoadFromFile(PictureFile);
+  Image1.Picture.Bitmap.PixelFormat := pf24bit;
+
+  Image2.Picture.LoadFromFile(PictureFile);
+  Image2.Picture.Bitmap.PixelFormat := pf24bit;
+end;
+
+// Returns the profile file name for the current selection of Combo: the entry
+// of the file list stored in Combo.Tag at the selected index, or the text
+// typed into the combo when no item is selected.
+function SelectedFile(var Combo: TComboBox): string;
+var
+  Paths: TStringList;
+  Chosen: Integer;
+begin
+  // Tag carries the TStringList reference stored by FillCombo.
+  Paths := TStringList(Combo.Tag);
+  Chosen := Combo.ItemIndex;
+
+  if Chosen < 0 then
+    Result := Combo.Text
+  else
+    Result := Paths.Strings[Chosen];
+end;
+
+// The adaptation-state slider is enabled only while the intent entry at
+// index 3 is selected.
+procedure TForm1.ComboBoxIntentChange(Sender: TObject);
+begin
+  if ComboBoxIntent.ItemIndex = 3 then
+    ScrollBar1.Enabled := True
+  else
+    ScrollBar1.Enabled := False;
+end;
+
+// Builds the transform flags from the option controls: black point
+// compensation, null transform, and the optimization mode chosen in
+// RadioGroup1.
+function TForm1.ComputeFlags: DWORD;
+begin
+  Result := 0;
+
+  if WBCompensation.Checked then
+    Result := Result OR cmsFLAGS_BLACKPOINTCOMPENSATION;
+
+  if NoTransform.Checked then
+    Result := Result OR cmsFLAGS_NULLTRANSFORM;
+
+  // NOTE(review): only indices 0, 1 and 3 add a flag; index 2 adds none.
+  case RadioGroup1.ItemIndex of
+    0: Result := Result OR cmsFLAGS_NOOPTIMIZE;
+    1: Result := Result OR cmsFLAGS_HIGHRESPRECALC;
+    3: Result := Result OR cmsFLAGS_LOWRESPRECALC;
+  end;
+end;
+
+// Applies the colour transform selected in the UI to the loaded picture.
+//
+// Opens the chosen input and output profiles, builds a BGR 8-bit to BGR 8-bit
+// transform with the selected intent, flags and adaptation state, and runs it
+// row by row from Image1 into Image2 while updating the progress bar.
+// Nothing is transformed when either profile name is empty or a profile or
+// the transform cannot be created.
+procedure TForm1.Button1Click(Sender: TObject);
+var
+  Source, Dest: String;
+  hSrc, hDest: cmsHPROFILE;
+  xform: cmsHTRANSFORM;
+  i, PicW, PicH: Integer;
+  Intent: Integer;
+  IntentIndex: Integer;
+  dwFlags: DWORD;
+begin
+
+  Source := SelectedFile(ComboBoxInput);
+  Dest := SelectedFile(ComboBoxOutput);
+
+  dwFlags := ComputeFlags;
+
+  // ItemIndex is -1 when no intent is selected; fall back to the first table
+  // entry instead of indexing outside IntentCodes.
+  IntentIndex := ComboBoxIntent.ItemIndex;
+  if (IntentIndex < Low(IntentCodes)) or (IntentIndex > High(IntentCodes)) then
+    IntentIndex := Low(IntentCodes);
+  Intent := IntentCodes[IntentIndex];
+
+  // The slider holds a percentage; the library call takes a fraction.
+  cmsSetAdaptationState(  ScrollBar1.Position / 100.0 );
+
+  if (Source <> '') AND (Dest <> '') then
+  begin
+    hSrc := cmsOpenProfileFromFile(PAnsiChar(AnsiString(Source)), 'r');
+    hDest := cmsOpenProfileFromFile(PAnsiChar(AnsiString(Dest)), 'r');
+
+    if (hSrc <> Nil) and (hDest <> Nil) then
+    begin
+      xform := cmsCreateTransform(hSrc, TYPE_BGR_8, hDest, TYPE_BGR_8, Intent,
+        dwFlags);
+    end
+    else
+    begin
+      xform := nil;
+    end;
+
+    // The profiles are closed as soon as the transform has been created.
+    if hSrc <> nil then
+    begin
+      cmsCloseProfile(hSrc);
+    end;
+
+    if hDest <> Nil then
+    begin
+      cmsCloseProfile(hDest);
+    end;
+
+    if (xform <> nil) then
+    begin
+
+      PicW := Image2.Picture.width;
+      PicH := Image2.Picture.height;
+      ProgressBar1.Min := 0;
+      ProgressBar1.Max := PicH;
+      ProgressBar1.Step := 1;
+
+      for i := 0 TO (PicH - 1) do
+      begin
+        // Refresh the progress bar every 100 rows only.
+        if ((i MOD 100) = 0) then
+          ProgressBar1.Position := i;
+
+        cmsDoTransform(xform, Image1.Picture.Bitmap.Scanline[i],
+          Image2.Picture.Bitmap.Scanline[i], PicW);
+
+      end;
+      ProgressBar1.Position := PicH;
+
+      cmsDeleteTransform(xform);
+
+    end;
+
+    Image2.Repaint;
+    ProgressBar1.Position := 0;
+  end
+end;
+
+// Browse for an input profile; the chosen file name becomes the text of the
+// input combo box.
+procedure TForm1.Button3Click(Sender: TObject);
+begin
+  if not OpenDialog1.Execute then
+    Exit;
+
+  ComboBoxInput.Text := OpenDialog1.FileName;
+end;
+
+// Browse for an output profile; the chosen file name becomes the text of the
+// output combo box.
+procedure TForm1.Button4Click(Sender: TObject);
+begin
+  if not OpenDialog1.Execute then
+    Exit;
+
+  ComboBoxOutput.Text := OpenDialog1.FileName;
+end;
+
+end.
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/delphi/lcms2dll.pas b/third-party/libjxl/libjxl/third_party/lcms/utils/delphi/lcms2dll.pas
new file mode 100644
index 0000000000..9af7a0d11d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/delphi/lcms2dll.pas
@@ -0,0 +1,2156 @@
+//
+//  Little cms DELPHI wrapper
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2014 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+// Version 2.6
+//
+
+UNIT lcms2dll;
+
+{$IFDEF FPC}
+  {$MODE Delphi}
+{$ENDIF}
+
+INTERFACE
+
+{$IFNDEF MSWINDOWS}
+   USES LCLType, types;
+   Type PWChar = PWideChar;
+{$ELSE}
+   USES Windows;
+{$ENDIF}
+
+ CONST
+
+  // File name of the lcms2 shared library, selected at compile time.
+  // NOTE(review): only DARWIN gets its own name; every other target, including
+  // non-Windows ones such as Linux under FPC, falls through to 'lcms2.dll'.
+  // Confirm that is intended before building for those targets.
+  LCMS2_SO = {$IFDEF DARWIN} 'liblcms2.2.dylib'; {$ELSE} 'lcms2.dll'; {$ENDIF}
+
+ TYPE
+
+  // Sized integer aliases; the cms*Number types below are defined on top of
+  // these.
+  // NOTE(review): LongWord and Longint are not 32 bits wide on every Delphi
+  // target (64-bit POSIX targets widen them) -- confirm for each compiler and
+  // platform this unit must support.
+  Uint8   = Byte;
+  Int8    = Shortint;
+  UInt16  = Word;
+  Int16   = Smallint;
+  UInt32  = LongWord;
+  Int32   = Longint;
+
+ TYPE
+     // Scalar number types of the wrapper, expressed through the sized aliases
+     // declared above (8/16/32/64-bit integers, single and double floats).
+     cmsUInt8Number   = Uint8;
+     cmsInt8Number    = Int8;
+     cmsUInt16Number  = UInt16;
+     cmsInt16Number   = Int16;
+
+     cmsUInt32Number  = UInt32;
+     cmsInt32Number   = Int32;
+     cmsInt64Number   = Int64;
+     cmsUInt64Number  = UInt64;
+
+     cmsFloat32Number = Single;
+     cmsFloat64Number = Double;
+
+     // Typed pointers to each of the scalar types above
+     LPcmsUInt8Number    = ^cmsUInt8Number;
+     LPcmsInt8Number     = ^cmsInt8Number;
+     LPcmsUInt16Number   = ^cmsUInt16Number;
+     LPcmsInt16Number    = ^cmsInt16Number;
+
+     LPcmsUInt32Number   = ^cmsUInt32Number;
+     LPcmsInt32Number    = ^cmsInt32Number;
+     LPcmsInt64Number    = ^cmsInt64Number;
+     LPcmsUInt64Number   = ^cmsUInt64Number;
+
+     LPcmsFloat32Number  = ^cmsFloat32Number;
+     LPcmsFloat64Number  = ^cmsFloat64Number;
+
+
+     // Derivative types: signatures and fixed-point numbers are plain integer
+     // aliases, so the compiler does not tell them apart from ordinary
+     // integers of the same width.
+     cmsSignature        = cmsUInt32Number;
+     cmsU8Fixed8Number   = cmsUInt16Number;
+     cmsS15Fixed16Number = cmsInt32Number;
+     cmsU16Fixed16Number = cmsUInt32Number;
+
+     // Boolean type, which will be using the native integer.
+     // Declared as LongBool (32 bits) rather than Boolean (one byte) so that
+     // parameters, results and record fields of this type have the same width
+     // as the integer-sized boolean on the C side of the DLL boundary. Pascal
+     // code can keep using True/False and "if f(...) then" unchanged.
+     cmsBool = LongBool;
+
+ CONST
+
+    // Some common definitions
+    cmsMAX_PATH     = 256;
+
+    // D50 XYZ normalized to Y=1.0
+    cmsD50X             = 0.9642;
+    cmsD50Y             = 1.0;
+    cmsD50Z             = 0.8249;
+
+    // V4 perceptual black
+    cmsPERCEPTUAL_BLACK_X  = 0.00336;
+    cmsPERCEPTUAL_BLACK_Y  = 0.0034731;
+    cmsPERCEPTUAL_BLACK_Z  = 0.00287;
+
+    // Definitions in ICC spec
+    // Both values are four ASCII characters packed into 32 bits with the first
+    // character in the most significant byte ($61 $63 $73 $70 = 'acsp').
+    cmsMagicNumber      = $61637370;     // 'acsp'
+    lcmsSignature       = $6c636d73;     // 'lcms'
+
+
+TYPE
+
+// Base ICC type definitions
+// Each enumerator's ordinal is the four-character signature shown in its
+// trailing comment, packed one ASCII byte per character with the first
+// character in the most significant byte (e.g. 'chrm' = $63 $68 $72 $6D).
+// The explicit ordinals are not listed in ascending order.
+cmsTagTypeSignature = (
+  cmsSigChromaticityType                  = $6368726D,  // 'chrm'
+  cmsSigColorantOrderType                 = $636C726F,  // 'clro'
+  cmsSigColorantTableType                 = $636C7274,  // 'clrt'
+  cmsSigCrdInfoType                       = $63726469,  // 'crdi'
+  cmsSigCurveType                         = $63757276,  // 'curv'
+  cmsSigDataType                          = $64617461,  // 'data'
+  cmsSigDictType                          = $64696374,  // 'dict'
+  cmsSigDateTimeType                      = $6474696D,  // 'dtim'
+  cmsSigDeviceSettingsType                = $64657673,  // 'devs'
+  cmsSigLut16Type                         = $6d667432,  // 'mft2'
+  cmsSigLut8Type                          = $6d667431,  // 'mft1'
+  cmsSigLutAtoBType                       = $6d414220,  // 'mAB '
+  cmsSigLutBtoAType                       = $6d424120,  // 'mBA '
+  cmsSigMeasurementType                   = $6D656173,  // 'meas'
+  cmsSigMultiLocalizedUnicodeType         = $6D6C7563,  // 'mluc'
+  cmsSigMultiProcessElementType           = $6D706574,  // 'mpet'
+  cmsSigNamedColorType                    = $6E636f6C,  // 'ncol' -- DEPRECATED!
+  cmsSigNamedColor2Type                   = $6E636C32,  // 'ncl2'
+  cmsSigParametricCurveType               = $70617261,  // 'para'
+  cmsSigProfileSequenceDescType           = $70736571,  // 'pseq'
+  cmsSigProfileSequenceIdType             = $70736964,  // 'psid'
+  cmsSigResponseCurveSet16Type            = $72637332,  // 'rcs2'
+  cmsSigS15Fixed16ArrayType               = $73663332,  // 'sf32'
+  cmsSigScreeningType                     = $7363726E,  // 'scrn'
+  cmsSigSignatureType                     = $73696720,  // 'sig '
+  cmsSigTextType                          = $74657874,  // 'text'
+  cmsSigTextDescriptionType               = $64657363,  // 'desc'
+  cmsSigU16Fixed16ArrayType               = $75663332,  // 'uf32'
+  cmsSigUcrBgType                         = $62666420,  // 'bfd '
+  cmsSigUInt16ArrayType                   = $75693136,  // 'ui16'
+  cmsSigUInt32ArrayType                   = $75693332,  // 'ui32'
+  cmsSigUInt64ArrayType                   = $75693634,  // 'ui64'
+  cmsSigUInt8ArrayType                    = $75693038,  // 'ui08'
+  cmsSigViewingConditionsType             = $76696577,  // 'view'
+  cmsSigXYZType                           = $58595A20,  // 'XYZ '
+  cmsSigVcgtType                          = $76636774   // 'vcgt'
+  );
+
+// Base ICC tag definitions
+// Each enumerator's ordinal is the four-character signature shown in its
+// trailing comment, packed one ASCII byte per character with the first
+// character in the most significant byte (e.g. 'A2B0' = $41 $32 $42 $30).
+// Three pairs share one ordinal each and are therefore two names for the same
+// value: Blue/Green/Red "ColorantTag" and the matching "MatrixColumnTag".
+cmsTagSignature = (
+    cmsSigAToB0Tag                          = $41324230,  // 'A2B0'
+    cmsSigAToB1Tag                          = $41324231,  // 'A2B1'
+    cmsSigAToB2Tag                          = $41324232,  // 'A2B2'
+    cmsSigBlueColorantTag                   = $6258595A,  // 'bXYZ'
+    cmsSigBlueMatrixColumnTag               = $6258595A,  // 'bXYZ'
+    cmsSigBlueTRCTag                        = $62545243,  // 'bTRC'
+    cmsSigBToA0Tag                          = $42324130,  // 'B2A0'
+    cmsSigBToA1Tag                          = $42324131,  // 'B2A1'
+    cmsSigBToA2Tag                          = $42324132,  // 'B2A2'
+    cmsSigCalibrationDateTimeTag            = $63616C74,  // 'calt'
+    cmsSigCharTargetTag                     = $74617267,  // 'targ'
+    cmsSigChromaticAdaptationTag            = $63686164,  // 'chad'
+    cmsSigChromaticityTag                   = $6368726D,  // 'chrm'
+    cmsSigColorantOrderTag                  = $636C726F,  // 'clro'
+    cmsSigColorantTableTag                  = $636C7274,  // 'clrt'
+    cmsSigColorantTableOutTag               = $636C6F74,  // 'clot'
+    cmsSigColorimetricIntentImageStateTag   = $63696973,  // 'ciis'
+    cmsSigCopyrightTag                      = $63707274,  // 'cprt'
+    cmsSigCrdInfoTag                        = $63726469,  // 'crdi'
+    cmsSigDataTag                           = $64617461,  // 'data'
+    cmsSigDateTimeTag                       = $6474696D,  // 'dtim'
+    cmsSigDeviceMfgDescTag                  = $646D6E64,  // 'dmnd'
+    cmsSigDeviceModelDescTag                = $646D6464,  // 'dmdd'
+    cmsSigDeviceSettingsTag                 = $64657673,  // 'devs'
+    cmsSigDToB0Tag                          = $44324230,  // 'D2B0'
+    cmsSigDToB1Tag                          = $44324231,  // 'D2B1'
+    cmsSigDToB2Tag                          = $44324232,  // 'D2B2'
+    cmsSigDToB3Tag                          = $44324233,  // 'D2B3'
+    cmsSigBToD0Tag                          = $42324430,  // 'B2D0'
+    cmsSigBToD1Tag                          = $42324431,  // 'B2D1'
+    cmsSigBToD2Tag                          = $42324432,  // 'B2D2'
+    cmsSigBToD3Tag                          = $42324433,  // 'B2D3'
+    cmsSigGamutTag                          = $67616D74,  // 'gamt'
+    cmsSigGrayTRCTag                        = $6b545243,  // 'kTRC'
+    cmsSigGreenColorantTag                  = $6758595A,  // 'gXYZ'
+    cmsSigGreenMatrixColumnTag              = $6758595A,  // 'gXYZ'
+    cmsSigGreenTRCTag                       = $67545243,  // 'gTRC'
+    cmsSigLuminanceTag                      = $6C756d69,  // 'lumi'
+    cmsSigMeasurementTag                    = $6D656173,  // 'meas'
+    cmsSigMediaBlackPointTag                = $626B7074,  // 'bkpt'
+    cmsSigMediaWhitePointTag                = $77747074,  // 'wtpt'
+    cmsSigNamedColorTag                     = $6E636f6C,  // 'ncol' // Deprecated by the ICC
+    cmsSigNamedColor2Tag                    = $6E636C32,  // 'ncl2'
+    cmsSigOutputResponseTag                 = $72657370,  // 'resp'
+    cmsSigPerceptualRenderingIntentGamutTag = $72696730,  // 'rig0'
+    cmsSigPreview0Tag                       = $70726530,  // 'pre0'
+    cmsSigPreview1Tag                       = $70726531,  // 'pre1'
+    cmsSigPreview2Tag                       = $70726532,  // 'pre2'
+    cmsSigProfileDescriptionTag             = $64657363,  // 'desc'
+    cmsSigProfileSequenceDescTag            = $70736571,  // 'pseq'
+    cmsSigProfileSequenceIdTag              = $70736964,  // 'psid'
+    cmsSigPs2CRD0Tag                        = $70736430,  // 'psd0'
+    cmsSigPs2CRD1Tag                        = $70736431,  // 'psd1'
+    cmsSigPs2CRD2Tag                        = $70736432,  // 'psd2'
+    cmsSigPs2CRD3Tag                        = $70736433,  // 'psd3'
+    cmsSigPs2CSATag                         = $70733273,  // 'ps2s'
+    cmsSigPs2RenderingIntentTag             = $70733269,  // 'ps2i'
+    cmsSigRedColorantTag                    = $7258595A,  // 'rXYZ'
+    cmsSigRedMatrixColumnTag                = $7258595A,  // 'rXYZ'
+    cmsSigRedTRCTag                         = $72545243,  // 'rTRC'
+    cmsSigSaturationRenderingIntentGamutTag = $72696732,  // 'rig2'
+    cmsSigScreeningDescTag                  = $73637264,  // 'scrd'
+    cmsSigScreeningTag                      = $7363726E,  // 'scrn'
+    cmsSigTechnologyTag                     = $74656368,  // 'tech'
+    cmsSigUcrBgTag                          = $62666420,  // 'bfd '
+    cmsSigViewingCondDescTag                = $76756564,  // 'vued'
+    cmsSigViewingConditionsTag              = $76696577,  // 'view'
+    cmsSigVcgtTag                           = $76636774,  // 'vcgt'
+    cmsSigMetaTag                           = $6D657461   // 'meta'
+);
+
+// ICC Technology tag
+// Each enumerator's ordinal is a four-character signature packed one ASCII
+// byte per character, first character in the most significant byte.
+// NOTE(review): the bytes of cmsSigDigitalCinemaProjector ($64 $63 $6A $70)
+// spell 'dcjp', while its trailing comment reads 'dcpj'. Confirm against the
+// ICC technology signature table which of the two is intended.
+cmsTechnologySignature = (
+    cmsSigDigitalCamera                     = $6463616D,  // 'dcam'
+    cmsSigFilmScanner                       = $6673636E,  // 'fscn'
+    cmsSigReflectiveScanner                 = $7273636E,  // 'rscn'
+    cmsSigInkJetPrinter                     = $696A6574,  // 'ijet'
+    cmsSigThermalWaxPrinter                 = $74776178,  // 'twax'
+    cmsSigElectrophotographicPrinter        = $6570686F,  // 'epho'
+    cmsSigElectrostaticPrinter              = $65737461,  // 'esta'
+    cmsSigDyeSublimationPrinter             = $64737562,  // 'dsub'
+    cmsSigPhotographicPaperPrinter          = $7270686F,  // 'rpho'
+    cmsSigFilmWriter                        = $6670726E,  // 'fprn'
+    cmsSigVideoMonitor                      = $7669646D,  // 'vidm'
+    cmsSigVideoCamera                       = $76696463,  // 'vidc'
+    cmsSigProjectionTelevision              = $706A7476,  // 'pjtv'
+    cmsSigCRTDisplay                        = $43525420,  // 'CRT '
+    cmsSigPMDisplay                         = $504D4420,  // 'PMD '
+    cmsSigAMDisplay                         = $414D4420,  // 'AMD '
+    cmsSigPhotoCD                           = $4B504344,  // 'KPCD'
+    cmsSigPhotoImageSetter                  = $696D6773,  // 'imgs'
+    cmsSigGravure                           = $67726176,  // 'grav'
+    cmsSigOffsetLithography                 = $6F666673,  // 'offs'
+    cmsSigSilkscreen                        = $73696C6B,  // 'silk'
+    cmsSigFlexography                       = $666C6578,  // 'flex'
+    cmsSigMotionPictureFilmScanner          = $6D706673,  // 'mpfs'
+    cmsSigMotionPictureFilmRecorder         = $6D706672,  // 'mpfr'
+    cmsSigDigitalMotionPictureCamera        = $646D7063,  // 'dmpc'
+    cmsSigDigitalCinemaProjector            = $64636A70   // 'dcpj'
+);
+
+
+// ICC Color spaces
+// Each enumerator's ordinal is the four-character signature shown in its
+// trailing comment, packed one ASCII byte per character with the first
+// character in the most significant byte (e.g. 'XYZ ' = $58 $59 $5A $20).
+// The MCHA..MCHF entries end in the letters 'A'..'F' ($41..$46); they do not
+// continue the digit sequence of MCH1..MCH9 into ':'..'?' ($3A..$3F).
+cmsColorSpaceSignature = (
+    cmsSigXYZData                           = $58595A20,  // 'XYZ '
+    cmsSigLabData                           = $4C616220,  // 'Lab '
+    cmsSigLuvData                           = $4C757620,  // 'Luv '
+    cmsSigYCbCrData                         = $59436272,  // 'YCbr'
+    cmsSigYxyData                           = $59787920,  // 'Yxy '
+    cmsSigRgbData                           = $52474220,  // 'RGB '
+    cmsSigGrayData                          = $47524159,  // 'GRAY'
+    cmsSigHsvData                           = $48535620,  // 'HSV '
+    cmsSigHlsData                           = $484C5320,  // 'HLS '
+    cmsSigCmykData                          = $434D594B,  // 'CMYK'
+    cmsSigCmyData                           = $434D5920,  // 'CMY '
+    cmsSigMCH1Data                          = $4D434831,  // 'MCH1'
+    cmsSigMCH2Data                          = $4D434832,  // 'MCH2'
+    cmsSigMCH3Data                          = $4D434833,  // 'MCH3'
+    cmsSigMCH4Data                          = $4D434834,  // 'MCH4'
+    cmsSigMCH5Data                          = $4D434835,  // 'MCH5'
+    cmsSigMCH6Data                          = $4D434836,  // 'MCH6'
+    cmsSigMCH7Data                          = $4D434837,  // 'MCH7'
+    cmsSigMCH8Data                          = $4D434838,  // 'MCH8'
+    cmsSigMCH9Data                          = $4D434839,  // 'MCH9'
+    cmsSigMCHAData                          = $4D434841,  // 'MCHA'
+    cmsSigMCHBData                          = $4D434842,  // 'MCHB'
+    cmsSigMCHCData                          = $4D434843,  // 'MCHC'
+    cmsSigMCHDData                          = $4D434844,  // 'MCHD'
+    cmsSigMCHEData                          = $4D434845,  // 'MCHE'
+    cmsSigMCHFData                          = $4D434846,  // 'MCHF'
+    cmsSigNamedData                         = $6e6d636c,  // 'nmcl'
+    cmsSig1colorData                        = $31434C52,  // '1CLR'
+    cmsSig2colorData                        = $32434C52,  // '2CLR'
+    cmsSig3colorData                        = $33434C52,  // '3CLR'
+    cmsSig4colorData                        = $34434C52,  // '4CLR'
+    cmsSig5colorData                        = $35434C52,  // '5CLR'
+    cmsSig6colorData                        = $36434C52,  // '6CLR'
+    cmsSig7colorData                        = $37434C52,  // '7CLR'
+    cmsSig8colorData                        = $38434C52,  // '8CLR'
+    cmsSig9colorData                        = $39434C52,  // '9CLR'
+    cmsSig10colorData                       = $41434C52,  // 'ACLR'
+    cmsSig11colorData                       = $42434C52,  // 'BCLR'
+    cmsSig12colorData                       = $43434C52,  // 'CCLR'
+    cmsSig13colorData                       = $44434C52,  // 'DCLR'
+    cmsSig14colorData                       = $45434C52,  // 'ECLR'
+    cmsSig15colorData                       = $46434C52,  // 'FCLR'
+    cmsSigLuvKData                          = $4C75764B   // 'LuvK'
+);
+
+// ICC Profile Class
+// Each enumerator's ordinal is the four-character signature shown in its
+// trailing comment, packed one ASCII byte per character with the first
+// character in the most significant byte. Used as the type of the
+// deviceClass field of cmsICCHeader.
+cmsProfileClassSignature = (
+    cmsSigInputClass                        = $73636E72,  // 'scnr'
+    cmsSigDisplayClass                      = $6D6E7472,  // 'mntr'
+    cmsSigOutputClass                       = $70727472,  // 'prtr'
+    cmsSigLinkClass                         = $6C696E6B,  // 'link'
+    cmsSigAbstractClass                     = $61627374,  // 'abst'
+    cmsSigColorSpaceClass                   = $73706163,  // 'spac'
+    cmsSigNamedColorClass                   = $6e6d636c   // 'nmcl'
+);
+
+
+// ICC Platforms
+// Each enumerator's ordinal is the four-character signature shown in its
+// trailing comment, packed one ASCII byte per character with the first
+// character in the most significant byte. Used as the type of the platform
+// field of cmsICCHeader.
+cmsPlatformSignature = (
+    cmsSigMacintosh                         = $4150504C,  // 'APPL'
+    cmsSigMicrosoft                         = $4D534654,  // 'MSFT'
+    cmsSigSolaris                           = $53554E57,  // 'SUNW'
+    cmsSigSGI                               = $53474920,  // 'SGI '
+    cmsSigTaligent                          = $54474E54,  // 'TGNT'
+    cmsSigUnices                            = $2A6E6978   // '*nix'   // From argyll -- Not official
+);
+
+CONST
+
+    // Reference gamut
+    // (These signatures are plain integer constants, not members of one of
+    // the enumerated signature types; each packs the four characters of its
+    // trailing comment with the first character in the most significant byte.)
+    cmsSigPerceptualReferenceMediumGamut         = $70726d67;  //'prmg'
+
+    // For cmsSigColorimetricIntentImageStateTag
+    cmsSigSceneColorimetryEstimates              = $73636F65;  //'scoe'
+    cmsSigSceneAppearanceEstimates               = $73617065;  //'sape'
+    cmsSigFocalPlaneColorimetryEstimates         = $66706365;  //'fpce'
+    cmsSigReflectionHardcopyOriginalColorimetry  = $72686F63;  //'rhoc'
+    cmsSigReflectionPrintOutputColorimetry       = $72706F63;  //'rpoc'
+
+TYPE
+
+// Multi process elements types
+// Each enumerator's ordinal is the four-character signature shown in its
+// trailing comment, packed one ASCII byte per character with the first
+// character in the most significant byte.
+// NOTE(review): cmsSigXYZ2LabElemType carries 'l2x ' and cmsSigLab2XYZElemType
+// carries 'x2l ', which reads as the reverse of the names -- confirm that this
+// matches the C header before relying on either.
+cmsStageSignature = (
+    cmsSigCurveSetElemType              = $63767374,  //'cvst'
+    cmsSigMatrixElemType                = $6D617466,  //'matf'
+    cmsSigCLutElemType                  = $636C7574,  //'clut'
+
+    cmsSigBAcsElemType                  = $62414353,  // 'bACS'
+    cmsSigEAcsElemType                  = $65414353,  // 'eACS'
+
+    // Custom from here, not in the ICC Spec
+    cmsSigXYZ2LabElemType               = $6C327820,  // 'l2x '
+    cmsSigLab2XYZElemType               = $78326C20,  // 'x2l '
+    cmsSigNamedColorElemType            = $6E636C20,  // 'ncl '
+    cmsSigLabV2toV4                     = $32203420,  // '2 4 '
+    cmsSigLabV4toV2                     = $34203220,  // '4 2 '
+
+    // Identities
+    cmsSigIdentityElemType              = $69646E20   // 'idn '
+);
+
+// Types of CurveElements
+// Each enumerator's ordinal is the four-character signature shown in its
+// trailing comment, packed one ASCII byte per character with the first
+// character in the most significant byte.
+cmsCurveSegSignature = (
+
+    cmsSigFormulaCurveSeg               = $70617266, // 'parf'
+    cmsSigSampledCurveSeg               = $73616D66, // 'samf'
+    cmsSigSegmentedCurve                = $63757266  // 'curf'
+);
+
+CONST
+
+    // Used in ResponseCurveType
+    // Plain integer constants; each packs the four characters of its trailing
+    // comment (including any spaces) with the first character in the most
+    // significant byte.
+    cmsSigStatusA                    = $53746141; //'StaA'
+    cmsSigStatusE                    = $53746145; //'StaE'
+    cmsSigStatusI                    = $53746149; //'StaI'
+    cmsSigStatusT                    = $53746154; //'StaT'
+    cmsSigStatusM                    = $5374614D; //'StaM'
+    cmsSigDN                         = $444E2020; //'DN  '
+    cmsSigDNP                        = $444E2050; //'DN P'
+    cmsSigDNN                        = $444E4E20; //'DNN '
+    cmsSigDNNP                       = $444E4E50; //'DNNP'
+
+    // Device attributes, currently defined values correspond to the low 4 bytes
+    // of the 8 byte attribute quantity
+    // cmsReflective and cmsGlossy are both 0, so they cannot be told apart by
+    // value. NOTE(review): presumably these are bit flags (bit 0 selects
+    // transparency, bit 1 selects matte) and the zero names denote the flag
+    // being clear -- confirm against the ICC header attribute definition.
+    cmsReflective     = 0;
+    cmsTransparency   = 1;
+    cmsGlossy         = 0;
+    cmsMatte          = 2;
+
+TYPE
+
+// Common structures in ICC tags
+// Two 32-bit fields (len, flag) followed by a byte array declared with two
+// elements ([0..1]).
+// NOTE(review): data looks like a placeholder for a variable-length payload;
+// if the C-side struct declares a one-element array the sizes differ --
+// confirm before using SizeOf on this record.
+cmsICCData = PACKED RECORD
+     len  :    cmsUInt32Number;
+     flag :    cmsUInt32Number;
+     data : Array [0..1] of cmsUInt8Number;
+    END;
+
+// ICC date time
+// Six unsigned 16-bit fields stored back to back (PACKED), from year down to
+// seconds. Used for the date field of cmsICCHeader.
+cmsDateTimeNumber = PACKED RECORD
+    year:     cmsUInt16Number;
+    month:    cmsUInt16Number;
+    day:      cmsUInt16Number;
+    hours:    cmsUInt16Number;
+    minutes:  cmsUInt16Number;
+    seconds:  cmsUInt16Number;
+END;
+
+// ICC XYZ
+// Three cmsS15Fixed16Number (32-bit signed fixed point) components, in
+// contrast to cmsCIEXYZ which holds the same triple as doubles. Used for the
+// illuminant field of cmsICCHeader.
+
+cmsEncodedXYZNumber = PACKED RECORD
+      X: cmsS15Fixed16Number;
+      Y: cmsS15Fixed16Number;
+      Z: cmsS15Fixed16Number;
+END;
+
+
+// Profile ID as computed by MD5 algorithm
+// Variant record: the three arms overlay the same storage, giving the ID as
+// 16 bytes, as 8 16-bit words or as 4 32-bit words.
+cmsProfileID = PACKED RECORD
+    CASE Integer OF
+    1: (ID8: Array[0..15] OF cmsUInt8Number);
+    2: (ID16: Array[0..7] OF cmsUInt16Number);
+    3: (ID32: Array[0..3] OF cmsUInt32Number);
+END;
+
+
+
+// ----------------------------------------------------------------------------------------------
+// ICC profile internal base types. Strictly, these should not be declared in this unit, but
+// they are provided for anyone who wants to access the profile header directly.
+
+// Profile header -- it is 32-bit aligned, so no issues are expected on alignment
+// NOTE(review): the field sizes add up to 128 bytes only if each enumerated
+// field (deviceClass, colorSpace, pcs, platform) occupies 4 bytes -- confirm
+// the compiler's enumeration size setting when overlaying this record on raw
+// profile data.
+cmsICCHeader = PACKED RECORD
+         size:           cmsUInt32Number;          // Profile size in bytes
+         cmmId:          cmsSignature;             // CMM for this profile
+         version:        cmsUInt32Number;          // Format version number
+         deviceClass:    cmsProfileClassSignature; // Type of profile
+         colorSpace:     cmsColorSpaceSignature;   // Color space of data
+         pcs:            cmsColorSpaceSignature;   // PCS, XYZ or Lab only
+         date:           cmsDateTimeNumber;        // Date profile was created
+         magic:          cmsSignature;             // Magic Number to identify an ICC profile
+         platform:       cmsPlatformSignature;     // Primary Platform
+         flags:          cmsUInt32Number;          // Various bit settings
+         manufacturer:   cmsSignature;             // Device manufacturer
+         model:          cmsUInt32Number;          // Device model number
+         attributes:     cmsUInt64Number;          // Device attributes
+         renderingIntent:cmsUInt32Number;          // Rendering intent
+         illuminant:     cmsEncodedXYZNumber;      // Profile illuminant
+         creator:        cmsSignature;             // Profile creator
+         profileID:      cmsProfileID;             // Profile ID using MD5
+         reserved: array [0..27] of cmsInt8Number; // Reserved for future use
+END;
+
+// ICC base tag
+// A tag type signature followed by four reserved bytes.
+cmsTagBase = PACKED RECORD
+     sig:         cmsTagTypeSignature;
+     reserved:    array[0..3] of cmsInt8Number;
+END;
+
+// A tag entry in directory
+// Pairs a tag signature with the 32-bit offset and size of that tag's data.
+cmsTagEntry = PACKED RECORD
+    sig:    cmsTagSignature;   // The tag signature
+    offset: cmsUInt32Number;   // Start of tag
+    size:   cmsUInt32Number;   // Size in bytes
+END;
+
+
+// Handle types. All four are aliases of the untyped Pointer, so the compiler
+// will not flag one kind of handle being passed where another is expected.
+cmsContext    = Pointer;              // Context identifier for multithreaded environments
+cmsHANDLE     = Pointer;              // Generic handle
+cmsHPROFILE   = Pointer;              // Opaque typedefs to hide internals
+cmsHTRANSFORM = Pointer;
+
+
+CONST
+
+     cmsMAXCHANNELS  = 16;                // Maximum number of channels in ICC profiles
+
+// Format of pixel is defined by one cmsUInt32Number, using bit fields as follows
+//
+//            A O TTTTT U Y F P X S EEE CCCC BBB
+//
+//            A: Floating point -- With this flag we can differentiate 16 bits as float and as int
+//            O: Optimized -- previous optimization already returns the final 8-bit value
+//            T: Pixeltype
+//            F: Flavor  0=MinIsBlack(Chocolate) 1=MinIsWhite(Vanilla)
+//            P: Planar? 0=Chunky, 1=Planar
+//            X: swap 16 bps endianness?
+//            S: Do swap? ie, BGR, KYMC
+//            E: Extra samples
+//            C: Channels (Samples per pixel)
+//            B: bytes per sample
+//            Y: Swap first - changes ABGR to BGRA and KCMY to CMYK
+
+    // Interface declarations only; the bodies are in the implementation
+    // section. Each takes and returns a cmsUInt32Number.
+    // NOTE(review): by their names these presumably move a field value into
+    // its position inside a pixel-format descriptor (see the bit layout
+    // described above) -- confirm against the implementation.
+    FUNCTION FLOAT_SH(a: cmsUInt32Number): cmsUInt32Number;
+    FUNCTION OPTIMIZED_SH(s: cmsUInt32Number): cmsUInt32Number;
+    FUNCTION COLORSPACE_SH(s: cmsUInt32Number):cmsUInt32Number;
+    FUNCTION SWAPFIRST_SH(s: cmsUInt32Number):cmsUInt32Number;
+    FUNCTION FLAVOR_SH(s: cmsUInt32Number):cmsUInt32Number;
+    FUNCTION PLANAR_SH(p: cmsUInt32Number):cmsUInt32Number;
+    FUNCTION ENDIAN16_SH(e: cmsUInt32Number):cmsUInt32Number;
+    FUNCTION DOSWAP_SH(e: cmsUInt32Number):cmsUInt32Number;
+    FUNCTION EXTRA_SH(e: cmsUInt32Number):cmsUInt32Number;
+    FUNCTION CHANNELS_SH(c: cmsUInt32Number):cmsUInt32Number;
+    FUNCTION BYTES_SH(b: cmsUInt32Number):cmsUInt32Number;
+
+
+    // Interface declarations only; the bodies are in the implementation
+    // section. There is one T_* function for each *_SH function.
+    // NOTE(review): by their names these presumably extract the corresponding
+    // field from a pixel-format descriptor -- confirm against the
+    // implementation.
+    FUNCTION T_FLOAT(a: cmsUInt32Number): cmsUInt32Number;
+    FUNCTION T_OPTIMIZED(o: cmsUInt32Number): cmsUInt32Number;
+    FUNCTION T_COLORSPACE(s: cmsUInt32Number): cmsUInt32Number;
+    FUNCTION T_SWAPFIRST(s: cmsUInt32Number): cmsUInt32Number;
+    FUNCTION T_FLAVOR(s: cmsUInt32Number): cmsUInt32Number;
+    FUNCTION T_PLANAR(p: cmsUInt32Number): cmsUInt32Number;
+    FUNCTION T_ENDIAN16(e: cmsUInt32Number): cmsUInt32Number;
+    FUNCTION T_DOSWAP(e: cmsUInt32Number): cmsUInt32Number;
+    FUNCTION T_EXTRA(e: cmsUInt32Number): cmsUInt32Number;
+    FUNCTION T_CHANNELS(c: cmsUInt32Number): cmsUInt32Number;
+    FUNCTION T_BYTES(b: cmsUInt32Number): cmsUInt32Number;
+
+CONST
+
+
+// Pixel types
+// These are the values of the pixel-type (T) field of a format descriptor.
+// In the TYPE_* constants they appear in the hex digits above the low 16
+// bits, e.g. TYPE_GRAY_8 = $030009 carries PT_GRAY (3) and TYPE_LabV2_8 =
+// $1e0019 carries PT_LabV2 (30 = $1e).
+
+    PT_ANY     =  0;    // Don't check colorspace
+                      // 1 & 2 are reserved
+    PT_GRAY    =  3;
+    PT_RGB     =  4;
+    PT_CMY     =  5;
+    PT_CMYK    =  6;
+    PT_YCbCr   =  7;
+    PT_YUV     =  8;      // Lu'v'
+    PT_XYZ     =  9;
+    PT_Lab     =  10;
+    PT_YUVK    =  11;     // Lu'v'K
+    PT_HSV     =  12;
+    PT_HLS     =  13;
+    PT_Yxy     =  14;
+
+    PT_MCH1    =  15;
+    PT_MCH2    =  16;
+    PT_MCH3    =  17;
+    PT_MCH4    =  18;
+    PT_MCH5    =  19;
+    PT_MCH6    =  20;
+    PT_MCH7    =  21;
+    PT_MCH8    =  22;
+    PT_MCH9    =  23;
+    PT_MCH10   =  24;
+    PT_MCH11   =  25;
+    PT_MCH12   =  26;
+    PT_MCH13   =  27;
+    PT_MCH14   =  28;
+    PT_MCH15   =  29;
+
+    PT_LabV2   =  30;     // Identical to PT_Lab, but using the V2 old encoding
+
+
+    // Format descriptors
+    // Precomputed pixel-format words. Within this table the low hex digits
+    // carry bytes-per-sample, channel count and extra samples, and the
+    // modifiers show up as single bits: $0400 = swapped channel order (BGR /
+    // ABGR), $0800 = 16-bit endian swap (_SE), $1000 = planar, $2000 =
+    // reversed flavor (_REV), $4000 = swap first (ARGB / BGRA).
+    // TYPE_BGRA_16_SE is TYPE_BGRA_16 plus the $0800 endian bit, like every
+    // other _SE variant; it must keep the $0400 swap bit, otherwise it
+    // describes ARGB data instead of BGRA.
+    TYPE_GRAY_8          = $030009;
+    TYPE_GRAY_8_REV      = $032009;
+    TYPE_GRAY_16         = $03000a;
+    TYPE_GRAY_16_REV     = $03200a;
+    TYPE_GRAY_16_SE      = $03080a;
+    TYPE_GRAYA_8         = $030089;
+    TYPE_GRAYA_16        = $03008a;
+    TYPE_GRAYA_16_SE     = $03088a;
+    TYPE_GRAYA_8_PLANAR  = $031089;
+    TYPE_GRAYA_16_PLANAR = $03108a;
+    TYPE_RGB_8           = $040019;
+    TYPE_RGB_8_PLANAR    = $041019;
+    TYPE_BGR_8           = $040419;
+    TYPE_BGR_8_PLANAR    = $041419;
+    TYPE_RGB_16          = $04001a;
+    TYPE_RGB_16_PLANAR   = $04101a;
+    TYPE_RGB_16_SE       = $04081a;
+    TYPE_BGR_16          = $04041a;
+    TYPE_BGR_16_PLANAR   = $04141a;
+    TYPE_BGR_16_SE       = $040c1a;
+    TYPE_RGBA_8          = $040099;
+    TYPE_RGBA_8_PLANAR   = $041099;
+    TYPE_ARGB_8_PLANAR   = $045099;
+    TYPE_ABGR_8_PLANAR   = $041499;
+    TYPE_BGRA_8_PLANAR   = $045499;
+    TYPE_RGBA_16         = $04009a;
+    TYPE_RGBA_16_PLANAR  = $04109a;
+    TYPE_RGBA_16_SE      = $04089a;
+    TYPE_ARGB_8          = $044099;
+    TYPE_ARGB_16         = $04409a;
+    TYPE_ABGR_8          = $040499;
+    TYPE_ABGR_16         = $04049a;
+    TYPE_ABGR_16_PLANAR  = $04149a;
+    TYPE_ABGR_16_SE      = $040c9a;
+    TYPE_BGRA_8          = $044499;
+    TYPE_BGRA_16         = $04449a;
+    TYPE_BGRA_16_SE      = $044c9a;
+    // Remaining integer format descriptors: CMY, CMYK and its reversed and
+    // reordered variants, the 5- to 12-channel families, then XYZ, Lab, Yxy,
+    // YCbCr, YUV, HLS and HSV.
+    // TYPE_YUVK_8 has the same value as TYPE_CMYK_8_REV and TYPE_YUVK_16 the
+    // same value as TYPE_CMYK_16_REV, so each pair is two names for one
+    // descriptor.
+    TYPE_CMY_8           = $050019;
+    TYPE_CMY_8_PLANAR    = $051019;
+    TYPE_CMY_16          = $05001a;
+    TYPE_CMY_16_PLANAR   = $05101a;
+    TYPE_CMY_16_SE       = $05081a;
+    TYPE_CMYK_8          = $060021;
+    TYPE_CMYKA_8         = $0600a1;
+    TYPE_CMYK_8_REV      = $062021;
+    TYPE_YUVK_8          = $062021;
+    TYPE_CMYK_8_PLANAR   = $061021;
+    TYPE_CMYK_16         = $060022;
+    TYPE_CMYK_16_REV     = $062022;
+    TYPE_YUVK_16         = $062022;
+    TYPE_CMYK_16_PLANAR  = $061022;
+    TYPE_CMYK_16_SE      = $060822;
+    TYPE_KYMC_8          = $060421;
+    TYPE_KYMC_16         = $060422;
+    TYPE_KYMC_16_SE      = $060c22;
+    TYPE_KCMY_8          = $064021;
+    TYPE_KCMY_8_REV      = $066021;
+    TYPE_KCMY_16         = $064022;
+    TYPE_KCMY_16_REV     = $066022;
+    TYPE_KCMY_16_SE      = $064822;
+    TYPE_CMYK5_8         = $130029;
+    TYPE_CMYK5_16        = $13002a;
+    TYPE_CMYK5_16_SE     = $13082a;
+    TYPE_KYMC5_8         = $130429;
+    TYPE_KYMC5_16        = $13042a;
+    TYPE_KYMC5_16_SE     = $130c2a;
+    TYPE_CMYK6_8         = $140031;
+    TYPE_CMYK6_8_PLANAR  = $141031;
+    TYPE_CMYK6_16        = $140032;
+    TYPE_CMYK6_16_PLANAR = $141032;
+    TYPE_CMYK6_16_SE     = $140832;
+    TYPE_CMYK7_8         = $150039;
+    TYPE_CMYK7_16        = $15003a;
+    TYPE_CMYK7_16_SE     = $15083a;
+    TYPE_KYMC7_8         = $150439;
+    TYPE_KYMC7_16        = $15043a;
+    TYPE_KYMC7_16_SE     = $150c3a;
+    TYPE_CMYK8_8         = $160041;
+    TYPE_CMYK8_16        = $160042;
+    TYPE_CMYK8_16_SE     = $160842;
+    TYPE_KYMC8_8         = $160441;
+    TYPE_KYMC8_16        = $160442;
+    TYPE_KYMC8_16_SE     = $160c42;
+    TYPE_CMYK9_8         = $170049;
+    TYPE_CMYK9_16        = $17004a;
+    TYPE_CMYK9_16_SE     = $17084a;
+    TYPE_KYMC9_8         = $170449;
+    TYPE_KYMC9_16        = $17044a;
+    TYPE_KYMC9_16_SE     = $170c4a;
+    TYPE_CMYK10_8        = $180051;
+    TYPE_CMYK10_16       = $180052;
+    TYPE_CMYK10_16_SE    = $180852;
+    TYPE_KYMC10_8        = $180451;
+    TYPE_KYMC10_16       = $180452;
+    TYPE_KYMC10_16_SE    = $180c52;
+    TYPE_CMYK11_8        = $190059;
+    TYPE_CMYK11_16       = $19005a;
+    TYPE_CMYK11_16_SE    = $19085a;
+    TYPE_KYMC11_8        = $190459;
+    TYPE_KYMC11_16       = $19045a;
+    TYPE_KYMC11_16_SE    = $190c5a;
+    TYPE_CMYK12_8        = $1a0061;
+    TYPE_CMYK12_16       = $1a0062;
+    TYPE_CMYK12_16_SE    = $1a0862;
+    TYPE_KYMC12_8        = $1a0461;
+    TYPE_KYMC12_16       = $1a0462;
+    TYPE_KYMC12_16_SE    = $1a0c62;
+    TYPE_XYZ_16          = $09001a;
+    TYPE_Lab_8           = $0a0019;
+    TYPE_ALab_8          = $0a0499;
+    TYPE_Lab_16          = $0a001a;
+    TYPE_Yxy_16          = $0e001a;
+    TYPE_YCbCr_8         = $070019;
+    TYPE_YCbCr_8_PLANAR  = $071019;
+    TYPE_YCbCr_16        = $07001a;
+    TYPE_YCbCr_16_PLANAR = $07101a;
+    TYPE_YCbCr_16_SE     = $07081a;
+    TYPE_YUV_8           = $080019;
+    TYPE_YUV_8_PLANAR    = $081019;
+    TYPE_YUV_16          = $08001a;
+    TYPE_YUV_16_PLANAR   = $08101a;
+    TYPE_YUV_16_SE       = $08081a;
+    TYPE_HLS_8           = $0d0019;
+    TYPE_HLS_8_PLANAR    = $0d1019;
+    TYPE_HLS_16          = $0d001a;
+    TYPE_HLS_16_PLANAR   = $0d101a;
+    TYPE_HLS_16_SE       = $0d081a;
+    TYPE_HSV_8           = $0c0019;
+    TYPE_HSV_8_PLANAR    = $0c1019;
+    TYPE_HSV_16          = $0c001a;
+    TYPE_HSV_16_PLANAR   = $0c101a;
+    TYPE_HSV_16_SE       = $0c081a;
+
+    // Carries no pixel-type digits at all: only the low byte ($0A) is set.
+    TYPE_NAMED_COLOR_INDEX = $000A;
+
+    // Floating-point descriptors. Each is the matching integer layout with
+    // the floating-point flag ($400000) added: the _FLT values use 4 bytes per
+    // sample, the _DBL values leave the bytes-per-sample field at 0, and the
+    // _HALF_FLT values use 2 bytes per sample.
+    TYPE_XYZ_FLT         = $49001c;
+    TYPE_Lab_FLT         = $4a001c;
+    TYPE_GRAY_FLT        = $43000c;
+    TYPE_RGB_FLT         = $44001c;
+    TYPE_CMYK_FLT        = $460024;
+    TYPE_XYZA_FLT        = $49009c;
+    TYPE_LabA_FLT        = $4a009c;
+    TYPE_RGBA_FLT        = $44009c;
+
+    TYPE_XYZ_DBL         = $490018;
+    TYPE_Lab_DBL         = $4a0018;
+    TYPE_GRAY_DBL        = $430008;
+    TYPE_RGB_DBL         = $440018;
+    TYPE_CMYK_DBL        = $460020;
+    TYPE_LabV2_8         = $1e0019;
+    TYPE_ALabV2_8        = $1e0499;
+    TYPE_LabV2_16        = $1e001a;
+
+    TYPE_GRAY_HALF_FLT   = $43000a;
+    TYPE_RGB_HALF_FLT    = $44001a;
+    TYPE_RGBA_HALF_FLT   = $44009a;
+    TYPE_CMYK_HALF_FLT   = $460022;
+
+    TYPE_ARGB_HALF_FLT   = $44409a;
+    TYPE_BGR_HALF_FLT    = $44041a;
+    TYPE_BGRA_HALF_FLT   = $44449a;
+    // ABGR carries one extra (alpha) sample, so it is TYPE_ABGR_16 with the
+    // floating-point flag set; it must differ from TYPE_BGR_HALF_FLT, which
+    // has no alpha sample.
+    TYPE_ABGR_HALF_FLT   = $44049a;
+
+TYPE
+
+
+  // Colorimetric spaces
+  // Each record is a triple of cmsFloat64Number (Double) components, or a
+  // triple of such records for the *TRIPLE types, and each is followed by its
+  // pointer type. Pascal identifiers are case-insensitive, so ^cmsCIEXYY and
+  // ^cmsJCH below refer to cmsCIExyY and cmsJCh.
+
+      cmsCIEXYZ = PACKED RECORD
+                        X, Y, Z : cmsFloat64Number;
+                    END;
+      LPcmsCIEXYZ = ^cmsCIEXYZ;
+
+      // The third component is named YY because Y would clash with y in a
+      // case-insensitive language.
+      cmsCIExyY = PACKED RECORD
+                        x, y, YY : cmsFloat64Number
+                        END;
+      LPcmsCIExyY = ^cmsCIEXYY;
+
+      cmsCIELab = PACKED RECORD
+                  L, a, b: cmsFloat64Number
+                  END;
+      LPcmsCIELab = ^cmsCIELab;
+
+     cmsCIELCh = PACKED RECORD
+                  L, C, h : cmsFloat64Number
+                  END;
+     LPcmsCIELCh = ^cmsCIELCh;
+
+     cmsJCh = PACKED RECORD
+                  J, C, h : cmsFloat64Number
+                  END;
+     LPcmsJCh = ^cmsJCH;
+
+
+     // One cmsCIEXYZ value per primary
+     cmsCIEXYZTRIPLE = PACKED RECORD
+                        Red, Green, Blue : cmsCIEXYZ
+                        END;
+     LPcmsCIEXYZTRIPLE = ^cmsCIEXYZTRIPLE;
+
+
+      // One cmsCIExyY value per primary
+      cmsCIExyYTRIPLE = PACKED RECORD
+                        Red, Green, Blue : cmsCIExyY
+                        END;
+      LPcmsCIExyYTRIPLE = ^cmsCIExyYTRIPLE;
+
+
+CONST
+
+    // Illuminant types for the IlluminantType field of the structs below
+    cmsILLUMINANT_TYPE_UNKNOWN = $0000000;
+    cmsILLUMINANT_TYPE_D50     = $0000001;
+    cmsILLUMINANT_TYPE_D65     = $0000002;
+    cmsILLUMINANT_TYPE_D93     = $0000003;
+    cmsILLUMINANT_TYPE_F2      = $0000004;
+    cmsILLUMINANT_TYPE_D55     = $0000005;
+    cmsILLUMINANT_TYPE_A       = $0000006;
+    cmsILLUMINANT_TYPE_E       = $0000007;
+    cmsILLUMINANT_TYPE_F8      = $0000008;
+
+TYPE
+
+    cmsICCMeasurementConditions = packed record
+        // NOTE(review): "packed" leaves no padding between the 32-bit and 64-bit fields -- confirm this matches the C struct layout.
+        Observer       : cmsUInt32Number;   // 0 = unknown, 1=CIE 1931, 2=CIE 1964
+        Backing        : cmsCIEXYZ;         // Value of backing
+        Geometry       : cmsUInt32Number;   // 0=unknown, 1=45/0, 0/45, 2=0d, d/0
+        Flare          : cmsFloat64Number;  // 0..1.0
+        IlluminantType : cmsUInt32Number;   // presumably one of the cmsILLUMINANT_TYPE_* constants -- confirm
+
+    end;
+
+    cmsICCViewingConditions = packed record
+        IlluminantXYZ  : cmsCIEXYZ;         // Not the same struct as CAM02,
+        SurroundXYZ    : cmsCIEXYZ;         // This is for storing the tag
+        IlluminantType : cmsUInt32Number;   // viewing condition
+    end;
+
+
+// Context   --------------------------------------------------------------------------------------------------------------
+// cmsContext handles: created from a Plugin and a UserData pointer, deleted, duplicated with NewUserData; cmsGetContextUserData returns a Pointer.
+FUNCTION  cmsCreateContext(Plugin : Pointer; UserData : Pointer) : cmsContext; StdCall;
+PROCEDURE cmsDeleteContext(ContextID: cmsContext); StdCall;
+FUNCTION  cmsDupContext(ContextID: cmsContext; NewUserData: Pointer): cmsContext; StdCall;
+FUNCTION  cmsGetContextUserData(ContextID: cmsContext): Pointer;  StdCall;
+
+// Plug-In registering  ---------------------------------------------------------------------------------------------------
+// cmsPlugin takes an untyped pointer and returns a cmsBool; cmsUnregisterPlugins takes no arguments.
+FUNCTION  cmsPlugin(Plugin: Pointer): cmsBool; StdCall;
+PROCEDURE cmsUnregisterPlugins; StdCall;
+
+// Error logging ----------------------------------------------------------------------------------------------------------
+
+// There is no error handling at all. When a function fails, it returns a proper value.
+// For example, all create functions return NULL on failure. Others may return FALSE.
+// It may be interesting, for the developer, to know why the function is failing.
+// For that reason, lcms2 offers a logging function. This function will get
+// an ENGLISH string with some clues on what is going wrong. You can show this
+// info to the end user if you wish, or just create some sort of log on disk.
+// The logging function should NOT terminate the program, as this obviously can leave
+// unfreed resources. It is the programmer's responsibility to check each function
+// return code to make sure it didn't fail.
+
+CONST
+    // NOTE(review): presumably the values the log handler receives in its ErrorCode argument -- confirm.
+    cmsERROR_UNDEFINED                  =  0;
+    cmsERROR_FILE                       =  1;
+    cmsERROR_RANGE                      =  2;
+    cmsERROR_INTERNAL                   =  3;
+    cmsERROR_NULL                       =  4;
+    cmsERROR_READ                       =  5;
+    cmsERROR_SEEK                       =  6;
+    cmsERROR_WRITE                      =  7;
+    cmsERROR_UNKNOWN_EXTENSION          =  8;
+    cmsERROR_COLORSPACE_CHECK           =  9;
+    cmsERROR_ALREADY_DEFINED            =  10;
+    cmsERROR_BAD_SIGNATURE              =  11;
+    cmsERROR_CORRUPTION_DETECTED        =  12;
+    cmsERROR_NOT_SUITABLE               =  13;
+
+// Error logger is called with the ContextID when a message is raised. This gives the
+// chance to know which thread is responsible for the warning and any environment associated
+// with it. Non-multithreading applications may safely ignore this parameter.
+// Note that under certain special circumstances, ContextID may be NULL.
+
+TYPE
+    // Logger callback signature. Note that it is declared CDecl, while the routine that installs it is StdCall.
+    cmsLogErrorHandlerFunction = PROCEDURE( ContextID: cmsContext; ErrorCode: cmsUInt32Number; Text: PAnsiChar); CDecl;
+
+    // Allows the user to install a specific logger callback
+    PROCEDURE cmsSetLogErrorHandler(Fn: cmsLogErrorHandlerFunction); StdCall;
+
+
+// Conversions --------------------------------------------------------------------------------------------------------------
+
+
+// Return pointers to constant structs (no arguments)
+FUNCTION cmsD50_XYZ: LPcmsCIEXYZ; StdCall;
+FUNCTION cmsD50_xyY: LPcmsCIExyY; StdCall;
+
+// Colorimetric space conversions. Going by the parameter names, cmsXYZ2xyY/cmsxyY2XYZ take Dest before Source.
+PROCEDURE cmsXYZ2xyY(Dest: LPcmsCIExyY; Source: LPcmsCIEXYZ); StdCall;
+PROCEDURE cmsxyY2XYZ(Dest: LPcmsCIEXYZ; Source: LPcmsCIExyY); StdCall;
+PROCEDURE cmsLab2XYZ(WhitePoint: LPcmsCIEXYZ; xyz: LPcmsCIEXYZ; Lab: LPcmsCIELab); StdCall;
+PROCEDURE cmsXYZ2Lab(WhitePoint: LPcmsCIEXYZ; Lab: LPcmsCIELab; xyz: LPcmsCIEXYZ); StdCall;
+PROCEDURE cmsLab2LCh(LCh: LPcmsCIELCh; Lab: LPcmsCIELab); StdCall;
+PROCEDURE cmsLCh2Lab(Lab: LPcmsCIELab; LCh: LPcmsCIELCh); StdCall;
+
+// Encoding/decoding on PCS; the encoded side (wLab / XYZ) is passed as an untyped Pointer
+PROCEDURE cmsLabEncoded2Float(Lab: LPcmsCIELab; wLab: Pointer); StdCall;
+PROCEDURE cmsLabEncoded2FloatV2(Lab: LPcmsCIELab; wLab: Pointer); StdCall;
+PROCEDURE cmsFloat2LabEncoded(wLab: Pointer; Lab: LPcmsCIELab); StdCall;
+PROCEDURE cmsFloat2LabEncodedV2(wLab: Pointer; Lab: LPcmsCIELab); StdCall;
+PROCEDURE cmsXYZEncoded2Float(fxyz : LPcmsCIEXYZ; XYZ: Pointer); StdCall;
+PROCEDURE cmsFloat2XYZEncoded(XYZ: Pointer; fXYZ: LPcmsCIEXYZ); StdCall;
+
+
+// DeltaE metrics: each takes two LPcmsCIELab values and returns a Double; cmsCIE2000DeltaE also takes Kl, Kc, Kh
+FUNCTION cmsDeltaE(Lab1, Lab2: LPcmsCIELab): Double; StdCall;
+FUNCTION cmsCIE94DeltaE(Lab1, Lab2: LPcmsCIELab): Double; StdCall;
+FUNCTION cmsBFDdeltaE(Lab1, Lab2: LPcmsCIELab): Double; StdCall;
+FUNCTION cmsCMCdeltaE(Lab1, Lab2: LPcmsCIELab): Double; StdCall;
+FUNCTION cmsCIE2000DeltaE(Lab1, Lab2: LPcmsCIELab; Kl, Kc, Kh: Double): Double; StdCall;
+
+
+// Temperature <-> Chromaticity (Black body). WhitePoint is always a var parameter; the temperature is by value in the first routine and var ("TeampK", sic) in the second
+FUNCTION  cmsWhitePointFromTemp(var WhitePoint: cmsCIExyY; TempK: cmsFloat64Number) : cmsBool; StdCall;
+FUNCTION  cmsTempFromWhitePoint(var TeampK: cmsFloat64Number; var WhitePoint: cmsCIExyY) : cmsBool; StdCall;
+
+
+// Chromatic adaptation. All four arguments are LPcmsCIEXYZ pointers (the first is named Result); returns a cmsBool
+FUNCTION cmsAdaptToIlluminant(Result: LPcmsCIEXYZ; SourceWhitePt: LPcmsCIEXYZ;
+                              Illuminant: LPcmsCIEXYZ; Value: LPcmsCIEXYZ): cmsBool; StdCall;
+
+
+// CIECAM02 ---------------------------------------------------------------------------------------------------
+
+// Viewing conditions. Please note those are CAM model viewing conditions, and not the ICC tag viewing
+// conditions, which I'm naming cmsICCViewingConditions to make differences evident. Unfortunately, the tag
+// cannot deal with surround La, Yb and D value so is basically useless to store CAM02 viewing conditions.
+
+ CONST
+    // NOTE(review): presumably the allowed values of cmsViewingConditions.surround -- confirm.
+    AVG_SURROUND       = 1;
+    DIM_SURROUND       = 2;
+    DARK_SURROUND      = 3;
+    CUTSHEET_SURROUND  = 4;
+    // NOTE(review): presumably a sentinel for cmsViewingConditions.D_value -- confirm.
+    D_CALCULATE        = -1;
+
+  TYPE
+
+    cmsViewingConditions = packed record
+        // NOTE(review): "packed" places D_value directly after the Integer with no padding -- confirm against the C struct.
+        WhitePoint : cmsCIEXYZ;
+        Yb         : cmsFloat64Number;
+        La         : cmsFloat64Number;
+        surround   : Integer;
+        D_value    : cmsFloat64Number;
+    end;
+
+    // Pointer form of the record above.
+    LPcmsViewingConditions = ^cmsViewingConditions;
+// The CIECAM02 model handle is an untyped Pointer: Init returns it; Done, Forward and Reverse take it as hModel.
+FUNCTION    cmsCIECAM02Init(pVC : LPcmsViewingConditions ) : Pointer; StdCall;
+PROCEDURE   cmsCIECAM02Done(hModel : Pointer); StdCall;
+PROCEDURE   cmsCIECAM02Forward(hModel: Pointer; pIn: LPcmsCIEXYZ; pOut: LPcmsJCh ); StdCall;
+PROCEDURE   cmsCIECAM02Reverse(hModel: Pointer; pIn: LPcmsJCh;   pOut: LPcmsCIEXYZ ); StdCall;
+
+// Tone curves -----------------------------------------------------------------------------------------
+
+// This describes a curve segment. For a table of supported types, see the manual. User can increase the number of
+// available types by using a proper plug-in. Parametric segments allow 10 parameters at most
+
+TYPE
+cmsCurveSegment = packed record
+    x0, x1        : cmsFloat32Number;                  // Domain; for x0 < x <= x1
+    PType         : cmsInt32Number;                    // Parametric type; PType = 0 means sampled segment. Negative values are reserved
+    Params        : array [0..9] of cmsFloat64Number;  // Parameters if PType <> 0
+    nGridPoints   : cmsUInt32Number;                   // Number of grid points if PType = 0
+    SampledPoints : LPcmsFloat32Number;                // Points to an array of floats if PType = 0
+end;
+
+LPcmsToneCurve = Pointer;
+LPcmsCurveSegmentArray = ^cmsCurveSegmentArray;
+cmsCurveSegmentArray = array[0..0] of cmsCurveSegment;
+// Each array type here is declared with the single index 0 and is paired with a pointer type.
+LPcmsFloat64NumberArray = ^cmsFloat64NumberArray;
+cmsFloat64NumberArray = array[0..0] of cmsFloat64Number;
+
+LPcmsUInt16NumberArray = ^cmsUInt16NumberArray;
+cmsUInt16NumberArray = array[0..0] of cmsUInt16Number;
+
+LPcmsFloat32NumberArray = ^cmsFloat32NumberArray;
+cmsFloat32NumberArray = array[0..0] of cmsFloat32Number;
+// Despite its LP prefix, LPcmsToneCurveArray is the array type; LPLPcmsToneCurveArray is the pointer to it.
+LPLPcmsToneCurveArray = ^LPcmsToneCurveArray;
+LPcmsToneCurveArray = array[0..0] of LPcmsToneCurve;
+
+LPcmsUInt32NumberArray = ^cmsUInt32NumberArray;
+cmsUInt32NumberArray = array[0..0] of cmsUInt32Number;
+// Tone-curve routines: the cmsBuild* functions take a ContextID and return an LPcmsToneCurve; the others take existing curve handles.
+FUNCTION  cmsBuildSegmentedToneCurve(ContextID: cmsContext; nSegments: cmsInt32Number; Segments: LPcmsCurveSegmentArray): LPcmsToneCurve; StdCall;
+FUNCTION  cmsBuildParametricToneCurve(ContextID: cmsContext;  CType: cmsInt32Number; Params: LPcmsFloat64NumberArray): LPcmsToneCurve; StdCall;
+FUNCTION  cmsBuildGamma(ContextID: cmsContext; Gamma: cmsFloat64Number): LPcmsToneCurve; StdCall;
+FUNCTION  cmsBuildTabulatedToneCurve16(ContextID: cmsContext; nEntries: cmsInt32Number; values: LPcmsUInt16NumberArray): LPcmsToneCurve; StdCall;
+FUNCTION  cmsBuildTabulatedToneCurveFloat(ContextID: cmsContext; nEntries: cmsUInt32Number; values: LPcmsFloat32NumberArray): LPcmsToneCurve; StdCall;
+PROCEDURE cmsFreeToneCurve(Curve: LPcmsToneCurve); StdCall;
+PROCEDURE cmsFreeToneCurveTriple(Curve: LPLPcmsToneCurveArray); StdCall;
+FUNCTION  cmsDupToneCurve(Src: LPcmsToneCurve): LPcmsToneCurve; StdCall;
+FUNCTION  cmsReverseToneCurve(InGamma: LPcmsToneCurve): LPcmsToneCurve; StdCall;
+FUNCTION  cmsReverseToneCurveEx(nResultSamples: cmsInt32Number; InGamma: LPcmsToneCurve): LPcmsToneCurve; StdCall;
+FUNCTION  cmsJoinToneCurve(ContextID: cmsContext; X, Y: LPcmsToneCurve; nPoints: cmsUInt32Number ): LPcmsToneCurve; StdCall;
+FUNCTION  cmsSmoothToneCurve(Tab: LPcmsToneCurve; lambda: cmsFloat64Number): cmsBool; StdCall;
+FUNCTION  cmsEvalToneCurveFloat(Curve: LPcmsToneCurve; v: cmsFloat32Number):cmsFloat32Number; StdCall;
+FUNCTION  cmsEvalToneCurve16(Curve: LPcmsToneCurve; v:cmsUInt16Number):cmsUInt16Number; StdCall;
+FUNCTION  cmsIsToneCurveMultisegment(InGamma: LPcmsToneCurve):cmsBool; StdCall;
+FUNCTION  cmsIsToneCurveLinear(Curve: LPcmsToneCurve):cmsBool; StdCall;
+FUNCTION  cmsIsToneCurveMonotonic(t: LPcmsToneCurve):cmsBool; StdCall;
+FUNCTION  cmsIsToneCurveDescending(t: LPcmsToneCurve):cmsBool; StdCall;
+FUNCTION  cmsGetToneCurveParametricType(t: LPcmsToneCurve):cmsInt32Number; StdCall;
+FUNCTION  cmsEstimateGamma(t: LPcmsToneCurve; Precision:cmsFloat64Number):cmsFloat64Number; StdCall;
+FUNCTION  cmsGetToneCurveEstimatedTableEntries(t: LPcmsToneCurve): cmsUInt32Number; StdCall;
+FUNCTION  cmsGetToneCurveEstimatedTable(t: LPcmsToneCurve): LPcmsUInt16Number; StdCall;
+
+
+// Implements pipelines of multi-processing elements -------------------------------------------------------------
+// Pipelines and stages are exposed as untyped pointers; LPLPcmsStage is a pointer to a stage pointer.
+TYPE
+    LPcmsPipeline = Pointer;
+    LPcmsStage    = Pointer;
+    LPLPcmsStage   = ^LPcmsStage;
+
+// High-level pipeline routines: allocation, disposal, duplication, and context / channel-count queries
+FUNCTION  cmsPipelineAlloc(ContextID: cmsContext; InputChannels, OutputChannels: cmsUInt32Number): LPcmsPipeline; StdCall;
+PROCEDURE cmsPipelineFree(lut: LPcmsPipeline); StdCall;
+FUNCTION  cmsPipelineDup(Orig: LPcmsPipeline): LPcmsPipeline; StdCall;
+FUNCTION  cmsGetPipelineContextID(lut: LPcmsPipeline) : cmsContext; StdCall;
+FUNCTION  cmsPipelineInputChannels(lut: LPcmsPipeline): cmsUInt32Number; StdCall;
+FUNCTION  cmsPipelineOutputChannels(lut: LPcmsPipeline): cmsUInt32Number; StdCall;
+// Stage count, and access to the first / last stage of a pipeline
+FUNCTION cmsPipelineStageCount(lut: LPcmsPipeline): cmsUInt32Number; StdCall;
+FUNCTION cmsPipelineGetPtrToFirstStage(lut: LPcmsPipeline): LPcmsStage; StdCall;
+FUNCTION cmsPipelineGetPtrToLastStage(lut: LPcmsPipeline): LPcmsStage; StdCall;
+// Evaluation over 16-bit or 32-bit-float sample arrays (Inv / Outv), with the pipeline passed last
+PROCEDURE cmsPipelineEval16(Inv, Outv: LPcmsUInt16NumberArray; lut: LPcmsPipeline); StdCall;
+PROCEDURE cmsPipelineEvalFloat(Inv, Outv: LPcmsFloat32NumberArray; lut: LPcmsPipeline); StdCall;
+
+FUNCTION cmsPipelineEvalReverseFloat(Target, Result, Hint: LPcmsFloat32NumberArray; lut: LPcmsPipeline): cmsBool; StdCall;
+FUNCTION cmsPipelineCat(l1, l2: LPcmsPipeline): cmsBool; StdCall;
+FUNCTION cmsPipelineSetSaveAs8bitsFlag(lut: LPcmsPipeline; On: cmsBool): cmsBool; StdCall;
+
+// Where to place/locate the stages in the pipeline chain
+TYPE
+    cmsStageLoc = (cmsAT_BEGIN = 0, cmsAT_END = 1 );
+// Both routines take the pipeline and a cmsStageLoc; Insert takes a stage, Unlink a pointer to a stage pointer.
+PROCEDURE cmsPipelineInsertStage(lut: LPcmsPipeline; loc: cmsStageLoc; mpe: LPcmsStage); StdCall;
+PROCEDURE cmsPipelineUnlinkStage(lut: LPcmsPipeline; loc: cmsStageLoc; mpe: LPLPcmsStage); StdCall;
+
+// This function is quite useful to analyze the structure of a Pipeline and retrieve the Stage elements
+// that make up the Pipeline. It should be called with the Pipeline, the number of expected elements and
+// then a list of expected types followed by a list of double pointers to Stage elements. If
+// the function finds a match with the current pipeline, it fills the pointers and returns TRUE;
+// if not, it returns FALSE without touching anything.
+// FUNCTION cmsPipelineCheckAndRetreiveStages(const cmsPipeline* Lut, n: cmsUInt32Number, ...): cmsBool; StdCall;
+
+// Matrix has double precision and CLUT has only float precision. That is because an ICC profile can encode
+// matrices with far more precision than CLUTs
+FUNCTION  cmsStageAllocIdentity(ContextID: cmsContext; nChannels: cmsUInt32Number): LPcmsStage; StdCall;
+FUNCTION  cmsStageAllocToneCurves(ContextID: cmsContext; nChannels: cmsUInt32Number; Curves: LPLPcmsToneCurveArray): LPcmsStage; StdCall;
+FUNCTION  cmsStageAllocMatrix(ContextID: cmsContext; Rows, Cols: cmsUInt32Number; Matrix, Offset: LPcmsFloat64NumberArray): LPcmsStage; StdCall;
+// CLUT stages: here nGridPoints is a single cmsUInt32Number...
+FUNCTION  cmsStageAllocCLut16bit(ContextID: cmsContext; nGridPoints: cmsUInt32Number; inputChan, outputChan: cmsUInt32Number; Table: LPcmsUInt16NumberArray): LPcmsStage; StdCall;
+FUNCTION  cmsStageAllocCLutFloat(ContextID: cmsContext; nGridPoints: cmsUInt32Number; inputChan, outputChan: cmsUInt32Number; Table: LPcmsFloat32NumberArray): LPcmsStage; StdCall;
+// ...while the Granular variants take nGridPoints as an LPcmsUInt32NumberArray.
+FUNCTION  cmsStageAllocCLut16bitGranular(ContextID: cmsContext; nGridPoints: LPcmsUInt32NumberArray; inputChan, outputChan: cmsUInt32Number; Table: LPcmsUInt16NumberArray): LPcmsStage; StdCall;
+FUNCTION  cmsStageAllocCLutFloatGranular(ContextID: cmsContext; nGridPoints: LPcmsUInt32NumberArray; inputChan, outputChan: cmsUInt32Number; Table: LPcmsFloat32NumberArray): LPcmsStage; StdCall;
+
+
+FUNCTION  cmsStageDup(mpe: LPcmsStage): LPcmsStage; StdCall;
+PROCEDURE cmsStageFree(mpe: LPcmsStage); StdCall;
+FUNCTION  cmsStageNext(mpe: LPcmsStage): LPcmsStage; StdCall;
+// Per-stage queries: channel counts, stage signature, and an untyped data pointer
+FUNCTION cmsStageInputChannels(mpe: LPcmsStage): cmsUInt32Number; StdCall;
+FUNCTION cmsStageOutputChannels(mpe: LPcmsStage): cmsUInt32Number; StdCall;
+FUNCTION cmsStageType(mpe: LPcmsStage): cmsStageSignature; StdCall;
+FUNCTION cmsStageData(mpe: LPcmsStage): Pointer; StdCall;
+
+// Sampling
+// Sampler callbacks are CDecl, receive Inp/Outp arrays plus an untyped Cargo pointer, and return a cmsInt32Number.
+Type
+    cmsSAMPLER16    = FUNCTION (Inp, Outp: LPcmsUInt16NumberArray; Cargo: Pointer): cmsInt32Number; CDecl;
+    cmsSAMPLERFLOAT = FUNCTION (Inp, Outp: LPcmsFloat32NumberArray; Cargo: Pointer): cmsInt32Number; CDecl;
+
+// Use this flag to prevent changes from being written to the destination
+
+Const
+
+SAMPLER_INSPECT     = $01000000;
+
+
+// For CLUT only
+FUNCTION cmsStageSampleCLut16bit(mpe: LPcmsStage;  Sampler: cmsSAMPLER16;    Cargo: Pointer; dwFlags: cmsUInt32Number): cmsBool; StdCall;
+FUNCTION cmsStageSampleCLutFloat(mpe: LPcmsStage;  Sampler: cmsSAMPLERFLOAT; Cargo: Pointer; dwFlags: cmsUInt32Number): cmsBool; StdCall;
+// NOTE(review): dwFlags is presumably where SAMPLER_INSPECT is passed -- confirm.
+
+// Slicers: take an input count, an array of clutPoints, a sampler callback and its Cargo pointer
+FUNCTION  cmsSliceSpace16(nInputs: cmsUInt32Number; clutPoints: LPcmsUInt32NumberArray;
+                                                   Sampler: cmsSAMPLER16; Cargo: Pointer): cmsBool; StdCall;
+
+FUNCTION cmsSliceSpaceFloat(nInputs: cmsUInt32Number; clutPoints: LPcmsUInt32NumberArray;
+                                                   Sampler: cmsSAMPLERFLOAT; Cargo: Pointer): cmsBool; StdCall;
+
+// Multilocalized Unicode management ---------------------------------------------------------------------------------------
+// MLU objects are exposed as untyped pointers.
+Type
+   LPcmsMLU = Pointer;
+
+Const
+// Both placeholders are three NUL characters.
+cmsNoLanguage = #0#0#0;
+cmsNoCountry  = #0#0#0;
+
+
+FUNCTION  cmsMLUalloc(ContextID: cmsContext; nItems: cmsUInt32Number): LPcmsMLU; StdCall;
+PROCEDURE cmsMLUfree(mlu: LPcmsMLU); StdCall;
+FUNCTION  cmsMLUdup(mlu: LPcmsMLU): LPcmsMLU; StdCall;
+// Setters: language and country codes are PAnsiChar; the text is PAnsiChar (ASCII) or PWChar (Wide).
+FUNCTION  cmsMLUsetASCII(mlu: LPcmsMLU; LanguageCode, CountryCode, ASCIIString: PAnsiChar): cmsBool; StdCall;
+FUNCTION  cmsMLUsetWide(mlu: LPcmsMLU; LanguageCode, CountryCode: PAnsiChar; WideString: PWChar): cmsBool; StdCall;
+// Getters take a caller-supplied Buffer plus BufferSize and return a cmsUInt32Number.
+FUNCTION cmsMLUgetASCII(mlu: LPcmsMLU; LanguageCode, CountryCode: PAnsiChar; Buffer: PAnsiChar; BufferSize: cmsUInt32Number): cmsUInt32Number; StdCall;
+
+FUNCTION cmsMLUgetWide(mlu: LPcmsMLU; LanguageCode, CountryCode: PAnsiChar; Buffer: PWChar; BufferSize: cmsUInt32Number): cmsUInt32Number; StdCall;
+
+FUNCTION cmsMLUgetTranslation(mlu: LPcmsMLU; LanguageCode, CountryCode, ObtainedLanguage, ObtainedCountry: PAnsiChar): cmsBool; StdCall;
+
+// Undercolorremoval & black generation -------------------------------------------------------------------------------------
+
+Type
+
+cmsUcrBg = packed record
+    Ucr, Bg : LPcmsToneCurve;   // two tone-curve handles
+    Desc    : LPcmsMLU;         // MLU handle
+end;
+
+
+// Screening ----------------------------------------------------------------------------------------------------------------
+
+Const
+ // NOTE(review): presumably bit flags for cmsScreening.Flag -- confirm.
+ cmsPRINTER_DEFAULT_SCREENS    = $0001;
+ cmsFREQUENCE_UNITS_LINES_CM   = $0000;
+ cmsFREQUENCE_UNITS_LINES_INCH = $0002;
+ // NOTE(review): presumably the values of cmsScreeningChannel.SpotShape -- confirm.
+ cmsSPOT_UNKNOWN         = 0;
+ cmsSPOT_PRINTER_DEFAULT = 1;
+ cmsSPOT_ROUND           = 2;
+ cmsSPOT_DIAMOND         = 3;
+ cmsSPOT_ELLIPSE         = 4;
+ cmsSPOT_LINE            = 5;
+ cmsSPOT_SQUARE          = 6;
+ cmsSPOT_CROSS           = 7;
+
+
+Type
+
+cmsScreeningChannel = packed record
+    // NOTE(review): two cmsFloat64Number fields then one cmsUInt32Number, packed with no tail padding -- confirm against the C struct.
+    Frequency   : cmsFloat64Number;
+    ScreenAngle : cmsFloat64Number;
+    SpotShape   : cmsUInt32Number;
+
+end;
+
+cmsScreening = packed record
+    // Two cmsUInt32Number header fields, then a fixed array of cmsMAXCHANNELS channel records.
+    Flag      : cmsUInt32Number;
+    nChannels : cmsUInt32Number;
+    Channels  : array [0..cmsMAXCHANNELS-1] of cmsScreeningChannel;
+end;
+
+
+// Named color -----------------------------------------------------------------------------------------------------------------
+
+
+LPcmsNAMEDCOLORLIST = Pointer;
+// Named colour lists are exposed as untyped pointers.
+FUNCTION cmsAllocNamedColorList(ContextID: cmsContext; n, ColorantCount :cmsUInt32Number;
+                                                           Prefix, Suffix: PAnsiChar): LPcmsNAMEDCOLORLIST; StdCall;
+
+PROCEDURE cmsFreeNamedColorList(v: LPcmsNAMEDCOLORLIST); StdCall;
+FUNCTION  cmsDupNamedColorList(v: LPcmsNAMEDCOLORLIST): LPcmsNAMEDCOLORLIST; StdCall;
+FUNCTION  cmsAppendNamedColor(v: LPcmsNAMEDCOLORLIST; Name: PAnsiChar;
+                                                             PCS, Colorant : LPcmsUInt16NumberArray): cmsBool; StdCall;
+// Count, and lookup by Name (the index comes back as a signed cmsInt32Number)
+FUNCTION cmsNamedColorCount(v: LPcmsNAMEDCOLORLIST): cmsUInt32Number; StdCall;
+FUNCTION cmsNamedColorIndex(v: LPcmsNAMEDCOLORLIST; Name: PAnsiChar): cmsInt32Number; StdCall;
+
+FUNCTION cmsNamedColorInfo(v: LPcmsNAMEDCOLORLIST; nColor : cmsUInt32Number;
+                                                      Name,Prefix, Suffix : PAnsiChar;
+                                                       PCS, Colorant : LPcmsUInt16NumberArray): cmsBool; StdCall;
+
+// Retrieve the named color list from a transform handle
+FUNCTION cmsGetNamedColorList(xform: cmsHTRANSFORM ): LPcmsNAMEDCOLORLIST; StdCall;
+
+// Profile sequence -----------------------------------------------------------------------------------------------------
+
+Type
+
+// Profile sequence descriptor. Some fields come from profile sequence descriptor tag, others
+// come from Profile Sequence Identifier Tag
+
+cmsPSEQDESC = packed record
+    deviceMfg    : cmsSignature;
+    deviceModel  : cmsSignature;
+    attributes   : cmsUInt64Number;
+    technology   : cmsTechnologySignature;
+    ProfileID    : cmsProfileID;
+    Manufacturer : LPcmsMLU;
+    Model        : LPcmsMLU;
+    Description  : LPcmsMLU;
+end;
+
+LPcmsSEQDESC = ^cmsPSEQDESC;
+
+cmsSEQ = packed record
+    // NOTE(review): n is presumably the number of cmsPSEQDESC entries reachable through seq -- confirm.
+    n         : cmsUInt32Number;
+    ContextID : cmsContext;
+    seq       : LPcmsSEQDESC;
+end;
+
+LPcmsSEQ = ^cmsSEQ;
+// Allocate (for a ContextID and count n), duplicate and free LPcmsSEQ values.
+FUNCTION   cmsAllocProfileSequenceDescription(ContextID: cmsContext; n: cmsUInt32Number):LPcmsSEQ; StdCall;
+FUNCTION   cmsDupProfileSequenceDescription(pseq: LPcmsSEQ):LPcmsSEQ; StdCall;
+PROCEDURE  cmsFreeProfileSequenceDescription(pseq: LPcmsSEQ); StdCall;
+
+// Dictionaries --------------------------------------------------------------------------------------------------------
+
+TYPE
+
+ LPcmsDICTentry = ^cmsDICTentry;
+
+cmsDICTentry = packed record
+    Next         : LPcmsDICTentry;   // pointer to another entry of the same type
+    DisplayName  : LPcmsMLU;
+    DisplayValue : LPcmsMLU;
+    Name         : PWChar;
+    Value        : PWChar;
+end;
+// Dictionaries are handled through cmsHANDLE values; entries are reached as LPcmsDICTentry.
+FUNCTION  cmsDictAlloc(ContextID: cmsContext): cmsHANDLE; StdCall;
+PROCEDURE cmsDictFree(hDict: cmsHANDLE);  StdCall;
+FUNCTION  cmsDictDup(hDict: cmsHANDLE): cmsHANDLE;  StdCall;
+
+FUNCTION cmsDictAddEntry(hDict: cmsHANDLE; Name, Value: PWChar; DisplayName, DisplayValue : LPcmsMLU): cmsBool;  StdCall;
+FUNCTION cmsDictGetEntryList(hDict: cmsHANDLE): LPcmsDICTentry; StdCall;
+FUNCTION cmsDictNextEntry(e : LPcmsDICTentry): LPcmsDICTentry;  StdCall;
+
+// Access to Profile data ----------------------------------------------------------------------------------------------
+FUNCTION cmsCreateProfilePlaceholder(ContextID: cmsContext): cmsHPROFILE; StdCall;
+// Context of a profile, tag count, tag signature by index n, and a tag presence test
+FUNCTION cmsGetProfileContextID(hProfile: cmsHPROFILE):cmsContext; StdCall;
+FUNCTION cmsGetTagCount(hProfile: cmsHPROFILE): cmsInt32Number; StdCall;
+FUNCTION cmsGetTagSignature(hProfile: cmsHPROFILE; n: cmsUInt32Number): cmsTagSignature; StdCall;
+FUNCTION cmsIsTag(hProfile: cmsHPROFILE; sig: cmsTagSignature ): cmsBool; StdCall;
+
+// Read and write pre-formatted data (passed as an untyped Pointer); link one tag signature to another
+FUNCTION cmsReadTag(hProfile: cmsHPROFILE; sig: cmsTagSignature ): Pointer; StdCall;
+FUNCTION cmsWriteTag(hProfile: cmsHPROFILE; sig: cmsTagSignature; data: Pointer): cmsBool; StdCall;
+FUNCTION cmsLinkTag(hProfile: cmsHPROFILE; sig: cmsTagSignature; dest: cmsTagSignature): cmsBool; StdCall;
+FUNCTION cmsTagLinkedTo(hProfile: cmsHPROFILE; sig: cmsTagSignature):cmsTagSignature; StdCall;
+
+// Read and write raw data through a caller-supplied buffer and its size
+FUNCTION cmsReadRawTag(hProfile: cmsHPROFILE; sig: cmsTagSignature; Buffer: Pointer; BufferSize: cmsUInt32Number): cmsInt32Number; StdCall;
+FUNCTION cmsWriteRawTag(hProfile: cmsHPROFILE; sig: cmsTagSignature; data: Pointer; Size: cmsUInt32Number): cmsBool; StdCall;
+
+// Access header data
+Const
+   // NOTE(review): presumably bit values for the flags handled by cmsGetHeaderFlags / cmsSetHeaderFlags -- confirm.
+   cmsEmbeddedProfileFalse    = $00000000;
+   cmsEmbeddedProfileTrue     = $00000001;
+   cmsUseAnywhere             = $00000000;
+   cmsUseWithEmbeddedDataOnly = $00000002;
+
+FUNCTION  cmsGetHeaderFlags(hProfile: cmsHPROFILE): cmsUInt32Number; StdCall;
+PROCEDURE cmsGetHeaderAttributes(hProfile: cmsHPROFILE; Flags: LPcmsUInt64Number); StdCall;
+PROCEDURE cmsGetHeaderProfileID(hProfile: cmsHPROFILE; ProfileID: LPcmsUInt8Number); StdCall;
+
+// TODO: not yet translated (the C signature takes a "struct tm *")
+// FUNCTION  cmsGetHeaderCreationDateTime(hProfile: cmsHPROFILE; struct tm *Dest): cmsBool; StdCall;
+
+FUNCTION  cmsGetHeaderRenderingIntent(hProfile: cmsHPROFILE): cmsUInt32Number; StdCall;
+PROCEDURE cmsSetHeaderFlags(hProfile: cmsHPROFILE; Flags: cmsUInt32Number); StdCall;
+FUNCTION  cmsGetHeaderManufacturer(hProfile: cmsHPROFILE): cmsUInt32Number; StdCall;
+PROCEDURE cmsSetHeaderManufacturer(hProfile: cmsHPROFILE; manufacturer: cmsUInt32Number ); StdCall;
+FUNCTION  cmsGetHeaderModel(hProfile: cmsHPROFILE): cmsUInt32Number; StdCall;
+PROCEDURE cmsSetHeaderModel(hProfile: cmsHPROFILE; model: cmsUInt32Number ); StdCall;
+PROCEDURE cmsSetHeaderAttributes(hProfile: cmsHPROFILE; Flags: cmsUInt64Number); StdCall;
+PROCEDURE cmsSetHeaderProfileID(hProfile: cmsHPROFILE; ProfileID: LPcmsUInt8Number); StdCall;
+PROCEDURE cmsSetHeaderRenderingIntent(hProfile: cmsHPROFILE; RenderingIntent: cmsUInt32Number ); StdCall;
+// PCS, colour space, device class and profile version come as Get/Set pairs
+FUNCTION  cmsGetPCS(hProfile: cmsHPROFILE):cmsColorSpaceSignature; StdCall;
+PROCEDURE cmsSetPCS(hProfile: cmsHPROFILE; pcs: cmsColorSpaceSignature); StdCall;
+FUNCTION  cmsGetColorSpace(hProfile: cmsHPROFILE): cmsColorSpaceSignature; StdCall;
+PROCEDURE cmsSetColorSpace(hProfile: cmsHPROFILE; sig: cmsColorSpaceSignature); StdCall;
+FUNCTION  cmsGetDeviceClass(hProfile: cmsHPROFILE): cmsProfileClassSignature; StdCall;
+PROCEDURE cmsSetDeviceClass(hProfile: cmsHPROFILE; sig: cmsProfileClassSignature); StdCall;
+PROCEDURE cmsSetProfileVersion(hProfile: cmsHPROFILE; Version: cmsFloat64Number); StdCall;
+FUNCTION  cmsGetProfileVersion(hProfile: cmsHPROFILE): cmsFloat64Number; StdCall;
+// The same version is also available in encoded form as a cmsUInt32Number
+FUNCTION  cmsGetEncodedICCversion(hProfile: cmsHPROFILE): cmsUInt32Number; StdCall;
+PROCEDURE cmsSetEncodedICCversion(hProfile: cmsHPROFILE; Version: cmsUInt32Number); StdCall;
+
+
+Const
+
+    // How profiles may be used
+    LCMS_USED_AS_INPUT     = 0;
+    LCMS_USED_AS_OUTPUT    = 1;
+    LCMS_USED_AS_PROOF     = 2;
+// NOTE(review): UsedDirection below presumably takes one of the LCMS_USED_AS_* values -- confirm.
+FUNCTION   cmsIsIntentSupported(hProfile: cmsHPROFILE; Intent: cmsUInt32Number; UsedDirection: cmsUInt32Number): cmsBool; StdCall;
+FUNCTION   cmsIsMatrixShaper(hProfile: cmsHPROFILE): cmsBool; StdCall;
+FUNCTION   cmsIsCLUT(hProfile: cmsHPROFILE; Intent: cmsUInt32Number; UsedDirection: cmsUInt32Number): cmsBool; StdCall;
+
+// Translate from/to our notation to/from ICC
+FUNCTION _cmsICCcolorSpace(OurNotation: Integer): cmsColorSpaceSignature; StdCall;
+FUNCTION _cmsLCMScolorSpace(ProfileSpace: cmsColorSpaceSignature): Integer; StdCall;
+// Takes a colour-space signature and returns a cmsUInt32Number
+FUNCTION cmsChannelsOf( ColorSpace: cmsColorSpaceSignature): cmsUInt32Number; StdCall;
+
+// Build a suitable formatter (returned as a cmsUInt32Number) for the colorspace / PCS of this profile
+FUNCTION cmsFormatterForColorspaceOfProfile(hProfile: cmsHPROFILE; nBytes: cmsUInt32Number; lIsFloat: cmsBool): cmsUInt32Number; StdCall;
+FUNCTION cmsFormatterForPCSOfProfile(hProfile: cmsHPROFILE; nBytes: cmsUInt32Number; lIsFloat: cmsBool): cmsUInt32Number; StdCall;
+
+Type
+
+// Localized info selector, passed as the Info argument of cmsGetProfileInfo / cmsGetProfileInfoASCII
+cmsInfoType = (
+    cmsInfoDescription  = 0,
+    cmsInfoManufacturer = 1,
+    cmsInfoModel        = 2,
+    cmsInfoCopyright    = 3
+);
+// The two routines differ only in the buffer type: PWChar versus PAnsiChar.
+FUNCTION cmsGetProfileInfo(hProfile: cmsHPROFILE; Info: cmsInfoType; LanguageCode, CountryCode: PAnsiChar;
+                                                            Buffer: PWChar; BufferSize: cmsUInt32Number): cmsUInt32Number; StdCall;
+
+FUNCTION cmsGetProfileInfoASCII(hProfile: cmsHPROFILE; Info: cmsInfoType; LanguageCode, CountryCode: PAnsiChar;
+                                                            Buffer: PAnsiChar; BufferSize: cmsUInt32Number): cmsUInt32Number; StdCall;
+
+// IO handlers ----------------------------------------------------------------------------------------------------------
+// IO handlers are exposed as untyped pointers; the FILE*-based variant is left commented out.
+Type
+
+LPcmsIOHANDLER = Pointer;
+
+FUNCTION cmsOpenIOhandlerFromFile(ContextID: cmsContext; FileName, AccessMode: PAnsiChar): LPcmsIOHANDLER; StdCall;
+// FUNCTION cmsOpenIOhandlerFromStream(ContextID: cmsContext; FILE* Stream): LPcmsIOHANDLER; StdCall;
+FUNCTION cmsOpenIOhandlerFromMem(ContextID: cmsContext; Buffer: Pointer; size: cmsUInt32Number; AccessMode: PAnsiChar): LPcmsIOHANDLER; StdCall;
+FUNCTION cmsOpenIOhandlerFromNULL(ContextID: cmsContext): LPcmsIOHANDLER; StdCall;
+FUNCTION cmsCloseIOhandler(io: LPcmsIOHANDLER): cmsBool; StdCall;
+
+// MD5 message digest --------------------------------------------------------------------------------------------------
+// Takes a profile handle and returns a cmsBool.
+FUNCTION cmsMD5computeID(hProfile: cmsHPROFILE): cmsBool; StdCall;
+
+// Profile high level functions ------------------------------------------------------------------------------------------
+// The *THR variants take an explicit ContextID; the FILE*-stream variants are left commented out.
+FUNCTION   cmsOpenProfileFromFile(ICCProfile : PAnsiChar; sAccess: PAnsiChar): cmsHPROFILE; StdCall;
+FUNCTION   cmsOpenProfileFromFileTHR(ContextID: cmsContext; ICCProfile, sAccess: PAnsiChar): cmsHPROFILE; StdCall;
+// FUNCTION      CMSEXPORT cmsOpenProfileFromStream(FILE* ICCProfile, const char* sAccess): cmsHPROFILE; StdCall;
+// FUNCTION      CMSEXPORT cmsOpenProfileFromStreamTHR(ContextID: cmsContext; FILE* ICCProfile, const char* sAccess): cmsHPROFILE; StdCall;
+FUNCTION   cmsOpenProfileFromMem(MemPtr: Pointer; dwSize: cmsUInt32Number): cmsHPROFILE; StdCall;
+FUNCTION   cmsOpenProfileFromMemTHR(ContextID: cmsContext; MemPtr: Pointer; dwSize: cmsUInt32Number): cmsHPROFILE; StdCall;
+FUNCTION   cmsOpenProfileFromIOhandlerTHR(ContextID: cmsContext; io: LPcmsIOHANDLER): cmsHPROFILE; StdCall;
+FUNCTION   cmsCloseProfile(hProfile: cmsHPROFILE): cmsBool; StdCall;
+// Saving: to a file name, to memory (MemPtr plus a pointer to BytesNeeded), or to an IO handler
+FUNCTION   cmsSaveProfileToFile(hProfile: cmsHPROFILE; FileName: PAnsiChar): cmsBool; StdCall;
+// FUNCTION         CMSEXPORT cmsSaveProfileToStream(hProfile: cmsHPROFILE, FILE* Stream): cmsBool; StdCall;
+FUNCTION   cmsSaveProfileToMem(hProfile: cmsHPROFILE; MemPtr: Pointer; BytesNeeded: LPcmsUInt32Number): cmsBool; StdCall;
+FUNCTION   cmsSaveProfileToIOhandler(hProfile: cmsHPROFILE; io: LPcmsIOHANDLER):cmsUInt32Number; StdCall;
+
+// Predefined virtual profiles ------------------------------------------------------------------------------------------
+// Each routine comes in a plain form and a *THR form that additionally takes a ContextID.
+FUNCTION  cmsCreateRGBProfileTHR(ContextID: cmsContext;
+                                                   WhitePoint: LPcmsCIExyY;
+                                                   Primaries: LPcmsCIExyYTRIPLE;
+                                                   TransferFunction: LPLPcmsToneCurveArray): cmsHPROFILE; StdCall;
+
+FUNCTION  cmsCreateRGBProfile(WhitePoint: LPcmsCIExyY;
+                                                   Primaries: LPcmsCIExyYTRIPLE;
+                                                   TransferFunction: LPLPcmsToneCurveArray): cmsHPROFILE; StdCall;
+
+FUNCTION cmsCreateGrayProfileTHR(ContextID: cmsContext;
+                                                    WhitePoint: LPcmsCIExyY;
+                                                    TransferFunction: LPcmsToneCurve): cmsHPROFILE; StdCall;
+
+FUNCTION cmsCreateGrayProfile(WhitePoint: LPcmsCIExyY;
+                                                     TransferFunction: LPcmsToneCurve): cmsHPROFILE; StdCall;
+// Device-link constructors: a colour-space signature plus either tone curves or an ink Limit.
+FUNCTION cmsCreateLinearizationDeviceLinkTHR(ContextID: cmsContext;
+                                                                 ColorSpace: cmsColorSpaceSignature;
+                                                                 TransferFunctions: LPLPcmsToneCurveArray): cmsHPROFILE; StdCall;
+
+FUNCTION cmsCreateLinearizationDeviceLink(ColorSpace: cmsColorSpaceSignature;
+                                                                 TransferFunctions: LPLPcmsToneCurveArray): cmsHPROFILE; StdCall;
+
+FUNCTION cmsCreateInkLimitingDeviceLinkTHR(ContextID: cmsContext;
+                                                              ColorSpace: cmsColorSpaceSignature; Limit: cmsFloat64Number): cmsHPROFILE; StdCall;
+
+FUNCTION cmsCreateInkLimitingDeviceLink(ColorSpace: cmsColorSpaceSignature; Limit: cmsFloat64Number): cmsHPROFILE; StdCall;
+
+// Lab profiles take a WhitePoint; the XYZ and sRGB constructors take no arguments beyond the optional ContextID.
+FUNCTION cmsCreateLab2ProfileTHR(ContextID: cmsContext; WhitePoint: LPcmsCIExyY): cmsHPROFILE; StdCall;
+FUNCTION cmsCreateLab2Profile(WhitePoint: LPcmsCIExyY): cmsHPROFILE; StdCall;
+FUNCTION cmsCreateLab4ProfileTHR(ContextID: cmsContext; WhitePoint: LPcmsCIExyY): cmsHPROFILE; StdCall;
+FUNCTION cmsCreateLab4Profile(WhitePoint: LPcmsCIExyY): cmsHPROFILE; StdCall;
+
+FUNCTION cmsCreateXYZProfileTHR(ContextID: cmsContext): cmsHPROFILE; StdCall;
+FUNCTION cmsCreateXYZProfile: cmsHPROFILE; StdCall;
+
+FUNCTION cmsCreate_sRGBProfileTHR(ContextID: cmsContext): cmsHPROFILE; StdCall;
+FUNCTION cmsCreate_sRGBProfile: cmsHPROFILE; StdCall;
+
+FUNCTION cmsCreateBCHSWabstractProfileTHR(ContextID: cmsContext;
+                                                             nLUTPoints: Integer;
+                                                             Bright,
+                                                             Contrast,
+                                                             Hue,
+                                                             Saturation: cmsFloat64Number;
+                                                             TempSrc,
+                                                             TempDest: Integer): cmsHPROFILE; StdCall;
+
+FUNCTION cmsCreateBCHSWabstractProfile(   nLUTPoints: Integer;
+                                                             Bright,
+                                                             Contrast,
+                                                             Hue,
+                                                             Saturation: cmsFloat64Number;
+                                                             TempSrc,
+                                                             TempDest: Integer): cmsHPROFILE; StdCall;
+
+FUNCTION  cmsCreateNULLProfileTHR(ContextID: cmsContext): cmsHPROFILE; StdCall;
+FUNCTION  cmsCreateNULLProfile: cmsHPROFILE; StdCall;
+
+// Converts a transform to a devicelink profile
+FUNCTION  cmsTransform2DeviceLink(hTransform: cmsHTRANSFORM; Version: cmsFloat64Number; dwFlags: cmsUInt32Number): cmsHPROFILE; StdCall;
+
+// Intents ----------------------------------------------------------------------------------------------
+
+Const
+
+// ICC Intents
+INTENT_PERCEPTUAL                              = 0;
+INTENT_RELATIVE_COLORIMETRIC                   = 1;
+INTENT_SATURATION                              = 2;
+INTENT_ABSOLUTE_COLORIMETRIC                   = 3;
+
+// Non-ICC intents
+INTENT_PRESERVE_K_ONLY_PERCEPTUAL             = 10;
+INTENT_PRESERVE_K_ONLY_RELATIVE_COLORIMETRIC  = 11;
+INTENT_PRESERVE_K_ONLY_SATURATION             = 12;
+INTENT_PRESERVE_K_PLANE_PERCEPTUAL            = 13;
+INTENT_PRESERVE_K_PLANE_RELATIVE_COLORIMETRIC = 14;
+INTENT_PRESERVE_K_PLANE_SATURATION            = 15;
+
+Type
+LPPAnsiChar = ^PAnsiChar;
+
+// Call with NULL as parameters to get the intent count
+FUNCTION cmsGetSupportedIntents(nMax: cmsUInt32Number; Codes: LPcmsUInt32Number; Descriptions: LPPAnsiChar): cmsUInt32Number; StdCall;
+
+Const
+
+// Flags
+
+cmsFLAGS_NOCACHE                  = $0040;    // Inhibit 1-pixel cache
+cmsFLAGS_NOOPTIMIZE               = $0100;    // Inhibit optimizations
+cmsFLAGS_NULLTRANSFORM            = $0200;    // Don't transform anyway
+
+// Proofing flags
+cmsFLAGS_GAMUTCHECK               = $1000;    // Out of Gamut alarm
+cmsFLAGS_SOFTPROOFING             = $4000;    // Do softproofing
+
+// Misc
+cmsFLAGS_BLACKPOINTCOMPENSATION   = $2000;
+cmsFLAGS_NOWHITEONWHITEFIXUP      = $0004;    // Don't fix scum dot
+cmsFLAGS_HIGHRESPRECALC           = $0400;    // Use more memory to give better accuracy
+cmsFLAGS_LOWRESPRECALC            = $0800;    // Use less memory to minimize resources
+
+// For devicelink creation
+cmsFLAGS_8BITS_DEVICELINK         = $0008;   // Create 8 bits devicelinks
+cmsFLAGS_GUESSDEVICECLASS         = $0020;   // Guess device class (for transform2devicelink)
+cmsFLAGS_KEEP_SEQUENCE            = $0080;   // Keep profile sequence for devicelink creation
+
+// Specific to a particular optimizations
+cmsFLAGS_FORCE_CLUT               = $0002;    // Force CLUT optimization
+cmsFLAGS_CLUT_POST_LINEARIZATION  = $0001;    // create postlinearization tables if possible
+cmsFLAGS_CLUT_PRE_LINEARIZATION   = $0010;    // create prelinearization tables if possible
+
+// CRD special
+cmsFLAGS_NODEFAULTRESOURCEDEF     = $01000000;
+
+// Fine-tune control over number of gridpoints
+FUNCTION cmsFLAGS_GRIDPOINTS(n: Integer): Integer;
+
+
+// Transforms ---------------------------------------------------------------------------------------------------
+
+// Pointer-to-array helper types used by the multi-profile transform prototypes below.
+// The arrays are declared with a single element (0..0); presumably callers index past
+// that bound through the pointer, C-style, so range checking must be off for such
+// accesses -- TODO confirm against the callers.
+type
+  LPcmsHPROFILEArray = ^cmsHPROFILEArray;
+  cmsHPROFILEArray = array[0..0] of cmsHPROFILE;
+
+  LPcmsBoolArray = ^cmsBoolArray;
+  cmsBoolArray = array[0..0] of cmsBool;
+
+FUNCTION   cmsCreateTransformTHR(ContextID: cmsContext;
+                                                  Input: cmsHPROFILE;
+                                                  InputFormat: cmsUInt32Number;
+                                                  Output: cmsHPROFILE;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Intent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall;
+
+FUNCTION   cmsCreateTransform(Input: cmsHPROFILE;
+                                                  InputFormat: cmsUInt32Number;
+                                                  Output: cmsHPROFILE;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Intent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall;
+
+FUNCTION   cmsCreateProofingTransformTHR(ContextID: cmsContext;
+                                                  Input: cmsHPROFILE;
+                                                  InputFormat: cmsUInt32Number;
+                                                  Output: cmsHPROFILE;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Proofing: cmsHPROFILE;
+                                                  Intent: cmsUInt32Number;
+                                                  ProofingIntent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall;
+
+FUNCTION   cmsCreateProofingTransform(Input: cmsHPROFILE;
+                                                  InputFormat: cmsUInt32Number;
+                                                  Output: cmsHPROFILE;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Proofing: cmsHPROFILE;
+                                                  Intent: cmsUInt32Number;
+                                                  ProofingIntent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall;
+
+FUNCTION   cmsCreateMultiprofileTransformTHR(ContextID: cmsContext;
+                                                  hProfiles: LPcmsHPROFILEArray;
+                                                  nProfiles: cmsUInt32Number;
+                                                  InputFormat: cmsUInt32Number;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Intent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall;
+
+
+FUNCTION   cmsCreateMultiprofileTransform( hProfiles: LPcmsHPROFILEArray;
+                                                  nProfiles: cmsUInt32Number;
+                                                  InputFormat: cmsUInt32Number;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Intent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall;
+
+
+FUNCTION   cmsCreateExtendedTransform(ContextID: cmsContext;
+                                                   nProfiles: cmsUInt32Number;
+                                                   hProfiles: LPcmsHPROFILEArray;
+                                                   BPC: LPcmsBoolArray;
+                                                   Intents: LPcmsUInt32NumberArray;
+                                                   AdaptationStates: LPcmsFloat64NumberArray;
+                                                   hGamutProfile: cmsHPROFILE;
+                                                   nGamutPCSposition: cmsUInt32Number;
+                                                   InputFormat,
+                                                   OutputFormat: cmsUInt32Number;
+                                                   dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall;
+
+PROCEDURE  cmsDeleteTransform(hTransform: cmsHTRANSFORM); StdCall;
+
+PROCEDURE  cmsDoTransform(Transform: cmsHTRANSFORM; InputBuffer, OutputBuffer: Pointer; size: cmsUInt32Number);  StdCall;
+PROCEDURE  cmsDoTransformStride(Transform: cmsHTRANSFORM; InputBuffer, OutputBuffer: Pointer; size: cmsUInt32Number; stride: cmsUInt32Number);  StdCall;
+
+
+PROCEDURE  cmsSetAlarmCodes( NewAlarm: LPcmsUInt16NumberArray);  StdCall;
+PROCEDURE  cmsGetAlarmCodes(NewAlarm: LPcmsUInt16NumberArray); StdCall;
+
+// Adaptation state for absolute colorimetric intent
+FUNCTION  cmsSetAdaptationState(d: cmsFloat64Number):cmsFloat64Number; StdCall;
+
+// Grab the ContextID from an open transform. Returns NULL if a NULL transform is passed
+FUNCTION  cmsGetTransformContextID(hTransform: cmsHTRANSFORM):cmsContext; StdCall;
+
+// For backwards compatibility
+FUNCTION  cmsChangeBuffersFormat(hTransform: cmsHTRANSFORM; InputFormat, OutputFormat: cmsUInt32Number): cmsBool; StdCall;
+
+
+
+// PostScript ColorRenderingDictionary and ColorSpaceArray ----------------------------------------------------
+
+Type
+
+cmsPSResourceType = (cmsPS_RESOURCE_CSA, cmsPS_RESOURCE_CRD ) ;
+
+// lcms2 unified method to access postscript color resources
+FUNCTION cmsGetPostScriptColorResource(ContextID: cmsContext;   RType: cmsPSResourceType;
+                                                                hProfile: cmsHPROFILE;
+                                                                Intent: cmsUInt32Number;
+                                                                dwFlags: cmsUInt32Number;
+                                                                io: LPcmsIOHANDLER): cmsUInt32Number; StdCall;
+
+FUNCTION cmsGetPostScriptCSA(ContextID: cmsContext; hProfile: cmsHPROFILE; Intent: cmsUInt32Number; dwFlags: cmsUInt32Number; Buffer: Pointer; dwBufferLen: cmsUInt32Number ): cmsUInt32Number; StdCall;
+FUNCTION cmsGetPostScriptCRD(ContextID: cmsContext; hProfile: cmsHPROFILE; Intent: cmsUInt32Number; dwFlags: cmsUInt32Number; Buffer: Pointer; dwBufferLen: cmsUInt32Number): cmsUInt32Number; StdCall;
+
+
+// IT8.7 / CGATS.17-200x handling -----------------------------------------------------------------------------
+
+
+// CGATS.13 parser
+
+// Handle lifetime pair: cmsIT8Alloc yields a cmsHANDLE, cmsIT8Free accepts one.
+// NOTE(review): the C header (lcms2.h) appears to declare cmsIT8Alloc with a leading
+// cmsContext argument, which this binding omits. Under StdCall a missing argument
+// would unbalance the stack -- confirm against the linked library before use.
+FUNCTION  cmsIT8Alloc: cmsHANDLE; StdCall;
+PROCEDURE cmsIT8Free(hIT8: cmsHANDLE); StdCall;
+
+// Tables
+
+FUNCTION  cmsIT8TableCount(hIT8: cmsHANDLE): Integer; StdCall;
+FUNCTION  cmsIT8SetTable(hIT8: cmsHANDLE; nTable: Integer): Integer; StdCall;
+
+// Persistence
+// Loaders returning a cmsHANDLE, from a file name or from a memory buffer of `size` bytes.
+// NOTE(review): the C header (lcms2.h) appears to declare both loaders with a leading
+// cmsContext argument that is absent here -- confirm the signatures against the
+// linked library version; a mismatch under StdCall would corrupt the call stack.
+FUNCTION  cmsIT8LoadFromFile(cFileName: PAnsiChar): cmsHANDLE; StdCall;
+FUNCTION  cmsIT8LoadFromMem(Ptr: Pointer; size :DWord): cmsHANDLE; StdCall;
+
+FUNCTION cmsIT8SaveToFile(hIT8: cmsHANDLE; cFileName: PAnsiChar): cmsBool; StdCall;
+FUNCTION cmsIT8SaveToMem(hIT8: cmsHANDLE; MemPtr: Pointer; BytesNeeded: LPcmsUInt32Number): cmsBool; StdCall;
+// Properties
+
+FUNCTION cmsIT8GetSheetType(hIT8: cmsHANDLE): PAnsiChar; StdCall;
+FUNCTION cmsIT8SetSheetType(hIT8: cmsHANDLE; TheType: PAnsiChar): cmsBool; StdCall;
+
+FUNCTION cmsIT8SetComment(hIT8: cmsHANDLE; cComment: PAnsiChar): cmsBool; StdCall;
+
+FUNCTION cmsIT8SetPropertyStr(hIT8: cmsHANDLE; cProp, Str: PAnsiChar): cmsBool; StdCall;
+FUNCTION cmsIT8SetPropertyDbl(hIT8: cmsHANDLE; cProp: PAnsiChar; Val: Double): cmsBool; StdCall;
+FUNCTION cmsIT8SetPropertyHex(hIT8: cmsHANDLE; cProp: PAnsiChar; Val: Integer): cmsBool; StdCall;
+FUNCTION cmsIT8SetPropertyUncooked(hIT8: cmsHANDLE; Key, Buffer: PAnsiChar): cmsBool; StdCall;
+
+
+FUNCTION cmsIT8GetProperty(hIT8: cmsHANDLE; cProp: PAnsiChar): PAnsiChar; StdCall;
+FUNCTION cmsIT8GetPropertyDbl(hIT8: cmsHANDLE; cProp: PAnsiChar): Double; StdCall;
+FUNCTION cmsIT8EnumProperties(hIT8: cmsHANDLE; var PropertyNames: LPPAnsiChar): Integer; StdCall;
+
+// Datasets
+
+FUNCTION cmsIT8GetDataRowCol(hIT8: cmsHANDLE; row, col: Integer): PAnsiChar; StdCall;
+FUNCTION cmsIT8GetDataRowColDbl(hIT8: cmsHANDLE; row, col: Integer): Double; StdCall;
+
+FUNCTION cmsIT8SetDataRowCol(hIT8: cmsHANDLE; row, col: Integer; Val: PAnsiChar): cmsBool; StdCall;
+FUNCTION cmsIT8SetDataRowColDbl(hIT8: cmsHANDLE; row, col: Integer; Val: Double): cmsBool; StdCall;
+
+FUNCTION cmsIT8GetData(hIT8: cmsHANDLE; cPatch, cSample: PAnsiChar): PAnsiChar; StdCall;
+
+FUNCTION cmsIT8GetDataDbl(hIT8: cmsHANDLE;cPatch, cSample: PAnsiChar): Double; StdCall;
+
+FUNCTION cmsIT8SetData(hIT8: cmsHANDLE; cPatch, cSample, Val: PAnsiChar): cmsBool; StdCall;
+
+FUNCTION cmsIT8SetDataDbl(hIT8: cmsHANDLE; cPatch, cSample: PAnsiChar; Val: Double): cmsBool; StdCall;
+
+FUNCTION cmsIT8SetDataFormat(hIT8: cmsHANDLE; n: Integer; Sample: PAnsiChar): cmsBool; StdCall;
+FUNCTION cmsIT8EnumDataFormat(hIT8: cmsHANDLE; var SampleNames: LPPAnsiChar): Integer; StdCall;
+FUNCTION cmsIT8GetPatchName(hIT8: cmsHANDLE; nPatch: Integer; Buffer: PAnsiChar): PAnsiChar; StdCall;
+
+// The LABEL extension
+FUNCTION cmsIT8SetTableByLabel(hIT8: cmsHANDLE; cSet, cField, ExpectedType: PAnsiChar): Integer; StdCall;
+
+FUNCTION cmsIT8FindDataFormat(hIT8: cmsHANDLE; cSample: PAnsiChar): Integer; StdCall;
+
+// Formatter for double
+PROCEDURE  cmsIT8DefineDblFormat(hIT8: cmsHANDLE; Formatter: PAnsiChar);  StdCall;
+
+// Gamut boundary description routines ------------------------------------------------------------------------------
+
+// Alloc/Free create and release the cmsHANDLE that the other three routines operate on.
+// The spelling is mixed on purpose-looking lines: "GBD" for Alloc/Free but "GDB" for
+// AddPoint/Compute/CheckPoint. These identifiers double as the imported symbol names,
+// so do not "correct" them -- NOTE(review): confirm the exported names in lcms2.h.
+FUNCTION  cmsGBDAlloc(ContextID: cmsContext):cmsHANDLE; StdCall;
+PROCEDURE cmsGBDFree(hGBD: cmsHANDLE); StdCall;
+FUNCTION  cmsGDBAddPoint(hGBD: cmsHANDLE; Lab: LPcmsCIELab): cmsBool; StdCall;
+FUNCTION  cmsGDBCompute(hGDB: cmsHANDLE; dwFlags: cmsUInt32Number): cmsBool; StdCall;
+FUNCTION  cmsGDBCheckPoint(hGBD: cmsHANDLE; Lab: LPcmsCIELab): cmsBool; StdCall;
+
+// Feature detection  ----------------------------------------------------------------------------------------------
+
+// Estimate the black point
+FUNCTION cmsDetectBlackPoint( BlackPoint: LPcmsCIEXYZ; hProfile: cmsHPROFILE; Intent: cmsUInt32Number; dwFlags: cmsUInt32Number): cmsBool; StdCall;
+FUNCTION cmsDetectDestinationBlackPoint( BlackPoint: LPcmsCIEXYZ; hProfile: cmsHPROFILE; Intent: cmsUInt32Number; dwFlags: cmsUInt32Number): cmsBool; StdCall;
+
+
+// Estimate total area coverage
+FUNCTION cmsDetectTAC(hProfile: cmsHPROFILE): cmsFloat64Number; StdCall;
+
+
+// Poor man's gamut mapping
+FUNCTION  cmsDesaturateLab(Lab: LPcmsCIELab; amax, amin, bmax, bmin: cmsFloat64Number): cmsBool; StdCall;
+
+
+IMPLEMENTATION
+
+
+
+    // Field encoders. Each helper moves its argument up to a fixed bit offset so the
+    // results can be OR-ed/added into a single cmsUInt32Number word. The matching
+    // T_xxx helpers read the same offsets back. No masking is done here, so an
+    // argument wider than its field spills into the neighbouring bits.
+
+    // Offset 22.
+    FUNCTION FLOAT_SH(a: cmsUInt32Number): cmsUInt32Number;
+    begin
+      FLOAT_SH := a shl 22;
+    end;
+
+    // Offset 21.
+    FUNCTION OPTIMIZED_SH(s: cmsUInt32Number): cmsUInt32Number;
+    begin
+      OPTIMIZED_SH := s shl 21;
+    end;
+
+    // Offset 16.
+    FUNCTION COLORSPACE_SH(s: cmsUInt32Number):cmsUInt32Number;
+    begin
+      COLORSPACE_SH := s shl 16;
+    end;
+
+    // Offset 14.
+    FUNCTION SWAPFIRST_SH(s: cmsUInt32Number):cmsUInt32Number;
+    begin
+      SWAPFIRST_SH := s shl 14;
+    end;
+
+    // Offset 13.
+    FUNCTION FLAVOR_SH(s: cmsUInt32Number):cmsUInt32Number;
+    begin
+      FLAVOR_SH := s shl 13;
+    end;
+
+    // Offset 12.
+    FUNCTION PLANAR_SH(p: cmsUInt32Number):cmsUInt32Number;
+    begin
+      PLANAR_SH := p shl 12;
+    end;
+
+    // Offset 11.
+    FUNCTION ENDIAN16_SH(e: cmsUInt32Number):cmsUInt32Number;
+    begin
+      ENDIAN16_SH := e shl 11;
+    end;
+
+    // Offset 10.
+    FUNCTION DOSWAP_SH(e: cmsUInt32Number):cmsUInt32Number;
+    begin
+      DOSWAP_SH := e shl 10;
+    end;
+
+    // Offset 7.
+    FUNCTION EXTRA_SH(e: cmsUInt32Number):cmsUInt32Number;
+    begin
+      EXTRA_SH := e shl 7;
+    end;
+
+    // Offset 3.
+    FUNCTION CHANNELS_SH(c: cmsUInt32Number):cmsUInt32Number;
+    begin
+      CHANNELS_SH := c shl 3;
+    end;
+
+    // Offset 0: the value is passed through unchanged.
+    FUNCTION BYTES_SH(b: cmsUInt32Number):cmsUInt32Number;
+    begin
+      BYTES_SH := b;
+    end;
+
+
+    // Field decoders. Each helper shifts the packed word right to the field's offset
+    // and masks off the field's width, mirroring the xxx_SH encoders.
+
+    // 1 bit at offset 22.
+    FUNCTION T_FLOAT(a: cmsUInt32Number): cmsUInt32Number;
+    begin
+      T_FLOAT := (a shr 22) and 1;
+    end;
+
+    // 1 bit at offset 21.
+    FUNCTION T_OPTIMIZED(o: cmsUInt32Number): cmsUInt32Number;
+    begin
+      T_OPTIMIZED := (o shr 21) and 1;
+    end;
+
+    // 5 bits (mask 31) at offset 16.
+    FUNCTION T_COLORSPACE(s: cmsUInt32Number): cmsUInt32Number;
+    begin
+      T_COLORSPACE := (s shr 16) and 31;
+    end;
+
+    // 1 bit at offset 14.
+    FUNCTION T_SWAPFIRST(s: cmsUInt32Number): cmsUInt32Number;
+    begin
+      T_SWAPFIRST := (s shr 14) and 1;
+    end;
+
+    // 1 bit at offset 13.
+    FUNCTION T_FLAVOR(s: cmsUInt32Number): cmsUInt32Number;
+    begin
+      T_FLAVOR := (s shr 13) and 1;
+    end;
+
+    // 1 bit at offset 12.
+    FUNCTION T_PLANAR(p: cmsUInt32Number): cmsUInt32Number;
+    begin
+      T_PLANAR := (p shr 12) and 1;
+    end;
+
+    // 1 bit at offset 11.
+    FUNCTION T_ENDIAN16(e: cmsUInt32Number): cmsUInt32Number;
+    begin
+      T_ENDIAN16 := (e shr 11) and 1;
+    end;
+
+    // 1 bit at offset 10.
+    FUNCTION T_DOSWAP(e: cmsUInt32Number): cmsUInt32Number;
+    begin
+      T_DOSWAP := (e shr 10) and 1;
+    end;
+
+    // 3 bits (mask 7) at offset 7.
+    FUNCTION T_EXTRA(e: cmsUInt32Number): cmsUInt32Number;
+    begin
+      T_EXTRA := (e shr 7) and 7;
+    end;
+
+    // 4 bits (mask 15) at offset 3.
+    FUNCTION T_CHANNELS(c: cmsUInt32Number): cmsUInt32Number;
+    begin
+      T_CHANNELS := (c shr 3) and 15;
+    end;
+
+    // 3 bits (mask 7) at offset 0.
+    FUNCTION T_BYTES(b: cmsUInt32Number): cmsUInt32Number;
+    begin
+      T_BYTES := b and 7;
+    end;
+
+
+
+// Imported entry points: the declarations below bind to the LCMS2_SO library through "external".
+
+FUNCTION  cmsCreateContext(Plugin : Pointer; UserData : Pointer) : cmsContext; StdCall; external LCMS2_SO;
+PROCEDURE cmsDeleteContext(ContextID: cmsContext); StdCall; external LCMS2_SO;
+FUNCTION  cmsDupContext(ContextID: cmsContext; NewUserData: Pointer): cmsContext; StdCall; external LCMS2_SO;
+FUNCTION  cmsGetContextUserData(ContextID: cmsContext): Pointer;  StdCall; external LCMS2_SO;
+
+FUNCTION  cmsPlugin(Plugin: Pointer): cmsBool; StdCall; external LCMS2_SO;
+PROCEDURE cmsUnregisterPlugins; StdCall; external LCMS2_SO;
+PROCEDURE cmsSetLogErrorHandler(Fn: cmsLogErrorHandlerFunction); StdCall; external LCMS2_SO;
+FUNCTION cmsD50_XYZ: LPcmsCIEXYZ; StdCall; external LCMS2_SO;
+FUNCTION cmsD50_xyY: LPcmsCIExyY; StdCall; external LCMS2_SO;
+PROCEDURE cmsXYZ2xyY(Dest: LPcmsCIExyY; Source: LPcmsCIEXYZ); StdCall; external LCMS2_SO;
+PROCEDURE cmsxyY2XYZ(Dest: LPcmsCIEXYZ; Source: LPcmsCIExyY); StdCall; external LCMS2_SO;
+PROCEDURE cmsLab2XYZ(WhitePoint: LPcmsCIEXYZ; xyz: LPcmsCIEXYZ; Lab: LPcmsCIELab); StdCall; external LCMS2_SO;
+PROCEDURE cmsXYZ2Lab(WhitePoint: LPcmsCIEXYZ; Lab: LPcmsCIELab; xyz: LPcmsCIEXYZ); StdCall; external LCMS2_SO;
+PROCEDURE cmsLab2LCh(LCh: LPcmsCIELCh; Lab: LPcmsCIELab); StdCall; external LCMS2_SO;
+PROCEDURE cmsLCh2Lab(Lab: LPcmsCIELab; LCh: LPcmsCIELCh); StdCall; external LCMS2_SO;
+PROCEDURE cmsLabEncoded2Float(Lab: LPcmsCIELab; wLab: Pointer); StdCall; external LCMS2_SO;
+PROCEDURE cmsLabEncoded2FloatV2(Lab: LPcmsCIELab; wLab: Pointer); StdCall; external LCMS2_SO;
+PROCEDURE cmsFloat2LabEncoded(wLab: Pointer; Lab: LPcmsCIELab); StdCall; external LCMS2_SO;
+PROCEDURE cmsFloat2LabEncodedV2(wLab: Pointer; Lab: LPcmsCIELab); StdCall; external LCMS2_SO;
+PROCEDURE cmsXYZEncoded2Float(fxyz : LPcmsCIEXYZ; XYZ: Pointer); StdCall; external LCMS2_SO;
+PROCEDURE cmsFloat2XYZEncoded(XYZ: Pointer; fXYZ: LPcmsCIEXYZ); StdCall; external LCMS2_SO;
+FUNCTION cmsDeltaE(Lab1, Lab2: LPcmsCIELab): Double; StdCall; external LCMS2_SO;
+FUNCTION cmsCIE94DeltaE(Lab1, Lab2: LPcmsCIELab): Double; StdCall; external LCMS2_SO;
+FUNCTION cmsBFDdeltaE(Lab1, Lab2: LPcmsCIELab): Double; StdCall; external LCMS2_SO;
+FUNCTION cmsCMCdeltaE(Lab1, Lab2: LPcmsCIELab): Double; StdCall; external LCMS2_SO;
+FUNCTION cmsCIE2000DeltaE(Lab1, Lab2: LPcmsCIELab; Kl, Kc, Kh: Double): Double; StdCall; external LCMS2_SO;
+FUNCTION cmsWhitePointFromTemp(var WhitePoint: cmsCIExyY; TempK: cmsFloat64Number) : cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsTempFromWhitePoint(var TeampK: cmsFloat64Number; var WhitePoint: cmsCIExyY) : cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsAdaptToIlluminant(Result: LPcmsCIEXYZ; SourceWhitePt: LPcmsCIEXYZ;
+                              Illuminant: LPcmsCIEXYZ; Value: LPcmsCIEXYZ): cmsBool; StdCall; external LCMS2_SO;
+// CIECAM02 bindings: Init yields the opaque model pointer that Done, Forward and
+// Reverse take as hModel. Forward maps pIn (LPcmsCIEXYZ) to pOut (LPcmsJCh); Reverse
+// takes the opposite pointer types.
+// NOTE(review): lcms2.h appears to declare cmsCIECAM02Init with a leading cmsContext
+// argument that this binding omits -- confirm; a mismatch under StdCall would
+// unbalance the stack.
+FUNCTION  cmsCIECAM02Init(pVC : LPcmsViewingConditions ) : Pointer; StdCall; external LCMS2_SO;
+PROCEDURE cmsCIECAM02Done(hModel : Pointer); StdCall; external LCMS2_SO;
+PROCEDURE cmsCIECAM02Forward(hModel: Pointer; pIn: LPcmsCIEXYZ; pOut: LPcmsJCh ); StdCall; external LCMS2_SO;
+PROCEDURE cmsCIECAM02Reverse(hModel: Pointer; pIn: LPcmsJCh;   pOut: LPcmsCIEXYZ ); StdCall; external LCMS2_SO;
+FUNCTION  cmsBuildSegmentedToneCurve(ContextID: cmsContext; nSegments: cmsInt32Number; Segments: LPcmsCurveSegmentArray): LPcmsToneCurve; StdCall; external LCMS2_SO;
+FUNCTION  cmsBuildParametricToneCurve(ContextID: cmsContext;  CType: cmsInt32Number; Params: LPcmsFloat64NumberArray): LPcmsToneCurve; StdCall; external LCMS2_SO;
+FUNCTION  cmsBuildGamma(ContextID: cmsContext; Gamma: cmsFloat64Number): LPcmsToneCurve; StdCall; external LCMS2_SO;
+FUNCTION  cmsBuildTabulatedToneCurve16(ContextID: cmsContext; nEntries: cmsInt32Number; values: LPcmsUInt16NumberArray): LPcmsToneCurve; StdCall; external LCMS2_SO;
+FUNCTION  cmsBuildTabulatedToneCurveFloat(ContextID: cmsContext; nEntries: cmsUInt32Number; values: LPcmsFloat32NumberArray): LPcmsToneCurve; StdCall; external LCMS2_SO;
+PROCEDURE cmsFreeToneCurve(Curve: LPcmsToneCurve); StdCall; external LCMS2_SO;
+PROCEDURE cmsFreeToneCurveTriple(Curve: LPLPcmsToneCurveArray); StdCall; external LCMS2_SO;
+FUNCTION  cmsDupToneCurve(Src: LPcmsToneCurve): LPcmsToneCurve; StdCall; external LCMS2_SO;
+FUNCTION  cmsReverseToneCurve(InGamma: LPcmsToneCurve): LPcmsToneCurve; StdCall; external LCMS2_SO;
+FUNCTION  cmsReverseToneCurveEx(nResultSamples: cmsInt32Number; InGamma: LPcmsToneCurve): LPcmsToneCurve; StdCall; external LCMS2_SO;
+FUNCTION  cmsJoinToneCurve(ContextID: cmsContext; X, Y: LPcmsToneCurve; nPoints: cmsUInt32Number ): LPcmsToneCurve; StdCall; external LCMS2_SO;
+FUNCTION  cmsSmoothToneCurve(Tab: LPcmsToneCurve; lambda: cmsFloat64Number): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsEvalToneCurveFloat(Curve: LPcmsToneCurve; v: cmsFloat32Number):cmsFloat32Number; StdCall; external LCMS2_SO;
+FUNCTION  cmsEvalToneCurve16(Curve: LPcmsToneCurve; v:cmsUInt16Number):cmsUInt16Number; StdCall; external LCMS2_SO;
+FUNCTION  cmsIsToneCurveMultisegment(InGamma: LPcmsToneCurve):cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsIsToneCurveLinear(Curve: LPcmsToneCurve):cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsIsToneCurveMonotonic(t: LPcmsToneCurve):cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsIsToneCurveDescending(t: LPcmsToneCurve):cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsGetToneCurveParametricType(t: LPcmsToneCurve):cmsInt32Number; StdCall; external LCMS2_SO;
+FUNCTION  cmsEstimateGamma(t: LPcmsToneCurve; Precision:cmsFloat64Number):cmsFloat64Number; StdCall; external LCMS2_SO;
+FUNCTION  cmsGetToneCurveEstimatedTableEntries(t: LPcmsToneCurve): cmsUInt32Number; StdCall; external LCMS2_SO;
+FUNCTION  cmsGetToneCurveEstimatedTable(t: LPcmsToneCurve): LPcmsUInt16Number; StdCall; external LCMS2_SO;
+FUNCTION  cmsPipelineAlloc(ContextID: cmsContext; InputChannels, OutputChannels: cmsUInt32Number): LPcmsPipeline; StdCall; external LCMS2_SO;
+PROCEDURE cmsPipelineFree(lut: LPcmsPipeline); StdCall; external LCMS2_SO;
+FUNCTION  cmsPipelineDup(Orig: LPcmsPipeline): LPcmsPipeline; StdCall; external LCMS2_SO;
+FUNCTION  cmsGetPipelineContextID(lut: LPcmsPipeline) : cmsContext; StdCall; external LCMS2_SO;
+FUNCTION  cmsPipelineInputChannels(lut: LPcmsPipeline): cmsUInt32Number; StdCall; external LCMS2_SO;
+FUNCTION  cmsPipelineOutputChannels(lut: LPcmsPipeline): cmsUInt32Number; StdCall; external LCMS2_SO;
+FUNCTION cmsPipelineStageCount(lut: LPcmsPipeline): cmsUInt32Number; StdCall; external LCMS2_SO;
+FUNCTION cmsPipelineGetPtrToFirstStage(lut: LPcmsPipeline): LPcmsStage; StdCall; external LCMS2_SO;
+FUNCTION cmsPipelineGetPtrToLastStage(lut: LPcmsPipeline): LPcmsStage; StdCall; external LCMS2_SO;
+
+PROCEDURE cmsPipelineEval16(Inv, Outv: LPcmsUInt16NumberArray; lut: LPcmsPipeline); StdCall; external LCMS2_SO;
+PROCEDURE cmsPipelineEvalFloat(Inv, Outv: LPcmsFloat32NumberArray; lut: LPcmsPipeline); StdCall; external LCMS2_SO;
+
+FUNCTION cmsPipelineEvalReverseFloat(Target, Result, Hint: LPcmsFloat32NumberArray; lut: LPcmsPipeline): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsPipelineCat(l1, l2: LPcmsPipeline): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsPipelineSetSaveAs8bitsFlag(lut: LPcmsPipeline; On: cmsBool): cmsBool; StdCall; external LCMS2_SO;
+PROCEDURE cmsPipelineInsertStage(lut: LPcmsPipeline; loc: cmsStageLoc; mpe: LPcmsStage); StdCall; external LCMS2_SO;
+PROCEDURE cmsPipelineUnlinkStage(lut: LPcmsPipeline; loc: cmsStageLoc; mpe: LPLPcmsStage); StdCall; external LCMS2_SO;
+FUNCTION  cmsStageAllocIdentity(ContextID: cmsContext; nChannels: cmsUInt32Number): LPcmsStage; StdCall; external LCMS2_SO;
+FUNCTION  cmsStageAllocToneCurves(ContextID: cmsContext; nChannels: cmsUInt32Number; Curves: LPLPcmsToneCurveArray): LPcmsStage; StdCall; external LCMS2_SO;
+FUNCTION  cmsStageAllocMatrix(ContextID: cmsContext; Rows, Cols: cmsUInt32Number; Matrix, Offset: LPcmsFloat64NumberArray): LPcmsStage; StdCall; external LCMS2_SO;
+FUNCTION  cmsStageAllocCLut16bit(ContextID: cmsContext; nGridPoints: cmsUInt32Number; inputChan, outputChan: cmsUInt32Number; Table: LPcmsUInt16NumberArray): LPcmsStage; StdCall; external LCMS2_SO;
+FUNCTION  cmsStageAllocCLutFloat(ContextID: cmsContext; nGridPoints: cmsUInt32Number; inputChan, outputChan: cmsUInt32Number; Table: LPcmsFloat32NumberArray): LPcmsStage; StdCall; external LCMS2_SO;
+FUNCTION  cmsStageAllocCLut16bitGranular(ContextID: cmsContext; nGridPoints: LPcmsUInt32NumberArray; inputChan, outputChan: cmsUInt32Number; Table: LPcmsUInt16NumberArray): LPcmsStage; StdCall; external LCMS2_SO;
+FUNCTION  cmsStageAllocCLutFloatGranular(ContextID: cmsContext; nGridPoints: LPcmsUInt32NumberArray; inputChan, outputChan: cmsUInt32Number; Table: LPcmsFloat32NumberArray): LPcmsStage; StdCall; external LCMS2_SO;
+FUNCTION  cmsStageDup(mpe: LPcmsStage): LPcmsStage; StdCall; external LCMS2_SO;
+PROCEDURE cmsStageFree(mpe: LPcmsStage); StdCall; external LCMS2_SO;
+FUNCTION  cmsStageNext(mpe: LPcmsStage): LPcmsStage; StdCall; external LCMS2_SO;
+FUNCTION cmsStageInputChannels(mpe: LPcmsStage): cmsUInt32Number; StdCall; external LCMS2_SO;
+FUNCTION cmsStageOutputChannels(mpe: LPcmsStage): cmsUInt32Number; StdCall; external LCMS2_SO;
+FUNCTION cmsStageType(mpe: LPcmsStage): cmsStageSignature; StdCall; external LCMS2_SO;
+FUNCTION cmsStageData(mpe: LPcmsStage): Pointer; StdCall; external LCMS2_SO;
+FUNCTION cmsStageSampleCLut16bit(mpe: LPcmsStage;  Sampler: cmsSAMPLER16;    Cargo: Pointer; dwFlags: cmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsStageSampleCLutFloat(mpe: LPcmsStage;  Sampler: cmsSAMPLERFLOAT; Cargo: Pointer; dwFlags: cmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsSliceSpace16(nInputs: cmsUInt32Number; clutPoints: LPcmsUInt32NumberArray;
+                                                   Sampler: cmsSAMPLER16; Cargo: Pointer): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsSliceSpaceFloat(nInputs: cmsUInt32Number; clutPoints: LPcmsUInt32NumberArray;
+                                                   Sampler: cmsSAMPLERFLOAT; Cargo: Pointer): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsMLUalloc(ContextID: cmsContext; nItems: cmsUInt32Number): LPcmsMLU; StdCall; external LCMS2_SO;
+PROCEDURE cmsMLUfree(mlu: LPcmsMLU); StdCall; external LCMS2_SO;
+FUNCTION  cmsMLUdup(mlu: LPcmsMLU): LPcmsMLU; StdCall; external LCMS2_SO;
+
+FUNCTION  cmsMLUsetASCII(mlu: LPcmsMLU; LanguageCode, CountryCode, ASCIIString: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsMLUsetWide(mlu: LPcmsMLU; LanguageCode, CountryCode: PAnsiChar; WideString: PWChar): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsMLUgetASCII(mlu: LPcmsMLU; LanguageCode, CountryCode: PAnsiChar; Buffer: PAnsiChar; BufferSize: cmsUInt32Number): cmsUInt32Number; StdCall; external LCMS2_SO;
+
+FUNCTION cmsMLUgetWide(mlu: LPcmsMLU; LanguageCode, CountryCode: PAnsiChar; Buffer: PWChar; BufferSize: cmsUInt32Number): cmsUInt32Number; StdCall; external LCMS2_SO;
+
+FUNCTION cmsMLUgetTranslation(mlu: LPcmsMLU; LanguageCode, CountryCode, ObtainedLanguage, ObtainedCountry: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsAllocNamedColorList(ContextID: cmsContext; n, ColorantCount :cmsUInt32Number;
+                                                           Prefix, Suffix: PAnsiChar): LPcmsNAMEDCOLORLIST; StdCall; external LCMS2_SO;
+
+PROCEDURE cmsFreeNamedColorList(v: LPcmsNAMEDCOLORLIST); StdCall; external LCMS2_SO;
+FUNCTION  cmsDupNamedColorList(v: LPcmsNAMEDCOLORLIST): LPcmsNAMEDCOLORLIST; StdCall; external LCMS2_SO;
+FUNCTION  cmsAppendNamedColor(v: LPcmsNAMEDCOLORLIST; Name: PAnsiChar;
+                                                             PCS, Colorant : LPcmsUInt16NumberArray): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsNamedColorCount(v: LPcmsNAMEDCOLORLIST): cmsUInt32Number; StdCall; external LCMS2_SO;
+FUNCTION cmsNamedColorIndex(v: LPcmsNAMEDCOLORLIST; Name: PAnsiChar): cmsInt32Number; StdCall; external LCMS2_SO;
+
+FUNCTION cmsNamedColorInfo(v: LPcmsNAMEDCOLORLIST; nColor : cmsUInt32Number;
+                                                      Name,Prefix, Suffix : PAnsiChar;
+                                                       PCS, Colorant : LPcmsUInt16NumberArray): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsGetNamedColorList(xform: cmsHTRANSFORM ): LPcmsNAMEDCOLORLIST; StdCall; external LCMS2_SO;
+
+FUNCTION   cmsAllocProfileSequenceDescription(ContextID: cmsContext; n: cmsUInt32Number):LPcmsSEQ; StdCall; external LCMS2_SO;
+FUNCTION   cmsDupProfileSequenceDescription(pseq: LPcmsSEQ):LPcmsSEQ; StdCall; external LCMS2_SO;
+PROCEDURE  cmsFreeProfileSequenceDescription(pseq: LPcmsSEQ); StdCall; external LCMS2_SO;
+
+FUNCTION  cmsDictAlloc(ContextID: cmsContext): cmsHANDLE; StdCall; external LCMS2_SO;
+PROCEDURE cmsDictFree(hDict: cmsHANDLE);  StdCall; external LCMS2_SO;
+FUNCTION  cmsDictDup(hDict: cmsHANDLE): cmsHANDLE;  StdCall; external LCMS2_SO;
+
+FUNCTION cmsDictAddEntry(hDict: cmsHANDLE; Name, Value: PWChar; DisplayName, DisplayValue : LPcmsMLU): cmsBool;  StdCall; external LCMS2_SO;
+FUNCTION cmsDictGetEntryList(hDict: cmsHANDLE): LPcmsDICTentry; StdCall; external LCMS2_SO;
+FUNCTION cmsDictNextEntry(e : LPcmsDICTentry): LPcmsDICTentry;  StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreateProfilePlaceholder(ContextID: cmsContext): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsGetProfileContextID(hProfile: cmsHPROFILE):cmsContext; StdCall; external LCMS2_SO;
+FUNCTION cmsGetTagCount(hProfile: cmsHPROFILE): cmsInt32Number; StdCall; external LCMS2_SO;
+FUNCTION cmsGetTagSignature(hProfile: cmsHPROFILE; n: cmsUInt32Number): cmsTagSignature; StdCall; external LCMS2_SO;
+FUNCTION cmsIsTag(hProfile: cmsHPROFILE; sig: cmsTagSignature ): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsReadTag(hProfile: cmsHPROFILE; sig: cmsTagSignature ): Pointer; StdCall; external LCMS2_SO;
+FUNCTION cmsWriteTag(hProfile: cmsHPROFILE; sig: cmsTagSignature; data: Pointer): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsLinkTag(hProfile: cmsHPROFILE; sig: cmsTagSignature; dest: cmsTagSignature): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsTagLinkedTo(hProfile: cmsHPROFILE; sig: cmsTagSignature):cmsTagSignature; StdCall; external LCMS2_SO;
+
+FUNCTION cmsReadRawTag(hProfile: cmsHPROFILE; sig: cmsTagSignature; Buffer: Pointer; BufferSize: cmsUInt32Number): cmsInt32Number; StdCall; external LCMS2_SO;
+FUNCTION cmsWriteRawTag(hProfile: cmsHPROFILE; sig: cmsTagSignature; data: Pointer; Size: cmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION  cmsGetHeaderFlags(hProfile: cmsHPROFILE): cmsUInt32Number; StdCall; external LCMS2_SO;
+PROCEDURE cmsGetHeaderAttributes(hProfile: cmsHPROFILE; Flags: LPcmsUInt64Number); StdCall; external LCMS2_SO;
+PROCEDURE cmsGetHeaderProfileID(hProfile: cmsHPROFILE; ProfileID: LPcmsUInt8Number); StdCall; external LCMS2_SO;
+
+FUNCTION  cmsGetHeaderRenderingIntent(hProfile: cmsHPROFILE): cmsUInt32Number; StdCall; external LCMS2_SO;
+PROCEDURE cmsSetHeaderFlags(hProfile: cmsHPROFILE; Flags: cmsUInt32Number); StdCall; external LCMS2_SO;
+FUNCTION  cmsGetHeaderManufacturer(hProfile: cmsHPROFILE): cmsUInt32Number; StdCall; external LCMS2_SO;
+PROCEDURE cmsSetHeaderManufacturer(hProfile: cmsHPROFILE; manufacturer: cmsUInt32Number ); StdCall; external LCMS2_SO;
+FUNCTION  cmsGetHeaderModel(hProfile: cmsHPROFILE): cmsUInt32Number; StdCall; external LCMS2_SO;
+PROCEDURE cmsSetHeaderModel(hProfile: cmsHPROFILE; model: cmsUInt32Number ); StdCall; external LCMS2_SO;
+PROCEDURE cmsSetHeaderAttributes(hProfile: cmsHPROFILE; Flags: cmsUInt64Number); StdCall; external LCMS2_SO;
+PROCEDURE cmsSetHeaderProfileID(hProfile: cmsHPROFILE; ProfileID: LPcmsUInt8Number); StdCall; external LCMS2_SO;
+PROCEDURE cmsSetHeaderRenderingIntent(hProfile: cmsHPROFILE; RenderingIntent: cmsUInt32Number ); StdCall; external LCMS2_SO;
+
+FUNCTION  cmsGetPCS(hProfile: cmsHPROFILE):cmsColorSpaceSignature; StdCall; external LCMS2_SO;
+PROCEDURE cmsSetPCS(hProfile: cmsHPROFILE; pcs: cmsColorSpaceSignature); StdCall; external LCMS2_SO;
+FUNCTION  cmsGetColorSpace(hProfile: cmsHPROFILE): cmsColorSpaceSignature; StdCall; external LCMS2_SO;
+PROCEDURE cmsSetColorSpace(hProfile: cmsHPROFILE; sig: cmsColorSpaceSignature); StdCall; external LCMS2_SO;
+FUNCTION  cmsGetDeviceClass(hProfile: cmsHPROFILE): cmsProfileClassSignature; StdCall; external LCMS2_SO;
+PROCEDURE cmsSetDeviceClass(hProfile: cmsHPROFILE; sig: cmsProfileClassSignature); StdCall; external LCMS2_SO;
+PROCEDURE cmsSetProfileVersion(hProfile: cmsHPROFILE; Version: cmsFloat64Number); StdCall; external LCMS2_SO;
+FUNCTION  cmsGetProfileVersion(hProfile: cmsHPROFILE): cmsFloat64Number; StdCall; external LCMS2_SO;
+
+FUNCTION  cmsGetEncodedICCversion(hProfile: cmsHPROFILE): cmsUInt32Number; StdCall; external LCMS2_SO;
+PROCEDURE cmsSetEncodedICCversion(hProfile: cmsHPROFILE; Version: cmsUInt32Number); StdCall; external LCMS2_SO;
+
+
+FUNCTION   cmsIsIntentSupported(hProfile: cmsHPROFILE; Intent: cmsUInt32Number; UsedDirection: cmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION   cmsIsMatrixShaper(hProfile: cmsHPROFILE): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION   cmsIsCLUT(hProfile: cmsHPROFILE; Intent: cmsUInt32Number; UsedDirection: cmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION _cmsICCcolorSpace(OurNotation: Integer): cmsColorSpaceSignature; StdCall; external LCMS2_SO;
+FUNCTION _cmsLCMScolorSpace(ProfileSpace: cmsColorSpaceSignature): Integer; StdCall; external LCMS2_SO;
+
+FUNCTION cmsChannelsOf( ColorSpace: cmsColorSpaceSignature): cmsUInt32Number; StdCall; external LCMS2_SO;
+
+FUNCTION cmsFormatterForColorspaceOfProfile(hProfile: cmsHPROFILE; nBytes: cmsUInt32Number; lIsFloat: cmsBool): cmsUInt32Number; StdCall; external LCMS2_SO;
+FUNCTION cmsFormatterForPCSOfProfile(hProfile: cmsHPROFILE; nBytes: cmsUInt32Number; lIsFloat: cmsBool): cmsUInt32Number; StdCall; external LCMS2_SO;
+
+
+FUNCTION cmsGetProfileInfo(hProfile: cmsHPROFILE; Info: cmsInfoType; LanguageCode, CountryCode: PAnsiChar;
+                                                            Buffer: PWChar; BufferSize: cmsUInt32Number): cmsUInt32Number; StdCall; external LCMS2_SO;
+
+FUNCTION cmsGetProfileInfoASCII(hProfile: cmsHPROFILE; Info: cmsInfoType; LanguageCode, CountryCode: PAnsiChar;
+                                                            Buffer: PAnsiChar; BufferSize: cmsUInt32Number): cmsUInt32Number; StdCall; external LCMS2_SO;
+
+
+FUNCTION cmsOpenIOhandlerFromFile(ContextID: cmsContext; FileName, AccessMode: PAnsiChar): LPcmsIOHANDLER; StdCall; external LCMS2_SO;
+// FUNCTION cmsOpenIOhandlerFromStream(ContextID: cmsContext; FILE* Stream): LPcmsIOHANDLER; StdCall; external LCMS2_SO;
+FUNCTION cmsOpenIOhandlerFromMem(ContextID: cmsContext; Buffer: Pointer; size: cmsUInt32Number; AccessMode: PAnsiChar): LPcmsIOHANDLER; StdCall; external LCMS2_SO;
+FUNCTION cmsOpenIOhandlerFromNULL(ContextID: cmsContext): LPcmsIOHANDLER; StdCall; external LCMS2_SO;
+FUNCTION cmsCloseIOhandler(io: LPcmsIOHANDLER): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsMD5computeID(hProfile: cmsHPROFILE): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION   cmsOpenProfileFromFile(ICCProfile : PAnsiChar; sAccess: PAnsiChar): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION   cmsOpenProfileFromFileTHR(ContextID: cmsContext; ICCProfile, sAccess: PAnsiChar): cmsHPROFILE; StdCall; external LCMS2_SO;
+// FUNCTION      CMSEXPORT cmsOpenProfileFromStream(FILE* ICCProfile, const char* sAccess): cmsHPROFILE; StdCall; external LCMS2_SO;
+// FUNCTION      CMSEXPORT cmsOpenProfileFromStreamTHR(ContextID: cmsContext; FILE* ICCProfile, const char* sAccess): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION   cmsOpenProfileFromMem(MemPtr: Pointer; dwSize: cmsUInt32Number): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION   cmsOpenProfileFromMemTHR(ContextID: cmsContext; MemPtr: Pointer; dwSize: cmsUInt32Number): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION   cmsOpenProfileFromIOhandlerTHR(ContextID: cmsContext; io: LPcmsIOHANDLER): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION   cmsCloseProfile(hProfile: cmsHPROFILE): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION   cmsSaveProfileToFile(hProfile: cmsHPROFILE; FileName: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+// FUNCTION         CMSEXPORT cmsSaveProfileToStream(hProfile: cmsHPROFILE, FILE* Stream): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION   cmsSaveProfileToMem(hProfile: cmsHPROFILE; MemPtr: Pointer; BytesNeeded: LPcmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION   cmsSaveProfileToIOhandler(hProfile: cmsHPROFILE; io: LPcmsIOHANDLER):cmsUInt32Number; StdCall; external LCMS2_SO;
+
+FUNCTION  cmsCreateRGBProfileTHR(ContextID: cmsContext;
+                                                   WhitePoint: LPcmsCIExyY;
+                                                   Primaries: LPcmsCIExyYTRIPLE;
+                                                   TransferFunction: LPLPcmsToneCurveArray): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION  cmsCreateRGBProfile(WhitePoint: LPcmsCIExyY;
+                                                   Primaries: LPcmsCIExyYTRIPLE;
+                                                   TransferFunction: LPLPcmsToneCurveArray): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreateGrayProfileTHR(ContextID: cmsContext;
+                                                    WhitePoint: LPcmsCIExyY;
+                                                    TransferFunction: LPcmsToneCurve): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreateGrayProfile(WhitePoint: LPcmsCIExyY;
+                                                     TransferFunction: LPcmsToneCurve): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreateLinearizationDeviceLinkTHR(ContextID: cmsContext;
+                                                                 ColorSpace: cmsColorSpaceSignature;
+                                                                 TransferFunctions: LPLPcmsToneCurveArray): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreateLinearizationDeviceLink(ColorSpace: cmsColorSpaceSignature;
+                                                                 TransferFunctions: LPLPcmsToneCurveArray): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreateInkLimitingDeviceLinkTHR(ContextID: cmsContext;
+                                                              ColorSpace: cmsColorSpaceSignature; Limit: cmsFloat64Number): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreateInkLimitingDeviceLink(ColorSpace: cmsColorSpaceSignature; Limit: cmsFloat64Number): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+
+FUNCTION cmsCreateLab2ProfileTHR(ContextID: cmsContext; WhitePoint: LPcmsCIExyY): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION cmsCreateLab2Profile(WhitePoint: LPcmsCIExyY): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION cmsCreateLab4ProfileTHR(ContextID: cmsContext; WhitePoint: LPcmsCIExyY): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION cmsCreateLab4Profile(WhitePoint: LPcmsCIExyY): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreateXYZProfileTHR(ContextID: cmsContext): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION cmsCreateXYZProfile: cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreate_sRGBProfileTHR(ContextID: cmsContext): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION cmsCreate_sRGBProfile: cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreateBCHSWabstractProfileTHR(ContextID: cmsContext;
+                                                             nLUTPoints: Integer;
+                                                             Bright,
+                                                             Contrast,
+                                                             Hue,
+                                                             Saturation: cmsFloat64Number;
+                                                             TempSrc,
+                                                             TempDest: Integer): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreateBCHSWabstractProfile(   nLUTPoints: Integer;
+                                                             Bright,
+                                                             Contrast,
+                                                             Hue,
+                                                             Saturation: cmsFloat64Number;
+                                                             TempSrc,
+                                                             TempDest: Integer): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION  cmsCreateNULLProfileTHR(ContextID: cmsContext): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION  cmsCreateNULLProfile: cmsHPROFILE; StdCall; external LCMS2_SO;
+
+// Converts a transform to a devicelink profile
+FUNCTION  cmsTransform2DeviceLink(hTransform: cmsHTRANSFORM; Version: cmsFloat64Number; dwFlags: cmsUInt32Number): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+// Call with NULL as parameters to get the intent count
+FUNCTION cmsGetSupportedIntents(nMax: cmsUInt32Number; Codes: LPcmsUInt32Number; Descriptions: LPPAnsiChar): cmsUInt32Number; StdCall; external LCMS2_SO;
+
+FUNCTION cmsFLAGS_GRIDPOINTS(n: Integer): Integer; begin cmsFLAGS_GRIDPOINTS :=  (((n) and $FF) shl 16) end; // Keeps only the low 8 bits of n and shifts them left by 16, i.e. into bits 16..23 of the result
+
+
+FUNCTION   cmsCreateTransformTHR(ContextID: cmsContext;
+                                                  Input: cmsHPROFILE;
+                                                  InputFormat: cmsUInt32Number;
+                                                  Output: cmsHPROFILE;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Intent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall; external LCMS2_SO;
+
+FUNCTION   cmsCreateTransform(Input: cmsHPROFILE;
+                                                  InputFormat: cmsUInt32Number;
+                                                  Output: cmsHPROFILE;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Intent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall; external LCMS2_SO;
+
+FUNCTION   cmsCreateProofingTransformTHR(ContextID: cmsContext;
+                                                  Input: cmsHPROFILE;
+                                                  InputFormat: cmsUInt32Number;
+                                                  Output: cmsHPROFILE;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Proofing: cmsHPROFILE;
+                                                  Intent: cmsUInt32Number;
+                                                  ProofingIntent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall; external LCMS2_SO;
+
+FUNCTION   cmsCreateProofingTransform(Input: cmsHPROFILE;
+                                                  InputFormat: cmsUInt32Number;
+                                                  Output: cmsHPROFILE;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Proofing: cmsHPROFILE;
+                                                  Intent: cmsUInt32Number;
+                                                  ProofingIntent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall; external LCMS2_SO;
+
+FUNCTION   cmsCreateMultiprofileTransformTHR(ContextID: cmsContext;
+                                                  hProfiles: LPcmsHPROFILEArray;
+                                                  nProfiles: cmsUInt32Number;
+                                                  InputFormat: cmsUInt32Number;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Intent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall; external LCMS2_SO;
+
+
+FUNCTION   cmsCreateMultiprofileTransform( hProfiles: LPcmsHPROFILEArray;
+                                                  nProfiles: cmsUInt32Number;
+                                                  InputFormat: cmsUInt32Number;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Intent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall; external LCMS2_SO;
+
+
+FUNCTION   cmsCreateExtendedTransform(ContextID: cmsContext;
+                                                   nProfiles: cmsUInt32Number;
+                                                   hProfiles: LPcmsHPROFILEArray;
+                                                   BPC: LPcmsBoolArray;
+                                                   Intents: LPcmsUInt32NumberArray;
+                                                   AdaptationStates: LPcmsFloat64NumberArray;
+                                                   hGamutProfile: cmsHPROFILE;
+                                                   nGamutPCSposition: cmsUInt32Number;
+                                                   InputFormat,
+                                                   OutputFormat: cmsUInt32Number;
+                                                   dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall; external LCMS2_SO;
+
+PROCEDURE  cmsDeleteTransform(hTransform: cmsHTRANSFORM); StdCall; external LCMS2_SO;
+
+PROCEDURE  cmsDoTransform(Transform: cmsHTRANSFORM; InputBuffer, OutputBuffer: Pointer; size: cmsUInt32Number);  StdCall; external LCMS2_SO;
+PROCEDURE  cmsDoTransformStride(Transform: cmsHTRANSFORM; InputBuffer, OutputBuffer: Pointer; size: cmsUInt32Number; stride: cmsUInt32Number);  StdCall; external LCMS2_SO;
+PROCEDURE  cmsSetAlarmCodes( NewAlarm: LPcmsUInt16NumberArray);  StdCall; external LCMS2_SO;
+PROCEDURE  cmsGetAlarmCodes(NewAlarm: LPcmsUInt16NumberArray); StdCall; external LCMS2_SO;
+
+// Adaptation state for absolute colorimetric intent
+FUNCTION  cmsSetAdaptationState(d: cmsFloat64Number):cmsFloat64Number; StdCall; external LCMS2_SO;
+
+// Grab the ContextID from an open transform. Returns NULL if a NULL transform is passed
+FUNCTION  cmsGetTransformContextID(hTransform: cmsHTRANSFORM):cmsContext; StdCall; external LCMS2_SO;
+
+// For backwards compatibility
+FUNCTION  cmsChangeBuffersFormat(hTransform: cmsHTRANSFORM; InputFormat, OutputFormat: cmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+
+
+// lcms2 unified method to access postscript color resources
+FUNCTION cmsGetPostScriptColorResource(ContextID: cmsContext;   RType: cmsPSResourceType;
+                                                                hProfile: cmsHPROFILE;
+                                                                Intent: cmsUInt32Number;
+                                                                dwFlags: cmsUInt32Number;
+                                                                io: LPcmsIOHANDLER): cmsUInt32Number; StdCall; external LCMS2_SO;
+
+FUNCTION cmsGetPostScriptCSA(ContextID: cmsContext; hProfile: cmsHPROFILE; Intent: cmsUInt32Number; dwFlags: cmsUInt32Number; Buffer: Pointer; dwBufferLen: cmsUInt32Number ): cmsUInt32Number; StdCall; external LCMS2_SO;
+FUNCTION cmsGetPostScriptCRD(ContextID: cmsContext; hProfile: cmsHPROFILE; Intent: cmsUInt32Number; dwFlags: cmsUInt32Number; Buffer: Pointer; dwBufferLen: cmsUInt32Number): cmsUInt32Number; StdCall; external LCMS2_SO;
+
+
+// CGATS.13 parser
+
+FUNCTION  cmsIT8Alloc: cmsHANDLE; StdCall; external LCMS2_SO; // NOTE(review): bound here with no arguments; the lcms2.h prototype appears to take a cmsContext ContextID - confirm before calling
+PROCEDURE cmsIT8Free(hIT8: cmsHANDLE); StdCall; external LCMS2_SO;
+
+// Tables
+
+FUNCTION  cmsIT8TableCount(hIT8: cmsHANDLE): Integer; StdCall; external LCMS2_SO;
+FUNCTION  cmsIT8SetTable(hIT8: cmsHANDLE; nTable: Integer): Integer; StdCall; external LCMS2_SO;
+
+// Persistence
+FUNCTION  cmsIT8LoadFromFile(cFileName: PAnsiChar): cmsHANDLE; StdCall; external LCMS2_SO; // NOTE(review): the lcms2.h prototype appears to take a leading cmsContext ContextID that is absent here - confirm
+FUNCTION  cmsIT8LoadFromMem(Ptr: Pointer; size :DWord): cmsHANDLE; StdCall; external LCMS2_SO; // NOTE(review): the lcms2.h prototype appears to take a leading cmsContext ContextID that is absent here - confirm
+
+FUNCTION cmsIT8SaveToFile(hIT8: cmsHANDLE; cFileName: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8SaveToMem(hIT8: cmsHANDLE; MemPtr: Pointer; BytesNeeded: LPcmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+// Properties
+
+FUNCTION cmsIT8GetSheetType(hIT8: cmsHANDLE): PAnsiChar; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8SetSheetType(hIT8: cmsHANDLE; TheType: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsIT8SetComment(hIT8: cmsHANDLE; cComment: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsIT8SetPropertyStr(hIT8: cmsHANDLE; cProp, Str: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8SetPropertyDbl(hIT8: cmsHANDLE; cProp: PAnsiChar; Val: Double): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8SetPropertyHex(hIT8: cmsHANDLE; cProp: PAnsiChar; Val: Integer): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8SetPropertyUncooked(hIT8: cmsHANDLE; Key, Buffer: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+
+
+FUNCTION cmsIT8GetProperty(hIT8: cmsHANDLE; cProp: PAnsiChar): PAnsiChar; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8GetPropertyDbl(hIT8: cmsHANDLE; cProp: PAnsiChar): Double; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8EnumProperties(hIT8: cmsHANDLE; var PropertyNames: LPPAnsiChar): Integer; StdCall; external LCMS2_SO;
+
+// Datasets
+
+FUNCTION cmsIT8GetDataRowCol(hIT8: cmsHANDLE; row, col: Integer): PAnsiChar; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8GetDataRowColDbl(hIT8: cmsHANDLE; row, col: Integer): Double; StdCall; external LCMS2_SO;
+
+FUNCTION cmsIT8SetDataRowCol(hIT8: cmsHANDLE; row, col: Integer; Val: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8SetDataRowColDbl(hIT8: cmsHANDLE; row, col: Integer; Val: Double): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsIT8GetData(hIT8: cmsHANDLE; cPatch, cSample: PAnsiChar): PAnsiChar; StdCall; external LCMS2_SO;
+
+FUNCTION cmsIT8GetDataDbl(hIT8: cmsHANDLE;cPatch, cSample: PAnsiChar): Double; StdCall; external LCMS2_SO;
+
+FUNCTION cmsIT8SetData(hIT8: cmsHANDLE; cPatch, cSample, Val: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsIT8SetDataDbl(hIT8: cmsHANDLE; cPatch, cSample: PAnsiChar; Val: Double): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsIT8SetDataFormat(hIT8: cmsHANDLE; n: Integer; Sample: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8EnumDataFormat(hIT8: cmsHANDLE; var SampleNames: LPPAnsiChar): Integer; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8GetPatchName(hIT8: cmsHANDLE; nPatch: Integer; Buffer: PAnsiChar): PAnsiChar; StdCall; external LCMS2_SO;
+
+// The LABEL extension
+
+FUNCTION cmsIT8SetTableByLabel(hIT8: cmsHANDLE; cSet, cField, ExpectedType: PAnsiChar): Integer; StdCall; external LCMS2_SO;
+
+FUNCTION cmsIT8FindDataFormat(hIT8: cmsHANDLE; cSample: PAnsiChar): Integer; StdCall; external LCMS2_SO;
+
+// Formatter for double
+PROCEDURE  cmsIT8DefineDblFormat(hIT8: cmsHANDLE; Formatter: PAnsiChar);  StdCall; external LCMS2_SO;
+
+FUNCTION  cmsGBDAlloc(ContextID: cmsContext):cmsHANDLE; StdCall; external LCMS2_SO;
+PROCEDURE cmsGBDFree(hGBD: cmsHANDLE); StdCall; external LCMS2_SO;
+FUNCTION  cmsGDBAddPoint(hGBD: cmsHANDLE; Lab: LPcmsCIELab): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsGDBCompute(hGDB: cmsHANDLE; dwFlags: cmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsGDBCheckPoint(hGBD: cmsHANDLE; Lab: LPcmsCIELab): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsDetectBlackPoint( BlackPoint: LPcmsCIEXYZ; hProfile: cmsHPROFILE; Intent: cmsUInt32Number; dwFlags: cmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsDetectDestinationBlackPoint( BlackPoint: LPcmsCIEXYZ; hProfile: cmsHPROFILE; Intent: cmsUInt32Number; dwFlags: cmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsDetectTAC(hProfile: cmsHPROFILE): cmsFloat64Number; StdCall; external LCMS2_SO;
+
+FUNCTION  cmsDesaturateLab(Lab: LPcmsCIELab; amax, amin, bmax, bmin: cmsFloat64Number): cmsBool; StdCall; external LCMS2_SO;
+
+END.
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/LICENSE_iccjpeg b/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/LICENSE_iccjpeg
new file mode 100755
index 0000000000..dffd80b51e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/LICENSE_iccjpeg
@@ -0,0 +1,67 @@
+
+THIS LICENSE APPLIES ONLY TO iccjpeg.c file
+-----
+In plain English:
+
+1. We don't promise that this software works.  (But if you find any bugs,
+   please let us know!)
+2. You can use this software for whatever you want.  You don't have to pay us.
+3. You may not pretend that you wrote this software.  If you use it in a
+   program, you must acknowledge somewhere in your documentation that
+   you've used the IJG code.
+
+In legalese:
+
+The authors make NO WARRANTY or representation, either express or implied,
+with respect to this software, its quality, accuracy, merchantability, or
+fitness for a particular purpose.  This software is provided "AS IS", and you,
+its user, assume the entire risk as to its quality and accuracy.
+
+This software is copyright (C) 1991-2013, Thomas G. Lane, Guido Vollbeding.
+All Rights Reserved except as specified below.
+
+Permission is hereby granted to use, copy, modify, and distribute this
+software (or portions thereof) for any purpose, without fee, subject to these
+conditions:
+(1) If any part of the source code for this software is distributed, then this
+README file must be included, with this copyright and no-warranty notice
+unaltered; and any additions, deletions, or changes to the original files
+must be clearly indicated in accompanying documentation.
+(2) If only executable code is distributed, then the accompanying
+documentation must state that "this software is based in part on the work of
+the Independent JPEG Group".
+(3) Permission for use of this software is granted only if the user accepts
+full responsibility for any undesirable consequences; the authors accept
+NO LIABILITY for damages of any kind.
+
+These conditions apply to any software derived from or based on the IJG code,
+not just to the unmodified library.  If you use our work, you ought to
+acknowledge us.
+
+Permission is NOT granted for the use of any IJG author's name or company name
+in advertising or publicity relating to this software or products derived from
+it.  This software may be referred to only as "the Independent JPEG Group's
+software".
+
+We specifically permit and encourage the use of this software as the basis of
+commercial products, provided that all warranty or liability claims are
+assumed by the product vendor.
+
+
+The Unix configuration script "configure" was produced with GNU Autoconf.
+It is copyright by the Free Software Foundation but is freely distributable.
+The same holds for its supporting scripts (config.guess, config.sub,
+ltmain.sh).  Another support script, install-sh, is copyright by X Consortium
+but is also freely distributable.
+
+The IJG distribution formerly included code to read and write GIF files.
+To avoid entanglement with the Unisys LZW patent, GIF reading support has
+been removed altogether, and the GIF writer has been simplified to produce
+"uncompressed GIFs".  This technique does not use the LZW algorithm; the
+resulting GIF files are larger than usual, but are readable by all standard
+GIF decoders.
+
+We are required to state that
+    "The Graphics Interchange Format(c) is the Copyright property of
+    CompuServe Incorporated.  GIF(sm) is a Service Mark property of
+    CompuServe Incorporated."
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/Makefile.am b/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/Makefile.am
new file mode 100644
index 0000000000..d0a0897f3d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/Makefile.am
@@ -0,0 +1,22 @@
+#
+# Makefile for building jpgicc
+# Written by Bob Friesenhahn, June 2003
+# Bugs introduced by Marti Maria on October 2004
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
+              -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+if HasJPEG
+bin_PROGRAMS = jpgicc
+else
+bin_PROGRAMS = 
+endif
+
+jpgicc_LDADD = $(top_builddir)/src/liblcms2.la @JPEGICC_DEPLIBS@
+jpgicc_LDFLAGS = @LDFLAGS@
+jpgicc_SOURCES = jpgicc.c iccjpeg.c ../common/xgetopt.c ../common/vprf.c ../common/utils.h
+man_MANS = jpgicc.1
+
+EXTRA_DIST = iccjpeg.h $(man_MANS)
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/Makefile.in b/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/Makefile.in
new file mode 100644
index 0000000000..69032832de
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/Makefile.in
@@ -0,0 +1,739 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Makefile for building jpgicc
+# Written by Bob Friesenhahn, June 2003
+# Bugs introduced by Marti Maria on October 2004
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+@HasJPEG_TRUE@bin_PROGRAMS = jpgicc$(EXEEXT)
+subdir = utils/jpgicc
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
+	$(top_srcdir)/m4/ax_append_compile_flags.m4 \
+	$(top_srcdir)/m4/ax_append_flag.m4 \
+	$(top_srcdir)/m4/ax_check_compile_flag.m4 \
+	$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+	$(top_srcdir)/m4/ax_require_defined.m4 \
+	$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+	$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+	$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"
+PROGRAMS = $(bin_PROGRAMS)
+am__dirstamp = $(am__leading_dot)dirstamp
+am_jpgicc_OBJECTS = jpgicc.$(OBJEXT) iccjpeg.$(OBJEXT) \
+	../common/xgetopt.$(OBJEXT) ../common/vprf.$(OBJEXT)
+jpgicc_OBJECTS = $(am_jpgicc_OBJECTS)
+jpgicc_DEPENDENCIES = $(top_builddir)/src/liblcms2.la
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 = 
+jpgicc_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(jpgicc_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(jpgicc_SOURCES)
+DIST_SOURCES = $(jpgicc_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
+man1dir = $(mandir)/man1
+NROFF = nroff
+MANS = $(man_MANS)
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JPEGICC_DEPLIBS = @JPEGICC_DEPLIBS@
+LCMS_LIB_DEPLIBS = @LCMS_LIB_DEPLIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBRARY_AGE = @LIBRARY_AGE@
+LIBRARY_CURRENT = @LIBRARY_CURRENT@
+LIBRARY_REVISION = @LIBRARY_REVISION@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBTOOL_DEPS = @LIBTOOL_DEPS@
+LIB_JPEG = @LIB_JPEG@
+LIB_MATH = @LIB_MATH@
+LIB_THREAD = @LIB_THREAD@
+LIB_TIFF = @LIB_TIFF@
+LIB_ZLIB = @LIB_ZLIB@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_CXX = @PTHREAD_CXX@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TIFFICC_DEPLIBS = @TIFFICC_DEPLIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+inline = @inline@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
+              -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+
+jpgicc_LDADD = $(top_builddir)/src/liblcms2.la @JPEGICC_DEPLIBS@
+jpgicc_LDFLAGS = @LDFLAGS@
+jpgicc_SOURCES = jpgicc.c iccjpeg.c ../common/xgetopt.c ../common/vprf.c ../common/utils.h
+man_MANS = jpgicc.1
+EXTRA_DIST = iccjpeg.h $(man_MANS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign utils/jpgicc/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign utils/jpgicc/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-binPROGRAMS: $(bin_PROGRAMS)
+	@$(NORMAL_INSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
+	fi; \
+	for p in $$list; do echo "$$p $$p"; done | \
+	sed 's/$(EXEEXT)$$//' | \
+	while read p p1; do if test -f $$p \
+	 || test -f $$p1 \
+	  ; then echo "$$p"; echo "$$p"; else :; fi; \
+	done | \
+	sed -e 'p;s,.*/,,;n;h' \
+	    -e 's|.*|.|' \
+	    -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+	sed 'N;N;N;s,\n, ,g' | \
+	$(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+	  { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+	    if ($$2 == $$4) files[d] = files[d] " " $$1; \
+	    else { print "f", $$3 "/" $$4, $$1; } } \
+	  END { for (d in files) print "f", d, files[d] }' | \
+	while read type dir files; do \
+	    if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+	    test -z "$$files" || { \
+	    echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+	    $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+	    } \
+	; done
+
+uninstall-binPROGRAMS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	files=`for p in $$list; do echo "$$p"; done | \
+	  sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+	      -e 's/$$/$(EXEEXT)/' \
+	`; \
+	test -n "$$list" || exit 0; \
+	echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+	cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+	@list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
+	echo " rm -f" $$list; \
+	rm -f $$list || exit $$?; \
+	test -n "$(EXEEXT)" || exit 0; \
+	list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+	echo " rm -f" $$list; \
+	rm -f $$list
+../common/$(am__dirstamp):
+	@$(MKDIR_P) ../common
+	@: > ../common/$(am__dirstamp)
+../common/xgetopt.$(OBJEXT): ../common/$(am__dirstamp)
+../common/vprf.$(OBJEXT): ../common/$(am__dirstamp)
+
+jpgicc$(EXEEXT): $(jpgicc_OBJECTS) $(jpgicc_DEPENDENCIES) $(EXTRA_jpgicc_DEPENDENCIES) 
+	@rm -f jpgicc$(EXEEXT)
+	$(AM_V_CCLD)$(jpgicc_LINK) $(jpgicc_OBJECTS) $(jpgicc_LDADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+	-rm -f ../common/*.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+.c.o:
+	$(AM_V_CC)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+	$(AM_V_CC)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+	$(AM_V_CC)$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+install-man1: $(man_MANS)
+	@$(NORMAL_INSTALL)
+	@list1=''; \
+	list2='$(man_MANS)'; \
+	test -n "$(man1dir)" \
+	  && test -n "`echo $$list1$$list2`" \
+	  || exit 0; \
+	echo " $(MKDIR_P) '$(DESTDIR)$(man1dir)'"; \
+	$(MKDIR_P) "$(DESTDIR)$(man1dir)" || exit 1; \
+	{ for i in $$list1; do echo "$$i"; done;  \
+	if test -n "$$list2"; then \
+	  for i in $$list2; do echo "$$i"; done \
+	    | sed -n '/\.1[a-z]*$$/p'; \
+	fi; \
+	} | while read p; do \
+	  if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
+	  echo "$$d$$p"; echo "$$p"; \
+	done | \
+	sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \
+	      -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \
+	sed 'N;N;s,\n, ,g' | { \
+	list=; while read file base inst; do \
+	  if test "$$base" = "$$inst"; then list="$$list $$file"; else \
+	    echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \
+	    $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \
+	  fi; \
+	done; \
+	for i in $$list; do echo "$$i"; done | $(am__base_list) | \
+	while read files; do \
+	  test -z "$$files" || { \
+	    echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \
+	    $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \
+	done; }
+
+uninstall-man1:
+	@$(NORMAL_UNINSTALL)
+	@list=''; test -n "$(man1dir)" || exit 0; \
+	files=`{ for i in $$list; do echo "$$i"; done; \
+	l2='$(man_MANS)'; for i in $$l2; do echo "$$i"; done | \
+	  sed -n '/\.1[a-z]*$$/p'; \
+	} | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \
+	      -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \
+	dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir)
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS) $(MANS)
+installdirs:
+	for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+	-rm -f ../common/$(am__dirstamp)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-man
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man: install-man1
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS uninstall-man
+
+uninstall-man: uninstall-man1
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean \
+	clean-binPROGRAMS clean-generic clean-libtool cscopelist-am \
+	ctags ctags-am distclean distclean-compile distclean-generic \
+	distclean-libtool distclean-tags distdir dvi dvi-am html \
+	html-am info info-am install install-am install-binPROGRAMS \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-info install-info-am install-man install-man1 \
+	install-pdf install-pdf-am install-ps install-ps-am \
+	install-strip installcheck installcheck-am installdirs \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+	pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \
+	uninstall-binPROGRAMS uninstall-man uninstall-man1
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/iccjpeg.c b/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/iccjpeg.c
new file mode 100644
index 0000000000..d08b4bd081
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/iccjpeg.c
@@ -0,0 +1,248 @@
+/*
+ * iccjpeg.c
+ *
+ * This file provides code to read and write International Color Consortium
+ * (ICC) device profiles embedded in JFIF JPEG image files.  The ICC has
+ * defined a standard format for including such data in JPEG "APP2" markers.
+ * The code given here does not know anything about the internal structure
+ * of the ICC profile data; it just knows how to put the profile data into
+ * a JPEG file being written, or get it back out when reading.
+ *
+ * This code depends on new features added to the IJG JPEG library as of
+ * IJG release 6b; it will not compile or work with older IJG versions.
+ *
+ * NOTE: this code would need surgery to work on 16-bit-int machines
+ * with ICC profiles exceeding 64K bytes in size.  If you need to do that,
+ * change all the "unsigned int" variables to "INT32".  You'll also need
+ * to find a malloc() replacement that can allocate more than 64K.
+ */
+
+#include "iccjpeg.h"
+#include <stdlib.h>			/* define malloc() */
+
+
+/*
+ * Since an ICC profile can be larger than the maximum size of a JPEG marker
+ * (64K), we need provisions to split it into multiple markers.  The format
+ * defined by the ICC specifies one or more APP2 markers containing the
+ * following data:
+ *	Identifying string	ASCII "ICC_PROFILE\0"  (12 bytes)
+ *	Marker sequence number	1 for first APP2, 2 for next, etc (1 byte)
+ *	Number of markers	Total number of APP2's used (1 byte)
+ *      Profile data		(remainder of APP2 data)
+ * Decoders should use the marker sequence numbers to reassemble the profile,
+ * rather than assuming that the APP2 markers appear in the correct sequence.
+ */
+
+#define ICC_MARKER  (JPEG_APP0 + 2)	/* JPEG marker code for ICC */
+#define ICC_OVERHEAD_LEN  14		/* size of non-profile data in APP2 */
+#define MAX_BYTES_IN_MARKER  65533	/* maximum data len of a JPEG marker */
+#define MAX_DATA_BYTES_IN_MARKER  (MAX_BYTES_IN_MARKER - ICC_OVERHEAD_LEN)
+
+
+/*
+ * This routine writes the given ICC profile data into a JPEG file.
+ * It *must* be called AFTER calling jpeg_start_compress() and BEFORE
+ * the first call to jpeg_write_scanlines().
+ * (This ordering ensures that the APP2 marker(s) will appear after the
+ * SOI and JFIF or Adobe markers, but before all else.)
+ */
+
+void
+write_icc_profile (j_compress_ptr cinfo,
+		   const JOCTET *icc_data_ptr,
+		   unsigned int icc_data_len)
+{
+  /* The marker tag "ICC_PROFILE" followed by its terminating NUL.  It is
+   * listed as byte values instead of a string literal so that the bytes
+   * written are correct even when the host character set is not ASCII.
+   */
+  static const int icc_tag[12] = {
+    0x49, 0x43, 0x43, 0x5F,
+    0x50, 0x52, 0x4F, 0x46,
+    0x49, 0x4C, 0x45, 0x0
+  };
+  const JOCTET *src = icc_data_ptr;	/* next profile byte to emit */
+  unsigned int remaining = icc_data_len; /* profile bytes still unwritten */
+  unsigned int chunk_total;	/* number of APP2 markers the profile needs */
+  unsigned int chunk_len;	/* profile bytes carried by the current marker */
+  unsigned int i;
+  int chunk_no;			/* sequence number of the current marker */
+
+  /* Ceiling division: a partly filled final marker still counts as one */
+  chunk_total = remaining / MAX_DATA_BYTES_IN_MARKER;
+  if (remaining % MAX_DATA_BYTES_IN_MARKER != 0)
+    chunk_total++;
+
+  /* One loop pass per marker; the spec numbers markers starting from 1 */
+  for (chunk_no = 1; remaining > 0; chunk_no++) {
+    /* Take as much as one marker can hold, or whatever is left over */
+    if (remaining > MAX_DATA_BYTES_IN_MARKER)
+      chunk_len = MAX_DATA_BYTES_IN_MARKER;
+    else
+      chunk_len = remaining;
+    remaining -= chunk_len;
+
+    /* JPEG marker header: the APP2 code plus the marker's data length,
+     * which covers the tag, the two sequencing bytes and the payload.
+     */
+    jpeg_write_m_header(cinfo, ICC_MARKER,
+			(unsigned int) (chunk_len + ICC_OVERHEAD_LEN));
+
+    /* Identifying tag */
+    for (i = 0; i < 12; i++)
+      jpeg_write_m_byte(cinfo, icc_tag[i]);
+
+    /* Sequencing info: this marker's number, then the marker count */
+    jpeg_write_m_byte(cinfo, chunk_no);
+    jpeg_write_m_byte(cinfo, (int) chunk_total);
+
+    /* Payload: this marker's slice of the profile, byte by byte */
+    for (i = 0; i < chunk_len; i++, src++)
+      jpeg_write_m_byte(cinfo, *src);
+  }
+}
+
+
+/*
+ * Prepare for reading an ICC profile
+ */
+
+void
+setup_read_icc_profile (j_decompress_ptr cinfo)
+{
+  const unsigned int keep_limit = 0xFFFF;	/* per-marker length limit */
+  jpeg_save_markers(cinfo, ICC_MARKER, keep_limit);  /* retain APP2 data */
+}
+
+
+/*
+ * Handy subroutine to test whether a saved marker is an ICC profile marker.
+ */
+
+static boolean
+marker_is_icc (jpeg_saved_marker_ptr marker)
+{
+  /* "ICC_PROFILE" plus NUL as byte values, so non-ASCII hosts match too */
+  static const int icc_tag[12] = {
+    0x49, 0x43, 0x43, 0x5F, 0x50, 0x52, 0x4F, 0x46, 0x49, 0x4C, 0x45, 0x0
+  };
+  unsigned int idx;
+
+  /* Reject other marker codes and markers too short for the ICC overhead */
+  if (marker->marker != ICC_MARKER)
+    return FALSE;
+  if (marker->data_length < ICC_OVERHEAD_LEN)
+    return FALSE;
+  for (idx = 0; idx < 12; idx++) {
+    if (GETJOCTET(marker->data[idx]) != icc_tag[idx])
+      return FALSE;		/* identifying string does not match */
+  }
+  return TRUE;
+}
+
+
+/*
+ * See if there was an ICC profile in the JPEG file being read;
+ * if so, reassemble and return the profile data.
+ *
+ * TRUE is returned if an ICC profile was found, FALSE if not.
+ * If TRUE is returned, *icc_data_ptr is set to point to the
+ * returned data, and *icc_data_len is set to its length.
+ *
+ * IMPORTANT: the data at **icc_data_ptr has been allocated with malloc()
+ * and must be freed by the caller with free() when the caller no longer
+ * needs it.  (Alternatively, we could write this routine to use the
+ * IJG library's memory allocator, so that the data would be freed implicitly
+ * at jpeg_finish_decompress() time.  But it seems likely that many apps
+ * will prefer to have the data stick around after decompression finishes.)
+ *
+ * NOTE: if the file contains invalid ICC APP2 markers, we just silently
+ * return FALSE.  You might want to issue an error message instead.
+ */
+
+boolean
+read_icc_profile (j_decompress_ptr cinfo,
+		  JOCTET **icc_data_ptr,
+		  unsigned int *icc_data_len)
+{
+#define MAX_SEQ_NO  255		/* a sequence number is a single byte */
+  char seen[MAX_SEQ_NO+1];		  /* nonzero once chunk [i] was found */
+  unsigned int chunk_len[MAX_SEQ_NO+1];  /* payload bytes held by chunk [i] */
+  unsigned int chunk_off[MAX_SEQ_NO+1];  /* offset of chunk [i] in the result */
+  jpeg_saved_marker_ptr cur;
+  JOCTET *profile;
+  unsigned int profile_len = 0;
+  int expected = 0;		/* marker count announced so far; 0 = none seen */
+  int seq;
+
+  /* Defined outputs (NULL, 0) for every FALSE return below */
+  *icc_data_ptr = NULL;
+  *icc_data_len = 0;
+
+  for (seq = MAX_SEQ_NO; seq >= 1; seq--)
+    seen[seq] = 0;
+
+  /* Pass 1: find the ICC chunks and check that their numbering is sane */
+  for (cur = cinfo->marker_list; cur != NULL; cur = cur->next) {
+    int announced;
+
+    if (! marker_is_icc(cur))
+      continue;			/* some other saved marker */
+    announced = GETJOCTET(cur->data[13]);
+    if (expected == 0)
+      expected = announced;
+    else if (expected != announced)
+      return FALSE;		/* chunks disagree about the marker count */
+    seq = GETJOCTET(cur->data[12]);
+    if (seq < 1 || seq > expected)
+      return FALSE;		/* sequence number out of range */
+    if (seen[seq] != 0)
+      return FALSE;		/* sequence number used twice */
+    seen[seq] = 1;
+    chunk_len[seq] = cur->data_length - ICC_OVERHEAD_LEN;
+  }
+
+  if (expected == 0)
+    return FALSE;		/* no ICC marker was found */
+
+  /* Pass 2: every number from 1 to the announced count must be present.
+   * Lay the chunks out back to back, which also yields the total size.
+   */
+  for (seq = 1; seq <= expected; seq++) {
+    if (! seen[seq])
+      return FALSE;		/* gap in the sequence */
+    chunk_off[seq] = profile_len;
+    profile_len += chunk_len[seq];
+  }
+
+  if (profile_len == 0)
+    return FALSE;		/* every chunk was empty */
+
+  /* Buffer for the reassembled profile; the caller releases it with free() */
+  profile = (JOCTET *) malloc(profile_len * sizeof(JOCTET));
+  if (profile == NULL)
+    return FALSE;		/* out of memory */
+
+  /* Pass 3: copy each chunk's payload into its slot, whatever order the
+   * markers appeared in within the file.
+   */
+  for (cur = cinfo->marker_list; cur != NULL; cur = cur->next) {
+    JOCTET FAR *from;
+    JOCTET *to;
+    unsigned int left;
+
+    if (! marker_is_icc(cur))
+      continue;
+    seq = GETJOCTET(cur->data[12]);
+    from = cur->data + ICC_OVERHEAD_LEN;
+    to = profile + chunk_off[seq];
+    for (left = chunk_len[seq]; left > 0; left--)
+      *to++ = *from++;
+  }
+
+  *icc_data_ptr = profile;
+  *icc_data_len = profile_len;
+
+  return TRUE;
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/iccjpeg.h b/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/iccjpeg.h
new file mode 100644
index 0000000000..5e1888d9ef
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/iccjpeg.h
@@ -0,0 +1,73 @@
+/*
+ * iccjpeg.h
+ *
+ * This file provides code to read and write International Color Consortium
+ * (ICC) device profiles embedded in JFIF JPEG image files.  The ICC has
+ * defined a standard format for including such data in JPEG "APP2" markers.
+ * The code given here does not know anything about the internal structure
+ * of the ICC profile data; it just knows how to put the profile data into
+ * a JPEG file being written, or get it back out when reading.
+ *
+ * This code depends on new features added to the IJG JPEG library as of
+ * IJG release 6b; it will not compile or work with older IJG versions.
+ *
+ * NOTE: this code would need surgery to work on 16-bit-int machines
+ * with ICC profiles exceeding 64K bytes in size.  See iccjpeg.c
+ * for details.
+ */
+
+#include <stdio.h>		/* needed to define "FILE", "NULL" */
+#include "jpeglib.h"
+
+
+/*
+ * This routine writes the given ICC profile data into a JPEG file.
+ * It *must* be called AFTER calling jpeg_start_compress() and BEFORE
+ * the first call to jpeg_write_scanlines().
+ * (This ordering ensures that the APP2 marker(s) will appear after the
+ * SOI and JFIF or Adobe markers, but before all else.)
+ */
+
+extern void write_icc_profile JPP((j_compress_ptr cinfo,
+				   const JOCTET *icc_data_ptr,
+				   unsigned int icc_data_len));
+
+
+/*
+ * Reading a JPEG file that may contain an ICC profile requires two steps:
+ *
+ * 1. After jpeg_create_decompress() but before jpeg_read_header(),
+ *    call setup_read_icc_profile().  This routine tells the IJG library
+ *    to save in memory any APP2 markers it may find in the file.
+ *
+ * 2. After jpeg_read_header(), call read_icc_profile() to find out
+ *    whether there was a profile and obtain it if so.
+ */
+
+
+/*
+ * Prepare for reading an ICC profile
+ */
+
+extern void setup_read_icc_profile JPP((j_decompress_ptr cinfo));
+
+
+/*
+ * See if there was an ICC profile in the JPEG file being read;
+ * if so, reassemble and return the profile data.
+ *
+ * TRUE is returned if an ICC profile was found, FALSE if not.
+ * If TRUE is returned, *icc_data_ptr is set to point to the
+ * returned data, and *icc_data_len is set to its length.
+ *
+ * IMPORTANT: the data at **icc_data_ptr has been allocated with malloc()
+ * and must be freed by the caller with free() when the caller no longer
+ * needs it.  (Alternatively, we could write this routine to use the
+ * IJG library's memory allocator, so that the data would be freed implicitly
+ * at jpeg_finish_decompress() time.  But it seems likely that many apps
+ * will prefer to have the data stick around after decompression finishes.)
+ */
+
+extern boolean read_icc_profile JPP((j_decompress_ptr cinfo,
+				     JOCTET **icc_data_ptr,
+				     unsigned int *icc_data_len));
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/jpgicc.1 b/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/jpgicc.1
new file mode 100644
index 0000000000..44795a38eb
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/jpgicc.1
@@ -0,0 +1,122 @@
+.\"Shiju P. Nair September 30, 2004
+.\"Thomas Weber <tweber@debian.org> April 23, 2014
+.TH JPGICC 1 "September 30, 2004"
+.SH NAME
+jpgicc - little cms ICC profile applier for JPEG.
+.SH SYNOPSIS
+.B jpgicc
+.RI [ options ] " input.jpg output.jpg"
+.SH DESCRIPTION
+lcms is a standalone CMM engine, which deals with the color management.
+It implements a fast transformation between ICC profiles.
+.B jpgicc
+is a little cms ICC profile applier for JPEG.
+.SH OPTIONS
+.TP
+.B \-b
+Black point compensation.
+.TP
+.BI \-c\  NUM
+Precalculates transform (0=Off, 1=Normal, 2=Hi-res, 3=LoRes) [defaults to 1].
+.TP
+.BI \-d\  NUM
+Observer adaptation state (abs.col. only), (0..1.0, float value) [defaults to 0.0].
+.TP
+.B \-e
+Embed destination profile.
+.TP
+.B \-g
+Marks out-of-gamut colors on softproof.
+.TP
+.BI \-h\  NUM
+Show summary of options and examples (0=help, 1=Examples, 2=Built-in profiles, 3=Contact information)
+.TP
+.BI \-i\  profile
+Input profile (defaults to sRGB).
+.TP
+.BI \-l\  link
+Transform using the device-link profile \fIlink\fR (it takes the place of the input and output profiles).
+.TP
+.BI \-m\  NUM
+SoftProof intent (0,1,2,3) [defaults to 0].
+.TP
+.B \-n
+Ignore embedded profile.
+.TP
+.BI \-o\  profile
+Output profile (defaults to sRGB).
+.TP
+.BI \-p\  profile
+Soft proof profile.
+.TP
+.BI \-q\  NUM
+Output JPEG quality, (0..100) [defaults to 75].
+.TP
+.BI \-s\  newprofile
+Save embedded profile as \fInewprofile\fR.
+.TP
+.BI \-t\  NUM
+Rendering intent
+.nf
+.RS
+0=Perceptual [default]
+1=Relative colorimetric
+2=Saturation
+3=Absolute colorimetric
+10=Perceptual preserving black ink
+11=Relative colorimetric preserving black ink
+12=Saturation preserving black ink
+13=Perceptual preserving black plane
+14=Relative colorimetric preserving black plane
+15=Saturation preserving black plane
+.RE
+.fi
+.TP
+.B \-v
+Verbose.
+.TP
+.BI \-!\  NUM,NUM,NUM
+Out-of-gamut marker channel values (r,g,b) [defaults: 128,128,128].
+.SH BUILT-IN PROFILES
+.nf
+	*Lab2  -- D50-based v2 CIEL*a*b
+	*Lab4  -- D50-based v4 CIEL*a*b
+	*Lab   -- D50-based v4 CIEL*a*b
+	*XYZ   -- CIE XYZ (PCS)
+	*sRGB  -- sRGB color space
+	*Gray22 - Monochrome of Gamma 2.2
+	*Gray30 - Monochrome of Gamma 3.0
+	*null   - Monochrome black for all input
+	*Lin2222- CMYK linearization of gamma 2.2 on each channel
+.fi
+.SH EXAMPLES
+.nf
+To color correct from scanner to sRGB:
+	jpgicc -iscanner.icm in.jpg out.jpg
+
+To convert from monitor1 to monitor2:
+	jpgicc -imon1.icm -omon2.icm in.jpg out.jpg
+
+To make a CMYK separation:
+	jpgicc -oprinter.icm inrgb.jpg outcmyk.jpg
+
+To recover sRGB from a CMYK separation:
+	jpgicc -iprinter.icm incmyk.jpg outrgb.jpg
+
+To convert from CIELab ITU/Fax JPEG to sRGB
+	jpgicc -iitufax.icm in.jpg out.jpg
+
+To convert from CIELab ITU/Fax JPEG to sRGB (ITU/Fax data detected from its G3FAX marker)
+	jpgicc in.jpg out.jpg
+.fi
+.SH NOTES
+For suggestions, comments, bug reports etc. send mail to
+info@littlecms.com.
+.SH SEE ALSO
+.BR linkicc (1),
+.BR psicc (1),
+.BR tificc (1),
+.BR transicc (1)
+.SH AUTHOR
+This manual page was written by Shiju P. Nair <shiju.p@gmail.com>,
+for the Debian project.
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/jpgicc.c b/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/jpgicc.c
new file mode 100644
index 0000000000..ea9e655ca7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/jpgicc/jpgicc.c
@@ -0,0 +1,1261 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+// This program does apply profiles to (some) JPEG files
+
+
+#include "utils.h"
+
+#include "jpeglib.h"
+#include "iccjpeg.h"
+
+// Flags
+static cmsBool BlackPointCompensation = FALSE;   // adds cmsFLAGS_BLACKPOINTCOMPENSATION to the transform flags
+static cmsBool IgnoreEmbedded         = FALSE;   // when set, the profile embedded in the input is not read
+static cmsBool GamutCheck             = FALSE;   // adds cmsFLAGS_GAMUTCHECK and installs Alarm[] as alarm codes
+static cmsBool lIsITUFax              = FALSE;   // set by IsITUFax(): input carries a "G3FAX" APP1 marker
+static cmsBool lIsPhotoshopApp13      = FALSE;   // set by HandlePhotoshopAPP13(): "Photoshop" APP13 marker seen
+static cmsBool lIsEXIF;                          // set by HandleEXIF(); starts as zero like any static object
+static cmsBool lIsDeviceLink          = FALSE;   // only the input profile is opened; output and proof stay NULL
+static cmsBool EmbedProfile           = FALSE;   // together with cOutProf, makes DoTransform() embed that profile
+
+static const char* SaveEmbedded = NULL;
+
+static int Intent                  = INTENT_PERCEPTUAL;
+static int ProofingIntent          = INTENT_PERCEPTUAL;
+static int PrecalcMode             = 1;           // 0, 2 and 3 select NOOPTIMIZE / HIGHRESPRECALC / LOWRESPRECALC
+
+static int jpegQuality             = 75;          // handed to jpeg_set_quality(); >= 70 also disables subsampling
+
+static cmsFloat64Number ObserverAdaptationState = 0;   // handed to cmsSetAdaptationState()
+
+
+static char *cInpProf  = NULL;
+static char *cOutProf  = NULL;
+static char *cProofing = NULL;
+
+static FILE * InFile;                             // opened by OpenInput(), closed by Done()
+static FILE * OutFile;                            // opened by OpenOutput(), closed by Done()
+
+static struct jpeg_decompress_struct Decompressor;
+static struct jpeg_compress_struct   Compressor;
+
+// Error manager shared by Decompressor and Compressor (both point their err field at .pub)
+static struct my_error_mgr {
+
+    struct  jpeg_error_mgr pub;   // "public" fields
+    void*   Cargo;                // "private" fields
+
+} ErrorHandler;
+
+// Alarm codes passed to cmsSetAlarmCodes() when GamutCheck is on (not static: visible to other files)
+cmsUInt16Number Alarm[4] = {128,128,128,0};
+
+
+static
+void my_error_exit (j_common_ptr cinfo)
+{
+  // libjpeg error hook: render the pending message and report it as fatal.
+  char buffer[JMSG_LENGTH_MAX];
+  (*cinfo->err->format_message) (cinfo, buffer);
+  FatalError("%s", buffer);   // pass the text as data, never as the printf-style format itself
+}
+
+/*
+Definition of the APPn Markers Defined for continuous-tone G3FAX
+
+The application code APP1 initiates identification of the image as
+a G3FAX application and defines the spatial resolution and subsampling.
+This marker directly follows the SOI marker. The data format will be as follows:
+
+X'FFE1' (APP1), length, FAX identifier, version, spatial resolution.
+
+The above terms are defined as follows:
+
+Length: (Two octets) Total APP1 field octet count including the octet count itself, but excluding the APP1
+marker.
+
+FAX identifier: (Six octets) X'47', X'33', X'46', X'41', X'58', X'00'. This X'00'-terminated string "G3FAX"
+uniquely identifies this APP1 marker.
+
+Version: (Two octets) X'07CA'. This string specifies the year of approval of the standard, for identification
+in the case of future revision (for example, 1994).
+
+Spatial Resolution: (Two octets) Lightness pixel density in pels/25.4 mm. The basic value is 200. Allowed values are
+100, 200, 300, 400, 600 and 1200 pels/25.4 mm, with square (or equivalent) pels.
+
+NOTE - The functional equivalence of inch-based and mm-based resolutions is maintained. For example, the 200 x 200
+*/
+
+static
+cmsBool IsITUFax(jpeg_saved_marker_ptr ptr)
+{
+    jpeg_saved_marker_ptr cur;
+
+    // Walk the saved-marker list looking for an APP1 segment that starts
+    // with the NUL-terminated identifier "G3FAX" (six bytes in total).
+    for (cur = ptr; cur != NULL; cur = cur -> next)
+    {
+        if (cur -> marker != (JPEG_APP0 + 1)) continue;
+        if (cur -> data_length <= 5) continue;       // too short to hold the identifier
+
+        if (memcmp(cur -> data, "G3FAX", 6) == 0) return TRUE;
+    }
+
+    return FALSE;
+}
+
+// Save a ITU T.42/Fax marker with defaults on boundaries. This is the only mode we support right now.
+static
+void SetITUFax(j_compress_ptr cinfo)
+{
+    // "G3FAX\0", version 0x07CA, resolution 0x00C8 (200). Spelled as bytes because "\0x07" is NUL + "x07".
+    static const unsigned char Marker[10] = { 'G', '3', 'F', 'A', 'X', 0x00, 0x07, 0xCA, 0x00, 0xC8 };
+    jpeg_write_marker(cinfo, (JPEG_APP0 + 1), Marker, (unsigned int) sizeof(Marker));
+}
+
+
+// Build a profile for decoding ITU T.42/Fax JPEG streams.
+// In the input direction the profile additionally gamut-compresses values
+// into the ranges -85 < a < 85 and -75 < b < 125. This conforms to
+// the default range for ITU/T.42 -- See RFC 2301, section 6.2.3 for details
+
+//  L*  =   [0, 100]
+//  a*  =   [-85, 85]
+//  b*  =   [-75, 125]
+
+
+// These functions convert the ITUFAX encoding to floating point
+// and vice-versa. No gamut mapping is performed yet.
+
+static
+void ITU2Lab(const cmsUInt16Number In[3], cmsCIELab* Lab)
+{   // Decode one 16-bit ITUFAX triplet In[] into floating-point L, a, b stored in *Lab.
+    Lab -> L = (double) In[0] / 655.35;                     // codes 0..65535 map to L 0..100
+    Lab -> a = (double) 170.* (In[1] - 32768.) / 65535.;    // 170-unit span, a = 0 at code 32768
+    Lab -> b = (double) 200.* (In[2] - 24576.) / 65535.;    // 200-unit span, b = 0 at code 24576
+}
+
+static
+void Lab2ITU(const cmsCIELab* Lab, cmsUInt16Number Out[3])
+{   // Encode floating-point *Lab as a 16-bit ITUFAX triplet in Out[]; values are floored, not rounded.
+    Out[0] = (cmsUInt16Number) floor((double) (Lab -> L / 100.)* 65535. );            // L 0..100 to 0..65535
+    Out[1] = (cmsUInt16Number) floor((double) (Lab -> a / 170.)* 65535. + 32768. );   // a = 0 lands on 32768
+    Out[2] = (cmsUInt16Number) floor((double) (Lab -> b / 200.)* 65535. + 24576. );   // b = 0 lands on 24576
+}
+
+// These are the samplers -- they are passed as callbacks to cmsStageSampleCLut16bit();
+// then, cmsSample3DGrid() will sweep the whole Lab gamut calling these functions
+// once for each node. In[] will contain the Lab PCS value to convert to ITUFAX
+// in PCS2ITU, or the ITUFAX value to convert to Lab in ITU2PCS.
+// You can change the number of sample points if desired; the algorithm will
+// remain the same. 33 points give good accuracy, but you can reduce to 22 or fewer
+// if space is critical
+
+#define GRID_POINTS 33
+
+static
+int PCS2ITU(register const cmsUInt16Number In[], register cmsUInt16Number Out[], register void*  Cargo)
+{
+    cmsCIELab Lab;
+    // Sampler: encoded Lab in In[] -> float Lab -> desaturated into range -> ITUFAX code in Out[]
+    cmsLabEncoded2Float(&Lab, In);
+    cmsDesaturateLab(&Lab, 85, -85, 125, -75);    // This function does the necessary gamut remapping
+    Lab2ITU(&Lab, Out);
+    return TRUE;
+    // Placed after the return, so it is never executed; it only references Cargo
+    UTILS_UNUSED_PARAMETER(Cargo);
+}
+
+
+static
+int ITU2PCS( register const cmsUInt16Number In[], register cmsUInt16Number Out[], register void*  Cargo)
+{
+    cmsCIELab Lab;
+    // Sampler: ITUFAX code in In[] -> float Lab -> encoded Lab in Out[]
+    ITU2Lab(In, &Lab);
+    cmsFloat2LabEncoded(Out, &Lab);
+    return TRUE;
+    // Placed after the return, so it is never executed; it only references Cargo
+    UTILS_UNUSED_PARAMETER(Cargo);
+}
+
+// Creates the virtual input profile, which decodes ITU to the profile connection space. Returns NULL on failure.
+static
+cmsHPROFILE CreateITU2PCS_ICC(void)
+{
+    cmsHPROFILE hProfile;
+    cmsPipeline* AToB0;
+    cmsStage* ColorMap;
+
+    AToB0 = cmsPipelineAlloc(0, 3, 3);
+    if (AToB0 == NULL) return NULL;
+
+    ColorMap = cmsStageAllocCLut16bit(0, GRID_POINTS, 3, 3, NULL);
+    if (ColorMap == NULL) { cmsPipelineFree(AToB0); return NULL; }   // do not leak the pipeline
+
+    cmsPipelineInsertStage(AToB0, cmsAT_BEGIN, ColorMap);
+    cmsStageSampleCLut16bit(ColorMap, ITU2PCS, NULL, 0);              // fill every grid node via ITU2PCS
+
+    hProfile = cmsCreateProfilePlaceholder(0);
+    if (hProfile == NULL) {
+        cmsPipelineFree(AToB0);
+        return NULL;
+    }
+
+    cmsWriteTag(hProfile, cmsSigAToB0Tag, AToB0);
+    cmsSetColorSpace(hProfile, cmsSigLabData);
+    cmsSetPCS(hProfile, cmsSigLabData);
+    cmsSetDeviceClass(hProfile, cmsSigColorSpaceClass);
+    cmsPipelineFree(AToB0);                                           // local pipeline is released after cmsWriteTag
+
+    return hProfile;
+}
+
+
+// Creates the virtual output profile, with the necessary gamut mapping. Returns NULL on failure.
+static
+cmsHPROFILE CreatePCS2ITU_ICC(void)
+{
+    cmsHPROFILE hProfile;
+    cmsPipeline* BToA0;
+    cmsStage* ColorMap;
+
+    BToA0 = cmsPipelineAlloc(0, 3, 3);
+    if (BToA0 == NULL) return NULL;
+
+    ColorMap = cmsStageAllocCLut16bit(0, GRID_POINTS, 3, 3, NULL);
+    if (ColorMap == NULL) { cmsPipelineFree(BToA0); return NULL; }   // do not leak the pipeline
+
+    cmsPipelineInsertStage(BToA0, cmsAT_BEGIN, ColorMap);
+    cmsStageSampleCLut16bit(ColorMap, PCS2ITU, NULL, 0);              // fill every grid node via PCS2ITU
+
+    hProfile = cmsCreateProfilePlaceholder(0);
+    if (hProfile == NULL) {
+        cmsPipelineFree(BToA0);
+        return NULL;
+    }
+
+    cmsWriteTag(hProfile, cmsSigBToA0Tag, BToA0);
+    cmsSetColorSpace(hProfile, cmsSigLabData);
+    cmsSetPCS(hProfile, cmsSigLabData);
+    cmsSetDeviceClass(hProfile, cmsSigColorSpaceClass);
+
+    cmsPipelineFree(BToA0);                                           // local pipeline is released after cmsWriteTag
+
+    return hProfile;
+}
+
+
+
+#define PS_FIXED_TO_FLOAT(h, l) ((float) (h) + ((float) (l)/(1<<16)))
+
+static
+cmsBool ProcessPhotoshopAPP13(JOCTET FAR *data, int datalen)
+{
+    int i;
+
+    // Walk the "8BIM" resource blocks that follow the first 14 bytes of the APP13 payload and copy
+    // the resolution of resource 0x03ED into Decompressor. Every read is checked against datalen.
+    for (i = 14; i + 4 <= datalen; )
+    {
+        unsigned long len;
+        unsigned int type, namelen;
+
+        if (!(GETJOCTET(data[i]  ) == 0x38 &&
+              GETJOCTET(data[i+1]) == 0x42 &&
+              GETJOCTET(data[i+2]) == 0x49 &&
+              GETJOCTET(data[i+3]) == 0x4D)) break; // Not recognized
+
+        i += 4; // identifying string
+
+        if (i + 3 > datalen) break;   // need the resource type (2 bytes) and the name length (1 byte)
+        type = ((unsigned int) GETJOCTET(data[i]) << 8) + GETJOCTET(data[i+1]);   // shift outside GETJOCTET
+        i += 2; // resource type
+
+        namelen = GETJOCTET(data[i]);
+        i += namelen + ((namelen & 1) ? 1 : 2);   // resource name: length byte + text, padded to even size
+
+        if (i + 4 > datalen) break;   // need the 4-byte big-endian resource size
+        len = ((unsigned long) GETJOCTET(data[i])   << 24) + ((unsigned long) GETJOCTET(data[i+1]) << 16) +
+              ((unsigned long) GETJOCTET(data[i+2]) <<  8) +  (unsigned long) GETJOCTET(data[i+3]);
+        i += 4; // Size
+
+        if (len > (unsigned long) (datalen - i)) break;   // resource claims more bytes than the marker holds
+
+        if (type == 0x03ED && len >= 16) {
+            // Two 16.16 fixed-point values, at +0 (horizontal) and +8 (vertical); the cast keeps the integer part
+            Decompressor.X_density = (UINT16) PS_FIXED_TO_FLOAT((GETJOCTET(data[i]) << 8) + GETJOCTET(data[i+1]),
+                                                 (GETJOCTET(data[i+2]) << 8) + GETJOCTET(data[i+3]));
+            Decompressor.Y_density = (UINT16) PS_FIXED_TO_FLOAT((GETJOCTET(data[i+8]) << 8) + GETJOCTET(data[i+9]),
+                                                 (GETJOCTET(data[i+10]) << 8) + GETJOCTET(data[i+11]));
+            // Set the density unit to 1 since the resolutions are specified in pixels per inch
+            Decompressor.density_unit = 0x01;
+            return TRUE;
+        }
+
+        i += (int) (len + (len & 1));   // Alignment
+    }
+    return FALSE;
+}
+
+
+static
+cmsBool HandlePhotoshopAPP13(jpeg_saved_marker_ptr ptr)
+{
+    jpeg_saved_marker_ptr cur;
+
+    // Scan the saved markers for the first APP13 segment whose payload
+    // starts with the ASCII signature "Photoshop". When one is found its
+    // resource blocks are handed to ProcessPhotoshopAPP13() and TRUE is
+    // returned; FALSE means no such segment exists.
+    for (cur = ptr; cur != NULL; cur = cur -> next) {
+
+        JOCTET FAR* payload = cur -> data;
+
+        // Wrong marker type, or not longer than the 9-byte signature
+        if (cur -> marker != (JPEG_APP0 + 13) || cur -> data_length <= 9)
+            continue;
+
+        if (memcmp(payload, "Photoshop", 9) != 0)
+            continue;
+
+        // The outcome of the resource scan is not propagated: seeing the
+        // signature is enough to report TRUE, and later APP13 segments
+        // are never examined.
+        ProcessPhotoshopAPP13(payload, cur -> data_length);
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+
+typedef unsigned short uint16_t;   // local fixed-width aliases used by the EXIF readers below
+typedef unsigned char uint8_t;
+typedef unsigned int uint32_t;
+
+#define INTEL_BYTE_ORDER 0x4949    // byte-order mark "II"; when seen, read16/read32 are asked to swap
+#define XRESOLUTION 0x011a         // IFD tag ids looked up by HandleEXIF()
+#define YRESOLUTION 0x011b
+#define RESOLUTION_UNIT 0x128
+
+// Read a 16-bit word
+static
+uint16_t read16(uint8_t* arr, int pos,  int swapBytes)
+{
+    const uint8_t first  = arr[pos];     // byte at the lower address
+    const uint8_t second = arr[pos+1];   // byte at the higher address
+    if (swapBytes) return (uint16_t) ((second << 8) | first);   // low byte first
+    return (uint16_t) ((first << 8) | second);                  // high byte first
+}
+
+
+// Read a 32-bit word
+static
+uint32_t read32(uint8_t* arr, int pos,  int swapBytes)
+{
+    // Widen each byte to uint32_t BEFORE shifting: a uint8_t operand is
+    // promoted to int, and shifting a value >= 0x80 left by 24 would
+    // overflow that signed int.
+    const uint32_t b0 = arr[pos];
+    const uint32_t b1 = arr[pos+1];
+    const uint32_t b2 = arr[pos+2];
+    const uint32_t b3 = arr[pos+3];
+
+    if(!swapBytes) {
+        return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;   // most significant byte first
+    }
+
+    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);       // least significant byte first
+}
+
+
+
+static
+int read_tag(uint8_t* arr, int pos,  int swapBytes, void* dest)
+{
+    // The entry at pos is read as: format at +2, component count at +4, value or offset at +8
+    uint32_t format = read16(arr, pos + 2, swapBytes);
+    // Components should be 1
+    uint32_t components = read32(arr, pos + 4, swapBytes);
+    // Points to the value
+    uint32_t offset;
+
+    // sanity
+    if (components != 1) return 0;
+
+    if (format == 3)
+        offset = pos + 8;                              // a single 16-bit value is stored in the entry itself
+    else
+        offset =  read32(arr, pos + 8, swapBytes);     // otherwise the field is an offset from arr
+
+    switch (format) {
+    case 5: // Rational: numerator then denominator; dest must point to a double
+          {
+          double num = read32(arr, offset, swapBytes);
+          double den = read32(arr, offset + 4, swapBytes);
+          if (den == 0) return 0;                      // reject x/0 instead of storing inf or NaN
+          *(double *) dest = num / den;
+          }
+          break;
+
+    case 3: // uint 16; dest must point to an int
+        *(int*) dest = read16(arr, offset, swapBytes);
+        break;
+
+    default:  return 0;                                // any other format is reported as a failure
+    }
+
+    return 1;
+}
+
+
+
+// Handler for EXIF data.
+//
+// Looks for "Exif" APP1 markers and, when the first IFD carries XResolution and
+// YResolution, stores them (converted to dots/inch) in cinfo's X_density and
+// Y_density and sets density_unit to 1. Every offset taken from the file is
+// checked against the marker length before it is used.
+//
+// Returns TRUE when a resolution was applied, FALSE otherwise.
+static
+    cmsBool HandleEXIF(struct jpeg_decompress_struct* cinfo)
+{
+    jpeg_saved_marker_ptr ptr;
+    uint32_t ifd_ofs, len;
+    int swapBytes;
+    uint32_t i, numEntries;
+    double XRes = -1, YRes = -1;
+    int Unit = 2; // Inches
+    cmsBool Applied = FALSE;
+
+    for (ptr = cinfo ->marker_list; ptr; ptr = ptr ->next) {
+
+        JOCTET FAR* data = ptr -> data;
+
+        // Need the 6-byte "Exif\0\0" signature plus the 8-byte TIFF header
+        if (ptr ->marker != JPEG_APP0+1 || ptr ->data_length < 6 + 8) continue;
+        if (memcmp(data, "Exif\0\0", 6) != 0) continue;
+
+        data += 6; // Skip EXIF marker
+        len = ptr ->data_length - 6;   // bytes available after the signature
+
+        // 8 byte TIFF header: byte order (2), 0x002A (2), offset of the IFD (4)
+        swapBytes = (read16(data, 0, 0) == INTEL_BYTE_ORDER);
+        ifd_ofs = read32(data, 4, swapBytes);
+
+        // The IFD starts with a 2-byte entry count that must lie inside the marker
+        if (ifd_ofs > len - 2) continue;
+        numEntries = read16(data, ifd_ofs, swapBytes);
+
+        // Search the directory for resolution tags
+        for (i=0; i < numEntries; i++) {
+
+            uint32_t entryOffset = ifd_ofs + 2 + (12 * i);
+            uint32_t tag, format, valueOfs;
+
+            if (len < 12 || entryOffset > len - 12) break;   // truncated directory
+
+            tag      = read16(data, entryOffset, swapBytes);
+            format   = read16(data, entryOffset + 2, swapBytes);
+            valueOfs = read32(data, entryOffset + 8, swapBytes);
+
+            switch (tag) {
+
+            case RESOLUTION_UNIT:
+                // Unit is an int, so only the 16-bit encoding (3) may be stored into it
+                if (format != 3) return FALSE;
+                if (!read_tag(data, entryOffset, swapBytes, &Unit)) return FALSE;
+                break;
+
+            case XRESOLUTION:
+                // XRes is a double: require a rational (5) whose two words lie inside the marker
+                if (format != 5 || valueOfs > len - 8) return FALSE;
+                if (!read_tag(data, entryOffset, swapBytes, &XRes)) return FALSE;
+                break;
+
+            case YRESOLUTION:
+                if (format != 5 || valueOfs > len - 8) return FALSE;
+                if (!read_tag(data, entryOffset, swapBytes, &YRes)) return FALSE;
+                break;
+
+            default:;
+            }
+        }
+
+        // Proceed if all found
+        if (XRes != -1 && YRes != -1)
+        {
+            double toInch, dx, dy;
+
+            // 1 = None, 2 = inches, 3 = cm
+            switch (Unit) {
+            case 2:  toInch = 1.0;  break;
+            case 3:  toInch = 2.54; break;   // dots/cm -> dots/inch
+            default: return FALSE;
+            }
+
+            dx = floor(XRes * toInch + 0.5);
+            dy = floor(YRes * toInch + 0.5);
+
+            // Written so that NaN also fails: only values a UINT16 can hold are stored
+            if (!(dx >= 0 && dx <= 65535.0 && dy >= 0 && dy <= 65535.0)) return FALSE;
+
+            cinfo ->X_density = (UINT16) dx;
+            cinfo ->Y_density = (UINT16) dy;
+            cinfo ->density_unit = 1;  /* 1 for dots/inch, or 2 for dots/cm.*/
+            Applied = TRUE;
+        }
+    }
+    return Applied;
+}
+
+
+static
+cmsBool OpenInput(const char* FileName)
+{
+    int m;
+    // Opens FileName, binds it to the global Decompressor and reads the JPEG header. Always returns TRUE.
+    lIsITUFax = FALSE;
+    InFile  = fopen(FileName, "rb");
+    if (InFile == NULL) {
+        FatalError("Cannot open '%s'", FileName);   // presumably does not return: InFile is used below unchecked
+    }
+
+    // Now we can initialize the JPEG decompression object.
+    Decompressor.err                 = jpeg_std_error(&ErrorHandler.pub);
+    ErrorHandler.pub.error_exit      = my_error_exit;
+    ErrorHandler.pub.output_message  = my_error_exit;   // non-fatal messages go through the same fatal hook
+
+    jpeg_create_decompress(&Decompressor);
+    jpeg_stdio_src(&Decompressor, InFile);
+    // Ask libjpeg to keep APP0..APP15 markers (length limit 0xFFFF) so they can be inspected and copied later
+    for (m = 0; m < 16; m++)
+        jpeg_save_markers(&Decompressor, JPEG_APP0 + m, 0xFFFF);
+
+    // setup_read_icc_profile(&Decompressor);
+
+    fseek(InFile, 0, SEEK_SET);
+    jpeg_read_header(&Decompressor, TRUE);
+
+    return TRUE;
+}
+
+
+static
+cmsBool OpenOutput(const char* FileName)
+{
+    // Creates FileName and binds it to the global Compressor. Always returns TRUE.
+    OutFile = fopen(FileName, "wb");
+    if (OutFile == NULL) {
+        FatalError("Cannot create '%s'", FileName);
+        // presumably FatalError does not return: OutFile is used below unchecked
+    }
+
+    Compressor.err                   = jpeg_std_error(&ErrorHandler.pub);
+    ErrorHandler.pub.error_exit      = my_error_exit;
+    ErrorHandler.pub.output_message  = my_error_exit;   // non-fatal messages go through the same fatal hook
+
+    Compressor.input_components = Compressor.num_components = 4;
+    // NOTE(review): set before jpeg_create_compress(); confirm the value survives that call (WriteOutputFields resets it)
+    jpeg_create_compress(&Compressor);
+    jpeg_stdio_dest(&Compressor, OutFile);
+    return TRUE;
+}
+
+static
+cmsBool Done(void)
+{
+    jpeg_destroy_decompress(&Decompressor);
+    jpeg_destroy_compress(&Compressor);
+    return fclose(InFile) + fclose(OutFile);   // sum of both fclose() results: 0 only when both closes succeed
+    // NOTE(review): as a cmsBool that 0 reads as FALSE on success -- confirm what callers expect
+}
+
+
+// Build up the pixeltype descriptor
+
+static
+cmsUInt32Number GetInputPixelType(void)
+{
+     int space, bps, extra, ColorChannels, Flavor;
+     // Inspect the saved markers first; the two Handle* calls may also update Decompressor's density fields
+     lIsITUFax         = IsITUFax(Decompressor.marker_list);
+     lIsPhotoshopApp13 = HandlePhotoshopAPP13(Decompressor.marker_list);
+     lIsEXIF           = HandleEXIF(&Decompressor);
+
+     ColorChannels = Decompressor.num_components;
+     extra  = 0;            // Alpha = None
+     bps    = 1;            // 8 bits
+     Flavor = 0;            // Vanilla
+
+     if (lIsITUFax) {
+
+        space = PT_Lab;
+        Decompressor.out_color_space = JCS_YCbCr;  // Fake to don't touch
+     }
+     else
+     switch (Decompressor.jpeg_color_space) {     // also selects the color space libjpeg will output
+
+     case JCS_GRAYSCALE:        // monochrome
+              space = PT_GRAY;
+              Decompressor.out_color_space = JCS_GRAYSCALE;
+              break;
+
+     case JCS_RGB:             // red/green/blue
+              space = PT_RGB;
+              Decompressor.out_color_space = JCS_RGB;
+              break;
+
+     case JCS_YCbCr:               // Y/Cb/Cr (also known as YUV)
+              space = PT_RGB;      // Let IJG code to do the conversion
+              Decompressor.out_color_space = JCS_RGB;
+              break;
+
+     case JCS_CMYK:            // C/M/Y/K
+              space = PT_CMYK;
+              Decompressor.out_color_space = JCS_CMYK;
+              if (Decompressor.saw_Adobe_marker)            // Adobe keeps CMYK inverted, so change flavor
+                                Flavor = 1;                 // from vanilla to chocolate
+              break;
+
+     case JCS_YCCK:            // Y/Cb/Cr/K
+              space = PT_CMYK;
+              Decompressor.out_color_space = JCS_CMYK;
+              if (Decompressor.saw_Adobe_marker)            // ditto
+                                Flavor = 1;
+              break;
+
+     default:
+              FatalError("Unsupported color space (0x%x)", Decompressor.jpeg_color_space);
+              return 0;
+     }
+     // Pack the pieces into one lcms pixel-format descriptor
+     return (EXTRA_SH(extra)|CHANNELS_SH(ColorChannels)|BYTES_SH(bps)|COLORSPACE_SH(space)|FLAVOR_SH(Flavor));
+}
+
+
+// Rearrange pixel type to build output descriptor
+static
+cmsUInt32Number ComputeOutputFormatDescriptor(cmsUInt32Number dwInput, int OutColorSpace)
+{
+    int IsPlanar  = T_PLANAR(dwInput);   // the planar bit is carried over from the input descriptor
+    int Channels  = 0;
+    int Flavor    = 0;
+    // Channel count (and, for CMYK, flavor) follow from the requested output color space
+    switch (OutColorSpace) {
+
+   case PT_GRAY:
+       Channels = 1;
+       break;
+   case PT_RGB:
+   case PT_CMY:
+   case PT_Lab:
+   case PT_YUV:
+   case PT_YCbCr:
+       Channels = 3;
+       break;
+
+   case PT_CMYK:
+       if (Compressor.write_Adobe_marker)   // Adobe keeps CMYK inverted, so change flavor to chocolate
+           Flavor = 1;
+       Channels = 4;
+       break;
+   default:
+       FatalError("Unsupported output color space");
+    }
+    // Output is always one byte per sample (BYTES_SH(1))
+    return (COLORSPACE_SH(OutColorSpace)|PLANAR_SH(IsPlanar)|CHANNELS_SH(Channels)|BYTES_SH(1)|FLAVOR_SH(Flavor));
+}
+
+
+// Equivalence between ICC color spaces and lcms color spaces
+static
+int GetProfileColorSpace(cmsHPROFILE hProfile)
+{
+    // Map the color-space signature stored in the profile to the
+    // matching lcms color-space number.
+    return _cmsLCMScolorSpace(cmsGetColorSpace(hProfile));
+}
+
+static
+int GetDevicelinkColorSpace(cmsHPROFILE hProfile)
+{
+    // Same mapping as GetProfileColorSpace(), but applied to the
+    // signature returned by cmsGetPCS() for the profile.
+    return _cmsLCMScolorSpace(cmsGetPCS(hProfile));
+}
+
+
+// From TRANSUPP
+
+static
+void jcopy_markers_execute(j_decompress_ptr srcinfo, j_compress_ptr dstinfo)
+{
+  jpeg_saved_marker_ptr m;
+
+  /* Every marker saved from the source is re-emitted unchanged, with two
+   * exceptions: a JFIF APP0 segment or an Adobe APP14 segment is dropped
+   * when the compressor is already set to write its own, so the output
+   * never carries two of them.
+   */
+  for (m = srcinfo->marker_list; m != NULL; m = m->next) {
+
+    int isJFIF, isAdobe;
+
+    /* "JFIF" including its terminating NUL: 4A 46 49 46 00 */
+    isJFIF  = m->marker == JPEG_APP0 &&
+              m->data_length >= 5 &&
+              memcmp(m->data, "JFIF", 5) == 0;
+
+    /* "Adobe": 41 64 6F 62 65 */
+    isAdobe = m->marker == JPEG_APP0+14 &&
+              m->data_length >= 5 &&
+              memcmp(m->data, "Adobe", 5) == 0;
+
+    if (dstinfo->write_JFIF_header && isJFIF)
+        continue;         /* reject duplicate JFIF */
+
+    if (dstinfo->write_Adobe_marker && isAdobe)
+        continue;         /* reject duplicate Adobe */
+
+    /* Anything else goes to the output as is */
+    jpeg_write_marker(dstinfo, m->marker,
+                      m->data, m->data_length);
+
+  }
+}
+
+static
+void WriteOutputFields(int OutputColorSpace)
+{
+    J_COLOR_SPACE in_space, jpeg_space;   // color space handed to libjpeg / color space stored in the file
+    int components;
+    // Configures the global Compressor (color spaces, component count, quality, sampling) for OutputColorSpace
+    switch (OutputColorSpace) {
+
+    case PT_GRAY: in_space = jpeg_space = JCS_GRAYSCALE;
+                  components = 1;
+                  break;
+
+    case PT_RGB:  in_space = JCS_RGB;
+                  jpeg_space = JCS_YCbCr;
+                  components = 3;
+                  break;       // red/green/blue
+
+    case PT_YCbCr: in_space = jpeg_space = JCS_YCbCr;
+                   components = 3;
+                   break;               // Y/Cb/Cr (also known as YUV)
+
+    case PT_CMYK: in_space = JCS_CMYK;
+                  jpeg_space = JCS_YCCK;
+                  components = 4;
+                  break;      // C/M/Y/components
+
+    case PT_Lab:  in_space = jpeg_space = JCS_YCbCr;
+                  components = 3;
+                  break;                // Fake to don't touch
+    default:
+                 FatalError("Unsupported output color space");
+                 return;
+    }
+
+
+    if (jpegQuality >= 100) {
+
+     // avoid destructive conversion when asking for lossless compression
+        jpeg_space = in_space;
+    }
+
+    Compressor.in_color_space =  in_space;
+    Compressor.jpeg_color_space = jpeg_space;
+    Compressor.input_components = Compressor.num_components = components;
+    jpeg_set_defaults(&Compressor);                  // called after in_color_space has been assigned
+    jpeg_set_colorspace(&Compressor, jpeg_space);
+
+
+    // Make sure to pass resolution through
+    if (OutputColorSpace == PT_CMYK)
+        Compressor.write_JFIF_header = 1;
+
+    // Avoid subsampling on high quality factor
+    jpeg_set_quality(&Compressor, jpegQuality, 1);
+    if (jpegQuality >= 70) {
+
+      int i;
+      for(i=0; i < Compressor.num_components; i++) {
+          // 1x1 sampling factors for every component
+            Compressor.comp_info[i].h_samp_factor = 1;
+            Compressor.comp_info[i].v_samp_factor = 1;
+      }
+
+    }
+
+}
+
+
+static
+void DoEmbedProfile(const char* ProfileFile)
+{
+    FILE* f;
+    long FileLen;                        // length reported for the profile file
+    size_t EmbedLen;
+    cmsUInt8Number* EmbedBuffer = NULL;
+        // Embedding is best effort: on any failure below the output simply gets no profile.
+        f = fopen(ProfileFile, "rb");
+        if (f == NULL) return;
+        FileLen = cmsfilelength(f);      // NOTE(review): lcms2.h declares this as long, negative on error -- confirm
+        if (FileLen >= 0) EmbedBuffer = (cmsUInt8Number*) malloc((size_t) FileLen + 1);
+        if (EmbedBuffer == NULL) { fclose(f); return; }   // unknown length or no memory
+        EmbedLen = fread(EmbedBuffer, 1, (size_t) FileLen, f);
+        fclose(f);
+        EmbedBuffer[EmbedLen] = 0;       // EmbedLen <= FileLen, so this stays inside the FileLen + 1 bytes
+        write_icc_profile (&Compressor, EmbedBuffer, (unsigned int) EmbedLen);
+        free(EmbedBuffer);
+}
+
+
+
+static
+int DoTransform(cmsHTRANSFORM hXForm, int OutputColorSpace)
+{
+    JSAMPROW ScanLineIn;
+    JSAMPROW ScanLineOut;
+    // Decode, color-transform and re-encode the image one scanline at a time. TRUE on success.
+       //Preserve resolution values from the original
+       // (Thanks to Robert Bergs for finding out this bug)
+       Compressor.density_unit = Decompressor.density_unit;
+       Compressor.X_density    = Decompressor.X_density;
+       Compressor.Y_density    = Decompressor.Y_density;
+
+      //  Compressor.write_JFIF_header = 1;
+
+       jpeg_start_decompress(&Decompressor);
+       jpeg_start_compress(&Compressor, TRUE);
+
+        if (OutputColorSpace == PT_Lab)
+            SetITUFax(&Compressor);
+
+       // Embed the profile if needed
+       if (EmbedProfile && cOutProf)
+           DoEmbedProfile(cOutProf);
+
+       ScanLineIn  = (JSAMPROW) malloc(Decompressor.output_width * Decompressor.num_components);
+       ScanLineOut = (JSAMPROW) malloc(Compressor.image_width * Compressor.num_components);
+       if (ScanLineIn == NULL || ScanLineOut == NULL) {
+           free(ScanLineIn); free(ScanLineOut);   // free(NULL) is a no-op
+           FatalError("Out of memory");
+           return FALSE;
+       }
+
+       while (Decompressor.output_scanline < Decompressor.output_height) {
+       jpeg_read_scanlines(&Decompressor, &ScanLineIn, 1);
+       cmsDoTransform(hXForm, ScanLineIn, ScanLineOut, Decompressor.output_width);   // one row of pixels
+       jpeg_write_scanlines(&Compressor, &ScanLineOut, 1);
+       }
+
+       free(ScanLineIn);
+       free(ScanLineOut);
+
+       jpeg_finish_decompress(&Decompressor);
+       jpeg_finish_compress(&Compressor);
+
+       return TRUE;
+}
+
+
+
+// Transform one image
+//
+// Builds the lcms transform from the selected profiles and option variables,
+// configures the output JPEG to match, and converts the image through
+// DoTransform().
+//
+//   cDefInpProf - input profile name. In device-link mode it is the file name
+//                 of the device link; otherwise it is used only when no
+//                 embedded profile is read from the input JPEG.
+//   cOutputProf - output profile name; "*lab" (case-insensitive) selects the
+//                 profile created by CreatePCS2ITU_ICC(). Not used in
+//                 device-link mode.
+//
+// Always returns 1; failures are reported through FatalError().
+
+static
+int TransformImage(char *cDefInpProf, char *cOutputProf)
+{
+       cmsHPROFILE hIn, hOut, hProof;
+       cmsHTRANSFORM xform;
+       cmsUInt32Number wInput, wOutput;
+       int OutputColorSpace;
+       cmsUInt32Number dwFlags = 0;
+       cmsUInt32Number EmbedLen;
+       cmsUInt8Number* EmbedBuffer;
+
+
+       cmsSetAdaptationState(ObserverAdaptationState);
+
+       if (BlackPointCompensation) {
+
+            dwFlags |= cmsFLAGS_BLACKPOINTCOMPENSATION;
+       }
+
+
+       // Map the precalculation mode to a transform flag; any other value
+       // (including mode 1) adds no flag.
+       switch (PrecalcMode) {
+
+       case 0: dwFlags |= cmsFLAGS_NOOPTIMIZE; break;
+       case 2: dwFlags |= cmsFLAGS_HIGHRESPRECALC; break;
+       case 3: dwFlags |= cmsFLAGS_LOWRESPRECALC; break;
+       default:;
+       }
+
+
+       if (GamutCheck) {
+            dwFlags |= cmsFLAGS_GAMUTCHECK;
+            cmsSetAlarmCodes(Alarm);
+       }
+
+       // Take input color space
+       wInput = GetInputPixelType();
+
+        // Device-link mode: a single profile is opened from file; there is
+        // no separate output or proofing profile.
+        if (lIsDeviceLink) {
+
+            hIn = cmsOpenProfileFromFile(cDefInpProf, "r");
+            hOut = NULL;
+            hProof = NULL;
+       }
+        else {
+
+        // Prefer the profile embedded in the input JPEG unless told to ignore it
+        if (!IgnoreEmbedded && read_icc_profile(&Decompressor, &EmbedBuffer, &EmbedLen))
+        {
+              hIn = cmsOpenProfileFromMem(EmbedBuffer, EmbedLen);
+
+               // NOTE(review): hIn is passed on before the NULL check further
+               // down - confirm PrintProfileInformation() tolerates NULL.
+               if (Verbose) {
+
+                  fprintf(stdout, " (Embedded profile found)\n");
+                  PrintProfileInformation(hIn);
+                  fflush(stdout);
+              }
+
+               // Optionally dump the embedded profile bytes to the SaveEmbedded file
+               if (hIn != NULL && SaveEmbedded != NULL)
+                          SaveMemoryBlock(EmbedBuffer, EmbedLen, SaveEmbedded);
+
+              free(EmbedBuffer);
+        }
+        else
+        {
+            // Default for ITU/Fax
+            if (cDefInpProf == NULL && T_COLORSPACE(wInput) == PT_Lab)
+                cDefInpProf = "*Lab";
+
+            if (cDefInpProf != NULL && cmsstrcasecmp(cDefInpProf, "*lab") == 0)
+                hIn = CreateITU2PCS_ICC();
+            else
+                hIn = OpenStockProfile(0, cDefInpProf);
+       }
+
+        if (cOutputProf != NULL && cmsstrcasecmp(cOutputProf, "*lab") == 0)
+            hOut = CreatePCS2ITU_ICC();
+        else
+        hOut = OpenStockProfile(0, cOutputProf);
+
+       // Soft proofing is enabled only when a proofing profile was requested
+       hProof = NULL;
+       if (cProofing != NULL) {
+
+           hProof = OpenStockProfile(0, cProofing);
+           if (hProof == NULL) {
+            FatalError("Proofing profile couldn't be read.");
+           }
+           dwFlags |= cmsFLAGS_SOFTPROOFING;
+          }
+       }  // closes the non-device-link "else" opened above
+
+        if (!hIn)
+            FatalError("Input profile couldn't be read.");
+        if (!lIsDeviceLink && !hOut)
+            FatalError("Output profile couldn't be read.");
+
+       // Ensure the input profile and the input JPEG use the same color space
+       if (cmsGetColorSpace(hIn) != _cmsICCcolorSpace(T_COLORSPACE(wInput)))
+              FatalError("Input profile is not operating in proper color space");
+
+
+       // Output colorspace is given by output profile
+       // (or by the device link itself, which has no separate output profile)
+
+        if (lIsDeviceLink) {
+            OutputColorSpace = GetDevicelinkColorSpace(hIn);
+        }
+        else {
+            OutputColorSpace = GetProfileColorSpace(hOut);
+        }
+
+       jpeg_copy_critical_parameters(&Decompressor, &Compressor);
+
+       WriteOutputFields(OutputColorSpace);
+
+       wOutput      = ComputeOutputFormatDescriptor(wInput, OutputColorSpace);
+
+
+       // In device-link mode hOut and hProof are both NULL here
+       xform = cmsCreateProofingTransform(hIn, wInput,
+                                          hOut, wOutput,
+                                          hProof, Intent,
+                                          ProofingIntent, dwFlags);
+       if (xform == NULL)
+                 FatalError("Cannot transform by using the profiles");
+
+       DoTransform(xform, OutputColorSpace);
+
+
+       // NOTE(review): DoTransform() has already called jpeg_finish_compress()
+       // by this point - confirm markers can still be copied to the output here.
+       jcopy_markers_execute(&Decompressor, &Compressor);
+
+       cmsDeleteTransform(xform);
+       cmsCloseProfile(hIn);
+       // NOTE(review): hOut is NULL in device-link mode - confirm
+       // cmsCloseProfile() accepts a NULL handle.
+       cmsCloseProfile(hOut);
+       if (hProof) cmsCloseProfile(hProof);
+
+       return 1;
+}
+
+
+// Print the banner followed by the help page selected by `level`, all on
+// stderr, then terminate the program with exit(0).
+//
+//   1     - usage examples
+//   2     - built-in profiles (PrintBuiltins)
+//   3     - contact information
+//   other - flag summary (this is also what level 0 shows)
+
+static
+void Help(int level)
+{
+     fprintf(stderr, "little cms ICC profile applier for JPEG - v3.2 [LittleCMS %2.2f]\n\n", LCMS_VERSION / 1000.0);
+
+     if (level == 1) {
+
+         fprintf(stderr, "Examples:\n\n"
+                         "To color correct from scanner to sRGB:\n"
+                         "\tjpgicc %ciscanner.icm in.jpg out.jpg\n"
+                         "To convert from monitor1 to monitor2:\n"
+                         "\tjpgicc %cimon1.icm %comon2.icm in.jpg out.jpg\n"
+                         "To make a CMYK separation:\n"
+                         "\tjpgicc %coprinter.icm inrgb.jpg outcmyk.jpg\n"
+                         "To recover sRGB from a CMYK separation:\n"
+                         "\tjpgicc %ciprinter.icm incmyk.jpg outrgb.jpg\n"
+                         "To convert from CIELab ITU/Fax JPEG to sRGB\n"
+                         "\tjpgicc in.jpg out.jpg\n\n",
+                         SW, SW, SW, SW, SW);
+     }
+     else if (level == 2) {
+
+         PrintBuiltins();
+     }
+     else if (level == 3) {
+
+         fprintf(stderr, "This program is intended to be a demo of the little cms\n"
+                         "engine. Both lcms and this program are freeware. You can\n"
+                         "obtain both in source code at http://www.littlecms.com\n"
+                         "For suggestions, comments, bug reports etc. send mail to\n"
+                         "marti@littlecms.com\n\n");
+     }
+     else {
+
+         // Level 0 and every unrecognized level: the flag summary
+         fprintf(stderr, "usage: jpgicc [flags] input.jpg output.jpg\n");
+
+         fprintf(stderr, "\nflags:\n\n");
+         fprintf(stderr, "%cv - Verbose\n", SW);
+         fprintf(stderr, "%ci<profile> - Input profile (defaults to sRGB)\n", SW);
+         fprintf(stderr, "%co<profile> - Output profile (defaults to sRGB)\n", SW);
+
+         PrintRenderingIntents();
+
+         fprintf(stderr, "%cb - Black point compensation\n", SW);
+         fprintf(stderr, "%cd<0..1> - Observer adaptation state (abs.col. only)\n", SW);
+         fprintf(stderr, "%cn - Ignore embedded profile\n", SW);
+         fprintf(stderr, "%ce - Embed destination profile\n", SW);
+         fprintf(stderr, "%cs<new profile> - Save embedded profile as <new profile>\n", SW);
+
+         fprintf(stderr, "\n");
+
+         fprintf(stderr, "%cc<0,1,2,3> - Precalculates transform (0=Off, 1=Normal, 2=Hi-res, 3=LoRes) [defaults to 1]\n", SW);
+         fprintf(stderr, "\n");
+
+         fprintf(stderr, "%cp<profile> - Soft proof profile\n", SW);
+         fprintf(stderr, "%cm<0,1,2,3> - SoftProof intent\n", SW);
+         fprintf(stderr, "%cg - Marks out-of-gamut colors on softproof\n", SW);
+         fprintf(stderr, "%c!<r>,<g>,<b> - Out-of-gamut marker channel values\n", SW);
+
+         fprintf(stderr, "\n");
+         fprintf(stderr, "%cq<0..100> - Output JPEG quality\n", SW);
+
+         fprintf(stderr, "\n");
+         fprintf(stderr, "%ch<0,1,2,3> - More help\n", SW);
+     }
+
+     exit(0);
+}
+
+
+// The toggles stuff
+//
+// Parse the command-line switches with xgetopt() and store the results in
+// the file-level option variables. Every letter switch is accepted in both
+// lower and upper case. Out-of-range values and unknown switches are
+// reported through FatalError().
+
+static
+void HandleSwitches(int argc, char *argv[])
+{
+    int s;
+
+    while ((s=xgetopt(argc,argv,"bBnNvVGgh:H:i:I:o:O:P:p:t:T:c:C:Q:q:M:m:L:l:eEs:S:!:D:d:")) != EOF) {
+
+        switch (s)
+        {
+
+        case 'b':
+        case 'B':
+            BlackPointCompensation = TRUE;
+            break;
+
+        case 'd':
+        case 'D': ObserverAdaptationState = atof(xoptarg);
+            if (ObserverAdaptationState < 0 ||
+                ObserverAdaptationState > 1.0)
+                FatalError("Adaptation state should be 0..1");
+            break;
+
+        case 'v':
+        case 'V':
+            Verbose = TRUE;
+            break;
+
+        // Input/output profiles and a device link are mutually exclusive
+        case 'i':
+        case 'I':
+            if (lIsDeviceLink)
+                FatalError("Device-link already specified");
+
+            cInpProf = xoptarg;
+            break;
+
+        case 'o':
+        case 'O':
+            if (lIsDeviceLink)
+                FatalError("Device-link already specified");
+
+            cOutProf = xoptarg;
+            break;
+
+        // A device link is stored in the input-profile slot
+        case 'l':
+        case 'L':
+            if (cInpProf != NULL || cOutProf != NULL)
+                FatalError("input/output profiles already specified");
+
+            cInpProf = xoptarg;
+            lIsDeviceLink = TRUE;
+            break;
+
+        case 'p':
+        case 'P':
+            cProofing = xoptarg;
+            break;
+
+        case 't':
+        case 'T':
+            Intent = atoi(xoptarg);
+            break;
+
+        case 'N':
+        case 'n':
+            IgnoreEmbedded = TRUE;
+            break;
+
+        case 'e':
+        case 'E':
+            EmbedProfile = TRUE;
+            break;
+
+
+        case 'g':
+        case 'G':
+            GamutCheck = TRUE;
+            break;
+
+        // Modes 0..3 are valid: the help text documents <0,1,2,3> and mode 3
+        // selects the low-resolution precalculation flag in TransformImage().
+        case 'c':
+        case 'C':
+            PrecalcMode = atoi(xoptarg);
+            if (PrecalcMode < 0 || PrecalcMode > 3)
+                FatalError("Unknown precalc mode '%d'", PrecalcMode);
+            break;
+
+        case 'H':
+        case 'h':  {
+
+            int a =  atoi(xoptarg);
+            Help(a);
+                   }
+            break;
+
+        // Quality is clamped to 0..100 rather than rejected
+        case 'q':
+        case 'Q':
+            jpegQuality = atoi(xoptarg);
+            if (jpegQuality > 100) jpegQuality = 100;
+            if (jpegQuality < 0)   jpegQuality = 0;
+            break;
+
+        case 'm':
+        case 'M':
+            ProofingIntent = atoi(xoptarg);
+            break;
+
+        case 's':
+        case 'S': SaveEmbedded = xoptarg;
+            break;
+
+        // Out-of-gamut marker: each parsed channel value v becomes (v << 8) | v;
+        // the option is ignored unless all three values parse.
+        case '!':
+            if (sscanf(xoptarg, "%hu,%hu,%hu", &Alarm[0], &Alarm[1], &Alarm[2]) == 3) {
+                int i;
+                for (i=0; i < 3; i++) {
+                    Alarm[i] = (Alarm[i] << 8) | Alarm[i];
+                }
+            }
+            break;
+
+        default:
+
+            FatalError("Unknown option - run without args to see valid ones");
+        }
+
+    }
+}
+
+
+// Program entry point: parse the switches, open the input and output files
+// named by the two remaining arguments, convert the image and clean up.
+int main(int argc, char* argv[])
+{
+    int positional;
+
+    InitUtils("jpgicc");
+    HandleSwitches(argc, argv);
+
+    // Exactly two non-option arguments (input and output file) are expected;
+    // anything else shows the usage text.
+    positional = argc - xoptind;
+    if (positional != 2)
+        Help(0);
+
+    OpenInput(argv[xoptind]);
+    OpenOutput(argv[xoptind + 1]);
+
+    TransformImage(cInpProf, cOutProf);
+
+    if (Verbose) {
+        fprintf(stdout, "\n");
+        fflush(stdout);
+    }
+
+    Done();
+    return 0;
+}
+
+
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/linkicc/Makefile.am b/third-party/libjxl/libjxl/third_party/lcms/utils/linkicc/Makefile.am
new file mode 100644
index 0000000000..3b9186970c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/linkicc/Makefile.am
@@ -0,0 +1,19 @@
+#
+# Makefile for building lcms sample programs
+# Originally Written by Bob Friesenhahn, June 2003
+# Additions and bugs by Marti Maria
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+
+# Header search path: the library headers plus the shared utility sources
+AM_CPPFLAGS =  -I$(top_builddir)/include -I$(top_srcdir)/include \
+              -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+
+bin_PROGRAMS = linkicc
+
+linkicc_LDADD = $(top_builddir)/src/liblcms2.la
+linkicc_LDFLAGS = @LDFLAGS@
+linkicc_SOURCES = linkicc.c ../common/xgetopt.c ../common/vprf.c ../common/utils.h
+
+# The man page has to be listed in man_MANS: that is the variable automake
+# installs from, and the one EXTRA_DIST expands below.
+man_MANS = linkicc.1
+
+EXTRA_DIST = $(man_MANS)
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/linkicc/Makefile.in b/third-party/libjxl/libjxl/third_party/lcms/utils/linkicc/Makefile.in
new file mode 100644
index 0000000000..2631ab8791
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/linkicc/Makefile.in
@@ -0,0 +1,663 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Makefile for building lcms sample programs
+# Originally Written by Bob Friesenhahn, June 2003
+# Additions and bugs by Marti Maria 
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+bin_PROGRAMS = linkicc$(EXEEXT)
+subdir = utils/linkicc
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
+	$(top_srcdir)/m4/ax_append_compile_flags.m4 \
+	$(top_srcdir)/m4/ax_append_flag.m4 \
+	$(top_srcdir)/m4/ax_check_compile_flag.m4 \
+	$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+	$(top_srcdir)/m4/ax_require_defined.m4 \
+	$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+	$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+	$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__installdirs = "$(DESTDIR)$(bindir)"
+PROGRAMS = $(bin_PROGRAMS)
+am__dirstamp = $(am__leading_dot)dirstamp
+am_linkicc_OBJECTS = linkicc.$(OBJEXT) ../common/xgetopt.$(OBJEXT) \
+	../common/vprf.$(OBJEXT)
+linkicc_OBJECTS = $(am_linkicc_OBJECTS)
+linkicc_DEPENDENCIES = $(top_builddir)/src/liblcms2.la
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 = 
+linkicc_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(linkicc_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(linkicc_SOURCES)
+DIST_SOURCES = $(linkicc_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JPEGICC_DEPLIBS = @JPEGICC_DEPLIBS@
+LCMS_LIB_DEPLIBS = @LCMS_LIB_DEPLIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBRARY_AGE = @LIBRARY_AGE@
+LIBRARY_CURRENT = @LIBRARY_CURRENT@
+LIBRARY_REVISION = @LIBRARY_REVISION@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBTOOL_DEPS = @LIBTOOL_DEPS@
+LIB_JPEG = @LIB_JPEG@
+LIB_MATH = @LIB_MATH@
+LIB_THREAD = @LIB_THREAD@
+LIB_TIFF = @LIB_TIFF@
+LIB_ZLIB = @LIB_ZLIB@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_CXX = @PTHREAD_CXX@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TIFFICC_DEPLIBS = @TIFFICC_DEPLIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+inline = @inline@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
+              -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+
+linkicc_LDADD = $(top_builddir)/src/liblcms2.la 
+linkicc_LDFLAGS = @LDFLAGS@
+linkicc_SOURCES = linkicc.c ../common/xgetopt.c ../common/vprf.c ../common/utils.h
+linkicc_MANS = linkicc.1
+EXTRA_DIST = $(man_MANS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign utils/linkicc/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign utils/linkicc/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-binPROGRAMS: $(bin_PROGRAMS)
+	@$(NORMAL_INSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
+	fi; \
+	for p in $$list; do echo "$$p $$p"; done | \
+	sed 's/$(EXEEXT)$$//' | \
+	while read p p1; do if test -f $$p \
+	 || test -f $$p1 \
+	  ; then echo "$$p"; echo "$$p"; else :; fi; \
+	done | \
+	sed -e 'p;s,.*/,,;n;h' \
+	    -e 's|.*|.|' \
+	    -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+	sed 'N;N;N;s,\n, ,g' | \
+	$(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+	  { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+	    if ($$2 == $$4) files[d] = files[d] " " $$1; \
+	    else { print "f", $$3 "/" $$4, $$1; } } \
+	  END { for (d in files) print "f", d, files[d] }' | \
+	while read type dir files; do \
+	    if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+	    test -z "$$files" || { \
+	    echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+	    $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+	    } \
+	; done
+
+uninstall-binPROGRAMS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	files=`for p in $$list; do echo "$$p"; done | \
+	  sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+	      -e 's/$$/$(EXEEXT)/' \
+	`; \
+	test -n "$$list" || exit 0; \
+	echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+	cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+	@list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
+	echo " rm -f" $$list; \
+	rm -f $$list || exit $$?; \
+	test -n "$(EXEEXT)" || exit 0; \
+	list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+	echo " rm -f" $$list; \
+	rm -f $$list
+../common/$(am__dirstamp):
+	@$(MKDIR_P) ../common
+	@: > ../common/$(am__dirstamp)
+../common/xgetopt.$(OBJEXT): ../common/$(am__dirstamp)
+../common/vprf.$(OBJEXT): ../common/$(am__dirstamp)
+
+linkicc$(EXEEXT): $(linkicc_OBJECTS) $(linkicc_DEPENDENCIES) $(EXTRA_linkicc_DEPENDENCIES) 
+	@rm -f linkicc$(EXEEXT)
+	$(AM_V_CCLD)$(linkicc_LINK) $(linkicc_OBJECTS) $(linkicc_LDADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+	-rm -f ../common/*.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+.c.o:
+	$(AM_V_CC)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+	$(AM_V_CC)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+	$(AM_V_CC)$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS)
+installdirs:
+	for dir in "$(DESTDIR)$(bindir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+	-rm -f ../common/$(am__dirstamp)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean \
+	clean-binPROGRAMS clean-generic clean-libtool cscopelist-am \
+	ctags ctags-am distclean distclean-compile distclean-generic \
+	distclean-libtool distclean-tags distdir dvi dvi-am html \
+	html-am info info-am install install-am install-binPROGRAMS \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-info install-info-am install-man install-pdf \
+	install-pdf-am install-ps install-ps-am install-strip \
+	installcheck installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags tags-am uninstall uninstall-am uninstall-binPROGRAMS
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/linkicc/linkicc.1 b/third-party/libjxl/libjxl/third_party/lcms/utils/linkicc/linkicc.1
new file mode 100644
index 0000000000..56f73bb9a1
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/linkicc/linkicc.1
@@ -0,0 +1,123 @@
+.\"Shiju P. Nair September 30, 2004
+.\"Thomas Weber <tweber@debian.org> April 23, 2014
+.TH LINKICC 1 "September 30, 2004"
+.SH NAME
+linkicc - little cms device link generator.
+.SH SYNOPSIS
+.B linkicc
+.RI [ options ] " profiles"
+.SH DESCRIPTION
+lcms is a standalone CMM engine, which deals with the color management.
+It implements a fast transformation between ICC profiles.
+.B linkicc
+is a little cms device link generator.
+.P
+Links two or more profiles into a single devicelink profile.
+Colorspaces must be paired except Lab/XYZ, that can be interchanged.
+.SH OPTIONS
+.TP
+.BR \-a\  NUM
+Observer adaptation state (abs.col. only), (0..1.0, float value) [defaults to 1.0].
+.TP
+.B \-b
+Black point compensation.
+.TP
+.BI \-c\  precision
+Precision (0=LowRes, 1=Normal, 2=Hi-res) [defaults to 1].
+.TP
+.BI \-d\  description
+Description text (quotes can be used).
+.TP
+.BI \-h\  NUM
+Show summary of options and examples (0=help, 1=Built-in profiles, 2=Examples, 3=Contact information)
+.TP
+.BI \-k\  inklimit
+Ink-limiting in % (CMYK only), (0..400.0, float value) [default 400.0].
+.TP
+.B \-l
+Use linearization curves (may affect accuracy).
+.TP
+.BI \-n\  gridpoints
+Alternate way to set precision, number of CLUT points.
+.TP
+.BI \-o\  profile
+Output devicelink profile [defaults to 'devicelink.icc'].
+.TP
+.BI \-r\  profileversion
+Profile version. (CAUTION: may change the profile implementation), (2.0..4.3, float value) [defaults to 4.3].
+.TP
+.BI \-t\  NUM
+Rendering intent
+.nf
+.RS
+0=Perceptual [default]
+1=Relative colorimetric
+2=Saturation
+3=Absolute colorimetric
+10=Perceptual preserving black ink
+11=Relative colorimetric preserving black ink
+12=Saturation preserving black ink
+13=Perceptual preserving black plane
+14=Relative colorimetric preserving black plane
+15=Saturation preserving black plane
+.RE
+.fi
+.TP
+.BI \-v\  verbosity
+Verbosity level, (0=None, 1=Normal, 2=High, 3=Very High) [defaults to 0].
+.TP
+.B \-x
+Creatively, guess deviceclass of resulting profile.
+.TP
+.BI \-y\  copyright
+Copyright notice (quotes can be used) ["No copyright, use freely"].
+.TP
+.B \-8
+Creates 8-bit devicelink.
+.SH BUILT-IN PROFILES
+.nf
+	*Lab2  -- D50-based v2 CIEL*a*b
+	*Lab4  -- D50-based v4 CIEL*a*b
+	*Lab   -- D50-based v4 CIEL*a*b
+	*XYZ   -- CIE XYZ (PCS)
+	*sRGB  -- sRGB color space
+	*Gray22 - Monochrome of Gamma 2.2
+	*Gray30 - Monochrome of Gamma 3.0
+	*null   - Monochrome black for all input
+	*Lin2222- CMYK linearization of gamma 2.2 on each channel
+.fi
+.SH EXAMPLES
+.nf
+To create 'devicelink.icc' from a.icc to b.icc:
+	linkicc a.icc b.icc
+
+To create 'out.icc' from sRGB to cmyk.icc:
+	linkicc -o out.icc *sRGB cmyk.icc
+
+To create a sRGB input profile working in Lab:
+	linkicc -x -o sRGBLab.icc *sRGB *Lab
+
+To create a XYZ -> sRGB output profile:
+	linkicc -x -o sRGBLab.icc *XYZ *sRGB
+
+To create an abstract profile doing softproof for cmyk.icc:
+	linkicc -t1 -x -o softproof.icc *Lab cmyk.icc cmyk.icc *Lab
+
+To create a 'grayer' sRGB input profile:
+	linkicc -x -o grayer.icc *sRGB gray.icc gray.icc *Lab
+
+To embed ink limiting into a cmyk output profile:
+	linkicc -x -o cmyklimited.icc -k 250 cmyk.icc *Lab
+
+.fi
+.SH NOTES
+For suggestions, comments, bug reports etc. send mail to
+info@littlecms.com.
+.SH SEE ALSO
+.BR jpgicc (1),
+.BR psicc (1),
+.BR tificc (1),
+.BR transicc (1)
+.SH AUTHOR
+This manual page was written by Shiju P. Nair <shiju.p@gmail.com>,
+for the Debian project.
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/linkicc/linkicc.c b/third-party/libjxl/libjxl/third_party/lcms/utils/linkicc/linkicc.c
new file mode 100644
index 0000000000..0fd9d0c5c4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/linkicc/linkicc.c
@@ -0,0 +1,384 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+
+#include "utils.h"
+
+// ---------------------------------------------------------------------------------
+
+static char* Description = "Devicelink profile";
+static char* Copyright   = "No copyright, use freely";
+static int   Intent = INTENT_PERCEPTUAL;
+static char* cOutProf    = "devicelink.icc";
+static int   PrecalcMode  = 1;
+static int   NumOfGridPoints = 0;
+
+static cmsFloat64Number ObserverAdaptationState = 1.0;  // According ICC 4.2 this is the default
+
+static cmsBool BlackPointCompensation = FALSE;
+
+static cmsFloat64Number InkLimit   = 400;
+static cmsBool lUse8bits           = FALSE;
+static cmsBool TagResult           = FALSE;
+static cmsBool KeepLinearization   = FALSE;
+static cmsFloat64Number Version    = 4.3;
+
+
+// The manual.
+//
+// Prints the help page selected by `level` to stderr and then terminates the
+// process with exit(0); control never returns to the caller.
+//
+//   level 0 (and any unlisted value) - summary of the command-line flags
+//   level 1 - list of built-in profiles (delegated to PrintBuiltins)
+//   level 2 - usage examples
+//   level 3 - contact information
+static
+int Help(int level)
+{
+    switch (level) {
+
+    default:
+    case 0:
+
+        fprintf(stderr, "\nlinkicc: Links profiles into a single devicelink.\n");
+
+        fprintf(stderr, "\n");
+        fprintf(stderr, "usage: linkicc [flags] <profiles>\n\n");
+        fprintf(stderr, "flags:\n\n");
+        fprintf(stderr, "%co<profile> - Output devicelink profile. [defaults to 'devicelink.icc']\n", SW);
+
+        PrintRenderingIntents();
+
+        fprintf(stderr, "%cc<0,1,2> - Precision (0=LowRes, 1=Normal, 2=Hi-res) [defaults to 1]\n", SW);
+        fprintf(stderr, "%cn<gridpoints> - Alternate way to set precision, number of CLUT points\n", SW);
+        fprintf(stderr, "%cd<description> - description text (quotes can be used)\n", SW);
+        fprintf(stderr, "%cy<copyright> - copyright notice (quotes can be used)\n", SW);
+
+        fprintf(stderr, "\n%ck<0..400> - Ink-limiting in %% (CMYK only)\n", SW);
+        fprintf(stderr, "%c8 - Creates 8-bit devicelink\n", SW);
+        fprintf(stderr, "%cx - Creatively, guess deviceclass of resulting profile.\n", SW);
+        fprintf(stderr, "%cb - Black point compensation\n", SW);
+        fprintf(stderr, "%ca<0..1> - Observer adaptation state (abs.col. only)\n\n", SW);
+        fprintf(stderr, "%cl - Use linearization curves (may affect accuracy)\n", SW);
+        fprintf(stderr, "%cr<v.r> - Profile version. (CAUTION: may change the profile implementation)\n", SW);
+        // The switch parser accepts -v with a level of 0..3; list it here as well.
+        fprintf(stderr, "%cv<0..3> - Verbosity level\n", SW);
+        fprintf(stderr, "\n");
+        fprintf(stderr, "Colorspaces must be paired except Lab/XYZ, that can be interchanged.\n\n");
+
+        fprintf(stderr, "%ch<0,1,2,3> - More help\n", SW);
+        break;
+
+    case 1:
+        PrintBuiltins();
+        break;
+
+    case 2:
+
+        // The first example names the default output file, which is
+        // 'devicelink.icc' (see cOutProf), not 'devicelink.icm'.
+        fprintf(stderr, "\nExamples:\n\n"
+            "To create 'devicelink.icc' from a.icc to b.icc:\n"
+            "\tlinkicc a.icc b.icc\n\n"
+            "To create 'out.icc' from sRGB to cmyk.icc:\n"
+            "\tlinkicc -o out.icc *sRGB cmyk.icc\n\n"
+            "To create a sRGB input profile working in Lab:\n"
+            "\tlinkicc -x -o sRGBLab.icc *sRGB *Lab\n\n"
+            "To create a XYZ -> sRGB output profile:\n"
+            "\tlinkicc -x -o sRGBLab.icc *XYZ *sRGB\n\n"
+            "To create a abstract profile doing softproof for cmyk.icc:\n"
+            "\tlinkicc -t1 -x -o softproof.icc *Lab cmyk.icc cmyk.icc *Lab\n\n"
+            "To create a 'grayer' sRGB input profile:\n"
+            "\tlinkicc -x -o grayer.icc *sRGB gray.icc gray.icc *Lab\n\n"
+            "To embed ink limiting into a cmyk output profile:\n"
+            "\tlinkicc -x -o cmyklimited.icc -k 250 cmyk.icc *Lab\n\n");
+        break;
+
+    case 3:
+
+        fprintf(stderr, "This program is intended to be a demo of the little cms\n"
+            "engine. Both lcms and this program are freeware. You can\n"
+            "obtain both in source code at http://www.littlecms.com\n"
+            "For suggestions, comments, bug reports etc. send mail to\n"
+            "info@littlecms.com\n\n");
+        break;
+    }
+
+    exit(0);
+}
+
+// The toggles stuff.
+//
+// Walks the command line with xgetopt() and stores each recognised switch in
+// the corresponding file-level option variable. Every letter is accepted in
+// both upper and lower case. Out-of-range values are reported through
+// FatalError(); -h prints the requested help page via Help().
+static
+void HandleSwitches(int argc, char *argv[])
+{
+    int opt;
+
+    for (;;) {
+
+        opt = xgetopt(argc, argv, "a:A:BbC:c:D:d:h:H:k:K:lLn:N:O:o:r:R:T:t:V:v:xX8y:Y:");
+        if (opt == EOF)
+            break;
+
+        switch (opt) {
+
+        case 'a': case 'A':
+            ObserverAdaptationState = atof(xoptarg);
+            if (ObserverAdaptationState < 0 ||
+                ObserverAdaptationState > 1.0)
+                FatalError("Adaptation state should be 0..1");
+            break;
+
+        case 'b': case 'B':
+            BlackPointCompensation = TRUE;
+            break;
+
+        case 'c': case 'C':
+            PrecalcMode = atoi(xoptarg);
+            if (PrecalcMode < 0 || PrecalcMode > 2) {
+                FatalError("Unknown precalc mode '%d'", PrecalcMode);
+            }
+            break;
+
+        case 'd': case 'D':
+            // Keeping the raw pointer is fine: xoptarg refers to the command
+            // line itself. The same holds for the copyright text and the
+            // output file name below.
+            Description = xoptarg;
+            break;
+
+        case 'h': case 'H':
+            Help(atoi(xoptarg));
+            return;
+
+        case 'k': case 'K':
+            InkLimit = atof(xoptarg);
+            if (InkLimit < 0.0 || InkLimit > 400.0) {
+                FatalError("Ink limit must be 0%%..400%%");
+            }
+            break;
+
+        case 'l': case 'L':
+            KeepLinearization = TRUE;
+            break;
+
+        case 'n': case 'N':
+            // An explicit grid size only makes sense with the default precision mode.
+            if (PrecalcMode != 1) {
+                FatalError("Precalc mode already specified");
+            }
+            NumOfGridPoints = atoi(xoptarg);
+            break;
+
+        case 'o': case 'O':
+            cOutProf = xoptarg;
+            break;
+
+        case 'r': case 'R':
+            Version = atof(xoptarg);
+            if (Version < 2.0 || Version > 4.3) {
+                fprintf(stderr, "WARNING: lcms was not aware of this version, tag types may be wrong!\n");
+            }
+            break;
+
+        case 't': case 'T':
+            // No range check here; the intent is validated later on.
+            Intent = atoi(xoptarg);
+            break;
+
+        case 'v': case 'V':
+            Verbose = atoi(xoptarg);
+            if (Verbose < 0 || Verbose > 3) {
+                FatalError("Unknown verbosity level '%d'", Verbose);
+            }
+            break;
+
+        case '8':
+            lUse8bits = TRUE;
+            break;
+
+        case 'y': case 'Y':
+            Copyright = xoptarg;
+            break;
+
+        case 'x': case 'X':
+            TagResult = TRUE;
+            break;
+
+        default:
+            FatalError("Unknown option - run without args to see valid ones.\n");
+        }
+    }
+}
+
+// Set the copyright and description.
+//
+// Writes the Description and Copyright strings into hProfile as "en"/"US"
+// multi-localized text tags. Returns TRUE only if both strings were stored
+// and both tags were written; the temporary MLU objects are released on
+// every path.
+static
+cmsBool SetTextTags(cmsHPROFILE hProfile)
+{
+    cmsContext ctx     = cmsGetProfileContextID(hProfile);
+    cmsMLU*    descMLU = cmsMLUalloc(ctx, 1);
+    cmsMLU*    copyMLU = cmsMLUalloc(ctx, 1);
+    cmsBool    ok      = FALSE;
+
+    if (descMLU != NULL && copyMLU != NULL) {
+
+        // && short-circuits, so the first failing step stops the sequence.
+        ok = cmsMLUsetASCII(descMLU, "en", "US", Description) &&
+             cmsMLUsetASCII(copyMLU, "en", "US", Copyright) &&
+             cmsWriteTag(hProfile, cmsSigProfileDescriptionTag, descMLU) &&
+             cmsWriteTag(hProfile, cmsSigCopyrightTag, copyMLU);
+    }
+
+    if (descMLU != NULL) cmsMLUfree(descMLU);
+    if (copyMLU != NULL) cmsMLUfree(copyMLU);
+
+    return ok;
+}
+
+
+
+// Program entry point: links the profiles named on the command line into a
+// single devicelink profile and saves it to cOutProf.
+//
+// Returns 0 only after the devicelink has been saved. Every failure that
+// reaches Cleanup yields 1, so callers and scripts can detect the error
+// even if FatalError() returns instead of terminating the process.
+int main(int argc, char *argv[])
+{
+    int i, nargs, rc;
+    cmsHPROFILE Profiles[257];     // room for 255 inputs plus the optional ink-limiting link
+    cmsHPROFILE hProfile;
+    cmsUInt32Number dwFlags;
+    cmsHTRANSFORM hTransform = NULL;
+
+    // Here we are
+    fprintf(stderr, "little cms ICC device link generator - v2.2 [LittleCMS %2.2f]\n", LCMS_VERSION / 1000.0);
+    fflush(stderr);
+
+    // Initialize. Assume failure until the profile is written, and clear the
+    // table up front so Cleanup never looks at uninitialised handles.
+    InitUtils("linkicc");
+    rc = 1;
+    memset(Profiles, 0, sizeof(Profiles));
+
+    // Get the options
+    HandleSwitches(argc, argv);
+
+    // How many profiles to link?
+    nargs = (argc - xoptind);
+    if (nargs < 1)
+        return Help(0);
+
+    if (nargs > 255) {
+        FatalError("Holy profile! what are you trying to do with so many profiles!?");
+        nargs = 0;      // nothing has been opened; keep Cleanup inside the array
+        goto Cleanup;
+    }
+
+    // Open all profiles
+    for (i=0; i < nargs; i++) {
+
+        Profiles[i] = OpenStockProfile(0, argv[i + xoptind]);
+        if (Profiles[i] == NULL) goto Cleanup;
+
+        if (Verbose >= 1) {
+            PrintProfileInformation(Profiles[i]);
+        }
+    }
+
+    // Ink limiting: append an ink-limiting devicelink after the last profile
+    if (InkLimit != 400.0) {
+        cmsColorSpaceSignature EndingColorSpace = cmsGetColorSpace(Profiles[nargs-1]);
+
+        Profiles[nargs] = cmsCreateInkLimitingDeviceLink(EndingColorSpace, InkLimit);
+        if (Profiles[nargs] == NULL) {
+            FatalError("Ink limiting devicelink creation failed");
+            goto Cleanup;
+        }
+        nargs++;
+    }
+
+    // Set the flags
+    dwFlags = cmsFLAGS_KEEP_SEQUENCE;
+    switch (PrecalcMode) {
+
+        case 0: dwFlags |= cmsFLAGS_LOWRESPRECALC; break;
+        case 2: dwFlags |= cmsFLAGS_HIGHRESPRECALC; break;
+        case 1:
+            if (NumOfGridPoints > 0)
+                dwFlags |= cmsFLAGS_GRIDPOINTS(NumOfGridPoints);
+            break;
+
+        default:
+            {
+                FatalError("Unknown precalculation mode '%d'", PrecalcMode);
+                goto Cleanup;
+            }
+    }
+
+    if (BlackPointCompensation)
+        dwFlags |= cmsFLAGS_BLACKPOINTCOMPENSATION;
+
+    if (TagResult)
+        dwFlags |= cmsFLAGS_GUESSDEVICECLASS;
+
+    if (KeepLinearization)
+        dwFlags |= cmsFLAGS_CLUT_PRE_LINEARIZATION|cmsFLAGS_CLUT_POST_LINEARIZATION;
+
+    if (lUse8bits) dwFlags |= cmsFLAGS_8BITS_DEVICELINK;
+
+    cmsSetAdaptationState(ObserverAdaptationState);
+
+    // Create the color transform. Specify 0 for the format is safe as the transform
+    // is intended to be used only for the devicelink.
+    hTransform = cmsCreateMultiprofileTransform(Profiles, nargs, 0, 0, Intent, dwFlags|cmsFLAGS_NOOPTIMIZE);
+    if (hTransform == NULL) {
+        FatalError("Transform creation failed");
+        goto Cleanup;
+    }
+
+    hProfile =  cmsTransform2DeviceLink(hTransform, Version, dwFlags);
+    if (hProfile == NULL) {
+        FatalError("Devicelink creation failed");
+        goto Cleanup;
+    }
+
+    SetTextTags(hProfile);
+    cmsSetHeaderRenderingIntent(hProfile, Intent);
+
+    if (cmsSaveProfileToFile(hProfile, cOutProf)) {
+
+        rc = 0;     // the only successful outcome
+
+        if (Verbose > 0)
+            fprintf(stderr, "Ok");
+    }
+    else
+        FatalError("Error saving file!");
+
+    cmsCloseProfile(hProfile);
+
+
+Cleanup:
+
+    if (hTransform != NULL) cmsDeleteTransform(hTransform);
+    for (i=0; i < nargs; i++) {
+
+        if (Profiles[i] != NULL) cmsCloseProfile(Profiles[i]);
+    }
+
+    return rc;
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/matlab/icctrans.c b/third-party/libjxl/libjxl/third_party/lcms/utils/matlab/icctrans.c
new file mode 100644
index 0000000000..2125f94526
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/matlab/icctrans.c
@@ -0,0 +1,724 @@
+//
+//  Little cms
+//  Copyright (C) 1998-2010 Marti Maria, Ignacio Ruiz de Conejo
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+#include "mex.h"
+
+#include "lcms2.h"
+#include "string.h"
+#include "stdarg.h"
+
+// xgetopt() interface -----------------------------------------------------
+
+static int   xoptind;    
+static char *xoptarg; 
+static int   xopterr;  
+static char  *letP;
+static char   SW = '-';
+
+// ------------------------------------------------------------------------
+
+
+static int  Verbose ;			// Print some statistics
+static char *cInProf;			// Input profile
+static char *cOutProf;			// Output profile
+static char *cProofing;			// Softproofing profile
+
+
+static int  Intent;				// Rendering Intent
+static int  ProofingIntent;		// RI for proof
+
+static int  PrecalcMode;		// 0 = Not, 1=Normal, 2=Accurate, 3=Fast
+
+static cmsBool BlackPointCompensation;
+static cmsBool lIsDeviceLink;
+static cmsBool lMultiProfileChain;		// Multiple profile chain
+
+static cmsHPROFILE hInput, hOutput, hProof;
+static cmsHTRANSFORM hColorTransform;
+static cmsHPROFILE hProfiles[255];
+static int nProfiles;
+
+static cmsColorSpaceSignature InputColorSpace, OutputColorSpace;
+static int OutputChannels, InputChannels, nBytesDepth;
+
+
+// Error. Print error message and abort
+
+static
+cmsBool FatalError(const char *frm, ...)
+{
+	va_list args;
+	char Buffer[1024];
+
+	va_start(args, frm);
+	vsprintf(Buffer, frm, args);
+	mexErrMsgTxt(Buffer);   
+	va_end(args);
+
+	return FALSE;               
+}
+
+// This is the handler passed to lcms.
+//
+// Forwards the error text reported by lcms to MATLAB through mexErrMsgTxt().
+// The context and the error code are not used.
+
+static
+void MatLabErrorHandler(cmsContext ContextID, cmsUInt32Number ErrorCode,
+						const char *Text)
+{
+	(void) ContextID;
+	(void) ErrorCode;
+
+	mexErrMsgTxt(Text);
+}
+//
+//  Parse the command line options, System V style.
+//
+
+static
+void xoptinit()
+{   
+	xoptind = 1;
+	xopterr = 0;
+	letP = NULL;
+}
+
+
+// Returns the next option letter found in argv, EOF when there are no more
+// options, or '?' for an unknown option or a missing argument.
+//
+// optionS lists the accepted letters; a letter followed by ':' takes an
+// argument, which is returned through xoptarg. The scan position is kept in
+// the file-level variables xoptind (index of the argv word) and letP (position
+// inside the word currently being scanned, NULL when a new word must be
+// started), so successive calls continue where the previous one stopped.
+static
+int xgetopt(int argc, char *argv[], char *optionS)
+{
+	unsigned char ch;
+	char *optP;
+
+	// Fall back to '/' when no switch character has been set
+	if (SW == 0) {
+		SW = '/';
+	}
+
+	if (argc > xoptind) {
+		if (letP == NULL) {
+			// Starting a new word: it must begin with the switch character
+			if ((letP = argv[xoptind]) == NULL ||
+				*(letP++) != SW)  goto gopEOF;
+			// A doubled switch character ends option parsing; the word is skipped
+			if (*letP == SW) {
+				xoptind++;  goto gopEOF;
+			}
+		}
+		// No letter left in this word: skip it and stop
+		if (0 == (ch = *(letP++))) {
+			xoptind++;  goto gopEOF;
+		}
+		// ':' is never a valid option, and the letter must appear in optionS
+		if (':' == ch  ||  (optP = strchr(optionS, ch)) == NULL)
+			goto gopError;
+		if (':' == *(++optP)) {
+			// Option takes an argument: either the rest of this word or,
+			// when the word ends here, the next argv entry
+			xoptind++;
+			if (0 == *letP) {
+				if (argc <= xoptind)  goto  gopError;
+				letP = argv[xoptind++];
+			}
+			xoptarg = letP;
+			letP = NULL;
+		} else {
+			// Plain flag: move to the next word once this one is exhausted,
+			// otherwise keep letP so clustered flags are read on the next call
+			if (0 == *letP) {
+				xoptind++;
+				letP = NULL;
+			}
+			xoptarg = NULL;
+		}
+		return ch;
+	}
+gopEOF:
+	xoptarg = letP = NULL;
+	return EOF;
+
+gopError:
+	xoptarg = NULL;    
+	// Errors are only reported through FatalError when xopterr is nonzero
+	if (xopterr)
+		FatalError ("get command line option");
+	return ('?');
+}
+
+
+// Returns the sample size in bytes for the MATLAB class of Array.
+//
+// 8-bit classes give 1, 16-bit classes give 2 and single gives 4. Double
+// gives 0, the value lcms uses to denote double samples. Any other class is
+// reported through FatalError.
+
+static
+size_t SizeOfArrayType(const mxArray *Array)
+{
+	switch (mxGetClassID(Array)) {
+
+	case mxINT8_CLASS:
+	case mxUINT8_CLASS:
+		return 1;
+
+	case mxINT16_CLASS:
+	case mxUINT16_CLASS:
+		return 2;
+
+	case mxSINGLE_CLASS:
+		return 4;
+
+	case mxDOUBLE_CLASS:
+		return 0;	// Special case -- lcms handles double as size=0
+
+	default:
+		FatalError("Unsupported data type");
+		return 0;
+	}
+}
+
+
+// Get number of pixels of input array. Supported arrays are
+// organized as NxMxD, being N and M the size of image and D the
+// number of components.
+//
+// The dimension count and the dimension vector are read with the MATLAB
+// mwSize type, which is what mxGetNumberOfDimensions() and mxGetDimensions()
+// return; reading them through an int pointer gives wrong values whenever
+// mwSize is wider than int. The product is computed in size_t so that it
+// cannot overflow an int.
+
+static
+size_t GetNumberOfPixels(const mxArray* In)
+{
+	mwSize        nDimensions = mxGetNumberOfDimensions(In);
+	const mwSize* Dimensions  = mxGetDimensions(In);
+
+	switch (nDimensions) {
+
+		case 1: return 1;                                                // It is just a spot color
+		case 2: return (size_t) Dimensions[0];                           // A scanline
+		case 3: return (size_t) Dimensions[0] * (size_t) Dimensions[1];  // A image
+
+		default:
+			FatalError("Unsupported array of %d dimensions", (int) nDimensions);
+			return 0;
+	}
+}
+
+
+// Allocates the output array. Copies the input array modifying the pixel
+// definition to match "OutputChannels".
+//
+// The result is a deep copy of In. When the last dimension of In (the
+// channel count) differs from OutputChannels, that dimension is replaced and
+// the data buffer is reallocated to the new total size.
+//
+// Dimensions use the MATLAB mwSize type, matching what the mx* dimension
+// functions return. The buffer is accessed with mxGetData()/mxSetData(),
+// which work for every numeric class, and its size is computed in size_t
+// from the array's real element size.
+
+static
+mxArray* AllocateOutputArray(const mxArray* In, int OutputChannels)
+{
+
+	mxArray*      Out           = mxDuplicateArray(In);   // Make a "deep copy" of Input array
+	mwSize        nDimensions   = mxGetNumberOfDimensions(In);
+	const mwSize* Dimensions    = mxGetDimensions(In);
+	mwSize        InputChannels = Dimensions[nDimensions-1];
+
+
+	// Modify pixel size only if needed
+
+	if (InputChannels != (mwSize) OutputChannels) {
+
+		mwSize  i;
+		size_t  NewSize = mxGetElementSize(In);      // bytes per sample
+		mwSize* ModifiedDimensions = (mwSize*) mxMalloc(nDimensions * sizeof(mwSize));
+
+
+		memmove(ModifiedDimensions, Dimensions, nDimensions * sizeof(mwSize));
+		ModifiedDimensions[nDimensions - 1] = (mwSize) OutputChannels;
+
+		// Total bytes = element size times every dimension
+		for (i=0; i < nDimensions; i++)
+			NewSize *= (size_t) ModifiedDimensions[i];
+
+
+		mxSetDimensions(Out, ModifiedDimensions, nDimensions);
+		mxFree(ModifiedDimensions);
+
+		mxSetData(Out, mxRealloc(mxGetData(Out), NewSize));
+
+	}
+
+
+	return Out;
+}
+
+
+
+// Builds the lcms format descriptor for planar data in the given color
+// space. "Bytes" is the size of one sample in bytes:
+//
+//  Bytes  Meaning
+//  ------ --------
+//   0      Floating point (double)
+//   1      8-bit samples
+//   2      16-bit samples
+//
+// A size of 0 or 4 sets the floating point flag.
+
+static
+cmsUInt32Number MakeFormatDescriptor(cmsColorSpaceSignature ColorSpace, int Bytes)
+{
+	cmsUInt32Number Format;
+	int FloatFlag = 0;
+	int nChannels;
+
+	if (Bytes == 0 || Bytes == 4)
+		FloatFlag = 1;
+
+	nChannels = cmsChannelsOf(ColorSpace);
+
+	Format  = FLOAT_SH(FloatFlag);
+	Format |= COLORSPACE_SH(_cmsLCMScolorSpace(ColorSpace));
+	Format |= BYTES_SH(Bytes);
+	Format |= CHANNELS_SH(nChannels);
+	Format |= PLANAR_SH(1);
+
+	return Format;
+}
+
+
+// Opens a profile or proper built-in.
+//
+// A NULL name yields sRGB. Names starting with '*' select one of the
+// built-in profiles (compared without regard to case); anything else is
+// opened as a profile file for reading.
+
+static
+cmsHPROFILE OpenProfile(const char* File)
+{
+	cmsContext ContextID = 0;
+
+	if (File == NULL)
+		return cmsCreate_sRGBProfileTHR(ContextID);
+
+	if (cmsstrcasecmp(File, "*Lab2") == 0)
+		return cmsCreateLab2ProfileTHR(ContextID, NULL);
+
+	// "*Lab4" and "*Lab" both give the D50-based v4 Lab profile
+	if (cmsstrcasecmp(File, "*Lab4") == 0 ||
+		cmsstrcasecmp(File, "*Lab") == 0)
+		return cmsCreateLab4ProfileTHR(ContextID, NULL);
+
+	if (cmsstrcasecmp(File, "*LabD65") == 0) {
+
+		cmsCIExyY WhitePoint;
+
+		cmsWhitePointFromTemp(&WhitePoint, 6504);
+		return cmsCreateLab4ProfileTHR(ContextID, &WhitePoint);
+	}
+
+	if (cmsstrcasecmp(File, "*XYZ") == 0)
+		return cmsCreateXYZProfileTHR(ContextID);
+
+	if (cmsstrcasecmp(File, "*Gray22") == 0) {
+
+		cmsToneCurve* Gamma22 = cmsBuildGamma(ContextID, 2.2);
+		cmsHPROFILE hGray = cmsCreateGrayProfileTHR(ContextID, cmsD50_xyY(), Gamma22);
+
+		cmsFreeToneCurve(Gamma22);
+		return hGray;
+	}
+
+	if (cmsstrcasecmp(File, "*Gray30") == 0) {
+
+		cmsToneCurve* Gamma30 = cmsBuildGamma(ContextID, 3.0);
+		cmsHPROFILE hGray = cmsCreateGrayProfileTHR(ContextID, cmsD50_xyY(), Gamma30);
+
+		cmsFreeToneCurve(Gamma30);
+		return hGray;
+	}
+
+	if (cmsstrcasecmp(File, "*srgb") == 0)
+		return cmsCreate_sRGBProfileTHR(ContextID);
+
+	if (cmsstrcasecmp(File, "*null") == 0)
+		return cmsCreateNULLProfileTHR(ContextID);
+
+	if (cmsstrcasecmp(File, "*Lin2222") == 0) {
+
+		// The same gamma 2.2 curve is used for each of the four CMYK channels
+		cmsToneCurve* Shared = cmsBuildGamma(0, 2.2);
+		cmsToneCurve* PerChannel[4];
+		cmsHPROFILE hLink;
+		int k;
+
+		for (k = 0; k < 4; k++)
+			PerChannel[k] = Shared;
+
+		hLink = cmsCreateLinearizationDeviceLink(cmsSigCmykData, PerChannel);
+		cmsFreeToneCurve(Shared);
+		return hLink;
+	}
+
+	return cmsOpenProfileFromFileTHR(ContextID, File, "r");
+}
+
+
+// Translates the PrecalcMode and BlackPointCompensation settings into lcms
+// transform flags. Mode 1 adds no precalculation flag; a mode outside 0..3
+// is reported through FatalError.
+static
+cmsUInt32Number GetFlags()
+{
+	cmsUInt32Number Flags = 0;
+
+	if (PrecalcMode == 0)
+		Flags = cmsFLAGS_NOOPTIMIZE;
+	else if (PrecalcMode == 2)
+		Flags = cmsFLAGS_HIGHRESPRECALC;
+	else if (PrecalcMode == 3)
+		Flags = cmsFLAGS_LOWRESPRECALC;
+	else if (PrecalcMode != 1)
+		FatalError("Unknown precalculation mode '%d'", PrecalcMode);
+
+	if (BlackPointCompensation)
+		Flags |= cmsFLAGS_BLACKPOINTCOMPENSATION;
+
+	return Flags;
+}
+
+// Create transforms.
+//
+// Opens the profiles selected by the parsed options and builds the global
+// hColorTransform. Three cases are handled:
+//   - multiprofile chain: the remaining argv entries are linked into a
+//     temporary devicelink that becomes hInput;
+//   - devicelink: cInProf is opened directly as hInput;
+//   - otherwise: cInProf / cOutProf are opened as input and output.
+// The channel counts and the input/output formats are then derived from the
+// color spaces, and a proofing profile is added when one was requested.
+
+static
+void OpenTransforms(int argc, char *argv[])
+{
+	cmsUInt32Number FormatIn, FormatOut, Flags;
+
+	if (lMultiProfileChain) {
+
+		cmsHTRANSFORM hChain;
+		int k;
+
+		nProfiles = argc - xoptind;
+		for (k = 0; k < nProfiles; k++)
+			hProfiles[k] = OpenProfile(argv[k + xoptind]);
+
+		// Create a temporary devicelink
+		hChain = cmsCreateMultiprofileTransform(hProfiles, nProfiles,
+			0, 0, Intent, GetFlags());
+
+		hInput  = cmsTransform2DeviceLink(hChain, 4.2, 0);
+		hOutput = NULL;
+		cmsDeleteTransform(hChain);
+
+		InputColorSpace  = cmsGetColorSpace(hInput);
+		OutputColorSpace = cmsGetPCS(hInput);
+		lIsDeviceLink = TRUE;
+	}
+	else if (lIsDeviceLink) {
+
+		hInput  = cmsOpenProfileFromFile(cInProf, "r");
+		hOutput = NULL;
+
+		InputColorSpace  = cmsGetColorSpace(hInput);
+		OutputColorSpace = cmsGetPCS(hInput);
+	}
+	else {
+
+		hInput  = OpenProfile(cInProf);
+		hOutput = OpenProfile(cOutProf);
+
+		InputColorSpace  = cmsGetColorSpace(hInput);
+		OutputColorSpace = cmsGetColorSpace(hOutput);
+
+		if (cmsGetDeviceClass(hInput) == cmsSigLinkClass ||
+			cmsGetDeviceClass(hOutput) == cmsSigLinkClass)
+			FatalError("Use %cl flag for devicelink profiles!\n", SW);
+	}
+
+	// (A verbose "From:/To:" product-name printout used to sit here; it is disabled.)
+
+	// Everything below runs for all three cases
+	OutputChannels = cmsChannelsOf(OutputColorSpace);
+	InputChannels  = cmsChannelsOf(InputColorSpace);
+
+	FormatIn  = MakeFormatDescriptor(InputColorSpace, nBytesDepth);
+	FormatOut = MakeFormatDescriptor(OutputColorSpace, nBytesDepth);
+
+	Flags = GetFlags();
+
+	if (cProofing != NULL) {
+
+		hProof = OpenProfile(cProofing);
+		Flags |= cmsFLAGS_SOFTPROOFING;
+	}
+
+	hColorTransform = cmsCreateProofingTransform(hInput, FormatIn,
+		hOutput, FormatOut,
+		hProof, Intent,
+		ProofingIntent,
+		Flags);
+}
+
+
+
+// Runs the global color transform over every pixel of In, writing the
+// result into the data buffer of Out.
+static
+void ApplyTransforms(const mxArray *In, mxArray *Out)
+{
+	double *Src = mxGetPr(In);
+	double *Dst = mxGetPr(Out);
+	size_t Count = GetNumberOfPixels(In);
+
+	cmsDoTransform(hColorTransform, Src, Dst, Count);
+}
+
+
+// Releases the color transform and every profile opened by OpenTransforms,
+// then clears the global handles.
+static
+void CloseTransforms(void)
+{
+	int k;
+
+	if (hColorTransform != NULL)
+		cmsDeleteTransform(hColorTransform);
+
+	if (hInput != NULL)
+		cmsCloseProfile(hInput);
+
+	if (hOutput != NULL)
+		cmsCloseProfile(hOutput);
+
+	if (hProof != NULL)
+		cmsCloseProfile(hProof);
+
+	// Members of a multiprofile chain, if any
+	for (k = 0; k < nProfiles; k++)
+		cmsCloseProfile(hProfiles[k]);
+
+	hColorTransform = NULL;
+	hInput  = NULL;
+	hOutput = NULL;
+	hProof  = NULL;
+}
+
+
+// Parses the option words in argv and stores the results in the file-level
+// option variables. Every letter is accepted in both upper and lower case.
+//
+// The option string passed to xgetopt() must list both cases of each
+// letter. It lists "R:" alongside "r:" so that the 'R' case below is
+// reachable; it previously listed "r:" twice, which rejected -R as unknown.
+//
+// Words left over after the options are only allowed together with the
+// multiprofile switch.
+static
+void HandleSwitches(int argc, char *argv[])
+{
+	int  s;
+
+	xoptinit();
+
+	while ((s = xgetopt(argc, argv,"C:c:VvbBI:i:O:o:T:t:L:l:R:r:P:p:Mm")) != EOF) {
+
+
+		switch (s){
+
+		case 'b':
+		case 'B':
+			BlackPointCompensation = TRUE;
+			break;
+
+		case 'c':
+		case 'C':
+			PrecalcMode = atoi(xoptarg);
+			if (PrecalcMode < 0 || PrecalcMode > 3)
+				FatalError("Unknown precalc mode '%d'", PrecalcMode);
+			break;
+
+		case 'v':
+		case 'V':
+			Verbose = TRUE;
+			break;
+
+		case 'i':
+		case 'I':
+			if (lIsDeviceLink)
+				FatalError("Device-link already specified");
+			cInProf = xoptarg;
+			break;
+
+		case 'o':
+		case 'O':
+			if (lIsDeviceLink)
+				FatalError("Device-link already specified");
+			cOutProf = xoptarg;
+			break;
+
+		case 't':
+		case 'T':
+			// Only negative intents are clamped; there is no upper bound here
+			Intent = atoi(xoptarg);
+			if (Intent < 0) Intent = 0;
+			break;
+
+
+		case 'l':
+		case 'L':
+			// A devicelink is stored as the input profile
+			cInProf = xoptarg;
+			lIsDeviceLink = TRUE;
+			break;
+
+		case 'p':
+		case 'P':
+			cProofing = xoptarg;
+			break;
+
+
+
+		case 'r':
+		case 'R':
+			// Only negative intents are clamped; there is no upper bound here
+			ProofingIntent = atoi(xoptarg);
+			if (ProofingIntent < 0) ProofingIntent = 0;
+			break;
+
+
+		case 'm':
+		case 'M':
+			lMultiProfileChain = TRUE;
+			break;
+
+		default:
+			FatalError("Unknown option.");
+		}
+	}
+
+	// For multiprofile, need to specify -m
+
+	if (xoptind < argc) {
+
+		if (!lMultiProfileChain)
+			FatalError("Use %cm for multiprofile transforms", SW);
+	}
+
+}
+
+
+
+// -------------------------------------------------- Print some fancy help
+//
+// Prints the usage text, the supported switches and the built-in profile
+// names to the MATLAB console. The sRGB entry names the standard as
+// IEC 61966-2.1 (the text previously read "IEC6 1996-2.1").
+static
+void PrintHelp(void)
+{
+	mexPrintf("(MX) little cms ColorSpace conversion tool - v2.0\n\n");
+
+	mexPrintf("usage: icctrans (mVar, flags)\n\n");
+
+	mexPrintf("mVar : Matlab array.\n");
+	mexPrintf("flags: a string containing one or more of following options.\n\n");
+	mexPrintf("\t%cv - Verbose\n", SW);
+	mexPrintf("\t%ci<profile> - Input profile (defaults to sRGB)\n", SW);
+	mexPrintf("\t%co<profile> - Output profile (defaults to sRGB)\n", SW);
+	mexPrintf("\t%cl<profile> - Transform by device-link profile\n", SW);
+	mexPrintf("\t%cm<profiles> - Apply multiprofile chain\n", SW);
+
+	mexPrintf("\t%ct<n> - Rendering intent\n", SW);
+
+	mexPrintf("\t%cb - Black point compensation\n", SW);
+	mexPrintf("\t%cc<0,1,2,3> - Optimize transform (0=Off, 1=Normal, 2=Hi-res, 3=Lo-Res) [defaults to 1]\n", SW);
+
+	mexPrintf("\t%cp<profile> - Soft proof profile\n", SW);
+	mexPrintf("\t%cr<0,1,2,3> - Soft proof intent\n", SW);
+
+	mexPrintf("\nYou can use following built-ins as profiles:\n\n");
+
+	mexPrintf("\t*Lab2  -- D50-based v2 CIEL*a*b\n"
+	"\t*Lab4  -- D50-based v4 CIEL*a*b\n"
+	"\t*Lab   -- D50-based v4 CIEL*a*b\n"
+	"\t*XYZ   -- CIE XYZ (PCS)\n"
+	"\t*sRGB  -- IEC 61966-2.1 sRGB color space\n"
+	"\t*Gray22 - Monochrome of Gamma 2.2\n"
+	"\t*Gray30 - Monochrome of Gamma 3.0\n"
+	"\t*null   - Monochrome black for all input\n"
+	"\t*Lin2222- CMYK linearization of gamma 2.2 on each channel\n\n");
+
+	mexPrintf("For suggestions, comments, bug reports etc. send mail to info@littlecms.com\n\n");
+
+}
+
+
+
+// Main entry point.
+//
+// Called by MATLAB as icctrans(mVar, flags). The first argument is the
+// numeric array to convert, the second a string of command-line style
+// options. The string is split on spaces into an argv[] vector (argv[0] is
+// left NULL), the options are parsed, the transform is built and applied,
+// and the converted array is returned in plhs[0].
+//
+// The number of words in the option string is bounded by the size of the
+// local argv[] table; exceeding it is reported as an error instead of
+// writing past the end of the table.
+
+void mexFunction(
+				 int nlhs,              // Number of left hand side (output) arguments
+				 mxArray *plhs[],       // Array of left hand side arguments
+				 int nrhs,              // Number of right hand side (input) arguments
+				 const mxArray *prhs[]  // Array of right hand side arguments
+)
+{
+
+	char CommandLine[4096+1];
+	char *pt, *argv[128];
+	const int MaxArgs = (int) (sizeof(argv) / sizeof(argv[0]));
+	int argc = 1;
+
+
+	if (nrhs != 2) {
+
+		PrintHelp();
+		return;
+	}
+
+
+	if(nlhs > 1) {
+		FatalError("Too many output arguments.");
+	}
+
+
+	// Setup error handler
+
+	cmsSetLogErrorHandler(MatLabErrorHandler);
+
+	// Defaults
+
+	Verbose     = 0;
+	cInProf     = NULL;
+	cOutProf    = NULL;
+	cProofing   = NULL;
+
+	lMultiProfileChain = FALSE;
+	nProfiles   = 0;
+
+	Intent                  = INTENT_PERCEPTUAL;
+	ProofingIntent          = INTENT_ABSOLUTE_COLORIMETRIC;
+	PrecalcMode = 1;
+	BlackPointCompensation  = FALSE;
+	lIsDeviceLink           = FALSE;
+
+	// Check types. First parameter is array of values, second parameter is command line
+
+	if (!mxIsNumeric(prhs[0]))
+		FatalError("Type mismatch on argument 1 -- Must be numeric");
+
+	if (!mxIsChar(prhs[1]))
+		FatalError("Type mismatch on argument 2 -- Must be string");
+
+
+	// Unpack string to command line buffer
+
+	if (mxGetString(prhs[1], CommandLine, 4096))
+		FatalError("Cannot unpack command string");
+
+	// Separate to argv[] convention
+
+	argv[0] = NULL;
+	for (pt = strtok(CommandLine, " ");
+		pt;
+		pt = strtok(NULL, " ")) {
+
+			// Refuse to write past the end of argv[]
+			if (argc >= MaxArgs) {
+				FatalError("Too many options in command string");
+				return;
+			}
+
+			argv[argc++] = pt;
+	}
+
+
+
+	// Parse arguments
+	HandleSwitches(argc, argv);
+
+
+	nBytesDepth = SizeOfArrayType(prhs[0]);
+
+	OpenTransforms(argc, argv);
+
+
+	plhs[0] = AllocateOutputArray(prhs[0], OutputChannels);
+
+
+	ApplyTransforms(prhs[0], plhs[0]);
+
+	CloseTransforms();
+
+	// Done!
+}
+
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/matlab/lcms_rsp b/third-party/libjxl/libjxl/third_party/lcms/utils/matlab/lcms_rsp
new file mode 100644
index 0000000000..c2b8c8d3a2
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/matlab/lcms_rsp
@@ -0,0 +1,27 @@
+-O
+-I..\..\include
+icctrans.c
+..\..\src\cmscam02.c
+..\..\src\cmscgats.c
+..\..\src\cmscnvrt.c
+..\..\src\cmserr.c
+..\..\src\cmsgamma.c
+..\..\src\cmsgmt.c
+..\..\src\cmsintrp.c
+..\..\src\cmsio0.c
+..\..\src\cmsio1.c
+..\..\src\cmslut.c
+..\..\src\cmsmd5.c
+..\..\src\cmsmtrx.c
+..\..\src\cmsnamed.c
+..\..\src\cmsopt.c
+..\..\src\cmspack.c
+..\..\src\cmspcs.c
+..\..\src\cmsplugin.c
+..\..\src\cmsps2.c
+..\..\src\cmssamp.c
+..\..\src\cmssm.c
+..\..\src\cmstypes.c
+..\..\src\cmsvirt.c
+..\..\src\cmswtpnt.c
+..\..\src\cmsxform.c
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/psicc/Makefile.am b/third-party/libjxl/libjxl/third_party/lcms/utils/psicc/Makefile.am
new file mode 100644
index 0000000000..fd9839a892
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/psicc/Makefile.am
@@ -0,0 +1,19 @@
+#
+# Makefile for building psicc
+# Originally Written by Bob Friesenhahn, June 2003
+# Additions and bugs by Marti Maria 
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+
+AM_CPPFLAGS =  -I$(top_builddir)/include -I$(top_srcdir)/include \
+               -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+
+bin_PROGRAMS = psicc 
+
+psicc_LDADD = $(top_builddir)/src/liblcms2.la @LCMS_LIB_DEPLIBS@
+psicc_LDFLAGS = @LDFLAGS@
+psicc_SOURCES = psicc.c ../common/xgetopt.c ../common/vprf.c ../common/utils.h
+psicc_MANS = psicc.1
+
+EXTRA_DIST = $(man_MANS)
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/psicc/Makefile.in b/third-party/libjxl/libjxl/third_party/lcms/utils/psicc/Makefile.in
new file mode 100644
index 0000000000..6fed475857
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/psicc/Makefile.in
@@ -0,0 +1,663 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Makefile for building psicc
+# Originally Written by Bob Friesenhahn, June 2003
+# Additions and bugs by Marti Maria 
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+bin_PROGRAMS = psicc$(EXEEXT)
+subdir = utils/psicc
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
+	$(top_srcdir)/m4/ax_append_compile_flags.m4 \
+	$(top_srcdir)/m4/ax_append_flag.m4 \
+	$(top_srcdir)/m4/ax_check_compile_flag.m4 \
+	$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+	$(top_srcdir)/m4/ax_require_defined.m4 \
+	$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+	$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+	$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__installdirs = "$(DESTDIR)$(bindir)"
+PROGRAMS = $(bin_PROGRAMS)
+am__dirstamp = $(am__leading_dot)dirstamp
+am_psicc_OBJECTS = psicc.$(OBJEXT) ../common/xgetopt.$(OBJEXT) \
+	../common/vprf.$(OBJEXT)
+psicc_OBJECTS = $(am_psicc_OBJECTS)
+psicc_DEPENDENCIES = $(top_builddir)/src/liblcms2.la
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 = 
+psicc_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(psicc_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(psicc_SOURCES)
+DIST_SOURCES = $(psicc_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JPEGICC_DEPLIBS = @JPEGICC_DEPLIBS@
+LCMS_LIB_DEPLIBS = @LCMS_LIB_DEPLIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBRARY_AGE = @LIBRARY_AGE@
+LIBRARY_CURRENT = @LIBRARY_CURRENT@
+LIBRARY_REVISION = @LIBRARY_REVISION@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBTOOL_DEPS = @LIBTOOL_DEPS@
+LIB_JPEG = @LIB_JPEG@
+LIB_MATH = @LIB_MATH@
+LIB_THREAD = @LIB_THREAD@
+LIB_TIFF = @LIB_TIFF@
+LIB_ZLIB = @LIB_ZLIB@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_CXX = @PTHREAD_CXX@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TIFFICC_DEPLIBS = @TIFFICC_DEPLIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+inline = @inline@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
+               -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+
+psicc_LDADD = $(top_builddir)/src/liblcms2.la 
+psicc_LDFLAGS = @LDFLAGS@
+psicc_SOURCES = psicc.c ../common/xgetopt.c ../common/vprf.c ../common/utils.h
+psicc_MANS = psicc.1
+EXTRA_DIST = $(man_MANS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign utils/psicc/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign utils/psicc/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-binPROGRAMS: $(bin_PROGRAMS)
+	@$(NORMAL_INSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
+	fi; \
+	for p in $$list; do echo "$$p $$p"; done | \
+	sed 's/$(EXEEXT)$$//' | \
+	while read p p1; do if test -f $$p \
+	 || test -f $$p1 \
+	  ; then echo "$$p"; echo "$$p"; else :; fi; \
+	done | \
+	sed -e 'p;s,.*/,,;n;h' \
+	    -e 's|.*|.|' \
+	    -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+	sed 'N;N;N;s,\n, ,g' | \
+	$(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+	  { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+	    if ($$2 == $$4) files[d] = files[d] " " $$1; \
+	    else { print "f", $$3 "/" $$4, $$1; } } \
+	  END { for (d in files) print "f", d, files[d] }' | \
+	while read type dir files; do \
+	    if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+	    test -z "$$files" || { \
+	    echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+	    $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+	    } \
+	; done
+
+uninstall-binPROGRAMS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	files=`for p in $$list; do echo "$$p"; done | \
+	  sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+	      -e 's/$$/$(EXEEXT)/' \
+	`; \
+	test -n "$$list" || exit 0; \
+	echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+	cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+	@list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
+	echo " rm -f" $$list; \
+	rm -f $$list || exit $$?; \
+	test -n "$(EXEEXT)" || exit 0; \
+	list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+	echo " rm -f" $$list; \
+	rm -f $$list
+../common/$(am__dirstamp):
+	@$(MKDIR_P) ../common
+	@: > ../common/$(am__dirstamp)
+../common/xgetopt.$(OBJEXT): ../common/$(am__dirstamp)
+../common/vprf.$(OBJEXT): ../common/$(am__dirstamp)
+
+psicc$(EXEEXT): $(psicc_OBJECTS) $(psicc_DEPENDENCIES) $(EXTRA_psicc_DEPENDENCIES) 
+	@rm -f psicc$(EXEEXT)
+	$(AM_V_CCLD)$(psicc_LINK) $(psicc_OBJECTS) $(psicc_LDADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+	-rm -f ../common/*.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+.c.o:
+	$(AM_V_CC)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+	$(AM_V_CC)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+	$(AM_V_CC)$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS)
+installdirs:
+	for dir in "$(DESTDIR)$(bindir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+	-rm -f ../common/$(am__dirstamp)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean \
+	clean-binPROGRAMS clean-generic clean-libtool cscopelist-am \
+	ctags ctags-am distclean distclean-compile distclean-generic \
+	distclean-libtool distclean-tags distdir dvi dvi-am html \
+	html-am info info-am install install-am install-binPROGRAMS \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-info install-info-am install-man install-pdf \
+	install-pdf-am install-ps install-ps-am install-strip \
+	installcheck installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags tags-am uninstall uninstall-am uninstall-binPROGRAMS
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/psicc/psicc.1 b/third-party/libjxl/libjxl/third_party/lcms/utils/psicc/psicc.1
new file mode 100644
index 0000000000..19868b5136
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/psicc/psicc.1
@@ -0,0 +1,47 @@
+.\"Shiju P. Nair September 30, 2004
+.\"Thomas Weber <tweber@debian.org> April 23, 2014
+.TH PSICC 1 "September 30, 2004"
+.SH NAME
+psicc - little cms PostScript converter.
+.SH SYNOPSIS
+.B psicc
+.RI [ options ]
+.SH DESCRIPTION
+lcms is a standalone CMM engine, which deals with the color management.
+It implements a fast transformation between ICC profiles.
+.B psicc
+is a little cms PostScript converter.
+.SH OPTIONS
+.TP
+.B \-b
+Black point compensation (CRD only).
+.TP
+.BI \-c\  precision
+Precision (0=LowRes, 1=Normal, 2=Hi-res) (CRD only) [defaults to 1].
+.TP
+.BI \-i\  profile
+Input profile: Generates Color Space Array (CSA).
+.TP
+.BI \-n\  gridpoints
+Alternate way to set precision, number of CLUT points (CRD only).
+.TP
+.BI \-o\  profile
+.\"
+Output profile: Generates Color Rendering Dictionary(CRD).
+.TP
+.BI \-t\  intent
+Intent (0=Perceptual, 1=Colorimetric, 2=Saturation, 3=Absolute) [defaults to 0].
+.TP
+.B \-u
+Do NOT generate resource name on CRD.
+.SH NOTES
+For suggestions, comments, bug reports etc. send mail to
+info@littlecms.com.
+.SH SEE ALSO
+.BR jpgicc (1),
+.BR linkicc (1),
+.BR tificc (1),
+.BR transicc (1)
+.SH AUTHOR
+This manual page was written by Shiju p. Nair <shiju.p@gmail.com>,
+for the Debian project.
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/psicc/psicc.c b/third-party/libjxl/libjxl/third_party/lcms/utils/psicc/psicc.c
new file mode 100644
index 0000000000..0e3c790d0d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/psicc/psicc.c
@@ -0,0 +1,232 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+
+#include "utils.h"
+
+// ------------------------------------------------------------------------
+
+static char *cInProf = NULL;               // -i: input profile; when set, main() runs GenerateCSA()
+static char *cOutProf = NULL;              // -o: output profile; when set, main() runs GenerateCRD()
+static int Intent = INTENT_PERCEPTUAL;     // -t: rendering intent, clamped to 0..3 by HandleSwitches()
+static FILE* OutFile;                      // destination stream: stdout or the file named on the command line
+static int BlackPointCompensation = FALSE; // -b: adds cmsFLAGS_BLACKPOINTCOMPENSATION to the CRD flags
+static int Undecorated = FALSE;            // -u: adds cmsFLAGS_NODEFAULTRESOURCEDEF to the CRD flags
+static int PrecalcMode = 1;                // -c: 0 selects LOWRESPRECALC, 2 selects HIGHRESPRECALC, 1 neither
+static int NumOfGridPoints = 0;            // -n: passed to cmsFLAGS_GRIDPOINTS when > 0 and PrecalcMode is 1
+
+
+// The toggles stuff
+
+// Parses the command-line switches into the file-level option variables.
+static void HandleSwitches(int argc, char *argv[])
+{
+	int opt;
+
+	for (;;) {
+		opt = xgetopt(argc, argv, "uUbBI:i:O:o:T:t:c:C:n:N:");
+		if (opt == EOF) break;
+
+		switch (opt) {
+		case 'I':
+		case 'i':
+			cInProf = xoptarg;
+			break;
+
+		case 'O':
+		case 'o':
+			cOutProf = xoptarg;
+			break;
+
+		case 'B':
+		case 'b':
+			BlackPointCompensation = TRUE;
+			break;
+
+		// Rendering intent: out-of-range values are clamped into 0..3
+		case 'T':
+		case 't':
+			Intent = atoi(xoptarg);
+			if (Intent < 0) Intent = 0;
+			else if (Intent > 3) Intent = 3;
+			break;
+
+		case 'u':
+		case 'U':
+			Undecorated = TRUE;
+			break;
+
+		// Precalculation mode: only 0, 1 and 2 are accepted
+		case 'C':
+		case 'c':
+			PrecalcMode = atoi(xoptarg);
+			if (!(PrecalcMode >= 0 && PrecalcMode <= 2))
+				FatalError("ERROR: Unknown precalc mode '%d'", PrecalcMode);
+			break;
+
+		// Grid points are refused once PrecalcMode has been moved away from 1
+		case 'N':
+		case 'n':
+			if (PrecalcMode != 1)
+				FatalError("Precalc mode already specified");
+			NumOfGridPoints = atoi(xoptarg);
+			break;
+
+		default:
+			FatalError("Unknown option - run without args to see valid ones.\n");
+		}
+	}
+}
+
+// Prints the usage banner and the flag list to stderr, then exits with status 0.
+static void Help(void)
+{
+	FILE* out = stderr;
+	// SW supplies the character printed in front of each flag name.
+
+	fprintf(out, "little CMS ICC PostScript generator - v2.1 [LittleCMS %2.2f]\n", LCMS_VERSION / 1000.0);
+	fputs("usage: psicc [flags] [<Output file>]\n\n", out);
+	fputs("flags:\n\n", out);
+
+	fprintf(out, "%ci<profile> - Input profile: Generates Color Space Array (CSA)\n", SW);
+	fprintf(out, "%co<profile> - Output profile: Generates Color Rendering Dictionary(CRD)\n", SW);
+	fprintf(out, "%ct<0,1,2,3> - Intent (0=Perceptual, 1=Colorimetric, 2=Saturation, 3=Absolute)\n", SW);
+	fprintf(out, "%cb - Black point compensation (CRD only)\n", SW);
+	fprintf(out, "%cu - Do NOT generate resource name on CRD\n", SW);
+	fprintf(out, "%cc<0,1,2> - Precision (0=LowRes, 1=Normal (default), 2=Hi-res) (CRD only)\n", SW);
+	fprintf(out, "%cn<gridpoints> - Alternate way to set precission, number of CLUT points (CRD only)\n", SW);
+
+	fputs("\n", out);
+	fputs("If no output file is specified, output goes to stdout.\n\n", out);
+	fputs("This program is intended to be a demo of the little cms\n"
+	      "engine. Both lcms and this program are freeware. You can\n"
+	      "obtain both in source code at http://www.littlecms.com\n"
+	      "For suggestions, comments, bug reports etc. send mail to\n"
+	      "info@littlecms.com\n\n", out);
+
+	exit(0);
+}
+
+
+// Writes the PostScript CSA generated from cInProf to OutFile; no output on failure.
+static void GenerateCSA(void)
+{
+	cmsHPROFILE hProfile = OpenStockProfile(0, cInProf);
+	size_t n;
+	char* Buffer;
+
+	if (hProfile == NULL) return;
+
+	// Called with a NULL buffer; the return value sizes the allocation below.
+	n = cmsGetPostScriptCSA(0, hProfile, Intent, 0, NULL, 0);
+
+	// When n is 0 nothing is written, but control still reaches the close call
+	// at the end (the former early return leaked the profile handle).
+	Buffer = (n != 0) ? (char*) malloc(n + 1) : NULL;
+	if (Buffer != NULL) {
+		cmsGetPostScriptCSA(0, hProfile, Intent, 0, Buffer, (cmsUInt32Number) n);
+		Buffer[n] = 0;   // terminate explicitly before printing with %s
+
+		fprintf(OutFile, "%s", Buffer);
+		free(Buffer);
+	}
+
+	cmsCloseProfile(hProfile);
+}
+
+
+// Writes the PostScript CRD generated from cOutProf to OutFile; no output on failure.
+static void GenerateCRD(void)
+{
+	cmsHPROFILE hProfile = OpenStockProfile(0, cOutProf);
+	size_t n;
+	char* Buffer;
+	cmsUInt32Number dwFlags = 0;
+
+	if (hProfile == NULL) return;
+
+	if (BlackPointCompensation) dwFlags |= cmsFLAGS_BLACKPOINTCOMPENSATION;
+	if (Undecorated)            dwFlags |= cmsFLAGS_NODEFAULTRESOURCEDEF;
+
+	switch (PrecalcMode) {
+		case 0: dwFlags |= cmsFLAGS_LOWRESPRECALC; break;
+		case 2: dwFlags |= cmsFLAGS_HIGHRESPRECALC; break;
+		case 1:
+			// Mode 1 adds a flag only when an explicit grid size was requested.
+			if (NumOfGridPoints > 0)
+				dwFlags |= cmsFLAGS_GRIDPOINTS(NumOfGridPoints);
+			break;
+
+		default: FatalError("ERROR: Unknown precalculation mode '%d'", PrecalcMode);
+	}
+
+	// Called with a NULL buffer; the return value sizes the allocation below.
+	n = cmsGetPostScriptCRD(0, hProfile, Intent, dwFlags, NULL, 0);
+	// No early return from here on: the profile is closed on every path.
+	Buffer = (n != 0) ? (char*) malloc(n + 1) : NULL;
+	if (Buffer != NULL) {
+		cmsGetPostScriptCRD(0, hProfile, Intent, dwFlags, Buffer, (cmsUInt32Number) n);
+		Buffer[n] = 0;
+		fprintf(OutFile, "%s", Buffer);
+		free(Buffer);
+	}
+	cmsCloseProfile(hProfile);
+}
+
+
+int main(int argc, char *argv[])
+{
+	int nargs;
+
+	// Initialize
+	InitUtils("psicc");
+	HandleSwitches(argc, argv);
+
+	// At most one positional argument (the output file) may follow the switches.
+	nargs = (argc - xoptind);
+	if (nargs != 0 && nargs != 1)
+		Help();
+
+	if (cInProf == NULL && cOutProf == NULL)
+		Help();
+
+	OutFile = (nargs == 0) ? stdout : fopen(argv[xoptind], "wt");
+	if (OutFile == NULL) {
+		// Stop here rather than hand a NULL stream to fprintf() in the generators.
+		FatalError("Cannot open '%s' for writing", argv[xoptind]);
+		return 1;
+	}
+
+	if (cInProf != NULL)
+		GenerateCSA();
+	if (cOutProf != NULL)
+		GenerateCRD();
+
+	if (nargs == 1)
+		fclose(OutFile);
+
+	return 0;
+}
+
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/samples/Makefile.am b/third-party/libjxl/libjxl/third_party/lcms/utils/samples/Makefile.am
new file mode 100644
index 0000000000..b3b620173f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/samples/Makefile.am
@@ -0,0 +1,19 @@
+#
+# Makefile for building lcms sample programs
+# Originally Written by Bob Friesenhahn, June 2003
+# Additions and bugs by Marti Maria 
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+
+AM_CPPFLAGS =  -I$(top_builddir)/include -I$(top_srcdir)/include \
+               -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+
+bin_PROGRAMS = wtpt 
+
+wtpt_LDADD = $(top_builddir)/src/liblcms2.la 
+wtpt_LDFLAGS = @LDFLAGS@
+wtpt_SOURCES = wtpt.c ../common/xgetopt.c ../common/vprf.c ../common/utils.h
+wtpt_MANS = wtpt.1
+
+EXTRA_DIST = $(man_MANS) roundtrip.c mktiff8.c mkgrayer.c mkcmy.c itufax.c
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/samples/Makefile.in b/third-party/libjxl/libjxl/third_party/lcms/utils/samples/Makefile.in
new file mode 100644
index 0000000000..da52ca57b7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/samples/Makefile.in
@@ -0,0 +1,611 @@
+# Makefile.in generated by automake 1.10 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006  Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Makefile for building lcms sample programs
+# Originally Written by Bob Friesenhahn, June 2003
+# Additions and bugs by Marti Maria Oct 2004
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+bin_PROGRAMS = icctrans$(EXEEXT) wtpt$(EXEEXT) icc2ps$(EXEEXT) \
+	icclink$(EXEEXT)
+subdir = samples
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_CLEAN_FILES =
+am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"
+binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
+PROGRAMS = $(bin_PROGRAMS)
+am_icc2ps_OBJECTS = icc2ps.$(OBJEXT) xgetopt.$(OBJEXT)
+icc2ps_OBJECTS = $(am_icc2ps_OBJECTS)
+icc2ps_DEPENDENCIES = $(top_builddir)/src/liblcms.la
+icc2ps_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(icc2ps_LDFLAGS) \
+	$(LDFLAGS) -o $@
+am_icclink_OBJECTS = icclink.$(OBJEXT) xgetopt.$(OBJEXT) \
+	vprf.$(OBJEXT)
+icclink_OBJECTS = $(am_icclink_OBJECTS)
+icclink_DEPENDENCIES = $(top_builddir)/src/liblcms.la
+icclink_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(icclink_LDFLAGS) \
+	$(LDFLAGS) -o $@
+am_icctrans_OBJECTS = icctrans.$(OBJEXT) xgetopt.$(OBJEXT) \
+	vprf.$(OBJEXT)
+icctrans_OBJECTS = $(am_icctrans_OBJECTS)
+icctrans_DEPENDENCIES = $(top_builddir)/src/liblcms.la
+icctrans_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(icctrans_LDFLAGS) \
+	$(LDFLAGS) -o $@
+am_wtpt_OBJECTS = wtpt.$(OBJEXT) xgetopt.$(OBJEXT)
+wtpt_OBJECTS = $(am_wtpt_OBJECTS)
+wtpt_DEPENDENCIES = $(top_builddir)/src/liblcms.la
+wtpt_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(wtpt_LDFLAGS) \
+	$(LDFLAGS) -o $@
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+	$(LDFLAGS) -o $@
+SOURCES = $(icc2ps_SOURCES) $(icclink_SOURCES) $(icctrans_SOURCES) \
+	$(wtpt_SOURCES)
+DIST_SOURCES = $(icc2ps_SOURCES) $(icclink_SOURCES) \
+	$(icctrans_SOURCES) $(wtpt_SOURCES)
+man1dir = $(mandir)/man1
+NROFF = nroff
+MANS = $(man_MANS)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+ECHO = @ECHO@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+F77 = @F77@
+FFLAGS = @FFLAGS@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+INT16_T = @INT16_T@
+INT32_T = @INT32_T@
+INT64_T = @INT64_T@
+INT8_T = @INT8_T@
+JPEGICC_DEPLIBS = @JPEGICC_DEPLIBS@
+LCMS_LIB_DEPLIBS = @LCMS_LIB_DEPLIBS@
+LCMS_PYEXECDIR = @LCMS_PYEXECDIR@
+LCMS_PYINCLUDE = @LCMS_PYINCLUDE@
+LCMS_PYLIB = @LCMS_PYLIB@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBRARY_AGE = @LIBRARY_AGE@
+LIBRARY_CURRENT = @LIBRARY_CURRENT@
+LIBRARY_REVISION = @LIBRARY_REVISION@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBTOOL_DEPS = @LIBTOOL_DEPS@
+LIB_JPEG = @LIB_JPEG@
+LIB_MATH = @LIB_MATH@
+LIB_TIFF = @LIB_TIFF@
+LIB_ZLIB = @LIB_ZLIB@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PYTHON = @PYTHON@
+PYTHON_EXEC_PREFIX = @PYTHON_EXEC_PREFIX@
+PYTHON_PLATFORM = @PYTHON_PLATFORM@
+PYTHON_PREFIX = @PYTHON_PREFIX@
+PYTHON_VERSION = @PYTHON_VERSION@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TIFFICC_DEPLIBS = @TIFFICC_DEPLIBS@
+UINT16_T = @UINT16_T@
+UINT32_T = @UINT32_T@
+UINT64_T = @UINT64_T@
+UINT8_T = @UINT8_T@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_F77 = @ac_ct_F77@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+inline = @inline@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+pkgpyexecdir = @pkgpyexecdir@
+pkgpythondir = @pkgpythondir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+pyexecdir = @pyexecdir@
+pythondir = @pythondir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include
+icctrans_LDADD = $(top_builddir)/src/liblcms.la 
+icctrans_LDFLAGS = @LDFLAGS@
+icctrans_SOURCES = icctrans.c xgetopt.c vprf.c
+icctrans_MANS = icctrans.1
+wtpt_LDADD = $(top_builddir)/src/liblcms.la 
+wtpt_LDFLAGS = @LDFLAGS@
+wtpt_SOURCES = wtpt.c xgetopt.c 
+icc2ps_LDADD = $(top_builddir)/src/liblcms.la 
+icc2ps_LDFLAGS = @LDFLAGS@
+icc2ps_SOURCES = icc2ps.c xgetopt.c 
+icclink_LDADD = $(top_builddir)/src/liblcms.la 
+icclink_LDFLAGS = @LDFLAGS@
+icclink_SOURCES = icclink.c xgetopt.c vprf.c
+man_MANS = wtpt.1 icc2ps.1 icclink.1
+EXTRA_DIST = $(man_MANS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
+		&& exit 0; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign  samples/Makefile'; \
+	cd $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign  samples/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+install-binPROGRAMS: $(bin_PROGRAMS)
+	@$(NORMAL_INSTALL)
+	test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
+	@list='$(bin_PROGRAMS)'; for p in $$list; do \
+	  p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
+	  if test -f $$p \
+	     || test -f $$p1 \
+	  ; then \
+	    f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \
+	   echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(binPROGRAMS_INSTALL) '$$p' '$(DESTDIR)$(bindir)/$$f'"; \
+	   $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(binPROGRAMS_INSTALL) "$$p" "$(DESTDIR)$(bindir)/$$f" || exit 1; \
+	  else :; fi; \
+	done
+
+uninstall-binPROGRAMS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(bin_PROGRAMS)'; for p in $$list; do \
+	  f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \
+	  echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \
+	  rm -f "$(DESTDIR)$(bindir)/$$f"; \
+	done
+
+clean-binPROGRAMS:
+	@list='$(bin_PROGRAMS)'; for p in $$list; do \
+	  f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
+	  echo " rm -f $$p $$f"; \
+	  rm -f $$p $$f ; \
+	done
+icc2ps$(EXEEXT): $(icc2ps_OBJECTS) $(icc2ps_DEPENDENCIES) 
+	@rm -f icc2ps$(EXEEXT)
+	$(icc2ps_LINK) $(icc2ps_OBJECTS) $(icc2ps_LDADD) $(LIBS)
+icclink$(EXEEXT): $(icclink_OBJECTS) $(icclink_DEPENDENCIES) 
+	@rm -f icclink$(EXEEXT)
+	$(icclink_LINK) $(icclink_OBJECTS) $(icclink_LDADD) $(LIBS)
+icctrans$(EXEEXT): $(icctrans_OBJECTS) $(icctrans_DEPENDENCIES) 
+	@rm -f icctrans$(EXEEXT)
+	$(icctrans_LINK) $(icctrans_OBJECTS) $(icctrans_LDADD) $(LIBS)
+wtpt$(EXEEXT): $(wtpt_OBJECTS) $(wtpt_DEPENDENCIES) 
+	@rm -f wtpt$(EXEEXT)
+	$(wtpt_LINK) $(wtpt_OBJECTS) $(wtpt_LDADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/icc2ps.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/icclink.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/icctrans.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vprf.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/wtpt.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xgetopt.Po@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(COMPILE) -c $<
+
+.c.obj:
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@	$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+install-man1: $(man1_MANS) $(man_MANS)
+	@$(NORMAL_INSTALL)
+	test -z "$(man1dir)" || $(MKDIR_P) "$(DESTDIR)$(man1dir)"
+	@list='$(man1_MANS) $(dist_man1_MANS) $(nodist_man1_MANS)'; \
+	l2='$(man_MANS) $(dist_man_MANS) $(nodist_man_MANS)'; \
+	for i in $$l2; do \
+	  case "$$i" in \
+	    *.1*) list="$$list $$i" ;; \
+	  esac; \
+	done; \
+	for i in $$list; do \
+	  if test -f $(srcdir)/$$i; then file=$(srcdir)/$$i; \
+	  else file=$$i; fi; \
+	  ext=`echo $$i | sed -e 's/^.*\\.//'`; \
+	  case "$$ext" in \
+	    1*) ;; \
+	    *) ext='1' ;; \
+	  esac; \
+	  inst=`echo $$i | sed -e 's/\\.[0-9a-z]*$$//'`; \
+	  inst=`echo $$inst | sed -e 's/^.*\///'`; \
+	  inst=`echo $$inst | sed '$(transform)'`.$$ext; \
+	  echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \
+	  $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst"; \
+	done
+uninstall-man1:
+	@$(NORMAL_UNINSTALL)
+	@list='$(man1_MANS) $(dist_man1_MANS) $(nodist_man1_MANS)'; \
+	l2='$(man_MANS) $(dist_man_MANS) $(nodist_man_MANS)'; \
+	for i in $$l2; do \
+	  case "$$i" in \
+	    *.1*) list="$$list $$i" ;; \
+	  esac; \
+	done; \
+	for i in $$list; do \
+	  ext=`echo $$i | sed -e 's/^.*\\.//'`; \
+	  case "$$ext" in \
+	    1*) ;; \
+	    *) ext='1' ;; \
+	  esac; \
+	  inst=`echo $$i | sed -e 's/\\.[0-9a-z]*$$//'`; \
+	  inst=`echo $$inst | sed -e 's/^.*\///'`; \
+	  inst=`echo $$inst | sed '$(transform)'`.$$ext; \
+	  echo " rm -f '$(DESTDIR)$(man1dir)/$$inst'"; \
+	  rm -f "$(DESTDIR)$(man1dir)/$$inst"; \
+	done
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
+	mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	here=`pwd`; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
+	if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	    $$tags $$unique; \
+	fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	here=`pwd`; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
+	test -z "$(CTAGS_ARGS)$$tags$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$tags $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && cd $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+	    fi; \
+	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+	  else \
+	    test -f $(distdir)/$$file \
+	    || cp -p $$d/$$file $(distdir)/$$file \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS) $(MANS)
+installdirs:
+	for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+info: info-am
+
+info-am:
+
+install-data-am: install-man
+
+install-dvi: install-dvi-am
+
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-info: install-info-am
+
+install-man: install-man1
+
+install-pdf: install-pdf-am
+
+install-ps: install-ps-am
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS uninstall-man
+
+uninstall-man: uninstall-man1
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \
+	clean-generic clean-libtool ctags distclean distclean-compile \
+	distclean-generic distclean-libtool distclean-tags distdir dvi \
+	dvi-am html html-am info info-am install install-am \
+	install-binPROGRAMS install-data install-data-am install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-info install-info-am install-man \
+	install-man1 install-pdf install-pdf-am install-ps \
+	install-ps-am install-strip installcheck installcheck-am \
+	installdirs maintainer-clean maintainer-clean-generic \
+	mostlyclean mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
+	uninstall-am uninstall-binPROGRAMS uninstall-man \
+	uninstall-man1
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/samples/itufax.c b/third-party/libjxl/libjxl/third_party/lcms/utils/samples/itufax.c
new file mode 100644
index 0000000000..6bad87ea88
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/samples/itufax.c
@@ -0,0 +1,138 @@
+//
+//  Little cms
+//  Copyright (C) 1998-2003 Marti Maria
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+#include "lcms.h"
+
+// This is a sample of how to build a profile for decoding ITU T.42/Fax JPEG
+// streams. In the input direction the profile additionally gamut-compresses
+// values into the range -85 <= a <= 85 and -75 <= b <= 125. This conforms to
+// the default range for ITU T.42 -- see RFC 2301, section 6.2.3 for details
+
+
+//  L* = [0, 100]
+//  a* = [-85, 85]
+//  b* = [-75, 125]
+
+
+// These functions convert between the ITUFAX 16-bit encoding and floating-point Lab
+
+static void ITU2Lab(WORD In[3], LPcmsCIELab Lab)
+{
+    // L*: codes 0..65535 span 0..100; a*: 170 units wide, zero at code 32768; b*: 200 units wide, zero at code 24576
+    Lab->L = (double) In[0] / 655.35;
+    Lab->a = 170. * (In[1] - 32768.) / 65535.;
+    Lab->b = 200. * (In[2] - 24576.) / 65535.;
+}
+
+
+static void Lab2ITU(LPcmsCIELab Lab, WORD Out[3])   // Lab -> ITU codes, rounded to the nearest code
+{
+    // a* = +85 (the clamp limit used by InputDirection) rounds to 65536, one past the top code 65535, so saturate it
+    double a = floor((double) (Lab -> a / 170.)* 65535. + 32768. + 0.5);
+    Out[0] = (WORD) floor((double) (Lab -> L / 100.)* 65535. + 0.5);
+    Out[1] = (WORD) ((a > 65535.) ? 65535. : a);
+    Out[2] = (WORD) floor((double) (Lab -> b / 200.)* 65535. + 24576. + 0.5);
+}
+
+// These are the samplers. They are passed as callbacks to cmsSample3DGrid();
+// cmsSample3DGrid() will then sweep the whole Lab gamut, calling these functions
+// once for each node. In[] will contain the Lab PCS value to convert to ITUFAX
+// in InputDirection, or the ITUFAX value to convert to Lab in OutputDirection.
+// You can change the number of sample points if desired; the algorithm will
+// remain the same. 33 points gives good accuracy, but you can reduce to 22 or
+// less if space is critical
+
+#define GRID_POINTS 33
+
+// Sampler for the Lab -> ITU direction: In[] holds an encoded Lab PCS node and
+// Out[] receives its ITUFAX encoding. Cargo is unused. Always returns TRUE.
+static int InputDirection(register WORD In[], register WORD Out[], register LPVOID Cargo)
+{
+    cmsCIELab Node;
+    cmsLabEncoded2Float(&Node, In);
+    // Gamut remapping to the a*/b* limits listed at the top of this file
+    cmsClampLab(&Node, 85, -85, 125, -75);
+    Lab2ITU(&Node, Out);
+    return TRUE;
+}
+
+
+// Sampler for the ITU -> Lab direction: In[] holds an ITUFAX-encoded node and
+// Out[] receives the same colour as an encoded Lab value. Cargo is unused.
+static int OutputDirection(register WORD In[], register WORD Out[], register LPVOID Cargo)
+{
+    cmsCIELab Decoded;
+
+    ITU2Lab(In, &Decoded);
+    cmsFloat2LabEncoded(Out, &Decoded);
+
+    return TRUE;
+}
+
+
+// The main entry point. Just create a profile and populate it with the required tags.
+// Note that cmsOpenProfileFromFile("itufax.icm", "w") will NOT delete the file
+// if it already exists. This is for obvious safety reasons.
+
+	
+int main(int argc, char *argv[])
+{
+    cmsHPROFILE hProfile;
+    LPLUT AToB0, BToA0;
+
+    fprintf(stderr, "Creating itufax.icm...");
+
+    // Remove any previous output first, then open the profile for writing
+    unlink("itufax.icm");
+    hProfile = cmsOpenProfileFromFile("itufax.icm", "w");
+
+    // One LUT per direction, each sampled with GRID_POINTS points
+    AToB0 = cmsAllocLUT();
+    BToA0 = cmsAllocLUT();
+    cmsAlloc3DGrid(AToB0, GRID_POINTS, 3, 3);
+    cmsAlloc3DGrid(BToA0, GRID_POINTS, 3, 3);
+
+    // Fill the grids through the sampler callbacks and attach them as tags
+    cmsSample3DGrid(AToB0, InputDirection, NULL, 0);
+    cmsSample3DGrid(BToA0, OutputDirection, NULL, 0);
+    cmsAddTag(hProfile, icSigAToB0Tag, AToB0);
+    cmsAddTag(hProfile, icSigBToA0Tag, BToA0);
+
+    // Header: Lab for both the colour space and the PCS
+    cmsSetColorSpace(hProfile, icSigLabData);
+    cmsSetPCS(hProfile, icSigLabData);
+    cmsSetDeviceClass(hProfile, icSigColorSpaceClass);
+
+    // Text tags
+    cmsAddTag(hProfile, icSigProfileDescriptionTag, "ITU T.42/Fax JPEG CIEL*a*b*");
+    cmsAddTag(hProfile, icSigCopyrightTag,          "No Copyright, use freely.");
+    cmsAddTag(hProfile, icSigDeviceMfgDescTag,      "Little cms");
+    cmsAddTag(hProfile, icSigDeviceModelDescTag,    "ITU T.42/Fax JPEG CIEL*a*b*");
+
+    // The LUTs are released only after the profile has been closed
+    cmsCloseProfile(hProfile);
+    cmsFreeLUT(AToB0);
+    cmsFreeLUT(BToA0);
+    fprintf(stderr, "Done.\n");
+    return 0;
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/samples/mkcmy.c b/third-party/libjxl/libjxl/third_party/lcms/utils/samples/mkcmy.c
new file mode 100644
index 0000000000..1b0755f17b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/samples/mkcmy.c
@@ -0,0 +1,170 @@
+//
+//  Little cms
+//  Copyright (C) 1998-2003 Marti Maria
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THIS SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
+// WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// IN NO EVENT SHALL MARTI MARIA BE LIABLE FOR ANY SPECIAL, INCIDENTAL,
+// INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+// OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+// WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF
+// LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+// OF THIS SOFTWARE.
+//
+// Version 1.12
+
+
+#include "lcms.h"
+
+
+// State handed to the sampler callbacks through their Cargo pointer
+typedef struct {
+    cmsHPROFILE   hLab;      // Lab profile opened by InitCargo
+    cmsHPROFILE   hRGB;      // sRGB profile opened by InitCargo
+    cmsHTRANSFORM Lab2RGB;   // 16-bit Lab -> 16-bit RGB
+    cmsHTRANSFORM RGB2Lab;   // 16-bit RGB -> 16-bit Lab
+} CARGO, FAR* LPCARGO;
+
+
+	 
+ 
+
+// Our space will be CIE primaries plus a gamma of 4.5
+
+// Sampler used for the BToA0 table: prints the decoded Lab node to stdout,
+// converts the encoded Lab value to 16-bit RGB through Cargo's Lab2RGB
+// transform and stores the complement of each channel. Always returns TRUE.
+static int Forward(register WORD In[], register WORD Out[], register LPVOID Cargo)
+{
+    LPCARGO Ctx = (LPCARGO) Cargo;
+    cmsCIELab Node;
+    WORD Rgb[3];
+    int ch;
+
+    cmsLabEncoded2Float(&Node, In);
+    printf("%g %g %g\n", Node.L, Node.a, Node.b);
+
+    cmsDoTransform(Ctx->Lab2RGB, In, Rgb, 1);
+
+    // Our CMY is negative of RGB
+    for (ch = 0; ch < 3; ch++) {
+        Out[ch] = 0xFFFF - Rgb[ch];
+    }
+
+    return TRUE;
+}
+
+
+// Sampler used for the AToB0 table: complements each input channel to obtain
+// RGB and runs it through Cargo's RGB2Lab transform straight into Out.
+static int Reverse(register WORD In[], register WORD Out[], register LPVOID Cargo)
+{
+    LPCARGO Ctx = (LPCARGO) Cargo;
+    WORD Rgb[3];
+    int ch;
+
+    for (ch = 0; ch < 3; ch++) {
+        Rgb[ch] = 0xFFFF - In[ch];
+    }
+
+    cmsDoTransform(Ctx->RGB2Lab, Rgb, Out, 1);
+
+    return TRUE;
+}
+
+
+
+// Opens the Lab and sRGB profiles and builds the two 16-bit transforms between
+// them (relative colorimetric, cmsFLAGS_NOTPRECALC), storing all four in Cargo.
+static void InitCargo(LPCARGO Cargo)
+{
+    cmsHPROFILE Lab = cmsCreateLabProfile(NULL);
+    cmsHPROFILE Rgb = cmsCreate_sRGBProfile();
+
+    Cargo->hLab = Lab;
+    Cargo->hRGB = Rgb;
+
+    Cargo->Lab2RGB = cmsCreateTransform(Lab, TYPE_Lab_16, Rgb, TYPE_RGB_16,
+                                        INTENT_RELATIVE_COLORIMETRIC,
+                                        cmsFLAGS_NOTPRECALC);
+
+    Cargo->RGB2Lab = cmsCreateTransform(Rgb, TYPE_RGB_16, Lab, TYPE_Lab_16,
+                                        INTENT_RELATIVE_COLORIMETRIC,
+                                        cmsFLAGS_NOTPRECALC);
+}
+
+
+
+
+// Releases what InitCargo created: both transforms first, then both profiles.
+static void FreeCargo(LPCARGO Cargo)
+{
+    cmsDeleteTransform(Cargo->Lab2RGB);
+    cmsDeleteTransform(Cargo->RGB2Lab);
+    cmsCloseProfile(Cargo->hLab);
+    cmsCloseProfile(Cargo->hRGB);
+}
+
+	
+	
+	
+int main(void)
+{
+    cmsHPROFILE hProfile;
+    LPLUT AToB0, BToA0;
+    CARGO Cargo;
+
+    fprintf(stderr, "Creating lcmscmy.icm...");
+
+    // Profiles and transforms used by the Forward/Reverse samplers
+    InitCargo(&Cargo);
+
+    // The output profile starts out as a Lab profile and is retagged below
+    hProfile = cmsCreateLabProfile(NULL);
+
+    // One LUT per direction, each sampled with 25 points
+    AToB0 = cmsAllocLUT();
+    BToA0 = cmsAllocLUT();
+    cmsAlloc3DGrid(AToB0, 25, 3, 3);
+    cmsAlloc3DGrid(BToA0, 25, 3, 3);
+
+    // AToB0 is sampled with Reverse, BToA0 with Forward
+    cmsSample3DGrid(AToB0, Reverse, &Cargo, 0);
+    cmsSample3DGrid(BToA0, Forward, &Cargo, 0);
+
+    cmsAddTag(hProfile, icSigAToB0Tag, AToB0);
+    cmsAddTag(hProfile, icSigBToA0Tag, BToA0);
+
+    // Header: CMY colour space, output device class
+    cmsSetColorSpace(hProfile, icSigCmyData);
+    cmsSetDeviceClass(hProfile, icSigOutputClass);
+
+    // Text tags
+    cmsAddTag(hProfile, icSigProfileDescriptionTag, "CMY ");
+    cmsAddTag(hProfile, icSigCopyrightTag,          "Copyright (c) HP, 2007. All rights reserved.");
+    cmsAddTag(hProfile, icSigDeviceMfgDescTag,      "Little cms");
+    cmsAddTag(hProfile, icSigDeviceModelDescTag,    "CMY space");
+
+    _cmsSaveProfile(hProfile, "lcmscmy.icm");
+
+    // Tear down: the LUTs, then the profile, then the cargo
+    cmsFreeLUT(AToB0);
+    cmsFreeLUT(BToA0);
+    cmsCloseProfile(hProfile);
+    FreeCargo(&Cargo);
+
+    fprintf(stderr, "Done.\n");
+    return 0;
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/samples/mkgrayer.c b/third-party/libjxl/libjxl/third_party/lcms/utils/samples/mkgrayer.c
new file mode 100644
index 0000000000..46e9286189
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/samples/mkgrayer.c
@@ -0,0 +1,93 @@
+//
+//  Little cms
+//  Copyright (C) 1998-2003 Marti Maria
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+#include "lcms.h"
+
+
+
+// Sampler: nodes with |a*| < 3 and |b*| < 3 become a gray whose level follows
+// L* (capped at 100); every other node becomes (0xFFFF, 0, 0).
+static int Forward(register WORD In[], register WORD Out[], register LPVOID Cargo)
+{
+    cmsCIELab Node;
+    double Level;
+    WORD Gray;
+
+    cmsLabEncoded2Float(&Node, In);
+
+    if (!(fabs(Node.a) < 3 && fabs(Node.b) < 3)) {
+        Out[0] = 0xFFFF;
+        Out[1] = 0;
+        Out[2] = 0;
+        return TRUE;
+    }
+
+    Level = Node.L / 100.0;
+    if (Level > 1) Level = 1;
+    Gray = (WORD) floor(Level * 65535.0 + 0.5);
+
+    Out[0] = Out[1] = Out[2] = Gray;
+    return TRUE;
+}
+
+
+
+
+	
+int main(int argc, char *argv[])
+{
+    cmsHPROFILE hProfile;
+    LPLUT BToA0;
+
+    fprintf(stderr, "Creating interpol2.icc...");
+
+    // Remove any previous output first, then open the profile in "w8" mode
+    unlink("interpol2.icc");
+    hProfile = cmsOpenProfileFromFile("interpol2.icc", "w8");
+
+    // A single LUT sampled with 17 points, filled by the Forward sampler
+    BToA0 = cmsAllocLUT();
+    cmsAlloc3DGrid(BToA0, 17, 3, 3);
+    cmsSample3DGrid(BToA0, Forward, NULL, 0);
+
+    cmsAddTag(hProfile, icSigBToA0Tag, BToA0);
+
+    // Header: RGB colour space, Lab PCS, output device class
+    cmsSetColorSpace(hProfile, icSigRgbData);
+    cmsSetPCS(hProfile, icSigLabData);
+    cmsSetDeviceClass(hProfile, icSigOutputClass);
+
+    // Text tags
+    cmsAddTag(hProfile, icSigProfileDescriptionTag, "Interpolation test");
+    cmsAddTag(hProfile, icSigCopyrightTag,          "Copyright (c) HP 2007. All rights reserved.");
+    cmsAddTag(hProfile, icSigDeviceMfgDescTag,      "Little cms");
+    cmsAddTag(hProfile, icSigDeviceModelDescTag,    "Interpolation test profile");
+
+    // The LUT is released only after the profile has been closed
+    cmsCloseProfile(hProfile);
+    cmsFreeLUT(BToA0);
+
+    fprintf(stderr, "Done.\n");
+
+    return 0;
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/samples/mktiff8.c b/third-party/libjxl/libjxl/third_party/lcms/utils/samples/mktiff8.c
new file mode 100644
index 0000000000..ab0b66b8a0
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/samples/mktiff8.c
@@ -0,0 +1,150 @@
+//
+//  Little cms
+//  Copyright (C) 1998-2010 Marti Maria
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+// Creates a devicelink that decodes TIFF8 Lab files 
+
+#include "lcms2.h"
+#include <stdlib.h>
+#include <math.h>
+
+// Remaps one a/b channel value of the TIFF8 Lab encoding this tool decodes:
+// values up to and including 128 are shifted up by 127, larger values are
+// shifted down by 127. The input is not range-checked.
+static
+double DecodeAbTIFF(double ab)
+{
+	return (ab <= 128.) ? (ab + 127.) : (ab - 127.);
+}
+
+// Builds the tone curve used for the a and b channels: a 4096-entry 16-bit table
+// in which entry i holds DecodeAbTIFF(i scaled to 0..255), rescaled by 257 and
+// rounded to the nearest integer.
+//
+// Returns the new curve, or NULL when the temporary table cannot be allocated.
+// Whatever cmsBuildTabulatedToneCurve16() returns is passed through unchanged.
+static
+cmsToneCurve* CreateStep(void)
+{
+	enum { kEntries = 4096 };
+
+	cmsUInt16Number* Samples = calloc(kEntries, sizeof(cmsUInt16Number));
+	cmsToneCurve* Curve;
+	int n;
+
+	if (Samples == NULL)
+		return NULL;
+
+	for (n = 0; n < kEntries; n++) {
+
+		// Position of this entry on the 0..255 scale, then run through the decoder
+		double Level8 = DecodeAbTIFF((double) n * 255. / 4095.);
+
+		// 257 maps 255 onto 65535; +0.5 and floor() round to nearest
+		Samples[n] = (cmsUInt16Number) floor(Level8 * 257. + 0.5);
+	}
+
+	Curve = cmsBuildTabulatedToneCurve16(0, kEntries, Samples);
+
+	// The scratch table is released here whether or not the curve was built
+	free(Samples);
+
+	return Curve;
+}
+
+
+// Builds the tone curve used for the L channel from a two-entry table
+// { 0, 0xffff }. Returns whatever cmsBuildTabulatedToneCurve16() returns.
+static
+cmsToneCurve* CreateLinear(void)
+{
+	cmsUInt16Number EndPoints[2];
+
+	EndPoints[0] = 0;
+	EndPoints[1] = 0xffff;
+
+	return cmsBuildTabulatedToneCurve16(0, 2, EndPoints);
+}
+
+
+
+// Writes the profile description and copyright tags (ASCII, "en"/"US") to hProfile.
+//
+// Returns TRUE only when both MLU objects were allocated, both strings were stored
+// and both tags were written; FALSE as soon as any of those steps fails. The two
+// MLU objects are released before returning in every case.
+static
+cmsBool SetTextTags(cmsHPROFILE hProfile)
+{
+    cmsMLU* Description = cmsMLUalloc(0, 1);
+    cmsMLU* Copyright   = cmsMLUalloc(0, 1);
+    cmsBool Ok;
+
+    // Short-circuit chain: steps run in this order and stop at the first failure
+    Ok = Description != NULL
+      && Copyright != NULL
+      && cmsMLUsetASCII(Description, "en", "US", "Little cms Tiff8 CIELab")
+      && cmsMLUsetASCII(Copyright,   "en", "US", "Copyright (c) Marti Maria, 2010. All rights reserved.")
+      && cmsWriteTag(hProfile, cmsSigProfileDescriptionTag, Description)
+      && cmsWriteTag(hProfile, cmsSigCopyrightTag,          Copyright);
+
+    if (Description != NULL)
+        cmsMLUfree(Description);
+
+    if (Copyright != NULL)
+        cmsMLUfree(Copyright);
+
+    return Ok;
+}
+
+
+// Writes "lcmstiff8.icm": a link-class profile (Lab colour space, Lab PCS, version 4.2)
+// whose AToB0 pipeline is a single tone-curve stage -- CreateLinear() on the first
+// channel and CreateStep() on the other two.
+//
+// Returns 0 on success and 1 when any object cannot be created or a tag/profile
+// write fails. argc/argv are not used.
+int main(int argc, char *argv[])
+{
+	cmsHPROFILE hProfile = NULL;
+	cmsPipeline *AToB0 = NULL;
+	cmsStage *Curves;
+	cmsToneCurve* PreLinear[3];
+	cmsToneCurve *Lin = NULL, *Step = NULL;
+	int rc = 1;
+
+	fprintf(stderr, "Creating lcmstiff8.icm...");
+
+	remove("lcmstiff8.icm");
+	hProfile = cmsOpenProfileFromFile("lcmstiff8.icm", "w");
+	if (hProfile == NULL) goto Done;
+
+	// Create linearization
+	Lin  = CreateLinear();
+	Step = CreateStep();
+	if (Lin == NULL || Step == NULL) goto Done;
+
+	PreLinear[0] = Lin;
+	PreLinear[1] = Step;
+	PreLinear[2] = Step;
+
+	AToB0 = cmsPipelineAlloc(0, 3, 3);
+	if (AToB0 == NULL) goto Done;
+
+	// Allocate the stage separately so that a failed allocation is not
+	// handed to the pipeline as a NULL stage
+	Curves = cmsStageAllocToneCurves(0, 3, PreLinear);
+	if (Curves == NULL) goto Done;
+
+	cmsPipelineInsertStage(AToB0, cmsAT_BEGIN, Curves);
+
+	cmsSetColorSpace(hProfile, cmsSigLabData);
+	cmsSetPCS(hProfile, cmsSigLabData);
+	cmsSetDeviceClass(hProfile, cmsSigLinkClass);
+	cmsSetProfileVersion(hProfile, 4.2);
+
+	if (!cmsWriteTag(hProfile, cmsSigAToB0Tag, AToB0)) goto Done;
+
+	if (!SetTextTags(hProfile)) goto Done;
+
+	rc = 0;
+
+Done:
+
+	// Same teardown order as before (profile, curves, pipeline), but each
+	// object is only released if it was actually created
+	if (hProfile != NULL) {
+		if (!cmsCloseProfile(hProfile))
+			rc = 1;
+	}
+
+	if (Lin != NULL)
+		cmsFreeToneCurve(Lin);
+	if (Step != NULL)
+		cmsFreeToneCurve(Step);
+	if (AToB0 != NULL)
+		cmsPipelineFree(AToB0);
+
+	if (rc == 0)
+		fprintf(stderr, "Done.\n");
+	else
+		fprintf(stderr, "Failed.\n");
+
+	return rc;
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/samples/roundtrip.c b/third-party/libjxl/libjxl/third_party/lcms/utils/samples/roundtrip.c
new file mode 100644
index 0000000000..94c8bdc140
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/samples/roundtrip.c
@@ -0,0 +1,99 @@
+//
+//  Little cms
+//  Copyright (C) 1998-2011 Marti Maria
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+#include "lcms2.h"
+#include <math.h>
+
+
+
+// Euclidean distance between two 3-component 8-bit vectors:
+// sqrt of the sum of the squared per-component differences.
+static
+double VecDist(cmsUInt8Number bin[3], cmsUInt8Number bout[3])
+{
+       double SumSq = 0;
+       int c;
+
+       for (c = 0; c < 3; c++) {
+
+              // Widen to double before subtracting so the difference cannot wrap
+              double Delta = fabs((double) bout[c] - bin[c]);
+
+              SumSq += Delta * Delta;
+       }
+
+       return sqrt(SumSq);
+}
+
+
+// Round-trip accuracy check for an RGB profile.
+//
+// Usage: roundtrip <RGB icc profile>
+//
+// Builds a transform from the profile to itself (8-bit RGB in and out, relative
+// colorimetric, optimization disabled), pushes every one of the 256^3 RGB triples
+// through it and prints the average, the maximum and the sample standard
+// deviation of the Euclidean distance between input and output.
+//
+// Returns 0 on success; 1 on a usage error, when the profile cannot be opened,
+// or when the transform cannot be created.
+int main(int  argc, char* argv[])
+{
+
+    int r, g, b;
+    cmsUInt8Number RGB[3], RGB_OUT[3];
+    cmsHTRANSFORM xform;
+    cmsHPROFILE hProfile;
+    double err, SumX=0, SumX2=0, Peak = 0, n = 0;
+    double Variance;
+
+
+    if (argc != 2) {
+        printf("roundtrip <RGB icc profile>\n");
+        return 1;
+    }
+
+    hProfile = cmsOpenProfileFromFile(argv[1], "r");
+    if (hProfile == NULL)
+    {
+        printf("invalid profile\n");
+        return 1;
+    }
+
+    xform = cmsCreateTransform(hProfile,TYPE_RGB_8, hProfile, TYPE_RGB_8, INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_NOOPTIMIZE);
+    if (xform == NULL)
+    {
+        printf("Not a valid RGB profile\n");
+
+        // Release the profile that was opened above before bailing out
+        cmsCloseProfile(hProfile);
+        return 1;
+    }
+
+    for (r=0; r< 256; r++) {
+
+        // Progress indicator, rewritten in place on the same terminal line
+        printf("%d  \r", r);
+
+        for (g=0; g < 256; g++) {
+            for (b=0; b < 256; b++) {
+
+                RGB[0] = (cmsUInt8Number) r;
+                RGB[1] = (cmsUInt8Number) g;
+                RGB[2] = (cmsUInt8Number) b;
+
+                cmsDoTransform(xform, RGB, RGB_OUT, 1);
+
+                err = VecDist(RGB, RGB_OUT);
+
+                // Running sums for the mean and the variance
+                SumX  += err;
+                SumX2 += err * err;
+                n += 1.0;
+                if (err > Peak)
+                    Peak = err;
+
+            }
+        }
+    }
+
+    // Sample variance from the running sums. Rounding can push it marginally
+    // below zero when all errors are (nearly) identical; clamp so that sqrt()
+    // prints 0 rather than NaN.
+    Variance = (n*SumX2 - SumX * SumX) / (n*(n-1));
+    if (Variance < 0)
+        Variance = 0;
+
+    printf("Average %g\n", SumX / n);
+    printf("Max %g\n", Peak);
+    printf("Std  %g\n", sqrt(Variance));
+    cmsCloseProfile(hProfile);
+    cmsDeleteTransform(xform);
+
+    return 0;
+}
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/samples/vericc.c b/third-party/libjxl/libjxl/third_party/lcms/utils/samples/vericc.c
new file mode 100644
index 0000000000..9ac94a76c1
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/samples/vericc.c
@@ -0,0 +1,65 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2010 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2.h"
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Writes the usage text to stderr. Always returns 0, which main() uses directly
+// as its exit status.
+static
+int PrintUsage(void)
+{
+	static const char Usage[] = "Sets profile version\n\nUsage: vericc --r<version> iccprofile.icc\n";
+
+	fputs(Usage, stderr);
+
+	return 0;
+}
+
+// Rewrites the version field of an ICC profile in place.
+//
+// Usage: vericc --r<version> iccprofile.icc
+//
+// The profile is opened, its version is set, and the result is saved to the
+// temporary file "$$tmp.icc" in the current directory; only after that save has
+// succeeded is the original removed and the temporary file renamed over it.
+//
+// Returns 0 on success or after printing the usage text; 1 when the version is
+// missing or not a number, the profile cannot be opened, the save fails, or the
+// final rename fails.
+int main(int argc, char *argv[])
+{
+	cmsHPROFILE hProfile;
+	char* ptr;
+	char* end;
+	cmsFloat64Number Version;
+	cmsBool Saved;
+
+	if (argc != 3)  return PrintUsage();
+
+	ptr = argv[1];
+	if (strncmp(ptr, "--r", 3) != 0) return PrintUsage();
+	ptr += 3;
+	if (!*ptr) { fprintf(stderr, "Wrong version number\n"); return 1; }
+
+	// strtod() reports where parsing stopped, so text such as "--rabc" or
+	// "--r4.2x" is rejected instead of being silently read as 0 or 4.2
+	Version = strtod(ptr, &end);
+	if (end == ptr || *end != '\0') { fprintf(stderr, "Wrong version number\n"); return 1; }
+
+	hProfile = cmsOpenProfileFromFile(argv[2], "r");
+	if (hProfile == NULL) { fprintf(stderr, "'%s': cannot open\n", argv[2]); return 1; }
+
+	cmsSetProfileVersion(hProfile, Version);
+	Saved = cmsSaveProfileToFile(hProfile, "$$tmp.icc");
+	cmsCloseProfile(hProfile);
+
+	// Never touch the original unless the replacement was written successfully
+	if (!Saved) {
+		fprintf(stderr, "'%s': cannot save updated profile\n", argv[2]);
+		remove("$$tmp.icc");
+		return 1;
+	}
+
+	// remove() first because rename() is not guaranteed to replace an existing file
+	remove(argv[2]);
+	if (rename("$$tmp.icc", argv[2]) != 0) {
+		fprintf(stderr, "'%s': cannot replace; updated profile left in '$$tmp.icc'\n", argv[2]);
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/samples/wtpt.1 b/third-party/libjxl/libjxl/third_party/lcms/utils/samples/wtpt.1
new file mode 100644
index 0000000000..fbd37ac2b1
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/samples/wtpt.1
@@ -0,0 +1,28 @@
+.\"Shiju P. Nair September 30, 2004
+.TH WTPT 1 "September 30, 2004"
+.SH NAME 
+wtpt - Show media white of profiles, identifying black body locus.
+.SH SYNOPSIS
+.B wtpt
+.RI [ profile ]
+.SH DESCRIPTION
+lcms is a standalone CMM engine that handles color management.
+It implements a fast transformation between ICC profiles.
+.B wtpt 
+shows media white of profiles, identifying black body locus.
+.P
+If no parameters are given, then this program will
+ask for the XYZ value of media white. If a parameter is given, it must be
+the profile to inspect.
+.SH NOTES
+For suggestions, comments, bug reports etc. send mail to
+info@littlecms.com
+.SH SEE ALSO
+.BR jpegicc (1),
+.BR tifficc (1),
+.BR icc2ps (1),
+.BR icclink (1),
+.BR icctrans (1)
+.SH AUTHOR
+This manual page was written by Shiju P. Nair <shiju.p@gmail.com>,
+for the Debian project.
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/samples/wtpt.c b/third-party/libjxl/libjxl/third_party/lcms/utils/samples/wtpt.c
new file mode 100644
index 0000000000..45602f796a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/samples/wtpt.c
@@ -0,0 +1,144 @@
+//
+//  Little cms
+//  Copyright (C) 1998-2015 Marti Maria
+//
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2014 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+
+#include "utils.h"
+
+
+// The toggles stuff
+
+static cmsBool lShowXYZ = TRUE;
+static cmsBool lShowLab = FALSE;
+static cmsBool lShowLCh = FALSE;
+
+// Parses the command-line switches with xgetopt() (option string "lcx") and
+// updates the file-level display toggles:
+//   l  turns Lab output on
+//   c  turns LCh output on
+//   x  turns XYZ output off
+// Any other value returned by xgetopt() is reported through FatalError().
+static
+void HandleSwitches(int argc, char *argv[])
+{
+       int opt;
+
+       for (;;) {
+
+              opt = xgetopt(argc, argv, "lcx");
+              if (opt == EOF)
+                     break;
+
+              if (opt == 'l')
+                     lShowLab = TRUE;
+              else if (opt == 'c')
+                     lShowLCh = TRUE;
+              else if (opt == 'x')
+                     lShowXYZ = FALSE;
+              else
+                     FatalError("Unknown option - run without args to see valid ones.\n");
+       }
+}
+
+// Prints the usage text for wtpt to stderr, then ends the process with exit(0).
+static
+void Help(void)
+{
+       // Banner; LCMS_VERSION is divided by 1000 to print it with two decimals
+       fprintf(stderr, "little CMS ICC white point utility - v3 [LittleCMS %2.2f]\n", LCMS_VERSION / 1000.0);
+
+       fputs("usage: wtpt [flags] [<ICC profile>]\n\n", stderr);
+
+       fputs("flags:\n\n", stderr);
+
+       // SW supplies the character printed in front of each flag letter
+       fprintf(stderr, "%cl - CIE Lab\n", SW);
+       fprintf(stderr, "%cc - CIE LCh\n", SW);
+       fprintf(stderr, "%cx - Don't show XYZ\n", SW);
+
+       fputs("\nIf no parameters are given, then this program will\n", stderr);
+       fputs("ask for XYZ value of media white. If parameter given, it must be\n", stderr);
+       fputs("the profile to inspect.\n\n", stderr);
+
+       fputs("This program is intended to be a demo of the little cms\n"
+              "engine. Both lcms and this program are freeware. You can\n"
+              "obtain both in source code at http://www.littlecms.com\n"
+              "For suggestions, comments, bug reports etc. send mail to\n"
+              "info@littlecms.com\n\n", stderr);
+
+       exit(0);
+}
+
+
+
+// Prints the white point WtPt to stdout in the representations enabled by the
+// lShowXYZ / lShowLab / lShowLCh toggles, followed by a "Sens" figure computed
+// as 100 * C / sqrt(C^2 + L^2) from the LCh value. The "Sens" line is always
+// printed. WtPt is dereferenced and must not be NULL.
+static
+void ShowWhitePoint(cmsCIEXYZ* WtPt)
+{
+       cmsCIELab Lab;
+       cmsCIELCh LCh;
+       cmsCIExyY xyY;
+       double Ssens;
+
+       // NULL is passed as the reference white of the XYZ -> Lab conversion;
+       // presumably that selects the library default -- confirm in the lcms2 docs
+       cmsXYZ2Lab(NULL, &Lab, WtPt);
+       cmsLab2LCh(&LCh, &Lab);
+
+       // xyY is computed but not printed
+       cmsXYZ2xyY(&xyY, WtPt);
+
+       if (lShowXYZ)
+              printf("XYZ=(%3.1f, %3.1f, %3.1f)\n", WtPt->X, WtPt->Y, WtPt->Z);
+
+       if (lShowLab)
+              printf("Lab=(%3.3f, %3.3f, %3.3f)\n", Lab.L, Lab.a, Lab.b);
+
+       if (lShowLCh)
+              printf("LCh=(%3.3f, %3.3f, %3.3f)\n", LCh.L, LCh.C, LCh.h);
+
+       Ssens = (LCh.C * 100.0 )/ sqrt(LCh.C*LCh.C + LCh.L * LCh.L) ;
+       printf("Sens = %f\n", Ssens);
+}
+
+
+// Entry point of wtpt: prints the media white point of an ICC profile.
+//
+// After the switches are parsed, exactly one remaining argument (the profile
+// path) is expected; with any other count the usage text is shown via Help().
+//
+// Returns 0 on success; 1 when the profile cannot be opened or carries no
+// media white point tag.
+int main(int argc, char *argv[])
+{
+       int nargs;
+
+       InitUtils("wtpt");
+
+       HandleSwitches(argc, argv);
+
+       nargs = (argc - xoptind);
+
+       if (nargs != 1)
+              Help();
+
+       else {
+              cmsCIEXYZ* WtPt;
+              cmsHPROFILE hProfile = cmsOpenProfileFromFile(argv[xoptind], "r");
+              if (hProfile == NULL) return 1;
+
+              WtPt = (cmsCIEXYZ*) cmsReadTag(hProfile, cmsSigMediaWhitePointTag);
+
+              // ShowWhitePoint() dereferences its argument, so a profile that
+              // lacks the tag must be reported here instead of being passed on
+              if (WtPt == NULL) {
+                     fprintf(stderr, "'%s': no media white point tag\n", argv[xoptind]);
+                     cmsCloseProfile(hProfile);
+                     return 1;
+              }
+
+              ShowWhitePoint(WtPt);
+              cmsCloseProfile(hProfile);
+       }
+
+       return 0;
+}
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/tificc/Makefile.am b/third-party/libjxl/libjxl/third_party/lcms/utils/tificc/Makefile.am
new file mode 100644
index 0000000000..50f5dc441e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/tificc/Makefile.am
@@ -0,0 +1,25 @@
+#
+# Makefile for building tificc
+# Originally written by Bob Friesenhahn, June 2003
+# bugs introduced by Marti Maria 
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
+              -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+                
+
+if HasTIFF
+bin_PROGRAMS = tificc
+else
+bin_PROGRAMS =
+endif
+
+tificc_LDADD = $(top_builddir)/src/liblcms2.la @TIFFICC_DEPLIBS@
+tificc_LDFLAGS = @LDFLAGS@
+tificc_SOURCES = tificc.c ../common/xgetopt.c ../common/vprf.c ../common/utils.h
+man_MANS = tificc.1
+
+
+EXTRA_DIST = $(man_MANS)
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/tificc/Makefile.in b/third-party/libjxl/libjxl/third_party/lcms/utils/tificc/Makefile.in
new file mode 100644
index 0000000000..de4d9c2964
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/tificc/Makefile.in
@@ -0,0 +1,739 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Makefile for building tificc
+# Originally written by Bob Friesenhahn, June 2003
+# bugs introduced by Marti Maria 
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+@HasTIFF_TRUE@bin_PROGRAMS = tificc$(EXEEXT)
+subdir = utils/tificc
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
+	$(top_srcdir)/m4/ax_append_compile_flags.m4 \
+	$(top_srcdir)/m4/ax_append_flag.m4 \
+	$(top_srcdir)/m4/ax_check_compile_flag.m4 \
+	$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+	$(top_srcdir)/m4/ax_require_defined.m4 \
+	$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+	$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+	$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"
+PROGRAMS = $(bin_PROGRAMS)
+am__dirstamp = $(am__leading_dot)dirstamp
+am_tificc_OBJECTS = tificc.$(OBJEXT) ../common/xgetopt.$(OBJEXT) \
+	../common/vprf.$(OBJEXT)
+tificc_OBJECTS = $(am_tificc_OBJECTS)
+tificc_DEPENDENCIES = $(top_builddir)/src/liblcms2.la
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 = 
+tificc_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(tificc_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(tificc_SOURCES)
+DIST_SOURCES = $(tificc_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
+man1dir = $(mandir)/man1
+NROFF = nroff
+MANS = $(man_MANS)
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JPEGICC_DEPLIBS = @JPEGICC_DEPLIBS@
+LCMS_LIB_DEPLIBS = @LCMS_LIB_DEPLIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBRARY_AGE = @LIBRARY_AGE@
+LIBRARY_CURRENT = @LIBRARY_CURRENT@
+LIBRARY_REVISION = @LIBRARY_REVISION@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBTOOL_DEPS = @LIBTOOL_DEPS@
+LIB_JPEG = @LIB_JPEG@
+LIB_MATH = @LIB_MATH@
+LIB_THREAD = @LIB_THREAD@
+LIB_TIFF = @LIB_TIFF@
+LIB_ZLIB = @LIB_ZLIB@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_CXX = @PTHREAD_CXX@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TIFFICC_DEPLIBS = @TIFFICC_DEPLIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+inline = @inline@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
+              -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+
+tificc_LDADD = $(top_builddir)/src/liblcms2.la @TIFFICC_DEPLIBS@
+tificc_LDFLAGS = @LDFLAGS@
+tificc_SOURCES = tificc.c ../common/xgetopt.c ../common/vprf.c ../common/utils.h
+man_MANS = tificc.1
+EXTRA_DIST = $(man_MANS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign utils/tificc/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign utils/tificc/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-binPROGRAMS: $(bin_PROGRAMS)
+	@$(NORMAL_INSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
+	fi; \
+	for p in $$list; do echo "$$p $$p"; done | \
+	sed 's/$(EXEEXT)$$//' | \
+	while read p p1; do if test -f $$p \
+	 || test -f $$p1 \
+	  ; then echo "$$p"; echo "$$p"; else :; fi; \
+	done | \
+	sed -e 'p;s,.*/,,;n;h' \
+	    -e 's|.*|.|' \
+	    -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+	sed 'N;N;N;s,\n, ,g' | \
+	$(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+	  { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+	    if ($$2 == $$4) files[d] = files[d] " " $$1; \
+	    else { print "f", $$3 "/" $$4, $$1; } } \
+	  END { for (d in files) print "f", d, files[d] }' | \
+	while read type dir files; do \
+	    if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+	    test -z "$$files" || { \
+	    echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+	    $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+	    } \
+	; done
+
+uninstall-binPROGRAMS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	files=`for p in $$list; do echo "$$p"; done | \
+	  sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+	      -e 's/$$/$(EXEEXT)/' \
+	`; \
+	test -n "$$list" || exit 0; \
+	echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+	cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+	@list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
+	echo " rm -f" $$list; \
+	rm -f $$list || exit $$?; \
+	test -n "$(EXEEXT)" || exit 0; \
+	list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+	echo " rm -f" $$list; \
+	rm -f $$list
+../common/$(am__dirstamp):
+	@$(MKDIR_P) ../common
+	@: > ../common/$(am__dirstamp)
+../common/xgetopt.$(OBJEXT): ../common/$(am__dirstamp)
+../common/vprf.$(OBJEXT): ../common/$(am__dirstamp)
+
+tificc$(EXEEXT): $(tificc_OBJECTS) $(tificc_DEPENDENCIES) $(EXTRA_tificc_DEPENDENCIES) 
+	@rm -f tificc$(EXEEXT)
+	$(AM_V_CCLD)$(tificc_LINK) $(tificc_OBJECTS) $(tificc_LDADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+	-rm -f ../common/*.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+.c.o:
+	$(AM_V_CC)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+	$(AM_V_CC)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+	$(AM_V_CC)$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+install-man1: $(man_MANS)
+	@$(NORMAL_INSTALL)
+	@list1=''; \
+	list2='$(man_MANS)'; \
+	test -n "$(man1dir)" \
+	  && test -n "`echo $$list1$$list2`" \
+	  || exit 0; \
+	echo " $(MKDIR_P) '$(DESTDIR)$(man1dir)'"; \
+	$(MKDIR_P) "$(DESTDIR)$(man1dir)" || exit 1; \
+	{ for i in $$list1; do echo "$$i"; done;  \
+	if test -n "$$list2"; then \
+	  for i in $$list2; do echo "$$i"; done \
+	    | sed -n '/\.1[a-z]*$$/p'; \
+	fi; \
+	} | while read p; do \
+	  if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
+	  echo "$$d$$p"; echo "$$p"; \
+	done | \
+	sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \
+	      -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \
+	sed 'N;N;s,\n, ,g' | { \
+	list=; while read file base inst; do \
+	  if test "$$base" = "$$inst"; then list="$$list $$file"; else \
+	    echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \
+	    $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \
+	  fi; \
+	done; \
+	for i in $$list; do echo "$$i"; done | $(am__base_list) | \
+	while read files; do \
+	  test -z "$$files" || { \
+	    echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \
+	    $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \
+	done; }
+
+uninstall-man1:
+	@$(NORMAL_UNINSTALL)
+	@list=''; test -n "$(man1dir)" || exit 0; \
+	files=`{ for i in $$list; do echo "$$i"; done; \
+	l2='$(man_MANS)'; for i in $$l2; do echo "$$i"; done | \
+	  sed -n '/\.1[a-z]*$$/p'; \
+	} | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \
+	      -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \
+	dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir)
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS) $(MANS)
+installdirs:
+	for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+	-rm -f ../common/$(am__dirstamp)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-man
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man: install-man1
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS uninstall-man
+
+uninstall-man: uninstall-man1
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean \
+	clean-binPROGRAMS clean-generic clean-libtool cscopelist-am \
+	ctags ctags-am distclean distclean-compile distclean-generic \
+	distclean-libtool distclean-tags distdir dvi dvi-am html \
+	html-am info info-am install install-am install-binPROGRAMS \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-info install-info-am install-man install-man1 \
+	install-pdf install-pdf-am install-ps install-ps-am \
+	install-strip installcheck installcheck-am installdirs \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+	pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \
+	uninstall-binPROGRAMS uninstall-man uninstall-man1
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/tificc/tifdiff.c b/third-party/libjxl/libjxl/third_party/lcms/utils/tificc/tifdiff.c
new file mode 100644
index 0000000000..b00343062e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/tificc/tifdiff.c
@@ -0,0 +1,708 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "utils.h"
+#include "tiffio.h"
+
+
+// ------------------------------------------------------------------------
+
+static TIFF *Tiff1, *Tiff2, *TiffDiff;   // The two images being compared and the optional difference image
+static const char* TiffDiffFilename;     // Set by the -o switch; when NULL no difference image is written
+static const char* CGATSout;             // Set by the -g switch; when NULL results go to stdout instead
+
+typedef struct {                                     // Running accumulator for one error measure
+                double  n, x, x2;                    // Sample count, sum of samples, sum of squared samples
+                double  Min, Peak;                   // Smallest and largest sample seen so far
+
+    } STAT, *LPSTAT;
+
+// Accumulators filled by CmpImages(), one sample per pixel:
+static STAT ColorantStat[4];             // Per-channel absolute differences (gray; R,G,B; or C,M,Y,K)
+static STAT EuclideanStat;               // Normalized Euclidean distance across the channels
+static STAT ColorimetricStat;            // Delta-E in Lab; only fed when both images yield a transform
+
+static uint16 Channels;                  // Samples per pixel of the first image, read in CmpImages()
+
+static cmsHPROFILE hLab;                 // Lab v4 profile created in main(); target of both transforms
+
+
+static   // libtiff warning hook: prints "Warning <module>: <text>." to stderr when Verbose is set
+void ConsoleWarningHandler(const char* module, const char* fmt, va_list ap)
+{
+        char e[512] = { '\0' };
+        size_t used;
+
+        // Bounded writes truncate instead of overrunning e[]; one byte is kept for '.'.
+        if (module != NULL)
+              snprintf(e, sizeof(e) - 1, "%s: ", module);
+        used = strlen(e);
+        vsnprintf(e + used, sizeof(e) - 1 - used, fmt, ap);
+        strcat(e, ".");
+        if (!Verbose) return;
+        fprintf(stderr, "\nWarning %s\n", e);
+        fflush(stderr);
+}
+
+static
+void ConsoleErrorHandler(const char* module, const char* fmt, va_list ap)
+{
+       char e[512] = { '\0' };
+
+       if (module != NULL)
+              strcat(strcpy(e, module), ": ");
+
+       vsprintf(e+strlen(e), fmt, ap);
+       strcat(e, ".");
+       fprintf(stderr, "\nError");
+       fprintf(stderr, " %s\n", e);
+       fflush(stderr);
+}
+
+
+
+static   // Writes the usage summary to stderr and terminates the process with status 0.
+void Help()
+{
+    // Fixed text goes through fputs(); only the option lines need formatting,
+    // because the switch character SW is substituted into them.
+    fputs("Little cms TIFF compare utility. v1.0\n\n", stderr);
+    fputs("usage: tiffdiff [flags] input.tif output.tif\n", stderr);
+    fputs("\nflags:\n\n", stderr);
+
+    fprintf(stderr, "%co<tiff>   - Output TIFF file\n", SW);
+    fprintf(stderr, "%cg<CGATS>  - Output results in CGATS file\n", SW);
+
+    fputs("\n", stderr);
+
+    fprintf(stderr, "%cv - Verbose (show warnings)\n", SW);
+    fprintf(stderr, "%ch - This help\n", SW);
+
+    fflush(stderr);
+
+    // Help is a terminal action: nothing else runs after the text is shown.
+    exit(0);
+}
+
+
+
+// The toggles stuff
+
+static   // Reads the command-line switches and stores their effect in the file-level settings.
+void HandleSwitches(int argc, char *argv[])
+{
+       int opt;
+
+       // Every switch is accepted in lower or upper case. 'o' and 'g' take an
+       // argument, which xgetopt() hands over through xoptarg.
+
+       // Keep asking xgetopt() for the next switch until it reports EOF.
+       for (;;) {
+
+           opt = xgetopt(argc, argv, "o:O:hHvVg:G:");
+           if (opt == EOF) break;
+
+           if (opt == 'v' || opt == 'V') {
+
+               Verbose = TRUE;                 // show libtiff warnings
+           }
+           else if (opt == 'o' || opt == 'O') {
+
+               TiffDiffFilename = xoptarg;     // name of the difference image
+           }
+           else if (opt == 'h' || opt == 'H') {
+
+               Help();
+           }
+           else if (opt == 'g' || opt == 'G') {
+
+               CGATSout = xoptarg;             // name of the CGATS report
+           }
+           else {
+
+               // Anything else is not a known switch.
+               FatalError("Unknown option - run without args to see valid ones");
+           }
+       }
+}
+
+
+static   // Resets an accumulator so the next AddOnePixel() starts a fresh series.
+void ClearStatistics(LPSTAT st) 
+{
+    st->n    = 0;
+    st->x    = st->x2 = 0;
+    st->Peak = 0;
+    st->Min  = 1E10;     // sentinel: any sample below 1E10 replaces it as the new minimum
+}
+
+
+static   // Folds one sample into the accumulator: count, sums and running extremes.
+void AddOnePixel(LPSTAT st, double dE) 
+{
+    st->n  += 1.0;
+    st->x  += dE;
+    st->x2 += dE * dE;
+    if (dE < st->Min)  { st->Min  = dE; }
+    if (dE > st->Peak) { st->Peak = dE; }
+}
+static   // Sample standard deviation of the accumulated series.
+double Std(LPSTAT st)  
+{   // Fewer than two samples, or a numerator pushed below zero by rounding, yields 0 instead of NaN.
+    double num = st->n * st->x2 - st->x * st->x;
+    return (st->n > 1.0 && num > 0.0) ? sqrt(num / (st->n * (st->n - 1.0))) : 0.0;
+}
+static   // Arithmetic mean of the accumulated samples (sum divided by count).
+double Mean(LPSTAT st) 
+{
+    const double total = st->x, count = st->n;
+    return total / count;
+}
+
+// Build up the pixeltype descriptor
+
+static   // Builds the lcms pixel-format descriptor for the current directory of a TIFF.
+cmsUInt32Number GetInputPixelType(TIFF *Bank)
+{
+     uint16 Photometric, bps, spp, extra, PlanarConfig, *info;
+     uint16 reverse = 0;
+     int ColorChannels, IsPlanar = 0, pt = 0;
+
+     // The result packs color space, planarity, number of extra samples,
+     // number of color channels, bytes per sample and the min-is-white flavor.
+     // Unsupported layouts are reported through FatalError(); every such call
+     // is followed by a break or an assignment so the code stays well defined
+     // even if FatalError() were to return (presumably it does not).
+     // PHOTOMETRIC is read without a default, so a file lacking the tag would
+     // otherwise leave Photometric uninitialized for the switch further down.
+     if (!TIFFGetField(Bank, TIFFTAG_PHOTOMETRIC, &Photometric)) {
+            FatalError("Sorry, the TIFF has no photometric interpretation tag");
+            Photometric = 0xFFFF;    // not a handled value: ends in the default case
+     }
+
+     TIFFGetFieldDefaulted(Bank,  TIFFTAG_BITSPERSAMPLE, &bps);
+     if (bps == 1)
+       FatalError("Sorry, bilevel TIFFs has nothig to do with ICC profiles");
+     if (bps != 8 && bps != 16)
+              FatalError("Sorry, 8 or 16 bits per sample only");
+
+     TIFFGetFieldDefaulted(Bank, TIFFTAG_SAMPLESPERPIXEL, &spp);
+     TIFFGetFieldDefaulted(Bank, TIFFTAG_PLANARCONFIG, &PlanarConfig);
+
+     switch (PlanarConfig)
+     {
+     case PLANARCONFIG_CONTIG:
+            IsPlanar = 0;
+            break;
+
+     case PLANARCONFIG_SEPARATE:
+            FatalError("Planar TIFF are not supported");
+            break;
+
+     default:
+            FatalError("Unsupported planar configuration (=%d) ", (int) PlanarConfig);
+     }
+
+     // If Samples per pixel == 1, PlanarConfiguration is irrelevant and need
+     // not to be included.
+     if (spp == 1) IsPlanar = 0;
+
+     // Any alpha? Extra samples are not counted as color channels.
+     TIFFGetFieldDefaulted(Bank, TIFFTAG_EXTRASAMPLES, &extra, &info);
+     ColorChannels = spp - extra;
+
+     switch (Photometric) {
+     case PHOTOMETRIC_MINISWHITE:
+            reverse = 1;             // gray as below, flagged as reversed
+            pt = PT_GRAY;
+            break;
+
+     case PHOTOMETRIC_MINISBLACK:
+            pt = PT_GRAY;
+            break;
+
+     case PHOTOMETRIC_RGB:
+            pt = PT_RGB;
+            break;
+
+     case PHOTOMETRIC_PALETTE:
+            FatalError("Sorry, palette images not supported (at least on this version)");
+            break;
+
+     case PHOTOMETRIC_SEPARATED:
+            pt = PixelTypeFromChanCount(ColorChannels);
+            break;
+
+     case PHOTOMETRIC_YCBCR:
+           {
+                  uint16 subx, suby;
+                  pt = PT_YCbCr;
+                  TIFFGetFieldDefaulted(Bank, TIFFTAG_YCBCRSUBSAMPLING, &subx, &suby);
+                  if (subx != 1 || suby != 1)
+                         FatalError("Sorry, subsampled images not supported");
+           }
+           break;
+
+     case 9:                       // presumably ICCLab in libtiff's numbering -- confirm
+     case PHOTOMETRIC_CIELAB:
+           pt = PT_Lab;
+           break;
+
+     case PHOTOMETRIC_LOGLUV:      /* CIE Log2(L) (u',v') */
+           TIFFSetField(Bank, TIFFTAG_SGILOGDATAFMT, SGILOGDATAFMT_16BIT);
+           pt = PT_YUV;             // *ICCSpace = icSigLuvData;
+           bps = 16;               // 16 bits forced by LibTiff
+           break;
+
+     default:
+           FatalError("Unsupported TIFF color space (Photometric %d)", Photometric);
+     }
+
+     // Convert bits per sample to bytes per sample
+     bps >>= 3;
+
+     return (COLORSPACE_SH(pt)|PLANAR_SH(IsPlanar)|EXTRA_SH(extra)|CHANNELS_SH(ColorChannels)|BYTES_SH(bps)|FLAVOR_SH(reverse));
+}
+
+
+
+static   // Opens the ICC profile embedded in a TIFF and builds a transform from it to Lab.
+cmsUInt32Number OpenEmbedded(TIFF* tiff, cmsHPROFILE* PtrProfile, cmsHTRANSFORM* PtrXform)
+{
+    // On return *PtrProfile and *PtrXform hold the new handles, or NULL when the
+    // image carries no usable profile. The return value is the pixel format
+    // used for the transform, or 0 when no transform was attempted.
+    cmsUInt32Number EmbedLen, dwFormat = 0;
+    cmsUInt8Number* EmbedBuffer;
+
+    *PtrProfile = NULL;
+    *PtrXform   = NULL;
+    if (!TIFFGetField(tiff, TIFFTAG_ICCPROFILE, &EmbedLen, &EmbedBuffer))
+        return dwFormat;
+
+    // cmsOpenProfileFromMem() yields NULL for a profile it cannot read; treat the
+    // image as unprofiled rather than hand a NULL handle to the calls below.
+    *PtrProfile = cmsOpenProfileFromMem(EmbedBuffer, EmbedLen);
+    if (*PtrProfile == NULL)
+        return dwFormat;
+
+    if (Verbose) {
+        fprintf(stdout, "Embedded profile found:\n");
+        PrintProfileInformation(*PtrProfile);
+    }
+    dwFormat  = GetInputPixelType(tiff);
+    *PtrXform = cmsCreateTransform(*PtrProfile, dwFormat,
+                                   hLab, TYPE_Lab_DBL, INTENT_RELATIVE_COLORIMETRIC, 0);
+    return dwFormat;
+}
+
+
+static   // Bytes occupied by one pixel of the given lcms format, extra samples included.
+size_t PixelSize(cmsUInt32Number dwFormat)
+{
+    const size_t samples = T_CHANNELS(dwFormat) + T_EXTRA(dwFormat);
+    return T_BYTES(dwFormat) * samples;
+}
+
+static   // Compares two TIFFs pixel by pixel and feeds the file-level statistics accumulators.
+int CmpImages(TIFF* tiff1, TIFF* tiff2, TIFF* diff)
+{
+    // tiff1 and tiff2 are the images to compare; the indexing below relies on
+    // the checks main() performs first (chunky layout, 8 bits per sample, same
+    // width, length and samples per pixel). diff may be NULL; otherwise it
+    // receives an 8-bit gray image holding the rounded per-pixel difference.
+    // Returns 1 on success and 0 when the comparison could not be completed.
+    cmsUInt8Number *buf1 = NULL, *buf2 = NULL, *buf3 = NULL;
+    uint32 row, cols, imagewidth = 0, imagelength = 0;
+    uint16 Photometric, needed;
+    int havePhotometric;
+    double dE = 0;
+    double dR, dG, dB, dC, dM, dY, dK;
+    int rc = 0;
+    cmsHPROFILE hProfile1 = 0, hProfile2 = 0;
+    cmsHTRANSFORM xform1 = 0, xform2 = 0;
+    cmsUInt32Number dwFormat1, dwFormat2;
+
+    // PHOTOMETRIC is read without a default; remember whether it was present
+    // so that an absent tag is never used uninitialized.
+    havePhotometric = TIFFGetField(tiff1, TIFFTAG_PHOTOMETRIC, &Photometric);
+    TIFFGetField(tiff1, TIFFTAG_IMAGEWIDTH,  &imagewidth);
+    TIFFGetField(tiff1, TIFFTAG_IMAGELENGTH, &imagelength);
+    TIFFGetField(tiff1, TIFFTAG_SAMPLESPERPIXEL, &Channels);
+
+    dwFormat1 = OpenEmbedded(tiff1, &hProfile1, &xform1);
+    dwFormat2 = OpenEmbedded(tiff2, &hProfile2, &xform2);
+
+    buf1 = (cmsUInt8Number*)_TIFFmalloc(TIFFScanlineSize(tiff1));
+    buf2 = (cmsUInt8Number*)_TIFFmalloc(TIFFScanlineSize(tiff2));
+
+    if (diff) {
+        TIFFSetField(diff, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_MINISBLACK);
+        TIFFSetField(diff, TIFFTAG_COMPRESSION, COMPRESSION_NONE);
+        TIFFSetField(diff, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG);
+
+        TIFFSetField(diff, TIFFTAG_IMAGEWIDTH,  imagewidth);
+        TIFFSetField(diff, TIFFTAG_IMAGELENGTH, imagelength);
+
+        TIFFSetField(diff, TIFFTAG_SAMPLESPERPIXEL, 1);
+        TIFFSetField(diff, TIFFTAG_BITSPERSAMPLE, 8);
+
+        buf3 = (cmsUInt8Number*)_TIFFmalloc(TIFFScanlineSize(diff));
+    }
+
+    // A failed allocation ends the comparison instead of being dereferenced.
+    if (buf1 == NULL || buf2 == NULL || (diff && buf3 == NULL)) goto Error;
+
+    // Number of samples each pixel must provide for the statistics below.
+    switch (havePhotometric ? Photometric : 0xFFFF) {
+    case PHOTOMETRIC_MINISWHITE:
+    case PHOTOMETRIC_MINISBLACK: needed = 1; break;
+    case PHOTOMETRIC_RGB:        needed = 3; break;
+    case PHOTOMETRIC_SEPARATED:  needed = 4; break;
+    default:                     needed = 0; break;
+    }
+
+    // Reject photometric values without statistics and pixels too short for
+    // them, before any sample is read past the end of a pixel.
+    if (needed == 0 || Channels < needed) {
+        FatalError("Unsupported channels: %d", Channels);
+        goto Error;
+    }
+
+    for (row = 0; row < imagelength; row++) {
+
+        if (TIFFReadScanline(tiff1, buf1, row, 0) < 0) goto Error;
+        if (TIFFReadScanline(tiff2, buf2, row, 0) < 0) goto Error;
+
+        for (cols = 0; cols < imagewidth; cols++) {
+
+            // Samples are interleaved, so a pixel starts Channels bytes after
+            // its predecessor -- also when extra (alpha) samples are present.
+            const cmsUInt8Number* p1 = buf1 + (size_t) cols * Channels;
+            const cmsUInt8Number* p2 = buf2 + (size_t) cols * Channels;
+
+            switch (Photometric) {
+
+            case PHOTOMETRIC_MINISWHITE:
+            case PHOTOMETRIC_MINISBLACK:
+                dE = fabs(p2[0] - p1[0]);
+                AddOnePixel(&ColorantStat[0], dE);
+                AddOnePixel(&EuclideanStat, dE);
+                break;
+
+            case PHOTOMETRIC_RGB:
+                dR = fabs(p2[0] - p1[0]);
+                dG = fabs(p2[1] - p1[1]);
+                dB = fabs(p2[2] - p1[2]);
+                dE = sqrt(dR * dR + dG * dG + dB * dB) / sqrt(3.);
+                AddOnePixel(&ColorantStat[0], dR);
+                AddOnePixel(&ColorantStat[1], dG);
+                AddOnePixel(&ColorantStat[2], dB);
+                AddOnePixel(&EuclideanStat,   dE);
+                break;
+
+            default:    // PHOTOMETRIC_SEPARATED, the only value left after the check above
+                dC = fabs(p2[0] - p1[0]);
+                dM = fabs(p2[1] - p1[1]);
+                dY = fabs(p2[2] - p1[2]);
+                dK = fabs(p2[3] - p1[3]);
+                dE = sqrt(dC * dC + dM * dM + dY * dY + dK * dK) / 2.;
+                AddOnePixel(&ColorantStat[0], dC);
+                AddOnePixel(&ColorantStat[1], dM);
+                AddOnePixel(&ColorantStat[2], dY);
+                AddOnePixel(&ColorantStat[3], dK);
+                AddOnePixel(&EuclideanStat,   dE);
+                break;
+            }
+
+            // With a transform for both images the colorimetric Delta-E is
+            // accumulated as well and replaces dE for the difference image.
+            if (xform1 && xform2) {
+                cmsCIELab Lab1, Lab2;
+                size_t index1 = cols * PixelSize(dwFormat1);
+                size_t index2 = cols * PixelSize(dwFormat2);
+
+                cmsDoTransform(xform1, &buf1[index1], &Lab1,  1);
+                cmsDoTransform(xform2, &buf2[index2], &Lab2,  1);
+
+                dE = cmsDeltaE(&Lab1, &Lab2);
+                AddOnePixel(&ColorimetricStat, dE);
+            }
+
+            if (diff) {
+                // Saturate at 255: a larger value does not fit an 8-bit sample.
+                double level = floor(dE + 0.5);
+                buf3[cols] = (cmsUInt8Number) (level > 255.0 ? 255.0 : level);
+            }
+        }
+
+        if (diff) {
+            if (TIFFWriteScanline(diff, buf3, row, 0) < 0) goto Error;
+        }
+    }
+
+    rc = 1;
+Error:
+    if (hProfile1) cmsCloseProfile(hProfile1);
+    if (hProfile2) cmsCloseProfile(hProfile2);
+    if (xform1) cmsDeleteTransform(xform1);
+    if (xform2) cmsDeleteTransform(xform2);
+    if (buf1 != NULL) _TIFFfree(buf1);
+    if (buf2 != NULL) _TIFFfree(buf2);
+    if (diff) {
+        TIFFWriteDirectory(diff);
+        if (buf3 != NULL) _TIFFfree(buf3);
+    }
+    return rc;
+}
+
+
+static   // Requires a 16-bit tag to be present in both images with the value Val.
+void AssureShortTagIs(TIFF* tif1, TIFF* tiff2, int tag, int Val, const char* Error)
+{
+        TIFF* images[2];
+        uint16 value;
+        int i;
+        images[0] = tif1;
+        images[1] = tiff2;
+        // Error is the tag's display name; the first failing image ends the check.
+        for (i = 0; i < 2; i++) {
+                if (!TIFFGetField(images[i], tag, &value) || value != Val) {
+                        FatalError("%s is not proper", Error);
+                        return;
+                }
+        }
+}
+
+
+static   // Returns 1 when both images carry the 16-bit tag with equal values, else 0.
+int CmpShortTag(TIFF* tif1, TIFF* tif2, int tag)
+{
+        uint16 first, second;
+
+        // && evaluates left to right and stops early, so the second image is
+        // only queried when the first one has the tag.
+        return TIFFGetField(tif1, tag, &first) &&
+               TIFFGetField(tif2, tag, &second) && (first == second);
+}
+
+static   // Returns 1 when both images carry the 32-bit tag with equal values, else 0.
+int CmpLongTag(TIFF* tif1, TIFF* tif2, int tag)
+{
+        uint32 first, second;
+
+        // A tag missing from either image counts as a mismatch.
+        if (TIFFGetField(tif1, tag, &first) && TIFFGetField(tif2, tag, &second))
+                return first == second;
+        return 0;
+}
+
+
+static   // Calls FatalError() naming the tag unless both images agree on its 16-bit value.
+void EqualShortTag(TIFF* tif1, TIFF* tif2, int tag, const char* Error)
+{
+    const int same = CmpShortTag(tif1, tif2, tag);
+    if (same == 0) FatalError("%s is different", Error);
+}
+
+
+
+static   // Calls FatalError() naming the tag unless both images agree on its 32-bit value.
+void EqualLongTag(TIFF* tif1, TIFF* tif2, int tag, const char* Error)
+{
+    if (CmpLongTag(tif1, tif2, tag)) return;
+    FatalError("%s is different", Error);
+}
+
+
+
+static   // Writes one row (identified by Name) with the summary figures of one accumulator.
+void AddOneCGATSRow(cmsHANDLE hIT8, char *Name, LPSTAT st)
+{
+    const double mean = Mean(st);
+    // Mean difference as a share of the 8-bit range, turned into "percent equal".
+    const double percentEqual = 100.0 * ((255.0 - mean) / 255.0);
+
+    cmsIT8SetData(hIT8,    Name, "SAMPLE_ID",    Name);
+    cmsIT8SetDataDbl(hIT8, Name, "PER100_EQUAL", percentEqual);
+    cmsIT8SetDataDbl(hIT8, Name, "MEAN_DE",      mean);
+    cmsIT8SetDataDbl(hIT8, Name, "STDEV_DE",     Std(st));
+    cmsIT8SetDataDbl(hIT8, Name, "MIN_DE",       st->Min);
+    cmsIT8SetDataDbl(hIT8, Name, "MAX_DE",       st->Peak);
+}
+
+
+static   // Writes the comparison statistics as a CGATS sheet to the file named by CGATSout.
+void CreateCGATS(const char* TiffName1, const char* TiffName2)
+{
+    // TiffName1 and TiffName2 only appear in the descriptive comment of the sheet.
+    cmsHANDLE hIT8 = cmsIT8Alloc(0);
+    time_t ltime;
+    const char* stamp;
+    size_t len;
+    char Buffer[256];
+
+    cmsIT8SetSheetType(hIT8, "TIFFDIFF");
+
+    // File names come from the command line and can be arbitrarily long:
+    // snprintf() truncates the comment instead of overflowing Buffer.
+    snprintf(Buffer, sizeof(Buffer), "Differences between %s and %s", TiffName1, TiffName2);
+    cmsIT8SetComment(hIT8, Buffer);
+    cmsIT8SetPropertyStr(hIT8, "ORIGINATOR", "TIFFDIFF");
+
+    // ctime() may return NULL; the '\n' that ends its text is dropped.
+    time( &ltime );
+    stamp = ctime(&ltime);
+    snprintf(Buffer, sizeof(Buffer), "%s", (stamp != NULL) ? stamp : "");
+    len = strlen(Buffer);
+    if (len > 0 && Buffer[len - 1] == '\n') Buffer[len - 1] = 0;
+
+    cmsIT8SetPropertyStr(hIT8, "CREATED", Buffer);
+    cmsIT8SetComment(hIT8, " ");
+    cmsIT8SetPropertyDbl(hIT8, "NUMBER_OF_FIELDS", 6);
+
+    cmsIT8SetDataFormat(hIT8, 0, "SAMPLE_ID");
+    cmsIT8SetDataFormat(hIT8, 1, "PER100_EQUAL");
+    cmsIT8SetDataFormat(hIT8, 2, "MEAN_DE");
+    cmsIT8SetDataFormat(hIT8, 3, "STDEV_DE");
+    cmsIT8SetDataFormat(hIT8, 4, "MIN_DE");
+    cmsIT8SetDataFormat(hIT8, 5, "MAX_DE");
+
+    // NUMBER_OF_SETS counts the colorant rows plus the two summary rows.
+    switch (Channels) {
+    case 1:
+            cmsIT8SetPropertyDbl(hIT8, "NUMBER_OF_SETS", 3);
+            AddOneCGATSRow(hIT8, "GRAY_PLANE", &ColorantStat[0]);
+            break;
+
+    case 3:
+            cmsIT8SetPropertyDbl(hIT8, "NUMBER_OF_SETS", 5);
+            AddOneCGATSRow(hIT8, "R_PLANE", &ColorantStat[0]);
+            AddOneCGATSRow(hIT8, "G_PLANE", &ColorantStat[1]);
+            AddOneCGATSRow(hIT8, "B_PLANE", &ColorantStat[2]);
+            break;
+
+    case 4:
+            cmsIT8SetPropertyDbl(hIT8, "NUMBER_OF_SETS", 6);
+            AddOneCGATSRow(hIT8, "C_PLANE", &ColorantStat[0]);
+            AddOneCGATSRow(hIT8, "M_PLANE", &ColorantStat[1]);
+            AddOneCGATSRow(hIT8, "Y_PLANE", &ColorantStat[2]);
+            AddOneCGATSRow(hIT8, "K_PLANE", &ColorantStat[3]);
+            break;
+
+    default: FatalError("Internal error: Bad ColorSpace");
+    }
+
+    AddOneCGATSRow(hIT8, "EUCLIDEAN",    &EuclideanStat);
+    AddOneCGATSRow(hIT8, "COLORIMETRIC", &ColorimetricStat);
+
+    cmsIT8SaveToFile(hIT8, CGATSout);
+    cmsIT8Free(hIT8);
+}
+
+int main(int argc, char* argv[])
+{
+      int i;
+
+      Tiff1 = Tiff2 = TiffDiff = NULL;
+
+      InitUtils("tiffdiff");
+
+      HandleSwitches(argc, argv);
+
+      // Exactly two file names must remain once the switches are consumed.
+      if ((argc - xoptind) != 2) {
+              Help();
+      }
+
+      TIFFSetErrorHandler(ConsoleErrorHandler);
+      TIFFSetWarningHandler(ConsoleWarningHandler);
+
+      Tiff1 = TIFFOpen(argv[xoptind], "r");
+      if (Tiff1 == NULL) FatalError("Unable to open '%s'", argv[xoptind]);
+
+      Tiff2 = TIFFOpen(argv[xoptind+1], "r");
+      if (Tiff2 == NULL) FatalError("Unable to open '%s'", argv[xoptind+1]);
+
+      // The difference image is optional (-o switch).
+      if (TiffDiffFilename) {
+          TiffDiff = TIFFOpen(TiffDiffFilename, "w");
+          if (TiffDiff == NULL) FatalError("Unable to create '%s'", TiffDiffFilename);
+      }
+
+      // Only chunky 8-bit images of identical geometry are compared.
+      AssureShortTagIs(Tiff1, Tiff2, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG, "Planar Config");
+      AssureShortTagIs(Tiff1, Tiff2, TIFFTAG_BITSPERSAMPLE, 8, "8 bit per sample");
+
+      EqualLongTag(Tiff1, Tiff2, TIFFTAG_IMAGEWIDTH,  "Image width");
+      EqualLongTag(Tiff1, Tiff2, TIFFTAG_IMAGELENGTH, "Image length");
+
+      EqualShortTag(Tiff1, Tiff2, TIFFTAG_SAMPLESPERPIXEL, "Samples per pixel");
+
+      hLab = cmsCreateLab4Profile(NULL);
+
+      // Reset every accumulator. ColorimetricStat is included so that its Min
+      // starts at the sentinel; left zero-initialized it could never rise
+      // above 0 and the reported minimum Delta-E would always be 0.
+      ClearStatistics(&EuclideanStat);
+      ClearStatistics(&ColorimetricStat);
+      for (i=0; i < 4; i++)
+            ClearStatistics(&ColorantStat[i]);
+
+      if (!CmpImages(Tiff1, Tiff2, TiffDiff))
+                FatalError("Error comparing images");
+
+      if (CGATSout) {
+            CreateCGATS(argv[xoptind], argv[xoptind+1]);
+      }
+      else {
+        double  Per100 = 100.0 * ((255.0 - Mean(&EuclideanStat)) / 255.0);
+
+        printf("Digital counts  %g%% equal. mean %g, min %g, max %g, Std %g\n", Per100, Mean(&EuclideanStat),
+                                                                                EuclideanStat.Min,
+                                                                                EuclideanStat.Peak,
+                                                                                Std(&EuclideanStat));
+
+        // The colorimetric line is printed only when Delta-E was measured.
+        if (ColorimetricStat.n > 0) {
+            Per100 = 100.0 * ((255.0 - Mean(&ColorimetricStat)) / 255.0);
+
+            printf("dE Colorimetric %g%% equal. mean %g, min %g, max %g, Std %g\n", Per100, Mean(&ColorimetricStat),
+                                                                                    ColorimetricStat.Min,
+                                                                                    ColorimetricStat.Peak,
+                                                                                    Std(&ColorimetricStat));
+        }
+      }
+
+      if (hLab)     cmsCloseProfile(hLab);
+      if (Tiff1)    TIFFClose(Tiff1);
+      if (Tiff2)    TIFFClose(Tiff2);
+      if (TiffDiff) TIFFClose(TiffDiff);
+
+      return 0;
+}
+
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/tificc/tificc.1 b/third-party/libjxl/libjxl/third_party/lcms/utils/tificc/tificc.1
new file mode 100644
index 0000000000..9af0d8688c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/tificc/tificc.1
@@ -0,0 +1,117 @@
+.\"Shiju P. Nair September 30, 2004
+.\"Thomas Weber <tweber@debian.org> April 23, 2014
+.TH TIFICC 1 "October 23, 2004"
+.SH NAME
+tificc - little cms ICC profile applier for TIFF.
+.SH SYNOPSIS
+.B tificc
+.RI [ options ] " input.tif output.tif"
+.SH DESCRIPTION
+lcms is a standalone CMM engine that handles color management.
+It implements a fast transformation between ICC profiles.
+.B tificc
+is a little cms ICC profile applier for TIFF.
+.SH OPTIONS
+.TP
+.B \-a
+Handle channels > 4 as alpha.
+.TP
+.B \-b
+Black point compensation.
+.TP
+.BI \-c\  NUM
+Precalculates transform (0=Off, 1=Normal, 2=Hi-res, 3=LoRes) [defaults to 1].
+.TP
+.BI \-d\  NUM
+Observer adaptation state (abs.col. only), (0..1.0, float value) [defaults to 0.0].
+.TP
+.B \-e
+Embed destination profile.
+.TP
+.B \-g
+Marks out-of-gamut colors on softproof.
+.TP
+.BI \-h\  NUM
+Show summary of options and examples (0=help, 1=Examples, 2=Built-in profiles, 3=Contact information)
+.TP
+.BI \-i\ profile
+Input profile (defaults to sRGB).
+.TP
+.BI \-k\  inklimit
+Ink-limiting in % (CMYK only), (0..400.0, float value) [default 400.0].
+.TP
+.BI \-l\ profile
+Transform by device-link profile.
+.TP
+.BI \-m\  NUM \" TODO: check if values outside 0..3 are possible
+SoftProof intent [defaults to 0].
+.TP
+.B \-n
+Ignore embedded profile on input.
+.TP
+.BI \-o\  profile
+Output profile (defaults to sRGB).
+.TP
+.BI \-p\  profile
+Soft proof profile.
+.TP
+.BI \-s\  newprofile
+Save embedded profile as \fInewprofile\fR.
+.TP
+.BI \-t\ NUM
+Rendering intent
+.nf
+.RS
+0=Perceptual [default]
+1=Relative colorimetric
+2=Saturation
+3=Absolute colorimetric
+10=Perceptual preserving black ink
+11=Relative colorimetric preserving black ink
+12=Saturation preserving black ink
+13=Perceptual preserving black plane
+14=Relative colorimetric preserving black plane
+15=Saturation preserving black plane
+.RE
+.fi
+.TP
+.B \-v
+Verbose.
+.TP
+.BI \-w\  NUM
+Output depth (8, 16 or 32). Use 32 for floating-point.
+.SH BUILT-IN PROFILES
+.nf
+	*Lab2  -- D50-based v2 CIEL*a*b
+	*Lab4  -- D50-based v4 CIEL*a*b
+	*Lab   -- D50-based v4 CIEL*a*b
+	*XYZ   -- CIE XYZ (PCS)
+	*sRGB  -- sRGB color space
+	*Gray22 - Monochrome of Gamma 2.2
+	*Gray30 - Monochrome of Gamma 3.0
+	*null   - Monochrome black for all input
+	*Lin2222- CMYK linearization of gamma 2.2 on each channel
+.fi
+.SH EXAMPLES
+.nf
+To color correct from scanner to sRGB:
+	tificc -iscanner.icm in.tif out.tif
+To convert from monitor1 to monitor2:
+	tificc -imon1.icm -omon2.icm in.tif out.tif
+To make a CMYK separation:
+	tificc -oprinter.icm inrgb.tif outcmyk.tif
+To recover sRGB from a CMYK separation:
+	tificc -iprinter.icm incmyk.tif outrgb.tif
+To convert from CIELab TIFF to sRGB:
+	tificc -i*Lab in.tif out.tif
+.fi
+.SH NOTES
+For suggestions, comments, bug reports etc. send mail to info@littlecms.com.
+.SH SEE ALSO
+.BR jpgicc (1),
+.BR linkicc (1),
+.BR psicc (1),
+.BR transicc (1)
+.SH AUTHOR
+This manual page was originally written by Shiju P. Nair <shiju.p@gmail.com>,
+for the Debian project. Modified by Marti Maria to reflect further changes.
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/tificc/tificc.c b/third-party/libjxl/libjxl/third_party/lcms/utils/tificc/tificc.c
new file mode 100644
index 0000000000..7707a1077d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/tificc/tificc.c
@@ -0,0 +1,1180 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+
+// This program does apply profiles to (some) TIFF files
+
+#include "lcms2_plugin.h"
+#include "tiffio.h"
+#include "utils.h"
+
+
+// Flags
+//
+// Program-wide option state. Everything below is filled in by HandleSwitches()
+// from the command line and read while transforming each TIFF directory.
+
+// -b: adds cmsFLAGS_BLACKPOINTCOMPENSATION to the transform flags
+static cmsBool BlackWhiteCompensation = FALSE;
+// -n: GetTIFFProfile() returns NULL without looking at the TIFF
+static cmsBool IgnoreEmbedded         = FALSE;
+// -e: embed the output profile (when one was given) into the output TIFF
+static cmsBool EmbedProfile           = FALSE;
+// -w: output bits per sample; HandleSwitches() accepts 8, 16 or 32 only
+static int     Width                  = 8;
+// -g: adds cmsFLAGS_GAMUTCHECK to the transform flags
+static cmsBool GamutCheck             = FALSE;
+// -l: cInpProf names a device-link profile; no output/proofing profile is opened
+static cmsBool lIsDeviceLink          = FALSE;
+// -a: extra samples are read as colorants; multi-ink output marks channels > 4 as alpha
+static cmsBool StoreAsAlpha           = FALSE;
+
+// -t: rendering intent passed to the transform
+static int Intent                  = INTENT_PERCEPTUAL;
+// -m: intent passed as the proofing intent to cmsCreateProofingTransform
+static int ProofingIntent          = INTENT_PERCEPTUAL;
+// -c: precalculation mode 0..3 (0=NOOPTIMIZE, 1=default, 2=HIGHRESPRECALC, 3=LOWRESPRECALC)
+static int PrecalcMode             = 1;
+// -k: ink limit in percent (0..400); 400 leaves ink limiting disabled
+static cmsFloat64Number InkLimit   = 400;
+
+// -d: value handed to cmsSetAdaptationState()
+static cmsFloat64Number ObserverAdaptationState  = 1.0;  // According ICC 4.3 this is the default
+
+// -i / -l, -o and -p: profile names; NULL when the switch was not given
+static const char *cInpProf  = NULL;
+static const char *cOutProf  = NULL;
+static const char *cProofing = NULL;
+
+// -s: file name under which the embedded input profile is saved, or NULL
+static const char* SaveEmbedded = NULL;
+
+// Console error & warning
+
+// Warning sink shared by libtiff (installed with TIFFSetWarningHandler) and
+// by Warning(). Nothing is printed unless Verbose is set.
+//
+//   module  optional prefix printed as "<module>: "; may be NULL
+//   fmt/ap  printf-style format and its arguments
+//
+// The message is streamed straight to stderr. Formatting into a fixed-size
+// stack buffer with strcpy/vsprintf could overflow on a long module name or
+// message (both may originate from the TIFF file being processed).
+static
+void ConsoleWarningHandler(const char* module, const char* fmt, va_list ap)
+{
+    if (!Verbose) return;
+
+    fprintf(stderr, "\nWarning ");
+    if (module != NULL)
+        fprintf(stderr, "%s: ", module);
+
+    vfprintf(stderr, fmt, ap);
+    fprintf(stderr, ".\n");
+    fflush(stderr);
+}
+
+static
+void ConsoleErrorHandler(const char* module, const char* fmt, va_list ap)
+{
+    char e[512] = { '\0' };
+
+    if (module != NULL) {
+        if (strlen(module) < 500)
+               strcat(strcpy(e, module), ": ");
+    }
+
+    vsprintf(e+strlen(e), fmt, ap);
+    strcat(e, ".");
+    fprintf(stderr, "\nError");
+    fprintf(stderr, " %s\n", e);
+    fflush(stderr);
+}
+
+
+// Report a warning on behalf of tificc itself: forwards the printf-style
+// message to ConsoleWarningHandler under the "[tificc]" module tag.
+static
+void Warning(const char *frm, ...)
+{
+    va_list ap;
+
+    va_start(ap, frm);
+    ConsoleWarningHandler("[tificc]", frm, ap);
+    va_end(ap);
+}
+
+
+
+// Out of mememory is a fatal error
+static
+void OutOfMem(cmsUInt32Number size)
+{
+    FatalError("Out of memory on allocating %d bytes.", size);  
+}
+
+
+// -----------------------------------------------------------------------------------------------
+
+// In TIFF, Lab is encoded in a different way, so let's use the plug-in 
+// capabilities of lcms2 to change the meaning of TYPE_Lab_8.  
+
+// Rescale a 16-bit Lab V2 encoded value to the V4 encoding:
+// x * 0xffff / 0xff00 = x * (255 * 257) / (255 * 256) = x * 257 / 256.
+// The result is saturated at 0xffff.
+static int FromLabV2ToLabV4(int x)
+{
+    const int scaled = ((x << 8) | x) >> 8;  // * 257 / 256
+
+    return (scaled > 0xffff) ? 0xffff : scaled;
+}
+
+// Rescale a 16-bit Lab V4 encoded value to the V2 encoding:
+// x * 0xff00 / 0xffff = x * 256 / 257, with 0x80 added before the division.
+static int FromLabV4ToLabV2(int x)
+{
+    const int numerator = (x << 8) + 0x80;
+
+    return numerator / 257;
+}
+
+
+// Formatter for 8bit Lab TIFF (photometric 8)
+// Reads three bytes from accum, widens each to 16 bits (<< 8), converts it
+// with FromLabV2ToLabV4 and stores it in wIn[0..2]. The a and b bytes are
+// first shifted by 128 (values above 127 go down, the rest go up).
+// Returns the input pointer advanced past the three bytes consumed.
+static
+unsigned char* UnrollTIFFLab8(struct _cmstransform_struct* CMMcargo,
+                              register cmsUInt16Number wIn[],
+                              register cmsUInt8Number* accum,
+                              register cmsUInt32Number Stride)
+{
+    int ch;
+
+    UTILS_UNUSED_PARAMETER(Stride);
+    UTILS_UNUSED_PARAMETER(CMMcargo);
+
+    // L is taken as-is
+    wIn[0] = (cmsUInt16Number) FromLabV2ToLabV4((accum[0]) << 8);
+
+    // a and b get the 128 offset before widening
+    for (ch = 1; ch < 3; ch++) {
+
+        int v = accum[ch];
+
+        if (v > 127)
+            v -= 128;
+        else
+            v += 128;
+
+        wIn[ch] = (cmsUInt16Number) FromLabV2ToLabV4(v << 8);
+    }
+
+    return accum + 3;
+}
+
+// Formatter for 16bit Lab TIFF (photometric 8)
+// Reads three 16-bit samples from accum, converts each with FromLabV2ToLabV4
+// and stores it in wIn[0..2]. The a and b samples are first shifted by 0x8000
+// (values above 0x7f00 go down, the rest go up).
+// Returns the input pointer advanced past the three samples consumed.
+static
+unsigned char* UnrollTIFFLab16(struct _cmstransform_struct* CMMcargo,
+                              register cmsUInt16Number wIn[],
+                              register cmsUInt8Number* accum,
+                              register cmsUInt32Number Stride )
+{
+    cmsUInt16Number* accum16 = (cmsUInt16Number*) accum;
+    int ch;
+
+    UTILS_UNUSED_PARAMETER(Stride);
+    UTILS_UNUSED_PARAMETER(CMMcargo);
+
+    // L is taken as-is
+    wIn[0] = (cmsUInt16Number) FromLabV2ToLabV4(accum16[0]);
+
+    // a and b get the 0x8000 offset
+    for (ch = 1; ch < 3; ch++) {
+
+        int v = accum16[ch];
+
+        if (v > 0x7f00)
+            v -= 0x8000;
+        else
+            v += 0x8000;
+
+        wIn[ch] = (cmsUInt16Number) FromLabV2ToLabV4(v);
+    }
+
+    return accum + 3 * sizeof(cmsUInt16Number);
+}
+
+
+// Output formatter for 8bit Lab TIFF: converts wOut[0..2] with
+// FromLabV4ToLabV2, reduces each to 8 bits and writes three bytes to output.
+// The a and b bytes are shifted by 128 (values below 128 go up, the rest go
+// down). Returns the output pointer advanced past the three bytes written.
+static
+unsigned char* PackTIFFLab8(struct _cmstransform_struct* CMMcargo,
+                            register cmsUInt16Number wOut[],
+                            register cmsUInt8Number* output,
+                            register cmsUInt32Number Stride)
+{
+    int a, b;
+
+    UTILS_UNUSED_PARAMETER(Stride);
+    UTILS_UNUSED_PARAMETER(CMMcargo);
+
+    // For L the 0x0080 is added before the V4->V2 conversion
+    output[0] = (cmsUInt8Number) (FromLabV4ToLabV2(wOut[0] + 0x0080) >> 8);
+
+    // For a and b the 0x0080 is added after the conversion
+    a = (FromLabV4ToLabV2(wOut[1]) + 0x0080) >> 8;
+    b = (FromLabV4ToLabV2(wOut[2]) + 0x0080) >> 8;
+
+    if (a < 128)
+        output[1] = (cmsUInt8Number) (a + 128);
+    else
+        output[1] = (cmsUInt8Number) (a - 128);
+
+    if (b < 128)
+        output[2] = (cmsUInt8Number) (b + 128);
+    else
+        output[2] = (cmsUInt8Number) (b - 128);
+
+    return output + 3;
+}
+
+// Output formatter for 16bit Lab TIFF: converts wOut[0..2] with
+// FromLabV4ToLabV2 and writes three 16-bit samples to output. The a and b
+// samples are shifted by 0x8000 (values below 0x7f00 go up, the rest go down).
+// Returns the output pointer advanced past the three samples written.
+static
+unsigned char* PackTIFFLab16(struct _cmstransform_struct* CMMcargo,
+                            register cmsUInt16Number wOut[],
+                            register cmsUInt8Number* output,
+                            register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* output16 = (cmsUInt16Number*) output;
+    int a, b;
+
+    UTILS_UNUSED_PARAMETER(Stride);
+    UTILS_UNUSED_PARAMETER(CMMcargo);
+
+    output16[0] = (cmsUInt16Number) FromLabV4ToLabV2(wOut[0]);
+
+    a = FromLabV4ToLabV2(wOut[1]);
+    b = FromLabV4ToLabV2(wOut[2]);
+
+    if (a < 0x7f00)
+        output16[1] = (cmsUInt16Number) (a + 0x8000);
+    else
+        output16[1] = (cmsUInt16Number) (a - 0x8000);
+
+    if (b < 0x7f00)
+        output16[2] = (cmsUInt16Number) (b + 0x8000);
+    else
+        output16[2] = (cmsUInt16Number) (b - 0x8000);
+
+    return (cmsUInt8Number*) (output16 + 3);
+}
+
+
+// Formatter factory registered through TiffLabPlugin.
+// Supplies the TIFF Lab formatters above only for pixel types that carry the
+// private "TIFF special" marker (bit 23, set by GetInputPixelType for
+// PHOTOMETRIC_CIELAB) and only when float packing is not requested. In every
+// other case the returned formatter is left NULL.
+static
+cmsFormatter TiffFormatterFactory(cmsUInt32Number Type,
+                                  cmsFormatterDirection Dir,
+                                  cmsUInt32Number dwFlags)
+{
+    cmsFormatter Result = { NULL };
+    const int IsOneByte     = (T_BYTES(Type) == 1);
+    const int IsTiffSpecial = (Type >> 23) & 1;
+
+    if (!IsTiffSpecial || (dwFlags & CMS_PACK_FLAGS_FLOAT))
+        return Result;
+
+    if (Dir == cmsFormatterInput) {
+
+        if (IsOneByte)
+            Result.Fmt16 = UnrollTIFFLab8;
+        else
+            Result.Fmt16 = UnrollTIFFLab16;
+    }
+    else {
+
+        if (IsOneByte)
+            Result.Fmt16 = PackTIFFLab8;
+        else
+            Result.Fmt16 = PackTIFFLab16;
+    }
+
+    return Result;
+}
+
+// Plug-in descriptor handed to cmsPlugin() in main()
+static cmsPluginFormatters TiffLabPlugin = { {cmsPluginMagicNumber, 2000, cmsPluginFormattersSig, NULL}, TiffFormatterFactory };
+
+
+
+// Build up the pixeltype descriptor
+//
+// Inspects the current directory of Bank and returns the lcms2 pixel format
+// word describing its samples: color space, channel and extra-sample counts,
+// bytes per sample, planar layout, reversed flavor (MINISWHITE) and the
+// float flag. Bit 23 is additionally set for PHOTOMETRIC_CIELAB so that
+// TiffFormatterFactory can recognise it.
+//
+// Unsupported layouts (bilevel, depths other than 8/16/32, palette,
+// subsampled YCbCr, unknown photometric or planar configuration) are
+// reported through FatalError().
+static
+cmsUInt32Number GetInputPixelType(TIFF *Bank)
+{
+    uint16 Photometric, bps, spp, extra, PlanarConfig, *info;
+    uint16 Compression, reverse = 0;
+    int ColorChannels, IsPlanar = 0, pt = 0, IsFlt;
+    int labTiffSpecial = FALSE;
+
+    // TIFFGetField leaves its output untouched when the tag is absent, so a
+    // missing PhotometricInterpretation must be caught here; otherwise the
+    // switch below would run on an uninitialized value.
+    if (!TIFFGetField(Bank, TIFFTAG_PHOTOMETRIC, &Photometric))
+        FatalError("Missing PhotometricInterpretation tag");
+
+    TIFFGetFieldDefaulted(Bank,  TIFFTAG_BITSPERSAMPLE, &bps);
+
+    if (bps == 1)
+        FatalError("Sorry, bilevel TIFFs has nothing to do with ICC profiles");
+
+    if (bps != 8 && bps != 16 && bps != 32)
+        FatalError("Sorry, 8, 16 or 32 bits per sample only");
+
+    TIFFGetFieldDefaulted(Bank, TIFFTAG_SAMPLESPERPIXEL, &spp);
+    TIFFGetFieldDefaulted(Bank, TIFFTAG_PLANARCONFIG, &PlanarConfig);
+
+    switch (PlanarConfig) {
+
+     case PLANARCONFIG_CONTIG: IsPlanar = 0; break;
+     case PLANARCONFIG_SEPARATE: IsPlanar = 1; break;
+     default:
+
+         FatalError("Unsupported planar configuration (=%d) ", (int) PlanarConfig);
+    }
+
+    // If Samples per pixel == 1, PlanarConfiguration is irrelevant and need
+    // not to be included.
+
+    if (spp == 1) IsPlanar = 0;
+
+    // Any alpha?
+
+    TIFFGetFieldDefaulted(Bank, TIFFTAG_EXTRASAMPLES, &extra, &info);
+
+    // Read alpha channels as colorant
+
+    if (StoreAsAlpha) {
+
+        ColorChannels = spp;
+        extra = 0;
+    }
+    else
+        ColorChannels = spp - extra;
+
+    switch (Photometric) {
+
+    case PHOTOMETRIC_MINISWHITE:
+
+        reverse = 1;
+
+        // ... fall through ...
+
+    case PHOTOMETRIC_MINISBLACK:
+        pt = PT_GRAY;
+        break;
+
+    case PHOTOMETRIC_RGB:
+        pt = PT_RGB;
+        break;
+
+
+     case PHOTOMETRIC_PALETTE:
+         FatalError("Sorry, palette images not supported");
+         break;
+
+     case PHOTOMETRIC_SEPARATED:
+
+         pt = PixelTypeFromChanCount(ColorChannels);
+         break;
+
+     case PHOTOMETRIC_YCBCR:
+         TIFFGetField(Bank, TIFFTAG_COMPRESSION, &Compression);
+         {
+             uint16 subx, suby;
+
+             pt = PT_YCbCr;
+             TIFFGetFieldDefaulted(Bank, TIFFTAG_YCBCRSUBSAMPLING, &subx, &suby);
+             if (subx != 1 || suby != 1)
+                 FatalError("Sorry, subsampled images not supported");
+
+         }
+         break;
+
+     case PHOTOMETRIC_ICCLAB:
+         pt = PT_LabV2;
+         break;
+
+     case PHOTOMETRIC_CIELAB:
+         pt = PT_Lab;
+         labTiffSpecial = TRUE;
+         break;
+
+
+     case PHOTOMETRIC_LOGLUV:      // CIE Log2(L) (u',v')
+
+         TIFFSetField(Bank, TIFFTAG_SGILOGDATAFMT, SGILOGDATAFMT_16BIT);
+         pt = PT_YUV;             // *ICCSpace = icSigLuvData;
+         bps = 16;                // 16 bits forced by LibTiff
+         break;
+
+     default:
+         FatalError("Unsupported TIFF color space (Photometric %d)", Photometric);
+    }
+
+    // Convert bits per sample to bytes per sample
+
+    bps >>= 3;
+    IsFlt = (bps == 0) || (bps == 4);
+
+    return (FLOAT_SH(IsFlt)|COLORSPACE_SH(pt)|PLANAR_SH(IsPlanar)|EXTRA_SH(extra)|CHANNELS_SH(ColorChannels)|BYTES_SH(bps)|FLAVOR_SH(reverse) | (labTiffSpecial << 23) );
+}
+
+
+
+// Rearrange pixel type to build output descriptor
+// The output keeps the planar layout of dwInput, takes its channel count
+// from OutColorSpace and its sample size from bps (bytes per sample). The
+// float flag is set when bps is 0 or 4.
+static
+cmsUInt32Number ComputeOutputFormatDescriptor(cmsUInt32Number dwInput, int OutColorSpace, int bps)
+{
+    const int planar   = T_PLANAR(dwInput);
+    const int channels = ChanCountFromPixelType(OutColorSpace);
+    int isFloat = 0;
+
+    if (bps == 0 || bps == 4)
+        isFloat = 1;
+
+    return (FLOAT_SH(isFloat) |
+            COLORSPACE_SH(OutColorSpace) |
+            PLANAR_SH(planar) |
+            CHANNELS_SH(channels) |
+            BYTES_SH(bps));
+}
+
+
+
+// Tile based transforms
+// Reads the input tile by tile (one tile per plane when the data is planar),
+// runs each through hXForm and writes the result to out.
+// Returns 1 when every tile was processed, 0 as soon as a read or write fails.
+static
+int TileBasedXform(cmsHTRANSFORM hXForm, TIFF* in, TIFF* out, int nPlanes)
+{
+    tsize_t BufSizeIn  = TIFFTileSize(in);
+    tsize_t BufSizeOut = TIFFTileSize(out);
+    unsigned char *BufferIn, *BufferOut;
+    ttile_t tile, TileCount = TIFFNumberOfTiles(in) / nPlanes;
+    uint32 tw, tl;
+    int PixelCount, plane;
+    int ok = 1;
+
+    TIFFGetFieldDefaulted(in, TIFFTAG_TILEWIDTH,  &tw);
+    TIFFGetFieldDefaulted(in, TIFFTAG_TILELENGTH, &tl);
+
+    PixelCount = (int) tw * tl;
+
+    // One buffer slot per plane, each the size of a tile
+    BufferIn = (unsigned char *) _TIFFmalloc(BufSizeIn * nPlanes);
+    if (!BufferIn) OutOfMem(BufSizeIn * nPlanes);
+
+    BufferOut = (unsigned char *) _TIFFmalloc(BufSizeOut * nPlanes);
+    if (!BufferOut) OutOfMem(BufSizeOut * nPlanes);
+
+    for (tile = 0; ok && tile < TileCount; tile++) {
+
+        // Gather the planes of this tile
+        for (plane = 0; ok && plane < nPlanes; plane++) {
+
+            if (TIFFReadEncodedTile(in, tile + (plane * TileCount),
+                BufferIn + (plane * BufSizeIn), BufSizeIn) < 0)
+                ok = 0;
+        }
+
+        if (!ok) break;
+
+        cmsDoTransform(hXForm, BufferIn, BufferOut, PixelCount);
+
+        // Scatter the transformed planes
+        for (plane = 0; ok && plane < nPlanes; plane++) {
+
+            if (TIFFWriteEncodedTile(out, tile + (plane * TileCount),
+                BufferOut + (plane * BufSizeOut), BufSizeOut) < 0)
+                ok = 0;
+        }
+    }
+
+    _TIFFfree(BufferIn);
+    _TIFFfree(BufferOut);
+    return ok;
+}
+
+
+// Strip based transforms
+// Reads the input strip by strip (one strip per plane when the data is
+// planar), runs each through hXForm and writes the result to out.
+// Returns 1 when every strip was processed, 0 as soon as a read or write fails.
+
+static
+int StripBasedXform(cmsHTRANSFORM hXForm, TIFF* in, TIFF* out, int nPlanes)
+{
+    tsize_t BufSizeIn  = TIFFStripSize(in);
+    tsize_t BufSizeOut = TIFFStripSize(out);
+    unsigned char *BufferIn, *BufferOut;
+    ttile_t strip, StripCount = TIFFNumberOfStrips(in) / nPlanes;
+    uint32 sw;
+    uint32 sl;
+    uint32 iml;
+    uint32 rows;
+    int plane;
+    int PixelCount;
+    int ok = 1;
+
+    TIFFGetFieldDefaulted(in, TIFFTAG_IMAGEWIDTH,  &sw);
+    TIFFGetFieldDefaulted(in, TIFFTAG_ROWSPERSTRIP, &sl);
+    TIFFGetFieldDefaulted(in, TIFFTAG_IMAGELENGTH, &iml);
+
+    // It is possible to get infinite rows per strip
+    if (sl == 0 || sl > iml)
+        sl = iml;   // One strip for whole image
+
+    // One buffer slot per plane, each the size of a strip
+    BufferIn = (unsigned char *) _TIFFmalloc(BufSizeIn * nPlanes);
+    if (!BufferIn) OutOfMem(BufSizeIn * nPlanes);
+
+    BufferOut = (unsigned char *) _TIFFmalloc(BufSizeOut * nPlanes);
+    if (!BufferOut) OutOfMem(BufSizeOut * nPlanes);
+
+    for (strip = 0; ok && strip < StripCount; strip++) {
+
+        // Gather the planes of this strip
+        for (plane = 0; ok && plane < nPlanes; plane++) {
+
+            if (TIFFReadEncodedStrip(in, strip + (plane * StripCount),
+                BufferIn + (plane * BufSizeIn), BufSizeIn) < 0)
+                ok = 0;
+        }
+
+        if (!ok) break;
+
+        // The last strip may hold fewer rows than the others; iml tracks the
+        // rows still to be processed.
+        rows = (iml < sl) ? iml : sl;
+        PixelCount = (int) sw * rows;
+        iml -= sl;
+
+        cmsDoTransform(hXForm, BufferIn, BufferOut, PixelCount);
+
+        // Scatter the transformed planes
+        for (plane = 0; ok && plane < nPlanes; plane++) {
+
+            if (TIFFWriteEncodedStrip(out, strip + (plane * StripCount),
+                BufferOut + plane * BufSizeOut, BufSizeOut) < 0)
+                ok = 0;
+        }
+    }
+
+    _TIFFfree(BufferIn);
+    _TIFFfree(BufferOut);
+    return ok;
+}
+
+
+// Creates minimum required tags
+// Sets PhotometricInterpretation, SamplesPerPixel, BitsPerSample and, for
+// separated spaces, the ink tags on the output directory according to the
+// lcms2 color space Colorspace. When the global Width is 32 the sample
+// format is additionally declared as IEEE floating point.
+static
+void WriteOutputTags(TIFF *out, int Colorspace, int BytesPerSample)
+{
+    const int BitsPerSample = (8 * BytesPerSample);
+    const int nChannels     = ChanCountFromPixelType(Colorspace);
+    uint16 Extra[11];
+    int k;
+
+    // Every possible extra sample is declared as unassociated alpha
+    for (k = 0; k < 11; k++)
+        Extra[k] = EXTRASAMPLE_UNASSALPHA;
+
+    switch (Colorspace) {
+
+    case PT_GRAY:
+        TIFFSetField(out, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_MINISBLACK);
+        TIFFSetField(out, TIFFTAG_SAMPLESPERPIXEL, 1);
+        TIFFSetField(out, TIFFTAG_BITSPERSAMPLE, BitsPerSample);
+        break;
+
+    case PT_RGB:
+        TIFFSetField(out, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_RGB);
+        TIFFSetField(out, TIFFTAG_SAMPLESPERPIXEL, 3);
+        TIFFSetField(out, TIFFTAG_BITSPERSAMPLE, BitsPerSample);
+        break;
+
+    case PT_CMY:
+        TIFFSetField(out, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_SEPARATED);
+        TIFFSetField(out, TIFFTAG_SAMPLESPERPIXEL, 3);
+        TIFFSetField(out, TIFFTAG_INKSET, 2);
+        TIFFSetField(out, TIFFTAG_BITSPERSAMPLE, BitsPerSample);
+        break;
+
+    case PT_CMYK:
+        TIFFSetField(out, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_SEPARATED);
+        TIFFSetField(out, TIFFTAG_SAMPLESPERPIXEL, 4);
+        TIFFSetField(out, TIFFTAG_INKSET, INKSET_CMYK);
+        TIFFSetField(out, TIFFTAG_BITSPERSAMPLE, BitsPerSample);
+        break;
+
+    case PT_Lab:
+        // 16-bit Lab is written with photometric value 9, 8-bit as CIELAB
+        TIFFSetField(out, TIFFTAG_PHOTOMETRIC,
+                     (BitsPerSample == 16) ? 9 : PHOTOMETRIC_CIELAB);
+        TIFFSetField(out, TIFFTAG_SAMPLESPERPIXEL, 3);
+        TIFFSetField(out, TIFFTAG_BITSPERSAMPLE, BitsPerSample);    // Needed by TIFF Spec
+        break;
+
+    // Multi-ink separations
+    case PT_MCH2:
+    case PT_MCH3:
+    case PT_MCH4:
+    case PT_MCH5:
+    case PT_MCH6:
+    case PT_MCH7:
+    case PT_MCH8:
+    case PT_MCH9:
+    case PT_MCH10:
+    case PT_MCH11:
+    case PT_MCH12:
+    case PT_MCH13:
+    case PT_MCH14:
+    case PT_MCH15:
+
+        TIFFSetField(out, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_SEPARATED);
+        TIFFSetField(out, TIFFTAG_SAMPLESPERPIXEL, nChannels);
+
+        if (!StoreAsAlpha || nChannels < 4) {
+            TIFFSetField(out, TIFFTAG_INKSET, 2);
+            TIFFSetField(out, TIFFTAG_NUMBEROFINKS, nChannels);
+        }
+        else {
+            // CMYK plus extra alpha
+            TIFFSetField(out, TIFFTAG_EXTRASAMPLES, nChannels - 4, Extra);
+            TIFFSetField(out, TIFFTAG_INKSET, 1);
+            TIFFSetField(out, TIFFTAG_NUMBEROFINKS, 4);
+        }
+
+        TIFFSetField(out, TIFFTAG_BITSPERSAMPLE, BitsPerSample);
+        break;
+
+    default:
+        FatalError("Unsupported output colorspace");
+    }
+
+    if (Width == 32)
+        TIFFSetField(out, TIFFTAG_SAMPLEFORMAT, SAMPLEFORMAT_IEEEFP);
+}
+
+
+// Copies a bunch of tags
+//
+// Image width and length are always transferred. Every other tag listed below
+// is copied from `in` to `out` only when it is present on the input. The
+// predictor is not copied for 32-bit output (Width == 32).
+
+static
+void CopyOtherTags(TIFF* in, TIFF* out)
+{
+// Wrapped in do/while(0) so the macro behaves as a single statement and
+// cannot capture a following `else`.
+#define CopyField(tag, v) \
+    do { if (TIFFGetField(in, tag, &v)) TIFFSetField(out, tag, v); } while (0)
+
+
+    short shortv;
+    uint32 ow, ol;
+    cmsFloat32Number floatv;
+    char *stringv;
+    uint32 longv;
+
+    CopyField(TIFFTAG_SUBFILETYPE, longv);
+
+    TIFFGetField(in, TIFFTAG_IMAGEWIDTH, &ow);
+    TIFFGetField(in, TIFFTAG_IMAGELENGTH, &ol);
+
+    TIFFSetField(out, TIFFTAG_IMAGEWIDTH, ow);
+    TIFFSetField(out, TIFFTAG_IMAGELENGTH, ol);
+
+    CopyField(TIFFTAG_PLANARCONFIG, shortv);
+    CopyField(TIFFTAG_COMPRESSION, shortv);
+
+    if (Width != 32)
+        CopyField(TIFFTAG_PREDICTOR, shortv);
+
+    CopyField(TIFFTAG_THRESHHOLDING, shortv);
+    CopyField(TIFFTAG_FILLORDER, shortv);
+    CopyField(TIFFTAG_ORIENTATION, shortv);
+    CopyField(TIFFTAG_MINSAMPLEVALUE, shortv);
+    CopyField(TIFFTAG_MAXSAMPLEVALUE, shortv);
+    CopyField(TIFFTAG_XRESOLUTION, floatv);
+    CopyField(TIFFTAG_YRESOLUTION, floatv);
+    CopyField(TIFFTAG_RESOLUTIONUNIT, shortv);
+    CopyField(TIFFTAG_ROWSPERSTRIP, longv);
+    CopyField(TIFFTAG_XPOSITION, floatv);
+    CopyField(TIFFTAG_YPOSITION, floatv);
+    CopyField(TIFFTAG_IMAGEDEPTH, longv);
+    CopyField(TIFFTAG_TILEDEPTH, longv);
+
+    CopyField(TIFFTAG_TILEWIDTH,  longv);
+    CopyField(TIFFTAG_TILELENGTH, longv);
+
+    CopyField(TIFFTAG_ARTIST, stringv);
+    CopyField(TIFFTAG_IMAGEDESCRIPTION, stringv);
+    CopyField(TIFFTAG_MAKE, stringv);
+    CopyField(TIFFTAG_MODEL, stringv);
+
+    CopyField(TIFFTAG_DATETIME, stringv);
+    CopyField(TIFFTAG_HOSTCOMPUTER, stringv);
+    CopyField(TIFFTAG_PAGENAME, stringv);
+    CopyField(TIFFTAG_DOCUMENTNAME, stringv);
+
+// The helper is local to this function
+#undef CopyField
+}
+
+// A replacement for (the nonstandard) filelength
+
+
+// Loads the ICC profile stored in ProfileFile and attaches it to Out as the
+// TIFFTAG_ICCPROFILE tag. Returns silently when the file cannot be opened or
+// its length cannot be determined; a short read is a fatal error.
+static
+void DoEmbedProfile(TIFF* Out, const char* ProfileFile)
+{
+    FILE* f;
+    cmsInt32Number size;
+    cmsUInt32Number EmbedLen;
+    cmsUInt8Number* EmbedBuffer;
+
+    f = fopen(ProfileFile, "rb");
+    if (f == NULL) return;
+
+    size = cmsfilelength(f);
+    if (size < 0) {
+        fclose(f);              // do not leak the handle on the early exit
+        return;
+    }
+
+    // One spare byte for the terminating zero written below
+    EmbedBuffer = (cmsUInt8Number*) malloc((size_t) size + 1);
+    if (EmbedBuffer == NULL) {
+        fclose(f);
+        OutOfMem((cmsUInt32Number) size + 1);
+        return;
+    }
+
+    EmbedLen = (cmsUInt32Number) fread(EmbedBuffer, 1, (size_t) size, f);
+    fclose(f);
+
+    // size is a 32-bit value: cast it to match the %ld conversion
+    if (EmbedLen != (cmsUInt32Number) size)
+        FatalError("Cannot read %ld bytes to %s", (long) size, ProfileFile);
+
+    EmbedBuffer[EmbedLen] = 0;
+
+    TIFFSetField(Out, TIFFTAG_ICCPROFILE, EmbedLen, EmbedBuffer);
+    free(EmbedBuffer);
+}
+
+
+
+// Obtains an input profile from the TIFF itself.
+//
+// Order of preference:
+//   1. the embedded ICC profile (TIFFTAG_ICCPROFILE), when it can be opened;
+//      it is also described on stdout in verbose mode and written to the
+//      SaveEmbedded file when that option is set;
+//   2. an RGB profile built from the primary chromaticities, white point and
+//      transfer function tags ("colorimetric" TIFF).
+//
+// Returns NULL when IgnoreEmbedded is set or when neither source is available.
+static
+cmsHPROFILE GetTIFFProfile(TIFF* in)
+{
+    cmsCIExyYTRIPLE Primaries;
+    cmsCIExyY WhitePoint;
+    cmsFloat32Number *chr, *wp;
+    cmsUInt16Number *gmr, *gmg, *gmb;
+    cmsToneCurve* Curve[3];
+    cmsHPROFILE hProfile;
+    cmsUInt32Number EmbedLen;
+    cmsUInt8Number* EmbedBuffer;
+    int i;
+
+    if (IgnoreEmbedded) return NULL;
+
+    // Embedded ICC profile
+    if (TIFFGetField(in, TIFFTAG_ICCPROFILE, &EmbedLen, &EmbedBuffer)) {
+
+        hProfile = cmsOpenProfileFromMem(EmbedBuffer, EmbedLen);
+
+        if (hProfile != NULL) {
+
+            // Print description found in the profile
+            if (Verbose) {
+
+                fprintf(stdout, "\n[Embedded profile]\n");
+                PrintProfileInformation(hProfile);
+                fflush(stdout);
+            }
+
+            if (SaveEmbedded != NULL)
+                SaveMemoryBlock(EmbedBuffer, EmbedLen, SaveEmbedded);
+
+            return hProfile;
+        }
+    }
+
+    // Try to see if "colorimetric" tiff: both tags are required
+    if (!TIFFGetField(in, TIFFTAG_PRIMARYCHROMATICITIES, &chr))
+        return NULL;
+
+    if (!TIFFGetField(in, TIFFTAG_WHITEPOINT, &wp))
+        return NULL;
+
+    Primaries.Red.x   = chr[0];
+    Primaries.Red.y   = chr[1];
+    Primaries.Green.x = chr[2];
+    Primaries.Green.y = chr[3];
+    Primaries.Blue.x  = chr[4];
+    Primaries.Blue.y  = chr[5];
+
+    Primaries.Red.Y   = 1.0;
+    Primaries.Green.Y = 1.0;
+    Primaries.Blue.Y  = 1.0;
+
+    WhitePoint.x = wp[0];
+    WhitePoint.y = wp[1];
+    WhitePoint.Y = 1.0;
+
+    // Transferfunction is a bit harder....
+    TIFFGetFieldDefaulted(in, TIFFTAG_TRANSFERFUNCTION, &gmr, &gmg, &gmb);
+
+    Curve[0] = cmsBuildTabulatedToneCurve16(NULL, 256, gmr);
+    Curve[1] = cmsBuildTabulatedToneCurve16(NULL, 256, gmg);
+    Curve[2] = cmsBuildTabulatedToneCurve16(NULL, 256, gmb);
+
+    hProfile = cmsCreateRGBProfileTHR(NULL, &WhitePoint, &Primaries, Curve);
+
+    for (i = 0; i < 3; i++)
+        cmsFreeToneCurve(Curve[i]);
+
+    if (Verbose) {
+        fprintf(stdout, "\n[Colorimetric TIFF]\n");
+    }
+
+    return hProfile;
+}
+
+
+// Transform one image
+//
+// Converts the current directory of `in` and writes it as a new directory of
+// `out`:
+//   - builds the transform flags from the global options;
+//   - opens the input profile (device link, TIFF-supplied, or cDefInpProf),
+//     plus the output and proofing profiles when not in device-link mode;
+//   - derives the input/output pixel formats and writes the output tags;
+//   - creates the transform (with an ink-limiting stage for CMY/CMYK output
+//     when InkLimit is not 400) and runs it tile by tile or strip by strip.
+//
+// Returns 1 on success, 0 when the transform could not be created or a
+// tile/strip could not be read or written. A profile that cannot be opened
+// is reported through FatalError() instead of being dereferenced.
+static
+int TransformImage(TIFF* in, TIFF* out, const char *cDefInpProf)
+{
+    cmsHPROFILE hIn, hOut, hProof, hInkLimit = NULL;
+    cmsHTRANSFORM xform;
+    cmsUInt32Number wInput, wOutput;
+    int OutputColorSpace;
+    int bps = Width / 8;
+    cmsUInt32Number dwFlags = 0;
+    int nPlanes;
+    int ok;
+
+    // Observer adaptation state (only meaningful on absolute colorimetric intent)
+
+    cmsSetAdaptationState(ObserverAdaptationState);
+
+    if (EmbedProfile && cOutProf)
+        DoEmbedProfile(out, cOutProf);
+
+    if (BlackWhiteCompensation)
+        dwFlags |= cmsFLAGS_BLACKPOINTCOMPENSATION;
+
+
+    switch (PrecalcMode) {
+
+       case 0: dwFlags |= cmsFLAGS_NOOPTIMIZE; break;
+       case 2: dwFlags |= cmsFLAGS_HIGHRESPRECALC; break;
+       case 3: dwFlags |= cmsFLAGS_LOWRESPRECALC; break;
+       case 1: break;
+
+       default: FatalError("Unknown precalculation mode '%d'", PrecalcMode);
+    }
+
+
+    if (GamutCheck)
+        dwFlags |= cmsFLAGS_GAMUTCHECK;
+
+    hProof = NULL;
+    hOut = NULL;
+
+    if (lIsDeviceLink) {
+
+        hIn = cmsOpenProfileFromFile(cDefInpProf, "r");
+    }
+    else {
+
+        hIn =  GetTIFFProfile(in);
+
+        if (hIn == NULL)
+            hIn = OpenStockProfile(NULL, cDefInpProf);
+
+        hOut = OpenStockProfile(NULL, cOutProf);
+
+        if (cProofing != NULL) {
+
+            hProof = OpenStockProfile(NULL, cProofing);
+            dwFlags |= cmsFLAGS_SOFTPROOFING;
+        }
+    }
+
+    // The profile handles are dereferenced below (cmsGetColorSpace/cmsGetPCS),
+    // so a profile that failed to open must stop the run here.
+    if (hIn == NULL)
+        FatalError("Unable to open input profile");
+
+    if (!lIsDeviceLink && hOut == NULL)
+        FatalError("Unable to open output profile");
+
+    // Take input color space
+
+    wInput = GetInputPixelType(in);
+
+    // Assure both, input profile and input TIFF are on same colorspace
+
+    if (_cmsLCMScolorSpace(cmsGetColorSpace(hIn)) != (int) T_COLORSPACE(wInput))
+        FatalError("Input profile is not operating in proper color space");
+
+
+    if (!lIsDeviceLink)
+        OutputColorSpace = _cmsLCMScolorSpace(cmsGetColorSpace(hOut));
+    else
+        OutputColorSpace = _cmsLCMScolorSpace(cmsGetPCS(hIn));
+
+    wOutput  = ComputeOutputFormatDescriptor(wInput, OutputColorSpace, bps);
+
+    WriteOutputTags(out, OutputColorSpace, bps);
+    CopyOtherTags(in, out);
+
+    // Ink limit
+    if (InkLimit != 400.0 &&
+        (OutputColorSpace == PT_CMYK || OutputColorSpace == PT_CMY)) {
+
+            cmsHPROFILE hProfiles[10];
+            int nProfiles = 0;
+
+
+            hInkLimit = cmsCreateInkLimitingDeviceLink(cmsGetColorSpace(hOut), InkLimit);
+
+            hProfiles[nProfiles++] = hIn;
+            if (hProof) {
+                hProfiles[nProfiles++] = hProof;
+                hProfiles[nProfiles++] = hProof;
+            }
+
+            hProfiles[nProfiles++] = hOut;
+            hProfiles[nProfiles++] = hInkLimit;
+
+            xform = cmsCreateMultiprofileTransform(hProfiles, nProfiles,
+                                                   wInput, wOutput, Intent, dwFlags);
+
+    }
+    else {
+
+        xform = cmsCreateProofingTransform(hIn, wInput,
+                                           hOut, wOutput,
+                                           hProof, Intent,
+                                           ProofingIntent,
+                                           dwFlags);
+    }
+
+    // The transform keeps what it needs; the profiles can go now
+    cmsCloseProfile(hIn);
+    cmsCloseProfile(hOut);
+
+    if (hInkLimit)
+        cmsCloseProfile(hInkLimit);
+    if (hProof)
+        cmsCloseProfile(hProof);
+
+    if (xform == NULL) return 0;
+
+    // Planar stuff
+    if (T_PLANAR(wInput))
+        nPlanes = T_CHANNELS(wInput) + T_EXTRA(wInput);
+    else
+        nPlanes = 1;
+
+
+    // Handle tile by tile or strip by strip, keeping the outcome so that a
+    // failed read/write is reported to the caller
+    if (TIFFIsTiled(in)) {
+
+        ok = TileBasedXform(xform, in, out, nPlanes);
+    }
+    else {
+        ok = StripBasedXform(xform, in, out, nPlanes);
+    }
+
+
+    cmsDeleteTransform(xform);
+
+    TIFFWriteDirectory(out);
+
+    return ok ? 1 : 0;
+}
+
+
+// Print help
+// Writes the banner followed by the help page selected by `level` to stderr
+// (1 = examples, 2 = built-in profiles, 3 = about; anything else = usage),
+// then terminates the program with exit status 0.
+static
+void Help(int level)
+{
+    fprintf(stderr, "little cms ICC profile applier for TIFF - v6.2 [LittleCMS %2.2f]\n\n", LCMS_VERSION / 1000.0);
+    fflush(stderr);
+
+    if (level == 1) {
+
+        // Examples
+        fprintf(stderr, "Examples:\n\n"
+            "To color correct from scanner to sRGB:\n"
+            "\ttificc %ciscanner.icm in.tif out.tif\n"
+            "To convert from monitor1 to monitor2:\n"
+            "\ttificc %cimon1.icm %comon2.icm in.tif out.tif\n"
+            "To make a CMYK separation:\n"
+            "\ttificc %coprinter.icm inrgb.tif outcmyk.tif\n"
+            "To recover sRGB from a CMYK separation:\n"
+            "\ttificc %ciprinter.icm incmyk.tif outrgb.tif\n"
+            "To convert from CIELab TIFF to sRGB\n"
+            "\ttificc %ci*Lab in.tif out.tif\n\n",
+            SW, SW, SW, SW, SW, SW);
+    }
+    else if (level == 2) {
+
+        // Built-in profiles
+        PrintBuiltins();
+    }
+    else if (level == 3) {
+
+        // About
+        fprintf(stderr, "This program is intended to be a demo of the little cms\n"
+            "engine. Both lcms and this program are freeware. You can\n"
+            "obtain both in source code at http://www.littlecms.com\n"
+            "For suggestions, comments, bug reports etc. send mail to\n"
+            "info@littlecms.com\n\n");
+    }
+    else {
+
+        // Usage (level 0 and any unknown level)
+        fprintf(stderr, "usage: tificc [flags] input.tif output.tif\n");
+
+        fprintf(stderr, "\nflags:\n\n");
+        fprintf(stderr, "%cv - Verbose\n", SW);
+        fprintf(stderr, "%ci<profile> - Input profile (defaults to sRGB)\n", SW);
+        fprintf(stderr, "%co<profile> - Output profile (defaults to sRGB)\n", SW);
+        fprintf(stderr, "%cl<profile> - Transform by device-link profile\n", SW);
+
+        PrintRenderingIntents();
+
+        fprintf(stderr, "%cb - Black point compensation\n", SW);
+        fprintf(stderr, "%cd<0..1> - Observer adaptation state (abs.col. only)\n", SW);
+
+        fprintf(stderr, "%cc<0,1,2,3> - Precalculates transform (0=Off, 1=Normal, 2=Hi-res, 3=LoRes)\n", SW);
+        fprintf(stderr, "\n");
+
+        fprintf(stderr, "%cw<8,16,32> - Output depth. Use 32 for floating-point\n\n", SW);
+        fprintf(stderr, "%ca - Handle channels > 4 as alpha\n", SW);
+
+        fprintf(stderr, "%cn - Ignore embedded profile on input\n", SW);
+        fprintf(stderr, "%ce - Embed destination profile\n", SW);
+        fprintf(stderr, "%cs<new profile> - Save embedded profile as <new profile>\n", SW);
+        fprintf(stderr, "\n");
+
+        fprintf(stderr, "%cp<profile> - Soft proof profile\n", SW);
+        fprintf(stderr, "%cm<n> - Soft proof intent\n", SW);
+        fprintf(stderr, "%cg - Marks out-of-gamut colors on softproof\n", SW);
+
+        fprintf(stderr, "\n");
+
+        fprintf(stderr, "%ck<0..400> - Ink-limiting in %% (CMYK only)\n", SW);
+        fprintf(stderr, "\n");
+        fprintf(stderr, "%ch<0,1,2,3> - More help\n", SW);
+    }
+
+    fflush(stderr);
+    exit(0);
+}
+
+
+// The toggles stuff
+// Parses the command-line switches with xgetopt and stores them in the
+// option globals at the top of the file. Each switch is accepted in upper or
+// lower case. Out-of-range values and conflicting profile options are
+// reported through FatalError(); -h prints help through Help().
+
+static
+void HandleSwitches(int argc, char *argv[])
+{
+    int s;
+
+    while ((s=xgetopt(argc,argv,"aAeEbBw:W:nNvVGgh:H:i:I:o:O:P:p:t:T:c:C:l:L:M:m:K:k:S:s:D:d:")) != EOF) {
+
+        // Both spellings of a switch mean the same, so fold the letter to
+        // lower case once (ASCII letters only) and dispatch on that.
+        const int opt = (s >= 'A' && s <= 'Z') ? (s - 'A' + 'a') : s;
+
+        switch (opt) {
+
+        case 'a':
+            StoreAsAlpha = TRUE;
+            break;
+
+        case 'b':
+            BlackWhiteCompensation = TRUE;
+            break;
+
+        case 'c':
+            PrecalcMode = atoi(xoptarg);
+            if (PrecalcMode < 0 || PrecalcMode > 3)
+                FatalError("Unknown precalc mode '%d'", PrecalcMode);
+            break;
+
+        case 'd':
+            ObserverAdaptationState = atof(xoptarg);
+            if (ObserverAdaptationState < 0 ||
+                ObserverAdaptationState > 1.0)
+                Warning("Adaptation state should be 0..1");
+            break;
+
+        case 'e':
+            EmbedProfile = TRUE;
+            break;
+
+        case 'g':
+            GamutCheck = TRUE;
+            break;
+
+        case 'v':
+            Verbose = TRUE;
+            break;
+
+        case 'i':
+            if (lIsDeviceLink)
+                FatalError("Device-link already specified");
+
+            cInpProf = xoptarg;
+            break;
+
+        case 'o':
+            if (lIsDeviceLink)
+                FatalError("Device-link already specified");
+
+            cOutProf = xoptarg;
+            break;
+
+        case 'l':
+            if (cInpProf != NULL || cOutProf != NULL)
+                FatalError("input/output profiles already specified");
+
+            // The device link travels in the input-profile slot
+            cInpProf = xoptarg;
+            lIsDeviceLink = TRUE;
+            break;
+
+        case 'p':
+            cProofing = xoptarg;
+            break;
+
+        case 't':
+            Intent = atoi(xoptarg);
+            break;
+
+        case 'm':
+            ProofingIntent = atoi(xoptarg);
+            break;
+
+        case 'n':
+            IgnoreEmbedded = TRUE;
+            break;
+
+        case 'w':
+            Width = atoi(xoptarg);
+            if (Width != 8 && Width != 16 && Width != 32)
+                FatalError("Only 8, 16 and 32 bps are supported");
+            break;
+
+        case 'k':
+            InkLimit = atof(xoptarg);
+            if (InkLimit < 0.0 || InkLimit > 400.0)
+                FatalError("Ink limit must be 0%%..400%%");
+            break;
+
+        case 's':
+            SaveEmbedded = xoptarg;
+            break;
+
+        case 'h':
+            Help(atoi(xoptarg));
+            break;
+
+        default:
+            FatalError("Unknown option - run without args to see valid ones");
+        }
+    }
+}
+
+
+// The main sink
+//
+// Registers the TIFF Lab formatter plug-in, parses the switches, opens the
+// input and output TIFFs and converts every directory of the input.
+// Returns 0 when all directories were converted and 1 when at least one
+// conversion failed, so callers can detect an incomplete output file.
+
+int main(int argc, char* argv[])
+{
+    TIFF *in, *out;
+    int failed = 0;
+
+    cmsPlugin(&TiffLabPlugin);
+
+    InitUtils("tificc");
+
+    HandleSwitches(argc, argv);
+
+    // Exactly two positional arguments are expected: input and output
+    if ((argc - xoptind) != 2) {
+
+        Help(0);
+    }
+
+
+    TIFFSetErrorHandler(ConsoleErrorHandler);
+    TIFFSetWarningHandler(ConsoleWarningHandler);
+
+    in = TIFFOpen(argv[xoptind], "r");
+    if (in == NULL) FatalError("Unable to open '%s'", argv[xoptind]);
+
+    out = TIFFOpen(argv[xoptind+1], "w");
+
+    if (out == NULL) {
+
+        TIFFClose(in);
+        FatalError("Unable to write '%s'", argv[xoptind+1]);
+    }
+
+    // One pass per TIFF directory; remember any failure instead of
+    // discarding TransformImage's result
+    do {
+
+        if (!TransformImage(in, out, cInpProf))
+            failed = 1;
+
+    } while (TIFFReadDirectory(in));
+
+
+    if (Verbose) { fprintf(stdout, "\n"); fflush(stdout); }
+
+    TIFFClose(in);
+    TIFFClose(out);
+
+    return failed ? 1 : 0;
+}
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/transicc/Makefile.am b/third-party/libjxl/libjxl/third_party/lcms/utils/transicc/Makefile.am
new file mode 100644
index 0000000000..7f033c6c50
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/transicc/Makefile.am
@@ -0,0 +1,19 @@
+#
+# Makefile for building lcms sample programs
+# Originally Written by Bob Friesenhahn, June 2003
+# Additions and bugs by Marti Maria 
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+
+AM_CPPFLAGS =  -I$(top_builddir)/include -I$(top_srcdir)/include \
+               -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+
+bin_PROGRAMS = transicc 
+
+transicc_LDADD = $(top_builddir)/src/liblcms2.la @LCMS_LIB_DEPLIBS@
+transicc_LDFLAGS = @LDFLAGS@
+transicc_SOURCES = transicc.c ../common/xgetopt.c ../common/vprf.c  ../common/utils.h
+man_MANS = transicc.1
+# man_MANS is installed but not distributed by default, so list it in EXTRA_DIST.
+EXTRA_DIST = $(man_MANS)
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/transicc/Makefile.in b/third-party/libjxl/libjxl/third_party/lcms/utils/transicc/Makefile.in
new file mode 100644
index 0000000000..ce93e538ae
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/transicc/Makefile.in
@@ -0,0 +1,663 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Makefile for building lcms sample programs
+# Originally Written by Bob Friesenhahn, June 2003
+# Additions and bugs by Marti Maria 
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+bin_PROGRAMS = transicc$(EXEEXT)
+subdir = utils/transicc
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
+	$(top_srcdir)/m4/ax_append_compile_flags.m4 \
+	$(top_srcdir)/m4/ax_append_flag.m4 \
+	$(top_srcdir)/m4/ax_check_compile_flag.m4 \
+	$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+	$(top_srcdir)/m4/ax_require_defined.m4 \
+	$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+	$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+	$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__installdirs = "$(DESTDIR)$(bindir)"
+PROGRAMS = $(bin_PROGRAMS)
+am__dirstamp = $(am__leading_dot)dirstamp
+am_transicc_OBJECTS = transicc.$(OBJEXT) ../common/xgetopt.$(OBJEXT) \
+	../common/vprf.$(OBJEXT)
+transicc_OBJECTS = $(am_transicc_OBJECTS)
+transicc_DEPENDENCIES = $(top_builddir)/src/liblcms2.la
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 = 
+transicc_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(transicc_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(transicc_SOURCES)
+DIST_SOURCES = $(transicc_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JPEGICC_DEPLIBS = @JPEGICC_DEPLIBS@
+LCMS_LIB_DEPLIBS = @LCMS_LIB_DEPLIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBRARY_AGE = @LIBRARY_AGE@
+LIBRARY_CURRENT = @LIBRARY_CURRENT@
+LIBRARY_REVISION = @LIBRARY_REVISION@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBTOOL_DEPS = @LIBTOOL_DEPS@
+LIB_JPEG = @LIB_JPEG@
+LIB_MATH = @LIB_MATH@
+LIB_THREAD = @LIB_THREAD@
+LIB_TIFF = @LIB_TIFF@
+LIB_ZLIB = @LIB_ZLIB@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_CXX = @PTHREAD_CXX@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TIFFICC_DEPLIBS = @TIFFICC_DEPLIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+inline = @inline@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
+               -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+
+transicc_LDADD = $(top_builddir)/src/liblcms2.la 
+transicc_LDFLAGS = @LDFLAGS@
+transicc_SOURCES = transicc.c ../common/xgetopt.c ../common/vprf.c  ../common/utils.h
+transicc_MANS = transicc.1
+EXTRA_DIST = $(man_MANS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign utils/transicc/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign utils/transicc/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-binPROGRAMS: $(bin_PROGRAMS)
+	@$(NORMAL_INSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
+	fi; \
+	for p in $$list; do echo "$$p $$p"; done | \
+	sed 's/$(EXEEXT)$$//' | \
+	while read p p1; do if test -f $$p \
+	 || test -f $$p1 \
+	  ; then echo "$$p"; echo "$$p"; else :; fi; \
+	done | \
+	sed -e 'p;s,.*/,,;n;h' \
+	    -e 's|.*|.|' \
+	    -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+	sed 'N;N;N;s,\n, ,g' | \
+	$(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+	  { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+	    if ($$2 == $$4) files[d] = files[d] " " $$1; \
+	    else { print "f", $$3 "/" $$4, $$1; } } \
+	  END { for (d in files) print "f", d, files[d] }' | \
+	while read type dir files; do \
+	    if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+	    test -z "$$files" || { \
+	    echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+	    $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+	    } \
+	; done
+
+uninstall-binPROGRAMS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	files=`for p in $$list; do echo "$$p"; done | \
+	  sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+	      -e 's/$$/$(EXEEXT)/' \
+	`; \
+	test -n "$$list" || exit 0; \
+	echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+	cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+	@list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
+	echo " rm -f" $$list; \
+	rm -f $$list || exit $$?; \
+	test -n "$(EXEEXT)" || exit 0; \
+	list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+	echo " rm -f" $$list; \
+	rm -f $$list
+../common/$(am__dirstamp):
+	@$(MKDIR_P) ../common
+	@: > ../common/$(am__dirstamp)
+../common/xgetopt.$(OBJEXT): ../common/$(am__dirstamp)
+../common/vprf.$(OBJEXT): ../common/$(am__dirstamp)
+
+transicc$(EXEEXT): $(transicc_OBJECTS) $(transicc_DEPENDENCIES) $(EXTRA_transicc_DEPENDENCIES) 
+	@rm -f transicc$(EXEEXT)
+	$(AM_V_CCLD)$(transicc_LINK) $(transicc_OBJECTS) $(transicc_LDADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+	-rm -f ../common/*.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+.c.o:
+	$(AM_V_CC)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+	$(AM_V_CC)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+	$(AM_V_CC)$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS)
+installdirs:
+	for dir in "$(DESTDIR)$(bindir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+	-rm -f ../common/$(am__dirstamp)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean \
+	clean-binPROGRAMS clean-generic clean-libtool cscopelist-am \
+	ctags ctags-am distclean distclean-compile distclean-generic \
+	distclean-libtool distclean-tags distdir dvi dvi-am html \
+	html-am info info-am install install-am install-binPROGRAMS \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-info install-info-am install-man install-pdf \
+	install-pdf-am install-ps install-ps-am install-strip \
+	installcheck installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags tags-am uninstall uninstall-am uninstall-binPROGRAMS
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/transicc/transicc.1 b/third-party/libjxl/libjxl/third_party/lcms/utils/transicc/transicc.1
new file mode 100644
index 0000000000..0c50a9039e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/transicc/transicc.1
@@ -0,0 +1,90 @@
+.\"Shiju P. Nair September 30, 2004
+.\"Thomas Weber <tweber@debian.org> April 23, 2014
+.TH TRANSICC 1 "MAY 30, 2011"
+.SH NAME
+transicc - little cms ColorSpace conversion calculator.
+.SH SYNOPSIS
+.B transicc
+.RI [ options ]\ [ CGATSINPUT ]\ [ CGATSOUTPUT ]
+.SH DESCRIPTION
+lcms is a standalone CMM engine, which deals with the color management.
+It implements a fast transformation between ICC profiles.
+.B transicc
+is a lcms ColorSpace conversion calculator.
+.SH OPTIONS
+.TP
+.B \-b
+Black point compensation.
+.TP
+.BI \-c\  NUM
+Precalculates transform (0=Off, 1=Normal, 2=Hi-res, 3=LoRes) [defaults to 0].
+.TP
+.BI \-d\  NUM
+Observer adaptation state (abs.col. only), (0..1.0, float value) [defaults to 0.0].
+.TP
+.B \-e
+Encoded representation of numbers is not float (Option \fB\-w\fR=use 16 bits, Option \fB\-x\fR=hexadecimal).
+.TP
+.B \-g
+Marks out-of-gamut colors on softproof.
+.TP
+.BI \-i\  profile
+Input profile (defaults to sRGB).
+.TP
+.B \-l
+Transform by device-link profile.
+.TP
+.BI \-m\  NUM
+SoftProof intent (0,1,2,3) [defaults to 0].
+.TP
+.B \-n
+Terse output, intended for pipe usage.
+.TP
+.BI \-o\  profile
+.\"
+Output profile (defaults to sRGB).
+.TP
+.B \-q
+Quantize CGATS to 8 bits.
+.TP
+.BI \-s
+Bounded mode.
+.TP
+.BI \-t\  NUM
+Rendering intent
+.nf
+.RS
+0=Perceptual [default]
+1=Relative colorimetric
+2=Saturation
+3=Absolute colorimetric
+10=Perceptual preserving black ink
+11=Relative colorimetric preserving black ink
+12=Saturation preserving black ink
+13=Perceptual preserving black plane
+14=Relative colorimetric preserving black plane
+15=Saturation preserving black plane
+.RE
+.fi
+.TP
+.BI \-v\  verbosity
+Verbosity level, (0=None, 1=Normal, 2=High, 3=Very High) [defaults to 1].
+.TP
+.B \-w
+Use 16 bits.
+.TP
+.B \-x
+Hexadecimal.
+.TP
+You can use '*Lab' and '*xyz' as built-in profiles.
+.SH NOTES
+For suggestions, comments, bug reports etc. send mail to
+info@littlecms.com.
+.SH SEE ALSO
+.BR jpgicc (1),
+.BR linkicc (1),
+.BR psicc (1),
+.BR tificc (1)
+.SH AUTHOR
+This manual page was written by Shiju P. Nair <shiju.p@gmail.com>,
+for the Debian project.
diff --git a/third-party/libjxl/libjxl/third_party/lcms/utils/transicc/transicc.c b/third-party/libjxl/libjxl/third_party/lcms/utils/transicc/transicc.c
new file mode 100644
index 0000000000..7a76b4d14c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms/utils/transicc/transicc.c
@@ -0,0 +1,1316 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "utils.h"
+
+#ifndef _MSC_VER 
+#    include <unistd.h> 
+#endif 
+
+#ifdef CMS_IS_WINDOWS_
+#    include <io.h>
+#endif
+
+#define MAX_INPUT_BUFFER 4096
+
+// Global options; the switch-controlled ones are written by HandleSwitches()
+
+static cmsBool           InHexa                 = FALSE;   // set by -x
+static cmsBool           GamutCheck             = FALSE;   // set by -g
+static cmsBool           Width16                = FALSE;   // set by -w
+static cmsBool           BlackPointCompensation = FALSE;   // set by -b
+static cmsBool           lIsDeviceLink          = FALSE;   // set by -l
+static cmsBool           lQuantize              = FALSE;   // set by -q
+static cmsBool           lUnbounded             = TRUE;    // cleared by -s
+static cmsBool           lIsFloat               = TRUE;    // cleared by -e
+
+static cmsUInt32Number   Intent           = INTENT_PERCEPTUAL;   // -t <n>
+static cmsUInt32Number   ProofingIntent   = INTENT_PERCEPTUAL;   // -m <n>, at most 3
+
+static int PrecalcMode  = 0;   // -c <n>, accepted range 0..3
+
+// --------------------------------------------------------------
+
+static char *cInProf   = NULL;   // argument of -i, or of -l (device link)
+static char *cOutProf  = NULL;   // argument of -o
+static char *cProofing = NULL;   // argument of -p
+
+static char *IncludePart = NULL;   // argument of -!
+
+static cmsHANDLE hIT8in = NULL;        // CGATS input 
+static cmsHANDLE hIT8out = NULL;       // CGATS output
+
+static char CGATSPatch[1024];   // Actual Patch Name
+static char CGATSoutFilename[cmsMAX_PATH];
+
+static int nMaxPatches;
+
+static cmsHTRANSFORM hTrans, hTransXYZ, hTransLab;
+static cmsBool InputNamedColor = FALSE;
+
+static cmsColorSpaceSignature InputColorSpace, OutputColorSpace;
+
+static cmsNAMEDCOLORLIST* InputColorant = NULL;
+static cmsNAMEDCOLORLIST* OutputColorant = NULL;
+
+static cmsFloat64Number InputRange, OutputRange;   // written by SetRange()
+
+// isatty replacement
+#ifdef _MSC_VER
+#define xisatty(x) _isatty( _fileno( (x) ) )
+#else
+#define xisatty(x) isatty( fileno( (x) ) )
+#endif
+
+//---------------------------------------------------------------------------------------------------
+
+// Writes the command-line usage summary to stderr
+static
+void Help(void)
+{
+    // Each group of related switches is emitted by a single fprintf call;
+    // adjacent string literals are concatenated by the compiler.
+    fprintf(stderr, "usage: transicc [flags] [CGATS input] [CGATS output]\n\n"
+                    "flags:\n\n"
+                    "%cv<0..3> - Verbosity level\n", SW);
+
+    // Number encoding
+    fprintf(stderr, "%ce[op] - Encoded representation of numbers\n"
+                    "\t%cw - use 16 bits\n"
+                    "\t%cx - Hexadecimal\n\n", SW, SW, SW);
+
+    // Range handling
+    fprintf(stderr, "%cs - bounded mode (clip negatives and highliths)\n"
+                    "%cq - Quantize (round decimals)\n\n", SW, SW);
+
+    // Profiles
+    fprintf(stderr, "%ci<profile> - Input profile (defaults to sRGB)\n"
+                    "%co<profile> - Output profile (defaults to sRGB)\n"
+                    "%cl<profile> - Transform by device-link profile\n"
+                    "\nYou can use '*Lab', '*xyz' and others as built-in profiles\n\n",
+                    SW, SW, SW);
+
+    PrintRenderingIntents();
+
+    // Remaining switches
+    fprintf(stderr, "\n"
+                    "%cd<0..1> - Observer adaptation state (abs.col. only)\n\n"
+                    "%cb - Black point compensation\n"
+                    "%cc<0,1,2,3> Precalculates transform (0=Off, 1=Normal, 2=Hi-res, 3=LoRes)\n\n"
+                    "%cn - Terse output, intended for pipe usage\n"
+                    "%cp<profile> - Soft proof profile\n"
+                    "%cm<0,1,2,3> - Soft proof intent\n"
+                    "%cg - Marks out-of-gamut colors on softproof\n\n",
+                    SW, SW, SW, SW, SW, SW, SW);
+
+    // Closing notice
+    fprintf(stderr, "This program is intended to be a demo of the little cms\n"
+                    "engine. Both lcms and this program are freeware. You can\n"
+                    "obtain both in source code at http://www.littlecms.com\n"
+                    "For suggestions, comments, bug reports etc. send mail to\n"
+                    "info@littlecms.com\n\n");
+}
+
+
+
+// Parses the command-line switches with xgetopt() and stores the results in
+// the file-level option variables; invalid values are reported via FatalError().
+static
+void HandleSwitches(int argc, char *argv[])
+{
+    int s;
+    // Letters followed by ':' take an argument, read below from xoptarg.
+    while ((s = xgetopt(argc, argv,
+        "bBC:c:d:D:eEgGI:i:L:l:m:M:nNO:o:p:P:QqSsT:t:V:v:WwxX!:")) != EOF) {
+
+    switch (s){
+
+        case '!': 
+            IncludePart = xoptarg;
+            break;
+
+        case 'b':
+        case 'B': 
+            BlackPointCompensation = TRUE;
+            break;
+
+        case 'c':
+        case 'C':
+            PrecalcMode = atoi(xoptarg);
+            if (PrecalcMode < 0 || PrecalcMode > 3)
+                FatalError("Unknown precalc mode '%d'", PrecalcMode);
+            break;
+
+        case 'd':
+        case 'D': {
+            cmsFloat64Number ObserverAdaptationState = atof(xoptarg);
+            if (ObserverAdaptationState < 0 || 
+                ObserverAdaptationState > 1.0)
+                FatalError("Adaptation states should be between 0 and 1");
+            // Applied immediately through cmsSetAdaptationState(), not stored.
+            cmsSetAdaptationState(ObserverAdaptationState);
+                  }
+                  break;
+
+        case 'e':
+        case 'E': 
+            lIsFloat = FALSE;
+            break;
+
+        case 'g':
+        case 'G':
+            GamutCheck = TRUE;
+            break;
+
+        case 'i':
+        case 'I':
+            if (lIsDeviceLink)
+                FatalError("icctrans: Device-link already specified");
+
+            cInProf = xoptarg;
+            break;  
+            // Device link goes into cInProf; a later -i or -o hits FatalError().
+        case 'l':
+        case 'L': 
+            cInProf = xoptarg;
+            lIsDeviceLink = TRUE;
+            break;
+
+            // No extra intents for proofing
+        case 'm':
+        case 'M':
+            ProofingIntent = atoi(xoptarg);
+            if (ProofingIntent > 3)
+                FatalError("Unknown Proofing Intent '%d'", ProofingIntent);        
+            break;      
+
+            // For compatibility
+        case 'n':
+        case 'N':
+            Verbose = 0;
+            break;
+
+            // Output profile        
+        case 'o':
+        case 'O':
+            if (lIsDeviceLink)
+                FatalError("icctrans: Device-link already specified"); 
+            cOutProf = xoptarg;
+            break;
+
+            // Proofing profile
+        case 'p':
+        case 'P':
+            cProofing = xoptarg;
+            break;      
+
+            // Quantize (get rid of decimals)
+        case 'q':
+        case 'Q': 
+            lQuantize = TRUE;
+            break;
+
+            // Inhibit unbounded mode
+        case 's':
+        case 'S':
+               lUnbounded = FALSE;
+               break;
+
+            // The intent (stored as given; no range check is made here)
+        case 't':
+        case 'T':
+            Intent = atoi(xoptarg);            
+            break;
+
+            // Verbosity level
+        case 'V':
+        case 'v':
+            Verbose = atoi(xoptarg);
+            if (Verbose < 0 || Verbose > 3) {
+                FatalError("Unknown verbosity level '%d'", Verbose);
+            }
+            break;
+
+            // Wide (16 bits)
+        case 'W':
+        case 'w':
+            Width16 = TRUE;
+            break;
+
+            // Hexadecimal        
+        case 'x':
+        case 'X':
+            InHexa = TRUE;
+            break;
+
+        default:            
+            FatalError("Unknown option - run without args to see valid ones.\n");
+            }       
+    }
+    // If output CGATS involved, switch to float.
+    // NOTE(review): the test fires only when more than two non-option
+    // arguments remain -- confirm that this matches the intent.
+    if ((argc - xoptind) > 2) {
+        lIsFloat = TRUE;
+    }
+}
+
+
+
+// Remembers the scaling range of one side of the conversion: InputRange when
+// IsInput is set, OutputRange otherwise.
+static
+void SetRange(cmsFloat64Number range, cmsBool IsInput)
+{
+    if (!IsInput) {
+        OutputRange = range;
+        return;
+    }
+
+    InputRange = range;
+}
+
+// Builds a named color list holding the usual channel names of a color space
+// and records the matching scaling range through SetRange(). Color spaces
+// without a dedicated entry get generic "Channel #n" names and a range of 1.
+// Returns NULL when the list cannot be allocated (no range is stored then).
+static
+cmsNAMEDCOLORLIST* ComponentNames(cmsColorSpaceSignature space, cmsBool IsInput)
+{
+    // NULL-terminated channel name tables, one per known color space
+    static const char* const XYZNames[]   = { "X",  "Y",  "Z",  NULL };
+    static const char* const LabNames[]   = { "L*", "a*", "b*", NULL };
+    static const char* const LuvNames[]   = { "L",  "u",  "v",  NULL };
+    static const char* const YCbCrNames[] = { "Y",  "Cb", "Cr", NULL };
+    static const char* const YxyNames[]   = { "Y",  "x",  "y",  NULL };
+    static const char* const RgbNames[]   = { "R",  "G",  "B",  NULL };
+    static const char* const GrayNames[]  = { "G",  NULL };
+    static const char* const HsvNames[]   = { "H",  "s",  "v",  NULL };
+    static const char* const HlsNames[]   = { "H",  "l",  "s",  NULL };
+    static const char* const CmykNames[]  = { "C",  "M",  "Y",  "K", NULL };
+    static const char* const CmyNames[]   = { "C",  "M",  "Y",  NULL };
+
+    const char* const* Labels = NULL;
+    cmsFloat64Number Range = 1;
+    cmsNAMEDCOLORLIST* List;
+    char Generic[cmsMAX_PATH];
+    int Channel, nChannels;
+
+    List = cmsAllocNamedColorList(0, 12, cmsMAXCHANNELS, "", "");
+    if (List == NULL) return NULL;
+
+    // Pick the name table and the range that go with this color space
+    switch (space) {
+
+    case cmsSigXYZData:   Labels = XYZNames;   Range = 100; break;
+    case cmsSigLabData:   Labels = LabNames;   Range = 1;   break;
+    case cmsSigLuvData:   Labels = LuvNames;   Range = 1;   break;
+    case cmsSigYCbCrData: Labels = YCbCrNames; Range = 255; break;
+    case cmsSigYxyData:   Labels = YxyNames;   Range = 1;   break;
+    case cmsSigRgbData:   Labels = RgbNames;   Range = 255; break;
+    case cmsSigGrayData:  Labels = GrayNames;  Range = 255; break;
+    case cmsSigHsvData:   Labels = HsvNames;   Range = 255; break;
+    case cmsSigHlsData:   Labels = HlsNames;   Range = 255; break;
+    case cmsSigCmykData:  Labels = CmykNames;  Range = 1;   break;
+    case cmsSigCmyData:   Labels = CmyNames;   Range = 1;   break;
+
+    default:
+        break;
+    }
+
+    SetRange(Range, IsInput);
+
+    if (Labels != NULL) {
+
+        for (; *Labels != NULL; Labels++)
+            cmsAppendNamedColor(List, *Labels, NULL, NULL);
+
+        return List;
+    }
+
+    // Unknown space: number the channels instead of naming them
+    nChannels = cmsChannelsOf(space);
+
+    for (Channel = 1; Channel <= nChannels; Channel++) {
+
+        sprintf(Generic, "Channel #%d", Channel);
+        cmsAppendNamedColor(List, Generic, NULL, NULL);
+    }
+
+    return List;
+}
+
+
+// Creates all needed color transforms.
+//
+// Opens the input profile (cInProf) and, unless a device link was requested,
+// the output profile (cOutProf) and the optional proofing profile (cProofing).
+// Fills InputColorSpace/OutputColorSpace, the colorant name lists and the
+// input/output ranges, then builds hTrans and, when an output profile exists
+// and Verbose > 1, the hTransXYZ/hTransLab transforms used for the PCS dump.
+//
+// Returns TRUE on success and FALSE when a profile could not be opened or a
+// transform could not be created. Every profile opened here is closed again
+// before returning, on the failure paths as well.
+static
+cmsBool OpenTransforms(void)
+{
+    cmsHPROFILE hInput, hOutput, hProof;
+    cmsUInt32Number dwIn, dwOut, dwFlags;
+    cmsNAMEDCOLORLIST* List;
+    cmsBool Ok;
+    int i;
+
+    // We don't need cache
+    dwFlags = cmsFLAGS_NOCACHE;
+
+    if (lIsDeviceLink) {
+
+        hInput  = OpenStockProfile(0, cInProf);
+        if (hInput == NULL) return FALSE; 
+        hOutput = NULL;
+        hProof  = NULL;
+
+        // For named color profiles the two spaces are taken the other way around
+        if (cmsGetDeviceClass(hInput) == cmsSigNamedColorClass) {
+            OutputColorSpace  = cmsGetColorSpace(hInput);
+            InputColorSpace = cmsGetPCS(hInput);
+        }
+        else {
+            InputColorSpace  = cmsGetColorSpace(hInput);
+            OutputColorSpace = cmsGetPCS(hInput);
+        }
+
+        // Read colorant tables if present
+        if (cmsIsTag(hInput, cmsSigColorantTableTag)) {
+            List = cmsReadTag(hInput, cmsSigColorantTableTag);
+            InputColorant = cmsDupNamedColorList(List);
+            InputRange = 1;
+        }
+        else InputColorant = ComponentNames(InputColorSpace, TRUE);
+
+        if (cmsIsTag(hInput, cmsSigColorantTableOutTag)){
+
+            List = cmsReadTag(hInput, cmsSigColorantTableOutTag);
+            OutputColorant = cmsDupNamedColorList(List);
+            OutputRange = 1;
+        }
+        else OutputColorant = ComponentNames(OutputColorSpace, FALSE);
+
+    }
+    else {
+
+        hInput  = OpenStockProfile(0, cInProf);
+        if (hInput == NULL) return FALSE;
+
+        hOutput = OpenStockProfile(0, cOutProf);    
+        if (hOutput == NULL) {
+
+            // Don't leak the input profile that was already opened
+            cmsCloseProfile(hInput);
+            return FALSE;
+        }
+        hProof  = NULL;
+
+
+        if (cmsGetDeviceClass(hInput) == cmsSigLinkClass ||
+            cmsGetDeviceClass(hOutput) == cmsSigLinkClass)   
+            FatalError("Use %cl flag for devicelink profiles!\n", SW);
+
+
+        InputColorSpace   = cmsGetColorSpace(hInput);
+        OutputColorSpace  = cmsGetColorSpace(hOutput);
+
+        // Read colorant tables if present
+        if (cmsIsTag(hInput, cmsSigColorantTableTag)) {
+            List = cmsReadTag(hInput, cmsSigColorantTableTag);
+            InputColorant = cmsDupNamedColorList(List);
+            if (cmsNamedColorCount(InputColorant) <= 3) 
+                SetRange(255, TRUE);
+            else
+                SetRange(1, TRUE);  // Inks are already divided by 100 in the formatter
+
+        }
+        else InputColorant = ComponentNames(InputColorSpace, TRUE);
+
+        if (cmsIsTag(hOutput, cmsSigColorantTableTag)){
+
+            List = cmsReadTag(hOutput, cmsSigColorantTableTag);
+            OutputColorant = cmsDupNamedColorList(List);
+            if (cmsNamedColorCount(OutputColorant) <= 3) 
+                SetRange(255, FALSE);
+            else
+                SetRange(1, FALSE);  // Inks are already divided by 100 in the formatter
+        }
+        else OutputColorant = ComponentNames(OutputColorSpace, FALSE);
+
+
+        if (cProofing != NULL) {
+
+            hProof = OpenStockProfile(0, cProofing);
+            if (hProof == NULL) {
+
+                // Release what has been opened so far
+                cmsCloseProfile(hInput);
+                cmsCloseProfile(hOutput);
+                return FALSE;
+            }
+            dwFlags |= cmsFLAGS_SOFTPROOFING;
+        }
+    }
+
+    // Print information on profiles
+    if (Verbose > 2) {
+
+        printf("Profile:\n");
+        PrintProfileInformation(hInput);
+
+        if (hOutput) {
+
+            printf("Output profile:\n");
+            PrintProfileInformation(hOutput);
+        }  
+
+        if (hProof != NULL) {
+            printf("Proofing profile:\n");
+            PrintProfileInformation(hProof);
+        }
+    }
+
+
+    // Input is always in floating point
+    dwIn  = cmsFormatterForColorspaceOfProfile(hInput, 0, TRUE);
+
+    if (lIsDeviceLink) {
+
+        dwOut = cmsFormatterForPCSOfProfile(hInput, lIsFloat ? 0 : 2, lIsFloat);
+    }
+    else {
+
+        // 16 bits or floating point (only on output)   
+        dwOut = cmsFormatterForColorspaceOfProfile(hOutput, lIsFloat ? 0 : 2, lIsFloat);
+    }
+
+    // For named color, there is a specialized formatter
+    if (cmsGetDeviceClass(hInput) == cmsSigNamedColorClass) {
+        
+        dwIn = TYPE_NAMED_COLOR_INDEX;
+        InputNamedColor = TRUE;
+    }
+
+    // Precision mode
+    switch (PrecalcMode) {
+
+       case 0: dwFlags |= cmsFLAGS_NOOPTIMIZE; break;
+       case 2: dwFlags |= cmsFLAGS_HIGHRESPRECALC; break;
+       case 3: dwFlags |= cmsFLAGS_LOWRESPRECALC; break;
+       case 1: break;
+
+       default: 
+           FatalError("Unknown precalculation mode '%d'", PrecalcMode);
+    }
+
+
+    if (BlackPointCompensation) 
+        dwFlags |= cmsFLAGS_BLACKPOINTCOMPENSATION;
+
+
+    if (GamutCheck) {
+
+        cmsUInt16Number Alarm[cmsMAXCHANNELS];
+
+        if (hProof == NULL)
+            FatalError("I need proofing profile -p for gamut checking!");
+
+        // Out-of-gamut colors are flagged with every channel at maximum
+        for (i=0; i < cmsMAXCHANNELS; i++)
+            Alarm[i] = 0xFFFF;
+
+        cmsSetAlarmCodes(Alarm);
+        dwFlags |= cmsFLAGS_GAMUTCHECK;            
+    }
+
+
+    // The main transform
+    hTrans = cmsCreateProofingTransform(hInput,  dwIn, hOutput, dwOut, hProof, Intent, ProofingIntent, dwFlags);
+
+    if (hProof) cmsCloseProfile(hProof);
+
+    Ok = (hTrans != NULL);
+
+    // PCS Dump if requested
+    hTransXYZ = NULL; hTransLab = NULL;
+
+    if (Ok && hOutput && Verbose > 1) {
+
+        cmsHPROFILE hXYZ = cmsCreateXYZProfile();
+        cmsHPROFILE hLab = cmsCreateLab4Profile(NULL);
+
+        hTransXYZ = cmsCreateTransform(hInput, dwIn, hXYZ,  lIsFloat ? TYPE_XYZ_DBL : TYPE_XYZ_16, Intent, cmsFLAGS_NOCACHE);        
+
+        // As before, the Lab transform is only attempted once the XYZ one exists
+        if (hTransXYZ != NULL)
+            hTransLab = cmsCreateTransform(hInput, dwIn, hLab,  lIsFloat? TYPE_Lab_DBL : TYPE_Lab_16, Intent, cmsFLAGS_NOCACHE);    
+
+        Ok = (hTransXYZ != NULL && hTransLab != NULL);
+
+        // The helper profiles are released whether or not the transforms were built
+        if (hXYZ) cmsCloseProfile(hXYZ);
+        if (hLab) cmsCloseProfile(hLab);
+    } 
+
+    // Single exit: the profiles are no longer needed once the transforms exist,
+    // and must not leak when creating them failed
+    if (hInput) cmsCloseProfile(hInput);
+    if (hOutput) cmsCloseProfile(hOutput); 
+
+    return Ok;
+}
+
+
+// Releases the colorant lists and the transforms held in the file-level
+// handles. Handles that are NULL are skipped.
+static
+void CloseTransforms(void)
+{
+    if (InputColorant != NULL) {
+        cmsFreeNamedColorList(InputColorant);
+    }
+
+    if (OutputColorant != NULL) {
+        cmsFreeNamedColorList(OutputColorant);
+    }
+
+    if (hTrans != NULL) {
+        cmsDeleteTransform(hTrans);
+    }
+
+    if (hTransLab != NULL) {
+        cmsDeleteTransform(hTransLab);
+    }
+
+    if (hTransXYZ != NULL) {
+        cmsDeleteTransform(hTransXYZ);
+    }
+}
+
+// ---------------------------------------------------------------------------------------------------
+
+// Get input from user.
+//
+// Reads one whitespace-delimited token (at most 4095 characters plus the
+// terminator) from stdin into Buffer. When stdin is a terminal, the printf-style
+// prompt frm is written to stderr before each read attempt.
+// On end of input, or when the token starts with 'q'/'Q', the transforms are
+// closed and the program exits with status 0.
+static
+void GetLine(char* Buffer, const char* frm, ...)
+{    
+    int res;
+    va_list args;
+
+    do {
+        if (xisatty(stdin)) {
+
+            // A va_list cannot be reused after vfprintf() consumed it, so it
+            // is started and ended around every single prompt.
+            va_start(args, frm);
+            vfprintf(stderr, frm, args);
+            va_end(args);
+        }
+
+        res = scanf("%4095s", Buffer);
+
+        // Buffer is only inspected when scanf() actually stored a token; the
+        // cast keeps toupper() defined for characters with the high bit set.
+        if (res < 0 || (res > 0 && toupper((unsigned char) Buffer[0]) == 'Q')) { // Quit?
+
+            CloseTransforms();
+
+            if (xisatty(stdin))  
+                fprintf(stderr, "Done.\n");
+
+            exit(0);        
+        }
+    } while (res == 0);
+}
+
+
+// Print a result which is given in double floating point.
+//
+// Each channel of OutputColorSpace is multiplied by OutputRange, rounded to
+// the nearest integer when lQuantize is set, and clamped to [0, OutputRange]
+// when unbounded mode is off. Channel names are printed only when Verbose > 0.
+static
+void PrintFloatResults(cmsFloat64Number Value[])
+{
+    cmsUInt32Number i, n;
+    char ChannelName[cmsMAX_PATH];
+    cmsFloat64Number v;
+
+    n = cmsChannelsOf(OutputColorSpace);
+    for (i=0; i < n; i++) {
+
+        if (OutputColorant == NULL) {
+
+            OutputRange = 1;
+            sprintf(ChannelName, "Channel #%u", i + 1);
+        }
+        else if (!cmsNamedColorInfo(OutputColorant, i, ChannelName, NULL, NULL, NULL, NULL)) {
+
+            // The colorant list has no entry for this channel; use a generic
+            // name rather than printing an uninitialized buffer.
+            sprintf(ChannelName, "Channel #%u", i + 1);
+        }
+
+        v = (cmsFloat64Number) Value[i]* OutputRange;
+
+        if (lQuantize) 
+            v = floor(v + 0.5);
+
+        if (!lUnbounded) {
+
+               if (v < 0)
+                      v = 0;
+               if (v > OutputRange)
+                      v = OutputRange;
+        }
+
+        if (Verbose <= 0)
+            printf("%.4f ", v);
+        else
+            printf("%s=%.4f ", ChannelName, v);
+    }   
+
+    printf("\n");
+}
+
+
+// Get a named-color index.
+//
+// Asks the user for an index into the named color list attached to hTrans,
+// prints the prefix, name and suffix of the selected color and returns the
+// index. Returns 0 when the transform carries no named color list; an index
+// outside 0..max is reported through FatalError().
+static
+cmsUInt16Number GetIndex(void)
+{
+    char Buffer[4096], Name[cmsMAX_PATH], Prefix[40], Suffix[40];
+    int index, max;
+    const cmsNAMEDCOLORLIST* NamedColorList;
+    
+    NamedColorList = cmsGetNamedColorList(hTrans);
+    if (NamedColorList == NULL) return 0;
+
+    max = cmsNamedColorCount(NamedColorList)-1;
+
+    GetLine(Buffer, "Color index (0..%d)? ", max);
+    index = atoi(Buffer);
+
+    // Negative values have to be rejected as well: the lookup below would
+    // fail for them and leave Name/Prefix/Suffix uninitialized.
+    if (index < 0 || index > max)
+        FatalError("Named color %d out of range!", index);
+
+    cmsNamedColorInfo(NamedColorList, index, Name, Prefix, Suffix, NULL, NULL);
+
+    printf("\n%s %s %s\n", Prefix, Name, Suffix);
+
+    return (cmsUInt16Number) index;
+}
+
+// Read values from a text file or terminal.
+//
+// For named color input a single 16-bit color index is read and copied into
+// the start of Float. Otherwise one value per channel of InputColorSpace is
+// read through GetLine() and divided by InputRange before being stored.
+static
+void TakeFloatValues(cmsFloat64Number Float[])
+{
+    cmsUInt32Number i, n;
+    char ChannelName[cmsMAX_PATH];
+    char Buffer[4096];
+
+    if (xisatty(stdin))
+        fprintf(stderr, "\nEnter values, 'q' to quit\n");
+
+    if (InputNamedColor) {
+
+        // This is named color index, which is always cmsUInt16Number
+        cmsUInt16Number index = GetIndex();
+        memcpy(Float, &index, sizeof(cmsUInt16Number));
+        return;
+    }
+
+    n = cmsChannelsOf(InputColorSpace);
+    for (i=0; i < n; i++) {
+
+        if (InputColorant == NULL) {
+
+            InputRange = 1;
+            sprintf(ChannelName, "Channel #%u", i+1);
+        }
+        else if (!cmsNamedColorInfo(InputColorant, i, ChannelName, NULL, NULL, NULL, NULL)) {
+
+            // The colorant list has no entry for this channel; prompt with a
+            // generic name rather than an uninitialized buffer.
+            sprintf(ChannelName, "Channel #%u", i+1);
+        }
+
+        GetLine(Buffer, "%s? ", ChannelName);
+
+        Float[i] = (cmsFloat64Number) atof(Buffer) / InputRange;
+    }       
+
+    if (xisatty(stdin))
+        fprintf(stderr, "\n");
+}
+
+// Prints the Lab and XYZ values of Input in floating point. Does nothing
+// unless Verbose > 1 and both PCS transforms are available. XYZ is printed
+// multiplied by 100.
+static
+void PrintPCSFloat(cmsFloat64Number Input[])
+{
+    cmsCIEXYZ XYZ = { 0, 0, 0 };
+    cmsCIELab Lab = { 0, 0, 0 };
+
+    if (Verbose <= 1 || hTransXYZ == NULL || hTransLab == NULL)
+        return;
+
+    cmsDoTransform(hTransXYZ, Input, &XYZ, 1);
+    cmsDoTransform(hTransLab, Input, &Lab, 1);
+
+    printf("[PCS] Lab=(%.4f,%.4f,%.4f) XYZ=(%.4f,%.4f,%.4f)\n",
+        Lab.L, Lab.a, Lab.b,
+        XYZ.X * 100.0, XYZ.Y * 100.0, XYZ.Z * 100.0);
+}
+
+
+
+
+// -----------------------------------------------------------------------------------------------
+
+// Prints a result given as 16-bit encoded values, one per channel of
+// OutputColorSpace. Values are shown as they are when Width16 is set and
+// divided by 257 (rounded) otherwise; InHexa selects hexadecimal output.
+// Channel names are printed only when Verbose > 0.
+static
+void PrintEncodedResults(cmsUInt16Number Encoded[])
+{
+    cmsUInt32Number i, n;
+    char ChannelName[cmsMAX_PATH];
+    cmsUInt32Number v;
+
+    n = cmsChannelsOf(OutputColorSpace);
+    for (i=0; i < n; i++) {
+
+        // Fall back to a generic name when there is no colorant list or the
+        // list has no entry for this channel, so that ChannelName is never
+        // printed uninitialized.
+        if (OutputColorant == NULL ||
+            !cmsNamedColorInfo(OutputColorant, i, ChannelName, NULL, NULL, NULL, NULL)) {
+
+            sprintf(ChannelName, "Channel #%u", i + 1);
+        }
+
+        if (Verbose > 0)
+            printf("%s=", ChannelName);
+
+        v = Encoded[i];
+
+        if (InHexa) {
+
+            if (Width16)
+                printf("0x%04X ", (int) floor(v + .5));
+            else
+                printf("0x%02X ", (int) floor(v / 257. + .5));
+
+        } else {
+
+            if (Width16)
+                printf("%d ", (int) floor(v + .5));
+            else
+                printf("%d ", (int) floor(v / 257. + .5));
+        }
+
+    }   
+
+    printf("\n");
+}
+
+// Prints the 16-bit encoded Lab and XYZ values of Input in hexadecimal.
+// Does nothing unless Verbose > 1 and both PCS transforms are available.
+
+static
+void PrintPCSEncoded(cmsFloat64Number Input[])
+{
+    cmsUInt16Number XYZ[3], Lab[3];
+
+    if (Verbose <= 1 || hTransXYZ == NULL || hTransLab == NULL)
+        return;
+
+    cmsDoTransform(hTransXYZ, Input, XYZ, 1);
+    cmsDoTransform(hTransLab, Input, Lab, 1);
+
+    printf("[PCS] Lab=(0x%04X,0x%04X,0x%04X) XYZ=(0x%04X,0x%04X,0x%04X)\n",
+        Lab[0], Lab[1], Lab[2],
+        XYZ[0], XYZ[1], XYZ[2]);
+}
+
+
+// --------------------------------------------------------------------------------------
+
+
+
+// Reads field Name of the current patch (CGATSPatch) from the input CGATS
+// sheet as text, converts it with atof() and returns it divided by Max.
+// A missing field is reported through FatalError().
+
+static
+cmsFloat64Number GetIT8Val(const char* Name, cmsFloat64Number Max)
+{
+    const char* Text;
+    cmsFloat64Number Raw;
+
+    Text = cmsIT8GetData(hIT8in, CGATSPatch, Name);
+    if (Text == NULL) 
+        FatalError("Field '%s' not found", Name);
+
+    Raw = atof(Text);
+
+    return Raw / Max;
+}
+
+
+// Read input values from CGATS file.
+//
+// Loads the name of patch number nPatch into CGATSPatch and fills Float with
+// the input color of that patch, read from the fields that correspond to
+// InputColorSpace and scaled as shown in each case below. For named color
+// input the patch name is looked up in the named color list of hTrans and the
+// resulting 16-bit index is stored at the start of Float.
+
+static
+void TakeCGATSValues(int nPatch, cmsFloat64Number Float[])
+{
+
+    // At first take the name if SAMPLE_ID is present
+    if (cmsIT8GetPatchName(hIT8in, nPatch, CGATSPatch) == NULL) {
+        FatalError("Sorry, I need 'SAMPLE_ID' on input CGATS to operate.");
+    }
+
+
+    // Special handling for named color profiles. 
+    // Lookup the name in the names database (the transform)
+
+    if (InputNamedColor) {
+
+        const cmsNAMEDCOLORLIST* NamedColorList;
+        cmsUInt16Number index16;
+        int index;
+
+        NamedColorList = cmsGetNamedColorList(hTrans);
+        if (NamedColorList == NULL) 
+            FatalError("Malformed named color profile");
+
+        index = cmsNamedColorIndex(NamedColorList, CGATSPatch);
+        if (index < 0) 
+            FatalError("Named color '%s' not found in the profile", CGATSPatch); 
+
+        // The transform input is TYPE_NAMED_COLOR_INDEX, i.e. a 16-bit index
+        // at the start of the buffer (same as TakeFloatValues does). Storing
+        // the index as a double would put unrelated bytes there.
+        index16 = (cmsUInt16Number) index;
+        memcpy(Float, &index16, sizeof(cmsUInt16Number));
+        return;
+    }
+
+    // Color is not a spot color, proceed.
+
+    switch (InputColorSpace) {
+
+        // Encoding should follow CGATS specification.
+
+    case cmsSigXYZData:
+        Float[0] = cmsIT8GetDataDbl(hIT8in, CGATSPatch, "XYZ_X") / 100.0;
+        Float[1] = cmsIT8GetDataDbl(hIT8in, CGATSPatch, "XYZ_Y") / 100.0;
+        Float[2] = cmsIT8GetDataDbl(hIT8in, CGATSPatch, "XYZ_Z") / 100.0;        
+        break;
+
+    case cmsSigLabData:
+        Float[0] = cmsIT8GetDataDbl(hIT8in, CGATSPatch, "LAB_L");
+        Float[1] = cmsIT8GetDataDbl(hIT8in, CGATSPatch, "LAB_A");
+        Float[2] = cmsIT8GetDataDbl(hIT8in, CGATSPatch, "LAB_B");        
+        break;
+
+
+    case cmsSigRgbData:
+        Float[0] = GetIT8Val("RGB_R", 255.0);
+        Float[1] = GetIT8Val("RGB_G", 255.0);
+        Float[2] = GetIT8Val("RGB_B", 255.0);
+        break;
+
+    case cmsSigGrayData:
+        Float[0] = GetIT8Val("GRAY", 255.0);
+        break;
+
+    case cmsSigCmykData:
+        Float[0] = GetIT8Val("CMYK_C", 1.0);
+        Float[1] = GetIT8Val("CMYK_M", 1.0);
+        Float[2] = GetIT8Val("CMYK_Y", 1.0);
+        Float[3] = GetIT8Val("CMYK_K", 1.0);
+        break;
+
+    case cmsSigCmyData:                        
+        Float[0] = GetIT8Val("CMY_C", 1.0);
+        Float[1] = GetIT8Val("CMY_M", 1.0);
+        Float[2] = GetIT8Val("CMY_Y", 1.0);
+        break;
+
+    case cmsSig1colorData:
+    case cmsSig2colorData:
+    case cmsSig3colorData:
+    case cmsSig4colorData:
+    case cmsSig5colorData:
+    case cmsSig6colorData:
+    case cmsSig7colorData:
+    case cmsSig8colorData:
+    case cmsSig9colorData:
+    case cmsSig10colorData:
+    case cmsSig11colorData:
+    case cmsSig12colorData:
+    case cmsSig13colorData:
+    case cmsSig14colorData:
+    case cmsSig15colorData:
+        {
+            cmsUInt32Number i, n;
+
+            // Fields are named "<n>CLR_<i>" and divided by 100
+            n = cmsChannelsOf(InputColorSpace);
+            for (i=0; i < n; i++) { 
+
+                char Buffer[255];
+
+                sprintf(Buffer, "%uCLR_%u", n, i+1);
+                Float[i] = GetIT8Val(Buffer, 100.0);
+            }
+
+        }
+        break;
+
+    default: 
+        {
+            cmsUInt32Number i, n;
+
+            // Any other space: fields are named "CHAN_<i>" and taken unscaled
+            n = cmsChannelsOf(InputColorSpace);
+            for (i=0; i < n; i++) { 
+
+                char Buffer[255];
+
+                sprintf(Buffer, "CHAN_%u", i+1);
+                Float[i] = GetIT8Val(Buffer, 1.0);
+            }
+
+        }
+    }
+
+}
+
+// Stores Val in column Col of the current patch (CGATSPatch) of the output
+// CGATS sheet, rounded to the nearest integer first when lQuantize is set.
+// A failure to store is reported through FatalError().
+static
+void SetCGATSfld(const char* Col, cmsFloat64Number Val)
+{
+    cmsFloat64Number Stored = lQuantize ? floor(Val + 0.5) : Val;
+
+    if (cmsIT8SetDataDbl(hIT8out, CGATSPatch, Col, Stored))
+        return;
+
+    FatalError("couldn't set '%s' on output cgats '%s'", Col, CGATSoutFilename);
+}
+
+
+
+// Writes one converted color to the output CGATS sheet.
+//
+// Stores the current patch name in SAMPLE_ID and then one field per channel
+// of OutputColorSpace, using the same field names that SetOutputDataFormat()
+// declares and the scaling shown in each case below.
+static
+void PutCGATSValues(cmsFloat64Number Float[])
+{   
+    cmsIT8SetData(hIT8out, CGATSPatch, "SAMPLE_ID", CGATSPatch);
+    switch (OutputColorSpace) {
+
+
+    // Encoding should follow CGATS specification.
+
+    case cmsSigXYZData:
+
+        SetCGATSfld("XYZ_X", Float[0] * 100.0);
+        SetCGATSfld("XYZ_Y", Float[1] * 100.0);
+        SetCGATSfld("XYZ_Z", Float[2] * 100.0);                    
+        break;
+
+    case cmsSigLabData:
+
+        SetCGATSfld("LAB_L", Float[0]);
+        SetCGATSfld("LAB_A", Float[1]);
+        SetCGATSfld("LAB_B", Float[2]);                    
+        break;
+
+
+    case cmsSigRgbData:
+        SetCGATSfld("RGB_R", Float[0] * 255.0);
+        SetCGATSfld("RGB_G", Float[1] * 255.0);
+        SetCGATSfld("RGB_B", Float[2] * 255.0);
+        break;
+
+    case cmsSigGrayData:
+        SetCGATSfld("GRAY", Float[0] * 255.0);                    
+        break;
+
+    case cmsSigCmykData:
+        SetCGATSfld("CMYK_C", Float[0]);
+        SetCGATSfld("CMYK_M", Float[1]);
+        SetCGATSfld("CMYK_Y", Float[2]);
+        SetCGATSfld("CMYK_K", Float[3]);
+        break;
+
+    case cmsSigCmyData:
+        SetCGATSfld("CMY_C", Float[0]);
+        SetCGATSfld("CMY_M", Float[1]);
+        SetCGATSfld("CMY_Y", Float[2]);                 
+        break;
+
+    case cmsSig1colorData:
+    case cmsSig2colorData:
+    case cmsSig3colorData:
+    case cmsSig4colorData:
+    case cmsSig5colorData:
+    case cmsSig6colorData:
+    case cmsSig7colorData:
+    case cmsSig8colorData:
+    case cmsSig9colorData:
+    case cmsSig10colorData:
+    case cmsSig11colorData:
+    case cmsSig12colorData:
+    case cmsSig13colorData:
+    case cmsSig14colorData:
+    case cmsSig15colorData:
+        {
+
+            cmsUInt32Number i, n;
+
+            // The channel count must come from the OUTPUT space: these are
+            // output fields, declared as "<n>CLR_<i>" from OutputColorSpace.
+            n = cmsChannelsOf(OutputColorSpace);
+            for (i=0; i < n; i++) { 
+
+                char Buffer[255];
+
+                sprintf(Buffer, "%uCLR_%u", n, i+1);
+
+                SetCGATSfld(Buffer, Float[i] * 100.0);
+            }
+        }
+        break;
+
+    default: 
+        {
+
+            cmsUInt32Number i, n;
+
+            // Same here: one "CHAN_<i>" field per output channel
+            n = cmsChannelsOf(OutputColorSpace);
+            for (i=0; i < n; i++) { 
+
+                char Buffer[255];
+
+                sprintf(Buffer, "CHAN_%u", i+1);
+
+                SetCGATSfld(Buffer, Float[i]);
+            }
+        }
+    }
+}
+
+
+
+// Prepares the output CGATS sheet: number format, ORIGINATOR, the optional
+// .INCLUDE property, NUMBER_OF_SETS, and the column layout. Column 0 is always
+// SAMPLE_ID; the remaining columns depend on OutputColorSpace.
+static
+void SetOutputDataFormat(void) 
+{
+    // NULL-terminated column name tables for the spaces CGATS names explicitly
+    static const char* const XYZFields[]  = { "XYZ_X", "XYZ_Y", "XYZ_Z", NULL };
+    static const char* const LabFields[]  = { "LAB_L", "LAB_A", "LAB_B", NULL };
+    static const char* const RgbFields[]  = { "RGB_R", "RGB_G", "RGB_B", NULL };
+    static const char* const GrayFields[] = { "GRAY", NULL };
+    static const char* const CmykFields[] = { "CMYK_C", "CMYK_M", "CMYK_Y", "CMYK_K", NULL };
+    static const char* const CmyFields[]  = { "CMY_C", "CMY_M", "CMY_Y", NULL };
+
+    const char* const* Fields = NULL;
+    cmsBool IsNColor = FALSE;
+    char Buffer[255];
+    int i, n;
+
+    cmsIT8DefineDblFormat(hIT8out, "%.4g");
+    cmsIT8SetPropertyStr(hIT8out, "ORIGINATOR", "icctrans");
+
+    if (IncludePart != NULL) 
+        cmsIT8SetPropertyStr(hIT8out, ".INCLUDE", IncludePart);
+
+    cmsIT8SetComment(hIT8out, "Data follows");
+    cmsIT8SetPropertyDbl(hIT8out, "NUMBER_OF_SETS", nMaxPatches);
+
+    // Encoding should follow CGATS specification.
+    switch (OutputColorSpace) {
+
+    case cmsSigXYZData:  Fields = XYZFields;  break;
+    case cmsSigLabData:  Fields = LabFields;  break;
+    case cmsSigRgbData:  Fields = RgbFields;  break;
+    case cmsSigGrayData: Fields = GrayFields; break;
+    case cmsSigCmykData: Fields = CmykFields; break;
+    case cmsSigCmyData:  Fields = CmyFields;  break;
+
+    case cmsSig1colorData:
+    case cmsSig2colorData:
+    case cmsSig3colorData:
+    case cmsSig4colorData:
+    case cmsSig5colorData:
+    case cmsSig6colorData:
+    case cmsSig7colorData:
+    case cmsSig8colorData:
+    case cmsSig9colorData:
+    case cmsSig10colorData:
+    case cmsSig11colorData:
+    case cmsSig12colorData:
+    case cmsSig13colorData:
+    case cmsSig14colorData:
+    case cmsSig15colorData:
+        IsNColor = TRUE;
+        break;
+
+    default:
+        break;
+    }
+
+    if (Fields != NULL) {
+
+        // Count the named columns, then declare SAMPLE_ID followed by them
+        for (n = 0; Fields[n] != NULL; n++)
+            ;
+
+        cmsIT8SetPropertyDbl(hIT8out, "NUMBER_OF_FIELDS", n + 1);
+        cmsIT8SetDataFormat(hIT8out, 0, "SAMPLE_ID");
+
+        for (i = 1; i <= n; i++)
+            cmsIT8SetDataFormat(hIT8out, i, Fields[i - 1]);
+
+        return;
+    }
+
+    // Generated column names: "<n>CLR_<i>" for the N-color spaces,
+    // "CHAN_<i>" for anything else
+    n = cmsChannelsOf(OutputColorSpace);
+    cmsIT8SetPropertyDbl(hIT8out, "NUMBER_OF_FIELDS", n+1);
+    cmsIT8SetDataFormat(hIT8out, 0, "SAMPLE_ID");
+
+    for (i = 1; i <= n; i++) {
+
+        if (IsNColor)
+            sprintf(Buffer, "%dCLR_%d", n, i);
+        else
+            sprintf(Buffer, "CHAN_%d", i);
+
+        cmsIT8SetDataFormat(hIT8out, i, Buffer);
+    }
+}
+
+// Open CGATS if specified.
+//
+// The arguments left after the switches are taken as CGATS file names: the
+// first one is loaded as input sheet (and provides nMaxPatches), a second one
+// names the output sheet, which is allocated and laid out here and written
+// later by main(). More than two names are reported through FatalError().
+
+static
+void OpenCGATSFiles(int argc, char *argv[])
+{    
+    int nParams = argc - xoptind;
+
+    if (nParams >= 1)  {
+
+        hIT8in = cmsIT8LoadFromFile(0, argv[xoptind]);
+
+        if (hIT8in == NULL) 
+            FatalError("'%s' is not recognized as a CGATS file", argv[xoptind]);
+
+        nMaxPatches = (int) cmsIT8GetPropertyDbl(hIT8in, "NUMBER_OF_SETS");     
+    }
+
+    if (nParams == 2) {
+
+        hIT8out = cmsIT8Alloc(NULL);            
+
+        // SetOutputDataFormat() uses the handle right away, so a failed
+        // allocation has to be caught here.
+        if (hIT8out == NULL)
+            FatalError("Unable to allocate the output CGATS sheet");
+
+        SetOutputDataFormat();
+
+        // NOTE(review): strncpy() does not terminate on truncation; this
+        // presumably relies on CGATSoutFilename being a zero-initialized
+        // buffer of at least cmsMAX_PATH bytes -- confirm at its declaration.
+        strncpy(CGATSoutFilename, argv[xoptind+1], cmsMAX_PATH-1);      
+    }
+
+    if (nParams > 2) FatalError("Too many CGATS files");
+}
+
+
+
+// The main sink.
+//
+// Parses the switches, opens the profiles and transforms, then converts colors
+// one at a time: patches come from the input CGATS sheet when one was given,
+// otherwise values are read from stdin until end of input or 'q'. Results go
+// to the output CGATS sheet when one was given, otherwise to stdout.
+// Returns 0 on success and 1 when the transforms could not be created or the
+// output CGATS file could not be written.
+int main(int argc, char *argv[])
+{    
+    cmsUInt16Number Output[cmsMAXCHANNELS];
+    cmsFloat64Number OutputFloat[cmsMAXCHANNELS];
+    cmsFloat64Number InputFloat[cmsMAXCHANNELS];
+
+    int nPatch = 0;
+    int ExitCode = 0;
+
+    fprintf(stderr, "LittleCMS ColorSpace conversion calculator - 4.3 [LittleCMS %2.2f]\n", LCMS_VERSION / 1000.0);
+
+    InitUtils("transicc");
+
+    Verbose = 1;
+
+    if (argc == 1) {
+
+        Help();              
+        return 0;
+    }
+
+    HandleSwitches(argc, argv);
+
+    // Open profiles, create transforms
+    if (!OpenTransforms()) return 1;
+
+    // Open CGATS input if specified
+    OpenCGATSFiles(argc, argv);
+
+    // The CGATS writer below always consumes OutputFloat, which the transform
+    // only fills in floating point mode. Refuse the combination instead of
+    // writing uninitialized values to the output sheet.
+    if (hIT8out != NULL && !lIsFloat)
+        FatalError("CGATS output needs floating point values, don't combine it with the %ce switch", SW);
+
+    // Main loop: read all values and convert them
+    for(;;) {
+
+        if (hIT8in != NULL) {
+
+            if (nPatch >= nMaxPatches) break;
+            TakeCGATSValues(nPatch++, InputFloat);
+
+        } else {
+
+            if (feof(stdin)) break;         
+            TakeFloatValues(InputFloat);
+
+        }
+
+        // The output buffer has to match the output format of the transform
+        if (lIsFloat) 
+            cmsDoTransform(hTrans, InputFloat, OutputFloat, 1);
+        else
+            cmsDoTransform(hTrans, InputFloat, Output, 1);
+
+
+        if (hIT8out != NULL) {
+
+            PutCGATSValues(OutputFloat);
+        }
+        else {
+
+            if (lIsFloat) {
+                PrintFloatResults(OutputFloat); PrintPCSFloat(InputFloat);
+            }
+            else {
+                PrintEncodedResults(Output);   PrintPCSEncoded(InputFloat);      
+            }
+
+        }
+    }
+
+
+    // Cleanup
+    CloseTransforms();
+
+    if (hIT8in)
+        cmsIT8Free(hIT8in);
+
+    if (hIT8out) {      
+
+        // A failed save must not go unnoticed: report it and exit non-zero
+        if (!cmsIT8SaveToFile(hIT8out, CGATSoutFilename)) {
+            fprintf(stderr, "Unable to write output CGATS file '%s'\n", CGATSoutFilename);
+            ExitCode = 1;
+        }
+
+        cmsIT8Free(hIT8out);
+    }
+
+    // All is ok unless the output could not be saved
+    return ExitCode;     
+}
+
+
diff --git a/third-party/libjxl/libjxl/third_party/lcms2.cmake b/third-party/libjxl/libjxl/third_party/lcms2.cmake
new file mode 100644
index 0000000000..c4551de862
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/lcms2.cmake
@@ -0,0 +1,77 @@
+# Copyright (c) the JPEG XL Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_library(lcms2 STATIC EXCLUDE_FROM_ALL
+  lcms/src/cmsalpha.c
+  lcms/src/cmscam02.c
+  lcms/src/cmscgats.c
+  lcms/src/cmscnvrt.c
+  lcms/src/cmserr.c
+  lcms/src/cmsgamma.c
+  lcms/src/cmsgmt.c
+  lcms/src/cmshalf.c
+  lcms/src/cmsintrp.c
+  lcms/src/cmsio0.c
+  lcms/src/cmsio1.c
+  lcms/src/cmslut.c
+  lcms/src/cmsmd5.c
+  lcms/src/cmsmtrx.c
+  lcms/src/cmsnamed.c
+  lcms/src/cmsopt.c
+  lcms/src/cmspack.c
+  lcms/src/cmspcs.c
+  lcms/src/cmsplugin.c
+  lcms/src/cmsps2.c
+  lcms/src/cmssamp.c
+  lcms/src/cmssm.c
+  lcms/src/cmstypes.c
+  lcms/src/cmsvirt.c
+  lcms/src/cmswtpnt.c
+  lcms/src/cmsxform.c
+  lcms/src/lcms2_internal.h
+)
+target_include_directories(lcms2
+    PUBLIC "${CMAKE_CURRENT_LIST_DIR}/lcms/include")
+# Silence warnings raised on the LCMS sources by GCC (they trigger with gcc-8).
+if (CMAKE_C_COMPILER_ID MATCHES "GNU")
+target_compile_options(lcms2
+  PRIVATE
+    # GCC-only flags, hence the compiler check above.
+    -Wno-stringop-truncation
+    -Wno-strict-aliasing
+)
+endif()
+# By default LCMS uses sizeof(void*) for memory alignment, but on 32-bit ARM
+# doubles that are not aligned to 8 bytes cannot be accessed. Force the
+# alignment to 8 bytes.
+target_compile_definitions(lcms2
+  PRIVATE "-DCMS_PTR_ALIGNMENT=8")
+target_compile_definitions(lcms2
+  PUBLIC "-DCMS_NO_REGISTER_KEYWORD=1")
+
+# Ensure LCMS uses a thread-safe gmtime: prefer gmtime_r, else try gmtime_s.
+include(CheckSymbolExists)
+check_symbol_exists(gmtime_r "time.h" HAVE_GMTIME_R)
+if (HAVE_GMTIME_R)
+  target_compile_definitions(lcms2
+    PUBLIC "-DHAVE_GMTIME_R=1")
+else()
+  check_symbol_exists(gmtime_s "time.h" HAVE_GMTIME_S)
+  if (HAVE_GMTIME_S)
+    target_compile_definitions(lcms2
+      PUBLIC "-DHAVE_GMTIME_S=1")
+  endif()
+endif()
+
+set_property(TARGET lcms2 PROPERTY POSITION_INDEPENDENT_CODE ON)
diff --git a/third-party/libjxl/libjxl/third_party/sjpeg.cmake b/third-party/libjxl/libjxl/third_party/sjpeg.cmake
new file mode 100644
index 0000000000..f1a69252ba
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/sjpeg.cmake
@@ -0,0 +1,27 @@
+# Copyright (c) the JPEG XL Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# We need to CACHE the SJPEG_BUILD_EXAMPLES to not be removed by the option()
+# inside SJPEG.
+set(SJPEG_BUILD_EXAMPLES NO CACHE BOOL "Examples")
+# SJPEG uses OpenGL which throws a warning if multiple options are installed.
+# This setting makes it prefer the new version.
+set(OpenGL_GL_PREFERENCE GLVND)
+
+# Build SJPEG as a static library.
+set(BUILD_SHARED_LIBS_BACKUP ${BUILD_SHARED_LIBS})
+set(BUILD_SHARED_LIBS OFF)
+add_subdirectory(sjpeg EXCLUDE_FROM_ALL)
+target_include_directories(sjpeg PUBLIC "${CMAKE_CURRENT_LIST_DIR}/sjpeg/src/")
+set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_BACKUP})
diff --git a/third-party/libjxl/libjxl/third_party/skcms.cmake b/third-party/libjxl/libjxl/third_party/skcms.cmake
new file mode 100644
index 0000000000..4d2a79cdbc
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms.cmake
@@ -0,0 +1,51 @@
+# Copyright (c) the JPEG XL Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_library(skcms-obj OBJECT EXCLUDE_FROM_ALL skcms/skcms.cc)
+target_include_directories(skcms-obj PUBLIC "${CMAKE_CURRENT_LIST_DIR}/skcms/")
+
+# This library is meant to be compiled/used by external libs (such as plugins)
+# that need to use skcms. We use a wrapper for libjxl.
+add_library(skcms-interface INTERFACE)
+target_sources(skcms-interface INTERFACE ${CMAKE_CURRENT_LIST_DIR}/skcms/skcms.cc)
+target_include_directories(skcms-interface INTERFACE ${CMAKE_CURRENT_LIST_DIR}/skcms)
+
+include(CheckCXXCompilerFlag)
+check_cxx_compiler_flag("-Wno-psabi" CXX_WPSABI_SUPPORTED)
+if(CXX_WPSABI_SUPPORTED)
+  target_compile_options(skcms-obj PRIVATE -Wno-psabi)
+  target_compile_options(skcms-interface INTERFACE -Wno-psabi)
+endif()
+
+if(JPEGXL_BUNDLE_SKCMS)
+  target_compile_options(skcms-obj PRIVATE -DJPEGXL_BUNDLE_SKCMS=1)
+  if(MSVC)
+    target_compile_options(skcms-obj
+      PRIVATE /FI${CMAKE_CURRENT_SOURCE_DIR}/../lib/jxl/enc_jxl_skcms.h)
+  else()
+    target_compile_options(skcms-obj
+      PRIVATE -include ${CMAKE_CURRENT_SOURCE_DIR}/../lib/jxl/enc_jxl_skcms.h)
+  endif()
+endif()
+
+set_target_properties(skcms-obj PROPERTIES
+  POSITION_INDEPENDENT_CODE ON
+  CXX_VISIBILITY_PRESET hidden
+  VISIBILITY_INLINES_HIDDEN 1
+)
+
+add_library(skcms STATIC EXCLUDE_FROM_ALL $<TARGET_OBJECTS:skcms-obj>)
+target_include_directories(skcms
+  PUBLIC $<TARGET_PROPERTY:skcms-obj,INCLUDE_DIRECTORIES>)
+
diff --git a/third-party/libjxl/libjxl/third_party/skcms/BAZEL.md b/third-party/libjxl/libjxl/third_party/skcms/BAZEL.md
new file mode 100644
index 0000000000..1c55230728
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/BAZEL.md
@@ -0,0 +1,126 @@
+# SkCMS Bazel build
+
+This file provides instructions on how to build and test SkCMS using
+[Bazel](https://bazel.build).
+
+## Linux
+
+### Building and testing locally
+
+Open a terminal and `cd` into your SkCMS repository checkout, then run:
+
+```
+$ bazel build //...
+
+$ bazel test //...
+```
+
+### Building and testing on RBE
+
+Same as above, but add `--config=linux-rbe` to your `bazel` invocation, e.g.:
+
+```
+$ bazel build //... --config=linux-rbe
+
+$ bazel test //... --config=linux-rbe
+```
+
+Note that you need to obtain RBE credentials for this to work (instructions below).
+
+## macOS
+
+TODO(lovisolo)
+
+## Windows
+
+SkCMS can be compiled with either
+[Microsoft Build Tools for Visual Studio 2019](https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2019)
+or [Clang](https://clang.llvm.org/).
+
+Before continuing, install Bazel by following the instructions
+[here](https://docs.bazel.build/versions/4.2.1/install-windows.html). Make sure
+to include the `bazel` binary in your `PATH`.
+
+Note that Bazel requires symlink support to function properly. Enable symlink
+support by enabling
+[Developer Mode](https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development),
+or by running Bazel as an administrator
+([reference](https://docs.bazel.build/versions/main/windows.html#enable-symlink-support)).
+
+### Building and testing locally
+
+The below instructions are based on the
+[Build on Windows](https://bazel.build/configure/windows#using)
+section of the Bazel documentation.
+
+#### With Build Tools for Visual Studio 2019
+
+Download and install Build Tools for Visual Studio 2019 using this
+[link](https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2019).
+Select "Desktop development with C++" on the installation wizard, and leave all
+other items unchanged.
+
+Open `cmd.exe` and `cd` into your SkCMS repository checkout. Set the `BAZEL_VC`
+environment variable to point to your Build Tools for Visual Studio 2019
+installation:
+
+```
+> set BAZEL_VC=C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC
+```
+
+Then run:
+
+```
+> bazel build //...
+
+> bazel test //... --enable_runfiles
+```
+
+Reference
+[here](https://docs.bazel.build/versions/main/windows.html#build-c-with-msvc).
+
+TODO(lovisolo): Consider adding `--enable_runfiles` to `//.bazelrc`.
+
+#### With Clang
+
+In order to build with Clang, you have to install **both** LLVM and Build Tools
+for Visual Studio 2019 (rationale
+[here](https://docs.bazel.build/versions/main/windows.html#build-c-with-clang)).
+Please install the latter by following the above instructions before proceeding.
+
+Download and install LLVM from this
+[link](https://github.com/llvm/llvm-project/releases/tag/llvmorg-12.0.1).
+
+Open `cmd.exe` and `cd` into your SkCMS repository checkout, then run:
+
+```
+> bazel build //... --compiler=clang-cl
+
+> bazel test //... --compiler=clang-cl --enable_runfiles
+```
+
+If the above commands fail because Bazel cannot find your LLVM installation, set
+the `BAZEL_LLVM` environment variable to point to your LLVM installation:
+
+```
+> set BAZEL_LLVM=C:\Program Files\LLVM
+```
+
+Reference
+[here](https://docs.bazel.build/versions/main/windows.html#build-c-with-clang).
+
+TODO(lovisolo): Investigate adding a platform target to the top-level
+`BUILD.bazel` file as per the instructions
+[here](https://docs.bazel.build/versions/main/windows.html#build-c-with-clang).
+
+### Building and testing on RBE
+
+TODO(lovisolo)
+
+## RBE Credentials
+
+```
+gcloud auth application-default login
+```
+
+The settings in `.bazelrc` should use those default Google Cloud credentials.
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/skcms/BUILD.bazel b/third-party/libjxl/libjxl/third_party/skcms/BUILD.bazel
new file mode 100644
index 0000000000..8fb6c0fbf4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/BUILD.bazel
@@ -0,0 +1,103 @@
+load("@io_bazel_rules_docker//container:container.bzl", "container_push")
+load("@io_bazel_rules_docker//docker/util:run.bzl", "container_run_and_commit")
+
+##################################
+# Linux RBE toolchain container. #
+##################################
+
+# Generates a Linux RBE toolchain container image for SkCMS.
+#
+# This container can be pushed to GCR via the //:push_rbe_container_skcms_linux rule.
+#
+# To debug this image:
+#
+#     # Build the container image.
+#     $ bazel build //:rbe_container_skcms_linux
+#
+#     # Load the container.
+#     $ docker load -i bazel-bin/rbe_container_skcms_linux_commit.tar
+#     Loaded image: bazel/default:rbe_container_skcms_linux
+#
+#     # Run the container.
+#     $ docker run -it bazel/default:rbe_container_skcms_linux /bin/bash
+container_run_and_commit(
+    name = "rbe_container_skcms_linux",
+    commands = [
+        # Install the packages needed to build SkCMS.
+        "apt-get update",
+        "apt-get install -y clang"
+    ],
+    image = "@ubuntu1804//image",
+    tags = [
+        "manual",  # Exclude it from wildcard queries, e.g. "bazel build //...".
+        "no-remote",
+    ],
+)
+
+# This target can be used to upload the custom RBE container toolchain to GCR. It will be available
+# as gcr.io/skia-public/rbe-container-skcms-linux.
+#
+# Note: this can take several minutes to finish because it will upload a >3GB .tar file to GCR.
+container_push(
+    name = "push_rbe_container_skcms_linux",
+    format = "Docker",
+    image = ":rbe_container_skcms_linux_commit.tar",  # Generated by //:rbe_container_skcms_linux.
+    registry = "gcr.io",
+    repository = "skia-public/rbe-container-skcms-linux",
+    tag = "{STABLE_DOCKER_TAG}",
+    tags = [
+        "manual",  # Exclude it from wildcard queries, e.g. "bazel build //...".
+        "no-remote",  # We cannot build containers on RBE.
+    ],
+)
+
+#########
+# SkCMS #
+#########
+
+cc_library(
+    name = "skcms",
+    srcs = [
+        "skcms.cc",
+        "skcms_internal.h",
+        "src/Transform_inl.h",
+    ],
+    hdrs = ["skcms.h"],
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "test_only",
+    testonly = True,
+    srcs = ["test_only.c"],
+    hdrs = ["test_only.h"],
+    deps = [":skcms"],
+)
+
+cc_test(
+    name = "tests",
+    size = "small",
+    srcs = ["tests.c"],
+    data = glob(["profiles/**"]),
+    deps = [
+        ":skcms",
+        ":test_only",
+    ],
+)
+
+cc_binary(
+    name = "iccdump",
+    testonly = True,
+    srcs = ["iccdump.c"],
+    linkopts = ["-ldl"],
+    deps = [
+        ":skcms",
+        ":test_only",
+    ],
+)
+
+cc_binary(
+    name = "bench",
+    srcs = ["bench.c"],
+    deps = [":skcms"],
+)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/LICENSE b/third-party/libjxl/libjxl/third_party/skcms/LICENSE
new file mode 100644
index 0000000000..6c7c5be360
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/LICENSE
@@ -0,0 +1,29 @@
+// Copyright (c) 2018 Google Inc. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--------------------------------------------------------------------------------
diff --git a/third-party/libjxl/libjxl/third_party/skcms/OWNERS b/third-party/libjxl/libjxl/third_party/skcms/OWNERS
new file mode 100644
index 0000000000..72e8ffc0db
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/OWNERS
@@ -0,0 +1 @@
+*
diff --git a/third-party/libjxl/libjxl/third_party/skcms/README.chromium b/third-party/libjxl/libjxl/third_party/skcms/README.chromium
new file mode 100644
index 0000000000..046f6b1d19
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/README.chromium
@@ -0,0 +1,5 @@
+Name: skcms
+URL: https://skia.org/
+Version: unknown
+Security Critical: yes
+License: BSD
diff --git a/third-party/libjxl/libjxl/third_party/skcms/WORKSPACE.bazel b/third-party/libjxl/libjxl/third_party/skcms/WORKSPACE.bazel
new file mode 100644
index 0000000000..19945c083c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/WORKSPACE.bazel
@@ -0,0 +1,57 @@
+###############################################
+# Local repositories with RBE configurations. #
+###############################################
+
+local_repository(
+  name = "rbe_linux_toolchains",
+  path = "bazel/rbe/linux-bazel-4.2.1",
+)
+
+local_repository(
+  name = "rbe_windows_toolchains",
+  path = "bazel/rbe/windows-bazel-4.2.1",
+)
+
+############
+# Android. #
+############
+
+load("//toolchain:download_toolchains.bzl", "download_toolchains_for_skcms")
+
+download_toolchains_for_skcms("clang_linux_amd64", "ndk_linux_amd64")
+
+##################################
+# Docker rules and dependencies. #
+##################################
+
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+
+http_archive(
+    name = "io_bazel_rules_docker",
+    sha256 = "1f4e59843b61981a96835dc4ac377ad4da9f8c334ebe5e0bb3f58f80c09735f4",
+    strip_prefix = "rules_docker-0.19.0",
+    urls = ["https://github.com/bazelbuild/rules_docker/releases/download/v0.19.0/rules_docker-v0.19.0.tar.gz"],
+)
+
+load(
+    "@io_bazel_rules_docker//repositories:repositories.bzl",
+    container_repositories = "repositories",
+)
+container_repositories()
+
+load("@io_bazel_rules_docker//repositories:deps.bzl", container_deps = "deps")
+
+container_deps()
+
+load(
+    "@io_bazel_rules_docker//container:container.bzl",
+    "container_pull",
+)
+
+# Pulls the Docker image used as the base for SkCMS's Linux RBE toolchain container image.
+container_pull(
+    name = "ubuntu1804",
+    digest = "sha256:e006d8c083684299f1726b47361bfe5acfa0638a226e98b957681a2d135fbd40",
+    registry = "gcr.io",
+    repository = "cloud-marketplace/google/ubuntu1804",
+)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/platform/BUILD.bazel b/third-party/libjxl/libjxl/third_party/skcms/bazel/platform/BUILD.bazel
new file mode 100644
index 0000000000..aacf1bd05f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/platform/BUILD.bazel
@@ -0,0 +1,46 @@
+# https://bazel.build/concepts/platforms-intro
+# https://bazel.build/docs/platforms
+platform(
+    name = "android_arm32",
+    constraint_values = [
+        "@platforms//os:android",  # https://github.com/bazelbuild/platforms/blob/main/os/BUILD
+        "@platforms//cpu:armv7",  # https://github.com/bazelbuild/platforms/blob/main/cpu/BUILD
+    ],
+)
+
+platform(
+    name = "android_arm64",
+    constraint_values = [
+        "@platforms//os:android",
+        "@platforms//cpu:arm64",
+    ],
+)
+
+platform(
+    name = "linux_x64_hermetic",
+    constraint_values = [
+        "@platforms//os:linux",
+        "@platforms//cpu:x86_64",
+        ":use_hermetic_toolchain",
+    ],
+)
+
+platform(
+    name = "host_with_hermetic_toolchain",
+    constraint_values = [
+        ":use_hermetic_toolchain",
+    ],
+    parents = ["@local_config_platform//:host"],
+)
+
+# This constraint allows us to force Bazel to resolve our hermetic toolchain to build
+# the target and not a default one (e.g. on the Linux RBE instance). We do this by
+# adding the constraint to our platforms that describe the target we want Bazel to build for.
+# https://bazel.build/reference/be/platform#constraint_setting
+constraint_setting(name = "skcms_hermetic_toolchain")
+
+constraint_value(
+    name = "use_hermetic_toolchain",
+    constraint_setting = ":skcms_hermetic_toolchain",
+    visibility = ["//visibility:public"],
+)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/README.md b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/README.md
new file mode 100644
index 0000000000..25fe0ff1ec
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/README.md
@@ -0,0 +1,150 @@
+# Bazel toolchain configurations for RBE
+
+This directory contains Bazel toolchain configurations for RBE grouped by operating system.
+
+It might be necessary to rebuild these configurations when the RBE toolchain container image
+for a specific operating system is updated, or when a new Bazel version is released.
+
+## Linux
+
+Directories `linux-bazel-<BAZEL VERSION>` contain Bazel toolchain configurations for Linux RBE
+builds. Multiple such directories may exist to ease migrating from one Bazel version to the next.
+
+### Toolchain configuration regeneration instructions
+
+The Linux RBE toolchain configuration must be regenerated whenever the Linux RBE toolchain
+container image changes, or when upgrading to a new Bazel version.
+
+#### Step 1
+
+Clone the [bazel-toolchains](https://github.com/bazelbuild/bazel-toolchains) repository, build the
+`rbe_configs_gen` binary, and put it in your `$PATH`:
+
+```
+$ git clone https://github.com/bazelbuild/bazel-toolchains
+
+$ cd bazel-toolchains
+
+# This assumes that $HOME/bin is in your $PATH.
+$ go build -o $HOME/bin/rbe_configs_gen ./cmd/rbe_configs_gen/rbe_configs_gen.go
+```
+
+#### Step 2
+
+Generate a new `//bazel/rbe/linux-bazel-<BAZEL VERSION>` directory with the
+`rbe_configs_gen` CLI tool:
+
+```
+# Replace the <PLACEHOLDERS> as needed.
+$ rbe_configs_gen \
+      --bazel_version=<BAZEL VERSION> \
+      --toolchain_container=gcr.io/skia-public/rbe-container-skcms-linux@sha256:<HASH OF MOST RECENT IMAGE> \
+      --output_src_root=<PATH TO REPOSITORY CHECKOUT> \
+      --output_config_path=bazel/rbe/linux-bazel-<BAZEL VERSION> \
+      --generate_java_configs=false \
+      --exec_os=linux \
+      --target_os=linux
+```
+
+If `rbe_configs_gen` fails, try deleting all files under
+`//bazel/rbe/linux-bazel-<BAZEL VERSION>` (if it exists) and re-run `rbe_configs_gen`.
+
+#### Step 3
+
+Add an empty `//bazel/rbe/linux-bazel-<BAZEL VERSION>/WORKSPACE` file.
+
+#### Step 4
+
+Open file `//bazel/rbe/linux-bazel-<BAZEL VERSION>/config/BUILD`, look for the `toolchain`
+rule named `cc-toolchain`, and change the `toolchain` attribute as follows:
+
+```
+# Before.
+toolchain(
+    name = "cc-toolchain",
+    ...
+    toolchain = "//bazel/rbe/linux-bazel-4.2.1/cc:cc-compiler-k8",
+    ...
+)
+
+# After.
+toolchain(
+    name = "cc-toolchain",
+    ...
+    toolchain = "//cc:cc-compiler-k8",
+    ...
+)
+```
+
+#### Step 5
+
+Open file `//bazel/rbe/linux-bazel-<BAZEL VERSION>/config/BUILD`, look for the `platform`
+rule named `platform`, and change the `exec_properties` attribute as follows:
+
+```
+# Before.
+platform(
+    name = "platform",
+    ...
+    exec_properties = {
+        ...
+    },
+    ...
+)
+
+# After.
+platform(
+    name = "platform",
+    ...
+    exec_properties = {
+        ...
+        "dockerAddCapabilities": "SYS_PTRACE",
+    },
+    ...
+)
+```
+
+This is necessary to run tests on Linux RBE with `--config=asan` (details
+[here](https://github.com/google/sanitizers/issues/916)).
+
+(As an alternative, we could disable memory leak detection by setting the environment variable
+`ASAN_OPTIONS=detect_leaks=0` via the `--action_env` Bazel flag.)
+
+#### Step 6
+
+Update the paths in `//WORKSPACE` as needed.
+
+## Windows
+
+Directories `windows-bazel-<BAZEL VERSION>` contain Bazel toolchain configurations for Windows RBE
+builds. Multiple such directories may exist to ease migrating from one Bazel version to the next.
+
+### Toolchain configuration regeneration instructions
+
+The Windows RBE toolchain configuration must be regenerated whenever the Windows RBE toolchain
+container image changes, or when upgrading to a new Bazel version.
+
+The instructions to regenerate the Windows RBE toolchain are the same as for the Linux RBE
+toolchain, with the following differences:
+
+- Any paths should be changed as needed.
+
+- In Step 2, run the following command instead:
+
+```
+# Replace the <PLACEHOLDERS> as needed.
+$ rbe_configs_gen.exe \
+      --bazel_version=<BAZEL VERSION> \
+      --toolchain_container=gcr.io/skia-public/rbe-container-skia-windows@sha256:<HASH OF MOST RECENT IMAGE> \
+      --output_src_root=<PATH TO REPOSITORY CHECKOUT> \
+      --output_config_path=bazel/rbe/windows-bazel-<BAZEL VERSION> \
+      --generate_java_configs=false \
+      --exec_os=windows \
+      --target_os=windows
+```
+
+- Step 5 can be omitted.
+
+## macOS
+
+TODO(lovisolo)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/LICENSE b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/LICENSE
new file mode 100755
index 0000000000..f0a1f59a3c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/LICENSE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2021 Google LLC
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/WORKSPACE b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/WORKSPACE
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/BUILD b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/BUILD
new file mode 100644
index 0000000000..5afa0ad6bd
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/BUILD
@@ -0,0 +1,152 @@
+# Copyright 2016 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This becomes the BUILD file for @local_config_cc// under non-BSD unixes.
+
+package(default_visibility = ["//visibility:public"])
+
+load(":cc_toolchain_config.bzl", "cc_toolchain_config")
+load(":armeabi_cc_toolchain_config.bzl", "armeabi_cc_toolchain_config")
+load("@rules_cc//cc:defs.bzl", "cc_toolchain", "cc_toolchain_suite")
+
+licenses(["notice"])  # Apache 2.0
+
+cc_library(
+    name = "malloc",  # declares no srcs, hdrs or deps
+)
+
+filegroup(
+    name = "empty",  # used by the toolchain *_files attributes that need no inputs
+    srcs = [],
+)
+
+filegroup(
+    name = "cc_wrapper",
+    srcs = ["cc_wrapper.sh"],
+)
+
+filegroup(
+    name = "compiler_deps",  # optional extra_tools/ files plus the builtin include path list
+    srcs = glob(["extra_tools/**"], allow_empty = True) + [":builtin_include_directory_paths"],
+)
+
+# This is the entry point for --crosstool_top.  Toolchains are found
+# by lopping off the name of --crosstool_top and searching for
+# the "${CPU}" entry in the toolchains attribute.
+cc_toolchain_suite(
+    name = "toolchain",
+    toolchains = {
+        "k8|clang": ":cc-compiler-k8",  # "<cpu>|<compiler>" key form
+        "k8": ":cc-compiler-k8",  # bare "<cpu>" key, same toolchain
+        "armeabi-v7a|compiler": ":cc-compiler-armeabi-v7a",  # the stub config's compiler is literally "compiler"
+        "armeabi-v7a": ":cc-compiler-armeabi-v7a",
+    },
+)
+
+cc_toolchain(
+    name = "cc-compiler-k8",
+    toolchain_identifier = "linux_gnu_x86",
+    toolchain_config = ":linux_gnu_x86",  # the cc_toolchain_config target declared below
+    all_files = ":compiler_deps",
+    ar_files = ":compiler_deps",
+    as_files = ":compiler_deps",
+    compiler_files = ":compiler_deps",
+    dwp_files = ":empty",  # dwp, objcopy and strip get no input files
+    linker_files = ":compiler_deps",
+    objcopy_files = ":empty",
+    strip_files = ":empty",
+    supports_param_files = 1,
+    module_map = ":module.modulemap",  # NOTE(review): not declared in this BUILD file; presumably a source file in this package - confirm
+)
+
+cc_toolchain_config(
+    name = "linux_gnu_x86",
+    cpu = "k8",
+    compiler = "clang",
+    toolchain_identifier = "linux_gnu_x86",
+    host_system_name = "i686-unknown-linux-gnu",  # NOTE(review): i686 host next to an x86_64 target; looks like generator output - confirm before changing
+    target_system_name = "x86_64-unknown-linux-gnu",
+    target_libc = "glibc_2.19",
+    abi_version = "clang",
+    abi_libc_version = "glibc_2.19",
+    cxx_builtin_include_directories = ["/usr/local/include",  # same ten paths as the builtin_include_directory_paths file
+    "/usr/lib/llvm-6.0/lib/clang/6.0.0/include",
+    "/usr/include/x86_64-linux-gnu",
+    "/usr/include",
+    "/usr/lib/llvm-6.0/lib/clang/6.0.0/share",
+    "/usr/include/c++/7.5.0",
+    "/usr/include/x86_64-linux-gnu/c++/7.5.0",
+    "/usr/include/c++/7.5.0/backward",
+    "/usr/include/clang/6.0.0/include",
+    "/usr/lib/clang/6.0.0/include"],
+    tool_paths = {"ar": "/usr/bin/ar",
+        "ld": "/usr/bin/ld",
+        "llvm-cov": "None",  # the literal string "None", not a filesystem path
+        "cpp": "/usr/bin/cpp",
+        "gcc": "/usr/bin/clang",  # the "gcc" tool slot points at clang
+        "dwp": "/usr/bin/dwp",
+        "gcov": "None",  # the literal string "None", not a filesystem path
+        "nm": "/usr/bin/nm",
+        "objcopy": "/usr/bin/objcopy",
+        "objdump": "/usr/bin/objdump",
+        "strip": "/usr/bin/strip"},
+    compile_flags = ["-U_FORTIFY_SOURCE",  # added to every compile action
+    "-fstack-protector",
+    "-Wall",
+    "-Wthread-safety",
+    "-Wself-assign",
+    "-fcolor-diagnostics",
+    "-fno-omit-frame-pointer"],
+    opt_compile_flags = ["-g0",  # added only when the "opt" feature is active
+    "-O2",
+    "-D_FORTIFY_SOURCE=1",
+    "-DNDEBUG",
+    "-ffunction-sections",
+    "-fdata-sections"],
+    dbg_compile_flags = ["-g"],  # added only when the "dbg" feature is active
+    cxx_flags = ["-std=c++0x"],  # C++ compile actions and lto_backend only
+    link_flags = ["-fuse-ld=/usr/bin/ld.gold",
+    "-Wl,-no-as-needed",
+    "-Wl,-z,relro,-z,now",
+    "-B/usr/bin"],
+    link_libs = ["-lstdc++",
+    "-lm"],
+    opt_link_flags = ["-Wl,--gc-sections"],  # added only when the "opt" feature is active
+    unfiltered_compile_flags = ["-no-canonical-prefixes",
+    "-Wno-builtin-macro-redefined",  # silences the warning the three -D overrides below would trigger
+    "-D__DATE__=\"redacted\"",  # pin the built-in date/time macros to a constant string
+    "-D__TIMESTAMP__=\"redacted\"",
+    "-D__TIME__=\"redacted\""],
+    coverage_compile_flags = ["--coverage"],
+    coverage_link_flags = ["--coverage"],
+    supports_start_end_lib = True,
+)
+
+# Android tooling requires a default toolchain for the armeabi-v7a cpu.
+cc_toolchain(
+    name = "cc-compiler-armeabi-v7a",
+    toolchain_identifier = "stub_armeabi-v7a",
+    toolchain_config = ":stub_armeabi-v7a",  # created by the armeabi_cc_toolchain_config call below
+    all_files = ":empty",  # stub: every *_files attribute is the empty filegroup
+    ar_files = ":empty",
+    as_files = ":empty",
+    compiler_files = ":empty",
+    dwp_files = ":empty",
+    linker_files = ":empty",
+    objcopy_files = ":empty",
+    strip_files = ":empty",
+    supports_param_files = 1,
+)
+
+armeabi_cc_toolchain_config(name = "stub_armeabi-v7a")  # config whose tool paths are all /bin/false
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/WORKSPACE b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/WORKSPACE
new file mode 100644
index 0000000000..bc05b4c36f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/WORKSPACE
@@ -0,0 +1,2 @@
+# DO NOT EDIT: automatically generated WORKSPACE file for cc_autoconf rule
+workspace(name = "local_config_cc")
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/armeabi_cc_toolchain_config.bzl b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/armeabi_cc_toolchain_config.bzl
new file mode 100644
index 0000000000..94e0720bf6
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/armeabi_cc_toolchain_config.bzl
@@ -0,0 +1,82 @@
+# Copyright 2019 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""A Starlark cc_toolchain configuration rule"""
+
+load(
+    "@bazel_tools//tools/cpp:cc_toolchain_config_lib.bzl",
+    "feature",
+    "tool_path",
+)
+
+def _impl(ctx):
+    """Builds the placeholder toolchain config for the armeabi-v7a cpu.
+
+    Every tool path is /bin/false, and no action configs, include directories,
+    artifact name patterns or make variables are declared.
+    """
+
+    # The cpu name doubles as every system, libc and ABI identifier.
+    arch = "armeabi-v7a"
+
+    # One entry per tool name, each pointing at /bin/false.
+    stub_tool_names = [
+        "ar",
+        "compat-ld",
+        "cpp",
+        "dwp",
+        "gcc",
+        "gcov",
+        "ld",
+        "nm",
+        "objcopy",
+        "objdump",
+        "strip",
+    ]
+    tool_paths = [
+        tool_path(name = tool_name, path = "/bin/false")
+        for tool_name in stub_tool_names
+    ]
+
+    # The only two features the stub enables.
+    features = [
+        feature(name = "supports_dynamic_linker", enabled = True),
+        feature(name = "supports_pic", enabled = True),
+    ]
+
+    return cc_common.create_cc_toolchain_config_info(
+        ctx = ctx,
+        toolchain_identifier = "stub_armeabi-v7a",
+        host_system_name = arch,
+        target_system_name = arch,
+        target_cpu = arch,
+        target_libc = arch,
+        compiler = "compiler",
+        abi_version = arch,
+        abi_libc_version = arch,
+        cc_target_os = None,
+        builtin_sysroot = None,
+        features = features,
+        action_configs = [],
+        artifact_name_patterns = [],
+        cxx_builtin_include_directories = [],
+        make_variables = [],
+        tool_paths = tool_paths,
+    )
+
+armeabi_cc_toolchain_config = rule(
+    implementation = _impl,  # returns the fixed stub config; reads nothing from ctx.attr
+    attrs = {},  # no rule-specific attributes
+    provides = [CcToolchainConfigInfo],
+)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/builtin_include_directory_paths b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/builtin_include_directory_paths
new file mode 100644
index 0000000000..8602eb8caa
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/builtin_include_directory_paths
@@ -0,0 +1,16 @@
+This file is generated by cc_configure and contains builtin include directories
+that /usr/bin/clang reported. This file is a dependency of every compilation action and
+changes to it will be reflected in the action cache key. When some of these
+paths change, Bazel will make sure to rerun the action, even though none of the
+declared action inputs or the action command line changes.
+
+/usr/local/include
+/usr/lib/llvm-6.0/lib/clang/6.0.0/include
+/usr/include/x86_64-linux-gnu
+/usr/include
+/usr/lib/llvm-6.0/lib/clang/6.0.0/share
+/usr/include/c++/7.5.0
+/usr/include/x86_64-linux-gnu/c++/7.5.0
+/usr/include/c++/7.5.0/backward
+/usr/include/clang/6.0.0/include
+/usr/lib/clang/6.0.0/include
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/cc_toolchain_config.bzl b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/cc_toolchain_config.bzl
new file mode 100644
index 0000000000..5dbaa86ab2
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/cc_toolchain_config.bzl
@@ -0,0 +1,1272 @@
+# Copyright 2019 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""A Starlark cc_toolchain configuration rule"""
+
+load(
+    "@bazel_tools//tools/cpp:cc_toolchain_config_lib.bzl",
+    "action_config",
+    "feature",
+    "feature_set",
+    "flag_group",
+    "flag_set",
+    "tool",
+    "tool_path",
+    "variable_with_value",
+    "with_feature_set",
+)
+load("@bazel_tools//tools/build_defs/cc:action_names.bzl", "ACTION_NAMES")
+
+def layering_check_features(compiler):
+    """Returns the layering-check feature definitions for `compiler`.
+
+    Only "clang" gets them; any other compiler yields an empty list.
+    """
+    if compiler != "clang":
+        return []
+
+    # Both flag sets below apply to the same four actions.
+    module_aware_actions = [
+        ACTION_NAMES.c_compile,
+        ACTION_NAMES.cpp_compile,
+        ACTION_NAMES.cpp_header_parsing,
+        ACTION_NAMES.cpp_module_compile,
+    ]
+
+    use_module_maps_feature = feature(
+        name = "use_module_maps",
+        requires = [feature_set(features = ["module_maps"])],
+        flag_sets = [
+            flag_set(
+                actions = module_aware_actions,
+                flag_groups = [
+                    flag_group(
+                        flags = [
+                            "-fmodule-name=%{module_name}",
+                            "-fmodule-map-file=%{module_map_file}",
+                        ],
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    # Tell blaze we support module maps in general, so they will be generated
+    # for all c/c++ rules. Not every C++ rule supports module maps, so other
+    # features must require this one rather than imply it.
+    module_maps_feature = feature(name = "module_maps", enabled = True)
+
+    layering_check_feature = feature(
+        name = "layering_check",
+        implies = ["use_module_maps"],
+        flag_sets = [
+            flag_set(
+                actions = module_aware_actions,
+                flag_groups = [
+                    flag_group(
+                        flags = ["-fmodules-strict-decluse", "-Wprivate-header"],
+                    ),
+                    flag_group(
+                        iterate_over = "dependent_module_map_files",
+                        flags = ["-fmodule-map-file=%{dependent_module_map_files}"],
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    return [use_module_maps_feature, module_maps_feature, layering_check_feature]
+
+all_compile_actions = [  # the full set; the three *_compile_actions lists below are subsets of it
+    ACTION_NAMES.c_compile,
+    ACTION_NAMES.cpp_compile,
+    ACTION_NAMES.linkstamp_compile,
+    ACTION_NAMES.assemble,
+    ACTION_NAMES.preprocess_assemble,
+    ACTION_NAMES.cpp_header_parsing,
+    ACTION_NAMES.cpp_module_compile,
+    ACTION_NAMES.cpp_module_codegen,
+    ACTION_NAMES.clif_match,
+    ACTION_NAMES.lto_backend,
+]
+
+all_cpp_compile_actions = [  # full set minus c_compile, assemble, preprocess_assemble, lto_backend
+    ACTION_NAMES.cpp_compile,
+    ACTION_NAMES.linkstamp_compile,
+    ACTION_NAMES.cpp_header_parsing,
+    ACTION_NAMES.cpp_module_compile,
+    ACTION_NAMES.cpp_module_codegen,
+    ACTION_NAMES.clif_match,
+]
+
+preprocessor_compile_actions = [  # full set minus assemble, cpp_module_codegen, lto_backend
+    ACTION_NAMES.c_compile,
+    ACTION_NAMES.cpp_compile,
+    ACTION_NAMES.linkstamp_compile,
+    ACTION_NAMES.preprocess_assemble,
+    ACTION_NAMES.cpp_header_parsing,
+    ACTION_NAMES.cpp_module_compile,
+    ACTION_NAMES.clif_match,
+]
+
+codegen_compile_actions = [  # full set minus cpp_header_parsing, cpp_module_compile, clif_match
+    ACTION_NAMES.c_compile,
+    ACTION_NAMES.cpp_compile,
+    ACTION_NAMES.linkstamp_compile,
+    ACTION_NAMES.assemble,
+    ACTION_NAMES.preprocess_assemble,
+    ACTION_NAMES.cpp_module_codegen,
+    ACTION_NAMES.lto_backend,
+]
+
+all_link_actions = [  # executable, dynamic-library and nodeps-dynamic-library links
+    ACTION_NAMES.cpp_link_executable,
+    ACTION_NAMES.cpp_link_dynamic_library,
+    ACTION_NAMES.cpp_link_nodeps_dynamic_library,
+]
+
+lto_index_actions = [  # one LTO indexing action per link action above
+    ACTION_NAMES.lto_index_for_executable,
+    ACTION_NAMES.lto_index_for_dynamic_library,
+    ACTION_NAMES.lto_index_for_nodeps_dynamic_library,
+]
+
+def _impl(ctx):
+    tool_paths = [
+        tool_path(name = name, path = path)
+        for name, path in ctx.attr.tool_paths.items()
+    ]
+    action_configs = []
+
+    llvm_cov_action = action_config(
+        action_name = ACTION_NAMES.llvm_cov,
+        tools = [
+            tool(
+                path = ctx.attr.tool_paths["llvm-cov"],
+            ),
+        ],
+    )
+
+    action_configs.append(llvm_cov_action)
+
+    supports_pic_feature = feature(
+        name = "supports_pic",
+        enabled = True,
+    )
+    supports_start_end_lib_feature = feature(
+        name = "supports_start_end_lib",
+        enabled = True,
+    )
+
+    default_compile_flags_feature = feature(
+        name = "default_compile_flags",
+        enabled = True,
+        flag_sets = [
+            flag_set(
+                actions = all_compile_actions,
+                flag_groups = ([
+                    flag_group(
+                        flags = ctx.attr.compile_flags,
+                    ),
+                ] if ctx.attr.compile_flags else []),
+            ),
+            flag_set(
+                actions = all_compile_actions,
+                flag_groups = ([
+                    flag_group(
+                        flags = ctx.attr.dbg_compile_flags,
+                    ),
+                ] if ctx.attr.dbg_compile_flags else []),
+                with_features = [with_feature_set(features = ["dbg"])],
+            ),
+            flag_set(
+                actions = all_compile_actions,
+                flag_groups = ([
+                    flag_group(
+                        flags = ctx.attr.opt_compile_flags,
+                    ),
+                ] if ctx.attr.opt_compile_flags else []),
+                with_features = [with_feature_set(features = ["opt"])],
+            ),
+            flag_set(
+                actions = all_cpp_compile_actions + [ACTION_NAMES.lto_backend],
+                flag_groups = ([
+                    flag_group(
+                        flags = ctx.attr.cxx_flags,
+                    ),
+                ] if ctx.attr.cxx_flags else []),
+            ),
+        ],
+    )
+
+    default_link_flags_feature = feature(
+        name = "default_link_flags",
+        enabled = True,
+        flag_sets = [
+            flag_set(
+                actions = all_link_actions + lto_index_actions,
+                flag_groups = ([
+                    flag_group(
+                        flags = ctx.attr.link_flags,
+                    ),
+                ] if ctx.attr.link_flags else []),
+            ),
+            flag_set(
+                actions = all_link_actions + lto_index_actions,
+                flag_groups = ([
+                    flag_group(
+                        flags = ctx.attr.opt_link_flags,
+                    ),
+                ] if ctx.attr.opt_link_flags else []),
+                with_features = [with_feature_set(features = ["opt"])],
+            ),
+        ],
+    )
+
+    dbg_feature = feature(name = "dbg")
+
+    opt_feature = feature(name = "opt")
+
+    sysroot_feature = feature(
+        name = "sysroot",
+        enabled = True,
+        flag_sets = [
+            flag_set(
+                actions = [
+                    ACTION_NAMES.preprocess_assemble,
+                    ACTION_NAMES.linkstamp_compile,
+                    ACTION_NAMES.c_compile,
+                    ACTION_NAMES.cpp_compile,
+                    ACTION_NAMES.cpp_header_parsing,
+                    ACTION_NAMES.cpp_module_compile,
+                    ACTION_NAMES.cpp_module_codegen,
+                    ACTION_NAMES.lto_backend,
+                    ACTION_NAMES.clif_match,
+                ] + all_link_actions + lto_index_actions,
+                flag_groups = [
+                    flag_group(
+                        flags = ["--sysroot=%{sysroot}"],
+                        expand_if_available = "sysroot",
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    fdo_optimize_feature = feature(
+        name = "fdo_optimize",
+        flag_sets = [
+            flag_set(
+                actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile],
+                flag_groups = [
+                    flag_group(
+                        flags = [
+                            "-fprofile-use=%{fdo_profile_path}",
+                            "-fprofile-correction",
+                        ],
+                        expand_if_available = "fdo_profile_path",
+                    ),
+                ],
+            ),
+        ],
+        provides = ["profile"],
+    )
+
+    supports_dynamic_linker_feature = feature(name = "supports_dynamic_linker", enabled = True)
+
+    user_compile_flags_feature = feature(
+        name = "user_compile_flags",
+        enabled = True,
+        flag_sets = [
+            flag_set(
+                actions = all_compile_actions,
+                flag_groups = [
+                    flag_group(
+                        flags = ["%{user_compile_flags}"],
+                        iterate_over = "user_compile_flags",
+                        expand_if_available = "user_compile_flags",
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    unfiltered_compile_flags_feature = feature(
+        name = "unfiltered_compile_flags",
+        enabled = True,
+        flag_sets = [
+            flag_set(
+                actions = all_compile_actions,
+                flag_groups = ([
+                    flag_group(
+                        flags = ctx.attr.unfiltered_compile_flags,
+                    ),
+                ] if ctx.attr.unfiltered_compile_flags else []),
+            ),
+        ],
+    )
+
+    library_search_directories_feature = feature(
+        name = "library_search_directories",
+        flag_sets = [
+            flag_set(
+                actions = all_link_actions + lto_index_actions,
+                flag_groups = [
+                    flag_group(
+                        flags = ["-L%{library_search_directories}"],
+                        iterate_over = "library_search_directories",
+                        expand_if_available = "library_search_directories",
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    static_libgcc_feature = feature(
+        name = "static_libgcc",
+        enabled = True,
+        flag_sets = [
+            flag_set(
+                actions = [
+                    ACTION_NAMES.cpp_link_executable,
+                    ACTION_NAMES.cpp_link_dynamic_library,
+                    ACTION_NAMES.lto_index_for_executable,
+                    ACTION_NAMES.lto_index_for_dynamic_library,
+                ],
+                flag_groups = [flag_group(flags = ["-static-libgcc"])],
+                with_features = [
+                    with_feature_set(features = ["static_link_cpp_runtimes"]),
+                ],
+            ),
+        ],
+    )
+
+    pic_feature = feature(
+        name = "pic",
+        enabled = True,
+        flag_sets = [
+            flag_set(
+                actions = [
+                    ACTION_NAMES.assemble,
+                    ACTION_NAMES.preprocess_assemble,
+                    ACTION_NAMES.linkstamp_compile,
+                    ACTION_NAMES.c_compile,
+                    ACTION_NAMES.cpp_compile,
+                    ACTION_NAMES.cpp_module_codegen,
+                    ACTION_NAMES.cpp_module_compile,
+                ],
+                flag_groups = [
+                    flag_group(flags = ["-fPIC"], expand_if_available = "pic"),
+                ],
+            ),
+        ],
+    )
+
+    per_object_debug_info_feature = feature(
+        name = "per_object_debug_info",
+        flag_sets = [
+            flag_set(
+                actions = [
+                    ACTION_NAMES.assemble,
+                    ACTION_NAMES.preprocess_assemble,
+                    ACTION_NAMES.c_compile,
+                    ACTION_NAMES.cpp_compile,
+                    ACTION_NAMES.cpp_module_codegen,
+                ],
+                flag_groups = [
+                    flag_group(
+                        flags = ["-gsplit-dwarf"],
+                        expand_if_available = "per_object_debug_info_file",
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    preprocessor_defines_feature = feature(
+        name = "preprocessor_defines",
+        enabled = True,
+        flag_sets = [
+            flag_set(
+                actions = [
+                    ACTION_NAMES.preprocess_assemble,
+                    ACTION_NAMES.linkstamp_compile,
+                    ACTION_NAMES.c_compile,
+                    ACTION_NAMES.cpp_compile,
+                    ACTION_NAMES.cpp_header_parsing,
+                    ACTION_NAMES.cpp_module_compile,
+                    ACTION_NAMES.clif_match,
+                ],
+                flag_groups = [
+                    flag_group(
+                        flags = ["-D%{preprocessor_defines}"],
+                        iterate_over = "preprocessor_defines",
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    cs_fdo_optimize_feature = feature(
+        name = "cs_fdo_optimize",
+        flag_sets = [
+            flag_set(
+                actions = [ACTION_NAMES.lto_backend],
+                flag_groups = [
+                    flag_group(
+                        flags = [
+                            "-fprofile-use=%{fdo_profile_path}",
+                            "-Wno-profile-instr-unprofiled",
+                            "-Wno-profile-instr-out-of-date",
+                            "-fprofile-correction",
+                        ],
+                        expand_if_available = "fdo_profile_path",
+                    ),
+                ],
+            ),
+        ],
+        provides = ["csprofile"],
+    )
+
+    autofdo_feature = feature(
+        name = "autofdo",
+        flag_sets = [
+            flag_set(
+                actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile],
+                flag_groups = [
+                    flag_group(
+                        flags = [
+                            "-fauto-profile=%{fdo_profile_path}",
+                            "-fprofile-correction",
+                        ],
+                        expand_if_available = "fdo_profile_path",
+                    ),
+                ],
+            ),
+        ],
+        provides = ["profile"],
+    )
+
+    runtime_library_search_directories_feature = feature(
+        name = "runtime_library_search_directories",
+        flag_sets = [
+            flag_set(
+                actions = all_link_actions + lto_index_actions,
+                flag_groups = [
+                    flag_group(
+                        iterate_over = "runtime_library_search_directories",
+                        flag_groups = [
+                            flag_group(
+                                flags = [
+                                    "-Wl,-rpath,$EXEC_ORIGIN/%{runtime_library_search_directories}",
+                                ],
+                                expand_if_true = "is_cc_test",
+                            ),
+                            flag_group(
+                                flags = [
+                                    "-Wl,-rpath,$ORIGIN/%{runtime_library_search_directories}",
+                                ],
+                                expand_if_false = "is_cc_test",
+                            ),
+                        ],
+                        expand_if_available =
+                            "runtime_library_search_directories",
+                    ),
+                ],
+                with_features = [
+                    with_feature_set(features = ["static_link_cpp_runtimes"]),
+                ],
+            ),
+            flag_set(
+                actions = all_link_actions + lto_index_actions,
+                flag_groups = [
+                    flag_group(
+                        iterate_over = "runtime_library_search_directories",
+                        flag_groups = [
+                            flag_group(
+                                flags = [
+                                    "-Wl,-rpath,$ORIGIN/%{runtime_library_search_directories}",
+                                ],
+                            ),
+                        ],
+                        expand_if_available =
+                            "runtime_library_search_directories",
+                    ),
+                ],
+                with_features = [
+                    with_feature_set(
+                        not_features = ["static_link_cpp_runtimes"],
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    fission_support_feature = feature(
+        name = "fission_support",
+        flag_sets = [
+            flag_set(
+                actions = all_link_actions + lto_index_actions,
+                flag_groups = [
+                    flag_group(
+                        flags = ["-Wl,--gdb-index"],
+                        expand_if_available = "is_using_fission",
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    shared_flag_feature = feature(
+        name = "shared_flag",
+        flag_sets = [
+            flag_set(
+                actions = [
+                    ACTION_NAMES.cpp_link_dynamic_library,
+                    ACTION_NAMES.cpp_link_nodeps_dynamic_library,
+                    ACTION_NAMES.lto_index_for_dynamic_library,
+                    ACTION_NAMES.lto_index_for_nodeps_dynamic_library,
+                ],
+                flag_groups = [flag_group(flags = ["-shared"])],
+            ),
+        ],
+    )
+
+    random_seed_feature = feature(
+        name = "random_seed",
+        enabled = True,
+        flag_sets = [
+            flag_set(
+                actions = [
+                    ACTION_NAMES.c_compile,
+                    ACTION_NAMES.cpp_compile,
+                    ACTION_NAMES.cpp_module_codegen,
+                    ACTION_NAMES.cpp_module_compile,
+                ],
+                flag_groups = [
+                    flag_group(
+                        flags = ["-frandom-seed=%{output_file}"],
+                        expand_if_available = "output_file",
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    includes_feature = feature(
+        name = "includes",
+        enabled = True,
+        flag_sets = [
+            flag_set(
+                actions = [
+                    ACTION_NAMES.preprocess_assemble,
+                    ACTION_NAMES.linkstamp_compile,
+                    ACTION_NAMES.c_compile,
+                    ACTION_NAMES.cpp_compile,
+                    ACTION_NAMES.cpp_header_parsing,
+                    ACTION_NAMES.cpp_module_compile,
+                    ACTION_NAMES.clif_match,
+                    ACTION_NAMES.objc_compile,
+                    ACTION_NAMES.objcpp_compile,
+                ],
+                flag_groups = [
+                    flag_group(
+                        flags = ["-include", "%{includes}"],
+                        iterate_over = "includes",
+                        expand_if_available = "includes",
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    fdo_instrument_feature = feature(
+        name = "fdo_instrument",
+        flag_sets = [
+            flag_set(
+                actions = [
+                    ACTION_NAMES.c_compile,
+                    ACTION_NAMES.cpp_compile,
+                ] + all_link_actions + lto_index_actions,
+                flag_groups = [
+                    flag_group(
+                        flags = [
+                            "-fprofile-generate=%{fdo_instrument_path}",
+                            "-fno-data-sections",
+                        ],
+                        expand_if_available = "fdo_instrument_path",
+                    ),
+                ],
+            ),
+        ],
+        provides = ["profile"],
+    )
+
+    cs_fdo_instrument_feature = feature(
+        name = "cs_fdo_instrument",
+        flag_sets = [
+            flag_set(
+                actions = [
+                    ACTION_NAMES.c_compile,
+                    ACTION_NAMES.cpp_compile,
+                    ACTION_NAMES.lto_backend,
+                ] + all_link_actions + lto_index_actions,
+                flag_groups = [
+                    flag_group(
+                        flags = [
+                            "-fcs-profile-generate=%{cs_fdo_instrument_path}",
+                        ],
+                        expand_if_available = "cs_fdo_instrument_path",
+                    ),
+                ],
+            ),
+        ],
+        provides = ["csprofile"],
+    )
+
+    include_paths_feature = feature(
+        name = "include_paths",
+        enabled = True,
+        flag_sets = [
+            flag_set(
+                actions = [
+                    ACTION_NAMES.preprocess_assemble,
+                    ACTION_NAMES.linkstamp_compile,
+                    ACTION_NAMES.c_compile,
+                    ACTION_NAMES.cpp_compile,
+                    ACTION_NAMES.cpp_header_parsing,
+                    ACTION_NAMES.cpp_module_compile,
+                    ACTION_NAMES.clif_match,
+                    ACTION_NAMES.objc_compile,
+                    ACTION_NAMES.objcpp_compile,
+                ],
+                flag_groups = [
+                    flag_group(
+                        flags = ["-iquote", "%{quote_include_paths}"],
+                        iterate_over = "quote_include_paths",
+                    ),
+                    flag_group(
+                        flags = ["-I%{include_paths}"],
+                        iterate_over = "include_paths",
+                    ),
+                    flag_group(
+                        flags = ["-isystem", "%{system_include_paths}"],
+                        iterate_over = "system_include_paths",
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    symbol_counts_feature = feature(
+        name = "symbol_counts",
+        flag_sets = [
+            flag_set(
+                actions = all_link_actions + lto_index_actions,
+                flag_groups = [
+                    flag_group(
+                        flags = [
+                            "-Wl,--print-symbol-counts=%{symbol_counts_output}",
+                        ],
+                        expand_if_available = "symbol_counts_output",
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    llvm_coverage_map_format_feature = feature(
+        name = "llvm_coverage_map_format",
+        flag_sets = [
+            flag_set(
+                actions = [
+                    ACTION_NAMES.preprocess_assemble,
+                    ACTION_NAMES.c_compile,
+                    ACTION_NAMES.cpp_compile,
+                    ACTION_NAMES.cpp_module_compile,
+                    ACTION_NAMES.objc_compile,
+                    ACTION_NAMES.objcpp_compile,
+                ],
+                flag_groups = [
+                    flag_group(
+                        flags = [
+                            "-fprofile-instr-generate",
+                            "-fcoverage-mapping",
+                        ],
+                    ),
+                ],
+            ),
+            flag_set(
+                actions = all_link_actions + lto_index_actions + [
+                    "objc-executable",
+                    "objc++-executable",
+                ],
+                flag_groups = [
+                    flag_group(flags = ["-fprofile-instr-generate"]),
+                ],
+            ),
+        ],
+        requires = [feature_set(features = ["coverage"])],
+        provides = ["profile"],
+    )
+
+    strip_debug_symbols_feature = feature(
+        name = "strip_debug_symbols",
+        flag_sets = [
+            flag_set(
+                actions = all_link_actions + lto_index_actions,
+                flag_groups = [
+                    flag_group(
+                        flags = ["-Wl,-S"],
+                        expand_if_available = "strip_debug_symbols",
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    build_interface_libraries_feature = feature(
+        name = "build_interface_libraries",
+        flag_sets = [
+            flag_set(
+                actions = [
+                    ACTION_NAMES.cpp_link_dynamic_library,
+                    ACTION_NAMES.cpp_link_nodeps_dynamic_library,
+                    ACTION_NAMES.lto_index_for_dynamic_library,
+                    ACTION_NAMES.lto_index_for_nodeps_dynamic_library,
+                ],
+                flag_groups = [
+                    flag_group(
+                        flags = [
+                            "%{generate_interface_library}",
+                            "%{interface_library_builder_path}",
+                            "%{interface_library_input_path}",
+                            "%{interface_library_output_path}",
+                        ],
+                        expand_if_available = "generate_interface_library",
+                    ),
+                ],
+                with_features = [
+                    with_feature_set(
+                        features = ["supports_interface_shared_libraries"],
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    libraries_to_link_feature = feature(
+        name = "libraries_to_link",
+        flag_sets = [
+            flag_set(
+                actions = all_link_actions + lto_index_actions,
+                flag_groups = [
+                    flag_group(
+                        iterate_over = "libraries_to_link",
+                        flag_groups = [
+                            flag_group(
+                                flags = ["-Wl,--start-lib"],
+                                expand_if_equal = variable_with_value(
+                                    name = "libraries_to_link.type",
+                                    value = "object_file_group",
+                                ),
+                            ),
+                            flag_group(
+                                flags = ["-Wl,-whole-archive"],
+                                expand_if_true =
+                                    "libraries_to_link.is_whole_archive",
+                            ),
+                            flag_group(
+                                flags = ["%{libraries_to_link.object_files}"],
+                                iterate_over = "libraries_to_link.object_files",
+                                expand_if_equal = variable_with_value(
+                                    name = "libraries_to_link.type",
+                                    value = "object_file_group",
+                                ),
+                            ),
+                            flag_group(
+                                flags = ["%{libraries_to_link.name}"],
+                                expand_if_equal = variable_with_value(
+                                    name = "libraries_to_link.type",
+                                    value = "object_file",
+                                ),
+                            ),
+                            flag_group(
+                                flags = ["%{libraries_to_link.name}"],
+                                expand_if_equal = variable_with_value(
+                                    name = "libraries_to_link.type",
+                                    value = "interface_library",
+                                ),
+                            ),
+                            flag_group(
+                                flags = ["%{libraries_to_link.name}"],
+                                expand_if_equal = variable_with_value(
+                                    name = "libraries_to_link.type",
+                                    value = "static_library",
+                                ),
+                            ),
+                            flag_group(
+                                flags = ["-l%{libraries_to_link.name}"],
+                                expand_if_equal = variable_with_value(
+                                    name = "libraries_to_link.type",
+                                    value = "dynamic_library",
+                                ),
+                            ),
+                            flag_group(
+                                flags = ["-l:%{libraries_to_link.name}"],
+                                expand_if_equal = variable_with_value(
+                                    name = "libraries_to_link.type",
+                                    value = "versioned_dynamic_library",
+                                ),
+                            ),
+                            flag_group(
+                                flags = ["-Wl,-no-whole-archive"],
+                                expand_if_true = "libraries_to_link.is_whole_archive",
+                            ),
+                            flag_group(
+                                flags = ["-Wl,--end-lib"],
+                                expand_if_equal = variable_with_value(
+                                    name = "libraries_to_link.type",
+                                    value = "object_file_group",
+                                ),
+                            ),
+                        ],
+                        expand_if_available = "libraries_to_link",
+                    ),
+                    flag_group(
+                        flags = ["-Wl,@%{thinlto_param_file}"],
+                        expand_if_true = "thinlto_param_file",
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    user_link_flags_feature = feature(
+        name = "user_link_flags",
+        flag_sets = [
+            flag_set(
+                actions = all_link_actions + lto_index_actions,
+                flag_groups = [
+                    flag_group(
+                        flags = ["%{user_link_flags}"],
+                        iterate_over = "user_link_flags",
+                        expand_if_available = "user_link_flags",
+                    ),
+                ] + ([flag_group(flags = ctx.attr.link_libs)] if ctx.attr.link_libs else []),
+            ),
+        ],
+    )
+
+    fdo_prefetch_hints_feature = feature(
+        name = "fdo_prefetch_hints",
+        flag_sets = [
+            flag_set(
+                actions = [
+                    ACTION_NAMES.c_compile,
+                    ACTION_NAMES.cpp_compile,
+                    ACTION_NAMES.lto_backend,
+                ],
+                flag_groups = [
+                    flag_group(
+                        flags = [
+                            "-mllvm",
+                            "-prefetch-hints-file=%{fdo_prefetch_hints_path}",
+                        ],
+                        expand_if_available = "fdo_prefetch_hints_path",
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    linkstamps_feature = feature(
+        name = "linkstamps",
+        flag_sets = [
+            flag_set(
+                actions = all_link_actions + lto_index_actions,
+                flag_groups = [
+                    flag_group(
+                        flags = ["%{linkstamp_paths}"],
+                        iterate_over = "linkstamp_paths",
+                        expand_if_available = "linkstamp_paths",
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    gcc_coverage_map_format_feature = feature(
+        name = "gcc_coverage_map_format",
+        flag_sets = [
+            flag_set(
+                actions = [
+                    ACTION_NAMES.preprocess_assemble,
+                    ACTION_NAMES.c_compile,
+                    ACTION_NAMES.cpp_compile,
+                    ACTION_NAMES.cpp_module_compile,
+                    ACTION_NAMES.objc_compile,
+                    ACTION_NAMES.objcpp_compile,
+                    "objc-executable",
+                    "objc++-executable",
+                ],
+                flag_groups = [
+                    flag_group(
+                        flags = ["-fprofile-arcs", "-ftest-coverage"],
+                        expand_if_available = "gcov_gcno_file",
+                    ),
+                ],
+            ),
+            flag_set(
+                actions = all_link_actions + lto_index_actions,
+                flag_groups = [flag_group(flags = ["--coverage"])],
+            ),
+        ],
+        requires = [feature_set(features = ["coverage"])],
+        provides = ["profile"],
+    )
+
+    archiver_flags_feature = feature(
+        name = "archiver_flags",
+        flag_sets = [
+            flag_set(
+                actions = [ACTION_NAMES.cpp_link_static_library],
+                flag_groups = [
+                    flag_group(flags = ["rcsD"]),
+                    flag_group(
+                        flags = ["%{output_execpath}"],
+                        expand_if_available = "output_execpath",
+                    ),
+                ],
+            ),
+            flag_set(
+                actions = [ACTION_NAMES.cpp_link_static_library],
+                flag_groups = [
+                    flag_group(
+                        iterate_over = "libraries_to_link",
+                        flag_groups = [
+                            flag_group(
+                                flags = ["%{libraries_to_link.name}"],
+                                expand_if_equal = variable_with_value(
+                                    name = "libraries_to_link.type",
+                                    value = "object_file",
+                                ),
+                            ),
+                            flag_group(
+                                flags = ["%{libraries_to_link.object_files}"],
+                                iterate_over = "libraries_to_link.object_files",
+                                expand_if_equal = variable_with_value(
+                                    name = "libraries_to_link.type",
+                                    value = "object_file_group",
+                                ),
+                            ),
+                        ],
+                        expand_if_available = "libraries_to_link",
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    force_pic_flags_feature = feature(
+        name = "force_pic_flags",
+        flag_sets = [
+            flag_set(
+                actions = [
+                    ACTION_NAMES.cpp_link_executable,
+                    ACTION_NAMES.lto_index_for_executable,
+                ],
+                flag_groups = [
+                    flag_group(
+                        flags = ["-pie"],
+                        expand_if_available = "force_pic",
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    dependency_file_feature = feature(
+        name = "dependency_file",
+        enabled = True,
+        flag_sets = [
+            flag_set(
+                actions = [
+                    ACTION_NAMES.assemble,
+                    ACTION_NAMES.preprocess_assemble,
+                    ACTION_NAMES.c_compile,
+                    ACTION_NAMES.cpp_compile,
+                    ACTION_NAMES.cpp_module_compile,
+                    ACTION_NAMES.objc_compile,
+                    ACTION_NAMES.objcpp_compile,
+                    ACTION_NAMES.cpp_header_parsing,
+                    ACTION_NAMES.clif_match,
+                ],
+                flag_groups = [
+                    flag_group(
+                        flags = ["-MD", "-MF", "%{dependency_file}"],
+                        expand_if_available = "dependency_file",
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    dynamic_library_linker_tool_path = tool_paths
+    dynamic_library_linker_tool_feature = feature(
+        name = "dynamic_library_linker_tool",
+        flag_sets = [
+            flag_set(
+                actions = [
+                    ACTION_NAMES.cpp_link_dynamic_library,
+                    ACTION_NAMES.cpp_link_nodeps_dynamic_library,
+                    ACTION_NAMES.lto_index_for_dynamic_library,
+                    ACTION_NAMES.lto_index_for_nodeps_dynamic_library,
+                ],
+                flag_groups = [
+                    flag_group(
+                        flags = [" + cppLinkDynamicLibraryToolPath + "],
+                        expand_if_available = "generate_interface_library",
+                    ),
+                ],
+                with_features = [
+                    with_feature_set(
+                        features = ["supports_interface_shared_libraries"],
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    output_execpath_flags_feature = feature(
+        name = "output_execpath_flags",
+        flag_sets = [
+            flag_set(
+                actions = all_link_actions + lto_index_actions,
+                flag_groups = [
+                    flag_group(
+                        flags = ["-o", "%{output_execpath}"],
+                        expand_if_available = "output_execpath",
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    # Note that we also set --coverage for c++-link-nodeps-dynamic-library. The
+    # generated code contains references to gcov symbols, and the dynamic linker
+    # can't resolve them unless the library is linked against gcov.
+    coverage_feature = feature(
+        name = "coverage",
+        provides = ["profile"],
+        flag_sets = [
+            flag_set(
+                actions = [
+                    ACTION_NAMES.preprocess_assemble,
+                    ACTION_NAMES.c_compile,
+                    ACTION_NAMES.cpp_compile,
+                    ACTION_NAMES.cpp_header_parsing,
+                    ACTION_NAMES.cpp_module_compile,
+                ],
+                flag_groups = ([
+                    flag_group(flags = ctx.attr.coverage_compile_flags),
+                ] if ctx.attr.coverage_compile_flags else []),
+            ),
+            flag_set(
+                actions = all_link_actions + lto_index_actions,
+                flag_groups = ([
+                    flag_group(flags = ctx.attr.coverage_link_flags),
+                ] if ctx.attr.coverage_link_flags else []),
+            ),
+        ],
+    )
+
+    thinlto_feature = feature(
+        name = "thin_lto",
+        flag_sets = [
+            flag_set(
+                actions = [
+                    ACTION_NAMES.c_compile,
+                    ACTION_NAMES.cpp_compile,
+                ] + all_link_actions + lto_index_actions,
+                flag_groups = [
+                    flag_group(flags = ["-flto=thin"]),
+                    flag_group(
+                        expand_if_available = "lto_indexing_bitcode_file",
+                        flags = [
+                            "-Xclang",
+                            "-fthin-link-bitcode=%{lto_indexing_bitcode_file}",
+                        ],
+                    ),
+                ],
+            ),
+            flag_set(
+                actions = [ACTION_NAMES.linkstamp_compile],
+                flag_groups = [flag_group(flags = ["-DBUILD_LTO_TYPE=thin"])],
+            ),
+            flag_set(
+                actions = lto_index_actions,
+                flag_groups = [
+                    flag_group(flags = [
+                        "-flto=thin",
+                        "-Wl,-plugin-opt,thinlto-index-only%{thinlto_optional_params_file}",
+                        "-Wl,-plugin-opt,thinlto-emit-imports-files",
+                        "-Wl,-plugin-opt,thinlto-prefix-replace=%{thinlto_prefix_replace}",
+                    ]),
+                    flag_group(
+                        expand_if_available = "thinlto_object_suffix_replace",
+                        flags = [
+                            "-Wl,-plugin-opt,thinlto-object-suffix-replace=%{thinlto_object_suffix_replace}",
+                        ],
+                    ),
+                    flag_group(
+                        expand_if_available = "thinlto_merged_object_file",
+                        flags = [
+                            "-Wl,-plugin-opt,obj-path=%{thinlto_merged_object_file}",
+                        ],
+                    ),
+                ],
+            ),
+            flag_set(
+                actions = [ACTION_NAMES.lto_backend],
+                flag_groups = [
+                    flag_group(flags = [
+                        "-c",
+                        "-fthinlto-index=%{thinlto_index}",
+                        "-o",
+                        "%{thinlto_output_object_file}",
+                        "-x",
+                        "ir",
+                        "%{thinlto_input_bitcode_file}",
+                    ]),
+                ],
+            ),
+        ],
+    )
+
+    is_linux = ctx.attr.target_libc != "macosx"
+
+    # TODO(#8303): Mac crosstool should also declare every feature.
+    if is_linux:
+        features = [
+            dependency_file_feature,
+            random_seed_feature,
+            pic_feature,
+            per_object_debug_info_feature,
+            preprocessor_defines_feature,
+            includes_feature,
+            include_paths_feature,
+            fdo_instrument_feature,
+            cs_fdo_instrument_feature,
+            cs_fdo_optimize_feature,
+            thinlto_feature,
+            fdo_prefetch_hints_feature,
+            autofdo_feature,
+            build_interface_libraries_feature,
+            dynamic_library_linker_tool_feature,
+            symbol_counts_feature,
+            shared_flag_feature,
+            linkstamps_feature,
+            output_execpath_flags_feature,
+            runtime_library_search_directories_feature,
+            library_search_directories_feature,
+            archiver_flags_feature,
+            force_pic_flags_feature,
+            fission_support_feature,
+            strip_debug_symbols_feature,
+            coverage_feature,
+            supports_pic_feature,
+        ] + (
+            [
+                supports_start_end_lib_feature,
+            ] if ctx.attr.supports_start_end_lib else []
+        ) + [
+            default_compile_flags_feature,
+            default_link_flags_feature,
+            libraries_to_link_feature,
+            user_link_flags_feature,
+            static_libgcc_feature,
+            fdo_optimize_feature,
+            supports_dynamic_linker_feature,
+            dbg_feature,
+            opt_feature,
+            user_compile_flags_feature,
+            sysroot_feature,
+            unfiltered_compile_flags_feature,
+        ] + layering_check_features(ctx.attr.compiler)
+    else:
+        features = [
+            supports_pic_feature,
+        ] + (
+            [
+                supports_start_end_lib_feature,
+            ] if ctx.attr.supports_start_end_lib else []
+        ) + [
+            coverage_feature,
+            default_compile_flags_feature,
+            default_link_flags_feature,
+            fdo_optimize_feature,
+            supports_dynamic_linker_feature,
+            dbg_feature,
+            opt_feature,
+            user_compile_flags_feature,
+            sysroot_feature,
+            unfiltered_compile_flags_feature,
+        ] + layering_check_features(ctx.attr.compiler)
+
+    return cc_common.create_cc_toolchain_config_info(
+        ctx = ctx,
+        features = features,
+        action_configs = action_configs,
+        cxx_builtin_include_directories = ctx.attr.cxx_builtin_include_directories,
+        toolchain_identifier = ctx.attr.toolchain_identifier,
+        host_system_name = ctx.attr.host_system_name,
+        target_system_name = ctx.attr.target_system_name,
+        target_cpu = ctx.attr.cpu,
+        target_libc = ctx.attr.target_libc,
+        compiler = ctx.attr.compiler,
+        abi_version = ctx.attr.abi_version,
+        abi_libc_version = ctx.attr.abi_libc_version,
+        tool_paths = tool_paths,
+        builtin_sysroot = ctx.attr.builtin_sysroot,
+    )
+
+cc_toolchain_config = rule(  # Rule whose _impl returns cc_common.create_cc_toolchain_config_info(...).
+    implementation = _impl,
+    attrs = {
+        "cpu": attr.string(mandatory = True),  # forwarded as target_cpu
+        "compiler": attr.string(mandatory = True),  # forwarded as compiler; also passed to layering_check_features()
+        "toolchain_identifier": attr.string(mandatory = True),  # forwarded unchanged
+        "host_system_name": attr.string(mandatory = True),  # forwarded unchanged
+        "target_system_name": attr.string(mandatory = True),  # forwarded unchanged
+        "target_libc": attr.string(mandatory = True),  # forwarded; any value other than "macosx" selects the larger (is_linux) feature list
+        "abi_version": attr.string(mandatory = True),  # forwarded unchanged
+        "abi_libc_version": attr.string(mandatory = True),  # forwarded unchanged
+        "cxx_builtin_include_directories": attr.string_list(),  # forwarded unchanged
+        "tool_paths": attr.string_dict(),  # NOTE(review): presumably tool name -> path; confirm against the start of _impl
+        "compile_flags": attr.string_list(),
+        "dbg_compile_flags": attr.string_list(),
+        "opt_compile_flags": attr.string_list(),
+        "cxx_flags": attr.string_list(),
+        "link_flags": attr.string_list(),
+        "link_libs": attr.string_list(),  # when non-empty, added as an extra flag_group of the "user_link_flags" feature
+        "opt_link_flags": attr.string_list(),
+        "unfiltered_compile_flags": attr.string_list(),
+        "coverage_compile_flags": attr.string_list(),  # when non-empty, compile-action flags of the "coverage" feature
+        "coverage_link_flags": attr.string_list(),  # when non-empty, link-action flags of the "coverage" feature
+        "supports_start_end_lib": attr.bool(),  # when true, supports_start_end_lib_feature is added to the feature list
+        "builtin_sysroot": attr.string(),  # forwarded unchanged
+    },
+    provides = [CcToolchainConfigInfo],
+)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/cc_wrapper.sh b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/cc_wrapper.sh
new file mode 100644
index 0000000000..bfc4ce5035
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/cc_wrapper.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+#
+# Copyright 2015 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Ship the environment to the C++ action
+#
+set -eu  # abort on any failing command (-e) or on use of an unset variable (-u)
+
+# Set up the environment
+# (nothing is exported here; the compiler inherits the caller's environment)
+
+# Call the C++ compiler, forwarding every argument unchanged
+/usr/bin/clang "$@"
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/module.modulemap b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/module.modulemap
new file mode 100644
index 0000000000..037452b5b6
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/module.modulemap
@@ -0,0 +1,5760 @@
+module "crosstool" [system] {
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/__clang_cuda_builtin_vars.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/__clang_cuda_cmath.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/__clang_cuda_complex_builtins.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/__clang_cuda_intrinsics.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/__clang_cuda_math_forward_declares.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/__clang_cuda_runtime_wrapper.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/__stddef_max_align_t.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/__wmmintrin_aes.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/__wmmintrin_pclmul.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/adxintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/altivec.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/ammintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/arm64intr.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/arm_acle.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/arm_neon.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/armintr.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx2intrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512bitalgintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512bwintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512cdintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512dqintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512erintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512fintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512ifmaintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512ifmavlintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512pfintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512vbmi2intrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512vbmiintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512vbmivlintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512vlbitalgintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512vlbwintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512vlcdintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512vldqintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512vlintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512vlvbmi2intrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512vlvnniintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512vnniintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512vpopcntdqintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avx512vpopcntdqvlintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/avxintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/bmi2intrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/bmiintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/cetintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/clflushoptintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/clwbintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/clzerointrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/cpuid.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/cuda_wrappers/algorithm"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/cuda_wrappers/complex"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/cuda_wrappers/new"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/emmintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/f16cintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/float.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/fma4intrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/fmaintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/fxsrintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/gfniintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/htmintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/htmxlintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/ia32intrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/immintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/intrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/inttypes.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/iso646.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/limits.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/lwpintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/lzcntintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/mm3dnow.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/mm_malloc.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/mmintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/module.modulemap"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/msa.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/mwaitxintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/nmmintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/opencl-c.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/pkuintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/pmmintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/popcntintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/prfchwintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/rdseedintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/rtmintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/s390intrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/sanitizer/allocator_interface.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/sanitizer/asan_interface.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/sanitizer/common_interface_defs.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/sanitizer/coverage_interface.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/sanitizer/dfsan_interface.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/sanitizer/esan_interface.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/sanitizer/hwasan_interface.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/sanitizer/linux_syscall_hooks.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/sanitizer/lsan_interface.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/sanitizer/msan_interface.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/sanitizer/scudo_interface.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/sanitizer/tsan_interface.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/sanitizer/tsan_interface_atomic.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/shaintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/smmintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/stdalign.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/stdarg.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/stdatomic.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/stdbool.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/stddef.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/stdint.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/stdnoreturn.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/tbmintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/tgmath.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/tmmintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/unwind.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/vadefs.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/vaesintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/varargs.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/vecintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/vpclmulqdqintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/wmmintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/x86intrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/xmmintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/xopintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/xray/xray_interface.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/xray/xray_log_interface.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/xsavecintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/xsaveintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/xsaveoptintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/xsavesintrin.h"
+  textual header "/usr/lib/llvm-6.0/lib/clang/6.0.0/include/xtestintrin.h"
+  textual header "/usr/include/x86_64-linux-gnu/a.out.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/a.out.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/auxvec.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/bitsperlong.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/boot.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/bootparam.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/bpf_perf_event.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/byteorder.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/debugreg.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/e820.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/errno.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/fcntl.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/hw_breakpoint.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/hwcap2.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/hyperv.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/ioctl.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/ioctls.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/ipcbuf.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/ist.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/kvm.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/kvm_para.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/kvm_perf.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/ldt.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/mce.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/mman.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/msgbuf.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/msr.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/mtrr.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/param.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/perf_regs.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/poll.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/posix_types.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/posix_types_32.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/posix_types_64.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/posix_types_x32.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/prctl.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/processor-flags.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/ptrace-abi.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/ptrace.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/resource.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/sembuf.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/setup.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/shmbuf.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/sigcontext.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/sigcontext32.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/siginfo.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/signal.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/socket.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/sockios.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/stat.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/statfs.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/svm.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/swab.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/termbits.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/termios.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/types.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/ucontext.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/unistd.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/unistd_32.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/unistd_64.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/unistd_x32.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/vm86.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/vmx.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/vsyscall.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/_G_config.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/a.out.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/auxv.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/byteswap-16.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/byteswap.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/cmathcalls.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/confname.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/cpu-set.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/dirent.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/dlfcn.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/elfclass.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/endian.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/environments.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/epoll.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/errno.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/error.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/eventfd.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/fcntl-linux.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/fcntl.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/fcntl2.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/fenv.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/fenvinline.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/floatn-common.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/floatn.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/flt-eval-method.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/fp-fast.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/fp-logb.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/getopt_core.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/getopt_ext.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/getopt_posix.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/hwcap.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/in.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/initspin.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/inotify.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/ioctl-types.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/ioctls.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/ipc.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/ipctypes.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/iscanonical.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/libc-header-start.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/libio-ldbl.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/libio.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/libm-simd-decl-stubs.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/link.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/local_lim.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/locale.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/long-double.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/math-finite.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/math-vector.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/mathcalls-helper-functions.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/mathcalls.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/mathdef.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/mathinline.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/mman-linux.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/mman-shared.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/mman.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/monetary-ldbl.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/mqueue.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/mqueue2.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/msq.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/netdb.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/param.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/poll.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/poll2.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/posix1_lim.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/posix2_lim.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/posix_opt.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/printf-ldbl.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/pthreadtypes-arch.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/pthreadtypes.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/ptrace-shared.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/resource.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sched.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/select.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/select2.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sem.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/semaphore.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/setjmp.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/setjmp2.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/shm.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sigaction.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sigcontext.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sigevent-consts.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/siginfo-arch.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/siginfo-consts-arch.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/siginfo-consts.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/signalfd.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/signum-generic.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/signum.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sigstack.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sigthread.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sockaddr.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/socket.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/socket2.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/socket_type.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/ss_flags.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stab.def"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stat.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/statfs.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/statvfs.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stdint-intn.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stdint-uintn.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stdio-ldbl.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stdio.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stdio2.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stdio_lim.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stdlib-bsearch.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stdlib-float.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stdlib-ldbl.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stdlib.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/string_fortified.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/strings_fortified.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stropts.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sys_errlist.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/syscall.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sysctl.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/syslog-ldbl.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/syslog-path.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/syslog.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sysmacros.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/termios.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/thread-shared-types.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/time.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/timerfd.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/timex.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/FILE.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/__FILE.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/__locale_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/__mbstate_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/__sigset_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/__sigval_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/clock_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/clockid_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/locale_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/mbstate_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/res_state.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/sig_atomic_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/sigevent_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/siginfo_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/sigset_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/sigval_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/stack_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/struct_iovec.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/struct_itimerspec.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/struct_osockaddr.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/struct_rusage.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/struct_sigstack.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/struct_timespec.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/struct_timeval.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/struct_tm.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/time_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/timer_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/wint_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/typesizes.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/uintn-identity.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/uio-ext.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/uio_lim.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/unistd.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/ustat.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/utmp.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/utmpx.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/utsname.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/waitflags.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/waitstatus.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/wchar-ldbl.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/wchar.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/wchar2.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/wctype-wchar.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/wordsize.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/xopen_lim.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/xtitypes.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/atomic_word.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/basic_file.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/c++allocator.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/c++config.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/c++io.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/c++locale.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/cpu_defines.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/ctype_base.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/ctype_inline.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/cxxabi_tweaks.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/error_constants.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/extc++.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/gthr-default.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/gthr-posix.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/gthr-single.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/gthr.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/messages_members.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/opt_random.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/os_defines.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/stdc++.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/stdtr1c++.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/time_members.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/ext/opt_random.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/atomic_word.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/basic_file.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/c++allocator.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/c++config.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/c++io.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/c++locale.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/cpu_defines.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/ctype_base.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/ctype_inline.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/cxxabi_tweaks.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/error_constants.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/extc++.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/gthr-default.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/gthr-posix.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/gthr-single.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/gthr.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/messages_members.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/opt_random.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/os_defines.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/stdc++.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/stdtr1c++.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/time_members.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/ext/opt_random.h"
+  textual header "/usr/include/x86_64-linux-gnu/ffi.h"
+  textual header "/usr/include/x86_64-linux-gnu/ffitarget.h"
+  textual header "/usr/include/x86_64-linux-gnu/fpu_control.h"
+  textual header "/usr/include/x86_64-linux-gnu/gnu/lib-names-64.h"
+  textual header "/usr/include/x86_64-linux-gnu/gnu/lib-names.h"
+  textual header "/usr/include/x86_64-linux-gnu/gnu/libc-version.h"
+  textual header "/usr/include/x86_64-linux-gnu/gnu/stubs-64.h"
+  textual header "/usr/include/x86_64-linux-gnu/gnu/stubs.h"
+  textual header "/usr/include/x86_64-linux-gnu/ieee754.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/acct.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/auxv.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/bitypes.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/cdefs.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/debugreg.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/dir.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/elf.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/epoll.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/errno.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/eventfd.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/fanotify.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/fcntl.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/file.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/fsuid.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/gmon.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/gmon_out.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/inotify.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/io.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/ioctl.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/ipc.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/kd.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/klog.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/mman.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/mount.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/msg.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/mtio.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/param.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/pci.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/perm.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/personality.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/poll.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/prctl.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/procfs.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/profil.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/ptrace.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/queue.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/quota.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/random.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/raw.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/reboot.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/reg.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/resource.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/select.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/sem.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/sendfile.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/shm.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/signal.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/signalfd.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/socket.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/socketvar.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/soundcard.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/stat.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/statfs.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/statvfs.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/stropts.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/swap.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/syscall.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/sysctl.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/sysinfo.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/syslog.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/sysmacros.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/termios.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/time.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/timeb.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/timerfd.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/times.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/timex.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/ttychars.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/ttydefaults.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/types.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/ucontext.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/uio.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/un.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/unistd.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/user.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/ustat.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/utsname.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/vfs.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/vlimit.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/vm86.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/vt.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/vtimes.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/wait.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/xattr.h"
+  textual header "/usr/include/_G_config.h"
+  textual header "/usr/include/aio.h"
+  textual header "/usr/include/aliases.h"
+  textual header "/usr/include/alloca.h"
+  textual header "/usr/include/ar.h"
+  textual header "/usr/include/argp.h"
+  textual header "/usr/include/argz.h"
+  textual header "/usr/include/arpa/ftp.h"
+  textual header "/usr/include/arpa/inet.h"
+  textual header "/usr/include/arpa/nameser.h"
+  textual header "/usr/include/arpa/nameser_compat.h"
+  textual header "/usr/include/arpa/telnet.h"
+  textual header "/usr/include/arpa/tftp.h"
+  textual header "/usr/include/asm-generic/auxvec.h"
+  textual header "/usr/include/asm-generic/bitsperlong.h"
+  textual header "/usr/include/asm-generic/bpf_perf_event.h"
+  textual header "/usr/include/asm-generic/errno-base.h"
+  textual header "/usr/include/asm-generic/errno.h"
+  textual header "/usr/include/asm-generic/fcntl.h"
+  textual header "/usr/include/asm-generic/hugetlb_encode.h"
+  textual header "/usr/include/asm-generic/int-l64.h"
+  textual header "/usr/include/asm-generic/int-ll64.h"
+  textual header "/usr/include/asm-generic/ioctl.h"
+  textual header "/usr/include/asm-generic/ioctls.h"
+  textual header "/usr/include/asm-generic/ipcbuf.h"
+  textual header "/usr/include/asm-generic/kvm_para.h"
+  textual header "/usr/include/asm-generic/mman-common.h"
+  textual header "/usr/include/asm-generic/mman.h"
+  textual header "/usr/include/asm-generic/msgbuf.h"
+  textual header "/usr/include/asm-generic/param.h"
+  textual header "/usr/include/asm-generic/poll.h"
+  textual header "/usr/include/asm-generic/posix_types.h"
+  textual header "/usr/include/asm-generic/resource.h"
+  textual header "/usr/include/asm-generic/sembuf.h"
+  textual header "/usr/include/asm-generic/setup.h"
+  textual header "/usr/include/asm-generic/shmbuf.h"
+  textual header "/usr/include/asm-generic/shmparam.h"
+  textual header "/usr/include/asm-generic/siginfo.h"
+  textual header "/usr/include/asm-generic/signal-defs.h"
+  textual header "/usr/include/asm-generic/signal.h"
+  textual header "/usr/include/asm-generic/socket.h"
+  textual header "/usr/include/asm-generic/sockios.h"
+  textual header "/usr/include/asm-generic/stat.h"
+  textual header "/usr/include/asm-generic/statfs.h"
+  textual header "/usr/include/asm-generic/swab.h"
+  textual header "/usr/include/asm-generic/termbits.h"
+  textual header "/usr/include/asm-generic/termios.h"
+  textual header "/usr/include/asm-generic/types.h"
+  textual header "/usr/include/asm-generic/ucontext.h"
+  textual header "/usr/include/asm-generic/unistd.h"
+  textual header "/usr/include/assert.h"
+  textual header "/usr/include/byteswap.h"
+  textual header "/usr/include/c++/7.5.0/algorithm"
+  textual header "/usr/include/c++/7.5.0/any"
+  textual header "/usr/include/c++/7.5.0/array"
+  textual header "/usr/include/c++/7.5.0/atomic"
+  textual header "/usr/include/c++/7.5.0/backward/auto_ptr.h"
+  textual header "/usr/include/c++/7.5.0/backward/backward_warning.h"
+  textual header "/usr/include/c++/7.5.0/backward/binders.h"
+  textual header "/usr/include/c++/7.5.0/backward/hash_fun.h"
+  textual header "/usr/include/c++/7.5.0/backward/hash_map"
+  textual header "/usr/include/c++/7.5.0/backward/hash_set"
+  textual header "/usr/include/c++/7.5.0/backward/hashtable.h"
+  textual header "/usr/include/c++/7.5.0/backward/strstream"
+  textual header "/usr/include/c++/7.5.0/bits/algorithmfwd.h"
+  textual header "/usr/include/c++/7.5.0/bits/alloc_traits.h"
+  textual header "/usr/include/c++/7.5.0/bits/allocated_ptr.h"
+  textual header "/usr/include/c++/7.5.0/bits/allocator.h"
+  textual header "/usr/include/c++/7.5.0/bits/atomic_base.h"
+  textual header "/usr/include/c++/7.5.0/bits/atomic_futex.h"
+  textual header "/usr/include/c++/7.5.0/bits/atomic_lockfree_defines.h"
+  textual header "/usr/include/c++/7.5.0/bits/basic_ios.h"
+  textual header "/usr/include/c++/7.5.0/bits/basic_ios.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/basic_string.h"
+  textual header "/usr/include/c++/7.5.0/bits/basic_string.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/boost_concept_check.h"
+  textual header "/usr/include/c++/7.5.0/bits/c++0x_warning.h"
+  textual header "/usr/include/c++/7.5.0/bits/char_traits.h"
+  textual header "/usr/include/c++/7.5.0/bits/codecvt.h"
+  textual header "/usr/include/c++/7.5.0/bits/concept_check.h"
+  textual header "/usr/include/c++/7.5.0/bits/cpp_type_traits.h"
+  textual header "/usr/include/c++/7.5.0/bits/cxxabi_forced.h"
+  textual header "/usr/include/c++/7.5.0/bits/cxxabi_init_exception.h"
+  textual header "/usr/include/c++/7.5.0/bits/deque.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/enable_special_members.h"
+  textual header "/usr/include/c++/7.5.0/bits/exception.h"
+  textual header "/usr/include/c++/7.5.0/bits/exception_defines.h"
+  textual header "/usr/include/c++/7.5.0/bits/exception_ptr.h"
+  textual header "/usr/include/c++/7.5.0/bits/forward_list.h"
+  textual header "/usr/include/c++/7.5.0/bits/forward_list.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/fstream.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/functexcept.h"
+  textual header "/usr/include/c++/7.5.0/bits/functional_hash.h"
+  textual header "/usr/include/c++/7.5.0/bits/gslice.h"
+  textual header "/usr/include/c++/7.5.0/bits/gslice_array.h"
+  textual header "/usr/include/c++/7.5.0/bits/hash_bytes.h"
+  textual header "/usr/include/c++/7.5.0/bits/hashtable.h"
+  textual header "/usr/include/c++/7.5.0/bits/hashtable_policy.h"
+  textual header "/usr/include/c++/7.5.0/bits/indirect_array.h"
+  textual header "/usr/include/c++/7.5.0/bits/invoke.h"
+  textual header "/usr/include/c++/7.5.0/bits/ios_base.h"
+  textual header "/usr/include/c++/7.5.0/bits/istream.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/list.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/locale_classes.h"
+  textual header "/usr/include/c++/7.5.0/bits/locale_classes.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/locale_conv.h"
+  textual header "/usr/include/c++/7.5.0/bits/locale_facets.h"
+  textual header "/usr/include/c++/7.5.0/bits/locale_facets.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/locale_facets_nonio.h"
+  textual header "/usr/include/c++/7.5.0/bits/locale_facets_nonio.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/localefwd.h"
+  textual header "/usr/include/c++/7.5.0/bits/mask_array.h"
+  textual header "/usr/include/c++/7.5.0/bits/memoryfwd.h"
+  textual header "/usr/include/c++/7.5.0/bits/move.h"
+  textual header "/usr/include/c++/7.5.0/bits/nested_exception.h"
+  textual header "/usr/include/c++/7.5.0/bits/node_handle.h"
+  textual header "/usr/include/c++/7.5.0/bits/ostream.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/ostream_insert.h"
+  textual header "/usr/include/c++/7.5.0/bits/parse_numbers.h"
+  textual header "/usr/include/c++/7.5.0/bits/postypes.h"
+  textual header "/usr/include/c++/7.5.0/bits/predefined_ops.h"
+  textual header "/usr/include/c++/7.5.0/bits/ptr_traits.h"
+  textual header "/usr/include/c++/7.5.0/bits/quoted_string.h"
+  textual header "/usr/include/c++/7.5.0/bits/random.h"
+  textual header "/usr/include/c++/7.5.0/bits/random.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/range_access.h"
+  textual header "/usr/include/c++/7.5.0/bits/refwrap.h"
+  textual header "/usr/include/c++/7.5.0/bits/regex.h"
+  textual header "/usr/include/c++/7.5.0/bits/regex.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/regex_automaton.h"
+  textual header "/usr/include/c++/7.5.0/bits/regex_automaton.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/regex_compiler.h"
+  textual header "/usr/include/c++/7.5.0/bits/regex_compiler.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/regex_constants.h"
+  textual header "/usr/include/c++/7.5.0/bits/regex_error.h"
+  textual header "/usr/include/c++/7.5.0/bits/regex_executor.h"
+  textual header "/usr/include/c++/7.5.0/bits/regex_executor.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/regex_scanner.h"
+  textual header "/usr/include/c++/7.5.0/bits/regex_scanner.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/shared_ptr.h"
+  textual header "/usr/include/c++/7.5.0/bits/shared_ptr_atomic.h"
+  textual header "/usr/include/c++/7.5.0/bits/shared_ptr_base.h"
+  textual header "/usr/include/c++/7.5.0/bits/slice_array.h"
+  textual header "/usr/include/c++/7.5.0/bits/specfun.h"
+  textual header "/usr/include/c++/7.5.0/bits/sstream.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/std_abs.h"
+  textual header "/usr/include/c++/7.5.0/bits/std_function.h"
+  textual header "/usr/include/c++/7.5.0/bits/std_mutex.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_algo.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_algobase.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_bvector.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_construct.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_deque.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_function.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_heap.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_iterator.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_iterator_base_funcs.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_iterator_base_types.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_list.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_map.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_multimap.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_multiset.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_numeric.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_pair.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_queue.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_raw_storage_iter.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_relops.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_set.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_stack.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_tempbuf.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_tree.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_uninitialized.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_vector.h"
+  textual header "/usr/include/c++/7.5.0/bits/stream_iterator.h"
+  textual header "/usr/include/c++/7.5.0/bits/streambuf.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/streambuf_iterator.h"
+  textual header "/usr/include/c++/7.5.0/bits/string_view.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/stringfwd.h"
+  textual header "/usr/include/c++/7.5.0/bits/uniform_int_dist.h"
+  textual header "/usr/include/c++/7.5.0/bits/unique_ptr.h"
+  textual header "/usr/include/c++/7.5.0/bits/unordered_map.h"
+  textual header "/usr/include/c++/7.5.0/bits/unordered_set.h"
+  textual header "/usr/include/c++/7.5.0/bits/uses_allocator.h"
+  textual header "/usr/include/c++/7.5.0/bits/valarray_after.h"
+  textual header "/usr/include/c++/7.5.0/bits/valarray_array.h"
+  textual header "/usr/include/c++/7.5.0/bits/valarray_array.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/valarray_before.h"
+  textual header "/usr/include/c++/7.5.0/bits/vector.tcc"
+  textual header "/usr/include/c++/7.5.0/bitset"
+  textual header "/usr/include/c++/7.5.0/cassert"
+  textual header "/usr/include/c++/7.5.0/ccomplex"
+  textual header "/usr/include/c++/7.5.0/cctype"
+  textual header "/usr/include/c++/7.5.0/cerrno"
+  textual header "/usr/include/c++/7.5.0/cfenv"
+  textual header "/usr/include/c++/7.5.0/cfloat"
+  textual header "/usr/include/c++/7.5.0/chrono"
+  textual header "/usr/include/c++/7.5.0/cinttypes"
+  textual header "/usr/include/c++/7.5.0/ciso646"
+  textual header "/usr/include/c++/7.5.0/climits"
+  textual header "/usr/include/c++/7.5.0/clocale"
+  textual header "/usr/include/c++/7.5.0/cmath"
+  textual header "/usr/include/c++/7.5.0/codecvt"
+  textual header "/usr/include/c++/7.5.0/complex"
+  textual header "/usr/include/c++/7.5.0/complex.h"
+  textual header "/usr/include/c++/7.5.0/condition_variable"
+  textual header "/usr/include/c++/7.5.0/csetjmp"
+  textual header "/usr/include/c++/7.5.0/csignal"
+  textual header "/usr/include/c++/7.5.0/cstdalign"
+  textual header "/usr/include/c++/7.5.0/cstdarg"
+  textual header "/usr/include/c++/7.5.0/cstdbool"
+  textual header "/usr/include/c++/7.5.0/cstddef"
+  textual header "/usr/include/c++/7.5.0/cstdint"
+  textual header "/usr/include/c++/7.5.0/cstdio"
+  textual header "/usr/include/c++/7.5.0/cstdlib"
+  textual header "/usr/include/c++/7.5.0/cstring"
+  textual header "/usr/include/c++/7.5.0/ctgmath"
+  textual header "/usr/include/c++/7.5.0/ctime"
+  textual header "/usr/include/c++/7.5.0/cuchar"
+  textual header "/usr/include/c++/7.5.0/cwchar"
+  textual header "/usr/include/c++/7.5.0/cwctype"
+  textual header "/usr/include/c++/7.5.0/cxxabi.h"
+  textual header "/usr/include/c++/7.5.0/debug/array"
+  textual header "/usr/include/c++/7.5.0/debug/assertions.h"
+  textual header "/usr/include/c++/7.5.0/debug/bitset"
+  textual header "/usr/include/c++/7.5.0/debug/debug.h"
+  textual header "/usr/include/c++/7.5.0/debug/deque"
+  textual header "/usr/include/c++/7.5.0/debug/formatter.h"
+  textual header "/usr/include/c++/7.5.0/debug/forward_list"
+  textual header "/usr/include/c++/7.5.0/debug/functions.h"
+  textual header "/usr/include/c++/7.5.0/debug/helper_functions.h"
+  textual header "/usr/include/c++/7.5.0/debug/list"
+  textual header "/usr/include/c++/7.5.0/debug/macros.h"
+  textual header "/usr/include/c++/7.5.0/debug/map"
+  textual header "/usr/include/c++/7.5.0/debug/map.h"
+  textual header "/usr/include/c++/7.5.0/debug/multimap.h"
+  textual header "/usr/include/c++/7.5.0/debug/multiset.h"
+  textual header "/usr/include/c++/7.5.0/debug/safe_base.h"
+  textual header "/usr/include/c++/7.5.0/debug/safe_container.h"
+  textual header "/usr/include/c++/7.5.0/debug/safe_iterator.h"
+  textual header "/usr/include/c++/7.5.0/debug/safe_iterator.tcc"
+  textual header "/usr/include/c++/7.5.0/debug/safe_local_iterator.h"
+  textual header "/usr/include/c++/7.5.0/debug/safe_local_iterator.tcc"
+  textual header "/usr/include/c++/7.5.0/debug/safe_sequence.h"
+  textual header "/usr/include/c++/7.5.0/debug/safe_sequence.tcc"
+  textual header "/usr/include/c++/7.5.0/debug/safe_unordered_base.h"
+  textual header "/usr/include/c++/7.5.0/debug/safe_unordered_container.h"
+  textual header "/usr/include/c++/7.5.0/debug/safe_unordered_container.tcc"
+  textual header "/usr/include/c++/7.5.0/debug/set"
+  textual header "/usr/include/c++/7.5.0/debug/set.h"
+  textual header "/usr/include/c++/7.5.0/debug/stl_iterator.h"
+  textual header "/usr/include/c++/7.5.0/debug/string"
+  textual header "/usr/include/c++/7.5.0/debug/unordered_map"
+  textual header "/usr/include/c++/7.5.0/debug/unordered_set"
+  textual header "/usr/include/c++/7.5.0/debug/vector"
+  textual header "/usr/include/c++/7.5.0/decimal/decimal"
+  textual header "/usr/include/c++/7.5.0/decimal/decimal.h"
+  textual header "/usr/include/c++/7.5.0/deque"
+  textual header "/usr/include/c++/7.5.0/exception"
+  textual header "/usr/include/c++/7.5.0/experimental/algorithm"
+  textual header "/usr/include/c++/7.5.0/experimental/any"
+  textual header "/usr/include/c++/7.5.0/experimental/array"
+  textual header "/usr/include/c++/7.5.0/experimental/bits/erase_if.h"
+  textual header "/usr/include/c++/7.5.0/experimental/bits/fs_dir.h"
+  textual header "/usr/include/c++/7.5.0/experimental/bits/fs_fwd.h"
+  textual header "/usr/include/c++/7.5.0/experimental/bits/fs_ops.h"
+  textual header "/usr/include/c++/7.5.0/experimental/bits/fs_path.h"
+  textual header "/usr/include/c++/7.5.0/experimental/bits/lfts_config.h"
+  textual header "/usr/include/c++/7.5.0/experimental/bits/shared_ptr.h"
+  textual header "/usr/include/c++/7.5.0/experimental/bits/string_view.tcc"
+  textual header "/usr/include/c++/7.5.0/experimental/chrono"
+  textual header "/usr/include/c++/7.5.0/experimental/deque"
+  textual header "/usr/include/c++/7.5.0/experimental/filesystem"
+  textual header "/usr/include/c++/7.5.0/experimental/forward_list"
+  textual header "/usr/include/c++/7.5.0/experimental/functional"
+  textual header "/usr/include/c++/7.5.0/experimental/iterator"
+  textual header "/usr/include/c++/7.5.0/experimental/list"
+  textual header "/usr/include/c++/7.5.0/experimental/map"
+  textual header "/usr/include/c++/7.5.0/experimental/memory"
+  textual header "/usr/include/c++/7.5.0/experimental/memory_resource"
+  textual header "/usr/include/c++/7.5.0/experimental/numeric"
+  textual header "/usr/include/c++/7.5.0/experimental/optional"
+  textual header "/usr/include/c++/7.5.0/experimental/propagate_const"
+  textual header "/usr/include/c++/7.5.0/experimental/random"
+  textual header "/usr/include/c++/7.5.0/experimental/ratio"
+  textual header "/usr/include/c++/7.5.0/experimental/regex"
+  textual header "/usr/include/c++/7.5.0/experimental/set"
+  textual header "/usr/include/c++/7.5.0/experimental/source_location"
+  textual header "/usr/include/c++/7.5.0/experimental/string"
+  textual header "/usr/include/c++/7.5.0/experimental/string_view"
+  textual header "/usr/include/c++/7.5.0/experimental/system_error"
+  textual header "/usr/include/c++/7.5.0/experimental/tuple"
+  textual header "/usr/include/c++/7.5.0/experimental/type_traits"
+  textual header "/usr/include/c++/7.5.0/experimental/unordered_map"
+  textual header "/usr/include/c++/7.5.0/experimental/unordered_set"
+  textual header "/usr/include/c++/7.5.0/experimental/utility"
+  textual header "/usr/include/c++/7.5.0/experimental/vector"
+  textual header "/usr/include/c++/7.5.0/ext/algorithm"
+  textual header "/usr/include/c++/7.5.0/ext/aligned_buffer.h"
+  textual header "/usr/include/c++/7.5.0/ext/alloc_traits.h"
+  textual header "/usr/include/c++/7.5.0/ext/array_allocator.h"
+  textual header "/usr/include/c++/7.5.0/ext/atomicity.h"
+  textual header "/usr/include/c++/7.5.0/ext/bitmap_allocator.h"
+  textual header "/usr/include/c++/7.5.0/ext/cast.h"
+  textual header "/usr/include/c++/7.5.0/ext/cmath"
+  textual header "/usr/include/c++/7.5.0/ext/codecvt_specializations.h"
+  textual header "/usr/include/c++/7.5.0/ext/concurrence.h"
+  textual header "/usr/include/c++/7.5.0/ext/debug_allocator.h"
+  textual header "/usr/include/c++/7.5.0/ext/enc_filebuf.h"
+  textual header "/usr/include/c++/7.5.0/ext/extptr_allocator.h"
+  textual header "/usr/include/c++/7.5.0/ext/functional"
+  textual header "/usr/include/c++/7.5.0/ext/hash_map"
+  textual header "/usr/include/c++/7.5.0/ext/hash_set"
+  textual header "/usr/include/c++/7.5.0/ext/iterator"
+  textual header "/usr/include/c++/7.5.0/ext/malloc_allocator.h"
+  textual header "/usr/include/c++/7.5.0/ext/memory"
+  textual header "/usr/include/c++/7.5.0/ext/mt_allocator.h"
+  textual header "/usr/include/c++/7.5.0/ext/new_allocator.h"
+  textual header "/usr/include/c++/7.5.0/ext/numeric"
+  textual header "/usr/include/c++/7.5.0/ext/numeric_traits.h"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/assoc_container.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/node_iterators.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/point_iterators.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/r_erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/rotate_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/traits.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/binary_heap_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/const_iterator.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/entry_cmp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/entry_pred.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/point_const_iterator.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/resize_policy.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binomial_heap_/binomial_heap_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binomial_heap_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binomial_heap_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binomial_heap_base_/binomial_heap_base_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binomial_heap_base_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binomial_heap_base_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binomial_heap_base_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binomial_heap_base_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binomial_heap_base_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binomial_heap_base_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/branch_policy/branch_policy.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/branch_policy/null_node_metadata.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/branch_policy/traits.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/cc_ht_map_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/cmp_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/cond_key_dtor_entry_dealtor.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/constructor_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/constructor_destructor_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/constructor_destructor_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/debug_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/debug_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/entry_list_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/erase_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/erase_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/find_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/insert_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/insert_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/resize_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/resize_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/resize_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/size_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cond_dealtor.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/container_base_dispatch.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/debug_map_base.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/eq_fn/eq_by_less.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/eq_fn/hash_eq_fn.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/constructor_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/constructor_destructor_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/constructor_destructor_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/debug_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/debug_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/erase_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/erase_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/find_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/find_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/gp_ht_map_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/insert_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/insert_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/iterator_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/resize_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/resize_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/resize_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/direct_mask_range_hashing_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/direct_mod_range_hashing_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/linear_probe_fn_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/mask_based_range_hashing.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/mod_based_range_hashing.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/probe_fn_base.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/quadratic_probe_fn_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/ranged_hash_fn.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/ranged_probe_fn.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/sample_probe_fn.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/sample_range_hashing.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/sample_ranged_hash_fn.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/sample_ranged_probe_fn.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/const_iterator.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/left_child_next_sibling_heap_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/node.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/point_const_iterator.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_map_/constructor_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_map_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_map_/entry_metadata_base.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_map_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_map_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_map_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_map_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_map_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_map_/lu_map_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_map_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_policy/lu_counter_metadata.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_policy/sample_update_policy.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/node_iterators.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/ov_tree_map_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/traits.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pairing_heap_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pairing_heap_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pairing_heap_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pairing_heap_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pairing_heap_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pairing_heap_/pairing_heap_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pairing_heap_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/insert_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/pat_trie_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/pat_trie_base.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/r_erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/rotate_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/split_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/synth_access_traits.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/traits.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/update_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/priority_queue_base_dispatch.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rb_tree_map_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rb_tree_map_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rb_tree_map_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rb_tree_map_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rb_tree_map_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rb_tree_map_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rb_tree_map_/node.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rb_tree_map_/rb_tree_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rb_tree_map_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rb_tree_map_/traits.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rc_binomial_heap_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rc_binomial_heap_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rc_binomial_heap_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rc_binomial_heap_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rc_binomial_heap_/rc.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rc_binomial_heap_/rc_binomial_heap_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rc_binomial_heap_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rc_binomial_heap_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/resize_policy/cc_hash_max_collision_check_resize_trigger_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/resize_policy/hash_exponential_size_policy_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/resize_policy/hash_load_check_resize_trigger_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/resize_policy/hash_load_check_resize_trigger_size_base.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/resize_policy/hash_prime_size_policy_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/resize_policy/hash_standard_resize_policy_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/resize_policy/sample_resize_policy.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/resize_policy/sample_resize_trigger.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/resize_policy/sample_size_policy.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/node.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/splay_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/splay_tree_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/traits.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/standard_policies.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/thin_heap_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/thin_heap_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/thin_heap_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/thin_heap_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/thin_heap_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/thin_heap_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/thin_heap_/thin_heap_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/thin_heap_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/tree_policy/node_metadata_selector.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/tree_policy/order_statistics_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/tree_policy/sample_tree_node_update.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/tree_trace_base.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/trie_policy/node_metadata_selector.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/trie_policy/order_statistics_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/trie_policy/prefix_search_node_update_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/trie_policy/sample_trie_access_traits.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/trie_policy/sample_trie_node_update.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/trie_policy/trie_policy_base.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/trie_policy/trie_string_access_traits_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/type_utils.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/types_traits.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/unordered_iterator/const_iterator.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/unordered_iterator/iterator.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/unordered_iterator/point_const_iterator.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/unordered_iterator/point_iterator.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/exception.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/hash_policy.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/list_update_policy.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/priority_queue.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/tag_and_trait.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/tree_policy.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/trie_policy.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pod_char_traits.h"
+  textual header "/usr/include/c++/7.5.0/ext/pointer.h"
+  textual header "/usr/include/c++/7.5.0/ext/pool_allocator.h"
+  textual header "/usr/include/c++/7.5.0/ext/random"
+  textual header "/usr/include/c++/7.5.0/ext/random.tcc"
+  textual header "/usr/include/c++/7.5.0/ext/rb_tree"
+  textual header "/usr/include/c++/7.5.0/ext/rc_string_base.h"
+  textual header "/usr/include/c++/7.5.0/ext/rope"
+  textual header "/usr/include/c++/7.5.0/ext/ropeimpl.h"
+  textual header "/usr/include/c++/7.5.0/ext/slist"
+  textual header "/usr/include/c++/7.5.0/ext/sso_string_base.h"
+  textual header "/usr/include/c++/7.5.0/ext/stdio_filebuf.h"
+  textual header "/usr/include/c++/7.5.0/ext/stdio_sync_filebuf.h"
+  textual header "/usr/include/c++/7.5.0/ext/string_conversions.h"
+  textual header "/usr/include/c++/7.5.0/ext/throw_allocator.h"
+  textual header "/usr/include/c++/7.5.0/ext/type_traits.h"
+  textual header "/usr/include/c++/7.5.0/ext/typelist.h"
+  textual header "/usr/include/c++/7.5.0/ext/vstring.h"
+  textual header "/usr/include/c++/7.5.0/ext/vstring.tcc"
+  textual header "/usr/include/c++/7.5.0/ext/vstring_fwd.h"
+  textual header "/usr/include/c++/7.5.0/ext/vstring_util.h"
+  textual header "/usr/include/c++/7.5.0/fenv.h"
+  textual header "/usr/include/c++/7.5.0/forward_list"
+  textual header "/usr/include/c++/7.5.0/fstream"
+  textual header "/usr/include/c++/7.5.0/functional"
+  textual header "/usr/include/c++/7.5.0/future"
+  textual header "/usr/include/c++/7.5.0/initializer_list"
+  textual header "/usr/include/c++/7.5.0/iomanip"
+  textual header "/usr/include/c++/7.5.0/ios"
+  textual header "/usr/include/c++/7.5.0/iosfwd"
+  textual header "/usr/include/c++/7.5.0/iostream"
+  textual header "/usr/include/c++/7.5.0/istream"
+  textual header "/usr/include/c++/7.5.0/iterator"
+  textual header "/usr/include/c++/7.5.0/limits"
+  textual header "/usr/include/c++/7.5.0/list"
+  textual header "/usr/include/c++/7.5.0/locale"
+  textual header "/usr/include/c++/7.5.0/map"
+  textual header "/usr/include/c++/7.5.0/math.h"
+  textual header "/usr/include/c++/7.5.0/memory"
+  textual header "/usr/include/c++/7.5.0/mutex"
+  textual header "/usr/include/c++/7.5.0/new"
+  textual header "/usr/include/c++/7.5.0/numeric"
+  textual header "/usr/include/c++/7.5.0/optional"
+  textual header "/usr/include/c++/7.5.0/ostream"
+  textual header "/usr/include/c++/7.5.0/parallel/algo.h"
+  textual header "/usr/include/c++/7.5.0/parallel/algobase.h"
+  textual header "/usr/include/c++/7.5.0/parallel/algorithm"
+  textual header "/usr/include/c++/7.5.0/parallel/algorithmfwd.h"
+  textual header "/usr/include/c++/7.5.0/parallel/balanced_quicksort.h"
+  textual header "/usr/include/c++/7.5.0/parallel/base.h"
+  textual header "/usr/include/c++/7.5.0/parallel/basic_iterator.h"
+  textual header "/usr/include/c++/7.5.0/parallel/checkers.h"
+  textual header "/usr/include/c++/7.5.0/parallel/compatibility.h"
+  textual header "/usr/include/c++/7.5.0/parallel/compiletime_settings.h"
+  textual header "/usr/include/c++/7.5.0/parallel/equally_split.h"
+  textual header "/usr/include/c++/7.5.0/parallel/features.h"
+  textual header "/usr/include/c++/7.5.0/parallel/find.h"
+  textual header "/usr/include/c++/7.5.0/parallel/find_selectors.h"
+  textual header "/usr/include/c++/7.5.0/parallel/for_each.h"
+  textual header "/usr/include/c++/7.5.0/parallel/for_each_selectors.h"
+  textual header "/usr/include/c++/7.5.0/parallel/iterator.h"
+  textual header "/usr/include/c++/7.5.0/parallel/list_partition.h"
+  textual header "/usr/include/c++/7.5.0/parallel/losertree.h"
+  textual header "/usr/include/c++/7.5.0/parallel/merge.h"
+  textual header "/usr/include/c++/7.5.0/parallel/multiseq_selection.h"
+  textual header "/usr/include/c++/7.5.0/parallel/multiway_merge.h"
+  textual header "/usr/include/c++/7.5.0/parallel/multiway_mergesort.h"
+  textual header "/usr/include/c++/7.5.0/parallel/numeric"
+  textual header "/usr/include/c++/7.5.0/parallel/numericfwd.h"
+  textual header "/usr/include/c++/7.5.0/parallel/omp_loop.h"
+  textual header "/usr/include/c++/7.5.0/parallel/omp_loop_static.h"
+  textual header "/usr/include/c++/7.5.0/parallel/par_loop.h"
+  textual header "/usr/include/c++/7.5.0/parallel/parallel.h"
+  textual header "/usr/include/c++/7.5.0/parallel/partial_sum.h"
+  textual header "/usr/include/c++/7.5.0/parallel/partition.h"
+  textual header "/usr/include/c++/7.5.0/parallel/queue.h"
+  textual header "/usr/include/c++/7.5.0/parallel/quicksort.h"
+  textual header "/usr/include/c++/7.5.0/parallel/random_number.h"
+  textual header "/usr/include/c++/7.5.0/parallel/random_shuffle.h"
+  textual header "/usr/include/c++/7.5.0/parallel/search.h"
+  textual header "/usr/include/c++/7.5.0/parallel/set_operations.h"
+  textual header "/usr/include/c++/7.5.0/parallel/settings.h"
+  textual header "/usr/include/c++/7.5.0/parallel/sort.h"
+  textual header "/usr/include/c++/7.5.0/parallel/tags.h"
+  textual header "/usr/include/c++/7.5.0/parallel/types.h"
+  textual header "/usr/include/c++/7.5.0/parallel/unique_copy.h"
+  textual header "/usr/include/c++/7.5.0/parallel/workstealing.h"
+  textual header "/usr/include/c++/7.5.0/profile/array"
+  textual header "/usr/include/c++/7.5.0/profile/base.h"
+  textual header "/usr/include/c++/7.5.0/profile/bitset"
+  textual header "/usr/include/c++/7.5.0/profile/deque"
+  textual header "/usr/include/c++/7.5.0/profile/forward_list"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_algos.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_container_size.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_hash_func.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_hashtable_size.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_list_to_slist.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_list_to_vector.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_map_to_unordered_map.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_node.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_state.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_trace.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_vector_size.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_vector_to_list.h"
+  textual header "/usr/include/c++/7.5.0/profile/iterator_tracker.h"
+  textual header "/usr/include/c++/7.5.0/profile/list"
+  textual header "/usr/include/c++/7.5.0/profile/map"
+  textual header "/usr/include/c++/7.5.0/profile/map.h"
+  textual header "/usr/include/c++/7.5.0/profile/multimap.h"
+  textual header "/usr/include/c++/7.5.0/profile/multiset.h"
+  textual header "/usr/include/c++/7.5.0/profile/ordered_base.h"
+  textual header "/usr/include/c++/7.5.0/profile/set"
+  textual header "/usr/include/c++/7.5.0/profile/set.h"
+  textual header "/usr/include/c++/7.5.0/profile/unordered_base.h"
+  textual header "/usr/include/c++/7.5.0/profile/unordered_map"
+  textual header "/usr/include/c++/7.5.0/profile/unordered_set"
+  textual header "/usr/include/c++/7.5.0/profile/vector"
+  textual header "/usr/include/c++/7.5.0/queue"
+  textual header "/usr/include/c++/7.5.0/random"
+  textual header "/usr/include/c++/7.5.0/ratio"
+  textual header "/usr/include/c++/7.5.0/regex"
+  textual header "/usr/include/c++/7.5.0/scoped_allocator"
+  textual header "/usr/include/c++/7.5.0/set"
+  textual header "/usr/include/c++/7.5.0/shared_mutex"
+  textual header "/usr/include/c++/7.5.0/sstream"
+  textual header "/usr/include/c++/7.5.0/stack"
+  textual header "/usr/include/c++/7.5.0/stdexcept"
+  textual header "/usr/include/c++/7.5.0/stdlib.h"
+  textual header "/usr/include/c++/7.5.0/streambuf"
+  textual header "/usr/include/c++/7.5.0/string"
+  textual header "/usr/include/c++/7.5.0/string_view"
+  textual header "/usr/include/c++/7.5.0/system_error"
+  textual header "/usr/include/c++/7.5.0/tgmath.h"
+  textual header "/usr/include/c++/7.5.0/thread"
+  textual header "/usr/include/c++/7.5.0/tr1/array"
+  textual header "/usr/include/c++/7.5.0/tr1/bessel_function.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/beta_function.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/ccomplex"
+  textual header "/usr/include/c++/7.5.0/tr1/cctype"
+  textual header "/usr/include/c++/7.5.0/tr1/cfenv"
+  textual header "/usr/include/c++/7.5.0/tr1/cfloat"
+  textual header "/usr/include/c++/7.5.0/tr1/cinttypes"
+  textual header "/usr/include/c++/7.5.0/tr1/climits"
+  textual header "/usr/include/c++/7.5.0/tr1/cmath"
+  textual header "/usr/include/c++/7.5.0/tr1/complex"
+  textual header "/usr/include/c++/7.5.0/tr1/complex.h"
+  textual header "/usr/include/c++/7.5.0/tr1/cstdarg"
+  textual header "/usr/include/c++/7.5.0/tr1/cstdbool"
+  textual header "/usr/include/c++/7.5.0/tr1/cstdint"
+  textual header "/usr/include/c++/7.5.0/tr1/cstdio"
+  textual header "/usr/include/c++/7.5.0/tr1/cstdlib"
+  textual header "/usr/include/c++/7.5.0/tr1/ctgmath"
+  textual header "/usr/include/c++/7.5.0/tr1/ctime"
+  textual header "/usr/include/c++/7.5.0/tr1/ctype.h"
+  textual header "/usr/include/c++/7.5.0/tr1/cwchar"
+  textual header "/usr/include/c++/7.5.0/tr1/cwctype"
+  textual header "/usr/include/c++/7.5.0/tr1/ell_integral.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/exp_integral.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/fenv.h"
+  textual header "/usr/include/c++/7.5.0/tr1/float.h"
+  textual header "/usr/include/c++/7.5.0/tr1/functional"
+  textual header "/usr/include/c++/7.5.0/tr1/functional_hash.h"
+  textual header "/usr/include/c++/7.5.0/tr1/gamma.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/hashtable.h"
+  textual header "/usr/include/c++/7.5.0/tr1/hashtable_policy.h"
+  textual header "/usr/include/c++/7.5.0/tr1/hypergeometric.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/inttypes.h"
+  textual header "/usr/include/c++/7.5.0/tr1/legendre_function.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/limits.h"
+  textual header "/usr/include/c++/7.5.0/tr1/math.h"
+  textual header "/usr/include/c++/7.5.0/tr1/memory"
+  textual header "/usr/include/c++/7.5.0/tr1/modified_bessel_func.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/poly_hermite.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/poly_laguerre.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/random"
+  textual header "/usr/include/c++/7.5.0/tr1/random.h"
+  textual header "/usr/include/c++/7.5.0/tr1/random.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/regex"
+  textual header "/usr/include/c++/7.5.0/tr1/riemann_zeta.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/shared_ptr.h"
+  textual header "/usr/include/c++/7.5.0/tr1/special_function_util.h"
+  textual header "/usr/include/c++/7.5.0/tr1/stdarg.h"
+  textual header "/usr/include/c++/7.5.0/tr1/stdbool.h"
+  textual header "/usr/include/c++/7.5.0/tr1/stdint.h"
+  textual header "/usr/include/c++/7.5.0/tr1/stdio.h"
+  textual header "/usr/include/c++/7.5.0/tr1/stdlib.h"
+  textual header "/usr/include/c++/7.5.0/tr1/tgmath.h"
+  textual header "/usr/include/c++/7.5.0/tr1/tuple"
+  textual header "/usr/include/c++/7.5.0/tr1/type_traits"
+  textual header "/usr/include/c++/7.5.0/tr1/unordered_map"
+  textual header "/usr/include/c++/7.5.0/tr1/unordered_map.h"
+  textual header "/usr/include/c++/7.5.0/tr1/unordered_set"
+  textual header "/usr/include/c++/7.5.0/tr1/unordered_set.h"
+  textual header "/usr/include/c++/7.5.0/tr1/utility"
+  textual header "/usr/include/c++/7.5.0/tr1/wchar.h"
+  textual header "/usr/include/c++/7.5.0/tr1/wctype.h"
+  textual header "/usr/include/c++/7.5.0/tr2/bool_set"
+  textual header "/usr/include/c++/7.5.0/tr2/bool_set.tcc"
+  textual header "/usr/include/c++/7.5.0/tr2/dynamic_bitset"
+  textual header "/usr/include/c++/7.5.0/tr2/dynamic_bitset.tcc"
+  textual header "/usr/include/c++/7.5.0/tr2/ratio"
+  textual header "/usr/include/c++/7.5.0/tr2/type_traits"
+  textual header "/usr/include/c++/7.5.0/tuple"
+  textual header "/usr/include/c++/7.5.0/type_traits"
+  textual header "/usr/include/c++/7.5.0/typeindex"
+  textual header "/usr/include/c++/7.5.0/typeinfo"
+  textual header "/usr/include/c++/7.5.0/unordered_map"
+  textual header "/usr/include/c++/7.5.0/unordered_set"
+  textual header "/usr/include/c++/7.5.0/utility"
+  textual header "/usr/include/c++/7.5.0/valarray"
+  textual header "/usr/include/c++/7.5.0/variant"
+  textual header "/usr/include/c++/7.5.0/vector"
+  textual header "/usr/include/c++/7/algorithm"
+  textual header "/usr/include/c++/7/any"
+  textual header "/usr/include/c++/7/array"
+  textual header "/usr/include/c++/7/atomic"
+  textual header "/usr/include/c++/7/backward/auto_ptr.h"
+  textual header "/usr/include/c++/7/backward/backward_warning.h"
+  textual header "/usr/include/c++/7/backward/binders.h"
+  textual header "/usr/include/c++/7/backward/hash_fun.h"
+  textual header "/usr/include/c++/7/backward/hash_map"
+  textual header "/usr/include/c++/7/backward/hash_set"
+  textual header "/usr/include/c++/7/backward/hashtable.h"
+  textual header "/usr/include/c++/7/backward/strstream"
+  textual header "/usr/include/c++/7/bits/algorithmfwd.h"
+  textual header "/usr/include/c++/7/bits/alloc_traits.h"
+  textual header "/usr/include/c++/7/bits/allocated_ptr.h"
+  textual header "/usr/include/c++/7/bits/allocator.h"
+  textual header "/usr/include/c++/7/bits/atomic_base.h"
+  textual header "/usr/include/c++/7/bits/atomic_futex.h"
+  textual header "/usr/include/c++/7/bits/atomic_lockfree_defines.h"
+  textual header "/usr/include/c++/7/bits/basic_ios.h"
+  textual header "/usr/include/c++/7/bits/basic_ios.tcc"
+  textual header "/usr/include/c++/7/bits/basic_string.h"
+  textual header "/usr/include/c++/7/bits/basic_string.tcc"
+  textual header "/usr/include/c++/7/bits/boost_concept_check.h"
+  textual header "/usr/include/c++/7/bits/c++0x_warning.h"
+  textual header "/usr/include/c++/7/bits/char_traits.h"
+  textual header "/usr/include/c++/7/bits/codecvt.h"
+  textual header "/usr/include/c++/7/bits/concept_check.h"
+  textual header "/usr/include/c++/7/bits/cpp_type_traits.h"
+  textual header "/usr/include/c++/7/bits/cxxabi_forced.h"
+  textual header "/usr/include/c++/7/bits/cxxabi_init_exception.h"
+  textual header "/usr/include/c++/7/bits/deque.tcc"
+  textual header "/usr/include/c++/7/bits/enable_special_members.h"
+  textual header "/usr/include/c++/7/bits/exception.h"
+  textual header "/usr/include/c++/7/bits/exception_defines.h"
+  textual header "/usr/include/c++/7/bits/exception_ptr.h"
+  textual header "/usr/include/c++/7/bits/forward_list.h"
+  textual header "/usr/include/c++/7/bits/forward_list.tcc"
+  textual header "/usr/include/c++/7/bits/fstream.tcc"
+  textual header "/usr/include/c++/7/bits/functexcept.h"
+  textual header "/usr/include/c++/7/bits/functional_hash.h"
+  textual header "/usr/include/c++/7/bits/gslice.h"
+  textual header "/usr/include/c++/7/bits/gslice_array.h"
+  textual header "/usr/include/c++/7/bits/hash_bytes.h"
+  textual header "/usr/include/c++/7/bits/hashtable.h"
+  textual header "/usr/include/c++/7/bits/hashtable_policy.h"
+  textual header "/usr/include/c++/7/bits/indirect_array.h"
+  textual header "/usr/include/c++/7/bits/invoke.h"
+  textual header "/usr/include/c++/7/bits/ios_base.h"
+  textual header "/usr/include/c++/7/bits/istream.tcc"
+  textual header "/usr/include/c++/7/bits/list.tcc"
+  textual header "/usr/include/c++/7/bits/locale_classes.h"
+  textual header "/usr/include/c++/7/bits/locale_classes.tcc"
+  textual header "/usr/include/c++/7/bits/locale_conv.h"
+  textual header "/usr/include/c++/7/bits/locale_facets.h"
+  textual header "/usr/include/c++/7/bits/locale_facets.tcc"
+  textual header "/usr/include/c++/7/bits/locale_facets_nonio.h"
+  textual header "/usr/include/c++/7/bits/locale_facets_nonio.tcc"
+  textual header "/usr/include/c++/7/bits/localefwd.h"
+  textual header "/usr/include/c++/7/bits/mask_array.h"
+  textual header "/usr/include/c++/7/bits/memoryfwd.h"
+  textual header "/usr/include/c++/7/bits/move.h"
+  textual header "/usr/include/c++/7/bits/nested_exception.h"
+  textual header "/usr/include/c++/7/bits/node_handle.h"
+  textual header "/usr/include/c++/7/bits/ostream.tcc"
+  textual header "/usr/include/c++/7/bits/ostream_insert.h"
+  textual header "/usr/include/c++/7/bits/parse_numbers.h"
+  textual header "/usr/include/c++/7/bits/postypes.h"
+  textual header "/usr/include/c++/7/bits/predefined_ops.h"
+  textual header "/usr/include/c++/7/bits/ptr_traits.h"
+  textual header "/usr/include/c++/7/bits/quoted_string.h"
+  textual header "/usr/include/c++/7/bits/random.h"
+  textual header "/usr/include/c++/7/bits/random.tcc"
+  textual header "/usr/include/c++/7/bits/range_access.h"
+  textual header "/usr/include/c++/7/bits/refwrap.h"
+  textual header "/usr/include/c++/7/bits/regex.h"
+  textual header "/usr/include/c++/7/bits/regex.tcc"
+  textual header "/usr/include/c++/7/bits/regex_automaton.h"
+  textual header "/usr/include/c++/7/bits/regex_automaton.tcc"
+  textual header "/usr/include/c++/7/bits/regex_compiler.h"
+  textual header "/usr/include/c++/7/bits/regex_compiler.tcc"
+  textual header "/usr/include/c++/7/bits/regex_constants.h"
+  textual header "/usr/include/c++/7/bits/regex_error.h"
+  textual header "/usr/include/c++/7/bits/regex_executor.h"
+  textual header "/usr/include/c++/7/bits/regex_executor.tcc"
+  textual header "/usr/include/c++/7/bits/regex_scanner.h"
+  textual header "/usr/include/c++/7/bits/regex_scanner.tcc"
+  textual header "/usr/include/c++/7/bits/shared_ptr.h"
+  textual header "/usr/include/c++/7/bits/shared_ptr_atomic.h"
+  textual header "/usr/include/c++/7/bits/shared_ptr_base.h"
+  textual header "/usr/include/c++/7/bits/slice_array.h"
+  textual header "/usr/include/c++/7/bits/specfun.h"
+  textual header "/usr/include/c++/7/bits/sstream.tcc"
+  textual header "/usr/include/c++/7/bits/std_abs.h"
+  textual header "/usr/include/c++/7/bits/std_function.h"
+  textual header "/usr/include/c++/7/bits/std_mutex.h"
+  textual header "/usr/include/c++/7/bits/stl_algo.h"
+  textual header "/usr/include/c++/7/bits/stl_algobase.h"
+  textual header "/usr/include/c++/7/bits/stl_bvector.h"
+  textual header "/usr/include/c++/7/bits/stl_construct.h"
+  textual header "/usr/include/c++/7/bits/stl_deque.h"
+  textual header "/usr/include/c++/7/bits/stl_function.h"
+  textual header "/usr/include/c++/7/bits/stl_heap.h"
+  textual header "/usr/include/c++/7/bits/stl_iterator.h"
+  textual header "/usr/include/c++/7/bits/stl_iterator_base_funcs.h"
+  textual header "/usr/include/c++/7/bits/stl_iterator_base_types.h"
+  textual header "/usr/include/c++/7/bits/stl_list.h"
+  textual header "/usr/include/c++/7/bits/stl_map.h"
+  textual header "/usr/include/c++/7/bits/stl_multimap.h"
+  textual header "/usr/include/c++/7/bits/stl_multiset.h"
+  textual header "/usr/include/c++/7/bits/stl_numeric.h"
+  textual header "/usr/include/c++/7/bits/stl_pair.h"
+  textual header "/usr/include/c++/7/bits/stl_queue.h"
+  textual header "/usr/include/c++/7/bits/stl_raw_storage_iter.h"
+  textual header "/usr/include/c++/7/bits/stl_relops.h"
+  textual header "/usr/include/c++/7/bits/stl_set.h"
+  textual header "/usr/include/c++/7/bits/stl_stack.h"
+  textual header "/usr/include/c++/7/bits/stl_tempbuf.h"
+  textual header "/usr/include/c++/7/bits/stl_tree.h"
+  textual header "/usr/include/c++/7/bits/stl_uninitialized.h"
+  textual header "/usr/include/c++/7/bits/stl_vector.h"
+  textual header "/usr/include/c++/7/bits/stream_iterator.h"
+  textual header "/usr/include/c++/7/bits/streambuf.tcc"
+  textual header "/usr/include/c++/7/bits/streambuf_iterator.h"
+  textual header "/usr/include/c++/7/bits/string_view.tcc"
+  textual header "/usr/include/c++/7/bits/stringfwd.h"
+  textual header "/usr/include/c++/7/bits/uniform_int_dist.h"
+  textual header "/usr/include/c++/7/bits/unique_ptr.h"
+  textual header "/usr/include/c++/7/bits/unordered_map.h"
+  textual header "/usr/include/c++/7/bits/unordered_set.h"
+  textual header "/usr/include/c++/7/bits/uses_allocator.h"
+  textual header "/usr/include/c++/7/bits/valarray_after.h"
+  textual header "/usr/include/c++/7/bits/valarray_array.h"
+  textual header "/usr/include/c++/7/bits/valarray_array.tcc"
+  textual header "/usr/include/c++/7/bits/valarray_before.h"
+  textual header "/usr/include/c++/7/bits/vector.tcc"
+  textual header "/usr/include/c++/7/bitset"
+  textual header "/usr/include/c++/7/cassert"
+  textual header "/usr/include/c++/7/ccomplex"
+  textual header "/usr/include/c++/7/cctype"
+  textual header "/usr/include/c++/7/cerrno"
+  textual header "/usr/include/c++/7/cfenv"
+  textual header "/usr/include/c++/7/cfloat"
+  textual header "/usr/include/c++/7/chrono"
+  textual header "/usr/include/c++/7/cinttypes"
+  textual header "/usr/include/c++/7/ciso646"
+  textual header "/usr/include/c++/7/climits"
+  textual header "/usr/include/c++/7/clocale"
+  textual header "/usr/include/c++/7/cmath"
+  textual header "/usr/include/c++/7/codecvt"
+  textual header "/usr/include/c++/7/complex"
+  textual header "/usr/include/c++/7/complex.h"
+  textual header "/usr/include/c++/7/condition_variable"
+  textual header "/usr/include/c++/7/csetjmp"
+  textual header "/usr/include/c++/7/csignal"
+  textual header "/usr/include/c++/7/cstdalign"
+  textual header "/usr/include/c++/7/cstdarg"
+  textual header "/usr/include/c++/7/cstdbool"
+  textual header "/usr/include/c++/7/cstddef"
+  textual header "/usr/include/c++/7/cstdint"
+  textual header "/usr/include/c++/7/cstdio"
+  textual header "/usr/include/c++/7/cstdlib"
+  textual header "/usr/include/c++/7/cstring"
+  textual header "/usr/include/c++/7/ctgmath"
+  textual header "/usr/include/c++/7/ctime"
+  textual header "/usr/include/c++/7/cuchar"
+  textual header "/usr/include/c++/7/cwchar"
+  textual header "/usr/include/c++/7/cwctype"
+  textual header "/usr/include/c++/7/cxxabi.h"
+  textual header "/usr/include/c++/7/debug/array"
+  textual header "/usr/include/c++/7/debug/assertions.h"
+  textual header "/usr/include/c++/7/debug/bitset"
+  textual header "/usr/include/c++/7/debug/debug.h"
+  textual header "/usr/include/c++/7/debug/deque"
+  textual header "/usr/include/c++/7/debug/formatter.h"
+  textual header "/usr/include/c++/7/debug/forward_list"
+  textual header "/usr/include/c++/7/debug/functions.h"
+  textual header "/usr/include/c++/7/debug/helper_functions.h"
+  textual header "/usr/include/c++/7/debug/list"
+  textual header "/usr/include/c++/7/debug/macros.h"
+  textual header "/usr/include/c++/7/debug/map"
+  textual header "/usr/include/c++/7/debug/map.h"
+  textual header "/usr/include/c++/7/debug/multimap.h"
+  textual header "/usr/include/c++/7/debug/multiset.h"
+  textual header "/usr/include/c++/7/debug/safe_base.h"
+  textual header "/usr/include/c++/7/debug/safe_container.h"
+  textual header "/usr/include/c++/7/debug/safe_iterator.h"
+  textual header "/usr/include/c++/7/debug/safe_iterator.tcc"
+  textual header "/usr/include/c++/7/debug/safe_local_iterator.h"
+  textual header "/usr/include/c++/7/debug/safe_local_iterator.tcc"
+  textual header "/usr/include/c++/7/debug/safe_sequence.h"
+  textual header "/usr/include/c++/7/debug/safe_sequence.tcc"
+  textual header "/usr/include/c++/7/debug/safe_unordered_base.h"
+  textual header "/usr/include/c++/7/debug/safe_unordered_container.h"
+  textual header "/usr/include/c++/7/debug/safe_unordered_container.tcc"
+  textual header "/usr/include/c++/7/debug/set"
+  textual header "/usr/include/c++/7/debug/set.h"
+  textual header "/usr/include/c++/7/debug/stl_iterator.h"
+  textual header "/usr/include/c++/7/debug/string"
+  textual header "/usr/include/c++/7/debug/unordered_map"
+  textual header "/usr/include/c++/7/debug/unordered_set"
+  textual header "/usr/include/c++/7/debug/vector"
+  textual header "/usr/include/c++/7/decimal/decimal"
+  textual header "/usr/include/c++/7/decimal/decimal.h"
+  textual header "/usr/include/c++/7/deque"
+  textual header "/usr/include/c++/7/exception"
+  textual header "/usr/include/c++/7/experimental/algorithm"
+  textual header "/usr/include/c++/7/experimental/any"
+  textual header "/usr/include/c++/7/experimental/array"
+  textual header "/usr/include/c++/7/experimental/bits/erase_if.h"
+  textual header "/usr/include/c++/7/experimental/bits/fs_dir.h"
+  textual header "/usr/include/c++/7/experimental/bits/fs_fwd.h"
+  textual header "/usr/include/c++/7/experimental/bits/fs_ops.h"
+  textual header "/usr/include/c++/7/experimental/bits/fs_path.h"
+  textual header "/usr/include/c++/7/experimental/bits/lfts_config.h"
+  textual header "/usr/include/c++/7/experimental/bits/shared_ptr.h"
+  textual header "/usr/include/c++/7/experimental/bits/string_view.tcc"
+  textual header "/usr/include/c++/7/experimental/chrono"
+  textual header "/usr/include/c++/7/experimental/deque"
+  textual header "/usr/include/c++/7/experimental/filesystem"
+  textual header "/usr/include/c++/7/experimental/forward_list"
+  textual header "/usr/include/c++/7/experimental/functional"
+  textual header "/usr/include/c++/7/experimental/iterator"
+  textual header "/usr/include/c++/7/experimental/list"
+  textual header "/usr/include/c++/7/experimental/map"
+  textual header "/usr/include/c++/7/experimental/memory"
+  textual header "/usr/include/c++/7/experimental/memory_resource"
+  textual header "/usr/include/c++/7/experimental/numeric"
+  textual header "/usr/include/c++/7/experimental/optional"
+  textual header "/usr/include/c++/7/experimental/propagate_const"
+  textual header "/usr/include/c++/7/experimental/random"
+  textual header "/usr/include/c++/7/experimental/ratio"
+  textual header "/usr/include/c++/7/experimental/regex"
+  textual header "/usr/include/c++/7/experimental/set"
+  textual header "/usr/include/c++/7/experimental/source_location"
+  textual header "/usr/include/c++/7/experimental/string"
+  textual header "/usr/include/c++/7/experimental/string_view"
+  textual header "/usr/include/c++/7/experimental/system_error"
+  textual header "/usr/include/c++/7/experimental/tuple"
+  textual header "/usr/include/c++/7/experimental/type_traits"
+  textual header "/usr/include/c++/7/experimental/unordered_map"
+  textual header "/usr/include/c++/7/experimental/unordered_set"
+  textual header "/usr/include/c++/7/experimental/utility"
+  textual header "/usr/include/c++/7/experimental/vector"
+  textual header "/usr/include/c++/7/ext/algorithm"
+  textual header "/usr/include/c++/7/ext/aligned_buffer.h"
+  textual header "/usr/include/c++/7/ext/alloc_traits.h"
+  textual header "/usr/include/c++/7/ext/array_allocator.h"
+  textual header "/usr/include/c++/7/ext/atomicity.h"
+  textual header "/usr/include/c++/7/ext/bitmap_allocator.h"
+  textual header "/usr/include/c++/7/ext/cast.h"
+  textual header "/usr/include/c++/7/ext/cmath"
+  textual header "/usr/include/c++/7/ext/codecvt_specializations.h"
+  textual header "/usr/include/c++/7/ext/concurrence.h"
+  textual header "/usr/include/c++/7/ext/debug_allocator.h"
+  textual header "/usr/include/c++/7/ext/enc_filebuf.h"
+  textual header "/usr/include/c++/7/ext/extptr_allocator.h"
+  textual header "/usr/include/c++/7/ext/functional"
+  textual header "/usr/include/c++/7/ext/hash_map"
+  textual header "/usr/include/c++/7/ext/hash_set"
+  textual header "/usr/include/c++/7/ext/iterator"
+  textual header "/usr/include/c++/7/ext/malloc_allocator.h"
+  textual header "/usr/include/c++/7/ext/memory"
+  textual header "/usr/include/c++/7/ext/mt_allocator.h"
+  textual header "/usr/include/c++/7/ext/new_allocator.h"
+  textual header "/usr/include/c++/7/ext/numeric"
+  textual header "/usr/include/c++/7/ext/numeric_traits.h"
+  textual header "/usr/include/c++/7/ext/pb_ds/assoc_container.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/bin_search_tree_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/bin_search_tree_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/bin_search_tree_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/bin_search_tree_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/bin_search_tree_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/bin_search_tree_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/bin_search_tree_/node_iterators.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/bin_search_tree_/point_iterators.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/bin_search_tree_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/bin_search_tree_/r_erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/bin_search_tree_/rotate_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/bin_search_tree_/traits.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binary_heap_/binary_heap_.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binary_heap_/const_iterator.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binary_heap_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binary_heap_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binary_heap_/entry_cmp.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binary_heap_/entry_pred.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binary_heap_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binary_heap_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binary_heap_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binary_heap_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binary_heap_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binary_heap_/point_const_iterator.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binary_heap_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binary_heap_/resize_policy.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binary_heap_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binary_heap_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binomial_heap_/binomial_heap_.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binomial_heap_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binomial_heap_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binomial_heap_base_/binomial_heap_base_.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binomial_heap_base_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binomial_heap_base_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binomial_heap_base_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binomial_heap_base_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binomial_heap_base_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/binomial_heap_base_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/branch_policy/branch_policy.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/branch_policy/null_node_metadata.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/branch_policy/traits.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/cc_ht_map_.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/cmp_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/cond_key_dtor_entry_dealtor.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/constructor_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/constructor_destructor_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/constructor_destructor_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/debug_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/debug_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/entry_list_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/erase_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/erase_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/find_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/insert_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/insert_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/resize_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/resize_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/resize_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/size_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cc_hash_table_map_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/cond_dealtor.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/container_base_dispatch.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/debug_map_base.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/eq_fn/eq_by_less.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/eq_fn/hash_eq_fn.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/constructor_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/constructor_destructor_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/constructor_destructor_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/debug_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/debug_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/erase_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/erase_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/find_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/find_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/gp_ht_map_.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/insert_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/insert_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/iterator_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/resize_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/resize_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/resize_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/gp_hash_table_map_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/hash_fn/direct_mask_range_hashing_imp.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/hash_fn/direct_mod_range_hashing_imp.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/hash_fn/linear_probe_fn_imp.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/hash_fn/mask_based_range_hashing.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/hash_fn/mod_based_range_hashing.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/hash_fn/probe_fn_base.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/hash_fn/quadratic_probe_fn_imp.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/hash_fn/ranged_hash_fn.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/hash_fn/ranged_probe_fn.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/hash_fn/sample_probe_fn.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/hash_fn/sample_range_hashing.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/hash_fn/sample_ranged_hash_fn.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/hash_fn/sample_ranged_probe_fn.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/left_child_next_sibling_heap_/const_iterator.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/left_child_next_sibling_heap_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/left_child_next_sibling_heap_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/left_child_next_sibling_heap_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/left_child_next_sibling_heap_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/left_child_next_sibling_heap_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/left_child_next_sibling_heap_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/left_child_next_sibling_heap_/left_child_next_sibling_heap_.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/left_child_next_sibling_heap_/node.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/left_child_next_sibling_heap_/point_const_iterator.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/left_child_next_sibling_heap_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/left_child_next_sibling_heap_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/list_update_map_/constructor_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/list_update_map_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/list_update_map_/entry_metadata_base.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/list_update_map_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/list_update_map_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/list_update_map_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/list_update_map_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/list_update_map_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/list_update_map_/lu_map_.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/list_update_map_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/list_update_policy/lu_counter_metadata.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/list_update_policy/sample_update_policy.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/ov_tree_map_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/ov_tree_map_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/ov_tree_map_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/ov_tree_map_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/ov_tree_map_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/ov_tree_map_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/ov_tree_map_/node_iterators.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/ov_tree_map_/ov_tree_map_.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/ov_tree_map_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/ov_tree_map_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/ov_tree_map_/traits.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pairing_heap_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pairing_heap_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pairing_heap_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pairing_heap_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pairing_heap_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pairing_heap_/pairing_heap_.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pairing_heap_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pat_trie_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pat_trie_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pat_trie_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pat_trie_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pat_trie_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pat_trie_/insert_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pat_trie_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pat_trie_/pat_trie_.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pat_trie_/pat_trie_base.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pat_trie_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pat_trie_/r_erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pat_trie_/rotate_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pat_trie_/split_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pat_trie_/synth_access_traits.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pat_trie_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pat_trie_/traits.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/pat_trie_/update_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/priority_queue_base_dispatch.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/rb_tree_map_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/rb_tree_map_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/rb_tree_map_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/rb_tree_map_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/rb_tree_map_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/rb_tree_map_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/rb_tree_map_/node.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/rb_tree_map_/rb_tree_.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/rb_tree_map_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/rb_tree_map_/traits.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/rc_binomial_heap_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/rc_binomial_heap_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/rc_binomial_heap_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/rc_binomial_heap_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/rc_binomial_heap_/rc.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/rc_binomial_heap_/rc_binomial_heap_.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/rc_binomial_heap_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/rc_binomial_heap_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/resize_policy/cc_hash_max_collision_check_resize_trigger_imp.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/resize_policy/hash_exponential_size_policy_imp.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/resize_policy/hash_load_check_resize_trigger_imp.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/resize_policy/hash_load_check_resize_trigger_size_base.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/resize_policy/hash_prime_size_policy_imp.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/resize_policy/hash_standard_resize_policy_imp.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/resize_policy/sample_resize_policy.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/resize_policy/sample_resize_trigger.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/resize_policy/sample_size_policy.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/splay_tree_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/splay_tree_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/splay_tree_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/splay_tree_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/splay_tree_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/splay_tree_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/splay_tree_/node.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/splay_tree_/splay_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/splay_tree_/splay_tree_.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/splay_tree_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/splay_tree_/traits.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/standard_policies.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/thin_heap_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/thin_heap_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/thin_heap_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/thin_heap_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/thin_heap_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/thin_heap_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/thin_heap_/thin_heap_.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/thin_heap_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/tree_policy/node_metadata_selector.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/tree_policy/order_statistics_imp.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/tree_policy/sample_tree_node_update.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/tree_trace_base.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/trie_policy/node_metadata_selector.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/trie_policy/order_statistics_imp.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/trie_policy/prefix_search_node_update_imp.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/trie_policy/sample_trie_access_traits.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/trie_policy/sample_trie_node_update.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/trie_policy/trie_policy_base.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/trie_policy/trie_string_access_traits_imp.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/type_utils.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/types_traits.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/unordered_iterator/const_iterator.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/unordered_iterator/iterator.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/unordered_iterator/point_const_iterator.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/detail/unordered_iterator/point_iterator.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/exception.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/hash_policy.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/list_update_policy.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/priority_queue.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/tag_and_trait.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/tree_policy.hpp"
+  textual header "/usr/include/c++/7/ext/pb_ds/trie_policy.hpp"
+  textual header "/usr/include/c++/7/ext/pod_char_traits.h"
+  textual header "/usr/include/c++/7/ext/pointer.h"
+  textual header "/usr/include/c++/7/ext/pool_allocator.h"
+  textual header "/usr/include/c++/7/ext/random"
+  textual header "/usr/include/c++/7/ext/random.tcc"
+  textual header "/usr/include/c++/7/ext/rb_tree"
+  textual header "/usr/include/c++/7/ext/rc_string_base.h"
+  textual header "/usr/include/c++/7/ext/rope"
+  textual header "/usr/include/c++/7/ext/ropeimpl.h"
+  textual header "/usr/include/c++/7/ext/slist"
+  textual header "/usr/include/c++/7/ext/sso_string_base.h"
+  textual header "/usr/include/c++/7/ext/stdio_filebuf.h"
+  textual header "/usr/include/c++/7/ext/stdio_sync_filebuf.h"
+  textual header "/usr/include/c++/7/ext/string_conversions.h"
+  textual header "/usr/include/c++/7/ext/throw_allocator.h"
+  textual header "/usr/include/c++/7/ext/type_traits.h"
+  textual header "/usr/include/c++/7/ext/typelist.h"
+  textual header "/usr/include/c++/7/ext/vstring.h"
+  textual header "/usr/include/c++/7/ext/vstring.tcc"
+  textual header "/usr/include/c++/7/ext/vstring_fwd.h"
+  textual header "/usr/include/c++/7/ext/vstring_util.h"
+  textual header "/usr/include/c++/7/fenv.h"
+  textual header "/usr/include/c++/7/forward_list"
+  textual header "/usr/include/c++/7/fstream"
+  textual header "/usr/include/c++/7/functional"
+  textual header "/usr/include/c++/7/future"
+  textual header "/usr/include/c++/7/initializer_list"
+  textual header "/usr/include/c++/7/iomanip"
+  textual header "/usr/include/c++/7/ios"
+  textual header "/usr/include/c++/7/iosfwd"
+  textual header "/usr/include/c++/7/iostream"
+  textual header "/usr/include/c++/7/istream"
+  textual header "/usr/include/c++/7/iterator"
+  textual header "/usr/include/c++/7/limits"
+  textual header "/usr/include/c++/7/list"
+  textual header "/usr/include/c++/7/locale"
+  textual header "/usr/include/c++/7/map"
+  textual header "/usr/include/c++/7/math.h"
+  textual header "/usr/include/c++/7/memory"
+  textual header "/usr/include/c++/7/mutex"
+  textual header "/usr/include/c++/7/new"
+  textual header "/usr/include/c++/7/numeric"
+  textual header "/usr/include/c++/7/optional"
+  textual header "/usr/include/c++/7/ostream"
+  textual header "/usr/include/c++/7/parallel/algo.h"
+  textual header "/usr/include/c++/7/parallel/algobase.h"
+  textual header "/usr/include/c++/7/parallel/algorithm"
+  textual header "/usr/include/c++/7/parallel/algorithmfwd.h"
+  textual header "/usr/include/c++/7/parallel/balanced_quicksort.h"
+  textual header "/usr/include/c++/7/parallel/base.h"
+  textual header "/usr/include/c++/7/parallel/basic_iterator.h"
+  textual header "/usr/include/c++/7/parallel/checkers.h"
+  textual header "/usr/include/c++/7/parallel/compatibility.h"
+  textual header "/usr/include/c++/7/parallel/compiletime_settings.h"
+  textual header "/usr/include/c++/7/parallel/equally_split.h"
+  textual header "/usr/include/c++/7/parallel/features.h"
+  textual header "/usr/include/c++/7/parallel/find.h"
+  textual header "/usr/include/c++/7/parallel/find_selectors.h"
+  textual header "/usr/include/c++/7/parallel/for_each.h"
+  textual header "/usr/include/c++/7/parallel/for_each_selectors.h"
+  textual header "/usr/include/c++/7/parallel/iterator.h"
+  textual header "/usr/include/c++/7/parallel/list_partition.h"
+  textual header "/usr/include/c++/7/parallel/losertree.h"
+  textual header "/usr/include/c++/7/parallel/merge.h"
+  textual header "/usr/include/c++/7/parallel/multiseq_selection.h"
+  textual header "/usr/include/c++/7/parallel/multiway_merge.h"
+  textual header "/usr/include/c++/7/parallel/multiway_mergesort.h"
+  textual header "/usr/include/c++/7/parallel/numeric"
+  textual header "/usr/include/c++/7/parallel/numericfwd.h"
+  textual header "/usr/include/c++/7/parallel/omp_loop.h"
+  textual header "/usr/include/c++/7/parallel/omp_loop_static.h"
+  textual header "/usr/include/c++/7/parallel/par_loop.h"
+  textual header "/usr/include/c++/7/parallel/parallel.h"
+  textual header "/usr/include/c++/7/parallel/partial_sum.h"
+  textual header "/usr/include/c++/7/parallel/partition.h"
+  textual header "/usr/include/c++/7/parallel/queue.h"
+  textual header "/usr/include/c++/7/parallel/quicksort.h"
+  textual header "/usr/include/c++/7/parallel/random_number.h"
+  textual header "/usr/include/c++/7/parallel/random_shuffle.h"
+  textual header "/usr/include/c++/7/parallel/search.h"
+  textual header "/usr/include/c++/7/parallel/set_operations.h"
+  textual header "/usr/include/c++/7/parallel/settings.h"
+  textual header "/usr/include/c++/7/parallel/sort.h"
+  textual header "/usr/include/c++/7/parallel/tags.h"
+  textual header "/usr/include/c++/7/parallel/types.h"
+  textual header "/usr/include/c++/7/parallel/unique_copy.h"
+  textual header "/usr/include/c++/7/parallel/workstealing.h"
+  textual header "/usr/include/c++/7/profile/array"
+  textual header "/usr/include/c++/7/profile/base.h"
+  textual header "/usr/include/c++/7/profile/bitset"
+  textual header "/usr/include/c++/7/profile/deque"
+  textual header "/usr/include/c++/7/profile/forward_list"
+  textual header "/usr/include/c++/7/profile/impl/profiler.h"
+  textual header "/usr/include/c++/7/profile/impl/profiler_algos.h"
+  textual header "/usr/include/c++/7/profile/impl/profiler_container_size.h"
+  textual header "/usr/include/c++/7/profile/impl/profiler_hash_func.h"
+  textual header "/usr/include/c++/7/profile/impl/profiler_hashtable_size.h"
+  textual header "/usr/include/c++/7/profile/impl/profiler_list_to_slist.h"
+  textual header "/usr/include/c++/7/profile/impl/profiler_list_to_vector.h"
+  textual header "/usr/include/c++/7/profile/impl/profiler_map_to_unordered_map.h"
+  textual header "/usr/include/c++/7/profile/impl/profiler_node.h"
+  textual header "/usr/include/c++/7/profile/impl/profiler_state.h"
+  textual header "/usr/include/c++/7/profile/impl/profiler_trace.h"
+  textual header "/usr/include/c++/7/profile/impl/profiler_vector_size.h"
+  textual header "/usr/include/c++/7/profile/impl/profiler_vector_to_list.h"
+  textual header "/usr/include/c++/7/profile/iterator_tracker.h"
+  textual header "/usr/include/c++/7/profile/list"
+  textual header "/usr/include/c++/7/profile/map"
+  textual header "/usr/include/c++/7/profile/map.h"
+  textual header "/usr/include/c++/7/profile/multimap.h"
+  textual header "/usr/include/c++/7/profile/multiset.h"
+  textual header "/usr/include/c++/7/profile/ordered_base.h"
+  textual header "/usr/include/c++/7/profile/set"
+  textual header "/usr/include/c++/7/profile/set.h"
+  textual header "/usr/include/c++/7/profile/unordered_base.h"
+  textual header "/usr/include/c++/7/profile/unordered_map"
+  textual header "/usr/include/c++/7/profile/unordered_set"
+  textual header "/usr/include/c++/7/profile/vector"
+  textual header "/usr/include/c++/7/queue"
+  textual header "/usr/include/c++/7/random"
+  textual header "/usr/include/c++/7/ratio"
+  textual header "/usr/include/c++/7/regex"
+  textual header "/usr/include/c++/7/scoped_allocator"
+  textual header "/usr/include/c++/7/set"
+  textual header "/usr/include/c++/7/shared_mutex"
+  textual header "/usr/include/c++/7/sstream"
+  textual header "/usr/include/c++/7/stack"
+  textual header "/usr/include/c++/7/stdexcept"
+  textual header "/usr/include/c++/7/stdlib.h"
+  textual header "/usr/include/c++/7/streambuf"
+  textual header "/usr/include/c++/7/string"
+  textual header "/usr/include/c++/7/string_view"
+  textual header "/usr/include/c++/7/system_error"
+  textual header "/usr/include/c++/7/tgmath.h"
+  textual header "/usr/include/c++/7/thread"
+  textual header "/usr/include/c++/7/tr1/array"
+  textual header "/usr/include/c++/7/tr1/bessel_function.tcc"
+  textual header "/usr/include/c++/7/tr1/beta_function.tcc"
+  textual header "/usr/include/c++/7/tr1/ccomplex"
+  textual header "/usr/include/c++/7/tr1/cctype"
+  textual header "/usr/include/c++/7/tr1/cfenv"
+  textual header "/usr/include/c++/7/tr1/cfloat"
+  textual header "/usr/include/c++/7/tr1/cinttypes"
+  textual header "/usr/include/c++/7/tr1/climits"
+  textual header "/usr/include/c++/7/tr1/cmath"
+  textual header "/usr/include/c++/7/tr1/complex"
+  textual header "/usr/include/c++/7/tr1/complex.h"
+  textual header "/usr/include/c++/7/tr1/cstdarg"
+  textual header "/usr/include/c++/7/tr1/cstdbool"
+  textual header "/usr/include/c++/7/tr1/cstdint"
+  textual header "/usr/include/c++/7/tr1/cstdio"
+  textual header "/usr/include/c++/7/tr1/cstdlib"
+  textual header "/usr/include/c++/7/tr1/ctgmath"
+  textual header "/usr/include/c++/7/tr1/ctime"
+  textual header "/usr/include/c++/7/tr1/ctype.h"
+  textual header "/usr/include/c++/7/tr1/cwchar"
+  textual header "/usr/include/c++/7/tr1/cwctype"
+  textual header "/usr/include/c++/7/tr1/ell_integral.tcc"
+  textual header "/usr/include/c++/7/tr1/exp_integral.tcc"
+  textual header "/usr/include/c++/7/tr1/fenv.h"
+  textual header "/usr/include/c++/7/tr1/float.h"
+  textual header "/usr/include/c++/7/tr1/functional"
+  textual header "/usr/include/c++/7/tr1/functional_hash.h"
+  textual header "/usr/include/c++/7/tr1/gamma.tcc"
+  textual header "/usr/include/c++/7/tr1/hashtable.h"
+  textual header "/usr/include/c++/7/tr1/hashtable_policy.h"
+  textual header "/usr/include/c++/7/tr1/hypergeometric.tcc"
+  textual header "/usr/include/c++/7/tr1/inttypes.h"
+  textual header "/usr/include/c++/7/tr1/legendre_function.tcc"
+  textual header "/usr/include/c++/7/tr1/limits.h"
+  textual header "/usr/include/c++/7/tr1/math.h"
+  textual header "/usr/include/c++/7/tr1/memory"
+  textual header "/usr/include/c++/7/tr1/modified_bessel_func.tcc"
+  textual header "/usr/include/c++/7/tr1/poly_hermite.tcc"
+  textual header "/usr/include/c++/7/tr1/poly_laguerre.tcc"
+  textual header "/usr/include/c++/7/tr1/random"
+  textual header "/usr/include/c++/7/tr1/random.h"
+  textual header "/usr/include/c++/7/tr1/random.tcc"
+  textual header "/usr/include/c++/7/tr1/regex"
+  textual header "/usr/include/c++/7/tr1/riemann_zeta.tcc"
+  textual header "/usr/include/c++/7/tr1/shared_ptr.h"
+  textual header "/usr/include/c++/7/tr1/special_function_util.h"
+  textual header "/usr/include/c++/7/tr1/stdarg.h"
+  textual header "/usr/include/c++/7/tr1/stdbool.h"
+  textual header "/usr/include/c++/7/tr1/stdint.h"
+  textual header "/usr/include/c++/7/tr1/stdio.h"
+  textual header "/usr/include/c++/7/tr1/stdlib.h"
+  textual header "/usr/include/c++/7/tr1/tgmath.h"
+  textual header "/usr/include/c++/7/tr1/tuple"
+  textual header "/usr/include/c++/7/tr1/type_traits"
+  textual header "/usr/include/c++/7/tr1/unordered_map"
+  textual header "/usr/include/c++/7/tr1/unordered_map.h"
+  textual header "/usr/include/c++/7/tr1/unordered_set"
+  textual header "/usr/include/c++/7/tr1/unordered_set.h"
+  textual header "/usr/include/c++/7/tr1/utility"
+  textual header "/usr/include/c++/7/tr1/wchar.h"
+  textual header "/usr/include/c++/7/tr1/wctype.h"
+  textual header "/usr/include/c++/7/tr2/bool_set"
+  textual header "/usr/include/c++/7/tr2/bool_set.tcc"
+  textual header "/usr/include/c++/7/tr2/dynamic_bitset"
+  textual header "/usr/include/c++/7/tr2/dynamic_bitset.tcc"
+  textual header "/usr/include/c++/7/tr2/ratio"
+  textual header "/usr/include/c++/7/tr2/type_traits"
+  textual header "/usr/include/c++/7/tuple"
+  textual header "/usr/include/c++/7/type_traits"
+  textual header "/usr/include/c++/7/typeindex"
+  textual header "/usr/include/c++/7/typeinfo"
+  textual header "/usr/include/c++/7/unordered_map"
+  textual header "/usr/include/c++/7/unordered_set"
+  textual header "/usr/include/c++/7/utility"
+  textual header "/usr/include/c++/7/valarray"
+  textual header "/usr/include/c++/7/variant"
+  textual header "/usr/include/c++/7/vector"
+  textual header "/usr/include/clang/6.0.0/include/__clang_cuda_builtin_vars.h"
+  textual header "/usr/include/clang/6.0.0/include/__clang_cuda_cmath.h"
+  textual header "/usr/include/clang/6.0.0/include/__clang_cuda_complex_builtins.h"
+  textual header "/usr/include/clang/6.0.0/include/__clang_cuda_intrinsics.h"
+  textual header "/usr/include/clang/6.0.0/include/__clang_cuda_math_forward_declares.h"
+  textual header "/usr/include/clang/6.0.0/include/__clang_cuda_runtime_wrapper.h"
+  textual header "/usr/include/clang/6.0.0/include/__stddef_max_align_t.h"
+  textual header "/usr/include/clang/6.0.0/include/__wmmintrin_aes.h"
+  textual header "/usr/include/clang/6.0.0/include/__wmmintrin_pclmul.h"
+  textual header "/usr/include/clang/6.0.0/include/adxintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/altivec.h"
+  textual header "/usr/include/clang/6.0.0/include/ammintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/arm64intr.h"
+  textual header "/usr/include/clang/6.0.0/include/arm_acle.h"
+  textual header "/usr/include/clang/6.0.0/include/arm_neon.h"
+  textual header "/usr/include/clang/6.0.0/include/armintr.h"
+  textual header "/usr/include/clang/6.0.0/include/avx2intrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512bitalgintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512bwintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512cdintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512dqintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512erintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512fintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512ifmaintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512ifmavlintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512pfintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vbmi2intrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vbmiintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vbmivlintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vlbitalgintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vlbwintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vlcdintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vldqintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vlintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vlvbmi2intrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vlvnniintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vnniintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vpopcntdqintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vpopcntdqvlintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avxintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/bmi2intrin.h"
+  textual header "/usr/include/clang/6.0.0/include/bmiintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/cetintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/clflushoptintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/clwbintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/clzerointrin.h"
+  textual header "/usr/include/clang/6.0.0/include/cpuid.h"
+  textual header "/usr/include/clang/6.0.0/include/cuda_wrappers/algorithm"
+  textual header "/usr/include/clang/6.0.0/include/cuda_wrappers/complex"
+  textual header "/usr/include/clang/6.0.0/include/cuda_wrappers/new"
+  textual header "/usr/include/clang/6.0.0/include/emmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/f16cintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/float.h"
+  textual header "/usr/include/clang/6.0.0/include/fma4intrin.h"
+  textual header "/usr/include/clang/6.0.0/include/fmaintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/fxsrintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/gfniintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/htmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/htmxlintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/ia32intrin.h"
+  textual header "/usr/include/clang/6.0.0/include/immintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/intrin.h"
+  textual header "/usr/include/clang/6.0.0/include/inttypes.h"
+  textual header "/usr/include/clang/6.0.0/include/iso646.h"
+  textual header "/usr/include/clang/6.0.0/include/limits.h"
+  textual header "/usr/include/clang/6.0.0/include/lwpintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/lzcntintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/mm3dnow.h"
+  textual header "/usr/include/clang/6.0.0/include/mm_malloc.h"
+  textual header "/usr/include/clang/6.0.0/include/mmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/module.modulemap"
+  textual header "/usr/include/clang/6.0.0/include/msa.h"
+  textual header "/usr/include/clang/6.0.0/include/mwaitxintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/nmmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/opencl-c.h"
+  textual header "/usr/include/clang/6.0.0/include/pkuintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/pmmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/popcntintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/prfchwintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/rdseedintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/rtmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/s390intrin.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/allocator_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/asan_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/common_interface_defs.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/coverage_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/dfsan_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/esan_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/hwasan_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/linux_syscall_hooks.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/lsan_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/msan_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/scudo_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/tsan_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/tsan_interface_atomic.h"
+  textual header "/usr/include/clang/6.0.0/include/shaintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/smmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/stdalign.h"
+  textual header "/usr/include/clang/6.0.0/include/stdarg.h"
+  textual header "/usr/include/clang/6.0.0/include/stdatomic.h"
+  textual header "/usr/include/clang/6.0.0/include/stdbool.h"
+  textual header "/usr/include/clang/6.0.0/include/stddef.h"
+  textual header "/usr/include/clang/6.0.0/include/stdint.h"
+  textual header "/usr/include/clang/6.0.0/include/stdnoreturn.h"
+  textual header "/usr/include/clang/6.0.0/include/tbmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/tgmath.h"
+  textual header "/usr/include/clang/6.0.0/include/tmmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/unwind.h"
+  textual header "/usr/include/clang/6.0.0/include/vadefs.h"
+  textual header "/usr/include/clang/6.0.0/include/vaesintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/varargs.h"
+  textual header "/usr/include/clang/6.0.0/include/vecintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/vpclmulqdqintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/wmmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/x86intrin.h"
+  textual header "/usr/include/clang/6.0.0/include/xmmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/xopintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/xray/xray_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/xray/xray_log_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/xsavecintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/xsaveintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/xsaveoptintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/xsavesintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/xtestintrin.h"
+  textual header "/usr/include/clang/6.0/include/__clang_cuda_builtin_vars.h"
+  textual header "/usr/include/clang/6.0/include/__clang_cuda_cmath.h"
+  textual header "/usr/include/clang/6.0/include/__clang_cuda_complex_builtins.h"
+  textual header "/usr/include/clang/6.0/include/__clang_cuda_intrinsics.h"
+  textual header "/usr/include/clang/6.0/include/__clang_cuda_math_forward_declares.h"
+  textual header "/usr/include/clang/6.0/include/__clang_cuda_runtime_wrapper.h"
+  textual header "/usr/include/clang/6.0/include/__stddef_max_align_t.h"
+  textual header "/usr/include/clang/6.0/include/__wmmintrin_aes.h"
+  textual header "/usr/include/clang/6.0/include/__wmmintrin_pclmul.h"
+  textual header "/usr/include/clang/6.0/include/adxintrin.h"
+  textual header "/usr/include/clang/6.0/include/altivec.h"
+  textual header "/usr/include/clang/6.0/include/ammintrin.h"
+  textual header "/usr/include/clang/6.0/include/arm64intr.h"
+  textual header "/usr/include/clang/6.0/include/arm_acle.h"
+  textual header "/usr/include/clang/6.0/include/arm_neon.h"
+  textual header "/usr/include/clang/6.0/include/armintr.h"
+  textual header "/usr/include/clang/6.0/include/avx2intrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512bitalgintrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512bwintrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512cdintrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512dqintrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512erintrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512fintrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512ifmaintrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512ifmavlintrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512pfintrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512vbmi2intrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512vbmiintrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512vbmivlintrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512vlbitalgintrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512vlbwintrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512vlcdintrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512vldqintrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512vlintrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512vlvbmi2intrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512vlvnniintrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512vnniintrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512vpopcntdqintrin.h"
+  textual header "/usr/include/clang/6.0/include/avx512vpopcntdqvlintrin.h"
+  textual header "/usr/include/clang/6.0/include/avxintrin.h"
+  textual header "/usr/include/clang/6.0/include/bmi2intrin.h"
+  textual header "/usr/include/clang/6.0/include/bmiintrin.h"
+  textual header "/usr/include/clang/6.0/include/cetintrin.h"
+  textual header "/usr/include/clang/6.0/include/clflushoptintrin.h"
+  textual header "/usr/include/clang/6.0/include/clwbintrin.h"
+  textual header "/usr/include/clang/6.0/include/clzerointrin.h"
+  textual header "/usr/include/clang/6.0/include/cpuid.h"
+  textual header "/usr/include/clang/6.0/include/cuda_wrappers/algorithm"
+  textual header "/usr/include/clang/6.0/include/cuda_wrappers/complex"
+  textual header "/usr/include/clang/6.0/include/cuda_wrappers/new"
+  textual header "/usr/include/clang/6.0/include/emmintrin.h"
+  textual header "/usr/include/clang/6.0/include/f16cintrin.h"
+  textual header "/usr/include/clang/6.0/include/float.h"
+  textual header "/usr/include/clang/6.0/include/fma4intrin.h"
+  textual header "/usr/include/clang/6.0/include/fmaintrin.h"
+  textual header "/usr/include/clang/6.0/include/fxsrintrin.h"
+  textual header "/usr/include/clang/6.0/include/gfniintrin.h"
+  textual header "/usr/include/clang/6.0/include/htmintrin.h"
+  textual header "/usr/include/clang/6.0/include/htmxlintrin.h"
+  textual header "/usr/include/clang/6.0/include/ia32intrin.h"
+  textual header "/usr/include/clang/6.0/include/immintrin.h"
+  textual header "/usr/include/clang/6.0/include/intrin.h"
+  textual header "/usr/include/clang/6.0/include/inttypes.h"
+  textual header "/usr/include/clang/6.0/include/iso646.h"
+  textual header "/usr/include/clang/6.0/include/limits.h"
+  textual header "/usr/include/clang/6.0/include/lwpintrin.h"
+  textual header "/usr/include/clang/6.0/include/lzcntintrin.h"
+  textual header "/usr/include/clang/6.0/include/mm3dnow.h"
+  textual header "/usr/include/clang/6.0/include/mm_malloc.h"
+  textual header "/usr/include/clang/6.0/include/mmintrin.h"
+  textual header "/usr/include/clang/6.0/include/module.modulemap"
+  textual header "/usr/include/clang/6.0/include/msa.h"
+  textual header "/usr/include/clang/6.0/include/mwaitxintrin.h"
+  textual header "/usr/include/clang/6.0/include/nmmintrin.h"
+  textual header "/usr/include/clang/6.0/include/opencl-c.h"
+  textual header "/usr/include/clang/6.0/include/pkuintrin.h"
+  textual header "/usr/include/clang/6.0/include/pmmintrin.h"
+  textual header "/usr/include/clang/6.0/include/popcntintrin.h"
+  textual header "/usr/include/clang/6.0/include/prfchwintrin.h"
+  textual header "/usr/include/clang/6.0/include/rdseedintrin.h"
+  textual header "/usr/include/clang/6.0/include/rtmintrin.h"
+  textual header "/usr/include/clang/6.0/include/s390intrin.h"
+  textual header "/usr/include/clang/6.0/include/sanitizer/allocator_interface.h"
+  textual header "/usr/include/clang/6.0/include/sanitizer/asan_interface.h"
+  textual header "/usr/include/clang/6.0/include/sanitizer/common_interface_defs.h"
+  textual header "/usr/include/clang/6.0/include/sanitizer/coverage_interface.h"
+  textual header "/usr/include/clang/6.0/include/sanitizer/dfsan_interface.h"
+  textual header "/usr/include/clang/6.0/include/sanitizer/esan_interface.h"
+  textual header "/usr/include/clang/6.0/include/sanitizer/hwasan_interface.h"
+  textual header "/usr/include/clang/6.0/include/sanitizer/linux_syscall_hooks.h"
+  textual header "/usr/include/clang/6.0/include/sanitizer/lsan_interface.h"
+  textual header "/usr/include/clang/6.0/include/sanitizer/msan_interface.h"
+  textual header "/usr/include/clang/6.0/include/sanitizer/scudo_interface.h"
+  textual header "/usr/include/clang/6.0/include/sanitizer/tsan_interface.h"
+  textual header "/usr/include/clang/6.0/include/sanitizer/tsan_interface_atomic.h"
+  textual header "/usr/include/clang/6.0/include/shaintrin.h"
+  textual header "/usr/include/clang/6.0/include/smmintrin.h"
+  textual header "/usr/include/clang/6.0/include/stdalign.h"
+  textual header "/usr/include/clang/6.0/include/stdarg.h"
+  textual header "/usr/include/clang/6.0/include/stdatomic.h"
+  textual header "/usr/include/clang/6.0/include/stdbool.h"
+  textual header "/usr/include/clang/6.0/include/stddef.h"
+  textual header "/usr/include/clang/6.0/include/stdint.h"
+  textual header "/usr/include/clang/6.0/include/stdnoreturn.h"
+  textual header "/usr/include/clang/6.0/include/tbmintrin.h"
+  textual header "/usr/include/clang/6.0/include/tgmath.h"
+  textual header "/usr/include/clang/6.0/include/tmmintrin.h"
+  textual header "/usr/include/clang/6.0/include/unwind.h"
+  textual header "/usr/include/clang/6.0/include/vadefs.h"
+  textual header "/usr/include/clang/6.0/include/vaesintrin.h"
+  textual header "/usr/include/clang/6.0/include/varargs.h"
+  textual header "/usr/include/clang/6.0/include/vecintrin.h"
+  textual header "/usr/include/clang/6.0/include/vpclmulqdqintrin.h"
+  textual header "/usr/include/clang/6.0/include/wmmintrin.h"
+  textual header "/usr/include/clang/6.0/include/x86intrin.h"
+  textual header "/usr/include/clang/6.0/include/xmmintrin.h"
+  textual header "/usr/include/clang/6.0/include/xopintrin.h"
+  textual header "/usr/include/clang/6.0/include/xray/xray_interface.h"
+  textual header "/usr/include/clang/6.0/include/xray/xray_log_interface.h"
+  textual header "/usr/include/clang/6.0/include/xsavecintrin.h"
+  textual header "/usr/include/clang/6.0/include/xsaveintrin.h"
+  textual header "/usr/include/clang/6.0/include/xsaveoptintrin.h"
+  textual header "/usr/include/clang/6.0/include/xsavesintrin.h"
+  textual header "/usr/include/clang/6.0/include/xtestintrin.h"
+  textual header "/usr/include/complex.h"
+  textual header "/usr/include/cpio.h"
+  textual header "/usr/include/crypt.h"
+  textual header "/usr/include/ctype.h"
+  textual header "/usr/include/dirent.h"
+  textual header "/usr/include/dlfcn.h"
+  textual header "/usr/include/drm/amdgpu_drm.h"
+  textual header "/usr/include/drm/armada_drm.h"
+  textual header "/usr/include/drm/drm.h"
+  textual header "/usr/include/drm/drm_fourcc.h"
+  textual header "/usr/include/drm/drm_mode.h"
+  textual header "/usr/include/drm/drm_sarea.h"
+  textual header "/usr/include/drm/etnaviv_drm.h"
+  textual header "/usr/include/drm/exynos_drm.h"
+  textual header "/usr/include/drm/i810_drm.h"
+  textual header "/usr/include/drm/i915_drm.h"
+  textual header "/usr/include/drm/mga_drm.h"
+  textual header "/usr/include/drm/msm_drm.h"
+  textual header "/usr/include/drm/nouveau_drm.h"
+  textual header "/usr/include/drm/omap_drm.h"
+  textual header "/usr/include/drm/qxl_drm.h"
+  textual header "/usr/include/drm/r128_drm.h"
+  textual header "/usr/include/drm/radeon_drm.h"
+  textual header "/usr/include/drm/savage_drm.h"
+  textual header "/usr/include/drm/sis_drm.h"
+  textual header "/usr/include/drm/tegra_drm.h"
+  textual header "/usr/include/drm/vc4_drm.h"
+  textual header "/usr/include/drm/vgem_drm.h"
+  textual header "/usr/include/drm/via_drm.h"
+  textual header "/usr/include/drm/virtgpu_drm.h"
+  textual header "/usr/include/drm/vmwgfx_drm.h"
+  textual header "/usr/include/elf.h"
+  textual header "/usr/include/endian.h"
+  textual header "/usr/include/envz.h"
+  textual header "/usr/include/err.h"
+  textual header "/usr/include/errno.h"
+  textual header "/usr/include/error.h"
+  textual header "/usr/include/execinfo.h"
+  textual header "/usr/include/fcntl.h"
+  textual header "/usr/include/features.h"
+  textual header "/usr/include/fenv.h"
+  textual header "/usr/include/fmtmsg.h"
+  textual header "/usr/include/fnmatch.h"
+  textual header "/usr/include/fstab.h"
+  textual header "/usr/include/fts.h"
+  textual header "/usr/include/ftw.h"
+  textual header "/usr/include/gconv.h"
+  textual header "/usr/include/getopt.h"
+  textual header "/usr/include/glob.h"
+  textual header "/usr/include/gnu-versions.h"
+  textual header "/usr/include/grp.h"
+  textual header "/usr/include/gshadow.h"
+  textual header "/usr/include/iconv.h"
+  textual header "/usr/include/ifaddrs.h"
+  textual header "/usr/include/inttypes.h"
+  textual header "/usr/include/langinfo.h"
+  textual header "/usr/include/lastlog.h"
+  textual header "/usr/include/libgen.h"
+  textual header "/usr/include/libintl.h"
+  textual header "/usr/include/libio.h"
+  textual header "/usr/include/limits.h"
+  textual header "/usr/include/link.h"
+  textual header "/usr/include/linux/a.out.h"
+  textual header "/usr/include/linux/acct.h"
+  textual header "/usr/include/linux/adb.h"
+  textual header "/usr/include/linux/adfs_fs.h"
+  textual header "/usr/include/linux/affs_hardblocks.h"
+  textual header "/usr/include/linux/agpgart.h"
+  textual header "/usr/include/linux/aio_abi.h"
+  textual header "/usr/include/linux/am437x-vpfe.h"
+  textual header "/usr/include/linux/android/binder.h"
+  textual header "/usr/include/linux/apm_bios.h"
+  textual header "/usr/include/linux/arcfb.h"
+  textual header "/usr/include/linux/arm_sdei.h"
+  textual header "/usr/include/linux/aspeed-lpc-ctrl.h"
+  textual header "/usr/include/linux/atalk.h"
+  textual header "/usr/include/linux/atm.h"
+  textual header "/usr/include/linux/atm_eni.h"
+  textual header "/usr/include/linux/atm_he.h"
+  textual header "/usr/include/linux/atm_idt77105.h"
+  textual header "/usr/include/linux/atm_nicstar.h"
+  textual header "/usr/include/linux/atm_tcp.h"
+  textual header "/usr/include/linux/atm_zatm.h"
+  textual header "/usr/include/linux/atmapi.h"
+  textual header "/usr/include/linux/atmarp.h"
+  textual header "/usr/include/linux/atmbr2684.h"
+  textual header "/usr/include/linux/atmclip.h"
+  textual header "/usr/include/linux/atmdev.h"
+  textual header "/usr/include/linux/atmioc.h"
+  textual header "/usr/include/linux/atmlec.h"
+  textual header "/usr/include/linux/atmmpc.h"
+  textual header "/usr/include/linux/atmppp.h"
+  textual header "/usr/include/linux/atmsap.h"
+  textual header "/usr/include/linux/atmsvc.h"
+  textual header "/usr/include/linux/audit.h"
+  textual header "/usr/include/linux/aufs_type.h"
+  textual header "/usr/include/linux/auto_dev-ioctl.h"
+  textual header "/usr/include/linux/auto_fs.h"
+  textual header "/usr/include/linux/auto_fs4.h"
+  textual header "/usr/include/linux/auxvec.h"
+  textual header "/usr/include/linux/ax25.h"
+  textual header "/usr/include/linux/b1lli.h"
+  textual header "/usr/include/linux/batman_adv.h"
+  textual header "/usr/include/linux/baycom.h"
+  textual header "/usr/include/linux/bcache.h"
+  textual header "/usr/include/linux/bcm933xx_hcs.h"
+  textual header "/usr/include/linux/bfs_fs.h"
+  textual header "/usr/include/linux/binfmts.h"
+  textual header "/usr/include/linux/blkpg.h"
+  textual header "/usr/include/linux/blktrace_api.h"
+  textual header "/usr/include/linux/blkzoned.h"
+  textual header "/usr/include/linux/bpf.h"
+  textual header "/usr/include/linux/bpf_common.h"
+  textual header "/usr/include/linux/bpf_perf_event.h"
+  textual header "/usr/include/linux/bpqether.h"
+  textual header "/usr/include/linux/bsg.h"
+  textual header "/usr/include/linux/bt-bmc.h"
+  textual header "/usr/include/linux/btrfs.h"
+  textual header "/usr/include/linux/btrfs_tree.h"
+  textual header "/usr/include/linux/byteorder/big_endian.h"
+  textual header "/usr/include/linux/byteorder/little_endian.h"
+  textual header "/usr/include/linux/caif/caif_socket.h"
+  textual header "/usr/include/linux/caif/if_caif.h"
+  textual header "/usr/include/linux/can.h"
+  textual header "/usr/include/linux/can/bcm.h"
+  textual header "/usr/include/linux/can/error.h"
+  textual header "/usr/include/linux/can/gw.h"
+  textual header "/usr/include/linux/can/netlink.h"
+  textual header "/usr/include/linux/can/raw.h"
+  textual header "/usr/include/linux/can/vxcan.h"
+  textual header "/usr/include/linux/capability.h"
+  textual header "/usr/include/linux/capi.h"
+  textual header "/usr/include/linux/cciss_defs.h"
+  textual header "/usr/include/linux/cciss_ioctl.h"
+  textual header "/usr/include/linux/cdrom.h"
+  textual header "/usr/include/linux/cec-funcs.h"
+  textual header "/usr/include/linux/cec.h"
+  textual header "/usr/include/linux/cgroupstats.h"
+  textual header "/usr/include/linux/chio.h"
+  textual header "/usr/include/linux/cifs/cifs_mount.h"
+  textual header "/usr/include/linux/cm4000_cs.h"
+  textual header "/usr/include/linux/cn_proc.h"
+  textual header "/usr/include/linux/coda.h"
+  textual header "/usr/include/linux/coda_psdev.h"
+  textual header "/usr/include/linux/coff.h"
+  textual header "/usr/include/linux/connector.h"
+  textual header "/usr/include/linux/const.h"
+  textual header "/usr/include/linux/coresight-stm.h"
+  textual header "/usr/include/linux/cramfs_fs.h"
+  textual header "/usr/include/linux/cryptouser.h"
+  textual header "/usr/include/linux/cuda.h"
+  textual header "/usr/include/linux/cyclades.h"
+  textual header "/usr/include/linux/cycx_cfm.h"
+  textual header "/usr/include/linux/dcbnl.h"
+  textual header "/usr/include/linux/dccp.h"
+  textual header "/usr/include/linux/devlink.h"
+  textual header "/usr/include/linux/dlm.h"
+  textual header "/usr/include/linux/dlm_device.h"
+  textual header "/usr/include/linux/dlm_netlink.h"
+  textual header "/usr/include/linux/dlm_plock.h"
+  textual header "/usr/include/linux/dlmconstants.h"
+  textual header "/usr/include/linux/dm-ioctl.h"
+  textual header "/usr/include/linux/dm-log-userspace.h"
+  textual header "/usr/include/linux/dma-buf.h"
+  textual header "/usr/include/linux/dn.h"
+  textual header "/usr/include/linux/dqblk_xfs.h"
+  textual header "/usr/include/linux/dvb/audio.h"
+  textual header "/usr/include/linux/dvb/ca.h"
+  textual header "/usr/include/linux/dvb/dmx.h"
+  textual header "/usr/include/linux/dvb/frontend.h"
+  textual header "/usr/include/linux/dvb/net.h"
+  textual header "/usr/include/linux/dvb/osd.h"
+  textual header "/usr/include/linux/dvb/version.h"
+  textual header "/usr/include/linux/dvb/video.h"
+  textual header "/usr/include/linux/edd.h"
+  textual header "/usr/include/linux/efs_fs_sb.h"
+  textual header "/usr/include/linux/elf-em.h"
+  textual header "/usr/include/linux/elf-fdpic.h"
+  textual header "/usr/include/linux/elf.h"
+  textual header "/usr/include/linux/elfcore.h"
+  textual header "/usr/include/linux/errno.h"
+  textual header "/usr/include/linux/errqueue.h"
+  textual header "/usr/include/linux/ethtool.h"
+  textual header "/usr/include/linux/eventpoll.h"
+  textual header "/usr/include/linux/fadvise.h"
+  textual header "/usr/include/linux/falloc.h"
+  textual header "/usr/include/linux/fanotify.h"
+  textual header "/usr/include/linux/fb.h"
+  textual header "/usr/include/linux/fcntl.h"
+  textual header "/usr/include/linux/fd.h"
+  textual header "/usr/include/linux/fdreg.h"
+  textual header "/usr/include/linux/fib_rules.h"
+  textual header "/usr/include/linux/fiemap.h"
+  textual header "/usr/include/linux/filter.h"
+  textual header "/usr/include/linux/firewire-cdev.h"
+  textual header "/usr/include/linux/firewire-constants.h"
+  textual header "/usr/include/linux/flat.h"
+  textual header "/usr/include/linux/fou.h"
+  textual header "/usr/include/linux/fs.h"
+  textual header "/usr/include/linux/fsl_hypervisor.h"
+  textual header "/usr/include/linux/fsmap.h"
+  textual header "/usr/include/linux/fuse.h"
+  textual header "/usr/include/linux/futex.h"
+  textual header "/usr/include/linux/gameport.h"
+  textual header "/usr/include/linux/gen_stats.h"
+  textual header "/usr/include/linux/genetlink.h"
+  textual header "/usr/include/linux/genwqe/genwqe_card.h"
+  textual header "/usr/include/linux/gfs2_ondisk.h"
+  textual header "/usr/include/linux/gigaset_dev.h"
+  textual header "/usr/include/linux/gpio.h"
+  textual header "/usr/include/linux/gsmmux.h"
+  textual header "/usr/include/linux/gtp.h"
+  textual header "/usr/include/linux/hash_info.h"
+  textual header "/usr/include/linux/hdlc.h"
+  textual header "/usr/include/linux/hdlc/ioctl.h"
+  textual header "/usr/include/linux/hdlcdrv.h"
+  textual header "/usr/include/linux/hdreg.h"
+  textual header "/usr/include/linux/hid.h"
+  textual header "/usr/include/linux/hiddev.h"
+  textual header "/usr/include/linux/hidraw.h"
+  textual header "/usr/include/linux/hpet.h"
+  textual header "/usr/include/linux/hsi/cs-protocol.h"
+  textual header "/usr/include/linux/hsi/hsi_char.h"
+  textual header "/usr/include/linux/hsr_netlink.h"
+  textual header "/usr/include/linux/hw_breakpoint.h"
+  textual header "/usr/include/linux/hyperv.h"
+  textual header "/usr/include/linux/hysdn_if.h"
+  textual header "/usr/include/linux/i2c-dev.h"
+  textual header "/usr/include/linux/i2c.h"
+  textual header "/usr/include/linux/i2o-dev.h"
+  textual header "/usr/include/linux/i8k.h"
+  textual header "/usr/include/linux/icmp.h"
+  textual header "/usr/include/linux/icmpv6.h"
+  textual header "/usr/include/linux/if.h"
+  textual header "/usr/include/linux/if_addr.h"
+  textual header "/usr/include/linux/if_addrlabel.h"
+  textual header "/usr/include/linux/if_alg.h"
+  textual header "/usr/include/linux/if_arcnet.h"
+  textual header "/usr/include/linux/if_arp.h"
+  textual header "/usr/include/linux/if_bonding.h"
+  textual header "/usr/include/linux/if_bridge.h"
+  textual header "/usr/include/linux/if_cablemodem.h"
+  textual header "/usr/include/linux/if_eql.h"
+  textual header "/usr/include/linux/if_ether.h"
+  textual header "/usr/include/linux/if_fc.h"
+  textual header "/usr/include/linux/if_fddi.h"
+  textual header "/usr/include/linux/if_frad.h"
+  textual header "/usr/include/linux/if_hippi.h"
+  textual header "/usr/include/linux/if_infiniband.h"
+  textual header "/usr/include/linux/if_link.h"
+  textual header "/usr/include/linux/if_ltalk.h"
+  textual header "/usr/include/linux/if_macsec.h"
+  textual header "/usr/include/linux/if_packet.h"
+  textual header "/usr/include/linux/if_phonet.h"
+  textual header "/usr/include/linux/if_plip.h"
+  textual header "/usr/include/linux/if_ppp.h"
+  textual header "/usr/include/linux/if_pppol2tp.h"
+  textual header "/usr/include/linux/if_pppox.h"
+  textual header "/usr/include/linux/if_slip.h"
+  textual header "/usr/include/linux/if_team.h"
+  textual header "/usr/include/linux/if_tun.h"
+  textual header "/usr/include/linux/if_tunnel.h"
+  textual header "/usr/include/linux/if_vlan.h"
+  textual header "/usr/include/linux/if_x25.h"
+  textual header "/usr/include/linux/ife.h"
+  textual header "/usr/include/linux/igmp.h"
+  textual header "/usr/include/linux/iio/events.h"
+  textual header "/usr/include/linux/iio/types.h"
+  textual header "/usr/include/linux/ila.h"
+  textual header "/usr/include/linux/in.h"
+  textual header "/usr/include/linux/in6.h"
+  textual header "/usr/include/linux/in_route.h"
+  textual header "/usr/include/linux/inet_diag.h"
+  textual header "/usr/include/linux/inotify.h"
+  textual header "/usr/include/linux/input-event-codes.h"
+  textual header "/usr/include/linux/input.h"
+  textual header "/usr/include/linux/ioctl.h"
+  textual header "/usr/include/linux/ip.h"
+  textual header "/usr/include/linux/ip6_tunnel.h"
+  textual header "/usr/include/linux/ip_vs.h"
+  textual header "/usr/include/linux/ipc.h"
+  textual header "/usr/include/linux/ipmi.h"
+  textual header "/usr/include/linux/ipmi_msgdefs.h"
+  textual header "/usr/include/linux/ipsec.h"
+  textual header "/usr/include/linux/ipv6.h"
+  textual header "/usr/include/linux/ipv6_route.h"
+  textual header "/usr/include/linux/ipx.h"
+  textual header "/usr/include/linux/irda.h"
+  textual header "/usr/include/linux/irqnr.h"
+  textual header "/usr/include/linux/isdn.h"
+  textual header "/usr/include/linux/isdn/capicmd.h"
+  textual header "/usr/include/linux/isdn_divertif.h"
+  textual header "/usr/include/linux/isdn_ppp.h"
+  textual header "/usr/include/linux/isdnif.h"
+  textual header "/usr/include/linux/iso_fs.h"
+  textual header "/usr/include/linux/ivtv.h"
+  textual header "/usr/include/linux/ivtvfb.h"
+  textual header "/usr/include/linux/ixjuser.h"
+  textual header "/usr/include/linux/jffs2.h"
+  textual header "/usr/include/linux/joystick.h"
+  textual header "/usr/include/linux/kcm.h"
+  textual header "/usr/include/linux/kcmp.h"
+  textual header "/usr/include/linux/kcov.h"
+  textual header "/usr/include/linux/kd.h"
+  textual header "/usr/include/linux/kdev_t.h"
+  textual header "/usr/include/linux/kernel-page-flags.h"
+  textual header "/usr/include/linux/kernel.h"
+  textual header "/usr/include/linux/kernelcapi.h"
+  textual header "/usr/include/linux/kexec.h"
+  textual header "/usr/include/linux/keyboard.h"
+  textual header "/usr/include/linux/keyctl.h"
+  textual header "/usr/include/linux/kfd_ioctl.h"
+  textual header "/usr/include/linux/kvm.h"
+  textual header "/usr/include/linux/kvm_para.h"
+  textual header "/usr/include/linux/l2tp.h"
+  textual header "/usr/include/linux/libc-compat.h"
+  textual header "/usr/include/linux/lightnvm.h"
+  textual header "/usr/include/linux/limits.h"
+  textual header "/usr/include/linux/lirc.h"
+  textual header "/usr/include/linux/llc.h"
+  textual header "/usr/include/linux/loop.h"
+  textual header "/usr/include/linux/lp.h"
+  textual header "/usr/include/linux/lwtunnel.h"
+  textual header "/usr/include/linux/magic.h"
+  textual header "/usr/include/linux/major.h"
+  textual header "/usr/include/linux/map_to_7segment.h"
+  textual header "/usr/include/linux/matroxfb.h"
+  textual header "/usr/include/linux/max2175.h"
+  textual header "/usr/include/linux/mdio.h"
+  textual header "/usr/include/linux/media-bus-format.h"
+  textual header "/usr/include/linux/media.h"
+  textual header "/usr/include/linux/mei.h"
+  textual header "/usr/include/linux/membarrier.h"
+  textual header "/usr/include/linux/memfd.h"
+  textual header "/usr/include/linux/mempolicy.h"
+  textual header "/usr/include/linux/meye.h"
+  textual header "/usr/include/linux/mic_common.h"
+  textual header "/usr/include/linux/mic_ioctl.h"
+  textual header "/usr/include/linux/mii.h"
+  textual header "/usr/include/linux/minix_fs.h"
+  textual header "/usr/include/linux/mman.h"
+  textual header "/usr/include/linux/mmc/ioctl.h"
+  textual header "/usr/include/linux/mmtimer.h"
+  textual header "/usr/include/linux/module.h"
+  textual header "/usr/include/linux/mpls.h"
+  textual header "/usr/include/linux/mpls_iptunnel.h"
+  textual header "/usr/include/linux/mqueue.h"
+  textual header "/usr/include/linux/mroute.h"
+  textual header "/usr/include/linux/mroute6.h"
+  textual header "/usr/include/linux/msdos_fs.h"
+  textual header "/usr/include/linux/msg.h"
+  textual header "/usr/include/linux/mtio.h"
+  textual header "/usr/include/linux/n_r3964.h"
+  textual header "/usr/include/linux/nbd-netlink.h"
+  textual header "/usr/include/linux/nbd.h"
+  textual header "/usr/include/linux/ncp.h"
+  textual header "/usr/include/linux/ncp_fs.h"
+  textual header "/usr/include/linux/ncp_mount.h"
+  textual header "/usr/include/linux/ncp_no.h"
+  textual header "/usr/include/linux/ncsi.h"
+  textual header "/usr/include/linux/ndctl.h"
+  textual header "/usr/include/linux/neighbour.h"
+  textual header "/usr/include/linux/net.h"
+  textual header "/usr/include/linux/net_dropmon.h"
+  textual header "/usr/include/linux/net_namespace.h"
+  textual header "/usr/include/linux/net_tstamp.h"
+  textual header "/usr/include/linux/netconf.h"
+  textual header "/usr/include/linux/netdevice.h"
+  textual header "/usr/include/linux/netfilter.h"
+  textual header "/usr/include/linux/netfilter/ipset/ip_set.h"
+  textual header "/usr/include/linux/netfilter/ipset/ip_set_bitmap.h"
+  textual header "/usr/include/linux/netfilter/ipset/ip_set_hash.h"
+  textual header "/usr/include/linux/netfilter/ipset/ip_set_list.h"
+  textual header "/usr/include/linux/netfilter/nf_conntrack_common.h"
+  textual header "/usr/include/linux/netfilter/nf_conntrack_ftp.h"
+  textual header "/usr/include/linux/netfilter/nf_conntrack_sctp.h"
+  textual header "/usr/include/linux/netfilter/nf_conntrack_tcp.h"
+  textual header "/usr/include/linux/netfilter/nf_conntrack_tuple_common.h"
+  textual header "/usr/include/linux/netfilter/nf_log.h"
+  textual header "/usr/include/linux/netfilter/nf_nat.h"
+  textual header "/usr/include/linux/netfilter/nf_tables.h"
+  textual header "/usr/include/linux/netfilter/nf_tables_compat.h"
+  textual header "/usr/include/linux/netfilter/nfnetlink.h"
+  textual header "/usr/include/linux/netfilter/nfnetlink_acct.h"
+  textual header "/usr/include/linux/netfilter/nfnetlink_compat.h"
+  textual header "/usr/include/linux/netfilter/nfnetlink_conntrack.h"
+  textual header "/usr/include/linux/netfilter/nfnetlink_cthelper.h"
+  textual header "/usr/include/linux/netfilter/nfnetlink_cttimeout.h"
+  textual header "/usr/include/linux/netfilter/nfnetlink_log.h"
+  textual header "/usr/include/linux/netfilter/nfnetlink_queue.h"
+  textual header "/usr/include/linux/netfilter/x_tables.h"
+  textual header "/usr/include/linux/netfilter/xt_AUDIT.h"
+  textual header "/usr/include/linux/netfilter/xt_CHECKSUM.h"
+  textual header "/usr/include/linux/netfilter/xt_CLASSIFY.h"
+  textual header "/usr/include/linux/netfilter/xt_CONNMARK.h"
+  textual header "/usr/include/linux/netfilter/xt_CONNSECMARK.h"
+  textual header "/usr/include/linux/netfilter/xt_CT.h"
+  textual header "/usr/include/linux/netfilter/xt_DSCP.h"
+  textual header "/usr/include/linux/netfilter/xt_HMARK.h"
+  textual header "/usr/include/linux/netfilter/xt_IDLETIMER.h"
+  textual header "/usr/include/linux/netfilter/xt_LED.h"
+  textual header "/usr/include/linux/netfilter/xt_LOG.h"
+  textual header "/usr/include/linux/netfilter/xt_MARK.h"
+  textual header "/usr/include/linux/netfilter/xt_NFLOG.h"
+  textual header "/usr/include/linux/netfilter/xt_NFQUEUE.h"
+  textual header "/usr/include/linux/netfilter/xt_RATEEST.h"
+  textual header "/usr/include/linux/netfilter/xt_SECMARK.h"
+  textual header "/usr/include/linux/netfilter/xt_SYNPROXY.h"
+  textual header "/usr/include/linux/netfilter/xt_TCPMSS.h"
+  textual header "/usr/include/linux/netfilter/xt_TCPOPTSTRIP.h"
+  textual header "/usr/include/linux/netfilter/xt_TEE.h"
+  textual header "/usr/include/linux/netfilter/xt_TPROXY.h"
+  textual header "/usr/include/linux/netfilter/xt_addrtype.h"
+  textual header "/usr/include/linux/netfilter/xt_bpf.h"
+  textual header "/usr/include/linux/netfilter/xt_cgroup.h"
+  textual header "/usr/include/linux/netfilter/xt_cluster.h"
+  textual header "/usr/include/linux/netfilter/xt_comment.h"
+  textual header "/usr/include/linux/netfilter/xt_connbytes.h"
+  textual header "/usr/include/linux/netfilter/xt_connlabel.h"
+  textual header "/usr/include/linux/netfilter/xt_connlimit.h"
+  textual header "/usr/include/linux/netfilter/xt_connmark.h"
+  textual header "/usr/include/linux/netfilter/xt_conntrack.h"
+  textual header "/usr/include/linux/netfilter/xt_cpu.h"
+  textual header "/usr/include/linux/netfilter/xt_dccp.h"
+  textual header "/usr/include/linux/netfilter/xt_devgroup.h"
+  textual header "/usr/include/linux/netfilter/xt_dscp.h"
+  textual header "/usr/include/linux/netfilter/xt_ecn.h"
+  textual header "/usr/include/linux/netfilter/xt_esp.h"
+  textual header "/usr/include/linux/netfilter/xt_hashlimit.h"
+  textual header "/usr/include/linux/netfilter/xt_helper.h"
+  textual header "/usr/include/linux/netfilter/xt_ipcomp.h"
+  textual header "/usr/include/linux/netfilter/xt_iprange.h"
+  textual header "/usr/include/linux/netfilter/xt_ipvs.h"
+  textual header "/usr/include/linux/netfilter/xt_l2tp.h"
+  textual header "/usr/include/linux/netfilter/xt_length.h"
+  textual header "/usr/include/linux/netfilter/xt_limit.h"
+  textual header "/usr/include/linux/netfilter/xt_mac.h"
+  textual header "/usr/include/linux/netfilter/xt_mark.h"
+  textual header "/usr/include/linux/netfilter/xt_multiport.h"
+  textual header "/usr/include/linux/netfilter/xt_nfacct.h"
+  textual header "/usr/include/linux/netfilter/xt_osf.h"
+  textual header "/usr/include/linux/netfilter/xt_owner.h"
+  textual header "/usr/include/linux/netfilter/xt_physdev.h"
+  textual header "/usr/include/linux/netfilter/xt_pkttype.h"
+  textual header "/usr/include/linux/netfilter/xt_policy.h"
+  textual header "/usr/include/linux/netfilter/xt_quota.h"
+  textual header "/usr/include/linux/netfilter/xt_rateest.h"
+  textual header "/usr/include/linux/netfilter/xt_realm.h"
+  textual header "/usr/include/linux/netfilter/xt_recent.h"
+  textual header "/usr/include/linux/netfilter/xt_rpfilter.h"
+  textual header "/usr/include/linux/netfilter/xt_sctp.h"
+  textual header "/usr/include/linux/netfilter/xt_set.h"
+  textual header "/usr/include/linux/netfilter/xt_socket.h"
+  textual header "/usr/include/linux/netfilter/xt_state.h"
+  textual header "/usr/include/linux/netfilter/xt_statistic.h"
+  textual header "/usr/include/linux/netfilter/xt_string.h"
+  textual header "/usr/include/linux/netfilter/xt_tcpmss.h"
+  textual header "/usr/include/linux/netfilter/xt_tcpudp.h"
+  textual header "/usr/include/linux/netfilter/xt_time.h"
+  textual header "/usr/include/linux/netfilter/xt_u32.h"
+  textual header "/usr/include/linux/netfilter_arp.h"
+  textual header "/usr/include/linux/netfilter_arp/arp_tables.h"
+  textual header "/usr/include/linux/netfilter_arp/arpt_mangle.h"
+  textual header "/usr/include/linux/netfilter_bridge.h"
+  textual header "/usr/include/linux/netfilter_bridge/ebt_802_3.h"
+  textual header "/usr/include/linux/netfilter_bridge/ebt_among.h"
+  textual header "/usr/include/linux/netfilter_bridge/ebt_arp.h"
+  textual header "/usr/include/linux/netfilter_bridge/ebt_arpreply.h"
+  textual header "/usr/include/linux/netfilter_bridge/ebt_ip.h"
+  textual header "/usr/include/linux/netfilter_bridge/ebt_ip6.h"
+  textual header "/usr/include/linux/netfilter_bridge/ebt_limit.h"
+  textual header "/usr/include/linux/netfilter_bridge/ebt_log.h"
+  textual header "/usr/include/linux/netfilter_bridge/ebt_mark_m.h"
+  textual header "/usr/include/linux/netfilter_bridge/ebt_mark_t.h"
+  textual header "/usr/include/linux/netfilter_bridge/ebt_nat.h"
+  textual header "/usr/include/linux/netfilter_bridge/ebt_nflog.h"
+  textual header "/usr/include/linux/netfilter_bridge/ebt_pkttype.h"
+  textual header "/usr/include/linux/netfilter_bridge/ebt_redirect.h"
+  textual header "/usr/include/linux/netfilter_bridge/ebt_stp.h"
+  textual header "/usr/include/linux/netfilter_bridge/ebt_vlan.h"
+  textual header "/usr/include/linux/netfilter_bridge/ebtables.h"
+  textual header "/usr/include/linux/netfilter_decnet.h"
+  textual header "/usr/include/linux/netfilter_ipv4.h"
+  textual header "/usr/include/linux/netfilter_ipv4/ip_tables.h"
+  textual header "/usr/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h"
+  textual header "/usr/include/linux/netfilter_ipv4/ipt_ECN.h"
+  textual header "/usr/include/linux/netfilter_ipv4/ipt_LOG.h"
+  textual header "/usr/include/linux/netfilter_ipv4/ipt_REJECT.h"
+  textual header "/usr/include/linux/netfilter_ipv4/ipt_TTL.h"
+  textual header "/usr/include/linux/netfilter_ipv4/ipt_ah.h"
+  textual header "/usr/include/linux/netfilter_ipv4/ipt_ecn.h"
+  textual header "/usr/include/linux/netfilter_ipv4/ipt_ttl.h"
+  textual header "/usr/include/linux/netfilter_ipv6.h"
+  textual header "/usr/include/linux/netfilter_ipv6/ip6_tables.h"
+  textual header "/usr/include/linux/netfilter_ipv6/ip6t_HL.h"
+  textual header "/usr/include/linux/netfilter_ipv6/ip6t_LOG.h"
+  textual header "/usr/include/linux/netfilter_ipv6/ip6t_NPT.h"
+  textual header "/usr/include/linux/netfilter_ipv6/ip6t_REJECT.h"
+  textual header "/usr/include/linux/netfilter_ipv6/ip6t_ah.h"
+  textual header "/usr/include/linux/netfilter_ipv6/ip6t_frag.h"
+  textual header "/usr/include/linux/netfilter_ipv6/ip6t_hl.h"
+  textual header "/usr/include/linux/netfilter_ipv6/ip6t_ipv6header.h"
+  textual header "/usr/include/linux/netfilter_ipv6/ip6t_mh.h"
+  textual header "/usr/include/linux/netfilter_ipv6/ip6t_opts.h"
+  textual header "/usr/include/linux/netfilter_ipv6/ip6t_rt.h"
+  textual header "/usr/include/linux/netlink.h"
+  textual header "/usr/include/linux/netlink_diag.h"
+  textual header "/usr/include/linux/netrom.h"
+  textual header "/usr/include/linux/nfc.h"
+  textual header "/usr/include/linux/nfs.h"
+  textual header "/usr/include/linux/nfs2.h"
+  textual header "/usr/include/linux/nfs3.h"
+  textual header "/usr/include/linux/nfs4.h"
+  textual header "/usr/include/linux/nfs4_mount.h"
+  textual header "/usr/include/linux/nfs_fs.h"
+  textual header "/usr/include/linux/nfs_idmap.h"
+  textual header "/usr/include/linux/nfs_mount.h"
+  textual header "/usr/include/linux/nfsacl.h"
+  textual header "/usr/include/linux/nfsd/cld.h"
+  textual header "/usr/include/linux/nfsd/debug.h"
+  textual header "/usr/include/linux/nfsd/export.h"
+  textual header "/usr/include/linux/nfsd/nfsfh.h"
+  textual header "/usr/include/linux/nfsd/stats.h"
+  textual header "/usr/include/linux/nilfs2_api.h"
+  textual header "/usr/include/linux/nilfs2_ondisk.h"
+  textual header "/usr/include/linux/nl80211.h"
+  textual header "/usr/include/linux/nsfs.h"
+  textual header "/usr/include/linux/nubus.h"
+  textual header "/usr/include/linux/nvme_ioctl.h"
+  textual header "/usr/include/linux/nvram.h"
+  textual header "/usr/include/linux/omap3isp.h"
+  textual header "/usr/include/linux/omapfb.h"
+  textual header "/usr/include/linux/oom.h"
+  textual header "/usr/include/linux/openvswitch.h"
+  textual header "/usr/include/linux/packet_diag.h"
+  textual header "/usr/include/linux/param.h"
+  textual header "/usr/include/linux/parport.h"
+  textual header "/usr/include/linux/patchkey.h"
+  textual header "/usr/include/linux/pci.h"
+  textual header "/usr/include/linux/pci_regs.h"
+  textual header "/usr/include/linux/pcitest.h"
+  textual header "/usr/include/linux/perf_event.h"
+  textual header "/usr/include/linux/personality.h"
+  textual header "/usr/include/linux/pfkeyv2.h"
+  textual header "/usr/include/linux/pg.h"
+  textual header "/usr/include/linux/phantom.h"
+  textual header "/usr/include/linux/phonet.h"
+  textual header "/usr/include/linux/pkt_cls.h"
+  textual header "/usr/include/linux/pkt_sched.h"
+  textual header "/usr/include/linux/pktcdvd.h"
+  textual header "/usr/include/linux/pmu.h"
+  textual header "/usr/include/linux/poll.h"
+  textual header "/usr/include/linux/posix_acl.h"
+  textual header "/usr/include/linux/posix_acl_xattr.h"
+  textual header "/usr/include/linux/posix_types.h"
+  textual header "/usr/include/linux/ppdev.h"
+  textual header "/usr/include/linux/ppp-comp.h"
+  textual header "/usr/include/linux/ppp-ioctl.h"
+  textual header "/usr/include/linux/ppp_defs.h"
+  textual header "/usr/include/linux/pps.h"
+  textual header "/usr/include/linux/pr.h"
+  textual header "/usr/include/linux/prctl.h"
+  textual header "/usr/include/linux/psample.h"
+  textual header "/usr/include/linux/psci.h"
+  textual header "/usr/include/linux/ptp_clock.h"
+  textual header "/usr/include/linux/ptrace.h"
+  textual header "/usr/include/linux/qnx4_fs.h"
+  textual header "/usr/include/linux/qnxtypes.h"
+  textual header "/usr/include/linux/qrtr.h"
+  textual header "/usr/include/linux/quota.h"
+  textual header "/usr/include/linux/radeonfb.h"
+  textual header "/usr/include/linux/raid/md_p.h"
+  textual header "/usr/include/linux/raid/md_u.h"
+  textual header "/usr/include/linux/random.h"
+  textual header "/usr/include/linux/raw.h"
+  textual header "/usr/include/linux/rds.h"
+  textual header "/usr/include/linux/reboot.h"
+  textual header "/usr/include/linux/reiserfs_fs.h"
+  textual header "/usr/include/linux/reiserfs_xattr.h"
+  textual header "/usr/include/linux/resource.h"
+  textual header "/usr/include/linux/rfkill.h"
+  textual header "/usr/include/linux/rio_cm_cdev.h"
+  textual header "/usr/include/linux/rio_mport_cdev.h"
+  textual header "/usr/include/linux/romfs_fs.h"
+  textual header "/usr/include/linux/rose.h"
+  textual header "/usr/include/linux/route.h"
+  textual header "/usr/include/linux/rpmsg.h"
+  textual header "/usr/include/linux/rtc.h"
+  textual header "/usr/include/linux/rtnetlink.h"
+  textual header "/usr/include/linux/rxrpc.h"
+  textual header "/usr/include/linux/scc.h"
+  textual header "/usr/include/linux/sched.h"
+  textual header "/usr/include/linux/sched/types.h"
+  textual header "/usr/include/linux/scif_ioctl.h"
+  textual header "/usr/include/linux/screen_info.h"
+  textual header "/usr/include/linux/sctp.h"
+  textual header "/usr/include/linux/sdla.h"
+  textual header "/usr/include/linux/seccomp.h"
+  textual header "/usr/include/linux/securebits.h"
+  textual header "/usr/include/linux/sed-opal.h"
+  textual header "/usr/include/linux/seg6.h"
+  textual header "/usr/include/linux/seg6_genl.h"
+  textual header "/usr/include/linux/seg6_hmac.h"
+  textual header "/usr/include/linux/seg6_iptunnel.h"
+  textual header "/usr/include/linux/seg6_local.h"
+  textual header "/usr/include/linux/selinux_netlink.h"
+  textual header "/usr/include/linux/sem.h"
+  textual header "/usr/include/linux/serial.h"
+  textual header "/usr/include/linux/serial_core.h"
+  textual header "/usr/include/linux/serial_reg.h"
+  textual header "/usr/include/linux/serio.h"
+  textual header "/usr/include/linux/shm.h"
+  textual header "/usr/include/linux/signal.h"
+  textual header "/usr/include/linux/signalfd.h"
+  textual header "/usr/include/linux/smc.h"
+  textual header "/usr/include/linux/smc_diag.h"
+  textual header "/usr/include/linux/smiapp.h"
+  textual header "/usr/include/linux/snmp.h"
+  textual header "/usr/include/linux/sock_diag.h"
+  textual header "/usr/include/linux/socket.h"
+  textual header "/usr/include/linux/sockios.h"
+  textual header "/usr/include/linux/sonet.h"
+  textual header "/usr/include/linux/sonypi.h"
+  textual header "/usr/include/linux/sound.h"
+  textual header "/usr/include/linux/soundcard.h"
+  textual header "/usr/include/linux/spi/spidev.h"
+  textual header "/usr/include/linux/stat.h"
+  textual header "/usr/include/linux/stddef.h"
+  textual header "/usr/include/linux/stm.h"
+  textual header "/usr/include/linux/string.h"
+  textual header "/usr/include/linux/sunrpc/debug.h"
+  textual header "/usr/include/linux/suspend_ioctls.h"
+  textual header "/usr/include/linux/swab.h"
+  textual header "/usr/include/linux/switchtec_ioctl.h"
+  textual header "/usr/include/linux/sync_file.h"
+  textual header "/usr/include/linux/synclink.h"
+  textual header "/usr/include/linux/sysctl.h"
+  textual header "/usr/include/linux/sysinfo.h"
+  textual header "/usr/include/linux/target_core_user.h"
+  textual header "/usr/include/linux/taskstats.h"
+  textual header "/usr/include/linux/tc_act/tc_bpf.h"
+  textual header "/usr/include/linux/tc_act/tc_connmark.h"
+  textual header "/usr/include/linux/tc_act/tc_csum.h"
+  textual header "/usr/include/linux/tc_act/tc_defact.h"
+  textual header "/usr/include/linux/tc_act/tc_gact.h"
+  textual header "/usr/include/linux/tc_act/tc_ife.h"
+  textual header "/usr/include/linux/tc_act/tc_ipt.h"
+  textual header "/usr/include/linux/tc_act/tc_mirred.h"
+  textual header "/usr/include/linux/tc_act/tc_nat.h"
+  textual header "/usr/include/linux/tc_act/tc_pedit.h"
+  textual header "/usr/include/linux/tc_act/tc_sample.h"
+  textual header "/usr/include/linux/tc_act/tc_skbedit.h"
+  textual header "/usr/include/linux/tc_act/tc_skbmod.h"
+  textual header "/usr/include/linux/tc_act/tc_tunnel_key.h"
+  textual header "/usr/include/linux/tc_act/tc_vlan.h"
+  textual header "/usr/include/linux/tc_ematch/tc_em_cmp.h"
+  textual header "/usr/include/linux/tc_ematch/tc_em_meta.h"
+  textual header "/usr/include/linux/tc_ematch/tc_em_nbyte.h"
+  textual header "/usr/include/linux/tc_ematch/tc_em_text.h"
+  textual header "/usr/include/linux/tcp.h"
+  textual header "/usr/include/linux/tcp_metrics.h"
+  textual header "/usr/include/linux/tee.h"
+  textual header "/usr/include/linux/telephony.h"
+  textual header "/usr/include/linux/termios.h"
+  textual header "/usr/include/linux/thermal.h"
+  textual header "/usr/include/linux/time.h"
+  textual header "/usr/include/linux/timerfd.h"
+  textual header "/usr/include/linux/times.h"
+  textual header "/usr/include/linux/timex.h"
+  textual header "/usr/include/linux/tiocl.h"
+  textual header "/usr/include/linux/tipc.h"
+  textual header "/usr/include/linux/tipc_config.h"
+  textual header "/usr/include/linux/tipc_netlink.h"
+  textual header "/usr/include/linux/tls.h"
+  textual header "/usr/include/linux/toshiba.h"
+  textual header "/usr/include/linux/tty.h"
+  textual header "/usr/include/linux/tty_flags.h"
+  textual header "/usr/include/linux/types.h"
+  textual header "/usr/include/linux/udf_fs_i.h"
+  textual header "/usr/include/linux/udp.h"
+  textual header "/usr/include/linux/uhid.h"
+  textual header "/usr/include/linux/uinput.h"
+  textual header "/usr/include/linux/uio.h"
+  textual header "/usr/include/linux/uleds.h"
+  textual header "/usr/include/linux/ultrasound.h"
+  textual header "/usr/include/linux/un.h"
+  textual header "/usr/include/linux/unistd.h"
+  textual header "/usr/include/linux/unix_diag.h"
+  textual header "/usr/include/linux/usb/audio.h"
+  textual header "/usr/include/linux/usb/cdc-wdm.h"
+  textual header "/usr/include/linux/usb/cdc.h"
+  textual header "/usr/include/linux/usb/ch11.h"
+  textual header "/usr/include/linux/usb/ch9.h"
+  textual header "/usr/include/linux/usb/charger.h"
+  textual header "/usr/include/linux/usb/functionfs.h"
+  textual header "/usr/include/linux/usb/g_printer.h"
+  textual header "/usr/include/linux/usb/gadgetfs.h"
+  textual header "/usr/include/linux/usb/midi.h"
+  textual header "/usr/include/linux/usb/tmc.h"
+  textual header "/usr/include/linux/usb/video.h"
+  textual header "/usr/include/linux/usbdevice_fs.h"
+  textual header "/usr/include/linux/usbip.h"
+  textual header "/usr/include/linux/userfaultfd.h"
+  textual header "/usr/include/linux/userio.h"
+  textual header "/usr/include/linux/utime.h"
+  textual header "/usr/include/linux/utsname.h"
+  textual header "/usr/include/linux/uuid.h"
+  textual header "/usr/include/linux/uvcvideo.h"
+  textual header "/usr/include/linux/v4l2-common.h"
+  textual header "/usr/include/linux/v4l2-controls.h"
+  textual header "/usr/include/linux/v4l2-dv-timings.h"
+  textual header "/usr/include/linux/v4l2-mediabus.h"
+  textual header "/usr/include/linux/v4l2-subdev.h"
+  textual header "/usr/include/linux/version.h"
+  textual header "/usr/include/linux/veth.h"
+  textual header "/usr/include/linux/vfio.h"
+  textual header "/usr/include/linux/vfio_ccw.h"
+  textual header "/usr/include/linux/vhost.h"
+  textual header "/usr/include/linux/videodev2.h"
+  textual header "/usr/include/linux/virtio_9p.h"
+  textual header "/usr/include/linux/virtio_balloon.h"
+  textual header "/usr/include/linux/virtio_blk.h"
+  textual header "/usr/include/linux/virtio_config.h"
+  textual header "/usr/include/linux/virtio_console.h"
+  textual header "/usr/include/linux/virtio_crypto.h"
+  textual header "/usr/include/linux/virtio_gpu.h"
+  textual header "/usr/include/linux/virtio_ids.h"
+  textual header "/usr/include/linux/virtio_input.h"
+  textual header "/usr/include/linux/virtio_mmio.h"
+  textual header "/usr/include/linux/virtio_net.h"
+  textual header "/usr/include/linux/virtio_pci.h"
+  textual header "/usr/include/linux/virtio_ring.h"
+  textual header "/usr/include/linux/virtio_rng.h"
+  textual header "/usr/include/linux/virtio_scsi.h"
+  textual header "/usr/include/linux/virtio_types.h"
+  textual header "/usr/include/linux/virtio_vsock.h"
+  textual header "/usr/include/linux/vm_sockets.h"
+  textual header "/usr/include/linux/vm_sockets_diag.h"
+  textual header "/usr/include/linux/vsockmon.h"
+  textual header "/usr/include/linux/vt.h"
+  textual header "/usr/include/linux/vtpm_proxy.h"
+  textual header "/usr/include/linux/wait.h"
+  textual header "/usr/include/linux/wanrouter.h"
+  textual header "/usr/include/linux/watchdog.h"
+  textual header "/usr/include/linux/wimax.h"
+  textual header "/usr/include/linux/wimax/i2400m.h"
+  textual header "/usr/include/linux/wireless.h"
+  textual header "/usr/include/linux/wmi.h"
+  textual header "/usr/include/linux/x25.h"
+  textual header "/usr/include/linux/xattr.h"
+  textual header "/usr/include/linux/xfrm.h"
+  textual header "/usr/include/linux/xilinx-v4l2-controls.h"
+  textual header "/usr/include/linux/zorro.h"
+  textual header "/usr/include/linux/zorro_ids.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/APFloat.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/APInt.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/APSInt.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/AllocatorList.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/ArrayRef.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/BitVector.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/BitmaskEnum.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/BreadthFirstIterator.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/CachedHashString.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/DAGDeltaAlgorithm.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/DeltaAlgorithm.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/DenseMap.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/DenseMapInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/DenseSet.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/DepthFirstIterator.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/EpochTracker.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/EquivalenceClasses.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/FoldingSet.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/GraphTraits.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/Hashing.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/ImmutableList.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/ImmutableMap.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/ImmutableSet.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/IndexedMap.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/IntEqClasses.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/IntervalMap.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/IntrusiveRefCntPtr.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/MapVector.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/None.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/Optional.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/PackedVector.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/PointerEmbeddedInt.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/PointerIntPair.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/PointerSumType.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/PointerUnion.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/PostOrderIterator.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/PriorityQueue.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/PriorityWorklist.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/SCCIterator.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/STLExtras.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/ScopeExit.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/ScopedHashTable.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/Sequence.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/SetOperations.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/SetVector.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/SmallBitVector.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/SmallPtrSet.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/SmallSet.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/SmallString.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/SmallVector.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/SparseBitVector.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/SparseMultiSet.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/SparseSet.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/Statistic.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/StringExtras.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/StringMap.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/StringRef.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/StringSet.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/StringSwitch.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/TinyPtrVector.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/Triple.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/Twine.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/UniqueVector.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/VariadicFunction.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/edit_distance.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/ilist.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/ilist_base.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/ilist_iterator.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/ilist_node.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/ilist_node_base.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/ilist_node_options.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/iterator.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/iterator_range.h"
+  textual header "/usr/include/llvm-6.0/llvm/ADT/simple_ilist.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/AliasAnalysis.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/AliasAnalysisEvaluator.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/AliasSetTracker.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/AssumptionCache.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/BasicAliasAnalysis.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/BlockFrequencyInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/BlockFrequencyInfoImpl.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/BranchProbabilityInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/CFG.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/CFGPrinter.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/CFLAliasAnalysisUtils.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/CFLAndersAliasAnalysis.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/CFLSteensAliasAnalysis.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/CGSCCPassManager.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/CallGraph.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/CallGraphSCCPass.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/CallPrinter.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/CaptureTracking.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/CmpInstAnalysis.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/CodeMetrics.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/ConstantFolding.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/DOTGraphTraitsPass.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/DemandedBits.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/DependenceAnalysis.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/DivergenceAnalysis.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/DomPrinter.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/DominanceFrontier.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/DominanceFrontierImpl.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/EHPersonalities.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/GlobalsModRef.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/IVUsers.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/IndirectCallPromotionAnalysis.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/IndirectCallSiteVisitor.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/InlineCost.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/InstructionSimplify.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/Interval.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/IntervalIterator.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/IntervalPartition.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/IteratedDominanceFrontier.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/LazyBlockFrequencyInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/LazyBranchProbabilityInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/LazyCallGraph.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/LazyValueInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/Lint.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/Loads.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/LoopAccessAnalysis.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/LoopAnalysisManager.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/LoopInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/LoopInfoImpl.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/LoopIterator.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/LoopPass.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/LoopUnrollAnalyzer.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/MemoryBuiltins.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/MemoryDependenceAnalysis.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/MemoryLocation.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/MemorySSA.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/MemorySSAUpdater.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/ModuleSummaryAnalysis.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/ObjCARCAliasAnalysis.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/ObjCARCAnalysisUtils.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/ObjCARCInstKind.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/ObjectUtils.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/OptimizationRemarkEmitter.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/OrderedBasicBlock.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/PHITransAddr.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/Passes.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/PostDominators.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/ProfileSummaryInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/PtrUseVisitor.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/RegionInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/RegionInfoImpl.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/RegionIterator.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/RegionPass.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/RegionPrinter.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/ScalarEvolution.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/ScalarEvolutionExpander.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/ScalarEvolutionExpressions.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/ScalarEvolutionNormalization.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/ScopedNoAliasAA.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/SparsePropagation.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/TargetFolder.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/TargetLibraryInfo.def"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/TargetLibraryInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/TargetTransformInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/TargetTransformInfoImpl.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/Trace.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/TypeBasedAliasAnalysis.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/TypeMetadataUtils.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/ValueLattice.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/ValueLatticeUtils.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/ValueTracking.h"
+  textual header "/usr/include/llvm-6.0/llvm/Analysis/VectorUtils.h"
+  textual header "/usr/include/llvm-6.0/llvm/AsmParser/Parser.h"
+  textual header "/usr/include/llvm-6.0/llvm/AsmParser/SlotMapping.h"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/COFF.h"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/Dwarf.def"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/Dwarf.h"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/ELF.h"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/ELFRelocs/AArch64.def"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/ELFRelocs/AMDGPU.def"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/ELFRelocs/ARC.def"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/ELFRelocs/ARM.def"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/ELFRelocs/AVR.def"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/ELFRelocs/BPF.def"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/ELFRelocs/Hexagon.def"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/ELFRelocs/Lanai.def"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/ELFRelocs/Mips.def"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/ELFRelocs/PowerPC.def"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/ELFRelocs/PowerPC64.def"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/ELFRelocs/RISCV.def"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/ELFRelocs/Sparc.def"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/ELFRelocs/SystemZ.def"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/ELFRelocs/WebAssembly.def"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/ELFRelocs/i386.def"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/ELFRelocs/x86_64.def"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/MachO.def"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/MachO.h"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/Magic.h"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/Wasm.h"
+  textual header "/usr/include/llvm-6.0/llvm/BinaryFormat/WasmRelocs.def"
+  textual header "/usr/include/llvm-6.0/llvm/Bitcode/BitCodes.h"
+  textual header "/usr/include/llvm-6.0/llvm/Bitcode/BitcodeReader.h"
+  textual header "/usr/include/llvm-6.0/llvm/Bitcode/BitcodeWriter.h"
+  textual header "/usr/include/llvm-6.0/llvm/Bitcode/BitcodeWriterPass.h"
+  textual header "/usr/include/llvm-6.0/llvm/Bitcode/BitstreamReader.h"
+  textual header "/usr/include/llvm-6.0/llvm/Bitcode/BitstreamWriter.h"
+  textual header "/usr/include/llvm-6.0/llvm/Bitcode/LLVMBitCodes.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/Analysis.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/AsmPrinter.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/AtomicExpandUtils.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/BasicTTIImpl.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/CalcSpillWeights.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/CallingConvLower.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/CommandFlags.def"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/CostTable.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/DAGCombine.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/DFAPacketizer.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/DIE.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/DIEValue.def"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/DwarfStringPoolEntry.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/EdgeBundles.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/ExecutionDepsFix.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/ExpandReductions.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/FastISel.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/FaultMaps.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/FunctionLoweringInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GCMetadata.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GCMetadataPrinter.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GCStrategy.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GCs.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GlobalISel/CallLowering.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GlobalISel/GISelWorkList.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GlobalISel/IRTranslator.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GlobalISel/InstructionSelect.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GlobalISel/InstructionSelector.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GlobalISel/Legalizer.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GlobalISel/Localizer.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GlobalISel/RegBankSelect.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GlobalISel/RegisterBank.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GlobalISel/Types.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/GlobalISel/Utils.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/ISDOpcodes.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/IntrinsicLowering.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/LatencyPriorityQueue.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/LexicalScopes.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/LinkAllAsmWriterComponents.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/LinkAllCodegenComponents.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/LiveInterval.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/LiveIntervalUnion.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/LiveIntervals.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/LivePhysRegs.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/LiveRangeEdit.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/LiveRegMatrix.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/LiveRegUnits.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/LiveStacks.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/LiveVariables.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/LowLevelType.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MIRParser/MIRParser.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MIRPrinter.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MIRYamlMapping.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachORelocation.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineBasicBlock.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineBlockFrequencyInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineBranchProbabilityInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineCombinerPattern.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineConstantPool.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineDominanceFrontier.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineDominators.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineFrameInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineFunction.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineFunctionPass.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineInstr.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineInstrBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineInstrBundle.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineInstrBundleIterator.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineJumpTableInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineLoopInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineMemOperand.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineModuleInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineModuleInfoImpls.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineOperand.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachinePassRegistry.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachinePostDominators.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineRegionInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineRegisterInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineSSAUpdater.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineScheduler.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineTraceMetrics.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MachineValueType.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/MacroFusion.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/PBQP/CostAllocator.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/PBQP/Graph.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/PBQP/Math.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/PBQP/ReductionRules.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/PBQP/Solution.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/PBQPRAConstraint.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/ParallelCG.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/Passes.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/PreISelIntrinsicLowering.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/PseudoSourceValue.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/RegAllocPBQP.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/RegAllocRegistry.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/RegisterClassInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/RegisterPressure.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/RegisterScavenging.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/RegisterUsageInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/ResourcePriorityQueue.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/RuntimeLibcalls.def"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/RuntimeLibcalls.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/SDNodeProperties.td"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/ScheduleDAG.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/ScheduleDAGInstrs.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/ScheduleDAGMutation.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/ScheduleDFS.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/ScheduleHazardRecognizer.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/SchedulerRegistry.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/ScoreboardHazardRecognizer.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/SelectionDAG.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/SelectionDAGAddressAnalysis.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/SelectionDAGISel.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/SelectionDAGNodes.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/SelectionDAGTargetInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/SlotIndexes.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/StackMaps.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/StackProtector.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/TailDuplicator.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/TargetCallingConv.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/TargetFrameLowering.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/TargetInstrInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/TargetLowering.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/TargetLoweringObjectFile.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/TargetOpcodes.def"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/TargetOpcodes.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/TargetPassConfig.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/TargetRegisterInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/TargetSchedule.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/TargetSubtargetInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/UnreachableBlockElim.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/ValueTypes.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/ValueTypes.td"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/VirtRegMap.h"
+  textual header "/usr/include/llvm-6.0/llvm/CodeGen/WinEHFuncInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/Config/AsmParsers.def"
+  textual header "/usr/include/llvm-6.0/llvm/Config/AsmPrinters.def"
+  textual header "/usr/include/llvm-6.0/llvm/Config/Disassemblers.def"
+  textual header "/usr/include/llvm-6.0/llvm/Config/Targets.def"
+  textual header "/usr/include/llvm-6.0/llvm/Config/abi-breaking.h"
+  textual header "/usr/include/llvm-6.0/llvm/Config/llvm-config.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/CVDebugRecord.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/CVRecord.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/CVSymbolVisitor.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/CodeView.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/CodeViewError.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/CodeViewRecordIO.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/CodeViewRegisters.def"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/CodeViewSymbols.def"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/CodeViewTypes.def"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/DebugSubsection.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/DebugUnknownSubsection.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/EnumTables.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/Formatters.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/FunctionId.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/GUID.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/Line.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/RecordName.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/RecordSerialization.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/SimpleTypeSerializer.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/StringsAndChecksums.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/SymbolDumpDelegate.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/SymbolDumper.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/SymbolRecord.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/SymbolRecordMapping.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/SymbolSerializer.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/TypeCollection.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/TypeDeserializer.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/TypeHashing.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/TypeIndex.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/TypeRecord.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/TypeRecordMapping.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/TypeStreamMerger.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/TypeTableCollection.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DIContext.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFAttribute.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFContext.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFDebugAranges.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFDebugLine.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFDebugMacro.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFDie.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFExpression.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFFormValue.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFGdbIndex.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFObject.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFRelocMap.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFSection.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFUnit.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/DWARF/DWARFVerifier.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/MSF/IMSFFile.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/MSF/MSFBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/MSF/MSFCommon.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/MSF/MSFError.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/MSF/MappedBlockStream.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/DIA/DIADataStream.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/DIA/DIAError.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/DIA/DIALineNumber.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/DIA/DIASession.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/DIA/DIASourceFile.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/DIA/DIASupport.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/DIA/DIATable.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/GenericError.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/IPDBDataStream.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/IPDBLineNumber.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/IPDBRawSymbol.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/IPDBSession.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/IPDBSourceFile.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/IPDBTable.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/DbiModuleList.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/DbiStream.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/EnumTables.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/Formatters.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/GlobalsStream.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/Hash.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/HashTable.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/InfoStream.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/NativeEnumModules.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/NativeEnumSymbol.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/NativeSession.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/PDBFile.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/PDBStringTable.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/PublicsStream.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/RawConstants.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/RawError.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/RawTypes.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/SymbolStream.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/TpiHashing.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/TpiStream.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDB.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBContext.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBExtras.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymDumper.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbol.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolBlock.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolCustom.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolData.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolExe.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolFunc.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolLabel.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolThunk.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolUnknown.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/PDBTypes.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/PDB/UDTLayout.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/Symbolize/DIPrinter.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/Symbolize/SymbolizableModule.h"
+  textual header "/usr/include/llvm-6.0/llvm/DebugInfo/Symbolize/Symbolize.h"
+  textual header "/usr/include/llvm-6.0/llvm/Demangle/Demangle.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/ExecutionEngine.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/GenericValue.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Interpreter.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/JITEventListener.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/JITSymbol.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/MCJIT.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/OProfileWrapper.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/ObjectCache.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/ObjectMemoryBuffer.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/CompileUtils.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/ExecutionUtils.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/IRCompileLayer.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/IRTransformLayer.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/IndirectionUtils.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/LambdaResolver.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/NullResolver.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/OrcABISupport.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/OrcError.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/RPCSerialization.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/RPCUtils.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/RawByteChannel.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/Orc/SymbolStringPool.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/OrcMCJITReplacement.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/RTDyldMemoryManager.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/RuntimeDyld.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/RuntimeDyldChecker.h"
+  textual header "/usr/include/llvm-6.0/llvm/ExecutionEngine/SectionMemoryManager.h"
+  textual header "/usr/include/llvm-6.0/llvm/FuzzMutate/FuzzerCLI.h"
+  textual header "/usr/include/llvm-6.0/llvm/FuzzMutate/IRMutator.h"
+  textual header "/usr/include/llvm-6.0/llvm/FuzzMutate/OpDescriptor.h"
+  textual header "/usr/include/llvm-6.0/llvm/FuzzMutate/Operations.h"
+  textual header "/usr/include/llvm-6.0/llvm/FuzzMutate/Random.h"
+  textual header "/usr/include/llvm-6.0/llvm/FuzzMutate/RandomIRBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Argument.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/AssemblyAnnotationWriter.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Attributes.gen"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Attributes.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Attributes.td"
+  textual header "/usr/include/llvm-6.0/llvm/IR/AutoUpgrade.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/BasicBlock.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/CFG.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/CallSite.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/CallingConv.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Comdat.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Constant.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/ConstantFolder.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/ConstantRange.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Constants.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/DIBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/DataLayout.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/DebugInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/DebugInfoFlags.def"
+  textual header "/usr/include/llvm-6.0/llvm/IR/DebugInfoMetadata.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/DebugLoc.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/DerivedTypes.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/DerivedUser.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/DiagnosticHandler.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/DiagnosticInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/DiagnosticPrinter.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Dominators.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Function.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/GVMaterializer.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/GetElementPtrTypeIterator.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/GlobalAlias.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/GlobalIFunc.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/GlobalIndirectSymbol.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/GlobalObject.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/GlobalValue.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/GlobalVariable.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/IRBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/IRPrintingPasses.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/InlineAsm.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/InstIterator.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/InstVisitor.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/InstrTypes.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Instruction.def"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Instruction.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Instructions.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/IntrinsicInst.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Intrinsics.gen"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Intrinsics.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Intrinsics.td"
+  textual header "/usr/include/llvm-6.0/llvm/IR/IntrinsicsAArch64.td"
+  textual header "/usr/include/llvm-6.0/llvm/IR/IntrinsicsAMDGPU.td"
+  textual header "/usr/include/llvm-6.0/llvm/IR/IntrinsicsARM.td"
+  textual header "/usr/include/llvm-6.0/llvm/IR/IntrinsicsBPF.td"
+  textual header "/usr/include/llvm-6.0/llvm/IR/IntrinsicsHexagon.td"
+  textual header "/usr/include/llvm-6.0/llvm/IR/IntrinsicsMips.td"
+  textual header "/usr/include/llvm-6.0/llvm/IR/IntrinsicsNVVM.td"
+  textual header "/usr/include/llvm-6.0/llvm/IR/IntrinsicsPowerPC.td"
+  textual header "/usr/include/llvm-6.0/llvm/IR/IntrinsicsSystemZ.td"
+  textual header "/usr/include/llvm-6.0/llvm/IR/IntrinsicsWebAssembly.td"
+  textual header "/usr/include/llvm-6.0/llvm/IR/IntrinsicsX86.td"
+  textual header "/usr/include/llvm-6.0/llvm/IR/IntrinsicsXCore.td"
+  textual header "/usr/include/llvm-6.0/llvm/IR/LLVMContext.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/LegacyPassManager.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/LegacyPassManagers.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/LegacyPassNameParser.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/MDBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Mangler.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Metadata.def"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Metadata.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Module.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/ModuleSlotTracker.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/ModuleSummaryIndex.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/ModuleSummaryIndexYAML.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/NoFolder.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/OperandTraits.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Operator.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/OptBisect.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/PassManager.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/PassManagerInternal.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/PatternMatch.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/PredIteratorCache.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/ProfileSummary.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/SafepointIRVerifier.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Statepoint.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/SymbolTableListTraits.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/TrackingMDRef.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Type.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/TypeBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/TypeFinder.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Use.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/UseListOrder.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/User.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Value.def"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Value.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/ValueHandle.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/ValueMap.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/ValueSymbolTable.h"
+  textual header "/usr/include/llvm-6.0/llvm/IR/Verifier.h"
+  textual header "/usr/include/llvm-6.0/llvm/IRReader/IRReader.h"
+  textual header "/usr/include/llvm-6.0/llvm/InitializePasses.h"
+  textual header "/usr/include/llvm-6.0/llvm/LTO/Caching.h"
+  textual header "/usr/include/llvm-6.0/llvm/LTO/Config.h"
+  textual header "/usr/include/llvm-6.0/llvm/LTO/LTO.h"
+  textual header "/usr/include/llvm-6.0/llvm/LTO/LTOBackend.h"
+  textual header "/usr/include/llvm-6.0/llvm/LTO/legacy/LTOCodeGenerator.h"
+  textual header "/usr/include/llvm-6.0/llvm/LTO/legacy/LTOModule.h"
+  textual header "/usr/include/llvm-6.0/llvm/LTO/legacy/ThinLTOCodeGenerator.h"
+  textual header "/usr/include/llvm-6.0/llvm/LTO/legacy/UpdateCompilerUsed.h"
+  textual header "/usr/include/llvm-6.0/llvm/LineEditor/LineEditor.h"
+  textual header "/usr/include/llvm-6.0/llvm/LinkAllIR.h"
+  textual header "/usr/include/llvm-6.0/llvm/LinkAllPasses.h"
+  textual header "/usr/include/llvm-6.0/llvm/Linker/IRMover.h"
+  textual header "/usr/include/llvm-6.0/llvm/Linker/Linker.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/ConstantPools.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/LaneBitmask.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCAsmBackend.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCAsmInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCAsmInfoCOFF.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCAsmInfoDarwin.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCAsmInfoELF.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCAsmInfoWasm.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCAsmLayout.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCAsmMacro.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCAssembler.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCCodeEmitter.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCCodePadder.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCCodeView.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCContext.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCDirectives.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCDisassembler/MCDisassembler.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCDisassembler/MCExternalSymbolizer.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCDisassembler/MCRelocationInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCDisassembler/MCSymbolizer.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCDwarf.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCELFObjectWriter.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCELFStreamer.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCExpr.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCFixedLenDisassembler.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCFixup.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCFixupKindInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCFragment.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCInst.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCInstBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCInstPrinter.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCInstrAnalysis.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCInstrDesc.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCInstrInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCInstrItineraries.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCLabel.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCLinkerOptimizationHint.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCMachObjectWriter.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCObjectFileInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCObjectStreamer.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCObjectWriter.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCParser/AsmCond.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCParser/AsmLexer.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCParser/MCAsmLexer.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCParser/MCAsmParser.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCParser/MCAsmParserExtension.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCParser/MCAsmParserUtils.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCParser/MCParsedAsmOperand.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCParser/MCTargetAsmParser.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCRegisterInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCSchedule.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCSection.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCSectionCOFF.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCSectionELF.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCSectionMachO.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCSectionWasm.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCStreamer.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCSubtargetInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCSymbol.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCSymbolCOFF.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCSymbolELF.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCSymbolMachO.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCSymbolWasm.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCTargetOptions.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCTargetOptionsCommandFlags.def"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCValue.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCWasmObjectWriter.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCWasmStreamer.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCWin64EH.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCWinCOFFObjectWriter.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCWinCOFFStreamer.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MCWinEH.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/MachineLocation.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/SectionKind.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/StringTableBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/MC/SubtargetFeature.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/Archive.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/ArchiveWriter.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/Binary.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/COFF.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/COFFImportFile.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/COFFModuleDefinition.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/Decompressor.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/ELF.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/ELFObjectFile.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/ELFTypes.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/Error.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/IRObjectFile.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/IRSymtab.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/MachO.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/MachOUniversal.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/ModuleSymbolTable.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/ObjectFile.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/RelocVisitor.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/StackMapParser.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/SymbolSize.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/SymbolicFile.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/Wasm.h"
+  textual header "/usr/include/llvm-6.0/llvm/Object/WindowsResource.h"
+  textual header "/usr/include/llvm-6.0/llvm/ObjectYAML/COFFYAML.h"
+  textual header "/usr/include/llvm-6.0/llvm/ObjectYAML/CodeViewYAMLDebugSections.h"
+  textual header "/usr/include/llvm-6.0/llvm/ObjectYAML/CodeViewYAMLSymbols.h"
+  textual header "/usr/include/llvm-6.0/llvm/ObjectYAML/CodeViewYAMLTypeHashing.h"
+  textual header "/usr/include/llvm-6.0/llvm/ObjectYAML/CodeViewYAMLTypes.h"
+  textual header "/usr/include/llvm-6.0/llvm/ObjectYAML/DWARFEmitter.h"
+  textual header "/usr/include/llvm-6.0/llvm/ObjectYAML/DWARFYAML.h"
+  textual header "/usr/include/llvm-6.0/llvm/ObjectYAML/ELFYAML.h"
+  textual header "/usr/include/llvm-6.0/llvm/ObjectYAML/MachOYAML.h"
+  textual header "/usr/include/llvm-6.0/llvm/ObjectYAML/ObjectYAML.h"
+  textual header "/usr/include/llvm-6.0/llvm/ObjectYAML/WasmYAML.h"
+  textual header "/usr/include/llvm-6.0/llvm/ObjectYAML/YAML.h"
+  textual header "/usr/include/llvm-6.0/llvm/Option/Arg.h"
+  textual header "/usr/include/llvm-6.0/llvm/Option/ArgList.h"
+  textual header "/usr/include/llvm-6.0/llvm/Option/OptParser.td"
+  textual header "/usr/include/llvm-6.0/llvm/Option/OptSpecifier.h"
+  textual header "/usr/include/llvm-6.0/llvm/Option/OptTable.h"
+  textual header "/usr/include/llvm-6.0/llvm/Option/Option.h"
+  textual header "/usr/include/llvm-6.0/llvm/Pass.h"
+  textual header "/usr/include/llvm-6.0/llvm/PassAnalysisSupport.h"
+  textual header "/usr/include/llvm-6.0/llvm/PassInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/PassRegistry.h"
+  textual header "/usr/include/llvm-6.0/llvm/PassSupport.h"
+  textual header "/usr/include/llvm-6.0/llvm/Passes/PassBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/ProfileData/Coverage/CoverageMapping.h"
+  textual header "/usr/include/llvm-6.0/llvm/ProfileData/Coverage/CoverageMappingReader.h"
+  textual header "/usr/include/llvm-6.0/llvm/ProfileData/Coverage/CoverageMappingWriter.h"
+  textual header "/usr/include/llvm-6.0/llvm/ProfileData/GCOV.h"
+  textual header "/usr/include/llvm-6.0/llvm/ProfileData/InstrProf.h"
+  textual header "/usr/include/llvm-6.0/llvm/ProfileData/InstrProfData.inc"
+  textual header "/usr/include/llvm-6.0/llvm/ProfileData/InstrProfReader.h"
+  textual header "/usr/include/llvm-6.0/llvm/ProfileData/InstrProfWriter.h"
+  textual header "/usr/include/llvm-6.0/llvm/ProfileData/ProfileCommon.h"
+  textual header "/usr/include/llvm-6.0/llvm/ProfileData/SampleProf.h"
+  textual header "/usr/include/llvm-6.0/llvm/ProfileData/SampleProfReader.h"
+  textual header "/usr/include/llvm-6.0/llvm/ProfileData/SampleProfWriter.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/AArch64TargetParser.def"
+  textual header "/usr/include/llvm-6.0/llvm/Support/AMDGPUKernelDescriptor.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/AMDGPUMetadata.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/ARMAttributeParser.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/ARMBuildAttributes.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/ARMEHABI.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/ARMTargetParser.def"
+  textual header "/usr/include/llvm-6.0/llvm/Support/ARMWinEH.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/AlignOf.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Allocator.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/ArrayRecycler.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Atomic.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/AtomicOrdering.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/BinaryByteStream.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/BinaryItemStream.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/BinaryStream.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/BinaryStreamArray.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/BinaryStreamError.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/BinaryStreamReader.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/BinaryStreamRef.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/BinaryStreamWriter.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/BlockFrequency.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/BranchProbability.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/CBindingWrapping.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/COM.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/CachePruning.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Capacity.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Casting.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Chrono.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/CodeGen.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/CodeGenCWrappers.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/CodeGenCoverage.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/CommandLine.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Compiler.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Compression.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/ConvertUTF.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/CrashRecoveryContext.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/DOTGraphTraits.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/DataExtractor.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/DataTypes.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Debug.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/DebugCounter.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/DynamicLibrary.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Endian.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/EndianStream.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Errc.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Errno.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Error.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/ErrorHandling.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/ErrorOr.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/FileOutputBuffer.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/FileSystem.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/FileUtilities.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Format.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/FormatAdapters.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/FormatCommon.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/FormatProviders.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/FormatVariadic.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/FormatVariadicDetails.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/FormattedStream.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/GenericDomTree.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/GenericDomTreeConstruction.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/GlobPattern.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/GraphWriter.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Host.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/JamCRC.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/KnownBits.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/LEB128.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/LineIterator.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Locale.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/LockFileManager.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/LowLevelTypeImpl.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/MD5.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/ManagedStatic.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/MathExtras.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Memory.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/MemoryBuffer.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/MipsABIFlags.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Mutex.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/MutexGuard.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/NativeFormatting.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/OnDiskHashTable.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Options.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Parallel.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Path.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/PluginLoader.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/PointerLikeTypeTraits.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/PrettyStackTrace.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Printable.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Process.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Program.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/RWMutex.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/RandomNumberGenerator.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Recycler.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/RecyclingAllocator.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Regex.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Registry.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/ReverseIteration.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/SHA1.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/SMLoc.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/SaveAndRestore.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/ScaledNumber.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/ScopedPrinter.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Signals.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Solaris/sys/regset.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/SourceMgr.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/SpecialCaseList.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/StringPool.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/StringSaver.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/SwapByteOrder.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/SystemUtils.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/TarWriter.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/TargetParser.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/TargetRegistry.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/TargetSelect.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/ThreadLocal.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/ThreadPool.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Threading.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Timer.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/ToolOutputFile.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/TrailingObjects.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/TrigramIndex.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/TypeName.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Unicode.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/UnicodeCharRanges.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/UniqueLock.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/VCSRevision.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Valgrind.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Watchdog.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/Win64EH.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/WindowsError.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/X86TargetParser.def"
+  textual header "/usr/include/llvm-6.0/llvm/Support/YAMLParser.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/YAMLTraits.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/circular_raw_ostream.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/raw_os_ostream.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/raw_ostream.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/raw_sha1_ostream.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/thread.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/type_traits.h"
+  textual header "/usr/include/llvm-6.0/llvm/Support/xxhash.h"
+  textual header "/usr/include/llvm-6.0/llvm/TableGen/Error.h"
+  textual header "/usr/include/llvm-6.0/llvm/TableGen/Main.h"
+  textual header "/usr/include/llvm-6.0/llvm/TableGen/Record.h"
+  textual header "/usr/include/llvm-6.0/llvm/TableGen/SearchableTable.td"
+  textual header "/usr/include/llvm-6.0/llvm/TableGen/SetTheory.h"
+  textual header "/usr/include/llvm-6.0/llvm/TableGen/StringMatcher.h"
+  textual header "/usr/include/llvm-6.0/llvm/TableGen/StringToOffsetTable.h"
+  textual header "/usr/include/llvm-6.0/llvm/TableGen/TableGenBackend.h"
+  textual header "/usr/include/llvm-6.0/llvm/Target/GenericOpcodes.td"
+  textual header "/usr/include/llvm-6.0/llvm/Target/GlobalISel/RegisterBank.td"
+  textual header "/usr/include/llvm-6.0/llvm/Target/GlobalISel/SelectionDAGCompat.td"
+  textual header "/usr/include/llvm-6.0/llvm/Target/GlobalISel/Target.td"
+  textual header "/usr/include/llvm-6.0/llvm/Target/Target.td"
+  textual header "/usr/include/llvm-6.0/llvm/Target/TargetCallingConv.td"
+  textual header "/usr/include/llvm-6.0/llvm/Target/TargetIntrinsicInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/Target/TargetItinerary.td"
+  textual header "/usr/include/llvm-6.0/llvm/Target/TargetMachine.h"
+  textual header "/usr/include/llvm-6.0/llvm/Target/TargetOptions.h"
+  textual header "/usr/include/llvm-6.0/llvm/Target/TargetSchedule.td"
+  textual header "/usr/include/llvm-6.0/llvm/Target/TargetSelectionDAG.td"
+  textual header "/usr/include/llvm-6.0/llvm/Testing/Support/Error.h"
+  textual header "/usr/include/llvm-6.0/llvm/Testing/Support/SupportHelpers.h"
+  textual header "/usr/include/llvm-6.0/llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h"
+  textual header "/usr/include/llvm-6.0/llvm/ToolDrivers/llvm-lib/LibDriver.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Coroutines.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/GCOVProfiler.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/AlwaysInliner.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/ArgumentPromotion.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/CalledValuePropagation.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/ConstantMerge.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/CrossDSOCFI.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/DeadArgumentElimination.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/ElimAvailExtern.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/ForceFunctionAttrs.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/FunctionAttrs.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/FunctionImport.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/GlobalDCE.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/GlobalOpt.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/GlobalSplit.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/InferFunctionAttrs.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/Inliner.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/Internalize.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/LowerTypeTests.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/PartialInlining.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/PassManagerBuilder.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/SCCP.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/StripDeadPrototypes.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/IPO/WholeProgramDevirt.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/InstCombine/InstCombine.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/InstCombine/InstCombineWorklist.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/InstrProfiling.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Instrumentation.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Instrumentation/BoundsChecking.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/ObjCARC.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/PGOInstrumentation.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/SampleProfile.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/ADCE.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/BDCE.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/CallSiteSplitting.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/ConstantHoisting.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/DCE.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/DeadStoreElimination.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/DivRemPairs.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/EarlyCSE.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/Float2Int.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/GVN.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/GVNExpression.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/GuardWidening.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/IVUsersPrinter.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/IndVarSimplify.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/JumpThreading.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/LICM.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/LoopDataPrefetch.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/LoopDeletion.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/LoopDistribute.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/LoopIdiomRecognize.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/LoopInstSimplify.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/LoopLoadElimination.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/LoopPassManager.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/LoopPredication.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/LoopRotation.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/LoopSimplifyCFG.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/LoopSink.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/LoopStrengthReduce.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/LoopUnrollPass.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/LowerAtomic.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/MemCpyOptimizer.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/NaryReassociate.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/NewGVN.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/Reassociate.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/RewriteStatepointsForGC.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/SCCP.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/SROA.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/SimplifyCFG.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/Sink.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/SpeculateAroundPHIs.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/SpeculativeExecution.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Scalar/TailRecursionElimination.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/ASanStackFrameLayout.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/AddDiscriminators.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/BasicBlockUtils.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/BreakCriticalEdges.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/BuildLibCalls.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/BypassSlowDivision.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/CallPromotionUtils.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/Cloning.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/CodeExtractor.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/CtorUtils.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/EntryExitInstrumenter.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/EscapeEnumerator.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/Evaluator.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/FunctionComparator.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/FunctionImportUtils.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/GlobalStatus.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/IntegerDivision.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/LCSSA.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/LibCallsShrinkWrap.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/Local.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/LoopSimplify.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/LoopUtils.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/LoopVersioning.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/LowerInvoke.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/LowerMemIntrinsics.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/Mem2Reg.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/ModuleUtils.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/NameAnonGlobals.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/OrderedInstructions.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/PredicateInfo.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/PromoteMemToReg.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/SSAUpdater.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/SSAUpdaterImpl.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/SanitizerStats.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/SimplifyIndVar.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/SimplifyInstructions.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/SimplifyLibCalls.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/SplitModule.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/SymbolRewriter.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/UnrollLoop.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/VNCoercion.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Utils/ValueMapper.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Vectorize.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Vectorize/LoopVectorize.h"
+  textual header "/usr/include/llvm-6.0/llvm/Transforms/Vectorize/SLPVectorizer.h"
+  textual header "/usr/include/llvm-6.0/llvm/WindowsManifest/WindowsManifestMerger.h"
+  textual header "/usr/include/llvm-6.0/llvm/WindowsResource/ResourceProcessor.h"
+  textual header "/usr/include/llvm-6.0/llvm/WindowsResource/ResourceScriptToken.h"
+  textual header "/usr/include/llvm-6.0/llvm/WindowsResource/ResourceScriptTokenList.h"
+  textual header "/usr/include/llvm-6.0/llvm/XRay/Graph.h"
+  textual header "/usr/include/llvm-6.0/llvm/XRay/InstrumentationMap.h"
+  textual header "/usr/include/llvm-6.0/llvm/XRay/Trace.h"
+  textual header "/usr/include/llvm-6.0/llvm/XRay/XRayRecord.h"
+  textual header "/usr/include/llvm-6.0/llvm/XRay/YAMLXRayRecord.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/Analysis.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/BitReader.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/BitWriter.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/Core.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/DebugInfo.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/Disassembler.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/ErrorHandling.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/ExecutionEngine.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/IRReader.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/Initialization.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/LinkTimeOptimizer.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/Linker.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/Object.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/OrcBindings.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/Support.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/Target.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/TargetMachine.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/Transforms/IPO.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/Transforms/PassManagerBuilder.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/Transforms/Scalar.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/Transforms/Vectorize.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/Types.h"
+  textual header "/usr/include/llvm-c-6.0/llvm-c/lto.h"
+  textual header "/usr/include/locale.h"
+  textual header "/usr/include/malloc.h"
+  textual header "/usr/include/math.h"
+  textual header "/usr/include/mcheck.h"
+  textual header "/usr/include/memory.h"
+  textual header "/usr/include/misc/cxl.h"
+  textual header "/usr/include/misc/ocxl.h"
+  textual header "/usr/include/mntent.h"
+  textual header "/usr/include/monetary.h"
+  textual header "/usr/include/mqueue.h"
+  textual header "/usr/include/mtd/inftl-user.h"
+  textual header "/usr/include/mtd/mtd-abi.h"
+  textual header "/usr/include/mtd/mtd-user.h"
+  textual header "/usr/include/mtd/nftl-user.h"
+  textual header "/usr/include/mtd/ubi-user.h"
+  textual header "/usr/include/net/ethernet.h"
+  textual header "/usr/include/net/if.h"
+  textual header "/usr/include/net/if_arp.h"
+  textual header "/usr/include/net/if_packet.h"
+  textual header "/usr/include/net/if_ppp.h"
+  textual header "/usr/include/net/if_shaper.h"
+  textual header "/usr/include/net/if_slip.h"
+  textual header "/usr/include/net/ppp-comp.h"
+  textual header "/usr/include/net/ppp_defs.h"
+  textual header "/usr/include/net/route.h"
+  textual header "/usr/include/netash/ash.h"
+  textual header "/usr/include/netatalk/at.h"
+  textual header "/usr/include/netax25/ax25.h"
+  textual header "/usr/include/netdb.h"
+  textual header "/usr/include/neteconet/ec.h"
+  textual header "/usr/include/netinet/ether.h"
+  textual header "/usr/include/netinet/icmp6.h"
+  textual header "/usr/include/netinet/if_ether.h"
+  textual header "/usr/include/netinet/if_fddi.h"
+  textual header "/usr/include/netinet/if_tr.h"
+  textual header "/usr/include/netinet/igmp.h"
+  textual header "/usr/include/netinet/in.h"
+  textual header "/usr/include/netinet/in_systm.h"
+  textual header "/usr/include/netinet/ip.h"
+  textual header "/usr/include/netinet/ip6.h"
+  textual header "/usr/include/netinet/ip_icmp.h"
+  textual header "/usr/include/netinet/tcp.h"
+  textual header "/usr/include/netinet/udp.h"
+  textual header "/usr/include/netipx/ipx.h"
+  textual header "/usr/include/netiucv/iucv.h"
+  textual header "/usr/include/netpacket/packet.h"
+  textual header "/usr/include/netrom/netrom.h"
+  textual header "/usr/include/netrose/rose.h"
+  textual header "/usr/include/nfs/nfs.h"
+  textual header "/usr/include/nl_types.h"
+  textual header "/usr/include/nss.h"
+  textual header "/usr/include/obstack.h"
+  textual header "/usr/include/omp.h"
+  textual header "/usr/include/paths.h"
+  textual header "/usr/include/poll.h"
+  textual header "/usr/include/printf.h"
+  textual header "/usr/include/proc_service.h"
+  textual header "/usr/include/protocols/routed.h"
+  textual header "/usr/include/protocols/rwhod.h"
+  textual header "/usr/include/protocols/talkd.h"
+  textual header "/usr/include/protocols/timed.h"
+  textual header "/usr/include/pthread.h"
+  textual header "/usr/include/pty.h"
+  textual header "/usr/include/pwd.h"
+  textual header "/usr/include/rdma/bnxt_re-abi.h"
+  textual header "/usr/include/rdma/cxgb3-abi.h"
+  textual header "/usr/include/rdma/cxgb4-abi.h"
+  textual header "/usr/include/rdma/hfi/hfi1_ioctl.h"
+  textual header "/usr/include/rdma/hfi/hfi1_user.h"
+  textual header "/usr/include/rdma/hns-abi.h"
+  textual header "/usr/include/rdma/ib_user_cm.h"
+  textual header "/usr/include/rdma/ib_user_ioctl_verbs.h"
+  textual header "/usr/include/rdma/ib_user_mad.h"
+  textual header "/usr/include/rdma/ib_user_sa.h"
+  textual header "/usr/include/rdma/ib_user_verbs.h"
+  textual header "/usr/include/rdma/mlx4-abi.h"
+  textual header "/usr/include/rdma/mlx5-abi.h"
+  textual header "/usr/include/rdma/mthca-abi.h"
+  textual header "/usr/include/rdma/nes-abi.h"
+  textual header "/usr/include/rdma/ocrdma-abi.h"
+  textual header "/usr/include/rdma/qedr-abi.h"
+  textual header "/usr/include/rdma/rdma_netlink.h"
+  textual header "/usr/include/rdma/rdma_user_cm.h"
+  textual header "/usr/include/rdma/rdma_user_ioctl.h"
+  textual header "/usr/include/rdma/rdma_user_rxe.h"
+  textual header "/usr/include/rdma/vmw_pvrdma-abi.h"
+  textual header "/usr/include/re_comp.h"
+  textual header "/usr/include/regex.h"
+  textual header "/usr/include/regexp.h"
+  textual header "/usr/include/resolv.h"
+  textual header "/usr/include/rpc/auth.h"
+  textual header "/usr/include/rpc/auth_des.h"
+  textual header "/usr/include/rpc/auth_unix.h"
+  textual header "/usr/include/rpc/clnt.h"
+  textual header "/usr/include/rpc/des_crypt.h"
+  textual header "/usr/include/rpc/key_prot.h"
+  textual header "/usr/include/rpc/netdb.h"
+  textual header "/usr/include/rpc/pmap_clnt.h"
+  textual header "/usr/include/rpc/pmap_prot.h"
+  textual header "/usr/include/rpc/pmap_rmt.h"
+  textual header "/usr/include/rpc/rpc.h"
+  textual header "/usr/include/rpc/rpc_des.h"
+  textual header "/usr/include/rpc/rpc_msg.h"
+  textual header "/usr/include/rpc/svc.h"
+  textual header "/usr/include/rpc/svc_auth.h"
+  textual header "/usr/include/rpc/types.h"
+  textual header "/usr/include/rpc/xdr.h"
+  textual header "/usr/include/rpcsvc/bootparam.h"
+  textual header "/usr/include/rpcsvc/bootparam_prot.h"
+  textual header "/usr/include/rpcsvc/bootparam_prot.x"
+  textual header "/usr/include/rpcsvc/key_prot.h"
+  textual header "/usr/include/rpcsvc/key_prot.x"
+  textual header "/usr/include/rpcsvc/klm_prot.h"
+  textual header "/usr/include/rpcsvc/klm_prot.x"
+  textual header "/usr/include/rpcsvc/mount.h"
+  textual header "/usr/include/rpcsvc/mount.x"
+  textual header "/usr/include/rpcsvc/nfs_prot.h"
+  textual header "/usr/include/rpcsvc/nfs_prot.x"
+  textual header "/usr/include/rpcsvc/nis.h"
+  textual header "/usr/include/rpcsvc/nis.x"
+  textual header "/usr/include/rpcsvc/nis_callback.h"
+  textual header "/usr/include/rpcsvc/nis_callback.x"
+  textual header "/usr/include/rpcsvc/nis_object.x"
+  textual header "/usr/include/rpcsvc/nis_tags.h"
+  textual header "/usr/include/rpcsvc/nislib.h"
+  textual header "/usr/include/rpcsvc/nlm_prot.h"
+  textual header "/usr/include/rpcsvc/nlm_prot.x"
+  textual header "/usr/include/rpcsvc/rex.h"
+  textual header "/usr/include/rpcsvc/rex.x"
+  textual header "/usr/include/rpcsvc/rquota.h"
+  textual header "/usr/include/rpcsvc/rquota.x"
+  textual header "/usr/include/rpcsvc/rstat.h"
+  textual header "/usr/include/rpcsvc/rstat.x"
+  textual header "/usr/include/rpcsvc/rusers.h"
+  textual header "/usr/include/rpcsvc/rusers.x"
+  textual header "/usr/include/rpcsvc/sm_inter.h"
+  textual header "/usr/include/rpcsvc/sm_inter.x"
+  textual header "/usr/include/rpcsvc/spray.h"
+  textual header "/usr/include/rpcsvc/spray.x"
+  textual header "/usr/include/rpcsvc/yp.h"
+  textual header "/usr/include/rpcsvc/yp.x"
+  textual header "/usr/include/rpcsvc/yp_prot.h"
+  textual header "/usr/include/rpcsvc/ypclnt.h"
+  textual header "/usr/include/rpcsvc/yppasswd.h"
+  textual header "/usr/include/rpcsvc/yppasswd.x"
+  textual header "/usr/include/rpcsvc/ypupd.h"
+  textual header "/usr/include/sched.h"
+  textual header "/usr/include/scsi/cxlflash_ioctl.h"
+  textual header "/usr/include/scsi/fc/fc_els.h"
+  textual header "/usr/include/scsi/fc/fc_fs.h"
+  textual header "/usr/include/scsi/fc/fc_gs.h"
+  textual header "/usr/include/scsi/fc/fc_ns.h"
+  textual header "/usr/include/scsi/scsi.h"
+  textual header "/usr/include/scsi/scsi_bsg_fc.h"
+  textual header "/usr/include/scsi/scsi_ioctl.h"
+  textual header "/usr/include/scsi/scsi_netlink.h"
+  textual header "/usr/include/scsi/scsi_netlink_fc.h"
+  textual header "/usr/include/scsi/sg.h"
+  textual header "/usr/include/search.h"
+  textual header "/usr/include/semaphore.h"
+  textual header "/usr/include/setjmp.h"
+  textual header "/usr/include/sgtty.h"
+  textual header "/usr/include/shadow.h"
+  textual header "/usr/include/signal.h"
+  textual header "/usr/include/sound/asequencer.h"
+  textual header "/usr/include/sound/asoc.h"
+  textual header "/usr/include/sound/asound.h"
+  textual header "/usr/include/sound/asound_fm.h"
+  textual header "/usr/include/sound/compress_offload.h"
+  textual header "/usr/include/sound/compress_params.h"
+  textual header "/usr/include/sound/emu10k1.h"
+  textual header "/usr/include/sound/firewire.h"
+  textual header "/usr/include/sound/hdsp.h"
+  textual header "/usr/include/sound/hdspm.h"
+  textual header "/usr/include/sound/sb16_csp.h"
+  textual header "/usr/include/sound/sfnt_info.h"
+  textual header "/usr/include/sound/snd_sst_tokens.h"
+  textual header "/usr/include/sound/tlv.h"
+  textual header "/usr/include/sound/usb_stream.h"
+  textual header "/usr/include/spawn.h"
+  textual header "/usr/include/stab.h"
+  textual header "/usr/include/stdc-predef.h"
+  textual header "/usr/include/stdint.h"
+  textual header "/usr/include/stdio.h"
+  textual header "/usr/include/stdio_ext.h"
+  textual header "/usr/include/stdlib.h"
+  textual header "/usr/include/string.h"
+  textual header "/usr/include/strings.h"
+  textual header "/usr/include/stropts.h"
+  textual header "/usr/include/syscall.h"
+  textual header "/usr/include/sysexits.h"
+  textual header "/usr/include/syslog.h"
+  textual header "/usr/include/tar.h"
+  textual header "/usr/include/termio.h"
+  textual header "/usr/include/termios.h"
+  textual header "/usr/include/tgmath.h"
+  textual header "/usr/include/thread_db.h"
+  textual header "/usr/include/time.h"
+  textual header "/usr/include/ttyent.h"
+  textual header "/usr/include/uchar.h"
+  textual header "/usr/include/ucontext.h"
+  textual header "/usr/include/ulimit.h"
+  textual header "/usr/include/unistd.h"
+  textual header "/usr/include/ustat.h"
+  textual header "/usr/include/utime.h"
+  textual header "/usr/include/utmp.h"
+  textual header "/usr/include/utmpx.h"
+  textual header "/usr/include/values.h"
+  textual header "/usr/include/video/edid.h"
+  textual header "/usr/include/video/sisfb.h"
+  textual header "/usr/include/video/uvesafb.h"
+  textual header "/usr/include/wait.h"
+  textual header "/usr/include/wchar.h"
+  textual header "/usr/include/wctype.h"
+  textual header "/usr/include/wordexp.h"
+  textual header "/usr/include/x86_64-linux-gnu/a.out.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/a.out.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/auxvec.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/bitsperlong.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/boot.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/bootparam.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/bpf_perf_event.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/byteorder.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/debugreg.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/e820.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/errno.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/fcntl.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/hw_breakpoint.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/hwcap2.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/hyperv.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/ioctl.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/ioctls.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/ipcbuf.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/ist.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/kvm.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/kvm_para.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/kvm_perf.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/ldt.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/mce.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/mman.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/msgbuf.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/msr.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/mtrr.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/param.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/perf_regs.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/poll.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/posix_types.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/posix_types_32.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/posix_types_64.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/posix_types_x32.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/prctl.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/processor-flags.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/ptrace-abi.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/ptrace.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/resource.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/sembuf.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/setup.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/shmbuf.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/sigcontext.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/sigcontext32.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/siginfo.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/signal.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/socket.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/sockios.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/stat.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/statfs.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/svm.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/swab.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/termbits.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/termios.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/types.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/ucontext.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/unistd.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/unistd_32.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/unistd_64.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/unistd_x32.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/vm86.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/vmx.h"
+  textual header "/usr/include/x86_64-linux-gnu/asm/vsyscall.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/_G_config.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/a.out.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/auxv.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/byteswap-16.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/byteswap.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/cmathcalls.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/confname.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/cpu-set.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/dirent.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/dlfcn.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/elfclass.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/endian.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/environments.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/epoll.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/errno.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/error.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/eventfd.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/fcntl-linux.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/fcntl.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/fcntl2.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/fenv.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/fenvinline.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/floatn-common.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/floatn.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/flt-eval-method.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/fp-fast.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/fp-logb.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/getopt_core.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/getopt_ext.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/getopt_posix.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/hwcap.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/in.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/initspin.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/inotify.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/ioctl-types.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/ioctls.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/ipc.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/ipctypes.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/iscanonical.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/libc-header-start.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/libio-ldbl.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/libio.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/libm-simd-decl-stubs.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/link.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/local_lim.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/locale.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/long-double.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/math-finite.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/math-vector.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/mathcalls-helper-functions.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/mathcalls.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/mathdef.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/mathinline.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/mman-linux.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/mman-shared.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/mman.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/monetary-ldbl.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/mqueue.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/mqueue2.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/msq.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/netdb.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/param.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/poll.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/poll2.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/posix1_lim.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/posix2_lim.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/posix_opt.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/printf-ldbl.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/pthreadtypes-arch.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/pthreadtypes.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/ptrace-shared.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/resource.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sched.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/select.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/select2.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sem.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/semaphore.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/setjmp.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/setjmp2.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/shm.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sigaction.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sigcontext.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sigevent-consts.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/siginfo-arch.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/siginfo-consts-arch.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/siginfo-consts.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/signalfd.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/signum-generic.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/signum.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sigstack.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sigthread.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sockaddr.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/socket.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/socket2.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/socket_type.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/ss_flags.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stab.def"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stat.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/statfs.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/statvfs.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stdint-intn.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stdint-uintn.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stdio-ldbl.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stdio.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stdio2.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stdio_lim.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stdlib-bsearch.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stdlib-float.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stdlib-ldbl.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stdlib.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/string_fortified.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/strings_fortified.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/stropts.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sys_errlist.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/syscall.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sysctl.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/syslog-ldbl.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/syslog-path.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/syslog.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/sysmacros.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/termios.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/thread-shared-types.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/time.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/timerfd.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/timex.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/FILE.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/__FILE.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/__locale_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/__mbstate_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/__sigset_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/__sigval_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/clock_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/clockid_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/locale_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/mbstate_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/res_state.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/sig_atomic_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/sigevent_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/siginfo_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/sigset_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/sigval_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/stack_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/struct_iovec.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/struct_itimerspec.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/struct_osockaddr.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/struct_rusage.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/struct_sigstack.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/struct_timespec.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/struct_timeval.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/struct_tm.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/time_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/timer_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/types/wint_t.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/typesizes.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/uintn-identity.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/uio-ext.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/uio_lim.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/unistd.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/ustat.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/utmp.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/utmpx.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/utsname.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/waitflags.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/waitstatus.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/wchar-ldbl.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/wchar.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/wchar2.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/wctype-wchar.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/wordsize.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/xopen_lim.h"
+  textual header "/usr/include/x86_64-linux-gnu/bits/xtitypes.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/atomic_word.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/basic_file.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/c++allocator.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/c++config.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/c++io.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/c++locale.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/cpu_defines.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/ctype_base.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/ctype_inline.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/cxxabi_tweaks.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/error_constants.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/extc++.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/gthr-default.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/gthr-posix.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/gthr-single.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/gthr.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/messages_members.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/opt_random.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/os_defines.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/stdc++.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/stdtr1c++.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/time_members.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/ext/opt_random.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/atomic_word.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/basic_file.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/c++allocator.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/c++config.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/c++io.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/c++locale.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/cpu_defines.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/ctype_base.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/ctype_inline.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/cxxabi_tweaks.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/error_constants.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/extc++.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/gthr-default.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/gthr-posix.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/gthr-single.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/gthr.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/messages_members.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/opt_random.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/os_defines.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/stdc++.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/stdtr1c++.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/bits/time_members.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7/ext/opt_random.h"
+  textual header "/usr/include/x86_64-linux-gnu/ffi.h"
+  textual header "/usr/include/x86_64-linux-gnu/ffitarget.h"
+  textual header "/usr/include/x86_64-linux-gnu/fpu_control.h"
+  textual header "/usr/include/x86_64-linux-gnu/gnu/lib-names-64.h"
+  textual header "/usr/include/x86_64-linux-gnu/gnu/lib-names.h"
+  textual header "/usr/include/x86_64-linux-gnu/gnu/libc-version.h"
+  textual header "/usr/include/x86_64-linux-gnu/gnu/stubs-64.h"
+  textual header "/usr/include/x86_64-linux-gnu/gnu/stubs.h"
+  textual header "/usr/include/x86_64-linux-gnu/ieee754.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/acct.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/auxv.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/bitypes.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/cdefs.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/debugreg.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/dir.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/elf.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/epoll.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/errno.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/eventfd.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/fanotify.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/fcntl.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/file.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/fsuid.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/gmon.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/gmon_out.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/inotify.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/io.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/ioctl.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/ipc.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/kd.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/klog.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/mman.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/mount.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/msg.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/mtio.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/param.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/pci.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/perm.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/personality.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/poll.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/prctl.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/procfs.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/profil.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/ptrace.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/queue.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/quota.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/random.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/raw.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/reboot.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/reg.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/resource.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/select.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/sem.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/sendfile.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/shm.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/signal.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/signalfd.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/socket.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/socketvar.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/soundcard.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/stat.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/statfs.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/statvfs.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/stropts.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/swap.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/syscall.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/sysctl.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/sysinfo.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/syslog.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/sysmacros.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/termios.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/time.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/timeb.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/timerfd.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/times.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/timex.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/ttychars.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/ttydefaults.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/types.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/ucontext.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/uio.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/un.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/unistd.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/user.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/ustat.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/utsname.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/vfs.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/vlimit.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/vm86.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/vt.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/vtimes.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/wait.h"
+  textual header "/usr/include/x86_64-linux-gnu/sys/xattr.h"
+  textual header "/usr/include/xen/evtchn.h"
+  textual header "/usr/include/xen/gntalloc.h"
+  textual header "/usr/include/xen/gntdev.h"
+  textual header "/usr/include/xen/privcmd.h"
+  textual header "/usr/include/c++/7.5.0/algorithm"
+  textual header "/usr/include/c++/7.5.0/any"
+  textual header "/usr/include/c++/7.5.0/array"
+  textual header "/usr/include/c++/7.5.0/atomic"
+  textual header "/usr/include/c++/7.5.0/backward/auto_ptr.h"
+  textual header "/usr/include/c++/7.5.0/backward/backward_warning.h"
+  textual header "/usr/include/c++/7.5.0/backward/binders.h"
+  textual header "/usr/include/c++/7.5.0/backward/hash_fun.h"
+  textual header "/usr/include/c++/7.5.0/backward/hash_map"
+  textual header "/usr/include/c++/7.5.0/backward/hash_set"
+  textual header "/usr/include/c++/7.5.0/backward/hashtable.h"
+  textual header "/usr/include/c++/7.5.0/backward/strstream"
+  textual header "/usr/include/c++/7.5.0/bits/algorithmfwd.h"
+  textual header "/usr/include/c++/7.5.0/bits/alloc_traits.h"
+  textual header "/usr/include/c++/7.5.0/bits/allocated_ptr.h"
+  textual header "/usr/include/c++/7.5.0/bits/allocator.h"
+  textual header "/usr/include/c++/7.5.0/bits/atomic_base.h"
+  textual header "/usr/include/c++/7.5.0/bits/atomic_futex.h"
+  textual header "/usr/include/c++/7.5.0/bits/atomic_lockfree_defines.h"
+  textual header "/usr/include/c++/7.5.0/bits/basic_ios.h"
+  textual header "/usr/include/c++/7.5.0/bits/basic_ios.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/basic_string.h"
+  textual header "/usr/include/c++/7.5.0/bits/basic_string.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/boost_concept_check.h"
+  textual header "/usr/include/c++/7.5.0/bits/c++0x_warning.h"
+  textual header "/usr/include/c++/7.5.0/bits/char_traits.h"
+  textual header "/usr/include/c++/7.5.0/bits/codecvt.h"
+  textual header "/usr/include/c++/7.5.0/bits/concept_check.h"
+  textual header "/usr/include/c++/7.5.0/bits/cpp_type_traits.h"
+  textual header "/usr/include/c++/7.5.0/bits/cxxabi_forced.h"
+  textual header "/usr/include/c++/7.5.0/bits/cxxabi_init_exception.h"
+  textual header "/usr/include/c++/7.5.0/bits/deque.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/enable_special_members.h"
+  textual header "/usr/include/c++/7.5.0/bits/exception.h"
+  textual header "/usr/include/c++/7.5.0/bits/exception_defines.h"
+  textual header "/usr/include/c++/7.5.0/bits/exception_ptr.h"
+  textual header "/usr/include/c++/7.5.0/bits/forward_list.h"
+  textual header "/usr/include/c++/7.5.0/bits/forward_list.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/fstream.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/functexcept.h"
+  textual header "/usr/include/c++/7.5.0/bits/functional_hash.h"
+  textual header "/usr/include/c++/7.5.0/bits/gslice.h"
+  textual header "/usr/include/c++/7.5.0/bits/gslice_array.h"
+  textual header "/usr/include/c++/7.5.0/bits/hash_bytes.h"
+  textual header "/usr/include/c++/7.5.0/bits/hashtable.h"
+  textual header "/usr/include/c++/7.5.0/bits/hashtable_policy.h"
+  textual header "/usr/include/c++/7.5.0/bits/indirect_array.h"
+  textual header "/usr/include/c++/7.5.0/bits/invoke.h"
+  textual header "/usr/include/c++/7.5.0/bits/ios_base.h"
+  textual header "/usr/include/c++/7.5.0/bits/istream.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/list.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/locale_classes.h"
+  textual header "/usr/include/c++/7.5.0/bits/locale_classes.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/locale_conv.h"
+  textual header "/usr/include/c++/7.5.0/bits/locale_facets.h"
+  textual header "/usr/include/c++/7.5.0/bits/locale_facets.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/locale_facets_nonio.h"
+  textual header "/usr/include/c++/7.5.0/bits/locale_facets_nonio.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/localefwd.h"
+  textual header "/usr/include/c++/7.5.0/bits/mask_array.h"
+  textual header "/usr/include/c++/7.5.0/bits/memoryfwd.h"
+  textual header "/usr/include/c++/7.5.0/bits/move.h"
+  textual header "/usr/include/c++/7.5.0/bits/nested_exception.h"
+  textual header "/usr/include/c++/7.5.0/bits/node_handle.h"
+  textual header "/usr/include/c++/7.5.0/bits/ostream.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/ostream_insert.h"
+  textual header "/usr/include/c++/7.5.0/bits/parse_numbers.h"
+  textual header "/usr/include/c++/7.5.0/bits/postypes.h"
+  textual header "/usr/include/c++/7.5.0/bits/predefined_ops.h"
+  textual header "/usr/include/c++/7.5.0/bits/ptr_traits.h"
+  textual header "/usr/include/c++/7.5.0/bits/quoted_string.h"
+  textual header "/usr/include/c++/7.5.0/bits/random.h"
+  textual header "/usr/include/c++/7.5.0/bits/random.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/range_access.h"
+  textual header "/usr/include/c++/7.5.0/bits/refwrap.h"
+  textual header "/usr/include/c++/7.5.0/bits/regex.h"
+  textual header "/usr/include/c++/7.5.0/bits/regex.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/regex_automaton.h"
+  textual header "/usr/include/c++/7.5.0/bits/regex_automaton.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/regex_compiler.h"
+  textual header "/usr/include/c++/7.5.0/bits/regex_compiler.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/regex_constants.h"
+  textual header "/usr/include/c++/7.5.0/bits/regex_error.h"
+  textual header "/usr/include/c++/7.5.0/bits/regex_executor.h"
+  textual header "/usr/include/c++/7.5.0/bits/regex_executor.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/regex_scanner.h"
+  textual header "/usr/include/c++/7.5.0/bits/regex_scanner.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/shared_ptr.h"
+  textual header "/usr/include/c++/7.5.0/bits/shared_ptr_atomic.h"
+  textual header "/usr/include/c++/7.5.0/bits/shared_ptr_base.h"
+  textual header "/usr/include/c++/7.5.0/bits/slice_array.h"
+  textual header "/usr/include/c++/7.5.0/bits/specfun.h"
+  textual header "/usr/include/c++/7.5.0/bits/sstream.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/std_abs.h"
+  textual header "/usr/include/c++/7.5.0/bits/std_function.h"
+  textual header "/usr/include/c++/7.5.0/bits/std_mutex.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_algo.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_algobase.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_bvector.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_construct.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_deque.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_function.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_heap.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_iterator.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_iterator_base_funcs.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_iterator_base_types.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_list.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_map.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_multimap.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_multiset.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_numeric.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_pair.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_queue.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_raw_storage_iter.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_relops.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_set.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_stack.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_tempbuf.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_tree.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_uninitialized.h"
+  textual header "/usr/include/c++/7.5.0/bits/stl_vector.h"
+  textual header "/usr/include/c++/7.5.0/bits/stream_iterator.h"
+  textual header "/usr/include/c++/7.5.0/bits/streambuf.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/streambuf_iterator.h"
+  textual header "/usr/include/c++/7.5.0/bits/string_view.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/stringfwd.h"
+  textual header "/usr/include/c++/7.5.0/bits/uniform_int_dist.h"
+  textual header "/usr/include/c++/7.5.0/bits/unique_ptr.h"
+  textual header "/usr/include/c++/7.5.0/bits/unordered_map.h"
+  textual header "/usr/include/c++/7.5.0/bits/unordered_set.h"
+  textual header "/usr/include/c++/7.5.0/bits/uses_allocator.h"
+  textual header "/usr/include/c++/7.5.0/bits/valarray_after.h"
+  textual header "/usr/include/c++/7.5.0/bits/valarray_array.h"
+  textual header "/usr/include/c++/7.5.0/bits/valarray_array.tcc"
+  textual header "/usr/include/c++/7.5.0/bits/valarray_before.h"
+  textual header "/usr/include/c++/7.5.0/bits/vector.tcc"
+  textual header "/usr/include/c++/7.5.0/bitset"
+  textual header "/usr/include/c++/7.5.0/cassert"
+  textual header "/usr/include/c++/7.5.0/ccomplex"
+  textual header "/usr/include/c++/7.5.0/cctype"
+  textual header "/usr/include/c++/7.5.0/cerrno"
+  textual header "/usr/include/c++/7.5.0/cfenv"
+  textual header "/usr/include/c++/7.5.0/cfloat"
+  textual header "/usr/include/c++/7.5.0/chrono"
+  textual header "/usr/include/c++/7.5.0/cinttypes"
+  textual header "/usr/include/c++/7.5.0/ciso646"
+  textual header "/usr/include/c++/7.5.0/climits"
+  textual header "/usr/include/c++/7.5.0/clocale"
+  textual header "/usr/include/c++/7.5.0/cmath"
+  textual header "/usr/include/c++/7.5.0/codecvt"
+  textual header "/usr/include/c++/7.5.0/complex"
+  textual header "/usr/include/c++/7.5.0/complex.h"
+  textual header "/usr/include/c++/7.5.0/condition_variable"
+  textual header "/usr/include/c++/7.5.0/csetjmp"
+  textual header "/usr/include/c++/7.5.0/csignal"
+  textual header "/usr/include/c++/7.5.0/cstdalign"
+  textual header "/usr/include/c++/7.5.0/cstdarg"
+  textual header "/usr/include/c++/7.5.0/cstdbool"
+  textual header "/usr/include/c++/7.5.0/cstddef"
+  textual header "/usr/include/c++/7.5.0/cstdint"
+  textual header "/usr/include/c++/7.5.0/cstdio"
+  textual header "/usr/include/c++/7.5.0/cstdlib"
+  textual header "/usr/include/c++/7.5.0/cstring"
+  textual header "/usr/include/c++/7.5.0/ctgmath"
+  textual header "/usr/include/c++/7.5.0/ctime"
+  textual header "/usr/include/c++/7.5.0/cuchar"
+  textual header "/usr/include/c++/7.5.0/cwchar"
+  textual header "/usr/include/c++/7.5.0/cwctype"
+  textual header "/usr/include/c++/7.5.0/cxxabi.h"
+  textual header "/usr/include/c++/7.5.0/debug/array"
+  textual header "/usr/include/c++/7.5.0/debug/assertions.h"
+  textual header "/usr/include/c++/7.5.0/debug/bitset"
+  textual header "/usr/include/c++/7.5.0/debug/debug.h"
+  textual header "/usr/include/c++/7.5.0/debug/deque"
+  textual header "/usr/include/c++/7.5.0/debug/formatter.h"
+  textual header "/usr/include/c++/7.5.0/debug/forward_list"
+  textual header "/usr/include/c++/7.5.0/debug/functions.h"
+  textual header "/usr/include/c++/7.5.0/debug/helper_functions.h"
+  textual header "/usr/include/c++/7.5.0/debug/list"
+  textual header "/usr/include/c++/7.5.0/debug/macros.h"
+  textual header "/usr/include/c++/7.5.0/debug/map"
+  textual header "/usr/include/c++/7.5.0/debug/map.h"
+  textual header "/usr/include/c++/7.5.0/debug/multimap.h"
+  textual header "/usr/include/c++/7.5.0/debug/multiset.h"
+  textual header "/usr/include/c++/7.5.0/debug/safe_base.h"
+  textual header "/usr/include/c++/7.5.0/debug/safe_container.h"
+  textual header "/usr/include/c++/7.5.0/debug/safe_iterator.h"
+  textual header "/usr/include/c++/7.5.0/debug/safe_iterator.tcc"
+  textual header "/usr/include/c++/7.5.0/debug/safe_local_iterator.h"
+  textual header "/usr/include/c++/7.5.0/debug/safe_local_iterator.tcc"
+  textual header "/usr/include/c++/7.5.0/debug/safe_sequence.h"
+  textual header "/usr/include/c++/7.5.0/debug/safe_sequence.tcc"
+  textual header "/usr/include/c++/7.5.0/debug/safe_unordered_base.h"
+  textual header "/usr/include/c++/7.5.0/debug/safe_unordered_container.h"
+  textual header "/usr/include/c++/7.5.0/debug/safe_unordered_container.tcc"
+  textual header "/usr/include/c++/7.5.0/debug/set"
+  textual header "/usr/include/c++/7.5.0/debug/set.h"
+  textual header "/usr/include/c++/7.5.0/debug/stl_iterator.h"
+  textual header "/usr/include/c++/7.5.0/debug/string"
+  textual header "/usr/include/c++/7.5.0/debug/unordered_map"
+  textual header "/usr/include/c++/7.5.0/debug/unordered_set"
+  textual header "/usr/include/c++/7.5.0/debug/vector"
+  textual header "/usr/include/c++/7.5.0/decimal/decimal"
+  textual header "/usr/include/c++/7.5.0/decimal/decimal.h"
+  textual header "/usr/include/c++/7.5.0/deque"
+  textual header "/usr/include/c++/7.5.0/exception"
+  textual header "/usr/include/c++/7.5.0/experimental/algorithm"
+  textual header "/usr/include/c++/7.5.0/experimental/any"
+  textual header "/usr/include/c++/7.5.0/experimental/array"
+  textual header "/usr/include/c++/7.5.0/experimental/bits/erase_if.h"
+  textual header "/usr/include/c++/7.5.0/experimental/bits/fs_dir.h"
+  textual header "/usr/include/c++/7.5.0/experimental/bits/fs_fwd.h"
+  textual header "/usr/include/c++/7.5.0/experimental/bits/fs_ops.h"
+  textual header "/usr/include/c++/7.5.0/experimental/bits/fs_path.h"
+  textual header "/usr/include/c++/7.5.0/experimental/bits/lfts_config.h"
+  textual header "/usr/include/c++/7.5.0/experimental/bits/shared_ptr.h"
+  textual header "/usr/include/c++/7.5.0/experimental/bits/string_view.tcc"
+  textual header "/usr/include/c++/7.5.0/experimental/chrono"
+  textual header "/usr/include/c++/7.5.0/experimental/deque"
+  textual header "/usr/include/c++/7.5.0/experimental/filesystem"
+  textual header "/usr/include/c++/7.5.0/experimental/forward_list"
+  textual header "/usr/include/c++/7.5.0/experimental/functional"
+  textual header "/usr/include/c++/7.5.0/experimental/iterator"
+  textual header "/usr/include/c++/7.5.0/experimental/list"
+  textual header "/usr/include/c++/7.5.0/experimental/map"
+  textual header "/usr/include/c++/7.5.0/experimental/memory"
+  textual header "/usr/include/c++/7.5.0/experimental/memory_resource"
+  textual header "/usr/include/c++/7.5.0/experimental/numeric"
+  textual header "/usr/include/c++/7.5.0/experimental/optional"
+  textual header "/usr/include/c++/7.5.0/experimental/propagate_const"
+  textual header "/usr/include/c++/7.5.0/experimental/random"
+  textual header "/usr/include/c++/7.5.0/experimental/ratio"
+  textual header "/usr/include/c++/7.5.0/experimental/regex"
+  textual header "/usr/include/c++/7.5.0/experimental/set"
+  textual header "/usr/include/c++/7.5.0/experimental/source_location"
+  textual header "/usr/include/c++/7.5.0/experimental/string"
+  textual header "/usr/include/c++/7.5.0/experimental/string_view"
+  textual header "/usr/include/c++/7.5.0/experimental/system_error"
+  textual header "/usr/include/c++/7.5.0/experimental/tuple"
+  textual header "/usr/include/c++/7.5.0/experimental/type_traits"
+  textual header "/usr/include/c++/7.5.0/experimental/unordered_map"
+  textual header "/usr/include/c++/7.5.0/experimental/unordered_set"
+  textual header "/usr/include/c++/7.5.0/experimental/utility"
+  textual header "/usr/include/c++/7.5.0/experimental/vector"
+  textual header "/usr/include/c++/7.5.0/ext/algorithm"
+  textual header "/usr/include/c++/7.5.0/ext/aligned_buffer.h"
+  textual header "/usr/include/c++/7.5.0/ext/alloc_traits.h"
+  textual header "/usr/include/c++/7.5.0/ext/array_allocator.h"
+  textual header "/usr/include/c++/7.5.0/ext/atomicity.h"
+  textual header "/usr/include/c++/7.5.0/ext/bitmap_allocator.h"
+  textual header "/usr/include/c++/7.5.0/ext/cast.h"
+  textual header "/usr/include/c++/7.5.0/ext/cmath"
+  textual header "/usr/include/c++/7.5.0/ext/codecvt_specializations.h"
+  textual header "/usr/include/c++/7.5.0/ext/concurrence.h"
+  textual header "/usr/include/c++/7.5.0/ext/debug_allocator.h"
+  textual header "/usr/include/c++/7.5.0/ext/enc_filebuf.h"
+  textual header "/usr/include/c++/7.5.0/ext/extptr_allocator.h"
+  textual header "/usr/include/c++/7.5.0/ext/functional"
+  textual header "/usr/include/c++/7.5.0/ext/hash_map"
+  textual header "/usr/include/c++/7.5.0/ext/hash_set"
+  textual header "/usr/include/c++/7.5.0/ext/iterator"
+  textual header "/usr/include/c++/7.5.0/ext/malloc_allocator.h"
+  textual header "/usr/include/c++/7.5.0/ext/memory"
+  textual header "/usr/include/c++/7.5.0/ext/mt_allocator.h"
+  textual header "/usr/include/c++/7.5.0/ext/new_allocator.h"
+  textual header "/usr/include/c++/7.5.0/ext/numeric"
+  textual header "/usr/include/c++/7.5.0/ext/numeric_traits.h"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/assoc_container.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/bin_search_tree_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/node_iterators.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/point_iterators.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/r_erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/rotate_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/bin_search_tree_/traits.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/binary_heap_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/const_iterator.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/entry_cmp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/entry_pred.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/point_const_iterator.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/resize_policy.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binary_heap_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binomial_heap_/binomial_heap_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binomial_heap_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binomial_heap_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binomial_heap_base_/binomial_heap_base_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binomial_heap_base_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binomial_heap_base_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binomial_heap_base_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binomial_heap_base_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binomial_heap_base_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/binomial_heap_base_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/branch_policy/branch_policy.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/branch_policy/null_node_metadata.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/branch_policy/traits.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/cc_ht_map_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/cmp_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/cond_key_dtor_entry_dealtor.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/constructor_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/constructor_destructor_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/constructor_destructor_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/debug_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/debug_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/entry_list_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/erase_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/erase_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/find_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/insert_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/insert_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/resize_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/resize_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/resize_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/size_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cc_hash_table_map_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/cond_dealtor.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/container_base_dispatch.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/debug_map_base.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/eq_fn/eq_by_less.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/eq_fn/hash_eq_fn.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/constructor_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/constructor_destructor_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/constructor_destructor_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/debug_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/debug_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/erase_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/erase_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/find_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/find_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/gp_ht_map_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/insert_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/insert_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/iterator_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/resize_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/resize_no_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/resize_store_hash_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/gp_hash_table_map_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/direct_mask_range_hashing_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/direct_mod_range_hashing_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/linear_probe_fn_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/mask_based_range_hashing.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/mod_based_range_hashing.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/probe_fn_base.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/quadratic_probe_fn_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/ranged_hash_fn.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/ranged_probe_fn.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/sample_probe_fn.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/sample_range_hashing.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/sample_ranged_hash_fn.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/hash_fn/sample_ranged_probe_fn.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/const_iterator.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/left_child_next_sibling_heap_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/node.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/point_const_iterator.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/left_child_next_sibling_heap_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_map_/constructor_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_map_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_map_/entry_metadata_base.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_map_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_map_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_map_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_map_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_map_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_map_/lu_map_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_map_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_policy/lu_counter_metadata.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/list_update_policy/sample_update_policy.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/node_iterators.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/ov_tree_map_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/ov_tree_map_/traits.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pairing_heap_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pairing_heap_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pairing_heap_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pairing_heap_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pairing_heap_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pairing_heap_/pairing_heap_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pairing_heap_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/insert_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/iterators_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/pat_trie_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/pat_trie_base.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/policy_access_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/r_erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/rotate_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/split_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/synth_access_traits.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/traits.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/pat_trie_/update_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/priority_queue_base_dispatch.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rb_tree_map_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rb_tree_map_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rb_tree_map_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rb_tree_map_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rb_tree_map_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rb_tree_map_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rb_tree_map_/node.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rb_tree_map_/rb_tree_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rb_tree_map_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rb_tree_map_/traits.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rc_binomial_heap_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rc_binomial_heap_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rc_binomial_heap_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rc_binomial_heap_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rc_binomial_heap_/rc.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rc_binomial_heap_/rc_binomial_heap_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rc_binomial_heap_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/rc_binomial_heap_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/resize_policy/cc_hash_max_collision_check_resize_trigger_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/resize_policy/hash_exponential_size_policy_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/resize_policy/hash_load_check_resize_trigger_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/resize_policy/hash_load_check_resize_trigger_size_base.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/resize_policy/hash_prime_size_policy_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/resize_policy/hash_standard_resize_policy_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/resize_policy/sample_resize_policy.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/resize_policy/sample_resize_trigger.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/resize_policy/sample_size_policy.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/info_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/node.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/splay_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/splay_tree_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/splay_tree_/traits.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/standard_policies.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/thin_heap_/constructors_destructor_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/thin_heap_/debug_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/thin_heap_/erase_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/thin_heap_/find_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/thin_heap_/insert_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/thin_heap_/split_join_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/thin_heap_/thin_heap_.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/thin_heap_/trace_fn_imps.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/tree_policy/node_metadata_selector.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/tree_policy/order_statistics_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/tree_policy/sample_tree_node_update.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/tree_trace_base.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/trie_policy/node_metadata_selector.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/trie_policy/order_statistics_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/trie_policy/prefix_search_node_update_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/trie_policy/sample_trie_access_traits.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/trie_policy/sample_trie_node_update.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/trie_policy/trie_policy_base.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/trie_policy/trie_string_access_traits_imp.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/type_utils.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/types_traits.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/unordered_iterator/const_iterator.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/unordered_iterator/iterator.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/unordered_iterator/point_const_iterator.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/detail/unordered_iterator/point_iterator.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/exception.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/hash_policy.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/list_update_policy.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/priority_queue.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/tag_and_trait.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/tree_policy.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pb_ds/trie_policy.hpp"
+  textual header "/usr/include/c++/7.5.0/ext/pod_char_traits.h"
+  textual header "/usr/include/c++/7.5.0/ext/pointer.h"
+  textual header "/usr/include/c++/7.5.0/ext/pool_allocator.h"
+  textual header "/usr/include/c++/7.5.0/ext/random"
+  textual header "/usr/include/c++/7.5.0/ext/random.tcc"
+  textual header "/usr/include/c++/7.5.0/ext/rb_tree"
+  textual header "/usr/include/c++/7.5.0/ext/rc_string_base.h"
+  textual header "/usr/include/c++/7.5.0/ext/rope"
+  textual header "/usr/include/c++/7.5.0/ext/ropeimpl.h"
+  textual header "/usr/include/c++/7.5.0/ext/slist"
+  textual header "/usr/include/c++/7.5.0/ext/sso_string_base.h"
+  textual header "/usr/include/c++/7.5.0/ext/stdio_filebuf.h"
+  textual header "/usr/include/c++/7.5.0/ext/stdio_sync_filebuf.h"
+  textual header "/usr/include/c++/7.5.0/ext/string_conversions.h"
+  textual header "/usr/include/c++/7.5.0/ext/throw_allocator.h"
+  textual header "/usr/include/c++/7.5.0/ext/type_traits.h"
+  textual header "/usr/include/c++/7.5.0/ext/typelist.h"
+  textual header "/usr/include/c++/7.5.0/ext/vstring.h"
+  textual header "/usr/include/c++/7.5.0/ext/vstring.tcc"
+  textual header "/usr/include/c++/7.5.0/ext/vstring_fwd.h"
+  textual header "/usr/include/c++/7.5.0/ext/vstring_util.h"
+  textual header "/usr/include/c++/7.5.0/fenv.h"
+  textual header "/usr/include/c++/7.5.0/forward_list"
+  textual header "/usr/include/c++/7.5.0/fstream"
+  textual header "/usr/include/c++/7.5.0/functional"
+  textual header "/usr/include/c++/7.5.0/future"
+  textual header "/usr/include/c++/7.5.0/initializer_list"
+  textual header "/usr/include/c++/7.5.0/iomanip"
+  textual header "/usr/include/c++/7.5.0/ios"
+  textual header "/usr/include/c++/7.5.0/iosfwd"
+  textual header "/usr/include/c++/7.5.0/iostream"
+  textual header "/usr/include/c++/7.5.0/istream"
+  textual header "/usr/include/c++/7.5.0/iterator"
+  textual header "/usr/include/c++/7.5.0/limits"
+  textual header "/usr/include/c++/7.5.0/list"
+  textual header "/usr/include/c++/7.5.0/locale"
+  textual header "/usr/include/c++/7.5.0/map"
+  textual header "/usr/include/c++/7.5.0/math.h"
+  textual header "/usr/include/c++/7.5.0/memory"
+  textual header "/usr/include/c++/7.5.0/mutex"
+  textual header "/usr/include/c++/7.5.0/new"
+  textual header "/usr/include/c++/7.5.0/numeric"
+  textual header "/usr/include/c++/7.5.0/optional"
+  textual header "/usr/include/c++/7.5.0/ostream"
+  textual header "/usr/include/c++/7.5.0/parallel/algo.h"
+  textual header "/usr/include/c++/7.5.0/parallel/algobase.h"
+  textual header "/usr/include/c++/7.5.0/parallel/algorithm"
+  textual header "/usr/include/c++/7.5.0/parallel/algorithmfwd.h"
+  textual header "/usr/include/c++/7.5.0/parallel/balanced_quicksort.h"
+  textual header "/usr/include/c++/7.5.0/parallel/base.h"
+  textual header "/usr/include/c++/7.5.0/parallel/basic_iterator.h"
+  textual header "/usr/include/c++/7.5.0/parallel/checkers.h"
+  textual header "/usr/include/c++/7.5.0/parallel/compatibility.h"
+  textual header "/usr/include/c++/7.5.0/parallel/compiletime_settings.h"
+  textual header "/usr/include/c++/7.5.0/parallel/equally_split.h"
+  textual header "/usr/include/c++/7.5.0/parallel/features.h"
+  textual header "/usr/include/c++/7.5.0/parallel/find.h"
+  textual header "/usr/include/c++/7.5.0/parallel/find_selectors.h"
+  textual header "/usr/include/c++/7.5.0/parallel/for_each.h"
+  textual header "/usr/include/c++/7.5.0/parallel/for_each_selectors.h"
+  textual header "/usr/include/c++/7.5.0/parallel/iterator.h"
+  textual header "/usr/include/c++/7.5.0/parallel/list_partition.h"
+  textual header "/usr/include/c++/7.5.0/parallel/losertree.h"
+  textual header "/usr/include/c++/7.5.0/parallel/merge.h"
+  textual header "/usr/include/c++/7.5.0/parallel/multiseq_selection.h"
+  textual header "/usr/include/c++/7.5.0/parallel/multiway_merge.h"
+  textual header "/usr/include/c++/7.5.0/parallel/multiway_mergesort.h"
+  textual header "/usr/include/c++/7.5.0/parallel/numeric"
+  textual header "/usr/include/c++/7.5.0/parallel/numericfwd.h"
+  textual header "/usr/include/c++/7.5.0/parallel/omp_loop.h"
+  textual header "/usr/include/c++/7.5.0/parallel/omp_loop_static.h"
+  textual header "/usr/include/c++/7.5.0/parallel/par_loop.h"
+  textual header "/usr/include/c++/7.5.0/parallel/parallel.h"
+  textual header "/usr/include/c++/7.5.0/parallel/partial_sum.h"
+  textual header "/usr/include/c++/7.5.0/parallel/partition.h"
+  textual header "/usr/include/c++/7.5.0/parallel/queue.h"
+  textual header "/usr/include/c++/7.5.0/parallel/quicksort.h"
+  textual header "/usr/include/c++/7.5.0/parallel/random_number.h"
+  textual header "/usr/include/c++/7.5.0/parallel/random_shuffle.h"
+  textual header "/usr/include/c++/7.5.0/parallel/search.h"
+  textual header "/usr/include/c++/7.5.0/parallel/set_operations.h"
+  textual header "/usr/include/c++/7.5.0/parallel/settings.h"
+  textual header "/usr/include/c++/7.5.0/parallel/sort.h"
+  textual header "/usr/include/c++/7.5.0/parallel/tags.h"
+  textual header "/usr/include/c++/7.5.0/parallel/types.h"
+  textual header "/usr/include/c++/7.5.0/parallel/unique_copy.h"
+  textual header "/usr/include/c++/7.5.0/parallel/workstealing.h"
+  textual header "/usr/include/c++/7.5.0/profile/array"
+  textual header "/usr/include/c++/7.5.0/profile/base.h"
+  textual header "/usr/include/c++/7.5.0/profile/bitset"
+  textual header "/usr/include/c++/7.5.0/profile/deque"
+  textual header "/usr/include/c++/7.5.0/profile/forward_list"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_algos.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_container_size.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_hash_func.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_hashtable_size.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_list_to_slist.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_list_to_vector.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_map_to_unordered_map.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_node.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_state.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_trace.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_vector_size.h"
+  textual header "/usr/include/c++/7.5.0/profile/impl/profiler_vector_to_list.h"
+  textual header "/usr/include/c++/7.5.0/profile/iterator_tracker.h"
+  textual header "/usr/include/c++/7.5.0/profile/list"
+  textual header "/usr/include/c++/7.5.0/profile/map"
+  textual header "/usr/include/c++/7.5.0/profile/map.h"
+  textual header "/usr/include/c++/7.5.0/profile/multimap.h"
+  textual header "/usr/include/c++/7.5.0/profile/multiset.h"
+  textual header "/usr/include/c++/7.5.0/profile/ordered_base.h"
+  textual header "/usr/include/c++/7.5.0/profile/set"
+  textual header "/usr/include/c++/7.5.0/profile/set.h"
+  textual header "/usr/include/c++/7.5.0/profile/unordered_base.h"
+  textual header "/usr/include/c++/7.5.0/profile/unordered_map"
+  textual header "/usr/include/c++/7.5.0/profile/unordered_set"
+  textual header "/usr/include/c++/7.5.0/profile/vector"
+  textual header "/usr/include/c++/7.5.0/queue"
+  textual header "/usr/include/c++/7.5.0/random"
+  textual header "/usr/include/c++/7.5.0/ratio"
+  textual header "/usr/include/c++/7.5.0/regex"
+  textual header "/usr/include/c++/7.5.0/scoped_allocator"
+  textual header "/usr/include/c++/7.5.0/set"
+  textual header "/usr/include/c++/7.5.0/shared_mutex"
+  textual header "/usr/include/c++/7.5.0/sstream"
+  textual header "/usr/include/c++/7.5.0/stack"
+  textual header "/usr/include/c++/7.5.0/stdexcept"
+  textual header "/usr/include/c++/7.5.0/stdlib.h"
+  textual header "/usr/include/c++/7.5.0/streambuf"
+  textual header "/usr/include/c++/7.5.0/string"
+  textual header "/usr/include/c++/7.5.0/string_view"
+  textual header "/usr/include/c++/7.5.0/system_error"
+  textual header "/usr/include/c++/7.5.0/tgmath.h"
+  textual header "/usr/include/c++/7.5.0/thread"
+  textual header "/usr/include/c++/7.5.0/tr1/array"
+  textual header "/usr/include/c++/7.5.0/tr1/bessel_function.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/beta_function.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/ccomplex"
+  textual header "/usr/include/c++/7.5.0/tr1/cctype"
+  textual header "/usr/include/c++/7.5.0/tr1/cfenv"
+  textual header "/usr/include/c++/7.5.0/tr1/cfloat"
+  textual header "/usr/include/c++/7.5.0/tr1/cinttypes"
+  textual header "/usr/include/c++/7.5.0/tr1/climits"
+  textual header "/usr/include/c++/7.5.0/tr1/cmath"
+  textual header "/usr/include/c++/7.5.0/tr1/complex"
+  textual header "/usr/include/c++/7.5.0/tr1/complex.h"
+  textual header "/usr/include/c++/7.5.0/tr1/cstdarg"
+  textual header "/usr/include/c++/7.5.0/tr1/cstdbool"
+  textual header "/usr/include/c++/7.5.0/tr1/cstdint"
+  textual header "/usr/include/c++/7.5.0/tr1/cstdio"
+  textual header "/usr/include/c++/7.5.0/tr1/cstdlib"
+  textual header "/usr/include/c++/7.5.0/tr1/ctgmath"
+  textual header "/usr/include/c++/7.5.0/tr1/ctime"
+  textual header "/usr/include/c++/7.5.0/tr1/ctype.h"
+  textual header "/usr/include/c++/7.5.0/tr1/cwchar"
+  textual header "/usr/include/c++/7.5.0/tr1/cwctype"
+  textual header "/usr/include/c++/7.5.0/tr1/ell_integral.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/exp_integral.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/fenv.h"
+  textual header "/usr/include/c++/7.5.0/tr1/float.h"
+  textual header "/usr/include/c++/7.5.0/tr1/functional"
+  textual header "/usr/include/c++/7.5.0/tr1/functional_hash.h"
+  textual header "/usr/include/c++/7.5.0/tr1/gamma.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/hashtable.h"
+  textual header "/usr/include/c++/7.5.0/tr1/hashtable_policy.h"
+  textual header "/usr/include/c++/7.5.0/tr1/hypergeometric.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/inttypes.h"
+  textual header "/usr/include/c++/7.5.0/tr1/legendre_function.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/limits.h"
+  textual header "/usr/include/c++/7.5.0/tr1/math.h"
+  textual header "/usr/include/c++/7.5.0/tr1/memory"
+  textual header "/usr/include/c++/7.5.0/tr1/modified_bessel_func.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/poly_hermite.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/poly_laguerre.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/random"
+  textual header "/usr/include/c++/7.5.0/tr1/random.h"
+  textual header "/usr/include/c++/7.5.0/tr1/random.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/regex"
+  textual header "/usr/include/c++/7.5.0/tr1/riemann_zeta.tcc"
+  textual header "/usr/include/c++/7.5.0/tr1/shared_ptr.h"
+  textual header "/usr/include/c++/7.5.0/tr1/special_function_util.h"
+  textual header "/usr/include/c++/7.5.0/tr1/stdarg.h"
+  textual header "/usr/include/c++/7.5.0/tr1/stdbool.h"
+  textual header "/usr/include/c++/7.5.0/tr1/stdint.h"
+  textual header "/usr/include/c++/7.5.0/tr1/stdio.h"
+  textual header "/usr/include/c++/7.5.0/tr1/stdlib.h"
+  textual header "/usr/include/c++/7.5.0/tr1/tgmath.h"
+  textual header "/usr/include/c++/7.5.0/tr1/tuple"
+  textual header "/usr/include/c++/7.5.0/tr1/type_traits"
+  textual header "/usr/include/c++/7.5.0/tr1/unordered_map"
+  textual header "/usr/include/c++/7.5.0/tr1/unordered_map.h"
+  textual header "/usr/include/c++/7.5.0/tr1/unordered_set"
+  textual header "/usr/include/c++/7.5.0/tr1/unordered_set.h"
+  textual header "/usr/include/c++/7.5.0/tr1/utility"
+  textual header "/usr/include/c++/7.5.0/tr1/wchar.h"
+  textual header "/usr/include/c++/7.5.0/tr1/wctype.h"
+  textual header "/usr/include/c++/7.5.0/tr2/bool_set"
+  textual header "/usr/include/c++/7.5.0/tr2/bool_set.tcc"
+  textual header "/usr/include/c++/7.5.0/tr2/dynamic_bitset"
+  textual header "/usr/include/c++/7.5.0/tr2/dynamic_bitset.tcc"
+  textual header "/usr/include/c++/7.5.0/tr2/ratio"
+  textual header "/usr/include/c++/7.5.0/tr2/type_traits"
+  textual header "/usr/include/c++/7.5.0/tuple"
+  textual header "/usr/include/c++/7.5.0/type_traits"
+  textual header "/usr/include/c++/7.5.0/typeindex"
+  textual header "/usr/include/c++/7.5.0/typeinfo"
+  textual header "/usr/include/c++/7.5.0/unordered_map"
+  textual header "/usr/include/c++/7.5.0/unordered_set"
+  textual header "/usr/include/c++/7.5.0/utility"
+  textual header "/usr/include/c++/7.5.0/valarray"
+  textual header "/usr/include/c++/7.5.0/variant"
+  textual header "/usr/include/c++/7.5.0/vector"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/atomic_word.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/basic_file.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/c++allocator.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/c++config.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/c++io.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/c++locale.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/cpu_defines.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/ctype_base.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/ctype_inline.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/cxxabi_tweaks.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/error_constants.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/extc++.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/gthr-default.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/gthr-posix.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/gthr-single.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/gthr.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/messages_members.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/opt_random.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/os_defines.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/stdc++.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/stdtr1c++.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/bits/time_members.h"
+  textual header "/usr/include/x86_64-linux-gnu/c++/7.5.0/ext/opt_random.h"
+  textual header "/usr/include/c++/7.5.0/backward/auto_ptr.h"
+  textual header "/usr/include/c++/7.5.0/backward/backward_warning.h"
+  textual header "/usr/include/c++/7.5.0/backward/binders.h"
+  textual header "/usr/include/c++/7.5.0/backward/hash_fun.h"
+  textual header "/usr/include/c++/7.5.0/backward/hash_map"
+  textual header "/usr/include/c++/7.5.0/backward/hash_set"
+  textual header "/usr/include/c++/7.5.0/backward/hashtable.h"
+  textual header "/usr/include/c++/7.5.0/backward/strstream"
+  textual header "/usr/include/clang/6.0.0/include/__clang_cuda_builtin_vars.h"
+  textual header "/usr/include/clang/6.0.0/include/__clang_cuda_cmath.h"
+  textual header "/usr/include/clang/6.0.0/include/__clang_cuda_complex_builtins.h"
+  textual header "/usr/include/clang/6.0.0/include/__clang_cuda_intrinsics.h"
+  textual header "/usr/include/clang/6.0.0/include/__clang_cuda_math_forward_declares.h"
+  textual header "/usr/include/clang/6.0.0/include/__clang_cuda_runtime_wrapper.h"
+  textual header "/usr/include/clang/6.0.0/include/__stddef_max_align_t.h"
+  textual header "/usr/include/clang/6.0.0/include/__wmmintrin_aes.h"
+  textual header "/usr/include/clang/6.0.0/include/__wmmintrin_pclmul.h"
+  textual header "/usr/include/clang/6.0.0/include/adxintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/altivec.h"
+  textual header "/usr/include/clang/6.0.0/include/ammintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/arm64intr.h"
+  textual header "/usr/include/clang/6.0.0/include/arm_acle.h"
+  textual header "/usr/include/clang/6.0.0/include/arm_neon.h"
+  textual header "/usr/include/clang/6.0.0/include/armintr.h"
+  textual header "/usr/include/clang/6.0.0/include/avx2intrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512bitalgintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512bwintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512cdintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512dqintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512erintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512fintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512ifmaintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512ifmavlintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512pfintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vbmi2intrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vbmiintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vbmivlintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vlbitalgintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vlbwintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vlcdintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vldqintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vlintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vlvbmi2intrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vlvnniintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vnniintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vpopcntdqintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avx512vpopcntdqvlintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/avxintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/bmi2intrin.h"
+  textual header "/usr/include/clang/6.0.0/include/bmiintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/cetintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/clflushoptintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/clwbintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/clzerointrin.h"
+  textual header "/usr/include/clang/6.0.0/include/cpuid.h"
+  textual header "/usr/include/clang/6.0.0/include/cuda_wrappers/algorithm"
+  textual header "/usr/include/clang/6.0.0/include/cuda_wrappers/complex"
+  textual header "/usr/include/clang/6.0.0/include/cuda_wrappers/new"
+  textual header "/usr/include/clang/6.0.0/include/emmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/f16cintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/float.h"
+  textual header "/usr/include/clang/6.0.0/include/fma4intrin.h"
+  textual header "/usr/include/clang/6.0.0/include/fmaintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/fxsrintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/gfniintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/htmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/htmxlintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/ia32intrin.h"
+  textual header "/usr/include/clang/6.0.0/include/immintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/intrin.h"
+  textual header "/usr/include/clang/6.0.0/include/inttypes.h"
+  textual header "/usr/include/clang/6.0.0/include/iso646.h"
+  textual header "/usr/include/clang/6.0.0/include/limits.h"
+  textual header "/usr/include/clang/6.0.0/include/lwpintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/lzcntintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/mm3dnow.h"
+  textual header "/usr/include/clang/6.0.0/include/mm_malloc.h"
+  textual header "/usr/include/clang/6.0.0/include/mmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/module.modulemap"
+  textual header "/usr/include/clang/6.0.0/include/msa.h"
+  textual header "/usr/include/clang/6.0.0/include/mwaitxintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/nmmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/opencl-c.h"
+  textual header "/usr/include/clang/6.0.0/include/pkuintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/pmmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/popcntintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/prfchwintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/rdseedintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/rtmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/s390intrin.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/allocator_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/asan_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/common_interface_defs.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/coverage_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/dfsan_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/esan_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/hwasan_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/linux_syscall_hooks.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/lsan_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/msan_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/scudo_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/tsan_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/sanitizer/tsan_interface_atomic.h"
+  textual header "/usr/include/clang/6.0.0/include/shaintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/smmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/stdalign.h"
+  textual header "/usr/include/clang/6.0.0/include/stdarg.h"
+  textual header "/usr/include/clang/6.0.0/include/stdatomic.h"
+  textual header "/usr/include/clang/6.0.0/include/stdbool.h"
+  textual header "/usr/include/clang/6.0.0/include/stddef.h"
+  textual header "/usr/include/clang/6.0.0/include/stdint.h"
+  textual header "/usr/include/clang/6.0.0/include/stdnoreturn.h"
+  textual header "/usr/include/clang/6.0.0/include/tbmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/tgmath.h"
+  textual header "/usr/include/clang/6.0.0/include/tmmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/unwind.h"
+  textual header "/usr/include/clang/6.0.0/include/vadefs.h"
+  textual header "/usr/include/clang/6.0.0/include/vaesintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/varargs.h"
+  textual header "/usr/include/clang/6.0.0/include/vecintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/vpclmulqdqintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/wmmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/x86intrin.h"
+  textual header "/usr/include/clang/6.0.0/include/xmmintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/xopintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/xray/xray_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/xray/xray_log_interface.h"
+  textual header "/usr/include/clang/6.0.0/include/xsavecintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/xsaveintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/xsaveoptintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/xsavesintrin.h"
+  textual header "/usr/include/clang/6.0.0/include/xtestintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/__clang_cuda_builtin_vars.h"
+  textual header "/usr/lib/clang/6.0.0/include/__clang_cuda_cmath.h"
+  textual header "/usr/lib/clang/6.0.0/include/__clang_cuda_complex_builtins.h"
+  textual header "/usr/lib/clang/6.0.0/include/__clang_cuda_intrinsics.h"
+  textual header "/usr/lib/clang/6.0.0/include/__clang_cuda_math_forward_declares.h"
+  textual header "/usr/lib/clang/6.0.0/include/__clang_cuda_runtime_wrapper.h"
+  textual header "/usr/lib/clang/6.0.0/include/__stddef_max_align_t.h"
+  textual header "/usr/lib/clang/6.0.0/include/__wmmintrin_aes.h"
+  textual header "/usr/lib/clang/6.0.0/include/__wmmintrin_pclmul.h"
+  textual header "/usr/lib/clang/6.0.0/include/adxintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/altivec.h"
+  textual header "/usr/lib/clang/6.0.0/include/ammintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/arm64intr.h"
+  textual header "/usr/lib/clang/6.0.0/include/arm_acle.h"
+  textual header "/usr/lib/clang/6.0.0/include/arm_neon.h"
+  textual header "/usr/lib/clang/6.0.0/include/armintr.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx2intrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512bitalgintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512bwintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512cdintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512dqintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512erintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512fintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512ifmaintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512ifmavlintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512pfintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512vbmi2intrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512vbmiintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512vbmivlintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512vlbitalgintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512vlbwintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512vlcdintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512vldqintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512vlintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512vlvbmi2intrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512vlvnniintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512vnniintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512vpopcntdqintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avx512vpopcntdqvlintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/avxintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/bmi2intrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/bmiintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/cetintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/clflushoptintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/clwbintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/clzerointrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/cpuid.h"
+  textual header "/usr/lib/clang/6.0.0/include/cuda_wrappers/algorithm"
+  textual header "/usr/lib/clang/6.0.0/include/cuda_wrappers/complex"
+  textual header "/usr/lib/clang/6.0.0/include/cuda_wrappers/new"
+  textual header "/usr/lib/clang/6.0.0/include/emmintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/f16cintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/float.h"
+  textual header "/usr/lib/clang/6.0.0/include/fma4intrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/fmaintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/fxsrintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/gfniintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/htmintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/htmxlintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/ia32intrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/immintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/intrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/inttypes.h"
+  textual header "/usr/lib/clang/6.0.0/include/iso646.h"
+  textual header "/usr/lib/clang/6.0.0/include/limits.h"
+  textual header "/usr/lib/clang/6.0.0/include/lwpintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/lzcntintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/mm3dnow.h"
+  textual header "/usr/lib/clang/6.0.0/include/mm_malloc.h"
+  textual header "/usr/lib/clang/6.0.0/include/mmintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/module.modulemap"
+  textual header "/usr/lib/clang/6.0.0/include/msa.h"
+  textual header "/usr/lib/clang/6.0.0/include/mwaitxintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/nmmintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/opencl-c.h"
+  textual header "/usr/lib/clang/6.0.0/include/pkuintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/pmmintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/popcntintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/prfchwintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/rdseedintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/rtmintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/s390intrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/sanitizer/allocator_interface.h"
+  textual header "/usr/lib/clang/6.0.0/include/sanitizer/asan_interface.h"
+  textual header "/usr/lib/clang/6.0.0/include/sanitizer/common_interface_defs.h"
+  textual header "/usr/lib/clang/6.0.0/include/sanitizer/coverage_interface.h"
+  textual header "/usr/lib/clang/6.0.0/include/sanitizer/dfsan_interface.h"
+  textual header "/usr/lib/clang/6.0.0/include/sanitizer/esan_interface.h"
+  textual header "/usr/lib/clang/6.0.0/include/sanitizer/hwasan_interface.h"
+  textual header "/usr/lib/clang/6.0.0/include/sanitizer/linux_syscall_hooks.h"
+  textual header "/usr/lib/clang/6.0.0/include/sanitizer/lsan_interface.h"
+  textual header "/usr/lib/clang/6.0.0/include/sanitizer/msan_interface.h"
+  textual header "/usr/lib/clang/6.0.0/include/sanitizer/scudo_interface.h"
+  textual header "/usr/lib/clang/6.0.0/include/sanitizer/tsan_interface.h"
+  textual header "/usr/lib/clang/6.0.0/include/sanitizer/tsan_interface_atomic.h"
+  textual header "/usr/lib/clang/6.0.0/include/shaintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/smmintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/stdalign.h"
+  textual header "/usr/lib/clang/6.0.0/include/stdarg.h"
+  textual header "/usr/lib/clang/6.0.0/include/stdatomic.h"
+  textual header "/usr/lib/clang/6.0.0/include/stdbool.h"
+  textual header "/usr/lib/clang/6.0.0/include/stddef.h"
+  textual header "/usr/lib/clang/6.0.0/include/stdint.h"
+  textual header "/usr/lib/clang/6.0.0/include/stdnoreturn.h"
+  textual header "/usr/lib/clang/6.0.0/include/tbmintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/tgmath.h"
+  textual header "/usr/lib/clang/6.0.0/include/tmmintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/unwind.h"
+  textual header "/usr/lib/clang/6.0.0/include/vadefs.h"
+  textual header "/usr/lib/clang/6.0.0/include/vaesintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/varargs.h"
+  textual header "/usr/lib/clang/6.0.0/include/vecintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/vpclmulqdqintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/wmmintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/x86intrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/xmmintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/xopintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/xray/xray_interface.h"
+  textual header "/usr/lib/clang/6.0.0/include/xray/xray_log_interface.h"
+  textual header "/usr/lib/clang/6.0.0/include/xsavecintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/xsaveintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/xsaveoptintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/xsavesintrin.h"
+  textual header "/usr/lib/clang/6.0.0/include/xtestintrin.h"
+}
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/tools/cpp/empty.cc b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/tools/cpp/empty.cc
new file mode 100644
index 0000000000..c272dabaeb
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/cc/tools/cpp/empty.cc
@@ -0,0 +1 @@
+int main() {}
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/config/BUILD b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/config/BUILD
new file mode 100755
index 0000000000..a37358ce13
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/linux-bazel-4.2.1/config/BUILD
@@ -0,0 +1,49 @@
+# Copyright 2020 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file is auto-generated by github.com/bazelbuild/bazel-toolchains/pkg/rbeconfigsgen
+# and should not be modified directly.
+
+package(default_visibility = ["//visibility:public"])
+
+
+toolchain(
+    name = "cc-toolchain",
+    exec_compatible_with = [
+        "@bazel_tools//platforms:linux",
+        "@bazel_tools//platforms:x86_64",
+        "@bazel_tools//tools/cpp:clang",
+    ],
+    target_compatible_with = [
+        "@bazel_tools//platforms:linux",
+        "@bazel_tools//platforms:x86_64",
+    ],
+    toolchain = "//cc:cc-compiler-k8",
+    toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
+)
+
+platform(
+    name = "platform",
+    parents = ["@local_config_platform//:host"],
+    constraint_values = [
+        "@bazel_tools//platforms:linux",
+        "@bazel_tools//platforms:x86_64",
+        "@bazel_tools//tools/cpp:clang",
+    ],
+    exec_properties = {
+        "container-image": "docker://gcr.io/skia-public/rbe-container-skcms-linux@sha256:e0242db36ddc4a608482e006c8942c81de731f2cc934ebda01567803a3b84f56",
+        "OSFamily": "Linux",
+        "dockerAddCapabilities": "SYS_PTRACE",
+    },
+)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/LICENSE b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/LICENSE
new file mode 100644
index 0000000000..f0a1f59a3c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/LICENSE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2021 Google LLC
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/WORKSPACE b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/WORKSPACE
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/BUILD b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/BUILD
new file mode 100644
index 0000000000..709ec1cf78
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/BUILD
@@ -0,0 +1,550 @@
+# Copyright 2018 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This becomes the BUILD file for @local_config_cc// under Windows.
+
+package(default_visibility = ["//visibility:public"])
+
+load("@rules_cc//cc:defs.bzl", "cc_toolchain", "cc_toolchain_suite", "cc_library")
+load(":windows_cc_toolchain_config.bzl", "cc_toolchain_config")
+load(":armeabi_cc_toolchain_config.bzl", "armeabi_cc_toolchain_config")
+cc_library(
+    name = "malloc",
+)
+
+filegroup(
+    name = "empty",
+    srcs = [],
+)
+
+filegroup(
+    name = "mingw_compiler_files",
+    srcs = [":builtin_include_directory_paths_mingw"]
+)
+
+filegroup(
+    name = "clangcl_compiler_files",
+    srcs = [":builtin_include_directory_paths_clangcl"]
+)
+
+filegroup(
+    name = "msvc_compiler_files",
+    srcs = [":builtin_include_directory_paths_msvc"]
+)
+
+# Hardcoded toolchain, legacy behaviour.
+cc_toolchain_suite(
+    name = "toolchain",
+    toolchains = {
+        "armeabi-v7a|compiler": ":cc-compiler-armeabi-v7a",
+        "x64_windows|msvc-cl": ":cc-compiler-x64_windows",
+        "x64_x86_windows|msvc-cl": ":cc-compiler-x64_x86_windows",
+        "x64_arm_windows|msvc-cl": ":cc-compiler-x64_arm_windows",
+        "x64_arm64_windows|msvc-cl": ":cc-compiler-x64_arm64_windows",
+        "x64_windows|msys-gcc": ":cc-compiler-x64_windows_msys",
+        "x64_windows|mingw-gcc": ":cc-compiler-x64_windows_mingw",
+        "x64_windows|clang-cl": ":cc-compiler-x64_windows-clang-cl",
+        "x64_windows_msys": ":cc-compiler-x64_windows_msys",
+        "x64_windows": ":cc-compiler-x64_windows",
+        "x64_x86_windows": ":cc-compiler-x64_x86_windows",
+        "x64_arm_windows": ":cc-compiler-x64_arm_windows",
+        "x64_arm64_windows": ":cc-compiler-x64_arm64_windows",
+        "armeabi-v7a": ":cc-compiler-armeabi-v7a",
+    },
+)
+
+cc_toolchain(
+    name = "cc-compiler-x64_windows_msys",
+    toolchain_identifier = "msys_x64",
+    toolchain_config = ":msys_x64",
+    all_files = ":empty",
+    ar_files = ":empty",
+    as_files = ":mingw_compiler_files",
+    compiler_files = ":mingw_compiler_files",
+    dwp_files = ":empty",
+    linker_files = ":empty",
+    objcopy_files = ":empty",
+    strip_files = ":empty",
+    supports_param_files = 1,
+)
+
+cc_toolchain_config(
+    name = "msys_x64",
+    cpu = "x64_windows",
+    compiler = "msys-gcc",
+    host_system_name = "local",
+    target_system_name = "local",
+    target_libc = "msys",
+    abi_version = "local",
+    abi_libc_version = "local",
+    cxx_builtin_include_directories = [        "c:/msys64/usr/",
+        ],
+    tool_paths = {"ar": "c:/msys64/usr/bin/ar",
+        "compat-ld": "c:/msys64/usr/bin/compat-ld",
+        "cpp": "c:/msys64/usr/bin/cpp",
+        "dwp": "c:/msys64/usr/bin/dwp",
+        "gcc": "c:/msys64/usr/bin/gcc",
+        "gcov": "c:/msys64/usr/bin/gcov",
+        "ld": "c:/msys64/usr/bin/ld",
+        "nm": "c:/msys64/usr/bin/nm",
+        "objcopy": "c:/msys64/usr/bin/objcopy",
+        "objdump": "c:/msys64/usr/bin/objdump",
+        "strip": "c:/msys64/usr/bin/strip"},
+    tool_bin_path = "c:/msys64/usr/bin",
+    dbg_mode_debug_flag = "/DEBUG:FULL",
+    fastbuild_mode_debug_flag = "/DEBUG:FASTLINK",
+)
+
+toolchain(
+    name = "cc-toolchain-x64_windows_msys",
+    exec_compatible_with = [
+        "@platforms//cpu:x86_64",
+        "@platforms//os:windows",
+        "@bazel_tools//tools/cpp:msys",
+    ],
+    target_compatible_with = [
+        "@platforms//cpu:x86_64",
+        "@platforms//os:windows",
+    ],
+    toolchain = ":cc-compiler-x64_windows_msys",
+    toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
+)
+
+cc_toolchain(
+    name = "cc-compiler-x64_windows_mingw",
+    toolchain_identifier = "msys_x64_mingw",
+    toolchain_config = ":msys_x64_mingw",
+    all_files = ":empty",
+    ar_files = ":empty",
+    as_files = ":mingw_compiler_files",
+    compiler_files = ":mingw_compiler_files",
+    dwp_files = ":empty",
+    linker_files = ":empty",
+    objcopy_files = ":empty",
+    strip_files = ":empty",
+    supports_param_files = 0,
+)
+
+cc_toolchain_config(
+    name = "msys_x64_mingw",
+    cpu = "x64_windows",
+    compiler = "mingw-gcc",
+    host_system_name = "local",
+    target_system_name = "local",
+    target_libc = "mingw",
+    abi_version = "local",
+    abi_libc_version = "local",
+    tool_bin_path = "c:/msys64/mingw64/bin",
+    cxx_builtin_include_directories = [        "c:/msys64/mingw64/",
+        ],
+    tool_paths = {"ar": "c:/msys64/mingw64/bin/ar",
+        "compat-ld": "c:/msys64/mingw64/bin/compat-ld",
+        "cpp": "c:/msys64/mingw64/bin/cpp",
+        "dwp": "c:/msys64/mingw64/bin/dwp",
+        "gcc": "c:/msys64/mingw64/bin/gcc",
+        "gcov": "c:/msys64/mingw64/bin/gcov",
+        "ld": "c:/msys64/mingw64/bin/ld",
+        "nm": "c:/msys64/mingw64/bin/nm",
+        "objcopy": "c:/msys64/mingw64/bin/objcopy",
+        "objdump": "c:/msys64/mingw64/bin/objdump",
+        "strip": "c:/msys64/mingw64/bin/strip"},
+    dbg_mode_debug_flag = "/DEBUG:FULL",
+    fastbuild_mode_debug_flag = "/DEBUG:FASTLINK",
+)
+
+toolchain(
+    name = "cc-toolchain-x64_windows_mingw",
+    exec_compatible_with = [
+        "@platforms//cpu:x86_64",
+        "@platforms//os:windows",
+        "@bazel_tools//tools/cpp:mingw",
+    ],
+    target_compatible_with = [
+        "@platforms//cpu:x86_64",
+        "@platforms//os:windows",
+    ],
+    toolchain = ":cc-compiler-x64_windows_mingw",
+    toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
+)
+
+cc_toolchain(
+    name = "cc-compiler-x64_windows",
+    toolchain_identifier = "msvc_x64",
+    toolchain_config = ":msvc_x64",
+    all_files = ":empty",
+    ar_files = ":empty",
+    as_files = ":msvc_compiler_files",
+    compiler_files = ":msvc_compiler_files",
+    dwp_files = ":empty",
+    linker_files = ":empty",
+    objcopy_files = ":empty",
+    strip_files = ":empty",
+    supports_param_files = 1,
+)
+
+cc_toolchain_config(
+    name = "msvc_x64",
+    cpu = "x64_windows",
+    compiler = "msvc-cl",
+    host_system_name = "local",
+    target_system_name = "local",
+    target_libc = "msvcrt",
+    abi_version = "local",
+    abi_libc_version = "local",
+    toolchain_identifier = "msvc_x64",
+    msvc_env_tmp = "C:\\Users\\ContainerAdministrator\\AppData\\Local\\Temp",
+    msvc_env_path = "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\VC\\Tools\\MSVC\\14.29.30133\\bin\\HostX64\\x64;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\Common7\\IDE\\VC\\VCPackages;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\Common7\\IDE\\CommonExtensions\\Microsoft\\TestWindow;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\Common7\\IDE\\CommonExtensions\\Microsoft\\TeamFoundation\\Team Explorer;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\MSBuild\\Current\\bin\\Roslyn;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\Common7\\Tools\\devinit;C:\\Program Files (x86)\\Windows Kits\\10\\bin\\10.0.19041.0\\x64;C:\\Program Files (x86)\\Windows Kits\\10\\bin\\x64;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\\\MSBuild\\Current\\Bin;C:\\Windows\\Microsoft.NET\\Framework64\\v4.0.30319;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\Common7\\IDE\\;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\Common7\\Tools\\;;C:\\Windows\\system32;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\Common7\\IDE\\CommonExtensions\\Microsoft\\CMake\\CMake\\bin;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\Common7\\IDE\\CommonExtensions\\Microsoft\\CMake\\Ninja",
+    msvc_env_include = "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\VC\\Tools\\MSVC\\14.29.30133\\include;C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\ucrt;C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\shared;C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\um;C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\winrt;C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\cppwinrt",
+    msvc_env_lib = "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\VC\\Tools\\MSVC\\14.29.30133\\lib\\x64;C:\\Program Files (x86)\\Windows Kits\\10\\lib\\10.0.19041.0\\ucrt\\x64;C:\\Program Files (x86)\\Windows Kits\\10\\lib\\10.0.19041.0\\um\\x64",
+    msvc_cl_path = "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/HostX64/x64/cl.exe",
+    msvc_ml_path = "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/HostX64/x64/ml64.exe",
+    msvc_link_path = "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/HostX64/x64/link.exe",
+    msvc_lib_path = "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/HostX64/x64/lib.exe",
+    cxx_builtin_include_directories = [        "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\VC\\Tools\\MSVC\\14.29.30133\\include",
+        "C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\ucrt",
+        "C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\shared",
+        "C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\um",
+        "C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\winrt",
+        "C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\cppwinrt"],
+    tool_paths = {
+        "ar": "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/HostX64/x64/lib.exe",
+        "ml": "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/HostX64/x64/ml64.exe",
+        "cpp": "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/HostX64/x64/cl.exe",
+        "gcc": "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/HostX64/x64/cl.exe",
+        "gcov": "wrapper/bin/msvc_nop.bat",
+        "ld": "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/HostX64/x64/link.exe",
+        "nm": "wrapper/bin/msvc_nop.bat",
+        "objcopy": "wrapper/bin/msvc_nop.bat",
+        "objdump": "wrapper/bin/msvc_nop.bat",
+        "strip": "wrapper/bin/msvc_nop.bat",
+    },
+    archiver_flags = ["/MACHINE:X64"],
+    default_link_flags = ["/MACHINE:X64"],
+    dbg_mode_debug_flag = "/DEBUG:FULL",
+    fastbuild_mode_debug_flag = "/DEBUG:FASTLINK",
+)
+
+toolchain(
+    name = "cc-toolchain-x64_windows",
+    exec_compatible_with = [
+        "@platforms//cpu:x86_64",
+        "@platforms//os:windows",
+    ],
+    target_compatible_with = [
+        "@platforms//cpu:x86_64",
+        "@platforms//os:windows",
+    ],
+    toolchain = ":cc-compiler-x64_windows",
+    toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
+)
+
+cc_toolchain(
+    name = "cc-compiler-x64_x86_windows",
+    toolchain_identifier = "msvc_x64_x86",  # same identifier as set inside the :msvc_x64_x86 config
+    toolchain_config = ":msvc_x64_x86",  # cc_toolchain_config target declared in this file
+    all_files = ":empty",
+    ar_files = ":empty",
+    as_files = ":msvc_compiler_files",  # assembler inputs use the same label as compiler_files
+    compiler_files = ":msvc_compiler_files",
+    dwp_files = ":empty",
+    linker_files = ":empty",
+    objcopy_files = ":empty",
+    strip_files = ":empty",
+    supports_param_files = 1,
+)
+
+cc_toolchain_config(
+    name = "msvc_x64_x86",
+    cpu = "x64_windows",  # NOTE(review): "x64_windows" although /MACHINE is X86; _use_msvc_toolchain in windows_cc_toolchain_config.bzl tests for this value - confirm before changing
+    compiler = "msvc-cl",
+    host_system_name = "local",
+    target_system_name = "local",
+    target_libc = "msvcrt",
+    abi_version = "local",
+    abi_libc_version = "local",
+    toolchain_identifier = "msvc_x64_x86",
+    msvc_env_tmp = "C:\\Users\\ContainerAdministrator\\AppData\\Local\\Temp",
+    msvc_env_path = "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\VC\\Tools\\MSVC\\14.29.30133\\bin\\HostX64\\x86;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\VC\\Tools\\MSVC\\14.29.30133\\bin\\HostX64\\x64;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\Common7\\IDE\\VC\\VCPackages;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\Common7\\IDE\\CommonExtensions\\Microsoft\\TestWindow;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\Common7\\IDE\\CommonExtensions\\Microsoft\\TeamFoundation\\Team Explorer;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\MSBuild\\Current\\bin\\Roslyn;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\Common7\\Tools\\devinit;C:\\Program Files (x86)\\Windows Kits\\10\\bin\\10.0.19041.0\\x64;C:\\Program Files (x86)\\Windows Kits\\10\\bin\\x64;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\\\MSBuild\\Current\\Bin;C:\\Windows\\Microsoft.NET\\Framework64\\v4.0.30319;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\Common7\\IDE\\;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\Common7\\Tools\\;;C:\\Windows\\system32;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\Common7\\IDE\\CommonExtensions\\Microsoft\\CMake\\CMake\\bin;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\Common7\\IDE\\CommonExtensions\\Microsoft\\CMake\\Ninja",
+    msvc_env_include = "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\VC\\Tools\\MSVC\\14.29.30133\\include;C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\ucrt;C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\shared;C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\um;C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\winrt;C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\cppwinrt",
+    msvc_env_lib = "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\VC\\Tools\\MSVC\\14.29.30133\\lib\\x86;C:\\Program Files (x86)\\Windows Kits\\10\\lib\\10.0.19041.0\\ucrt\\x86;C:\\Program Files (x86)\\Windows Kits\\10\\lib\\10.0.19041.0\\um\\x86",
+    msvc_cl_path = "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/HostX64/x86/cl.exe",  # all four tools come from the HostX64/x86 directory
+    msvc_ml_path = "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/HostX64/x86/ml.exe",
+    msvc_link_path = "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/HostX64/x86/link.exe",
+    msvc_lib_path = "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/HostX64/x86/lib.exe",
+    cxx_builtin_include_directories = [        "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\VC\\Tools\\MSVC\\14.29.30133\\include",
+        "C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\ucrt",
+        "C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\shared",
+        "C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\um",
+        "C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\winrt",
+        "C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.19041.0\\cppwinrt"],
+    tool_paths = {  # "ar"/"ml"/"cpp"/"gcc"/"ld" repeat the msvc_*_path values above; the rest point at msvc_nop.bat
+        "ar": "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/HostX64/x86/lib.exe",
+        "ml": "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/HostX64/x86/ml.exe",
+        "cpp": "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/HostX64/x86/cl.exe",
+        "gcc": "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/HostX64/x86/cl.exe",
+        "gcov": "wrapper/bin/msvc_nop.bat",
+        "ld": "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/HostX64/x86/link.exe",
+        "nm": "wrapper/bin/msvc_nop.bat",
+        "objcopy": "wrapper/bin/msvc_nop.bat",
+        "objdump": "wrapper/bin/msvc_nop.bat",
+        "strip": "wrapper/bin/msvc_nop.bat",
+    },
+    archiver_flags = ["/MACHINE:X86"],
+    default_link_flags = ["/MACHINE:X86"],
+    dbg_mode_debug_flag = "/DEBUG:FULL",
+    fastbuild_mode_debug_flag = "/DEBUG:FASTLINK",
+)
+
+toolchain(
+    name = "cc-toolchain-x64_x86_windows",  # toolchain-resolution entry for :cc-compiler-x64_x86_windows
+    exec_compatible_with = [  # tools run on x86_64 Windows
+        "@platforms//cpu:x86_64",
+        "@platforms//os:windows",
+    ],
+    target_compatible_with = [  # output is for x86_32 Windows
+        "@platforms//cpu:x86_32",
+        "@platforms//os:windows",
+    ],
+    toolchain = ":cc-compiler-x64_x86_windows",
+    toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
+)
+
+cc_toolchain(
+    name = "cc-compiler-x64_arm_windows",
+    toolchain_identifier = "msvc_x64_arm",  # same identifier as set inside the :msvc_x64_arm config
+    toolchain_config = ":msvc_x64_arm",  # cc_toolchain_config target declared in this file
+    all_files = ":empty",
+    ar_files = ":empty",
+    as_files = ":msvc_compiler_files",  # assembler inputs use the same label as compiler_files
+    compiler_files = ":msvc_compiler_files",
+    dwp_files = ":empty",
+    linker_files = ":empty",
+    objcopy_files = ":empty",
+    strip_files = ":empty",
+    supports_param_files = 1,
+)
+
+cc_toolchain_config(
+    name = "msvc_x64_arm",
+    cpu = "x64_windows",  # NOTE(review): "x64_windows" although /MACHINE is ARM; _use_msvc_toolchain in windows_cc_toolchain_config.bzl tests for this value - confirm before changing
+    compiler = "msvc-cl",
+    host_system_name = "local",
+    target_system_name = "local",
+    target_libc = "msvcrt",
+    abi_version = "local",
+    abi_libc_version = "local",
+    toolchain_identifier = "msvc_x64_arm",
+    msvc_env_tmp = "msvc_not_found",  # the four msvc_env_* values are placeholders, not real paths
+    msvc_env_path = "msvc_not_found",
+    msvc_env_include = "msvc_not_found",
+    msvc_env_lib = "msvc_not_found",
+    msvc_cl_path = "vc_installation_error_arm.bat",  # script that prints an installation error to stderr and exits 1
+    msvc_ml_path = "vc_installation_error_arm.bat",
+    msvc_link_path = "vc_installation_error_arm.bat",
+    msvc_lib_path = "vc_installation_error_arm.bat",
+    cxx_builtin_include_directories = [],
+    tool_paths = {  # compiler, assembler, archiver and linker all map to the error script
+        "ar": "vc_installation_error_arm.bat",
+        "ml": "vc_installation_error_arm.bat",
+        "cpp": "vc_installation_error_arm.bat",
+        "gcc": "vc_installation_error_arm.bat",
+        "gcov": "wrapper/bin/msvc_nop.bat",
+        "ld": "vc_installation_error_arm.bat",
+        "nm": "wrapper/bin/msvc_nop.bat",
+        "objcopy": "wrapper/bin/msvc_nop.bat",
+        "objdump": "wrapper/bin/msvc_nop.bat",
+        "strip": "wrapper/bin/msvc_nop.bat",
+    },
+    archiver_flags = ["/MACHINE:ARM"],
+    default_link_flags = ["/MACHINE:ARM"],
+    dbg_mode_debug_flag = "/DEBUG",
+    fastbuild_mode_debug_flag = "/DEBUG",
+)
+
+toolchain(
+    name = "cc-toolchain-x64_arm_windows",  # toolchain-resolution entry for :cc-compiler-x64_arm_windows
+    exec_compatible_with = [  # tools run on x86_64 Windows
+        "@platforms//cpu:x86_64",
+        "@platforms//os:windows",
+    ],
+    target_compatible_with = [  # output is for arm Windows
+        "@platforms//cpu:arm",
+        "@platforms//os:windows",
+    ],
+    toolchain = ":cc-compiler-x64_arm_windows",
+    toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
+)
+
+cc_toolchain(
+    name = "cc-compiler-x64_arm64_windows",
+    toolchain_identifier = "msvc_x64_arm64",  # same identifier as set inside the :msvc_x64_arm64 config
+    toolchain_config = ":msvc_x64_arm64",  # cc_toolchain_config target declared in this file
+    all_files = ":empty",
+    ar_files = ":empty",
+    as_files = ":msvc_compiler_files",  # assembler inputs use the same label as compiler_files
+    compiler_files = ":msvc_compiler_files",
+    dwp_files = ":empty",
+    linker_files = ":empty",
+    objcopy_files = ":empty",
+    strip_files = ":empty",
+    supports_param_files = 1,
+)
+
+cc_toolchain_config(
+    name = "msvc_x64_arm64",
+    cpu = "x64_windows",  # NOTE(review): "x64_windows" although /MACHINE is ARM64; _use_msvc_toolchain in windows_cc_toolchain_config.bzl tests for this value - confirm before changing
+    compiler = "msvc-cl",
+    host_system_name = "local",
+    target_system_name = "local",
+    target_libc = "msvcrt",
+    abi_version = "local",
+    abi_libc_version = "local",
+    toolchain_identifier = "msvc_x64_arm64",
+    msvc_env_tmp = "msvc_not_found",  # the four msvc_env_* values are placeholders, not real paths
+    msvc_env_path = "msvc_not_found",
+    msvc_env_include = "msvc_not_found",
+    msvc_env_lib = "msvc_not_found",
+    msvc_cl_path = "vc_installation_error_arm64.bat",  # script that prints an installation error to stderr and exits 1
+    msvc_ml_path = "vc_installation_error_arm64.bat",
+    msvc_link_path = "vc_installation_error_arm64.bat",
+    msvc_lib_path = "vc_installation_error_arm64.bat",
+    cxx_builtin_include_directories = [],
+    tool_paths = {  # compiler, assembler, archiver and linker all map to the error script
+        "ar": "vc_installation_error_arm64.bat",
+        "ml": "vc_installation_error_arm64.bat",
+        "cpp": "vc_installation_error_arm64.bat",
+        "gcc": "vc_installation_error_arm64.bat",
+        "gcov": "wrapper/bin/msvc_nop.bat",
+        "ld": "vc_installation_error_arm64.bat",
+        "nm": "wrapper/bin/msvc_nop.bat",
+        "objcopy": "wrapper/bin/msvc_nop.bat",
+        "objdump": "wrapper/bin/msvc_nop.bat",
+        "strip": "wrapper/bin/msvc_nop.bat",
+    },
+    archiver_flags = ["/MACHINE:ARM64"],
+    default_link_flags = ["/MACHINE:ARM64"],
+    dbg_mode_debug_flag = "/DEBUG",
+    fastbuild_mode_debug_flag = "/DEBUG",
+)
+
+toolchain(
+    name = "cc-toolchain-x64_arm64_windows",  # toolchain-resolution entry for :cc-compiler-x64_arm64_windows
+    exec_compatible_with = [  # tools run on x86_64 Windows
+        "@platforms//cpu:x86_64",
+        "@platforms//os:windows",
+    ],
+    target_compatible_with = [  # output is for arm64 Windows
+        "@platforms//cpu:arm64",
+        "@platforms//os:windows",
+    ],
+    toolchain = ":cc-compiler-x64_arm64_windows",
+    toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
+)
+
+
+cc_toolchain(
+    name = "cc-compiler-x64_windows-clang-cl",
+    toolchain_identifier = "clang_cl_x64",  # same identifier as set inside the :clang_cl_x64 config
+    toolchain_config = ":clang_cl_x64",  # cc_toolchain_config target declared in this file
+    all_files = ":empty",
+    ar_files = ":empty",
+    as_files = ":clangcl_compiler_files",  # assembler inputs use the same label as compiler_files
+    compiler_files = ":clangcl_compiler_files",
+    dwp_files = ":empty",
+    linker_files = ":empty",
+    objcopy_files = ":empty",
+    strip_files = ":empty",
+    supports_param_files = 1,
+)
+
+cc_toolchain_config(
+    name = "clang_cl_x64",
+    cpu = "x64_windows",
+    compiler = "clang-cl",  # the only config in this part of the file that uses clang-cl instead of msvc-cl
+    host_system_name = "local",
+    target_system_name = "local",
+    target_libc = "msvcrt",
+    abi_version = "local",
+    abi_libc_version = "local",
+    toolchain_identifier = "clang_cl_x64",
+    msvc_env_tmp = "clang_cl_not_found",  # the four msvc_env_* values are placeholders, not real paths
+    msvc_env_path = "clang_cl_not_found",
+    msvc_env_include = "clang_cl_not_found",
+    msvc_env_lib = "clang_cl_not_found",
+    msvc_cl_path = "clang_installation_error.bat",  # script that prints an installation error to stderr and exits 1
+    msvc_ml_path = "clang_installation_error.bat",
+    msvc_link_path = "clang_installation_error.bat",
+    msvc_lib_path = "clang_installation_error.bat",
+    cxx_builtin_include_directories = [],
+    tool_paths = {  # compiler, assembler, archiver and linker all map to the error script
+        "ar": "clang_installation_error.bat",
+        "ml": "clang_installation_error.bat",
+        "cpp": "clang_installation_error.bat",
+        "gcc": "clang_installation_error.bat",
+        "gcov": "wrapper/bin/msvc_nop.bat",
+        "ld": "clang_installation_error.bat",
+        "nm": "wrapper/bin/msvc_nop.bat",
+        "objcopy": "wrapper/bin/msvc_nop.bat",
+        "objdump": "wrapper/bin/msvc_nop.bat",
+        "strip": "wrapper/bin/msvc_nop.bat",
+    },
+    archiver_flags = ["/MACHINE:X64"],
+    default_link_flags = ["/MACHINE:X64", "/DEFAULTLIB:clang_rt.builtins-x86_64.lib"],  # additionally links the clang_rt builtins library by default
+    dbg_mode_debug_flag = "/DEBUG",
+    fastbuild_mode_debug_flag = "/DEBUG",
+)
+
+toolchain(
+    name = "cc-toolchain-x64_windows-clang-cl",  # toolchain-resolution entry for :cc-compiler-x64_windows-clang-cl
+    exec_compatible_with = [  # tools run on x86_64 Windows
+        "@platforms//cpu:x86_64",
+        "@platforms//os:windows",
+        "@bazel_tools//tools/cpp:clang-cl",  # extra constraint: the execution platform must also declare clang-cl
+    ],
+    target_compatible_with = [  # output is for x86_64 Windows
+        "@platforms//cpu:x86_64",
+        "@platforms//os:windows",
+    ],
+    toolchain = ":cc-compiler-x64_windows-clang-cl",
+    toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
+)
+
+cc_toolchain(
+    name = "cc-compiler-armeabi-v7a",
+    toolchain_identifier = "stub_armeabi-v7a",
+    toolchain_config = ":stub_armeabi-v7a",  # armeabi_cc_toolchain_config target declared in this file
+    all_files = ":empty",  # every *_files attribute of this toolchain points at :empty
+    ar_files = ":empty",
+    as_files = ":empty",
+    compiler_files = ":empty",
+    dwp_files = ":empty",
+    linker_files = ":empty",
+    objcopy_files = ":empty",
+    strip_files = ":empty",
+    supports_param_files = 1,
+)
+
+armeabi_cc_toolchain_config(name = "stub_armeabi-v7a")  # takes only a name; presumably the rule from armeabi_cc_toolchain_config.bzl - confirm against the load() at the top of this file
+
+toolchain(
+    name = "cc-toolchain-armeabi-v7a",  # toolchain-resolution entry for :cc-compiler-armeabi-v7a
+    exec_compatible_with = [  # empty: no constraint is placed on the execution platform
+    ],
+    target_compatible_with = [  # output is for arm Android
+        "@platforms//cpu:arm",
+        "@platforms//os:android",
+    ],
+    toolchain = ":cc-compiler-armeabi-v7a",
+    toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
+)
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/WORKSPACE b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/WORKSPACE
new file mode 100644
index 0000000000..bc05b4c36f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/WORKSPACE
@@ -0,0 +1,2 @@
+# DO NOT EDIT: automatically generated WORKSPACE file for cc_autoconf rule
+workspace(name = "local_config_cc")
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/armeabi_cc_toolchain_config.bzl b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/armeabi_cc_toolchain_config.bzl
new file mode 100644
index 0000000000..94e0720bf6
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/armeabi_cc_toolchain_config.bzl
@@ -0,0 +1,82 @@
+# Copyright 2019 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""A Starlark cc_toolchain configuration rule"""
+
+load(
+    "@bazel_tools//tools/cpp:cc_toolchain_config_lib.bzl",
+    "feature",
+    "tool_path",
+)
+
+def _impl(ctx):
+    """Builds the CcToolchainConfigInfo for the stub armeabi-v7a toolchain.
+
+    Every tool path is "/bin/false"; the only features declared are
+    "supports_dynamic_linker" and "supports_pic", both enabled.
+
+    Args:
+        ctx: The rule context, forwarded to cc_common.
+
+    Returns:
+        The result of cc_common.create_cc_toolchain_config_info.
+    """
+
+    # Shared by every system/cpu/libc/abi field below.
+    arch = "armeabi-v7a"
+
+    # One tool_path entry is generated per name, in this order.
+    stub_tools = [
+        "ar",
+        "compat-ld",
+        "cpp",
+        "dwp",
+        "gcc",
+        "gcov",
+        "ld",
+        "nm",
+        "objcopy",
+        "objdump",
+        "strip",
+    ]
+
+    # No action configs, artifact name patterns, include dirs or make variables.
+    return cc_common.create_cc_toolchain_config_info(
+        ctx = ctx,
+        features = [
+            feature(name = "supports_dynamic_linker", enabled = True),
+            feature(name = "supports_pic", enabled = True),
+        ],
+        action_configs = [],
+        artifact_name_patterns = [],
+        cxx_builtin_include_directories = [],
+        toolchain_identifier = "stub_armeabi-v7a",
+        host_system_name = arch,
+        target_system_name = arch,
+        target_cpu = arch,
+        target_libc = arch,
+        compiler = "compiler",
+        abi_version = arch,
+        abi_libc_version = arch,
+        tool_paths = [tool_path(name = tool, path = "/bin/false") for tool in stub_tools],
+        make_variables = [],
+        builtin_sysroot = None,
+        cc_target_os = None,
+    )
+
+armeabi_cc_toolchain_config = rule(
+    implementation = _impl,
+    attrs = {},  # no attributes: _impl hard-codes every value it passes on
+    provides = [CcToolchainConfigInfo],  # provider this rule promises to return
+)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/builtin_include_directory_paths_clangcl b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/builtin_include_directory_paths_clangcl
new file mode 100644
index 0000000000..f440b6083d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/builtin_include_directory_paths_clangcl
@@ -0,0 +1,7 @@
+This file is generated by cc_configure and contains builtin include directories
+that clang-cl reported. This file is a dependency of every compilation action and
+changes to it will be reflected in the action cache key. When some of these
+paths change, Bazel will make sure to rerun the action, even though none of
+the declared action inputs or the action command line changes.
+
+
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/builtin_include_directory_paths_mingw b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/builtin_include_directory_paths_mingw
new file mode 100644
index 0000000000..d2cc97c569
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/builtin_include_directory_paths_mingw
@@ -0,0 +1,8 @@
+This file is generated by cc_configure and contains builtin include directories
+that mingw reported. This file is a dependency of every compilation action and
+changes to it will be reflected in the action cache key. When some of these
+paths change, Bazel will make sure to rerun the action, even though none of
+the declared action inputs or the action command line changes.
+
+        "c:/msys64/mingw64/",
+        
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/builtin_include_directory_paths_msvc b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/builtin_include_directory_paths_msvc
new file mode 100644
index 0000000000..1380bc62e1
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/builtin_include_directory_paths_msvc
@@ -0,0 +1,7 @@
+This file is generated by cc_configure and contains builtin include directories
+that msvc reported. This file is a dependency of every compilation action and
+changes to it will be reflected in the action cache key. When some of these
+paths change, Bazel will make sure to rerun the action, even though none of
+the declared action inputs or the action command line changes.
+
+
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/clang_installation_error.bat b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/clang_installation_error.bat
new file mode 100644
index 0000000000..8aa170ae6a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/clang_installation_error.bat
@@ -0,0 +1,24 @@
+:: Copyright 2019 The Bazel Authors. All rights reserved.
+::
+:: Licensed under the Apache License, Version 2.0 (the "License");
+:: you may not use this file except in compliance with the License.
+:: You may obtain a copy of the License at
+::
+::    http://www.apache.org/licenses/LICENSE-2.0
+::
+:: Unless required by applicable law or agreed to in writing, software
+:: distributed under the License is distributed on an "AS IS" BASIS,
+:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+:: See the License for the specific language governing permissions and
+:: limitations under the License.
+
+@echo OFF
+:: Report the missing Clang installation on stderr. "echo(" prints the blank lines because "echo." breaks if a file named "echo" exists in the working directory.
+echo( 1>&2
+echo The target you are compiling requires the Clang compiler. 1>&2
+echo Bazel couldn't find a valid Clang installation on your machine. 1>&2
+
+echo Please check your installation following https://docs.bazel.build/versions/master/windows.html#using 1>&2
+echo( 1>&2
+:: Exit with a non-zero status so the caller sees a failure.
+exit /b 1
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/get_env.bat b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/get_env.bat
new file mode 100644
index 0000000000..cc498d38bb
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/get_env.bat
@@ -0,0 +1,3 @@
+@echo off
+call "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\VCVARSALL.BAT" amd64  -vcvars_ver=14.29.30133 > NUL 
+echo PATH=%PATH%,INCLUDE=%INCLUDE%,LIB=%LIB%,WINDOWSSDKDIR=%WINDOWSSDKDIR% 
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/msys_gcc_installation_error.bat b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/msys_gcc_installation_error.bat
new file mode 100644
index 0000000000..25c35534f9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/msys_gcc_installation_error.bat
@@ -0,0 +1,23 @@
+:: Copyright 2018 The Bazel Authors. All rights reserved.
+::
+:: Licensed under the Apache License, Version 2.0 (the "License");
+:: you may not use this file except in compliance with the License.
+:: You may obtain a copy of the License at
+::
+::    http://www.apache.org/licenses/LICENSE-2.0
+::
+:: Unless required by applicable law or agreed to in writing, software
+:: distributed under the License is distributed on an "AS IS" BASIS,
+:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+:: See the License for the specific language governing permissions and
+:: limitations under the License.
+
+@echo OFF
+:: Report the missing MSYS/MINGW gcc on stderr. "echo(" prints the blank lines because "echo." breaks if a file named "echo" exists in the working directory.
+echo( 1>&2
+echo The target you are compiling requires MSYS gcc / MINGW gcc. 1>&2
+echo Bazel couldn't find gcc installation on your machine. 1>&2
+echo Please install MSYS gcc / MINGW gcc and set BAZEL_SH environment variable 1>&2
+echo( 1>&2
+:: Exit with a non-zero status so the caller sees a failure.
+exit /b 1
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/vc_installation_error_arm.bat b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/vc_installation_error_arm.bat
new file mode 100644
index 0000000000..952ce7262d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/vc_installation_error_arm.bat
@@ -0,0 +1,29 @@
+:: Copyright 2017 The Bazel Authors. All rights reserved.
+::
+:: Licensed under the Apache License, Version 2.0 (the "License");
+:: you may not use this file except in compliance with the License.
+:: You may obtain a copy of the License at
+::
+::    http://www.apache.org/licenses/LICENSE-2.0
+::
+:: Unless required by applicable law or agreed to in writing, software
+:: distributed under the License is distributed on an "AS IS" BASIS,
+:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+:: See the License for the specific language governing permissions and
+:: limitations under the License.
+
+@echo OFF
+:: Report the missing arm Visual C++ tools on stderr. "echo(" prints the blank lines because "echo." breaks if a file named "echo" exists in the working directory.
+echo( 1>&2
+echo The target you are compiling requires Visual C++ build tools. 1>&2
+echo Bazel couldn't find a valid Visual C++ build tools installation on your machine. 1>&2
+echo( 1>&2
+echo Visual C++ build tools seems to be installed at C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC 1>&2
+echo But Bazel can't find the following tools: 1>&2
+echo     cl.exe, link.exe, lib.exe 1>&2
+echo for arm target architecture 1>&2
+echo( 1>&2
+echo Please check your installation following https://docs.bazel.build/versions/master/windows.html#using 1>&2
+echo( 1>&2
+:: Exit with a non-zero status so the caller sees a failure.
+exit /b 1
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/vc_installation_error_arm64.bat b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/vc_installation_error_arm64.bat
new file mode 100644
index 0000000000..7dd9cb6373
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/vc_installation_error_arm64.bat
@@ -0,0 +1,29 @@
+:: Copyright 2017 The Bazel Authors. All rights reserved.
+::
+:: Licensed under the Apache License, Version 2.0 (the "License");
+:: you may not use this file except in compliance with the License.
+:: You may obtain a copy of the License at
+::
+::    http://www.apache.org/licenses/LICENSE-2.0
+::
+:: Unless required by applicable law or agreed to in writing, software
+:: distributed under the License is distributed on an "AS IS" BASIS,
+:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+:: See the License for the specific language governing permissions and
+:: limitations under the License.
+
+@echo OFF
+:: Report the missing arm64 Visual C++ tools on stderr. "echo(" prints the blank lines because "echo." breaks if a file named "echo" exists in the working directory.
+echo( 1>&2
+echo The target you are compiling requires Visual C++ build tools. 1>&2
+echo Bazel couldn't find a valid Visual C++ build tools installation on your machine. 1>&2
+echo( 1>&2
+echo Visual C++ build tools seems to be installed at C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC 1>&2
+echo But Bazel can't find the following tools: 1>&2
+echo     cl.exe, link.exe, lib.exe 1>&2
+echo for arm64 target architecture 1>&2
+echo( 1>&2
+echo Please check your installation following https://docs.bazel.build/versions/master/windows.html#using 1>&2
+echo( 1>&2
+:: Exit with a non-zero status so the caller sees a failure.
+exit /b 1
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/windows_cc_toolchain_config.bzl b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/windows_cc_toolchain_config.bzl
new file mode 100644
index 0000000000..0c4cf58ad5
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/cc/windows_cc_toolchain_config.bzl
@@ -0,0 +1,1339 @@
+# Copyright 2019 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""A Starlark cc_toolchain configuration rule for Windows"""
+
+load(
+    "@bazel_tools//tools/cpp:cc_toolchain_config_lib.bzl",
+    "action_config",
+    "artifact_name_pattern",
+    "env_entry",
+    "env_set",
+    "feature",
+    "feature_set",
+    "flag_group",
+    "flag_set",
+    "tool",
+    "tool_path",
+    "variable_with_value",
+    "with_feature_set",
+)
+load("@bazel_tools//tools/build_defs/cc:action_names.bzl", "ACTION_NAMES")
+
+all_compile_actions = [  # superset of the three compile lists below: compile, assemble, header parsing, module, clif and LTO backend actions
+    ACTION_NAMES.c_compile,
+    ACTION_NAMES.cpp_compile,
+    ACTION_NAMES.linkstamp_compile,
+    ACTION_NAMES.assemble,
+    ACTION_NAMES.preprocess_assemble,
+    ACTION_NAMES.cpp_header_parsing,
+    ACTION_NAMES.cpp_module_compile,
+    ACTION_NAMES.cpp_module_codegen,
+    ACTION_NAMES.clif_match,
+    ACTION_NAMES.lto_backend,
+]
+
+all_cpp_compile_actions = [  # all_compile_actions minus c_compile, assemble, preprocess_assemble and lto_backend
+    ACTION_NAMES.cpp_compile,
+    ACTION_NAMES.linkstamp_compile,
+    ACTION_NAMES.cpp_header_parsing,
+    ACTION_NAMES.cpp_module_compile,
+    ACTION_NAMES.cpp_module_codegen,
+    ACTION_NAMES.clif_match,
+]
+
+preprocessor_compile_actions = [  # all_compile_actions minus assemble, cpp_module_codegen and lto_backend
+    ACTION_NAMES.c_compile,
+    ACTION_NAMES.cpp_compile,
+    ACTION_NAMES.linkstamp_compile,
+    ACTION_NAMES.preprocess_assemble,
+    ACTION_NAMES.cpp_header_parsing,
+    ACTION_NAMES.cpp_module_compile,
+    ACTION_NAMES.clif_match,
+]
+
+codegen_compile_actions = [  # all_compile_actions minus cpp_header_parsing, cpp_module_compile and clif_match
+    ACTION_NAMES.c_compile,
+    ACTION_NAMES.cpp_compile,
+    ACTION_NAMES.linkstamp_compile,
+    ACTION_NAMES.assemble,
+    ACTION_NAMES.preprocess_assemble,
+    ACTION_NAMES.cpp_module_codegen,
+    ACTION_NAMES.lto_backend,
+]
+
+all_link_actions = [  # executable and dynamic-library links only; cpp_link_static_library is not in this list
+    ACTION_NAMES.cpp_link_executable,
+    ACTION_NAMES.cpp_link_dynamic_library,
+    ACTION_NAMES.cpp_link_nodeps_dynamic_library,
+]
+
+def _use_msvc_toolchain(ctx):  # True only when cpu is "x64_windows" and compiler is "msvc-cl" or "clang-cl".
+    return ctx.attr.cpu == "x64_windows" and ctx.attr.compiler in ("msvc-cl", "clang-cl")
+
+def _impl(ctx):
+    if _use_msvc_toolchain(ctx):
+        artifact_name_patterns = [
+            artifact_name_pattern(
+                category_name = "object_file",
+                prefix = "",
+                extension = ".obj",
+            ),
+            artifact_name_pattern(
+                category_name = "static_library",
+                prefix = "",
+                extension = ".lib",
+            ),
+            artifact_name_pattern(
+                category_name = "alwayslink_static_library",
+                prefix = "",
+                extension = ".lo.lib",
+            ),
+            artifact_name_pattern(
+                category_name = "executable",
+                prefix = "",
+                extension = ".exe",
+            ),
+            artifact_name_pattern(
+                category_name = "dynamic_library",
+                prefix = "",
+                extension = ".dll",
+            ),
+            artifact_name_pattern(
+                category_name = "interface_library",
+                prefix = "",
+                extension = ".if.lib",
+            ),
+        ]
+    else:
+        artifact_name_patterns = [
+            artifact_name_pattern(
+                category_name = "executable",
+                prefix = "",
+                extension = ".exe",
+            ),
+        ]
+
+    if _use_msvc_toolchain(ctx):  # MSVC branch: build one action_config per assemble/compile/link action.
+        cpp_link_nodeps_dynamic_library_action = action_config(  # Same implies list and tool as cpp_link_dynamic_library_action below.
+            action_name = ACTION_NAMES.cpp_link_nodeps_dynamic_library,
+            implies = [  # Names of features enabled together with this action; they match the feature(...) definitions that follow this block.
+                "nologo",
+                "shared_flag",
+                "linkstamps",
+                "output_execpath_flags",
+                "input_param_flags",
+                "user_link_flags",
+                "default_link_flags",
+                "linker_subsystem_flag",
+                "linker_param_file",
+                "msvc_env",
+                "no_stripping",
+                "has_configured_linker_path",
+                "def_file",
+            ],
+            tools = [tool(path = ctx.attr.msvc_link_path)],  # Tool path is read from the msvc_link_path attribute.
+        )
+
+        cpp_link_static_library_action = action_config(  # Static archives: archiver flags instead of the link flag features.
+            action_name = ACTION_NAMES.cpp_link_static_library,
+            implies = [
+                "nologo",
+                "archiver_flags",
+                "input_param_flags",
+                "linker_param_file",
+                "msvc_env",
+            ],
+            tools = [tool(path = ctx.attr.msvc_lib_path)],  # Tool path is read from the msvc_lib_path attribute.
+        )
+
+        assemble_action = action_config(
+            action_name = ACTION_NAMES.assemble,
+            implies = [
+                "compiler_input_flags",
+                "compiler_output_flags",
+                "nologo",
+                "msvc_env",
+                "sysroot",
+            ],
+            tools = [tool(path = ctx.attr.msvc_ml_path)],  # Tool path is read from the msvc_ml_path attribute.
+        )
+
+        preprocess_assemble_action = action_config(  # Identical implies list and tool to assemble_action above.
+            action_name = ACTION_NAMES.preprocess_assemble,
+            implies = [
+                "compiler_input_flags",
+                "compiler_output_flags",
+                "nologo",
+                "msvc_env",
+                "sysroot",
+            ],
+            tools = [tool(path = ctx.attr.msvc_ml_path)],
+        )
+
+        c_compile_action = action_config(
+            action_name = ACTION_NAMES.c_compile,
+            implies = [
+                "compiler_input_flags",
+                "compiler_output_flags",
+                "default_compile_flags",
+                "nologo",
+                "msvc_env",
+                "parse_showincludes",
+                "user_compile_flags",
+                "sysroot",
+                "unfiltered_compile_flags",
+            ],
+            tools = [tool(path = ctx.attr.msvc_cl_path)],  # Tool path is read from the msvc_cl_path attribute.
+        )
+
+        cpp_compile_action = action_config(  # Identical implies list and tool to c_compile_action above.
+            action_name = ACTION_NAMES.cpp_compile,
+            implies = [
+                "compiler_input_flags",
+                "compiler_output_flags",
+                "default_compile_flags",
+                "nologo",
+                "msvc_env",
+                "parse_showincludes",
+                "user_compile_flags",
+                "sysroot",
+                "unfiltered_compile_flags",
+            ],
+            tools = [tool(path = ctx.attr.msvc_cl_path)],
+        )
+
+        cpp_link_executable_action = action_config(  # Like the dynamic-library link actions minus shared_flag, has_configured_linker_path and def_file.
+            action_name = ACTION_NAMES.cpp_link_executable,
+            implies = [
+                "nologo",
+                "linkstamps",
+                "output_execpath_flags",
+                "input_param_flags",
+                "user_link_flags",
+                "default_link_flags",
+                "linker_subsystem_flag",
+                "linker_param_file",
+                "msvc_env",
+                "no_stripping",
+            ],
+            tools = [tool(path = ctx.attr.msvc_link_path)],
+        )
+
+        cpp_link_dynamic_library_action = action_config(
+            action_name = ACTION_NAMES.cpp_link_dynamic_library,
+            implies = [
+                "nologo",
+                "shared_flag",
+                "linkstamps",
+                "output_execpath_flags",
+                "input_param_flags",
+                "user_link_flags",
+                "default_link_flags",
+                "linker_subsystem_flag",
+                "linker_param_file",
+                "msvc_env",
+                "no_stripping",
+                "has_configured_linker_path",
+                "def_file",
+            ],
+            tools = [tool(path = ctx.attr.msvc_link_path)],
+        )
+
+        action_configs = [  # Collects all eight action configs defined above.
+            assemble_action,
+            preprocess_assemble_action,
+            c_compile_action,
+            cpp_compile_action,
+            cpp_link_executable_action,
+            cpp_link_dynamic_library_action,
+            cpp_link_nodeps_dynamic_library_action,
+            cpp_link_static_library_action,
+        ]
+    else:
+        action_configs = []  # Non-MSVC toolchains get no action configs from this block.
+
+    if _use_msvc_toolchain(ctx):
+        msvc_link_env_feature = feature(  # Sets the LIB environment variable for link and static-archive actions.
+            name = "msvc_link_env",
+            env_sets = [
+                env_set(
+                    actions = all_link_actions +
+                              [ACTION_NAMES.cpp_link_static_library],
+                    env_entries = [env_entry(key = "LIB", value = ctx.attr.msvc_env_lib)],  # Value comes from the msvc_env_lib attribute.
+                ),
+            ],
+        )
+
+        shared_flag_feature = feature(  # Adds /DLL to both dynamic-library link actions; implied by their action configs.
+            name = "shared_flag",
+            flag_sets = [
+                flag_set(
+                    actions = [
+                        ACTION_NAMES.cpp_link_dynamic_library,
+                        ACTION_NAMES.cpp_link_nodeps_dynamic_library,
+                    ],
+                    flag_groups = [flag_group(flags = ["/DLL"])],
+                ),
+            ],
+        )
+
+        determinism_feature = feature(  # On by default: pins the date/time macros to a fixed string for C and C++ compiles.
+            name = "determinism",
+            enabled = True,
+            flag_sets = [
+                flag_set(
+                    actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile],
+                    flag_groups = [
+                        flag_group(
+                            flags = [
+                                "/wd4117",  # Disables MSVC warning C4117; presumably needed because the reserved macros below are redefined.
+                                "-D__DATE__=\"redacted\"",
+                                "-D__TIMESTAMP__=\"redacted\"",
+                                "-D__TIME__=\"redacted\"",
+                            ] + (["-Wno-builtin-macro-redefined"] if ctx.attr.compiler == "clang-cl" else []),  # Extra flag only when the compiler attribute is "clang-cl".
+                        ),
+                    ],
+                ),
+            ],
+        )
+
+        sysroot_feature = feature(  # Forwards the "sysroot" build variable to assemble, compile and non-static link actions.
+            name = "sysroot",
+            flag_sets = [
+                flag_set(
+                    actions = [
+                        ACTION_NAMES.assemble,
+                        ACTION_NAMES.preprocess_assemble,
+                        ACTION_NAMES.c_compile,
+                        ACTION_NAMES.cpp_compile,
+                        ACTION_NAMES.cpp_header_parsing,
+                        ACTION_NAMES.cpp_module_compile,
+                        ACTION_NAMES.cpp_module_codegen,
+                        ACTION_NAMES.cpp_link_executable,
+                        ACTION_NAMES.cpp_link_dynamic_library,
+                        ACTION_NAMES.cpp_link_nodeps_dynamic_library,
+                    ],
+                    flag_groups = [
+                        flag_group(
+                            flags = ["--sysroot=%{sysroot}"],
+                            iterate_over = "sysroot",
+                            expand_if_available = "sysroot",  # Emits nothing when the variable is not set.
+                        ),
+                    ],
+                ),
+            ],
+        )
+
+        unfiltered_compile_flags_feature = feature(  # Passes each entry of the "unfiltered_compile_flags" variable through verbatim.
+            name = "unfiltered_compile_flags",
+            flag_sets = [
+                flag_set(
+                    actions = [
+                        ACTION_NAMES.preprocess_assemble,
+                        ACTION_NAMES.c_compile,
+                        ACTION_NAMES.cpp_compile,
+                        ACTION_NAMES.cpp_header_parsing,
+                        ACTION_NAMES.cpp_module_compile,
+                        ACTION_NAMES.cpp_module_codegen,
+                    ],
+                    flag_groups = [
+                        flag_group(
+                            flags = ["%{unfiltered_compile_flags}"],
+                            iterate_over = "unfiltered_compile_flags",
+                            expand_if_available = "unfiltered_compile_flags",  # Skipped entirely when the variable is absent.
+                        ),
+                    ],
+                ),
+            ],
+        )
+
+        compiler_param_file_feature = feature(  # Marker feature: declares the name only, no flags or env.
+            name = "compiler_param_file",
+        )
+
+        copy_dynamic_libraries_to_binary_feature = feature(  # Marker feature: no flags or env; implied by targets_windows.
+            name = "copy_dynamic_libraries_to_binary",
+        )
+
+        input_param_flags_feature = feature(  # Linker/archiver inputs: import-library output, libopts, and the libraries_to_link list.
+            name = "input_param_flags",
+            flag_sets = [
+                flag_set(  # 1) Dynamic libraries only: name the import library when an output path is provided.
+                    actions = [
+                        ACTION_NAMES.cpp_link_dynamic_library,
+                        ACTION_NAMES.cpp_link_nodeps_dynamic_library,
+                    ],
+                    flag_groups = [
+                        flag_group(
+                            flags = ["/IMPLIB:%{interface_library_output_path}"],
+                            expand_if_available = "interface_library_output_path",
+                        ),
+                    ],
+                ),
+                flag_set(  # 2) All link actions: pass each "libopts" entry verbatim.
+                    actions = all_link_actions,
+                    flag_groups = [
+                        flag_group(
+                            flags = ["%{libopts}"],
+                            iterate_over = "libopts",
+                            expand_if_available = "libopts",
+                        ),
+                    ],
+                ),
+                flag_set(  # 3) Link and static-archive actions: expand libraries_to_link, dispatching on each entry's type.
+                    actions = all_link_actions +
+                              [ACTION_NAMES.cpp_link_static_library],
+                    flag_groups = [
+                        flag_group(
+                            iterate_over = "libraries_to_link",
+                            flag_groups = [
+                                flag_group(  # object_file_group: emit every contained object file.
+                                    iterate_over = "libraries_to_link.object_files",
+                                    flag_groups = [flag_group(flags = ["%{libraries_to_link.object_files}"])],
+                                    expand_if_equal = variable_with_value(
+                                        name = "libraries_to_link.type",
+                                        value = "object_file_group",
+                                    ),
+                                ),
+                                flag_group(  # object_file: emit its name as-is.
+                                    flag_groups = [flag_group(flags = ["%{libraries_to_link.name}"])],
+                                    expand_if_equal = variable_with_value(
+                                        name = "libraries_to_link.type",
+                                        value = "object_file",
+                                    ),
+                                ),
+                                flag_group(  # interface_library: emit its name as-is.
+                                    flag_groups = [flag_group(flags = ["%{libraries_to_link.name}"])],
+                                    expand_if_equal = variable_with_value(
+                                        name = "libraries_to_link.type",
+                                        value = "interface_library",
+                                    ),
+                                ),
+                                flag_group(  # static_library: plain name, or /WHOLEARCHIVE:<name> when is_whole_archive is true.
+                                    flag_groups = [
+                                        flag_group(
+                                            flags = ["%{libraries_to_link.name}"],
+                                            expand_if_false = "libraries_to_link.is_whole_archive",
+                                        ),
+                                        flag_group(
+                                            flags = ["/WHOLEARCHIVE:%{libraries_to_link.name}"],
+                                            expand_if_true = "libraries_to_link.is_whole_archive",
+                                        ),
+                                    ],
+                                    expand_if_equal = variable_with_value(
+                                        name = "libraries_to_link.type",
+                                        value = "static_library",
+                                    ),
+                                ),
+                            ],
+                            expand_if_available = "libraries_to_link",
+                        ),
+                    ],
+                ),
+            ],
+        )
+
+        fastbuild_feature = feature(  # Same compile flags as the dbg feature; the link debug flag comes from a different attribute.
+            name = "fastbuild",
+            flag_sets = [
+                flag_set(
+                    actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile],
+                    flag_groups = [flag_group(flags = ["/Od", "/Z7"])],  # MSVC: /Od disables optimization, /Z7 embeds debug info in the object files.
+                ),
+                flag_set(
+                    actions = all_link_actions,
+                    flag_groups = [
+                        flag_group(
+                            flags = [ctx.attr.fastbuild_mode_debug_flag, "/INCREMENTAL:NO"],  # Linker debug flag is supplied by the fastbuild_mode_debug_flag attribute.
+                        ),
+                    ],
+                ),
+            ],
+            implies = ["generate_pdb_file"],
+        )
+
+        user_compile_flags_feature = feature(  # Passes each entry of the "user_compile_flags" variable through verbatim.
+            name = "user_compile_flags",
+            flag_sets = [
+                flag_set(
+                    actions = [
+                        ACTION_NAMES.preprocess_assemble,
+                        ACTION_NAMES.c_compile,
+                        ACTION_NAMES.cpp_compile,
+                        ACTION_NAMES.cpp_header_parsing,
+                        ACTION_NAMES.cpp_module_compile,
+                        ACTION_NAMES.cpp_module_codegen,
+                    ],
+                    flag_groups = [
+                        flag_group(
+                            flags = ["%{user_compile_flags}"],
+                            iterate_over = "user_compile_flags",
+                            expand_if_available = "user_compile_flags",  # Skipped entirely when the variable is absent.
+                        ),
+                    ],
+                ),
+            ],
+        )
+
+        archiver_flags_feature = feature(  # Static-library action only: output path plus attribute-supplied archiver flags.
+            name = "archiver_flags",
+            flag_sets = [
+                flag_set(
+                    actions = [ACTION_NAMES.cpp_link_static_library],
+                    flag_groups = [
+                        flag_group(
+                            flags = ["/OUT:%{output_execpath}"],
+                            expand_if_available = "output_execpath",
+                        ),
+                        flag_group(
+                            flags = ctx.attr.archiver_flags,  # Always emitted; taken from the archiver_flags attribute.
+                        ),
+                    ],
+                ),
+            ],
+        )
+
+        default_link_flags_feature = feature(  # On by default: adds the default_link_flags attribute to every link action.
+            name = "default_link_flags",
+            enabled = True,
+            flag_sets = [
+                flag_set(
+                    actions = all_link_actions,
+                    flag_groups = [flag_group(flags = ctx.attr.default_link_flags)],
+                ),
+            ],
+        )
+
+        static_link_msvcrt_feature = feature(name = "static_link_msvcrt")  # Marker feature: declares the name only, no flags or env.
+
+        dynamic_link_msvcrt_debug_feature = feature(  # Debug DLL C runtime: /MDd at compile time, msvcrtd.lib at link time.
+            name = "dynamic_link_msvcrt_debug",
+            flag_sets = [
+                flag_set(
+                    actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile],
+                    flag_groups = [flag_group(flags = ["/MDd"])],
+                ),
+                flag_set(
+                    actions = all_link_actions,
+                    flag_groups = [flag_group(flags = ["/DEFAULTLIB:msvcrtd.lib"])],
+                ),
+            ],
+            requires = [feature_set(features = ["dbg"])],  # Only usable when the dbg feature is enabled.
+        )
+
+        dbg_feature = feature(  # Same compile flags as the fastbuild feature; the link debug flag comes from a different attribute.
+            name = "dbg",
+            flag_sets = [
+                flag_set(
+                    actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile],
+                    flag_groups = [flag_group(flags = ["/Od", "/Z7"])],  # MSVC: /Od disables optimization, /Z7 embeds debug info in the object files.
+                ),
+                flag_set(
+                    actions = all_link_actions,
+                    flag_groups = [
+                        flag_group(
+                            flags = [ctx.attr.dbg_mode_debug_flag, "/INCREMENTAL:NO"],  # Linker debug flag is supplied by the dbg_mode_debug_flag attribute.
+                        ),
+                    ],
+                ),
+            ],
+            implies = ["generate_pdb_file"],
+        )
+
+        opt_feature = feature(  # Optimized builds: /O2 for C and C++ compiles.
+            name = "opt",
+            flag_sets = [
+                flag_set(
+                    actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile],
+                    flag_groups = [flag_group(flags = ["/O2"])],
+                ),
+            ],
+            implies = ["frame_pointer"],  # Pulls in the frame_pointer feature, which adds /Oy-.
+        )
+
+        supports_interface_shared_libraries_feature = feature(  # On by default; carries no flags or env.
+            name = "supports_interface_shared_libraries",
+            enabled = True,
+        )
+
+        user_link_flags_feature = feature(  # Passes each entry of the "user_link_flags" variable to every link action.
+            name = "user_link_flags",
+            flag_sets = [
+                flag_set(
+                    actions = all_link_actions,
+                    flag_groups = [
+                        flag_group(
+                            flags = ["%{user_link_flags}"],
+                            iterate_over = "user_link_flags",
+                            expand_if_available = "user_link_flags",  # Skipped entirely when the variable is absent.
+                        ),
+                    ],
+                ),
+            ],
+        )
+
+        default_compile_flags_feature = feature(  # On by default: a fixed set of defines, limits and warning suppressions.
+            name = "default_compile_flags",
+            enabled = True,
+            flag_sets = [
+                flag_set(
+                    actions = [
+                        ACTION_NAMES.assemble,
+                        ACTION_NAMES.preprocess_assemble,
+                        ACTION_NAMES.linkstamp_compile,
+                        ACTION_NAMES.c_compile,
+                        ACTION_NAMES.cpp_compile,
+                        ACTION_NAMES.cpp_header_parsing,
+                        ACTION_NAMES.cpp_module_compile,
+                        ACTION_NAMES.cpp_module_codegen,
+                        ACTION_NAMES.lto_backend,
+                        ACTION_NAMES.clif_match,
+                    ],
+                    flag_groups = [
+                        flag_group(
+                            flags = [
+                                "/DCOMPILER_MSVC",
+                                "/DNOMINMAX",  # Keeps <windows.h> from defining min/max macros.
+                                "/D_WIN32_WINNT=0x0601",  # 0x0601 is the Windows 7 version value.
+                                "/D_CRT_SECURE_NO_DEPRECATE",
+                                "/D_CRT_SECURE_NO_WARNINGS",
+                                "/bigobj",
+                                "/Zm500",
+                                "/EHsc",
+                                "/wd4351",  # /wdNNNN disables MSVC warning CNNNN (this and the three below).
+                                "/wd4291",
+                                "/wd4250",
+                                "/wd4996",
+                            ],
+                        ),
+                    ],
+                ),
+            ],
+        )
+
+        msvc_compile_env_feature = feature(  # Sets the INCLUDE environment variable for compile and assemble actions.
+            name = "msvc_compile_env",
+            env_sets = [
+                env_set(
+                    actions = [
+                        ACTION_NAMES.c_compile,
+                        ACTION_NAMES.cpp_compile,
+                        ACTION_NAMES.cpp_module_compile,
+                        ACTION_NAMES.cpp_module_codegen,
+                        ACTION_NAMES.cpp_header_parsing,
+                        ACTION_NAMES.assemble,
+                        ACTION_NAMES.preprocess_assemble,
+                    ],
+                    env_entries = [env_entry(key = "INCLUDE", value = ctx.attr.msvc_env_include)],  # Value comes from the msvc_env_include attribute.
+                ),
+            ],
+        )
+
+        preprocessor_defines_feature = feature(  # On by default: emits one /D<define> per entry of "preprocessor_defines".
+            name = "preprocessor_defines",
+            enabled = True,
+            flag_sets = [
+                flag_set(
+                    actions = [
+                        ACTION_NAMES.assemble,
+                        ACTION_NAMES.preprocess_assemble,
+                        ACTION_NAMES.c_compile,
+                        ACTION_NAMES.cpp_compile,
+                        ACTION_NAMES.cpp_header_parsing,
+                        ACTION_NAMES.cpp_module_compile,
+                    ],
+                    flag_groups = [
+                        flag_group(
+                            flags = ["/D%{preprocessor_defines}"],
+                            iterate_over = "preprocessor_defines",  # No expand_if_available guard on this group, unlike the user-flag features.
+                        ),
+                    ],
+                ),
+            ],
+        )
+
+        generate_pdb_file_feature = feature(  # Marker feature: no flags or env; implied by fastbuild and dbg.
+            name = "generate_pdb_file",
+        )
+
+        output_execpath_flags_feature = feature(  # Names the linker output via /OUT: for every link action.
+            name = "output_execpath_flags",
+            flag_sets = [
+                flag_set(
+                    actions = all_link_actions,
+                    flag_groups = [
+                        flag_group(
+                            flags = ["/OUT:%{output_execpath}"],
+                            expand_if_available = "output_execpath",  # Emits nothing when no output path is provided.
+                        ),
+                    ],
+                ),
+            ],
+        )
+
+        dynamic_link_msvcrt_no_debug_feature = feature(  # Release DLL C runtime: /MD at compile time, msvcrt.lib at link time.
+            name = "dynamic_link_msvcrt_no_debug",
+            flag_sets = [
+                flag_set(
+                    actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile],
+                    flag_groups = [flag_group(flags = ["/MD"])],
+                ),
+                flag_set(
+                    actions = all_link_actions,
+                    flag_groups = [flag_group(flags = ["/DEFAULTLIB:msvcrt.lib"])],
+                ),
+            ],
+            requires = [  # Two separate feature sets: per Bazel's feature() semantics, either one being enabled is sufficient.
+                feature_set(features = ["fastbuild"]),
+                feature_set(features = ["opt"]),
+            ],
+        )
+
+        disable_assertions_feature = feature(  # On by default, but its only flag set is gated on opt, so /DNDEBUG appears only in opt builds.
+            name = "disable_assertions",
+            enabled = True,
+            flag_sets = [
+                flag_set(
+                    actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile],
+                    flag_groups = [flag_group(flags = ["/DNDEBUG"])],
+                    with_features = [with_feature_set(features = ["opt"])],
+                ),
+            ],
+        )
+
+        has_configured_linker_path_feature = feature(name = "has_configured_linker_path")  # Marker feature; implied by the two dynamic-library link action configs.
+
+        supports_dynamic_linker_feature = feature(name = "supports_dynamic_linker", enabled = True)  # Marker feature, on by default.
+
+        no_stripping_feature = feature(name = "no_stripping")  # Marker feature; implied by the executable and dynamic-library link action configs.
+
+        linker_param_file_feature = feature(  # Passes the parameter file as @<path> to link and static-archive actions.
+            name = "linker_param_file",
+            flag_sets = [
+                flag_set(
+                    actions = all_link_actions +
+                              [ACTION_NAMES.cpp_link_static_library],
+                    flag_groups = [
+                        flag_group(
+                            flags = ["@%{linker_param_file}"],
+                            expand_if_available = "linker_param_file",  # Emits nothing when no parameter file is in use.
+                        ),
+                    ],
+                ),
+            ],
+        )
+
+        ignore_noisy_warnings_feature = feature(  # On by default: suppresses warning 4221 on the static-library action.
+            name = "ignore_noisy_warnings",
+            enabled = True,
+            flag_sets = [
+                flag_set(
+                    actions = [ACTION_NAMES.cpp_link_static_library],
+                    flag_groups = [flag_group(flags = ["/ignore:4221"])],
+                ),
+            ],
+        )
+
+        no_legacy_features_feature = feature(name = "no_legacy_features")  # Marker feature: declares the name only, no flags or env.
+
+        parse_showincludes_feature = feature(  # Adds /showIncludes to compile-type actions; implied by the C and C++ compile action configs.
+            name = "parse_showincludes",
+            flag_sets = [
+                flag_set(
+                    actions = [
+                        ACTION_NAMES.preprocess_assemble,
+                        ACTION_NAMES.c_compile,
+                        ACTION_NAMES.cpp_compile,
+                        ACTION_NAMES.cpp_module_compile,
+                        ACTION_NAMES.cpp_header_parsing,
+                    ],
+                    flag_groups = [flag_group(flags = ["/showIncludes"])],
+                ),
+            ],
+        )
+
+        static_link_msvcrt_no_debug_feature = feature(  # Release static C runtime: /MT at compile time, libcmt.lib at link time.
+            name = "static_link_msvcrt_no_debug",
+            flag_sets = [
+                flag_set(
+                    actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile],
+                    flag_groups = [flag_group(flags = ["/MT"])],
+                ),
+                flag_set(
+                    actions = all_link_actions,
+                    flag_groups = [flag_group(flags = ["/DEFAULTLIB:libcmt.lib"])],
+                ),
+            ],
+            requires = [  # Two separate feature sets: per Bazel's feature() semantics, either one being enabled is sufficient.
+                feature_set(features = ["fastbuild"]),
+                feature_set(features = ["opt"]),
+            ],
+        )
+
+        treat_warnings_as_errors_feature = feature(  # Opt-in (not enabled by default): adds /WX to C and C++ compiles.
+            name = "treat_warnings_as_errors",
+            flag_sets = [
+                flag_set(
+                    actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile],
+                    flag_groups = [flag_group(flags = ["/WX"])],
+                ),
+            ],
+        )
+
+        windows_export_all_symbols_feature = feature(name = "windows_export_all_symbols")  # Marker feature: declares the name only, no flags or env.
+
+        no_windows_export_all_symbols_feature = feature(name = "no_windows_export_all_symbols")  # Marker feature: declares the name only, no flags or env.
+
+        include_paths_feature = feature(  # On by default: all three include-path variables are passed with the same /I switch.
+            name = "include_paths",
+            enabled = True,
+            flag_sets = [
+                flag_set(
+                    actions = [
+                        ACTION_NAMES.assemble,
+                        ACTION_NAMES.preprocess_assemble,
+                        ACTION_NAMES.c_compile,
+                        ACTION_NAMES.cpp_compile,
+                        ACTION_NAMES.cpp_header_parsing,
+                        ACTION_NAMES.cpp_module_compile,
+                    ],
+                    flag_groups = [  # Emission order: quote paths, then regular paths, then system paths.
+                        flag_group(
+                            flags = ["/I%{quote_include_paths}"],
+                            iterate_over = "quote_include_paths",
+                        ),
+                        flag_group(
+                            flags = ["/I%{include_paths}"],
+                            iterate_over = "include_paths",
+                        ),
+                        flag_group(
+                            flags = ["/I%{system_include_paths}"],
+                            iterate_over = "system_include_paths",
+                        ),
+                    ],
+                ),
+            ],
+        )
+
+        linkstamps_feature = feature(  # Adds each entry of "linkstamp_paths" as a plain input argument to every link action.
+            name = "linkstamps",
+            flag_sets = [
+                flag_set(
+                    actions = all_link_actions,
+                    flag_groups = [
+                        flag_group(
+                            flags = ["%{linkstamp_paths}"],
+                            iterate_over = "linkstamp_paths",
+                            expand_if_available = "linkstamp_paths",  # Skipped entirely when the variable is absent.
+                        ),
+                    ],
+                ),
+            ],
+        )
+
+        targets_windows_feature = feature(  # On by default; has no flags itself and only pulls in copy_dynamic_libraries_to_binary.
+            name = "targets_windows",
+            enabled = True,
+            implies = ["copy_dynamic_libraries_to_binary"],
+        )
+
+        linker_subsystem_flag_feature = feature(  # Adds /SUBSYSTEM:CONSOLE to every link action; implied by the link action configs.
+            name = "linker_subsystem_flag",
+            flag_sets = [
+                flag_set(
+                    actions = all_link_actions,
+                    flag_groups = [flag_group(flags = ["/SUBSYSTEM:CONSOLE"])],
+                ),
+            ],
+        )
+
+        static_link_msvcrt_debug_feature = feature(  # Debug static C runtime: /MTd at compile time, libcmtd.lib at link time.
+            name = "static_link_msvcrt_debug",
+            flag_sets = [
+                flag_set(
+                    actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile],
+                    flag_groups = [flag_group(flags = ["/MTd"])],
+                ),
+                flag_set(
+                    actions = all_link_actions,
+                    flag_groups = [flag_group(flags = ["/DEFAULTLIB:libcmtd.lib"])],
+                ),
+            ],
+            requires = [feature_set(features = ["dbg"])],  # Only usable when the dbg feature is enabled.
+        )
+
+        frame_pointer_feature = feature(  # Adds /Oy- to C and C++ compiles; implied by the opt feature.
+            name = "frame_pointer",
+            flag_sets = [
+                flag_set(
+                    actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile],
+                    flag_groups = [flag_group(flags = ["/Oy-"])],
+                ),
+            ],
+        )
+
+        compiler_output_flags_feature = feature(  # Chooses the output switch from which output_* build variables are available.
+            name = "compiler_output_flags",
+            flag_sets = [
+                flag_set(  # Assemble action: only the object-file form exists.
+                    actions = [ACTION_NAMES.assemble],
+                    flag_groups = [
+                        flag_group(
+                            flag_groups = [
+                                flag_group(
+                                    flags = ["/Fo%{output_file}", "/Zi"],  # Needs output_file and neither output_assembly_file nor output_preprocess_file.
+                                    expand_if_available = "output_file",
+                                    expand_if_not_available = "output_assembly_file",
+                                ),
+                            ],
+                            expand_if_not_available = "output_preprocess_file",
+                        ),
+                    ],
+                ),
+                flag_set(  # Compile-type actions: three forms, each requiring output_file.
+                    actions = [
+                        ACTION_NAMES.preprocess_assemble,
+                        ACTION_NAMES.c_compile,
+                        ACTION_NAMES.cpp_compile,
+                        ACTION_NAMES.cpp_header_parsing,
+                        ACTION_NAMES.cpp_module_compile,
+                        ACTION_NAMES.cpp_module_codegen,
+                    ],
+                    flag_groups = [
+                        flag_group(  # Object file: neither the assembly nor the preprocess variable is set.
+                            flag_groups = [
+                                flag_group(
+                                    flags = ["/Fo%{output_file}"],
+                                    expand_if_not_available = "output_preprocess_file",
+                                ),
+                            ],
+                            expand_if_available = "output_file",
+                            expand_if_not_available = "output_assembly_file",
+                        ),
+                        flag_group(  # Assembly listing: output_assembly_file is set.
+                            flag_groups = [
+                                flag_group(
+                                    flags = ["/Fa%{output_file}"],
+                                    expand_if_available = "output_assembly_file",
+                                ),
+                            ],
+                            expand_if_available = "output_file",
+                        ),
+                        flag_group(  # Preprocessed source: output_preprocess_file is set.
+                            flag_groups = [
+                                flag_group(
+                                    flags = ["/P", "/Fi%{output_file}"],
+                                    expand_if_available = "output_preprocess_file",
+                                ),
+                            ],
+                            expand_if_available = "output_file",
+                        ),
+                    ],
+                ),
+            ],
+        )
+
+        nologo_feature = feature(  # Adds /nologo to compile, assemble, link and static-archive actions; implied by every action config.
+            name = "nologo",
+            flag_sets = [
+                flag_set(
+                    actions = [
+                        ACTION_NAMES.c_compile,
+                        ACTION_NAMES.cpp_compile,
+                        ACTION_NAMES.cpp_module_compile,
+                        ACTION_NAMES.cpp_module_codegen,
+                        ACTION_NAMES.cpp_header_parsing,
+                        ACTION_NAMES.assemble,
+                        ACTION_NAMES.preprocess_assemble,
+                        ACTION_NAMES.cpp_link_executable,
+                        ACTION_NAMES.cpp_link_dynamic_library,
+                        ACTION_NAMES.cpp_link_nodeps_dynamic_library,
+                        ACTION_NAMES.cpp_link_static_library,
+                    ],
+                    flag_groups = [flag_group(flags = ["/nologo"])],
+                ),
+            ],
+        )
+
+        smaller_binary_feature = feature(  # On by default, but both flag sets are gated on opt, so it only affects opt builds.
+            name = "smaller_binary",
+            enabled = True,
+            flag_sets = [
+                flag_set(
+                    actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile],
+                    flag_groups = [flag_group(flags = ["/Gy", "/Gw"])],
+                    with_features = [with_feature_set(features = ["opt"])],
+                ),
+                flag_set(
+                    actions = all_link_actions,
+                    flag_groups = [flag_group(flags = ["/OPT:ICF", "/OPT:REF"])],
+                    with_features = [with_feature_set(features = ["opt"])],
+                ),
+            ],
+        )
+
+        compiler_input_flags_feature = feature(
+            name = "compiler_input_flags",
+            flag_sets = [
+                flag_set(
+                    actions = [
+                        ACTION_NAMES.assemble,
+                        ACTION_NAMES.preprocess_assemble,
+                        ACTION_NAMES.c_compile,
+                        ACTION_NAMES.cpp_compile,
+                        ACTION_NAMES.cpp_header_parsing,
+                        ACTION_NAMES.cpp_module_compile,
+                        ACTION_NAMES.cpp_module_codegen,
+                    ],
+                    flag_groups = [
+                        flag_group(
+                            flags = ["/c", "%{source_file}"],
+                            expand_if_available = "source_file",
+                        ),
+                    ],
+                ),
+            ],
+        )
+
+        def_file_feature = feature(
+            name = "def_file",
+            flag_sets = [
+                flag_set(
+                    actions = all_link_actions,
+                    flag_groups = [
+                        flag_group(
+                            flags = ["/DEF:%{def_file_path}", "/ignore:4070"],
+                            expand_if_available = "def_file_path",
+                        ),
+                    ],
+                ),
+            ],
+        )
+
+        msvc_env_feature = feature(
+            name = "msvc_env",
+            env_sets = [
+                env_set(
+                    actions = [
+                        ACTION_NAMES.c_compile,
+                        ACTION_NAMES.cpp_compile,
+                        ACTION_NAMES.cpp_module_compile,
+                        ACTION_NAMES.cpp_module_codegen,
+                        ACTION_NAMES.cpp_header_parsing,
+                        ACTION_NAMES.assemble,
+                        ACTION_NAMES.preprocess_assemble,
+                        ACTION_NAMES.cpp_link_executable,
+                        ACTION_NAMES.cpp_link_dynamic_library,
+                        ACTION_NAMES.cpp_link_nodeps_dynamic_library,
+                        ACTION_NAMES.cpp_link_static_library,
+                    ],
+                    env_entries = [
+                        env_entry(key = "PATH", value = ctx.attr.msvc_env_path),
+                        env_entry(key = "TMP", value = ctx.attr.msvc_env_tmp),
+                        env_entry(key = "TEMP", value = ctx.attr.msvc_env_tmp),
+                    ],
+                ),
+            ],
+            implies = ["msvc_compile_env", "msvc_link_env"],
+        )
+        features = [
+            no_legacy_features_feature,
+            nologo_feature,
+            has_configured_linker_path_feature,
+            no_stripping_feature,
+            targets_windows_feature,
+            copy_dynamic_libraries_to_binary_feature,
+            default_compile_flags_feature,
+            msvc_env_feature,
+            msvc_compile_env_feature,
+            msvc_link_env_feature,
+            include_paths_feature,
+            preprocessor_defines_feature,
+            parse_showincludes_feature,
+            generate_pdb_file_feature,
+            shared_flag_feature,
+            linkstamps_feature,
+            output_execpath_flags_feature,
+            archiver_flags_feature,
+            input_param_flags_feature,
+            linker_subsystem_flag_feature,
+            user_link_flags_feature,
+            default_link_flags_feature,
+            linker_param_file_feature,
+            static_link_msvcrt_feature,
+            static_link_msvcrt_no_debug_feature,
+            dynamic_link_msvcrt_no_debug_feature,
+            static_link_msvcrt_debug_feature,
+            dynamic_link_msvcrt_debug_feature,
+            dbg_feature,
+            fastbuild_feature,
+            opt_feature,
+            frame_pointer_feature,
+            disable_assertions_feature,
+            determinism_feature,
+            treat_warnings_as_errors_feature,
+            smaller_binary_feature,
+            ignore_noisy_warnings_feature,
+            user_compile_flags_feature,
+            sysroot_feature,
+            unfiltered_compile_flags_feature,
+            compiler_param_file_feature,
+            compiler_output_flags_feature,
+            compiler_input_flags_feature,
+            def_file_feature,
+            windows_export_all_symbols_feature,
+            no_windows_export_all_symbols_feature,
+            supports_dynamic_linker_feature,
+            supports_interface_shared_libraries_feature,
+        ]
+    else:
+        targets_windows_feature = feature(
+            name = "targets_windows",
+            implies = ["copy_dynamic_libraries_to_binary"],
+            enabled = True,
+        )
+
+        copy_dynamic_libraries_to_binary_feature = feature(name = "copy_dynamic_libraries_to_binary")
+
+        gcc_env_feature = feature(
+            name = "gcc_env",
+            enabled = True,
+            env_sets = [
+                env_set(
+                    actions = [
+                        ACTION_NAMES.c_compile,
+                        ACTION_NAMES.cpp_compile,
+                        ACTION_NAMES.cpp_module_compile,
+                        ACTION_NAMES.cpp_module_codegen,
+                        ACTION_NAMES.cpp_header_parsing,
+                        ACTION_NAMES.assemble,
+                        ACTION_NAMES.preprocess_assemble,
+                        ACTION_NAMES.cpp_link_executable,
+                        ACTION_NAMES.cpp_link_dynamic_library,
+                        ACTION_NAMES.cpp_link_nodeps_dynamic_library,
+                        ACTION_NAMES.cpp_link_static_library,
+                    ],
+                    env_entries = [
+                        env_entry(key = "PATH", value = ctx.attr.tool_bin_path),
+                    ],
+                ),
+            ],
+        )
+
+        default_compile_flags_feature = feature(
+            name = "default_compile_flags",
+            enabled = True,
+            flag_sets = [
+                flag_set(
+                    actions = [
+                        ACTION_NAMES.linkstamp_compile,
+                        ACTION_NAMES.cpp_compile,
+                        ACTION_NAMES.cpp_header_parsing,
+                        ACTION_NAMES.cpp_module_compile,
+                        ACTION_NAMES.cpp_module_codegen,
+                        ACTION_NAMES.lto_backend,
+                        ACTION_NAMES.clif_match,
+                    ],
+                    flag_groups = [flag_group(flags = ["-std=gnu++0x"])],
+                ),
+            ],
+        )
+
+        default_link_flags_feature = feature(
+            name = "default_link_flags",
+            enabled = True,
+            flag_sets = [
+                flag_set(
+                    actions = all_link_actions,
+                    flag_groups = [flag_group(flags = ["-lstdc++"])],
+                ),
+            ],
+        )
+
+        supports_dynamic_linker_feature = feature(
+            name = "supports_dynamic_linker",
+            enabled = True,
+        )
+
+        if ctx.attr.cpu == "x64_windows" and ctx.attr.compiler == "mingw-gcc":
+            compiler_param_file_feature = feature(
+                name = "compiler_param_file",
+            )
+
+            features = [
+                targets_windows_feature,
+                copy_dynamic_libraries_to_binary_feature,
+                gcc_env_feature,
+                default_compile_flags_feature,
+                compiler_param_file_feature,
+                default_link_flags_feature,
+                supports_dynamic_linker_feature,
+            ]
+        else:
+            supports_pic_feature = feature(
+                name = "supports_pic",
+                enabled = True,
+            )
+            supports_start_end_lib_feature = feature(
+                name = "supports_start_end_lib",
+                enabled = True,
+            )
+
+            dbg_feature = feature(name = "dbg")
+
+            opt_feature = feature(name = "opt")
+
+            sysroot_feature = feature(
+                name = "sysroot",
+                enabled = True,
+                flag_sets = [
+                    flag_set(
+                        actions = [
+                            ACTION_NAMES.preprocess_assemble,
+                            ACTION_NAMES.linkstamp_compile,
+                            ACTION_NAMES.c_compile,
+                            ACTION_NAMES.cpp_compile,
+                            ACTION_NAMES.cpp_header_parsing,
+                            ACTION_NAMES.cpp_module_compile,
+                            ACTION_NAMES.cpp_module_codegen,
+                            ACTION_NAMES.lto_backend,
+                            ACTION_NAMES.clif_match,
+                            ACTION_NAMES.cpp_link_executable,
+                            ACTION_NAMES.cpp_link_dynamic_library,
+                            ACTION_NAMES.cpp_link_nodeps_dynamic_library,
+                        ],
+                        flag_groups = [
+                            flag_group(
+                                flags = ["--sysroot=%{sysroot}"],
+                                expand_if_available = "sysroot",
+                            ),
+                        ],
+                    ),
+                ],
+            )
+
+            fdo_optimize_feature = feature(
+                name = "fdo_optimize",
+                flag_sets = [
+                    flag_set(
+                        actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile],
+                        flag_groups = [
+                            flag_group(
+                                flags = [
+                                    "-fprofile-use=%{fdo_profile_path}",
+                                    "-fprofile-correction",
+                                ],
+                                expand_if_available = "fdo_profile_path",
+                            ),
+                        ],
+                    ),
+                ],
+                provides = ["profile"],
+            )
+
+            user_compile_flags_feature = feature(
+                name = "user_compile_flags",
+                enabled = True,
+                flag_sets = [
+                    flag_set(
+                        actions = [
+                            ACTION_NAMES.assemble,
+                            ACTION_NAMES.preprocess_assemble,
+                            ACTION_NAMES.linkstamp_compile,
+                            ACTION_NAMES.c_compile,
+                            ACTION_NAMES.cpp_compile,
+                            ACTION_NAMES.cpp_header_parsing,
+                            ACTION_NAMES.cpp_module_compile,
+                            ACTION_NAMES.cpp_module_codegen,
+                            ACTION_NAMES.lto_backend,
+                            ACTION_NAMES.clif_match,
+                        ],
+                        flag_groups = [
+                            flag_group(
+                                flags = ["%{user_compile_flags}"],
+                                iterate_over = "user_compile_flags",
+                                expand_if_available = "user_compile_flags",
+                            ),
+                        ],
+                    ),
+                ],
+            )
+
+            features = [
+                targets_windows_feature,
+                copy_dynamic_libraries_to_binary_feature,
+                gcc_env_feature,
+                supports_pic_feature,
+                default_compile_flags_feature,
+                default_link_flags_feature,
+                fdo_optimize_feature,
+                supports_dynamic_linker_feature,
+                dbg_feature,
+                opt_feature,
+                user_compile_flags_feature,
+                sysroot_feature,
+            ]
+
+    tool_paths = [
+        tool_path(name = name, path = path)
+        for name, path in ctx.attr.tool_paths.items()
+    ]
+
+    return cc_common.create_cc_toolchain_config_info(
+        ctx = ctx,
+        features = features,
+        action_configs = action_configs,
+        artifact_name_patterns = artifact_name_patterns,
+        cxx_builtin_include_directories = ctx.attr.cxx_builtin_include_directories,
+        toolchain_identifier = ctx.attr.toolchain_identifier,
+        host_system_name = ctx.attr.host_system_name,
+        target_system_name = ctx.attr.target_system_name,
+        target_cpu = ctx.attr.cpu,
+        target_libc = ctx.attr.target_libc,
+        compiler = ctx.attr.compiler,
+        abi_version = ctx.attr.abi_version,
+        abi_libc_version = ctx.attr.abi_libc_version,
+        tool_paths = tool_paths,
+    )
+
+cc_toolchain_config = rule(  # rule backed by _impl; it provides CcToolchainConfigInfo
+    implementation = _impl,
+    attrs = {
+        "cpu": attr.string(mandatory = True),  # forwarded as target_cpu; _impl also tests it against "x64_windows"
+        "compiler": attr.string(),  # forwarded as compiler; _impl also tests it against "mingw-gcc"
+        "toolchain_identifier": attr.string(),
+        "host_system_name": attr.string(),
+        "target_system_name": attr.string(),
+        "target_libc": attr.string(),
+        "abi_version": attr.string(),
+        "abi_libc_version": attr.string(),
+        "tool_paths": attr.string_dict(),  # tool name -> path; each entry becomes a tool_path(...)
+        "cxx_builtin_include_directories": attr.string_list(),
+        "archiver_flags": attr.string_list(default = []),
+        "default_link_flags": attr.string_list(default = []),
+        "msvc_env_tmp": attr.string(default = "msvc_not_found"),  # exported as both TMP and TEMP by the msvc_env feature
+        "msvc_env_path": attr.string(default = "msvc_not_found"),  # exported as PATH by the msvc_env feature
+        "msvc_env_include": attr.string(default = "msvc_not_found"),
+        "msvc_env_lib": attr.string(default = "msvc_not_found"),
+        "msvc_cl_path": attr.string(default = "vc_installation_error.bat"),  # NOTE(review): default presumably makes a missing MSVC install fail loudly - confirm
+        "msvc_ml_path": attr.string(default = "vc_installation_error.bat"),
+        "msvc_link_path": attr.string(default = "vc_installation_error.bat"),
+        "msvc_lib_path": attr.string(default = "vc_installation_error.bat"),
+        "dbg_mode_debug_flag": attr.string(),
+        "fastbuild_mode_debug_flag": attr.string(),
+        "tool_bin_path": attr.string(default = "not_found"),  # exported as PATH by the gcc_env feature
+    },
+    provides = [CcToolchainConfigInfo],
+)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/config/BUILD b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/config/BUILD
new file mode 100644
index 0000000000..683fc6594f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bazel/rbe/windows-bazel-4.2.1/config/BUILD
@@ -0,0 +1,46 @@
+# Copyright 2020 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file is auto-generated by github.com/bazelbuild/bazel-toolchains/pkg/rbeconfigsgen
+# and should not be modified directly.
+
+package(default_visibility = ["//visibility:public"])
+
+
+toolchain(  # offers //cc:cc-compiler-x64_windows as the C++ toolchain when exec and target are both Windows/x86_64
+    name = "cc-toolchain",
+    exec_compatible_with = [
+        "@bazel_tools//platforms:windows",
+        "@bazel_tools//platforms:x86_64",
+    ],
+    target_compatible_with = [
+        "@bazel_tools//platforms:windows",
+        "@bazel_tools//platforms:x86_64",
+    ],
+    toolchain = "//cc:cc-compiler-x64_windows",
+    toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
+)
+
+platform(  # Windows/x86_64 platform derived from the host platform, plus container-image/OSFamily exec properties
+    name = "platform",
+    parents = ["@local_config_platform//:host"],
+    constraint_values = [
+        "@bazel_tools//platforms:windows",
+        "@bazel_tools//platforms:x86_64",
+    ],
+    exec_properties = {
+        "container-image": "docker://gcr.io/skia-public/rbe-container-skia-windows@sha256:ec3825d4b95d590c4c8cbdf0cc6f0ecb85547b261252542236ef131004b6f126",  # image pinned by digest
+        "OSFamily": "Windows",
+    },
+)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/bench.c b/third-party/libjxl/libjxl/third_party/skcms/bench.c
new file mode 100644
index 0000000000..671a6d778a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/bench.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+// A simple bench harness for skcms_Transform(), mostly to run in a profiler.
+
+#ifdef _MSC_VER
+    #define _CRT_SECURE_NO_WARNINGS
+#endif
+
+#include "skcms.h"
+#include "skcms_internal.h"
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#define expect(cond) if (!(cond)) exit(1)
+
+static void load_file(const char* filename, void** buf, size_t* len) {
+    // Reads all of `filename` into a malloc'd *buf (caller frees) and stores its size in *len; exits on any failure.
+    FILE* fp = fopen(filename, "rb");
+    expect(fp);
+    expect(fseek(fp, 0L, SEEK_END) == 0);
+    long size = ftell(fp);
+    expect(size > 0);
+    *len = (size_t)size;
+    rewind(fp);
+
+    *buf = malloc(*len);
+    expect(*buf);
+    size_t bytes_read = fread(*buf, 1, *len, fp);
+    fclose(fp);  // the handle used to be leaked; the buffer now holds everything we need
+    expect(bytes_read == *len);
+}
+
+// Just to keep us on our toes, we transform a non-power-of-two number of pixels.
+#define NPIXELS 255
+
+static float src_pixels[NPIXELS * 4],
+             dst_pixels[NPIXELS * 4];
+
+int main(int argc, char** argv) {
+    int           n = 100000;
+    const char* src = NULL;
+    const char* dst = NULL;
+    // Flags: -n <loop count>, -s <source ICC file>, -d <destination ICC file>.
+    for (int i = 0; i + 1 < argc; i++) {  // each flag consumes a value, so a trailing flag is ignored rather than read past argv
+        if      (0 == strcmp(argv[i], "-n")) { n   = atoi(argv[++i]); }
+        else if (0 == strcmp(argv[i], "-s")) { src =      argv[++i] ; }
+        else if (0 == strcmp(argv[i], "-d")) { dst =      argv[++i] ; }
+    }
+
+    // Default to sRGB -> Display P3.
+    skcms_ICCProfile src_profile = *skcms_sRGB_profile(),
+                     dst_profile = *skcms_sRGB_profile();
+    dst_profile.toXYZD50 = (skcms_Matrix3x3){{
+        { 0.51512146f  , 0.29197692f , 0.15710449f},
+        { 0.24119567f  , 0.6922454f  , 0.0665741f },
+        {-0.0010375976f, 0.041885376f, 0.7840728f },
+    }};
+
+    void *src_buf = NULL,
+         *dst_buf = NULL;
+    size_t src_len,
+           dst_len;
+    if (src) {
+        load_file(src, &src_buf, &src_len);
+        if (!skcms_Parse(src_buf, src_len, &src_profile)) {
+            return 1;
+        }
+    }
+    if (dst) {
+        load_file(dst, &dst_buf, &dst_len);
+        if (!skcms_Parse(dst_buf, dst_len, &dst_profile)) {
+            return 1;
+        }
+    }
+
+    // We'll rotate through pixel formats to get samples from all the various stages.
+    skcms_PixelFormat src_fmt = skcms_PixelFormat_RGB_565,
+                      dst_fmt = skcms_PixelFormat_RGB_565;
+    const int wrap = skcms_PixelFormat_BGRA_ffff+1;
+
+    uint32_t palette[256];
+    for (int i = 0; i < 256; i++) {
+        palette[i] = (uint32_t)(255 - i%256) * 0x01010101;
+    }
+
+    clock_t start = clock();
+    bool all_ok = true;
+    for (int i = 0; i < n; i++) {
+        const skcms_AlphaFormat upm = skcms_AlphaFormat_Unpremul;
+        all_ok &= skcms_TransformWithPalette(src_pixels, src_fmt, upm, &src_profile,
+                                             dst_pixels, dst_fmt, upm, &dst_profile,
+                                             NPIXELS, palette);
+        src_fmt = (src_fmt + 3) % wrap;
+        do {
+            dst_fmt = (dst_fmt + 7) % wrap;
+        } while (needs_palette(dst_fmt));
+    }
+
+    clock_t ticks = clock() - start;
+    printf("%d loops in %g clock ticks, %.3g ns / pixel\n",  // pixel count is computed in double: n * NPIXELS can overflow int
+            n, (double)ticks, (double)ticks / (CLOCKS_PER_SEC * 1e-9) / ((double)n * NPIXELS));
+
+    if (src_buf) { free(src_buf); }
+    if (dst_buf) { free(dst_buf); }
+
+    return all_ok ? 0 : 1;
+}
diff --git a/third-party/libjxl/libjxl/third_party/skcms/build.ninja b/third-party/libjxl/libjxl/third_party/skcms/build.ninja
new file mode 100644
index 0000000000..faed2c68c4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/build.ninja
@@ -0,0 +1,47 @@
+builddir = out
+
+subninja ninja/clang
+subninja ninja/clang.O0
+subninja ninja/clang.sse2
+subninja ninja/clang.sse41
+subninja ninja/clang.avx512
+subninja ninja/clang.lsan
+subninja ninja/clang.m32
+subninja ninja/clang.m32-O0
+subninja ninja/clang.msan
+subninja ninja/clang.native
+subninja ninja/clang.portable
+subninja ninja/clang.tiny
+subninja ninja/clang.xsan
+subninja ninja/clang.xsan-portable
+
+subninja ninja/gcc
+subninja ninja/gcc.O0
+subninja ninja/gcc.m32
+subninja ninja/gcc.m32-O0
+subninja ninja/gcc.native
+subninja ninja/gcc.portable
+subninja ninja/gcc.tiny
+subninja ninja/gcc.xsan
+
+subninja ninja/android
+subninja ninja/android.fp16
+subninja ninja/android.nofp16
+subninja ninja/android.lsan
+subninja ninja/android.portable
+subninja ninja/android.tiny
+
+subninja ninja/android-arm
+subninja ninja/android-arm.lsan
+subninja ninja/android-arm.neon-vfpv4
+subninja ninja/android-arm.neon-vfpv4-O3
+subninja ninja/android-arm.portable
+subninja ninja/android-arm.tiny
+subninja ninja/android-arm.vfpv2
+
+subninja ninja/ios
+subninja ninja/ios.portable
+subninja ninja/ios.tiny
+subninja ninja/ios.xsan
+
+subninja ninja/emscripten
diff --git a/third-party/libjxl/libjxl/third_party/skcms/codereview.settings b/third-party/libjxl/libjxl/third_party/skcms/codereview.settings
new file mode 100644
index 0000000000..3abb12bda7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/codereview.settings
@@ -0,0 +1,5 @@
+VIEW_VC: https://skia.googlesource.com/skcms/+/
+CC_LIST: reviews@skia.org
+BUG_PREFIX: skia:
+PROJECT: skcms
+GERRIT_HOST: True
diff --git a/third-party/libjxl/libjxl/third_party/skcms/fuzz/BUILD b/third-party/libjxl/libjxl/third_party/skcms/fuzz/BUILD
new file mode 100644
index 0000000000..60b8f84838
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/fuzz/BUILD
@@ -0,0 +1,31 @@
+cc_library(  # standalone main() (fuzz_main.c) that lets a libfuzzer target run as an ordinary binary
+    name = "fuzz_main",
+    srcs = ["fuzz_main.c"],
+)
+
+cc_binary(  # fuzz target: parse an ICC profile, then approximate its transfer curves
+    name = "fuzz_iccprofile_atf",
+    srcs = ["fuzz_iccprofile_atf.c"],
+    deps = [
+        ":fuzz_main",
+        "//:skcms",
+    ],
+)
+
+cc_binary(  # fuzz target: parse an ICC profile, then read its tags, TRC tables and A2B data
+    name = "fuzz_iccprofile_info",
+    srcs = ["fuzz_iccprofile_info.c"],
+    deps = [
+        ":fuzz_main",
+        "//:skcms",
+    ],
+)
+
+cc_binary(  # fuzz target: parse an ICC profile, then transform pixels to and from an embedded sRGB profile
+    name = "fuzz_iccprofile_transform",
+    srcs = ["fuzz_iccprofile_transform.c"],
+    deps = [
+        ":fuzz_main",
+        "//:skcms",
+    ],
+)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/fuzz/fuzz_iccprofile_atf.c b/third-party/libjxl/libjxl/third_party/skcms/fuzz/fuzz_iccprofile_atf.c
new file mode 100644
index 0000000000..25c99a3682
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/fuzz/fuzz_iccprofile_atf.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+// This fuzz target parses an ICCProfile and then computes the
+// approximateTransferFunction.  This is separate from fuzz_iccprofile_info
+// because it is a much more time-consuming function call.
+
+#include "../skcms.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size);
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+    // Parse the fuzzed bytes; when skcms_Parse rejects them there is nothing further to exercise.
+    skcms_ICCProfile profile;
+    if (skcms_Parse(data, size, &profile)) {
+        skcms_TransferFunction approx;
+        float worst_error;
+        // Run the curve approximation on each of the three TRC slots; the results are deliberately discarded.
+        for (int channel = 0; channel != 3; channel++) {
+            (void)skcms_ApproximateCurve(&profile.trc[channel], &approx, &worst_error);
+            (void)worst_error;
+        }
+    }
+
+    return 0;
+}
diff --git a/third-party/libjxl/libjxl/third_party/skcms/fuzz/fuzz_iccprofile_info.c b/third-party/libjxl/libjxl/third_party/skcms/fuzz/fuzz_iccprofile_info.c
new file mode 100644
index 0000000000..a9f5b4fd2f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/fuzz/fuzz_iccprofile_info.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+// This fuzz target parses an ICCProfile and then queries several pieces
+// of info from it.
+
+#include "../skcms.h"
+#include "../skcms_internal.h"
+
+static volatile uint32_t g_FoolTheOptimizer = 0;
+
+// Sum the first and last byte of each table the curve carries; yields 0 when table_entries is 0.
+static uint32_t read_table_extents(const skcms_Curve* c) {
+    if (!c->table_entries) { return 0; }
+
+    uint32_t sum = 0;
+    if (c->table_8) {
+        sum += c->table_8[0] + c->table_8[c->table_entries - 1];
+    }
+    if (c->table_16) {
+        sum += c->table_16[0] + c->table_16[2 * c->table_entries - 1];
+    }
+    return sum;
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size);
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+    skcms_ICCProfile p;
+    if (!skcms_Parse(data, size, &p)) {
+        return 0;  // the parser rejected the input, so there is nothing to probe
+    }
+
+    // Instead of testing all tags, just test that we can read the first and last.
+    // This does _not_ imply that everything in between works, but these calls should
+    // be enough for the fuzzer to find a way to break us.
+    if (p.tag_count > 0) {
+        skcms_ICCTag tag;
+        skcms_GetTagByIndex(&p,               0, &tag);
+        skcms_GetTagByIndex(&p, p.tag_count - 1, &tag);
+    }
+
+    // For TRC tables, test that we can read the first and last entries of each table.
+    if (p.has_trc) {
+        for (int i = 0; i < 3; ++i) {
+            g_FoolTheOptimizer += read_table_extents(&p.trc[i]);  // accumulate into the volatile global
+        }
+    }
+
+    // For A2B data, test that we can read the first and last entries of each table.
+    if (p.has_A2B) {
+        uint32_t x = 0;
+
+        for (uint32_t i = 0; i < p.A2B.input_channels; ++i) {
+            x += read_table_extents(&p.A2B.input_curves[i]);
+        }
+
+        if (p.A2B.input_channels) {
+            uint64_t grid_size = p.A2B.output_channels;  // becomes output_channels * product of grid_points[i]
+            for (uint32_t i = 0; i < p.A2B.input_channels; ++i) {
+                grid_size *= p.A2B.grid_points[i];
+            }
+
+            if (p.A2B.grid_8) {
+                x += p.A2B.grid_8[0] + p.A2B.grid_8[grid_size - 1];
+            }
+
+            if (p.A2B.grid_16) {
+                x += p.A2B.grid_16[0] + p.A2B.grid_16[2 * grid_size - 1];
+            }
+        }
+
+        for (uint32_t i = 0; i < p.A2B.output_channels; ++i) {
+            x += read_table_extents(&p.A2B.matrix_curves[i]);
+        }
+
+        for (uint32_t i = 0; i < p.A2B.output_channels; ++i) {
+            x += read_table_extents(&p.A2B.output_curves[i]);
+        }
+
+        g_FoolTheOptimizer = x;  // plain assignment: replaces whatever the TRC loop accumulated
+    }
+
+    return 0;
+}
diff --git a/third-party/libjxl/libjxl/third_party/skcms/fuzz/fuzz_iccprofile_transform.c b/third-party/libjxl/libjxl/third_party/skcms/fuzz/fuzz_iccprofile_transform.c
new file mode 100644
index 0000000000..829bb03ca9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/fuzz/fuzz_iccprofile_transform.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+// This fuzz target parses an ICCProfile and attempts to do some transforms
+// between a known profile and that profile.
+
+#include "../skcms.h"
+
+// This is profiles/sRGB_Facebook.icc, copied here (xxd -i ...) so that this
+// binary does not need any files as input, other than the fuzzed input.
+static unsigned char profiles_sRGB_Facebook_icc[] = {
+  0x00, 0x00, 0x02, 0x0c, 0x6c, 0x63, 0x6d, 0x73, 0x02, 0x10, 0x00, 0x00,
+  0x6d, 0x6e, 0x74, 0x72, 0x52, 0x47, 0x42, 0x20, 0x58, 0x59, 0x5a, 0x20,
+  0x07, 0xdc, 0x00, 0x01, 0x00, 0x19, 0x00, 0x03, 0x00, 0x29, 0x00, 0x39,
+  0x61, 0x63, 0x73, 0x70, 0x41, 0x50, 0x50, 0x4c, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf6, 0xd6,
+  0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d, 0x6c, 0x63, 0x6d, 0x73,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a,
+  0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x00, 0xfc, 0x00, 0x00, 0x00, 0x5e,
+  0x63, 0x70, 0x72, 0x74, 0x00, 0x00, 0x01, 0x5c, 0x00, 0x00, 0x00, 0x0b,
+  0x77, 0x74, 0x70, 0x74, 0x00, 0x00, 0x01, 0x68, 0x00, 0x00, 0x00, 0x14,
+  0x62, 0x6b, 0x70, 0x74, 0x00, 0x00, 0x01, 0x7c, 0x00, 0x00, 0x00, 0x14,
+  0x72, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0x90, 0x00, 0x00, 0x00, 0x14,
+  0x67, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0xa4, 0x00, 0x00, 0x00, 0x14,
+  0x62, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0xb8, 0x00, 0x00, 0x00, 0x14,
+  0x72, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0xcc, 0x00, 0x00, 0x00, 0x40,
+  0x67, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0xcc, 0x00, 0x00, 0x00, 0x40,
+  0x62, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0xcc, 0x00, 0x00, 0x00, 0x40,
+  0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03,
+  0x63, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x74, 0x65, 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x46, 0x42, 0x00, 0x00,
+  0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf6, 0xd6,
+  0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d, 0x58, 0x59, 0x5a, 0x20,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x16, 0x00, 0x00, 0x03, 0x33,
+  0x00, 0x00, 0x02, 0xa4, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x6f, 0xa2, 0x00, 0x00, 0x38, 0xf5, 0x00, 0x00, 0x03, 0x90,
+  0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x62, 0x99,
+  0x00, 0x00, 0xb7, 0x85, 0x00, 0x00, 0x18, 0xda, 0x58, 0x59, 0x5a, 0x20,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0xa0, 0x00, 0x00, 0x0f, 0x84,
+  0x00, 0x00, 0xb6, 0xcf, 0x63, 0x75, 0x72, 0x76, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xcb, 0x01, 0xc9, 0x03, 0x63,
+  0x05, 0x92, 0x08, 0x6b, 0x0b, 0xf6, 0x10, 0x3f, 0x15, 0x51, 0x1b, 0x34,
+  0x21, 0xf1, 0x29, 0x90, 0x32, 0x18, 0x3b, 0x92, 0x46, 0x05, 0x51, 0x77,
+  0x5d, 0xed, 0x6b, 0x70, 0x7a, 0x05, 0x89, 0xb1, 0x9a, 0x7c, 0xac, 0x69,
+  0xbf, 0x7d, 0xd3, 0xc3, 0xe9, 0x30, 0xff, 0xff
+};
+
+void exit(int);
+
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size);
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+    skcms_ICCProfile p;
+    if (!skcms_Parse(data, size, &p)) {
+        return 0;  // the fuzzed input did not parse, so there is nothing to transform with
+    }
+    skcms_ICCProfile srgb;
+    if (!skcms_Parse(profiles_sRGB_Facebook_icc, sizeof(profiles_sRGB_Facebook_icc), &srgb)) {
+        // This should never happen, but if it does, we want to make
+        // a big fuss about it (exiting kills libfuzzer, as if it crashed).
+        exit(1);
+    }
+
+    for (int mode = 0; mode < 2; mode++) {  // pass 0 uses p as parsed; pass 1 first runs skcms_MakeUsableAsDestination on it
+        if (mode == 1) {
+            (void)skcms_MakeUsableAsDestination(&p);
+        }
+
+        uint8_t src[256],
+                dst[256];
+        for (skcms_AlphaFormat srcAlpha = skcms_AlphaFormat_Opaque;
+             srcAlpha <= skcms_AlphaFormat_PremulAsEncoded; ++srcAlpha) {
+            for (skcms_AlphaFormat dstAlpha = skcms_AlphaFormat_Opaque;
+                 dstAlpha <= skcms_AlphaFormat_PremulAsEncoded; ++dstAlpha) {
+                for (int i = 0; i < 256; i++) {
+                    src[i] = (uint8_t)i;  // refill the source with the byte ramp 0..255 before every pair of transforms
+                }
+                skcms_Transform(src, skcms_PixelFormat_RGBA_8888, srcAlpha, &srgb,  // known sRGB -> fuzzed profile
+                                dst, skcms_PixelFormat_RGBA_8888, dstAlpha, &p,
+                                64);
+
+                skcms_Transform(src, skcms_PixelFormat_RGBA_8888, srcAlpha, &p,  // fuzzed profile -> known sRGB
+                                dst, skcms_PixelFormat_RGBA_8888, dstAlpha, &srgb,
+                                64);
+            }
+        }
+    }
+    return 0;
+}
diff --git a/third-party/libjxl/libjxl/third_party/skcms/fuzz/fuzz_main.c b/third-party/libjxl/libjxl/third_party/skcms/fuzz/fuzz_main.c
new file mode 100644
index 0000000000..68249ca73f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/fuzz/fuzz_main.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+// This main() can be used to run libfuzzer targets as standalone binaries.
+
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_WARNINGS
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int LLVMFuzzerTestOneInput(const uint8_t*, size_t);
+
+// Standalone driver for a libFuzzer target: reads the file named by argv[1]
+// fully into memory and passes it to LLVMFuzzerTestOneInput().
+//
+// Returns 1 on a usage error, on any I/O or allocation failure, and for an
+// empty file; otherwise returns whatever the fuzz target returns.  The file
+// handle and the input buffer are released on every path.
+int main(int argc, char** argv) {
+    if (argc != 2) {
+        printf("usage: %s <ICC filename>\n", argv[0]);
+        return 1;
+    }
+    FILE* fp = fopen(argv[1], "rb");
+    if (!fp) {
+        printf("Unable to open input file");
+        return 1;
+    }
+    // Seek to the end to learn the file size; a failed seek is reported the
+    // same way as a failed ftell().
+    long slen = (fseek(fp, 0L, SEEK_END) == 0) ? ftell(fp) : -1L;
+    if (slen <= 0) {
+        printf("ftell failed");
+        fclose(fp);
+        return 1;
+    }
+    size_t len = (size_t)slen;
+    rewind(fp);
+    void* data = malloc(len);
+    if (!data) {
+        fclose(fp);
+        return 1;
+    }
+    size_t size = fread(data, 1, len, fp);
+    fclose(fp);
+    if (size != len) {
+        printf("Unable to read file");
+        free(data);
+        return 1;
+    }
+
+    int result = LLVMFuzzerTestOneInput(data, size);
+    free(data);
+    return result;
+}
diff --git a/third-party/libjxl/libjxl/third_party/skcms/iccdump.c b/third-party/libjxl/libjxl/third_party/skcms/iccdump.c
new file mode 100644
index 0000000000..a3e3fd4ef1
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/iccdump.c
@@ -0,0 +1,596 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifdef _MSC_VER
+    #define _CRT_SECURE_NO_WARNINGS
+    #define SKCMS_NORETURN __declspec(noreturn)
+#else
+    #include <dlfcn.h>
+    #include <stdnoreturn.h>
+    #define SKCMS_NORETURN noreturn
+#endif
+
+#include "skcms.h"
+#include "skcms_internal.h"
+#include "test_only.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Prints "ERROR: <msg>" followed by a newline to stderr, then terminates the
+// process with exit status 1.  Never returns.
+SKCMS_NORETURN
+static void fatal(const char* msg) {
+    fprintf(stderr, "ERROR: %s\n", msg);
+    exit(1);
+}
+
+// xy co-ordinates of the CIE 1931 standard observer XYZ functions.
+// wavelength is sampled every 5 nm in [360, 700].
+// This is effectively the hull of the horseshoe in a chromaticity diagram.
+static const double kSpectralHull[] = {
+    0.17556, 0.00529384,
+    0.175161, 0.00525635,
+    0.174821, 0.0052206,
+    0.17451, 0.00518164,
+    0.174112, 0.00496373,
+    0.174008, 0.00498055,
+    0.173801, 0.00491541,
+    0.17356, 0.0049232,
+    0.173337, 0.00479674,
+    0.173021, 0.00477505,
+    0.172577, 0.0047993,
+    0.172087, 0.00483252,
+    0.171407, 0.00510217,
+    0.170301, 0.00578851,
+    0.168878, 0.00690024,
+    0.166895, 0.00855561,
+    0.164412, 0.0108576,
+    0.161105, 0.0137934,
+    0.156641, 0.0177048,
+    0.150985, 0.0227402,
+    0.14396, 0.029703,
+    0.135503, 0.0398791,
+    0.124118, 0.0578025,
+    0.109594, 0.0868425,
+    0.0912935, 0.132702,
+    0.0687059, 0.200723,
+    0.0453907, 0.294976,
+    0.0234599, 0.412703,
+    0.00816803, 0.538423,
+    0.00385852, 0.654823,
+    0.0138702, 0.750186,
+    0.0388518, 0.812016,
+    0.0743024, 0.833803,
+    0.114161, 0.826207,
+    0.154722, 0.805863,
+    0.192876, 0.781629,
+    0.22962, 0.754329,
+    0.265775, 0.724324,
+    0.301604, 0.692308,
+    0.337363, 0.658848,
+    0.373102, 0.624451,
+    0.408736, 0.589607,
+    0.444062, 0.554714,
+    0.478775, 0.520202,
+    0.512486, 0.486591,
+    0.544787, 0.454434,
+    0.575151, 0.424232,
+    0.602933, 0.396497,
+    0.627037, 0.372491,
+    0.648233, 0.351395,
+    0.665764, 0.334011,
+    0.680079, 0.319747,
+    0.691504, 0.308342,
+    0.700606, 0.299301,
+    0.707918, 0.292027,
+    0.714032, 0.285929,
+    0.719033, 0.280935,
+    0.723032, 0.276948,
+    0.725992, 0.274008,
+    0.728272, 0.271728,
+    0.729969, 0.270031,
+    0.731089, 0.268911,
+    0.731993, 0.268007,
+    0.732719, 0.267281,
+    0.733417, 0.266583,
+    0.734047, 0.265953,
+    0.73439, 0.26561,
+    0.734592, 0.265408,
+    0.73469, 0.26531,
+};
+
+// Reads a big-endian unsigned 16-bit value from the two bytes at ptr, which
+// need not be aligned.
+//
+// The value is assembled byte by byte, so the result does not depend on the
+// host's byte order and no compiler-specific byte-swap intrinsic is needed.
+static uint16_t read_big_u16(const uint8_t* ptr) {
+    return (uint16_t)(((unsigned)ptr[0] << 8) | (unsigned)ptr[1]);
+}
+
+// Reads a big-endian unsigned 32-bit value from the four bytes at ptr, which
+// need not be aligned.
+//
+// The value is assembled byte by byte, so the result does not depend on the
+// host's byte order and no compiler-specific byte-swap intrinsic is needed.
+static uint32_t read_big_u32(const uint8_t* ptr) {
+    return ((uint32_t)ptr[0] << 24)
+         | ((uint32_t)ptr[1] << 16)
+         | ((uint32_t)ptr[2] <<  8)
+         |  (uint32_t)ptr[3];
+}
+
+// TODO: Put state into struct with FP
+static int desmos_id = 0;
+
+// Creates `filename` and writes the opening part of an HTML page that loads
+// the Desmos calculator script and starts a `c.setState({...})` call whose
+// expression "list" array is left open.  Callers append expression items and
+// finish the page with desmos_close().
+//
+// Resets the global desmos_id counter to 0.  Calls fatal() (which exits) if
+// the file cannot be opened, so the returned FILE* is never NULL.
+static FILE* desmos_open(const char* filename) {
+    FILE* fp = fopen(filename, "wb");
+    if (!fp) {
+        fatal("Unable to open output file");
+    }
+
+    fprintf(fp, "<!DOCTYPE html>\n");
+    fprintf(fp, "<html>\n");
+    fprintf(fp, "<head>\n");
+    // NOTE(review): the apiKey is hard-coded in the emitted page; confirm it is a key meant for public use.
+    fprintf(fp, "<script src=\"https://www.desmos.com/api/v1.1/calculator.js?apiKey=dcb31709b452b1cf9dc26972add0fda6\"></script>\n");
+    fprintf(fp, "<style>\n");
+    // "%%" is a literal percent sign in the output.
+    fprintf(fp, "  html, body{ width: 100%%; height: 100%%; margin: 0; padding: 0; overflow: hidden; }\n");
+    fprintf(fp, "  #calculator { width: 100%%; height: 100%%; }\n");
+    fprintf(fp, "</style>\n");
+    fprintf(fp, "</head>\n");
+    fprintf(fp, "<body>\n");
+    fprintf(fp, "<div id=\"calculator\"></div>\n");
+    fprintf(fp, "<script>\n");
+    fprintf(fp, "var elt = document.getElementById('calculator');\n");
+    fprintf(fp, "var c = Desmos.GraphingCalculator(elt);\n");
+    fprintf(fp, "c.setState({\n");
+    fprintf(fp, "\"version\": 5,\n");
+    fprintf(fp, "\"expressions\": {\n");
+    fprintf(fp, "\"list\": [\n");
+
+    desmos_id = 0;
+    return fp;
+}
+
+// Finishes a page started by desmos_open(): closes the expression list and
+// the setState() call, sets the graph's math bounds to [-0.1, 1.1] on both
+// axes, writes the closing HTML tags and closes fp.
+static void desmos_close(FILE* fp) {
+    fprintf(fp, "] } } );\n");
+    fprintf(fp, "c.setMathBounds({left: -0.1, right: 1.1, bottom: -0.1, top: 1.1});\n");
+    fprintf(fp, "</script>\n");
+    fprintf(fp, "</body>\n");
+    fprintf(fp, "</html>\n");
+    fclose(fp);
+}
+
+// Appends one Desmos "expression" item to fp that plots the transfer function
+// `tf` as a piecewise LaTeX formula in the given color:
+//     0 <= x <  d :  c*x + f
+//     d <= x <= 1 :  (a*x + b)^g + e
+// Takes the next id from the global desmos_id counter.  The item ends with a
+// trailing comma so that further items can follow it.
+static void desmos_transfer_function(FILE* fp, const skcms_TransferFunction* tf,
+                                     const char* color) {
+    fprintf(fp, "{\n");
+    fprintf(fp, " \"type\": \"expression\",\n");
+    fprintf(fp, " \"id\": \"%d\",\n", desmos_id++);
+    fprintf(fp, " \"color\": \"%s\",\n", color);
+    // Backslashes are escaped twice: once for this C literal and once for the
+    // JavaScript string literal in the generated page.
+    fprintf(fp, " \"latex\": \"\\\\left\\\\{"
+            "0 \\\\le x < %.5f: %.5fx + %.5f, "                    // 0 <= x < d: cx + f
+            "%.5f \\\\le x \\\\le 1: (%.5fx + %.5f)^{%.5f} + %.5f" // d <= x <= 1: (ax + b)^g + e
+            "\\\\right\\\\}\"\n",
+            tf->d, tf->c, tf->f,
+            tf->d, tf->a, tf->b, tf->g, tf->e);
+    fprintf(fp, "},\n");
+}
+
+// Callback that produces the i-th value of a table column; `ctx` is opaque
+// data supplied alongside the callback.
+typedef double table_func(int i, const void* ctx);
+
+// Appends to fp a collapsed Desmos "folder" item titled `label` followed by a
+// two-column "table" item that the folder lists as its member.
+//
+//   N            number of rows
+//   color        color used for both columns
+//   x, x_ctx     callback (and its context) producing the X column values
+//   y, y_ctx     callback (and its context) producing the Y column values
+//
+// The X column is marked hidden.  Values are printed with five decimals, six
+// per output line.  Ids are taken from the global desmos_id counter: three up
+// front (folder, table, and the subscript used in the column names) plus one
+// for each column.
+static void desmos_table(FILE* fp, int N, const char* label, const char* color,
+                         table_func* x, const void* x_ctx,
+                         table_func* y, const void* y_ctx) {
+    int folder_id = desmos_id++,
+        table_id  = desmos_id++,
+        subscript = desmos_id++;
+
+    // Folder
+    fprintf(fp, "{\n");
+    fprintf(fp, " \"type\": \"folder\",\n");
+    fprintf(fp, " \"id\": \"%d\",\n", folder_id);
+    fprintf(fp, " \"title\": \"%s\",\n", label);
+    fprintf(fp, " \"collapsed\": true,\n");
+    fprintf(fp, " \"memberIds\": { \"%d\": true }\n", table_id);
+    fprintf(fp, "},\n");
+
+    // Table
+    fprintf(fp, "{\n");
+    fprintf(fp, " \"type\": \"table\",\n");
+    fprintf(fp, " \"id\": \"%d\",\n", table_id);
+    fprintf(fp, " \"columns\": [\n");
+
+    // X Column
+    fprintf(fp, " {\n");
+    fprintf(fp, "  \"values\": [");
+
+    for (int i = 0; i < N; ++i) {
+        // Start a new output line every six values.
+        if (i % 6 == 0) {
+            fprintf(fp, "\n  ");
+        }
+        fprintf(fp, " \"%.5f\",", x(i, x_ctx));
+    }
+
+    fprintf(fp, "  ],\n");
+    fprintf(fp, "  \"hidden\": true,\n");
+    fprintf(fp, "  \"id\": \"%d\",\n", desmos_id++);
+    fprintf(fp, "  \"color\": \"%s\",\n", color);
+    fprintf(fp, "  \"latex\": \"x_{%d}\"\n", subscript);
+    fprintf(fp, " },\n");
+
+    // Y Column
+    fprintf(fp, " {\n");
+    fprintf(fp, "  \"values\": [\n");
+
+    for (int i = 0; i < N; ++i) {
+        // Start a new output line every six values.
+        if (i % 6 == 0) {
+            fprintf(fp, "\n  ");
+        }
+        fprintf(fp, " \"%.5f\",", y(i, y_ctx));
+    }
+    fprintf(fp, "  ],\n");
+    fprintf(fp, "  \"id\": \"%d\",\n", desmos_id++);
+    fprintf(fp, "  \"color\": \"%s\",\n", color);
+    fprintf(fp, "  \"latex\": \"y_{%d}\"\n", subscript);
+    fprintf(fp, " }\n");
+    fprintf(fp, " ]\n");
+    fprintf(fp, "},\n");
+}
+
+// table_func yielding evenly spaced values: returns i multiplied by the
+// double that `ctx` points to.
+static double uniform_scale_table_func(int i, const void* ctx) {
+    const double* step = (const double*)ctx;
+    return i * (*step);
+}
+
+// table_func yielding entry i of a tabulated skcms_Curve (passed as `ctx`),
+// scaled by its full range: 8-bit entries are divided by 255, big-endian
+// 16-bit entries by 65535.
+static double curve_table_func(int i, const void* ctx) {
+    const skcms_Curve* table = (const skcms_Curve*)ctx;
+    if (table->table_8) {
+        return table->table_8[i] / 255.0;
+    }
+    return read_big_u16(table->table_16 + 2*i) / 65535.0;
+}
+
+// Appends Desmos items for one curve to fp.
+//
+// A curve with no table entries is plotted directly from its parametric
+// transfer function.  A tabulated curve is written as a "<color> Table" data
+// table; if skcms_ApproximateCurve() succeeds, the fitted transfer function is
+// plotted as well, in the color "Dark<color>".
+static void desmos_curve(FILE* fp, const skcms_Curve* curve, const char* color) {
+    if (curve->table_entries == 0) {
+        desmos_transfer_function(fp, &curve->parametric, color);
+        return;
+    }
+
+    // X positions are spread evenly, with the last entry landing on 1.
+    double step = 1.0 / (curve->table_entries - 1.0);
+
+    char title[64];
+    (void)snprintf(title, sizeof(title), "%s Table", color);
+    desmos_table(fp, (int)curve->table_entries, title, color,
+                 uniform_scale_table_func, &step,
+                 curve_table_func, curve);
+
+    skcms_TransferFunction fit;
+    float fit_error;
+    if (!skcms_ApproximateCurve(curve, &fit, &fit_error)) {
+        return;
+    }
+
+    char fit_color[64];
+    (void)snprintf(fit_color, sizeof(fit_color), "Dark%s", color);
+    desmos_transfer_function(fp, &fit, fit_color);
+}
+
+// Calls desmos_curve() for each of the first `num_curves` curves, pairing
+// curves[k] with colors[k].
+static void desmos_curves(FILE* fp, uint32_t num_curves, const skcms_Curve* curves,
+                          const char** colors) {
+    uint32_t k = 0;
+    while (k < num_curves) {
+        desmos_curve(fp, &curves[k], colors[k]);
+        ++k;
+    }
+}
+
+// Appends Desmos items for the inverse of one curve to fp.
+//
+// A curve with no table entries is plotted from the inverse of its parametric
+// transfer function, when that inverse exists.  A tabulated curve is written
+// as a "<color> Inverse Table" with the X and Y columns swapped; if the curve
+// can be approximated and the approximation inverted, that inverse is plotted
+// too, in the color "Dark<color>".
+static void desmos_inv_curve(FILE* fp, const skcms_Curve* curve, const char* color) {
+    skcms_TransferFunction inverse;
+
+    if (curve->table_entries == 0) {
+        if (skcms_TransferFunction_invert(&curve->parametric, &inverse)) {
+            desmos_transfer_function(fp, &inverse, color);
+        }
+        return;
+    }
+
+    // The evenly spaced positions go in the Y column, the table values in X.
+    double step = 1.0 / (curve->table_entries - 1.0);
+
+    char title[64];
+    (void)snprintf(title, sizeof(title), "%s Inverse Table", color);
+    desmos_table(fp, (int)curve->table_entries, title, color,
+                 curve_table_func, curve,
+                 uniform_scale_table_func, &step);
+
+    skcms_TransferFunction fit;
+    float fit_error;
+    if (!skcms_ApproximateCurve(curve, &fit, &fit_error) ||
+        !skcms_TransferFunction_invert(&fit, &inverse)) {
+        return;
+    }
+
+    char fit_color[64];
+    (void)snprintf(fit_color, sizeof(fit_color), "Dark%s", color);
+    desmos_transfer_function(fp, &inverse, fit_color);
+}
+
+// Calls desmos_inv_curve() for each of the first `num_curves` curves, pairing
+// curves[k] with colors[k].
+static void desmos_inv_curves(FILE* fp, uint32_t num_curves, const skcms_Curve* curves,
+                              const char** colors) {
+    uint32_t k = 0;
+    while (k < num_curves) {
+        desmos_inv_curve(fp, &curves[k], colors[k]);
+        ++k;
+    }
+}
+
+static const double kSVGMarginLeft   = 100.0;
+static const double kSVGMarginRight  = 10.0;
+static const double kSVGMarginTop    = 10.0;
+static const double kSVGMarginBottom = 50.0;
+
+static const double kSVGScaleX = 800.0;
+static const double kSVGScaleY = 800.0;
+
+static const char* kSVG_RGB_Colors[3] = { "Red", "Green", "Blue" };
+static const char* kSVG_CMYK_Colors[4] = { "cyan", "magenta", "yellow", "black" };
+
+// Creates `filename` and writes the opening <svg> tag, sized to the plot area
+// (kSVGScaleX by kSVGScaleY) plus the margins on all four sides.
+// Calls fatal() (which exits) if the file cannot be opened, so the returned
+// FILE* is never NULL.  Pair with svg_close().
+static FILE* svg_open(const char* filename) {
+    FILE* out = fopen(filename, "wb");
+    if (out == NULL) {
+        fatal("Unable to open output file");
+    }
+
+    const double total_width  = kSVGMarginLeft + kSVGScaleX + kSVGMarginRight;
+    const double total_height = kSVGMarginTop + kSVGScaleY + kSVGMarginBottom;
+    fprintf(out, "<svg width=\"%g\" height=\"%g\" xmlns=\"http://www.w3.org/2000/svg\">\n",
+            total_width,
+            total_height);
+    return out;
+}
+
+// Writes the closing </svg> tag and closes fp.
+static void svg_close(FILE* fp) {
+    fprintf(fp, "</svg>\n");
+    fclose(fp);
+}
+
+// Opens an SVG <g> element on fp.  `fmt` must be a string literal holding the
+// element's attributes in printf format; at least one format argument is
+// required because __VA_ARGS__ follows a comma.  Close with svg_pop_group().
+#define svg_push_group(fp, fmt, ...) fprintf(fp, "<g " fmt ">\n", __VA_ARGS__)
+
+// Closes the <g> element most recently opened with svg_push_group().
+static void svg_pop_group(FILE* fp) {
+    fprintf(fp, "</g>\n");
+}
+
+// Draws the two axes as one black polyline through (0,1), (0,0) and (1,0).
+static void svg_axes(FILE* fp) {
+    fprintf(fp, "<polyline fill=\"none\" stroke=\"black\" vector-effect=\"non-scaling-stroke\" "
+                "points=\"0,1 0,0 1,0\"/>\n");
+}
+
+// Plots the transfer function `tf` on fp as an SVG polyline in the given
+// stroke color, sampled at the 256 inputs x = 0/255, 1/255, ..., 255/255.
+static void svg_transfer_function(FILE* fp, const skcms_TransferFunction* tf, const char* color) {
+    fprintf(fp, "<polyline fill=\"none\" stroke=\"%s\" vector-effect=\"non-scaling-stroke\" "
+            "points=\"\n", color);
+
+    for (int sample = 0; sample <= 255; ++sample) {
+        const float x = (float)sample / 255.0f;
+        const float y = skcms_TransferFunction_eval(tf, x);
+        fprintf(fp, "%g, %g\n", x, y);
+    }
+
+    fprintf(fp, "\"/>\n");
+}
+
+// Plots one curve on fp as an SVG polyline in the given stroke color.
+//
+// A curve with no table entries is drawn from its parametric transfer
+// function.  A tabulated curve is drawn from its raw table values, with a
+// scale() transform that maps the table indices and the 8- or 16-bit values
+// onto the unit square.  If skcms_ApproximateCurve() succeeds for a tabulated
+// curve, the fitted transfer function is drawn on top in magenta.
+static void svg_curve(FILE* fp, const skcms_Curve* curve, const char* color) {
+    if (!curve->table_entries) {
+        svg_transfer_function(fp, &curve->parametric, color);
+        return;
+    }
+
+    // Scale factors that bring the last index and the largest table value to 1.
+    double xScale = 1.0 / (curve->table_entries - 1.0);
+    double yScale = curve->table_8 ? (1.0 / 255) : (1.0 / 65535);
+    fprintf(fp, "<polyline fill=\"none\" stroke=\"%s\" vector-effect=\"non-scaling-stroke\" "
+            "transform=\"scale(%g %g)\" points=\"\n",
+            color, xScale, yScale);
+
+    for (uint32_t i = 0; i < curve->table_entries; ++i) {
+        if (curve->table_8) {
+            fprintf(fp, "%3u, %3u\n", i, curve->table_8[i]);
+        } else {
+            // 16-bit entries are stored big-endian, two bytes each.
+            fprintf(fp, "%4u, %5u\n", i, read_big_u16(curve->table_16 + 2 * i));
+        }
+    }
+    fprintf(fp, "\"/>\n");
+
+    skcms_TransferFunction approx_tf;
+    float max_error;
+    if (skcms_ApproximateCurve(curve, &approx_tf, &max_error)) {
+        svg_transfer_function(fp, &approx_tf, "magenta");
+    }
+}
+
+// Calls svg_curve() for each of the first `num_curves` curves, pairing
+// curves[k] with colors[k].
+static void svg_curves(FILE* fp, uint32_t num_curves, const skcms_Curve* curves,
+                       const char** colors) {
+    uint32_t k = 0;
+    while (k < num_curves) {
+        svg_curve(fp, &curves[k], colors[k]);
+        ++k;
+    }
+}
+
+// Writes a complete SVG file named `filename` containing the axes and the
+// given curves.  Exactly three curves are drawn with the RGB palette; any
+// other count uses the CMYK palette.
+static void dump_curves_svg(const char* filename, uint32_t num_curves, const skcms_Curve* curves) {
+    const char** palette = kSVG_CMYK_Colors;
+    if (num_curves == 3) {
+        palette = kSVG_RGB_Colors;
+    }
+
+    FILE* out = svg_open(filename);
+    // Move the origin to the bottom-left of the plot area and flip Y so that
+    // values grow upwards.
+    svg_push_group(out, "transform=\"translate(%g %g) scale(%g %g)\"",
+                   kSVGMarginLeft, kSVGMarginTop + kSVGScaleY, kSVGScaleX, -kSVGScaleY);
+    svg_axes(out);
+    svg_curves(out, num_curves, curves, palette);
+    svg_pop_group(out);
+    svg_close(out);
+}
+
+static const uint8_t png_signature[] = { 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a };
+
+#if defined(_MSC_VER)
+    // MSVC build: zlib is not loaded here, so .png input is never supported and
+    // this always returns false.
+    static bool parse_png_profile(const uint8_t* buf, size_t len, skcms_ICCProfile* profile) {
+        (void)buf;
+        (void)len;
+        (void)profile;
+        (void)read_big_u32;  // presumably referenced only to silence an unused-function warning
+        return false;
+    }
+#else
+    // Extracts the ICC profile embedded in the iCCP chunk of a PNG file.
+    //
+    //   buf, len   the complete PNG file, starting with its 8-byte signature
+    //   profile    receives the parsed profile on success
+    //
+    // Returns true only if zlib could be loaded at runtime, an iCCP chunk was
+    // found before IEND or the end of the data, its payload inflated cleanly,
+    // and skcms_Parse() accepted the result.  Malformed or truncated input
+    // yields false instead of reading past the end of buf.
+    //
+    // On success the inflated profile bytes are intentionally NOT freed:
+    // skcms_Parse() does not copy its input, so `profile` keeps pointing into
+    // that buffer for as long as it is used.
+    static bool parse_png_profile(const uint8_t* buf, size_t len, skcms_ICCProfile* profile) {
+        void* zlib = dlopen("libz.so", RTLD_LAZY);
+        // Systems without the zlib development package only ship the versioned name.
+        if (!zlib) { zlib = dlopen("libz.so.1",  RTLD_LAZY); }
+        if (!zlib) { zlib = dlopen("libz.dylib", RTLD_LAZY); }
+        if (!zlib) {
+            return false;
+        }
+
+        typedef int(*UncompressFn)(uint8_t*, unsigned long*, const uint8_t*, unsigned long);
+        UncompressFn uncompress = (UncompressFn)dlsym(zlib, "uncompress");
+
+        bool ok = false;
+        if (uncompress && len >= sizeof(png_signature)) {
+            const uint8_t* end = buf+len;
+
+            // skip over signature
+            buf += sizeof(png_signature);
+
+            const uint32_t IEND = 0x49454e44,
+                           iCCP = 0x69434350;
+
+            uint32_t tag = 0;
+
+            // Each chunk is: 4-byte length, 4-byte tag, payload, 4-byte CRC.
+            while ((size_t)(end - buf) >= 8 && tag != IEND) {
+                uint32_t size = read_big_u32(buf+0);
+                tag           = read_big_u32(buf+4);
+                buf += 8;
+
+                if (size > (size_t)(end - buf)) {
+                    break;  // chunk claims more payload than the file contains
+                }
+
+                if (tag == iCCP) {
+                    // Payload: NUL-terminated profile name, one byte of
+                    // compression method, then the zlib-compressed profile.
+                    const char* name = (const char*)buf;
+                    const char* nul  = (const char*)memchr(name, 0, size);
+                    if (!nul) {
+                        break;  // name is not terminated inside the chunk
+                    }
+                    printf("Profile name from .png: '%s'\n", name);
+
+                    size_t header = (size_t)(nul - name)
+                                  + 1/*NUL*/
+                                  + 1/*PNG compression method, always 0 == zlib*/;
+                    if (header > size) {
+                        break;  // no room left for the compression method byte
+                    }
+
+                    unsigned long inf_size,
+                                  guess = len;
+                    void* inflated = NULL;
+
+                    // The inflated size is unknown: start with the file size
+                    // and double the buffer until zlib stops reporting that
+                    // the output buffer is too small.
+                    int err;
+                    do {
+                        inf_size = guess;
+                        void* grown = realloc(inflated, inf_size);
+                        if (!grown) {
+                            err = -4/*Z_MEM_ERROR*/;
+                            break;
+                        }
+                        inflated = grown;
+
+                        err = uncompress(inflated, &inf_size,
+                                         (const uint8_t*)name+header, size-header);
+                        if (guess > (unsigned long)-1 / 2) {
+                            break;  // cannot double again without overflowing
+                        }
+                        guess *= 2;
+                    } while (err == -5/*Z_BUF_ERROR*/);
+
+                    ok = err == 0/*Z_OK*/
+                      && skcms_Parse(inflated, inf_size, profile);
+                    if (!ok) {
+                        // Only a rejected buffer may be released; see above.
+                        free(inflated);
+                    }
+                    break;
+                }
+
+                buf += size;
+                if ((size_t)(end - buf) < 4) {
+                    break;  // truncated before the CRC
+                }
+                buf += 4/*skip the PNG CRC*/;
+            }
+        }
+
+        // uncompress() is not called after this point, so zlib can be unloaded.
+        dlclose(zlib);
+        return ok;
+    }
+#endif
+
+// iccdump: prints a description of an ICC profile to stdout.
+//
+// Command line:  iccdump [-s] [-d] <ICC filename>
+//   -s   also write SVG plots: gamut.svg, TRC_curves.svg and, for profiles
+//        with A2B data, A_curves.svg / M_curves.svg / B_curves.svg
+//   -d   also write TRC_curves.html, a Desmos page with the TRC curves and
+//        their inverses
+// Any other argument is taken as the input file name; the last one wins.
+// The input may be a raw ICC profile or a .png file carrying one.
+//
+// Returns 1 when no file name is given and 0 on success; load or parse
+// failures go through fatal(), which exits with status 1.
+int main(int argc, char** argv) {
+    const char* filename = NULL;
+    bool svg = false;
+    bool desmos = false;
+
+    for (int i = 1; i < argc; ++i) {
+        if (0 == strcmp(argv[i], "-s")) {
+            svg = true;
+        } else if (0 == strcmp(argv[i], "-d")) {
+            desmos = true;
+        } else {
+            filename = argv[i];
+        }
+    }
+
+    if (!filename) {
+        printf("usage: %s [-s] [-d] <ICC filename>\n", argv[0]);
+        return 1;
+    }
+
+    void* buf = NULL;
+    size_t len = 0;
+    if (!load_file(filename, &buf, &len)) {
+        fatal("Unable to load input file");
+    }
+
+    // Files that start with the PNG signature are searched for an embedded
+    // profile; everything else is parsed as a raw ICC profile.
+    skcms_ICCProfile profile;
+    if (len >= sizeof(png_signature) && 0 == memcmp(buf, png_signature, sizeof(png_signature))) {
+        if (!parse_png_profile(buf, len, &profile)) {
+            fatal("Could not find an ICC profile in this .png");
+        }
+    } else if (!skcms_Parse(buf, len, &profile)) {
+        fatal("Unable to parse ICC profile");
+    }
+
+    dump_profile(&profile, stdout);
+
+    if (desmos) {
+        if (profile.has_trc) {
+            FILE* fp = desmos_open("TRC_curves.html");
+            desmos_curves(fp, 3, profile.trc, kSVG_RGB_Colors);
+            desmos_inv_curves(fp, 3, profile.trc, kSVG_RGB_Colors);
+            desmos_close(fp);
+        }
+    }
+
+    if (svg) {
+        if (profile.has_toXYZD50) {
+            FILE* fp = svg_open("gamut.svg");
+            svg_push_group(fp, "transform=\"translate(%g %g) scale(%g %g)\"",
+                           kSVGMarginLeft, kSVGMarginTop + kSVGScaleY, kSVGScaleX, -kSVGScaleY);
+            svg_axes(fp);
+
+            // Outline of the spectral locus; kSpectralHull holds (x, y) pairs.
+            fprintf(fp, "<polygon fill=\"none\" stroke=\"black\" "
+                    "vector-effect=\"non-scaling-stroke\" points=\"\n");
+            for (int i = 0; i < ARRAY_COUNT(kSpectralHull); i += 2) {
+                fprintf(fp, "%g, %g\n", kSpectralHull[i], kSpectralHull[i + 1]);
+            }
+            fprintf(fp, "\"/>\n");
+
+            // If the profile has a CHAD matrix that can be inverted, apply
+            // the inverse to the toXYZD50 matrix before plotting.
+            skcms_Matrix3x3 m = profile.toXYZD50;
+            skcms_Matrix3x3 chad;
+            if (skcms_GetCHAD(&profile, &chad) && skcms_Matrix3x3_invert(&chad, &chad)) {
+                m = skcms_Matrix3x3_concat(&chad, &m);
+            }
+
+            // Each matrix column is one primary's XYZ; dividing X and Y by
+            // X+Y+Z gives its xy chromaticity, a corner of the gamut triangle.
+            float rSum = m.vals[0][0] + m.vals[1][0] + m.vals[2][0];
+            float gSum = m.vals[0][1] + m.vals[1][1] + m.vals[2][1];
+            float bSum = m.vals[0][2] + m.vals[1][2] + m.vals[2][2];
+            fprintf(fp, "<polygon fill=\"none\" stroke=\"black\" "
+                    "vector-effect=\"non-scaling-stroke\" points=\"%g,%g %g,%g %g,%g\"/>\n",
+                    (m.vals[0][0] / rSum), (m.vals[1][0] / rSum),
+                    (m.vals[0][1] / gSum), (m.vals[1][1] / gSum),
+                    (m.vals[0][2] / bSum), (m.vals[1][2] / bSum));
+
+            svg_pop_group(fp);
+            svg_close(fp);
+        }
+
+        if (profile.has_trc) {
+            FILE* fp = svg_open("TRC_curves.svg");
+            svg_push_group(fp, "transform=\"translate(%g %g) scale(%g %g)\"",
+                           kSVGMarginLeft, kSVGMarginTop + kSVGScaleY, kSVGScaleX, -kSVGScaleY);
+            svg_axes(fp);
+            svg_curves(fp, 3, profile.trc, kSVG_RGB_Colors);
+            svg_pop_group(fp);
+            svg_close(fp);
+        }
+
+        if (profile.has_A2B) {
+            const skcms_A2B* a2b = &profile.A2B;
+            // The A and M curve sets are written only when their channel
+            // count is non-zero; the B curves are always written.
+            if (a2b->input_channels) {
+                dump_curves_svg("A_curves.svg", a2b->input_channels, a2b->input_curves);
+            }
+
+            if (a2b->matrix_channels) {
+                dump_curves_svg("M_curves.svg", a2b->matrix_channels, a2b->matrix_curves);
+            }
+
+            dump_curves_svg("B_curves.svg", a2b->output_channels, a2b->output_curves);
+        }
+    }
+
+    return 0;
+}
diff --git a/third-party/libjxl/libjxl/third_party/skcms/infra/bots/bazel.py b/third-party/libjxl/libjxl/third_party/skcms/infra/bots/bazel.py
new file mode 100644
index 0000000000..a9a8c686fc
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/infra/bots/bazel.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+#
+# Copyright 2021 Google Inc.
+#
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import os
+import subprocess
+import sys
+import tempfile
+
+
+def call(cmd):
+  print("Executing: " + " ".join(cmd))
+  subprocess.check_call(cmd)
+
+
+def main():
+  build_or_test = sys.argv[1]
+  assert build_or_test in ["build", "test"]
+
+  local_or_rbe = sys.argv[2]
+  assert local_or_rbe in ["local", "rbe"]
+
+  target = sys.argv[3]
+  assert target in ["android-arm", "android-arm64", "linux", "windows"]
+
+  print("Hello from {platform} in {cwd}!".format(platform=sys.platform,
+                                                 cwd=os.getcwd()))
+
+  # Create a temporary directory for the Bazel cache.
+  #
+  # We cannot use the default Bazel cache location ($HOME/.cache/bazel) because:
+  #
+  #  - The cache can be large (>10G).
+  #  - Swarming bots have limited storage space on the root partition (15G).
+  #  - Because the above, the Bazel build fails with a "no space left on
+  #    device" error.
+  #  - The Bazel cache under $HOME/.cache/bazel lingers after the tryjob
+  #    completes, causing the Swarming bot to be quarantined due to low disk
+  #    space.
+  #  - Generally, it's considered poor hygiene to leave a bot in a different
+  #    state.
+  #
+  # The temporary directory created by the below function call lives under
+  # /mnt/pd0, which has significantly more storage space, and will be wiped
+  # after the tryjob completes.
+  #
+  # Reference: https://docs.bazel.build/versions/master/output_directories.html#current-layout.
+  with tempfile.TemporaryDirectory(prefix="bazel-cache-",
+                                   dir=os.environ["TMPDIR"]) as cache_dir:
+    def bazel(args):
+      cmd = ["C:\\b\\s\\w\\ir\\bazelisk\\bazelisk.exe"] if target == "windows" \
+            else ["bazelisk", "--output_user_root=" + cache_dir]
+      print("Running", cmd)
+      call(cmd + args)
+
+    try:
+      # Print the Bazel version.
+      bazel(["version"])
+
+      # Compute the Bazel configuration to use.
+      config = target
+      if local_or_rbe == "rbe":
+        config += "-rbe"
+
+      # Run the requested Bazel command.
+      os.chdir("skcms")
+      bazel([build_or_test, "//...", "--config=" + config])
+
+    finally:
+      # Kill the Bazel server, so as not to leave any children processes
+      # outliving the Swarming task.
+      bazel(["shutdown"])
+
+if __name__ == "__main__":
+  main()
diff --git a/third-party/libjxl/libjxl/third_party/skcms/infra/bots/bot.py b/third-party/libjxl/libjxl/third_party/skcms/infra/bots/bot.py
new file mode 100644
index 0000000000..6e88e542f9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/infra/bots/bot.py
@@ -0,0 +1,90 @@
+#!/usr/bin/python2.7
+
+import os
+import subprocess
+import sys
+
+ninja = sys.argv[1]
+
+def call(cmd):
+  """Runs the command string `cmd` through the shell.
+
+  Raises subprocess.CalledProcessError if it exits with a non-zero status.
+  """
+  subprocess.check_call(cmd, shell=True)
+
+def append(path, line):
+  """Appends `line`, followed by a newline, to the file at `path`."""
+  with open(path, 'a') as f:
+    # Python 2 print-to-file syntax; this script runs under python2.7.
+    print >>f, line
+
+print "Hello from {platform} in {cwd}!".format(platform=sys.platform,
+                                               cwd=os.getcwd())
+
+# Platform-specific toolchain setup, each branch ending in a ninja build of
+# the skcms directory.  `-k 0` makes ninja keep going after failures.
+if 'darwin' in sys.platform:
+  # Get Xcode from CIPD using mac_toolchain tool.
+  # sys.argv[3] and sys.argv[4] are paths relative to the current directory.
+  mac_toolchain = os.path.join(os.getcwd(), sys.argv[3])
+  xcode_app_path = os.path.join(os.getcwd(), sys.argv[4])
+  # See mapping of Xcode version to Xcode build version here:
+  # https://chrome-infra-packages.appspot.com/p/infra_internal/ios/xcode/mac/+/
+  XCODE_BUILD_VERSION = '12d4e'   # xcode 12.4
+  # Remove whatever is at the target path before installing into it.
+  call('rm -rf {xcode_app_path}'.format(xcode_app_path=xcode_app_path))
+  call(('{mac_toolchain}/mac_toolchain install '
+        '-kind mac '
+        '-xcode-version {xcode_build_version} '
+        '-output-dir {xcode_app_path}').format(
+            mac_toolchain=mac_toolchain,
+            xcode_build_version=XCODE_BUILD_VERSION,
+            xcode_app_path=xcode_app_path))
+  # Make the freshly installed Xcode the active one.
+  call('sudo xcode-select -switch {xcode_app_path}'.format(
+      xcode_app_path=xcode_app_path))
+
+  call('{ninja}/ninja -C skcms -k 0'.format(ninja=ninja))
+
+elif 'linux' in sys.platform:
+  # Point to clang in our clang_linux package.
+  clang_linux = os.path.realpath(sys.argv[3])
+  append('skcms/ninja/clang', 'cc  = {}/bin/clang  '.format(clang_linux))
+  append('skcms/ninja/clang', 'cxx = {}/bin/clang++'.format(clang_linux))
+
+  # Get an Emscripten environment all set up.
+  call('git clone https://github.com/emscripten-core/emsdk.git')
+  os.chdir('emsdk')
+  call('./emsdk install 2.0.14')
+  os.chdir('..')
+
+  emscripten_sdk = os.path.realpath('emsdk')
+  node = emscripten_sdk + '/node/14.18.2_64bit/bin/node'
+
+  # Write an Emscripten config file that points at the tools inside the emsdk
+  # checkout; it is handed to emcc/em++ through EM_CONFIG below.
+  em_config = os.path.realpath(os.path.join('.', 'em_config'))
+  with open(em_config, 'w') as f:
+    print >>f, '''
+LLVM_ROOT = '{}/upstream/bin'
+BINARYEN_ROOT = '{}/upstream'
+EMSCRIPTEN_ROOT = '{}/upstream/emscripten'
+NODE_JS = '{}'
+COMPILER_ENGINE = NODE_JS
+JS_ENGINES = [NODE_JS]
+  '''.format(emscripten_sdk, emscripten_sdk, emscripten_sdk, node)
+
+  # Append the compiler and node definitions to the emscripten ninja file.
+  append('skcms/ninja/emscripten',
+         'cc  = env EM_CONFIG={} {}/upstream/emscripten/emcc'.format(
+           em_config, emscripten_sdk))
+  append('skcms/ninja/emscripten',
+         'cxx = env EM_CONFIG={} {}/upstream/emscripten/em++'.format(
+           em_config, emscripten_sdk))
+  append('skcms/ninja/emscripten',
+         'node = {}'.format(node))
+
+  call('{ninja}/ninja -C skcms -k 0'.format(ninja=ninja))
+
+else:  # Windows
+  # sys.argv[2] is the root of the Windows toolchain package.
+  win_toolchain = os.path.realpath(sys.argv[2])
+  msvc = win_toolchain + '\\VC\\Tools\\MSVC\\14.24.28314\\'
+  sdk  = win_toolchain + '\\win_sdk\\'
+
+  # PATH is extended; INCLUDE and LIB are replaced outright with the MSVC and
+  # Windows SDK directories.
+  os.environ['PATH'] = msvc + 'bin\\HostX64\\x64;' + os.environ['PATH']
+  os.environ['INCLUDE'] = msvc + 'include;'
+  os.environ['INCLUDE'] += sdk + 'Include\\10.0.17763.0\\shared;'
+  os.environ['INCLUDE'] += sdk + 'Include\\10.0.17763.0\\ucrt;'
+  os.environ['INCLUDE'] += sdk + 'Include\\10.0.17763.0\\um;'
+  os.environ['LIB'] = msvc + 'lib\\x64;'
+  os.environ['LIB'] += sdk + 'Lib\\10.0.17763.0\\um\\x64;'
+  os.environ['LIB'] += sdk + 'Lib\\10.0.17763.0\\ucrt\\x64;'
+
+  call('{ninja}\\ninja.exe -C skcms -f msvs.ninja -k 0'.format(ninja=ninja))
diff --git a/third-party/libjxl/libjxl/third_party/skcms/infra/bots/tasks.json b/third-party/libjxl/libjxl/third_party/skcms/infra/bots/tasks.json
new file mode 100644
index 0000000000..9d3c5cbfa3
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/infra/bots/tasks.json
@@ -0,0 +1,452 @@
+{
+  "casSpecs": {
+    "whole-repo": {
+      "root": "..",
+      "paths": ["skcms"],
+      "excludes": ["^(.*\\/)*\\.git(\\/.*)*$"]
+    }
+  },
+  "commit_queue": {
+    "skcms": {}
+  },
+  "jobs": {
+    "skcms": {
+      "tasks": [
+        "skcms-Linux",
+        "skcms-Mac",
+        "skcms-Win",
+        "skcms-Linux-Bazel-Build-RBE",
+        "skcms-Linux-Bazel-Test-RBE",
+        "skcms-Win-Bazel-Build-RBE",
+        "skcms-Win-Bazel-Test-RBE",
+        "skcms-Android-ARM-Bazel-Build-RBE",
+        "skcms-Android-ARM64-Bazel-Build-RBE"
+      ]
+    },
+    "skcms-Linux": {
+      "tasks": ["skcms-Linux"]
+    },
+    "skcms-Mac": {
+      "tasks": ["skcms-Mac"]
+    },
+    "skcms-Win": {
+      "tasks": ["skcms-Win"]
+    },
+    "skcms-Linux-Bazel-Build-RBE": {
+      "tasks": ["skcms-Linux-Bazel-Build-RBE"]
+    },
+    "skcms-Linux-Bazel-Test-RBE": {
+      "tasks": ["skcms-Linux-Bazel-Test-RBE"]
+    },
+    "skcms-Win-Bazel-Build-RBE": {
+      "tasks": ["skcms-Win-Bazel-Build-RBE"]
+    },
+    "skcms-Win-Bazel-Test-RBE": {
+      "tasks": ["skcms-Win-Bazel-Test-RBE"]
+    },
+    "skcms-Android-ARM-Bazel-Build-RBE": {
+      "tasks": ["skcms-Android-ARM-Bazel-Build-RBE"]
+    },
+    "skcms-Android-ARM64-Bazel-Build-RBE": {
+      "tasks": ["skcms-Android-ARM64-Bazel-Build-RBE"]
+    }
+  },
+  "tasks": {
+    "skcms-Linux": {
+      "casSpec": "whole-repo",
+      "cipd_packages": [
+        {
+          "name": "infra/ninja/linux-amd64",
+          "path": "ninja",
+          "version": "version:1.8.2"
+        },
+        {
+          "name": "skia/bots/android_ndk_linux",
+          "path": "ndk",
+          "version": "version:16"
+        },
+        {
+          "name": "skia/bots/clang_linux",
+          "path": "clang_linux",
+          "version": "version:16"
+        },
+        {
+          "name": "infra/python/cpython/linux-amd64",
+          "path": "python",
+          "version": "version:2.7.15.chromium14"
+        }
+      ],
+      "command": [
+        "python/bin/python",
+        "skcms/infra/bots/bot.py",
+        "ninja",
+        "ndk",
+        "clang_linux"
+      ],
+      "dimensions": [
+        "os:Linux",
+        "cpu:x86-64-Skylake_GCE",
+        "gpu:none",
+        "pool:Skia"
+      ],
+      "max_attempts": 1,
+      "service_account": "skia-external-compile-tasks@skia-swarming-bots.iam.gserviceaccount.com"
+    },
+    "skcms-Mac": {
+      "caches": [
+        {
+          "name": "xcode_skcms",
+          "path": "cache/Xcode_skcms.app"
+        }
+      ],
+      "casSpec": "whole-repo",
+      "cipd_packages": [
+        {
+          "name": "infra/ninja/mac-amd64",
+          "path": "ninja",
+          "version": "version:1.8.2"
+        },
+        {
+          "name": "skia/bots/android_ndk_darwin",
+          "path": "ndk",
+          "version": "version:10"
+        },
+        {
+          "name": "infra/tools/mac_toolchain/${platform}",
+          "path": "mac_toolchain",
+          "version": "git_revision:796d2b92cff93fc2059623ce0a66284373ceea0a"
+        },
+        {
+          "name": "infra/python/cpython/mac-amd64",
+          "path": "python",
+          "version": "version:2.7.15.chromium14"
+        }
+      ],
+      "command": [
+        "python/bin/python",
+        "skcms/infra/bots/bot.py",
+        "ninja",
+        "ndk",
+        "mac_toolchain",
+        "cache/Xcode_skcms.app"
+      ],
+      "dimensions": [
+        "cores:12",
+        "cpu:x86-64",
+        "os:Mac-10.15.7",
+        "pool:Skia"
+      ],
+      "max_attempts": 1,
+      "service_account": "skia-external-compile-tasks@skia-swarming-bots.iam.gserviceaccount.com"
+    },
+    "skcms-Win": {
+      "casSpec": "whole-repo",
+      "cipd_packages": [
+        {
+          "name": "skia/bots/win_ninja",
+          "path": "ninja",
+          "version": "version:2"
+        },
+        {
+          "name": "skia/bots/win_toolchain",
+          "path": "win_toolchain",
+          "version": "version:11"
+        },
+        {
+          "name": "skia/bots/clang_win",
+          "path": "clang_win",
+          "version": "version:14"
+        },
+        {
+          "name": "infra/python/cpython/windows-amd64",
+          "path": "python",
+          "version": "version:2.7.15.chromium14"
+        }
+      ],
+      "command": [
+        "python/bin/python.exe",
+        "skcms/infra/bots/bot.py",
+        "ninja",
+        "win_toolchain",
+        "clang_win"
+      ],
+      "dimensions": ["os:Windows-Server-17763", "gpu:none", "pool:Skia"],
+      "max_attempts": 1,
+      "service_account": "skia-external-compile-tasks@skia-swarming-bots.iam.gserviceaccount.com"
+    },
+    "skcms-Linux-Bazel-Build-RBE": {
+      "caches": [
+        {
+          "name": "vpython",
+          "path": "cache/vpython"
+        }
+      ],
+      "casSpec": "whole-repo",
+      "cipd_packages": [
+        {
+          "name": "infra/3pp/tools/cpython3/linux-amd64",
+          "path": "cipd_bin_packages/cpython3",
+          "version": "version:2@3.8.10.chromium.19"
+        },
+        {
+          "name": "skia/bots/bazelisk",
+          "path": "bazelisk",
+          "version": "version:0"
+        }
+      ],
+      "command": [
+        "cipd_bin_packages/cpython3/bin/python3",
+        "skcms/infra/bots/bazel.py",
+        "build",
+        "rbe",
+        "linux"
+      ],
+      "dimensions": [
+        "pool:Skia",
+        "os:Debian-10.3",
+        "gpu:none",
+        "cpu:x86-64-Haswell_GCE",
+        "machine_type:n1-highcpu-64",
+        "docker_installed:true"
+      ],
+      "env_prefixes": {
+        "PATH": [
+          "cipd_bin_packages/cpython3",
+          "cipd_bin_packages/cpython3/bin",
+          "bazelisk"
+        ],
+        "VPYTHON_VIRTUALENV_ROOT": [
+          "cache/vpython"
+        ]
+      },
+      "max_attempts": 1,
+      "service_account": "skia-external-compile-tasks@skia-swarming-bots.iam.gserviceaccount.com"
+    },
+    "skcms-Linux-Bazel-Test-RBE": {
+      "caches": [
+        {
+          "name": "vpython",
+          "path": "cache/vpython"
+        }
+      ],
+      "casSpec": "whole-repo",
+      "cipd_packages": [
+        {
+          "name": "infra/3pp/tools/cpython3/linux-amd64",
+          "path": "cipd_bin_packages/cpython3",
+          "version": "version:2@3.8.10.chromium.19"
+        },
+        {
+          "name": "skia/bots/bazelisk",
+          "path": "bazelisk",
+          "version": "version:0"
+        }
+      ],
+      "command": [
+        "cipd_bin_packages/cpython3/bin/python3",
+        "skcms/infra/bots/bazel.py",
+        "test",
+        "rbe",
+        "linux"
+      ],
+      "dimensions": [
+        "pool:Skia",
+        "os:Debian-10.3",
+        "gpu:none",
+        "cpu:x86-64-Haswell_GCE",
+        "machine_type:n1-highcpu-64",
+        "docker_installed:true"
+      ],
+      "env_prefixes": {
+        "PATH": [
+          "cipd_bin_packages/cpython3",
+          "cipd_bin_packages/cpython3/bin",
+          "bazelisk"
+        ],
+        "VPYTHON_VIRTUALENV_ROOT": [
+          "cache/vpython"
+        ]
+      },
+      "max_attempts": 1,
+      "service_account": "skia-external-compile-tasks@skia-swarming-bots.iam.gserviceaccount.com"
+    },
+    "skcms-Win-Bazel-Build-RBE": {
+      "casSpec": "whole-repo",
+      "cipd_packages": [
+        {
+          "name": "infra/python/cpython3/windows-amd64",
+          "path": "python",
+          "version": "version:3.8.0b1.chromium.1"
+        },
+        {
+          "name": "skia/bots/win_toolchain",
+          "path": "win_toolchain",
+          "version": "version:11"
+        },
+        {
+          "name": "skia/bots/bazelisk_win_amd64",
+          "path": "bazelisk",
+          "version": "version:0"
+        }
+      ],
+      "command": [
+        "python/bin/python3.exe",
+        "skcms/infra/bots/bazel.py",
+        "build",
+        "rbe",
+        "windows"
+      ],
+      "env_prefixes": {
+        "PATH": ["win_toolchain/sys64", "bazelisk"]
+      },
+      "dimensions": ["os:Windows-Server-17763", "gpu:none", "pool:Skia"],
+      "max_attempts": 1,
+      "service_account": "skia-external-compile-tasks@skia-swarming-bots.iam.gserviceaccount.com"
+    },
+    "skcms-Win-Bazel-Test-RBE": {
+      "casSpec": "whole-repo",
+      "cipd_packages": [
+        {
+          "name": "infra/python/cpython3/windows-amd64",
+          "path": "python",
+          "version": "version:3.8.0b1.chromium.1"
+        },
+        {
+          "name": "skia/bots/win_toolchain",
+          "path": "win_toolchain",
+          "version": "version:11"
+        },
+        {
+          "name": "skia/bots/bazelisk_win_amd64",
+          "path": "bazelisk",
+          "version": "version:0"
+        }
+      ],
+      "command": [
+        "python/bin/python3.exe",
+        "skcms/infra/bots/bazel.py",
+        "test",
+        "rbe",
+        "windows"
+      ],
+      "env_prefixes": {
+        "PATH": ["win_toolchain/sys64", "bazelisk"]
+      },
+      "dimensions": ["os:Windows-Server-17763", "gpu:none", "pool:Skia"],
+      "max_attempts": 1,
+      "service_account": "skia-external-compile-tasks@skia-swarming-bots.iam.gserviceaccount.com"
+    },
+    "skcms-Android-ARM-Bazel-Build-RBE": {
+      "caches": [
+        {
+          "name": "vpython",
+          "path": "cache/vpython"
+        }
+      ],
+      "casSpec": "whole-repo",
+      "cipd_packages": [
+        {
+          "name": "infra/3pp/tools/cpython3/linux-amd64",
+          "path": "cipd_bin_packages/cpython3",
+          "version": "version:2@3.8.10.chromium.19"
+        },
+        {
+          "name": "skia/bots/android_ndk_linux",
+          "path": "android_ndk_linux",
+          "version": "version:16"
+        },
+        {
+          "name": "skia/bots/bazelisk",
+          "path": "bazelisk",
+          "version": "version:0"
+        }
+      ],
+      "command": [
+        "cipd_bin_packages/cpython3/bin/python3",
+        "skcms/infra/bots/bazel.py",
+        "build",
+        "rbe",
+        "android-arm"
+      ],
+      "dimensions": [
+        "pool:Skia",
+        "os:Debian-10.3",
+        "gpu:none",
+        "cpu:x86-64-Haswell_GCE",
+        "machine_type:n1-highcpu-64",
+        "docker_installed:true"
+      ],
+      "env_prefixes": {
+        "ANDROID_NDK_HOME": [
+          "android_ndk_linux"
+        ],
+        "PATH": [
+          "cipd_bin_packages/cpython3",
+          "cipd_bin_packages/cpython3/bin",
+          "bazelisk"
+        ],
+        "VPYTHON_VIRTUALENV_ROOT": [
+          "cache/vpython"
+        ]
+      },
+      "max_attempts": 1,
+      "service_account": "skia-external-compile-tasks@skia-swarming-bots.iam.gserviceaccount.com"
+    },
+    "skcms-Android-ARM64-Bazel-Build-RBE": {
+      "caches": [
+        {
+          "name": "vpython",
+          "path": "cache/vpython"
+        }
+      ],
+      "casSpec": "whole-repo",
+      "cipd_packages": [
+        {
+          "name": "infra/3pp/tools/cpython3/linux-amd64",
+          "path": "cipd_bin_packages/cpython3",
+          "version": "version:2@3.8.10.chromium.19"
+        },
+        {
+          "name": "skia/bots/android_ndk_linux",
+          "path": "android_ndk_linux",
+          "version": "version:16"
+        },
+        {
+          "name": "skia/bots/bazelisk",
+          "path": "bazelisk",
+          "version": "version:0"
+        }
+      ],
+      "command": [
+        "cipd_bin_packages/cpython3/bin/python3",
+        "skcms/infra/bots/bazel.py",
+        "build",
+        "rbe",
+        "android-arm64"
+      ],
+      "dimensions": [
+        "pool:Skia",
+        "os:Debian-10.3",
+        "gpu:none",
+        "cpu:x86-64-Haswell_GCE",
+        "machine_type:n1-highcpu-64",
+        "docker_installed:true"
+      ],
+      "env_prefixes": {
+        "ANDROID_NDK_HOME": [
+          "android_ndk_linux"
+        ],
+        "PATH": [
+          "cipd_bin_packages/cpython3",
+          "cipd_bin_packages/cpython3/bin",
+          "bazelisk"
+        ],
+        "VPYTHON_VIRTUALENV_ROOT": [
+          "cache/vpython"
+        ]
+      },
+      "max_attempts": 1,
+      "service_account": "skia-external-compile-tasks@skia-swarming-bots.iam.gserviceaccount.com"
+    }
+  }
+}
diff --git a/third-party/libjxl/libjxl/third_party/skcms/infra/project-config/README.md b/third-party/libjxl/libjxl/third_party/skcms/infra/project-config/README.md
new file mode 100644
index 0000000000..bfcba04898
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/infra/project-config/README.md
@@ -0,0 +1 @@
+This directory contains project-wide configuration files for infra services.
diff --git a/third-party/libjxl/libjxl/third_party/skcms/infra/project-config/project.cfg b/third-party/libjxl/libjxl/third_party/skcms/infra/project-config/project.cfg
new file mode 100644
index 0000000000..946727ba0b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/infra/project-config/project.cfg
@@ -0,0 +1,5 @@
+# For the schema of this file and documentation, see ProjectCfg message in
+# https://luci-config.appspot.com/schemas/projects:project.cfg
+
+name: "skia-skcms"
+access: "group:all" # public
diff --git a/third-party/libjxl/libjxl/third_party/skcms/infra/project-config/refs.cfg b/third-party/libjxl/libjxl/third_party/skcms/infra/project-config/refs.cfg
new file mode 100644
index 0000000000..e023102eb4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/infra/project-config/refs.cfg
@@ -0,0 +1,7 @@
+# Refs configuration file. The documentation of the format can be found
+# at https://luci-config.appspot.com/schemas/projects:refs.cfg.
+
+refs {
+  name: "refs/heads/master"
+  config_path: "infra/branch-config"
+}
diff --git a/third-party/libjxl/libjxl/third_party/skcms/infra/skcq.json b/third-party/libjxl/libjxl/third_party/skcms/infra/skcq.json
new file mode 100644
index 0000000000..47c72ce364
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/infra/skcq.json
@@ -0,0 +1,7 @@
+{
+  "visibility_type": "public",
+  "tasks_json_path": "infra/bots/tasks.json",
+  "committer_list": "project-skia-committers",
+  "dry_run_access_list": "project-skia-tryjob-access",
+  "tree_status_url": "https://tree-status.skia.org/skcms/current"
+}
diff --git a/third-party/libjxl/libjxl/third_party/skcms/msvs.ninja b/third-party/libjxl/libjxl/third_party/skcms/msvs.ninja
new file mode 100644
index 0000000000..4fd1656072
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/msvs.ninja
@@ -0,0 +1,7 @@
+builddir = out
+
+subninja ninja/msvs
+subninja ninja/msvs.clang
+subninja ninja/msvs.clang-fast
+subninja ninja/msvs.fast
+subninja ninja/msvs.analyze
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/android b/third-party/libjxl/libjxl/third_party/skcms/ninja/android
new file mode 100644
index 0000000000..4420d2277b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/android
@@ -0,0 +1,18 @@
+ndk     = ../ndk
+cc      = $ndk/toolchains/llvm/prebuilt/*/bin/aarch64-linux-android24-clang
+cxx     = $ndk/toolchains/llvm/prebuilt/*/bin/aarch64-linux-android24-clang++
+
+cflags  = -fcolor-diagnostics -Weverything
+ldflags = -pie -fuse-ld=lld -static-libstdc++
+
+out     = out/android$mode
+
+rule run
+    command = if which adb >/dev/null && adb get-state >/dev/null 2>/dev/null; $
+              then $
+                  adb push --sync profiles /data/local/tmp >/dev/null; $
+                  adb push $in /data/local/tmp/$in >/dev/null; $
+                  adb shell "cd /data/local/tmp; taskset f ./$in" > $out; $
+              else touch $out; fi
+    description = run $in
+include ninja/common
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm b/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm
new file mode 100644
index 0000000000..c9fea90b19
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm
@@ -0,0 +1,18 @@
+ndk     = ../ndk
+cc      = $ndk/toolchains/llvm/prebuilt/*/bin/armv7a-linux-androideabi24-clang
+cxx     = $ndk/toolchains/llvm/prebuilt/*/bin/armv7a-linux-androideabi24-clang++
+
+cflags  = -fcolor-diagnostics -Weverything -mthumb
+ldflags = -pie -fuse-ld=lld -static-libstdc++
+
+out     = out/android-arm$mode
+
+rule run
+    command = if which adb >/dev/null && adb get-state >/dev/null 2>/dev/null; $
+              then $
+                  adb push --sync profiles /data/local/tmp >/dev/null; $
+                  adb push $in /data/local/tmp/$in >/dev/null; $
+                  adb shell "cd /data/local/tmp; taskset f ./$in" > $out; $
+              else touch $out; fi
+    description = run $in
+include ninja/common
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm.lsan b/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm.lsan
new file mode 100644
index 0000000000..bf205c88e9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm.lsan
@@ -0,0 +1,4 @@
+mode          = .lsan
+extra_cflags  = -fsanitize=leak
+extra_ldflags = -fsanitize=leak
+include ninja/android-arm
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm.neon-vfpv4 b/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm.neon-vfpv4
new file mode 100644
index 0000000000..8129d6aa9c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm.neon-vfpv4
@@ -0,0 +1,3 @@
+mode         = .neon-vfpv4
+extra_cflags = -mfpu=neon-vfpv4
+include ninja/android-arm
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm.neon-vfpv4-O3 b/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm.neon-vfpv4-O3
new file mode 100644
index 0000000000..d144011401
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm.neon-vfpv4-O3
@@ -0,0 +1,3 @@
+mode         = .neon-vfpv4-O3
+extra_cflags = -mfpu=neon-vfpv4 -O3
+include ninja/android-arm
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm.portable b/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm.portable
new file mode 100644
index 0000000000..648e4ab740
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm.portable
@@ -0,0 +1,3 @@
+mode         = .portable
+extra_cflags = -DSKCMS_PORTABLE
+include ninja/android-arm
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm.tiny b/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm.tiny
new file mode 100644
index 0000000000..ef4684b97a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm.tiny
@@ -0,0 +1,3 @@
+mode         = .tiny
+extra_cflags = -g0 -DNDEBUG -fno-unwind-tables -fno-asynchronous-unwind-tables
+include ninja/android-arm
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm.vfpv2 b/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm.vfpv2
new file mode 100644
index 0000000000..c6ad1c9058
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/android-arm.vfpv2
@@ -0,0 +1,3 @@
+mode         = .vfpv2
+extra_cflags = -mfpu=vfpv2
+include ninja/android-arm
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/android.fp16 b/third-party/libjxl/libjxl/third_party/skcms/ninja/android.fp16
new file mode 100644
index 0000000000..058a54a498
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/android.fp16
@@ -0,0 +1,3 @@
+mode         = .fp16
+extra_cflags = -march=armv8.2a+fp16 -DSKCMS_OPT_INTO_NEON_FP16 -Wno-implicit-float-conversion
+include ninja/android
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/android.lsan b/third-party/libjxl/libjxl/third_party/skcms/ninja/android.lsan
new file mode 100644
index 0000000000..7cec6b7acb
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/android.lsan
@@ -0,0 +1,4 @@
+mode          = .lsan
+extra_cflags  = -fsanitize=leak
+extra_ldflags = -fsanitize=leak
+include ninja/android
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/android.nofp16 b/third-party/libjxl/libjxl/third_party/skcms/ninja/android.nofp16
new file mode 100644
index 0000000000..772eb9875e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/android.nofp16
@@ -0,0 +1,3 @@
+mode         = .nofp16
+extra_cflags = -march=armv8.2a+fp16
+include ninja/android
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/android.portable b/third-party/libjxl/libjxl/third_party/skcms/ninja/android.portable
new file mode 100644
index 0000000000..2d3d6df4ea
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/android.portable
@@ -0,0 +1,3 @@
+mode         = .portable
+extra_cflags = -DSKCMS_PORTABLE
+include ninja/android
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/android.tiny b/third-party/libjxl/libjxl/third_party/skcms/ninja/android.tiny
new file mode 100644
index 0000000000..a1d29b9b04
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/android.tiny
@@ -0,0 +1,3 @@
+mode         = .tiny
+extra_cflags = -g0 -DNDEBUG -fno-unwind-tables -fno-asynchronous-unwind-tables
+include ninja/android
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/clang b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang
new file mode 100644
index 0000000000..1a4b7e6673
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang
@@ -0,0 +1,7 @@
+cc     = clang
+cxx    = clang++
+cflags = -fcolor-diagnostics -Weverything -ffp-contract=off
+out    = out/clang$mode
+
+include ninja/local
+include ninja/common
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.O0 b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.O0
new file mode 100644
index 0000000000..1c8fb67ff4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.O0
@@ -0,0 +1,3 @@
+mode          = .O0
+extra_cflags  = -O0
+include ninja/clang
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.avx512 b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.avx512
new file mode 100644
index 0000000000..95ec3e6283
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.avx512
@@ -0,0 +1,5 @@
+mode         = .avx512
+extra_cflags = -march=skylake-avx512
+include ninja/clang
+
+disabled = (uname | grep -q Darwin && sysctl machdep.cpu.leaf7_features | grep -qv AVX512F || grep flags /proc/cpuinfo | grep -vq avx512f)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.lsan b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.lsan
new file mode 100644
index 0000000000..103c409c2c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.lsan
@@ -0,0 +1,6 @@
+mode          = .lsan
+extra_cflags  = -fsanitize=leak
+extra_ldflags = -fsanitize=leak
+include ninja/clang
+
+disabled = (uname | grep -qv Linux)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.m32 b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.m32
new file mode 100644
index 0000000000..65828a1c63
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.m32
@@ -0,0 +1,6 @@
+mode          = .m32
+extra_cflags  = -m32 -msse2
+extra_ldflags = -m32
+include ninja/clang
+
+disabled = (uname | grep -qv Linux)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.m32-O0 b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.m32-O0
new file mode 100644
index 0000000000..435267b5a3
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.m32-O0
@@ -0,0 +1,7 @@
+mode          = .m32-O0
+extra_cflags  = -m32 -msse2 -O0
+extra_ldflags = -m32
+include ninja/clang
+
+disabled = (uname | grep -qv Linux)
+
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.msan b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.msan
new file mode 100644
index 0000000000..a58f3fa2a8
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.msan
@@ -0,0 +1,6 @@
+mode          = .msan
+extra_cflags  = -fsanitize=memory
+extra_ldflags = -fsanitize=memory
+include ninja/clang
+
+disabled = (uname | grep -qv Linux)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.native b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.native
new file mode 100644
index 0000000000..1104e28bfe
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.native
@@ -0,0 +1,3 @@
+mode         = .native
+extra_cflags = -march=native
+include ninja/clang
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.portable b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.portable
new file mode 100644
index 0000000000..c2adea2084
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.portable
@@ -0,0 +1,3 @@
+mode         = .portable
+extra_cflags = -DSKCMS_PORTABLE
+include ninja/clang
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.sse2 b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.sse2
new file mode 100644
index 0000000000..0b48577795
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.sse2
@@ -0,0 +1,4 @@
+mode         = .sse2
+extra_cflags = -msse2 -mno-sse3 -mno-ssse3 -mno-sse4.1 -DSKCMS_NO_RUNTIME_CPU_DETECTION
+include ninja/clang
+
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.sse41 b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.sse41
new file mode 100644
index 0000000000..86215cfd85
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.sse41
@@ -0,0 +1,4 @@
+mode         = .sse41
+extra_cflags = -msse4.1 -DSKCMS_NO_RUNTIME_CPU_DETECTION
+include ninja/clang
+
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.tiny b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.tiny
new file mode 100644
index 0000000000..96eec2ce7d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.tiny
@@ -0,0 +1,3 @@
+mode         = .tiny
+extra_cflags = -g0 -DNDEBUG -fno-unwind-tables -fno-asynchronous-unwind-tables
+include ninja/clang
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.xsan b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.xsan
new file mode 100644
index 0000000000..32d3d3ca2f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.xsan
@@ -0,0 +1,4 @@
+mode          = .xsan
+extra_cflags  = -fsanitize=address,integer,undefined -fno-sanitize-recover=all
+extra_ldflags = -fsanitize=address,integer,undefined
+include ninja/clang
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.xsan-portable b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.xsan-portable
new file mode 100644
index 0000000000..48ae259f7a
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/clang.xsan-portable
@@ -0,0 +1,4 @@
+mode          = .xsan-portable
+extra_cflags  = -fsanitize=address,integer,undefined -fno-sanitize-recover=all -DSKCMS_PORTABLE
+extra_ldflags = -fsanitize=address,integer,undefined
+include ninja/clang
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/common b/third-party/libjxl/libjxl/third_party/skcms/ninja/common
new file mode 100644
index 0000000000..147a569b72
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/common
@@ -0,0 +1,40 @@
+builddir = $out
+disabled = false
+
+# Each compiler has enabled all the warnings it can.
+# Here we make them errors, and disable a few we don't want bothering us.
+warnings = -Werror $
+           -Wno-unknown-warning-option $
+           -Wno-poison-system-directories $
+           -Wno-double-promotion $
+           -Wno-float-equal $
+           -Wno-padded $
+
+warnings_c = $warnings $
+           -Wno-declaration-after-statement $
+
+warnings_cc = $warnings $
+           -Wno-c++98-compat-pedantic $
+           -Wno-gnu-anonymous-struct $
+           -Wno-old-style-cast $
+
+
+rule compile_c
+    command = $disabled && touch $out || $cc -std=c11 -g -Os $warnings_c $cflags $extra_cflags $
+             -MD -MF $out.d -c $in -o $out
+    depfile = $out.d
+    deps    = gcc
+    description = compile $out
+
+rule compile_cc
+    command = $disabled && touch $out || $cxx -std=c++11 -g -Os $warnings_cc $cflags $extra_cflags $
+             -MD -MF $out.d -c $in -o $out
+    depfile = $out.d
+    deps    = gcc
+    description = compile $out
+
+rule link
+    command = $disabled && touch $out || $cxx $ldflags $extra_ldflags $in -ldl -o $out
+    description = link $out
+
+include ninja/targets
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/emscripten b/third-party/libjxl/libjxl/third_party/skcms/ninja/emscripten
new file mode 100644
index 0000000000..6961beb5a7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/emscripten
@@ -0,0 +1,19 @@
+cc      = emcc
+cxx     = em++
+cflags  = -g3 -s WASM=1 -s ENVIRONMENT=node
+ldflags = -g3 -s WASM=1 -s ENVIRONMENT=node --embed-file profiles
+exe     = .js
+out     = out/emscripten$mode
+
+node = 'node'
+
+rule run
+    command = if which $node >/dev/null; $
+              then $
+                  $node $in > $out; $
+              else touch $out; fi
+    description = run $in
+
+include ninja/common
+
+disabled = (uname | grep -qv Linux)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/emscripten.simd b/third-party/libjxl/libjxl/third_party/skcms/ninja/emscripten.simd
new file mode 100644
index 0000000000..e88c8188aa
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/emscripten.simd
@@ -0,0 +1,5 @@
+mode          = .simd
+extra_cflags  = -s SIMD=1
+extra_ldflags = -s SIMD=1
+
+include ninja/emscripten
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc b/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc
new file mode 100644
index 0000000000..156c8162ff
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc
@@ -0,0 +1,9 @@
+cc     = gcc
+cxx    = g++
+cflags = -fdiagnostics-color -Wall -Wextra -ffp-contract=off -fstack-usage
+out    = out/gcc$mode
+
+include ninja/local
+include ninja/common
+
+disabled = (uname | grep -qv Linux)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.O0 b/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.O0
new file mode 100644
index 0000000000..3bcb94b627
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.O0
@@ -0,0 +1,3 @@
+mode          = .O0
+extra_cflags  = -O0
+include ninja/gcc
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.m32 b/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.m32
new file mode 100644
index 0000000000..d738d9a2d7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.m32
@@ -0,0 +1,4 @@
+mode          = .m32
+extra_cflags  = -m32 -msse2 -mfpmath=sse
+extra_ldflags = -m32
+include ninja/gcc
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.m32-O0 b/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.m32-O0
new file mode 100644
index 0000000000..aac54769d1
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.m32-O0
@@ -0,0 +1,4 @@
+mode          = .m32-O0
+extra_cflags  = -m32 -msse2 -mfpmath=sse -O0
+extra_ldflags = -m32
+include ninja/gcc
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.musl b/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.musl
new file mode 100644
index 0000000000..8f1e240e3d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.musl
@@ -0,0 +1,6 @@
+mode          = .musl
+extra_ldflags = -static
+include ninja/gcc
+
+cc     = env REALGCC=gcc musl-gcc
+cxx    = env REALGCC=g++ musl-gcc
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.native b/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.native
new file mode 100644
index 0000000000..e99cda6523
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.native
@@ -0,0 +1,3 @@
+mode         = .native
+extra_cflags = -march=native
+include ninja/gcc
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.portable b/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.portable
new file mode 100644
index 0000000000..e52da396a0
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.portable
@@ -0,0 +1,3 @@
+mode         = .portable
+extra_cflags = -DSKCMS_PORTABLE
+include ninja/gcc
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.tiny b/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.tiny
new file mode 100644
index 0000000000..80351d2818
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.tiny
@@ -0,0 +1,3 @@
+mode         = .tiny
+extra_cflags = -g0 -DNDEBUG -fno-unwind-tables -fno-asynchronous-unwind-tables
+include ninja/gcc
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.xsan b/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.xsan
new file mode 100644
index 0000000000..97afdfd63c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/gcc.xsan
@@ -0,0 +1,4 @@
+mode          = .xsan
+extra_cflags  = -fsanitize=address,undefined -fno-sanitize-recover=all
+extra_ldflags = -fsanitize=address,undefined
+include ninja/gcc
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/ios b/third-party/libjxl/libjxl/third_party/skcms/ninja/ios
new file mode 100644
index 0000000000..1b1bd33180
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/ios
@@ -0,0 +1,11 @@
+cc     = clang   -arch arm64 -isysroot `xcrun --sdk iphoneos --show-sdk-path`
+cxx    = clang++ -arch arm64 -isysroot `xcrun --sdk iphoneos --show-sdk-path`
+cflags = -fcolor-diagnostics -Weverything
+out    = out/ios$mode
+
+rule run
+    command = touch $out
+    description = skipping $in
+include ninja/common
+
+disabled = (uname | grep -qv Darwin)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/ios.portable b/third-party/libjxl/libjxl/third_party/skcms/ninja/ios.portable
new file mode 100644
index 0000000000..9fae40892c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/ios.portable
@@ -0,0 +1,3 @@
+mode         = .portable
+extra_cflags = -DSKCMS_PORTABLE
+include ninja/ios
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/ios.tiny b/third-party/libjxl/libjxl/third_party/skcms/ninja/ios.tiny
new file mode 100644
index 0000000000..8cd40eb493
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/ios.tiny
@@ -0,0 +1,3 @@
+mode         = .tiny
+extra_cflags = -g0 -DNDEBUG -fno-unwind-tables -fno-asynchronous-unwind-tables
+include ninja/ios
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/ios.xsan b/third-party/libjxl/libjxl/third_party/skcms/ninja/ios.xsan
new file mode 100644
index 0000000000..da4ee7528f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/ios.xsan
@@ -0,0 +1,7 @@
+mode          = .xsan
+extra_cflags  = -fsanitize=address,integer,undefined -fno-sanitize-recover=all
+extra_ldflags = -fsanitize=address,integer,undefined
+include ninja/ios
+
+cc = /usr/bin/clang
+cxx = /usr/bin/clang++
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/local b/third-party/libjxl/libjxl/third_party/skcms/ninja/local
new file mode 100644
index 0000000000..a24755c497
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/local
@@ -0,0 +1,3 @@
+rule run
+    command = $disabled && touch $out || ./$in > $out
+    description = run $in
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/msvs b/third-party/libjxl/libjxl/third_party/skcms/ninja/msvs
new file mode 100644
index 0000000000..2e350c3275
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/msvs
@@ -0,0 +1,27 @@
+cl       = cl.exe
+cflags   = /W4 /wd"4200" /wd"4201" /wd"4204" /wd"4221"
+out      = out/msvs$mode
+exe      = .exe
+builddir = $out
+
+rule run
+    command = cmd /c ""$in" > "$out""
+    description = run $in
+
+rule compile_c
+    command = $cl /c /showIncludes /nologo /Zi /WX /MT /Fo"$out" /Fd"$out.pdb" $
+              $cflags $extra_cflags $in
+    deps = msvc
+    description = compile $out
+
+rule compile_cc
+    command = $cl /c /showIncludes /nologo /Zi /WX /MT /Fo"$out" /Fd"$out.pdb" $
+              $cflags $extra_cflags $in
+    deps = msvc
+    description = compile $out
+
+rule link
+    command = link.exe /nologo /DEBUG $extra_ldflags $in /OUT:"$out" /PDB:"$out.pdb"
+    description = link $out
+
+include ninja/targets
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/msvs.analyze b/third-party/libjxl/libjxl/third_party/skcms/ninja/msvs.analyze
new file mode 100644
index 0000000000..95a60ac8c3
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/msvs.analyze
@@ -0,0 +1,4 @@
+mode          = .analyze
+extra_cflags  = /analyze /RTCcsu /guard:cf
+extra_ldflags = /guard:cf
+include ninja/msvs
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/msvs.clang b/third-party/libjxl/libjxl/third_party/skcms/ninja/msvs.clang
new file mode 100644
index 0000000000..a277e8d899
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/msvs.clang
@@ -0,0 +1,5 @@
+mode = .clang
+
+include ninja/msvs
+
+cl = "../clang_win/bin/clang-cl.exe"
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/msvs.clang-fast b/third-party/libjxl/libjxl/third_party/skcms/ninja/msvs.clang-fast
new file mode 100644
index 0000000000..d963711923
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/msvs.clang-fast
@@ -0,0 +1,6 @@
+mode = .clang-fast
+extra_cflags = /O2 /Zc:inline
+
+include ninja/msvs
+
+cl = "../clang_win/bin/clang-cl.exe"
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/msvs.fast b/third-party/libjxl/libjxl/third_party/skcms/ninja/msvs.fast
new file mode 100644
index 0000000000..e111fd33a6
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/msvs.fast
@@ -0,0 +1,3 @@
+mode         = .fast
+extra_cflags = /O2 /Zc:inline
+include ninja/msvs
diff --git a/third-party/libjxl/libjxl/third_party/skcms/ninja/targets b/third-party/libjxl/libjxl/third_party/skcms/ninja/targets
new file mode 100644
index 0000000000..54ff8c98f4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/ninja/targets
@@ -0,0 +1,30 @@
+build $out/skcms.o: compile_cc skcms.cc
+
+build $out/test_only.o: compile_c test_only.c
+
+build $out/tests.o:   compile_c tests.c
+build $out/tests$exe: link $out/skcms.o $out/tests.o $out/test_only.o
+build $out/tests.ok:  run  $out/tests$exe
+
+build $out/bench.o:   compile_c bench.c
+build $out/bench$exe: link $out/skcms.o $out/bench.o
+
+build $out/iccdump.o:   compile_c iccdump.c
+build $out/iccdump$exe: link $out/skcms.o $out/iccdump.o $out/test_only.o
+
+build $out/fuzz/fuzz_main.o: compile_c fuzz/fuzz_main.c
+
+build $out/fuzz/fuzz_iccprofile_atf.o: compile_c fuzz/fuzz_iccprofile_atf.c
+build $out/fuzz_iccprofile_atf$exe:    link $out/fuzz/fuzz_iccprofile_atf.o $
+                                            $out/fuzz/fuzz_main.o $
+                                            $out/skcms.o
+
+build $out/fuzz/fuzz_iccprofile_info.o: compile_c fuzz/fuzz_iccprofile_info.c
+build $out/fuzz_iccprofile_info$exe:    link $out/fuzz/fuzz_iccprofile_info.o $
+                                             $out/fuzz/fuzz_main.o $
+                                             $out/skcms.o
+
+build $out/fuzz/fuzz_iccprofile_transform.o: compile_c fuzz/fuzz_iccprofile_transform.c
+build $out/fuzz_iccprofile_transform$exe:    link $out/fuzz/fuzz_iccprofile_transform.o $
+                                                  $out/fuzz/fuzz_main.o $
+                                                  $out/skcms.o
diff --git a/third-party/libjxl/libjxl/third_party/skcms/skcms.cc b/third-party/libjxl/libjxl/third_party/skcms/skcms.cc
new file mode 100644
index 0000000000..4db244df86
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/skcms.cc
@@ -0,0 +1,3045 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "skcms.h"
+#include "skcms_internal.h"
+#include <assert.h>
+#include <float.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(__ARM_NEON)
+    #include <arm_neon.h>
+#elif defined(__SSE__)
+    #include <immintrin.h>
+
+    #if defined(__clang__)
+        // That #include <immintrin.h> is usually enough, but Clang's headers
+        // "helpfully" skip including the whole kitchen sink when _MSC_VER is
+        // defined, because lots of programs on Windows would include that and
+        // it'd be a lot slower.  But we want all those headers included so we
+        // can use their features after runtime checks later.
+        #include <smmintrin.h>
+        #include <avxintrin.h>
+        #include <avx2intrin.h>
+        #include <avx512fintrin.h>
+        #include <avx512dqintrin.h>
+    #endif
+#endif
+
+static bool runtime_cpu_detection = true;  // file-local flag; starts out enabled
+void skcms_DisableRuntimeCPUDetection() {  // non-static entry point that clears the flag
+    runtime_cpu_detection = false;
+}
+
+// sizeof(x) will return size_t, which is 32-bit on some machines and 64-bit on others.
+// We have better testing on 64-bit machines, so force 32-bit machines to behave like 64-bit.
+//
+// Please do not use sizeof() directly, and size_t only when required.
+// (We have no way of enforcing these requests...)
+#define SAFE_SIZEOF(x) ((uint64_t)sizeof(x))
+
+// Same sort of thing for _Layout structs with a variable sized array at the end (named "variable").
+#define SAFE_FIXED_SIZE(type) ((uint64_t)offsetof(type, variable))
+
+static const union {
+    uint32_t bits;
+    float    f;
+} inf_ = { 0x7f800000 };
+#define INFINITY_ inf_.f
+
+#if defined(__clang__) || defined(__GNUC__)
+    #define small_memcpy __builtin_memcpy
+#else
+    #define small_memcpy memcpy
+#endif
+
+static float log2f_(float x) {
+    // Fast approximate log2(x): the raw bits, scaled by 2^-23, track the exponent of x.
+    int32_t x_bits;
+    small_memcpy(&x_bits, &x, sizeof(x_bits));
+    const float exponent = (float)x_bits * (1.0f / (1<<23));
+
+    // Rebuild the mantissa as a float in [0.5, 1) to refine the estimate significantly.
+    const int32_t mantissa_bits = (x_bits & 0x007fffff) | 0x3f000000;
+    float mantissa;
+    small_memcpy(&mantissa, &mantissa_bits, sizeof(mantissa));
+
+    // Kept as one expression; the parentheses only spell out the left-to-right order.
+    return ((exponent - 124.225514990f)
+            - 1.498030302f * mantissa)
+            - 1.725879990f / (0.3520887068f + mantissa);
+}
+static float logf_(float x) {
+    // Natural log by change of base: ln(x) = ln(2) * log2(x).
+    return 0.69314718f * log2f_(x);
+}
+
+static float exp2f_(float x) {
+    float fract = x - floorf_(x);
+
+    float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
+                                        -   1.490129070f*fract
+                                        +  27.728023300f/(4.84252568f - fract));
+
+    // Only cast fbits to int32_t when it is in range; an out-of-range cast is undefined behavior.
+    // INT_MAX is not exactly representable as a float, so exclude it as effectively infinite.
+    // Negative values are effectively underflow, so clamp them to zero. NaN fails every ordered
+    // comparison, so test !(fbits >= 0): that sends NaN to zero too instead of on to the cast.
+    if (fbits >= (float)INT_MAX) {
+        return INFINITY_;
+    } else if (!(fbits >= 0)) {
+        return 0;
+    }
+
+    int32_t bits = (int32_t)fbits;
+    small_memcpy(&x, &bits, sizeof(x));
+    return x;
+}
+
+// Approximate x^y for x >= 0. Not static, as some test tools use it.
+float powf_(float x, float y) {
+    assert (x >= 0);
+    if (x == 0 || x == 1) { return x; }  // returned as-is, skipping the approximation
+    return exp2f_(log2f_(x) * y);
+}
+
+static float expf_(float x) {
+    // e^x == 2^(log2(e) * x).
+    return exp2f_(1.4426950408889634074f * x);
+}
+
+static float fmaxf_(float x, float y) { if (x > y) { return x; } return y; }
+static float fminf_(float x, float y) { if (x < y) { return x; } return y; }
+
+static bool isfinitef_(float x) { return x*0 == 0; }  // inf*0 and NaN*0 are NaN, never == 0
+
+static float minus_1_ulp(float x) {
+    int32_t raw;                     // bit pattern of x
+    memcpy(&raw, &x, sizeof(raw));
+    --raw;                           // one step down in the integer encoding
+    memcpy(&x, &raw, sizeof(raw));
+    return x;
+}
+
+// Most transfer functions we work with are sRGBish.
+// For exotic HDR transfer functions, we encode them using a tf.g that makes no sense,
+// and repurpose the other fields to hold the parameters of the HDR functions.
+enum TFKind { Bad, sRGBish, PQish, HLGish, HLGinvish };
+struct TF_PQish  { float A,B,C,D,E,F; };
+struct TF_HLGish { float R,G,a,b,c,K_minus_1; };
+// We didn't originally support a scale factor K for HLG, and instead just stored 0 in
+// the unused `f` field of skcms_TransferFunction for HLGish and HLGinvish transfer functions.
+// By storing f=K-1, those old unused f=0 values now mean K=1, a noop scale factor.
+
+static float TFKind_marker(TFKind kind) {
+    // The marker is the negated enum value; distinct NaNs aren't guaranteed to survive WASM.
+    return -static_cast<float>(kind);
+}
+
+static TFKind classify(const skcms_TransferFunction& tf, TF_PQish*   pq = nullptr
+                                                       , TF_HLGish* hlg = nullptr) {
+    // A negative whole-number g is a kind marker. Bound g before the int cast: converting an
+    // out-of-range float (-inf, -1e30, ...) is undefined behavior. No marker is near -128.
+    if (tf.g < 0 && tf.g >= -128 && static_cast<float>(static_cast<int>(tf.g)) == tf.g) {
+        // TODO: soundness checks for PQ/HLG like we do for sRGBish?
+        switch ((int)tf.g) {
+            case -PQish:     if (pq ) { memcpy(pq , &tf.a, sizeof(*pq )); } return PQish;
+            case -HLGish:    if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGish;
+            case -HLGinvish: if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGinvish;
+        }
+        return Bad;
+    }
+
+    // Basic soundness checks for sRGBish transfer functions.
+    if (isfinitef_(tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g)
+            && tf.a >= 0  // a,c,d,g should be non-negative to make any sense.
+            && tf.c >= 0
+            && tf.d >= 0
+            && tf.g >= 0  // also rejects any negative g that was not accepted as a marker
+            // Raising a negative value to a fractional tf->g produces complex numbers.
+            && tf.a * tf.d + tf.b >= 0) {
+        return sRGBish;
+    }
+    return Bad;
+}
+
+bool skcms_TransferFunction_isSRGBish(const skcms_TransferFunction* tf) {
+    return classify(*tf) == sRGBish;  // true only when classify()'s sRGBish soundness checks pass
+}
+bool skcms_TransferFunction_isPQish(const skcms_TransferFunction* tf) {
+    return classify(*tf) == PQish;  // true when tf->g holds the PQish marker
+}
+bool skcms_TransferFunction_isHLGish(const skcms_TransferFunction* tf) {
+    return classify(*tf) == HLGish;  // true for the HLGish marker only; HLGinvish gives false
+}
+
+bool skcms_TransferFunction_makePQish(skcms_TransferFunction* tf, float A, float B, float C,
+                                      float D, float E, float F) {
+    const skcms_TransferFunction pqish = { TFKind_marker(PQish), A,B,C,D,E,F };
+    *tf = pqish;  // kind marker first, then A..F in order
+    assert(skcms_TransferFunction_isPQish(tf));
+    return true;
+}
+
+bool skcms_TransferFunction_makeScaledHLGish(skcms_TransferFunction* tf, float K, float R,
+                                             float G, float a, float b, float c) {
+    const skcms_TransferFunction hlgish = { TFKind_marker(HLGish), R,G, a,b,c, K-1.0f };
+    *tf = hlgish;  // the scale factor K is stored as K-1 in the last slot
+    assert(skcms_TransferFunction_isHLGish(tf));
+    return true;
+}
+
+float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
+    const float sign = x < 0 ? -1.0f : 1.0f;  // evaluate on |x|, then restore the sign
+    x *= sign;
+
+    TF_PQish  pq;
+    TF_HLGish hlg;
+    switch (classify(*tf, &pq, &hlg)) {
+        case Bad: break;  // unclassifiable curves evaluate to 0 below
+        case sRGBish:
+            if (x < tf->d) { return sign * (tf->c * x + tf->f); }
+            return sign * (powf_(tf->a * x + tf->b, tf->g) + tf->e);
+
+        case PQish:
+            return sign * powf_(fmaxf_(pq.A + pq.B * powf_(x, pq.C), 0)
+                                    / (pq.D + pq.E * powf_(x, pq.C)),
+                                pq.F);
+
+        case HLGish: {
+            const float scale = hlg.K_minus_1 + 1.0f;
+            if (x*hlg.R <= 1) { return scale * sign * powf_(x*hlg.R, hlg.G); }
+            return scale * sign * (expf_((x-hlg.c)*hlg.a) + hlg.b);
+        }
+
+        // skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast.
+        case HLGinvish: {
+            const float scale = hlg.K_minus_1 + 1.0f;
+            x /= scale;
+            if (x <= 1) { return sign * (hlg.R * powf_(x, hlg.G)); }
+            return sign * (hlg.a * logf_(x - hlg.b) + hlg.c);
+        }
+    }
+    return 0;
+}
+
+
+static float eval_curve(const skcms_Curve* curve, float x) {
+    if (curve->table_entries == 0) { return skcms_TransferFunction_eval(&curve->parametric, x); }
+
+    // Clamp x to [0,1], scale it to a table position, and pick the two entries around it.
+    const float pos  = fmaxf_(0, fminf_(x, 1)) * static_cast<float>(curve->table_entries - 1);
+    const int   lo   = (int)pos;
+    const int   hi   = (int)(float)minus_1_ulp(pos + 1.0f);
+    const float frac = pos - (float)lo;
+
+    float below, above;
+    if (!curve->table_8) {
+        // 16-bit entries are copied out two bytes at a time and then byte-swapped.
+        uint16_t raw_lo, raw_hi;
+        memcpy(&raw_lo, curve->table_16 + 2*lo, 2);
+        memcpy(&raw_hi, curve->table_16 + 2*hi, 2);
+        const uint16_t val_lo = ((raw_lo << 8) | (raw_lo >> 8)) & 0xffff;
+        const uint16_t val_hi = ((raw_hi << 8) | (raw_hi >> 8)) & 0xffff;
+        below = val_lo * (1/65535.0f);
+        above = val_hi * (1/65535.0f);
+    } else {
+        below = curve->table_8[lo] * (1/255.0f);
+        above = curve->table_8[hi] * (1/255.0f);
+    }
+    return below + (above-below)*frac;
+}
+
+float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
+    const uint32_t samples = curve->table_entries > 256 ? curve->table_entries : 256;
+    const float step = 1.0f / static_cast<float>(samples - 1);
+    float worst = 0;  // largest |x - inv_tf(curve(x))| seen so far
+    for (uint32_t i = 0; i != samples; ++i) {
+        const float x = static_cast<float>(i) * step;
+        const float roundtrip = skcms_TransferFunction_eval(inv_tf, eval_curve(curve, x));
+        worst = fmaxf_(worst, fabsf_(x - roundtrip));
+    }
+    return worst;
+}
+
+bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
+    return skcms_MaxRoundtripError(curve, inv_tf) < (1/512.0f);  // worst sampled error under 1/512
+}
+
+// Additional ICC signature values that are only used internally
+enum {
+    // File signature
+    skcms_Signature_acsp = 0x61637370,
+
+    // Tag signatures
+    skcms_Signature_rTRC = 0x72545243,
+    skcms_Signature_gTRC = 0x67545243,
+    skcms_Signature_bTRC = 0x62545243,
+    skcms_Signature_kTRC = 0x6B545243,
+
+    skcms_Signature_rXYZ = 0x7258595A,
+    skcms_Signature_gXYZ = 0x6758595A,
+    skcms_Signature_bXYZ = 0x6258595A,
+
+    skcms_Signature_A2B0 = 0x41324230,
+    skcms_Signature_B2A0 = 0x42324130,
+
+    skcms_Signature_CHAD = 0x63686164,
+    skcms_Signature_WTPT = 0x77747074,
+
+    skcms_Signature_CICP = 0x63696370,
+
+    // Type signatures
+    skcms_Signature_curv = 0x63757276,
+    skcms_Signature_mft1 = 0x6D667431,
+    skcms_Signature_mft2 = 0x6D667432,
+    skcms_Signature_mAB  = 0x6D414220,
+    skcms_Signature_mBA  = 0x6D424120,
+    skcms_Signature_para = 0x70617261,
+    skcms_Signature_sf32 = 0x73663332,
+    // XYZ is also a PCS signature, so it's defined in skcms.h
+    // skcms_Signature_XYZ = 0x58595A20,
+};
+
+static uint16_t read_big_u16(const uint8_t* ptr) {
+    uint16_t raw;  // the two bytes exactly as stored, before swapping
+    memcpy(&raw, ptr, sizeof(raw));
+#if !defined(_MSC_VER)
+    return __builtin_bswap16(raw);
+#else
+    return _byteswap_ushort(raw);
+#endif
+}
+
+static uint32_t read_big_u32(const uint8_t* ptr) {
+    uint32_t raw;  // the four bytes exactly as stored, before swapping
+    memcpy(&raw, ptr, sizeof(raw));
+#if !defined(_MSC_VER)
+    return __builtin_bswap32(raw);
+#else
+    return _byteswap_ulong(raw);
+#endif
+}
+
+static int32_t read_big_i32(const uint8_t* ptr) {
+    return static_cast<int32_t>(read_big_u32(ptr));  // same 32 bits, reinterpreted as signed
+}
+
+static float read_big_fixed(const uint8_t* ptr) {
+    return (float)read_big_i32(ptr) * (1.0f / 65536.0f);  // signed 32-bit value scaled by 2^-16
+}
+
+// Maps to an in-memory profile so that fields line up to the locations specified
+// in ICC.1:2010, section 7.2
+typedef struct {
+    uint8_t size                [ 4];
+    uint8_t cmm_type            [ 4];
+    uint8_t version             [ 4];
+    uint8_t profile_class       [ 4];
+    uint8_t data_color_space    [ 4];
+    uint8_t pcs                 [ 4];
+    uint8_t creation_date_time  [12];
+    uint8_t signature           [ 4];
+    uint8_t platform            [ 4];
+    uint8_t flags               [ 4];
+    uint8_t device_manufacturer [ 4];
+    uint8_t device_model        [ 4];
+    uint8_t device_attributes   [ 8];
+    uint8_t rendering_intent    [ 4];
+    uint8_t illuminant_X        [ 4];
+    uint8_t illuminant_Y        [ 4];
+    uint8_t illuminant_Z        [ 4];
+    uint8_t creator             [ 4];
+    uint8_t profile_id          [16];
+    uint8_t reserved            [28];
+    uint8_t tag_count           [ 4]; // Technically not part of header, but required
+} header_Layout;
+
+typedef struct {
+    uint8_t signature [4];
+    uint8_t offset    [4];
+    uint8_t size      [4];
+} tag_Layout;
+
+static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
+    return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));  // just past header_Layout
+}
+
+// s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
+// use of the type is for the CHAD tag that stores exactly nine values.
+typedef struct {
+    uint8_t type     [ 4];
+    uint8_t reserved [ 4];
+    uint8_t values   [36];
+} sf32_Layout;
+
+bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
+    // The CHAD tag must exist, be typed sf32, and be large enough for the nine-value layout.
+    skcms_ICCTag tag;
+    const bool found = skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag);
+    if (!found || tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
+        return false;
+    }
+
+    // The values are consecutive four-byte fixed-point numbers, read in row-major order.
+    const sf32_Layout* chad = (const sf32_Layout*)tag.buf;
+    for (int i = 0; i < 9; ++i) {
+        const int row = i / 3,
+                  col = i % 3;
+        m->vals[row][col] = read_big_fixed(chad->values + 4*i);
+    }
+
+    return true;
+}
+
+// XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
+// the type are for tags/data that store exactly one triple.
+typedef struct {
+    uint8_t type     [4];
+    uint8_t reserved [4];
+    uint8_t X        [4];
+    uint8_t Y        [4];
+    uint8_t Z        [4];
+} XYZ_Layout;
+
+static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
+    // Only an XYZ-typed tag with room for one full triple is accepted.
+    const bool is_xyz     = tag->type == skcms_Signature_XYZ;
+    const bool big_enough = tag->size >= SAFE_SIZEOF(XYZ_Layout);
+    if (!(is_xyz && big_enough)) { return false; }
+
+    const XYZ_Layout* triple = (const XYZ_Layout*)tag->buf;
+    *x = read_big_fixed(triple->X);
+    *y = read_big_fixed(triple->Y);
+    *z = read_big_fixed(triple->Z);
+    return true;
+}
+
+bool skcms_GetWTPT(const skcms_ICCProfile* profile, float xyz[3]) {
+    skcms_ICCTag wtpt;  // only read after the lookup below has succeeded
+    if (!skcms_GetTagBySignature(profile, skcms_Signature_WTPT, &wtpt)) { return false; }
+    return read_tag_xyz(&wtpt, &xyz[0], &xyz[1], &xyz[2]);
+}
+
+static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
+                           const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {  // r,g,b fill columns 0,1,2
+    bool ok = read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]);
+    ok = ok && read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]);
+    return ok && read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
+}
+
+typedef struct {
+    uint8_t type          [4];
+    uint8_t reserved_a    [4];
+    uint8_t function_type [2];
+    uint8_t reserved_b    [2];
+    uint8_t variable      [1/*variable*/];  // 1, 3, 4, 5, or 7 s15.16, depending on function_type
+} para_Layout;
+
+static bool read_curve_para(const uint8_t* buf, uint32_t size,
+                            skcms_Curve* curve, uint32_t* curve_size) {
+    if (size < SAFE_FIXED_SIZE(para_Layout)) {  // too short to hold the fixed part of the tag
+        return false;
+    }
+
+    const para_Layout* paraTag = (const para_Layout*)buf;
+
+    enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };  // names list the stored parameters
+    uint16_t function_type = read_big_u16(paraTag->function_type);
+    if (function_type > kGABCDEF) {
+        return false;
+    }
+
+    static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };  // parameter bytes, indexed by function_type
+    if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
+        return false;
+    }
+
+    if (curve_size) {  // optional out-param; written here even if a later check below fails
+        *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
+    }
+
+    curve->table_entries = 0;  // zero entries: eval_curve() evaluates the parametric form
+    curve->parametric.a  = 1.0f;
+    curve->parametric.b  = 0.0f;
+    curve->parametric.c  = 0.0f;
+    curve->parametric.d  = 0.0f;
+    curve->parametric.e  = 0.0f;
+    curve->parametric.f  = 0.0f;
+    curve->parametric.g  = read_big_fixed(paraTag->variable);  // g is always the first stored value
+
+    switch (function_type) {  // kG has no case: the defaults above already describe it
+        case kGAB:
+            curve->parametric.a = read_big_fixed(paraTag->variable + 4);
+            curve->parametric.b = read_big_fixed(paraTag->variable + 8);
+            if (curve->parametric.a == 0) {  // guards the division below
+                return false;
+            }
+            curve->parametric.d = -curve->parametric.b / curve->parametric.a;  // where a*x + b == 0
+            break;
+        case kGABC:
+            curve->parametric.a = read_big_fixed(paraTag->variable + 4);
+            curve->parametric.b = read_big_fixed(paraTag->variable + 8);
+            curve->parametric.e = read_big_fixed(paraTag->variable + 12);  // third stored value goes to e here
+            if (curve->parametric.a == 0) {  // guards the division below
+                return false;
+            }
+            curve->parametric.d = -curve->parametric.b / curve->parametric.a;  // where a*x + b == 0
+            curve->parametric.f = curve->parametric.e;  // f mirrors e (c stays 0)
+            break;
+        case kGABCD:
+            curve->parametric.a = read_big_fixed(paraTag->variable + 4);
+            curve->parametric.b = read_big_fixed(paraTag->variable + 8);
+            curve->parametric.c = read_big_fixed(paraTag->variable + 12);  // third stored value goes to c here
+            curve->parametric.d = read_big_fixed(paraTag->variable + 16);
+            break;
+        case kGABCDEF:
+            curve->parametric.a = read_big_fixed(paraTag->variable + 4);
+            curve->parametric.b = read_big_fixed(paraTag->variable + 8);
+            curve->parametric.c = read_big_fixed(paraTag->variable + 12);
+            curve->parametric.d = read_big_fixed(paraTag->variable + 16);
+            curve->parametric.e = read_big_fixed(paraTag->variable + 20);
+            curve->parametric.f = read_big_fixed(paraTag->variable + 24);
+            break;
+    }
+    return skcms_TransferFunction_isSRGBish(&curve->parametric);  // reject unsound parameter sets
+}
+
+typedef struct {
+    uint8_t type          [4];
+    uint8_t reserved      [4];
+    uint8_t value_count   [4];
+    uint8_t variable      [1/*variable*/];  // value_count, 8.8 if 1, uint16 (n*65535) if > 1
+} curv_Layout;
+
+static bool read_curve_curv(const uint8_t* buf, uint32_t size,
+                            skcms_Curve* curve, uint32_t* curve_size) {
+    // The fixed part of the tag must be present before value_count can be read.
+    const uint64_t fixed_bytes = SAFE_FIXED_SIZE(curv_Layout);
+    if (size < fixed_bytes) {
+        return false;
+    }
+
+    const curv_Layout* tag = (const curv_Layout*)buf;
+
+    // The total is computed in 64 bits, so a huge value_count cannot wrap the size check.
+    const uint32_t value_count = read_big_u32(tag->value_count);
+    const uint64_t total_bytes = fixed_bytes + value_count * SAFE_SIZEOF(uint16_t);
+    if (size < total_bytes) {
+        return false;
+    }
+    if (curve_size) {
+        *curve_size = total_bytes;
+    }
+
+    if (value_count >= 2) {
+        // A real table: point at the 16-bit entries in place.
+        curve->table_8       = nullptr;
+        curve->table_16      = tag->variable;
+        curve->table_entries = value_count;
+        return true;
+    }
+
+    // Zero or one entry is shorthand for a pure gamma curve: identity, or a single 8.8 exponent.
+    curve->table_entries = 0;
+    curve->parametric.a  = 1.0f;
+    curve->parametric.b  = 0.0f;
+    curve->parametric.c  = 0.0f;
+    curve->parametric.d  = 0.0f;
+    curve->parametric.e  = 0.0f;
+    curve->parametric.f  = 0.0f;
+    curve->parametric.g  = value_count == 0 ? 1.0f
+                                            : read_big_u16(tag->variable) * (1.0f / 256.0f);
+    return true;
+}
+
+// Dispatches on the leading four-byte type signature to the parametricCurveType or curveType
+// parser, which read at most 'size' bytes and report the bytes used through curve_size.
+static bool read_curve(const uint8_t* buf, uint32_t size,
+                       skcms_Curve* curve, uint32_t* curve_size) {
+    if (buf == nullptr || curve == nullptr || size < 4) {
+        return false;
+    }
+
+    switch (read_big_u32(buf)) {
+        case skcms_Signature_para: return read_curve_para(buf, size, curve, curve_size);
+        case skcms_Signature_curv: return read_curve_curv(buf, size, curve, curve_size);
+        default: break;
+    }
+
+    // Any other type signature is not a curve this parser understands.
+    return false;
+}
+
+// mft1 and mft2 share a large chunk of data
+typedef struct {
+    uint8_t type                 [ 4];
+    uint8_t reserved_a           [ 4];
+    uint8_t input_channels       [ 1];
+    uint8_t output_channels      [ 1];
+    uint8_t grid_points          [ 1];
+    uint8_t reserved_b           [ 1];
+    uint8_t matrix               [36];
+} mft_CommonLayout;
+
+typedef struct {
+    mft_CommonLayout common      [1];
+
+    uint8_t variable             [1/*variable*/];
+} mft1_Layout;
+
+typedef struct {
+    mft_CommonLayout common      [1];
+
+    uint8_t input_table_entries  [2];
+    uint8_t output_table_entries [2];
+    uint8_t variable             [1/*variable*/];
+} mft2_Layout;
+
+static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
+    // An MFT matrix is applied before the first set of curves and must be identity unless the
+    // input is PCSXYZ. PCSXYZ profiles are not supported, so the matrix is ignored here. (The
+    // matrix in skcms_A2B is applied later in the pipe, so honoring this one would need another
+    // field/flag.)
+    a2b->matrix_channels = 0;
+    a2b->input_channels  = mftTag->input_channels[0];
+    a2b->output_channels = mftTag->output_channels[0];
+
+    // Outputs must be exactly three (ie XYZ/Lab/RGB); inputs at least one and no more than
+    // four (ie CMYK).
+    const bool outputs_ok = a2b->output_channels == ARRAY_COUNT(a2b->output_curves);
+    const bool inputs_ok  = a2b->input_channels >= 1
+                         && a2b->input_channels <= ARRAY_COUNT(a2b->input_curves);
+    if (!outputs_ok || !inputs_ok) {
+        return false;
+    }
+
+    // Every input axis shares the single grid_points byte stored in the tag.
+    const uint8_t points = mftTag->grid_points[0];
+    for (uint32_t axis = 0; axis != a2b->input_channels; ++axis) {
+        a2b->grid_points[axis] = points;
+    }
+
+    // The grid only makes sense with at least two points along each axis.
+    return a2b->grid_points[0] >= 2;
+}
+
+// B2A counterpart of the A2B reader above; the differences are called out inline.
+static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_B2A* b2a) {
+    // As for A2B: the tag's matrix is ignored.
+    b2a->matrix_channels = 0;
+    b2a->input_channels  = mftTag->input_channels[0];
+    b2a->output_channels = mftTag->output_channels[0];
+
+    // Unlike A2B: exactly 3 input channels (XYZ) and 3 (RGB) or 4 (CMYK) output channels.
+    const bool inputs_ok  = b2a->input_channels == ARRAY_COUNT(b2a->input_curves);
+    const bool outputs_ok = b2a->output_channels >= 3
+                         && b2a->output_channels <= ARRAY_COUNT(b2a->output_curves);
+    if (!inputs_ok || !outputs_ok) {
+        return false;
+    }
+
+    // As for A2B: one shared grid_points byte for every axis, and at least two points.
+    const uint8_t points = mftTag->grid_points[0];
+    for (uint32_t axis = 0; axis != b2a->input_channels; ++axis) {
+        b2a->grid_points[axis] = points;
+    }
+    if (b2a->grid_points[0] < 2) {
+        return false;
+    }
+    return true;
+}
+
+template <typename A2B_or_B2A>
+static bool init_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
+                        uint32_t input_table_entries, uint32_t output_table_entries,
+                        A2B_or_B2A* out) {
+    // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow.
+    // NOTE(review): those ranges are not checked here; presumably the callers enforce them.
+    uint32_t byte_len_per_input_table  = input_table_entries * byte_width;
+    uint32_t byte_len_per_output_table = output_table_entries * byte_width;
+
+    // [input|output]_channels are <= 4, so still no overflow
+    uint32_t byte_len_all_input_tables  = out->input_channels * byte_len_per_input_table;
+    uint32_t byte_len_all_output_tables = out->output_channels * byte_len_per_output_table;
+
+    uint64_t grid_size = out->output_channels * byte_width;  // bytes stored per grid point
+    for (uint32_t axis = 0; axis < out->input_channels; ++axis) {
+        grid_size *= out->grid_points[axis];  // times the number of points along each input axis
+    }
+
+    if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
+        return false;  // input tables + grid + output tables would not fit in the given length
+    }
+
+    for (uint32_t i = 0; i < out->input_channels; ++i) {  // input tables start at table_base, back to back
+        out->input_curves[i].table_entries = input_table_entries;
+        if (byte_width == 1) {
+            out->input_curves[i].table_8  = table_base + i * byte_len_per_input_table;
+            out->input_curves[i].table_16 = nullptr;
+        } else {
+            out->input_curves[i].table_8  = nullptr;
+            out->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
+        }
+    }
+
+    if (byte_width == 1) {  // the grid follows directly after the last input table
+        out->grid_8  = table_base + byte_len_all_input_tables;
+        out->grid_16 = nullptr;
+    } else {
+        out->grid_8  = nullptr;
+        out->grid_16 = table_base + byte_len_all_input_tables;
+    }
+
+    const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;  // right after the grid
+    for (uint32_t i = 0; i < out->output_channels; ++i) {
+        out->output_curves[i].table_entries = output_table_entries;
+        if (byte_width == 1) {
+            out->output_curves[i].table_8  = output_table_base + i * byte_len_per_output_table;
+            out->output_curves[i].table_16 = nullptr;
+        } else {
+            out->output_curves[i].table_8  = nullptr;
+            out->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
+        }
+    }
+
+    return true;
+}
+
+// Parses an 'mft1' tag into *out.  This reader uses one-byte table entries and fixes both
+// the input and the output table length at 256 entries.
+template <typename A2B_or_B2A>
+static bool read_tag_mft1(const skcms_ICCTag* tag, A2B_or_B2A* out) {
+    const auto fixed_bytes = SAFE_FIXED_SIZE(mft1_Layout);
+    if (!(tag->size >= fixed_bytes)) {
+        return false;
+    }
+
+    const mft1_Layout* layout = (const mft1_Layout*)tag->buf;
+    if (!read_mft_common(layout->common, out)) {
+        return false;
+    }
+
+    const uint32_t table_entries = 256;
+
+    // Whatever follows the fixed-size header is table data.
+    return init_tables(layout->variable, tag->size - fixed_bytes, 1,
+                       table_entries, table_entries, out);
+}
+
+// Parses an 'mft2' tag into *out.  This reader uses two-byte table entries and takes the
+// input and output table lengths from the tag itself.
+template <typename A2B_or_B2A>
+static bool read_tag_mft2(const skcms_ICCTag* tag, A2B_or_B2A* out) {
+    const auto fixed_bytes = SAFE_FIXED_SIZE(mft2_Layout);
+    if (!(tag->size >= fixed_bytes)) {
+        return false;
+    }
+
+    const mft2_Layout* layout = (const mft2_Layout*)tag->buf;
+    if (!read_mft_common(layout->common, out)) {
+        return false;
+    }
+
+    const uint32_t in_entries  = read_big_u16(layout->input_table_entries);
+    const uint32_t out_entries = read_big_u16(layout->output_table_entries);
+
+    // ICC spec mandates that 2 <= table_entries <= 4096
+    auto in_spec_range = [](uint32_t entries) { return entries >= 2 && entries <= 4096; };
+    if (!in_spec_range(in_entries) || !in_spec_range(out_entries)) {
+        return false;
+    }
+
+    // Whatever follows the fixed-size header is table data.
+    return init_tables(layout->variable, tag->size - fixed_bytes, 2,
+                       in_entries, out_entries, out);
+}
+
+// Reads num_curves consecutive curves from buf (size bytes long), the first one starting at
+// curve_offset.  After each curve the offset advances by that curve's byte length rounded
+// up to a multiple of four.  Returns false if any curve fails to parse or the offset leaves
+// the buffer or the 32-bit range.
+static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
+                        uint32_t num_curves, skcms_Curve* curves) {
+    for (uint32_t n = 0; n < num_curves; ++n) {
+        if (curve_offset > size) {
+            return false;
+        }
+
+        uint32_t consumed;
+        if (!read_curve(buf + curve_offset, size - curve_offset, curves + n, &consumed)) {
+            return false;
+        }
+
+        // Round up to 4-byte alignment, refusing lengths the rounding would wrap.
+        if (consumed > UINT32_MAX - 3) {
+            return false;
+        }
+        const uint32_t padded = (consumed + 3) & ~3U;
+
+        // Advance in 64 bits so an offset that no longer fits in 32 bits is detected.
+        const uint64_t next_offset = (uint64_t)curve_offset + padded;
+        if (next_offset > UINT32_MAX) {
+            return false;
+        }
+        curve_offset = (uint32_t)next_offset;
+    }
+
+    return true;
+}
+
+// mAB and mBA tags use the same encoding, including color lookup tables.
+// Every field is a raw byte array.  The parsers below decode the *_offset fields with
+// read_big_u32() and apply them relative to the start of the tag (tag->buf); they treat an
+// offset of zero as "this stage is absent".
+typedef struct {
+    uint8_t type                 [ 4];
+    uint8_t reserved_a           [ 4];
+    uint8_t input_channels       [ 1];
+    uint8_t output_channels      [ 1];
+    uint8_t reserved_b           [ 2];
+    uint8_t b_curve_offset       [ 4];  // must be non-zero; the parsers reject the tag otherwise
+    uint8_t matrix_offset        [ 4];  // must be zero exactly when m_curve_offset is zero
+    uint8_t m_curve_offset       [ 4];
+    uint8_t clut_offset          [ 4];  // must be zero exactly when a_curve_offset is zero
+    uint8_t a_curve_offset       [ 4];
+} mAB_or_mBA_Layout;
+
+// Header of the color lookup table located at clut_offset inside an mAB/mBA tag.
+typedef struct {
+    uint8_t grid_points          [16];  // per-axis counts; parsers read the first input_channels
+    uint8_t grid_byte_width      [ 1];  // parsers accept only 1 or 2
+    uint8_t reserved             [ 3];
+    uint8_t variable             [1/*variable*/];  // first byte of the grid payload
+} CLUT_Layout;
+
+// Parses an mAB tag into *a2b.  Returns false if the tag is malformed or uses a layout this
+// parser does not accept.  *a2b is filled in as parsing proceeds, so it may be left
+// partially written when false is returned.
+//   tag:        the raw tag; offsets read from it are checked against tag->size.
+//   pcs_is_xyz: when true, the matrix values are scaled by 65535/32768.
+static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
+    if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
+        return false;
+    }
+
+    const mAB_or_mBA_Layout* mABTag = (const mAB_or_mBA_Layout*)tag->buf;
+
+    a2b->input_channels  = mABTag->input_channels[0];
+    a2b->output_channels = mABTag->output_channels[0];
+
+    // We require exactly three (ie XYZ/Lab/RGB) output channels
+    if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
+        return false;
+    }
+    // We require no more than four (ie CMYK) input channels
+    if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
+        return false;
+    }
+
+    // All offsets are relative to the start of the tag.
+    uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
+    uint32_t matrix_offset  = read_big_u32(mABTag->matrix_offset);
+    uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
+    uint32_t clut_offset    = read_big_u32(mABTag->clut_offset);
+    uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
+
+    // "B" curves must be present
+    if (0 == b_curve_offset) {
+        return false;
+    }
+
+    // For A2B the "B" curves are the output curves.
+    if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
+                     a2b->output_curves)) {
+        return false;
+    }
+
+    // "M" curves and Matrix must be used together
+    if (0 != m_curve_offset) {
+        if (0 == matrix_offset) {
+            return false;
+        }
+        a2b->matrix_channels = a2b->output_channels;
+        if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
+                         a2b->matrix_curves)) {
+            return false;
+        }
+
+        // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
+        if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
+            return false;
+        }
+        float encoding_factor = pcs_is_xyz ? (65535 / 32768.0f) : 1.0f;
+        const uint8_t* mtx_buf = tag->buf + matrix_offset;
+        a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf +  0);
+        a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf +  4);
+        a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf +  8);
+        a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
+        a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
+        a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
+        a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
+        a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
+        a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
+        a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
+        a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
+        a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
+    } else {
+        if (0 != matrix_offset) {
+            return false;
+        }
+        // Zero matrix channels marks the matrix stage as absent.
+        a2b->matrix_channels = 0;
+    }
+
+    // "A" curves and CLUT must be used together
+    if (0 != a_curve_offset) {
+        if (0 == clut_offset) {
+            return false;
+        }
+        // For A2B the "A" curves are the input curves.
+        if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
+                         a2b->input_curves)) {
+            return false;
+        }
+
+        // Make sure the fixed part of the CLUT header is inside the tag before reading it.
+        if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
+            return false;
+        }
+        const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
+
+        // Only 1- and 2-byte grid entries are accepted; exactly one grid pointer is set.
+        if (clut->grid_byte_width[0] == 1) {
+            a2b->grid_8  = clut->variable;
+            a2b->grid_16 = nullptr;
+        } else if (clut->grid_byte_width[0] == 2) {
+            a2b->grid_8  = nullptr;
+            a2b->grid_16 = clut->variable;
+        } else {
+            return false;
+        }
+
+        uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0];  // the payload
+        for (uint32_t i = 0; i < a2b->input_channels; ++i) {
+            a2b->grid_points[i] = clut->grid_points[i];
+            // The grid only makes sense with at least two points along each axis
+            if (a2b->grid_points[i] < 2) {
+                return false;
+            }
+            grid_size *= a2b->grid_points[i];
+        }
+        // The whole grid payload must also fit inside the tag.
+        if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
+            return false;
+        }
+    } else {
+        if (0 != clut_offset) {
+            return false;
+        }
+
+        // If there is no CLUT, the number of input and output channels must match
+        if (a2b->input_channels != a2b->output_channels) {
+            return false;
+        }
+
+        // Zero out the number of input channels to signal that we're skipping this stage
+        a2b->input_channels = 0;
+    }
+
+    return true;
+}
+
+// Exactly the same as read_tag_mab(), except where there are comments.
+// TODO: refactor the two to eliminate common code?
+//
+// Parses an mBA tag into *b2a.  Returns false if the tag is malformed or uses a layout this
+// parser does not accept; *b2a may be left partially written in that case.
+//   tag:        the raw tag; offsets read from it are checked against tag->size.
+//   pcs_is_xyz: when true, the matrix values are scaled by 32768/65535.
+static bool read_tag_mba(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
+    if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
+        return false;
+    }
+
+    const mAB_or_mBA_Layout* mBATag = (const mAB_or_mBA_Layout*)tag->buf;
+
+    b2a->input_channels  = mBATag->input_channels[0];
+    b2a->output_channels = mBATag->output_channels[0];
+
+    // Require exactly 3 inputs (XYZ) and 3 (RGB) or 4 (CMYK) outputs.
+    if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
+        return false;
+    }
+    if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
+        return false;
+    }
+
+    uint32_t b_curve_offset = read_big_u32(mBATag->b_curve_offset);
+    uint32_t matrix_offset  = read_big_u32(mBATag->matrix_offset);
+    uint32_t m_curve_offset = read_big_u32(mBATag->m_curve_offset);
+    uint32_t clut_offset    = read_big_u32(mBATag->clut_offset);
+    uint32_t a_curve_offset = read_big_u32(mBATag->a_curve_offset);
+
+    if (0 == b_curve_offset) {
+        return false;
+    }
+
+    // "B" curves are our inputs, not outputs.
+    if (!read_curves(tag->buf, tag->size, b_curve_offset, b2a->input_channels,
+                     b2a->input_curves)) {
+        return false;
+    }
+
+    if (0 != m_curve_offset) {
+        if (0 == matrix_offset) {
+            return false;
+        }
+        // Matrix channels is tied to input_channels (3), not output_channels.
+        b2a->matrix_channels = b2a->input_channels;
+
+        if (!read_curves(tag->buf, tag->size, m_curve_offset, b2a->matrix_channels,
+                         b2a->matrix_curves)) {
+            return false;
+        }
+
+        // The matrix is twelve 4-byte values: a row-major 3x3 followed by the fourth column.
+        if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
+            return false;
+        }
+        float encoding_factor = pcs_is_xyz ? (32768 / 65535.0f) : 1.0f;  // TODO: understand
+        const uint8_t* mtx_buf = tag->buf + matrix_offset;
+        b2a->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf +  0);
+        b2a->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf +  4);
+        b2a->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf +  8);
+        b2a->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
+        b2a->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
+        b2a->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
+        b2a->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
+        b2a->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
+        b2a->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
+        b2a->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
+        b2a->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
+        b2a->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
+    } else {
+        if (0 != matrix_offset) {
+            return false;
+        }
+        b2a->matrix_channels = 0;
+    }
+
+    if (0 != a_curve_offset) {
+        if (0 == clut_offset) {
+            return false;
+        }
+
+        // "A" curves are our output, not input.
+        if (!read_curves(tag->buf, tag->size, a_curve_offset, b2a->output_channels,
+                         b2a->output_curves)) {
+            return false;
+        }
+
+        // Make sure the fixed part of the CLUT header is inside the tag before reading it.
+        if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
+            return false;
+        }
+        const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
+
+        // Only 1- and 2-byte grid entries are accepted; exactly one grid pointer is set.
+        if (clut->grid_byte_width[0] == 1) {
+            b2a->grid_8  = clut->variable;
+            b2a->grid_16 = nullptr;
+        } else if (clut->grid_byte_width[0] == 2) {
+            b2a->grid_8  = nullptr;
+            b2a->grid_16 = clut->variable;
+        } else {
+            return false;
+        }
+
+        // Payload size: bytes per grid cell times the grid points along every input axis.
+        uint64_t grid_size = b2a->output_channels * clut->grid_byte_width[0];
+        for (uint32_t i = 0; i < b2a->input_channels; ++i) {
+            b2a->grid_points[i] = clut->grid_points[i];
+            if (b2a->grid_points[i] < 2) {
+                return false;
+            }
+            grid_size *= b2a->grid_points[i];
+        }
+        if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
+            return false;
+        }
+    } else {
+        if (0 != clut_offset) {
+            return false;
+        }
+
+        if (b2a->input_channels != b2a->output_channels) {
+            return false;
+        }
+
+        // Zero out *output* channels to skip this stage.
+        b2a->output_channels = 0;
+    }
+    return true;
+}
+
+// Fits a line c*x + f to the leading samples of `curve`, evaluated at N evenly spaced
+// x values from 0 to 1, such that each fitted sample is within `tol` of the line.
+// Returns the number of leading samples covered by the fit (at least 1) and stores in *d
+// the x value of the last covered sample.  *c is written only if at least one sample
+// beyond the first is accepted, so callers should initialize it.
+//
+// If you pass f, we'll fit a possibly-non-zero value for *f.
+// If you pass nullptr, we'll assume you want *f to be treated as zero.
+static int fit_linear(const skcms_Curve* curve, int N, float tol,
+                      float* c, float* d, float* f = nullptr) {
+    assert(N > 1);
+    // We iteratively fit the first points to the TF's linear piece.
+    // We want the cx + f line to pass through the first and last points we fit exactly.
+    //
+    // As we walk along the points we find the minimum and maximum slope of the line before the
+    // error would exceed our tolerance.  We stop when the range [slope_min, slope_max] becomes
+    // empty, when we definitely can't add any more points.
+    //
+    // Some points' error intervals may intersect the running interval but not lie fully
+    // within it.  So we keep track of the last point we saw that is a valid end point candidate,
+    // and once the search is done, back up to build the line through *that* point.
+    const float dx = 1.0f / static_cast<float>(N - 1);
+
+    int lin_points = 1;
+
+    // When the caller passed no f, point it at a local zero so the loop can always read *f.
+    float f_zero = 0.0f;
+    if (f) {
+        *f = eval_curve(curve, 0);
+    } else {
+        f = &f_zero;
+    }
+
+
+    float slope_min = -INFINITY_;
+    float slope_max = +INFINITY_;
+    for (int i = 1; i < N; ++i) {
+        float x = static_cast<float>(i) * dx;
+        float y = eval_curve(curve, x);
+
+        // Range of slopes through (0, *f) that keep this sample within tol.
+        float slope_max_i = (y + tol - *f) / x,
+              slope_min_i = (y - tol - *f) / x;
+        if (slope_max_i < slope_min || slope_max < slope_min_i) {
+            // Slope intervals would no longer overlap.
+            break;
+        }
+        slope_max = fminf_(slope_max, slope_max_i);
+        slope_min = fmaxf_(slope_min, slope_min_i);
+
+        // This sample is an end-point candidate only if the exact line through it is
+        // still inside the running slope interval.
+        float cur_slope = (y - *f) / x;
+        if (slope_min <= cur_slope && cur_slope <= slope_max) {
+            lin_points = i + 1;
+            *c = cur_slope;
+        }
+    }
+
+    // Set D to the last point that met our tolerance.
+    *d = static_cast<float>(lin_points - 1) * dx;
+    return lin_points;
+}
+
+// Replaces a table-based curve with the parametric identity transfer function when the
+// table is, within tolerance, the identity.  Curves that are already parametric
+// (table_entries == 0) or whose entry count does not fit in an int are left alone.
+static void canonicalize_identity(skcms_Curve* curve) {
+    const uint32_t entries = curve->table_entries;
+    if (entries == 0 || entries > (uint32_t)INT_MAX) {
+        return;
+    }
+    const int N = (int)entries;
+
+    // The table counts as identity when a single line covers all N samples and that
+    // line is exactly y = x.
+    float c = 0.0f, d = 0.0f, f = 0.0f;
+    const float tolerance = 1.0f/static_cast<float>(2*N);
+    const int covered = fit_linear(curve, N, tolerance, &c,&d,&f);
+    if (covered != N) {
+        return;
+    }
+    if (!(c == 1.0f && f == 0.0f)) {
+        return;
+    }
+
+    curve->table_entries = 0;
+    curve->table_8       = nullptr;
+    curve->table_16      = nullptr;
+    curve->parametric    = skcms_TransferFunction{1,1,0,0,0,0,0};
+}
+
+// Parses an A2B tag of type mft1, mft2, or mAB into *a2b, then canonicalizes any curves
+// that turn out to be identity tables.  Returns false for any other tag type or if the
+// type-specific parser rejects the tag.
+static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
+    bool parsed = false;
+    if (tag->type == skcms_Signature_mft1) {
+        parsed = read_tag_mft1(tag, a2b);
+    } else if (tag->type == skcms_Signature_mft2) {
+        parsed = read_tag_mft2(tag, a2b);
+    } else if (tag->type == skcms_Signature_mAB ) {
+        parsed = read_tag_mab(tag, a2b, pcs_is_xyz);
+    }
+    if (!parsed) {
+        return false;
+    }
+
+    // At most 4 input, 3 matrix, and 3 output curves are ever touched.
+    for (uint32_t i = 0; i < 4 && i < a2b->input_channels; ++i) {
+        canonicalize_identity(a2b->input_curves + i);
+    }
+    for (uint32_t i = 0; i < 3 && i < a2b->matrix_channels; ++i) {
+        canonicalize_identity(a2b->matrix_curves + i);
+    }
+    for (uint32_t i = 0; i < 3 && i < a2b->output_channels; ++i) {
+        canonicalize_identity(a2b->output_curves + i);
+    }
+
+    return true;
+}
+
+// Parses a B2A tag of type mft1, mft2, or mBA into *b2a, then canonicalizes any curves
+// that turn out to be identity tables.  Returns false for any other tag type or if the
+// type-specific parser rejects the tag.
+static bool read_b2a(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
+    bool parsed = false;
+    if (tag->type == skcms_Signature_mft1) {
+        parsed = read_tag_mft1(tag, b2a);
+    } else if (tag->type == skcms_Signature_mft2) {
+        parsed = read_tag_mft2(tag, b2a);
+    } else if (tag->type == skcms_Signature_mBA ) {
+        parsed = read_tag_mba(tag, b2a, pcs_is_xyz);
+    }
+    if (!parsed) {
+        return false;
+    }
+
+    // At most 3 input, 3 matrix, and 4 output curves are ever touched.
+    for (uint32_t i = 0; i < 3 && i < b2a->input_channels; ++i) {
+        canonicalize_identity(b2a->input_curves + i);
+    }
+    for (uint32_t i = 0; i < 3 && i < b2a->matrix_channels; ++i) {
+        canonicalize_identity(b2a->matrix_curves + i);
+    }
+    for (uint32_t i = 0; i < 4 && i < b2a->output_channels; ++i) {
+        canonicalize_identity(b2a->output_curves + i);
+    }
+
+    return true;
+}
+
+// Byte layout of a CICP tag as consumed by read_cicp(): a type signature and a reserved
+// word, followed by four single-byte code points that read_cicp() copies out verbatim.
+typedef struct {
+    uint8_t type                     [4];
+    uint8_t reserved                 [4];
+    uint8_t color_primaries          [1];
+    uint8_t transfer_characteristics [1];
+    uint8_t matrix_coefficients      [1];
+    uint8_t video_full_range_flag    [1];
+} CICP_Layout;
+
+// Copies the four code points of a CICP tag into *cicp.  Returns false, leaving *cicp
+// untouched, if the tag is not of CICP type or is too small to hold a CICP_Layout.
+static bool read_cicp(const skcms_ICCTag* tag, skcms_CICP* cicp) {
+    const bool is_cicp    = tag->type == skcms_Signature_CICP;
+    const bool big_enough = tag->size >= SAFE_SIZEOF(CICP_Layout);
+    if (!(is_cicp && big_enough)) {
+        return false;
+    }
+
+    const CICP_Layout* layout = (const CICP_Layout*)tag->buf;
+
+    cicp->color_primaries          = layout->color_primaries[0];
+    cicp->transfer_characteristics = layout->transfer_characteristics[0];
+    cicp->matrix_coefficients      = layout->matrix_coefficients[0];
+    cicp->video_full_range_flag    = layout->video_full_range_flag[0];
+    return true;
+}
+
+// Describes the idx-th entry of the profile's tag table in *tag (signature, size, pointer
+// to the tag data inside profile->buffer, and the type read from the start of that data).
+// Does nothing, leaving *tag untouched, if any pointer is null, the profile has no
+// buffer, or idx is not a valid index into the tag table.
+void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
+    if (!profile || !profile->buffer || !tag) { return; }
+    // Valid indices are [0, tag_count); idx == tag_count would read one entry past the table.
+    if (idx >= profile->tag_count) { return; }
+    const tag_Layout* tags = get_tag_table(profile);
+    tag->signature = read_big_u32(tags[idx].signature);
+    tag->size      = read_big_u32(tags[idx].size);
+    tag->buf       = read_big_u32(tags[idx].offset) + profile->buffer;
+    tag->type      = read_big_u32(tag->buf);
+}
+
+// Finds the first tag-table entry whose signature equals sig and describes it in *tag.
+// Returns false, leaving *tag untouched, if no entry matches, any pointer is null, or the
+// profile has no buffer.
+bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
+    if (!profile || !profile->buffer || !tag) { return false; }
+
+    const tag_Layout* table = get_tag_table(profile);
+    for (uint32_t idx = 0; idx < profile->tag_count; ++idx) {
+        const tag_Layout& entry = table[idx];
+        if (read_big_u32(entry.signature) != sig) {
+            continue;
+        }
+
+        tag->signature = sig;
+        tag->size      = read_big_u32(entry.size);
+        tag->buf       = read_big_u32(entry.offset) + profile->buffer;
+        tag->type      = read_big_u32(tag->buf);
+        return true;
+    }
+    return false;
+}
+
+// A profile can serve as a source if it has an A2B transform, or failing that, both
+// transfer curves and a toXYZD50 matrix.
+static bool usable_as_src(const skcms_ICCProfile* profile) {
+    if (profile->has_A2B) {
+        return true;
+    }
+    return profile->has_trc && profile->has_toXYZD50;
+}
+
+// Parses the ICC profile stored in buf[0..len) into *profile.
+//   priority / priorities: rendering intents (0, 1, or 2) in the order in which the
+//                          matching A2B and B2A tags should be tried; the first tag found
+//                          for each direction is the one parsed.
+// *profile is zeroed first and then filled in as parsing proceeds, so it may be partially
+// written when false is returned.  profile->buffer aliases buf; no copy is made here.
+// Returns true only if everything parsed cleanly and the result is usable as a source
+// profile (it has an A2B transform, or both transfer curves and a toXYZD50 matrix).
+bool skcms_ParseWithA2BPriority(const void* buf, size_t len,
+                                const int priority[], const int priorities,
+                                skcms_ICCProfile* profile) {
+    assert(SAFE_SIZEOF(header_Layout) == 132);
+
+    if (!profile) {
+        return false;
+    }
+    memset(profile, 0, SAFE_SIZEOF(*profile));
+
+    if (len < SAFE_SIZEOF(header_Layout)) {
+        return false;
+    }
+
+    // Byte-swap all header fields
+    const header_Layout* header  = (const header_Layout*)buf;
+    profile->buffer              = (const uint8_t*)buf;
+    profile->size                = read_big_u32(header->size);
+    uint32_t version             = read_big_u32(header->version);
+    profile->data_color_space    = read_big_u32(header->data_color_space);
+    profile->pcs                 = read_big_u32(header->pcs);
+    uint32_t signature           = read_big_u32(header->signature);
+    float illuminant_X           = read_big_fixed(header->illuminant_X);
+    float illuminant_Y           = read_big_fixed(header->illuminant_Y);
+    float illuminant_Z           = read_big_fixed(header->illuminant_Z);
+    profile->tag_count           = read_big_u32(header->tag_count);
+
+    // Validate signature, size (smaller than buffer, large enough to hold tag table),
+    // and major version
+    uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
+    if (signature != skcms_Signature_acsp ||
+        profile->size > len ||
+        profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
+        (version >> 24) > 4) {
+        return false;
+    }
+
+    // Validate that illuminant is D50 white
+    if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
+        fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
+        fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
+        return false;
+    }
+
+    // Validate that all tag entries have sane offset + size
+    const tag_Layout* tags = get_tag_table(profile);
+    for (uint32_t i = 0; i < profile->tag_count; ++i) {
+        uint32_t tag_offset = read_big_u32(tags[i].offset);
+        uint32_t tag_size   = read_big_u32(tags[i].size);
+        // Summed in 64 bits so offset + size cannot wrap around.
+        uint64_t tag_end    = (uint64_t)tag_offset + (uint64_t)tag_size;
+        // Every tag must hold at least its 4-byte type and end inside the profile.
+        if (tag_size < 4 || tag_end > profile->size) {
+            return false;
+        }
+    }
+
+    // Only XYZ and Lab connection spaces are accepted.
+    if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
+        return false;
+    }
+
+    bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
+
+    // Pre-parse commonly used tags.
+    skcms_ICCTag kTRC;
+    if (profile->data_color_space == skcms_Signature_Gray &&
+        skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
+        if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) {
+            // Malformed tag
+            return false;
+        }
+        // Gray profiles reuse their single curve for all three channels.
+        profile->trc[1] = profile->trc[0];
+        profile->trc[2] = profile->trc[0];
+        profile->has_trc = true;
+
+        // With an XYZ PCS, the matrix is a diagonal of the illuminant; the other entries
+        // stay at the zeros written by the memset above.
+        if (pcs_is_xyz) {
+            profile->toXYZD50.vals[0][0] = illuminant_X;
+            profile->toXYZD50.vals[1][1] = illuminant_Y;
+            profile->toXYZD50.vals[2][2] = illuminant_Z;
+            profile->has_toXYZD50 = true;
+        }
+    } else {
+        // Transfer curves are only recorded when all three of rTRC/gTRC/bTRC are present.
+        skcms_ICCTag rTRC, gTRC, bTRC;
+        if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
+            skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
+            skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
+            if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) ||
+                !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) ||
+                !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) {
+                // Malformed TRC tags
+                return false;
+            }
+            profile->has_trc = true;
+        }
+
+        // Likewise the matrix needs all three of rXYZ/gXYZ/bXYZ.
+        skcms_ICCTag rXYZ, gXYZ, bXYZ;
+        if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
+            skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
+            skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
+            if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
+                // Malformed XYZ tags
+                return false;
+            }
+            profile->has_toXYZD50 = true;
+        }
+    }
+
+    // Use the first A2B tag that exists, walking the intents in priority order.
+    for (int i = 0; i < priorities; i++) {
+        // enum { perceptual, relative_colorimetric, saturation }
+        if (priority[i] < 0 || priority[i] > 2) {
+            return false;
+        }
+        uint32_t sig = skcms_Signature_A2B0 + static_cast<uint32_t>(priority[i]);
+        skcms_ICCTag tag;
+        if (skcms_GetTagBySignature(profile, sig, &tag)) {
+            if (!read_a2b(&tag, &profile->A2B, pcs_is_xyz)) {
+                // Malformed A2B tag
+                return false;
+            }
+            profile->has_A2B = true;
+            break;
+        }
+    }
+
+    // Same search for B2A.
+    for (int i = 0; i < priorities; i++) {
+        // enum { perceptual, relative_colorimetric, saturation }
+        if (priority[i] < 0 || priority[i] > 2) {
+            return false;
+        }
+        uint32_t sig = skcms_Signature_B2A0 + static_cast<uint32_t>(priority[i]);
+        skcms_ICCTag tag;
+        if (skcms_GetTagBySignature(profile, sig, &tag)) {
+            if (!read_b2a(&tag, &profile->B2A, pcs_is_xyz)) {
+                // Malformed B2A tag
+                return false;
+            }
+            profile->has_B2A = true;
+            break;
+        }
+    }
+
+    // A CICP tag is optional, but if present it must parse.
+    skcms_ICCTag cicp_tag;
+    if (skcms_GetTagBySignature(profile, skcms_Signature_CICP, &cicp_tag)) {
+        if (!read_cicp(&cicp_tag, &profile->CICP)) {
+            // Malformed CICP tag
+            return false;
+        }
+        profile->has_CICP = true;
+    }
+
+    return usable_as_src(profile);
+}
+
+
+// Returns a pointer to a function-local static profile describing sRGB.  The profile
+// carries only transfer curves and a toXYZD50 matrix; its A2B, B2A, and CICP members are
+// zeroed and flagged as absent.  The buffer, size, and tag count members are not
+// meaningful because this profile was not parsed from ICC data.
+//
+// The initializer is positional, so it must track the member order of skcms_ICCProfile.
+const skcms_ICCProfile* skcms_sRGB_profile() {
+    static const skcms_ICCProfile sRGB_profile = {
+        nullptr,               // buffer, moot here
+
+        0,                     // size, moot here
+        skcms_Signature_RGB,   // data_color_space
+        skcms_Signature_XYZ,   // pcs
+        0,                     // tag count, moot here
+
+        // We choose to represent sRGB with its canonical transfer function,
+        // and with its canonical XYZD50 gamut matrix.
+        true,  // has_trc, followed by the 3 trc curves
+        {
+            {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
+            {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
+            {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
+        },
+
+        true,  // has_toXYZD50, followed by 3x3 toXYZD50 matrix
+        {{
+            { 0.436065674f, 0.385147095f, 0.143066406f },
+            { 0.222488403f, 0.716873169f, 0.060607910f },
+            { 0.013916016f, 0.097076416f, 0.714096069f },
+        }},
+
+        false, // has_A2B, followed by A2B itself, which we don't care about.
+        {
+            0,
+            {
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+            },
+            {0,0,0,0},
+            nullptr,
+            nullptr,
+
+            0,
+            {
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+            },
+            {{
+                { 0,0,0,0 },
+                { 0,0,0,0 },
+                { 0,0,0,0 },
+            }},
+
+            0,
+            {
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+            },
+        },
+
+        false, // has_B2A, followed by B2A itself, which we also don't care about.
+        {
+            0,
+            {
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+            },
+
+            0,
+            {{
+                { 0,0,0,0 },
+                { 0,0,0,0 },
+                { 0,0,0,0 },
+            }},
+            {
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+            },
+
+            0,
+            {0,0,0,0},
+            nullptr,
+            nullptr,
+            {
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+            },
+        },
+
+        false, // has_CICP, followed by cicp itself which we don't care about.
+        { 0, 0, 0, 0 },
+    };
+    return &sRGB_profile;
+}
+
+// Returns a pointer to a function-local static profile whose three transfer curves are
+// the {1,1,0,0,0,0,0} parametric curve and whose toXYZD50 matrix is the 3x3 identity.
+// Its A2B, B2A, and CICP members are zeroed and flagged as absent.
+//
+// The initializer is positional, so it must track the member order of skcms_ICCProfile.
+const skcms_ICCProfile* skcms_XYZD50_profile() {
+    // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
+    static const skcms_ICCProfile XYZD50_profile = {
+        nullptr,               // buffer, moot here
+
+        0,                     // size, moot here
+        skcms_Signature_RGB,   // data_color_space
+        skcms_Signature_XYZ,   // pcs
+        0,                     // tag count, moot here
+
+        true,  // has_trc, followed by the 3 trc curves
+        {
+            {{0, {1,1, 0,0,0,0,0}}},
+            {{0, {1,1, 0,0,0,0,0}}},
+            {{0, {1,1, 0,0,0,0,0}}},
+        },
+
+        true,  // has_toXYZD50, followed by 3x3 toXYZD50 matrix
+        {{
+            { 1,0,0 },
+            { 0,1,0 },
+            { 0,0,1 },
+        }},
+
+        false, // has_A2B, followed by A2B itself, which we don't care about.
+        {
+            0,
+            {
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+            },
+            {0,0,0,0},
+            nullptr,
+            nullptr,
+
+            0,
+            {
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+            },
+            {{
+                { 0,0,0,0 },
+                { 0,0,0,0 },
+                { 0,0,0,0 },
+            }},
+
+            0,
+            {
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+            },
+        },
+
+        false, // has_B2A, followed by B2A itself, which we also don't care about.
+        {
+            0,
+            {
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+            },
+
+            0,
+            {{
+                { 0,0,0,0 },
+                { 0,0,0,0 },
+                { 0,0,0,0 },
+            }},
+            {
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+            },
+
+            0,
+            {0,0,0,0},
+            nullptr,
+            nullptr,
+            {
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+                {{0, {0,0, 0,0,0,0,0}}},
+            },
+        },
+
+        false, // has_CICP, followed by cicp itself which we don't care about.
+        { 0, 0, 0, 0 },
+    };
+
+    return &XYZD50_profile;
+}
+
+// Returns the parametric transfer function held in the first TRC curve of the
+// built-in sRGB profile.
+const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
+    const skcms_ICCProfile* srgb = skcms_sRGB_profile();
+    return &srgb->trc[0].parametric;
+}
+
+// Returns a pointer to a static transfer function holding precomputed
+// parameters for the inverse of the sRGB curve.
+const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
+    static const skcms_TransferFunction kInverse = {
+        0.416666657f,   // g
+        1.137283325f,   // a
+        -0.0f,          // b
+        12.920000076f,  // c
+        0.003130805f,   // d
+        -0.054969788f,  // e
+        -0.0f,          // f
+    };
+    return &kInverse;
+}
+
+// Returns a pointer to a static transfer function with g = 1, a = 1 and every
+// other parameter zero.
+const skcms_TransferFunction* skcms_Identity_TransferFunction() {
+    static const skcms_TransferFunction kIdentity = {
+        1, 1,           // g, a
+        0, 0, 0, 0, 0,  // b, c, d, e, f
+    };
+    return &kIdentity;
+}
+
+// Fixed pseudo-random input used by skcms_ApproximatelyEqualProfiles() as the
+// source pixels for its two comparison transforms.  Per the comment in that
+// function, these are 252 bytes taken from a shuffle of all byte values, with
+// 192, 10, 241 and 43 left out; 252 divides evenly by both 3 and 4, so the
+// buffer can be read as whole 3-byte or 4-byte pixels.
+const uint8_t skcms_252_random_bytes[] = {
+    8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
+    119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
+    154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
+    194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
+    108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
+    70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
+    137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
+    9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
+    129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
+    140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
+    219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
+    123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
+    189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
+    174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
+    2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
+    112, 36, 224, 136, 202, 76, 94, 98, 175, 213
+};
+
+// Returns true when profiles A and B are byte-identical, or when transforming
+// the same fixed set of test pixels through each of them into XYZD50 yields
+// outputs that differ by at most 1 in every byte.  Returns false when exactly
+// one of the profiles is CMYK, or when either transform fails.
+bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
+    // Test for exactly equal profiles first.
+    if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) {
+        return true;
+    }
+
+    // For now this is essentially the same strategy we use in test_only.c
+    // for our skcms_Transform() smoke tests:
+    //    1) transform A to XYZD50
+    //    2) transform B to XYZD50
+    //    3) return true if they're similar enough
+    // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
+
+    // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
+    // 252 is evenly divisible by 3 and 4.  Only 192, 10, 241, and 43 are missing.
+
+    // We want to allow otherwise equivalent profiles tagged as grayscale and RGB
+    // to be treated as equal.  But CMYK profiles are a totally different ballgame.
+    const auto CMYK = skcms_Signature_CMYK;
+    if ((A->data_color_space == CMYK) != (B->data_color_space == CMYK)) {
+        return false;
+    }
+
+    // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
+    // TODO: working with RGBA_8888 either way is probably fastest.
+    skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
+    size_t npixels = 84;
+    if (A->data_color_space == skcms_Signature_CMYK) {
+        fmt = skcms_PixelFormat_RGBA_8888;
+        npixels = 63;
+    }
+
+    // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
+    // use pre-canned results and skip that skcms_Transform() call?
+    uint8_t dstA[252],
+            dstB[252];
+    if (!skcms_Transform(
+                skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, A,
+                dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
+                npixels)) {
+        return false;
+    }
+    if (!skcms_Transform(
+                skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, B,
+                dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
+                npixels)) {
+        return false;
+    }
+
+    // Both destinations are RGB_888, so each transform wrote 3 bytes per pixel:
+    // all 252 bytes for the 84-pixel RGB/gray case, but only 189 for the 63-pixel
+    // CMYK case.  Compare just the bytes that were written; the rest of dstA/dstB
+    // is uninitialized and must not be read.
+    const size_t nbytes = npixels * 3;
+
+    // TODO: make sure this final check has reasonable codegen.
+    for (size_t i = 0; i < nbytes; i++) {
+        if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// True when the profile has TRC curves and each of its three curves is an
+// approximate inverse of inv_tf, as judged by skcms_AreApproximateInverses().
+// A null profile, or one without TRC curves, yields false.
+bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
+                                      const skcms_TransferFunction* inv_tf) {
+    if (profile == nullptr) {
+        return false;
+    }
+    if (!profile->has_trc) {
+        return false;
+    }
+
+    // Check the channels in order and stop at the first mismatch.
+    for (int channel = 0; channel < 3; ++channel) {
+        if (!skcms_AreApproximateInverses(&profile->trc[channel], inv_tf)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// True when x lies in the closed interval [0,1]; false otherwise, NaN included.
+static bool is_zero_to_one(float x) {
+    if (!(x >= 0)) {
+        return false;
+    }
+    return x <= 1;
+}
+
+// A 3-component float vector, used together with skcms_Matrix3x3 by mv_mul().
+typedef struct { float vals[3]; } skcms_Vector3;
+
+// Matrix-vector product: returns m * v, treating v as a column vector.
+static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
+    skcms_Vector3 product = {{0,0,0}};
+    for (int r = 0; r < 3; r++) {
+        const float* coeffs = m->vals[r];
+        product.vals[r] = coeffs[0] * v->vals[0]
+                        + coeffs[1] * v->vals[1]
+                        + coeffs[2] * v->vals[2];
+    }
+    return product;
+}
+
+// Builds the Bradford chromatic adaptation matrix that maps XYZ values relative
+// to the white point with chromaticity (wx, wy) onto XYZ relative to D50, and
+// stores it in *toXYZD50.
+//
+// Returns false, leaving *toXYZD50 untouched, when wx or wy is outside [0,1],
+// when wy is zero (it is used as a divisor), or when toXYZD50 is null.
+bool skcms_AdaptToXYZD50(float wx, float wy,
+                         skcms_Matrix3x3* toXYZD50) {
+    // wy == 0 passes the range check but would divide by zero below and produce
+    // a non-finite matrix, so reject it explicitly.
+    if (!is_zero_to_one(wx) || !is_zero_to_one(wy) || wy == 0 ||
+        !toXYZD50) {
+        return false;
+    }
+
+    // Assumes that Y is 1.0f.
+    skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
+
+    // Now convert toXYZ matrix to toXYZD50.
+    skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
+
+    // Calculate the chromatic adaptation matrix.  We will use the Bradford method, thus
+    // the matrices below.  The Bradford method is used by Adobe and is widely considered
+    // to be the best.
+    skcms_Matrix3x3 xyz_to_lms = {{
+        {  0.8951f,  0.2664f, -0.1614f },
+        { -0.7502f,  1.7135f,  0.0367f },
+        {  0.0389f, -0.0685f,  1.0296f },
+    }};
+    skcms_Matrix3x3 lms_to_xyz = {{
+        {  0.9869929f, -0.1470543f, 0.1599627f },
+        {  0.4323053f,  0.5183603f, 0.0492912f },
+        { -0.0085287f,  0.0400428f, 0.9684867f },
+    }};
+
+    // Cone responses of the source white and of D50.
+    skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
+    skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
+
+    // Diagonal per-cone scale, then wrap it as lms_to_xyz * scale * xyz_to_lms.
+    *toXYZD50 = {{
+        { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
+        { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
+        { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
+    }};
+    *toXYZD50 = skcms_Matrix3x3_concat(toXYZD50, &xyz_to_lms);
+    *toXYZD50 = skcms_Matrix3x3_concat(&lms_to_xyz, toXYZD50);
+
+    return true;
+}
+
+// Computes the matrix that takes linear RGB, defined by the xy chromaticities of
+// its red, green and blue primaries and its white point, to XYZ adapted to D50,
+// and stores it in *toXYZD50.
+//
+// Returns false when any chromaticity is outside [0,1], when wy is zero (it is
+// used as a divisor), when toXYZD50 is null, when the primaries matrix cannot be
+// inverted, or when the white point adaptation fails.
+bool skcms_PrimariesToXYZD50(float rx, float ry,
+                             float gx, float gy,
+                             float bx, float by,
+                             float wx, float wy,
+                             skcms_Matrix3x3* toXYZD50) {
+    // wy == 0 passes the range check but would divide by zero when the white
+    // point is converted to XYZ below, so reject it explicitly.
+    if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
+        !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
+        !is_zero_to_one(bx) || !is_zero_to_one(by) ||
+        !is_zero_to_one(wx) || !is_zero_to_one(wy) || wy == 0 ||
+        !toXYZD50) {
+        return false;
+    }
+
+    // First, we need to convert xy values (primaries) to XYZ.
+    skcms_Matrix3x3 primaries = {{
+        { rx, gx, bx },
+        { ry, gy, by },
+        { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
+    }};
+    skcms_Matrix3x3 primaries_inv;
+    if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
+        return false;
+    }
+
+    // Assumes that Y is 1.0f.
+    skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
+    skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
+
+    // Scale each primary's column so that the three sum to the white point.
+    skcms_Matrix3x3 toXYZ = {{
+        { XYZ.vals[0],           0,           0 },
+        {           0, XYZ.vals[1],           0 },
+        {           0,           0, XYZ.vals[2] },
+    }};
+    toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
+
+    skcms_Matrix3x3 DXtoD50;
+    if (!skcms_AdaptToXYZD50(wx, wy, &DXtoD50)) {
+        return false;
+    }
+
+    *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
+    return true;
+}
+
+
+// Inverts the 3x3 matrix *src into *dst.
+//
+// Returns false when the determinant is exactly zero, when its reciprocal does
+// not fit in a finite float, or when any element of the result is not finite.
+// In that last case *dst has already been overwritten.
+//
+// All source elements are copied into locals before *dst is written, and the
+// arithmetic is done in double before narrowing back to float.
+bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
+    // Note the indices: aRC is loaded from src->vals[C][R], i.e. transposed.
+    double a00 = src->vals[0][0],
+           a01 = src->vals[1][0],
+           a02 = src->vals[2][0],
+           a10 = src->vals[0][1],
+           a11 = src->vals[1][1],
+           a12 = src->vals[2][1],
+           a20 = src->vals[0][2],
+           a21 = src->vals[1][2],
+           a22 = src->vals[2][2];
+
+    // b0..b2 are 2x2 minors built from the a0x and a1x terms; b3..b5 are the a2x terms.
+    double b0 = a00*a11 - a01*a10,
+           b1 = a00*a12 - a02*a10,
+           b2 = a01*a12 - a02*a11,
+           b3 = a20,
+           b4 = a21,
+           b5 = a22;
+
+    double determinant = b0*b5
+                       - b1*b4
+                       + b2*b3;
+
+    if (determinant == 0) {
+        return false;
+    }
+
+    double invdet = 1.0 / determinant;
+    if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
+        return false;
+    }
+
+    // Fold 1/determinant into the b terms so every product below is already scaled.
+    b0 *= invdet;
+    b1 *= invdet;
+    b2 *= invdet;
+    b3 *= invdet;
+    b4 *= invdet;
+    b5 *= invdet;
+
+    dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
+    dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
+    dst->vals[2][0] = (float)(        +     b2 );
+    dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
+    dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
+    dst->vals[2][1] = (float)(        -     b1 );
+    dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
+    dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
+    dst->vals[2][2] = (float)(        +     b0 );
+
+    // Reject results that overflowed or became NaN when narrowed to float.
+    for (int r = 0; r < 3; ++r)
+    for (int c = 0; c < 3; ++c) {
+        if (!isfinitef_(dst->vals[r][c])) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Returns the matrix product A * B.  The result is built in a local and
+// returned by value, so callers may assign it over either input.
+skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
+    skcms_Matrix3x3 product = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
+    for (int row = 0; row < 3; ++row) {
+        const float* lhs = A->vals[row];
+        for (int col = 0; col < 3; ++col) {
+            product.vals[row][col] = lhs[0] * B->vals[0][col]
+                                   + lhs[1] * B->vals[1][col]
+                                   + lhs[2] * B->vals[2][col];
+        }
+    }
+    return product;
+}
+
+// Computes the inverse of the transfer function *src and stores it in *dst.
+//
+// PQish, HLGish and HLGinvish inputs are inverted by rearranging their
+// parameters.  sRGBish inputs are inverted piecewise, and the result is nudged
+// so that inv(src(1.0f)) == 1.0f.  Returns false when *src classifies as Bad,
+// when the two pieces of an sRGBish curve disagree at d by more than 1/512,
+// or when the computed inverse is not itself a valid sRGBish function.
+// *dst is written only on the paths that reach an assignment to it below.
+#if defined(__clang__)
+    [[clang::no_sanitize("float-divide-by-zero")]]  // Checked for by classify() on the way out.
+#endif
+bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
+    TF_PQish  pq;
+    TF_HLGish hlg;
+    switch (classify(*src, &pq, &hlg)) {
+        case Bad: return false;
+        case sRGBish: break;  // handled below
+
+        case PQish:
+            *dst = { TFKind_marker(PQish), -pq.A,  pq.D, 1.0f/pq.F
+                                         ,  pq.B, -pq.E, 1.0f/pq.C};
+            return true;
+
+        case HLGish:
+            *dst = { TFKind_marker(HLGinvish), 1.0f/hlg.R, 1.0f/hlg.G
+                                             , 1.0f/hlg.a, hlg.b, hlg.c
+                                             , hlg.K_minus_1 };
+            return true;
+
+        case HLGinvish:
+            *dst = { TFKind_marker(HLGish), 1.0f/hlg.R, 1.0f/hlg.G
+                                          , 1.0f/hlg.a, hlg.b, hlg.c
+                                          , hlg.K_minus_1 };
+            return true;
+    }
+
+    assert (classify(*src) == sRGBish);
+
+    // We're inverting this function, solving for x in terms of y.
+    //   y = (cx + f)         x < d
+    //       (ax + b)^g + e   x ≥ d
+    // The inverse of this function can be expressed in the same piecewise form.
+    skcms_TransferFunction inv = {0,0,0,0,0,0,0};
+
+    // We'll start by finding the new threshold inv.d.
+    // In principle we should be able to find that by solving for y at x=d from either side.
+    // (If those two d values aren't the same, it's a discontinuous transfer function.)
+    float d_l =       src->c * src->d + src->f,
+          d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
+    if (fabsf_(d_l - d_r) > 1/512.0f) {
+        return false;
+    }
+    inv.d = d_l;  // TODO(mtklein): better in practice to choose d_r?
+
+    // When d=0, the linear section collapses to a point.  We leave c,d,f all zero in that case.
+    if (inv.d > 0) {
+        // Inverting the linear section is pretty straightforward:
+        //        y       = cx + f
+        //        y - f   = cx
+        //   (1/c)y - f/c = x
+        inv.c =    1.0f/src->c;
+        inv.f = -src->f/src->c;
+    }
+
+    // The interesting part is inverting the nonlinear section:
+    //         y                = (ax + b)^g + e.
+    //         y - e            = (ax + b)^g
+    //        (y - e)^1/g       =  ax + b
+    //        (y - e)^1/g - b   =  ax
+    //   (1/a)(y - e)^1/g - b/a =   x
+    //
+    // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
+    //   let k = (1/a)^g
+    //   (1/a)( y -  e)^1/g - b/a = x
+    //        (ky - ke)^1/g - b/a = x
+
+    float k = powf_(src->a, -src->g);  // (1/a)^g == a^-g
+    inv.g = 1.0f / src->g;
+    inv.a = k;
+    inv.b = -k * src->e;
+    inv.e = -src->b / src->a;
+
+    // We need to enforce the same constraints here that we do when fitting a curve,
+    // a >= 0 and ad+b >= 0.  These constraints are checked by classify(), so they're true
+    // of the source function if we're here.
+
+    // Just like when fitting the curve, there's really no way to rescue a < 0.
+    if (inv.a < 0) {
+        return false;
+    }
+    // On the other hand we can rescue an ad+b that's gone slightly negative here.
+    if (inv.a * inv.d + inv.b < 0) {
+        inv.b = -inv.a * inv.d;
+    }
+
+    // That should usually make classify(inv) == sRGBish true, but there are a couple situations
+    // where we might still fail here, like non-finite parameter values.
+    if (classify(inv) != sRGBish) {
+        return false;
+    }
+
+    assert (inv.a >= 0);
+    assert (inv.a * inv.d + inv.b >= 0);
+
+    // Now in principle we're done.
+    // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak
+    // e or f of the inverse, depending on which segment contains src(1.0f).
+    float s = skcms_TransferFunction_eval(src, 1.0f);
+    if (!isfinitef_(s)) {
+        return false;
+    }
+
+    // Work with |s| and carry the sign separately.
+    float sign = s < 0 ? -1.0f : 1.0f;
+    s *= sign;
+    if (s < inv.d) {
+        inv.f = 1.0f - sign * inv.c * s;
+    } else {
+        inv.e = 1.0f - sign * powf_(inv.a * s + inv.b, inv.g);
+    }
+
+    // The tweak above changed e or f, so re-validate before reporting success.
+    *dst = inv;
+    return classify(*dst) == sRGBish;
+}
+
+// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
+
+// From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
+//
+//   tf(x) =  cx + f          x < d
+//   tf(x) = (ax + b)^g + e   x ≥ d
+//
+// When fitting, we add the additional constraint that both pieces meet at d:
+//
+//   cd + f = (ad + b)^g + e
+//
+// Solving for e and folding it through gives an alternate formulation of the non-linear piece:
+//
+//   tf(x) =                           cx + f   x < d
+//   tf(x) = (ax + b)^g - (ad + b)^g + cd + f   x ≥ d
+//
+// Our overall strategy is then:
+//    For a couple tolerances,
+//       - fit_linear():    fit c,d,f iteratively to as many points as our tolerance allows
+//       - invert c,d,f
+//       - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
+//                          (and by constraint, inverted e) to the inverse of the table.
+//    Return the parameters with least maximum error.
+//
+// To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
+// of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
+//
+//    let y = Table(x)
+//    r(x) = x - f_inv(y)
+//
+//    ∂r/∂g = ln(ay + b)*(ay + b)^g
+//          - ln(ad + b)*(ad + b)^g
+//    ∂r/∂a = yg(ay + b)^(g-1)
+//          - dg(ad + b)^(g-1)
+//    ∂r/∂b =  g(ay + b)^(g-1)
+//          -  g(ad + b)^(g-1)
+
+// Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
+// and fill out the gradient of the residual into dfdP.
+//
+// dfdP[0], dfdP[1] and dfdP[2] receive the terms for g, a and b respectively,
+// matching P = [g, a, b] as used by gauss_newton_step().  tf supplies the
+// current parameters; its e field is not read here.
+static float rg_nonlinear(float x,
+                          const skcms_Curve* curve,
+                          const skcms_TransferFunction* tf,
+                          float dfdP[3]) {
+    const float y = eval_curve(curve, x);
+
+    const float g = tf->g, a = tf->a, b = tf->b,
+                c = tf->c, d = tf->d, f = tf->f;
+
+    // Y is clamped at zero; D is only asserted non-negative, so callers are
+    // expected to keep a*d + b >= 0.
+    const float Y = fmaxf_(a*y + b, 0.0f),
+                D =        a*d + b;
+    assert (D >= 0);
+
+    // The gradient.
+    dfdP[0] = logf_(Y)*powf_(Y, g)
+            - logf_(D)*powf_(D, g);
+    dfdP[1] = y*g*powf_(Y, g-1)
+            - d*g*powf_(D, g-1);
+    dfdP[2] =   g*powf_(Y, g-1)
+            -   g*powf_(D, g-1);
+
+    // The residual.
+    const float f_inv = powf_(Y, g)
+                      - powf_(D, g)
+                      + c*d + f;
+    return x - f_inv;
+}
+
+// Performs one Gauss-Newton update of tf->g, tf->a and tf->b against curve,
+// using N samples at x0, x0+dx, x0+2*dx, ...
+//
+// Returns false when the 3x3 normal matrix cannot be inverted (tf is left
+// unchanged in that case), or when any of the updated g, a, b is not finite
+// (tf has already been modified in that case).
+static bool gauss_newton_step(const skcms_Curve* curve,
+                                    skcms_TransferFunction* tf,
+                              float x0, float dx, int N) {
+    // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
+    //
+    // Let P = [ tf->g, tf->a, tf->b ] (the three terms that we're adjusting).
+    //
+    // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
+    //   where r(P) is the residual vector
+    //   and Jf is the Jacobian matrix of f(), ∂r/∂P.
+    //
+    // Let's review the shape of each of these expressions:
+    //   r(P)   is [N x 1], a column vector with one entry per value of x tested
+    //   Jf     is [N x 3], a matrix with an entry for each (x,P) pair
+    //   Jf^T   is [3 x N], the transpose of Jf
+    //
+    //   Jf^T Jf   is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
+    //                                              and so is its inverse (Jf^T Jf)^-1
+    //   Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
+    //
+    // Our implementation strategy to get to the final ∆P is
+    //   1) evaluate Jf^T Jf,   call that lhs
+    //   2) evaluate Jf^T r(P), call that rhs
+    //   3) invert lhs
+    //   4) multiply inverse lhs by rhs
+    //
+    // This is a friendly implementation strategy because we don't have to have any
+    // buffers that scale with N, and equally nice don't have to perform any matrix
+    // operations that are variable size.
+    //
+    // Other implementation strategies could trade this off, e.g. evaluating the
+    // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
+    // the residuals.  That would probably require implementing singular value
+    // decomposition, and would create a [3 x N] matrix to be multiplied by the
+    // [N x 1] residual vector, but on the upside I think that'd eliminate the
+    // possibility of this gauss_newton_step() function ever failing.
+
+    // 0) start off with lhs and rhs safely zeroed.
+    skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
+    skcms_Vector3   rhs = {  {0,0,0} };
+
+    // 1,2) evaluate lhs and evaluate rhs
+    //   We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
+    //   so we'll have to update lhs and rhs at the same time.
+    for (int i = 0; i < N; i++) {
+        float x = x0 + static_cast<float>(i)*dx;
+
+        float dfdP[3] = {0,0,0};
+        float resid = rg_nonlinear(x,curve,tf, dfdP);
+
+        // Accumulate this sample's outer product into lhs and its weighted residual into rhs.
+        for (int r = 0; r < 3; r++) {
+            for (int c = 0; c < 3; c++) {
+                lhs.vals[r][c] += dfdP[r] * dfdP[c];
+            }
+            rhs.vals[r] += dfdP[r] * resid;
+        }
+    }
+
+    // If any of the 3 P parameters are unused, this matrix will be singular.
+    // Detect those cases and fix them up to identity instead, so we can invert.
+    for (int k = 0; k < 3; k++) {
+        if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
+            lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
+            lhs.vals[k][k] = 1;
+        }
+    }
+
+    // 3) invert lhs
+    skcms_Matrix3x3 lhs_inv;
+    if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
+        return false;
+    }
+
+    // 4) multiply inverse lhs by rhs
+    skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
+    tf->g += dP.vals[0];
+    tf->a += dP.vals[1];
+    tf->b += dP.vals[2];
+    return isfinitef_(tf->g) && isfinitef_(tf->a) && isfinitef_(tf->b);
+}
+
+// Scores tf_inv against curve using skcms_MaxRoundtripError(), but only after
+// confirming that tf_inv inverts to an sRGBish function and that this function
+// inverts back again.  The re-derived inverse is the one that gets scored.
+// Returns INFINITY_ when either inversion fails or the forward function is not
+// sRGBish.
+static float max_roundtrip_error_checked(const skcms_Curve* curve,
+                                         const skcms_TransferFunction* tf_inv) {
+    skcms_TransferFunction forward;
+    if (!skcms_TransferFunction_invert(tf_inv, &forward)) {
+        return INFINITY_;
+    }
+    if (classify(forward) != sRGBish) {
+        return INFINITY_;
+    }
+
+    skcms_TransferFunction inverse_again;
+    if (skcms_TransferFunction_invert(&forward, &inverse_again)) {
+        return skcms_MaxRoundtripError(curve, &inverse_again);
+    }
+    return INFINITY_;
+}
+
+// Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
+//
+// tf is both input (the starting guess) and output.  On return *tf holds the
+// candidate with the lowest max_roundtrip_error_checked() seen, counting the
+// starting guess after its first fixup, and the return value says whether that
+// error is finite.  If the very first fixup fails, false is returned at once.
+static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
+    // This enforces a few constraints that are not modeled in gauss_newton_step()'s optimization.
+    auto fixup_tf = [tf]() {
+        // a must be non-negative. That ensures the function is monotonically increasing.
+        // We don't really know how to fix up a if it goes negative.
+        if (tf->a < 0) {
+            return false;
+        }
+        // ad+b must be non-negative. That ensures we don't end up with complex numbers in powf.
+        // We feel just barely not uneasy enough to tweak b so ad+b is zero in this case.
+        if (tf->a * tf->d + tf->b < 0) {
+            tf->b = -tf->a * tf->d;
+        }
+        assert (tf->a >= 0 &&
+                tf->a * tf->d + tf->b >= 0);
+
+        // cd+f must be ~= (ad+b)^g+e. That ensures the function is continuous. We keep e as a free
+        // parameter so we can guarantee this.
+        tf->e =   tf->c*tf->d + tf->f
+          - powf_(tf->a*tf->d + tf->b, tf->g);
+
+        return true;
+    };
+
+    if (!fixup_tf()) {
+        return false;
+    }
+
+    // No matter where we start, dx should always represent N even steps from 0 to 1.
+    const float dx = 1.0f / static_cast<float>(N-1);
+
+    skcms_TransferFunction best_tf = *tf;
+    float best_max_error = INFINITY_;
+
+    // Need this or several curves get worse... *sigh*
+    float init_error = max_roundtrip_error_checked(curve, tf);
+    if (init_error < best_max_error) {
+        best_max_error = init_error;
+        best_tf = *tf;
+    }
+
+    // As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2.
+    // NOTE(review): the loop below runs up to 8 steps, which does not match the remark above;
+    // confirm which is intended.
+    for (int j = 0; j < 8; j++) {
+        // A failed step or fixup ends the search; fall back to the best candidate so far.
+        if (!gauss_newton_step(curve, tf, static_cast<float>(L)*dx, dx, N-L) || !fixup_tf()) {
+            *tf = best_tf;
+            return isfinitef_(best_max_error);
+        }
+
+        float max_error = max_roundtrip_error_checked(curve, tf);
+        if (max_error < best_max_error) {
+            best_max_error = max_error;
+            best_tf = *tf;
+        }
+    }
+
+    *tf = best_tf;
+    return isfinitef_(best_max_error);
+}
+
+// Approximates a tabulated curve with a parametric transfer function.
+//
+// For each tolerance in kTolerances, fits a linear prefix with fit_linear(),
+// fits or directly solves the remainder, and keeps the candidate whose inverse
+// gives the smallest skcms_MaxRoundtripError().
+//
+// Returns false, without touching the outputs, when any argument is null or
+// when curve->table_entries is 0, 1, or larger than INT_MAX.  Otherwise
+// *max_error starts at INFINITY_ and the function returns whether a candidate
+// with finite error was found; *approx is written only when one is found.
+bool skcms_ApproximateCurve(const skcms_Curve* curve,
+                            skcms_TransferFunction* approx,
+                            float* max_error) {
+    if (!curve || !approx || !max_error) {
+        return false;
+    }
+
+    if (curve->table_entries == 0) {
+        // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
+        return false;
+    }
+
+    if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
+        // We need at least two points, and must put some reasonable cap on the maximum number.
+        return false;
+    }
+
+    int N = (int)curve->table_entries;
+    const float dx = 1.0f / static_cast<float>(N - 1);
+
+    *max_error = INFINITY_;
+    const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
+    for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
+        // Every branch below assigns all seven fields of tf before tf is read.
+        skcms_TransferFunction tf,
+                               tf_inv;
+
+        // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
+        tf.f = 0.0f;
+        int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
+
+        if (L == N) {
+            // If the entire data set was linear, move the coefficients to the nonlinear portion
+            // with G == 1.  This lets us use a canonical representation with d == 0.
+            tf.g = 1;
+            tf.a = tf.c;
+            tf.b = tf.f;
+            tf.c = tf.d = tf.e = tf.f = 0;
+        } else if (L == N - 1) {
+            // Degenerate case with only two points in the nonlinear segment. Solve directly.
+            tf.g = 1;
+            tf.a = (eval_curve(curve, static_cast<float>(N-1)*dx) -
+                    eval_curve(curve, static_cast<float>(N-2)*dx))
+                 / dx;
+            tf.b = eval_curve(curve, static_cast<float>(N-2)*dx)
+                 - tf.a * static_cast<float>(N-2)*dx;
+            tf.e = 0;
+        } else {
+            // Start by guessing a gamma-only curve through the midpoint.
+            int mid = (L + N) / 2;
+            float mid_x = static_cast<float>(mid) / static_cast<float>(N - 1);
+            float mid_y = eval_curve(curve, mid_x);
+            tf.g = log2f_(mid_y) / log2f_(mid_x);
+            tf.a = 1;
+            tf.b = 0;
+            tf.e =    tf.c*tf.d + tf.f
+              - powf_(tf.a*tf.d + tf.b, tf.g);
+
+
+            // The fit runs on the inverse, so a guess that cannot be inverted skips this tolerance.
+            if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
+                !fit_nonlinear(curve, L,N, &tf_inv)) {
+                continue;
+            }
+
+            // We fit tf_inv, so calculate tf to keep in sync.
+            // fit_nonlinear() should guarantee invertibility.
+            if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
+                assert(false);
+                continue;
+            }
+        }
+
+        // We'd better have a sane, sRGB-ish TF by now.
+        // Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish;
+        // anything else is just some accident of math and the way we pun tf.g as a type flag.
+        // fit_nonlinear() should guarantee this, but the special cases may fail this test.
+        if (sRGBish != classify(tf)) {
+            continue;
+        }
+
+        // We find our error by roundtripping the table through tf_inv.
+        //
+        // (The most likely use case for this approximation is to be inverted and
+        // used as the transfer function for a destination color space.)
+        //
+        // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
+        // invertible, so re-verify that here (and use the new inverse for testing).
+        // fit_nonlinear() should guarantee this, but the special cases that don't use
+        // it may fail this test.
+        if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
+            continue;
+        }
+
+        // Keep the candidate with the smallest error across tolerances.
+        float err = skcms_MaxRoundtripError(curve, &tf_inv);
+        if (*max_error > err) {
+            *max_error = err;
+            *approx    = tf;
+        }
+    }
+    return isfinitef_(*max_error);
+}
+
+// ~~~~ Impl. of skcms_Transform() ~~~~
+
+// Operation codes for the skcms_Transform() implementation.
+// NOTE(review): presumably these are the steps executed by the exec_ops() /
+// run_program() code instantiated from src/Transform_inl.h; confirm there
+// before reordering, since the enumerators take their values from their
+// position in this list.
+typedef enum {
+    Op_load_a8,
+    Op_load_g8,
+    Op_load_8888_palette8,
+    Op_load_4444,
+    Op_load_565,
+    Op_load_888,
+    Op_load_8888,
+    Op_load_1010102,
+    Op_load_161616LE,
+    Op_load_16161616LE,
+    Op_load_161616BE,
+    Op_load_16161616BE,
+    Op_load_hhh,
+    Op_load_hhhh,
+    Op_load_fff,
+    Op_load_ffff,
+
+    Op_swap_rb,
+    Op_clamp,
+    Op_invert,
+    Op_force_opaque,
+    Op_premul,
+    Op_unpremul,
+    Op_matrix_3x3,
+    Op_matrix_3x4,
+
+    Op_lab_to_xyz,
+    Op_xyz_to_lab,
+
+    Op_tf_r,
+    Op_tf_g,
+    Op_tf_b,
+    Op_tf_a,
+
+    Op_pq_r,
+    Op_pq_g,
+    Op_pq_b,
+    Op_pq_a,
+
+    Op_hlg_r,
+    Op_hlg_g,
+    Op_hlg_b,
+    Op_hlg_a,
+
+    Op_hlginv_r,
+    Op_hlginv_g,
+    Op_hlginv_b,
+    Op_hlginv_a,
+
+    Op_table_r,
+    Op_table_g,
+    Op_table_b,
+    Op_table_a,
+
+    Op_clut_A2B,
+    Op_clut_B2A,
+
+    Op_store_a8,
+    Op_store_g8,
+    Op_store_4444,
+    Op_store_565,
+    Op_store_888,
+    Op_store_8888,
+    Op_store_1010102,
+    Op_store_161616LE,
+    Op_store_16161616LE,
+    Op_store_161616BE,
+    Op_store_16161616BE,
+    Op_store_hhh,
+    Op_store_hhhh,
+    Op_store_fff,
+    Op_store_ffff,
+} Op;
+
+// Vec<N,T> is an N-lane vector of T built on compiler vector extensions:
+// ext_vector_type on clang, vector_size (via a helper struct) on GCC.
+// No definition is provided for other compilers.
+#if defined(__clang__)
+    template <int N, typename T> using Vec = T __attribute__((ext_vector_type(N)));
+#elif defined(__GNUC__)
+    // For some reason GCC accepts this nonsense, but not the more straightforward version,
+    //   template <int N, typename T> using Vec = T __attribute__((vector_size(N*sizeof(T))));
+    template <int N, typename T>
+    struct VecHelper { typedef T __attribute__((vector_size(N*sizeof(T)))) V; };
+
+    template <int N, typename T> using Vec = typename VecHelper<N,T>::V;
+#endif
+
+// First, instantiate our default exec_ops() implementation using the default compilation target.
+//
+// The preprocessor chain below picks the lane count N, the vector alias V<T>
+// and the Color element type for this instantiation, then textually includes
+// src/Transform_inl.h inside namespace baseline.
+
+namespace baseline {
+// Scalar fallback: portable builds, compilers other than clang/GCC, or
+// Emscripten without wasm SIMD.
+#if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__)) \
+                            || (defined(__EMSCRIPTEN_major__) && !defined(__wasm_simd128__))
+    #define N 1
+    template <typename T> using V = T;
+    using Color = float;
+#elif defined(__AVX512F__) && defined(__AVX512DQ__)
+    #define N 16
+    template <typename T> using V = Vec<N,T>;
+    using Color = float;
+#elif defined(__AVX__)
+    #define N 8
+    template <typename T> using V = Vec<N,T>;
+    using Color = float;
+#elif defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(SKCMS_OPT_INTO_NEON_FP16)
+    // Opt-in half-float path: 8 lanes of _Float16.
+    #define N 8
+    template <typename T> using V = Vec<N,T>;
+    using Color = _Float16;
+#else
+    #define N 4
+    template <typename T> using V = Vec<N,T>;
+    using Color = float;
+#endif
+
+    #include "src/Transform_inl.h"
+    // N is a macro, so undefine it before any later instantiation redefines it.
+    #undef N
+}
+
+// Now, instantiate any other versions of run_program() we may want for runtime detection.
+#if !defined(SKCMS_PORTABLE) &&                           \
+    !defined(SKCMS_NO_RUNTIME_CPU_DETECTION) &&           \
+        (( defined(__clang__) && __clang_major__ >= 5) || \
+         (!defined(__clang__) && defined(__GNUC__)))      \
+     && defined(__x86_64__)
+
+    // When the default target lacks AVX2, build an extra 8-lane instantiation in
+    // namespace hsw, compiled with the "avx2,f16c" target attributes, and define
+    // TEST_FOR_HSW to record that it exists.
+    #if !defined(__AVX2__)
+        // Apply the target attributes to every function defined until the matching pop.
+        #if defined(__clang__)
+            #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
+        #elif defined(__GNUC__)
+            #pragma GCC push_options
+            #pragma GCC target("avx2,f16c")
+        #endif
+
+        namespace hsw {
+            #define USING_AVX
+            #define USING_AVX_F16C
+            #define USING_AVX2
+            #define N 8
+            template <typename T> using V = Vec<N,T>;
+            using Color = float;
+
+            #include "src/Transform_inl.h"
+
+            // src/Transform_inl.h will undefine USING_* for us.
+            #undef N
+        }
+
+        #if defined(__clang__)
+            #pragma clang attribute pop
+        #elif defined(__GNUC__)
+            #pragma GCC pop_options
+        #endif
+
+        #define TEST_FOR_HSW
+    #endif
+
+    // When the default target lacks AVX-512F or AVX-512DQ, build another copy of
+    // src/Transform_inl.h in namespace skx with the AVX-512 F/DQ/CD/BW/VL target applied,
+    // then define TEST_FOR_SKX so later code knows this variant exists.
+    #if !defined(__AVX512F__) || !defined(__AVX512DQ__)
+        #if defined(__clang__)
+            #pragma clang attribute push(__attribute__((target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl"))), apply_to=function)
+        #elif defined(__GNUC__)
+            #pragma GCC push_options
+            #pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl")
+        #endif
+
+        namespace skx {
+            // Feature macro and the 16-lane configuration seen by the included file.
+            #define USING_AVX512F
+            #define N 16
+            template <typename T> using V = Vec<N,T>;
+            using Color = float;
+
+            #include "src/Transform_inl.h"
+
+            // src/Transform_inl.h will undefine USING_* for us.
+            #undef N
+        }
+
+        // Restore the target options that were in effect before this section.
+        #if defined(__clang__)
+            #pragma clang attribute pop
+        #elif defined(__GNUC__)
+            #pragma GCC pop_options
+        #endif
+
+        #define TEST_FOR_SKX
+    #endif
+
+    // cpu_type() classifies the running CPU as None, HSW or SKX.  It exists only when at
+    // least one extra instantiation above was compiled.  The result is computed once, by the
+    // lambda that initialises the function-local static, and returned on every call.
+    // It is None whenever runtime_cpu_detection is false.
+    //
+    // Note that SKX can be returned even if only TEST_FOR_HSW is defined; callers must map
+    // it to a variant that was actually built.
+    #if defined(TEST_FOR_HSW) || defined(TEST_FOR_SKX)
+        enum class CpuType { None, HSW, SKX };
+        static CpuType cpu_type() {
+            static const CpuType type = []{
+                if (!runtime_cpu_detection) {
+                    return CpuType::None;
+                }
+                // See http://www.sandpile.org/x86/cpuid.htm
+
+                // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX.
+                // (Inputs: eax = 1 selects the leaf, ecx = 0.)
+                uint32_t eax, ebx, ecx, edx;
+                __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
+                                             : "0"(1), "2"(0));
+                if ((edx & (1u<<25)) &&  // SSE
+                    (edx & (1u<<26)) &&  // SSE2
+                    (ecx & (1u<< 0)) &&  // SSE3
+                    (ecx & (1u<< 9)) &&  // SSSE3
+                    (ecx & (1u<<12)) &&  // FMA (N.B. not used, avoided even)
+                    (ecx & (1u<<19)) &&  // SSE4.1
+                    (ecx & (1u<<20)) &&  // SSE4.2
+                    (ecx & (1u<<26)) &&  // XSAVE
+                    (ecx & (1u<<27)) &&  // OSXSAVE
+                    (ecx & (1u<<28)) &&  // AVX
+                    (ecx & (1u<<29))) {  // F16C
+
+                    // Call cpuid(7) to check for AVX2 and AVX-512 bits.
+                    // (Inputs: eax = 7 selects the leaf, ecx = 0.)
+                    __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
+                                                 : "0"(7), "2"(0));
+                    // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
+                    // xgetbv is only reached after the OSXSAVE bit was confirmed above.
+                    uint32_t xcr0, dont_need_edx;
+                    __asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0));
+
+                    if ((xcr0 & (1u<<1)) &&  // XMM register state saved?
+                        (xcr0 & (1u<<2)) &&  // YMM register state saved?
+                        (ebx  & (1u<<5))) {  // AVX2
+                        // At this point we're at least HSW.  Continue checking for SKX.
+                        if ((xcr0 & (1u<< 5)) && // Opmasks state saved?
+                            (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved?
+                            (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved?
+                            (ebx  & (1u<<16)) && // AVX512F
+                            (ebx  & (1u<<17)) && // AVX512DQ
+                            (ebx  & (1u<<28)) && // AVX512CD
+                            (ebx  & (1u<<30)) && // AVX512BW
+                            (ebx  & (1u<<31))) { // AVX512VL
+                            return CpuType::SKX;
+                        }
+                        return CpuType::HSW;
+                    }
+                }
+                // Some prerequisite is missing: stay on the baseline implementation.
+                return CpuType::None;
+            }();
+            return type;
+        }
+    #endif
+
+#endif
+
+// An op paired with the pointer argument it should be run with, as returned by
+// select_curve_op().  Callers of select_curve_op() skip the pair when arg is null.
+typedef struct {
+    Op          op;
+    const void* arg;
+} OpAndArg;
+
+// Chooses the op that applies `curve` on the given channel; `channel` indexes the four-row
+// r/g/b/a table below.
+//
+// A null arg in the result means no op is needed: the curve is either the identity
+// parametric function or one that classify() reports as Bad.  Curves with table entries
+// always get the channel's table op, with the curve itself as the argument.
+static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
+    static const struct { Op sRGBish, PQish, HLGish, HLGinvish, table; } ops[] = {
+        { Op_tf_r, Op_pq_r, Op_hlg_r, Op_hlginv_r, Op_table_r },
+        { Op_tf_g, Op_pq_g, Op_hlg_g, Op_hlginv_g, Op_table_g },
+        { Op_tf_b, Op_pq_b, Op_hlg_b, Op_hlginv_b, Op_table_b },
+        { Op_tf_a, Op_pq_a, Op_hlg_a, Op_hlginv_a, Op_table_a },
+    };
+    const auto& for_channel = ops[channel];
+    const OpAndArg table_op = { for_channel.table, curve };
+
+    // Table-based curves never take the parametric path.
+    if (curve->table_entries != 0) {
+        return table_op;
+    }
+
+    const OpAndArg skip = { Op_load_a8/*doesn't matter*/, nullptr };
+    const skcms_TransferFunction& tf = curve->parametric;
+
+    // g == a == 1 with every other parameter zero is the identity; nothing to do.
+    const bool is_identity = tf.g == 1 && tf.a == 1
+                          && tf.b == 0 && tf.c == 0 && tf.d == 0
+                          && tf.e == 0 && tf.f == 0;
+    if (is_identity) {
+        return skip;
+    }
+
+    switch (classify(tf)) {
+        case Bad:        return skip;
+        case sRGBish:    return OpAndArg{for_channel.sRGBish,   &tf};
+        case PQish:      return OpAndArg{for_channel.PQish,     &tf};
+        case HLGish:     return OpAndArg{for_channel.HLGish,    &tf};
+        case HLGinvish:  return OpAndArg{for_channel.HLGinvish, &tf};
+    }
+    // Only reached if classify() yields a kind not listed above.
+    return table_op;
+}
+
+// Size in bytes of one pixel in `fmt`.  The low bit of the format (rgb vs. bgr order) does
+// not affect the size, so it is shifted away before matching.  Unknown formats trip the
+// assert; with asserts compiled out they yield 0.
+static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
+    switch (fmt >> 1) {   // ignore rgb/bgr
+        case skcms_PixelFormat_A_8                >> 1:
+        case skcms_PixelFormat_G_8                >> 1:
+        case skcms_PixelFormat_RGBA_8888_Palette8 >> 1:
+            return 1;
+
+        case skcms_PixelFormat_ABGR_4444          >> 1:
+        case skcms_PixelFormat_RGB_565            >> 1:
+            return 2;
+
+        case skcms_PixelFormat_RGB_888            >> 1:
+            return 3;
+
+        case skcms_PixelFormat_RGBA_8888          >> 1:
+        case skcms_PixelFormat_RGBA_8888_sRGB     >> 1:
+        case skcms_PixelFormat_RGBA_1010102       >> 1:
+            return 4;
+
+        case skcms_PixelFormat_RGB_161616LE       >> 1:
+        case skcms_PixelFormat_RGB_161616BE       >> 1:
+        case skcms_PixelFormat_RGB_hhh_Norm       >> 1:
+        case skcms_PixelFormat_RGB_hhh            >> 1:
+            return 6;
+
+        case skcms_PixelFormat_RGBA_16161616LE    >> 1:
+        case skcms_PixelFormat_RGBA_16161616BE    >> 1:
+        case skcms_PixelFormat_RGBA_hhhh_Norm     >> 1:
+        case skcms_PixelFormat_RGBA_hhhh          >> 1:
+            return 8;
+
+        case skcms_PixelFormat_RGB_fff            >> 1:
+            return 12;
+
+        case skcms_PixelFormat_RGBA_ffff          >> 1:
+            return 16;
+    }
+    assert(false);
+    return 0;
+}
+
+// Decides whether `profile` can serve as a transform destination and, for TRC/XYZ
+// profiles, computes the inverses the transform will need.
+//
+// Profiles with B2A are accepted immediately and the output parameters are left untouched.
+// Otherwise the profile must have TRC curves and a toXYZD50 matrix, all three curves must
+// be parametric (no table entries), and each inversion must succeed; the inverted curves
+// are written to invR/invG/invB and the inverted matrix to fromXYZD50, in that order.
+static bool prep_for_destination(const skcms_ICCProfile* profile,
+                                 skcms_Matrix3x3* fromXYZD50,
+                                 skcms_TransferFunction* invR,
+                                 skcms_TransferFunction* invG,
+                                 skcms_TransferFunction* invB) {
+    // skcms_Transform() supports B2A destinations...
+    if (profile->has_B2A) {
+        return true;
+    }
+
+    // ...and destinations with parametric transfer functions and an XYZD50 gamut matrix.
+    if (!profile->has_trc || !profile->has_toXYZD50) {
+        return false;
+    }
+    for (int channel = 0; channel < 3; channel++) {
+        if (profile->trc[channel].table_entries != 0) {
+            return false;
+        }
+    }
+
+    if (!skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)) { return false; }
+    if (!skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)) { return false; }
+    if (!skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)) { return false; }
+
+    return skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
+}
+
+// Convenience wrapper: forwards every argument unchanged to skcms_TransformWithPalette()
+// with a null palette and returns its result.
+bool skcms_Transform(const void*             src,
+                     skcms_PixelFormat       srcFmt,
+                     skcms_AlphaFormat       srcAlpha,
+                     const skcms_ICCProfile* srcProfile,
+                     void*                   dst,
+                     skcms_PixelFormat       dstFmt,
+                     skcms_AlphaFormat       dstAlpha,
+                     const skcms_ICCProfile* dstProfile,
+                     size_t                  npixels) {
+    return skcms_TransformWithPalette(src, srcFmt, srcAlpha, srcProfile,
+                                      dst, dstFmt, dstAlpha, dstProfile,
+                                      npixels, nullptr);
+}
+
+// Converts nz pixels from src (srcFmt / srcAlpha / srcProfile) into dst (dstFmt / dstAlpha /
+// dstProfile).  The function assembles a short program of Ops with their pointer arguments,
+// then hands it to the run_program() variant chosen for the running CPU.
+//
+//   - A null srcProfile or dstProfile is treated as sRGB.
+//   - palette must be non-null when needs_palette(srcFmt) is true; it is only passed to
+//     the program for the RGBA_8888_Palette8 source format.
+//   - dst may equal src only when both formats have the same number of bytes per pixel.
+//
+// Returns false, before anything is written to dst, when the request is too large for an
+// int byte count, a pixel format is not handled, a required palette is missing, or the
+// profiles cannot be used for the conversion.  Returns true once the program has run.
+bool skcms_TransformWithPalette(const void*             src,
+                                skcms_PixelFormat       srcFmt,
+                                skcms_AlphaFormat       srcAlpha,
+                                const skcms_ICCProfile* srcProfile,
+                                void*                   dst,
+                                skcms_PixelFormat       dstFmt,
+                                skcms_AlphaFormat       dstAlpha,
+                                const skcms_ICCProfile* dstProfile,
+                                size_t                  nz,
+                                const void*             palette) {
+    const size_t dst_bpp = bytes_per_pixel(dstFmt),
+                 src_bpp = bytes_per_pixel(srcFmt);
+    // A zero size means bytes_per_pixel() did not recognise the format; the format
+    // switches below would reject it anyway, so bail out before dividing by it.
+    if (dst_bpp == 0 || src_bpp == 0) {
+        return false;
+    }
+    // Let's just refuse if the request is absurdly big.  Compare by division: the product
+    // nz * bpp is computed in size_t and could wrap around for huge nz, slipping past the
+    // limit and leaving the int conversion below with a truncated pixel count.
+    if (nz > (size_t)INT_MAX / dst_bpp || nz > (size_t)INT_MAX / src_bpp) {
+        return false;
+    }
+    int n = (int)nz;
+
+    // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
+    if (!srcProfile) {
+        srcProfile = skcms_sRGB_profile();
+    }
+    if (!dstProfile) {
+        dstProfile = skcms_sRGB_profile();
+    }
+
+    // We can't transform in place unless the PixelFormats are the same size.
+    if (dst == src && dst_bpp != src_bpp) {
+        return false;
+    }
+    // TODO: more careful alias rejection (like, dst == src + 1)?
+
+    if (needs_palette(srcFmt) && !palette) {
+        return false;
+    }
+
+    // The program under construction: ops are appended through `ops`, and the ops that
+    // take an argument append it through `args` in the same order.
+    Op          program  [32];
+    const void* arguments[32];
+
+    Op*          ops  = program;
+    const void** args = arguments;
+
+    // These are always parametric curves of some sort.
+    skcms_Curve dst_curves[3];
+    dst_curves[0].table_entries =
+    dst_curves[1].table_entries =
+    dst_curves[2].table_entries = 0;
+
+    skcms_Matrix3x3        from_xyz;
+
+    // Stage 1: load pixels from the source format.
+    switch (srcFmt >> 1) {
+        default: return false;
+        case skcms_PixelFormat_A_8             >> 1: *ops++ = Op_load_a8;         break;
+        case skcms_PixelFormat_G_8             >> 1: *ops++ = Op_load_g8;         break;
+        case skcms_PixelFormat_ABGR_4444       >> 1: *ops++ = Op_load_4444;       break;
+        case skcms_PixelFormat_RGB_565         >> 1: *ops++ = Op_load_565;        break;
+        case skcms_PixelFormat_RGB_888         >> 1: *ops++ = Op_load_888;        break;
+        case skcms_PixelFormat_RGBA_8888       >> 1: *ops++ = Op_load_8888;       break;
+        case skcms_PixelFormat_RGBA_1010102    >> 1: *ops++ = Op_load_1010102;    break;
+        case skcms_PixelFormat_RGB_161616LE    >> 1: *ops++ = Op_load_161616LE;   break;
+        case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_load_16161616LE; break;
+        case skcms_PixelFormat_RGB_161616BE    >> 1: *ops++ = Op_load_161616BE;   break;
+        case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_load_16161616BE; break;
+        case skcms_PixelFormat_RGB_hhh_Norm    >> 1: *ops++ = Op_load_hhh;        break;
+        case skcms_PixelFormat_RGBA_hhhh_Norm  >> 1: *ops++ = Op_load_hhhh;       break;
+        case skcms_PixelFormat_RGB_hhh         >> 1: *ops++ = Op_load_hhh;        break;
+        case skcms_PixelFormat_RGBA_hhhh       >> 1: *ops++ = Op_load_hhhh;       break;
+        case skcms_PixelFormat_RGB_fff         >> 1: *ops++ = Op_load_fff;        break;
+        case skcms_PixelFormat_RGBA_ffff       >> 1: *ops++ = Op_load_ffff;       break;
+
+        case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: *ops++  = Op_load_8888_palette8;
+                                                        *args++ = palette;
+                                                        break;
+        case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
+            *ops++ = Op_load_8888;
+            *ops++ = Op_tf_r;       *args++ = skcms_sRGB_TransferFunction();
+            *ops++ = Op_tf_g;       *args++ = skcms_sRGB_TransferFunction();
+            *ops++ = Op_tf_b;       *args++ = skcms_sRGB_TransferFunction();
+            break;
+    }
+    if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
+        srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
+        *ops++ = Op_clamp;
+    }
+    // The low bit of the format selects bgr order.
+    if (srcFmt & 1) {
+        *ops++ = Op_swap_rb;
+    }
+    skcms_ICCProfile gray_dst_profile;
+    if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) {
+        // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
+        // luminance (Y) by the destination transfer function.
+        gray_dst_profile = *dstProfile;
+        skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
+        dstProfile = &gray_dst_profile;
+    }
+
+    if (srcProfile->data_color_space == skcms_Signature_CMYK) {
+        // Photoshop creates CMYK images as inverse CMYK.
+        // These happen to be the only ones we've _ever_ seen.
+        *ops++ = Op_invert;
+        // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
+        srcAlpha = skcms_AlphaFormat_Unpremul;
+    }
+
+    if (srcAlpha == skcms_AlphaFormat_Opaque) {
+        *ops++ = Op_force_opaque;
+    } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
+        *ops++ = Op_unpremul;
+    }
+
+    // Stage 2: colour conversion, skipped entirely when both sides share one profile object.
+    if (dstProfile != srcProfile) {
+
+        if (!prep_for_destination(dstProfile,
+                                  &from_xyz,
+                                  &dst_curves[0].parametric,
+                                  &dst_curves[1].parametric,
+                                  &dst_curves[2].parametric)) {
+            return false;
+        }
+
+        if (srcProfile->has_A2B) {
+            if (srcProfile->A2B.input_channels) {
+                for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) {
+                    OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i);
+                    if (oa.arg) {
+                        *ops++  = oa.op;
+                        *args++ = oa.arg;
+                    }
+                }
+                *ops++  = Op_clamp;
+                *ops++  = Op_clut_A2B;
+                *args++ = &srcProfile->A2B;
+            }
+
+            if (srcProfile->A2B.matrix_channels == 3) {
+                for (int i = 0; i < 3; i++) {
+                    OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i);
+                    if (oa.arg) {
+                        *ops++  = oa.op;
+                        *args++ = oa.arg;
+                    }
+                }
+
+                // Skip the matrix op when the matrix is bitwise identical to the identity.
+                static const skcms_Matrix3x4 I = {{
+                    {1,0,0,0},
+                    {0,1,0,0},
+                    {0,0,1,0},
+                }};
+                if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
+                    *ops++  = Op_matrix_3x4;
+                    *args++ = &srcProfile->A2B.matrix;
+                }
+            }
+
+            if (srcProfile->A2B.output_channels == 3) {
+                for (int i = 0; i < 3; i++) {
+                    OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i);
+                    if (oa.arg) {
+                        *ops++  = oa.op;
+                        *args++ = oa.arg;
+                    }
+                }
+            }
+
+            if (srcProfile->pcs == skcms_Signature_Lab) {
+                *ops++ = Op_lab_to_xyz;
+            }
+
+        } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
+            for (int i = 0; i < 3; i++) {
+                OpAndArg oa = select_curve_op(&srcProfile->trc[i], i);
+                if (oa.arg) {
+                    *ops++  = oa.op;
+                    *args++ = oa.arg;
+                }
+            }
+        } else {
+            return false;
+        }
+
+        // A2B sources are in XYZD50 by now, but TRC sources are still in their original gamut.
+        assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
+
+        if (dstProfile->has_B2A) {
+            // B2A needs its input in XYZD50, so transform TRC sources now.
+            if (!srcProfile->has_A2B) {
+                *ops++  = Op_matrix_3x3;
+                *args++ = &srcProfile->toXYZD50;
+            }
+
+            if (dstProfile->pcs == skcms_Signature_Lab) {
+                *ops++ = Op_xyz_to_lab;
+            }
+
+            if (dstProfile->B2A.input_channels == 3) {
+                for (int i = 0; i < 3; i++) {
+                    OpAndArg oa = select_curve_op(&dstProfile->B2A.input_curves[i], i);
+                    if (oa.arg) {
+                        *ops++  = oa.op;
+                        *args++ = oa.arg;
+                    }
+                }
+            }
+
+            if (dstProfile->B2A.matrix_channels == 3) {
+                // Skip the matrix op when the matrix is bitwise identical to the identity.
+                static const skcms_Matrix3x4 I = {{
+                    {1,0,0,0},
+                    {0,1,0,0},
+                    {0,0,1,0},
+                }};
+                if (0 != memcmp(&I, &dstProfile->B2A.matrix, sizeof(I))) {
+                    *ops++  = Op_matrix_3x4;
+                    *args++ = &dstProfile->B2A.matrix;
+                }
+
+                for (int i = 0; i < 3; i++) {
+                    OpAndArg oa = select_curve_op(&dstProfile->B2A.matrix_curves[i], i);
+                    if (oa.arg) {
+                        *ops++  = oa.op;
+                        *args++ = oa.arg;
+                    }
+                }
+            }
+
+            if (dstProfile->B2A.output_channels) {
+                *ops++  = Op_clamp;
+                *ops++  = Op_clut_B2A;
+                *args++ = &dstProfile->B2A;
+                for (int i = 0; i < (int)dstProfile->B2A.output_channels; i++) {
+                    OpAndArg oa = select_curve_op(&dstProfile->B2A.output_curves[i], i);
+                    if (oa.arg) {
+                        *ops++  = oa.op;
+                        *args++ = oa.arg;
+                    }
+                }
+            }
+        } else {
+            // This is a TRC destination.
+            // We'll concat any src->xyz matrix with our xyz->dst matrix into one src->dst matrix.
+            // (A2B sources are already in XYZD50, making that src->xyz matrix I.)
+            static const skcms_Matrix3x3 I = {{
+                { 1.0f, 0.0f, 0.0f },
+                { 0.0f, 1.0f, 0.0f },
+                { 0.0f, 0.0f, 1.0f },
+            }};
+            const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
+
+            // There's a chance the source and destination gamuts are identical,
+            // in which case we can skip the gamut transform.
+            if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
+                // Concat the entire gamut transform into from_xyz,
+                // now slightly misnamed but it's a handy spot to stash the result.
+                from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
+                *ops++  = Op_matrix_3x3;
+                *args++ = &from_xyz;
+            }
+
+            // Encode back to dst RGB using its parametric transfer functions.
+            for (int i = 0; i < 3; i++) {
+                OpAndArg oa = select_curve_op(dst_curves+i, i);
+                if (oa.arg) {
+                    assert (oa.op != Op_table_r &&
+                            oa.op != Op_table_g &&
+                            oa.op != Op_table_b &&
+                            oa.op != Op_table_a);
+                    *ops++  = oa.op;
+                    *args++ = oa.arg;
+                }
+            }
+        }
+    }
+
+    // Stage 3: prepare values for, and store into, the destination format.
+
+    // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut,
+    // not just to values that fit in [0,1].
+    //
+    // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
+    // but would be carrying r > 1, which is really unexpected for downstream consumers.
+    if (dstFmt < skcms_PixelFormat_RGB_hhh) {
+        *ops++ = Op_clamp;
+    }
+
+    if (dstProfile->data_color_space == skcms_Signature_CMYK) {
+        // Photoshop creates CMYK images as inverse CMYK.
+        // These happen to be the only ones we've _ever_ seen.
+        *ops++ = Op_invert;
+
+        // CMYK has no alpha channel, so make sure dstAlpha is a no-op.
+        dstAlpha = skcms_AlphaFormat_Unpremul;
+    }
+
+    if (dstAlpha == skcms_AlphaFormat_Opaque) {
+        *ops++ = Op_force_opaque;
+    } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
+        *ops++ = Op_premul;
+    }
+    // The low bit of the format selects bgr order.
+    if (dstFmt & 1) {
+        *ops++ = Op_swap_rb;
+    }
+    switch (dstFmt >> 1) {
+        default: return false;
+        case skcms_PixelFormat_A_8             >> 1: *ops++ = Op_store_a8;         break;
+        case skcms_PixelFormat_G_8             >> 1: *ops++ = Op_store_g8;         break;
+        case skcms_PixelFormat_ABGR_4444       >> 1: *ops++ = Op_store_4444;       break;
+        case skcms_PixelFormat_RGB_565         >> 1: *ops++ = Op_store_565;        break;
+        case skcms_PixelFormat_RGB_888         >> 1: *ops++ = Op_store_888;        break;
+        case skcms_PixelFormat_RGBA_8888       >> 1: *ops++ = Op_store_8888;       break;
+        case skcms_PixelFormat_RGBA_1010102    >> 1: *ops++ = Op_store_1010102;    break;
+        case skcms_PixelFormat_RGB_161616LE    >> 1: *ops++ = Op_store_161616LE;   break;
+        case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_store_16161616LE; break;
+        case skcms_PixelFormat_RGB_161616BE    >> 1: *ops++ = Op_store_161616BE;   break;
+        case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_store_16161616BE; break;
+        case skcms_PixelFormat_RGB_hhh_Norm    >> 1: *ops++ = Op_store_hhh;        break;
+        case skcms_PixelFormat_RGBA_hhhh_Norm  >> 1: *ops++ = Op_store_hhhh;       break;
+        case skcms_PixelFormat_RGB_hhh         >> 1: *ops++ = Op_store_hhh;        break;
+        case skcms_PixelFormat_RGBA_hhhh       >> 1: *ops++ = Op_store_hhhh;       break;
+        case skcms_PixelFormat_RGB_fff         >> 1: *ops++ = Op_store_fff;        break;
+        case skcms_PixelFormat_RGBA_ffff       >> 1: *ops++ = Op_store_ffff;       break;
+
+        case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
+            *ops++ = Op_tf_r;       *args++ = skcms_sRGB_Inverse_TransferFunction();
+            *ops++ = Op_tf_g;       *args++ = skcms_sRGB_Inverse_TransferFunction();
+            *ops++ = Op_tf_b;       *args++ = skcms_sRGB_Inverse_TransferFunction();
+            *ops++ = Op_store_8888;
+            break;
+    }
+
+    // Pick the implementation: baseline by default, upgraded to hsw and then to skx when
+    // those variants were compiled in and cpu_type() says the CPU supports them.  An SKX
+    // CPU falls back to hsw when only the hsw variant was built.
+    auto run = baseline::run_program;
+#if defined(TEST_FOR_HSW)
+    switch (cpu_type()) {
+        case CpuType::None:                        break;
+        case CpuType::HSW: run = hsw::run_program; break;
+        case CpuType::SKX: run = hsw::run_program; break;
+    }
+#endif
+#if defined(TEST_FOR_SKX)
+    switch (cpu_type()) {
+        case CpuType::None:                        break;
+        case CpuType::HSW:                         break;
+        case CpuType::SKX: run = skx::run_program; break;
+    }
+#endif
+    run(program, arguments, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
+    return true;
+}
+
+// Debug-only sanity check: asserts that prep_for_destination() accepts `profile`.
+// When NDEBUG is defined this does nothing beyond marking the parameter as used.
+static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
+#if !defined(NDEBUG)
+    // Scratch outputs; only the boolean result matters here.
+    skcms_Matrix3x3        scratch_matrix;
+    skcms_TransferFunction scratch_r, scratch_g, scratch_b;
+    assert(prep_for_destination(profile, &scratch_matrix, &scratch_r, &scratch_g, &scratch_b));
+#else
+    (void)profile;
+#endif
+}
+
+// Rewrites `profile` in place, if needed, so that it can be used as a transform destination.
+//
+// Profiles with B2A are left untouched.  Otherwise the profile must have TRC curves and an
+// invertible toXYZD50 matrix.  Each curve that is not already an invertible parametric
+// function is replaced by the result of skcms_ApproximateCurve().  The profile is only
+// modified once all three channels have succeeded; on any failure it is left unchanged and
+// false is returned.
+bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
+    if (profile->has_B2A) {
+        assert_usable_as_destination(profile);
+        return true;
+    }
+
+    if (!profile->has_trc || !profile->has_toXYZD50) {
+        return false;
+    }
+    // Only the success of the inversion matters; the inverse itself is discarded.
+    skcms_Matrix3x3 discarded_inverse;
+    if (!skcms_Matrix3x3_invert(&profile->toXYZD50, &discarded_inverse)) {
+        return false;
+    }
+
+    skcms_TransferFunction replacement[3];
+    for (int c = 0; c < 3; c++) {
+        const skcms_Curve* curve = &profile->trc[c];
+
+        skcms_TransferFunction discarded_tf;
+        const bool keep_as_is = curve->table_entries == 0
+                             && skcms_TransferFunction_invert(&curve->parametric, &discarded_tf);
+        if (keep_as_is) {
+            replacement[c] = curve->parametric;
+        } else {
+            float max_error;
+            // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
+            if (!skcms_ApproximateCurve(curve, &replacement[c], &max_error)) {
+                return false;
+            }
+        }
+    }
+
+    // All three channels succeeded: commit the parametric curves.
+    for (int c = 0; c < 3; ++c) {
+        profile->trc[c].table_entries = 0;
+        profile->trc[c].parametric    = replacement[c];
+    }
+
+    assert_usable_as_destination(profile);
+    return true;
+}
+
+// Variant of skcms_MakeUsableAsDestination() that leaves all three channels sharing one
+// parametric transfer function.  On success `profile` is overwritten with a copy that has
+// has_B2A cleared and the chosen curve in every channel; on failure it is left unchanged.
+bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
+    // Work on a copy with B2A disabled so skcms_MakeUsableAsDestination() produces a
+    // TRC/XYZ profile with three skcms_TransferFunctions.
+    skcms_ICCProfile candidate = *profile;
+    candidate.has_B2A = false;
+    if (!skcms_MakeUsableAsDestination(&candidate)) {
+        return false;
+    }
+
+    // Of the three, pick the transfer function that best fits the other two: the one whose
+    // inverse has the smallest worst-case round-trip error against the original curves.
+    int   winner     = 0;
+    float winner_err = INFINITY_;
+    for (int c = 0; c < 3; c++) {
+        skcms_TransferFunction inverse;
+        if (!skcms_TransferFunction_invert(&candidate.trc[c].parametric, &inverse)) {
+            return false;
+        }
+
+        float worst = 0;
+        int other = 0;
+        while (other < 3) {
+            worst = fmaxf_(worst, skcms_MaxRoundtripError(&profile->trc[other], &inverse));
+            ++other;
+        }
+
+        if (worst < winner_err) {
+            winner_err = worst;
+            winner     = c;
+        }
+    }
+
+    // Copy the winning curve into every channel.
+    const skcms_TransferFunction shared = candidate.trc[winner].parametric;
+    for (int c = 0; c < 3; c++) {
+        candidate.trc[c].parametric = shared;
+    }
+
+    *profile = candidate;
+    assert_usable_as_destination(profile);
+    return true;
+}
diff --git a/third-party/libjxl/libjxl/third_party/skcms/skcms.gni b/third-party/libjxl/libjxl/third_party/skcms/skcms.gni
new file mode 100644
index 0000000000..819afaef13
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/skcms.gni
@@ -0,0 +1,6 @@
+# Copyright 2018 Google Inc.
+#
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+skcms_sources = [ "skcms.cc" ]
diff --git a/third-party/libjxl/libjxl/third_party/skcms/skcms.h b/third-party/libjxl/libjxl/third_party/skcms/skcms.h
new file mode 100644
index 0000000000..2ee56934ad
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/skcms.h
@@ -0,0 +1,404 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#pragma once
+
+// skcms.h contains the entire public API for skcms.
+
+#ifndef SKCMS_API
+    #define SKCMS_API
+#endif
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// A row-major 3x3 matrix (ie vals[row][col])
+typedef struct skcms_Matrix3x3 {
+    float vals[3][3];
+} skcms_Matrix3x3;
+
+// It is _not_ safe to alias the pointers to invert in-place.
+SKCMS_API bool            skcms_Matrix3x3_invert(const skcms_Matrix3x3*, skcms_Matrix3x3*);
+SKCMS_API skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3*, const skcms_Matrix3x3*);
+
+// A row-major 3x4 matrix (ie vals[row][col])
+typedef struct skcms_Matrix3x4 {
+    float vals[3][4];
+} skcms_Matrix3x4;
+
+// A transfer function mapping encoded values to linear values,
+// represented by this 7-parameter piecewise function:
+//
+//   linear = sign(encoded) *  (c*|encoded| + f)       , 0 <= |encoded| < d
+//          = sign(encoded) * ((a*|encoded| + b)^g + e), d <= |encoded|
+//
+// (A simple gamma transfer function sets g to gamma and a to 1.)
+typedef struct skcms_TransferFunction {
+    float g, a,b,c,d,e,f;
+} skcms_TransferFunction;
+
+SKCMS_API float skcms_TransferFunction_eval  (const skcms_TransferFunction*, float);
+SKCMS_API bool  skcms_TransferFunction_invert(const skcms_TransferFunction*,
+                                              skcms_TransferFunction*);
+
+// We can jam a couple alternate transfer function forms into skcms_TransferFunction,
+// including those matching the general forms of the SMPTE ST 2084 PQ function or HLG.
+//
+// PQish:
+//                              max(A + B|encoded|^C, 0)
+//    linear = sign(encoded) * (------------------------) ^ F
+//                                  D + E|encoded|^C
+SKCMS_API bool skcms_TransferFunction_makePQish(skcms_TransferFunction*,
+                                                float A, float B, float C,
+                                                float D, float E, float F);
+// HLGish:
+//            { K * sign(encoded) * ( (R|encoded|)^G )          when 0   <= |encoded| <= 1/R
+//   linear = { K * sign(encoded) * ( e^(a(|encoded|-c)) + b )  when 1/R <  |encoded|
+SKCMS_API bool skcms_TransferFunction_makeScaledHLGish(skcms_TransferFunction*,
+                                                       float K, float R, float G,
+                                                       float a, float b, float c);
+
+// Compatibility shim for old callers: same as makeScaledHLGish() with the scale K fixed at 1.
+static inline bool skcms_TransferFunction_makeHLGish(skcms_TransferFunction* fn,
+                                                     float R, float G,
+                                                     float a, float b, float c) {
+    return skcms_TransferFunction_makeScaledHLGish(fn, 1.0f, R,G, a,b,c);
+}
+
+// PQ mapping encoded [0,1] to linear [0,1], built through skcms_TransferFunction_makePQish().
+static inline bool skcms_TransferFunction_makePQ(skcms_TransferFunction* tf) {
+    return skcms_TransferFunction_makePQish(tf, -107/128.0f,         1.0f,   32/2523.0f   // A, B, C
+                                              , 2413/128.0f, -2392/128.0f, 8192/1305.0f); // D, E, F
+}
+// HLG mapping encoded [0,1] to linear [0,12], built through the K=1 makeHLGish() shim.
+static inline bool skcms_TransferFunction_makeHLG(skcms_TransferFunction* tf) {
+    return skcms_TransferFunction_makeHLGish(tf, 2.0f, 2.0f                                // R, G
+                                               , 1/0.17883277f, 0.28466892f, 0.55991073f); // a, b, c
+}
+
+// Is this an ordinary sRGB-ish transfer function, or one of the HDR forms we support?
+SKCMS_API bool skcms_TransferFunction_isSRGBish(const skcms_TransferFunction*);
+SKCMS_API bool skcms_TransferFunction_isPQish  (const skcms_TransferFunction*);
+SKCMS_API bool skcms_TransferFunction_isHLGish (const skcms_TransferFunction*);
+
+// Unified representation of 'curv' or 'para' tag data, or a 1D table from 'mft1' or 'mft2'
+typedef union skcms_Curve {
+    struct {
+        uint32_t alias_of_table_entries;
+        skcms_TransferFunction parametric;
+    };
+    struct {
+        uint32_t table_entries;
+        const uint8_t* table_8;
+        const uint8_t* table_16;
+    };
+} skcms_Curve;
+
+// Complex transforms between device space (A) and profile connection space (B):
+//   A2B:  device -> [ "A" curves -> CLUT ] -> [ "M" curves -> matrix ] -> "B" curves -> PCS
+//   B2A:  device <- [ "A" curves <- CLUT ] <- [ "M" curves <- matrix ] <- "B" curves <- PCS
+
+typedef struct skcms_A2B {
+    // Optional: N 1D "A" curves, followed by an N-dimensional CLUT.
+    // If input_channels == 0, these curves and CLUT are skipped,
+    // Otherwise, input_channels must be in [1, 4].
+    uint32_t        input_channels;
+    skcms_Curve     input_curves[4];
+    uint8_t         grid_points[4];
+    const uint8_t*  grid_8;
+    const uint8_t*  grid_16;
+
+    // Optional: 3 1D "M" curves, followed by a color matrix.
+    // If matrix_channels == 0, these curves and matrix are skipped,
+    // Otherwise, matrix_channels must be 3.
+    uint32_t        matrix_channels;
+    skcms_Curve     matrix_curves[3];
+    skcms_Matrix3x4 matrix;
+
+    // Required: 3 1D "B" curves. Always present, and output_channels must be 3.
+    uint32_t        output_channels;
+    skcms_Curve     output_curves[3];
+} skcms_A2B;
+
+typedef struct skcms_B2A {
+    // Required: 3 1D "B" curves. Always present, and input_channels must be 3.
+    uint32_t        input_channels;
+    skcms_Curve     input_curves[3];
+
+    // Optional: a color matrix, followed by 3 1D "M" curves.
+    // If matrix_channels == 0, this matrix and these curves are skipped,
+    // Otherwise, matrix_channels must be 3.
+    uint32_t        matrix_channels;
+    skcms_Matrix3x4 matrix;
+    skcms_Curve     matrix_curves[3];
+
+    // Optional: an N-dimensional CLUT, followed by N 1D "A" curves.
+    // If output_channels == 0, this CLUT and these curves are skipped,
+    // Otherwise, output_channels must be in [1, 4].
+    uint32_t        output_channels;
+    uint8_t         grid_points[4];
+    const uint8_t*  grid_8;
+    const uint8_t*  grid_16;
+    skcms_Curve     output_curves[4];
+} skcms_B2A;
+
+typedef struct skcms_CICP {
+    uint8_t color_primaries;
+    uint8_t transfer_characteristics;
+    uint8_t matrix_coefficients;
+    uint8_t video_full_range_flag;
+} skcms_CICP;
+
+typedef struct skcms_ICCProfile {
+    const uint8_t* buffer;
+
+    uint32_t size;
+    uint32_t data_color_space;
+    uint32_t pcs;
+    uint32_t tag_count;
+
+    // skcms_Parse() will set commonly-used fields for you when possible:
+
+    // If we can parse red, green and blue transfer curves from the profile,
+    // trc will be set to those three curves, and has_trc will be true.
+    bool                   has_trc;
+    skcms_Curve            trc[3];
+
+    // If this profile's gamut can be represented by a 3x3 transform to XYZD50,
+    // skcms_Parse() sets toXYZD50 to that transform and has_toXYZD50 to true.
+    bool                   has_toXYZD50;
+    skcms_Matrix3x3        toXYZD50;
+
+    // If the profile has a valid A2B0 or A2B1 tag, skcms_Parse() sets A2B to
+    // that data, and has_A2B to true.  skcms_ParseWithA2BPriority() does the
+    // same following any user-provided prioritization of A2B0, A2B1, or A2B2.
+    bool                   has_A2B;
+    skcms_A2B              A2B;
+
+    // If the profile has a valid B2A0 or B2A1 tag, skcms_Parse() sets B2A to
+    // that data, and has_B2A to true.  skcms_ParseWithA2BPriority() does the
+    // same following any user-provided prioritization of B2A0, B2A1, or B2A2.
+    bool                   has_B2A;
+    skcms_B2A              B2A;
+
+    // If the profile has a valid CICP tag, skcms_Parse() sets CICP to that data,
+    // and has_CICP to true.
+    bool                   has_CICP;
+    skcms_CICP             CICP;
+} skcms_ICCProfile;
+
+// The sRGB color profile is so commonly used that we offer a canonical skcms_ICCProfile for it.
+SKCMS_API const skcms_ICCProfile* skcms_sRGB_profile(void);
+// Ditto for XYZD50, the most common profile connection space.
+SKCMS_API const skcms_ICCProfile* skcms_XYZD50_profile(void);
+
+SKCMS_API const skcms_TransferFunction* skcms_sRGB_TransferFunction(void);
+SKCMS_API const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction(void);
+SKCMS_API const skcms_TransferFunction* skcms_Identity_TransferFunction(void);
+
+// Practical equality test for two skcms_ICCProfiles.
+// The implementation is subject to change, but it will always try to answer
+// "can I substitute A for B?" and "can I skip transforming from A to B?".
+SKCMS_API bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A,
+                                                const skcms_ICCProfile* B);
+
+// Practical test that answers: Is curve roughly the inverse of inv_tf? Typically used by passing
+// the inverse of a known parametric transfer function (like sRGB), to determine if a particular
+// curve is very close to sRGB.
+SKCMS_API bool skcms_AreApproximateInverses(const skcms_Curve* curve,
+                                            const skcms_TransferFunction* inv_tf);
+
+// Similar to above, answering the question for all three TRC curves of the given profile. Again,
+// passing skcms_sRGB_Inverse_TransferFunction() as inv_tf will answer the question:
+// "Does this profile have a transfer function that is very close to sRGB?"
+SKCMS_API bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
+                                                const skcms_TransferFunction* inv_tf);
+
+// Parse an ICC profile and return true if possible, otherwise return false.
+// Selects an A2B profile (if present) according to priority list (each entry 0-2).
+// The buffer is not copied; it must remain valid as long as the skcms_ICCProfile will be used.
+SKCMS_API bool skcms_ParseWithA2BPriority(const void*, size_t,
+                                          const int priority[], int priorities,
+                                          skcms_ICCProfile*);
+
+static inline bool skcms_Parse(const void* buf, size_t len, skcms_ICCProfile* profile) {
+    // For continuity of existing user expectations,
+    // prefer A2B0 (perceptual) over A2B1 (relative colorimetric), and ignore A2B2 (saturation).
+    const int priority[] = {0,1};
+    return skcms_ParseWithA2BPriority(buf, len,
+                                      priority, sizeof(priority)/sizeof(*priority),  // entry count
+                                      profile);
+}
+
+SKCMS_API bool skcms_ApproximateCurve(const skcms_Curve* curve,
+                                      skcms_TransferFunction* approx,
+                                      float* max_error);
+
+SKCMS_API bool skcms_GetCHAD(const skcms_ICCProfile*, skcms_Matrix3x3*);
+SKCMS_API bool skcms_GetWTPT(const skcms_ICCProfile*, float xyz[3]);
+
+// These are common ICC signature values
+enum {
+    // data_color_space
+    skcms_Signature_CMYK = 0x434D594B,
+    skcms_Signature_Gray = 0x47524159,
+    skcms_Signature_RGB  = 0x52474220,
+
+    // pcs
+    skcms_Signature_Lab  = 0x4C616220,
+    skcms_Signature_XYZ  = 0x58595A20,
+};
+
+typedef enum skcms_PixelFormat {
+    skcms_PixelFormat_A_8,
+    skcms_PixelFormat_A_8_,
+    skcms_PixelFormat_G_8,
+    skcms_PixelFormat_G_8_,
+    skcms_PixelFormat_RGBA_8888_Palette8,
+    skcms_PixelFormat_BGRA_8888_Palette8,
+
+    skcms_PixelFormat_RGB_565,
+    skcms_PixelFormat_BGR_565,
+
+    skcms_PixelFormat_ABGR_4444,
+    skcms_PixelFormat_ARGB_4444,
+
+    skcms_PixelFormat_RGB_888,
+    skcms_PixelFormat_BGR_888,
+    skcms_PixelFormat_RGBA_8888,
+    skcms_PixelFormat_BGRA_8888,
+    skcms_PixelFormat_RGBA_8888_sRGB,   // Automatic sRGB encoding / decoding.
+    skcms_PixelFormat_BGRA_8888_sRGB,   // (Generally used with linear transfer functions.)
+
+    skcms_PixelFormat_RGBA_1010102,
+    skcms_PixelFormat_BGRA_1010102,
+
+    skcms_PixelFormat_RGB_161616LE,     // Little-endian.  Pointers must be 16-bit aligned.
+    skcms_PixelFormat_BGR_161616LE,
+    skcms_PixelFormat_RGBA_16161616LE,
+    skcms_PixelFormat_BGRA_16161616LE,
+
+    skcms_PixelFormat_RGB_161616BE,     // Big-endian.  Pointers must be 16-bit aligned.
+    skcms_PixelFormat_BGR_161616BE,
+    skcms_PixelFormat_RGBA_16161616BE,
+    skcms_PixelFormat_BGRA_16161616BE,
+
+    skcms_PixelFormat_RGB_hhh_Norm,   // 1-5-10 half-precision float in [0,1]
+    skcms_PixelFormat_BGR_hhh_Norm,   // Pointers must be 16-bit aligned.
+    skcms_PixelFormat_RGBA_hhhh_Norm,
+    skcms_PixelFormat_BGRA_hhhh_Norm,
+
+    skcms_PixelFormat_RGB_hhh,        // 1-5-10 half-precision float.
+    skcms_PixelFormat_BGR_hhh,        // Pointers must be 16-bit aligned.
+    skcms_PixelFormat_RGBA_hhhh,
+    skcms_PixelFormat_BGRA_hhhh,
+
+    skcms_PixelFormat_RGB_fff,        // 1-8-23 single-precision float (the normal kind).
+    skcms_PixelFormat_BGR_fff,        // Pointers must be 32-bit aligned.
+    skcms_PixelFormat_RGBA_ffff,
+    skcms_PixelFormat_BGRA_ffff,
+} skcms_PixelFormat;
+
+// We always store any alpha channel linearly.  In the chart below, tf-1() is the inverse
+// transfer function for the given color profile (applying the transfer function linearizes).
+
+// We treat opaque as a strong requirement, not just a performance hint: we will ignore
+// any source alpha and treat it as 1.0, and will make sure that any destination alpha
+// channel is filled with the equivalent of 1.0.
+
+// We used to offer multiple types of premultiplication, but now just one, PremulAsEncoded.
+// This is the premul you're probably used to working with.
+
+typedef enum skcms_AlphaFormat {
+    skcms_AlphaFormat_Opaque,          // alpha is always opaque
+                                       //   tf-1(r),   tf-1(g),   tf-1(b),   1.0
+    skcms_AlphaFormat_Unpremul,        // alpha and color are unassociated
+                                       //   tf-1(r),   tf-1(g),   tf-1(b),   a
+    skcms_AlphaFormat_PremulAsEncoded, // premultiplied while encoded
+                                       //   tf-1(r)*a, tf-1(g)*a, tf-1(b)*a, a
+} skcms_AlphaFormat;
+
+// Convert npixels pixels from src format and color profile to dst format and color profile
+// and return true, otherwise return false.  It is safe to alias dst == src if dstFmt == srcFmt.
+SKCMS_API bool skcms_Transform(const void*             src,
+                               skcms_PixelFormat       srcFmt,
+                               skcms_AlphaFormat       srcAlpha,
+                               const skcms_ICCProfile* srcProfile,
+                               void*                   dst,
+                               skcms_PixelFormat       dstFmt,
+                               skcms_AlphaFormat       dstAlpha,
+                               const skcms_ICCProfile* dstProfile,
+                               size_t                  npixels);
+
+// As skcms_Transform(), supporting srcFmts with a palette.
+SKCMS_API bool skcms_TransformWithPalette(const void*             src,
+                                          skcms_PixelFormat       srcFmt,
+                                          skcms_AlphaFormat       srcAlpha,
+                                          const skcms_ICCProfile* srcProfile,
+                                          void*                   dst,
+                                          skcms_PixelFormat       dstFmt,
+                                          skcms_AlphaFormat       dstAlpha,
+                                          const skcms_ICCProfile* dstProfile,
+                                          size_t                  npixels,
+                                          const void*             palette);
+
+// If profile can be used as a destination in skcms_Transform, return true. Otherwise, attempt to
+// rewrite it with approximations where reasonable. If successful, return true. If no reasonable
+// approximation exists, leave the profile unchanged and return false.
+SKCMS_API bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile);
+
+// If profile can be used as a destination with a single parametric transfer function (ie for
+// rasterization), return true. Otherwise, attempt to rewrite it with approximations where
+// reasonable. If successful, return true. If no reasonable approximation exists, leave the
+// profile unchanged and return false.
+SKCMS_API bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile);
+
+// Returns a matrix to adapt XYZ color from given the whitepoint to D50.
+SKCMS_API bool skcms_AdaptToXYZD50(float wx, float wy,
+                                   skcms_Matrix3x3* toXYZD50);
+
+// Returns a matrix to convert RGB color into XYZ adapted to D50, given the
+// primaries and whitepoint of the RGB model.
+SKCMS_API bool skcms_PrimariesToXYZD50(float rx, float ry,
+                                       float gx, float gy,
+                                       float bx, float by,
+                                       float wx, float wy,
+                                       skcms_Matrix3x3* toXYZD50);
+
+// Call before your first call to skcms_Transform() to skip runtime CPU detection.
+SKCMS_API void skcms_DisableRuntimeCPUDetection(void);
+
+// Utilities for programmatically constructing profiles
+static inline void skcms_Init(skcms_ICCProfile* p) {  // reset *p to an empty RGB profile with XYZ PCS
+    memset(p, 0, sizeof(*p));                          // all fields zeroed, so every has_* flag is false
+    p->data_color_space = skcms_Signature_RGB;
+    p->pcs = skcms_Signature_XYZ;
+}
+
+static inline void skcms_SetTransferFunction(skcms_ICCProfile* p,  // install *tf on all three TRCs
+                                             const skcms_TransferFunction* tf) {
+    p->has_trc = true;
+    for (int i = 0; i < 3; ++i) {
+        p->trc[i].table_entries = 0;   // presumably 0 entries marks the curve as parametric, not a table
+        p->trc[i].parametric = *tf;    // copied by value; tf need not outlive the call
+    }
+}
+
+static inline void skcms_SetXYZD50(skcms_ICCProfile* p, const skcms_Matrix3x3* m) {
+    p->has_toXYZD50 = true;
+    p->toXYZD50 = *m;  // matrix is copied by value; m need not outlive the call
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/skcms/skcms_internal.h b/third-party/libjxl/libjxl/third_party/skcms/skcms_internal.h
new file mode 100644
index 0000000000..cc6d578ba0
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/skcms_internal.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#pragma once
+
+// skcms_internal.h contains APIs shared by skcms' internals and its test tools.
+// Please don't use this header from outside the skcms repo.
+
+#include "skcms.h"
+#include <stdbool.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// ~~~~ General Helper Macros ~~~~
+    #define ARRAY_COUNT(arr) (int)(sizeof((arr)) / sizeof(*(arr)))
+
+    typedef struct skcms_ICCTag {
+        uint32_t       signature;
+        uint32_t       type;
+        uint32_t       size;
+        const uint8_t* buf;
+    } skcms_ICCTag;
+
+    void skcms_GetTagByIndex    (const skcms_ICCProfile*, uint32_t idx, skcms_ICCTag*);
+    bool skcms_GetTagBySignature(const skcms_ICCProfile*, uint32_t sig, skcms_ICCTag*);
+
+    float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf);
+
+    // 252 of a random shuffle of all possible bytes.
+    // 252 is evenly divisible by 3 and 4.  Only 192, 10, 241, and 43 are missing.
+    // Used for ICC profile equivalence testing.
+    extern const uint8_t skcms_252_random_bytes[252];
+
+// ~~~~ Portable Math ~~~~
+    static inline float floorf_(float x) {  // libm-free floor(); NaN, +-Inf, |x| >= 2^31 pass through
+        float roundtrip = (x > -2147483648.0f && x < 2147483648.0f) ? (float)((int)x) : x;
+        return roundtrip > x ? roundtrip - 1 : roundtrip;  // truncation rounds negatives up; undo that
+    }
+    static inline float fabsf_(float x) { return x < 0 ? -x : x; }  // note: -0.0f is returned as -0.0f
+    float powf_(float, float);
+
+// ~~~~ Does this pixel format need a palette pointer to be usable? ~~~~
+    static inline bool needs_palette(skcms_PixelFormat fmt) {
+        return (fmt >> 1) == (skcms_PixelFormat_RGBA_8888_Palette8 >> 1);  // RGBA/BGRA Palette8 differ only in bit 0
+    }
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/third-party/libjxl/libjxl/third_party/skcms/src/Transform_inl.h b/third-party/libjxl/libjxl/third_party/skcms/src/Transform_inl.h
new file mode 100644
index 0000000000..7b8aa8ac1c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/src/Transform_inl.h
@@ -0,0 +1,1609 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+// Intentionally NO #pragma once... included multiple times.
+
+// This file is included from skcms.cc in a namespace with some pre-defines:
+//    - N:    depth of all vectors, 1,4,8, or 16 (preprocessor define)
+//    - V<T>: a template to create a vector of N T's.
+
+using F   = V<Color>;   // Called F for historic reasons... maybe rename C?
+using I32 = V<int32_t>;
+using U64 = V<uint64_t>;
+using U32 = V<uint32_t>;
+using U16 = V<uint16_t>;
+using U8  = V<uint8_t>;
+
+
+#if defined(__GNUC__) && !defined(__clang__)
+    // Once again, GCC is kind of weird, not allowing vector = scalar directly.
+    static constexpr F F0 = F() + 0.0f,
+                       F1 = F() + 1.0f,
+                       FInfBits = F() + 0x7f800000; // equals 2139095040, the bit pattern of +Inf
+#else
+    static constexpr F F0 = 0.0f,
+                       F1 = 1.0f,
+                       FInfBits = 0x7f800000; // equals 2139095040, the bit pattern of +Inf
+#endif
+
+// Instead of checking __AVX__ below, we'll check USING_AVX.
+// This lets skcms.cc set USING_AVX to force us in even if the compiler's not set that way.
+// Same deal for __F16C__ and __AVX2__ ~~~> USING_AVX_F16C, USING_AVX2.
+// Each macro is auto-defined here only when the including file has not already defined it.
+#if !defined(USING_AVX)      && N == 8 && defined(__AVX__)
+    #define  USING_AVX
+#endif
+#if !defined(USING_AVX_F16C) && defined(USING_AVX) && defined(__F16C__)
+    #define  USING_AVX_F16C
+#endif
+#if !defined(USING_AVX2)     && defined(USING_AVX) && defined(__AVX2__)
+    #define  USING_AVX2
+#endif
+#if !defined(USING_AVX512F)  && N == 16 && defined(__AVX512F__) && defined(__AVX512DQ__)
+    #define  USING_AVX512F
+#endif
+
+// Similar to the AVX+ features, we define USING_NEON and USING_NEON_F16C.
+// This is more for organizational clarity... skcms.cc doesn't force these.
+#if N > 1 && defined(__ARM_NEON)
+    #define USING_NEON
+    #if __ARM_FP & 2
+        #define USING_NEON_F16C
+    #endif
+    #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(SKCMS_OPT_INTO_NEON_FP16)
+        #define USING_NEON_FP16
+    #endif
+#endif
+
+// These -Wvector-conversion warnings seem to trigger in very bogus situations,
+// like vst3q_f32() expecting a 16x char rather than a 4x float vector.  :/
+#if defined(USING_NEON) && defined(__clang__)
+    #pragma clang diagnostic ignored "-Wvector-conversion"
+#endif
+
+// GCC & Clang (but not clang-cl) warn returning U64 on x86 is larger than a register.
+// You'd see warnings like, "using AVX even though AVX is not enabled".
+// We stifle these warnings; our helpers that return U64 are always inlined.
+#if defined(__SSE__) && defined(__GNUC__)
+    #if !defined(__has_warning)
+        #pragma GCC diagnostic ignored "-Wpsabi"
+    #elif __has_warning("-Wpsabi")
+        #pragma GCC diagnostic ignored "-Wpsabi"
+    #endif
+#endif
+
+#if defined(__clang__)
+    #define FALLTHROUGH [[clang::fallthrough]]
+#else
+    #define FALLTHROUGH
+#endif
+
+// We tag most helper functions as SI, to enforce good code generation
+// but also work around what we think is a bug in GCC: when targeting 32-bit
+// x86, GCC tends to pass U16 (4x uint16_t vector) function arguments in the
+// MMX mm0 register, which seems to mess with unrelated code that later uses
+// x87 FP instructions (MMX's mm0 is an alias for x87's st0 register).
+//
+// It helps codegen to call __builtin_memcpy() when we know the byte count at compile time.
+#if defined(__clang__) || defined(__GNUC__)
+    #define SI static inline __attribute__((always_inline))
+#else
+    #define SI static inline
+#endif
+
+template <typename T, typename P>
+SI T load(const P* ptr) {  // read a T out of the bytes at ptr, whatever P is
+    T val;
+    small_memcpy(&val, ptr, sizeof(val));  // copies sizeof(T) bytes; small_memcpy comes from the includer
+    return val;
+}
+template <typename T, typename P>
+SI void store(P* ptr, const T& val) {  // write val's bytes to ptr, whatever P is
+    small_memcpy(ptr, &val, sizeof(val));  // copies sizeof(T) bytes; small_memcpy comes from the includer
+}
+
+// (T)v is a cast when N == 1 and a bit-pun when N>1,
+// so we use cast<T>(v) to actually cast or bit_pun<T>(v) to bit-pun.
+template <typename D, typename S>
+SI D cast(const S& v) {
+#if N == 1
+    return (D)v;                              // scalar build: ordinary value conversion
+#elif defined(__clang__)
+    return __builtin_convertvector(v, D);     // Clang: lane-wise value conversion builtin
+#else
+    D d;                                      // other compilers: convert one lane at a time
+    for (int i = 0; i < N; i++) {
+        d[i] = v[i];
+    }
+    return d;
+#endif
+}
+
+template <typename D, typename S>
+SI D bit_pun(const S& v) {  // reinterpret v's bytes as a D without converting the value
+    static_assert(sizeof(D) == sizeof(v), "");  // source and destination must be the same size
+    return load<D>(&v);
+}
+
+// When we convert from float to fixed point, it's very common to want to round,
+// and for some reason compilers generate better code when converting to int32_t.
+// To serve both those ends, we use this function to_fixed() instead of direct cast().
+#if defined(USING_NEON_FP16)
+    // NEON's got a F16 -> U16 instruction, so this should be fine without going via I16.
+    SI U16 to_fixed(F f) {  return cast<U16>(f + 0.5f); }
+#else
+    SI U32 to_fixed(F f) {  return (U32)cast<I32>(f + 0.5f); }  // add 0.5, then convert via int32
+#endif
+
+
+// Sometimes we do something crazy on one branch of a conditional,
+// like divide by zero or convert a huge float to an integer,
+// but then harmlessly select the other side.  That trips up N==1
+// sanitizer builds, so we make if_then_else() a macro to avoid
+// evaluating the unused side.
+// (For N>1 both sides are always evaluated and the result is blended bitwise by cond.)
+#if N == 1
+    #define if_then_else(cond, t, e) ((cond) ? (t) : (e))
+#else
+    template <typename C, typename T>
+    SI T if_then_else(C cond, T t, T e) {
+        return bit_pun<T>( ( cond & bit_pun<C>(t)) |    // t's bits wherever cond has bits set
+                           (~cond & bit_pun<C>(e)) );   // e's bits everywhere else
+    }
+#endif
+
+
+SI F F_from_Half(U16 half) {  // widen 16-bit half-float lanes to F
+#if defined(USING_NEON_FP16)
+    return bit_pun<F>(half);
+#elif defined(USING_NEON_F16C)
+    return vcvt_f32_f16((float16x4_t)half);
+#elif defined(USING_AVX512F)
+    return (F)_mm512_cvtph_ps((__m256i)half);
+#elif defined(USING_AVX_F16C)
+    typedef int16_t __attribute__((vector_size(16))) I16;
+    return __builtin_ia32_vcvtph2ps256((I16)half);
+#else
+    U32 wide = cast<U32>(half);
+    // A half is 1-5-10 sign-exponent-mantissa, with 15 exponent bias.
+    U32 s  = wide & 0x8000,
+        em = wide ^ s;
+
+    // Move sign/exponent/mantissa into float position and rebias the exponent (15 -> 127).
+    F norm = bit_pun<F>( (s<<16) + (em<<13) + ((127-15)<<23) );  // half Inf/NaN become finite >= 65536
+
+    // Simply flush all denorm half floats to zero.
+    return if_then_else(em < 0x0400, F0, norm);  // em < 0x0400 means the exponent field is zero
+#endif
+}
+
+#if defined(__clang__)
+    // The -((127-15)<<10) underflows that side of the math when
+    // we pass a denorm half float.  It's harmless... we'll take the 0 side anyway.
+    __attribute__((no_sanitize("unsigned-integer-overflow")))
+#endif
+SI U16 Half_from_F(F f) {  // narrow F lanes to 16-bit half floats
+#if defined(USING_NEON_FP16)
+    return bit_pun<U16>(f);
+#elif defined(USING_NEON_F16C)
+    return (U16)vcvt_f16_f32(f);
+#elif defined(USING_AVX512F)
+    return (U16)_mm512_cvtps_ph((__m512 )f, _MM_FROUND_CUR_DIRECTION );
+#elif defined(USING_AVX_F16C)
+    return (U16)__builtin_ia32_vcvtps2ph256(f, 0x04/*_MM_FROUND_CUR_DIRECTION*/);
+#else
+    // A float is 1-8-23 sign-exponent-mantissa, with 127 exponent bias.
+    U32 sem = bit_pun<U32>(f),
+        s   = sem & 0x80000000,
+         em = sem ^ s;
+
+    // For simplicity we flush denorm half floats (including all denorm floats) to zero.
+    return cast<U16>(if_then_else(em < 0x38800000, (U32)F0   // 0x38800000 is 2^-14 as a float
+                                                 , (s>>16) + (em>>13) - ((127-15)<<10)));
+#endif
+}
+
+// Swap high and low bytes of 16-bit lanes, converting between big-endian and little-endian.
+#if defined(USING_NEON_FP16)
+    SI U16 swap_endian_16(U16 v) {
+        return (U16)vrev16q_u8((uint8x16_t) v);   // 8 lanes viewed as 16 bytes
+    }
+#elif defined(USING_NEON)
+    SI U16 swap_endian_16(U16 v) {
+        return (U16)vrev16_u8((uint8x8_t) v);     // 4 lanes viewed as 8 bytes
+    }
+#endif  // no swap_endian_16() is defined in this span for non-NEON builds
+
+SI U64 swap_endian_16x4(const U64& rgba) {  // byte-swap each of the four 16-bit fields per 64-bit lane
+    return (rgba & 0x00ff00ff00ff00ff) << 8    // low byte of each field moves up
+         | (rgba & 0xff00ff00ff00ff00) >> 8;   // high byte of each field moves down
+}
+
+#if defined(USING_NEON_FP16)
+    SI F min_(F x, F y) { return (F)vminq_f16((float16x8_t)x, (float16x8_t)y); }
+    SI F max_(F x, F y) { return (F)vmaxq_f16((float16x8_t)x, (float16x8_t)y); }
+#elif defined(USING_NEON)
+    SI F min_(F x, F y) { return (F)vminq_f32((float32x4_t)x, (float32x4_t)y); }
+    SI F max_(F x, F y) { return (F)vmaxq_f32((float32x4_t)x, (float32x4_t)y); }
+#else
+    SI F min_(F x, F y) { return if_then_else(x > y, y, x); }  // x whenever the compare is false (e.g. NaN)
+    SI F max_(F x, F y) { return if_then_else(x < y, y, x); }  // x whenever the compare is false (e.g. NaN)
+#endif
+
+SI F floor_(F x) {
+#if N == 1
+    return floorf_(x);  // scalar build: portable helper from skcms_internal.h
+#elif defined(USING_NEON_FP16)
+    return vrndmq_f16(x);
+#elif defined(__aarch64__)
+    return vrndmq_f32(x);
+#elif defined(USING_AVX512F)
+    // Clang's _mm512_floor_ps() passes its mask as -1, not (__mmask16)-1,
+    // and integer sanitizer catches that this implicit cast changes the
+    // value from -1 to 65535.  We'll cast manually to work around it.
+    // Read this as `return _mm512_floor_ps(x)`.
+    return _mm512_mask_floor_ps(x, (__mmask16)-1, x);
+#elif defined(USING_AVX)
+    return __builtin_ia32_roundps256(x, 0x01/*_MM_FROUND_FLOOR*/);
+#elif defined(__SSE4_1__)
+    return _mm_floor_ps(x);
+#else
+    // Round trip through integers with a truncating cast.
+    F roundtrip = cast<F>(cast<I32>(x));
+    // If x is negative, truncating gives the ceiling instead of the floor.
+    return roundtrip - if_then_else(roundtrip > x, F1, F0);
+
+    // This implementation fails for values of x that are outside
+    // the range an integer can represent.  We expect most x to be small.
+#endif
+}
+
+SI F approx_log2(F x) {
+#if defined(USING_NEON_FP16)
+    // TODO(mtklein): not implemented for FP16 lanes; x is returned unchanged.
+    return x;
+#else
+    // The first approximation of log2(x) is its exponent 'e', minus 127.
+    I32 bits = bit_pun<I32>(x);
+
+    F e = cast<F>(bits) * (1.0f / (1<<23));  // bit pattern / 2^23: biased exponent plus a fraction
+
+    // If we use the mantissa too we can refine the error significantly.
+    F m = bit_pun<F>( (bits & 0x007fffff) | 0x3f000000 );  // mantissa with exponent forced: [0.5, 1)
+
+    return e - 124.225514990f
+             -   1.498030302f*m
+             -   1.725879990f/(0.3520887068f + m);
+#endif
+}
+
+SI F approx_log(F x) {  // natural log, built on approx_log2()
+    const float ln2 = 0.69314718f;
+    return ln2 * approx_log2(x);  // ln(x) = ln(2) * log2(x)
+}
+
+SI F approx_exp2(F x) {
+#if defined(USING_NEON_FP16)
+    // TODO(mtklein): not implemented for FP16 lanes; x is returned unchanged.
+    return x;
+#else
+    F fract = x - floor_(x);
+    // fbits is the result's float bit pattern, computed numerically (hence the 2^23 scale).
+    F fbits = (1.0f * (1<<23)) * (x + 121.274057500f
+                                    -   1.490129070f*fract
+                                    +  27.728023300f/(4.84252568f - fract));
+    I32 bits = cast<I32>(min_(max_(fbits, F0), FInfBits));  // clamp to [0, +Inf's bits] first
+
+    return bit_pun<F>(bits);
+#endif
+}
+
+SI F approx_pow(F x, float y) {  // x^y as exp2(y * log2(x)); inputs 0 and 1 are returned as-is
+    return if_then_else((x == F0) | (x == F1), x
+                                             , approx_exp2(approx_log2(x) * y));
+}
+
+SI F approx_exp(F x) {  // e^x, built on approx_exp2()
+    const float log2_e = 1.4426950408889634074f;
+    return approx_exp2(log2_e * x);  // e^x = 2^(x * log2(e))
+}
+
+// Return tf(x): evaluated on |x|, with x's sign bit copied back onto the result.
+SI F apply_tf(const skcms_TransferFunction* tf, F x) {
+#if defined(USING_NEON_FP16)
+    // TODO(mtklein): not implemented for FP16 lanes; x is returned unchanged.
+    (void)tf;
+    return x;
+#else
+    // Peel off the sign bit and set x = |x|.
+    U32 bits = bit_pun<U32>(x),
+        sign = bits & 0x80000000;
+    x = bit_pun<F>(bits ^ sign);
+
+    // The transfer function has a linear part up to d, exponential at d and after.
+    F v = if_then_else(x < tf->d,            tf->c*x + tf->f
+                                , approx_pow(tf->a*x + tf->b, tf->g) + tf->e);
+
+    // Tack the sign bit back on.
+    return bit_pun<F>(sign | bit_pun<U32>(v));
+#endif
+}
+
+SI F apply_pq(const skcms_TransferFunction* tf, F x) {
+#if defined(USING_NEON_FP16)
+    // TODO(mtklein): not implemented for FP16 lanes; x is returned unchanged.
+    (void)tf;
+    return x;
+#else
+    U32 bits = bit_pun<U32>(x),
+        sign = bits & 0x80000000;
+    x = bit_pun<F>(bits ^ sign);  // x = |x|; the sign is restored on return
+    // v = (max(a + b*x^c, 0) / (d + e*x^c))^f, reading the six constants from tf->a..tf->f.
+    F v = approx_pow(max_(tf->a + tf->b * approx_pow(x, tf->c), F0)
+                       / (tf->d + tf->e * approx_pow(x, tf->c)),
+                     tf->f);
+
+    return bit_pun<F>(sign | bit_pun<U32>(v));
+#endif
+}
+
+// Piecewise curve on |x|: (|x|*R)^G while |x|*R <= 1, otherwise exp((|x|-c)*a) + b.
+// The result is scaled by K = tf->f + 1 and takes the sign of x.
+SI F apply_hlg(const skcms_TransferFunction* tf, F x) {
+#if defined(USING_NEON_FP16)
+    // TODO(mtklein)
+    (void)tf;
+    return x;
+#else
+    // Unpack the curve parameters from the generic transfer-function fields.
+    const float R = tf->a;
+    const float G = tf->b;
+    const float a = tf->c;
+    const float b = tf->d;
+    const float c = tf->e;
+    const float K = tf->f + 1;
+
+    // Separate the sign bit from the magnitude; from here on x holds |x|.
+    const U32 raw  = bit_pun<U32>(x);
+    const U32 sign = raw & 0x80000000;
+    x = bit_pun<F>(raw ^ sign);
+
+    F scaled   = x*R;
+    F pow_part = approx_pow(scaled, G);
+    F exp_part = approx_exp((x-c)*a) + b;
+    F v = if_then_else(scaled <= 1, pow_part, exp_part);
+
+    return K*bit_pun<F>(sign | bit_pun<U32>(v));
+#endif
+}
+
+// Piecewise curve on |x|/K (K = tf->f + 1): R * y^G while y <= 1, otherwise a*ln(y - b) + c.
+// The result takes the sign of x.
+SI F apply_hlginv(const skcms_TransferFunction* tf, F x) {
+#if defined(USING_NEON_FP16)
+    // TODO(mtklein)
+    (void)tf;
+    return x;
+#else
+    // Unpack the curve parameters from the generic transfer-function fields.
+    const float R = tf->a;
+    const float G = tf->b;
+    const float a = tf->c;
+    const float b = tf->d;
+    const float c = tf->e;
+    const float K = tf->f + 1;
+
+    // Separate the sign bit from the magnitude, then divide the magnitude by K.
+    const U32 raw  = bit_pun<U32>(x);
+    const U32 sign = raw & 0x80000000;
+    x = bit_pun<F>(raw ^ sign);
+    x /= K;
+
+    F pow_part = R * approx_pow(x, G);
+    F log_part = a * approx_log(x - b) + c;
+    F v = if_then_else(x <= 1, pow_part, log_part);
+
+    return bit_pun<F>(sign | bit_pun<U32>(v));
+#endif
+}
+
+
+// Strided loads and stores of N values, starting from p.
+// Load one value per lane from p with a stride of 3 elements: p[0], p[3], p[6], ...
+template <typename T, typename P>
+SI T load_3(const P* p) {
+#if N == 1
+    return (T)p[0];
+#elif N == 4 || N == 8 || N == 16
+    T v = {};
+    // Fixed trip count; every lane is overwritten.
+    for (int lane = 0; lane < N; lane++) {
+        v[lane] = p[3*lane];
+    }
+    return v;
+#endif
+}
+
+// Load one value per lane from p with a stride of 4 elements: p[0], p[4], p[8], ...
+template <typename T, typename P>
+SI T load_4(const P* p) {
+#if N == 1
+    return (T)p[0];
+#elif N == 4 || N == 8 || N == 16
+    T v = {};
+    // Fixed trip count; every lane is overwritten.
+    for (int lane = 0; lane < N; lane++) {
+        v[lane] = p[4*lane];
+    }
+    return v;
+#endif
+}
+
+// Store lane k of v to p[3*k], i.e. a strided store with 3 elements between lanes.
+template <typename T, typename P>
+SI void store_3(P* p, const T& v) {
+#if N == 1
+    p[0] = v;
+#elif N == 4 || N == 8 || N == 16
+    // Lanes are written in ascending order.
+    for (int lane = 0; lane < N; lane++) {
+        p[3*lane] = v[lane];
+    }
+#endif
+}
+
+// Store lane k of v to p[4*k], i.e. a strided store with 4 elements between lanes.
+template <typename T, typename P>
+SI void store_4(P* p, const T& v) {
+#if N == 1
+    p[0] = v;
+#elif N == 4 || N == 8 || N == 16
+    // Lanes are written in ascending order.
+    for (int lane = 0; lane < N; lane++) {
+        p[4*lane] = v[lane];
+    }
+#endif
+}
+
+
+// For each lane, fetch the byte p[ix] selected by that lane's index.
+SI U8 gather_8(const uint8_t* p, I32 ix) {
+#if N == 1
+    return p[ix];
+#elif N == 4 || N == 8 || N == 16
+    U8 v = {};
+    for (int lane = 0; lane < N; lane++) {
+        v[lane] = p[ix[lane]];
+    }
+    return v;
+#endif
+}
+
+// For each lane, fetch the ix'th 16-bit value from p (p is a byte pointer, so entry i
+// lives at byte offset 2*i).
+SI U16 gather_16(const uint8_t* p, I32 ix) {
+#if N == 1
+    return load<uint16_t>(p + 2*ix);
+#elif N == 4 || N == 8 || N == 16
+    U16 v = {};
+    for (int lane = 0; lane < N; lane++) {
+        v[lane] = load<uint16_t>(p + 2*ix[lane]);
+    }
+    return v;
+#endif
+}
+
+// For each lane, fetch the ix'th 32-bit value from p (p is a byte pointer, so entry i
+// lives at byte offset 4*i).
+SI U32 gather_32(const uint8_t* p, I32 ix) {
+    // TODO: AVX2 and AVX-512 gathers (c.f. gather_24).
+#if N == 1
+    return load<uint32_t>(p + 4*ix);
+#elif N == 4 || N == 8 || N == 16
+    U32 v = {};
+    for (int lane = 0; lane < N; lane++) {
+        v[lane] = load<uint32_t>(p + 4*ix[lane]);
+    }
+    return v;
+#endif
+}
+
+SI U32 gather_24(const uint8_t* p, I32 ix) {  // Per lane: fetch the ix'th packed 3-byte entry of p into the low 24 bits.
+    // First, back up a byte.  Any place we're gathering from has a safe junk byte to read
+    // in front of it, either a previous table value, or some tag metadata.
+    p -= 1;
+
+    // Load the i'th 24-bit value from p, and 1 extra byte (p already points one byte early).
+    auto load_24_32 = [p](int i) {
+        return load<uint32_t>(p + 3*i);
+    };
+
+    // Now load multiples of 4 bytes (a junk byte, then r,g,b).
+#if N == 1
+    U32 v = load_24_32(ix);
+#elif N == 4
+    U32 v = { load_24_32(ix[0]), load_24_32(ix[1]), load_24_32(ix[2]), load_24_32(ix[3]) };
+#elif N == 8 && !defined(USING_AVX2)
+    U32 v = { load_24_32(ix[0]), load_24_32(ix[1]), load_24_32(ix[2]), load_24_32(ix[3]),
+              load_24_32(ix[4]), load_24_32(ix[5]), load_24_32(ix[6]), load_24_32(ix[7]) };
+#elif N == 8
+    (void)load_24_32;  // Not used on this path.
+    // The gather instruction here doesn't need any particular alignment,
+    // but the intrinsic takes a const int*.
+    const int* p4 = bit_pun<const int*>(p);
+    I32 zero = { 0, 0, 0, 0,  0, 0, 0, 0},
+        mask = {-1,-1,-1,-1, -1,-1,-1,-1};
+    #if defined(__clang__)
+        U32 v = (U32)__builtin_ia32_gatherd_d256(zero, p4, 3*ix, mask, 1);  // Same 3*i byte offsets as load_24_32; final argument (scale) is 1.
+    #elif defined(__GNUC__)
+        U32 v = (U32)__builtin_ia32_gathersiv8si(zero, p4, 3*ix, mask, 1);  // GCC's name for the builtin; same arguments as the clang branch.
+    #endif
+#elif N == 16
+    (void)load_24_32;  // Not used on this path.
+    // The intrinsic is supposed to take const void* now, but it takes const int*, just like AVX2.
+    // And AVX-512 swapped the order of arguments.  :/
+    const int* p4 = bit_pun<const int*>(p);
+    U32 v = (U32)_mm512_i32gather_epi32((__m512i)(3*ix), p4, 1);
+#endif
+
+    // Shift off the junk byte, leaving r,g,b in low 24 bits (and zero in the top 8).
+    return v >> 8;
+}
+
+#if !defined(__arm__)  // Not compiled when __arm__ is defined.
+    SI void gather_48(const uint8_t* p, I32 ix, U64* v) {  // Per lane: fetch the ix'th packed 6-byte entry of p into *v.
+        // As in gather_24(), with everything doubled.
+        p -= 2;
+
+        // Load the i'th 48-bit value from p, and 2 extra bytes (p already points two bytes early).
+        auto load_48_64 = [p](int i) {
+            return load<uint64_t>(p + 6*i);
+        };
+
+    #if N == 1
+        *v = load_48_64(ix);
+    #elif N == 4
+        *v = U64{
+            load_48_64(ix[0]), load_48_64(ix[1]), load_48_64(ix[2]), load_48_64(ix[3]),
+        };
+    #elif N == 8 && !defined(USING_AVX2)
+        *v = U64{
+            load_48_64(ix[0]), load_48_64(ix[1]), load_48_64(ix[2]), load_48_64(ix[3]),
+            load_48_64(ix[4]), load_48_64(ix[5]), load_48_64(ix[6]), load_48_64(ix[7]),
+        };
+    #elif N == 8
+        (void)load_48_64;  // Not used on this path.
+        typedef int32_t   __attribute__((vector_size(16))) Half_I32;  // Four 32-bit indices.
+        typedef long long __attribute__((vector_size(32))) Half_I64;  // Four 64-bit results.
+
+        // The gather instruction here doesn't need any particular alignment,
+        // but the intrinsic takes a const long long*.
+        const long long int* p8 = bit_pun<const long long int*>(p);
+
+        Half_I64 zero = { 0, 0, 0, 0},
+                 mask = {-1,-1,-1,-1};
+
+        ix *= 6;  // Same 6*i byte offsets as load_48_64.
+        Half_I32 ix_lo = { ix[0], ix[1], ix[2], ix[3] },
+                 ix_hi = { ix[4], ix[5], ix[6], ix[7] };
+
+        #if defined(__clang__)
+            Half_I64 lo = (Half_I64)__builtin_ia32_gatherd_q256(zero, p8, ix_lo, mask, 1),
+                     hi = (Half_I64)__builtin_ia32_gatherd_q256(zero, p8, ix_hi, mask, 1);
+        #elif defined(__GNUC__)
+            Half_I64 lo = (Half_I64)__builtin_ia32_gathersiv4di(zero, p8, ix_lo, mask, 1),
+                     hi = (Half_I64)__builtin_ia32_gathersiv4di(zero, p8, ix_hi, mask, 1);
+        #endif
+        store((char*)v +  0, lo);  // Lanes 0-3 fill the first 32 bytes of *v ...
+        store((char*)v + 32, hi);  // ... and lanes 4-7 the next 32.
+    #elif N == 16
+        (void)load_48_64;  // Not used on this path.
+        const long long int* p8 = bit_pun<const long long int*>(p);
+        __m512i lo = _mm512_i32gather_epi64(_mm512_extracti32x8_epi32((__m512i)(6*ix), 0), p8, 1),
+                hi = _mm512_i32gather_epi64(_mm512_extracti32x8_epi32((__m512i)(6*ix), 1), p8, 1);
+        store((char*)v +  0, lo);  // First 64 bytes of *v.
+        store((char*)v + 64, hi);  // Next 64 bytes of *v.
+    #endif
+
+        *v >>= 16;  // Shift off the 2 extra bytes, as gather_24() shifts off its single junk byte.
+    }
+#endif
+
+// Convert 8-bit values to floating point, scaling by 1/255.
+SI F F_from_U8(U8 v) {
+    F widened = cast<F>(v);
+    return widened * (1/255.0f);
+}
+
+// Convert big-endian 16-bit values to floating point, scaling by 1/65535.
+SI F F_from_U16_BE(U16 v) {
+    // All 16-bit ICC values are big-endian, so swap the two bytes before converting.
+    // The 0xffff mask keeps the shifted-up byte within 16 bits, which MSVC otherwise
+    // reports as a "loss" of data in the portable path.
+    U16 swapped = (v >> 8) | ((v << 8) & 0xffff);
+    return cast<F>(swapped) * (1/65535.0f);
+}
+
+// Scale by 65535, add 0.5, and convert to 16-bit integers.
+SI U16 U16_from_F(F v) {
+    // 65535 == inf in FP16, so the arithmetic is done after promoting to FP32.
+    auto wide = cast<V<float>>(v);
+    return cast<U16>(wide * 65535 + 0.5f);
+}
+
+// Subtract 1 from the integer bit pattern of v and reinterpret the result as F.
+SI F minus_1_ulp(F v) {
+    // The integer type used for the bits is 16-bit in the FP16 build, 32-bit otherwise.
+#if defined(USING_NEON_FP16)
+    U16 bits = bit_pun<U16>(v);
+#else
+    U32 bits = bit_pun<U32>(v);
+#endif
+    return bit_pun<F>( bits - 1 );
+}
+
+// Evaluate a tabulated curve at v by linearly interpolating between neighbouring entries.
+SI F table(const skcms_Curve* curve, F v) {
+    // Clamp the input to [0,1], then scale to a (fractional) table position.
+    const F clamped = max_(F0, min_(v, F1));
+    const F pos     = clamped * (float)(curve->table_entries - 1);
+
+    // below/above are the (equal or adjacent) entries around pos; frac is how far pos
+    // sits past 'below', i.e. the fractional part of pos.
+    const I32 below = cast<I32>(            pos      );
+    const I32 above = cast<I32>(minus_1_ulp(pos+1.0f));
+    const F   frac  = pos - cast<F>(below);
+
+    // TODO: can we load both entries simultaneously?  Each 'above' entry is either the same
+    // as its 'below' entry or adjacent to it.  We have a rough idea that it'd always be safe
+    // to read adjacent entries and perhaps underflow the table by a byte or two
+    // (it'd be junk, but always safe to read).  Not sure how to lerp yet.
+    F lo_val, hi_val;
+    if (!curve->table_8) {
+        lo_val = F_from_U16_BE(gather_16(curve->table_16, below));
+        hi_val = F_from_U16_BE(gather_16(curve->table_16, above));
+    } else {
+        lo_val = F_from_U8(gather_8(curve->table_8, below));
+        hi_val = F_from_U8(gather_8(curve->table_8, above));
+    }
+    return lo_val + (hi_val-lo_val)*frac;
+}
+
+// Fetch the ix'th 3-byte entry of an 8-bit grid and write its channels, scaled by 1/255,
+// through r, g and b.
+SI void sample_clut_8(const uint8_t* grid_8, I32 ix, F* r, F* g, F* b) {
+    const U32 packed = gather_24(grid_8, ix);
+
+    // Channels sit in successive bytes, lowest byte first.
+    const U32 r8 = (packed >>  0) & 0xff,
+              g8 = (packed >>  8) & 0xff,
+              b8 = (packed >> 16) & 0xff;
+
+    *r = cast<F>(r8) * (1/255.0f);
+    *g = cast<F>(g8) * (1/255.0f);
+    *b = cast<F>(b8) * (1/255.0f);
+}
+
+// Fetch the ix'th 4-byte entry of an 8-bit grid and write its channels, scaled by 1/255,
+// through r, g, b and a.
+SI void sample_clut_8(const uint8_t* grid_8, I32 ix, F* r, F* g, F* b, F* a) {
+    // TODO: don't forget to optimize gather_32().
+    const U32 packed = gather_32(grid_8, ix);
+
+    // Channels sit in successive bytes, lowest byte first.
+    const U32 r8 = (packed >>  0) & 0xff,
+              g8 = (packed >>  8) & 0xff,
+              b8 = (packed >> 16) & 0xff,
+              a8 = (packed >> 24) & 0xff;
+
+    *r = cast<F>(r8) * (1/255.0f);
+    *g = cast<F>(g8) * (1/255.0f);
+    *b = cast<F>(b8) * (1/255.0f);
+    *a = cast<F>(a8) * (1/255.0f);
+}
+
+// Fetch the ix'th entry (three big-endian 16-bit channels) of a 16-bit grid and write the
+// channels, scaled by 1/65535, through r, g and b.
+SI void sample_clut_16(const uint8_t* grid_16, I32 ix, F* r, F* g, F* b) {
+#if defined(__arm__)
+    // This is up to 2x faster on 32-bit ARM than the #else-case fast path.
+    const I32 first = 3*ix;   // Index of the entry's first 16-bit channel.
+    *r = F_from_U16_BE(gather_16(grid_16, first+0));
+    *g = F_from_U16_BE(gather_16(grid_16, first+1));
+    *b = F_from_U16_BE(gather_16(grid_16, first+2));
+#else
+    // This strategy is much faster for 64-bit builds, and fine for 32-bit x86 too.
+    // Gather all three channels at once, then byte-swap each 16-bit field.
+    U64 packed;
+    gather_48(grid_16, ix, &packed);
+    packed = swap_endian_16x4(packed);
+
+    const U64 r16 = (packed >>  0) & 0xffff,
+              g16 = (packed >> 16) & 0xffff,
+              b16 = (packed >> 32) & 0xffff;
+
+    *r = cast<F>(r16) * (1/65535.0f);
+    *g = cast<F>(g16) * (1/65535.0f);
+    *b = cast<F>(b16) * (1/65535.0f);
+#endif
+}
+
+// Fetch the ix'th entry (four big-endian 16-bit channels) of a 16-bit grid and write the
+// channels through r, g, b and a.
+SI void sample_clut_16(const uint8_t* grid_16, I32 ix, F* r, F* g, F* b, F* a) {
+    // TODO: gather_64()-based fast path?
+    const I32 first = 4*ix;   // Index of the entry's first 16-bit channel.
+    *r = F_from_U16_BE(gather_16(grid_16, first+0));
+    *g = F_from_U16_BE(gather_16(grid_16, first+1));
+    *b = F_from_U16_BE(gather_16(grid_16, first+2));
+    *a = F_from_U16_BE(gather_16(grid_16, first+3));
+}
+
+static void clut(uint32_t input_channels, uint32_t output_channels,
+                 const uint8_t grid_points[4], const uint8_t* grid_8, const uint8_t* grid_16,
+                 F* r, F* g, F* b, F* a) {
+    // Multilinear lookup: blend the 2^dim grid entries surrounding (*r,*g,*b,*a) and write the result back through r,g,b (and a for 4-channel grids).
+    const int dim = (int)input_channels;
+    assert (0 < dim && dim <= 4);
+    assert (output_channels == 3 ||
+            output_channels == 4);
+
+    // For each of these arrays, think foo[2*dim], but we use foo[8] since we know dim <= 4.
+    I32 index [8];  // Index contribution by dimension, first low from 0, then high from 4.
+    F   weight[8];  // Weight for each contribution, again first low, then high.
+
+    // O(dim) work first: calculate index,weight from r,g,b,a.
+    const F inputs[] = { *r,*g,*b,*a };
+    for (int i = dim-1, stride = 1; i >= 0; i--) {  // The last input channel gets stride 1.
+        // x is where we logically want to sample the grid in the i-th dimension.
+        F x = inputs[i] * (float)(grid_points[i] - 1);
+
+        // But we can't index at floats.  lo and hi are the two integer grid points surrounding x.
+        I32 lo = cast<I32>(            x      ),   // i.e. trunc(x) == floor(x) here.
+            hi = cast<I32>(minus_1_ulp(x+1.0f));
+        // Notice how we fold in the accumulated stride across previous dimensions here.
+        index[i+0] = lo * stride;
+        index[i+4] = hi * stride;
+        stride *= grid_points[i];
+
+        // We'll interpolate between those two integer grid points by t.
+        F t = x - cast<F>(lo);  // i.e. fract(x)
+        weight[i+0] = 1-t;
+        weight[i+4] = t;
+    }
+
+    *r = *g = *b = F0;  // From here on the outputs accumulate the weighted sum.
+    if (output_channels == 4) {
+        *a = F0;
+    }
+
+    // We'll sample 2^dim == 1<<dim table entries per pixel,
+    // in all combinations of low and high in each dimension.
+    for (int combo = 0; combo < (1<<dim); combo++) {  // This loop can be done in any order.
+
+        // Each of these upcoming (combo&N)*K expressions here evaluates to 0 or 4,
+        // where 0 selects the low index contribution and its weight 1-t,
+        // or 4 the high index contribution and its weight t.
+
+        // Since 0<dim≤4, we can always just start off with the 0-th channel,
+        // then handle the others conditionally.
+        I32 ix = index [0 + (combo&1)*4];
+        F    w = weight[0 + (combo&1)*4];
+
+        switch ((dim-1)&3) {  // This lets the compiler know there are no other cases to handle.
+            case 3: ix += index [3 + (combo&8)/2];
+                    w  *= weight[3 + (combo&8)/2];
+                    FALLTHROUGH;
+                    // fall through
+
+            case 2: ix += index [2 + (combo&4)*1];
+                    w  *= weight[2 + (combo&4)*1];
+                    FALLTHROUGH;
+                    // fall through
+
+            case 1: ix += index [1 + (combo&2)*2];
+                    w  *= weight[1 + (combo&2)*2];
+        }
+
+        F R,G,B,A=F0;  // A keeps this F0 unless a 4-channel sample below overwrites it.
+        if (output_channels == 3) {
+            if (grid_8) { sample_clut_8 (grid_8 ,ix, &R,&G,&B); }
+            else        { sample_clut_16(grid_16,ix, &R,&G,&B); }
+        } else {
+            if (grid_8) { sample_clut_8 (grid_8 ,ix, &R,&G,&B,&A); }
+            else        { sample_clut_16(grid_16,ix, &R,&G,&B,&A); }
+        }
+        *r += w*R;
+        *g += w*G;
+        *b += w*B;
+        *a += w*A;  // For 3-channel grids A is still F0 here, so this adds w*0.
+    }
+}
+
+// Run the grid lookup described by an A2B tag.  Alpha is received by value, so anything
+// the lookup writes through its alpha pointer stays local to this call.
+static void clut(const skcms_A2B* a2b, F* r, F* g, F* b, F a) {
+    F* local_alpha = &a;
+    clut(a2b->input_channels,
+         a2b->output_channels,
+         a2b->grid_points,
+         a2b->grid_8,
+         a2b->grid_16,
+         r, g, b, local_alpha);
+}
+// Run the grid lookup described by a B2A tag; all four channel pointers are forwarded.
+static void clut(const skcms_B2A* b2a, F* r, F* g, F* b, F* a) {
+    clut(b2a->input_channels,
+         b2a->output_channels,
+         b2a->grid_points,
+         b2a->grid_8,
+         b2a->grid_16,
+         r, g, b, a);
+}
+
+static void exec_ops(const Op* ops, const void** args,
+                     const char* src, char* dst, int i) {
+    F r = F0, g = F0, b = F0, a = F1;
+    while (true) {
+        switch (*ops++) {
+            case Op_load_a8:{
+                a = F_from_U8(load<U8>(src + 1*i));
+            } break;
+
+            case Op_load_g8:{
+                r = g = b = F_from_U8(load<U8>(src + 1*i));
+            } break;
+
+            case Op_load_4444:{
+                U16 abgr = load<U16>(src + 2*i);
+
+                r = cast<F>((abgr >> 12) & 0xf) * (1/15.0f);
+                g = cast<F>((abgr >>  8) & 0xf) * (1/15.0f);
+                b = cast<F>((abgr >>  4) & 0xf) * (1/15.0f);
+                a = cast<F>((abgr >>  0) & 0xf) * (1/15.0f);
+            } break;
+
+            case Op_load_565:{
+                U16 rgb = load<U16>(src + 2*i);
+
+                r = cast<F>(rgb & (uint16_t)(31<< 0)) * (1.0f / (31<< 0));
+                g = cast<F>(rgb & (uint16_t)(63<< 5)) * (1.0f / (63<< 5));
+                b = cast<F>(rgb & (uint16_t)(31<<11)) * (1.0f / (31<<11));
+            } break;
+
+            case Op_load_888:{
+                const uint8_t* rgb = (const uint8_t*)(src + 3*i);
+            #if defined(USING_NEON_FP16)
+                // See the explanation under USING_NEON below.  This is that doubled up.
+                uint8x16x3_t v = {{ vdupq_n_u8(0), vdupq_n_u8(0), vdupq_n_u8(0) }};
+                v = vld3q_lane_u8(rgb+ 0, v,  0);
+                v = vld3q_lane_u8(rgb+ 3, v,  2);
+                v = vld3q_lane_u8(rgb+ 6, v,  4);
+                v = vld3q_lane_u8(rgb+ 9, v,  6);
+
+                v = vld3q_lane_u8(rgb+12, v,  8);
+                v = vld3q_lane_u8(rgb+15, v, 10);
+                v = vld3q_lane_u8(rgb+18, v, 12);
+                v = vld3q_lane_u8(rgb+21, v, 14);
+
+                r = cast<F>((U16)v.val[0]) * (1/255.0f);
+                g = cast<F>((U16)v.val[1]) * (1/255.0f);
+                b = cast<F>((U16)v.val[2]) * (1/255.0f);
+            #elif defined(USING_NEON)
+                // There's no uint8x4x3_t or vld3 load for it, so we'll load each rgb pixel one at
+                // a time.  Since we're doing that, we might as well load them into 16-bit lanes.
+                // (We'd even load into 32-bit lanes, but that's not possible on ARMv7.)
+                uint8x8x3_t v = {{ vdup_n_u8(0), vdup_n_u8(0), vdup_n_u8(0) }};
+                v = vld3_lane_u8(rgb+0, v, 0);
+                v = vld3_lane_u8(rgb+3, v, 2);
+                v = vld3_lane_u8(rgb+6, v, 4);
+                v = vld3_lane_u8(rgb+9, v, 6);
+
+                // Now if we squint, those 3 uint8x8_t we constructed are really U16s, easy to
+                // convert to F.  (Again, U32 would be even better here if drop ARMv7 or split
+                // ARMv7 and ARMv8 impls.)
+                r = cast<F>((U16)v.val[0]) * (1/255.0f);
+                g = cast<F>((U16)v.val[1]) * (1/255.0f);
+                b = cast<F>((U16)v.val[2]) * (1/255.0f);
+            #else
+                r = cast<F>(load_3<U32>(rgb+0) ) * (1/255.0f);
+                g = cast<F>(load_3<U32>(rgb+1) ) * (1/255.0f);
+                b = cast<F>(load_3<U32>(rgb+2) ) * (1/255.0f);
+            #endif
+            } break;
+
+            case Op_load_8888:{
+                U32 rgba = load<U32>(src + 4*i);
+
+                r = cast<F>((rgba >>  0) & 0xff) * (1/255.0f);
+                g = cast<F>((rgba >>  8) & 0xff) * (1/255.0f);
+                b = cast<F>((rgba >> 16) & 0xff) * (1/255.0f);
+                a = cast<F>((rgba >> 24) & 0xff) * (1/255.0f);
+            } break;
+
+            case Op_load_8888_palette8:{
+                const uint8_t* palette = (const uint8_t*) *args++;
+                I32 ix = cast<I32>(load<U8>(src + 1*i));
+                U32 rgba = gather_32(palette, ix);
+
+                r = cast<F>((rgba >>  0) & 0xff) * (1/255.0f);
+                g = cast<F>((rgba >>  8) & 0xff) * (1/255.0f);
+                b = cast<F>((rgba >> 16) & 0xff) * (1/255.0f);
+                a = cast<F>((rgba >> 24) & 0xff) * (1/255.0f);
+            } break;
+
+            case Op_load_1010102:{
+                U32 rgba = load<U32>(src + 4*i);
+
+                r = cast<F>((rgba >>  0) & 0x3ff) * (1/1023.0f);
+                g = cast<F>((rgba >> 10) & 0x3ff) * (1/1023.0f);
+                b = cast<F>((rgba >> 20) & 0x3ff) * (1/1023.0f);
+                a = cast<F>((rgba >> 30) & 0x3  ) * (1/   3.0f);
+            } break;
+
+            case Op_load_161616LE:{
+                uintptr_t ptr = (uintptr_t)(src + 6*i);
+                assert( (ptr & 1) == 0 );                   // src must be 2-byte aligned for this
+                const uint16_t* rgb = (const uint16_t*)ptr; // cast to const uint16_t* to be safe.
+            #if defined(USING_NEON_FP16)
+                uint16x8x3_t v = vld3q_u16(rgb);
+                r = cast<F>((U16)v.val[0]) * (1/65535.0f);
+                g = cast<F>((U16)v.val[1]) * (1/65535.0f);
+                b = cast<F>((U16)v.val[2]) * (1/65535.0f);
+            #elif defined(USING_NEON)
+                uint16x4x3_t v = vld3_u16(rgb);
+                r = cast<F>((U16)v.val[0]) * (1/65535.0f);
+                g = cast<F>((U16)v.val[1]) * (1/65535.0f);
+                b = cast<F>((U16)v.val[2]) * (1/65535.0f);
+            #else
+                r = cast<F>(load_3<U32>(rgb+0)) * (1/65535.0f);
+                g = cast<F>(load_3<U32>(rgb+1)) * (1/65535.0f);
+                b = cast<F>(load_3<U32>(rgb+2)) * (1/65535.0f);
+            #endif
+            } break;
+
+            case Op_load_16161616LE:{
+                uintptr_t ptr = (uintptr_t)(src + 8*i);
+                assert( (ptr & 1) == 0 );                    // src must be 2-byte aligned for this
+                const uint16_t* rgba = (const uint16_t*)ptr; // cast to const uint16_t* to be safe.
+            #if defined(USING_NEON_FP16)
+                uint16x8x4_t v = vld4q_u16(rgba);
+                r = cast<F>((U16)v.val[0]) * (1/65535.0f);
+                g = cast<F>((U16)v.val[1]) * (1/65535.0f);
+                b = cast<F>((U16)v.val[2]) * (1/65535.0f);
+                a = cast<F>((U16)v.val[3]) * (1/65535.0f);
+            #elif defined(USING_NEON)
+                uint16x4x4_t v = vld4_u16(rgba);
+                r = cast<F>((U16)v.val[0]) * (1/65535.0f);
+                g = cast<F>((U16)v.val[1]) * (1/65535.0f);
+                b = cast<F>((U16)v.val[2]) * (1/65535.0f);
+                a = cast<F>((U16)v.val[3]) * (1/65535.0f);
+            #else
+                U64 px = load<U64>(rgba);
+
+                r = cast<F>((px >>  0) & 0xffff) * (1/65535.0f);
+                g = cast<F>((px >> 16) & 0xffff) * (1/65535.0f);
+                b = cast<F>((px >> 32) & 0xffff) * (1/65535.0f);
+                a = cast<F>((px >> 48) & 0xffff) * (1/65535.0f);
+            #endif
+            } break;
+
+            case Op_load_161616BE:{
+                uintptr_t ptr = (uintptr_t)(src + 6*i);
+                assert( (ptr & 1) == 0 );                   // src must be 2-byte aligned for this
+                const uint16_t* rgb = (const uint16_t*)ptr; // cast to const uint16_t* to be safe.
+            #if defined(USING_NEON_FP16)
+                uint16x8x3_t v = vld3q_u16(rgb);
+                r = cast<F>(swap_endian_16((U16)v.val[0])) * (1/65535.0f);
+                g = cast<F>(swap_endian_16((U16)v.val[1])) * (1/65535.0f);
+                b = cast<F>(swap_endian_16((U16)v.val[2])) * (1/65535.0f);
+            #elif defined(USING_NEON)
+                uint16x4x3_t v = vld3_u16(rgb);
+                r = cast<F>(swap_endian_16((U16)v.val[0])) * (1/65535.0f);
+                g = cast<F>(swap_endian_16((U16)v.val[1])) * (1/65535.0f);
+                b = cast<F>(swap_endian_16((U16)v.val[2])) * (1/65535.0f);
+            #else
+                U32 R = load_3<U32>(rgb+0),
+                    G = load_3<U32>(rgb+1),
+                    B = load_3<U32>(rgb+2);
+                // R,G,B are big-endian 16-bit, so byte swap them before converting to float.
+                r = cast<F>((R & 0x00ff)<<8 | (R & 0xff00)>>8) * (1/65535.0f);
+                g = cast<F>((G & 0x00ff)<<8 | (G & 0xff00)>>8) * (1/65535.0f);
+                b = cast<F>((B & 0x00ff)<<8 | (B & 0xff00)>>8) * (1/65535.0f);
+            #endif
+            } break;
+
+            case Op_load_16161616BE:{
+                uintptr_t ptr = (uintptr_t)(src + 8*i);
+                assert( (ptr & 1) == 0 );                    // src must be 2-byte aligned for this
+                const uint16_t* rgba = (const uint16_t*)ptr; // cast to const uint16_t* to be safe.
+            #if defined(USING_NEON_FP16)
+                uint16x8x4_t v = vld4q_u16(rgba);
+                r = cast<F>(swap_endian_16((U16)v.val[0])) * (1/65535.0f);
+                g = cast<F>(swap_endian_16((U16)v.val[1])) * (1/65535.0f);
+                b = cast<F>(swap_endian_16((U16)v.val[2])) * (1/65535.0f);
+                a = cast<F>(swap_endian_16((U16)v.val[3])) * (1/65535.0f);
+            #elif defined(USING_NEON)
+                uint16x4x4_t v = vld4_u16(rgba);
+                r = cast<F>(swap_endian_16((U16)v.val[0])) * (1/65535.0f);
+                g = cast<F>(swap_endian_16((U16)v.val[1])) * (1/65535.0f);
+                b = cast<F>(swap_endian_16((U16)v.val[2])) * (1/65535.0f);
+                a = cast<F>(swap_endian_16((U16)v.val[3])) * (1/65535.0f);
+            #else
+                U64 px = swap_endian_16x4(load<U64>(rgba));
+
+                r = cast<F>((px >>  0) & 0xffff) * (1/65535.0f);
+                g = cast<F>((px >> 16) & 0xffff) * (1/65535.0f);
+                b = cast<F>((px >> 32) & 0xffff) * (1/65535.0f);
+                a = cast<F>((px >> 48) & 0xffff) * (1/65535.0f);
+            #endif
+            } break;
+
+            case Op_load_hhh:{
+                uintptr_t ptr = (uintptr_t)(src + 6*i);
+                assert( (ptr & 1) == 0 );                   // src must be 2-byte aligned for this
+                const uint16_t* rgb = (const uint16_t*)ptr; // cast to const uint16_t* to be safe.
+            #if defined(USING_NEON_FP16)
+                uint16x8x3_t v = vld3q_u16(rgb);
+                U16 R = (U16)v.val[0],
+                    G = (U16)v.val[1],
+                    B = (U16)v.val[2];
+            #elif defined(USING_NEON)
+                uint16x4x3_t v = vld3_u16(rgb);
+                U16 R = (U16)v.val[0],
+                    G = (U16)v.val[1],
+                    B = (U16)v.val[2];
+            #else
+                U16 R = load_3<U16>(rgb+0),
+                    G = load_3<U16>(rgb+1),
+                    B = load_3<U16>(rgb+2);
+            #endif
+                r = F_from_Half(R);
+                g = F_from_Half(G);
+                b = F_from_Half(B);
+            } break;
+
+            case Op_load_hhhh:{
+                uintptr_t ptr = (uintptr_t)(src + 8*i);
+                assert( (ptr & 1) == 0 );                    // src must be 2-byte aligned for this
+                const uint16_t* rgba = (const uint16_t*)ptr; // cast to const uint16_t* to be safe.
+            #if defined(USING_NEON_FP16)
+                uint16x8x4_t v = vld4q_u16(rgba);
+                U16 R = (U16)v.val[0],
+                    G = (U16)v.val[1],
+                    B = (U16)v.val[2],
+                    A = (U16)v.val[3];
+            #elif defined(USING_NEON)
+                uint16x4x4_t v = vld4_u16(rgba);
+                U16 R = (U16)v.val[0],
+                    G = (U16)v.val[1],
+                    B = (U16)v.val[2],
+                    A = (U16)v.val[3];
+            #else
+                U64 px = load<U64>(rgba);
+                U16 R = cast<U16>((px >>  0) & 0xffff),
+                    G = cast<U16>((px >> 16) & 0xffff),
+                    B = cast<U16>((px >> 32) & 0xffff),
+                    A = cast<U16>((px >> 48) & 0xffff);
+            #endif
+                r = F_from_Half(R);
+                g = F_from_Half(G);
+                b = F_from_Half(B);
+                a = F_from_Half(A);
+            } break;
+
+            case Op_load_fff:{
+                uintptr_t ptr = (uintptr_t)(src + 12*i);
+                assert( (ptr & 3) == 0 );                   // src must be 4-byte aligned for this
+                const float* rgb = (const float*)ptr;       // cast to const float* to be safe.
+            #if defined(USING_NEON_FP16)
+                float32x4x3_t lo = vld3q_f32(rgb +  0),
+                              hi = vld3q_f32(rgb + 12);
+                r = (F)vcombine_f16(vcvt_f16_f32(lo.val[0]), vcvt_f16_f32(hi.val[0]));
+                g = (F)vcombine_f16(vcvt_f16_f32(lo.val[1]), vcvt_f16_f32(hi.val[1]));
+                b = (F)vcombine_f16(vcvt_f16_f32(lo.val[2]), vcvt_f16_f32(hi.val[2]));
+            #elif defined(USING_NEON)
+                float32x4x3_t v = vld3q_f32(rgb);
+                r = (F)v.val[0];
+                g = (F)v.val[1];
+                b = (F)v.val[2];
+            #else
+                r = load_3<F>(rgb+0);
+                g = load_3<F>(rgb+1);
+                b = load_3<F>(rgb+2);
+            #endif
+            } break;
+
+            case Op_load_ffff:{
+                uintptr_t ptr = (uintptr_t)(src + 16*i);
+                assert( (ptr & 3) == 0 );                   // src must be 4-byte aligned for this
+                const float* rgba = (const float*)ptr;      // cast to const float* to be safe.
+            #if defined(USING_NEON_FP16)
+                float32x4x4_t lo = vld4q_f32(rgba +  0),
+                              hi = vld4q_f32(rgba + 16);
+                r = (F)vcombine_f16(vcvt_f16_f32(lo.val[0]), vcvt_f16_f32(hi.val[0]));
+                g = (F)vcombine_f16(vcvt_f16_f32(lo.val[1]), vcvt_f16_f32(hi.val[1]));
+                b = (F)vcombine_f16(vcvt_f16_f32(lo.val[2]), vcvt_f16_f32(hi.val[2]));
+                a = (F)vcombine_f16(vcvt_f16_f32(lo.val[3]), vcvt_f16_f32(hi.val[3]));
+            #elif defined(USING_NEON)
+                float32x4x4_t v = vld4q_f32(rgba);
+                r = (F)v.val[0];
+                g = (F)v.val[1];
+                b = (F)v.val[2];
+                a = (F)v.val[3];
+            #else
+                r = load_4<F>(rgba+0);
+                g = load_4<F>(rgba+1);
+                b = load_4<F>(rgba+2);
+                a = load_4<F>(rgba+3);
+            #endif
+            } break;
+
+            case Op_swap_rb:{
+                F t = r;
+                r = b;
+                b = t;
+            } break;
+
+            case Op_clamp:{
+                r = max_(F0, min_(r, F1));
+                g = max_(F0, min_(g, F1));
+                b = max_(F0, min_(b, F1));
+                a = max_(F0, min_(a, F1));
+            } break;
+
+            case Op_invert:{
+                r = F1 - r;
+                g = F1 - g;
+                b = F1 - b;
+                a = F1 - a;
+            } break;
+
+            case Op_force_opaque:{
+                a = F1;
+            } break;
+
+            case Op_premul:{
+                r *= a;
+                g *= a;
+                b *= a;
+            } break;
+
+            case Op_unpremul:{
+                F scale = if_then_else(F1 / a < INFINITY_, F1 / a, F0);
+                r *= scale;
+                g *= scale;
+                b *= scale;
+            } break;
+
+            case Op_matrix_3x3:{
+                const skcms_Matrix3x3* matrix = (const skcms_Matrix3x3*) *args++;
+                const float* m = &matrix->vals[0][0];
+
+                F R = m[0]*r + m[1]*g + m[2]*b,
+                  G = m[3]*r + m[4]*g + m[5]*b,
+                  B = m[6]*r + m[7]*g + m[8]*b;
+
+                r = R;
+                g = G;
+                b = B;
+            } break;
+
+            case Op_matrix_3x4:{
+                const skcms_Matrix3x4* matrix = (const skcms_Matrix3x4*) *args++;
+                const float* m = &matrix->vals[0][0];
+
+                F R = m[0]*r + m[1]*g + m[ 2]*b + m[ 3],
+                  G = m[4]*r + m[5]*g + m[ 6]*b + m[ 7],
+                  B = m[8]*r + m[9]*g + m[10]*b + m[11];
+
+                r = R;
+                g = G;
+                b = B;
+            } break;
+
+            case Op_lab_to_xyz:{
+                // The L*a*b values are in r,g,b, but normalized to [0,1].  Reconstruct them:
+                F L = r * 100.0f,
+                  A = g * 255.0f - 128.0f,
+                  B = b * 255.0f - 128.0f;
+
+                // Convert to CIE XYZ.
+                F Y = (L + 16.0f) * (1/116.0f),
+                  X = Y + A*(1/500.0f),
+                  Z = Y - B*(1/200.0f);
+
+                X = if_then_else(X*X*X > 0.008856f, X*X*X, (X - (16/116.0f)) * (1/7.787f));
+                Y = if_then_else(Y*Y*Y > 0.008856f, Y*Y*Y, (Y - (16/116.0f)) * (1/7.787f));
+                Z = if_then_else(Z*Z*Z > 0.008856f, Z*Z*Z, (Z - (16/116.0f)) * (1/7.787f));
+
+                // Adjust to XYZD50 illuminant, and stuff back into r,g,b for the next op.
+                r = X * 0.9642f;
+                g = Y          ;
+                b = Z * 0.8249f;
+            } break;
+
+            // As above, in reverse.
+            case Op_xyz_to_lab:{
+                F X = r * (1/0.9642f),
+                  Y = g,
+                  Z = b * (1/0.8249f);
+
+                X = if_then_else(X > 0.008856f, approx_pow(X, 1/3.0f), X*7.787f + (16/116.0f));
+                Y = if_then_else(Y > 0.008856f, approx_pow(Y, 1/3.0f), Y*7.787f + (16/116.0f));
+                Z = if_then_else(Z > 0.008856f, approx_pow(Z, 1/3.0f), Z*7.787f + (16/116.0f));
+
+                F L = Y*116.0f - 16.0f,
+                  A = (X-Y)*500.0f,
+                  B = (Y-Z)*200.0f;
+
+                r = L * (1/100.f);
+                g = (A + 128.0f) * (1/255.0f);
+                b = (B + 128.0f) * (1/255.0f);
+            } break;
+
+            case Op_tf_r:{ r = apply_tf((const skcms_TransferFunction*)*args++, r); } break;
+            case Op_tf_g:{ g = apply_tf((const skcms_TransferFunction*)*args++, g); } break;
+            case Op_tf_b:{ b = apply_tf((const skcms_TransferFunction*)*args++, b); } break;
+            case Op_tf_a:{ a = apply_tf((const skcms_TransferFunction*)*args++, a); } break;
+
+            case Op_pq_r:{ r = apply_pq((const skcms_TransferFunction*)*args++, r); } break;
+            case Op_pq_g:{ g = apply_pq((const skcms_TransferFunction*)*args++, g); } break;
+            case Op_pq_b:{ b = apply_pq((const skcms_TransferFunction*)*args++, b); } break;
+            case Op_pq_a:{ a = apply_pq((const skcms_TransferFunction*)*args++, a); } break;
+
+            case Op_hlg_r:{ r = apply_hlg((const skcms_TransferFunction*)*args++, r); } break;
+            case Op_hlg_g:{ g = apply_hlg((const skcms_TransferFunction*)*args++, g); } break;
+            case Op_hlg_b:{ b = apply_hlg((const skcms_TransferFunction*)*args++, b); } break;
+            case Op_hlg_a:{ a = apply_hlg((const skcms_TransferFunction*)*args++, a); } break;
+
+            case Op_hlginv_r:{ r = apply_hlginv((const skcms_TransferFunction*)*args++, r); } break;
+            case Op_hlginv_g:{ g = apply_hlginv((const skcms_TransferFunction*)*args++, g); } break;
+            case Op_hlginv_b:{ b = apply_hlginv((const skcms_TransferFunction*)*args++, b); } break;
+            case Op_hlginv_a:{ a = apply_hlginv((const skcms_TransferFunction*)*args++, a); } break;
+
+            case Op_table_r: { r = table((const skcms_Curve*)*args++, r); } break;
+            case Op_table_g: { g = table((const skcms_Curve*)*args++, g); } break;
+            case Op_table_b: { b = table((const skcms_Curve*)*args++, b); } break;
+            case Op_table_a: { a = table((const skcms_Curve*)*args++, a); } break;
+
+            case Op_clut_A2B: {
+                const skcms_A2B* a2b = (const skcms_A2B*) *args++;
+                clut(a2b, &r,&g,&b,a);
+
+                if (a2b->input_channels == 4) {
+                    // CMYK is opaque.
+                    a = F1;
+                }
+            } break;
+
+            case Op_clut_B2A: {
+                const skcms_B2A* b2a = (const skcms_B2A*) *args++;
+                clut(b2a, &r,&g,&b,&a);
+            } break;
+
+    // Notice, from here on down the store_ ops all return, ending the loop.
+
+            case Op_store_a8: {
+                store(dst + 1*i, cast<U8>(to_fixed(a * 255)));
+            } return;
+
+            case Op_store_g8: {
+                // g should be holding luminance (Y) (r,g,b ~~~> X,Y,Z)
+                store(dst + 1*i, cast<U8>(to_fixed(g * 255)));
+            } return;
+
+            case Op_store_4444: {
+                store<U16>(dst + 2*i, cast<U16>(to_fixed(r * 15) << 12)
+                                    | cast<U16>(to_fixed(g * 15) <<  8)
+                                    | cast<U16>(to_fixed(b * 15) <<  4)
+                                    | cast<U16>(to_fixed(a * 15) <<  0));
+            } return;
+
+            case Op_store_565: {
+                store<U16>(dst + 2*i, cast<U16>(to_fixed(r * 31) <<  0 )
+                                    | cast<U16>(to_fixed(g * 63) <<  5 )
+                                    | cast<U16>(to_fixed(b * 31) << 11 ));
+            } return;
+
+            case Op_store_888: {
+                uint8_t* rgb = (uint8_t*)dst + 3*i;
+            #if defined(USING_NEON_FP16)
+                // See the explanation under USING_NEON below.  This is that doubled up.
+                U16 R = to_fixed(r * 255),
+                    G = to_fixed(g * 255),
+                    B = to_fixed(b * 255);
+
+                uint8x16x3_t v = {{ (uint8x16_t)R, (uint8x16_t)G, (uint8x16_t)B }};
+                vst3q_lane_u8(rgb+ 0, v,  0);
+                vst3q_lane_u8(rgb+ 3, v,  2);
+                vst3q_lane_u8(rgb+ 6, v,  4);
+                vst3q_lane_u8(rgb+ 9, v,  6);
+
+                vst3q_lane_u8(rgb+12, v,  8);
+                vst3q_lane_u8(rgb+15, v, 10);
+                vst3q_lane_u8(rgb+18, v, 12);
+                vst3q_lane_u8(rgb+21, v, 14);
+            #elif defined(USING_NEON)
+                // Same deal as load_888 but in reverse... we'll store using uint8x8x3_t, but
+                // get there via U16 to save some instructions converting to float.  And just
+                // like load_888, we'd prefer to go via U32 but for ARMv7 support.
+                U16 R = cast<U16>(to_fixed(r * 255)),
+                    G = cast<U16>(to_fixed(g * 255)),
+                    B = cast<U16>(to_fixed(b * 255));
+
+                uint8x8x3_t v = {{ (uint8x8_t)R, (uint8x8_t)G, (uint8x8_t)B }};
+                vst3_lane_u8(rgb+0, v, 0);
+                vst3_lane_u8(rgb+3, v, 2);
+                vst3_lane_u8(rgb+6, v, 4);
+                vst3_lane_u8(rgb+9, v, 6);
+            #else
+                store_3(rgb+0, cast<U8>(to_fixed(r * 255)) );
+                store_3(rgb+1, cast<U8>(to_fixed(g * 255)) );
+                store_3(rgb+2, cast<U8>(to_fixed(b * 255)) );
+            #endif
+            } return;
+
+            case Op_store_8888: {
+                store(dst + 4*i, cast<U32>(to_fixed(r * 255)) <<  0
+                               | cast<U32>(to_fixed(g * 255)) <<  8
+                               | cast<U32>(to_fixed(b * 255)) << 16
+                               | cast<U32>(to_fixed(a * 255)) << 24);
+            } return;
+
+            case Op_store_1010102: {
+                store(dst + 4*i, cast<U32>(to_fixed(r * 1023)) <<  0
+                               | cast<U32>(to_fixed(g * 1023)) << 10
+                               | cast<U32>(to_fixed(b * 1023)) << 20
+                               | cast<U32>(to_fixed(a *    3)) << 30);
+            } return;
+
+            case Op_store_161616LE: {
+                uintptr_t ptr = (uintptr_t)(dst + 6*i);
+                assert( (ptr & 1) == 0 );                // The dst pointer must be 2-byte aligned
+                uint16_t* rgb = (uint16_t*)ptr;          // for this cast to uint16_t* to be safe.
+            #if defined(USING_NEON_FP16)
+                uint16x8x3_t v = {{
+                    (uint16x8_t)U16_from_F(r),
+                    (uint16x8_t)U16_from_F(g),
+                    (uint16x8_t)U16_from_F(b),
+                }};
+                vst3q_u16(rgb, v);
+            #elif defined(USING_NEON)
+                uint16x4x3_t v = {{
+                    (uint16x4_t)U16_from_F(r),
+                    (uint16x4_t)U16_from_F(g),
+                    (uint16x4_t)U16_from_F(b),
+                }};
+                vst3_u16(rgb, v);
+            #else
+                store_3(rgb+0, U16_from_F(r));
+                store_3(rgb+1, U16_from_F(g));
+                store_3(rgb+2, U16_from_F(b));
+            #endif
+
+            } return;
+
+            case Op_store_16161616LE: {
+                uintptr_t ptr = (uintptr_t)(dst + 8*i);
+                assert( (ptr & 1) == 0 );               // The dst pointer must be 2-byte aligned
+                uint16_t* rgba = (uint16_t*)ptr;        // for this cast to uint16_t* to be safe.
+            #if defined(USING_NEON_FP16)
+                uint16x8x4_t v = {{
+                    (uint16x8_t)U16_from_F(r),
+                    (uint16x8_t)U16_from_F(g),
+                    (uint16x8_t)U16_from_F(b),
+                    (uint16x8_t)U16_from_F(a),
+                }};
+                vst4q_u16(rgba, v);
+            #elif defined(USING_NEON)
+                uint16x4x4_t v = {{
+                    (uint16x4_t)U16_from_F(r),
+                    (uint16x4_t)U16_from_F(g),
+                    (uint16x4_t)U16_from_F(b),
+                    (uint16x4_t)U16_from_F(a),
+                }};
+                vst4_u16(rgba, v);
+            #else
+                U64 px = cast<U64>(to_fixed(r * 65535)) <<  0
+                       | cast<U64>(to_fixed(g * 65535)) << 16
+                       | cast<U64>(to_fixed(b * 65535)) << 32
+                       | cast<U64>(to_fixed(a * 65535)) << 48;
+                store(rgba, px);
+            #endif
+            } return;
+
+            case Op_store_161616BE: {
+                uintptr_t ptr = (uintptr_t)(dst + 6*i);
+                assert( (ptr & 1) == 0 );                // The dst pointer must be 2-byte aligned
+                uint16_t* rgb = (uint16_t*)ptr;          // for this cast to uint16_t* to be safe.
+            #if defined(USING_NEON_FP16)
+                uint16x8x3_t v = {{
+                    (uint16x8_t)swap_endian_16(U16_from_F(r)),
+                    (uint16x8_t)swap_endian_16(U16_from_F(g)),
+                    (uint16x8_t)swap_endian_16(U16_from_F(b)),
+                }};
+                vst3q_u16(rgb, v);
+            #elif defined(USING_NEON)
+                uint16x4x3_t v = {{
+                    (uint16x4_t)swap_endian_16(cast<U16>(U16_from_F(r))),
+                    (uint16x4_t)swap_endian_16(cast<U16>(U16_from_F(g))),
+                    (uint16x4_t)swap_endian_16(cast<U16>(U16_from_F(b))),
+                }};
+                vst3_u16(rgb, v);
+            #else
+                U32 R = to_fixed(r * 65535),
+                    G = to_fixed(g * 65535),
+                    B = to_fixed(b * 65535);
+                store_3(rgb+0, cast<U16>((R & 0x00ff) << 8 | (R & 0xff00) >> 8) );
+                store_3(rgb+1, cast<U16>((G & 0x00ff) << 8 | (G & 0xff00) >> 8) );
+                store_3(rgb+2, cast<U16>((B & 0x00ff) << 8 | (B & 0xff00) >> 8) );
+            #endif
+
+            } return;
+
+            case Op_store_16161616BE: {
+                uintptr_t ptr = (uintptr_t)(dst + 8*i);
+                assert( (ptr & 1) == 0 );               // The dst pointer must be 2-byte aligned
+                uint16_t* rgba = (uint16_t*)ptr;        // for this cast to uint16_t* to be safe.
+            #if defined(USING_NEON_FP16)
+                uint16x8x4_t v = {{
+                    (uint16x8_t)swap_endian_16(U16_from_F(r)),
+                    (uint16x8_t)swap_endian_16(U16_from_F(g)),
+                    (uint16x8_t)swap_endian_16(U16_from_F(b)),
+                    (uint16x8_t)swap_endian_16(U16_from_F(a)),
+                }};
+                vst4q_u16(rgba, v);
+            #elif defined(USING_NEON)
+                uint16x4x4_t v = {{
+                    (uint16x4_t)swap_endian_16(cast<U16>(U16_from_F(r))),
+                    (uint16x4_t)swap_endian_16(cast<U16>(U16_from_F(g))),
+                    (uint16x4_t)swap_endian_16(cast<U16>(U16_from_F(b))),
+                    (uint16x4_t)swap_endian_16(cast<U16>(U16_from_F(a))),
+                }};
+                vst4_u16(rgba, v);
+            #else
+                U64 px = cast<U64>(to_fixed(r * 65535)) <<  0
+                       | cast<U64>(to_fixed(g * 65535)) << 16
+                       | cast<U64>(to_fixed(b * 65535)) << 32
+                       | cast<U64>(to_fixed(a * 65535)) << 48;
+                store(rgba, swap_endian_16x4(px));
+            #endif
+            } return;
+
+            case Op_store_hhh: {
+                uintptr_t ptr = (uintptr_t)(dst + 6*i);
+                assert( (ptr & 1) == 0 );                // The dst pointer must be 2-byte aligned
+                uint16_t* rgb = (uint16_t*)ptr;          // for this cast to uint16_t* to be safe.
+
+                U16 R = Half_from_F(r),
+                    G = Half_from_F(g),
+                    B = Half_from_F(b);
+            #if defined(USING_NEON_FP16)
+                uint16x8x3_t v = {{
+                    (uint16x8_t)R,
+                    (uint16x8_t)G,
+                    (uint16x8_t)B,
+                }};
+                vst3q_u16(rgb, v);
+            #elif defined(USING_NEON)
+                uint16x4x3_t v = {{
+                    (uint16x4_t)R,
+                    (uint16x4_t)G,
+                    (uint16x4_t)B,
+                }};
+                vst3_u16(rgb, v);
+            #else
+                store_3(rgb+0, R);
+                store_3(rgb+1, G);
+                store_3(rgb+2, B);
+            #endif
+            } return;
+
+            case Op_store_hhhh: {
+                uintptr_t ptr = (uintptr_t)(dst + 8*i);
+                assert( (ptr & 1) == 0 );                // The dst pointer must be 2-byte aligned
+                uint16_t* rgba = (uint16_t*)ptr;         // for this cast to uint16_t* to be safe.
+
+                U16 R = Half_from_F(r),
+                    G = Half_from_F(g),
+                    B = Half_from_F(b),
+                    A = Half_from_F(a);
+            #if defined(USING_NEON_FP16)
+                uint16x8x4_t v = {{
+                    (uint16x8_t)R,
+                    (uint16x8_t)G,
+                    (uint16x8_t)B,
+                    (uint16x8_t)A,
+                }};
+                vst4q_u16(rgba, v);
+            #elif defined(USING_NEON)
+                uint16x4x4_t v = {{
+                    (uint16x4_t)R,
+                    (uint16x4_t)G,
+                    (uint16x4_t)B,
+                    (uint16x4_t)A,
+                }};
+                vst4_u16(rgba, v);
+            #else
+                store(rgba, cast<U64>(R) <<  0
+                          | cast<U64>(G) << 16
+                          | cast<U64>(B) << 32
+                          | cast<U64>(A) << 48);
+            #endif
+
+            } return;
+
+            case Op_store_fff: {
+                uintptr_t ptr = (uintptr_t)(dst + 12*i);
+                assert( (ptr & 3) == 0 );                // The dst pointer must be 4-byte aligned
+                float* rgb = (float*)ptr;                // for this cast to float* to be safe.
+            #if defined(USING_NEON_FP16)
+                float32x4x3_t lo = {{
+                    vcvt_f32_f16(vget_low_f16(r)),
+                    vcvt_f32_f16(vget_low_f16(g)),
+                    vcvt_f32_f16(vget_low_f16(b)),
+                }}, hi = {{
+                    vcvt_f32_f16(vget_high_f16(r)),
+                    vcvt_f32_f16(vget_high_f16(g)),
+                    vcvt_f32_f16(vget_high_f16(b)),
+                }};
+                vst3q_f32(rgb +  0, lo);
+                vst3q_f32(rgb + 12, hi);
+            #elif defined(USING_NEON)
+                float32x4x3_t v = {{
+                    (float32x4_t)r,
+                    (float32x4_t)g,
+                    (float32x4_t)b,
+                }};
+                vst3q_f32(rgb, v);
+            #else
+                store_3(rgb+0, r);
+                store_3(rgb+1, g);
+                store_3(rgb+2, b);
+            #endif
+            } return;
+
+            case Op_store_ffff: {
+                uintptr_t ptr = (uintptr_t)(dst + 16*i);
+                assert( (ptr & 3) == 0 );                // The dst pointer must be 4-byte aligned
+                float* rgba = (float*)ptr;               // for this cast to float* to be safe.
+            #if defined(USING_NEON_FP16)
+                float32x4x4_t lo = {{
+                    vcvt_f32_f16(vget_low_f16(r)),
+                    vcvt_f32_f16(vget_low_f16(g)),
+                    vcvt_f32_f16(vget_low_f16(b)),
+                    vcvt_f32_f16(vget_low_f16(a)),
+                }}, hi = {{
+                    vcvt_f32_f16(vget_high_f16(r)),
+                    vcvt_f32_f16(vget_high_f16(g)),
+                    vcvt_f32_f16(vget_high_f16(b)),
+                    vcvt_f32_f16(vget_high_f16(a)),
+                }};
+                vst4q_f32(rgba +  0, lo);
+                vst4q_f32(rgba + 16, hi);
+            #elif defined(USING_NEON)
+                float32x4x4_t v = {{
+                    (float32x4_t)r,
+                    (float32x4_t)g,
+                    (float32x4_t)b,
+                    (float32x4_t)a,
+                }};
+                vst4q_f32(rgba, v);
+            #else
+                store_4(rgba+0, r);
+                store_4(rgba+1, g);
+                store_4(rgba+2, b);
+                store_4(rgba+3, a);
+            #endif
+            } return;
+        }
+    }
+}
+
+
+// Runs `program` over n pixels: whole batches of N directly, then any tail via a scratch copy.
+static void run_program(const Op* program, const void** arguments,
+                        const char* src, char* dst, int n,
+                        const size_t src_bpp, const size_t dst_bpp) {
+    int done = 0;
+    for (; n - done >= N; done += N) {
+        exec_ops(program, arguments, src, dst, done);
+    }
+    const int left = n - done;
+    if (left <= 0) { return; }
+
+    // Stage the tail in a zeroed 4*4*N byte buffer, transform it in place, then copy it out.
+    char tmp[4*4*N] = {0};
+    memcpy(tmp, (const char*)src + (size_t)done*src_bpp, (size_t)left*src_bpp);
+    exec_ops(program, arguments, tmp, tmp, 0);
+    memcpy((char*)dst + (size_t)done*dst_bpp, tmp, (size_t)left*dst_bpp);
+}
+
+// Clean up any #defines we may have set so that we can be #included again.
+#if defined(USING_AVX)
+    #undef  USING_AVX
+#endif
+#if defined(USING_AVX_F16C)
+    #undef  USING_AVX_F16C
+#endif
+#if defined(USING_AVX2)
+    #undef  USING_AVX2
+#endif
+#if defined(USING_AVX512F)
+    #undef  USING_AVX512F
+#endif
+
+#if defined(USING_NEON)
+    #undef  USING_NEON
+#endif
+#if defined(USING_NEON_F16C)
+    #undef  USING_NEON_F16C
+#endif
+#if defined(USING_NEON_FP16)
+    #undef  USING_NEON_FP16
+#endif
+
+#undef FALLTHROUGH
diff --git a/third-party/libjxl/libjxl/third_party/skcms/test_only.c b/third-party/libjxl/libjxl/third_party/skcms/test_only.c
new file mode 100644
index 0000000000..c2a00b172d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/test_only.c
@@ -0,0 +1,459 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifdef _MSC_VER
+    #define _CRT_SECURE_NO_WARNINGS
+#endif
+
+#include "skcms.h"
+#include "skcms_internal.h"
+#include "test_only.h"
+#include <stdlib.h>
+#include <string.h>
+
+static void print_shortest_float(FILE* fp, float x) {
+    char text[80];
+    int places = 0;
+    while (places < 12) {
+        float parsed;
+        snprintf(text, sizeof(text), "%.*f", places, x);
+        if (sscanf(text, "%f", &parsed) != 1 || parsed == x) {
+            break;
+        }
+        places++;
+    }
+
+    // The loop stops at the first precision whose text reads back as exactly x (or fails
+    // to parse), giving up at 12.  Printing exactly that many digits would be ideal, but
+    // fprintf() rounding is implementation specific, so results vary in the last digit.
+    //
+    // So format one _extra_ digit and drop that final character when writing to fp.
+    // (0x1.7p-6 == 0x3cb80000 is a good number to test this sort of thing with.)
+
+    const int len = snprintf(text, sizeof(text), "%.*f", places+1, x);
+    fprintf(fp, "%.*s", len-1, text);
+}
+
+// Dumps skcms_252_random_bytes transformed through `profile` into float XYZD50, then, if the
+// profile can be made usable as a destination, the per-byte error after transforming back.
+static void dump_transform_to_XYZD50(FILE* fp,
+                                     const skcms_ICCProfile* profile) {
+    // CMYK data is read as 63 RGBA_8888 pixels, everything else as 84 RGB_888 pixels.
+    const int cmyk = (profile->data_color_space == 0x434D594B/*CMYK*/);
+    const skcms_PixelFormat fmt = cmyk ? skcms_PixelFormat_RGBA_8888
+                                       : skcms_PixelFormat_RGB_888;
+    const size_t npixels = cmyk ? 63 : 84;
+    const skcms_AlphaFormat unpremul = skcms_AlphaFormat_Unpremul;
+
+    float xyz[252];
+    if (!skcms_Transform(skcms_252_random_bytes, fmt, unpremul, profile,
+                         xyz, skcms_PixelFormat_RGB_fff, unpremul, skcms_XYZD50_profile(),
+                         npixels)) {
+        fprintf(fp, "We can parse this profile, but not transform it to XYZD50!\n");
+        return;
+    }
+
+    fprintf(fp, "252 random bytes transformed to %zu linear XYZD50 pixels:", npixels);
+    for (size_t px = 0; px < npixels; px++) {
+        const float* p = xyz + 3*px;
+        if (px % 4 == 0) { fprintf(fp, "\n"); }
+        fprintf(fp, "    % .2f % .2f % .2f", p[0], p[1], p[2]);
+    }
+    fprintf(fp, "\n");
+
+    skcms_ICCProfile dstProfile = *profile;
+    if (!skcms_MakeUsableAsDestination(&dstProfile)) {
+        return;
+    }
+
+    uint8_t back[252];
+    if (!skcms_Transform(xyz, skcms_PixelFormat_RGB_fff, unpremul, skcms_XYZD50_profile(),
+                         back, fmt, unpremul, &dstProfile,
+                         npixels)) {
+        fprintf(fp, "skcms_MakeUsableAsDestination() was true but skcms_Transform() failed!\n");
+        return;
+    }
+
+    // Absolute difference of every round-tripped byte from its source byte, and the largest one.
+    int errs[252];
+    int max_err = 0;
+    for (int i = 0; i < 252; i++) {
+        errs[i] = abs((int)back[i] - (int)skcms_252_random_bytes[i]);
+        if (max_err < errs[i]) {
+            max_err = errs[i];
+        }
+    }
+
+    fprintf(fp, "%d max error transforming back from XYZ:", max_err);
+    for (int i = 0; i < 252; i++) {
+        if (i % 21 == 0) { fprintf(fp, "\n   "); }
+        fprintf(fp, " %3d", errs[i]);
+    }
+    fprintf(fp, "\n");
+}
+
+// Prints the sRGB 8888 result of transforming 81 edge-case RGBA pixels through `profile`.
+static void dump_transform_to_sRGBA(FILE* fp,
+                                    const skcms_ICCProfile* profile) {
+    // Every combination of 0x00, 0x7f and 0xff across the four channels: 3^4 == 81 pixels.
+    // This helps catch issues with alpha, and is mildly interesting on its own.
+    static const uint32_t level[3] = { 0x00, 0x7f, 0xff };
+
+    uint32_t src[81],
+             dst[81];
+    for (int i = 0; i < 81; i++) {
+        src[i] = level[i/1  % 3] <<  0
+               | level[i/3  % 3] <<  8
+               | level[i/9  % 3] << 16
+               | level[i/27 % 3] << 24;
+    }
+
+    // RGBA_8888 should be fine whatever profile->data_color_space is: either RGBA itself or CMYK.
+    const skcms_PixelFormat pf = skcms_PixelFormat_RGBA_8888;
+    const skcms_AlphaFormat af = skcms_AlphaFormat_Unpremul;
+
+    if (!skcms_Transform(src, pf,af, profile,
+                         dst, pf,af, skcms_sRGB_profile(), 81)) {
+        fprintf(fp, "We can parse this profile, but not transform it to sRGB!\n");
+        return;
+    }
+
+    fprintf(fp, "81 edge-case pixels transformed to sRGB 8888 (unpremul):\n");
+    for (const uint32_t* row = dst; row != dst + 81; row += 9) {
+        fprintf(fp, "\t%08x %08x %08x  %08x %08x %08x  %08x %08x %08x\n",
+                row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8]);
+    }
+}
+
+
+// Writes the four bytes of sig, most significant first, into str[0..3] and a NUL into str[4].
+static void signature_to_string(uint32_t sig, char* str) {
+    for (int i = 0; i < 4; i++) {
+        str[i] = (char)((sig >> (24 - 8*i)) & 0xFF);
+    }
+    str[4] = 0;
+}
+
+static void dump_sig_field(FILE* fp, const char* name, uint32_t val) {
+    char text[5];  // four signature characters plus the terminating NUL
+    signature_to_string(val, text);
+    fprintf(fp, "%20s : 0x%08X : '%s'\n", name, val, text);
+}
+
+// Prints one line to fp: name, the parameters of tf (g, a, b, c, d, e, f), then optional notes.
+static void dump_transfer_function(FILE* fp, const char* name,
+                                   const skcms_TransferFunction* tf, float max_error) {
+    fprintf(fp, "%4s : %.7g, %.7g, %.7g, %.7g, %.7g, %.7g, %.7g", name,
+            tf->g, tf->a, tf->b, tf->c, tf->d, tf->e, tf->f);
+    if (max_error > 0) {
+        fprintf(fp, " (Max error: %.6g)", max_error);
+    }
+
+    if (tf->d > 0) {
+        // Has both linear and nonlinear sections, include the discontinuity at D
+        float l_at_d = (tf->c * tf->d + tf->f);                      // c*x + f at x == d
+        float n_at_d = powf_(tf->a * tf->d + tf->b, tf->g) + tf->e;  // presumably (a*x + b)^g + e at x == d
+        fprintf(fp, " (D-gap: %.6g)", (n_at_d - l_at_d));
+    }
+
+    fprintf(fp, " (f(1) = %.6g)", skcms_TransferFunction_eval(tf, 1.0f));
+    // Wrap tf in a skcms_Curve with no table entries so it can go to skcms_AreApproximateInverses().
+    skcms_Curve curve;
+    curve.table_entries = 0;
+    curve.parametric = *tf;
+
+    if (skcms_AreApproximateInverses(&curve, skcms_sRGB_Inverse_TransferFunction())) {
+        fprintf(fp, " (~sRGB)");
+    } else if (skcms_AreApproximateInverses(&curve, skcms_Identity_TransferFunction())) {
+        fprintf(fp, " (~Identity)");
+    }
+    fprintf(fp, "\n");
+}
+
+// Dumps one curve: a parametric curve directly, a table as a summary line plus any approximation.
+static void dump_curve(FILE* fp, const char* name, const skcms_Curve* curve) {
+    if (curve->table_entries == 0) {
+        dump_transfer_function(fp, name, &curve->parametric, 0);
+        return;
+    }
+    const int bits = curve->table_8 ? 8 : 16;
+    fprintf(fp, "%4s : %d-bit table with %u entries", name, bits, curve->table_entries);
+    if (skcms_AreApproximateInverses(curve, skcms_sRGB_Inverse_TransferFunction())) {
+        fprintf(fp, " (~sRGB)");
+    }
+    fprintf(fp, "\n");
+
+    float fit_error;
+    skcms_TransferFunction fit;
+    if (skcms_ApproximateCurve(curve, &fit, &fit_error)) { dump_transfer_function(fp, "~=", &fit, fit_error); }
+}
+
+void dump_profile(const skcms_ICCProfile* profile, FILE* fp) {
+    fprintf(fp, "%20s : 0x%08X : %u\n", "Size", profile->size, profile->size);
+    dump_sig_field(fp, "Data color space", profile->data_color_space);
+    dump_sig_field(fp, "PCS", profile->pcs);
+    fprintf(fp, "%20s : 0x%08X : %u\n", "Tag count", profile->tag_count, profile->tag_count);
+
+    fprintf(fp, "\n");
+
+    fprintf(fp, " Tag    : Type   : Size   : Offset\n");
+    fprintf(fp, " ------ : ------ : ------ : --------\n");
+    for (uint32_t i = 0; i < profile->tag_count; ++i) {
+        skcms_ICCTag tag;
+        skcms_GetTagByIndex(profile, i, &tag);
+        char tagSig[5];
+        char typeSig[5];
+        signature_to_string(tag.signature, tagSig);
+        signature_to_string(tag.type, typeSig);
+        fprintf(fp, " '%s' : '%s' : %6u : %u\n", tagSig, typeSig, tag.size,
+                (uint32_t)(tag.buf - profile->buffer));
+    }
+
+    fprintf(fp, "\n");
+
+    if (profile->has_trc) {
+        const char* trcNames[3] = { "rTRC", "gTRC", "bTRC" };
+        for (int i = 0; i < 3; ++i) {
+            dump_curve(fp, trcNames[i], &profile->trc[i]);
+        }
+        if (skcms_TRCs_AreApproximateInverse(profile, skcms_sRGB_Inverse_TransferFunction())) {
+            fprintf(fp, "TRCs ≈ sRGB\n");
+        }
+    }
+
+    skcms_ICCProfile best_single_curve = *profile;
+    if (skcms_MakeUsableAsDestinationWithSingleCurve(&best_single_curve)) {
+        dump_transfer_function(fp, "Best", &best_single_curve.trc[0].parametric, 0.0f);
+
+        skcms_TransferFunction inv;
+        if (skcms_TransferFunction_invert(&best_single_curve.trc[0].parametric, &inv)) {
+            dump_transfer_function(fp, "Inv ", &inv, 0.0f);
+
+            fprintf(fp, "Best Error: | %.6g %.6g %.6g |\n",
+                skcms_MaxRoundtripError(&profile->trc[0], &inv),
+                skcms_MaxRoundtripError(&profile->trc[1], &inv),
+                skcms_MaxRoundtripError(&profile->trc[2], &inv));
+        } else {
+            fprintf(fp, "*** could not invert Best ***\n");
+        }
+    }
+
+    if (profile->has_toXYZD50) {
+        skcms_Matrix3x3 toXYZ = profile->toXYZD50;
+
+        fprintf(fp, " XYZ : | ");
+        print_shortest_float(fp, toXYZ.vals[0][0]); fprintf(fp, " ");
+        print_shortest_float(fp, toXYZ.vals[0][1]); fprintf(fp, " ");
+        print_shortest_float(fp, toXYZ.vals[0][2]); fprintf(fp, " |\n");
+
+        fprintf(fp, "       | ");
+        print_shortest_float(fp, toXYZ.vals[1][0]); fprintf(fp, " ");
+        print_shortest_float(fp, toXYZ.vals[1][1]); fprintf(fp, " ");
+        print_shortest_float(fp, toXYZ.vals[1][2]); fprintf(fp, " |\n");
+
+        fprintf(fp, "       | ");
+        print_shortest_float(fp, toXYZ.vals[2][0]); fprintf(fp, " ");
+        print_shortest_float(fp, toXYZ.vals[2][1]); fprintf(fp, " ");
+        print_shortest_float(fp, toXYZ.vals[2][2]); fprintf(fp, " |\n");
+
+        float white_x = toXYZ.vals[0][0] + toXYZ.vals[0][1] + toXYZ.vals[0][2],
+              white_y = toXYZ.vals[1][0] + toXYZ.vals[1][1] + toXYZ.vals[1][2],
+              white_z = toXYZ.vals[2][0] + toXYZ.vals[2][1] + toXYZ.vals[2][2];
+        if (fabsf_(white_x - 0.964f) > 0.01f ||
+            fabsf_(white_y - 1.000f) > 0.01f ||
+            fabsf_(white_z - 0.825f) > 0.01f) {
+            fprintf(fp, " !!! This does not appear to use a D50 whitepoint, rather [%g %g %g]\n",
+                    white_x, white_y, white_z);
+        }
+    }
+
+    if (profile->has_A2B) {
+        const skcms_A2B* a2b = &profile->A2B;
+        fprintf(fp, " A2B : %s%s\"B\"\n", a2b-> input_channels ? "\"A\", CLUT, "   : ""
+                                        , a2b->matrix_channels ? "\"M\", Matrix, " : "");
+        if (a2b->input_channels) {
+            fprintf(fp, "%4s : %u inputs\n", "\"A\"", a2b->input_channels);
+            const char* curveNames[4] = { "A0", "A1", "A2", "A3" };
+            for (uint32_t i = 0; i < a2b->input_channels; ++i) {
+                dump_curve(fp, curveNames[i], &a2b->input_curves[i]);
+            }
+            fprintf(fp, "%4s : ", "CLUT");
+            const char* sep = "";
+            for (uint32_t i = 0; i < a2b->input_channels; ++i) {
+                fprintf(fp, "%s%u", sep, a2b->grid_points[i]);
+                sep = " x ";
+            }
+            fprintf(fp, " (%d bpp)\n", a2b->grid_8 ? 8 : 16);
+        }
+
+        if (a2b->matrix_channels) {
+            fprintf(fp, "%4s : %u inputs\n", "\"M\"", a2b->matrix_channels);
+            const char* curveNames[4] = { "M0", "M1", "M2" };
+            for (uint32_t i = 0; i < a2b->matrix_channels; ++i) {
+                dump_curve(fp, curveNames[i], &a2b->matrix_curves[i]);
+            }
+            const skcms_Matrix3x4* m = &a2b->matrix;
+            fprintf(fp, "Mtrx : | ");
+            print_shortest_float(fp, m->vals[0][0]); fprintf(fp, " ");
+            print_shortest_float(fp, m->vals[0][1]); fprintf(fp, " ");
+            print_shortest_float(fp, m->vals[0][2]); fprintf(fp, " ");
+            print_shortest_float(fp, m->vals[0][3]); fprintf(fp, " |\n");
+            fprintf(fp, "       | ");
+            print_shortest_float(fp, m->vals[1][0]); fprintf(fp, " ");
+            print_shortest_float(fp, m->vals[1][1]); fprintf(fp, " ");
+            print_shortest_float(fp, m->vals[1][2]); fprintf(fp, " ");
+            print_shortest_float(fp, m->vals[1][3]); fprintf(fp, " |\n");
+            fprintf(fp, "       | ");
+            print_shortest_float(fp, m->vals[2][0]); fprintf(fp, " ");
+            print_shortest_float(fp, m->vals[2][1]); fprintf(fp, " ");
+            print_shortest_float(fp, m->vals[2][2]); fprintf(fp, " ");
+            print_shortest_float(fp, m->vals[2][3]); fprintf(fp, " |\n");
+        }
+
+        {
+            fprintf(fp, "%4s : %u outputs\n", "\"B\"", a2b->output_channels);
+            const char* curveNames[3] = { "B0", "B1", "B2" };
+            for (uint32_t i = 0; i < a2b->output_channels; ++i) {
+                dump_curve(fp, curveNames[i], &a2b->output_curves[i]);
+            }
+        }
+    }
+
+    if (profile->has_B2A) {
+        const skcms_B2A* b2a = &profile->B2A;
+        fprintf(fp, " B2A : \"B\"%s%s\n", b2a->matrix_channels ? ", Matrix, \"M\"" : ""
+                                        , b2a->output_channels ? ", CLUT, \"A\""   : "");
+
+        {
+            fprintf(fp, "%4s : %u inputs\n", "\"B\"", b2a->input_channels);
+            const char* curveNames[3] = { "B0", "B1", "B2" };
+            for (uint32_t i = 0; i < b2a->input_channels; ++i) {
+                dump_curve(fp, curveNames[i], &b2a->input_curves[i]);
+            }
+        }
+
+        if (b2a->matrix_channels) {
+            const skcms_Matrix3x4* m = &b2a->matrix;
+            fprintf(fp, "Mtrx : | ");
+            print_shortest_float(fp, m->vals[0][0]); fprintf(fp, " ");
+            print_shortest_float(fp, m->vals[0][1]); fprintf(fp, " ");
+            print_shortest_float(fp, m->vals[0][2]); fprintf(fp, " ");
+            print_shortest_float(fp, m->vals[0][3]); fprintf(fp, " |\n");
+            fprintf(fp, "       | ");
+            print_shortest_float(fp, m->vals[1][0]); fprintf(fp, " ");
+            print_shortest_float(fp, m->vals[1][1]); fprintf(fp, " ");
+            print_shortest_float(fp, m->vals[1][2]); fprintf(fp, " ");
+            print_shortest_float(fp, m->vals[1][3]); fprintf(fp, " |\n");
+            fprintf(fp, "       | ");
+            print_shortest_float(fp, m->vals[2][0]); fprintf(fp, " ");
+            print_shortest_float(fp, m->vals[2][1]); fprintf(fp, " ");
+            print_shortest_float(fp, m->vals[2][2]); fprintf(fp, " ");
+            print_shortest_float(fp, m->vals[2][3]); fprintf(fp, " |\n");
+            fprintf(fp, "%4s : %u inputs\n", "\"M\"", b2a->matrix_channels);
+            const char* curveNames[4] = { "M0", "M1", "M2" };
+            for (uint32_t i = 0; i < b2a->matrix_channels; ++i) {
+                dump_curve(fp, curveNames[i], &b2a->matrix_curves[i]);
+            }
+        }
+
+        if (b2a->output_channels) {
+            fprintf(fp, "%4s : ", "CLUT");
+            const char* sep = "";
+            for (uint32_t i = 0; i < b2a->input_channels; ++i) {
+                fprintf(fp, "%s%u", sep, b2a->grid_points[i]);
+                sep = " x ";
+            }
+            fprintf(fp, " (%d bpp)\n", b2a->grid_8 ? 8 : 16);
+            fprintf(fp, "%4s : %u outputs\n", "\"A\"", b2a->output_channels);
+            const char* curveNames[4] = { "A0", "A1", "A2", "A3" };
+            for (uint32_t i = 0; i < b2a->output_channels; ++i) {
+                dump_curve(fp, curveNames[i], &b2a->output_curves[i]);
+            }
+        }
+    }
+
+    skcms_Matrix3x3 chad;
+    if (skcms_GetCHAD(profile, &chad)) {
+        fprintf(fp, "CHAD : | ");
+        print_shortest_float(fp, chad.vals[0][0]); fprintf(fp, " ");
+        print_shortest_float(fp, chad.vals[0][1]); fprintf(fp, " ");
+        print_shortest_float(fp, chad.vals[0][2]); fprintf(fp, " |\n");
+
+        fprintf(fp, "       | ");
+        print_shortest_float(fp, chad.vals[1][0]); fprintf(fp, " ");
+        print_shortest_float(fp, chad.vals[1][1]); fprintf(fp, " ");
+        print_shortest_float(fp, chad.vals[1][2]); fprintf(fp, " |\n");
+
+        fprintf(fp, "       | ");
+        print_shortest_float(fp, chad.vals[2][0]); fprintf(fp, " ");
+        print_shortest_float(fp, chad.vals[2][1]); fprintf(fp, " ");
+        print_shortest_float(fp, chad.vals[2][2]); fprintf(fp, " |\n");
+    }
+
+    float wtpt[3];
+    if (skcms_GetWTPT(profile, wtpt)) {
+        fprintf(fp, "WTPT : | ");
+        print_shortest_float(fp, wtpt[0]); fprintf(fp, " ");
+        print_shortest_float(fp, wtpt[1]); fprintf(fp, " ");
+        print_shortest_float(fp, wtpt[2]); fprintf(fp, " |\n");
+    }
+
+    if (profile->has_CICP) {
+        fprintf(fp, "CICP : CP: %u TF: %u MC: %u FR: %u\n",
+                profile->CICP.color_primaries, profile->CICP.transfer_characteristics,
+                profile->CICP.matrix_coefficients, profile->CICP.video_full_range_flag);
+    }
+
+    dump_transform_to_XYZD50(fp, profile);
+    dump_transform_to_sRGBA (fp, profile);
+    if (skcms_ApproximatelyEqualProfiles(profile, skcms_sRGB_profile())) {
+        fprintf(fp, "This profile ≈ sRGB.\n");
+    }
+}
+
+bool load_file_fp(FILE* fp, void** buf, size_t* len) {
+    // Reads all of fp into a malloc()'d buffer: on success *buf/*len describe it and the caller
+    // free()s *buf.  Fails on empty or unseekable streams, allocation failure, or a short read.
+    if (fseek(fp, 0L, SEEK_END) != 0) {
+        return false;
+    }
+    long size = ftell(fp);  // -1 on error, 0 for an empty file; both are failures here.
+    if (size <= 0 || fseek(fp, 0L, SEEK_SET) != 0) {  // Checked seek instead of rewind().
+        return false;
+    }
+    *len = (size_t)size;
+    *buf = malloc(*len);
+    if (!*buf) {
+        return false;
+    }
+    if (fread(*buf, 1, *len, fp) != *len) {
+        free(*buf);
+        *buf = NULL;  // Never leave a dangling pointer in the caller's variable.
+        return false;
+    }
+    return true;
+}
+
+bool load_file(const char* filename, void** buf, size_t* len) {
+    // Opens filename for binary reading and hands it to load_file_fp(); false if fopen() fails.
+    FILE* in = fopen(filename, "rb");
+    bool ok = (in != NULL) && load_file_fp(in, buf, len);
+    if (in) {
+        fclose(in);
+    }
+    return ok;
+}
+
+bool write_file(const char* filename, void* buf, size_t len) {
+    // Writes len bytes from buf to filename (opened "wb"); true only if every byte was stored.
+    FILE* fp = fopen(filename, "wb");
+    if (!fp) {
+        return false;
+    }
+    bool written = (fwrite(buf, 1, len, fp) == len);
+    return (fclose(fp) == 0) && written;  // fclose() flushes, so its failure is a write failure.
+}
diff --git a/third-party/libjxl/libjxl/third_party/skcms/test_only.h b/third-party/libjxl/libjxl/third_party/skcms/test_only.h
new file mode 100644
index 0000000000..63a2983888
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/test_only.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#pragma once
+
+#include "skcms.h"
+#include <stdio.h>
+
+void dump_profile(const skcms_ICCProfile* profile, FILE* fp);  // Prints a text dump of profile to fp.
+// Read a whole stream/file into a malloc()'d buffer returned via *buf and *len; false on failure.
+bool load_file_fp(FILE* fp, void** buf, size_t* len);
+bool load_file(const char* filename, void** buf, size_t* len);
+// Writes len bytes from buf to filename, replacing its contents; false on failure.
+bool write_file(const char* filename, void* buf, size_t len);
diff --git a/third-party/libjxl/libjxl/third_party/skcms/tests.c b/third-party/libjxl/libjxl/third_party/skcms/tests.c
new file mode 100644
index 0000000000..92ea0fa90b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/tests.c
@@ -0,0 +1,1917 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_WARNINGS
+#pragma warning( disable : 6011 ) // dereferencing NULL pointer (from malloc)
+#endif
+
+#include "skcms.h"
+#include "skcms_internal.h"
+#include "test_only.h"
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(SKCMS_OPT_INTO_NEON_FP16)
+    static bool kFP16 = true;   // Loosens expect_close() and selects alternate expected values.
+#else
+    static bool kFP16 = false;
+#endif
+// DEBUGBREAK(): compiler-specific trap used to stop right where an expectation failed.
+#if defined(_MSC_VER)
+    #define DEBUGBREAK __debugbreak
+#elif defined(__clang__)
+    #define DEBUGBREAK __builtin_debugtrap
+#else
+    #define DEBUGBREAK __builtin_trap
+#endif
+// expect(cond): if cond is false, prints the condition and file:line to stderr, then DEBUGBREAK()s.
+#define expect(cond)                                                                  \
+    do {                                                                              \
+        if (!(cond)) {                                                                \
+            fprintf(stderr, "expect(" #cond ") failed at %s:%d\n",__FILE__,__LINE__); \
+            fflush(stderr);   /* stderr is buffered on Windows. */                    \
+            DEBUGBREAK();                                                             \
+        }                                                                             \
+    } while(false)
+// expect_close(x,y): integers one apart pass; else smaller/larger must be >= 1.0 (0.995 if kFP16).
+#define expect_close(x,y)                                                                 \
+    do {                                                                                  \
+        double X = (double)(x),                                                           \
+               Y = (double)(y);                                                           \
+        if (X == (double)(int)X &&                                                        \
+            Y == (double)(int)Y &&                                                        \
+            (X == Y-1 || Y == X-1)) {                                                     \
+            /* These are ints and off by one.  Sounds close to me. */                     \
+        } else {                                                                          \
+            double ratio = (X < Y) ? X / Y                                                \
+                         : (Y < X) ? Y / X                                                \
+                         : 1.0;                                                           \
+            if (ratio < (kFP16 ? 0.995 : 1.0)) {                                          \
+                fprintf(stderr, "expect_close(" #x "==%g, " #y "==%g) failed at %s:%d\n", \
+                        X,Y, __FILE__,__LINE__);                                          \
+                fflush(stderr);   /* stderr is buffered on Windows. */                    \
+                DEBUGBREAK();                                                             \
+            }                                                                             \
+        }                                                                                 \
+    } while(false)
+
+
+
+static void test_ICCProfile() {
+    // Smoke test: skcms_Parse() is expected to reject a buffer holding a single byte.
+    skcms_ICCProfile profile;
+
+    const uint8_t buf[] = { 0x42 };
+    expect(!skcms_Parse(buf, sizeof(buf), &profile));
+}
+
+static void test_FormatConversions() {  // 8-bit formats: identity, R/B swaps, RGB<->RGBA, in-place.
+    // We can interpret src as 85 RGB_888 pixels or 64 RGB_8888 pixels.
+    uint8_t src[256],
+            dst[85*4];  // Large enough for 85 RGBA_8888 pixels.
+    for (int i = 0; i < 256; i++) {
+        src[i] = (uint8_t)i;
+    }
+
+    // This should basically be a really complicated memcpy().
+    expect(skcms_Transform(src, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           dst, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           64));
+    for (int i = 0; i < 256; i++) {
+        expect(dst[i] == i);
+    }
+
+    // We can do RGBA -> BGRA swaps two ways: swap on write (first) or swap on read (second).
+    expect(skcms_Transform(src, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           dst, skcms_PixelFormat_BGRA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           64));
+    for (int i = 0; i < 64; i++) {
+        expect(dst[4*i+0] == 4*i+2);
+        expect(dst[4*i+1] == 4*i+1);
+        expect(dst[4*i+2] == 4*i+0);
+        expect(dst[4*i+3] == 4*i+3);
+    }
+    expect(skcms_Transform(src, skcms_PixelFormat_BGRA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           dst, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           64));
+    for (int i = 0; i < 64; i++) {
+        expect(dst[4*i+0] == 4*i+2);
+        expect(dst[4*i+1] == 4*i+1);
+        expect(dst[4*i+2] == 4*i+0);
+        expect(dst[4*i+3] == 4*i+3);
+    }
+
+    // Let's convert RGB_888 to RGBA_8888...
+    expect(skcms_Transform(src, skcms_PixelFormat_RGB_888  , skcms_AlphaFormat_Unpremul, NULL,
+                           dst, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           85));
+    for (int i = 0; i < 85; i++) {
+        expect(dst[4*i+0] == 3*i+0);
+        expect(dst[4*i+1] == 3*i+1);
+        expect(dst[4*i+2] == 3*i+2);
+        expect(dst[4*i+3] ==   255);
+    }
+    // ... and now all the variants of R-B swaps.
+    expect(skcms_Transform(src, skcms_PixelFormat_BGR_888  , skcms_AlphaFormat_Unpremul, NULL,
+                           dst, skcms_PixelFormat_BGRA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           85));
+    for (int i = 0; i < 85; i++) {
+        expect(dst[4*i+0] == 3*i+0);
+        expect(dst[4*i+1] == 3*i+1);
+        expect(dst[4*i+2] == 3*i+2);
+        expect(dst[4*i+3] ==   255);
+    }
+    expect(skcms_Transform(src, skcms_PixelFormat_BGR_888  , skcms_AlphaFormat_Unpremul, NULL,
+                           dst, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           85));
+    for (int i = 0; i < 85; i++) {
+        expect(dst[4*i+0] == 3*i+2);
+        expect(dst[4*i+1] == 3*i+1);
+        expect(dst[4*i+2] == 3*i+0);
+        expect(dst[4*i+3] ==   255);
+    }
+    expect(skcms_Transform(src, skcms_PixelFormat_RGB_888  , skcms_AlphaFormat_Unpremul, NULL,
+                           dst, skcms_PixelFormat_BGRA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           85));
+    for (int i = 0; i < 85; i++) {
+        expect(dst[4*i+0] == 3*i+2);
+        expect(dst[4*i+1] == 3*i+1);
+        expect(dst[4*i+2] == 3*i+0);
+        expect(dst[4*i+3] ==   255);
+    }
+
+    // Let's test in-place transforms.
+    // RGBA_8888 and RGB_888 aren't the same size, so we shouldn't allow this call.
+    expect(!skcms_Transform(src, skcms_PixelFormat_RGB_888  , skcms_AlphaFormat_Unpremul, NULL,
+                            src, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                            85));
+
+    // These two should work fine.
+    expect(skcms_Transform(src, skcms_PixelFormat_BGRA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           src, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           64));
+    for (int i = 0; i < 64; i++) {
+        expect(src[4*i+0] == 4*i+2);
+        expect(src[4*i+1] == 4*i+1);
+        expect(src[4*i+2] == 4*i+0);
+        expect(src[4*i+3] == 4*i+3);
+    }
+    expect(skcms_Transform(src, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           src, skcms_PixelFormat_BGRA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           64));
+    for (int i = 0; i < 64; i++) {
+        expect(src[4*i+0] == 4*i+0);
+        expect(src[4*i+1] == 4*i+1);
+        expect(src[4*i+2] == 4*i+2);
+        expect(src[4*i+3] == 4*i+3);
+    }
+    // RGBA_8888 -> RGB_888 drops alpha.  NOTE(review): seems to assume a little-endian host.
+    uint32_t _8888[3] = { 0x03020100, 0x07060504, 0x0b0a0908 };
+    uint8_t _888[9];
+    expect(skcms_Transform(_8888, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           _888 , skcms_PixelFormat_RGB_888  , skcms_AlphaFormat_Unpremul, NULL,
+                           3));
+    expect(_888[0] == 0 && _888[1] == 1 && _888[2] ==  2);
+    expect(_888[3] == 4 && _888[4] == 5 && _888[5] ==  6);
+    expect(_888[6] == 8 && _888[7] == 9 && _888[8] == 10);
+}
+
+static void test_FormatConversions_565() {  // RGB_565 -> RGBA_8888 -> RGB_565 should round-trip.
+    // This should hit all the unique values of each lane of 565.
+    uint16_t src[64];
+    for (int i = 0; i < 64; i++) {
+        src[i] = (uint16_t)( (i/2) <<  0 )   // 5-bit lane: 0..31
+               | (uint16_t)( (i/1) <<  5 )   // 6-bit lane: 0..63
+               | (uint16_t)( (i/2) << 11 );  // 5-bit lane: 0..31
+    }
+    expect(src[ 0] == 0x0000);
+    expect(src[31] == 0x7bef);
+    expect(src[63] == 0xffff);
+
+    uint32_t dst[64];
+    expect(skcms_Transform(src, skcms_PixelFormat_RGB_565  , skcms_AlphaFormat_Unpremul, NULL,
+                           dst, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           64));
+    // We'll just spot check these results a bit.
+    for (int i = 0; i < 64; i++) {
+        expect((dst[i] >> 24) == 255);  // All opaque.
+    }
+    expect(dst[ 0] == 0xff000000);  // 0 -> 0
+    expect(dst[20] == 0xff525152);  // (10/31) ≈ (82/255) and (20/63) ≈ (81/255)
+    expect(dst[62] == 0xfffffbff);  // (31/31) == (255/255) and (62/63) ≈ (251/255)
+    expect(dst[63] == 0xffffffff);  // 1 -> 1
+
+    // Let's convert back the other way.
+    uint16_t back[64];
+    expect(skcms_Transform(dst , skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           back, skcms_PixelFormat_RGB_565  , skcms_AlphaFormat_Unpremul, NULL,
+                           64));
+    for (int i = 0; i < 64; i++) {
+        expect(src[i] == back[i]);
+    }
+}
+
+static void test_FormatConversions_16161616LE() {  // 16-bit LE RGBA -> 8-bit RGBA -> 16-bit LE RGBA.
+    // We want to hit each 16-bit value, 4 per each of 16384 pixels.
+    uint64_t* src = malloc(8 * 16384);  // 8 bytes per pixel; the result is not NULL-checked.
+    for (int i = 0; i < 16384; i++) {
+        src[i] = (uint64_t)(4*i + 0) <<  0
+               | (uint64_t)(4*i + 1) << 16
+               | (uint64_t)(4*i + 2) << 32
+               | (uint64_t)(4*i + 3) << 48;
+    }
+    expect(src[    0] == 0x0003000200010000);
+    expect(src[   32] == 0x0083008200810080);  // just on the cusp of rounding to 0x00 or 0x01
+    expect(src[16383] == 0xfffffffefffdfffc);
+
+    uint32_t* dst = malloc(4 * 16384);
+    expect(skcms_Transform(src, skcms_PixelFormat_RGBA_16161616LE, skcms_AlphaFormat_Unpremul, NULL,
+                           dst, skcms_PixelFormat_RGBA_8888      , skcms_AlphaFormat_Unpremul, NULL,
+                           16384));
+
+    // skcms_Transform() will treat src as holding little-endian 16-bit values.
+
+    expect(dst[    0] == 0x00000000);   // 0x0003 rounds to 0x00, etc.
+    expect(dst[   32] == 0x01010100);   // 0x80 -> 0.9980544747081712, 0x81 -> 1.0019455252918288
+    expect(dst[16383] == 0xffffffff);   // 0xfffc rounds to 0xff, etc.
+
+    // We've lost precision when transforming to 8-bit, so these won't quite round-trip.
+    // Instead we should see the 8-bit dst value byte-doubled, as 65535/255 = 257 = 0x0101.
+    uint64_t* back = malloc(8 * 16384);
+    expect(skcms_Transform(dst , skcms_PixelFormat_RGBA_8888      ,skcms_AlphaFormat_Unpremul, NULL,
+                           back, skcms_PixelFormat_RGBA_16161616LE,skcms_AlphaFormat_Unpremul, NULL,
+                           16384));
+    for (int i = 0; i < 16384; i++) {
+        expect_close( ((back[i] >>  0) & 0xffff) , ((dst[i] >>  0) & 0xff) * 0x0101);
+        expect_close( ((back[i] >> 16) & 0xffff) , ((dst[i] >>  8) & 0xff) * 0x0101);
+        expect_close( ((back[i] >> 32) & 0xffff) , ((dst[i] >> 16) & 0xff) * 0x0101);
+        expect_close( ((back[i] >> 48) & 0xffff) , ((dst[i] >> 24) & 0xff) * 0x0101);
+    }
+
+    free(src);
+    free(dst);
+    free(back);
+}
+
+static void test_FormatConversions_161616LE() {  // 16-bit LE RGB (no alpha) <-> 8-bit RGBA.
+    // We'll test the same cases as the _16161616LE() test, as if they were 4 RGB pixels.
+    uint16_t src[] = { 0x0000, 0x0001, 0x0002,
+                       0x0003, 0x0080, 0x0081,
+                       0x0082, 0x0083, 0xfffc,
+                       0xfffd, 0xfffe, 0xffff };
+    uint32_t dst[4];
+    expect(skcms_Transform(src, skcms_PixelFormat_RGB_161616LE, skcms_AlphaFormat_Unpremul, NULL,
+                           dst, skcms_PixelFormat_RGBA_8888   , skcms_AlphaFormat_Unpremul, NULL,
+                           4));
+    // The source has no alpha channel; every dst pixel is expected to carry alpha 0xff.
+    expect(dst[0] == 0xff000000);
+    expect(dst[1] == 0xff010000);
+    expect(dst[2] == 0xffff0101);
+    expect(dst[3] == 0xffffffff);
+
+    // We've lost precision when transforming to 8-bit, so these won't quite round-trip.
+    // Instead we should see the 8-bit dst value byte-doubled, as 65535/255 = 257 = 0x0101.
+    uint16_t back[12];
+    expect(skcms_Transform(dst , skcms_PixelFormat_RGBA_8888   , skcms_AlphaFormat_Unpremul, NULL,
+                           back, skcms_PixelFormat_RGB_161616LE, skcms_AlphaFormat_Unpremul, NULL,
+                           4));
+
+    uint16_t expected[] = { 0x0000, 0x0000, 0x0000,
+                            0x0000, 0x0000, 0x0101,
+                            0x0101, 0x0101, 0xffff,
+                            0xffff, 0xffff, 0xffff };
+    for (int i = 0; i < 12; i++) {
+        expect_close(back[i], expected[i]);
+    }
+}
+
+static int bswap16(int x) {
+    const int lo = x & 0x00ff, hi = x & 0xff00;  // low byte and second byte of x
+    return (lo << 8) | (hi >> 8);                // exchanged; any bits above 16 are dropped
+}
+
+static void test_FormatConversions_16161616BE() {  // 16-bit BE RGBA -> 8-bit RGBA -> 16-bit BE RGBA.
+    // We want to hit each 16-bit value, 4 per each of 16384 pixels.
+    uint64_t* src = malloc(8 * 16384);  // 8 bytes per pixel; the result is not NULL-checked.
+    for (int i = 0; i < 16384; i++) {
+        src[i] = (uint64_t)(4*i + 0) <<  0
+               | (uint64_t)(4*i + 1) << 16
+               | (uint64_t)(4*i + 2) << 32
+               | (uint64_t)(4*i + 3) << 48;
+    }
+    expect(src[    0] == 0x0003000200010000);
+    expect(src[ 8127] == 0x7eff7efe7efd7efc);  // This should demonstrate interesting rounding.
+    expect(src[16383] == 0xfffffffefffdfffc);
+
+    uint32_t* dst = malloc(4 * 16384);
+    expect(skcms_Transform(src, skcms_PixelFormat_RGBA_16161616BE, skcms_AlphaFormat_Unpremul, NULL,
+                           dst, skcms_PixelFormat_RGBA_8888      , skcms_AlphaFormat_Unpremul, NULL,
+                           16384));
+
+    // skcms_Transform() will treat src as holding big-endian 16-bit values,
+    // so the low lanes are actually the most significant byte, and the high least.
+    // Only the top byte of dst[8127] differs between the kFP16 and non-kFP16 expectations.
+    expect(dst[    0] == 0x03020100);
+    expect(dst[ 8127] == (kFP16 ? 0xfffefdfc : 0xfefefdfc));
+    expect(dst[16383] == 0xfffefdfc);
+
+    // We've lost precision when transforming to 8-bit, so these won't quite round-trip.
+    // Instead we should see the 8-bit dst value byte-doubled, as 65535/255 = 257 = 0x0101.
+    uint64_t* back = malloc(8 * 16384);
+    expect(skcms_Transform(dst , skcms_PixelFormat_RGBA_8888      ,skcms_AlphaFormat_Unpremul, NULL,
+                           back, skcms_PixelFormat_RGBA_16161616BE,skcms_AlphaFormat_Unpremul, NULL,
+                           16384));
+    for (int i = 0; i < 16384; i++) {
+        expect_close(bswap16((back[i] >>  0) & 0xffff), ((dst[i] >>  0) & 0xff) * 0x0101);
+        expect_close(bswap16((back[i] >> 16) & 0xffff), ((dst[i] >>  8) & 0xff) * 0x0101);
+        expect_close(bswap16((back[i] >> 32) & 0xffff), ((dst[i] >> 16) & 0xff) * 0x0101);
+        expect_close(bswap16((back[i] >> 48) & 0xffff), ((dst[i] >> 24) & 0xff) * 0x0101);
+    }
+
+    free(src);
+    free(dst);
+    free(back);
+}
+
+static void test_FormatConversions_161616BE() {  // 16-bit BE RGB (no alpha) <-> 8-bit RGBA.
+    // We'll test the same cases as the _16161616BE() test, as if they were 4 RGB pixels.
+    uint16_t src[] = { 0x0000, 0x0001, 0x0002,
+                       0x0003, 0x7efc, 0x7efd,
+                       0x7efe, 0x7eff, 0xfffc,
+                       0xfffd, 0xfffe, 0xffff };
+    uint32_t dst[4];
+    expect(skcms_Transform(src, skcms_PixelFormat_RGB_161616BE, skcms_AlphaFormat_Unpremul, NULL,
+                           dst, skcms_PixelFormat_RGBA_8888   , skcms_AlphaFormat_Unpremul, NULL,
+                           4));
+    // The source has no alpha channel; every dst pixel is expected to carry alpha 0xff.
+    expect(dst[0] == 0xff020100);
+    expect(dst[1] == 0xfffdfc03);
+    expect(dst[2] == (kFP16 ? 0xfffcfffe : 0xfffcfefe));
+    expect(dst[3] == 0xfffffefd);
+
+    // We've lost precision when transforming to 8-bit, so these won't quite round-trip.
+    // Instead we should see the 8-bit dst value byte doubled, as 65535/255 = 257 = 0x0101.
+    uint16_t back[12];
+    expect(skcms_Transform(dst , skcms_PixelFormat_RGBA_8888   , skcms_AlphaFormat_Unpremul, NULL,
+                           back, skcms_PixelFormat_RGB_161616BE, skcms_AlphaFormat_Unpremul, NULL,
+                           4));
+    uint16_t expected[] = { 0x0000, 0x0101, 0x0202,
+                            0x0303, 0xfcfc, 0xfdfd,
+                            0xfefe, 0xfefe, 0xfcfc,
+                            0xfdfd, 0xfefe, 0xffff };
+    for (int i = 0; i < 12; i++) {
+        expect_close(bswap16(back[i]), expected[i]);
+    }
+}
+
+static void test_FormatConversions_101010() {  // RGBA_1010102: three 10-bit lanes + 2-bit alpha.
+    uint32_t src = (uint32_t)1023 <<  0    // 1.0.
+                 | (uint32_t) 511 << 10    // About 1/2.
+                 | (uint32_t)   4 << 20    // Smallest 10-bit channel that's non-zero in 8-bit.
+                 | (uint32_t)   1 << 30;   // 1/3, smallest non-zero alpha.
+    uint32_t dst;
+    expect(skcms_Transform(&src, skcms_PixelFormat_RGBA_1010102, skcms_AlphaFormat_Unpremul, NULL,
+                           &dst, skcms_PixelFormat_RGBA_8888   , skcms_AlphaFormat_Unpremul, NULL,
+                           1));
+    expect(dst == 0x55017fff);  // Low to high byte: 0xff (1023), 0x7f (511), 0x01 (4), 0x55 (alpha).
+
+    // Same as above, but we'll ignore the 1/3 alpha and fill in 1.0.
+    expect(skcms_Transform(&src, skcms_PixelFormat_RGBA_1010102, skcms_AlphaFormat_Opaque  , NULL,
+                           &dst, skcms_PixelFormat_RGBA_8888   , skcms_AlphaFormat_Unpremul, NULL,
+                           1));
+    expect(dst == 0xff017fff);
+
+    // Converting 101010x <-> 1010102 will force opaque in either direction.
+    expect(skcms_Transform(&src, skcms_PixelFormat_RGBA_1010102, skcms_AlphaFormat_Unpremul, NULL,
+                           &dst, skcms_PixelFormat_RGBA_1010102, skcms_AlphaFormat_Opaque  , NULL,
+                           1));
+    expect(dst == ( (uint32_t)1023 <<  0
+                  | (uint32_t) 511 << 10
+                  | (uint32_t)   4 << 20
+                  | (uint32_t)   3 << 30));
+    expect(skcms_Transform(&src, skcms_PixelFormat_RGBA_1010102, skcms_AlphaFormat_Opaque  , NULL,
+                           &dst, skcms_PixelFormat_RGBA_1010102, skcms_AlphaFormat_Unpremul, NULL,
+                           1));
+    expect(dst == ( (uint32_t)1023 <<  0
+                  | (uint32_t) 511 << 10
+                  | (uint32_t)   4 << 20
+                  | (uint32_t)   3 << 30));
+}
+
+static void test_FormatConversions_half() {  // Half floats (hhhh/hhh) to 8888 and ffff, then back.
+    uint16_t src[] = {
+        0x3c00,  // 1.0
+        0x3800,  // 0.5
+        0x1805,  // Should round up to 0x01
+        0x1803,  // Should round down to 0x00  (0x1804 may go up or down depending on precision)
+        0x4000,  // 2.0
+        0x03ff,  // A denorm, may be flushed to zero.
+        0x83ff,  // A negative denorm, may be flushed to zero.
+        0xbc00,  // -1.0
+    };
+    // The 8 halfs above are read as 2 RGBA_hhhh pixels, or as 2 RGB_hhh pixels using src[0-5].
+    uint32_t dst[2];
+    expect(skcms_Transform(&src, skcms_PixelFormat_RGBA_hhhh, skcms_AlphaFormat_Unpremul, NULL,
+                           &dst, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           2));
+    expect(dst[0] == 0x000180ff);
+    expect(dst[1] == 0x000000ff);  // Notice we've clamped 2.0 to 0xff and -1.0 to 0x00.
+
+    expect(skcms_Transform(&src, skcms_PixelFormat_RGB_hhh  , skcms_AlphaFormat_Unpremul, NULL,
+                           &dst, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           2));
+    expect(dst[0] == 0xff0180ff);
+    expect(dst[1] == 0xff00ff00);  // Remember, this corresponds to src[3-5].
+
+    float fdst[8];
+    expect(skcms_Transform( &src, skcms_PixelFormat_RGBA_hhhh, skcms_AlphaFormat_Unpremul, NULL,
+                           &fdst, skcms_PixelFormat_RGBA_ffff, skcms_AlphaFormat_Unpremul, NULL,
+                           2));
+    expect(fdst[0] ==  1.0f);
+    expect(fdst[1] ==  0.5f);
+    expect(fdst[2] > 1/510.0f);
+    expect(fdst[3] < 1/510.0f);
+    expect(fdst[4] ==  2.0f);
+    expect(fdst[5] == +0.00006097555f || fdst[5] == 0.0f);  // may have been flushed to zero
+    expect(fdst[6] == -0.00006097555f || fdst[6] == 0.0f);
+    expect(fdst[7] == -1.0f);
+
+    // Now convert back, first to RGBA halfs, then RGB halfs.
+    uint16_t back[8];
+    expect(skcms_Transform(&fdst, skcms_PixelFormat_RGBA_ffff, skcms_AlphaFormat_Unpremul, NULL,
+                           &back, skcms_PixelFormat_RGBA_hhhh, skcms_AlphaFormat_Unpremul, NULL,
+                           2));
+    expect(back[0] == src[0]);
+    expect(back[1] == src[1]);
+    expect(back[2] == src[2]);
+    expect(back[3] == src[3]);
+    expect(back[4] == src[4]);
+    expect(back[5] == src[5] || back[5] == 0x0000);
+    expect(back[6] == src[6] || back[6] == 0x0000);
+    expect(back[7] == src[7]);
+    // Writing RGB_hhh skips each pixel's 4th float, so back[3-5] line up with src[4-6].
+    expect(skcms_Transform(&fdst, skcms_PixelFormat_RGBA_ffff, skcms_AlphaFormat_Unpremul, NULL,
+                           &back, skcms_PixelFormat_RGB_hhh  , skcms_AlphaFormat_Unpremul, NULL,
+                           2));
+    expect(back[0] == src[0]);
+    expect(back[1] == src[1]);
+    expect(back[2] == src[2]);
+    expect(back[3] == src[4]);
+    expect(back[4] == src[5] || back[4] == 0x0000);
+    expect(back[5] == src[6] || back[5] == 0x0000);
+}
+
+static void test_FormatConversions_half_norm() {  // The _Norm half format clamps to [0,1].
+    const uint16_t src[] = {
+        0x3800,  //  0.5
+        0x3c00,  //  1.0
+        0xbc00,  // -1.0
+        0x4000,  //  2.0
+    };
+    uint16_t dst[ARRAY_COUNT(src)];
+    // Each transform below converts the single 4-half pixel held in src.
+    const skcms_AlphaFormat upm = skcms_AlphaFormat_Unpremul;
+
+    // No-op, no clamp, should preserve all values.
+    expect(skcms_Transform(&src, skcms_PixelFormat_RGBA_hhhh, upm, NULL,
+                           &dst, skcms_PixelFormat_RGBA_hhhh, upm, NULL, 1));
+    expect(dst[0] == src[0]);
+    expect(dst[1] == src[1]);
+    expect(dst[2] == src[2]);
+    expect(dst[3] == src[3]);
+
+    // Clamp on read.
+    expect(skcms_Transform(&src, skcms_PixelFormat_RGBA_hhhh_Norm, upm, NULL,
+                           &dst, skcms_PixelFormat_RGBA_hhhh     , upm, NULL, 1));
+    expect(dst[0] == src[0]);
+    expect(dst[1] == src[1]);
+    expect(dst[2] == 0x0000);  // -1.0 clamps to 0.
+    expect(dst[3] == src[1]);  //  2.0 clamps to 1.0.
+
+    // Clamp on write.
+    expect(skcms_Transform(&src, skcms_PixelFormat_RGBA_hhhh     , upm, NULL,
+                           &dst, skcms_PixelFormat_RGBA_hhhh_Norm, upm, NULL, 1));
+    expect(dst[0] == src[0]);
+    expect(dst[1] == src[1]);
+    expect(dst[2] == 0x0000);
+    expect(dst[3] == src[1]);
+}
+
+static void test_FormatConversions_float() {  // 32-bit float formats (ffff/fff) to and from 8888.
+    float src[] = { 1.0f, 0.5f, 1/255.0f, 1/512.0f };
+
+    uint32_t dst;
+    expect(skcms_Transform(&src, skcms_PixelFormat_RGBA_ffff, skcms_AlphaFormat_Unpremul, NULL,
+                           &dst, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           1));
+    expect(dst == 0x000180ff);
+
+    // Same as above, but we'll ignore the 1/512 alpha and fill in 1.0.
+    expect(skcms_Transform(&src, skcms_PixelFormat_RGB_fff  , skcms_AlphaFormat_Unpremul, NULL,
+                           &dst, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                           1));
+    expect(dst == 0xff0180ff);
+
+    // Let's make sure each byte converts to the float we expect.
+    uint32_t bytes[64];
+    float   fdst[4*64];
+    for (int i = 0; i < 64; i++) {
+        bytes[i] = 0x03020100 + 0x04040404 * (uint32_t)i;
+    }
+    expect(skcms_Transform(&bytes, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, NULL,
+                            &fdst, skcms_PixelFormat_RGBA_ffff, skcms_AlphaFormat_Unpremul, NULL,
+                           64));
+    for (int i = 0; i < 256; i++) {
+        expect_close(fdst[i], (float)i*(1/255.0f));
+        if (i == 0 || i == 255) {
+            expect(fdst[i] == (float)i*(1/255.0f));
+        }
+    }
+    // RGBA_ffff -> RGB_fff drops every 4th float: first a single pixel, then all four.
+    float ffff[16] = { 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15 };
+    float  fff[12] = { 0,0,0, 0,0,0, 0,0,0, 0,0,0};
+    expect(skcms_Transform(ffff, skcms_PixelFormat_RGBA_ffff, skcms_AlphaFormat_Unpremul, NULL,
+                           fff , skcms_PixelFormat_RGB_fff  , skcms_AlphaFormat_Unpremul, NULL,
+                           1));
+    expect(fff[0] == 0); expect(fff[1] == 1); expect(fff[2] == 2);
+
+    expect(skcms_Transform(ffff, skcms_PixelFormat_RGBA_ffff, skcms_AlphaFormat_Unpremul, NULL,
+                           fff , skcms_PixelFormat_RGB_fff  , skcms_AlphaFormat_Unpremul, NULL,
+                           4));
+    expect(fff[0] ==  0); expect(fff[ 1] ==  1); expect(fff[ 2] ==  2);
+    expect(fff[3] ==  4); expect(fff[ 4] ==  5); expect(fff[ 5] ==  6);
+    expect(fff[6] ==  8); expect(fff[ 7] ==  9); expect(fff[ 8] == 10);
+    expect(fff[9] == 12); expect(fff[10] == 13); expect(fff[11] == 14);
+}
+
+static const char* profile_test_cases[] = {
+    // iccMAX profiles that we can't parse at all
+    "profiles/color.org/sRGB_D65_colorimetric.icc",
+    "profiles/color.org/sRGB_D65_MAT.icc",
+    "profiles/color.org/sRGB_ISO22028.icc",
+
+    // V2 or V4 profiles that only include A2B/B2A tags (no TRC or XYZ)
+    "profiles/color.org/sRGB_ICC_v4_Appearance.icc",
+    "profiles/color.org/sRGB_v4_ICC_preference.icc",
+    "profiles/color.org/Upper_Left.icc",
+    "profiles/color.org/Upper_Right.icc",
+    "profiles/misc/Apple_Wide_Color.icc",
+    "profiles/misc/Coated_FOGRA27_CMYK.icc",
+    "profiles/misc/Coated_FOGRA39_CMYK.icc",
+    "profiles/misc/ColorLogic_ISO_Coated_CMYK.icc",  // Has kTRC.
+    "profiles/misc/Japan_Color_2001_Coated.icc",
+    "profiles/misc/Lexmark_X110.icc",
+    "profiles/misc/MartiMaria_browsertest_A2B.icc",
+    "profiles/misc/PrintOpen_ISO_Coated_CMYK.icc",   // Has kTRC.
+    "profiles/misc/sRGB_ICC_v4_beta.icc",
+    "profiles/misc/SWOP_Coated_20_GCR_CMYK.icc",
+    "profiles/misc/US_Web_Coated_SWOP_CMYK.icc",
+    "profiles/misc/XRite_GRACol7_340_CMYK.icc",
+
+    // V2 monochrome output profiles that include kTRC but no A2B
+    "profiles/misc/Dot_Gain_20_Grayscale.icc",  // kTRC table
+    "profiles/misc/Gray_Gamma_22.icc",          // kTRC gamma
+
+    // V4 profiles with parametric TRC curves and XYZ
+    "profiles/mobile/Display_P3_parametric.icc",
+    "profiles/mobile/sRGB_parametric.icc",
+    "profiles/mobile/iPhone7p.icc",
+    "profiles/misc/sRGB_lcms.icc",
+
+    // V4 profiles with LUT TRC curves and XYZ
+    "profiles/mobile/Display_P3_LUT.icc",
+    "profiles/mobile/sRGB_LUT.icc",
+
+    // V2 profiles with gamma TRC and XYZ
+    "profiles/color.org/Lower_Left.icc",
+    "profiles/color.org/Lower_Right.icc",
+    "profiles/misc/AdobeRGB.icc",
+    "profiles/misc/AdobeColorSpin.icc",
+    "profiles/misc/Color_Spin_Gamma_18.icc",
+    "profiles/misc/Generic_RGB_Gamma_18.icc",
+
+    // V2 profiles with LUT TRC and XYZ
+    "profiles/color.org/sRGB2014.icc",
+    "profiles/sRGB_Facebook.icc",
+    "profiles/misc/Apple_Color_LCD.icc",
+    "profiles/misc/HD_709.icc",
+    "profiles/misc/sRGB_black_scaled.icc",
+    "profiles/misc/sRGB_HP.icc",
+    "profiles/misc/sRGB_HP_2.icc",
+
+    // Calibrated monitor profile with identical sRGB-ish tables.
+    "profiles/misc/sRGB_Calibrated_Homogeneous.icc",
+
+    // Calibrated monitor profile with slightly different sRGB-like tables for each channel.
+    "profiles/misc/sRGB_Calibrated_Heterogeneous.icc",
+
+    // Calibrated monitor profile with non-monotonic TRC tables. We approximate, but badly.
+    "profiles/misc/DisplayCal_ASUS_NonMonotonic.icc",
+
+    // Hard test profile. Non-invertible XYZ, three separate tables that fail to approximate
+    "profiles/misc/MartiMaria_browsertest_HARD.icc",
+
+    // Camera profile with three separate tables that fail to approximate
+    "profiles/misc/Phase_One_P25.icc",
+
+    // Profile claims to be sRGB, but seems quite different
+    "profiles/misc/Kodak_sRGB.icc",
+
+    // Bad profiles found in the wild
+    "profiles/misc/ColorGATE_Sihl_PhotoPaper.icc",  // Broken tag table, and A2B0 fails to parse
+    "profiles/misc/bad_pcs.icc",                    // PCS is 'RGB '
+
+    // Unsure what the bug here is, chromium:875650.
+    "profiles/misc/ThinkpadX1YogaV2.icc",
+    "profiles/misc/XPS13_9360.icc",
+
+    // Calibrated profile where A2B/B2A and XYZ+TRC produce very different gamut mappings.
+    // User was (rightly) confused & convinced that profile was being ignored.
+    "profiles/misc/Calibrated_A2B_XYZ_Mismatch.icc",  // chromium:1055154
+
+    // HDR profiles that include the new 'cicp' tag (from ICC 4.4.0)
+    "profiles/misc/P3_PQ_cicp.icc",
+    "profiles/misc/Rec2020_HLG_cicp.icc",
+    "profiles/misc/Rec2020_PQ_cicp.icc",
+
+    // fuzzer generated profiles that found parsing bugs
+
+    // Bad tag table data - these should not parse
+    "profiles/fuzz/last_tag_too_small.icc",   // skia:7592
+    "profiles/fuzz/named_tag_too_small.icc",  // skia:7592
+
+    // Bad tag data - these should not parse
+    "profiles/fuzz/curv_size_overflow.icc",           // skia:7593
+    "profiles/fuzz/truncated_curv_tag.icc",           // oss-fuzz:6103
+    "profiles/fuzz/zero_a.icc",                       // oss-fuzz:????
+    "profiles/fuzz/a2b_too_many_input_channels.icc",  // oss-fuzz:6521
+    "profiles/fuzz/a2b_too_many_input_channels2.icc", // oss-fuzz:32765
+    "profiles/fuzz/mangled_trc_tags.icc",             // chromium:835666
+    "profiles/fuzz/negative_g_para.icc",              // chromium:836634
+    "profiles/fuzz/b2a_too_few_output_channels.icc",  // oss-fuzz:33281
+
+    // A B2A profile with no CLUT.
+    "profiles/fuzz/b2a_no_clut.icc",  // oss-fuzz:33396
+
+    // Caused skcms_PolyTF fit to round trip indices outside the range of int.
+    "profiles/fuzz/infinite_roundtrip.icc",           // oss-fuzz:8101
+    "profiles/fuzz/polytf_big_float_to_int_cast.icc", // oss-fuzz:8142
+
+    // Caused skcms_ApproximateCurve to violate the a*d+b >= 0 constraint.
+    "profiles/fuzz/inverse_tf_adb_negative.icc",      // oss-fuzz:8130
+
+    // Caused skcms_PolyTF fit to send P to NaN due to very large inverse lhs
+    "profiles/fuzz/polytf_nan_after_update.icc",      // oss-fuzz:8165
+
+    // Table is approximated by an inverse TF whose inverse is not invertible.
+    "profiles/fuzz/inverse_tf_not_invertible.icc",    // chromium:841210
+
+    // Table is approximated by a TF whose inverse has g > 16M (timeout in approx_pow)
+    "profiles/fuzz/inverse_tf_huge_g.icc",            // chromium:842374
+
+    // mAB has a CLUT with 1 input channel
+    "profiles/fuzz/one_d_clut.icc",                   // chromium:874433
+
+    // Non-D50 profiles.
+    "profiles/misc/SM245B.icc",
+    "profiles/misc/BenQ_GL2450.icc",
+
+    // This profile is fine, but has really small TRC tables (5 points).
+    "profiles/misc/BenQ_RL2455.icc",                 // chromium:869115
+
+    // This calibrated profile has a non-zero black.
+    "profiles/misc/calibrated_nonzero_black.icc",
+
+    // A zero g term causes a divide by zero when inverting.
+    "profiles/fuzz/zero_g.icc",                       // oss-fuzz:12430
+
+    // Reasonable table, but gets approximated very badly
+    "profiles/misc/crbug_976551.icc",                 // chromium:976551
+
+    // The a term goes negative when inverting.
+    "profiles/fuzz/negative_a_when_inverted.icc",     // oss-fuzz:16581
+
+    // a + b is negative when inverting, because d>0
+    "profiles/fuzz/negative_a_plus_b.icc",            // oss-fuzz:16584
+
+    "profiles/fuzz/nan_s.icc",                        // oss-fuzz:16674
+    "profiles/fuzz/inf_a.icc",                        // oss-fuzz:16675
+
+    "profiles/fuzz/fit_pq.icc",                       // oss-fuzz:18249
+
+    // Reasonable table, bad approximation (converges very slowly)
+    "profiles/misc/MR2416GSDF.icc",                   // chromium:869115
+
+    // Three different tables w/shoulders, bad approximation (slow convergence)
+    "profiles/misc/crbug_1017960_19.icc",             // chromium:1017960
+
+    "profiles/fuzz/direct_fit_not_invertible.icc",    // oss-fuzz:19341
+    "profiles/fuzz/direct_fit_negative_a.icc",        // oss-fuzz:19467
+
+    // g = 1027 -> -nan from exp2f_, sign-strip doesn't work, leading to powf_ assert
+    "profiles/fuzz/large_g.icc",                      // chromium:996795
+};
+
+static void test_Parse(bool regen) {  // regen: rewrite the "<profile>.txt" references instead of checking them
+    for (int i = 0; i < ARRAY_COUNT(profile_test_cases); ++i) {
+        const char* filename = profile_test_cases[i];
+        // Load and parse the profile; a parse failure is a recorded outcome, not a test error.
+        void* buf = NULL;
+        size_t len = 0;
+        expect(load_file(filename, &buf, &len));
+        skcms_ICCProfile profile;
+        bool parsed = skcms_Parse(buf, len, &profile);
+
+        FILE* dump = tmpfile();
+        expect(dump);
+        // The dump holds either the textual description of the profile or a fixed failure line.
+        if (parsed) {
+            dump_profile(&profile, dump);
+        } else {
+            fprintf(dump, "Unable to parse ICC profile\n");
+        }
+
+        // MakeUsable functions should leave input unchanged when returning false
+        skcms_ICCProfile as_dst = profile;
+        if (!skcms_MakeUsableAsDestination(&as_dst)) {
+            expect(memcmp(&as_dst, &profile, sizeof(profile)) == 0);
+        }
+
+        as_dst = profile;
+        if (!skcms_MakeUsableAsDestinationWithSingleCurve(&as_dst)) {
+            expect(memcmp(&as_dst, &profile, sizeof(profile)) == 0);
+        }
+
+        void* dump_buf = NULL;
+        size_t dump_len = 0;
+        expect(load_file_fp(dump, &dump_buf, &dump_len));
+        fclose(dump);
+
+        char ref_filename[256];  // the reference is named after the profile, with ".txt" appended
+        int ref_chars = snprintf(ref_filename, sizeof(ref_filename), "%s.txt", filename);
+        // snprintf signals truncation by returning >= the buffer size; reject that as well as errors.
+        expect(ref_chars >= 0 && (size_t)ref_chars < sizeof(ref_filename));
+
+        if (regen) {
+            // Just write out new test data if in regen mode
+            expect(write_file(ref_filename, dump_buf, dump_len));
+        } else {
+            // Read in existing test data
+            void* ref_buf = NULL;
+            size_t ref_len = 0;
+            expect(load_file(ref_filename, &ref_buf, &ref_len));
+
+            if (dump_len != ref_len || memcmp(dump_buf, ref_buf, dump_len) != 0) {
+                const char* cur = dump_buf;
+                const char* ref =  ref_buf;
+                size_t off = 0;  // first differing index; bounded so a prefix-only mismatch never overreads
+                while (off < dump_len && off < ref_len && cur[off] == ref[off]) { off++; }
+                // Write out the new data on a mismatch
+                fprintf(stderr, "Parse mismatch for %s:\n", filename);
+                fwrite(dump_buf, 1, dump_len, stderr);
+                fprintf(stderr, "\n");
+                // If the reference ended first there is no expected byte; a space stands in for it.
+                fprintf(stderr, "Mismatch begins at offset %zu, expected '%c', got,\n", off,
+                        off < ref_len ? ref[off] : ' ');
+                fwrite(cur + off, 1, dump_len - off, stderr);
+                fprintf(stderr, "\n");
+                expect(false);
+            }
+            free(ref_buf);
+        }
+
+        free(buf);
+        free(dump_buf);
+    }
+}
+
+static void test_ApproximateCurve_clamped() {  // fits a curve to an 8-bit table and bounds the reported error
+    // These data represent a transfer function that is clamped at the high
+    // end of its domain. It comes from the color profile attached to
+    // https://crbug.com/750459
+    float t[256] = {
+        0.000000f, 0.000305f, 0.000610f, 0.000916f, 0.001221f, 0.001511f,
+        0.001816f, 0.002121f, 0.002426f, 0.002731f, 0.003037f, 0.003601f,
+        0.003937f, 0.004303f, 0.004685f, 0.005081f, 0.005509f, 0.005951f,
+        0.006409f, 0.006882f, 0.007385f, 0.007904f, 0.008438f, 0.009003f,
+        0.009583f, 0.010193f, 0.010819f, 0.011460f, 0.012131f, 0.012818f,
+        0.013535f, 0.014267f, 0.015030f, 0.015808f, 0.016617f, 0.017456f,
+        0.018296f, 0.019181f, 0.020081f, 0.021012f, 0.021958f, 0.022934f,
+        0.023926f, 0.024949f, 0.026001f, 0.027070f, 0.028168f, 0.029297f,
+        0.030442f, 0.031617f, 0.032822f, 0.034058f, 0.035309f, 0.036591f,
+        0.037903f, 0.039231f, 0.040604f, 0.041993f, 0.043412f, 0.044846f,
+        0.046326f, 0.047822f, 0.049348f, 0.050904f, 0.052491f, 0.054108f,
+        0.055756f, 0.057420f, 0.059113f, 0.060853f, 0.062608f, 0.064393f,
+        0.066209f, 0.068055f, 0.069932f, 0.071839f, 0.073762f, 0.075731f,
+        0.077729f, 0.079759f, 0.081804f, 0.083894f, 0.086015f, 0.088167f,
+        0.090333f, 0.092546f, 0.094789f, 0.097063f, 0.099367f, 0.101701f,
+        0.104067f, 0.106477f, 0.108904f, 0.111360f, 0.113863f, 0.116381f,
+        0.118944f, 0.121538f, 0.124163f, 0.126818f, 0.129519f, 0.132235f,
+        0.134997f, 0.137789f, 0.140612f, 0.143465f, 0.146365f, 0.149279f,
+        0.152239f, 0.155230f, 0.158267f, 0.161318f, 0.164416f, 0.167544f,
+        0.170718f, 0.173907f, 0.177142f, 0.180407f, 0.183719f, 0.187045f,
+        0.190433f, 0.193835f, 0.197284f, 0.200763f, 0.204273f, 0.207813f,
+        0.211398f, 0.215030f, 0.218692f, 0.222385f, 0.226108f, 0.229877f,
+        0.233677f, 0.237522f, 0.241382f, 0.245304f, 0.249256f, 0.253239f,
+        0.257252f, 0.261311f, 0.265415f, 0.269551f, 0.273716f, 0.277928f,
+        0.282170f, 0.286458f, 0.290776f, 0.295140f, 0.299535f, 0.303975f,
+        0.308446f, 0.312947f, 0.317494f, 0.322087f, 0.326711f, 0.331380f,
+        0.336080f, 0.340826f, 0.345602f, 0.350423f, 0.355291f, 0.360174f,
+        0.365118f, 0.370092f, 0.375113f, 0.380163f, 0.385260f, 0.390387f,
+        0.395560f, 0.400778f, 0.406027f, 0.411322f, 0.416663f, 0.422034f,
+        0.427451f, 0.432898f, 0.438392f, 0.443931f, 0.449500f, 0.455116f,
+        0.460777f, 0.466468f, 0.472221f, 0.477989f, 0.483818f, 0.489677f,
+        0.495583f, 0.501518f, 0.507500f, 0.513527f, 0.519600f, 0.525719f,
+        0.531868f, 0.538064f, 0.544289f, 0.550576f, 0.556893f, 0.563256f,
+        0.569650f, 0.576104f, 0.582589f, 0.589120f, 0.595697f, 0.602304f,
+        0.608972f, 0.615671f, 0.622415f, 0.629206f, 0.636027f, 0.642908f,
+        0.649821f, 0.656779f, 0.663783f, 0.670832f, 0.677913f, 0.685054f,
+        0.692226f, 0.699443f, 0.706706f, 0.714015f, 0.721370f, 0.728771f,
+        0.736202f, 0.743694f, 0.751217f, 0.758785f, 0.766400f, 0.774060f,
+        0.781765f, 0.789517f, 0.797314f, 0.805158f, 0.813031f, 0.820966f,
+        0.828946f, 0.836957f, 0.845029f, 0.853132f, 0.861280f, 0.869490f,
+        0.877729f, 0.886015f, 0.894362f, 0.902739f, 0.911162f, 0.919631f,
+        0.928161f, 0.936721f, 0.945327f, 0.953994f, 0.962692f, 0.971435f,
+        0.980240f, 0.989075f, 0.997955f, 1.000000f,
+    };
+
+    uint8_t table_8[ARRAY_COUNT(t)];
+    for (int i = 0; i < ARRAY_COUNT(t); i++) {
+        table_8[i] = (uint8_t)(t[i] * 255.0f + 0.5f);  // quantize to 8 bits, rounding to nearest
+    }
+
+    skcms_Curve curve;  // NOTE(review): only the two fields below are set; any others stay uninitialized
+    curve.table_entries = (uint32_t)ARRAY_COUNT(t);
+    curve.table_8       = table_8;
+
+    skcms_TransferFunction tf;
+    float max_error;
+    expect(skcms_ApproximateCurve(&curve, &tf, &max_error));  // the fit itself must succeed
+
+    // The approximation isn't very good.
+    expect(max_error < 1 / 40.0f);  // i.e. the reported error must stay below 0.025
+}
+
+static void expect_eq_Matrix3x3(skcms_Matrix3x3 a, skcms_Matrix3x3 b) {
+    // Walk all nine cells in row-major order; each pair must compare exactly equal.
+    for (int cell = 0; cell < 9; cell++) {
+        expect(a.vals[cell / 3][cell % 3] == b.vals[cell / 3][cell % 3]);
+    }
+}
+
+static void test_Matrix3x3_invert() {  // inverts three matrices and compares against exact expected inverses
+    skcms_Matrix3x3 inv;
+
+    skcms_Matrix3x3 I = {{  // identity: expected to be its own inverse
+        { 1.0f, 0.0f, 0.0f },
+        { 0.0f, 1.0f, 0.0f },
+        { 0.0f, 0.0f, 1.0f },
+    }};
+    inv = (skcms_Matrix3x3){{ {0,0,0}, {0,0,0}, {0,0,0} }};  // zero the output so stale values cannot pass
+    expect(skcms_Matrix3x3_invert(&I, &inv));
+    expect_eq_Matrix3x3(inv, I);
+
+    skcms_Matrix3x3 T = {{  // identity plus offsets in the last column
+        { 1.0f, 0.0f, 3.0f },
+        { 0.0f, 1.0f, 4.0f },
+        { 0.0f, 0.0f, 1.0f },
+    }};
+    inv = (skcms_Matrix3x3){{ {0,0,0}, {0,0,0}, {0,0,0} }};
+    expect(skcms_Matrix3x3_invert(&T, &inv));
+    expect_eq_Matrix3x3(inv, (skcms_Matrix3x3){{  // expected inverse: the same offsets, negated
+        { 1.0f, 0.0f, -3.0f },
+        { 0.0f, 1.0f, -4.0f },
+        { 0.0f, 0.0f,  1.0f },
+    }});
+
+    skcms_Matrix3x3 S = {{  // diagonal scale by 2, 4, 8
+        { 2.0f, 0.0f, 0.0f },
+        { 0.0f, 4.0f, 0.0f },
+        { 0.0f, 0.0f, 8.0f },
+    }};
+    inv = (skcms_Matrix3x3){{ {0,0,0}, {0,0,0}, {0,0,0} }};
+    expect(skcms_Matrix3x3_invert(&S, &inv));
+    expect_eq_Matrix3x3(inv, (skcms_Matrix3x3){{  // expected inverse: reciprocals on the diagonal
+        { 0.500f, 0.000f,  0.000f },
+        { 0.000f, 0.250f,  0.000f },
+        { 0.000f, 0.000f,  0.125f },
+    }});
+}
+
+static void test_SimpleRoundTrip() {
+    // Converting parametric sRGB to itself, bytes -> bytes, must hand back every input byte.
+    void*  icc_data;
+    size_t icc_size;
+    expect(load_file("profiles/mobile/sRGB_parametric.icc", &icc_data, &icc_size));
+
+    // The same file is parsed twice so source and destination are distinct profile structs.
+    skcms_ICCProfile dst_profile, src_profile;
+    expect(skcms_Parse(icc_data, icc_size, &dst_profile));
+    expect(skcms_Parse(icc_data, icc_size, &src_profile));
+
+    uint8_t input[256], output[256];
+    for (int v = 0; v < 256; v++) { input[v] = (uint8_t)v; }
+
+    // 256 bytes viewed as RGBA_8888 are 64 pixels that together hold each byte value once.
+    expect(skcms_Transform(input , skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, &src_profile,
+                           output, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, &dst_profile,
+                           64));
+
+    for (int v = 0; v < 256; v++) {
+        expect(output[v] == (uint8_t)v);
+    }
+
+    free(icc_data);
+}
+
+// Float intermediates should be precise enough to round trip bytes between any two non-degenerate profiles.
+static void expect_round_trip_through_floats(const skcms_ICCProfile* A,
+                                             const skcms_ICCProfile* B) {
+    uint8_t packed[256];
+    float   wide[256];
+    for (int v = 0; v < 256; v++) { packed[v] = (uint8_t)v; }
+
+    // Forward: 64 RGBA_8888 pixels tagged with B become 64 RGBA_ffff pixels tagged with A.
+    expect(skcms_Transform(packed, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, B,
+                           wide  , skcms_PixelFormat_RGBA_ffff, skcms_AlphaFormat_Unpremul, A,
+                           64));
+
+    // Wipe the bytes so the final check can only pass on freshly written output.
+    memset(packed, 0, sizeof(packed));
+
+    // Backward: the floats tagged with A are converted back into bytes tagged with B.
+    expect(skcms_Transform(wide  , skcms_PixelFormat_RGBA_ffff, skcms_AlphaFormat_Unpremul, A,
+                           packed, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, B,
+                           64));
+    for (int v = 0; v < 256; v++) {
+        expect(packed[v] == (uint8_t)v);
+    }
+}
+
+static void test_FloatRoundTrips() {  // round trips bytes through floats for every ordered pair of four profiles
+    void*  srgb_ptr;
+    size_t srgb_len;
+    expect(load_file("profiles/mobile/sRGB_parametric.icc", &srgb_ptr, &srgb_len));
+
+
+    void*  dp3_ptr;
+    size_t dp3_len;
+    expect(load_file("profiles/mobile/Display_P3_parametric.icc", &dp3_ptr, &dp3_len));
+
+    void*  ll_ptr;
+    size_t ll_len;
+    expect(load_file("profiles/color.org/Lower_Left.icc", &ll_ptr, &ll_len));
+
+    void*  lr_ptr;
+    size_t lr_len;
+    expect(load_file("profiles/color.org/Lower_Right.icc", &lr_ptr, &lr_len));
+
+    skcms_ICCProfile srgb, dp3, ll, lr;
+    expect(skcms_Parse(srgb_ptr, srgb_len, &srgb));
+    expect(skcms_Parse( dp3_ptr,  dp3_len, &dp3 ));
+    expect(skcms_Parse(  ll_ptr,   ll_len, &ll  ));
+    expect(skcms_Parse(  lr_ptr,   lr_len, &lr  ));
+
+    // All 16 ordered pairs are exercised, including each profile paired with itself.
+    const skcms_ICCProfile* profiles[] = { &srgb, &dp3, &ll, &lr };
+    for (int i = 0; i < ARRAY_COUNT(profiles); i++)
+    for (int j = 0; j < ARRAY_COUNT(profiles); j++) {
+        expect_round_trip_through_floats(profiles[i], profiles[j]);
+    }
+
+    free(srgb_ptr);  // release the four file buffers loaded above
+    free( dp3_ptr);
+    free(  ll_ptr);
+    free(  lr_ptr);
+}
+
+static void test_sRGB_AllBytes() {
+    // Test that our transfer function implementation is perfect to at least 8-bit precision.
+
+    void* ptr;
+    size_t len;
+    skcms_ICCProfile sRGB;
+    expect( load_file("profiles/mobile/sRGB_parametric.icc", &ptr, &len) );
+    expect( skcms_Parse(ptr, len, &sRGB) );
+
+    skcms_ICCProfile linear_sRGB = sRGB;  // copy of the profile whose curve is replaced just below
+    skcms_TransferFunction linearTF = { 1,1,0,0,0,0,0 };  // NOTE(review): presumably g=1, a=1, rest 0 — confirm field order
+    skcms_SetTransferFunction(&linear_sRGB, &linearTF);
+
+    // Enough to hit all distinct bytes when interpreted as RGB 888.
+    uint8_t src[258],
+            dst[258];
+    for (int i = 0; i < 258; i++) {
+        src[i] = (uint8_t)(i & 0xFF);  // (We don't really care about bytes 256 and 257.)
+    }
+
+    expect( skcms_Transform(src, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, &sRGB,
+                            dst, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, &linear_sRGB,
+                            258/3) );  // 258 bytes at 3 bytes per pixel = 86 pixels
+
+    for (int i = 0; i < 256; i++) {
+        float linear = skcms_TransferFunction_eval(&sRGB.trc[0].parametric, (float)i * (1/255.0f));  // reference value
+        uint8_t expected = (uint8_t)(linear * 255.0f + 0.5f);  // round to the nearest byte
+
+        if (dst[i] != expected) {
+            fprintf(stderr, "%d -> %u, want %u\n", i, dst[i], expected);  // log the offending value before the expect below
+        }
+
+        expect(dst[i] == expected);
+    }
+
+    free(ptr);
+}
+
+static void test_TRC_Table16() {
+    // We'll convert from FB (table-based sRGB) to sRGB (parametric sRGB).
+    skcms_ICCProfile FB, sRGB;
+
+    void  *FB_ptr, *sRGB_ptr;
+    size_t FB_len,  sRGB_len;
+    expect( load_file("profiles/sRGB_Facebook.icc"         , &  FB_ptr, &  FB_len) );
+    expect( load_file("profiles/mobile/sRGB_parametric.icc", &sRGB_ptr, &sRGB_len) );
+    expect( skcms_Parse(  FB_ptr,   FB_len, &  FB) );
+    expect( skcms_Parse(sRGB_ptr, sRGB_len, &sRGB) );
+
+    // Enough to hit all distinct bytes when interpreted as RGB 888.
+    uint8_t src[258],
+            dst[258];
+    for (int i = 0; i < 258; i++) {
+        src[i] = (uint8_t)(i & 0xFF);  // (We don't really care about bytes 256 and 257.)
+    }
+
+    expect( skcms_Transform(src, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, &FB,
+                            dst, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, &sRGB,
+                            258/3) );  // 258 bytes at 3 bytes per pixel = 86 pixels
+
+    for (int i = 0; i < 256; i++) {
+        expect( dst[i] == i );  // every byte value must come through unchanged
+    }
+
+    free(  FB_ptr);
+    free(sRGB_ptr);
+}
+
+static void test_Premul() {  // checks unpremul -> premul and premul -> unpremul within one profile
+    void* ptr;
+    size_t len;
+    skcms_ICCProfile sRGB;
+    expect( load_file("profiles/mobile/sRGB_parametric.icc", &ptr, &len) );
+    expect( skcms_Parse(ptr, len, &sRGB) );
+
+    expect (sRGB.has_trc && sRGB.trc[0].table_entries == 0);  // the first curve must not be table-based
+
+    const skcms_TransferFunction* tf = &sRGB.trc[0].parametric;
+    skcms_TransferFunction inv;
+    expect (skcms_TransferFunction_invert(tf, &inv));  // only invertibility is checked; inv is not used afterwards
+
+    uint8_t src[256],
+            dst[256] = {0};
+    for (int i = 0; i < 256; i++) {
+        src[i] = (uint8_t)i;  // pixel k is (4k, 4k+1, 4k+2, 4k+3), so the 4th bytes are 3, 7, ..., 255
+    }
+
+    expect(skcms_Transform(
+        src, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul       , &sRGB,
+        dst, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_PremulAsEncoded, &sRGB,
+        64));
+    for (int i = 0; i < 256; i+=4) {  // expected: each color byte scaled by alpha/255, rounded
+        expect_close( dst[i+0], (uint8_t)( src[i+0] * (src[i+3]/255.0f) + 0.5f ) );
+        expect_close( dst[i+1], (uint8_t)( src[i+1] * (src[i+3]/255.0f) + 0.5f ) );
+        expect_close( dst[i+2], (uint8_t)( src[i+2] * (src[i+3]/255.0f) + 0.5f ) );
+        expect      ( dst[i+3] == src[i+3] );
+    }
+
+    expect(skcms_Transform(
+        src, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_PremulAsEncoded, &sRGB,
+        dst, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul       , &sRGB,
+        64));
+    for (int i = 0; i < 256; i+=4) {  // expected: each color byte divided by alpha/255 (never zero here), rounded
+        expect_close( dst[i+0], (uint8_t)( src[i+0] / (src[i+3]/255.0f) + 0.5f ) );
+        expect_close( dst[i+1], (uint8_t)( src[i+1] / (src[i+3]/255.0f) + 0.5f ) );
+        expect_close( dst[i+2], (uint8_t)( src[i+2] / (src[i+3]/255.0f) + 0.5f ) );
+        expect      ( dst[i+3] == src[i+3] );
+    }
+
+    free(ptr);
+}
+
+static void test_ByteToLinearFloat() {  // all-0xFF bytes must become exactly 1.0f in every float channel
+    uint32_t src[1] = { 0xFFFFFFFF };
+    float dst[4];
+
+    void*  srgb_ptr;
+    size_t srgb_len;
+    expect(load_file("profiles/mobile/sRGB_parametric.icc", &srgb_ptr, &srgb_len));
+
+    skcms_ICCProfile srgb, srgb_linear;
+    expect(skcms_Parse(srgb_ptr, srgb_len, &srgb));
+    srgb_linear = srgb;
+    for (int i = 0; i < 3; ++i) {
+        srgb_linear.trc[i].parametric.g = 1.0f;  // overwrite each channel's curve parameters in place
+        srgb_linear.trc[i].parametric.a = 1.0f;
+        srgb_linear.trc[i].parametric.b = 0.0f;
+        srgb_linear.trc[i].parametric.c = 0.0f;
+        srgb_linear.trc[i].parametric.d = 0.0f;
+        srgb_linear.trc[i].parametric.e = 0.0f;
+        srgb_linear.trc[i].parametric.f = 0.0f;
+    }
+    // The result must be checked: after a failed transform the expects below would read dst uninitialized.
+    expect(skcms_Transform(src, skcms_PixelFormat_BGRA_8888, skcms_AlphaFormat_Unpremul, &srgb,
+                           dst, skcms_PixelFormat_RGBA_ffff, skcms_AlphaFormat_Unpremul, &srgb_linear, 1));
+
+    expect(dst[0] == 1.0f);
+    expect(dst[1] == 1.0f);
+    expect(dst[2] == 1.0f);
+    expect(dst[3] == 1.0f);
+
+    free(srgb_ptr);
+}
+
+// This test is written with the expectation that we use A2B1, not A2B0.
+#if 0
+static void test_CLUT() {  // currently compiled out by the surrounding #if 0
+    // Identity* transform from a v4 A2B profile to good old parametric sRGB.
+    //   * Approximate identity, apparently?
+    void  *srgb_ptr, *a2b_ptr;
+    size_t srgb_len,  a2b_len;
+    expect(load_file("profiles/mobile/sRGB_parametric.icc",           &srgb_ptr, &srgb_len));
+    expect(load_file("profiles/color.org/sRGB_ICC_v4_Appearance.icc", & a2b_ptr, & a2b_len));
+
+    skcms_ICCProfile srgb, a2b;
+    expect( skcms_Parse(srgb_ptr, srgb_len, &srgb) );
+    expect( skcms_Parse( a2b_ptr,  a2b_len, & a2b) );
+
+    // We'll test some edge and middle RGB values.
+    uint8_t src[] = {
+        0x00, 0x00, 0x00,
+        0x00, 0x00, 0x7f,
+        0x00, 0x00, 0xff,
+        0x00, 0x7f, 0x00,
+        0x00, 0xff, 0x00,
+        0x00, 0x7f, 0x7f,
+        0x00, 0xff, 0xff,
+        0x7f, 0x00, 0x00,
+        0xff, 0x00, 0x00,
+        0x7f, 0x00, 0x7f,
+        0xff, 0x00, 0xff,
+        0x7f, 0x7f, 0x00,
+        0xff, 0xff, 0x00,
+        0x7f, 0x7f, 0x7f,
+        0xff, 0xff, 0xff,
+    }, dst[ARRAY_COUNT(src)];
+
+    expect(skcms_Transform(src, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, &a2b,
+                           dst, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, &srgb,
+                           ARRAY_COUNT(src)/3));  // three bytes per RGB_888 pixel
+
+    for (int i = 0; i < ARRAY_COUNT(src); i++) {
+        // We'd like these all to be perfect (tol = 0),
+        // but that doesn't seem to be what the profile is telling us to do.
+        int tol = 1;  // allowed absolute difference for this byte
+        if (src[i] == 0) {
+            tol = 9;  // zero-valued inputs get a looser bound
+        }
+        if (abs(dst[i] - src[i]) > tol) {
+            printf("%d: %d vs %d\n", i, dst[i], src[i]);  // print the outlier before the expect below
+        }
+        expect(abs(dst[i] - src[i]) <= tol);
+    }
+
+    free(srgb_ptr);
+    free(a2b_ptr);
+}
+#endif
+
+static void test_MakeUsableAsDestination() {  // a parsed LUT profile is rejected as a destination until made usable
+    void*  ptr;
+    size_t len;
+    expect(load_file("profiles/mobile/sRGB_LUT.icc", &ptr, &len));
+
+    skcms_ICCProfile profile;
+    expect(skcms_Parse(ptr, len, &profile));
+
+    uint32_t src = 0xffaaccee, dst;  // one RGBA_8888 pixel; dst is checked only after the successful transform
+
+    // We can't transform to table-based profiles (yet?).
+    expect(!skcms_Transform(
+                &src, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, skcms_sRGB_profile(),
+                &dst, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, &profile,
+                1));
+
+    // We should be able to approximate this profile
+    expect(skcms_MakeUsableAsDestination(&profile));  // modifies profile in place
+
+    // Now the transform should work.
+    expect(skcms_Transform(
+               &src, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, skcms_sRGB_profile(),
+               &dst, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, &profile,
+               1));
+
+    // This should be pretty much an identity transform.
+    expect(dst == 0xffaaccee);  // exact equality with src is required
+
+    free(ptr);
+}
+
+static void test_MakeUsableAsDestinationAdobe() {
+    void*  icc_data;
+    size_t icc_size;
+    expect(load_file("profiles/misc/AdobeRGB.icc", &icc_data, &icc_size));
+
+    skcms_ICCProfile parsed;
+    expect(skcms_Parse(icc_data, icc_size, &parsed));
+
+    // AdobeRGB is already parametric, so making it usable as a destination must
+    // succeed and leave the struct byte-for-byte identical to what was parsed.
+    skcms_ICCProfile candidate = parsed;
+    expect(skcms_MakeUsableAsDestination(&candidate));
+    expect(0 == memcmp(&candidate, &parsed, sizeof(parsed)));
+
+    // Repeat from a fresh copy with the more aggressive SingleCurve variant.
+    candidate = parsed;
+    expect(skcms_MakeUsableAsDestinationWithSingleCurve(&candidate));
+    expect(0 == memcmp(&candidate, &parsed, sizeof(parsed)));
+
+    free(icc_data);
+}
+
+static void test_AdaptToD50() {
+    // Adapting the D65 sRGB matrix to D50 should land on the built-in sRGB profile's matrix.
+    const float d65_x = 0.3127f, d65_y = 0.3290f;
+    skcms_Matrix3x3 adapt;
+    expect(skcms_AdaptToXYZD50(d65_x, d65_y, &adapt));
+    skcms_Matrix3x3 native = {{
+        { 0.4124564f, 0.3575761f, 0.1804375f },
+        { 0.2126729f, 0.7151522f, 0.0721750f },
+        { 0.0193339f, 0.1191920f, 0.9503041f }
+    }};
+    const skcms_Matrix3x3 adapted = skcms_Matrix3x3_concat(&adapt, &native);
+    const skcms_Matrix3x3 want = skcms_sRGB_profile()->toXYZD50;
+    for (int k = 0; k < 9; ++k) {
+        const float delta = adapted.vals[k / 3][k % 3] - want.vals[k / 3][k % 3];
+        expect(fabsf_(delta) < 0.0001f);
+    }
+}
+
+static void test_PrimariesToXYZ() {
+    // A matrix built from these chromaticities should match the built-in sRGB profile's matrix.
+    skcms_Matrix3x3 built;
+    expect(skcms_PrimariesToXYZD50(0.64f, 0.33f,
+                                   0.30f, 0.60f,
+                                   0.15f, 0.06f,
+                                   0.3127f, 0.3290f,
+                                   &built));
+    const skcms_Matrix3x3* want = &skcms_sRGB_profile()->toXYZD50;
+    for (int k = 0; k < 9; ++k) {
+        const int r = k / 3, c = k % 3;
+        expect(fabsf_(built.vals[r][c] - want->vals[r][c]) < 0.0001f);
+    }
+}
+
+static void test_Programmatic_sRGB() {
+    // Assemble a profile by hand: the built-in transfer function plus a matrix computed
+    // from chromaticities. It should be approximately equal to the built-in sRGB profile.
+    const skcms_ICCProfile* reference = skcms_sRGB_profile();
+    skcms_Matrix3x3 gamut;
+    expect(skcms_PrimariesToXYZD50(0.64f, 0.33f,
+                                   0.30f, 0.60f,
+                                   0.15f, 0.06f,
+                                   0.3127f, 0.3290f,
+                                   &gamut));
+    skcms_ICCProfile built;
+    skcms_Init(&built);
+    skcms_SetTransferFunction(&built, &reference->trc[0].parametric);
+    skcms_SetXYZD50(&built, &gamut);
+    expect(skcms_ApproximatelyEqualProfiles(&built, reference));
+}
+
+static void test_ExactlyEqual() {
+    const skcms_ICCProfile* builtin = skcms_sRGB_profile();
+    skcms_ICCProfile        clone   = *builtin;
+    // A profile must compare equal to itself and to a plain struct copy, in either order.
+    const skcms_ICCProfile* both[2] = { builtin, &clone };
+    for (int a = 0; a < 2; a++)
+    for (int b = 0; b < 2; b++) {
+        expect(skcms_ApproximatelyEqualProfiles(both[a], both[b]));
+    }
+    // Rebuilding sRGB through the setters should give a bitwise-identical struct.
+    skcms_ICCProfile rebuilt;
+    skcms_Init(&rebuilt);
+    skcms_SetTransferFunction(&rebuilt, &builtin->trc[0].parametric);
+    skcms_SetXYZD50(&rebuilt, &builtin->toXYZD50);
+    expect(memcmp(&rebuilt, builtin, sizeof(skcms_ICCProfile)) == 0);
+}
+
+static void test_GrayscaleAndRGBCanBeEqual() {
+    // Only the data color space differs between the two profiles; equality must ignore it.
+    const skcms_ICCProfile* rgb = skcms_sRGB_profile();
+    skcms_ICCProfile as_gray = *rgb;
+    as_gray.data_color_space = skcms_Signature_Gray;
+    expect(skcms_ApproximatelyEqualProfiles(rgb, &as_gray));
+    expect(skcms_ApproximatelyEqualProfiles(&as_gray, rgb));
+}
+
+static void test_Clamp() {
+    // Test that we clamp out-of-gamut values when converting to fixed point,
+    // not just to byte value range but also to gamut (for compatibility with
+    // older systems).
+
+    void*  dp3_ptr;
+    size_t dp3_len;
+    expect(load_file("profiles/mobile/Display_P3_parametric.icc", &dp3_ptr, &dp3_len));
+
+    // Here's the basic premise of the test: sRGB can't represent P3's full green,
+    // but if we scale it by 50% alpha, it would "fit" in a byte.  We want to avoid that.
+    skcms_ICCProfile src,
+                     dst = *skcms_sRGB_profile();
+    expect(skcms_Parse(dp3_ptr, dp3_len, &src));  // a failed parse would invalidate every check below
+    uint8_t rgba[] = { 0, 255, 0, 127 };
+
+    // First double check that the green channel is out of gamut by transforming to float.
+    float flts[4];
+    expect(skcms_Transform(rgba, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, &src,
+                           flts, skcms_PixelFormat_RGBA_ffff, skcms_AlphaFormat_Unpremul, &dst,
+                           1));
+    expect(flts[0] < 0);   // A typical out-of-gamut green.  r,b are negative, and g > 1.
+    expect(flts[1] > 1);
+    expect(flts[2] < 0);
+    expect_close(flts[3], 127*(1/255.0f));
+
+    // Now the real test, making sure we clamp that green channel to 1.0 before premul.
+    expect(skcms_Transform(rgba, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul       , &src,
+                           rgba, skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_PremulAsEncoded, &dst,
+                           1));
+
+    expect(rgba[0] ==   0);
+    expect(rgba[1] == 127);  // would be 129 if we clamped after premul
+    expect(rgba[2] ==   0);
+    expect(rgba[3] == 127);
+
+
+    free(dp3_ptr);
+}
+
+static void test_AliasedTransforms() {  // Only each call's return value is checked, not the converted pixel.
+    // We should be able to skcms_Transform() in place if the source and destination
+    // buffers are perfectly aligned and the pixel formats are the same size.
+
+    uint64_t buf = 0;  // shared src/dst storage for every call below; each call converts a single pixel
+    skcms_AlphaFormat upm = skcms_AlphaFormat_Unpremul;
+    const skcms_ICCProfile *srgb = skcms_sRGB_profile(),
+                           *xyz  = skcms_XYZD50_profile();
+
+    expect( skcms_Transform(&buf, skcms_PixelFormat_A_8, upm, srgb,
+                            &buf, skcms_PixelFormat_G_8, upm, xyz, 1) );
+
+    expect( skcms_Transform(&buf, skcms_PixelFormat_RGB_565  , upm, srgb,
+                            &buf, skcms_PixelFormat_ABGR_4444, upm, xyz, 1) );
+
+    expect( skcms_Transform(&buf, skcms_PixelFormat_RGBA_8888   , upm, srgb,
+                            &buf, skcms_PixelFormat_RGBA_1010102, upm, xyz, 1) );
+
+    expect( skcms_Transform(&buf, skcms_PixelFormat_RGB_161616BE, upm, srgb,
+                            &buf, skcms_PixelFormat_BGR_hhh     , upm, xyz, 1) );
+
+    expect( skcms_Transform(&buf, skcms_PixelFormat_RGB_161616LE, upm, srgb,
+                            &buf, skcms_PixelFormat_BGR_161616BE, upm, xyz, 1) );
+}
+
+static void test_Palette8() {  // Palette lookup: 8-bit indices in src become 32-bit pixels in dst.
+    uint32_t palette[256];
+    for (int i = 0; i < 256; i++) {
+        palette[i] = (uint32_t)(255 - i) * 0x01010101;  // byte value 255-i repeated in all four bytes
+    }
+
+    uint8_t  src[512];
+    uint32_t dst[512];
+    for (int i = 0; i < 512; i++) {
+        src[i] = (uint8_t)(i % 256);  // indices 0..255, twice over
+    }
+
+    const skcms_ICCProfile* srgb = skcms_sRGB_profile();
+    const skcms_AlphaFormat upm = skcms_AlphaFormat_Unpremul;
+
+    expect( skcms_TransformWithPalette(src, skcms_PixelFormat_RGBA_8888_Palette8, upm, srgb,
+                                       dst, skcms_PixelFormat_RGBA_8888         , upm, srgb,
+                                       512, palette) );
+
+    for (int i = 0; i < 512; i++) {
+        uint32_t expected = (uint32_t)(255 - i%256) * 0x01010101;  // same value as palette[src[i]]
+        expect( dst[i] == expected );
+    }
+
+
+    // Double check we can't transform skcms_PixelFormat_RGBA_8888_Palette8 without a palette.
+    expect( !skcms_Transform(src, skcms_PixelFormat_RGBA_8888_Palette8, upm, srgb,
+                             dst, skcms_PixelFormat_RGBA_8888         , upm, srgb,
+                             512) );
+    expect( !skcms_TransformWithPalette(src, skcms_PixelFormat_RGBA_8888_Palette8, upm, srgb,
+                                        dst, skcms_PixelFormat_RGBA_8888         , upm, srgb,
+                                        512, NULL) );
+}
+
+static void test_TF_invert() {  // Inverting sRGB must reproduce the built-in inverse bit-for-bit; the other direction is only spot-checked.
+    const skcms_TransferFunction *sRGB = skcms_sRGB_TransferFunction(),
+                                 *inv  = skcms_sRGB_Inverse_TransferFunction();
+    expect(1.0f == skcms_TransferFunction_eval(sRGB, 1.0f));
+    expect(1.0f == skcms_TransferFunction_eval( inv, 1.0f));
+
+    skcms_TransferFunction sRGB2, inv2;
+    expect(skcms_TransferFunction_invert( inv, &sRGB2));  // inverse of the inverse -> candidate sRGB
+    expect(skcms_TransferFunction_invert(sRGB, & inv2));  // inverse of sRGB        -> candidate inverse
+
+    expect(1.0f == skcms_TransferFunction_eval(&sRGB2, 1.0f));
+    expect(1.0f == skcms_TransferFunction_eval(& inv2, 1.0f));
+
+    expect(0 == memcmp( inv, & inv2, sizeof(skcms_TransferFunction)));
+  //expect(0 == memcmp(sRGB, &sRGB2, sizeof(skcms_TransferFunction)));
+}
+
+static void test_PQ() {  // Spot-checks the PQ curve: first normalized to [0,1], then scaled toward 0-10000 nits.
+    {
+        // This PQ function maps [0,1] to [0,1].
+        skcms_TransferFunction pq;
+        expect(skcms_TransferFunction_makePQ(&pq));
+
+        expect(0.0000f == skcms_TransferFunction_eval(&pq, 0.0f));
+        expect(1.0000f == skcms_TransferFunction_eval(&pq, 1.0f));
+
+        // 100 nits is around 0.508.
+        expect(0.0099f < skcms_TransferFunction_eval(&pq, 0.508f));
+        expect(0.0101f > skcms_TransferFunction_eval(&pq, 0.508f));
+
+        // Try again with skcms_Transform().
+        float rgb[] = {0.0f,1.0f,0.508f};  // the same three inputs as above, packed as one RGB_fff pixel
+        skcms_ICCProfile src = *skcms_XYZD50_profile(),
+                         dst = *skcms_XYZD50_profile();
+        skcms_SetTransferFunction(&src, &pq);  // only src gets the PQ curve; dst stays an unmodified XYZD50 copy
+
+        expect(skcms_Transform(rgb, skcms_PixelFormat_RGB_fff,skcms_AlphaFormat_Unpremul, &src,
+                               rgb, skcms_PixelFormat_RGB_fff,skcms_AlphaFormat_Unpremul, &dst, 1));
+        expect(rgb[0] == 0.0f);
+        expect(rgb[1] == 1.0f);
+        expect(0.0099f < rgb[2] && rgb[2] < 0.0101f);
+
+        // And back.
+        expect(skcms_Transform(rgb, skcms_PixelFormat_RGB_fff,skcms_AlphaFormat_Unpremul, &dst,
+                               rgb, skcms_PixelFormat_RGB_fff,skcms_AlphaFormat_Unpremul, &src, 1));
+        expect(0 < rgb[0] && rgb[0] < 1e-6);  // TODO: can we get this perfect?
+        expect(rgb[1] == 1.0f);
+        expect(0.507f < rgb[2] && rgb[2] < 0.508f);
+    }
+
+    {
+        // Let's see if we can get absolute 0-10000 nits.
+        skcms_TransferFunction pq_abs;
+
+        // Mathematically to get 10000 on the output, we want to
+        // scale the A and B PQ terms by R = 10000 ^ (1/F).
+        float R = powf_(10000.0f, 1305/8192.0f);   // ~= 4.33691
+        expect(skcms_TransferFunction_makePQish(&pq_abs,
+                    R*(-107/128.0f), R*       1.0f,   32/2523.0f,
+                       2413/128.0f,   -2392/128.0f, 8192/1305.0f));
+
+        // That gets us close.
+        expect(0.0f == skcms_TransferFunction_eval(&pq_abs, 0.0f));
+        expect(   99.8f < skcms_TransferFunction_eval(&pq_abs, 0.508f));
+        expect(  100.0f > skcms_TransferFunction_eval(&pq_abs, 0.508f));
+        expect( 9989.0f < skcms_TransferFunction_eval(&pq_abs, 1.0f));
+        expect( 9991.0f > skcms_TransferFunction_eval(&pq_abs, 1.0f));
+
+        // We can get a lot closer with an unprincipled tweak to that math.
+        R = powf_(10009.9f, 1305/8192.0f);  // ~= 4.33759
+        expect(skcms_TransferFunction_makePQish(&pq_abs,
+                    R*(-107/128.0f), R*       1.0f,   32/2523.0f,
+                       2413/128.0f,   -2392/128.0f, 8192/1305.0f));
+        expect(0.0f == skcms_TransferFunction_eval(&pq_abs, 0.0f));
+        expect(   99.9f < skcms_TransferFunction_eval(&pq_abs, 0.508f));
+        expect(  100.0f > skcms_TransferFunction_eval(&pq_abs, 0.508f));
+        expect( 9999.0f < skcms_TransferFunction_eval(&pq_abs, 1.0f));
+        expect(10000.0f > skcms_TransferFunction_eval(&pq_abs, 1.0f));
+    }
+}
+
+static void test_HLG() {  // Checks the HLG curve and its inverse, by direct evaluation and then through skcms_Transform().
+    skcms_TransferFunction enc, dec;
+    expect(skcms_TransferFunction_makeHLG(&dec));
+    expect(skcms_TransferFunction_invert(&dec, &enc));  // enc is derived from dec rather than built directly
+
+    // Spot check the lower half of the curve.
+    // Linear 0 encodes as 0.5*(0)^0.5 == 0.
+    expect(0.0f == skcms_TransferFunction_eval(&enc, 0.0f));
+    expect(0.0f == skcms_TransferFunction_eval(&dec, 0.0f));
+
+    // Linear 1 encodes as 0.5*(1)^0.5 == 0.5
+    expect(0.5f == skcms_TransferFunction_eval(&enc, 1.0f));
+    expect(1.0f == skcms_TransferFunction_eval(&dec, 0.5f));
+
+    // Linear 0.5 encodes as 0.5*(0.5)^0.5, about 0.3535.
+    expect(0.3535f < skcms_TransferFunction_eval(&enc, 0.5f));
+    expect(0.3536f > skcms_TransferFunction_eval(&enc, 0.5f));
+    expect(0.5000f < skcms_TransferFunction_eval(&dec, skcms_TransferFunction_eval(&enc, 0.5f)));
+    expect(0.5001f > skcms_TransferFunction_eval(&dec, skcms_TransferFunction_eval(&enc, 0.5f)));
+
+    // Spot check upper half of the curve.
+    // We should have some continuity with the lower half.
+    expect(0.5000f < skcms_TransferFunction_eval(&enc, 1.000001f));
+    expect(0.5001f > skcms_TransferFunction_eval(&enc, 1.000001f));
+
+    // TODO: this isn't really the best round-trip precision.
+    expect(1.000001f < skcms_TransferFunction_eval(&dec,
+                                                   skcms_TransferFunction_eval(&enc, 1.000001f)));
+    expect(1.000010f > skcms_TransferFunction_eval(&dec,
+                                                   skcms_TransferFunction_eval(&enc, 1.000001f)));
+
+    // The maximum value we can encode should be 12.
+    // TODO: it'd be nice to get this to exactly 1.0f.
+    expect(0.999999f < skcms_TransferFunction_eval(&enc, 12.0f));
+    expect(1.000000f > skcms_TransferFunction_eval(&enc, 12.0f));
+    // TODO: it'd be nice to get this to exactly 12.0f.
+    expect(12.00000f < skcms_TransferFunction_eval(&dec, 1.0f));
+    expect(12.00001f > skcms_TransferFunction_eval(&dec, 1.0f));
+
+    // Now let's try that all again with skcms_Transform(), first linear -> HLG.
+    float rgb[] = { 0.0f,1.0f,0.5f, 1.000001f,6.0f,12.0f };  // six values == the two RGB_fff pixels transformed below
+
+    skcms_ICCProfile src = *skcms_XYZD50_profile(),
+                     dst = *skcms_XYZD50_profile();
+    skcms_SetTransferFunction(&dst, &dec);  // dst carries the HLG curve; src stays an unmodified XYZD50 copy
+
+    expect(skcms_Transform(rgb, skcms_PixelFormat_RGB_fff,skcms_AlphaFormat_Unpremul, &src,
+                           rgb, skcms_PixelFormat_RGB_fff,skcms_AlphaFormat_Unpremul, &dst, 2));
+    expect(rgb[0] == 0.0f);
+    expect(rgb[1] == 0.5f);
+    expect(0.35350f < rgb[2] && rgb[2] < 0.35360f);
+    expect(0.50000f < rgb[3] && rgb[3] < 0.50010f);
+    expect(0.87164f < rgb[4] && rgb[4] < 0.87165f);
+    expect(0.99999f < rgb[5] && rgb[5] < 1.00000f);
+
+    // Convert back.
+    expect(skcms_Transform(rgb, skcms_PixelFormat_RGB_fff,skcms_AlphaFormat_Unpremul, &dst,
+                           rgb, skcms_PixelFormat_RGB_fff,skcms_AlphaFormat_Unpremul, &src, 2));
+    expect(rgb[0] == 0.0f);
+    expect(rgb[1] == 1.0f);
+    expect( 0.50000f < rgb[2] && rgb[2] <  0.50001f);
+    expect( 1.00000f < rgb[3] && rgb[3] <  1.00001f);
+    expect( 6.00000f < rgb[4] && rgb[4] <  6.00001f);
+    expect(12.00000f < rgb[5] && rgb[5] < 12.00001f);
+}
+
+static void test_scaled_HLG() {
+    // HLG curve scaled 4x, spot checked at a bunch of interesting points.
+    skcms_TransferFunction enc, dec;
+    expect(skcms_TransferFunction_makeScaledHLGish(
+                &dec, 4.0f, 2.0f,2.0f
+                    , 1/0.17883277f, 0.28466892f, 0.55991073f));
+    expect(skcms_TransferFunction_invert(&dec, &enc));
+
+    // TODO: tolerance in ulps?
+    const float exact = 0.0000f,
+                tight = 0.0001f,
+                loose = 0.0002f;
+    struct {
+        float tol, linear, encoded;  // allowed absolute error, linear value, expected encoded value
+    } cases[] = {
+        // Points well on the gamma side of the curve.
+        {exact, 0.0f, 0.0f},                 // = 0.5*(0.0/4)^0.5
+        {tight, 0.5f, 0.1767766952966369f},  // ≈ 0.5*(0.5/4)^0.5
+        {tight, 1.0f, 0.25f},                // = 0.5*(1.0/4)^0.5
+        {tight, 2.0f, 0.3535533905932738f},  // ≈ 0.5*(2.0/4)^0.5
+
+        // With a scale of 4, linear 4.0f is the border between gamma and exponential curves.
+        {tight, 3.999f, 0.49993749609326166f},   // ≈ 0.5*(3.999/4)^0.5
+        {exact, 4.000f, 0.5f},                   // = 0.5*(4.000/4)^0.5
+        {tight, 4.001f, 0.5000624895514657f},    // ≈ 0.17883*ln(4.001/4 - 0.28467) + 0.55991
+
+        // Points well on the exponential side of the curve.
+        {loose,  6.0f, 0.5947860768815979f},     // ≈ 0.17883*ln( 6.0/4 - 0.28467) + 0.55991
+        {tight, 12.0f, 0.7385492680658274f},     // ≈ 0.17883*ln(12.0/4 - 0.28467) + 0.55991
+        {tight, 48.0f, 1.0f},                    // Unscaled max is 12, ours is 4x higher, 48.
+    };
+
+    for (int i = 0; i < ARRAY_COUNT(cases); i++) {
+        float encoded = skcms_TransferFunction_eval(&enc, cases[i].linear);
+        //fprintf(stderr, "%g -> %g, want %g\n", cases[i].linear, encoded, cases[i].encoded);
+        expect(encoded <= cases[i].encoded + cases[i].tol);
+        expect(encoded >= cases[i].encoded - cases[i].tol);
+
+        float linear = skcms_TransferFunction_eval(&dec, cases[i].encoded);
+        //fprintf(stderr, "%g -> %g, want %g\n", cases[i].encoded, linear, cases[i].linear);
+        expect(linear <= cases[i].linear + cases[i].tol);
+        expect(linear >= cases[i].linear - cases[i].tol);
+    }
+
+    // Now try all the same with skcms_Transform().  N is the case count rounded up to whole RGB pixels.
+    #define N ((ARRAY_COUNT(cases)+2)/3)
+    float rgb[N*3] = {0};  // zero-filled, so any slots past the last case still hold defined values
+
+    skcms_ICCProfile src = *skcms_XYZD50_profile(),
+                     dst = *skcms_XYZD50_profile();
+    skcms_SetTransferFunction(&dst, &dec);
+
+    for (int i = 0; i < ARRAY_COUNT(cases); i++) {
+        rgb[i] = cases[i].linear;
+    }
+    expect(skcms_Transform(rgb, skcms_PixelFormat_RGB_fff,skcms_AlphaFormat_Unpremul, &src,
+                           rgb, skcms_PixelFormat_RGB_fff,skcms_AlphaFormat_Unpremul, &dst, N));
+    for (int i = 0; i < ARRAY_COUNT(cases); i++) {
+        expect(rgb[i] <= cases[i].encoded + cases[i].tol);
+        expect(rgb[i] >= cases[i].encoded - cases[i].tol);
+    }
+
+    for (int i = 0; i < ARRAY_COUNT(cases); i++) {
+        rgb[i] = cases[i].encoded;
+    }
+    expect(skcms_Transform(rgb, skcms_PixelFormat_RGB_fff,skcms_AlphaFormat_Unpremul, &dst,
+                           rgb, skcms_PixelFormat_RGB_fff,skcms_AlphaFormat_Unpremul, &src, N));
+    for (int i = 0; i < ARRAY_COUNT(cases); i++) {
+        expect(rgb[i] <= cases[i].linear + cases[i].tol);
+        expect(rgb[i] >= cases[i].linear - cases[i].tol);
+    }
+    #undef N
+}
+
+static void test_PQ_invert() {  // Inverting PQ should give the hand-written inverse exactly, and inverting that should give PQ back.
+    skcms_TransferFunction pqA, invA, invB;
+
+    expect(skcms_TransferFunction_makePQ(&pqA));
+    // PQ's inverse is actually also PQish, so we can write out its expected value here.
+    expect(skcms_TransferFunction_makePQish(&invA, 107/128.0f, 2413/128.0f, 1305/8192.0f
+                                                 ,       1.0f, 2392/128.0f, 2523/  32.0f));
+    expect(skcms_TransferFunction_invert(&pqA, &invB));
+
+    // a,b,d,e really just negate and swap around,
+    // so those should be exact.  c and f become 1.0f/x
+    // of each other, so they might not be exactly perfect,
+    // but it turns out we do get lucky here.
+
+    expect(invA.g == invB.g);  // I.e. are we still PQ?
+    expect(invA.a == invB.a);
+    expect(invA.b == invB.b);
+    expect(invA.c == invB.c);  // We got lucky here.
+    expect(invA.d == invB.d);
+    expect(invA.e == invB.e);
+    expect(invA.f == invB.f);  // And here.
+
+    // Just for fun, invert back to PQ.
+    // This just tests the same code path twice.
+    skcms_TransferFunction pqB;
+    expect(skcms_TransferFunction_invert(&invA, &pqB));
+
+    expect(pqA.g == pqB.g);
+    expect(pqA.a == pqB.a);
+    expect(pqA.b == pqB.b);
+    expect(pqA.c == pqB.c);
+    expect(pqA.d == pqB.d);
+    expect(pqA.e == pqB.e);
+    expect(pqA.f == pqB.f);
+
+    // PQ functions invert to the same form.
+    expect(pqA.g == invA.g);
+
+    // TODO: would be nice for this to pass.
+#if 0
+    skcms_Curve pq_curve = {{0,  pqA}},
+               inv_curve = {{0, invA}};
+
+    expect(skcms_AreApproximateInverses(& pq_curve, &invA));
+    expect(skcms_AreApproximateInverses(&inv_curve, & pqA));
+#endif
+}
+
+static void test_HLG_invert() {  // Inverting HLG twice must return the original parameters exactly.
+    skcms_TransferFunction hlg, inv;
+
+    expect(skcms_TransferFunction_makeHLG(&hlg));
+    // Unlike PQ, we can't create HLG's inverse directly, only via _invert().
+    expect(skcms_TransferFunction_invert(&hlg, &inv));
+
+    skcms_TransferFunction back;
+    expect(skcms_TransferFunction_invert(&inv, &back));
+
+    expect(hlg.g == back.g);
+    expect(hlg.a == back.a);
+    expect(hlg.b == back.b);
+    expect(hlg.c == back.c);
+    expect(hlg.d == back.d);
+    expect(hlg.e == back.e);
+    expect(hlg.f == back.f);
+
+    // HLG functions invert between two different forms.
+    expect(hlg.g != inv.g);
+
+    skcms_Curve hlg_curve = {{0, hlg}},  // each function wrapped as a skcms_Curve for the inverse check below
+                inv_curve = {{0, inv}};
+
+    expect(skcms_AreApproximateInverses(&hlg_curve, &inv));
+    expect(skcms_AreApproximateInverses(&inv_curve, &hlg));
+}
+
+static void test_RGBA_8888_sRGB() {
+    // We'll convert sRGB to Display P3 two ways and test they're equivalent.
+
+    // Method A: normal sRGB profile we're used to, paired with RGBA_8888.
+    const skcms_ICCProfile* sRGB = skcms_sRGB_profile();
+
+    // Method B: linear sRGB profile paired with RGBA_8888_sRGB.
+    skcms_ICCProfile linear_sRGB = *sRGB;
+    skcms_TransferFunction linearTF = { 1,1,0,0,0,0,0 };  // presumably g=1, a=1, rest 0 (field order not visible here; confirm in skcms.h)
+    skcms_SetTransferFunction(&linear_sRGB, &linearTF);
+
+    struct {
+        skcms_PixelFormat       fmt;
+        const skcms_ICCProfile* prof;
+        float                   f32[256];  // float results of the forward transform for this method
+    } A = { skcms_PixelFormat_RGBA_8888     ,         sRGB, {0} },
+      B = { skcms_PixelFormat_RGBA_8888_sRGB, &linear_sRGB, {0} };
+
+    // We'll skip some bytes as alpha, but this is probably plenty of testing.
+    uint8_t bytes[256];
+    for (int i = 0; i < 256; i++) {
+        bytes[i] = i & 0xff;  // every byte value 0..255 exactly once
+    }
+
+    // We transform to another gamut to make sure both methods go through a full-power transform.
+    void*  ptr;
+    size_t len;
+    expect(load_file("profiles/mobile/Display_P3_parametric.icc", &ptr,&len));
+    skcms_ICCProfile dp3;
+    expect(skcms_Parse(ptr, len, &dp3));
+
+    expect(skcms_Transform(bytes,                       A.fmt, skcms_AlphaFormat_Unpremul, A.prof,
+                           A.f32, skcms_PixelFormat_RGBA_ffff, skcms_AlphaFormat_Unpremul,   &dp3,
+                           64));  // 64 four-channel pixels cover all 256 array entries
+    expect(skcms_Transform(bytes,                       B.fmt, skcms_AlphaFormat_Unpremul, B.prof,
+                           B.f32, skcms_PixelFormat_RGBA_ffff, skcms_AlphaFormat_Unpremul,   &dp3,
+                           64));
+
+    // The two methods should be bit-exact.
+    for (int i = 0; i < 256; i++) {
+        expect(A.f32[i] == B.f32[i]);
+    }
+
+    // Now let's transform both back and test they're round-trip the same.
+    for (int i = 0; i < 256; i++) { bytes[i] = 0; }  // clear first so stale input cannot satisfy the check
+    expect(skcms_Transform(A.f32, skcms_PixelFormat_RGBA_ffff, skcms_AlphaFormat_Unpremul,   &dp3,
+                           bytes,                       A.fmt, skcms_AlphaFormat_Unpremul, A.prof,
+                           64));
+    for (int i = 0; i < 256; i++) {
+        expect(bytes[i] == i);
+    }
+
+    for (int i = 0; i < 256; i++) { bytes[i] = 0; }
+    expect(skcms_Transform(B.f32, skcms_PixelFormat_RGBA_ffff, skcms_AlphaFormat_Unpremul,   &dp3,
+                           bytes,                       B.fmt, skcms_AlphaFormat_Unpremul, B.prof,
+                           64));
+    for (int i = 0; i < 256; i++) {
+        expect(bytes[i] == i);
+    }
+
+    free(ptr);
+}
+
+static void test_ParseWithA2BPriority() {  // Parses one CMYK profile with each single A2B priority from -1 to 3.
+    void*  ptr;
+    size_t len;
+    expect(load_file("profiles/misc/US_Web_Coated_SWOP_CMYK.icc", &ptr,&len));
+
+    skcms_ICCProfile simple;
+    expect(skcms_Parse(ptr, len, &simple));  // This will pick up A2B0.
+    expect(simple.has_A2B);
+
+    for (int priority = -1; priority < 4; priority++) {
+        skcms_ICCProfile profile;
+
+        bool ok = skcms_ParseWithA2BPriority(ptr, len, &priority, 1, &profile);  // presumably (priority list, list length) — confirm in skcms.h
+        if (priority < 0 || priority > 2) {
+            expect(!ok);  // only priorities 0, 1 and 2 are expected to parse
+            continue;
+        }
+        expect(ok);
+        if (priority == 0) {
+            expect(0 == memcmp(&profile, &simple, sizeof(profile)));  // must match the plain skcms_Parse() result byte-for-byte
+        } else {
+            // A2B1 != A2B0, and while A2B2 == A2B0, B2A2 != B2A0.
+            expect(0 != memcmp(&profile, &simple, sizeof(profile)));
+        }
+    }
+
+    free(ptr);
+}
+
+static void test_B2A() {  // Round-trips bytes through an A2B/B2A-only profile; only the alpha bytes are asserted afterwards.
+    void*  ptr;
+    size_t len;
+    expect(load_file("profiles/color.org/Upper_Left.icc", &ptr,&len));
+
+    skcms_ICCProfile profile;
+    expect(skcms_Parse(ptr, len, &profile));
+    expect(!profile.has_trc);
+    expect(!profile.has_toXYZD50);
+    expect( profile.has_A2B);
+    expect( profile.has_B2A);
+
+    // A B2A profile is usable as a destination unchanged.
+    skcms_ICCProfile copy = profile;
+    expect(skcms_MakeUsableAsDestination(&copy));
+    expect(0 == memcmp(&copy, &profile, sizeof(profile)));
+
+    // A B2A-only profile does not have the TRC curves that …WithSingleCurve() needs.
+    expect(!skcms_MakeUsableAsDestinationWithSingleCurve(&profile));
+
+    // A2B transform should be well-supported.
+    const uint8_t* src = skcms_252_random_bytes;
+    float xyza[252];
+    expect(skcms_Transform(
+            src,  skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, &profile,
+            xyza, skcms_PixelFormat_RGBA_ffff, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
+            252/4));  // 63 four-channel pixels
+
+    // Now convert back using B2A.
+    uint8_t dst[252];
+    expect(skcms_Transform(
+            xyza, skcms_PixelFormat_RGBA_ffff, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
+            dst,  skcms_PixelFormat_RGBA_8888, skcms_AlphaFormat_Unpremul, &profile,
+            252/4));
+
+    for (int i = 0; i < 252; i++) {
+        // Alpha should not be changed.
+        if (i % 4 == 3) {  // every fourth byte is the alpha channel of an RGBA pixel
+            expect(dst[i] == src[i]);
+            continue;
+        }
+#if 0  // TODO: this looks nothing like an identity transform!
+        fprintf(stderr, "%3d   %02x  % .4f   %02x\n", i, src[i], xyza[i], dst[i]);
+        //expect(dst[i] == src[i]);
+#endif
+    }
+
+    free(ptr);
+}
+
+int main(int argc, char** argv) {  // Runs every test in sequence and returns 0 once they have all returned.
+    bool regenTestData = false;
+    for (int i = 1; i < argc; ++i) {
+        if (0 == strcmp(argv[i], "-t")) {  // "-t" is the only recognized flag; other arguments are ignored
+            regenTestData = true;
+        }
+    }
+
+    test_ICCProfile();
+    test_FormatConversions();
+    test_FormatConversions_565();
+    test_FormatConversions_101010();
+    test_FormatConversions_16161616LE();
+    test_FormatConversions_161616LE();
+    test_FormatConversions_16161616BE();
+    test_FormatConversions_161616BE();
+    test_FormatConversions_half();
+    test_FormatConversions_half_norm();
+    test_FormatConversions_float();
+    test_ApproximateCurve_clamped();
+    test_Matrix3x3_invert();
+    test_SimpleRoundTrip();
+    test_FloatRoundTrips();
+    test_ByteToLinearFloat();
+    test_MakeUsableAsDestination();
+    test_MakeUsableAsDestinationAdobe();
+    test_AdaptToD50();
+    test_PrimariesToXYZ();
+    test_Programmatic_sRGB();
+    test_ExactlyEqual();
+    test_GrayscaleAndRGBCanBeEqual();
+    test_AliasedTransforms();
+    test_Palette8();
+    test_TF_invert();
+    test_Clamp();
+    test_Premul();
+    test_PQ();
+    test_HLG();
+    test_scaled_HLG();
+    test_PQ_invert();
+    test_HLG_invert();
+    test_RGBA_8888_sRGB();
+    test_ParseWithA2BPriority();
+    test_B2A();
+
+    // Temporarily disable some tests while getting FP16 compute working.
+    if (!kFP16) {
+        test_Parse(regenTestData);  // the only test that receives the -t flag
+        test_sRGB_AllBytes();
+        test_TRC_Table16();
+    }
+#if 0
+    test_CLUT();
+#endif
+
+    return 0;
+}
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/BUILD.bazel b/third-party/libjxl/libjxl/third_party/skcms/toolchain/BUILD.bazel
new file mode 100644
index 0000000000..e69dd59b0c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/BUILD.bazel
@@ -0,0 +1,162 @@
+load(":ndk_linux_arm64_toolchain_config.bzl", "ndk_cc_toolchain_config")
+load(":clang_linux_amd64_toolchain_config.bzl", "provide_linux_amd64_toolchain_config")
+
+package(default_visibility = ["//visibility:public"])
+
+toolchain(
+    name = "linux_amd64_clang_toolchain",
+    # Where should we run this toolchain?
+    exec_compatible_with = [
+        "@platforms//os:linux",
+        "@platforms//cpu:x86_64",
+    ],
+    # What can this toolchain build?
+    target_compatible_with = [
+        "@platforms//os:linux",
+        "@platforms//cpu:x86_64",
+        # We want to be able to explicitly tell Bazel to use this toolchain, and not the
+        # default one on a user's machine or on the RBE worker. Thus we need an extra constraint
+        # that we can use to differentiate the "stock" C++ toolchain from our hermetic one and
+        # force that use by specifying the target platform.
+        "//bazel/platform:use_hermetic_toolchain",
+    ],
+    toolchain = ":linux_amd64_clang",
+    # https://github.com/bazelbuild/rules_cc/blob/8bb0eb5c5ccd96b91753bb112096bb6993d16d13/cc/BUILD#L32-L36
+    toolchain_type = "@rules_cc//cc:toolchain_type",
+)
+
+toolchain(
+    name = "linux_amd64_ndk_arm64_toolchain",
+    exec_compatible_with = [  # Runs on linux x86_64, same as the clang toolchain above.
+        "@platforms//os:linux",
+        "@platforms//cpu:x86_64",
+    ],
+    target_compatible_with = [  # Builds for android arm64; no use_hermetic_toolchain constraint is listed here.
+        "@platforms//os:android",
+        "@platforms//cpu:arm64",
+    ],
+    toolchain = ":linux_amd64_ndk_arm64",
+    toolchain_type = "@rules_cc//cc:toolchain_type",
+)
+
+toolchain(
+    name = "linux_amd64_ndk_arm32_toolchain",
+    exec_compatible_with = [  # Runs on linux x86_64, same as the two toolchains above.
+        "@platforms//os:linux",
+        "@platforms//cpu:x86_64",
+    ],
+    target_compatible_with = [  # Builds for android armv7.
+        "@platforms//os:android",
+        "@platforms//cpu:armv7",
+    ],
+    toolchain = ":linux_amd64_ndk_arm32",
+    toolchain_type = "@rules_cc//cc:toolchain_type",
+)
+
+filegroup(name = "not_implemented")  # Empty placeholder used for cc_toolchain file attributes this package does not provide.
+
+###################################
+# clang on linux amd64 toolchain. #
+###################################
+
+provide_linux_amd64_toolchain_config(
+    name = "linux_amd64_toolchain_config",  # Referenced as toolchain_config by the cc_toolchain at the end of this section.
+)
+
+filegroup(
+    name = "archive_linux_amd64_files",
+    srcs = [
+        "linux_amd64_trampolines/ar.sh",  # Local trampoline script, listed alongside the external clang files.
+        "@clang_linux_amd64//:archive_files",
+    ],
+)
+
+filegroup(
+    name = "compile_linux_amd64_files",
+    srcs = [
+        "linux_amd64_trampolines/clang.sh",
+        "@clang_linux_amd64//:compile_files",
+    ],
+)
+
+filegroup(
+    name = "link_linux_amd64_files",
+    srcs = [
+        # Bazel assumes it is talking to Clang when linking.
+        "linux_amd64_trampolines/clang.sh",
+        "@clang_linux_amd64//:link_files",
+    ],
+)
+
+cc_toolchain(
+    name = "linux_amd64_clang",
+    all_files = ":not_implemented",  # The attributes set to :not_implemented point at the empty placeholder filegroup.
+    ar_files = ":archive_linux_amd64_files",
+    compiler_files = ":compile_linux_amd64_files",
+    dwp_files = ":not_implemented",
+    linker_files = ":link_linux_amd64_files",
+    objcopy_files = ":not_implemented",
+    strip_files = ":not_implemented",
+    supports_param_files = False,
+    toolchain_config = ":linux_amd64_toolchain_config",
+)
+
+############################
+# arm64-v8a C++ toolchain. #
+############################
+
+# https://bazel.build/reference/be/c-cpp#cc_toolchain
+cc_toolchain(
+    name = "linux_amd64_ndk_arm64",
+    all_files = ":not_implemented",  # The attributes set to :not_implemented point at the empty placeholder filegroup.
+    ar_files = ":ndk_arm64_v8a_toolchain_all_files",  # One shared filegroup serves archiving, compiling and linking.
+    compiler_files = ":ndk_arm64_v8a_toolchain_all_files",
+    dwp_files = ":not_implemented",
+    dynamic_runtime_lib = "@ndk_linux_amd64//:arm64_v8a_dynamic_runtime_libraries",
+    linker_files = ":ndk_arm64_v8a_toolchain_all_files",
+    objcopy_files = ":not_implemented",
+    static_runtime_lib = "@ndk_linux_amd64//:arm64_v8a_static_runtime_libraries",
+    strip_files = ":not_implemented",
+    supports_param_files = False,
+    toolchain_config = ":ndk_arm64_v8a_toolchain_config",
+)
+
+filegroup(
+    name = "ndk_arm64_v8a_toolchain_all_files",
+    srcs = glob(["android_trampolines/*.sh"]) + ["@ndk_linux_amd64//:arm64_v8a_all_files"],  # Every trampoline script plus the NDK's arm64_v8a files.
+)
+
+ndk_cc_toolchain_config(
+    name = "ndk_arm64_v8a_toolchain_config",
+    cpu = "arm64-v8a",  # The only parameter passed to the config rule.
+)
+
+##############################
+# armeabi-v7a C++ toolchain. #
+##############################
+
+# https://bazel.build/reference/be/c-cpp#cc_toolchain
+cc_toolchain(
+    name = "linux_amd64_ndk_arm32",
+    all_files = ":not_implemented",  # The attributes set to :not_implemented point at the empty placeholder filegroup.
+    ar_files = ":ndk_armeabi_v7a_toolchain_all_files",  # One shared filegroup serves archiving, compiling and linking.
+    compiler_files = ":ndk_armeabi_v7a_toolchain_all_files",
+    dwp_files = ":not_implemented",
+    dynamic_runtime_lib = "@ndk_linux_amd64//:armeabi_v7a_dynamic_runtime_libraries",
+    linker_files = ":ndk_armeabi_v7a_toolchain_all_files",
+    objcopy_files = ":not_implemented",
+    static_runtime_lib = "@ndk_linux_amd64//:armeabi_v7a_static_runtime_libraries",
+    strip_files = ":not_implemented",
+    supports_param_files = False,
+    toolchain_config = ":ndk_armeabi_v7a_toolchain_config",
+)
+
+filegroup(
+    name = "ndk_armeabi_v7a_toolchain_all_files",
+    srcs = glob(["android_trampolines/*.sh"]) + ["@ndk_linux_amd64//:armeabi_v7a_all_files"],  # Every trampoline script plus the NDK's armeabi_v7a files.
+)
+
+ndk_cc_toolchain_config(
+    name = "ndk_armeabi_v7a_toolchain_config",
+    cpu = "armeabi-v7a",  # The only parameter passed to the config rule.
+)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/README.md b/third-party/libjxl/libjxl/third_party/skcms/toolchain/README.md
new file mode 100644
index 0000000000..e587b1091f
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/README.md
@@ -0,0 +1,37 @@
+# Hermetic Android NDK C++ toolchain
+
+This directory defines a hermetic C++ toolchain suite to compile with the Android NDK.
+
+## Motivation
+
+Bazel has a built-in
+[`android_ndk_repository`](https://bazel.build/reference/be/android#android_ndk_repository) rule,
+which generates C++ toolchains based on a local NDK installation provided via the
+`ANDROID_NDK_HOME` environment variable. However, this rule breaks
+[hermeticity](https://bazel.build/concepts/hermeticity), and requires the user to provide an NDK
+installation.
+
+This directory provides a `download_android_ndk` repository rule, which downloads the Android NDK
+under `external/ndk_linux_amd64`, and a C++ toolchain suite that targets 32- and 64-bit ARM.
+
+## Design
+
+The C++ toolchain suite is based on the C++ toolchain generated by the `android_ndk_repository`
+rule.
+
+Steps taken:
+
+- Build SkCMS with the `android_ndk_repository` rule at
+[this revision](https://skia.googlesource.com/skcms/+/30c8e303800c256febb03a09fdcda7f75d119b1b/WORKSPACE#22).
+- Inspect the contents of `bazel-skcms/external/androidndk/BUILD.bazel` and
+`bazel-skcms/external/androidndk/cc_toolchain_config.bzl`, which are generated by said rule.
+- Extract the useful parts into the C++ toolchain defined in this directory.
+
+### Trampoline scripts
+
+The
+[`cc_common.create_cc_toolchain_config_info`](https://bazel.build/rules/lib/cc_common#create_cc_toolchain_config_info)
+function expects tool paths to point to files under the directory in which it is invoked. This
+means we cannot directly reference tools under `external/ndk_linux_amd64`. The solution is to use
+"trampoline" scripts that pass through any command-line arguments to the NDK binaries under
+`external/ndk_linux_amd64`.
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-ar.sh b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-ar.sh
new file mode 100755
index 0000000000..b51f0bfc86
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-ar.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+external/ndk_linux_amd64/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-ar "$@"
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-dwp.sh b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-dwp.sh
new file mode 100755
index 0000000000..5392836591
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-dwp.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+external/ndk_linux_amd64/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-dwp "$@"
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-ld.sh b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-ld.sh
new file mode 100755
index 0000000000..1a2e2ebd05
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-ld.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+external/ndk_linux_amd64/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-ld "$@"
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-nm.sh b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-nm.sh
new file mode 100755
index 0000000000..d042102ab7
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-nm.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+external/ndk_linux_amd64/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-nm "$@"
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-objcopy.sh b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-objcopy.sh
new file mode 100755
index 0000000000..6493e609c2
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-objcopy.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+external/ndk_linux_amd64/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objcopy "$@"
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-objdump.sh b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-objdump.sh
new file mode 100755
index 0000000000..e11bbbc899
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-objdump.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+external/ndk_linux_amd64/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump "$@"
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-strip.sh b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-strip.sh
new file mode 100755
index 0000000000..1d96b13b5d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/aarch64-linux-android-strip.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+external/ndk_linux_amd64/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-strip "$@"
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-ar.sh b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-ar.sh
new file mode 100755
index 0000000000..a492872233
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-ar.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+external/ndk_linux_amd64/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-ar "$@"
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-dwp.sh b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-dwp.sh
new file mode 100755
index 0000000000..5432509760
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-dwp.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+external/ndk_linux_amd64/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-dwp "$@"
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-ld.sh b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-ld.sh
new file mode 100755
index 0000000000..d980e966c4
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-ld.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+external/ndk_linux_amd64/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-ld "$@"
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-nm.sh b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-nm.sh
new file mode 100755
index 0000000000..e166d0f57d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-nm.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+external/ndk_linux_amd64/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-nm "$@"
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-objcopy.sh b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-objcopy.sh
new file mode 100755
index 0000000000..4c89c55775
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-objcopy.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+external/ndk_linux_amd64/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-objcopy "$@"
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-objdump.sh b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-objdump.sh
new file mode 100755
index 0000000000..ba09a2f18d
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-objdump.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+external/ndk_linux_amd64/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-objdump "$@"
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-strip.sh b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-strip.sh
new file mode 100755
index 0000000000..1b33c8bd4b
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/arm-linux-androideabi-strip.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+external/ndk_linux_amd64/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-strip "$@"
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/clang.sh b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/clang.sh
new file mode 100755
index 0000000000..83773299f9
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/clang.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+external/ndk_linux_amd64/toolchains/llvm/prebuilt/linux-x86_64/bin/clang "$@"
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/gen_trampolines/gen_trampolines.go b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/gen_trampolines/gen_trampolines.go
new file mode 100644
index 0000000000..c873a91645
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/android_trampolines/gen_trampolines/gen_trampolines.go
@@ -0,0 +1,75 @@
+// Helper program to generate trampoline scripts for NDK tools.
+//
+// This program is meant to be run by hand when making changes to the hermetic Android NDK
+// toolchain, e.g. when upgrading to a new Android NDK version.
+//
+// Trampoline scripts are necessary because the `cc_common.create_cc_toolchain_config_info`[1]
+// built-in Bazel function expects tool paths to point to files under the directory in which it is
+// invoked, thus we cannot directly reference tools under `external/ndk_linux_amd64`. The solution is
+// to use trampoline scripts that pass through any command-line arguments to the NDK binaries under
+// `external/ndk_linux_amd64`.
+//
+// [1] https://bazel.build/rules/lib/cc_common#create_cc_toolchain_config_info
+package main
+
+import (
+	"errors"
+	"flag"
+	"fmt"
+	"os"
+	"path/filepath"
+)
+
+// bazelNdkPath is the path prefix written into every generated trampoline script; each entry of
+// the tools list is joined onto it to form the command the script runs.
+const bazelNdkPath = "external/ndk_linux_amd64"
+
+// Paths relative to the Android NDK root directory. These paths can be determined by inspecting
+// the Android NDK ZIP file downloaded by the `download_ndk_linux_amd64` macro defined in
+// //toolchain/download_ndk_linux_amd64.bzl.
+//
+// The base name of each path (plus ".sh") becomes the file name of the generated trampoline
+// script, so two entries sharing a base name would write to the same output file.
+var tools = []string{
+	"toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-ar",
+	"toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-dwp",
+	"toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-ld",
+	"toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-nm",
+	"toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-objcopy",
+	"toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-objdump",
+	"toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-strip",
+	"toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-ar",
+	"toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-dwp",
+	"toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-ld",
+	"toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-nm",
+	"toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objcopy",
+	"toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump",
+	"toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-strip",
+	"toolchains/llvm/prebuilt/linux-x86_64/bin/clang",
+}
+
+// trampolineScriptTemplate is the text of a generated trampoline script. Its single %s verb is
+// replaced with the Bazel-relative path of the NDK tool. "$@" is quoted so that arguments
+// containing whitespace or glob characters reach the tool unchanged instead of being re-split
+// by the shell.
+const trampolineScriptTemplate = `#!/bin/sh
+%s "$@"
+`
+
+// main validates the --ndk-dir and --out-dir flags, then for every entry in tools checks that the
+// tool exists under the local NDK copy and writes a matching "<tool base name>.sh" trampoline
+// script into the output directory.
+//
+// The program exits with status 1 on missing flags, on any failure to stat a tool, and on any
+// failure to write a script, so a partially generated set of trampolines is never reported as
+// success.
+func main() {
+	ndkDirFlag := flag.String("ndk-dir", "", "Path to a local copy of the NDK. Used only to verify that the tool paths assumed by this program are valid. Required.")
+	outDirFlag := flag.String("out-dir", "", "Directory where to save the trampoline scripts. Required.")
+	flag.Parse()
+
+	if *ndkDirFlag == "" || *outDirFlag == "" {
+		flag.Usage()
+		os.Exit(1)
+	}
+
+	for _, tool := range tools {
+		// Verify that the tool exists in the NDK. Any Stat failure is fatal: errors other than
+		// "not found" (e.g. permission denied) also mean the path could not be verified.
+		ndkPath := filepath.Join(*ndkDirFlag, tool)
+		if _, err := os.Stat(ndkPath); err != nil {
+			if errors.Is(err, os.ErrNotExist) {
+				fmt.Fprintf(os.Stderr, "File %s not found.\n", ndkPath)
+			} else {
+				fmt.Fprintf(os.Stderr, "Error checking file %s: %s\n", ndkPath, err)
+			}
+			os.Exit(1)
+		}
+
+		// Generate trampoline script.
+		trampolineScript := fmt.Sprintf(trampolineScriptTemplate, filepath.Join(bazelNdkPath, tool))
+		trampolineScriptPath := filepath.Join(*outDirFlag, filepath.Base(tool)+".sh")
+		if err := os.WriteFile(trampolineScriptPath, []byte(trampolineScript), 0750); err != nil {
+			// Stop with a non-zero status instead of continuing with an incomplete output set.
+			fmt.Fprintf(os.Stderr, "Error writing file %s: %s\n", trampolineScriptPath, err)
+			os.Exit(1)
+		}
+	}
+}
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/clang_linux_amd64_toolchain_config.bzl b/third-party/libjxl/libjxl/third_party/skcms/toolchain/clang_linux_amd64_toolchain_config.bzl
new file mode 100644
index 0000000000..d4da451a07
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/clang_linux_amd64_toolchain_config.bzl
@@ -0,0 +1,338 @@
+"""
+This file specifies a clang toolchain that can run on a Linux host which doesn't depend on any
+installed packages from the host machine.
+
+See download_clang_linux_amd64.bzl for more details on the creation of the toolchain.
+
+It uses the top-level folder of the downloaded toolchain (EXTERNAL_TOOLCHAIN) as the sysroot.
+
+This is largely copied from Skia's clang toolchain.
+"""
+
+load(
+    "@bazel_tools//tools/cpp:cc_toolchain_config_lib.bzl",
+    "action_config",
+    "feature",
+    "flag_group",
+    "flag_set",
+    "tool",
+    "variable_with_value",
+)
+
+# https://github.com/bazelbuild/bazel/blob/master/tools/build_defs/cc/action_names.bzl
+load("@bazel_tools//tools/build_defs/cc:action_names.bzl", "ACTION_NAMES")
+
+# The location of the created clang toolchain. This path is used below as the builtin sysroot
+# and as the prefix of the system include directories and the statically linked libc++ archives.
+EXTERNAL_TOOLCHAIN = "external/clang_linux_amd64"
+
+def _linux_amd64_toolchain_info(ctx):
+    """Rule implementation that assembles the toolchain configuration for Linux amd64 clang.
+
+    Args:
+        ctx: The rule context; passed straight through to
+            cc_common.create_cc_toolchain_config_info.
+
+    Returns:
+        Whatever cc_common.create_cc_toolchain_config_info returns for the action configs and
+        features defined in this file.
+    """
+
+    # https://bazel.build/rules/lib/cc_common#create_cc_toolchain_config_info
+    # Note, this rule is defined in Java code, not Starlark
+    # https://cs.opensource.google/bazel/bazel/+/master:src/main/java/com/google/devtools/build/lib/starlarkbuildapi/cpp/CcModuleApi.java
+    return cc_common.create_cc_toolchain_config_info(
+        ctx = ctx,
+        action_configs = _make_action_configs(),
+        # Always-on default flags first, followed by the opt-in diagnostic features.
+        features = _make_default_flags() + _make_diagnostic_flags(),
+        # The sysroot is the top-level toolchain folder rather than EXTERNAL_TOOLCHAIN/usr: the
+        # libc libraries are extracted to EXTERNAL_TOOLCHAIN/lib while the headers and other
+        # shared libraries go to EXTERNAL_TOOLCHAIN/usr, and the linker complains if the libc
+        # shared libraries are not under the sysroot. For example,
+        # EXTERNAL_TOOLCHAIN/usr/lib/x86_64-linux-gnu/libc.so is just a text file that refers
+        # to "/lib/x86_64-linux-gnu/libc.so.6" and "/lib64/ld-linux-x86-64.so.2", and those
+        # paths are resolved with the sysroot as the root.
+        builtin_sysroot = EXTERNAL_TOOLCHAIN,
+        # The remaining arguments are required, but do nothing.
+        toolchain_identifier = "",
+        host_system_name = "",
+        target_system_name = "",
+        target_cpu = "",
+        target_libc = "",
+        compiler = "",
+        abi_version = "",
+        abi_libc_version = "",
+    )
+
+# Rule that exposes the configuration built by _linux_amd64_toolchain_info as a
+# CcToolchainConfigInfo provider. It takes no attributes.
+provide_linux_amd64_toolchain_config = rule(
+    implementation = _linux_amd64_toolchain_info,
+    provides = [CcToolchainConfigInfo],
+    attrs = {},
+)
+
+def _make_action_configs():
+    """Builds the action_configs that bind each compile, link and archive action to a tool.
+
+    Bazel normally restricts us to referring to (and therefore running) executables/scripts
+    that are in this directory (that is, EXEC_ROOT/toolchain). The executables we actually want
+    to run are brought in via WORKSPACE.bazel and are located in EXEC_ROOT/external/clang....
+    Every tool below therefore points at a "trampoline script" in this directory, which in turn
+    calls the binary from the toolchain directory.
+
+    action_configs also let us build arguments from the Bazel environment when needed; the
+    static-library (archive) action at the end of this function does exactly that.
+
+    Returns:
+        A list with one action_config per supported action.
+    """
+
+    # https://cs.opensource.google/bazel/bazel/+/master:tools/cpp/cc_toolchain_config_lib.bzl;l=435;drc=3b9e6f201a9a3465720aad8712ab7bcdeaf2e5da
+    clang = tool(path = "linux_amd64_trampolines/clang.sh")
+    lld = tool(path = "linux_amd64_trampolines/lld.sh")
+    ar = tool(path = "linux_amd64_trampolines/ar.sh")
+
+    # The archive flags follow the same recipe as
+    # https://github.com/emscripten-core/emsdk/blob/7f39d100d8cd207094decea907121df72065517e/bazel/emscripten_toolchain/crosstool.bzl#L143
+    # By default, there are no flags or libraries passed to the llvm-ar tool, so we need to
+    # specify them. The variables named by expand_if_available are documented at
+    # https://docs.bazel.build/versions/main/cc-toolchain-config-reference.html#cctoolchainconfiginfo-build-variables
+
+    # https://llvm.org/docs/CommandGuide/llvm-ar.html
+    #   r: replace existing files, or insert them if they do not already exist
+    #   c: create the archive if it doesn't already exist
+    #   s: add a symbol table
+    #   D: use deterministic timestamps
+    # Despite the name, output_execpath just refers to linker output, e.g. libFoo.a
+    archive_mode_and_output = flag_set(
+        flag_groups = [
+            flag_group(
+                flags = ["rcsD", "%{output_execpath}"],
+                expand_if_available = "output_execpath",
+            ),
+        ],
+    )
+
+    # A library entry of type "object_file" contributes its own name...
+    single_object_file = flag_group(
+        flags = ["%{libraries_to_link.name}"],
+        expand_if_equal = variable_with_value(
+            name = "libraries_to_link.type",
+            value = "object_file",
+        ),
+    )
+
+    # ...while an "object_file_group" entry contributes every object file it holds.
+    object_file_group = flag_group(
+        flags = ["%{libraries_to_link.object_files}"],
+        iterate_over = "libraries_to_link.object_files",
+        expand_if_equal = variable_with_value(
+            name = "libraries_to_link.type",
+            value = "object_file_group",
+        ),
+    )
+    archive_inputs = flag_set(
+        flag_groups = [
+            flag_group(
+                iterate_over = "libraries_to_link",
+                flag_groups = [single_object_file, object_file_group],
+                expand_if_available = "libraries_to_link",
+            ),
+        ],
+    )
+
+    # Forward the params file (as an @file argument) whenever Bazel supplies one.
+    archive_param_file = flag_set(
+        flag_groups = [
+            flag_group(
+                flags = ["@%{linker_param_file}"],
+                expand_if_available = "linker_param_file",
+            ),
+        ],
+    )
+
+    # https://cs.opensource.google/bazel/bazel/+/master:tools/cpp/cc_toolchain_config_lib.bzl;l=488;drc=3b9e6f201a9a3465720aad8712ab7bcdeaf2e5da
+    return [
+        action_config(action_name = ACTION_NAMES.assemble, tools = [clang]),
+        action_config(action_name = ACTION_NAMES.c_compile, tools = [clang]),
+        action_config(action_name = ACTION_NAMES.cpp_compile, tools = [clang]),
+        action_config(action_name = ACTION_NAMES.cpp_link_dynamic_library, tools = [lld]),
+        # Bazel assumes it is talking to clang when building an executable. There are
+        # "-Wl" flags on the command: https://releases.llvm.org/6.0.1/tools/clang/docs/ClangCommandLineReference.html#cmdoption-clang-Wl
+        action_config(action_name = ACTION_NAMES.cpp_link_executable, tools = [clang]),
+        action_config(action_name = ACTION_NAMES.cpp_link_nodeps_dynamic_library, tools = [lld]),
+        action_config(
+            action_name = ACTION_NAMES.cpp_link_static_library,
+            tools = [ar],
+            flag_sets = [
+                archive_mode_and_output,
+                archive_inputs,
+                archive_param_file,
+            ],
+        ),
+        action_config(action_name = ACTION_NAMES.linkstamp_compile, tools = [clang]),
+        action_config(action_name = ACTION_NAMES.preprocess_assemble, tools = [clang]),
+    ]
+
+def _make_default_flags():
+    """Returns the always-enabled "default_flags" feature of this toolchain.
+
+    Everything set here is applied unconditionally to whatever is compiled or linked with this
+    toolchain, third_party deps included. The original note asks that flags which might be
+    conditionally applied be defined in //bazel/copts.bzl instead.
+
+    Returns:
+        A one-element list holding the "default_flags" feature.
+    """
+
+    # Note (carried over from the original): these values must be kept in sync with those
+    # defined in cmake_exporter.go.
+    #
+    # THIS ORDER MATTERS GREATLY. If these are in the wrong order, the #include_next directives
+    # will fail to find the files, causing a compilation error (or, without
+    # -no-canonical-prefixes, a mysterious case where files are included with an absolute path
+    # and fail the build).
+    system_include_flags = [
+        "-isystem",
+        EXTERNAL_TOOLCHAIN + "/include/c++/v1",
+        "-isystem",
+        EXTERNAL_TOOLCHAIN + "/usr/include",
+        "-isystem",
+        EXTERNAL_TOOLCHAIN + "/lib/clang/13.0.0/include",
+        "-isystem",
+        EXTERNAL_TOOLCHAIN + "/usr/include/x86_64-linux-gnu",
+        # We do not want clang to search in absolute paths for files. This makes
+        # Bazel think we are using an outside resource and fail the compile.
+        "-no-canonical-prefixes",
+    ]
+
+    cpp_only_flags = ["-std=c++17"]
+
+    executable_link_flags = [
+        "-fuse-ld=lld",
+        # We chose to use the llvm runtime, not the gcc one because it is already
+        # included in the clang binary
+        "--rtlib=compiler-rt",
+        "-std=c++17",
+        # We statically include these libc++ libraries so they do not need to be
+        # on a developer's machine (they can be tricky to get).
+        EXTERNAL_TOOLCHAIN + "/lib/libc++.a",
+        EXTERNAL_TOOLCHAIN + "/lib/libc++abi.a",
+        EXTERNAL_TOOLCHAIN + "/lib/libunwind.a",
+        # Dynamically Link in the other parts of glibc (not needed in glibc 2.34+)
+        "-lpthread",
+        "-lm",
+        "-ldl",
+    ]
+
+    return [feature(
+        "default_flags",
+        enabled = True,
+        flag_sets = [
+            # Include paths apply to both C and C++ compiles.
+            flag_set(
+                actions = [
+                    ACTION_NAMES.c_compile,
+                    ACTION_NAMES.cpp_compile,
+                ],
+                flag_groups = [flag_group(flags = system_include_flags)],
+            ),
+            # The language standard applies to C++ compiles only.
+            flag_set(
+                actions = [ACTION_NAMES.cpp_compile],
+                flag_groups = [flag_group(flags = cpp_only_flags)],
+            ),
+            # Link flags apply to executables only.
+            flag_set(
+                actions = [ACTION_NAMES.cpp_link_executable],
+                flag_groups = [flag_group(flags = executable_link_flags)],
+            ),
+        ],
+    )]
+
+def _make_diagnostic_flags():
+    """Returns the opt-in features that make compile and link steps emit debug information.
+
+    Both features are disabled by default and are switched on with --features <name>.
+
+    Returns:
+        A list holding the "diagnostic" and "print_search_dirs" features, in that order.
+    """
+    compile_actions = [
+        ACTION_NAMES.c_compile,
+        ACTION_NAMES.cpp_compile,
+    ]
+    link_actions = [ACTION_NAMES.cpp_link_executable]
+
+    # Running a Bazel command with --features diagnostic will cause the compilation and
+    # link steps to be more verbose.
+    diagnostic = feature(
+        "diagnostic",
+        enabled = False,
+        flag_sets = [
+            flag_set(
+                actions = compile_actions,
+                flag_groups = [flag_group(flags = ["--trace-includes", "-v"])],
+            ),
+            flag_set(
+                actions = link_actions,
+                flag_groups = [flag_group(flags = ["-Wl,--verbose", "-v"])],
+            ),
+        ],
+    )
+
+    # Running a Bazel command with --features print_search_dirs will cause the link to fail
+    # but directories searched for libraries, etc will be displayed.
+    print_search_dirs = feature(
+        "print_search_dirs",
+        enabled = False,
+        flag_sets = [
+            flag_set(
+                actions = link_actions,
+                flag_groups = [flag_group(flags = ["--print-search-dirs"])],
+            ),
+        ],
+    )
+
+    return [diagnostic, print_search_dirs]
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/download_clang_linux_amd64.bzl b/third-party/libjxl/libjxl/third_party/skcms/toolchain/download_clang_linux_amd64.bzl
new file mode 100644
index 0000000000..29b8f9da64
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/download_clang_linux_amd64.bzl
@@ -0,0 +1,151 @@
+"""
+This file assembles a toolchain for an amd64 Linux host using the Clang Compiler and glibc.
+
+It downloads the necessary headers, executables, and pre-compiled static/shared libraries to
+the external subfolder of the Bazel cache (the same place third party deps are downloaded with
+http_archive or similar functions in WORKSPACE.bazel). These will be able to be used via our
+custom c++ toolchain configuration (see //toolchain/clang_linux_amd64_toolchain_config.bzl).
+
+Most files are downloaded as .deb files from packages.debian.org (with us acting as the dependency
+resolver) and extracted to
+  [outputRoot (aka Bazel cache)]/[outputUserRoot]/[outputBase]/external/clang_linux_amd64
+  (See https://bazel.build/docs/output_directories#layout-diagram)
+which will act as our sysroot.
+"""
+
+load("//toolchain:utils.bzl", "gcs_mirror_url")
+
+# Pinned clang/LLVM release archive. clang_url is downloaded (via gcs_mirror_url) and verified
+# against clang_sha256; clang_prefix is the archive's leading directory, stripped on extraction.
+# From https://github.com/llvm/llvm-project/releases/download/llvmorg-13.0.0/clang+llvm-13.0.0-x86_64-linux-gnu-ubuntu-20.04.tar.xz.sha256
+clang_prefix = "clang+llvm-13.0.0-x86_64-linux-gnu-ubuntu-20.04/"
+clang_sha256 = "2c2fb857af97f41a5032e9ecadf7f78d3eff389a5cd3c9ec620d24f134ceb3c8"
+clang_url = "https://github.com/llvm/llvm-project/releases/download/llvmorg-13.0.0/clang+llvm-13.0.0-x86_64-linux-gnu-ubuntu-20.04.tar.xz"
+
+# Debian packages extracted on top of the clang archive by _download_clang_linux_amd64_impl.
+# Each entry gives the "url" of a .deb file and the "sha256" it is verified against.
+debs_to_install = [
+    # These three comprise glibc. libc6 has the shared libraries, like libc itself, the math library
+    # (libm), etc. linux-libc-dev has the header files specific to linux. libc6-dev has the libc
+    # system headers (e.g. malloc.h, math.h).
+    {
+        # From https://packages.debian.org/bullseye/amd64/libc6/download
+        "sha256": "a6263062b476cee1052972621d473b159debec6e424f661eda88248b00331d79",
+        "url": "https://ftp.debian.org/debian/pool/main/g/glibc/libc6_2.31-13+deb11u4_amd64.deb",
+    },
+    {
+        # From https://packages.debian.org/bullseye/amd64/linux-libc-dev/download
+        "sha256": "e89023a5fc58c30ebb8cbb82de77f872baeafe7a5449f574b03cea478f7e9e6d",
+        "url": "https://ftp.debian.org/debian/pool/main/l/linux/linux-libc-dev_5.10.140-1_amd64.deb",
+    },
+    {
+        # From https://packages.debian.org/bullseye/amd64/libc6-dev/download
+        "sha256": "5f368eb89d102ccd23529a02fb17aaa1c15e7612506e22ef0c559b71f5049a91",
+        "url": "https://ftp.debian.org/debian/pool/main/g/glibc/libc6-dev_2.31-13+deb11u4_amd64.deb",
+    },
+]
+
+def _download_and_extract_deb(ctx, deb, sha256, prefix, output = ""):
+    """Fetches a .deb package and unpacks its payload into the given output directory.
+
+    Args:
+        ctx: The repository context used for the download, extract and delete calls.
+        deb: URL of the .deb file; it is passed through gcs_mirror_url together with sha256.
+        sha256: Expected checksum of the .deb file.
+        prefix: Directory prefix stripped from the paths inside data.tar.xz.
+        output: Directory to extract into; the empty default is forwarded to ctx.extract as is.
+    """
+    mirrored_urls = gcs_mirror_url(deb, sha256)
+
+    # https://bazel.build/rules/lib/repository_ctx#download_and_extract
+    # Step 1: unpack the .deb itself into a scratch directory. A .deb file has a data.tar.xz
+    # and a control.tar.xz, but the important contents (i.e. the headers or libs) are in the
+    # data.tar.xz
+    ctx.download_and_extract(
+        url = mirrored_urls,
+        sha256 = sha256,
+        output = "tmp",
+    )
+
+    # https://bazel.build/rules/lib/repository_ctx#extract
+    # Step 2: unpack the payload archive into its final destination.
+    ctx.extract(
+        archive = "tmp/data.tar.xz",
+        stripPrefix = prefix,
+        output = output,
+    )
+
+    # Step 3: remove the scratch directory.
+    ctx.delete("tmp")
+
+def _download_clang_linux_amd64_impl(ctx):
+    """Repository rule implementation: downloads clang plus glibc and writes a BUILD.bazel.
+
+    The clang archive is extracted into the repository root, every package listed in
+    debs_to_install is extracted on top of it, and a BUILD.bazel file is written that exposes
+    the files as the archive_files, compile_files and link_files filegroups.
+
+    Args:
+        ctx: The repository context.
+    """
+
+    # Download the clang toolchain (the extraction can take a while)
+    # https://bazel.build/rules/lib/repository_ctx#download_and_extract
+    ctx.download_and_extract(
+        url = gcs_mirror_url(clang_url, clang_sha256),
+        output = "",
+        stripPrefix = clang_prefix,
+        sha256 = clang_sha256,
+    )
+
+    # Extract all the debs into our sysroot. This is very similar to installing them, except their
+    # dependencies are not installed automatically.
+    for deb in debs_to_install:
+        _download_and_extract_deb(
+            ctx,
+            deb["url"],
+            deb["sha256"],
+            ".",
+        )
+
+    # Create a BUILD.bazel file that makes the files downloaded into the toolchain visible.
+    # We have separate groups for each task because doing less work (sandboxing fewer files
+    # or uploading less data to RBE) makes compiles go faster. We try to strike a balance
+    # between minimal specifications and not having to edit this file often with our use
+    # of globs.
+    # The header of the generated file names this .bzl file so that readers of the generated
+    # BUILD.bazel can find where it comes from.
+    # https://bazel.build/rules/lib/repository_ctx#file
+    ctx.file(
+        "BUILD.bazel",
+        content = """
+# DO NOT EDIT THIS BAZEL FILE DIRECTLY
+# Generated from ctx.file action in download_clang_linux_amd64.bzl
+filegroup(
+    name = "archive_files",
+    srcs = [
+        "bin/llvm-ar",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+filegroup(
+    name = "compile_files",
+    srcs = [
+        "bin/clang",
+    ] + glob(
+        include = [
+            "include/c++/v1/**",
+            "usr/include/**",
+            "lib/clang/13.0.0/include/**",
+            "usr/include/x86_64-linux-gnu/**",
+        ],
+        allow_empty = False,
+    ),
+    visibility = ["//visibility:public"],
+)
+
+filegroup(
+    name = "link_files",
+    srcs = [
+        "bin/clang",
+        "bin/ld.lld",
+        "bin/lld",
+        "lib/libc++.a",
+        "lib/libc++abi.a",
+        "lib/libunwind.a",
+        "lib64/ld-linux-x86-64.so.2",
+    ] + glob(
+        include = [
+            "lib/clang/13.0.0/lib/**",
+            "lib/x86_64-linux-gnu/**",
+            "usr/lib/x86_64-linux-gnu/**",
+        ],
+        allow_empty = False,
+    ),
+    visibility = ["//visibility:public"],
+)
+""",
+        executable = False,
+    )
+
+# Repository rule wrapping _download_clang_linux_amd64_impl; it declares no attributes.
+# https://bazel.build/rules/repository_rules
+download_clang_linux_amd64 = repository_rule(
+    doc = "Downloads clang, and all supporting headers, executables, " +
+          "and shared libraries required to build skcms on a Linux amd64 host",
+    attrs = {},
+    implementation = _download_clang_linux_amd64_impl,
+)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/download_ndk_linux_amd64.bzl b/third-party/libjxl/libjxl/third_party/skcms/toolchain/download_ndk_linux_amd64.bzl
new file mode 100644
index 0000000000..1002900482
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/download_ndk_linux_amd64.bzl
@@ -0,0 +1,20 @@
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+
+# Path to the Android NDK from the point of view of the cc_toolchain rule.
+# Note how this matches the name in http_archive.
+NDK_PATH = "external/ndk_linux_amd64"
+
+def download_ndk_linux_amd64(name):
+    """Downloads the Android NDK under external/ndk_linux_amd64."""
+
+    # Archive taken from https://github.com/android/ndk/wiki/Unsupported-Downloads#r21e.
+    http_archive(
+        name = "ndk_linux_amd64",
+        urls = [
+            "https://dl.google.com/android/repository/android-ndk-r21e-linux-x86_64.zip",
+            "https://storage.googleapis.com/skia-world-readable/bazel/ad7ce5467e18d40050dc51b8e7affc3e635c85bd8c59be62de32352328ed467e.zip",
+        ],
+        sha256 = "ad7ce5467e18d40050dc51b8e7affc3e635c85bd8c59be62de32352328ed467e",
+        strip_prefix = "android-ndk-r21e",
+        build_file = Label("//toolchain:ndk.BUILD"),
+    )
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/download_toolchains.bzl b/third-party/libjxl/libjxl/third_party/skcms/toolchain/download_toolchains.bzl
new file mode 100644
index 0000000000..a2690b6d9c
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/download_toolchains.bzl
@@ -0,0 +1,34 @@
+"""
+This file exports the various toolchains for the hosts that we support building skcms on.
+
+Supported:
+ - Linux amd64 (targeting Linux amd64 and Android)
+
+Planned:
+ - Windows amd64
+ - Mac M1 and Intel
+
+"""
+
+load(":download_clang_linux_amd64.bzl", "download_clang_linux_amd64")
+load(":download_ndk_linux_amd64.bzl", "download_ndk_linux_amd64")
+
+name_toolchain = {
+    "clang_linux_amd64": download_clang_linux_amd64,
+    "ndk_linux_amd64": download_ndk_linux_amd64,
+}
+
+def download_toolchains_for_skcms(*args):
+    """
+    Point Bazel to the correct rules for downloading the different toolchains.
+
+    Args:
+        *args: multiple toolchains, see top of file for
+               list of supported toolchains.
+    """
+
+    for toolchain_name in args:
+        if toolchain_name not in name_toolchain:
+            fail("unrecognized toolchain name " + toolchain_name)
+        download_toolchain = name_toolchain[toolchain_name]
+        download_toolchain(name = toolchain_name)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/linux_amd64_trampolines/ar.sh b/third-party/libjxl/libjxl/third_party/skcms/toolchain/linux_amd64_trampolines/ar.sh
new file mode 100755
index 0000000000..94c9da2380
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/linux_amd64_trampolines/ar.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Copyright 2022 Google LLC
+#
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+# Trampoline: forwards every argument, unmodified, to the downloaded llvm-ar.
+external/clang_linux_amd64/bin/llvm-ar "$@"
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/linux_amd64_trampolines/clang.sh b/third-party/libjxl/libjxl/third_party/skcms/toolchain/linux_amd64_trampolines/clang.sh
new file mode 100755
index 0000000000..be6e59e554
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/linux_amd64_trampolines/clang.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Copyright 2022 Google LLC
+#
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+# Trampoline: forwards every argument, unmodified, to the downloaded clang.
+external/clang_linux_amd64/bin/clang "$@"
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/ndk.BUILD b/third-party/libjxl/libjxl/third_party/skcms/toolchain/ndk.BUILD
new file mode 100644
index 0000000000..81c9580a0e
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/ndk.BUILD
@@ -0,0 +1,65 @@
+# This file is based on the `external/androidndk/BUILD.bazel` file produced by the built-in
+# `android_ndk_repository` Bazel rule[1], which was used to build the SkCMS repository up until
+# this revision[2].
+#
+# The paths in this file point to locations inside the expanded Android NDK ZIP file (found at
+# external/ndk_linux_amd64), and must be updated every time we upgrade to a new Android NDK version.
+#
+# [1] https://github.com/bazelbuild/bazel/blob/4710ef82ce34572878e07c52e83a0144d707f140/src/main/java/com/google/devtools/build/lib/bazel/rules/android/AndroidNdkRepositoryFunction.java
+# [2] https://skia.googlesource.com/skcms/+/30c8e303800c256febb03a09fdcda7f75d119b1b/WORKSPACE#22
+
+filegroup(
+    name = "arm64_v8a_all_files",
+    srcs = glob(["toolchains/llvm/**"]) + glob([
+        "platforms/android-29/arch-arm64/**/*",
+        "sources/cxx-stl/llvm-libc++/include/**/*",
+        "sources/cxx-stl/llvm-libc++abi/include/**/*",
+        "sources/android/support/include/**/*",
+        "sysroot/**/*",
+        "toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/**/*",
+    ]) + [
+        ":arm64_v8a_dynamic_runtime_libraries",
+        ":arm64_v8a_static_runtime_libraries",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+filegroup(
+    name = "arm64_v8a_dynamic_runtime_libraries",
+    srcs = glob(["sources/cxx-stl/llvm-libc++/libs/arm64-v8a/*.so"]),
+    visibility = ["//visibility:public"],
+)
+
+filegroup(
+    name = "arm64_v8a_static_runtime_libraries",
+    srcs = glob(["sources/cxx-stl/llvm-libc++/libs/arm64-v8a/*.a"]),
+    visibility = ["//visibility:public"],
+)
+
+filegroup(
+    name = "armeabi_v7a_all_files",
+    srcs = glob(["toolchains/llvm/**"]) + glob([
+        "platforms/android-29/arch-arm/**/*",
+        "sources/cxx-stl/llvm-libc++/include/**/*",
+        "sources/cxx-stl/llvm-libc++abi/include/**/*",
+        "sources/android/support/include/**/*",
+        "sysroot/**/*",
+        "toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/**/*",
+    ]) + [
+        ":armeabi_v7a_dynamic_runtime_libraries",
+        ":armeabi_v7a_static_runtime_libraries",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+filegroup(
+    name = "armeabi_v7a_dynamic_runtime_libraries",
+    srcs = glob(["sources/cxx-stl/llvm-libc++/libs/armeabi-v7a/*.so"]),
+    visibility = ["//visibility:public"],
+)
+
+filegroup(
+    name = "armeabi_v7a_static_runtime_libraries",
+    srcs = glob(["sources/cxx-stl/llvm-libc++/libs/armeabi-v7a/*.a"]),
+    visibility = ["//visibility:public"],
+)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/ndk_linux_arm64_toolchain_config.bzl b/third-party/libjxl/libjxl/third_party/skcms/toolchain/ndk_linux_arm64_toolchain_config.bzl
new file mode 100644
index 0000000000..80d4a6c2ef
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/ndk_linux_arm64_toolchain_config.bzl
@@ -0,0 +1,404 @@
+"""This module defines the ndk_cc_toolchain_config rule.
+
+This file is based on the `external/androidndk/cc_toolchain_config.bzl` file produced by the
+built-in `android_ndk_repository` Bazel rule[1], which was used to build the SkCMS repository up
+until this revision[2].
+
+The paths in this file point to locations inside the expanded Android NDK ZIP file (found at
+external/ndk_linux_amd64), and must be updated every time we upgrade to a new Android NDK version.
+
+[1] https://github.com/bazelbuild/bazel/blob/4710ef82ce34572878e07c52e83a0144d707f140/src/main/java/com/google/devtools/build/lib/bazel/rules/android/AndroidNdkRepositoryFunction.java#L422
+[2] https://skia.googlesource.com/skcms/+/30c8e303800c256febb03a09fdcda7f75d119b1b/WORKSPACE#22
+"""
+
+load("@bazel_tools//tools/build_defs/cc:action_names.bzl", "ACTION_NAMES")
+load(
+    "@bazel_tools//tools/cpp:cc_toolchain_config_lib.bzl",
+    "feature",
+    "flag_group",
+    "flag_set",
+    "tool_path",
+    "with_feature_set",
+)
+load(":download_ndk_linux_amd64.bzl", "NDK_PATH")
+
+# Supported CPUs.
+_ARMEABI_V7A = "armeabi-v7a"
+_ARM64_V8A = "arm64-v8a"
+
+_all_compile_actions = [
+    ACTION_NAMES.c_compile,
+    ACTION_NAMES.cpp_compile,
+    ACTION_NAMES.linkstamp_compile,
+    ACTION_NAMES.assemble,
+    ACTION_NAMES.preprocess_assemble,
+    ACTION_NAMES.cpp_header_parsing,
+    ACTION_NAMES.cpp_module_compile,
+    ACTION_NAMES.cpp_module_codegen,
+    ACTION_NAMES.clif_match,
+    ACTION_NAMES.lto_backend,
+]
+
+_all_link_actions = [
+    ACTION_NAMES.cpp_link_executable,
+    ACTION_NAMES.cpp_link_dynamic_library,
+    ACTION_NAMES.cpp_link_nodeps_dynamic_library,
+]
+
+def _get_default_compile_flags(cpu):
+    if cpu == _ARMEABI_V7A:
+        return [
+            "-D__ANDROID_API__=29",
+            "-isystem",
+            NDK_PATH + "/sysroot/usr/include/arm-linux-androideabi",
+            "-target",
+            "armv7-none-linux-androideabi",
+            "-march=armv7-a",
+            "-mfloat-abi=softfp",
+            "-mfpu=vfpv3-d16",
+            "-gcc-toolchain",
+            NDK_PATH + "/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64",
+            "-fpic",
+            "-no-canonical-prefixes",
+            "-Wno-invalid-command-line-argument",
+            "-Wno-unused-command-line-argument",
+            "-funwind-tables",
+            "-fstack-protector-strong",
+            "-fno-addrsig",
+            "-Werror=return-type",
+            "-Werror=int-to-pointer-cast",
+            "-Werror=pointer-to-int-cast",
+            "-Werror=implicit-function-declaration",
+        ]
+    if cpu == _ARM64_V8A:
+        return [
+            "-gcc-toolchain",
+            NDK_PATH + "/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64",
+            "-target",
+            "aarch64-none-linux-android",
+            "-fpic",
+            "-isystem",
+            NDK_PATH + "/sysroot/usr/include/aarch64-linux-android",
+            "-D__ANDROID_API__=29",
+            "-no-canonical-prefixes",
+            "-Wno-invalid-command-line-argument",
+            "-Wno-unused-command-line-argument",
+            "-funwind-tables",
+            "-fstack-protector-strong",
+            "-fno-addrsig",
+            "-Werror=return-type",
+            "-Werror=int-to-pointer-cast",
+            "-Werror=pointer-to-int-cast",
+            "-Werror=implicit-function-declaration",
+        ]
+    fail("Unknown CPU: " + cpu)
+
+def _get_default_link_flags(cpu):
+    if cpu == _ARMEABI_V7A:
+        return [
+            "-target",
+            "armv7-none-linux-androideabi",
+            "-gcc-toolchain",
+            NDK_PATH + "/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64",
+            "-L",
+            NDK_PATH + "/sources/cxx-stl/llvm-libc++/libs/armeabi-v7a",
+            "-no-canonical-prefixes",
+            "-Wl,-z,relro",
+            "-Wl,--gc-sections",
+        ]
+    if cpu == _ARM64_V8A:
+        return [
+            "-gcc-toolchain",
+            NDK_PATH + "/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64",
+            "-target",
+            "aarch64-none-linux-android",
+            "-L",
+            NDK_PATH + "/sources/cxx-stl/llvm-libc++/libs/arm64-v8a",
+            "-no-canonical-prefixes",
+            "-Wl,-z,relro",
+            "-Wl,--gc-sections",
+        ]
+    fail("Unknown CPU: " + cpu)
+
+def _get_default_dbg_flags(cpu):
+    if cpu == _ARMEABI_V7A:
+        return ["-g", "-fno-strict-aliasing", "-O0", "-UNDEBUG"]
+    if cpu == _ARM64_V8A:
+        return ["-O0", "-g", "-UNDEBUG"]
+    fail("Unknown CPU: " + cpu)
+
+def _get_default_opt_flags(cpu):
+    if cpu == _ARMEABI_V7A:
+        return ["-mthumb", "-Os", "-g", "-DNDEBUG"]
+    if cpu == _ARM64_V8A:
+        return ["-O2", "-g", "-DNDEBUG"]
+    fail("Unknown CPU: " + cpu)
+
+def _get_toolchain_identifier(cpu):
+    if cpu == _ARMEABI_V7A:
+        return "ndk-armeabi-v7a-toolchain"
+    if cpu == _ARM64_V8A:
+        return "ndk-arm64-v8a-toolchain"
+    fail("Unknown CPU: " + cpu)
+
+def _get_target_system_name(cpu):
+    if cpu == _ARMEABI_V7A:
+        return "arm-linux-androideabi"
+    if cpu == _ARM64_V8A:
+        return "aarch64-linux-android"
+    fail("Unknown CPU: " + cpu)
+
+def _get_builtin_sysroot(cpu):
+    if cpu == _ARMEABI_V7A:
+        return NDK_PATH + "/platforms/android-29/arch-arm"
+    if cpu == _ARM64_V8A:
+        return NDK_PATH + "/platforms/android-29/arch-arm64"
+    fail("Unknown CPU: " + cpu)
+
+def _get_tool_paths(cpu):
+    # The cc_common.create_cc_toolchain_config_info function expects tool paths to point to files
+    # under the directory in which it is invoked. This means we cannot directly reference tools
+    # under external/ndk_linux_amd64. The solution is to use "trampoline" scripts that pass through
+    # any command-line arguments to the NDK binaries under external/ndk_linux_amd64.
+
+    if cpu == _ARMEABI_V7A:
+        return [
+            tool_path(
+                name = "ar",
+                path = "android_trampolines/arm-linux-androideabi-ar.sh",
+            ),
+            tool_path(
+                name = "cpp",
+                path = "android_trampolines/clang.sh",
+            ),
+            tool_path(
+                name = "dwp",
+                path = "android_trampolines/arm-linux-androideabi-dwp.sh",
+            ),
+            tool_path(
+                name = "gcc",
+                path = "android_trampolines/clang.sh",
+            ),
+            tool_path(
+                name = "gcov",
+                path = "/bin/false",
+            ),
+            tool_path(
+                name = "ld",
+                path = "android_trampolines/arm-linux-androideabi-ld.sh",
+            ),
+            tool_path(
+                name = "nm",
+                path = "android_trampolines/arm-linux-androideabi-nm.sh",
+            ),
+            tool_path(
+                name = "objcopy",
+                path = "android_trampolines/arm-linux-androideabi-objcopy.sh",
+            ),
+            tool_path(
+                name = "objdump",
+                path = "android_trampolines/arm-linux-androideabi-objdump.sh",
+            ),
+            tool_path(
+                name = "strip",
+                path = "android_trampolines/arm-linux-androideabi-strip.sh",
+            ),
+        ]
+    if cpu == _ARM64_V8A:
+        return [
+            tool_path(
+                name = "ar",
+                path = "android_trampolines/aarch64-linux-android-ar.sh",
+            ),
+            tool_path(
+                name = "cpp",
+                path = "android_trampolines/clang.sh",
+            ),
+            tool_path(
+                name = "dwp",
+                path = "android_trampolines/aarch64-linux-android-dwp.sh",
+            ),
+            tool_path(
+                name = "gcc",
+                path = "android_trampolines/clang.sh",
+            ),
+            tool_path(
+                name = "gcov",
+                path = "/bin/false",
+            ),
+            tool_path(
+                name = "ld",
+                path = "android_trampolines/aarch64-linux-android-ld.sh",
+            ),
+            tool_path(
+                name = "nm",
+                path = "android_trampolines/aarch64-linux-android-nm.sh",
+            ),
+            tool_path(
+                name = "objcopy",
+                path = "android_trampolines/aarch64-linux-android-objcopy.sh",
+            ),
+            tool_path(
+                name = "objdump",
+                path = "android_trampolines/aarch64-linux-android-objdump.sh",
+            ),
+            tool_path(
+                name = "strip",
+                path = "android_trampolines/aarch64-linux-android-strip.sh",
+            ),
+        ]
+    fail("Unknown CPU: " + cpu)
+
+def _ndk_cc_toolchain_config_impl(ctx):
+    default_compile_flags = _get_default_compile_flags(ctx.attr.cpu)
+    unfiltered_compile_flags = [
+        "-isystem",
+        NDK_PATH + "/sources/cxx-stl/llvm-libc++/include",
+        "-isystem",
+        NDK_PATH + "/sources/cxx-stl/llvm-libc++abi/include",
+        "-isystem",
+        NDK_PATH + "/sources/android/support/include",
+        "-isystem",
+        NDK_PATH + "/sysroot/usr/include",
+    ]
+    default_link_flags = _get_default_link_flags(ctx.attr.cpu)
+    default_fastbuild_flags = [""]
+    default_dbg_flags = _get_default_dbg_flags(ctx.attr.cpu)
+    default_opt_flags = _get_default_opt_flags(ctx.attr.cpu)
+
+    opt_feature = feature(name = "opt")
+    fastbuild_feature = feature(name = "fastbuild")
+    dbg_feature = feature(name = "dbg")
+    supports_dynamic_linker_feature = feature(name = "supports_dynamic_linker", enabled = True)
+    supports_pic_feature = feature(name = "supports_pic", enabled = True)
+    static_link_cpp_runtimes_feature = feature(name = "static_link_cpp_runtimes", enabled = True)
+
+    default_compile_flags_feature = feature(
+        name = "default_compile_flags",
+        enabled = True,
+        flag_sets = [
+            flag_set(
+                actions = _all_compile_actions,
+                flag_groups = [flag_group(flags = default_compile_flags)],
+            ),
+            flag_set(
+                actions = _all_compile_actions,
+                flag_groups = [flag_group(flags = default_fastbuild_flags)],
+                with_features = [with_feature_set(features = ["fastbuild"])],
+            ),
+            flag_set(
+                actions = _all_compile_actions,
+                flag_groups = [flag_group(flags = default_dbg_flags)],
+                with_features = [with_feature_set(features = ["dbg"])],
+            ),
+            flag_set(
+                actions = _all_compile_actions,
+                flag_groups = [flag_group(flags = default_opt_flags)],
+                with_features = [with_feature_set(features = ["opt"])],
+            ),
+        ],
+    )
+
+    default_link_flags_feature = feature(
+        name = "default_link_flags",
+        enabled = True,
+        flag_sets = [
+            flag_set(
+                actions = _all_link_actions,
+                flag_groups = [flag_group(flags = default_link_flags)],
+            ),
+        ],
+    )
+
+    user_compile_flags_feature = feature(
+        name = "user_compile_flags",
+        enabled = True,
+        flag_sets = [
+            flag_set(
+                actions = _all_compile_actions,
+                flag_groups = [
+                    flag_group(
+                        flags = ["%{user_compile_flags}"],
+                        iterate_over = "user_compile_flags",
+                        expand_if_available = "user_compile_flags",
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    sysroot_feature = feature(
+        name = "sysroot",
+        enabled = True,
+        flag_sets = [
+            flag_set(
+                actions = _all_compile_actions + _all_link_actions,
+                flag_groups = [
+                    flag_group(
+                        flags = ["--sysroot=%{sysroot}"],
+                        expand_if_available = "sysroot",
+                    ),
+                ],
+            ),
+        ],
+    )
+
+    unfiltered_compile_flags_feature = feature(
+        name = "unfiltered_compile_flags",
+        enabled = True,
+        flag_sets = [
+            flag_set(
+                actions = _all_compile_actions,
+                flag_groups = [flag_group(flags = unfiltered_compile_flags)],
+            ),
+        ],
+    )
+
+    features = [
+        default_compile_flags_feature,
+        default_link_flags_feature,
+        supports_dynamic_linker_feature,
+        supports_pic_feature,
+        static_link_cpp_runtimes_feature,
+        fastbuild_feature,
+        dbg_feature,
+        opt_feature,
+        user_compile_flags_feature,
+        sysroot_feature,
+        unfiltered_compile_flags_feature,
+    ]
+
+    cxx_builtin_include_directories = [
+        NDK_PATH + "/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/9.0.9/include",
+        "%sysroot%/usr/include",
+        NDK_PATH + "/sysroot/usr/include",
+    ]
+
+    # https://bazel.build/rules/lib/cc_common#create_cc_toolchain_config_info
+    return cc_common.create_cc_toolchain_config_info(
+        ctx = ctx,
+        toolchain_identifier = _get_toolchain_identifier(ctx.attr.cpu),
+        host_system_name = "local",
+        target_system_name = _get_target_system_name(ctx.attr.cpu),
+        target_cpu = ctx.attr.cpu,
+        target_libc = "local",
+        compiler = "clang9.0.9",
+        abi_version = ctx.attr.cpu,
+        abi_libc_version = "local",
+        features = features,
+        tool_paths = _get_tool_paths(ctx.attr.cpu),
+        cxx_builtin_include_directories = cxx_builtin_include_directories,
+        builtin_sysroot = _get_builtin_sysroot(ctx.attr.cpu),
+    )
+
+ndk_cc_toolchain_config = rule(
+    implementation = _ndk_cc_toolchain_config_impl,
+    attrs = {
+        "cpu": attr.string(
+            mandatory = True,
+            values = [_ARMEABI_V7A, _ARM64_V8A],
+            doc = "Target CPU.",
+        ),
+    },
+    provides = [CcToolchainConfigInfo],
+)
diff --git a/third-party/libjxl/libjxl/third_party/skcms/toolchain/utils.bzl b/third-party/libjxl/libjxl/third_party/skcms/toolchain/utils.bzl
new file mode 100644
index 0000000000..fd7a4b8600
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/toolchain/utils.bzl
@@ -0,0 +1,40 @@
+"""This module provides the gcs_mirror_url macro."""
+
+# Set to True to force the macro to only return the mirror URL.
+_TEST_GCS_MIRROR = False
+
+# Must be kept in sync with the suffixes supported by gcs_mirror (e.g.
+# https://skia.googlesource.com/skia/+/8ad66c2340713234df6b249e793415233337a103/bazel/gcs_mirror/gcs_mirror.go#140).
+_SUPPORTED_SUFFIXES = [".tar.gz", ".tgz", ".tar.xz", ".deb", ".zip"]
+
+_GCS_MIRROR_PREFIX = "https://storage.googleapis.com/skia-world-readable/bazel"
+
+def gcs_mirror_url(url, sha256):
+    """Takes the URL of an external resource and computes its GCS mirror URL.
+
+    We store backup copies of external resources in the skia-world-readable GCS bucket. This macro
+    returns a list with two elements: the original URL, and the mirrored URL.
+
+    Files are expected to be in the mirror location named after their sha256 hash. The files should
+    still have their file extension, as some of the Starlark functions sniff the file extension
+    (e.g. download_and_extract). See //bazel/gcs_mirror for an automated way to update this mirror.
+
+    To mirror a new URL, please use the `gcs_mirror` utility found at
+    https://skia.googlesource.com/skia/+/8ad66c2340713234df6b249e793415233337a103/bazel/gcs_mirror/gcs_mirror.go.
+
+    Args:
+        url: URL of the mirrored resource.
+        sha256: SHA256 hash of the mirrored resource.
+    Returns:
+        A list of the form [original URL, mirror URL].
+    """
+    extension = ""
+    for suffix in _SUPPORTED_SUFFIXES:
+        if url.endswith(suffix):
+            extension = suffix
+            break
+    if extension == "":
+        fail("URL %s has an unsupported suffix." % url)
+
+    mirror_url = "%s/%s%s" % (_GCS_MIRROR_PREFIX, sha256, extension)
+    return [mirror_url] if _TEST_GCS_MIRROR else [url, mirror_url]
diff --git a/third-party/libjxl/libjxl/third_party/skcms/whitespace.txt b/third-party/libjxl/libjxl/third_party/skcms/whitespace.txt
new file mode 100644
index 0000000000..9daeafb986
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/skcms/whitespace.txt
@@ -0,0 +1 @@
+test
diff --git a/third-party/libjxl/libjxl/third_party/testing.cmake b/third-party/libjxl/libjxl/third_party/testing.cmake
new file mode 100644
index 0000000000..68368675da
--- /dev/null
+++ b/third-party/libjxl/libjxl/third_party/testing.cmake
@@ -0,0 +1,85 @@
+# Copyright (c) the JPEG XL Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Enable tests in third_party/ as well.
+enable_testing()
+include(CTest)
+
+set(SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_party")
+
+if(BUILD_TESTING)
+# Add GTest from source and alias it to what the find_package(GTest) workflow
+# defines. Omitting googletest/ directory would require it to be available in
+# the base system instead, but it would work just fine. This makes packages
+# using GTest and calling find_package(GTest) actually work.
+if (EXISTS "${SOURCE_DIR}/googletest/CMakeLists.txt" AND
+    NOT JPEGXL_FORCE_SYSTEM_GTEST)
+  add_subdirectory(third_party/googletest EXCLUDE_FROM_ALL)
+
+  set(GTEST_ROOT "${SOURCE_DIR}/googletest/googletest")
+  set(GTEST_INCLUDE_DIR "$<TARGET_PROPERTY:INCLUDE_DIRECTORIES,gtest>"
+      CACHE STRING "")
+  set(GMOCK_INCLUDE_DIR "$<TARGET_PROPERTY:INCLUDE_DIRECTORIES,gmock>")
+  set(GTEST_LIBRARY "$<TARGET_FILE:gtest>")
+  set(GTEST_MAIN_LIBRARY "$<TARGET_FILE:gtest_main>")
+  add_library(GTest::gtest ALIAS gtest)
+  add_library(GTest::GTest ALIAS gtest)
+  add_library(GTest::gtest_main ALIAS gtest_main)
+  add_library(GTest::Main ALIAS gtest_main)
+
+  set_target_properties(gtest PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
+  set_target_properties(gmock PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
+  set_target_properties(gtest_main PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
+  set_target_properties(gmock_main PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
+
+  # googletest doesn't compile clean with clang-cl (-Wundef)
+  if (WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+    set_target_properties(gtest PROPERTIES COMPILE_FLAGS "-Wno-error")
+    set_target_properties(gmock PROPERTIES COMPILE_FLAGS "-Wno-error")
+    set_target_properties(gtest_main PROPERTIES COMPILE_FLAGS "-Wno-error")
+    set_target_properties(gmock_main PROPERTIES COMPILE_FLAGS "-Wno-error")
+  endif ()
+  configure_file("${SOURCE_DIR}/googletest/LICENSE"
+                 ${PROJECT_BINARY_DIR}/LICENSE.googletest COPYONLY)
+else()
+  if(JPEGXL_DEP_LICENSE_DIR)
+    configure_file("${JPEGXL_DEP_LICENSE_DIR}/googletest/copyright"
+                   ${PROJECT_BINARY_DIR}/LICENSE.googletest COPYONLY)
+  endif()  # JPEGXL_DEP_LICENSE_DIR
+endif()
+find_package(GTest)
+if (NOT GTEST_FOUND)
+  set(BUILD_TESTING OFF CACHE BOOL "Build tests" FORCE)
+  message(SEND_ERROR "GTest not found. Install googletest package "
+          "(libgtest-dev) in the system or download googletest to "
+          "third_party/googletest from https://github.com/google/googletest . "
+          "To disable tests instead re-run cmake with -DBUILD_TESTING=OFF.")
+endif()  # NOT GTEST_FOUND
+
+# Look for gmock in the system too.
+if (NOT DEFINED GMOCK_INCLUDE_DIR)
+  find_path(
+      GMOCK_INCLUDE_DIR "gmock/gmock.h"
+      HINTS ${GTEST_INCLUDE_DIRS})
+  if (NOT GMOCK_INCLUDE_DIR)
+    set(BUILD_TESTING OFF CACHE BOOL "Build tests" FORCE)
+    message(SEND_ERROR "GMock not found. Install googletest package "
+            "(libgmock-dev) in the system or download googletest to "
+            "third_party/googletest from https://github.com/google/googletest . "
+            "To disable tests instead re-run cmake with -DBUILD_TESTING=OFF.")
+  else()
+    message(STATUS "Found GMock: ${GMOCK_INCLUDE_DIR}")
+  endif()  # NOT GMOCK_INCLUDE_DIR
+endif()  # NOT DEFINED GMOCK_INCLUDE_DIR
+endif()  # BUILD_TESTING
diff --git a/third-party/libjxl/libjxl/tools/BUILD b/third-party/libjxl/libjxl/tools/BUILD
new file mode 100644
index 0000000000..664e4419da
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/BUILD
@@ -0,0 +1,8 @@
+package(default_visibility = ["//:__subpackages__"])
+
+cc_library(
+    name = "box",
+    srcs = ["box/box.cc"],
+    hdrs = ["box/box.h"],
+    deps = ["//lib:base"],
+)
diff --git a/third-party/libjxl/libjxl/tools/CMakeLists.txt b/third-party/libjxl/libjxl/tools/CMakeLists.txt
new file mode 100644
index 0000000000..1041b5ba49
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/CMakeLists.txt
@@ -0,0 +1,488 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# ICC detection library used by the comparison and viewer tools. Only defined when JPEGXL_ENABLE_VIEWERS is on and the platform prerequisites (Qt6; additionally ECM and XCB on non-Windows, non-Apple systems) are found.
+if(JPEGXL_ENABLE_VIEWERS)
+if(WIN32)
+  find_package(Qt6 QUIET COMPONENTS Widgets)
+  if (NOT Qt6_FOUND)
+    message(WARNING "Qt6 was not found.")
+  else()
+    add_library(icc_detect STATIC EXCLUDE_FROM_ALL
+      icc_detect/icc_detect_win32.cc
+      icc_detect/icc_detect.h
+    )
+    target_include_directories(icc_detect PRIVATE "${PROJECT_SOURCE_DIR}")
+    target_link_libraries(icc_detect PUBLIC Qt6::Widgets)
+    if(JPEGXL_DEP_LICENSE_DIR)
+      configure_file("${JPEGXL_DEP_LICENSE_DIR}/libqt6widgets6/copyright"
+                     ${PROJECT_BINARY_DIR}/LICENSE.libqt6widgets6 COPYONLY)
+    endif()  # JPEGXL_DEP_LICENSE_DIR
+  endif()  # NOT Qt6_FOUND
+elseif(APPLE)
+  find_package(Qt6 QUIET COMPONENTS Widgets)
+  if (Qt6_FOUND)
+    add_library(icc_detect STATIC EXCLUDE_FROM_ALL
+      icc_detect/icc_detect_empty.cc
+      icc_detect/icc_detect.h
+    )
+    target_include_directories(icc_detect PRIVATE "${PROJECT_SOURCE_DIR}")
+    target_link_libraries(icc_detect PUBLIC Qt6::Widgets)
+  else()
+    message(WARNING "APPLE: Qt6 was not found.")
+  endif()  # Qt6_FOUND
+else()
+  find_package(Qt6 QUIET COMPONENTS Widgets)
+  find_package(ECM QUIET NO_MODULE)
+  if (NOT Qt6_FOUND OR NOT ECM_FOUND)
+    if (NOT Qt6_FOUND)
+      message(WARNING "Qt6 was not found.")
+    else()
+      message(WARNING "extra-cmake-modules were not found.")
+    endif()  # NOT Qt6_FOUND
+  else()
+    set(CMAKE_MODULE_PATH ${ECM_FIND_MODULE_DIR})  # Lets find_package(XCB) below use the ECM find modules.
+    find_package(XCB COMPONENTS XCB)
+    if (XCB_FOUND)
+      add_library(icc_detect STATIC EXCLUDE_FROM_ALL
+        icc_detect/icc_detect_x11.cc
+        icc_detect/icc_detect.h
+      )
+      target_link_libraries(icc_detect PUBLIC jxl-static Qt6::Widgets XCB::XCB)
+    endif ()  # XCB_FOUND
+  endif()  # NOT Qt6_FOUND OR NOT ECM_FOUND
+endif()  # WIN32 / APPLE / other platforms
+endif()  # JPEGXL_ENABLE_VIEWERS
+
+# Tools are added conditionally below.
+set(TOOL_BINARIES)
+# Tools that depend on jxl internal functions.
+set(INTERNAL_TOOL_BINARIES)
+set(FUZZER_CORPUS_BINARIES)  # Filled with the *_fuzzer_corpus executables below.
+
+add_library(jxl_tool STATIC EXCLUDE_FROM_ALL  # Helper library linked into the tool binaries below.
+  cmdline.cc
+  codec_config.cc
+  speed_stats.cc
+  tool_version.cc
+)
+target_compile_options(jxl_tool PUBLIC "${JPEGXL_INTERNAL_FLAGS}")
+target_include_directories(jxl_tool PUBLIC "${PROJECT_SOURCE_DIR}")
+target_link_libraries(jxl_tool PUBLIC hwy)
+jxl_link_libraries(jxl_tool jxl_base-obj)
+
+# The JPEGXL_VERSION is set from the builders. When it is not, try to read the git revision; without one, fall back to "(unknown)".
+if(NOT DEFINED JPEGXL_VERSION OR JPEGXL_VERSION STREQUAL "")
+  find_package(Git QUIET)
+  execute_process(
+      COMMAND "${GIT_EXECUTABLE}" rev-parse --short HEAD
+      OUTPUT_VARIABLE GIT_REV
+      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
+      ERROR_QUIET)
+  string(STRIP "${GIT_REV}" GIT_REV)
+  if(GIT_REV STREQUAL "")
+    set(JPEGXL_VERSION "(unknown)")  # From here on JPEGXL_VERSION is non-empty, so the else() branch below is taken.
+  endif()  # GIT_REV STREQUAL ""
+endif()  # JPEGXL_VERSION not provided
+
+if(NOT DEFINED JPEGXL_VERSION OR JPEGXL_VERSION STREQUAL "")
+  # We are building from a git environment and the user didn't set
+  # JPEGXL_VERSION. Make a target that computes the GIT_REV at build-time always
+  # but only updates the file if it changed. This allows rebuilds without
+  # modifying cmake files to update the JPEGXL_VERSION.
+  message(STATUS "Building with JPEGXL_VERSION=${GIT_REV} (auto-updated)")
+  add_custom_target(
+    tool_version_git
+    ${CMAKE_COMMAND}
+      -D JPEGXL_ROOT_DIR=${CMAKE_SOURCE_DIR}
+      -D DST=${CMAKE_CURRENT_BINARY_DIR}/tool_version_git.h
+      -P ${CMAKE_CURRENT_SOURCE_DIR}/git_version.cmake
+    BYPRODUCTS "${CMAKE_CURRENT_BINARY_DIR}/tool_version_git.h"
+  )
+  add_dependencies(jxl_tool tool_version_git)
+
+  set_source_files_properties(tool_version.cc PROPERTIES
+    COMPILE_DEFINITIONS JPEGXL_VERSION_FROM_GIT=1)
+  target_include_directories(jxl_tool PRIVATE "${CMAKE_CURRENT_BINARY_DIR}")
+  # Note: Ninja looks for dependencies on the jxl_tool target before running
+  # the tool_version_git targets, so when updating the tool_version_git.h the
+  # jxl_tool target is not rebuilt. This forces to generate it at configure time
+  # if needed.
+  execute_process(
+    COMMAND ${CMAKE_COMMAND}
+      -D JPEGXL_ROOT_DIR=${CMAKE_SOURCE_DIR}
+      -D DST=${CMAKE_CURRENT_BINARY_DIR}/tool_version_git.h
+      -P ${CMAKE_CURRENT_SOURCE_DIR}/git_version.cmake)
+else()
+  message(STATUS "Building with JPEGXL_VERSION=${JPEGXL_VERSION}")
+  set_source_files_properties(tool_version.cc PROPERTIES
+    COMPILE_DEFINITIONS JPEGXL_VERSION=\"${JPEGXL_VERSION}\")
+endif()  # JPEGXL_VERSION not provided
+
+if(JPEGXL_ENABLE_TOOLS)
+  # Main compressor.
+  add_executable(cjxl cjxl_main.cc)
+  target_link_libraries(cjxl
+    jxl
+    jxl_extras_codec
+    jxl_threads
+    jxl_tool
+  )
+  list(APPEND TOOL_BINARIES cjxl)
+
+  # Main decompressor.
+  add_executable(djxl djxl_main.cc)
+  target_link_libraries(djxl
+    jxl
+    jxl_extras_codec
+    jxl_threads
+    jxl_tool
+  )
+  list(APPEND TOOL_BINARIES djxl)
+
+  if(JPEGXL_ENABLE_JPEGLI)
+    # Depends on parts of jxl_extras that are only built if libjpeg is found and
+    # jpegli is enabled.
+    add_executable(cjpegli cjpegli.cc)
+    add_executable(djpegli djpegli.cc)
+    list(APPEND INTERNAL_TOOL_BINARIES cjpegli djpegli)  # Linked later by the INTERNAL_TOOL_BINARIES loop.
+  endif()  # JPEGXL_ENABLE_JPEGLI
+
+  add_executable(jxlinfo jxlinfo.c)  # C source that links only against jxl.
+  target_link_libraries(jxlinfo jxl)
+  list(APPEND TOOL_BINARIES jxlinfo)
+
+  if(NOT SANITIZER STREQUAL "none")
+    # Linking a C binary with the C++ JPEG XL implementation when using
+    # address sanitizer is not well supported by clang 9, so force using clang++
+    # for linking this tool if a sanitizer is used.
+    set_target_properties(jxlinfo PROPERTIES LINKER_LANGUAGE CXX)
+  endif()  # SANITIZER != "none"
+
+endif()  # JPEGXL_ENABLE_TOOLS
+
+# Other developer tools. Each name appended here gets an add_executable() below and is linked by the INTERNAL_TOOL_BINARIES loop further down.
+if(JPEGXL_ENABLE_DEVTOOLS)
+  list(APPEND INTERNAL_TOOL_BINARIES
+    butteraugli_main
+    decode_and_encode
+    display_to_hlg
+    exr_to_pq
+    pq_to_hlg
+    render_hlg
+    local_tone_map
+    tone_map
+    texture_to_cube
+    generate_lut_template
+    ssimulacra_main
+    ssimulacra2
+    xyb_range
+    jxl_from_tree
+  )
+
+  add_executable(ssimulacra_main ssimulacra_main.cc ssimulacra.cc)
+  add_executable(ssimulacra2 ssimulacra2_main.cc ssimulacra2.cc)
+  add_executable(butteraugli_main butteraugli_main.cc)
+  add_executable(decode_and_encode decode_and_encode.cc)
+  add_executable(display_to_hlg hdr/display_to_hlg.cc)
+  add_executable(exr_to_pq hdr/exr_to_pq.cc)
+  add_executable(pq_to_hlg hdr/pq_to_hlg.cc)
+  add_executable(render_hlg hdr/render_hlg.cc)
+  add_executable(local_tone_map hdr/local_tone_map.cc)
+  add_executable(tone_map hdr/tone_map.cc)
+  add_executable(texture_to_cube hdr/texture_to_cube.cc)
+  add_executable(generate_lut_template hdr/generate_lut_template.cc)
+  add_executable(xyb_range xyb_range.cc)
+  add_executable(jxl_from_tree jxl_from_tree.cc)
+
+  list(APPEND FUZZER_CORPUS_BINARIES djxl_fuzzer_corpus)  # Corpus binaries declare their own link libraries right here.
+  add_executable(djxl_fuzzer_corpus djxl_fuzzer_corpus.cc)
+  target_link_libraries(djxl_fuzzer_corpus
+    jxl_extras-static
+    jxl_testlib-static
+    jxl_tool
+  )
+  if(JPEGXL_ENABLE_JPEGLI)
+    list(APPEND FUZZER_CORPUS_BINARIES jpegli_dec_fuzzer_corpus)
+    add_executable(jpegli_dec_fuzzer_corpus jpegli_dec_fuzzer_corpus.cc)
+    target_link_libraries(jpegli_dec_fuzzer_corpus
+      jpegli-static
+      jxl_tool
+      jxl_threads-static
+    )
+  endif()  # JPEGXL_ENABLE_JPEGLI
+endif()  # JPEGXL_ENABLE_DEVTOOLS
+
+# Benchmark tools. PNG, WebP and AVIF codec sources are added only when the corresponding library is found.
+if(JPEGXL_ENABLE_BENCHMARK AND JPEGXL_ENABLE_TOOLS)
+  list(APPEND INTERNAL_TOOL_BINARIES
+    benchmark_xl
+  )
+
+  add_executable(benchmark_xl
+    benchmark/benchmark_xl.cc
+    benchmark/benchmark_args.cc
+    benchmark/benchmark_codec.cc
+    benchmark/benchmark_file_io.cc
+    benchmark/benchmark_stats.cc
+    benchmark/benchmark_utils.cc
+    benchmark/benchmark_utils.h
+    benchmark/benchmark_codec_custom.cc
+    benchmark/benchmark_codec_custom.h
+    benchmark/benchmark_codec_jpeg.cc
+    benchmark/benchmark_codec_jpeg.h
+    benchmark/benchmark_codec_jxl.cc
+    benchmark/benchmark_codec_jxl.h
+    ssimulacra2.cc
+    ../third_party/dirent.cc
+  )
+  target_link_libraries(benchmark_xl Threads::Threads)
+  if(MINGW)
+  # MINGW doesn't support glob.h.
+  target_compile_definitions(benchmark_xl PRIVATE "-DHAS_GLOB=0")
+  endif() # MINGW
+
+  if(NOT JPEGXL_BUNDLE_LIBPNG)
+    find_package(PNG)
+  endif()  # NOT JPEGXL_BUNDLE_LIBPNG
+  if(PNG_FOUND)
+    target_sources(benchmark_xl PRIVATE
+      "${CMAKE_CURRENT_LIST_DIR}/benchmark/benchmark_codec_png.cc"
+      "${CMAKE_CURRENT_LIST_DIR}/benchmark/benchmark_codec_png.h"
+    )
+  endif()  # PNG_FOUND
+
+  find_package(PkgConfig)
+  pkg_check_modules(WebP IMPORTED_TARGET libwebp)
+  if(WebP_FOUND)
+    target_sources(benchmark_xl PRIVATE
+      "${CMAKE_CURRENT_LIST_DIR}/benchmark/benchmark_codec_webp.cc"
+      "${CMAKE_CURRENT_LIST_DIR}/benchmark/benchmark_codec_webp.h"
+    )
+    target_compile_definitions(benchmark_xl PRIVATE -DBENCHMARK_WEBP)
+
+    # Use the static version of webp if available.
+    find_library(WebP_STATIC_LINK_LIBRARY NAMES libwebp.a
+        PATHS "${WebP_LIBDIR}")
+    find_library(SharpYuv_STATIC_LINK_LIBRARY NAMES libsharpyuv.a
+        PATHS "${WebP_LIBDIR}")
+    if(NOT WebP_STATIC_LINK_LIBRARY)
+      message(WARNING "Using dynamic libwebp")
+      target_link_libraries(benchmark_xl PkgConfig::WebP)
+    else()
+      target_link_libraries(benchmark_xl "${WebP_STATIC_LINK_LIBRARY}")
+      if(SharpYuv_STATIC_LINK_LIBRARY)
+        target_link_libraries(benchmark_xl "${SharpYuv_STATIC_LINK_LIBRARY}")
+      endif()  # SharpYuv_STATIC_LINK_LIBRARY
+      target_include_directories(benchmark_xl
+          PRIVATE ${WebP_STATIC_INCLUDE_DIRS})
+      target_compile_options(benchmark_xl PRIVATE ${WebP_STATIC_CFLAGS_OTHER})
+    endif()  # NOT WebP_STATIC_LINK_LIBRARY
+  endif()  # WebP_FOUND
+
+  pkg_check_modules(AVIF IMPORTED_TARGET libavif)
+  if(AVIF_FOUND)
+    target_sources(benchmark_xl PRIVATE
+      "${CMAKE_CURRENT_LIST_DIR}/benchmark/benchmark_codec_avif.cc"
+      "${CMAKE_CURRENT_LIST_DIR}/benchmark/benchmark_codec_avif.h"
+    )
+    target_compile_definitions(benchmark_xl PRIVATE -DBENCHMARK_AVIF)
+    target_link_libraries(benchmark_xl PkgConfig::AVIF)
+  endif()  # AVIF_FOUND
+endif()  # JPEGXL_ENABLE_BENCHMARK AND JPEGXL_ENABLE_TOOLS
+
+# All internal tool binaries link against jxl_extras-static and the tool helpers (jxl_tool).
+foreach(BINARY IN LISTS INTERNAL_TOOL_BINARIES)
+  target_link_libraries("${BINARY}"
+    jxl_extras-static
+    jxl_tool
+  )
+endforeach()  # INTERNAL_TOOL_BINARIES
+
+list(APPEND TOOL_BINARIES ${INTERNAL_TOOL_BINARIES} ${FUZZER_CORPUS_BINARIES})  # From here on TOOL_BINARIES holds every tool to install.
+
+foreach(BINARY IN LISTS TOOL_BINARIES)
+  if(JPEGXL_EMSCRIPTEN)
+    set(JXL_WASM_TOOLS_LINK_FLAGS "\
+      -s USE_LIBPNG=1 \
+      -s ALLOW_MEMORY_GROWTH=1 \
+      -s USE_PTHREADS=1 \
+      -s PTHREAD_POOL_SIZE=16 \
+    ")
+    set_target_properties(${BINARY} PROPERTIES LINK_FLAGS "${JXL_WASM_TOOLS_LINK_FLAGS}")
+  endif()  # JPEGXL_EMSCRIPTEN
+endforeach()  # TOOL_BINARIES
+
+install(TARGETS ${TOOL_BINARIES} RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")
+message(STATUS "Building tools: ${TOOL_BINARIES}")
+
+# djxl_fuzzer builds even when not JPEGXL_ENABLE_TOOLS
+set(FUZZER_BINARIES djxl_fuzzer)
+if(JPEGXL_ENABLE_TOOLS)
+  list(APPEND FUZZER_BINARIES
+    color_encoding_fuzzer
+    decode_basic_info_fuzzer
+    cjxl_fuzzer
+    icc_codec_fuzzer
+    fields_fuzzer
+    rans_fuzzer
+    set_from_bytes_fuzzer
+    transforms_fuzzer
+  )
+if(JPEGXL_ENABLE_JPEGLI)
+  list(APPEND FUZZER_BINARIES jpegli_dec_fuzzer)
+endif()  # JPEGXL_ENABLE_JPEGLI
+endif()  # JPEGXL_ENABLE_TOOLS
+
+# Fuzzers. Each entry becomes either a real fuzzer target or a "<name>_runner" stub binary.
+foreach(FUZZER IN LISTS FUZZER_BINARIES)
+  if(JPEGXL_ENABLE_FUZZERS)
+    set(BINARY "${FUZZER}")
+    add_executable("${BINARY}" "${BINARY}.cc")
+    target_link_libraries("${BINARY}" ${JPEGXL_FUZZER_LINK_FLAGS})
+  else()
+    # When not enabled we want a lightweight alternative for regular fuzzers
+    # that just run the target.
+    set(BINARY "${FUZZER}_runner")
+    add_executable("${BINARY}" EXCLUDE_FROM_ALL
+        "fuzzer_stub.cc" "${FUZZER}.cc")
+  endif()  # JPEGXL_ENABLE_FUZZERS
+  target_include_directories("${BINARY}" PRIVATE "${CMAKE_SOURCE_DIR}")
+  if(FUZZER STREQUAL djxl_fuzzer)
+    target_link_libraries("${BINARY}"
+      jxl_dec-static
+      jxl_threads-static
+    )
+  elseif(FUZZER STREQUAL jpegli_dec_fuzzer)
+    target_link_libraries("${BINARY}" jpegli-static)
+  else()
+    target_link_libraries("${BINARY}"
+      jxl_extras_nocodec-static
+      jxl_testlib-static
+      jxl_tool
+    )
+  endif()  # per-fuzzer link libraries
+endforeach()  # FUZZER_BINARIES
+
+# EMSCRIPTEN doesn't support dynamic libraries so testing for linkage there
+# doesn't make much sense.
+if(BUILD_TESTING AND TARGET jxl AND NOT JPEGXL_EMSCRIPTEN)
+# Library API test. This test is only to check that we can link against the
+# shared library from C99 file and don't need to use internal symbols.
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tests)
+add_executable(libjxl_test libjxl_test.c)
+set_property(TARGET libjxl_test PROPERTY C_STANDARD 99)
+if(NOT SANITIZER STREQUAL "none")
+  # Linking a C test binary with the C++ JPEG XL implementation when using
+  # address sanitizer is not well supported by clang 9, so force using clang++
+  # for linking this test if a sanitizer is used.
+  set_target_properties(libjxl_test PROPERTIES LINKER_LANGUAGE CXX)
+endif()  # SANITIZER != "none"
+set_target_properties(libjxl_test PROPERTIES PREFIX "tests/")  # Puts the binary under the tests/ directory created above.
+target_link_libraries(libjxl_test jxl)
+if (NOT MSVC)
+target_compile_options(libjxl_test PRIVATE -Wall -Wextra -Werror)
+if(NOT WIN32)
+  target_compile_options(libjxl_test PRIVATE -pedantic)
+endif()  # NOT WIN32
+endif()  # NOT MSVC
+
+add_test(
+  NAME LibraryCLinkageTest
+  COMMAND libjxl_test
+  WORKING_DIRECTORY $<TARGET_FILE_DIR:jxl>
+)
+# if user decide to set CMAKE_SKIP_RPATH:BOOL=ON make sure libjxl.so.0.7 can
+# still be found:
+if(UNIX AND CMAKE_SKIP_RPATH)
+  set_property(TEST LibraryCLinkageTest PROPERTY ENVIRONMENT
+     LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}/..
+     )
+endif()  # UNIX AND CMAKE_SKIP_RPATH
+
+endif()  # BUILD_TESTING AND TARGET jxl AND NOT JPEGXL_EMSCRIPTEN
+
+# Tools defined in subdirectories. The viewer-related ones are optional; box, conformance and wasm_demo are always added.
+if(JPEGXL_ENABLE_VIEWERS)
+add_subdirectory(viewer)
+add_subdirectory(comparison_viewer)
+add_subdirectory(flicker_test)
+endif()  # JPEGXL_ENABLE_VIEWERS
+
+add_subdirectory(box)
+add_subdirectory(conformance)
+add_subdirectory(wasm_demo)
+
+if(JPEGXL_ENABLE_JNI)  # JNI decoder wrapper: native library, Java jar, and a Java test.
+find_package(JNI QUIET)
+find_package(Java QUIET)
+
+if (JNI_FOUND AND Java_FOUND)
+  include(UseJava)
+
+  # decoder_jni_onload.cc might be necessary for Android; not used yet.
+  add_library(jxl_jni SHARED jni/org/jpeg/jpegxl/wrapper/decoder_jni.cc)
+  target_include_directories(jxl_jni PRIVATE "${JNI_INCLUDE_DIRS}" "${PROJECT_SOURCE_DIR}")
+  target_link_libraries(jxl_jni PUBLIC jxl_dec-static jxl_threads-static)
+  if(NOT DEFINED JPEGXL_INSTALL_JNIDIR)
+    set(JPEGXL_INSTALL_JNIDIR ${CMAKE_INSTALL_LIBDIR})
+  endif()  # NOT DEFINED JPEGXL_INSTALL_JNIDIR
+  install(TARGETS jxl_jni DESTINATION ${JPEGXL_INSTALL_JNIDIR})
+
+  add_jar(jxl_jni_wrapper SOURCES
+    jni/org/jpeg/jpegxl/wrapper/Decoder.java
+    jni/org/jpeg/jpegxl/wrapper/DecoderJni.java
+    jni/org/jpeg/jpegxl/wrapper/ImageData.java
+    jni/org/jpeg/jpegxl/wrapper/PixelFormat.java
+    jni/org/jpeg/jpegxl/wrapper/Status.java
+    jni/org/jpeg/jpegxl/wrapper/StreamInfo.java
+    OUTPUT_NAME org.jpeg.jpegxl
+  )
+  get_target_property(JXL_JNI_WRAPPER_JAR jxl_jni_wrapper JAR_FILE)
+  if(NOT DEFINED JPEGXL_INSTALL_JARDIR)
+    set(JPEGXL_INSTALL_JARDIR ${CMAKE_INSTALL_LIBDIR})
+  endif()  # NOT DEFINED JPEGXL_INSTALL_JARDIR
+  install_jar(jxl_jni_wrapper DESTINATION ${JPEGXL_INSTALL_JARDIR})
+
+  add_jar(jxl_jni_wrapper_test
+    SOURCES jni/org/jpeg/jpegxl/wrapper/DecoderTest.java
+    INCLUDE_JARS jxl_jni_wrapper
+  )
+  get_target_property(JXL_JNI_WRAPPER_TEST_JAR jxl_jni_wrapper_test JAR_FILE)
+
+  if(NOT SANITIZER MATCHES ".san")
+    # NB: Vanilla OpenJDK 8 / 11 are known to work well (i.e. either
+    #     "which java" or JAVA_HOME environment variable point to the path like
+    #     "/usr/lib/jvm/java-xx-openjdk-yyy" on Debian Linux).
+    add_test(
+      NAME test_jxl_jni_wrapper
+      COMMAND ${Java_JAVA_EXECUTABLE}
+              -cp "${JXL_JNI_WRAPPER_JAR}:${JXL_JNI_WRAPPER_TEST_JAR}"
+              -Dorg.jpeg.jpegxl.wrapper.lib=$<TARGET_FILE:jxl_jni>
+              org.jpeg.jpegxl.wrapper.DecoderTest
+    )
+  endif()  # NOT SANITIZER MATCHES ".san"
+endif()  # JNI_FOUND & Java_FOUND
+endif()  # JPEGXL_ENABLE_JNI
+
+# End-to-end tests for the tools. Each selected script runs under bash with the build directory as its only argument.
+if(JPEGXL_TEST_TOOLS)
+find_program (BASH_PROGRAM bash)
+if (BASH_PROGRAM)
+  set(TEST_SCRIPTS)
+  find_package(JPEG)
+  if (JPEG_FOUND AND JPEGXL_ENABLE_TRANSCODE_JPEG)
+    list(APPEND TEST_SCRIPTS roundtrip_test)
+  endif()  # JPEG_FOUND AND JPEGXL_ENABLE_TRANSCODE_JPEG
+  if (JPEG_FOUND AND JPEGXL_ENABLE_JPEGLI)
+    list(APPEND TEST_SCRIPTS jpegli_tools_test)
+  endif()  # JPEG_FOUND AND JPEGXL_ENABLE_JPEGLI
+  foreach(SCRIPT IN LISTS TEST_SCRIPTS)
+    add_test(NAME ${SCRIPT}
+      COMMAND ${BASH_PROGRAM} ${CMAKE_CURRENT_SOURCE_DIR}/scripts/${SCRIPT}.sh
+      ${CMAKE_BINARY_DIR})
+  endforeach()  # TEST_SCRIPTS
+endif()  # BASH_PROGRAM
+endif()  # JPEGXL_TEST_TOOLS
diff --git a/third-party/libjxl/libjxl/tools/args.h b/third-party/libjxl/libjxl/tools/args.h
new file mode 100644
index 0000000000..e34b75e4b6
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/args.h
@@ -0,0 +1,102 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_ARGS_H_
+#define TOOLS_ARGS_H_
+
+// Helpers for parsing command line arguments.
+
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <string>
+#include <utility>
+
+#include "lib/extras/dec/color_hints.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/status.h"
+#include "tools/file_io.h"
+
+namespace jpegxl {
+namespace tools {
+
+// Converts the command-line value "1" / "0" into jxl::Override::kOn / kOff.
+// Any other text is reported on stderr and rejected; *out is then untouched.
+static inline bool ParseOverride(const char* arg, jxl::Override* out) {
+  const std::string text(arg);
+  const bool on = (text == "1");
+  const bool off = (text == "0");
+  if (!on && !off) {
+    fprintf(stderr, "Invalid flag, %s must be 0 or 1\n", arg);
+    return JXL_FAILURE("Args");
+  }
+  *out = on ? jxl::Override::kOn : jxl::Override::kOff;
+  return true;
+}
+
+// Parses "a,b" into `out`; a lone "a" (or "a" followed by anything that is
+// not ",<float>") fills both halves with a. When not even one float can be
+// read, prints a diagnostic to stderr and fails.
+static inline bool ParseFloatPair(const char* arg,
+                                  std::pair<float, float>* out) {
+  const int fields = sscanf(arg, "%f,%f", &out->first, &out->second);
+  if (fields == 1) out->second = out->first;  // single value: reuse it
+  if (fields == 1 || fields == 2) return true;
+  fprintf(stderr,
+          "Unable to interpret as float pair separated by a comma: %s.\n",
+          arg);
+  return JXL_FAILURE("Args");
+}
+
+// Splits `arg` at its first '=' and passes (key, value) to `*cb`.
+template <typename Callback>
+static inline bool ParseAndAppendKeyValue(const char* arg, Callback* cb) {
+  if (const char* sep = strchr(arg, '=')) {
+    std::string key(arg, sep);
+    return (*cb)(key, std::string(sep + 1));
+  }
+  fprintf(stderr, "Expected argument as 'key=value' but received '%s'\n",
+          arg);
+  return false;
+}
+
+static inline bool ParseCString(const char* arg, const char** out) {  // Stores the argument pointer itself in *out; the text is not copied.
+  *out = arg;
+  return true;  // cannot fail
+}
+
+static inline bool IncrementUnsigned(size_t* out) {  // Adds one to the counter that `out` points to.
+  (*out)++;
+  return true;  // cannot fail
+}
+
+// (key, value) functor that records color/metadata hints in `target`.
+struct ColorHintsProxy {
+  jxl::extras::ColorHints target;
+  // "icc_pathname", "exif", "xmp", "jumbf": `value` names a file whose bytes
+  // are stored (the ICC ones under key "icc"). "strip": stores `value` as a key
+  // with an empty string. Any other key is stored with `value` verbatim.
+  bool operator()(const std::string& key, const std::string& value) {
+    const bool is_icc = (key == "icc_pathname");
+    if (is_icc || key == "exif" || key == "xmp" || key == "jumbf") {
+      std::vector<uint8_t> bytes;
+      JXL_RETURN_IF_ERROR(ReadFile(value, &bytes));
+      const char* raw = reinterpret_cast<const char*>(bytes.data());
+      target.Add(is_icc ? "icc" : key, std::string(raw, raw + bytes.size()));
+    } else if (key == "strip") {
+      target.Add(value, "");
+    } else {
+      target.Add(key, value);
+    }
+    return true;
+  }
+};
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_ARGS_H_
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_args.cc b/third-party/libjxl/libjxl/tools/benchmark/benchmark_args.cc
new file mode 100644
index 0000000000..5ee97ba292
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_args.cc
@@ -0,0 +1,278 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/benchmark/benchmark_args.h"
+
+#include <stddef.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/dec/color_description.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "tools/benchmark/benchmark_codec_custom.h"  // for AddCommand..
+#include "tools/benchmark/benchmark_codec_jpeg.h"  // for AddCommand..
+#include "tools/benchmark/benchmark_codec_jxl.h"
+#include "tools/benchmark/benchmark_codec_png.h"
+
+#ifdef BENCHMARK_WEBP
+#include "tools/benchmark/benchmark_codec_webp.h"
+#endif  // BENCHMARK_WEBP
+
+#ifdef BENCHMARK_AVIF
+#include "tools/benchmark/benchmark_codec_avif.h"
+#endif  // BENCHMARK_AVIF
+
+namespace jpegxl {
+namespace tools {
+
+// Splits `s` at each `c`, keeping empty fields ("" yields one empty string).
+std::vector<std::string> SplitString(const std::string& s, char c) {
+  std::vector<std::string> fields;
+  size_t start = 0;
+  for (size_t sep; (sep = s.find(c, start)) != std::string::npos;) {
+    fields.emplace_back(s, start, sep - start);
+    start = sep + 1;
+  }
+  fields.push_back(s.substr(start));
+  return fields;
+}
+
+int ParseIntParam(const std::string& param, int lower_bound, int upper_bound) {  // Parses the decimal number after param's first character; aborts via JXL_CHECK when it is outside [lower_bound, upper_bound].
+  const long val = strtol(param.substr(1).c_str(), nullptr, 10);  // keep strtol's full `long` result
+  JXL_CHECK(val >= lower_bound && val <= upper_bound);  // range-check before narrowing so huge inputs cannot wrap into range
+  return static_cast<int>(val);
+}
+
+BenchmarkArgs* Args() {  // Returns the single shared BenchmarkArgs instance.
+  static BenchmarkArgs args;  // function-local static: constructed on the first call
+  return &args;
+}
+
+Status BenchmarkArgs::AddCommandLineOptions() {  // Registers every benchmark_xl option with its default, then the per-codec options; returns false if a codec registration fails.
+  AddString(&input, "input", "File or file pattern matching input files.");
+  AddString(&codec, "codec",
+            "Comma separated list of image codec descriptions to benchmark.",
+            "jxl");
+  AddFlag(&print_details, "print_details",
+          "Prints size and distortion for each image. Not safe for "
+          "concurrent benchmark runs.",
+          false);
+  AddFlag(&print_details_csv, "print_details_csv",
+          "When print_details is used, print as CSV.", false);
+  AddString(&extra_metrics, "extra_metrics",
+            "Extra metrics to be computed. Only displayed with --print_details "
+            "or --print_details_csv. Comma-separated list of NAME:COMMAND "
+            "pairs; COMMAND is invoked with the original image as the first "
+            "argument, the decompressed image as a second argument, and the "
+            "name of the file where to write the metric value (as a single "
+            "floating point number) as the third argument.",
+            "");
+  AddFlag(
+      &print_more_stats, "print_more_stats",
+      "Prints codec-specific stats. Not safe for concurrent benchmark runs.",
+      false);
+  AddFlag(&print_distance_percentiles, "print_distance_percentiles",
+          "Prints distance percentiles for the corpus. Not safe for "
+          "concurrent benchmark runs.",
+          false);
+  AddFlag(&silent_errors, "silent_errors",
+          "If true, doesn't print error messages on compression or"
+          " decompression errors. Errors counts are still visible in the"
+          " 'Errors' column of the result table. Please note that depending"
+          " on the JXL build settings, error messages and asserts"
+          " from within the codec may be printed irrespective of this flag"
+          " anyway, use release build to ensure no messages.",
+          false);
+  AddFlag(&save_compressed, "save_compressed",
+          "Saves the compressed files for each input image and each codec.",
+          false);
+  AddFlag(&save_decompressed, "save_decompressed",
+          "Saves the decompressed files as PNG for each input image "
+          "and each codec.",
+          false);
+  AddString(&output_extension, "output_extension",
+            "Extension (starting with dot) to use for saving output images.",
+            ".png");
+  AddString(&output_description, "output_description",
+            "Color encoding (see ParseDescription; e.g. RGB_D65_SRG_Rel_709) "
+            "for saving output images, "
+            "defaults to sRGB.");
+
+  AddFloat(&intensity_target, "intensity_target",
+           "Intended viewing intensity target in nits. Defaults to 255 for "
+           "SDR images, 4000 for HDR images (when the input image uses PQ or "
+           "HLG transfer function)",
+           0);
+
+  AddString(&color_hints_string, "dec-hints",
+            "Color encoding hints for the input images to encoder. Comma "
+            "separated key=value pairs. The key color_space indicates "
+            "ColorEncoding (see ParseDescription; e.g. RGB_D65_SRG_Rel_709) "
+            "for input images without color encoding (such as PNM)");
+
+  AddUnsigned(
+      &override_bitdepth, "override_bitdepth",
+      "If nonzero, store the given bit depth in the JPEG XL file metadata"
+      " (1-32), instead of using the bit depth from the original input"
+      " image.",
+      0);
+
+  AddDouble(&mul_output, "mul_output",
+            "If nonzero, multiplies linear sRGB by this and clamps to 255",
+            0.0);
+  AddFlag(&save_heatmap, "save_heatmap", "Saves the heatmap images.", true);
+  AddDouble(&heatmap_good, "heatmap_good",
+            "If greater than zero, use this as the good "
+            "threshold for creating heatmap images.",
+            0.0);
+  AddDouble(&heatmap_bad, "heatmap_bad",
+            "If greater than zero, use this as the bad "
+            "threshold for creating heatmap images.",
+            0.0);
+
+  AddFlag(&write_html_report, "write_html_report",
+          "Creates an html report with original and compressed images.", false);
+  AddFlag(&html_report_self_contained, "html_report_self_contained",
+          "Base64-encode the images in the HTML report rather than use "
+          "external file names. May cause very large HTML data size.",
+          false);
+  AddFlag(&html_report_use_decompressed, "html_report_use_decompressed",
+          "Show the compressed image as decompressed to --output_extension.",
+          true);
+  AddFlag(&html_report_add_heatmap, "html_report_add_heatmap",
+          "Add heatmaps to the image comparisons.", false);
+
+  AddFlag(
+      &markdown, "markdown",
+      "Adds formatting around ASCII table to render correctly in Markdown based"
+      " interfaces",
+      true);
+
+  AddFlag(&more_columns, "more_columns", "Print extra columns in the table",
+          false);
+
+  AddString(&originals_url, "originals_url",
+            "Url prefix to serve original images from in the html report.");
+  AddString(&output_dir, "output_dir",
+            "If not empty, save compressed and decompressed "
+            "images here.");
+
+  AddSigned(&num_threads, "num_threads",
+            "The number of threads for concurrent benchmarking. Defaults to "
+            "1 thread per CPU core (if negative).",
+            -1);
+  AddSigned(&inner_threads, "inner_threads",
+            "The number of extra threads per task. "
+            "Defaults to occupy cores (if negative).",
+            -1);
+  AddUnsigned(&encode_reps, "encode_reps",
+              "How many times to encode (>1 for more precise measurements). "
+              "Defaults to 1.",
+              1);
+  AddUnsigned(&decode_reps, "decode_reps",
+              "How many times to decode (>1 for more precise measurements). "
+              "Defaults to 1.",
+              1);
+
+  AddString(&sample_tmp_dir, "sample_tmp_dir",
+            "Directory to put samples from input images.");
+
+  AddSigned(&num_samples, "num_samples", "How many sample areas to take.", 0);
+  AddSigned(&sample_dimensions, "sample_dimensions",
+            "How big areas to sample from the input.", 64);
+
+  AddDouble(&error_pnorm, "error_pnorm",
+            "smallest p norm for pooling butteraugli values", 3.0);
+
+  AddFlag(&show_progress, "show_progress",
+          "Show activity dots per completed file during benchmark.", false);
+
+  AddFlag(&skip_butteraugli, "skip_butteraugli",
+          "If true, doesn't compute distance metrics, only compression and"
+          " decompression speed and size. Distance numbers shown in the"
+          " table are invalid.",
+          false);
+
+  AddFlag(
+      &decode_only, "decode_only",
+      "If true, only decodes, and the input files must be compressed with a "
+      "compatible format for the given codec(s). Only measures decompression "
+      "speed and sizes, and can only use a single set of compatible decoders. "
+      "Distance numbers and compression speeds shown in the table are invalid.",
+      false);
+
+  if (!AddCommandLineOptionsCustomCodec(this)) return false;  // codec-specific options follow the generic ones
+  if (!AddCommandLineOptionsJxlCodec(this)) return false;
+  if (!AddCommandLineOptionsJPEGCodec(this)) return false;
+  if (!AddCommandLineOptionsPNGCodec(this)) return false;
+
+#ifdef BENCHMARK_WEBP
+  if (!AddCommandLineOptionsWebPCodec(this)) return false;
+#endif  // BENCHMARK_WEBP
+#ifdef BENCHMARK_AVIF
+  if (!AddCommandLineOptionsAvifCodec(this)) return false;
+#endif  // BENCHMARK_AVIF
+
+  return true;
+}
+
+// Validates the parsed options and derives dependent state (output_encoding,
+// print_details, color_hints); stops at the first problem found.
+Status BenchmarkArgs::ValidateArgs() {
+  if (input.empty()) {
+    fprintf(stderr, "Missing --input filename(s).\n");
+    return false;
+  }
+  const bool known_extension = jxl::extras::CodecFromPath(output_extension) !=
+                               jxl::extras::Codec::kUnknown;
+  if (!known_extension) {
+    JXL_WARNING("Unrecognized output_extension %s, try .png",
+                output_extension.c_str());
+    return false;  // already warned
+  }
+
+  // An empty output_description leaves output_encoding untouched, so callers
+  // may only rely on output_encoding when output_description is non-empty.
+  if (!output_description.empty()) {
+    // Besides validating, this creates the ICC profile once, up front.
+    JxlColorEncoding external_encoding;
+    if (!jxl::ParseDescription(output_description, &external_encoding)) {
+      JXL_WARNING("Unrecognized output_description %s, try RGB_D65_SRG_Rel_Lin",
+                  output_description.c_str());
+      return false;  // already warned
+    }
+    JXL_RETURN_IF_ERROR(jxl::ConvertExternalToInternalColorEncoding(
+        external_encoding, &output_encoding));
+    JXL_RETURN_IF_ERROR(output_encoding.CreateICC());
+  }
+
+  JXL_RETURN_IF_ERROR(ValidateArgsJxlCodec(this));
+
+  print_details = print_details || print_details_csv;  // CSV implies details
+
+  if (override_bitdepth > 32) {
+    return JXL_FAILURE("override_bitdepth must be <= 32");
+  }
+
+  if (color_hints_string.empty()) return true;
+  for (const std::string& hint : SplitString(color_hints_string, ',')) {
+    const std::vector<std::string> kv = SplitString(hint, '=');
+    if (kv.size() != 2) {
+      return JXL_FAILURE(
+          "dec-hints key value pairs must have the form 'key=value'");
+    }
+    color_hints.Add(kv[0], kv[1]);
+  }
+  return true;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_args.h b/third-party/libjxl/libjxl/tools/benchmark/benchmark_args.h
new file mode 100644
index 0000000000..bdc385c8ad
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_args.h
@@ -0,0 +1,181 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_BENCHMARK_BENCHMARK_ARGS_H_
+#define TOOLS_BENCHMARK_BENCHMARK_ARGS_H_
+
+// Command line parsing and arguments for benchmark_xl
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <deque>
+#include <string>
+#include <vector>
+
+#include "lib/extras/dec/color_hints.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/butteraugli/butteraugli.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "tools/args.h"
+#include "tools/cmdline.h"
+
+namespace jpegxl {
+namespace tools {
+
+using ::jxl::ColorEncoding;
+using ::jxl::Override;
+using ::jxl::Status;
+
+std::vector<std::string> SplitString(const std::string& s, char c);  // Presumably splits `s` at each `c`; defined outside this header - confirm there.
+
+int ParseIntParam(const std::string& param, int lower_bound, int upper_bound);  // Defined elsewhere; presumably parses `param` as an int limited to the bounds - TODO confirm.
+
+struct BenchmarkArgs {  // All benchmark command-line options plus the parser (`cmdline`) that fills them in.
+  using OptionId = jpegxl::tools::CommandLineParser::OptionId;
+  // Registers `longName` (handler SetBooleanTrue) and "no"+longName (handler SetBooleanFalse), both writing *field.
+  void AddFlag(bool* field, const char* longName, const char* help,
+               bool defaultValue) {
+    const char* noName = RememberString_(std::string("no") + longName);  // pooled copy: the pointer must stay valid
+    cmdline.AddOptionFlag('\0', longName, nullptr, field,  // the positive form is registered with a null help text
+                          &jpegxl::tools::SetBooleanTrue);
+    cmdline.AddOptionFlag('\0', noName, help, field,  // `help` is attached to the "no" form only
+                          &jpegxl::tools::SetBooleanFalse);
+    *field = defaultValue;
+  }
+  // Registers a "0|1" option handled by ParseOverride; *field starts as Override::kDefault.
+  OptionId AddOverride(Override* field, const char* longName,
+                       const char* help) {
+    OptionId result = cmdline.AddOptionValue('\0', longName, "0|1", help, field,
+                                             &jpegxl::tools::ParseOverride);
+    *field = Override::kDefault;
+    return result;
+  }
+  // Registers a "<string>" option handled by ParseString; *field starts as defaultValue.
+  OptionId AddString(std::string* field, const char* longName, const char* help,
+                     const std::string& defaultValue = "") {
+    OptionId result = cmdline.AddOptionValue(
+        '\0', longName, "<string>", help, field, &jpegxl::tools::ParseString);
+    *field = defaultValue;
+    return result;
+  }
+  // Registers a "<scalar>" option handled by ParseFloat; *field starts as defaultValue.
+  OptionId AddFloat(float* field, const char* longName, const char* help,
+                    float defaultValue) {
+    OptionId result = cmdline.AddOptionValue('\0', longName, "<scalar>", help,
+                                             field, &jpegxl::tools::ParseFloat);
+    *field = defaultValue;
+    return result;
+  }
+  // Registers a "<scalar>" option handled by ParseDouble; *field starts as defaultValue.
+  OptionId AddDouble(double* field, const char* longName, const char* help,
+                     double defaultValue) {
+    OptionId result = cmdline.AddOptionValue(
+        '\0', longName, "<scalar>", help, field, &jpegxl::tools::ParseDouble);
+    *field = defaultValue;
+    return result;
+  }
+  // Registers an "<integer>" option handled by ParseSigned; *field starts as defaultValue.
+  OptionId AddSigned(int* field, const char* longName, const char* help,
+                     int defaultValue) {
+    OptionId result = cmdline.AddOptionValue(
+        '\0', longName, "<integer>", help, field, &jpegxl::tools::ParseSigned);
+    *field = defaultValue;
+    return result;
+  }
+  // Registers an "<unsigned>" option handled by ParseUnsigned; *field starts as defaultValue.
+  OptionId AddUnsigned(size_t* field, const char* longName, const char* help,
+                       size_t defaultValue) {
+    OptionId result =
+        cmdline.AddOptionValue('\0', longName, "<unsigned>", help, field,
+                               &jpegxl::tools::ParseUnsigned);
+    *field = defaultValue;
+    return result;
+  }
+
+  Status AddCommandLineOptions();  // defined outside this header
+
+  Status ValidateArgs();  // defined outside this header
+
+  bool Parse(int argc, const char** argv) { return cmdline.Parse(argc, argv); }  // forwards to the parser
+
+  void PrintHelp() const { cmdline.PrintHelp(); }
+
+  std::string input;  // must be non-empty (checked during argument validation)
+  std::string codec;
+  bool print_details;
+  bool print_details_csv;  // when set, argument validation also turns on print_details
+  bool print_more_stats;
+  bool print_distance_percentiles;
+  bool silent_errors;
+  bool save_compressed;
+  bool save_decompressed;
+  std::string output_extension;    // see CodecFromPath
+  std::string output_description;  // see ParseDescription
+  ColorEncoding output_encoding;   // determined by output_description
+
+  bool decode_only;
+  bool skip_butteraugli;
+
+  float intensity_target;
+
+  std::string color_hints_string;  // comma-separated key=value pairs; validation copies them into color_hints
+  jxl::extras::ColorHints color_hints;
+
+  size_t override_bitdepth;  // argument validation rejects values above 32
+
+  double mul_output;
+  double heatmap_good;
+  double heatmap_bad;
+
+  bool save_heatmap;
+  bool write_html_report;
+  bool html_report_self_contained;
+  bool html_report_use_decompressed;
+  bool html_report_add_heatmap;
+  bool markdown;
+  bool more_columns;
+
+  std::string originals_url;
+  std::string output_dir;
+
+  int num_threads;
+  int inner_threads;
+  size_t decode_reps;
+  size_t encode_reps;
+
+  std::string sample_tmp_dir;
+
+  int num_samples;
+  int sample_dimensions;
+
+  double error_pnorm;
+  bool show_progress;
+
+  std::string extra_metrics;
+
+  jpegxl::tools::CommandLineParser cmdline;  // receives pointers to the fields above via the Add* helpers
+
+ private:
+  const char* RememberString_(const std::string& text) {  // copies `text` (with its NUL) into string_pool_ and returns the pooled pointer
+    const char* data = text.c_str();
+    std::vector<char> copy(data, data + text.size() + 1);  // +1 keeps the terminating NUL
+    string_pool_.push_back(copy);
+    return string_pool_.back().data();
+  }
+
+  // A memory pool with stable addresses for strings to provide stable
+  // const char pointers to cmdline.h for dynamic help/name strings.
+  std::deque<std::vector<char>> string_pool_;
+};
+
+// Returns the singleton BenchmarkArgs instance; the definition is not in this header.
+BenchmarkArgs* Args();
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_BENCHMARK_BENCHMARK_ARGS_H_
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec.cc b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec.cc
new file mode 100644
index 0000000000..e107730bc7
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec.cc
@@ -0,0 +1,169 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/benchmark/benchmark_codec.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/time.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "tools/benchmark/benchmark_args.h"
+#include "tools/benchmark/benchmark_codec_custom.h"
+#include "tools/benchmark/benchmark_codec_jpeg.h"
+#include "tools/benchmark/benchmark_codec_jxl.h"
+#include "tools/benchmark/benchmark_codec_png.h"
+#include "tools/benchmark/benchmark_stats.h"
+
+#ifdef BENCHMARK_WEBP
+#include "tools/benchmark/benchmark_codec_webp.h"
+#endif  // BENCHMARK_WEBP
+
+#ifdef BENCHMARK_AVIF
+#include "tools/benchmark/benchmark_codec_avif.h"
+#endif  // BENCHMARK_AVIF
+
+namespace jpegxl {
+namespace tools {
+
+using ::jxl::Image3F;
+
+void ImageCodec::ParseParameters(const std::string& parameters) {
+  // Keep the raw string, then hand every ':'-separated piece to ParseParam.
+  params_ = parameters;
+  for (const std::string& piece : SplitString(parameters, ':')) {
+    // ParseParam is virtual, so codec-specific overrides see each piece.
+    if (ParseParam(piece)) continue;
+    JXL_ABORT("Invalid parameter %s", piece.c_str());
+  }
+}
+
+Status ImageCodec::ParseParam(const std::string& param) {
+  // Handles the generic one-letter parameters q<quality>, d<distance> and
+  // r<bitrate>. Returns false for anything else or for a malformed number, so
+  // that the caller can try another interpretation or report the error.
+  // parse_suffix reads the text after the prefix letter as a float. It only
+  // succeeds (and only writes *value) if strtof consumed a non-empty suffix.
+  const auto parse_suffix = [&param](float* value) {
+    const std::string suffix = param.substr(1);
+    char* end = nullptr;
+    const float parsed = strtof(suffix.c_str(), &end);
+    if (end == suffix.c_str() || end != suffix.c_str() + suffix.size()) {
+      return false;
+    }
+    *value = parsed;
+    return true;
+  };
+  // operator[] on an empty string yields '\0', which falls to `default`.
+  switch (param[0]) {
+    case 'q':  // libjpeg-style quality, [0,100]
+      return parse_suffix(&q_target_);
+    case 'd':  // butteraugli distance
+      return parse_suffix(&butteraugli_target_);
+    case 'r':  // bitrate; validated like q/d instead of silently becoming 0
+      return parse_suffix(&bitrate_target_);
+    default:
+      return false;
+  }
+}
+
+// A do-nothing "codec": it lets the benchmark measure its own overhead.
+class NoneCodec : public ImageCodec {
+ public:
+  explicit NoneCodec(const BenchmarkArgs& args) : ImageCodec(args) {}
+  Status ParseParam(const std::string& param) override { return true; }
+
+  // The "compressed" stream is just the image width and height (4 bytes each,
+  // native byte order) so that Decompress can produce an image of equal size,
+  // which butteraugli requires.
+  Status Compress(const std::string& filename, const CodecInOut* io,
+                  ThreadPool* pool, std::vector<uint8_t>* compressed,
+                  jpegxl::tools::SpeedStats* speed_stats) override {
+    const double t0 = jxl::Now();
+    const uint32_t dims[2] = {static_cast<uint32_t>(io->xsize()),
+                              static_cast<uint32_t>(io->ysize())};
+    compressed->resize(sizeof(dims));
+    memcpy(compressed->data(), dims, sizeof(dims));
+    speed_stats->NotifyElapsed(jxl::Now() - t0);
+    return true;
+  }
+
+  // Rebuilds a zero-filled float32 sRGB image of the recorded dimensions.
+  Status Decompress(const std::string& filename,
+                    const Span<const uint8_t> compressed, ThreadPool* pool,
+                    CodecInOut* io,
+                    jpegxl::tools::SpeedStats* speed_stats) override {
+    const double t0 = jxl::Now();
+    JXL_ASSERT(compressed.size() == 8);
+    uint32_t dims[2];
+    memcpy(dims, compressed.data(), sizeof(dims));
+    Image3F blank(dims[0], dims[1]);
+    ZeroFillImage(&blank);
+    auto& meta = io->metadata.m;
+    meta.SetFloat32Samples();
+    meta.color_encoding = ColorEncoding::SRGB();
+    io->SetFromImage(std::move(blank), meta.color_encoding);
+    speed_stats->NotifyElapsed(jxl::Now() - t0);
+    return true;
+  }
+
+  // No codec-specific statistics to report.
+  void GetMoreStats(BenchmarkStats* stats) override {}
+};
+
+ImageCodecPtr CreateImageCodec(const std::string& description) {  // `description` is "name" or "name:parameters"
+  std::string name = description;
+  std::string parameters = "";
+  size_t colon = description.find(':');
+  if (colon < description.size()) {  // false when find() returned npos, i.e. there is no ':'
+    name = description.substr(0, colon);
+    parameters = description.substr(colon + 1);  // everything after the first ':'
+  }
+  ImageCodecPtr result;
+  if (name == "jxl") {
+    result.reset(CreateNewJxlCodec(*Args()));
+#if !defined(__wasm__)
+  } else if (name == "custom") {  // only compiled in when not building for wasm
+    result.reset(CreateNewCustomCodec(*Args()));
+#endif
+  } else if (name == "jpeg") {
+    result.reset(CreateNewJPEGCodec(*Args()));
+  } else if (name == "png") {
+    result.reset(CreateNewPNGCodec(*Args()));
+  } else if (name == "none") {
+    result.reset(new NoneCodec(*Args()));
+#ifdef BENCHMARK_WEBP
+  } else if (name == "webp") {
+    result.reset(CreateNewWebPCodec(*Args()));
+#endif  // BENCHMARK_WEBP
+#ifdef BENCHMARK_AVIF
+  } else if (name == "avif") {
+    result.reset(CreateNewAvifCodec(*Args()));
+#endif  // BENCHMARK_AVIF
+  }
+  if (!result.get()) {  // no branch matched, or the factory returned null
+    JXL_ABORT("Unknown image codec: %s", name.c_str());
+  }
+  result->set_description(description);  // keeps the full string, including the parameters
+  if (!parameters.empty()) result->ParseParameters(parameters);  // ImageCodec::ParseParameters aborts on a rejected parameter
+  return result;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec.h b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec.h
new file mode 100644
index 0000000000..bd01dc7035
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec.h
@@ -0,0 +1,96 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_BENCHMARK_BENCHMARK_CODEC_H_
+#define TOOLS_BENCHMARK_BENCHMARK_CODEC_H_
+
+#include <stdint.h>
+
+#include <deque>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/butteraugli/butteraugli.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/image.h"
+#include "tools/args.h"
+#include "tools/benchmark/benchmark_args.h"
+#include "tools/benchmark/benchmark_stats.h"
+#include "tools/cmdline.h"
+#include "tools/speed_stats.h"
+#include "tools/thread_pool_internal.h"
+
+namespace jpegxl {
+namespace tools {
+
+using ::jxl::CodecInOut;
+using ::jxl::Span;
+
+// Abstract base class of the benchmarked codecs. Thread-compatible.
+class ImageCodec {
+ public:
+  explicit ImageCodec(const BenchmarkArgs& args)  // `args` is kept by reference, see args_
+      : args_(args),
+        butteraugli_target_(1.0f),
+        q_target_(100.0f),
+        bitrate_target_(0.0f) {}
+
+  virtual ~ImageCodec() = default;
+
+  void set_description(const std::string& desc) { description_ = desc; }
+  const std::string& description() const { return description_; }
+  // Stores `parameters` and passes each ':'-separated part to ParseParam; aborts on a rejected part.
+  virtual void ParseParameters(const std::string& parameters);
+  // Base version handles q<float>, d<float> and r<float>; returns false for anything else.
+  virtual Status ParseParam(const std::string& param);
+  // Encodes `io` into *compressed; the implementations in this change report elapsed time via speed_stats.
+  virtual Status Compress(const std::string& filename, const CodecInOut* io,
+                          ThreadPool* pool, std::vector<uint8_t>* compressed,
+                          jpegxl::tools::SpeedStats* speed_stats) = 0;
+  // Decodes `compressed` into *io; the implementations in this change report elapsed time via speed_stats.
+  virtual Status Decompress(const std::string& filename,
+                            const Span<const uint8_t> compressed,
+                            ThreadPool* pool, CodecInOut* io,
+                            jpegxl::tools::SpeedStats* speed_stats) = 0;
+
+  virtual void GetMoreStats(BenchmarkStats* stats) {}  // default: nothing to add
+
+  virtual bool IgnoreAlpha() const { return false; }
+  // JPEG recompression hooks; both defaults return false.
+  virtual Status CanRecompressJpeg() const { return false; }
+  virtual Status RecompressJpeg(const std::string& filename,
+                                const std::vector<uint8_t>& data,
+                                std::vector<uint8_t>* compressed,
+                                jpegxl::tools::SpeedStats* speed_stats) {
+    return false;
+  }
+
+  virtual std::string GetErrorMessage() const { return error_message_; }
+
+ protected:
+  const BenchmarkArgs& args_;  // reference member: the BenchmarkArgs must outlive the codec
+  std::string params_;  // raw parameter string stored by ParseParameters
+  std::string description_;
+  float butteraugli_target_;  // set by the "d" parameter; constructor default 1.0
+  float q_target_;  // set by the "q" parameter; constructor default 100.0
+  float bitrate_target_;  // set by the "r" parameter; constructor default 0.0
+  std::string error_message_;
+};
+
+using ImageCodecPtr = std::unique_ptr<ImageCodec>;  // NOTE(review): <memory> is not included directly by this header
+
+// Creates an image codec by name, e.g. "jxl" to get a new instance of the
+// jxl codec. Optional parameters may follow a colon; they are passed to the
+// codec's ParseParameters. Aborts if the name is not a known codec.
+ImageCodecPtr CreateImageCodec(const std::string& description);
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_BENCHMARK_BENCHMARK_CODEC_H_
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_avif.cc b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_avif.cc
new file mode 100644
index 0000000000..283e2c2e3a
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_avif.cc
@@ -0,0 +1,382 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+#include "tools/benchmark/benchmark_codec_avif.h"
+
+#include <avif/avif.h>
+
+#include "lib/extras/time.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_external_image.h"
+#include "tools/cmdline.h"
+#include "tools/thread_pool_internal.h"
+// Evaluates `result` once; unless it is AVIF_RESULT_OK, returns a JXL_FAILURE carrying avifResultToString(result).
+#define JXL_RETURN_IF_AVIF_ERROR(result)                                       \
+  do {                                                                         \
+    avifResult jxl_return_if_avif_error_result = (result);                     \
+    if (jxl_return_if_avif_error_result != AVIF_RESULT_OK) {                   \
+      return JXL_FAILURE("libavif error: %s",                                  \
+                         avifResultToString(jxl_return_if_avif_error_result)); \
+    }                                                                          \
+  } while (false)
+
+namespace jpegxl {
+namespace tools {
+
+using ::jxl::CodecInOut;
+using ::jxl::ImageBundle;
+using ::jxl::PaddedBytes;
+using ::jxl::Primaries;
+using ::jxl::Span;
+using ::jxl::ThreadPool;
+using ::jxl::TransferFunction;
+using ::jxl::WhitePoint;
+
+namespace {
+
+size_t GetNumThreads(ThreadPool* pool) {
+  // Run a single empty task on the pool: the first callback is handed
+  // `num_threads`, and that value is what gets returned (0 if never called).
+  size_t thread_count = 0;
+  const auto record = [&](const size_t n) { thread_count = n; return true; };
+  const auto idle = [](const uint32_t /*task*/, size_t /*thread*/) {};
+  // The RunOnPool status is deliberately ignored, as in the original.
+  (void)jxl::RunOnPool(pool, 0, 1, record, idle, "Compress");
+  return thread_count;
+}
+
+struct AvifArgs {  // AVIF defaults that the avif_chroma_subsampling command-line option can change
+  avifPixelFormat chroma_subsampling = AVIF_PIXEL_FORMAT_YUV444;
+};
+
+AvifArgs* const avifargs = new AvifArgs;  // file-level instance; not deleted anywhere in the code shown
+
+bool ParseChromaSubsampling(const char* arg, avifPixelFormat* subsampling) {
+  // Maps "444", "422", "420" or "400" to the matching libavif pixel format.
+  // *subsampling is written only when `arg` is one of those spellings.
+  static const struct {
+    const char* text;
+    avifPixelFormat format;
+  } kKnown[] = {
+      {"444", AVIF_PIXEL_FORMAT_YUV444},
+      {"422", AVIF_PIXEL_FORMAT_YUV422},
+      {"420", AVIF_PIXEL_FORMAT_YUV420},
+      {"400", AVIF_PIXEL_FORMAT_YUV400},
+  };
+  for (const auto& entry : kKnown) {
+    if (strcmp(arg, entry.text) != 0) continue;
+    *subsampling = entry.format;
+    return true;
+  }
+  return false;
+}
+
+void SetUpAvifColor(const ColorEncoding& color, avifImage* const image) {  // fills image's primaries/transfer/matrix fields from `color`
+  bool need_icc = (color.white_point != WhitePoint::kD65);  // any non-D65 white point forces an ICC profile
+
+  image->matrixCoefficients = AVIF_MATRIX_COEFFICIENTS_BT709;  // default; replaced for k2100 primaries below
+  if (!color.HasPrimaries()) {
+    need_icc = true;  // image->colorPrimaries is left untouched on this path
+  } else {
+    switch (color.primaries) {
+      case Primaries::kSRGB:
+        image->colorPrimaries = AVIF_COLOR_PRIMARIES_BT709;
+        break;
+      case Primaries::k2100:
+        image->colorPrimaries = AVIF_COLOR_PRIMARIES_BT2020;
+        image->matrixCoefficients = AVIF_MATRIX_COEFFICIENTS_BT2020_NCL;
+        break;
+      default:  // no enum mapping here: rely on the ICC profile
+        need_icc = true;
+        image->colorPrimaries = AVIF_COLOR_PRIMARIES_UNKNOWN;
+        break;
+    }
+  }
+
+  switch (color.tf.GetTransferFunction()) {
+    case TransferFunction::kSRGB:
+      image->transferCharacteristics = AVIF_TRANSFER_CHARACTERISTICS_SRGB;
+      break;
+    case TransferFunction::kLinear:
+      image->transferCharacteristics = AVIF_TRANSFER_CHARACTERISTICS_LINEAR;
+      break;
+    case TransferFunction::kPQ:
+      image->transferCharacteristics = AVIF_TRANSFER_CHARACTERISTICS_SMPTE2084;
+      break;
+    case TransferFunction::kHLG:
+      image->transferCharacteristics = AVIF_TRANSFER_CHARACTERISTICS_HLG;
+      break;
+    default:  // no enum mapping here: rely on the ICC profile
+      need_icc = true;
+      image->transferCharacteristics = AVIF_TRANSFER_CHARACTERISTICS_UNKNOWN;
+      break;
+  }
+
+  if (need_icc) {
+    avifImageSetProfileICC(image, color.ICC().data(), color.ICC().size());  // return value, if any, is not checked
+  }
+}
+
+Status ReadAvifColor(const avifImage* const image, ColorEncoding* const color) {  // fills `color` from image's ICC profile or enum fields
+  if (image->icc.size != 0) {  // an embedded ICC profile takes precedence over the enum fields
+    PaddedBytes icc;
+    icc.assign(image->icc.data, image->icc.data + image->icc.size);
+    return color->SetICC(std::move(icc), &jxl::GetJxlCms());
+  }
+
+  color->white_point = WhitePoint::kD65;  // without ICC the white point is always set to D65
+  switch (image->colorPrimaries) {
+    case AVIF_COLOR_PRIMARIES_BT709:
+      color->primaries = Primaries::kSRGB;
+      break;
+    case AVIF_COLOR_PRIMARIES_BT2020:
+      color->primaries = Primaries::k2100;
+      break;
+    default:
+      return JXL_FAILURE("unsupported avif primaries");
+  }
+  switch (image->transferCharacteristics) {
+    case AVIF_TRANSFER_CHARACTERISTICS_BT470M:
+      JXL_RETURN_IF_ERROR(color->tf.SetGamma(2.2));
+      break;
+    case AVIF_TRANSFER_CHARACTERISTICS_BT470BG:
+      JXL_RETURN_IF_ERROR(color->tf.SetGamma(2.8));
+      break;
+    case AVIF_TRANSFER_CHARACTERISTICS_LINEAR:
+      color->tf.SetTransferFunction(TransferFunction::kLinear);
+      break;
+    case AVIF_TRANSFER_CHARACTERISTICS_SRGB:
+      color->tf.SetTransferFunction(TransferFunction::kSRGB);
+      break;
+    case AVIF_TRANSFER_CHARACTERISTICS_SMPTE2084:
+      color->tf.SetTransferFunction(TransferFunction::kPQ);
+      break;
+    case AVIF_TRANSFER_CHARACTERISTICS_HLG:
+      color->tf.SetTransferFunction(TransferFunction::kHLG);
+      break;
+    default:
+      return JXL_FAILURE("unsupported avif TRC");
+  }
+  return color->CreateICC();  // the result of CreateICC() is the function's status on the enum path
+}
+
+}  // namespace
+
+Status AddCommandLineOptionsAvifCodec(BenchmarkArgs* args) {  // registers avif_chroma_subsampling on args->cmdline; always returns true
+  args->cmdline.AddOptionValue(
+      '\0', "avif_chroma_subsampling", "444/422/420/400",
+      "default AVIF chroma subsampling (default: 444).",
+      &avifargs->chroma_subsampling, &ParseChromaSubsampling);  // parsed value lands in the file-level avifargs
+  return true;
+}
+
+class AvifCodec : public ImageCodec {  // ImageCodec implementation that encodes and decodes through libavif
+ public:
+  explicit AvifCodec(const BenchmarkArgs& args) : ImageCodec(args) {
+    chroma_subsampling_ = avifargs->chroma_subsampling;  // start from the command-line default; "yuvNNN" can override it
+  }
+  // Accepts yuvNNN, log2_cols=N, log2_rows=N, s<speed>, aomenc, aomdec, aom, rav1e, dav1d, a=key[=value]; else defers to ImageCodec.
+  Status ParseParam(const std::string& param) override {
+    if (param.compare(0, 3, "yuv") == 0) {
+      if (param.size() != 6) return false;  // "yuv" plus exactly three characters
+      return ParseChromaSubsampling(param.c_str() + 3, &chroma_subsampling_);
+    }
+    if (param.compare(0, 10, "log2_cols=") == 0) {
+      log2_cols = strtol(param.c_str() + 10, nullptr, 10);  // NOTE(review): strtol result is not validated
+      return true;
+    }
+    if (param.compare(0, 10, "log2_rows=") == 0) {
+      log2_rows = strtol(param.c_str() + 10, nullptr, 10);  // NOTE(review): strtol result is not validated
+      return true;
+    }
+    if (param[0] == 's') {  // NOTE(review): matches every remaining parameter that starts with 's'
+      speed_ = strtol(param.c_str() + 1, nullptr, 10);
+      return true;
+    }
+    if (param == "aomenc") {
+      encoder_ = AVIF_CODEC_CHOICE_AOM;
+      return true;
+    }
+    if (param == "aomdec") {
+      decoder_ = AVIF_CODEC_CHOICE_AOM;
+      return true;
+    }
+    if (param == "aom") {  // selects AOM for both directions
+      encoder_ = AVIF_CODEC_CHOICE_AOM;
+      decoder_ = AVIF_CODEC_CHOICE_AOM;
+      return true;
+    }
+    if (param == "rav1e") {
+      encoder_ = AVIF_CODEC_CHOICE_RAV1E;
+      return true;
+    }
+    if (param == "dav1d") {
+      decoder_ = AVIF_CODEC_CHOICE_DAV1D;
+      return true;
+    }
+    if (param.compare(0, 2, "a=") == 0) {  // codec-specific option, forwarded to the encoder in Compress
+      std::string subparam = param.substr(2);
+      size_t pos = subparam.find('=');
+      if (pos == std::string::npos) {
+        codec_specific_options_.emplace_back(subparam, "");  // key without a value
+      } else {
+        std::string key = subparam.substr(0, pos);
+        std::string value = subparam.substr(pos + 1);
+        codec_specific_options_.emplace_back(key, value);
+      }
+      return true;
+    }
+    return ImageCodec::ParseParam(param);
+  }
+  // Encodes every frame of `io` into *compressed; time spent in ConvertToExternal is excluded from the reported duration.
+  Status Compress(const std::string& filename, const CodecInOut* io,
+                  ThreadPool* pool, std::vector<uint8_t>* compressed,
+                  SpeedStats* speed_stats) override {
+    double elapsed_convert_image = 0;
+    size_t max_threads = GetNumThreads(pool);
+    const double start = jxl::Now();
+    {
+      const auto depth =
+          std::min<int>(16, io->metadata.m.bit_depth.bits_per_sample);  // cap at 16 bits per sample
+      std::unique_ptr<avifEncoder, void (*)(avifEncoder*)> encoder(
+          avifEncoderCreate(), &avifEncoderDestroy);
+      encoder->codecChoice = encoder_;
+      // TODO(sboukortt): configure this separately.
+      encoder->minQuantizer = 0;
+      encoder->maxQuantizer = 63;
+      encoder->tileColsLog2 = log2_cols;
+      encoder->tileRowsLog2 = log2_rows;
+      encoder->speed = speed_;
+      encoder->maxThreads = max_threads;
+      for (const auto& opts : codec_specific_options_) {
+        avifEncoderSetCodecSpecificOption(encoder.get(), opts.first.c_str(),
+                                          opts.second.c_str());
+      }
+      avifAddImageFlags add_image_flags = AVIF_ADD_IMAGE_FLAG_SINGLE;
+      if (io->metadata.m.have_animation) {
+        encoder->timescale = std::lround(  // NOTE(review): no guard against tps_denominator == 0 here
+            static_cast<float>(io->metadata.m.animation.tps_numerator) /
+            io->metadata.m.animation.tps_denominator);
+        add_image_flags = AVIF_ADD_IMAGE_FLAG_NONE;
+      }
+      for (const ImageBundle& ib : io->frames) {
+        std::unique_ptr<avifImage, void (*)(avifImage*)> image(
+            avifImageCreate(ib.xsize(), ib.ysize(), depth, chroma_subsampling_),
+            &avifImageDestroy);
+        image->width = ib.xsize();
+        image->height = ib.ysize();
+        image->depth = depth;
+        SetUpAvifColor(ib.c_current(), image.get());
+        std::unique_ptr<avifRWData, void (*)(avifRWData*)> icc_freer(
+            &image->icc, &avifRWDataFree);  // calls avifRWDataFree on image->icc at scope exit
+        avifRGBImage rgb_image;
+        avifRGBImageSetDefaults(&rgb_image, image.get());
+        rgb_image.format =
+            ib.HasAlpha() ? AVIF_RGB_FORMAT_RGBA : AVIF_RGB_FORMAT_RGB;
+        avifRGBImageAllocatePixels(&rgb_image);  // NOTE(review): the outcome of the allocation is not checked
+        std::unique_ptr<avifRGBImage, void (*)(avifRGBImage*)> pixels_freer(
+            &rgb_image, &avifRGBImageFreePixels);  // frees the pixel buffer on every exit path
+        const double start_convert_image = jxl::Now();
+        JXL_RETURN_IF_ERROR(ConvertToExternal(
+            ib, depth, /*float_out=*/false,
+            /*num_channels=*/ib.HasAlpha() ? 4 : 3, JXL_NATIVE_ENDIAN,
+            /*stride=*/rgb_image.rowBytes, pool, rgb_image.pixels,
+            rgb_image.rowBytes * rgb_image.height,
+            /*out_callback=*/{}, jxl::Orientation::kIdentity));
+        const double end_convert_image = jxl::Now();
+        elapsed_convert_image += end_convert_image - start_convert_image;
+        JXL_RETURN_IF_AVIF_ERROR(avifImageRGBToYUV(image.get(), &rgb_image));
+        JXL_RETURN_IF_AVIF_ERROR(avifEncoderAddImage(
+            encoder.get(), image.get(), ib.duration, add_image_flags));
+      }
+      avifRWData buffer = AVIF_DATA_EMPTY;
+      JXL_RETURN_IF_AVIF_ERROR(avifEncoderFinish(encoder.get(), &buffer));  // NOTE(review): `buffer` is not freed if this returns early
+      compressed->assign(buffer.data, buffer.data + buffer.size);
+      avifRWDataFree(&buffer);
+    }
+    const double end = jxl::Now();
+    speed_stats->NotifyElapsed(end - start - elapsed_convert_image);  // pixel-format conversion time is not counted
+    return true;
+  }
+  // Decodes all images in `compressed` into io->frames; time spent in ConvertFromExternal is excluded from the reported duration.
+  Status Decompress(const std::string& filename,
+                    const Span<const uint8_t> compressed, ThreadPool* pool,
+                    CodecInOut* io, SpeedStats* speed_stats) override {
+    io->frames.clear();  // frames are rebuilt from scratch below
+    size_t max_threads = GetNumThreads(pool);
+    double elapsed_convert_image = 0;
+    const double start = jxl::Now();
+    {
+      std::unique_ptr<avifDecoder, void (*)(avifDecoder*)> decoder(
+          avifDecoderCreate(), &avifDecoderDestroy);
+      decoder->codecChoice = decoder_;
+      decoder->maxThreads = max_threads;
+      JXL_RETURN_IF_AVIF_ERROR(avifDecoderSetIOMemory(
+          decoder.get(), compressed.data(), compressed.size()));
+      JXL_RETURN_IF_AVIF_ERROR(avifDecoderParse(decoder.get()));
+      const bool has_alpha = decoder->alphaPresent;
+      io->metadata.m.have_animation = decoder->imageCount > 1;  // more than one image is treated as an animation
+      io->metadata.m.animation.tps_numerator = decoder->timescale;
+      io->metadata.m.animation.tps_denominator = 1;
+      io->metadata.m.SetUintSamples(decoder->image->depth);
+      io->SetSize(decoder->image->width, decoder->image->height);
+      avifResult next_image;
+      while ((next_image = avifDecoderNextImage(decoder.get())) ==
+             AVIF_RESULT_OK) {
+        ColorEncoding color;
+        JXL_RETURN_IF_ERROR(ReadAvifColor(decoder->image, &color));
+        avifRGBImage rgb_image;
+        avifRGBImageSetDefaults(&rgb_image, decoder->image);
+        rgb_image.format =
+            has_alpha ? AVIF_RGB_FORMAT_RGBA : AVIF_RGB_FORMAT_RGB;
+        avifRGBImageAllocatePixels(&rgb_image);  // NOTE(review): the outcome of the allocation is not checked
+        std::unique_ptr<avifRGBImage, void (*)(avifRGBImage*)> pixels_freer(
+            &rgb_image, &avifRGBImageFreePixels);  // frees the pixel buffer on every exit path
+        JXL_RETURN_IF_AVIF_ERROR(avifImageYUVToRGB(decoder->image, &rgb_image));
+        const double start_convert_image = jxl::Now();
+        {
+          JxlPixelFormat format = {
+              (has_alpha ? 4u : 3u),
+              (rgb_image.depth <= 8 ? JXL_TYPE_UINT8 : JXL_TYPE_UINT16),
+              JXL_NATIVE_ENDIAN, 0};
+          ImageBundle ib(&io->metadata.m);
+          JXL_RETURN_IF_ERROR(ConvertFromExternal(
+              Span<const uint8_t>(rgb_image.pixels,
+                                  rgb_image.height * rgb_image.rowBytes),
+              rgb_image.width, rgb_image.height, color, rgb_image.depth, format,
+              pool, &ib));
+          io->frames.push_back(std::move(ib));
+        }
+        const double end_convert_image = jxl::Now();
+        elapsed_convert_image += end_convert_image - start_convert_image;
+      }
+      if (next_image != AVIF_RESULT_NO_IMAGES_REMAINING) {  // the loop ended on a real error, not on end of stream
+        JXL_RETURN_IF_AVIF_ERROR(next_image);
+      }
+    }
+    const double end = jxl::Now();
+    speed_stats->NotifyElapsed(end - start - elapsed_convert_image);  // pixel-format conversion time is not counted
+    return true;
+  }
+
+ protected:
+  avifPixelFormat chroma_subsampling_;  // set in the constructor, overridable via "yuvNNN"
+  avifCodecChoice encoder_ = AVIF_CODEC_CHOICE_AUTO;
+  avifCodecChoice decoder_ = AVIF_CODEC_CHOICE_AUTO;
+  int speed_ = AVIF_SPEED_DEFAULT;  // copied to encoder->speed
+  int log2_cols = 0;  // copied to encoder->tileColsLog2
+  int log2_rows = 0;  // copied to encoder->tileRowsLog2
+  std::vector<std::pair<std::string, std::string>> codec_specific_options_;  // (key, value) pairs from "a=" parameters
+};
+
+ImageCodec* CreateNewAvifCodec(const BenchmarkArgs& args) {  // factory for the "avif" codec
+  return new AvifCodec(args);  // raw owning pointer; CreateImageCodec wraps it in a unique_ptr
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_avif.h b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_avif.h
new file mode 100644
index 0000000000..c3816cf414
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_avif.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_BENCHMARK_BENCHMARK_CODEC_AVIF_H_
+#define TOOLS_BENCHMARK_BENCHMARK_CODEC_AVIF_H_
+
+#include "lib/jxl/base/status.h"
+#include "tools/benchmark/benchmark_args.h"
+#include "tools/benchmark/benchmark_codec.h"
+
+namespace jpegxl {
+namespace tools {
+ImageCodec* CreateNewAvifCodec(const BenchmarkArgs& args);  // returns a newly allocated AVIF codec; the caller owns it
+
+// Registers the AVIF-specific command line option (avif_chroma_subsampling) on args->cmdline.
+Status AddCommandLineOptionsAvifCodec(BenchmarkArgs* args);
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_BENCHMARK_BENCHMARK_CODEC_AVIF_H_
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_custom.cc b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_custom.cc
new file mode 100644
index 0000000000..bbbb2bcbf8
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_custom.cc
@@ -0,0 +1,216 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/benchmark/benchmark_codec_custom.h"
+
+// Not supported on Windows due to Linux-specific functions.
+#ifndef _WIN32
+
+#include <libgen.h>
+
+#include <fstream>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/dec/color_description.h"
+#include "lib/extras/enc/apng.h"
+#include "lib/extras/time.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/image_bundle.h"
+#include "tools/benchmark/benchmark_utils.h"
+#include "tools/file_io.h"
+#include "tools/thread_pool_internal.h"
+
+namespace jpegxl {
+namespace tools {
+
+struct CustomCodecArgs {
+  std::string extension;
+  std::string colorspace;
+  bool quiet;
+};
+
+static CustomCodecArgs* const custom_args = new CustomCodecArgs;
+
+Status AddCommandLineOptionsCustomCodec(BenchmarkArgs* args) {
+  args->AddString(
+      &custom_args->extension, "custom_codec_extension",
+      "Converts input and output of codec to this file type (default: png).",
+      "png");
+  args->AddString(
+      &custom_args->colorspace, "custom_codec_colorspace",
+      "If not empty, converts input and output of codec to this colorspace.",
+      "");
+  args->AddFlag(&custom_args->quiet, "custom_codec_quiet",
+                "Whether stdin and stdout of custom codec should be shown.",
+                false);
+  return true;
+}
+
+namespace {
+
+// This uses `output_filename` to determine the name of the corresponding
+// `.time` file.
+template <typename F>
+Status ReportCodecRunningTime(F&& function, std::string output_filename,
+                              jpegxl::tools::SpeedStats* const speed_stats) {
+  const double start = jxl::Now();
+  JXL_RETURN_IF_ERROR(function());
+  const double end = jxl::Now();
+  const std::string time_filename =
+      GetBaseName(std::move(output_filename)) + ".time";
+  std::ifstream time_stream(time_filename);
+  double time;
+  if (time_stream >> time) {
+    // Report the time measured by the external codec itself.
+    speed_stats->NotifyElapsed(time);
+  } else {
+    // Fall back to the less accurate time that we measured.
+    speed_stats->NotifyElapsed(end - start);
+  }
+  if (time_stream.is_open()) {
+    remove(time_filename.c_str());
+  }
+  return true;
+}
+
+// Benchmark codec that delegates to external encoder/decoder commands and
+// exchanges images with them through temporary files.
+class CustomCodec : public ImageCodec {
+ public:
+  explicit CustomCodec(const BenchmarkArgs& args) : ImageCodec(args) {}
+
+  // Takes, in order: extension, encoder, decoder, then encoder options.
+  Status ParseParam(const std::string& param) override {
+    if (param_index_ == 0) description_ = "";
+    switch (param_index_) {
+      case 0:
+        extension_ = param;
+        description_ += param;
+        break;
+      case 1:
+        compress_command_ = param;
+        // Only the part after the last '/' is shown; npos + 1 wraps to 0, so
+        // a command without '/' is appended whole.
+        description_ += ":" + param.substr(param.find_last_of('/') + 1);
+        break;
+      case 2:
+        decompress_command_ = param;
+        break;
+      default:
+        compress_args_.push_back(param);
+        description_ += std::string(":");
+        if (param.size() > 2 && param[0] == '-' && param[1] == '-') {
+          description_ += param.substr(2);
+        } else if (param.size() > 2 && param[0] == '-') {
+          description_ += param.substr(1);
+        } else {
+          description_ += param;
+        }
+        break;
+    }
+    ++param_index_;
+    return true;
+  }
+
+  // Writes `io` to a temp file, runs the encoder on it, reads back the result.
+  Status Compress(const std::string& filename, const CodecInOut* io,
+                  ThreadPool* pool, std::vector<uint8_t>* compressed,
+                  jpegxl::tools::SpeedStats* speed_stats) override {
+    JXL_RETURN_IF_ERROR(param_index_ > 2);
+
+    const std::string basename = GetBaseName(filename);
+    TemporaryFile in_file(basename, custom_args->extension);
+    TemporaryFile encoded_file(basename, extension_);
+    std::string in_filename, encoded_filename;
+    JXL_RETURN_IF_ERROR(in_file.GetFileName(&in_filename));
+    JXL_RETURN_IF_ERROR(encoded_file.GetFileName(&encoded_filename));
+    saved_intensity_target_ = io->metadata.m.IntensityTarget();
+
+    const size_t bits = io->metadata.m.bit_depth.bits_per_sample;
+    ColorEncoding c_enc = io->Main().c_current();
+    if (!custom_args->colorspace.empty()) {
+      JxlColorEncoding colorspace;
+      JXL_RETURN_IF_ERROR(
+          jxl::ParseDescription(custom_args->colorspace, &colorspace));
+      JXL_RETURN_IF_ERROR(
+          jxl::ConvertExternalToInternalColorEncoding(colorspace, &c_enc));
+    }
+    std::vector<uint8_t> encoded;
+    JXL_RETURN_IF_ERROR(Encode(*io, c_enc, bits, in_filename, &encoded, pool));
+    JXL_RETURN_IF_ERROR(WriteFile(in_filename, encoded));
+    std::vector<std::string> arguments = compress_args_;
+    arguments.push_back(in_filename);
+    arguments.push_back(encoded_filename);
+    JXL_RETURN_IF_ERROR(ReportCodecRunningTime(
+        [&, this] {
+          return RunCommand(compress_command_, arguments, custom_args->quiet);
+        },
+        encoded_filename, speed_stats));
+    return ReadFile(encoded_filename, compressed);
+  }
+
+  // Writes `compressed` to a temp file, runs the decoder, loads its output.
+  Status Decompress(const std::string& filename,
+                    const Span<const uint8_t> compressed, ThreadPool* pool,
+                    CodecInOut* io,
+                    jpegxl::tools::SpeedStats* speed_stats) override {
+    const std::string basename = GetBaseName(filename);
+    TemporaryFile encoded_file(basename, extension_);
+    TemporaryFile out_file(basename, custom_args->extension);
+    std::string encoded_filename, out_filename;
+    JXL_RETURN_IF_ERROR(encoded_file.GetFileName(&encoded_filename));
+    JXL_RETURN_IF_ERROR(out_file.GetFileName(&out_filename));
+
+    JXL_RETURN_IF_ERROR(WriteFile(encoded_filename, compressed));
+    JXL_RETURN_IF_ERROR(ReportCodecRunningTime(
+        [&, this] {
+          return RunCommand(
+              decompress_command_,
+              std::vector<std::string>{encoded_filename, out_filename},
+              custom_args->quiet);
+        },
+        out_filename, speed_stats));
+    jxl::extras::ColorHints hints;
+    if (!custom_args->colorspace.empty()) {
+      hints.Add("color_space", custom_args->colorspace);
+    }
+    std::vector<uint8_t> encoded;
+    JXL_RETURN_IF_ERROR(ReadFile(out_filename, &encoded));
+    JXL_RETURN_IF_ERROR(
+        jxl::SetFromBytes(jxl::Span<const uint8_t>(encoded), hints, io, pool));
+    io->metadata.m.SetIntensityTarget(saved_intensity_target_);
+    return true;
+  }
+
+ private:
+  std::string extension_;
+  std::string compress_command_;
+  std::string decompress_command_;
+  std::vector<std::string> compress_args_;
+  int param_index_ = 0;
+  float saved_intensity_target_ = 255;  // Float, so it is restored unrounded.
+};
+
+}  // namespace
+
+ImageCodec* CreateNewCustomCodec(const BenchmarkArgs& args) {
+  return new CustomCodec(args);
+}
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#else
+
+namespace jpegxl {
+namespace tools {
+
+ImageCodec* CreateNewCustomCodec(const BenchmarkArgs& args) { return nullptr; }
+Status AddCommandLineOptionsCustomCodec(BenchmarkArgs* args) { return true; }
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // _WIN32
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_custom.h b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_custom.h
new file mode 100644
index 0000000000..6e3d017ac6
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_custom.h
@@ -0,0 +1,49 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_BENCHMARK_BENCHMARK_CODEC_CUSTOM_H_
+#define TOOLS_BENCHMARK_BENCHMARK_CODEC_CUSTOM_H_
+
+// This is a benchmark codec that can be used with any command-line
+// encoder/decoder that satisfies the following conditions:
+//
+// - the encoder can read from a PNG file `$input.png` and write the encoded
+//   image to `$encoded.$ext` if it is called as:
+//
+//       $encoder [OPTIONS] $input.png $encoded.$ext
+//
+// - the decoder can read from an encoded file `$encoded.$ext` and write to a
+//   PNG file `$decoded.png` if it is called as:
+//
+//       $decoder $encoded.$ext $decoded.png
+//
+// On the benchmark command line, the codec must be specified as:
+//
+//     custom:$ext:$encoder:$decoder:$options
+//
+// Where the options are also separated by colons.
+//
+// An example with JPEG XL itself would be:
+//
+//     custom:jxl:cjxl:djxl:--distance:3
+//
+// Optionally, to have encoding and decoding speed reported, the codec may write
+// the number of seconds (as a floating point number) elapsed during actual
+// encoding/decoding to $encoded.time and $decoded.time, respectively (replacing
+// the .$ext and .png extensions).
+
+#include "tools/benchmark/benchmark_args.h"
+#include "tools/benchmark/benchmark_codec.h"
+
+namespace jpegxl {
+namespace tools {
+
+ImageCodec* CreateNewCustomCodec(const BenchmarkArgs& args);
+Status AddCommandLineOptionsCustomCodec(BenchmarkArgs* args);
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_BENCHMARK_BENCHMARK_CODEC_CUSTOM_H_
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_jpeg.cc b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_jpeg.cc
new file mode 100644
index 0000000000..fb9148faea
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_jpeg.cc
@@ -0,0 +1,376 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+#include "tools/benchmark/benchmark_codec_jpeg.h"
+
+#include <stddef.h>
+#include <stdio.h>
+// After stddef/stdio
+#include <stdint.h>
+#include <string.h>
+
+#include <numeric>  // partial_sum
+#include <string>
+
+#if JPEGXL_ENABLE_JPEGLI
+#include "lib/extras/dec/jpegli.h"
+#endif
+#include "lib/extras/dec/jpg.h"
+#if JPEGXL_ENABLE_JPEGLI
+#include "lib/extras/enc/jpegli.h"
+#endif
+#include "lib/extras/enc/jpg.h"
+#include "lib/extras/packed_image.h"
+#include "lib/extras/packed_image_convert.h"
+#include "lib/extras/time.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/image_bundle.h"
+#include "tools/benchmark/benchmark_utils.h"
+#include "tools/cmdline.h"
+#include "tools/file_io.h"
+#include "tools/thread_pool_internal.h"
+
+namespace jpegxl {
+namespace tools {
+
+struct JPEGArgs {
+  std::string base_quant_fn;
+  float search_q_start;
+  float search_q_min;
+  float search_q_max;
+  float search_d_min;
+  float search_d_max;
+  int search_max_iters;
+  float search_tolerance;
+  float search_q_precision;
+  float search_first_iter_slope;
+};
+
+static JPEGArgs* const jpegargs = new JPEGArgs;
+
+#define SET_ENCODER_ARG(name)                                  \
+  if (jpegargs->name > 0) {                                    \
+    encoder->SetOption(#name, std::to_string(jpegargs->name)); \
+  }
+
+Status AddCommandLineOptionsJPEGCodec(BenchmarkArgs* args) {
+  args->AddString(&jpegargs->base_quant_fn, "qtables",
+                  "Custom base quantization tables.");
+  args->AddFloat(&jpegargs->search_q_start, "search_q_start",
+                 "Starting quality for quality-to-target search", 0.0f);
+  args->AddFloat(&jpegargs->search_q_min, "search_q_min",
+                 "Minimum quality for quality-to-target search", 0.0f);
+  args->AddFloat(&jpegargs->search_q_max, "search_q_max",
+                 "Maximum quality for quality-to-target search", 0.0f);
+  args->AddFloat(&jpegargs->search_d_min, "search_d_min",
+                 "Minimum distance for quality-to-target search", 0.0f);
+  args->AddFloat(&jpegargs->search_d_max, "search_d_max",
+                 "Maximum distance for quality-to-target search", 0.0f);
+  args->AddFloat(&jpegargs->search_tolerance, "search_tolerance",
+                 "Percentage value, if quality-to-target search result "
+                 "relative error is within this, search stops.",
+                 0.0f);
+  args->AddFloat(&jpegargs->search_q_precision, "search_q_precision",
+                 "If last quality change in quality-to-target search is "
+                 "within this value, search stops.",
+                 0.0f);
+  args->AddFloat(&jpegargs->search_first_iter_slope, "search_first_iter_slope",
+                 "Slope of first extrapolation step in quality-to-target "
+                 "search.",
+                 0.0f);
+  args->AddSigned(&jpegargs->search_max_iters, "search_max_iters",
+                  "Maximum search steps in quality-to-target search.", 0);
+  return true;
+}
+
+class JPEGCodec : public ImageCodec {
+ public:
+  explicit JPEGCodec(const BenchmarkArgs& args) : ImageCodec(args) {}
+
+  // Parses one codec parameter; returns false when no branch accepts it.
+  Status ParseParam(const std::string& param) override {
+    if (param[0] == 'q' && ImageCodec::ParseParam(param)) {
+      enc_quality_set_ = true;
+      return true;
+    }
+    if (ImageCodec::ParseParam(param)) return true;
+    if (param == "sjpeg" || param.find("cjpeg") != std::string::npos) {
+      jpeg_encoder_ = param;
+      return true;
+    }
+#if JPEGXL_ENABLE_JPEGLI
+    if (param == "enc-jpegli") {
+      jpeg_encoder_ = "jpegli";
+      return true;
+    }
+#endif
+    if (param.compare(0, 3, "yuv") == 0) {
+      chroma_subsampling_ = param.substr(3);
+      return true;
+    }
+    if (param.compare(0, 4, "psnr") == 0) {
+      // strtof cannot throw (unlike std::stof); unparsable text yields 0.
+      psnr_target_ = strtof(param.substr(4).c_str(), nullptr);
+      return true;
+    }
+    if (param[0] == 'p') {
+      progressive_id_ = strtol(param.substr(1).c_str(), nullptr, 10);
+      return true;
+    }
+    if (param == "fix") {
+      fix_codes_ = true;
+      return true;
+    }
+    if (param[0] == 'Q') {
+      libjpeg_quality_ = strtol(param.substr(1).c_str(), nullptr, 10);
+      return true;
+    }
+    if (param.compare(0, 3, "YUV") == 0) {
+      if (param.size() != 6) return false;
+      libjpeg_chroma_subsampling_ = param.substr(3);
+      return true;
+    }
+    if (param == "noaq") {
+      enable_adaptive_quant_ = false;
+      return true;
+    }
+#if JPEGXL_ENABLE_JPEGLI
+    if (param == "xyb") {
+      xyb_mode_ = true;
+      return true;
+    }
+    if (param == "std") {
+      use_std_tables_ = true;
+      return true;
+    }
+    if (param == "dec-jpegli") {
+      jpeg_decoder_ = "jpegli";
+      return true;
+    }
+    if (param.substr(0, 2) == "bd") {
+      bitdepth_ = strtol(param.substr(2).c_str(), nullptr, 10);
+      return true;
+    }
+    if (param.substr(0, 6) == "cquant") {
+      num_colors_ = strtol(param.substr(6).c_str(), nullptr, 10);
+      return true;
+    }
+#endif
+    return false;
+  }
+
+  bool IgnoreAlpha() const override { return true; }
+
+  // Encodes `io` as JPEG using the encoder selected by jpeg_encoder_.
+  Status Compress(const std::string& filename, const CodecInOut* io,
+                  ThreadPool* pool, std::vector<uint8_t>* compressed,
+                  jpegxl::tools::SpeedStats* speed_stats) override {
+    if (jpeg_encoder_.find("cjpeg") != std::string::npos) {
+// Not supported on Windows due to Linux-specific functions.
+// Not supported in Android NDK before API 28.
+#if !defined(_WIN32) && !defined(__EMSCRIPTEN__) && \
+    (!defined(__ANDROID_API__) || __ANDROID_API__ >= 28)
+      const std::string basename = GetBaseName(filename);
+      TemporaryFile in_file(basename, "pnm");
+      TemporaryFile encoded_file(basename, "jpg");
+      std::string in_filename, encoded_filename;
+      JXL_RETURN_IF_ERROR(in_file.GetFileName(&in_filename));
+      JXL_RETURN_IF_ERROR(encoded_file.GetFileName(&encoded_filename));
+      const size_t bits = io->metadata.m.bit_depth.bits_per_sample;
+      ColorEncoding c_enc = io->Main().c_current();
+      std::vector<uint8_t> encoded;
+      JXL_RETURN_IF_ERROR(
+          Encode(*io, c_enc, bits, in_filename, &encoded, pool));
+      JXL_RETURN_IF_ERROR(WriteFile(in_filename, encoded));
+      std::vector<std::string> arguments;
+      arguments.push_back("-outfile");
+      arguments.push_back(encoded_filename);
+      arguments.push_back("-quality");
+      arguments.push_back(std::to_string(static_cast<int>(q_target_)));
+      // Emit -sample only with its value; alone it would swallow -optimize.
+      if (!chroma_subsampling_.empty()) {
+        const bool is_444 = chroma_subsampling_ == "444";
+        if (!is_444 && chroma_subsampling_ != "420") {
+          return JXL_FAILURE("Unsupported chroma subsampling");
+        }
+        arguments.push_back("-sample");
+        arguments.push_back(is_444 ? "1x1" : "2x2");
+      }
+      arguments.push_back("-optimize");
+      arguments.push_back(in_filename);
+      const double start = jxl::Now();
+      JXL_RETURN_IF_ERROR(RunCommand(jpeg_encoder_, arguments, false));
+      speed_stats->NotifyElapsed(jxl::Now() - start);
+      return ReadFile(encoded_filename, compressed);
+#else
+      return JXL_FAILURE("Not supported on this build");
+#endif
+    }
+
+    jxl::extras::PackedPixelFile ppf;
+    size_t bits_per_sample = io->metadata.m.bit_depth.bits_per_sample;
+    JxlPixelFormat format = {
+        0,  // num_channels is ignored by the converter
+        bits_per_sample <= 8 ? JXL_TYPE_UINT8 : JXL_TYPE_UINT16, JXL_BIG_ENDIAN,
+        0};
+    JXL_RETURN_IF_ERROR(ConvertCodecInOutToPackedPixelFile(
+        *io, format, io->metadata.m.color_encoding, pool, &ppf));
+    double elapsed = 0.0;
+    if (jpeg_encoder_ == "jpegli") {
+#if JPEGXL_ENABLE_JPEGLI
+      jxl::extras::JpegSettings settings;
+      settings.xyb = xyb_mode_;
+      if (!xyb_mode_) {
+        settings.use_std_quant_tables = use_std_tables_;
+      }
+      if (enc_quality_set_) {
+        settings.quality = q_target_;
+      } else {
+        settings.distance = butteraugli_target_;
+      }
+      if (progressive_id_ >= 0) {
+        settings.progressive_level = progressive_id_;
+      }
+      if (psnr_target_ > 0) {
+        settings.psnr_target = psnr_target_;
+      }
+      if (jpegargs->search_tolerance > 0) {
+        settings.search_tolerance = 0.01f * jpegargs->search_tolerance;
+      }
+      if (jpegargs->search_d_min > 0) {
+        settings.min_distance = jpegargs->search_d_min;
+      }
+      if (jpegargs->search_d_max > 0) {
+        settings.max_distance = jpegargs->search_d_max;
+      }
+      settings.chroma_subsampling = chroma_subsampling_;
+      settings.use_adaptive_quantization = enable_adaptive_quant_;
+      settings.libjpeg_quality = libjpeg_quality_;
+      settings.libjpeg_chroma_subsampling = libjpeg_chroma_subsampling_;
+      settings.optimize_coding = !fix_codes_;
+      const double start = jxl::Now();
+      JXL_RETURN_IF_ERROR(
+          jxl::extras::EncodeJpeg(ppf, settings, pool, compressed));
+      const double end = jxl::Now();
+      elapsed = end - start;
+#endif
+    } else {
+      jxl::extras::EncodedImage encoded;
+      std::unique_ptr<jxl::extras::Encoder> encoder =
+          jxl::extras::GetJPEGEncoder();
+      if (!encoder) {
+        fprintf(stderr, "libjpeg codec is not supported\n");
+        return false;
+      }
+      std::ostringstream os;
+      os << static_cast<int>(std::round(q_target_));
+      encoder->SetOption("q", os.str());
+      encoder->SetOption("jpeg_encoder", jpeg_encoder_);
+      if (!chroma_subsampling_.empty()) {
+        encoder->SetOption("chroma_subsampling", chroma_subsampling_);
+      }
+      if (progressive_id_ >= 0) {
+        encoder->SetOption("progressive", std::to_string(progressive_id_));
+      }
+      if (libjpeg_quality_ > 0) {
+        encoder->SetOption("libjpeg_quality", std::to_string(libjpeg_quality_));
+      }
+      if (!libjpeg_chroma_subsampling_.empty()) {
+        encoder->SetOption("libjpeg_chroma_subsampling",
+                           libjpeg_chroma_subsampling_);
+      }
+      if (fix_codes_) {
+        encoder->SetOption("optimize", "OFF");
+      }
+      if (!enable_adaptive_quant_) {
+        encoder->SetOption("adaptive_q", "OFF");
+      }
+      if (psnr_target_ > 0) {
+        encoder->SetOption("psnr", std::to_string(psnr_target_));
+      }
+      if (!jpegargs->base_quant_fn.empty()) {
+        encoder->SetOption("base_quant_fn", jpegargs->base_quant_fn);
+      }
+      SET_ENCODER_ARG(search_q_start);
+      SET_ENCODER_ARG(search_q_min);
+      SET_ENCODER_ARG(search_q_max);
+      SET_ENCODER_ARG(search_q_precision);
+      SET_ENCODER_ARG(search_tolerance);
+      SET_ENCODER_ARG(search_first_iter_slope);
+      SET_ENCODER_ARG(search_max_iters);
+      const double start = jxl::Now();
+      JXL_RETURN_IF_ERROR(encoder->Encode(ppf, &encoded, pool));
+      const double end = jxl::Now();
+      elapsed = end - start;
+      *compressed = encoded.bitstreams.back();
+    }
+    speed_stats->NotifyElapsed(elapsed);
+    return true;
+  }
+
+  Status Decompress(const std::string& filename,
+                    const Span<const uint8_t> compressed, ThreadPool* pool,
+                    CodecInOut* io,
+                    jpegxl::tools::SpeedStats* speed_stats) override {
+    jxl::extras::PackedPixelFile ppf;
+    if (jpeg_decoder_ == "jpegli") {
+#if JPEGXL_ENABLE_JPEGLI
+      std::vector<uint8_t> jpeg_bytes(compressed.data(),
+                                      compressed.data() + compressed.size());
+      const double start = jxl::Now();
+      jxl::extras::JpegDecompressParams dparams;
+      dparams.output_data_type =
+          bitdepth_ > 8 ? JXL_TYPE_UINT16 : JXL_TYPE_UINT8;
+      dparams.num_colors = num_colors_;
+      JXL_RETURN_IF_ERROR(
+          jxl::extras::DecodeJpeg(jpeg_bytes, dparams, pool, &ppf));
+      const double end = jxl::Now();
+      speed_stats->NotifyElapsed(end - start);
+#endif
+    } else {
+      const double start = jxl::Now();
+      jxl::extras::JPGDecompressParams dparams;
+      dparams.num_colors = num_colors_;
+      JXL_RETURN_IF_ERROR(
+          jxl::extras::DecodeImageJPG(compressed, jxl::extras::ColorHints(),
+                                      &ppf, /*constraints=*/nullptr, &dparams));
+      const double end = jxl::Now();
+      speed_stats->NotifyElapsed(end - start);
+    }
+    JXL_RETURN_IF_ERROR(
+        jxl::extras::ConvertPackedPixelFileToCodecInOut(ppf, pool, io));
+    return true;
+  }
+
+ protected:
+  // JPEG encoder and its parameters
+  std::string jpeg_encoder_ = "libjpeg";
+  std::string chroma_subsampling_;
+  int progressive_id_ = -1;
+  bool fix_codes_ = false;
+  float psnr_target_ = 0.0f;
+  bool enc_quality_set_ = false;
+  int libjpeg_quality_ = 0;
+  std::string libjpeg_chroma_subsampling_;
+#if JPEGXL_ENABLE_JPEGLI
+  bool xyb_mode_ = false;
+  bool use_std_tables_ = false;
+#endif
+  bool enable_adaptive_quant_ = true;
+  // JPEG decoder and its parameters
+  std::string jpeg_decoder_ = "libjpeg";
+  int num_colors_ = 0;
+#if JPEGXL_ENABLE_JPEGLI
+  size_t bitdepth_ = 8;
+#endif
+};
+
+ImageCodec* CreateNewJPEGCodec(const BenchmarkArgs& args) {
+  return new JPEGCodec(args);
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_jpeg.h b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_jpeg.h
new file mode 100644
index 0000000000..d9f0c35d21
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_jpeg.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_BENCHMARK_BENCHMARK_CODEC_JPEG_H_
+#define TOOLS_BENCHMARK_BENCHMARK_CODEC_JPEG_H_
+
+#include "lib/jxl/base/status.h"
+#include "tools/benchmark/benchmark_args.h"
+#include "tools/benchmark/benchmark_codec.h"
+
+namespace jpegxl {
+namespace tools {
+ImageCodec* CreateNewJPEGCodec(const BenchmarkArgs& args);
+
+// Registers the jpeg-specific command line options.
+Status AddCommandLineOptionsJPEGCodec(BenchmarkArgs* args);
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_BENCHMARK_BENCHMARK_CODEC_JPEG_H_
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_jxl.cc b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_jxl.cc
new file mode 100644
index 0000000000..554115af15
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_jxl.cc
@@ -0,0 +1,357 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+#include "tools/benchmark/benchmark_codec_jxl.h"
+
+#include <jxl/stats.h>
+#include <jxl/thread_parallel_runner_cxx.h>
+
+#include <cstdint>
+#include <cstdlib>
+#include <functional>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/dec/jxl.h"
+#include "lib/extras/enc/apng.h"
+#include "lib/extras/enc/encode.h"
+#include "lib/extras/enc/jpg.h"
+#include "lib/extras/enc/jxl.h"
+#include "lib/extras/packed_image_convert.h"
+#include "lib/extras/time.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/codec_in_out.h"
+#include "tools/benchmark/benchmark_file_io.h"
+#include "tools/benchmark/benchmark_stats.h"
+#include "tools/cmdline.h"
+
+namespace jpegxl {
+namespace tools {
+
+using ::jxl::Image3F;
+using ::jxl::extras::EncodedImage;
+using ::jxl::extras::Encoder;
+using ::jxl::extras::JXLCompressParams;
+using ::jxl::extras::JXLDecompressParams;
+using ::jxl::extras::PackedFrame;
+using ::jxl::extras::PackedPixelFile;
+
+struct JxlArgs {
+  bool qprogressive;  // progressive with shift-quantization.
+  bool progressive;
+  int progressive_dc;
+
+  Override noise;
+  Override dots;
+  Override patches;
+
+  std::string debug_image_dir;
+};
+
+static JxlArgs* const jxlargs = new JxlArgs;
+
+Status AddCommandLineOptionsJxlCodec(BenchmarkArgs* args) {
+  args->AddFlag(&jxlargs->qprogressive, "qprogressive",
+                "Enable quantized progressive mode for AC.", false);
+  args->AddFlag(&jxlargs->progressive, "progressive",
+                "Enable progressive mode for AC.", false);
+  args->AddSigned(&jxlargs->progressive_dc, "progressive_dc",
+                  "Enable progressive mode for DC.", -1);
+
+  args->AddOverride(&jxlargs->noise, "noise",
+                    "Enable(1)/disable(0) noise generation.");
+  args->AddOverride(&jxlargs->dots, "dots",
+                    "Enable(1)/disable(0) dots generation.");
+  args->AddOverride(&jxlargs->patches, "patches",
+                    "Enable(1)/disable(0) patch dictionary.");
+
+  args->AddString(
+      &jxlargs->debug_image_dir, "debug_image_dir",
+      "If not empty, saves debug images for each "
+      "input image and each codec that provides it to this directory.");
+
+  return true;
+}
+
+Status ValidateArgsJxlCodec(BenchmarkArgs* args) { return true; }
+
+// Resolves an effort level from a named preset or a number in [1, 10].
+// On success stores it in `*out` and returns true; otherwise `*out` is left
+// unchanged and false is returned.
+inline bool ParseEffort(const std::string& s, int* out) {
+  struct NamedEffort {
+    const char* name;
+    int level;
+  };
+  static const NamedEffort kNamedEfforts[] = {
+      {"lightning", 1},
+      {"thunder", 2},
+      {"falcon", 3},
+      {"cheetah", 4},
+      {"hare", 5},
+      {"fast", 6},
+      {"wombat", 6},
+      {"squirrel", 7},
+      {"kitten", 8},
+      {"guetzli", 9},
+      {"tortoise", 9},
+      {"glacier", 10},
+  };
+  for (const NamedEffort& preset : kNamedEfforts) {
+    if (s == preset.name) {
+      *out = preset.level;
+      return true;
+    }
+  }
+
+  // Not a preset name: parse it as a number. Base 0 makes strtoull accept
+  // decimal as well as 0x-prefixed hex and 0-prefixed octal input.
+  const size_t numeric =
+      static_cast<size_t>(strtoull(s.c_str(), nullptr, 0));
+  if (numeric < 1 || numeric > 10) {
+    return false;
+  }
+  *out = numeric;
+  return true;
+}
+
+class JxlCodec : public ImageCodec {
+ public:
+  explicit JxlCodec(const BenchmarkArgs& args)
+      : ImageCodec(args), stats_(nullptr, JxlEncoderStatsDestroy) {}
+
+  // Applies one codec parameter to the encoder/decoder settings.
+  Status ParseParam(const std::string& param) override {
+    const std::string kMaxPassesPrefix = "max_passes=";
+    const std::string kDownsamplingPrefix = "downsampling=";
+    const std::string kResamplingPrefix = "resampling=";
+    const std::string kEcResamplingPrefix = "ec_resampling=";
+    int val;
+    float fval;
+    if (param.substr(0, kResamplingPrefix.size()) == kResamplingPrefix) {
+      std::istringstream parser(param.substr(kResamplingPrefix.size()));
+      parser >> val;
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, val);
+    } else if (param.substr(0, kEcResamplingPrefix.size()) ==
+               kEcResamplingPrefix) {
+      std::istringstream parser(param.substr(kEcResamplingPrefix.size()));
+      parser >> val;
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_EXTRA_CHANNEL_RESAMPLING, val);
+    } else if (ImageCodec::ParseParam(param)) {
+      // Nothing to do.
+    } else if (param == "uint8") {
+      uint8_ = true;
+    } else if (param[0] == 'D') {
+      cparams_.alpha_distance = strtof(param.substr(1).c_str(), nullptr);
+    } else if (param.substr(0, kMaxPassesPrefix.size()) == kMaxPassesPrefix) {
+      std::istringstream parser(param.substr(kMaxPassesPrefix.size()));
+      parser >> dparams_.max_passes;
+    } else if (param.substr(0, kDownsamplingPrefix.size()) ==
+               kDownsamplingPrefix) {
+      std::istringstream parser(param.substr(kDownsamplingPrefix.size()));
+      parser >> dparams_.max_downsampling;
+    } else if (ParseEffort(param, &val)) {
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, val);
+    } else if (param[0] == 'X') {
+      fval = strtof(param.substr(1).c_str(), nullptr);
+      cparams_.AddFloatOption(
+          JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GLOBAL_PERCENT, fval);
+    } else if (param[0] == 'Y') {
+      fval = strtof(param.substr(1).c_str(), nullptr);
+      cparams_.AddFloatOption(
+          JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GROUP_PERCENT, fval);
+    } else if (param == "plt") {
+      // Checked before the 'p' prefix below, which would otherwise claim it.
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_MODULAR_NB_PREV_CHANNELS, 0);
+      cparams_.AddFloatOption(
+          JXL_ENC_FRAME_SETTING_MODULAR_MA_TREE_LEARNING_PERCENT, 0.0f);
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR, 0);
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_RESPONSIVE, 0);
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_MODULAR_COLOR_SPACE, 0);
+      cparams_.AddFloatOption(
+          JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GLOBAL_PERCENT, 0.0f);
+      cparams_.AddFloatOption(
+          JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GROUP_PERCENT, 0.0f);
+    } else if (param[0] == 'p') {
+      val = strtol(param.substr(1).c_str(), nullptr, 10);
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_PALETTE_COLORS, val);
+    } else if (param == "lp") {
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_LOSSY_PALETTE, 1);
+    } else if (param[0] == 'C') {
+      val = strtol(param.substr(1).c_str(), nullptr, 10);
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_MODULAR_COLOR_SPACE, val);
+    } else if (param[0] == 'c') {
+      val = strtol(param.substr(1).c_str(), nullptr, 10);
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_COLOR_TRANSFORM, val);
+      has_ctransform_ = true;
+    } else if (param[0] == 'I') {
+      fval = strtof(param.substr(1).c_str(), nullptr);
+      cparams_.AddFloatOption(
+          JXL_ENC_FRAME_SETTING_MODULAR_MA_TREE_LEARNING_PERCENT, fval * 100.0);
+    } else if (param[0] == 'E') {
+      val = strtol(param.substr(1).c_str(), nullptr, 10);
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_MODULAR_NB_PREV_CHANNELS, val);
+    } else if (param[0] == 'P') {
+      val = strtol(param.substr(1).c_str(), nullptr, 10);
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR, val);
+    } else if (param == "slow") {
+      cparams_.AddFloatOption(
+          JXL_ENC_FRAME_SETTING_MODULAR_MA_TREE_LEARNING_PERCENT, 50.0);
+    } else if (param == "R") {
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_RESPONSIVE, 1);
+    } else if (param[0] == 'R') {
+      val = strtol(param.substr(1).c_str(), nullptr, 10);
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_RESPONSIVE, val);
+    } else if (param == "m") {
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_MODULAR, 1);
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_COLOR_TRANSFORM, 1);  // kNone
+      modular_mode_ = true;
+    } else if (param.substr(0, 3) == "gab") {
+      val = strtol(param.substr(3).c_str(), nullptr, 10);
+      if (val != 0 && val != 1) {
+        return JXL_FAILURE("Invalid gab value");
+      }
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_GABORISH, val);
+    } else if (param[0] == 'g') {
+      val = strtol(param.substr(1).c_str(), nullptr, 10);
+      if (val < 0 || val > 3) {
+        return JXL_FAILURE("Invalid group size shift value");
+      }
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_MODULAR_GROUP_SIZE, val);
+    } else if (param.substr(0, 3) == "epf") {
+      val = strtol(param.substr(3).c_str(), nullptr, 10);
+      if (val > 3) {
+        return JXL_FAILURE("Invalid epf value");
+      }
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_EPF, val);
+    } else if (param.substr(0, 16) == "faster_decoding=") {
+      val = strtol(param.substr(16).c_str(), nullptr, 10);
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_DECODING_SPEED, val);
+    } else {
+      return JXL_FAILURE("Unrecognized param");
+    }
+    return true;
+  }
+
+  // Encodes io's main frame into *compressed, timing only the encoder call.
+  Status Compress(const std::string& filename, const CodecInOut* io,
+                  ThreadPool* pool, std::vector<uint8_t>* compressed,
+                  jpegxl::tools::SpeedStats* speed_stats) override {
+    JxlPixelFormat float_format{0, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0};
+    PackedPixelFile packed;
+    JXL_RETURN_IF_ERROR(ConvertCodecInOutToPackedPixelFile(
+        *io, float_format, io->Main().c_current(), pool, &packed));
+    cparams_.runner = pool->runner();
+    cparams_.runner_opaque = pool->runner_opaque();
+    cparams_.distance = butteraugli_target_;
+    cparams_.AddOption(JXL_ENC_FRAME_SETTING_NOISE, (int)jxlargs->noise);
+    cparams_.AddOption(JXL_ENC_FRAME_SETTING_DOTS, (int)jxlargs->dots);
+    cparams_.AddOption(JXL_ENC_FRAME_SETTING_PATCHES, (int)jxlargs->patches);
+    cparams_.AddOption(JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC,
+                       jxlargs->progressive);
+    cparams_.AddOption(JXL_ENC_FRAME_SETTING_QPROGRESSIVE_AC,
+                       jxlargs->qprogressive);
+    cparams_.AddOption(JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC,
+                       jxlargs->progressive_dc);
+    if (!has_ctransform_ && modular_mode_ && butteraugli_target_ > 0.f) {
+      // Lossy modular with no explicit 'c' param: back to the default XYB.
+      cparams_.AddOption(JXL_ENC_FRAME_SETTING_COLOR_TRANSFORM, -1);
+    }
+    std::string debug_dir;
+    SetDebugImageCallback(filename, &debug_dir, &cparams_);
+    if (args_.print_more_stats) {
+      stats_.reset(JxlEncoderStatsCreate());
+      cparams_.stats = stats_.get();
+    }
+    const double encode_begin = jxl::Now();
+    JXL_RETURN_IF_ERROR(jxl::extras::EncodeImageJXL(
+        cparams_, packed, /*jpeg_bytes=*/nullptr, compressed));
+    speed_stats->NotifyElapsed(jxl::Now() - encode_begin);
+    return true;
+  }
+
+  // Decodes `compressed` into io; only the DecodeImageJXL call is timed.
+  Status Decompress(const std::string& filename,
+                    const Span<const uint8_t> compressed, ThreadPool* pool,
+                    CodecInOut* io,
+                    jpegxl::tools::SpeedStats* speed_stats) override {
+    dparams_.runner = pool->runner();
+    dparams_.runner_opaque = pool->runner_opaque();
+    // Accept 3- or 4-channel output, as 8-bit or float depending on uint8_.
+    const JxlDataType sample_type = uint8_ ? JXL_TYPE_UINT8 : JXL_TYPE_FLOAT;
+    dparams_.accepted_formats = {{3, sample_type, JXL_NATIVE_ENDIAN, 0},
+                                 {4, sample_type, JXL_NATIVE_ENDIAN, 0}};
+    // The benchmark compares against originals whose exif orientation was not
+    // undone, so ask the decoder to keep the original orientation instead of
+    // its default of producing an identity-orientation image.
+    dparams_.keep_orientation = true;
+    size_t decoded_bytes;
+    PackedPixelFile decoded;
+    const double decode_begin = jxl::Now();
+    JXL_RETURN_IF_ERROR(jxl::extras::DecodeImageJXL(
+        compressed.data(), compressed.size(), dparams_, &decoded_bytes,
+        &decoded));
+    speed_stats->NotifyElapsed(jxl::Now() - decode_begin);
+    JXL_RETURN_IF_ERROR(ConvertPackedPixelFileToCodecInOut(decoded, pool, io));
+    return true;
+  }
+
+  void GetMoreStats(BenchmarkStats* stats) override {
+    stats->jxl_stats.num_inputs += 1;  // Count one more contributing input.
+    JxlEncoderStatsMerge(stats->jxl_stats.stats.get(), stats_.get());  // Fold this codec's stats_ into the aggregate.
+  }
+
+ protected:
+  JXLCompressParams cparams_;
+  bool has_ctransform_ = false;
+  bool modular_mode_ = false;
+  JXLDecompressParams dparams_;
+  bool uint8_ = false;
+  std::unique_ptr<JxlEncoderStats, decltype(JxlEncoderStatsDestroy)*> stats_;
+
+ private:
+  void SetDebugImageCallback(const std::string& filename,
+                             std::string* debug_prefix,
+                             JXLCompressParams* cparams) {
+    if (jxlargs->debug_image_dir.empty()) return;  // No debug image directory configured.
+    *debug_prefix = JoinPath(jxlargs->debug_image_dir, FileBaseName(filename)) +
+                    ".jxl:" + params_ + ".dbg/";
+    JXL_CHECK(MakeDir(*debug_prefix));
+    cparams->debug_image_opaque = debug_prefix;  // Handed back to the callback as `opaque`; the string must outlive the encode.
+    cparams->debug_image = [](void* opaque, const char* label, size_t xsize,
+                              size_t ysize, const JxlColorEncoding* color,
+                              const uint16_t* pixels) {
+      auto encoder = jxl::extras::GetAPNGEncoder();
+      JXL_CHECK(encoder);
+      PackedPixelFile debug_ppf;
+      JxlPixelFormat format{3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+      PackedFrame frame(xsize, ysize, format);
+      memcpy(frame.color.pixels(), pixels, 6 * xsize * ysize);  // 6 bytes per pixel: 3 channels x 2-byte samples.
+      debug_ppf.frames.emplace_back(std::move(frame));
+      debug_ppf.info.xsize = xsize;
+      debug_ppf.info.ysize = ysize;
+      debug_ppf.info.num_color_channels = 3;
+      debug_ppf.info.bits_per_sample = 16;
+      debug_ppf.color_encoding = *color;
+      EncodedImage encoded;
+      JXL_CHECK(encoder->Encode(debug_ppf, &encoded));
+      JXL_CHECK(!encoded.bitstreams.empty());
+      std::string* debug_prefix = reinterpret_cast<std::string*>(opaque);
+      std::string fn = *debug_prefix + std::string(label) + ".png";  // The prefix ends in '/', so this names a file inside that directory.
+      WriteFile(fn, encoded.bitstreams[0]);  // NOTE(review): the result of WriteFile is not checked here.
+    };
+  }
+};
+
+ImageCodec* CreateNewJxlCodec(const BenchmarkArgs& args) {
+  return new JxlCodec(args);  // Heap-allocated; returned to the caller as a raw pointer.
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_jxl.h b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_jxl.h
new file mode 100644
index 0000000000..967be26b55
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_jxl.h
@@ -0,0 +1,25 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_BENCHMARK_BENCHMARK_CODEC_JXL_H_
+#define TOOLS_BENCHMARK_BENCHMARK_CODEC_JXL_H_
+
+#include <string>
+
+#include "lib/jxl/base/status.h"
+#include "tools/benchmark/benchmark_args.h"
+#include "tools/benchmark/benchmark_codec.h"
+
+namespace jpegxl {
+namespace tools {
+ImageCodec* CreateNewJxlCodec(const BenchmarkArgs& args);
+
+// Registers the jxl-specific command line options.
+Status AddCommandLineOptionsJxlCodec(BenchmarkArgs* args);
+Status ValidateArgsJxlCodec(BenchmarkArgs* args);
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_BENCHMARK_BENCHMARK_CODEC_JXL_H_
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_png.cc b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_png.cc
new file mode 100644
index 0000000000..4faf34db7f
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_png.cc
@@ -0,0 +1,84 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/benchmark/benchmark_codec_png.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/dec/apng.h"
+#include "lib/extras/enc/apng.h"
+#include "lib/extras/packed_image.h"
+#include "lib/extras/packed_image_convert.h"
+#include "lib/extras/time.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_metadata.h"
+#include "tools/thread_pool_internal.h"
+
+namespace jpegxl {
+namespace tools {
+
+struct PNGArgs {
+  // Empty, no PNG-specific args currently.
+};
+
+static PNGArgs* const pngargs = new PNGArgs;
+
+Status AddCommandLineOptionsPNGCodec(BenchmarkArgs* args) { return true; }  // Registers nothing; always succeeds.
+
+// Lossless.
+class PNGCodec : public ImageCodec {
+ public:
+  explicit PNGCodec(const BenchmarkArgs& args) : ImageCodec(args) {}
+
+  // PNG has no codec parameters here; every param string is accepted.
+  Status ParseParam(const std::string& param) override { return true; }
+
+  // Encodes the main frame as PNG using the image's own bits per sample.
+  Status Compress(const std::string& filename, const CodecInOut* io,
+                  ThreadPool* pool, std::vector<uint8_t>* compressed,
+                  jpegxl::tools::SpeedStats* speed_stats) override {
+    const size_t bit_depth = io->metadata.m.bit_depth.bits_per_sample;
+    const double encode_begin = jxl::Now();
+    JXL_RETURN_IF_ERROR(jxl::Encode(*io, jxl::extras::Codec::kPNG,
+                                    io->Main().c_current(), bit_depth,
+                                    compressed, pool));
+    speed_stats->NotifyElapsed(jxl::Now() - encode_begin);
+    return true;
+  }
+
+  Status Decompress(const std::string& /*filename*/,
+                    const Span<const uint8_t> compressed, ThreadPool* pool,
+                    CodecInOut* io,
+                    jpegxl::tools::SpeedStats* speed_stats) override {
+    jxl::extras::PackedPixelFile decoded;
+    const double decode_begin = jxl::Now();
+    JXL_RETURN_IF_ERROR(jxl::extras::DecodeImageAPNG(
+        compressed, jxl::extras::ColorHints(), &decoded));
+    speed_stats->NotifyElapsed(jxl::Now() - decode_begin);
+    JXL_RETURN_IF_ERROR(
+        jxl::extras::ConvertPackedPixelFileToCodecInOut(decoded, pool, io));
+    return true;
+  }
+};
+
+// Returns a new PNGCodec only when GetAPNGEncoder() yields an encoder and
+// CanDecode(kPNG) is true; otherwise returns nullptr.
+ImageCodec* CreateNewPNGCodec(const BenchmarkArgs& args) {
+  const bool usable = jxl::extras::GetAPNGEncoder() &&
+                      jxl::extras::CanDecode(jxl::extras::Codec::kPNG);
+  if (!usable) return nullptr;
+  return new PNGCodec(args);
+}
+
+}  // namespace tools
+}  // namespace jpegxl
+
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_png.h b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_png.h
new file mode 100644
index 0000000000..8f2958390d
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_png.h
@@ -0,0 +1,24 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_BENCHMARK_BENCHMARK_CODEC_PNG_H_
+#define TOOLS_BENCHMARK_BENCHMARK_CODEC_PNG_H_
+
+#include <string>
+
+#include "lib/jxl/base/status.h"
+#include "tools/benchmark/benchmark_args.h"
+#include "tools/benchmark/benchmark_codec.h"
+
+namespace jpegxl {
+namespace tools {
+ImageCodec* CreateNewPNGCodec(const BenchmarkArgs& args);
+
+// Registers the png-specific command line options.
+Status AddCommandLineOptionsPNGCodec(BenchmarkArgs* args);
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_BENCHMARK_BENCHMARK_CODEC_PNG_H_
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_webp.cc b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_webp.cc
new file mode 100644
index 0000000000..6ea8867e06
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_webp.cc
@@ -0,0 +1,284 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+#include "tools/benchmark/benchmark_codec_webp.h"
+
+#include <stdint.h>
+#include <string.h>
+#include <webp/decode.h>
+#include <webp/encode.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/extras/time.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/enc_image_bundle.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/sanitizers.h"
+#include "tools/thread_pool_internal.h"
+
+namespace jpegxl {
+namespace tools {
+
+using ::jxl::ImageBundle;
+using ::jxl::ImageMetadata;
+using ::jxl::ThreadPool;
+
+// Sets image data from 8-bit sRGB pixel array in bytes.
+// Amount of input bytes per pixel must be:
+// (is_gray ? 1 : 3) + (has_alpha ? 1 : 0)
+Status FromSRGB(const size_t xsize, const size_t ysize, const bool is_gray,
+                const bool has_alpha, const bool is_16bit,
+                const JxlEndianness endianness, const uint8_t* pixels,
+                const uint8_t* end, ThreadPool* pool, ImageBundle* ib) {
+  uint32_t channels = is_gray ? 1 : 3;
+  if (has_alpha) ++channels;
+  const size_t sample_bits = (is_16bit ? 2 : 1) * jxl::kBitsPerByte;
+  JxlPixelFormat layout = {
+      channels, is_16bit ? JXL_TYPE_UINT16 : JXL_TYPE_UINT8, endianness, 0};
+  const Span<const uint8_t> bytes(pixels, end - pixels);
+  return ConvertFromExternal(bytes, xsize, ysize, ColorEncoding::SRGB(is_gray),
+                             sample_bits, layout, pool, ib);
+}
+
+struct WebPArgs {
+  // Empty, no WebP-specific args currently.
+};
+
+static WebPArgs* const webpargs = new WebPArgs;
+
+Status AddCommandLineOptionsWebPCodec(BenchmarkArgs* args) { return true; }  // Registers nothing; always succeeds.
+
+class WebPCodec : public ImageCodec {
+ public:
+  explicit WebPCodec(const BenchmarkArgs& args) : ImageCodec(args) {}
+
+  Status ParseParam(const std::string& param) override {
+    // Take 'q' here first so the ImageCodec base parser does not consume it.
+    if (param[0] == 'q') {
+      if (near_lossless_) {
+        near_lossless_quality_ = ParseIntParam(param, 0, 99);
+      } else {
+        quality_ = ParseIntParam(param, 1, 100);
+      }
+      return true;
+    }
+    if (ImageCodec::ParseParam(param)) return true;
+    if (param == "ll") {
+      lossless_ = true;
+      JXL_CHECK(!near_lossless_);
+      return true;
+    }
+    if (param == "nl") {
+      near_lossless_ = true;
+      JXL_CHECK(!lossless_);
+      return true;
+    }
+    if (param[0] != 'm') return false;
+    method_ = ParseIntParam(param, 1, 6);
+    return true;
+  }
+
+  Status Compress(const std::string& filename, const CodecInOut* io,
+                  ThreadPool* pool, std::vector<uint8_t>* compressed,
+                  jpegxl::tools::SpeedStats* speed_stats) override {
+    const double start = jxl::Now();  // Timing covers color conversion as well as encoding.
+    const ImageBundle& ib = io->Main();
+
+    if (ib.HasAlpha() && ib.metadata()->GetAlphaBits() > 8) {
+      return JXL_FAILURE("WebP alpha must be 8-bit");
+    }
+
+    size_t num_chans = (ib.HasAlpha() ? 4 : 3);
+    ImageMetadata metadata = io->metadata.m;
+    ImageBundle store(&metadata);
+    const ImageBundle* transformed;
+    const ColorEncoding& c_desired = ColorEncoding::SRGB(false);
+    JXL_RETURN_IF_ERROR(jxl::TransformIfNeeded(ib, c_desired, jxl::GetJxlCms(),
+                                               pool, &store, &transformed));
+    size_t xsize = ib.oriented_xsize();
+    size_t ysize = ib.oriented_ysize();
+    size_t stride = xsize * num_chans;
+    std::vector<uint8_t> srgb(stride * ysize);  // Interleaved 8-bit samples, num_chans per pixel.
+    JXL_RETURN_IF_ERROR(ConvertToExternal(
+        *transformed, 8, /*float_out=*/false, num_chans, JXL_BIG_ENDIAN, stride,
+        pool, srgb.data(), srgb.size(),
+        /*out_callback=*/{}, metadata.GetOrientation()));
+
+    if (lossless_ || near_lossless_) {
+      // The lossless codec does not support 16-bit channels.
+      // Color models are currently not supported here and the sRGB 8-bit
+      // conversion causes loss due to clipping.
+      if (!ib.IsSRGB() || ib.metadata()->bit_depth.bits_per_sample > 8 ||
+          ib.metadata()->bit_depth.exponent_bits_per_sample > 0) {
+        return JXL_FAILURE("%s: webp:ll/nl requires 8-bit sRGB",
+                           filename.c_str());
+      }
+      JXL_RETURN_IF_ERROR(
+          CompressInternal(srgb, xsize, ysize, num_chans, 100, compressed));
+    } else if (bitrate_target_ > 0.0) {
+      int quality_bad = 100;  // Upper bracket: lowest quality seen (or assumed) to exceed target_size.
+      int quality_good = 92;
+      size_t target_size = xsize * ysize * bitrate_target_ / 8.0;  // bitrate_target_ is in bits per pixel; result is bytes.
+      while (quality_good > 0 &&
+             CompressInternal(srgb, xsize, ysize, num_chans, quality_good,
+                              compressed) &&
+             compressed->size() > target_size) {
+        quality_bad = quality_good;
+        quality_good -= 8;  // Coarse downward scan in steps of 8.
+      }
+      if (quality_good <= 0) quality_good = 1;
+      while (quality_good + 1 < quality_bad) {  // Bisect between the two brackets.
+        int quality = (quality_bad + quality_good) / 2;
+        if (!CompressInternal(srgb, xsize, ysize, num_chans, quality,
+                              compressed)) {
+          break;
+        }
+        if (compressed->size() <= target_size) {
+          quality_good = quality;
+        } else {
+          quality_bad = quality;
+        }
+      }
+      JXL_RETURN_IF_ERROR(CompressInternal(srgb, xsize, ysize, num_chans,  // Re-encode: the last probe may not have used quality_good.
+                                           quality_good, compressed));
+    } else if (quality_ > 0) {
+      JXL_RETURN_IF_ERROR(CompressInternal(srgb, xsize, ysize, num_chans,
+                                           quality_, compressed));
+    } else {
+      return false;  // No lossless mode, bitrate target, or positive quality is set.
+    }
+    const double end = jxl::Now();
+    speed_stats->NotifyElapsed(end - start);
+    return true;
+  }
+
+  // Decodes `compressed` to 8-bit RGBA with libwebp and loads it into
+  // io->Main(). Only the WebPDecode call itself is timed.
+  Status Decompress(const std::string& filename,
+                    const Span<const uint8_t> compressed, ThreadPool* pool,
+                    CodecInOut* io,
+                    jpegxl::tools::SpeedStats* speed_stats) override {
+    WebPDecoderConfig config;
+#ifdef MEMORY_SANITIZER
+    // config is initialized by libwebp, which we are not instrumenting with
+    // msan, therefore we need to initialize it here.
+    memset(&config, 0, sizeof(config));
+#endif
+    JXL_RETURN_IF_ERROR(WebPInitDecoderConfig(&config) == 1);
+    config.options.use_threads = 0;
+    config.options.dithering_strength = 0;
+    config.options.bypass_filtering = 0;
+    config.options.no_fancy_upsampling = 0;
+    WebPDecBuffer* const buf = &config.output;
+    buf->colorspace = MODE_RGBA;
+    const uint8_t* webp_data = compressed.data();
+    const size_t webp_size = compressed.size();  // size_t: no narrowing to int.
+    const double start = jxl::Now();
+    if (WebPDecode(webp_data, webp_size, &config) != VP8_STATUS_OK) {
+      return JXL_FAILURE("WebPDecode failed");
+    }
+    speed_stats->NotifyElapsed(jxl::Now() - start);
+    JXL_CHECK(buf->u.RGBA.stride == buf->width * 4);
+    const bool is_gray = false;
+    const bool has_alpha = true;
+    const uint8_t* data_begin = &buf->u.RGBA.rgba[0];
+    const uint8_t* data_end = data_begin + buf->width * buf->height * 4;
+    // The image data is initialized by libwebp, which we are not instrumenting
+    // with msan.
+    jxl::msan::UnpoisonMemory(data_begin, data_end - data_begin);
+    if (io->metadata.m.color_encoding.IsGray() != is_gray) {
+      // TODO(lode): either make is_gray match the color profile, or set one,
+      // e.g. io->metadata.m.color_encoding = ColorEncoding::SRGB(is_gray).
+      // Fail normally here because SetFromSRGB would trigger a fatal assert.
+      // Release the decoded pixels; nothing else frees them on this path.
+      WebPFreeDecBuffer(buf);
+      return JXL_FAILURE("Color profile is-gray mismatch");
+    }
+    io->metadata.m.SetAlphaBits(8);
+    const Status ok = FromSRGB(buf->width, buf->height, is_gray, has_alpha,
+                               /*is_16bit=*/false, JXL_LITTLE_ENDIAN,
+                               data_begin, data_end, pool, &io->Main());
+    WebPFreeDecBuffer(buf);
+    JXL_RETURN_IF_ERROR(ok);
+    return true;
+  }
+
+ private:
+  // libwebp writer callback: appends each chunk to the byte vector stored in
+  // picture->custom_ptr and always reports success (1).
+  static int WebPStringWrite(const uint8_t* data, size_t data_size,
+                             const WebPPicture* const picture) {
+    if (data_size == 0) return 1;
+    auto* const sink = static_cast<std::vector<uint8_t>*>(picture->custom_ptr);
+    const size_t old_size = sink->size();
+    sink->resize(old_size + data_size);
+    memcpy(sink->data() + old_size, data, data_size);
+    return 1;
+  }
+  Status CompressInternal(const std::vector<uint8_t>& srgb, size_t xsize,
+                          size_t ysize, size_t num_chans, int quality,
+                          std::vector<uint8_t>* compressed) {
+    compressed->clear();  // Every call starts from an empty output buffer.
+    WebPConfig config;
+    WebPConfigInit(&config);
+    JXL_ASSERT(!lossless_ || !near_lossless_);  // can't have both
+    config.lossless = lossless_;
+    config.quality = quality;
+    config.method = method_;
+#if WEBP_ENCODER_ABI_VERSION >= 0x020a
+    config.near_lossless = near_lossless_ ? near_lossless_quality_ : 100;
+#else
+    if (near_lossless_) {
+      JXL_WARNING("Near lossless not supported by this WebP version");
+    }
+#endif
+    JXL_CHECK(WebPValidateConfig(&config));
+
+    WebPPicture pic;
+    WebPPictureInit(&pic);
+    pic.width = static_cast<int>(xsize);
+    pic.height = static_cast<int>(ysize);
+    pic.writer = &WebPStringWrite;
+    if (lossless_ || near_lossless_) pic.use_argb = 1;
+    pic.custom_ptr = compressed;  // Read back by WebPStringWrite to append output.
+
+    if (num_chans == 3) {
+      WebPPictureImportRGB(&pic, srgb.data(), 3 * xsize);  // Last argument is the row stride in bytes.
+    } else {
+      WebPPictureImportRGBA(&pic, srgb.data(), 4 * xsize);
+    }
+
+    // WebP encoding may fail, for example, if the image is more than 16384
+    // pixels high or wide.
+    bool ok = WebPEncode(&config, &pic);
+    WebPPictureFree(&pic);  // Runs whether or not encoding succeeded.
+    // Compressed image data is initialized by libwebp, which we are not
+    // instrumenting with msan.
+    jxl::msan::UnpoisonMemory(compressed->data(), compressed->size());
+    return ok;
+  }
+
+  int quality_ = 90;             // lossy quality; 'q' sets it within 1..100
+  bool lossless_ = false;        // set by the "ll" param
+  bool near_lossless_ = false;   // set by the "nl" param
+  int near_lossless_quality_ = 40;  // int, not bool: holds the 0..99 'q' level
+  int method_ = 6;                   // smallest, some speed cost
+};
+
+ImageCodec* CreateNewWebPCodec(const BenchmarkArgs& args) {
+  return new WebPCodec(args);  // Heap-allocated; returned to the caller as a raw pointer.
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_webp.h b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_webp.h
new file mode 100644
index 0000000000..37d3c584e8
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_codec_webp.h
@@ -0,0 +1,25 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+#ifndef TOOLS_BENCHMARK_BENCHMARK_CODEC_WEBP_H_
+#define TOOLS_BENCHMARK_BENCHMARK_CODEC_WEBP_H_
+
+// To support webp, install libwebp-dev and rerun cmake.
+
+#include <string>
+
+#include "lib/jxl/base/status.h"
+#include "tools/benchmark/benchmark_args.h"
+#include "tools/benchmark/benchmark_codec.h"
+
+namespace jpegxl {
+namespace tools {
+ImageCodec* CreateNewWebPCodec(const BenchmarkArgs& args);
+
+// Registers the webp-specific command line options.
+Status AddCommandLineOptionsWebPCodec(BenchmarkArgs* args);
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_BENCHMARK_BENCHMARK_CODEC_WEBP_H_
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_file_io.cc b/third-party/libjxl/libjxl/tools/benchmark/benchmark_file_io.cc
new file mode 100644
index 0000000000..b8acbfba00
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_file_io.cc
@@ -0,0 +1,234 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+#include "tools/benchmark/benchmark_file_io.h"
+
+#include <errno.h>
+#include <sys/stat.h>
+
+#include <cstdio>
+
+#if defined(_WIN32) || defined(_WIN64)
+#include "third_party/dirent.h"
+#else
+#include <dirent.h>
+#include <unistd.h>
+#endif
+
+#ifndef HAS_GLOB
+#define HAS_GLOB 0
+#if defined __has_include
+// <glob.h> is included in previous APIs but glob() function is not defined
+// until API 28.
+#if __has_include(<glob.h>) && \
+    (!defined(__ANDROID_API__) || __ANDROID_API__ >= 28)
+#undef HAS_GLOB
+#define HAS_GLOB 1
+#endif  // __has_include(<glob.h>)
+#endif  // __has_include
+#endif  // HAS_GLOB
+
+#if HAS_GLOB
+#include <glob.h>
+#endif  // HAS_GLOB
+
+// There is no "user" in embedded filesystems.
+#ifndef GLOB_TILDE
+#define GLOB_TILDE 0
+#endif
+
+namespace jpegxl {
+namespace tools {
+
+const char kPathSeparator = '/';
+
+// RAII, ensures dir is closed even when returning early.
+class DirWrapper {
+ public:
+  // Opens `pathname`; the wrapped handle is whatever opendir() returned.
+  explicit DirWrapper(const std::string& pathname)
+      : dir_(opendir(pathname.c_str())) {}
+
+  DirWrapper(const DirWrapper&) = delete;
+  DirWrapper& operator=(const DirWrapper&) = delete;
+
+  ~DirWrapper() {
+    if (dir_ == nullptr) return;
+    const int status = closedir(dir_);
+    JXL_CHECK(status == 0);
+  }
+
+  // NOLINTNEXTLINE(google-explicit-constructor)
+  operator DIR*() const { return dir_; }
+
+ private:
+  DIR* const dir_;
+};
+
+// Checks if the file exists, either as file or as directory
+bool PathExists(const std::string& fname) {
+  // A successful stat() is the only criterion; the entry type is not checked.
+  struct stat info;
+  return stat(fname.c_str(), &info) == 0;
+}
+
+// Checks if the file exists and is a regular file.
+bool IsRegularFile(const std::string& fname) {
+  // A failed stat() (e.g. a missing path) counts as "not a regular file".
+  struct stat info;
+  return stat(fname.c_str(), &info) == 0 && S_ISREG(info.st_mode);
+}
+
+// Checks if the file exists and is a directory.
+bool IsDirectory(const std::string& fname) {
+  // A failed stat() (e.g. a missing path) counts as "not a directory".
+  struct stat info;
+  return stat(fname.c_str(), &info) == 0 && S_ISDIR(info.st_mode);
+}
+
+// Recursively makes dir, or successfully does nothing if it already exists.
+Status MakeDir(const std::string& dirname) {
+  size_t pos = 0;
+  for (pos = dirname.size(); pos > 0; pos--) {  // Scan candidate prefixes from longest to shortest.
+    if (pos == dirname.size() || dirname[pos] == kPathSeparator) {
+      // Found existing dir or regular file, break and then start creating
+      // from here (in the latter case we'll get error below).
+      if (PathExists(dirname.substr(0, pos + 1))) {
+        pos += 1;  // Skip past this existing path
+        break;
+      }
+    }
+  }
+  for (; pos <= dirname.size(); pos++) {  // Create the remaining components, shortest prefix first.
+    if (pos == dirname.size() || dirname[pos] == kPathSeparator) {
+      std::string subdir = dirname.substr(0, pos + 1);  // Includes the trailing separator when there is one.
+      if (mkdir(subdir.c_str(), 0777) && errno != EEXIST) {  // An already existing entry is tolerated at this point.
+        return JXL_FAILURE("Failed to create directory");
+      }
+    }
+  }
+  if (!IsDirectory(dirname)) return JXL_FAILURE("Failed to create directory");  // Also catches a non-directory at `dirname`.
+  return true;  // success
+}
+
+Status DeleteFile(const std::string& fname) {
+  if (IsRegularFile(fname)) {
+    if (std::remove(fname.c_str()) == 0) return true;
+    return JXL_FAILURE("Failed to delete file");
+  }
+  return JXL_FAILURE("Trying to delete non-regular file");
+}
+
+std::string FileBaseName(const std::string& fname) {
+  const size_t slash = fname.find_last_of('/');
+  // Without any slash the whole name is the base name.
+  return slash == std::string::npos ? fname : fname.substr(slash + 1);
+}
+
+std::string FileDirName(const std::string& fname) {
+  const size_t slash = fname.find_last_of('/');
+  // Without any slash there is no directory part.
+  return slash == std::string::npos ? std::string() : fname.substr(0, slash);
+}
+
+std::string FileExtension(const std::string& fname) {
+  const size_t dot = fname.find_last_of('.');
+  // The result keeps its leading dot; no dot at all yields an empty string.
+  return dot == std::string::npos ? std::string() : fname.substr(dot);
+}
+
+std::string JoinPath(const std::string& first, const std::string& second) {
+  JXL_CHECK(second.empty() || second[0] != kPathSeparator);
+  // Insert a separator unless `first` already ends with one.
+  const bool has_sep = !first.empty() && first.back() == kPathSeparator;
+  return has_sep ? first + second : first + kPathSeparator + second;
+}
+
+// Can match a single file, or multiple files in a directory (non-recursive).
+// With POSIX, supports glob(), otherwise supports a subset.
+Status MatchFiles(const std::string& pattern, std::vector<std::string>* list) {
+#if HAS_GLOB
+  glob_t g;
+  memset(&g, 0, sizeof(g));
+  int error = glob(pattern.c_str(), GLOB_TILDE, NULL, &g);
+  if (!error) {
+    for (size_t i = 0; i < g.gl_pathc; ++i) {
+      list->push_back(g.gl_pathv[i]);  // Matches are appended; existing entries in *list are kept.
+    }
+  }
+  globfree(&g);  // Runs on both the success and the failure path.
+  if (error) return JXL_FAILURE("glob failed for %s", pattern.c_str());  // Any non-zero glob() result is reported as failure.
+  return true;
+#else
+  std::string dirname = FileDirName(pattern);
+  std::string basename = FileBaseName(pattern);
+  size_t pos0 = basename.find('*');  // First '*' in the basename, if any.
+  size_t pos1 = pos0 == std::string::npos ? pos0 : basename.find('*', pos0 + 1);  // Second '*', if any.
+  std::string prefix, middle, suffix;
+  if (pos0 != std::string::npos) {
+    prefix = basename.substr(0, pos0);
+    if (pos1 != std::string::npos) {
+      middle = basename.substr(pos0 + 1, pos1 - pos0 - 1);
+      suffix = basename.substr(pos1 + 1);
+    } else {
+      suffix = basename.substr(pos0 + 1);
+    }
+  }
+
+  if (prefix.find_first_of("*?[") != std::string::npos ||
+      middle.find_first_of("*?[") != std::string::npos ||
+      suffix.find_first_of("*?[") != std::string::npos ||
+      dirname.find_first_of("*?[") != std::string::npos) {
+    return JXL_FAILURE(
+        "Only glob patterns with max two '*' in the basename"
+        " are supported, e.g. directory/path/*.png or"
+        " /directory/path/*heatmap*");
+  }
+
+  if (pos0 != std::string::npos) {
+    DirWrapper dir(dirname);
+    if (!dir) return JXL_FAILURE("directory %s doesn't exist", dirname.c_str());
+    for (;;) {
+      dirent* ent = readdir(dir);
+      if (!ent) break;  // No more directory entries.
+      std::string name = ent->d_name;
+      // If there was a suffix, only add if it matches (e.g. ".png")
+      bool matches =
+          name.size() >= (prefix.size() + middle.size() + suffix.size());
+      if (matches) {
+        if (!prefix.empty() && name.substr(0, prefix.size()) != prefix) {
+          matches = false;
+        }
+        if (!middle.empty()) {
+          size_t pos = name.find(middle, prefix.size());  // Search only after the prefix part.
+          if (pos == std::string::npos ||
+              pos + middle.size() > name.size() - suffix.size()) {
+            matches = false;
+          }
+        }
+        if (!suffix.empty() &&
+            name.substr(name.size() - suffix.size()) != suffix) {
+          matches = false;
+        }
+      }
+      if (matches) {
+        std::string path = JoinPath(dirname, name);
+
+        if (IsRegularFile(path)) {  // Directories and other non-regular entries are skipped.
+          list->push_back(path);
+        }
+      }
+    }
+    return true;
+  }
+  // No *, so a single regular file is intended
+  if (IsRegularFile(pattern)) {
+    list->push_back(pattern);
+  }
+  return true;  // Also true when the pattern matched nothing; *list is then left unchanged.
+#endif  // HAS_GLOB
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_file_io.h b/third-party/libjxl/libjxl/tools/benchmark/benchmark_file_io.h
new file mode 100644
index 0000000000..3c68acc54c
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_file_io.h
@@ -0,0 +1,57 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// File utilities for benchmarking and testing, but which are not needed for
+// main jxl itself.
+
+#ifndef TOOLS_BENCHMARK_BENCHMARK_FILE_IO_H_
+#define TOOLS_BENCHMARK_BENCHMARK_FILE_IO_H_
+
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+#include "tools/file_io.h"
+
+namespace jpegxl {
+namespace tools {
+
+using ::jxl::Status;
+
+// Checks if the file exists, either as file or as directory
+bool PathExists(const std::string& fname);
+
+// Checks if the file exists and is a regular file.
+bool IsRegularFile(const std::string& fname);
+
+// Checks if the file exists and is a directory.
+bool IsDirectory(const std::string& fname);
+
+// Recursively makes dir, or successfully does nothing if it already exists.
+Status MakeDir(const std::string& dirname);
+
+// Deletes a single regular file.
+Status DeleteFile(const std::string& fname);
+
+// Returns value similar to unix basename, except it returns empty string if
+// fname ends in '/'.
+std::string FileBaseName(const std::string& fname);
+// Returns value similar to unix dirname, except returns up to before the last
+// slash if fname ends in '/'.
+std::string FileDirName(const std::string& fname);
+
+// Returns the part of the filename starting from the last dot, or empty
+// string if there is no dot.
+std::string FileExtension(const std::string& fname);
+
+// Matches one or more files given glob pattern.
+Status MatchFiles(const std::string& pattern, std::vector<std::string>* list);
+
+std::string JoinPath(const std::string& first, const std::string& second);
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_BENCHMARK_BENCHMARK_FILE_IO_H_
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_stats.cc b/third-party/libjxl/libjxl/tools/benchmark/benchmark_stats.cc
new file mode 100644
index 0000000000..87b998562d
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_stats.cc
@@ -0,0 +1,383 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/benchmark/benchmark_stats.h"
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <algorithm>
+#include <cmath>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "tools/benchmark/benchmark_args.h"
+
+namespace jpegxl {
+namespace tools {
+
+// Expands to a `case` label returning the display name of JXL_ENC_STAT_<val>.
+#define STAT_NAME_CASE(val, name) \
+  case JXL_ENC_STAT_##val: return name
+const char* JxlStatsName(JxlEncoderStatsKey key) {
+  switch (key) {
+    STAT_NAME_CASE(HEADER_BITS, "Header bits");
+    STAT_NAME_CASE(TOC_BITS, "TOC bits");
+    STAT_NAME_CASE(DICTIONARY_BITS, "Patch dictionary bits");
+    STAT_NAME_CASE(SPLINES_BITS, "Splines bits");
+    STAT_NAME_CASE(NOISE_BITS, "Noise bits");
+    STAT_NAME_CASE(QUANT_BITS, "Quantizer bits");
+    STAT_NAME_CASE(MODULAR_TREE_BITS, "Modular tree bits");
+    STAT_NAME_CASE(MODULAR_GLOBAL_BITS, "Modular global bits");
+    STAT_NAME_CASE(DC_BITS, "DC bits");
+    STAT_NAME_CASE(MODULAR_DC_GROUP_BITS, "Modular DC group bits");
+    STAT_NAME_CASE(CONTROL_FIELDS_BITS, "Control field bits");
+    STAT_NAME_CASE(COEF_ORDER_BITS, "Coeff order bits");
+    STAT_NAME_CASE(AC_HISTOGRAM_BITS, "AC histogram bits");
+    STAT_NAME_CASE(AC_BITS, "AC token bits");
+    STAT_NAME_CASE(MODULAR_AC_GROUP_BITS, "Modular AC group bits");
+    STAT_NAME_CASE(NUM_SMALL_BLOCKS, "Number of small blocks");
+    STAT_NAME_CASE(NUM_DCT4X8_BLOCKS, "Number of 4x8 blocks");
+    STAT_NAME_CASE(NUM_AFV_BLOCKS, "Number of AFV blocks");
+    STAT_NAME_CASE(NUM_DCT8_BLOCKS, "Number of 8x8 blocks");
+    STAT_NAME_CASE(NUM_DCT8X32_BLOCKS, "Number of 8x32 blocks");
+    STAT_NAME_CASE(NUM_DCT16_BLOCKS, "Number of 16x16 blocks");
+    STAT_NAME_CASE(NUM_DCT16X32_BLOCKS, "Number of 16x32 blocks");
+    STAT_NAME_CASE(NUM_DCT32_BLOCKS, "Number of 32x32 blocks");
+    STAT_NAME_CASE(NUM_DCT32X64_BLOCKS, "Number of 32x64 blocks");
+    STAT_NAME_CASE(NUM_DCT64_BLOCKS, "Number of 64x64 blocks");
+    STAT_NAME_CASE(NUM_BUTTERAUGLI_ITERS, "Butteraugli iters");
+    default:
+      return "";
+  }
+  return "";
+}
+#undef STAT_NAME_CASE
+
+void JxlStats::Print() const {  // One line per non-zero encoder statistic.
+  for (int idx = 0; idx < JXL_ENC_NUM_STATS; ++idx) {
+    const auto id = static_cast<JxlEncoderStatsKey>(idx);
+    const size_t count = JxlEncoderStatsGet(stats.get(), id);
+    if (count != 0) printf("%-25s  %10" PRIuS "\n", JxlStatsName(id), count);
+  }
+}
+
+namespace {
+
+// Longest name in Args()->codec (comma-separated), for table alignment.
+uint32_t ComputeLargestCodecName() {
+  size_t longest = strlen("Aggregate:");  // The final row's label counts too.
+  const std::vector<std::string> names = SplitString(Args()->codec, ',');
+  for (const std::string& name : names) {
+    if (name.size() > longest) longest = name.size();
+  }
+  return longest;
+}
+
+// The benchmark result is a table of heterogeneous data, the column type
+// specifies its data type. The type affects how it is printed as well as how
+// aggregate values are computed.
+enum ColumnType {
+  // Formatted string, printed as-is
+  TYPE_STRING,
+  // Positive size, prints 0 as "---"; aggregated with the geometric mean
+  TYPE_SIZE,
+  // Floating point value (double precision) which is interpreted as
+  // "not applicable" if <= 0 (for example a speed that is not measured); such
+  // values are left out of the aggregate geometric mean. NOTE(review): they
+  // are still printed as plain numbers by PrintFormattedEntries, not "---".
+  TYPE_POSITIVE_FLOAT,
+  // Count of some event; summed (not averaged) in the aggregate row
+  TYPE_COUNT,
+};
+
+struct ColumnDescriptor {
+  // Column name
+  std::string label;
+  // Total width to render the values of this column. If this is a floating
+  // point value, make sure this is large enough to contain a space and the
+  // point, plus precision digits after the point, plus the max amount of
+  // integer digits you expect in front of the point.
+  uint32_t width;
+  // Amount of digits after the point, or 0 if not a floating point value.
+  uint32_t precision;
+  ColumnType type;  // Selects how the value is formatted and aggregated
+  bool more;  // Whether to print only if more_columns is enabled
+};
+
+static ColumnDescriptor ExtraMetricDescriptor() {
+  // Placeholder label: PrintHeader prints the real metric names instead.
+  return ColumnDescriptor{{"DO NOT USE"}, 12, 4, TYPE_POSITIVE_FLOAT, false};
+}
+
+// Describes the columns of the benchmark ASCII table. To add or change a
+// column, add/change an entry here with its label, the width of the column,
+// precision after the point in case of floating point, the data type and
+// whether it is only shown with more_columns. Then add/change the
+// corresponding formula or formatting in the function ComputeColumns.
+std::vector<ColumnDescriptor> GetColumnDescriptors(size_t num_extra_metrics) {
+  // clang-format off
+  std::vector<ColumnDescriptor> columns = {
+      {{"Encoding"}, ComputeLargestCodecName() + 1, 0, TYPE_STRING, false},
+      {{"kPixels"},        10,  0, TYPE_SIZE, false},
+      {{"Bytes"},           9,  0, TYPE_SIZE, false},
+      {{"BPP"},            13,  7, TYPE_POSITIVE_FLOAT, false},
+      {{"E MP/s"},          8,  3, TYPE_POSITIVE_FLOAT, false},
+      {{"D MP/s"},          8,  3, TYPE_POSITIVE_FLOAT, false},
+      {{"Max norm"},       13,  8, TYPE_POSITIVE_FLOAT, false},
+      {{"SSIMULACRA2"},    13,  8, TYPE_POSITIVE_FLOAT, false},
+      {{"PSNR"},            7,  2, TYPE_POSITIVE_FLOAT, false},
+      {{"pnorm"},          13,  8, TYPE_POSITIVE_FLOAT, false},
+      {{"BPP*pnorm"},      16, 12, TYPE_POSITIVE_FLOAT, false},
+      {{"QABPP"},           8,  3, TYPE_POSITIVE_FLOAT, false},
+      {{"Bugs"},            7,  5, TYPE_COUNT, false},
+  };
+  // clang-format on
+
+  // Every extra metric gets one more column; they all share one descriptor
+  // because PrintHeader supplies their real names.
+  columns.insert(columns.end(), num_extra_metrics, ExtraMetricDescriptor());
+
+  return columns;
+}
+
+// Throughput [megapixels/s] as reported in the report table. A time of
+// exactly 0 yields 0 instead of dividing by zero.
+static double ComputeSpeed(size_t pixels, double time_s) {
+  return (time_s == 0.0) ? 0.0 : pixels * 1E-6 / time_s;
+}
+
+// Formats `value` for the column `label`: width - 1 characters wide (one
+// character is left for the separating space) with `precision` decimals.
+static std::string FormatFloat(const ColumnDescriptor& label, double value) {
+  std::string text =
+      StringPrintf("%*.*f", label.width - 1, label.precision, value);
+  if (text.size() < label.width) return text;
+
+  // Too wide for the column: reduce precision by cutting digits off the end.
+  // However, keep at least one digit to the right of the point, and
+  // especially the integer digits.
+  const size_t point = text.rfind('.');
+  if (point != std::string::npos) {
+    text.resize(std::max<int>(point + 2, label.width - 1));
+  }
+  return text;
+}
+
+}  // namespace
+
+std::string StringPrintf(const char* format, ...) {
+  char text[2000];  // Longer output is truncated by vsnprintf.
+  va_list ap;
+  va_start(ap, format);
+  vsnprintf(text, sizeof(text), format, ap);
+  va_end(ap);
+  return text;
+}
+
+// Adds the statistics of `victim` to this object. Most fields are plain sums;
+// extra_metrics grows as needed to hold every metric of `victim`.
+void BenchmarkStats::Assimilate(const BenchmarkStats& victim) {
+  total_input_files += victim.total_input_files;
+  total_input_pixels += victim.total_input_pixels;
+  total_compressed_size += victim.total_compressed_size;
+  total_adj_compressed_size += victim.total_adj_compressed_size;
+  total_time_encode += victim.total_time_encode;
+  total_time_decode += victim.total_time_decode;
+  total_errors += victim.total_errors;
+  // Pixel-weighted squares: ComputeColumns takes the root of their mean.
+  max_distance += pow(victim.max_distance, 2.0) * victim.total_input_pixels;
+  distance_p_norm += victim.distance_p_norm;
+  ssimulacra2 += victim.ssimulacra2;
+  psnr += victim.psnr;
+  distances.insert(distances.end(), victim.distances.begin(),
+                   victim.distances.end());
+  jxl_stats.Assimilate(victim.jxl_stats);
+  const std::vector<float>& extra = victim.extra_metrics;
+  if (extra_metrics.size() < extra.size()) extra_metrics.resize(extra.size());
+  for (size_t k = 0; k < extra.size(); ++k) extra_metrics[k] += extra[k];
+}
+
+// Prints the optional extra statistics selected on the command line.
+void BenchmarkStats::PrintMoreStats() const {
+  if (Args()->print_more_stats) jxl_stats.Print();
+  // With no recorded distances there is nothing to index: skip percentiles.
+  if (!Args()->print_distance_percentiles || distances.empty()) return;
+  std::vector<float> sorted = distances;
+  std::sort(sorted.begin(), sorted.end());
+  // Truncating toward zero keeps both indices below sorted.size().
+  const size_t p50idx = static_cast<size_t>(0.5 * sorted.size());
+  const size_t p90idx = static_cast<size_t>(0.9 * sorted.size());
+  printf("50th/90th percentile distance: %.8f  %.8f\n", sorted[p50idx],
+         sorted[p90idx]);
+}
+
+std::vector<ColumnValue> BenchmarkStats::ComputeColumns(
+    const std::string& codec_desc, size_t corpus_size) const {
+  JXL_CHECK(total_input_files == corpus_size);  // Whole corpus seen.
+  const double comp_bpp = total_compressed_size * 8.0 / total_input_pixels;
+  const double adj_comp_bpp =
+      total_adj_compressed_size * 8.0 / total_input_pixels;
+  // Note: this is not affected by alpha nor bit depth.
+  const double compression_speed =
+      ComputeSpeed(total_input_pixels, total_time_encode);
+  const double decompression_speed =
+      ComputeSpeed(total_input_pixels, total_time_decode);
+  const double psnr_avg = psnr / total_input_pixels;
+  const double p_norm_avg = distance_p_norm / total_input_pixels;
+  const double ssimulacra2_avg = ssimulacra2 / total_input_pixels;
+  const double bpp_p_norm = p_norm_avg * comp_bpp;
+
+  const double max_distance_avg = sqrt(max_distance / total_input_pixels);
+
+  std::vector<ColumnValue> values(
+      GetColumnDescriptors(extra_metrics.size()).size());
+
+  values[0].s = codec_desc;                // "Encoding"
+  values[1].i = total_input_pixels / 1000;  // "kPixels"
+  values[2].i = total_compressed_size;     // "Bytes"
+  values[3].f = comp_bpp;                  // "BPP"
+  values[4].f = compression_speed;         // "E MP/s"
+  values[5].f = decompression_speed;       // "D MP/s"
+  values[6].f = static_cast<double>(max_distance_avg);  // "Max norm"
+  values[7].f = ssimulacra2_avg;           // "SSIMULACRA2"
+  values[8].f = psnr_avg;                  // "PSNR"
+  values[9].f = p_norm_avg;                // "pnorm"
+  values[10].f = bpp_p_norm;               // "BPP*pnorm"
+  values[11].f = adj_comp_bpp;             // "QABPP"
+  values[12].i = total_errors;             // "Bugs"
+  for (size_t i = 0; i < extra_metrics.size(); i++) {
+    values[13 + i].f = extra_metrics[i] / total_input_files;  // Per-file mean.
+  }
+  return values;
+}
+
+// Renders one table row: every value is formatted according to its column
+// descriptor and padded with spaces to (at least) the column width.
+static std::string PrintFormattedEntries(
+    size_t num_extra_metrics, const std::vector<ColumnValue>& values) {
+  const auto& descriptors = GetColumnDescriptors(num_extra_metrics);
+
+  std::string out;
+  for (size_t i = 0; i < descriptors.size(); i++) {
+    if (!Args()->more_columns && descriptors[i].more) continue;
+    std::string value;
+    if (descriptors[i].type == TYPE_STRING) {
+      value = values[i].s;
+    } else if (descriptors[i].type == TYPE_SIZE) {
+      value = values[i].i ? StringPrintf("%" PRIdS, values[i].i) : "---";
+    } else if (descriptors[i].type == TYPE_POSITIVE_FLOAT) {
+      value = FormatFloat(descriptors[i], values[i].f);
+    } else if (descriptors[i].type == TYPE_COUNT) {
+      value = StringPrintf("%" PRIdS, values[i].i);
+    }
+
+    // Always keep at least one space between adjacent columns.
+    int numspaces = descriptors[i].width - value.size();
+    if (numspaces < 1) numspaces = 1;
+    // All except the first one are right-aligned, the first one is the name,
+    // others are numbers with digits matching from the right.
+    if (i == 0) out += value.c_str();
+    out += std::string(numspaces, ' ');
+    if (i != 0) out += value.c_str();
+  }
+  return out + "\n";
+}
+
+std::string BenchmarkStats::PrintLine(const std::string& codec_desc,
+                                      size_t corpus_size) const {
+  const auto row = ComputeColumns(codec_desc, corpus_size);  // One per column.
+  return PrintFormattedEntries(extra_metrics.size(), row);
+}
+
+// Returns the two header lines of the table: labels and a dashed rule.
+std::string PrintHeader(const std::vector<std::string>& extra_metrics_names) {
+  // Extra metrics are handled separately, using the names passed in.
+  const auto& descriptors = GetColumnDescriptors(0);
+  const uint32_t extra_width = ExtraMetricDescriptor().width;
+  std::string header;
+  for (size_t col = 0; col < descriptors.size(); col++) {
+    if (!Args()->more_columns && descriptors[col].more) continue;
+    const std::string& label = descriptors[col].label;
+    const int padding = descriptors[col].width - label.size();
+    // The first column is left-aligned, all the others are right-aligned.
+    if (col == 0) header += label.c_str();
+    header += std::string(padding, ' ');
+    if (col != 0) header += label.c_str();
+  }
+  for (const std::string& metric_name : extra_metrics_names) {
+    const int numspaces = extra_width - metric_name.size();
+    JXL_CHECK(numspaces >= 1);
+    header += std::string(numspaces, ' ') + metric_name;
+  }
+  header += '\n';
+  for (const auto& descriptor : descriptors) {
+    if (!Args()->more_columns && descriptor.more) continue;
+    header += std::string(descriptor.width, '-');
+  }
+  header += std::string(extra_width * extra_metrics_names.size(), '-');
+  return header + "\n";
+}
+
+// Builds the final "Aggregate:" row from the column values of all rows:
+// counts are summed, sizes and floats use a geometric mean.
+std::string PrintAggregate(
+    size_t num_extra_metrics,
+    const std::vector<std::vector<ColumnValue>>& aggregate) {
+  const auto& descriptors = GetColumnDescriptors(num_extra_metrics);
+
+  for (size_t i = 0; i < aggregate.size(); i++) {
+    // Check when statistics has wrong amount of column entries
+    JXL_CHECK(aggregate[i].size() == descriptors.size());
+  }
+
+  std::vector<ColumnValue> result(descriptors.size());
+
+  // Statistics for the aggregate row are combined together with different
+  // formulas than Assimilate uses for combining the statistics of files.
+  for (size_t col = 0; col < descriptors.size(); col++) {
+    const ColumnType type = descriptors[col].type;
+    if (type == TYPE_STRING) {
+      // Only the first column carries a label; others have no aggregate.
+      result[col].s = (col == 0) ? "Aggregate:" : "---";
+      continue;
+    }
+    if (type == TYPE_COUNT) {
+      size_t total = 0;
+      for (const std::vector<ColumnValue>& row : aggregate) {
+        total += row[col].i;
+      }
+      result[col].i = total;
+      continue;
+    }
+
+    // Geometric mean, computed in the log domain. Entries that are not
+    // strictly positive are skipped.
+    double log_total = 0;
+    size_t num_positive = 0;
+    for (const std::vector<ColumnValue>& row : aggregate) {
+      const double entry = (type == TYPE_SIZE) ? row[col].i : row[col].f;
+      if (!(entry > 0)) continue;
+      num_positive++;
+      log_total += std::log2(entry);
+    }
+    const double geomean =
+        num_positive ? std::exp2(log_total / num_positive) : 0.0;
+    if (type == TYPE_SIZE) {
+      result[col].i = static_cast<size_t>(geomean + 0.5);
+    } else if (type == TYPE_POSITIVE_FLOAT) {
+      result[col].f = geomean;
+    } else {
+      JXL_ABORT("unknown entry type");
+    }
+  }
+
+  return PrintFormattedEntries(num_extra_metrics, result);
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_stats.h b/third-party/libjxl/libjxl/tools/benchmark/benchmark_stats.h
new file mode 100644
index 0000000000..deca72af77
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_stats.h
@@ -0,0 +1,81 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_BENCHMARK_BENCHMARK_STATS_H_
+#define TOOLS_BENCHMARK_BENCHMARK_STATS_H_
+
+#include <jxl/stats.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace jpegxl {
+namespace tools {
+
+std::string StringPrintf(const char* format, ...);
+
+struct JxlStats {  // Owns a JxlEncoderStats handle plus an input counter.
+  JxlStats()
+      : num_inputs(0), stats(JxlEncoderStatsCreate(), JxlEncoderStatsDestroy) {}
+  void Assimilate(const JxlStats& victim) {  // Merges victim into this object.
+    num_inputs += victim.num_inputs;
+    JxlEncoderStatsMerge(stats.get(), victim.stats.get());
+  }
+  void Print() const;  // Prints every encoder statistic that is non-zero.
+
+  size_t num_inputs;
+  std::unique_ptr<JxlEncoderStats, decltype(JxlEncoderStatsDestroy)*> stats;
+};
+
+// The value of one table cell. Only the member that matches the column's
+// ColumnType is read when the cell is printed or aggregated.
+struct ColumnValue {
+  std::string s;  // for TYPE_STRING
+  size_t i;       // for TYPE_SIZE and TYPE_COUNT
+  double f;       // for TYPE_POSITIVE_FLOAT
+};
+
+struct BenchmarkStats {
+  void Assimilate(const BenchmarkStats& victim);  // Adds victim's statistics.
+
+  std::vector<ColumnValue> ComputeColumns(const std::string& codec_desc,
+                                          size_t corpus_size) const;
+
+  std::string PrintLine(const std::string& codec_desc,
+                        size_t corpus_size) const;
+
+  void PrintMoreStats() const;
+
+  size_t total_input_files = 0;
+  size_t total_input_pixels = 0;
+  size_t total_compressed_size = 0;      // In bytes
+  size_t total_adj_compressed_size = 0;  // Bytes scaled by max(1, distance)
+  double total_time_encode = 0.0;        // Seconds
+  double total_time_decode = 0.0;        // Seconds
+  float max_distance = -1.0;  // Max butteraugli score
+  // Pixel-weighted sum of per-image butteraugli p-norms (see DoCompress).
+  double distance_p_norm = 0.0;
+  double psnr = 0.0;         // Pixel-weighted sum
+  double ssimulacra2 = 0.0;  // Pixel-weighted sum
+  std::vector<float> distances;  // One butteraugli distance per compared frame
+  size_t total_errors = 0;
+  JxlStats jxl_stats;
+  std::vector<float> extra_metrics;  // Sums; ComputeColumns averages per file
+};
+
+std::string PrintHeader(const std::vector<std::string>& extra_metrics_names);
+
+// Given the rows of all printed statistics, print an aggregate row.
+std::string PrintAggregate(
+    size_t num_extra_metrics,
+    const std::vector<std::vector<ColumnValue>>& aggregate);
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_BENCHMARK_BENCHMARK_STATS_H_
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_utils.cc b/third-party/libjxl/libjxl/tools/benchmark/benchmark_utils.cc
new file mode 100644
index 0000000000..11753f21eb
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_utils.cc
@@ -0,0 +1,112 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define _DEFAULT_SOURCE  // for mkstemps().
+
+#include "tools/benchmark/benchmark_utils.h"
+
+// Not supported on Windows due to Linux-specific functions.
+// Not supported in Android NDK before API 28.
+#if !defined(_WIN32) && !defined(__EMSCRIPTEN__) && \
+    (!defined(__ANDROID_API__) || __ANDROID_API__ >= 28)
+
+#include <libgen.h>
+#include <spawn.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <fstream>
+
+#include "lib/jxl/image_bundle.h"
+#include "tools/file_io.h"
+
+extern char** environ;
+
+namespace jpegxl {
+namespace tools {
+// Creates an empty, uniquely named file "<basename>_XXXXXX.<extension>" where
+// mkstemps() replaces the X's; ok_ records whether that succeeded.
+TemporaryFile::TemporaryFile(std::string basename, std::string extension) {
+  // mkstemps() keeps this many trailing characters: the dot + extension.
+  const auto suffix_size = 1 + extension.size();
+  temp_filename_ = std::move(basename) + "_XXXXXX." + std::move(extension);
+  const int fd = mkstemps(&temp_filename_[0], suffix_size);
+  ok_ = (fd != -1);
+  if (ok_) close(fd);
+}
+TemporaryFile::~TemporaryFile() {
+  // Only remove the file if mkstemps() actually created it.
+  if (!ok_) return;
+  unlink(temp_filename_.c_str());
+}
+
+Status TemporaryFile::GetFileName(std::string* const output) const {
+  JXL_RETURN_IF_ERROR(ok_);  // Fails if the file could not be created.
+  *output = temp_filename_;
+  return true;
+}
+
+// Returns the last path component of `filename` without its final extension.
+std::string GetBaseName(std::string filename) {
+  // basename() takes a mutable buffer, so hand it our own copy of the path.
+  std::string path = std::move(filename);
+  const std::string leaf = basename(&path[0]);
+  const size_t dot = leaf.rfind('.');
+  // Without a dot there is no extension to strip.
+  return dot == std::string::npos ? leaf : leaf.substr(0, dot);
+}
+
+Status RunCommand(const std::string& command,
+                  const std::vector<std::string>& arguments, bool quiet) {
+  std::vector<char*> args = {const_cast<char*>(command.c_str())};  // argv[0]
+  args.reserve(arguments.size() + 2);
+  for (const std::string& argument : arguments) {
+    args.push_back(const_cast<char*>(argument.c_str()));
+  }
+  args.push_back(nullptr);  // argv must be null-terminated.
+  pid_t pid;
+  posix_spawn_file_actions_t file_actions;
+  posix_spawn_file_actions_init(&file_actions);
+  if (quiet) {
+    posix_spawn_file_actions_addclose(&file_actions, STDOUT_FILENO);
+    posix_spawn_file_actions_addclose(&file_actions, STDERR_FILENO);
+  }
+  const int spawn_error = posix_spawnp(&pid, command.c_str(), &file_actions,
+                                       nullptr, args.data(), environ);
+  posix_spawn_file_actions_destroy(&file_actions);  // Also on spawn failure.
+  JXL_RETURN_IF_ERROR(spawn_error == 0);
+  int wstatus = 0;
+  if (waitpid(pid, &wstatus, 0) != pid) return JXL_FAILURE("waitpid failed");
+  return WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == EXIT_SUCCESS;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#else
+
+namespace jpegxl {
+namespace tools {
+
+TemporaryFile::TemporaryFile(std::string basename, std::string extension) {}
+TemporaryFile::~TemporaryFile() {}  // This variant never creates a file.
+Status TemporaryFile::GetFileName(std::string* const output) const {
+  (void)ok_;  // Presumably silences an unused-member warning; ok_ is not used.
+  return JXL_FAILURE("Not supported on this build");
+}
+
+std::string GetBaseName(std::string filename) { return filename; }  // No-op.
+
+Status RunCommand(const std::string& command,
+                  const std::vector<std::string>& arguments, bool quiet) {
+  return JXL_FAILURE("Not supported on this build");  // Nothing is spawned.
+}
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // !_WIN32 && !__EMSCRIPTEN__ && (!__ANDROID_API__ || >= 28)
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_utils.h b/third-party/libjxl/libjxl/tools/benchmark/benchmark_utils.h
new file mode 100644
index 0000000000..5df2bec59a
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_utils.h
@@ -0,0 +1,42 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_BENCHMARK_BENCHMARK_UTILS_H_
+#define TOOLS_BENCHMARK_BENCHMARK_UTILS_H_
+
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+
+namespace jpegxl {
+namespace tools {
+
+using ::jxl::Status;
+
+class TemporaryFile final {  // A temporary file tied to this object's lifetime.
+ public:
+  explicit TemporaryFile(std::string basename, std::string extension);
+  TemporaryFile(const TemporaryFile&) = delete;
+  TemporaryFile& operator=(const TemporaryFile&) = delete;
+  ~TemporaryFile();  // POSIX build: unlinks the file if it was created.
+  Status GetFileName(std::string* output) const;  // Fails if creation failed.
+
+ private:
+  bool ok_ = true;  // Set to false when creating the file fails.
+
+  std::string temp_filename_;
+};
+
+std::string GetBaseName(std::string filename);
+
+Status RunCommand(const std::string& command,
+                  const std::vector<std::string>& arguments,
+                  bool quiet = false);
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_BENCHMARK_BENCHMARK_UTILS_H_
diff --git a/third-party/libjxl/libjxl/tools/benchmark/benchmark_xl.cc b/third-party/libjxl/libjxl/tools/benchmark/benchmark_xl.cc
new file mode 100644
index 0000000000..f6a08cd168
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/benchmark_xl.cc
@@ -0,0 +1,1145 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/decode.h>
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <memory>
+#include <mutex>
+#include <numeric>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/dec/color_hints.h"
+#include "lib/extras/enc/apng.h"
+#include "lib/extras/metrics.h"
+#include "lib/extras/time.h"
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/base/cache_aligned.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/jpeg/enc_jpeg_data.h"
+#include "tools/benchmark/benchmark_args.h"
+#include "tools/benchmark/benchmark_codec.h"
+#include "tools/benchmark/benchmark_file_io.h"
+#include "tools/benchmark/benchmark_stats.h"
+#include "tools/benchmark/benchmark_utils.h"
+#include "tools/codec_config.h"
+#include "tools/file_io.h"
+#include "tools/speed_stats.h"
+#include "tools/ssimulacra2.h"
+#include "tools/thread_pool_internal.h"
+
+namespace jpegxl {
+namespace tools {
+namespace {
+
+using ::jxl::ButteraugliParams;
+using ::jxl::CodecInOut;
+using ::jxl::ColorEncoding;
+using ::jxl::Image3F;
+using ::jxl::ImageBundle;
+using ::jxl::ImageF;
+using ::jxl::PaddedBytes;
+using ::jxl::Rng;
+using ::jxl::Status;
+using ::jxl::ThreadPool;
+
+Status WriteImage(Image3F&& image, ThreadPool* pool,
+                  const std::string& filename) {
+  CodecInOut io;  // Wraps the pixels so that Encode() can consume them.
+  io.metadata.m.SetUintSamples(8);
+  io.metadata.m.color_encoding = ColorEncoding::SRGB();  // Tag as sRGB.
+  io.SetFromImage(std::move(image), io.metadata.m.color_encoding);
+  std::vector<uint8_t> encoded;  // Filled by Encode(), then written to disk.
+  return Encode(io, filename, &encoded, pool) && WriteFile(filename, encoded);
+}
+
+Status ReadPNG(const std::string& filename, Image3F* image) {
+  CodecInOut io;
+  std::vector<uint8_t> encoded;  // Raw bytes of the file.
+  JXL_CHECK(ReadFile(filename, &encoded));
+  JXL_CHECK(jxl::SetFromBytes(jxl::Span<const uint8_t>(encoded),
+                              jxl::extras::ColorHints(), &io));
+  *image = Image3F(io.xsize(), io.ysize());  // Allocate at the decoded size.
+  CopyImageTo(*io.Main().color(), image);  // Copy the main frame's color.
+  return true;  // Failures trip JXL_CHECK above instead of returning false.
+}
+
+std::string CodecToExtension(std::string codec_name, char sep) {
+  std::string result;
+  // Emit the sep-separated tokens in reverse order ("jxl:d1" -> ".d1.jxl").
+  // A token may end at index 0, so the loop must also run for pos == 0.
+  int pos = static_cast<int>(codec_name.size()) - 1;
+  while (pos >= 0) {
+    int prev = codec_name.find_last_of(sep, pos);
+    if (prev > pos) prev = -1;
+    result += '.' + codec_name.substr(prev + 1, pos - prev);
+    pos = prev - 1;
+  }
+  return result;
+}
+
+void DoCompress(const std::string& filename, const CodecInOut& io,
+                const std::vector<std::string>& extra_metrics_commands,
+                ImageCodec* codec, ThreadPool* inner_pool,
+                std::vector<uint8_t>* compressed, BenchmarkStats* s) {
+  ++s->total_input_files;
+
+  if (io.frames.size() != 1) {
+    // Multiple frames not supported (io.xsize() will checkfail)
+    s->total_errors++;
+    if (!Args()->silent_errors) {
+      JXL_WARNING("multiframe input image not supported %s", filename.c_str());
+    }
+    return;
+  }
+  const size_t xsize = io.xsize();
+  const size_t ysize = io.ysize();
+  const size_t input_pixels = xsize * ysize;
+
+  jpegxl::tools::SpeedStats speed_stats;
+  jpegxl::tools::SpeedStats::Summary summary;
+
+  bool valid = true;  // false if roundtrip, encoding or decoding errors occur.
+
+  if (!Args()->decode_only && (io.xsize() == 0 || io.ysize() == 0)) {
+    // This means the benchmark couldn't load the image, e.g. due to invalid
+    // ICC profile. Warning message about that was already printed. Continue
+    // this function to indicate it as error in the stats.
+    valid = false;
+  }
+
+  std::string ext = FileExtension(filename);
+  if (valid && !Args()->decode_only) {
+    for (size_t i = 0; i < Args()->encode_reps; ++i) {
+      if (codec->CanRecompressJpeg() && (ext == ".jpg" || ext == ".jpeg")) {
+        std::vector<uint8_t> data_in;
+        JXL_CHECK(ReadFile(filename, &data_in));
+        JXL_CHECK(
+            codec->RecompressJpeg(filename, data_in, compressed, &speed_stats));
+      } else {
+        Status status = codec->Compress(filename, &io, inner_pool, compressed,
+                                        &speed_stats);
+        if (!status) {
+          valid = false;
+          if (!Args()->silent_errors) {
+            std::string message = codec->GetErrorMessage();
+            if (!message.empty()) {
+              fprintf(stderr, "Error in %s codec: %s\n",
+                      codec->description().c_str(), message.c_str());
+            } else {
+              fprintf(stderr, "Error in %s codec\n",
+                      codec->description().c_str());
+            }
+          }
+        }
+      }
+    }
+    JXL_CHECK(speed_stats.GetSummary(&summary));
+    s->total_time_encode += summary.central_tendency;
+  }
+
+  if (valid && Args()->decode_only) {
+    std::vector<uint8_t> data_in;
+    JXL_CHECK(ReadFile(filename, &data_in));
+    compressed->insert(compressed->end(), data_in.begin(), data_in.end());
+  }
+
+  // Decompress
+  CodecInOut io2;
+  io2.metadata.m = io.metadata.m;
+  if (valid) {
+    speed_stats = jpegxl::tools::SpeedStats();
+    for (size_t i = 0; i < Args()->decode_reps; ++i) {
+      if (!codec->Decompress(filename, Span<const uint8_t>(*compressed),
+                             inner_pool, &io2, &speed_stats)) {
+        if (!Args()->silent_errors) {
+          fprintf(stderr,
+                  "%s failed to decompress encoded image. Original source:"
+                  " %s\n",
+                  codec->description().c_str(), filename.c_str());
+        }
+        valid = false;
+      }
+      // TODO(veluca): this is a hack. codec->Decompress should set the bitdepth
+      // correctly, but for jxl it currently sets it from the pixel format (i.e.
+      // 32-bit float).
+      io2.metadata.m.bit_depth = io.metadata.m.bit_depth;
+    }
+    for (const auto& frame : io2.frames) {
+      s->total_input_pixels += frame.color().xsize() * frame.color().ysize();
+    }
+    JXL_CHECK(speed_stats.GetSummary(&summary));
+    s->total_time_decode += summary.central_tendency;
+  }
+
+  std::string name = FileBaseName(filename);
+  std::string codec_name = codec->description();
+
+  if (!valid) {
+    s->total_errors++;
+  }
+
+  if (io.frames.size() != io2.frames.size()) {
+    if (!Args()->silent_errors) {
+      // Animated gifs not supported yet?
+      fprintf(stderr,
+              "Frame sizes not equal, is this an animated gif? %s %s %" PRIuS
+              " %" PRIuS "\n",
+              codec_name.c_str(), name.c_str(), io.frames.size(),
+              io2.frames.size());
+    }
+    valid = false;
+  }
+
+  bool skip_butteraugli = Args()->skip_butteraugli || Args()->decode_only;
+  ImageF distmap;
+  float max_distance = 1.0f;
+
+  if (valid && !skip_butteraugli) {
+    JXL_ASSERT(io.frames.size() == io2.frames.size());
+    for (size_t i = 0; i < io.frames.size(); i++) {
+      const ImageBundle& ib1 = io.frames[i];
+      ImageBundle& ib2 = io2.frames[i];
+
+      // Verify output
+      float distance;
+      if (SameSize(ib1, ib2)) {
+        ButteraugliParams params;
+        if (ib1.metadata()->IntensityTarget() !=
+            ib2.metadata()->IntensityTarget()) {
+          fprintf(stderr,
+                  "WARNING: input and output images have different intensity "
+                  "targets");
+        }
+        params.intensity_target = ib1.metadata()->IntensityTarget();
+        // Hack the default intensity target value to be 80.0, the intensity
+        // target of sRGB images and a more reasonable viewing default than
+        // JPEG XL file format's default.
+        if (fabs(params.intensity_target - 255.0f) < 1e-3) {
+          params.intensity_target = 80.0;
+        }
+        distance =
+            ButteraugliDistance(ib1, ib2, params, jxl::GetJxlCms(), &distmap,
+                                inner_pool, codec->IgnoreAlpha());
+      } else {
+        // TODO(veluca): re-upsample and compute proper distance.
+        distance = 1e+4f;
+        distmap = ImageF(1, 1);
+        distmap.Row(0)[0] = distance;
+      }
+      // Update stats
+      s->psnr +=
+          compressed->empty()
+              ? 0
+              : jxl::ComputePSNR(ib1, ib2, jxl::GetJxlCms()) * input_pixels;
+      s->distance_p_norm +=
+          ComputeDistanceP(distmap, ButteraugliParams(), Args()->error_pnorm) *
+          input_pixels;
+      s->ssimulacra2 += ComputeSSIMULACRA2(ib1, ib2).Score() * input_pixels;
+      s->max_distance = std::max(s->max_distance, distance);
+      s->distances.push_back(distance);
+      max_distance = std::max(max_distance, distance);
+    }
+  }
+
+  s->total_compressed_size += compressed->size();
+  s->total_adj_compressed_size += compressed->size() * max_distance;
+  codec->GetMoreStats(s);
+
+  if (io2.frames.size() == 1 &&
+      (Args()->save_compressed || Args()->save_decompressed)) {
+    JXL_ASSERT(io2.frames.size() == 1);
+    ImageBundle& ib2 = io2.Main();
+
+    // By default the benchmark will save the image after roundtrip with the
+    // same color encoding as the image before roundtrip. Not all codecs
+    // necessarily preserve the amount of channels (1 for gray, 3 for RGB)
+    // though, since not all image formats necessarily allow a way to remember
+    // what amount of channels you happened to give the benchmark codec
+    // input (say, an RGB-only format) and that is fine since in the end what
+    // matters is that the pixels look the same on a 3-channel RGB monitor
+    // while using grayscale encoding is an internal compression optimization.
+    // If that is the case, output with the current color model instead,
+    // because CodecInOut does not automatically convert between 1 or 3
+    // channels, and giving a ColorEncoding  with a different amount of
+    // channels is not allowed.
+    const ColorEncoding* c_desired =
+        (ib2.metadata()->color_encoding.Channels() ==
+         ib2.c_current().Channels())
+            ? &ib2.metadata()->color_encoding
+            : &ib2.c_current();
+    // Allow overriding via --output_encoding.
+    if (!Args()->output_description.empty()) {
+      c_desired = &Args()->output_encoding;
+    }
+
+    std::string dir = FileDirName(filename);
+    std::string outdir =
+        Args()->output_dir.empty() ? dir + "/out" : Args()->output_dir;
+    std::string compressed_fn =
+        outdir + "/" + name + CodecToExtension(codec_name, ':');
+    std::string decompressed_fn = compressed_fn + Args()->output_extension;
+    std::string heatmap_fn;
+    if (jxl::extras::GetAPNGEncoder()) {
+      heatmap_fn = compressed_fn + ".heatmap.png";
+    } else {
+      heatmap_fn = compressed_fn + ".heatmap.ppm";
+    }
+    JXL_CHECK(MakeDir(outdir));
+    if (Args()->save_compressed) {
+      JXL_CHECK(WriteFile(compressed_fn, *compressed));
+    }
+    if (Args()->save_decompressed && valid) {
+      // For verifying HDR: scale output.
+      if (Args()->mul_output != 0.0) {
+        fprintf(stderr, "WARNING: scaling outputs by %f\n", Args()->mul_output);
+        JXL_CHECK(ib2.TransformTo(ColorEncoding::LinearSRGB(ib2.IsGray()),
+                                  jxl::GetJxlCms(), inner_pool));
+        ScaleImage(static_cast<float>(Args()->mul_output), ib2.color());
+      }
+
+      std::vector<uint8_t> encoded;
+      JXL_CHECK(Encode(io2, *c_desired,
+                       ib2.metadata()->bit_depth.bits_per_sample,
+                       decompressed_fn, &encoded));
+      JXL_CHECK(WriteFile(decompressed_fn, encoded));
+      if (!skip_butteraugli) {
+        float good = Args()->heatmap_good > 0.0f
+                         ? Args()->heatmap_good
+                         : jxl::ButteraugliFuzzyInverse(1.5);
+        float bad = Args()->heatmap_bad > 0.0f
+                        ? Args()->heatmap_bad
+                        : jxl::ButteraugliFuzzyInverse(0.5);
+        if (Args()->save_heatmap) {
+          JXL_CHECK(WriteImage(CreateHeatMapImage(distmap, good, bad),
+                               inner_pool, heatmap_fn));
+        }
+      }
+    }
+  }
+  if (!extra_metrics_commands.empty()) {
+    CodecInOut in_copy;
+    in_copy.SetFromImage(std::move(*io.Main().Copy().color()),
+                         io.Main().c_current());
+    TemporaryFile tmp_in("original", "pfm");
+    TemporaryFile tmp_out("decoded", "pfm");
+    TemporaryFile tmp_res("result", "txt");
+    std::string tmp_in_fn, tmp_out_fn, tmp_res_fn;
+    JXL_CHECK(tmp_in.GetFileName(&tmp_in_fn));
+    JXL_CHECK(tmp_out.GetFileName(&tmp_out_fn));
+    JXL_CHECK(tmp_res.GetFileName(&tmp_res_fn));
+
+    // Convert everything to non-linear SRGB - this is what most metrics expect.
+    const ColorEncoding& c_desired = ColorEncoding::SRGB(io.Main().IsGray());
+    std::vector<uint8_t> encoded;
+    JXL_CHECK(Encode(io, c_desired, io.metadata.m.bit_depth.bits_per_sample,
+                     tmp_in_fn, &encoded));
+    JXL_CHECK(WriteFile(tmp_in_fn, encoded));
+    JXL_CHECK(Encode(io2, c_desired, io.metadata.m.bit_depth.bits_per_sample,
+                     tmp_out_fn, &encoded));
+    JXL_CHECK(WriteFile(tmp_out_fn, encoded));
+    if (io.metadata.m.IntensityTarget() != io2.metadata.m.IntensityTarget()) {
+      fprintf(stderr,
+              "WARNING: original and decoded have different intensity targets "
+              "(%f vs. %f).\n",
+              io.metadata.m.IntensityTarget(),
+              io2.metadata.m.IntensityTarget());
+    }
+    std::string intensity_target;
+    {
+      std::ostringstream intensity_target_oss;
+      intensity_target_oss << io.metadata.m.IntensityTarget();
+      intensity_target = intensity_target_oss.str();
+    }
+    for (size_t i = 0; i < extra_metrics_commands.size(); i++) {
+      float res = nanf("");
+      bool error = false;
+      if (RunCommand(extra_metrics_commands[i],
+                     {tmp_in_fn, tmp_out_fn, tmp_res_fn, intensity_target})) {
+        FILE* f = fopen(tmp_res_fn.c_str(), "r");
+        if (fscanf(f, "%f", &res) != 1) {
+          error = true;
+        }
+        fclose(f);
+      } else {
+        error = true;
+      }
+      if (error) {
+        fprintf(stderr,
+                "WARNING: Computation of metric with command %s failed\n",
+                extra_metrics_commands[i].c_str());
+      }
+      s->extra_metrics.push_back(res);
+    }
+  }
+
+  if (Args()->show_progress) {
+    fprintf(stderr, ".");
+    fflush(stderr);
+  }
+}
+
+// Builds a base64 "data:" URI from the contents of `filename`, for embedding
+// the image directly in an HTML page. Returns an empty string when the file
+// cannot be read.
+std::string Base64Image(const std::string& filename) {
+  PaddedBytes contents;
+  if (!ReadFile(filename, &contents)) return "";
+
+  static const char* kAlphabet =
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+  const size_t num_bytes = contents.size();
+  std::string encoded;
+  for (size_t pos = 0; pos < num_bytes; pos += 3) {
+    // Number of input bytes left for this (possibly partial) 3-byte group.
+    const size_t remaining = num_bytes - pos;
+    const uint32_t b0 = contents[pos];
+    const uint32_t b1 = (remaining > 1) ? contents[pos + 1] : 0;
+    const uint32_t b2 = (remaining > 2) ? contents[pos + 2] : 0;
+    const uint32_t group = (b0 << 16) | (b1 << 8) | b2;
+    // k input bytes yield k + 1 symbols; the rest of the quad is '=' padding.
+    for (size_t k = 0; k < 4; ++k) {
+      const size_t shift = 6 * (3 - k);
+      if (k <= remaining) {
+        encoded += kAlphabet[(group >> shift) & 63];
+      } else {
+        encoded += '=';
+      }
+    }
+  }
+  // NOTE: Chrome supports max 2MB of data this way for URLs, but appears to
+  // support larger images anyway as long as it's embedded in the HTML file
+  // itself. If more data is needed, use createObjectURL.
+  return "data:image;base64," + encoded;
+}
+
+struct Task {  // one (input image, codec method) benchmark job
+  ImageCodecPtr codec;  // codec instance created for this task's method
+  size_t idx_image;  // index into the input filenames / loaded images
+  size_t idx_method;  // index into the list of methods (codec descriptions)
+  const CodecInOut* image;  // not owned; assigned in RunTasks before compressing
+  BenchmarkStats stats;  // passed to DoCompress, which accumulates into it
+};
+
+// Writes "index.<codec>.html" into the output directory. The page shows one
+// clickable <div> per input image; clicking toggles between the compressed
+// preview, the original and (when `add_heatmap` is set) the heatmap image.
+//
+// codec_desc:     codec description; ':' is replaced by '_' for file names.
+// fnames:         paths of the original input images.
+// tasks, images:  indexed like `fnames` (entry i belongs to fnames[i]).
+// add_heatmap:    also emit the heatmap <img> and the 3-state toggle script.
+// self_contained: embed all images as base64 data URIs instead of linking.
+//
+// The report is written to the output directory computed for the last entry
+// of `fnames`.
+void WriteHtmlReport(const std::string& codec_desc,
+                     const std::vector<std::string>& fnames,
+                     const std::vector<const Task*>& tasks,
+                     const std::vector<const CodecInOut*>& images,
+                     bool add_heatmap, bool self_contained) {
+  std::string toggle_js =
+      "<script type=\"text/javascript\">\n"
+      "  var codecname = '" +
+      codec_desc + "';\n";
+  if (add_heatmap) {
+    toggle_js += R"(
+  var maintitle = codecname + ' - click images to toggle, press space to' +
+      ' toggle all, h to toggle all heatmaps. Zoom in with CTRL+wheel or' +
+      ' CTRL+plus.';
+  document.title = maintitle;
+  var counter = [];
+  function setState(i, s) {
+    var preview = document.getElementById("preview" + i);
+    var orig = document.getElementById("orig" + i);
+    var hm = document.getElementById("hm" + i);
+    if (s == 0) {
+      preview.style.display = 'none';
+      orig.style.display = 'block';
+      hm.style.display = 'none';
+    } else if (s == 1) {
+      preview.style.display = 'block';
+      orig.style.display = 'none';
+      hm.style.display = 'none';
+    } else if (s == 2) {
+      preview.style.display = 'none';
+      orig.style.display = 'none';
+      hm.style.display = 'block';
+    }
+  }
+  function toggle(i) {
+    for (index = counter.length; index <= i; index++) {
+      counter.push(1);
+    }
+    setState(i, counter[i]);
+    counter[i] = (counter[i] + 1) % 3;
+    document.title = maintitle;
+  }
+  var toggleall_state = 1;
+  document.body.onkeydown = function(e) {
+    // space (32) to toggle orig/compr, 'h' (72) to toggle heatmap/compr
+    if (e.keyCode == 32 || e.keyCode == 72) {
+      var divs = document.getElementsByTagName('div');
+      var key_state = (e.keyCode == 32) ? 0 : 2;
+      toggleall_state = (toggleall_state == key_state) ? 1 : key_state;
+      document.title = codecname + ' - ' + (toggleall_state == 0 ?
+          'originals' : (toggleall_state == 1 ? 'compressed' : 'heatmaps'));
+      for (var i = 0; i < divs.length; i++) {
+        setState(i, toggleall_state);
+      }
+      return false;
+    }
+  };
+</script>
+)";
+  } else {
+    toggle_js += R"(
+  var maintitle = codecname + ' - click images to toggle, press space to' +
+      ' toggle all. Zoom in with CTRL+wheel or CTRL+plus.';
+  document.title = maintitle;
+  var counter = [];
+  function setState(i, s) {
+    var preview = document.getElementById("preview" + i);
+    var orig = document.getElementById("orig" + i);
+    if (s == 0) {
+      preview.style.display = 'none';
+      orig.style.display = 'block';
+    } else if (s == 1) {
+      preview.style.display = 'block';
+      orig.style.display = 'none';
+    }
+  }
+  function toggle(i) {
+    for (index = counter.length; index <= i; index++) {
+      counter.push(1);
+    }
+    setState(i, counter[i]);
+    counter[i] = 1 - counter[i];
+    document.title = maintitle;
+  }
+  var toggleall_state = 1;
+  document.body.onkeydown = function(e) {
+    // space (32) to toggle orig/compr
+    if (e.keyCode == 32) {
+      var divs = document.getElementsByTagName('div');
+      toggleall_state = 1 - toggleall_state;
+      document.title = codecname + ' - ' + (toggleall_state == 0 ?
+          'originals' : 'compressed');
+      for (var i = 0; i < divs.length; i++) {
+        setState(i, toggleall_state);
+      }
+      return false;
+    }
+  };
+</script>
+)";
+  }
+  std::string out_html;
+  std::string outdir;
+  out_html += "<body bgcolor=\"#000\">\n";
+  out_html += "<style>img { image-rendering: pixelated; }</style>\n";
+  std::string codec_name = codec_desc;
+  // Make compatible for filename
+  std::replace(codec_name.begin(), codec_name.end(), ':', '_');
+  for (size_t i = 0; i < fnames.size(); ++i) {
+    std::string name = FileBaseName(fnames[i]);
+    std::string dir = FileDirName(fnames[i]);
+    outdir = Args()->output_dir.empty() ? dir + "/out" : Args()->output_dir;
+    std::string name_out = name + CodecToExtension(codec_name, '_');
+    if (Args()->html_report_use_decompressed) {
+      name_out += Args()->output_extension;
+    }
+    std::string heatmap_out =
+        name + CodecToExtension(codec_name, '_') + ".heatmap.png";
+
+    std::string fname_orig = fnames[i];
+    std::string fname_out = outdir + "/" + name_out;
+    std::string fname_heatmap = outdir + "/" + heatmap_out;
+    // By default the page links to the images; the output and heatmap URLs
+    // are relative to the directory the report is written to.
+    std::string url_orig = Args()->originals_url.empty()
+                               ? ("file://" + fnames[i])
+                               : (Args()->originals_url + "/" + name);
+    std::string url_out = name_out;
+    std::string url_heatmap = heatmap_out;
+    if (self_contained) {
+      url_orig = Base64Image(fname_orig);
+      url_out = Base64Image(fname_out);
+      url_heatmap = Base64Image(fname_heatmap);
+    }
+    std::string number = StringPrintf("%" PRIuS, i);
+    const CodecInOut& image = *images[i];
+    // Multi-frame inputs get a 0x0 container.
+    size_t xsize = image.frames.size() == 1 ? image.xsize() : 0;
+    size_t ysize = image.frames.size() == 1 ? image.ysize() : 0;
+    std::string html_width = StringPrintf("%" PRIuS "px", xsize);
+    std::string html_height = StringPrintf("%" PRIuS "px", ysize);
+    double bpp = tasks[i]->stats.total_compressed_size * 8.0 /
+                 tasks[i]->stats.total_input_pixels;
+    double pnorm =
+        tasks[i]->stats.distance_p_norm / tasks[i]->stats.total_input_pixels;
+    double max_dist = tasks[i]->stats.max_distance;
+    std::string compressed_title = StringPrintf(
+        "compressed. bpp: %f, pnorm: %f, max dist: %f", bpp, pnorm, max_dist);
+    out_html += "<div onclick=\"toggle(" + number +
+                ");\" style=\"display:inline-block;width:" + html_width +
+                ";height:" + html_height +
+                ";\">\n"
+                "  <img title=\"" +
+                compressed_title + "\" id=\"preview" + number + "\" src=";
+    out_html += "\"" + url_out + "\"style=\"display:block;\"/>\n";
+    out_html += "  <img title=\"original\" id=\"orig" + number + "\" src=";
+    out_html += "\"" + url_orig + "\"style=\"display:none;\"/>\n";
+    if (add_heatmap) {
+      // Must append: plain assignment here would throw away the <body> tag
+      // and every image emitted so far.
+      out_html += "  <img title=\"heatmap\" id=\"hm" + number + "\" src=";
+      out_html += "\"" + url_heatmap + "\"style=\"display:none;\"/>\n";
+    }
+    out_html += "</div>\n";
+  }
+  out_html += "</body>\n";
+  // The script goes after the body because it attaches a handler to
+  // document.body.
+  out_html += toggle_js;
+  JXL_CHECK(WriteFile(outdir + "/index." + codec_name + ".html", out_html));
+}
+
+// Prints the per-task details and the per-method statistics table, in the correct order but as
+// soon as possible when multithreaded tasks are done. The vectors given to the constructor are kept by pointer, not copied.
+struct StatPrinter {
+  StatPrinter(const std::vector<std::string>& methods,
+              const std::vector<std::string>& extra_metrics_names,
+              const std::vector<std::string>& fnames,
+              const std::vector<Task>& tasks)
+      : methods_(&methods),
+        extra_metrics_names_(&extra_metrics_names),
+        fnames_(&fnames),
+        tasks_(&tasks),
+        tasks_done_(0),
+        stats_printed_(0),
+        details_printed_(0) {
+    stats_done_.resize(methods.size(), 0);
+    details_done_.resize(tasks.size(), 0);
+    max_fname_width_ = 0;
+    for (const auto& fname : fnames) {
+      max_fname_width_ = std::max(max_fname_width_, FileBaseName(fname).size());
+    }
+    max_method_width_ = 0;
+    for (const auto& method : methods) {
+      max_method_width_ =
+          std::max(max_method_width_, FileBaseName(method).size());
+    }
+  }
+
+  void TaskDone(size_t task_index, const Task& t) {  // records one finished task and prints whatever is now due
+    std::lock_guard<std::mutex> guard(mutex);  // serializes all bookkeeping and printing below
+    tasks_done_++;
+    if (Args()->print_details || Args()->show_progress) {
+      if (Args()->print_details) {
+        // Render individual results as soon as they are ready and all previous
+        // ones in task order are ready.
+        details_done_[task_index] = 1;
+        if (task_index == details_printed_) {
+          while (details_printed_ < tasks_->size() &&
+                 details_done_[details_printed_]) {
+            PrintDetails((*tasks_)[details_printed_]);
+            details_printed_++;
+          }
+        }
+      }
+      // When using "show_progress" or "print_details", the table must be
+      // rendered at the very end, else the details or progress would be
+      // rendered in-between the table rows.
+      if (tasks_done_ == tasks_->size()) {
+        PrintStatsHeader();
+        for (size_t i = 0; i < methods_->size(); i++) {
+          PrintStats((*methods_)[i], i);
+        }
+        PrintStatsFooter();
+      }
+    } else {
+      if (tasks_done_ == 1) {
+        PrintStatsHeader();
+      }
+      // Render lines of the table as soon as it is ready and all previous
+      // lines have been printed.
+      stats_done_[t.idx_method]++;
+      if (stats_done_[t.idx_method] == fnames_->size() &&
+          t.idx_method == stats_printed_) {
+        while (stats_printed_ < stats_done_.size() &&
+               stats_done_[stats_printed_] == fnames_->size()) {
+          PrintStats((*methods_)[stats_printed_], stats_printed_);
+          stats_printed_++;
+        }
+      }
+      if (tasks_done_ == tasks_->size()) {
+        PrintStatsFooter();
+      }
+    }
+  }
+
+  void PrintDetails(const Task& t) {  // prints one line for a single task, as CSV or as aligned text
+    double comp_bpp =
+        t.stats.total_compressed_size * 8.0 / t.stats.total_input_pixels;
+    double p_norm = t.stats.distance_p_norm / t.stats.total_input_pixels;
+    double psnr = t.stats.psnr / t.stats.total_input_pixels;
+    double ssimulacra2 = t.stats.ssimulacra2 / t.stats.total_input_pixels;
+    double bpp_p_norm = p_norm * comp_bpp;
+
+    const double adj_comp_bpp =
+        t.stats.total_adj_compressed_size * 8.0 / t.stats.total_input_pixels;
+
+    size_t pixels = t.stats.total_input_pixels;
+
+    const double enc_mps =
+        t.stats.total_input_pixels / (1000000.0 * t.stats.total_time_encode);
+    const double dec_mps =
+        t.stats.total_input_pixels / (1000000.0 * t.stats.total_time_decode);
+    if (Args()->print_details_csv) {  // note: this CSV row has no ssimulacra2 column
+      printf("%s,%s,%" PRIdS ",%" PRIdS ",%" PRIdS
+             ",%.8f,%.8f,%.8f,%.8f,%.8f,%.8f,%.8f,%.8f",
+             (*methods_)[t.idx_method].c_str(),
+             FileBaseName((*fnames_)[t.idx_image]).c_str(),
+             t.stats.total_errors, t.stats.total_compressed_size, pixels,
+             enc_mps, dec_mps, comp_bpp, t.stats.max_distance, psnr, p_norm,
+             bpp_p_norm, adj_comp_bpp);
+      for (float m : t.stats.extra_metrics) {
+        printf(",%.8f", m);
+      }
+      printf("\n");
+    } else {
+      printf("%s", (*methods_)[t.idx_method].c_str());
+      for (size_t i = (*methods_)[t.idx_method].size(); i <= max_method_width_;
+           i++) {
+        printf(" ");  // pad the method column
+      }
+      printf("%s", FileBaseName((*fnames_)[t.idx_image]).c_str());
+      for (size_t i = FileBaseName((*fnames_)[t.idx_image]).size();
+           i <= max_fname_width_; i++) {
+        printf(" ");  // pad the file name column
+      }
+      printf(
+          "error:%" PRIdS "    size:%8" PRIdS "    pixels:%9" PRIdS
+          "    enc_speed:%8.8f    dec_speed:%8.8f    bpp:%10.8f    dist:%10.8f"
+          "    psnr:%10.8f    ssimulacra2:%.2f   p:%10.8f    bppp:%10.8f    "
+          "qabpp:%10.8f ",
+          t.stats.total_errors, t.stats.total_compressed_size, pixels, enc_mps,
+          dec_mps, comp_bpp, t.stats.max_distance, psnr, ssimulacra2, p_norm,
+          bpp_p_norm, adj_comp_bpp);
+      for (size_t i = 0; i < t.stats.extra_metrics.size(); i++) {
+        printf(" %s:%.8f", (*extra_metrics_names_)[i].c_str(),
+               t.stats.extra_metrics[i]);
+      }
+      printf("\n");
+    }
+    fflush(stdout);
+  }
+
+  void PrintStats(const std::string& method, size_t idx_method) {  // prints the table row for one method
+    // Assimilate all tasks with the same idx_method.
+    BenchmarkStats method_stats;
+    std::vector<const CodecInOut*> images;
+    std::vector<const Task*> tasks;
+    for (const Task& t : *tasks_) {
+      if (t.idx_method == idx_method) {
+        method_stats.Assimilate(t.stats);
+        images.push_back(t.image);
+        tasks.push_back(&t);
+      }
+    }
+
+    std::string out;
+
+    method_stats.PrintMoreStats();  // not concurrent
+    out += method_stats.PrintLine(method, fnames_->size());
+
+    if (Args()->write_html_report) {
+      WriteHtmlReport(method, *fnames_, tasks, images,
+                      Args()->save_heatmap && Args()->html_report_add_heatmap,
+                      Args()->html_report_self_contained);
+    }
+
+    stats_aggregate_.push_back(
+        method_stats.ComputeColumns(method, fnames_->size()));
+
+    printf("%s", out.c_str());
+    fflush(stdout);
+  }
+
+  void PrintStatsHeader() {  // prints the table header, fenced with ``` in markdown mode
+    if (Args()->markdown) {
+      if (Args()->show_progress) {
+        fprintf(stderr, "\n");
+        fflush(stderr);
+      }
+      printf("```\n");
+    }
+    if (fnames_->size() == 1) printf("%s\n", (*fnames_)[0].c_str());
+    printf("%s", PrintHeader(*extra_metrics_names_).c_str());
+    fflush(stdout);
+  }
+
+  void PrintStatsFooter() {  // prints the aggregate over all method rows collected so far
+    printf(
+        "%s",
+        PrintAggregate(extra_metrics_names_->size(), stats_aggregate_).c_str());
+    if (Args()->markdown) printf("```\n");
+    printf("\n");
+    fflush(stdout);
+  }
+
+  const std::vector<std::string>* methods_;  // not owned
+  const std::vector<std::string>* extra_metrics_names_;  // not owned
+  const std::vector<std::string>* fnames_;  // not owned
+  const std::vector<Task>* tasks_;  // not owned
+
+  size_t tasks_done_;  // number of TaskDone() calls so far
+
+  size_t stats_printed_;  // next method row to print when rows are streamed
+  std::vector<size_t> stats_done_;  // per method: number of finished tasks
+
+  size_t details_printed_;  // next task whose detail line is to be printed
+  std::vector<size_t> details_done_;  // per task: 1 once it has finished
+
+  size_t max_fname_width_;  // longest input base name, for column alignment
+  size_t max_method_width_;  // longest method name, for column alignment
+
+  std::vector<std::vector<ColumnValue>> stats_aggregate_;  // one entry per PrintStats() call
+
+  std::mutex mutex;  // held by TaskDone() for its whole duration
+};
+
+// Drives a whole benchmark run: collects the methods, extra metrics and input
+// files from Args(), creates one Task per (image, method) pair, loads every
+// image once, runs the tasks on a thread pool and reports the statistics
+// through StatPrinter.
+class Benchmark {
+  using StringVec = std::vector<std::string>;
+
+ public:
+  // Return the exit code of the program.
+  static int Run() {
+    int ret = EXIT_SUCCESS;
+    {
+      const StringVec methods = GetMethods();
+      const StringVec extra_metrics_names = GetExtraMetricsNames();
+      const StringVec extra_metrics_commands = GetExtraMetricsCommands();
+      const StringVec fnames = GetFilenames();
+      // (non-const because Task.stats are updated)
+      std::vector<Task> tasks = CreateTasks(methods, fnames);
+
+      std::unique_ptr<ThreadPoolInternal> pool;
+      std::vector<std::unique_ptr<ThreadPoolInternal>> inner_pools;
+      InitThreads(tasks.size(), &pool, &inner_pools);
+
+      const std::vector<CodecInOut> loaded_images = LoadImages(fnames, &*pool);
+
+      if (RunTasks(methods, extra_metrics_names, extra_metrics_commands, fnames,
+                   loaded_images, &*pool, inner_pools, &tasks) != 0) {
+        ret = EXIT_FAILURE;
+        if (!Args()->silent_errors) {
+          fprintf(stderr, "There were error(s) in the benchmark.\n");
+        }
+      }
+    }
+
+    jxl::CacheAligned::PrintStats();
+    return ret;
+  }
+
+ private:
+  // Number of worker threads for the outer (per-task) pool. Returns 0 when
+  // a single thread would result.
+  static size_t NumOuterThreads(const size_t num_hw_threads,
+                                const size_t num_tasks) {
+    // Default to #cores
+    size_t num_threads = num_hw_threads;
+    if (Args()->num_threads >= 0) {
+      num_threads = static_cast<size_t>(Args()->num_threads);
+    }
+
+    // As a safety precaution, limit the number of threads to 4x the number of
+    // available CPUs.
+    num_threads =
+        std::min<size_t>(num_threads, 4 * std::thread::hardware_concurrency());
+
+    // Don't create more threads than there are tasks (pointless/wasteful).
+    num_threads = std::min(num_threads, num_tasks);
+
+    // Just one thread is counterproductive.
+    if (num_threads == 1) num_threads = 0;
+
+    return num_threads;
+  }
+
+  // Number of threads for each nested (per-worker) pool. Returns 0 when a
+  // single thread would result.
+  static int NumInnerThreads(const size_t num_hw_threads,
+                             const size_t num_threads) {
+    size_t num_inner;
+
+    // Default: distribute remaining cores among tasks.
+    if (Args()->inner_threads < 0) {
+      if (num_threads == 0) {
+        num_inner = num_hw_threads;
+      } else if (num_hw_threads <= num_threads) {
+        num_inner = 1;
+      } else {
+        num_inner = (num_hw_threads - num_threads) / num_threads;
+      }
+    } else {
+      num_inner = static_cast<size_t>(Args()->inner_threads);
+    }
+
+    // Just one thread is counterproductive.
+    if (num_inner == 1) num_inner = 0;
+
+    return num_inner;
+  }
+
+  // Creates the outer pool and one nested pool per outer worker (at least
+  // one), and logs the chosen thread counts to stderr.
+  static void InitThreads(
+      size_t num_tasks, std::unique_ptr<ThreadPoolInternal>* pool,
+      std::vector<std::unique_ptr<ThreadPoolInternal>>* inner_pools) {
+    const size_t num_hw_threads = std::thread::hardware_concurrency();
+    const size_t num_threads = NumOuterThreads(num_hw_threads, num_tasks);
+    const size_t num_inner = NumInnerThreads(num_hw_threads, num_threads);
+
+    fprintf(stderr,
+            "%" PRIuS " total threads, %" PRIuS " tasks, %" PRIuS
+            " threads, %" PRIuS " inner threads\n",
+            num_hw_threads, num_tasks, num_threads, num_inner);
+
+    pool->reset(new ThreadPoolInternal(num_threads));
+    // Main thread OR worker threads in pool each get a possibly empty nested
+    // pool (helps use all available cores when #tasks < #threads)
+    for (size_t i = 0; i < std::max<size_t>(num_threads, 1); ++i) {
+      inner_pools->emplace_back(new ThreadPoolInternal(num_inner));
+    }
+  }
+
+  // Returns the non-empty entries of the comma-separated codec argument.
+  static StringVec GetMethods() {
+    StringVec methods = SplitString(Args()->codec, ',');
+    for (auto it = methods.begin(); it != methods.end();) {
+      if (it->empty()) {
+        it = methods.erase(it);
+      } else {
+        ++it;
+      }
+    }
+    return methods;
+  }
+
+  // Returns the part before the first ':' of every non-empty entry of the
+  // comma-separated extra_metrics argument.
+  static StringVec GetExtraMetricsNames() {
+    StringVec metrics = SplitString(Args()->extra_metrics, ',');
+    for (auto it = metrics.begin(); it != metrics.end();) {
+      if (it->empty()) {
+        it = metrics.erase(it);
+      } else {
+        *it = SplitString(*it, ':')[0];
+        ++it;
+      }
+    }
+    return metrics;
+  }
+
+  // Returns the part after the ':' of every non-empty extra_metrics entry.
+  // Each entry must split into exactly two parts (JXL_CHECK otherwise).
+  static StringVec GetExtraMetricsCommands() {
+    StringVec metrics = SplitString(Args()->extra_metrics, ',');
+    for (auto it = metrics.begin(); it != metrics.end();) {
+      if (it->empty()) {
+        it = metrics.erase(it);
+      } else {
+        auto s = SplitString(*it, ':');
+        JXL_CHECK(s.size() == 2);
+        *it = s[1];
+        ++it;
+      }
+    }
+    return metrics;
+  }
+
+  // Cuts `num_samples` random size x size tiles out of the input PNGs, writes
+  // each tile as a PNG into `sample_tmp_dir` and returns the new file names.
+  // Every input must be at least `size` pixels wide and high. The generator
+  // is constructed as Rng(0) on every call.
+  static StringVec SampleFromInput(const StringVec& fnames,
+                                   const std::string& sample_tmp_dir,
+                                   int num_samples, size_t size) {
+    JXL_CHECK(!sample_tmp_dir.empty());
+    fprintf(stderr, "Creating samples of %" PRIuS "x%" PRIuS " tiles...\n",
+            size, size);
+    StringVec fnames_out;
+    std::vector<Image3F> images;
+    // offsets[k] is the cumulative number of possible tiles in images 0..k.
+    std::vector<size_t> offsets;
+    size_t total_num_tiles = 0;
+    for (const auto& fname : fnames) {
+      Image3F img;
+      JXL_CHECK(ReadPNG(fname, &img));
+      JXL_CHECK(img.xsize() >= size);
+      JXL_CHECK(img.ysize() >= size);
+      total_num_tiles += (img.xsize() - size + 1) * (img.ysize() - size + 1);
+      offsets.push_back(total_num_tiles);
+      images.emplace_back(std::move(img));
+    }
+    JXL_CHECK(MakeDir(sample_tmp_dir));
+    Rng rng(0);
+    for (int i = 0; i < num_samples; ++i) {
+      // Pick the source image from a draw over the cumulative tile counts.
+      int val = rng.UniformI(0, offsets.back());
+      size_t idx = (std::lower_bound(offsets.begin(), offsets.end(), val) -
+                    offsets.begin());
+      JXL_CHECK(idx < images.size());
+      const Image3F& img = images[idx];
+      // NOTE(review): confirm the UniformI bounds still work when an image
+      // is exactly `size` pixels wide or high.
+      int x0 = rng.UniformI(0, img.xsize() - size);
+      int y0 = rng.UniformI(0, img.ysize() - size);
+      Image3F sample(size, size);
+      for (size_t c = 0; c < 3; ++c) {
+        for (size_t y = 0; y < size; ++y) {
+          const float* JXL_RESTRICT row_in = img.PlaneRow(c, y0 + y);
+          float* JXL_RESTRICT row_out = sample.PlaneRow(c, y);
+          memcpy(row_out, &row_in[x0], size * sizeof(row_out[0]));
+        }
+      }
+      // `size` is a size_t, so it is formatted with PRIuS; passing it to a
+      // "%d" conversion is a type mismatch (undefined behavior).
+      std::string fn_output = StringPrintf(
+          "%s/%s.crop_%" PRIuS "x%" PRIuS "+%d+%d.png", sample_tmp_dir.c_str(),
+          FileBaseName(fnames[idx]).c_str(), size, size, x0, y0);
+      ThreadPool* null_pool = nullptr;
+      JXL_CHECK(WriteImage(std::move(sample), null_pool, fn_output));
+      fnames_out.push_back(fn_output);
+    }
+    fprintf(stderr, "Created %d sample tiles\n", num_samples);
+    return fnames_out;
+  }
+
+  // Expands the input pattern into file names (aborting when nothing
+  // matches), sorts them when details are printed and, when sampling is
+  // requested, replaces them by the generated sample tiles.
+  static StringVec GetFilenames() {
+    StringVec fnames;
+    JXL_CHECK(MatchFiles(Args()->input, &fnames));
+    if (fnames.empty()) {
+      JXL_ABORT("No input file matches pattern: '%s'", Args()->input.c_str());
+    }
+    if (Args()->print_details) {
+      std::sort(fnames.begin(), fnames.end());
+    }
+
+    if (Args()->num_samples > 0) {
+      fnames = SampleFromInput(fnames, Args()->sample_tmp_dir,
+                               Args()->num_samples, Args()->sample_dimensions);
+    }
+    return fnames;
+  }
+
+  // (Load only once, not for every codec)
+  // In decode_only mode nothing is read and the entries stay
+  // default-constructed. A file that fails to load is reported (unless
+  // silent_errors is set) and its entry is left as it was.
+  static std::vector<CodecInOut> LoadImages(const StringVec& fnames,
+                                            ThreadPool* pool) {
+    std::vector<CodecInOut> loaded_images;
+    loaded_images.resize(fnames.size());
+    const auto process_image = [&](const uint32_t task, size_t /*thread*/) {
+      const size_t i = static_cast<size_t>(task);
+      Status ok = true;
+
+      if (!Args()->decode_only) {
+        PaddedBytes encoded;
+        ok = ReadFile(fnames[i], &encoded);
+        if (ok) {
+          ok = jxl::SetFromBytes(Span<const uint8_t>(encoded),
+                                 Args()->color_hints, &loaded_images[i]);
+        }
+        if (ok && Args()->intensity_target != 0) {
+          loaded_images[i].metadata.m.SetIntensityTarget(
+              Args()->intensity_target);
+        }
+      }
+      if (!ok) {
+        if (!Args()->silent_errors) {
+          fprintf(stderr, "Failed to load image %s\n", fnames[i].c_str());
+        }
+        return;
+      }
+
+      if (!Args()->decode_only && Args()->override_bitdepth != 0) {
+        if (Args()->override_bitdepth == 32) {
+          loaded_images[i].metadata.m.SetFloat32Samples();
+        } else {
+          loaded_images[i].metadata.m.SetUintSamples(Args()->override_bitdepth);
+        }
+      }
+    };
+    JXL_CHECK(jxl::RunOnPool(pool, 0, static_cast<uint32_t>(fnames.size()),
+                             ThreadPool::NoInit, process_image, "Load images"));
+    return loaded_images;
+  }
+
+  // Creates one task per (image, method) pair, ordered image-major.
+  // Task::image is not set here; RunTasks assigns it.
+  static std::vector<Task> CreateTasks(const StringVec& methods,
+                                       const StringVec& fnames) {
+    std::vector<Task> tasks;
+    tasks.reserve(methods.size() * fnames.size());
+    for (size_t idx_image = 0; idx_image < fnames.size(); ++idx_image) {
+      for (size_t idx_method = 0; idx_method < methods.size(); ++idx_method) {
+        tasks.emplace_back();
+        Task& t = tasks.back();
+        t.codec = CreateImageCodec(methods[idx_method]);
+        t.idx_image = idx_image;
+        t.idx_method = idx_method;
+        // t.stats is default-initialized.
+      }
+    }
+    JXL_ASSERT(tasks.size() == tasks.capacity());
+    return tasks;
+  }
+
+  // Return the total number of errors.
+  // Runs DoCompress for every task on `pool`, giving each worker its own
+  // nested pool, and reports each finished task to a StatPrinter.
+  static size_t RunTasks(
+      const StringVec& methods, const StringVec& extra_metrics_names,
+      const StringVec& extra_metrics_commands, const StringVec& fnames,
+      const std::vector<CodecInOut>& loaded_images, ThreadPool* pool,
+      const std::vector<std::unique_ptr<ThreadPoolInternal>>& inner_pools,
+      std::vector<Task>* tasks) {
+    StatPrinter printer(methods, extra_metrics_names, fnames, *tasks);
+    if (Args()->print_details_csv) {
+      // Print CSV header
+      printf(
+          "method,image,error,size,pixels,enc_speed,dec_speed,"
+          "bpp,dist,psnr,p,bppp,qabpp");
+      for (const std::string& s : extra_metrics_names) {
+        printf(",%s", s.c_str());
+      }
+      printf("\n");
+    }
+
+    std::vector<uint64_t> errors_thread;
+    JXL_CHECK(jxl::RunOnPool(
+        pool, 0, tasks->size(),
+        [&](const size_t num_threads) {
+          // Reduce false sharing by only writing every 8th slot (64 bytes).
+          errors_thread.resize(8 * num_threads);
+          return true;
+        },
+        [&](const uint32_t i, const size_t thread) {
+          Task& t = (*tasks)[i];
+          const CodecInOut& image = loaded_images[t.idx_image];
+          t.image = &image;
+          std::vector<uint8_t> compressed;
+          DoCompress(fnames[t.idx_image], image, extra_metrics_commands,
+                     t.codec.get(), &*inner_pools[thread], &compressed,
+                     &t.stats);
+          printer.TaskDone(i, t);
+          errors_thread[8 * thread] += t.stats.total_errors;
+        },
+        "Benchmark tasks"));
+    if (Args()->show_progress) fprintf(stderr, "\n");
+    return std::accumulate(errors_thread.begin(), errors_thread.end(),
+                           size_t(0));
+  }
+};
+
+// Entry point of the benchmark tool: prints the version banner, registers and
+// parses the command line options, then runs the benchmark.
+// Returns 0 after printing help, 1 when parsing or validation fails, and the
+// result of Benchmark::Run() otherwise.
+int BenchmarkMain(int argc, const char** argv) {
+  const auto config = jpegxl::tools::CodecConfigString(JxlDecoderVersion());
+  fprintf(stderr, "benchmark_xl %s\n", config.c_str());
+
+  JXL_CHECK(Args()->AddCommandLineOptions());
+
+  if (Args()->Parse(argc, argv)) {
+    // An explicit help request wins over argument validation.
+    if (Args()->cmdline.HelpFlagPassed()) {
+      Args()->PrintHelp();
+      return 0;
+    }
+    if (Args()->ValidateArgs()) {
+      return Benchmark::Run();
+    }
+  }
+
+  // Reached when either parsing or validation failed.
+  fprintf(stderr, "Use '%s -h' for more information\n", argv[0]);
+  return 1;
+}
+
+}  // namespace
+}  // namespace tools
+}  // namespace jpegxl
+
+// Program entry point; forwards to the benchmark driver and returns its
+// exit code.
+int main(int argc, const char** argv) {
+  const int exit_code = jpegxl::tools::BenchmarkMain(argc, argv);
+  return exit_code;
+}
diff --git a/third-party/libjxl/libjxl/tools/benchmark/hm/README.md b/third-party/libjxl/libjxl/tools/benchmark/hm/README.md
new file mode 100644
index 0000000000..e54904eff9
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/hm/README.md
@@ -0,0 +1,12 @@
+This directory contains encoding and decoding scripts for HEVC, for use with
+the benchmark custom codec. They use the HEVC reference encoder at https://hevc.hhi.fraunhofer.de/svn/svn_HEVCSoftware/
+and require the `TAppEncoderHighBitDepthStatic` and
+`TAppDecoderHighBitDepthStatic` binaries to be placed in this directory.
+
+Example usage, for encoding at QP = 30:
+
+```
+tools/benchmark_xl --input=image.png --codec='custom:bin:.../tools/benchmark/hm/encode.sh:.../tools/benchmark/hm/decode.sh:-q:30'
+```
+
+The paths to the encode and decode scripts should be adjusted as necessary.
diff --git a/third-party/libjxl/libjxl/tools/benchmark/hm/decode.sh b/third-party/libjxl/libjxl/tools/benchmark/hm/decode.sh
new file mode 100755
index 0000000000..624c8ba729
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/hm/decode.sh
@@ -0,0 +1,98 @@
+#!/usr/bin/env bash
+
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+set -euo pipefail
+
+decoder="$(dirname "$0")"/TAppDecoderHighBitDepthStatic
+
+usage() {
+  echo "$0 [-v] <input.bin> <output.png>" >&2
+  exit 1
+}
+
+verbose=0
+
+while getopts ':hv' arg; do
+  case "$arg" in
+    h)
+      usage
+      ;;
+
+    v)
+      verbose=1
+      ;;
+
+    \?)
+      echo "Unrecognized option -$OPTARG" >&2
+      exit 1
+      ;;
+  esac
+done
+shift $((OPTIND-1))
+
+if [ $# -lt 2 ]; then
+  usage
+fi
+
+# Runs "$@"; its stdout and stderr are shown only when -v was given.
+run() {
+  case "$verbose" in
+    1) "$@" ;;
+    *) "$@" > /dev/null 2>&1 ;;
+  esac
+}
+
+input="$1"
+output="$2"
+
+bin="$(mktemp)"
+yuv="$(mktemp)"
+width_file="$(mktemp)"
+height_file="$(mktemp)"
+icc_file="$(mktemp --suffix=.icc)"
+
+cleanup() {
+  rm -- "$bin" "$yuv" "$width_file" "$height_file" "$icc_file"
+}
+trap cleanup EXIT
+
+unpack_program="$(cat <<'END'
+  use File::Copy;
+  my ($input, $bin, $width_file, $height_file, $icc_file) = @ARGV;
+  open my $input_fh, '<:raw', $input;
+  sysread($input_fh, my $size, 8) == 8 or die;
+  my ($width, $height) = unpack 'NN', $size;
+  open my $width_fh, '>', $width_file;
+  print {$width_fh} "$width\n";
+  open my $height_fh, '>', $height_file;
+  print {$height_fh} "$height\n";
+  sysread($input_fh, my $icc_size, 4) == 4 or die;
+  $icc_size = unpack 'N', $icc_size;
+  sysread($input_fh, my $icc_data, $icc_size) == $icc_size or die;
+  open my $icc_fh, '>', $icc_file;
+  print {$icc_fh} $icc_data;
+  copy $input_fh, $bin;
+END
+)"
+run perl -Mstrict -Mwarnings -Mautodie -e "$unpack_program" -- "$input" "$bin" "$width_file" "$height_file" "$icc_file"
+
+width="$(cat "$width_file")"
+height="$(cat "$height_file")"
+
+start="$EPOCHREALTIME"
+run "$decoder" --OutputBitDepth=10 -b "$bin" -o "$yuv"
+end="$EPOCHREALTIME"
+
+elapsed="$(echo "$end - $start" | bc)"
+run echo "Completed in $elapsed seconds"
+
+echo "$elapsed" > "${output%.png}".time
+
+run ffmpeg -hide_banner -f rawvideo -vcodec rawvideo -s "${width}x$height" -r 25 -pix_fmt yuv444p10le -i "$yuv" -pix_fmt rgb24 -vf scale=in_color_matrix=bt709 -y "$output"
+if [ -s "$icc_file" ]; then
+  mogrify -profile "$icc_file" "$output"
+fi
diff --git a/third-party/libjxl/libjxl/tools/benchmark/hm/encode.sh b/third-party/libjxl/libjxl/tools/benchmark/hm/encode.sh
new file mode 100755
index 0000000000..319ba6953c
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/hm/encode.sh
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+set -euo pipefail
+
+encoder="$(dirname "$0")"/TAppEncoderHighBitDepthStatic
+cfg_dir="$(dirname "$0")"/../../../third_party/HEVCSoftware/cfg
+
+usage() {
+  echo "$0 [-v] [-q <N>] <input.png> <output.bin>" >&2
+  exit 1
+}
+
+q=27
+verbose=0
+
+while getopts ':hq:v' arg; do
+  case "$arg" in
+    h)
+      usage
+      ;;
+
+    q)
+      q="$OPTARG"
+      ;;
+
+    v)
+      verbose=1
+      ;;
+
+    \?)
+      echo "Unrecognized option -$OPTARG" >&2
+      exit 1
+      ;;
+  esac
+done
+shift $((OPTIND-1))
+
+if [ $# -lt 2 ]; then
+  usage
+fi
+
+# Runs "$@"; its stdout and stderr are shown only when -v was given.
+run() {
+  case "$verbose" in
+    1) "$@" ;;
+    *) "$@" > /dev/null 2>&1 ;;
+  esac
+}
+
+input="$1"
+output="$2"
+
+yuv="$(mktemp)"
+bin="$(mktemp)"
+
+to_clean=("$yuv" "$bin")
+cleanup() {
+  rm -- "${to_clean[@]}"
+}
+trap cleanup EXIT
+
+run ffmpeg -hide_banner -i "$input" -pix_fmt yuv444p10le -vf scale=out_color_matrix=bt709 -color_primaries bt709 -color_trc bt709 -colorspace bt709 -f rawvideo -y "$yuv"
+
+width="$(identify -format '%w' "$input")"
+height="$(identify -format '%h' "$input")"
+
+start="$EPOCHREALTIME"
+run "$encoder" -c "$cfg_dir"/encoder_intra_main_scc_10.cfg -f 1 -fr 1 -wdt "$width" -hgt "$height" --InputChromaFormat=444 --InputBitDepth=10 --ConformanceWindowMode=1 -i "$yuv" -b "$bin" -q "$q"
+end="$EPOCHREALTIME"
+
+elapsed="$(echo "$end - $start" | bc)"
+run echo "Completed in $elapsed seconds"
+
+echo "$elapsed" > "${output%.bin}".time
+
+icc="${output%.*}.icc"
+if run convert "$input" "$icc"; then
+  to_clean+=("$icc")
+fi
+
+pack_program="$(cat <<'END'
+  use File::Copy;
+  use IO::Handle;
+  my ($width, $height, $bin, $icc, $output) = @ARGV;
+  open my $output_fh, '>:raw', $output;
+  syswrite $output_fh, pack 'NN', $width, $height;
+  syswrite $output_fh, pack 'N', -s $icc;
+  copy $icc, $output_fh;
+  copy $bin, $output_fh;
+END
+)"
+run perl -Mstrict -Mwarnings -Mautodie -e "$pack_program" -- "$width" "$height" "$bin" "$icc" "$output"
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/compute-hdrvdp.m b/third-party/libjxl/libjxl/tools/benchmark/metrics/compute-hdrvdp.m
new file mode 100644
index 0000000000..60e40bf32f
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/compute-hdrvdp.m
@@ -0,0 +1,17 @@
+% Copyright (c) the JPEG XL Project Authors. All rights reserved.
+%
+% Use of this source code is governed by a BSD-style
+% license that can be found in the LICENSE file.
+
+pkg load image;
+
+args = argv();
+
+original_filename = args{1};
+decoded_filename = args{2};
+
+original = pfs_read_luminance(original_filename);
+decoded = pfs_read_luminance(decoded_filename);
+
+res = hdrvdp(decoded, original, 'luminance', 30, {});
+printf("%f\n", res.Q);
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/compute-pumetrics.m b/third-party/libjxl/libjxl/tools/benchmark/metrics/compute-pumetrics.m
new file mode 100644
index 0000000000..df0fe4bd0e
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/compute-pumetrics.m
@@ -0,0 +1,26 @@
+% Copyright (c) the JPEG XL Project Authors. All rights reserved.
+%
+% Use of this source code is governed by a BSD-style
+% license that can be found in the LICENSE file.
+
+pkg load image;
+
+args = argv();
+
+metric = args{1};
+original_filename = args{2};
+decoded_filename = args{3};
+
+original = pfs_read_luminance(original_filename);
+decoded = pfs_read_luminance(decoded_filename);
+
+switch (metric)
+  case "psnr"
+    res = qm_pu2_psnr(original, decoded);
+  case "ssim"
+    res = qm_pu2_ssim(original, decoded);
+  otherwise
+    error(sprintf("unrecognized metric %s", metric));
+end
+
+printf("%f\n", res);
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/compute_octave_metric.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/compute_octave_metric.sh
new file mode 100755
index 0000000000..a31c266592
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/compute_octave_metric.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Usage: ./compute_octave_metric.sh <original> <decoded> <output> <intensity_target> [octave args...]
+# Where octave args do not need to contain -qf or the path to the original and decoded images.
+
+set -euo pipefail
+
+original="$1"
+decoded="$2"
+output="$3"
+intensity_target="$4"
+shift 4
+
+tmpdir="$(mktemp --directory)"
+
+linearized_original="$(mktemp --tmpdir="$tmpdir" --suffix='.pfm')"
+linearized_decoded="$(mktemp --tmpdir="$tmpdir" --suffix='.pfm')"
+
+cleanup() {
+  rm -- "$linearized_original" "$linearized_decoded"
+  rmdir --ignore-fail-on-non-empty -- "$tmpdir"
+}
+trap cleanup EXIT
+
+linearize() {
+  local input="$1"
+  local output="$2"
+  convert "$input" -set colorspace sRGB -colorspace RGB -evaluate multiply "$intensity_target" "$output"
+}
+
+linearize "$original" "$linearized_original"
+linearize "$decoded" "$linearized_decoded"
+
+octave -qf "$@" \
+  "$linearized_original" "$linearized_decoded" \
+  2> /dev/null \
+  > "$output"
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/dists-rgb.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/dists-rgb.sh
new file mode 120000
index 0000000000..9e57c8f660
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/dists-rgb.sh
@@ -0,0 +1 @@
+iqa_wrapper.sh
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/fsim-rgb.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/fsim-rgb.sh
new file mode 120000
index 0000000000..9e57c8f660
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/fsim-rgb.sh
@@ -0,0 +1 @@
+iqa_wrapper.sh
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/fsim-y.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/fsim-y.sh
new file mode 120000
index 0000000000..9e57c8f660
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/fsim-y.sh
@@ -0,0 +1 @@
+iqa_wrapper.sh
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/gmsd-rgb.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/gmsd-rgb.sh
new file mode 120000
index 0000000000..9e57c8f660
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/gmsd-rgb.sh
@@ -0,0 +1 @@
+iqa_wrapper.sh
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/hdr_plots.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/hdr_plots.sh
new file mode 100755
index 0000000000..4ce5d9fc4b
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/hdr_plots.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+"$(dirname "$0")/run_all_hdr_metrics.sh" "$@" | sed -n '/```/q;p' > hdr_results.csv
+mkdir -p hdr_plots/
+rm -rf hdr_plots/*
+python3 "$(dirname "$0")/plots.py" hdr_results.csv hdr_plots
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/hdrvdp-fixes.patch b/third-party/libjxl/libjxl/tools/benchmark/metrics/hdrvdp-fixes.patch
new file mode 100644
index 0000000000..23f3f17b6d
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/hdrvdp-fixes.patch
@@ -0,0 +1,110 @@
+From 44a21be2c4de409f80d90cbcc2c20cb3f42e859e Mon Sep 17 00:00:00 2001
+From: Sami Boukortt <sboukortt@google.com>
+Date: Fri, 16 Oct 2020 20:01:02 +0200
+Subject: [PATCH] Fixes for Octave
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Copyright (c) the JPEG XL Project Authors. All rights reserved.
+
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file.
+
+----
+
+ifft2: https://savannah.gnu.org/bugs/?43742
+
+Removing #include <matrix.h>: https://octave.org/doc/v5.2.0/Getting-Started-with-Mex_002dFiles.html
+“One important difference between Octave and MATLAB is that the header
+"matrix.h" is implicitly included through the inclusion of "mex.h".”
+
+Length checks: it appears that functions(…).file for MEX files in Octave
+is empty.
+---
+ fast_conv_fft.m                          | 2 +-
+ matlabPyrTools_1.4_fixed/MEX/corrDn.c    | 1 -
+ matlabPyrTools_1.4_fixed/MEX/pointOp.c   | 1 -
+ matlabPyrTools_1.4_fixed/MEX/upConv.c    | 1 -
+ matlabPyrTools_1.4_fixed/reconSpyr.m     | 2 +-
+ matlabPyrTools_1.4_fixed/reconSpyrLevs.m | 2 +-
+ 6 files changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/fast_conv_fft.m b/fast_conv_fft.m
+index 65ceef8..b89e54b 100644
+--- a/fast_conv_fft.m
++++ b/fast_conv_fft.m
+@@ -16,7 +16,7 @@ pad_size = (size(fH)-size(X));
+ 
+ fX = fft2( padarray( X, pad_size, pad_value, 'post' ) );
+ 
+-Yl = real(ifft2( fX.*fH, size(fX,1), size(fX,2), 'symmetric' ));
++Yl = real(ifft2( fX.*fH, size(fX,1), size(fX,2)));
+ 
+ Y = Yl(1:size(X,1),1:size(X,2));
+ 
+diff --git a/matlabPyrTools_1.4_fixed/MEX/corrDn.c b/matlabPyrTools_1.4_fixed/MEX/corrDn.c
+index d02e272..17e739e 100755
+--- a/matlabPyrTools_1.4_fixed/MEX/corrDn.c
++++ b/matlabPyrTools_1.4_fixed/MEX/corrDn.c
+@@ -6,7 +6,6 @@ RES = corrDn(IM, FILT, EDGES, STEP, START, STOP);
+ */
+ 
+ #define V4_COMPAT
+-#include <matrix.h>  /* Matlab matrices */
+ #include <mex.h>
+ 
+ #include "convolve.h"
+diff --git a/matlabPyrTools_1.4_fixed/MEX/pointOp.c b/matlabPyrTools_1.4_fixed/MEX/pointOp.c
+index 3623a02..e553adf 100755
+--- a/matlabPyrTools_1.4_fixed/MEX/pointOp.c
++++ b/matlabPyrTools_1.4_fixed/MEX/pointOp.c
+@@ -5,7 +5,6 @@ RES = pointOp(IM, LUT, ORIGIN, INCREMENT, WARNINGS)
+ */
+ 
+ #define V4_COMPAT
+-#include <matrix.h>  /* Matlab matrices */
+ #include <mex.h>
+ 
+ #include <stddef.h>  /* NULL */
+diff --git a/matlabPyrTools_1.4_fixed/MEX/upConv.c b/matlabPyrTools_1.4_fixed/MEX/upConv.c
+index 98a2bec..08fdf75 100755
+--- a/matlabPyrTools_1.4_fixed/MEX/upConv.c
++++ b/matlabPyrTools_1.4_fixed/MEX/upConv.c
+@@ -6,7 +6,6 @@ RES = upConv(IM, FILT, EDGES, STEP, START, STOP, RES);
+ */
+ 
+ #define V4_COMPAT
+-#include <matrix.h>  /* Matlab matrices */
+ #include <mex.h>
+ 
+ #include "convolve.h"
+diff --git a/matlabPyrTools_1.4_fixed/reconSpyr.m b/matlabPyrTools_1.4_fixed/reconSpyr.m
+index 05eeafb..1440d8a 100644
+--- a/matlabPyrTools_1.4_fixed/reconSpyr.m
++++ b/matlabPyrTools_1.4_fixed/reconSpyr.m
+@@ -31,7 +31,7 @@ function res = reconSpyr(pyr, pind, filtfile, edges, levs, bands)
+ % Deterimine whether a MEX version of upConv is available
+ is_mex = true;
+ finfo = functions( @upConv );
+-if( strcmp( finfo.file((end-2):end), '.m') )
++if( length(finfo.file) > 2 && strcmp( finfo.file((end-2):end), '.m') )
+     is_mex = false;
+ end
+ 
+diff --git a/matlabPyrTools_1.4_fixed/reconSpyrLevs.m b/matlabPyrTools_1.4_fixed/reconSpyrLevs.m
+index ac5e2b1..d3b91d5 100644
+--- a/matlabPyrTools_1.4_fixed/reconSpyrLevs.m
++++ b/matlabPyrTools_1.4_fixed/reconSpyrLevs.m
+@@ -11,7 +11,7 @@ function res = reconSpyrLevs(pyr,pind,lofilt,bfilts,edges,levs,bands)
+ % Deterimine whether MEX version of upConv is available
+ is_mex = true;
+ finfo = functions( @upConv );
+-if( strcmp( finfo.file((end-2):end), '.m') )
++if( length(finfo.file) > 2 && strcmp( finfo.file((end-2):end), '.m') )
+     is_mex = false;
+ end
+ 
+-- 
+2.28.0
+
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/hdrvdp.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/hdrvdp.sh
new file mode 100755
index 0000000000..659ab85308
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/hdrvdp.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+"$(dirname "$0")"/compute_octave_metric.sh "$@" \
+  --path "$(dirname "$0")"/../../../third_party/hdrvdp-2.2.2/ \
+  "$(dirname "$0")"/compute-hdrvdp.m
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/iqa.py b/third-party/libjxl/libjxl/tools/benchmark/metrics/iqa.py
new file mode 100644
index 0000000000..1be9699926
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/iqa.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+import os
+import sys
+import pathlib
+import torch
+from torchvision import transforms
+import numpy as np
+
+path = pathlib.Path(__file__).parent.absolute(
+) / '..' / '..' / '..' / 'third_party' / 'IQA-optimization'
+sys.path.append(str(path))
+
+from IQA_pytorch import SSIM, MS_SSIM, CW_SSIM, GMSD, LPIPSvgg, DISTS, NLPD, FSIM, VSI, VIFs, VIF, MAD
+
+
+# only really works with the output from JXL, but we don't need more than that.
+def read_pfm(fname):
+    """Reads a PF/Pf file as a float64 array with the row order reversed."""
+    with open(fname, 'rb') as f:
+        header_width_height = []
+        while len(header_width_height) < 3:  # tokens may span several lines
+            header_width_height += f.readline().rstrip().split()
+        header, width, height = header_width_height
+        assert header == b'PF' or header == b'Pf'
+        width, height = int(width), int(height)
+        scale = float(f.readline().rstrip())
+        # The sign of the scale selects little- ('<f') or big-endian ('>f').
+        data = np.fromfile(f, '<f' if scale < 0 else '>f')
+        # b'PF' carries three channels per pixel, b'Pf' a single one.
+        shape = (height, width, 3) if header == b'PF' else (height, width)
+        # np.float was removed in NumPy 1.24; float64 is the type it aliased.
+        return np.reshape(data, shape)[::-1].astype(np.float64)
+
+
+D_dict = {
+    'cwssim': CW_SSIM,
+    'dists': DISTS,
+    'fsim': FSIM,
+    'gmsd': GMSD,
+    'lpips': LPIPSvgg,
+    'mad': MAD,
+    'msssim': MS_SSIM,
+    'nlpd': NLPD,
+    'ssim': SSIM,
+    'vif': VIF,
+    'vsi': VSI,
+}
+
+algo = os.path.basename(sys.argv[1]).split('.')[0]
+algo, color = algo.split('-')
+
+channels = 3
+
+if color == 'y':
+    channels = 1
+
+
+def Load(path):
+    """Reads the PFM at `path`, converts it to the module-level `channels`
+    count, puts the channel axis of a 3-D image first, and returns a
+    FloatTensor with an added leading axis, on CUDA when available."""
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    img = read_pfm(path)
+    if len(img.shape) == 3 and channels == 1:  # rgb -> Y
+        assert img.shape[2] == 3
+        tmp = np.zeros((img.shape[0], img.shape[1], 1), dtype=float)
+        tmp[:, :, 0] = (0.2126 * img[:, :, 0] + 0.7152 * img[:, :, 1] +
+                        0.0722 * img[:, :, 2])
+        img = tmp
+    if len(img.shape) == 2 and channels == 3:  # Y -> rgb
+        gray = img
+        img = np.zeros((img.shape[0], img.shape[1], 3), dtype=float)
+        img[:, :, 0] = img[:, :, 1] = img[:, :, 2] = gray
+    if len(img.shape) == 3:
+        img = np.transpose(img, axes=(2, 0, 1)).copy()
+    return torch.FloatTensor(img).unsqueeze(0).to(device)
+
+
+ref_img = Load(sys.argv[2])
+enc_img = Load(sys.argv[3])
+D = D_dict[algo](channels=channels)
+score = D(ref_img, enc_img, as_loss=False)
+
+with open(sys.argv[4], 'w') as f:
+    print(score.item(), file=f)
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/iqa_wrapper.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/iqa_wrapper.sh
new file mode 100755
index 0000000000..1d179fdedc
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/iqa_wrapper.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+python3 "$(dirname "$0")/iqa.py" "$0" "$@" 
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/lpips-rgb.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/lpips-rgb.sh
new file mode 120000
index 0000000000..9e57c8f660
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/lpips-rgb.sh
@@ -0,0 +1 @@
+iqa_wrapper.sh
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/mrse.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/mrse.sh
new file mode 100755
index 0000000000..54d18d6fe0
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/mrse.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+set -euo pipefail
+
+original="$1"
+decoded="$2"
+output="$3"
+intensity_target="$4"
+
+tmpdir="$(mktemp --directory)"
+
+linearized_original="$(mktemp --tmpdir="$tmpdir" --suffix='.pfm')"
+linearized_decoded="$(mktemp --tmpdir="$tmpdir" --suffix='.pfm')"
+
+cleanup() {
+  rm -- "$linearized_original" "$linearized_decoded"
+  rmdir --ignore-fail-on-non-empty -- "$tmpdir"
+}
+trap cleanup EXIT
+
+linearize() {
+  local input="$1"
+  local output="$2"
+  convert "$input" -set colorspace sRGB -colorspace RGB -evaluate multiply "$intensity_target" "$output"
+}
+
+linearize "$original" "$linearized_original"
+linearize "$decoded" "$linearized_decoded"
+
+"$(dirname "$0")"/../../../third_party/difftest_ng/difftest_ng --mrse "$linearized_original" "$linearized_decoded" \
+  | sed -e 's/^MRSE:\s*//' \
+  > "$output"
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/msssim-rgb.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/msssim-rgb.sh
new file mode 120000
index 0000000000..9e57c8f660
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/msssim-rgb.sh
@@ -0,0 +1 @@
+iqa_wrapper.sh
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/msssim-y.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/msssim-y.sh
new file mode 120000
index 0000000000..9e57c8f660
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/msssim-y.sh
@@ -0,0 +1 @@
+iqa_wrapper.sh
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/nlpd-y.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/nlpd-y.sh
new file mode 120000
index 0000000000..9e57c8f660
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/nlpd-y.sh
@@ -0,0 +1 @@
+iqa_wrapper.sh
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/plots.py b/third-party/libjxl/libjxl/tools/benchmark/metrics/plots.py
new file mode 100755
index 0000000000..04b2bb24e5
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/plots.py
@@ -0,0 +1,259 @@
+#!/usr/bin/env python3
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+import csv
+import sys
+import math
+import plotly.graph_objects as go
+
+_, results, output_dir, *rest = sys.argv
+OUTPUT = rest[0] if rest else 'svg'
+# valid values: html, svg, png, webp, jpeg, pdf
+
+with open(results, 'r') as f:
+    reader = csv.DictReader(f)
+    all_results = list(reader)
+
+nonmetric_columns = set([
+    "method", "image", "error", "size", "pixels", "enc_speed", "dec_speed",
+    "bpp", "bppp", "qabpp"
+])
+
+metrics = set(all_results[0].keys()) - nonmetric_columns
+
+
+def codec(method):
+    """Returns the series name for a ':'-separated benchmark method."""
+    fields = method.split(':')
+    if fields[0] == 'custom':
+        return fields[1]  # custom codecs are named by their second field
+    known_speeds = {
+        'kitten', 'falcon', 'wombat', 'cheetah', 'tortoise', 'squirrel',
+        'hare', 'fast'
+    }
+    found = known_speeds.intersection(set(fields))
+    if fields[0] == 'jxl' and found:
+        return 'jxl-' + list(found)[0]
+    return fields[0]
+
+
+data = {(m, img): {c: []
+                   for c in {codec(x['method'])
+                             for x in all_results}}
+        for m in metrics for img in {x['image']
+                                     for x in all_results}}
+
+for r in all_results:
+    c = codec(r['method'])
+    img = r['image']
+    bpp = r['bpp']
+    for m in metrics:
+        data[(m, img)][c].append((float(bpp), float(r[m])))
+
+
+def pos(codec):
+    """Sort key (family rank, codec name) used to order the traces."""
+    # Checked top to bottom and the first hit wins, so 'jxl-dis' has to be
+    # listed ahead of the more general 'jxl'.
+    ranking = (
+        (6, ('jxl-dis',)),
+        (7, ('jxl',)),
+        (5, ('avif',)),
+        (4, ('kdu',)),
+        (3, ('heif',)),
+        (2, ('fuif', 'pik')),
+        (1, ('jpg', 'jpeg', 'web')),
+    )
+    for rank, needles in ranking:
+        if any(needle in codec for needle in needles):
+            return rank, codec
+    return 0, codec
+
+
+def style(codec):
+    configs = {
+        'jxl-cheetah': {
+            'color': '#e41a1c',
+            'dash': '1px, 1px',
+            'width': 2
+        },
+        'jxl-wombat': {
+            'color': '#e41a1c',
+            'dash': '2px, 2px',
+            'width': 2
+        },
+        'jxl-squirrel': {
+            'color': '#e41a1c',
+            'dash': '5px, 5px',
+            'width': 2
+        },
+        'jxl-kitten': {
+            'color': '#e41a1c',
+            'width': 2
+        },
+        'jxl-dis-cheetah': {
+            'color': '#377eb8',
+            'dash': '1px, 1px',
+            'width': 2
+        },
+        'jxl-dis-wombat': {
+            'color': '#377eb8',
+            'dash': '2px, 2px',
+            'width': 2
+        },
+        'jxl-dis-squirrel': {
+            'color': '#377eb8',
+            'dash': '5px, 5px',
+            'width': 2
+        },
+        'jxl-dis-kitten': {
+            'color': '#377eb8',
+            'width': 2
+        },
+        'rav1e.avif': {
+            'color': '#4daf4a',
+            'dash': '3px, 3px',
+            'width': 2
+        },
+        '420.rav1e.avif': {
+            'color': '#4daf4a',
+            'dash': '1px, 1px',
+            'width': 2
+        },
+        '444.rav1e.avif': {
+            'color': '#4daf4a',
+            'dash': '3px, 3px',
+            'width': 2
+        },
+        'psnr.420.aom.avif': {
+            'color': '#4daf4a',
+            'dash': '5px, 5px',
+            'width': 2
+        },
+        'psnr.444.aom.avif': {
+            'color': '#4daf4a',
+            'dash': '7px, 7px',
+            'width': 2
+        },
+        'ssim.420.aom.avif': {
+            'color': '#4daf4a',
+            'dash': '9px, 9px',
+            'width': 2
+        },
+        'ssim.444.aom.avif': {
+            'color': '#4daf4a',
+            'width': 2
+        },
+        'heif': {
+            'color': '#984ea3',
+            'width': 2
+        },
+        'fuif': {
+            'color': '#ff7f00',
+            'dash': '2px, 2px',
+            'width': 2
+        },
+        'pik-cfp': {
+            'color': '#ff7f00',
+            'width': 2
+        },
+        'pik-cfp-fast': {
+            'color': '#ff7f00',
+            'dash': '4px, 4px',
+            'width': 2
+        },
+        'webp': {
+            'color': '#000000',
+            'width': 2
+        },
+        'jpeg': {
+            'color': '#a65628',
+            'width': 2
+        },
+        'xt.jpg': {
+            'color': '#a65628',
+            'width': 2
+        },
+        'perc1.kdu.j2k': {
+            'color': '#f781bf',
+            'dash': '1px, 1px',
+            'width': 2
+        },
+        'perc2.kdu.j2k': {
+            'color': '#f781bf',
+            'dash': '3px, 3px',
+            'width': 2
+        },
+        'perc3.kdu.j2k': {
+            'color': '#f781bf',
+            'dash': '5px, 5px',
+            'width': 2
+        },
+        'perc4.kdu.j2k': {
+            'color': '#f781bf',
+            'dash': '7px, 7px',
+            'width': 2
+        },
+        'default.kdu.j2k': {
+            'color': '#f781bf',
+            'width': 2
+        },
+    }
+    return configs.get(codec, dict())
+
+
+visible_by_default = set([
+    'jxl-kitten', 'ssim.444.aom.avif', 'heif', 'webp', 'jpeg', 'xt.jpg',
+    'default.kdu.j2k'
+])
+
+column_remap = {
+    'p': '6-Butteraugli',
+    'dist': 'Max-Butteraugli',
+    'psnr': "PSNR-YUV 6/8 Y",
+    'MS-SSIM-Y': '-log10(1 - MS-SSIM-Y)',
+    'puSSIM': '-log10(1 - puSSIM)',
+    'FSIM-Y': '-log10(1 - FSIM-Y)',
+    'FSIM-RGB': '-log10(1 - FSIM-RGB)',
+    'VMAF': '-log10(1 - VMAF / 100)',
+}
+
+
+def remap(metric):
+    """Returns the value transform plotted for `metric` (identity if none)."""
+    def neg_log_gap(x):
+        return -math.log10(1 - x)
+    if metric in ('MS-SSIM-Y', 'puSSIM', 'FSIM-Y', 'FSIM-RGB'):
+        return neg_log_gap
+    if metric == 'VMAF':
+        return lambda x: -math.log10(1 + 1e-8 - x / 100)
+    return lambda x: x
+
+
+for (m, img) in data:
+    fname = "%s/%s_%s" % (output_dir, m, img)
+    fig = go.Figure()
+    for method in sorted(data[(m, img)].keys(), key=pos):
+        vals = data[(m, img)][method]
+        zvals = list(zip(*sorted(vals)))
+        if not zvals:
+            continue
+        fig.add_trace(
+            go.Scatter(x=zvals[0],
+                       y=[remap(m)(x) for x in zvals[1]],
+                       mode='lines',
+                       name=method,
+                       line=style(method),
+                       visible=True
+                       if method in visible_by_default else 'legendonly'))
+    fig.update_layout(title=img,
+                      xaxis_title='bpp',
+                      yaxis_title=column_remap.get(m, m))
+    fig.update_xaxes(type='log')
+    if OUTPUT == 'html':
+        fig.write_html(fname + '.html', include_plotlyjs='directory')
+    else:
+        fig.write_image(fname + '.' + OUTPUT, scale=4)
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/prepare_metrics.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/prepare_metrics.sh
new file mode 100755
index 0000000000..7ecfaaf194
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/prepare_metrics.sh
@@ -0,0 +1,64 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+set -eu
+
+MYDIR="$(dirname "$(realpath "$0")")"  # quoted: the path may contain spaces
+
+
+main() {
+  cd "${MYDIR}/../../../third_party"
+  local zipurl
+  local repourl
+  for repourl in \
+    'https://github.com/veluca93/IQA-optimization.git' \
+    'https://github.com/Netflix/vmaf.git' \
+    'https://github.com/thorfdbg/difftest_ng.git'
+  do
+    local reponame=$(basename "${repourl%.git}")
+    local dirname=$(basename "${reponame}")
+    if [[ ! -e "${dirname}" ]]; then
+      git clone "${repourl}"
+    fi
+  done
+  for zipurl in \
+    'https://sourceforge.net/projects/hdrvdp/files/hdrvdp/2.2.2/hdrvdp-2.2.2.zip' \
+    'https://sourceforge.net/projects/hdrvdp/files/simple_metrics/1.0/hdr_metrics.zip'
+  do
+    local zipfile="$(basename "${zipurl}")"
+    local dirname="$(basename "${zipfile}" '.zip')"
+    rm -fr "${dirname}"
+    if [[ ! -e "${zipfile}" ]]; then
+      wget -O "${zipfile}.tmp" "${zipurl}"
+      mv "${zipfile}.tmp" "${zipfile}"
+    fi
+    unzip "${zipfile}" "${dirname}"/'*'
+  done
+
+  pushd hdrvdp-2.2.2
+  patch -p1 < ../../tools/benchmark/metrics/hdrvdp-fixes.patch
+  pushd matlabPyrTools_1.4_fixed
+  mkoctfile --mex MEX/corrDn.c MEX/convolve.c MEX/wrap.c MEX/edges.c
+  mkoctfile --mex MEX/pointOp.c
+  mkoctfile --mex MEX/upConv.c
+  popd
+  popd
+
+
+  pushd difftest_ng
+  ./configure
+  make
+  popd
+
+
+  pushd vmaf/libvmaf
+  rm -rf build
+  meson build --buildtype release
+  ninja -vC build
+  popd
+}
+main "$@"
+
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/pupsnr.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/pupsnr.sh
new file mode 100755
index 0000000000..869fc36173
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/pupsnr.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+"$(dirname "$0")"/compute_octave_metric.sh "$@" \
+  --path "$(dirname "$0")"/../../../third_party/hdr_metrics/ \
+  "$(dirname "$0")"/compute-pumetrics.m 'psnr'  # every path is script-relative
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/pussim.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/pussim.sh
new file mode 100755
index 0000000000..957cfa1dc1
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/pussim.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+dir="$(dirname "$0")"  # Locate the helper via this script's path, not $PWD.
+"$dir"/compute_octave_metric.sh "$@" \
+  --path "$dir"/../../../third_party/hdr_metrics/ "$dir"/compute-pumetrics.m 'ssim'
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/run_all_hdr_metrics.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/run_all_hdr_metrics.sh
new file mode 100755
index 0000000000..5fb769d667
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/run_all_hdr_metrics.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+set -eu
+dir="$(dirname "$0")"
+
+main() {
+  # Entries have the form NAME:SCRIPT, with the scripts next to this file.
+  local metrics=(
+    HDR-VDP:"${dir}"/hdrvdp.sh
+    MRSE:"${dir}"/mrse.sh
+    puPSNR:"${dir}"/pupsnr.sh
+    puSSIM:"${dir}"/pussim.sh
+  )
+  # Prefix every entry with a comma, concatenate, then drop the leading comma.
+  local metrics_args=$(printf '%s' "${metrics[@]/#/,}")
+  metrics_args=${metrics_args:1}
+
+  # The list is quoted so a path with spaces stays a single argument.
+  "${dir}/../../../build/tools/benchmark_xl" \
+    --print_details_csv \
+    --num_threads=32 \
+    --error_pnorm=6 \
+    --extra_metrics "${metrics_args}" \
+    "$@"
+}
+
+main "$@"
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/run_all_sdr_metrics.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/run_all_sdr_metrics.sh
new file mode 100755
index 0000000000..def887b09e
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/run_all_sdr_metrics.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+set -eu
+dir="$(dirname "$0")"
+
+main() {
+  local metrics=(
+    FSIM-Y:"${dir}"/fsim-y.sh
+    FSIM-RGB:"${dir}"/fsim-rgb.sh
+    LPIPS:"${dir}"/lpips-rgb.sh
+    MS-SSIM-Y:"${dir}"/msssim-y.sh
+    NLPD:"${dir}"/nlpd-y.sh
+    SSIMULACRA:"${dir}"/ssimulacra.sh
+    VIF:"${dir}"/vif-rgb.sh
+    VMAF:"${dir}"/vmaf.sh
+  )
+  # other metrics, not in core experiments:
+#    VSI:"${dir}"/vsi-rgb.sh
+#    SSIM-RGB:"${dir}"/ssim-rgb.sh
+#    SSIM-Y:"${dir}"/ssim-y.sh
+#    GMSD:"${dir}"/gmsd.sh
+#    DISTS:"${dir}"/dists-rgb.sh
+#    MS-SSIM-RGB:"${dir}"/msssim-rgb.sh
+
+  # Prefix every entry with a comma, concatenate, then drop the leading comma.
+  local metrics_args=$(printf '%s' "${metrics[@]/#/,}")
+  metrics_args=${metrics_args:1}
+
+  # The list is quoted so a path with spaces stays a single argument.
+  "${dir}/../../../build/tools/benchmark_xl" \
+    --print_details_csv \
+    --num_threads=1 \
+    --error_pnorm=6 \
+    --extra_metrics "${metrics_args}" \
+    "$@"
+}
+
+main "$@"
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/sdr_plots.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/sdr_plots.sh
new file mode 100755
index 0000000000..d97648e8f8
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/sdr_plots.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+"$(dirname "$0")/run_all_sdr_metrics.sh" "$@" | sed -n '/```/q;p' > sdr_results.csv
+mkdir -p sdr_plots/  # Make sure the output directory exists...
+rm -rf sdr_plots/*  # ...and empty it so nothing from an earlier run remains.
+python3 "$(dirname "$0")/plots.py" sdr_results.csv sdr_plots
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/ssim-rgb.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/ssim-rgb.sh
new file mode 120000
index 0000000000..9e57c8f660
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/ssim-rgb.sh
@@ -0,0 +1 @@
+iqa_wrapper.sh
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/ssim-y.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/ssim-y.sh
new file mode 120000
index 0000000000..9e57c8f660
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/ssim-y.sh
@@ -0,0 +1 @@
+iqa_wrapper.sh
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/ssimulacra.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/ssimulacra.sh
new file mode 100755
index 0000000000..65617d1c08
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/ssimulacra.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+"$(dirname "$0")"/../../../build/tools/ssimulacra_main "$1" "$2" > "$3" 2>/dev/null
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/vif-rgb.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/vif-rgb.sh
new file mode 120000
index 0000000000..9e57c8f660
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/vif-rgb.sh
@@ -0,0 +1 @@
+iqa_wrapper.sh
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/vmaf.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/vmaf.sh
new file mode 100755
index 0000000000..ab406d011c
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/vmaf.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+set -euo pipefail
+
+original="$1"
+decoded="$2"
+output="$3"
+
+tmpdir="$(mktemp --directory)"
+
+exr_original="$(mktemp --tmpdir="$tmpdir" --suffix='.exr')"
+exr_decoded="$(mktemp --tmpdir="$tmpdir" --suffix='.exr')"
+
+yuv_original="$(mktemp --tmpdir="$tmpdir" --suffix='.yuv')"
+yuv_decoded="$(mktemp --tmpdir="$tmpdir" --suffix='.yuv')"
+
+vmaf_csv="$(mktemp --tmpdir="$tmpdir" --suffix='.csv')"
+
+cleanup() {  # Registered below as the EXIT trap; removes the scratch files.
+  rm -- "$exr_original" "$exr_decoded" "$yuv_original" "$yuv_decoded" "$vmaf_csv"
+  rmdir --ignore-fail-on-non-empty -- "$tmpdir"  # Tolerates leftover files.
+}
+trap cleanup EXIT
+
+convert "$original" "$exr_original"
+convert "$decoded" "$exr_decoded"
+
+srgb=(-colorspace bt709 -color_primaries bt709 -color_trc iec61966-2-1)
+ffmpeg "${srgb[@]}" -i "$exr_original" -pix_fmt yuv444p10le "${srgb[@]}" -y "$yuv_original" &>/dev/null
+ffmpeg "${srgb[@]}" -i "$exr_decoded" -pix_fmt yuv444p10le "${srgb[@]}" -y "$yuv_decoded" &>/dev/null
+
+"$(dirname "$0")"/../../../third_party/vmaf/libvmaf/build/tools/vmafossexec \
+  yuv444p10le \
+  "$(identify -format '%w' "$original")" "$(identify -format '%h' "$original")" \
+  "$yuv_original" "$yuv_decoded" \
+  "$(dirname "$0")/../../../third_party/vmaf/model/vmaf_v0.6.1.pkl" \
+  --log-fmt csv --log "$vmaf_csv" &>/dev/null
+
+read_csv="$(cat <<'END'
+import csv
+import sys
+reader = csv.DictReader(sys.stdin)
+for row in reader:
+  print(row['vmaf'])
+END
+)"
+# Print the "vmaf" column of each CSV row; python3 as in sdr_plots.sh.
+python3 -c "$read_csv" < "$vmaf_csv" > "$output"
diff --git a/third-party/libjxl/libjxl/tools/benchmark/metrics/vsi-rgb.sh b/third-party/libjxl/libjxl/tools/benchmark/metrics/vsi-rgb.sh
new file mode 120000
index 0000000000..9e57c8f660
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/benchmark/metrics/vsi-rgb.sh
@@ -0,0 +1 @@
+iqa_wrapper.sh
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/tools/box/CMakeLists.txt b/third-party/libjxl/libjxl/tools/box/CMakeLists.txt
new file mode 100644
index 0000000000..bed5f24f21
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/box/CMakeLists.txt
@@ -0,0 +1,27 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+add_library(box STATIC EXCLUDE_FROM_ALL
+  box.cc
+  box.h
+)
+# This library can be included into position independent binaries.
+set_target_properties(box PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
+jxl_link_libraries(box jxl_base-obj)
+target_include_directories(box
+  PRIVATE
+  "${PROJECT_SOURCE_DIR}"  # presumably resolves "tools/box/box.h"; confirm
+)
+
+if(JPEGXL_ENABLE_DEVTOOLS)  # box_list is only defined with devtools enabled
+add_executable(box_list
+  box_list_main.cc
+)
+target_link_libraries(box_list
+  box
+  ${ATOMICS_LIBRARIES}
+)
+set_target_properties(box_list PROPERTIES LINK_FLAGS "${JPEGXL_COVERAGE_LINK_FLAGS}")
+endif()  # JPEGXL_ENABLE_DEVTOOLS
diff --git a/third-party/libjxl/libjxl/tools/box/box.cc b/third-party/libjxl/libjxl/tools/box/box.cc
new file mode 100644
index 0000000000..d581dc4f88
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/box/box.cc
@@ -0,0 +1,285 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/box/box.h"
+
+#include <string.h>
+
+#include "lib/jxl/base/byte_order.h"
+
+namespace jpegxl {
+namespace tools {
+
+namespace {
+// Returns true if the range [a, a + b) does not fit within `size` bytes,
+// including the case where a + b wraps around.
+bool OutOfBounds(size_t a, size_t b, size_t size) {
+  const size_t end = a + b;
+  const bool wrapped = end < a;
+  return wrapped || end > size;
+}
+}  // namespace
+
+// Parses the header of a BMFF box. Returns the result in a Box struct.
+// On success, advances *next_in and *available_in past the header; fails when
+// the input ends inside the header. The data size is output if known, or must
+// be handled by the caller and runs to the end of the container if not known.
+jxl::Status ParseBoxHeader(const uint8_t** next_in, size_t* available_in,
+                           Box* box) {
+  size_t pos = 0;
+  size_t size = *available_in;
+  const uint8_t* in = *next_in;
+
+  // The 32-bit size and the 4-byte type are present in every header.
+  if (OutOfBounds(pos, 8, size)) return JXL_FAILURE("out of bounds");
+  const size_t initial_pos = pos;
+
+  // Total box_size including this header itself.
+  uint64_t box_size = LoadBE32(in + pos);
+  pos += 4;
+  memcpy(box->type, in + pos, 4);
+  pos += 4;
+  if (box_size == 1) {
+    // A size of 1 means the real size is the 64-bit integer after the type.
+    if (OutOfBounds(pos, 8, size)) return JXL_FAILURE("out of bounds");
+    box_size = LoadBE64(in + pos);
+    pos += 8;
+  }
+  if (!memcmp("uuid", box->type, 4)) {
+    if (OutOfBounds(pos, 16, size)) return JXL_FAILURE("out of bounds");
+    memcpy(box->extended_type, in + pos, 16);
+    pos += 16;
+  }
+
+  // This is the end of the box header, the box data begins here. Handle
+  // the data size now.
+  const size_t data_pos = pos;
+  const size_t header_size = data_pos - initial_pos;
+
+  if (box_size != 0) {
+    if (box_size < header_size) {
+      return JXL_FAILURE("invalid box size");
+    }
+    box->data_size_given = true;
+    box->data_size = box_size - header_size;
+  } else {
+    // The size extends to the end of the file. We don't necessarily know the
+    // end of the file here, since the input size may be only part of the full
+    // container file. Indicate the size is not given, the caller must handle
+    // this.
+    box->data_size_given = false;
+    box->data_size = 0;
+  }
+
+  // The remaining bytes are the data. If the box is a full box, the first
+  // bytes of the data have a certain structure but this is to be handled by
+  // the caller for the appropriate box type.
+  *next_in += pos;
+  *available_in -= pos;
+
+  return true;
+}
+
+// Appends the header of `box` to `out`: 32-bit size, type, then the optional
+// 64-bit extended size and the optional 16-byte "uuid" extended type.
+jxl::Status AppendBoxHeader(const Box& box, jxl::PaddedBytes* out) {
+  bool use_extended = !memcmp("uuid", box.type, 4);
+  // A box_size of 0 marks a box that runs until the end of the file.
+  uint64_t box_size = 0;
+  bool large_size = false;
+  if (box.data_size_given) {
+    box_size = box.data_size + 8 + (use_extended ? 16 : 0);
+    large_size = box_size >= 0x100000000ull;
+    // The stored size covers the whole header, so it also has to count the
+    // 8-byte extended size field once that field is written.
+    if (large_size) box_size += 8;
+  }
+
+  // A 32-bit size of 1 announces the extended size written after the type.
+  out->resize(out->size() + 4);
+  StoreBE32(large_size ? 1 : box_size, &out->back() - 4 + 1);
+  out->resize(out->size() + 4);
+  memcpy(&out->back() - 4 + 1, box.type, 4);
+  if (large_size) {
+    out->resize(out->size() + 8);
+    StoreBE64(box_size, &out->back() - 8 + 1);
+  }
+  if (use_extended) {
+    out->resize(out->size() + 16);
+    memcpy(&out->back() - 16 + 1, box.extended_type, 16);
+  }
+  return true;
+}
+
+bool IsContainerHeader(const uint8_t* data, size_t size) {
+  // The full signature box: 32-bit size 0xc, type "JXL ", 4-byte payload.
+  static const uint8_t kMagic[] = {0,   0,   0,   0xc, 'J',  'X',
+                                   'L', ' ', 0xd, 0xa, 0x87, 0xa};
+  return size >= sizeof(kMagic) && !memcmp(kMagic, data, sizeof(kMagic));
+}
+
+jxl::Status DecodeJpegXlContainerOneShot(const uint8_t* data, size_t size,
+                                         JpegXlContainer* container) {
+  const uint8_t* in = data;
+  size_t available_in = size;
+  // Start from an empty container; pointers set below point into `data`.
+  container->exif = nullptr;
+  container->exif_size = 0;
+  container->exfc = nullptr;
+  container->exfc_size = 0;
+  container->xml.clear();
+  container->xmlc.clear();
+  container->jumb = nullptr;
+  container->jumb_size = 0;
+  container->codestream.clear();
+  container->jpeg_reconstruction = nullptr;
+  container->jpeg_reconstruction_size = 0;
+
+  size_t box_index = 0;
+  // Boxes are read back to back until the input is used up.
+  while (available_in != 0) {
+    Box box;
+    if (!ParseBoxHeader(&in, &available_in, &box)) {
+      return JXL_FAILURE("Invalid box header");
+    }
+    // A box without a stored size covers all of the remaining input.
+    size_t data_size = box.data_size_given ? box.data_size : available_in;
+
+    if (box.data_size > available_in) {
+      return JXL_FAILURE("Unexpected end of file");
+    }
+
+    if (box_index == 0) {
+      // TODO(lode): leave out magic signature box?
+      // Must be magic signature box.
+      if (memcmp("JXL ", box.type, 4) != 0) {
+        return JXL_FAILURE("Invalid magic signature");
+      }
+      if (box.data_size != 4) return JXL_FAILURE("Invalid magic signature");
+      if (in[0] != 0xd || in[1] != 0xa || in[2] != 0x87 || in[3] != 0xa) {
+        return JXL_FAILURE("Invalid magic signature");
+      }
+    } else if (box_index == 1) {
+      // Must be ftyp box.
+      if (memcmp("ftyp", box.type, 4) != 0) {
+        return JXL_FAILURE("Invalid ftyp");
+      }
+      if (box.data_size != 12) return JXL_FAILURE("Invalid ftyp");
+      const char* expected = "jxl \0\0\0\0jxl ";
+      if (memcmp(expected, in, 12) != 0) return JXL_FAILURE("Invalid ftyp");
+    } else if (!memcmp("jxli", box.type, 4)) {
+      // TODO(lode): parse JXL frame index box
+      if (!container->codestream.empty()) {
+        return JXL_FAILURE("frame index must come before codestream");
+      }
+    } else if (!memcmp("jxlc", box.type, 4)) {
+      container->codestream.append(in, in + data_size);
+    } else if (!memcmp("jxlp", box.type, 4)) {
+      if (data_size < 4) return JXL_FAILURE("Invalid jxlp");
+      // TODO(jon): don't just ignore the 4-byte counter before the data
+      container->codestream.append(in + 4, in + data_size);
+    } else if (!memcmp("Exif", box.type, 4)) {
+      if (data_size < 4) return JXL_FAILURE("Invalid Exif");
+      uint32_t tiff_header_offset = LoadBE32(in);
+      if (tiff_header_offset > data_size - 4)
+        return JXL_FAILURE("Invalid Exif tiff header offset");
+      container->exif = in + 4 + tiff_header_offset;
+      container->exif_size = data_size - 4 - tiff_header_offset;
+    } else if (!memcmp("Exfc", box.type, 4)) {
+      container->exfc = in;
+      container->exfc_size = data_size;
+    } else if (!memcmp("xml ", box.type, 4)) {
+      container->xml.emplace_back(in, data_size);
+    } else if (!memcmp("xmlc", box.type, 4)) {
+      container->xmlc.emplace_back(in, data_size);
+    } else if (!memcmp("jumb", box.type, 4)) {
+      container->jumb = in;
+      container->jumb_size = data_size;
+    } else if (!memcmp("jbrd", box.type, 4)) {
+      container->jpeg_reconstruction = in;
+      container->jpeg_reconstruction_size = data_size;
+    } else {
+      // Do nothing: box not recognized here but may be recognizable by
+      // other software.
+    }
+    // Step over the payload to reach the header of the next box.
+    in += data_size;
+    available_in -= data_size;
+    box_index++;
+  }
+
+  return true;
+}
+
+static jxl::Status AppendBoxAndData(const char type[4], const uint8_t* data,
+                                    size_t data_size, jxl::PaddedBytes* out,
+                                    bool exif = false) {
+  // An Exif payload is preceded by a 4-byte tiff header offset, always 0.
+  const size_t prefix_size = exif ? 4 : 0;
+  Box header;
+  memcpy(header.type, type, 4);
+  header.data_size_given = true;
+  header.data_size = data_size + prefix_size;
+  JXL_RETURN_IF_ERROR(AppendBoxHeader(header, out));
+  for (size_t i = 0; i < prefix_size; ++i) out->push_back(0);
+  out->append(data, data + data_size);
+  return true;
+}
+
+jxl::Status EncodeJpegXlContainerOneShot(const JpegXlContainer& container,
+                                         jxl::PaddedBytes* out) {
+  // Fixed prefix: the 12-byte "JXL " signature box followed by the 20-byte
+  // "ftyp" box carrying the "jxl " brand.
+  static const unsigned char kPrefix[] = {
+      0,   0,   0,   0xc,  'J', 'X', 'L', ' ', 0xd, 0xa, 0x87, 0xa,
+      0,   0,   0,   0x14, 'f', 't', 'y', 'p', 'j', 'x', 'l',  ' ',
+      0,   0,   0,   0,    'j', 'x', 'l', ' '};
+  out->append(kPrefix, kPrefix + sizeof(kPrefix));
+
+  // Metadata boxes go in front of the codestream; absent ones are skipped.
+  if (container.exif != nullptr) {
+    JXL_RETURN_IF_ERROR(AppendBoxAndData("Exif", container.exif,
+                                         container.exif_size, out, true));
+  }
+  if (container.exfc != nullptr) {
+    JXL_RETURN_IF_ERROR(
+        AppendBoxAndData("Exfc", container.exfc, container.exfc_size, out));
+  }
+
+  for (const auto& xml : container.xml) {
+    JXL_RETURN_IF_ERROR(AppendBoxAndData("xml ", xml.first, xml.second, out));
+  }
+  for (const auto& xmlc : container.xmlc) {
+    JXL_RETURN_IF_ERROR(
+        AppendBoxAndData("xmlc", xmlc.first, xmlc.second, out));
+  }
+  if (container.jpeg_reconstruction != nullptr) {
+    JXL_RETURN_IF_ERROR(AppendBoxAndData("jbrd", container.jpeg_reconstruction,
+                                         container.jpeg_reconstruction_size,
+                                         out));
+  }
+
+  // The codestream is mandatory. Everything appended so far stays in `out`
+  // when it is missing.
+  if (container.codestream.empty()) {
+    return JXL_FAILURE("must have primary image frame");
+  }
+  JXL_RETURN_IF_ERROR(AppendBoxAndData("jxlc", container.codestream.data(),
+                                       container.codestream.size(), out));
+
+  // JUMBF data, when present, is written after the codestream.
+  if (container.jumb != nullptr) {
+    JXL_RETURN_IF_ERROR(
+        AppendBoxAndData("jumb", container.jumb, container.jumb_size, out));
+  }
+
+  return true;
+}
+
+// TODO(veluca): the format defined here encode some things multiple times. Fix
+// that.
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/box/box.h b/third-party/libjxl/libjxl/tools/box/box.h
new file mode 100644
index 0000000000..087ed84db1
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/box/box.h
@@ -0,0 +1,115 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tools for reading from / writing to ISOBMFF format for JPEG XL.
+
+#ifndef TOOLS_BOX_BOX_H_
+#define TOOLS_BOX_BOX_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+
+namespace jpegxl {
+namespace tools {
+
+// A top-level box in the box format.
+struct Box {
+  // The type of the box: four raw bytes, not NUL-terminated.
+  // If "uuid", use extended_type instead.
+  char type[4];
+
+  // The extended_type is only used when type == "uuid".
+  // Extended types are not used in JXL. However, the box format itself
+  // supports this so they are handled correctly.
+  char extended_type[16];
+
+  // Size of the data, excluding box header. The box ends, and next box
+  // begins, at data + data_size. Not meaningful if data_size_given is false.
+  uint64_t data_size;
+
+  // If the size is not given, the data extends to the end of the file.
+  // If this field is false, the data_size field may not be used.
+  bool data_size_given;
+};
+
+// Parses the header of a BMFF box. Returns the result in a Box struct.
+// Updates next_in and available_in to point at the data in the box, directly
+// after the header.
+// Sets the data_size if known, or must be handled by the caller and runs until
+// the end of the container file if not known.
+// NOTE: available_in should be at least 8 up to 32 bytes to parse the
+// header without error.
+jxl::Status ParseBoxHeader(const uint8_t** next_in, size_t* available_in,
+                           Box* box);
+
+// TODO(lode): streaming C API
+jxl::Status AppendBoxHeader(const Box& box, jxl::PaddedBytes* out);
+
+// NOTE: after DecodeJpegXlContainerOneShot, the exif etc. pointers point to
+// regions within the input data passed to that function.
+struct JpegXlContainer {
+  // Exif metadata, or null if not present in the container.
+  // The exif data has the format of 'Exif block' as defined in
+  // ISO/IEC 23008-12:2017 Clause A.2.1
+  // Only the actual Exif data (starting with the tiff header MM or II) is
+  // stored: decoding skips the tiff header offset, encoding writes offset 0.
+  // TODO(lode): support the theoretical case of multiple exif boxes
+  const uint8_t* exif = nullptr;  // Not owned
+  size_t exif_size = 0;
+
+  // Brotli-compressed exif metadata, if present. The data points to the brotli
+  // compressed stream, it is not decompressed here.
+  const uint8_t* exfc = nullptr;  // Not owned
+  size_t exfc_size = 0;
+
+  // XML boxes for XMP. There may be multiple XML boxes.
+  // Each entry points to XML location and provides size.
+  // The memory is not owned.
+  // TODO(lode): for C API, cannot use std::vector.
+  std::vector<std::pair<const uint8_t*, size_t>> xml;
+
+  // Brotli-compressed xml boxes. The bytes are given in brotli-compressed form
+  // and are not decompressed here.
+  std::vector<std::pair<const uint8_t*, size_t>> xmlc;
+
+  // JUMBF superbox data, or null if not present in the container.
+  // The parsing of the nested boxes inside is not handled here.
+  const uint8_t* jumb = nullptr;  // Not owned
+  size_t jumb_size = 0;
+
+  // TODO(lode): add frame index data
+
+  // JPEG reconstruction data, or null if not present in the container.
+  const uint8_t* jpeg_reconstruction = nullptr;  // Not owned
+  size_t jpeg_reconstruction_size = 0;
+
+  // The main JPEG XL codestream, of which there must be 1 in the container.
+  jxl::PaddedBytes codestream;  // Owned copy of the codestream bytes
+};
+
+// Returns whether `data` starts with a container header; definitely returns
+// false if `size` is less than 12 bytes.
+bool IsContainerHeader(const uint8_t* data, size_t size);
+
+// NOTE: the input data must remain valid as long as `container` is used,
+// because its exif etc. pointers point to that data.
+jxl::Status DecodeJpegXlContainerOneShot(const uint8_t* data, size_t size,
+                                         JpegXlContainer* container);
+
+// TODO(lode): streaming C API
+jxl::Status EncodeJpegXlContainerOneShot(const JpegXlContainer& container,
+                                         jxl::PaddedBytes* out);
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_BOX_BOX_H_
diff --git a/third-party/libjxl/libjxl/tools/box/box_list_main.cc b/third-party/libjxl/libjxl/tools/box/box_list_main.cc
new file mode 100644
index 0000000000..dfa8313ffa
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/box/box_list_main.cc
@@ -0,0 +1,89 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This binary tool lists the boxes of any box-based format (JPEG XL,
+// JPEG 2000, MP4, ...).
+// This exists as a test for manual verification, rather than an actual tool.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "tools/box/box.h"
+#include "tools/file_io.h"
+
+namespace jpegxl {
+namespace tools {
+
+// Prints one line per top-level box of the file named by argv[1] to stdout.
+int RunMain(int argc, const char* argv[]) {
+  if (argc < 2) {
+    fprintf(stderr, "Usage: %s <filename>\n", argv[0]);
+    return 1;
+  }
+
+  jxl::PaddedBytes compressed;
+  if (!ReadFile(argv[1], &compressed)) return 1;
+  fprintf(stderr, "Read %" PRIuS " compressed bytes\n", compressed.size());
+
+  const uint8_t* in = compressed.data();
+  size_t available_in = compressed.size();
+  fprintf(stderr, "File size: %" PRIuS "\n", compressed.size());
+
+  while (available_in != 0) {
+    const uint8_t* start = in;
+    Box box;
+    if (!ParseBoxHeader(&in, &available_in, &box)) {
+      fprintf(stderr, "Failed at %" PRIuS "\n",
+              compressed.size() - available_in);
+      break;
+    }
+    // Without a stored size the box takes up all of the remaining input.
+    size_t data_size = box.data_size_given ? box.data_size : available_in;
+    size_t header_size = in - start;
+    size_t box_size = header_size + data_size;
+
+    for (size_t i = 0; i < sizeof(box.type); i++) {
+      char c = box.type[i];
+      if (c < 32 || c > 127) {
+        printf("Unprintable character in box type, likely not a box file.\n");
+        return 0;
+      }
+    }
+    printf("box: \"%.4s\" box_size:%" PRIuS " data_size:%" PRIuS, box.type,
+           box_size, data_size);
+    if (!memcmp("uuid", box.type, 4)) {
+      printf(" -- extended type:\"%.16s\"", box.extended_type);
+    }
+    // The payload may be cut short, so only read bytes that are present.
+    if (!memcmp("ftyp", box.type, 4) && data_size > 4 && available_in >= 4) {
+      std::string ftype(in, in + 4);
+      printf(" -- ftype:\"%s\"", ftype.c_str());
+    }
+    printf("\n");
+
+    if (data_size > available_in) {
+      fprintf(
+          stderr, "Unexpected end of file %" PRIuS " %" PRIuS " %" PRIuS "\n",
+          static_cast<size_t>(box.data_size), available_in, compressed.size());
+      break;
+    }
+
+    in += data_size;
+    available_in -= data_size;
+  }
+
+  return 0;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
+
+int main(int argc, const char* argv[]) {
+  return jpegxl::tools::RunMain(argc, argv);  // Exit code is RunMain's result.
+}
diff --git a/third-party/libjxl/libjxl/tools/box/box_test.cc b/third-party/libjxl/libjxl/tools/box/box_test.cc
new file mode 100644
index 0000000000..1dd5e9f2dc
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/box/box_test.cc
@@ -0,0 +1,76 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/box/box.h"
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <utility>
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/testing.h"
+
+TEST(BoxTest, BoxTest) {
+  size_t test_size = 256;
+  jxl::PaddedBytes exif(test_size);
+  jxl::PaddedBytes xml0(test_size);
+  jxl::PaddedBytes xml1(test_size);
+  jxl::PaddedBytes jumb(test_size);
+  jxl::PaddedBytes codestream(test_size);
+  // Generate arbitrary data for the codestreams: the test is not testing
+  // the contents of them but whether they are preserved in the container.
+  uint8_t v = 0;  // Wraps around past 255.
+  for (size_t i = 0; i < test_size; ++i) {
+    exif[i] = v++;
+    xml0[i] = v++;
+    xml1[i] = v++;
+    jumb[i] = v++;
+    codestream[i] = v++;
+  }
+
+  jpegxl::tools::JpegXlContainer container;
+  container.exif = exif.data();
+  container.exif_size = exif.size();
+  container.xml.emplace_back(xml0.data(), xml0.size());
+  container.xml.emplace_back(xml1.data(), xml1.size());
+  container.xmlc.emplace_back(xml1.data(), xml1.size());  // Reuses xml1.
+  container.jumb = jumb.data();
+  container.jumb_size = jumb.size();
+  container.codestream = std::move(codestream);
+  // Round trip: encode the container, then decode the produced bytes.
+  jxl::PaddedBytes file;
+  EXPECT_EQ(true,
+            jpegxl::tools::EncodeJpegXlContainerOneShot(container, &file));
+
+  jpegxl::tools::JpegXlContainer container2;
+  EXPECT_EQ(true, jpegxl::tools::DecodeJpegXlContainerOneShot(
+                      file.data(), file.size(), &container2));
+  // Every field must come back with the same size and the same bytes.
+  EXPECT_EQ(exif.size(), container2.exif_size);
+  EXPECT_EQ(0, memcmp(exif.data(), container2.exif, container2.exif_size));
+  EXPECT_EQ(2u, container2.xml.size());
+  if (container2.xml.size() == 2) {
+    EXPECT_EQ(xml0.size(), container2.xml[0].second);
+    EXPECT_EQ(0, memcmp(xml0.data(), container2.xml[0].first,
+                        container2.xml[0].second));
+    EXPECT_EQ(xml1.size(), container2.xml[1].second);
+    EXPECT_EQ(0, memcmp(xml1.data(), container2.xml[1].first,
+                        container2.xml[1].second));
+  }
+  EXPECT_EQ(1u, container2.xmlc.size());
+  if (container2.xmlc.size() == 1) {
+    EXPECT_EQ(xml1.size(), container2.xmlc[0].second);
+    EXPECT_EQ(0, memcmp(xml1.data(), container2.xmlc[0].first,
+                        container2.xmlc[0].second));
+  }
+  EXPECT_EQ(jumb.size(), container2.jumb_size);
+  EXPECT_EQ(0, memcmp(jumb.data(), container2.jumb, container2.jumb_size));
+  EXPECT_EQ(container.codestream.size(), container2.codestream.size());
+  EXPECT_EQ(0, memcmp(container.codestream.data(), container2.codestream.data(),
+                      container2.codestream.size()));
+}
diff --git a/third-party/libjxl/libjxl/tools/butteraugli_main.cc b/third-party/libjxl/libjxl/tools/butteraugli_main.cc
new file mode 100644
index 0000000000..f526d0c662
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/butteraugli_main.cc
@@ -0,0 +1,172 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/dec/color_hints.h"
+#include "lib/extras/metrics.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/butteraugli/butteraugli.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "tools/file_io.h"
+#include "tools/thread_pool_internal.h"
+
+namespace {
+
+using jpegxl::tools::ThreadPoolInternal;
+using jxl::ButteraugliParams;
+using jxl::CodecInOut;
+using jxl::ColorEncoding;
+using jxl::Image3F;
+using jxl::ImageF;
+using jxl::Status;
+// Encodes `image` as an 8-bit sRGB picture and writes it to `filename`.
+Status WriteImage(Image3F&& image, const std::string& filename) {
+  ThreadPoolInternal pool(4);
+  CodecInOut io;
+  io.metadata.m.SetUintSamples(8);
+  io.metadata.m.color_encoding = ColorEncoding::SRGB();
+  io.SetFromImage(std::move(image), io.metadata.m.color_encoding);
+  // Encode into memory first; the file is only written if encoding succeeds.
+  std::vector<uint8_t> encoded;
+  return jxl::Encode(io, filename, &encoded, &pool) &&
+         jpegxl::tools::WriteFile(filename, encoded);
+}
+// Prints the Butteraugli distance and p-norm of two images; can dump maps.
+Status RunButteraugli(const char* pathname1, const char* pathname2,
+                      const std::string& distmap_filename,
+                      const std::string& raw_distmap_filename,
+                      const std::string& colorspace_hint, double p,
+                      float intensity_target) {
+  jxl::extras::ColorHints color_hints;
+  if (!colorspace_hint.empty()) {
+    color_hints.Add("color_space", colorspace_hint);
+  }
+  // Read and decode both inputs; the first failure ends the run.
+  const char* pathname[2] = {pathname1, pathname2};
+  CodecInOut io[2];
+  ThreadPoolInternal pool(4);
+  for (size_t i = 0; i < 2; ++i) {
+    std::vector<uint8_t> encoded;
+    if (!jpegxl::tools::ReadFile(pathname[i], &encoded)) {
+      fprintf(stderr, "Failed to read image from %s\n", pathname[i]);
+      return false;
+    }
+    if (!jxl::SetFromBytes(jxl::Span<const uint8_t>(encoded), color_hints,
+                           &io[i], &pool)) {
+      fprintf(stderr, "Failed to decode image from %s\n", pathname[i]);
+      return false;
+    }
+  }
+  // Images of different dimensions are rejected before comparing.
+  CodecInOut& io1 = io[0];
+  CodecInOut& io2 = io[1];
+  if (io1.xsize() != io2.xsize()) {
+    fprintf(stderr, "Width mismatch: %" PRIuS " %" PRIuS "\n", io1.xsize(),
+            io2.xsize());
+    return false;
+  }
+  if (io1.ysize() != io2.ysize()) {
+    fprintf(stderr, "Height mismatch: %" PRIuS " %" PRIuS "\n", io1.ysize(),
+            io2.ysize());
+    return false;
+  }
+
+  ImageF distmap;
+  ButteraugliParams ba_params;
+  ba_params.hf_asymmetry = 1.0f;
+  ba_params.xmul = 1.0f;
+  ba_params.intensity_target = intensity_target;
+  const float distance = jxl::ButteraugliDistance(
+      io1.Main(), io2.Main(), ba_params, jxl::GetJxlCms(), &distmap, &pool);
+  printf("%.10f\n", distance);
+
+  double pnorm = jxl::ComputeDistanceP(distmap, ba_params, p);
+  printf("%g-norm: %f\n", p, pnorm);
+  if (!distmap_filename.empty()) {
+    float good = jxl::ButteraugliFuzzyInverse(1.5);
+    float bad = jxl::ButteraugliFuzzyInverse(0.5);
+    JXL_CHECK(WriteImage(jxl::CreateHeatMapImage(distmap, good, bad),
+                         distmap_filename));
+  }
+  if (!raw_distmap_filename.empty()) {
+    // Binary mode: the float payload must not go through newline translation.
+    FILE* out = fopen(raw_distmap_filename.c_str(), "wb");
+    JXL_CHECK(out != nullptr);
+    fprintf(out, "Pf\n%" PRIuS " %" PRIuS "\n-1.0\n", distmap.xsize(),
+            distmap.ysize());
+    for (size_t y = distmap.ysize(); y-- > 0;) {  // last row is written first
+      fwrite(distmap.Row(y), sizeof(float), distmap.xsize(), out);
+    }
+    fclose(out);
+  }
+  return true;
+}
+
+}  // namespace
+// Entry point: parses the command line and returns 0 when the run succeeds.
+int main(int argc, char** argv) {
+  if (argc < 3) {
+    fprintf(stderr,
+            "Usage: %s <reference> <distorted>\n"
+            "  [--distmap <distmap>]\n"
+            "  [--rawdistmap <distmap.pfm>]\n"
+            "  [--intensity_target <intensity_target>]\n"
+            "  [--colorspace <colorspace_hint>]\n"
+            "  [--pnorm <pth norm>]\n"
+            "NOTE: images get converted to linear sRGB for butteraugli. Images"
+            " without attached profiles (such as ppm or pfm) are interpreted"
+            " as nonlinear sRGB. The hint format is RGB_D65_SRG_Rel_Lin for"
+            " linear sRGB. Intensity target is viewing conditions screen nits"
+            ", defaults to 80.\n",
+            argv[0]);
+    return 1;
+  }
+  std::string distmap;
+  std::string raw_distmap;
+  std::string colorspace;
+  double p = 3;
+  float intensity_target = 80.0;  // sRGB intensity target.
+  for (int i = 3; i < argc; i++) {  // argv[1] and argv[2] are the two images
+    if (std::string(argv[i]) == "--distmap" && i + 1 < argc) {
+      distmap = argv[++i];
+    } else if (std::string(argv[i]) == "--rawdistmap" && i + 1 < argc) {
+      raw_distmap = argv[++i];
+    } else if (std::string(argv[i]) == "--colorspace" && i + 1 < argc) {
+      colorspace = argv[++i];
+    } else if (std::string(argv[i]) == "--intensity_target" && i + 1 < argc) {
+      intensity_target = std::stof(std::string(argv[++i]));  // throws if bad
+    } else if (std::string(argv[i]) == "--pnorm" && i + 1 < argc) {
+      char* end;
+      p = strtod(argv[++i], &end);
+      if (end == argv[i]) {  // strtod consumed nothing
+        fprintf(stderr, "Failed to parse pnorm \"%s\".\n", argv[i]);
+        return 1;
+      }
+    } else {  // also reached when a known flag is last and lacks its value
+      fprintf(stderr, "Unrecognized flag \"%s\".\n", argv[i]);
+      return 1;
+    }
+  }
+  // RunButteraugli yields true on success, so negate it for the exit status.
+  return !RunButteraugli(argv[1], argv[2], distmap, raw_distmap, colorspace, p,
+                         intensity_target);
+}
diff --git a/third-party/libjxl/libjxl/tools/cjpegli.cc b/third-party/libjxl/libjxl/tools/cjpegli.cc
new file mode 100644
index 0000000000..f621884b5c
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/cjpegli.cc
@@ -0,0 +1,270 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <vector>
+
+#include "lib/extras/dec/decode.h"
+#include "lib/extras/enc/jpegli.h"
+#include "lib/extras/time.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/span.h"
+#include "tools/args.h"
+#include "tools/cmdline.h"
+#include "tools/file_io.h"
+#include "tools/speed_stats.h"
+
+namespace jpegxl {
+namespace tools {
+namespace {
+// Holds cjpegli's parsed flags and registers them with the command-line parser.
+struct Args {
+  void AddCommandLineOptions(CommandLineParser* cmdline) {
+    std::string input_help("the input can be ");
+    if (jxl::extras::CanDecode(jxl::extras::Codec::kPNG)) {
+      input_help.append("PNG, APNG, ");
+    }
+    if (jxl::extras::CanDecode(jxl::extras::Codec::kGIF)) {
+      input_help.append("GIF, ");
+    }
+    if (jxl::extras::CanDecode(jxl::extras::Codec::kEXR)) {
+      input_help.append("EXR, ");
+    }
+    input_help.append("PPM, PFM, or PGX");
+    cmdline->AddPositionalOption("INPUT", /* required = */ true, input_help,
+                                 &file_in);
+    cmdline->AddPositionalOption("OUTPUT", /* required = */ true,
+                                 "the compressed JPG output file", &file_out);
+
+    cmdline->AddOptionFlag('\0', "disable_output",
+                           "No output file will be written (for benchmarking)",
+                           &disable_output, &SetBooleanTrue, 1);
+
+    cmdline->AddOptionValue(
+        'x', "dec-hints", "key=value",
+        "color_space indicates the ColorEncoding, see Description();\n"
+        "    icc_pathname refers to a binary file containing an ICC profile.",
+        &color_hints_proxy, &ParseAndAppendKeyValue<ColorHintsProxy>, 1);
+
+    opt_distance_id = cmdline->AddOptionValue(
+        'd', "distance", "maxError",
+        "Max. butteraugli distance, lower = higher quality.\n"
+        "    1.0 = visually lossless (default).\n"
+        "    Recommended range: 0.5 .. 3.0. Allowed range: 0.0 ... 25.0.\n"
+        "    Mutually exclusive with --quality and --target_size.",
+        &settings.distance, &ParseFloat);
+
+    opt_quality_id = cmdline->AddOptionValue(
+        'q', "quality", "QUALITY",
+        "Quality setting (is remapped to --distance).\n"
+        "    Default is quality 90.\n"
+        "    Quality values roughly match libjpeg quality.\n"
+        "    Recommended range: 68 .. 96. Allowed range: 1 .. 100.\n"
+        "    Mutually exclusive with --distance and --target_size.",
+        &quality, &ParseSigned);
+
+    cmdline->AddOptionValue('\0', "chroma_subsampling", "444|440|422|420",
+                            "Chroma subsampling setting.",
+                            &settings.chroma_subsampling, &ParseString);
+
+    cmdline->AddOptionValue(
+        'p', "progressive_level", "N",
+        "Progressive level setting. Range: 0 .. 2.\n"
+        "    Default: 2. Higher number is more scans, 0 means sequential.",
+        &settings.progressive_level, &ParseSigned);
+
+    cmdline->AddOptionFlag('\0', "xyb", "Convert to XYB colorspace",
+                           &settings.xyb, &SetBooleanTrue, 1);
+
+    cmdline->AddOptionFlag(
+        '\0', "std_quant",
+        "Use quantization tables based on Annex K of the JPEG standard.",
+        &settings.use_std_quant_tables, &SetBooleanTrue, 1);
+
+    cmdline->AddOptionFlag(
+        '\0', "noadaptive_quantization", "Disable adaptive quantization.",
+        &settings.use_adaptive_quantization, &SetBooleanFalse, 1);
+
+    cmdline->AddOptionFlag(
+        '\0', "fixed_code",
+        "Disable Huffman code optimization. Must be used together with -p 0.",
+        &settings.optimize_coding, &SetBooleanFalse, 1);
+
+    cmdline->AddOptionValue(
+        '\0', "target_size", "N",
+        "If non-zero, set target size in bytes. This is useful for image \n"
+        "    quality comparisons, but makes encoding speed up to 20x slower.\n"
+        "    Mutually exclusive with --distance and --quality.",
+        &settings.target_size, &ParseUnsigned, 2);
+
+    cmdline->AddOptionValue('\0', "num_reps", "N",
+                            "How many times to compress. (For benchmarking).",
+                            &num_reps, &ParseUnsigned, 1);
+
+    cmdline->AddOptionFlag('\0', "quiet", "Suppress informative output", &quiet,
+                           &SetBooleanTrue, 1);
+
+    cmdline->AddOptionFlag(
+        'v', "verbose",
+        "Verbose output; can be repeated, also applies to help (!).", &verbose,
+        &SetBooleanTrue);
+  }
+
+  const char* file_in = nullptr;
+  const char* file_out = nullptr;
+  bool disable_output = false;
+  ColorHintsProxy color_hints_proxy;
+  jxl::extras::JpegSettings settings;
+  int quality = 90;  // only copied into `settings` when -q was passed
+  size_t num_reps = 1;
+  bool quiet = false;
+  bool verbose = false;
+  // References (ids) of specific options to check if they were matched.
+  CommandLineParser::OptionId opt_distance_id = -1;
+  CommandLineParser::OptionId opt_quality_id = -1;
+};
+// Range-checks the parsed cjpegli settings and reports the first violation.
+bool ValidateArgs(const Args& args) {
+  const auto reject = [](const char* message) {
+    fputs(message, stderr);
+    return false;
+  };
+  const jxl::extras::JpegSettings& opts = args.settings;
+  if (opts.distance < 0.0 || opts.distance > 25.0) {
+    return reject("Invalid --distance argument\n");
+  }
+  if (args.quality <= 0 || args.quality > 100) {
+    return reject("Invalid --quality argument\n");
+  }
+  const std::string& mode = opts.chroma_subsampling;
+  if (!mode.empty() && mode != "444" && mode != "440" && mode != "422" &&
+      mode != "420") {
+    return reject("Invalid --chroma_subsampling argument\n");
+  }
+  if (opts.progressive_level < 0 || opts.progressive_level > 2) {
+    return reject("Invalid --progressive_level argument\n");
+  }
+  // Non-optimized (fixed) Huffman codes are only accepted at level 0.
+  return opts.progressive_level > 0 && !opts.optimize_coding
+             ? reject("--fixed_code must be used together with -p 0\n")
+             : true;
+}
+// Rejects conflicting rate-control flags; copies --quality when it was given.
+bool SetDistance(const Args& args, const CommandLineParser& cmdline,
+                 jxl::extras::JpegSettings* settings) {
+  const bool has_distance = cmdline.GetOption(args.opt_distance_id)->matched();
+  const bool has_quality = cmdline.GetOption(args.opt_quality_id)->matched();
+  const bool has_target_size = args.settings.target_size > 0;
+  // --distance, --quality and --target_size are mutually exclusive.
+  const int selected = has_distance + has_quality + has_target_size;
+  if (selected > 1) {
+    fputs("Only one of --distance, --quality, or --target_size can be set.\n",
+          stderr);
+    return false;
+  }
+  if (has_quality) {
+    settings->quality = args.quality;
+  }
+  return true;
+}
+// Runs the cjpegli tool: parse flags, decode input, encode JPEG, write output.
+int CJpegliMain(int argc, const char* argv[]) {
+  Args args;
+  CommandLineParser cmdline;
+  args.AddCommandLineOptions(&cmdline);
+
+  if (!cmdline.Parse(argc, const_cast<const char**>(argv))) {
+    // Parse already printed the actual error cause.
+    fprintf(stderr, "Use '%s -h' for more information.\n", argv[0]);
+    return EXIT_FAILURE;
+  }
+  // A help request (or a missing INPUT) prints usage and exits successfully.
+  if (cmdline.HelpFlagPassed() || !args.file_in) {
+    cmdline.PrintHelp();
+    return EXIT_SUCCESS;
+  }
+
+  if (!args.file_out && !args.disable_output) {
+    fprintf(stderr,
+            "No output file specified and --disable_output flag not passed.\n");
+    return EXIT_FAILURE;
+  }
+
+  if (args.disable_output && !args.quiet) {
+    fprintf(stderr,
+            "Encoding will be performed, but the result will be discarded.\n");
+  }
+  // Load the whole input file, then decode it into a PackedPixelFile.
+  std::vector<uint8_t> input_bytes;
+  if (!ReadFile(args.file_in, &input_bytes)) {
+    fprintf(stderr, "Failed to read input image %s\n", args.file_in);
+    return EXIT_FAILURE;
+  }
+
+  jxl::extras::PackedPixelFile ppf;
+  if (!jxl::extras::DecodeBytes(jxl::Span<const uint8_t>(input_bytes),
+                                args.color_hints_proxy.target, &ppf)) {
+    fprintf(stderr, "Failed to decode input image %s\n", args.file_in);
+    return EXIT_FAILURE;
+  }
+
+  if (!args.quiet) {
+    fprintf(stderr, "Read %ux%u image, %" PRIuS " bytes.\n", ppf.info.xsize,
+            ppf.info.ysize, input_bytes.size());
+  }
+  // The settings are validated only after the input has been decoded.
+  if (!ValidateArgs(args) || !SetDistance(args, cmdline, &args.settings)) {
+    return EXIT_FAILURE;
+  }
+
+  if (!args.quiet) {
+    const jxl::extras::JpegSettings& s = args.settings;
+    fprintf(stderr, "Encoding [%s%s d%.3f%s %sAQ p%d %s]\n",
+            s.xyb ? "XYB" : "YUV", s.chroma_subsampling.c_str(), s.distance,
+            s.use_std_quant_tables ? " StdQuant" : "",
+            s.use_adaptive_quantization ? "" : "no", s.progressive_level,
+            s.optimize_coding ? "OPT" : "FIX");
+  }
+  // Encode num_reps times, timing each run for the speed statistics.
+  jpegxl::tools::SpeedStats stats;
+  std::vector<uint8_t> jpeg_bytes;
+  for (size_t num_rep = 0; num_rep < args.num_reps; ++num_rep) {
+    const double t0 = jxl::Now();
+    if (!jxl::extras::EncodeJpeg(ppf, args.settings, nullptr, &jpeg_bytes)) {
+      fprintf(stderr, "jpegli encoding failed\n");
+      return EXIT_FAILURE;
+    }
+    const double t1 = jxl::Now();
+    stats.NotifyElapsed(t1 - t0);
+    stats.SetImageSize(ppf.info.xsize, ppf.info.ysize);
+  }
+  // Write the result unless --disable_output was given.
+  if (args.file_out && !args.disable_output) {
+    if (!WriteFile(args.file_out, jpeg_bytes)) {
+      fprintf(stderr, "Could not write jpeg to %s\n", args.file_out);
+      return EXIT_FAILURE;
+    }
+  }
+  if (!args.quiet) {
+    fprintf(stderr, "Compressed to %" PRIuS " bytes ", jpeg_bytes.size());
+    const size_t num_pixels = ppf.info.xsize * ppf.info.ysize;
+    const double bpp =
+        static_cast<double>(jpeg_bytes.size() * jxl::kBitsPerByte) / num_pixels;
+    fprintf(stderr, "(%.3f bpp).\n", bpp);
+    stats.Print(1);
+  }
+  return EXIT_SUCCESS;
+}
+
+}  // namespace
+}  // namespace tools
+}  // namespace jpegxl
+// Process entry point; forwards to CJpegliMain and returns its result.
+int main(int argc, const char** argv) {
+  return jpegxl::tools::CJpegliMain(argc, argv);
+}
diff --git a/third-party/libjxl/libjxl/tools/cjxl_fuzzer.cc b/third-party/libjxl/libjxl/tools/cjxl_fuzzer.cc
new file mode 100644
index 0000000000..6175f79e72
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/cjxl_fuzzer.cc
@@ -0,0 +1,231 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/encode.h>
+#include <jxl/encode_cxx.h>
+#include <jxl/thread_parallel_runner.h>
+#include <jxl/thread_parallel_runner_cxx.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <functional>
+#include <hwy/targets.h>
+#include <random>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/test_image.h"
+
+namespace {
+
+#define TRY(expr)                                \
+  do {                                           \
+    if (JXL_ENC_SUCCESS != (expr)) return false; \
+  } while (0)
+// Encoder configuration that TestOneInput derives from one fuzzer input.
+struct FuzzSpec {
+  size_t xsize;
+  size_t ysize;
+  struct OptionSpec {
+    JxlEncoderFrameSettingId id;
+    int32_t value;
+  };
+  std::vector<OptionSpec> options;
+  bool is_jpeg = false;
+  bool lossless = false;
+  bool have_alpha = false;
+  bool premultiply = false;
+  bool orig_profile = true;
+  uint16_t pixels_seed = 0;
+  uint16_t alpha_seed = 0;
+  size_t bit_depth = 8;
+  size_t alpha_bit_depth = 8;
+  int32_t codestream_level = -1;
+  std::vector<uint8_t> icc;
+  JxlColorEncoding color_encoding = {};  // zeroed: only enum fields get set
+  size_t num_frames = 1;
+  size_t output_buffer_size = 1;
+};
+
+bool EncodeJpegXl(const FuzzSpec& spec) {
+  // Multi-threaded parallel runner. Limit to max 2 threads since the fuzzer
+  // itself is already multithreaded.
+  size_t num_threads =
+      std::min<size_t>(2, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+  auto runner = JxlThreadParallelRunnerMake(nullptr, num_threads);
+  JxlEncoderPtr enc_ptr = JxlEncoderMake(/*memory_manager=*/nullptr);
+  JxlEncoder* enc = enc_ptr.get();
+  for (size_t num_rep = 0; num_rep < 2; ++num_rep) {
+    JxlEncoderReset(enc);
+    TRY(JxlEncoderSetParallelRunner(enc, JxlThreadParallelRunner,
+                                    runner.get()));
+    JxlEncoderFrameSettings* frame_settings =
+        JxlEncoderFrameSettingsCreate(enc, nullptr);
+
+    for (auto option : spec.options) {
+      TRY(JxlEncoderFrameSettingsSetOption(frame_settings, option.id,
+                                           option.value));
+    }
+
+    TRY(JxlEncoderSetCodestreamLevel(enc, spec.codestream_level));
+    JxlBasicInfo basic_info;
+    JxlEncoderInitBasicInfo(&basic_info);
+    basic_info.xsize = spec.xsize;
+    basic_info.ysize = spec.ysize;
+    basic_info.bits_per_sample = spec.bit_depth;
+    basic_info.uses_original_profile = spec.orig_profile;
+    if (spec.have_alpha) {
+      basic_info.alpha_bits = spec.alpha_bit_depth;
+      basic_info.num_extra_channels = 1;
+    }
+    TRY(JxlEncoderSetBasicInfo(enc, &basic_info));
+    if (spec.lossless) {
+      TRY(JxlEncoderSetFrameLossless(frame_settings, JXL_TRUE));
+    }
+
+    // TODO(szabadka) Add icc color profiles.
+    TRY(JxlEncoderSetColorEncoding(enc, &spec.color_encoding));
+
+    // TODO(szabadka) Add jpeg frames.
+    for (size_t i = 0; i < spec.num_frames; ++i) {
+      JxlFrameHeader frame_header;
+      JxlEncoderInitFrameHeader(&frame_header);
+      // TODO(szabadka) Add more frame header options.
+      TRY(JxlEncoderSetFrameHeader(frame_settings, &frame_header));
+      if (spec.have_alpha) {
+        JxlExtraChannelInfo extra_channel_info;
+        JxlEncoderInitExtraChannelInfo(JXL_CHANNEL_ALPHA, &extra_channel_info);
+        TRY(JxlEncoderSetExtraChannelInfo(enc, 0, &extra_channel_info));
+        extra_channel_info.alpha_premultiplied = spec.premultiply;
+      }
+      JxlPixelFormat pixelformat = {3, JXL_TYPE_UINT16, JXL_LITTLE_ENDIAN, 0};
+      std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(
+          spec.xsize, spec.ysize, 3, spec.pixels_seed);
+      TRY(JxlEncoderAddImageFrame(frame_settings, &pixelformat, pixels.data(),
+                                  pixels.size()));
+      if (spec.have_alpha) {
+        std::vector<uint8_t> alpha_pixels = jxl::test::GetSomeTestImage(
+            spec.xsize, spec.ysize, 1, spec.alpha_seed);
+        TRY(JxlEncoderSetExtraChannelBuffer(frame_settings, &pixelformat,
+                                            alpha_pixels.data(),
+                                            alpha_pixels.size(), 0));
+      }
+    }
+    // Reading compressed output
+    JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+    while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+      std::vector<uint8_t> buf(spec.output_buffer_size + 32);
+      uint8_t* next_out = buf.data();
+      size_t avail_out = buf.size();
+      process_result = JxlEncoderProcessOutput(enc, &next_out, &avail_out);
+    }
+    if (JXL_ENC_SUCCESS != process_result) {
+      return false;
+    }
+  }
+  return true;
+}
+// Returns vec[get_index(size - 1)]; size - 1 is presumably the largest index.
+template <typename T>
+T Select(const std::vector<T>& vec, std::function<uint32_t(size_t)> get_index) {
+  return vec[get_index(vec.size() - 1)];
+}
+// Derives a FuzzSpec from the fuzzer bytes and runs one encode with it.
+int TestOneInput(const uint8_t* data, size_t size) {
+  uint64_t flags = 0;
+  size_t flag_bits = 0;
+  // Shifts 32 more input bits into `flags`; missing bytes read as zero.
+  const auto consume_data = [&]() {
+    const size_t take = std::min<size_t>(size, 4);
+    uint32_t buf = 0;
+    if (take != 0) memcpy(&buf, data, take);
+    data += take;
+    size -= take;
+    flags = (flags << 32) | buf;
+    flag_bits += 32;
+  };
+  // Returns a value in [0, max_value]; refills before the bit count can wrap.
+  const auto get_flag = [&](size_t max_value) {
+    size_t limit = 1;
+    while (limit <= max_value) {
+      limit <<= 1;
+      if (flag_bits <= 16) {
+        consume_data();
+      }
+      --flag_bits;
+    }
+    uint32_t result = flags % limit;
+    flags /= limit;
+    return result % (max_value + 1);
+  };
+
+  std::vector<JxlColorSpace> colorspaces = {
+      JXL_COLOR_SPACE_RGB, JXL_COLOR_SPACE_GRAY, JXL_COLOR_SPACE_XYB,
+      JXL_COLOR_SPACE_UNKNOWN};
+  std::vector<JxlWhitePoint> whitepoints = {
+      JXL_WHITE_POINT_D65, JXL_WHITE_POINT_CUSTOM, JXL_WHITE_POINT_E,
+      JXL_WHITE_POINT_DCI};
+  std::vector<JxlPrimaries> primaries = {JXL_PRIMARIES_SRGB,
+                                         JXL_PRIMARIES_CUSTOM,
+                                         JXL_PRIMARIES_2100, JXL_PRIMARIES_P3};
+  std::vector<JxlTransferFunction> transfer_functions = {
+      JXL_TRANSFER_FUNCTION_709,    JXL_TRANSFER_FUNCTION_UNKNOWN,
+      JXL_TRANSFER_FUNCTION_LINEAR, JXL_TRANSFER_FUNCTION_SRGB,
+      JXL_TRANSFER_FUNCTION_PQ,     JXL_TRANSFER_FUNCTION_DCI,
+      JXL_TRANSFER_FUNCTION_HLG,    JXL_TRANSFER_FUNCTION_GAMMA};
+  std::vector<JxlRenderingIntent> rendering_intents = {
+      JXL_RENDERING_INTENT_PERCEPTUAL,
+      JXL_RENDERING_INTENT_RELATIVE,
+      JXL_RENDERING_INTENT_SATURATION,
+      JXL_RENDERING_INTENT_ABSOLUTE,
+  };
+
+  FuzzSpec spec;
+  // Randomly set some options.
+  // TODO(szabadka) Make value bounds option specific.
+  size_t num_options = get_flag(32);
+  for (size_t i = 0; i < num_options; ++i) {
+    FuzzSpec::OptionSpec option;
+    option.id = static_cast<JxlEncoderFrameSettingId>(get_flag(32));
+    option.value = static_cast<int32_t>(get_flag(16)) - 1;
+    spec.options.push_back(option);
+  }
+
+  spec.xsize = get_flag(4095) + 1;
+  spec.ysize = get_flag(4095) + 1;
+  spec.lossless = get_flag(1);
+  if (!spec.lossless) {
+    spec.orig_profile = get_flag(1);
+  }
+  spec.have_alpha = get_flag(1);
+  spec.premultiply = get_flag(1);
+  spec.pixels_seed = get_flag((1 << 16) - 1);
+  spec.alpha_seed = get_flag((1 << 16) - 1);
+  spec.bit_depth = get_flag(15) + 1;
+  spec.alpha_bit_depth = get_flag(15) + 1;
+  spec.color_encoding.color_space = Select(colorspaces, get_flag);
+  spec.color_encoding.white_point = Select(whitepoints, get_flag);
+  spec.color_encoding.primaries = Select(primaries, get_flag);
+  spec.color_encoding.transfer_function = Select(transfer_functions, get_flag);
+  spec.color_encoding.rendering_intent = Select(rendering_intents, get_flag);
+  spec.output_buffer_size = get_flag(4095) + 1;
+  // Pin one SIMD target for this run, then restore the default afterwards.
+  const auto targets = hwy::SupportedAndGeneratedTargets();
+  hwy::SetSupportedTargetsForTest(Select(targets, get_flag));
+  EncodeJpegXl(spec);
+  hwy::SetSupportedTargetsForTest(0);
+
+  return 0;
+}
+
+}  // namespace
+// Fuzzer entry point with C linkage; hands the raw input to TestOneInput.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  return TestOneInput(data, size);
+}
diff --git a/third-party/libjxl/libjxl/tools/cjxl_main.cc b/third-party/libjxl/libjxl/tools/cjxl_main.cc
new file mode 100644
index 0000000000..5368b8e377
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/cjxl_main.cc
@@ -0,0 +1,1090 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Note: This encoder binary does extensive flag-validity checking (in
+// order to produce meaningful error messages), and on top of that
+// checks all libjxl C API call return values. The downside of this
+// vs. libjxl providing meaningful error messages is that a change to
+// the accepted range of a flag-specified parameter in libjxl will
+// also require a change to the range-check here. The advantage is
+// that this minimizes the size of libjxl.
+
+#include <jxl/codestream_header.h>
+#include <jxl/encode.h>
+#include <jxl/encode_cxx.h>
+#include <jxl/thread_parallel_runner.h>
+#include <jxl/thread_parallel_runner_cxx.h>
+#include <jxl/types.h>
+#include <stdint.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <functional>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <thread>
+#include <type_traits>
+#include <vector>
+
+#include "lib/extras/dec/apng.h"
+#include "lib/extras/dec/color_hints.h"
+#include "lib/extras/dec/decode.h"
+#include "lib/extras/enc/jxl.h"
+#include "lib/extras/time.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/exif.h"
+#include "tools/args.h"
+#include "tools/cmdline.h"
+#include "tools/codec_config.h"
+#include "tools/file_io.h"
+#include "tools/speed_stats.h"
+
+namespace jpegxl {
+namespace tools {
+// Flag parsers that combine ParseFloat with a sign check on the parsed value.
+namespace {
+inline bool ParsePhotonNoiseParameter(const char* arg, float* out) {
+  return ParseFloat(arg, out) && *out >= 0;  // zero is accepted
+}
+inline bool ParseIntensityTarget(const char* arg, float* out) {
+  return ParseFloat(arg, out) && *out > 0;  // must be strictly positive
+}
+}  // namespace
+// Result codes for cjxl (presumably its exit status); values count up from 0.
+enum CjxlRetCode : int {
+  OK = 0,
+  ERR_PARSE,
+  ERR_INVALID_ARG,
+  ERR_LOAD_INPUT,
+  ERR_INVALID_INPUT,
+  ERR_ENCODING,
+  ERR_CONTAINER,
+  ERR_WRITE,
+  DROPPED_JBRD,
+};
+
+struct CompressArgs {
+  // CompressArgs() = default;
+  void AddCommandLineOptions(CommandLineParser* cmdline) {
+    std::string input_help("the input can be ");
+    if (jxl::extras::CanDecode(jxl::extras::Codec::kPNG)) {
+      input_help.append("PNG, APNG, ");
+    }
+    if (jxl::extras::CanDecode(jxl::extras::Codec::kGIF)) {
+      input_help.append("GIF, ");
+    }
+    if (jxl::extras::CanDecode(jxl::extras::Codec::kJPG)) {
+      input_help.append("JPEG, ");
+    } else {
+      input_help.append("JPEG (lossless recompression only), ");
+    }
+    if (jxl::extras::CanDecode(jxl::extras::Codec::kEXR)) {
+      input_help.append("EXR, ");
+    }
+    input_help.append("PPM, PFM, PAM, PGX, or JXL");
+    // Positional arguments.
+    cmdline->AddPositionalOption("INPUT", /* required = */ true, input_help,
+                                 &file_in);
+    cmdline->AddPositionalOption("OUTPUT", /* required = */ true,
+                                 "the compressed JXL output file", &file_out);
+
+    // Flags.
+
+    cmdline->AddHelpText("\nBasic options:", 0);
+
+    // Target distance/size/bpp
+    opt_distance_id = cmdline->AddOptionValue(
+        'd', "distance", "DISTANCE",
+        "Target visual distance in JND units, lower = higher quality.\n"
+        "    0.0 = mathematically lossless. Default for already-lossy input "
+        "(JPEG/GIF).\n"
+        "    1.0 = visually lossless. Default for other input.\n"
+        "    Recommended range: 0.5 .. 3.0. Allowed range: 0.0 ... 25.0. "
+        "Mutually exclusive with --quality.",
+        &distance, &ParseFloat);
+
+    // High-level options
+    opt_quality_id = cmdline->AddOptionValue(
+        'q', "quality", "QUALITY",
+        "Quality setting, higher value = higher quality. This is internally "
+        "mapped to --distance.\n"
+        "    100 = mathematically lossless. 90 = visually lossless.\n"
+        "    Quality values roughly match libjpeg quality.\n"
+        "    Recommended range: 68 .. 96. Allowed range: 0 .. 100. Mutually "
+        "exclusive with --distance.",
+        &quality, &ParseFloat);
+
+    cmdline->AddOptionValue(
+        'e', "effort", "EFFORT",
+        "Encoder effort setting. Range: 1 .. 9.\n"
+        "    Default: 7. Higher numbers allow more computation "
+        "at the expense of time.\n"
+        "    For lossless, generally it will produce smaller files.\n"
+        "    For lossy, higher effort should more accurately reach "
+        "the target quality.",
+        &effort, &ParseUnsigned);
+
+    cmdline->AddOptionFlag('V', "version",
+                           "Print encoder library version number and exit.",
+                           &version, &SetBooleanTrue);
+    cmdline->AddOptionFlag('\0', "quiet", "Be more silent", &quiet,
+                           &SetBooleanTrue);
+    cmdline->AddOptionFlag('v', "verbose",
+                           "Verbose output; can be repeated and also applies "
+                           "to help (!).",
+                           &verbose, &SetBooleanTrue);
+
+    cmdline->AddHelpText("\nAdvanced options:", 1);
+
+    opt_alpha_distance_id = cmdline->AddOptionValue(
+        'a', "alpha_distance", "A_DISTANCE",
+        "Target visual distance for the alpha channel, lower = higher "
+        "quality.\n"
+        "    0.0 = mathematically lossless. 1.0 = visually lossless.\n"
+        "    Default is to use the same value as for the color image.\n"
+        "    Recommended range: 0.5 .. 3.0. Allowed range: 0.0 ... 25.0.",
+        &alpha_distance, &ParseFloat, 1);
+
+    cmdline->AddOptionFlag('p', "progressive",
+                           "Enable (more) progressive/responsive decoding.",
+                           &progressive, &SetBooleanTrue, 1);
+
+    cmdline->AddOptionValue(
+        '\0', "group_order", "0|1",
+        "Order in which 256x256 groups are stored "
+        "in the codestream for progressive rendering.\n"
+        "    0 = scanline order, 1 = center-first order. Default: 0.",
+        &group_order, &ParseOverride, 1);
+
+    cmdline->AddOptionValue(
+        '\0', "container", "0|1",
+        "0 = Avoid the container format unless it is needed (default)\n"
+        "    1 = Force using the container format even if it is not needed.",
+        &container, &ParseOverride, 1);
+
+    cmdline->AddOptionValue('\0', "compress_boxes", "0|1",
+                            "Disable/enable Brotli compression for metadata "
+                            "boxes. Default is 1 (enabled).",
+                            &compress_boxes, &ParseOverride, 1);
+
+    cmdline->AddOptionValue(
+        '\0', "brotli_effort", "B_EFFORT",
+        "Brotli effort setting. Range: 0 .. 11.\n"
+        "    Default: 9. Higher number is more effort (slower).",
+        &brotli_effort, &ParseUnsigned, 1);
+
+    cmdline->AddOptionValue(
+        'm', "modular", "0|1",
+        "Use modular mode (default = encoder chooses, 0 = enforce VarDCT, "
+        "1 = enforce modular mode).",
+        &modular, &ParseOverride, 1);
+
+    // JPEG modes: parallel Brunsli, pixels to JPEG, or JPEG to Brunsli
+    opt_lossless_jpeg_id = cmdline->AddOptionValue(
+        'j', "lossless_jpeg", "0|1",
+        "If the input is JPEG, losslessly transcode JPEG, "
+        "rather than using reencode pixels.",
+        &lossless_jpeg, &ParseUnsigned, 1);
+
+    cmdline->AddOptionValue(
+        '\0', "num_threads", "N",
+        "Number of worker threads (-1 == use machine default, "
+        "0 == do not use multithreading).",
+        &num_threads, &ParseSigned, 1);
+
+    cmdline->AddOptionValue(
+        '\0', "photon_noise_iso", "ISO_FILM_SPEED",
+        "Adds noise to the image emulating photographic film or sensor noise.\n"
+        "    Higher number = grainier image, e.g. 100 gives a low amount of "
+        "noise,\n"
+        "    3200 gives a lot of noise. Default is 0.",
+        &photon_noise_iso, &ParsePhotonNoiseParameter, 1);
+
+    cmdline->AddOptionValue(
+        '\0', "intensity_target", "N",
+        "Upper bound on the intensity level present in the image, in nits.\n"
+        "    Default is 0, which means 'choose a sensible default "
+        "value based on the color encoding.",
+        &intensity_target, &ParseIntensityTarget, 1);
+
+    cmdline->AddOptionValue(
+        'x', "dec-hints", "key=value",
+        "This is useful for 'raw' formats like PPM that cannot store "
+        "colorspace information\n"
+        "    and metadata, or to strip or modify metadata in formats that do.\n"
+        "    The key 'color_space' indicates an enumerated ColorEncoding, for "
+        "example:\n"
+        "      -x color_space=RGB_D65_SRG_Per_SRG is sRGB with perceptual "
+        "rendering intent\n"
+        "      -x color_space=RGB_D65_202_Rel_PeQ is Rec.2100 PQ with relative "
+        "rendering intent\n"
+        "    The key 'icc_pathname' refers to a binary file containing an ICC "
+        "profile.\n"
+        "    The keys 'exif', 'xmp', and 'jumbf' refer to a binary file "
+        "containing metadata;\n"
+        "    existing metadata of the same type will be overwritten.\n"
+        "    Specific metadata can be stripped using e.g. -x strip=exif",
+        &color_hints_proxy, &ParseAndAppendKeyValue<ColorHintsProxy>, 1);
+
+    cmdline->AddHelpText("\nExpert options:", 2);
+
+    cmdline->AddOptionValue(
+        '\0', "jpeg_store_metadata", "0|1",
+        ("If --lossless_jpeg=1, store JPEG reconstruction "
+         "metadata in the JPEG XL container.\n"
+         "    This allows reconstruction of the JPEG codestream. Default: 1."),
+        &jpeg_store_metadata, &ParseUnsigned, 2);
+
+    cmdline->AddOptionValue('\0', "codestream_level", "K",
+                            "The codestream level. Either `-1`, `5` or `10`.",
+                            &codestream_level, &ParseInt64, 2);
+
+    cmdline->AddOptionValue('\0', "faster_decoding", "0|1|2|3|4",
+                            "0 = default, higher values improve decode speed "
+                            "at the expense of quality or density.",
+                            &faster_decoding, &ParseUnsigned, 2);
+
+    cmdline->AddOptionValue('\0', "premultiply", "-1|0|1",
+                            "Force premultiplied (associated) alpha.",
+                            &premultiply, &ParseSigned, 2);
+
+    cmdline->AddOptionValue('\0', "keep_invisible", "0|1",
+                            "disable/enable preserving color of invisible "
+                            "pixels (default: 1 if lossless, 0 if lossy).",
+                            &keep_invisible, &ParseOverride, 2);
+
+    cmdline->AddOptionValue(
+        '\0', "center_x", "-1..XSIZE",
+        "Determines the horizontal position of center for the center-first "
+        "group order.\n"
+        "    Default -1 means 'middle of the image', "
+        "values [0..xsize) set this to a particular coordinate.",
+        &center_x, &ParseInt64, 2);
+
+    cmdline->AddOptionValue(
+        '\0', "center_y", "-1..YSIZE",
+        "Determines the vertical position of center for the center-first "
+        "group order.\n"
+        "    Default -1 means 'middle of the image', "
+        "values [0..ysize) set this to a particular coordinate.",
+        &center_y, &ParseInt64, 2);
+
+    // Flags.
+    cmdline->AddOptionFlag('\0', "progressive_ac",
+                           "Use the progressive mode for AC.", &progressive_ac,
+                           &SetBooleanTrue, 2);
+
+    cmdline->AddOptionFlag(
+        '\0', "qprogressive_ac",
+        "Use the progressive mode for AC with shift quantization.",
+        &qprogressive_ac, &SetBooleanTrue, 2);
+
+    cmdline->AddOptionValue(
+        '\0', "progressive_dc", "num_dc_frames",
+        "Progressive-DC setting. Valid values are: -1, 0, 1, 2.",
+        &progressive_dc, &ParseInt64, 2);
+
+    cmdline->AddOptionValue('\0', "resampling", "-1|1|2|4|8",
+                            "Resampling for color channels. Default of -1 "
+                            "applies resampling only for very low quality.\n"
+                            "    1 = downsampling (1x1), 2 = 2x2 downsampling, "
+                            "4 = 4x4 downsampling, 8 = 8x8 downsampling.",
+                            &resampling, &ParseInt64, 2);
+
+    cmdline->AddOptionValue('\0', "ec_resampling", "-1|1|2|4|8",
+                            "Resampling for extra channels. Same as "
+                            "--resampling but for extra channels like alpha.",
+                            &ec_resampling, &ParseInt64, 2);
+
+    cmdline->AddOptionFlag('\0', "already_downsampled",
+                           "Do not downsample before encoding, "
+                           "but still signal that the decoder should upsample.",
+                           &already_downsampled, &SetBooleanTrue, 2);
+
+    cmdline->AddOptionValue(
+        '\0', "upsampling_mode", "-1|0|1",
+        "Upsampling mode the decoder should use. Mostly useful in combination "
+        "with --already_downsampled. Value -1 means default (non-separable "
+        "upsampling), 0 means nearest neighbor (useful for pixel art)",
+        &upsampling_mode, &ParseInt64, 2);
+
+    cmdline->AddOptionValue(
+        '\0', "epf", "-1|0|1|2|3",
+        "Edge preserving filter level, 0-3. "
+        "Default -1 means encoder chooses, 0-3 set a strength.",
+        &epf, &ParseInt64, 2);
+
+    cmdline->AddOptionValue('\0', "gaborish", "0|1",
+                            "Force disable/enable the gaborish filter. Default "
+                            "is 'encoder chooses'",
+                            &gaborish, &ParseOverride, 2);
+
+    cmdline->AddOptionValue('\0', "override_bitdepth", "BITDEPTH",
+                            "Default is zero (use the input image bit depth); "
+                            "if nonzero, override the bit depth",
+                            &override_bitdepth, &ParseUnsigned, 2);
+
+    cmdline->AddHelpText("\nOptions for experimentation / benchmarking:", 3);
+
+    cmdline->AddOptionValue('\0', "noise", "0|1",
+                            "Force disable/enable adaptive noise generation "
+                            "(experimental). Default "
+                            "is 'encoder chooses'",
+                            &noise, &ParseOverride, 3);
+
+    cmdline->AddOptionValue(
+        '\0', "jpeg_reconstruction_cfl", "0|1",
+        "Enable/disable chroma-from-luma (CFL) for lossless "
+        "JPEG reconstruction.",
+        &jpeg_reconstruction_cfl, &ParseOverride, 3);
+
+    cmdline->AddOptionValue('\0', "num_reps", "N",
+                            "How many times to compress. (For benchmarking).",
+                            &num_reps, &ParseUnsigned, 3);
+
+    cmdline->AddOptionFlag('\0', "disable_output",
+                           "No output file will be written (for benchmarking)",
+                           &disable_output, &SetBooleanTrue, 3);
+
+    cmdline->AddOptionValue(
+        '\0', "dots", "0|1",
+        "Force disable/enable dots generation. "
+        "(not provided = default, 0 = disable, 1 = enable).",
+        &dots, &ParseOverride, 3);
+
+    cmdline->AddOptionValue(
+        '\0', "patches", "0|1",
+        "Force disable/enable patches generation. "
+        "(not provided = default, 0 = disable, 1 = enable).",
+        &patches, &ParseOverride, 3);
+
+    cmdline->AddOptionValue(
+        '\0', "frame_indexing", "INDICES",
+        // TODO(tfish): Add a more convenient vanilla alternative.
+        "INDICES is of the form '^(0*|1[01]*)'. The i-th position indicates "
+        "whether the\n"
+        "    i-th frame will be indexed in the frame index box.",
+        &frame_indexing, &ParseString, 3);
+
+    cmdline->AddOptionFlag('\0', "allow_expert_options",
+                           "Allow specifying advanced options; this allows "
+                           "setting effort to 10, for\n"
+                           "    somewhat better lossless compression at the "
+                           "cost of a massive speed hit.",
+                           &allow_expert_options, &SetBooleanTrue, 3);
+
+    cmdline->AddHelpText("\nModular mode options:", 4);
+
+    // modular mode options
+    cmdline->AddOptionValue(
+        'I', "iterations", "PERCENT",
+        "Percentage of pixels used to learn MA trees. Higher values use\n"
+        "    more encoder memory and can result in better compression. Default "
+        "of -1 means\n"
+        "    the encoder chooses. Zero means no MA trees are used.",
+        &modular_ma_tree_learning_percent, &ParseFloat, 4);
+
+    cmdline->AddOptionValue(
+        'C', "modular_colorspace", "K",
+        ("Color transform: -1 = default (try several per group, depending\n"
+         "    on effort), 0 = RGB (none), 1-41 = fixed RCT (6 = YCoCg)."),
+        &modular_colorspace, &ParseInt64, 4);
+
+    opt_modular_group_size_id = cmdline->AddOptionValue(
+        'g', "modular_group_size", "K",
+        "Group size: -1 = default (let the encoder choose),\n"
+        "    0 = 128x128, 1 = 256x256, 2 = 512x512, 3 = 1024x1024.",
+        &modular_group_size, &ParseInt64, 4);
+
+    cmdline->AddOptionValue(
+        'P', "modular_predictor", "K",
+        "Predictor(s) to use: 0=zero, 1=left, 2=top, 3=avg0, 4=select,\n"
+        "    5=gradient, 6=weighted, 7=topright, 8=topleft, 9=leftleft, "
+        "10=avg1, 11=avg2, 12=avg3,\n"
+        "    13=toptop predictive average, 14=mix 5 and 6, 15=mix everything.\n"
+        "    Default is 14 at effort < 9 and 15 at effort 9.",
+        &modular_predictor, &ParseInt64, 4);
+
+    cmdline->AddOptionValue(
+        'E', "modular_nb_prev_channels", "K",
+        "Number of extra (previous-channel) MA tree properties to use.",
+        &modular_nb_prev_channels, &ParseInt64, 4);
+
+    cmdline->AddOptionValue(
+        '\0', "modular_palette_colors", "K",
+        "Use palette if number of colors is smaller than or equal to this.",
+        &modular_palette_colors, &ParseInt64, 4);
+
+    cmdline->AddOptionFlag(
+        '\0', "modular_lossy_palette",
+        "Use delta palette in a lossy way; it is recommended to also\n"
+        "    set --modular_palette_colors=0 with this "
+        "option to use the default palette only.",
+        &modular_lossy_palette, &SetBooleanTrue, 4);
+
+    cmdline->AddOptionValue('X', "pre-compact", "PERCENT",
+                            "Use global channel palette if the number of "
+                            "sample values is smaller\n"
+                            "    than this percentage of the nominal range. ",
+                            &modular_channel_colors_global_percent, &ParseFloat,
+                            4);
+
+    cmdline->AddOptionValue(
+        'Y', "post-compact", "PERCENT",
+        "Use local (per-group) channel palette if the "
+        "number of sample values is\n"
+        "    smaller than this percentage of the nominal range.",
+        &modular_channel_colors_group_percent, &ParseFloat, 4);
+
+    opt_responsive_id =
+        cmdline->AddOptionValue('R', "responsive", "K",
+                                "Do the Squeeze transform, 0=false, "
+                                "1=true (default: 1 if lossy, 0 if lossless)",
+                                &responsive, &ParseInt64, 4);
+  }
+
+  // Common flags.
+  bool version = false;
+  jxl::Override container = jxl::Override::kDefault;
+  bool quiet = false;
+  bool disable_output = false;
+
+  const char* file_in = nullptr;
+  const char* file_out = nullptr;
+  jxl::Override print_profile = jxl::Override::kDefault;
+
+  // Decoding source image flags
+  ColorHintsProxy color_hints_proxy;
+
+  // JXL flags
+  size_t override_bitdepth = 0;
+  int32_t num_threads = -1;
+  size_t num_reps = 1;
+  float intensity_target = 0;
+
+  // Whether to perform lossless transcoding with kVarDCT or kJPEG encoding.
+  // If true, attempts to load JPEG coefficients instead of pixels.
+  // Reset to false if input image is not a JPEG.
+  size_t lossless_jpeg = 1;
+
+  size_t jpeg_store_metadata = 1;
+
+  float quality = -1001.f;  // Default to lossless if input is already lossy,
+                            // or to VarDCT otherwise.
+  bool verbose = false;
+  bool progressive = false;
+  bool progressive_ac = false;
+  bool qprogressive_ac = false;
+  int64_t progressive_dc = -1;
+  bool modular_lossy_palette = false;
+  int32_t premultiply = -1;
+  bool already_downsampled = false;
+  int64_t upsampling_mode = -1;
+  jxl::Override jpeg_reconstruction_cfl = jxl::Override::kDefault;
+  jxl::Override modular = jxl::Override::kDefault;
+  jxl::Override keep_invisible = jxl::Override::kDefault;
+  jxl::Override dots = jxl::Override::kDefault;
+  jxl::Override patches = jxl::Override::kDefault;
+  jxl::Override gaborish = jxl::Override::kDefault;
+  jxl::Override group_order = jxl::Override::kDefault;
+  jxl::Override compress_boxes = jxl::Override::kDefault;
+  jxl::Override noise = jxl::Override::kDefault;
+
+  size_t faster_decoding = 0;
+  int64_t resampling = -1;
+  int64_t ec_resampling = -1;
+  int64_t epf = -1;
+  int64_t center_x = -1;
+  int64_t center_y = -1;
+  int64_t modular_group_size = -1;
+  int64_t modular_predictor = -1;
+  int64_t modular_colorspace = -1;
+  float modular_channel_colors_global_percent = -1.f;
+  float modular_channel_colors_group_percent = -1.f;
+  int64_t modular_palette_colors = -1;
+  int64_t modular_nb_prev_channels = -1;
+  float modular_ma_tree_learning_percent = -1.f;
+  float photon_noise_iso = 0;
+  int64_t codestream_level = -1;
+  int64_t responsive = -1;
+  float distance = 1.0;
+  float alpha_distance = 1.0;
+  size_t effort = 7;
+  size_t brotli_effort = 9;
+  std::string frame_indexing;
+
+  bool allow_expert_options = false;
+
+  // References (ids) of specific options to check if they were matched.
+  CommandLineParser::OptionId opt_lossless_jpeg_id = -1;
+  CommandLineParser::OptionId opt_responsive_id = -1;
+  CommandLineParser::OptionId opt_distance_id = -1;
+  CommandLineParser::OptionId opt_alpha_distance_id = -1;
+  CommandLineParser::OptionId opt_quality_id = -1;
+  CommandLineParser::OptionId opt_modular_group_size_id = -1;
+};
+
+// Returns a short label for the encoding path implied by `args`: "JPEG" when
+// lossless JPEG transcoding is requested, "Modular" when modular mode is
+// forced on or the distance is exactly zero, and "VarDCT" otherwise.
+const char* ModeFromArgs(const CompressArgs& args) {
+  if (args.lossless_jpeg) {
+    return "JPEG";
+  }
+  const bool modular_forced = args.modular == jxl::Override::kOn;
+  const bool zero_distance = args.distance == 0;
+  return (modular_forced || zero_distance) ? "Modular" : "VarDCT";
+}
+
+// Formats the quality setting of `args` for display: "lossless transcode" for
+// lossless JPEG transcoding, "lossless" for a distance of exactly zero, and
+// otherwise "d" followed by the distance printed with three decimals.
+std::string DistanceFromArgs(const CompressArgs& args) {
+  if (args.lossless_jpeg) {
+    return "lossless transcode";
+  }
+  if (args.distance == 0) {
+    return "lossless";
+  }
+  // Only the lossy case needs numeric formatting.
+  char text[100];
+  snprintf(text, sizeof(text), "d%.3f", args.distance);
+  return text;
+}
+
+// Reports, through `cmdline.VerbosePrintf`, what was read and how it will be
+// encoded.
+//
+// First an input summary is printed at level 1: only the byte count for
+// lossless JPEG transcoding, otherwise the image dimensions from `ppf`, the
+// byte count and `decode_mps`. Then a bracketed "Encoding [...]" line is
+// printed at level 0 with the mode, distance and effort. When the container is
+// forced on, that line also mentions JPEG reconstruction data (if it will be
+// stored) and the sizes of any Exif/XMP/JUMBF metadata present in `ppf`.
+void PrintMode(jxl::extras::PackedPixelFile& ppf, const double decode_mps,
+               size_t num_bytes, const CompressArgs& args,
+               jpegxl::tools::CommandLineParser& cmdline) {
+  const bool force_container = args.container == jxl::Override::kOn;
+  const char* mode = ModeFromArgs(args);
+  const std::string distance = DistanceFromArgs(args);
+
+  // Input summary.
+  if (!args.lossless_jpeg) {
+    const size_t width = static_cast<size_t>(ppf.info.xsize);
+    const size_t height = static_cast<size_t>(ppf.info.ysize);
+    cmdline.VerbosePrintf(
+        1, "Read %" PRIuS "x%" PRIuS " image, %" PRIuS " bytes, %.1f MP/s\n",
+        width, height, num_bytes, decode_mps);
+  } else {
+    cmdline.VerbosePrintf(1, "Read JPEG image with %" PRIuS " bytes.\n",
+                          num_bytes);
+  }
+
+  // Encoding summary; the closing bracket is printed last so that the
+  // optional container details can be appended on the same line.
+  const char* container_prefix = force_container ? "Container | " : "";
+  cmdline.VerbosePrintf(0, "Encoding [%s%s, %s, effort: %" PRIuS,
+                        container_prefix, mode, distance.c_str(), args.effort);
+  if (force_container) {
+    if (args.lossless_jpeg && args.jpeg_store_metadata) {
+      cmdline.VerbosePrintf(0, " | JPEG reconstruction data");
+    }
+    if (!ppf.metadata.exif.empty()) {
+      cmdline.VerbosePrintf(0, " | %" PRIuS "-byte Exif",
+                            ppf.metadata.exif.size());
+    }
+    if (!ppf.metadata.xmp.empty()) {
+      cmdline.VerbosePrintf(0, " | %" PRIuS "-byte XMP",
+                            ppf.metadata.xmp.size());
+    }
+    if (!ppf.metadata.jumbf.empty()) {
+      cmdline.VerbosePrintf(0, " | %" PRIuS "-byte JUMBF",
+                            ppf.metadata.jumbf.size());
+    }
+  }
+  cmdline.VerbosePrintf(0, "]\n");
+}
+
+// Returns true when `image_data` begins with the two bytes 0xFF 0xD8 (the JPEG
+// start-of-image marker). Inputs shorter than two bytes return false.
+bool IsJPG(const std::vector<uint8_t>& image_data) {
+  if (image_data.size() < 2) {
+    return false;
+  }
+  return image_data[0] == 0xFF && image_data[1] == 0xD8;
+}
+
+// Signatures of flag validators: they receive the flag value and return an
+// empty string if it is acceptable, or an error description otherwise.
+using flag_check_fn = std::function<std::string(int64_t)>;
+using flag_check_float_fn = std::function<std::string(float)>;
+
+// Validates `flag_value` with `flag_check` and, if it is valid, appends it to
+// `params->options` as a setting for `encoder_option` (with 0 as the third
+// JXLOption argument).
+//
+// `flag_check` is called with the value as type T and must return an empty
+// string when the value is acceptable or an error description otherwise. On
+// error the message is written to stderr together with `flag_name` and the
+// process exits with EXIT_FAILURE. The default check accepts every value.
+//
+// The checker is a deduced template parameter rather than `flag_check_fn`.
+// Routing a float flag through std::function<std::string(int64_t)> converted
+// the value to an integer before the check ran, so e.g. 100.9 passed a
+// "<= 100" range check. With a deduced callable, a float checker sees the
+// untruncated float.
+template <typename T, typename FlagCheck = std::string (*)(T)>
+void ProcessFlag(
+    const char* flag_name, T flag_value,
+    JxlEncoderFrameSettingId encoder_option,
+    jxl::extras::JXLCompressParams* params,
+    FlagCheck flag_check = [](T) { return std::string(); }) {
+  std::string error = flag_check(flag_value);
+  if (!error.empty()) {
+    std::cerr << "Invalid flag value for --" << flag_name << ": " << error
+              << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  params->options.emplace_back(
+      jxl::extras::JXLOption(encoder_option, flag_value, 0));
+}
+
+// Forwards a tri-state flag to the encoder settings. A flag left at
+// jxl::Override::kDefault adds nothing; otherwise `encoder_option` is appended
+// to `params->options` with value 1 for kOn and 0 for any other value.
+void ProcessBoolFlag(jxl::Override flag_value,
+                     JxlEncoderFrameSettingId encoder_option,
+                     jxl::extras::JXLCompressParams* params) {
+  if (flag_value == jxl::Override::kDefault) {
+    return;
+  }
+  int64_t enabled = (flag_value == jxl::Override::kOn) ? 1 : 0;
+  params->options.emplace_back(
+      jxl::extras::JXLOption(encoder_option, enabled, 0));
+}
+
+// Resolves --distance / --quality / --alpha_distance into `params->distance`
+// and `params->alpha_distance`, updating `args` along the way.
+//
+// Rules, in order:
+//  * A non-zero --distance or a --quality other than 100 together with
+//    lossless JPEG transcoding is an error.
+//  * --quality and --distance are mutually exclusive; a given quality is
+//    converted to a distance (>= 100 maps to 0, [30, 100) is linear, below 30
+//    is quadratic).
+//  * With neither given, the distance defaults to 0 for JPG/GIF input and to
+//    1 otherwise.
+//  * An explicitly chosen positive distance switches lossless JPEG
+//    transcoding off.
+//  * The alpha distance follows the color distance unless --alpha_distance
+//    was given.
+// Errors are reported on stderr and terminate the process with EXIT_FAILURE.
+void SetDistanceFromFlags(CommandLineParser* cmdline, CompressArgs* args,
+                          jxl::extras::JXLCompressParams* params,
+                          const jxl::extras::Codec& codec) {
+  bool have_distance = cmdline->GetOption(args->opt_distance_id)->matched();
+  const bool have_alpha_distance =
+      cmdline->GetOption(args->opt_alpha_distance_id)->matched();
+  const bool have_quality =
+      cmdline->GetOption(args->opt_quality_id)->matched();
+
+  const bool lossy_distance = have_distance && (args->distance != 0.0);
+  const bool lossy_quality = have_quality && (args->quality != 100);
+  if (args->lossless_jpeg && (lossy_distance || lossy_quality)) {
+    std::cerr << "Must not set quality below 100 nor non-zero distance in "
+                 "combination with --lossless_jpeg=1."
+              << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  if (have_quality) {
+    if (have_distance) {
+      std::cerr << "Must not set both --distance and --quality." << std::endl;
+      exit(EXIT_FAILURE);
+    }
+    // Quality-to-distance conversion, evaluated in double precision.
+    double distance;
+    if (args->quality >= 100) {
+      distance = 0.0;
+    } else if (args->quality >= 30) {
+      distance = 0.1 + (100 - args->quality) * 0.09;
+    } else {
+      distance = 53.0 / 3000.0 * args->quality * args->quality -
+                 23.0 / 20.0 * args->quality + 25.0;
+    }
+    args->distance = distance;
+    have_distance = true;
+  }
+
+  if (have_distance) {
+    if (args->distance > 0) {
+      args->lossless_jpeg = 0;
+    }
+  } else {
+    const bool lossy_input = (codec == jxl::extras::Codec::kJPG ||
+                              codec == jxl::extras::Codec::kGIF);
+    args->distance = lossy_input ? 0.0 : 1.0;
+  }
+
+  params->distance = args->distance;
+  params->alpha_distance =
+      have_alpha_distance ? args->alpha_distance : params->distance;
+}
+
+// Validates the parsed command-line flags in `args` and translates them into
+// encoder settings on `params`.
+//
+// Most flags are range-checked and appended to `params->options` through
+// ProcessFlag / ProcessBoolFlag. An invalid value or flag combination prints a
+// message to stderr and terminates the process with EXIT_FAILURE.
+//
+// The function also mutates `args`: SetDistanceFromFlags may rewrite
+// `distance` and `lossless_jpeg`, --progressive turns on `qprogressive_ac` and
+// `responsive`, and `modular_lossy_palette` is cleared (with a warning) in
+// progressive mode.
+//
+// `codec` is forwarded to SetDistanceFromFlags and used for the PNM bit-depth
+// rule. `ppf` supplies the frame count for the per-frame index-box options and
+// the sample format for that same rule. The JPEG-specific options are only
+// emitted when `jpeg_bytes` is non-null. `cmdline` is queried to find out
+// whether particular options were given explicitly.
+void ProcessFlags(const jxl::extras::Codec codec,
+                  const jxl::extras::PackedPixelFile& ppf,
+                  const std::vector<uint8_t>* jpeg_bytes,
+                  CommandLineParser* cmdline, CompressArgs* args,
+                  jxl::extras::JXLCompressParams* params) {
+  // Tuning flags.
+  ProcessBoolFlag(args->modular, JXL_ENC_FRAME_SETTING_MODULAR, params);
+  ProcessBoolFlag(args->keep_invisible, JXL_ENC_FRAME_SETTING_KEEP_INVISIBLE,
+                  params);
+  ProcessBoolFlag(args->dots, JXL_ENC_FRAME_SETTING_DOTS, params);
+  ProcessBoolFlag(args->patches, JXL_ENC_FRAME_SETTING_PATCHES, params);
+  ProcessBoolFlag(args->gaborish, JXL_ENC_FRAME_SETTING_GABORISH, params);
+  ProcessBoolFlag(args->group_order, JXL_ENC_FRAME_SETTING_GROUP_ORDER, params);
+  ProcessBoolFlag(args->noise, JXL_ENC_FRAME_SETTING_NOISE, params);
+
+  params->allow_expert_options = args->allow_expert_options;
+
+  // --frame_indexing must match '^(0*|1[01]*)$': a string that does not start
+  // with '1' may only contain '0' characters.
+  if (!args->frame_indexing.empty()) {
+    bool must_be_all_zeros = args->frame_indexing[0] != '1';
+    for (char c : args->frame_indexing) {
+      if (c == '1') {
+        if (must_be_all_zeros) {
+          std::cerr << "Invalid --frame_indexing. If the first character is "
+                       "'0', all must be '0'."
+                    << std::endl;
+          exit(EXIT_FAILURE);
+        }
+      } else if (c != '0') {
+        std::cerr << "Invalid --frame_indexing. Must match the pattern "
+                     "'^(0*|1[01]*)$'."
+                  << std::endl;
+        exit(EXIT_FAILURE);
+      }
+    }
+  }
+
+  // Effort 10 is only accepted together with --allow_expert_options.
+  ProcessFlag(
+      "effort", static_cast<int64_t>(args->effort),
+      JXL_ENC_FRAME_SETTING_EFFORT, params, [args](int64_t x) -> std::string {
+        if (args->allow_expert_options) {
+          return (1 <= x && x <= 10) ? "" : "Valid range is {1, 2, ..., 10}.";
+        } else {
+          return (1 <= x && x <= 9) ? "" : "Valid range is {1, 2, ..., 9}.";
+        }
+      });
+  ProcessFlag("brotli_effort", static_cast<int64_t>(args->brotli_effort),
+              JXL_ENC_FRAME_SETTING_BROTLI_EFFORT, params,
+              [](int64_t x) -> std::string {
+                return (-1 <= x && x <= 11)
+                           ? ""
+                           : "Valid range is {-1, 0, 1, ..., 11}.";
+              });
+  ProcessFlag(
+      "epf", args->epf, JXL_ENC_FRAME_SETTING_EPF, params,
+      [](int64_t x) -> std::string {
+        return (-1 <= x && x <= 3) ? "" : "Valid range is {-1, 0, 1, 2, 3}.\n";
+      });
+  ProcessFlag("faster_decoding", static_cast<int64_t>(args->faster_decoding),
+              JXL_ENC_FRAME_SETTING_DECODING_SPEED, params,
+              [](int64_t x) -> std::string {
+                return (0 <= x && x <= 4) ? ""
+                                          : "Valid range is {0, 1, 2, 3, 4}.\n";
+              });
+  ProcessFlag("resampling", args->resampling, JXL_ENC_FRAME_SETTING_RESAMPLING,
+              params, [](int64_t x) -> std::string {
+                return (x == -1 || x == 1 || x == 2 || x == 4 || x == 8)
+                           ? ""
+                           : "Valid values are {-1, 1, 2, 4, 8}.\n";
+              });
+  ProcessFlag("ec_resampling", args->ec_resampling,
+              JXL_ENC_FRAME_SETTING_EXTRA_CHANNEL_RESAMPLING, params,
+              [](int64_t x) -> std::string {
+                return (x == -1 || x == 1 || x == 2 || x == 4 || x == 8)
+                           ? ""
+                           : "Valid values are {-1, 1, 2, 4, 8}.\n";
+              });
+  ProcessFlag("photon_noise_iso", args->photon_noise_iso,
+              JXL_ENC_FRAME_SETTING_PHOTON_NOISE, params);
+  ProcessFlag("already_downsampled",
+              static_cast<int64_t>(args->already_downsampled),
+              JXL_ENC_FRAME_SETTING_ALREADY_DOWNSAMPLED, params);
+  if (args->already_downsampled) params->already_downsampled = args->resampling;
+
+  SetDistanceFromFlags(cmdline, args, params, codec);
+
+  // A custom center only makes sense with the center-first group order.
+  if (args->group_order != jxl::Override::kOn &&
+      (args->center_x != -1 || args->center_y != -1)) {
+    std::cerr << "Invalid flag combination. Setting --center_x or --center_y "
+              << "requires setting --group_order=1" << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  ProcessFlag("center_x", args->center_x,
+              JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_X, params,
+              [](int64_t x) -> std::string {
+                if (x < -1) {
+                  return "Valid values are: -1 or [0 .. xsize).";
+                }
+                return "";
+              });
+  ProcessFlag("center_y", args->center_y,
+              JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_Y, params,
+              [](int64_t x) -> std::string {
+                if (x < -1) {
+                  return "Valid values are: -1 or [0 .. ysize).";
+                }
+                return "";
+              });
+
+  // Progressive/responsive mode settings.
+  bool responsive_set = cmdline->GetOption(args->opt_responsive_id)->matched();
+
+  ProcessFlag("progressive_dc", args->progressive_dc,
+              JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC, params,
+              [](int64_t x) -> std::string {
+                return (-1 <= x && x <= 2) ? ""
+                                           : "Valid range is {-1, 0, 1, 2}.\n";
+              });
+  ProcessFlag("progressive_ac", static_cast<int64_t>(args->progressive_ac),
+              JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC, params);
+
+  // --progressive is shorthand for quantized progressive AC plus responsive.
+  if (args->progressive) {
+    args->qprogressive_ac = true;
+    args->responsive = 1;
+    responsive_set = true;
+  }
+  // The responsive setting is only forwarded when it was chosen explicitly
+  // (directly or through --progressive).
+  if (responsive_set) {
+    ProcessFlag("responsive", args->responsive,
+                JXL_ENC_FRAME_SETTING_RESPONSIVE, params);
+  }
+  if (args->qprogressive_ac) {
+    ProcessFlag("qprogressive_ac", static_cast<int64_t>(1),
+                JXL_ENC_FRAME_SETTING_QPROGRESSIVE_AC, params);
+  }
+
+  // Modular mode related.
+  ProcessFlag("modular_group_size", args->modular_group_size,
+              JXL_ENC_FRAME_SETTING_MODULAR_GROUP_SIZE, params,
+              [](int64_t x) -> std::string {
+                return (-1 <= x && x <= 3)
+                           ? ""
+                           : "Invalid --modular_group_size. Valid "
+                             "range is {-1, 0, 1, 2, 3}.\n";
+              });
+  ProcessFlag("modular_predictor", args->modular_predictor,
+              JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR, params,
+              [](int64_t x) -> std::string {
+                return (-1 <= x && x <= 15)
+                           ? ""
+                           : "Invalid --modular_predictor. Valid "
+                             "range is {-1, 0, 1, ..., 15}.\n";
+              });
+  ProcessFlag("modular_colorspace", args->modular_colorspace,
+              JXL_ENC_FRAME_SETTING_MODULAR_COLOR_SPACE, params,
+              [](int64_t x) -> std::string {
+                return (-1 <= x && x <= 41)
+                           ? ""
+                           : "Invalid --modular_colorspace. Valid range is "
+                             "{-1, 0, 1, ..., 41}.\n";
+              });
+  ProcessFlag("modular_ma_tree_learning_percent",
+              args->modular_ma_tree_learning_percent,
+              JXL_ENC_FRAME_SETTING_MODULAR_MA_TREE_LEARNING_PERCENT, params,
+              [](float x) -> std::string {
+                return -1 <= x && x <= 100
+                           ? ""
+                           : "Invalid --modular_ma_tree_learning_percent. "
+                             "Valid range is [-1, 100].\n";
+              });
+  ProcessFlag("modular_nb_prev_channels", args->modular_nb_prev_channels,
+              JXL_ENC_FRAME_SETTING_MODULAR_NB_PREV_CHANNELS, params,
+              [](int64_t x) -> std::string {
+                return (-1 <= x && x <= 11)
+                           ? ""
+                           : "Invalid --modular_nb_prev_channels. Valid "
+                             "range is {-1, 0, 1, ..., 11}.\n";
+              });
+  // The lossy palette is dropped, not rejected, in progressive mode.
+  if (args->modular_lossy_palette) {
+    if (args->progressive || args->qprogressive_ac) {
+      fprintf(stderr,
+              "WARNING: --modular_lossy_palette is ignored in "
+              "progressive mode.\n");
+      args->modular_lossy_palette = false;
+    }
+  }
+  ProcessFlag("modular_lossy_palette",
+              static_cast<int64_t>(args->modular_lossy_palette),
+              JXL_ENC_FRAME_SETTING_LOSSY_PALETTE, params);
+  ProcessFlag("modular_palette_colors", args->modular_palette_colors,
+              JXL_ENC_FRAME_SETTING_PALETTE_COLORS, params,
+              [](int64_t x) -> std::string {
+                return -1 <= x ? ""
+                               : "Invalid --modular_palette_colors, must "
+                                 "be -1 or non-negative\n";
+              });
+  ProcessFlag("modular_channel_colors_global_percent",
+              args->modular_channel_colors_global_percent,
+              JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GLOBAL_PERCENT, params,
+              [](float x) -> std::string {
+                return (-1 <= x && x <= 100)
+                           ? ""
+                           : "Invalid --modular_channel_colors_global_percent. "
+                             "Valid "
+                             "range is [-1, 100].\n";
+              });
+  ProcessFlag("modular_channel_colors_group_percent",
+              args->modular_channel_colors_group_percent,
+              JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GROUP_PERCENT, params,
+              [](float x) -> std::string {
+                return (-1 <= x && x <= 100)
+                           ? ""
+                           : "Invalid --modular_channel_colors_group_percent. "
+                             "Valid "
+                             "range is [-1, 100].\n";
+              });
+
+  if (args->num_threads < -1) {
+    std::cerr
+        << "Invalid flag value for --num_threads: must be -1, 0 or positive."
+        << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  // JPEG specific options.
+  if (jpeg_bytes) {
+    ProcessBoolFlag(args->jpeg_reconstruction_cfl,
+                    JXL_ENC_FRAME_SETTING_JPEG_RECON_CFL, params);
+    ProcessBoolFlag(args->compress_boxes,
+                    JXL_ENC_FRAME_SETTING_JPEG_COMPRESS_BOXES, params);
+  }
+  // Set per-frame options: frame i gets an index-box option when the i-th
+  // character of --frame_indexing is '1'.
+  for (size_t num_frame = 0; num_frame < ppf.frames.size(); ++num_frame) {
+    if (num_frame < args->frame_indexing.size() &&
+        args->frame_indexing[num_frame] == '1') {
+      int64_t value = 1;
+      params->options.emplace_back(
+          jxl::extras::JXLOption(JXL_ENC_FRAME_INDEX_BOX, value, num_frame));
+    }
+  }
+  // Copy over the rest of the non-option params.
+  params->use_container = args->container == jxl::Override::kOn;
+  params->jpeg_store_metadata = args->jpeg_store_metadata;
+  params->intensity_target = args->intensity_target;
+  params->override_bitdepth = args->override_bitdepth;
+  params->codestream_level = args->codestream_level;
+  params->premultiply = args->premultiply;
+  // Box compression stays enabled unless it was explicitly switched off.
+  params->compress_boxes = args->compress_boxes != jxl::Override::kOff;
+  params->upsampling_mode = args->upsampling_mode;
+  if (codec == jxl::extras::Codec::kPNM &&
+      ppf.info.exponent_bits_per_sample == 0) {
+    params->input_bitdepth.type = JXL_BIT_DEPTH_FROM_CODESTREAM;
+  }
+
+  // If a metadata field is set to an empty value, it is stripped.
+  // Make sure we also strip it when the input image is read with AddJPEGFrame
+  (void)args->color_hints_proxy.target.Foreach(
+      [&params](const std::string& key,
+                const std::string& value) -> jxl::Status {
+        if (value == "") {
+          if (key == "exif") params->jpeg_strip_exif = true;
+          if (key == "xmp") params->jpeg_strip_xmp = true;
+          if (key == "jumbf") params->jpeg_strip_jumbf = true;
+        }
+        return true;
+      });
+}
+
+}  // namespace tools
+}  // namespace jpegxl
+
+// Entry point of the cjxl command-line encoder.
+//
+// Parses the command line, reads the input file, either decodes it to pixels
+// or keeps the raw JPEG bytes for lossless transcoding, runs EncodeImageJXL()
+// `args.num_reps` times and writes the compressed bytes to `args.file_out`
+// unless output is disabled.
+//
+// Returns CjxlRetCode::ERR_PARSE when the command line cannot be parsed,
+// CjxlRetCode::OK after printing the version or the help text, EXIT_SUCCESS
+// after a successful run and EXIT_FAILURE when encoding or writing fails.
+// Several early input errors terminate the process through exit(EXIT_FAILURE)
+// instead of returning.
+int main(int argc, char** argv) {
+  std::string version = jpegxl::tools::CodecConfigString(JxlEncoderVersion());
+  jpegxl::tools::CompressArgs args;
+  jpegxl::tools::CommandLineParser cmdline;
+  args.AddCommandLineOptions(&cmdline);
+
+  if (!cmdline.Parse(argc, const_cast<const char**>(argv))) {
+    // Parse already printed the actual error cause.
+    fprintf(stderr, "Use '%s -h' for more information\n", argv[0]);
+    return jpegxl::tools::CjxlRetCode::ERR_PARSE;
+  }
+
+  if (args.version) {
+    fprintf(stdout, "cjxl %s\n", version.c_str());
+    fprintf(stdout, "Copyright (c) the JPEG XL Project\n");
+    return jpegxl::tools::CjxlRetCode::OK;
+  }
+
+  if (!args.quiet) {
+    fprintf(stderr, "JPEG XL encoder %s\n", version.c_str());
+  }
+
+  // Help is also shown when no input file was given.
+  if (cmdline.HelpFlagPassed() || !args.file_in) {
+    cmdline.PrintHelp();
+    return jpegxl::tools::CjxlRetCode::OK;
+  }
+
+  if (!args.file_out && !args.disable_output) {
+    std::cerr
+        << "No output file specified and --disable_output flag not passed."
+        << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  if (args.file_out && args.disable_output && !args.quiet) {
+    fprintf(stderr,
+            "Encoding will be performed, but the result will be discarded.\n");
+  }
+
+  // Loading the input.
+  // Depending on flags-settings, we want to either load a JPEG and
+  // faithfully convert it to JPEG XL, or load (JPEG or non-JPEG)
+  // pixel data.
+  std::vector<uint8_t> image_data;
+  jxl::extras::PackedPixelFile ppf;
+  jxl::extras::Codec codec = jxl::extras::Codec::kUnknown;
+  std::vector<uint8_t>* jpeg_bytes = nullptr;
+  double decode_mps = 0;
+  size_t pixels = 0;
+  if (!jpegxl::tools::ReadFile(args.file_in, &image_data)) {
+    std::cerr << "Reading image data failed." << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  // Lossless JPEG transcoding is only kept enabled for JPEG input.
+  if (!jpegxl::tools::IsJPG(image_data)) args.lossless_jpeg = 0;
+  jxl::extras::JXLCompressParams params;
+  // First pass over the flags, made before the input is decoded: at this
+  // point codec is still kUnknown, ppf is empty and jpeg_bytes is null.
+  ProcessFlags(codec, ppf, jpeg_bytes, &cmdline, &args, &params);
+  if (!args.lossless_jpeg) {
+    // Pixel path: decode the input and time the decode for the stats below.
+    const double t0 = jxl::Now();
+    jxl::Status status = jxl::extras::DecodeBytes(
+        jxl::Span<const uint8_t>(image_data), args.color_hints_proxy.target,
+        &ppf, nullptr, &codec);
+
+    if (!status) {
+      std::cerr << "Getting pixel data failed." << std::endl;
+      exit(EXIT_FAILURE);
+    }
+    if (ppf.frames.empty()) {
+      std::cerr << "No frames on input file." << std::endl;
+      exit(EXIT_FAILURE);
+    }
+
+    const double t1 = jxl::Now();
+    pixels = ppf.info.xsize * ppf.info.ysize;
+    decode_mps = pixels * ppf.info.num_color_channels * 1E-6 / (t1 - t0);
+  }
+  if (args.lossless_jpeg && jpegxl::tools::IsJPG(image_data)) {
+    // Transcoding path: hand the untouched JPEG bytes to the encoder.
+    if (!cmdline.GetOption(args.opt_lossless_jpeg_id)->matched()) {
+      std::cerr << "Note: Implicit-default for JPEG is lossless-transcoding. "
+                << "To silence this message, set --lossless_jpeg=(1|0)."
+                << std::endl;
+    }
+    jpeg_bytes = &image_data;
+  }
+
+  // Second pass over the flags, now that codec, ppf and jpeg_bytes reflect the
+  // loaded input.
+  ProcessFlags(codec, ppf, jpeg_bytes, &cmdline, &args, &params);
+
+  if (!ppf.metadata.exif.empty()) {
+    jxl::InterpretExif(ppf.metadata.exif, &ppf.info.orientation);
+  }
+
+  // Metadata is present (or JPEG metadata must be stored): switch the
+  // container on by default, or strip the metadata when the user explicitly
+  // disabled the container.
+  if (!ppf.metadata.exif.empty() || !ppf.metadata.xmp.empty() ||
+      !ppf.metadata.jumbf.empty() || !ppf.metadata.iptc.empty() ||
+      (args.lossless_jpeg && args.jpeg_store_metadata)) {
+    if (args.container == jxl::Override::kDefault) {
+      args.container = jxl::Override::kOn;
+    } else if (args.container == jxl::Override::kOff) {
+      cmdline.VerbosePrintf(
+          1, "Stripping all metadata due to explicit container=0\n");
+      ppf.metadata.exif.clear();
+      ppf.metadata.xmp.clear();
+      ppf.metadata.jumbf.clear();
+      ppf.metadata.iptc.clear();
+      args.jpeg_store_metadata = 0;
+    }
+  }
+
+  if (!args.quiet) {
+    PrintMode(ppf, decode_mps, image_data.size(), args, cmdline);
+  }
+
+  // A --num_threads value greater than -1 overrides the default worker count.
+  size_t num_worker_threads = JxlThreadParallelRunnerDefaultNumWorkerThreads();
+  int64_t flag_num_worker_threads = args.num_threads;
+  if (flag_num_worker_threads > -1) {
+    num_worker_threads = flag_num_worker_threads;
+  }
+  JxlThreadParallelRunnerPtr runner = JxlThreadParallelRunnerMake(
+      /*memory_manager=*/nullptr, num_worker_threads);
+  params.runner = JxlThreadParallelRunner;
+  params.runner_opaque = runner.get();
+
+  // Encode num_reps times, recording the elapsed time of every repetition.
+  jpegxl::tools::SpeedStats stats;
+  std::vector<uint8_t> compressed;
+  for (size_t num_rep = 0; num_rep < args.num_reps; ++num_rep) {
+    const double t0 = jxl::Now();
+    if (!EncodeImageJXL(params, ppf, jpeg_bytes, &compressed)) {
+      fprintf(stderr, "EncodeImageJXL() failed.\n");
+      return EXIT_FAILURE;
+    }
+    const double t1 = jxl::Now();
+    stats.NotifyElapsed(t1 - t0);
+    stats.SetImageSize(ppf.info.xsize, ppf.info.ysize);
+  }
+
+  if (args.file_out && !args.disable_output) {
+    if (!jpegxl::tools::WriteFile(args.file_out, compressed)) {
+      std::cerr << "Could not write jxl file." << std::endl;
+      return EXIT_FAILURE;
+    }
+  }
+  if (!args.quiet) {
+    // Sizes below 100000 bytes are reported in bytes, larger ones in kB.
+    if (compressed.size() < 100000) {
+      cmdline.VerbosePrintf(0, "Compressed to %" PRIuS " bytes ",
+                            compressed.size());
+    } else {
+      cmdline.VerbosePrintf(0, "Compressed to %.1f kB ",
+                            compressed.size() * 0.001);
+    }
+    // For lossless jpeg-reconstruction, we don't print some stats, since we
+    // don't have easy access to the image dimensions.
+    if (args.container == jxl::Override::kOn) {
+      cmdline.VerbosePrintf(0, "including container ");
+    }
+    if (!args.lossless_jpeg) {
+      // `pixels` was only computed on the pixel-decoding path above.
+      const double bpp =
+          static_cast<double>(compressed.size() * jxl::kBitsPerByte) / pixels;
+      cmdline.VerbosePrintf(0, "(%.3f bpp%s).\n", bpp / ppf.frames.size(),
+                            ppf.frames.size() == 1 ? "" : "/frame");
+      JXL_CHECK(stats.Print(num_worker_threads));
+    } else {
+      cmdline.VerbosePrintf(0, "\n");
+    }
+  }
+  return EXIT_SUCCESS;
+}
diff --git a/third-party/libjxl/libjxl/tools/cmdline.cc b/third-party/libjxl/libjxl/tools/cmdline.cc
new file mode 100644
index 0000000000..29e4da8b15
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/cmdline.cc
@@ -0,0 +1,116 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/cmdline.h"
+
+#include <memory>
+#include <string>
+
+namespace jpegxl {
+namespace tools {
+
+// Prints the usage line followed by every option whose verbosity level does
+// not exceed the parser's current `verbosity`, then the built-in help entry.
+void CommandLineParser::PrintHelp() const {
+  // Help is written to stdout (not stderr) so that it can easily be grepped.
+  FILE* const out = stdout;
+  const char* const program = program_name_ ? program_name_ : "command";
+  fprintf(out, "Usage: %s", program);
+
+  // Usage line: visible positional arguments, optional ones in brackets.
+  for (const auto& option : options_) {
+    if (!option->positional()) continue;
+    if (option->verbosity_level() > verbosity) continue;
+    const bool mandatory = option->required();
+    const std::string flags = option->help_flags();
+    if (mandatory) {
+      fprintf(out, " %s", flags.c_str());
+    } else {
+      fprintf(out, " [%s]", flags.c_str());
+    }
+  }
+  fprintf(out, " [OPTIONS...]\n");
+
+  // Option list. While printing, track whether anything stayed hidden and the
+  // highest verbosity level any option asks for.
+  bool hidden_options = false;
+  int highest_level = 0;
+  for (const auto& option : options_) {
+    const int level = option->verbosity_level();
+    if (level > highest_level) highest_level = level;
+    if (level > verbosity) {
+      hidden_options = true;
+      continue;
+    }
+    if (option->help_only()) {
+      // Pure help text entries are printed verbatim, without a flag line.
+      fprintf(out, "%s\n", option->help_text());
+      continue;
+    }
+    fprintf(out, " %s\n", option->help_flags().c_str());
+    if (const char* text = option->help_text()) {
+      fprintf(out, "    %s\n", text);
+    }
+  }
+
+  fprintf(out, "\n -h, --help\n    Prints this help message. ");
+  if (hidden_options) {
+    fprintf(out, "Add -v (up to a total of %i times) to see more options.\n",
+            highest_level);
+  } else {
+    fprintf(out, "All options are shown above.\n");
+  }
+}
+
+// Walks argv and dispatches each argument to the first registered option that
+// matches it. Returns false (after printing a message to stderr) when an
+// option fails to parse or when no option matches an argument.
+bool CommandLineParser::Parse(int argc, const char* argv[]) {
+  if (argc) program_name_ = argv[0];
+  // Turned off by "--": afterwards arguments are only taken as positional.
+  bool options_enabled = true;
+  // argv[0] is the program name, so scanning starts at index 1.
+  int pos = 1;
+  while (pos < argc) {
+    const char* const current = argv[pos];
+    if (strcmp("-h", current) == 0 || strcmp("--help", current) == 0) {
+      help_ = true;
+      ++pos;
+      continue;
+    }
+    if (strcmp("-v", current) == 0 || strcmp("--verbose", current) == 0) {
+      // No `continue` here: the argument is still offered to the registered
+      // options below.
+      ++verbosity;
+    }
+    // After "--", filenames starting with "-" can be used.
+    if (strcmp("--", current) == 0) {
+      options_enabled = false;
+      ++pos;
+      continue;
+    }
+    // Special case: a lone "-" is a filename denoting stdin or stdout, so it
+    // is never matched as a flag.
+    const bool lone_dash = strcmp("-", current) == 0;
+    const bool match_as_option = options_enabled && !lone_dash;
+
+    bool handled = false;
+    for (const auto& option : options_) {
+      if (!option->Match(current, match_as_option)) continue;
+      // A successful option parse advances `pos` past what it consumed.
+      if (!option->Parse(argc, argv, &pos)) {
+        fprintf(stderr, "Error parsing flag %s\n", current);
+        return false;
+      }
+      handled = true;
+      break;
+    }
+    if (!handled) {
+      // No option matched this argument.
+      fprintf(stderr, "Unknown argument: %s\n", current);
+      return false;
+    }
+  }
+  return true;
+}
+
+// printf-style output to stderr that is emitted only when the parser's
+// `verbosity` is at least `min_verbosity`; stderr is flushed after writing.
+void CommandLineParser::VerbosePrintf(int min_verbosity, const char* format,
+                                      ...) const {
+  if (verbosity >= min_verbosity) {
+    va_list ap;
+    va_start(ap, format);
+    vfprintf(stderr, format, ap);
+    va_end(ap);
+    fflush(stderr);
+  }
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/cmdline.h b/third-party/libjxl/libjxl/tools/cmdline.h
new file mode 100644
index 0000000000..994341d193
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/cmdline.h
@@ -0,0 +1,442 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_CMDLINE_H_
+#define TOOLS_CMDLINE_H_
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace jpegxl {
+namespace tools {
+
+// A small command line parser. Options are registered with the Add*()
+// methods, each of which stores the option in `options_` and returns its
+// index as an OptionId. Parse() then offers every argument to the registered
+// options in registration order.
+class CommandLineParser {
+ public:
+  typedef int OptionId;
+
+  // An abstract class for defining command line options.
+  class CmdOptionInterface {
+   public:
+    CmdOptionInterface() = default;
+    virtual ~CmdOptionInterface() = default;
+
+    // Return a string with the option name or available flags.
+    virtual std::string help_flags() const = 0;
+
+    // Return the help string if any, or nullptr if no help string.
+    virtual const char* help_text() const = 0;
+
+    // Return the verbosity level for this option
+    virtual int verbosity_level() const = 0;
+
+    // Return whether the option was passed.
+    virtual bool matched() const = 0;
+
+    // Returns whether this option matches the passed command line argument.
+    virtual bool Match(const char* arg, bool parse_options) const = 0;
+
+    // Parses the option. The passed i points to the argument with the flag
+    // that matches either the short or the long name.
+    virtual bool Parse(int argc, const char* argv[], int* i) = 0;
+
+    // Returns whether the option is positional, and therefore will be shown
+    // in the first command line representation of the help output.
+    virtual bool positional() const = 0;
+
+    // Returns whether the option should be displayed as required in the help
+    // output. No effect on validation.
+    virtual bool required() const = 0;
+
+    // Returns whether the option is not really an option but just help text
+    virtual bool help_only() const = 0;
+  };
+
+  // Add help text
+  void AddHelpText(const char* help_text, int verbosity_level = 0) {
+    options_.emplace_back(new CmdHelpText(help_text, verbosity_level));
+  }
+
+  // Add a positional argument. Returns the id of the added option (its index
+  // in registration order).
+  // The "required" flag indicates whether the parameter is mandatory or
+  // optional, but is only used for how it is displayed in the command line
+  // help.
+  OptionId AddPositionalOption(const char* name, bool required,
+                               const std::string& help_text,
+                               const char** storage, int verbosity_level = 0) {
+    options_.emplace_back(new CmdOptionPositional(name, help_text, storage,
+                                                  verbosity_level, required));
+    return options_.size() - 1;
+  }
+
+  // Add an option with a value of type T. The option can be passed as
+  // '-s <value>' or '--long value' or '--long=value'. The CommandLineParser
+  // parser will call the function parser with the string pointing to '<value>'
+  // in either case. Returns the id of the added option (its index in
+  // registration order).
+  template <typename T>
+  OptionId AddOptionValue(char short_name, const char* long_name,
+                          const char* metavar, const char* help_text,
+                          T* storage, bool(parser)(const char*, T*),
+                          int verbosity_level = 0) {
+    options_.emplace_back(new CmdOptionFlag<T>(short_name, long_name, metavar,
+                                               help_text, storage, parser,
+                                               verbosity_level));
+    return options_.size() - 1;
+  }
+
+  // Add a flag without a value. Returns the id of the added option (its index
+  // in registration order).
+  template <typename T>
+  OptionId AddOptionFlag(char short_name, const char* long_name,
+                         const char* help_text, T* storage, bool(parser)(T*),
+                         int verbosity_level = 0) {
+    options_.emplace_back(new CmdOptionFlag<T>(
+        short_name, long_name, help_text, storage, parser, verbosity_level));
+    return options_.size() - 1;
+  }
+
+  // Returns the option registered under `id`. The id is used as an index
+  // without any bounds check, so it must come from one of the Add*() calls on
+  // this parser.
+  const CmdOptionInterface* GetOption(OptionId id) const {
+    return options_[id].get();
+  }
+
+  // Print the help message to stdout.
+  void PrintHelp() const;
+
+  // Whether a help flag was specified
+  bool HelpFlagPassed() const { return help_; }
+
+  // Current verbosity level; options and messages with a higher level are
+  // hidden by PrintHelp() and VerbosePrintf().
+  int verbosity = 0;
+
+  // Parse the command line.
+  bool Parse(int argc, const char* argv[]);
+
+  // Return the remaining positional args
+  // NOTE(review): only declared here; confirm that a definition exists before
+  // calling it.
+  std::vector<const char*> PositionalArgs() const;
+
+  // Conditionally print a message to stderr
+  void VerbosePrintf(int min_verbosity, const char* format, ...) const;
+
+ private:
+  // Help text only.
+  class CmdHelpText : public CmdOptionInterface {
+   public:
+    CmdHelpText(const char* help_text, int verbosity_level)
+        : help_text_(help_text), verbosity_level_(verbosity_level) {}
+
+    std::string help_flags() const override { return ""; }
+    const char* help_text() const override { return help_text_; }
+    int verbosity_level() const override { return verbosity_level_; }
+    bool matched() const override { return false; }
+
+    // Help text never matches a command line argument.
+    bool Match(const char* arg, bool parse_options) const override {
+      return false;
+    }
+
+    bool Parse(const int argc, const char* argv[], int* i) override {
+      return true;
+    }
+
+    bool positional() const override { return false; }
+
+    bool required() const override { return false; }
+
+    bool help_only() const override { return true; }
+
+   private:
+    const char* help_text_;
+    const int verbosity_level_;
+  };
+
+  // A positional argument.
+  class CmdOptionPositional : public CmdOptionInterface {
+   public:
+    CmdOptionPositional(const char* name, const std::string& help_text,
+                        const char** storage, int verbosity_level,
+                        bool required)
+        : name_(name),
+          help_text_(help_text),
+          storage_(storage),
+          verbosity_level_(verbosity_level),
+          required_(required) {}
+
+    std::string help_flags() const override { return name_; }
+    const char* help_text() const override { return help_text_.c_str(); }
+    int verbosity_level() const override { return verbosity_level_; }
+    bool matched() const override { return matched_; }
+
+    // Only match non-flag values. This means that you can't pass '-foo' as a
+    // positional argument, but it helps with detecting when passed a flag with
+    // a typo. After '--', option matching is disabled so positional arguments
+    // starting with '-' can be used.
+    bool Match(const char* arg, bool parse_options) const override {
+      return !matched_ && (!parse_options || arg[0] != '-');
+    }
+
+    // Stores the argument pointer itself (no copy) and consumes one argument.
+    bool Parse(const int argc, const char* argv[], int* i) override {
+      *storage_ = argv[*i];
+      (*i)++;
+      matched_ = true;
+      return true;
+    }
+
+    bool positional() const override { return true; }
+
+    bool required() const override { return required_; }
+
+    bool help_only() const override { return false; }
+
+   private:
+    const char* name_;
+    const std::string help_text_;
+    const char** storage_;
+    const int verbosity_level_;
+    const bool required_;
+
+    bool matched_{false};
+  };
+
+  // A class for handling an option flag like '-v' or '--foo=bar'.
+  template <typename T>
+  class CmdOptionFlag : public CmdOptionInterface {
+   public:
+    // Construct a flag that doesn't take any value, for example '-v' or
+    // '--long'. Passing a value to it raises an error.
+    CmdOptionFlag(char short_name, const char* long_name, const char* help_text,
+                  T* storage, bool(parser)(T*), int verbosity_level)
+        : short_name_(short_name),
+          long_name_(long_name),
+          long_name_len_(long_name ? strlen(long_name) : 0),
+          metavar_(nullptr),
+          help_text_(help_text),
+          storage_(storage),
+          verbosity_level_(verbosity_level) {
+      parser_.parser_no_value_ = parser;
+    }
+
+    // Construct a flag that expects a value to be passed.
+    CmdOptionFlag(char short_name, const char* long_name, const char* metavar,
+                  const char* help_text, T* storage,
+                  bool(parser)(const char* arg, T*), int verbosity_level)
+        : short_name_(short_name),
+          long_name_(long_name),
+          long_name_len_(long_name ? strlen(long_name) : 0),
+          // A null metavar means "takes no value", so a value-taking flag
+          // always gets a non-null (possibly empty) metavar.
+          metavar_(metavar ? metavar : ""),
+          help_text_(help_text),
+          storage_(storage),
+          verbosity_level_(verbosity_level) {
+      parser_.parser_with_arg_ = parser;
+    }
+
+    // Builds the "-s METAVAR, --long=METAVAR" text shown in the help output.
+    std::string help_flags() const override {
+      std::string ret;
+      if (short_name_) {
+        ret += std::string("-") + short_name_;
+        if (metavar_) ret += std::string(" ") + metavar_;
+        if (long_name_) ret += ", ";
+      }
+      if (long_name_) {
+        ret += std::string("--") + long_name_;
+        if (metavar_) ret += std::string("=") + metavar_;
+      }
+      return ret;
+    }
+    const char* help_text() const override { return help_text_; }
+    int verbosity_level() const override { return verbosity_level_; }
+    bool matched() const override { return matched_; }
+
+    bool Match(const char* arg, bool parse_options) const override {
+      return parse_options && (MatchShort(arg) || MatchLong(arg));
+    }
+
+    // Parses the flag at argv[*i], advancing *i past every consumed argument
+    // on success. Returns false after printing a message to stderr when a
+    // value is passed to a value-less flag or a required value is missing;
+    // otherwise returns the result of the registered parser function.
+    bool Parse(const int argc, const char* argv[], int* i) override {
+      matched_ = true;
+      if (MatchLong(argv[*i])) {
+        const char* arg = argv[*i] + 2 + long_name_len_;
+        if (arg[0] == '=') {
+          if (metavar_) {
+            // Passed '--long_name=...'.
+            (*i)++;
+            // Skip over the '=' on the LongMatch.
+            arg += 1;
+            return (*parser_.parser_with_arg_)(arg, storage_);
+          } else {
+            // Print the bare long name: argv[*i] already carries the "--"
+            // prefix and the unexpected "=value" part.
+            fprintf(stderr, "--%s didn't expect any argument passed to it.\n",
+                    long_name_);
+            return false;
+          }
+        }
+      }
+      // In any other case, it passed a -s or --long_name
+      (*i)++;
+      if (metavar_) {
+        if (argc <= *i) {
+          // argv[*i - 1] is the flag exactly as typed, dashes included.
+          fprintf(stderr, "%s expected an argument but none passed.\n",
+                  argv[*i - 1]);
+          return false;
+        }
+        return (*parser_.parser_with_arg_)(argv[(*i)++], storage_);
+      } else {
+        return (*parser_.parser_no_value_)(storage_);
+      }
+    }
+
+    bool positional() const override { return false; }
+
+    bool required() const override {
+      // Only used for help display of positional arguments.
+      return false;
+    }
+
+    bool help_only() const override { return false; }
+
+   private:
+    // Returns whether arg matches the short_name flag of this option.
+    bool MatchShort(const char* arg) const {
+      if (!short_name_ || arg[0] != '-') return false;
+      return arg[1] == short_name_ && arg[2] == 0;
+    }
+
+    // Returns whether arg matches the long_name flag of this option,
+    // potentially with an argument passed to it.
+    bool MatchLong(const char* arg) const {
+      if (!long_name_ || arg[0] != '-' || arg[1] != '-') return false;
+      arg += 2;  // Skips the '--'
+      if (strncmp(long_name_, arg, long_name_len_) != 0) return false;
+      arg += long_name_len_;
+      // Allow "--long_name=foo" and "--long_name" as long matches.
+      return arg[0] == 0 || arg[0] == '=';
+    }
+
+    // A short option passed as '-X' where X is the char. A value of 0 means
+    // no short option.
+    const char short_name_;
+
+    // A long option name passed as '--long' where 'long' is the name of the
+    // option.
+    const char* long_name_;
+    size_t long_name_len_;
+
+    // The text to display when referring to the value passed to this flag, for
+    // example "N" in the flag '--value N'. If null, this flag accepts no value
+    // and therefore no value must be passed.
+    const char* metavar_;
+
+    // The help string for this flag.
+    const char* help_text_;
+
+    // The pointer to the storage of this flag used when parsing.
+    T* storage_;
+
+    // At which verbosity level do we show this option?
+    int verbosity_level_;
+
+    // The function to use to parse the value when matched. The function used is
+    // parser_with_arg_ when metavar_ is not null (and the value string will be
+    // used) or parser_no_value_ when metavar_ is null.
+    union {
+      bool (*parser_with_arg_)(const char*, T*);
+      bool (*parser_no_value_)(T*);
+    } parser_;
+
+    // Whether this flag was matched.
+    bool matched_{false};
+  };
+
+  // argv[0] as seen by Parse(), or null before Parse() is called.
+  const char* program_name_{nullptr};
+
+  // All registered options and help texts, in registration order.
+  std::vector<std::unique_ptr<CmdOptionInterface>> options_;
+
+  // If true, help argument was given, so print help to stdout rather than
+  // stderr.
+  bool help_ = false;
+};
+
+//
+// Common parsers for AddOptionValue and AddOptionFlag
+//
+
+// Parses `arg` as a signed integer (base auto-detected: decimal, 0x hex or
+// leading-0 octal) into `*out`.
+// Returns false and prints a message to stderr when `arg` is empty, has
+// trailing characters, or does not fit in an int; `*out` is left untouched in
+// that case.
+static inline bool ParseSigned(const char* arg, int* out) {
+  char* end;
+  // Parse with the widest type so that values outside the int range can be
+  // detected instead of being silently truncated by the cast.
+  const long long value = strtoll(arg, &end, 0);
+  const int narrowed = static_cast<int>(value);
+  // end == arg means no digits were consumed at all (e.g. empty string).
+  if (end == arg || end[0] != '\0' ||
+      static_cast<long long>(narrowed) != value) {
+    fprintf(stderr, "Unable to interpret as signed integer: %s.\n", arg);
+    return false;
+  }
+  *out = narrowed;
+  return true;
+}
+
+// Parses `arg` as an unsigned integer (base auto-detected: decimal, 0x hex or
+// leading-0 octal) into `*out`.
+// Returns false and prints a message to stderr when `arg` is empty, contains a
+// minus sign, has trailing characters, or does not fit in a size_t; `*out` is
+// left untouched in that case.
+static inline bool ParseUnsigned(const char* arg, size_t* out) {
+  char* end;
+  const unsigned long long value = strtoull(arg, &end, 0);
+  const size_t narrowed = static_cast<size_t>(value);
+  // strtoull accepts a leading '-' and returns the negated value as an
+  // unsigned number, so "-1" would otherwise become a huge count. No valid
+  // unsigned literal contains '-', so any occurrence is rejected.
+  const bool negative = strchr(arg, '-') != nullptr;
+  if (negative || end == arg || end[0] != '\0' ||
+      static_cast<unsigned long long>(narrowed) != value) {
+    fprintf(stderr, "Unable to interpret as unsigned integer: %s.\n", arg);
+    return false;
+  }
+  *out = narrowed;
+  return true;
+}
+
+// Parses `arg` as a 64-bit signed integer (base auto-detected: decimal, 0x hex
+// or leading-0 octal) into `*out`.
+// Returns false and prints a message to stderr when `arg` is empty or has
+// trailing characters; `*out` is left untouched in that case.
+static inline bool ParseInt64(const char* arg, int64_t* out) {
+  char* end;
+  // strtoll rather than strtol: `long` is only 32 bits wide on some platforms
+  // (e.g. 64-bit Windows), which would cap the accepted range.
+  const long long value = strtoll(arg, &end, 0);
+  // end == arg means no digits were consumed at all (e.g. empty string).
+  if (end == arg || end[0] != '\0') {
+    fprintf(stderr, "Unable to interpret as signed integer: %s.\n", arg);
+    return false;
+  }
+  *out = static_cast<int64_t>(value);
+  return true;
+}
+
+// Parses `arg` as a 32-bit unsigned integer into `*out` by delegating to
+// ParseUnsigned and then checking the range.
+// Returns false (with a message on stderr) when ParseUnsigned rejects `arg` or
+// when the value does not fit in 32 bits; `*out` is left untouched in that
+// case.
+static inline bool ParseUint32(const char* arg, uint32_t* out) {
+  size_t value = 0;
+  if (!ParseUnsigned(arg, &value)) return false;
+  // Round-trip through uint32_t to detect values that would be truncated.
+  const uint32_t narrowed = static_cast<uint32_t>(value);
+  if (static_cast<size_t>(narrowed) != value) {
+    fprintf(stderr, "Value out of range for a 32-bit unsigned integer: %s.\n",
+            arg);
+    return false;
+  }
+  *out = narrowed;
+  return true;
+}
+
+// Parses `arg` as a floating point number and stores it in `*out` as a float.
+// Returns false and prints a message to stderr when `arg` is empty or has
+// trailing characters; `*out` is left untouched in that case.
+static inline bool ParseFloat(const char* arg, float* out) {
+  char* end;
+  const double value = strtod(arg, &end);
+  // end == arg means nothing was converted (e.g. empty string), which would
+  // otherwise be accepted as 0.
+  if (end == arg || end[0] != '\0') {
+    fprintf(stderr, "Unable to interpret as float: %s.\n", arg);
+    return false;
+  }
+  *out = static_cast<float>(value);
+  return true;
+}
+
+// Parses `arg` as a floating point number into `*out`.
+// Returns false and prints a message to stderr when `arg` is empty or has
+// trailing characters; `*out` is left untouched in that case.
+static inline bool ParseDouble(const char* arg, double* out) {
+  char* end;
+  const double value = strtod(arg, &end);
+  // end == arg means nothing was converted (e.g. empty string), which would
+  // otherwise be accepted as 0.
+  if (end == arg || end[0] != '\0') {
+    fprintf(stderr, "Unable to interpret as double: %s.\n", arg);
+    return false;
+  }
+  *out = value;
+  return true;
+}
+
+// Copies the argument text into `*out`. Always succeeds.
+static inline bool ParseString(const char* arg, std::string* out) {
+  std::string& target = *out;
+  target = arg;
+  return true;
+}
+
+// Parser for value-less flags: sets the flag storage to true. Always succeeds.
+static inline bool SetBooleanTrue(bool* out) {
+  bool& flag = *out;
+  flag = true;
+  return true;
+}
+
+// Parser for value-less flags: sets the flag storage to false. Always
+// succeeds.
+static inline bool SetBooleanFalse(bool* out) {
+  bool& flag = *out;
+  flag = false;
+  return true;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_CMDLINE_H_
diff --git a/third-party/libjxl/libjxl/tools/codec_config.cc b/third-party/libjxl/libjxl/tools/codec_config.cc
new file mode 100644
index 0000000000..8efc26c221
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/codec_config.cc
@@ -0,0 +1,57 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/codec_config.h"
+
+#include <hwy/targets.h>
+
+#include "tools/tool_version.h"
+
+namespace jpegxl {
+namespace tools {
+
+// Builds a one-line description of the codec build: the library version, the
+// tool version (unless it is "(unknown)"), the active sanitizer if any, and
+// the bracketed list of SIMD targets reported by Highway.
+//
+// `lib_version` is decoded as major * 1000000 + minor * 1000 + patch; a value
+// of 0 omits the library version from the result.
+std::string CodecConfigString(uint32_t lib_version) {
+  std::string config;
+
+  if (lib_version != 0) {
+    char version_str[20];
+    // The components are unsigned, so they are formatted with %u.
+    snprintf(version_str, sizeof(version_str), "v%u.%u.%u ",
+             lib_version / 1000000, (lib_version / 1000) % 1000,
+             lib_version % 1000);
+    config += version_str;
+  }
+
+  std::string version = kJpegxlVersion;
+  if (version != "(unknown)") {
+    config += version + ' ';
+  }
+
+#if defined(ADDRESS_SANITIZER)
+  config += " asan ";
+#elif defined(MEMORY_SANITIZER)
+  config += " msan ";
+#elif defined(THREAD_SANITIZER)
+  config += " tsan ";
+#else
+#endif
+
+  bool saw_target = false;
+  config += "[";
+  for (const uint32_t target : hwy::SupportedAndGeneratedTargets()) {
+    config += hwy::TargetName(target);
+    config += ',';
+    saw_target = true;
+  }
+  if (!saw_target) {
+    config += "no targets found,";
+  }
+  // Both branches above leave a trailing comma, so there is always exactly
+  // one character to drop here.
+  config.resize(config.size() - 1);  // remove trailing comma
+  config += "]";
+
+  return config;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/codec_config.h b/third-party/libjxl/libjxl/tools/codec_config.h
new file mode 100644
index 0000000000..8d1c73f79e
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/codec_config.h
@@ -0,0 +1,24 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_CODEC_CONFIG_H_
+#define TOOLS_CODEC_CONFIG_H_
+
+#include <stdint.h>
+
+#include <string>
+
+namespace jpegxl {
+namespace tools {
+
+// Returns a short string describing the codec version (if known) and build
+// settings such as sanitizers and SIMD targets. Used in the benchmark and
+// command-line tools.
+std::string CodecConfigString(uint32_t lib_version);
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_CODEC_CONFIG_H_
diff --git a/third-party/libjxl/libjxl/tools/color_encoding_fuzzer.cc b/third-party/libjxl/libjxl/tools/color_encoding_fuzzer.cc
new file mode 100644
index 0000000000..d73dc4f956
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/color_encoding_fuzzer.cc
@@ -0,0 +1,26 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <string>
+
+#include "lib/extras/dec/color_description.h"
+
+namespace jpegxl {
+namespace tools {
+
+// Fuzz target body: treats the raw fuzzer bytes as a color description string
+// and feeds it to jxl::ParseDescription. The parse result is deliberately
+// ignored; the function always returns 0.
+int TestOneInput(const uint8_t* data, size_t size) {
+  const char* const bytes = reinterpret_cast<const char*>(data);
+  std::string description(bytes, size);
+  JxlColorEncoding encoding;
+  (void)jxl::ParseDescription(description, &encoding);
+  return 0;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
+
+// C-linkage fuzzer entry point; forwards the input to
+// jpegxl::tools::TestOneInput and returns its result.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  const int result = jpegxl::tools::TestOneInput(data, size);
+  return result;
+}
diff --git a/third-party/libjxl/libjxl/tools/comparison_viewer/CMakeLists.txt b/third-party/libjxl/libjxl/tools/comparison_viewer/CMakeLists.txt
new file mode 100644
index 0000000000..9c29d2c600
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/comparison_viewer/CMakeLists.txt
@@ -0,0 +1,74 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Build script for the Qt-based comparison tools (compare_codecs and
+# compare_images). Both prerequisites below are optional: when one is missing
+# a warning is printed and nothing in this directory is built.
+find_package(Qt6 QUIET COMPONENTS Concurrent Widgets)
+if (NOT Qt6_FOUND)
+  message(WARNING "Qt6 was not found. The comparison tool will not be built.")
+  return()
+endif ()
+
+# icc_detect is a target defined outside this file.
+if (NOT TARGET icc_detect)
+  message(WARNING "icc_detect not built. The comparison tool will not be built.")
+  return ()
+endif ()
+
+# Enable Qt's automatic moc/uic processing for the targets below.
+set(CMAKE_INCLUDE_CURRENT_DIR ON)
+set(CMAKE_AUTOMOC ON)
+set(CMAKE_AUTOUIC ON)
+
+# Static helper library with the image loading code; it also compiles the JXL
+# loader sources from ../viewer.
+add_library(image_loading STATIC
+  ../viewer/load_jxl.cc
+  ../viewer/load_jxl.h
+  image_loading.cc
+  image_loading.h
+)
+target_include_directories(image_loading PRIVATE
+  $<TARGET_PROPERTY:lcms2,INCLUDE_DIRECTORIES>
+)
+# PUBLIC so that the executables linking image_loading get these as well.
+target_link_libraries(image_loading PUBLIC
+  Qt6::Widgets
+  jxl-static
+  jxl_threads-static
+  jxl_extras-static
+  lcms2
+)
+
+# compare_codecs executable.
+add_executable(compare_codecs WIN32
+  codec_comparison_window.cc
+  codec_comparison_window.h
+  codec_comparison_window.ui
+  compare_codecs.cc
+  settings.cc
+  settings.h
+  settings.ui
+  split_image_renderer.cc
+  split_image_renderer.h
+  split_image_view.cc
+  split_image_view.h
+  split_image_view.ui
+)
+target_link_libraries(compare_codecs
+  image_loading
+  Qt6::Concurrent
+  Qt6::Widgets
+  icc_detect
+)
+
+# compare_images executable; shares the settings and split-view sources with
+# compare_codecs.
+add_executable(compare_images WIN32
+  compare_images.cc
+  settings.cc
+  settings.h
+  settings.ui
+  split_image_renderer.cc
+  split_image_renderer.h
+  split_image_view.cc
+  split_image_view.h
+  split_image_view.ui
+)
+target_link_libraries(compare_images
+  image_loading
+  Qt6::Widgets
+  icc_detect
+)
diff --git a/third-party/libjxl/libjxl/tools/comparison_viewer/codec_comparison_window.cc b/third-party/libjxl/libjxl/tools/comparison_viewer/codec_comparison_window.cc
new file mode 100644
index 0000000000..0ecd5798ac
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/comparison_viewer/codec_comparison_window.cc
@@ -0,0 +1,318 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/comparison_viewer/codec_comparison_window.h"
+
+#include <stdlib.h>
+
+#include <QCollator>
+#include <QComboBox>
+#include <QDir>
+#include <QFileInfo>
+#include <QFlags>
+#include <QIcon>
+#include <QImage>
+#include <QImageReader>
+#include <QLabel>
+#include <QList>
+#include <QMap>
+#include <QString>
+#include <QStringList>
+#include <QtConcurrent>
+#include <algorithm>
+#include <climits>
+#include <functional>
+#include <utility>
+
+#include "lib/extras/codec.h"
+#include "tools/comparison_viewer/image_loading.h"
+#include "tools/comparison_viewer/split_image_view.h"
+#include "tools/icc_detect/icc_detect.h"
+
+namespace jpegxl {
+namespace tools {
+
+static constexpr char kPngSuffix[] = "png";
+
+namespace {
+
+// Snapshots the text currently shown by the two codec combo boxes and the two
+// compression-level combo boxes, so that it can be re-applied after the boxes
+// have been repopulated (see restoreCodecSelection).
+QVector<QPair<QComboBox*, QString>> currentCodecSelection(
+    const Ui::CodecComparisonWindow& ui) {
+  QComboBox* const boxes[] = {ui.codec1ComboBox, ui.codec2ComboBox,
+                              ui.compressionLevel1ComboBox,
+                              ui.compressionLevel2ComboBox};
+  QVector<QPair<QComboBox*, QString>> snapshot;
+  for (QComboBox* const box : boxes) {
+    snapshot.append(qMakePair(box, box->currentText()));
+  }
+  return snapshot;
+}
+
+// Re-selects, in every recorded combo box, the entry whose text matches the
+// recorded one. Boxes that no longer contain that text are left untouched.
+void restoreCodecSelection(
+    const QVector<QPair<QComboBox*, QString>>& selection) {
+  for (const QPair<QComboBox*, QString>& entry : selection) {
+    QComboBox* const box = entry.first;
+    const int position = box->findText(entry.second);
+    if (position == -1) continue;
+    box->setCurrentIndex(position);
+  }
+}
+
+}  // namespace
+
+// Builds the window: sets up the generated UI, connects every combo box,
+// "match" link and rendering-mode notification to its handler, and finally
+// scans `directory` for image sets.
+//
+// `intensityTarget` is stored and later forwarded to loadImage();
+// the monitor ICC profile is queried once, here, via GetMonitorIccProfile().
+CodecComparisonWindow::CodecComparisonWindow(const QString& directory,
+                                             const float intensityTarget,
+                                             QWidget* const parent)
+    : QMainWindow(parent),
+      intensityTarget_(intensityTarget),
+      monitorIccProfile_(GetMonitorIccProfile(this)) {
+  ui_.setupUi(this);
+
+  // Image set -> image: each selection repopulates the next combo box.
+  connect(ui_.imageSetComboBox, &QComboBox::currentTextChanged, this,
+          &CodecComparisonWindow::handleImageSetSelection);
+  connect(ui_.imageComboBox, &QComboBox::currentTextChanged, this,
+          &CodecComparisonWindow::handleImageSelection);
+
+  // A codec change refreshes the compression levels offered on that side.
+  connect(ui_.codec1ComboBox, &QComboBox::currentTextChanged,
+          [this]() { handleCodecChange(Side::LEFT); });
+  connect(ui_.codec2ComboBox, &QComboBox::currentTextChanged,
+          [this]() { handleCodecChange(Side::RIGHT); });
+
+  // A compression level change reloads the image displayed on that side.
+  connect(ui_.compressionLevel1ComboBox, &QComboBox::currentTextChanged,
+          [this]() { updateSideImage(Side::LEFT); });
+  connect(ui_.compressionLevel2ComboBox, &QComboBox::currentTextChanged,
+          [this]() { updateSideImage(Side::RIGHT); });
+
+  // Clicking a "Match" link picks the level closest in size to the other side.
+  connect(ui_.match1Label, &QLabel::linkActivated,
+          [this]() { matchSize(Side::LEFT); });
+  connect(ui_.match2Label, &QLabel::linkActivated,
+          [this]() { matchSize(Side::RIGHT); });
+
+  // Keep the status label in sync with what the split view is showing.
+  connect(
+      ui_.splitImageView, &SplitImageView::renderingModeChanged,
+      [this](const SplitImageRenderer::RenderingMode newMode) {
+        switch (newMode) {
+          case SplitImageRenderer::RenderingMode::LEFT:
+          case SplitImageRenderer::RenderingMode::RIGHT: {
+            // Report the codec and level selected for the displayed side.
+            QString codec, compressionLevel;
+            if (newMode == SplitImageRenderer::RenderingMode::LEFT) {
+              codec = ui_.codec1ComboBox->currentText();
+              compressionLevel = ui_.compressionLevel1ComboBox->currentText();
+            } else {
+              codec = ui_.codec2ComboBox->currentText();
+              compressionLevel = ui_.compressionLevel2ComboBox->currentText();
+            }
+            ui_.renderingModeLabel->setText(tr("Currently displaying: %1 @ %2")
+                                                .arg(codec)
+                                                .arg(compressionLevel));
+            break;
+          }
+
+          case SplitImageRenderer::RenderingMode::MIDDLE:
+            ui_.renderingModeLabel->setText(
+                tr("Currently displaying the original image."));
+            break;
+
+          // Any other mode: show no status text.
+          default:
+            ui_.renderingModeLabel->clear();
+            break;
+        }
+      });
+
+  // Done last, presumably so that the signals emitted while the combo boxes
+  // are populated reach the handlers connected above.
+  loadDirectory(directory);
+}
+
+// Slot: repopulates the image combo box with the images of `imageSetName`,
+// using each image's original.png as item icon, then tries to restore the
+// codec / compression-level selection that was active before.
+void CodecComparisonWindow::handleImageSetSelection(
+    const QString& imageSetName) {
+  const auto previousSelection = currentCodecSelection(ui_);
+
+  QComboBox* const imageBox = ui_.imageComboBox;
+  {
+    // Emptying the box must not trigger handleImageSelection.
+    const QSignalBlocker silence(imageBox);
+    imageBox->clear();
+  }
+
+  const QStringList names = imageSets_.value(imageSetName).keys();
+  const std::function<QIcon(const QString&)> makeThumbnail =
+      [this, &imageSetName](const QString& name) {
+        return QIcon(pathToOriginalImage(imageSetName, name));
+      };
+  // Icons are created concurrently; resultAt() waits for the requested one.
+  const QFuture<QIcon> icons = QtConcurrent::mapped(names, makeThumbnail);
+  for (int index = 0; index < names.size(); ++index) {
+    imageBox->addItem(icons.resultAt(index), names.at(index));
+  }
+
+  restoreCodecSelection(previousSelection);
+}
+
+// Slot: shows the original of `imageName` as the middle image and refills both
+// codec combo boxes with the codecs available for it, then tries to restore
+// the previously active codec / compression-level selection.
+void CodecComparisonWindow::handleImageSelection(const QString& imageName) {
+  const QString imageSetName = ui_.imageSetComboBox->currentText();
+  const QString originalPath = pathToOriginalImage(imageSetName, imageName);
+  ui_.splitImageView->setMiddleImage(
+      loadImage(originalPath, monitorIccProfile_, intensityTarget_));
+
+  const auto previousSelection = currentCodecSelection(ui_);
+  const QStringList codecNames =
+      imageSets_.value(imageSetName).value(imageName).keys();
+  QComboBox* const codecBoxes[] = {ui_.codec1ComboBox, ui_.codec2ComboBox};
+  for (QComboBox* const box : codecBoxes) {
+    {
+      // Only the clear() is silenced; addItems() below may emit signals.
+      const QSignalBlocker silence(box);
+      box->clear();
+    }
+    box->addItems(codecNames);
+  }
+  restoreCodecSelection(previousSelection);
+}
+
+// Refills the compression-level combo box of `side` with the levels available
+// for the codec selected on that side (sorted with numeric collation, so that
+// e.g. "9" precedes "10"), then selects the level that best matches the size
+// of the other side.
+void CodecComparisonWindow::handleCodecChange(const Side side) {
+  const bool isLeft = side == Side::LEFT;
+  const QComboBox* const codecBox =
+      isLeft ? ui_.codec1ComboBox : ui_.codec2ComboBox;
+  QComboBox* const levelBox = isLeft ? ui_.compressionLevel1ComboBox
+                                     : ui_.compressionLevel2ComboBox;
+
+  const Codec levelsForCodec =
+      imageSets_.value(ui_.imageSetComboBox->currentText())
+          .value(ui_.imageComboBox->currentText())
+          .value(codecBox->currentText());
+  QStringList levels = levelsForCodec.keys();
+
+  QCollator naturalOrder;
+  naturalOrder.setNumericMode(true);
+  std::sort(levels.begin(), levels.end(), naturalOrder);
+
+  {
+    // Only the clear() is silenced; addItems() below may emit signals.
+    const QSignalBlocker silence(levelBox);
+    levelBox->clear();
+  }
+  levelBox->addItems(levels);
+  matchSize(side);
+}
+
+// Reloads the decoded image currently selected for `side`, updates that side's
+// size label with the bits-per-pixel of the *encoded* file, and hands the
+// image to the split view.
+//
+// Does nothing when no decoded image is known for the current selection.
+void CodecComparisonWindow::updateSideImage(const Side side) {
+  const ComparableImage imageInfo = currentlySelectedImage(side);
+  if (imageInfo.decodedImagePath.isEmpty()) return;
+  QImage image = loadImage(imageInfo.decodedImagePath, monitorIccProfile_,
+                           intensityTarget_);
+  // Compute the pixel count in 64 bits: width * height can exceed INT_MAX for
+  // very large images, and signed int overflow is undefined behaviour.
+  const qint64 pixels = static_cast<qint64>(image.width()) * image.height();
+  QLabel* const sizeInfoLabel =
+      side == Side::LEFT ? ui_.size1Label : ui_.size2Label;
+  if (pixels == 0) {
+    // Also reached when loadImage() failed and returned a null image.
+    sizeInfoLabel->setText(tr("Empty image."));
+  } else {
+    const double bpp =
+        CHAR_BIT * static_cast<double>(imageInfo.byteSize) / pixels;
+    sizeInfoLabel->setText(tr("%L1bpp").arg(bpp));
+  }
+
+  if (side == Side::LEFT) {
+    ui_.splitImageView->setLeftImage(std::move(image));
+  } else {
+    ui_.splitImageView->setRightImage(std::move(image));
+  }
+}
+
+// Returns "<base directory>/<imageSetName>/<imageName>/original.png", where
+// the base directory is given as an absolute path. The file need not exist.
+QString CodecComparisonWindow::pathToOriginalImage(
+    const QString& imageSetName, const QString& imageName) const {
+  QString path = baseDirectory_.absolutePath();
+  path += "/";
+  path += imageSetName;
+  path += "/";
+  path += imageName;
+  path += "/original.png";
+  return path;
+}
+
+// Looks up the entry matching the current image set / image / codec /
+// compression-level selection of `side`. A default-constructed ComparableImage
+// (empty path, byteSize 0) is returned when any level of the lookup misses.
+CodecComparisonWindow::ComparableImage
+CodecComparisonWindow::currentlySelectedImage(const Side side) const {
+  const bool isLeft = side == Side::LEFT;
+  const QComboBox* const codecBox =
+      isLeft ? ui_.codec1ComboBox : ui_.codec2ComboBox;
+  const QComboBox* const levelBox = isLeft ? ui_.compressionLevel1ComboBox
+                                           : ui_.compressionLevel2ComboBox;
+
+  const ImageSet imageSet =
+      imageSets_.value(ui_.imageSetComboBox->currentText());
+  const Codecs codecs = imageSet.value(ui_.imageComboBox->currentText());
+  const Codec levels = codecs.value(codecBox->currentText());
+  return levels.value(levelBox->currentText());
+}
+
+// Selects, on `side`, the compression level of the currently selected codec
+// whose encoded size is closest to the size of the image selected on the
+// other side. On ties the first level in map order wins. Does nothing when
+// the other side has no size or the codec has no levels.
+void CodecComparisonWindow::matchSize(const Side side) {
+  const Side opposite = (side == Side::LEFT) ? Side::RIGHT : Side::LEFT;
+  const qint64 targetSize = currentlySelectedImage(opposite).byteSize;
+  if (targetSize == 0) return;
+
+  const bool isLeft = side == Side::LEFT;
+  const QComboBox* const codecBox =
+      isLeft ? ui_.codec1ComboBox : ui_.codec2ComboBox;
+  QComboBox* const levelBox = isLeft ? ui_.compressionLevel1ComboBox
+                                     : ui_.compressionLevel2ComboBox;
+  const Codec levels = imageSets_.value(ui_.imageSetComboBox->currentText())
+                           .value(ui_.imageComboBox->currentText())
+                           .value(codecBox->currentText());
+  if (levels.empty()) return;
+
+  const auto distance = [targetSize](const ComparableImage& candidate) {
+    return std::abs(candidate.byteSize - targetSize);
+  };
+  // std::min_element keeps the first of several equally close candidates.
+  const Codec::ConstIterator closest = std::min_element(
+      levels.begin(), levels.end(),
+      [&distance](const ComparableImage& lhs, const ComparableImage& rhs) {
+        return distance(lhs) < distance(rhs);
+      });
+  levelBox->setCurrentText(closest.key());
+}
+
+// Makes `directory` the new base directory, rescans it from scratch and
+// refills the image set combo box with the image sets that were found.
+void CodecComparisonWindow::loadDirectory(const QString& directory) {
+  // Drop everything learned from a previously loaded directory.
+  imageSets_.clear();
+  visited_.clear();
+  baseDirectory_.setPath(directory);
+  baseDirectory_.makeAbsolute();
+
+  browseDirectory(QDir(directory));
+
+  QComboBox* const imageSetBox = ui_.imageSetComboBox;
+  {
+    // Only the clear() is silenced; addItems() below may emit signals.
+    const QSignalBlocker silence(imageSetBox);
+    imageSetBox->clear();
+  }
+  imageSetBox->addItems(imageSets_.keys());
+}
+
+// Recursively scans `directory` and records every comparable image found in
+// imageSets_, keyed by image set path / image name / codec name / compression
+// level (the file's complete base name).
+//
+// A non-PNG file is recorded when it can be loaded directly, or when a PNG
+// with the same base name sits next to it; byteSize is always the size of the
+// non-PNG file. `depth` is the number of levels below the scan root.
+void CodecComparisonWindow::browseDirectory(const QDir& directory, int depth) {
+  // Descend first; symbolic links are skipped and every directory is entered
+  // at most once.
+  const auto subdirectories = directory.entryInfoList(
+      QDir::Dirs | QDir::NoDotAndDotDot | QDir::NoSymLinks);
+  for (const QFileInfo& entry : subdirectories) {
+    const QString entryPath = entry.absoluteFilePath();
+    if (visited_.contains(entryPath)) continue;
+    visited_.insert(entryPath);
+    browseDirectory(entryPath, depth + 1);
+  }
+
+  // Need at least image_name/codec_name/file.
+  if (depth < 2) return;
+
+  const auto files = directory.entryInfoList(QDir::Files);
+  for (const QFileInfo& file : files) {
+    // PNGs are only ever used as the decoded counterpart of another file.
+    if (file.suffix() == kPngSuffix) continue;
+
+    QString decodedPath;
+    if (canLoadImageWithExtension(file.suffix())) {
+      decodedPath = file.absoluteFilePath();
+    } else {
+      const QFileInfo decodedPng(file.absolutePath() + "/" +
+                                 file.completeBaseName() + "." + kPngSuffix);
+      if (decodedPng.exists()) {
+        decodedPath = decodedPng.absoluteFilePath();
+      }
+    }
+    if (decodedPath.isEmpty()) continue;
+
+    // <image set>/<image name>/<codec>/<file>: walk up from the file.
+    const QString codecName = file.absoluteDir().dirName();
+    QDir imageDir = file.absoluteDir();
+    if (!imageDir.cdUp()) return;
+    const QString imageName = imageDir.dirName();
+    QDir imageSetDir = imageDir;
+    if (!imageSetDir.cdUp()) return;
+    QString imageSetKey =
+        baseDirectory_.relativeFilePath(imageSetDir.absolutePath());
+    if (imageSetKey.isEmpty()) {
+      imageSetKey = ".";
+    }
+
+    ComparableImage& record =
+        imageSets_[imageSetKey][imageName][codecName][file.completeBaseName()];
+    record.decodedImagePath = decodedPath;
+    record.byteSize = file.size();
+  }
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/comparison_viewer/codec_comparison_window.h b/third-party/libjxl/libjxl/tools/comparison_viewer/codec_comparison_window.h
new file mode 100644
index 0000000000..479c420cb1
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/comparison_viewer/codec_comparison_window.h
@@ -0,0 +1,80 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_COMPARISON_VIEWER_CODEC_COMPARISON_WINDOW_H_
+#define TOOLS_COMPARISON_VIEWER_CODEC_COMPARISON_WINDOW_H_
+
+#include <QDir>
+#include <QMainWindow>
+#include <QMap>
+#include <QSet>
+#include <QString>
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/common.h"
+#include "tools/comparison_viewer/ui_codec_comparison_window.h"
+
+namespace jpegxl {
+namespace tools {
+
+// Main window of the codec comparison tool. It scans a directory tree for
+// images encoded with several codecs at several compression levels and lets
+// the user pick one codec/level per side to compare in a SplitImageView.
+class CodecComparisonWindow : public QMainWindow {
+  Q_OBJECT
+
+ public:
+  // `directory` is scanned immediately; `intensityTarget` is forwarded to
+  // loadImage() for every image that gets displayed.
+  explicit CodecComparisonWindow(
+      const QString& directory,
+      float intensityTarget = jxl::kDefaultIntensityTarget,
+      QWidget* parent = nullptr);
+  ~CodecComparisonWindow() override = default;
+
+ private slots:
+  // Repopulates the image combo box for the newly selected image set.
+  void handleImageSetSelection(const QString& imageSetName);
+  // Shows the original of the newly selected image in the middle and
+  // repopulates both codec combo boxes.
+  void handleImageSelection(const QString& imageName);
+
+ private:
+  struct ComparableImage {
+    // Absolute path to the decoded PNG (or an image that Qt can read).
+    QString decodedImagePath;
+    // Size of the encoded image (*not* the PNG).
+    qint64 byteSize = 0;
+  };
+  // Keys are compression levels.
+  using Codec = QMap<QString, ComparableImage>;
+  // Keys are codec names.
+  using Codecs = QMap<QString, Codec>;
+  // Keys are image names (relative to the image set directory).
+  using ImageSet = QMap<QString, Codecs>;
+  // Keys are paths to image sets (relative to the base directory chosen by the
+  // user).
+  using ImageSets = QMap<QString, ImageSet>;
+
+  enum class Side { LEFT, RIGHT };
+
+  // Returns <base directory>/<imageSet>/<imageName>/original.png.
+  QString pathToOriginalImage(const QString& imageSet,
+                              const QString& imageName) const;
+  // Entry matching the current combo box selection of `side`;
+  // default-constructed when there is none.
+  ComparableImage currentlySelectedImage(Side side) const;
+
+  // Refills the compression level combo box of `side`, then calls matchSize().
+  void handleCodecChange(Side side);
+  // Reloads the image of `side`, updates its bpp label and the split view.
+  void updateSideImage(Side side);
+  // Selects on `side` the compression level whose encoded size is closest to
+  // that of the image selected on the other side.
+  void matchSize(Side side);
+
+  // Resets all scan state, scans `directory` and refills the image set box.
+  void loadDirectory(const QString& directory);
+  // Recursive, called by loadDirectory.
+  void browseDirectory(const QDir& directory, int depth = 0);
+
+  Ui::CodecComparisonWindow ui_;
+
+  // Root of the scanned tree; image set keys are relative to it.
+  QDir baseDirectory_;
+  ImageSets imageSets_;
+  // Absolute paths of the directories browseDirectory() has already entered.
+  QSet<QString> visited_;
+
+  const float intensityTarget_;
+  // ICC profile obtained from GetMonitorIccProfile() at construction; passed
+  // to loadImage() as the target profile.
+  const QByteArray monitorIccProfile_;
+};
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_COMPARISON_VIEWER_CODEC_COMPARISON_WINDOW_H_
diff --git a/third-party/libjxl/libjxl/tools/comparison_viewer/codec_comparison_window.ui b/third-party/libjxl/libjxl/tools/comparison_viewer/codec_comparison_window.ui
new file mode 100644
index 0000000000..85ba810216
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/comparison_viewer/codec_comparison_window.ui
@@ -0,0 +1,170 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <comment>
+  Copyright (c) the JPEG XL Project Authors. All rights reserved.
+
+  Use of this source code is governed by a BSD-style
+  license that can be found in the LICENSE file.
+ </comment>
+ <class>CodecComparisonWindow</class>
+ <widget class="QMainWindow" name="CodecComparisonWindow">
+  <property name="windowTitle">
+   <string>Codec Comparison Tool</string>
+  </property>
+  <widget class="QWidget" name="centralwidget">
+   <layout class="QVBoxLayout" name="verticalLayout" stretch="0,0,0,1">
+    <item>
+     <layout class="QHBoxLayout" name="horizontalLayout_5" stretch="1,0,1">
+      <item>
+       <spacer name="horizontalSpacer">
+        <property name="orientation">
+         <enum>Qt::Horizontal</enum>
+        </property>
+       </spacer>
+      </item>
+      <item>
+       <layout class="QFormLayout" name="formLayout">
+        <item row="0" column="1">
+         <widget class="QComboBox" name="imageSetComboBox"/>
+        </item>
+        <item row="0" column="0">
+         <widget class="QLabel" name="imageSetLabel">
+          <property name="text">
+           <string>Image set:</string>
+          </property>
+         </widget>
+        </item>
+        <item row="1" column="0">
+         <widget class="QLabel" name="imageLabel">
+          <property name="text">
+           <string>Image:</string>
+          </property>
+         </widget>
+        </item>
+        <item row="1" column="1">
+         <widget class="QComboBox" name="imageComboBox"/>
+        </item>
+       </layout>
+      </item>
+      <item>
+       <spacer name="horizontalSpacer_2">
+        <property name="orientation">
+         <enum>Qt::Horizontal</enum>
+        </property>
+       </spacer>
+      </item>
+     </layout>
+    </item>
+    <item>
+     <layout class="QHBoxLayout" name="horizontalLayout" stretch="0,1,0">
+      <item>
+       <layout class="QGridLayout" name="gridLayout">
+        <item row="0" column="1">
+         <widget class="QComboBox" name="compressionLevel1ComboBox"/>
+        </item>
+        <item row="0" column="0">
+         <widget class="QComboBox" name="codec1ComboBox"/>
+        </item>
+        <item row="1" column="1">
+         <widget class="QLabel" name="match1Label">
+          <property name="text">
+           <string>&lt;a href=&quot;#match1&quot;&gt;Match →&lt;/a&gt;</string>
+          </property>
+          <property name="alignment">
+           <set>Qt::AlignCenter</set>
+          </property>
+         </widget>
+        </item>
+       </layout>
+      </item>
+      <item>
+       <spacer name="horizontalSpacer_3">
+        <property name="orientation">
+         <enum>Qt::Horizontal</enum>
+        </property>
+       </spacer>
+      </item>
+      <item>
+       <layout class="QGridLayout" name="gridLayout_2">
+        <item row="1" column="0">
+         <widget class="QLabel" name="match2Label">
+          <property name="text">
+           <string>&lt;a href=&quot;#match2&quot;&gt;Match ←&lt;/a&gt;</string>
+          </property>
+          <property name="alignment">
+           <set>Qt::AlignCenter</set>
+          </property>
+         </widget>
+        </item>
+        <item row="0" column="1">
+         <widget class="QComboBox" name="compressionLevel2ComboBox"/>
+        </item>
+        <item row="0" column="0">
+         <widget class="QComboBox" name="codec2ComboBox"/>
+        </item>
+       </layout>
+      </item>
+     </layout>
+    </item>
+    <item>
+     <layout class="QHBoxLayout" name="horizontalLayout_2" stretch="0,1,0,1,0">
+      <item>
+       <widget class="QLabel" name="size1Label">
+        <property name="text">
+         <string>No image loaded.</string>
+        </property>
+       </widget>
+      </item>
+      <item>
+       <spacer name="horizontalSpacer_4">
+        <property name="orientation">
+         <enum>Qt::Horizontal</enum>
+        </property>
+       </spacer>
+      </item>
+      <item>
+       <widget class="QLabel" name="renderingModeLabel">
+        <property name="text">
+         <string/>
+        </property>
+        <property name="alignment">
+         <set>Qt::AlignCenter</set>
+        </property>
+       </widget>
+      </item>
+      <item>
+       <spacer name="horizontalSpacer_5">
+        <property name="orientation">
+         <enum>Qt::Horizontal</enum>
+        </property>
+       </spacer>
+      </item>
+      <item>
+       <widget class="QLabel" name="size2Label">
+        <property name="text">
+         <string>No image loaded.</string>
+        </property>
+        <property name="alignment">
+         <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+        </property>
+       </widget>
+      </item>
+     </layout>
+    </item>
+    <item>
+     <widget class="jpegxl::tools::SplitImageView" name="splitImageView" native="true"/>
+    </item>
+   </layout>
+  </widget>
+ </widget>
+ <customwidgets>
+  <customwidget>
+   <class>jpegxl::tools::SplitImageView</class>
+   <extends>QWidget</extends>
+   <header>split_image_view.h</header>
+   <container>1</container>
+  </customwidget>
+ </customwidgets>
+ <resources/>
+ <connections/>
+</ui>
diff --git a/third-party/libjxl/libjxl/tools/comparison_viewer/compare_codecs.cc b/third-party/libjxl/libjxl/tools/comparison_viewer/compare_codecs.cc
new file mode 100644
index 0000000000..3ab4c8df26
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/comparison_viewer/compare_codecs.cc
@@ -0,0 +1,75 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdlib.h>
+
+#include <QApplication>
+#include <QCommandLineParser>
+#include <QMessageBox>
+#include <QString>
+#include <QStringList>
+
+#include "tools/comparison_viewer/codec_comparison_window.h"
+
+// Entry point of the codec comparison tool.
+//
+// Parses the command line (an optional display intensity target plus one or
+// more image folders) and opens one CodecComparisonWindow per folder. Without
+// any folder, a dialog describing the expected directory layout is shown and
+// the program exits with EXIT_FAILURE.
+int main(int argc, char** argv) {
+  QApplication application(argc, argv);
+
+  QCommandLineParser parser;
+  parser.setApplicationDescription(
+      QCoreApplication::translate("compare_codecs", "Codec comparison tool"));
+  parser.addHelpOption();
+
+  QCommandLineOption intensityTargetOption(
+      {"intensity-target", "intensity_target", "i"},
+      QCoreApplication::translate("compare_codecs",
+                                  "The peak luminance of the display."),
+      QCoreApplication::translate("compare_codecs", "nits"),
+      QString::number(jxl::kDefaultIntensityTarget));
+  parser.addOption(intensityTargetOption);
+
+  parser.addPositionalArgument(
+      "folders", QCoreApplication::translate("compare_codecs", "Image folders"),
+      "<folders>...");
+
+  parser.process(application);
+
+  // A non-numeric intensity target is a usage error: print the help and exit.
+  bool ok;
+  const float intensityTarget =
+      parser.value(intensityTargetOption).toFloat(&ok);
+  if (!ok) {
+    parser.showHelp(EXIT_FAILURE);
+  }
+
+  QStringList folders = parser.positionalArguments();
+
+  if (folders.empty()) {
+    QMessageBox message;
+    message.setIcon(QMessageBox::Information);
+    message.setWindowTitle(
+        QCoreApplication::translate("CodecComparisonWindow", "Usage"));
+    message.setText(QCoreApplication::translate(
+        "CodecComparisonWindow", "Please specify a directory to use."));
+    // The placeholder is spelled <image_name> on every line and the closing
+    // parenthesis of the final remark is present.
+    message.setDetailedText(QCoreApplication::translate(
+        "CodecComparisonWindow",
+        "That directory should contain images in the following layout:\n"
+        "- .../<image_name>/original.png (optional)\n"
+        "- .../<image_name>/<codec_name>/<compression_level>.<ext>\n"
+        "- .../<image_name>/<codec_name>/<compression_level>.png (optional for "
+        "formats that Qt can load)\n"
+        "With arbitrary nesting allowed before that. (The \"...\" part is "
+        "referred to as an \"image set\" by the tool.)"));
+    message.exec();
+    return EXIT_FAILURE;
+  }
+
+  // One top-level window per folder; each window deletes itself when closed.
+  for (const QString& folder : folders) {
+    auto* const window =
+        new jpegxl::tools::CodecComparisonWindow(folder, intensityTarget);
+    window->setAttribute(Qt::WA_DeleteOnClose);
+    window->show();
+  }
+
+  return application.exec();
+}
diff --git a/third-party/libjxl/libjxl/tools/comparison_viewer/compare_images.cc b/third-party/libjxl/libjxl/tools/comparison_viewer/compare_images.cc
new file mode 100644
index 0000000000..321b2c492a
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/comparison_viewer/compare_images.cc
@@ -0,0 +1,129 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdlib.h>
+
+#include <QApplication>
+#include <QCommandLineOption>
+#include <QCommandLineParser>
+#include <QFlags>
+#include <QImage>
+#include <QMessageBox>
+#include <QStringList>
+
+#include "tools/comparison_viewer/image_loading.h"
+#include "tools/comparison_viewer/split_image_view.h"
+#include "tools/icc_detect/icc_detect.h"
+
+namespace {
+
+// Shows a modal "critical" message box saying that the image at `path` could
+// not be loaded, and returns once the user has dismissed it.
+void displayLoadingError(const QString& path) {
+  const QString title = QCoreApplication::translate("SplitImageView", "Error");
+  const QString text =
+      QCoreApplication::translate("SplitImageView",
+                                  "Could not load image \"%1\".")
+          .arg(path);
+
+  QMessageBox dialog;
+  dialog.setIcon(QMessageBox::Critical);
+  dialog.setWindowTitle(title);
+  dialog.setText(text);
+  dialog.exec();
+}
+
+}  // namespace
+
+// Entry point of the image comparison tool.
+//
+// Expects a left and a right image and optionally a middle one, loads them
+// converted to the monitor's ICC profile and shows them in a maximized
+// SplitImageView. Exits with EXIT_FAILURE (after an error dialog) as soon as
+// one of the images cannot be loaded.
+int main(int argc, char** argv) {
+  QApplication application(argc, argv);
+
+  QCommandLineParser parser;
+  parser.setApplicationDescription(
+      QCoreApplication::translate("compare_images", "Image comparison tool"));
+  parser.addHelpOption();
+  parser.addPositionalArgument(
+      "left-image",
+      QCoreApplication::translate("compare_images",
+                                  "The image to display on the left."),
+      "<left-image>");
+  parser.addPositionalArgument(
+      "right-image",
+      QCoreApplication::translate("compare_images",
+                                  "The image to display on the right."),
+      "<right-image>");
+  parser.addPositionalArgument(
+      "middle-image",
+      QCoreApplication::translate(
+          "compare_images", "The image to display in the middle (optional)."),
+      "[<middle-image>]");
+
+  QCommandLineOption colorSpaceOption(
+      {"color-space", "color_space", "c"},
+      QCoreApplication::translate(
+          "compare_images",
+          "The color space to use for untagged images (typically PNM)."),
+      QCoreApplication::translate("compare_images", "color-space"));
+  parser.addOption(colorSpaceOption);
+
+  QCommandLineOption intensityTargetOption(
+      {"intensity-target", "intensity_target", "i"},
+      QCoreApplication::translate("compare_images",
+                                  "The peak luminance of the display."),
+      QCoreApplication::translate("compare_images", "nits"),
+      QString::number(jxl::kDefaultIntensityTarget));
+  parser.addOption(intensityTargetOption);
+
+  parser.process(application);
+
+  const QString colorSpaceHint = parser.value(colorSpaceOption);
+
+  // Two or three image paths are required; anything else is a usage error.
+  QStringList imagePaths = parser.positionalArguments();
+  const auto imageCount = imagePaths.size();
+  if (imageCount < 2 || imageCount > 3) {
+    parser.showHelp(EXIT_FAILURE);
+  }
+
+  bool parsed = false;
+  const float intensityTarget =
+      parser.value(intensityTargetOption).toFloat(&parsed);
+  if (!parsed) {
+    parser.showHelp(EXIT_FAILURE);
+  }
+
+  jpegxl::tools::SplitImageView view;
+
+  const QByteArray monitorIccProfile =
+      jpegxl::tools::GetMonitorIccProfile(&view);
+
+  // Loads one image with the shared settings; on failure the error dialog is
+  // shown here and the (null) image is returned for the caller to check.
+  const auto loadOrReport = [&](const QString& path) -> QImage {
+    QImage loaded = jpegxl::tools::loadImage(path, monitorIccProfile,
+                                             intensityTarget, colorSpaceHint);
+    if (loaded.isNull()) {
+      displayLoadingError(path);
+    }
+    return loaded;
+  };
+
+  QImage leftImage = loadOrReport(imagePaths.takeFirst());
+  if (leftImage.isNull()) return EXIT_FAILURE;
+  view.setLeftImage(std::move(leftImage));
+
+  QImage rightImage = loadOrReport(imagePaths.takeFirst());
+  if (rightImage.isNull()) return EXIT_FAILURE;
+  view.setRightImage(std::move(rightImage));
+
+  // The middle image is optional.
+  if (!imagePaths.empty()) {
+    QImage middleImage = loadOrReport(imagePaths.takeFirst());
+    if (middleImage.isNull()) return EXIT_FAILURE;
+    view.setMiddleImage(std::move(middleImage));
+  }
+
+  view.setWindowFlags(view.windowFlags() | Qt::Window);
+  view.setWindowState(Qt::WindowMaximized);
+  view.show();
+
+  return application.exec();
+}
diff --git a/third-party/libjxl/libjxl/tools/comparison_viewer/image_loading.cc b/third-party/libjxl/libjxl/tools/comparison_viewer/image_loading.cc
new file mode 100644
index 0000000000..7d445e34d1
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/comparison_viewer/image_loading.cc
@@ -0,0 +1,128 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/comparison_viewer/image_loading.h"
+
+#include <QRgb>
+#include <QThread>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/dec/color_hints.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_metadata.h"
+#include "tools/file_io.h"
+#include "tools/thread_pool_internal.h"
+#include "tools/viewer/load_jxl.h"
+
+namespace jpegxl {
+namespace tools {
+
+using jxl::CodecInOut;
+using jxl::ColorEncoding;
+using jxl::Image3F;
+using jxl::ImageBundle;
+using jxl::PaddedBytes;
+using jxl::Rect;
+using jxl::Span;
+using jxl::Status;
+using jxl::ThreadPool;
+using jxl::extras::ColorHints;
+
+namespace {
+
+// Reads the whole file `filename` into memory and decodes it into `decoded`
+// with jxl::SetFromBytes, applying `color_hints`. Returns a failing Status
+// when the file cannot be read or decoded.
+Status loadFromFile(const QString& filename, const ColorHints& color_hints,
+                    CodecInOut* const decoded, ThreadPool* const pool) {
+  const std::string path = filename.toStdString();
+  PaddedBytes fileContents;
+  JXL_RETURN_IF_ERROR(jpegxl::tools::ReadFile(path, &fileContents));
+  return jxl::SetFromBytes(Span<const uint8_t>(fileContents), color_hints,
+                           decoded, pool, nullptr);
+}
+
+}  // namespace
+
+// Returns true when files with the given extension (without the dot, any
+// letter case) can be loaded: either one of the JPEG XL extensions listed
+// below or any extension that jxl::extras::CodecFromPath recognizes.
+bool canLoadImageWithExtension(QString extension) {
+  static constexpr const char* kJxlExtensions[] = {"jxl", "j", "brn"};
+
+  const QString lowered = extension.toLower();
+  for (const char* const jxlExtension : kJxlExtensions) {
+    if (lowered == jxlExtension) return true;
+  }
+  return jxl::extras::CodecFromPath("." + lowered.toStdString()) !=
+         jxl::extras::Codec::kUnknown;
+}
+
+// Loads `filename` as a QImage ready for display.
+//
+// The JPEG XL loader is tried first. If it yields an empty image, the file is
+// decoded through the generic jxl::SetFromBytes path instead, converted to
+// `targetIccProfile` (sRGB when that profile cannot be used) and packed into
+// an 8-bit ARGB image. `sourceColorSpaceHint`, when non-empty, is passed to
+// the decoder as the "color_space" hint. Returns a null QImage when the file
+// cannot be read, decoded or color-converted.
+QImage loadImage(const QString& filename, const QByteArray& targetIccProfile,
+                 const float intensityTarget,
+                 const QString& sourceColorSpaceHint) {
+  qint64 elapsed;
+  QImage img = loadJxlImage(filename, targetIccProfile, &elapsed);
+  if (img.width() != 0 && img.height() != 0) {
+    return img;
+  }
+  // Function-local static: one pool, created on the first fallback decode and
+  // shared by all later calls.
+  static ThreadPoolInternal pool(QThread::idealThreadCount());
+
+  CodecInOut decoded;
+  ColorHints color_hints;
+  if (!sourceColorSpaceHint.isEmpty()) {
+    color_hints.Add("color_space", sourceColorSpaceHint.toStdString());
+  }
+  if (!loadFromFile(filename, color_hints, &decoded, &pool)) {
+    return QImage();
+  }
+  decoded.metadata.m.SetIntensityTarget(intensityTarget);
+  const ImageBundle& ib = decoded.Main();
+
+  const JxlCmsInterface& cms = jxl::GetJxlCms();
+
+  // Build the target color space from the caller's ICC profile bytes, falling
+  // back to sRGB (gray or color, matching the image) if they are rejected.
+  ColorEncoding targetColorSpace;
+  PaddedBytes icc;
+  icc.assign(reinterpret_cast<const uint8_t*>(targetIccProfile.data()),
+             reinterpret_cast<const uint8_t*>(targetIccProfile.data() +
+                                              targetIccProfile.size()));
+  if (!targetColorSpace.SetICC(std::move(icc), &cms)) {
+    targetColorSpace = ColorEncoding::SRGB(ib.IsGray());
+  }
+  Image3F converted;
+  if (!ib.CopyTo(Rect(ib), targetColorSpace, cms, &converted, &pool)) {
+    return QImage();
+  }
+
+  QImage image(converted.xsize(), converted.ysize(), QImage::Format_ARGB32);
+
+  // Scales a sample by 255, adds 0.5 for rounding and clamps to [0, 255]. The
+  // float result is converted to int when passed to qRgb()/qRgba().
+  const auto ScaleAndClamp = [](const float x) {
+    return jxl::Clamp1(x * 255 + .5f, 0.f, 255.f);
+  };
+
+  // Pack the float planes row by row; alpha is read from the decoded bundle,
+  // not from the color-converted planes.
+  if (ib.HasAlpha()) {
+    for (int y = 0; y < image.height(); ++y) {
+      QRgb* const row = reinterpret_cast<QRgb*>(image.scanLine(y));
+      const float* const alphaRow = ib.alpha().ConstRow(y);
+      const float* const redRow = converted.ConstPlaneRow(0, y);
+      const float* const greenRow = converted.ConstPlaneRow(1, y);
+      const float* const blueRow = converted.ConstPlaneRow(2, y);
+      for (int x = 0; x < image.width(); ++x) {
+        row[x] = qRgba(ScaleAndClamp(redRow[x]), ScaleAndClamp(greenRow[x]),
+                       ScaleAndClamp(blueRow[x]), ScaleAndClamp(alphaRow[x]));
+      }
+    }
+  } else {
+    // No alpha channel: qRgb() produces fully opaque pixels.
+    for (int y = 0; y < image.height(); ++y) {
+      QRgb* const row = reinterpret_cast<QRgb*>(image.scanLine(y));
+      const float* const redRow = converted.ConstPlaneRow(0, y);
+      const float* const greenRow = converted.ConstPlaneRow(1, y);
+      const float* const blueRow = converted.ConstPlaneRow(2, y);
+      for (int x = 0; x < image.width(); ++x) {
+        row[x] = qRgb(ScaleAndClamp(redRow[x]), ScaleAndClamp(greenRow[x]),
+                      ScaleAndClamp(blueRow[x]));
+      }
+    }
+  }
+
+  return image;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/comparison_viewer/image_loading.h b/third-party/libjxl/libjxl/tools/comparison_viewer/image_loading.h
new file mode 100644
index 0000000000..72884a5e7d
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/comparison_viewer/image_loading.h
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_COMPARISON_VIEWER_IMAGE_LOADING_H_
+#define TOOLS_COMPARISON_VIEWER_IMAGE_LOADING_H_
+
+#include <QByteArray>
+#include <QImage>
+#include <QString>
+
+#include "lib/jxl/common.h"
+
+namespace jpegxl {
+namespace tools {
+
+// `extension` should not include the dot.
+bool canLoadImageWithExtension(QString extension);
+
+// Converts the loaded image to the given display profile, or sRGB if not
+// specified. Thread-hostile.
+QImage loadImage(const QString& filename,
+                 const QByteArray& targetIccProfile = QByteArray(),
+                 float intensityTarget = jxl::kDefaultIntensityTarget,
+                 const QString& sourceColorSpaceHint = QString());
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_COMPARISON_VIEWER_IMAGE_LOADING_H_
diff --git a/third-party/libjxl/libjxl/tools/comparison_viewer/settings.cc b/third-party/libjxl/libjxl/tools/comparison_viewer/settings.cc
new file mode 100644
index 0000000000..ca5c0c9aaa
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/comparison_viewer/settings.cc
@@ -0,0 +1,53 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/comparison_viewer/settings.h"
+
+namespace jpegxl {
+namespace tools {
+
+SettingsDialog::SettingsDialog(QWidget* const parent)
+    : QDialog(parent), settings_("JPEG XL project", "Comparison tool") {
+  ui_.setupUi(this);
+  // Restore the persisted rendering options; the fallbacks apply when unset.
+  SplitImageRenderingSettings& restored = renderingSettings_;
+  settings_.beginGroup("rendering");
+  restored.fadingMSecs = settings_.value("fadingMSecs", 300).toInt();
+  settings_.beginGroup("gray");
+  restored.grayMSecs = settings_.value("delayMSecs", 300).toInt();
+  restored.gray = settings_.value("enabled", false).toBool();
+  settings_.endGroup();  // leaves "gray"
+  settings_.endGroup();  // leaves "rendering"
+  settingsToUi();  // Mirror the loaded values into the widgets.
+}
+
+SplitImageRenderingSettings SettingsDialog::renderingSettings() const {
+  return renderingSettings_;  // Copy of the last loaded or accepted values.
+}
+
+void SettingsDialog::on_SettingsDialog_accepted() {
+  // Adopt the values shown in the dialog, then persist them via QSettings.
+  renderingSettings_.grayMSecs = ui_.grayTime->value();
+  renderingSettings_.gray = ui_.grayGroup->isChecked();
+  renderingSettings_.fadingMSecs = ui_.fadingTime->value();
+  settings_.beginGroup("rendering");
+  settings_.setValue("fadingMSecs", renderingSettings_.fadingMSecs);
+  settings_.beginGroup("gray");
+  settings_.setValue("delayMSecs", renderingSettings_.grayMSecs);
+  settings_.setValue("enabled", renderingSettings_.gray);
+  settings_.endGroup();  // leaves "gray"
+  settings_.endGroup();  // leaves "rendering"
+}
+
+void SettingsDialog::on_SettingsDialog_rejected() { settingsToUi(); }  // Revert.
+
+void SettingsDialog::settingsToUi() {  // Push renderingSettings_ to the widgets.
+  ui_.grayTime->setValue(renderingSettings_.grayMSecs);
+  ui_.grayGroup->setChecked(renderingSettings_.gray);
+  ui_.fadingTime->setValue(renderingSettings_.fadingMSecs);
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/comparison_viewer/settings.h b/third-party/libjxl/libjxl/tools/comparison_viewer/settings.h
new file mode 100644
index 0000000000..a54cd87781
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/comparison_viewer/settings.h
@@ -0,0 +1,42 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_COMPARISON_VIEWER_SETTINGS_H_
+#define TOOLS_COMPARISON_VIEWER_SETTINGS_H_
+
+#include <QDialog>
+#include <QSettings>
+
+#include "tools/comparison_viewer/split_image_renderer.h"
+#include "tools/comparison_viewer/ui_settings.h"
+
+namespace jpegxl {
+namespace tools {
+
+class SettingsDialog : public QDialog {  // Edits the fade and gray timings.
+  Q_OBJECT
+
+ public:
+  explicit SettingsDialog(QWidget* parent = nullptr);
+  ~SettingsDialog() override = default;
+  // Returns the settings as last loaded from storage or accepted by the user.
+  SplitImageRenderingSettings renderingSettings() const;
+
+ private slots:
+  void on_SettingsDialog_accepted();
+  void on_SettingsDialog_rejected();
+
+ private:
+  void settingsToUi();  // Copies renderingSettings_ into the widgets.
+
+  Ui::SettingsDialog ui_;
+  QSettings settings_;  // Persistent storage for the rendering options.
+  SplitImageRenderingSettings renderingSettings_;
+};
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_COMPARISON_VIEWER_SETTINGS_H_
diff --git a/third-party/libjxl/libjxl/tools/comparison_viewer/settings.ui b/third-party/libjxl/libjxl/tools/comparison_viewer/settings.ui
new file mode 100644
index 0000000000..ca81a33aec
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/comparison_viewer/settings.ui
@@ -0,0 +1,120 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <comment>
+  Copyright (c) the JPEG XL Project Authors. All rights reserved.
+
+  Use of this source code is governed by a BSD-style
+  license that can be found in the LICENSE file.
+ </comment>
+ <class>SettingsDialog</class>
+ <widget class="QDialog" name="SettingsDialog">
+  <property name="windowTitle">
+   <string>Comparison tool settings</string>
+  </property>
+  <layout class="QVBoxLayout" name="verticalLayout" stretch="0,0,1,0">
+   <property name="sizeConstraint">
+    <enum>QLayout::SetFixedSize</enum>
+   </property>
+   <item>
+    <layout class="QFormLayout" name="settingsLayout">
+     <item row="0" column="0">
+      <widget class="QLabel" name="fadingTimePromptLabel">
+       <property name="text">
+        <string>Fading time:</string>
+       </property>
+      </widget>
+     </item>
+     <item row="0" column="1">
+      <widget class="QSpinBox" name="fadingTime">
+       <property name="suffix">
+        <string> ms</string>
+       </property>
+       <property name="maximum">
+        <number>1000</number>
+       </property>
+       <property name="singleStep">
+        <number>50</number>
+       </property>
+       <property name="value">
+        <number>300</number>
+       </property>
+      </widget>
+     </item>
+    </layout>
+   </item>
+   <item>
+    <widget class="QGroupBox" name="grayGroup">
+     <property name="title">
+      <string>Gray in between</string>
+     </property>
+     <property name="checkable">
+      <bool>true</bool>
+     </property>
+     <property name="checked">
+      <bool>false</bool>
+     </property>
+     <layout class="QFormLayout" name="formLayout">
+      <item row="0" column="1">
+       <widget class="QSpinBox" name="grayTime">
+        <property name="suffix">
+         <string> ms</string>
+        </property>
+        <property name="minimum">
+         <number>0</number>
+        </property>
+        <property name="maximum">
+         <number>1000</number>
+        </property>
+        <property name="singleStep">
+         <number>50</number>
+        </property>
+        <property name="value">
+         <number>300</number>
+        </property>
+       </widget>
+      </item>
+      <item row="0" column="0">
+       <widget class="QLabel" name="grayTimePromptLabel">
+        <property name="text">
+         <string>Time on gray:</string>
+        </property>
+       </widget>
+      </item>
+     </layout>
+    </widget>
+   </item>
+   <item>
+    <spacer name="verticalSpacer">
+     <property name="orientation">
+      <enum>Qt::Vertical</enum>
+     </property>
+    </spacer>
+   </item>
+   <item>
+    <widget class="QDialogButtonBox" name="buttonBox">
+     <property name="orientation">
+      <enum>Qt::Horizontal</enum>
+     </property>
+     <property name="standardButtons">
+      <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
+     </property>
+    </widget>
+   </item>
+  </layout>
+ </widget>
+ <resources/>
+ <connections>
+  <connection>
+   <sender>buttonBox</sender>
+   <signal>accepted()</signal>
+   <receiver>SettingsDialog</receiver>
+   <slot>accept()</slot>
+  </connection>
+  <connection>
+   <sender>buttonBox</sender>
+   <signal>rejected()</signal>
+   <receiver>SettingsDialog</receiver>
+   <slot>reject()</slot>
+  </connection>
+ </connections>
+</ui>
diff --git a/third-party/libjxl/libjxl/tools/comparison_viewer/split_image_renderer.cc b/third-party/libjxl/libjxl/tools/comparison_viewer/split_image_renderer.cc
new file mode 100644
index 0000000000..911229ce7a
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/comparison_viewer/split_image_renderer.cc
@@ -0,0 +1,250 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/comparison_viewer/split_image_renderer.h"
+
+#include <QEvent>
+#include <QGuiApplication>
+#include <QPainter>
+#include <QPalette>
+#include <QPen>
+#include <QPoint>
+#include <QRect>
+#include <algorithm>
+#include <cmath>
+#include <utility>
+
+namespace jpegxl {
+namespace tools {
+
+SplitImageRenderer::SplitImageRenderer(QWidget* const parent)
+    : QWidget(parent) {
+  // Every animation step must trigger a repaint for the fade to be visible.
+  const auto repaint = [this] { update(); };
+  connect(&fadingPoint_, &QVariantAnimation::valueChanged, repaint);
+  setFocusPolicy(Qt::WheelFocus);
+  setMouseTracking(true);  // mouseMoveEvent() must fire without a button.
+  setAttribute(Qt::WA_OpaquePaintEvent);  // paintEvent() fills the widget.
+  grabKeyboard();
+}
+
+void SplitImageRenderer::setLeftImage(QImage image) {
+  leftImage_ = QPixmap::fromImage(std::move(image));
+  leftImage_.setDevicePixelRatio(devicePixelRatio());  // Widget's DPR.
+  updateMinimumSize();  // The new image may change the required size.
+  update();
+}
+void SplitImageRenderer::setRightImage(QImage image) {
+  rightImage_ = QPixmap::fromImage(std::move(image));
+  rightImage_.setDevicePixelRatio(devicePixelRatio());  // Widget's DPR.
+  updateMinimumSize();  // The new image may change the required size.
+  update();
+}
+void SplitImageRenderer::setMiddleImage(QImage image) {
+  middleImage_ = QPixmap::fromImage(std::move(image));
+  middleImage_.setDevicePixelRatio(devicePixelRatio());  // Widget's DPR.
+  updateMinimumSize();  // The new image may change the required size.
+  update();
+}
+
+void SplitImageRenderer::setRenderingSettings(
+    const SplitImageRenderingSettings& settings) {
+  renderingSettings_ = settings;  // Stored only; no repaint is requested.
+}
+
+void SplitImageRenderer::setMiddleWidthPercent(const int percent) {
+  middleWidthPercent_ = percent;  // Stored as given; no range check here.
+  update();
+}
+
+void SplitImageRenderer::setZoomLevel(double scale) {
+  scale_ = scale;  // Scale factor applied by paintEvent().
+  updateMinimumSize();  // The minimum size is proportional to the zoom.
+  update();
+}
+
+void SplitImageRenderer::keyPressEvent(QKeyEvent* const event) {
+  // Keyboard shortcuts of the comparison widget: arrow keys select a single
+  // image, Escape quits and the zoom keys request a zoom step. A repaint is
+  // requested after every key press, whether or not it was handled here.
+  const int key = event->key();
+
+  if (key == Qt::Key_Left) {
+    // Left arrow: show only the left image.
+    setRenderingMode(RenderingMode::LEFT);
+  } else if (key == Qt::Key_Right) {
+    // Right arrow: show only the right image.
+    setRenderingMode(RenderingMode::RIGHT);
+  } else if (key == Qt::Key_Up || key == Qt::Key_Down) {
+    // Up or down arrow: show only the middle image.
+    setRenderingMode(RenderingMode::MIDDLE);
+  } else if (key == Qt::Key_Escape) {
+    // Escape quits the whole application.
+    QCoreApplication::quit();
+  } else if (key == Qt::Key_ZoomIn) {
+    // The zoom keys only emit a request; listeners decide how to apply it.
+    emit zoomLevelIncreaseRequested();
+  } else if (key == Qt::Key_ZoomOut) {
+    // Counterpart of the zoom-in request above.
+    emit zoomLevelDecreaseRequested();
+  } else {
+    // Not a shortcut of this widget: defer to the base class.
+    QWidget::keyPressEvent(event);
+  }
+
+  // Unconditional repaint request.
+  update();
+}
+
+void SplitImageRenderer::mouseMoveEvent(QMouseEvent* const event) {
+  setRenderingMode(RenderingMode::SPLIT);  // Mouse movement selects SPLIT.
+  middleX_ = event->pos().x();  // Cursor x, later used to place the band.
+  update();
+}
+
+void SplitImageRenderer::wheelEvent(QWheelEvent* event) {
+  // Ctrl + vertical wheel requests a zoom step; otherwise ignore the event.
+  const bool ctrlHeld =
+      QGuiApplication::keyboardModifiers().testFlag(Qt::ControlModifier);
+  const int verticalDelta = event->angleDelta().y();
+  if (ctrlHeld && verticalDelta > 0) {
+    emit zoomLevelIncreaseRequested();
+  } else if (ctrlHeld && verticalDelta < 0) {
+    emit zoomLevelDecreaseRequested();
+  } else {
+    event->ignore();
+  }
+}
+
+void SplitImageRenderer::paintEvent(QPaintEvent* const event) {
+  QRectF drawingArea(0., 0., minimumWidth(), minimumHeight());
+  // Fill with gray, then center the (zoomed) drawing area inside the widget.
+  QPainter painter(this);
+  painter.fillRect(rect(), QColor(119, 119, 119));
+  painter.translate(QRectF(rect()).center() - drawingArea.center());
+  painter.scale(scale_, scale_);
+  if (scale_ < 1.) {
+    painter.setRenderHint(QPainter::SmoothPixmapTransform);
+  }
+  // Draws the whole pixmap that belongs to a single-image mode (not SPLIT).
+  const auto drawSingleImage = [&](const RenderingMode mode) {
+    const QPixmap* image = nullptr;
+    switch (mode) {
+      case RenderingMode::LEFT:
+        image = &leftImage_;
+        break;
+      case RenderingMode::RIGHT:
+        image = &rightImage_;
+        break;
+      case RenderingMode::MIDDLE:
+        image = &middleImage_;
+        break;
+
+      default:
+        return;
+    }
+    painter.drawPixmap(QPointF(0., 0.), *image);
+  };
+  // Single-image modes: a steady image, or a fade while the animation runs.
+  if (mode_ != RenderingMode::SPLIT) {
+    if (fadingPoint_.state() != QAbstractAnimation::Running) {
+      drawSingleImage(mode_);
+      return;
+    }
+    // Position inside the animation, in milliseconds.
+    const float fadingPoint = fadingPoint_.currentValue().toFloat();
+    if (renderingSettings_.gray) {  // Fade out, stay on gray, then fade in.
+      if (fadingPoint < renderingSettings_.fadingMSecs) {
+        painter.setOpacity((renderingSettings_.fadingMSecs - fadingPoint) /
+                           renderingSettings_.fadingMSecs);
+        drawSingleImage(previousMode_);
+      } else if (fadingPoint > renderingSettings_.fadingMSecs +
+                                   renderingSettings_.grayMSecs) {
+        painter.setOpacity((fadingPoint - renderingSettings_.fadingMSecs -
+                            renderingSettings_.grayMSecs) /
+                           renderingSettings_.fadingMSecs);
+        drawSingleImage(mode_);
+      }
+    } else {  // Cross-fade: the new image is drawn over the previous one.
+      drawSingleImage(previousMode_);
+      painter.setOpacity(fadingPoint / renderingSettings_.fadingMSecs);
+      drawSingleImage(mode_);
+    }
+
+    return;
+  }
+  // Split mode: left image, a band of the middle image, then the right image.
+  const qreal middleWidth =
+      std::min<qreal>((minimumWidth() / scale_) * middleWidthPercent_ / 100.,
+                      middleImage_.width());
+  // Center the band on the cursor position mapped back through the painter.
+  const double transformedMiddleX =
+      painter.transform().inverted().map(QPointF(middleX_, 0.)).x();
+  QRectF middleRect = middleImage_.rect();
+  middleRect.setWidth(middleWidth);
+  middleRect.moveCenter(QPointF(transformedMiddleX * devicePixelRatio(),
+                                middleRect.center().y()));
+  middleRect.setLeft(std::round(middleRect.left()));
+  middleRect.setRight(std::round(middleRect.right()));
+  // The left and right images fill what remains on either side of the band.
+  QRectF leftRect = leftImage_.rect();
+  leftRect.setRight(middleRect.left());
+
+  QRectF rightRect = rightImage_.rect();
+  rightRect.setLeft(middleRect.right());
+
+  painter.drawPixmap(QPointF(), leftImage_, leftRect);
+  painter.drawPixmap(middleRect.topLeft() / devicePixelRatio(), middleImage_,
+                     middleRect);
+  painter.drawPixmap(rightRect.topLeft() / devicePixelRatio(), rightImage_,
+                     rightRect);
+  // Dotted vertical lines mark both edges of the band.
+  QPen middlePen;
+  middlePen.setStyle(Qt::DotLine);
+  painter.setPen(middlePen);
+  painter.drawLine(leftRect.topRight() / devicePixelRatio(),
+                   leftRect.bottomRight() / devicePixelRatio());
+  painter.drawLine(rightRect.topLeft() / devicePixelRatio(),
+                   rightRect.bottomLeft() / devicePixelRatio());
+}
+
+void SplitImageRenderer::updateMinimumSize() {
+  // Fit the largest of the three images, in device-independent pixels, at the
+  // current zoom factor.
+  const QSizeF sizes[] = {leftImage_.deviceIndependentSize(),
+                          rightImage_.deviceIndependentSize(),
+                          middleImage_.deviceIndependentSize()};
+  QSizeF bounds = sizes[0];
+  for (const QSizeF& size : sizes) bounds = bounds.expandedTo(size);
+  setMinimumSize((scale_ * bounds).toSize());
+}
+
+void SplitImageRenderer::setRenderingMode(const RenderingMode newMode) {
+  if (newMode == mode_) return;
+  previousMode_ = mode_;
+  mode_ = newMode;
+  // Capture the progress of an interrupted fade before stopping it, so that
+  // the new fade resumes from the mirrored position instead of from zero.
+  const bool wasFading = fadingPoint_.state() == QAbstractAnimation::Running;
+  const float remaining = wasFading ? fadingPoint_.endValue().toFloat() -
+                                          fadingPoint_.currentValue().toFloat()
+                                    : 0.f;
+  fadingPoint_.stop();
+
+  // Transitions from or to SPLIT are immediate; only single-image modes fade.
+  if (previousMode_ != RenderingMode::SPLIT && mode_ != RenderingMode::SPLIT) {
+    const SplitImageRenderingSettings& s = renderingSettings_;
+    const int grayPart = s.gray ? s.fadingMSecs + s.grayMSecs : 0;
+    const float total = static_cast<float>(s.fadingMSecs + grayPart);
+    fadingPoint_.setStartValue(remaining);
+    fadingPoint_.setEndValue(total);
+    fadingPoint_.setDuration(total - remaining);
+    fadingPoint_.start();
+  }
+  emit renderingModeChanged(mode_);
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/comparison_viewer/split_image_renderer.h b/third-party/libjxl/libjxl/tools/comparison_viewer/split_image_renderer.h
new file mode 100644
index 0000000000..5d3029aade
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/comparison_viewer/split_image_renderer.h
@@ -0,0 +1,92 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_COMPARISON_VIEWER_SPLIT_IMAGE_RENDERER_H_
+#define TOOLS_COMPARISON_VIEWER_SPLIT_IMAGE_RENDERER_H_
+
+#include <QImage>
+#include <QKeyEvent>
+#include <QMouseEvent>
+#include <QPaintEvent>
+#include <QPixmap>
+#include <QVariantAnimation>
+#include <QWheelEvent>
+#include <QWidget>
+
+namespace jpegxl {
+namespace tools {
+
+struct SplitImageRenderingSettings {
+  int fadingMSecs;  // Duration of one fade (in or out), in milliseconds.
+  bool gray;        // Whether to pause on the gray background between images.
+  int grayMSecs;    // Time spent on gray when `gray` is set, in milliseconds.
+};
+
+class SplitImageRenderer : public QWidget {
+  Q_OBJECT
+
+ public:
+  enum class RenderingMode {
+    // The default mode when using the mouse: one (partial) image is shown on
+    // each side of the cursor, with a vertical band of the middle image if
+    // applicable.
+    SPLIT,
+    // Only show the left image (accessed by pressing the left arrow key when
+    // the renderer has focus).
+    LEFT,
+    // Only show the right image (accessed by pressing the right arrow key).
+    RIGHT,
+    // Only show the middle image (accessed by pressing the up/down arrow keys).
+    MIDDLE,
+  };
+  Q_ENUM(RenderingMode)
+
+  explicit SplitImageRenderer(QWidget* parent = nullptr);
+  ~SplitImageRenderer() override = default;
+  // The preferred size is the minimum size, i.e. the zoomed image size.
+  QSize sizeHint() const override { return minimumSize(); }
+  // Each setter replaces one compared image, then resizes and repaints.
+  void setLeftImage(QImage image);
+  void setRightImage(QImage image);
+  void setMiddleImage(QImage image);
+
+  void setRenderingSettings(const SplitImageRenderingSettings& settings);
+
+ public slots:
+  void setMiddleWidthPercent(int percent);
+  void setZoomLevel(double scale);
+
+ signals:
+  void zoomLevelIncreaseRequested();
+  void zoomLevelDecreaseRequested();
+  // Emitted by setRenderingMode() whenever the mode actually changes.
+  void renderingModeChanged(RenderingMode newMode);
+
+ protected:
+  void keyPressEvent(QKeyEvent* event) override;
+  void mouseMoveEvent(QMouseEvent* event) override;
+  void wheelEvent(QWheelEvent* event) override;
+  void paintEvent(QPaintEvent* event) override;
+
+ private:
+  void updateMinimumSize();
+  void setRenderingMode(RenderingMode newMode);
+
+  QPixmap leftImage_, rightImage_, middleImage_;
+  RenderingMode mode_ = RenderingMode::SPLIT;
+  RenderingMode previousMode_ = RenderingMode::SPLIT;
+  // Defaults (never indeterminate) until setRenderingSettings() is called.
+  SplitImageRenderingSettings renderingSettings_ = {300, false, 300};
+  // Goes from 0 to the animation duration in milliseconds, as a float.
+  QVariantAnimation fadingPoint_;
+  int middleX_ = 0;
+  int middleWidthPercent_ = 10;
+  double scale_ = 1.;
+};
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_COMPARISON_VIEWER_SPLIT_IMAGE_RENDERER_H_
diff --git a/third-party/libjxl/libjxl/tools/comparison_viewer/split_image_view.cc b/third-party/libjxl/libjxl/tools/comparison_viewer/split_image_view.cc
new file mode 100644
index 0000000000..9c27f46277
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/comparison_viewer/split_image_view.cc
@@ -0,0 +1,73 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/comparison_viewer/split_image_view.h"
+
+#include <utility>
+
+#include <QLabel>
+
+#include "tools/comparison_viewer/split_image_renderer.h"
+
+namespace jpegxl {
+namespace tools {
+
+SplitImageView::SplitImageView(QWidget* const parent) : QWidget(parent) {
+  ui_.setupUi(this);
+  // Start from the settings held by the settings dialog.
+  ui_.splitImageRenderer->setRenderingSettings(settings_.renderingSettings());
+  // Central band width: update the percentage label, then the renderer.
+  connect(ui_.middleWidthSlider, &QSlider::valueChanged,
+          [this](const int value) {
+            ui_.middleWidthDisplayLabel->setText(tr("%L1%").arg(value));
+          });
+  connect(ui_.middleWidthSlider, &QSlider::valueChanged, ui_.splitImageRenderer,
+          &SplitImageRenderer::setMiddleWidthPercent);
+  // Zoom slider value v means a factor of 2^v (shown as 1/2^-v when negative).
+  connect(ui_.zoomLevelSlider, &QSlider::valueChanged, [this](const int value) {
+    if (value >= 0) {
+      ui_.zoomLevelDisplayLabel->setText(tr("&times;%L1").arg(1 << value));
+      ui_.splitImageRenderer->setZoomLevel(1 << value);
+    } else {
+      ui_.zoomLevelDisplayLabel->setText(tr("&times;1/%L1").arg(1 << -value));
+      ui_.splitImageRenderer->setZoomLevel(1. / (1 << -value));
+    }
+  });
+  // Zoom requests from the renderer move the slider by one single step.
+  connect(ui_.splitImageRenderer,
+          &SplitImageRenderer::zoomLevelIncreaseRequested, [this]() {
+            ui_.zoomLevelSlider->triggerAction(
+                QAbstractSlider::SliderSingleStepAdd);
+          });
+  connect(ui_.splitImageRenderer,
+          &SplitImageRenderer::zoomLevelDecreaseRequested, [this]() {
+            ui_.zoomLevelSlider->triggerAction(
+                QAbstractSlider::SliderSingleStepSub);
+          });
+  // Forward the renderer's mode changes as this view's own signal.
+  connect(ui_.splitImageRenderer, &SplitImageRenderer::renderingModeChanged,
+          this, &SplitImageView::renderingModeChanged);
+}
+
+void SplitImageView::setLeftImage(QImage image) {
+  ui_.splitImageRenderer->setLeftImage(std::move(image));  // Plain forward.
+}
+
+void SplitImageView::setRightImage(QImage image) {
+  ui_.splitImageRenderer->setRightImage(std::move(image));  // Plain forward.
+}
+
+void SplitImageView::setMiddleImage(QImage image) {
+  ui_.splitImageRenderer->setMiddleImage(std::move(image));  // Plain forward.
+}
+
+void SplitImageView::on_settingsButton_clicked() {
+  // Run the dialog; when it is not accepted the renderer is left untouched.
+  if (!settings_.exec()) return;
+  ui_.splitImageRenderer->setRenderingSettings(settings_.renderingSettings());
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/comparison_viewer/split_image_view.h b/third-party/libjxl/libjxl/tools/comparison_viewer/split_image_view.h
new file mode 100644
index 0000000000..b9c3536e88
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/comparison_viewer/split_image_view.h
@@ -0,0 +1,42 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_COMPARISON_VIEWER_SPLIT_IMAGE_VIEW_H_
+#define TOOLS_COMPARISON_VIEWER_SPLIT_IMAGE_VIEW_H_
+
+#include <QWidget>
+
+#include "tools/comparison_viewer/settings.h"
+#include "tools/comparison_viewer/ui_split_image_view.h"
+
+namespace jpegxl {
+namespace tools {
+
+class SplitImageView : public QWidget {
+  Q_OBJECT
+
+ public:
+  explicit SplitImageView(QWidget* parent = nullptr);
+  ~SplitImageView() override = default;
+  // Each setter hands the image over to the embedded SplitImageRenderer.
+  void setLeftImage(QImage image);
+  void setRightImage(QImage image);
+  void setMiddleImage(QImage image);
+  // The signal below relays SplitImageRenderer::renderingModeChanged.
+ signals:
+  void renderingModeChanged(SplitImageRenderer::RenderingMode newMode);
+
+ private slots:
+  void on_settingsButton_clicked();
+
+ private:
+  Ui::SplitImageView ui_;
+  SettingsDialog settings_;  // Source of the renderer's rendering settings.
+};
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_COMPARISON_VIEWER_SPLIT_IMAGE_VIEW_H_
diff --git a/third-party/libjxl/libjxl/tools/comparison_viewer/split_image_view.ui b/third-party/libjxl/libjxl/tools/comparison_viewer/split_image_view.ui
new file mode 100644
index 0000000000..f3b80c9473
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/comparison_viewer/split_image_view.ui
@@ -0,0 +1,141 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <comment>
+  Copyright (c) the JPEG XL Project Authors. All rights reserved.
+
+  Use of this source code is governed by a BSD-style
+  license that can be found in the LICENSE file.
+ </comment>
+ <class>SplitImageView</class>
+ <widget class="QWidget" name="SplitImageView">
+  <property name="windowTitle">
+   <string>Image Comparison Tool</string>
+  </property>
+  <layout class="QVBoxLayout" name="verticalLayout" stretch="1,0">
+   <item>
+    <widget class="QScrollArea" name="scrollArea">
+     <property name="widgetResizable">
+      <bool>true</bool>
+     </property>
+     <widget class="jpegxl::tools::SplitImageRenderer" name="splitImageRenderer"/>
+    </widget>
+   </item>
+   <item>
+    <layout class="QHBoxLayout" name="horizontalLayout" stretch="0,1,0,0">
+     <item>
+      <layout class="QFormLayout" name="zoomLevelFormLayout">
+       <item row="0" column="0">
+        <widget class="QLabel" name="zoomLevelPromptLabel">
+         <property name="text">
+          <string>Zoom level:</string>
+         </property>
+        </widget>
+       </item>
+       <item row="0" column="1">
+        <layout class="QHBoxLayout" name="horizontalLayout_2">
+         <item>
+          <widget class="QSlider" name="zoomLevelSlider">
+           <property name="sizePolicy">
+            <sizepolicy hsizetype="Minimum" vsizetype="Fixed">
+             <horstretch>0</horstretch>
+             <verstretch>0</verstretch>
+            </sizepolicy>
+           </property>
+           <property name="minimum">
+            <number>-3</number>
+           </property>
+           <property name="maximum">
+            <number>3</number>
+           </property>
+           <property name="pageStep">
+            <number>2</number>
+           </property>
+           <property name="orientation">
+            <enum>Qt::Horizontal</enum>
+           </property>
+          </widget>
+         </item>
+         <item>
+          <widget class="QLabel" name="zoomLevelDisplayLabel">
+           <property name="text">
+            <string>×1</string>
+           </property>
+           <property name="textFormat">
+            <enum>Qt::RichText</enum>
+           </property>
+          </widget>
+         </item>
+        </layout>
+       </item>
+      </layout>
+     </item>
+     <item>
+      <spacer name="horizontalSpacer">
+       <property name="orientation">
+        <enum>Qt::Horizontal</enum>
+       </property>
+      </spacer>
+     </item>
+     <item>
+      <layout class="QFormLayout" name="middleWidthFormLayout">
+       <item row="0" column="0">
+        <widget class="QLabel" name="middleWidthPromptLabel">
+         <property name="text">
+          <string>Width of the central band:</string>
+         </property>
+        </widget>
+       </item>
+       <item row="0" column="1">
+        <layout class="QHBoxLayout" name="horizontalLayout_3">
+         <item>
+          <widget class="QSlider" name="middleWidthSlider">
+           <property name="sizePolicy">
+            <sizepolicy hsizetype="Minimum" vsizetype="Fixed">
+             <horstretch>0</horstretch>
+             <verstretch>0</verstretch>
+            </sizepolicy>
+           </property>
+           <property name="maximum">
+            <number>100</number>
+           </property>
+           <property name="value">
+            <number>10</number>
+           </property>
+           <property name="orientation">
+            <enum>Qt::Horizontal</enum>
+           </property>
+          </widget>
+         </item>
+         <item>
+          <widget class="QLabel" name="middleWidthDisplayLabel">
+           <property name="text">
+            <string>10%</string>
+           </property>
+          </widget>
+         </item>
+        </layout>
+       </item>
+      </layout>
+     </item>
+     <item>
+      <widget class="QToolButton" name="settingsButton">
+       <property name="text">
+        <string>Settings</string>
+       </property>
+      </widget>
+     </item>
+    </layout>
+   </item>
+  </layout>
+ </widget>
+ <customwidgets>
+  <customwidget>
+   <class>jpegxl::tools::SplitImageRenderer</class>
+   <extends>QWidget</extends>
+   <header>split_image_renderer.h</header>
+   <container>1</container>
+  </customwidget>
+ </customwidgets>
+ <resources/>
+ <connections/>
+</ui>
diff --git a/third-party/libjxl/libjxl/tools/conformance/CMakeLists.txt b/third-party/libjxl/libjxl/tools/conformance/CMakeLists.txt
new file mode 100644
index 0000000000..5766612abf
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/conformance/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+if(BUILD_TESTING AND CMAKE_EXECUTABLE_SUFFIX STREQUAL "")
+# Script to validate the tooling; it is registered only when bash is found.
+find_program (BASH_PROGRAM bash)
+if(BASH_PROGRAM)
+  add_test(
+    NAME conformance_tooling_test
+    COMMAND
+        ${BASH_PROGRAM} ${CMAKE_CURRENT_SOURCE_DIR}/tooling_test.sh
+        ${CMAKE_BINARY_DIR} ${JPEGXL_TEST_DATA_PATH})
+  # Exit code 254 marks the test as skipped (dependencies not available).
+  set_tests_properties(conformance_tooling_test PROPERTIES SKIP_RETURN_CODE 254)
+endif()
+endif() # BUILD_TESTING AND empty CMAKE_EXECUTABLE_SUFFIX
diff --git a/third-party/libjxl/libjxl/tools/conformance/conformance.py b/third-party/libjxl/libjxl/tools/conformance/conformance.py
new file mode 100755
index 0000000000..e4be865973
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/conformance/conformance.py
@@ -0,0 +1,261 @@
+#!/usr/bin/env python3
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+"""JPEG XL conformance test runner.
+
+Tool to perform a conformance test for a decoder.
+"""
+
+import argparse
+import json
+import numpy
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+
+import lcms2
+
+def Failure(message):  # Report a failed check; always returns False so callers can `return Failure(...)`.
+    print(f"\033[91m{message}\033[0m", flush=True)  # ANSI escape 91 = bright red text
+    return False
+
+def CompareNPY(ref, ref_icc, dec, dec_icc, frame_idx, rmse_limit, peak_error):
+    """Check frame `frame_idx` of `dec` against `ref`; True if within both error limits."""
+    if ref.shape != dec.shape:
+        return Failure(f'Expected shape {ref.shape} but found {dec.shape}')
+    ref_frame = ref[frame_idx]
+    dec_frame = dec[frame_idx]
+    num_channels = ref_frame.shape[2]  # channels are on axis 2 of a single frame
+
+    if ref_icc != dec_icc:
+        # ICC profiles differ: convert the first three decoded channels into the reference profile.
+        if num_channels < 3:
+            return Failure(f"Only RGB images are supported")
+        dec_clr = dec_frame[:, :, 0:3]
+        dec_frame[:, :, 0:3] = lcms2.convert_pixels(dec_icc, ref_icc, dec_clr)  # NOTE(review): writes into dec_frame, presumably a view of the caller's `dec` - confirm
+
+    error = numpy.abs(ref_frame - dec_frame)
+    actual_peak_error = error.max()  # largest absolute difference over all channels
+    error_by_channel = [error[:, :, ch] for ch in range(num_channels)]
+    actual_rmses = [numpy.sqrt(numpy.mean(error_ch * error_ch)) for error_ch in error_by_channel]
+    actual_rmse = max(actual_rmses)  # the worst channel is compared against rmse_limit
+
+    print(f"RMSE: {actual_rmses}, peak error: {actual_peak_error}", flush=True)
+
+    if actual_rmse > rmse_limit:
+        return Failure(f"RMSE too large: {actual_rmse} > {rmse_limit}")
+
+    if actual_peak_error > peak_error:
+        return Failure(
+            f"Peak error too large: {actual_peak_error} > {peak_error}")
+    return True
+
+
+def CompareBinaries(ref_bin, dec_bin):
+    """Return True when both files hold identical bytes, else report a Failure."""
+    contents = []
+    for path in (ref_bin, dec_bin):
+        # The reference file is read first, then the decoded one.
+        with open(path, 'rb') as handle:
+            contents.append(handle.read())
+    expected, actual = contents
+
+    if expected == actual:
+        return True
+    return Failure(f'Binary files mismatch: {ref_bin} {dec_bin}')
+
+
+TEST_KEYS = set(
+    ['reconstructed_jpeg', 'original_icc', 'rms_error', 'peak_error'])
+
+
+def CheckMeta(dec, ref):
+    """Recursively check decoded metadata `dec` against reference `ref`; every
+    dict entry (except TEST_KEYS) and every list element must match."""
+    if isinstance(ref, dict):
+        if not isinstance(dec, dict):
+            return Failure("Malformed metadata file")
+        for k, v in ref.items():
+            if k in TEST_KEYS:
+                continue
+            if k not in dec:
+                return Failure(f"Malformed metadata file: key {k} not found")
+            if not CheckMeta(dec[k], v):  # keep scanning the other keys on success
+                return False
+    elif isinstance(ref, list):
+        if not isinstance(dec, list) or len(dec) != len(ref):
+            return Failure("Malformed metadata file")
+        if not all(CheckMeta(vv, v) for vv, v in zip(dec, ref)):  # stops at first mismatch
+            return False
+    elif isinstance(ref, float):
+        if not isinstance(dec, float):
+            return Failure("Malformed metadata file")
+        if abs(dec - ref) > 0.0001:  # floats only need to agree within this tolerance
+            return Failure(f"Metadata: Expected {ref}, found {dec}")
+    elif dec != ref:
+        return Failure(f"Metadata: Expected {ref}, found {dec}")
+    return True
+
+
+def ConformanceTestRunner(args):
+    """Run every test listed in the corpus; return True only if all pass.
+
+    args.corpus is a corpus directory (its corpus.txt is used) or a .txt
+    file listing a subset of tests, located next to the test directories."""
+    ok = True
+    if os.path.isdir(args.corpus):
+        corpus_dir = args.corpus
+        corpus_txt = os.path.join(args.corpus, 'corpus.txt')
+    else:
+        corpus_dir = os.path.dirname(args.corpus)
+        corpus_txt = args.corpus
+
+    with open(corpus_txt, 'r') as f:
+        for test_id in f:
+            test_id = test_id.rstrip('\n')
+            print(f"\033[94m\033[1mTesting {test_id}\033[0m", flush=True)
+            test_dir = os.path.join(corpus_dir, test_id)
+
+            with open(os.path.join(test_dir, 'test.json'), 'r') as f:
+                descriptor = json.load(f)
+                if 'sha256sums' in descriptor:
+                    del descriptor['sha256sums']
+
+            exact_tests = []
+
+            with tempfile.TemporaryDirectory(prefix=test_id) as work_dir:
+                input_filename = os.path.join(test_dir, 'input.jxl')
+                pixel_prefix = os.path.join(work_dir, 'decoded')
+                output_filename = pixel_prefix + '_image.npy'
+                cmd = [args.decoder, input_filename, output_filename]
+                cmd_jpeg = []
+                if 'preview' in descriptor:
+                    preview_filename = os.path.join(work_dir,
+                                                    'decoded_preview.npy')
+                    cmd.extend(['--preview_out', preview_filename])
+                if 'reconstructed_jpeg' in descriptor:
+                    jpeg_filename = os.path.join(work_dir, 'reconstructed.jpg')
+                    cmd_jpeg = [args.decoder, input_filename, jpeg_filename]
+                    exact_tests.append(('reconstructed.jpg', jpeg_filename))
+                if 'original_icc' in descriptor:
+                    decoded_original_icc = os.path.join(
+                        work_dir, 'decoded_org.icc')
+                    cmd.extend(['--orig_icc_out', decoded_original_icc])
+                    exact_tests.append(('original.icc', decoded_original_icc))
+                meta_filename = os.path.join(work_dir, 'meta.json')
+                cmd.extend(['--metadata_out', meta_filename])
+                cmd.extend(['--icc_out', pixel_prefix + '.icc'])
+                cmd.extend(['--norender_spotcolors'])
+
+                print(f"Running: {cmd}", flush=True)
+                if subprocess.call(cmd) != 0:
+                    ok = Failure('Running the decoder (%s) returned error' %
+                                 ' '.join(cmd))
+                    continue
+                if cmd_jpeg:
+                    print(f"Running: {cmd_jpeg}", flush=True)
+                    if subprocess.call(cmd_jpeg) != 0:
+                        ok = Failure(
+                            'Running the decoder (%s) returned error' %
+                            ' '.join(cmd_jpeg))
+                        continue
+
+                # Run validation of exact files.
+                for reference_basename, decoded_filename in exact_tests:
+                    reference_filename = os.path.join(test_dir,
+                                                      reference_basename)
+                    binary_ok = CompareBinaries(reference_filename,
+                                                decoded_filename)
+                    if not binary_ok and args.update_on_failure:
+                        os.unlink(reference_filename)
+                        shutil.copy2(decoded_filename, reference_filename)
+                        binary_ok = True
+                    ok = ok & binary_ok
+
+                # Validate metadata.
+                with open(meta_filename, 'r') as f:
+                    meta = json.load(f)
+
+                ok = ok & CheckMeta(meta, descriptor)
+
+                # Pixel data.
+                with open(pixel_prefix + '.icc', 'rb') as f:
+                    decoded_icc = f.read()
+                with open(os.path.join(test_dir, "reference.icc"), 'rb') as f:
+                    reference_icc = f.read()
+
+                reference_npy_fn = os.path.join(test_dir, 'reference_image.npy')
+                decoded_npy_fn = os.path.join(work_dir, 'decoded_image.npy')
+
+                if not os.path.exists(decoded_npy_fn):
+                    ok = Failure('File not decoded: decoded_image.npy')
+                    continue
+
+                reference_npy = numpy.load(reference_npy_fn)
+                decoded_npy = numpy.load(decoded_npy_fn)
+
+                frames_ok = True
+                for i, fd in enumerate(descriptor['frames']):
+                    frames_ok = frames_ok & CompareNPY(
+                        reference_npy, reference_icc, decoded_npy,
+                        decoded_icc, i, fd['rms_error'],
+                        fd['peak_error'])
+
+                if not frames_ok and args.update_on_failure:
+                    os.unlink(reference_npy_fn)
+                    shutil.copy2(decoded_npy_fn, reference_npy_fn)
+                    frames_ok = True
+                ok = ok & frames_ok
+
+                if 'preview' in descriptor:
+                    reference_npy_fn = os.path.join(test_dir,
+                                                    'reference_preview.npy')
+                    decoded_npy_fn = os.path.join(work_dir,
+                                                  'decoded_preview.npy')
+
+                    if not os.path.exists(decoded_npy_fn):
+                        ok = Failure('File not decoded: decoded_preview.npy')
+                        continue
+
+                    reference_npy = numpy.load(reference_npy_fn)
+                    decoded_npy = numpy.load(decoded_npy_fn)
+                    preview_ok = CompareNPY(reference_npy, reference_icc,
+                                            decoded_npy, decoded_icc, 0,
+                                            descriptor['preview']['rms_error'],
+                                            descriptor['preview']['peak_error'])
+                    # Only overwrite the reference when explicitly requested.
+                    if not preview_ok and args.update_on_failure:
+                        os.unlink(reference_npy_fn)
+                        shutil.copy2(decoded_npy_fn, reference_npy_fn)
+                        preview_ok = True
+                    ok = ok & preview_ok
+
+    return ok
+
+
+def main():  # CLI entry point: exits with status 1 when any conformance check fails.
+    parser = argparse.ArgumentParser(description=__doc__)  # help text is the module docstring
+    parser.add_argument('--decoder',
+                        metavar='DECODER',
+                        required=True,
+                        help='path to the decoder binary under test.')
+    parser.add_argument(
+        '--corpus',
+        metavar='CORPUS',
+        required=True,
+        help=('path to the corpus directory or corpus descriptor'
+              ' text file.'))
+    parser.add_argument(
+        '--update_on_failure', action='store_true',
+        help='If set, updates reference files on failing checks.')
+    args = parser.parse_args()
+    if not ConformanceTestRunner(args):
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/third-party/libjxl/libjxl/tools/conformance/generator.py b/third-party/libjxl/libjxl/tools/conformance/generator.py
new file mode 100755
index 0000000000..e2a9b2e66a
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/conformance/generator.py
@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+"""Tool for generating a conformance testing corpus from a set of .jxl files.
+
+This is not the JPEG XL conformance test runner. This is a tool to generate a
+conformance testing corpus from a set of .jxl files.
+"""
+
+import argparse
+import itertools
+import json
+import os
+import shutil
+import subprocess
+import sys
+
+
+def GenerateConformanceCorpus(args):
+    """Create one test directory per input .jxl under args.output, plus a corpus.txt index."""
+    files = []
+    for jxl in args.inputs:
+        if os.path.isdir(jxl):
+            # Add all the .jxl files recursively.
+            for root, _, dir_files in os.walk(jxl):
+                files.extend(
+                    os.path.join(root, filename) for filename in dir_files
+                    if filename.lower().endswith('.jxl'))
+        else:
+            files.append(jxl)
+
+    os.makedirs(args.output, 0o755, exist_ok=True)
+
+    test_ids = []
+    for jxl in files:
+        # Derive test_id from the lower-cased basename; on a clash append 02, 03, ... until unique.
+        test_id = os.path.basename(jxl).lower()
+        if test_id.endswith('.jxl'):
+            test_id = test_id[:-4]
+        if test_id in test_ids:
+            for i in itertools.count(2):
+                candidate = test_id + '%02d' % i
+                if candidate not in test_ids:
+                    test_id = candidate
+                    break
+        test_ids.append(test_id)
+
+        test_dir = os.path.join(args.output, test_id)
+        os.makedirs(test_dir, 0o755, exist_ok=True)
+        print('Generating %s' % (test_id, ))
+        input_file = os.path.join(test_dir, 'input.jxl')
+        shutil.copy(jxl, input_file)
+
+        # NOTE(review): 'descriptor' is assigned here but never read again in this function.
+        descriptor = {}
+        descriptor['jxl'] = 'input.jxl'
+
+        original_icc_filename = os.path.join(test_dir, 'original.icc')
+        reconstructed_filename = os.path.join(test_dir, 'reconstructed.jpg')
+        pixel_prefix = os.path.join(test_dir, 'reference')
+        output_file = pixel_prefix + '_image.npy'
+        cmd = [args.decoder, input_file, output_file]
+        metadata_filename = os.path.join(test_dir, 'test.json')
+        cmd.extend(['--metadata_out', metadata_filename])
+        cmd.extend(['--icc_out', pixel_prefix + '.icc'])
+
+        # Run the decoder, asking for reference_image.npy, test.json and reference.icc; raises on failure.
+        subprocess.check_call(cmd)
+
+        with open(metadata_filename, 'r') as f:
+            metadata = json.load(f)
+        # NOTE(review): cmd names neither original.icc nor reconstructed.jpg; presumably these two checks only see pre-existing files - confirm.
+        if os.path.exists(original_icc_filename):
+            metadata['original_icc'] = "original.icc"
+
+        if os.path.exists(reconstructed_filename):
+            metadata['reconstructed_jpeg'] = "reconstructed.jpg"
+
+        for frame in metadata['frames']:
+            frame['rms_error'] = args.rmse
+            frame['peak_error'] = args.peak_error
+
+        if 'preview' in metadata:
+            metadata['preview']['rms_error'] = args.rmse
+            metadata['preview']['peak_error'] = args.peak_error
+
+        # Rewrite test.json with the error limits (and optional file keys) added.
+        with open(metadata_filename, 'w') as f:
+            json.dump(metadata, f, indent=2)
+
+    # Generate a corpus descriptor with the list of the all the test_id names,
+    # one per line.
+    with open(os.path.join(args.output, 'corpus.txt'), 'w') as f:
+        f.write(''.join(line + '\n' for line in test_ids))
+
+
+def main():  # CLI entry point: parses the options below and generates the corpus.
+    parser = argparse.ArgumentParser(description=__doc__)  # help text is the module docstring
+    parser.add_argument('--decoder',
+                        metavar='DECODER',
+                        required=True,
+                        help='path to the decoder binary under test.')
+    parser.add_argument('--output',
+                        metavar='DIR',
+                        required=True,
+                        help='path to the output directory')
+    parser.add_argument('--peak_error',  # stored as 'peak_error' on every frame and preview
+                        metavar='PEAK_ERROR',
+                        type=float,
+                        required=True,
+                        help='peak error for each testcase')
+    parser.add_argument('--rmse',  # stored as 'rms_error' on every frame and preview
+                        metavar='RMSE',
+                        type=float,
+                        required=True,
+                        help='max RMSE for each testcase')
+    parser.add_argument('inputs',  # files, or directories searched recursively for .jxl
+                        metavar='JXL',
+                        nargs='+',
+                        help='path to input .jxl file(s)')
+    args = parser.parse_args()
+    GenerateConformanceCorpus(args)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/third-party/libjxl/libjxl/tools/conformance/lcms2.py b/third-party/libjxl/libjxl/tools/conformance/lcms2.py
new file mode 100644
index 0000000000..f8313cd6b4
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/conformance/lcms2.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+import ctypes
+from numpy.ctypeslib import ndpointer
+import numpy
+import os
+
+lcms2_lib_path = os.getenv("LCMS2_LIB_PATH", "liblcms2.so.2")
+lcms2_lib = ctypes.cdll.LoadLibrary(lcms2_lib_path)
+
+native_open_profile = lcms2_lib.cmsOpenProfileFromMem
+native_open_profile.restype = ctypes.c_void_p
+native_open_profile.argtypes = [
+    ctypes.c_char_p,  # MemPtr
+    ctypes.c_size_t  # dwSize
+]
+
+native_close_profile = lcms2_lib.cmsCloseProfile
+native_close_profile.restype = ctypes.c_int
+native_close_profile.argtypes = [
+    ctypes.c_void_p  # hProfile
+]
+
+native_create_transform = lcms2_lib.cmsCreateTransform
+native_create_transform.restype = ctypes.c_void_p
+native_create_transform.argtypes = [
+    ctypes.c_void_p,  # Input
+    ctypes.c_uint32,  # InputFormat
+    ctypes.c_void_p,  # Output
+    ctypes.c_uint32,  # OutputFormat
+    ctypes.c_uint32,  # Intent
+    ctypes.c_uint32  # dwFlags
+]
+
+native_delete_transform = lcms2_lib.cmsDeleteTransform
+native_delete_transform.restype = None
+native_delete_transform.argtypes = [
+    ctypes.c_void_p  # hTransform
+]
+
+native_do_transform = lcms2_lib.cmsDoTransform
+native_do_transform.restype = None
+native_do_transform.argtypes = [
+    ctypes.c_void_p,  # Transform
+    ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"),  # InputBuffer
+    ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"),  # OutputBuffer
+    ctypes.c_uint32  # Size
+]
+
+
+def make_format(
+    bytes_per_sample=4,  # float32
+    num_channels=3,  # RGB or XYZ
+    extra_channels=0,
+    swap_channels=0,
+    swap_endiannes=0,
+    planar=0,
+    flavor=0,
+    swap_first=0,
+    unused=0,
+    pixel_type=4,  # RGB
+    optimized=0,
+    floating_point=1):
+    # Pack the fields into one integer, each at the bit offset left by the widths before it.
+    fields = (bytes_per_sample, num_channels, extra_channels, swap_channels,
+              swap_endiannes, planar, flavor, swap_first, unused, pixel_type,
+              optimized, floating_point)
+    widths = (3, 4, 3, 1, 1, 1, 1, 1, 1, 5, 1, 1)
+    packed, offset = 0, 0
+    for field, width in zip(fields, widths):
+        packed += field << offset
+        offset += width
+    return packed
+
+
+def convert_pixels(from_icc, to_icc, from_pixels):  # WxHx3 pixels: from_icc profile -> to_icc profile via lcms2
+    from_icc = bytearray(from_icc)  # mutable copy; wrapped by from_buffer() further down
+    to_icc = bytearray(to_icc)
+
+    if len(from_pixels.shape) != 3 or from_pixels.shape[2] != 3:
+        raise ValueError("Only WxHx3 shapes are supported")
+    from_pixels_plain = from_pixels.ravel().astype(numpy.float64)  # flat float64 copy of the input
+    num_pixels = len(from_pixels_plain) // 3
+    to_pixels_plain = numpy.empty(num_pixels * 3, dtype=numpy.float64)  # output buffer filled by lcms2
+    # NOTE(review): the profile/transform handles created below are used unchecked - confirm a bad ICC cannot yield NULL here.
+    from_icc = (ctypes.c_char * len(from_icc)).from_buffer(from_icc)
+    from_profile = native_open_profile(
+        ctypes.cast(ctypes.pointer(from_icc), ctypes.c_char_p), len(from_icc))
+
+    to_icc = (ctypes.c_char * len(to_icc)).from_buffer(to_icc)
+    to_profile = native_open_profile(
+        ctypes.cast(ctypes.pointer(to_icc), ctypes.c_char_p), len(to_icc))
+
+    # bytes_per_sample=0 actually means 8 bytes (but there are just 3 bits to
+    # encode the length of sample)
+    format_rgb_f64 = make_format(bytes_per_sample=0)
+    intent = 0  # INTENT_PERCEPTUAL
+    flags = 0  # default; no "no-optimization"
+    transform = native_create_transform(
+        from_profile, format_rgb_f64, to_profile, format_rgb_f64, intent, flags)
+
+    native_do_transform(
+        transform, from_pixels_plain, to_pixels_plain, num_pixels)
+    # Release the native handles: the transform first, then both profiles.
+    native_delete_transform(transform)
+    native_close_profile(to_profile)
+    native_close_profile(from_profile)
+
+    # Restore the input's shape and dtype before returning.
+    return to_pixels_plain.reshape(from_pixels.shape).astype(from_pixels.dtype)
+
+if __name__ == '__main__':
+    raise Exception("Not an executable")
diff --git a/third-party/libjxl/libjxl/tools/conformance/tooling_test.sh b/third-party/libjxl/libjxl/tools/conformance/tooling_test.sh
new file mode 100755
index 0000000000..95adefb1eb
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/conformance/tooling_test.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Conformance test tooling test. This is not the JPEG XL conformance test
+# runner. This test that the tooling to generate the conformance test and the
+# conformance test runner work together.
+
+MYDIR=$(dirname $(realpath "$0"))
+
+if [[ $# -eq 2 ]]; then
+    JPEGXL_TEST_DATA_PATH="$2"
+else
+    JPEGXL_TEST_DATA_PATH="${MYDIR}/../../testdata"
+fi
+
+set -eux
+
+# Temporary files cleanup hooks.
+CLEANUP_FILES=()
+cleanup() {
+  if [[ ${#CLEANUP_FILES[@]} -ne 0 ]]; then
+    rm -rf "${CLEANUP_FILES[@]}"
+  fi
+}
+trap 'retcode=$?; { set +x; } 2>/dev/null; cleanup' INT TERM EXIT
+
+main() {  # $1: build directory (optional). Generates a one-image corpus, then verifies it.
+  local tmpdir=$(mktemp -d)  # scratch corpus dir; registered for removal just below
+  CLEANUP_FILES+=("${tmpdir}")
+  # NOTE(review): only numpy is probed here, yet conformance.py also imports lcms2 - confirm a missing liblcms2 should fail rather than skip.
+  if ! python3 -c 'import numpy'; then
+    echo "Missing numpy, skipping test." >&2
+    exit 254  # Signals ctest that we should mark this test as skipped.
+  fi
+
+  local build_dir="${1:-}"
+  if [[ -z "${build_dir}" ]]; then
+    build_dir=$(realpath "${MYDIR}/../../build")
+  fi
+  # Build the corpus from a single test image with 0.001 limits for both error metrics.
+  local decoder="${build_dir}/tools/djxl"
+  "${MYDIR}/generator.py" \
+    --decoder="${decoder}" \
+    --output="${tmpdir}" \
+    --peak_error=0.001 \
+    --rmse=0.001 \
+    "${JPEGXL_TEST_DATA_PATH}/jxl/blending/cropped_traffic_light.jxl"
+
+  # List the corpus dir for the log; a failing or missing 'tree' is ignored.
+  tree "${tmpdir}" || true
+  # Run the conformance checker with the same decoder against the corpus just generated.
+  "${MYDIR}/conformance.py" \
+    --decoder="${decoder}" \
+    --corpus="${tmpdir}"
+}
+
+main "$@"
diff --git a/third-party/libjxl/libjxl/tools/decode_and_encode.cc b/third-party/libjxl/libjxl/tools/decode_and_encode.cc
new file mode 100644
index 0000000000..4559f31c4b
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/decode_and_encode.cc
@@ -0,0 +1,61 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include <string>
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+#include "tools/file_io.h"
+#include "tools/thread_pool_internal.h"
+
+namespace {
+
+// Decodes argv[1] (typically PNM) using argv[2] as the color_space hint and encodes
+// it to argv[3] (typically PNG, which supports all required metadata). Returns 0 on success, 1 on error.
+int Convert(int argc, char** argv) {
+  if (argc != 4 && argc != 5) {  // NOTE(review): a 5th argument ([bits]) is accepted, but argv[4] is never read
+    fprintf(stderr, "Args: in colorspace_description out [bits]\n");
+    return 1;
+  }
+  const std::string& pathname_in = argv[1];
+  const std::string& desc = argv[2];
+  const std::string& pathname_out = argv[3];
+
+  std::vector<uint8_t> encoded_in;
+  if (!jpegxl::tools::ReadFile(pathname_in, &encoded_in)) {
+    fprintf(stderr, "Failed to read image from %s\n", pathname_in.c_str());
+    return 1;
+  }
+  jxl::CodecInOut io;
+  jxl::extras::ColorHints color_hints;
+  jpegxl::tools::ThreadPoolInternal pool(4);  // presumably 4 worker threads - confirm
+  color_hints.Add("color_space", desc);
+  if (!jxl::SetFromBytes(jxl::Span<const uint8_t>(encoded_in), color_hints, &io,
+                         &pool)) {
+    fprintf(stderr, "Failed to decode %s\n", pathname_in.c_str());
+    return 1;
+  }
+
+  std::vector<uint8_t> encoded_out;
+  if (!jxl::Encode(io, pathname_out, &encoded_out, &pool)) {  // presumably picks the codec from the output name - confirm
+    fprintf(stderr, "Failed to encode %s\n", pathname_out.c_str());
+    return 1;
+  }
+  if (!jpegxl::tools::WriteFile(pathname_out, encoded_out)) {
+    fprintf(stderr, "Failed to write %s\n", pathname_out.c_str());
+    return 1;
+  }
+
+  return 0;
+}
+
+}  // namespace
+
+int main(int argc, char** argv) { return Convert(argc, argv); }
diff --git a/third-party/libjxl/libjxl/tools/decode_basic_info_fuzzer.cc b/third-party/libjxl/libjxl/tools/decode_basic_info_fuzzer.cc
new file mode 100644
index 0000000000..8e97ff6c20
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/decode_basic_info_fuzzer.cc
@@ -0,0 +1,58 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/decode.h>
+#include <stdint.h>
+
+namespace jpegxl {
+namespace tools {
+
+int TestOneInput(const uint8_t* data, size_t size) {
+  // Feeds `data` to a decoder and exercises the basic-info and color queries.
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+  if (dec == nullptr) return 0;  // Nothing to fuzz without a decoder.
+  JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING);
+  JxlDecoderSetInput(dec, data, size);
+  JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+
+  if (status != JXL_DEC_BASIC_INFO) {
+    JxlDecoderDestroy(dec);
+    return 0;
+  }
+
+  JxlBasicInfo info;
+  bool have_basic_info = JxlDecoderGetBasicInfo(dec, &info) == JXL_DEC_SUCCESS;
+
+  if (have_basic_info) {
+    if (info.alpha_bits != 0) {
+      for (uint32_t i = 0; i < info.num_extra_channels; ++i) {
+        JxlExtraChannelInfo extra;
+        JxlDecoderGetExtraChannelInfo(dec, i, &extra);  // query each channel
+      }
+    }
+  }
+  status = JxlDecoderProcessInput(dec);
+
+  if (status != JXL_DEC_COLOR_ENCODING) {
+    JxlDecoderDestroy(dec);
+    return 0;
+  }
+
+  JxlDecoderGetColorAsEncodedProfile(dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+                                     nullptr);
+  size_t dec_profile_size;
+  JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+                              &dec_profile_size);
+
+  JxlDecoderDestroy(dec);
+  return 0;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  return jpegxl::tools::TestOneInput(data, size);
+}
diff --git a/third-party/libjxl/libjxl/tools/demo_vardct_select.sh b/third-party/libjxl/libjxl/tools/demo_vardct_select.sh
new file mode 100755
index 0000000000..414eacbbd2
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/demo_vardct_select.sh
@@ -0,0 +1,93 @@
+#!/bin/bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Produces a demo video showing VarDCT block type selection
+# from very high quality to very low quality.
+
+# Assumes ImageMagick convert, ffmpeg, bc are available.
+
+set -eu
+
+MYDIR=$(dirname $(realpath "$0"))
+
+CLEANUP_FILES=()
+cleanup() {
+  if [[ ${#CLEANUP_FILES[@]} -ne 0 ]]; then
+    rm -fr "${CLEANUP_FILES[@]}"
+  fi
+}
+trap "{ set +x; } 2>/dev/null; cleanup" INT TERM EXIT
+
+
+
+main() {  # IMAGE [OUT.apng]: renders one annotated frame per distance setting and joins them with ffmpeg.
+  local infile="${1:-}"
+  if [[ -z "${infile}" ]]; then
+    cat >&2 <<EOF
+Use: $0 IMAGE [OUT.apng]
+
+Where IMAGE is an input image and OUT.apng is the output
+EOF
+    exit 1
+  fi
+
+  shift
+  # Whatever follows IMAGE is taken as the output path.
+  local outfile="$*"
+  if [[ -z "${outfile}" ]]; then
+    # default output filename
+    outfile=vardct-select-demo.apng
+  fi
+
+  if ! command -v benchmark_xl &>/dev/null; then
+    PATH=$PATH:$MYDIR/../build/tools
+    if ! command -v benchmark_xl &>/dev/null; then
+      echo "Could not find benchmark_xl, try building first" >&2
+      exit 1
+    fi
+  fi
+  local b=benchmark_xl
+
+  if ! command -v ffmpeg &>/dev/null; then
+    echo "Could not find ffmpeg" >&2
+    exit 1
+  fi
+
+  if ! command -v convert &>/dev/null; then
+    echo "Could not find ImageMagick (convert)" >&2
+    exit 1
+  fi
+
+  local tmp; tmp=$(mktemp -d --suffix=vardctdemo)  # split from 'local' so a failure aborts under set -e
+  CLEANUP_FILES+=("${tmp}")
+
+  cp "$infile" "$tmp/orig"
+
+  local n=0 bytes bpp label i
+  local pixels; pixels="$(identify -format "(%w * %h)" "$tmp/orig")"
+  for i in $(seq 0.2 0.2 2) $(seq 2.5 0.5 5) $(seq 6 1 10) $(seq 12 2 40); do
+    "$b" --input="$tmp/orig" --codec="jxl:d$i" --save_decompressed --save_compressed \
+      --debug_image_dir="$tmp" --output_dir="$tmp"
+    convert "$tmp/orig" \( "$tmp/orig.jxl:d$i.dbg/ac_strategy.png" \
+      -alpha set -channel A -evaluate set 66% \) \
+      -composite "$tmp/t.ppm"
+    bytes=$(stat -c "%s" "$tmp/orig.jxl_d$i")
+    bpp=$( echo "$bytes * 8 / $pixels " | bc -l | cut -b 1-6 )
+    label="cjxl -d $i  ($((bytes / 1000)) kb, bpp: $bpp)"
+    convert +append "$tmp/t.ppm" "$tmp/orig.jxl_d$i.png" "$tmp/t2.ppm"
+    convert "$tmp/t2.ppm" \
+          -gravity north \
+          -pointsize 32 \
+          -stroke '#000C' -strokewidth 5 -annotate +0+12 "$label" \
+          -stroke  none   -fill white    -annotate +0+12 "$label" "$tmp/frame-$n.png"
+
+    n=$((n+1))
+  done
+
+  ffmpeg -framerate 1 -i "$tmp/frame-%d.png" "$outfile"
+}
+
+main "$@"
diff --git a/third-party/libjxl/libjxl/tools/djpegli.cc b/third-party/libjxl/libjxl/tools/djpegli.cc
new file mode 100644
index 0000000000..bac55e1042
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/djpegli.cc
@@ -0,0 +1,197 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/extras/dec/jpegli.h"
+#include "lib/extras/enc/apng.h"
+#include "lib/extras/enc/encode.h"
+#include "lib/extras/time.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "tools/cmdline.h"
+#include "tools/file_io.h"
+#include "tools/speed_stats.h"
+
+namespace jpegxl {
+namespace tools {
+namespace {
+
+// Command-line configuration of the djpegli tool. Every field keeps its
+// default unless the option bound to it by AddCommandLineOptions() is given.
+struct Args {
+  const char* file_in = nullptr;
+  const char* file_out = nullptr;
+  bool disable_output = false;
+  size_t bitdepth = 8;
+  size_t num_reps = 1;
+  bool quiet = false;
+
+  // Registers the INPUT and OUTPUT positionals plus all optional flags on
+  // `cmdline`, binding each one to the matching field of this struct.
+  void AddCommandLineOptions(CommandLineParser* cmdline) {
+    // PNG is only mentioned when GetAPNGEncoder() yields a non-null encoder.
+    const char* png_part = jxl::extras::GetAPNGEncoder() ? "PNG, " : "";
+    std::string output_help =
+        std::string("The output can be ") + png_part + "PFM or PPM/PGM/PNM";
+
+    cmdline->AddPositionalOption("INPUT", /* required = */ true,
+                                 "The JPG input file.", &file_in);
+    cmdline->AddPositionalOption("OUTPUT", /* required = */ true, output_help,
+                                 &file_out);
+
+    cmdline->AddOptionFlag('\0', "disable_output",
+                           "No output file will be written (for benchmarking)",
+                           &disable_output, &SetBooleanTrue);
+    cmdline->AddOptionValue('\0', "bitdepth", "8|16",
+                            "Sets the output bitdepth for integer based "
+                            "formats, can be 8 (default) "
+                            "or 16. Has no impact on PFM output.",
+                            &bitdepth, &ParseUnsigned);
+    cmdline->AddOptionValue('\0', "num_reps", "N",
+                            "Sets the number of times to decompress the image. "
+                            "Used for benchmarking, the default is 1.",
+                            &num_reps, &ParseUnsigned);
+    cmdline->AddOptionFlag('\0', "quiet", "Silence output (except for errors).",
+                           &quiet, &SetBooleanTrue);
+  }
+};
+
+// Checks option values that the command-line parser does not validate itself.
+// Prints a diagnostic to stderr and returns false for the first invalid value;
+// returns true when every checked option is acceptable.
+bool ValidateArgs(const Args& args) {
+  if (args.bitdepth != 8 && args.bitdepth != 16) {
+    fprintf(stderr, "Invalid --bitdepth argument\n");
+    return false;
+  }
+  // With zero repetitions the decode loop never executes, leaving no decoded
+  // image to time or to write out; reject it here with a clear message.
+  if (args.num_reps == 0) {
+    fprintf(stderr, "Invalid --num_reps argument, must be at least 1\n");
+    return false;
+  }
+  return true;
+}
+
+// Fills `params` according to the requested output. A ".pfm" extension
+// selects big-endian float samples; otherwise --bitdepth=16 selects
+// big-endian 16-bit samples. ".pgm" forces grayscale and ".ppm" forces RGB.
+// Fields not mentioned here are left untouched.
+void SetDecompressParams(const Args& args, const std::string& extension,
+                         jxl::extras::JpegDecompressParams* params) {
+  const bool wants_float = (extension == ".pfm");
+  if (wants_float || args.bitdepth == 16) {
+    // PFM takes precedence over the integer bit depth.
+    if (wants_float) {
+      params->output_data_type = JXL_TYPE_FLOAT;
+    } else {
+      params->output_data_type = JXL_TYPE_UINT16;
+    }
+    params->output_endianness = JXL_BIG_ENDIAN;
+  }
+
+  if (extension == ".pgm") {
+    params->force_grayscale = true;
+    return;
+  }
+  if (extension == ".ppm") {
+    params->force_rgb = true;
+  }
+}
+
+// Implements the djpegli tool: parses the command line, decodes the input
+// JPEG args.num_reps times while recording the elapsed time of each run and,
+// unless output is disabled, re-encodes the decoded pixels with the encoder
+// chosen by the output file extension and writes the result.
+// Returns EXIT_SUCCESS or EXIT_FAILURE.
+int DJpegliMain(int argc, const char* argv[]) {
+  Args args;
+  CommandLineParser parser;
+  args.AddCommandLineOptions(&parser);
+
+  if (!parser.Parse(argc, argv)) {
+    // Parse already printed the actual error cause.
+    fprintf(stderr, "Use '%s -h' for more information.\n", argv[0]);
+    return EXIT_FAILURE;
+  }
+
+  // An explicit help request and a missing input both just show the usage.
+  const bool show_help = parser.HelpFlagPassed() || !args.file_in;
+  if (show_help) {
+    parser.PrintHelp();
+    return EXIT_SUCCESS;
+  }
+
+  const bool has_output_name = (args.file_out != nullptr);
+  if (!has_output_name && !args.disable_output) {
+    fprintf(stderr,
+            "No output file specified and --disable_output flag not passed.\n");
+    return EXIT_FAILURE;
+  }
+
+  if (args.disable_output && !args.quiet) {
+    fprintf(stderr,
+            "Decoding will be performed, but the result will be discarded.\n");
+  }
+
+  if (!ValidateArgs(args)) return EXIT_FAILURE;
+
+  std::vector<uint8_t> jpeg_bytes;
+  if (!ReadFile(args.file_in, &jpeg_bytes)) {
+    fprintf(stderr, "Failed to read input image %s\n", args.file_in);
+    return EXIT_FAILURE;
+  }
+
+  if (!args.quiet) {
+    fprintf(stderr, "Read %" PRIuS " compressed bytes.\n", jpeg_bytes.size());
+  }
+
+  // The extension (including the dot) selects both the decode parameters and
+  // the encoder used for the output file.
+  std::string filename_out;
+  std::string extension;
+  if (has_output_name) {
+    filename_out = args.file_out;
+    const size_t dot = filename_out.rfind('.');
+    if (dot == std::string::npos) {
+      fprintf(stderr, "Unrecognized output extension.\n");
+      return EXIT_FAILURE;
+    }
+    extension = filename_out.substr(dot);
+  }
+
+  jxl::extras::JpegDecompressParams dparams;
+  SetDecompressParams(args, extension, &dparams);
+
+  jxl::extras::PackedPixelFile ppf;
+  jpegxl::tools::SpeedStats stats;
+  size_t reps_done = 0;
+  while (reps_done < args.num_reps) {
+    const double start = jxl::Now();
+    if (!jxl::extras::DecodeJpeg(jpeg_bytes, dparams, nullptr, &ppf)) {
+      fprintf(stderr, "jpegli decoding failed\n");
+      return EXIT_FAILURE;
+    }
+    const double finish = jxl::Now();
+    stats.NotifyElapsed(finish - start);
+    stats.SetImageSize(ppf.info.xsize, ppf.info.ysize);
+    ++reps_done;
+  }
+
+  if (!args.quiet) {
+    stats.Print(1);
+  }
+
+  if (args.disable_output) return EXIT_SUCCESS;
+
+  // ".pnm" is resolved to the concrete format matching the decoded image.
+  if (extension == ".pnm") {
+    if (ppf.info.num_color_channels == 3) {
+      extension = ".ppm";
+    } else {
+      extension = ".pgm";
+    }
+  }
+
+  std::unique_ptr<jxl::extras::Encoder> encoder =
+      jxl::extras::Encoder::FromExtension(extension);
+  if (!encoder) {
+    fprintf(stderr, "Can't decode to the file extension '%s'\n",
+            extension.c_str());
+    return EXIT_FAILURE;
+  }
+
+  jxl::extras::EncodedImage encoded_image;
+  if (!encoder->Encode(ppf, &encoded_image) ||
+      encoded_image.bitstreams.empty()) {
+    fprintf(stderr, "Encode failed\n");
+    return EXIT_FAILURE;
+  }
+
+  if (!WriteFile(filename_out, encoded_image.bitstreams[0])) {
+    fprintf(stderr, "Failed to write output file %s\n", filename_out.c_str());
+    return EXIT_FAILURE;
+  }
+
+  return EXIT_SUCCESS;
+}
+
+}  // namespace
+}  // namespace tools
+}  // namespace jpegxl
+
+// Process entry point; all work is delegated to DJpegliMain().
+int main(int argc, const char* argv[]) {
+  const int exit_code = jpegxl::tools::DJpegliMain(argc, argv);
+  return exit_code;
+}
diff --git a/third-party/libjxl/libjxl/tools/djxl_fuzzer.cc b/third-party/libjxl/libjxl/tools/djxl_fuzzer.cc
new file mode 100644
index 0000000000..316c445066
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/djxl_fuzzer.cc
@@ -0,0 +1,565 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/decode.h>
+#include <jxl/decode_cxx.h>
+#include <jxl/thread_parallel_runner.h>
+#include <jxl/thread_parallel_runner_cxx.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <hwy/targets.h>
+#include <map>
+#include <mutex>
+#include <random>
+#include <vector>
+
+namespace {
+
+// Accumulator that Consume() XORs every inspected byte into, so that decoded
+// data counts as used in the fuzzer.
+int external_code = 0;
+
+// Rate parameter of the exponential distribution from which DecodeJpegXl()
+// draws streaming chunk sizes, expressed as a fraction of the input size.
+constexpr const size_t kStreamingTargetNumberOfChunks = 128;
+
+// Options for the fuzzing. TestOneInput() fills every field from the flag
+// bytes of the fuzzer input; DecodeJpegXl() reads them.
+struct FuzzSpec {
+  // Sample type, endianness and alignment of the requested JxlPixelFormat.
+  JxlDataType output_type;
+  JxlEndianness output_endianness;
+  size_t output_align;
+  // Select the channel count of the pixel format: 1 or 3 color channels,
+  // plus one more when alpha is requested.
+  bool get_alpha;
+  bool get_grayscale;
+  // If true the input is handed to the decoder in chunks, otherwise all at
+  // once.
+  bool use_streaming;
+  bool jpeg_to_pixels;  // decode to pixels even if it is JPEG-reconstructible
+  // Whether to use the callback mechanism for the output image or not.
+  bool use_callback;
+  // Passed to JxlDecoderSetKeepOrientation().
+  bool keep_orientation;
+  // If true, box decompression is requested and box contents are consumed.
+  bool decode_boxes;
+  // Passed to JxlDecoderSetCoalescing().
+  bool coalescing;
+  // Seed of the pseudo-random generator used for random variation of chunk
+  // sizes and of the extra channel choice.
+  uint32_t random_seed;
+};
+
+// Folds every element of [begin, end) into external_code with XOR. A zero
+// element contributes ~0 instead, so the update always branches on the value.
+template <typename It>
+void Consume(const It& begin, const It& end) {
+  auto cursor = begin;
+  while (cursor < end) {
+    if (*cursor != 0) {
+      external_code ^= *cursor;
+    } else {
+      external_code ^= ~0;
+    }
+    ++cursor;
+  }
+}
+
+// Folds the raw object representation of `entry` into external_code by
+// viewing it as sizeof(T) bytes and delegating to the range overload.
+template <typename T>
+void Consume(const T& entry) {
+  const uint8_t* first = reinterpret_cast<const uint8_t*>(&entry);
+  const uint8_t* last = first + sizeof(T);
+  Consume(first, last);
+}
+
+// Drives a JxlDecoder over the `size` bytes at `jxl` using the options in
+// `spec`, and checks that the decoder events arrive in the expected order.
+//
+// use_streaming: if true, decodes the data in small chunks, if false, decodes
+// it in one shot.
+//
+// max_pixels bounds xsize * ysize of both the image and its preview; larger
+// images make the function return false.
+// *pixels receives the image output buffer when spec.use_callback is false.
+// *jpeg receives the reconstructed JPEG bytes when the stream is
+// JPEG-reconstructible and spec.jpeg_to_pixels is false.
+// *xsize / *ysize are set from the basic info, *icc_profile from the color
+// encoding event.
+//
+// Returns true only when the decoder reports JXL_DEC_SUCCESS. Returns false on
+// decoder errors, exhausted input or exceeded limits. Calls abort() when an
+// event order or API result contradicts what this fuzzer expects.
+bool DecodeJpegXl(const uint8_t* jxl, size_t size, size_t max_pixels,
+                  const FuzzSpec& spec, std::vector<uint8_t>* pixels,
+                  std::vector<uint8_t>* jpeg, size_t* xsize, size_t* ysize,
+                  std::vector<uint8_t>* icc_profile) {
+  // Multi-threaded parallel runner. Limit to max 2 threads since the fuzzer
+  // itself is already multithreaded.
+  size_t num_threads =
+      std::min<size_t>(2, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+  auto runner = JxlThreadParallelRunnerMake(nullptr, num_threads);
+
+  // Deterministic randomness: the same seed yields the same chunk sizes.
+  std::mt19937 mt(spec.random_seed);
+  std::exponential_distribution<> dis_streaming(kStreamingTargetNumberOfChunks);
+
+  auto dec = JxlDecoderMake(nullptr);
+  if (JXL_DEC_SUCCESS !=
+      JxlDecoderSubscribeEvents(
+          dec.get(), JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING |
+                         JXL_DEC_PREVIEW_IMAGE | JXL_DEC_FRAME |
+                         JXL_DEC_FULL_IMAGE | JXL_DEC_JPEG_RECONSTRUCTION |
+                         JXL_DEC_BOX)) {
+    return false;
+  }
+  if (JXL_DEC_SUCCESS != JxlDecoderSetParallelRunner(dec.get(),
+                                                     JxlThreadParallelRunner,
+                                                     runner.get())) {
+    return false;
+  }
+  if (JXL_DEC_SUCCESS !=
+      JxlDecoderSetKeepOrientation(dec.get(), spec.keep_orientation)) {
+    abort();
+  }
+  if (JXL_DEC_SUCCESS != JxlDecoderSetCoalescing(dec.get(), spec.coalescing)) {
+    abort();
+  }
+  JxlBasicInfo info;
+  uint32_t channels = (spec.get_grayscale ? 1 : 3) + (spec.get_alpha ? 1 : 0);
+  JxlPixelFormat format = {channels, spec.output_type, spec.output_endianness,
+                           spec.output_align};
+
+  if (!spec.use_streaming) {
+    // Set all input at once
+    JxlDecoderSetInput(dec.get(), jxl, size);
+    JxlDecoderCloseInput(dec.get());
+  }
+
+  // Flags recording which decoder events have been observed so far; they are
+  // used below to verify the order of events.
+  bool seen_basic_info = false;
+  bool seen_color_encoding = false;
+  bool seen_preview = false;
+  bool seen_need_image_out = false;
+  bool seen_full_image = false;
+  bool seen_frame = false;
+  uint32_t num_frames = 0;
+  bool seen_jpeg_reconstruction = false;
+  bool seen_jpeg_need_more_output = false;
+  // If streaming and seen around half the input, test flushing
+  bool tested_flush = false;
+
+  // Size made available for the streaming input, emulating a subset of the
+  // full input size.
+  size_t streaming_size = 0;
+  size_t leftover = size;
+  size_t preview_xsize = 0;
+  size_t preview_ysize = 0;
+  bool want_preview = false;
+  std::vector<uint8_t> preview_pixels;
+
+  std::vector<uint8_t> extra_channel_pixels;
+
+  // Callback function used when decoding with use_callback.
+  struct DecodeCallbackData {
+    JxlBasicInfo info;
+    size_t xsize = 0;
+    size_t ysize = 0;
+    std::mutex called_rows_mutex;
+    // For each row stores the segments of the row being called. For each row
+    // the sum of all the int values in the map up to [i] (inclusive) tell how
+    // many times a callback included the pixel i of that row.
+    std::vector<std::map<uint32_t, int>> called_rows;
+
+    // Use the pixel values.
+    uint32_t value = 0;
+  };
+  DecodeCallbackData decode_callback_data;
+  auto decode_callback = +[](void* opaque, size_t x, size_t y,
+                             size_t num_pixels, const void* pixels) {
+    DecodeCallbackData* data = static_cast<DecodeCallbackData*>(opaque);
+    if (num_pixels > data->xsize) abort();
+    if (x + num_pixels > data->xsize) abort();
+    if (y >= data->ysize) abort();
+    if (num_pixels && !pixels) abort();
+    // Keep track of the segments being called by the callback.
+    {
+      const std::lock_guard<std::mutex> lock(data->called_rows_mutex);
+      // +1 at the segment start and -1 just past its end, so that a prefix
+      // sum over the map gives the per-pixel call count.
+      data->called_rows[y][x]++;
+      data->called_rows[y][x + num_pixels]--;
+      // NOTE(review): `pixels` is dereferenced even when num_pixels == 0, a
+      // case the null check above lets through -- confirm the decoder never
+      // invokes the callback with an empty segment.
+      data->value += *static_cast<const uint8_t*>(pixels);
+    }
+  };
+
+  JxlExtraChannelInfo extra_channel_info;
+
+  std::vector<uint8_t> box_buffer;
+
+  if (spec.decode_boxes &&
+      JXL_DEC_SUCCESS != JxlDecoderSetDecompressBoxes(dec.get(), JXL_TRUE)) {
+    // error ignored, can still fuzz if it doesn't brotli-decompress brob boxes.
+  }
+
+  // Main event loop: each iteration handles one status returned by the
+  // decoder and exits via return or abort().
+  for (;;) {
+    JxlDecoderStatus status = JxlDecoderProcessInput(dec.get());
+    if (status == JXL_DEC_ERROR) {
+      return false;
+    } else if (status == JXL_DEC_NEED_MORE_INPUT) {
+      if (spec.use_streaming) {
+        size_t remaining = JxlDecoderReleaseInput(dec.get());
+        // move any remaining bytes to the front if necessary
+        size_t used = streaming_size - remaining;
+        jxl += used;
+        leftover -= used;
+        streaming_size -= used;
+        // Random chunk size of at least one byte and at most the whole input.
+        size_t chunk_size = std::max<size_t>(
+            1, size * std::min<double>(1.0, dis_streaming(mt)));
+        size_t add_size =
+            std::min<size_t>(chunk_size, leftover - streaming_size);
+        if (add_size == 0) {
+          // End of the streaming data reached
+          return false;
+        }
+        streaming_size += add_size;
+        if (JXL_DEC_SUCCESS !=
+            JxlDecoderSetInput(dec.get(), jxl, streaming_size)) {
+          return false;
+        }
+        if (leftover == streaming_size) {
+          // All possible input bytes given
+          JxlDecoderCloseInput(dec.get());
+        }
+
+        if (!tested_flush && seen_frame) {
+          // Test flush max once to avoid too slow fuzzer run
+          tested_flush = true;
+          JxlDecoderFlushImage(dec.get());
+        }
+      } else {
+        return false;
+      }
+    } else if (status == JXL_DEC_JPEG_NEED_MORE_OUTPUT) {
+      if (want_preview) abort();  // expected preview before frame
+      if (spec.jpeg_to_pixels) abort();
+      if (!seen_jpeg_reconstruction) abort();
+      seen_jpeg_need_more_output = true;
+      // Grow the JPEG buffer and hand the unused tail back to the decoder.
+      size_t used_jpeg_output =
+          jpeg->size() - JxlDecoderReleaseJPEGBuffer(dec.get());
+      jpeg->resize(std::max<size_t>(4096, jpeg->size() * 2));
+      uint8_t* jpeg_buffer = jpeg->data() + used_jpeg_output;
+      size_t jpeg_buffer_size = jpeg->size() - used_jpeg_output;
+
+      if (JXL_DEC_SUCCESS !=
+          JxlDecoderSetJPEGBuffer(dec.get(), jpeg_buffer, jpeg_buffer_size)) {
+        return false;
+      }
+    } else if (status == JXL_DEC_BASIC_INFO) {
+      if (seen_basic_info) abort();  // already seen basic info
+      seen_basic_info = true;
+
+      memset(&info, 0, sizeof(info));
+      if (JXL_DEC_SUCCESS != JxlDecoderGetBasicInfo(dec.get(), &info)) {
+        return false;
+      }
+      Consume(info);
+
+      *xsize = info.xsize;
+      *ysize = info.ysize;
+      decode_callback_data.info = info;
+      size_t num_pixels = *xsize * *ysize;
+      // num_pixels overflow
+      if (*xsize != 0 && num_pixels / *xsize != *ysize) return false;
+      // limit max memory of this fuzzer test
+      if (num_pixels > max_pixels) return false;
+
+      if (info.have_preview) {
+        want_preview = true;
+        preview_xsize = info.preview.xsize;
+        preview_ysize = info.preview.ysize;
+        size_t preview_num_pixels = preview_xsize * preview_ysize;
+        // num_pixels overflow
+        if (preview_xsize != 0 &&
+            preview_num_pixels / preview_xsize != preview_ysize) {
+          return false;
+        }
+        // limit max memory of this fuzzer test
+        if (preview_num_pixels > max_pixels) return false;
+      }
+
+      // Read and consume the info and name of every extra channel.
+      for (size_t ec = 0; ec < info.num_extra_channels; ++ec) {
+        memset(&extra_channel_info, 0, sizeof(extra_channel_info));
+        if (JXL_DEC_SUCCESS !=
+            JxlDecoderGetExtraChannelInfo(dec.get(), ec, &extra_channel_info)) {
+          abort();
+        }
+        Consume(extra_channel_info);
+        std::vector<char> ec_name(extra_channel_info.name_length + 1);
+        if (JXL_DEC_SUCCESS != JxlDecoderGetExtraChannelName(dec.get(), ec,
+                                                             ec_name.data(),
+                                                             ec_name.size())) {
+          abort();
+        }
+        Consume(ec_name.cbegin(), ec_name.cend());
+      }
+    } else if (status == JXL_DEC_COLOR_ENCODING) {
+      if (!seen_basic_info) abort();     // expected basic info first
+      if (seen_color_encoding) abort();  // already seen color encoding
+      seen_color_encoding = true;
+
+      // Get the ICC color profile of the pixel data
+      size_t icc_size;
+      if (JXL_DEC_SUCCESS !=
+          JxlDecoderGetICCProfileSize(dec.get(), JXL_COLOR_PROFILE_TARGET_DATA,
+                                      &icc_size)) {
+        return false;
+      }
+      icc_profile->resize(icc_size);
+      if (JXL_DEC_SUCCESS != JxlDecoderGetColorAsICCProfile(
+                                 dec.get(), JXL_COLOR_PROFILE_TARGET_DATA,
+                                 icc_profile->data(), icc_profile->size())) {
+        return false;
+      }
+      // The preview output buffer is registered here, before any frame event.
+      if (want_preview) {
+        size_t preview_size;
+        if (JXL_DEC_SUCCESS !=
+            JxlDecoderPreviewOutBufferSize(dec.get(), &format, &preview_size)) {
+          return false;
+        }
+        preview_pixels.resize(preview_size);
+        if (JXL_DEC_SUCCESS != JxlDecoderSetPreviewOutBuffer(
+                                   dec.get(), &format, preview_pixels.data(),
+                                   preview_pixels.size())) {
+          abort();
+        }
+      }
+    } else if (status == JXL_DEC_PREVIEW_IMAGE) {
+      if (seen_preview) abort();
+      if (!want_preview) abort();
+      if (!seen_color_encoding) abort();
+      want_preview = false;
+      seen_preview = true;
+      Consume(preview_pixels.cbegin(), preview_pixels.cend());
+    } else if (status == JXL_DEC_FRAME ||
+               status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+      if (want_preview) abort();          // expected preview before frame
+      if (!seen_color_encoding) abort();  // expected color encoding first
+      if (status == JXL_DEC_FRAME) {
+        if (seen_frame) abort();  // already seen JXL_DEC_FRAME
+        seen_frame = true;
+        JxlFrameHeader frame_header;
+        memset(&frame_header, 0, sizeof(frame_header));
+        if (JXL_DEC_SUCCESS !=
+            JxlDecoderGetFrameHeader(dec.get(), &frame_header)) {
+          abort();
+        }
+        decode_callback_data.xsize = frame_header.layer_info.xsize;
+        decode_callback_data.ysize = frame_header.layer_info.ysize;
+        // Without coalescing the per-row bookkeeping restarts for each frame.
+        if (!spec.coalescing) {
+          decode_callback_data.called_rows.clear();
+        }
+        decode_callback_data.called_rows.resize(decode_callback_data.ysize);
+        Consume(frame_header);
+        std::vector<char> frame_name(frame_header.name_length + 1);
+        if (JXL_DEC_SUCCESS != JxlDecoderGetFrameName(dec.get(),
+                                                      frame_name.data(),
+                                                      frame_name.size())) {
+          abort();
+        }
+        Consume(frame_name.cbegin(), frame_name.cend());
+        // When not testing streaming, test that JXL_DEC_NEED_IMAGE_OUT_BUFFER
+        // occurs instead, so do not set buffer now.
+        if (!spec.use_streaming) continue;
+      }
+      if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+        // expected JXL_DEC_FRAME instead
+        if (!seen_frame) abort();
+        // already should have set buffer if streaming
+        if (spec.use_streaming) abort();
+        // already seen need image out
+        if (seen_need_image_out) abort();
+        seen_need_image_out = true;
+      }
+
+      if (info.num_extra_channels > 0) {
+        std::uniform_int_distribution<> dis(0, info.num_extra_channels);
+        size_t ec_index = dis(mt);
+        // There is also a probability no extra channel is chosen
+        if (ec_index < info.num_extra_channels) {
+          // NOTE(review): this declaration shadows the random ec_index above,
+          // so the last extra channel is always the one requested -- confirm
+          // whether that is intended.
+          size_t ec_index = info.num_extra_channels - 1;
+          size_t ec_size;
+          if (JXL_DEC_SUCCESS != JxlDecoderExtraChannelBufferSize(
+                                     dec.get(), &format, &ec_size, ec_index)) {
+            return false;
+          }
+          extra_channel_pixels.resize(ec_size);
+          if (JXL_DEC_SUCCESS !=
+              JxlDecoderSetExtraChannelBuffer(dec.get(), &format,
+                                              extra_channel_pixels.data(),
+                                              ec_size, ec_index)) {
+            return false;
+          }
+        }
+      }
+
+      if (spec.use_callback) {
+        if (JXL_DEC_SUCCESS !=
+            JxlDecoderSetImageOutCallback(dec.get(), &format, decode_callback,
+                                          &decode_callback_data)) {
+          return false;
+        }
+      } else {
+        // Use the pixels output buffer.
+        size_t buffer_size;
+        if (JXL_DEC_SUCCESS !=
+            JxlDecoderImageOutBufferSize(dec.get(), &format, &buffer_size)) {
+          return false;
+        }
+        pixels->resize(buffer_size);
+        void* pixels_buffer = (void*)pixels->data();
+        size_t pixels_buffer_size = pixels->size();
+        if (JXL_DEC_SUCCESS !=
+            JxlDecoderSetImageOutBuffer(dec.get(), &format, pixels_buffer,
+                                        pixels_buffer_size)) {
+          return false;
+        }
+      }
+    } else if (status == JXL_DEC_JPEG_RECONSTRUCTION) {
+      // Do not check preview precedence here, since this event only declares
+      // that JPEG is going to be decoded; though, when first byte of JPEG
+      // arrives (JXL_DEC_JPEG_NEED_MORE_OUTPUT) it is certain that preview
+      // should have been produced already.
+      if (seen_jpeg_reconstruction) abort();
+      seen_jpeg_reconstruction = true;
+      if (!spec.jpeg_to_pixels) {
+        // Make sure buffer is allocated, but current size is too small to
+        // contain valid JPEG.
+        jpeg->resize(1);
+        uint8_t* jpeg_buffer = jpeg->data();
+        size_t jpeg_buffer_size = jpeg->size();
+        if (JXL_DEC_SUCCESS !=
+            JxlDecoderSetJPEGBuffer(dec.get(), jpeg_buffer, jpeg_buffer_size)) {
+          return false;
+        }
+      }
+    } else if (status == JXL_DEC_FULL_IMAGE) {
+      if (want_preview) abort();  // expected preview before frame
+      if (!spec.jpeg_to_pixels && seen_jpeg_reconstruction) {
+        if (!seen_jpeg_need_more_output) abort();
+        // Trim the JPEG buffer to the bytes actually written.
+        jpeg->resize(jpeg->size() - JxlDecoderReleaseJPEGBuffer(dec.get()));
+      } else {
+        // expected need image out or frame first
+        if (!seen_need_image_out && !seen_frame) abort();
+      }
+
+      seen_full_image = true;  // there may be multiple if animated
+
+      // There may be a next animation frame so expect those again:
+      seen_need_image_out = false;
+      seen_frame = false;
+      num_frames++;
+
+      // "Use" all the pixels; MSAN needs a conditional to count as usage.
+      Consume(pixels->cbegin(), pixels->cend());
+      Consume(jpeg->cbegin(), jpeg->cend());
+
+      // When not coalescing, check that the whole (possibly cropped) frame was
+      // sent
+      // NOTE(review): seen_need_image_out was reset to false just above, so
+      // this condition cannot hold here; the condition also tests
+      // spec.coalescing although the comment says "not coalescing" -- confirm
+      // the intended check.
+      if (seen_need_image_out && spec.use_callback && spec.coalescing) {
+        // Check that the callback sent all the pixels
+        for (uint32_t y = 0; y < decode_callback_data.ysize; y++) {
+          // Check that each row was at least called once.
+          if (decode_callback_data.called_rows[y].empty()) abort();
+          uint32_t last_idx = 0;
+          int calls = 0;
+          for (auto it : decode_callback_data.called_rows[y]) {
+            if (it.first > last_idx) {
+              if (static_cast<uint32_t>(calls) != 1) abort();
+            }
+            calls += it.second;
+            last_idx = it.first;
+          }
+        }
+      }
+      // Nothing to do. Do not yet return. If the image is an animation, more
+      // full frames may be decoded. This example only keeps the last one.
+    } else if (status == JXL_DEC_SUCCESS) {
+      if (!seen_full_image) abort();  // expected full image before finishing
+
+      // When decoding we may not get seen_need_image_out unless we were
+      // decoding the image to pixels.
+      if (seen_need_image_out && spec.use_callback && spec.coalescing) {
+        // Check that the callback sent all the pixels
+        for (uint32_t y = 0; y < decode_callback_data.ysize; y++) {
+          // Check that each row was at least called once.
+          if (decode_callback_data.called_rows[y].empty()) abort();
+          uint32_t last_idx = 0;
+          int calls = 0;
+          // Each pixel must have been covered once per decoded frame.
+          for (auto it : decode_callback_data.called_rows[y]) {
+            if (it.first > last_idx) {
+              if (static_cast<uint32_t>(calls) != num_frames) abort();
+            }
+            calls += it.second;
+            last_idx = it.first;
+          }
+        }
+      }
+
+      // All decoding successfully finished.
+      // It's not required to call JxlDecoderReleaseInput(dec.get()) here since
+      // the decoder will be destroyed.
+      return true;
+    } else if (status == JXL_DEC_BOX) {
+      if (spec.decode_boxes) {
+        // Consume what was written for the previous box, if anything.
+        if (!box_buffer.empty()) {
+          size_t remaining = JxlDecoderReleaseBoxBuffer(dec.get());
+          size_t box_size = box_buffer.size() - remaining;
+          if (box_size != 0) {
+            Consume(box_buffer.begin(), box_buffer.begin() + box_size);
+            box_buffer.clear();
+          }
+        }
+        box_buffer.resize(64);
+        JxlDecoderSetBoxBuffer(dec.get(), box_buffer.data(), box_buffer.size());
+      }
+    } else if (status == JXL_DEC_BOX_NEED_MORE_OUTPUT) {
+      if (!spec.decode_boxes) {
+        abort();  // Not expected when not setting output buffer
+      }
+      // Double the box buffer and continue writing after the bytes already
+      // produced.
+      size_t remaining = JxlDecoderReleaseBoxBuffer(dec.get());
+      size_t box_size = box_buffer.size() - remaining;
+      box_buffer.resize(box_buffer.size() * 2);
+      JxlDecoderSetBoxBuffer(dec.get(), box_buffer.data() + box_size,
+                             box_buffer.size() - box_size);
+    } else {
+      return false;
+    }
+  }
+}
+
+// Runs one fuzzing iteration. The last four bytes of `data` form a flag word
+// from which all decoding options are derived; the bytes before them are the
+// stream handed to DecodeJpegXl(). Inputs shorter than four bytes are ignored.
+// Always returns 0.
+int TestOneInput(const uint8_t* data, size_t size) {
+  if (size < 4) return 0;
+  size -= 4;
+  uint32_t flags = 0;
+  memcpy(&flags, data + size, 4);
+  size_t used_flag_bits = 0;
+
+  // Extracts a value in [0, max_value] from the low bits of `flags` and then
+  // drops the bits that were used. Aborts once more than 32 bits have been
+  // requested in total.
+  const auto getFlag = [&flags, &used_flag_bits](size_t max_value) {
+    size_t modulus = 1;
+    for (; modulus <= max_value; modulus <<= 1) {
+      if (++used_flag_bits > 32) abort();
+    }
+    const uint32_t low_bits = flags % modulus;
+    flags /= modulus;
+    return low_bits % (max_value + 1);
+  };
+  const auto getBool = [&getFlag]() { return getFlag(1) != 0; };
+
+  FuzzSpec spec;
+  // Allows some different possible variations in the chunk sizes of the
+  // streaming case
+  spec.random_seed = flags ^ size;
+  // The order of the extractions below determines which flag bits map to
+  // which option.
+  spec.get_alpha = getBool();
+  spec.get_grayscale = getBool();
+  spec.use_streaming = getBool();
+  spec.jpeg_to_pixels = getBool();
+  spec.use_callback = getBool();
+  spec.keep_orientation = getBool();
+  spec.coalescing = getBool();
+  spec.output_type = static_cast<JxlDataType>(getFlag(JXL_TYPE_FLOAT16));
+  spec.output_endianness = static_cast<JxlEndianness>(getFlag(JXL_BIG_ENDIAN));
+  spec.output_align = getFlag(16);
+  spec.decode_boxes = getBool();
+
+  std::vector<uint8_t> pixels;
+  std::vector<uint8_t> jpeg;
+  std::vector<uint8_t> icc;
+  size_t xsize, ysize;
+  const size_t max_pixels = 1 << 21;
+
+  // Restrict Highway to one target picked by the flags for this run, then
+  // restore the default afterwards.
+  const auto targets = hwy::SupportedAndGeneratedTargets();
+  const size_t target_index = getFlag(targets.size() - 1);
+  hwy::SetSupportedTargetsForTest(targets[target_index]);
+  DecodeJpegXl(data, size, max_pixels, spec, &pixels, &jpeg, &xsize, &ysize,
+               &icc);
+  hwy::SetSupportedTargetsForTest(0);
+
+  return 0;
+}
+
+}  // namespace
+
+// C-linkage fuzzer entry point; forwards the input to TestOneInput().
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  const int result = TestOneInput(data, size);
+  return result;
+}
diff --git a/third-party/libjxl/libjxl/tools/djxl_fuzzer_corpus.cc b/third-party/libjxl/libjxl/tools/djxl_fuzzer_corpus.cc
new file mode 100644
index 0000000000..e54d682df3
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/djxl_fuzzer_corpus.cc
@@ -0,0 +1,463 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#if defined(_WIN32) || defined(_WIN64)
+#include "third_party/dirent.h"
+#else
+#include <dirent.h>
+#include <unistd.h>
+#endif
+
+#include <algorithm>
+#include <functional>
+#include <iostream>
+#include <mutex>
+#include <random>
+#include <vector>
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/encode_internal.h"
+#include "lib/jxl/jpeg/enc_jpeg_data.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "tools/file_io.h"
+#include "tools/thread_pool_internal.h"
+
+namespace {
+
+const size_t kMaxWidth = 50000;
+const size_t kMaxHeight = 50000;
+const size_t kMaxPixels = 20 * (1 << 20);  // 20 MP
+const size_t kMaxBitDepth = 24;  // The maximum reasonable bit depth supported.
+
+std::mutex stderr_mutex;
+
+typedef std::function<uint8_t()> PixelGenerator;
+
+// ImageSpec needs to be a packed struct to allow us to use the raw memory of
+// the struct for hashing to create a consistent hash.
+#pragma pack(push, 1)
+struct ImageSpec {
+  bool Validate() const {  // True if the spec passes all the limits below.
+    if (width > kMaxWidth || height > kMaxHeight ||
+        width * height > kMaxPixels) {
+      return false;
+    }
+    if (bit_depth > kMaxBitDepth || bit_depth == 0) return false;
+    if (num_frames == 0) return false;
+    // JPEG doesn't support all formats, so reconstructible JPEG isn't always
+    // valid.
+    if (is_reconstructible_jpeg && (bit_depth != 8 || num_channels != 3 ||
+                                    alpha_bit_depth != 0 || num_frames != 1))
+      return false;
+    return true;
+  }
+
+  friend std::ostream& operator<<(std::ostream& o, const ImageSpec& spec) {
+    o << "ImageSpec<"
+      << "size=" << spec.width << "x" << spec.height
+      << " * chan=" << spec.num_channels << " depth=" << spec.bit_depth
+      << " alpha=" << spec.alpha_bit_depth
+      << " (premult=" << spec.alpha_is_premultiplied
+      << ") x frames=" << spec.num_frames << " seed=" << spec.seed
+      << ", speed=" << static_cast<int>(spec.params.speed_tier)
+      << ", butteraugli=" << spec.params.butteraugli_distance
+      << ", modular_mode=" << spec.params.modular_mode
+      << ", lossy_palette=" << spec.params.lossy_palette
+      << ", noise=" << spec.params.noise << ", preview=" << spec.params.preview
+      << ", fuzzer_friendly=" << spec.fuzzer_friendly
+      << ", is_reconstructible_jpeg=" << spec.is_reconstructible_jpeg
+      << ", orientation=" << static_cast<int>(spec.orientation) << ">";
+    return o;
+  }
+
+  void SpecHash(uint8_t hash[16]) const {  // 16 bytes derived from raw bytes.
+    const uint8_t* from = reinterpret_cast<const uint8_t*>(this);
+    std::seed_seq hasher(from, from + sizeof(*this));
+    uint32_t* to = reinterpret_cast<uint32_t*>(hash);
+    hasher.generate(to, to + 4);  // 4 x uint32_t fill the 16 output bytes.
+  }
+
+  uint64_t width = 256;
+  uint64_t height = 256;
+  // Number of channels *not* including alpha.
+  uint64_t num_channels = 3;
+  uint64_t bit_depth = 8;
+  // Bit depth for the alpha channel. A value of 0 means no alpha channel.
+  uint64_t alpha_bit_depth = 8;
+  int32_t alpha_is_premultiplied = false;
+
+  // Whether the ANS fuzzer friendly setting is currently enabled.
+  uint32_t fuzzer_friendly = false;
+
+  // Number of frames, all the frames will have the same size.
+  uint64_t num_frames = 1;
+
+  // The seed for the PRNG.
+  uint32_t seed = 7777;
+
+  // Flags used for compression. These are mapped to the CompressedParams.
+  struct CjxlParams {
+    float butteraugli_distance = 1.f;
+    // Must not use Weighted - see force_no_wp
+    jxl::Predictor modular_predictor = jxl::Predictor::Gradient;
+    jxl::ColorTransform color_transform = jxl::ColorTransform::kXYB;
+    jxl::SpeedTier speed_tier = jxl::SpeedTier::kTortoise;
+    bool modular_mode = false;
+    bool lossy_palette = false;
+    bool noise = false;
+    bool preview = false;
+    // CjxlParams is packed; re-add padding when sum of sizes of members is not
+    // multiple of 4.
+    // uint8_t padding_[0] = {};
+  } params;
+
+  uint32_t is_reconstructible_jpeg = false;
+  // Use 0xFFFFFFFF if any random spec is good; otherwise set the desired value.
+  uint32_t override_decoder_spec = 0xFFFFFFFF;
+  // Orientation.
+  uint8_t orientation = 0;
+  uint8_t padding_[3] = {};  // Explicit padding; see the static_assert on size.
+};
+#pragma pack(pop)
+static_assert(sizeof(ImageSpec) % 4 == 0, "Add padding to ImageSpec.");
+
+// Encodes the image described by `spec` and stores it as <hash>.jxl inside
+// `output_dir`. Returns false if converting, encoding or writing fails.
+bool GenerateFile(const char* output_dir, const ImageSpec& spec,
+                  bool regenerate, bool quiet) {
+  // Compute a checksum of the ImageSpec to name the file. This is just to keep
+  // the output of this program repeatable.
+  uint8_t checksum[16];
+  spec.SpecHash(checksum);
+  std::string hash_str(sizeof(checksum) * 2, ' ');
+  static const char* hex_chars = "0123456789abcdef";
+  for (size_t i = 0; i < sizeof(checksum); i++) {
+    hash_str[2 * i] = hex_chars[checksum[i] >> 4];
+    hash_str[2 * i + 1] = hex_chars[checksum[i] & 0x0f];  // low nibble
+  }
+  std::string output_fn = std::string(output_dir) + "/" + hash_str + ".jxl";
+
+  // Don't regenerate files if they already exist on disk to speed-up
+  // consecutive calls when --regenerate is not used.
+  struct stat st;
+  if (!regenerate && stat(output_fn.c_str(), &st) == 0 && S_ISREG(st.st_mode)) {
+    return true;
+  }
+
+  if (!quiet) {
+    std::unique_lock<std::mutex> lock(stderr_mutex);
+    std::cerr << "Generating " << spec << " as " << hash_str << std::endl;
+  }
+
+  jxl::CodecInOut io;
+  if (spec.bit_depth == 32) {
+    io.metadata.m.SetFloat32Samples();
+  } else {
+    io.metadata.m.SetUintSamples(spec.bit_depth);
+  }
+  io.metadata.m.SetAlphaBits(spec.alpha_bit_depth, spec.alpha_is_premultiplied);
+  io.metadata.m.orientation = spec.orientation;
+  io.frames.clear();
+  io.frames.reserve(spec.num_frames);
+
+  jxl::ColorEncoding c;  // NOTE(review): assigned below but never used.
+  if (spec.num_channels == 1) {
+    c = jxl::ColorEncoding::LinearSRGB(true);
+  } else if (spec.num_channels == 3) {
+    c = jxl::ColorEncoding::SRGB();
+  }
+
+  std::mt19937 mt(spec.seed);
+
+  // Compress the image.
+  jxl::PaddedBytes compressed;
+
+  std::uniform_int_distribution<> dis(1, 6);
+  PixelGenerator gen = [&]() -> uint8_t { return dis(mt); };
+
+  for (uint32_t frame = 0; frame < spec.num_frames; frame++) {
+    jxl::ImageBundle ib(&io.metadata.m);
+    const bool has_alpha = spec.alpha_bit_depth != 0;
+    const size_t bytes_per_sample =
+        jxl::DivCeil(io.metadata.m.bit_depth.bits_per_sample, 8);
+    const size_t bytes_per_pixel =
+        bytes_per_sample *
+        (io.metadata.m.color_encoding.Channels() + has_alpha);
+    const size_t row_size = spec.width * bytes_per_pixel;
+    std::vector<uint8_t> img_data(row_size * spec.height, 0);
+    for (size_t y = 0; y < spec.height; y++) {
+      size_t pos = row_size * y;
+      for (size_t x = 0; x < spec.width; x++) {
+        for (size_t b = 0; b < bytes_per_pixel; b++) {
+          img_data[pos++] = gen();
+        }
+      }
+    }
+    uint32_t num_channels = bytes_per_pixel / bytes_per_sample;
+    JxlDataType data_type =
+        bytes_per_sample == 1 ? JXL_TYPE_UINT8 : JXL_TYPE_UINT16;
+    JxlPixelFormat format = {num_channels, data_type, JXL_LITTLE_ENDIAN, 0};
+    const jxl::Span<const uint8_t> span(img_data.data(), img_data.size());
+    JXL_RETURN_IF_ERROR(ConvertFromExternal(
+        span, spec.width, spec.height, io.metadata.m.color_encoding,
+        io.metadata.m.bit_depth.bits_per_sample, format, nullptr, &ib));
+    io.frames.push_back(std::move(ib));
+  }
+
+  jxl::CompressParams params;
+  params.speed_tier = spec.params.speed_tier;
+
+  if (spec.is_reconstructible_jpeg) {
+    // If this image is supposed to be a reconstructible JPEG, collect the JPEG
+    // metadata and encode it in the beginning of the compressed bytes.
+    std::vector<uint8_t> jpeg_bytes;
+    io.jpeg_quality = 70;
+    JXL_QUIET_RETURN_IF_ERROR(jxl::Encode(io, jxl::extras::Codec::kJPG,
+                                          io.metadata.m.color_encoding,
+                                          /*bits_per_sample=*/8, &jpeg_bytes,
+                                          /*pool=*/nullptr));
+    JXL_RETURN_IF_ERROR(jxl::jpeg::DecodeImageJPG(
+        jxl::Span<const uint8_t>(jpeg_bytes.data(), jpeg_bytes.size()), &io));
+    jxl::PaddedBytes jpeg_data;
+    JXL_RETURN_IF_ERROR(
+        EncodeJPEGData(*io.Main().jpeg_data, &jpeg_data, params));
+    std::vector<uint8_t> header;
+    header.insert(header.end(), jxl::kContainerHeader,
+                  jxl::kContainerHeader + sizeof(jxl::kContainerHeader));
+    jxl::AppendBoxHeader(jxl::MakeBoxType("jbrd"), jpeg_data.size(), false,
+                         &header);
+    header.insert(header.end(), jpeg_data.data(),
+                  jpeg_data.data() + jpeg_data.size());
+    jxl::AppendBoxHeader(jxl::MakeBoxType("jxlc"), 0, true, &header);
+    compressed.append(header);
+  }
+
+  params.modular_mode = spec.params.modular_mode;
+  params.color_transform = spec.params.color_transform;
+  params.butteraugli_distance = spec.params.butteraugli_distance;
+  params.options.predictor = {spec.params.modular_predictor};
+  params.lossy_palette = spec.params.lossy_palette;
+  if (spec.params.preview) params.preview = jxl::Override::kOn;
+  if (spec.params.noise) params.noise = jxl::Override::kOn;
+
+  jxl::AuxOut aux_out;
+  jxl::PassesEncoderState passes_encoder_state;
+  // EncodeFile replaces output; pass a temporary storage for it.
+  jxl::PaddedBytes compressed_image;
+  bool ok =
+      jxl::EncodeFile(params, &io, &passes_encoder_state, &compressed_image,
+                      jxl::GetJxlCms(), &aux_out, nullptr);
+  if (!ok) return false;
+  compressed.append(compressed_image);
+
+  // Append 4 bytes with the flags used by djxl_fuzzer to select the decoding
+  // output.
+  std::uniform_int_distribution<> dis256(0, 255);
+  if (spec.override_decoder_spec == 0xFFFFFFFF) {
+    for (size_t i = 0; i < 4; ++i) compressed.push_back(dis256(mt));
+  } else {
+    for (size_t i = 0; i < 4; ++i) {
+      compressed.push_back(spec.override_decoder_spec >> (8 * i));
+    }
+  }
+
+  // A failed write must be reported as failure, not as success.
+  if (!jpegxl::tools::WriteFile(output_fn, compressed)) return false;
+  if (!quiet) {
+    std::unique_lock<std::mutex> lock(stderr_mutex);
+    std::cerr << "Stored " << output_fn << " size: " << compressed.size()
+              << std::endl;
+  }
+  return true;
+}
+
+std::vector<ImageSpec::CjxlParams> CompressParamsList() {
+  // Two encoder configurations are exercised: one lossy and one lossless.
+  using Params = ImageSpec::CjxlParams;
+
+  // Lossy: defaults with a butteraugli distance of 1.5.
+  Params lossy;
+  lossy.butteraugli_distance = 1.5;
+
+  // Lossless: modular mode, no color transform, distance 0, Weighted.
+  Params lossless;
+  lossless.modular_mode = true;
+  lossless.color_transform = jxl::ColorTransform::kNone;
+  lossless.butteraugli_distance = 0.f;
+  lossless.modular_predictor = {jxl::Predictor::Weighted};
+
+  // The lossy entry comes first, then the lossless one.
+  std::vector<Params> list;
+  list.push_back(lossy);
+  list.push_back(lossless);
+  return list;
+}
+
+void Usage() {  // Prints the command-line synopsis to stderr.
+  fputs("Use: fuzzer_corpus [-r] [-q] [-j THREADS] [output_dir]\n"
+        "\n"
+        "  -r Regenerate files if already exist.\n"
+        "  -q Be quiet.\n"
+        "  -j THREADS Number of parallel jobs to run.\n",
+        stderr);
+}
+
+}  // namespace
+
+int main(int argc, const char** argv) {
+  const char* dest_dir = nullptr;
+  bool regenerate = false;
+  bool quiet = false;
+  size_t num_threads = std::thread::hardware_concurrency();
+  for (int optind = 1; optind < argc;) {
+    if (!strcmp(argv[optind], "-r")) {
+      regenerate = true;
+      optind++;
+    } else if (!strcmp(argv[optind], "-q")) {
+      quiet = true;
+      optind++;
+    } else if (!strcmp(argv[optind], "-j")) {
+      optind++;
+      const int jobs = optind < argc ? atoi(argv[optind++]) : -1;
+      if (jobs < 0) {  // Missing value, or one that would wrap in size_t.
+        fprintf(stderr, "-j needs a non-negative argument value.\n");
+        Usage();
+        return 1;
+      }
+      num_threads = static_cast<size_t>(jobs);
+    } else if (dest_dir == nullptr) {
+      dest_dir = argv[optind++];
+    } else {
+      fprintf(stderr, "Unknown parameter: \"%s\".\n", argv[optind]);
+      Usage();
+      return 1;
+    }
+  }
+  if (!dest_dir) {
+    dest_dir = "corpus";
+  }
+
+  // The output directory must already exist; it is not created here.
+  struct stat st;
+  memset(&st, 0, sizeof(st));
+  if (stat(dest_dir, &st) != 0 || !S_ISDIR(st.st_mode)) {
+    fprintf(stderr, "Output path \"%s\" is not a directory.\n", dest_dir);
+    Usage();
+    return 1;
+  }
+
+  std::mt19937 mt(77777);  // Fixed seed: the same specs on every run.
+
+  std::vector<std::pair<uint32_t, uint32_t>> image_sizes = {
+      {8, 8},
+      {32, 32},
+      {128, 128},
+      // Degenerated cases.
+      {10000, 1},
+      {10000, 2},
+      {1, 10000},
+      {2, 10000},
+      // Large case.
+      {555, 256},
+      {257, 513},
+  };
+  const std::vector<ImageSpec::CjxlParams> params_list = CompressParamsList();
+
+  ImageSpec spec;
+  // The ans_fuzzer_friendly setting is not thread safe and therefore done in
+  // an outer loop. This determines whether to use fuzzer-friendly ANS encoding.
+  for (uint32_t fuzzer_friendly = 0; fuzzer_friendly < 2; ++fuzzer_friendly) {
+    jxl::SetANSFuzzerFriendly(fuzzer_friendly);
+    spec.fuzzer_friendly = fuzzer_friendly;
+
+    std::vector<ImageSpec> specs;
+    for (auto img_size : image_sizes) {
+      spec.width = img_size.first;
+      spec.height = img_size.second;
+      for (uint32_t bit_depth : {1, 2, 8, 16}) {
+        spec.bit_depth = bit_depth;
+        for (uint32_t num_channels : {1, 3}) {
+          spec.num_channels = num_channels;
+          for (uint32_t alpha_bit_depth : {0, 8, 16}) {
+            spec.alpha_bit_depth = alpha_bit_depth;
+            if (bit_depth == 16 && alpha_bit_depth == 8) {
+              // This mode is not supported in CopyTo().
+              continue;
+            }
+            for (uint32_t num_frames : {1, 3}) {
+              spec.num_frames = num_frames;
+              for (uint32_t preview : {0, 1}) {
+                for (bool reconstructible_jpeg : {false, true}) {
+                  spec.is_reconstructible_jpeg = reconstructible_jpeg;
+                  for (const auto& params : params_list) {
+                    spec.params = params;
+
+                    spec.params.preview = preview;
+                    if (alpha_bit_depth) {
+                      spec.alpha_is_premultiplied = mt() % 2;
+                    }
+                    if (spec.width * spec.height > 1000) {
+                      // Increase the encoder speed for larger images.
+                      spec.params.speed_tier = jxl::SpeedTier::kWombat;
+                    }
+                    spec.seed = mt() % 777777;
+                    // Pick the orientation at random. It is orthogonal to all
+                    // other features. Valid values are 1 to 8.
+                    spec.orientation = 1 + (mt() % 8);
+                    if (!spec.Validate()) {
+                      if (!quiet) {
+                        std::cerr << "Skipping " << spec << std::endl;
+                      }
+                    } else {
+                      specs.push_back(spec);
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+
+    specs.emplace_back(ImageSpec());
+    specs.back().params.lossy_palette = true;
+    specs.back().override_decoder_spec = 0;
+
+    specs.emplace_back(ImageSpec());
+    specs.back().params.noise = true;
+    specs.back().override_decoder_spec = 0;
+
+    jpegxl::tools::ThreadPoolInternal pool{num_threads};
+    const auto generate = [&specs, dest_dir, regenerate, quiet](
+                              const uint32_t task, size_t /* thread */) {
+      const ImageSpec& spec = specs[task];
+      GenerateFile(dest_dir, spec, regenerate, quiet);  // Result is ignored.
+    };
+    if (!RunOnPool(&pool, 0, specs.size(), jxl::ThreadPool::NoInit, generate,
+                   "FuzzerCorpus")) {
+      std::cerr << "Error generating fuzzer corpus" << std::endl;
+      return 1;
+    }
+  }
+  std::cerr << "Finished generating fuzzer corpus" << std::endl;
+  return 0;
+}
diff --git a/third-party/libjxl/libjxl/tools/djxl_fuzzer_test.cc b/third-party/libjxl/libjxl/tools/djxl_fuzzer_test.cc
new file mode 100644
index 0000000000..cb9597c667
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/djxl_fuzzer_test.cc
@@ -0,0 +1,44 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/thread_parallel_runner.h>
+#include <jxl/thread_parallel_runner_cxx.h>
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size);
+
+std::vector<uint64_t> AllTestIds() {  // Numeric ids used as test parameters.
+  return {
+      4546077333782528, 4716049045520384, 4718378999218176, 4729306868219904,
+      4787817341911040, 4816304719134720, 4848606801166336, 4859247059402752,
+      4887504894951424, 4984529666834432, 5014934495297536, 5112097090961408,
+      5189497920290816, 5381727462227968, 5382562858532864, 5392074930782208,
+      5467620336336896, 5473482434019328, 5489367788945408, 5556400888086528,
+      5582808628723712, 5631220790198272, 5685623166468096, 5737500246671360,
+      5785438255710208, 5800733037953024, 5849986531721216, 5858549672050688,
+      5899664422993920, 5900921718046720, 5906295376445440, 5914266367557632,
+      6013780411154432, 6165169006313472, 6277573962760192, 6329817929220096,
+      6355777170833408, 6375307931680768, 6448658097242112, 6515680276512768,
+      6569981946494976, 6735607318052864, 6737321070821376, 6748486320652288,
+  };
+}
+
+class DjxlFuzzerTest : public ::testing::TestWithParam<uint64_t> {};
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(DjxlFuzzerTestInstantiation, DjxlFuzzerTest,
+                                   ::testing::ValuesIn(AllTestIds()));
+TEST_P(DjxlFuzzerTest, TestOne) {  // Replays one stored testcase by id.
+  const std::string testcase =
+      "oss-fuzz/clusterfuzz-testcase-minimized-djxl_fuzzer-" +
+      std::to_string(GetParam());
+  printf("Testing %s\n", testcase.c_str());
+  const jxl::PaddedBytes input = jxl::test::ReadTestData(testcase);
+  LLVMFuzzerTestOneInput(input.data(), input.size());
+}
diff --git a/third-party/libjxl/libjxl/tools/djxl_main.cc b/third-party/libjxl/libjxl/tools/djxl_main.cc
new file mode 100644
index 0000000000..d0b0a2267d
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/djxl_main.cc
@@ -0,0 +1,522 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/decode.h>
+#include <jxl/thread_parallel_runner.h>
+#include <jxl/thread_parallel_runner_cxx.h>
+#include <jxl/types.h>
+
+#include <climits>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/dec/decode.h"
+#include "lib/extras/dec/jxl.h"
+#include "lib/extras/enc/apng.h"
+#include "lib/extras/enc/encode.h"
+#include "lib/extras/enc/exr.h"
+#include "lib/extras/enc/jpg.h"
+#include "lib/extras/enc/pnm.h"
+#include "lib/extras/packed_image.h"
+#include "lib/extras/time.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "tools/cmdline.h"
+#include "tools/codec_config.h"
+#include "tools/file_io.h"
+#include "tools/speed_stats.h"
+
+namespace jpegxl {
+namespace tools {
+
+struct DecompressArgs {
+  DecompressArgs() = default;
+
+  void AddCommandLineOptions(CommandLineParser* cmdline) {
+    std::string output_help("The output format can be ");
+    if (jxl::extras::GetAPNGEncoder()) {
+      output_help.append("PNG, APNG, ");
+    }
+    if (jxl::extras::GetJPEGEncoder()) {
+      output_help.append("JPEG, ");
+    } else {
+      output_help.append("JPEG (lossless reconstruction only), ");
+    }
+    if (jxl::extras::GetEXREncoder()) {
+      output_help.append("EXR, ");
+    }
+    output_help.append(
+        "PPM, PFM, or PAM.\n"
+        "    To extract metadata, use output format EXIF, XMP, or JUMBF.\n"
+        "    The format is selected based on extension ('filename.png') or "
+        "prefix ('png:filename').\n"
+        "    Use '-' for output to stdout (e.g. 'ppm:-')");
+    cmdline->AddPositionalOption(
+        "INPUT", /* required = */ true,
+        "The compressed input file (JXL). Use '-' for input from stdin.",
+        &file_in);
+
+    cmdline->AddPositionalOption("OUTPUT", /* required = */ true, output_help,
+                                 &file_out);
+
+    cmdline->AddHelpText("\nBasic options:", 0);
+
+    cmdline->AddOptionFlag('V', "version", "Print version number and exit.",
+                           &version, &SetBooleanTrue, 0);
+    cmdline->AddOptionFlag('\0', "quiet", "Silence output (except for errors).",
+                           &quiet, &SetBooleanTrue, 0);
+    cmdline->AddOptionFlag('v', "verbose",
+                           "Verbose output; can be repeated and also applies "
+                           "to help (!).",
+                           &verbose, &SetBooleanTrue);
+
+    cmdline->AddHelpText("\nAdvanced options:", 1);
+
+    cmdline->AddOptionValue('\0', "num_threads", "N",
+                            "Number of worker threads (-1 == use machine "
+                            "default, 0 == do not use multithreading).",
+                            &num_threads, &ParseSigned, 1);
+
+    opt_bits_per_sample_id = cmdline->AddOptionValue(
+        '\0', "bits_per_sample", "N",
+        "Sets the output bit depth. The value 0 (default for PNM) "
+        "means the original (input) bit depth.\n"
+        "    The value -1 (default for other codecs) means it depends on the "
+        "output format capabilities\n"
+        "    and the input bit depth (e.g. decoding a 12-bit image to PNG will "
+        "produce a 16-bit PNG).",
+        &bits_per_sample, &ParseSigned, 1);
+
+    cmdline->AddOptionValue('\0', "display_nits", "N",
+                            "If set to a non-zero value, tone maps the image "
+                            "the given peak display luminance.",
+                            &display_nits, &ParseDouble, 1);
+
+    cmdline->AddOptionValue(
+        '\0', "color_space", "COLORSPACE_DESC",
+        "Sets the desired output color space of the image. For example:\n"
+        "      --color_space=RGB_D65_SRG_Per_SRG is sRGB with perceptual "
+        "rendering intent\n"
+        "      --color_space=RGB_D65_202_Rel_PeQ is Rec.2100 PQ with relative "
+        "rendering intent",
+        &color_space, &ParseString, 1);
+
+    cmdline->AddOptionValue('s', "downsampling", "1|2|4|8",
+                            "If the input JXL stream is contains hints for "
+                            "target downsampling ratios,\n"
+                            "    only decode what is needed to produce an "
+                            "image intended for this downsampling ratio.",
+                            &downsampling, &ParseUint32, 1);
+
+    cmdline->AddOptionFlag('\0', "allow_partial_files",
+                           "Allow decoding of truncated files.",
+                           &allow_partial_files, &SetBooleanTrue, 1);
+
+    if (jxl::extras::GetJPEGEncoder()) {
+      cmdline->AddOptionFlag(
+          'j', "pixels_to_jpeg",
+          "By default, if the input JXL is a recompressed JPEG file, "
+          "djxl reconstructs that JPEG file.\n"
+          "    This flag causes the decoder to instead decode to pixels and "
+          "encode a new (lossy) JPEG.",
+          &pixels_to_jpeg, &SetBooleanTrue, 1);
+
+      opt_jpeg_quality_id = cmdline->AddOptionValue(
+          'q', "jpeg_quality", "N",
+          "Sets the JPEG output quality, default is 95. "
+          "Setting this option implies --pixels_to_jpeg.",
+          &jpeg_quality, &ParseUnsigned, 1);
+    }
+
+    cmdline->AddHelpText("\nOptions for experimentation / benchmarking:", 2);
+
+    cmdline->AddOptionValue('\0', "num_reps", "N",
+                            "Sets the number of times to decompress the image. "
+                            "Useful for benchmarking. Default is 1.",
+                            &num_reps, &ParseUnsigned, 2);
+
+    cmdline->AddOptionFlag('\0', "disable_output",
+                           "No output file will be written (for benchmarking)",
+                           &disable_output, &SetBooleanTrue, 2);
+
+    cmdline->AddOptionFlag('\0', "use_sjpeg",
+                           "Use sjpeg instead of libjpeg for JPEG output.",
+                           &use_sjpeg, &SetBooleanTrue, 2);
+
+    cmdline->AddOptionFlag('\0', "norender_spotcolors",
+                           "Disables rendering of spot colors.",
+                           &render_spotcolors, &SetBooleanFalse, 2);
+
+    cmdline->AddOptionValue('\0', "preview_out", "FILENAME",
+                            "If specified, writes the preview image to this "
+                            "file.",
+                            &preview_out, &ParseString, 2);
+
+    cmdline->AddOptionValue(
+        '\0', "icc_out", "FILENAME",
+        "If specified, writes the ICC profile of the decoded image to "
+        "this file.",
+        &icc_out, &ParseString, 2);
+
+    cmdline->AddOptionValue(
+        '\0', "orig_icc_out", "FILENAME",
+        "If specified, writes the ICC profile of the original image to "
+        "this file\n"
+        "    This can be different from the ICC profile of the "
+        "decoded image if --color_space was specified.",
+        &orig_icc_out, &ParseString, 2);
+
+    cmdline->AddOptionValue('\0', "metadata_out", "FILENAME",
+                            "If specified, writes metadata info to a JSON "
+                            "file. Used by the conformance test script",
+                            &metadata_out, &ParseString, 2);
+
+    cmdline->AddOptionFlag('\0', "print_read_bytes",
+                           "Print total number of decoded bytes.",
+                           &print_read_bytes, &SetBooleanTrue, 2);
+  }
+
+  // Validate the passed arguments, checking whether all passed options are
+  // compatible. Returns whether the validation was successful.
+  bool ValidateArgs(const CommandLineParser& cmdline) {  // cmdline is unused.
+    if (file_in == nullptr) {
+      fprintf(stderr, "Missing INPUT filename.\n");
+      return false;
+    }
+    if (num_threads < -1) {
+      fprintf(
+          stderr,
+          "Invalid flag value for --num_threads: must be -1, 0 or positive.\n");
+      return false;
+    }
+    return true;
+  }
+
+  const char* file_in = nullptr;   // Positional INPUT.
+  const char* file_out = nullptr;  // Positional OUTPUT.
+  bool version = false;
+  bool verbose = false;
+  size_t num_reps = 1;
+  bool disable_output = false;
+  int32_t num_threads = -1;  // -1: machine default, 0: no multithreading.
+  int bits_per_sample = -1;  // 0: input bit depth, -1: depends on the output.
+  double display_nits = 0.0;
+  std::string color_space;
+  uint32_t downsampling = 0;
+  bool allow_partial_files = false;
+  bool pixels_to_jpeg = false;
+  size_t jpeg_quality = 95;
+  bool use_sjpeg = false;
+  bool render_spotcolors = true;  // Cleared by --norender_spotcolors.
+  std::string preview_out;
+  std::string icc_out;
+  std::string orig_icc_out;
+  std::string metadata_out;
+  bool print_read_bytes = false;
+  bool quiet = false;
+  // References (ids) of specific options to check if they were matched.
+  CommandLineParser::OptionId opt_bits_per_sample_id = -1;
+  CommandLineParser::OptionId opt_jpeg_quality_id = -1;
+};
+
+}  // namespace tools
+}  // namespace jpegxl
+
+namespace {
+
+bool WriteOptionalOutput(const std::string& filename,
+                         const std::vector<uint8_t>& bytes) {
+  // Optional outputs are skipped, and reported as success, when no file name
+  // was given or when there is no data to store.
+  const bool skip = filename.empty() || bytes.empty();
+  return skip || jpegxl::tools::WriteFile(filename, bytes);
+}
+
+std::string Filename(const std::string& base, const std::string& extension,
+                     int layer_index, int frame_index, int num_layers,
+                     int num_frames) {
+  // "-" is passed through unchanged: no index suffixes, no extension.
+  if (base == "-") return "-";
+  // Decimal digit count of a positive number; used as the zero-pad width.
+  auto width_of = [](int n) { return 1 + static_cast<int>(std::log10(n)); };
+  std::string result(base);
+  if (num_frames > 1) {
+    const int width = width_of(num_frames);
+    std::vector<char> text(2 + width);  // '-' + digits + NUL
+    snprintf(text.data(), text.size(), "-%0*d", width, frame_index);
+    result.append(text.data());
+  }
+  if (num_layers > 1) {
+    const int width = width_of(num_layers);
+    std::vector<char> text(4 + width);  // "-ec" + digits + NUL
+    snprintf(text.data(), text.size(), "-ec%0*d", width, layer_index);
+    result.append(text.data());
+  }
+  const bool use_pgm = extension == ".ppm" && layer_index > 0;
+  result.append(use_pgm ? ".pgm" : extension);
+  return result;
+}
+
+// Decodes `compressed` with `jpeg_bytes` as the JPEG output; false on failure.
+bool DecompressJxlReconstructJPEG(const jpegxl::tools::DecompressArgs& args,
+                                  const std::vector<uint8_t>& compressed,
+                                  void* runner,
+                                  std::vector<uint8_t>* jpeg_bytes,
+                                  jpegxl::tools::SpeedStats* stats) {
+  const double start = jxl::Now();
+  jxl::extras::PackedPixelFile pixel_file;  // read only for its basic info
+  jxl::extras::JXLDecompressParams params;
+  params.runner = JxlThreadParallelRunner;
+  params.runner_opaque = runner;
+  params.allow_partial_input = args.allow_partial_files;
+  if (!jxl::extras::DecodeImageJXL(compressed.data(), compressed.size(),
+                                   params, nullptr, &pixel_file, jpeg_bytes)) {
+    return false;
+  }
+  const double elapsed = jxl::Now() - start;
+  if (stats == nullptr) return true;  // No statistics were requested.
+  stats->NotifyElapsed(elapsed);
+  stats->SetImageSize(pixel_file.info.xsize, pixel_file.info.ysize);
+  stats->SetFileSize(jpeg_bytes->size());
+  return true;
+}
+
+bool DecompressJxlToPackedPixelFile(
+    const jpegxl::tools::DecompressArgs& args,
+    const std::vector<uint8_t>& compressed,
+    const std::vector<JxlPixelFormat>& accepted_formats, void* runner,
+    jxl::extras::PackedPixelFile* ppf, size_t* decoded_bytes,
+    jpegxl::tools::SpeedStats* stats) {
+  jxl::extras::JXLDecompressParams params;
+  params.runner = JxlThreadParallelRunner;
+  params.runner_opaque = runner;
+  params.accepted_formats = accepted_formats;
+  params.max_downsampling = args.downsampling;
+  params.allow_partial_input = args.allow_partial_files;
+  params.render_spotcolors = args.render_spotcolors;
+  params.display_nits = args.display_nits;
+  params.color_space = args.color_space;
+  params.need_icc = !args.icc_out.empty();
+  // 0: codestream bit depth; > 0: that custom depth; < 0: left at default.
+  if (args.bits_per_sample > 0) {
+    params.output_bitdepth.type = JXL_BIT_DEPTH_CUSTOM;
+    params.output_bitdepth.bits_per_sample = args.bits_per_sample;
+  } else if (args.bits_per_sample == 0) {
+    params.output_bitdepth.type = JXL_BIT_DEPTH_FROM_CODESTREAM;
+  }
+  const double start = jxl::Now();
+  if (!jxl::extras::DecodeImageJXL(compressed.data(), compressed.size(),
+                                   params, decoded_bytes, ppf)) {
+    return false;
+  }
+  const double elapsed = jxl::Now() - start;
+  if (stats == nullptr) return true;  // No statistics were requested.
+  stats->NotifyElapsed(elapsed);
+  stats->SetImageSize(ppf->info.xsize, ppf->info.ysize);
+  return true;
+}
+
+}  // namespace
+
+// Entry point of the djxl command-line decoder.
+//
+// Parses the command line, reads the compressed input file, and then either
+// reconstructs the original JPEG bitstream or decodes to pixels and re-encodes
+// with the encoder selected by the output file extension. Returns EXIT_SUCCESS
+// or EXIT_FAILURE.
+int main(int argc, const char* argv[]) {
+  std::string version = jpegxl::tools::CodecConfigString(JxlDecoderVersion());
+  jpegxl::tools::DecompressArgs args;
+  jpegxl::tools::CommandLineParser cmdline;
+  args.AddCommandLineOptions(&cmdline);
+
+  if (!cmdline.Parse(argc, argv)) {
+    // Parse already printed the actual error cause.
+    fprintf(stderr, "Use '%s -h' for more information\n", argv[0]);
+    return EXIT_FAILURE;
+  }
+
+  // Version request: print to stdout and stop before doing anything else.
+  if (args.version) {
+    fprintf(stdout, "djxl %s\n", version.c_str());
+    fprintf(stdout, "Copyright (c) the JPEG XL Project\n");
+    return EXIT_SUCCESS;
+  }
+  if (!args.quiet) {
+    fprintf(stderr, "JPEG XL decoder %s\n", version.c_str());
+  }
+
+  // Help is printed both on an explicit help flag and when no input file was
+  // given; both cases exit with EXIT_SUCCESS.
+  if (cmdline.HelpFlagPassed() || !args.file_in) {
+    cmdline.PrintHelp();
+    return EXIT_SUCCESS;
+  }
+
+  if (!args.ValidateArgs(cmdline)) {
+    // ValidateArgs already printed the actual error cause.
+    fprintf(stderr, "Use '%s -h' for more information\n", argv[0]);
+    return EXIT_FAILURE;
+  }
+
+  std::vector<uint8_t> compressed;
+  // Reading compressed JPEG XL input
+  if (!jpegxl::tools::ReadFile(args.file_in, &compressed)) {
+    fprintf(stderr, "couldn't load %s\n", args.file_in);
+    return EXIT_FAILURE;
+  }
+  if (!args.quiet) {
+    cmdline.VerbosePrintf(1, "Read %" PRIuS " compressed bytes.\n",
+                          compressed.size());
+  }
+
+  // An output file is mandatory unless output was explicitly disabled.
+  if (!args.file_out && !args.disable_output) {
+    std::cerr
+        << "No output file specified and --disable_output flag not passed."
+        << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  if (args.file_out && args.disable_output && !args.quiet) {
+    fprintf(stderr,
+            "Decoding will be performed, but the result will be discarded.\n");
+  }
+
+  // Derive the output codec from the output path. When output is disabled,
+  // filename_out stays empty and codec stays kUnknown.
+  std::string filename_out;
+  std::string base;
+  std::string extension;
+  jxl::extras::Codec codec = jxl::extras::Codec::kUnknown;
+  if (args.file_out && !args.disable_output) {
+    filename_out = std::string(args.file_out);
+    codec = jxl::extras::CodecFromPath(
+        filename_out, /* bits_per_sample */ nullptr, &base, &extension);
+  }
+  // EXR output always uses the color space below; a different user-supplied
+  // value is overridden with a warning.
+  if (codec == jxl::extras::Codec::kEXR) {
+    std::string force_colorspace = "RGB_D65_SRG_Rel_Lin";
+    if (!args.color_space.empty() && args.color_space != force_colorspace) {
+      fprintf(stderr, "Warning: colorspace ignored for EXR output\n");
+    }
+    args.color_space = force_colorspace;
+  }
+  // For PNM output other than .pfm, and when the option identified by
+  // opt_jpeg_quality_id was not given, reset bits_per_sample to 0 (which
+  // DecompressJxlToPackedPixelFile maps to JXL_BIT_DEPTH_FROM_CODESTREAM).
+  if (codec == jxl::extras::Codec::kPNM && extension != ".pfm" &&
+      (args.opt_jpeg_quality_id < 0 ||
+       !cmdline.GetOption(args.opt_jpeg_quality_id)->matched())) {
+    args.bits_per_sample = 0;
+  }
+
+  // Use the library default worker count unless args.num_threads is > -1.
+  jpegxl::tools::SpeedStats stats;
+  size_t num_worker_threads = JxlThreadParallelRunnerDefaultNumWorkerThreads();
+  {
+    int64_t flag_num_worker_threads = args.num_threads;
+    if (flag_num_worker_threads > -1) {
+      num_worker_threads = flag_num_worker_threads;
+    }
+  }
+  auto runner = JxlThreadParallelRunnerMake(
+      /*memory_manager=*/nullptr, num_worker_threads);
+
+  // JPEG bitstream reconstruction is attempted only for JPEG output, and only
+  // if neither pixels_to_jpeg nor the JPEG quality option forces a pixel
+  // decode.
+  bool decode_to_pixels = (codec != jxl::extras::Codec::kJPG);
+  if (args.opt_jpeg_quality_id >= 0 &&
+      (args.pixels_to_jpeg ||
+       cmdline.GetOption(args.opt_jpeg_quality_id)->matched())) {
+    decode_to_pixels = true;
+  }
+
+  size_t num_reps = args.num_reps;
+  if (!decode_to_pixels) {
+    std::vector<uint8_t> bytes;
+    for (size_t i = 0; i < num_reps; ++i) {
+      if (!DecompressJxlReconstructJPEG(args, compressed, runner.get(), &bytes,
+                                        &stats)) {
+        // A failure that produced no bytes falls back to the pixel path
+        // below; a failure with bytes already present is fatal.
+        if (bytes.empty()) {
+          if (!args.quiet) {
+            fprintf(stderr,
+                    "Warning: could not decode losslessly to JPEG. Retrying "
+                    "with --pixels_to_jpeg...\n");
+          }
+          decode_to_pixels = true;
+          break;
+        }
+        return EXIT_FAILURE;
+      }
+    }
+    if (!bytes.empty()) {
+      if (!args.quiet) cmdline.VerbosePrintf(0, "Reconstructed to JPEG.\n");
+      // A base name of "-" is passed to WriteFile as "-" instead of the full
+      // output filename.
+      if (!filename_out.empty() &&
+          !jpegxl::tools::WriteFile(base == "-" ? "-" : filename_out.c_str(),
+                                    bytes)) {
+        return EXIT_FAILURE;
+      }
+    }
+  }
+  if (decode_to_pixels) {
+    // With an output file, the encoder for its extension dictates which pixel
+    // formats the decoder may produce; without one, the list stays empty and
+    // no encoder is created.
+    std::vector<JxlPixelFormat> accepted_formats;
+    std::unique_ptr<jxl::extras::Encoder> encoder;
+    if (!filename_out.empty()) {
+      encoder = jxl::extras::Encoder::FromExtension(extension);
+      if (encoder == nullptr) {
+        fprintf(stderr, "can't decode to the file extension '%s'\n",
+                extension.c_str());
+        return EXIT_FAILURE;
+      }
+      accepted_formats = encoder->AcceptedFormats();
+    }
+    jxl::extras::PackedPixelFile ppf;
+    size_t decoded_bytes = 0;
+    // Decode num_reps times (each call reports its timing to stats); ppf and
+    // decoded_bytes are passed to every call.
+    for (size_t i = 0; i < num_reps; ++i) {
+      if (!DecompressJxlToPackedPixelFile(args, compressed, accepted_formats,
+                                          runner.get(), &ppf, &decoded_bytes,
+                                          &stats)) {
+        fprintf(stderr, "DecompressJxlToPackedPixelFile failed\n");
+        return EXIT_FAILURE;
+      }
+    }
+    if (!args.quiet) cmdline.VerbosePrintf(0, "Decoded to pixels.\n");
+    if (args.print_read_bytes) {
+      fprintf(stderr, "Decoded bytes: %" PRIuS "\n", decoded_bytes);
+    }
+    // The quality value is always forwarded as option "q", whatever the
+    // output format of the encoder is.
+    if (encoder) {
+      std::ostringstream os;
+      os << args.jpeg_quality;
+      encoder->SetOption("q", os.str());
+    }
+    if (encoder && args.use_sjpeg) {
+      encoder->SetOption("jpeg_encoder", "sjpeg");
+    }
+    jxl::extras::EncodedImage encoded_image;
+    if (encoder) {
+      if (!args.quiet) cmdline.VerbosePrintf(2, "Encoding decoded image\n");
+      if (!encoder->Encode(ppf, &encoded_image)) {
+        fprintf(stderr, "Encode failed\n");
+        return EXIT_FAILURE;
+      }
+    }
+    // Layer 0 is the main image; layers 1..n are the extra channels. Each
+    // (layer, frame) bitstream goes to its own file, named by Filename().
+    // Without an encoder there are no bitstreams, so nothing is written here.
+    size_t nlayers = 1 + encoded_image.extra_channel_bitstreams.size();
+    size_t nframes = encoded_image.bitstreams.size();
+    for (size_t i = 0; i < nlayers; ++i) {
+      for (size_t j = 0; j < nframes; ++j) {
+        const std::vector<uint8_t>& bitstream =
+            (i == 0 ? encoded_image.bitstreams[j]
+                    : encoded_image.extra_channel_bitstreams[i - 1][j]);
+        std::string fn = Filename(base, extension, i, j, nlayers, nframes);
+        if (!jpegxl::tools::WriteFile(fn.c_str(), bitstream)) {
+          return EXIT_FAILURE;
+        }
+        if (!args.quiet)
+          cmdline.VerbosePrintf(1, "Wrote output to %s\n", fn.c_str());
+      }
+    }
+    // Side outputs: preview, ICC profiles and metadata. The short-circuit
+    // `||` stops at the first write that fails.
+    if (!WriteOptionalOutput(args.preview_out,
+                             encoded_image.preview_bitstream) ||
+        !WriteOptionalOutput(args.icc_out, ppf.icc) ||
+        !WriteOptionalOutput(args.orig_icc_out, ppf.orig_icc) ||
+        !WriteOptionalOutput(args.metadata_out, encoded_image.metadata)) {
+      return EXIT_FAILURE;
+    }
+  }
+  if (!args.quiet) {
+    stats.Print(num_worker_threads);
+  }
+  return EXIT_SUCCESS;
+}
diff --git a/third-party/libjxl/libjxl/tools/example_tree.txt b/third-party/libjxl/libjxl/tools/example_tree.txt
new file mode 100644
index 0000000000..c4df6d4089
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/example_tree.txt
@@ -0,0 +1,50 @@
+RCT 1          /* YCoCg */
+GroupShift 3   /* Group size is 128 << 3 == 1024 */
+Width 1024
+Height 1024
+Bitdepth 8
+/* FloatExpBits 3 */
+/* Alpha */
+/* Squeeze */
+/* XYB */
+/* CbYCr */
+
+
+if c > 0
+  /* Co, Cg: diagonal stripes */
+  if W > 50
+    - Set -50
+    - W + 5
+  /* Y: elementary cellular automaton */
+  if y > 0
+    if N > 0
+      if NW-N > -1
+        if N-NE > 0
+          - Set 0
+          - Set 255
+        if N-NE > 0
+          - Set 255
+          - Set 0
+      if NW-N > 0
+        if N-NE > -1
+          - Set 255
+          - Set 0
+        if N-NE > -1
+          - Set 0
+          - Set 255
+   /* First row initialization */
+   if x > 511
+     - Set 255
+     - Set 0
+
+Everything after the end of the tree is ignored.
+
+The tree above represents a cellular automaton on a subtly striped background.
+
+
+
+List of properties: c, g, y, x, |N|, |W|, N, W, W-WW-NW+NWW, W+N-NW, W-NW, NW-N, N-NE, N-NN, W-WW, WGH,
+                    PrevAbs, Prev, PrevAbsErr, PrevErr, PPrevAbs, PPrev, PPrevAbsErr, PPrevErr
+
+List of predictors: Set, W, N, AvgW+N, Select, Gradient, Weighted, NE, NW, WW, AvgW+NW, AvgN+NW, AvgN+NE, AvgAll
+
diff --git a/third-party/libjxl/libjxl/tools/fast_lossless/.gitignore b/third-party/libjxl/libjxl/tools/fast_lossless/.gitignore
new file mode 100644
index 0000000000..567609b123
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/fast_lossless/.gitignore
@@ -0,0 +1 @@
+build/
diff --git a/third-party/libjxl/libjxl/tools/fast_lossless/README.md b/third-party/libjxl/libjxl/tools/fast_lossless/README.md
new file mode 100644
index 0000000000..5f99c133d8
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/fast_lossless/README.md
@@ -0,0 +1,10 @@
+# Fast-lossless
+This folder builds a standalone version of a JXL encoder that supports
+lossless compression, up to 16 bits, of 1- to 4-channel images and animations.
+It is very fast; its compression is slightly worse than PNG for 8-bit
+non-photographic content and better or much better than PNG in all other situations.
+
+The main encoder is made out of two files, `lib/jxl/enc_fast_lossless.{cc,h}`;
+it automatically selects and runs a SIMD implementation supported by your CPU.
+
+This folder contains an example build script and `main` file.
diff --git a/third-party/libjxl/libjxl/tools/fast_lossless/build-android.sh b/third-party/libjxl/libjxl/tools/fast_lossless/build-android.sh
new file mode 100755
index 0000000000..c155b2169a
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/fast_lossless/build-android.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+set -e
+
+# Absolute path of the directory that contains this script.
+DIR=$(realpath "$(dirname "$0")")
+
+# fetch FILE URL: downloads URL to FILE unless FILE already exists.
+# --fail makes curl exit non-zero on an HTTP error instead of saving the error
+# page, and the download goes to a temporary name first so that an interrupted
+# transfer is never mistaken for a complete file on the next run.
+fetch() {
+  if [ -f "$1" ]; then
+    return 0
+  fi
+  curl --fail -o "$1.part" --url "$2"
+  mv "$1.part" "$1"
+}
+
+mkdir -p /tmp/build-android
+cd /tmp/build-android
+
+CXX="$ANDROID_NDK"/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android30-clang++
+if ! command -v "$CXX" >/dev/null ; then
+  printf >&2 '%s: Android C++ compiler not found, is ANDROID_NDK set properly?\n' "${0##*/}"
+  exit 1
+fi
+
+# lodepng is fetched at a pinned commit and compiled once; later runs reuse it.
+fetch lodepng.cpp 'https://raw.githubusercontent.com/lvandeve/lodepng/8c6a9e30576f07bf470ad6f09458a2dcd7a6a84a/lodepng.cpp'
+fetch lodepng.h 'https://raw.githubusercontent.com/lvandeve/lodepng/8c6a9e30576f07bf470ad6f09458a2dcd7a6a84a/lodepng.h'
+[ -f lodepng.o ] || "$CXX" lodepng.cpp -O3 -o lodepng.o -c
+
+"$CXX" -O3 \
+  -I. lodepng.o \
+  -I"${DIR}"/../../ \
+  "${DIR}"/../../lib/jxl/enc_fast_lossless.cc "${DIR}"/fast_lossless_main.cc \
+  -o fast_lossless
diff --git a/third-party/libjxl/libjxl/tools/fast_lossless/build.sh b/third-party/libjxl/libjxl/tools/fast_lossless/build.sh
new file mode 100755
index 0000000000..e2c0aa3fd0
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/fast_lossless/build.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+set -e
+
+# Absolute path of the directory that contains this script.
+DIR=$(realpath "$(dirname "$0")")
+mkdir -p "$DIR"/build
+cd "$DIR"/build
+
+# fetch FILE URL: downloads URL to FILE unless FILE already exists.
+# --fail makes curl exit non-zero on an HTTP error instead of saving the error
+# page, and the download goes to a temporary name first so that an interrupted
+# transfer is never mistaken for a complete file on the next run.
+fetch() {
+  if [ -f "$1" ]; then
+    return 0
+  fi
+  curl --fail -o "$1.part" --url "$2"
+  mv "$1.part" "$1"
+}
+
+# set CXX to clang++ if not set in the environment
+CXX="${CXX-clang++}"
+if ! command -v "$CXX" >/dev/null ; then
+  printf >&2 '%s: C++ compiler not found\n' "${0##*/}"
+  exit 1
+fi
+
+# lodepng is fetched at a pinned commit and compiled once; later runs reuse it.
+fetch lodepng.cpp 'https://raw.githubusercontent.com/lvandeve/lodepng/8c6a9e30576f07bf470ad6f09458a2dcd7a6a84a/lodepng.cpp'
+fetch lodepng.h 'https://raw.githubusercontent.com/lvandeve/lodepng/8c6a9e30576f07bf470ad6f09458a2dcd7a6a84a/lodepng.h'
+[ -f lodepng.o ] || "$CXX" lodepng.cpp -O3 -o lodepng.o -c
+
+"$CXX" -O3 \
+  -I. -g lodepng.o \
+  -I"$DIR"/../../ \
+  "$DIR"/../../lib/jxl/enc_fast_lossless.cc "$DIR"/fast_lossless_main.cc \
+  -o fast_lossless
diff --git a/third-party/libjxl/libjxl/tools/fast_lossless/cross_compile_aarch64.sh b/third-party/libjxl/libjxl/tools/fast_lossless/cross_compile_aarch64.sh
new file mode 100755
index 0000000000..a5e6aa2a52
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/fast_lossless/cross_compile_aarch64.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+set -e
+
+# Absolute path of the directory that contains this script.
+DIR=$(realpath "$(dirname "$0")")
+mkdir -p "$DIR"/build-aarch64
+cd "$DIR"/build-aarch64
+
+# fetch FILE URL: downloads URL to FILE unless FILE already exists.
+# --fail makes curl exit non-zero on an HTTP error instead of saving the error
+# page, and the download goes to a temporary name first so that an interrupted
+# transfer is never mistaken for a complete file on the next run.
+fetch() {
+  if [ -f "$1" ]; then
+    return 0
+  fi
+  curl --fail -o "$1.part" --url "$2"
+  mv "$1.part" "$1"
+}
+
+# Use the aarch64 cross compiler unless CXX is set in the environment.
+CXX="${CXX-aarch64-linux-gnu-c++}"
+if ! command -v "$CXX" >/dev/null ; then
+  printf >&2 '%s: C++ compiler not found\n' "${0##*/}"
+  exit 1
+fi
+
+# lodepng is fetched at a pinned commit and compiled once; later runs reuse it.
+fetch lodepng.cpp 'https://raw.githubusercontent.com/lvandeve/lodepng/8c6a9e30576f07bf470ad6f09458a2dcd7a6a84a/lodepng.cpp'
+fetch lodepng.h 'https://raw.githubusercontent.com/lvandeve/lodepng/8c6a9e30576f07bf470ad6f09458a2dcd7a6a84a/lodepng.h'
+[ -f lodepng.o ] || "$CXX" lodepng.cpp -O3 -o lodepng.o -c
+
+"$CXX" -O3 -static \
+  -I. lodepng.o \
+  -I"$DIR"/../../ \
+  "$DIR"/../../lib/jxl/enc_fast_lossless.cc "$DIR"/fast_lossless_main.cc \
+  -o fast_lossless
diff --git a/third-party/libjxl/libjxl/tools/fast_lossless/fast_lossless_main.cc b/third-party/libjxl/libjxl/tools/fast_lossless/fast_lossless_main.cc
new file mode 100644
index 0000000000..b59051d4e2
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/fast_lossless/fast_lossless_main.cc
@@ -0,0 +1,113 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <atomic>
+#include <chrono>
+#include <thread>
+#include <vector>
+
+#include "lib/jxl/enc_fast_lossless.h"
+#include "lodepng.h"
+#include "pam-input.h"
+
+// Command-line driver for the fast lossless encoder.
+//
+// Usage: <prog> in.png out.jxl [effort] [num_reps] [num_threads]
+//
+// The input is decoded with lodepng as 8-bit RGBA; if that fails, it is
+// parsed with DecodePAM instead. The pixels are encoded `num_reps` times with
+// JxlFastLosslessEncode (throughput is reported when num_reps > 1) and the
+// last result is written to the output path. Returns 0 on success and 1 on
+// any error, including a failed write or close of the output file.
+int main(int argc, char** argv) {
+  if (argc < 3) {
+    fprintf(stderr,
+            "Usage: %s in.png out.jxl [effort] [num_reps] [num_threads]\n",
+            argv[0]);
+    return 1;
+  }
+
+  const char* in = argv[1];
+  const char* out = argv[2];
+  int effort = argc >= 4 ? atoi(argv[3]) : 2;
+  int num_reps_arg = argc >= 5 ? atoi(argv[4]) : 1;
+  size_t num_threads = argc >= 6 ? atoi(argv[5]) : 0;
+
+  if (effort < 0 || effort > 127) {
+    fprintf(
+        stderr,
+        "Effort should be between 0 and 127 (default is 2, more is slower)\n");
+    return 1;
+  }
+  // A negative count would wrap around to a huge size_t repetition count.
+  if (num_reps_arg < 0) {
+    fprintf(stderr, "num_reps should not be negative\n");
+    return 1;
+  }
+  size_t num_reps = num_reps_arg;
+
+  unsigned char* png = nullptr;
+  unsigned w = 0, h = 0;
+  size_t nb_chans = 4, bitdepth = 8;
+
+  unsigned error = lodepng_decode32_file(&png, &w, &h, in);
+
+  // Fall back to the PNM/PAM reader when the input is not a decodable PNG.
+  size_t width = w, height = h;
+  if (error && !DecodePAM(in, &png, &width, &height, &nb_chans, &bitdepth)) {
+    fprintf(stderr, "lodepng error %u: %s\n", error, lodepng_error_text(error));
+    return 1;
+  }
+
+  // Runs fun(opaque, i) for every i in [0, count). The thread count is read
+  // through num_threads_ptr; 0 means "use the hardware concurrency".
+  auto parallel_runner = [](void* num_threads_ptr, void* opaque,
+                            void fun(void*, size_t), size_t count) {
+    size_t num_threads = *(size_t*)num_threads_ptr;
+    if (num_threads == 0) {
+      num_threads = std::thread::hardware_concurrency();
+    }
+    if (num_threads > count) {
+      num_threads = count;
+    }
+    if (num_threads == 1) {
+      for (size_t i = 0; i < count; i++) {
+        fun(opaque, i);
+      }
+    } else {
+      // Workers claim task indices from a shared counter. The counter has the
+      // same type as `count` so the bound check is not a mixed-sign compare.
+      std::atomic<size_t> task{0};
+      std::vector<std::thread> threads;
+      for (size_t i = 0; i < num_threads; i++) {
+        threads.push_back(std::thread([count, opaque, fun, &task]() {
+          while (true) {
+            size_t t = task++;
+            if (t >= count) break;
+            fun(opaque, t);
+          }
+        }));
+      }
+      for (auto& t : threads) t.join();
+    }
+  };
+
+  size_t encoded_size = 0;
+  unsigned char* encoded = nullptr;
+  size_t stride = width * nb_chans * (bitdepth > 8 ? 2 : 1);
+
+  auto start = std::chrono::high_resolution_clock::now();
+  for (size_t _ = 0; _ < num_reps; _++) {
+    // Only the output of the last repetition is kept.
+    free(encoded);
+    encoded_size = JxlFastLosslessEncode(
+        png, width, stride, height, nb_chans, bitdepth,
+        /*big_endian=*/true, effort, &encoded, &num_threads, +parallel_runner);
+  }
+  auto stop = std::chrono::high_resolution_clock::now();
+  if (num_reps > 1) {
+    float us =
+        std::chrono::duration_cast<std::chrono::microseconds>(stop - start)
+            .count();
+    size_t pixels = size_t{width} * size_t{height} * num_reps;
+    float mps = pixels / us;
+    fprintf(stderr, "%10.3f MP/s\n", mps);
+    fprintf(stderr, "%10.3f bits/pixel\n",
+            encoded_size * 8.0 / float(width) / float(height));
+  }
+
+  int exit_code = 0;
+  FILE* o = fopen(out, "wb");
+  if (!o) {
+    fprintf(stderr, "error opening %s: %s\n", out, strerror(errno));
+    exit_code = 1;
+  } else {
+    if (fwrite(encoded, 1, encoded_size, o) != encoded_size) {
+      fprintf(stderr, "error writing to %s: %s\n", out, strerror(errno));
+      exit_code = 1;
+    }
+    // Buffered data is flushed on close, so a close failure is a write
+    // failure too.
+    if (fclose(o) != 0) {
+      fprintf(stderr, "error writing to %s: %s\n", out, strerror(errno));
+      exit_code = 1;
+    }
+  }
+
+  free(encoded);
+  free(png);
+  return exit_code;
+}
diff --git a/third-party/libjxl/libjxl/tools/fast_lossless/pam-input.h b/third-party/libjxl/libjxl/tools/fast_lossless/pam-input.h
new file mode 100644
index 0000000000..b5a0233028
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/fast_lossless/pam-input.h
@@ -0,0 +1,292 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Prints `message` followed by a newline to stderr and returns false, so that
+// callers can write `return error_msg("...");` to report and fail at once.
+bool error_msg(const char* message) {
+  fprintf(stderr, "%s\n", message);
+  return false;
+}
+// Returns false from the enclosing function when the expression X is false.
+// X is parenthesized so that the negation applies to the whole expression.
+#define return_on_error(X) \
+  if (!(X)) return false;
+
+// Returns the index of the highest set bit of `value`, i.e. floor(log2(value))
+// (assuming a 32-bit unsigned int for __builtin_clz). `value` must be nonzero:
+// the result of __builtin_clz(0) is undefined.
+size_t Log2(uint32_t value) { return 31 - __builtin_clz(value); }
+
+// Image properties extracted from a PGM/PPM/PAM header by Parser.
+struct HeaderPNM {
+  size_t xsize;  // image width, as parsed from the header
+  size_t ysize;  // image height, as parsed from the header
+  bool is_gray;    // PGM
+  bool has_alpha;  // PAM
+  size_t bits_per_sample;  // Log2(maxval + 1) of the header's maximum value
+};
+
+// Parser for the headers of binary PGM ("P5"), PPM ("P6") and PAM ("P7")
+// files held in memory. The parser keeps a cursor into [data, data + length)
+// and reports where the pixel data starts; it does not read the pixels.
+class Parser {
+ public:
+  explicit Parser(uint8_t* data, size_t length)
+      : pos_(data), end_(data + length) {}
+
+  // Parses the magic number and the header fields into `header`.
+  // Sets "pos" to the first non-header byte/pixel on success.
+  // Returns false for an unsupported magic number or a malformed header.
+  bool ParseHeader(HeaderPNM* header, const uint8_t** pos) {
+    // The magic number is two bytes long; reject shorter inputs here rather
+    // than relying on the caller to have checked the length.
+    if (end_ - pos_ < 2) return false;
+    if (pos_[0] != 'P') return false;
+    const uint8_t type = pos_[1];
+    pos_ += 2;
+
+    switch (type) {
+      case '5':
+        header->is_gray = true;
+        return ParseHeaderPNM(header, pos);
+
+      case '6':
+        header->is_gray = false;
+        return ParseHeaderPNM(header, pos);
+
+      case '7':
+        return ParseHeaderPAM(header, pos);
+    }
+    return false;
+  }
+
+  // Parses a run of decimal digits at the cursor into `*number`. Fails if the
+  // cursor is at the end, is not on a digit, or the value does not fit in
+  // size_t.
+  // Exposed for testing
+  bool ParseUnsigned(size_t* number) {
+    if (pos_ == end_) return error_msg("PNM: reached end before number");
+    if (!IsDigit(*pos_)) return error_msg("PNM: expected unsigned number");
+
+    const size_t max_value = static_cast<size_t>(-1);
+    *number = 0;
+    while (pos_ < end_ && IsDigit(*pos_)) {
+      const size_t digit = *pos_ - '0';
+      // Equivalent to (*number * 10 + digit > max_value) without overflowing.
+      if (*number > (max_value - digit) / 10) {
+        return error_msg("PNM: number too large");
+      }
+      *number = *number * 10 + digit;
+      ++pos_;
+    }
+
+    return true;
+  }
+
+  // Parses an optionally signed decimal number with an optional fractional
+  // part (no exponent) at the cursor into `*number`.
+  bool ParseSigned(double* number) {
+    if (pos_ == end_) return error_msg("PNM: reached end before signed");
+
+    if (*pos_ != '-' && *pos_ != '+' && !IsDigit(*pos_)) {
+      return error_msg("PNM: expected signed number");
+    }
+
+    // Skip sign
+    const bool is_neg = *pos_ == '-';
+    if (is_neg || *pos_ == '+') {
+      ++pos_;
+      if (pos_ == end_) return error_msg("PNM: reached end before digits");
+    }
+
+    // Leading digits
+    *number = 0.0;
+    while (pos_ < end_ && *pos_ >= '0' && *pos_ <= '9') {
+      *number *= 10;
+      *number += *pos_ - '0';
+      ++pos_;
+    }
+
+    // Decimal places?
+    if (pos_ < end_ && *pos_ == '.') {
+      ++pos_;
+      double place = 0.1;
+      while (pos_ < end_ && *pos_ >= '0' && *pos_ <= '9') {
+        *number += (*pos_ - '0') * place;
+        place *= 0.1;
+        ++pos_;
+      }
+    }
+
+    if (is_neg) *number = -*number;
+    return true;
+  }
+
+ private:
+  static bool IsDigit(const uint8_t c) { return '0' <= c && c <= '9'; }
+  static bool IsLineBreak(const uint8_t c) { return c == '\r' || c == '\n'; }
+  static bool IsWhitespace(const uint8_t c) {
+    return IsLineBreak(c) || c == '\t' || c == ' ';
+  }
+
+  // Consumes exactly one space or '\n'.
+  bool SkipBlank() {
+    if (pos_ == end_) return error_msg("PNM: reached end before blank");
+    const uint8_t c = *pos_;
+    if (c != ' ' && c != '\n') return error_msg("PNM: expected blank");
+    ++pos_;
+    return true;
+  }
+
+  // Consumes exactly one whitespace byte. Used for the separator in front of
+  // the pixel data, where any further whitespace-valued bytes are pixels.
+  bool SkipSingleWhitespace() {
+    if (pos_ == end_) return error_msg("PNM: reached end before whitespace");
+    if (!IsWhitespace(*pos_)) return error_msg("PNM: expected whitespace");
+    ++pos_;
+    return true;
+  }
+
+  // Consumes a non-empty run of whitespace and '#' comment lines.
+  bool SkipWhitespace() {
+    if (pos_ == end_) return error_msg("PNM: reached end before whitespace");
+    if (!IsWhitespace(*pos_) && *pos_ != '#') {
+      return error_msg("PNM: expected whitespace/comment");
+    }
+
+    while (pos_ < end_ && IsWhitespace(*pos_)) {
+      ++pos_;
+    }
+
+    // Comment(s)
+    while (pos_ != end_ && *pos_ == '#') {
+      while (pos_ != end_ && !IsLineBreak(*pos_)) {
+        ++pos_;
+      }
+      // Newline(s)
+      while (pos_ != end_ && IsLineBreak(*pos_)) pos_++;
+    }
+
+    while (pos_ < end_ && IsWhitespace(*pos_)) {
+      ++pos_;
+    }
+    return true;
+  }
+
+  // If the input at the cursor starts with `keyword`, consumes it together
+  // with the separator that follows and returns true; on a mismatch the
+  // cursor is left unchanged. With `skip_all_whitespace` the separator is any
+  // run of whitespace/comments, otherwise exactly one whitespace byte.
+  bool MatchString(const char* keyword, bool skip_all_whitespace = true) {
+    const uint8_t* ppos = pos_;
+    while (*keyword) {
+      if (ppos >= end_) return error_msg("PAM: unexpected end of input");
+      if (*keyword != *ppos) return false;
+      ppos++;
+      keyword++;
+    }
+    pos_ = ppos;
+    if (skip_all_whitespace) {
+      return_on_error(SkipWhitespace());
+    } else {
+      return_on_error(SkipSingleWhitespace());
+    }
+    return true;
+  }
+
+  // Parses the "KEYWORD value" lines of a PAM header up to and including
+  // ENDHDR, then validates DEPTH and MAXVAL.
+  bool ParseHeaderPAM(HeaderPNM* header, const uint8_t** pos) {
+    size_t num_channels = 3;
+    size_t max_val = 255;
+    // Only a single whitespace byte may be consumed after ENDHDR: the bytes
+    // after it are pixel data and may themselves look like whitespace or '#'.
+    while (!MatchString("ENDHDR", /*skip_all_whitespace=*/false)) {
+      return_on_error(SkipWhitespace());
+      if (MatchString("WIDTH")) {
+        return_on_error(ParseUnsigned(&header->xsize));
+      } else if (MatchString("HEIGHT")) {
+        return_on_error(ParseUnsigned(&header->ysize));
+      } else if (MatchString("DEPTH")) {
+        return_on_error(ParseUnsigned(&num_channels));
+      } else if (MatchString("MAXVAL")) {
+        return_on_error(ParseUnsigned(&max_val));
+      } else if (MatchString("TUPLTYPE")) {
+        // Longer names are tested before their prefixes (RGB_ALPHA vs. RGB).
+        if (MatchString("RGB_ALPHA")) {
+          header->has_alpha = true;
+        } else if (MatchString("RGB")) {
+        } else if (MatchString("GRAYSCALE_ALPHA")) {
+          header->has_alpha = true;
+          header->is_gray = true;
+        } else if (MatchString("GRAYSCALE")) {
+          header->is_gray = true;
+        } else if (MatchString("BLACKANDWHITE_ALPHA")) {
+          header->has_alpha = true;
+          header->is_gray = true;
+          max_val = 1;
+        } else if (MatchString("BLACKANDWHITE")) {
+          header->is_gray = true;
+          max_val = 1;
+        } else {
+          return error_msg("PAM: unknown TUPLTYPE");
+        }
+      } else {
+        return error_msg("PAM: unknown header keyword");
+      }
+    }
+    if (num_channels !=
+        (header->has_alpha ? 1 : 0) + (header->is_gray ? 1 : 3)) {
+      return error_msg("PAM: bad DEPTH");
+    }
+    if (max_val == 0 || max_val >= 65536) {
+      return error_msg("PAM: bad MAXVAL");
+    }
+    header->bits_per_sample = Log2(max_val + 1);
+
+    *pos = pos_;
+    return true;
+  }
+
+  // Parses the "width height maxval" fields of a P5/P6 header and the single
+  // whitespace byte that separates them from the pixel data.
+  bool ParseHeaderPNM(HeaderPNM* header, const uint8_t** pos) {
+    return_on_error(SkipWhitespace());
+    return_on_error(ParseUnsigned(&header->xsize));
+
+    return_on_error(SkipWhitespace());
+    return_on_error(ParseUnsigned(&header->ysize));
+
+    return_on_error(SkipWhitespace());
+    size_t max_val;
+    return_on_error(ParseUnsigned(&max_val));
+    if (max_val == 0 || max_val >= 65536) {
+      return error_msg("PNM: bad MaxVal");
+    }
+    header->bits_per_sample = Log2(max_val + 1);
+
+    return_on_error(SkipSingleWhitespace());
+
+    *pos = pos_;
+    return true;
+  }
+
+  const uint8_t* pos_;        // current read position
+  const uint8_t* const end_;  // one past the last input byte
+};
+
+// Reads the whole file `filename` into a newly malloc'ed buffer.
+//
+// On success returns true, stores the buffer in `*out` (to be released by the
+// caller with free()) and its length in `*outsize`. Returns false if the file
+// cannot be opened, sized or fully read, if it is shorter than 9 bytes, or if
+// the allocation fails; no buffer is handed to the caller in that case.
+bool load_file(unsigned char** out, size_t* outsize, const char* filename) {
+  FILE* file;
+  file = fopen(filename, "rb");
+  if (!file) return false;
+  if (fseek(file, 0, SEEK_END) != 0) {
+    fclose(file);
+    return false;
+  }
+  // ftell reports failure as -1; check that before converting to size_t.
+  const long size = ftell(file);
+  if (size < 0 || size == LONG_MAX || size < 9 || fseek(file, 0, SEEK_SET)) {
+    fclose(file);
+    return false;
+  }
+  *outsize = static_cast<size_t>(size);
+  *out = (unsigned char*)malloc(*outsize);
+  if (!(*out)) {
+    fclose(file);
+    return false;
+  }
+  size_t readsize;
+  readsize = fread(*out, 1, *outsize, file);
+  fclose(file);
+  if (readsize != *outsize) {
+    // Do not leak the buffer of a partially read file.
+    free(*out);
+    *out = nullptr;
+    return false;
+  }
+  return true;
+}
+
+// Parses the header of the in-memory PNM/PAM file [in_file, in_file + in_size)
+// and copies its pixel data into a newly malloc'ed `*buffer`.
+static bool DecodePAMFromMemory(unsigned char* in_file, size_t in_size,
+                                uint8_t** buffer, size_t* w, size_t* h,
+                                size_t* nb_chans, size_t* bitdepth) {
+  Parser parser(in_file, in_size);
+  HeaderPNM header = {};
+  const uint8_t* pos = nullptr;
+  if (!parser.ParseHeader(&header, &pos)) return false;
+
+  if (header.bits_per_sample == 0 || header.bits_per_sample > 16) {
+    return error_msg("PNM: bits_per_sample invalid (can do at most 16-bit)");
+  }
+  *w = header.xsize;
+  *h = header.ysize;
+  *bitdepth = header.bits_per_sample;
+  *nb_chans = (header.is_gray ? 1 : 3) + (header.has_alpha ? 1 : 0);
+
+  const size_t bytes_per_sample = (*bitdepth > 8 ? 2 : 1);
+  const size_t pnm_remaining_size = in_file + in_size - pos;
+  // Check w * h * nb_chans * bytes_per_sample <= pnm_remaining_size by
+  // division, so that huge header dimensions cannot overflow the product and
+  // slip past the size check.
+  const size_t max_pixels = pnm_remaining_size / bytes_per_sample / *nb_chans;
+  if (*w != 0 && *h > max_pixels / *w) {
+    return error_msg("PNM file too small");
+  }
+  const size_t buffer_size = *w * *h * *nb_chans * bytes_per_sample;
+  // malloc(0) may legitimately return null; always request at least one byte
+  // so that a null result means an allocation failure.
+  *buffer = (uint8_t*)malloc(buffer_size > 0 ? buffer_size : 1);
+  if (*buffer == nullptr) {
+    return error_msg("PNM: out of memory");
+  }
+  memcpy(*buffer, pos, buffer_size);
+  return true;
+}
+
+// Loads the binary PGM/PPM/PAM file `filename`.
+//
+// On success returns true, stores the raw sample bytes that follow the header
+// in a newly malloc'ed `*buffer` (to be released by the caller with free())
+// and fills in the width, height, channel count and bit depth. Returns false,
+// after printing a message for most failures, if the file cannot be read, the
+// header is invalid, or the file holds fewer sample bytes than the header
+// implies.
+bool DecodePAM(const char* filename, uint8_t** buffer, size_t* w, size_t* h,
+               size_t* nb_chans, size_t* bitdepth) {
+  unsigned char* in_file;
+  size_t in_size;
+  if (!load_file(&in_file, &in_size, filename))
+    return error_msg("Could not read input file");
+  const bool ok =
+      DecodePAMFromMemory(in_file, in_size, buffer, w, h, nb_chans, bitdepth);
+  // The pixels were copied out, so the file contents are no longer needed.
+  free(in_file);
+  return ok;
+}
diff --git a/third-party/libjxl/libjxl/tools/fields_fuzzer.cc b/third-party/libjxl/libjxl/tools/fields_fuzzer.cc
new file mode 100644
index 0000000000..6023414956
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/fields_fuzzer.cc
@@ -0,0 +1,94 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jpegxl {
+namespace tools {
+
+using ::jxl::BitReader;
+using ::jxl::CodecMetadata;
+using ::jxl::CustomTransformData;
+using ::jxl::ImageMetadata;
+using ::jxl::SizeHeader;
+using ::jxl::Span;
+
+// Fuzzer body: reads one header/bundle type from the input through a
+// BitReader. data[0] selects the type and the remaining bytes are the
+// bitstream. The results of the read calls are deliberately discarded with
+// (void) casts. Always returns 0.
+int TestOneInput(const uint8_t* data, size_t size) {
+  // Global parameters used by some headers.
+  CodecMetadata codec_metadata;
+
+  // First byte controls which header to parse.
+  if (size == 0) return 0;
+  BitReader reader(Span<const uint8_t>(data + 1, size - 1));
+// Expands to a `case number:` that constructs a ::jxl::classname from the
+// optional extra arguments and reads it from `reader` via jxl::Bundle::Read.
+#define FUZZER_CASE_HEADER(number, classname, ...) \
+  case number: {                                   \
+    ::jxl::classname header{__VA_ARGS__};          \
+    (void)jxl::Bundle::Read(&reader, &header);     \
+    break;                                         \
+  }
+  switch (data[0]) {
+    // Cases 0 and 1 use dedicated read functions instead of Bundle::Read.
+    case 0: {
+      SizeHeader size_header;
+      (void)jxl::ReadSizeHeader(&reader, &size_header);
+      break;
+    }
+
+    case 1: {
+      ImageMetadata metadata;
+      (void)jxl::ReadImageMetadata(&reader, &metadata);
+      break;
+    }
+
+      FUZZER_CASE_HEADER(2, FrameHeader, &codec_metadata)
+      FUZZER_CASE_HEADER(3, jpeg::JPEGData)
+      FUZZER_CASE_HEADER(4, AnimationFrame, &codec_metadata)
+      FUZZER_CASE_HEADER(5, AnimationHeader)
+      FUZZER_CASE_HEADER(6, BitDepth)
+      FUZZER_CASE_HEADER(7, BlendingInfo)
+      FUZZER_CASE_HEADER(8, ColorEncoding)
+      FUZZER_CASE_HEADER(9, CustomTransferFunction)
+      FUZZER_CASE_HEADER(10, Customxy)
+      FUZZER_CASE_HEADER(11, ExtraChannelInfo)
+      FUZZER_CASE_HEADER(12, GroupHeader)
+      FUZZER_CASE_HEADER(13, weighted::Header)
+      FUZZER_CASE_HEADER(14, LoopFilter)
+      FUZZER_CASE_HEADER(15, LZ77Params)
+      FUZZER_CASE_HEADER(16, OpsinInverseMatrix)
+      FUZZER_CASE_HEADER(17, Passes)
+      FUZZER_CASE_HEADER(18, PreviewHeader)
+      FUZZER_CASE_HEADER(19, QuantizerParams)
+      FUZZER_CASE_HEADER(20, SqueezeParams)
+      FUZZER_CASE_HEADER(21, ToneMapping)
+      FUZZER_CASE_HEADER(22, Transform)
+      FUZZER_CASE_HEADER(23, YCbCrChromaSubsampling)
+
+    // Every selector value without its own case (24 and above) reads a
+    // CustomTransformData with nonserialized_xyb_encoded set.
+    default: {
+      CustomTransformData transform_data;
+      transform_data.nonserialized_xyb_encoded = true;
+      (void)jxl::Bundle::Read(&reader, &transform_data);
+      break;
+    }
+  }
+  // The reader is closed explicitly; the returned status is ignored.
+  (void)reader.Close();
+
+  return 0;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
+
+// libFuzzer entry point; forwards the input to jpegxl::tools::TestOneInput.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  return jpegxl::tools::TestOneInput(data, size);
+}
diff --git a/third-party/libjxl/libjxl/tools/file_io.h b/third-party/libjxl/libjxl/tools/file_io.h
new file mode 100644
index 0000000000..740df61534
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/file_io.h
@@ -0,0 +1,149 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_FILE_IO_H_
+#define TOOLS_FILE_IO_H_
+
+#include <errno.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#include <list>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jpegxl {
+namespace tools {
+
+namespace {
+
+// RAII, ensures files are closed even when returning early.
+//
+// The pathname "-" selects a standard stream instead of a file: stdin when
+// `mode` starts with 'r', stdout otherwise. Standard streams are not closed
+// on destruction and their size is reported as unknown.
+class FileWrapper {
+ public:
+  FileWrapper(const FileWrapper& other) = delete;
+  FileWrapper& operator=(const FileWrapper& other) = delete;
+
+  explicit FileWrapper(const std::string& pathname, const char* mode)
+      : file_(pathname == "-" ? (mode[0] == 'r' ? stdin : stdout)
+                              : fopen(pathname.c_str(), mode)),
+        close_on_delete_(pathname != "-") {
+    // For a standard stream the size stays unknown; in particular, do not
+    // pick up the size of an unrelated file that is literally named "-".
+    if (!close_on_delete_) return;
+#ifdef _WIN32
+    struct __stat64 s = {};
+    const int err = _stat64(pathname.c_str(), &s);
+    const bool is_file = (s.st_mode & S_IFREG) != 0;
+#else
+    struct stat s = {};
+    const int err = stat(pathname.c_str(), &s);
+    const bool is_file = S_ISREG(s.st_mode);
+#endif
+    // Only regular files have a size that can be trusted up front.
+    if (err == 0 && is_file) {
+      size_ = s.st_size;
+    }
+  }
+
+  ~FileWrapper() {
+    if (file_ != nullptr && close_on_delete_) {
+      const int err = fclose(file_);
+      if (err) {
+        fprintf(stderr,
+                "Could not close file\n"
+                "Error: %s\n",
+                strerror(errno));
+      }
+    }
+  }
+
+  // We intend to use FileWrapper as a replacement of FILE.
+  // NOLINTNEXTLINE(google-explicit-constructor)
+  operator FILE*() const { return file_; }
+
+  // Size in bytes of the regular file at `pathname`, or -1 when unknown.
+  int64_t size() const { return size_; }
+
+ private:
+  FILE* const file_;
+  bool close_on_delete_ = true;
+  int64_t size_ = -1;
+};
+
+}  // namespace
+
+// Reads the entire contents of `filename` ("-" selects stdin, as implemented
+// by FileWrapper) into `*bytes`. Returns false if the file cannot be opened
+// or read completely.
+template <typename ContainerType>
+static inline bool ReadFile(const std::string& filename,
+                            ContainerType* JXL_RESTRICT bytes) {
+  FileWrapper input(filename, "rb");
+  if (!input) return false;
+
+  const int64_t known_size = input.size();
+  if (known_size >= 0) {
+    // The size is known up front: read everything with a single call.
+    const size_t expected = static_cast<size_t>(known_size);
+    bytes->resize(expected);
+    return fread(bytes->data(), 1, bytes->size(), input) == expected;
+  }
+
+  // The size is unknown: read fixed-size blocks until end of file and only
+  // hand the data over once everything was read successfully.
+  bytes->clear();
+  std::vector<uint8_t> accumulated;
+  std::vector<uint8_t> block(16 * 1024);
+  for (;;) {
+    const size_t received = fread(block.data(), 1, block.size(), input);
+    if (ferror(input) || received > block.size()) {
+      return false;
+    }
+    accumulated.insert(accumulated.end(), block.begin(),
+                       block.begin() + received);
+    if (feof(input)) {
+      break;
+    }
+  }
+
+  bytes->resize(accumulated.size());
+  if (!accumulated.empty()) {
+    memcpy(bytes->data(), accumulated.data(), accumulated.size());
+  }
+  return true;
+}
+
+// Writes `bytes` to `filename` ("-" selects stdout, as implemented by
+// FileWrapper). Returns false, after printing the reason to stderr, if the
+// file cannot be opened or the data cannot be written.
+template <typename ContainerType>
+static inline bool WriteFile(const std::string& filename,
+                             const ContainerType& bytes) {
+  FileWrapper file(filename, "wb");
+  if (!file) {
+    fprintf(stderr,
+            "Could not open %s for writing\n"
+            "Error: %s\n",
+            filename.c_str(), strerror(errno));
+    return false;
+  }
+  // fwrite may only fill the stdio buffer; flush so that an error such as a
+  // full disk is reported here instead of surfacing only when the file is
+  // closed, after this function has already returned true.
+  if (fwrite(bytes.data(), 1, bytes.size(), file) != bytes.size() ||
+      fflush(file) != 0) {
+    fprintf(stderr,
+            "Could not write to file\n"
+            "Error: %s\n",
+            strerror(errno));
+    return false;
+  }
+  return true;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_FILE_IO_H_
diff --git a/third-party/libjxl/libjxl/tools/flicker_test/CMakeLists.txt b/third-party/libjxl/libjxl/tools/flicker_test/CMakeLists.txt
new file mode 100644
index 0000000000..427a34f100
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/flicker_test/CMakeLists.txt
@@ -0,0 +1,38 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# The flicker test is an optional GUI tool: it is skipped (with a warning)
+# when Qt6 Widgets is not available.
+find_package(Qt6 QUIET COMPONENTS Widgets)
+if (NOT Qt6_FOUND)
+  message(WARNING "Qt6 was not found. The flicker test tool will not be built.")
+  return()
+endif ()
+
+# It also links against the icc_detect and image_loading targets, so it is
+# skipped as well when either of those targets has not been defined.
+if (NOT TARGET icc_detect OR NOT TARGET image_loading)
+  message(WARNING "Comparison tool not built. The flicker test tool will not be built.")
+  return()
+endif ()
+
+# Add the current source/binary directories to the include path and let CMake
+# run moc and uic automatically for the sources listed below.
+set(CMAKE_INCLUDE_CURRENT_DIR ON)
+set(CMAKE_AUTOMOC ON)
+set(CMAKE_AUTOUIC ON)
+
+# WIN32 marks the executable as a GUI application on Windows.
+add_executable(flicker_test WIN32
+  main.cc
+  parameters.cc
+  parameters.h
+  setup.cc
+  setup.h
+  setup.ui
+  split_view.cc
+  split_view.h
+  test_window.cc
+  test_window.h
+  test_window.ui)
+
+target_link_libraries(flicker_test PUBLIC
+  Qt6::Widgets
+  image_loading
+  icc_detect
+)
diff --git a/third-party/libjxl/libjxl/tools/flicker_test/main.cc b/third-party/libjxl/libjxl/tools/flicker_test/main.cc
new file mode 100644
index 0000000000..9617765ba0
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/flicker_test/main.cc
@@ -0,0 +1,23 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <QApplication>
+
+#include "tools/flicker_test/setup.h"
+#include "tools/flicker_test/test_window.h"
+
+// Entry point of the flicker test tool.
+//
+// Shows the setup wizard first; if it is accepted, builds the test window
+// from the chosen parameters and, when that window agrees to proceed, shows
+// it maximized and runs the Qt event loop. Every other path exits with 0.
+int main(int argc, char** argv) {
+  QApplication application(argc, argv);
+
+  jpegxl::tools::FlickerTestWizard wizard;
+  if (!wizard.exec()) {
+    // Wizard was not accepted: nothing to run.
+    return 0;
+  }
+
+  jpegxl::tools::FlickerTestWindow test_window(wizard.parameters());
+  if (!test_window.proceedWithTest()) {
+    return 0;
+  }
+
+  test_window.showMaximized();
+  return application.exec();
+}
diff --git a/third-party/libjxl/libjxl/tools/flicker_test/parameters.cc b/third-party/libjxl/libjxl/tools/flicker_test/parameters.cc
new file mode 100644
index 0000000000..460867be8f
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/flicker_test/parameters.cc
@@ -0,0 +1,89 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/flicker_test/parameters.h"
+
+namespace jpegxl {
+namespace tools {
+
+// Names of the QSettings groups and keys used by
+// FlickerTestParameters::loadFrom() and FlickerTestParameters::saveTo().
+namespace {
+
+// Group holding the input folders and the output file.
+constexpr char kPathsGroup[] = "paths";
+constexpr char kOriginalFolderKey[] = "originalFolder";
+constexpr char kAlteredFolderKey[] = "alteredFolder";
+constexpr char kOutputFileKey[] = "outputFile";
+
+// Group holding the timing values. The "gray" group is opened while the
+// timing group is still open, i.e. it is nested inside it.
+constexpr char kTimingGroup[] = "timing";
+constexpr char kAdvanceTimeKey[] = "advanceTimeMSecs";
+constexpr char kViewingTimeKey[] = "viewingTimeSecs";
+constexpr char kBlankingTimeKey[] = "blankingTimeMSecs";
+constexpr char kGrayGroup[] = "gray";
+constexpr char kGrayKey[] = "enabled";
+constexpr char kGrayFadingTimeKey[] = "fadingTimeMSecs";
+constexpr char kGrayTimeKey[] = "timeMSecs";
+
+// Group holding the display-related values.
+constexpr char kDisplayGroup[] = "display";
+constexpr char kIntensityTargetKey[] = "intensityTarget";
+constexpr char kSpacingKey[] = "spacing";
+
+}  // namespace
+
+// Builds a FlickerTestParameters from the values stored in `settings`.
+//
+// Values are read from the "paths", "timing" (with its nested "gray" group)
+// and "display" groups. Timing and display keys carry an explicit fallback
+// that is used when the key is absent; the path keys are read without one.
+// Every group opened here is closed again before returning.
+FlickerTestParameters FlickerTestParameters::loadFrom(
+    QSettings* const settings) {
+  // Readers that resolve `key` relative to the currently open group.
+  const auto readString = [settings](const auto& key) {
+    return settings->value(key).toString();
+  };
+  const auto readInt = [settings](const auto& key, const int fallback) {
+    return settings->value(key, fallback).toInt();
+  };
+
+  FlickerTestParameters loaded;
+
+  // paths
+  settings->beginGroup(kPathsGroup);
+  loaded.originalFolder = readString(kOriginalFolderKey);
+  loaded.alteredFolder = readString(kAlteredFolderKey);
+  loaded.outputFile = readString(kOutputFileKey);
+  settings->endGroup();
+
+  // timing, with the gray sub-group nested inside it
+  settings->beginGroup(kTimingGroup);
+  loaded.advanceTimeMSecs = readInt(kAdvanceTimeKey, 100);
+  loaded.viewingTimeSecs = readInt(kViewingTimeKey, 4);
+  loaded.blankingTimeMSecs = readInt(kBlankingTimeKey, 250);
+  settings->beginGroup(kGrayGroup);
+  loaded.gray = settings->value(kGrayKey, false).toBool();
+  loaded.grayFadingTimeMSecs = readInt(kGrayFadingTimeKey, 100);
+  loaded.grayTimeMSecs = readInt(kGrayTimeKey, 300);
+  settings->endGroup();
+  settings->endGroup();
+
+  // display
+  settings->beginGroup(kDisplayGroup);
+  loaded.intensityTarget = readInt(kIntensityTargetKey, 250);
+  loaded.spacing = readInt(kSpacingKey, 50);
+  settings->endGroup();
+
+  return loaded;
+}
+
+// Stores every field of this object in `settings`, using the same group and
+// key layout that loadFrom() reads: "paths", "timing" (with a nested "gray"
+// group) and "display".
+void FlickerTestParameters::saveTo(QSettings* const settings) const {
+  // Runs `body` with `group` opened on `settings`, then closes the group.
+  const auto withGroup = [settings](const auto& group, const auto& body) {
+    settings->beginGroup(group);
+    body();
+    settings->endGroup();
+  };
+
+  withGroup(kPathsGroup, [&] {
+    settings->setValue(kOriginalFolderKey, originalFolder);
+    settings->setValue(kAlteredFolderKey, alteredFolder);
+    settings->setValue(kOutputFileKey, outputFile);
+  });
+
+  withGroup(kTimingGroup, [&] {
+    settings->setValue(kAdvanceTimeKey, advanceTimeMSecs);
+    settings->setValue(kViewingTimeKey, viewingTimeSecs);
+    settings->setValue(kBlankingTimeKey, blankingTimeMSecs);
+    // The gray settings live in a sub-group of the timing group.
+    withGroup(kGrayGroup, [&] {
+      settings->setValue(kGrayKey, gray);
+      settings->setValue(kGrayFadingTimeKey, grayFadingTimeMSecs);
+      settings->setValue(kGrayTimeKey, grayTimeMSecs);
+    });
+  });
+
+  withGroup(kDisplayGroup, [&] {
+    settings->setValue(kIntensityTargetKey, intensityTarget);
+    settings->setValue(kSpacingKey, spacing);
+  });
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/flicker_test/parameters.h b/third-party/libjxl/libjxl/tools/flicker_test/parameters.h
new file mode 100644
index 0000000000..777d479d8d
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/flicker_test/parameters.h
@@ -0,0 +1,34 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_FLICKER_TEST_PARAMETERS_H_
+#define TOOLS_FLICKER_TEST_PARAMETERS_H_
+
+#include <QSettings>
+
+namespace jpegxl {
+namespace tools {
+
+// User-selected configuration of a flicker test.
+//
+// The numeric and boolean members carry default member initializers so that
+// a default-constructed instance never holds indeterminate values. The
+// defaults are the same fallbacks that loadFrom() uses for absent keys.
+struct FlickerTestParameters {
+  // Folder with the original images.
+  QString originalFolder;
+  // Folder with the altered images.
+  QString alteredFolder;
+  // CSV file in which to save the results.
+  QString outputFile;
+  // Advance time, in milliseconds.
+  int advanceTimeMSecs = 100;
+  // Viewing time, in seconds. The setup UI labels 0 as "no limit".
+  int viewingTimeSecs = 4;
+  // Blanking time, in milliseconds.
+  int blankingTimeMSecs = 250;
+  // Whether gray flickering is enabled.
+  bool gray = false;
+  // Fading time to and from gray, in milliseconds.
+  int grayFadingTimeMSecs = 100;
+  // Time spent on gray, in milliseconds.
+  int grayTimeMSecs = 300;
+  // Display peak luminance (the setup UI shows it in cd/m^2).
+  int intensityTarget = 250;
+  // Spacing between the two images (the setup UI shows it in px).
+  int spacing = 50;
+
+  // Reads the parameters from `settings`, using defaults for absent timing
+  // and display keys.
+  static FlickerTestParameters loadFrom(QSettings* settings);
+  // Writes all parameters to `settings`.
+  void saveTo(QSettings* settings) const;
+};
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_FLICKER_TEST_PARAMETERS_H_
diff --git a/third-party/libjxl/libjxl/tools/flicker_test/setup.cc b/third-party/libjxl/libjxl/tools/flicker_test/setup.cc
new file mode 100644
index 0000000000..ff172861bf
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/flicker_test/setup.cc
@@ -0,0 +1,153 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/flicker_test/setup.h"
+
+#include <QCompleter>
+#include <QFileDialog>
+#include <QFileSystemModel>
+#include <QMessageBox>
+#include <QPushButton>
+
+namespace jpegxl {
+namespace tools {
+
+// Builds the setup wizard: creates the widgets from the .ui description,
+// wires the signals, restores the previously saved parameters from
+// `settings_` into the widgets, and arranges for the parameters to be saved
+// again when the wizard is accepted.
+FlickerTestWizard::FlickerTestWizard(QWidget* const parent)
+    : QWizard(parent), settings_("JPEG XL project", "Flickering test") {
+  ui_.setupUi(this);
+
+  // Keep the "total gray time" label in sync with both gray spin boxes.
+  // These connections are made before the saved values are applied below, so
+  // a value change caused by restoring the settings also refreshes the label.
+  connect(ui_.grayFadingTime, SIGNAL(valueChanged(int)), this,
+          SLOT(updateTotalGrayTime()));
+  connect(ui_.grayTime, SIGNAL(valueChanged(int)), this,
+          SLOT(updateTotalGrayTime()));
+
+  ui_.timingButtonBox->button(QDialogButtonBox::RestoreDefaults)
+      ->setText(tr("Restore ISO/IEC 29170-2:2015 parameters"));
+
+  setButtonText(QWizard::FinishButton, tr("Start test"));
+
+  // Directory-only path completion, shared by both folder line edits. The
+  // completer is parented to the wizard and the model to the completer.
+  QCompleter* const completer = new QCompleter(this);
+  QFileSystemModel* const model = new QFileSystemModel(completer);
+  model->setRootPath("/");
+  model->setFilter(QDir::Dirs);
+  completer->setModel(model);
+  ui_.originalFolder->setCompleter(completer);
+  ui_.alteredFolder->setCompleter(completer);
+
+  // Restore the values from the last accepted run (or the defaults).
+  const auto parameters = FlickerTestParameters::loadFrom(&settings_);
+  ui_.originalFolder->setText(parameters.originalFolder);
+  ui_.alteredFolder->setText(parameters.alteredFolder);
+  ui_.outputFile->setText(parameters.outputFile);
+  ui_.advanceTime->setValue(parameters.advanceTimeMSecs);
+  ui_.viewingTime->setValue(parameters.viewingTimeSecs);
+  ui_.blankingTime->setValue(parameters.blankingTimeMSecs);
+  ui_.grayFlickering->setChecked(parameters.gray);
+  ui_.grayFadingTime->setValue(parameters.grayFadingTimeMSecs);
+  ui_.grayTime->setValue(parameters.grayTimeMSecs);
+  ui_.intensityTarget->setValue(parameters.intensityTarget);
+  ui_.spacing->setValue(parameters.spacing);
+
+  // The spacing demo shows two plain white 256x256 placeholders.
+  QImage white(256, 256, QImage::Format_RGB32);
+  white.fill(Qt::white);
+  ui_.spacingDemo->setOriginalImage(white);
+  ui_.spacingDemo->setAlteredImage(white);
+
+  // Persist the current widget values once the wizard is accepted.
+  connect(this, &QDialog::accepted,
+          [&] { this->parameters().saveTo(&settings_); });
+}
+
+// Returns a snapshot of the values currently shown in the wizard's widgets.
+FlickerTestParameters FlickerTestWizard::parameters() const {
+  FlickerTestParameters snapshot;
+
+  // Paths page.
+  snapshot.originalFolder = ui_.originalFolder->text();
+  snapshot.alteredFolder = ui_.alteredFolder->text();
+  snapshot.outputFile = ui_.outputFile->text();
+
+  // Timing page, including the gray flickering group.
+  snapshot.advanceTimeMSecs = ui_.advanceTime->value();
+  snapshot.viewingTimeSecs = ui_.viewingTime->value();
+  snapshot.blankingTimeMSecs = ui_.blankingTime->value();
+  snapshot.gray = ui_.grayFlickering->isChecked();
+  snapshot.grayFadingTimeMSecs = ui_.grayFadingTime->value();
+  snapshot.grayTimeMSecs = ui_.grayTime->value();
+
+  // Display pages.
+  snapshot.intensityTarget = ui_.intensityTarget->value();
+  snapshot.spacing = ui_.spacing->value();
+
+  return snapshot;
+}
+
+// Lets the user pick the folder with the original images, starting from the
+// folder currently entered. The line edit is left untouched when the dialog
+// returns an empty path.
+void FlickerTestWizard::on_originalFolderBrowseButton_clicked() {
+  const QString chosen = QFileDialog::getExistingDirectory(
+      this, tr("Folder with original images"), ui_.originalFolder->text());
+  if (chosen.isEmpty()) return;
+  ui_.originalFolder->setText(chosen);
+}
+
+// Lets the user pick the folder with the altered images, starting from the
+// folder currently entered. The line edit is left untouched when the dialog
+// returns an empty path.
+void FlickerTestWizard::on_alteredFolderBrowseButton_clicked() {
+  const QString chosen = QFileDialog::getExistingDirectory(
+      this, tr("Folder with altered images"), ui_.alteredFolder->text());
+  if (chosen.isEmpty()) return;
+  ui_.alteredFolder->setText(chosen);
+}
+
+// Lets the user choose the CSV file that will receive the results. The line
+// edit is left untouched when the dialog returns an empty path.
+void FlickerTestWizard::on_outputFileBrowseButton_clicked() {
+  // The dialog's own overwrite confirmation is switched off on purpose:
+  // `validateCurrentPage` (run when the user clicks "Next") performs that
+  // check instead, so that it also covers a path that was restored from the
+  // saved settings rather than picked here.
+  const QString chosen = QFileDialog::getSaveFileName(
+      this, tr("CSV file in which to save the results"), ui_.outputFile->text(),
+      tr("CSV files (*.csv)"), /*selectedFilter=*/nullptr,
+      QFileDialog::DontConfirmOverwrite);
+  if (chosen.isEmpty()) return;
+  ui_.outputFile->setText(chosen);
+}
+
+// Handles clicks in the timing page's button box. Only the "restore
+// defaults" button is acted upon: it resets the advance, viewing and blanking
+// times and turns gray flickering off.
+void FlickerTestWizard::on_timingButtonBox_clicked(
+    QAbstractButton* const button) {
+  const auto clicked = ui_.timingButtonBox->standardButton(button);
+  if (clicked != QDialogButtonBox::RestoreDefaults) return;
+
+  ui_.advanceTime->setValue(100);
+  ui_.viewingTime->setValue(4);
+  ui_.blankingTime->setValue(250);
+  ui_.grayFlickering->setChecked(false);
+}
+
+// Refreshes the label showing the total gray time, i.e. the time on gray
+// plus one fade to gray and one fade back.
+void FlickerTestWizard::updateTotalGrayTime() {
+  const int fadingMSecs = ui_.grayFadingTime->value();
+  const int onGrayMSecs = ui_.grayTime->value();
+  const QString text =
+      tr("Total gray time: %L1&#8239;ms").arg(2 * fadingMSecs + onGrayMSecs);
+  ui_.totalGrayTimeLabel->setText(text);
+}
+
+// Extra validation performed before the wizard leaves the current page.
+//
+// * Paths page: if the chosen output file already exists, asks whether it
+//   may be overwritten and blocks the page change on "Cancel".
+// * Timing page: if gray flickering is enabled, rejects settings whose total
+//   gray time (two fades plus the time on gray) exceeds the advance time.
+//
+// When neither check blocks, the decision is left to QWizard.
+bool FlickerTestWizard::validateCurrentPage() {
+  QWizardPage* const page = currentPage();
+
+  if (page == ui_.pathsPage) {
+    const QString outputPath = ui_.outputFile->text();
+    if (QFile::exists(outputPath)) {
+      QMessageBox confirmation(this);
+      confirmation.setIcon(QMessageBox::Warning);
+      confirmation.setStandardButtons(QMessageBox::Ok | QMessageBox::Cancel);
+      confirmation.setWindowTitle(tr("Output file already exists"));
+      confirmation.setText(
+          tr("The selected output file \"%1\" already exists.")
+              .arg(outputPath));
+      confirmation.setInformativeText(tr("Do you wish to overwrite it?"));
+      if (confirmation.exec() == QMessageBox::Cancel) {
+        return false;
+      }
+    }
+  } else if (page == ui_.timesPage) {
+    // The gray phase has to fit within one advance period.
+    const bool grayTooLong =
+        ui_.grayFlickering->isChecked() &&
+        2 * ui_.grayFadingTime->value() + ui_.grayTime->value() >
+            ui_.advanceTime->value();
+    if (grayTooLong) {
+      QMessageBox warning(this);
+      warning.setIcon(QMessageBox::Warning);
+      warning.setStandardButtons(QMessageBox::Ok);
+      warning.setWindowTitle(tr("Incompatible times selected"));
+      warning.setText(
+          tr("The total gray time is greater than the advance time."));
+      warning.exec();
+      return false;
+    }
+  }
+
+  return QWizard::validateCurrentPage();
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/flicker_test/setup.h b/third-party/libjxl/libjxl/tools/flicker_test/setup.h
new file mode 100644
index 0000000000..e034e28430
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/flicker_test/setup.h
@@ -0,0 +1,46 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_FLICKER_TEST_SETUP_H_
+#define TOOLS_FLICKER_TEST_SETUP_H_
+
+#include <QWizard>
+
+#include "tools/flicker_test/parameters.h"
+#include "tools/flicker_test/ui_setup.h"
+
+namespace jpegxl {
+namespace tools {
+
+// Wizard shown before a flicker test to collect its parameters (paths,
+// timings, display settings). The values are restored from and saved to a
+// QSettings store owned by the wizard.
+class FlickerTestWizard : public QWizard {
+  Q_OBJECT
+
+ public:
+  explicit FlickerTestWizard(QWidget* parent = nullptr);
+  ~FlickerTestWizard() override = default;
+
+  // Returns the parameters as currently entered in the wizard's widgets.
+  FlickerTestParameters parameters() const;
+
+ protected:
+  // Adds the output-file overwrite confirmation and the gray-time
+  // consistency check on top of QWizard's own validation.
+  bool validateCurrentPage() override;
+
+ private slots:
+  // NOTE(review): these follow the on_<objectName>_<signal> naming pattern
+  // and no explicit connect() for them appears in setup.cc; presumably they
+  // are connected by name from the generated setupUi() -- confirm.
+  void on_originalFolderBrowseButton_clicked();
+  void on_alteredFolderBrowseButton_clicked();
+  void on_outputFileBrowseButton_clicked();
+
+  void on_timingButtonBox_clicked(QAbstractButton* button);
+
+  // Recomputes the "total gray time" label from the two gray spin boxes.
+  void updateTotalGrayTime();
+
+ private:
+  // Widgets generated from setup.ui.
+  Ui::FlickerTestWizard ui_;
+  // Persistent store the parameters are loaded from and saved to.
+  QSettings settings_;
+};
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_FLICKER_TEST_SETUP_H_
diff --git a/third-party/libjxl/libjxl/tools/flicker_test/setup.ui b/third-party/libjxl/libjxl/tools/flicker_test/setup.ui
new file mode 100644
index 0000000000..44b850c5aa
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/flicker_test/setup.ui
@@ -0,0 +1,425 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <comment>
+  Copyright (c) the JPEG XL Project Authors. All rights reserved.
+
+  Use of this source code is governed by a BSD-style
+  license that can be found in the LICENSE file.
+ </comment>
+ <class>FlickerTestWizard</class>
+ <widget class="QWizard" name="FlickerTestWizard">
+  <property name="windowTitle">
+   <string>New flicker test</string>
+  </property>
+  <property name="wizardStyle">
+   <enum>QWizard::ClassicStyle</enum>
+  </property>
+  <property name="options">
+   <set>QWizard::NoBackButtonOnStartPage</set>
+  </property>
+  <widget class="QWizardPage" name="pathsPage">
+   <layout class="QFormLayout" name="formLayout">
+    <item row="0" column="0">
+     <widget class="QLabel" name="originalFolderPromptLabel">
+      <property name="text">
+       <string>Folder with the original images:</string>
+      </property>
+     </widget>
+    </item>
+    <item row="0" column="1">
+     <layout class="QHBoxLayout" name="horizontalLayout" stretch="1,0">
+      <item>
+       <widget class="QLineEdit" name="originalFolder"/>
+      </item>
+      <item>
+       <widget class="QToolButton" name="originalFolderBrowseButton">
+        <property name="text">
+         <string>Browse…</string>
+        </property>
+       </widget>
+      </item>
+     </layout>
+    </item>
+    <item row="1" column="0">
+     <widget class="QLabel" name="alteredFolderPromptLabel">
+      <property name="text">
+       <string>Folder with the altered images:</string>
+      </property>
+     </widget>
+    </item>
+    <item row="1" column="1">
+     <layout class="QHBoxLayout" name="horizontalLayout_2" stretch="1,0">
+      <item>
+       <widget class="QLineEdit" name="alteredFolder"/>
+      </item>
+      <item>
+       <widget class="QToolButton" name="alteredFolderBrowseButton">
+        <property name="text">
+         <string>Browse…</string>
+        </property>
+       </widget>
+      </item>
+     </layout>
+    </item>
+    <item row="2" column="0">
+     <widget class="QLabel" name="outputFilePromptLabel">
+      <property name="text">
+       <string>CSV file in which to save the results:</string>
+      </property>
+     </widget>
+    </item>
+    <item row="2" column="1">
+     <layout class="QHBoxLayout" name="horizontalLayout_4" stretch="1,0">
+      <item>
+       <widget class="QLineEdit" name="outputFile"/>
+      </item>
+      <item>
+       <widget class="QToolButton" name="outputFileBrowseButton">
+        <property name="text">
+         <string>Browse…</string>
+        </property>
+       </widget>
+      </item>
+     </layout>
+    </item>
+   </layout>
+  </widget>
+  <widget class="QWizardPage" name="timesPage">
+   <layout class="QHBoxLayout" name="horizontalLayout_3" stretch="1,0,1">
+    <item>
+     <spacer name="horizontalSpacer">
+      <property name="orientation">
+       <enum>Qt::Horizontal</enum>
+      </property>
+     </spacer>
+    </item>
+    <item>
+     <layout class="QVBoxLayout" name="verticalLayout_2" stretch="0,0,0,1">
+      <item>
+       <layout class="QFormLayout" name="formLayout_2">
+        <item row="0" column="0">
+         <widget class="QLabel" name="advanceTimePromptLabel">
+          <property name="text">
+           <string>Advance time:</string>
+          </property>
+         </widget>
+        </item>
+        <item row="0" column="1">
+         <widget class="QSpinBox" name="advanceTime">
+          <property name="suffix">
+           <string> ms</string>
+          </property>
+          <property name="minimum">
+           <number>100</number>
+          </property>
+          <property name="maximum">
+           <number>3000</number>
+          </property>
+          <property name="singleStep">
+           <number>100</number>
+          </property>
+         </widget>
+        </item>
+        <item row="1" column="0">
+         <widget class="QLabel" name="viewingTimePromptLabel">
+          <property name="text">
+           <string>Viewing time (t&lt;sub&gt;VIEW&lt;/sub&gt;):</string>
+          </property>
+         </widget>
+        </item>
+        <item row="1" column="1">
+         <widget class="QSpinBox" name="viewingTime">
+          <property name="specialValueText">
+           <string>no limit</string>
+          </property>
+          <property name="suffix">
+           <string> s</string>
+          </property>
+          <property name="minimum">
+           <number>0</number>
+          </property>
+          <property name="maximum">
+           <number>30</number>
+          </property>
+          <property name="value">
+           <number>4</number>
+          </property>
+         </widget>
+        </item>
+        <item row="2" column="0">
+         <widget class="QLabel" name="blankingTimePromptLabel">
+          <property name="text">
+           <string>Blanking time (t&lt;sub&gt;BLANK&lt;/sub&gt;):</string>
+          </property>
+         </widget>
+        </item>
+        <item row="2" column="1">
+         <widget class="QSpinBox" name="blankingTime">
+          <property name="suffix">
+           <string> ms</string>
+          </property>
+          <property name="minimum">
+           <number>50</number>
+          </property>
+          <property name="maximum">
+           <number>1000</number>
+          </property>
+          <property name="singleStep">
+           <number>50</number>
+          </property>
+          <property name="value">
+           <number>250</number>
+          </property>
+         </widget>
+        </item>
+       </layout>
+      </item>
+      <item>
+       <widget class="QGroupBox" name="grayFlickering">
+        <property name="title">
+         <string>Gray flickering</string>
+        </property>
+        <property name="checkable">
+         <bool>true</bool>
+        </property>
+        <property name="checked">
+         <bool>false</bool>
+        </property>
+        <layout class="QVBoxLayout" name="verticalLayout_4">
+         <item>
+          <layout class="QFormLayout" name="formLayout_4">
+           <item row="0" column="0">
+            <widget class="QLabel" name="grayFadingTimePromptLabel">
+             <property name="text">
+              <string>Fading time to and from gray:</string>
+             </property>
+            </widget>
+           </item>
+           <item row="0" column="1">
+            <widget class="QSpinBox" name="grayFadingTime">
+             <property name="suffix">
+              <string> ms</string>
+             </property>
+             <property name="maximum">
+              <number>1000</number>
+             </property>
+             <property name="singleStep">
+              <number>100</number>
+             </property>
+             <property name="value">
+              <number>100</number>
+             </property>
+            </widget>
+           </item>
+           <item row="1" column="0">
+            <widget class="QLabel" name="grayTimePromptLabel">
+             <property name="text">
+              <string>Time on gray:</string>
+             </property>
+            </widget>
+           </item>
+           <item row="1" column="1">
+            <widget class="QSpinBox" name="grayTime">
+             <property name="suffix">
+              <string> ms</string>
+             </property>
+             <property name="maximum">
+              <number>1000</number>
+             </property>
+             <property name="singleStep">
+              <number>100</number>
+             </property>
+             <property name="value">
+              <number>300</number>
+             </property>
+            </widget>
+           </item>
+          </layout>
+         </item>
+         <item>
+          <widget class="QLabel" name="totalGrayTimeLabel">
+           <property name="text">
+            <string>Total gray time: 500 ms</string>
+           </property>
+           <property name="textFormat">
+            <enum>Qt::RichText</enum>
+           </property>
+          </widget>
+         </item>
+        </layout>
+       </widget>
+      </item>
+      <item>
+       <widget class="QDialogButtonBox" name="timingButtonBox">
+        <property name="standardButtons">
+         <set>QDialogButtonBox::RestoreDefaults</set>
+        </property>
+        <property name="centerButtons">
+         <bool>true</bool>
+        </property>
+       </widget>
+      </item>
+      <item>
+       <spacer name="verticalSpacer">
+        <property name="orientation">
+         <enum>Qt::Vertical</enum>
+        </property>
+       </spacer>
+      </item>
+     </layout>
+    </item>
+    <item>
+     <spacer name="horizontalSpacer_2">
+      <property name="orientation">
+       <enum>Qt::Horizontal</enum>
+      </property>
+     </spacer>
+    </item>
+   </layout>
+  </widget>
+  <widget class="QWizardPage" name="intensityTargetPage">
+   <layout class="QHBoxLayout" name="horizontalLayout_6" stretch="1,0,1">
+    <item>
+     <spacer name="horizontalSpacer_3">
+      <property name="orientation">
+       <enum>Qt::Horizontal</enum>
+      </property>
+     </spacer>
+    </item>
+    <item>
+     <layout class="QFormLayout" name="formLayout_5">
+      <item row="0" column="0">
+       <widget class="QLabel" name="intensityTargetPromptLabel">
+        <property name="text">
+         <string>Display peak luminance:</string>
+        </property>
+       </widget>
+      </item>
+      <item row="0" column="1">
+       <widget class="QSpinBox" name="intensityTarget">
+        <property name="correctionMode">
+         <enum>QAbstractSpinBox::CorrectToNearestValue</enum>
+        </property>
+        <property name="suffix">
+         <string> cd/m²</string>
+        </property>
+        <property name="minimum">
+         <number>20</number>
+        </property>
+        <property name="maximum">
+         <number>10000</number>
+        </property>
+        <property name="stepType">
+         <enum>QAbstractSpinBox::AdaptiveDecimalStepType</enum>
+        </property>
+        <property name="value">
+         <number>250</number>
+        </property>
+       </widget>
+      </item>
+     </layout>
+    </item>
+    <item>
+     <spacer name="horizontalSpacer_4">
+      <property name="orientation">
+       <enum>Qt::Horizontal</enum>
+      </property>
+     </spacer>
+    </item>
+   </layout>
+  </widget>
+  <widget class="QWizardPage" name="spacingPage">
+   <layout class="QVBoxLayout" name="verticalLayout_3" stretch="1,0,0">
+    <item>
+     <widget class="jpegxl::tools::SplitView" name="spacingDemo" native="true"/>
+    </item>
+    <item>
+     <spacer name="verticalSpacer_2">
+      <property name="orientation">
+       <enum>Qt::Vertical</enum>
+      </property>
+     </spacer>
+    </item>
+    <item>
+     <layout class="QFormLayout" name="formLayout_3">
+      <item row="0" column="0">
+       <widget class="QLabel" name="spacingPromptLabel">
+        <property name="text">
+         <string>Spacing between the images:</string>
+        </property>
+       </widget>
+      </item>
+      <item row="0" column="1">
+       <layout class="QHBoxLayout" name="horizontalLayout_5" stretch="1,0">
+        <item>
+         <widget class="QSlider" name="spacing">
+          <property name="minimum">
+           <number>1</number>
+          </property>
+          <property name="maximum">
+           <number>1000</number>
+          </property>
+          <property name="value">
+           <number>50</number>
+          </property>
+          <property name="orientation">
+           <enum>Qt::Horizontal</enum>
+          </property>
+         </widget>
+        </item>
+        <item>
+         <widget class="QSpinBox" name="spacingSpinBox">
+          <property name="suffix">
+           <string> px</string>
+          </property>
+          <property name="minimum">
+           <number>1</number>
+          </property>
+          <property name="maximum">
+           <number>1000</number>
+          </property>
+          <property name="value">
+           <number>50</number>
+          </property>
+         </widget>
+        </item>
+       </layout>
+      </item>
+     </layout>
+    </item>
+   </layout>
+  </widget>
+ </widget>
+ <customwidgets>
+  <customwidget>
+   <class>jpegxl::tools::SplitView</class>
+   <extends>QWidget</extends>
+   <header>tools/flicker_test/split_view.h</header>
+   <container>1</container>
+   <slots>
+    <slot>setSpacing(int)</slot>
+   </slots>
+  </customwidget>
+ </customwidgets>
+ <resources/>
+ <connections>
+  <connection>
+   <sender>spacing</sender>
+   <signal>valueChanged(int)</signal>
+   <receiver>spacingDemo</receiver>
+   <slot>setSpacing(int)</slot>
+  </connection>
+  <connection>
+   <sender>spacing</sender>
+   <signal>valueChanged(int)</signal>
+   <receiver>spacingSpinBox</receiver>
+   <slot>setValue(int)</slot>
+  </connection>
+  <connection>
+   <sender>spacingSpinBox</sender>
+   <signal>valueChanged(int)</signal>
+   <receiver>spacing</receiver>
+   <slot>setValue(int)</slot>
+  </connection>
+ </connections>
+</ui>
diff --git a/third-party/libjxl/libjxl/tools/flicker_test/split_view.cc b/third-party/libjxl/libjxl/tools/flicker_test/split_view.cc
new file mode 100644
index 0000000000..87df95ecc7
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/flicker_test/split_view.cc
@@ -0,0 +1,176 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/flicker_test/split_view.h"
+
+#include <QMouseEvent>
+#include <QPainter>
+
+namespace jpegxl {
+namespace tools {
+
+// Creates the view, seeds the random generator from std::random_device and
+// wires the two timers and the flicker animation.
+SplitView::SplitView(QWidget* const parent)
+    : QWidget(parent), g_(std::random_device()()) {
+  // Both timers are single-shot and use the precise timer type.
+  for (QTimer* const timer : {&blankingTimer_, &viewingTimer_}) {
+    timer->setSingleShot(true);
+    timer->setTimerType(Qt::PreciseTimer);
+  }
+
+  // A loop count of -1 keeps the animation looping until it is stopped.
+  flicker_.setLoopCount(-1);
+
+  // Once the blanking period is over, start showing the images.
+  connect(&blankingTimer_, &QTimer::timeout, this, &SplitView::startDisplaying);
+
+  // While fading to/from gray, every animation step needs a repaint.
+  connect(&flicker_, &QVariantAnimation::valueChanged, this, [this] {
+    if (!gray_) return;
+    update();
+  });
+
+  // Each new animation loop swaps which image the flickering side shows.
+  connect(&flicker_, &QAbstractAnimation::currentLoopChanged, [this] {
+    showingAltered_ = !showingAltered_;
+    update();
+  });
+
+  // When the viewing time runs out, stop flickering and black out both
+  // images.
+  connect(&viewingTimer_, &QTimer::timeout, [this] {
+    flicker_.stop();
+    original_.fill(Qt::black);
+    altered_.fill(Qt::black);
+    update();
+  });
+}
+
+// Replaces the original image with `image`, tagging the resulting pixmap
+// with this widget's device pixel ratio, then refreshes the minimum size and
+// schedules a repaint.
+void SplitView::setOriginalImage(QImage image) {
+  QPixmap pixmap = QPixmap::fromImage(std::move(image));
+  pixmap.setDevicePixelRatio(devicePixelRatio());
+  original_ = std::move(pixmap);
+
+  updateMinimumSize();
+  update();
+}
+
+// Replaces the altered image with `image`, tagging the resulting pixmap with
+// this widget's device pixel ratio, then refreshes the minimum size and
+// schedules a repaint.
+void SplitView::setAlteredImage(QImage image) {
+  QPixmap pixmap = QPixmap::fromImage(std::move(image));
+  pixmap.setDevicePixelRatio(devicePixelRatio());
+  altered_ = std::move(pixmap);
+
+  updateMinimumSize();
+  update();
+}
+
+// Sets the gap between the two images. The value is divided by the device
+// pixel ratio wherever it is used for layout, so it is expressed in device
+// pixels. The minimum size is recomputed and a repaint is scheduled.
+void SplitView::setSpacing(int spacing) {
+  spacing_ = spacing;
+  updateMinimumSize();
+  update();
+}
+
+// Prepares and starts one test round for the image called `imageName`.
+//
+// Randomly chooses the side that always shows the original, configures the
+// viewing timer and the flicker animation, then enters the blanking state
+// for `blankingTimeMSecs`; displaying starts when the blanking timer fires.
+//
+// The animation value is used as the painting opacity. With `gray` enabled,
+// each period of `advanceTimeMSecs` fades in over `grayFadingTimeMSecs`,
+// stays fully visible, fades out over `grayFadingTimeMSecs` and finally
+// stays at zero opacity for `grayTimeMSecs`. Without `gray` the opacity is a
+// constant 1.
+void SplitView::startTest(QString imageName, const int blankingTimeMSecs,
+                          const int viewingTimeSecs, const int advanceTimeMSecs,
+                          const bool gray, const int grayFadingTimeMSecs,
+                          const int grayTimeMSecs) {
+  using KeyValue = QVariantAnimation::KeyValue;
+
+  imageName_ = std::move(imageName);
+
+  std::bernoulli_distribution coinFlip;
+  originalSide_ = coinFlip(g_) ? Side::kLeft : Side::kRight;
+
+  viewingTimer_.setInterval(1000 * viewingTimeSecs);
+  flicker_.setDuration(advanceTimeMSecs);
+  gray_ = gray;
+
+  QVariantAnimation::KeyValues opacityKeys;
+  if (!gray_) {
+    opacityKeys << KeyValue(0., 1.f) << KeyValue(1., 1.f);
+  } else {
+    // Key-frame positions, as fractions of one advance period.
+    const float fadeInEnd =
+        static_cast<float>(grayFadingTimeMSecs) / advanceTimeMSecs;
+    const float fadeOutStart =
+        static_cast<float>(advanceTimeMSecs - grayTimeMSecs -
+                           grayFadingTimeMSecs) /
+        advanceTimeMSecs;
+    const float fadeOutEnd =
+        static_cast<float>(advanceTimeMSecs - grayTimeMSecs) /
+        advanceTimeMSecs;
+
+    opacityKeys << KeyValue(0., 0.f);
+    opacityKeys << KeyValue(fadeInEnd, 1.f);
+    opacityKeys << KeyValue(fadeOutStart, 1.f);
+    opacityKeys << KeyValue(fadeOutEnd, 0.f);
+    opacityKeys << KeyValue(1.f, 0.f);
+  }
+  flicker_.setKeyValues(opacityKeys);
+
+  state_ = State::kBlanking;
+  blankingTimer_.start(blankingTimeMSecs);
+}
+
+// Records on which image, if any, a click starts. Presses are ignored unless
+// the view is currently displaying, and presses outside both images leave
+// the click state unchanged.
+void SplitView::mousePressEvent(QMouseEvent* const event) {
+  if (state_ != State::kDisplaying) return;
+
+  const auto position = event->pos();
+  // The left image is checked first, as before.
+  const bool onLeft = leftRect_.contains(position);
+  if (!onLeft && !rightRect_.contains(position)) return;
+
+  clicking_ = true;
+  clickedSide_ = onLeft ? Side::kLeft : Side::kRight;
+}
+
+// Completes a click started in mousePressEvent. The click only counts when
+// the button is released over the same image it was pressed on; in that case
+// the round ends (animation and viewing timer stopped, view blanked) and the
+// result is emitted.
+void SplitView::mouseReleaseEvent(QMouseEvent* const event) {
+  if (!clicking_) return;
+  clicking_ = false;
+
+  // Taken before anything else so the measured delay is as close as possible
+  // to the actual release.
+  const int clickDelayMSecs = viewingStartTime_.elapsed();
+
+  const QRectF& pressedRect =
+      clickedSide_ == Side::kLeft ? leftRect_ : rightRect_;
+  if (!pressedRect.contains(event->pos())) {
+    // Released outside the image that was pressed: discard the click.
+    return;
+  }
+
+  flicker_.stop();
+  viewingTimer_.stop();
+  state_ = State::kBlanking;
+  update();
+
+  // NOTE(review): the signal declares its second parameter as
+  // `flickeringSide`, whereas `originalSide_` is the side that paintEvent
+  // always draws with the original image -- confirm how the receiver
+  // interprets this argument.
+  emit testResult(imageName_, originalSide_, clickedSide_, clickDelayMSecs);
+}
+
+// Paints the gray background and, unless the view is blanking, the two
+// images side by side around the widget's center, separated by the
+// configured spacing. Also refreshes the cached image rectangles that the
+// mouse handlers use for hit testing.
+void SplitView::paintEvent(QPaintEvent* const event) {
+  QPainter painter(this);
+  painter.fillRect(rect(), QColor(119, 119, 119));
+
+  if (state_ == State::kBlanking) return;
+
+  // While the gray animation runs, its current value is the image opacity.
+  const bool fading = gray_ && flicker_.state() == QAbstractAnimation::Running;
+  if (fading) {
+    painter.setOpacity(flicker_.currentValue().toFloat());
+  }
+
+  // The original side always shows the original; the other side alternates
+  // between the altered and the original image.
+  const auto pixmapFor = [this](const Side side) -> QPixmap* {
+    const bool showOriginal = side == originalSide_ || !showingAltered_;
+    return showOriginal ? &original_ : &altered_;
+  };
+
+  const QPoint center = rect().center();
+  const auto ratio = devicePixelRatio();
+
+  // Left image: vertically centered, right edge half the spacing (rounded
+  // up) to the left of the center.
+  QPixmap* const leftPixmap = pixmapFor(Side::kLeft);
+  leftRect_ = QRectF(QPoint(), leftPixmap->deviceIndependentSize());
+  leftRect_.moveCenter(center);
+  leftRect_.moveRight(center.x() - (spacing_ / 2 + spacing_ % 2) / ratio);
+  painter.drawPixmap(leftRect_.topLeft(), *leftPixmap);
+
+  // Right image: vertically centered, left edge half the spacing (rounded
+  // down) to the right of the center.
+  QPixmap* const rightPixmap = pixmapFor(Side::kRight);
+  rightRect_ = QRectF(QPoint(), rightPixmap->deviceIndependentSize());
+  rightRect_.moveCenter(center);
+  rightRect_.moveLeft(center.x() + (spacing_ / 2) / ratio);
+  painter.drawPixmap(rightRect_.topLeft(), *rightPixmap);
+}
+
+// Switches from blanking to displaying: starts the flicker animation and the
+// stopwatch used to measure the click delay. Connected to the blanking
+// timer's timeout in the constructor.
+void SplitView::startDisplaying() {
+  state_ = State::kDisplaying;
+  flicker_.start();
+  viewingStartTime_.start();
+  // The viewing timer is only armed for a positive interval; an interval of
+  // 0 (viewing time of 0 s) therefore means the images stay up indefinitely.
+  if (viewingTimer_.interval() > 0) {
+    viewingTimer_.start();
+  }
+}
+
+// Sets the widget's minimum size so that both images fit side by side: twice
+// the wider image plus the spacing horizontally, and the taller image
+// vertically. All sizes are in device-independent units.
+void SplitView::updateMinimumSize() {
+  const auto originalSize = original_.deviceIndependentSize();
+  const auto alteredSize = altered_.deviceIndependentSize();
+
+  const auto widest = std::max(originalSize.width(), alteredSize.width());
+  const auto tallest = std::max(originalSize.height(), alteredSize.height());
+
+  setMinimumWidth(2 * widest + spacing_ / devicePixelRatio());
+  setMinimumHeight(tallest);
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/flicker_test/split_view.h b/third-party/libjxl/libjxl/tools/flicker_test/split_view.h
new file mode 100644
index 0000000000..37c5f7eb78
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/flicker_test/split_view.h
@@ -0,0 +1,86 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_FLICKER_TEST_SPLIT_VIEW_H_
+#define TOOLS_FLICKER_TEST_SPLIT_VIEW_H_
+
+#include <QElapsedTimer>
+#include <QImage>
+#include <QPixmap>
+#include <QTimer>
+#include <QVariantAnimation>
+#include <QWidget>
+#include <random>
+
+namespace jpegxl {
+namespace tools {
+
+class SplitView : public QWidget {  // Two-image view for the flicker test.
+  Q_OBJECT
+
+ public:
+  enum class Side {  // Which half of the widget an image occupies.
+    kLeft,
+    kRight,
+  };
+  Q_ENUM(Side)
+
+  explicit SplitView(QWidget* parent = nullptr);
+  ~SplitView() override = default;
+
+  void setOriginalImage(QImage image);
+  void setAlteredImage(QImage image);
+
+ signals:
+  void testResult(const QString& imageName, Side flickeringSide,
+                  Side clickedSide, int clickDelayMSecs);  // One test outcome.
+
+ public slots:
+  void setSpacing(int spacing);
+  void startTest(QString imageName, int blankingTimeMSecs, int viewingTimeSecs,
+                 int advanceTimeMSecs, bool gray, int grayFadingTimeMSecs,
+                 int grayTimeMSecs);
+
+ protected:
+  void mousePressEvent(QMouseEvent* event) override;
+  void mouseReleaseEvent(QMouseEvent* event) override;
+  void paintEvent(QPaintEvent* event) override;  // Draws both images.
+
+ private slots:
+  void startDisplaying();  // Switches state_ to kDisplaying, starts timing.
+
+ private:
+  enum class State {
+    kBlanking,    // paintEvent() draws only the gray background.
+    kDisplaying,  // Set by startDisplaying(); both images are painted.
+  };
+
+  void updateMinimumSize();  // Sizes the widget to fit both images + gap.
+
+  int spacing_ = 50;  // Gap between images, divided by devicePixelRatio().
+
+  std::mt19937 g_;  // NOTE(review): presumably randomizes sides - confirm.
+
+  QString imageName_;
+  QPixmap original_, altered_;  // The two images being compared.
+  Side originalSide_;  // Side that always shows original_ in paintEvent().
+  bool clicking_ = false;
+  Side clickedSide_;
+  QRectF leftRect_, rightRect_;  // Image rectangles, set on every paint.
+  State state_ = State::kDisplaying;
+  bool gray_ = false;  // When set, a running flicker_ drives paint opacity.
+  QTimer blankingTimer_;
+  QTimer viewingTimer_;  // Started by startDisplaying() if interval() > 0.
+  // Throughout each cycle, animates the opacity of the image being displayed
+  // between 0 and 1 if fading to gray is enabled.
+  QVariantAnimation flicker_;
+  bool showingAltered_ = true;  // Other side shows altered_, else original_.
+  QElapsedTimer viewingStartTime_;  // Restarted in startDisplaying().
+};
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_FLICKER_TEST_SPLIT_VIEW_H_
diff --git a/third-party/libjxl/libjxl/tools/flicker_test/test_window.cc b/third-party/libjxl/libjxl/tools/flicker_test/test_window.cc
new file mode 100644
index 0000000000..c21ca6fd72
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/flicker_test/test_window.cc
@@ -0,0 +1,186 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/flicker_test/test_window.h"
+
+#include <QDir>
+#include <QMessageBox>
+#include <QSet>
+#include <algorithm>
+#include <random>
+
+#include "tools/icc_detect/icc_detect.h"
+
+namespace jpegxl {
+namespace tools {
+
+// Sets up the UI, opens the CSV output and queues the images to test in
+// random order; proceedWithTest() turns false if setup is abandoned.
+FlickerTestWindow::FlickerTestWindow(FlickerTestParameters parameters,
+                                     QWidget* const parent)
+    : QMainWindow(parent),
+      monitorProfile_(GetMonitorIccProfile(this)),
+      parameters_(std::move(parameters)),
+      originalFolder_(parameters_.originalFolder, "*.png"),
+      alteredFolder_(parameters_.alteredFolder, "*.png"),
+      outputFile_(parameters_.outputFile) {
+  ui_.setupUi(this);
+  ui_.splitView->setSpacing(parameters_.spacing);
+  ui_.endLabel->setText(
+      tr("The test is complete and the results have been saved to \"%1\".")
+          .arg(parameters_.outputFile));
+  connect(ui_.startButton, &QAbstractButton::clicked, this, [this] {
+    ui_.stackedView->setCurrentWidget(ui_.splitView);
+    nextImage();
+  });
+  connect(ui_.splitView, &SplitView::testResult, this,
+          &FlickerTestWindow::processTestResult);
+  // Without a writable results file the test cannot proceed.
+  if (!outputFile_.open(QIODevice::WriteOnly)) {
+    QMessageBox messageBox;
+    messageBox.setIcon(QMessageBox::Critical);
+    messageBox.setStandardButtons(QMessageBox::Close);
+    messageBox.setWindowTitle(tr("Failed to open output file"));
+    messageBox.setText(  // Primary text, like the other dialogs in this file.
+        tr("Could not open \"%1\" for writing.").arg(outputFile_.fileName()));
+    messageBox.exec();
+    proceed_ = false;
+    return;
+  }
+  outputStream_.setDevice(&outputFile_);
+  outputStream_ << "image name,original side,clicked side,click delay (ms)\n";
+  // A missing monitor profile is only a warning; the test still runs.
+  if (monitorProfile_.isEmpty()) {
+    QMessageBox messageBox;
+    messageBox.setIcon(QMessageBox::Warning);
+    messageBox.setStandardButtons(QMessageBox::Ok);
+    messageBox.setWindowTitle(tr("No monitor profile found"));
+    messageBox.setText(
+        tr("No ICC profile appears to be associated with the display. It will "
+           "be assumed to match sRGB."));
+    messageBox.exec();
+  }
+  // Collect the "*.png" file names present in each folder.
+  originalFolder_.setFilter(QDir::Files);
+  alteredFolder_.setFilter(QDir::Files);
+#if QT_VERSION < QT_VERSION_CHECK(5, 14, 0)
+  auto originalImages = QSet<QString>::fromList(originalFolder_.entryList());
+  auto alteredImages = QSet<QString>::fromList(alteredFolder_.entryList());
+#else
+  const QStringList originalFolderEntries = originalFolder_.entryList();
+  QSet<QString> originalImages(originalFolderEntries.begin(),
+                               originalFolderEntries.end());
+  const QStringList alteredFolderEntries = alteredFolder_.entryList();
+  QSet<QString> alteredImages(alteredFolderEntries.begin(),
+                              alteredFolderEntries.end());
+#endif
+  // Names present in only one folder are reported; the user may still go on.
+  auto onlyOriginal = originalImages - alteredImages,
+       onlyAltered = alteredImages - originalImages;
+  if (!onlyOriginal.isEmpty() || !onlyAltered.isEmpty()) {
+    QMessageBox messageBox;
+    messageBox.setIcon(QMessageBox::Warning);
+    messageBox.setStandardButtons(QMessageBox::Ok | QMessageBox::Cancel);
+    messageBox.setWindowTitle(tr("Image set mismatch"));
+    messageBox.setText(
+        tr("A mismatch has been detected between the original and altered "
+           "images."));
+    messageBox.setInformativeText(tr("Proceed with the test?"));
+    QStringList detailedTextParagraphs;
+    const QString itemFormat = tr("— %1\n");
+    if (!onlyOriginal.isEmpty()) {
+      QString originalList;
+      for (const QString& original : onlyOriginal) {
+        originalList += itemFormat.arg(original);
+      }
+      detailedTextParagraphs << tr("The following images were only found in "
+                                   "the originals folder:\n%1")
+                                    .arg(originalList);
+    }
+    if (!onlyAltered.isEmpty()) {
+      QString alteredList;
+      for (const QString& altered : onlyAltered) {
+        alteredList += itemFormat.arg(altered);
+      }
+      detailedTextParagraphs << tr("The following images were only found in "
+                                   "the altered images folder:\n%1")
+                                    .arg(alteredList);
+    }
+    messageBox.setDetailedText(detailedTextParagraphs.join("\n\n"));
+    if (messageBox.exec() == QMessageBox::Cancel) {
+      proceed_ = false;
+      return;
+    }
+  }
+  // Only names found in both folders are tested, in random order.
+  remainingImages_ = originalImages.intersect(alteredImages).values();
+  std::mt19937 g(std::random_device{}());
+  std::shuffle(remainingImages_.begin(), remainingImages_.end(), g);
+}
+
+// Appends one CSV row for the finished image, then moves to the next one.
+void FlickerTestWindow::processTestResult(const QString& imageName,
+                                          const SplitView::Side originalSide,
+                                          const SplitView::Side clickedSide,
+                                          const int clickDelayMSecs) {
+  // Maps a side to the word written in the CSV; other values give "unknown".
+  const auto sideName = [](const SplitView::Side side) -> const char* {
+    if (side == SplitView::Side::kLeft) return "left";
+    if (side == SplitView::Side::kRight) return "right";
+    return "unknown";
+  };
+  const char* const original = sideName(originalSide);
+  const char* const clicked = sideName(clickedSide);
+  // Column order matches the header written by the constructor.
+  outputStream_ << imageName << "," << original << "," << clicked << ","
+                << clickDelayMSecs << "\n";
+
+  nextImage();
+}
+
+// Starts the test for the next queued image, or ends the run if none remain.
+void FlickerTestWindow::nextImage() {
+  if (remainingImages_.empty()) {
+    outputStream_.flush();
+    ui_.stackedView->setCurrentWidget(ui_.finalPage);
+    return;
+  }
+  const QString image = remainingImages_.takeFirst();
+retry:
+  QImage originalImage =
+      loadImage(originalFolder_.absoluteFilePath(image), monitorProfile_,
+                parameters_.intensityTarget);
+  QImage alteredImage = loadImage(alteredFolder_.absoluteFilePath(image),
+                                  monitorProfile_, parameters_.intensityTarget);
+  if (originalImage.isNull() || alteredImage.isNull()) {
+    QMessageBox messageBox(this);
+    messageBox.setIcon(QMessageBox::Warning);
+    messageBox.setStandardButtons(QMessageBox::Retry | QMessageBox::Ignore |
+                                  QMessageBox::Abort);
+    messageBox.setWindowTitle(tr("Failed to load image"));
+    messageBox.setText(tr("Could not load image \"%1\".").arg(image));
+    switch (messageBox.exec()) {
+      case QMessageBox::Retry:  // Load the same image again.
+        goto retry;
+      case QMessageBox::Ignore:  // Log a row with empty results and move on.
+        outputStream_ << image << ",,,\n";
+        return nextImage();
+      case QMessageBox::Abort:
+        outputStream_.flush();  // Final page says the results are saved.
+        ui_.stackedView->setCurrentWidget(ui_.finalPage);
+        return;
+    }
+  }
+  // Both images loaded: hand them to the split view and start the test.
+  ui_.splitView->setOriginalImage(std::move(originalImage));
+  ui_.splitView->setAlteredImage(std::move(alteredImage));
+  ui_.splitView->startTest(
+      image, parameters_.blankingTimeMSecs, parameters_.viewingTimeSecs,
+      parameters_.advanceTimeMSecs, parameters_.gray,
+      parameters_.grayFadingTimeMSecs, parameters_.grayTimeMSecs);
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/flicker_test/test_window.h b/third-party/libjxl/libjxl/tools/flicker_test/test_window.h
new file mode 100644
index 0000000000..ad712afec8
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/flicker_test/test_window.h
@@ -0,0 +1,52 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_FLICKER_TEST_TEST_WINDOW_H_
+#define TOOLS_FLICKER_TEST_TEST_WINDOW_H_
+
+#include <QByteArray>
+#include <QDir>
+#include <QMainWindow>
+#include <QStringList>
+#include <QTextStream>
+
+#include "tools/comparison_viewer/image_loading.h"
+#include "tools/flicker_test/parameters.h"
+#include "tools/flicker_test/ui_test_window.h"
+
+namespace jpegxl {
+namespace tools {
+
+class FlickerTestWindow : public QMainWindow {  // Main flicker-test window.
+  Q_OBJECT
+
+ public:
+  explicit FlickerTestWindow(FlickerTestParameters parameters,
+                             QWidget* parent = nullptr);
+  ~FlickerTestWindow() override = default;
+  // False once the constructor has given up (unwritable output or Cancel).
+  bool proceedWithTest() const { return proceed_; }
+
+ private slots:
+  void processTestResult(const QString& imageName, SplitView::Side originalSide,
+                         SplitView::Side clickedSide, int clickDelayMSecs);
+
+ private:
+  void nextImage();  // Starts the next queued image or shows the final page.
+
+  Ui::FlickerTestWindow ui_;
+  bool proceed_ = true;
+  const QByteArray monitorProfile_;  // From GetMonitorIccProfile(); may be empty.
+  FlickerTestParameters parameters_;
+  QDir originalFolder_, alteredFolder_;  // Both use the "*.png" name filter.
+  QFile outputFile_;  // CSV results file, opened write-only.
+  QTextStream outputStream_;  // Text stream writing to outputFile_.
+  QStringList remainingImages_;  // Shuffled names still to be shown.
+};
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_FLICKER_TEST_TEST_WINDOW_H_
diff --git a/third-party/libjxl/libjxl/tools/flicker_test/test_window.ui b/third-party/libjxl/libjxl/tools/flicker_test/test_window.ui
new file mode 100644
index 0000000000..bd42873d13
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/flicker_test/test_window.ui
@@ -0,0 +1,115 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <comment>
+  Copyright (c) the JPEG XL Project Authors. All rights reserved.
+
+  Use of this source code is governed by a BSD-style
+  license that can be found in the LICENSE file.
+ </comment>
+ <class>FlickerTestWindow</class>
+ <widget class="QMainWindow" name="FlickerTestWindow">
+  <property name="windowTitle">
+   <string>Flicker test</string>
+  </property>
+  <widget class="QWidget" name="centralwidget">
+   <layout class="QVBoxLayout" name="verticalLayout_2">
+    <item>
+     <widget class="QStackedWidget" name="stackedView">
+      <widget class="QWidget" name="startPage">
+       <layout class="QVBoxLayout" name="verticalLayout" stretch="1,0,1">
+        <item>
+         <spacer name="verticalSpacer">
+          <property name="orientation">
+           <enum>Qt::Vertical</enum>
+          </property>
+         </spacer>
+        </item>
+        <item>
+         <layout class="QHBoxLayout" name="horizontalLayout" stretch="1,0,1">
+          <item>
+           <spacer name="spacer_2">
+            <property name="orientation">
+             <enum>Qt::Horizontal</enum>
+            </property>
+           </spacer>
+          </item>
+          <item>
+           <widget class="QPushButton" name="startButton">
+            <property name="text">
+             <string>Start</string>
+            </property>
+           </widget>
+          </item>
+          <item>
+           <spacer name="spacer">
+            <property name="orientation">
+             <enum>Qt::Horizontal</enum>
+            </property>
+           </spacer>
+          </item>
+         </layout>
+        </item>
+        <item>
+         <spacer name="verticalSpacer_2">
+          <property name="orientation">
+           <enum>Qt::Vertical</enum>
+          </property>
+          <property name="sizeHint" stdset="0">
+           <size>
+            <width>0</width>
+            <height>0</height>
+           </size>
+          </property>
+         </spacer>
+        </item>
+       </layout>
+      </widget>
+      <widget class="jpegxl::tools::SplitView" name="splitView"/>
+      <widget class="QWidget" name="finalPage">
+       <layout class="QVBoxLayout" name="verticalLayout_3">
+        <item>
+         <layout class="QHBoxLayout" name="horizontalLayout_2" stretch="1,0,1">
+          <item>
+           <spacer name="horizontalSpacer">
+            <property name="orientation">
+             <enum>Qt::Horizontal</enum>
+            </property>
+           </spacer>
+          </item>
+          <item>
+           <widget class="QLabel" name="endLabel">
+            <property name="text">
+             <string/>
+            </property>
+            <property name="alignment">
+             <set>Qt::AlignJustify|Qt::AlignVCenter</set>
+            </property>
+           </widget>
+          </item>
+          <item>
+           <spacer name="horizontalSpacer_2">
+            <property name="orientation">
+             <enum>Qt::Horizontal</enum>
+            </property>
+           </spacer>
+          </item>
+         </layout>
+        </item>
+       </layout>
+      </widget>
+     </widget>
+    </item>
+   </layout>
+  </widget>
+ </widget>
+ <customwidgets>
+  <customwidget>
+   <class>jpegxl::tools::SplitView</class>
+   <extends>QWidget</extends>
+   <header>tools/flicker_test/split_view.h</header>
+   <container>1</container>
+  </customwidget>
+ </customwidgets>
+ <resources/>
+ <connections/>
+</ui>
diff --git a/third-party/libjxl/libjxl/tools/fuzzer_stub.cc b/third-party/libjxl/libjxl/tools/fuzzer_stub.cc
new file mode 100644
index 0000000000..2f30e9ee0b
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/fuzzer_stub.cc
@@ -0,0 +1,45 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/thread_parallel_runner.h>
+#include <jxl/thread_parallel_runner_cxx.h>
+
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <vector>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size);
+
+void ProcessInput(const char* filename) {  // Feeds one file to the fuzzer.
+  std::ifstream input(filename, std::ios::binary);
+  const std::istreambuf_iterator<char> first(input), last;
+  std::vector<char> bytes(first, last);
+  input.close();
+  std::cout << "Processing " << filename << std::endl;
+  const auto* const data = reinterpret_cast<uint8_t*>(bytes.data());
+  LLVMFuzzerTestOneInput(data, bytes.size());
+}
+
+// Passes the contents of every file named on the command line to the fuzz
+// target. One file is processed inline; several are spread over a
+// thread-parallel runner, one file per task. No arguments: nothing to do.
+int main(int argc, const char* argv[]) {
+  if (argc < 2) return 0;
+  if (argc == 2) {
+    ProcessInput(argv[1]);  // No threaded runner for single inputs.
+    return 0;
+  }
+  // Captureless lambdas converted to the plain function pointers the C API
+  // takes; `opaque` is argv and `index` selects the file to process.
+  const auto init = +[](void*, size_t) -> JxlParallelRetCode { return 0; };
+  const auto processFile = +[](void* opaque, uint32_t index, size_t) {
+    ProcessInput(static_cast<const char**>(opaque)[index]);
+  };
+  auto runner = JxlThreadParallelRunnerMake(
+      nullptr, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+  return JxlThreadParallelRunner(runner.get(), argv, init, processFile,
+                                 /* start_range= */ 1, /* end_range= */ argc);
+}
diff --git a/third-party/libjxl/libjxl/tools/git_version.cmake b/third-party/libjxl/libjxl/tools/git_version.cmake
new file mode 100644
index 0000000000..4d216e8f57
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/git_version.cmake
@@ -0,0 +1,34 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# git_version.cmake is a script which creates tools_version_git.h in the build
+# directory if building from a git repository.
+find_package(Git QUIET)  # Sets GIT_EXECUTABLE when git is found.
+
+# Check that this script was invoked with the necessary arguments.
+if(NOT IS_DIRECTORY "${JPEGXL_ROOT_DIR}")
+  message(FATAL_ERROR "JPEGXL_ROOT_DIR is invalid")
+endif()
+# Ask git for the short HEAD hash; git's error output is discarded.
+execute_process(
+  COMMAND "${GIT_EXECUTABLE}" rev-parse --short HEAD
+  OUTPUT_VARIABLE GIT_REV
+  WORKING_DIRECTORY "${JPEGXL_ROOT_DIR}"
+  OUTPUT_STRIP_TRAILING_WHITESPACE
+  ERROR_QUIET)
+
+# The single #define line that makes up the whole generated header.
+set(JPEGXL_VERSION_DEFINE "#define JPEGXL_VERSION \"${GIT_REV}\"\n")
+
+# Rewrite the header (path given in DST) only when its content would change.
+if(EXISTS "${DST}")
+  file(READ "${DST}" ORIG_DST)
+  if(NOT ORIG_DST STREQUAL JPEGXL_VERSION_DEFINE)
+    message(STATUS "Changing JPEGXL_VERSION to ${GIT_REV}")
+    file(WRITE "${DST}" "${JPEGXL_VERSION_DEFINE}")
+  endif()
+else()
+  file(WRITE "${DST}" "${JPEGXL_VERSION_DEFINE}")
+endif()
diff --git a/third-party/libjxl/libjxl/tools/hdr/README.md b/third-party/libjxl/libjxl/tools/hdr/README.md
new file mode 100644
index 0000000000..85eb1bd774
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/hdr/README.md
@@ -0,0 +1,153 @@
+# HDR tools
+
+This directory contains a small set of command-line tools for HDR conversions,
+including to SDR.
+
+## Tone mapping
+
+`tools/tone_map` implements tone mapping as described in annex 5 of
+[Report ITU-R BT.2408-4](https://www.itu.int/pub/R-REP-BT.2408-4-2021), more
+specifically the YRGB variant. Since the result may contain out-of-gamut colors,
+it additionally does very basic gamut mapping. The balance between preserving
+saturation and preserving luminance can be controlled by passing a number
+between 0 and 1 using `--preserve_saturation`. The default is 0.1. Hue is never
+sacrificed.
+
+### Examples
+
+```shell
+# Tone maps a PQ image for a 300 cd/m² display, and writes the result as an SDR
+# (but still wide-gamut) image to be shown on such a display.
+$ tools/tone_map -t 300 ClassE_507.png ClassE_507_tone_mapped_300.png
+
+# The result can also be written as a PQ image itself:
+$ tools/tone_map -t 300 --pq ClassE_507.png ClassE_507_tone_mapped_300_pq.png
+
+# It is possible to specify the maximum luminance found in the image using
+# `--max_nits`. For OpenEXR input, it will override the `whiteLuminance` tag
+# which indicates the luminance of (1, 1, 1). For PQ, it will not affect the
+# luminance calculated from the signal, but it will tell the tone mapping how
+# much headroom to leave for highlights.
+$ tools/tone_map -m 4000 -t 300 ClassE_507.png ClassE_507_tone_mapped_300.png
+```
+
+## PQ to HLG conversion
+
+`tools/pq_to_hlg` performs conversion of a PQ image to HLG as described in
+section 6 of the aforementioned BT.2408-4. That is, the PQ image is first
+limited to 1000 cd/m² using the tone mapping mentioned above, and the result is
+treated as if it were the output of a reference 1000 cd/m² HLG display: such a
+display would have a system gamma of 1.2, and therefore, we can apply the
+HLG inverse OOTF with a gamma of 1.2 to get “back” to the linear scene-referred
+signal that would have produced that output on that reference display (and then
+encode it using the OETF).
+
+As with the tone mapping tool, the `--max_nits` and `--preserve_saturation`
+options can be used to guide the 1000 cd/m² limiting.
+
+### Example
+
+```shell
+$ tools/pq_to_hlg ClassE_507.png ClassE_507_hlg.png
+```
+
+## HLG rendering
+
+HLG is designed to look acceptable without specific processing on displays that
+expect a “traditional” SDR signal. Nevertheless, it is possible to optimize the
+appearance for specific viewing conditions by applying the HLG inverse OETF and
+then the OOTF with an appropriate system gamma. Here, the system gamma is
+computed using the extended model mentioned at the bottom of page 29 of
+[Report ITU-R BT.2390-9](https://www.itu.int/pub/R-REP-BT.2390-9-2021). That
+formula should work well over a wide range of display peak luminances.
+
+It is possible to specify not just the peak luminance of the target display
+(using `--target_nits`) but also the ambient luminance of the viewing
+environment using `--surround_nits`.
+
+As with the tone mapping tool, the result can be written as a PQ image. In that
+case, it would make sense, in further usage of `tools/tone_map` or
+`tools/pq_to_hlg`, to set `--max_nits` to the value that was passed as
+`--target_nits` to this tool. This also applies to the tone mapping tool.
+
+### Examples
+
+```shell
+# Renders an HLG image for a 300 cd/m² display in a 10 cd/m² room.
+$ tools/render_hlg -t 300 -s 10 ClassE_507_hlg.png ClassE_507_hlg_300.png
+
+# Renders it for a reference 1000 cd/m² display and writes the result as a PQ
+# image.
+$ tools/render_hlg -t 1000 --pq ClassE_507_hlg.png ClassE_507_hlg_pq.png
+
+# Informing pq_to_hlg about that maximum luminance then ensures proper
+# roundtripping as it will not needlessly tone map the highlights.
+$ tools/pq_to_hlg -m 1000 ClassE_507_hlg_pq.png ClassE_507_hlg_pq_hlg.png
+```
+
+## Display light to HLG
+
+By applying the inverse OOTF to a display-referred image, it is possible to
+compute the scene light, and from there the HLG signal, that would have
+produced that output on that display:
+
+```shell
+$ tools/display_to_hlg -m 600 -s 5 srgb_input.png hlg_output.png
+```
+
+This is the mathematical inverse of `tools/render_hlg`. Furthermore,
+`tools/pq_to_hlg` is equivalent to `tools/tone_map -t 1000` followed by
+`tools/display_to_hlg -m 1000`.
+
+## OpenEXR to PQ
+
+`tools/exr_to_pq` converts an OpenEXR image into a Rec. 2020 + PQ image, which
+can be saved as a PNG or PPM file. Luminance information is taken from the
+`whiteLuminance` tag if the input has it, and otherwise defaults to treating
+(1, 1, 1) as 100 cd/m². It is also possible to override this using the
+`--luminance` (`-l`) flag, in two different ways:
+
+```shell
+# Specifies that the brightest pixel in the image happens to be 1500 cd/m².
+$ tools/exr_to_pq --luminance='max=1500' input.exr output.png
+
+# Specifies that (1, 1, 1) in the input file is 203 cd/m².
+$ tools/exr_to_pq --luminance='white=203' input.exr output.png
+```
+
+## LUT generation
+
+There are additionally two tools that can be used to generate look-up tables
+for use with e.g. FFmpeg, ReShade, or DaVinci Resolve.
+
+The first of the two tools gives a starting point:
+
+```shell
+$ tools/generate_lut_template --lut_size=64 identity.ppm
+```
+
+From there, one can apply a chain of per-pixel transforms (including other
+LUTs) that the final LUT is intended to represent:
+
+```shell
+$ tools/pq_to_hlg identity.ppm pq_to_hlg.ppm
+$ tools/render_hlg -t 400 pq_to_hlg.ppm pq_to_400nit_rec2020.png
+$ convert pq_to_400nit_rec2020.png -profile /usr/share/color/icc/colord/Rec709.icc pq_to_400nit_rec709.png
+```
+
+From there, the PNG image can be used as-is with ReShade’s “LUT” shader
+(provided that the correct LUT size is set), or it can be converted to a
+[Cube](https://wwwimages2.adobe.com/content/dam/acom/en/products/speedgrade/cc/pdfs/cube-lut-specification-1.0.pdf)
+file for use in other software such as FFmpeg’s [lut3d](https://ffmpeg.org/ffmpeg-filters.html#lut3d-1)
+filter:
+
+```shell
+$ tools/texture_to_cube pq_to_400nit_rec709.png pq_to_400nit_rec709.cube
+$ ffmpeg -i pq_video.mkv -vf lut3d=pq_to_400nit_rec709.cube -colorspace bt709 -color_primaries bt709 -color_trc bt709 400nit_rec709_video.mkv
+```
+
+Note: instead of converting to a standard color space such as Rec. 709, it is
+also possible to convert to the color space of the specific display on which
+the content is to be shown, in which case the transformed content does not need
+any specific tagging and should be displayed directly without color management
+(for example using `ffplay`).
diff --git a/third-party/libjxl/libjxl/tools/hdr/display_to_hlg.cc b/third-party/libjxl/libjxl/tools/hdr/display_to_hlg.cc
new file mode 100644
index 0000000000..f9c2804cc4
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/hdr/display_to_hlg.cc
@@ -0,0 +1,91 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/hlg.h"
+#include "lib/extras/tone_mapping.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/enc_color_management.h"
+#include "tools/args.h"
+#include "tools/cmdline.h"
+#include "tools/file_io.h"
+#include "tools/hdr/image_utils.h"
+#include "tools/thread_pool_internal.h"
+
+int main(int argc, const char** argv) {  // Converts display light to HLG.
+  jpegxl::tools::ThreadPoolInternal pool;
+
+  jpegxl::tools::CommandLineParser parser;
+  float max_nits = 0;  // No usable default: --max_nits is required below.
+  auto max_nits_option = parser.AddOptionValue(
+      'm', "max_nits", "nits", "maximum luminance of the display", &max_nits,
+      &jpegxl::tools::ParseFloat, 0);
+  float surround_nits = 5;
+  parser.AddOptionValue(
+      's', "surround_nits", "nits",
+      "surround luminance of the viewing environment (default: 5)",
+      &surround_nits, &jpegxl::tools::ParseFloat, 0);
+  float preserve_saturation = .1f;
+  parser.AddOptionValue(
+      '\0', "preserve_saturation", "0..1",
+      "to what extent to try and preserve saturation over luminance if an "
+      "inverse gamma < 1 generates out-of-gamut colors",
+      &preserve_saturation, &jpegxl::tools::ParseFloat, 0);
+  const char* input_filename = nullptr;
+  auto input_filename_option = parser.AddPositionalOption(
+      "input", true, "input image", &input_filename, 0);
+  const char* output_filename = nullptr;
+  auto output_filename_option = parser.AddPositionalOption(
+      "output", true, "output image", &output_filename, 0);
+
+  if (!parser.Parse(argc, argv)) {
+    fprintf(stderr, "See -h for help.\n");
+    return EXIT_FAILURE;
+  }
+
+  if (parser.HelpFlagPassed()) {
+    parser.PrintHelp();
+    return EXIT_SUCCESS;
+  }
+  // --max_nits and both file names are mandatory.
+  if (!parser.GetOption(max_nits_option)->matched()) {
+    fprintf(stderr,
+            "Missing required argument --max_nits.\nSee -h for help.\n");
+    return EXIT_FAILURE;
+  }
+  if (!parser.GetOption(input_filename_option)->matched()) {
+    fprintf(stderr, "Missing input filename.\nSee -h for help.\n");
+    return EXIT_FAILURE;
+  }
+  if (!parser.GetOption(output_filename_option)->matched()) {
+    fprintf(stderr, "Missing output filename.\nSee -h for help.\n");
+    return EXIT_FAILURE;
+  }
+  // Decode the input, apply the HLG inverse OOTF, then gamut-map the result.
+  std::vector<uint8_t> encoded;
+  JXL_CHECK(jpegxl::tools::ReadFile(input_filename, &encoded));
+  jxl::CodecInOut image;
+  JXL_CHECK(jxl::SetFromBytes(jxl::Span<const uint8_t>(encoded),
+                              jxl::extras::ColorHints(), &image, &pool));
+  image.metadata.m.SetIntensityTarget(max_nits);
+  JXL_CHECK(jxl::HlgInverseOOTF(
+      &image.Main(), jxl::GetHlgGamma(max_nits, surround_nits), &pool));
+  JXL_CHECK(jxl::GamutMap(&image, preserve_saturation, &pool));
+  image.metadata.m.SetIntensityTarget(301);  // NOTE(review): why 301? Confirm.
+  // Convert to RGB with Rec. 2100 primaries, D65 white and the HLG transfer.
+  jxl::ColorEncoding hlg;
+  hlg.SetColorSpace(jxl::ColorSpace::kRGB);
+  hlg.primaries = jxl::Primaries::k2100;
+  hlg.white_point = jxl::WhitePoint::kD65;
+  hlg.tf.SetTransferFunction(jxl::TransferFunction::kHLG);
+  JXL_CHECK(hlg.CreateICC());
+  JXL_CHECK(jpegxl::tools::TransformCodecInOutTo(image, hlg, &pool));
+  image.metadata.m.color_encoding = hlg;
+  JXL_CHECK(jxl::Encode(image, output_filename, &encoded, &pool));
+  JXL_CHECK(jpegxl::tools::WriteFile(output_filename, encoded));
+}
diff --git a/third-party/libjxl/libjxl/tools/hdr/exr_to_pq.cc b/third-party/libjxl/libjxl/tools/hdr/exr_to_pq.cc
new file mode 100644
index 0000000000..a277871a80
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/hdr/exr_to_pq.cc
@@ -0,0 +1,159 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/dec/decode.h"
+#include "lib/extras/packed_image_convert.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/image_bundle.h"
+#include "tools/cmdline.h"
+#include "tools/file_io.h"
+#include "tools/hdr/image_utils.h"
+#include "tools/thread_pool_internal.h"
+
+namespace {
+
+struct LuminanceInfo {  // Parsed value of the --luminance option.
+  enum class Kind { kWhite, kMaximum };  // From the "white=" / "max=" prefix.
+  Kind kind = Kind::kWhite;
+  float luminance = 100.f;  // Default corresponds to "white=100".
+};
+
+// Parses a "white=<value>" or "max=<value>" argument into `luminance_info`.
+bool ParseLuminanceInfo(const char* argument, LuminanceInfo* luminance_info) {
+  const bool is_white = strncmp(argument, "white=", 6) == 0;
+  const bool is_max = !is_white && strncmp(argument, "max=", 4) == 0;
+  if (!is_white && !is_max) {
+    fprintf(stderr,
+            "Invalid prefix for luminance info, expected white= or max=\n");
+    return false;
+  }
+  luminance_info->kind =
+      is_white ? LuminanceInfo::Kind::kWhite : LuminanceInfo::Kind::kMaximum;
+  const char* const value = argument + (is_white ? 6 : 4);
+  return jpegxl::tools::ParseFloat(value, &luminance_info->luminance);
+}
+
+}  // namespace
+
+int main(int argc, const char** argv) {  // Converts the input image to PQ.
+  jpegxl::tools::ThreadPoolInternal pool;
+
+  jpegxl::tools::CommandLineParser parser;
+  LuminanceInfo luminance_info;
+  auto luminance_option =
+      parser.AddOptionValue('l', "luminance", "<max|white=N>",
+                            "luminance information (defaults to whiteLuminance "
+                            "header if present, otherwise to white=100)",
+                            &luminance_info, &ParseLuminanceInfo, 0);
+  const char* input_filename = nullptr;
+  auto input_filename_option = parser.AddPositionalOption(
+      "input", true, "input image", &input_filename, 0);
+  const char* output_filename = nullptr;
+  auto output_filename_option = parser.AddPositionalOption(
+      "output", true, "output image", &output_filename, 0);
+
+  if (!parser.Parse(argc, argv)) {
+    fprintf(stderr, "See -h for help.\n");
+    return EXIT_FAILURE;
+  }
+
+  if (parser.HelpFlagPassed()) {
+    parser.PrintHelp();
+    return EXIT_SUCCESS;
+  }
+  // Both positional file names are mandatory.
+  if (!parser.GetOption(input_filename_option)->matched()) {
+    fprintf(stderr, "Missing input filename.\nSee -h for help.\n");
+    return EXIT_FAILURE;
+  }
+  if (!parser.GetOption(output_filename_option)->matched()) {
+    fprintf(stderr, "Missing output filename.\nSee -h for help.\n");
+    return EXIT_FAILURE;
+  }
+  // Read and decode the input file into a packed pixel file.
+  jxl::extras::PackedPixelFile ppf;
+  std::vector<uint8_t> input_bytes;
+  JXL_CHECK(jpegxl::tools::ReadFile(input_filename, &input_bytes));
+  JXL_CHECK(jxl::extras::DecodeBytes(jxl::Span<const uint8_t>(input_bytes),
+                                     jxl::extras::ColorHints(), &ppf));
+  // Convert to a linear transfer function with Rec. 2100 primaries.
+  jxl::CodecInOut image;
+  JXL_CHECK(
+      jxl::extras::ConvertPackedPixelFileToCodecInOut(ppf, &pool, &image));
+  image.metadata.m.bit_depth.exponent_bits_per_sample = 0;
+  jxl::ColorEncoding linear_rec_2020 = image.Main().c_current();
+  linear_rec_2020.primaries = jxl::Primaries::k2100;
+  linear_rec_2020.tf.SetTransferFunction(jxl::TransferFunction::kLinear);
+  JXL_CHECK(linear_rec_2020.CreateICC());
+  JXL_CHECK(
+      jpegxl::tools::TransformCodecInOutTo(image, linear_rec_2020, &pool));
+  // Matrix derived from the current primaries; used for luminance below.
+  float primaries_xyz[9];
+  const jxl::PrimariesCIExy primaries = image.Main().c_current().GetPrimaries();
+  const jxl::CIExy white_point = image.Main().c_current().GetWhitePoint();
+  JXL_CHECK(jxl::PrimariesToXYZ(primaries.r.x, primaries.r.y, primaries.g.x,
+                                primaries.g.y, primaries.b.x, primaries.b.y,
+                                white_point.x, white_point.y, primaries_xyz));
+  // Without --luminance, a non-zero intensity target from the file wins.
+  float max_value = 0.f;
+  float max_relative_luminance = 0.f;
+  float white_luminance = ppf.info.intensity_target != 0 &&
+                                  !parser.GetOption(luminance_option)->matched()
+                              ? ppf.info.intensity_target
+                          : luminance_info.kind == LuminanceInfo::Kind::kWhite
+                              ? luminance_info.luminance
+                              : 0.f;
+  bool out_of_gamut = false;  // Negative samples are reported only once.
+  for (size_t y = 0; y < image.ysize(); ++y) {
+    const float* const rows[3] = {image.Main().color()->ConstPlaneRow(0, y),
+                                  image.Main().color()->ConstPlaneRow(1, y),
+                                  image.Main().color()->ConstPlaneRow(2, y)};
+    for (size_t x = 0; x < image.xsize(); ++x) {
+      if (!out_of_gamut &&
+          (rows[0][x] < 0 || rows[1][x] < 0 || rows[2][x] < 0)) {
+        out_of_gamut = true;
+        fprintf(stderr,
+                "WARNING: found colors outside of the Rec. 2020 gamut.\n");
+      }
+      max_value = std::max(
+          max_value, std::max(rows[0][x], std::max(rows[1][x], rows[2][x])));
+      const float luminance = primaries_xyz[1] * rows[0][x] +
+                              primaries_xyz[4] * rows[1][x] +
+                              primaries_xyz[7] * rows[2][x];  // NOTE(review): verify indices 1/4/7 against the matrix layout.
+      if (luminance_info.kind == LuminanceInfo::Kind::kMaximum &&
+          luminance > max_relative_luminance) {
+        max_relative_luminance = luminance;  // Brightest pixel so far.
+        white_luminance = luminance_info.luminance / luminance;
+      }
+    }
+  }
+  jxl::ScaleImage(1.f / max_value, image.Main().color());  // NOTE(review): max_value stays 0 for an all-black image.
+  white_luminance *= max_value;
+  image.metadata.m.SetIntensityTarget(white_luminance);
+  if (white_luminance > 10000) {
+    fprintf(stderr,
+            "WARNING: the image is too bright for PQ (would need (1, 1, 1) to "
+            "be %g cd/m^2).\n",
+            white_luminance);
+  } else {
+    fprintf(stderr,
+            "The resulting image should be compressed with "
+            "--intensity_target=%g.\n",
+            white_luminance);
+  }
+  // Re-encode with the PQ transfer function and write the output file.
+  jxl::ColorEncoding pq = image.Main().c_current();
+  pq.tf.SetTransferFunction(jxl::TransferFunction::kPQ);
+  JXL_CHECK(pq.CreateICC());
+  JXL_CHECK(jpegxl::tools::TransformCodecInOutTo(image, pq, &pool));
+  image.metadata.m.color_encoding = pq;
+  std::vector<uint8_t> encoded;
+  JXL_CHECK(jxl::Encode(image, output_filename, &encoded, &pool));
+  JXL_CHECK(jpegxl::tools::WriteFile(output_filename, encoded));
+}
diff --git a/third-party/libjxl/libjxl/tools/hdr/generate_lut_template.cc b/third-party/libjxl/libjxl/tools/hdr/generate_lut_template.cc
new file mode 100644
index 0000000000..da8ecee738
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/hdr/generate_lut_template.cc
@@ -0,0 +1,63 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/image_metadata.h"
+#include "tools/args.h"
+#include "tools/cmdline.h"
+#include "tools/thread_pool_internal.h"
+
+// Generates an identity 3D LUT "template" image of (N * N) x N pixels and
+// writes it to the output path given on the command line. At pixel (x, y) the
+// red channel is (x % N) / (N - 1), green is y / (N - 1) and blue is
+// (x / N) / (N - 1); the image is tagged as sRGB and encoded with 16 bits per
+// sample.
+//
+// Returns EXIT_FAILURE for invalid command lines (including a LUT size below
+// 2); failures while encoding or writing abort through JXL_CHECK.
+int main(int argc, const char** argv) {
+  jpegxl::tools::ThreadPoolInternal pool;
+
+  jpegxl::tools::CommandLineParser parser;
+  size_t N = 64;
+  parser.AddOptionValue('N', "lut_size", "N", "linear size of the LUT", &N,
+                        &jpegxl::tools::ParseUnsigned, 0);
+  const char* output_filename = nullptr;
+  auto output_filename_option = parser.AddPositionalOption(
+      "output", true, "output LUT", &output_filename, 0);
+
+  if (!parser.Parse(argc, argv)) {
+    fprintf(stderr, "See -h for help.\n");
+    return EXIT_FAILURE;
+  }
+
+  if (parser.HelpFlagPassed()) {
+    parser.PrintHelp();
+    return EXIT_SUCCESS;
+  }
+
+  if (!parser.GetOption(output_filename_option)->matched()) {
+    fprintf(stderr, "Missing output filename.\nSee -h for help.\n");
+    return EXIT_FAILURE;
+  }
+
+  // Channel values are normalized by (N - 1): N == 1 would divide zero by
+  // zero and N == 0 would wrap around, so neither can describe a valid LUT.
+  if (N < 2) {
+    fprintf(stderr, "The LUT size must be at least 2.\nSee -h for help.\n");
+    return EXIT_FAILURE;
+  }
+
+  // One row per green level; each row holds N blocks (blue) of N pixels (red).
+  jxl::Image3F image(N * N, N);
+  JXL_CHECK(jxl::RunOnPool(
+      &pool, 0, N, jxl::ThreadPool::NoInit,
+      [&](const uint32_t y, size_t /* thread */) {
+        const float g = static_cast<float>(y) / (N - 1);
+        float* const JXL_RESTRICT rows[3] = {
+            image.PlaneRow(0, y), image.PlaneRow(1, y), image.PlaneRow(2, y)};
+        for (size_t x = 0; x < N * N; ++x) {
+          rows[0][x] = static_cast<float>(x % N) / (N - 1);
+          rows[1][x] = g;
+          rows[2][x] = static_cast<float>(x / N) / (N - 1);
+        }
+      },
+      "GenerateTemplate"));
+
+  jxl::CodecInOut output;
+  output.metadata.m.bit_depth.bits_per_sample = 16;
+  output.SetFromImage(std::move(image), jxl::ColorEncoding::SRGB());
+  std::vector<uint8_t> encoded;
+  JXL_CHECK(jxl::Encode(output, jxl::ColorEncoding::SRGB(), 16, output_filename,
+                        &encoded, &pool));
+  JXL_CHECK(jpegxl::tools::WriteFile(output_filename, encoded));
+}
diff --git a/third-party/libjxl/libjxl/tools/hdr/image_utils.h b/third-party/libjxl/libjxl/tools/hdr/image_utils.h
new file mode 100644
index 0000000000..0f1fba9260
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/hdr/image_utils.h
@@ -0,0 +1,34 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_HDR_IMAGE_UTILS_H_
+#define TOOLS_HDR_IMAGE_UTILS_H_
+
+#include <jxl/cms_interface.h>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jpegxl {
+namespace tools {
+
+// Converts the preview frame (when the metadata says there is one) and then
+// every frame of `io` to the color encoding `c_desired`, using the CMS
+// returned by jxl::GetJxlCms(). The first failing conversion is propagated to
+// the caller and the remaining frames are left untouched.
+static inline jxl::Status TransformCodecInOutTo(
+    jxl::CodecInOut& io, const jxl::ColorEncoding& c_desired,
+    jxl::ThreadPool* pool) {
+  const JxlCmsInterface& cms = jxl::GetJxlCms();
+
+  const bool has_preview = io.metadata.m.have_preview;
+  if (has_preview) {
+    JXL_RETURN_IF_ERROR(io.preview_frame.TransformTo(c_desired, cms, pool));
+  }
+
+  for (size_t i = 0; i < io.frames.size(); ++i) {
+    JXL_RETURN_IF_ERROR(io.frames[i].TransformTo(c_desired, cms, pool));
+  }
+  return true;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_HDR_IMAGE_UTILS_H_
diff --git a/third-party/libjxl/libjxl/tools/hdr/local_tone_map.cc b/third-party/libjxl/libjxl/tools/hdr/local_tone_map.cc
new file mode 100644
index 0000000000..36ac346c0c
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/hdr/local_tone_map.cc
@@ -0,0 +1,490 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/tone_mapping.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_gamma_correct.h"
+#include "lib/jxl/image_bundle.h"
+#include "tools/args.h"
+#include "tools/cmdline.h"
+#include "tools/thread_pool_internal.h"
+
+namespace jxl {
+namespace {
+
+// Weights of the 5-tap separable filter used by Downsample() and Upsample().
+// Both rows of the initializer hold the same three values, which satisfy
+// .375 + 2 * .25 + 2 * .0625 == 1 (presumably center, +-1 and +-2 taps; see
+// WeightsSeparable5 for the exact layout).
+constexpr WeightsSeparable5 kPyramidFilter = {
+    {HWY_REP4(0.375f), HWY_REP4(0.25f), HWY_REP4(0.0625f)},
+    {HWY_REP4(0.375f), HWY_REP4(0.25f), HWY_REP4(0.0625f)}};
+
+// Per-pixel contrast measure; expects sRGB input.
+//
+// The three channels are linearized, combined into a single plane with the
+// kLuminances coefficients and converted back to the sRGB curve. That plane is
+// filtered by Symmetric3 with kLaplacianWeights, and
+// consumer(x, y, |filtered value|) is then called once for each pixel.
+template <typename Consumer>
+void Contrast(const jxl::Image3F& image, const Consumer& consumer,
+              ThreadPool* const pool) {
+  static constexpr float kLuminances[3] = {0.2126, 0.7152, 0.0722};
+  static constexpr WeightsSymmetric3 kLaplacianWeights = {
+      {HWY_REP4(-4)}, {HWY_REP4(1)}, {HWY_REP4(0)}};
+  const size_t width = image.xsize();
+  const size_t height = image.ysize();
+
+  ImageF grayscale(width, height);
+  for (size_t y = 0; y < height; ++y) {
+    const float* const JXL_RESTRICT red = image.PlaneRow(0, y);
+    const float* const JXL_RESTRICT green = image.PlaneRow(1, y);
+    const float* const JXL_RESTRICT blue = image.PlaneRow(2, y);
+    float* const JXL_RESTRICT gray = grayscale.Row(y);
+
+    for (size_t x = 0; x < width; ++x) {
+      gray[x] = LinearToSrgb8Direct(
+          kLuminances[0] * Srgb8ToLinearDirect(red[x]) +
+          kLuminances[1] * Srgb8ToLinearDirect(green[x]) +
+          kLuminances[2] * Srgb8ToLinearDirect(blue[x]));
+    }
+  }
+
+  ImageF laplacian(width, height);
+  Symmetric3(grayscale, Rect(grayscale), kLaplacianWeights, pool, &laplacian);
+  for (size_t y = 0; y < height; ++y) {
+    const float* const JXL_RESTRICT filtered = laplacian.ConstRow(y);
+    for (size_t x = 0; x < width; ++x) {
+      consumer(x, y, std::abs(filtered[x]));
+    }
+  }
+}
+
+// Per-pixel saturation measure: calls consumer(x, y, s) once for each pixel,
+// where s is the (population) standard deviation of the pixel's three channel
+// values.
+template <typename Consumer>
+void Saturation(const jxl::Image3F& image, const Consumer& consumer) {
+  for (size_t y = 0; y < image.ysize(); ++y) {
+    const float* const JXL_RESTRICT red = image.PlaneRow(0, y);
+    const float* const JXL_RESTRICT green = image.PlaneRow(1, y);
+    const float* const JXL_RESTRICT blue = image.PlaneRow(2, y);
+    for (size_t x = 0; x < image.xsize(); ++x) {
+      // TODO(sboukortt): experiment with other methods of computing the
+      // saturation, e.g. C*/L* in LUV/LCh.
+      const float mean = (1.f / 3) * (red[x] + green[x] + blue[x]);
+      const float dr = red[x] - mean;
+      const float dg = green[x] - mean;
+      const float db = blue[x] - mean;
+      const float variance = (1.f / 3) * (dr * dr + dg * dg + db * db);
+      consumer(x, y, std::sqrt(variance));
+    }
+  }
+}
+
+// Per-pixel "midtoneness" measure: calls consumer(x, y, w) once for each
+// pixel, where w is the product over the three channels of
+// exp(-(value - .5)^2 / (2 * sigma^2)), i.e. it is largest when all channels
+// are at .5 and `sigma` controls how quickly it falls off.
+template <typename Consumer>
+void MidToneness(const jxl::Image3F& image, const float sigma,
+                 const Consumer& consumer) {
+  const float inv_sigma_squared = 1.f / (sigma * sigma);
+  const auto bump = [inv_sigma_squared](const float value) {
+    const float offset = value - .5f;
+    return std::exp(-.5f * offset * offset * inv_sigma_squared);
+  };
+  for (size_t y = 0; y < image.ysize(); ++y) {
+    const float* const JXL_RESTRICT red = image.PlaneRow(0, y);
+    const float* const JXL_RESTRICT green = image.PlaneRow(1, y);
+    const float* const JXL_RESTRICT blue = image.PlaneRow(2, y);
+    for (size_t x = 0; x < image.xsize(); ++x) {
+      consumer(x, y, bump(red[x]) * bump(green[x]) * bump(blue[x]));
+    }
+  }
+}
+
+// Computes the (not yet normalized) weight of every pixel of `image` as the
+// product of its contrast, saturation and midtoneness measures, each raised to
+// the matching *_weight exponent. The product is accumulated as a sum of
+// logarithms and exponentiated at the end. A measure whose weight is not
+// positive is skipped altogether.
+ImageF ComputeWeights(const jxl::Image3F& image, const float contrast_weight,
+                      const float saturation_weight,
+                      const float midtoneness_weight,
+                      const float midtoneness_sigma, ThreadPool* const pool) {
+  // Holds log-weights until the final loop below.
+  ImageF weights(image.xsize(), image.ysize());
+  ZeroFillImage(&weights);
+
+  if (contrast_weight > 0) {
+    const auto set_contrast = [&weights, contrast_weight](
+                                  const size_t x, const size_t y,
+                                  const float contrast) {
+      weights.Row(y)[x] = contrast_weight * std::log(contrast);
+    };
+    Contrast(image, set_contrast, pool);
+  }
+
+  if (saturation_weight > 0) {
+    const auto add_saturation = [&weights, saturation_weight](
+                                    const size_t x, const size_t y,
+                                    const float saturation) {
+      weights.Row(y)[x] += saturation_weight * std::log(saturation);
+    };
+    Saturation(image, add_saturation);
+  }
+
+  if (midtoneness_weight > 0) {
+    const auto add_midtoneness = [&weights, midtoneness_weight](
+                                     const size_t x, const size_t y,
+                                     const float midtoneness) {
+      weights.Row(y)[x] += midtoneness_weight * std::log(midtoneness);
+    };
+    MidToneness(image, midtoneness_sigma, add_midtoneness);
+  }
+
+  // Back from the log domain.
+  for (size_t y = 0; y < weights.ysize(); ++y) {
+    float* const JXL_RESTRICT row = weights.Row(y);
+    for (size_t x = 0; x < weights.xsize(); ++x) {
+      row[x] = std::exp(row[x]);
+    }
+  }
+
+  return weights;
+}
+
+// Computes one weight map per input image (see the single-image overload) and
+// then rescales them so that, at every pixel, the weights of all the images
+// add up to (just under) 1.
+//
+// Returns an empty vector if `images` is empty or if the images do not all
+// have the same dimensions as the first one.
+std::vector<ImageF> ComputeWeights(const std::vector<Image3F>& images,
+                                   const float contrast_weight,
+                                   const float saturation_weight,
+                                   const float midtoneness_weight,
+                                   const float midtoneness_sigma,
+                                   ThreadPool* const pool) {
+  // images.front() is needed below and is undefined for an empty vector.
+  if (images.empty()) return {};
+  const size_t xsize = images.front().xsize();
+  const size_t ysize = images.front().ysize();
+
+  std::vector<ImageF> weights;
+  weights.reserve(images.size());
+  for (const Image3F& image : images) {
+    if (image.xsize() != xsize || image.ysize() != ysize) {
+      return {};
+    }
+    weights.push_back(ComputeWeights(image, contrast_weight, saturation_weight,
+                                     midtoneness_weight, midtoneness_sigma,
+                                     pool));
+  }
+
+  // rows[i] points at the current row of the i-th weight map.
+  std::vector<float*> rows(images.size());
+  for (size_t y = 0; y < ysize; ++y) {
+    for (size_t i = 0; i < images.size(); ++i) {
+      rows[i] = weights[i].Row(y);
+    }
+    for (size_t x = 0; x < xsize; ++x) {
+      // The small bias keeps the division finite where every weight is 0.
+      float sum = 1e-9f;
+      for (size_t i = 0; i < images.size(); ++i) {
+        sum += rows[i][x];
+      }
+      const float ratio = 1.f / sum;
+      for (size_t i = 0; i < images.size(); ++i) {
+        rows[i][x] *= ratio;
+      }
+    }
+  }
+
+  return weights;
+}
+
+// Halves the resolution of `image` (sizes are rounded up): the image is first
+// filtered with kPyramidFilter, then every other sample of every other row is
+// kept.
+ImageF Downsample(const ImageF& image, ThreadPool* const pool) {
+  ImageF blurred(image.xsize(), image.ysize());
+  Separable5(image, Rect(image), kPyramidFilter, pool, &blurred);
+
+  ImageF half(DivCeil(image.xsize(), 2), DivCeil(image.ysize(), 2));
+  const size_t half_xsize = half.xsize();
+  const size_t half_ysize = half.ysize();
+  for (size_t y = 0; y < half_ysize; ++y) {
+    const float* const JXL_RESTRICT src = blurred.ConstRow(2 * y);
+    float* const JXL_RESTRICT dst = half.Row(y);
+    for (size_t x = 0; x < half_xsize; ++x) {
+      dst[x] = src[2 * x];
+    }
+  }
+  return half;
+}
+
+// Three-plane variant: downsamples each plane of `image` independently.
+Image3F Downsample(const Image3F& image, ThreadPool* const pool) {
+  ImageF plane0 = Downsample(image.Plane(0), pool);
+  ImageF plane1 = Downsample(image.Plane(1), pool);
+  ImageF plane2 = Downsample(image.Plane(2), pool);
+  return Image3F(std::move(plane0), std::move(plane1), std::move(plane2));
+}
+
+// Returns a copy of `in` enlarged by `xborder` columns on the left and on the
+// right and by `yborder` rows at the top and at the bottom. The added pixels
+// are filled by mirroring the image content.
+Image3F PadImageMirror(const Image3F& in, const size_t xborder,
+                       const size_t yborder) {
+  const size_t xsize = in.xsize();
+  const size_t ysize = in.ysize();
+  Image3F out(xsize + 2 * xborder, ysize + 2 * yborder);
+
+  // The direct copies further down read up to `xborder` columns / `yborder`
+  // rows from the image, so borders larger than the image go through Mirror()
+  // for every output coordinate instead.
+  if (xborder > xsize || yborder > ysize) {
+    const int32_t out_xsize = static_cast<int32_t>(out.xsize());
+    const int32_t out_ysize = static_cast<int32_t>(out.ysize());
+    const int32_t xoffset = static_cast<int32_t>(xborder);
+    const int32_t yoffset = static_cast<int32_t>(yborder);
+    for (size_t c = 0; c < 3; c++) {
+      for (int32_t y = 0; y < out_ysize; y++) {
+        float* row_out = out.PlaneRow(c, y);
+        const float* row_in = in.PlaneRow(c, Mirror(y - yoffset, ysize));
+        for (int32_t x = 0; x < out_xsize; x++) {
+          const int32_t xin = Mirror(x - xoffset, xsize);
+          row_out[x] = row_in[xin];
+        }
+      }
+    }
+    return out;
+  }
+
+  CopyImageTo(Rect(in), in, Rect(xborder, yborder, xsize, ysize), &out);
+  const size_t row_bytes = out.xsize() * sizeof(float);
+  for (size_t c = 0; c < 3; c++) {
+    // Horizontal pad: left and right borders of the rows holding the image.
+    for (size_t y = 0; y < ysize; y++) {
+      const float* src = in.ConstPlaneRow(c, y);
+      float* dst = out.PlaneRow(c, y + yborder);
+      for (size_t x = 0; x < xborder; x++) {
+        dst[x] = src[xborder - x - 1];
+        dst[x + xsize + xborder] = src[xsize - 1 - x];
+      }
+    }
+    // Vertical pad: whole rows are copied from the already padded output, so
+    // the corners get filled too.
+    for (size_t y = 0; y < yborder; y++) {
+      memcpy(out.PlaneRow(c, y), out.ConstPlaneRow(c, 2 * yborder - 1 - y),
+             row_bytes);
+      memcpy(out.PlaneRow(c, y + ysize + yborder),
+             out.ConstPlaneRow(c, ysize + yborder - 1 - y), row_bytes);
+    }
+  }
+  return out;
+}
+
+// Doubles the resolution of `image`. The image is padded by one mirrored
+// pixel on each side, its samples are written (multiplied by 4) to the even
+// positions of a zero-filled grid twice as large, the grid is filtered with
+// kPyramidFilter, and the part corresponding to the unpadded image is cropped
+// out. When `odd_width` / `odd_height` is set, the result is one column / row
+// smaller than twice the input size.
+Image3F Upsample(const Image3F& image, const bool odd_width,
+                 const bool odd_height, ThreadPool* const pool) {
+  const Image3F padded = PadImageMirror(image, 1, 1);
+  const size_t padded_xsize = padded.xsize();
+  const size_t padded_ysize = padded.ysize();
+
+  Image3F expanded(2 * padded_xsize, 2 * padded_ysize);
+  ZeroFillImage(&expanded);
+  for (int c = 0; c < 3; ++c) {
+    for (size_t y = 0; y < padded_ysize; ++y) {
+      const float* const JXL_RESTRICT src = padded.ConstPlaneRow(c, y);
+      float* const JXL_RESTRICT dst = expanded.PlaneRow(c, 2 * y);
+      for (size_t x = 0; x < padded_xsize; ++x) {
+        dst[2 * x] = 4 * src[x];
+      }
+    }
+  }
+
+  Image3F smoothed(expanded.xsize(), expanded.ysize());
+  for (int c = 0; c < 3; ++c) {
+    Separable5(expanded.Plane(c), Rect(expanded), kPyramidFilter, pool,
+               &smoothed.Plane(c));
+  }
+
+  const size_t out_xsize = 2 * image.xsize() - (odd_width ? 1 : 0);
+  const size_t out_ysize = 2 * image.ysize() - (odd_height ? 1 : 0);
+  Image3F result(out_xsize, out_ysize);
+  // The one-pixel padding occupies two pixels in the doubled image.
+  CopyImageTo(Rect(2, 2, out_xsize, out_ysize), smoothed, Rect(result),
+              &result);
+  return result;
+}
+
+// Builds a pyramid of `num_levels` images: level 0 is `image` itself and every
+// following level is the Downsample() of the previous one.
+std::vector<ImageF> GaussianPyramid(ImageF image, int num_levels,
+                                    ThreadPool* pool) {
+  std::vector<ImageF> pyramid(num_levels);
+  const int last_level = num_levels - 1;
+  for (int level = 0; level < last_level; ++level) {
+    // Store the current level first, then derive the next one from it.
+    pyramid[level] = std::move(image);
+    image = Downsample(pyramid[level], pool);
+  }
+  pyramid[last_level] = std::move(image);
+  return pyramid;
+}
+
+// Builds a pyramid of `num_levels` images in which every level but the last
+// holds the difference between an image and the upsampled version of its
+// Downsample(); the last level holds the final downsampled image unchanged.
+std::vector<Image3F> LaplacianPyramid(Image3F image, int num_levels,
+                                      ThreadPool* pool) {
+  std::vector<Image3F> pyramid(num_levels);
+  const int last_level = num_levels - 1;
+  for (int level = 0; level < last_level; ++level) {
+    Image3F coarser = Downsample(image, pool);
+    // Downsample() rounds sizes up, so Upsample() has to be told when the
+    // size to restore is odd.
+    const bool odd_width = (image.xsize() % 2) != 0;
+    const bool odd_height = (image.ysize() % 2) != 0;
+    const Image3F predicted = Upsample(coarser, odd_width, odd_height, pool);
+    Subtract(image, predicted, &image);
+    pyramid[level] = std::move(image);
+    image = std::move(coarser);
+  }
+  pyramid[last_level] = std::move(image);
+  return pyramid;
+}
+
+// Collapses a Laplacian pyramid into a single image: starting from the last
+// (coarsest) level, the running result is repeatedly upsampled to the size of
+// the next finer level and that level is added to it.
+Image3F ReconstructFromLaplacianPyramid(std::vector<Image3F> pyramid,
+                                        ThreadPool* const pool) {
+  Image3F result = std::move(pyramid.back());
+  pyramid.pop_back();
+  for (size_t remaining = pyramid.size(); remaining > 0; --remaining) {
+    const Image3F& detail = pyramid[remaining - 1];
+    const bool odd_width = (detail.xsize() % 2) != 0;
+    const bool odd_height = (detail.ysize() % 2) != 0;
+    result = Upsample(result, odd_width, odd_height, pool);
+    AddTo(Rect(result), detail, &result);
+  }
+  return result;
+}
+
+// Exposure fusion algorithm as described in:
+// https://mericam.github.io/exposure_fusion/
+//
+// That is, given n images of identical size: for each pixel coordinate, one
+// weight per input image is computed, indicating how much each input image will
+// contribute to the result. There are therefore n weight maps, the sum of which
+// is 1 at every pixel.
+//
+// Those weights are then applied at various scales rather than directly at full
+// resolution. To understand how, it helps to familiarize oneself with Laplacian
+// and Gaussian pyramids, as described in "The Laplacian Pyramid as a Compact
+// Image Code" by P. Burt and E. Adelson:
+// http://persci.mit.edu/pub_pdfs/pyramid83.pdf
+//
+// A Gaussian pyramid of k levels is a sequence of k images in which the first
+// image is the original image and each following level is a low-pass-filtered
+// version of the previous one. A Laplacian pyramid is obtained from a Gaussian
+// pyramid by:
+//
+//   laplacian_pyramid[i] = gaussian_pyramid[i] − gaussian_pyramid[i + 1].
+//   (The last item of the Laplacian pyramid is just the last one from the
+//    Gaussian pyramid without subtraction.)
+//
+// From there, the original image can be reconstructed by adding all the images
+// from the Laplacian pyramid together. (If desired, the Gaussian pyramid can be
+// reconstructed as well by storing the cumulative sums starting from the end.)
+//
+// Having established that, the application of the weight images is done by
+// constructing a Laplacian pyramid for each input image, as well as a Gaussian
+// pyramid for each weight image, and then constructing a Laplacian pyramid such
+// that:
+//
+//   pyramid[i] = sum(laplacian_pyramids[j][i] .* weight_gaussian_pyramids[j][i]
+//                      for j in 1..n)
+//
+// And then reconstructing an image from the pyramid thus obtained.
+//
+// Preconditions, enforced with JXL_CHECK: `num_levels` is at least 1, and
+// `images` is non-empty with all images sharing the same dimensions. The
+// *_weight / midtoneness_sigma arguments are forwarded to ComputeWeights().
+Image3F ExposureFusion(std::vector<Image3F> images, int num_levels,
+                       const float contrast_weight,
+                       const float saturation_weight,
+                       const float midtoneness_weight,
+                       const float midtoneness_sigma, ThreadPool* const pool) {
+  // The pyramid builders unconditionally write level `num_levels - 1`, and the
+  // reconstruction takes the last level of a pyramid that must not be empty.
+  JXL_CHECK(num_levels >= 1);
+  JXL_CHECK(!images.empty());
+
+  std::vector<ImageF> weights =
+      ComputeWeights(images, contrast_weight, saturation_weight,
+                     midtoneness_weight, midtoneness_sigma, pool);
+  // ComputeWeights() returns an empty vector when the image sizes differ;
+  // weights[i] is read for every image below.
+  JXL_CHECK(weights.size() == images.size());
+
+  std::vector<Image3F> pyramid(num_levels);
+  for (size_t i = 0; i < images.size(); ++i) {
+    const std::vector<ImageF> weight_pyramid =
+        GaussianPyramid(std::move(weights[i]), num_levels, pool);
+    const std::vector<Image3F> image_pyramid =
+        LaplacianPyramid(std::move(images[i]), num_levels, pool);
+
+    for (int k = 0; k < num_levels; ++k) {
+      Image3F product(Product(weight_pyramid[k], image_pyramid[k].Plane(0)),
+                      Product(weight_pyramid[k], image_pyramid[k].Plane(1)),
+                      Product(weight_pyramid[k], image_pyramid[k].Plane(2)));
+      // The first image initializes each level; later ones are accumulated.
+      if (pyramid[k].xsize() == 0) {
+        pyramid[k] = std::move(product);
+      } else {
+        AddTo(Rect(product), product, &pyramid[k]);
+      }
+    }
+  }
+
+  return ReconstructFromLaplacianPyramid(std::move(pyramid), pool);
+}
+
+}  // namespace
+}  // namespace jxl
+
+// Tone maps an image to sRGB using exposure fusion.
+//
+// The input is decoded (with the color space hint "RGB_D65_202_Rel_PeQ"), and
+// several sRGB renditions of it are produced: a direct conversion when the
+// peak luminance is at most 4 * kDefaultIntensityTarget, plus one tone-mapped
+// and gamut-mapped version for each target luminance
+// kDefaultIntensityTarget * {4, 2, 1, .5} that lies below the peak. The
+// renditions are then merged by jxl::ExposureFusion and the result is encoded
+// to the output path.
+//
+// Returns EXIT_FAILURE for invalid command lines; failures while reading,
+// converting, encoding or writing abort through JXL_CHECK.
+int main(int argc, const char** argv) {
+  jpegxl::tools::ThreadPoolInternal pool;
+
+  jpegxl::tools::CommandLineParser parser;
+  float max_nits = 0;
+  parser.AddOptionValue('m', "max_nits", "nits",
+                        "maximum luminance in the image", &max_nits,
+                        &jpegxl::tools::ParseFloat, 0);
+  float preserve_saturation = .1f;
+  parser.AddOptionValue(
+      's', "preserve_saturation", "0..1",
+      "to what extent to try and preserve saturation over luminance",
+      &preserve_saturation, &jpegxl::tools::ParseFloat, 0);
+  int64_t num_levels = -1;
+  parser.AddOptionValue('l', "num_levels", "1..",
+                        "number of levels in the pyramid", &num_levels,
+                        &jpegxl::tools::ParseInt64, 0);
+  float contrast_weight = 0.f;
+  parser.AddOptionValue('c', "contrast_weight", "0..",
+                        "importance of contrast when computing weights",
+                        &contrast_weight, &jpegxl::tools::ParseFloat, 0);
+  float saturation_weight = .2f;
+  parser.AddOptionValue('a', "saturation_weight", "0..",
+                        "importance of saturation when computing weights",
+                        &saturation_weight, &jpegxl::tools::ParseFloat, 0);
+  float midtoneness_weight = 1.f;
+  parser.AddOptionValue('t', "midtoneness_weight", "0..",
+                        "importance of \"midtoneness\" when computing weights",
+                        &midtoneness_weight, &jpegxl::tools::ParseFloat, 0);
+  float midtoneness_sigma = .2f;
+  parser.AddOptionValue('g', "midtoneness_sigma", "0..",
+                        "spread of the function that computes midtoneness",
+                        &midtoneness_sigma, &jpegxl::tools::ParseFloat, 0);
+  const char* input_filename = nullptr;
+  auto input_filename_option = parser.AddPositionalOption(
+      "input", true, "input image", &input_filename, 0);
+  const char* output_filename = nullptr;
+  auto output_filename_option = parser.AddPositionalOption(
+      "output", true, "output image", &output_filename, 0);
+
+  if (!parser.Parse(argc, argv)) {
+    fprintf(stderr, "See -h for help.\n");
+    return EXIT_FAILURE;
+  }
+
+  if (parser.HelpFlagPassed()) {
+    parser.PrintHelp();
+    return EXIT_SUCCESS;
+  }
+
+  if (!parser.GetOption(input_filename_option)->matched()) {
+    fprintf(stderr, "Missing input filename.\nSee -h for help.\n");
+    return EXIT_FAILURE;
+  }
+  if (!parser.GetOption(output_filename_option)->matched()) {
+    fprintf(stderr, "Missing output filename.\nSee -h for help.\n");
+    return EXIT_FAILURE;
+  }
+
+  jxl::CodecInOut image;
+  jxl::extras::ColorHints color_hints;
+  color_hints.Add("color_space", "RGB_D65_202_Rel_PeQ");
+  std::vector<uint8_t> encoded;
+  JXL_CHECK(jpegxl::tools::ReadFile(input_filename, &encoded));
+  JXL_CHECK(jxl::SetFromBytes(jxl::Span<const uint8_t>(encoded), color_hints,
+                              &image, &pool));
+
+  // An explicit --max_nits overrides the intensity target of the image;
+  // otherwise the value stored in the metadata is used.
+  if (max_nits > 0) {
+    image.metadata.m.SetIntensityTarget(max_nits);
+  } else {
+    max_nits = image.metadata.m.IntensityTarget();
+  }
+
+  std::vector<jxl::Image3F> input_images;
+
+  // Dim enough images also contribute a plain (not tone-mapped) conversion.
+  if (max_nits <= 4 * jxl::kDefaultIntensityTarget) {
+    jxl::CodecInOut sRGB_image;
+    jxl::Image3F color(image.xsize(), image.ysize());
+    CopyImageTo(*image.Main().color(), &color);
+    sRGB_image.SetFromImage(std::move(color), image.Main().c_current());
+    JXL_CHECK(sRGB_image.Main().TransformTo(jxl::ColorEncoding::SRGB(),
+                                            jxl::GetJxlCms(), &pool));
+    input_images.push_back(std::move(*sRGB_image.Main().color()));
+  }
+
+  // Targets are kDefaultIntensityTarget * 2^(2 - i), i.e. x4, x2, x1 and x.5.
+  for (int i = 0; i < 4; ++i) {
+    const float target = std::ldexp(jxl::kDefaultIntensityTarget, 2 - i);
+    if (target >= max_nits) continue;
+    jxl::CodecInOut tone_mapped_image;
+    jxl::Image3F color(image.xsize(), image.ysize());
+    CopyImageTo(*image.Main().color(), &color);
+    tone_mapped_image.SetFromImage(std::move(color), image.Main().c_current());
+    tone_mapped_image.metadata.m.SetIntensityTarget(
+        image.metadata.m.IntensityTarget());
+    JXL_CHECK(jxl::ToneMapTo({0, target}, &tone_mapped_image, &pool));
+    JXL_CHECK(jxl::GamutMap(&tone_mapped_image, preserve_saturation, &pool));
+    JXL_CHECK(tone_mapped_image.Main().TransformTo(jxl::ColorEncoding::SRGB(),
+                                                   jxl::GetJxlCms(), &pool));
+    input_images.push_back(std::move(*tone_mapped_image.Main().color()));
+  }
+
+  if (num_levels < 1) {
+    num_levels = jxl::FloorLog2Nonzero(std::min(image.xsize(), image.ysize()));
+    // An image whose smaller side is a single pixel yields log2(1) == 0, but
+    // the pyramids need at least one level (they write level num_levels - 1).
+    if (num_levels < 1) num_levels = 1;
+  }
+
+  jxl::Image3F fused = jxl::ExposureFusion(
+      std::move(input_images), num_levels, contrast_weight, saturation_weight,
+      midtoneness_weight, midtoneness_sigma, &pool);
+
+  jxl::CodecInOut output;
+  output.SetFromImage(std::move(fused), jxl::ColorEncoding::SRGB());
+
+  JXL_CHECK(jxl::Encode(output, output_filename, &encoded, &pool));
+  JXL_CHECK(jpegxl::tools::WriteFile(output_filename, encoded));
+}
diff --git a/third-party/libjxl/libjxl/tools/hdr/pq_to_hlg.cc b/third-party/libjxl/libjxl/tools/hdr/pq_to_hlg.cc
new file mode 100644
index 0000000000..ddd5578aed
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/hdr/pq_to_hlg.cc
@@ -0,0 +1,86 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/hlg.h"
+#include "lib/extras/tone_mapping.h"
+#include "lib/jxl/enc_color_management.h"
+#include "tools/args.h"
+#include "tools/cmdline.h"
+#include "tools/hdr/image_utils.h"
+#include "tools/thread_pool_internal.h"
+
+// Converts an image (decoded with the color space hint "RGB_D65_202_Rel_PeQ")
+// to HLG: the image is tone mapped to the {0, 1000} range, the inverse HLG
+// OOTF is applied with a gamma of 1.2, the result is gamut mapped and finally
+// transformed to an HLG encoding that keeps the original primaries and uses a
+// D65 white point.
+//
+// Returns EXIT_FAILURE for invalid command lines; failures while reading,
+// converting, encoding or writing abort through JXL_CHECK.
+int main(int argc, const char** argv) {
+  jpegxl::tools::ThreadPoolInternal pool;
+
+  // Destinations for the parsed command line values.
+  float max_nits = 0;
+  float preserve_saturation = .1f;
+  const char* input_filename = nullptr;
+  const char* output_filename = nullptr;
+
+  jpegxl::tools::CommandLineParser parser;
+  parser.AddOptionValue('m', "max_nits", "nits",
+                        "maximum luminance in the image", &max_nits,
+                        &jpegxl::tools::ParseFloat, 0);
+  parser.AddOptionValue(
+      's', "preserve_saturation", "0..1",
+      "to what extent to try and preserve saturation over luminance",
+      &preserve_saturation, &jpegxl::tools::ParseFloat, 0);
+  const auto input_filename_option = parser.AddPositionalOption(
+      "input", true, "input image", &input_filename, 0);
+  const auto output_filename_option = parser.AddPositionalOption(
+      "output", true, "output image", &output_filename, 0);
+
+  if (!parser.Parse(argc, argv)) {
+    fprintf(stderr, "See -h for help.\n");
+    return EXIT_FAILURE;
+  }
+
+  if (parser.HelpFlagPassed()) {
+    parser.PrintHelp();
+    return EXIT_SUCCESS;
+  }
+
+  const bool has_input = parser.GetOption(input_filename_option)->matched();
+  if (!has_input) {
+    fprintf(stderr, "Missing input filename.\nSee -h for help.\n");
+    return EXIT_FAILURE;
+  }
+  const bool has_output = parser.GetOption(output_filename_option)->matched();
+  if (!has_output) {
+    fprintf(stderr, "Missing output filename.\nSee -h for help.\n");
+    return EXIT_FAILURE;
+  }
+
+  jxl::CodecInOut image;
+  jxl::extras::ColorHints color_hints;
+  color_hints.Add("color_space", "RGB_D65_202_Rel_PeQ");
+  std::vector<uint8_t> encoded;
+  JXL_CHECK(jpegxl::tools::ReadFile(input_filename, &encoded));
+  JXL_CHECK(jxl::SetFromBytes(jxl::Span<const uint8_t>(encoded), color_hints,
+                              &image, &pool));
+
+  // A positive --max_nits overrides the intensity target of the image.
+  if (max_nits > 0) {
+    image.metadata.m.SetIntensityTarget(max_nits);
+  }
+
+  // Remembered now because the final encoding reuses the input primaries.
+  const jxl::Primaries original_primaries = image.Main().c_current().primaries;
+  JXL_CHECK(jxl::ToneMapTo({0, 1000}, &image, &pool));
+  JXL_CHECK(jxl::HlgInverseOOTF(&image.Main(), 1.2f, &pool));
+  JXL_CHECK(jxl::GamutMap(&image, preserve_saturation, &pool));
+  // Peak luminance at which the system gamma is 1, since we are now in scene
+  // light, having applied the inverse OOTF ourselves to control the subsequent
+  // gamut mapping instead of leaving it to JxlCms below.
+  image.metadata.m.SetIntensityTarget(301);
+
+  jxl::ColorEncoding hlg;
+  hlg.SetColorSpace(jxl::ColorSpace::kRGB);
+  hlg.primaries = original_primaries;
+  hlg.white_point = jxl::WhitePoint::kD65;
+  hlg.tf.SetTransferFunction(jxl::TransferFunction::kHLG);
+  JXL_CHECK(hlg.CreateICC());
+  JXL_CHECK(jpegxl::tools::TransformCodecInOutTo(image, hlg, &pool));
+  image.metadata.m.color_encoding = hlg;
+
+  JXL_CHECK(jxl::Encode(image, output_filename, &encoded, &pool));
+  JXL_CHECK(jpegxl::tools::WriteFile(output_filename, encoded));
+}
diff --git a/third-party/libjxl/libjxl/tools/hdr/render_hlg.cc b/third-party/libjxl/libjxl/tools/hdr/render_hlg.cc
new file mode 100644
index 0000000000..4e45accd63
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/hdr/render_hlg.cc
@@ -0,0 +1,99 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/hlg.h"
+#include "lib/extras/tone_mapping.h"
+#include "lib/jxl/enc_color_management.h"
+#include "tools/args.h"
+#include "tools/cmdline.h"
+#include "tools/hdr/image_utils.h"
+#include "tools/thread_pool_internal.h"
+
+// Renders an HLG image (decoded with the color space hint
+// "RGB_D65_202_Rel_HLG") for a display of a given peak luminance: the system
+// gamma obtained from jxl::GetHlgGamma(target_nits, surround_nits) is applied
+// through jxl::HlgOOTF, the result is gamut mapped, and the image is
+// transformed to the same color encoding with either a PQ (--pq) or an sRGB
+// transfer function before being written out.
+//
+// Returns EXIT_FAILURE for invalid command lines (--target_nits is required);
+// failures while reading, converting, encoding or writing abort through
+// JXL_CHECK.
+int main(int argc, const char** argv) {
+  jpegxl::tools::ThreadPoolInternal pool;
+
+  // Destinations for the parsed command line values.
+  float target_nits = 0;
+  float surround_nits = 5;
+  float preserve_saturation = .1f;
+  bool pq = false;
+  const char* input_filename = nullptr;
+  const char* output_filename = nullptr;
+
+  jpegxl::tools::CommandLineParser parser;
+  const auto target_nits_option = parser.AddOptionValue(
+      't', "target_nits", "nits", "peak luminance of the target display",
+      &target_nits, &jpegxl::tools::ParseFloat, 0);
+  parser.AddOptionValue(
+      's', "surround_nits", "nits",
+      "surround luminance of the viewing environment (default: 5)",
+      &surround_nits, &jpegxl::tools::ParseFloat, 0);
+  parser.AddOptionValue(
+      '\0', "preserve_saturation", "0..1",
+      "to what extent to try and preserve saturation over luminance if a gamma "
+      "< 1 generates out-of-gamut colors",
+      &preserve_saturation, &jpegxl::tools::ParseFloat, 0);
+  parser.AddOptionFlag('p', "pq",
+                       "write the output with absolute luminance using PQ", &pq,
+                       &jpegxl::tools::SetBooleanTrue, 0);
+  const auto input_filename_option = parser.AddPositionalOption(
+      "input", true, "input image", &input_filename, 0);
+  const auto output_filename_option = parser.AddPositionalOption(
+      "output", true, "output image", &output_filename, 0);
+
+  if (!parser.Parse(argc, argv)) {
+    fprintf(stderr, "See -h for help.\n");
+    return EXIT_FAILURE;
+  }
+
+  if (parser.HelpFlagPassed()) {
+    parser.PrintHelp();
+    return EXIT_SUCCESS;
+  }
+
+  const bool has_target = parser.GetOption(target_nits_option)->matched();
+  if (!has_target) {
+    fprintf(stderr,
+            "Missing required argument --target_nits.\nSee -h for help.\n");
+    return EXIT_FAILURE;
+  }
+  const bool has_input = parser.GetOption(input_filename_option)->matched();
+  if (!has_input) {
+    fprintf(stderr, "Missing input filename.\nSee -h for help.\n");
+    return EXIT_FAILURE;
+  }
+  const bool has_output = parser.GetOption(output_filename_option)->matched();
+  if (!has_output) {
+    fprintf(stderr, "Missing output filename.\nSee -h for help.\n");
+    return EXIT_FAILURE;
+  }
+
+  jxl::CodecInOut image;
+  jxl::extras::ColorHints color_hints;
+  color_hints.Add("color_space", "RGB_D65_202_Rel_HLG");
+  std::vector<uint8_t> encoded;
+  JXL_CHECK(jpegxl::tools::ReadFile(input_filename, &encoded));
+  JXL_CHECK(jxl::SetFromBytes(jxl::Span<const uint8_t>(encoded), color_hints,
+                              &image, &pool));
+
+  // Ensures that conversions to linear by JxlCms will not apply the OOTF as we
+  // apply it ourselves to control the subsequent gamut mapping.
+  image.metadata.m.SetIntensityTarget(301);
+  const float gamma = jxl::GetHlgGamma(target_nits, surround_nits);
+  fprintf(stderr, "Using a system gamma of %g\n", gamma);
+  JXL_CHECK(jxl::HlgOOTF(&image.Main(), gamma, &pool));
+  JXL_CHECK(jxl::GamutMap(&image, preserve_saturation, &pool));
+  image.metadata.m.SetIntensityTarget(target_nits);
+
+  // Same color encoding as the input, only the transfer function changes.
+  jxl::ColorEncoding c_out = image.metadata.m.color_encoding;
+  c_out.tf.SetTransferFunction(pq ? jxl::TransferFunction::kPQ
+                                  : jxl::TransferFunction::kSRGB);
+  JXL_CHECK(c_out.CreateICC());
+  JXL_CHECK(jpegxl::tools::TransformCodecInOutTo(image, c_out, &pool));
+  image.metadata.m.color_encoding = c_out;
+
+  JXL_CHECK(jxl::Encode(image, output_filename, &encoded, &pool));
+  JXL_CHECK(jpegxl::tools::WriteFile(output_filename, encoded));
+}
diff --git a/third-party/libjxl/libjxl/tools/hdr/texture_to_cube.cc b/third-party/libjxl/libjxl/tools/hdr/texture_to_cube.cc
new file mode 100644
index 0000000000..53a56c5daf
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/hdr/texture_to_cube.cc
@@ -0,0 +1,74 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/image_bundle.h"
+#include "tools/args.h"
+#include "tools/cmdline.h"
+#include "tools/thread_pool_internal.h"
+
+// Converts a LUT texture of N*N x N pixels (N slices of N x N samples laid out
+// side by side) into a text Cube LUT written to the output path. Every fallible
+// step (read, decode, size check, open, write) is guarded by JXL_CHECK.
+int main(int argc, const char** argv) {
+  jpegxl::tools::ThreadPoolInternal pool;
+  jpegxl::tools::CommandLineParser parser;
+  const char* input_filename = nullptr;
+  auto input_filename_option = parser.AddPositionalOption(
+      "input", true, "input image", &input_filename, 0);
+  const char* output_filename = nullptr;
+  auto output_filename_option = parser.AddPositionalOption(
+      "output", true, "output Cube LUT", &output_filename, 0);
+
+  if (!parser.Parse(argc, argv)) {
+    fprintf(stderr, "See -h for help.\n");
+    return EXIT_FAILURE;
+  }
+  if (parser.HelpFlagPassed()) {
+    parser.PrintHelp();
+    return EXIT_SUCCESS;
+  }
+  if (!parser.GetOption(input_filename_option)->matched()) {
+    fprintf(stderr, "Missing input filename.\nSee -h for help.\n");
+    return EXIT_FAILURE;
+  }
+  if (!parser.GetOption(output_filename_option)->matched()) {
+    fprintf(stderr, "Missing output filename.\nSee -h for help.\n");
+    return EXIT_FAILURE;
+  }
+
+  jxl::CodecInOut image;
+  std::vector<uint8_t> encoded;
+  JXL_CHECK(jpegxl::tools::ReadFile(input_filename, &encoded));
+  JXL_CHECK(jxl::SetFromBytes(jxl::Span<const uint8_t>(encoded),
+                              jxl::extras::ColorHints(), &image, &pool));
+
+  JXL_CHECK(image.xsize() == image.ysize() * image.ysize());
+  const unsigned N = image.ysize();
+
+  FILE* const output = fopen(output_filename, "wb");
+  JXL_CHECK(output);
+  fprintf(output, "# Created by libjxl\n");
+  fprintf(output, "LUT_3D_SIZE %u\n", N);
+  fprintf(output, "DOMAIN_MIN 0.0 0.0 0.0\nDOMAIN_MAX 1.0 1.0 1.0\n\n");
+
+  // Entry (r, g, b) is read from pixel (N * b + r, g); r varies fastest.
+  for (size_t b = 0; b < N; ++b) {
+    for (size_t g = 0; g < N; ++g) {
+      const float* const JXL_RESTRICT rows[3] = {
+          image.Main().color()->ConstPlaneRow(0, g) + N * b,
+          image.Main().color()->ConstPlaneRow(1, g) + N * b,
+          image.Main().color()->ConstPlaneRow(2, g) + N * b};
+      for (size_t r = 0; r < N; ++r) {
+        fprintf(output, "%.6f %.6f %.6f\n", rows[0][r], rows[1][r], rows[2][r]);
+      }
+    }
+  }
+  // Report buffered write failures (e.g. full disk) instead of exiting with 0.
+  JXL_CHECK(ferror(output) == 0 && fclose(output) == 0);
+}
diff --git a/third-party/libjxl/libjxl/tools/hdr/tone_map.cc b/third-party/libjxl/libjxl/tools/hdr/tone_map.cc
new file mode 100644
index 0000000000..781f355cc4
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/hdr/tone_map.cc
@@ -0,0 +1,101 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/tone_mapping.h"
+#include "lib/jxl/enc_color_management.h"
+#include "tools/args.h"
+#include "tools/cmdline.h"
+#include "tools/file_io.h"
+#include "tools/hdr/image_utils.h"
+#include "tools/thread_pool_internal.h"
+
+// Tone maps the input image to the peak luminance given by --target_nits, gamut
+// maps the result and encodes it to the output path. The output uses the sRGB
+// transfer function, PQ with --pq, or linear half-float samples for EXR files.
+int main(int argc, const char** argv) {
+  jpegxl::tools::ThreadPoolInternal pool;
+
+  jpegxl::tools::CommandLineParser parser;
+  float max_nits = 0;
+  parser.AddOptionValue('m', "max_nits", "nits",
+                        "maximum luminance in the image", &max_nits,
+                        &jpegxl::tools::ParseFloat, 0);
+  float target_nits = 0;
+  auto target_nits_option = parser.AddOptionValue(
+      't', "target_nits", "nits",
+      "peak luminance of the display for which to tone map", &target_nits,
+      &jpegxl::tools::ParseFloat, 0);
+  float preserve_saturation = .1f;
+  parser.AddOptionValue(
+      's', "preserve_saturation", "0..1",
+      "to what extent to try and preserve saturation over luminance",
+      &preserve_saturation, &jpegxl::tools::ParseFloat, 0);
+  bool pq = false;
+  parser.AddOptionFlag('p', "pq",
+                       "write the output with absolute luminance using PQ", &pq,
+                       &jpegxl::tools::SetBooleanTrue, 0);
+  const char* input_filename = nullptr;
+  auto input_filename_option = parser.AddPositionalOption(
+      "input", true, "input image", &input_filename, 0);
+  const char* output_filename = nullptr;
+  auto output_filename_option = parser.AddPositionalOption(
+      "output", true, "output image", &output_filename, 0);
+
+  if (!parser.Parse(argc, argv)) {
+    fprintf(stderr, "See -h for help.\n");
+    return EXIT_FAILURE;
+  }
+
+  if (parser.HelpFlagPassed()) {
+    parser.PrintHelp();
+    return EXIT_SUCCESS;
+  }
+
+  // --target_nits and both file names are mandatory.
+  if (!parser.GetOption(target_nits_option)->matched()) {
+    fprintf(stderr,
+            "Missing required argument --target_nits.\nSee -h for help.\n");
+    return EXIT_FAILURE;
+  }
+  if (!parser.GetOption(input_filename_option)->matched()) {
+    fprintf(stderr, "Missing input filename.\nSee -h for help.\n");
+    return EXIT_FAILURE;
+  }
+  if (!parser.GetOption(output_filename_option)->matched()) {
+    fprintf(stderr, "Missing output filename.\nSee -h for help.\n");
+    return EXIT_FAILURE;
+  }
+
+  jxl::CodecInOut image;
+  jxl::extras::ColorHints hints;
+  hints.Add("color_space", "RGB_D65_202_Rel_PeQ");
+  std::vector<uint8_t> bytes;
+  JXL_CHECK(jpegxl::tools::ReadFile(input_filename, &bytes));
+  JXL_CHECK(jxl::SetFromBytes(jxl::Span<const uint8_t>(bytes), hints, &image,
+                              &pool));
+  // A positive --max_nits replaces the intensity target set by decoding.
+  if (max_nits > 0) image.metadata.m.SetIntensityTarget(max_nits);
+  JXL_CHECK(jxl::ToneMapTo({0, target_nits}, &image, &pool));
+  JXL_CHECK(jxl::GamutMap(&image, preserve_saturation, &pool));
+
+  jxl::ColorEncoding c_out = image.metadata.m.color_encoding;
+  c_out.tf.SetTransferFunction(pq ? jxl::TransferFunction::kPQ
+                                  : jxl::TransferFunction::kSRGB);
+  // EXR output overrides the transfer function chosen above.
+  if (jxl::extras::CodecFromPath(output_filename) == jxl::extras::Codec::kEXR) {
+    c_out.tf.SetTransferFunction(jxl::TransferFunction::kLinear);
+    image.metadata.m.SetFloat16Samples();
+  }
+
+  JXL_CHECK(c_out.CreateICC());
+  JXL_CHECK(jpegxl::tools::TransformCodecInOutTo(image, c_out, &pool));
+  image.metadata.m.color_encoding = c_out;
+  JXL_CHECK(jxl::Encode(image, output_filename, &bytes, &pool));
+  JXL_CHECK(jpegxl::tools::WriteFile(output_filename, bytes));
+}
diff --git a/third-party/libjxl/libjxl/tools/icc_codec_fuzzer.cc b/third-party/libjxl/libjxl/tools/icc_codec_fuzzer.cc
new file mode 100644
index 0000000000..91ed8596f6
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/icc_codec_fuzzer.cc
@@ -0,0 +1,76 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_icc_codec.h"
+#include "lib/jxl/icc_codec.h"
+
+namespace jpegxl {
+namespace tools {
+
+using ::jxl::PaddedBytes;
+
+#ifdef JXL_ICC_FUZZER_SLOW_TEST
+using ::jxl::BitReader;
+using ::jxl::Span;
+#endif
+
+// Fuzz target body: `data` is fed either to the ICC reader (compressed form) or
+// to the ICC writer (raw profile), as chosen at build time or by its first byte.
+int TestOneInput(const uint8_t* data, size_t size) {
+#if defined(JXL_ICC_FUZZER_ONLY_WRITE)
+  bool read = false;
+#elif defined(JXL_ICC_FUZZER_ONLY_READ)
+  bool read = true;
+#else
+  // Decide whether to test the reader or the writer (both use parsing)
+  if (!size) return 0;
+  bool read = data[0] == 0;
+  data++;
+  size--;
+#endif
+
+#ifdef JXL_ICC_FUZZER_SLOW_TEST
+  // Also runs the JPEG XL LZ77 and ANS coding, which are already fuzzed
+  // separately; leave JXL_ICC_FUZZER_SLOW_TEST undefined to focus on ICC parsing.
+  if (read) {
+    // Reading parses the compressed format.
+    BitReader br(Span<const uint8_t>(data, size));
+    PaddedBytes result;
+    (void)jxl::ReadICC(&br, &result);
+    (void)br.Close();
+  } else {
+    // Writing parses the original ICC profile.
+    PaddedBytes icc;
+    icc.assign(data, data + size);
+    // Qualified: this file has no using-declaration for BitWriter.
+    jxl::BitWriter writer;
+    // Writing must succeed for any bytestream; make the fuzzer fail if not.
+    JXL_ASSERT(jxl::WriteICC(icc, &writer, 0, nullptr));
+  }
+#else  // JXL_ICC_FUZZER_SLOW_TEST
+  if (read) {
+    // Reading (unpredicting) parses the compressed format.
+    PaddedBytes result;
+    (void)jxl::UnpredictICC(data, size, &result);
+  } else {
+    // Writing (predicting) parses the original ICC profile.
+    PaddedBytes result;
+    // Predicting must succeed for any bytestream; make the fuzzer fail if not.
+    JXL_ASSERT(jxl::PredictICC(data, size, &result));
+    PaddedBytes reconstructed;
+    JXL_ASSERT(jxl::UnpredictICC(result.data(), result.size(), &reconstructed));
+    JXL_ASSERT(reconstructed.size() == size);
+    JXL_ASSERT(memcmp(data, reconstructed.data(), size) == 0);
+  }
+#endif  // JXL_ICC_FUZZER_SLOW_TEST
+  return 0;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  return jpegxl::tools::TestOneInput(data, size);  // C-linkage forwarder.
+}
diff --git a/third-party/libjxl/libjxl/tools/icc_detect/icc_detect.h b/third-party/libjxl/libjxl/tools/icc_detect/icc_detect.h
new file mode 100644
index 0000000000..deca6d7d19
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/icc_detect/icc_detect.h
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_ICC_DETECT_ICC_DETECT_H_
+#define TOOLS_ICC_DETECT_ICC_DETECT_H_
+
+#include <QByteArray>
+#include <QWidget>
+
+namespace jpegxl {
+namespace tools {
+
+// Returns the monitor ICC profile (empty on failure); cache it if possible.
+QByteArray GetMonitorIccProfile(const QWidget* widget);
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_ICC_DETECT_ICC_DETECT_H_
diff --git a/third-party/libjxl/libjxl/tools/icc_detect/icc_detect_empty.cc b/third-party/libjxl/libjxl/tools/icc_detect/icc_detect_empty.cc
new file mode 100644
index 0000000000..421ac50e93
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/icc_detect/icc_detect_empty.cc
@@ -0,0 +1,16 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/icc_detect/icc_detect.h"
+
+namespace jpegxl {
+namespace tools {
+
+QByteArray GetMonitorIccProfile(const QWidget* const /*widget*/) {
+  return QByteArray();  // This backend never detects a profile.
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/icc_detect/icc_detect_win32.cc b/third-party/libjxl/libjxl/tools/icc_detect/icc_detect_win32.cc
new file mode 100644
index 0000000000..f06e688518
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/icc_detect/icc_detect_win32.cc
@@ -0,0 +1,66 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/icc_detect/icc_detect.h"
+
+#include <windows.h>
+
+#include <memory>
+#include <type_traits>
+
+namespace jpegxl {
+namespace tools {
+
+namespace {
+
+// unique_ptr deleter: closes a HANDLE unless it is INVALID_HANDLE_VALUE.
+struct HandleDeleter {
+  void operator()(const HANDLE handle) const {
+    if (handle == INVALID_HANDLE_VALUE) return;
+    CloseHandle(handle);
+  }
+};
+using HandleUniquePtr =
+    std::unique_ptr<std::remove_pointer<HANDLE>::type, HandleDeleter>;
+
+}  // namespace
+
+// Loads the ICC profile file that GetICMProfileW reports for the device context
+// of `widget`'s window; returns an empty QByteArray if any step fails.
+QByteArray GetMonitorIccProfile(const QWidget* const widget) {
+  const HWND window = reinterpret_cast<HWND>(widget->effectiveWinId());
+  const HDC dc = GetDC(window);
+  wchar_t path[MAX_PATH];
+  DWORD path_size = MAX_PATH;
+  const BOOL found = GetICMProfileW(dc, &path_size, path);
+  ReleaseDC(window, dc);  // Released on both the success and failure paths.
+  if (!found) return QByteArray();
+  HandleUniquePtr file(CreateFileW(path, GENERIC_READ, FILE_SHARE_READ, nullptr,
+                                   OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN,
+                                   nullptr));
+  if (file.get() == INVALID_HANDLE_VALUE) {
+    return QByteArray();
+  }
+  LARGE_INTEGER file_size;
+  if (!GetFileSizeEx(file.get(), &file_size)) {
+    return QByteArray();
+  }
+  HandleUniquePtr mapping(
+      CreateFileMappingW(file.get(), nullptr, PAGE_READONLY, 0, 0, nullptr));
+  if (mapping == nullptr) {
+    return QByteArray();
+  }
+  const char* const bytes = reinterpret_cast<const char*>(
+      MapViewOfFile(mapping.get(), FILE_MAP_READ, 0, 0, 0));
+  if (bytes == nullptr) {
+    return QByteArray();
+  }
+  QByteArray profile(bytes, file_size.QuadPart);  // Built before unmapping.
+  UnmapViewOfFile(bytes);
+  return profile;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/icc_detect/icc_detect_x11.cc b/third-party/libjxl/libjxl/tools/icc_detect/icc_detect_x11.cc
new file mode 100644
index 0000000000..e67b30ebf1
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/icc_detect/icc_detect_x11.cc
@@ -0,0 +1,90 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// clang-format off
+#include "tools/icc_detect/icc_detect.h"
+// clang-format on
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <xcb/xcb.h>
+
+#include <memory>
+
+// clang-format off
+#include <QApplication>
+#include <X11/Xlib.h>
+// clang-format on
+
+namespace jpegxl {
+namespace tools {
+
+namespace {
+
+constexpr char kIccProfileAtomName[] = "_ICC_PROFILE";
+constexpr uint32_t kMaxIccProfileSize = 1 << 24;
+
+struct FreeDeleter {  // unique_ptr deleter that releases memory via std::free.
+  void operator()(void* const p) const { std::free(p); }
+};
+template <typename T>
+using XcbUniquePtr = std::unique_ptr<T, FreeDeleter>;
+
+}  // namespace
+
+// Returns the _ICC_PROFILE property of the root window of the default X11
+// screen, or an empty QByteArray if any lookup fails or the value is truncated.
+QByteArray GetMonitorIccProfile(const QWidget* const widget) {
+  Q_UNUSED(widget)
+  auto* const qX11App =
+      qGuiApp->nativeInterface<QNativeInterface::QX11Application>();
+  if (qX11App == nullptr) {
+    return QByteArray();
+  }
+  xcb_connection_t* const connection = qX11App->connection();
+  if (connection == nullptr) {
+    return QByteArray();
+  }
+
+  const int screenNumber = DefaultScreen(qX11App->display());
+
+  const xcb_intern_atom_cookie_t atomCookie =
+      xcb_intern_atom(connection, /*only_if_exists=*/1,
+                      sizeof kIccProfileAtomName - 1, kIccProfileAtomName);
+  const XcbUniquePtr<xcb_intern_atom_reply_t> atomReply(
+      xcb_intern_atom_reply(connection, atomCookie, nullptr));
+  if (atomReply == nullptr) {
+    return QByteArray();
+  }
+  const xcb_atom_t iccProfileAtom = atomReply->atom;
+
+  const xcb_screen_t* screen = nullptr;
+  xcb_screen_iterator_t it =
+      xcb_setup_roots_iterator(xcb_get_setup(connection));
+  for (int index = 0; it.rem; ++index, xcb_screen_next(&it)) {
+    if (index == screenNumber) {
+      screen = it.data;
+      break;
+    }
+  }
+  if (screen == nullptr) {
+    return QByteArray();
+  }
+  const xcb_get_property_cookie_t propertyCookie = xcb_get_property(
+      connection, /*_delete=*/0, screen->root, iccProfileAtom,
+      XCB_GET_PROPERTY_TYPE_ANY, /*long_offset=*/0, kMaxIccProfileSize);
+  const XcbUniquePtr<xcb_get_property_reply_t> reply(
+      xcb_get_property_reply(connection, propertyCookie, nullptr));
+  if (reply == nullptr || reply->bytes_after > 0) {
+    return QByteArray();
+  }
+
+  const char* const bytes =
+      reinterpret_cast<const char*>(xcb_get_property_value(reply.get()));
+  return QByteArray(bytes, xcb_get_property_value_length(reply.get()));
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/Decoder.java b/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/Decoder.java
new file mode 100644
index 0000000000..7bdd6a7e2e
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/Decoder.java
@@ -0,0 +1,46 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package org.jpeg.jpegxl.wrapper;
+
+import java.nio.Buffer;
+import java.nio.ByteBuffer;
+
+/** JPEG XL JNI decoder wrapper. */
+public class Decoder {
+  private Decoder() {} // Utility library, disable object construction.
+
+  /** One-shot decoding of a complete stream; {@code data} must be a direct buffer. */
+  public static ImageData decode(Buffer data, PixelFormat pixelFormat) {
+    StreamInfo basicInfo = DecoderJni.getBasicInfo(data, pixelFormat);
+    if (basicInfo.status != Status.OK) {
+      throw new IllegalStateException("Decoding failed");
+    }
+    if (basicInfo.width < 0 || basicInfo.height < 0 || basicInfo.pixelsSize < 0
+        || basicInfo.iccSize < 0) {
+      throw new IllegalStateException("JNI has returned negative size");
+    }
+    Buffer pixels = ByteBuffer.allocateDirect(basicInfo.pixelsSize);
+    Buffer icc = ByteBuffer.allocateDirect(basicInfo.iccSize);
+    if (DecoderJni.getPixels(data, pixels, icc, pixelFormat) != Status.OK) {
+      throw new IllegalStateException("Decoding failed");
+    }
+    return new ImageData(basicInfo.width, basicInfo.height, pixels, icc, pixelFormat);
+  }
+
+  public static StreamInfo decodeInfo(byte[] data) {
+    return decodeInfo(data, 0, data.length);
+  }
+
+  /** Copies the range to a direct buffer (heap buffers are rejected) and reads its info. */
+  public static StreamInfo decodeInfo(byte[] data, int offset, int length) {
+    return decodeInfo(ByteBuffer.allocateDirect(length).put(data, offset, length));
+  }
+
+  /** Reads the stream information only; {@code data} must be a direct buffer. */
+  public static StreamInfo decodeInfo(Buffer data) {
+    return DecoderJni.getBasicInfo(data, null);
+  }
+}
diff --git a/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/DecoderJni.java b/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/DecoderJni.java
new file mode 100644
index 0000000000..7a2f2bf7ed
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/DecoderJni.java
@@ -0,0 +1,73 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package org.jpeg.jpegxl.wrapper;
+
+import java.nio.Buffer;
+
+/**
+ * Low level JNI wrapper.
+ *
+ * This class is package-private; it should only be used by the high level wrapper.
+ */
+class DecoderJni {
+  private static native void nativeGetBasicInfo(int[] context, Buffer data);
+  private static native void nativeGetPixels(int[] context, Buffer data, Buffer pixels, Buffer icc);
+
+  /** Translates a native status code; unknown codes raise IllegalStateException. */
+  static Status makeStatus(int statusCode) {
+    if (statusCode == 0) {
+      return Status.OK;
+    }
+    if (statusCode == -1) {
+      return Status.INVALID_STREAM;
+    }
+    if (statusCode == 1) {
+      return Status.NOT_ENOUGH_INPUT;
+    }
+    throw new IllegalStateException("Unknown status code");
+  }
+
+  /** Unpacks the six-element array filled in by nativeGetBasicInfo. */
+  static StreamInfo makeStreamInfo(int[] context) {
+    StreamInfo info = new StreamInfo();
+    info.status = makeStatus(context[0]);
+    info.width = context[1];
+    info.height = context[2];
+    info.pixelsSize = context[3];
+    info.iccSize = context[4];
+    info.alphaBits = context[5];
+    return info;
+  }
+
+  /** Throws unless {@code buffer} is direct; {@code name} prefixes the message. */
+  private static void requireDirect(Buffer buffer, String name) {
+    if (!buffer.isDirect()) {
+      throw new IllegalArgumentException(name + " must be direct buffer");
+    }
+  }
+
+  /** Decode stream information. */
+  static StreamInfo getBasicInfo(Buffer data, PixelFormat pixelFormat) {
+    requireDirect(data, "data");
+    int[] context = new int[6];
+    // Slot 0 carries the requested format in (-1 for none) and the status out.
+    context[0] = (pixelFormat == null) ? -1 : pixelFormat.ordinal();
+    nativeGetBasicInfo(context, data);
+    return makeStreamInfo(context);
+  }
+
+  /** One-shot decoding. */
+  static Status getPixels(Buffer data, Buffer pixels, Buffer icc, PixelFormat pixelFormat) {
+    requireDirect(data, "data");
+    requireDirect(pixels, "pixels");
+    requireDirect(icc, "icc");
+    int[] context = {pixelFormat.ordinal()};
+    nativeGetPixels(context, data, pixels, icc);
+    return makeStatus(context[0]);
+  }
+
+  private DecoderJni() {} // Utility library, disable object construction.
+}
diff --git a/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/DecoderTest.java b/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/DecoderTest.java
new file mode 100644
index 0000000000..44f038c789
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/DecoderTest.java
@@ -0,0 +1,127 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package org.jpeg.jpegxl.wrapper;
+
+import java.nio.ByteBuffer;
+
+public class DecoderTest {
+  static {
+    String jniLibrary = System.getProperty("org.jpeg.jpegxl.wrapper.lib");
+    if (jniLibrary != null) {
+      try {
+        System.load(new java.io.File(jniLibrary).getAbsolutePath());
+      } catch (UnsatisfiedLinkError ex) {
+        String message =
+            "If the nested exception message says that some standard library (stdc++, tcmalloc, etc.) was not found, "
+            + "it is likely that JDK discovered by the build system overrides library search path. "
+            + "Try specifying a different JDK via JAVA_HOME environment variable and doing a clean build.";
+        throw new RuntimeException(message, ex);
+      }
+    }
+  }
+
+  private static final int SIMPLE_IMAGE_DIM = 1024;
+  // Base64: "/wr6H0GRCAYBAGAASzgkunkeVbaSBu95EXDn0e7ABz2ShAMA"
+  private static final byte[] SIMPLE_IMAGE_BYTES = {-1, 10, -6, 31, 65, -111, 8, 6, 1, 0, 96, 0, 75,
+      56, 36, -70, 121, 30, 85, -74, -110, 6, -17, 121, 17, 112, -25, -47, -18, -64, 7, 61, -110,
+      -124, 3, 0};
+
+  private static final int PIXEL_IMAGE_DIM = 1;
+  // Base64: "/woAELASCBAQABwASxLFgoUkDA=="
+  private static final byte[] PIXEL_IMAGE_BYTES = {
+      -1, 10, 0, 16, -80, 18, 8, 16, 16, 0, 28, 0, 75, 18, -59, -126, -123, 36, 12};
+
+  static ByteBuffer makeByteBuffer(byte[] src, int length) {
+    ByteBuffer buffer = ByteBuffer.allocateDirect(length);
+    buffer.put(src, 0, length);
+    return buffer;
+  }
+
+  static ByteBuffer makeSimpleImage() {
+    return makeByteBuffer(SIMPLE_IMAGE_BYTES, SIMPLE_IMAGE_BYTES.length);
+  }
+
+  static void checkSimpleImageData(ImageData imageData) {
+    if (imageData.width != SIMPLE_IMAGE_DIM) {
+      throw new IllegalStateException("invalid width");
+    }
+    if (imageData.height != SIMPLE_IMAGE_DIM) {
+      throw new IllegalStateException("invalid height");
+    }
+    int iccSize = imageData.icc.capacity();
+    // Do not expect ICC profile to be some exact size; currently it is 732
+    if (iccSize < 300 || iccSize > 1000) {
+      throw new IllegalStateException("unexpected ICC profile size");
+    }
+  }
+
+  static void checkPixelFormat(PixelFormat pixelFormat, int bytesPerPixel) {
+    ImageData imageData = Decoder.decode(makeSimpleImage(), pixelFormat);
+    checkSimpleImageData(imageData);
+    if (imageData.pixels.limit() != SIMPLE_IMAGE_DIM * SIMPLE_IMAGE_DIM * bytesPerPixel) {
+      throw new IllegalStateException("Unexpected pixels size");
+    }
+  }
+
+  static void testRgba() {
+    checkPixelFormat(PixelFormat.RGBA_8888, 4);
+  }
+
+  static void testRgbaF16() {
+    checkPixelFormat(PixelFormat.RGBA_F16, 8);
+  }
+
+  static void testRgb() {
+    checkPixelFormat(PixelFormat.RGB_888, 3);
+  }
+
+  static void testRgbF16() {
+    checkPixelFormat(PixelFormat.RGB_F16, 6);
+  }
+
+  static void checkGetInfo(ByteBuffer data, int dim, int alphaBits) {
+    StreamInfo streamInfo = Decoder.decodeInfo(data);
+    if (streamInfo.status != Status.OK) {
+      throw new IllegalStateException("Unexpected decoding error");
+    }
+    if (streamInfo.width != dim || streamInfo.height != dim) {
+      throw new IllegalStateException("Invalid width / height");
+    }
+    if (streamInfo.alphaBits != alphaBits) {
+      throw new IllegalStateException("Invalid alphaBits");
+    }
+  }
+
+  static void testGetInfoNoAlpha() {
+    checkGetInfo(makeSimpleImage(), SIMPLE_IMAGE_DIM, 0);
+  }
+
+  static void testGetInfoAlpha() {
+    checkGetInfo(makeByteBuffer(PIXEL_IMAGE_BYTES, PIXEL_IMAGE_BYTES.length), PIXEL_IMAGE_DIM, 8);
+  }
+
+  static void testNotEnoughInput() {
+    for (int i = 0; i < 6; ++i) {
+      ByteBuffer jxlData = makeByteBuffer(SIMPLE_IMAGE_BYTES, i);
+      StreamInfo streamInfo = Decoder.decodeInfo(jxlData);
+      if (streamInfo.status != Status.NOT_ENOUGH_INPUT) {
+        throw new IllegalStateException(
+            "Expected 'not enough input', but got " + streamInfo.status + " " + i);
+      }
+    }
+  }
+
+  // Simple executable to avoid extra dependencies.
+  public static void main(String[] args) {
+    testRgba();
+    testRgbaF16();
+    testRgb();
+    testRgbF16();
+    testGetInfoNoAlpha();
+    testGetInfoAlpha();
+    testNotEnoughInput();
+  }
+}
diff --git a/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/ImageData.java b/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/ImageData.java
new file mode 100644
index 0000000000..a449529a5a
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/ImageData.java
@@ -0,0 +1,25 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package org.jpeg.jpegxl.wrapper;
+
+import java.nio.Buffer;
+
+/** POJO that contains necessary image data (dimensions, pixels,...). */
+public class ImageData {
+  final int width; // image width in pixels
+  final int height; // image height in pixels
+  final Buffer pixels; // decoded samples, laid out as described by pixelFormat
+  final Buffer icc; // ICC profile bytes
+  final PixelFormat pixelFormat; // format that was requested from the decoder
+
+  ImageData(int width, int height, Buffer pixels, Buffer icc, PixelFormat pixelFormat) {
+    this.width = width;
+    this.height = height;
+    this.pixels = pixels;
+    this.icc = icc;
+    this.pixelFormat = pixelFormat;
+  }
+}
diff --git a/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/PixelFormat.java b/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/PixelFormat.java
new file mode 100644
index 0000000000..5df1225740
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/PixelFormat.java
@@ -0,0 +1,13 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package org.jpeg.jpegxl.wrapper;
+
+public enum PixelFormat {
+  RGBA_8888, // 0: 4 channels, 8-bit unsigned samples
+  RGBA_F16, // 1: 4 channels, float16 samples
+  RGB_888, // 2: 3 channels, 8-bit unsigned samples
+  RGB_F16 // 3: 3 channels, float16 samples
+}
diff --git a/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/Status.java b/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/Status.java
new file mode 100644
index 0000000000..a87206a166
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/Status.java
@@ -0,0 +1,17 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package org.jpeg.jpegxl.wrapper;
+
+public enum Status {
+  /** The operation was successful. */
+  OK,
+
+  /** The stream was valid so far, but is incomplete. */
+  NOT_ENOUGH_INPUT,
+
+  /** The stream is corrupted. */
+  INVALID_STREAM
+}
diff --git a/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/StreamInfo.java b/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/StreamInfo.java
new file mode 100644
index 0000000000..2419b37f23
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/StreamInfo.java
@@ -0,0 +1,18 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package org.jpeg.jpegxl.wrapper;
+
+/** POJO that wraps some fields of JxlBasicInfo. */
+public class StreamInfo {
+  public Status status;
+  public int width;
+  public int height;
+  public int alphaBits;
+
+  // Package-private: sizes in bytes that Decoder uses to allocate output buffers.
+  int pixelsSize;
+  int iccSize;
+}
diff --git a/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/decoder_jni.cc b/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/decoder_jni.cc
new file mode 100644
index 0000000000..d61464e54a
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/decoder_jni.cc
@@ -0,0 +1,271 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/jni/org/jpeg/jpegxl/wrapper/decoder_jni.h"
+
+#include <jni.h>
+#include <jxl/decode.h>
+#include <jxl/thread_parallel_runner.h>
+
+#include <cstdlib>
+
+namespace {
+
+template <typename From, typename To>
+bool StaticCast(const From& from, To* to) {
+  To tmp = static_cast<To>(from);
+  // Check sign is preserved.
+  if ((from < 0 && tmp > 0) || (from > 0 && tmp < 0)) return false;
+  // Check value is preserved.
+  if (from != static_cast<From>(tmp)) return false;
+  *to = tmp;
+  return true;
+}
+
+bool BufferToSpan(JNIEnv* env, jobject buffer, uint8_t** data, size_t* size) {
+  if (buffer == nullptr) return true;
+
+  *data = reinterpret_cast<uint8_t*>(env->GetDirectBufferAddress(buffer));
+  if (*data == nullptr) return false;
+  return StaticCast(env->GetDirectBufferCapacity(buffer), size);
+}
+
+enum class Status { OK = 0, FATAL_ERROR = -1, NOT_ENOUGH_INPUT = 1 };
+
+bool IsOk(Status status) { return status == Status::OK; }
+
+#define FAILURE(M) Status::FATAL_ERROR
+
+constexpr const size_t kLastPixelFormat = 3;
+constexpr const size_t kNoPixelFormat = static_cast<size_t>(-1);
+
+JxlPixelFormat ToPixelFormat(size_t pixel_format) {
+  if (pixel_format == 0) {  // PixelFormat.RGBA_8888
+    // RGBA, 4 x byte per pixel, no scanline padding.
+    return {/*num_channels=*/4, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, /*align=*/0};
+  } else if (pixel_format == 1) {  // PixelFormat.RGBA_F16
+    // RGBA, 4 x float16 per pixel, no scanline padding.
+    return {/*num_channels=*/4, JXL_TYPE_FLOAT16, JXL_LITTLE_ENDIAN,
+            /*align=*/0};
+  } else if (pixel_format == 2) {  // PixelFormat.RGB_888
+    // RGB, 3 x byte per pixel, no scanline padding.
+    return {/*num_channels=*/3, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, /*align=*/0};
+  } else if (pixel_format == 3) {  // PixelFormat.RGB_F16
+    // RGB, 3 x float16 per pixel, no scanline padding.
+    return {/*num_channels=*/3, JXL_TYPE_FLOAT16, JXL_LITTLE_ENDIAN,
+            /*align=*/0};
+  } else {
+    abort();  // Callers must pass a value in [0, kLastPixelFormat].
+    return {0, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+  }
+}
+
+Status DoDecode(JNIEnv* env, jobject data_buffer, size_t* info_pixels_size,
+                size_t* info_icc_size, JxlBasicInfo* info, size_t pixel_format,
+                jobject pixels_buffer, jobject icc_buffer) {
+  if (data_buffer == nullptr) return FAILURE("No data buffer");
+
+  uint8_t* data = nullptr;
+  size_t data_size = 0;
+  if (!BufferToSpan(env, data_buffer, &data, &data_size)) {
+    return FAILURE("Failed to access data buffer");
+  }
+
+  uint8_t* pixels = nullptr;
+  size_t pixels_size = 0;
+  if (!BufferToSpan(env, pixels_buffer, &pixels, &pixels_size)) {
+    return FAILURE("Failed to access pixels buffer");
+  }
+
+  uint8_t* icc = nullptr;
+  size_t icc_size = 0;
+  if (!BufferToSpan(env, icc_buffer, &icc, &icc_size)) {
+    return FAILURE("Failed to access ICC buffer");
+  }
+
+  JxlDecoder* dec = JxlDecoderCreate(NULL);
+
+  constexpr size_t kNumThreads = 0;  // Do everything in this thread.
+  void* runner = JxlThreadParallelRunnerCreate(NULL, kNumThreads);
+
+  struct Defer {
+    JxlDecoder* dec;
+    void* runner;
+    ~Defer() {
+      JxlThreadParallelRunnerDestroy(runner);
+      JxlDecoderDestroy(dec);
+    }
+  } defer{dec, runner};
+
+  auto status =
+      JxlDecoderSetParallelRunner(dec, JxlThreadParallelRunner, runner);
+  if (status != JXL_DEC_SUCCESS) {
+    return FAILURE("Failed to set parallel runner");
+  }
+  status = JxlDecoderSubscribeEvents(
+      dec, JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE | JXL_DEC_COLOR_ENCODING);
+  if (status != JXL_DEC_SUCCESS) {
+    return FAILURE("Failed to subscribe for events");
+  }
+  status = JxlDecoderSetInput(dec, data, data_size);
+  if (status != JXL_DEC_SUCCESS) {
+    return FAILURE("Failed to set input");
+  }
+  status = JxlDecoderProcessInput(dec);
+  if (status == JXL_DEC_NEED_MORE_INPUT) {
+    return Status::NOT_ENOUGH_INPUT;
+  }
+  if (status != JXL_DEC_BASIC_INFO) {
+    return FAILURE("Unexpected notification (want: basic info)");
+  }
+  if (info_pixels_size) {
+    JxlPixelFormat format = ToPixelFormat(pixel_format);
+    status = JxlDecoderImageOutBufferSize(dec, &format, info_pixels_size);
+    if (status != JXL_DEC_SUCCESS) {
+      return FAILURE("Failed to get pixels size");
+    }
+  }
+  if (info) {
+    status = JxlDecoderGetBasicInfo(dec, info);
+    if (status != JXL_DEC_SUCCESS) {
+      return FAILURE("Failed to get basic info");
+    }
+  }
+  status = JxlDecoderProcessInput(dec);
+  if (status != JXL_DEC_COLOR_ENCODING) {
+    return FAILURE("Unexpected notification (want: color encoding)");
+  }
+  if (info_icc_size) {
+    status = JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_DATA,
+                                         info_icc_size);
+    if (status != JXL_DEC_SUCCESS) *info_icc_size = 0;
+  }
+  if (icc && icc_size > 0) {
+    status = JxlDecoderGetColorAsICCProfile(dec, JXL_COLOR_PROFILE_TARGET_DATA,
+                                            icc, icc_size);
+    if (status != JXL_DEC_SUCCESS) {
+      return FAILURE("Failed to get ICC");
+    }
+  }
+  if (pixels) {
+    JxlPixelFormat format = ToPixelFormat(pixel_format);
+    status = JxlDecoderProcessInput(dec);
+    if (status != JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+      return FAILURE("Unexpected notification (want: need out buffer)");
+    }
+    status = JxlDecoderSetImageOutBuffer(dec, &format, pixels, pixels_size);
+    if (status != JXL_DEC_SUCCESS) {
+      return FAILURE("Failed to set out buffer");
+    }
+    status = JxlDecoderProcessInput(dec);
+    if (status != JXL_DEC_FULL_IMAGE) {
+      return FAILURE("Unexpected notification (want: full image)");
+    }
+    status = JxlDecoderProcessInput(dec);
+    if (status != JXL_DEC_SUCCESS) {
+      return FAILURE("Unexpected notification (want: success)");
+    }
+  }
+
+  return Status::OK;
+}
+
+}  // namespace
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+JNIEXPORT void JNICALL
+Java_org_jpeg_jpegxl_wrapper_DecoderJni_nativeGetBasicInfo(
+    JNIEnv* env, jobject /*jobj*/, jintArray ctx, jobject data_buffer) {
+  jint context[6] = {0};
+  env->GetIntArrayRegion(ctx, 0, 1, context);
+
+  JxlBasicInfo info = {};
+  size_t pixels_size = 0;
+  size_t icc_size = 0;
+  size_t pixel_format = 0;
+
+  Status status = Status::OK;
+
+  if (IsOk(status)) {
+    pixel_format = context[0];
+    if (pixel_format == kNoPixelFormat) {
+      // OK
+    } else if (pixel_format > kLastPixelFormat) {
+      status = FAILURE("Unrecognized pixel format");
+    }
+  }
+
+  if (IsOk(status)) {
+    bool want_output_size = (pixel_format != kNoPixelFormat);
+    if (want_output_size) {
+      status = DoDecode(
+          env, data_buffer, &pixels_size, &icc_size, &info, pixel_format,
+          /* pixels_buffer= */ nullptr, /* icc_buffer= */ nullptr);
+    } else {
+      status =
+          DoDecode(env, data_buffer, /* info_pixels_size= */ nullptr,
+                   /* info_icc_size= */ nullptr, &info, pixel_format,
+                   /* pixels_buffer= */ nullptr, /* icc_buffer= */ nullptr);
+    }
+  }
+
+  if (IsOk(status)) {
+    bool ok = true;
+    ok &= StaticCast(info.xsize, context + 1);
+    ok &= StaticCast(info.ysize, context + 2);
+    ok &= StaticCast(pixels_size, context + 3);
+    ok &= StaticCast(icc_size, context + 4);
+    ok &= StaticCast(info.alpha_bits, context + 5);
+    if (!ok) status = FAILURE("Invalid value");
+  }
+
+  context[0] = static_cast<int>(status);
+
+  env->SetIntArrayRegion(ctx, 0, 6, context);
+}
+
+/**
+ * Get image pixel data and, if icc_buffer is given, the ICC profile.
+ *
+ * @param ctx {in_pixel_format_out_status} tuple
+ * @param data [in] Buffer with encoded JXL stream
+ * @param pixels [out] Buffer to place pixels to
+ */
+JNIEXPORT void JNICALL Java_org_jpeg_jpegxl_wrapper_DecoderJni_nativeGetPixels(
+    JNIEnv* env, jobject /* jobj */, jintArray ctx, jobject data_buffer,
+    jobject pixels_buffer, jobject icc_buffer) {
+  jint context[1] = {0};
+  env->GetIntArrayRegion(ctx, 0, 1, context);
+
+  size_t pixel_format = 0;
+
+  Status status = Status::OK;
+
+  if (IsOk(status)) {
+    // Unlike getBasicInfo, "no-pixel-format" is not supported.
+    pixel_format = context[0];
+    if (pixel_format > kLastPixelFormat) {
+      status = FAILURE("Unrecognized pixel format");
+    }
+  }
+
+  if (IsOk(status)) {
+    status = DoDecode(env, data_buffer, /* info_pixels_size= */ nullptr,
+                      /* info_icc_size= */ nullptr, /* info= */ nullptr,
+                      pixel_format, pixels_buffer, icc_buffer);
+  }
+
+  context[0] = static_cast<int>(status);  // Slot 0 is reused for the status.
+  env->SetIntArrayRegion(ctx, 0, 1, context);
+}
+
+#undef FAILURE
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/decoder_jni.h b/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/decoder_jni.h
new file mode 100644
index 0000000000..8237fc95a2
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/decoder_jni.h
@@ -0,0 +1,43 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_JNI_ORG_JPEG_JPEGXL_WRAPPER_DECODER_JNI
+#define TOOLS_JNI_ORG_JPEG_JPEGXL_WRAPPER_DECODER_JNI
+
+#include <jni.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Get basic image information (size, etc.)
+ *
+ * @param ctx {in_pixel_format_out_status, out_width, out_height, pixels_size,
+ *             icc_size, alpha_bits} tuple
+ * @param data [in] Buffer with encoded JXL stream
+ */
+JNIEXPORT void JNICALL
+Java_org_jpeg_jpegxl_wrapper_DecoderJni_nativeGetBasicInfo(JNIEnv* env,
+                                                           jobject /*jobj*/,
+                                                           jintArray ctx,
+                                                           jobject data_buffer);
+
+/**
+ * Get image pixel data.
+ *
+ * @param ctx {in_pixel_format_out_status} tuple
+ * @param data [in] Buffer with encoded JXL stream
+ * @param pixels [out] Buffer to place pixels to
+ */
+JNIEXPORT void JNICALL Java_org_jpeg_jpegxl_wrapper_DecoderJni_nativeGetPixels(
+    JNIEnv* env, jobject /*jobj*/, jintArray ctx, jobject data_buffer,
+    jobject pixels_buffer, jobject icc_buffer);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TOOLS_JNI_ORG_JPEG_JPEGXL_WRAPPER_DECODER_JNI
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/decoder_jni_onload.cc b/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/decoder_jni_onload.cc
new file mode 100644
index 0000000000..c5e6ba3e0f
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/jni/org/jpeg/jpegxl/wrapper/decoder_jni_onload.cc
@@ -0,0 +1,52 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jni.h>
+
+#include "tools/jni/org/jpeg/jpegxl/wrapper/decoder_jni.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static char* kGetBasicInfoName = const_cast<char*>("nativeGetBasicInfo");
+static char* kGetBasicInfoSig = const_cast<char*>("([ILjava/nio/Buffer;)V");
+static char* kGetPixelsName = const_cast<char*>("nativeGetPixels");
+static char* kGetPixelsInfoSig = const_cast<char*>(
+    "([ILjava/nio/Buffer;Ljava/nio/Buffer;Ljava/nio/Buffer;)V");
+
+#define JXL_JNI_METHOD(NAME) \
+  (reinterpret_cast<void*>(  \
+      Java_org_jpeg_jpegxl_wrapper_DecoderJni_native##NAME))
+
+static const JNINativeMethod kDecoderMethods[] = {
+    {kGetBasicInfoName, kGetBasicInfoSig, JXL_JNI_METHOD(GetBasicInfo)},
+    {kGetPixelsName, kGetPixelsInfoSig, JXL_JNI_METHOD(GetPixels)}};
+
+static const size_t kNumDecoderMethods = 2;
+
+#undef JXL_JNI_METHOD
+
+JNIEXPORT jint JNI_OnLoad(JavaVM* vm, void* reserved) {
+  JNIEnv* env;
+  if (vm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_6) != JNI_OK) {
+    return -1;
+  }
+
+  jclass clazz = env->FindClass("org/jpeg/jpegxl/wrapper/DecoderJni");
+  if (clazz == nullptr) {
+    return -1;
+  }
+
+  if (env->RegisterNatives(clazz, kDecoderMethods, kNumDecoderMethods) < 0) {
+    return -1;
+  }
+
+  return JNI_VERSION_1_6;
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/third-party/libjxl/libjxl/tools/jpegli_dec_fuzzer.cc b/third-party/libjxl/libjxl/tools/jpegli_dec_fuzzer.cc
new file mode 100644
index 0000000000..30c1d957a2
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/jpegli_dec_fuzzer.cc
@@ -0,0 +1,213 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <setjmp.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <hwy/targets.h>
+#include <vector>
+
+#include "lib/jpegli/decode.h"
+
+namespace {
+
+// Externally visible value to ensure pixels are used in the fuzzer.
+int external_code = 0;
+
+template <typename It>
+void Consume(const It& begin, const It& end) {
+  for (auto it = begin; it < end; ++it) {
+    if (*it == 0) {
+      external_code ^= ~0;
+    } else {
+      external_code ^= *it;
+    }
+  }
+}
+
+// Options for the fuzzing
+struct FuzzSpec {
+  size_t chunk_size;
+  JpegliDataType output_type;
+  JpegliEndianness output_endianness;
+  int crop_output;
+};
+
+static constexpr uint8_t kFakeEoiMarker[2] = {0xff, 0xd9};
+static constexpr size_t kNumSourceBuffers = 4;
+
+class SourceManager {
+ public:
+  SourceManager(const uint8_t* data, size_t len, size_t max_chunk_size)
+      : data_(data), len_(len), max_chunk_size_(max_chunk_size) {
+    pub_.skip_input_data = skip_input_data;
+    pub_.resync_to_restart = jpegli_resync_to_restart;
+    pub_.term_source = term_source;
+    pub_.init_source = init_source;
+    pub_.fill_input_buffer = fill_input_buffer;
+    if (max_chunk_size_ == 0) max_chunk_size_ = len;
+    buffers_.resize(kNumSourceBuffers, std::vector<uint8_t>(max_chunk_size_));
+    Reset();
+  }
+
+  void Reset() {
+    pub_.next_input_byte = nullptr;
+    pub_.bytes_in_buffer = 0;
+    pos_ = 0;
+    chunk_idx_ = 0;
+  }
+
+ private:
+  jpeg_source_mgr pub_;
+  const uint8_t* data_;
+  size_t len_;
+  size_t chunk_idx_;
+  size_t pos_;
+  size_t max_chunk_size_;
+  std::vector<std::vector<uint8_t>> buffers_;
+
+  static void init_source(j_decompress_ptr cinfo) {}
+
+  static boolean fill_input_buffer(j_decompress_ptr cinfo) {
+    auto src = reinterpret_cast<SourceManager*>(cinfo->src);
+    if (src->pos_ < src->len_) {
+      size_t remaining = src->len_ - src->pos_;
+      size_t chunk_size = std::min(remaining, src->max_chunk_size_);
+      size_t next_idx = ++src->chunk_idx_ % kNumSourceBuffers;
+      // Too many chunks cause fuzzer timeout; past 2^15, serve the rest at once.
+      if (src->chunk_idx_ >= (1u << 15)) {
+        chunk_size = remaining;
+        next_idx = src->buffers_.size();
+        src->buffers_.emplace_back(chunk_size);
+      }
+      uint8_t* next_buffer = src->buffers_[next_idx].data();
+      memcpy(next_buffer, src->data_ + src->pos_, chunk_size);
+      src->pub_.next_input_byte = next_buffer;
+      src->pub_.bytes_in_buffer = chunk_size;
+    } else {  // Input exhausted: serve the two-byte fake EOI marker.
+      src->pub_.next_input_byte = kFakeEoiMarker;
+      src->pub_.bytes_in_buffer = 2;
+      src->len_ += 2;
+    }
+    src->pos_ += src->pub_.bytes_in_buffer;
+    return TRUE;
+  }
+
+  static void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {
+    auto src = reinterpret_cast<SourceManager*>(cinfo->src);
+    if (num_bytes <= 0) {
+      return;
+    }
+    if (src->pub_.bytes_in_buffer >= static_cast<size_t>(num_bytes)) {
+      src->pub_.bytes_in_buffer -= num_bytes;
+      src->pub_.next_input_byte += num_bytes;
+    } else {
+      src->pos_ += num_bytes - src->pub_.bytes_in_buffer;
+      src->pub_.bytes_in_buffer = 0;
+    }
+  }
+
+  static void term_source(j_decompress_ptr cinfo) {}
+};
+
+bool DecodeJpeg(const uint8_t* data, size_t size, size_t max_pixels,
+                const FuzzSpec& spec, std::vector<uint8_t>* pixels,
+                size_t* xsize, size_t* ysize) {
+  SourceManager src(data, size, spec.chunk_size);
+  jpeg_decompress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    jpeg_error_mgr jerr;
+    jmp_buf env;
+    cinfo.err = jpegli_std_error(&jerr);
+    if (setjmp(env)) {
+      return false;
+    }
+    cinfo.client_data = reinterpret_cast<void*>(&env);
+    cinfo.err->error_exit = [](j_common_ptr cinfo) {
+      jmp_buf* env = reinterpret_cast<jmp_buf*>(cinfo->client_data);
+      jpegli_destroy(cinfo);
+      longjmp(*env, 1);
+    };
+    cinfo.err->emit_message = [](j_common_ptr cinfo, int msg_level) {};
+    jpegli_create_decompress(&cinfo);
+    cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+    jpegli_read_header(&cinfo, TRUE);
+    *xsize = cinfo.image_width;
+    *ysize = cinfo.image_height;
+    size_t num_pixels = *xsize * *ysize;
+    if (num_pixels > max_pixels) return false;
+    jpegli_set_output_format(&cinfo, spec.output_type, spec.output_endianness);
+    jpegli_start_decompress(&cinfo);
+    if (spec.crop_output) {
+      JDIMENSION xoffset = cinfo.output_width / 3;
+      JDIMENSION xsize_cropped = cinfo.output_width / 3;
+      jpegli_crop_scanline(&cinfo, &xoffset, &xsize_cropped);
+    }
+
+    size_t bytes_per_sample = jpegli_bytes_per_sample(spec.output_type);
+    size_t stride =
+        bytes_per_sample * cinfo.output_components * cinfo.output_width;
+    size_t buffer_size = *ysize * stride;
+    pixels->resize(buffer_size);
+    for (size_t y = 0; y < *ysize; ++y) {
+      JSAMPROW rows[] = {pixels->data() + y * stride};
+      jpegli_read_scanlines(&cinfo, rows, 1);
+    }
+    Consume(pixels->cbegin(), pixels->cend());
+    jpegli_finish_decompress(&cinfo);
+    return true;
+  };
+  bool success = try_catch_block();
+  jpegli_destroy_decompress(&cinfo);
+  return success;
+}
+
+int TestOneInput(const uint8_t* data, size_t size) {
+  if (size < 4) return 0;
+  uint32_t flags = 0;
+  size_t used_flag_bits = 0;
+  memcpy(&flags, data + size - 4, 4);
+  size -= 4;
+
+  const auto getFlag = [&flags, &used_flag_bits](size_t max_value) {
+    size_t limit = 1;
+    while (limit <= max_value) {
+      limit <<= 1;
+      used_flag_bits++;
+      if (used_flag_bits > 32) abort();
+    }
+    uint32_t result = flags % limit;
+    flags /= limit;
+    return result % (max_value + 1);
+  };
+
+  FuzzSpec spec;
+  spec.output_type = static_cast<JpegliDataType>(getFlag(JPEGLI_TYPE_UINT16));
+  spec.output_endianness =
+      static_cast<JpegliEndianness>(getFlag(JPEGLI_BIG_ENDIAN));
+  uint32_t chunks = getFlag(15);
+  spec.chunk_size = chunks ? 1u << (chunks - 1) : 0;
+  spec.crop_output = getFlag(1);
+
+  std::vector<uint8_t> pixels;
+  size_t xsize, ysize;
+  size_t max_pixels = 1 << 21;
+
+  const auto targets = hwy::SupportedAndGeneratedTargets();
+  hwy::SetSupportedTargetsForTest(targets[getFlag(targets.size() - 1)]);
+  DecodeJpeg(data, size, max_pixels, spec, &pixels, &xsize, &ysize);
+  hwy::SetSupportedTargetsForTest(0);
+
+  return 0;
+}
+
+}  // namespace
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  return TestOneInput(data, size);
+}
diff --git a/third-party/libjxl/libjxl/tools/jpegli_dec_fuzzer_corpus.cc b/third-party/libjxl/libjxl/tools/jpegli_dec_fuzzer_corpus.cc
new file mode 100644
index 0000000000..0963e66838
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/jpegli_dec_fuzzer_corpus.cc
@@ -0,0 +1,365 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <setjmp.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#if defined(_WIN32) || defined(_WIN64)
+#include "third_party/dirent.h"
+#else
+#include <dirent.h>
+#include <unistd.h>
+#endif
+
+#include <algorithm>
+#include <iostream>
+#include <mutex>
+#include <random>
+#include <vector>
+
+#include "lib/jpegli/encode.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/random.h"
+#include "tools/file_io.h"
+#include "tools/thread_pool_internal.h"
+
+namespace {
+
+const size_t kMaxWidth = 50000;
+const size_t kMaxHeight = 50000;
+const size_t kMaxPixels = 20 * (1 << 20);  // 20 MP
+
+std::mutex stderr_mutex;
+
+std::vector<uint8_t> GetSomeTestImage(size_t xsize, size_t ysize,
+                                      size_t num_channels, uint16_t seed) {
+  // Cause more significant image difference for successive seeds.
+  jxl::Rng generator(seed);
+
+  // Returns random integer in interval [0, max_value)
+  auto rng = [&generator](size_t max_value) -> size_t {
+    return generator.UniformU(0, max_value);
+  };
+
+  // Dark background gradient color
+  uint16_t r0 = rng(32768);
+  uint16_t g0 = rng(32768);
+  uint16_t b0 = rng(32768);
+  uint16_t r1 = rng(32768);
+  uint16_t g1 = rng(32768);
+  uint16_t b1 = rng(32768);
+
+  // Circle with different color
+  size_t circle_x = rng(xsize);
+  size_t circle_y = rng(ysize);
+  size_t circle_r = rng(std::min(xsize, ysize));
+
+  // Rectangle with random noise (corners ordered: x0 <= x1, y0 <= y1)
+  size_t rect_x0 = rng(xsize);
+  size_t rect_y0 = rng(ysize);
+  size_t rect_x1 = rng(xsize);
+  size_t rect_y1 = rng(ysize);
+  if (rect_x1 < rect_x0) std::swap(rect_x0, rect_x1);
+  if (rect_y1 < rect_y0) std::swap(rect_y0, rect_y1);
+
+  size_t num_pixels = xsize * ysize;
+  std::vector<uint8_t> pixels(num_pixels * num_channels);
+  // Create pixel content to test.
+  for (size_t y = 0; y < ysize; y++) {
+    for (size_t x = 0; x < xsize; x++) {
+      uint16_t r = r0 * (ysize - y - 1) / ysize + r1 * y / ysize;
+      uint16_t g = g0 * (ysize - y - 1) / ysize + g1 * y / ysize;
+      uint16_t b = b0 * (ysize - y - 1) / ysize + b1 * y / ysize;
+      // put some shape in there for visual debugging
+      if ((x - circle_x) * (x - circle_x) + (y - circle_y) * (y - circle_y) <
+          circle_r * circle_r) {
+        r = (65535 - x * y) ^ seed;
+        g = (x << 8) + y + seed;
+        b = (y << 8) + x * seed;
+      } else if (x > rect_x0 && x < rect_x1 && y > rect_y0 && y < rect_y1) {
+        r = rng(65536);
+        g = rng(65536);
+        b = rng(65536);
+      }
+      size_t i = (y * xsize + x) * num_channels;
+      pixels[i + 0] = (r >> 8);  // Only the high byte of each channel is kept.
+      if (num_channels == 3) {
+        pixels[i + 1] = (g >> 8);
+        pixels[i + 2] = (b >> 8);
+      }
+    }
+  }
+  return pixels;
+}
+
+// ImageSpec needs to be a packed struct to allow us to use the raw memory of
+// the struct for hashing to create a consistent id.
+#pragma pack(push, 1)
+struct ImageSpec {
+  bool Validate() const {
+    if (width > kMaxWidth || height > kMaxHeight ||
+        width * height > kMaxPixels) {
+      return false;
+    }
+    return true;
+  }
+
+  friend std::ostream& operator<<(std::ostream& o, const ImageSpec& spec) {
+    o << "ImageSpec<"
+      << "size=" << spec.width << "x" << spec.height
+      << " * chan=" << spec.num_channels << " q=" << spec.quality
+      << " p=" << spec.progressive_level << " r=" << spec.restart_interval
+      << ">";
+    return o;
+  }
+
+  void SpecHash(uint8_t hash[16]) const {
+    const uint8_t* from = reinterpret_cast<const uint8_t*>(this);
+    std::seed_seq hasher(from, from + sizeof(*this));
+    uint32_t* to = reinterpret_cast<uint32_t*>(hash);
+    hasher.generate(to, to + 4);
+  }
+
+  uint32_t width = 256;
+  uint32_t height = 256;
+  uint32_t num_channels = 3;
+  uint32_t quality = 90;
+  uint32_t sampling = 0x11111111;
+  uint32_t progressive_level = 2;
+  uint32_t restart_interval = 0;
+  uint32_t fraction = 100;
+  // The seed for the PRNG.
+  uint32_t seed = 7777;
+};
+#pragma pack(pop)
+static_assert(sizeof(ImageSpec) % 4 == 0, "Add padding to ImageSpec.");
+
+bool EncodeWithJpegli(const ImageSpec& spec, const std::vector<uint8_t>& pixels,
+                      std::vector<uint8_t>* compressed) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    jpeg_error_mgr jerr;
+    jmp_buf env;
+    cinfo.err = jpegli_std_error(&jerr);
+    if (setjmp(env)) {
+      return false;
+    }
+    cinfo.client_data = reinterpret_cast<void*>(&env);
+    cinfo.err->error_exit = [](j_common_ptr cinfo) {
+      (*cinfo->err->output_message)(cinfo);
+      jmp_buf* env = reinterpret_cast<jmp_buf*>(cinfo->client_data);
+      jpegli_destroy(cinfo);
+      longjmp(*env, 1);
+    };
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = spec.width;
+    cinfo.image_height = spec.height;
+    cinfo.input_components = spec.num_channels;
+    cinfo.in_color_space = spec.num_channels == 1 ? JCS_GRAYSCALE : JCS_RGB;
+    jpegli_set_defaults(&cinfo);
+    jpegli_set_quality(&cinfo, spec.quality, TRUE);
+    uint32_t sampling = spec.sampling;
+    for (int c = 0; c < cinfo.num_components; ++c) {
+      cinfo.comp_info[c].h_samp_factor = sampling & 0xf;
+      cinfo.comp_info[c].v_samp_factor = (sampling >> 4) & 0xf;
+      sampling >>= 8;
+    }
+    jpegli_set_progressive_level(&cinfo, spec.progressive_level);
+    cinfo.restart_interval = spec.restart_interval;
+    jpegli_start_compress(&cinfo, TRUE);
+    size_t stride = cinfo.image_width * cinfo.input_components;
+    std::vector<uint8_t> row_bytes(stride);
+    for (size_t y = 0; y < cinfo.image_height; ++y) {
+      memcpy(&row_bytes[0], &pixels[y * stride], stride);
+      JSAMPROW row[] = {row_bytes.data()};
+      jpegli_write_scanlines(&cinfo, row, 1);
+    }
+    jpegli_finish_compress(&cinfo);
+    return true;
+  };
+  bool success = try_catch_block();
+  jpegli_destroy_compress(&cinfo);
+  if (success) {
+    buffer_size = buffer_size * spec.fraction / 100;
+    compressed->assign(buffer, buffer + buffer_size);
+  }
+  if (buffer) std::free(buffer);
+  return success;
+}
+
+bool GenerateFile(const char* output_dir, const ImageSpec& spec,
+                  bool regenerate, bool quiet) {
+  // Compute a checksum of the ImageSpec to name the file. This is just to keep
+  // the output of this program repeatable.
+  uint8_t checksum[16];
+  spec.SpecHash(checksum);
+  std::string hash_str(sizeof(checksum) * 2, ' ');
+  static const char* hex_chars = "0123456789abcdef";
+  for (size_t i = 0; i < sizeof(checksum); i++) {
+    hash_str[2 * i] = hex_chars[checksum[i] >> 4];
+    hash_str[2 * i + 1] = hex_chars[checksum[i] & 0x0f];  // low nibble
+  }
+  std::string output_fn = std::string(output_dir) + "/" + hash_str + ".jpg";
+
+  // Don't regenerate files if they already exist on disk to speed-up
+  // consecutive calls when --regenerate is not used.
+  struct stat st;
+  if (!regenerate && stat(output_fn.c_str(), &st) == 0 && S_ISREG(st.st_mode)) {
+    return true;
+  }
+
+  if (!quiet) {
+    std::unique_lock<std::mutex> lock(stderr_mutex);
+    std::cerr << "Generating " << spec << " as " << hash_str << std::endl;
+  }
+
+  // The PRNG for the trailing flag bytes is seeded from the spec, so the
+  // flags appended below are reproducible for a given spec.
+  std::mt19937 mt(spec.seed);
+
+  std::vector<uint8_t> pixels =
+      GetSomeTestImage(spec.width, spec.height, spec.num_channels, spec.seed);
+  std::vector<uint8_t> compressed;
+  JXL_CHECK(EncodeWithJpegli(spec, pixels, &compressed));
+
+  // Append 4 bytes with the flags used by jpegli_dec_fuzzer to select the
+  // decoding output.
+  std::uniform_int_distribution<> dis256(0, 255);
+  for (size_t i = 0; i < 4; ++i) {
+    compressed.push_back(dis256(mt));
+  }
+
+  if (!jpegxl::tools::WriteFile(output_fn, compressed)) {
+    return false;
+  }
+  if (!quiet) {
+    std::unique_lock<std::mutex> lock(stderr_mutex);
+    std::cerr << "Stored " << output_fn << " size: " << compressed.size()
+              << std::endl;
+  }
+
+  return true;
+}
+
+void Usage() {
+  fprintf(stderr,
+          "Use: fuzzer_corpus [-r] [-q] [-j THREADS] [output_dir]\n"
+          "\n"
+          "  -r Regenerate files if already exist.\n"
+          "  -q Be quiet.\n"
+          "  -j THREADS Number of parallel jobs to run.\n");
+}
+
+}  // namespace
+
+int main(int argc, const char** argv) {
+  const char* dest_dir = nullptr;
+  bool regenerate = false;
+  bool quiet = false;
+  size_t num_threads = std::thread::hardware_concurrency();
+  for (int optind = 1; optind < argc;) {
+    if (!strcmp(argv[optind], "-r")) {
+      regenerate = true;
+      optind++;
+    } else if (!strcmp(argv[optind], "-q")) {
+      quiet = true;
+      optind++;
+    } else if (!strcmp(argv[optind], "-j")) {
+      optind++;
+      if (optind < argc) {
+        num_threads = atoi(argv[optind++]);
+      } else {
+        fprintf(stderr, "-j needs an argument value.\n");
+        Usage();
+        return 1;
+      }
+    } else if (dest_dir == nullptr) {
+      dest_dir = argv[optind++];
+    } else {
+      fprintf(stderr, "Unknown parameter: \"%s\".\n", argv[optind]);
+      Usage();
+      return 1;
+    }
+  }
+  if (!dest_dir) {
+    dest_dir = "corpus";
+  }
+
+  struct stat st;
+  memset(&st, 0, sizeof(st));
+  if (stat(dest_dir, &st) != 0 || !S_ISDIR(st.st_mode)) {
+    fprintf(stderr, "Output path \"%s\" is not a directory.\n", dest_dir);
+    Usage();
+    return 1;
+  }
+
+  std::mt19937 mt(77777);
+
+  std::vector<std::pair<uint32_t, uint32_t>> image_sizes = {
+      {8, 8},     {32, 32},   {128, 128}, {10000, 1}, {10000, 2}, {1, 10000},
+      {2, 10000}, {555, 256}, {257, 513}, {512, 265}, {264, 520},
+  };
+  std::vector<uint32_t> sampling_ratios = {
+      0x11111111,  // 444
+      0x11111112,  // 422
+      0x11111121,  // 440
+      0x11111122,  // 420
+      0x11222211,  // luma subsampling
+  };
+
+  ImageSpec spec;
+  std::vector<ImageSpec> specs;
+  for (auto img_size : image_sizes) {
+    spec.width = img_size.first;
+    spec.height = img_size.second;
+    for (uint32_t num_channels : {1, 3}) {
+      spec.num_channels = num_channels;
+      for (uint32_t sampling : sampling_ratios) {
+        spec.sampling = sampling;
+        if (num_channels == 1 && sampling != 0x11111111) continue;
+        for (uint32_t restart : {0, 1, 1024}) {
+          spec.restart_interval = restart;
+          for (uint32_t prog_level : {0, 1, 2}) {
+            spec.progressive_level = prog_level;
+            for (uint32_t quality : {10, 90, 100}) {
+              spec.quality = quality;
+              for (uint32_t fraction : {10, 70, 100}) {
+                spec.fraction = fraction;
+                spec.seed = mt() % 777777;
+                if (!spec.Validate()) {
+                  if (!quiet) {
+                    std::cerr << "Skipping " << spec << std::endl;
+                  }
+                } else {
+                  specs.push_back(spec);
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  jpegxl::tools::ThreadPoolInternal pool{num_threads};
+  const auto generate = [&specs, dest_dir, regenerate, quiet](
+                            const uint32_t task, size_t /* thread */) {
+    const ImageSpec& spec = specs[task];
+    GenerateFile(dest_dir, spec, regenerate, quiet);
+  };
+  if (!RunOnPool(&pool, 0, specs.size(), jxl::ThreadPool::NoInit, generate,
+                 "FuzzerCorpus")) {
+    std::cerr << "Error generating fuzzer corpus" << std::endl;
+    return 1;
+  }
+  std::cerr << "Finished generating fuzzer corpus" << std::endl;
+  return 0;
+}
diff --git a/third-party/libjxl/libjxl/tools/jxl_from_tree.cc b/third-party/libjxl/libjxl/tools/jxl_from_tree.cc
new file mode 100644
index 0000000000..63d3934d20
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/jxl_from_tree.cc
@@ -0,0 +1,545 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+#include <string.h>
+
+#include <fstream>
+#include <iostream>
+#include <istream>
+#include <unordered_map>
+
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_frame.h"
+#include "lib/jxl/enc_heuristics.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/encoding/enc_debug_tree.h"
+#include "lib/jxl/modular/encoding/enc_ma.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/splines.h"
+#include "tools/file_io.h"
+
+namespace jpegxl {
+namespace tools {
+
+using ::jxl::BitWriter;
+using ::jxl::BlendMode;
+using ::jxl::CodecInOut;
+using ::jxl::CodecMetadata;
+using ::jxl::ColorCorrelationMap;
+using ::jxl::ColorEncoding;
+using ::jxl::ColorTransform;
+using ::jxl::CompressParams;
+using ::jxl::DefaultEncoderHeuristics;
+using ::jxl::FrameDimensions;
+using ::jxl::FrameInfo;
+using ::jxl::Image3F;
+using ::jxl::ImageF;
+using ::jxl::PaddedBytes;
+using ::jxl::PassesEncoderState;
+using ::jxl::Predictor;
+using ::jxl::PropertyDecisionNode;
+using ::jxl::QuantizedSpline;
+using ::jxl::Spline;
+using ::jxl::Splines;
+using ::jxl::Tree;
+
+namespace {
+struct SplineData {  // spline input gathered by ParseNode for one frame
+  int32_t quantization_adjustment = 1;  // set by SplineQuantizationAdjustment
+  std::vector<Spline> splines;          // one entry per parsed Spline block
+};
+
+// Quantizes every parsed spline and bundles them with their first points.
+Splines SplinesFromSplineData(const SplineData& spline_data,
+                              const ColorCorrelationMap& cmap) {
+  const int32_t adjust = spline_data.quantization_adjustment;
+  std::vector<QuantizedSpline> quantized;
+  std::vector<Spline::Point> origins;
+  quantized.reserve(spline_data.splines.size());
+  origins.reserve(spline_data.splines.size());
+  for (const Spline& s : spline_data.splines) {
+    JXL_CHECK(!s.control_points.empty());
+    quantized.emplace_back(s, adjust, cmap.YtoXRatio(0), cmap.YtoBRatio(0));
+    origins.push_back(s.control_points.front());
+  }
+  return Splines(adjust, std::move(quantized), std::move(origins));
+}
+
+// Reads one tree node or directive from tok() and recurses for what follows;
+// returns true once a leaf closes the subtree, false when a token is rejected.
+template <typename F>
+bool ParseNode(F& tok, Tree& tree, SplineData& spline_data,
+               CompressParams& cparams, size_t& W, size_t& H, CodecInOut& io,
+               int& have_next, int& x0, int& y0) {
+  static const std::unordered_map<std::string, int> property_map = {
+      {"c", 0},
+      {"g", 1},
+      {"y", 2},
+      {"x", 3},
+      {"|N|", 4},
+      {"|W|", 5},
+      {"N", 6},
+      {"W", 7},
+      {"W-WW-NW+NWW", 8},
+      {"W+N-NW", 9},
+      {"W-NW", 10},
+      {"NW-N", 11},
+      {"N-NE", 12},
+      {"N-NN", 13},
+      {"W-WW", 14},
+      {"WGH", 15},
+      {"PrevAbs", 16},
+      {"Prev", 17},
+      {"PrevAbsErr", 18},
+      {"PrevErr", 19},
+      {"PPrevAbs", 20},
+      {"PPrev", 21},
+      {"PPrevAbsErr", 22},
+      {"PPrevErr", 23},
+  };
+  static const std::unordered_map<std::string, Predictor> predictor_map = {
+      {"Set", Predictor::Zero},
+      {"W", Predictor::Left},
+      {"N", Predictor::Top},
+      {"AvgW+N", Predictor::Average0},
+      {"Select", Predictor::Select},
+      {"Gradient", Predictor::Gradient},
+      {"Weighted", Predictor::Weighted},
+      {"NE", Predictor::TopRight},
+      {"NW", Predictor::TopLeft},
+      {"WW", Predictor::LeftLeft},
+      {"AvgW+NW", Predictor::Average1},
+      {"AvgN+NW", Predictor::Average2},
+      {"AvgN+NE", Predictor::Average3},
+      {"AvgAll", Predictor::Average4},
+  };
+  auto t = tok();
+  if (t == "if") {
+    // Decision node: "if <property> > <splitval>", followed by both subtrees.
+    int p;
+    t = tok();
+    if (!property_map.count(t)) {
+      fprintf(stderr, "Unexpected property: %s\n", t.c_str());
+      return false;
+    }
+    p = property_map.at(t);
+    if ((t = tok()) != ">") {
+      fprintf(stderr, "Expected >, found %s\n", t.c_str());
+      return false;
+    }
+    t = tok();
+    size_t num = 0;
+    int split = std::stoi(t, &num);  // NOTE(review): throws if t is not numeric
+    if (num != t.size()) {
+      fprintf(stderr, "Invalid splitval: %s\n", t.c_str());
+      return false;
+    }
+    size_t pos = tree.size();
+    tree.emplace_back(PropertyDecisionNode::Split(p, split, pos + 1));
+    JXL_RETURN_IF_ERROR(ParseNode(tok, tree, spline_data, cparams, W, H, io,
+                                  have_next, x0, y0));
+    tree[pos].rchild = tree.size();  // right subtree is parsed next
+  } else if (t == "-") {
+    // Leaf: "- <predictor> [+|-] <offset>"; this ends the current subtree.
+    t = tok();
+    Predictor p;
+    if (!predictor_map.count(t)) {
+      fprintf(stderr, "Unexpected predictor: %s\n", t.c_str());
+      return false;
+    }
+    p = predictor_map.at(t);
+    t = tok();
+    bool subtract = false;
+    if (t == "-") {
+      subtract = true;
+      t = tok();
+    } else if (t == "+") {
+      t = tok();
+    }
+    size_t num = 0;
+    int offset = std::stoi(t, &num);
+    if (num != t.size()) {
+      fprintf(stderr, "Invalid offset: %s\n", t.c_str());
+      return false;
+    }
+    if (subtract) offset = -offset;
+    tree.emplace_back(PropertyDecisionNode::Leaf(p, offset));
+    return true;
+  } else if (t == "Width") {
+    t = tok();
+    size_t num = 0;
+    W = std::stoul(t, &num);
+    if (num != t.size()) {
+      fprintf(stderr, "Invalid width: %s\n", t.c_str());
+      return false;
+    }
+  } else if (t == "Height") {
+    t = tok();
+    size_t num = 0;
+    H = std::stoul(t, &num);
+    if (num != t.size()) {
+      fprintf(stderr, "Invalid height: %s\n", t.c_str());
+      return false;
+    }
+  } else if (t == "/*") {
+    t = tok();
+    while (t != "*/" && t != "") t = tok();  // skip to "*/" or an empty token
+  } else if (t == "Squeeze") {
+    cparams.responsive = true;
+  } else if (t == "GroupShift") {
+    t = tok();
+    size_t num = 0;
+    cparams.modular_group_size_shift = std::stoul(t, &num);
+    if (num != t.size()) {
+      fprintf(stderr, "Invalid GroupShift: %s\n", t.c_str());
+      return false;
+    }
+  } else if (t == "XYB") {
+    cparams.color_transform = ColorTransform::kXYB;
+  } else if (t == "CbYCr") {
+    cparams.color_transform = ColorTransform::kYCbCr;
+  } else if (t == "RCT") {
+    t = tok();
+    size_t num = 0;
+    cparams.colorspace = std::stoul(t, &num);
+    if (num != t.size()) {
+      fprintf(stderr, "Invalid RCT: %s\n", t.c_str());
+      return false;
+    }
+  } else if (t == "Orientation") {
+    t = tok();
+    size_t num = 0;
+    io.metadata.m.orientation = std::stoul(t, &num);
+    if (num != t.size()) {
+      fprintf(stderr, "Invalid Orientation: %s\n", t.c_str());
+      return false;
+    }
+  } else if (t == "Alpha") {
+    io.metadata.m.SetAlphaBits(io.metadata.m.bit_depth.bits_per_sample);
+    ImageF alpha(W, H);  // sized with the W/H values parsed so far
+    io.frames[0].SetAlpha(std::move(alpha));
+  } else if (t == "Bitdepth") {
+    t = tok();
+    size_t num = 0;
+    io.metadata.m.bit_depth.bits_per_sample = std::stoul(t, &num);
+    if (num != t.size()) {
+      fprintf(stderr, "Invalid Bitdepth: %s\n", t.c_str());
+      return false;
+    }
+  } else if (t == "FloatExpBits") {
+    t = tok();
+    size_t num = 0;
+    io.metadata.m.bit_depth.floating_point_sample = true;
+    io.metadata.m.bit_depth.exponent_bits_per_sample = std::stoul(t, &num);
+    if (num != t.size()) {
+      fprintf(stderr, "Invalid FloatExpBits: %s\n", t.c_str());
+      return false;
+    }
+  } else if (t == "FramePos") {
+    t = tok();
+    size_t num = 0;
+    x0 = std::stoi(t, &num);
+    if (num != t.size()) {
+      fprintf(stderr, "Invalid FramePos x0: %s\n", t.c_str());
+      return false;
+    }
+    t = tok();
+    y0 = std::stoi(t, &num);
+    if (num != t.size()) {
+      fprintf(stderr, "Invalid FramePos y0: %s\n", t.c_str());
+      return false;
+    }
+  } else if (t == "NotLast") {
+    have_next = 1;  // tells the caller that another frame description follows
+  } else if (t == "Upsample") {
+    t = tok();
+    size_t num = 0;
+    cparams.resampling = std::stoul(t, &num);
+    if (num != t.size() ||
+        (cparams.resampling != 1 && cparams.resampling != 2 &&
+         cparams.resampling != 4 && cparams.resampling != 8)) {
+      fprintf(stderr, "Invalid Upsample: %s\n", t.c_str());
+      return false;
+    }
+  } else if (t == "Upsample_EC") {
+    t = tok();
+    size_t num = 0;
+    cparams.ec_resampling = std::stoul(t, &num);
+    if (num != t.size() ||
+        (cparams.ec_resampling != 1 && cparams.ec_resampling != 2 &&
+         cparams.ec_resampling != 4 && cparams.ec_resampling != 8)) {
+      fprintf(stderr, "Invalid Upsample_EC: %s\n", t.c_str());
+      return false;
+    }
+  } else if (t == "Animation") {
+    io.metadata.m.have_animation = true;
+    io.metadata.m.animation.tps_numerator = 1000;
+    io.metadata.m.animation.tps_denominator = 1;
+    io.frames[0].duration = 100;
+  } else if (t == "AnimationFPS") {
+    t = tok();
+    size_t num = 0;
+    io.metadata.m.animation.tps_numerator = std::stoul(t, &num);
+    if (num != t.size()) {
+      fprintf(stderr, "Invalid numerator: %s\n", t.c_str());
+      return false;
+    }
+    t = tok();
+    num = 0;
+    io.metadata.m.animation.tps_denominator = std::stoul(t, &num);
+    if (num != t.size()) {
+      fprintf(stderr, "Invalid denominator: %s\n", t.c_str());
+      return false;
+    }
+  } else if (t == "Duration") {
+    t = tok();
+    size_t num = 0;
+    io.frames[0].duration = std::stoul(t, &num);
+    if (num != t.size()) {
+      fprintf(stderr, "Invalid Duration: %s\n", t.c_str());
+      return false;
+    }
+  } else if (t == "BlendMode") {
+    t = tok();
+    if (t == "kAdd") {
+      io.frames[0].blendmode = BlendMode::kAdd;
+    } else if (t == "kReplace") {
+      io.frames[0].blendmode = BlendMode::kReplace;
+    } else if (t == "kBlend") {
+      io.frames[0].blendmode = BlendMode::kBlend;
+    } else if (t == "kAlphaWeightedAdd") {
+      io.frames[0].blendmode = BlendMode::kAlphaWeightedAdd;
+    } else if (t == "kMul") {
+      io.frames[0].blendmode = BlendMode::kMul;
+    } else {
+      fprintf(stderr, "Invalid BlendMode: %s\n", t.c_str());
+      return false;
+    }
+  } else if (t == "SplineQuantizationAdjustment") {
+    t = tok();
+    size_t num = 0;
+    spline_data.quantization_adjustment = std::stoul(t, &num);
+    if (num != t.size()) {
+      fprintf(stderr, "Invalid SplineQuantizationAdjustment: %s\n", t.c_str());
+      return false;
+    }
+  } else if (t == "Spline") {
+    Spline spline;
+    const auto ParseFloat = [&t, &tok](float& output) {
+      t = tok();
+      size_t num = 0;
+      output = std::stof(t, &num);
+      if (num != t.size()) {
+        fprintf(stderr, "Invalid spline data: %s\n", t.c_str());
+        return false;
+      }
+      return true;
+    };
+    for (auto& dct : spline.color_dct) {
+      for (float& coefficient : dct) {
+        JXL_RETURN_IF_ERROR(ParseFloat(coefficient));
+      }
+    }
+    for (float& coefficient : spline.sigma_dct) {
+      JXL_RETURN_IF_ERROR(ParseFloat(coefficient));
+    }
+    // Control points follow as "x y" pairs until the EndSpline token.
+    while (true) {
+      t = tok();
+      if (t == "EndSpline") break;
+      size_t num = 0;
+      Spline::Point point;
+      point.x = std::stof(t, &num);
+      bool ok_x = num == t.size();
+      auto t_y = tok();
+      point.y = std::stof(t_y, &num);
+      if (!ok_x || num != t_y.size()) {
+        fprintf(stderr, "Invalid spline control point: %s %s\n", t.c_str(),
+                t_y.c_str());
+        return false;
+      }
+      spline.control_points.push_back(point);
+    }
+    if (spline.control_points.empty()) {
+      fprintf(stderr, "Spline with no control point\n");
+      return false;
+    }
+    spline_data.splines.push_back(std::move(spline));
+  } else if (t == "Gaborish") {
+    cparams.gaborish = jxl::Override::kOn;
+  } else if (t == "DeltaPalette") {
+    cparams.lossy_palette = true;
+    cparams.palette_colors = 0;
+  } else if (t == "EPF") {
+    t = tok();
+    size_t num = 0;
+    cparams.epf = std::stoul(t, &num);
+    if (num != t.size() || cparams.epf > 3) {
+      fprintf(stderr, "Invalid EPF: %s\n", t.c_str());
+      return false;
+    }
+  } else if (t == "Noise") {
+    cparams.manual_noise.resize(8);
+    for (size_t i = 0; i < 8; i++) {
+      t = tok();
+      size_t num = 0;
+      cparams.manual_noise[i] = std::stof(t, &num);
+      if (num != t.size()) {
+        fprintf(stderr, "Invalid noise entry: %s\n", t.c_str());
+        return false;
+      }
+    }
+  } else if (t == "XYBFactors") {
+    cparams.manual_xyb_factors.resize(3);
+    for (size_t i = 0; i < 3; i++) {
+      t = tok();
+      size_t num = 0;
+      cparams.manual_xyb_factors[i] = std::stof(t, &num);
+      if (num != t.size()) {
+        fprintf(stderr, "Invalid XYB factor: %s\n", t.c_str());
+        return false;
+      }
+    }
+  } else {
+    fprintf(stderr, "Unexpected node type: %s\n", t.c_str());
+    return false;
+  }
+  JXL_RETURN_IF_ERROR(  // everything except a leaf continues with the next item
+      ParseNode(tok, tree, spline_data, cparams, W, H, io, have_next, x0, y0));
+  return true;
+}
+
+class Heuristics : public DefaultEncoderHeuristics {
+ public:
+  // Stores the fixed tree that CustomFixedTreeLossless hands out.
+  explicit Heuristics(Tree tree) : tree_(std::move(tree)) {}
+  // Copies the stored tree into *tree and reports success.
+  bool CustomFixedTreeLossless(const FrameDimensions& /* frame_dim */,
+                               Tree* tree) override {
+    *tree = tree_;
+    return true;
+  }
+ private:
+  Tree tree_;
+};
+}  // namespace
+
+// Encodes the tree description read from `in` ("-" selects stdin) and writes
+// the result to `out`; a non-null `tree_out` is passed to PrintTree.
+// Returns 0 on success and 1 on any open, parse, encode or write failure.
+int JxlFromTree(const char* in, const char* out, const char* tree_out) {
+  Tree tree;
+  SplineData spline_data;
+  CompressParams cparams = {};
+  size_t width = 1024, height = 1024;
+  int x0 = 0, y0 = 0;
+  cparams.SetLossless();
+  cparams.responsive = false;
+  cparams.resampling = 1;
+  cparams.ec_resampling = 1;
+  cparams.modular_group_size_shift = 3;
+  CodecInOut io;
+  int have_next = 0;
+
+  std::istream* f = &std::cin;
+  std::ifstream file;
+  if (strcmp(in, "-")) {
+    file.open(in, std::ifstream::in);
+    if (!file.is_open()) {
+      fprintf(stderr, "Failed to open \"%s\"\n", in);
+      return 1;
+    }
+    f = &file;
+  }
+  auto tok = [&f]() {
+    std::string token;
+    *f >> token;
+    return token;
+  };
+  if (!ParseNode(tok, tree, spline_data, cparams, width, height, io, have_next,
+                 x0, y0)) {
+    return 1;
+  }
+
+  if (tree_out) {
+    PrintTree(tree, tree_out);
+  }
+  Image3F image(width, height);
+  io.SetFromImage(std::move(image), ColorEncoding::SRGB());
+  io.SetSize((width + x0) * cparams.resampling,
+             (height + y0) * cparams.resampling);
+  io.metadata.m.color_encoding.DecideIfWantICC(jxl::GetJxlCms());
+  cparams.options.zero_tokens = true;
+  cparams.palette_colors = 0;
+  cparams.channel_colors_pre_transform_percent = 0;
+  cparams.channel_colors_percent = 0;
+  cparams.patches = jxl::Override::kOff;
+  cparams.already_downsampled = true;
+
+  io.CheckMetadata();
+  BitWriter writer;
+  std::unique_ptr<CodecMetadata> metadata = jxl::make_unique<CodecMetadata>();
+  *metadata = io.metadata;
+  // Status failures are mapped to exit code 1 by hand: JXL_RETURN_IF_ERROR
+  // would return the falsy status itself, i.e. 0 from this int function.
+  if (!metadata->size.Set(io.xsize(), io.ysize())) return 1;
+  metadata->m.xyb_encoded = (cparams.color_transform == ColorTransform::kXYB);
+  if (!WriteCodestreamHeaders(metadata.get(), &writer, nullptr)) return 1;
+  writer.ZeroPadToByte();
+
+  while (true) {
+    PassesEncoderState enc_state;
+    enc_state.heuristics = jxl::make_unique<Heuristics>(tree);
+    enc_state.shared.image_features.splines =
+        SplinesFromSplineData(spline_data, enc_state.shared.cmap);
+    FrameInfo info;
+    info.is_last = !have_next;
+    if (!info.is_last) info.save_as_reference = 1;
+    io.frames[0].origin.x0 = x0;
+    io.frames[0].origin.y0 = y0;
+    info.clamp = false;
+    if (!jxl::EncodeFrame(cparams, info, metadata.get(), io.frames[0],
+                          &enc_state, jxl::GetJxlCms(), nullptr, &writer,
+                          nullptr)) {
+      fprintf(stderr, "Failed to encode frame\n");
+      return 1;
+    }
+    if (!have_next) break;
+    tree.clear();
+    spline_data.splines.clear();
+    have_next = 0;
+    cparams.manual_noise.clear();
+    if (!ParseNode(tok, tree, spline_data, cparams, width, height, io,
+                   have_next, x0, y0)) {
+      return 1;
+    }
+    Image3F image(width, height);
+    io.SetFromImage(std::move(image), ColorEncoding::SRGB());
+    io.frames[0].blend = true;
+  }
+  PaddedBytes compressed = std::move(writer).TakeBytes();
+  if (!WriteFile(out, compressed)) {
+    fprintf(stderr, "Failed to write to \"%s\"\n", out);
+    return 1;
+  }
+  return 0;
+}
+}  // namespace tools
+}  // namespace jpegxl
+
+int main(int argc, char** argv) {
+  const bool argc_ok = argc == 3 || argc == 4;  // in, out and optional drawing
+  if (!argc_ok || (strcmp(argv[1], "-") && !strcmp(argv[1], argv[2]))) {
+    fprintf(stderr, "Usage: %s tree_in.txt out.jxl [tree_drawing]\n", argv[0]);
+    return 1;
+  }
+  const char* tree_drawing = argc == 4 ? argv[3] : nullptr;
+  return jpegxl::tools::JxlFromTree(argv[1], argv[2], tree_drawing);
+}
diff --git a/third-party/libjxl/libjxl/tools/jxlinfo.c b/third-party/libjxl/libjxl/tools/jxlinfo.c
new file mode 100644
index 0000000000..e7d23eeb75
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/jxlinfo.c
@@ -0,0 +1,442 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This example prints information from the main codestream header.
+
+#include <inttypes.h>
+#include <jxl/decode.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Prints a human-readable summary of the JPEG XL stream in `file` to stdout;
+// `verbose` adds detail. Returns 1 if the basic info was decoded, else 0.
+int PrintBasicInfo(FILE* file, int verbose) {
+  uint8_t* data = NULL;
+  size_t data_size = 0;
+  // In how large chunks to read from the file and try decoding the basic info.
+  const size_t chunk_size = 2048;
+
+  JxlDecoder* dec = JxlDecoderCreate(NULL);
+  if (!dec) {
+    fprintf(stderr, "JxlDecoderCreate failed\n");
+    return 0;
+  }
+
+  JxlDecoderSetKeepOrientation(dec, 1);
+  JxlDecoderSetCoalescing(dec, JXL_FALSE);
+
+  if (JXL_DEC_SUCCESS != JxlDecoderSubscribeEvents(
+                             dec, JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING |
+                                      JXL_DEC_FRAME | JXL_DEC_BOX)) {
+    fprintf(stderr, "JxlDecoderSubscribeEvents failed\n");
+    JxlDecoderDestroy(dec);
+    return 0;
+  }
+
+  JxlBasicInfo info = {0};  // zeroed: it is read after the loop even on error
+  int seen_basic_info = 0;
+  JxlFrameHeader frame_header;
+  int framecount = 0;
+  float total_duration = 0.f;
+
+  for (;;) {
+    // The first time, this will output JXL_DEC_NEED_MORE_INPUT because no
+    // input is set yet, this is ok since the input is set when handling this
+    // event.
+    JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+
+    if (status == JXL_DEC_ERROR) {
+      fprintf(stderr, "Decoder error\n");
+      break;
+    } else if (status == JXL_DEC_NEED_MORE_INPUT) {
+      // The first time there is nothing to release and it returns 0, but that
+      // is ok.
+      size_t remaining = JxlDecoderReleaseInput(dec);
+      // move any remaining bytes to the front if necessary
+      if (remaining != 0) {
+        memmove(data, data + data_size - remaining, remaining);
+      }
+      // resize the buffer to append one more chunk of data
+      // TODO(lode): avoid unnecessary reallocations
+      uint8_t* grown = (uint8_t*)realloc(data, remaining + chunk_size);
+      if (!grown) {
+        fprintf(stderr, "Out of memory\n");
+        break;
+      }
+      data = grown;
+      // append bytes read from the file behind the remaining bytes
+      size_t read_size = fread(data + remaining, 1, chunk_size, file);
+      if (read_size == 0 && feof(file)) {
+        fprintf(stderr, "Unexpected EOF\n");
+        break;
+      }
+      data_size = remaining + read_size;
+      JxlDecoderSetInput(dec, data, data_size);
+      if (feof(file)) JxlDecoderCloseInput(dec);
+    } else if (status == JXL_DEC_SUCCESS) {
+      // Finished all processing.
+      break;
+    } else if (status == JXL_DEC_BASIC_INFO) {
+      if (JXL_DEC_SUCCESS != JxlDecoderGetBasicInfo(dec, &info)) {
+        fprintf(stderr, "JxlDecoderGetBasicInfo failed\n");
+        break;
+      }
+      seen_basic_info = 1;
+      printf("JPEG XL %s, %ux%u, %s",
+             info.have_animation ? "animation" : "image", info.xsize,
+             info.ysize,
+             info.uses_original_profile ? "(possibly) lossless" : "lossy");
+      printf(", %d-bit ", info.bits_per_sample);
+      if (info.exponent_bits_per_sample) {
+        printf("float (%d exponent bits) ", info.exponent_bits_per_sample);
+      }
+      int cmyk = 0;
+      const char* const ec_type_names[] = {
+          "Alpha",     "Depth",     "Spotcolor", "Selection", "Black",
+          "CFA",       "Thermal",   "Reserved0", "Reserved1", "Reserved2",
+          "Reserved3", "Reserved4", "Reserved5", "Reserved6", "Reserved7",
+          "Unknown",   "Optional"};
+      const size_t ec_type_names_size =
+          sizeof(ec_type_names) / sizeof(ec_type_names[0]);
+      for (uint32_t i = 0; i < info.num_extra_channels; i++) {
+        JxlExtraChannelInfo extra;
+        if (JXL_DEC_SUCCESS != JxlDecoderGetExtraChannelInfo(dec, i, &extra)) {
+          fprintf(stderr, "JxlDecoderGetExtraChannelInfo failed\n");
+          break;
+        }
+        if (extra.type == JXL_CHANNEL_BLACK) cmyk = 1;
+      }
+      if (info.num_color_channels == 1)
+        printf("Grayscale");
+      else {
+        if (cmyk) {
+          printf("CMY");
+        } else {
+          printf("RGB");
+        }
+      }
+      for (uint32_t i = 0; i < info.num_extra_channels; i++) {
+        JxlExtraChannelInfo extra;
+        if (JXL_DEC_SUCCESS != JxlDecoderGetExtraChannelInfo(dec, i, &extra)) {
+          fprintf(stderr, "JxlDecoderGetExtraChannelInfo failed\n");
+          break;
+        }
+        printf("+%s", (extra.type < ec_type_names_size
+                           ? ec_type_names[extra.type]
+                           : "Unknown, please update your libjxl"));
+      }
+      printf("\n");
+      if (verbose) {
+        printf("num_color_channels: %d\n", info.num_color_channels);
+        printf("num_extra_channels: %d\n", info.num_extra_channels);
+        for (uint32_t i = 0; i < info.num_extra_channels; i++) {
+          JxlExtraChannelInfo extra;
+          if (JXL_DEC_SUCCESS !=
+              JxlDecoderGetExtraChannelInfo(dec, i, &extra)) {
+            fprintf(stderr, "JxlDecoderGetExtraChannelInfo failed\n");
+            break;
+          }
+          printf("extra channel %u:\n", i);
+          printf("  type: %s\n", (extra.type < ec_type_names_size
+                                      ? ec_type_names[extra.type]
+                                      : "Unknown, please update your libjxl"));
+          printf("  bits_per_sample: %u\n", extra.bits_per_sample);
+          if (extra.exponent_bits_per_sample > 0) {
+            printf("  float, with exponent_bits_per_sample: %u\n",
+                   extra.exponent_bits_per_sample);
+          }
+          if (extra.dim_shift > 0) {
+            printf("  dim_shift: %u (upsampled %ux)\n", extra.dim_shift,
+                   1 << extra.dim_shift);
+          }
+          if (extra.name_length) {
+            char* name = malloc(extra.name_length + 1);
+            if (JXL_DEC_SUCCESS != JxlDecoderGetExtraChannelName(
+                                       dec, i, name, extra.name_length + 1)) {
+              fprintf(stderr, "JxlDecoderGetExtraChannelName failed\n");
+              free(name);
+              break;
+            }
+            printf("  name: %s\n", name);
+            free(name);
+          }
+          if (extra.type == JXL_CHANNEL_ALPHA)
+            printf("  alpha_premultiplied: %d (%s)\n",
+                   extra.alpha_premultiplied,
+                   extra.alpha_premultiplied ? "Premultiplied"
+                                             : "Non-premultiplied");
+          if (extra.type == JXL_CHANNEL_SPOT_COLOR) {
+            printf("  spot_color: (%f, %f, %f) with opacity %f\n",
+                   extra.spot_color[0], extra.spot_color[1],
+                   extra.spot_color[2], extra.spot_color[3]);
+          }
+          if (extra.type == JXL_CHANNEL_CFA)
+            printf("  cfa_channel: %u\n", extra.cfa_channel);
+        }
+      }
+      if (info.intensity_target != 255.f || info.min_nits != 0.f ||
+          info.relative_to_max_display != 0 ||
+          info.linear_below != 0.f) {
+        printf("intensity_target: %f nits\n", info.intensity_target);
+        printf("min_nits: %f\n", info.min_nits);
+        printf("relative_to_max_display: %d\n", info.relative_to_max_display);
+        printf("linear_below: %f\n", info.linear_below);
+      }
+      if (verbose) printf("have_preview: %d\n", info.have_preview);
+      if (info.have_preview) {
+        printf("Preview image: %ux%u\n", info.preview.xsize,
+               info.preview.ysize);
+      }
+      if (verbose) printf("have_animation: %d\n", info.have_animation);
+      if (verbose && info.have_animation) {
+        printf("ticks per second (numerator / denominator): %u / %u\n",
+               info.animation.tps_numerator, info.animation.tps_denominator);
+        printf("num_loops: %u\n", info.animation.num_loops);
+        printf("have_timecodes: %d\n", info.animation.have_timecodes);
+      }
+      if (info.xsize != info.intrinsic_xsize ||
+          info.ysize != info.intrinsic_ysize || verbose) {
+        printf("Intrinsic dimensions: %ux%u\n", info.intrinsic_xsize,
+               info.intrinsic_ysize);
+      }
+      const char* const orientation_string[8] = {
+          "Normal",          "Flipped horizontally",
+          "Upside down",     "Flipped vertically",
+          "Transposed",      "90 degrees clockwise",
+          "Anti-Transposed", "90 degrees counter-clockwise"};
+      if (info.orientation > 0 && info.orientation < 9) {
+        if (verbose || info.orientation > 1) {
+          printf("Orientation: %d (%s)\n", info.orientation,
+                 orientation_string[info.orientation - 1]);
+        }
+      } else {
+        fprintf(stderr, "Invalid orientation\n");
+      }
+    } else if (status == JXL_DEC_COLOR_ENCODING) {
+      printf("Color space: ");
+      JxlColorEncoding color_encoding;
+      if (JXL_DEC_SUCCESS ==
+          JxlDecoderGetColorAsEncodedProfile(
+              dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL, &color_encoding)) {
+        const char* const cs_string[4] = {"RGB", "Grayscale", "XYB", "Unknown"};
+        const char* const wp_string[12] = {"", "D65", "Custom", "", "",  "",
+                                           "", "",    "",       "", "E", "P3"};
+        const char* const pr_string[12] = {
+            "", "sRGB", "Custom", "", "", "", "", "", "", "Rec.2100", "", "P3"};
+        const char* const tf_string[19] = {
+            "", "709", "Unknown", "",     "", "", "",   "",    "Linear", "",
+            "", "",    "",        "sRGB", "", "", "PQ", "DCI", "HLG"};
+        const char* const ri_string[4] = {"Perceptual", "Relative",
+                                          "Saturation", "Absolute"};
+        printf("%s, ", cs_string[color_encoding.color_space]);
+        printf("%s, ", wp_string[color_encoding.white_point]);
+        if (color_encoding.white_point == JXL_WHITE_POINT_CUSTOM) {
+          printf("white_point(x=%f,y=%f), ", color_encoding.white_point_xy[0],
+                 color_encoding.white_point_xy[1]);
+        }
+        if (color_encoding.color_space == JXL_COLOR_SPACE_RGB ||
+            color_encoding.color_space == JXL_COLOR_SPACE_UNKNOWN) {
+          printf("%s primaries", pr_string[color_encoding.primaries]);
+          if (color_encoding.primaries == JXL_PRIMARIES_CUSTOM) {
+            printf(": red(x=%f,y=%f),", color_encoding.primaries_red_xy[0],
+                   color_encoding.primaries_red_xy[1]);
+            printf("  green(x=%f,y=%f),", color_encoding.primaries_green_xy[0],
+                   color_encoding.primaries_green_xy[1]);
+            printf("  blue(x=%f,y=%f)", color_encoding.primaries_blue_xy[0],
+                   color_encoding.primaries_blue_xy[1]);
+          } else
+            printf(", ");
+        }
+        if (color_encoding.transfer_function == JXL_TRANSFER_FUNCTION_GAMMA) {
+          printf("gamma(%f) transfer function, ", color_encoding.gamma);
+        } else {
+          printf("%s transfer function, ",
+                 tf_string[color_encoding.transfer_function]);
+        }
+        printf("rendering intent: %s\n",
+               ri_string[color_encoding.rendering_intent]);
+      } else {
+        // The profile is not in JPEG XL encoded form, get as ICC profile
+        // instead.
+        size_t profile_size;
+        if (JXL_DEC_SUCCESS !=
+            JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+                                        &profile_size)) {
+          fprintf(stderr, "JxlDecoderGetICCProfileSize failed\n");
+          continue;
+        }
+        printf("%" PRIu64 "-byte ICC profile, ", (uint64_t)profile_size);
+        if (profile_size < 132) {
+          fprintf(stderr, "ICC profile too small\n");
+          continue;
+        }
+        uint8_t* profile = (uint8_t*)malloc(profile_size);
+        if (JXL_DEC_SUCCESS != JxlDecoderGetColorAsICCProfile(
+                                   dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+                                   profile, profile_size)) {
+          fprintf(stderr, "JxlDecoderGetColorAsICCProfile failed\n");
+          free(profile);
+          continue;
+        }
+        printf("CMM type: \"%.4s\", ", profile + 4);
+        printf("color space: \"%.4s\", ", profile + 16);
+        printf("rendering intent: %d\n", (int)profile[67]);
+        free(profile);
+      }
+    } else if (status == JXL_DEC_FRAME) {
+      if (JXL_DEC_SUCCESS != JxlDecoderGetFrameHeader(dec, &frame_header)) {
+        fprintf(stderr, "JxlDecoderGetFrameHeader failed\n");
+        break;
+      }
+      if (frame_header.duration == 0) {
+        if (frame_header.is_last && framecount == 0 &&
+            frame_header.name_length == 0)
+          continue;
+        printf("layer: ");
+      } else {
+        printf("frame: ");
+      }
+      framecount++;
+      if (frame_header.layer_info.have_crop) {
+        printf("%ux%u at position (%i,%i)", frame_header.layer_info.xsize,
+               frame_header.layer_info.ysize, frame_header.layer_info.crop_x0,
+               frame_header.layer_info.crop_y0);
+      } else {
+        printf("full image size");
+      }
+      if (info.have_animation) {
+        float ms = frame_header.duration * 1000.f *
+                   info.animation.tps_denominator /
+                   info.animation.tps_numerator;
+        total_duration += ms;
+        printf(", duration: %.1f ms", ms);
+        if (info.animation.have_timecodes) {
+          printf(", time code: %X", frame_header.timecode);
+        }
+      }
+      if (frame_header.name_length) {
+        char* name = malloc(frame_header.name_length + 1);
+        if (JXL_DEC_SUCCESS !=
+            JxlDecoderGetFrameName(dec, name, frame_header.name_length + 1)) {
+          fprintf(stderr, "JxlDecoderGetFrameName failed\n");
+          free(name);
+          break;
+        }
+        printf(", name: \"%s\"", name);
+        free(name);
+      }
+      printf("\n");
+    } else if (status == JXL_DEC_BOX) {
+      JxlBoxType type;
+      uint64_t size;
+      JxlDecoderGetBoxType(dec, type, JXL_FALSE);
+      JxlDecoderGetBoxSizeRaw(dec, &size);
+      if (verbose) {
+        printf("box: type: \"%c%c%c%c\" size: %" PRIu64 "\n", type[0], type[1],
+               type[2], type[3], (uint64_t)size);
+      }
+      if (!strncmp(type, "JXL ", 4)) {
+        printf("JPEG XL file format container (ISO/IEC 18181-2)\n");
+      } else if (!strncmp(type, "ftyp", 4)) {
+      } else if (!strncmp(type, "jxlc", 4)) {
+      } else if (!strncmp(type, "jxlp", 4)) {
+      } else if (!strncmp(type, "jxll", 4)) {
+      } else if (!strncmp(type, "jxli", 4)) {
+        printf("Frame index box present\n");
+      } else if (!strncmp(type, "jbrd", 4)) {
+        printf("JPEG bitstream reconstruction data available\n");
+      } else if (!strncmp(type, "jumb", 4) || !strncmp(type, "Exif", 4) ||
+                 !strncmp(type, "xml ", 4)) {
+        printf("Uncompressed %c%c%c%c metadata: %" PRIu64 " bytes\n", type[0],
+               type[1], type[2], type[3], (uint64_t)size);
+      } else if (!strncmp(type, "brob", 4)) {
+        JxlDecoderGetBoxType(dec, type, JXL_TRUE);
+        printf("Brotli-compressed %c%c%c%c metadata: %" PRIu64
+               " compressed bytes\n",
+               type[0], type[1], type[2], type[3], (uint64_t)size);
+      } else {
+        printf("unknown box: type: \"%c%c%c%c\" size: %" PRIu64 "\n", type[0],
+               type[1], type[2], type[3], (uint64_t)size);
+      }
+    } else {
+      fprintf(stderr, "Unexpected decoder status\n");
+      break;
+    }
+  }
+  if (info.animation.num_loops > 1) total_duration *= info.animation.num_loops;
+  if (info.have_animation) {
+    printf("Animation length: %.3f seconds%s\n", total_duration * 0.001f,
+           (info.animation.num_loops ? "" : " (looping)"));
+  }
+  JxlDecoderDestroy(dec);
+  free(data);
+
+  return seen_basic_info;
+}
+
+static void print_usage(const char* name) {
+  static const char kUsageFormat[] =  // %s receives the program name
+      "Usage: %s [-v] INPUT\n"
+      "  INPUT      input JPEG XL image filename(s)\n"
+      "  -v         more verbose output\n";
+  fprintf(stderr, kUsageFormat, name);
+}
+
+// Opens `jxl_filename` and prints its info; returns 0 on success, 1 on failure.
+static int print_basic_info_filename(const char* jxl_filename, int verbose) {
+  FILE* file = fopen(jxl_filename, "rb");
+  if (!file) {
+    fprintf(stderr, "Failed to read file: %s\n", jxl_filename);
+    return 1;
+  }
+  const int ok = PrintBasicInfo(file, verbose);
+  fclose(file);
+  if (!ok) {
+    fprintf(stderr, "Error reading file: %s\n", jxl_filename);
+    return 1;  // PrintBasicInfo yields 0 on failure; report a non-zero status
+  }
+  return 0;
+}
+
+int main(int argc, char* argv[]) {
+  static const char* const kHelpFlags[] = {"--help", "-h", "-?", NULL};
+  static const char* const kVerboseFlags[] = {"--verbose", "-v", NULL};
+  const char* const name = argv[0];
+  int verbose = 0, status = 0;
+
+  // A help flag anywhere on the command line prints the usage and exits 0.
+  for (int i = 1; i < argc; i++) {
+    for (const char* const* flag = kHelpFlags; *flag; flag++) {
+      if (strcmp(*flag, argv[i]) == 0) {
+        print_usage(name);
+        return 0;
+      }
+    }
+  }
+
+  // A verbose flag is honored only as the first argument and is then dropped.
+  if (argc >= 2) {
+    for (const char* const* flag = kVerboseFlags; *flag; flag++) {
+      if (strcmp(*flag, argv[1]) != 0) continue;
+      verbose = 1;
+      argc--;
+      argv++;
+      break;
+    }
+  }
+
+  if (argc < 2) {
+    print_usage(name);
+    return 2;
+  }
+
+  // Every remaining argument is a file; OR the per-file results together.
+  for (int i = 1; i < argc; i++) {
+    status |= print_basic_info_filename(argv[i], verbose);
+  }
+  return status;
+}
diff --git a/third-party/libjxl/libjxl/tools/libjxl_test.c b/third-party/libjxl/libjxl/tools/libjxl_test.c
new file mode 100644
index 0000000000..f56a1fa414
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/libjxl_test.c
@@ -0,0 +1,17 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Program to test that we can link against the public API of libjpegxl from C.
+// This links against the shared libjpegxl library which doesn't expose any of
+// the internals of the jxl namespace.
+
+#include <jxl/decode.h>
+
+// Link smoke test: returns 0 when the decoder reports a non-zero version and a
+// decoder instance can be created and destroyed, 1 otherwise.
+int main(void) {
+  if (!JxlDecoderVersion()) return 1;
+  JxlDecoder* dec = JxlDecoderCreate(NULL);
+  if (!dec) return 1;
+  JxlDecoderDestroy(dec);
+  // Explicit success status: falling off the end of main() only yields 0 from
+  // C99 onwards.
+  return 0;
+}
diff --git a/third-party/libjxl/libjxl/tools/optimizer/apply_simplex.py b/third-party/libjxl/libjxl/tools/optimizer/apply_simplex.py
new file mode 100755
index 0000000000..273305bb97
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/optimizer/apply_simplex.py
@@ -0,0 +1,111 @@
+#!/usr/bin/python
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+"""apply_simplex.py: Updates constants based on results of simplex search.
+
+To use this tool, the simplex search parameters must be wrapped in a bias(n)
+function call that returns the value of the VARn environment variable. The
+tool reads a text file containing the simplex definition that simplex_fork.py
+has written, and updates the target source files by substituting the bias(n)
+function calls with the (n+1)th coordinate of the simplex vector, and also
+simplifies these expressions by evaluating them to a single floating point
+literal.
+
+The tool recognizes and evaluates the following expressions:
+  <constant> + bias(n),
+  <constant> * bias(n),
+  <constant> + <coeff> * bias(n).
+
+The --keep_bias command-line flag can be used to continue an aborted simplex
+search. This will keep the same bias(n) terms in the code, but would update the
+surrounding constants.
+
+The --index_min and --index_max flags can be used to update only a subset of the
+bias(n) parameters.
+"""
+
+import argparse
+import re
+import sys
+
+def ParseSimplex(fn):
+    """Returns the simplex definition written by simplex_fork.py.
+
+    Only the first line of the file `fn` is read. It is evaluated as a Python
+    expression and the resulting value is returned unchanged.
+    """
+
+    with open(fn, "r") as f:
+        line = f.readline()
+        # NOTE(review): eval() executes whatever the file contains; only feed
+        # this simplex files you trust.
+        vec = eval(line)
+    return vec
+
+
+def PythonExpr(c_expr):
+    """Removes the f at the end of float literals"""
+
+    def repl(m):
+        return m.group(1)
+
+    return re.sub("(\d+)f", repl, c_expr)
+
+
+def UpdateSourceFile(fn, vec, keep_bias, id_min, id_max, minval):
+    """Updates expressions containing a bias(N) term."""
+
+    with open(fn, "r") as f:
+        lines_in = f.readlines()
+        lines_out = []
+        rbias = "(bias\((\d+)\))"
+        r = " -?\d+\.\d+f?( (\+|-|\*) (\d+\.\d+f? \* )?" + rbias + ")"
+        for line in lines_in:
+            line_out = line
+            x = re.search(r, line)
+            if x:
+                id = int(x.group(5))
+                if id >= id_min and id <= id_max:
+                    expr = re.sub(rbias, str(vec[id + 1]), x.group(0))
+                    val = eval(PythonExpr(expr))
+                    if minval and val < minval:
+                        val = minval
+                    expr_out = " " + str(val) + "f"
+                    if keep_bias:
+                        expr_out += x.group(1)
+                    line_out = re.sub(r, expr_out, line)
+            lines_out.append(line_out)
+
+    with open(fn, "w") as f:
+        f.writelines(lines_out)
+        f.close()
+
+
+def ApplySimplex(args):
+  """Main entry point of the program after parsing parameters."""
+
+  vec = ParseSimplex(args.simplex)
+  for fn in args.target:
+      UpdateSourceFile(fn, vec, args.keep_bias, args.index_min, args.index_max,
+                       args.minval)
+  return 0
+
+
+def main():
+  parser = argparse.ArgumentParser(description=__doc__)
+  parser.add_argument('target', type=str, nargs='+',
+                      help='source file(s) to update')
+  parser.add_argument('--simplex', default='best_simplex.txt',
+                      help='simplex to apply to the code')
+  parser.add_argument('--keep_bias', default=False, action='store_true',
+                      help='keep the bias term in the code, can be used to ' +
+                      'continue simplex search')
+  parser.add_argument('--index_min', type=int, default=0,
+                      help='start index of the simplex to apply')
+  parser.add_argument('--index_max', type=int, default=9999,
+                      help='last index of the simplex to apply')
+  parser.add_argument('--minval', type=float, default=None,
+                      help='apply a minimum to expression results')
+  args = parser.parse_args()
+  sys.exit(ApplySimplex(args))
+
+
+if __name__ == '__main__':
+  main()
diff --git a/third-party/libjxl/libjxl/tools/optimizer/simplex_fork.py b/third-party/libjxl/libjxl/tools/optimizer/simplex_fork.py
new file mode 100755
index 0000000000..3c641a2e78
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/optimizer/simplex_fork.py
@@ -0,0 +1,262 @@
+#!/usr/bin/python
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+"""Implementation of simplex search for an external process.
+
+The external process gets the input vector through environment variables.
+Input of vector as setenv("VAR%dimension", val)
+Getting the optimized function with regexp match from stdout
+of the forked process.
+
+https://en.wikipedia.org/wiki/Nelder%E2%80%93Mead_method
+
+start as ./simplex_fork.py binary dimensions amount
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from six.moves import range
+import copy
+import os
+import random
+import re
+import subprocess
+import sys
+
+def Midpoint(simplex):
+  """Nelder-Mead-like simplex midpoint calculation."""
+  simplex.sort()
+  dim = len(simplex) - 1
+  retval = [None] + [0.0] * dim
+  for i in range(1, dim + 1):
+    for k in range(dim):
+      retval[i] += simplex[k][i]
+    retval[i] /= dim
+  return retval
+
+
+def Subtract(a, b):
+  """Vector arithmetic, with [0] being ignored."""
+  return [None if k == 0 else a[k] - b[k] for k in range(len(a))]
+
+def Add(a, b):
+  """Vector arithmetic, with [0] being ignored."""
+  return [None if k == 0 else a[k] + b[k] for k in range(len(a))]
+
+def Average(a, b):
+  """Vector arithmetic, with [0] being ignored."""
+  return [None if k == 0 else 0.5 * (a[k] + b[k]) for k in range(len(a))]
+
+
+eval_hash = {}
+g_best_val = None
+
+def EvalCacheForget():
+  """Drops all cached objective values by rebinding eval_hash to a new dict."""
+  global eval_hash
+  eval_hash = {}
+
+def RandomizedJxlCodecs():
+  retval = []
+  minval = 0.5
+  maxval = 8.3
+  rangeval = maxval/minval
+  steps = 17
+  for i in range(steps):
+    mul = minval * rangeval**(float(i)/(steps - 1))
+    mul *= 0.99 + 0.05 * random.random()
+    retval.append("jxl:d%.4f" % mul)
+  for i in range(steps - 1):
+    mul = minval * rangeval**(float(i+0.5)/(steps - 1))
+    mul *= 0.99 + 0.05 * random.random()
+    retval.append("jxl:d%.4f" % mul)
+  return ",".join(retval)
+
+g_codecs = RandomizedJxlCodecs()
+
+def Eval(vec, binary_name, cached=True):
+  """Evaluates the objective function by forking a process.
+
+  The position is passed to the child through the VAR0, VAR1, ... environment
+  variables. The objective is the product, over all output lines that start
+  with "jxl", of dist_pnorm * bpp / 16. Whenever a new overall best value is
+  seen, `vec` is written to best_simplex.txt.
+
+  Args:
+    vec: [0] will be set to the objective function, [1:] will
+      contain the vector position for the objective function.
+    binary_name: the name of the binary that evaluates the value.
+    cached: if true, a value already stored in eval_hash for this position is
+      reused instead of running the binary again.
+
+  Returns:
+    None; the result is stored in vec[0]. It is 1e30 for a non-positive score
+    and 1e33 when no "jxl" line was found in the output.
+  """
+  global eval_hash
+  global g_codecs
+  global g_best_val
+  key = ""
+  # os.environ["BUTTERAUGLI_OPTIMIZE"] = "1"
+  # Reset every variable first so that values from an earlier, longer vector
+  # cannot leak into this run.
+  for i in range(300):
+    os.environ["VAR%d" % i] = "0"
+  # Export the position and build the cache key from the same values.
+  for i in range(len(vec) - 1):
+    os.environ["VAR%d" % i] = str(vec[i + 1])
+    key += str(vec[i + 1]) + ":"
+  if cached and (key in eval_hash):
+    vec[0] = eval_hash[key]
+    return
+
+  # NOTE(review): the input corpus path is hard-coded to one developer's
+  # machine.
+  process = subprocess.Popen(
+      (binary_name,
+       '--input',
+       '/usr/local/google/home/jyrki/newcorpus/split/*.png',
+       '--error_pnorm=4',
+       '--more_columns',
+       '--codec', g_codecs),
+      stdout=subprocess.PIPE,
+      stderr=subprocess.PIPE,
+      env=dict(os.environ))
+
+  # process.wait()
+  found_score = False
+  vec[0] = 1.0
+  dct2 = 0.0
+  dct4 = 0.0
+  dct16 = 0.0
+  dct32 = 0.0
+  n = 0
+  # communicate() waits for the child to finish and returns its whole stdout.
+  for line in process.communicate(input=None)[0].splitlines():
+    print("BE", line)
+    sys.stdout.flush()
+    if line[0:3] == b'jxl':
+      # Whitespace-separated columns of a result line: 3 is read as bpp,
+      # 9 as dist_pnorm and 6 as dist_max.
+      bpp = line.split()[3]
+      dist_pnorm = line.split()[9]
+      dist_max = line.split()[6]
+      vec[0] *= float(dist_pnorm) * float(bpp) / 16.0
+      #vec[0] *= (float(dist_max) * float(bpp) / 16.0) ** 0.01
+      n += 1
+      found_score = True
+      # Number after the last 'd' of the first column; currently only used by
+      # the commented-out formula below.
+      distance = float(line.split()[0].split(b'd')[-1])
+      #faultybpp = 1.0 + 0.43 * ((float(bpp) * distance ** 0.69) - 1.595) ** 2
+      #vec[0] *= faultybpp
+
+  print("eval: ", vec)
+  # A non-positive score is replaced by a very large penalty value.
+  if (vec[0] <= 0.0):
+    vec[0] = 1e30
+  if found_score:
+    eval_hash[key] = vec[0]
+    if not g_best_val or vec[0] < g_best_val:
+      g_best_val = vec[0]
+      print("\nSaving best simplex\n")
+      with open("best_simplex.txt", "w") as f:
+        print(vec, file=f)
+    return
+  # No result line at all: use an even larger penalty and do not cache it.
+  vec[0] = 1e33
+  return
+  # sys.exit("awful things happened")
+
+def Reflect(simplex, binary):
+  """Main iteration step of Nelder-Mead optimization. Modifies `simplex`.
+
+  The last vertex after sorting is mirrored through the midpoint of the other
+  vertices. Depending on the mirrored score it is replaced by a point halfway
+  towards the first vertex, by the mirrored point, or by a point one step
+  further in the same direction.
+
+  Args:
+    simplex: list of vertices; each vertex holds its objective value in [0].
+    binary: name of the binary passed on to Eval.
+  """
+  simplex.sort()
+  last = simplex[-1]
+  mid = Midpoint(simplex)
+  # Mirror the last vertex through the midpoint: mid + (mid - last).
+  diff = Subtract(mid, last)
+  mirrored = Add(mid, diff)
+  Eval(mirrored, binary)
+  if mirrored[0] > simplex[-2][0]:
+    print("\nStill worst\n\n")
+    # Still the worst, shrink towards the best.
+    shrinking = Average(simplex[-1], simplex[0])
+    Eval(shrinking, binary)
+    print("\nshrinking...\n\n")
+    simplex[-1] = shrinking
+    return
+  if mirrored[0] < simplex[0][0]:
+    # new best
+    print("\nNew Best\n\n")
+    # Try one more step of the same size in the same direction.
+    even_further = Add(mirrored, diff)
+    Eval(even_further, binary)
+    if even_further[0] < mirrored[0]:
+      print("\nEven Further\n\n")
+      mirrored = even_further
+    simplex[-1] = mirrored
+    # try to extend
+    return
+  else:
+    # not a best, not a worst point
+    simplex[-1] = mirrored
+
+
+def OneDimensionalSearch(simplex, shrink, index):
+  """Tries to improve the last vertex by moving it along one coordinate.
+
+  Args:
+    simplex: list of vertices; simplex[0] is the reference vertex and
+      simplex[-1] the vertex being adjusted (modified in place).
+    shrink: factor applied to the offset from simplex[0] when the last vertex
+      already beats simplex[0].
+    index: coordinate to modify.
+
+  Returns:
+    True if the moved vertex scored better than before the move; otherwise the
+    previous vertex is restored and False is returned.
+  """
+  # last appended was better than the best so far, try to replace it
+  last_attempt = simplex[-1][:]
+  best = simplex[0]
+  if last_attempt[0] < best[0]:
+    # try expansion of the amount
+    diff = simplex[-1][index] - simplex[0][index]
+    simplex[-1][index] = simplex[0][index] + shrink * diff
+    Eval(simplex[-1], g_binary)
+    if simplex[-1][0] < last_attempt[0]:
+      # it got better
+      return True
+  elif last_attempt[0] >= 0:
+    # Not better than the reference: try the same offset on the opposite side.
+    diff = simplex[-1][index] - simplex[0][index]
+    simplex[-1][index] = simplex[0][index] - diff
+    Eval(simplex[-1], g_binary)
+    if simplex[-1][0] < last_attempt[0]:
+      # it got better
+      return True
+  # No improvement: undo the move.
+  simplex[-1] = last_attempt
+  return False
+
+def InitialSimplex(vec, dim, amount):
+  """Builds a simplex of dim + 1 vertices around `vec`.
+
+  The evaluation cache is cleared first. For each coordinate, visited in
+  random order, a copy of the current first vertex is offset by `amount`,
+  evaluated and then refined with OneDimensionalSearch.
+
+  Args:
+    vec: starting vertex; it is copied, not modified.
+    dim: number of coordinates (vec[1] .. vec[dim]).
+    amount: initial offset applied along each coordinate.
+
+  Returns:
+    The list of vertices, sorted.
+  """
+  EvalCacheForget()
+  best = vec[:]
+  Eval(best, g_binary)
+  retval = [best]
+  comp_order = list(range(1, dim + 1))
+  random.shuffle(comp_order)
+
+  for i in range(dim):
+    index = comp_order[i]
+    best = retval[0][:]
+    best[index] += amount
+    Eval(best, g_binary)
+    retval.append(best)
+    do_shrink = True
+    # Grow quickly while that helps, then slowly; only if the slow growth
+    # never helped, try shrinking instead.
+    while OneDimensionalSearch(retval, 2.0, index):
+      print("OneDimensionalSearch-Grow")
+    while OneDimensionalSearch(retval, 1.1, index):
+      print("OneDimensionalSearch-SlowGrow")
+      do_shrink = False
+    if do_shrink:
+      while OneDimensionalSearch(retval, 0.9, index):
+        print("OneDimensionalSearch-SlowShrinking")
+    # Re-sort so that retval[0] is the start point for the next coordinate.
+    retval.sort()
+  return retval
+
+
+# Script body: expects exactly three arguments (binary, dimensions, size).
+if len(sys.argv) != 4:
+  print("usage: ", sys.argv[0], "binary-name number-of-dimensions simplex-size")
+  exit(1)
+
+g_dim = int(sys.argv[2])
+g_amount = float(sys.argv[3])
+g_binary = sys.argv[1]
+# Build the simplex four times with decreasing step sizes, each time starting
+# from the first vertex of the previous, sorted simplex.
+g_simplex = InitialSimplex([None] + [0.0] * g_dim,
+                           g_dim, 7.0 * g_amount)
+best = g_simplex[0][:]
+g_codecs = RandomizedJxlCodecs()
+g_simplex = InitialSimplex(best, g_dim, g_amount * 2.47)
+best = g_simplex[0][:]
+g_simplex = InitialSimplex(best, g_dim, g_amount)
+best = g_simplex[0][:]
+g_simplex = InitialSimplex(best, g_dim, g_amount * 0.33)
+best = g_simplex[0][:]
+
+# Main loop: a batch of reflection steps, then a restart around the current
+# first vertex with a new random step size and a new codec list.
+for restarts in range(99999):
+  for ii in range(g_dim * 5):
+    g_simplex.sort()
+    print("reflect", ii, g_simplex[0])
+    Reflect(g_simplex, g_binary)
+
+  # Random step multiplier between 0.1 and 15.1.
+  mulli = 0.1 + 15 * random.random()**2.0
+  g_codecs = RandomizedJxlCodecs()
+  print("\n\n\nRestart", restarts, "mulli", mulli)
+  g_simplex.sort()
+  best = g_simplex[0][:]
+  g_simplex = InitialSimplex(best, g_dim, g_amount * mulli)
diff --git a/third-party/libjxl/libjxl/tools/optimizer/update_jpegli_global_scale.py b/third-party/libjxl/libjxl/tools/optimizer/update_jpegli_global_scale.py
new file mode 100755
index 0000000000..1a57c59db6
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/optimizer/update_jpegli_global_scale.py
@@ -0,0 +1,103 @@
+#!/usr/bin/python
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+"""Script to update jpegli global scale after a change affecting quality.
+
+start as ./update_jpegli_global_scale.py build <corpus-dir>
+"""
+
+import os
+import re
+import subprocess
+import sys
+
+def SourceFileName():
+  """Returns the relative path of the source file holding the scales."""
+  return "lib/jpegli/quant.cc"
+
+def ScalePattern(scale_type):
+  return "constexpr float kGlobalScale" + scale_type + " = ";
+
+def CodecName(scale_type):
+  if scale_type == "YCbCr":
+    return "jpeg:enc-jpegli:q90"
+  elif scale_type == "XYB":
+    return "jpeg:enc-jpegli:xyb:q90"
+  else:
+    raise Exception("Unknown scale type %s" % scale_type)
+  
+def ReadGlobalScale(scale_type):
+  pattern = ScalePattern(scale_type)
+  with open(SourceFileName()) as f:
+    for line in f.read().splitlines():
+      if line.startswith(pattern):
+        return float(line[len(pattern):-2])
+  raise Exception("Global scale %s not found." % scale_type)
+  
+    
+def UpdateGlobalScale(scale_type, new_val):
+  pattern = ScalePattern(scale_type)
+  found_pattern = False
+  fdata = ""
+  with open(SourceFileName()) as f:
+    for line in f.read().splitlines():
+      if line.startswith(pattern):
+        fdata += pattern + "%.8ff;\n" % new_val
+        found_pattern = True
+      else:
+        fdata += line + "\n"
+  if not found_pattern:
+    raise Exception("Global scale %s not found." % scale_type)
+  with open(SourceFileName(), "w") as f:
+    f.write(fdata)
+    f.close()
+
+def EvalPnorm(build_dir, corpus_dir, codec):
+  compile_args = ["ninja", "-C", build_dir, "tools/benchmark_xl"]
+  try:
+    subprocess.check_output(compile_args)
+  except:
+    subprocess.check_call(compile_args)
+  process = subprocess.Popen(
+    (os.path.join(build_dir, "tools/benchmark_xl"),
+     "--input", os.path.join(corpus_dir, "*.png"),
+     "--codec", codec),
+    stdout=subprocess.PIPE,
+    stderr=subprocess.PIPE)
+  (out, err) = process.communicate(input=None)
+  for line in out.splitlines():
+    if line.startswith(codec):
+      return float(line.split()[8])
+  raise Exception("Unexpected benchmark output:\n%sstderr:\n%s" % (out, err))
+
+
+# Script body: expects the build directory and the corpus directory.
+if len(sys.argv) != 3:
+  print("usage: ", sys.argv[0], "build-dir corpus-dir")
+  exit(1)
+
+build_dir = sys.argv[1]
+corpus_dir = sys.argv[2]
+    
+# Reference value that the jpegli results are matched against.
+jpeg_pnorm = EvalPnorm(build_dir, corpus_dir, "jpeg:q90")
+
+print("Libjpeg pnorm: %.8f" % jpeg_pnorm)
+
+for scale_type in ["YCbCr", "XYB"]:
+  scale = ReadGlobalScale(scale_type)
+  best_scale = scale
+  best_rel_error = 100.0
+  # At most 10 iterations: measure, remember the best scale seen so far, then
+  # rescale by the ratio of the reference to the measured value.
+  for i in range(10):
+    jpegli_pnorm = EvalPnorm(build_dir, corpus_dir, CodecName(scale_type))
+    rel_error = abs(jpegli_pnorm / jpeg_pnorm - 1)
+    print("[%-5s] scale: %.8f  pnorm: %.8f  error: %.8f" %
+          (scale_type, scale, jpegli_pnorm, rel_error))
+    if rel_error < best_rel_error:
+      best_rel_error = rel_error
+      best_scale = scale
+    # Stop once the relative error is below 0.01%.
+    if rel_error < 0.0001:
+      break
+    scale = scale * jpeg_pnorm / jpegli_pnorm
+    UpdateGlobalScale(scale_type, scale)
+  # Leave the best scale that was actually measured in the source file.
+  UpdateGlobalScale(scale_type, best_scale)
diff --git a/third-party/libjxl/libjxl/tools/rans_fuzzer.cc b/third-party/libjxl/libjxl/tools/rans_fuzzer.cc
new file mode 100644
index 0000000000..544ae7dd01
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/rans_fuzzer.cc
@@ -0,0 +1,55 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/entropy_coder.h"
+
+namespace jpegxl {
+namespace tools {
+
+using ::jxl::ANSCode;
+using ::jxl::ANSSymbolReader;
+using ::jxl::BitReader;
+using ::jxl::BitReaderScopedCloser;
+using ::jxl::Span;
+using ::jxl::Status;
+
+// Fuzz target body: decodes ANS histograms from `data` and then reads symbols
+// until the input is consumed or a read limit is hit. Inputs shorter than two
+// bytes are ignored. Returns 0 when the end of the function is reached.
+int TestOneInput(const uint8_t* data, size_t size) {
+  if (size < 2) return 0;
+  // The first two bytes determine the number of contexts; the remaining bytes
+  // are the bitstream.
+  // NOTE(review): this multiplies both bytes (data[0] * 256 * data[1]) rather
+  // than combining them as a 16-bit value - confirm this is intended.
+  size_t numContexts = data[0] * 256 * data[1] + 1;
+  data += 2;
+  size -= 2;
+
+  std::vector<uint8_t> context_map;
+  Status ret = true;
+  {
+    BitReader br(Span<const uint8_t>(data, size));
+    // Presumably closes `br` at scope exit and records the result in `ret`
+    // (see BitReaderScopedCloser) - `ret` is not inspected afterwards.
+    BitReaderScopedCloser br_closer(&br, &ret);
+    ANSCode code;
+    JXL_RETURN_IF_ERROR(
+        DecodeHistograms(&br, numContexts, &code, &context_map));
+    ANSSymbolReader ansreader(&code, &br);
+
+    // Limit the maximum amount of reads to avoid (valid) infinite loops.
+    const size_t maxreads = size * 8;
+    size_t numreads = 0;
+    int context = 0;
+    while (jxl::DivCeil(br.TotalBitsConsumed(), jxl::kBitsPerByte) < size &&
+           numreads <= maxreads) {
+      // Note: this `code` shadows the ANSCode declared above.
+      int code = ansreader.ReadHybridUint(context, &br, context_map);
+      // The decoded value selects the context for the next read.
+      context = code % numContexts;
+      numreads++;
+    }
+  }
+
+  return 0;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
+
+// Fuzzer entry point with C linkage; forwards the input to TestOneInput.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  return jpegxl::tools::TestOneInput(data, size);
+}
diff --git a/third-party/libjxl/libjxl/tools/scripts/bisector b/third-party/libjxl/libjxl/tools/scripts/bisector
new file mode 100755
index 0000000000..b6a82d0b4d
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/scripts/bisector
@@ -0,0 +1,287 @@
+#!/usr/bin/env python
+#
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+r"""General-purpose bisector
+
+Prints a space-separated list of values to stdout:
+1_if_success_0_otherwise left_x left_f(x) right_x right_f(x)
+
+Usage examples:
+
+# Finding the square root of 200 via bisection:
+bisector --var=BB --range=0.0,100.0 --target=200 --maxiter=100 \
+         --atol_val=1e-12 --rtol_val=0 --cmd='echo "$BB * $BB" | bc'
+# => 1 14.142135623730923 199.99999999999923 14.142135623731633 200.0000000000193
+
+# Finding an integer approximation to sqrt(200) via bisection:
+bisector --var=BB --range=0,100 --target=200 --maxiter=100 \
+         --atol_arg=1 --cmd='echo "$BB * $BB" | bc'
+# => 1 14 196.0 15 225.0
+
+# Finding a change-id that broke something via bisection:
+bisector --var=BB --range=0,1000000 --target=0.5 --maxiter=100 \
+         --atol_arg=1 \
+         --cmd='test $BB -gt 123456 && echo 1 || echo 0' --verbosity=3
+# => 1 123456 0.0 123457 1.0
+
+# Finding settings that compress /usr/share/dict/words to a given target size:
+bisector --var=BB --range=1,9 --target=250000 --atol_arg=1 \
+  --cmd='gzip -$BB </usr/share/dict/words >/tmp/w_$BB.gz; wc -c /tmp/w_$BB.gz' \
+  --final='mv /tmp/w_$BB.gz /tmp/words.gz; rm /tmp/w_*.gz' \
+  --verbosity=1
+# => 1 3 263170.0 4 240043.0
+
+# JXL-encoding with bisection-for-size (tolerance 0.5%):
+bisector --var=BB --range=0.1,3.0 --target=3500 --rtol_val=0.005 \
+  --cmd='(build/tools/cjxl --distance=$BB /tmp/baseball.png /tmp/baseball_$BB.jxl && wc -c /tmp/baseball_$BB.jxl)' \
+  --final='mv /tmp/baseball_$BB.jxl /tmp/baseball.jxl; rm -f /tmp/baseball_*.jxl' \
+  --verbosity=1
+# => 1 1.1875 3573.0 1.278125 3481.0
+
+# JXL-encoding with bisection-for-bits-per-pixel (tolerance 0.5%), using helper:
+bisector --var=BB --range=0.1,3.0 --target=1.2 --rtol_val=0.005 \
+  --cmd='(build/tools/cjxl --distance=$BB /tmp/baseball.png /tmp/baseball_$BB.jxl && get_bpp /tmp/baseball_$BB.jxl)' \
+  --final='mv /tmp/baseball_$BB.jxl /tmp/baseball.jxl; rm -f /tmp/baseball_*.jxl' \
+  --verbosity=1
+# => ...
+"""
+
+import argparse
+import os
+import re
+import subprocess
+import sys
+
+
+def _expandvars(vardef, env,
+                max_recursion=100,
+                max_length=10**6,
+                verbosity=0):
+  """os.path.expandvars() variant using parameter env rather than os.environ."""
+  current_expanded = vardef
+  for num_recursions in range(max_recursion):
+    if verbosity >= 3:
+      print(f'_expandvars(): num_recursions={num_recursions}, '
+            f'len={len(current_expanded)}' +
+            (', current: ' + current_expanded if verbosity >= 4 else ''))
+    if len > max_length:
+        break
+    current_expanded, num_replacements = re.subn(
+        r'$\{(\w+)\}|$(\w+)',
+        lambda m: env.get(m[1] if m[1] is not None else m[2], ''),
+        current_expanded)
+    if num_replacements == 0:
+        break
+  return current_expanded
+
+
+def _strtod(string):
+  """Extracts leftmost float from string (like strtod(3))."""
+  match = re.match(r'[+-]?\d*[.]?\d*(?:[eE][+-]?\d+)?', string)
+  return float(match[0]) if match[0] else None
+
+  
+def run_shell_command(shell_command,
+                      bisect_var, bisect_val,
+                      extra_env_defs,
+                      verbosity=0):
+  """Runs a shell command with env modifications, fetching return value."""
+  shell_env = dict(os.environ)
+  shell_env[bisect_var] = str(bisect_val)
+  for env_def in extra_env_defs:
+    varname, vardef = env_def.split('=', 1)
+    shell_env[varname] = _expandvars(vardev, shell_env,
+                                     verbosity=verbosity)
+  shell_ret = subprocess.run(shell_command,
+                             # We explicitly want subshell semantics!
+                             shell=True,
+                             capture_output=True,
+                             env=shell_env)
+  stdout = shell_ret.stdout.decode('utf-8')
+  score = _strtod(stdout)
+  if verbosity >= 2:
+    print(f'{bisect_var}={bisect_val} {shell_command} => '
+          f'{shell_ret.returncode} # {stdout.strip()}')
+  return (shell_ret.returncode == 0,  # Command was successful?
+          score)
+
+
+def _bisect(*,
+            shell_command,
+            final_shell_command,
+            target,
+            int_args,            
+            bisect_var, bisect_left, bisect_right,
+            rtol_val, atol_val, rtol_arg, atol_arg,
+            maxiter,
+            extra_env_defs,
+            verbosity=0
+            ):
+  """Performs bisection.
+
+  Args:
+    shell_command: command whose numeric output is the function value.
+    final_shell_command: optional cleanup command; if set it is always run at
+      the end, with the last midpoint (or the left bound) as argument.
+    target: function value to bracket.
+    int_args: if true, midpoints are rounded to integers.
+    bisect_var: environment variable that carries the argument.
+    bisect_left: left end of the argument interval.
+    bisect_right: right end of the argument interval.
+    rtol_val: relative tolerance on the function value.
+    atol_val: absolute tolerance on the function value.
+    rtol_arg: relative tolerance on the argument interval.
+    atol_arg: absolute tolerance on the argument interval.
+    maxiter: maximal number of bisection steps.
+    extra_env_defs: extra environment definitions for the commands.
+    verbosity: >= 1 prints the bracket after every step.
+
+  Returns:
+    A tuple (ok, left_x, left_value, right_x, right_value); ok is 1 when a
+    tolerance was met and 0 when maxiter steps were used up.
+
+  Raises:
+    RuntimeError: if a command fails or the target is not bracketed by the
+      values at the initial interval ends.
+  """
+  def _get_val(x):
+    # Evaluates the command at argument x; a failing command aborts.
+    success, val = run_shell_command(shell_command,
+                                     bisect_var, x,
+                                     extra_env_defs,
+                                     verbosity=verbosity)
+    if not success:
+      raise RuntimeError(f'Bisection failed for: {bisect_var}={x}: '
+                         f'success={success}, val={val}, '
+                         f'cmd={shell_command}, var={bisect_var}')
+    return val
+  #
+  bisect_mid, value_mid = None, None
+  try:
+    value_left = _get_val(bisect_left)
+    value_right = _get_val(bisect_right)
+    # The target must lie between the two end values (in either order).
+    if (value_left < target) != (target <= value_right):
+      raise RuntimeError(
+          f'Cannot bisect: target={target}, value_left={value_left}, '
+          f'value_right={value_right}')
+    for num_iter in range(maxiter):
+      bisect_mid_f = 0.5 * (bisect_left + bisect_right)
+      bisect_mid = round(bisect_mid_f) if int_args else bisect_mid_f
+      value_mid = _get_val(bisect_mid)
+      if (value_left < target) == (value_mid < target):
+        # Relative to target, `value_mid` is on the same side
+        # as `value_left`.
+        bisect_left = bisect_mid
+        value_left = value_mid
+      else:
+        # Otherwise, this situation must hold for value_right
+        # ("tertium non datur").
+        bisect_right = bisect_mid
+        value_right = value_mid
+      if verbosity >= 1:
+        print(f'bisect target={target}, '
+              f'left: {value_left} at {bisect_left}, '
+              f'right: {value_right} at {bisect_right}, '
+              f'mid: {value_mid} at {bisect_mid}')
+      # Done if the midpoint value is within tolerance of the target.
+      delta_val = target - value_mid
+      if abs(delta_val) <= atol_val + rtol_val * abs(target):
+        return 1, bisect_left, value_left, bisect_right, value_right
+      delta_arg = bisect_right - bisect_left
+      # Also check whether the argument is "within tolerance".
+      # Here, we have to be careful if bisect_left and bisect_right
+      # have different signs: Then, their absolute magnitude
+      # "sets the relevant scale".
+      if abs(delta_arg) <= atol_arg + (
+              rtol_arg * 0.5 * (abs(bisect_left) + abs(bisect_right))):
+        return 1, bisect_left, value_left, bisect_right, value_right
+    return 0, bisect_left, value_left, bisect_right, value_right
+  finally:
+    # If cleanup is specified, always run it
+    if final_shell_command:
+        run_shell_command(
+            final_shell_command,
+            bisect_var,
+            bisect_mid if bisect_mid is not None else bisect_left,
+            extra_env_defs, verbosity=verbosity)
+
+
+def main(args):
+  """Main entry point."""
+  parser = argparse.ArgumentParser(description='mhtml_walk args')
+  parser.add_argument(
+      '--var',
+      help='The variable to use for bisection.',
+      default='BISECT')
+  parser.add_argument(
+      '--range',
+      help=('The argument range for bisecting, as {low},{high}. '
+            'If no argument has a decimal dot, assume integer parameters.'),
+      default='0.0,1.0')
+  parser.add_argument(
+      '--max',
+      help='The maximal value for bisecting.',
+      type=float,
+      default=0.0)
+  parser.add_argument(
+      '--target',
+      help='The target value to aim for.',
+      type=float,
+      default=1.0)
+  parser.add_argument(
+      '--maxiter',
+      help='The maximal number of iterations to perform.',
+      type=int,
+      default=40)
+  parser.add_argument(
+      '--rtol_val',
+      help='Relative tolerance to accept for deviations from target value.',
+      type=float,
+      default=0.0)
+  parser.add_argument(
+      '--atol_val',
+      help='Absolute tolerance to accept for deviations from target value.',
+      type=float,
+      default=0.0)
+  parser.add_argument(
+      '--rtol_arg',
+      help='Relative tolerance to accept for the argument.',
+      type=float,
+      default=0.0)
+  parser.add_argument(
+      '--atol_arg',
+      help=('Absolute tolerance to accept for the argument '
+            '(e.g. for bisecting change-IDs).'),
+      type=float,
+      default=0.0)
+  parser.add_argument(
+      '--verbosity',
+      help='The verbosity level.',
+      type=int,
+      default=1)
+  parser.add_argument(
+      '--env',
+      help=('Comma-separated list of extra environment variables '
+            'to incrementally add before executing the shell-command.'),
+      default='')
+  parser.add_argument(
+      '--cmd',
+      help=('The shell command to execute. Must print a numerical result '
+            'to stdout.'))
+  parser.add_argument(
+      '--final',
+      help='The cleanup shell command to execute.')
+  #
+  parsed = parser.parse_args(args)
+  extra_env_defs = tuple(filter(None, parsed.env.split(',')))    
+  try:
+    low_high = parsed.range.split(',')
+    if len(low_high) != 2:
+      raise ValueError('--range must be {low},{high}')
+    int_args = False
+    low_val, high_val = map(float, low_high)
+    low_val_int = round(low_val)
+    high_val_int = round(high_val)
+    if low_high == [str(low_val_int), str(high_val_int)]:
+        int_args = True
+        low_val = low_val_int
+        high_val = high_val_int
+    ret = _bisect(
+        shell_command=parsed.cmd,
+        final_shell_command=parsed.final,
+        target=parsed.target,
+        int_args=int_args,        
+        bisect_var=parsed.var,
+        bisect_left=low_val,
+        bisect_right=high_val,
+        rtol_val=parsed.rtol_val,
+        atol_val=parsed.atol_val,
+        rtol_arg=parsed.rtol_arg,
+        atol_arg=parsed.atol_arg,
+        maxiter=parsed.maxiter,
+        extra_env_defs=extra_env_defs,
+        verbosity=parsed.verbosity,
+    )
+    print(' '.join(map(str, ret)))
+  except Exception as exn:
+    sys.exit(f'Problem: {exn}')
+
+
+if __name__ == '__main__':
+  main(sys.argv[1:])
diff --git a/third-party/libjxl/libjxl/tools/scripts/build_cleaner.py b/third-party/libjxl/libjxl/tools/scripts/build_cleaner.py
new file mode 100755
index 0000000000..8183022f7c
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/scripts/build_cleaner.py
@@ -0,0 +1,265 @@
+#!/usr/bin/env python3
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+
+"""build_cleaner.py: Update build files.
+
+This tool keeps certain parts of the build files up to date.
+"""
+
+import argparse
+import locale
+import os
+import re
+import subprocess
+import sys
+import tempfile
+
+
+# Banner placed at the top of every generated build list file. The text is
+# emitted verbatim and compared against the files on disk, so changing it
+# requires regenerating those files.
+HEAD = """# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# This file is generated, do not modify by manually.
+# Run `tools/scripts/build_cleaner.py --update` to regenerate it.
+"""
+
+
+def RepoFiles(src_dir):
+  """Return the list of files from the source git repository"""
+  git_bin = os.environ.get('GIT_BIN', 'git')
+  files = subprocess.check_output([git_bin, '-C', src_dir, 'ls-files'])
+  ret = files.decode(locale.getpreferredencoding()).splitlines()
+  ret.sort()
+  return ret
+
+
+def Check(condition, msg):
+  if not condition:
+    print(msg)
+    sys.exit(2)
+
+
+def ContainsFn(*parts):
+  return lambda path: any(part in path for part in parts)
+
+
+def HasPrefixFn(*prefixes):
+  return lambda path: any(path.startswith(prefix) for prefix in prefixes)
+
+
+def HasSuffixFn(*suffixes):
+  return lambda path: any(path.endswith(suffix) for suffix in suffixes)
+
+
+def Filter(src, fn):
+  yes_list = []
+  no_list = []
+  for item in src:
+    (yes_list if fn(item) else no_list).append(item)
+  return yes_list, no_list
+
+
+def SplitLibFiles(repo_files):
+  """Splits the library files into the different groups.
+
+  Only `.cc`, `.h` and `.ui` files below `lib/` are considered, and all the
+  returned paths are relative to `lib/`. Each Filter() call below removes the
+  files it selects from the remaining pool, so the order of the calls matters.
+
+  Args:
+    repo_files: list of repository file paths (see RepoFiles).
+
+  Returns:
+    A dict mapping each list name (e.g. 'dec_sources', 'codec_jxl_sources')
+    to the list of files assigned to it.
+
+  Exits the program through Check() if a considered file does not belong to
+  any of the top-level groups.
+  """
+
+  srcs_base = 'lib/'
+  srcs, _ = Filter(repo_files, HasPrefixFn(srcs_base))
+  # Strip the 'lib/' prefix.
+  srcs = [path[len(srcs_base):] for path in srcs]
+  srcs, _ = Filter(srcs, HasSuffixFn('.cc', '.h', '.ui'))
+  srcs.sort()
+
+  # Let's keep Jpegli sources a bit separate for a while.
+  jpegli_srcs, srcs = Filter(srcs, HasPrefixFn('jpegli'))
+  # TODO(eustas): move to tools?
+  # Files ending in gbench_main.cc are dropped from every list.
+  _, srcs = Filter(srcs, HasSuffixFn('gbench_main.cc'))
+
+  # First pick files scattered across directories.
+  tests, srcs = Filter(srcs, HasSuffixFn('_test.cc'))
+  jpegli_tests, jpegli_srcs = Filter(jpegli_srcs, HasSuffixFn('_test.cc'))
+  # TODO(eustas): move to separate list?
+  # Paths containing 'testing.h' are dropped from every list.
+  _, srcs = Filter(srcs, ContainsFn('testing.h'))
+  _, jpegli_srcs = Filter(jpegli_srcs, ContainsFn('testing.h'))
+  # Any remaining path with 'test' anywhere in it counts as test library code.
+  testlib_files, srcs = Filter(srcs, ContainsFn('test'))
+  jpegli_testlib_files, jpegli_srcs = Filter(jpegli_srcs, ContainsFn('test'))
+  jpegli_libjpeg_helper_files, jpegli_testlib_files = Filter(
+    jpegli_testlib_files, ContainsFn('libjpeg_test_util'))
+  gbench_sources, srcs = Filter(srcs, HasSuffixFn('_gbench.cc'))
+
+  # Split what is left by top-level directory.
+  extras_sources, srcs = Filter(srcs, HasPrefixFn('extras/'))
+  lib_srcs, srcs = Filter(srcs, HasPrefixFn('jxl/'))
+  public_headers, srcs = Filter(srcs, HasPrefixFn('include/jxl/'))
+  threads_sources, srcs = Filter(srcs, HasPrefixFn('threads/'))
+
+  # Every considered file must have been claimed by one of the groups above.
+  Check(len(srcs) == 0, 'Orphan source files: ' + str(srcs))
+
+  base_sources, lib_srcs = Filter(lib_srcs, HasPrefixFn('jxl/base/'))
+
+  jpegli_wrapper_sources, jpegli_srcs = Filter(
+      jpegli_srcs, HasSuffixFn('libjpeg_wrapper.cc'))
+  jpegli_sources = jpegli_srcs
+
+  threads_public_headers, public_headers = Filter(
+      public_headers, ContainsFn('_parallel_runner'))
+
+  # One 'codec_<name>_sources' list per codec, taken out of the extras files
+  # whose path starts with extras/dec/<name> or extras/enc/<name>.
+  codec_names = ['apng', 'exr', 'gif', 'jpegli', 'jpg', 'jxl', 'npy', 'pgx',
+    'pnm']
+  codecs = {}
+  for codec in codec_names:
+    codec_sources, extras_sources = Filter(extras_sources, HasPrefixFn(
+      f'extras/dec/{codec}', f'extras/enc/{codec}'))
+    codecs[f'codec_{codec}_sources'] = codec_sources
+
+  # TODO(eustas): move to separate folder?
+  extras_for_tools_sources, extras_sources = Filter(extras_sources, ContainsFn(
+    '/codec', '/hlg', '/metrics', '/packed_image_convert', '/render_hdr',
+    '/tone_mapping'))
+
+  # Source files only needed by the encoder or by tools (including decoding
+  # tools), but not by the decoder library.
+  # TODO(eustas): investigate the status of codec_in_out.h
+  # TODO(eustas): rename butteraugli_wrapper.cc to butteraugli.cc?
+  # TODO(eustas): is it possible to make butteraugli more standalone?
+  enc_sources, lib_srcs = Filter(lib_srcs, ContainsFn('/enc_', '/butteraugli',
+    'jxl/encode.cc', 'jxl/encode_internal.h'
+  ))
+
+  # The remaining of the files are in the dec_library.
+  dec_jpeg_sources, dec_sources = Filter(lib_srcs, HasPrefixFn('jxl/jpeg/',
+    'jxl/decode_to_jpeg.cc', 'jxl/decode_to_jpeg.h'))
+  dec_box_sources, dec_sources = Filter(dec_sources, HasPrefixFn(
+    'jxl/box_content_decoder.cc', 'jxl/box_content_decoder.h'))
+
+  # TODO(lode): further prune dec_srcs: only those files that the decoder
+  # absolutely needs, and or not only for encoding, should be listed here.
+
+  # Merge the per-codec lists with the fixed groups; `|` is the dict union
+  # operator, which needs Python 3.9 or newer.
+  return codecs | {'base_sources': base_sources, 
+    'dec_box_sources': dec_box_sources, 'dec_jpeg_sources': dec_jpeg_sources,
+    'dec_sources': dec_sources, 'enc_sources': enc_sources,
+    'extras_for_tools_sources': extras_for_tools_sources,
+    'extras_sources': extras_sources, 'gbench_sources': gbench_sources,
+    'jpegli_sources': jpegli_sources,
+    'jpegli_testlib_files': jpegli_testlib_files,
+    'jpegli_libjpeg_helper_files': jpegli_libjpeg_helper_files,
+    'jpegli_tests': jpegli_tests,
+    'jpegli_wrapper_sources' : jpegli_wrapper_sources,
+    'public_headers': public_headers,
+    'testlib_files': testlib_files, 'tests': tests,
+    'threads_public_headers': threads_public_headers,
+    'threads_sources': threads_sources,
+  }
+
+
+def MaybeUpdateFile(args, filename, new_text):
+  """Optionally replace file with new contents.
+
+  If args.update is set, it will update the file with the new contents,
+  otherwise it will return True when no changes were needed.
+  """
+  filepath = os.path.join(args.src_dir, filename)
+  with open(filepath, 'r') as f:
+    src_text = f.read()
+
+  if new_text == src_text:
+    return True
+
+  if args.update:
+    print('Updating %s' % filename)
+    with open(filepath, 'w') as f:
+      f.write(new_text)
+    return True
+  else:
+    prefix = os.path.basename(filename)
+    with tempfile.NamedTemporaryFile(mode='w', prefix=prefix) as new_file:
+      new_file.write(new_text)
+      new_file.flush()
+      subprocess.call(['diff', '-u', filepath, '--label', 'a/' + filename,
+        new_file.name, '--label', 'b/' + filename])
+    return False
+
+
+def FormatList(items, prefix, suffix):
+  return ''.join(f'{prefix}{item}{suffix}\n' for item in items)
+
+
+def FormatGniVar(name, var):
+  if type(var) is list:
+    contents = FormatList(var, '    "', '",')
+    return f'{name} = [\n{contents}]\n'
+  else:  # TODO: do we need scalar strings?
+    return f'{name} = {var}\n'
+
+
+def FormatCMakeVar(name, var):
+  if type(var) is list:
+    contents = FormatList(var, '  ', '')
+    return f'set({name}\n{contents})\n'
+  else:  # TODO: do we need scalar strings?
+    return f'set({name} {var})\n'
+
+
+def GetJpegLibVersion(src_dir):
+  with open(os.path.join(src_dir, 'CMakeLists.txt'), 'r') as f:
+    cmake_text = f.read()
+    print(cmake_text)
+    m = re.search(r'set\(JPEGLI_LIBJPEG_LIBRARY_SOVERSION "([0-9]+)"',
+                  cmake_text)
+    version = m.group(1)
+    if len(version) == 1:
+      version += "0"
+    return version
+
+
+def BuildCleaner(args):
+  repo_files = RepoFiles(args.src_dir)
+
+  with open(os.path.join(args.src_dir, 'lib/CMakeLists.txt'), 'r') as f:
+    cmake_text = f.read()
+  version = {'major_version': '', 'minor_version': '', 'patch_version': ''}
+  for var in version.keys():
+    cmake_var = f'JPEGXL_{var.upper()}'
+    # TODO(eustas): use `cmake -L`
+    # Regexp:
+    #   set(_varname_ _capture_decimal_)
+    match = re.search(r'set\(' + cmake_var + r' ([0-9]+)\)', cmake_text)
+    version[var] = match.group(1)
+
+  version['jpegli_lib_version'] = GetJpegLibVersion(args.src_dir)
+
+  lists = SplitLibFiles(repo_files)
+
+  cmake_chunks = [HEAD]
+  cmake_parts = lists
+  for var in sorted(cmake_parts):
+    cmake_chunks.append(FormatCMakeVar(
+        'JPEGXL_INTERNAL_' + var.upper(), cmake_parts[var]))
+
+  gni_chunks = [HEAD]
+  gni_parts = version | lists
+  for var in sorted(gni_parts):
+    gni_chunks.append(FormatGniVar('libjxl_' + var, gni_parts[var]))
+
+  okay = [
+    MaybeUpdateFile(args, 'lib/jxl_lists.cmake', '\n'.join(cmake_chunks)),
+    MaybeUpdateFile(args, 'lib/jxl_lists.bzl', '\n'.join(gni_chunks)),
+  ]
+  return all(okay)
+
+
+def main():
+  parser = argparse.ArgumentParser(description=__doc__)
+  parser.add_argument('--src-dir',
+    default=os.path.realpath(os.path.join( os.path.dirname(__file__), '../..')),
+    help='path to the build directory')
+  parser.add_argument('--update', default=False, action='store_true',
+    help='update the build files instead of only checking')
+  args = parser.parse_args()
+  Check(BuildCleaner(args), 'Build files need update.')
+
+
+if __name__ == '__main__':
+  main()
diff --git a/third-party/libjxl/libjxl/tools/scripts/build_stats.py b/third-party/libjxl/libjxl/tools/scripts/build_stats.py
new file mode 100755
index 0000000000..b1dc1ea393
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/scripts/build_stats.py
@@ -0,0 +1,412 @@
+#!/usr/bin/env python3
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+
+"""build_stats.py: Gather statistics about sizes of dependencies.
+
+This tool computes a realistic estimate of the size contribution to a binary
+from a statically linked library. Statically linked libraries compiled with
+-ffunction-sections and linked with -gc-sections mean that we could drop part
+of the library at the final binary linking time. This tool takes into account
+the symbols that end up in the final binary and not just all the symbols of
+the components.
+"""
+
+import argparse
+import collections
+import itertools
+import json
+import os
+import re
+import struct
+import subprocess
+import sys
+import tempfile
+
+# Ignore functions with stack size smaller than this value.
+MIN_STACK_SIZE = 32
+
+
+# One symbol line of the `nm --format=posix` output. `address` and `size` are
+# None when nm does not print them; `typ` is the symbol type letter from nm.
+Symbol = collections.namedtuple('Symbol', ['address', 'size', 'typ', 'name'])
+
+# Represents the stack size information of a function (defined by its address).
+SymbolStack = collections.namedtuple('SymbolStack',
+                                     ['address', 'stack_size'])
+
+# One row of the printed size table: the display name, whether the row is
+# added to the per-target partition sum, and a map from lowercase symbol type
+# to the total size of the symbols of that type.
+ObjectStats = collections.namedtuple('ObjectStats',
+                                     ['name', 'in_partition', 'size_map'])
+
+# An object target file in the build system.
+# `deps` lists the .o/.a inputs found in the build command and `filename` is
+# the output file, which is joined with the build directory when used.
+Target = collections.namedtuple('Target',
+                                ['name', 'deps', 'filename'])
+
+# Sections that end up in the binary file.
+# t - text (code), d - global non-const data, n/r - read-only data,
+# w - weak symbols (likely inline code not inlined),
+# v - weak symbols (vtable / typeinfo)
+# u - unique symbols
+BIN_SIZE = 'tdnrwvu'
+
+# Sections that end up in static RAM.
+RAM_SIZE = 'dbs'
+
+# u - symbols imported from some other library
+# a - absolute address symbols
+# NOTE(review): symbol types are lowercased before being looked up in these
+# strings and BIN_SIZE + RAM_SIZE is tested first, so the 'u' listed here
+# looks unreachable ('u' is also part of BIN_SIZE) - confirm the intent.
+IGNORE_SYMBOLS = 'ua'
+
+# Namespace names searched for (in length-prefixed form, e.g. '6N_AVX2') in
+# the symbol names to report the size of each SIMD variant separately.
+SIMD_NAMESPACES = [
+    'N_SCALAR', 'N_WASM', 'N_NEON', 'N_PPC8', 'N_SSE4', 'N_AVX2', 'N_AVX3']
+
+
+def LoadSymbols(filename):
+  ret = []
+  nmout = subprocess.check_output(['nm', '--format=posix', filename])
+  for line in nmout.decode('utf-8').splitlines():
+    if line.rstrip().endswith(':'):
+      # Ignore object names.
+      continue
+    # symbol_name, symbol_type, (optional) address, (optional) size
+    symlist = line.rstrip().split(' ')
+    assert 2 <= len(symlist) <= 4
+    ret.append(Symbol(
+        int(symlist[2], 16) if len(symlist) > 2 else None,
+        int(symlist[3], 16) if len(symlist) > 3 else None,
+        symlist[1],
+        symlist[0]))
+  return ret
+
+def LoadTargetCommand(target, build_dir):
+  stdout = subprocess.check_output(
+      ['ninja', '-C', build_dir, '-t', 'commands', target])
+  # The last command is always the command to build (link) the requested
+  # target.
+  command = stdout.splitlines()[-1]
+  return command.decode('utf-8')
+
+
+def LoadTarget(target, build_dir):
+  """Loads a build system target and its dependencies into a Target object"""
+  if target.endswith('.o'):
+    # Speed up this case.
+    return Target(target, [], target)
+
+  link_params = LoadTargetCommand(target, build_dir).split()
+  if 'cmake_symlink_library' in link_params:
+    # The target is a library symlinked, use the target of the symlink
+    # instead.
+    target = link_params[link_params.index('cmake_symlink_library') + 1]
+    link_params = LoadTargetCommand(target, build_dir).split()
+
+  # The target name is not always the same as the filename of the output, for
+  # example, "djxl" target generates "tools/djxl" file.
+  if '-o' in link_params:
+    target_filename = link_params[link_params.index('-o') + 1]
+  elif target.endswith('.a'):
+    # Command is '/path/to/ar', 'qc', 'target.a', ...
+    target_filename = link_params[link_params.index('qc') + 1]
+  else:
+    raise Exception('Unknown "%s" output filename in command: %r' %
+                    (target, link_params))
+
+  tgt_libs = []
+  for entry in link_params:
+    if not entry or not (entry.endswith('.o') or entry.endswith('.a')):
+      continue
+    if entry == target_filename:
+      continue
+    fn = os.path.join(build_dir, entry)
+    if not os.path.exists(fn):
+      continue
+    if entry in tgt_libs:
+      continue
+    tgt_libs.append(entry)
+
+  return Target(target, tgt_libs, target_filename)
+
+
+def TargetTransitiveDeps(all_tgts, target):
+  """Returns the list of all transitive dependencies of target"""
+  ret = all_tgts[target].deps
+  # There can't be loop dependencies in the targets.
+  i = 0
+  while i < len(ret):
+    ret.extend(all_tgts[ret[i]].deps)
+    i += 1
+  return ret
+
+
+def LoadStackSizes(filename, binutils=''):
+  """Loads the stack size used by functions from the ELF.
+
+  This function loads the stack size the compiler stored in the .stack_sizes
+  section, which can be done by compiling with -fstack-size-section in clang.
+
+  Args:
+    filename: path to the ELF file to inspect.
+    binutils: prefix prepended to the `objcopy` and `objdump` tool names.
+
+  Returns:
+    A list of SymbolStack entries, one per function whose stack size is at
+    least MIN_STACK_SIZE.
+
+  Raises:
+    Exception: if the file format reported by objdump is not one of the
+      little-endian ELF formats handled below.
+  """
+  # Dump the raw contents of the .stack_sizes section into a temporary file.
+  with tempfile.NamedTemporaryFile() as stack_sizes_sec:
+    subprocess.check_call(
+        [binutils + 'objcopy', '-O', 'binary', '--only-section=.stack_sizes',
+         '--set-section-flags', '.stack_sizes=alloc', filename,
+         stack_sizes_sec.name])
+    stack_sizes = stack_sizes_sec.read()
+  # From the documentation:
+  #  The section will contain an array of pairs of function symbol values
+  #  (pointer size) and stack sizes (unsigned LEB128). The stack size values
+  #  only include the space allocated in the function prologue. Functions with
+  #  dynamic stack allocations are not included.
+
+  # Get the pointer format based on the ELF file.
+  output = subprocess.check_output(
+      [binutils + 'objdump', '-a', filename]).decode('utf-8')
+  elf_format = re.search('file format (.*)$', output, re.MULTILINE).group(1)
+  if elf_format.startswith('elf64-little') or elf_format == 'elf64-x86-64':
+    pointer_fmt = '<Q'
+  elif elf_format.startswith('elf32-little') or elf_format == 'elf32-i386':
+    pointer_fmt = '<I'
+  else:
+    raise Exception('Unknown ELF format: %s' % elf_format)
+  pointer_size = struct.calcsize(pointer_fmt)
+
+  ret = []
+  i = 0
+  while i < len(stack_sizes):
+    # Each record starts with the function address.
+    assert len(stack_sizes) >= i + pointer_size
+    addr, = struct.unpack_from(pointer_fmt, stack_sizes, i)
+    i += pointer_size
+    # Parse LEB128
+    # Each byte carries 7 value bits, least significant group first; a clear
+    # top bit marks the last byte. At most 10 bytes are read.
+    size = 0
+    for j in range(10):
+      b = stack_sizes[i]
+      i += 1
+      size += (b & 0x7f) << (7 * j)
+      if (b & 0x80) == 0:
+        break
+    # Small frames are not reported.
+    if size >= MIN_STACK_SIZE:
+      ret.append(SymbolStack(addr, size))
+  return ret
+
+
+def TargetSize(symbols, symbol_filter=None):
+  ret = {}
+  for sym in symbols:
+    if not sym.size or (symbol_filter is not None and
+                        sym.name not in symbol_filter):
+      continue
+    t = sym.typ.lower()
+    # We can remove symbols if they appear in multiple objects since they will
+    # be merged by the linker.
+    if symbol_filter is not None and (t == sym.typ or t in 'wv'):
+      symbol_filter.remove(sym.name)
+    ret.setdefault(t, 0)
+    ret[t] += sym.size
+  return ret
+
+
+def PrintStats(stats):
+  """Print a table with the size stats for a target"""
+  table = []
+  sum_bin_size = 0
+  sum_ram_size = 0
+
+  for objstat in stats:
+    bin_size = 0
+    ram_size = 0
+    for typ, size in objstat.size_map.items():
+      if typ in BIN_SIZE:
+        bin_size += size
+      if typ in RAM_SIZE:
+        ram_size += size
+      if typ not in BIN_SIZE + RAM_SIZE:
+        raise Exception('Unknown type "%s"' % typ)
+    if objstat.in_partition:
+      sum_bin_size += bin_size
+      sum_ram_size += ram_size
+
+    table.append((objstat.name, bin_size, ram_size))
+  mx_bin_size = max(row[1] for row in table)
+  mx_ram_size = max(row[2] for row in table)
+
+  table.append(('-- unknown --', mx_bin_size - sum_bin_size,
+                mx_ram_size - sum_ram_size))
+
+  # Print the table
+  print('%-32s %17s %17s' % ('Object name', 'Binary size', 'Static RAM size'))
+  for name, bin_size, ram_size in table:
+    print('%-32s %8d (%5.1f%%) %8d (%5.1f%%)' % (
+        name, bin_size, 100. * bin_size / mx_bin_size,
+        ram_size, (100. * ram_size / mx_ram_size) if mx_ram_size else 0))
+  print()
+
+
+def PrintStackStats(tgt_stack_sizes, top_entries=20):
+  if not tgt_stack_sizes:
+    return
+  print(' Stack   Symbol name')
+  for i, (name, size) in zip(itertools.count(), tgt_stack_sizes.items()):
+    if top_entries > 0 and i >= top_entries:
+      break
+    print('%8d %s' % (size, name))
+  print()
+
+
+def PrintTopSymbols(tgt_top_symbols):
+  if not tgt_top_symbols:
+    return
+  print(' Size     T Symbol name')
+  for size, typ, name in tgt_top_symbols:
+    print('%9d %s %s' % (size, typ, name))
+  print()
+
+
+def SizeStats(args):
+  """Main entry point of the program after parsing parameters.
+
+  Computes the size statistics of the given targets and their components."""
+  # The dictionary with the stats that we store on disk as a json. This includes
+  # one entry per passed args.target.
+  stats = {}
+
+  # Cache of Target object of a target.
+  tgts = {}
+
+  # Load all the targets.
+  pending = set(args.target)
+  while pending:
+    target = pending.pop()
+    tgt = LoadTarget(target, args.build_dir)
+    tgts[target] = tgt
+    if args.recursive:
+      for dep in tgt.deps:
+        if dep not in tgts:
+          pending.add(dep)
+
+  # Cache of symbols of a target.
+  syms = {}
+  # Load the symbols from the all targets and its deps.
+  all_deps = set(tgts.keys()).union(*[set(tgt.deps) for tgt in tgts.values()])
+  for entry in all_deps:
+    fn = os.path.join(args.build_dir,
+                      tgts[entry].filename if entry in tgts else entry)
+    syms[entry] = LoadSymbols(fn)
+
+  for target in args.target:
+    tgt_stats = []
+    tgt = tgts[target]
+
+    tgt_syms = syms[target]
+    used_syms = set()
+    for sym in tgt_syms:
+      if sym.typ.lower() in BIN_SIZE + RAM_SIZE:
+        used_syms.add(sym.name)
+      elif sym.typ.lower() in IGNORE_SYMBOLS:
+        continue
+      else:
+        print('Unknown: %s %s' % (sym.typ, sym.name))
+
+    target_path = os.path.join(args.build_dir, tgt.filename)
+    sym_stacks = []
+    if not target_path.endswith('.a'):
+      sym_stacks = LoadStackSizes(target_path, args.binutils)
+    symbols_by_addr = {sym.address: sym for sym in tgt_syms
+                          if sym.typ.lower() in 'tw'}
+    tgt_stack_sizes = collections.OrderedDict()
+    for sym_stack in sorted(sym_stacks, key=lambda s: -s.stack_size):
+      tgt_stack_sizes[
+          symbols_by_addr[sym_stack.address].name] = sym_stack.stack_size
+
+    tgt_top_symbols = []
+    if args.top_symbols:
+      tgt_top_symbols = [(sym.size, sym.typ, sym.name) for sym in tgt_syms
+                         if sym.name in used_syms and sym.size]
+      tgt_top_symbols.sort(key=lambda t: (-t[0], t[2]))
+      tgt_top_symbols = tgt_top_symbols[:args.top_symbols]
+
+    tgt_size = TargetSize(tgt_syms)
+    tgt_stats.append(ObjectStats(target, False, tgt_size))
+
+    # Split out by SIMD.
+    for namespace in SIMD_NAMESPACES:
+      mangled = str(len(namespace)) + namespace
+      if not any(mangled in sym.name for sym in tgt_syms):
+        continue
+      ret = {}
+      for sym in tgt_syms:
+        if not sym.size or mangled not in sym.name:
+          continue
+        t = sym.typ.lower()
+        ret.setdefault(t, 0)
+        ret[t] += sym.size
+      # SIMD namespaces are not part of the partition, they are already included
+      # in the jpegxl-static normally.
+      if not ret:
+        continue
+      tgt_stats.append(ObjectStats('\\--> ' + namespace, False, ret))
+
+    for obj in tgt.deps:
+      dep_used_syms = used_syms.copy()
+      obj_size = TargetSize(syms[obj], used_syms)
+      if not obj_size:
+        continue
+      tgt_stats.append(ObjectStats(os.path.basename(obj), True, obj_size))
+      if args.recursive:
+        # Not really recursive, but it shows all the remaining deps at a second
+        # level.
+        for obj_dep in sorted(TargetTransitiveDeps(tgts, obj),
+                              key=os.path.basename):
+          obj_dep_size = TargetSize(syms[obj_dep], dep_used_syms)
+          if not obj_dep_size:
+            continue
+          tgt_stats.append(ObjectStats(
+              '   '+ os.path.basename(obj_dep), False, obj_dep_size))
+
+    PrintStats(tgt_stats)
+    PrintStackStats(tgt_stack_sizes)
+    PrintTopSymbols(tgt_top_symbols)
+    stats[target] = {
+        'build': tgt_stats,
+        'stack': tgt_stack_sizes,
+        'top': tgt_top_symbols,
+    }
+
+  if args.save:
+    with open(args.save, 'w') as f:
+      json.dump(stats, f)
+
+  # Check the maximum stack size.
+  exit_code = 0
+  if args.max_stack:
+    for name, size in tgt_stack_sizes.items():
+      if size > args.max_stack:
+        print('Error: %s exceeds stack limit: %d vs %d' % (
+                  name, size, args.max_stack),
+              file=sys.stderr)
+        exit_code = 1
+
+  return exit_code
+
+def main():
+  parser = argparse.ArgumentParser(description=__doc__)
+  parser.add_argument('target', type=str, nargs='+',
+                      help='target(s) to analyze')
+  parser.add_argument('--build-dir', default='build',
+                      help='path to the build directory')
+  parser.add_argument('--save', default=None,
+                      help='path to save the stats as JSON file')
+  parser.add_argument('-r', '--recursive', default=False, action='store_true',
+                      help='Print recursive entries.')
+  parser.add_argument('--top-symbols', default=0, type=int,
+                      help='Number of largest symbols to print')
+  parser.add_argument('--binutils', default='',
+                      help='prefix path to binutils tools, such as '
+                           'aarch64-linux-gnu-')
+  parser.add_argument('--max-stack', default=None, type=int,
+                      help=('Maximum static stack size of a function. If a '
+                            'static stack is larger it will exit with an error '
+                            'code.'))
+  args = parser.parse_args()
+  sys.exit(SizeStats(args))
+
+
+if __name__ == '__main__':
+  main()
diff --git a/third-party/libjxl/libjxl/tools/scripts/check_author.py b/third-party/libjxl/libjxl/tools/scripts/check_author.py
new file mode 100755
index 0000000000..23f0a2da5e
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/scripts/check_author.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python3
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+
+"""check_author.py: Check that a given author is listed in the AUTHORS file."""
+
+import argparse
+import fnmatch
+import os
+import re
+import sys
+
+
+def IsAuthorInFile(email, name, filename):
+  """Return whether we find the name/email in the authors filename"""
+  # Organization emails have emails listed as <*@domain.com>. This matches those
+  # patterns.
+  email_pattern_regex = re.compile(r'.*<([^>]+)>')
+
+  with open(filename, 'r') as f:
+    for line in f:
+      line = line.strip()
+      if line.startswith('#') or not line:
+        continue
+      # Exact match for a line without an email is OK.
+      if line == name:
+        return True
+      # Exact email address match is OK, even if the name is different.
+      if fnmatch.fnmatch(line, '* <%s>' % email):
+        print(
+            "User %s <%s> matched with different name %s" % (name, email, line),
+            file=sys.stderr)
+        return True
+      # Organizations often have *@domain.com email patterns which don't match
+      # the name.
+      if '*' in line:
+        m = email_pattern_regex.match(line)
+        if m and fnmatch.fnmatch(email, m.group(1)):
+          print("User %s <%s> matched pattern %s" % (name, email, line),
+                file=sys.stderr)
+          return True
+  return False
+
+def IndividualsInAlphabeticOrder(filename):
+  """Checks if the names are in alphabetic order"""
+  with open(filename, 'r') as f:
+    lines = f.readlines()
+    individual_header = '# Individuals:\n'
+    if individual_header in lines:
+      individual_authors = lines[lines.index(individual_header) + 1:]
+      sorted_authors = sorted(individual_authors, key=str.casefold)
+      if sorted_authors == individual_authors:
+        print("Individual authors are sorted alphabetically.")
+        return True
+      else:
+        print("Individual authors are not sorted alphabetically."
+              " The expected order is:")
+        print(''.join(sorted_authors))
+        return False
+    else:
+      print("Cannot find line '# Individuals:' in file.")
+  return False
+
+
+def CheckAuthor(args):
+  authors_path = os.path.join(args.source_dir, 'AUTHORS')
+  author_in_file = IsAuthorInFile(
+      args.email, args.name, authors_path)
+  if not author_in_file:
+    print("User %s <%s> not found, please add yourself to the AUTHORS file" % (
+              args.name, args.email),
+          file=sys.stderr)
+
+  sorted_alphabetically = IndividualsInAlphabeticOrder(authors_path)
+  if not sorted_alphabetically:
+    print("Authors not in alphabetical order, please sort them.", file=sys.stderr)
+  if not author_in_file or not sorted_alphabetically:
+    if not args.dry_run:
+      sys.exit(1)
+
+
+def main():
+  parser = argparse.ArgumentParser(description=__doc__)
+  parser.add_argument('email', type=str,
+                      help='email of the commit author to check')
+  parser.add_argument('name', type=str,
+                      help='name of the commit author to check')
+  parser.add_argument(
+      '--source-dir',
+      default=os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))),
+      help='path to the source directory where the AUTHORS file is located')
+  parser.add_argument('--dry-run', default=False, action='store_true',
+                      help='Don\'t return an exit code in case of failure')
+  args = parser.parse_args()
+  CheckAuthor(args)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/third-party/libjxl/libjxl/tools/scripts/cjxl_bisect_bpp b/third-party/libjxl/libjxl/tools/scripts/cjxl_bisect_bpp
new file mode 100755
index 0000000000..13a908c571
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/scripts/cjxl_bisect_bpp
@@ -0,0 +1,45 @@
+#!/bin/sh
+#
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+#
+# Bisects JPEG XL encoding quality parameter to reach a given
+# target bits-per-pixel value.
+# (To be used directly, or as a template for tailored processing.)
+#
+# Usage: cjxl_bisect_bpp {input_filename} {output_filename} {target_bpp}
+
+#
+# We take the `bisector` tool from $PATH, or, if not available,
+# try to locate it in the same directory as the current script.
+# The `get_bpp` helper is taken from the same directory as the current script.
+#
+
+input_filename=$1
+output_filename=$2
+# Target bits-per-pixel value the bisection has to reach.
+target_size=$3
+
+script_dir=$(dirname "$(readlink -f "$0")")
+bisect_tool=$(which bisector)
+if [ -z "$bisect_tool" ] ; then
+  bisect_tool="${script_dir}/bisector"
+fi
+# NOTE(review): this helper path is set but not used further down - confirm
+# whether the bpp computation below was meant to call it.
+jxl_get_bpp_helper="${script_dir}/jxl_get_bpp_helper"
+# If $CJXL_BIN is set, we use this instead of looking for `cjxl` on $PATH.
+
+cjxl_bin=${CJXL_BIN}
+if [ -z "$cjxl_bin" ] ; then
+  cjxl_bin="cjxl"
+fi
+
+# Using `identify` from ImageMagick here to get the number of pixels
+# (width*height) of the input image.
+num_pixels=$(identify -format "%w*%h\n" "${input_filename}"|bc)
+
+# Allow 0.5% tolerance in the resulting value (--rtol_val=0.005).
+# The command prints the encoded file size divided by the number of pixels.
+exec $bisect_tool --var=BISECT --range=0.01,15.0 --target=$target_size \
+  --rtol_val=0.005 \
+  --cmd="$cjxl_bin --distance=\$BISECT ${input_filename} ${output_filename}_bisect_\$BISECT.jxl ; (find ${output_filename}_bisect_\$BISECT.jxl -printf \"scale=10;%s/$num_pixels\n\" | bc -l)" \
+  --final="mv ${output_filename}_bisect_\$BISECT.jxl ${output_filename}; rm -f ${output_filename}_bisect_*.jxl" \
+  --verbosity=1
diff --git a/third-party/libjxl/libjxl/tools/scripts/cjxl_bisect_size b/third-party/libjxl/libjxl/tools/scripts/cjxl_bisect_size
new file mode 100755
index 0000000000..c0945d92f5
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/scripts/cjxl_bisect_size
@@ -0,0 +1,41 @@
+#!/bin/sh
+#
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+#
+# Bisects JPEG XL encoding quality parameter to reach a given
+# target byte-size.
+# (To be used directly, or as a template for tailored processing.)
+#
+# Usage: cjxl_bisect_size {input_filename} {output_filename} {target_size}
+
+#
+# We take the `bisector` tool from $PATH, or, if not available,
+# try to locate it in the same directory as the current script.
+#
+
+input_filename=$1
+output_filename=$2
+# Target size in bytes the bisection has to reach.
+target_size=$3
+
+script_dir=$(dirname "$(readlink -f "$0")")
+bisect_tool=$(which bisector)
+if [ -z "$bisect_tool" ] ; then
+  bisect_tool="${script_dir}/bisector"
+fi
+
+# If $CJXL_BIN is set, we use this instead of looking for `cjxl` on $PATH.
+
+cjxl_bin=${CJXL_BIN}
+# Note: `[` is a command, so it must be separated from `-z` by a space.
+if [ -z "$cjxl_bin" ] ; then
+  cjxl_bin="cjxl"
+fi
+
+# Allow 0.5% tolerance in size (--rtol_val=0.005).
+# `wc -c` reads the file from stdin so that it prints only the byte count,
+# without the file name, as the bisector expects a numerical result.
+exec $bisect_tool --var=BISECT --range=0.01,10.0 --target=$target_size \
+  --rtol_val=0.005 \
+  --cmd="$cjxl_bin --distance=\$BISECT ${input_filename} ${output_filename}_bisect_\$BISECT.jxl && wc -c < ${output_filename}_bisect_\$BISECT.jxl" \
+  --final="mv ${output_filename}_bisect_\$BISECT.jxl ${output_filename}; rm -f ${output_filename}_bisect_*.jxl" \
+  --verbosity=1
diff --git a/third-party/libjxl/libjxl/tools/scripts/demo_progressive_saliency_encoding.py b/third-party/libjxl/libjxl/tools/scripts/demo_progressive_saliency_encoding.py
new file mode 100755
index 0000000000..6eb5cadd54
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/scripts/demo_progressive_saliency_encoding.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python3
+
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+"""Produces demos for how progressive-saliency encoding would look like.
+
+As long as we do not have a progressive decoder that allows showing images
+generated from partially-available data, we can resort to building
+animated gifs that show how progressive loading would look like.
+
+Method:
+
+1. JPEG-XL encode the image, but stop at the pre-final (2nd) step.
+2. Use separate tool to compute a heatmap which shows where differences between
+   the pre-final and final image are expected to be perceptually worst.
+3. Use this heatmap to JPEG-XL encode the image with the final step split into
+   'salient parts only' and 'non-salient parts'. Generate a sequence of images
+   that stop decoding after the 1st, 2nd, 3rd, 4th step. JPEG-XL decode these
+   truncated images back to PNG.
+4. Measure byte sizes of the truncated-encoded images.
+5. Build an animated GIF with variable delays by calling ImageMagick's
+   `convert` command.
+
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from six.moves import zip
+import ast  # For ast.literal_eval() only.
+import os
+import re
+import shlex
+import subprocess
+import sys
+
+_BLOCKSIZE = 8  # Passed (as a string) as the first argument of the heatmap command.
+
+_CONF_PARSERS = dict(  # Option name -> callable converting the raw string value.
+    keep_tempfiles=lambda s: bool(ast.literal_eval(s)),
+    heatmap_command=shlex.split,
+    simulated_progressive_loading_time_sec=float,
+    simulated_progressive_loading_delay_until_looparound_sec=float,
+    jpegxl_encoder=shlex.split,
+    jpegxl_decoder=shlex.split,
+    blurring=lambda s: s.split(),
+)
+
+
+def parse_config(config_filename):
+  """Parses `config_filename` into a dict of typed options; raises ValueError."""
+  conf, line = {}, '<no line read>'  # Placeholder used if open() itself fails.
+  re_comment = re.compile(r'^\s*(?:#.*)?$')
+  re_param = re.compile(r'^(?P<option>\w+)\s*:\s*(?P<value>.*?)\s*$')
+  try:
+    with open(config_filename) as h:
+      for line in h:
+        if re_comment.match(line):
+          continue
+        m = re_param.match(line)
+        if not m:
+          raise ValueError('Syntax error')
+        conf[m.group('option')] = (
+            _CONF_PARSERS[m.group('option')](m.group('value')))
+  except Exception as exn:
+    raise ValueError('Bad Configuration line ({}): {}'.format(exn, line))
+  missing_options = set(_CONF_PARSERS) - set(conf)
+  if missing_options:
+    raise ValueError('Missing configuration options: ' + ', '.join(
+        sorted(missing_options)))
+  return conf
+
+
+def generate_demo_image(config, input_filename, output_filename):
+  tempfiles = []  # Intermediate files; unlinked at the end unless keep_tempfiles.
+  # Runs the configured encoder command after substituting the ${...} arguments.
+  def encode_img(input_filename, output_filename, num_steps,
+                 heatmap_filename=None):
+    replacements = {
+        '${INPUT}': input_filename,
+        '${OUTPUT}': output_filename,
+        '${STEPS}': str(num_steps),
+        # Heatmap argument will be provided in --param=value form.
+        '${HEATMAP_ARG}': ('--saliency_map_filename=' + heatmap_filename
+                           if heatmap_filename is not None else '')
+        }
+    # Remove empty args. This removes the heatmap-argument if no heatmap
+    # is provided.
+    cmd = [
+        _f for _f in
+        [replacements.get(arg, arg) for arg in config['jpegxl_encoder']] if _f
+    ]
+    tempfiles.append(output_filename)
+    subprocess.call(cmd)
+  # Runs the configured decoder command on one encoded file.
+  def decode_img(input_filename, output_filename):
+    replacements = {'${INPUT}': input_filename, '${OUTPUT}': output_filename}
+    cmd = [replacements.get(arg, arg) for arg in config['jpegxl_decoder']]
+    tempfiles.append(output_filename)
+    subprocess.call(cmd)
+  # Runs the heatmap command: blocksize, original, coarse image, output heatmap.
+  def generate_heatmap(orig_image_filename, coarse_grained_filename,
+                       heatmap_filename):
+    cmd = config['heatmap_command'] + [
+        str(_BLOCKSIZE), orig_image_filename, coarse_grained_filename,
+        heatmap_filename]
+    tempfiles.append(heatmap_filename)
+    subprocess.call(cmd)
+  # Pipeline: encode/decode steps 1-2, heatmap from step 2, steps 3-4, then GIF.
+  try:
+    encode_img(input_filename, output_filename + '._step1.pik', 1)
+    decode_img(output_filename + '._step1.pik', output_filename + '._step1.png')
+    encode_img(input_filename, output_filename + '._step2.pik', 2)
+    decode_img(output_filename + '._step2.pik', output_filename + '._step2.png')
+    generate_heatmap(input_filename, output_filename + '._step2.png',
+                     output_filename + '._heatmap.png')
+    encode_img(input_filename,
+               output_filename + '._step3.pik', 3,
+               output_filename + '._heatmap.png')
+    encode_img(input_filename,
+               output_filename + '._step4.pik', 4,
+               output_filename + '._heatmap.png')
+    decode_img(output_filename + '._step3.pik', output_filename + '._step3.png')
+    decode_img(output_filename + '._step4.pik', output_filename + '._step4.png')
+    data_sizes = [
+        os.stat('{}._step{}.pik'.format(output_filename, num_step)).st_size
+        for num_step in (1, 2, 3, 4)]
+    time_offsets = [0] + [
+        # Imagemagick's `convert` accepts delays in units of 1/100 sec.
+        round(100 * config['simulated_progressive_loading_time_sec'] * size /
+              data_sizes[-1]) for size in data_sizes]
+    time_delays = [t_next - t_prev
+                   for t_next, t_prev in zip(time_offsets[1:], time_offsets)]
+    # Add a fake white initial image. As long as no usable image data is
+    # available, the user will see a white background.
+    subprocess.call(['convert',
+                     output_filename + '._step1.png',
+                     '-fill', 'white', '-colorize', '100%',
+                     output_filename + '._step0.png'])
+    tempfiles.append(output_filename + '._step0.png')
+    subprocess.call(
+        ['convert', '-loop', '0', output_filename + '._step0.png'] +
+        [arg for args in [
+            ['-delay', str(time_delays[n - 1]),
+             '-blur', config['blurring'][n - 1],
+             '{}._step{}.png'.format(output_filename, n)]
+            for n in (1, 2, 3, 4)] for arg in args] +
+        ['-delay', str(round(100 * config[
+            'simulated_progressive_loading_delay_until_looparound_sec'])),
+         output_filename + '._step4.png',
+         output_filename])
+  finally:
+    if not config['keep_tempfiles']:
+      for filename in tempfiles:
+        try:
+          os.unlink(filename)
+        except OSError:
+          pass  # May already have been deleted otherwise.
+
+
+def main():
+  """Command-line entry point: validates argv, then builds the demo GIF."""
+  if sys.version.startswith('2.'):
+    sys.exit('This is a python3-only script.')
+  args = sys.argv
+  args_ok = (len(args) == 4 and args[-1].endswith('.gif')
+             and args[-2].endswith('.png'))
+  if not args_ok:
+    sys.exit('Usage: {} [config_options_file] [input.png] [output.gif]'.format(
+        args[0]))
+  try:
+    generate_demo_image(parse_config(args[1]), args[2], args[3])
+  except ValueError as exn:
+    sys.exit(exn)
+
+
+
+if __name__ == '__main__':
+  main()
diff --git a/third-party/libjxl/libjxl/tools/scripts/jpegli_tools_test.sh b/third-party/libjxl/libjxl/tools/scripts/jpegli_tools_test.sh
new file mode 100644
index 0000000000..96df3b01ba
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/scripts/jpegli_tools_test.sh
@@ -0,0 +1,287 @@
+#!/bin/bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# End-to-end roundtrip tests for cjpegli and djpegli tools, and other linux
+# tools linked with the jpegli library.
+
+set -eux
+
+MYDIR=$(dirname $(realpath "$0"))
+JPEGXL_TEST_DATA_PATH="${MYDIR}/../../testdata"
+
+# Temporary files cleanup hooks.
+CLEANUP_FILES=()
+cleanup() {
+  # Nothing was registered for removal: leave the filesystem untouched.
+  [[ ${#CLEANUP_FILES[@]} -ne 0 ]] || return 0
+  rm -rf "${CLEANUP_FILES[@]}"
+}
+trap 'retcode=$?; { set +x; } 2>/dev/null; cleanup' INT TERM EXIT
+
+verify_ssimulacra2() {  # args: first image, second image, minimum acceptable score
+  local score="$("${ssimulacra2}" "${1}" "${2}")"
+  python3 -c "import sys; sys.exit(not ${score} >= ${3})"  # exit 0 iff score >= $3
+}
+
+verify_max_bpp() {  # succeeds iff the file uses at most maxbpp bits per input pixel
+  local infn="$1"
+  local jpgfn="$2"
+  local maxbpp="$3"
+  local size="$(wc -c "${jpgfn}" | cut -d' ' -f1)"  # compressed size in bytes
+  local pixels=$(( "$(identify "${infn}" | cut -d' ' -f3 | tr 'x' '*')" ))  # 3rd field, presumably WxH
+  python3 -c "import sys; sys.exit(not ${size} * 8 <= ${maxbpp} * ${pixels})"
+}
+
+# Test that jpeg files created with cjpegli can be decoded with normal djpeg.
+cjpegli_test() {  # args: testdata-relative input, encoder args, min score, max bpp
+  local infn="${JPEGXL_TEST_DATA_PATH}/$1"
+  local encargs="$2"
+  local minscore="$3"
+  local maxbpp="$4"
+  local jpgfn="$(mktemp -p "${tmpdir}")"
+  local outfn="$(mktemp -p "${tmpdir}").ppm"
+
+  "${cjpegli}" "${infn}" "${jpgfn}" $encargs  # unquoted: encargs may hold several flags
+  djpeg -outfile "${outfn}" "${jpgfn}"
+
+  verify_ssimulacra2 "${infn}" "${outfn}" "${minscore}"
+  verify_max_bpp "${infn}" "${jpgfn}" "${maxbpp}"
+}
+
+# Test full cjpegli/djpegli roundtrip.
+cjpegli_djpegli_test() {  # args: testdata-relative input, encoder args, min score, max bpp
+  local infn="${JPEGXL_TEST_DATA_PATH}/$1"
+  local encargs="$2"
+  local minscore="$3"
+  local maxbpp="$4"
+  local jpgfn="$(mktemp -p "${tmpdir}")"
+  local outfn="$(mktemp -p "${tmpdir}").png"
+
+  "${cjpegli}" "${infn}" "${jpgfn}" $encargs  # unquoted: encargs may hold several flags
+  "${djpegli}" "${jpgfn}" "${outfn}"
+
+  verify_ssimulacra2 "${infn}" "${outfn}" "${minscore}"
+  verify_max_bpp "${infn}" "${jpgfn}" "${maxbpp}"
+}
+
+# Test the --target_size command line argument of cjpegli.
+cjpegli_test_target_size() {  # args: testdata-relative input, encoder args, target bytes
+  local infn="${JPEGXL_TEST_DATA_PATH}/$1"
+  local encargs="$2"
+  local target_size="$3"
+  local jpgfn="$(mktemp -p "$tmpdir")"
+
+  "${cjpegli}" "${infn}" "${jpgfn}" $encargs --target_size "${target_size}"
+  local size="$(wc -c "${jpgfn}" | cut -d' ' -f1)"  # produced size in bytes
+  python3 -c "import sys; sys.exit(not ${target_size} * 0.996 <= ${size})"  # lower bound: -0.4%
+  python3 -c "import sys; sys.exit(not ${target_size} * 1.004 >= ${size})"  # upper bound: +0.4%
+}
+
+# Test that jpeg files created with cjpeg binary + jpegli library can be decoded
+# with normal libjpeg.
+cjpeg_test() {  # args: testdata-relative input, cjpeg args, min score, max bpp
+  local infn="${JPEGXL_TEST_DATA_PATH}/$1"
+  local encargs="$2"
+  local minscore="$3"
+  local maxbpp="$4"
+  local jpgfn="$(mktemp -p "$tmpdir")"
+  local outfn="$(mktemp -p "${tmpdir}").png"
+
+  LD_LIBRARY_PATH="${build_dir}/lib/jpegli:${LD_LIBRARY_PATH:-}" \
+    cjpeg $encargs -outfile "${jpgfn}" "${infn}"
+  djpeg -outfile "${outfn}" "${jpgfn}"  # decoded without the LD_LIBRARY_PATH override
+
+  verify_ssimulacra2 "${infn}" "${outfn}" "${minscore}"
+  verify_max_bpp "${infn}" "${jpgfn}" "${maxbpp}"
+}
+
+# Test decoding of jpeg files with the djpegli binary.
+djpegli_test() {  # args: testdata-relative input, cjpeg args, min score
+  local infn="${JPEGXL_TEST_DATA_PATH}/$1"
+  local encargs="$2"
+  local minscore="$3"
+  local jpgfn="$(mktemp -p "$tmpdir")"
+
+  cjpeg $encargs -outfile "${jpgfn}" "${infn}"  # the input jpeg comes from plain cjpeg
+
+  # Test that disabling output works.
+  "${djpegli}" "${jpgfn}" --disable_output
+  for ext in png pgm ppm pfm pnm baz; do
+    "${djpegli}" "${jpgfn}" /foo/bar.$ext --disable_output
+  done
+
+  # Test decoding to PNG, PPM, PNM, PFM
+  for ext in png ppm pnm pfm; do
+    local outfn="$(mktemp -p "${tmpdir}").${ext}"
+    "${djpegli}" "${jpgfn}" "${outfn}" --num_reps 2
+    verify_ssimulacra2 "${infn}" "${outfn}" "${minscore}"
+  done
+
+  # Test decoding to PGM (for grayscale input)
+  if [[ "${infn: -6}" == ".g.png" ]]; then
+    local outfn="$(mktemp -p "${tmpdir}").pgm"
+    "${djpegli}" "${jpgfn}" "${outfn}" --quiet
+    verify_ssimulacra2 "${infn}" "${outfn}" "${minscore}"
+  fi
+
+  # Test decoding to 16 bit
+  for ext in png pnm; do
+    local outfn8="$(mktemp -p "${tmpdir}").8.${ext}"
+    local outfn16="$(mktemp -p "${tmpdir}").16.${ext}"
+    "${djpegli}" "${jpgfn}" "${outfn8}"
+    "${djpegli}" "${jpgfn}" "${outfn16}" --bitdepth 16
+    local score8="$("${ssimulacra2}" "${infn}" "${outfn8}")"
+    local score16="$("${ssimulacra2}" "${infn}" "${outfn16}")"
+    python3 -c "import sys; sys.exit(not ${score16} > ${score8})"  # 16-bit must score strictly higher
+  done
+}
+
+# Test decoding of jpeg files with the djpeg binary + jpegli library.
+djpeg_test() {  # args: testdata-relative input, cjpeg args, min score
+  local infn="${JPEGXL_TEST_DATA_PATH}/$1"
+  local encargs="$2"
+  local minscore="$3"
+  local jpgfn="$(mktemp -p "$tmpdir")"
+
+  cjpeg $encargs -outfile "${jpgfn}" "${infn}"  # the input jpeg comes from plain cjpeg
+
+  # Test default behaviour.
+  local outfn="$(mktemp -p "${tmpdir}").pnm"
+  LD_LIBRARY_PATH="${build_dir}/lib/jpegli:${LD_LIBRARY_PATH:-}" \
+    djpeg -outfile "${outfn}" "${jpgfn}"
+  verify_ssimulacra2 "${infn}" "${outfn}" "${minscore}"
+
+  # Test color quantization.
+  local outfn="$(mktemp -p "${tmpdir}").pnm"
+  LD_LIBRARY_PATH="${build_dir}/lib/jpegli:${LD_LIBRARY_PATH:-}" \
+    djpeg -outfile "${outfn}" -colors 128 "${jpgfn}"
+  verify_ssimulacra2 "${infn}" "${outfn}" 48
+
+  local outfn="$(mktemp -p "${tmpdir}").pnm"
+  LD_LIBRARY_PATH="${build_dir}/lib/jpegli:${LD_LIBRARY_PATH:-}" \
+    djpeg -outfile "${outfn}" -colors 128 -onepass -dither fs "${jpgfn}"
+  verify_ssimulacra2 "${infn}" "${outfn}" 30
+
+  local outfn="$(mktemp -p "${tmpdir}").pnm"
+  LD_LIBRARY_PATH="${build_dir}/lib/jpegli:${LD_LIBRARY_PATH:-}" \
+    djpeg -outfile "${outfn}" -colors 128 -onepass -dither ordered "${jpgfn}"
+  verify_ssimulacra2 "${infn}" "${outfn}" 30
+
+  # Test -grayscale flag.
+  local outfn="$(mktemp -p "${tmpdir}").pgm"
+  LD_LIBRARY_PATH="${build_dir}/lib/jpegli:${LD_LIBRARY_PATH:-}" \
+    djpeg -outfile "${outfn}" -grayscale "${jpgfn}"
+  local outfn2="$(mktemp -p "${tmpdir}").pgm"
+  convert "${infn}" -set colorspace Gray "${outfn2}"
+  # JPEG color conversion is in gamma-compressed space, so it will not match
+  # the correct grayscale version very well.
+  verify_ssimulacra2 "${outfn2}" "${outfn}" 60
+
+  # Test -rgb flag.
+  local outfn="$(mktemp -p "${tmpdir}").ppm"
+  LD_LIBRARY_PATH="${build_dir}/lib/jpegli:${LD_LIBRARY_PATH:-}" \
+    djpeg -outfile "${outfn}" -rgb "${jpgfn}"
+  verify_ssimulacra2 "${infn}" "${outfn}" "${minscore}"
+
+  # Test -crop flag.
+  for geometry in 256x256+128+128 256x127+128+117; do
+    local outfn="$(mktemp -p "${tmpdir}").pnm"
+    LD_LIBRARY_PATH="${build_dir}/lib/jpegli:${LD_LIBRARY_PATH:-}" \
+      djpeg -outfile "${outfn}" -crop "${geometry}" "${jpgfn}"
+    local outfn2="$(mktemp -p "${tmpdir}").pnm"
+    convert "${infn}" -crop "${geometry}" "${outfn2}"  # reference crop of the original
+    verify_ssimulacra2 "${outfn2}" "${outfn}" "${minscore}"
+  done
+
+  # Test output scaling.
+  for scale in 1/4 3/8 1/2 5/8 9/8; do
+    local scalepct="$(python3 -c "print(100.0*${scale})")%"  # fraction -> percentage string
+    local geometry=96x128+0+0
+    local outfn="$(mktemp -p "${tmpdir}").pnm"
+    LD_LIBRARY_PATH="${build_dir}/lib/jpegli:${LD_LIBRARY_PATH:-}" \
+      djpeg -outfile "${outfn}" -scale "${scale}" -crop "${geometry}" "${jpgfn}"
+    local outfn2="$(mktemp -p "${tmpdir}").pnm"
+    convert "${infn}" -scale "${scalepct}" -crop "${geometry}" "${outfn2}"
+    verify_ssimulacra2 "${outfn2}" "${outfn}" 80
+  done
+}
+
+main() {  # optional arg: build directory (defaults to <repo>/build)
+  local tmpdir=$(mktemp -d)  # scratch directory used by every test helper
+  CLEANUP_FILES+=("${tmpdir}")
+
+  local build_dir="${1:-}"
+  if [[ -z "${build_dir}" ]]; then
+    build_dir=$(realpath "${MYDIR}/../../build")
+  fi
+
+  local cjpegli="${build_dir}/tools/cjpegli"
+  local djpegli="${build_dir}/tools/djpegli"
+  local ssimulacra2="${build_dir}/tools/ssimulacra2"
+  local rgb_in="jxl/flower/flower_small.rgb.png"
+  local gray_in="jxl/flower/flower_small.g.png"
+  local ppm_rgb="jxl/flower/flower_small.rgb.depth8.ppm"
+  local ppm_gray="jxl/flower/flower_small.g.depth8.pgm"
+
+  cjpegli_test "${rgb_in}" "" 88.5 1.7
+  cjpegli_test "${rgb_in}" "-q 80" 84 1.2
+  cjpegli_test "${rgb_in}" "-q 95" 91.5 2.4
+  cjpegli_test "${rgb_in}" "-d 0.5" 92 2.6
+  cjpegli_test "${rgb_in}" "--chroma_subsampling 420" 87 1.5
+  cjpegli_test "${rgb_in}" "--chroma_subsampling 440" 87 1.6
+  cjpegli_test "${rgb_in}" "--chroma_subsampling 422" 87 1.6
+  cjpegli_test "${rgb_in}" "--std_quant" 91 2.2
+  cjpegli_test "${rgb_in}" "--noadaptive_quantization" 88.5 1.85
+  cjpegli_test "${rgb_in}" "-p 1" 88.5 1.72
+  cjpegli_test "${rgb_in}" "-p 0" 88.5 1.75
+  cjpegli_test "${rgb_in}" "-p 0 --fixed_code" 88.5 1.8
+  cjpegli_test "${gray_in}" "" 92 1.4
+
+  cjpegli_test_target_size "${rgb_in}" "" 10000
+  cjpegli_test_target_size "${rgb_in}" "" 50000
+  cjpegli_test_target_size "${rgb_in}" "" 100000
+  cjpegli_test_target_size "${rgb_in}" "--chroma_subsampling 420" 20000
+  cjpegli_test_target_size "${rgb_in}" "--xyb" 20000
+  cjpegli_test_target_size "${rgb_in}" "-p 0 --fixed_code" 20000
+
+  cjpegli_test "jxl/flower/flower_small.rgb.depth8.ppm" "" 88.5 1.7
+  cjpegli_test "jxl/flower/flower_small.rgb.depth16.ppm" "" 89 1.7
+  cjpegli_test "jxl/flower/flower_small.g.depth8.pgm" "" 89 1.7
+  cjpegli_test "jxl/flower/flower_small.g.depth16.pgm" "" 89 1.7
+
+  cjpegli_djpegli_test "${rgb_in}" "" 89 1.7
+  cjpegli_djpegli_test "${rgb_in}" "--xyb" 87 1.5
+
+  djpegli_test "${ppm_rgb}" "-q 95" 92
+  djpegli_test "${ppm_rgb}" "-q 95 -sample 1x1" 93
+  djpegli_test "${ppm_gray}" "-q 95 -gray" 94
+
+  cjpeg_test "${ppm_rgb}" "" 89 1.9
+  cjpeg_test "${ppm_rgb}" "-optimize" 89 1.85
+  cjpeg_test "${ppm_rgb}" "-optimize -progressive" 89 1.8
+  cjpeg_test "${ppm_rgb}" "-sample 2x2" 87 1.65
+  cjpeg_test "${ppm_rgb}" "-sample 1x2" 88 1.75
+  cjpeg_test "${ppm_rgb}" "-sample 2x1" 88 1.75
+  cjpeg_test "${ppm_rgb}" "-grayscale" -50 1.45
+  cjpeg_test "${ppm_rgb}" "-rgb" 92 4.5
+  cjpeg_test "${ppm_rgb}" "-restart 1" 89 1.9
+  cjpeg_test "${ppm_rgb}" "-restart 1024B" 89 1.9
+  cjpeg_test "${ppm_rgb}" "-smooth 30" 88 1.75
+  cjpeg_test "${ppm_gray}" "-grayscale" 92 1.45
+  # The -q option works differently on v62 vs. v8 cjpeg binaries, so we have to
+  # have looser bounds than would be necessary if we stuck to a particular
+  # cjpeg version.
+  cjpeg_test "${ppm_rgb}" "-q 50" 76 0.95
+  cjpeg_test "${ppm_rgb}" "-q 80" 84 1.6
+  cjpeg_test "${ppm_rgb}" "-q 90" 89 2.35
+  cjpeg_test "${ppm_rgb}" "-q 100" 95 7.45
+
+  djpeg_test "${ppm_rgb}" "-q 95" 92
+  djpeg_test "${ppm_rgb}" "-q 95 -sample 1x1" 93
+  djpeg_test "${ppm_gray}" "-q 95 -gray" 94
+}
+
+main "$@"
diff --git a/third-party/libjxl/libjxl/tools/scripts/jxl-eval.sh b/third-party/libjxl/libjxl/tools/scripts/jxl-eval.sh
new file mode 100755
index 0000000000..138aac8b01
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/scripts/jxl-eval.sh
@@ -0,0 +1,124 @@
+#!/bin/bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+set -eu
+
+GSROOT="${GSROOT:-gs://jxl-quality}"
+URLROOT="${URLROOT:-https://storage.googleapis.com/jxl-quality}"
+BUILD_DIR="${BUILD_DIR:-./build}"
+BUILD_MODE="${BUILD_MODE:-opt}"
+DESC="${DESC:-exp}"
+
+build_libjxl() {  # runs ./ci.sh with ${BUILD_MODE} as its command
+  export BUILD_DIR="${BUILD_DIR}"
+  export SKIP_TEST=1  # exported for ./ci.sh; presumably skips its test step
+  ./ci.sh "${BUILD_MODE}"
+}
+
+build_mozjpeg() {  # clones mozjpeg into ${HOME}/mozjpeg if absent, then builds it
+  if [[ ! -d "${HOME}/mozjpeg" ]]; then
+    (cd "${HOME}"
+     git clone https://github.com/mozilla/mozjpeg.git
+    )
+  fi
+  (cd "${HOME}/mozjpeg"  # subshell keeps the caller's working directory unchanged
+   mkdir -p build
+   cmake -GNinja -B build
+   ninja -C build
+  )
+}
+
+download_corpus() {
+  # Mirrors ${GSROOT}/corpora/<corpus> into ${HOME}/corpora/<corpus>.
+  local corpus="$1"
+  local dest="${HOME}/corpora/${corpus}"
+  local src="${GSROOT}/corpora/${corpus}"
+  # `mkdir -p` is a no-op for an existing directory, so no explicit check.
+  mkdir -p "${dest}"
+  gsutil -m rsync "${src}" "${dest}"
+}
+
+create_report() {  # args: CORPUS CODEC [extra benchmark_xl arguments...]
+  local corpus="$1"
+  local codec="$2"
+  shift 2
+  local rev="$(git rev-parse --short HEAD)"
+  local originals="${URLROOT}/corpora/${corpus}"
+  if git diff HEAD --quiet; then  # clean tree -> "base", otherwise ${DESC}
+    local expid="${corpus}/${rev}/base"
+  else
+    local expid="${corpus}/${rev}/${DESC}"
+  fi
+  local output_dir="benchmark_results/${expid}"
+  local bucket="eval/${USER}/${expid}"
+  local indexhtml="index.$(echo ${codec} | tr ':' '_').html"
+  local url="${URLROOT}/${bucket}/${indexhtml}"
+  local use_decompressed=(--save_decompressed --html_report_use_decompressed)
+  if [[ "${codec:0:4}" == "jpeg" ]]; then
+    use_decompressed=(--nohtml_report_use_decompressed)
+  fi
+  (
+   cd "${BUILD_DIR}"
+   tools/benchmark_xl \
+     --output_dir "${output_dir}" \
+     --input "${HOME}/corpora/${corpus}/*.??g" \
+     --codec="${codec}" \
+     --save_compressed \
+     --write_html_report \
+     "${use_decompressed[@]}" \
+     --originals_url="${originals}" \
+     "$@"
+   gsutil -m rsync "${output_dir}" "${GSROOT}/${bucket}"
+   echo "You can view evaluation results at:"
+   echo "${url}"
+  )
+}
+
+cmd_upload_corpus() {  # arg: CORPUS; rsyncs the local corpus directory to ${GSROOT}
+  local corpus="$1"
+  gsutil -m rsync "${HOME}/corpora/${corpus}" "${GSROOT}/corpora/${corpus}"
+}
+
+cmd_report() {  # args: CORPUS CODEC [extra args forwarded to create_report]
+  local corpus="$1"
+  local codec="$2"
+  if [[ "${codec}" == *","* ]]; then  # a comma means several codecs were given
+    echo "Multiple codecs are not allowed in html report"
+    exit 1
+  fi
+  download_corpus "${corpus}"
+  if [[ "${codec:0:4}" == "jpeg" ]]; then
+    build_mozjpeg
+    export LD_LIBRARY_PATH="${HOME}/mozjpeg/build:${LD_LIBRARY_PATH:-}"
+  fi
+  build_libjxl
+  create_report "$@"
+}
+
+main() {  # dispatches "CMD args..." to the function named cmd_CMD
+  local cmd="${1:-}"
+  if [[ -z "${cmd}" ]]; then
+    cat >&2 <<EOF
+Use: $0 CMD
+
+Where CMD is one of:
+ upload_corpus CORPUS
+   Upload the image corpus in $HOME/corpora/CORPUS to the cloud
+ report CORPUS CODEC
+   Build and run benchmark of codec CODEC on image corpus CORPUS and upload
+   the results to the cloud. If the codec is jpeg, the mozjpeg library will be
+   built and used through LD_LIBRARY_PATH
+EOF
+    echo "Usage $0 CMD"
+    exit 1
+  fi
+  cmd="cmd_${cmd}"
+  shift  # drop CMD so "$@" holds only the command's own arguments
+  set -x
+  "${cmd}" "$@"
+}
+
+main "$@"
diff --git a/third-party/libjxl/libjxl/tools/scripts/ossfuzz-build.sh b/third-party/libjxl/libjxl/tools/scripts/ossfuzz-build.sh
new file mode 100755
index 0000000000..b5fbb45b10
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/scripts/ossfuzz-build.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Helper builder file to replace the /src/build.sh one in oss-fuzz/
+
+if [[ -z "${FUZZING_ENGINE:-}" ]]; then
+  echo "Don't call this script directly. Use ./ci.sh ossfuzz_* commands" \
+    "instead." >&2
+  exit 1
+fi
+
+set -eux
+
+main() {
+  # Build the fuzzers in release mode but force the inclusion of JXL_DASSERT()
+  # checks.
+  build_args=(
+    -G Ninja
+    -DBUILD_TESTING=OFF
+    -DJPEGXL_ENABLE_BENCHMARK=OFF
+    -DJPEGXL_ENABLE_DEVTOOLS=ON
+    -DJPEGXL_ENABLE_EXAMPLES=OFF
+    -DJPEGXL_ENABLE_FUZZERS=ON
+    -DJPEGXL_ENABLE_MANPAGES=OFF
+    -DJPEGXL_ENABLE_SJPEG=OFF
+    -DJPEGXL_ENABLE_VIEWERS=OFF
+    -DCMAKE_BUILD_TYPE=Release
+  )
+  export CXXFLAGS="${CXXFLAGS:-} -DJXL_IS_DEBUG_BUILD=1"  # `:-` keeps `set -u` happy
+
+  mkdir -p "${WORK}"
+  cd "${WORK}"
+  cmake \
+    "${build_args[@]}" \
+    -DJPEGXL_FUZZER_LINK_FLAGS="${LIB_FUZZING_ENGINE}" \
+    "${SRC}/libjxl"
+
+  fuzzers=(
+    color_encoding_fuzzer
+    djxl_fuzzer
+    fields_fuzzer
+    icc_codec_fuzzer
+    rans_fuzzer
+    transforms_fuzzer
+  )
+  if [[ -n "${JPEGXL_EXTRA_ARGS:-}" ]]; then
+    # Extra arguments passed to ci.sh ossfuzz commands are treated as ninja
+    # targets. The environment variable is split into individual targets here,
+    # which might break if passing paths with spaces, which is an unlikely use
+    # case.
+    fuzzers=(${JPEGXL_EXTRA_ARGS})
+    echo "Building with targets: ${JPEGXL_EXTRA_ARGS}"
+  fi
+  ninja "${fuzzers[@]}"
+}
+
+# Build as the regular user if not already running as that user. This avoids
+# having root files in the build directory.
+if [[ -n "${JPEGXL_UID:-}" && "${JPEGXL_UID}" != "$(id -u)" ]]; then
+  userspec="${JPEGXL_UID}:${JPEGXL_GID}"
+  unset JPEGXL_UID
+  unset JPEGXL_GID
+  chroot --skip-chdir --userspec="${userspec}" \
+    / "$(realpath "$0")" "$@"
+  exit $?
+fi
+
+main "$@"
diff --git a/third-party/libjxl/libjxl/tools/scripts/progressive_saliency.conf b/third-party/libjxl/libjxl/tools/scripts/progressive_saliency.conf
new file mode 100644
index 0000000000..987651a431
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/scripts/progressive_saliency.conf
@@ -0,0 +1,32 @@
+# Configuration parameters for progressive-saliency encoding.
+# (They are too many and too complex for command-line arguments.)
+
+# The total number of seconds for the simulated progressive-loading animation.
+simulated_progressive_loading_time_sec: 8.0
+
+# Time delay after the last progressive-loading step before the animation loops.
+simulated_progressive_loading_delay_until_looparound_sec: 10.0
+
+# The JPEG-XL encoding command, as one would pass it to the shell,
+# but with parameters ${HEATMAP_ARG}, ${INPUT}, ${OUTPUT}, ${STEPS}.
+jpegxl_encoder: cjpegxl pik ${INPUT} ${OUTPUT} --progressive --saliency_num_progressive_steps ${STEPS} --fast --saliency_threshold 0.8 ${HEATMAP_ARG}
+
+# The JPEG-XL decoding command, as one would pass it to the shell,
+# but with parameters ${INPUT}, ${OUTPUT}.
+jpegxl_decoder: djpegxl ${INPUT} ${OUTPUT}
+
+# The shell command to use for heatmap-generation.
+# This must adhere to the calling conventions stated below.
+#
+# When called as:
+#   {heatmap_command} {blocksize} {input_image_filename} {coarse_grained_input_filename} {output_heatmap_filename}
+# This must produce: {output_heatmap_filename} in a format that is readable by the JPEG-XL encoder, and provides one
+# grayscale value per image-block which encodes saliency - ideally in the form of block-percentiles.
+heatmap_command: ml_get_high_level_saliency
+
+# How much to blur each of the four progressive stages.
+blurring: 16x4 16x1.5 0x0 0x0
+
+# Whether to keep tempfiles.
+# Temporary files will be named by appending suffixes to the desired final output filename.
+keep_tempfiles: True
diff --git a/third-party/libjxl/libjxl/tools/scripts/progressive_sizes.sh b/third-party/libjxl/libjxl/tools/scripts/progressive_sizes.sh
new file mode 100755
index 0000000000..08d3079e95
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/scripts/progressive_sizes.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+
+set -eu
+
+TMPDIR=$(mktemp -d)
+
+cleanup() {
+  rm -rf "${TMPDIR}"
+}
+
+trap cleanup EXIT
+
+# Encoder/decoder are taken from <repo>/build, relative to this script.
+
+CJXL="$(realpath "$(dirname "$0")")/../../build/tools/cjxl"
+DJXL="$(realpath "$(dirname "$0")")/../../build/tools/djxl"
+
+"${CJXL}" "$@" "${TMPDIR}/x.jxl" &>/dev/null
+S1=$("${DJXL}" "${TMPDIR}/x.jxl" --print_read_bytes -s 1 2>&1 | grep 'Decoded' | grep -o '[0-9]*')
+S2=$("${DJXL}" "${TMPDIR}/x.jxl" --print_read_bytes -s 2 2>&1 | grep 'Decoded' | grep -o '[0-9]*')
+S8=$("${DJXL}" "${TMPDIR}/x.jxl" --print_read_bytes -s 8 2>&1 | grep 'Decoded' | grep -o '[0-9]*')
+
+echo "8x: $S8 2x: $S2 1x: $S1"
diff --git a/third-party/libjxl/libjxl/tools/scripts/reference_zip.sh b/third-party/libjxl/libjxl/tools/scripts/reference_zip.sh
new file mode 100755
index 0000000000..6a284b43f7
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/scripts/reference_zip.sh
@@ -0,0 +1,73 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Tool to create the reference software .zip package with its required
+# dependencies bundled.
+
+set -eu
+
+MYDIR=$(dirname $(realpath "$0"))
+
+# Temporary files cleanup hooks.
+CLEANUP_FILES=()
+cleanup() {
+  # Nothing was registered for removal: leave the filesystem untouched.
+  [[ ${#CLEANUP_FILES[@]} -ne 0 ]] || return 0
+  rm -fr "${CLEANUP_FILES[@]}"
+}
+trap 'retcode=$?; { set +x; } 2>/dev/null; cleanup' INT TERM EXIT
+
+
+main() {
+  # Run from the repo's top level directory (this script is in tools/scripts/).
+  cd "${MYDIR}/../.."
+
+  local deps=(
+    third_party/brotli
+    third_party/highway
+    third_party/skcms
+  )
+
+  local ref_files=($(git ls-files))
+  for dep in "${deps[@]}"; do
+    local dep_files=($(git -C "${dep}" ls-files))
+    for dep_file in "${dep_files[@]}"; do
+      ref_files+=("${dep}/${dep_file}")
+    done
+  done
+
+  echo "Packaging ${#ref_files[@]} files..." >&2
+  local dest_zip="reference_package.zip"
+  rm -f "${dest_zip}"
+  printf '%s\n' "${ref_files[@]}" | zip -q -@ "${dest_zip}"
+
+  if [[ "${1:-}" == "test" ]]; then
+    echo "Testing on docker..." >&2
+    set -x
+    sudo docker run --rm -i -v "$(realpath ${dest_zip}):/home/pkg.zip:ro" \
+      ubuntu:20.04 <<EOF
+set -eux
+
+apt update
+DEBIAN_FRONTEND=noninteractive apt install -y build-essential zip cmake
+
+cd /home/
+unzip -q pkg.zip
+mkdir build
+cd build
+cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF -DJPEGXL_ENABLE_SJPEG=OFF ..
+cmake --build . -- -j\$(nproc)
+
+tools/djxl ../testdata/jxl/blending/cropped_traffic_light.jxl test.png
+tools/cjxl ../testdata/jxl/flower/flower.png.im_q85_444.jpg test.jxl
+tools/djxl test.jxl test.jpg
+EOF
+    set +x
+  fi
+  echo "${dest_zip} ready."
+}
+
+main "$@"
diff --git a/third-party/libjxl/libjxl/tools/scripts/roundtrip_test.sh b/third-party/libjxl/libjxl/tools/scripts/roundtrip_test.sh
new file mode 100644
index 0000000000..852a604714
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/scripts/roundtrip_test.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# End-to-end roundtrip tests for cjxl and djxl tools.
+
+MYDIR=$(dirname $(realpath "$0"))
+JPEGXL_TEST_DATA_PATH="${MYDIR}/../../testdata"
+
+set -eux
+
+# Temporary files cleanup hooks.
+CLEANUP_FILES=()
+cleanup() {
+  # Nothing was registered for removal: leave the filesystem untouched.
+  [[ ${#CLEANUP_FILES[@]} -ne 0 ]] || return 0
+  rm -rf "${CLEANUP_FILES[@]}"
+}
+trap 'retcode=$?; { set +x; } 2>/dev/null; cleanup' INT TERM EXIT
+
+roundtrip_lossless_pnm_test() {  # arg: testdata-relative input file
+  local infn="${JPEGXL_TEST_DATA_PATH}/$1"
+  local jxlfn="$(mktemp -p "$tmpdir")"
+  local outfn="$(mktemp -p "$tmpdir").${infn: -3}"  # reuse the input's 3-char extension
+
+  "${encoder}" "${infn}" "${jxlfn}" -d 0 -e 1
+  "${decoder}" "${jxlfn}" "${outfn}"
+  diff "${infn}" "${outfn}"  # decoded output must be byte-identical to the input
+}
+
+roundtrip_test() {  # args: testdata-relative input, encoder args, max distance
+  local infn="${JPEGXL_TEST_DATA_PATH}/$1"
+  local encargs="$2"
+  local maxdist="$3"
+  local jxlfn="$(mktemp -p "$tmpdir")"
+
+  "${encoder}" "${infn}" "${jxlfn}" $encargs  # unquoted: encargs may hold several flags
+
+  if [ "${infn: -3}" == "jpg" ]; then
+      local outfn="$(mktemp -p "$tmpdir").jpg"
+
+      # Test lossless jpeg reconstruction.
+      "${decoder}" "${jxlfn}" "${outfn}" --num_reps 2
+      diff "${infn}" "${outfn}"
+
+      # Test decoding to pixels.
+      "${decoder}" "${jxlfn}" "${outfn}" --num_reps 2 --pixels_to_jpeg
+      local dist="$("${comparator}" "${infn}" "${outfn}")"
+      python3 -c "import sys; sys.exit(not ${dist} > 0.0)"
+      python3 -c "import sys; sys.exit(not ${dist} < 0.005)"
+      
+      # Test decoding to pixels by setting the --jpeg_quality flag.
+      "${decoder}" "${jxlfn}" "${outfn}" --num_reps 2 --jpeg_quality 100
+      local dist="$("${comparator}" "${infn}" "${outfn}")"
+      python3 -c "import sys; sys.exit(not ${dist} > 0.0)"
+      python3 -c "import sys; sys.exit(not ${dist} < 0.005)"
+
+      # Test decoding to pixels by writing to a png.
+      outfn="$(mktemp -p "$tmpdir").png"
+      "${decoder}" "${jxlfn}" "${outfn}" --num_reps 2
+      local dist="$("${comparator}" "${infn}" "${outfn}")"
+      python3 -c "import sys; sys.exit(not ${dist} > 0.0)"
+      python3 -c "import sys; sys.exit(not ${dist} < 0.005)"
+  else
+      # Test decoding to png.
+      local outfn="$(mktemp -p "$tmpdir").png"
+      "${decoder}" "${jxlfn}" "${outfn}" --num_reps 2
+      local dist="$("${comparator}" "${infn}" "${outfn}")"
+      python3 -c "import sys; sys.exit(not ${dist} <= ${maxdist})"
+
+      # Test decoding to 16 bit png.
+      "${decoder}" "${jxlfn}" "${outfn}" --bits_per_sample 16
+      local dist="$("${comparator}" "${infn}" "${outfn}")"
+      python3 -c "import sys; sys.exit(not ${dist} <= ${maxdist} + 0.0005)"
+
+      # Test decoding to pfm.
+      local outfn="$(mktemp -p "$tmpdir").pfm"
+      "${decoder}" "${jxlfn}" "${outfn}"
+      local dist="$("${comparator}" "${infn}" "${outfn}")"
+      python3 -c "import sys; sys.exit(not ${dist} <= ${maxdist})"
+
+      # Test decoding to ppm.
+      local outfn="$(mktemp -p "$tmpdir").ppm"
+      "${decoder}" "${jxlfn}" "${outfn}"
+      local dist="$("${comparator}" "${infn}" "${outfn}")"
+      python3 -c "import sys; sys.exit(not ${dist} <= ${maxdist})"
+
+      # Test decoding to 16 bit ppm.
+      "${decoder}" "${jxlfn}" "${outfn}" --bits_per_sample 16
+      local dist="$("${comparator}" "${infn}" "${outfn}")"
+      python3 -c "import sys; sys.exit(not ${dist} <= ${maxdist} + 0.0005)"
+
+      # Test decoding to jpg.
+      outfn="$(mktemp -p "$tmpdir").jpg"
+      "${decoder}" "${jxlfn}" "${outfn}" --num_reps 2
+      local dist="$("${comparator}" "${infn}" "${outfn}")"
+      python3 -c "import sys; sys.exit(not ${dist} <= ${maxdist} + 0.05)"
+  fi
+}
+
+main() {
+  # Scratch directory for every intermediate file; registered for cleanup.
+  local tmpdir=$(mktemp -d)
+  CLEANUP_FILES+=("${tmpdir}")
+
+  # Build directory: first argument, or <repo>/build when none is given.
+  local build_dir="${1:-}"
+  [[ -n "${build_dir}" ]] || build_dir=$(realpath "${MYDIR}/../../build")
+
+  local encoder="${build_dir}/tools/cjxl"
+  local decoder="${build_dir}/tools/djxl"
+  local comparator="${build_dir}/tools/ssimulacra_main"
+
+  roundtrip_test "jxl/flower/flower_small.rgb.png" "-e 1" 0.02
+  roundtrip_test "jxl/flower/flower_small.rgb.png" "-e 1 -d 0.0" 0.0
+  roundtrip_test "jxl/flower/flower_cropped.jpg" "-e 1" 0.0
+
+  roundtrip_lossless_pnm_test "jxl/flower/flower_small.rgb.depth1.ppm"
+  roundtrip_lossless_pnm_test "jxl/flower/flower_small.g.depth1.pgm"
+  for depth in $(seq 2 16); do
+      roundtrip_lossless_pnm_test "jxl/flower/flower_small.rgb.depth${depth}.ppm"
+      roundtrip_lossless_pnm_test "jxl/flower/flower_small.g.depth${depth}.pgm"
+      roundtrip_lossless_pnm_test "jxl/flower/flower_small.ga.depth${depth}.pam"
+      roundtrip_lossless_pnm_test "jxl/flower/flower_small.rgba.depth${depth}.pam"
+  done
+}
+
+main "$@"
diff --git a/third-party/libjxl/libjxl/tools/set_from_bytes_fuzzer.cc b/third-party/libjxl/libjxl/tools/set_from_bytes_fuzzer.cc
new file mode 100644
index 0000000000..abf3f76cd9
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/set_from_bytes_fuzzer.cc
@@ -0,0 +1,36 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/size_constraints.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/codec_in_out.h"
+#include "tools/thread_pool_internal.h"
+
+namespace {
+
+int TestOneInput(const uint8_t* data, size_t size) {
+  // Caps handed to the decoder: 2^16 for x/y size, 2^22 for the pixel count.
+  jxl::CodecInOut decoded;
+  jxl::SizeConstraints limits;
+  limits.dec_max_xsize = 1u << 16;
+  limits.dec_max_ysize = 1u << 16;
+  limits.dec_max_pixels = 1u << 22;
+  jpegxl::tools::ThreadPoolInternal thread_pool(0);
+  const jxl::Span<const uint8_t> bytes(data, size);
+  // The status is discarded on purpose: malformed input may simply fail.
+  (void)jxl::SetFromBytes(bytes, &decoded, &thread_pool, &limits);
+  return 0;
+}
+
+}  // namespace
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  return TestOneInput(data, size);  // libFuzzer entry point; forwards as-is.
+}
diff --git a/third-party/libjxl/libjxl/tools/speed_stats.cc b/third-party/libjxl/libjxl/tools/speed_stats.cc
new file mode 100644
index 0000000000..d378d09980
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/speed_stats.cc
@@ -0,0 +1,121 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/speed_stats.h"
+
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS
+#endif
+
+#include <inttypes.h>
+#include <math.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <string>
+
+namespace jpegxl {
+namespace tools {
+
+void SpeedStats::NotifyElapsed(double elapsed_seconds) {
+  // Keeps strictly positive samples only; 0, negatives and NaN are dropped.
+  if (!(elapsed_seconds > 0.0)) return;
+  elapsed_.push_back(elapsed_seconds);
+}
+
+bool SpeedStats::GetSummary(SpeedStats::Summary* s) {
+  // Fills *s from the recorded samples; returns false if there are none.
+  const size_t reps = elapsed_.size();
+  if (reps == 0) return false;
+
+  // The extremes always take every sample into account.
+  s->max = *std::max_element(elapsed_.begin(), elapsed_.end());
+  s->min = *std::min_element(elapsed_.begin(), elapsed_.end());
+
+  // One or two reps: report a single sample. With two, the second one is
+  // used because the first is noisier.
+  if (reps <= 2) {
+    s->central_tendency = elapsed_[reps - 1];
+    s->variability = 0.0;
+    s->type = (reps == 1) ? "" : " second:";
+    return true;
+  }
+
+  // Prefer the geomean of all samples but the first, unless numerically
+  // unreliable (too many reps).
+  if (pow(elapsed_[0], reps) < 1E100) {
+    double product = 1.0;
+    for (auto it = elapsed_.begin() + 1; it != elapsed_.end(); ++it) {
+      product *= *it;
+    }
+
+    s->type = " geomean:";
+    s->variability = 0.0;
+    s->central_tendency = pow(product, 1.0 / (reps - 1));
+    if (isnormal(s->central_tendency)) return true;
+  }
+
+  // Else: median (element at index reps / 2). This sorts elapsed_ in place.
+  std::sort(elapsed_.begin(), elapsed_.end());
+  const double median = elapsed_[reps / 2];
+  // Square root of the summed squared deviations from the median; the sum is
+  // not divided by the number of reps.
+  double sum_sq = 0;
+  for (const double sample : elapsed_) {
+    const double diff = sample - median;
+    sum_sq += diff * diff;
+  }
+  s->central_tendency = median;
+  s->variability = sqrt(sum_sq);
+  s->type = " median:";
+  return true;
+}
+
+namespace {
+
+std::string SummaryStat(double value, const char* unit,
+                        const SpeedStats::Summary& s) {
+  // Formats value per elapsed second; returns "" when value is zero.
+  if (value == 0.) return "";
+  const double value_tendency = value / s.central_tendency;
+  // Note flipped order: higher elapsed = lower mpps.
+  const double value_min = value / s.max;
+  const double value_max = value / s.min;
+  // Buffers are sized generously: snprintf silently truncates, and a 20-byte
+  // buffer would cut off " (stdev %.3f)" for values of 1e6 and above.
+  char variability[64] = {'\0'};
+  if (s.variability != 0.0) {
+    const double stdev = value / s.variability;
+    snprintf(variability, sizeof(variability), " (stdev %.3f)", stdev);
+  }
+  char stat_str[256] = {'\0'};
+  snprintf(stat_str, sizeof(stat_str), ",%s %.3f %s/s [%.2f, %.2f]%s", s.type,
+           value_tendency, unit, value_min, value_max, variability);
+  return stat_str;
+}
+
+}  // namespace
+
+bool SpeedStats::Print(size_t worker_threads) {
+  Summary summary;
+  if (!GetSummary(&summary)) return false;
+  // Each fragment is empty when the corresponding size is zero.
+  const std::string megapixels =
+      SummaryStat(xsize_ * ysize_ * 1e-6, "MP", summary);
+  const std::string megabytes = SummaryStat(file_size_ * 1e-6, "MB", summary);
+  const uint64_t width = xsize_;
+  const uint64_t height = ysize_;
+  const uint64_t reps = elapsed_.size();
+  const uint64_t threads = worker_threads;
+  fprintf(stderr,
+          "%" PRIu64 " x %" PRIu64 "%s%s, %" PRIu64 " reps, %" PRIu64
+          " threads.\n",
+          width, height, megapixels.c_str(), megabytes.c_str(), reps, threads);
+  return true;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/speed_stats.h b/third-party/libjxl/libjxl/tools/speed_stats.h
new file mode 100644
index 0000000000..870523f6f1
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/speed_stats.h
@@ -0,0 +1,61 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_SPEED_STATS_H_
+#define TOOLS_SPEED_STATS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+namespace jpegxl {
+namespace tools {
+
+class SpeedStats {
+ public:
+  void NotifyElapsed(double elapsed_seconds);  // Only values > 0 are recorded.
+
+  struct Summary {
+    // How central_tendency was computed - depends on number of reps.
+    const char* type;
+
+    // Elapsed time, in the unit passed to NotifyElapsed (seconds).
+    double central_tendency;
+    double min;
+    double max;
+    double variability;  // 0.0 unless the median is reported.
+  };
+
+  // Returns false if there are no samples. Non-const, may sort elapsed_.
+  bool GetSummary(Summary* summary);
+
+  // Sets the image size to allow computing MP/s values.
+  void SetImageSize(size_t xsize, size_t ysize) {
+    xsize_ = xsize;
+    ysize_ = ysize;
+  }
+
+  // Sets the file size to allow computing MB/s values.
+  void SetFileSize(size_t file_size) { file_size_ = file_size; }
+
+  // Calls GetSummary and prints MP/s and MB/s to stderr; returns false if
+  // there are no samples. Call SetImageSize() first to get MP/s values.
+  bool Print(size_t worker_threads);
+
+ private:
+  std::vector<double> elapsed_;  // One entry per accepted NotifyElapsed().
+  size_t xsize_ = 0;
+  size_t ysize_ = 0;
+
+  // Size of the source binary file, meaningful when decoding a recompressed
+  // JPEG.
+  size_t file_size_ = 0;
+};
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_SPEED_STATS_H_
diff --git a/third-party/libjxl/libjxl/tools/ssimulacra.cc b/third-party/libjxl/libjxl/tools/ssimulacra.cc
new file mode 100644
index 0000000000..9ce61b9c74
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/ssimulacra.cc
@@ -0,0 +1,331 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Re-implementation of //tools/ssimulacra.txt using jxl's
+// ImageF library instead of opencv.
+
+#include "tools/ssimulacra.h"
+
+#include <cmath>
+
+#include "lib/jxl/gauss_blur.h"
+#include "lib/jxl/image_ops.h"
+
+namespace ssimulacra {
+namespace {
+
+using jxl::Image3F;
+using jxl::ImageF;
+
+static const float kC1 = 0.0001f;  // Stabilizer of the SSIM mean term.
+static const float kC2 = 0.0004f;  // Stabilizer of the SSIM variance term.
+static const int kNumScales = 6;  // Maximum number of scales evaluated.
+// [scale][channel] weights; chroma columns premultiplied by chroma weight 0.2
+static const double kScaleWeights[kNumScales][3] = {
+    {0.04480, 0.00300, 0.00300}, {0.28560, 0.00896, 0.00896},
+    {0.30010, 0.05712, 0.05712}, {0.23630, 0.06002, 0.06002},
+    {0.13330, 0.06726, 0.06726}, {0.10000, 0.05000, 0.05000},
+};
+// [scale][channel] weights of the minimum; premultiplied by 0.1, 0.005, 0.005
+const double kMinScaleWeights[kNumScales][3] = {
+    {0.02000, 0.00005, 0.00005}, {0.03000, 0.00025, 0.00025},
+    {0.02500, 0.00100, 0.00100}, {0.02000, 0.00150, 0.00150},
+    {0.01200, 0.00175, 0.00175}, {0.00500, 0.00175, 0.00175},
+};
+const double kEdgeWeight[3] = {1.5, 0.1, 0.1};  // Per channel, scale 0 only.
+const double kGridWeight[3] = {1.0, 0.1, 0.1};  // Per channel, scale 0 only.
+
+inline void Rgb2Lab(float r, float g, float b, float* L, float* A, float* B) {
+  // Linear RGB -> L, a, b; cube root above kThreshold, linear segment below.
+  const float kThreshold = 0.00885645167903563081f;
+  const float kOffset = 0.13793103448275862068f;
+  const float kSlope = 7.78703703703703703703f;
+  const auto f = [=](float t) {
+    return (t > kThreshold) ? powf(t, 1.0f / 3.0f) - kOffset : kSlope * t;
+  };
+  const float x = f(r * 0.43393624408206207259f + g * 0.37619779063650710152f +
+                    b * 0.18983429773803261441f);
+  const float y = f(r * 0.2126729f + g * 0.7151522f + b * 0.0721750f);
+  const float z = f(r * 0.01775381083562901744f + g * 0.10945087235996326905f +
+                    b * 0.87263921028466483011f);
+  *L = y * 1.16f;
+  *A = 0.39181818181818181818f + 2.27272727272727272727f * (x - y);
+  *B = 0.49045454545454545454f + 0.90909090909090909090f * (y - z);
+}
+
+Image3F Rgb2Lab(const Image3F& in) {
+  // Per-pixel scalar Rgb2Lab: planes 0/1/2 are read as r/g/b, written as L/A/B.
+  const size_t xsize = in.xsize();
+  Image3F lab(xsize, in.ysize());
+  for (size_t y = 0; y < in.ysize(); ++y) {
+    const float* JXL_RESTRICT r = in.PlaneRow(0, y);
+    const float* JXL_RESTRICT g = in.PlaneRow(1, y);
+    const float* JXL_RESTRICT b = in.PlaneRow(2, y);
+    float* JXL_RESTRICT out_l = lab.PlaneRow(0, y);
+    float* JXL_RESTRICT out_a = lab.PlaneRow(1, y);
+    float* JXL_RESTRICT out_b = lab.PlaneRow(2, y);
+    for (size_t x = 0; x < xsize; ++x) {
+      Rgb2Lab(r[x], g[x], b[x], out_l + x, out_a + x, out_b + x);
+    }
+  }
+  return lab;
+}
+
+Image3F Downsample(const Image3F& in, size_t fx, size_t fy) {
+  // Box-averages fx-by-fy blocks; reads past the border clamp to the edge.
+  const size_t out_xsize = (in.xsize() + fx - 1) / fx;
+  const size_t out_ysize = (in.ysize() + fy - 1) / fy;
+  const float inv_area = 1.0f / (fx * fy);
+  Image3F result(out_xsize, out_ysize);
+  for (size_t c = 0; c < 3; ++c) {
+    for (size_t oy = 0; oy < out_ysize; ++oy) {
+      float* JXL_RESTRICT row_out = result.PlaneRow(c, oy);
+      for (size_t ox = 0; ox < out_xsize; ++ox) {
+        float acc = 0.0f;
+        for (size_t iy = 0; iy < fy; ++iy) {
+          const size_t y = std::min(oy * fy + iy, in.ysize() - 1);
+          for (size_t ix = 0; ix < fx; ++ix) {
+            acc += in.PlaneRow(c, y)[std::min(ox * fx + ix, in.xsize() - 1)];
+          }
+        }
+        row_out[ox] = acc * inv_area;
+      }
+    }
+  }
+  return result;
+}
+
+void Multiply(const Image3F& a, const Image3F& b, Image3F* mul) {
+  // Stores the per-pixel product of a and b in *mul. Iterates over a's
+  // dimensions; b and *mul are not size-checked here.
+  for (size_t c = 0; c < 3; ++c) {
+    for (size_t y = 0; y < a.ysize(); ++y) {
+      const float* JXL_RESTRICT lhs = a.PlaneRow(c, y);
+      const float* JXL_RESTRICT rhs = b.PlaneRow(c, y);
+      float* JXL_RESTRICT product = mul->PlaneRow(c, y);
+      for (size_t x = 0; x < a.xsize(); ++x) product[x] = lhs[x] * rhs[x];
+    }
+  }
+}
+
+void RowColAvgP2(const ImageF& in, double* rp2, double* cp2) {
+  // Outputs the 2nd-percentile-lowest row mean (*rp2) and column mean (*cp2).
+  std::vector<double> row_sums(in.ysize());
+  std::vector<double> col_sums(in.xsize());
+  for (size_t y = 0; y < in.ysize(); ++y) {
+    const float* row = in.Row(y);
+    for (size_t x = 0; x < in.xsize(); ++x) {
+      row_sums[y] += row[x];
+      col_sums[x] += row[x];
+    }
+  }
+  std::sort(row_sums.begin(), row_sums.end());
+  std::sort(col_sums.begin(), col_sums.end());
+  *rp2 = row_sums[row_sums.size() / 50] / in.xsize();
+  *cp2 = col_sums[col_sums.size() / 50] / in.ysize();
+}
+
+class StreamingAverage {
+ public:
+  // Folds v into the running mean incrementally (numerically stable method).
+  void Add(const float v) {
+    ++count_;
+    mean_ += (v - mean_) / count_;
+  }
+
+  // Mean of all values added so far; 0.0 before the first Add().
+  double Get() const { return mean_; }
+
+ private:
+  double mean_ = 0.0;
+  size_t count_ = 0;
+};
+
+void EdgeDiffMap(const Image3F& img1, const Image3F& mu1, const Image3F& img2,
+                 const Image3F& mu2, Image3F* out, double* plane_avg) {
+  for (size_t c = 0; c < 3; ++c) {
+    StreamingAverage avg;  // Mean of plane c of *out, for plane_avg[c].
+    for (size_t y = 0; y < img1.ysize(); ++y) {
+      const float* JXL_RESTRICT px1 = img1.PlaneRow(c, y);
+      const float* JXL_RESTRICT px2 = img2.PlaneRow(c, y);
+      const float* JXL_RESTRICT blur1 = mu1.PlaneRow(c, y);
+      const float* JXL_RESTRICT blur2 = mu2.PlaneRow(c, y);
+      float* JXL_RESTRICT row_out = out->PlaneRow(c, y);
+      for (size_t x = 0; x < img1.xsize(); ++x) {
+        const float added = std::abs(px2[x] - blur2[x]) -
+                            std::abs(px1[x] - blur1[x]);  // > 0: img2 is busier
+        row_out[x] = 1.0f - std::max(added, 0.0f);
+        avg.Add(row_out[x]);
+      }
+    }
+    plane_avg[c] = avg.Get();
+  }
+}
+
+// Gaussian blur (sigma 1.5) with scratch storage reused across images.
+class Blur {
+ public:
+  Blur(const size_t xsize, const size_t ysize)
+      : rg_(jxl::CreateRecursiveGaussian(1.5)), temp_(xsize, ysize) {}
+
+  // Blurs one plane into *out (no thread pool is used).
+  void operator()(const ImageF& in, ImageF* JXL_RESTRICT out) {
+    jxl::ThreadPool* no_pool = nullptr;
+    FastGaussian(rg_, in, no_pool, &temp_, out);
+  }
+
+  // Blurs all three planes into a newly allocated image.
+  Image3F operator()(const Image3F& in) {
+    Image3F blurred(in.xsize(), in.ysize());
+    for (size_t c = 0; c < 3; ++c) (*this)(in.Plane(c), &blurred.Plane(c));
+    return blurred;
+  }
+
+  // Shrinks the scratch image so the object can be reused at smaller scales.
+  void ShrinkTo(const size_t xsize, const size_t ysize) {
+    temp_.ShrinkTo(xsize, ysize);
+  }
+
+ private:
+  hwy::AlignedUniquePtr<jxl::RecursiveGaussian> rg_;
+  ImageF temp_;
+};
+
+void SSIMMap(const Image3F& m1, const Image3F& m2, const Image3F& s11,
+             const Image3F& s22, const Image3F& s12, Image3F* out,
+             double* plane_averages) {
+  for (size_t c = 0; c < 3; ++c) {
+    StreamingAverage avg;  // Mean of plane c of *out -> plane_averages[c].
+    for (size_t y = 0; y < out->ysize(); ++y) {  // *out sets the bounds.
+      const float* JXL_RESTRICT row_m1 = m1.PlaneRow(c, y);
+      const float* JXL_RESTRICT row_m2 = m2.PlaneRow(c, y);
+      const float* JXL_RESTRICT row_s11 = s11.PlaneRow(c, y);
+      const float* JXL_RESTRICT row_s22 = s22.PlaneRow(c, y);
+      const float* JXL_RESTRICT row_s12 = s12.PlaneRow(c, y);
+      float* JXL_RESTRICT row_out = out->PlaneRow(c, y);
+      for (size_t x = 0; x < out->xsize(); ++x) {
+        float mu1 = row_m1[x];
+        float mu2 = row_m2[x];
+        float mu11 = mu1 * mu1;
+        float mu22 = mu2 * mu2;
+        float mu12 = mu1 * mu2;
+        float nom_m = 2 * mu12 + kC1;  // Mean term, numerator.
+        float nom_s = 2 * (row_s12[x] - mu12) + kC2;  // Local covariance term.
+        float denom_m = mu11 + mu22 + kC1;  // Mean term, denominator.
+        float denom_s = (row_s11[x] - mu11) + (row_s22[x] - mu22) + kC2;
+        row_out[x] = (nom_m * nom_s) / (denom_m * denom_s);  // SSIM at (x, y).
+        avg.Add(row_out[x]);
+      }
+    }
+    plane_averages[c] = avg.Get();
+  }
+}
+
+}  // namespace
+
+double Ssimulacra::Score() const {
+  // Clamped total_weight / weighted_sum - 1; unweighted scales are skipped.
+  const size_t num_scales = std::min<size_t>(scales.size(), kNumScales);
+  double ssim = 0.0, ssim_max = 0.0;
+  for (size_t c = 0; c < 3; ++c) {
+    for (size_t scale = 0; scale < num_scales; ++scale) {
+      ssim += kScaleWeights[scale][c] * scales[scale].avg_ssim[c];
+      ssim_max += kScaleWeights[scale][c];
+      ssim += kMinScaleWeights[scale][c] * scales[scale].min_ssim[c];
+      ssim_max += kMinScaleWeights[scale][c];
+    }
+    if (!simple) {
+      ssim += kEdgeWeight[c] * avg_edgediff[c];
+      ssim_max += kEdgeWeight[c];
+      ssim += kGridWeight[c] *
+              (row_p2[0][c] + row_p2[1][c] + col_p2[0][c] + col_p2[1][c]);
+      ssim_max += 4.0 * kGridWeight[c];
+    }
+  }
+  return std::min(1.0, std::max(0.0, ssim_max / ssim - 1.0));
+}
+
+inline void PrintItem(const char* name, int scale, const double* vals,
+                      const double* w) {  // vals and w: three values each.
+  printf("scale %d %s = [%.10f %.10f %.10f]  w = [%.5f %.5f %.5f]\n", scale,
+         name, vals[0], vals[1], vals[2], w[0], w[1], w[2]);
+}
+
+void Ssimulacra::PrintDetails() const {
+  for (size_t s = 0; s < scales.size(); ++s) {
+    const int scale = static_cast<int>(s);
+    if (s < static_cast<size_t>(kNumScales)) {  // Scales that have weights.
+      PrintItem("avg ssim", scale, scales[s].avg_ssim, kScaleWeights[s]);
+      PrintItem("min ssim", scale, scales[s].min_ssim, kMinScaleWeights[s]);
+    }
+    if (s != 0 || simple) continue;  // Edge/grid terms: first scale only.
+    PrintItem("avg edif", scale, avg_edgediff, kEdgeWeight);
+    PrintItem("rp2 ssim", scale, &row_p2[0][0], kGridWeight);
+    PrintItem("cp2 ssim", scale, &col_p2[0][0], kGridWeight);
+    PrintItem("rp2 edif", scale, &row_p2[1][0], kGridWeight);
+    PrintItem("cp2 edif", scale, &col_p2[1][0], kGridWeight);
+  }
+}
+
+Ssimulacra ComputeDiff(const Image3F& orig, const Image3F& distorted,
+                       bool simple) {
+  // Value-initialized so the edge/grid terms are zero rather than
+  // indeterminate when the loop below exits before processing scale 0.
+  Ssimulacra ssimulacra{};
+  ssimulacra.simple = simple;
+  // NOTE: orig and distorted are assumed to have equal sizes (not checked).
+  Image3F img1 = Rgb2Lab(orig);
+  Image3F img2 = Rgb2Lab(distorted);
+
+  Image3F mul(orig.xsize(), orig.ysize());
+  Blur blur(img1.xsize(), img1.ysize());
+
+  for (int scale = 0; scale < kNumScales; scale++) {
+    if (img1.xsize() < 8 || img1.ysize() < 8) {
+      break;
+    }
+    if (scale) {
+      img1 = Downsample(img1, 2, 2);
+      img2 = Downsample(img2, 2, 2);
+    }
+    mul.ShrinkTo(img1.xsize(), img2.ysize());
+    blur.ShrinkTo(img1.xsize(), img2.ysize());
+
+    Multiply(img1, img1, &mul);
+    Image3F sigma1_sq = blur(mul);
+    Multiply(img2, img2, &mul);
+    Image3F sigma2_sq = blur(mul);
+    Multiply(img1, img2, &mul);
+    Image3F sigma12 = blur(mul);
+
+    Image3F mu1 = blur(img1);
+    Image3F mu2 = blur(img2);
+    // Reuse mul as "ssim_map".
+    SsimulacraScale sscale;
+    SSIMMap(mu1, mu2, sigma1_sq, sigma2_sq, sigma12, &mul, sscale.avg_ssim);
+
+    const Image3F ssim_map = Downsample(mul, 4, 4);
+    for (size_t c = 0; c < 3; c++) {
+      float minval, maxval;
+      ImageMinMax(ssim_map.Plane(c), &minval, &maxval);
+      sscale.min_ssim[c] = static_cast<double>(minval);
+    }
+    ssimulacra.scales.push_back(sscale);
+
+    if (scale == 0 && !simple) {
+      Image3F* edgediff = &sigma1_sq;  // reuse
+      EdgeDiffMap(img1, mu1, img2, mu2, edgediff, ssimulacra.avg_edgediff);
+      for (size_t c = 0; c < 3; c++) {
+        RowColAvgP2(ssim_map.Plane(c), &ssimulacra.row_p2[0][c],
+                    &ssimulacra.col_p2[0][c]);
+        RowColAvgP2(edgediff->Plane(c), &ssimulacra.row_p2[1][c],
+                    &ssimulacra.col_p2[1][c]);
+      }
+    }
+  }
+  return ssimulacra;
+}
+
+}  // namespace ssimulacra
diff --git a/third-party/libjxl/libjxl/tools/ssimulacra.h b/third-party/libjxl/libjxl/tools/ssimulacra.h
new file mode 100644
index 0000000000..95fc9de903
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/ssimulacra.h
@@ -0,0 +1,36 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_SSIMULACRA_H_
+#define TOOLS_SSIMULACRA_H_
+
+#include <vector>
+
+#include "lib/jxl/image.h"
+
+namespace ssimulacra {
+
+struct SsimulacraScale {
+  double avg_ssim[3];  // Mean of the SSIM map, per channel.
+  double min_ssim[3];  // Minimum of the 4x4-downsampled SSIM map, per channel.
+};
+
+struct Ssimulacra {
+  std::vector<SsimulacraScale> scales;  // One entry per processed scale.
+  double avg_edgediff[3];  // Mean of the edge-difference map (scale 0).
+  double row_p2[2][3];  // [0]: SSIM map, [1]: edge-diff map; per channel.
+  double col_p2[2][3];  // Same layout as row_p2, for columns.
+  bool simple;  // If true, Score() uses only the per-scale SSIM terms.
+
+  double Score() const;  // Clamped to [0, 1].
+  void PrintDetails() const;
+};
+
+Ssimulacra ComputeDiff(const jxl::Image3F& orig, const jxl::Image3F& distorted,
+                       bool simple);
+
+}  // namespace ssimulacra
+
+#endif  // TOOLS_SSIMULACRA_H_
diff --git a/third-party/libjxl/libjxl/tools/ssimulacra.txt b/third-party/libjxl/libjxl/tools/ssimulacra.txt
new file mode 100644
index 0000000000..cedda2ae13
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/ssimulacra.txt
@@ -0,0 +1,382 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+    SSIMULACRA - Structural SIMilarity Unveiling Local And Compression Related Artifacts
+
+    Cloudinary's variant of DSSIM, based on Philipp Klaus Krause's adaptation of Rabah Mehdi's SSIM implementation,
+    using ideas from Kornel Lesinski's DSSIM implementation as well as several new ideas.
+
+
+
+
+    Changes compared to Krause's SSIM implementation:
+    - Use C++ OpenCV API
+    - Convert sRGB to linear RGB and then to L*a*b*, to get a perceptually more accurate color space
+    - Multi-scale (6 scales)
+    - Extra penalty for specific kinds of artifacts:
+        - local artifacts
+        - grid-like artifacts (blockiness)
+        - introducing edges where the original is smooth (blockiness / color banding / ringing / mosquito noise)
+
+    Known limitations:
+    - Color profiles are ignored; input images are assumed to be sRGB.
+    - Both input images need to have the same number of channels (Grayscale / RGB / RGBA)
+*/
+
+/*
+    This DSSIM program has been created by Philipp Klaus Krause based on
+    Rabah Mehdi's C++ implementation of SSIM (http://mehdi.rabah.free.fr/SSIM).
+    Originally it has been created for the VMV '09 paper
+    "ftc - floating precision texture compression" by Philipp Klaus Krause.
+
+    The latest version of this program can probably be found somewhere at
+    http://www.colecovision.eu.
+
+    It can be compiled using g++ -I/usr/include/opencv -lcv -lhighgui dssim.cpp
+    Make sure OpenCV is installed (e.g. for Debian/ubuntu: apt-get install
+    libcv-dev libhighgui-dev).
+
+    DSSIM is described in
+    "Structural Similarity-Based Object Tracking in Video Sequences" by Loza et al.
+    however setting all Ci to 0 as proposed there results in numerical instabilities.
+    Thus this implementation used the Ci from the SSIM implementation.
+    SSIM is described in
+    "Image quality assessment: from error visibility to structural similarity" by Wang et al.
+*/
+
+/*
+    Copyright (c) 2005, Rabah Mehdi <mehdi.rabah@gmail.com>
+
+    Feel free to use it as you want and to drop me a mail
+    if it has been useful to you. Please let me know if you enhance it.
+    I'm not responsible if this program destroy your life & blablabla :)
+
+    Copyright (c) 2009, Philipp Klaus Krause <philipp@colecovision.eu>
+
+    Permission to use, copy, modify, and/or distribute this software for any
+    purpose with or without fee is hereby granted, provided that the above
+    copyright notice and this permission notice appear in all copies.
+
+    THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+    WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+    MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+    ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+    WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+    ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+    OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include <cv.hpp>
+#include <highgui.h>
+#include <stdio.h>
+#include <set>
+
+// comment this in to produce debug images that show the differences at each scale
+#define DEBUG_IMAGES 1
+using namespace std;
+using namespace cv;
+
+// All of the constants below are more or less arbitrary.
+// Some amount of tweaking/calibration was done, but there is certainly room for improvement.
+
+// SSIM constants. Original C2 was 0.0009, but a smaller value seems to work slightly better.
+const double C1 = 0.0001, C2 = 0.0004;
+
+// Weight of each scale. Somewhat arbitrary.
+// These are based on the values used in IW-SSIM and Kornel's DSSIM.
+// It seems weird to give so little weight to the full-size scale, but then again,
+// differences in more zoomed-out scales have more visual impact.
+// Anyway, these weights seem to work.
+// Added one more scale compared to IW-SSIM and Kornel's DSSIM.
+// Weights for chroma are modified to give more weight to larger scales (similar to Kornel's subsampled chroma)
+const float scale_weights[4][6] = {
+    // 1:1   1:2     1:4     1:8     1:16    1:32
+    {0.0448, 0.2856, 0.3001, 0.2363, 0.1333, 0.1  },
+    {0.015,  0.0448, 0.2856, 0.3001, 0.3363, 0.25 },
+    {0.015,  0.0448, 0.2856, 0.3001, 0.3363, 0.25 },
+    {0.0448, 0.2856, 0.3001, 0.2363, 0.1333, 0.1  },
+    };
+
+// higher value means more importance to chroma (weights above are multiplied by this factor for chroma and alpha)
+const double chroma_weight = 0.2;
+
+// Weights for the worst-case (minimum) score at each scale.
+// Higher value means more importance to worst artifacts, lower value means more importance to average artifacts.
+const float mscale_weights[4][6] = {
+    // 1:4   1:8     1:16    1:32   1:64   1:128
+    {0.2,    0.3,    0.25,   0.2,   0.12,  0.05},
+    {0.01,   0.05,   0.2,    0.3,   0.35,  0.35},
+    {0.01,   0.05,   0.2,    0.3,   0.35,  0.35},
+    {0.2,    0.3,    0.25,   0.2,   0.12,  0.05},
+    };
+
+
+// higher value means more importance to worst local artifacts
+const double min_weight[4] = {0.1,0.005,0.005,0.005};
+
+// higher value means more importance to artifact-edges (edges where original is smooth)
+const double extra_edges_weight[4] = {1.5, 0.1, 0.1, 0.5};
+
+// higher value means more importance to grid-like artifacts (blockiness)
+const double worst_grid_weight[2][4] = 
+    { {1.0, 0.1, 0.1, 0.5},             // on ssim heatmap
+      {1.0, 0.1, 0.1, 0.5} };           // on extra_edges heatmap
+
+
+// Convert linear RGB to L*a*b* (all in 0..1 range)
+inline void rgb2lab(Vec3f &p) __attribute__ ((hot));
+inline void rgb2lab(Vec3f &p) {
+    const float epsilon = 0.00885645167903563081f;
+    const float s = 0.13793103448275862068f;
+    const float k = 7.78703703703703703703f;
+
+    // D65 adjustment included
+    float fx = (p[2] * 0.43393624408206207259f + p[1] * 0.37619779063650710152f + p[0] * .18983429773803261441f) ;
+    float fy = (p[2] * 0.2126729f + p[1] * 0.7151522f + p[0] * 0.0721750f);
+    float fz = (p[2] * 0.01775381083562901744f + p[1] * 0.10945087235996326905f + p[0] * 0.87263921028466483011f) ;
+
+    float X = (fx > epsilon) ? powf(fx,1.0f/3.0f) - s : k * fx;
+    float Y = (fy > epsilon) ? powf(fy,1.0f/3.0f) - s : k * fy;
+    float Z = (fz > epsilon) ? powf(fz,1.0f/3.0f) - s : k * fz;
+
+    p[0] = Y * 1.16f;
+    p[1] = (0.39181818181818181818f + 2.27272727272727272727f * (X - Y));
+    p[2] = (0.49045454545454545454f + 0.90909090909090909090f * (Y - Z));
+}
+
+
+int main(int argc, char** argv) {
+
+    if(argc!=3) {
+        fprintf(stderr, "Usage: %s orig_image distorted_image\n", argv[0]);
+        fprintf(stderr, "Returns a value between 0 (images are identical) and 1 (images are very different)\n");
+        fprintf(stderr, "If the value is above 0.1 (or so), the distortion is likely to be perceptible / annoying.\n");
+        fprintf(stderr, "If the value is below 0.01 (or so), the distortion is likely to be imperceptible.\n");
+        return(-1);
+    }
+
+    Scalar sC1 = {C1,C1,C1,C1}, sC2 = {C2,C2,C2,C2};
+
+    Mat img1, img2, img1_img2, img1_temp, img2_temp, img1_sq, img2_sq, mu1, mu2, mu1_sq, mu2_sq, mu1_mu2, sigma1_sq, sigma2_sq, sigma12, ssim_map;
+
+    // read and validate input images
+
+    img1_temp = imread(argv[1],-1);
+    img2_temp = imread(argv[2],-1);
+
+    int nChan = img1_temp.channels();
+    if (nChan != img2_temp.channels()) {
+        fprintf(stderr, "Image file %s has %i channels, while\n", argv[1], nChan);
+        fprintf(stderr, "image file %s has %i channels. Can't compare.\n", argv[2], img2_temp.channels());
+        return -1;
+    }
+    if (img1_temp.size() != img2_temp.size()) {
+        fprintf(stderr,  "Image dimensions have to be identical.\n");
+        return -1;
+    }
+    if (img1_temp.cols < 8 || img1_temp.rows < 8) {
+        fprintf(stderr,  "Image is too small; need at least 8 rows and columns.\n");
+        return -1;
+    }
+    int pixels = img1_temp.rows * img1_temp.cols;
+    if (nChan == 4) {
+        // blend to a gray background to have a fair comparison of semi-transparent RGB values
+        for( int i=0 ; i < pixels; i++ ) {
+            Vec4b & p = img1_temp.at<Vec4b>(i);
+            p[0] = (p[3]*p[0] + (255-p[3])*128 ) / 255;
+            p[1] = (p[3]*p[1] + (255-p[3])*128 ) / 255;
+            p[2] = (p[3]*p[2] + (255-p[3])*128 ) / 255;
+        }
+        for( int i=0 ; i < pixels; i++ ) {
+            Vec4b & p = img2_temp.at<Vec4b>(i);
+            p[0] = (p[3]*p[0] + (255-p[3])*128 ) / 255;
+            p[1] = (p[3]*p[1] + (255-p[3])*128 ) / 255;
+            p[2] = (p[3]*p[2] + (255-p[3])*128 ) / 255;
+        }
+    }
+
+
+    if (nChan > 1) {
+    // Create lookup table to convert 8-bit sRGB to linear RGB
+    Mat sRGB_gamma_LUT(1, 256, CV_32FC1);
+    for (int i = 0; i < 256; i++) {
+        float c = i / 255.0;
+        sRGB_gamma_LUT.at<float>(i) = (c <= 0.04045 ? c / 12.92 : pow((c + 0.055) / 1.055, 2.4));
+    }
+
+    // Convert from sRGB to linear RGB
+    LUT(img1_temp, sRGB_gamma_LUT, img1);
+    LUT(img2_temp, sRGB_gamma_LUT, img2);
+    } else {
+        img1 = Mat(img1_temp.rows, img1_temp.cols, CV_32FC1);
+        img2 = Mat(img1_temp.rows, img1_temp.cols, CV_32FC1);
+    }
+
+    // Convert from linear RGB to Lab in a 0..1 range
+    if (nChan == 3) {
+      for( int i=0 ; i < pixels; i++ ) rgb2lab(img1.at<Vec3f>(i));
+      for( int i=0 ; i < pixels; i++ ) rgb2lab(img2.at<Vec3f>(i));
+    } else if (nChan == 4) {
+      for( int i=0 ; i < pixels; i++ ) { Vec3f p = {img1.at<Vec4f>(i)[0],img1.at<Vec4f>(i)[1],img1.at<Vec4f>(i)[2]}; rgb2lab(p); img1.at<Vec4f>(i)[0] = p[0]; img1.at<Vec4f>(i)[1] = p[1]; img1.at<Vec4f>(i)[2] = p[2];}
+      for( int i=0 ; i < pixels; i++ ) { Vec3f p = {img2.at<Vec4f>(i)[0],img2.at<Vec4f>(i)[1],img2.at<Vec4f>(i)[2]}; rgb2lab(p); img2.at<Vec4f>(i)[0] = p[0]; img2.at<Vec4f>(i)[1] = p[1]; img2.at<Vec4f>(i)[2] = p[2];}
+    } else if (nChan == 1) {
+      for( int i=0 ; i < pixels; i++ ) { img1.at<float>(i) = img1_temp.at<uchar>(i)/255.0;}
+      for( int i=0 ; i < pixels; i++ ) { img2.at<float>(i) = img2_temp.at<uchar>(i)/255.0;}
+    } else {
+        fprintf(stderr, "Can only deal with Grayscale, RGB or RGBA input.\n");
+        return(-1);
+    }
+
+
+    double dssim=0, dssim_max=0;
+
+    for (int scale = 0; scale < 6; scale++) {
+
+      if (img1.cols < 8 || img1.rows < 8) break;
+      if (scale) {
+        // scale down 50% in each iteration.
+        resize(img1, img1, Size(), 0.5, 0.5, INTER_AREA);
+        resize(img2, img2, Size(), 0.5, 0.5, INTER_AREA);
+      }
+
+      // Standard SSIM computation
+      cv::pow( img1, 2, img1_sq );
+      cv::pow( img2, 2, img2_sq );
+
+      multiply( img1, img2, img1_img2, 1 );
+
+      GaussianBlur(img1, mu1, Size(11,11), 1.5);
+      GaussianBlur(img2, mu2, Size(11,11), 1.5);
+
+      cv::pow( mu1, 2, mu1_sq );
+      cv::pow( mu2, 2, mu2_sq );
+      multiply( mu1, mu2, mu1_mu2, 1 );
+
+      GaussianBlur(img1_sq, sigma1_sq, Size(11,11), 1.5);
+      addWeighted( sigma1_sq, 1, mu1_sq, -1, 0, sigma1_sq );
+
+      GaussianBlur(img2_sq, sigma2_sq, Size(11,11), 1.5);
+      addWeighted( sigma2_sq, 1, mu2_sq, -1, 0, sigma2_sq );
+
+      GaussianBlur(img1_img2, sigma12, Size(11,11), 1.5);
+      addWeighted( sigma12, 1, mu1_mu2, -1, 0, sigma12 );
+
+      ssim_map = ((2*mu1_mu2 + sC1).mul(2*sigma12 + sC2))/((mu1_sq + mu2_sq + sC1).mul(sigma1_sq + sigma2_sq + sC2));
+
+
+      // optional: write a nice debug image that shows the problematic areas
+#ifdef DEBUG_IMAGES
+      Mat ssim_image;
+      ssim_map.convertTo(ssim_image,CV_8UC3,255);
+        for( int i=0 ; i < ssim_image.rows * ssim_image.cols; i++ ) {
+            Vec3b &p = ssim_image.at<Vec3b>(i);
+            p = {(uchar)(255-p[2]),(uchar)(255-p[0]),(uchar)(255-p[1])};
+        }
+      imwrite("debug-scale"+to_string(scale)+".png",ssim_image);
+#endif
+
+
+      // average ssim over the entire image
+      Scalar avg = mean( ssim_map );
+      for(unsigned int i = 0; i < nChan; i++) {
+        printf("avg: %i  %f\n",i,avg[i]);
+        dssim += (i>0?chroma_weight:1.0) * avg[i] * scale_weights[i][scale];
+        dssim_max += (i>0?chroma_weight:1.0) * scale_weights[i][scale];
+      }
+
+//      resize(ssim_map, ssim_map, Size(), 0.5, 0.5, INTER_AREA);
+
+
+      // the edge/blockiness penalty is only done for the fullsize images
+      if (scale == 0) {
+
+        // asymmetric: penalty for introducing edges where there are none (e.g. blockiness), no penalty for smoothing away edges
+        Mat edgediff = max(abs(img2 - mu2) - abs(img1 - mu1), 0);   // positive if img2 has an edge where img1 is smooth
+
+        // optional: write a nice debug image that shows the artifact edges
+#ifdef DEBUG_IMAGES
+        Mat edgediff_image;
+        edgediff.convertTo(edgediff_image,CV_8UC3,5000); // multiplying by more than 255 to make things easier to see
+        for( int i=0 ; i < pixels; i++ ) {
+            Vec3b &p = edgediff_image.at<Vec3b>(i);
+            p = {(uchar)(p[1]+p[2]),p[0],p[0]};
+        }
+        imwrite("debug-edgediff.png",edgediff_image);
+#endif
+
+        edgediff = Scalar(1.0,1.0,1.0,1.0) - edgediff;
+
+        avg = mean(edgediff);
+        for(unsigned int i = 0; i < nChan; i++) {
+          printf("extra_edges: %i  %f\n",i,avg[i]);
+          dssim +=  extra_edges_weight[i] * avg[i];
+          dssim_max +=  extra_edges_weight[i];
+        }
+
+        // grid-like artifact detection
+        // do the things below twice: once for the SSIM map, once for the artifact-edge map
+        Mat errormap;
+        for(int twice=0; twice < 2; twice++) {
+          if (twice == 0) errormap = ssim_map;
+          else errormap = edgediff;
+
+          // Find the 2nd percentile worst row. If the compression uses blocks, there will be artifacts around the block edges,
+          // so even with 32x32 blocks, the 2nd percentile will likely be one of the rows with block borders
+          multiset<double> row_scores[4];
+          for (int y = 0; y < errormap.rows; y++) {
+            Mat roi = errormap(Rect(0,y,errormap.cols,1));
+            Scalar ravg = mean(roi);
+            for (unsigned int i = 0; i < nChan; i++) row_scores[i].insert(ravg[i]);
+          }
+          for(unsigned int i = 0; i < nChan; i++) {
+            int k=0; for (const double& s : row_scores[i]) { if (k++ >= errormap.rows/50) { dssim += worst_grid_weight[twice][i] * s; 
+          printf("grid row %s %i:  %f\n",(twice?"edgediff":"ssimmap"),i,s);
+
+ break; } }
+            dssim_max += worst_grid_weight[twice][i];
+          }
+          // Find the 2nd percentile worst column. Same concept as above.
+          multiset<double> col_scores[4];
+          for (int x = 0; x < errormap.cols; x++) {
+            Mat roi = errormap(Rect(x,0,1,errormap.rows));
+            Scalar cavg = mean(roi);
+            for (unsigned int i = 0; i < nChan; i++) col_scores[i].insert(cavg[i]);
+          }
+          for(unsigned int i = 0; i < nChan; i++) {
+            int k=0; for (const double& s : col_scores[i]) { if (k++ >= errormap.cols/50) { dssim += worst_grid_weight[twice][i] * s; 
+          printf("grid col %s %i:  %f\n",(twice?"edgediff":"ssimmap"),i,s);
+
+break; } }
+            dssim_max += worst_grid_weight[twice][i];
+          }
+        }
+      }
+
+      // worst ssim in a particular 4x4 block (larger blocks are considered too because of multi-scale)
+      resize(ssim_map, ssim_map, Size(), 0.25, 0.25, INTER_AREA);
+//      resize(ssim_map, ssim_map, Size(), 0.5, 0.5, INTER_AREA);
+
+      Mat ssim_map_c[4];
+      split(ssim_map, ssim_map_c);
+      for (unsigned int i=0; i < nChan; i++) {
+        double minVal;
+        minMaxLoc(ssim_map_c[i], &minVal);
+          printf("worst %i:  %f\n",i,minVal);
+        dssim += min_weight[i]  * minVal * mscale_weights[i][scale];
+        dssim_max += min_weight[i]  * mscale_weights[i][scale];
+      }
+
+    }
+
+
+    dssim = dssim_max / dssim - 1;
+    if (dssim < 0) dssim = 0; // should not happen
+    if (dssim > 1) dssim = 1; // very different images
+
+    printf("%.8f\n", dssim);
+
+    return(0);
+}
diff --git a/third-party/libjxl/libjxl/tools/ssimulacra2.cc b/third-party/libjxl/libjxl/tools/ssimulacra2.cc
new file mode 100644
index 0000000000..d1614e55ef
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/ssimulacra2.cc
@@ -0,0 +1,492 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+SSIMULACRA 2
+Structural SIMilarity Unveiling Local And Compression Related Artifacts
+
+Perceptual metric developed by Jon Sneyers (Cloudinary) in July 2022,
+updated in April 2023.
+Design:
+- XYB color space (rescaled to a 0..1 range and with B-Y)
+- SSIM map (with correction: no double gamma correction)
+- 'blockiness/ringing' map (distorted has edges where original is smooth)
+- 'smoothing' map (distorted is smooth where original has edges)
+- error maps are computed at 6 scales (1:1 to 1:32) for each component (X,Y,B)
+- downscaling is done in linear RGB
+- for all 6*3*3=54 maps, two norms are computed: 1-norm (mean) and 4-norm
+- a weighted sum of these 54*2=108 norms leads to the final score
+- weights were tuned based on a large set of subjective scores
+  (CID22, TID2013, Kadid10k, KonFiG-IQA).
+*/
+
+#include "tools/ssimulacra2.h"
+
+#include <stdio.h>
+
+#include <cmath>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/gauss_blur.h"
+#include "lib/jxl/image_ops.h"
+
+namespace {
+
+using jxl::Image3F;
+using jxl::ImageF;
+
+static const float kC2 = 0.0009f;
+static const int kNumScales = 6;
+
+Image3F Downsample(const Image3F& in, size_t fx, size_t fy) {  // box filter
+  const size_t dst_w = (in.xsize() + fx - 1) / fx;  // sizes are rounded up
+  const size_t dst_h = (in.ysize() + fy - 1) / fy;
+  const size_t last_x = in.xsize() - 1, last_y = in.ysize() - 1;  // clamps
+  const float inv_area = 1.0f / (fx * fy);
+  Image3F result(dst_w, dst_h);
+  for (size_t plane = 0; plane < 3; ++plane) {
+    for (size_t dy = 0; dy < dst_h; ++dy) {
+      float* JXL_RESTRICT dst_row = result.PlaneRow(plane, dy);
+      for (size_t dx = 0; dx < dst_w; ++dx) {
+        float acc = 0.0f;
+        for (size_t iy = 0; iy < fy; ++iy) {
+          const float* src = in.PlaneRow(plane, std::min(dy * fy + iy, last_y));
+          for (size_t ix = 0; ix < fx; ++ix) {
+            acc += src[std::min(dx * fx + ix, last_x)];
+          }
+        }
+        dst_row[dx] = acc * inv_area;  // mean over the fx-by-fy tile
+      }
+    }
+  }
+  return result;
+}
+
+void Multiply(const Image3F& a, const Image3F& b, Image3F* mul) {
+  const size_t width = a.xsize();  // dimensions are taken from `a` alone
+  const size_t height = a.ysize();
+  for (size_t plane = 0; plane < 3; ++plane) {
+    for (size_t row = 0; row < height; ++row) {
+      const float* JXL_RESTRICT lhs = a.PlaneRow(plane, row);
+      const float* JXL_RESTRICT rhs = b.PlaneRow(plane, row);
+      float* JXL_RESTRICT dst = mul->PlaneRow(plane, row);
+      for (size_t col = 0; col < width; ++col) dst[col] = lhs[col] * rhs[col];
+    }
+  }
+}
+
+// Gaussian blur functor; its scratch plane is reused for multiple images.
+class Blur {
+ public:
+  Blur(const size_t xsize, const size_t ysize)  // size of the scratch plane
+      : rg_(jxl::CreateRecursiveGaussian(1.5)), temp_(xsize, ysize) {}
+  // Blurs the single plane `in` into `out`; a null thread pool is passed.
+  void operator()(const ImageF& in, ImageF* JXL_RESTRICT out) {
+    jxl::ThreadPool* null_pool = nullptr;
+    FastGaussian(rg_, in, null_pool, &temp_, out);
+  }
+  // Blurs each of the three planes of `in` into a newly allocated image.
+  Image3F operator()(const Image3F& in) {
+    Image3F out(in.xsize(), in.ysize());
+    operator()(in.Plane(0), &out.Plane(0));
+    operator()(in.Plane(1), &out.Plane(1));
+    operator()(in.Plane(2), &out.Plane(2));
+    return out;
+  }
+
+  // Shrinks the scratch plane; allows reusing this object across scales.
+  void ShrinkTo(const size_t xsize, const size_t ysize) {
+    temp_.ShrinkTo(xsize, ysize);
+  }
+
+ private:
+  hwy::AlignedUniquePtr<jxl::RecursiveGaussian> rg_;  // 1.5: presumably sigma
+  ImageF temp_;  // scratch buffer handed to FastGaussian
+};
+
+double tothe4th(double x) {
+  // Two squarings: (x * x) * (x * x) == x^4.
+  const double squared = x * x;
+  return squared * squared;
+}
+void SSIMMap(const Image3F& m1, const Image3F& m2, const Image3F& s11,
+             const Image3F& s22, const Image3F& s12, double* plane_averages) {
+  const double onePerPixels = 1.0 / (m1.ysize() * m1.xsize());  // 1 / #pixels
+  for (size_t c = 0; c < 3; ++c) {  // fills plane_averages[2c] and [2c + 1]
+    double sum1[2] = {0.0};  // [0]: sum of d, [1]: sum of d^4
+    for (size_t y = 0; y < m1.ysize(); ++y) {
+      const float* JXL_RESTRICT row_m1 = m1.PlaneRow(c, y);
+      const float* JXL_RESTRICT row_m2 = m2.PlaneRow(c, y);
+      const float* JXL_RESTRICT row_s11 = s11.PlaneRow(c, y);
+      const float* JXL_RESTRICT row_s22 = s22.PlaneRow(c, y);
+      const float* JXL_RESTRICT row_s12 = s12.PlaneRow(c, y);
+      for (size_t x = 0; x < m1.xsize(); ++x) {
+        float mu1 = row_m1[x];
+        float mu2 = row_m2[x];
+        float mu11 = mu1 * mu1;
+        float mu22 = mu2 * mu2;
+        float mu12 = mu1 * mu2;
+        /* Correction applied compared to the original SSIM formula, which has:
+
+             luma_err = 2 * mu1 * mu2 / (mu1^2 + mu2^2)
+                      = 1 - (mu1 - mu2)^2 / (mu1^2 + mu2^2)
+
+           The denominator causes error in the darks (low mu1 and mu2) to weigh
+           more than error in the brights (high mu1 and mu2). This would make
+           sense if values correspond to linear luma. However, the actual values
+           are either gamma-compressed luma (which supposedly is already
+           perceptually uniform) or chroma (where weighing green more than red
+           or blue more than yellow does not make any sense at all). So it is
+           better to simply drop this denominator.
+        */
+        float num_m = 1.0 - (mu1 - mu2) * (mu1 - mu2);
+        float num_s = 2 * (row_s12[x] - mu12) + kC2;
+        float denom_s = (row_s11[x] - mu11) + (row_s22[x] - mu22) + kC2;
+
+        // Use 1 - SSIM' so it becomes an error score instead of a quality
+        // index. This makes it make sense to compute an L_4 norm.
+        double d = 1.0 - (num_m * num_s / denom_s);
+        d = std::max(d, 0.0);  // negative values are clamped to zero
+        sum1[0] += d;
+        sum1[1] += tothe4th(d);
+      }
+    }
+    plane_averages[c * 2] = onePerPixels * sum1[0];  // mean of d (1-norm)
+    plane_averages[c * 2 + 1] = sqrt(sqrt(onePerPixels * sum1[1]));  // 4-norm
+  }
+}
+
+void EdgeDiffMap(const Image3F& img1, const Image3F& mu1, const Image3F& img2,
+                 const Image3F& mu2, double* plane_averages) {
+  const double onePerPixels = 1.0 / (img1.ysize() * img1.xsize());  // 1/#pixels
+  for (size_t c = 0; c < 3; ++c) {  // fills plane_averages[4c] .. [4c + 3]
+    double sum1[4] = {0.0};  // artifact sum, sum^4; detail_lost sum, sum^4
+    for (size_t y = 0; y < img1.ysize(); ++y) {
+      const float* JXL_RESTRICT row1 = img1.PlaneRow(c, y);
+      const float* JXL_RESTRICT row2 = img2.PlaneRow(c, y);
+      const float* JXL_RESTRICT rowm1 = mu1.PlaneRow(c, y);
+      const float* JXL_RESTRICT rowm2 = mu2.PlaneRow(c, y);
+      for (size_t x = 0; x < img1.xsize(); ++x) {
+        double d1 = (1.0 + std::abs(row2[x] - rowm2[x])) /
+                        (1.0 + std::abs(row1[x] - rowm1[x])) -
+                    1.0;
+
+        // d1 > 0: distorted has an edge where original is smooth
+        //         (indicating ringing, color banding, blockiness, etc)
+        double artifact = std::max(d1, 0.0);
+        sum1[0] += artifact;
+        sum1[1] += tothe4th(artifact);
+
+        // d1 < 0: original has an edge where distorted is smooth
+        //         (indicating smoothing, blurring, smearing, etc)
+        double detail_lost = std::max(-d1, 0.0);
+        sum1[2] += detail_lost;
+        sum1[3] += tothe4th(detail_lost);
+      }
+    }
+    plane_averages[c * 4] = onePerPixels * sum1[0];  // artifact, mean
+    plane_averages[c * 4 + 1] = sqrt(sqrt(onePerPixels * sum1[1]));  // 4-norm
+    plane_averages[c * 4 + 2] = onePerPixels * sum1[2];  // detail_lost, mean
+    plane_averages[c * 4 + 3] = sqrt(sqrt(onePerPixels * sum1[3]));  // 4-norm
+  }
+}
+
+/* Get all components in more or less 0..1 range
+   Range of Rec2020 with these adjustments:
+    X: 0.017223..0.998838
+    Y: 0.010000..0.855303
+    B: 0.048759..0.989551
+   Range of sRGB:
+    X: 0.204594..0.813402
+    Y: 0.010000..0.855308
+    B: 0.272295..0.938012
+   The maximum pixel-wise difference has to be <= 1 for the ssim formula to make
+   sense.
+*/
+void MakePositiveXYB(jxl::Image3F& img) {
+  for (size_t row = 0; row < img.ysize(); ++row) {
+    float* JXL_RESTRICT x_row = img.PlaneRow(0, row);
+    float* JXL_RESTRICT y_row = img.PlaneRow(1, row);
+    float* JXL_RESTRICT b_row = img.PlaneRow(2, row);
+    for (size_t col = 0; col < img.xsize(); ++col) {
+      x_row[col] = 14.f * x_row[col] + 0.42f;
+      b_row[col] = (b_row[col] - y_row[col]) + 0.55f;  // uses unshifted Y
+      y_row[col] = y_row[col] + 0.01f;  // Y is shifted last
+    }
+  }
+}
+
+void AlphaBlend(jxl::ImageBundle& img, float bg) {
+  for (size_t row = 0; row < img.ysize(); ++row) {
+    float* JXL_RESTRICT red = img.color()->PlaneRow(0, row);
+    float* JXL_RESTRICT green = img.color()->PlaneRow(1, row);
+    float* JXL_RESTRICT blue = img.color()->PlaneRow(2, row);
+    const float* JXL_RESTRICT alpha = img.alpha()->Row(row);
+    for (size_t col = 0; col < img.xsize(); ++col) {  // color over flat bg
+      red[col] = alpha[col] * red[col] + (1.f - alpha[col]) * bg;
+      green[col] = alpha[col] * green[col] + (1.f - alpha[col]) * bg;
+      blue[col] = alpha[col] * blue[col] + (1.f - alpha[col]) * bg;
+    }
+  }
+}
+
+}  // namespace
+
+/*
+The final score is based on a weighted sum of 108 sub-scores:
+- for 6 scales (1:1 to 1:32, downsampled in linear RGB)
+- for 3 components (X, Y, B-Y, rescaled to 0..1 range)
+- using 2 norms (the 1-norm and the 4-norm)
+- over 3 error maps:
+    - SSIM' (SSIM without the spurious gamma correction term)
+    - "ringing" (distorted edges where there are no orig edges)
+    - "blurring" (orig edges where there are no distorted edges)
+
+The weights were obtained by running Nelder-Mead simplex search,
+optimizing to minimize MSE for the CID22 training set and to
+maximize Kendall rank correlation (and with a lower weight,
+also Pearson correlation) with the CID22 training set and the
+TID2013, Kadid10k and KonFiG-IQA datasets.
+Validation was done on the CID22 validation set.
+
+Final results after tuning (Kendall | Spearman | Pearson):
+   CID22:     0.6903 | 0.8805 | 0.8583
+   TID2013:   0.6590 | 0.8445 | 0.8471
+   KADID-10k: 0.6175 | 0.8133 | 0.8030
+   KonFiG(F): 0.7668 | 0.9194 | 0.9136
+*/
+double Msssim::Score() const {
+  double ssim = 0.0;  // weighted sum of sub-scores; remapped before returning
+  constexpr double weight[108] = {0.0,
+                                  0.0007376606707406586,
+                                  0.0,
+                                  0.0,
+                                  0.0007793481682867309,
+                                  0.0,
+                                  0.0,
+                                  0.0004371155730107379,
+                                  0.0,
+                                  1.1041726426657346,
+                                  0.00066284834129271,
+                                  0.00015231632783718752,
+                                  0.0,
+                                  0.0016406437456599754,
+                                  0.0,
+                                  1.8422455520539298,
+                                  11.441172603757666,
+                                  0.0,
+                                  0.0007989109436015163,
+                                  0.000176816438078653,
+                                  0.0,
+                                  1.8787594979546387,
+                                  10.94906990605142,
+                                  0.0,
+                                  0.0007289346991508072,
+                                  0.9677937080626833,
+                                  0.0,
+                                  0.00014003424285435884,
+                                  0.9981766977854967,
+                                  0.00031949755934435053,
+                                  0.0004550992113792063,
+                                  0.0,
+                                  0.0,
+                                  0.0013648766163243398,
+                                  0.0,
+                                  0.0,
+                                  0.0,
+                                  0.0,
+                                  0.0,
+                                  7.466890328078848,
+                                  0.0,
+                                  17.445833984131262,
+                                  0.0006235601634041466,
+                                  0.0,
+                                  0.0,
+                                  6.683678146179332,
+                                  0.00037724407979611296,
+                                  1.027889937768264,
+                                  225.20515300849274,
+                                  0.0,
+                                  0.0,
+                                  19.213238186143016,
+                                  0.0011401524586618361,
+                                  0.001237755635509985,
+                                  176.39317598450694,
+                                  0.0,
+                                  0.0,
+                                  24.43300999870476,
+                                  0.28520802612117757,
+                                  0.0004485436923833408,
+                                  0.0,
+                                  0.0,
+                                  0.0,
+                                  34.77906344483772,
+                                  44.835625328877896,
+                                  0.0,
+                                  0.0,
+                                  0.0,
+                                  0.0,
+                                  0.0,
+                                  0.0,
+                                  0.0,
+                                  0.0,
+                                  0.0008680556573291698,
+                                  0.0,
+                                  0.0,
+                                  0.0,
+                                  0.0,
+                                  0.0,
+                                  0.0005313191874358747,
+                                  0.0,
+                                  0.00016533814161379112,
+                                  0.0,
+                                  0.0,
+                                  0.0,
+                                  0.0,
+                                  0.0,
+                                  0.0004179171803251336,
+                                  0.0017290828234722833,
+                                  0.0,
+                                  0.0020827005846636437,
+                                  0.0,
+                                  0.0,
+                                  8.826982764996862,
+                                  23.19243343998926,
+                                  0.0,
+                                  95.1080498811086,
+                                  0.9863978034400682,
+                                  0.9834382792465353,
+                                  0.0012286405048278493,
+                                  171.2667255897307,
+                                  0.9807858872435379,
+                                  0.0,
+                                  0.0,
+                                  0.0,
+                                  0.0005130064588990679,
+                                  0.0,
+                                  0.00010854057858411537};
+  // NOTE(review): i never skips, so with scales.size() < 6 the weights shift.
+  size_t i = 0;  // next weight; order: channel, scale, norm, then map
+  char ch[] = "XYB";  // channel labels for the verbose output
+  const bool verbose = false;  // set to true to print every weighted term
+  for (size_t c = 0; c < 3; ++c) {
+    for (size_t scale = 0; scale < scales.size(); ++scale) {
+      for (size_t n = 0; n < 2; n++) {
+#ifdef SSIMULACRA2_OUTPUT_RAW_SCORES_FOR_WEIGHT_TUNING
+        printf("%.12f,%.12f,%.12f,", scales[scale].avg_ssim[c * 2 + n],
+               scales[scale].avg_edgediff[c * 4 + n],
+               scales[scale].avg_edgediff[c * 4 + 2 + n]);
+#endif
+        if (verbose) {
+          printf("%f from channel %c ssim, scale 1:%i, %" PRIuS
+                 "-norm (weight %f)\n",
+                 weight[i] * std::abs(scales[scale].avg_ssim[c * 2 + n]), ch[c],
+                 1 << scale, n * 3 + 1, weight[i]);
+        }
+        ssim += weight[i++] * std::abs(scales[scale].avg_ssim[c * 2 + n]);
+        if (verbose) {
+          printf("%f from channel %c ringing, scale 1:%i, %" PRIuS
+                 "-norm (weight %f)\n",
+                 weight[i] * std::abs(scales[scale].avg_edgediff[c * 4 + n]),
+                 ch[c], 1 << scale, n * 3 + 1, weight[i]);
+        }
+        ssim += weight[i++] * std::abs(scales[scale].avg_edgediff[c * 4 + n]);
+        if (verbose) {
+          printf(
+              "%f from channel %c blur, scale 1:%i, %" PRIuS
+              "-norm (weight %f)\n",
+              weight[i] * std::abs(scales[scale].avg_edgediff[c * 4 + n + 2]),
+              ch[c], 1 << scale, n * 3 + 1, weight[i]);
+        }
+        ssim +=
+            weight[i++] * std::abs(scales[scale].avg_edgediff[c * 4 + n + 2]);
+      }
+    }
+  }
+  // Map the weighted error sum to the reported score; sums <= 0 give 100.
+  ssim = ssim * 0.9562382616834844;
+  ssim = 2.326765642916932 * ssim - 0.020884521182843837 * ssim * ssim +
+         6.248496625763138e-05 * ssim * ssim * ssim;
+  if (ssim > 0) {
+    ssim = 100.0 - 10.0 * pow(ssim, 0.6276336467831387);
+  } else {
+    ssim = 100.0;
+  }
+  return ssim;
+}
+
+Msssim ComputeSSIMULACRA2(const jxl::ImageBundle& orig,
+                          const jxl::ImageBundle& dist, float bg) {
+  Msssim msssim;
+  // XYB working images; img2 is allocated with img1's (i.e. orig's) size.
+  jxl::Image3F img1(orig.xsize(), orig.ysize());
+  jxl::Image3F img2(img1.xsize(), img1.ysize());
+  // Work on copies so the caller's bundles are left untouched.
+  jxl::ImageBundle orig2 = orig.Copy();
+  jxl::ImageBundle dist2 = dist.Copy();
+  // Flatten any alpha channel onto a uniform background of intensity bg.
+  if (orig.HasAlpha()) AlphaBlend(orig2, bg);
+  if (dist.HasAlpha()) AlphaBlend(dist2, bg);
+  orig2.ClearExtraChannels();
+  dist2.ClearExtraChannels();
+  // Convert both copies to linear sRGB (each call is guarded by JXL_CHECK).
+  JXL_CHECK(orig2.TransformTo(jxl::ColorEncoding::LinearSRGB(orig2.IsGray()),
+                              jxl::GetJxlCms()));
+  JXL_CHECK(dist2.TransformTo(jxl::ColorEncoding::LinearSRGB(dist2.IsGray()),
+                              jxl::GetJxlCms()));
+
+  jxl::ToXYB(orig2, nullptr, &img1, jxl::GetJxlCms(), nullptr);
+  jxl::ToXYB(dist2, nullptr, &img2, jxl::GetJxlCms(), nullptr);
+  MakePositiveXYB(img1);
+  MakePositiveXYB(img2);
+  // Scratch image for products and the blur helper, both sized for scale 0.
+  Image3F mul(img1.xsize(), img1.ysize());
+  Blur blur(img1.xsize(), img1.ysize());
+
+  for (int scale = 0; scale < kNumScales; scale++) {
+    if (img1.xsize() < 8 || img1.ysize() < 8) {  // too small: stop here
+      break;
+    }
+    if (scale) {  // scales > 0: 2x2 downsample the linear images, redo XYB
+      orig2.SetFromImage(Downsample(*orig2.color(), 2, 2),
+                         jxl::ColorEncoding::LinearSRGB(orig2.IsGray()));
+      dist2.SetFromImage(Downsample(*dist2.color(), 2, 2),
+                         jxl::ColorEncoding::LinearSRGB(dist2.IsGray()));
+      img1.ShrinkTo(orig2.xsize(), orig2.ysize());
+      img2.ShrinkTo(orig2.xsize(), orig2.ysize());
+      jxl::ToXYB(orig2, nullptr, &img1, jxl::GetJxlCms(), nullptr);
+      jxl::ToXYB(dist2, nullptr, &img2, jxl::GetJxlCms(), nullptr);
+      MakePositiveXYB(img1);
+      MakePositiveXYB(img2);
+    }
+    mul.ShrinkTo(img1.xsize(), img1.ysize());
+    blur.ShrinkTo(img1.xsize(), img1.ysize());
+    // Blurred products and means: the inputs SSIMMap combines per pixel.
+    Multiply(img1, img1, &mul);
+    Image3F sigma1_sq = blur(mul);
+
+    Multiply(img2, img2, &mul);
+    Image3F sigma2_sq = blur(mul);
+
+    Multiply(img1, img2, &mul);
+    Image3F sigma12 = blur(mul);
+
+    Image3F mu1 = blur(img1);
+    Image3F mu2 = blur(img2);
+
+    MsssimScale sscale;  // both arrays are filled by the two calls below
+    SSIMMap(mu1, mu2, sigma1_sq, sigma2_sq, sigma12, sscale.avg_ssim);
+    EdgeDiffMap(img1, mu1, img2, mu2, sscale.avg_edgediff);
+    msssim.scales.push_back(sscale);  // one entry per scale, finest first
+  }
+  return msssim;
+}
+
+Msssim ComputeSSIMULACRA2(const jxl::ImageBundle& orig,
+                          const jxl::ImageBundle& distorted) {
+  return ComputeSSIMULACRA2(orig, distorted, 0.5f);  // bg defaults to 0.5
+}
diff --git a/third-party/libjxl/libjxl/tools/ssimulacra2.h b/third-party/libjxl/libjxl/tools/ssimulacra2.h
new file mode 100644
index 0000000000..36d1193112
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/ssimulacra2.h
@@ -0,0 +1,32 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_SSIMULACRA2_H_
+#define TOOLS_SSIMULACRA2_H_
+
+#include <vector>
+
+#include "lib/jxl/image_bundle.h"
+
+struct MsssimScale {  // sub-scores of one scale; c is the channel index 0..2
+  double avg_ssim[3 * 2];  // [2c]: mean of the SSIM error, [2c+1]: its 4-norm
+  double avg_edgediff[3 * 4];  // [4c],[4c+1]: artifacts; [4c+2],[4c+3]: blur
+};
+
+struct Msssim {
+  std::vector<MsssimScale> scales;  // one entry per processed scale
+
+  double Score() const;  // weighted combination of all sub-scores, at most 100
+};
+
+// Computes the SSIMULACRA 2 score between reference image 'orig' and
+// distorted image 'distorted'. In case of alpha transparency, assume
+// a gray background of intensity 'bg' (in range 0..1).
+Msssim ComputeSSIMULACRA2(const jxl::ImageBundle &orig,
+                          const jxl::ImageBundle &distorted, float bg);
+Msssim ComputeSSIMULACRA2(const jxl::ImageBundle &orig,
+                          const jxl::ImageBundle &distorted);
+
+#endif  // TOOLS_SSIMULACRA2_H_
diff --git a/third-party/libjxl/libjxl/tools/ssimulacra2_main.cc b/third-party/libjxl/libjxl/tools/ssimulacra2_main.cc
new file mode 100644
index 0000000000..a18f47f786
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/ssimulacra2_main.cc
@@ -0,0 +1,83 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/enc_color_management.h"
+#include "tools/file_io.h"
+#include "tools/ssimulacra2.h"
+
+int PrintUsage(char** argv) {
+  fprintf(stderr, "Usage: %s orig.png distorted.png\n", argv[0]);
+  fputs("Returns a score in range -inf..100, which correlates to subjective "
+        "visual quality:\n",
+        stderr);
+  fputs("     30 = low quality (p10 worst output of mozjpeg -quality 30)\n",
+        stderr);
+  fputs("     50 = medium quality (average output of cjxl -q 40 or mozjpeg "
+        "-quality 40,\n",
+        stderr);
+  fputs("                          p10 output of cjxl -q 50 or mozjpeg "
+        "-quality 60)\n",
+        stderr);
+  fputs("     70 = high quality (average output of cjxl -q 70 or mozjpeg "
+        "-quality 70,\n",
+        stderr);
+  fputs("                        p10 output of cjxl -q 75 or mozjpeg "
+        "-quality 80)\n",
+        stderr);
+  fputs("     90 = very high quality (impossible to distinguish from "
+        "original at 1:1,\n",
+        stderr);
+  fputs("                             average output of cjxl -q 90 or "
+        "mozjpeg -quality 90)\n",
+        stderr);
+  return 1;
+}
+
+int main(int argc, char** argv) {
+  if (argc != 3) return PrintUsage(argv);
+
+  // Slot 0 holds the reference image, slot 1 the distorted one.
+  jxl::CodecInOut decoded[2];
+  const char* role[] = {"original", "distorted"};
+  for (size_t idx = 0; idx < 2; ++idx) {
+    char* const path = argv[1 + idx];
+    std::vector<uint8_t> bytes;
+    if (!jpegxl::tools::ReadFile(path, &bytes)) {
+      fprintf(stderr, "Could not load %s image: %s\n", role[idx], path);
+      return 1;
+    }
+    if (!jxl::SetFromBytes(jxl::Span<const uint8_t>(bytes),
+                           jxl::extras::ColorHints(), &decoded[idx])) {
+      fprintf(stderr, "Could not decode %s image: %s\n", role[idx], path);
+      return 1;
+    }
+    if (decoded[idx].xsize() < 8 || decoded[idx].ysize() < 8) {
+      fprintf(stderr, "Minimum image size is 8x8 pixels\n");
+      return 1;
+    }
+  }
+  jxl::CodecInOut& ref = decoded[0];
+  jxl::CodecInOut& dist = decoded[1];
+  if (ref.xsize() != dist.xsize() || ref.ysize() != dist.ysize()) {
+    fprintf(stderr, "Image size mismatch\n");
+    return 1;
+  }
+  double score;
+  if (ref.Main().HasAlpha()) {
+    // With transparency in the reference, blend against a dark and a bright
+    // background and report the worse (lower) of the two scores.
+    const Msssim on_dark = ComputeSSIMULACRA2(ref.Main(), dist.Main(), 0.1f);
+    const Msssim on_bright = ComputeSSIMULACRA2(ref.Main(), dist.Main(), 0.9f);
+    score = std::min(on_dark.Score(), on_bright.Score());
+  } else {
+    score = ComputeSSIMULACRA2(ref.Main(), dist.Main()).Score();
+  }
+  printf("%.8f\n", score);
+  return 0;
+}
diff --git a/third-party/libjxl/libjxl/tools/ssimulacra_main.cc b/third-party/libjxl/libjxl/tools/ssimulacra_main.cc
new file mode 100644
index 0000000000..55fad692bc
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/ssimulacra_main.cc
@@ -0,0 +1,74 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/image_bundle.h"
+#include "tools/file_io.h"
+#include "tools/ssimulacra.h"
+
+namespace ssimulacra {
+namespace {
+
+int PrintUsage(char** argv) {  // writes the usage line to stderr
+  fprintf(stderr, "Usage: %s [-v] [-s] orig.png distorted.png\n", argv[0]);
+  return 1;  // always 1, so callers can return it directly as a status
+}
+
+int Run(int argc, char** argv) {  // returns the process exit status
+  if (argc < 2) return PrintUsage(argv);
+  // Flags: "-v" then "-s"; bounds-checked since argv[argc] is a null pointer.
+  bool verbose = false, simple = false;
+  int input_arg = 1;
+  if (input_arg < argc && !strcmp(argv[input_arg], "-v")) {
+    verbose = true;
+    input_arg++;
+  }
+  if (input_arg < argc && !strcmp(argv[input_arg], "-s")) {
+    simple = true;
+    input_arg++;
+  }
+  if (argc < input_arg + 2) return PrintUsage(argv);  // need two image paths
+  // Load and decode both inputs; each step is guarded by JXL_CHECK.
+  jxl::CodecInOut io[2];
+  for (size_t i = 0; i < 2; ++i) {
+    std::vector<uint8_t> encoded;
+    JXL_CHECK(jpegxl::tools::ReadFile(argv[input_arg + i], &encoded));
+    JXL_CHECK(jxl::SetFromBytes(jxl::Span<const uint8_t>(encoded),
+                                jxl::extras::ColorHints(), &io[i]));
+  }
+  jxl::ImageBundle& ib1 = io[0].Main();
+  jxl::ImageBundle& ib2 = io[1].Main();
+  JXL_CHECK(ib1.TransformTo(jxl::ColorEncoding::LinearSRGB(ib1.IsGray()),
+                            jxl::GetJxlCms(), nullptr));
+  JXL_CHECK(ib2.TransformTo(jxl::ColorEncoding::LinearSRGB(ib2.IsGray()),
+                            jxl::GetJxlCms(), nullptr));
+  jxl::Image3F& img1 = *ib1.color();
+  jxl::Image3F& img2 = *ib2.color();
+  if (img1.xsize() != img2.xsize() || img1.ysize() != img2.ysize()) {
+    fprintf(stderr, "Image size mismatch\n");
+    return 1;
+  }
+  if (img1.xsize() < 8 || img1.ysize() < 8) {
+    fprintf(stderr, "Minimum image size is 8x8 pixels\n");
+    return 1;
+  }
+  // Compare the two linear-sRGB images; `simple` is forwarded to ComputeDiff.
+  Ssimulacra ssimulacra = ComputeDiff(img1, img2, simple);
+
+  if (verbose) {
+    ssimulacra.PrintDetails();
+  }
+  printf("%.8f\n", ssimulacra.Score());
+  return 0;
+}
+
+}  // namespace
+}  // namespace ssimulacra
+
+int main(int argc, char** argv) { return ssimulacra::Run(argc, argv); }  // stub
diff --git a/third-party/libjxl/libjxl/tools/thread_pool_internal.h b/third-party/libjxl/libjxl/tools/thread_pool_internal.h
new file mode 100644
index 0000000000..92a1176757
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/thread_pool_internal.h
@@ -0,0 +1,47 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_THREAD_POOL_INTERNAL_H_
+#define TOOLS_THREAD_POOL_INTERNAL_H_
+
+#include <jxl/thread_parallel_runner_cxx.h>
+#include <stddef.h>
+
+#include <cmath>
+#include <thread>  // NOLINT
+
+#include "lib/jxl/base/data_parallel.h"
+
+namespace jpegxl {
+namespace tools {
+
+using ::jxl::ThreadPool;
+
+// Helper class to pass an internal ThreadPool-like object using threads.
+class ThreadPoolInternal {
+ public:
+  // Starts the given number of worker threads and blocks until they are ready.
+  // "num_threads" defaults to std::thread::hardware_concurrency(). If zero,
+  // all tasks run on the main thread.
+  explicit ThreadPoolInternal(
+      size_t num_threads = std::thread::hardware_concurrency()) {
+    runner_ =
+        JxlThreadParallelRunnerMake(/* memory_manager */ nullptr, num_threads);
+    pool_.reset(new ThreadPool(JxlThreadParallelRunner, runner_.get()));
+  }
+  // Non-copyable. Unary & yields the wrapped pool, so `&obj` is a ThreadPool*.
+  ThreadPoolInternal(const ThreadPoolInternal&) = delete;
+  ThreadPoolInternal& operator=(const ThreadPoolInternal&) = delete;
+  ThreadPool* operator&() { return pool_.get(); }
+
+ private:
+  JxlThreadParallelRunnerPtr runner_;  // runner handed to pool_ as raw pointer
+  std::unique_ptr<ThreadPool> pool_;   // declared last, so destroyed first
+};
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_THREAD_POOL_INTERNAL_H_
diff --git a/third-party/libjxl/libjxl/tools/tool_version.cc b/third-party/libjxl/libjxl/tools/tool_version.cc
new file mode 100644
index 0000000000..152689dbe5
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/tool_version.cc
@@ -0,0 +1,18 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/tool_version.h"
+
+#ifdef JPEGXL_VERSION_FROM_GIT
+#include "tool_version_git.h"
+#endif
+
+namespace jpegxl {
+namespace tools {
+
+const char* kJpegxlVersion = JPEGXL_VERSION;  // macro defined outside this file
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/tool_version.h b/third-party/libjxl/libjxl/tools/tool_version.h
new file mode 100644
index 0000000000..c6f7c16253
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/tool_version.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_TOOL_VERSION_H_
+#define TOOLS_TOOL_VERSION_H_
+
+#include <string>
+
+namespace jpegxl {
+namespace tools {
+
+// Package version as defined by the JPEGXL_VERSION macro. This is not the
+// library semantic versioning number, but instead additional information on the
+// tool version.
+extern const char* kJpegxlVersion;
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_TOOL_VERSION_H_
diff --git a/third-party/libjxl/libjxl/tools/transforms_fuzzer.cc b/third-party/libjxl/libjxl/tools/transforms_fuzzer.cc
new file mode 100644
index 0000000000..2e88829297
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/transforms_fuzzer.cc
@@ -0,0 +1,163 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jpegxl {
+namespace tools {
+
+using ::jxl::BitReader;
+using ::jxl::BitReaderScopedCloser;
+using ::jxl::Channel;
+using ::jxl::GroupHeader;
+using ::jxl::Image;
+using ::jxl::ModularOptions;
+using ::jxl::pixel_type;
+using ::jxl::Rng;
+using ::jxl::Span;
+using ::jxl::Status;
+using ::jxl::Transform;
+using ::jxl::weighted::Header;
+
+namespace {
+// Overwrites every sample of `ch`, row by row, with values obtained from
+// rng.UniformU(0, 0x80000000).
+void FillChannel(Channel& ch, Rng& rng) {
+  for (size_t row = 0; row < ch.h; ++row) {
+    pixel_type* samples = ch.plane.Row(row);
+    for (size_t col = 0; col < ch.w; ++col) {
+      samples[col] = rng.UniformU(0, 0x80000000);
+    }
+  }
+}
+// Traps (crashing the process) when `a` and `b` compare unequal.
+template <typename T>
+void AssertEq(T a, T b) {
+  if (a != b) __builtin_trap();
+}
+}  // namespace
+
+int TestOneInput(const uint8_t* data, size_t size) {
+  // Fuzz target: builds an image layout and modular transforms from the input
+  // bits, fills the channels using an Rng seeded by the input, undoes the
+  // transforms and traps unless the original layout is restored. Returns 0.
+  static Status nevermind = true;
+  BitReader reader(Span<const uint8_t>(data, size));
+  BitReaderScopedCloser reader_closer(&reader, &nevermind);
+  // The first 56 bits seed the RNG that is later used to fill the channels.
+  Rng rng(reader.ReadFixedBits<56>());
+
+  // One of {0, 1, _2_, 3}; "2" will be filtered out soon.
+  size_t nb_chans = static_cast<size_t>(reader.ReadFixedBits<8>()) & 0x3;
+  size_t nb_extra = static_cast<size_t>(reader.ReadFixedBits<8>()) & 0x7;
+  // Bit depth: 1..32
+  size_t bit_depth =
+      (static_cast<size_t>(reader.ReadFixedBits<8>()) & 0x1F) + 1;
+  // log2 of the upsampling factor: {0, 1, 2, 3}
+  size_t log_upsampling =
+      (static_cast<size_t>(reader.ReadFixedBits<8>()) & 0x3);
+  size_t upsampling = 1 << log_upsampling;
+
+  size_t w_orig = static_cast<size_t>(reader.ReadFixedBits<16>());
+  size_t h_orig = static_cast<size_t>(reader.ReadFixedBits<16>());
+  size_t w = jxl::DivCeil(w_orig, upsampling);
+  size_t h = jxl::DivCeil(h_orig, upsampling);
+  // Reject nb_chans == 2, zero channels, empty images and oversized inputs.
+  if ((nb_chans == 2) || ((nb_chans + nb_extra) == 0) || (w * h == 0) ||
+      ((w_orig * h_orig * (nb_chans + nb_extra)) > (1 << 23))) {
+    return 0;
+  }
+
+  std::vector<int> hshift;
+  std::vector<int> vshift;
+  std::vector<size_t> ec_upsampling;
+  // Shift flags (0 or 1) for each of the first nb_chans channels.
+  for (size_t c = 0; c < nb_chans; c++) {
+    hshift.push_back(static_cast<int>(reader.ReadFixedBits<8>()) & 1);
+    vshift.push_back(static_cast<int>(reader.ReadFixedBits<8>()) & 1);
+  }
+  // Per-extra-channel upsampling, never below `upsampling`.
+  for (size_t ec = 0; ec < nb_extra; ec++) {
+    size_t log_ec_upsampling =
+        (static_cast<size_t>(reader.ReadFixedBits<8>()) & 0x3);
+    log_ec_upsampling = std::max(log_ec_upsampling, log_upsampling);
+    ec_upsampling.push_back(1 << log_ec_upsampling);
+  }
+
+  Image image(w, h, bit_depth, nb_chans + nb_extra);
+  // Apply the shifts to the dimensions of the first nb_chans channels.
+  for (size_t c = 0; c < nb_chans; c++) {
+    Channel& ch = image.channel[c];
+    ch.hshift = hshift[c];
+    ch.vshift = vshift[c];
+    ch.shrink(jxl::DivCeil(w, 1 << hshift[c]), jxl::DivCeil(h, 1 << vshift[c]));
+  }
+  // Extra channels derive their size from w_orig/h_orig and their own factor.
+  for (size_t ec = 0; ec < nb_extra; ec++) {
+    Channel& ch = image.channel[ec + nb_chans];
+    size_t ch_up = ec_upsampling[ec];
+    int up_level =
+        jxl::CeilLog2Nonzero(ch_up) - jxl::CeilLog2Nonzero(upsampling);
+    ch.shrink(jxl::DivCeil(w_orig, ch_up), jxl::DivCeil(h_orig, ch_up));
+    ch.hshift = ch.vshift = up_level;
+  }
+
+  GroupHeader header;
+  if (!jxl::Bundle::Read(&reader, &header)) return 0;
+  Header w_header;
+  if (!jxl::Bundle::Read(&reader, &w_header)) return 0;
+  // TODO(eustas): give it a try?
+  if (!reader.AllReadsWithinBounds()) return 0;
+  // Transforms come from the group header; give up if MetaApply rejects one.
+  image.transform = header.transforms;
+  for (Transform& transform : image.transform) {
+    if (!transform.MetaApply(image)) return 0;
+  }
+  if (image.error) return 0;
+
+  ModularOptions options;
+  if (!ValidateChannelDimensions(image, options)) return 0;
+  // Fill every channel of the image (as left by MetaApply) using `rng`.
+  for (size_t i = 0; i < image.channel.size(); ++i) {
+    FillChannel(image.channel[i], rng);
+  }
+  // Undo the transforms; `w_header` is the weighted::Header read above.
+  image.undo_transforms(w_header);
+  AssertEq(image.error, false);
+  AssertEq<size_t>(image.nb_meta_channels, 0);
+  AssertEq(image.channel.size(), nb_chans + nb_extra);
+  // The first nb_chans channels must be back to their initial geometry.
+  for (size_t c = 0; c < nb_chans; c++) {
+    const Channel& ch = image.channel[c];
+    AssertEq(ch.hshift, hshift[c]);
+    AssertEq(ch.vshift, vshift[c]);
+    AssertEq(ch.w, jxl::DivCeil(w, 1 << hshift[c]));
+    AssertEq(ch.h, jxl::DivCeil(h, 1 << vshift[c]));
+  }
+  // The same holds for the extra channels.
+  for (size_t ec = 0; ec < nb_extra; ec++) {
+    const Channel& ch = image.channel[ec + nb_chans];
+    size_t ch_up = ec_upsampling[ec];
+    int up_level =
+        jxl::CeilLog2Nonzero(ch_up) - jxl::CeilLog2Nonzero(upsampling);
+    AssertEq(ch.w, jxl::DivCeil(w_orig, ch_up));
+    AssertEq(ch.h, jxl::DivCeil(h_orig, ch_up));
+    AssertEq(ch.hshift, up_level);
+    AssertEq(ch.vshift, up_level);
+  }
+  return 0;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  return jpegxl::tools::TestOneInput(data, size);  // libFuzzer entry point.
+}
diff --git a/third-party/libjxl/libjxl/tools/upscaling_coefficients/generate_upscaling_coefficients.py b/third-party/libjxl/libjxl/tools/upscaling_coefficients/generate_upscaling_coefficients.py
new file mode 100755
index 0000000000..17c404d1cd
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/upscaling_coefficients/generate_upscaling_coefficients.py
@@ -0,0 +1,242 @@
+#!/usr/bin/env python3
+
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+"""Generates coefficients used in upscaling.
+
+Given an upscaling factor which can be 2, 4 or 8, we generate coefficients and
+indices for lib/jxl/image_metadata.cc in the format needed there.
+"""
+
+import argparse
+import itertools
+import numpy as np
+
+
+def compute_kernel(sigma):
+  """Returns an unnormalized Gaussian-shaped 1D kernel of width `sigma`."""
+  # The kernel reaches max(1, int(2.5 * |sigma|)) taps to each side of 0.
+  half_width = int(max(1, 2.5 * abs(sigma)))
+  taps = np.arange(-half_width, half_width + 1)
+  weights = np.exp(-taps**2 / (2 * sigma * sigma))
+  return weights
+
+
+def convolution(pixels, kernel):
+  """Filters each row with `kernel` and returns the transposed result."""
+  rows, cols = pixels.shape
+  taps = len(kernel)
+  half = taps // 2
+  norm = 1 / sum(kernel)
+  result = np.zeros(shape=(cols, rows), dtype=pixels.dtype)
+  for col, row in itertools.product(range(cols), range(rows)):
+    if not taps < col < cols - taps:
+      result[col, row] = pixels[row, col]  # Near the border: plain copy.
+      continue
+    result[col, row] = norm * sum(
+        pixels[row, col - half + k] * kernel[k] for k in range(taps))
+  return result
+
+
+def _super_sample(pixels, n):  # Repeats every pixel n times along both axes.
+  return np.repeat(np.repeat(pixels, n, axis=1), n, axis=0)
+
+
+def _sub_sample(pixels, n):  # Averages every n x n block into one value.
+  rows, cols = pixels.shape
+  assert rows % n == 0 and cols % n == 0
+  tiles = pixels.reshape(rows // n, n, cols // n, n).transpose([0, 2, 1, 3])
+  return 1 / (n * n) * tiles.sum(axis=(2, 3))
+
+
+def smooth_4x4_corners(pixels):
+  """Adjusts the pixels where four 4x4 cells meet, ahead of smoothing.
+
+  For every 2x2 group at rows/columns (3, 4), (7, 8), ... each of the four
+  pixels is replaced by an overshoot-corrected value, and the change is
+  subtracted from the pixel two steps further out along the same diagonal.
+
+  Args:
+    pixels: 2D array; modified in place.
+  Returns:
+    The same `pixels` array.
+  """
+  overshoot = 3.5
+  m = 1.0 / (4.0 - overshoot)
+  reach = 2
+  height, width = pixels.shape
+  for y, x in itertools.product(range(3, height - 3, 4),
+                                range(3, width - 3, 4)):
+    ave = (
+        pixels[y, x] + pixels[y, x + 1] + pixels[y + 1, x] +
+        pixels[y + 1, x + 1])
+    for dy, dx in itertools.product((0, 1), repeat=2):
+      cy, cx = y + dy, x + dx
+      ty, tx = cy + (reach if dy else -reach), cx + (reach if dx else -reach)
+      other = (ave - overshoot * pixels[cy, cx]) * m
+      pixels[ty, tx] -= (other - pixels[cy, cx])
+      pixels[cy, cx] = other
+  return pixels
+
+
+def smoothing(pixels):
+  """Upscales `pixels` 4x and blurs the result along both axes (sigma 2.5)."""
+  gauss = compute_kernel(2.5)
+  upscaled = smooth_4x4_corners(_super_sample(pixels, 4))
+  return convolution(convolution(upscaled, gauss), gauss)
+
+
+upscaling = {  # upscaling factor -> function that upscales a 2D array
+    2: lambda pixels: _sub_sample(smoothing(pixels), 2),
+    4: smoothing,
+    8: lambda pixels: _sub_sample(smoothing(smoothing(pixels)), 2)
+}
+
+
+def get_coeffs(upscaling_factor, kernel_size=5, normalized=True, dtype="float"):
+  """Derives the per-position upscaling kernels from the smoothing procedure.
+
+  Each unit impulse of a square patch of side 2 * kernel_size + 1 is upscaled
+  with `upscaling[upscaling_factor]`; the responses around the patch center
+  are then gathered into one kernel per output sub-pixel position.
+
+  Args:
+    upscaling_factor: 2, 4, or 8
+    kernel_size: must be odd
+    normalized: if True, each kernel_size x kernel_size kernel sums to 1
+    dtype: type of numpy array to return
+
+  Returns:
+    A (upscaling_factor x upscaling_factor) matrix of
+    (kernel_size x kernel_size) matrices, describing the kernel for all pixels.
+  """
+  upscale = upscaling[upscaling_factor]
+  patch = 2 * kernel_size + 1
+  # impulses[a, b] is a patch x patch image that is 1 at (a, b), 0 elsewhere.
+  impulses = np.eye(patch * patch, dtype=dtype).reshape((patch,) * 4)
+
+  # Slow: runs the whole upscaling pipeline once per impulse.
+  responses = np.array([[upscale(impulses[a, b])
+                         for a in range(patch)]
+                        for b in range(patch)])
+
+  middle = patch // 2
+  lower = middle - kernel_size // 2
+  upper = middle + kernel_size // 2 + 1
+  window = range(lower, upper)
+  # Sanity checks: the window has kernel_size taps centered on `middle`.
+  assert len(window) == kernel_size
+  assert sum(window) == kernel_size * middle
+
+  origin = upscaling_factor * middle
+  coefficients = np.array([[[[responses[i, j, origin + b, origin + a]
+                              for i in window]
+                             for j in window]
+                            for a in range(upscaling_factor)]
+                           for b in range(upscaling_factor)])
+
+  if not normalized:
+    return coefficients
+  # Scale every kernel so that its weights add up to 1.
+  return coefficients / coefficients.sum(axis=(2, 3), keepdims=True)
+
+
+def indices_matrix(upscaling_factor, kernel_size=5):
+  """Returns the weight-index matrix with all kernel symmetries applied.
+
+  The upper triangle of the top-left quadrant is numbered consecutively from
+  1; the numbering is then mirrored across the diagonal, top-to-bottom and
+  left-to-right, and finally every entry is decreased by 1.
+  """
+  side = upscaling_factor * kernel_size
+  half = side // 2
+  def with_mirror(base, mirrored):
+    # Keep `base`, adding the mirrored entry wherever the two disagree.
+    return base + mirrored * (base != mirrored)
+
+  matrix = np.zeros(shape=[side, side], dtype="int16")
+  pairs = itertools.combinations_with_replacement(range(half), 2)
+  for counter, (i, j) in enumerate(pairs, start=1):
+    matrix[i, j] = counter
+  matrix = with_mirror(matrix, matrix.transpose())
+  matrix = with_mirror(matrix, np.flip(matrix, axis=0))
+  return with_mirror(matrix, np.flip(matrix, axis=1)) - 1
+
+
+def format_indices_matrix(upscaling_factor, kernel_size=5):
+  """Returns the top-left index-matrix quadrant as `//` lines of hex numbers."""
+  # Forward kernel_size so the matrix matches the kernel_size-based indexing.
+  indices = indices_matrix(upscaling_factor, kernel_size)
+  output_str = []
+  for i in range(upscaling_factor // 2):
+    for j in range(kernel_size):
+      output_str.append("//")
+      for a in range(upscaling_factor // 2):
+        for b in range(kernel_size):
+          index = int(indices[kernel_size * i + j][kernel_size * a + b])
+          output_str.append(f"{index:x}".rjust(2) + " ")
+        output_str.append(" ")
+      output_str.append("\n")
+    output_str.append("\n")
+  return "".join(output_str)
+
+
+def weights_arrays(upscaling_factor, kernel_size=5):
+  """Returns string describing the depth-4 `kernel` array via `weights[i]`."""
+  indices = indices_matrix(upscaling_factor, kernel_size)  # same size as used below
+  return (
+      f"kernel[{upscaling_factor}][{upscaling_factor}][{kernel_size}][{kernel_size}]"
+      f" = {{" + ", \n".join("{\n" + ", \n\n".join(
+          ("{" + ", \n".join("{" + ", ".join(
+              f"weights[{str(indices[kernel_size*i + j][kernel_size*a + b])}]"
+              for b in range(kernel_size)) + "}"
+                             for j in range(kernel_size)) + "}"
+           for a in range(upscaling_factor // 2))) + "\n}"
+                             for i in range(upscaling_factor // 2)) + "}\n")
+
+
+def coefficients_list(upscaling_factor, kernel_size=5):
+  """Returns the `kWeights<factor> = {...};` line listing the unique weights.
+
+  Only the upper triangle of the top-left quadrant of the flattened
+  coefficient matrix is emitted, each value with 8 decimals and an `f` suffix.
+  """
+  side = kernel_size * upscaling_factor
+  flat = get_coeffs(upscaling_factor, kernel_size).transpose([0, 2, 1, 3])
+  flat = flat.reshape(side, side)
+  pairs = itertools.combinations_with_replacement(range(side // 2), 2)
+  weights = ", ".join(f"{flat[i][j]:.8f}f" for i, j in pairs)
+  return f"kWeights{upscaling_factor} = {{" + weights + "};"
+
+
+def print_all_output(upscaling_factor):  # Prints all three generated tables.
+  print(format_indices_matrix(upscaling_factor))
+  print(coefficients_list(upscaling_factor), end="\n\n")
+  print(weights_arrays(upscaling_factor))
+
+
+def main():
+  """Prints the tables for one factor, or for 2, 4 and 8 when none is given."""
+  parser = argparse.ArgumentParser(
+      description="Generates coefficients used in upscaling.")
+  parser.add_argument(
+      "upscaling_factor",
+      type=int,
+      choices=(2, 4, 8),  # any other value has no entry in `upscaling`
+      help="upscaling factor, must be  2, 4 or 8.",
+      nargs="?",
+      default=None)
+  upscaling_factor = parser.parse_args().upscaling_factor
+  if upscaling_factor is not None:
+    print_all_output(upscaling_factor)
+    return
+  for factor in [2, 4, 8]:
+    print(f"upscaling factor = {factor}")
+    print_all_output(factor)
+
+
+if __name__ == "__main__":
+  main()
diff --git a/third-party/libjxl/libjxl/tools/upscaling_coefficients/upscaler_demo.py b/third-party/libjxl/libjxl/tools/upscaling_coefficients/upscaler_demo.py
new file mode 100644
index 0000000000..e873bd10dc
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/upscaling_coefficients/upscaler_demo.py
@@ -0,0 +1,814 @@
+#!/usr/bin/env python3
+
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+"""Demo for upscaling.
+
+Given an upscaling factor which can be 2, 4 or 8 we demo upscaling an image by
+that factor.
+
+usage: upscaler_demo.py [-h] [--upscaling_factor N] input_filename output_filename
+
+Upscaling of an image by a factor of 2, 4 or 8.
+
+positional arguments:
+  input_filename        of the PNG image to be upscaled.
+  output_filename       where the upscaled image is written as PNG.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --upscaling_factor N  where N must be 2, 4 (default) or 8.
+"""
+from PIL import Image
+
+import argparse
+import numpy as np
+
+
+def convolution(pixels, kernel):
+  """Returns the convolution of `pixels` with `kernel`.
+
+  Uses padding such that the shape of the returned convolved array is the
+  same as the shape of `pixels`, scaled by the upscaling_factor implied by the
+  `kernel`.
+
+  Args:
+    pixels: A [height, width]- or [height, width, num_channels]-array
+      representing an image.
+    kernel: A [upscaling_factor, upscaling_factor, kernel_size,
+      kernel_size]-array used for the convolution.
+
+  Returns:
+    An array shaped like `pixels` with height and width multiplied by
+    upscaling_factor, representing the convolved upscaled image.
+  """
+  upscaling_factor, _, kernel_size, _ = kernel.shape
+  output_shape = list(pixels.shape)
+  output_shape[0] *= upscaling_factor
+  output_shape[1] *= upscaling_factor
+  # Reshape to [height, width, channels], adding a channel axis if missing.
+  shaped_pixels = pixels.reshape(pixels.shape[:2] + (-1,))
+  pad_width = kernel_size//2
+  # Replicate edge pixels so that border pixels also get a full neighbourhood.
+  padded_pixels = np.pad(
+      shaped_pixels, 2*[2*[pad_width]] + [[0, 0]], mode='edge')
+  x, y, _ = shaped_pixels.shape
+  # Every input pixel produces one [channels, factor, factor] output block.
+  convoluted = np.block([[np.einsum('rc...,RCrc->...RC',
+                                    padded_pixels[i - pad_width: i + pad_width + 1,
+                                                  j - pad_width: j + pad_width + 1],
+                                    kernel)
+                          for j in range(pad_width, pad_width + y)]
+                         for i in range(pad_width, pad_width + x)])
+  return np.moveaxis(convoluted, 0, -1).reshape(output_shape)
+
+
+def main():
+    """Parses the command line, upscales the input image and saves the result."""
+    parser = argparse.ArgumentParser(
+        description="Upscaling of an image by a factor of 2, 4 or 8.")
+    parser.add_argument(
+        "--upscaling_factor",
+        type=int,
+        help="where N must be  2, 4 (default) or 8.",
+        nargs=1,
+        default=[4],
+        metavar='N')
+    parser.add_argument(
+        "input_filename", type=str, help="of the PNG image to be upscaled.")
+    parser.add_argument(
+        "output_filename",
+        type=str,
+        help="where the upscaled image is written as PNG.")
+    args = parser.parse_args()
+
+    # nargs=1 stores the value in a one-element list.
+    upscaling_factor = args.upscaling_factor[0]
+    kernel_size = 5
+    if upscaling_factor not in (2, 4, 8):
+        raise ValueError("upscaling_factor must be 2, 4 or 8.")
+    kernel = np.array(_get_scaling_kernels()[upscaling_factor])
+    expected_shape = (upscaling_factor,) * 2 + (kernel_size,) * 2
+    assert kernel.shape == expected_shape
+
+    source = Image.open(args.input_filename)
+    # Palette images are expanded to RGB before filtering.
+    if source.mode == 'P':
+        source = source.convert('RGB')
+    upscaled_float = convolution(np.array(source), kernel)
+
+    # Round to the nearest integer in [0, 255] and keep the source mode.
+    eight_bit = np.rint(np.clip(upscaled_float, 0, 255)).astype(np.uint8)
+    Image.fromarray(eight_bit, source.mode).save(args.output_filename)
+
+
+def _get_scaling_kernels():
+    return {2: [[[[-0.017162003089909145, -0.0345230259724203, -0.04022174342753632,
+    -0.029210135410064335, -0.006246448474415789], [-0.0345230259724203,
+    0.14111091126932612, 0.28896754962953114, 0.0027871809188615613,
+    -0.016102674925096382], [-0.04022174342753632, 0.28896754962953114,
+    0.5666155013385713, 0.037776067445408776, -0.01986694439461126],
+    [-0.029210135410064335, 0.0027871809188615535, 0.03777606744540877,
+    -0.031447310821961526, -0.011850679991269755], [-0.006246448474415788,
+    -0.01610267492509638, -0.019866944394611258, -0.011850679991269755,
+    -0.0021353894928012747]], [[-0.006246448474415787, -0.029210135410064328,
+    -0.040221743427536316, -0.034523025972420296, -0.01716200308990914],
+    [-0.01610267492509638, 0.0027871809188615582, 0.2889675496295311,
+    0.1411109112693261, -0.034523025972420296], [-0.019866944394611254,
+    0.037776067445408755, 0.5666155013385712, 0.2889675496295311,
+    -0.04022174342753631], [-0.011850679991269751, -0.03144731082196152,
+    0.037776067445408755, 0.00278718091886156, -0.029210135410064324],
+    [-0.0021353894928012743, -0.011850679991269751, -0.01986694439461125,
+    -0.016102674925096375, -0.006246448474415786]]], [[[-0.006246448474415788,
+    -0.01610267492509638, -0.019866944394611258, -0.011850679991269755,
+    -0.0021353894928012747], [-0.02921013541006433, 0.002787180918861557,
+    0.03777606744540876, -0.031447310821961526, -0.011850679991269755],
+    [-0.04022174342753632, 0.28896754962953114, 0.5666155013385712,
+    0.037776067445408776, -0.019866944394611258], [-0.0345230259724203,
+    0.14111091126932612, 0.28896754962953114, 0.0027871809188615595,
+    -0.016102674925096382], [-0.017162003089909145, -0.03452302597242031,
+    -0.04022174342753633, -0.029210135410064335, -0.006246448474415789]],
+    [[-0.0021353894928012747, -0.011850679991269755, -0.019866944394611258,
+    -0.01610267492509638, -0.006246448474415788], [-0.011850679991269755,
+    -0.031447310821961526, 0.03777606744540876, 0.002787180918861564,
+    -0.02921013541006433], [-0.019866944394611258, 0.037776067445408776,
+    0.5666155013385712, 0.28896754962953114, -0.040221743427536316],
+    [-0.016102674925096382, 0.002787180918861556, 0.28896754962953114,
+    0.14111091126932615, -0.0345230259724203], [-0.006246448474415789,
+    -0.029210135410064335, -0.04022174342753633, -0.0345230259724203,
+    -0.017162003089909145]]]],
+    4: [[[[-0.024190672183733018, -0.03491987403959535, -0.036933511116288356,
+    -0.03094284535390427, -0.005297851729507614], [-0.03491987403959535,
+    0.23651958284942343, 0.3339294481745815, -0.010735433431237009,
+    -0.013131808617501706], [-0.036933511116288356, 0.3339294481745815,
+    0.4691419769580017, -0.0020927007975838127, -0.014845888917802386],
+    [-0.030942845353904277, -0.010735433431237024, -0.0020927007975838035,
+    -0.035516824721615874, -0.007548300818273063], [-0.005297851729507614,
+    -0.013131808617501708, -0.014845888917802386, -0.007548300818273063,
+    -0.0009165296078520004]], [[-0.01663431734052121, -0.03556862997573282,
+    -0.0388890539890255, -0.035168498619353575, -0.009894687488916538],
+    [-0.03556694367519552, 0.13048175192612746, 0.40103024797994685,
+    0.03951149796198834, -0.02077584470399766], [-0.04064806042030105,
+    0.18942529580147974, 0.5627989220290085, 0.06674400125646836,
+    -0.023354943007463536], [-0.0226791877794674, -0.023635779153108244,
+    0.0031580414703133823, -0.03399097960642573, -0.013595188211470589],
+    [-0.003354666868160516, -0.011632944561351362, -0.016102939237729652,
+    -0.00974087766582541, -0.0019162161212866041]], [[-0.009894687488916542,
+    -0.03516849861935358, -0.03888905398902551, -0.035568629975732825,
+    -0.016634317340521215], [-0.020775844703997664, 0.03951149796198835,
+    0.4010302479799469, 0.13048175192612743, -0.03556694367519553],
+    [-0.02335494300746354, 0.06674400125646836, 0.5627989220290086,
+    0.18942529580147976, -0.04064806042030106], [-0.01359518821147059,
+    -0.033990979606425734, 0.003158041470313383, -0.02363577915310824,
+    -0.022679187779467407], [-0.0019162161212866043, -0.00974087766582541,
+    -0.016102939237729656, -0.011632944561351364, -0.0033546668681605166]],
+    [[-0.005297851729507613, -0.030942845353904264, -0.036933511116288356,
+    -0.034919874039595344, -0.024190672183733015], [-0.013131808617501703,
+    -0.010735433431237012, 0.33392944817458137, 0.23651958284942343,
+    -0.03491987403959533], [-0.014845888917802382, -0.0020927007975838153,
+    0.4691419769580016, 0.33392944817458153, -0.03693351111628834],
+    [-0.007548300818273061, -0.03551682472161587, -0.0020927007975838053,
+    -0.010735433431237016, -0.030942845353904264], [-0.0009165296078520002,
+    -0.007548300818273061, -0.014845888917802382, -0.013131808617501704,
+    -0.005297851729507613]]], [[[-0.01663431734052122, -0.03556694367519555,
+    -0.040648060420301065, -0.02267918777946741, -0.0033546668681605175],
+    [-0.03556862997573284, 0.13048175192612746, 0.18942529580147982,
+    -0.023635779153108258, -0.011632944561351367], [-0.038889053989025514,
+    0.401030247979947, 0.5627989220290087, 0.0031580414703133814,
+    -0.01610293923772966], [-0.03516849861935359, 0.03951149796198835,
+    0.06674400125646837, -0.03399097960642574, -0.009740877665825412],
+    [-0.009894687488916542, -0.020775844703997664, -0.023354943007463547,
+    -0.01359518821147059, -0.0019162161212866046]], [[-0.01095445961681655,
+    -0.0319846366701879, -0.04455120920314033, -0.027997902912581793,
+    -0.006459118117528576], [-0.0319846366701879, 0.06390599280769027,
+    0.22963887988104975, 0.006309810655924714, -0.018973492447769916],
+    [-0.04455120920314033, 0.2296388798810498, 0.67537268393182,
+    0.08483369316914859, -0.025349935472536677], [-0.027997902912581786,
+    0.006309810655924713, 0.08483369316914857, -0.02205197197850368,
+    -0.016679994683747115], [-0.006459118117528575, -0.018973492447769913,
+    -0.02534993547253667, -0.016679994683747115, -0.0038444335414517822]],
+    [[-0.006459118117528576, -0.02799790291258179, -0.04455120920314034,
+    -0.0319846366701879, -0.010954459616816552], [-0.01897349244776992,
+    0.006309810655924714, 0.22963887988104978, 0.06390599280769028,
+    -0.03198463667018791], [-0.025349935472536677, 0.08483369316914859,
+    0.6753726839318202, 0.22963887988104975, -0.04455120920314034],
+    [-0.016679994683747118, -0.022051971978503677, 0.08483369316914859,
+    0.0063098106559247085, -0.02799790291258179], [-0.0038444335414517827,
+    -0.016679994683747115, -0.02534993547253667, -0.018973492447769916,
+    -0.006459118117528575]], [[-0.0033546668681605166, -0.022679187779467407,
+    -0.04064806042030106, -0.03556694367519554, -0.016634317340521218],
+    [-0.011632944561351364, -0.023635779153108254, 0.18942529580147976,
+    0.13048175192612743, -0.035568629975732825], [-0.016102939237729656,
+    0.0031580414703133762, 0.5627989220290086, 0.40103024797994696,
+    -0.038889053989025486], [-0.009740877665825409, -0.033990979606425734,
+    0.06674400125646834, 0.03951149796198835, -0.035168498619353575],
+    [-0.0019162161212866041, -0.013595188211470589, -0.02335494300746354,
+    -0.02077584470399766, -0.00989468748891654]]], [[[-0.009894687488916542,
+    -0.020775844703997664, -0.023354943007463547, -0.01359518821147059,
+    -0.0019162161212866046], [-0.03516849861935359, 0.03951149796198835,
+    0.06674400125646836, -0.03399097960642574, -0.009740877665825412],
+    [-0.03888905398902551, 0.401030247979947, 0.5627989220290087,
+    0.0031580414703133814, -0.01610293923772966], [-0.03556862997573284,
+    0.13048175192612746, 0.18942529580147982, -0.023635779153108258,
+    -0.011632944561351367], [-0.016634317340521218, -0.03556694367519555,
+    -0.040648060420301065, -0.022679187779467418, -0.0033546668681605175]],
+    [[-0.006459118117528575, -0.018973492447769916, -0.02534993547253667,
+    -0.016679994683747118, -0.0038444335414517827], [-0.02799790291258179,
+    0.006309810655924723, 0.08483369316914856, -0.022051971978503684,
+    -0.016679994683747118], [-0.04455120920314034, 0.22963887988104978,
+    0.6753726839318203, 0.0848336931691486, -0.02534993547253667],
+    [-0.03198463667018791, 0.06390599280769028, 0.22963887988104978,
+    0.006309810655924709, -0.018973492447769923], [-0.010954459616816552,
+    -0.03198463667018791, -0.04455120920314034, -0.027997902912581796,
+    -0.006459118117528576]], [[-0.0038444335414517822, -0.01667999468374711,
+    -0.02534993547253667, -0.018973492447769913, -0.006459118117528575],
+    [-0.016679994683747115, -0.02205197197850368, 0.08483369316914854,
+    0.006309810655924723, -0.027997902912581786], [-0.02534993547253667,
+    0.08483369316914859, 0.6753726839318202, 0.22963887988104975,
+    -0.04455120920314033], [-0.01897349244776992, 0.006309810655924712,
+    0.22963887988104975, 0.06390599280769027, -0.0319846366701879],
+    [-0.006459118117528576, -0.027997902912581786, -0.04455120920314033,
+    -0.0319846366701879, -0.01095445961681655]], [[-0.0019162161212866041,
+    -0.013595188211470589, -0.02335494300746354, -0.02077584470399766,
+    -0.00989468748891654], [-0.009740877665825409, -0.033990979606425734,
+    0.06674400125646834, 0.03951149796198835, -0.03516849861935358],
+    [-0.016102939237729656, 0.0031580414703133762, 0.5627989220290086,
+    0.40103024797994696, -0.03888905398902548], [-0.011632944561351364,
+    -0.023635779153108254, 0.18942529580147976, 0.1304817519261275,
+    -0.035568629975732825], [-0.0033546668681605166, -0.022679187779467414,
+    -0.04064806042030106, -0.03556694367519554, -0.016634317340521215]]],
+    [[[-0.005297851729507615, -0.013131808617501711, -0.01484588891780239,
+    -0.007548300818273065, -0.0009165296078520006], [-0.030942845353904277,
+    -0.010735433431237028, -0.0020927007975838087, -0.03551682472161588,
+    -0.007548300818273065], [-0.03693351111628837, 0.3339294481745815,
+    0.4691419769580017, -0.002092700797583813, -0.01484588891780239],
+    [-0.03491987403959536, 0.23651958284942348, 0.33392944817458153,
+    -0.010735433431237012, -0.01313180861750171], [-0.024190672183733025,
+    -0.034919874039595365, -0.03693351111628837, -0.030942845353904277,
+    -0.005297851729507615]], [[-0.0033546668681605166, -0.011632944561351364,
+    -0.016102939237729656, -0.009740877665825412, -0.0019162161212866043],
+    [-0.022679187779467404, -0.023635779153108247, 0.003158041470313383,
+    -0.033990979606425734, -0.013595188211470589], [-0.04064806042030106,
+    0.18942529580147982, 0.5627989220290085, 0.06674400125646837,
+    -0.023354943007463547], [-0.03556694367519553, 0.1304817519261275,
+    0.4010302479799469, 0.03951149796198835, -0.020775844703997653],
+    [-0.016634317340521215, -0.035568629975732825, -0.038889053989025514,
+    -0.035168498619353575, -0.009894687488916542]], [[-0.0019162161212866048,
+    -0.009740877665825414, -0.01610293923772966, -0.011632944561351367,
+    -0.0033546668681605175], [-0.01359518821147059, -0.03399097960642574,
+    0.0031580414703133836, -0.023635779153108254, -0.022679187779467407],
+    [-0.023354943007463554, 0.06674400125646839, 0.5627989220290086,
+    0.18942529580147982, -0.040648060420301065], [-0.020775844703997657,
+    0.03951149796198836, 0.401030247979947, 0.13048175192612746,
+    -0.035566943675195535], [-0.009894687488916544, -0.03516849861935359,
+    -0.03888905398902552, -0.03556862997573283, -0.016634317340521218]],
+    [[-0.0009165296078520004, -0.007548300818273063, -0.014845888917802386,
+    -0.013131808617501708, -0.005297851729507614], [-0.007548300818273063,
+    -0.035516824721615874, -0.0020927007975838083, -0.010735433431237009,
+    -0.03094284535390427], [-0.014845888917802386, -0.0020927007975838166,
+    0.4691419769580016, 0.3339294481745815, -0.036933511116288356],
+    [-0.013131808617501706, -0.010735433431237014, 0.3339294481745815,
+    0.23651958284942348, -0.03491987403959534], [-0.005297851729507614,
+    -0.03094284535390427, -0.03693351111628836, -0.03491987403959535,
+    -0.024190672183733018]]]],
+    8: [[[[-0.029286133281073247, -0.03706352644207269, -0.0378381168526885,
+    -0.03324558280295302, -0.004476318148146651], [-0.0370635264420727,
+    0.29895328454745274, 0.3575770812164143, -0.024475522375569658,
+    -0.010817484288013228], [-0.0378381168526885, 0.35757708121641435,
+    0.42720050241527285, -0.0224893852885426, -0.01155272937910007],
+    [-0.03324558280295302, -0.024475522375569672, -0.022489385288542597,
+    -0.03680917952171095, -0.005422291349995999], [-0.00447631814814665,
+    -0.01081748428801323, -0.011552729379100074, -0.005422291349995998,
+    -0.00045072273860512197]], [[-0.02519406150475052, -0.037526010691823306,
+    -0.03901507994141054, -0.03663285147762567, -0.006466489422914399],
+    [-0.043145939817870266, 0.23903219477825294, 0.41119300519363017,
+    -0.005730455022054139, -0.014502394951723473], [-0.04562755195174026,
+    0.28689495518965613, 0.4909386897413151, -7.890574314417001e-05,
+    -0.015459264122748742], [-0.029204772772557758, -0.02788574061911041,
+    -0.021181804710686657, -0.039424021044039116, -0.007755474877032563],
+    [-0.003601096394526256, -0.010202069931803576, -0.012319067611648214,
+    -0.006389875713059274, -0.0007159165805851706]], [[-0.020664074967504838,
+    -0.03838632575427139, -0.04002101086742024, -0.03900035414027985,
+    -0.009019734953997754], [-0.042468451339058966, 0.1756761813778118,
+    0.45220642702382896, 0.02287757117854141, -0.019367833372750356],
+    [-0.045626588213857136, 0.21238920010551757, 0.5398093391410694,
+    0.033694739393926816, -0.020702111700092594], [-0.024336140047717,
+    -0.03193943219458267, -0.020308275361446707, -0.04044013741654317,
+    -0.010740155274818487], [-0.002791220988040244, -0.009571146384946013,
+    -0.012883266171804216, -0.007309372111524051, -0.0010778269600400276]],
+    [[-0.016263925397518374, -0.039541478550530786, -0.04046620032608076,
+    -0.03979621423581153, -0.012244853215160445], [-0.03583254566206615,
+    0.11572472115297627, 0.47416733354946305, 0.06284440084948137,
+    -0.026850659249274114], [-0.038669884759381434, 0.1422954970729258,
+    0.5659339775075575, 0.08045180751196822, -0.028882977402423956],
+    [-0.01930821727497102, -0.03620398561701563, -0.019741250657301437,
+    -0.03919545281633189, -0.014560933634183603], [-0.0021015621671157305,
+    -0.008907053401106528, -0.013176682690936201, -0.008138951872408835,
+    -0.0015349087147535298]], [[-0.01224485321516044, -0.03979621423581152,
+    -0.04046620032608074, -0.03954147855053078, -0.016263925397518367],
+    [-0.0268506592492741, 0.06284440084948137, 0.47416733354946283,
+    0.11572472115297619, -0.03583254566206614], [-0.028882977402423942,
+    0.0804518075119682, 0.5659339775075574, 0.14229549707292571,
+    -0.03866988475938142], [-0.014560933634183596, -0.03919545281633188,
+    -0.01974125065730143, -0.03620398561701561, -0.019308217274971014],
+    [-0.0015349087147535291, -0.008138951872408828, -0.013176682690936196,
+    -0.008907053401106523, -0.002101562167115729]], [[-0.00901973495399775,
+    -0.039000354140279844, -0.040021010867420236, -0.03838632575427138,
+    -0.020664074967504838], [-0.019367833372750352, 0.02287757117854141,
+    0.4522064270238289, 0.17567618137781174, -0.04246845133905896],
+    [-0.020702111700092587, 0.03369473939392681, 0.5398093391410693,
+    0.21238920010551757, -0.04562658821385712], [-0.010740155274818485,
+    -0.04044013741654316, -0.020308275361446707, -0.031939432194582666,
+    -0.024336140047717], [-0.0010778269600400273, -0.007309372111524049,
+    -0.012883266171804212, -0.00957114638494601, -0.0027912209880402426]],
+    [[-0.006466489422914402, -0.03663285147762569, -0.03901507994141056,
+    -0.03752601069182331, -0.02519406150475053], [-0.014502394951723478,
+    -0.005730455022054147, 0.4111930051936302, 0.23903219477825297,
+    -0.04314593981787026], [-0.015459264122748746, -7.890574314417718e-05,
+    0.4909386897413152, 0.2868949551896563, -0.045627551951740265],
+    [-0.007755474877032565, -0.03942402104403913, -0.021181804710686664,
+    -0.027885740619110408, -0.029204772772557765], [-0.0007159165805851706,
+    -0.006389875713059275, -0.012319067611648218, -0.01020206993180358,
+    -0.003601096394526257]], [[-0.00447631814814665, -0.03324558280295302,
+    -0.0378381168526885, -0.03706352644207268, -0.02928613328107324],
+    [-0.01081748428801323, -0.024475522375569672, 0.3575770812164142,
+    0.2989532845474528, -0.03706352644207268], [-0.01155272937910007,
+    -0.02248938528854261, 0.42720050241527285, 0.35757708121641446,
+    -0.037838116852688494], [-0.005422291349995998, -0.03680917952171095,
+    -0.022489385288542604, -0.024475522375569658, -0.03324558280295301],
+    [-0.0004507227386051219, -0.005422291349995998, -0.01155272937910007,
+    -0.010817484288013232, -0.00447631814814665]]], [[[-0.025194061504750523,
+    -0.043145939817870266, -0.04562755195174026, -0.02920477277255776,
+    -0.0036010963945262565], [-0.037526010691823306, 0.23903219477825288,
+    0.28689495518965624, -0.0278857406191104, -0.010202069931803576],
+    [-0.03901507994141054, 0.4111930051936302, 0.4909386897413151,
+    -0.021181804710686657, -0.01231906761164821], [-0.03663285147762567,
+    -0.005730455022054155, -7.890574314415865e-05, -0.039424021044039116,
+    -0.006389875713059272], [-0.0064664894229143986, -0.014502394951723471,
+    -0.015459264122748746, -0.00775547487703256, -0.0007159165805851706]],
+    [[-0.02128481178805433, -0.04173044153813555, -0.04831487472573022,
+    -0.03293190035303922, -0.005252595229206095], [-0.041730441538135564,
+    0.18968272846778533, 0.3306368426789878, -0.013001053856678076,
+    -0.01372950329294693], [-0.04831487472573022, 0.3306368426789878,
+    0.5640812622041927, 0.004583518760872409, -0.016482266055193047],
+    [-0.03293190035303923, -0.013001053856678086, 0.004583518760872414,
+    -0.040827417160105635, -0.009045186119473492], [-0.005252595229206096,
+    -0.01372950329294693, -0.016482266055193047, -0.00904518611947349,
+    -0.0011168422627331077]], [[-0.017203222289937238, -0.040527364499551265,
+    -0.050457063493932794, -0.036073170059570094, -0.007380297997922879],
+    [-0.0401746501391571, 0.13727831636454993, 0.3640223411093611,
+    0.010278898793053761, -0.01832107424986819], [-0.04887867620762643,
+    0.24585519478421125, 0.6202613509857569, 0.04314806591631964,
+    -0.022137366266623233], [-0.02790922286627615, -0.021178184193661707,
+    0.007986619792820032, -0.039957113612285294, -0.012434273033433196],
+    [-0.00411203529942813, -0.012971303942569701, -0.01723725281482718,
+    -0.010225452530604957, -0.0016530642487971611]], [[-0.013417641633011908,
+    -0.03965629331558996, -0.051516162733405924, -0.038148863041386254,
+    -0.010058190693394595], [-0.03365072121724163, 0.08734505711506498,
+    0.38194295165025005, 0.04338227748703876, -0.025259934728481214],
+    [-0.04158013952905281, 0.16637288777763284, 0.6502702298731253,
+    0.0962163605307964, -0.031013880437287037], [-0.02231705358706074,
+    -0.02946265951499448, 0.009920547334197453, -0.03600283468483377,
+    -0.01684919502363355], [-0.003131096848010505, -0.012180160279609381,
+    -0.01763265975706309, -0.011256197301616299, -0.0023166274424323116]],
+    [[-0.01005819069339459, -0.03814886304138624, -0.0515161627334059,
+    -0.03965629331558995, -0.013417641633011903], [-0.025259934728481193,
+    0.04338227748703876, 0.38194295165024994, 0.08734505711506492,
+    -0.033650721217241615], [-0.031013880437287013, 0.09621636053079634,
+    0.6502702298731251, 0.1663728877776328, -0.04158013952905279],
+    [-0.016849195023633544, -0.03600283468483376, 0.009920547334197446,
+    -0.029462659514994466, -0.022317053587060723], [-0.0023166274424323103,
+    -0.01125619730161629, -0.017632659757063088, -0.012180160279609381,
+    -0.0031310968480105037]], [[-0.007380297997922879, -0.0360731700595701,
+    -0.0504570634939328, -0.040527364499551265, -0.01720322228993724],
+    [-0.01832107424986819, 0.010278898793053765, 0.36402234110936105,
+    0.1372783163645499, -0.040174650139157095], [-0.022137366266623233,
+    0.043148065916319624, 0.6202613509857569, 0.24585519478421133,
+    -0.04887867620762643], [-0.012434273033433196, -0.039957113612285294,
+    0.007986619792820032, -0.0211781841936617, -0.027909222866276145],
+    [-0.0016530642487971611, -0.010225452530604957, -0.01723725281482718,
+    -0.012971303942569701, -0.004112035299428131]], [[-0.005252595229206095,
+    -0.03293190035303923, -0.04831487472573021, -0.04173044153813554,
+    -0.021284811788054327], [-0.013729503292946926, -0.013001053856678083,
+    0.33063684267898774, 0.1896827284677853, -0.04173044153813554],
+    [-0.016482266055193047, 0.004583518760872396, 0.5640812622041926,
+    0.33063684267898785, -0.0483148747257302], [-0.00904518611947349,
+    -0.04082741716010563, 0.0045835187608724145, -0.013001053856678076,
+    -0.03293190035303922], [-0.0011168422627331075, -0.009045186119473489,
+    -0.016482266055193043, -0.013729503292946926, -0.005252595229206093]],
+    [[-0.0036010963945262565, -0.029204772772557765, -0.04562755195174027,
+    -0.04314593981787027, -0.02519406150475053], [-0.01020206993180358,
+    -0.02788574061911041, 0.2868949551896562, 0.239032194778253,
+    -0.037526010691823306], [-0.012319067611648214, -0.02118180471068667,
+    0.4909386897413152, 0.41119300519363033, -0.039015079941410534],
+    [-0.0063898757130592745, -0.03942402104403913, -7.890574314417213e-05,
+    -0.00573045502205414, -0.036632851477625676], [-0.0007159165805851707,
+    -0.007755474877032561, -0.015459264122748746, -0.014502394951723478,
+    -0.0064664894229143986]]], [[[-0.020664074967504838,
+    -0.042468451339058966, -0.04562658821385713, -0.024336140047717003,
+    -0.002791220988040243], [-0.03838632575427139, 0.1756761813778117,
+    0.21238920010551754, -0.031939432194582666, -0.00957114638494601],
+    [-0.04002101086742023, 0.4522064270238289, 0.5398093391410693,
+    -0.020308275361446703, -0.012883266171804212], [-0.039000354140279844,
+    0.022877571178541382, 0.03369473939392682, -0.04044013741654317,
+    -0.007309372111524049], [-0.00901973495399775, -0.019367833372750352,
+    -0.02070211170009259, -0.010740155274818482, -0.0010778269600400273]],
+    [[-0.017203222289937245, -0.040174650139157116, -0.04887867620762643,
+    -0.027909222866276156, -0.004112035299428132], [-0.040527364499551286,
+    0.13727831636454993, 0.24585519478421136, -0.021178184193661704,
+    -0.01297130394256971], [-0.05045706349393281, 0.3640223411093611,
+    0.6202613509857569, 0.007986619792820032, -0.017237252814827183],
+    [-0.03607317005957011, 0.010278898793053753, 0.04314806591631965,
+    -0.03995711361228531, -0.010225452530604962], [-0.007380297997922879,
+    -0.01832107424986819, -0.022137366266623236, -0.012434273033433195,
+    -0.0016530642487971616]], [[-0.013741489638205826, -0.037976197105406395,
+    -0.05142937279813894, -0.031173068184164848, -0.005819138225232018],
+    [-0.03797619710540639, 0.09628103622608752, 0.271299908889608,
+    -0.003537793416633379, -0.017341510615908634], [-0.05142937279813893,
+    0.271299908889608, 0.6821432561027145, 0.050180479222290235,
+    -0.023208515458651935], [-0.031173068184164848, -0.003537793416633386,
+    0.050180479222290235, -0.03637638898995256, -0.013943731217351598],
+    [-0.005819138225232015, -0.017341510615908627, -0.023208515458651935,
+    -0.013943731217351598, -0.002408535881464665]], [[-0.010640027531642969,
+    -0.03608088767126983, -0.05272168029782533, -0.03375669845324461,
+    -0.007955856657996018], [-0.03153980584721341, 0.05686230181726655,
+    0.28500998074905703, 0.02230594207226229, -0.023749554287216885],
+    [-0.04383615619088237, 0.1845947431815049, 0.7151797455936234,
+    0.10805612743320024, -0.03263677274980321], [-0.02511202585552835,
+    -0.017286364047844695, 0.054073310615547404, -0.028675684605006257,
+    -0.018931312997898533], [-0.004465109850519687, -0.01636186809305578,
+    -0.023770526019940293, -0.01522847594949364, -0.0033333443560800225]],
+    [[-0.007955856657996014, -0.033756698453244596, -0.052721680297825306,
+    -0.03608088767126982, -0.010640027531642969], [-0.023749554287216878,
+    0.022305942072262296, 0.285009980749057, 0.056862301817266515,
+    -0.031539805847213394], [-0.0326367727498032, 0.10805612743320023,
+    0.7151797455936234, 0.18459474318150482, -0.04383615619088237],
+    [-0.018931312997898533, -0.028675684605006243, 0.05407331061554739,
+    -0.0172863640478447, -0.025112025855528346], [-0.0033333443560800216,
+    -0.015228475949493633, -0.023770526019940286, -0.016361868093055777,
+    -0.004465109850519686]], [[-0.005819138225232017, -0.031173068184164845,
+    -0.05142937279813894, -0.037976197105406395, -0.013741489638205831],
+    [-0.01734151061590863, -0.003537793416633379, 0.27129990888960803,
+    0.09628103622608748, -0.03797619710540639], [-0.023208515458651945,
+    0.05018047922229022, 0.6821432561027146, 0.27129990888960803,
+    -0.05142937279813893], [-0.013943731217351596, -0.03637638898995256,
+    0.050180479222290235, -0.003537793416633377, -0.03117306818416484],
+    [-0.0024085358814646654, -0.013943731217351596, -0.023208515458651942,
+    -0.01734151061590863, -0.005819138225232016]], [[-0.004112035299428132,
+    -0.02790922286627615, -0.04887867620762644, -0.04017465013915711,
+    -0.017203222289937245], [-0.012971303942569708, -0.021178184193661718,
+    0.24585519478421136, 0.13727831636454993, -0.04052736449955127],
+    [-0.017237252814827183, 0.007986619792820018, 0.620261350985757,
+    0.36402234110936116, -0.050457063493932794], [-0.01022545253060496,
+    -0.039957113612285315, 0.043148065916319644, 0.010278898793053767,
+    -0.0360731700595701], [-0.0016530642487971618, -0.012434273033433193,
+    -0.022137366266623233, -0.018321074249868192, -0.007380297997922878]],
+    [[-0.0027912209880402413, -0.02433614004771699, -0.04562658821385711,
+    -0.042468451339058945, -0.020664074967504827], [-0.009571146384946006,
+    -0.03193943219458265, 0.21238920010551743, 0.17567618137781163,
+    -0.038386325754271367], [-0.012883266171804209, -0.020308275361446707,
+    0.5398093391410691, 0.4522064270238288, -0.0400210108674202],
+    [-0.007309372111524046, -0.040440137416543155, 0.033694739393926795,
+    0.022877571178541393, -0.039000354140279817], [-0.001077826960040027,
+    -0.010740155274818476, -0.02070211170009258, -0.019367833372750345,
+    -0.009019734953997745]]], [[[-0.01626392539751837, -0.035832545662066145,
+    -0.03866988475938143, -0.01930821727497102, -0.0021015621671157296],
+    [-0.03954147855053079, 0.11572472115297622, 0.14229549707292574,
+    -0.03620398561701562, -0.008907053401106523], [-0.04046620032608075,
+    0.47416733354946294, 0.5659339775075575, -0.019741250657301427,
+    -0.013176682690936197], [-0.039796214235811526, 0.06284440084948134,
+    0.08045180751196822, -0.03919545281633188, -0.008138951872408828],
+    [-0.012244853215160443, -0.0268506592492741, -0.02888297740242396,
+    -0.014560933634183601, -0.0015349087147535293]], [[-0.013417641633011906,
+    -0.03365072121724163, -0.04158013952905282, -0.022317053587060733,
+    -0.003131096848010505], [-0.039656293315589966, 0.08734505711506495,
+    0.16637288777763282, -0.029462659514994483, -0.012180160279609385],
+    [-0.051516162733405924, 0.3819429516502501, 0.6502702298731253,
+    0.00992054733419745, -0.01763265975706309], [-0.03814886304138625,
+    0.04338227748703875, 0.09621636053079638, -0.03600283468483378,
+    -0.011256197301616295], [-0.010058190693394593, -0.02525993472848121,
+    -0.03101388043728703, -0.016849195023633558, -0.0023166274424323116]],
+    [[-0.01064002753164297, -0.0315398058472134, -0.04383615619088237,
+    -0.02511202585552835, -0.004465109850519686], [-0.03608088767126983,
+    0.05686230181726653, 0.18459474318150484, -0.0172863640478447,
+    -0.01636186809305578], [-0.05272168029782532, 0.2850099807490571,
+    0.7151797455936235, 0.0540733106155474, -0.02377052601994029],
+    [-0.0337566984532446, 0.02230594207226228, 0.1080561274332002,
+    -0.028675684605006246, -0.01522847594949364], [-0.007955856657996014,
+    -0.02374955428721688, -0.03263677274980321, -0.018931312997898533,
+    -0.003333344356080022]], [[-0.008199753617852345, -0.02964168716094745,
+    -0.04499286779343149, -0.02745350495005966, -0.006124077091711166],
+    [-0.02964168716094745, 0.027274160377919326, 0.19446599876518117,
+    0.0015983184753035505, -0.022324728394118268], [-0.04499286779343149,
+    0.19446599876518125, 0.7498250634433566, 0.11452620166036631,
+    -0.03348047712449868], [-0.027453504950059663, 0.0015983184753035505,
+    0.11452620166036631, -0.016056808817843177, -0.02070338975868157],
+    [-0.006124077091711163, -0.022324728394118268, -0.03348047712449867,
+    -0.02070338975868157, -0.004582234640385923]], [[-0.006124077091711165,
+    -0.027453504950059653, -0.04499286779343149, -0.02964168716094745,
+    -0.008199753617852345], [-0.022324728394118268, 0.0015983184753035479,
+    0.19446599876518117, 0.027274160377919316, -0.02964168716094745],
+    [-0.03348047712449866, 0.11452620166036634, 0.7498250634433566,
+    0.19446599876518117, -0.04499286779343149], [-0.020703389758681575,
+    -0.016056808817843174, 0.11452620166036631, 0.0015983184753035442,
+    -0.02745350495005966], [-0.004582234640385922, -0.020703389758681568,
+    -0.03348047712449866, -0.022324728394118268, -0.006124077091711163]],
+    [[-0.004465109850519687, -0.025112025855528342, -0.04383615619088236,
+    -0.03153980584721341, -0.01064002753164297], [-0.01636186809305578,
+    -0.017286364047844702, 0.18459474318150484, 0.056862301817266536,
+    -0.03608088767126984], [-0.023770526019940296, 0.054073310615547404,
+    0.7151797455936236, 0.285009980749057, -0.05272168029782532],
+    [-0.015228475949493642, -0.028675684605006246, 0.10805612743320021,
+    0.022305942072262292, -0.03375669845324461], [-0.003333344356080022,
+    -0.018931312997898533, -0.03263677274980321, -0.023749554287216885,
+    -0.007955856657996013]], [[-0.003131096848010504, -0.02231705358706072,
+    -0.04158013952905278, -0.03365072121724162, -0.013417641633011903],
+    [-0.01218016027960938, -0.029462659514994476, 0.16637288777763273,
+    0.0873450571150649, -0.03965629331558995], [-0.017632659757063088,
+    0.009920547334197435, 0.6502702298731252, 0.38194295165024994,
+    -0.051516162733405875], [-0.01125619730161629, -0.036002834684833764,
+    0.09621636053079632, 0.04338227748703877, -0.03814886304138623],
+    [-0.0023166274424323103, -0.01684919502363354, -0.031013880437287016,
+    -0.025259934728481197, -0.010058190693394588]], [[-0.0021015621671157296,
+    -0.01930821727497101, -0.03866988475938142, -0.035832545662066145,
+    -0.016263925397518367], [-0.008907053401106521, -0.03620398561701562,
+    0.14229549707292571, 0.11572472115297618, -0.03954147855053078],
+    [-0.013176682690936197, -0.019741250657301437, 0.5659339775075574,
+    0.4741673335494629, -0.04046620032608073], [-0.008138951872408826,
+    -0.03919545281633188, 0.08045180751196819, 0.06284440084948135,
+    -0.039796214235811506], [-0.0015349087147535291, -0.014560933634183593,
+    -0.028882977402423952, -0.026850659249274097, -0.01224485321516044]]],
+    [[[-0.012244853215160442, -0.0268506592492741, -0.02888297740242396,
+    -0.0145609336341836, -0.0015349087147535293], [-0.039796214235811526,
+    0.06284440084948134, 0.08045180751196819, -0.03919545281633189,
+    -0.008138951872408828], [-0.040466200326080747, 0.47416733354946294,
+    0.5659339775075575, -0.019741250657301427, -0.013176682690936197],
+    [-0.03954147855053079, 0.1157247211529762, 0.14229549707292577,
+    -0.03620398561701562, -0.008907053401106523], [-0.016263925397518374,
+    -0.035832545662066145, -0.03866988475938143, -0.019308217274971024,
+    -0.00210156216711573]], [[-0.010058190693394592, -0.025259934728481204,
+    -0.031013880437287023, -0.016849195023633547, -0.0023166274424323103],
+    [-0.03814886304138625, 0.04338227748703876, 0.09621636053079634,
+    -0.036002834684833764, -0.011256197301616292], [-0.05151616273340591,
+    0.38194295165025, 0.6502702298731253, 0.009920547334197446,
+    -0.017632659757063088], [-0.039656293315589966, 0.08734505711506492,
+    0.16637288777763276, -0.029462659514994476, -0.012180160279609383],
+    [-0.013417641633011903, -0.03365072121724163, -0.041580139529052804,
+    -0.02231705358706074, -0.0031310968480105046]], [[-0.007955856657996016,
+    -0.02374955428721689, -0.03263677274980321, -0.01893131299789854,
+    -0.003333344356080023], [-0.03375669845324461, 0.02230594207226229,
+    0.10805612743320021, -0.02867568460500625, -0.01522847594949364],
+    [-0.05272168029782533, 0.28500998074905703, 0.7151797455936236,
+    0.05407331061554741, -0.0237705260199403], [-0.03608088767126984,
+    0.05686230181726652, 0.18459474318150484, -0.017286364047844702,
+    -0.016361868093055783], [-0.01064002753164297, -0.03153980584721341,
+    -0.04383615619088236, -0.025112025855528356, -0.004465109850519687]],
+    [[-0.006124077091711165, -0.022324728394118264, -0.03348047712449866,
+    -0.020703389758681568, -0.004582234640385924], [-0.02745350495005966,
+    0.0015983184753035565, 0.11452620166036628, -0.016056808817843167,
+    -0.020703389758681568], [-0.04499286779343149, 0.19446599876518122,
+    0.7498250634433568, 0.1145262016603663, -0.03348047712449867],
+    [-0.02964168716094745, 0.027274160377919326, 0.1944659987651812,
+    0.0015983184753035424, -0.022324728394118264], [-0.008199753617852345,
+    -0.02964168716094745, -0.04499286779343149, -0.027453504950059663,
+    -0.006124077091711164]], [[-0.004582234640385923, -0.02070338975868156,
+    -0.03348047712449866, -0.022324728394118268, -0.006124077091711164],
+    [-0.020703389758681568, -0.01605680881784317, 0.11452620166036626,
+    0.0015983184753035535, -0.02745350495005966], [-0.03348047712449866,
+    0.11452620166036631, 0.7498250634433566, 0.1944659987651812,
+    -0.04499286779343149], [-0.022324728394118268, 0.0015983184753035503,
+    0.19446599876518122, 0.02727416037791932, -0.02964168716094745],
+    [-0.006124077091711164, -0.027453504950059653, -0.04499286779343149,
+    -0.02964168716094745, -0.008199753617852345]], [[-0.0033333443560800216,
+    -0.018931312997898523, -0.0326367727498032, -0.023749554287216878,
+    -0.007955856657996013], [-0.015228475949493635, -0.028675684605006243,
+    0.10805612743320019, 0.022305942072262285, -0.0337566984532446],
+    [-0.02377052601994029, 0.05407331061554739, 0.7151797455936234,
+    0.2850099807490569, -0.052721680297825306], [-0.016361868093055777,
+    -0.0172863640478447, 0.18459474318150482, 0.05686230181726653,
+    -0.03608088767126982], [-0.004465109850519686, -0.025112025855528346,
+    -0.04383615619088235, -0.03153980584721339, -0.010640027531642966]],
+    [[-0.002316627442432311, -0.01684919502363354, -0.031013880437287023,
+    -0.025259934728481207, -0.010058190693394592], [-0.011256197301616293,
+    -0.036002834684833764, 0.09621636053079634, 0.04338227748703875,
+    -0.03814886304138625], [-0.01763265975706309, 0.009920547334197437,
+    0.6502702298731253, 0.38194295165025, -0.05151616273340589],
+    [-0.012180160279609383, -0.029462659514994483, 0.16637288777763276,
+    0.08734505711506496, -0.03965629331558995], [-0.0031310968480105046,
+    -0.022317053587060733, -0.04158013952905281, -0.03365072121724162,
+    -0.013417641633011903]], [[-0.00153490871475353, -0.014560933634183603,
+    -0.028882977402423966, -0.02685065924927412, -0.012244853215160445],
+    [-0.008138951872408833, -0.039195452816331904, 0.08045180751196822,
+    0.06284440084948137, -0.03979621423581154], [-0.013176682690936204,
+    -0.019741250657301448, 0.5659339775075575, 0.47416733354946317,
+    -0.04046620032608075], [-0.008907053401106528, -0.036203985617015634,
+    0.1422954970729258, 0.1157247211529763, -0.0395414785505308],
+    [-0.002101562167115731, -0.01930821727497103, -0.03866988475938145,
+    -0.035832545662066166, -0.01626392539751838]]], [[[-0.00901973495399775,
+    -0.01936783337275035, -0.02070211170009259, -0.010740155274818482,
+    -0.001077826960040027], [-0.039000354140279844, 0.022877571178541386,
+    0.033694739393926795, -0.04044013741654317, -0.007309372111524048],
+    [-0.040021010867420236, 0.452206427023829, 0.5398093391410694,
+    -0.020308275361446707, -0.01288326617180421], [-0.038386325754271394,
+    0.1756761813778117, 0.21238920010551754, -0.031939432194582666,
+    -0.00957114638494601], [-0.020664074967504838, -0.042468451339058966,
+    -0.04562658821385713, -0.024336140047717003, -0.002791220988040243]],
+    [[-0.007380297997922877, -0.018321074249868192, -0.022137366266623233,
+    -0.012434273033433195, -0.0016530642487971611], [-0.0360731700595701,
+    0.01027889879305376, 0.04314806591631964, -0.03995711361228531,
+    -0.010225452530604959], [-0.05045706349393281, 0.3640223411093611,
+    0.6202613509857569, 0.007986619792820032, -0.017237252814827183],
+    [-0.04052736449955128, 0.13727831636454993, 0.24585519478421136,
+    -0.021178184193661704, -0.012971303942569708], [-0.017203222289937245,
+    -0.040174650139157116, -0.04887867620762645, -0.027909222866276163,
+    -0.004112035299428132]], [[-0.005819138225232015, -0.01734151061590863,
+    -0.02320851545865194, -0.0139437312173516, -0.0024085358814646654],
+    [-0.031173068184164845, -0.0035377934166333767, 0.05018047922229021,
+    -0.03637638898995257, -0.013943731217351596], [-0.05142937279813893,
+    0.27129990888960803, 0.6821432561027146, 0.050180479222290235,
+    -0.023208515458651942], [-0.0379761971054064, 0.0962810362260875,
+    0.27129990888960803, -0.0035377934166333793, -0.017341510615908634],
+    [-0.01374148963820583, -0.0379761971054064, -0.05142937279813894,
+    -0.031173068184164855, -0.005819138225232018]], [[-0.004465109850519687,
+    -0.016361868093055777, -0.023770526019940293, -0.015228475949493638,
+    -0.0033333443560800233], [-0.02511202585552834, -0.017286364047844695,
+    0.054073310615547376, -0.028675684605006243, -0.018931312997898533],
+    [-0.04383615619088236, 0.1845947431815049, 0.7151797455936236,
+    0.10805612743320021, -0.03263677274980321], [-0.03153980584721341,
+    0.05686230181726654, 0.285009980749057, 0.02230594207226228,
+    -0.023749554287216885], [-0.010640027531642969, -0.03608088767126984,
+    -0.052721680297825334, -0.03375669845324461, -0.007955856657996016]],
+    [[-0.003333344356080022, -0.015228475949493635, -0.023770526019940282,
+    -0.016361868093055777, -0.004465109850519686], [-0.018931312997898526,
+    -0.02867568460500624, 0.054073310615547356, -0.01728636404784469,
+    -0.02511202585552834], [-0.0326367727498032, 0.10805612743320023,
+    0.7151797455936234, 0.18459474318150484, -0.04383615619088235],
+    [-0.023749554287216878, 0.022305942072262296, 0.285009980749057,
+    0.05686230181726651, -0.03153980584721339], [-0.007955856657996014,
+    -0.033756698453244596, -0.05272168029782531, -0.03608088767126983,
+    -0.010640027531642967]], [[-0.002408535881464665, -0.013943731217351592,
+    -0.023208515458651935, -0.017341510615908627, -0.005819138225232014],
+    [-0.013943731217351592, -0.03637638898995256, 0.050180479222290214,
+    -0.003537793416633366, -0.031173068184164845], [-0.02320851545865193,
+    0.05018047922229023, 0.6821432561027146, 0.271299908889608,
+    -0.051429372798138924], [-0.017341510615908627, -0.003537793416633378,
+    0.271299908889608, 0.0962810362260875, -0.03797619710540638],
+    [-0.005819138225232016, -0.03117306818416484, -0.05142937279813893,
+    -0.03797619710540639, -0.013741489638205826]], [[-0.0016530642487971614,
+    -0.012434273033433195, -0.022137366266623233, -0.01832107424986819,
+    -0.007380297997922878], [-0.01022545253060496, -0.03995711361228531,
+    0.04314806591631963, 0.010278898793053765, -0.0360731700595701],
+    [-0.017237252814827183, 0.007986619792820022, 0.6202613509857569,
+    0.3640223411093611, -0.05045706349393281], [-0.012971303942569706,
+    -0.021178184193661718, 0.24585519478421136, 0.13727831636454998,
+    -0.04052736449955127], [-0.004112035299428132, -0.027909222866276156,
+    -0.04887867620762645, -0.04017465013915711, -0.017203222289937245]],
+    [[-0.0010778269600400273, -0.010740155274818482, -0.02070211170009259,
+    -0.01936783337275035, -0.009019734953997749], [-0.007309372111524049,
+    -0.04044013741654317, 0.03369473939392679, 0.022877571178541403,
+    -0.039000354140279844], [-0.012883266171804212, -0.020308275361446713,
+    0.5398093391410693, 0.4522064270238289, -0.04002101086742022],
+    [-0.00957114638494601, -0.031939432194582666, 0.2123892001055175,
+    0.17567618137781177, -0.03838632575427138], [-0.002791220988040243,
+    -0.024336140047717003, -0.04562658821385713, -0.04246845133905897,
+    -0.020664074967504838]]], [[[-0.006466489422914399, -0.014502394951723476,
+    -0.015459264122748746, -0.007755474877032561, -0.0007159165805851706],
+    [-0.036632851477625676, -0.005730455022054154, -7.890574314417718e-05,
+    -0.03942402104403913, -0.006389875713059275], [-0.039015079941410555,
+    0.4111930051936302, 0.4909386897413152, -0.021181804710686668,
+    -0.012319067611648218], [-0.03752601069182331, 0.239032194778253,
+    0.2868949551896562, -0.027885740619110408, -0.010202069931803578],
+    [-0.025194061504750533, -0.04314593981787028, -0.04562755195174027,
+    -0.029204772772557768, -0.003601096394526257]], [[-0.005252595229206095,
+    -0.013729503292946928, -0.016482266055193047, -0.009045186119473492,
+    -0.0011168422627331079], [-0.03293190035303923, -0.013001053856678081,
+    0.004583518760872412, -0.040827417160105635, -0.009045186119473489],
+    [-0.04831487472573022, 0.33063684267898774, 0.5640812622041926,
+    0.004583518760872408, -0.01648226605519305], [-0.04173044153813555,
+    0.18968272846778533, 0.3306368426789878, -0.01300105385667808,
+    -0.013729503292946928], [-0.02128481178805433, -0.041730441538135564,
+    -0.048314874725730234, -0.03293190035303923, -0.005252595229206095]],
+    [[-0.004112035299428132, -0.012971303942569708, -0.017237252814827186,
+    -0.010225452530604966, -0.0016530642487971618], [-0.027909222866276156,
+    -0.02117818419366171, 0.007986619792820025, -0.039957113612285315,
+    -0.0124342730334332], [-0.04887867620762645, 0.24585519478421144,
+    0.620261350985757, 0.04314806591631966, -0.022137366266623243],
+    [-0.040174650139157116, 0.13727831636454998, 0.3640223411093612,
+    0.010278898793053765, -0.018321074249868192], [-0.017203222289937245,
+    -0.04052736449955129, -0.05045706349393283, -0.03607317005957011,
+    -0.007380297997922882]], [[-0.0031310968480105046, -0.012180160279609381,
+    -0.017632659757063088, -0.011256197301616295, -0.0023166274424323108],
+    [-0.022317053587060723, -0.02946265951499447, 0.009920547334197437,
+    -0.036002834684833764, -0.016849195023633544], [-0.0415801395290528,
+    0.16637288777763282, 0.6502702298731252, 0.09621636053079637,
+    -0.031013880437287027], [-0.03365072121724162, 0.08734505711506496,
+    0.38194295165025005, 0.04338227748703875, -0.025259934728481197],
+    [-0.013417641633011903, -0.03965629331558996, -0.05151616273340592,
+    -0.03814886304138624, -0.010058190693394595]], [[-0.0023166274424323103,
+    -0.011256197301616293, -0.017632659757063088, -0.012180160279609383,
+    -0.0031310968480105046], [-0.016849195023633544, -0.036002834684833764,
+    0.009920547334197428, -0.029462659514994466, -0.02231705358706073],
+    [-0.03101388043728702, 0.0962163605307964, 0.6502702298731252,
+    0.1663728877776328, -0.041580139529052804], [-0.025259934728481197,
+    0.043382277487038774, 0.38194295165025005, 0.08734505711506493,
+    -0.033650721217241615], [-0.010058190693394593, -0.03814886304138624,
+    -0.05151616273340592, -0.03965629331558995, -0.013417641633011903]],
+    [[-0.0016530642487971614, -0.01022545253060496, -0.017237252814827183,
+    -0.012971303942569706, -0.00411203529942813], [-0.012434273033433196,
+    -0.03995711361228531, 0.007986619792820018, -0.021178184193661697,
+    -0.027909222866276152], [-0.022137366266623233, 0.04314806591631965,
+    0.6202613509857569, 0.24585519478421133, -0.04887867620762643],
+    [-0.018321074249868185, 0.010278898793053763, 0.3640223411093611,
+    0.13727831636454993, -0.040174650139157095], [-0.00738029799792288,
+    -0.0360731700595701, -0.05045706349393282, -0.04052736449955128,
+    -0.01720322228993724]], [[-0.0011168422627331075, -0.009045186119473489,
+    -0.016482266055193047, -0.013729503292946922, -0.005252595229206093],
+    [-0.00904518611947349, -0.04082741716010563, 0.004583518760872401,
+    -0.01300105385667807, -0.032931900353039216], [-0.016482266055193047,
+    0.004583518760872399, 0.5640812622041926, 0.33063684267898774,
+    -0.0483148747257302], [-0.013729503292946922, -0.01300105385667808,
+    0.33063684267898774, 0.1896827284677853, -0.04173044153813553],
+    [-0.005252595229206094, -0.03293190035303922, -0.048314874725730206,
+    -0.04173044153813555, -0.021284811788054327]], [[-0.0007159165805851706,
+    -0.00775547487703256, -0.015459264122748746, -0.014502394951723471,
+    -0.006466489422914398], [-0.006389875713059273, -0.039424021044039116,
+    -7.890574314417675e-05, -0.0057304550220541334, -0.03663285147762567],
+    [-0.012319067611648212, -0.02118180471068667, 0.4909386897413151,
+    0.4111930051936303, -0.03901507994141054], [-0.010202069931803573,
+    -0.02788574061911041, 0.28689495518965613, 0.23903219477825297,
+    -0.037526010691823306], [-0.0036010963945262557, -0.029204772772557758,
+    -0.04562755195174025, -0.043145939817870266, -0.025194061504750526]]],
+    [[[-0.004476318148146651, -0.010817484288013232, -0.011552729379100072,
+    -0.005422291349995998, -0.0004507227386051219], [-0.03324558280295301,
+    -0.02447552237556967, -0.022489385288542604, -0.03680917952171095,
+    -0.005422291349995998], [-0.0378381168526885, 0.35757708121641424,
+    0.4272005024152728, -0.022489385288542604, -0.011552729379100074],
+    [-0.0370635264420727, 0.2989532845474528, 0.35757708121641435,
+    -0.024475522375569658, -0.010817484288013228], [-0.029286133281073247,
+    -0.037063526442072704, -0.037838116852688494, -0.03324558280295301,
+    -0.00447631814814665]], [[-0.0036010963945262574, -0.01020206993180358,
+    -0.012319067611648216, -0.006389875713059276, -0.0007159165805851708],
+    [-0.029204772772557765, -0.02788574061911041, -0.02118180471068666,
+    -0.03942402104403913, -0.007755474877032563], [-0.04562755195174027,
+    0.2868949551896562, 0.490938689741315, -7.890574314416898e-05,
+    -0.015459264122748749], [-0.043145939817870266, 0.239032194778253,
+    0.41119300519363033, -0.005730455022054142, -0.014502394951723474],
+    [-0.025194061504750533, -0.03752601069182331, -0.039015079941410555,
+    -0.036632851477625676, -0.0064664894229144]], [[-0.002791220988040242,
+    -0.009571146384946008, -0.012883266171804207, -0.007309372111524051,
+    -0.0010778269600400271], [-0.024336140047716993, -0.03193943219458266,
+    -0.020308275361446703, -0.040440137416543155, -0.010740155274818482],
+    [-0.04562658821385712, 0.2123892001055175, 0.5398093391410692,
+    0.033694739393926816, -0.020702111700092587], [-0.042468451339058945,
+    0.1756761813778117, 0.4522064270238289, 0.022877571178541396,
+    -0.019367833372750342], [-0.020664074967504824, -0.03838632575427138,
+    -0.040021010867420236, -0.03900035414027984, -0.00901973495399775]],
+    [[-0.0021015621671157296, -0.008907053401106525, -0.013176682690936196,
+    -0.008138951872408831, -0.0015349087147535293], [-0.019308217274971017,
+    -0.03620398561701561, -0.019741250657301434, -0.03919545281633189,
+    -0.014560933634183596], [-0.03866988475938143, 0.1422954970729258,
+    0.5659339775075574, 0.0804518075119682, -0.028882977402423963],
+    [-0.035832545662066145, 0.11572472115297625, 0.47416733354946283,
+    0.06284440084948135, -0.026850659249274097], [-0.01626392539751837,
+    -0.039541478550530786, -0.04046620032608076, -0.03979621423581152,
+    -0.012244853215160443]], [[-0.0015349087147535293, -0.008138951872408831,
+    -0.013176682690936201, -0.008907053401106528, -0.00210156216711573],
+    [-0.0145609336341836, -0.0391954528163319, -0.019741250657301437,
+    -0.03620398561701563, -0.01930821727497102], [-0.028882977402423963,
+    0.08045180751196825, 0.5659339775075575, 0.14229549707292577,
+    -0.038669884759381434], [-0.0268506592492741, 0.06284440084948138,
+    0.47416733354946305, 0.11572472115297622, -0.03583254566206615],
+    [-0.012244853215160445, -0.03979621423581153, -0.04046620032608077,
+    -0.03954147855053079, -0.016263925397518374]], [[-0.0010778269600400273,
+    -0.00730937211152405, -0.01288326617180421, -0.00957114638494601,
+    -0.002791220988040242], [-0.010740155274818482, -0.04044013741654317,
+    -0.020308275361446707, -0.031939432194582666, -0.024336140047717],
+    [-0.020702111700092594, 0.03369473939392682, 0.5398093391410693,
+    0.21238920010551754, -0.04562658821385713], [-0.019367833372750342,
+    0.022877571178541396, 0.452206427023829, 0.17567618137781177,
+    -0.04246845133905895], [-0.009019734953997752, -0.039000354140279844,
+    -0.040021010867420236, -0.03838632575427138, -0.02066407496750483]],
+    [[-0.0007159165805851705, -0.006389875713059274, -0.012319067611648212,
+    -0.010202069931803576, -0.0036010963945262565], [-0.007755474877032563,
+    -0.03942402104403912, -0.02118180471068666, -0.0278857406191104,
+    -0.029204772772557758], [-0.015459264122748742, -7.890574314417695e-05,
+    0.4909386897413151, 0.2868949551896562, -0.04562755195174026],
+    [-0.01450239495172347, -0.0057304550220541465, 0.4111930051936302,
+    0.23903219477825297, -0.04314593981787025], [-0.0064664894229144,
+    -0.03663285147762567, -0.03901507994141055, -0.037526010691823306,
+    -0.025194061504750526]], [[-0.0004507227386051219, -0.005422291349995998,
+    -0.01155272937910007, -0.01081748428801323, -0.00447631814814665],
+    [-0.005422291349995998, -0.03680917952171095, -0.022489385288542607,
+    -0.024475522375569655, -0.03324558280295302], [-0.011552729379100074,
+    -0.02248938528854261, 0.4272005024152728, 0.3575770812164143,
+    -0.0378381168526885], [-0.010817484288013228, -0.02447552237556967,
+    0.3575770812164143, 0.29895328454745285, -0.03706352644207268],
+    [-0.00447631814814665, -0.03324558280295302, -0.0378381168526885,
+    -0.03706352644207269, -0.029286133281073243]]]]
+}
+
+# Script entry point: main() runs only when this file is executed directly,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/tools/viewer/CMakeLists.txt b/third-party/libjxl/libjxl/tools/viewer/CMakeLists.txt
new file mode 100644
index 0000000000..2b25e26664
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/viewer/CMakeLists.txt
@@ -0,0 +1,39 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Qt6 Widgets is an optional dependency: when it is missing the viewer is
+# skipped with a warning instead of failing the whole configure step.
+find_package(Qt6 QUIET COMPONENTS Widgets)
+if (NOT Qt6_FOUND)
+  message(WARNING "Qt6 was not found. The directory viewer will not be built.")
+  return()
+endif ()
+
+# The viewer links against the icc_detect target (see target_link_libraries
+# below), so that target must already have been defined by the time this
+# directory is processed.
+if (NOT TARGET icc_detect)
+  message(WARNING "The directory viewer depends on the comparison tool and will also not be built.")
+  return ()
+endif ()
+
+# Let CMake run Qt's moc (for the Q_OBJECT class) and uic (for
+# viewer_window.ui) automatically, and put the current source/binary
+# directories on the include path.
+set(CMAKE_INCLUDE_CURRENT_DIR ON)
+set(CMAKE_AUTOMOC ON)
+set(CMAKE_AUTOUIC ON)
+
+# WIN32 makes this a GUI (non-console) executable when building on Windows.
+add_executable(viewer WIN32
+  load_jxl.cc
+  load_jxl.h
+  main.cc
+  viewer_window.cc
+  viewer_window.h
+  viewer_window.ui
+)
+# lcms2's include directories are taken from the lcms2 target; the project
+# root is added so that "tools/viewer/..." style includes resolve.
+target_include_directories(viewer PRIVATE
+  $<TARGET_PROPERTY:lcms2,INCLUDE_DIRECTORIES>
+  "${PROJECT_SOURCE_DIR}"
+)
+target_link_libraries(viewer
+  Qt6::Widgets
+  icc_detect
+  jxl
+  jxl_threads
+  lcms2
+)
diff --git a/third-party/libjxl/libjxl/tools/viewer/load_jxl.cc b/third-party/libjxl/libjxl/tools/viewer/load_jxl.cc
new file mode 100644
index 0000000000..76ebe021c5
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/viewer/load_jxl.cc
@@ -0,0 +1,153 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/viewer/load_jxl.h"
+
+#include <jxl/decode.h>
+#include <jxl/decode_cxx.h>
+#include <jxl/thread_parallel_runner_cxx.h>
+#include <jxl/types.h>
+#include <stdint.h>
+
+#include <QElapsedTimer>
+#include <QFile>
+
+#include "lcms2.h"
+
+namespace jpegxl {
+namespace tools {
+
+namespace {
+
+// Deleter functor that closes an lcms2 profile handle; a null handle is
+// ignored.
+struct CmsProfileCloser {
+  void operator()(const cmsHPROFILE profile) const {
+    if (profile == nullptr) return;
+    cmsCloseProfile(profile);
+  }
+};
+// Owning pointer for a cmsHPROFILE that closes the profile on destruction.
+using CmsProfileUniquePtr =
+    std::unique_ptr<std::remove_pointer<cmsHPROFILE>::type, CmsProfileCloser>;
+
+// Deleter functor that destroys an lcms2 transform handle; a null handle is
+// ignored.
+struct CmsTransformDeleter {
+  void operator()(const cmsHTRANSFORM transform) const {
+    if (transform == nullptr) return;
+    cmsDeleteTransform(transform);
+  }
+};
+// Owning pointer for a cmsHTRANSFORM that deletes the transform on
+// destruction.
+using CmsTransformUniquePtr =
+    std::unique_ptr<std::remove_pointer<cmsHTRANSFORM>::type,
+                    CmsTransformDeleter>;
+
+}  // namespace
+
+// Decodes the JPEG XL file `filename` to float RGBA, converts it with lcms2
+// from the image's ICC profile to `targetIccProfile`, and returns the result
+// as a 16-bit-per-channel RGBA QImage.
+//
+// Parameters:
+//   filename             path of the .jxl file to read.
+//   targetIccProfile     raw ICC profile bytes to convert to; if lcms2 cannot
+//                        open it, an sRGB profile is used instead.
+//   elapsed_ns           optional out: time spent decoding and color
+//                        converting, in nanoseconds (file reading excluded).
+//   usedRequestedProfile optional out: true when `targetIccProfile` could be
+//                        opened, false when the sRGB fallback was used.
+//
+// Returns a null QImage on any failure. On early failures the out-parameters
+// may be left unwritten.
+QImage loadJxlImage(const QString& filename, const QByteArray& targetIccProfile,
+                    qint64* elapsed_ns, bool* usedRequestedProfile) {
+  // The runner is declared before the decoder so that it outlives it.
+  auto runner = JxlThreadParallelRunnerMake(
+      nullptr, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+
+  auto dec = JxlDecoderMake(nullptr);
+
+#define EXPECT_TRUE(a)                                               \
+  do {                                                               \
+    if (!(a)) {                                                      \
+      fprintf(stderr, "Assertion failure (%d): %s\n", __LINE__, #a); \
+      return QImage();                                               \
+    }                                                                \
+  } while (false)
+#define EXPECT_EQ(a, b)                                               \
+  do {                                                                \
+    int a_ = a;                                                       \
+    int b_ = b;                                                       \
+    if (a_ != b_) {                                                   \
+      fprintf(stderr, "Assertion failure (%d): %s (%d) != %s (%d)\n", \
+              __LINE__, #a, a_, #b, b_);                              \
+      return QImage();                                                \
+    }                                                                 \
+  } while (false)
+
+  // Attach the worker pool to the decoder. Without this call the runner is
+  // created but never used and decoding runs on a single thread.
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetParallelRunner(dec.get(), JxlThreadParallelRunner,
+                                        runner.get()));
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(dec.get(), JXL_DEC_BASIC_INFO |
+                                                     JXL_DEC_COLOR_ENCODING |
+                                                     JXL_DEC_FULL_IMAGE));
+  QFile jpegXlFile(filename);
+  if (!jpegXlFile.open(QIODevice::ReadOnly)) {
+    return QImage();
+  }
+  const QByteArray jpegXlData = jpegXlFile.readAll();
+  if (jpegXlData.size() < 4) {
+    return QImage();
+  }
+
+  // Timing starts after the file has been read, so it covers decoding and
+  // color conversion only.
+  QElapsedTimer timer;
+  timer.start();
+  const uint8_t* jxl_data = reinterpret_cast<const uint8_t*>(jpegXlData.data());
+  size_t jxl_size = jpegXlData.size();
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetInput(dec.get(), jxl_data, jxl_size));
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec.get()));
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec.get(), &info));
+  // xsize/ysize are 32-bit; widen before multiplying so that large images do
+  // not wrap around and end up with undersized buffers.
+  const size_t pixel_count = static_cast<size_t>(info.xsize) * info.ysize;
+
+  EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec.get()));
+  // Four interleaved float channels (RGBA), native endianness, no alignment.
+  static const JxlPixelFormat format = {4, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN,
+                                        0};
+  size_t icc_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetICCProfileSize(
+                dec.get(), JXL_COLOR_PROFILE_TARGET_DATA, &icc_size));
+  std::vector<uint8_t> icc_profile(icc_size);
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsICCProfile(
+                                 dec.get(), JXL_COLOR_PROFILE_TARGET_DATA,
+                                 icc_profile.data(), icc_profile.size()));
+
+  std::vector<float> float_pixels(pixel_count * 4);
+  EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec.get()));
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetImageOutBuffer(dec.get(), &format, float_pixels.data(),
+                                        float_pixels.size() * sizeof(float)));
+  EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec.get()));
+
+  std::vector<uint16_t> uint16_pixels(pixel_count * 4);
+  // One lcms2 context per thread, created on first use.
+  const thread_local cmsContext context = cmsCreateContext(nullptr, nullptr);
+  EXPECT_TRUE(context != nullptr);
+  const CmsProfileUniquePtr jxl_profile(cmsOpenProfileFromMemTHR(
+      context, icc_profile.data(), icc_profile.size()));
+  EXPECT_TRUE(jxl_profile != nullptr);
+  CmsProfileUniquePtr target_profile(cmsOpenProfileFromMemTHR(
+      context, targetIccProfile.data(), targetIccProfile.size()));
+  if (usedRequestedProfile != nullptr) {
+    *usedRequestedProfile = (target_profile != nullptr);
+  }
+  if (target_profile == nullptr) {
+    // The requested profile is unusable: fall back to sRGB.
+    target_profile.reset(cmsCreate_sRGBProfileTHR(context));
+  }
+  EXPECT_TRUE(target_profile != nullptr);
+  CmsTransformUniquePtr transform(cmsCreateTransformTHR(
+      context, jxl_profile.get(), TYPE_RGBA_FLT, target_profile.get(),
+      TYPE_RGBA_16, INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_COPY_ALPHA));
+  EXPECT_TRUE(transform != nullptr);
+  cmsDoTransform(transform.get(), float_pixels.data(), uint16_pixels.data(),
+                 pixel_count);
+  if (elapsed_ns != nullptr) *elapsed_ns = timer.nsecsElapsed();
+
+  QImage result(info.xsize, info.ysize,
+                info.alpha_premultiplied ? QImage::Format_RGBA64_Premultiplied
+                                         : QImage::Format_RGBA64);
+  // QImage is null when the pixel buffer could not be allocated; scanLine()
+  // must not be used on it.
+  if (result.isNull()) {
+    return QImage();
+  }
+
+  const size_t row_stride = static_cast<size_t>(result.width()) * 4;
+  for (int y = 0; y < result.height(); ++y) {
+    QRgba64* const row = reinterpret_cast<QRgba64*>(result.scanLine(y));
+    // The offset is computed in size_t: width * y * 4 can exceed INT_MAX.
+    const uint16_t* const data = uint16_pixels.data() + row_stride * y;
+    for (int x = 0; x < result.width(); ++x) {
+      row[x] = qRgba64(data[4 * x + 0], data[4 * x + 1], data[4 * x + 2],
+                       data[4 * x + 3]);
+    }
+  }
+  return result;
+}
+// The helper macros are local to loadJxlImage; do not leak them further.
+#undef EXPECT_EQ
+#undef EXPECT_TRUE
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/viewer/load_jxl.h b/third-party/libjxl/libjxl/tools/viewer/load_jxl.h
new file mode 100644
index 0000000000..85dc1a9e52
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/viewer/load_jxl.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_VIEWER_LOAD_JXL_H_
+#define TOOLS_VIEWER_LOAD_JXL_H_
+
+#include <QByteArray>
+#include <QImage>
+#include <QString>
+
+namespace jpegxl {
+namespace tools {
+
+// Loads the JPEG XL file `filename`, converts its colors to the ICC profile
+// given in `targetIccProfile`, and returns the result as a QImage. A null
+// QImage is returned on failure.
+//
+// `elapsed`, when non-null, receives the time spent decoding and converting,
+// in nanoseconds. `usedRequestedProfile`, when non-null, is set to whether
+// `targetIccProfile` could be used; when it could not, sRGB is used instead.
+// On failure the out-parameters may be left unwritten.
+QImage loadJxlImage(const QString& filename, const QByteArray& targetIccProfile,
+                    qint64* elapsed, bool* usedRequestedProfile = nullptr);
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_VIEWER_LOAD_JXL_H_
diff --git a/third-party/libjxl/libjxl/tools/viewer/main.cc b/third-party/libjxl/libjxl/tools/viewer/main.cc
new file mode 100644
index 0000000000..1e80be3e74
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/viewer/main.cc
@@ -0,0 +1,23 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <QApplication>
+
+#include "tools/viewer/viewer_window.h"
+
+// Entry point of the viewer: shows the main window and, if any paths were
+// given on the command line, loads them.
+int main(int argc, char** argv) {
+  QApplication application(argc, argv);
+  // Skip the program name (element 0). mid(1) also works on an empty list,
+  // whereas removeFirst() requires the list to be non-empty.
+  const QStringList arguments = application.arguments().mid(1);
+
+  jpegxl::tools::ViewerWindow window;
+  window.show();
+
+  if (!arguments.isEmpty()) {
+    window.loadFilesAndDirectories(arguments);
+  }
+
+  return application.exec();
+}
diff --git a/third-party/libjxl/libjxl/tools/viewer/viewer_window.cc b/third-party/libjxl/libjxl/tools/viewer/viewer_window.cc
new file mode 100644
index 0000000000..6b5f912d6d
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/viewer/viewer_window.cc
@@ -0,0 +1,132 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/viewer/viewer_window.h"
+
+#include <QElapsedTimer>
+#include <QFileDialog>
+#include <QFileInfo>
+#include <QKeyEvent>
+#include <QMessageBox>
+#include <QSet>
+
+#include "tools/icc_detect/icc_detect.h"
+#include "tools/viewer/load_jxl.h"
+
+namespace jpegxl {
+namespace tools {
+
+namespace {
+
+// Appends the absolute path of `info` to `output` if it is a file; if it is a
+// directory, descends into it and does the same for every "*.jxl" file and
+// every sub-directory found. `visited` records the absolute paths already
+// handled so that no entry is processed twice.
+template <typename Output>
+void recursivelyAddSubEntries(const QFileInfo& info,
+                              QSet<QString>* const visited,
+                              Output* const output) {
+  const QString path = info.absoluteFilePath();
+  if (visited->contains(path)) return;
+  *visited << path;
+
+  if (!info.isDir()) {
+    *output << path;
+    return;
+  }
+
+  const QStringList nameFilters = QStringList() << "*.jxl";
+  const auto children = QDir(path).entryInfoList(
+      nameFilters, QDir::Files | QDir::AllDirs | QDir::NoDotAndDotDot);
+  for (const QFileInfo& child : children) {
+    recursivelyAddSubEntries(child, visited, output);
+  }
+}
+
+}  // namespace
+
+// Constructs the main window. monitorProfile_ is initialised from
+// GetMonitorIccProfile(this), declared in tools/icc_detect/icc_detect.h
+// (presumably the ICC profile of the display this widget is on; confirm
+// against that header). The Designer-generated UI is then set up and the
+// platform-standard Open and Quit key sequences are assigned to the
+// corresponding actions.
+ViewerWindow::ViewerWindow(QWidget* const parent)
+    : QMainWindow(parent), monitorProfile_(GetMonitorIccProfile(this)) {
+  ui_.setupUi(this);
+  ui_.actionOpen->setShortcut(QKeySequence::Open);
+  ui_.actionExit->setShortcut(QKeySequence::Quit);
+}
+
+// Replaces the current file list with the files named in `entries`, expanding
+// directories recursively, then shows the first image. The previous/next
+// actions are enabled only when more than one file was found.
+void ViewerWindow::loadFilesAndDirectories(QStringList entries) {
+  QSet<QString> seenPaths;
+  filenames_.clear();
+  for (const QString& path : entries) {
+    const QFileInfo pathInfo(path);
+    recursivelyAddSubEntries(pathInfo, &seenPaths, &filenames_);
+  }
+
+  // Navigation only makes sense with at least two images.
+  const bool canNavigate = filenames_.size() >= 2;
+  ui_.actionPreviousImage->setEnabled(canNavigate);
+  ui_.actionNextImage->setEnabled(canNavigate);
+
+  currentFileIndex_ = 0;
+  refreshImage();
+}
+
+// Slot for the "Open" action: asks the user for one or more existing files
+// and loads the selection. Nothing happens if the dialog is cancelled.
+void ViewerWindow::on_actionOpen_triggered() {
+  QFileDialog openDialog(this, tr("Select JPEG XL files to open…"));
+  openDialog.setFileMode(QFileDialog::ExistingFiles);
+  openDialog.setNameFilter(tr("JPEG XL images (*.jxl);;All files (*)"));
+  if (!openDialog.exec()) return;
+  loadFilesAndDirectories(openDialog.selectedFiles());
+}
+
+// Slot for the "Previous image" action: steps back one file, wrapping from
+// the first file to the last, and displays it.
+void ViewerWindow::on_actionPreviousImage_triggered() {
+  // The action can fire (e.g. through its keyboard shortcut) before any file
+  // has been loaded; taking the remainder by an empty list's size would be a
+  // division by zero.
+  if (filenames_.isEmpty()) return;
+  currentFileIndex_ =
+      (currentFileIndex_ - 1 + filenames_.size()) % filenames_.size();
+  refreshImage();
+}
+
+// Slot for the "Next image" action: advances one file, wrapping from the last
+// file to the first, and displays it.
+void ViewerWindow::on_actionNextImage_triggered() {
+  // The action can fire (e.g. through its keyboard shortcut) before any file
+  // has been loaded; taking the remainder by an empty list's size would be a
+  // division by zero.
+  if (filenames_.isEmpty()) return;
+  currentFileIndex_ = (currentFileIndex_ + 1) % filenames_.size();
+  refreshImage();
+}
+
+// Loads and displays the file at currentFileIndex_. On failure the image is
+// cleared and an error dialog is shown. On success the status bar reports the
+// image size and decode time, and a one-time warning is shown if the monitor
+// profile could not be used.
+void ViewerWindow::refreshImage() {
+  const bool indexInRange =
+      currentFileIndex_ >= 0 && currentFileIndex_ < filenames_.size();
+  if (!indexInRange) return;
+
+  const QString filename = filenames_.at(currentFileIndex_);
+
+  // Both values are written by loadJxlImage whenever it returns a valid
+  // image, which is the only case in which they are read below.
+  qint64 elapsed_ns;
+  bool usedRequestedProfile;
+  const QImage image = loadJxlImage(filename, monitorProfile_, &elapsed_ns,
+                                    &usedRequestedProfile);
+  if (image.isNull()) {
+    const QString message = tr("Failed to load \"%1\".").arg(filename);
+    ui_.image->clear();
+    ui_.statusBar->showMessage(message);
+    QMessageBox errorDialog(this);
+    errorDialog.setIcon(QMessageBox::Critical);
+    errorDialog.setWindowTitle(tr("Failed to load image"));
+    errorDialog.setText(message);
+    errorDialog.exec();
+    return;
+  }
+
+  ui_.image->setPixmap(QPixmap::fromImage(image));
+
+  const double elapsedMs = elapsed_ns / 1e6;
+  const double framesPerSecond = 1e9 / elapsed_ns;
+  const QString status =
+      tr("Loaded image %L1/%L2 (%3, %4×%5) in %L6ms (%L7 fps)")
+          .arg(currentFileIndex_ + 1)
+          .arg(filenames_.size())
+          .arg(filename)
+          .arg(image.width())
+          .arg(image.height())
+          .arg(elapsedMs)
+          .arg(framesPerSecond);
+  ui_.statusBar->showMessage(status);
+
+  // Warn once per window, not once per image.
+  if (usedRequestedProfile || hasWarnedAboutMonitorProfile_) return;
+  hasWarnedAboutMonitorProfile_ = true;
+  QMessageBox warning(this);
+  warning.setIcon(QMessageBox::Warning);
+  warning.setWindowTitle(tr("No valid monitor profile found"));
+  warning.setText(
+      tr("Failed to find a usable monitor profile. Images will be shown "
+         "assuming that the monitor's colorspace is sRGB."));
+  warning.exec();
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/third-party/libjxl/libjxl/tools/viewer/viewer_window.h b/third-party/libjxl/libjxl/tools/viewer/viewer_window.h
new file mode 100644
index 0000000000..78aafb9f82
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/viewer/viewer_window.h
@@ -0,0 +1,43 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_VIEWER_VIEWER_WINDOW_H_
+#define TOOLS_VIEWER_VIEWER_WINDOW_H_
+
+#include <QByteArray>
+#include <QMainWindow>
+#include <QStringList>
+
+#include "tools/viewer/ui_viewer_window.h"
+
+namespace jpegxl {
+namespace tools {
+
+// Main window of the JPEG XL viewer: keeps a list of image files and shows
+// one of them at a time.
+class ViewerWindow : public QMainWindow {
+  Q_OBJECT
+ public:
+  explicit ViewerWindow(QWidget* parent = nullptr);
+
+ public slots:
+  // Replaces the file list with `entries` (files and/or directories) and
+  // displays the first resulting image.
+  void loadFilesAndDirectories(QStringList entries);
+
+ private slots:
+  // The on_<action>_triggered names follow Qt's auto-connection naming
+  // scheme; presumably they are wired up by the generated setupUi() — confirm.
+  void on_actionOpen_triggered();
+  void on_actionPreviousImage_triggered();
+  void on_actionNextImage_triggered();
+  // Loads and displays the file at currentFileIndex_.
+  void refreshImage();
+
+ private:
+  // ICC profile bytes passed as the color conversion target when loading.
+  const QByteArray monitorProfile_;
+  // Widgets and actions generated from viewer_window.ui.
+  Ui::ViewerWindow ui_;
+  // Paths of the images that can be navigated.
+  QStringList filenames_;
+  // Index into filenames_ of the image currently shown.
+  int currentFileIndex_ = 0;
+  // Ensures the "no valid monitor profile" warning is shown only once.
+  bool hasWarnedAboutMonitorProfile_ = false;
+};
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_VIEWER_VIEWER_WINDOW_H_
diff --git a/third-party/libjxl/libjxl/tools/viewer/viewer_window.ui b/third-party/libjxl/libjxl/tools/viewer/viewer_window.ui
new file mode 100644
index 0000000000..9539890550
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/viewer/viewer_window.ui
@@ -0,0 +1,125 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <comment>
+  Copyright (c) the JPEG XL Project Authors. All rights reserved.
+
+  Use of this source code is governed by a BSD-style
+  license that can be found in the LICENSE file.
+ </comment>
+ <class>ViewerWindow</class>
+ <widget class="QMainWindow" name="ViewerWindow">
+  <property name="windowTitle">
+   <string>JPEG XL Viewer</string>
+  </property>
+  <widget class="QWidget" name="centralwidget">
+   <layout class="QVBoxLayout" name="verticalLayout">
+    <property name="leftMargin">
+     <number>0</number>
+    </property>
+    <property name="topMargin">
+     <number>0</number>
+    </property>
+    <property name="rightMargin">
+     <number>0</number>
+    </property>
+    <property name="bottomMargin">
+     <number>0</number>
+    </property>
+    <item>
+     <widget class="QScrollArea" name="scrollArea">
+      <property name="frameShape">
+       <enum>QFrame::NoFrame</enum>
+      </property>
+      <property name="widgetResizable">
+       <bool>true</bool>
+      </property>
+      <widget class="QLabel" name="image">
+       <property name="alignment">
+        <set>Qt::AlignCenter</set>
+       </property>
+      </widget>
+     </widget>
+    </item>
+   </layout>
+  </widget>
+  <widget class="QMenuBar" name="menuBar">
+   <widget class="QMenu" name="menuFile">
+    <property name="title">
+     <string>&amp;File</string>
+    </property>
+    <addaction name="actionOpen"/>
+    <addaction name="separator"/>
+    <addaction name="actionExit"/>
+   </widget>
+   <addaction name="menuFile"/>
+  </widget>
+  <widget class="QStatusBar" name="statusBar"/>
+  <widget class="QToolBar" name="toolBar">
+   <property name="windowTitle">
+    <string>toolBar</string>
+   </property>
+   <attribute name="toolBarArea">
+    <enum>TopToolBarArea</enum>
+   </attribute>
+   <attribute name="toolBarBreak">
+    <bool>false</bool>
+   </attribute>
+   <addaction name="actionOpen"/>
+   <addaction name="actionPreviousImage"/>
+   <addaction name="actionNextImage"/>
+  </widget>
+  <action name="actionOpen">
+   <property name="icon">
+    <iconset theme="document-open"/>
+   </property>
+   <property name="text">
+    <string>&amp;Open…</string>
+   </property>
+   <property name="menuRole">
+    <enum>QAction::NoRole</enum>
+   </property>
+  </action>
+  <action name="actionExit">
+   <property name="icon">
+    <iconset theme="application-exit"/>
+   </property>
+   <property name="text">
+    <string>E&amp;xit</string>
+   </property>
+   <property name="menuRole">
+    <enum>QAction::QuitRole</enum>
+   </property>
+  </action>
+  <action name="actionPreviousImage">
+   <property name="icon">
+    <iconset theme="go-previous"/>
+   </property>
+   <property name="text">
+    <string>Previous image</string>
+   </property>
+   <property name="shortcut">
+    <string>Left</string>
+   </property>
+  </action>
+  <action name="actionNextImage">
+   <property name="icon">
+    <iconset theme="go-next"/>
+   </property>
+   <property name="text">
+    <string>Next image</string>
+   </property>
+   <property name="shortcut">
+    <string>Right</string>
+   </property>
+  </action>
+ </widget>
+ <resources/>
+ <connections>
+  <connection>
+   <sender>actionExit</sender>
+   <signal>triggered()</signal>
+   <receiver>ViewerWindow</receiver>
+   <slot>close()</slot>
+  </connection>
+ </connections>
+</ui>
diff --git a/third-party/libjxl/libjxl/tools/wasm_demo/CMakeLists.txt b/third-party/libjxl/libjxl/tools/wasm_demo/CMakeLists.txt
new file mode 100644
index 0000000000..418b598eae
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/wasm_demo/CMakeLists.txt
@@ -0,0 +1,64 @@
+# The demo is only built when tools are enabled and JPEGXL_EMSCRIPTEN is set.
+if (NOT JPEGXL_ENABLE_TOOLS OR NOT JPEGXL_EMSCRIPTEN)
+  return()
+endif()
+
+# WASM API facade.
+# Two executables are built from the same sources; they differ only in the
+# link flags assigned further below (site build vs. test build).
+add_executable(jxl_decoder jxl_decoder.cc jxl_decompressor.cc no_png.cc)
+add_executable(jxl_decoder_for_test jxl_decoder.cc jxl_decompressor.cc no_png.cc)
+target_link_libraries(jxl_decoder jxl_extras-static)
+target_link_libraries(jxl_decoder_for_test jxl_extras-static)
+
+# Symbols listed in the module's EXPORTED_FUNCTIONS, grouped by origin.
+set(JXL_C_SYMBOLS
+  _free
+  _malloc
+)
+
+set(JXL_DECODER_SYMBOLS
+  _jxlCreateInstance
+  _jxlDestroyInstance
+  _jxlFlush
+  _jxlProcessInput
+)
+
+set(JXL_DECOMPRESSOR_SYMBOLS
+  _jxlDecompress
+  _jxlCleanup
+)
+
+set(JXL_MODULE_SYMBOLS ${JXL_C_SYMBOLS} ${JXL_DECODER_SYMBOLS} ${JXL_DECOMPRESSOR_SYMBOLS})
+
+# Turn the CMake list into the comma-separated form used inside
+# EXPORTED_FUNCTIONS=[...].
+list(JOIN JXL_MODULE_SYMBOLS ", " JXL_MODULE_EXPORTS)
+
+# Variant-specific link flags: the site build is optimized (-O3, closure
+# compiler, no filesystem), the test build uses -O1 and NODERAWFS.
+set(JXL_WASM_SITE_LINK_FLAGS " -O3 -s FILESYSTEM=0 --closure 1 -mnontrapping-fptoint")
+set(JXL_WASM_TEST_LINK_FLAGS " -O1 -s NODERAWFS=1 ")
+
+# Link flags shared by both variants.
+set(JXL_WASM_BASE_LINK_FLAGS "\
+  -s ALLOW_MEMORY_GROWTH=1 \
+  -s DISABLE_EXCEPTION_CATCHING=1 \
+  -s MODULARIZE=1 \
+  -s USE_PTHREADS=1 \
+  -s PTHREAD_POOL_SIZE=4 \
+")
+
+# libpng is used only by "decompressor"
+set(JXL_DECODER_LINK_FLAGS "${JXL_WASM_BASE_LINK_FLAGS} \
+  -s EXPORT_NAME=\"JxlDecoderModule\" \
+  -s \"EXPORTED_FUNCTIONS=[${JXL_MODULE_EXPORTS}]\" \
+")
+
+set_target_properties(jxl_decoder PROPERTIES LINK_FLAGS
+  "${JXL_DECODER_LINK_FLAGS} ${JXL_WASM_SITE_LINK_FLAGS}")
+
+set_target_properties(jxl_decoder_for_test PROPERTIES LINK_FLAGS
+  "${JXL_DECODER_LINK_FLAGS} ${JXL_WASM_TEST_LINK_FLAGS}")
+
+# The test runs jxl_decoder_test.js through the cross-compiling emulator, with
+# NODE_PATH pointing at the directory containing the test build of the module.
+if (BUILD_TESTING)
+  add_test(
+    NAME test_wasm_jxl_decoder
+    COMMAND ${CMAKE_CROSSCOMPILING_EMULATOR}
+            --no-experimental-fetch
+            ${CMAKE_CURRENT_SOURCE_DIR}/jxl_decoder_test.js
+  )
+  set_tests_properties(test_wasm_jxl_decoder PROPERTIES
+    ENVIRONMENT NODE_PATH=$<TARGET_FILE_DIR:jxl_decoder_for_test>)
+endif()  # BUILD_TESTING
diff --git a/third-party/libjxl/libjxl/tools/wasm_demo/README.md b/third-party/libjxl/libjxl/tools/wasm_demo/README.md
new file mode 100644
index 0000000000..804cd3559b
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/wasm_demo/README.md
@@ -0,0 +1,126 @@
+## WebAssembly demonstration
+
+This folder contains an example of how to decode JPEG XL files on a web page
+using a WASM engine.
+
+### One line demo
+
+The simplest way to get support of JXL images on the client side is simply to
+link one extra script (`<script src="service_worker.js">`) to the page.
+This script installs a `ServiceWorker` that:
+
+ - checks if the browser supports the JXL image format already
+ - if it does not, advertises `image/jxl` as a media format in image requests
+ - then, if the server responds with `image/jxl` content it gets decoded and
+   re-encoded to PNG on the fly
+
+Generally the message / data flow looks the following way:
+
+ - `Fetch API` receives a resource request from client page (e.g. when the HTML
+   engine discovers an `img` tag) and asks the `ServiceWorker` how to proceed
+ - the `ServiceWorker` alters the request and uses the `Fetch API`
+   to obtain data
+ - when data arrives, the `ServiceWorker` forwards it to the "client"
+   (the page) that initiated the resource request
+ - the client forwards the data to a worker (see `client_worker.js`) to avoid
+   processing in the "main loop" thread
+ - a worker does the actual decoding; to make it faster several additional
+   workers are spawned (to enable multi-threading in WASM module);
+   the decoded image is wrapped in non-compressed PNG format and sent back
+   to client
+ - the client relays image data to `ServiceWorker`
+ - the `ServiceWorker` passes data to `Fetch API` as a response to initial
+   resource request
+
+Despite the additional "hop" (client) in the flow, data is not copied every
+time but rather "transferred" between the participants.
+
+Demo page: `one_line_demo.html`. Extended demo that also shows how long it
+took to decode images: `one_line_demo_with_console.html`.
+
+Page that shows "manual" decoding (and has benchmarking capabilities):
+`manual_decode_demo.html`.
+
+### Hosting
+
+To enable multi-threading some files should be served in a secure context (i.e.
+transferred over HTTPS) and executed in a "site-isolation" mode (controlled by
+COOP and COEP response headers).
+
+Unfortunately [GitHub Pages](https://pages.github.com/) does not allow setting
+response headers.
+
+[Netlify](https://www.netlify.com/) provides a free platform that is easy to
+set up and deploy to for serving such demonstration sites. However, any other
+service provider / software that allows changing response headers could be
+employed as well.
+
+`netlify.toml` and `netlify/precompressed.ts` specify the serving rules.
+Namely, some requests get "upgraded" responses:
+
+ - if a request specifies that `brotli` compression is supported,
+   then precompressed entries are sent
+ - if a request specifies that `image/jxl` format is allowed,
+   then entries transcoded to JXL format are sent
+
+### How to build the demo
+
+`build_site.py` script takes care of JavaScript minification, template
+substitution and resource compression. Its arguments are:
+
+ - source path: site template directory (that contains this README file)
+ - binary path: build directory, that contains compiled WASM module
+ - output path
+
+To complete the site, a few more files need to be added to the output directory:
+
+ - `image00.jpg`, `image01.png` demo images; will be shown if `ServiceWorker`
+   is not yet operable (fallback); to see those one could initiate
+   "hard page reload" (press Shift-(Ctrl|Cmd)-R)
+ - `image00.jpg.jxl`, `image01.png.jxl` demo images in JXL format
+ - `imageNN.jxl` images for "manual" decoding demo; NN is a number starting
+   from `00`
+ - `favicon.ico` is an optional site icon
+ - `index.html` is an optional site "home" page
+
+In the source code (`service_worker.js`) there are two compile-time constants
+that modify the behaviour of Service Worker:
+
+ - `FORCE_COP` flag allows rewriting responses to add COOP / COEP headers;
+   this is useful when it is difficult / impossible to setup response headers
+   otherwise (e.g. GitHub Pages)
+ - `FORCE_DECODING` flag activates JXL decoding when the image response has
+   its `Content-Encoding` header set to `application/octet-stream`; this
+   happens when the server does not know the JXL MIME type
+
+One dependency that `build_site.py` requires is [uglifyjs](https://github.com/mishoo/UglifyJS), which can be installed with
+```
+npm install uglify-js -g
+```
+If you followed the [wasm build instructions](../../docs/building_wasm.md),
+assuming you are in the root level of the cloned libjxl repo a typical call to
+build the site would be
+```bash
+python3 ./tools/wasm_demo/build_site.py ./tools/wasm_demo/ ./build-wasm32/tools/wasm_demo/ /path/to/demo-site
+```
+Then put your image files in the correct place and you should be good to go.
+
+
+To summarize, using the wasm decoder together with a service worker amounts to adding
+```html
+<script src="service_worker.js"></script>
+```
+to your html and then putting the `service_worker.js` and `jxl_decoder.wasm` binary in a directory where they can be read.
+
+
+It is not guaranteed, but somewhat fresh demo is hosted on
+`https://jxl-demo.netlify.app/`, e.g.:
+
+ - [one line demo](https://jxl-demo.netlify.app/one_line_demo.html)
+ - [one line demo with console](https://jxl-demo.netlify.app/one_line_demo_with_console.html)
+ - [manual decode demo](https://jxl-demo.netlify.app/manual_decode_demo.html?img=1&colorSpace=rec2100-pq&runBenchmark=30&wantSdr=false&displayNits=1500);
+   URL contains query parameters that control rendering and benchmarking options;
+   please note, that HDR canvas is often not enabled by default, it could be
+   enabled in some browsers via `about://flags/#enable-experimental-web-platform-features`
+ - [`service_worker.js`](https://jxl-demo.netlify.app/service_worker.js)
+ - [`jxl_decoder.wasm`](https://jxl-demo.netlify.app/jxl_decoder.wasm)
diff --git a/third-party/libjxl/libjxl/tools/wasm_demo/build_site.py b/third-party/libjxl/libjxl/tools/wasm_demo/build_site.py
new file mode 100644
index 0000000000..44c23e8113
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/wasm_demo/build_site.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+"""Assembles the JPEG XL WASM demo site.
+
+Usage: python3 build_site.py SRC_DIR BINARY_DIR OUTPUT_DIR
+
+  SRC_DIR    - site sources (HTML pages, worker scripts, templates)
+  BINARY_DIR - build output with the generated JS / WASM files
+  OUTPUT_DIR - directory the assembled site is written to
+"""
+
+import shutil
+import subprocess
+import sys
+
+from pathlib import Path
+
+# Build switches. BROTLIFY / ZOPFLIFY are additionally cleared by check_utils()
+# when the corresponding tool is not installed.
+BROTLIFY = False
+ZOPFLIFY = False
+LEAN = True
+NETLIFY = False
+
+# Generated scripts whose leading "#!" line is stripped in place.
+REMOVE_SHEBANG = ['jxl_decoder.js']
+# Scripts that are minified and substituted into TEMPLATES as "$name$".
+EMBED_BIN = [
+  'jxl_decoder.js',
+  'jxl_decoder.worker.js'
+]
+EMBED_SRC = ['client_worker.js']
+TEMPLATES = ['service_worker.js']
+# NB: a conditional expression binds looser than "+"; the optional parts must
+# be parenthesized, otherwise the whole list is replaced rather than extended.
+COPY_BIN = ['jxl_decoder.wasm'] + ([] if LEAN else EMBED_BIN)
+COPY_SRC = [
+  'one_line_demo.html',
+  'one_line_demo_with_console.html',
+  'manual_decode_demo.html',
+] + ([
+  'netlify.toml',
+  'netlify'
+] if NETLIFY else []) + ([] if LEAN else EMBED_SRC)
+
+COMPRESS = COPY_BIN + COPY_SRC + TEMPLATES
+COMPRESSIBLE_EXT = ['.html', '.js', '.wasm']
+
+def escape_js(js):
+  """Escapes backslashes and single quotes in `js`."""
+  return js.replace('\\', '\\\\').replace('\'', '\\\'')
+
+def remove_shebang(txt):
+  """Returns `txt` without its first line if that line starts with '#!'."""
+  lines = txt.splitlines(True) # Keep line-breaks
+  if lines and lines[0].startswith('#!'):
+    lines = lines[1:]
+  return ''.join(lines)
+
+def compress(path):
+  """Compresses `path` with the enabled tools and prints the size change."""
+  name = path.name
+  if not name.endswith(tuple(COMPRESSIBLE_EXT)):
+    print(f'Not compressing {name}')
+    return
+  print(f'Processing {name}')
+  orig_size = path.stat().st_size
+  if BROTLIFY:
+    cmd_brotli = ['brotli', '-Zfk', path.absolute()]
+    subprocess.run(cmd_brotli, check=True, stdout=sys.stdout, stderr=sys.stderr)
+    br_size = path.parent.joinpath(name + '.br').stat().st_size
+    print(f'  Brotli: {orig_size} -> {br_size}')
+  if ZOPFLIFY:
+    cmd_zopfli = ['zopfli', path.absolute()]
+    subprocess.run(cmd_zopfli, check=True, stdout=sys.stdout, stderr=sys.stderr)
+    gz_size = path.parent.joinpath(name + '.gz').stat().st_size
+    print(f'  Zopfli: {orig_size} -> {gz_size}')
+
+def check_util(name):
+  """Returns True if `name -h` runs and exits successfully."""
+  cmd = [name, '-h']
+  try:
+    subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+  except (OSError, subprocess.SubprocessError):
+    # Not found / not executable / non-zero exit status.
+    print(f"NOTE: {name} not installed")
+    return False
+  return True
+
+def check_utils():
+  """Disables unavailable optional compressors; exits if uglifyjs is missing."""
+  global BROTLIFY
+  BROTLIFY = BROTLIFY and check_util('brotli')
+  global ZOPFLIFY
+  ZOPFLIFY = ZOPFLIFY and check_util('zopfli')
+  if not check_util('uglifyjs'):
+    print("FAIL: uglifyjs is required to build a site")
+    sys.exit(1)  # A missing prerequisite must not look like success.
+
+def uglify(text, name):
+  """Minifies JS `text` with uglifyjs; `name` is only used for logging."""
+  cmd = ['uglifyjs', '-m', '-c']
+  ugly_result = subprocess.run(
+      cmd, capture_output=True, check=True, input=text, text=True)
+  ugly_text = ugly_result.stdout.strip()
+  print(f'Uglify {name}: {len(text)} -> {len(ugly_text)}')
+  return ugly_text
+
+if __name__ == "__main__":
+  if len(sys.argv) != 4:
+    print(f"Usage: python3 {sys.argv[0]} SRC_DIR BINARY_DIR OUTPUT_DIR")
+    sys.exit(-1)
+  source_path = Path(sys.argv[1]) # Site template dir
+  binary_path = Path(sys.argv[2]) # CMake build dir
+  output_path = Path(sys.argv[3]) # Site output
+
+  check_utils()
+
+  # Make sure the destination exists before anything is written to it.
+  output_path.mkdir(parents=True, exist_ok=True)
+
+  for name in REMOVE_SHEBANG:
+    path = binary_path.joinpath(name)
+    text = path.read_text().strip()
+    path.write_text(remove_shebang(text))
+
+  substitutes = {}
+
+  for name in EMBED_BIN:
+    key = '$' + name + '$'
+    path = binary_path.joinpath(name)
+    value = escape_js(uglify(path.read_text().strip(), name))
+    substitutes[key] = value
+
+  for name in EMBED_SRC:
+    key = '$' + name + '$'
+    path = source_path.joinpath(name)
+    value = escape_js(uglify(path.read_text().strip(), name))
+    substitutes[key] = value
+
+  for name in TEMPLATES:
+    print(f'Processing template {name}')
+    path = source_path.joinpath(name)
+    text = path.read_text().strip()
+    for key, value in substitutes.items():
+      text = text.replace(key, value)
+    #text = uglify(text, name)
+    output_path.joinpath(name).write_text(text)
+
+  for name in COPY_SRC:
+    path = source_path.joinpath(name)
+    if path.is_dir():
+      shutil.copytree(path, output_path.joinpath(
+          name).absolute(), dirs_exist_ok=True)
+    else:
+      shutil.copy(path, output_path.absolute())
+
+  # TODO: uglify
+  for name in COPY_BIN:
+    shutil.copy(binary_path.joinpath(name), output_path.absolute())
+
+  for name in COMPRESS:
+    compress(output_path.joinpath(name))
diff --git a/third-party/libjxl/libjxl/tools/wasm_demo/client_worker.js b/third-party/libjxl/libjxl/tools/wasm_demo/client_worker.js
new file mode 100644
index 0000000000..2d75f4e702
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/wasm_demo/client_worker.js
@@ -0,0 +1,126 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+let decoder = null;
+
+// Serialize work; plus postpone processing until decoder is ready.
+let jobs = [];
+
+// `_jxlDecompress` returns a small error code instead of an address when it
+// fails (0 - OOM, 1 - decoding JXL failed, 2 - encoding PNG failed).
+const MIN_VALID_ADDRESS = 4;
+
+// Decodes every job whose input is complete and posts the result back.
+const processJobs = () => {
+  // Decoder not yet loaded.
+  if (!decoder) {
+    return;
+  }
+
+  while (true) {
+    let job = null;
+    // Currently we do not do progressive; process only "inputComplete" jobs.
+    for (let i = 0; i < jobs.length; ++i) {
+      if (!jobs[i].inputComplete) {
+        continue;
+      }
+      job = jobs[i];
+      // Order does not matter: fill the hole with the last job.
+      jobs[i] = jobs[jobs.length - 1];
+      jobs.pop();
+      break;
+    }
+    if (!job) {
+      return;
+    }
+    console.log('CW job: ' + job.uid);
+    const input = job.input;
+    let totalInputLength = 0;
+    for (let i = 0; i < input.length; i++) {
+      totalInputLength += input[i].length;
+    }
+
+    // Addresses cross the WASM boundary as signed 32-bit integers; `>>> 0`
+    // turns them back into unsigned values.
+    // TODO: persist to reduce fragmentation?
+    const buffer = decoder._malloc(totalInputLength) >>> 0;
+    let result = 0;  // "OOM", unless the decoder says otherwise.
+    let elapsed = 0;
+    if (buffer !== 0) {
+      let offset = 0;
+      for (let i = 0; i < input.length; ++i) {
+        decoder.HEAP8.set(input[i], buffer + offset);
+        offset += input[i].length;
+      }
+      const t0 = Date.now();
+      result = decoder._jxlDecompress(buffer, totalInputLength) >>> 0;
+      elapsed = Date.now() - t0;
+      decoder._free(buffer);
+    }
+
+    let output = null;
+    let msg = null;
+    if (result < MIN_VALID_ADDRESS) {
+      // `result` is an error code, not a pointer: it must neither be
+      // dereferenced nor passed to `_jxlCleanup`.
+      // NOTE(review): the failure is reported as an empty payload so that the
+      // message keeps its shape; confirm that the receiver copes with it.
+      output = new Uint8Array(0);
+      msg = 'Failed to decode ' + job.url + ' (error ' + result + ')';
+      console.log(msg);
+    } else {
+      msg = 'Decoded ' + job.url + ' in ' + elapsed + 'ms';
+      // console.log(msg);
+      const outputLength = decoder.HEAP32[result >>> 2] >>> 0;
+      const outputAddr = decoder.HEAP32[(result + 4) >>> 2] >>> 0;
+      output = new Uint8Array(outputLength);
+      // `subarray` is a view, so the result is copied out of the heap once.
+      const outputSrc = new Uint8Array(decoder.HEAP8.buffer);
+      output.set(outputSrc.subarray(outputAddr, outputAddr + outputLength));
+      decoder._jxlCleanup(result);
+    }
+    const response = {uid: job.uid, data: output, msg: msg};
+    postMessage(response, [output.buffer]);
+  }
+};
+
+// Accumulates input chunks per `uid`; a message without `data` marks the end
+// of input for that job.
+onmessage = function(event) {
+  const data = event.data;
+  console.log('CW received: ' + data.op);
+  if (data.op === 'decodeJxl') {
+    let job = null;
+    for (let i = 0; i < jobs.length; ++i) {
+      if (jobs[i].uid === data.uid) {
+        job = jobs[i];
+        break;
+      }
+    }
+    if (!job) {
+      job = {uid: data.uid, input: [], inputComplete: false, url: data.url};
+      jobs.push(job);
+    }
+    if (data.data) {
+      job.input.push(data.data);
+    } else {
+      job.inputComplete = true;
+    }
+    processJobs();
+  }
+};
+
+// Called once the WASM module is instantiated; runs the jobs queued so far.
+const onLoadJxlModule = (instance) => {
+  decoder = instance;
+  processJobs();
+};
+
+importScripts('jxl_decoder.js');
+const config = {
+  mainScriptUrlOrBlob: 'https://jxl-demo.netlify.app/jxl_decoder.js',
+  INITIAL_MEMORY: 16 * 1024 * 1024,
+};
+JxlDecoderModule(config).then(onLoadJxlModule);
diff --git a/third-party/libjxl/libjxl/tools/wasm_demo/jxl_decoder.cc b/third-party/libjxl/libjxl/tools/wasm_demo/jxl_decoder.cc
new file mode 100644
index 0000000000..755c0b7901
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/wasm_demo/jxl_decoder.cc
@@ -0,0 +1,250 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/wasm_demo/jxl_decoder.h"
+
+#include <jxl/decode.h>
+#include <jxl/decode_cxx.h>
+#include <jxl/thread_parallel_runner_cxx.h>
+
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <memory>
+#include <new>
+#include <vector>
+
+extern "C" {
+
+namespace {
+
+struct DecoderInstancePrivate {
+  // Due to "Standard Layout" rules it is guaranteed that address of the entity
+  // and its first non-static member are the same.
+  DecoderInstance info;
+
+  size_t pixels_size = 0;
+  bool want_sdr;
+  uint32_t display_nits;
+  JxlPixelFormat format;
+  JxlDecoderPtr decoder;
+  JxlThreadParallelRunnerPtr thread_pool;
+
+  // Input the decoder has not consumed yet; prepended to the next chunk.
+  std::vector<uint8_t> tail;
+};
+
+}  // namespace
+
+// Creates a decoder instance; see jxl_decoder.h for the returned codes.
+DecoderInstance* jxlCreateInstance(bool want_sdr, uint32_t display_nits) {
+  // Plain `new` never yields nullptr; the non-throwing form makes the OOM
+  // check below effective.
+  DecoderInstancePrivate* self = new (std::nothrow) DecoderInstancePrivate();
+
+  if (!self) {
+    return nullptr;
+  }
+
+  self->want_sdr = want_sdr;
+  self->display_nits = display_nits;
+  JxlDataType storageFormat = want_sdr ? JXL_TYPE_UINT8 : JXL_TYPE_UINT16;
+  self->format = {4, storageFormat, JXL_NATIVE_ENDIAN, 0};
+  self->decoder = JxlDecoderMake(nullptr);
+
+  JxlDecoder* dec = self->decoder.get();
+
+  // Releases the half-built instance and passes `code` in place of an address.
+  auto report_error = [&](uint32_t code, const char* text) {
+    fprintf(stderr, "%s\n", text);
+    delete self;
+    return reinterpret_cast<DecoderInstance*>(static_cast<uintptr_t>(code));
+  };
+
+  if (dec == nullptr) {
+    return report_error(0, "JxlDecoderMake failed");
+  }
+
+  self->thread_pool = JxlThreadParallelRunnerMake(nullptr, 4);
+  void* runner = self->thread_pool.get();
+  if (runner == nullptr) {
+    return report_error(0, "JxlThreadParallelRunnerMake failed");
+  }
+
+  auto status =
+      JxlDecoderSetParallelRunner(dec, JxlThreadParallelRunner, runner);
+
+  if (status != JXL_DEC_SUCCESS) {
+    return report_error(1, "JxlDecoderSetParallelRunner failed");
+  }
+
+  status = JxlDecoderSubscribeEvents(
+      dec, JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING | JXL_DEC_FULL_IMAGE |
+               JXL_DEC_FRAME_PROGRESSION);
+  if (JXL_DEC_SUCCESS != status) {
+    return report_error(2, "JxlDecoderSubscribeEvents failed");
+  }
+
+  status = JxlDecoderSetProgressiveDetail(dec, kPasses);
+  if (JXL_DEC_SUCCESS != status) {
+    return report_error(3, "JxlDecoderSetProgressiveDetail failed");
+  }
+  return &self->info;
+}
+
+// Releases the instance together with its pixel buffer; nullptr is a no-op.
+void jxlDestroyInstance(DecoderInstance* instance) {
+  if (instance == nullptr) return;
+  DecoderInstancePrivate* self =
+      reinterpret_cast<DecoderInstancePrivate*>(instance);
+  if (instance->pixels) {
+    free(instance->pixels);
+  }
+  delete self;
+}
+
+// Feeds `input` to the decoder; see jxl_decoder.h for the returned codes.
+uint32_t jxlProcessInput(DecoderInstance* instance, const uint8_t* input,
+                         size_t input_size) {
+  if (instance == nullptr) return static_cast<uint32_t>(-1);
+  DecoderInstancePrivate* self =
+      reinterpret_cast<DecoderInstancePrivate*>(instance);
+  JxlDecoder* dec = self->decoder.get();
+
+  auto report_error = [&](int code, const char* text) {
+    fprintf(stderr, "%s\n", text);
+    return static_cast<uint32_t>(code);
+  };
+
+  // Leftovers of the previous call go first; the decoder sees one contiguous
+  // buffer.
+  std::vector<uint8_t>& tail = self->tail;
+  if (!tail.empty()) {
+    tail.reserve(tail.size() + input_size);
+    tail.insert(tail.end(), input, input + input_size);
+    input = tail.data();
+    input_size = tail.size();
+  }
+
+  auto status = JxlDecoderSetInput(dec, input, input_size);
+  if (JXL_DEC_SUCCESS != status) {
+    return report_error(-2, "JxlDecoderSetInput failed");
+  }
+
+  // Detaches the input from the decoder and keeps the unconsumed bytes in
+  // `tail`; has to run before every return once the input is set.
+  auto release_input = [&]() {
+    size_t unused_input = JxlDecoderReleaseInput(dec);
+    if (unused_input == 0) {
+      tail.clear();
+      return;
+    }
+    if (tail.empty()) {
+      tail.insert(tail.end(), input + input_size - unused_input,
+                  input + input_size);
+    } else {
+      memmove(tail.data(), tail.data() + tail.size() - unused_input,
+              unused_input);
+      tail.resize(unused_input);
+    }
+  };
+
+  // Every branch either returns or handles an event and asks for the next one.
+  while (true) {
+    status = JxlDecoderProcessInput(dec);
+    if (JXL_DEC_SUCCESS == status) {
+      release_input();
+      return 0;  // ¯\_(ツ)_/¯
+    } else if (JXL_DEC_FRAME_PROGRESSION == status) {
+      release_input();
+      return 1;  // ready to flush; client will decide whether it is necessary
+    } else if (JXL_DEC_NEED_MORE_INPUT == status) {
+      release_input();
+      return 2;
+    } else if (JXL_DEC_FULL_IMAGE == status) {
+      release_input();
+      return 0;  // final image is ready
+    } else if (JXL_DEC_BASIC_INFO == status) {
+      JxlBasicInfo info;
+      status = JxlDecoderGetBasicInfo(dec, &info);
+      if (status != JXL_DEC_SUCCESS) {
+        release_input();
+        return report_error(-4, "JxlDecoderGetBasicInfo failed");
+      }
+      instance->width = info.xsize;
+      instance->height = info.ysize;
+      status =
+          JxlDecoderImageOutBufferSize(dec, &self->format, &self->pixels_size);
+      if (status != JXL_DEC_SUCCESS) {
+        release_input();
+        return report_error(-6, "JxlDecoderImageOutBufferSize failed");
+      }
+      if (instance->pixels) {
+        release_input();
+        return report_error(-7, "Tried to realloc pixels");
+      }
+      // A failed allocation surfaces as error -8 when the decoder asks for
+      // the output buffer.
+      instance->pixels = reinterpret_cast<uint8_t*>(malloc(self->pixels_size));
+    } else if (JXL_DEC_NEED_IMAGE_OUT_BUFFER == status) {
+      if (!self->info.pixels) {
+        release_input();
+        return report_error(-8, "Out buffer not allocated");
+      }
+      status = JxlDecoderSetImageOutBuffer(dec, &self->format, instance->pixels,
+                                           self->pixels_size);
+      if (status != JXL_DEC_SUCCESS) {
+        release_input();
+        return report_error(-9, "JxlDecoderSetImageOutBuffer failed");
+      }
+    } else if (JXL_DEC_COLOR_ENCODING == status) {
+      JxlColorEncoding color_encoding = {};
+      color_encoding.color_space = JXL_COLOR_SPACE_RGB;
+      color_encoding.white_point = JXL_WHITE_POINT_D65;
+      color_encoding.primaries =
+          self->want_sdr ? JXL_PRIMARIES_SRGB : JXL_PRIMARIES_2100;
+      color_encoding.transfer_function = self->want_sdr
+                                             ? JXL_TRANSFER_FUNCTION_SRGB
+                                             : JXL_TRANSFER_FUNCTION_PQ;
+      color_encoding.rendering_intent = JXL_RENDERING_INTENT_PERCEPTUAL;
+      status = JxlDecoderSetPreferredColorProfile(dec, &color_encoding);
+      if (status != JXL_DEC_SUCCESS) {
+        release_input();
+        return report_error(-5, "JxlDecoderSetPreferredColorProfile failed");
+      }
+    } else {
+      release_input();
+      return report_error(-3, "Unexpected decoder status");
+    }
+  }
+}
+
+// Asks the decoder to flush the image decoded so far; returns 0 on success.
+uint32_t jxlFlush(DecoderInstance* instance) {
+  if (instance == nullptr) return static_cast<uint32_t>(-1);
+  DecoderInstancePrivate* self =
+      reinterpret_cast<DecoderInstancePrivate*>(instance);
+  JxlDecoder* dec = self->decoder.get();
+
+  auto report_error = [&](int code, const char* text) {
+    fprintf(stderr, "%s\n", text);
+    // self->result = code;
+    return static_cast<uint32_t>(code);
+  };
+
+  if (!instance->pixels) {
+    return report_error(-2, "Not ready to flush");
+  }
+
+  auto status = JxlDecoderFlushImage(dec);
+  if (status != JXL_DEC_SUCCESS) {
+    return report_error(-3, "Failed to flush");
+  }
+
+  return 0;
+}
+
+}  // extern "C"
diff --git a/third-party/libjxl/libjxl/tools/wasm_demo/jxl_decoder.h b/third-party/libjxl/libjxl/tools/wasm_demo/jxl_decoder.h
new file mode 100644
index 0000000000..ad6d88e082
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/wasm_demo/jxl_decoder.h
@@ -0,0 +1,48 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_WASM_DEMO_JXL_DECODER_H_
+#define TOOLS_WASM_DEMO_JXL_DECODER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+extern "C" {
+
+typedef struct DecoderInstance {
+  uint32_t width = 0;  // image width; set when basic info is decoded
+  uint32_t height = 0;  // image height; set when basic info is decoded
+  uint8_t* pixels = nullptr;  // output buffer; freed by jxlDestroyInstance
+
+  // The rest is opaque.
+} DecoderInstance;
+
+/*
+  Creates a decoder instance; returns (as uint32_t):
+    0 - OOM
+    1 - JxlDecoderSetParallelRunner failed
+    2 - JxlDecoderSubscribeEvents failed
+    3 - JxlDecoderSetProgressiveDetail failed
+    >=4 - OK (address of the instance; release it with jxlDestroyInstance)
+ */
+DecoderInstance* jxlCreateInstance(bool want_sdr, uint32_t display_nits);
+
+void jxlDestroyInstance(DecoderInstance* instance);  // nullptr is a no-op
+
+/*
+  Feeds the next chunk of input to the decoder; returns (as uint32_t):
+    0 - OK (pixels are ready)
+    1 - ready to flush
+    2 - needs more input
+    >=3 - error (a negative code converted to uint32_t)
+ */
+uint32_t jxlProcessInput(DecoderInstance* instance, const uint8_t* input,
+                         size_t input_size);
+
+uint32_t jxlFlush(DecoderInstance* instance);  // 0 - OK, otherwise error
+
+}  // extern "C"
+
+#endif  // TOOLS_WASM_DEMO_JXL_DECODER_H_
diff --git a/third-party/libjxl/libjxl/tools/wasm_demo/jxl_decoder_test.js b/third-party/libjxl/libjxl/tools/wasm_demo/jxl_decoder_test.js
new file mode 100644
index 0000000000..22dfa0724c
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/wasm_demo/jxl_decoder_test.js
@@ -0,0 +1,131 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+function assertTrue(ok, msg) {
+  if (!ok) {
+    console.log('FAIL: ' + msg);
+    process.exit(1);
+  }
+}
+
+function runTest(testFn) {
+  console.log('Running ' + testFn.name);
+  testFn();
+  console.log('PASS');
+}
+
+let jxlModule;
+
+// Values below 4 are error codes; the sign bit must not be set either.
+const isAddress = (v) => {
+  return (v >= 4) && ((v & (1 << 31)) === 0);
+};
+
+let splinesJxl = new Uint8Array([
+  0xff, 0x0a, 0xf8, 0x19, 0x10, 0x09, 0xd8, 0x63, 0x10, 0x00, 0xbc, 0x00,
+  0xa6, 0x19, 0x4a, 0xa3, 0x56, 0x8c, 0x94, 0x62, 0x24, 0x7d, 0x12, 0x72,
+  0x87, 0x00, 0x00, 0xda, 0xd4, 0xc9, 0xc1, 0xe2, 0x9e, 0x02, 0xb9, 0x37,
+  0x00, 0xfe, 0x07, 0x9a, 0x91, 0x08, 0xcd, 0xbf, 0xa1, 0xdc, 0x71, 0x36,
+  0x62, 0xc8, 0x97, 0x31, 0xc4, 0x3e, 0x58, 0x02, 0xc1, 0x01, 0x00
+]);
+
+let crossJxl = new Uint8Array([
+  0xff, 0x0a, 0x98, 0x10, 0x10, 0x50, 0x5c, 0x08, 0x08, 0x02, 0x01,
+  0x00, 0x98, 0x00, 0x4b, 0x18, 0x8b, 0x15, 0x00, 0xd4, 0x92, 0x62,
+  0xcc, 0x98, 0x91, 0x17, 0x08, 0x01, 0xe0, 0x92, 0xbc, 0x7e, 0xdf,
+  0xbf, 0xff, 0x50, 0xc0, 0x64, 0x35, 0xb0, 0x40, 0x1e, 0x24, 0xa9,
+  0xac, 0x38, 0xd9, 0x13, 0x1e, 0x85, 0x4a, 0x0d
+]);
+
+// Copies `bytes` to a freshly allocated heap buffer; returns its address.
+function copyToHeap(bytes) {
+  let buffer = jxlModule._malloc(bytes.length);
+  jxlModule.HEAP8.set(bytes, buffer);
+  return buffer;
+}
+
+// Creates a decoder instance and checks that the creation succeeded.
+function createDecoder(wantSdr) {
+  let decoder =
+      jxlModule._jxlCreateInstance(wantSdr, /* displayNits */ 100);
+  assertTrue(isAddress(decoder), 'create decoder instance');
+  return decoder;
+}
+
+// Checks the DecoderInstance fields (width, height, pixels) expected for the
+// splines image, then releases the instance.
+function checkSplinesAndDestroy(decoder) {
+  let w = jxlModule.HEAP32[decoder >> 2];
+  let h = jxlModule.HEAP32[(decoder + 4) >> 2];
+  let pixelData = jxlModule.HEAP32[(decoder + 8) >> 2];
+
+  assertTrue(pixelData, 'output allocated');
+  assertTrue(h === 320, 'output height');
+  assertTrue(w === 320, 'output width ');
+
+  jxlModule._jxlDestroyInstance(decoder);
+}
+
+// Decodes the whole splines image with a single jxlProcessInput call.
+function decodeSplinesAtOnce(wantSdr) {
+  let decoder = createDecoder(wantSdr);
+  let buffer = copyToHeap(splinesJxl);
+
+  let result = jxlModule._jxlProcessInput(decoder, buffer, splinesJxl.length);
+  assertTrue(result === 0, 'process input');
+
+  checkSplinesAndDestroy(decoder);
+  jxlModule._free(buffer);
+}
+
+function testSdr() {
+  decodeSplinesAtOnce(/* wantSdr */ true);
+}
+
+function testRegular() {
+  decodeSplinesAtOnce(/* wantSdr */ false);
+}
+
+function testChunks() {
+  let decoder = createDecoder(/* wantSdr */ false);
+  let encoded = splinesJxl;
+  let buffer = copyToHeap(encoded);
+
+  let part1_length = encoded.length >> 1;
+  let part2_length = encoded.length - part1_length;
+
+  let result = jxlModule._jxlProcessInput(decoder, buffer, part1_length);
+  assertTrue(result === 2, 'process first part');
+
+  result =
+      jxlModule._jxlProcessInput(decoder, buffer + part1_length, part2_length);
+  assertTrue(result === 0, 'process second part');
+
+  checkSplinesAndDestroy(decoder);
+  jxlModule._free(buffer);
+}
+
+function testDecompress() {
+  let buffer = copyToHeap(crossJxl);
+
+  let output = jxlModule._jxlDecompress(buffer, crossJxl.length);
+  assertTrue(isAddress(output), 'decompress');
+
+  jxlModule._free(buffer);
+
+  let pngSize = jxlModule.HEAP32[output >> 2];
+  let px = 20 * 20;
+  assertTrue(pngSize >= 6 * px, 'png size');
+  assertTrue(pngSize <= 6 * px + 800, 'png size');
+
+  jxlModule._jxlCleanup(output);
+}
+
+require('jxl_decoder_for_test.js')().then(module => {
+  jxlModule = module;
+  let tests = [testSdr, testRegular, testChunks, testDecompress];
+  tests.forEach(runTest);
+  process.exit(0);
+});
diff --git a/third-party/libjxl/libjxl/tools/wasm_demo/jxl_decompressor.cc b/third-party/libjxl/libjxl/tools/wasm_demo/jxl_decompressor.cc
new file mode 100644
index 0000000000..648e1ef9f5
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/wasm_demo/jxl_decompressor.cc
@@ -0,0 +1,138 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/wasm_demo/jxl_decompressor.h"
+
+#include <jxl/thread_parallel_runner_cxx.h>
+
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <memory>
+#include <new>
+#include <vector>
+
+#include "lib/extras/dec/jxl.h"
+#include "tools/wasm_demo/no_png.h"
+
+extern "C" {
+
+namespace {
+
+struct DecompressorOutputPrivate {
+  // Due to "Standard Layout" rules it is guaranteed that address of the entity
+  // and its first non-static member are the same.
+  DecompressorOutput output;
+};
+
+// Fills `cicp` with a 4-byte cICP payload for the color encoding of `ppf`;
+// leaves it empty if the encoding can not be signalled that way.
+void MaybeMakeCicp(const jxl::extras::PackedPixelFile& ppf,
+                   std::vector<uint8_t>* cicp) {
+  cicp->clear();
+  const JxlColorEncoding& clr = ppf.color_encoding;
+  uint8_t color_primaries = 0;
+  uint8_t transfer_function = static_cast<uint8_t>(clr.transfer_function);
+
+  if (clr.color_space != JXL_COLOR_SPACE_RGB) {
+    return;
+  }
+  if (clr.primaries == JXL_PRIMARIES_P3) {
+    if (clr.white_point == JXL_WHITE_POINT_D65) {
+      color_primaries = 12;
+    } else if (clr.white_point == JXL_WHITE_POINT_DCI) {
+      color_primaries = 11;
+    } else {
+      return;
+    }
+  } else if (clr.primaries != JXL_PRIMARIES_CUSTOM &&
+             clr.white_point == JXL_WHITE_POINT_D65) {
+    color_primaries = static_cast<uint8_t>(clr.primaries);
+  } else {
+    return;
+  }
+  if (clr.transfer_function == JXL_TRANSFER_FUNCTION_UNKNOWN ||
+      clr.transfer_function == JXL_TRANSFER_FUNCTION_GAMMA) {
+    return;
+  }
+
+  cicp->resize(4);
+  cicp->at(0) = color_primaries;    // Colour Primaries
+  cicp->at(1) = transfer_function;  // Transfer Function
+  cicp->at(2) = 0;                  // Matrix Coefficients
+  cicp->at(3) = 1;                  // Video Full Range Flag
+}
+
+}  // namespace
+
+// Decodes `input` and wraps the first frame into a PNG; see
+// jxl_decompressor.h for the returned codes.
+DecompressorOutput* jxlDecompress(const uint8_t* input, size_t input_size) {
+  // Plain `new` never yields nullptr; the non-throwing form makes the OOM
+  // check below effective.
+  DecompressorOutputPrivate* self =
+      new (std::nothrow) DecompressorOutputPrivate();
+
+  if (!self) {
+    return nullptr;
+  }
+
+  // Releases `self` and passes `code` to the caller in place of an address.
+  auto report_error = [&](uint32_t code, const char* text) {
+    fprintf(stderr, "%s\n", text);
+    delete self;
+    return reinterpret_cast<DecompressorOutput*>(
+        static_cast<uintptr_t>(code));
+  };
+
+  auto thread_pool = JxlThreadParallelRunnerMake(nullptr, 4);
+  void* runner = thread_pool.get();
+  if (runner == nullptr) {
+    return report_error(0, "failed to create thread pool");
+  }
+
+  jxl::extras::JXLDecompressParams dparams;
+  JxlPixelFormat format = {/* num_channels */ 3, JXL_TYPE_UINT16,
+                           JXL_BIG_ENDIAN, /* align */ 0};
+  dparams.accepted_formats.push_back(format);
+  dparams.runner = JxlThreadParallelRunner;
+  dparams.runner_opaque = runner;
+  jxl::extras::PackedPixelFile ppf;
+
+  if (!jxl::extras::DecodeImageJXL(input, input_size, dparams, nullptr, &ppf)) {
+    return report_error(1, "failed to decode jxl");
+  }
+  if (ppf.frames.empty()) {
+    return report_error(1, "decoded jxl has no frames");
+  }
+
+  // Just 1-st frame.
+  const auto& image = ppf.frames[0].color;
+  std::vector<uint8_t> cicp;
+  MaybeMakeCicp(ppf, &cicp);
+  self->output.data = WrapPixelsToPng(
+      image.xsize, image.ysize, (format.data_type == JXL_TYPE_UINT16) ? 16 : 8,
+      /* has_alpha */ false, reinterpret_cast<const uint8_t*>(image.pixels()),
+      ppf.icc, cicp, &self->output.size);
+  if (!self->output.data) {
+    return report_error(2, "failed to encode png");
+  }
+
+  return &self->output;
+}
+
+// Releases the result of a successful jxlDecompress; nullptr is a no-op.
+void jxlCleanup(DecompressorOutput* output) {
+  if (output == nullptr) return;
+  DecompressorOutputPrivate* self =
+      reinterpret_cast<DecompressorOutputPrivate*>(output);
+  if (self->output.data) {
+    free(self->output.data);
+  }
+  delete self;
+}
+
+}  // extern "C"
diff --git a/third-party/libjxl/libjxl/tools/wasm_demo/jxl_decompressor.h b/third-party/libjxl/libjxl/tools/wasm_demo/jxl_decompressor.h
new file mode 100644
index 0000000000..2ba16a0e6a
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/wasm_demo/jxl_decompressor.h
@@ -0,0 +1,34 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_WASM_DEMO_JXL_DECOMPRESSOR_H_
+#define TOOLS_WASM_DEMO_JXL_DECOMPRESSOR_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+extern "C" {
+
+typedef struct DecompressorOutput {
+  uint32_t size = 0;  // number of bytes in `data`
+  uint8_t* data = nullptr;  // encoded PNG; freed by jxlCleanup
+
+  // The rest is opaque.
+} DecompressorOutput;
+
+/*
+  Decodes a JXL image and re-encodes it as PNG; returns (as uint32_t):
+    0 - OOM
+    1 - decoding JXL failed
+    2 - encoding PNG failed
+    >=4 - OK (address of the output; release it with jxlCleanup)
+ */
+DecompressorOutput* jxlDecompress(const uint8_t* input, size_t input_size);
+
+void jxlCleanup(DecompressorOutput* output);  // nullptr is a no-op
+
+}  // extern "C"
+
+#endif  // TOOLS_WASM_DEMO_JXL_DECOMPRESSOR_H_
diff --git a/third-party/libjxl/libjxl/tools/wasm_demo/manual_decode_demo.html b/third-party/libjxl/libjxl/tools/wasm_demo/manual_decode_demo.html
new file mode 100644
index 0000000000..cfc52fb659
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/wasm_demo/manual_decode_demo.html
@@ -0,0 +1,340 @@
+<html>
+<head>
+  <link rel="icon" type="image/x-icon" href="favicon.ico">
+  <style>
+#log p {
+  margin: 0;
+}
+  </style>
+</head>
+<body>
+<div id="log" style="padding:2px; border: solid 1px #000; background-color: #ccc; margin:2px; height: 8em; font-family: monospace; overflow-y: auto; font-size: 8px;"></div>
+<script>
+// WASM module.
+let jxlModule = null;
+// Flag; if true, then HDR color space / 16 bit output is supported.
+let hdrCanvas = false;
+
+// Add message to "console".
+let addMessage = (text, color) => {
+  let log = document.getElementById('log');
+  let message = document.createElement('p');
+  message.style = 'color: ' + color + ';';
+  message.textContent = text;
+  log.append(message);
+  log.scrollTop = log.scrollHeight;
+}
+
+// Callback from WASM module when it becomes available.
+let onLoadJxlModule = (module) => {
+  jxlModule = module;
+  addMessage('WASM module loaded', 'black');
+  onJxlModuleReady();
+};
+
+// Check if multi-threading is supported (i.e. SharedArrayBuffer is allowed).
+let probeMutlithreading = () => {
+  try {
+    new SharedArrayBuffer();
+    return true;
+  } catch (ex) {
+    addMessage('Installing Service Worker, please wait...', 'orange');
+    return false;
+  }
+};
+
+// Check if HDR features are enabled.
+let probeHdr = () => {
+  addMessage('Probing HDR features', 'black');
+  try {
+    let tmpCanvas = document.createElement('canvas');
+    tmpCanvas.width = 1;
+    tmpCanvas.height = 1;
+    let ctx = tmpCanvas.getContext('2d', {colorSpace: 'rec2100-pq', pixelFormat: 'float16'});
+    // make it fail on firefox...
+    ctx.getContextAttributes();
+    addMessage('HDR canvas supported', 'green');
+    return true;
+  } catch (ex) {
+    addMessage(ex, 'red');
+    addMessage('Are Blink experiments enabled? about://flags/#enable-experimental-web-platform-features', 'blue');
+    return false;
+  }
+};
+
+// "main" method executed after page is loaded; all scripts are "synchronous" elements,
+// so it is guaranteed that script elements are loaded and executed.
+let onDomContentLoaded = () => {
+  if (!probeMutlithreading()) return;
+  hdrCanvas = probeHdr();
+  JxlDecoderModule().then(onLoadJxlModule);
+};
+
+// Pass next chunk to decoder and interpret the result.
+let processInput = (img, chunkLen) => {
+  let response = {
+    wantFlush: false,
+    copyPixels: false,
+    error: false,
+  }
+  do {
+    let t0 = performance.now();
+    let result = jxlModule._jxlProcessInput(img.decoder, img.buffer, chunkLen);
+    let t1 = performance.now();
+    let tProcessing = t1 - t0;
+    // addMessage('Processed chunk in ' + tProcessing + 'ms', 'blue');
+    img.totalProcessing += tProcessing;
+    // addMessage('Process result: ' + result, 'green');
+    if (result === 2) {
+      addMessage('Needs more input', 'gray');
+    } else if (result === 0) {
+      // addMessage('Image ready', 'gray');
+      response.wantFlush = false;
+      response.copyPixels = true;
+    } else if (result === 1) {
+      if (img.wantProgressive) {
+        addMessage('DC ready', 'gray');
+        response.wantFlush = true;
+        response.copyPixels = true;
+      } else {
+        // addMessage('Skipping DC flush', 'gray');
+        chunkLen = 0;
+        continue;
+      }
+    } else {
+      addMessage('Processing error', 'red');
+      img.broken = true;
+      response.error = true;
+      break;
+    }
+    break;
+  } while (true);
+  return response;
+}
+
+// Decode chunk, flush if requested and copy the decoder output to the canvas.
+let processChunk = (img, chunkLen) => {
+  let result = processInput(img, chunkLen);
+  if (result.error) return;
+
+  if (result.wantFlush) {
+    let t2 = performance.now();
+    let flushResult = jxlModule._jxlFlush(img.decoder);
+    let t3 = performance.now();
+    let tFlushing = t3 - t2;
+    addMessage('Flush result: ' + flushResult, 'gray');
+    img.totalFlushing += tFlushing;
+  }
+
+  if (!result.copyPixels) return;
+
+  let w = jxlModule.HEAP32[img.decoder >> 2];  // DecoderInstance.width
+  let h = jxlModule.HEAP32[(img.decoder + 4) >> 2];  // DecoderInstance.height
+  let pixelData = jxlModule.HEAP32[(img.decoder + 8) >> 2];  // .pixels
+  if (!img.canvas) {
+    img.canvas = document.createElement('canvas');
+    img.canvas.width = w;
+    img.canvas.height = h;
+    img.canvas.style = 'width:100%';
+    // TODO: postpone until really flushed
+    document.body.appendChild(img.canvas);
+    let ctxOptions = {colorSpace: img.colorSpace, pixelFormat: 'float16'};
+    let pixelOptions = {colorSpace: img.colorSpace, storageFormat: 'uint16'};
+    if (img.wantSdr) {
+      ctxOptions = null;
+      pixelOptions = null;
+    }
+    img.canvasCtx = img.canvas.getContext('2d', ctxOptions);
+    img.pixels = img.canvasCtx.getImageData(0, 0, w, h, pixelOptions);
+  }
+
+  let src = null;
+  let start = pixelData;
+  if (img.wantSdr) {
+    src = new Uint8Array(jxlModule.HEAP8.buffer);
+  } else {
+    src = new Uint16Array(jxlModule.HEAP8.buffer);
+    start = start >> 1;  // byte address -> index of a 16-bit sample
+  }
+  let end = start + w * h * 4;  // 4 samples per pixel
+  img.pixels.data.set(src.subarray(start, end));  // view; `set` does the copy
+  img.canvasCtx.putImageData(img.pixels, 0, 0);
+};
+
+const BUF_LEN = 150 * 1024;
+
+// Image data cache for benchmarking.
+let fullImage = new Uint8Array(0);
+
+// Callback for fetch data: feeds the chunk to the decoder in BUF_LEN pieces.
+let onChunk = (img, chunk) => {
+  if (chunk.done) {
+    addMessage('Read finished | total processing: ' + img.totalProcessing.toFixed(1) + 'ms | total flushing ' + img.totalFlushing.toFixed(1) + 'ms', 'black');
+    cleanup(img);
+    img.onComplete(img);
+    return;
+  }
+  if (img.broken) return;
+
+  if (!img.decoder) {
+    let decoder = jxlModule._jxlCreateInstance(img.wantSdr, img.displayNits);
+    if (decoder < 4) {  // values below 4 are error codes, not addresses
+      img.broken = true;
+      cleanup(img);
+      addMessage('Failed to create decoder instance', 'red');
+      return;
+    }
+    img.decoder = decoder;
+    img.buffer = jxlModule._malloc(BUF_LEN);
+  }
+
+  // addMessage('Received chunk: ' + chunk.value.length, 'gray');
+  let newFullImage = new Uint8Array(fullImage.length + chunk.value.length);
+  newFullImage.set(fullImage);
+  newFullImage.set(chunk.value, fullImage.length);
+  fullImage = newFullImage;
+
+  let offset = 0;
+  while (offset < chunk.value.length) {
+    let delta = chunk.value.length - offset;
+    if (delta > BUF_LEN) delta = BUF_LEN;
+    jxlModule.HEAP8.set(chunk.value.subarray(offset, offset + delta), img.buffer);  // view; `set` does the copy
+    offset += delta;
+    processChunk(img, delta);
+    if (img.broken) {
+      return;
+    }
+  }
+
+  // Break the promise chain.
+  setTimeout(img.proceed, 0);
+};
+
+// Read next chunk; NB: onChunk re-schedules this via setTimeout to break promise chain.
+let proceed = (img) => {
+  img.reader.read().then(img.onChunk, img.onReadError);  // fulfilled / rejected
+};
+
+// Release (in-module) memory resources; handles are zeroed, so repeat calls are no-ops.
+let cleanup = (img) => {
+  if (img.decoder) {
+    jxlModule._jxlDestroyInstance(img.decoder);
+    img.decoder = 0;
+  }
+  if (img.buffer) {
+    jxlModule._free(img.buffer);  // counterpart of the jxlModule._malloc in onChunk
+    img.buffer = 0;
+  }
+};
+
+// Report error and cleanup; marks img as broken so onChunk ignores later data.
+let onReadError = (img, error) => {
+  img.broken = true;
+  cleanup(img);
+  addMessage('Read failed: ' + error, 'red');
+};
+
+// On successful fetch start.
+let onResponse = (img, response) => {
+  if (!response.ok) {
+    addMessage('Fetch failed: ' + response.status + ' (' + response.statusText + ')');
+    return;
+  }
+  // Alas, not supported by fetch:
+  // let reader = response.body.getReader({mode: "byob"});
+  img.reader = response.body.getReader();
+
+  img.proceed();
+};
+
+// On image decoding completion.
+let onComplete = (img) => {
+  if (!img.runBenchmark) return;
+
+  let buffer = jxlModule._malloc(fullImage.length);
+  jxlModule.HEAP8.set(fullImage, buffer);
+  img.buffer = buffer;
+  let results = [];
+
+  for (let i = 0; i < img.runBenchmark; ++i) {
+    img.totalProcessing = 0;
+    img.decoder = jxlModule._jxlCreateInstance(img.wantSdr, img.displayNits);
+    processChunk(img, fullImage.length);
+    jxlModule._jxlDestroyInstance(img.decoder);
+    results.push(img.totalProcessing);
+    //addMessage('Decoding time: ' + img.totalProcessing + 'ms', 'black');
+  }
+
+  results.sort();
+  addMessage('Min decoding time: ' + results[0].toFixed(3) + 'ms', 'black');
+  addMessage('Median decoding time: ' + results[results.length >> 1].toFixed(3) + 'ms', 'black');
+  addMessage('Max decoding time: ' + results[results.length - 1].toFixed(3) + 'ms', 'black');
+
+  jxlModule._free(buffer);
+};
+
+// Create a per-image state ("cookie") object with defaults; callbacks are no-ops until bound.
+let makeImg = () => {
+  return {
+    name: '',
+    colorSpace: 'rec2100-pq',
+    wantSdr: false,
+    displayNits: 100,
+    broken: false,  // set on failure; onChunk ignores further data once true
+    decoder: 0,  // decoder instance handle (0 = none)
+    canvas: null,  // created by processChunk on the first pixel copy
+    canvasCtx: null,
+    pixels: null,
+    buffer: 0,  // input buffer obtained from jxlModule._malloc (0 = none)
+    wantProgressive: false,
+    onlyDecode: false,
+    totalProcessing: 0,  // reported in ms
+    totalFlushing: 0,  // accumulated performance.now() deltas, in ms
+    runBenchmark: 0,  // number of benchmark runs; 0 disables benchmarking
+    onChunk: () => {},
+    onReadError: () => {},
+    proceed: () => {},
+    onComplete: () => {},
+  };
+}
+
+// Parse URL query and run image decoding / benchmarking.
+let onJxlModuleReady = () => {
+  let params = (new URL(document.location)).searchParams;
+  const images = ['image00.jxl', 'image01.jxl'];
+  let imgIdx = (params.get('img') | 0) % images.length;
+  let imgName = images[imgIdx];
+
+  let colorSpace = params.get('colorSpace') || 'srgb';
+  let wantSdr = params.get('wantSdr') == 'true';
+  let displayNits = parseInt(params.get('displayNits') || '0');
+  let runBenchmark = parseInt(params.get('runBenchmark') || '0');
+
+  if (!hdrCanvas) {
+    colorSpace = 'srgb-linear';
+    displayNits = displayNits || 100;
+    wantSdr = true;
+  }
+
+  addMessage('Color-space: "' + colorSpace + '", tone-map to SDR: ' + wantSdr + ', displayNits: ' + (displayNits || 'n/a'), 'black');
+
+  let img = makeImg();
+  img.name = imgName;
+  img.colorSpace = colorSpace;
+  img.wantSdr = wantSdr;
+  img.displayNits = displayNits;
+  img.onChunk = onChunk.bind(null, img);
+  img.onReadError = onReadError.bind(null, img);
+  img.proceed = proceed.bind(null, img);
+  img.onComplete = onComplete.bind(null, img);
+  img.runBenchmark = runBenchmark;
+
+  fetch(new Request(imgName, {cache: "no-store"})).then(onResponse.bind(null, img));
+};
+
+document.addEventListener('DOMContentLoaded', onDomContentLoaded);
+</script>
+
+<script src="jxl_decoder.js"></script>
+</body>
+</html>
diff --git a/third-party/libjxl/libjxl/tools/wasm_demo/netlify.toml b/third-party/libjxl/libjxl/tools/wasm_demo/netlify.toml
new file mode 100644
index 0000000000..44d9d5697d
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/wasm_demo/netlify.toml
@@ -0,0 +1,19 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# We use the "edge functions" feature to substitute responses with
+# pre-compressed (Brotli) or JPEG XL entries whenever those are available and
+# the browser advertises support for them.
+[[edge_functions]]
+path = "/*"
+function = "precompressed"
+
+# Request browser "site-isolation" enabled.
+# This allows using "SharedArrayBuffers" required for multi-threaded WASM.
+[[headers]]
+for = "/*"
+  [headers.values]
+    Cross-Origin-Opener-Policy = "same-origin"
+    Cross-Origin-Embedder-Policy = "require-corp"
diff --git a/third-party/libjxl/libjxl/tools/wasm_demo/netlify/edge-functions/precompressed.ts b/third-party/libjxl/libjxl/tools/wasm_demo/netlify/edge-functions/precompressed.ts
new file mode 100644
index 0000000000..c169432b6c
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/wasm_demo/netlify/edge-functions/precompressed.ts
@@ -0,0 +1,87 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+import type {Context} from 'netlify:edge';
+
+// This lambda is executed whenever request URL matches.
+export default async (request: Request, context: Context) => {
+  // Measure time for debugging purpose.
+  let t0 = Date.now();
+  // Get resource path (i.e. ignore query parameters).
+  let url = request.url.split('?')[0];
+  // Pick request headers; fallback to empty string if header is not set.
+  let acceptEncodingHeader = request.headers.get('Accept-Encoding') || '';
+  let acceptHeader = request.headers.get('Accept') || '';
+  let etag = request.headers.get('If-None-Match') || '';
+  // Roughly parse encodings list; this ignores "quality"; no modern browsers
+  // use it -> don't care. Tokens are trimmed on both sides before matching.
+  let splitter = /[,;]/;
+  let supportedEncodings =
+      acceptEncodingHeader.split(splitter).map(v => v.trim());
+  let supportsBr = supportedEncodings.includes('br');
+  let supportedMedia = acceptHeader.split(splitter).map(v => v.trim());
+  let supportsJxl = supportedMedia.includes('image/jxl');
+  // Dump basic request info (we care about).
+  context.log(
+      'URL: ' + url + '; acceptEncodingHeader: ' + acceptEncodingHeader +
+      '; supportsBr: ' + supportsBr + '; supportsJxl: ' + supportsJxl +
+      '; etag: ' + etag);
+
+  // If browser does not support Brotli/Jxl - just process request normally.
+  if (!supportsBr && !supportsJxl) {
+    return;
+  }
+
+  // Jxl processing is higher priority, because images are (usually) transferred
+  // with 'identity' content encoding.
+  let isJxlWorkflow = supportsJxl;
+  let suffix = isJxlWorkflow ? '.jxl' : '.br';
+
+  // Request pre-compressed resource (with a suffix).
+  let response = await context.rewrite(url + suffix);
+  context.log('Response status: ' + response.status);
+  // First latency checkpoint (as we synchronously wait for resource fetch).
+  let t1 = Date.now();
+  // If pre-compressed resource does not exist - pass.
+  if (response.status === 404) {
+    return;
+  }
+  // Get resource ETag.
+  let responseEtag = response.headers.get('ETag') || '';
+  context.log('Response etag: ' + responseEtag);
+  // We rely on platform to check ETag; add debugging info just in case.
+  if (etag.length >= 4 && responseEtag === etag) {
+    console.log('Match; status: ' + response.status);
+  }
+  // Status 200 is regular "OK" - fetch resource; in such a case we need to
+  // craft response with the response contents.
+  // Status 3xx likely means "use cache"; pass response as is.
+  // Status 4xx is unlikely (404 has been already processed).
+  // Status 5xx is server error - nothing we could do around it.
+  if (response.status !== 200) return response;
+  // Second time consuming operation - wait for resource contents.
+  let data = await response.arrayBuffer();
+  let fixedHeaders = new Headers(response.headers);
+  // Negotiated response: tell caches which request header it varies on.
+  fixedHeaders.append('Vary', isJxlWorkflow ? 'Accept' : 'Accept-Encoding');
+  if (isJxlWorkflow) {
+    fixedHeaders.set('Content-Type', 'image/jxl');
+  } else {  // is Brotli workflow
+    // Set "Content-Type" based on resource suffix;
+    // otherwise browser will complain.
+    let contentType = 'text/html; charset=UTF-8';
+    if (url.endsWith('.js')) {
+      contentType = 'application/javascript';
+    } else if (url.endsWith('.wasm')) {
+      contentType = 'application/wasm';
+    }
+    fixedHeaders.set('Content-Type', contentType);
+    // Inform browser that data stream is compressed.
+    fixedHeaders.set('Content-Encoding', 'br');
+  }
+  let t2 = Date.now();
+  console.log('Timing: ' + (t1 - t0) + ' ' + (t2 - t1));
+  return new Response(data, {headers: fixedHeaders});
+};
diff --git a/third-party/libjxl/libjxl/tools/wasm_demo/no_png.cc b/third-party/libjxl/libjxl/tools/wasm_demo/no_png.cc
new file mode 100644
index 0000000000..95e5b06766
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/wasm_demo/no_png.cc
@@ -0,0 +1,220 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/wasm_demo/no_png.h"
+
+#include <array>
+#include <memory>
+
+extern "C" {
+
+namespace {
+
+static std::array<uint32_t, 256> makeCrc32Lut() {
+  constexpr uint32_t kPoly = 0xEDB88320;
+  std::array<uint32_t, 256> lut;
+  for (uint32_t n = 0; n < 256; ++n) {
+    uint32_t crc = n;
+    for (size_t bit = 0; bit < 8; ++bit) {
+      // XOR in the polynomial only when the bit shifted out is set.
+      crc = (crc & 1) ? ((crc >> 1) ^ kPoly) : (crc >> 1);
+    }
+    lut[n] = crc;
+  }
+  return lut;
+}
+
+const std::array<uint32_t, 256> kCrc32Lut = makeCrc32Lut();
+
+const std::array<uint32_t, 8> kPngMagic = {137, 80, 78, 71, 13, 10, 26, 10};
+
+// Table-driven CRC over [start, end); not SIMDified, inputs here are small.
+uint32_t CalculateCrc32(const uint8_t* start, const uint8_t* end) {
+  uint32_t crc = 0xFFFFFFFFu;
+  for (const uint8_t* p = start; p < end; ++p) {
+    const uint32_t folded = crc ^ *p;
+    crc = (folded >> 8) ^ kCrc32Lut[folded & 0xFF];
+  }
+  return ~crc;
+}
+
+void AdlerCopy(const uint8_t* src, uint8_t* dst, size_t length, uint32_t* s1,
+               uint32_t* s2) {
+  // TODO: SIMD-ify and use multithreading.
+  // Copies `length` bytes src -> dst while updating the running Adler sums.
+  // Precondition: s1, s2 normalized; length <= 65535
+  uint64_t a = *s1;  // 64-bit: a 32-bit `b` can wrap beyond 5552 bytes (NMAX)
+  uint64_t b = *s2;
+
+  for (size_t i = 0; i < length; ++i) {
+    const uint8_t v = src[i];
+    a += v;
+    b += a;
+    dst[i] = v;
+  }
+
+  // Postcondition: s1, s2 normalized.
+  *s1 = static_cast<uint32_t>(a % 65521);
+  *s2 = static_cast<uint32_t>(b % 65521);
+}
+
+constexpr size_t kMaxDeflateBlock = 65535;
+constexpr uint32_t kIhdrSize = 13;
+constexpr uint32_t kCicpSize = 4;
+
+void WriteU8(uint8_t*& dst, uint8_t value) { *(dst++) = value; }
+
+void WriteU16(uint8_t*& dst, uint16_t value) {
+  memcpy(dst, &value, 2);
+  dst += 2;
+}
+
+void WriteU32(uint8_t*& dst, uint32_t value) {
+  memcpy(dst, &value, 4);
+  dst += 4;
+}
+
+void WriteU32BE(uint8_t*& dst, uint32_t value) {
+  WriteU32(dst, __builtin_bswap32(value));
+}
+
+}  // namespace
+
+uint8_t* WrapPixelsToPng(size_t width, size_t height, size_t bit_depth,
+                         bool has_alpha, const uint8_t* input,
+                         const std::vector<uint8_t>& icc,
+                         const std::vector<uint8_t>& cicp,
+                         uint32_t* output_size) {
+  size_t row_size = width * (bit_depth / 8) * (3 + has_alpha);
+  size_t data_size = height * (row_size + 1);
+  size_t num_deflate_blocks =
+      (data_size + kMaxDeflateBlock - 1) / kMaxDeflateBlock;
+  size_t idat_size = data_size + num_deflate_blocks * 5 + 6;
+  // ICC is emitted as ONE stored deflate block; larger profiles are dropped.
+  bool has_iccp = !icc.empty() && (icc.size() <= kMaxDeflateBlock);
+  size_t iccp_size = 3 + icc.size() + 5 + 6;  // name + data + deflate-wrapping
+  bool has_cicp = (cicp.size() == kCicpSize);
+  size_t total_size = 0;
+  total_size += kPngMagic.size();
+  total_size += 12 + kIhdrSize;
+  total_size += has_cicp ? (kCicpSize + 12) : 0;
+  total_size += has_iccp ? (iccp_size + 12) : 0;
+  total_size += 12 + idat_size;
+  total_size += 12;  // IEND
+
+  uint8_t* output = static_cast<uint8_t*>(malloc(total_size));
+  if (!output) {
+    return nullptr;
+  }
+  uint8_t* dst = output;
+  *output_size = total_size;
+
+  for (size_t i = 0; i < kPngMagic.size(); ++i) {
+    *(dst++) = kPngMagic[i];
+  }
+
+  // IHDR
+  WriteU32BE(dst, kIhdrSize);
+  uint8_t* chunk_start = dst;
+  WriteU32(dst, 0x52444849);
+  WriteU32BE(dst, width);
+  WriteU32BE(dst, height);
+  WriteU8(dst, bit_depth);
+  WriteU8(dst, has_alpha ? 6 : 2);
+  WriteU8(dst, 0);  // compression: deflate
+  WriteU8(dst, 0);  // filters: standard
+  WriteU8(dst, 0);  // interlace: no
+  uint32_t crc32 = CalculateCrc32(chunk_start, dst);
+  WriteU32BE(dst, crc32);
+
+  if (has_cicp) {
+    // cICP
+    WriteU32BE(dst, kCicpSize);
+    uint8_t* chunk_start = dst;
+    WriteU32(dst, 0x50434963);
+    for (size_t i = 0; i < kCicpSize; ++i) {
+      WriteU8(dst, cicp[i]);
+    }
+    uint32_t crc32 = CalculateCrc32(chunk_start, dst);
+    WriteU32BE(dst, crc32);
+  }
+
+  if (has_iccp) {
+    // iCCP
+    WriteU32BE(dst, iccp_size);
+    uint8_t* chunk_start = dst;
+    WriteU32(dst, 0x50434369);
+    WriteU8(dst, '1');   // Profile name
+    WriteU8(dst, 0);     // NUL terminator
+    WriteU8(dst, 0);     // Compression method: deflate
+    WriteU8(dst, 0x08);  // CM = 8 (deflate), CINFO = 0 (window size = 2**(0+8))
+    WriteU8(dst, 29);    // FCHECK; (FCHECK + 256* CMF) % 31 = 0
+    uint32_t adler_s1 = 1;
+    uint32_t adler_s2 = 0;
+    WriteU8(dst, 1);  // btype = 00 (uncompressed), last
+    uint16_t block_size = static_cast<uint16_t>(icc.size());
+    WriteU16(dst, block_size);
+    WriteU16(dst, ~block_size);
+    AdlerCopy(icc.data(), dst, block_size, &adler_s1, &adler_s2);
+    dst += block_size;
+    uint32_t adler = (adler_s2 << 16) | adler_s1;  // Adler-32 = s2 * 65536 + s1
+    WriteU32BE(dst, adler);
+    uint32_t crc32 = CalculateCrc32(chunk_start, dst);
+    WriteU32BE(dst, crc32);
+  }
+
+  // IDAT
+  WriteU32BE(dst, idat_size);
+  WriteU32(dst, 0x54414449);
+  size_t offset = 0;
+  size_t bytes_to_next_row = 0;
+  uint32_t adler_s1 = 1;
+  uint32_t adler_s2 = 0;
+  WriteU8(dst, 0x08);  // CM = 8 (deflate), CINFO = 0 (window size = 2**(0+8))
+  WriteU8(dst, 29);    // FCHECK; (FCHECK + 256* CMF) % 31 = 0
+  for (size_t i = 0; i < num_deflate_blocks; ++i) {
+    size_t block_size = data_size - offset;
+    if (block_size > kMaxDeflateBlock) {
+      block_size = kMaxDeflateBlock;
+    }
+    bool is_last = ((i + 1) == num_deflate_blocks);
+    WriteU8(dst, is_last);  // btype = 00 (uncompressed)
+    offset += block_size;
+
+    WriteU16(dst, block_size);
+    WriteU16(dst, ~block_size);
+    while (block_size > 0) {
+      if (bytes_to_next_row == 0) {
+        WriteU8(dst, 0);  // filter: raw
+        adler_s2 += adler_s1;
+        bytes_to_next_row = row_size;
+        block_size--;
+        continue;
+      }
+      size_t bytes_to_copy = std::min(block_size, bytes_to_next_row);
+      AdlerCopy(input, dst, bytes_to_copy, &adler_s1, &adler_s2);
+      dst += bytes_to_copy;
+      input += bytes_to_copy;
+      block_size -= bytes_to_copy;
+      bytes_to_next_row -= bytes_to_copy;
+    }
+  }
+  // Fake Adler works well in Chrome; so let's not waste CPU cycles.
+  uint32_t adler = 0;  // (adler_s2 << 16) | adler_s1;
+  WriteU32BE(dst, adler);
+  WriteU32BE(dst, 0);  // Fake CRC32
+
+  // IEND
+  WriteU32BE(dst, 0);
+  chunk_start = dst;
+  WriteU32(dst, 0x444E4549);
+  // TODO(eustas): this is fixed value; precalculate?
+  crc32 = CalculateCrc32(chunk_start, dst);
+  WriteU32BE(dst, crc32);
+
+  return output;
+}
+
+}  // extern "C"
diff --git a/third-party/libjxl/libjxl/tools/wasm_demo/no_png.h b/third-party/libjxl/libjxl/tools/wasm_demo/no_png.h
new file mode 100644
index 0000000000..1486c47942
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/wasm_demo/no_png.h
@@ -0,0 +1,24 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_WASM_DEMO_NO_PNG_H_
+#define TOOLS_WASM_DEMO_NO_PNG_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+extern "C" {
+
+uint8_t* WrapPixelsToPng(size_t width, size_t height, size_t bit_depth,
+                         bool has_alpha, const uint8_t* input,
+                         const std::vector<uint8_t>& icc,
+                         const std::vector<uint8_t>& cicp,
+                         uint32_t* output_size);
+
+}  // extern "C"
+
+#endif  // TOOLS_WASM_DEMO_NO_PNG_H_
diff --git a/third-party/libjxl/libjxl/tools/wasm_demo/one_line_demo.html b/third-party/libjxl/libjxl/tools/wasm_demo/one_line_demo.html
new file mode 100644
index 0000000000..a2966ac651
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/wasm_demo/one_line_demo.html
@@ -0,0 +1,20 @@
+<html>
+
+<head>
+  <link rel="icon" type="image/x-icon" href="favicon.ico" />
+  <script src="service_worker.js">
+/*
+ * Just load this script, et voila! It will install ServiceWorker to
+ * advertise image/jxl media type and decode responses.
+ * NB: if "addMessage" function is defined it will be used to report
+ * decoding times / problems.
+ */
+  </script>
+</head>
+
+<body>
+  <img src="image00.jxl" style="width:100%" />
+  <img src="image01.jxl" style="width:100%" />
+</body>
+
+</html>
diff --git a/third-party/libjxl/libjxl/tools/wasm_demo/one_line_demo_with_console.html b/third-party/libjxl/libjxl/tools/wasm_demo/one_line_demo_with_console.html
new file mode 100644
index 0000000000..e2c52ae1d8
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/wasm_demo/one_line_demo_with_console.html
@@ -0,0 +1,34 @@
+<html>
+
+<head>
+  <link rel="icon" type="image/x-icon" href="favicon.ico">
+  <script src="service_worker.js"></script>
+  <style>
+    #log p {
+      margin: 0;
+    }
+  </style>
+</head>
+
+<body>
+  <div id="log" style="padding:2px; border: solid 1px #000; background-color: #ccc; margin:2px; height: 8em; font-family: monospace; overflow-y: auto; font-size: 8px;"></div>
+  <script>
+    let addMessage = (text, color) => {
+      let log = document.getElementById('log');
+      let message = document.createElement('p');
+      message.style = 'color: ' + color + ';';
+      message.textContent = text;
+      log.append(message);
+      log.scrollTop = log.scrollHeight;
+    }
+  </script>
+
+<!-- Use those with capable server
+  <img src="image00.jpg" style="width:100%" />
+  <img src="image01.png" style="width:100%" />
+-->
+  <img src="image00.jxl" style="width:100%" />
+  <img src="image01.jxl" style="width:100%" />
+</body>
+
+</html>
diff --git a/third-party/libjxl/libjxl/tools/wasm_demo/service_worker.js b/third-party/libjxl/libjxl/tools/wasm_demo/service_worker.js
new file mode 100644
index 0000000000..f7ebba5032
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/wasm_demo/service_worker.js
@@ -0,0 +1,317 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+ * ServiceWorker script.
+ *
+ * Multi-threading in WASM is currently implemented by the means of
+ * SharedArrayBuffer. Due to infamous vulnerabilities this feature is disabled
+ * unless site is running in "cross-origin isolated" mode.
+ * If there is not enough control over the server (e.g. when pages are hosted as
+ * "github pages") ServiceWorker is used to upgrade responses with corresponding
+ * headers.
+ *
+ * This script could be executed in 2 environments: HTML page or ServiceWorker.
+ * The environment is detected by the type of "window" reference.
+ *
+ * When this script is executed from an HTML page, the ServiceWorker is
+ * registered. A page reload might be necessary in some situations. By default
+ * it is done via `window.location.reload()`; this can be altered by setting a
+ * configuration object `window.serviceWorkerConfig`, whose `requestReload`
+ * property should be a replacement callable.
+ *
+ * When this script is executed from ServiceWorker then standard lifecycle
+ * event dispatchers are setup along with `fetch` interceptor.
+ */
+
+(() => {
+  // Set COOP/COEP headers for document/script responses; use when this can not
+  // be done on server side (e.g. GitHub Pages).
+  const FORCE_COP = true;
+  // Interpret 'content-type: application/octet-stream' as JXL; use when server
+  // does not set appropriate content type (e.g. GitHub Pages).
+  const FORCE_DECODING = true;
+  // Embedded (baked-in) responses for faster turn-around.
+  const EMBEDDED = {
+    'client_worker.js': '$client_worker.js$',
+    'jxl_decoder.js': '$jxl_decoder.js$',
+    'jxl_decoder.worker.js': '$jxl_decoder.worker.js$',
+  };
+
+  // Enable SharedArrayBuffer.
+  const setCopHeaders = (headers) => {
+    headers.set('Cross-Origin-Embedder-Policy', 'require-corp');
+    headers.set('Cross-Origin-Opener-Policy', 'same-origin');
+  };
+
+  // Inflight object: {clientId, uid, timestamp, controller}
+  const inflight = [];
+
+  // Generate (very likely) unique string.
+  const makeUid = () => {
+    return Math.random().toString(36).substring(2) +
+        Math.random().toString(36).substring(2);
+  };
+
+  // Make list (non-recursively) of transferable entities.
+  const gatherTransferrables = (...args) => {
+    const result = [];
+    for (let i = 0; i < args.length; ++i) {
+      if (args[i] && args[i].buffer) {
+        result.push(args[i].buffer);
+      }
+    }
+    return result;
+  };
+
+  // Serve items that are embedded in this service worker.
+  const maybeProcessEmbeddedResources = (event) => {
+    const url = event.request.url;
+    // Shortcut for baked-in scripts.
+    for (const [key, value] of Object.entries(EMBEDDED)) {
+      if (url.endsWith(key)) {
+        const headers = new Headers();
+        headers.set('Content-Type', 'application/javascript');
+        setCopHeaders(headers);
+
+        event.respondWith(new Response(value, {
+          status: 200,
+          statusText: 'OK',
+          headers: headers,
+        }));
+        return true;
+      }
+    }
+    return false;
+  };
+
+  // Decode JXL image response and serve it as a PNG image.
+  const wrapImageResponse = async (clientId, originalResponse) => {
+    // TODO: cache?
+    const client = await clients.get(clientId);
+    // Client is gone? Not our problem then.
+    if (!client) {
+      return originalResponse;
+    }
+
+    const inputStream = await originalResponse.body;
+    // Can't use "BYOB" for regular responses.
+    const reader = inputStream.getReader();
+
+    const inflightEntry = {
+      clientId: clientId,
+      uid: makeUid(),
+      timestamp: Date.now(),
+      inputStreamReader: reader,
+      outputStreamController: null
+    };
+    inflight.push(inflightEntry);
+
+    const outputStream = new ReadableStream({
+      start: (controller) => {
+        inflightEntry.outputStreamController = controller;
+      }
+    });
+
+    const onRead = (chunk) => {
+      const msg = {
+        op: 'decodeJxl',
+        uid: inflightEntry.uid,
+        url: originalResponse.url,
+        data: chunk.value || null
+      };
+      client.postMessage(msg, gatherTransferrables(msg.data));
+      if (!chunk.done) {
+        reader.read().then(onRead);
+      }
+    };
+    // const view = new SharedArrayBuffer(65536);
+    const view = new Uint8Array(65536);
+    reader.read(view).then(onRead);
+
+    let modifiedResponseHeaders = new Headers(originalResponse.headers);
+    modifiedResponseHeaders.delete('Content-Length');
+    modifiedResponseHeaders.set('Content-Type', 'image/png');
+    modifiedResponseHeaders.set('Server', 'ServiceWorker');
+    return new Response(outputStream, {headers: modifiedResponseHeaders});
+  };
+
+  // Check if response needs decoding; if so - do it.
+  const wrapImageRequest = async (clientId, request) => {
+    let modifiedRequestHeaders = new Headers(request.headers);
+    modifiedRequestHeaders.append('Accept', 'image/jxl');
+    let modifiedRequest =
+        new Request(request, {headers: modifiedRequestHeaders});
+    let originalResponse = await fetch(modifiedRequest);
+    let contentType = originalResponse.headers.get('Content-Type');
+
+    let isJxlResponse = (contentType === 'image/jxl');
+    if (FORCE_DECODING && contentType === 'application/octet-stream') {
+      isJxlResponse = true;
+    }
+    if (isJxlResponse) {
+      return wrapImageResponse(clientId, originalResponse);
+    }
+
+    return originalResponse;
+  };
+
+  const reportError = (err) => {
+    // console.error(err);
+  };
+
+  const upgradeResponse = (response) => {
+    if (response.status === 0) {
+      return response;
+    }
+
+    const newHeaders = new Headers(response.headers);
+    setCopHeaders(newHeaders);
+
+    return new Response(response.body, {
+      status: response.status,
+      statusText: response.statusText,
+      headers: newHeaders,
+    });
+  };
+
+  // Process fetch request; either bypass, or serve embedded resource,
+  // or upgrade.
+  const onFetch = async (event) => {
+    const clientId = event.clientId;
+    const request = event.request;
+
+    // Pass direct cached resource requests.
+    if (request.cache === 'only-if-cached' && request.mode !== 'same-origin') {
+      return;
+    }
+
+    // Serve backed resources.
+    if (maybeProcessEmbeddedResources(event)) {
+      return;
+    }
+
+    // Notify server we are JXL-capable.
+    if (request.destination === 'image') {
+      let accept = request.headers.get('Accept');
+      // Only if browser does not support JXL.
+      if (accept.indexOf('image/jxl') === -1) {
+        event.respondWith(wrapImageRequest(clientId, request));
+      }
+      return;
+    }
+
+    if (FORCE_COP) {
+      event.respondWith(
+          fetch(event.request).then(upgradeResponse).catch(reportError));
+    }
+  };
+
+  // Serve decoded bytes.
+  const onMessage = (event) => {
+    const data = event.data;
+    const uid = data.uid;
+    let inflightEntry = null;
+    for (let i = 0; i < inflight.length; ++i) {
+      if (inflight[i].uid === uid) {
+        inflightEntry = inflight[i];
+        break;
+      }
+    }
+    if (!inflightEntry) {
+      console.log('Ooops, not found: ' + uid);
+      return;
+    }
+    inflightEntry.outputStreamController.enqueue(data.data);
+    inflightEntry.outputStreamController.close();
+  };
+
+  // This method is "main" for service worker.
+  const serviceWorkerMain = () => {
+    // https://v8.dev/blog/wasm-code-caching
+    // > Every web site must perform at least one full compilation of a
+    // > WebAssembly module — use workers to hide that from your users.
+    // TODO(eustas): not 100% reliable, investigate why
+    self['JxlDecoderLeak'] =
+        WebAssembly.compileStreaming(fetch('jxl_decoder.wasm'));
+
+    // ServiceWorker lifecycle.
+    self.addEventListener('install', () => {
+      return self.skipWaiting();
+    });
+    self.addEventListener(
+        'activate', (event) => event.waitUntil(self.clients.claim()));
+    self.addEventListener('message', onMessage);
+    // Intercept some requests.
+    self.addEventListener('fetch', onFetch);
+  };
+
+  // Service workers do not support multi-threading; that is why decoding is
+  // relayed back to the "client" (document / window).
+  const prepareClient = () => {
+    const clientWorker = new Worker('client_worker.js');
+    clientWorker.onmessage = (event) => {
+      const data = event.data;
+      if (typeof addMessage !== 'undefined') {
+        if (data.msg) {
+          addMessage(data.msg, 'blue');
+        }
+      }
+      navigator.serviceWorker.controller.postMessage(
+          data, gatherTransferrables(data.data));
+    };
+
+    // Forward ServiceWorker requests to "Client" worker.
+    navigator.serviceWorker.addEventListener('message', (event) => {
+      clientWorker.postMessage(
+          event.data, gatherTransferrables(event.data.data));
+    });
+  };
+
+  // Executed in HTML page environment.
+  const maybeRegisterServiceWorker = () => {
+    const config = {
+      log: console.log,
+      error: console.error,
+      requestReload: (msg) => window.location.reload(),
+      ...window.serviceWorkerConfig  // add overrides
+    }
+
+    if (!window.isSecureContext) {
+      config.log('Secure context is required for this ServiceWorker.');
+      return;
+    }
+
+    const nav = navigator;  // Explicitly capture navigator object.
+    const onServiceWorkerRegistrationSuccess = (registration) => {
+      config.log('Service Worker registered', registration.scope);
+      if (!registration.active || !nav.serviceWorker.controller) {
+        config.requestReload(
+            'Reload to allow Service Worker process all requests');
+      }
+    };
+
+    const onServiceWorkerRegistrationFailure = (err) => {
+      config.error('Service Worker failed to register:', err);
+    };
+
+    navigator.serviceWorker.register(window.document.currentScript.src)
+        .then(
+            onServiceWorkerRegistrationSuccess,
+            onServiceWorkerRegistrationFailure);
+  };
+
+  const pageMain = () => {
+    maybeRegisterServiceWorker();
+    prepareClient();
+  };
+
+  // Detect environment and run corresponding "main" method.
+  if (typeof window === 'undefined') {
+    serviceWorkerMain();
+  } else {
+    pageMain();
+  }
+})();
diff --git a/third-party/libjxl/libjxl/tools/xyb_range.cc b/third-party/libjxl/libjxl/tools/xyb_range.cc
new file mode 100644
index 0000000000..c92aec2159
--- /dev/null
+++ b/third-party/libjxl/libjxl/tools/xyb_range.cc
@@ -0,0 +1,88 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include <utility>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jpegxl {
+namespace tools {
+namespace {
+
+using ::jxl::CodecInOut;
+using ::jxl::ColorEncoding;
+using ::jxl::Image3F;
+using ::jxl::ImageBundle;
+using ::jxl::ThreadPool;
+
+// Prints the min/max of every XYB (opsin) plane over all 2^24 8-bit RGB
+// triples, to ensure our constants are at least as wide as those from sRGB.
+void PrintXybRange() {
+  // Row y holds blue == y, column x holds red == x >> 8 and green == x & 255.
+  // The scan below uses the same layout, so no uninitialized row is read.
+  Image3F linear(1u << 16, 256);
+  for (int b = 0; b < 256; ++b) {
+    float* JXL_RESTRICT row0 = linear.PlaneRow(0, b);
+    float* JXL_RESTRICT row1 = linear.PlaneRow(1, b);
+    float* JXL_RESTRICT row2 = linear.PlaneRow(2, b);
+    for (int r = 0; r < 256; ++r) {
+      for (int g = 0; g < 256; ++g) {
+        const int x = (r << 8) + g;
+        row0[x] = r;
+        row1[x] = g;
+        row2[x] = b;
+      }
+    }
+  }
+  CodecInOut io;
+  io.metadata.m.SetUintSamples(8);
+  io.metadata.m.color_encoding = ColorEncoding::LinearSRGB();
+  io.SetFromImage(std::move(linear), io.metadata.m.color_encoding);
+  const ImageBundle& ib = io.Main();
+  ThreadPool* null_pool = nullptr;
+  Image3F opsin(ib.xsize(), ib.ysize());
+  (void)jxl::ToXYB(ib, null_pool, &opsin, jxl::GetJxlCms());
+  for (size_t c = 0; c < 3; ++c) {
+    float minval = 1e10f, maxval = -1e10f;
+    int rgb_min = 0, rgb_max = 0;  // 0xRRGGBB inputs producing minval/maxval.
+    for (int b = 0; b < 256; ++b) {
+      const float* JXL_RESTRICT row = opsin.PlaneRow(c, b);
+      for (int r = 0; r < 256; ++r) {
+        for (int g = 0; g < 256; ++g) {
+          float val = row[(r << 8) + g];
+          if (val < minval) {
+            minval = val;
+            rgb_min = (r << 16) + (g << 8) + b;
+          }
+          if (val > maxval) {
+            maxval = val;
+            rgb_max = (r << 16) + (g << 8) + b;
+          }
+        }
+      }
+    }
+    printf("Opsin image plane %" PRIuS " range: [%8.4f, %8.4f] "
+           "center: %.12f, range: %.12f (RGBmin=%06x, RGBmax=%06x)\n",
+           c, minval, maxval, 0.5 * (minval + maxval), 0.5 * (maxval - minval),
+           rgb_min, rgb_max);
+  }
+}
+
+}  // namespace
+}  // namespace tools
+}  // namespace jpegxl
+
+int main() { jpegxl::tools::PrintXybRange(); return 0; }